# HG changeset patch # User matteoc # Date 1482399931 18000 # Node ID 68a3648c7d91d5738f65b6030d0b52a100550eee Uploaded diff -r 000000000000 -r 68a3648c7d91 annotate_ends/annotate_ends.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annotate_ends/annotate_ends.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,21 @@ + + Attach fosmid ends name based on similarity + /home/inmare/galaxy/tools/annotate_ends/attach.tags.pl $ends $fos $blast $minid $alnl $out $table + "approved by the boss" + + + + + + + + + + + + + +When Sanger sequencing of the fosmid ends has been performed, assembled fosmids can be assigned to their putative clones by sequence similarity. This tool is designed to assist in this process by parsing blastN output files. In order for the tool to work properly, the Sanger ends need to be provided in a single fasta file, and the sequences need to be named according to the following convention: "fosmid name" followed by "_" and "F" for forward or "R" for reverse. The fasta file of the fosmid ends needs to be used as the query, and the search must be performed against a database containing all the contigs derived from the assembly of the fosmids. This tool requires the "standard" (12 column) output from blastN; any other format might cause major errors. The output consists of a new fasta file, where prefixes corresponding to fosmid names as provided in the input file are appended to contig names. The prefix Unf (for unassigned fosmid) is appended to contigs showing no significant similarity to fosmid ends. Minimum alignment length and identity cut-offs need to be provided as input. As a rule of thumb, the alignment length cut-off should be set to about half of the length of the Sanger sequences and the identity cut-off should be above 90%. 
+ +
diff -r 000000000000 -r 68a3648c7d91 annotate_ends/attach.tags.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/annotate_ends/attach.tags.pl Thu Dec 22 04:45:31 2016 -0500
@@ -0,0 +1,156 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Attach fosmid-end names to assembled contigs.
+#
+# Reads the Sanger end sequences, the assembled contigs and a tabular
+# (12 column) blastN report, keeps the best-scoring hit of every query and
+# writes the contigs to a new fasta file whose headers are built from the
+# names of the queries anchored near one of the contig ends.
+#
+# Positional arguments:
+#   1. fasta file with the Sanger ends (BLAST queries)
+#   2. fasta file with the assembled contigs (BLAST subjects)
+#   3. tabular blastN output
+#   4. minimum percent identity
+#   5. minimum alignment length
+#   6. output fasta file
+#   7. output table
+
+# A hit is used only when it starts within this many bases of the contig
+# start or stops within this many bases of the contig end.
+my $MAX_END_DIST=1500;
+
+die "Usage: $0 ends.fa contigs.fa blast.tab min_identity min_aln_length out.fa out_table\n"
+    unless @ARGV>=7;
+
+my $fos_file=shift;
+my $fosS=read_fa($fos_file);
+
+my $cont_file=shift;
+my $contS=read_fa($cont_file);
+
+my $bfile=shift;
+# query id -> [subject id, score, subject start, subject end, alignment length]
+my %best=();
+
+my $sim_coff=shift;
+my $len_coff=shift;
+
+my $out_name=shift;
+open(my $out_fh,'>',$out_name) or die "Cannot write $out_name: $!\n";
+
+my $out_table=shift;
+open(my $table_fh,'>',$out_table) or die "Cannot write $out_table: $!\n";
+
+open(my $blast_fh,'<',$bfile) or die "Cannot read $bfile: $!\n";
+while (my $line=<$blast_fh>)
+{
+    # Skip comment lines only; an identifier may itself contain '#'.
+    next if $line=~/^\s*\#/;
+    my ($in,$node,$ident,$alnL,$rs,$re,$score)=(split(/\s+/,$line))[0,1,2,3,8,9,11];
+    # Blank or truncated lines do not carry all 12 columns.
+    next unless defined $score;
+    next unless $alnL>=$len_coff;
+    next unless $ident>=$sim_coff;
+    # Hits on the reverse strand are reported with start > end: store the
+    # coordinates in ascending order for every hit, not only the first one.
+    ($rs,$re)=($re,$rs) if $re<$rs;
+    if ($best{$in})
+    {
+        next unless $score>$best{$in}[1];
+    }else{
+        print {$table_fh} "$in $node $rs $re\n";
+    }
+    $best{$in}=[$node,$score,$rs,$re,$alnL];
+}
+close($blast_fh);
+
+# contig id -> names of the queries anchored near one of its ends
+my %addT=();
+
+# Sorted so that the table and the tags come out in a reproducible order.
+foreach my $end (sort keys %best)
+{
+    my ($node,$rs,$re,$alnL)=@{$best{$end}}[0,2,3,4];
+    my $qlen=defined $fosS->{$end} ? length($fosS->{$end}) : 0;
+    unless ($qlen && defined $contS->{$node})
+    {
+        warn "Skipping hit $end -> $node: sequence not found in the fasta input\n";
+        next;
+    }
+    my $relL=$alnL/$qlen;
+    my $lseq=length($contS->{$node});
+    # Distance from the end of the hit to the end of the contig.
+    my $tail=$lseq-$re;
+    print {$table_fh} "Add $node $end $rs $tail $relL\n";
+    # $relL is reported in the table only; it is not used as a filter.
+    if ($rs<=$MAX_END_DIST || $tail<=$MAX_END_DIST)
+    {
+        push(@{$addT{$node}},$end);
+    }
+}
+
+my $unF=0;
+foreach my $seq (sort keys %$contS)
+{
+    my $tag="";
+    if ($addT{$seq})
+    {
+        my @adds=@{$addT{$seq}};
+        # Clone names: the query names without their last "_" suffix.
+        my @clones=map { my $c=$_; $c=~s/_[^_]*$//; $c } @adds;
+        if (@adds==2 && $clones[0] eq $clones[1])
+        {
+            # Both ends of the same clone hit this contig.
+            $tag=$clones[0]."_FR";
+        }else{
+            $tag=join("",@adds);
+        }
+    }else{
+        $unF++;
+        $tag="unf$unF";
+    }
+    my $SEQ=form($contS->{$seq},80);
+    print {$out_fh} ">$tag\n$SEQ\n";
+    print {$table_fh} "$seq\t$tag\n";
+}
+close($out_fh) or die "Cannot close $out_name: $!\n";
+close($table_fh) or die "Cannot close $out_table: $!\n";
+
+# Read a fasta file. Returns a reference to a hash that maps each sequence
+# id (the header up to the first whitespace) to its sequence.
+sub read_fa
+{
+    my $file=$_[0];
+    # Start from an empty hash so that an empty file does not return undef.
+    my $seqF={};
+    my $id="";
+    open(my $fh,'<',$file) or die "Cannot read $file: $!\n";
+    while (my $line=<$fh>)
+    {
+        chomp $line;
+        if ($line=~/^>(.*)/)
+        {
+            $id=(split(/\s+/,$1))[0];
+            $id="" unless defined $id;
+        }else{
+            $seqF->{$id}.=$line;
+        }
+    }
+    close($fh);
+    return $seqF;
+}
+
+# Split $string into lines of at most $len characters. The lines are joined
+# with newlines; no newline is added after the last one.
+sub form
+{
+    my ($string,$len)=@_;
+    my @lines=();
+    for (my $i=0;$i<length($string);$i+=$len)
+    {
+        push(@lines,substr($string,$i,$len));
+    }
+    return join("\n",@lines);
+}
diff -r 000000000000 -r 68a3648c7d91 de_prokka/de_prokka.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/de_prokka/de_prokka.pl Thu Dec 22 04:45:31 2016 -0500
@@ -0,0 +1,75 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Prefix the header of every protein in a fasta file with the contig name
+# recorded for that protein id in a gff annotation file.
+#
+# Positional arguments:
+#   1. protein fasta file
+#   2. gff annotation file
+#   3. output fasta file
+
+die "Usage: $0 proteins.fa annotation.gff out.fa\n" unless @ARGV>=3;
+
+my $fas_file=shift;
+my $ann_file=shift;
+my $out_file=shift;
+
+open(my $out_fh,'>',$out_file) or die "Cannot write $out_file: $!\n";
+
+# protein id -> sequence, with the line breaks of the input kept
+my %seqS=();
+my $id="";
+open(my $fas_fh,'<',$fas_file) or die "Cannot read $fas_file: $!\n";
+while (my $line=<$fas_fh>)
+{
+    if ($line=~/^>(.*)/)
+    {
+        $id=(split(/\s+/,$1))[0];
+        $id="" unless defined $id;
+    }else{
+        $seqS{$id}.=$line;
+    }
+}
+close($fas_fh);
+
+# feature id -> contig name
+my %decode=();
+open(my $ann_fh,'<',$ann_file) or die "Cannot read $ann_file: $!\n";
+while (my $line=<$ann_fh>)
+{
+    # Everything from the ##FASTA directive onwards is ignored.
+    last if $line=~/^\#\#FASTA/;
+    next if $line=~/^\#/;
+    chomp $line;
+    my ($id1,$id2)=(split(/\t/,$line))[0,-1];
+    next unless defined $id2;
+    $id2=(split(/\;/,$id2))[0];
+    next unless defined $id2;
+    # Keep the part of the contig name that precedes a "length" field.
+    # Joining the kept fields never leaves a trailing "_", whether or not
+    # the name has such a field.
+    my @kept=();
+    foreach my $v (split(/\_/,$id1))
+    {
+        last if $v eq "length";
+        push(@kept,$v);
+    }
+    $id1=join("_",@kept);
+    $id2=~s/ID=//;
+    $decode{$id2}=$id1;
+}
+close($ann_fh);
+
+foreach my $seq (sort keys %seqS)
+{
+    # Proteins missing from the gff file get an empty contig name.
+    my $contig=defined $decode{$seq} ? $decode{$seq} : "";
+    # Drop the trailing newline of the stored sequence so that records are
+    # not separated by blank lines.
+    (my $body=$seqS{$seq})=~s/\s+\z//;
+    # The literal "zzz" separates the contig name from the protein id; it is
+    # the text that "\z\z\z" produces inside a double-quoted string.
+    print {$out_fh} ">${contig}zzz$seq\n$body\n";
+}
+close($out_fh) or die "Cannot close $out_file: $!\n";
diff -r 000000000000 -r 68a3648c7d91 de_prokka/de_prokka.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/de_prokka/de_prokka.xml Thu Dec 22 04:45:31 2016 -0500
@@ -0,0 +1,15 @@
+ + Parsing of Prokka output + /home/inmare/galaxy/tools/de_prokka/de_prokka.pl $prot $pfam $out + + + + + + + + + + This tool add information, derived from the gff file to the headers of a fasta files containing the sequences of proteins predicted by prokka. The operation is required in order to keep track of all the information related to the proteins and the scaffolds/contigs from which they are predicted, so that they are not lost in the functional annotation step. This tool is a simple parser based on regular expressions. 
Works only with Prokka version xxx. Any usage outside A-Game is discouraged + + diff -r 000000000000 -r 68a3648c7d91 de_prokka/fas --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/de_prokka/fas Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,2151 @@ +>PROKKA_00001 hypothetical protein +MMARPGRVTSHQASVMYARPSANMRPQVGWGGGTPMPRKESPLSQMMILATRTVPSTKSG +ERMLGRMWRRMMVRFRVPRLWAALTNSFSFRASTLPRTVRA* +>PROKKA_00002 Glutathione transport system permease protein GsiC +MGRYILKRLWHTVYVVVGISVIAFFFIHLSGDPVMLMLPADASHQEIEELRQQLGFNDPL +YVQYWRFATKAVQGDFGESLYYHVPAMELILERLPASLELALAAMAIALVVAIPLGILSA +VKRGSFIDLGSMLGALFGLSMPHFWLGIMLILLFSVKLGWLPTSGRGGWEHLIMPSLALG +MSLMAMFARLTRSVMLEVLGQDYVRTARAKGLKERLVIGKHALKNALIPLVTVAGMQFGF +LIGGTVIIETVFAWPGVGRLVVQAIFNRDYPLVQATVLVLAVLFVLVNLLVDLLYVYLDP +QISYLEEK* +>PROKKA_00003 Heme-binding protein A precursor +MKKFKRLCLALGVAALGLAILAGPALAKKDVLVVIQEAEPVGLDLMTSSIQTTMSVCYNI +HDTLFAPQEDASVKPRLAESWEKVDDLTWKIHLRRDATFHNGEPVNAQAVKFSFERSFKP +SIKNPHKGKLSAFKEVKVLDDYTLLISTKEPYAPGLYILGYYLPIVPPGYIKKVGDAKYN +TNPIGCGPYKLEKWVRGEEIVLTAYDKYYGPKPAFKKVIFKGVPEEASRIAALLTGEADV +ISGVSIHQRKRILASGKAYLTNQMGVMPYLGLNTYKPPFNDVRVRQAMNYAVNRELINKA +LFGGKAILCAGPISPRTFGHDPNLKPYPYDPAKAKKLLAEAGYPNGFQTRLAYPTYMSQI +QEQAEAIAADLAKVGVKVRLEPYERAVMWQRYKARKHAMYIYWWDDAPEPDRYMYSLFNS +KVRDYYYKNPEVDKLLDLGRTILDRKKRAEVYHKIDRLLYNDAPWVYLYVIPEVFAVSNQ +VAYQGRRDGFLDMRTAKPK* +>PROKKA_00004 Oligopeptide transport ATP-binding protein OppF +VAEVLRVKELVKHFPVRQGFFGRRQGVVHAVDGVSFTLEENQTLGLVGESGCGKSTIAFC +LLRLIDPTAGEVWFQGRNLAAAGSEELRRLRRDIQIVFQDPFGSLNPRMTVAQIVEEPLL +NHLELSAARRRELVAEGLSMVGLLPEHAQRFPHEFSGGQRQRICLARALVLRPKVIICDE +PVSALDVSVQAQVLNLLSRLQRQLGLSYIFVSHDLAVIRYVSQRVAVMYLGRIVEQAGVK +ELYARPMHPYTQALLSAVPVPNPRRRRRRIILEGDVPSPLDPPSGCHFHPRCGRAMEICR +HQAPELRPLADGHLVACHLYDEVRSAPGGTVEGG* +>PROKKA_00005 Oligopeptide transport ATP-binding protein OppD +VARAVDGVDLTVGRGEILGLVGESGCGKSALALSVLRLLPMPPAFFAGGQIRFKGRDLLK +MDPEELRRLRGNQISMIFQEPMTALNPVFTIGNQLGEVFRVHQGLARREARRRAVEMLEM +VGVPAPARRVREYPYQLSGGMRQRVMIAMALACRPALLLADEPTTALDVTIQAQILELIL 
+ELRDELGTAVVLITHDLGVVAETTERLAVMYTGRIVEQAPTVELFDHPLHPYTRGLLEAI +PSAEAELADKELHEIRGVVPSLLDLPPGCNFAPRCHLADERCARQEPELVEVRPGHRVAC +WRVDRG* +>PROKKA_00006 Primosomal protein N' +MTLMVDVALAAPLWQPLTYAVPAELAPLVKPLSRLLVPLRGGARLGFALGEPLAAGGGQD +ALKPVLDVLEDGKGPQVWPPELLPFFQRAAAYYHVPLGQVLAWCLPAGMGSARPAKALAP +KTQQVAVVSWRRGEDSRLPRPESQAARILRRLKARGPLPLPELREEFPRAAALCRDLEKR +GWVTISHRPLVKDLLGRPLLPEPEPEHYTPDQQRALDELLPAVHSGGFKSFLLHGVTGSG +KTELYMACVKAALEAGRTALLLTPEIGLCLRLEGLLRQRFGAGQVAVLHSGLSPAARRGQ +WLAIARGRARVVVGARSAVFAPLREPGVICVDEEQDEAYKQEDRFRYHARDLALLRGREQ +DCPVVLGTATPAVTTYHRAQEGNTVCLRLPRRVREAPLPRMELVDLRREGRLVGGFLSRR +LLAALEQTLEAGEQAILFLNRRGFAPAYLCTACGQTVGCPACAVSLTLHQGSDRLVCHVC +GHQRPRPRTCPACGAGEEKLRPLGLGTEAVAQKLGELLPGARIARLDRDTAGDPRRLGEL +LRAIAERRVEVVVGTQMITKGHDFPGIGLVGVLSADQALALPDFRAGERAYGLLTQVAGR +AGRQGGKSRVIVQAYDPDHHALRAALAQRPDEFYQTELAERRALGYPPFMRLVALRLEAV +DDRRCQRAAQALAAGLEEARRRLEPGARVLGPAPAALPRAKARHRWMILLKAPTAAAAGR +TLRLGLHRSPPLPAGVRLLVDVDPVSLI* +>PROKKA_00007 HTH-type transcriptional regulator IscR +LALHTMAYLAAHPGRLISNRVIARDLGVSAAHLSKVLQRLARAGLLESLRGPTGGFRLGR +PAGEISLMEVYEAIDGKFQPSSCLLGRPVCRGGKCVLGELGRNLERQTREYLLNTKLSEF +EDFMCFEEGN* +>PROKKA_00008 Anaerobic sulfite reductase subunit C +MPLGKGRGGALPWDPRAEAALGRVPFFVRSLVRRKVEERVAEAGGRRVGLEDFQEAEAAF +RAVRAGKSQKELEAMLPAENRPGVEMVVVQACRSRLSNCPNPLIDTQKWLERVQAWVEEL +DLSERLRRRVADDKILFHHKLKIAIAGCPNGCSRPQIADLALVGMTRPRLVEPEVCTACG +ACAEACPDGAVSQDDGPPEFHRELCQGCLSCSRACPVGGIELDPPGVRVLMAGKLGRHPH +LARPVMEATGPEPVLAYWTRELEEYLASAPPGRRFSAWWLEQHPAG* +>PROKKA_00009 hypothetical protein +MPIPGRLLTTAMAVMPHTDVDQALASALSLDIPFWPQLPRVNYYEDMYVQASEHFPGMVV +DHKERTLVFSMDKFMVELEETLAHLEEPEYFDISPEYSVVYHRFLELELADRPAIRGQLE +GPISFGLNVKDQDDRPILFDDTVRPFLLEVMARRVNVQLTRLKARNPNAFMFVDEPGLQF +IFSGLSGYSDRKAKEDLDQFFAAIERPRGIHLCGNPDWDFLLNLDLDILSLDVYSNGEVF +SSYARSIKRFLDRGGVLAWGLVPTNFEPFSAEDHVSLKARLKEIWSALESKGVDRELMLE +RSLLSPATCCLVNPDGEKTVDKAFALVRALSAELRDEYGLDG* +>PROKKA_00010 diguanylate cyclase +VEVDISLFSNQNQFVILRVGEQAYALPAAQVREMQVLPEVTEVPRAPAHLRGIISPRGEV 
+LPLFDLRRRLGMRSLAEEADELLKILEAREQEHKQWLEELESCIREEREFTLPTDPEKCA +FGQWYQNFTTEDLALASVLERLAAPHRRVHEVAGAALEALEKEGQAAAQEVIDRARRIIL +PKLLELFAELKRLIRETHQEIAVILESGRHTLALAVDNVDSVELLQPKDLQNLERFGPVD +GSQDLLESVGRRANGETVYILKTAEFFQAATDLTF* +>PROKKA_00011 putative phospholipid ABC transporter permease protein MlaE +MAHGQEPAYEMKSEPAGNGELRVDLSGRLDMNALEGAVDQFGRLLKEQRPRRVELAVGGI +DYLDSGGALALTLMEEAARKAGTKFQLVQAGPEVRGMLALVDMDKIRRRPLRPAERGLGF +VEQVGQASLEVWRDFVELVTFLGDFLIALGRSLRRPRLVRWQETFFYMEQVGVNGLPIVG +LISFLLGLIIAFMSSLQLKTFGADAYVAALVSVAMVRELGPIMTAILVAGRSGSAFAAEI +GTMRVNEEVDALEVMGFDPTDFLAMPKVLAALAVVPMLTIYSCVAGILGGMVVGIWGLGL +TPYTYLHHTIDSLSAYGIVTALIKSVAFALIIAGIGCQRGFMVRGGAQAVGSATTSAVVT +AMFLIIVADSAFAILFYYVF* +>PROKKA_00012 Arginine transport ATP-binding protein ArtM +MSAEDPIIEVRGLKAQFGEQVILRGVSFAVARGEVVVVAGGSGCGKSTLLKHMLGLYQPA +AGSVLIDGVDIAQADAAQLEWVRRRIGVLFQSGALLGSLTLLENVMLPLVGFTPLSRRGA +ELVARLKLSLVGLSGYENHLPSELSGGMQKRAGLARAMALDPQVLFFDEPSAGLDPVTSA +ELDLLIKRINRNLGTTMVIVSHELASIFEIAHRVILLDKQAKGIIAMGPPQELSGVPRLF +PGGIL* +>PROKKA_00013 hypothetical protein +MTAPRWSATRASASSQSSRAPPGRERCSCRRPSSRRRRAGSSSRARPREPPLMAALRRAS +WGSSPRRVSSSTTSSGSRGEKFTRRQRLTRVGSTSRGSCTVSTRMLLGGGSSRVLSRAPE +LARLSRPASRITTMRQPPPKGAKLSVRITSRTCSTLIWAEGESEGGSTTKKSSWLPAARR +RQGPQAPQGVSPGSWQLAAANRARAVVVRPTPRGPTKR* +>PROKKA_00014 Thymidylate kinase +MKRAPFITLEGGEGCGKSTHASLLAQRIRELGLPALLTHEPGATELGGALRRLLADPAGP +DPCPQAELLLYLADRAQHLEQVIRPALAAGEAVVCDRFADSTQVYQGLARGLGADRVREL +NRWLCGDTWPDLTIVLDLDPALGLARARHRQGKQGLDRLEQAGGEFHRLVREGFLELARQ +EPERVRLIEAAGSRPEVARRIWEVARPLLESWRKTREA* +>PROKKA_00015 Glycosyl hydrolases family 43 +VQFVQSSYPLAGLWRPGQETPPASGDTSGSGGVSGGAGFARMLDGKLRQGEPADLSTGAT +RQTGGPVRYSPHSPLLVGASIGAAPGLFTAPVVWSDPTPAAPKGAYRMHSANPRSTFPLR +TVPPPPAPPPSSEHENEDVGLPSAISTYHGSRFRAEVQVGERQEVNADVEQFHFPHLVQK +NGLTYAYFIDHSHGSENDVGLAVSKDGVNFQYQGKVLTKGPEGFDAQMASFPAVQYDGET +NTWYMLYEAKADHDDLNTVCLATSPDGRNWTKHGPVIEPGDAGEISAVDVGTPTMFKEGG +QWHVYFHTLAKDGRVRIGYAHGENLQDLTVNQGPLLDVDPQGIEGGTVGARSNVVKVGDF 
+YYMAYEVCSPNTDFHRSQWGTNLARASSPGGPWVKMSGRPLLVNDRPGMGMDGPELSLQD +GKLYLYYRHGANATARVELSGLGDSSKMYLAHQSSPGVPV* +>PROKKA_00016 hypothetical protein +MSKLFGILAGLIGIFLLVASVISFGHLVEDAWVRGGMLASLLFALLLLLGSAAFLLTAVL +LFRMRSHYLPRLYELEELEGLEEPPPKTKDSGESNGPRLA* +>PROKKA_00017 Flagellar hook protein FlgE +MEARTLFNGALGVKAHVRGLESVSDNIANVNTYGYKATRAQFSDLLYQEMAGGAGFPQQV +GNGALTAVENMMMQAPLEPTENVLDMAINGRGFFTVKHPDRNEGNRYTRAGQFYLDKDYF +LVNSEGYRVQGFAVDADGNVNVNQVQDIQIDNQIQDATATTSVDLAVNLDASDTTEFRQA +VAIDPTDSGTYNFRMGFQVVDEDGDTQDIAVFYQKLESYTGDAPAGSQSVWKAATFHNDS +GTLTADPSYPDNTFFLHFDTNGQLVGVTTGTPATGDSYTSNAEVSSTSASVSDRLGETFA +YTGAGNTQTLRSTATITFSGTTTAGDTVTIGGTNYTFAALSPSDAAAWLADQINANSAGS +YYAQDDASGTVTLYAKDGTAAAEVSASSVVISTDDTMSLTELVNTVDSGRKATGSLFVNI +AGLTAGSSTVTVAGHTFTYGPAQDFTTLSELTTLINDLSEVDATSSGHNIYITAASVGTS +GNSLGLATNDAANVAVSASTLLNGLDDSDATNIDASATTGSGGGQALKLDRTDVGASATI +DVATTNTLGSNLGLDFTGGNFTQNSTASDGNGTSNTTGEVPLTFTFTKSGSTLTQQVTLD +YSPTDGDDSTMLAGDYETFYLKTDGRGTGYLKYLEIDDQGLITAHYTNGQGVPQAALALT +TFIAPQELLREGDNLWRATAAAGVPTVAQAGDAQTAMGEVKSYALELSTVDLAQEFVNLI +NYQRSFQANSKSIITGDEMLKTAINLKG* +>PROKKA_00018 Metallo-beta-lactamase L1 precursor +MDHAALPQTLAPGLYRLGSYHLACFLVETPDAALLFETGMSLVAPLILAQLDELGVPREK +IRWIVHSHAHSDHSTGQAALLEALPRAELLLSPTSRRHLAKPSTAEQFAKEDDSTRRALE +RIGALPPGSLPDPLPLLPARHRTVEPGDTLDLGGLTVELRSAAGHVPGGLLAWLPELGAF +LASDSAGFHMAARPNYPLYFTGYREYLRTLEEIRRTNPELLCLGHQGWFRGGEARRYLEA +LKAHLAFEHATIWEAHRRGEDEESQARRLVERYYHDELAIYPRDILWYCCRLLVRRSLEA +GA* +>PROKKA_00019 Acyl-CoA dehydrogenase +VNYEPCVKHKVVRNTVRDFAEAELRPIAHEVDQNSRFPWEVVEKMRGLQYFGLQAPRELG +GAGLDSISYAIAIEELSRVCAGIGLCVTVHNSVALYPLLKFGSPEQIERLALDLISGRRI +GAFCLTEAGAGSDAGAVETLALPCDEGYLINGTKIFVTNGGVCGLALIFAKTDLDHPRGA +PSVLMVEKERSGFAVGEIEDLSGMRCNPVSSLFLEDCLVPPENLLGRRGDGLRIGLSALD +TGRLGIAAQALGIAQGAFEAAVRYAKERQQFGKPIARFQTIQNYLADMATKIDAARMLLY +RACAAKDQGQPFSAEAAKAKLFCSATAREVCNLAVQIHGGYGYSKEYEVERYYRDAKVTE +LYEGTSEVQRMVIARAILSAPA* +>PROKKA_00020 Acryloyl-CoA reductase electron transfer subunit gamma +MKLVVFLKQVPGVTEIPWDPASGHLRREKAPGMMNPACRHALEAALILKEQHGGELTAIS 
+MGPPAAEEILREALALGADRAVLLSDPRLAGADTPATSYTLSLAVRAVCPDCDLLLLGNQ +TSDSETGQVGPHLAEELDLPSAINVEELELDGEVLRVKRLCDNFLETLEMDLPALVTINT +QGHPPRQVPLGGVEDAFSRGEFLVLNAEDLKADLARVGMTGSAGRIVKVYPAGGERKGEL +IKGAPKRCVLELLERHGDLLGGYLRKDLGGGR* +>PROKKA_00021 Acryloyl-CoA reductase electron transfer subunit beta +MSRRQQENGAVWVFGDYRNYFQNRVTLQLLARARDLASHLDTKVAVVVMGYRVGRWVREY +VAHGADVVYVLDHPSLKYYLVQTYTRLMERLAGEHQPQIILVGATGFGKELAARLASRLG +TGLTADCVDLTVDDQGRFIQTAPSFGGNLLAQIMIPQARPQMATVRPGTFQELPHDADRR +GEIIKLPLPDDLPPEKARLIHSRRIKPRRRKLEKARVVICGGRGMGSKKKFKNLYALARL +LGAQVGATRPVVYQGWAPEDALVGQAGRDVHPEVLFSFGVSGAIQHTAGIHDAQFIVAVN +KNPAAQMMKMADVAIAADANQVCLALIRELKARLEKKK* +>PROKKA_00022 Isopentenyl-diphosphate Delta-isomerase +VNYTDPQELLPVVDAQDRVIGTMTRQEIHAKGLLHRAAHVLLFDPAGRLYLQKRSAAKDT +YPGKWTSSASGHVDPAESYAQCAARELAEELGLEAELRPLGRLPAGPRTENEFVEIFTGV +SAEPPRPNPQEIETGRFFTPAQALKLAADPTRACPSLGAVLELWQELEGD* +>PROKKA_00023 hypothetical protein +MESEVLQRVAEMLRSPGAVARNRNLLEFESEAGQRAWRCYRLFLSLLAELERAAQSPEVR +VSAQETEGGLQLVLVDPRVSYRRSCLVPPELVELFLDKLTALGLLGGEKT* +>PROKKA_00024 4-chlorobenzoyl coenzyme A dehalogenase-2 +VSRPRYVRTQRHGPVTVVVMDNPATMNAMDQDMGPRLVGALESLAADRSVRAVVLTGAGG +RFSAGGNLTRAEEFLEENPGRGAAPVFAQYTIWVHRLLAVLTRLPQPVVAAVERAASGGG +LGWLLACDLVVLAEDARLSTGFLAIGLAPAAGVSWHLPRLVGLPRAAELLMLGRTLGADR +ALELGLADQLTPPGGTLEAALELAGELARGPAQALAATKQLLGGAARRGLFPQAEAERRA +VLHTADQEEFARRLERFRQRRRRS* +>PROKKA_00025 hypothetical protein +MPMDWTPPPRGGGREPDINQVVQNLKNRLPVFKKARGLWLAVAVVLAIILGASSYYTCS* +>PROKKA_00026 hypothetical protein +VSRDWFPAKVRFMSGGRAQQTPLAIRVEGRWLEVRLLGEELVAPESGLAYVRRYRLEDRR +GRRWELRQRQEGWFCRELH* +>PROKKA_00027 hypothetical protein +MDQSKALAKYREFRPAWKSFLPYFLGVVIFTVGPRVNPQAPISPDLSHLIATCFLAFILI +TRFSNLYELADGRLGWRRSFPRALERQAPVEQITRIDLRRGIFHRLAGVAHVHVYLENQQ +EPYLKLFGVSEPEELRRLLLDLGASDQRVTGAWRK* +>PROKKA_00028 hypothetical protein +MALLVSFFGKGRRGACLLGLLGLLLLAPPAVAAGAGYVVAPVNKGADLRLLPSEAWPPPE +GSRADILIKLAYLRGIMDALQYLELAPRSAERVLNAYQGLSLQDLAARIDAFYLTDPRRR +DLPPAVVLFRLLAPAGKDAPAVKGPARPGGQGGRSGK* +>PROKKA_00029 hypothetical 
protein +MQRSSGSITQLARLVESLAELLRLHYGPDGLPAAQEEDLCSQEVEGRRLGELLAELWPDS +GPWRGPCLGQGAIRETERLLRYRLQQAEAQNRALATRLERVRRQVAAQRRTLLEQLRAAR +LQVEQGQARLEALSAELGGLRAELERRRERARRRRRYLEGVVPRGMNRFADAGGRILDQA +TLRRATRELRELQALVVRTKDACARLEHRWARARLELARARSHEAGLRAELARLEPYWQA +KTHRLARAKVVLAARQEELGPLERNLHRLRVMGLAHAEVVSRGRAALEPLLAPLASGESP +DPVESLEESLTQAGEQARRGRRLTALMERLGRRLERRLEAIGPVLKEQRRLNKEITRLEN +ELPGLLEPLLAGDGADPRNRQEAGARFSLLIARLEDLIPQARATQEQLDELRQALTIGLS +RGKAWQAAWRRAGKAERAALHQAQALVEEVRLAARQAARQAEHLRRRAEPAVKALSPLRS +QDLLPSLAALAQGVSRGQLKARQLEARAAELEGRIPRPYFGNLSKPPVALKPVSAGLRRL +SGKQVELERLAALERAARRWQGLLDGPLVEEIRRPVEQVALRLARSLTLLERQKNLLASR +HQKQGRELSTLKAELDQRRRREELARRRLEQVRDRNRRQQRTIRNYETELKQAQTRAALA +QRLEDELARLGEHAQTLARRLERSDKLAAALKRKSLERHRLYRRSQYAVEWLDYWRERAL +EQEKLLSSARAELELARREYQQARSLLASAVSERDQALKELATERAARARQALDLLGGKA +LSVELAASRSEAGRWAKLAQDMALALAASGEHHRQETADLRAQVDQLSAEAAMLKRQLER +IAAMVEVQVPGLEELADLPPAPSWRRPVALRLVPLGPKQVAQALDRLSAARRRLQNLGRG +TLGHWALIAALTCGLVLTPPGTPSKATRADAPLKPPRPVLRHLAQGSPLTPIYQVPAQAR +LLGDKVARGSLELNLLPLRGQPVAVPQSVKRRLKELAREAGLSPKVLLTSARALYAGQAA +VDPSALEELAHTARQLARRHPLIFRELSRRGLPPAASAVAALAPEPEKAQHLFLDRLYRE +YRSLGFSAEEALGALAANQRAFHRLTRQWTPPRRFIGKVQPVEAVEKMGLREFLQKITPY +IQSKLKVFLRQRGMSYSGDLTLYAKNLAFDMYCAAKKFQVPVTLLLAIAHQETWYANVLG +DANRSASPFQIYEPTRELIIKSMAEAGFVPPPKRIKLQRHLTMATFMASFHLRELMQRAY +TPPRRGRQAVVNLDRVLQRYNGSSRYAAQVALRKRQLARFLRRQG* +>PROKKA_00030 2-amino-4,5-dihydroxy-6-one-heptanoic acid-7-phosphate synthase +MIGKRIRLERVMDRGTRRTVLVPLVHGVGMGPIEGITDVLNTVDTISMGGANAVVLHKGI +VAAGHRRGGADIGLVIHLSATCADGSQTLVTEVEEAVCLGADAVSLRIEVGGADEDESLA +LLGAVSRVAADWGMPLLALMNPAPIPPAKMPKLLMRAARIGAELGADVVLVPYHKRFAEV +VAATPVPVVAIGGVKKTPPKQMLEMARAAVDAGAYGVSVGRTVFQYEKPGNMIKAICQVV +HRKATVKKAMEILAKKPIESTLYGGTVIW* +>PROKKA_00031 MmgE/PrpD family protein +LTTLETLGRFAAGLKADELPPRLGEAVNRCLIDLLGAACAGHGSGAARMVRAVAGPLFAA +GPAWLWFSGRRLASPGAALANAAAASAWDLDDGHRAAAGHPGAAIIPACLAAAQETGASA +RELEAALVLGYEVACRVAAARDLARLPTMASGRWVAYGVAAAAGRLHGLDAAGLAQAMAV 
+AGVLSPDLAAAGYSRLMGNLVKEGIPWATLTGLVAVKLAAHGFSGPLDILDHPDYYQAPG +ITAGLGGGQWAIEQVYFKPYACCRWCHAAIDALLALQDEQGLDADEIAEIQVHTFERALR +LSNETGPATLEGAQYSLPFTLAVAAVEGAAGLLPLRPELLGREDLVELAGRVRLEVDPEL +EAMFPERSPARVVAMTRSGRRHEHTVLDPLGDPANPLSTARLEEKFRALTAGLLPPSRQQ +ALLVRIHALEPEGLPPLLEELGRPLQPEK* +>PROKKA_00032 2,3-diketo-L-gulonate-binding periplasmic protein YiaO precursor +VARLSEDERRIRRMVGSRIRELRQALDLRATELASRAGISQSQLSKLENGKAAISIPVLT +SLCRVLDRPLSYLFQKEEEIPRVLGTMTTVSGPENRGLEWFAAEVNRRSGGRMSIIPLWA +TMLGSAPDQVAMLRQGVIDLFIEELIFYQHIAPAVKLISLPYVFADDAHLLSFLESPFFQ +ERVHGPLTKSGIRILNRRWNWRRGLERVLVARQPVTRPEEVKGLKVRIFDSPALARFWEE +LGARPVVVPWPRVREAWEAGEFDLLPTHRSHLYPLGFCRQGRFVTLLGDVPPALAVTVNE +QKYLSLPPAVQAALEESCDAAGGFFSIEIRRAEVDNQAANLAEYGAVYLKVDLEPWRRAA +GRVVERMAREGAVDLDAWQAVQELRPAGEGA* +>PROKKA_00033 Lysine 6-dehydrogenase +MSNRDIHNATGDSVKMNALVLGAGLQGKAVIHDLSRSELVDRILVADLDLAAAQRFLDKG +GYHKVRAVQADALDPAVLRRLISENRSDIVVCMLPAHLSGRIAEVCVECGVPFVNTSYAQ +WLGELDQRARDKGVILLPEMGFDPGIDLIVGRMALDELDQVEGFYSYGGGLPDPAACDNP +LKYKITWTFDGVLKAYCRPARLLRQGRPVEIPGDEIFQEENIHFIEVPELGRLEAYPNGD +ATRFVEVFGLGPELKEMGRFATRWPGHSAFWRIMAKLGFLGDQPVELGEGVSVSPREFLV +KLLEPRLQFRENERDVVVLRVKVWGRRGGRKRTVTYDLVDYRDLATGLFAMNRTVGFAAS +IGAQMVLKGEITGAGVLSPVKVVDGQRFLDELAARGIKVQRRLEEE* +>PROKKA_00034 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00035 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00036 D-3-phosphoglycerate dehydrogenase +MFKVMIRDSMSPVAREILTATGKIEVVTDNDKAANAPEVLAEMIGEFDGLAIRSGTQVTR +AVMEKAGRLKIIGRAGIGVDNIDLEAATRQGIVVMNAPGGNTVTTAEHTVSMMMALARNI +PQATASLREGAWDKKKLIGVEIAGKTLGIIGLGHVGRIVADRARGLKMRVIAADPYVSCD +AAARINVRLVSLDELFSASDFISLHVPRLKETVNMINADTLSRMKPGVRIINCSRGDVVN +VDDLYRALESGRVAGAAIDVFPKEPPDASLPLLKHPRVVLSPHIGASTGEAQVKVARMIA +EQMAACLIDGVITNAVNFPSVSMEEMARVV* +>PROKKA_00037 Ferredoxin +MNDPIYQKLATVLDTLPNGFPATEDGKEIRLLKKIFSPEEAELFCDLKLTFETAEQIANR +TARPVEELKARLSVMQEKGQIFGIDMGGVGIYKMLPWAFGIYEFQLPHMDRELAELCEQY 
+GKTYGKQFFANKPQLMQVVPIESEIKAEHEALPYERVSTIIENSRSFMYFDCICKKEKGL +MDEPCDKPVQVCTAFAPIPGVFDDHPYGKTMTKEEAYQLLNKAEEAGLVHLTWNVKSGHF +FICNCCGCCCGVLRGINELGIDASKVINSYYYAQIDAEACVACGTCADERCQVNAIMEGD +DAYTVIAEKCIGCGLCITTCPGDAISLVRKPAAQIETPPDDEMDWYEKRAQLRGVDISDY +K* +>PROKKA_00038 Nif-specific regulatory protein +MELTNDEREFFALVNRASLLNPFSDERNDVDLKLAGLPSAAPGTGRVKKAIQSVNERIRQ +LETDGRADISQYTGRDRELVEKAFLFELFYRFRKQFDELIESQIASDDVPARIPFYNDAF +SAMQKRGFTEEDFRRYFALAFQIRRAFYFIGRSLVGNSASMKSLRLNLWNNVFTHNMDLY +DRYLWNRMEDYSTLILGETGTGKGAAALAIGRSGFIPLKKKSFEESFTRSFISLNLSQFP +ETLIESALFGHKKGAFTGAIENYQGIFEQCSPYGAILLDEIGEVSKPIQIKLLQVIQDRV +FTPVGSQTRSRFNGRVIAATNRPLETLRGKGFFRDDFYYRLCSDIIVVPPLRQRVQEDPT +ELDVLLDFTINRLVGRSSPELVQIVREVIDRHLGNDYPWPGNVRELEQCVRRVLLKGIYT +GDAAVADIDLCRSLTTGIEQGNIDANSLTSGYCYLLYQRHRTFEEVARRTGLDRRTVKKY +IQDWTSSHSTDNPPETDIPG* +>PROKKA_00039 hypothetical protein +MKIQQIIIREFEEMMAELKEVLAKMTCPLLGEDWLPM* +>PROKKA_00040 Formate hydrogenlyase transcriptional activator +MAVDKNEFFRQATIRICGSLDIETALERCFHYLEQMLPVDEIGLYLYDPGLNVFQRIAGV +KSHGKNEFSPVSPLPEASKEKWSAIWADMGDITIINRVEERPEIQEVIEMYGLEHDISLM +SMRLELEGKRVGLLLLRTRGRDRYEEKHARLMLLLHEPFAIAMTNALQHQELIRLKDILT +DDNRYLRRQIRDLSISEIVGADLGLRHVMEMVQQVTQLDSPVLLLGETGVGKGVVAHAIH +DASPRKNAPFVSVNCGAIPESLFDSELFGHEKGAFTGAIAQKKGRFERADKGTIFLDEIG +ELPPHAQVRLLHVFQEKIIERVGGTTPISVDVRIISATHRNLEEMIRSGKFREDLWFRLN +VFPIHIPPLRQRKEDIPALVHHFIEKKTIDLKMQEQPRLSPGAMDQLMAYDWPGNVRELE +NIVERALIQFKGGMLRFDGLIFSPIASSRGGGHETTDRFLSIDEVNAIHIRRALKVTNGK +INGPGGAAELLGINPNTLRKRMNKLNIPYKKKEIGQGVD* +>PROKKA_00041 hypothetical protein +MRFKTLKYILVPVLAIGLSGCASTLTLLSPPSSRLVQGKNTAGAFNSYEYQYAVRGNKIY +IKRTPLCDEVKHVMRVEQKREIGYGPALLELPLFGLGLVDIANAHAISVNSKKVTPLADY +NTGKLMACGPLQPAANEKVIIENKNLNLYRMVRTDKNGVVNLDKVLSGIGNNVNLSVRLA +NNHNVAFSCMYIANR* +>PROKKA_00042 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00043 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00044 Pimeloyl-[acyl-carrier protein] methyl ester 
esterase +MWQEHIAGRKIGLETGPRPFDPHKPCLLMVHGSGGRGETFRPQLSGLAPYLNPAAIDLPG +HGNTPGPGRDQVAHYADWLAEFIRRGPLRPALLGHSLGGAIVMQLALDHPDLAPALVLVG +TGSRLRVLPAILDGLLSDFDATLDLVLKYAYAPGADPRWVQAGREIMSQPGPRVVHDDFA +ACDRYDITDRLGEITAPTLLIYGDQDQLTPPKYGRFLAERLPDARLEIVAGAGHMVNLER +HAEVNRLIHPFISAFSPPASS* +>PROKKA_00045 Secreted effector protein pipB2 +MSETPRRPRDSRYMWRGIRPSEEELKTILEDHAQWLERLRSWEYSWREFIEEIPPPHDLS +GADLLEADLSDADLTWAKLSNAILFEADLSNADLREADLSNAKLWWADLSDADLTWAKLS +NAKLLAADLSNAELWWADLSNAKLIKADLSNADLTGADLSNCDVTGVRYHGPWLGIPFIQ +IRKPNKLTCRGIRADTCYGSPRFRRDVMDQDFLEEMRETTGGRWLYRLWWLTSNCGRSFI +RWAFFSLSLAVAFACVFCSSLGPECFDLHRAEGSRWVAEVAGRYLEVDPAYLGTTAASRG +LPGDFWTMLYYSLVTFTTLGFGDVVPLTPWAAFWVTIEVVTGYIMLGGLVSILASKMARR +AG* +>PROKKA_00046 2,6-dihydropseudooxynicotine hydrolase +MRARLFKWAGVFFLSLAMAGPAWAASRATPHRRPVLKLHFADVSYDYELKRAMSYAVSGG +ADINECLTAARAITAGDGESWYRGWHRMARRLDQMADQALKAGHRQTARQFWLRASNYYR +AAEFFLHGNPKDPRILSAWGASRRCFRQAARLMDHPVEVIAIPYEGHKLPGYLVKPDASL +KPRKTLLLQTGFDGTGEELYMEVAWYAIQRGYNVLIFEGPGQGGALREQHLYFRPDWEKV +VTPVVDYALTRPEVDPKRLALMGLSMGGYLTPRAAAFEHRLAALVADPGDFDMMVGHRPT +PAEWAGMKKYPKQANQALRAKMKHDTGFRWLVNNGMFTTGRKTPLAFLEFFSRFELTPKI +AAQIKCPTLVVVGAGDHFASPKWQRLLYDNLTAPKTLLRFGPDNPARQHCQVGGLLWGNA +KIFDWLDQVLR* +>PROKKA_00047 Methionine--tRNA ligase +MQVVQAILSKVHEAGDIYFAKYGGHYCVGCERFLTEHEMIDGKCPDHGTEPVYQEEENYF +FRMSDYTQPLKEYIRANPDFIRPERYKNEVLAILDQGLEDLCISRPKTRLTWGIELPFDQ +NFVTYVWFDALINYLSGLDWPDGELLERFWTGPKADPQHLIAKDILKPHGIYWPTMLMAL +AKAEGRPLDHYLYRHLNVHGYWQVGEGKMSKSRGNVVKPLDLAGIYGVDPFRYFLLREMT +FGLDASFSEDLLVERYNADLANDLGNLFSRVLNMLSRYRDGLLPELHPQELTEADREMKG +ALAASLGPGAEHDFQAQVREFRFHTALADLWSQVRRANKYIVAREPWVMAKDPDRAAELD +NVLYILVQLLASVTHLAWPVMPATAEKMAAMLGLELVVPVDWQRLFALELMTPGAKAEKP +QALFPRVQTDKVKAKAARKEAKQAQQQPAAKGGGKQKAKPQDKAGLITIDEFAKVELRLG +RVLEAGAIKGADKLLKLKVDLGEPEPRQIVAGIARHYRPEELVGRQVVVVANLKPAKLRG +EISQGMVLACVAEGRVRLVAPEEELPPGSVVR* +>PROKKA_00048 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00049 hypothetical protein 
+VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00050 Protein translocase subunit SecY +MFLAGALIVFRIGIHVPVPGVDPTAYAHLFNQNAGGILGIFNVFSGGALEQMSIFALGVM +PYISASIIIQMLTSVVPSLEALKKEGQAGQRKITRYTRYSTVALALFQSLGAAFALQSQG +VALTAGPGFIVTATVSLVTGTMFLMWLGEQVTERGLGNGISMIIFAGIVAGLPSAIASTL +ELVRNGELSSIVAILIFVGVLLITAFVVFVESGQRRITVNYAKRQQGRRMYAAQTSHLPL +KLNMSGVIPPIFASSIILFPVTLAGWLGQSSGFGWLNTLQLWLSPGQPLYVALFAVLIIF +FTFFYTALTFNSDETADNLKKSGAFIPGIRPGKQTAGYIDTVLTKLTLWGALYLTAVCLL +PEFLIAYAHVSFNFGGTSLLIVVVVAMDFMGQLQAHMMTHQYEGLLKRARMRGLQR* +>PROKKA_00051 50S ribosomal protein L36 +MKVRASVKKICRNCKIIRRRGAVRVICSDPRHKQRQG* +>PROKKA_00052 30S ribosomal protein S13 +MARIAGINIPPHKHTVIGLTAIYGIGRTRAAEICATAGVDPTRKVKDLSESELEAIRQAL +TAYKLEGDLRRELNMNLKRLMDLGTFRGIRHRRGLPVRGQRTRTNARTRKGRVRRSAKR* +>PROKKA_00053 30S ribosomal protein S11 +MAKPAAKARKRIKQQVVDGIVHVHASFNNTIITITDRKGNTLSWATAGGSGFRGSRKSTP +FAAQVAAERAGAVAQEYGVKNLEVEIKGPGPGRESAVRALNNLGFRVLSISDVTPIPHNG +CRPPKKRRV* +>PROKKA_00054 30S ribosomal protein S4 +MARYIGPKCKLARREGTDLFLKSPIKALDQKCKIDRIPGQHGQATRRGRMSDYGLQLREK +QKLRRMYGVLERQFRRYYKEAARRKGATGALLLQLLESRLDNVIYRMGFASTRAEARQLV +SHKGVTVNGQLVNIPSFEVKGGDEVALTERARKQNRVEMALEISRQIERPAWVEVDEKAC +KGTFKAMPEREELLPDINENLVVELYSK* +>PROKKA_00055 DNA-directed RNA polymerase subunit alpha +MEISVSEFLKPRIAGLTELGENRTRIVLEPLERGFGYTLGNSLRRVLLSSMPGAAVVEAE +IDGVLHEYTAIDGVQEDVVEILLNLKLLAIRMHAREEATLTLNATGAGVVTAGDIQVDHD +VEIVNKDLVIAHLAKNGKLSVRLKVMRGRGYMPVVKRYADESQGRKIGKLKLDATFTPIR +RVAYYVEAARVEQRTDLDKLILDIETNGTIGAEEALRRAAGILTDQLSVFADLSSVSSHT +PTESRSVKPILLKPVEELELTVRSSNALKAERIRFVGDLVQKSEDELLKTPNLGRKSLTE +IKDVLARHELALGMKLEDWPPAALAERRAS* +>PROKKA_00056 50S ribosomal protein L17 +MRHRNSGRALSRTSSHRAALMRNMSKSLIEHEQIRTTVPKAKELRRVVEPLITLAKSDSV +ANRRLAFSRLRDDAIVAKLFTDLGPRYRERPGGYLRILKAGFRPGDNAPVAIVQLVEEQE +TTSAAT* +>PROKKA_00057 Dihydrolipoyl dehydrogenase +MRKVRVAIVGAGTAGLTALAQVRRRTDEFVIVNDGPYGTTCARVGCMPSKALIHIANDFH +RRRRFAEVGIAEGETLRIDLSKALAWVRAYRDSRTADSIKLTDPLGERNIPGRAELLSAH 
+ELHIRRADGGEERIAADAVILAPGSTPVIPKSWDGFSARILTTDTLFEQRDLPRRMAVLG +LGAIGLEMGQALARLGLQVHGFELRDRLGALTDPQLIAPAIEHFSREFDLHLGAPAELHP +TGEYWRVETADAQVEVDAVLAAFGRRPRLDGLGLERLGVPLDAKGLPPVDPHTQRVADLP +IFLAGDANARSPIMHEASDDGYIAAVNALDGPTPLNRRVPLVMAFTDPEMAIVGASFESL +PAGSFDAAGYDFSRQGRAIAMRHAEGRLRVYAERNSGRLLGAEIFAPEGEHLAHLLALAL +DRGLNVAELLRMPIYHPVLEEGLRSALRALARRVYDQPPQEFRRLPEGCPGMSSSSP* +>PROKKA_00058 putative methyltransferase YcgJ +MNPYERYLLPWLIDAVCALPAAARERAKIVPRARGEVLEIGIGTGHNLPYYAPRRVAGVT +GIDPGVLRRRIMRRAHAAGIEVKLLSLSAESIPAEDASFDTLVSTFTLCSIPDVERALAE +MRRVLKPTGRLLYLEHGTAPDPRVRRWQDRLTPWWKPLAGGCHLNRDIPRLITGAGFDIV +EQHSEYIRGPRILSYVFRGEAQPIAVAGSK* +>PROKKA_00059 hypothetical protein +VVFVRLNLDAVTSNNKASDPDYFFFPASIFEKSHRHERMPKFILKDLSDYQQYRDNRELI +SSFFENLPNKTLQLIAKPLCGSSAAKLIALVAFAVL* +>PROKKA_00060 Sodium:sulfate symporter transmembrane region +MLVVIYALTTILTAFIGNNAAAVLVFPLAYAAATKLGQPFLPYAIAIAMAASASFTTPIA +YQTNLMVYGPGGYRLSDFVRFGLPLNLIVGVISVVVIAWLWMP* +>PROKKA_00061 hypothetical protein +METKTLSGNATPLTDEQWAGLARLGDLGNRLGALVDGPLSGPASAALDRIGALDGQYDLT +ALAEKLVGTLSALDRAGLLDLLRDNAQFIADNLNTLTPMLDQWLAHIAELPADEFKADAK +FALALLRKARLVTTFIQEKLAGELTTKAVEVTEFMQRNDTDEAVAEALVQLGRIYRSGLL +ARLGDLADTVAGLEEGTDLDDQIDVLIKSSTAGGLGTFLIFLKSVSIAMQKVGQEPEPKL +GGYIGMLHLLRDKEVQKGLRMLTVLPIYLEKRLEKSAS* +>PROKKA_00062 Pyridine nucleotide-disulphide oxidoreductase +MNAKPHVLVLGGNFAGLGSAQKIREFAGDAVDITVIDRKNYLLFVPNIPADVFENKDPAV +GQRLDLPPVLVKDDIYFVQGEVTELDVDNRIVHYTPSERPGAAPQKIAYDYLVVALGNRL +AFDKIEGFDEFGDSVSDIYLGNKLRKKLWEGGYKGGPIAVGSAMFHQGDGAKGLEPYPGG +SIPDALAACEGPPVEVMLSAATYLKKTGQGGPEKITVFTPAELIAEDAGEKVVGQLLDIA +SGMGFNYVNNAKDITRVTAEGVELANGQTIEAELKILFPDWVAHDFMRGLPISDSEGFVI +TDLLMKNPKYPEVFAAGDAAAVTMPKLGAIGHQECDIVGRQIACAVGRMNEAAANTPLQP +VVYCIGDMGDNQAFYIRSNSWFGGDTQVLKMGHTPFLLKMQYKNLFFRTQGKMPDWGLDF +SELMAEKIAS* +>PROKKA_00063 Citrate transporter +VSWEAILTLVVLGAVVLGLAWPRMPPDLPLVGGLAILAVTGCAPIDKVFSGFSNPGLIAI +AALYIVAAGLRHTGAVTAPARWLFGRSRRLWVAQLRIMLPTAVVSAFINNTPVVAALLPA +VLDWGKRHRFAASRLAMPLSFAAILGGTCTLIGTSTTIIVNGLLTSTTHGPGMGFFTIGA 
+VGLPVAIAGFIYILLFGRRLLPDRQGAMGEFTNPREYTVEMRVAAGSPLAGQTLEAAGLR +HLPGLYLVEIERGGNLIPAPGPEELLEENDQLVFAGIVESVADLQKMRGLIPTTGQIFKL +DTPRPDRRLIEAVIAPENPMVGRTVREGRFRSRYGAVVIAVARAGHRVTGKIGAITLIAG +DTLLIEAPSEFQRRYRHSREFLLLRPLEESVQPHYERAWIAWLILAAVIGLVTARIVPLA +PAAIFAAVAMVVTRCINLAAARRAIELQVILVIGAAFGIAAALVHTGAAALIAQPLLALA +EGSPLGMLVVVYALTTILTAFIGNNAAAVLVFPLAYAAATKLGQPFLPYAIAIAMAASAS +FTTPIAYQTNLMVYGPGGYRFSDFVRFGLPLNLIVGVISVVVIAWLWMP* +>PROKKA_00064 UvrABC system protein A +MDTIRIRGARTHNLKNINVELPRGSLTVITGLSGSGKSSLAFDTLYAEGQRRYVESLSAY +ARQFLALMEKPDVDSIEGLSPAIAIEQKASSHNPRSTIGTVTEIHDHLRLLFARAGTPRC +PHHGLTLDAQTVSQMVDTVLADPSERRVMLLAPVVHGRKGQYQELLEDLKSRGFIRARID +GTVYELDPLPRLDGHQPHDIEIVVDRFRIRSDMAARLAESFETALALADGSALIADIDKP +HTSEMVFSARHACPQCGWSIPELEPRLFSFNNPAGACPQCAGLGRESYFDPGRIITQPSL +SLAGGAIRGWDRRNPYYYRLIESLARHYEFDTEAPWSELSERTHRVLLYGSGEEEIDFTY +VSARGQKQQRRHTFEGVLNILERRYHETGSQAVRDELVRYQSSRACTACNGTRLGEIARN +VFIADTTLPDISNLAIDAVWRFFNDLDLPGRRGEIAKRIQHELHSRLGFLVDVGLGYLTL +ARSTETLSGGEAQRIRLASQIGSGLTGVMYVLDEPSIGLHQRDNRRLIDTLTRLRDLDNS +VIVVEHDEDAIRSADYLIDMGPGAGAHGGEVVATGTPEEVMNNPNSLTADYLSGRRTIPV +PQARRHPQPGQAIIIRDAHGNNLKHIEVSIPLGLFTCVTGVSGSGKSTLVLDTLQAAGER +LLNRARTEPAPHASIEGLDALDKVIAIDQSPIGRTPRSNPATYTGVFTAIRELFAQTPEA +RARGYKPGRFSFNVRGGRCEACQGDGLVKVEMHFLPDIYVPCDLCHGSRYNRETLQIHYK +GRTIEEVLNMTTEQAREFFANIPSIHHRLDTLVTVGLGYVKLGQSATTLSGGEAQRIKLA +RELSRRDTGRVLYILDEPTTGLHFHDIRQLLAVLLRLRSHGNTVIVIEHNLDVIKTADWI +IDLGPEGGHGGGQIIGEGSPEDIARLEHSHTGHYLMPLLKPHKLQKETTT* +>PROKKA_00065 Aspartate-proton symporter +MAGSGKLRKDAGIIGLLYFSLGGIIGSGWLFGPFDAAKAAGPWSIASWIIGAAVVMLLAL +VFAELATMMPKSGALIHISHIGHGELIGRIWSWILFLSSVVTPPIEVMAVLTYLNNKIPY +FVDPSTHVLSTIGFFAAIVLLGVVVVVNFFAIRFVLWINNIATWWKMFIPAISIIVLMSY +SFHPGNFHLDLGSVNAAGMLTAVSTAGIVFSFLGFRLAINLGGETKNPGKYIPIAVIGSV +LIATLIYVGLEVTTITSVRPSDFANGWPSLAFKGDAGPFAALAVTIGAVWWSWVLYADAI +VSPFGTGLIYTTNTSRLGYAMAEVGSAPKKMQKLSRQGVPWISLLVTYVIACIFFFPFPS +WHQLVGYVSDITVLSYGIGPVVLLIMRKRRPEEPRPFRLKGAKVIAPMAFIASNWVIFWT +GCTTVTFLFGLLGSLFAVYAIWYYIIARKPSKEFGWKYAWWVFPYFIGMWLLSYIGPSTL 
+GPAHVSLFNVQPLDILPLGWDMIAVAVFSLAVLYTATSSALPREEADRYFDELKKLNLPE +EYSEGTESP* +>PROKKA_00066 Cyclic di-GMP phosphodiesterase Gmr +VQATDWLHQIAGPALVLNFADGTVIDINAAGRRLVGIEGQGLIGQDFCGFFVSSDADCCW +PTLQRSINLQGGFRYEGLHLRTPGGAMRRVNVSAELLQSEQERAVLMLLQPGTASSPQAT +DHEKELAQYATVGLYRLDAEGRLTHANHALARLLGYETVGQLLDSAAVQRSQWYVCDGVS +EERVSDVNDAAIYRCKVQLRRAHGAAFRAVEAIREIRDVRGQLMSRIGTLREISDQSSSE +QALAISEDKYRSLVEHSQDGVFVIRDGVYVFVSQVYSSMLDYAPEEMVGESFLRFFAPED +RQKIVDVWHERQAGHWEQGAYEAHLLKKDGTRVLVSVRAGPIRFAGAMASTGTVRDITAY +RDTQQQLSMAEQRYRDIFEHAVIGIYQTAPDGRLLAANPAMAQILGYDSVEELQEQVDDV +GELFFDRIERDTLIEKLEAEGRMYGAELRLRHRKGTQLWVQDSARVVYDANGKLVCYEGM +VADITARKIVEQALHRSEQLFRTLVEHTHVGVIMVREGVVTYANRALAHMLDYAESDLLE +QPLASLFAPESRDCVERLEQELKTAVGSNIYESSMLAADGTRRVRANLSVASVIFEDNPV +MIITAHDLTREKRAEARLRRLATHDPLTNLPNRVVLRERLAQVLKKTRETGNVDWAVLFL +DLDAFKLVNDSLGHAAGDELLRQVAVRLRRAVRHDDLVCHHGGDEFVVLAFNITHEIDAV +ELAEHIETAMAEPFRISDHEIYNQITIGIALGRQEYELPEEVLRDADSAVAAGKRLGKVC +HVVFSSSMHVAAMERLELETTLRAGLTRGEFDCYYQPIFNVKNNRIESLEALLRWHHPEQ +GVLRPHSFLQVAEESGAIVPLGWIGLRRALAACSQWQSLGLEREVSVAVNLSDAQFRLPQ +LPEQLAHELEQAQLPFHLLHLEVTERVFLETPGLARRTLGRLHALGVKLYLDDFGTGYSA +LSYLRELPFDALKDRS* +>PROKKA_00067 Transketolase 1 +MPTRRELANAIRALAMDAVQKANSGHPGMPMGMADIAEVLYNDFLRHNPANPHWPGRDRF +LLSNGHGCMLQYAALHLSGYDLSMDEIRNFRQLHSKTPGHPEYGHTPGVEVTTGPLGQGV +ANGVGLALAEALLAAQFNRPGHKVIDHHTYVFCGDGCLMEGISHEAASIAGTLGLGKLVM +VYDDNGISIDGEVKGWFHDDTQKRFEAYGWHTIGPIDGHDAEALKKAFAEAQIETKRPSL +ILARTIIGFGAPDKQGTAEAHGSALGDAEVAKARKELGWKFPPFEIPESIYAGWDARARG +EQAETEWHERFAAYAKAHPQLAKELKRRLAGELPADWATTVEQHIAHVARNGKAQATRKA +SGATLAALAPTLPEIVGGSADLTPSNDTCWPEAKAVKPGTPEGNYLHWGVREFAMTAILN +GMAVHGGFVPYGGTFLTFSDYARNAVRLAALAHYPTILVYTHDSIGLGEDGPTHQPVEHV +ASLRAMPNLTLWRPADDVETAVAWRDAIERRDGPTMLVLTRQSVPHYERKAQQIEAIHRG +GYILHEPQNAPRALIIATGSEVDLAMQAARVLTEENLPVRVVSMPCQELFLAQDVDWQEH +VLPAQVTARVAVEAGVSMPWYRFVGIHGRVVAMERFGESAPAKQLFEEFSFTAERVAAAV +REAVAAAAG* +>PROKKA_00068 Glyceraldehyde-3-phosphate dehydrogenase +MIKVGINGYGRIGRNVMRALYESGRRDQLQVVAINDLGDAQTNAHLTRYDSVHGRFPGDV 
+QVEQGHLVLNGDVIQVLAERDPSKLPWGKLGVDLVLECTGLFTSREKASLHLQGGAKKVL +LSAPAKDDVDATIVYGVNHKTLEPEKHVIVSNASCTTNCLAPIAQVMHELAGIEGGIMNT +VHAFTNDQNLLDVYHKDLRRARAATASMIPTSTGAAKAIGLVLPELDGKLDGFAIRVPTQ +NVSFVDLTLNLTREVKVEDINRAMREAADGRLKGVLAYNEVPLVSIDFNHNSHSSTYDAG +FTKVKGRLVKVCSWYDNEWGFSNRMLDTAAVMFGRG* +>PROKKA_00069 Phosphoglycerate kinase +MGVKSLKDCELQNRRVLMRVDFNVPVNDGAIADDTRIRAALPSIHEALKAGARLMLMSHF +GRPEEGKPESRFSLHPVARRLGELLGFDVPLVTDYLARDPEPGSGRAVLLENVRFNVGEK +RNEETLARRYANLCDVFVMDAFGSAHRAQASTYGVARFAPQAVAGELLCAELKALGRALK +APDRPLIAIVGGSKVSDKIGVLDALIERCDGLVVGGGIANTFLAAAGHPVGRSLYEPGFV +DEAKRLMIAARERGVNFPLPVDAVVAEALAEDAEADVKPVHAVGAGDMVLDIGPETAVLY +RPLLANAATIVWNGPVGVFEIDQFAEGTRAVAEAVASSGAFSIIGGGDTIAALAKFGVTD +RVSYISTGGGAFLEFLEGKTLPAVDILEARASD* +>PROKKA_00070 Pyruvate kinase II +VTETMARFRRTKIVATLGPAVDEGDVLARMIAAGVDVVRLNLSHGTHAEHRKRVKAVRKA +AAEQGRDVGVLIDLQGPKIRIECFRDGPIELKEDDAFTLDCGLGSNAGDSKRVGVAYKNL +PRDVQAGDMLVLADGEIVLEVREVIGEQVHCRVETGGALSDHKGLNRRGGGLSAEALTQK +DQVDIQLAAELEADFLAISFPRVAADVERARALLRAAGGTAAIVAKIERAEAVENLDEII +DASEVVMIARGDLAVEIGDAPLPGVQKRIVRHARARNTVVITATQMMESMVTSPTPTRAE +VLDVANAVLDGTDAVMLSEETAVGRHPVKVVEAMARVCLGAEAEPREDRDRRIGGDRFEL +VDEAISMAAMSVSQHTDVTALVALTESGRTPLYMSRVRSGIPIYALTRHECTRRLLTLYR +GVYPIAFEDEHESDEVLPDVAAALLERGLVTPHALIIVTRGKLRHVSGGTNSLHLVQVAD +VLPEQVL* +>PROKKA_00071 Fructose dehydrogenase cytochrome subunit precursor +MNGHWAREKRRMRVINRLLGLSAVVMVLVLVSPTASASIASDAVAYPPVSLSNVPPAKAE +EIRKGEYLTKLSDCMACHTDHGNGKAGKPFSGGLAIKTPFGNIYSPNITPDKKTGIGNWT +FKQFDDAVRYGEGPNGYLFAAMPYNYYSMMNKDQVHAIWEYLKHVPAVNRRNKPLGMPPP +FRWRWLQFGWRFMFVKPTQGEFKYDPKHSKAWNRGRFIVEGPEHCGACHTPHNMLGGSEK +RFFLGGSDITGFWAPNISGLATKPHPIATIMRVFREGKGLGGGDLKGPMIDAIANSMRYM +TPADMRAVAVYIQSVQSEVPPGPRPVAMDEVNLARGEKTYQTDCAACHATGIGGAPRVGV +AKDWDALGKSPLFILFENVWHGVSIMPPKGGCKACTRDDVTSAIVYMLKRSTSRSSKPAV +QATTSKSGIPRDTVSLAVGDKIYHAHCAACHASGAAGAPRHGDIKEWASRLKLGLDKLHH +NALDGIGMMPPKGGCTSCSKDQILSAVDYLVDGSGGKALVEKSLSGKQGG* +>PROKKA_00072 Cytochrome bd-I ubiquinol oxidase subunit 2 
+MSGDSILRIFWWLVLGAWMSGIGVMLGRELGLTVLLRYLGRNESERRELLAPHIERPSEG +HQVWLLLGGGALMAAWWPLFTATLFGGLWLVLLFMVLAVLVGPVGHGYRKRLSEHTRGPW +DLLWAGISLAALLVFGLAIGATVSGVPLHFDAHMDAMWGGFFSRFTPYSLLVPGLMAITF +GLWLAAARAAHECTGAVAARARALLLPVGGVTLLIFAGGAAWATQLPGYAVGGLPKVGAS +PLDGTTFAVGGAYLERFLSHLPLVIVPVLTALAIVGALFFSWRGRLQRVGPLVVIAVVGM +VATLGAMTYPVILPSFAEPAQSLTLWNAAAERPVLVAFLVWLGILVPVVLGYELWLRRRN +AQTVVAGSTAR* +>PROKKA_00073 Acyl-homoserine lactone acylase QuiP precursor +MNATAPLQEQEPEPGNRRFPGLLMGGARQVARWSWQHRRYGRWPLRILLVLLLVLIILVG +AGYGLLRGSLPQTEGTVRLPGLGGRVVVTRDAQGVPTIRAHNALDAWRVLGYLEAQDRFT +QMDFMRRVAAGDLAALVGPAALPLDRIHARFDLRARAERIYLDAPSVERARLEAYTLGVN +EGLDNLSVRPWAYLLLGERPRAWEPADSVLVIYAMGWMLQNPLGPRMRARAALRSLYPPA +VTAFLGAPDTHWAAPMAGQPPALPPVPGTQLINLSASGKSRATAPVPSTAMYADTVAKLI +LPQPFPGSNSFAVSGDLTGTGHALLANDPHLSLRVPATWYRARLIYPAPGATASQPVELT +GVFLPGVPALVIGTNGHIAWGLTNSGGDWTALVRVKATAAGSRGGPLVYATPSGTATLAI +QHVLLKVRGQTARPMSIRRTIWGPVIGTTADGALLVSHWALAQPGGVNLRFMQLDSQTTV +KQALMVAGSAGIPVQNFLVADDQGHIGWTLAGRIPVRKAGCDYAVPQSWADGSCGWTGWL +APGSYPSIVDPAQGYLATANNRVDARTAAVLALGDENFADGARAHQIVSDLKALAKRGKI +TANDLHDVQLDDRAQFLQRWHDLLLNVLSPSALEFHPHRQALREAVVNWGARAAVDSVGY +RMVRAFRNEVAASMFMPILKRLHTRDPGAGLPFSNQLEGPLWRLLQVRPHNWLNPAYPTW +NALLVHAADAVIHRFWNPVSGLADATWGARNTVRINQPLAVALGPLGHWLDMPPTQLPGD +SNMPRVQTPDFGASMRMVVSPQPSAPGLFELPGGESGHPLSPWYSDEFKAWAEGLLTPLA +PGPARKTLRFIPWSRRVSDRPTVSTGSVVPAQSGQ* +>PROKKA_00074 Chromosome-partitioning ATPase Soj +MKKIVIANLKGGSGKTTVSTTLAAFWASEGYKTCLLDLDPQRAATSWLRRRPESLPSIHT +LSLPNQTSGVTLSYALRIPRDTERLVVDTPAGLSGIALADTVRGAAAVLIPVLPGTMDSD +AAARTVADLLLIAKLGRHSGRMAVIANRVRRGTLGAERLQKFISALDIPLIATLHDLQAY +SHAILSGLGLHELPRRRIGGERMAWVPLLEWLERRELEITAQTALGPRSLLTQSAGQTPS +DSME* +>PROKKA_00075 Patatin-like phospholipase +VLAGIAEWYGEDEAVPFRVISGTSAGAMNAAYLSANMENFAHGTQRLAQVWSQLEAQQVY +RPEYRKVFGALLHWAWSLLSGGLGDSNPRSLLDNSPLRALLAENIDFDAIARNIERGLLR +GVSVTVAGYSTERSLSYFQAETGVQSWWRQRREGRPVQMTLDHVMASLGLPIIFPAVKVA +GEWCGDGSTREFAPLSPAIHLGAKRVLVIDTQYPAPQHVLGQDQAYPSLSKIMGYLFDSV 
+FSDSLYADLERTKRINRTLDYIKRQSGHEPPELGLSHIDTLVIAPSRRPLEIASRYESHL +PKSMRWILRSLGGDVSSGDQLLSYMLFQSGYCSEMVALGRHDAHARREEIGQFLGLSKIK +VR* +>PROKKA_00076 glmZ(sRNA)-inactivating NTPase +MNLILLGGLSGAGKTGALDMLEDLGYQIVDNLPLSLIEPAIDAMLGDDARHHSRLAIGIA +PHNTPEEFEALARQIEIWRTRPHGCTVIYLFCEPGTLVKRYRATRRRHPLTGPDTDLAAA +IEIETTLLEPLAQLADACIDTTHTNIHQLREIIRARVNEGGDHPMALQIESFGYRRGLAQ +DADLVFDMRCLPNPYWEPTLRELTGLDQPIAEYLETHGTVTRMLSNLVNFLNAWLPSYAA +SNRSYLTIAIGCTGGRHRSVYMAEQLAAQLAHGGWAVTVRHRDLDTPTRDVKPILADD* +>PROKKA_00077 Ribosome hibernation promoting factor +MQNTITGRHLDVTPALKDYVNTKLSRLGRHHEPPTSTQIILSVENLDHKAEGILQVRGGT +VYAEANETDMYAAIDILADRLDRQLVRHKERHASHHATPTARLNMEN* +>PROKKA_00078 RNA polymerase sigma-54 factor +MNSSPRLQTGISQRLGLSPRQTEALRLLALPRPDLEQILETALEENVMLERLEPETGEGD +PEVATVMEQTEPAGEWDELSWSSSAGTGERPDMQTFEDIRPPDLRQHLIEQLVLERFSDR +DFLIALALVDSLDDNGYLREDLDTVSQELDALDPSPELIEIEAILHRVQRLDPIGIGARD +TAECLSLQLEALPPDTAGLVVARELIDGHCARLTQADMATLASLTCSDEDSVRRALSLIQ +SLNPRPGNDYSAQTAEYLIPELRTYRTPDGWQVELYPGNHPRISINATYVAWLSANRLNE +ASQSLTRQLEEARWLIRSLAQRENTLLRVARVLVRRQTAFLDQGVMHLAPLTLREVAKEL +DMHESTISRAVQGKAMSTPRGVITLRHLFSNALSNDNDEAISARAVHERLRHLLNHEDPA +APLSDAALAAALARDNMPIARRTVAKYREALGFASTRARKRPAHSVAISKG* +>PROKKA_00079 Lipopolysaccharide export system ATP-binding protein LptB +MSQLRAENLHKRYRTREVVRGLNLNVNSGEIIGLLGPNGAGKTTTFYMILGLVPTDSGNI +YLDQRDITGLPIHARARAGLGYLPQEASIFRNLSVRDNLIAVLELGGHGTRAEQQRRADE +LLDELGVTHLAKDKGISLSGGERRRVEIARALANEPAFMLLDEPFAGVDPVSVADIKRII +DHLAKRGIGVLITEHNVRETLDICNRAYVMNRGTMLAEGSPKEIADNQTVREIYLGDKFT +L* +>PROKKA_00080 Lipopolysaccharide export system protein LptA precursor +MHPKLFACCTALLACCAFPALGTPAPGSANASSSVSGVMTINADHSSMANTTGQGTEVTY +SGHVIVTRGALRLYGHSAVIHGRSNTIGKVVVTGTPARFELREPGKPHVLGEADSITYNG +KTDILQLDGQVHFSRPGEHFSAAHITYRIATRQLEASGNGNGRVHAVLSPAARTSP* +>PROKKA_00081 Lipopolysaccharide-assembly, LptC-related +VRLSRGWAVASAWAALLGAAALTSWLFLRSHDHSPATDLASHAVERPDYLLHQAIVTRFA +KDGSRRYIIKARRIAHMPRNNIALLTRVDLDYFPVHGKPWHLQSDNGRLFANGTRLNLIG 
+HVRAHELDTPIPVHFLTTEVTVLLPEARLASRYRVILRQGHRETRGTGLAANLQTGTLSL +LKDVTSQYAP* +>PROKKA_00082 3-deoxy-D-manno-octulosonate 8-phosphate phosphatase KdsC +MSTPLAAIRLLALDVDGVLTDGRLWYSETAGEIKAFNAHDGAGIKRLMREGIAVALISAR +QSPIVTHRARELGIIQVHQGVKDKGHCLTETAQAVGVRLAFCAFMGDDEADLPAFAIAGL +RIAPANAVARVRDEADWCTQATGGQGAVREVCERLLAARQANAGGKS* +>PROKKA_00083 Arabinose 5-phosphate isomerase KdsD +VNRPASATGLIESGREVIQIEAAAVSALESRLNETFAAACGLLLACRGRVVVTGMGKSGH +IGRKLAATMASTGTPAFYVHPAEASHGDLGMITSEDVMIALSNSGQTPEVVTIVPLIKRL +GVALIALTGEPDSMLARASDCHLDISVSREACPLNLAPTASTSATLAMGDALALAVSAAR +GFTPEDFARSHPGGRLGRRLLVRVADIMHTDDAMPIINESTRLGDALVTMSAKGLGMAMI +TDVQGRLAGVFTDGDLRRHLDQGVNLDTPMQHLITRECTVATPDMLAAEALRLMETRHIN +SLPVVTDDKPVGAFNMHDLLQAGVV* +>PROKKA_00084 hypothetical protein +MNLPANPRAQRRIDGLMAGQASLPGESGIDQTGRKMYAILGLDLDFSVWERLFQELLQVC +PVHAIST* +>PROKKA_00085 UDP-N-acetylglucosamine 1-carboxyvinyltransferase +LNRLLIRGGGRLDGEIRISGAKNATLPILAASLLAETPITIGNVPHLHDVTTTVTLLRRM +GVDVTVGEHMALEVDSNTIKDMVAPYELVRTMRASILVLGPLLARFGCAEVSLPGGCAIG +SRPVELHLKGLSAMGADIDVRNGYIYAKANRLRGARIFMDMVSVTGTENLMMAAVLADGE +TVIENAAREPEVVDLANCLNKMGAEIEGAGTETLHIQGVAKLEGCYYEVMPDRIETGTYL +VAGALTGGRVRVKRTRPDMMESVLEKLREAGAEITSKDDWIELDMKGRRPQAVTLRTAPY +PAFPTDMQAQFTALNAVAEGSGAVTETVFENRFMHVQELVRMGARIQLEGNTAMIQGVER +LTGAPVLATDLRASASLVLAGLVAEGETVVDRIYHIDRGYECIEEKLAQLGAHIRRVTS* +>PROKKA_00086 Stringent starvation protein A +MILYSRPDDPAAHSIRLVLAEKAIGVKIVEVEPDSPPEDLLHLNPYGTLPTLVSREVVLY +DPRIIAEFIDERYPHPSLLPSDPVLRARARLFVSEIGGSWYELCDEVANGAGRGRTRARR +ELTEAVVSSDELFTGTAYLLGGDYGLADCVAAPVLWRLPHLGVRLPREAKAIRGYMQRVF +KRPTFVYALVASERAMIES* +>PROKKA_00087 hypothetical protein +MTEKPRPSRKPYLVRALHEWMGDASLTPQIIVDATVDHVDVPVEHVHDGKIVLNLSLEAV +RDLELGNDAITCTARFGGVARSLWVPMKAVLGIYARETGEGVAFACS* +>PROKKA_00088 Chloramphenicol acetyltransferase +MEKKITGYTTVDISQWHRKEHFEAFQSVAQCTYNQTVQLDITAFLKTVKKNKHKFYPAFI +HILARLMNAHPEFRMAMKDGELVIWDSVHPCYTVFHEQTETFSSLWSEYHDDFRQFLHIY +SQDVACYGENLAYFPKGFIENMFFVSANPWVSFTSFDLNVANMDNFFAPVFTMGKYYTQG +DKVLMPLAIQVHHAVCDGFHVGRMLNELQQYCDEWQGGA* 
+>PROKKA_00089 Tyrosine recombinase XerD +VTDRTVRTWIGEAVAAAAADGVTFSVPVTPHTFRHSYAMHMLYAGIPLKVLQSLMGHKSI +SSTEVYTKVFALDVAARHRVQFAMPESDAVAMLKQLS* +>PROKKA_00090 Replication initiation protein +MAETAVINHKKRKNSPRIVQSNDLTEAAYSLSRDQKRMLYLFVDQIRKSDGTLQEHDGIC +EIHVAKYAEIFGLTSAEASKDIRQALKSFAGKEVVFYRPEEDAGDEKGYESFPWFIKRAH +SPSRGLYSVHINPYLIPFFIGLQNRFTQFRLSETKEITNPYAMRLYESLCQYRKPDGSGI +VSLKIDWIIERYQLPQSYQRMPDFRRRFLQVCVNEINSRTPMRLSYIEKKKGRQTTHIVF +SFRDITSMTTG* +>PROKKA_00091 Sporulation initiation inhibitor protein Soj +MKLMETLNQCINAGHEMTKAIAIAQFNDDSPEARKITRRWRIGEAADLVGVSSQAIRDAE +KAGRLPHPDMEIRGRVEQRVGYTIEQINHMRDVFGTRLRRAEDVFPPVIGVAAHKGGVYK +TSVSVHLAQDLALKGLRVLLVEGNDPQGTASMYHGWVPDLHIHAEDTLLPFYLGEKDDVT +YAIKPTCWPGLDIIPSCLALHRIETELMGKFDEGKLPTDPHLMLRLAIETVAHDYDVIVI +DSAPNLGIGTINVVCAADVLIVPTPAELFDYTSALQFFDMLRDLLKNVDLKGFEPDVRIL +LTKYSNSNGSQSPWMEEQIRDAWGSMVLKNVVRETDEVGKGQIRMRTVFEQAIDQRSSTG +AWRNALSIWEPVCNEIFDRLIKPRWEIR* +>PROKKA_00092 Virulence regulon transcriptional activator VirB +MKRAPVIPKHTLNTQPVEDTSLSTPAAPMVDSLIARVGVMARGNAITLPVCGRDVKFTLE +VLRGDSVEKTSRVWSGNERDQELLTEDALDDLIPSFLLTGQQTPAFGRRVSGVIEIADGS +RRRKAAALTESDYRVLVGELDDEQMAALSRLGNDYRPTSAYERGQRYASRLQNEFAGNIS +ALADAENISRKIITRCINTAKLPKSVVALFSHPGELSARSGDALQKAFTDKEELLKQQAS +NLHEQKKAGVIFEAEEVITLLTSVLKTSSASRTSLSSRHQFAPGATVLYKGDKMVLNLDR +SRVPTECIEKIEAILKELEKPAP* +>PROKKA_00093 Protease HtpX +VLFTQIIAFVLVMVVYQAYDPAPPDYGWGWGLLLFITGPLLEWLLASVIARSGLRRLARP +AADPARSLQRSEILLHLSALTVFFLFMVSYDLKAGLIATPLLAASETLSGLAALFYYALL +LIPVWGHCHRLERAAGRALALDRRRYILEQARFVAPVAFPWFLVSALRDLLTLAWPGLTA +WLETPAGDLAFLGFFLLVISWLFPPLVRSWWGCPPLPPGRAREICQMVLKVARVRVGGIL +SWDVLQGRLVTAGILGLFPRFRYLLLTPALLEALSPTELAGVVAHEAGHVRLKHIPAYLM +FFMAFFLLAYALAEPLDILLRLALLTLAQSDWGAGLLNSPDAGSTLSITFALPLLALMIV +YLRFVMGFFMRHFERQADLFALNLMGEAAPLVGALEKLALMSGQTRDLPSWHHFSVAQRV +SHLLTAQANPPAWLHRQGRLIKKALAVYLAGMVLVLGLGWGMAGLDWSRQVNQELALELV +RHQLAQHPDDPRLRFQAGMLCYQLGREDRALSHFRRAFLAAPDNPELLNAMAWIFATSQD +PRRRRPQVALVLARRAVSLSPLPHIWDTLAEAYFAAGQPVKALAAARAALEAGPKARLDY 
+YRAQLERFKRAVEDLKKKGPAGRRPRPAAPAPGGRQG* +>PROKKA_00094 hypothetical protein +VEPTVPSPLLDRRQRGDAFLRRYWKLAAPLLILFMLALFVLPWVWFSFVEALCLQVGGAG +LLYILGRLFTTAFNPAYHQAPEPQDGDAGRRDPSSSDPPPRA* +>PROKKA_00095 hypothetical protein +VVAFGFFHIETPMLLLEELFFFAPEFCDALVRLAQAPPERDWQGSWTGFEIPDPASRGDL +HGAIAGRRLEGFIGALYARWPFPRDPHQFRQRTRGAAPREVVEQEAESFGRRREVVLEAR +AGGGEFAIGQYRFHRTGFLALVDYVWRGGMPGWEGGRRPEWLLEAARRIQESDSPWLAGL +DWDPARLGFTI* +>PROKKA_00096 Glutaryl-CoA dehydrogenase +MDFELSEELKMLREMAADFAKEQIAPYADKWDEEHYLPHEEVIKPMGELGFFGTVIPEEY +GGNEMGWLAAMILTEEIARASSSLRVQINMLELGCAFTIYRYSLHDEVKKKYIPKLVSGE +WLGGFGITEPNAGSDVMGMKSTAVDKGDYWLLNGSKTWISNADIANCIIYYAYTDREARG +KGLSAFVLDLKNEDGSRPEGISVTRLDKLGSHSSPTGEIYLDNVKVPKENILGKPGDGAK +IVFSSLNQTRLSAAAGGVGLAQACLDESIKYANEREQFGKPIAKQQMIQDQIAQMAIEVE +AARLLVYKAACQKDAGNLGNTLEVAMAKWKAGEVANFCAQQALRILGAYGYSTEYPVARF +YRDAPTYFMVEGSANICKWIVALDALGLRKANR* +>PROKKA_00097 Acyl-CoA dehydrogenase +LHLDQDQQDFLRHIQQVVETQLAPLALEIESQSRFPQQAREIFARAGLFTLAVPRSYGGQ +GADATRLALMVENIARVSPSAALLVFPSNAVLRTIALTGSEEQKERLFGELVQAGDQCLA +FCLTEPDYGSEAFNLQTRAERQGDHYVVNGTKTFITLGPNARYYLTFVRTGPAPKAGGIS +ALLIPHDAPGLGFGPPEKKMGLHGSVTTNMYMKDVPVPVANRLRGEGEGWQVLTRVCNPM +RVWGAAAMALGTAQGLFDQTLAYVKANAERLNPADRQSRDFALADMKMRIEACRSLIYRV +CRMVDDPRTPPQQVDAFVSMSKCYAADTGMETGELASRILGMDLMRPDCLAGRLYLDAKA +IQIFDGTNQIQRLVVAKSLALG* +>PROKKA_00098 hypothetical protein +LGEPLGSFEGETQTVMSVGDLAYWEPGNAFCIFFGPTPASSGPEPVAASQVYPLGRVEGD +WQALSALGASVTARISAA* +>PROKKA_00099 hypothetical protein +MMYGDSSAGAGIEAQSLLCPRCGKAQPVRKKLLLVLPEGDKYAYFCAVCGEEVGSKLEES +QGGPSFIPR* +>PROKKA_00100 hypothetical protein +MRSIVIDELSSPDVDRLSEHLDQTLTPSGLSGVYWLELPEDLLLPLQQEHRQSCGPHRVA +VVVEEGCLRLELLVRAQESLRCNCTAYASSAQRDFLLDYLDRLIEELGLRT* +>PROKKA_00101 hypothetical protein +MSLPKRITAPLLSGLVLPGLGQLINRQLGKGALLICLMSLFFMSFLFLTVYQVSHAMSAL +GEAAAQSADKWQALRAQLARQGTGWLWGLGTAGLGIWLFAVIDAARVGARLDRSAAEGGQ +GES* +>PROKKA_00102 Orotate phosphoribosyltransferase +VVMEEYQKTLARLLAESGGLFFQEGLRLKDGRPTPYFVNLGVFRTGRLALELGRCFSLWI 
+HHHGLDQDLDCIVGPSYKGSAIAQATAIALYELHGKEVAYDYDRKEAKTHGEATGHGYLF +VTGAALQGGKVLIIDDVGTSMSTKLELLKKLSWLKPRLERPMELLGVVLAVDREQTQAVY +DAQGRVREGVRGPDAMESFRQESGLEVWSLLGIRQALDYLYKEGIPVLIQGEMRPLDELT +MQIAREYLELYGREEA* +>PROKKA_00103 hypothetical protein +LNRSLQNQISARYWSSHISSRNNIQNNENCACDPRLAPCLLVCRASLIPFLPFLEGAKLK +KLGAFYLGRRRRLQAGKHAGSSRHQSGPGAPRRFLFGGNCDPPWRATHPPPRRQGRPAGR +PLSTMQNKPGKGITLLSSRPPAVPPRRGRSPAAPPSPWRPAWP* +>PROKKA_00104 hypothetical protein +MKDKRQAEDESAREPFTPEEQDVDKYKREVEELKAKLAAMEKEEQQDSSPAEEAPQEDEK +KEE* +>PROKKA_00105 HTH-type transcriptional regulator cbl +VEGLFDKLSAIQKFANDLRGNRSGVLSIASTPTLTYAFLANALKRFREERPGVRILLEVT +HTQRTLELASAGQIDLGFIHGPSENPLLQFERLAASEMVCVLPPDHPLAAQPALGPRDIS +AFPLITNIRNSIAPRIEEAFRKQGVERDFAIACNHTMTVYMLVEAGAGIGLVDPWVQAER +FPTLVRRPFRPRVEVSPRAVHSRSQSLSRLAEGFLAVVREEAAVSQ* +>PROKKA_00106 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00107 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00108 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00109 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00110 tRNA-specific 2-thiouridylase MnmA +MCNLLIKFGAFNDRFGKDFDRISTGHYATRYNTDEGVFLSTAADRVKDQTYFLGQITPEQ +LAKTMFPIGHLQKKEVRKIASDMKLPSAHRPDSQGICFLGKINYTDFIKKYAGEKPGEII +ELETGKVLGTHKGFWFHTIGQRRGLRLGGGPWFVVKKDIEKNIVYVSNGYDPIAQYDDKI +WLEDLHFLNKVHDYSKLNEIKFKIRHQPEFNSGKLVRDEKGIRIVSENKISGIAPGQFAV +VYDEEERTCIASSVIAENPEIAV* +>PROKKA_00111 YhhN-like protein +MNPLLLFILIVSGGLYLAGHYLHKPILKYIFKPFTTFIILFFAFMQLPDVSVQYKDYILI +GLLISLIGDIFLLWPEKRFIHGLGAFLLAHVLFILAMVSDFGPYYNWQYLIPIALYMVIF +LWIILPKSGKFVIPIIVYALVLMVFFWQAAGRAIYLAESSSMQAMFGATLFVASDSILAY +NKFVKNYKWAEFFIIITYWAALYFIALSV* +>PROKKA_00112 Alpha/beta hydrolase family protein +MDWIIVIILAILLVAFIILFLLIQKGFRNPVSEHTIPEDLPFDVQEVEYPTKNGKTIYGW +WIPADPKAATVVFVHGWGRNAQRMMPYLRKFCCGKFNLLAFDARGHGNSDHDGFSNMLQF 
+SEDIIASMNFIEQEHKAENNMFYLIGLSIGGAASIYAAGHDPRVKKVLTVGAFAHPASVI +TKQIKDRHIPYFPMIWFLYRYMKYVKNLDVDAIAPEKHIAKAQAHFLLVHGEIDQTVPVE +QGKRLKKAAGDKADLWLMPERGHSDCHLEHGFWEKLMEFFEAPKTKVQKS* +>PROKKA_00113 Apolipoprotein N-acyltransferase +MKKFHLLLLSLLSGLLLAAAWPLHGFTPLIFIALVPLFFVQQQMGDTGKRGMLLYAWLTF +LVWNGLTTWWIWNATPVGAIVAIVLDSLFLAIVFQVFHLSKKWLFNNKQGFFILIFYWIA +WEHFNANWDLSWPWLTLGNVFASKHLWIQWYEYTGVLGGSLWILSVNILIYNIIKSFLEK +RKQRALYTTILTVLFIAVPIIISLNIYHHYKETKNPVNVVVVQPNTDPYTEEFNLPPSAL +IKRNLKLAEQKVTDSTDYVVFPESTIQEQIWEGSLNRSQSIKTLRNYVMEHPNLSMVIGA +STFRWLKPGEHRTNAARFYKKGLYYYAYNTAFFIDHSPYIQVHHKSKLVPGVEKMPSWPI +LKPLEHLALNLGGTVGTLKEDDHVSLFTNDSSGTKIAPMICYESVYGDYVRQYVAHGAEL +IFVITNDGWWGNTPGYRQHFSFAILRSIETRRDVAQSSTTGYSGFVNQRGDVLQRTKYDE +KAALSQTLNLNDKLTYFMKKGDYLAHLAGFFSILILLAAIVQGFLKKRNLPH* +>PROKKA_00114 Putative NAD(P)H nitroreductase +MKTFLDLVNQRQSDRKYIDKPVEKEKLMRCLEAARLAPSASNSQPWTFVVVNQPELCQDV +GKAAMGPLYSFNKFASQAPVILAIIMEKPKVITEVGGRIKKKEYPLIDVGITAEHFCLQA +AEEGLGSCMLGWFDEKKVKELLHVPEEKSIPLLITVGYTPENYKHRKKIRKPIDSAVKFN +TYG* +>PROKKA_00115 Transcription antitermination protein RfaH +MAKQKTQEKIWYAIYVKSRAEKKVAIELEAEGIDFYLPLEKRLKQWSDRKKWVEEPLFRS +YIFVHISHKDYYRALVQNTVKYVTFEGKAVPVPPEQIEAVKVYLEEKEPIQPNDEDWETG +KEVEVISGKLTGLKGVLMEVKGRSRVKVEIEVVSSSIILHIPKSKLRLLE* +>PROKKA_00116 Colicin V production protein +MGVNLLDIILAVPLIFFGYHGYRKGLIIEVTSLAAFILGLYFAFYFSNFTAGILKEYFTI +QTKYMAAIAFVVTFIVVLLIVLAVGKIVEKFIDILLLGFLNKLAGGLFGVLKGALFLSII +IFVINYFDASHSIIKQKAKDNSVLYKPVESIAPALYSWLHLKNFDFHLPSEESVIKTITH +RANPD* +>PROKKA_00117 GDSL-like Lipase/Acylhydrolase +MGLSYRINFKLGANAMVISDPSMPVPASLQFRQLKSDEYVMLDIPTDSLEVAYWGTKKPV +PAQYVLTEAQTAKVESAITSYNAEIKSLAKKYNLAFVDFNSIMKSIEHGGLTVDGIHFTT +AFITGNLFSLDGVHLTPQGNAVVANYFIQAINKQYGSHIPSVMVSDYPSVVFP* +>PROKKA_00118 47 kDa outer membrane protein precursor +VKWDDNWALKLLIQNISLQAIFYQPTISFNLGNKFGVGAGLVYATGNVKMNSALNYSGNS +GFNLNGKTHNFGFNVGVHYKISDQWSLGATYRSEIKMNVKNGNAAFFVPGSLSSIIPPSN +HFSASLPLPANFDFGVAYQATKKLLLAAELDWVRWSVYDSLSFHFATNPQLLNNSSPKLY 
+KDQWIPRIGAQYQVSKKLMVRAGAYYELSPANVSGYYKTNTVVPGIGINYHF* +>PROKKA_00119 hypothetical protein +MSGKLQAEFRSDLKWQPAILRKILILPSKQVLIILSLMAVVVVQARPPLF* +>PROKKA_00120 Transposase DDE domain protein +MKIIKLAMFSKTQTMLQHKDINKLSELKNGFTQSWVEPDFIFRSLKCFSFSSLNKGLSPL +KAKGYSFEWVMSLLISLPFMGISSVNRLAGVVEAKKDVFYRLKNNSSISWRYIQWLFACK +FNTITSESTGNNIQPRCLIFDDTVIEKTGRFIEKVSRVWDHVQNRGVLGFKLLVMGYWDG +TSFLPLDFSIHREVGKNKEYPYGLRKKDYRKQFKKKRSSQTHGYDRSKEAGQSKIDNMIK +MFKRALSHGFSIDYVLVDSWFTCEAIIQAVTQVKNQTVHLIGMYKIARTLFEYQGTKQTY +SQIRNRLGRPTRCRKLRLYYLQATVGFKGHQLQLFFTRQGKNGKWKVLLTTDCSIGFIRL +VEIYQTRWTIEVFFKESKQLLGLGHCQSNDFDAHIADLTITMIQHMLLTLRYRYDTYESK +GALFENVKETIAIRKLNERLWGLFVELLQILTDLFEIVDAMELLEHIITNGQALERLKLL +FDLVPENNEAA* +>PROKKA_00121 Histidine-specific methyltransferase EgtD +MSQAEAPVIIKPNFKITNFLSKEPRSSLIHEIFSGLTAKQKYISSRFFYDRKGSALFEEI +TKLPEYYPTRTEKSILSAHAKEILGNPESLVIIELGSGDCSKISILFDNFPEQKMSNVKY +IPVDVSESAIIKSAEILSSRYEGLKIHGLLADFLKHLDLLPGATPRLICFFGSTLGNMTR +NQATDFLWNLKNIMNPGDRLLLGLDRVKGPEILYKAYNDKQGITAQFNKNILNVVNDVSG +TNFKTSDFGHLAFYNQNENRVEMHLKALYDMRITSKHFRDDIFIVKGESIHTENSHKFLP +EQIEQLALSSGLKFQASFTDVNKYFSLNLFEYPKLK* +>PROKKA_00122 Thiosulfate sulfurtransferase YnjE precursor +MPQFFKQLSTRELTERLNNKDVQLIDIRPVDAYNGWASRGESRGGHIRGAKSLPFAWTKY +VDWIEMVHRKKILPENEIVIYGYPDEGFRLVANRFKKSGFEKVSIYLNFLNEWVPDTTLP +MEKLFRFQNLVPASWVNELISGGKPQHFENDKYVIVHAHYRNRDAYLSGHIPGAIDMDTL +AVEDPETWNRRSPEELKQTFEQHGITVDTTVIVYGKFMFPDNSDEFPGSAAGDIGAIRIA +LIMMYAGVKDVRVLNGGFQSWLDAGYEVSYADEPKKPVADFGATIPAHPELAVDTPEAKE +MLASSRAELVCVRSWPEYIGEVSGYNYIEPKGRIPGAIFADCGSDAYHMENYRNFDHTTR +EYHEIEDIWKSNGITPDKHLAFYCGTGWRGSEAWFNAWLMGWPKVSVYDGGWFEWSADPA +NPVETGIPENYPNRN* +>PROKKA_00123 hypothetical protein +MKAIWNNTILAESNDIVKIEGNAYFPINSVKKEYLKTSETHTVCPWKGTASYYSLEVNGK +ANPDAVWYYPEPSDLAKGIKGRVAFWKGVQVVKD* +>PROKKA_00124 hypothetical protein +MIKYYLLLAFEKKTLIRAIRVAILVGIILNLINNPDFIFHFSTNYLSLGRVLLTFIVPFL +VSTYSSVLSNSALRTGSVSHIDAILKCKSCNKTHIHVPIGHEVEECPICKKETRWRPVRI +FSGSGNRDELLKSLALFARYNPTPLFRINSDSIINEANQAAKDIFGSDELTGKNLAGIIP 
+EIKDINLNQLIHDGAIKKTIIHKEGRDYNITLKGIPELNSVHGYLNEVTNFVEPEQKG* +>PROKKA_00125 Putative thiamine pyrophosphate-containing protein YdaP +MKENLEWHKVLEKKEDLPENRIITVNAGSKQIALSHFEGKICALDNHCPHQGGPLGEGSI +ENGILRCPWHGWDYHPCTGKAPGFDDGVATYRVEERGNGIFVGIPPKKPHKTTLSDIMVE +TMVHWGVDTVFGMVGHSNLGLADAFRRQEEKGKLKYIAIRHEGAGAFAASAYGKLKGKPA +ACFSIAGPGATNMFTGLWDAKVDRSPILALTGQVATQVVGTGNFQEVDLVRAFQTVAAFN +HRVQKDSKHAELMSLAIKHALLKRDVSHLTFPDEIQEILEGKEESQTPEGRMGELQISPA +AGSMDKAVDFITKSKRPVVIVGHGARFVMEQIISFAEHLNAPVLTTFKGKGLIPDDHPLA +AGVLGRSGTPVASWFMNESDLLIVLGASFSNHTGITPKKPIIQVDFDPLALSKFHKIDVP +VWGELSTTVNILMKRLPVKPNTVDQRTELAQRWKIWRTEKQKRLLEDRGKGISSIAVFDN +LSKLIHPEAVVCVDVGNNAYSLGRYFESKNQSFLMSGYLGSIGFAFPAALGAWAATRGKR +QIVAVAGDGGFAQYMAELATSVKYNMNIKLVLLNNSELGKITKEQRSGGFKKFATDMHNP +DFAEYARGCGALGIKVSKRKDLKTKMKEFLDYKGTALLEIVTDVLLV* +>PROKKA_00126 hypothetical protein +MQEYIKTDRQIHWLSQAIAKVNRDYVPAKKDDSHTNLFLDAAGKRLFGRWINTPKGKFIL +ALNLKTLSFEWLDNPLSVKTSISVFDKEGSSIEKEIREFPVSMGMSSKDISKPLHFEIPD +YGFSIIKSNRISSFGIKQWIYYRGLANFACLSVLGYLQSESEIRIWPHHFDTGVYAQVTD +SLGFGFGWAMADSMIGEPYYYLSGYKNSSIIIYNNLSKLNFGRWVTGEQWNGTVLPLHVL +ADNSTAKALEIINTYIKESIDWFLNL* +>PROKKA_00127 hypothetical protein +MNQEKKKLVLLAFVLLLAGISPNIFPAAQSGIASMSSLAVVLLIPSVVLIFILAILSQAL +GYNDLRKQILNGILAGLAGTVGLEIVREIGFHLGGMPGDMPKLLGVLLLNRFASGPDFWS +NVAGWSYHFWNGAAFGIIFSLIIGRGKIWMGIVYALLIGTGFMVSPATTSLGIGVFGLHF +KDGYQFLTTVYLAHIAFGSIVGLVVYKKNKDAPNIFKRLKLAFS* +>PROKKA_00128 hypothetical protein +MMVMSQLDQGKILSMTGLSVKSHSFFIGNPNVGNKAFSADPAAGLVIPVRVNVYEENGKT +YVSYFKPSDLFGSFKNAKVKMIGQMLDKKLGMMLKMVTR* +>PROKKA_00129 hypothetical protein +MSVLMHNNQDGAVRFVFNNLSNKQTKQKSAETLKKLDLYKTMSKFKSTGVVYFFDAKNKS +LISHISLARSNKQLAEALTNSEKMAK* +>PROKKA_00130 hypothetical protein +MMKKLMNILFLSCLKATELIEKKLYFKLSLKEKVQLKAHKMMCDACTNYEKQSIFLDKGI +SHLNQSKIKKEDLEEIKKSIQQKLNELK* +>PROKKA_00131 RNA polymerase sigma factor YlaC +MPQTKTTDLTYLVETYTEEMVSWAMYKVSDAELARDLVQDTFLAAAEKMDAFKGESSPKT +WLFSILNHKIIDVYRNKVKQPVSFDSQVFSTYFNERGDWKKEKEPKDWHQEEKQLLDDSA 
+FQQVLQKCLESLPEKWSTCVKMKYLSEKKGEIICQELGLNPTNFWQIIHRAKIKLRDCVD +QNWFRS* +>PROKKA_00132 Bifunctional PGK/TIM +MKTIDNYDFKGKKVIVRVDFNVPLNDQFEITDDTRIRATIPTIQKLRESGGAVILMAHLG +RPKSGPEDKFSLRHVVKNLSEKLQTEVQFANDCIGDEAREKAAALKGGDVLLLENLRFYK +EETAGDEAFAKKLADLADVYVNDAFGTAHRAHASTTIIAKFFPNDKMFGYLMENEVKSLD +KVLHHAERPFTAILGGAKVSGKIEIINHLLDKVDNLLIGGGMMFTFIKGDGGKVGSSLVE +DDLIETANAAREKADKLGVSLFIPKDAVTADKFANDANQKCRPSGEIPDGWMGLDIGVET +SETFRQVIENSATILWNGPMGVFEMDAFAEGTVDVAQAIVRATEKGAFSLVGGGDSVAAI +NKYNLQDKVSYVSTGGGAMLEYMEGKTLPGVAAIKDE* +>PROKKA_00133 Methionine aminotransferase +MSQLANDHKAINLSQGFPDFPISEELIDLVHYYMRKGYNQYAPMQGVLPLRKAISTMFQK +NYGIHYDPVSEINVTAGATQALFSAISAFIKDGDEAIIFEPAYDSYAPAVKINGGMVKYA +HLEFPDFNINWEDFPRLITNRTKLIIINTPQNPTGSVLSEDDLQRLERITSGTDIIVLSD +EVYEHLIFDGITHQSVCRFPELAKRTLVIGSFGKTFHATGWKTGFVLAPERLMKEFRKVH +QFTVFASNTPIQHAIADFIGNEDNYKNLGKFYQQKRDMFVKSLNGSKFNVLPCYGTYFQL +LDYSNISDKNEMDFARWLVEKHNIAAIPIAPFYHKKDDHKVLRFCFAKKDETLVEAGEIL +SKI* +>PROKKA_00134 2-oxoglutaramate amidase +MNDLKILYIQSRLAWEDAETNRKHFEEIIQKEAQHHDLIVLPETFTTGFPVDPVPFAETE +DGESVLWMREMAAQTCAVVTGSMLLKNDGVYTNSLIWMRPDGTYERYNKRHVFRMGGEHE +KIHPGDKILLVELKGWKIRPMVCYDLRFPVWTKNHYEKDAFEYDLALFVANWPAVRAYPW +DQLLIARAIENEAYVLGVNRIGKDGLGNDYNGHSKVVDAKGNVISEAPENEEAAISVKLS +YEALQKFRAKFNVGQDWDSFTIQK* +>PROKKA_00135 Molybdenum cofactor cytidylyltransferase +MKRLGKHFAVLILAAGYSGRMGMPKAFLPYDANRTFLEKIVSEYLEFGCNLVGVVLNEEG +MKLYEKMQLEHKNNITAILNPAPEKERFFSLQTGLKRLKSEGAVFLHNVDNPFLTQDILQ +ALASAFKTQAYVVPTYHEEGGHPILLSQEIVKALIETSDYEQNLRVFMESYDQIQVPVSD +PNVLANINSPQEYERLFGRSF* +>PROKKA_00136 Free methionine-R-sulfoxide reductase +MDKEKKQKRYQRLYKQIQDLIVKSSNNPLSNMATINAVLYHKMETFFWCGFYLYQDGKLQ +VGPYQGSLACINLAEGTGVCQAALTQQKTLTVPDVEAFPGHIACDSRSKSEIVIPVRNRE +NELVGVLDVDSKEHNSFDEVDEAELEKIVRLVYFPEG* +>PROKKA_00137 6-aminohexanoate-dimer hydrolase +VLIKKAMNRVERGKYGEVHSVLIYKDGKLVLDEYFKGHDYKWEAKKHYGPMVVWDADRAH +SAHSVSKSITSLCVGIAVDKGLIKDIHQSIFDYLPEKYQYLNVGDKKYITVENLLTCSSG +LLWQEWSAPLSSKRNDQVGIYFHKKGPLDFVLNRPFVAVPGQRFNYSGGGVEVLGEIVKN 
+VSGMAFDEFSQKYLFEPMGIKTASWALKYPTGEVHAAGSLKIRPRDMIKIGAMMLNNGIW +NGKRIVSEDWVEKSRKPWGNNRGIDLPGEDLRDMGYAYNWWTKNEKINGKAVHWFSANGW +GGQQIIVLPEINTVVVLTGANYNRKVKQYALLADYIFPAIK* +>PROKKA_00138 Demethylmenaquinone methyltransferase +MQQMEILERYLNENKTINEFKKVLWIYDFWGKLTEGKAAKKVLEFAGIKNGISVLDVACG +TGEMLEKVVKLNPDGQNSGIDLSPDMIAKARKKLSKTGHLNFNLKQGSALDLPFPDNSQD +LLINSYMVDLLPVDCFDKVATEFFRVLKPGGKVVMSTFSFGTKKVHRFWFWVARKFPALL +TGCRPVSFKHFLIKAGFEIVKDVEISQNTFPSQVLMALKKS* +>PROKKA_00139 Transposase IS200 like protein +MELNVQIDHVHLVVSVPPKVSVSRLMGILKGKLAIKLFKSYPSLKEKPYWGNHFWARGYF +VSTVGIDEDVIKRYVKYQEEEEKKIETQQQRFDF* +>PROKKA_00140 Rubrerythrin +MKSLKGTRTEQNLLKAFAGESQARMRYDYFASQAKKEGLEQISALFTETSLNEKEHAKRF +FKFLEGGPTEIVAAYPAGIIGTTLENLRAAAEGEHEEWTELYPEFARVAEEEGFKEVAAA +FKMIATVEKAHEARYSKLYKNLEAGKVFQRDGVVVWKCRNCGYLHEGKKAPKKCPACLHP +QSFFEVETFGY* +>PROKKA_00141 High molecular weight rubredoxin +MNIEAFYSLSYGLYIIGTASKGKKNGYVANTAFQVTASPEQIAISCNKDNLSEQMIDESG +YFSLSVLEKDASKEIINRFGYKSGKTLDKFEGTKYFETNNGIPVVTEECVAWFECKVEQK +VDVGTHIIFIGRVLNGEYLDENKESLTYTYYRQVRHGLSPKNSPTYVDKSLLPEKEKKEE +KAEETPAEKPKGKSMQKWECIVCGHIYDPAVGDPEQNIPPGTAFEDLPDDWVCPDCGAEK +EDFEPIG* +>PROKKA_00142 Efflux pump membrane transporter BepE +MFSKIFINRPITAIVISLFIIIVGIISIFKLPVAQLPKVTPPVVSVSGHYTGANASDVEK +AVATPVENSVNGATGMLYMNSTSANSGSFNLNVTFKIGTDVNVDAMEVQNRVNLATPILP +AEIRQTGLSVKKASTSMLEIVGLYSPHGTHDEKFLSNYAALYIQNALSRVDGVGDVHVFG +NSFAMRVWLNPQKMANLHLTTQDVINAVREQNAMIPAGSVGASPAPKGQTFQVTVQVKGR +LVTAKEFGNIVVGTNPATGSVIRLKDIARVKLGSSSYAGTPRLNGKVGCGLAVYQTPGGN +ALETADLVKAKMEQLSKNFPTDVAWTTMVDNTRFVQSSIDEVVKTLFEVLLLVIIVVFFF +LQTWRPTLITMLAVPVSIIGTFAIFTLIGFTINTLTLFAMVLAIGIVVDDAIVVVEAVQH +NIDRYGLTAKEAAIRAMSEVGGPVVAIALILTAVFIPVTFMPGITGMLYKQFAFTIAISV +LLSAFVALTLTPALCSIMMRPNPVNENSKGLNRMFYKFNIWFDKTVENYGATVRKTIKHA +PLMFILLGAIYIGTGLFSKYTSTSFLPNEDQGMVMAIAQLPPDASTQRTVKVLNQFGKIL +NHNKNVKRYFLAPGFSVLQGAQMSNFGTAFIRLTNWSKRKGKNSSIQAVIGQLMGASSQI +KGAKFMIIAPPPIRGLGRTNGFSFVLKQSTGSIQDLEKVQNKFLAALNKRPEIQMAYSTA +TFNYPDIRVTIDRVKAKKMGVSLSALDNTIQTFLGGYYINDFTLFNRTFRVYAQADSSYR 
+ANINDLSEYYVRNNQGNMVPVSALVNITRGTSAPVITHYNMDRNVNISGNAAPGYSSGDV +IKVLRQVAQQVLPEGYSYEFSGTTLQEIEGGKTSTFIFILAIVFVFLFLSALYESFAVPF +AVLLAVPIGIFGAYLSLHIGGLSSSIYAQIGIITLIGLAAKNAILIVEYCKMKYESGVPL +VQAAVEAAKLRIRPILMTSLAFDLGVIPLMIATGAGANARINIGYTVFGGMLTATLLAIF +FIPLFYVTIIKIRDRKKKPELVKTED* +>PROKKA_00143 Multidrug export protein AcrE precursor +MKSEVNGRVEDILFKEGGSVKKGQPLYTINKSLYQAAYDQAAAQLNIAETNWATDTTDAR +RYKNLWAHNAVDKIQLDHAIAKVNVAKASVIAAKANLESAKTNLDHATVRAPFSGSTDVS +KVRLGDVVVAYQTPLVTIVDNSNMNADFFITENDYLQLGSSDKSIKEKLSHFRLVLPNGK +LYPYKGKLYAVDNRVDPTTGTLMVRLKFPNPEDLLKSGMNCVVRSTQNSAGKVVVIPQQA +VTQLLNEFFVYTVNSKGIVSQQKVELGAEYGNMQVIKSGLKPGTKVIVEGIESVRPGAKV +KTVPMKTGGMAKQSKPE* +>PROKKA_00144 hypothetical protein +MEFKFEKLIIWQKAMEFGEEINSIAHKFPKDEVYNLSSQIRRAVDSIALNISEGSIGQSN +LEFKKFMSYAIRSLAEVVSCLHKAKRRNYITEDEFKKQYEFAYNLMNMMVAFREKIK* +>PROKKA_00145 Outer membrane protein TolC precursor +MLLKKLNQNKFFSAFLAVSFLLSGFSVSLKAQGQKAVYQFNLNDCIHYALQNQASVKNKI +LSEKISRENVKEAYSKLMPQVSAGAKYQYTIKRQVSFIQGNPVLFGVPHQLQGYLNVDQT +LFDPSVLGSAKAAHLSENLSKENTQLSKIDVAANVKKAFYGVLVFREQLNLLNANIKRDT +KSLADTKNQYKNGLAQKVDVDRIQVLVNNDVTARANASRNLNTLIQTLKYHMGMPIKDSL +VIKGTISDAMLTEILPENNPMFYKNRVEFQQAQTTLAATKLLKSNVIRSYFPTLSAFYTL +EAPYNSNTFPGLFKDKLYPTSFVGLQLSIPIFSGFNKHYQYQAAKMNIQISKNNISDLEN +NIKLEYGNYFRQYKSDIANLKTQKENTKLAKLNYDNLKYQYDNGVQPLIEVLNAETTLLQ +AQDNYINALYQALVDKVDLDKSLGKLKY* +>PROKKA_00146 High-affinity nickel-transport protein NixA +VVLLLSLAIIIAFRKFSSNIHFLENIGGVLGTVVSASFLTLIGIINFFILKNLYRMFKLY +KKGEGAEKRIEEITENLLNKRGLLNRFFRFAYRSIDKSFKMYPLGFLFGLGFDTATEVAI +LGISATVAKDSQLPIWGILAFPLLFAAGMSLMDSLDGLIMMRIYDWAMVDAVRKVFFNMV +ITGTSVFVALAIGTIEWLQVVSIEAKESLSFFSFLNHLDFSVLGVGVVIIMLISWLSAFV +YYRKVLS* +>PROKKA_00147 Polar-differentiation response regulator DivK +MDAGEEKASPKKTILIAEDDETSFFFLKFVLAKENVNILYAQSGQEAVDICEAHPEIDLI +LMDIKMAGMSGIEATQLIKKRNPRVPVIAQTAFALSSDKENILKAGCDDYITKPIRKEEL +LEKVNFFLYSKKES* +>PROKKA_00148 manganese efflux pump MntP +MLAISIVILLILSFQVMPVALGIDSHKMNVFHVASSIFLLILGQVLLFLLGILLGDKFMY 
+LMSGFKRFVLFIGFFIIATRMIMEALEIRKGKRTYLLDKAKQFILPSIAQAINTFLAGIL +FQLLIFNLSKDLIYLGIFALAFSVPFIFIKNEKQSMLAVSLLYMVGGGILSILSFYFLFI +* +>PROKKA_00149 6-carboxy-5,6,7,8-tetrahydropterin synthase +MKKHLNINEPTRIRITKEFKFEMAHALKGYDGLCRNIHGHSYELMVTVSGFPIEEENHPK +LGMVMDFGDLKKIVKEEIVGQFDHALVLSKKMPVPLVDELKNQFERIILTDYNPTSEMML +IDFAARLKARLPENITLKHMLLRETVTSYAEWFAEDQD* +>PROKKA_00150 Gram-negative bacterial tonB protein +MYKKLLLLSLTFFMFTSFTNIASAQSNNIEIARLKQIHKLLDYRFVGGFYGFEKLFFQTV +SYPDEARQNCTLGIMIASFTVNCDGDLVGIRIRNSLGKPLDNQVSKFLKATKGHWNPCQD +KKFTHFEIPIQFTLKGTETDSTAAALVYVGKSAGYSCYPDSYYSVPRLFPGGIL* +>PROKKA_00151 hypothetical protein +MGKIVGIRFKKGGKVYDFDAGHFVLSVGDMVIVETEQGQALGEVVRPPVSHVLPELAPKN +RCCEGCEDSGDEPAQLKQVYRLATEEDLRQLVENAKLEKEAFRYCQERIAARRMDMNLVK +VECFFDRSKLMFYFTAEGRQDFRELVRDLVSRFRTRIEMRQIGVRHEAKLLGGLGSCGRE +LCCATFLRDFEPVSVKMAKEQNLSLNPTKISGLCGRLMCCLTYEFETYKGLKQGMPKLGK +RVSLNSGLEGKVIRQNVLKRQLTVILSDGREFTGTPEELEQLEPLAKPQAPPKPRGGQRQ +QRNQQQGKGQQQTNSGGKSRSRNRRRKKKGS* +>PROKKA_00152 Bifunctional transcriptional activator/DNA repair enzyme AdaA +LHACRSTLEDPTRKQPRNTSTAKAYLDEHFQHKLTLETLAGVAHLSVRQLNELFRRQIGM +TPHHYLTEVRMQQAWQLLEGTDLSVQAVAERVGYSSLAAFSDRFHQHFGHPPSHFRRTGK +TLRQNR* +>PROKKA_00153 Aromatic amino acid exporter YddG +MSSPVRTATFIGAISVVLWGTLALLTKLTGGRIPPFQLMSMTFGIAFLLMAVRWWSRGES +GLGYIRQPFPAWLLGVGGLFGYHLAYFKAMTLAPAVDVSLIAYLWPLFIVLLSALLPGHS +LRAQHLVGAVLALAGCWLLVGRNSQGFDWTYADGYLVAFGCSLIWSSYSVLSRLVRSVPT +DAVGWFCGVTALLALGCHLLWETTVWPVGTLQWLGVIGLGLGPVGIAFFTWDHGVKYGNL +PLLGTLAYSAPLISVVLLLLAGFGQASGMLFLASALIVAGSFVAGRAKHASPELAEEPVP +E* +>PROKKA_00154 hypothetical protein +MKDIQLESTIICPACGHQKTEQMPTDACQYFYECESCHTLLKPRAGDCCVFCSFGTNPCP +PVQQGDDCCASD* +>PROKKA_00155 hypothetical protein +MNKGTVLRVARPTDQLEKIAQMYMEGLGFERLGEFREHDGFDGVMLGLRSHAYHLEFTQC +QHEKAGRAPTQDHLLAFYIPDAVEWVRTCEAMVKAGFVCKPSFNPYWDRLGKTFEDVDGY +RVVIQKEQWLD* +>PROKKA_00156 Cyclopropane-fatty-acyl-phospholipid synthase +MSHTSSDHSIAFPLRQKSKLRHLAQELLAKADIRIDGDRPWDMRILKEGVLERILGEGSL 
+GLGESYMDGEWDAERVDEFVYHLIRAQLDREVRPWNLILHGLRYRLFNMQSLRRAWMIGQ +RHYDLGNDLYEAMLDPLMTYSCGYWKTATHLAAAQEAKLELICRKLQLKPGMRLLDIGCG +WGSLMAYAAQHYGVECVGVTVSEEQVKWARHQYKGLPVEFRLQDYRTLDEQFDCIASVGM +FEHVGHKNYREFMQVAHRCLDDGGLFLLHSIGNNSRDSGSDPWIDKYIFPNGELPSVGQI +GDAADDLFVIEDLHNFGADYDKTLMAWHANFEAAWPKLAYLGERFRRMWTYYLLSCAGTF +RARDIQLWQWVLSKRGVQSGYIRPYF* +>PROKKA_00157 hypothetical protein +MQTLNIPSLATVTAITLLTTACANNPTPETLQSRGLHPLDTTQLHQLYSKTLQFDWRNAR +SRSGSGEYQPNGEISIEWSGESFNGKWRILNNHFCATYASIHNGQEQCYMVYQTGARRYV +AFLNGDYSYSFNVKKVK* +>PROKKA_00158 hypothetical protein +MLSRAIMAPMDLVLDTAEAQRMALFFDHILIWKLSRRTFNKEDNQRYSSELRYLRERGVA +LLCGLDIPNLISFGRADGTTWNPMEEMKKDCDLLLPFQVGTGVPDQAENEAHADRLIRHL +SSRLMYNDKPVVAHAEAVNLNTQGNELNALEITINNIPMPPENIPWEDLIQFRNEEETVA +KLRALRIWLKDRSSAGQSPREIQEELEHLLYEYRKYMEIQHKKFRQGILSTLISSTPEIV +ASVATLNFGAAIKSVFDIKGRYLGLSEAELSAPGREVSYIAKARDFLTS* +>PROKKA_00159 hypothetical protein +MQDLPVAVKDEEAAHPIAASCRPMLRKVVSAFVRGDYQLSQLIEGVSPVPPDVAAHIQDY +IAGYGETLVELPEETWSTSCVQWMWSYWDVLVDLYTEREGASDLVLTRRMDEVEGKPQFT +VGLVYVPKPQPNHAFQFVPWLTATPPDVLKRAAEFRGRL* +>PROKKA_00160 hypothetical protein +MNTQKLINRALSGLDDLGYQLDSMGITSKVNRHNVIAYVMAEQKHWEGEYDSLVARIDQQ +RFRVEQIVGRVEGLVRGGAEFALKPVSGLRSLVKA* +>PROKKA_00161 Sensor histidine kinase YycG +VKIHSQLRNSTFQIALLYMVVFATSVFLLLAFIYWRTAGFMTAQTDETIEAEITGLAEQY +RSRGINGLIAIVRERVARDPNGKSLYLFTTSDYDKLAGNLSAWPQNVQASNGWINFTLND +SVGWRGEPHLARARVFKVQGGLRLLVGRDVQELTTLKHLIERAIDWGMGITLALSLFGGF +MISRSTAKRIEVINQTARKIMNGHLSLRIPARGTGDDFDQLADNLNQMLDRIVHLMEGIR +HVSDSIAHDLRTPLTRLRTQLENTLLTVEGDAARDQVARAVAETDQLLATFNALLRIARL +EMTGHSADKSPVQLGPLVHDACELYEALAEDKEQEFVLDIPQDVTIEGDRDLIFQVVSNL +IDNAIKYTPPEGNIRVLVTQEEDDAIFQVEDSGIGVPDSEKDKVFERFYRVAKSRSQPGN +GLGLSLVSAVVDMHQGRIELADRYTDGRENPGLKVTLRFPRLKPNRRKEIKPTSTTEPEG +SAS* +>PROKKA_00162 Transcriptional regulatory protein CusR +MFGITATEAQSEYEPTEEPQGETKLRVLVIEDDQDVAAYLIKGLKESDYVVDHAADGKTG +LLMAAGEDYDMMIVDRMLPGMDGLNIIKTVRATGNTTPVLILSALGDVDDRVEGLRGGGD +DYLTKPFSFTELLARMEVLVRRTRSSNEPETVLKVADLEMDLLARTVKRAGQSIDVQPRE 
+FRLLEYLMRHAGQVVTRTMLLEKVWDYHFDPQTNVIDVHISRLRAKIDKGFEKPLLQTVR +GAGYMLREDT* +>PROKKA_00163 Hydroxypyruvate isomerase +MPRLAANLSLLFNEVPFLERFEQAARAGFRAVECQFPYAWAPEAIAAQLQGQGLQQVLFN +LPAGDWDGGERGIACLPGREADFREGVERALRYAEIMKCRQINCLAGPLPTGAPPEPYWA +TFEANLRWAAPRLAEQDITLLIEAINSKVDVPGFLLDHSKLALDLIDRLNLPNLKLQYDL +YHMQIMEGDLLRTLGANLPQIGHIQFADNPGRHEPGTGEINFRRIFEQLDAWGYEGWVAA +EYVPEVGTFDGLSCLKSWL* +>PROKKA_00164 2-hydroxy-3-oxopropionate reductase +MTDLPRIAFLGIGLMGRPMATNLINAGYPVTVWNRSPEKARALAGQAGVAESAAQAVAQA +DRIITMLENGDAVQQVLVEQGVAEAIQPGAVFLDMSSIAPEMAKSHAGRLKARGVGYIDA +PVSGGTVGAEQATLSIMAGGSHEDLEAVRPLLETLGRVTHIGPAGSGQLAKLANQAIVGI +TIGAVSEALLLAAKGGADPEAVREALLGGFAGSRILELHGQRMLARDFEPGAPSRIQLKD +MRMILDQARAEDLTLPLAQQAFQSYRALIALGEGECDHSALLLQLEHLNQTRMSDPSDGQ +ER* +>PROKKA_00165 RNA polymerase sigma factor RpoH +MGTSLQVMDKLIPGANVQAYIQGVNAIPMLTVEEERELAARLQQDNDLEAARRLVLSHLR +FVVHIARSYSGYGLAQADLIQEGNVGLMKAVKRFNPDYGVRLVSFAVHWIKAEIHEFILR +NWRIVKVATTKAQRKLFFNLRSAKKRLAWLNNDEVTAVAADLGVEPRVVREMEGRLAAQD +TAFDAPTDDDDDNAWQAPAYYLEDRRYDPAQQLEAADWTEDSNSRLLEAMDSLDERSQDI +LRERWLSESKSTLHELADKYGVSAERIRQLEKNAMKKIRKMMGEESIA* +>PROKKA_00166 Cell division protein FtsX +VDESREKSGRRGAGVAENPTRELLDAYASHHRKIARDSLIRLLRNPIGSLMTWLVMGVAL +ALPLGLMLLLASAQSLGEGWSDSSRINLYLKQNVDETAAMNLQGKLRSRGDVRDVQLVTR +KQALAQLRKDSGLSAAFDYLNDNPLPNTLIVAPALQDPGAVQSLSQSLKQLPQVAEVQVD +LAWLKRLRAMIGLVVNAVWALGVLLALAVLLVVGNTIRLAIENRRDEIVVAKLVGGTDAF +VRRPFLYTGAWYGLGGSIVAIILVALFEAWLDGPVNRLASLYGSHFQLQGAGFGDFLLVI +MVGVLLGWMGSWLAVKRHLDAIEPR* +>PROKKA_00167 Cell division ATP-binding protein FtsE +MIRFEHVTKRYEGGHVALRDVSFALERGEMAFLTGHSGAGKSTLLKLIMLMERASEGQVV +IGGQVLDKLPRRRIPYIRRHIGVVFQNHQLLFDRTVYDNVALPLEVMGIAPREVGRRVRA +ALDKVGLLSKERMNPMELSGGEQQRVGIARAVVNKPPLLLADEPTGNLDPELSASIMHLF +EAFNQVGVTVLVASHDISLIRHLGHRVITLDGGRLAQGDRMPDEEALYG* +>PROKKA_00168 Signal recognition particle receptor FtsY +MTAEWISVGLLALLVLAFVIDIGLRLRKPPQKPQPPVAERPPAAPEQREAVPEAKAPPPR +PEAPAAEEKPAKAEPEAEVVEPEAEAPPAPPVAEEAPPVEAPPAETPAAEPEVEEAPVNW 
+FARIKQGLGRTRGNFSEGLSNLLKGQKAIDDELMEDIETLLLTADVGVTATTEIIDTLTE +KLERKQLKDGDALKQALREELHGILAPSTAPLNIDDGHKPYVILMVGVNGVGKTTTIGKL +ARRFQDQGKRVMLAAGDTFRAAAVEQLQVWGERNNVPVIAQQTGSDSASVIYDAVQSAQA +RGFDVVIADTAGRLQNKENLMSELEKVVRVMKKLDPEAPHEVMLVLDAGTGQNALSQAQI +FQQAVGVSGITLTKLDGTAKGGIIFAIARQLKLPIRYIGVGEQVGDLRPFQAEEFVEALF +DEPA* +>PROKKA_00169 Ribosomal RNA small subunit methyltransferase D +MPPTRRSPPRQQRVTAKKAGGLSRLRIIGGQWRSRQVPFPPVEGLRPTPDRVRETLFNWL +AGDIPASRCLDLFAGSGALGLEALSREARHLVFVDTASEVIRTLRENLRTLGCQQADVFQ +QDAEQFLQRPPATPYDVIFLDPPFRQGWLDKVIPLLQQPGWLKPGGWVYVEHEAELNARP +WPSHWHEHRQKEAGQVVYRLFHVADALKDTAEGVERAS* +>PROKKA_00170 Lipase 3 precursor +MHLWRIVWVLIVLVAVLGGVYFLFPGTIVNADKSFELWRAGLAVHDINVDDQHIHYVDSG +GQGRVVLMLHGFAADYYSWPRMARYMKAGYRVIAPDLPGFGQSSRIAADNYGISQQAQRM +HDFLRALNVDKVDIVGNSMGGWIAAEFAARFPAQTRTLTLIDTGGITAPHPSPFMQAVEK +GENPLVVHNRAQFNHLLTIVFHHQPFIPGPLKGYFAKQAVEHAAFNEKVFKDLTDDYVDL +EPLLPKLTMPTLVMWGRYDQILDPSCVEVLKAGLPNATIKWFDTGHAPMLEQPKASAEVL +KAFLQANRGD* +>PROKKA_00171 Fatty acid desaturase +MWLYGLFHHLPAWQLILIALAMTHVTIVSVTVYLHRHSAHNSVDLNPVVAHFFRLWLWLT +TGMVTKEWTAIHRKHHATCETEEDPHSPVVKGFSEIMWRGAENYRAAISDEICERYGQRT +PEDWVERNVYSRYRLGGVALMAVIDLLLFGVNGIWIWAVQMMWIPIFAAGVINGIGHFWG +YRNFECADNARNIVPWGILIGGEELHNNHHTFPNSSKLSRRWWELDIGWGYIRLLQLFGL +AKPKGYRPIAHQIPGKMDMDVETVQAIANNRFHVMRLYRKRVLEPVLRQQRSVVEKDIKP +LYRRVRKLVFREESLIKPQERQSLEQVLQNSAVVRLIYEKSHELQAIWQRRPGMRPQDKL +NALVEWCHQAEESGVRYLEEFAATLRSYSLRPQTA* +>PROKKA_00172 hypothetical protein +MENINRITTGNCSIDPIDNTEPQAYQLRVF* +>PROKKA_00173 Formamidopyrimidine-DNA glycosylase +VPELPEVETTRRGIEPHLVGHTVTQLQVRESRLRWPVPDKLDQMLPGQKVGQVARRGKYL +LVHLERGTLLVHLGMSGSLRVVTRAEALRKHDHIDLTTDAGTIIRFNDPRRFGAWLWTED +WQHHPLLASLGPEPLSPAFSGHYLHRQSRRRKAPIKQFIMDSHMVVGVGNIYANEALFIS +GIDPRRPAGRISAARMEALVLAIQQVLENAIAVGGTTLRDFVNSEGQPGYFRQSLQVYGR +EGQPCRRCGKPLRQLRLGQRSTVFCGHCQR* +>PROKKA_00174 hypothetical protein +MRPKLSHALAAILAACVMSLAPMTQAETTQSTPSALAMTGDALFARPALLAMTLVGSAVY +VVSLPFSLLGGNASEAGKVLVVDPAKATFTRCLGCTMNQNRQNEQKNQNQVATADNTDTT +SN* +>PROKKA_00175 GMP/IMP 
nucleotidase YrfG +VLACFYFFADSPMLNWSQIDTALLDMDGTLLDLHFDSHFWLEHLPRRYAELKHLDPEHAR +QSLLSKIEQLRGKLDWYCIDFWSDLLDLDVVALKRETRDRIAWRPHSKAFLERLRACGIR +RVLVTNSHPDGLNLKIETTGIDQHLDRLFSSHSFGQPKEGPDFWEQLAQQEPFDPERTLL +IDDSLPVLESARRYGIRHLLAILSPDSQQPPRQPSHHPCVHDFDELFQSLDQFAHQKNRI +DGLSD* +>PROKKA_00176 Heat shock protein 15 +MTAHSDQQQQEARIRLDKWLWAARFYKTRTLAKEAIEGGKVHYNGQRTKPGKVVELGARI +RLKQGWAEKEVVIQGLSDRRGGAPQARELYQETDDSQQRREDEHWQRKMMQAAQMPPARR +PNKKQRRELQRLKSGQG* +>PROKKA_00177 33 kDa chaperonin +MTGQDQLQRFLFENSNIRGSIVRLDDTFQQATGQQDYPTVVRNLVGQSLAACALMGDSLK +FQGSLSLQAQGEGPLRLLVSDSTDQLTLRGLAHWNPEAAEAETLPALIGNGHLVITITPD +AGQRYQGIVPLEQDTLAGCLEDYFRLSEQLATFMCLFADEKGAAGLLLQQLPGELAGPDT +DLWPRAIKLAQTLTTEEALQLPSEELIHRLYHQEQVRLFPARATRFGCSCSRERTRLALE +SLGQDDCMALLDEQEVIEIDCHFCGQRYRYDRADVRAVFGGPRLH* +>PROKKA_00178 Phosphoenolpyruvate carboxykinase [ATP] +VSNTYTDLSSARLVELALERNEGKLAANGALVVNTGRRTGRSPMDRFIVEDPATAELIHW +GPVNRPFDAAKFDALWERVESHLEERDQFVSYVHVGADPEHYLPVKMTTETAWQNLFGRN +LFIRPDNYNPIDKGEWQILNAAGFVCEPERDGTNSDGCVILNFAERKVLIAGMRYAGEMK +KAMFSVQNFLLPEQDVLPMHCSANVGEDGDTCLFFGLSGTGKTTLSADEDRYLIGDDEHG +WGRGTVFNLEGGCYAKCINLSKKNEPIIWDAIRFGAIVENVVIDNDSREPDYDDVSLTEN +SRCAYPLEHVEKRVLENRGGEPRAVIFLTCDMTGVLPPVSILNKEGAAYHFLSGYTALVG +STEMGSSAKLRSTFSTCFGAPFFPRPAGVYANLLMKRMEEFGSRVYLVNTGWTGGPYGVG +KRFSIPTTRAIIRGIQTGALENVQTQHLDDLNLDVPVEVPGVDSNLLNPRNTWQDKEAYH +HKAQELIAQFVENFKKFDVSDAIVNAGPKLKD* +>PROKKA_00179 DctM-like transporters +MSEAIEQSSMYLQKRTIGGRSAREWFSSLPACILLMAVVLFTTSSDIHNKALQLGQVLWS +GYYKLRVDPVKPDCNPNVNVDAQVKRQIAAQAAQQDSMLGSLVGSSPVNPAAVRQSVINA +KQACEAQFADYNATKGRITEGVRVYRSVELFISDVVAFGLASQRYILALLVLVCAATATF +SRHHIAMRGMETRLDHIVSHFMQFIANTMLLISSFMYRQMSHNSGAVVTTGQEISHDIWI +AGFLLLTIVSLVQLFRVPEDAEEGGTLGHAFLCVPLYTTMCLISGTFFAFVGSPAGIGIY +LDKMMELADQFLNVGLYVWAGMMLKQTRLASLVFNVLRPLKLPPELLAVVAVMVAAVPTA +YTGASGIFVIAAGAVIYSEMRKAGARRQLALASTAMSGSLGVVLNPCLLVVVIAYLNREV +TTDSLFHWGGWVFLLTSTLFLITSLVVNRQKGFKVAPMNEALPEMVMRLKPLIPYVLVIA +GVVFFYWLLLGVTMNEFSAPRILPIIMVGILVYEHVHFRGDRNKVSGEVDHQGLEKSLRT 
+ATSETTAEIGALLLLFGLSVSIGGVIERSQVMSLFPQALPSPWLAMMLMVVILVILGMIM +DPFGAVILVSATIADLAYQSGIAPVHFWMVTLVAFELGYLSPPVALNHLLTRQVVGESEM +NLSYRESGSFYQRHERVLMPLLVMGSALLIVAFVPLLFYAR* +>PROKKA_00180 hypothetical protein +VKPSGKRFSLKTWSLASLTLAGMLALQPAANAGSLPKRSFCVFDPVGANGPLFNLMKSTK +PAALDWGVDLQMRAYTDEKIAAEDFKGGQCDSVLLTGTRAREFNKFTGTLEALGAITSNK +EERVLMDTLNQPKAAKLLTNGDYEVAGILPAGAVYLFTRNRNIDTVNKLQGKKIATLSYD +RASLTMVRHVGASVVGASSASFAGLFNNGSVDLAYAPAVAYTPLELYKGLSHDGGVLQYP +LAQMNFQIILHKSRFPKGYANHVREYAREHLNQAFSIINKATDEIKKKYWMYPTDKQTAS +YDQMLQSVRLSLRDKGVYDAKALKLMKIIRCKVQPSRGECSQNAE* +>PROKKA_00181 hypothetical protein +MRWLSHSAFLFSHGRSAVKALSLLLLFSLGGCSAVNNMMYKTTGEVMVGYAKAHAVPYVL +SSDDLGMSCAMSEALTPLLMSFGQVTAKPDQLGVMMQMSAGTCAEEKGWNAELAYMKELR +NQHPQNAEDDMIVEKRHYIEAADRYYSAWKHLVAYYGDPSTGQCPTFKNDEGQFIYMAGL +LAGVQALAAEIQSTSDEGVPKNIGSTVAQASGCLSDDKWWGVPMALRATVWSMIPGAKPD +GENPFQRLDESDKKANKARVRLAYVLHVIAAWNKGDTKLVKKLIREQQAQEAKYPADPRW +KMIDKLSTLYLRSISDRMWVEHTGHRTPIGGLGTFWDDSKGSGEVIDLDSVM* +>PROKKA_00182 DNA-binding protein HU +LENSTQNALHKPELAGKIADQTQLTRAQAHEVITAFTDQVSAAMARGETVALAGFGSFNV +RERQARTGRNPRTGEALQIPAHKTVGFRPGKAFREAIE* +>PROKKA_00183 hypothetical protein +MSFLIFVALLVALWLLWTISRNTADALDKQTAVQYEIIALEKRMEELSEALKAQSGETAK +PARRSSSSRSKKEDEKEKKEGE* +>PROKKA_00184 Dihydroorotase +MKLLIQNGQLLDSRTGQVRSGAVLIEDQKIVAVGEQVLDQAADRVFDADGAWISPGFIDL +CCFVREPGDDQKGTLASETRAAAHGGFTTVCASPESSPVNDSGAVTTLILERARKQGCVR +VLPVGALTRGLQGELLSDMASLARAGCVALSNGSLSRGNARVLRRCMAYAKTFGLTLFMR +PENPDLAADGYAHEGVVATRLGLPGIPEIAETIAVGELIQLAEDTGVRLHLSQLSAARSV +ALLRSARERGVPVTADVAIQQLAFNEGWLADFDSRFHCRPPLRTEADRQGLLAAVNEGWI +DAIVSQHQPHDPAAKQAPFGETEPGLSTVESLLGLGLKLVNAGELELPRFLQALTLGPAQ +VLNLPEPRLEAGSRADLTLFNPNGQWIPAPETLLSAGKHAPVLDQPLPGRVMLTLSRGKV +AYADPQTEFGL* +>PROKKA_00185 Aspartate carbamoyltransferase +MIDTCEAARQLQLNAAGSLRHFLTLDGLDRPLLTEILDTADSFIEVGERRIKKVPLLRGR +TVVNLFFEASTRTRSTFELAAKRLSADVLNLNISTSAASKGESLSDTLLNLEAMASDMFV +VRHAQSGAPHFIARHVTPGVGIVNAGDGRHAHPTQAMLDMLTIRQHKGGFEGRVVAIVGD +ILHSRVARSQIRALEILGADEIRVIGPNTLLPRDVESLGVKVFNDMQRGLKDVDVVIMLR 
+LQNERMEGALLPGEREFYRLYGLTTEKLRYAKPDAIVMHPGPINRGVEIESAVADSPRSV +ILNQVTNGIAVRMAVMSMVMSGQLAQLNQGDAAQEQSRTL* +>PROKKA_00186 Bifunctional protein PyrR +MTAQINVERLLETMCSQLEQTLEARGAVNPVLTGIRTGGVWLADYLHKRLRLEEPLGELD +ISFYRDDFSRIGLNPRVKPSNLPFATEDRHIILVDDVIMSGRTIRAAMNELFDYGRPASI +ILVTLLDLGARELPIQPDIVGQQMQLQRDQRVKLMGPDPLRVELRENVRENPAKDASTKS +H* +>PROKKA_00187 Putative Holliday junction resolvase +MPDLKPEGQRQVMAFDFGLRRIGVAVGQEMLGTASPVTMIGARDGIPRWDEVEALIADWK +PDFFVVGLPLNMDGSESEMCRRARKFARRLHGMYHRDYAMMDERLTSFAAKSAIVEREGG +RDFGVKGVDDLAAVLILEGWFLQQRDQQPKIPTP* +>PROKKA_00188 hypothetical protein +MSEANPYIRNQFLIAMPYMQDPNFNGTLTYICDHNDQGALGLVVNRPLDFSLGEILEQLD +IECGHLDVPVYSGGPVKVERGFVLHRSRGEWQSTLEISGDLSVTTSRDVLEAIAEETGPE +DYLVALGYAGWGAGQLEQELAGNFWLTCPADPDILFNVPWQQRLPAALARLGIDWSQLSD +SVGHA* +>PROKKA_00189 transport protein TonB +MAASAVRVSDTDRLTFTLFLALVLHAIVVLGVTFTAHKPQPSARTLDITLAQRDDQKAPK +HADYLAQTNQKGSGTLSKKAQITTRHRAPINASQVHKVKPIPRSQPQHSQAKPEKRHVVT +TVSQQATQQVDSDDKEQKAHQKHNHKSLMSRALEIASLEAKLDQETQRYAKRPRVLRVTA +ASTLKSTDAWYVQAWVNKVTRIGNLNYPEAARRRGIHGTLRLLVDILPNGHVKDIQVLQS +SGYKVLDQAAMRIVRLAAPFAPFPPELRKRKDVLEIIRDWSFEPRGLSTNG* +>PROKKA_00190 Glutathione synthetase +MSVKLGIVMDPIGAIHYKKDTSLAMLLAAQRRGWELHYMEMQDLYLRDGEPRARTQALTV +AANPDDWYSLGEPSDRALASLDVILMRKDPPVDKEFLVTTWMLEAAERLGTLVVNPPQAL +RDCNEKLFATWFPQCTPPLVVSRDAARLRAFHAEHGDVVLKPLDEMGGRSIFRVREDGDN +LGVIIETLTKDGSHQIMAQKYLPEITQGDKRILLVDGEPVPYALARIPSQGEHRGNLAAG +GRGEGRLLTDRDRWIVEQVQPMVREKGLLFVGLDVIGDYLTEINVTSPTCVRELDREYDL +DISDQLMQVIADRLARR* +>PROKKA_00191 Alkaline phosphatase synthesis transcriptional regulatory protein PhoP +MEDNFENLKIMVIDDSKTIRRTAETLLKKVGCEVITATDGFDALAKIADSHPDIIFVDIM +MPRLDGYQTCALIKNNSAFKSTPVIMLSSKDGLFDKAKGRIVGSDQYLTKPFSKDELLNT +IRQHIPSREST* +>PROKKA_00192 Response regulator PleD +MARILIVDDSPTEVKKISSLLEKHNHEVLTADNGADGVAKARAESPDLVLMDVVMPGLNG +FQATRQLTRSPDTADIPVVIVTTKDQETDRVWGTRQGAKGYLVKPVKEDELIKTIDDLLA +* +>PROKKA_00193 CheW-like domain protein +MSAQAAPFAVLSDIATRSRSQSRGLPAQEEAVELWNGIGFSLAGQLYVAPMGEVVEILHL 
+PRYTQVPGVRAFMVGVSNVRGRLLPLVDLGLFLDFPRSVV* +>PROKKA_00194 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00195 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00196 hypothetical protein +MSKPKYPFEKRLEVVNHYFTTDDGYRIISARFGVPRTQVRTWVALYEKHGEKGLIPKPKG +VSADPELRIKVVKAVIEQHMSLNQAAAHFMLAGSGSVARWLKVYEERGEAGLRALKIGTK +RNIAISVDPEKAASALELSKDRRIEDLERQVRFLETRLMYLKKLKALAHPTKK* +>PROKKA_00197 Integrase core domain protein +MNMVENMLDQAFKKLNPHEHPVLHSDQGWQYRMRRYQNILKEHGIKQSMSRKGNCLDNAV +VECFFGTLKSECFYLDEFSNISELKDAVTEYIEYYNSRRISLKLKGLTPIEYRNQTYMPR +V* +>PROKKA_00198 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00199 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00200 High-affinity branched-chain amino acid transport system permease protein LivH +VIAWVVLLALWLFVTRSRPGKAMLAASMSRTGLALVGYDIGKVYLQVWGLYGLLAGIAGV +LLASFTGASASIAISLTVNAFIIVVLGGLGNVAGSLGAAYIIGLLGTLTAYLISPSVREI +PGLLVLILILYVRPQGLFGRH* +>PROKKA_00201 leucine/isoleucine/valine transporter permease subunit +MASRIHWDWRAGLVLLALIVLAFLPFGVSGYILGVMTVAFYLAVYAMSWDLLFGYAGEVN +FGPTFLVGLGAYGAGLSNSVFNISVWPSVAIGTLAAVIGGLVLAGPALRLRGPYFGLVTL +VAVILLEKVIGLLSSYTGGEIGLTVMDVLTISQSGNYYYAFGFMVISAVILRIIARSSIG +LILEASGQDPVATEALGFNVTKFKFMAFTLSAFFSGLAGALTVFYLGSASPGTVVSVFVT +IQIIIATLVGGRRSIIGPILGAVFLIAAGEILRPLGQLSNAVVALIALLVVLFAPNGFIG +LFSRTGGAR* +>PROKKA_00202 High-affinity branched-chain amino acid transport ATP-binding protein LivF +MSVLKAQGLYKRFGGLQAVNNVSFSVDRGEVLGLIGPNGSGKSTTLSLLMGVTRPDRGSV +QLDGQEMAGWRTHRIAKQGLSMVFQHSRPLHRQTVLENIKLALLPDTLWQLFPPHTLDRR +AREIAERVGLHNVIDTLPGNLPFADLRRLEIAKALAQDPSVLLLDEPFAGLSPRETREFA +ELVHLFREEGRAVILVDHNVKEVAGLVDRIVAMHAGQVIAEGTPDEVTRDPKVREVYFGQ +SLENASGIHADGDRRSEGNGSEALLEIDLRSVRYGLAEALRDIQIQINQGECVSVVGING +AGKTTLFKSILDFQGYEGDVRWQGTSLTGQGPGQVASQGIALCPESRELFGFMTVRENLE +LGGHKLDRQAHESQMDRVFDLFPVLRQRQAQAAYTLSGGEQQQLTIGRALMQQPKLLILD 
+EPTLGLAPLVIENISEALHKLQQDSGMTLLLGEQNLTFALRHSQRIYLLETGNLRWHGPA +ERFIEEVGEDVL* +>PROKKA_00203 hypothetical protein +MFCNGNQGYDFSINSHPGHAHQMQRRSHATNSQSGPVP* +>PROKKA_00204 Putative alcohol dehydrogenase D +MQRTARAALCREWNGPIQVETIRVDPPRRNEITIKLRACGVCHSDLSAATGVIPFPPPLV +LGHEGAGTVIAVGEGVTDFQEGDHVVSSFIYMCGKCRQCSRGRPVLCEQAHKALHHLPDG +TVRTHDGDGNPLNVFGACGVMAEYATLHVNNAVKIDPDVPLERAALVGCAVMTGAGSVFN +TAQLEPGSTAAVFGVGGVGLNAIQGCAIAGARVIVAVDTNEEKLAMARQFGATHTVNARE +HDDAGKAVKKMTGGGDYAFECVGSGVTVAQAYGSLGRGGTAVVVGVADVKDKTTFRTLSL +PADERTLKGSWLGSARPQFDFPRLLGLYQGGRLKLDELVTHTYTIDEAPQAFEDLKAGRN +ARGVILFD* +>PROKKA_00205 3-succinoylsemialdehyde-pyridine dehydrogenase +MTIKSYDSVYFDGRWQPVDGERLSVYESGTGEVMASIPGAAPAVMQQAIDAAHNAFDSWS +RRPLKERLKYIEALHGQLVARAEEIATTISREVGMPLKLSRNIQAGLPIAITDSYLKLLP +DFPFEEKVGSSLVQYTPVGVVGCITPWNYPLHQVILKVVPALAAGCTVVLKPSEVSPLSA +FMLAEMFDAIDLPPGVFNLVSGLGHVVGDSLTGSNKVRMLSFTGSPGTGRRIFHAAAEDF +KRLALEMGGKSASVILPDADLATAVKGSVNNCYLNSGQTCIAWTRMLVPADKHDEACELA +VAAAKKLTLGDPLDENTRLGPLASKEQLERVRNYIRVGIEEGAKLMTGGPDAPAGLDKGY +FVEPTIFANVDPQSRIAQEEIFGPVLCIIPYRDEEEAIAIANGTPYGLSGGVWSADQDHA +IAVASRLRTGQVTVNGGAFNPEAPFGGFGASGLGREFGRWGLEEFLEVRSLQL* +>PROKKA_00206 hypothetical protein +MLRFIVALMLLFPLVAQAEDAIKPGQWKQTIHVTIPGSSVKIPPHSSTNCVKPEQAGSIK +SIIEEAQQPGCKLNEYSRSGNKVHWKMTCTGKSQASTEGVFTLQSKTSYHIHMNALMQTP +NGPYKTVVDSDGKWVGPCK* +>PROKKA_00207 Long-chain-fatty-acid--CoA ligase +MRAPDELINGRWASEITAALPARIHQGVAQHWEADPEAEALVDHQVRWSYRELSAAVAAA +RQWLVGQGVRPGDRLMLVSENGRALVALLLAASGLDAWAAIINARLADNEIDAIRDNCDP +RLLIYTTEVSPDARNHAQRHDADIVHLDPLGEFAVGPAAAQSLPQPVVKDGTQVAAMIYT +SGTTGQPKGVMLTHRAILFIARVSGGLRNLRPGRHVYGVLPSSHVFGLSSVMLGSLANGA +CLHTVPRFEAGALLDALAGERISVLQGVPAMYARTLEYLHQHNRKLVAPALDYLSAGGAP +LDTDLKTRVEATFGTTLHNGYGLTEASPTISQTRIGEDSEASSVGRILPGLDYEVVHLKS +RQPVLQGEVGELRVRGPSIMRGYFRKPEATRAVLDDAGWLDTGDLARIDPDGQLHIVGRA +KELIIRSGFNVYPPDVEAVLNEHPAVTLSAVVGRQITGNEEVVAYVQLAPGHDMTESALS +EFAAARLSAYKRPSEIHILDQLPVTPAGKILKARLRALANQTPDR* +>PROKKA_00208 Acyl-CoA dehydrogenase 
+VIRDKETLNQLIDTISRFVRERLVPSEEQVAQDDAIPEDILQEMKDMGLFGLSIPEEYGG +LGLTMEEEALVAMEIGRTSPAFRSIFGTNNGIGSQGILIDGTDEQKRRYIPRLATGELIS +SFCLTEPDVGSDAGSLRTTATRDGDHYVLNGTKRYITNGPEAGLFTVMARTDPDNKGAGG +ITAFIVEGDTPGLHRGRPDRKMGQKGAHTCDIIFDNCRVPAENIIGGREGVGFKTAMKVL +DRGRLHISGVCVGVAERVLDDALHFAMERTQFGKPIAEHQLIQALLADSKSEAYAGRCMV +LDAARRKDAGENVSTLASCAKLFCSEMVGRVADRAVQVHGGAGYMAEYAVERFYRDVRLF +RIYEGTSQIQQLVIARNMVREAD* +>PROKKA_00209 Pca regulon regulatory protein +VVDDNSDLGLDAALDPQNGAKDRKFVTALARGLEVLRAFRPGDGFLGNQEIARRTGLPKP +TVTRLTYTLTKLGYLSYSQRLERYSLGTGALALGYATLSTFGIRQIARPLMQELADDVDA +SVSLGARERLSMIYLENCRGSGAVTLRLDVGSRIPIATTAVGRAFLAALPEGERNYLMDH +IKRHAGNRWPPVRRGIERAIRQYQETGFVKTVGTWERDVNAVGVPLVQSDNGNIYAFNCG +GPSFVLPEERLDAELGPKLKQLVQNVEAALRRL* +>PROKKA_00210 Beta-ketoadipyl-CoA thiolase +MLDAYLYDGLRSPFGRHAGALSPLRPDDLLATVIQALIARSGFAKEQIEDIVIGCTNQAG +EDARNVARHAGLLAGLPVETAALTVNRLCGSGLAAVADAARMITCGEGELILAGGVESMS +RAPFVMAKAESAYSRQLRTFDSTIGARFPNPKVLAEFGSDTMPETADNVARDLGISREAA +DAYALQSQQRYEAARQDGFYREEVLPVEVPQGRKQPPRWVSADEHPRPDTDTAKLARLSP +LFEEGVVTAGNASGINDGAAALLIGSRSVGQRLDIKPRARILSAAAAGVPPRVMGLGPVP +AAQKALARAGLSLNDMDIIEINEAFAAQVLGCCQQLGIAGDDPRLNPNGGAIAVGHPLGA +SGARLTLTAMRQLERINGRYALVSLCIGVGQGVAAVIERM* +>PROKKA_00211 Acyl-CoA dehydrogenase +MKPFTWEDPLLLDLALDSDERMVRDSAHDYCQNKLMPRVLEANRHEVFHREIMNEMGELG +FLGPTIPEQYGGAGVNHVCYGLIAREVERVDSSYRSAMSVQSSLVMHPIYSFGSETVKQK +YLPKLASGEWIGCFGLTEPDHGSDPGSMITRAKKVDGGYRLSGAKTWITNSPLADVLVIW +AKLDDTITGFVLERGMEGLETPKIEGKFSLRASVTGQIMMDDVFVPEENRLDVTGLKGPF +SCLNKARYGISWGSMGAAEFCWHAARQYTLDRKQFNRPLAANQLIQKKLADMQTEITLGL +HGALRLGRLMDSGDWAPEMVSLLKRNNCGKALDIARTARDMHGGNGIADEYHVIRHVMNL +EAVNTYEGTHDVHALILGRAQTGLQAFTG* +>PROKKA_00212 Acetolactate synthase large subunit IlvG +MVERKHGGLIVAEHLQAAGISHLFALCGGHISPILVQAKALGIEVVDVRHEASAVFAADA +MARLTGRPGVAAVTAGPGVTNTITALKNAQMAQSPVVVIGGATPTVLKNRGSLQDIDQLA +LMKSLVKWQTSVGTLAQLDEAMRYALEVAAQGVPGPVFVEAPIDLLYPRDLVHSLYADQA +GLDKMKGPVGRLLRGGLDLYLLRQERQPALSVHPNLKTLTEPAAEWHAARQLGEVVKRLA +SAQRPALVLGSQVLVNRTAEQAKDIADAVERLGLPVWTGGMSRGLLGAEHDLLFRHHRGR 
+ALAEADLVIVCGFPLDFRLKYGRGFAKGATLVSVNLSLHDLLLNRKPTVPVLAHPGDFLQ +ALADRMSSRAAQWRAWLGELGKRENAREEEIDQQAAAPADKVNPLHFFRTLDRHLGEQDV +LVVDGGDFVATGAYTLKPRGPLAWLDPGVYGTLGVGGGFTLGAAAARPGSRIWLIYGDGS +SAYSLAEFDTYRRLGLAPIAIIGCDASWRQIAREQVEMLGDPVGTDLRDTDYHLVAEGYG +GHGILVEHNHQIDAALAEAIKLSDAGTAVCINLRLAVSEFRKGSISM* +>PROKKA_00213 Esterase EstB +MGRSDVIAGLSLERLDNIERHIDRKYLKPKRLPGTLTLVARRGQVAYVKAQGLMDVERNK +PVARDTIFRIYSMTKPVTSIAMMQLFEQGRFLLNDPVHKYIPAWKNLRVYQSGVYPQFLT +TPTLRAMTIRDLFTHMSGLTYGFMCRTNLDAAYRELKLDGGKEMTLDLLVERLSQLPLEF +SPGSAWNYSVATDVLGYLVQLLSDRPLDEYFREHIFDPLEMADTGFMVPESKRERFAACY +QFDPEQGYALQDDPADSHFTRPIKFLSGGGGLVSTVDDYYRFAQALNNGGQLNGARIIGR +KTLDFMTMNHLPGNQDLPGLSIGPFSETPYEGSGFGLGFSVKVDVAKSQTNGSVGEYGWG +GLASTNFLVDPVEDLIMVFMTQLIPSSTYPIRQELRSIINGAIVD* +>PROKKA_00214 Long-chain-fatty-acid--CoA ligase +MHGLMMNRQLLISQILEYAAVNYPEQEIVSRTTEGPIHRYRYPELRDRSCQLAHALAGLG +VTQDDRVATVAWNNYRHLEIYYAVSGMGAICHTINPRLPAEQFQFIVDHAQDQYLFVDLT +FVPLLEKLHPQLNSIKGYIIMTDEAHMPETGLPNAHCYETLIKDQPTRYDWPEFDENQAS +SLCYTSGTTGNPKGVLYSHRSTLLHAFSVMAFPGVDFGEESSLLPVVPMFHVNAWGMPYF +ALITGSKLVFPGPRLDGASLAELINSEGVTDAWGVPTVWLGLLRHMNESGERFSKLEHVQ +IGGSAAPRAMINEFQERYGVEAIQGWGMTEMSPVGSVSQPTPFMRERMSAEEQLTVRGKQ +GRALFGVEMKIVDADGKALPRDGKARGELLVRGPAITSGYYRNDEANAKAFDDEGWFRTG +DVATIDPDGYMEIVDRVKDVIKSGGEWISSIDLENEAVGHPEVAEAAVIGVRHSKWAERP +LLVVVRNPDSAVTAEAIVEYLSERVPKWWLPNDVVFVDELPHSATGKLQKTKLRDDFKDH +RFSDDEA* +>PROKKA_00215 Esterase YdiI +MTQNPDQKAPIWKRPATVEALNAHAKNTMVEHLAIEYLELGPDFLRARMPVDKRTHQPFG +LLHGGASVALAETLGSVGANLCIADPDKAGVGLEINANHIRSARSGWVYGTARPFHIGGA +TQVWEIRIQDEQDRLICISRITMAIVSAR* +>PROKKA_00216 1-deoxy-11-beta-hydroxypentalenate dehydrogenase +MKQMQNRVAVITGAASGFGLEFARVGAARGMKLVLADVQAEPLEQARAEMEAAGAEVLAM +LCDVRKSEQVQALADKTMERFGTVHLVFNNAGVGSGGLVWENTEQDWEWVLGVNLWGVIH +GVRIFTPLMLEAARRETDYEGHIVNTASMAGLLCPPTMAVYNVSKHAVVALSETLYQDLK +LVNAPISASVLCPYFVPTGISDSHRNRPAELQNDSGPTASQMIAQAMSQKAVSSGKVSAA +EVAQRTFEAIGEDRFYIYSHPEALGNVKHRMEDIVAGRNPGDPFAEAPQIGQMLRDKLQG +* +>PROKKA_00217 bifunctional 3-hydroxyacyl-CoA dehydrogenase/thioesterase 
+MGELVDVYRNSVQTWECDQMGHMNVQFYLDKADAGLLALTRMLGLNRRFLNERQARVRVL +ENHVRFLREQHAGSPLTLRAGLIDIRPDQLKLYFELTNPIQQAVAASFITQAVLESTAGK +DHLTLPQSALEKAQQYQIDWPRPEGPMGLESTPPRTPPTLQEADDLGMMPTYLGAVSAGM +CDADGHLAIRSYMGIVSDAVPHLLSRIRHDTREVPRPGGAALEYRWIYHQRPEQGDLVTL +RSAITHLGNKAYRLGHWLFDAETGHCLATTEAVAVMMDLDERKALVIPQTARASLEEMLV +KGFSI* +>PROKKA_00218 hypothetical protein +MKAGMVAPLDATQQSRLRVLLALFGLVWLINAGFQAVAWLAAPNASTHFIHALAKSTTVV +PRWVQPLLMTGLHSAQSLGLGIVAAIMVLLAILLGLALLTQRKVAFAARVGIIYSIICWI +FLDGFGFPYANGQTDPGVFVAYAIAFLFVLSVAPVFDREGTKAPEIDERLWHWARIAFGL +LWLFDAVLKWIPPFLLHFSSQITSVIPGQPHWIAAWLSFVAELVHAIGPIPVAVVVALAE +TAIAIGLLSGRWMRLVIPVGMLYSVAVWTTAEAFGGPYSTAGTGVRGNVLGNVLIYLIPF +LFLWVGNSSQRSAAETTGRTLTD* +>PROKKA_00219 Pca regulon regulatory protein +MVKKAPEVDVPSAKDRNFVTALARGLELLRAFGPEDDYLGNAELAERTGIPRPTVSRLTY +TLIELGYLRYCERLEKYRLGAGVLALGYRYLSRMGLRELARGPMQALADRTDCLVALGTA +DRLDMTYVETCQGAGPLVLRLEVGSRIPMATSAMGRAYLAALPDARRNEYREKIREVYTD +DYEAIWQGVEQGVEQYQKLGFCTALSDWNPHIAGVGVPLVLDGGSQIMAFNCGGAAMRLS +RSVLEKKLGPQLVEVVAEVQRQMHGRRLEAVS* +>PROKKA_00220 hypothetical protein +MQRRYLATLLAGLMAVPAVAVADSGSSSSMPDMNIKLHAHLHGSVDFSNTGGRAVPNTEA +YGEAAGTPARATNLSNNNSTIGFTGQHLVPGAFMAIFQVELALPGSETGVNNSYGKGNHV +SKNVGLHDTYFGIANPLGTLLFQPSFENQGAYLSRPFNMFKDTVGDFNSIIDTANFPNGG +PNGLPAISFAGQANYAISYASPKVKGFDAVLSYTEDANGGDFGTNNTYGSGYCAGTPNTN +HYPNCYGYPNANQHNNAWSFGVQYENEFSSLQSKVNGLINYSQINVQGNTSGVPLGGFTF +ADATQPNQTPPSSKLQLKALELAGKWDYEPTGTTAIAVWERSTGLYSRDAYSLGFSQAVP +GNNDLMVSWIHAGNLSSPLANICDPTKTVCSGSEVKQSGANEYVAGIKHHFDKQVSAYLI +YAYTRNNAEGLYGLGGPNHGQSVYPLNPGDNPQSLSLGMTWDF* +>PROKKA_00221 Long-chain-fatty-acid--CoA ligase +MFDRHHQVWPEFAPLHLTLPETSICYNLEVTAHRYPHKDAIIFYDRRISYGEFQRQVEIL +AGFLAREMGVEKGDRVLLYMQNSPQWMIAYYAILRANAVVVPVNPMNRRGELEHYASDTQ +ARVILCAQELFDQVSPLLGEEGLSRAVVAAYSEYLPDQTDLPLPDAVSEPARAINQSGVV +PWRQALAGEPAAPKALVGPEDHCVFPYTSGTTGAPKGCIHTHQSVMATLVGAVAWNPATA +DSVTLVSLPLFHVTGMQVSMNAPIFVGATMVIMTRWDRRVAGALIERYGVTEWRNIVTMV +IDFLSDPEARNYDLSSLRAIGGGGAAMPKAIAERLHEMTGLTYIEGYGLSETIAATHVNP 
+VDNPRAQCLGIPVFDVDCRVLDVASGQQQDVGEVGEIVINGPQVFKGYWNRPQATAEAFT +EVDGKSFFRTGDLGYYDEQGYFYLVDRVKRMINASGYKVWPAEVESMMYQHPAIRESCVI +SAPDERRGETVKAVVVLTDDAPADVTEAAIQQWCQDNMAAYKVPRIIEFRDSLPRSATGK +IQWRVLQEEERERAAG* +>PROKKA_00222 Phosphate transport system permease protein PstA +MQYSSDEISNSGLDRPRAGGYRLRLALSGLGWGSTALMFALLALALIAIIAFVVIRGGAH +VNWATLSQTTQGYHGLLNAIEGTLLVTVGSLLIAAPVGVVTGIYLSEYQHRRSARFFSFL +CDVMIGVPSIVLGMFGYIAMVNFFGWQFSLLAGCITLSFMIMPYIARTSELALLQVPNSV +REAAYALGAGDRVVIFRVVLASCVPQILNGLLFAAAISMGETAPLIYTLGWSNYMWGGEF +FHHPVGYLTYVIWSFISEPSSAAHQLAYVAALLTTGFALLINILARSTIRKQSQHQSQ* +>PROKKA_00223 Phosphate transport system permease protein PstC +MKTNRLFRWSITGIASVIPLALLAIFIFLLINSWPAIKFSGWHFLTGSQWSLGNEYGDLV +TVNGQEVPPGADYGIGFLIAGTLLSSFLALLIALPISVAASAFLAEAVPKRLQNTLALFV +ELLAGVPSVVFGLWGLVVLVPFMNHYIYPGLVHVLGDVPFFQPPTGAGYGLLTSSVVLAV +MIAPLITSTVRGAIERVPMVQREAGLALGATRFEVLWKTVLPSVRRVVIGAGILALGRAL +GETMAVLMVSGNALGYLPHNIYSPISTMAAFIVSQLDSALEDASGMATHALSEIALILFF +ITLIVNVIARLLLWLARD* +>PROKKA_00224 Phosphate-binding protein PstS 1 precursor +MKGKFSLKSTRTGSMMAKGALAAALLTAGMGVANASTTLQETGSSLLYPLFNQWIPAYSK +AHSDIQVNAASTGSGTGIAQSIAGNVQMGGSDAYLSGAMMKKHSDMLNIPVAISSQMVNY +NVPGLNDKHLKLSGPVLSRIYEGTVKYWDNKEIKAMNPGVDLPHHRIVPVHRSDGSGDTF +LFTQYLSFSHPYWHKKLGYGTTVNWPAVQGEIGATGNPGMVQALKDNPYSVAYIGVSYKG +QIDKDNLGEAMLKNKAGNFVLPNSTTVPAAAAAMVPKTPKDERISLIFAPGAKSYPIINY +EYVIMHANQGDLAAPLKQFLNWAVSPNGGNASQYLGAVNFMPLPKKAEELTKAQIAKIHS +* +>PROKKA_00225 Transposase DDE domain protein +MGTTFRPYSPDQELLLPPSLNEWLPEGHLAYFVSDVVEELDLSALYARYDGDGRRNSPFD +PRMMLKVLIYAYATGTFSSRKIARKLEEDVAFRVLAAGNFPRHRTICDFRKQHLAAFKAV +FIQVVRIAQEAELITLGTLAIDGTKVRANASKHKAMSYGRMQEEEKRLSKEVDELCRQAR +RTDEEEDQQFGPDQRGDELPEELQHRQARLDKIRAAKEKLEADQKERDKARGRSPDDDRR +SPRGGRNFKRDYGVPDDKDQSNFTDPQSRIMKTSDGFQQCYNGQLAVDGEFQLIVANHQG +SNPSDNGCLLPLLNDVKDTLGTYPRQCLADAGYRKEGDLQTLEVNGIDGYVSLRREGRKP +GEIDATRYPATARMAEKLATAAGRSVYGQRKHLVEAVNGWIKHVLGFRQFSLRGLNAVQG +EWDLVCLSLNLRRMSTLMRMV* +>PROKKA_00226 hypothetical protein 
+MRPGEEDKDPLGDFLASLERLSALPENIRVLPSHGLVFEGLHQRLKALQRHHELQIDRLL +ERCEHPQSARDVLSLMFRRPLDEHAILFAMGESIAHLHHLRLQGKLSQVEQAPFRYIRN* +>PROKKA_00227 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00228 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00229 ATPase MipZ +VASLDLDVRQASLTRYLENRAAFAARRGVSLPMPSHRRGNDPTAPDDVAEMIRAQAAGHD +VLLVDTPGRVDAVSMAAHVVADTIVTPVGESHLDLDLIGNVDRNQGRAIRPGPYAEFVWA +VRQERARAGRTPTDWVVCHNRRRAPQTRVGREVERTLADLSKRFAFRLVPGFSERTIFQE +LFPDGLTLLDLRESETGVGLTLSHVAARNEIRHLVDALDFR* +>PROKKA_00230 Unsaturated glucuronyl hydrolase +MITIDQTLTPKDLTEDLARFWTLSGQKIRQLASRWNAGDGAPVFTVEGRYTSRGWTEWTE +GFVYGSALLQFDATGEDWFLDYGRRGTREHMGGHVTHTGVHDHGFNCVSTYGNLRRLMDE +GKLPENADERAFLDMALASSGAVQAARWTNLGHGKGYIHSFNGAHSLFVDTMRSLRSLAV +AHMLGRDLKGEHDQSISLIERLAAHARTTAETAVFFGKGRDAYDEWGRTCHEAIFNVKDG +HFRCPNSQQGYSPFTTWTRGQAWITLGYAEQLEFFRALEAAGRPEADDCSDLMGIMRDGA +RATADHYIANTPTDGVCYWDTGAPGLAAMPDHKDRPADPFNEHEPVDSSASAIMAQGLLR +LAAVLAEEGDDSAERYRQAGLTVARALLKAPYLSEADGHEGLLLHTIYHRPNGWDHIPAG +RKVPCGESCQWGDYHVRELALMIGREAAGQEPYRFFNGLV* +>PROKKA_00231 Type I secretion system membrane fusion protein PrsE +MASPDIEYMSELRAAVSRRPTILANIILIAVLVFFILAILWASWAKIDQVSTGEGRVIPS +GHVQVVQNLEGGILSELYVAEGDHVKQGQVLMQLDDTQFSSDFMENRLKFLGLKAAVTRL +QAELEGTALKFPAEVEKQLPAVAQAERSLFEARRSEEDASLAKLQAQYQQKLHEVDETKA +KIEHLRTVIKLAKEEMAILEPLVKKGINAPIELIRLKREESQDAGDLAVARQQLLKLAAA +IDETKEAITEAKAQFRSRALKELNEAQVNMAALGQVVTSRDDRLRRTVIRAPVTGVVKQI +FLTTIGGVVRPGMDLMEIVPAEENLLVEAKIRPSDIAFMHPGLEATVRFTAYDYTIYGSL +KATLDQISADTIFDEAKKERFYMIRLRTKTNSLLDKTGKPLPIIPGMTVSVDVKTGRRTV +LQYLMKPFHKLSIRAFHER* +>PROKKA_00232 Toxin RTX-I translocation ATP-binding protein +MTADNASNIKVVHDPQTDEAPARPLGVDTPAGEAPGNDDRPAGPLEDALPGAEDDDSLLS +CLLFLVAYHGQPKSPSVLLSALPKPDGPIPVDLFKRAAARAGLSVQVVRRGLGRIHAWTL +PAVLLLRNRGAVVLTGKSDTGHFLVTAGDAGHGTTELSPEELERAYTGFAILVKPEVDLG +GERTGADLAKPRSWFWGTLAKNGWTYAQVGMASVVINIFAVANPLFTMNVYDRVLPNNAV +ESGIALAIGAATALLFDFILRNLRGWFIDFVGRRADVVLACRIFDQVLDLKASHRPQSSG 
+AFASMLREFETVRDFFTSATLAAFIDLPFSLFFLAVILMLSPEIGMVLGSAMALVLFWGM +FVQFPIAKSVKKLMVHGEHKHGVLVESLFGYETIKMVGAEGRMRAKWETVVGQSAAVGQR +MRLFNNLGVNFVQFVQQGTIIAMVVTGIFLVKDGTVTSGGLVASVILCGRALGPMAQVSQ +LMMRFHQTWTSLKSLDAVMRGPVERPPEANFLHRPRLRGQIEFQEVTFRYPGTDHDVLRD +ISFVIEPGERVGIVGRVGSGKSTIAKLLAGLYAPTSGTVLLDDTDLRHIEPSDARANVGF +VPQDVFLFKGSIKENIAISVPRATDDEITEVSQALGLHDFITQHPLGYDLEVGERGGGLS +GGQRQAVALARTLLKTPTILVLDEPTNSMDTGTEKKVVDTLARTTGGRTIILVTHRTSVL +SLVDRLIVMDAGRIVADGPKATILGGLAKGQVKTGK* +>PROKKA_00233 Outer membrane efflux protein BepC precursor +VKSFSDAGVAANRRNGHRNATPSGRLGHSLSWGSLLLSGLVVFGGPFMADAQAQQLREAV +EMAVQSHPMVASTEAEYRAAERSVDQVEAGFYPSLDLTADSGYQHARRVNESSIKENQWR +NKQRLAMTQMLYDGEGTANRAESAKASAQSAHFDVLSAATKIAQRAIRAYLDVARDRKLV +QYAVDNIDLHRRILADVEEAARSGGGSETRVTQVKTRLYNAQSQRRRAEGNLRNSISDFQ +EAIGETPETLEDYPMPTVAIPASVDEARDEALKNNPSFQAAVETERARTLTANAERSGYF +PQVDVEVAHEQRDGVDGVSGFETDSTALLTLSWNLYGGGADQAKVRRALEQSSAAMYRIH +EVERKIRRELEVALTDYEVARDQVALLRERAATAKEVTAAYREQFRLGQRTLIELLDSGN +ELFLARSDLTTAEYRQISAAYDFLAVRGTLLKDMGVKVATGKAPKAP* +>PROKKA_00234 Bifunctional hemolysin/adenylate cyclase precursor +VATLELIVERASGDVLKVELKPGTNVPAIHPGDKVQIVTPAGETLNAVVVGQDVQITPVD +ASGTVGETIVFKNLALYLHDGQSEVAVVNADTGQTTEITDVASLADLGTVPLQMASGEGT +GPVSPGTSSPFQNSDAIDHGGETAGNAAGTLGDILNRGAAGTDGGRAQLAGTGETGAGTG +TGSSTTDHVETPISESTEGGGSGTGTGTGTTTSGHAVDGYIVGATVFADANANGVLDSGE +ASTTTSYNGTFELSGSSGQLVMTGGVDQATGESFKGTLTAPAGSTVVTPLTTLIQSLVEA +GQSAADAQAAVKSALGLTGNSIDLTTANPVEDVENGVSGADDVLAAAIKIQNTVVQAASV +LQGAGGSTVAMSTATNAVFAQLATTLQNNPGSNPITDATAVQNLITGAANSSSLGLSSTA +KTQVGNAASDAASVIDAGNSHINGLSSTGSSLLTDLASAARVAQNGAAEALHDALNAVQG +TSNSANLSTATSSYTGANLTSEIGNASSGLGTVGTASSVGTSGDDTIQGTSGNDTLNGGA +GNDTISGGTGNDILIGGAGNDTLKGEAGNDTLDGGAGDDSLYGGAGTDKALFDGNFSGYQ +IATDSGSSGVITVTGSGTDTIDTTEVLKFKDLTVRMVGDSGGTANGYTSLSSALSAASVG +ERILILDSATDPSTLTLSKKVSVQKIGEDPLISIASDGALVVDGSQLSAVTTLDLSSLPG +TTTVRFTSLGSIASISTASTETLNLSASQLDGLTVSGSGKIQTSGIVATSADLSNLSSDL +SVASGQSLELTAAQASGKTIAGAGNVTVNALGSSAVDLSGITASGTLTANVPSSATLNTN 
+TDLGTFGVSVASGQTLTLSATLADGTTIGGDGNVTVTGLAAATDLSSVSASGTVTATVTS +TVDISSNTHLGSVDAYQVTGALTLTAAQVGDSTISGSGNVTVSGLAATTDLSGIASSLSL +TASVTSSIDISANTNLTTVDTYQVSSGQTLTLSAAQAAGHAISGGTVTISGNITANTDLT +DISSTLSFDDGDSGAISVASGTKLTVTPSQAAALQTAGQTITGDGTVLIDGNVTADTNLT +NISAAVDFNGNSVSVDSGHTLTLTATQASDTTISGAGAVALSGSDTNADLSSITADITVA +SGQTLSLSTAQLATLDSNAIPIGGDGTVSLTGNATSALNSDLSTYLGSSLNLAVPSGESL +SLTAIQANGLTMEVAGTANITGPAGTTAADFSNISFTGSGAATFTVGADLVISEANADFG +SVSINIPAGRTLTIDAADASGVTISGDGTLAVTGTLDSSVNMSNWGTGAIDLTNVSASNF +SLTQLDLNGSADYHLTYAQVQALSDGIDGNNSDNTLIIDVSTAGGVTYTNNAATIDLDIS +LLGGADRVKFDFGGTTDSGNTLTIRGPLGFGDGSDTLESRHGTISLTDPGLTLSGGPEAL +VANSGFSLTATLFDLLYGSSGVTLQGEGTYIVSIDAGFASGASPTLDLTVLDNFVPAGGV +LPTLQIVATGYSVVGGDDGTGDGIATLSDGTHTITIKLPDDPDNAGTFNPGNTPVIIEID +NGTSQFFLGGLDDQRAYYESETTVYTGSQFADLATAIATDASSLGINAGDIQTIKLGDSV +ILDSDSAVDLSAFGGVIDYNGQTIQVTSGDSLTLTAAAADGGTISGAGSVTITDLGTTAV +DFSGLTAASVTLAISSGTVDLSAISGLDLGNVGITVTTPGAVTLTAAQADGLSISGNGDV +TLTGLGASEVDLSGLTATNATAEISADLTLSSLTNLGSLDLTLDDGVTLTLSRAQLDGRD +ISLASGSATLAFGGNASGLDLSQIASGIAFEVVAGRTLTLSTAQINDGHTITGAGSLVVV +LSGTEVDLASTAIQVTGTRTAVVSTSATLDGNTDLGDFSVSILSGQTLTLSTAQAAEHAI +SGAGNVTVTGIGSTAVDLSTITVTGTKTVSLSADATLDPDTNLGNFAIDTAGHSLQLTVA +QANGLSITGSGTAVVTGLGSDTVDLSGITATASTTVSDTVALAAGTNLGSVAVTVDGTGS +LTLSASQADGHTISGTGAVTVTGLTAGIDLSSLASSLDVTATVPGTVDITGNTAQLATVD +TYEVAGALTLSASQASGHTIEGSGSIVVSGLDGSAAYDLSGITASASTTVSGTVTLDSGS +NLGTVAVSVTGSLTLAAAQATGHTLSGDGSVTVTGLAATTNLSGLASTLSLTAAVTGTVD +ISSNTLLGTVDSYTVSQSTDALSLTAAQASGHAISGSGTVAISGLGAAVVDLSGIAATVD +ATATLSGTEVILTAGTDLGTVALSVGSGQTLTLGAAQASGHAISGGGDVVVNGLSASTDL +STVTVTGTVTATVSADLDISASSQLGSVDTFQVVAGKTLTLTATQANGQAVEGGGNVTVT +GLAAATDLSGITATGTLTATVTADTDITANTHLGAVDSFQVASSTTLTLTADQADGQAVT +GTGNVTITGLGTTAVDLSAIAATGTLSAAISGDITLAGDTNLGDVTLTVGAGNTLTLTAA +QADGNTITGTGAVVITGDVAGYDLTHIAGTLDLTLPVTGDVLTLTDGETVHLTVAEANAY +DSITGDGTIQLSGNATANFDHLTSILGDGVSLAVADGDTLYLTATQASGVTIGGIGTVDA +SGTVTDGDFSGISADLNLTDATLDGTTTLPTVGAGHTLSMTSDQINAASIALADSTATLH +VAVSFDALSSSNDALPEIDISEIRVDGSNSPEAVWNSVDVASGSIVDKFKLFWISADKQY 
+YDSTPLGQDVDANRAFVELGNLYAAYLAGADGELGTADDGTPILDVVQTKSGGVADYDAR +QQSLHDNLLGNLSDGAIAGRFGTDDPRSDLAKLFGDRPYLAGSVDGNGLYTNDDSVAAVV +GWDLYHGLDYTASLSGGYAVLDGDNSVTGTSGSDYIYAGGGDDTVSGGDGADVLYGGSGD +DTLNGGAGDDTLYGGSGDDTLTGGAGADTLSGGDGTDTASYAASTEGVTVDLETGVGTGG +DAEGDTLSGIENVTGSAEADALSGDANANVLDGGAGDDTLTGGAGDDTLSGGDGTDTAVY +TAALTTDNITFDTDHWVVATDGAEGTDQLSGIEVIDHGGSGNILLVGGDGFASIQDAIDV +ASDGDTIMVAPGTYAESLTIDKSLTLIGDPATGDAGAGTSAPQILGSTDWTLATVSIEAE +NVTFSGFDVTNDTGPYGIHIKAGDADVSDNYVHDINGALSGDGIRAIFINPVDNVTVSNN +IVEDFGNADNPSAASYTKTAAGIYYWARGGTLPGGTADIAELHNVTIENNVIHNDGLPTF +TGTSVLGIWVGSSQGGSVLDTVSIAGNQISDLHTDNADRLTGGILVNHGSNPDGVDPLAS +LDTPGVTTGLEISGNTIDDVSGASVFAVGLRGQTPDASVIDNVISNLALASGSSDGLLAS +ISFQYNTTTHSVSLSGNDLGGYDLLQVGHDTSDDTLTPATADDTLTAIDGYDNILVGQSG +DDTLTGGDGNDTLLGGNGTDTLTGGGGTDTLDGGDGTDTAIFTGTRADYTIAVDTDGHLT +VTDTGGTDGTDSVSNVETLEFTDTSVSVLTVTETGANGTYSSIQAAIDAAADGDIIYVAD +GTYTQTGTLNVDKAVTLIGQSEAGVVIDASAVHGYGILLTADGATLSDFTLNGPQGGDET +VWSSYRVDYGIKVSPNGTASSLSDITLQNLTVSGSHNTEIDFNGIHDSTLSNITVDGGTG +VAGNGISLTDSSNITVNDVTAANNPWGGVAIYTDGTHYAGGSDGVTFTGDYTYDAGSTGA +SPIYIQATGNTYPVTNLTLPDGYDFAVTNSEYRADGNEFTFFFTSESDATAFGNSLGAGS +FVSTPDADTLTGTANADYLYGGGGDDHLSGAAGDDRLVGGSGNDTLDGGDGMDTAAVEGN +RADFTFTDDGSGHLVMSDTQGTNGTDTVSGVETLSFTDGNVLVVGAGSEYATIQSAIDAA +ASGDTIVIAGGTYAESLSLDKALTLQAVSGADVVIDPASGNGLTVSGDLAGGDVTVSGLT +FTDGTMGIQVAANADVGTLTLDGVTVEDNLQYGLRTDSGSMAAVIVTDSTFGDNGTQNVN +GSAQMKLYNFDGDATFTRVDLVGAPAGTDQNSRPDYGIELTGLSNTGLAEGGTSPDLGTV +VFTDVTVSGEFHKIGVAVYNYGQIDGLDIQSLDLSGTETNWGPVFNIDGVEDSTVDARNY +NITYPAGDAIVAELQGEVPDQTATDTTIYGTDANERLMGKAGDNVLHGGGGNDELYGADK +PGNPAEDDSGNDKLYGEAGDDLLAGGAGADILDGGDGIDTASYARAGATEGVAVDLANGT +ASGGDAQGDVLSNIENLVGSSYDDTLTGDGNDNVLTGGAGADALDGGDGSDTVSYAGSSA +AVNVDLATNTVSGGDAEGDILSNFENVTGSSHDDTLSGTSGDNVLVGGEGNDTVVYTTTV +AASDVSFDTDHWVVTTASAGTDQLSGIETIEHGGGSNILLVGGGGYATIQAAIEAAGAGD +TILVAPGTYAPFATSFGGPANITVQAMGDPGDVIIDATGGAPSNGRILDLRADGMTLDGF +TIEGPGHAGVGISINGQGITVENNVISNVLTGIQTGTQYDTGNVTITGNTVDADYGISLQ +NTANTVTDNTVHATTEGLGVLDVAATLSGNSFTVDAGGEGLALYGGATSSTFTTSGNTVT 
+VGEGANLQHATDLAGTDGTLNIGAGTYEQVISIAKDGLTVNGSDATLVVDGSSSDVNGIA +RVDAVTIYGDNVTLQGLTIVDSLVDQSYVTYGWPETTRGIVVKNGAENFTLTGNTIESTR +NGILINGIDNTGSVTDNVIDNTKSGISVQYTDASGIIIAGNQEGTYGNEWGLNLHLNGYW +DGTTYTSNNADNYPILGTAPTADWQASLLGLSTGNDGWAVMDQAYALYNRTLVTVDPDGS +PSSFSNQGSQRSPISTIQNGVDLVVAGGTVHAHAGDYSGESVTVHVDNLILDGDAGATGI +TVQLADGLSNLTLAGEADYTATGNAADNTLIGGAGDDVLTGGGGADTLTGGDGSDTASYA +ASAAAVDVDLGSGTASGGDAAGDTLTSIENVIGSAYDDMLTGDAGDNVLQGGAGADSLIG +GAGSDTASYADSSAAVDIDLAAGTIAGGDAVGDTYSGIENLTGSAYADSLTGDSGDNVLT +GGAGNDTLTGGAGNDTIDGGAGTDTAVLSGNRATYTLGVNADGNITLSGEGTDVVKNVET +LQFADDSASILVVDPGTSGAYATIQAAVTAASAGDIILITGGTYTENVTLDKQVTLLGAQ +AGVDADGRTGVTESVIEGNITVSGAADNATIDGLTIHNGASVGGDLAGVYLASGATGTEI +TNTIFTRDGTVDGDSSRGILTTYNGGNTDVTIAHNSFSGWATGTYVNPGSQDIQITDNQF +DGNYVGLSVDGPNGAVVTGNSFTGNQFEGLGIGPGTGISGITLSNNSFADNASQVGVYTD +AIDVNALSGNTFDGAVVISGSDTVYASIQDAVDASSDGDTILVYPGEYSELANYNPTTGE +NSGTGNPLGLLINKSVTIQGVTADGTYITDAGDVAATVTSGAQSNWGTNFFVTADDVSIN +GLDLVATGSTGQPYVNKSIEVVGDGFTLNHSVLGAADGLPMYTAVYVNDWSVDSGFTASA +IASYAVLNSQLYGDMVVTNGPGTGYTADQLDMRIVGNSFLTIDGGIPNDGILVTGNDDNI +AWRNASAALPTEISGNDFGDASGVLWVRGDGTQDFPTTAEVNGILADNSVPAYAYAVDGN +GDLAAGTYGSSSIPSLAIRATAADFAPSELSGAGAESLMVQQAGETTPHSYSLIVGADGV +ADSLTGTSGDEALIGGSGDDSLSGGGGNDILVGGDGNDTLTVGDGSAVVYGGDGTDTTAY +STTVSADDISFDTDHWVVNTSSGTDQLTGVEAIDTGGSNKILLVGGDSGYATIMEAVDAA +SAGDTILVAPGTYEPFSMGYWSPSDLTIQGMPGAVIDATSISTPARIVDLTAEGTTFSGF +TIVGPGDVDDAGISVGISISAQGVTVSDNTISDITTGIQNHTPADQTGASSILDNTISGA +NVGISLQNVNNTVSGNTVTTVEAHTLGVGEVALGVLGGDNTITHNTFTVSNSGKAIGLPD +LPAVANLTTSENVVTVGEGADLQNAADLAGTNGTLHVGAGTYAQELTITTDGLTVTGDDG +ATIQVADPGVYSPSSDAFAARTIAFTIAATGVSVSGFEINGPLSAYTYTTTDFATLGYTY +GFFINDGVQNTTLHDITIQDIRTGMSFEGDNTATVYDNVIDNTRGAFLVRSDGVDLHDNS +FGSTGNEWDLTMLAGTPSDYFGDPLTDPGTYGDNMMALSAANNDMTIADRMYGEGGVLAR +AASDPDLADQYAAVANRSHVEVLAGADNDTSAGLGETRGNGFGTERLPVGTLQDGVNAVV +QGGSVHVQGGDYSGESVTVHSDNITINGEASAIGIDVHLGIGLSAITLEGSADFTATGND +LDNTITAGAGDDILSGGDGADILFGGDGNNTLTGGDGADKFMISAHTDGSKDTITDFGQG +DSLDFHDVLSDPTDVVFTDDGSGNTQITTNAAPTIVLAVVEHVEPASLTVDDHGNVTLAQ +TS* 
+>PROKKA_00235 putative succinyl-diaminopimelate desuccinylase +MPAKIPDPVELTRRLVAFETINPPGQEQACAAFLADLLAGAGFDCVLHPLGDDRASLVAR +RGRPTEQRLPLAFTGHIDTVPLGAVPWKHDPFAGEIVDGRLYGRGSSDMKSGVAAFVVAA +IAEAERIGDGAGVELVITAGEETGCDGARALADGGHLGRAGALVVAEPTANRICVGHKGA +LWLKAITHGVTAHGSMPEHGDNAVYKAARAIGRLADFDFNVARHPVLGRPTLNVGTVSGG +LNVNSVPDRATVGIDIRTIPGMDHAPLRDGLGGVIGEGADLETLCDLPGIWTEPELPWVQ +RAAAAVAAVTEQPFAPESVAYFTDASVITPAYGDIQTLVLGPGEPSMAHQTDEYCEVERI +VEATDIYRRLIADWMKPES* +>PROKKA_00236 High-affinity branched-chain amino acid transport ATP-binding protein LivF +MTAVSSAVSDSAAVPQTADVAVEIEGLFTGYDKADVLLDVSLTVPKGQITCLLGSNGAGK +TTLIRSILGLTPPRQGTIRLFGEDTTGLPTHKVVARGVACIPEGRRMFSKLTVEENLRLG +AYQEPSEAKIRTSLEDVYQTFPRLAERRNQLSGTLSGGEQAMVSIGRGLMGAPRLLMIDE +PSLGLSPLYVQENFRIIENIRTRGITVFLVEQNVHQTLAISDYGYVVSGGRLVAQGAAAS +LQNDPEVHAAYFG* +>PROKKA_00237 Lipopolysaccharide export system ATP-binding protein LptB +MIGNYGGLEHGLLFNLFRRRAFRRQYEEALEEARGLVATFSRNLADRLFDPVGALPMIER +RRIEICRALIAHPKLLLLDEPSAGMTHDETHQLMDDILSVRDRLDGLAIIIIEHEMGVIE +RVSDHCVVLNYGRKIAEGSYQDVASDRLVQEAYLGSA* +>PROKKA_00238 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00239 hypothetical protein +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00240 Glutamate synthase [NADPH] small chain +MGDPKGFMTVPRKEASYRPRNERIYDFGEVEQTLNEEDRKLQTSRCMDCGVPFCHWGCPV +GSKIPEWQDAYYRGNEEVAYEILHSTNSFPEITGRICPAPCEKSCVLSINEEPVTIRENE +AANVENAFTQGWIKANPPSIRVV* +>PROKKA_00241 Ferredoxin-dependent glutamate synthase 1 +MEKKRKEMAFRFPKPDGLYNPANEHDNCGIGFVAHIKGEASHDIVERGLEVLRNLDHRGA +KGSDNASGDGAGVMVQIPHEFIKKVLKIDVPAKGSYGTGLIFLPQLEAEANACVDILSNI +IQEEGLQLIGYRDVPTDSSIPGEIARTTEPRIKQVFIKANLEEDILEQKLYIVRKRAEKA +VQASDLSQKEVYYHSSLSAKTMIYKGMLTPDQMKDYFTDLQHPLFKSALILIHSRFSTNT +FPTWDLAQPFRLVAHNGEINTIKGNRLWTQAREGLLKSEVFGDDLPKILPVLEEGKSDSA +SFDNVLEFLHRTGRSLHHSLCMMIPESFNEKNPIPESLKAFYEYHSTIMEPWDGPASIVF +SDGRYIGGTLDRNGLRPSRYVITKNDLIVMASETGVQDFAAEEILEKGRLRPGKILLVDT 
+RLGIIIPDEEVKEQLSRRNPYGMWLKENRLLMEDIKVRQRVPSTMDDFLTYAKVFSYSKE +DMEFLIQSMSNTAVEPINSMGNDTPAAIFSRQPQRLFNYFKQTFAQVTNPPIDAIREGLV +MSLTNYIGSLNSNILKESPDHCKLIKFPDPIVTNTDLGKIKDLKDEMFSHEIISIVFPVD +QGFEGFKKAFDEMLERAEKAVDDKKNFIILSDRAIDSKHAPFPSLLAVSAVHHHLIQKKK +RMQVGIAVETGEAREVNHYALLLGYGASVINPYLAFAAVDHLVKEGKLDMEYKDARRNYI +KSIKKGLLKIFSKMGISTVRSYHGAQIFGAFGLSKELVDKYFKGTSSPISGIGLEEIYEE +YSQFHKDAFREEATQEKFRFETTGVYAWRKNREDHAWNPDSIGLLQWATRTNSYEKFKEY +SRTVDEYNRKPSFIRGCFQVKRNSISIEEVEPVEEIMKRFVTGAMSYGSISKEAHESLAV +AMNTVGGRSNTGEGGEDHNRFGTEKQSAIKQIASGRFGVTSNYLTNAREIQIKIAQGAKP +GEGGQLPGYKVNEVIAKLRNSTPGITLISPPPHHDIYSIEDLAELIYDLKATNPKAKISV +KLVSQDGVGTVAAGVAKAFADLIIISGGEGGTGASPISSIKHAGLPVEIGIAEAQQTLVK +NNLRGRVKIQVDGQLKNGHDIVTMACLGAEEFGFATSALITLGCVMMRKCHLNTCPTGIA +TQDETLRERFTGNPQLVINFFRFLAGEVRELLAEMGFKKFDDIIGRADLLEENKEVFGWK +MKNVDFSAVLNRPAEADKFDIRYVPGSASLNLDGHLDHTLIEESGKAIKGKEKVWLHHPF +ANTDRAIGAMLSGVISQKYGEFGLPEDTIHATFDGSAGQSFGAFLAKGVTFRLEGDSNDY +IGKGLSGGKIIVVPPTGSTFTPEENIIIGNSTFYGATGGEAYIQGVAGERFCVRNSGMEA +VIEGAGDHCCEYMTGGRVVVLGKTGRNFAAGMSGGIAYVLDEDGDFDFYCNKGLVELLEV +EDKKDIKELQGLISKHLTYTQSPKAAKILTQWEEYLPKFVKVIPYEYRKVLRERELRELE +QKMKMTEDANVMQE* +>PROKKA_00242 Phosphoserine phosphatase +MLEQKELILLNISGEDKPGLTASLTEILSQHNVNILDIGQSVIHKDLGLGILFEVPKKYR +SASILKDLLFKAYELKSHIKFTPIPIEEYEKWVAEQGKERFIITLLAHKLTALHLSKVSS +LIASQKLNIDTISRLSGRKSLNGNNKVTNSVVEFSVRGTPLDINAMKQSLMNIASETGID +IAFQEDNIYRRSRRLVCFDMDSTLIQTEVIDELAQKAGVGDEVKKITESAMRGEIDFKES +FKKRVSLLKGLDESVMKGIAENLPITNGAERLLSTLKQYGYRTAILSGGFTYFGNYLKTK +LGFDYVFANELEIKNGKLTGKHLHEIVDGKRKAELLELLAFKEDIHLEQVIAVGDGANDL +PMLEKAGLGIAFHAKPKVKASAQHAISATGLDTILYLLGFRDREINAS* +>PROKKA_00243 N5-carboxyaminoimidazole ribonucleotide synthase +MLIQEASKWDIITYVLDNDETCPANSLATHFVKGSNLDFDSVYRFGKMVDLLTYEMENIN +IEALKKLKSEGHQIIPDPDILELIQDKGKQKEFYQDNNVPTAPFKIYSSRQDIVQAIKNG +EIKFPFVQKLRTGGYDGRGVAVISDENDLDKLLDGASIIEDKVNIAKEIAVIAARNKQGE +IKCFPVVEMVFDPEANLVDKLICPSKITAEQSEKAIEIAGKIIGLLGMQGLLAVEFFVDE +NGEVIVNESAPRPHNSGHHTIESIITSQFEQHLRAIFNLPLGSTRPKLPAVMVNILGGEG 
+YEGPVRYEGLTEIMAIEGVKIHLYGKKITRPFRKMGHITVLSDSLETALEKAEKVKQLIK +VKSWDKN* +>PROKKA_00244 N5-carboxyaminoimidazole ribonucleotide mutase +VGQKLVSIVMGSDSDLPVMKPAAEMLEQLGVEYEIDIVSAHRTPEKLFDFASNAHKRGIQ +VIIAGAGGAAHLPGMVASMSPLPVIGVPVKSSNSIDGWDSVLSILQMPGGVPVATVALNG +AKNAGILAAQIISVSDSQVREKIIEYKAGLKEAVMKKAKNLKG* +>PROKKA_00245 Cyclopropane-fatty-acyl-phospholipid synthase +MDPKTVIRELLTGTGVHLNGPHPYDVQVHDERAYERWLSEAELGLGESYMDGWWDCLALD +EFIERILRAGLEEKVKRNFSTAFYVLSKRLFNQQTRVKSKRVGREHYDLGNELFSKMLDR +RMVYSCGYWQRAKNIDQAQEAKLDLICKKLNLKPGMKVLDIGCGWGSFAKYAAEKYGVEV +LGVSISKRQIELGNELCKGLPVTLLYKDYRDVEGKFDAVVSVGFFEHVGYKNYDTYMKIV +DRCLTDNGISLLHTIGNNTTTHYVNRWTNKYIFPNGMLPSIAQVAKAAEPYFVIEDFHNF +GPDYDKTLMAWYDRFNKAWKELKNQYDERFYRMWRYYLLSSAGGFRSRATQLWQFVMTRT +GRQQPDCRFA* +>PROKKA_00246 hypothetical protein +VAHAATTGNNETLLCRFPTLHNNTIVFEAGGNLWRVDRTGGVATRLTTDPGYDMMPRFSP +DGKTIAFTGQYSGNVDVYTIPADGGAVTRLTYHSDVVRKAPTRWGPDNMVMTWTPNGKDI +VFLSRRDTWNSWFGQPFEVSKMGGLPTHLPLPKGGVMSYSPDGSKIAYNRIFRNFRTWKR +YKGGLAQDIWIYDFKTKKIQRVTKWKGTDTYPMWYKNTIYFASDRGANHRLNIWAYSLDT +KTFRQITHFKNYDVDWPSLGNNGIVFQDGGSLYVLDLPSEQLHKINVKVPTDGTQTLPRW +INASKMIRSLDISPNGKRVLFGARGDIFTVPAKHGATRDITQTSDAQEQYPAWSPNGKWI +AYLTDASGVNELAIRPSDGSGHQTYITNAKTGYYYNPTWSPNSQMLAYSDNNHVLWYISL +KDKKPVRIAQDKYNAMRDYHWSPDNNWISYSKTNASGLSQIYIYSLADHKSYKVSDGIYS +DNDPVFGPNGKYLFFVSARHENPLFSESESNVATEKMDGIYMVTLQKNEKSPFAPVSDEG +MPEAKKASSSASKKTESAKDVKIDFNGLMNRVIMLPIKSGDYGNIQVTGNKVFYQTRPLI +TIEGFLHGTGQSSIMVYDLKSKKGHTVVANGARTYGLSADGKTLVYMRRGKFFLMPSASV +NAKGSEPVNTSHMKMKIYPHQEWSEMFHQAWRLFNNFFYNTKMNGVNWNEVGANYGKLVP +LLGCREDVNYLVGEMIGELDNSHCYVWGGDDNYLGKTNPTGVLGVDFGLNKSSGRYYFKK +IYAGDNSRPGYGSPLDRPGVNVKTGDYLLAVNGHQLKAPMNPYSLFVNTVGQQTTLTLAD +KPDGKGEHTVTVKPINNSLNLRLLNWIRTKRAYVNKKSDGKIGYIYMSDMESLGMTQFIH +QFYPQLSKQGLIMDDRFNGGGFIDQIVLERLRRVLIGMSTNRAHAAMRYPEQVLHGYKAC +LLNHYSASDGDMFPFYFRKYGLGPLIGERTWGGVRGYNRVWTLLDGGNLVVSQNSIYGLD +SKWAIENHGVTPDIKVDNLPGQVMEGKDPQLDTAINYIMKKLKEHPMPIPQPPAEIPAYP +SGNDAGGTN* +>PROKKA_00247 Histidinol-phosphatase 
+MIRYNLHQHSIFSDGAAEPEAYVQSALNLGFEAMGFSEHSPLPFPTKFSLKAERVEDYIR +ETERLKEKYNDRIDLYRALEMDFIPGYSENFTEWRKKAQLDYAIGSVHMVQPEDDGELWF +IDGPDRSIYDDGLQNFFGGDIKKAVKTYFHQVNRMVETQDFEVVGHVDKIKMHNQNRYFT +EEEKWYRDLVEETLHLIKEKDFIVEVNTRGLYKKRSNRLFPDDYALQRISELGIPVLISS +DAHKPEELNLLFETAEKRLLDMGLGAVVRFDHGKWKDFPLS* +>PROKKA_00248 Ribose-phosphate pyrophosphokinase +MMIDAAKRASARKIVAVIPYFGYARQDRKDKPRVSIGAKMIANLLTTTGIDRLITMDLHA +DQIQGFMDFPVDNLYASVIFYPYLKKLNLPNLMMASPDTGGTRRAANYAKALDTGFVICY +KQRTRPNVVEQIQLIGDVAGKDVVLVDDIIDTAGTITKAARVILDKGANSVRAMVTHPIL +SGDAFKIIADSPFTEVVVTDTIPVKDDLGGKIKVLSTAQLFSEVIKRVENYKSISSLFNL +GNQSNK* +>PROKKA_00249 50S ribosomal protein L25 +MNTVSLSGSLRENVGKKDAKKQRRLGKVPCVIYGGSEQKHFTLDQLEFKPLVFTPEASVV +NLTLGEKTYECILQDVQYHPVTDEILHADFLEIHSEKPVNIALPVELTGTAPGVVKGGKL +RLKMRKLRVNGIIKLMPEHIVLDISKLDIGRSIKVRDINQANLTFLDPGNQVVVAVVAAR +GLSAEEEAEEAEEGEEGEAAEGGEEGGEKSAE* +>PROKKA_00250 Peptidyl-tRNA hydrolase +MKYLIAGLGNIGVEYANTRHNIGFIVADALVNELKGKFETERLASVASVKHKGRTLVVIK +PTTYMNLSGKAIKYWIDKEKIPIERVLIVVDDIALPLGTLRMRKKGGAAGHNGLSDIIMK +LGTEKFPRLRVGIGDDFAKGYQVDFVLGQWTDKEVNVMIPRVQKAVEIVQSFVSAGIDNT +MNLYNNK* +>PROKKA_00251 hypothetical protein +VFQGKHQLNVFINILTQIKTMLRIRNFNNFVSNFLKIFAKFYNQAKIISYRLIILLQASV +TGQPNHSIEMEKQRQQSGLLFKIK* +>PROKKA_00252 3-oxoacyl-[acyl-carrier-protein] synthase 3 +MAVEAVRGLLEKTGTKPEEVDLVICATVTPDMQFPATANLVSYKLGINNAFSFDMNAACS +TFIYALITGSKYVESGEYKKVIVIGADKMSSIVDYTDRATCVIFGDGAGAVMLEPTASDE +PGIMDHRFYTDGAGWIHLHQKAGGSLKPASHATVDAKEHFIYQEGQPVFKFAVTRMADVA +AEIMERNQLKSEDIAWLVPHQANLRIIDATARRMGVSKDQVMINIQRFGNTTNGTIPLCL +YEWEPQLKKGDNIVLAAFGGGFTWGSVYLKWAYDGKK* +>PROKKA_00253 Catabolite control protein A +MHYNPNAFALSLRNSYSKIIGLIIPEITLYAFPSMIRGVSEFCYNAGYNVLILSSNESYK +REVQNTELMLSSQVDGLLVAITKETRNHKHFDQLEKEGIPVVFFDRVFNNYGTSKVIIDD +RRAAYEATEHLIKTGRKNIAYFGGNAALYITQQRLMGFRKALSDYNLEEHDLVFADDSHM +ARNKALQIFKRKNYPDGIMSISDEVLTGIIPALQELNIKIPNEVGVISFSDGPISQMYKP +AISIVHHSLARVGQVAVDLLIQRIEHPEDMHQQIHIIDTELIARGSTAIGK* +>PROKKA_00254 Vitamin B12 transporter BtuB precursor 
+MKKVVQAFMVLLALTITTGLMAQGTIKGTLKTTKGKTVPGVNILLKGTTTGTTSSLNGSF +VLKVPAGKHVLLVSFTGFKPINYSFTIKDGETLTKNFVLHEDLLALDQVVVTGVQNKQTK +LQSSVAITTLSPQKISQIAPRSAADLLKAIPGFYVESSGGKGNANVFARGLPSSGGLRYV +QFQEDGMPVFEYGDLMFGNTDIMVRIDQTMSRMEAVRGGSASVLTSDAPGGIINIISKTG +GPTTKGVFMQTIGLTYMHARTDFDIGGPVSKHLRYNIGGFYRADNGIRSPGFLANNGGQI +KANFTYTFNKGYVRFRTKILNDKTIAYLPFPMMGNPAKSIPGFNANYGTMKSLDLLHLHA +TTPTGNSVNESLADGMHPKIFAFGGEAFFDLGNKWSLKDNFEKTFTHIQFNSIFGVNAPE +SASAYATAQGLTNYHYAFADGYNAGKPITNMSSLNGNGLVATYGWWSVGLNLQEFGNDFK +LTKQSTNNTFTAGWYFSTNQVGGNWWWHNMLVDISGHNTRKLNLINDNTGESLTTNGYSQ +YGTLYADYNALTVINAPYVYDEIDLGRLTINAGLRWDMGTITGRVENTGSYSYDVNGDGI +ISPAEKNIQYGNGTYTPFHYDYSVLSYSLGLNYEFNKSTAIFARASQGHRSPADRAYVFG +ATTSTPNGFPSSAKDESIEQYELGLKYNSSKVALFATGFYSFFNHIDFTDFVNVGGNLTA +IQQYYNTSAMGLELEAAAQLGKLNLSLTGTAQSAKYHNWVYHDQSGNLHDFNNHFIQRLP +KLYFTFRPSYNFGKLNVSAAWEYFGKRYTNPENKQVLPQFSQINAYIDYTVSPHITISAA +GNNLFNVIGLTEGNPRSGLVSTGGSQYFYARSILGRSAILSFKYSF* +>PROKKA_00255 Sodium/glucose cotransporter +VKHIDISPIDIAIIVIYIVGIAVWGLMYSKKKSKGKGHEGYFLAGRNMTWPIVGITLYAA +NMGSPALVGLAGDAYSTGISVFNYEWMALVVLVFFAIFFLPFYLRSRVYTMPEFLQRRFD +IRSRYYFSFITLVGNIIIDTAGVLFSGALIVKMIFPAMALWHIIAVLAIITAAYTITGGL +SAVMYTEAVQGVLLMLGAVLLTFFALKRIDFNVARIFTETPHHMMSLIRPNSDKAMPWLG +LVLGVPLLGFYFWGTNQFMVQRVLSAKNTNHGRWGALFAGILKLPGLFIVVLPGIIGRLI +FPHLSDPDLIYPMMLFHLLPVGILGIVLAGLIAAISSSISATLNSASTLMTMDFVNNLKP +GLTPKQLVRIGQIFTGVFVVISAAWAPMIAGFPSLFKYLQQVLALISPPVVAVFLLGLFW +KRANAQGAFYGLMGGLLMTIFAVIVRYVNPDIFPWLGHIQFLLVAPVLLVGTMAIIVPVS +LMTPPPPEEAVAQFTWSFKFFNAESMELAGTPWYKNYRYQAIGALLATAILVYIFR* +>PROKKA_00256 Plant neutral invertase +MEEAKNTYSKALDLLKNGLLKEGFVAALDQQANYRRVWARDSIITGLSALLADDTTLIEG +MKKTLISLKQHQHANGMIPSNVSFDADGNVTMVSYGTLTGKVDTNLWFIIGVMVYVRKTS +DTDLLKEMLPAIEKVFELLLSWEFNGRGLLYVPQGGNWADEFILEGYNLSEQLLYYWALS +EASAMDEKFSTKAKKLKDLIEINYWPTESNRSKVYHKTAFERQLEKGQTSHWLPGFKPAG +YHTFFDCFAHGLSFVLQFNSPEQEGEIIETLVRTTSETSGSLLPSFWPPVRETDAQWETL +QTNWIYKFRNQPGAYQNGGIWPISNGLLIAGLYRSGHKGMADKMKEALFLATALPENQFG +FYEYIDAFSWEPGGAKHQLWSAAGVIFAEKAAQNVFIV* +>PROKKA_00257 Uridine kinase 
+MEGDIINLNDEHRSTARKIVAPLIEEIKNCQTIYTFSVAGESGAGKSITAAAIAEQLELA +GFSVKVFQQDDYFFLPPFTNDQKRRKDLEWVGIKEVDLALIDEHLKAAKDGVKTIKKPLV +IYGKNKITSEVFDMKGVNVCIAEGTYTSLLKNVDKRIFIDRDFFDTHNDRKKRGRDLIDP +FTEKVLEIEQCS* +>PROKKA_00258 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00259 hypothetical protein +MAKGGLGVVVRGIPGYRARIRPIVSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAG +VIAKRPAPIALPNSCAAEWRMAPDAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00260 putative GTP-binding protein EngB +MQVYPKFLLSATAPAHFPPPSAPEFAFLGRSNVGKSSLINALLGSRQAKVSSTPGRTRAI +NFFSLTTSPNRQQPNFLFADLPGYGYAKISKSISAEWPKVV* +>PROKKA_00261 ATP-binding region +VKKVWLSWSSGKDSAWALHILRQQKDVEVVALLTTLNEHFDRVAMHSTRRDLVEAQARAA +GLPLVRVPLPWPCSNEQYEAAMGKACAKAVAEGVEAVAFGDLFLEDVRQYRVEKLAGTGL +EPLFPLWGLDTRALAREMIAAGVKTRLVCVDPRKMPREFAGRDLDEAMLRELPEGVDPCG +ENGEFHSFVYGGPMLGEEIPVESGEVVERDGFVFADVKLKH* +>PROKKA_00262 putative 3-hydroxybutyryl-CoA dehydrogenase +VSEIQIVGVVGAGTMGNGIAHVFAKSGFQVRLCDVEQRFLDRGMDTIRKNLGREVTKGKL +MQEEADAAVKRIEGTLARAALADCDLIVEAATEQLEVKRQIFEDLDRVAKPEVILASNTS +SISITKLAAFTERPERVIGMHFFNPVPVMKLVEVIRGLATTQETFEMVKALAERLGKTAV +EVNDAPGFVSNRVLMPLLNEAMYAVMEGVATPEAVDQVFQLGMAHPMGPLTLADFIGLDV +CLDIMRVLQEGLGDPKYRPCPLLIRMVDAGWLGRKSGRGFFEYGNA* +>PROKKA_00263 Putative metallo-hydrolase YycJ +MVRMTVLASGSKGNSTVVSSSRTRLLVDAGLSCREIFRRMQAAGEDVESLDAILVTHEHQ +DHVQGLAVTARKLGIPVYFTEATHRAWMRWMTPRKRLTYAQWLEQQKAIVAAGKEPAAAG +AEAQDAAEEDISGAKPKKDPCALPAVEYFSSGTDFQVGDIAITPYTVPHDAADPVGFVFE +AEGVRLGIATDLGYVTPNVHLHLKKCDVLMLESNHDVDMLRDGPYPWSVKQRVMSRVGHL +SNDAAADFLENSYDGRAAFVVLAHLSESNNLPELARVSAERALRDRMNLLGNKLMLASQQ +TPLEPIVL* +>PROKKA_00264 hypothetical protein +MKTCIDPVVGDILSSWRYDISGITPEMRIDYEEHLASCSVCRSRQRLHRAIDVVLIGLTT +LSTIVFVLALAVIHHVEPLRTFALFIFHIRDFSVVLTLQAVAFAGLVVSMFAWLLVAVAT +PVPTFLSGVAREQARELQSRIPEEWRNRFQRGAL* +>PROKKA_00265 Dihydroorotate dehydrogenase B (NAD(+)), catalytic subunit +MSATRGIQHGVDMRVEVADIQLPNPVLAASGTFAYGIEFEDVVNLDHIGGFVTKGISREP +LSGNPAPRLIETAAGMINAIGLQNIGADAFVQQKLPALARYQCPVIVNIFGYQMQDYIAV 
+IRRLNEAEGIAAYELNVSCPNTHAGGIAFGIDRAALSDLVAHARHYSRRPLIVKLSPNVT +SIATMARSAESSGADAISLVNTFVSLAIDVETRRPRLSNITGGLSGPAIKPIALRMVWEA +AQAVKIPVIGMGGITTPEDAVEFLLAGASAVQIGTASYADPRAVEHIGQGLAHWCSRHHI +AKVSELIGGMQTGQ* +>PROKKA_00266 Trehalose-6-phosphate phosphatase +MTKNQDCMTLPVRWEQAPAREDFWQELRTASRWLLLLDYDGTLAPFHQDRMKATPYAGVK +ERLEQLLKIEKGRIVVISGRQIEDLKQLLQLSQPVEIWGSHGREHLLHDGSYRLVDLTED +ERRVVEAVTARMSERGWAGQLERKPTAIAVHWRGLPVSEQKELREAAEQYFAEANPPDTL +EMMPFESGVELRSRSRTKGQVVAEVLAEEPADIPTAFLGDDWTDEDGFAELRGRGVGILV +RPEARESCADYHLTPPEELLEFLDRWLENAKESIR* +>PROKKA_00267 Trehalose-phosphate synthase +MSENQVIIVSNRLPLSMTVKFGSLKVGRSSGGLVTALQPILKSRGGTWIGNGGTREDKRM +ARALEEEARRSGFDCVPVFVTEQEDRNFYEGFSNQVLWPLFHDFIGECRFEPEYWDFYRK +VNGKFADAVMRVYNGKQILWVHDYQLMHVAASLRERGCKGRVAFFLHTPFPSYDVFAKLP +WRRHLLLAMLEYDLIGLQTERDVRNLVSCLRRLVPEASMSTDHGVHRVSWHGRTVVIQDF +PISIDFDEFARAANQPAVEERMRTILARMGMGQVIFGVDRQDYTKGIPHRLRAYGELLRR +RPEMVGKVKLVQIVVPSRQNIPGYEALKSRIEHLVASINGEYTQPGWVPIHYIHRAIPRE +ELLALYRAAHVGLVTPLKDGMNLVAKEFCASRIDDRGVLVLSEFAGAAAEMYRGALLVNP +FDLEGVADALEQALQMPGAQQQERMRKLRRFLKHANVHRWVEDFMEEIESVKAPRSRRG* +>PROKKA_00268 Anti-sigma-B factor antagonist +MSMKVTTRQVDGVTILDLSGRILLGEGSVQLRDAVRDLLAKGQKKILLNLGDVTYIDSSG +IGEMASALTAVRNQGGDLKLLNLTKRVHDVLQITKLYTVFDIKDDEASAIASYN* +>PROKKA_00269 Serine-protein kinase RsbW +LRGQQETRVTYTLASSLDSVDKVEQTAEQMARNAGIDEDEAFRVSMAVREAAVNAVLHGN +AYAPDKRITVTFENNGSDLIIHIMDQGEGLDPAALPDPLKPENLLSGSGRGIFLIRSFMD +EVHFKQLHPGTELTLVKHLGTAKQSLQGGNSL* +>PROKKA_00270 Magnesium transporter MgtE +MPVRLMPMPEVDAKVTLTMLLGTPVTDATGKLRGKVRDVAVATGAEAGRVAGLVVKNRDG +LQVVTSVDLRRTPSGTLELRADAQMRPLTGEESFLLLRQDLLDRQIIDVHGRKVVRVNDV +ELDWWNQERGAAGQQESLRVTGVAVGLRGALRRLLLGLMPQATLDRLARKVPQRSIPWEF +VDIVEVDPARRVKLKIEHERLARLHPSDIADILEDLAPAEREALLRSLSEELAADALEEL +DPKLWRSLLQSMDSETAAGIVEEMDPSAAADLLADLSKADSEAILGEMDPEERQEVKELL +EFREDSAAGRMTTEYVAVPEDATVADCVAALREFEGDPETITEIYLLGEDDLLVGVVPLA +RLVLAREETRAQVLSEPETITCELEAHQNEVAELFDKYNLRALPVVDEQRRLAGVVEADH +VIAFLRERR* +>PROKKA_00271 hypothetical protein 
+MHPQRSRAGHNNEPRHHVGKERAHHHVGPRSRVVLHSNALLHHRRLQIELHPRRNRRADD +ADHHEQVAVLPPHRAVRQRHRLQYGQVPVGRGQHARRHISNVENRSHQKDLFHPLVIALD +RQQPHQQRAHRHRGVLRHVEQLQAARNSGKLANHVAKVHHHQQHHHHKGDAQPELLADQV +AQPLAGHHPHAGAHFLNHDQRQRHRQNRPQQLVAKLGSRLRIDKNSARIVIDVGRDDPRP +QHRKEQKNPGSPALQHRVDSSLVCQLNSERA* +>PROKKA_00272 Divalent metal cation transporter MntH +VCEGLGFESGVDKKFSEAPFFYWLYTLLIVGGAAVVLIPHFPIIEFSIFSQMLNGILLPI +VVVFMLLLINRKDLMGEYTNSRWFNAVAWVTAVVVTVLSVVLMVQSIRQV* +>PROKKA_00273 hypothetical protein +MPNQCRHVRANGVQCRAHRVWKEDYCFFHLHHRTPNGTAKRSEDPPPPPPKNGIEIPLLE +DLASIQIAIGRVLTALAQGKITSAEARTYLYGLRLAASNVKQKDFAPVNTVETYVQYDNG +DTLGPEQFHAEKQPQHPLMDSGLLALRHLSNRLTYEATLDAYLTQGQEPPSTLRPPVAGP +PADKSELQNWIKTGWKACQSRAHALELARKAIDQPIPDPIDPKAAFSINANIQRTA* +>PROKKA_00274 hypothetical protein +MEKPTVIHSSFTLERSYPYPAEKVFAALADPEKKRRWFADSPNHEVVKFAMDFREGGAER +MEYRFNEGTPFPGVMLVNSGHYEDIVPGRRVVLCSTMTIAGRKISVSLVTFDLLPQGEGS +ALLCTFQGVFFEGADGPEMRKAGWEFLVERLGEEVAREG* +>PROKKA_00275 HTH-type transcriptional regulator +MRKRKPKVDRIFHALGDPMRRVMVELLRKRPYSVSALAEPLGITLTAVGQHLKILEEAGL +VRTEKLGRVRTVQLEPEGFAVLEAWAAEHKGEWALRLDRLGDVLADDG* +>PROKKA_00276 hypothetical protein +VTTANTFRANWPFVLIAAIFLGTALVGALTGRIWLQMRTITRTNDKVMYWATIWCSVIMA +GFMLLVAAHYAPVINLIKEL* +>PROKKA_00277 3-phosphoshikimate 1-carboxyvinyltransferase 1 +LHLSVSERTIAPARNIEGSLRLPGDKSISHRYALLSGLAEGTSRFTNFSTGADPSSSLAC +VEALGAKVVRGEDGSVEVTGVGGQFQPSSSPLDCGNSGSTMRMLAGFLAAQQGEFTLVGD +ESLSRRPMERIRKPLMQMGANISLTEGHAPMVIHGIPLKAMEYATPVPSAQVKSAILFAG +LQASGTTTVRESVRTRDHSELALRAFGAELERTIDSITIAGGQKLSAIEAKVPGDISSAA +FYLCAAALFPGSNLVFDDLGMNPTRASLLDVLTALGAHIGVIDLEDKASELIGTVKVNAP +PDGLTGTTVSGALAAQLIDELPVLAAIGPYTNNGIRIRDARELRVKESDRIDLVVKNLRA +MGAEVEEFEDGLDVPGGQTLHGAEIDSGGDHRIAMAFSVAALRAEGETLIRGAECASISF +PEFFDLLDAIAQR* +>PROKKA_00278 hypothetical protein +VNVLRKSFVVAVVLLSFGASAAVAQTTVAASVYGAFRSSTRTGGISNFTVENPSNAAGFL +LELRHISNPLMGYELTYSYHRANEAYSNTLKVLCPISPGGSCPEQITTAGVSANAQEVTG +DWVVSFPLANLKPFVLVGGGVIVTSPATGSVTATITDFDPVTNMMSQTTSSMPTQTQTKG 
+VFQYGAGLDWTVLPHIGLRFQYRGNVYKAANLTKVFTSTDKFTQTAEPVVGVFFRF* +>PROKKA_00279 Phosphoserine phosphatase RsbU +VYESRPQTTPEQHSSSIEGSHPTDAAAAAGSRSVELAQTDFLLRLTDALNTTLNLQTLLQ +RTADLVRTVIDYRIFAILLLDNRTNDLRMRFQTGHRPEVERMRIRLGQGVTGQVALTRKP +MLIPDVRDVENYINANPDVHSEIAIPLIVKNRVIGVIDIQSEQPNYFQPDHLHLLTLTAS +RIAHAIDNARLYTRVSRQAQTLEVLNEISRDLSSILDTDRLFERISQLLRRLFDYQMFTI +WTVRPIEHVLENRFALRFGERYYPNETIPVERGIVGAAIAERRPMNIPDVRRDPRYHKVN +EETRSELAVPLMYKSKVVGVLDVEHTQPHYFSEDHVRALTTLAAQIAIAIENAQLYQRVI +QQEQRLDHDLQMAREVQLRLLPPSLPSRPHAEFAARFLPARTIGGDLYDYLNYDDQRGAL +AIGDVSGKGAAAALYGAVVSGTMRSQASLKPSPAAMLQALNASLHERRLDSQYVAMLYTV +WNDENLTLQIANAGSVQPIYCRSGEIETVPVEGFPLGMFPQAEYEEISLSMRPGDSVFFF +SDGITDGENEAGESFEERLTDSIARHHHLPAEEAVNAVFEELQEFQGDCDRFDDETLIAL +RVV* +>PROKKA_00280 Hypoxanthine-guanine phosphoribosyltransferase +MPDQTQFPPAESLRIRFTRDQIQSRVREMGRHIREDLHGESVVLVGVLKGAAIFLADLAR +AINLDCTFDFVAVSSYKSGTRSSGAVQLIKDLTEPIEGRHIIVVEDILDTGVTLSFLQSH +FERHNPRSIRVAALLDKPSRRIRPIQGHYIGFSIPNEFVVGYGMDYAERYRNLPDIRILD +SIPAS* +>PROKKA_00281 Prolipoprotein diacylglyceryl transferase +MYPFIHIGHFTIPTYGIMMWLAAVAGCIVLYRNFKRWKVEGDAITIVAFATVIGIIGGKL +YHVLEKPVLLMHHPALLISRSGFAWYGGMIAGILALLFQAGTYNIRPLRMLDLCVPSAAL +GYGIGRLGCFFSGDGGYGPPTKMWFGMSFPHGTVPTTQKVYPTPLFEFVAAVIIFYILWR +RSRPAAERKLGHMTAEYLLLAGGARFLFEFIRINPKIFLGLSNAQWASIAEMLGGTALLW +WSRKYASTPQPGQQGRQPKEEPALVAAGDSGGPPTAEQTQ* +>PROKKA_00282 hypothetical protein +VSACSHRPDARPLRIGQAQKDLRIPANKLHQEASPAGPQQILGRHLSQLPRRPRPAPPPE +DIEDNAGPDKLKNRRRIHFLCCRHNSVRIAHPEPHLRRNAVIPVSGQLASNAPDSVPQRR +RGHAQIQHPQGTNLIGPGLEQQRQNPGNHPAKPGKPGPADQQRWMMHQQHWLFQHMVELC +AHHSSHRRKGDDADRIGINLPALEVLVKEVAPDHRGKPHHDAVCANR* +>PROKKA_00283 DNA polymerase I +VRLAQDAPVSFDPDTFKVTPPDPAVITPVLAQLEFNQLLNQFAAPPPKADYRRLSDPEEI +EDFLKPVARKKRLAIDTETTSIQPMLAELVGVSLCHQAGEAVYIPVAHNLTPGQSQADKE +AVLQTLAPVLADPAVTKIGQNIKYDLIVLGRCGMEINGPLFDTMVASYLLNPGKTSHNLA +SIAAEFLGRSVISYQEATGGKNRPFADTDLDQATDYAAEDADVAWQAAQVLEKKLAESHL +DGLFRDLEMPLVPVLARMERNGVGLDVQGLEDLGKELAAKLDEIERTCYRLAGHEFNLNS 
+PKQLAQVLFEELGLTPVKKTKKGKTSSTDVSVLTVLAAKHPLPAEVLNYRTLSKLKSTYI +DTLPKLVNPQTGRLHTSFNQAVTATGRLSSSDPNLQNIPVRSEIGERIRACFIAEKGNLL +VSADYSQIELRVLAHLSRDPLLVEDLTKGLDVHTQTAARLFDVMPELVTKPMRARAKTVN +FGILYGMSAFRLAREQGISRKEAQQIIDKYLGRYQGVARFQEENLRQAREKGYVTTLLGR +RRYLPAINAGDRLARQAAERMALNTPIQGTAADIIKLAMLAAHRLLEERFPQALMILQIH +DELLFEVPASQAEDLAQAVKQAMEGVIELAVPLVVDIGIGPDWAQAH* +>PROKKA_00284 hypothetical protein +VDHALLDRHFPRYTEYDPQVPVWDLTPQCPGAFHRFFDTSPLSPSGRYLAVTRYQPERLP +EPGEAAEVVLVDLHTGKSQVIWESRGWDTQLGAQVQWGATDEQLFFNDMDPADWQPFGVC +YNPLNGTSCRLAGTVYMVSPDGKQAVSPCS* +>PROKKA_00285 hypothetical protein +MAQPYEGLVGTSGRVQVNRNGRPAFSISTGAFADGWRGASLTPAKVGETTDGVCLGKISL +PDKLTIASALRATAAGKAMELRYTLTPKADAKLNSLHVSFGLPASFLKGASYTIEGETKE +VPAVLGATHLRAGDHVPSVRFTWPNGDWLQVDILSKTPVLFQDNRQWGDSFDLRLGPQMV +PAQTLPANQPVEIAMRVSAKDGMKLDFDRPVTITAGKDWVPLDLELDIEPGSALDFSGLG +QFDAPSGKHGWLQATPDGKFAFADSLDTPRRFYGVNLCFTAQYLSHDEAERLAERFLRLG +YNTVRFHHHEYPLIDRKNGCSTDLKPESIDQLDYLFAQFKKRGIYVTTDCYVSRPVYASE +IWDGAKGNVEMNEFKMLVPVNERAFENWKTYNRNFLTHRNPYTGMRYADDPTLAWLSMIN +EANFGNYIRSVSDRARPDWERAWGAWLKARYGSAEAITKAWGSTFDGDLSKPTAKLAKSF +TDDNRQSRDFAVFLADTERTMFLKMKKFLREEIGTKAMLTNMNGWTNTPQSQLARAEFDY +VDDHFYVDHPQFIEKSWRLPSRCPNTSPVLAGAPGGRGTAFNRLMNKPFTISEYNYSGPG +RYRGVGGILTGCMAALQDWSVVWRFAYSHRRENVLKPSTAGYFDMATDPLNQAAERASMC +LFLRGDLDPAPRSAAITLNPETLEKGDSHQGRTPPSWDELVPVIQVGTFLGDRQSKVPAD +IALPTTDAAPAAADVVMPKPYDSGKGSAILKELRAKGWLDAANKTDLDRKRGQSASDQFL +MDGEKDMMVLDTPRTAGGYAEAGQTIHTQAADFSILDTGATVWISSLDKQPITSSKRLLL +THLTDLQNTEVRYAERGRKTLLAWGKLPHLVRVGEAKISLHRSGAKLPKVYVLATSGHRL +GEVPVTKGKNGTLELAISTKGEAGAQLMYELDFR* +>PROKKA_00286 hypothetical protein +MERSSLALPCSGGSCLHAAVRAQMTMQRTAFSLSIALSAVLASALLVGIVPLLRGGDNYE +LAVKSALLVLVVGYAWLHWRAVTRRGRCLLKAAFCLNALVLVPLGATAALCHVFGGPKIV +PREAGAIGGAIAILAAAAAAMQVILLTRCRTIELTVPKGD* +>PROKKA_00287 Beta-lactamase hydrolase-like protein +VYFRQYKVEGLGCYSYLIGCPAAGTACVVDPERHTGQYIQTAEHQGLRITHVFDTHLHAD +HITGSAELAAATGATICVHPAIGAEYEHEDLLDGQHYRFGAAELEVVETFGHTPNSVSLA +LTDHGRSEDVFALLTGDLLFVGDVGRPDLAGADLLEEQIHNLYESLYTKLGRFPDWTEVY 
+PAHGEGSLCGKGMSAKPMTTLGFERLNNPLLADLEFAEFHRIMTEAFQVRPDNFAAIVAK +NQRGPQLLREAPAFMELSVLQAERALAAGAQIVDTRAQSAFGAAFLPGSLNIGVSPSSVN +WLGMLVPADTDIIIVADSKDVACQVADQFRRAGYDRLIGYVPDGVASWALQGKPMDHLPQ +LTPASLKHVVGRYGNHVILDVRTDAEWATGHIEGAIHLPLPRLVREGIDLGKDRHITTVC +RSGYRSNVAGSFLKSQGYEHVFSLIGGMTAWQAANR* +>PROKKA_00289 Transcription antitermination protein RfaH +MSISIASQPGLTWHPAYCRPRTEKVVDDYCKRHDIPCYLPLLRQRKRYQRRTVETYLPMF +PGYVFVQLGPDTRTTFLECHRIVHIVEVREAQERTLVAELTELQHLETAQATVDLEVMPD +IKPGTQVTITDGPLAGITGVVEKRKGKTRVTVNVELVGRSVVAEMDLGELELDGDA* +>PROKKA_00290 Phosphoenolpyruvate-protein phosphotransferase +LSDSSRTSSARNEPGETVLAGTGVSPGTVIGKAVVVGASVVSVREHELPVSQLEDEVGRF +RAALEKSRRELEELRDRARDEKNQDLVDILEMQVMVIEDGMLDQEVSDRIRDTRRNSGFV +LKNYVDEFCDQLVKAGSAFFAERTNDIQDLAGRILRQLLGSESVDLSELPEPCIIIAHDL +SPSDTAGMDRDNVLAFVTAMGSRTSHTAIMARALGIPAVVGLGESLAQVGDGVRLVVDGT +QGRVVVSPENATLVKYHERIEQEKAWRAKLEVNALLPAETRDGFHVSVAANVELPEEVER +IRRVHRVGIGLFRTEFLFVKGGSISDEEQQYAAYRRVAEDVAPHSVVFRTLDIGGDKFLS +HLDVPVEINPFLGMRAIRFCLRREDVFRSQLRAILRASAHGGVRILFPMITTMEELHATL +AILDDVKAELERQGIPHNPDLDVGIMIEVPAAALIADKLAPHVDFFSIGTNDLVQYMMAV +DRSNPDISYLYQPGHPSVVRLLDRVVRAANEHGRWVGICGEMAAEPLFVPLVLGLGIHEL +SMSPVAIPIVKDLVRDINMLEAEELVDQAMACGSAEEVTQLCRSFVERIAPELFLD* +>PROKKA_00291 Phosphocarrier protein HPr +MVGTSVTLLLRFAPWRGSFFLYFQALAIDTGERGTDTPGSAPMAQLQTRTTQVIVQNQLG +LHARPVTLIVKLAKTFSSRIAFERGGTVSDAKSVMALLLLAAGKGTELTITAEGHDAEEA +IEALERLFSDKFGEE* +>PROKKA_00292 RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) +MNIYVGNLPYSAGDDALRTAFEEYGSVDSARVIMDRDSGRSKGFGFVEMGNDDEAKAAIE +GLNGQDLDGRALTVNEARPRSDRGPRRGGGGFDRGPRY* +>PROKKA_00293 RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) +MNIYVGNLPYSVSDDDLRTAFEEYGAVDSARVIMDRDSGRSKGFGFVEMGNDNEAQAAIE +GLNGQDLGGRPLTVNEARPRADRGPRRGGGGGGFDRRPRY* +>PROKKA_00294 RNA recognition motif. (a.k.a. 
RRM, RBD, or RNP domain) +MNIYVGNLPYSVNDDELRGVFEEYGSVDSARVIMDRDSGRSKGFGFVEMGNDTEANAAIE +ALNGQDFSGRPLTVNEARPRADRGPRRGGGGGGFDRRPRY* +>PROKKA_00295 Alpha-amylase 2 +MKPHRLAIFLGTFPMLFSLLLAGEGTMKEAGKSLRFGNGRLILTFDRDTGIWTGLEASGG +AVCFRRTADSPSLNVQVDGKPVFGADRKMSLREQKVVQLPTASRLELTMGQGDWAVTAAY +TLWDSGTLQRQATFVYSGPKPEGDHEVRNALFVLPDVGLAGTDAFWFATAEYPPRDHPFR +HTDSGRRFGFPFSESTFHGFLARDPQAKLSLVSAYYTEDERAKLLVQEGKGTATAFHTHL +LAETLRPGLRFEVGSQLLRVVPGTRQDALRALQGFYDLPGLRTKIGMPPDTGRQIFYSAH +PGGTIDSSFRDVGGFANFTKLLPSIRDLGVNTLWLMPFWYGPVYAPYDYYRLDPKRCGTP +AELKALTDKAHALGMRVLGDLIPHGPREEPGAKPSFAEQHGDLVCRDKDGKMIQWWGCHY +CDYANPGWQDYMAKHAAYWVRECGLDGYRVDVAAGGAPNWRPYDHNRPSFSGLHGGLALL +RKARAACLKENPNTIFLAESTGPTMYSAVEHGYHWAFSTLLEDHVLKDAPADFVQAMSGY +LENQTYAFPADAFPIRFLTNHDKLRARYRYGPNLHRTLLALCAFMKGAPLLYEEEEMGNE +DFIAKLYRIRQTYDELSVGTVSYRSIPVEPKHVFCIEREYKGKRSVVLINFSNQMSEVKL +SLPKSDLKNPGIYEAVSGQRVDYAQDLTQSLDPYAYAVLVIRQRDELPPSVPKERGESPA +APDGRAMDIKITQEDSLTRVSTPLYSAVIDSARGGLLQEVRGADGKLLVNGVELKEGRRK +LFVGHDSVDFADCSVPLRILARDRQFPDGGKVSLLRGRAELRDGDGHAWMDLTVLYSLRA +KSLSLNVSLTPQYRLSPSKSDLGMKIHFVPTTHWFAETAEGNLLGHVIRRHPASHGFSGR +YWHGAGEAFFNGSLYPVVGEFGVLDTNRRIALGSMALRLDGAPLPVRLLEDEPPGSPVVL +GGPAAATADIPLLRGSQRAVWQQGKAKGMVVTLDFRSVPARFTQYPDSFGVRGWDTGTPK +LCYRGGWCTFGPEYLFRGYGMRATVVRSHGGELTALTDAAGNGLRVTDARFYTDQGLFGD +WRDPRGVLRKMSASNVNDPEPDTQLLHLFEGPQDSAPLRFRSFFRHPHAGGRSLLNPRVE +YEISYTPPTEKGKGLRIDCGVRPHLVKIGTGGFLAYKISLGGCDQWQVDGGEWQPLPAKG +GRLWENKEAGHLPKTLLLRNSKTGLWTRFSDFVGGPDQVENVFLHAGQGQVHLFVAFYDA +EPTDVRPVWRRAAFTMQAGGKQ* +>PROKKA_00296 hypothetical protein +MRIATFLLPLLVAIAGAAPKVETGTDGLRVRTARYVATFSTDSGLLASLALVDGTPLLTS +PRLYADVLPDGRKNFSAKAKAAPKAKPQPDGSLLVEVAGALLDKDGKPHPTYPFTYTASY +RFDDTAQVRVSVSVIPGFDSDAVFGFLGQVLSTASQREFFVNTADGLISEMAATHSGRTY +QSESEPLDLKDPYLGVLLKTGQILQFRLVSGAESLLNVFFHDSGAGPTHLFLCPLSGSNP +RQAKTGKAWQQELVIEAMPLAEWTKSR* +>PROKKA_00297 hypothetical protein +MRWQRAMLFVAVLVACGGCSWRGKPRANHVRMYCLVDLEPVAEKLIQGFTESQGVRVDVR +YIRPDELNKRLQRDEYVGLFLYANTWSGSAEENLLRARGNGGGRPAELGRFTPCLIVPKG 
+NPKGIRSAFDLGKPGMVHGRTRQGACLLARISESGYRKPPKGPDTEPSNIRVRDTDYDVA +RLVAGGSIDGAVVWSFTQSAMADEVEEVESKGLKRYGNVQHMVVGTPMSTPQPGLVKAFI +EFATGPDGQAILRKTGLDVDPKCR* +>PROKKA_00298 hypothetical protein +MTAFPRTDVGGVSVSRLIIGTNWFLGYTHSTSAQSRTNSERVNHRDVVAGIVETFVEFGV +DSIMCPHTDTVIPEAIEEARQRTGKPLVVISTFALPVTKRTALDGFDLGEVERILDEQVA +RDVDIAMPHQSVTDIMLDKCSREIRQMAPVCALIRDRNMVPGLSTHAPETVIYSDETGLD +VESYIQPFNLMGFLMQVEVDWIARIIQNAKKPVMTIKSMAAGQVRPFQALTFSWNVIRPQ +DMVTVGTSSKHEARELCEMSLQILDRRATTQELQRTRSKASISPA* +>PROKKA_00299 Aminodeoxychorismate synthase component 1 +VKPASVLLATPDGEGFYRFSDPVRVVTAGSLEEVLPTVTAVEAAVAQEGVFATGFVSYEA +GPAFDRALAAYPPGEFPLVWFGLYRNREVVPKTEMQDVPPLAWRPCLDQDEYVAAIRRVR +EYIEAGDTYQVNYTFRLHAPFAGDPEALFARLASAQACRYAAYVDTGRYVVCSASPEMFY +TQNADVFRSRPMKGTRPRGMTLAEDRAHREELLESEKDRAENVMIVDMVRNDLGHIAEAG +TVHVPELFSAEPYPTVWQMTSLVEARSRAGFGQTLKALFPPASITGAPKPRTTEIIRELE +TTPRHIYTGTIGYLGPEDARFNVAIRTVLIDRQTQQAEYGVGGGIVWDSDPLAEWEECMT +KTRVLRTVRPEFSLLESLLWTPDEGYALLDRHLARLCDTAEYFGYPVDVVSVRQKLEELA +GNLEPVPNKTRLLVDRHGEITVEGSPLGPAPDALVWRVCVHPERVDSHDPFLYHKTTHRA +VYTQAAAAHPDCDDVILQNERGEITESCRANVVVEMPEGRFTPPVSCGLLAGTQRAELLA +RGEITEKVLTPEDLYAATKVFLINSVHGWVVAELSDSSD* +>PROKKA_00300 hypothetical protein +MTQGLGPGLVSPAPFGAEEKPRGGLGEMGAFYAKGVTYWRSVTTP* +>PROKKA_00301 Phosphoenolpyruvate carboxykinase [GTP] +VKAIDGLEELQLTTNGHTVHFDNYYDQARKKEVTKYLVPADVELDKNLNQMEREAGVKEM +DDLQRGSYKGREMFVRFFCLGTTGSSFSIPCLQITDSAYVVHSEELLYRRGYEEFKRQNA +ADPNFEFFKYLHATGEVTERMTSKNVELNRVYMDYTRNCVRSVNTQYAGNTVGLKKLSLR +LAIRKADKEGWLAEHMFIMRCNGPNGRKTYLAGAYPSACGKTSTAMIPGENIVGDDLAYF +KVIDGEFRAVNVESGIFGIITDVNSKDDPVIWDVLHTPGELIFGNILVKDGKPYWQGMGE +DIPATGMNYCSTEWTEGMEGPDGKVASCSHKNARYTIRINDLANKDPEWDKPEGMPMGGI +IYGGRDSDTNAPVREAYSWEHGVCTMGAMLESETTAATIGAEGVRKWNVMSNMDFLSMSV +GRYIQNNLDFAKDIERPKVFGTNYFLKKDGSYTNGKLDKSVWVKWMELRIHGEADAIDAG +YGLIPKYEDLAKLFKQVLKEDYAKEDYAFQFRVNIPALIAKLDRMEEIYSTKVTDTPEIM +KAEMKAQRERLEAIKAAKGEIVSPFDLD* +>PROKKA_00302 hypothetical protein +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* +>PROKKA_00303 hypothetical protein 
+VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* +>PROKKA_00304 hypothetical protein +MTTPDINFTDLLQFLFMGIQRGSIYAMVAMGVV* +>PROKKA_00305 Leucine-, isoleucine-, valine-, threonine-, and alanine-binding protein precursor +VKRTLLSTAIAVLFLLSAVSVYAAQKKPYKVGCVFAITGGASWLGGPERNTAEMLAKQIN +AAGGINGHKLELFIEDTQGDNTRAVNAVKKLIKKNHVCAIIGPSRTGTSMAVIPIVQQAK +IPMISCAAAESIIAPVSKRKWIFKTTQNDSDAVRRIYENMDKKGIHKIGIITGTTGFGAA +GREQLKALAPHYKIKIVADETYDPTDTDMTAQLIRIRNSGAQAVVNWSIVPAQSIVPQNM +RQLKMKIQLYQSHGFGNIKYVEAAGAAANGLIFPGGRLLAADTVSASNPQKAILMKYQKE +YEAAFKEPPSTFGGHAFDAISILAKALKKVGDNPAKLRNVIEHTNFVGITGVFHYTKTNH +CGLNQNAFEMLTVKNGKFVVLHQ* +>PROKKA_00306 acetolactate synthase 3 regulatory subunit +MKISQLSIFLENRSGRLARIATVLGNAGINIRAMSLADTSDFGILRLIVSDTEQAEKTLK +DQGFTVLISAVVAVAIPDSPGALGNVLSIMEHAGLNVEYMYAFVEKDMGQAIVIFRFDDV +DRAISTLIENDIAVLESKRVLRL* +>PROKKA_00307 N-acetyl-gamma-glutamyl-phosphate reductase +MIRVGVAGATGYAGAELVRILAGHREVRITALTSRQYAGVPFAKVFPALAGVVSNECEAF +DVERVCGQTDVIFTALPHKLPMAIVPGLIKNGKKVIDLSADFRFSDVRLYESAYQPHSSS +DLLSRAVYGLSEVYTDDIRKADLIGNPGCYPTSTLLPLVPLLKNRLVDSGGIIVDSKSGV +SGAGRSPSLTVHFSEVNESFKAYKVAAHRHEPEIESILTKSAGTPVDITFVPHLVPMTRG +MATTIYAGLAGNVTKHDIAACLCDYYAGRQFIRIDRDGHPPDTRNVRGTNYCDIAFVVDE +NNRRLILMSVIDNLVKGAAGQAVQNMNLMMGFEETAGLSAPPFPV* +>PROKKA_00308 hypothetical protein +MSRSFKSAGRMADPSDVAADICPVPAFIPDLMAVNGLADAGFSRLIFFPGMLFGSTEKWW +GDGGVRPSAHEGLDLCFFETSDGLRYRLDETVSVPAAFDGKIVRIMDDLLGRTVVVQSRC +APSDAPFYTFYAHIRPDNGLRQGDTLAAGTVFAAIARIVSPKIRLPAHLHITLARANDLP +PVDTLSWPVMNRLDRSVFLNPLDLLMCDYAIEDDTRFTPGSDAVKPVRRIRQDRKGA* +>PROKKA_00309 tetratricopeptide repeat protein +MGLFSIFGGKPPEELERRGDFHYEAGAFGDAKMAFEKAIDRIERRFPEKKHLLPRIMEKY +HLARNALAKMHVENGDHMITVRDYEEAGALYRLAMELTSDEAFAAEIHGKIAKLKDLIAD +EDEPEMEWVGDAREEAYVDDWGGAEEDAGGAEDDDAYADDDMTADAEGMADETDADAQLY +DSPENLFHVLVSALPEAVQDAYLGYGEAFAAGYIALNHGEFRKAVKELSRALEENASAKT +LIPVELATAYMHLNDPDHARQILEDFLKENPGEIRGYQLLCEILWEAGNTADARNLLSGA +PNDIQTTRPMQMLQGETLFQVGLYDEAEKVFTRCLEIHGKDEIVNRGLAKTYEAKGQIEK 
+ARDLYADILNRCIMCGSAADPIIKRRYADLCIKSGDKSLKLLELYFGLAKEDPDNRADYF +FRIADLYEAQGKDIEARKYRKLSTQAGGGKRPQ* +>PROKKA_00310 Patatin-like phospholipase +MNPIEAISHTGRSVRSRLKGFPRKKVLVLEGGGMRGIFTVGVLQAFSERGYAPWKTIIGA +SAGALSGVVYAAGQIHMARDAFFTELISGRFIRMSNIFRPEKHILNLDWLVDHIIGGDEP +LNIRRLRTTACPVLITVTRFSRDFPPDTLYLSTKTDSVPQALKATAAIPFFYRGFVHYRN +DLLLDGGVLDSVPFKKALSMGFPERDILVVLTRPKGYRKERDSFWIKTLYESYYKDSQYR +YLVNSLEHHFGNYNRMLDDLETNYDFDIIYPPDNFKVNRLTRSEDKIVDGFEQGVAAAKA +YLKPKS* +>PROKKA_00311 putative periplasmic serine endoprotease DegP-like precursor +MRKKIHLFLVIGCIVFCFSPAVFAKTGGPKVLHYPREDAVVRAVKLVSPAVVNISTQYEV +RTRVNPFANFGANDFFNNFFDQGIERKEKLTSLGSGVIIDGRRGFILTNAHVVVRGAKIT +VVLKDGRKFHADIVGIDPESDLAVLKIKTKSPLPSIAMGNSSDLMIGETVIAIGNPFGFS +NTVTVGVVSAVDRSFRIKNRIYRDLIQTDASINPGNSGGPLLNIDGQLIGINTAIYKNAE +GIGFAIPINRAKKIISDLIKYGEVVPGWIGLSVQNLNSRLAAYLNLPQHSGVVVRSVDPS +SPAGAAGIREGDILLAIDGHKIESIDDYKTAMRGYRKGQHAVVKIDRNGRHLTLSVRIEV +FPESLAPELVQRLLGVKVVGIGQKVRFNQTINADKGVIISEIDPQSSLAGIGVRPGDVIR +KVDAEATNTVQSFYKAMIKDRWKQSIVILLQRGDQGYYITLKLS* +>PROKKA_00312 hypothetical protein +MFQFLLFVGSITAFIIGGLIVLIGIGAITGCAGGILAMCSGAIIAVLGAWSAITFFLPSP +DPSVPARETINLIRRNGRWM* +>PROKKA_00313 PHB/PHA accumulation regulator DNA-binding domain protein +MHLVKKYANRKLYDTTDKQYITMEKLAELIKSGSEVMIIDNETGDDLTAQVVSQLLAREK +NEDDTALPSSVLMQMLRKGRGTLFGYGKKYISLWQSAVLMSRDENEKLINTLVKDKELSE +TEGRTLKKEITAYTNGLKTWIRENIDQRVNEALNMMNLASKEQVKELIDQVESLSLKVQS +LEREIRRKN* +>PROKKA_00314 Putative diacyglycerol O-acyltransferase/MT1809 +MRKQVEIMSSIDNFWLYMDHPTNLMIITGFLQFDKPINFERLKQTIKNRLLCYDRFKKRV +IRPMTGVGNATWELDPRFDLRSHLHRVALPAPGDKETLQELISDLTATPLDPTKPLWQLH +YIENCENGGSVLFARIHHCIGDGISLIRLLLSLTDTEPNAVWSDCLNEPKIEKETSFNLF +PPLESAMKKVTRARRRAQKVTRFVSREIEKSFSNPYHIVKRTRTVTKFALDVATVMSKIL +LLPADRKTVFKGELGVRKSVAWSDPLPLDDIKVIGKYFNATINDILVALVTGALRRYLQQ +CNNLVGDLDIRVAMPINIRPIDGDIELGNQFSLILVALPVHIDDPVLRIREVQRRINDLK +EAPDAAVAYAVLNALGVSSAKLAKTAATMFANKTTGVFSNVPGPRQQLYFCGEKINNIMF +WVPRIGGLGIGISIISYNNEVSLGIATDSGLVQDPKAILDHFANEFRMLLGMYKAGQMEK 
+EPLVINDRSVEPPVFAFNTEKIASVQAIRCKAITRSGTQCHNRAATNSMYCTLHLSKYET +IASREENDMPAEADNTLPAEDQAAG* +>PROKKA_00315 cell division inhibitor MinD +MNAPAPTPNNPRIVVCCGSGGVGKTTISAAIGLCGALMGKKTVVLTIDPARRLADALGIS +ALNMEAQRVPLEASVPASGELYAMMVDAKRTFDRLIGRYSSAGLRDRILENRYYQHVSNN +MAGSHEYMAMERLYEIYHEKRFDLIVLDTPPSRRALDFLEAPQRVINLLGHPYFLKLFKP +YIKAGQLSGRLFNLLAMPVLRAVGQVVGGQTISDIFSFFQLFNDMLFDGFSKRASAVESL +LSDPMTTFFAVTTPQEYPIQEATYLFRQLQQRNMPFGGFIVNRVHSDTADSPFDSEAADR +KRVLMEKIADKPIFQRLEIADRMDRKLARSDAAAIDRISSISPGLAVFPILFADETVNDI +SGLRVISTQLMKHPEFKI* +>PROKKA_00316 Arsenical pump-driving ATPase +MAKADESDRTQLEALFSIGAGVRSLDELLTRRLIFLMGKGGVGKTTLSVALALTAEMMGK +RVLLTEIGDSQGIGRYFDAQPDVRPRQVSSAIWAARVDPKDELTAYLHYHMKSGFIANRI +TQSRLFDYLLAATPGLKEIMTLARIWRWEKAKNKAGTPLYDTIIVDAPATGHGLSLLRLP +KMLVEMIRVGPIASQVNGVQQMLLNPERTALTLVTLPEELPVNETREMIDIAVDEVGIPV +QAVFINGVHPVFVTPDEFSRIQELDRDCPDADPDCPDLRFALDVARRQIVRNAAQQVQMN +EVHAAAPGHVIHVPYYYTNDLGPEEIRTIAASLHRQISEAPRGGGR* +>PROKKA_00317 PHB/PHA accumulation regulator DNA-binding domain protein +MYQIKRYANGRFYDTVEKNYVTREQISKLLGAGKKISIIDTRTEKDITDDIVSRIKAKKQ +NPSKSKKAGKSNKAVDDSTGMLVQLFRKGGDALFDYGKRYASMWQNMVTMSRDEVDKLVN +MLVKDNKLTELEGSKLKKEIDRYRTNIQGWITRNIDNRVNEVLNRMNLANRDQILELTGK +IEELNKRINRLGKEKKGPAKTKKTS* +>PROKKA_00318 hypothetical protein +MAKTATKKGETAQTKITGKIQKAAESVTDKVKGYNEKYVAKNIEKGKATLKEYNEKYLVK +TVEKGKDTLKEYNDKYITKAVEKGRSYVDGPYKKLSGTMDQWLEKGRSFEKDAWKKMDGY +VENGKKFMYKLPLVETVEKKVTSSLNSVPSVVNLPGKGDIEKLTLAMEALNSNIEALRKQ +SAQ* +>PROKKA_00319 hypothetical protein +MGKTIRRALVLSGGGARGAFEVGVMRYLNEVNWQPDLICGTSIGAINGAAFGSGMSVDEL +AHLWKTYHRKQMYKITFPAFFRTLLSGRKFSPLSDNRPTRSLLEKTIDIDALRNSTTEII +ISVLNMRTSQVRYFTHKAIGIEHLMAAGGIPMMFPWQYIDGDPYWDAGVMVNTPIMPAFE +RGATEIIVVLLSPLGAIPQRLPSTHREVSELVFEQFLIGSYTACLPNAGWRTNPEADVYD +TPLPDSPQLQLSMKGVRMATVYPTRMLGFRSLLDFSPRQAKTLLRDGYVNARMQLKSFFK +* +>PROKKA_00320 hypothetical protein +MPTATIRQQLIELLSENKYDARDLSQRLGVRETVVYDSIPHITRSVTSMGKKLKIVPSRC +TSCGYTFKDRKRAAKPSRCPTCKSERIAKPKFYIV* +>PROKKA_00321 3-dehydroquinate synthase 
+VKTVSISGQTGASKIVIGERLENLSNYLPDRRIVVITDTNVAGHYGKMFPDVEVITIGCG +ESIKTLDTAKMIYERLVSMAADRSVFIVGIGGGIVCDITGFIASTYMRGVRFGYVATTLL +AQVDASVGGKNGVNFMGYKNMVGVFNQPEFVICDPYVLGTLPPRELACGFAEIVKHAAIS +DKDYFADLEESHEKACARDPETLERIIRKSVVIKAGVVNADEKERGERRKLNFGHTLGHA +IEKTLGVPHGEAVSAGMVMAAELSANRGHLPRPDIRRLKDLLTHLDLPTALPIDPERIID +AMARDKKRQGEKIHFVLLSAIGAAFVDSISLAELEAVVTG* +>PROKKA_00322 Phospho-2-dehydro-3-deoxyheptonate aldolase +MLIVMRQDASREQIDAVIRAIEARGYTARSIPGGDRVSIGILNNRTAIDAAWFQDMPGVK +ETIPVTRPYKLVSREIQPHDTIIRVGGVEIGNGHLVIIGGPCAVESEAQVMATAERVKKA +GADIFRGGAFKPRTSPYAFQGLGEEGLKILARAREQFGMPIVTEVMDLEYFDMVEAYADI +VQIGTRNMQNFSLLRRAGESKKPILLKRGMSATIDEWLMAAEYVLSQGNPNIILCERGVR +TFVRHSRNTLDLSAIPVVQRESHLPIIVDPSHATGFRDQVIPLSRAAAAARAHGLMIEVH +NAPDTAQCDGSQSLYPDQFETLCRQVRSIFRILGETDETR* +>PROKKA_00323 hypothetical protein +MILIDILRRNWYDLLPLNEIGNICAVKFGNKAADP* +>PROKKA_00324 Fatty acid metabolism regulator protein +MTNATNDQPFRPARFTEQRLITAILDGTCPPGSVLPAERRLAEQFGVTRPTIRETLQRLA +AEGWITIRHGKPTRVNDFWETGGCS* diff -r 000000000000 -r 68a3648c7d91 de_prokka/p1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/de_prokka/p1 Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,2474 @@ +>NODE_25#PROKKA_00102 +VVMEEYQKTLARLLAESGGLFFQEGLRLKDGRPTPYFVNLGVFRTGRLALELGRCFSLWI +HHHGLDQDLDCIVGPSYKGSAIAQATAIALYELHGKEVAYDYDRKEAKTHGEATGHGYLF +VTGAALQGGKVLIIDDVGTSMSTKLELLKKLSWLKPRLERPMELLGVVLAVDREQTQAVY +DAQGRVREGVRGPDAMESFRQESGLEVWSLLGIRQALDYLYKEGIPVLIQGEMRPLDELT +MQIAREYLELYGREEA* + +>NODE_17#PROKKA_00047 +MQVVQAILSKVHEAGDIYFAKYGGHYCVGCERFLTEHEMIDGKCPDHGTEPVYQEEENYF +FRMSDYTQPLKEYIRANPDFIRPERYKNEVLAILDQGLEDLCISRPKTRLTWGIELPFDQ +NFVTYVWFDALINYLSGLDWPDGELLERFWTGPKADPQHLIAKDILKPHGIYWPTMLMAL +AKAEGRPLDHYLYRHLNVHGYWQVGEGKMSKSRGNVVKPLDLAGIYGVDPFRYFLLREMT +FGLDASFSEDLLVERYNADLANDLGNLFSRVLNMLSRYRDGLLPELHPQELTEADREMKG +ALAASLGPGAEHDFQAQVREFRFHTALADLWSQVRRANKYIVAREPWVMAKDPDRAAELD +NVLYILVQLLASVTHLAWPVMPATAEKMAAMLGLELVVPVDWQRLFALELMTPGAKAEKP +QALFPRVQTDKVKAKAARKEAKQAQQQPAAKGGGKQKAKPQDKAGLITIDEFAKVELRLG 
+RVLEAGAIKGADKLLKLKVDLGEPEPRQIVAGIARHYRPEELVGRQVVVVANLKPAKLRG +EISQGMVLACVAEGRVRLVAPEEELPPGSVVR* + +>NODE_3#PROKKA_00175 +VLACFYFFADSPMLNWSQIDTALLDMDGTLLDLHFDSHFWLEHLPRRYAELKHLDPEHAR +QSLLSKIEQLRGKLDWYCIDFWSDLLDLDVVALKRETRDRIAWRPHSKAFLERLRACGIR +RVLVTNSHPDGLNLKIETTGIDQHLDRLFSSHSFGQPKEGPDFWEQLAQQEPFDPERTLL +IDDSLPVLESARRYGIRHLLAILSPDSQQPPRQPSHHPCVHDFDELFQSLDQFAHQKNRI +DGLSD* + +>NODE_8#PROKKA_00285 +MAQPYEGLVGTSGRVQVNRNGRPAFSISTGAFADGWRGASLTPAKVGETTDGVCLGKISL +PDKLTIASALRATAAGKAMELRYTLTPKADAKLNSLHVSFGLPASFLKGASYTIEGETKE +VPAVLGATHLRAGDHVPSVRFTWPNGDWLQVDILSKTPVLFQDNRQWGDSFDLRLGPQMV +PAQTLPANQPVEIAMRVSAKDGMKLDFDRPVTITAGKDWVPLDLELDIEPGSALDFSGLG +QFDAPSGKHGWLQATPDGKFAFADSLDTPRRFYGVNLCFTAQYLSHDEAERLAERFLRLG +YNTVRFHHHEYPLIDRKNGCSTDLKPESIDQLDYLFAQFKKRGIYVTTDCYVSRPVYASE +IWDGAKGNVEMNEFKMLVPVNERAFENWKTYNRNFLTHRNPYTGMRYADDPTLAWLSMIN +EANFGNYIRSVSDRARPDWERAWGAWLKARYGSAEAITKAWGSTFDGDLSKPTAKLAKSF +TDDNRQSRDFAVFLADTERTMFLKMKKFLREEIGTKAMLTNMNGWTNTPQSQLARAEFDY +VDDHFYVDHPQFIEKSWRLPSRCPNTSPVLAGAPGGRGTAFNRLMNKPFTISEYNYSGPG +RYRGVGGILTGCMAALQDWSVVWRFAYSHRRENVLKPSTAGYFDMATDPLNQAAERASMC +LFLRGDLDPAPRSAAITLNPETLEKGDSHQGRTPPSWDELVPVIQVGTFLGDRQSKVPAD +IALPTTDAAPAAADVVMPKPYDSGKGSAILKELRAKGWLDAANKTDLDRKRGQSASDQFL +MDGEKDMMVLDTPRTAGGYAEAGQTIHTQAADFSILDTGATVWISSLDKQPITSSKRLLL +THLTDLQNTEVRYAERGRKTLLAWGKLPHLVRVGEAKISLHRSGAKLPKVYVLATSGHRL +GEVPVTKGKNGTLELAISTKGEAGAQLMYELDFR* + +>NODE_3#PROKKA_00181 +MRWLSHSAFLFSHGRSAVKALSLLLLFSLGGCSAVNNMMYKTTGEVMVGYAKAHAVPYVL +SSDDLGMSCAMSEALTPLLMSFGQVTAKPDQLGVMMQMSAGTCAEEKGWNAELAYMKELR +NQHPQNAEDDMIVEKRHYIEAADRYYSAWKHLVAYYGDPSTGQCPTFKNDEGQFIYMAGL +LAGVQALAAEIQSTSDEGVPKNIGSTVAQASGCLSDDKWWGVPMALRATVWSMIPGAKPD +GENPFQRLDESDKKANKARVRLAYVLHVIAAWNKGDTKLVKKLIREQQAQEAKYPADPRW +KMIDKLSTLYLRSISDRMWVEHTGHRTPIGGLGTFWDDSKGSGEVIDLDSVM* + +>NODE_3#PROKKA_00179 +MSEAIEQSSMYLQKRTIGGRSAREWFSSLPACILLMAVVLFTTSSDIHNKALQLGQVLWS +GYYKLRVDPVKPDCNPNVNVDAQVKRQIAAQAAQQDSMLGSLVGSSPVNPAAVRQSVINA +KQACEAQFADYNATKGRITEGVRVYRSVELFISDVVAFGLASQRYILALLVLVCAATATF 
+SRHHIAMRGMETRLDHIVSHFMQFIANTMLLISSFMYRQMSHNSGAVVTTGQEISHDIWI +AGFLLLTIVSLVQLFRVPEDAEEGGTLGHAFLCVPLYTTMCLISGTFFAFVGSPAGIGIY +LDKMMELADQFLNVGLYVWAGMMLKQTRLASLVFNVLRPLKLPPELLAVVAVMVAAVPTA +YTGASGIFVIAAGAVIYSEMRKAGARRQLALASTAMSGSLGVVLNPCLLVVVIAYLNREV +TTDSLFHWGGWVFLLTSTLFLITSLVVNRQKGFKVAPMNEALPEMVMRLKPLIPYVLVIA +GVVFFYWLLLGVTMNEFSAPRILPIIMVGILVYEHVHFRGDRNKVSGEVDHQGLEKSLRT +ATSETTAEIGALLLLFGLSVSIGGVIERSQVMSLFPQALPSPWLAMMLMVVILVILGMIM +DPFGAVILVSATIADLAYQSGIAPVHFWMVTLVAFELGYLSPPVALNHLLTRQVVGESEM +NLSYRESGSFYQRHERVLMPLLVMGSALLIVAFVPLLFYAR* + +>NODE_9#PROKKA_00319 +MGKTIRRALVLSGGGARGAFEVGVMRYLNEVNWQPDLICGTSIGAINGAAFGSGMSVDEL +AHLWKTYHRKQMYKITFPAFFRTLLSGRKFSPLSDNRPTRSLLEKTIDIDALRNSTTEII +ISVLNMRTSQVRYFTHKAIGIEHLMAAGGIPMMFPWQYIDGDPYWDAGVMVNTPIMPAFE +RGATEIIVVLLSPLGAIPQRLPSTHREVSELVFEQFLIGSYTACLPNAGWRTNPEADVYD +TPLPDSPQLQLSMKGVRMATVYPTRMLGFRSLLDFSPRQAKTLLRDGYVNARMQLKSFFK +* + +>NODE_8#PROKKA_00289 +MSISIASQPGLTWHPAYCRPRTEKVVDDYCKRHDIPCYLPLLRQRKRYQRRTVETYLPMF +PGYVFVQLGPDTRTTFLECHRIVHIVEVREAQERTLVAELTELQHLETAQATVDLEVMPD +IKPGTQVTITDGPLAGITGVVEKRKGKTRVTVNVELVGRSVVAEMDLGELELDGDA* + +>NODE_7#PROKKA_00260 +MQVYPKFLLSATAPAHFPPPSAPEFAFLGRSNVGKSSLINALLGSRQAKVSSTPGRTRAI +NFFSLTTSPNRQQPNFLFADLPGYGYAKISKSISAEWPKVV* + +>NODE_6#PROKKA_00245 +MDPKTVIRELLTGTGVHLNGPHPYDVQVHDERAYERWLSEAELGLGESYMDGWWDCLALD +EFIERILRAGLEEKVKRNFSTAFYVLSKRLFNQQTRVKSKRVGREHYDLGNELFSKMLDR +RMVYSCGYWQRAKNIDQAQEAKLDLICKKLNLKPGMKVLDIGCGWGSFAKYAAEKYGVEV +LGVSISKRQIELGNELCKGLPVTLLYKDYRDVEGKFDAVVSVGFFEHVGYKNYDTYMKIV +DRCLTDNGISLLHTIGNNTTTHYVNRWTNKYIFPNGMLPSIAQVAKAAEPYFVIEDFHNF +GPDYDKTLMAWYDRFNKAWKELKNQYDERFYRMWRYYLLSSAGGFRSRATQLWQFVMTRT +GRQQPDCRFA* + +>NODE_3#PROKKA_00153 +MSSPVRTATFIGAISVVLWGTLALLTKLTGGRIPPFQLMSMTFGIAFLLMAVRWWSRGES +GLGYIRQPFPAWLLGVGGLFGYHLAYFKAMTLAPAVDVSLIAYLWPLFIVLLSALLPGHS +LRAQHLVGAVLALAGCWLLVGRNSQGFDWTYADGYLVAFGCSLIWSSYSVLSRLVRSVPT +DAVGWFCGVTALLALGCHLLWETTVWPVGTLQWLGVIGLGLGPVGIAFFTWDHGVKYGNL +PLLGTLAYSAPLISVVLLLLAGFGQASGMLFLASALIVAGSFVAGRAKHASPELAEEPVP +E* + +>NODE_8#PROKKA_00284 
+VDHALLDRHFPRYTEYDPQVPVWDLTPQCPGAFHRFFDTSPLSPSGRYLAVTRYQPERLP +EPGEAAEVVLVDLHTGKSQVIWESRGWDTQLGAQVQWGATDEQLFFNDMDPADWQPFGVC +YNPLNGTSCRLAGTVYMVSPDGKQAVSPCS* + +>NODE_3#PROKKA_00168 +MTAEWISVGLLALLVLAFVIDIGLRLRKPPQKPQPPVAERPPAAPEQREAVPEAKAPPPR +PEAPAAEEKPAKAEPEAEVVEPEAEAPPAPPVAEEAPPVEAPPAETPAAEPEVEEAPVNW +FARIKQGLGRTRGNFSEGLSNLLKGQKAIDDELMEDIETLLLTADVGVTATTEIIDTLTE +KLERKQLKDGDALKQALREELHGILAPSTAPLNIDDGHKPYVILMVGVNGVGKTTTIGKL +ARRFQDQGKRVMLAAGDTFRAAAVEQLQVWGERNNVPVIAQQTGSDSASVIYDAVQSAQA +RGFDVVIADTAGRLQNKENLMSELEKVVRVMKKLDPEAPHEVMLVLDAGTGQNALSQAQI +FQQAVGVSGITLTKLDGTAKGGIIFAIARQLKLPIRYIGVGEQVGDLRPFQAEEFVEALF +DEPA* + +>NODE_2#PROKKA_00131 +MPQTKTTDLTYLVETYTEEMVSWAMYKVSDAELARDLVQDTFLAAAEKMDAFKGESSPKT +WLFSILNHKIIDVYRNKVKQPVSFDSQVFSTYFNERGDWKKEKEPKDWHQEEKQLLDDSA +FQQVLQKCLESLPEKWSTCVKMKYLSEKKGEIICQELGLNPTNFWQIIHRAKIKLRDCVD +QNWFRS* + +>NODE_13#PROKKA_00035 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_25#PROKKA_00104 +MKDKRQAEDESAREPFTPEEQDVDKYKREVEELKAKLAAMEKEEQQDSSPAEEAPQEDEK +KEE* + +>NODE_2#PROKKA_00140 +MKSLKGTRTEQNLLKAFAGESQARMRYDYFASQAKKEGLEQISALFTETSLNEKEHAKRF +FKFLEGGPTEIVAAYPAGIIGTTLENLRAAAEGEHEEWTELYPEFARVAEEEGFKEVAAA +FKMIATVEKAHEARYSKLYKNLEAGKVFQRDGVVVWKCRNCGYLHEGKKAPKKCPACLHP +QSFFEVETFGY* + +>NODE_7#PROKKA_00277 +LHLSVSERTIAPARNIEGSLRLPGDKSISHRYALLSGLAEGTSRFTNFSTGADPSSSLAC +VEALGAKVVRGEDGSVEVTGVGGQFQPSSSPLDCGNSGSTMRMLAGFLAAQQGEFTLVGD +ESLSRRPMERIRKPLMQMGANISLTEGHAPMVIHGIPLKAMEYATPVPSAQVKSAILFAG +LQASGTTTVRESVRTRDHSELALRAFGAELERTIDSITIAGGQKLSAIEAKVPGDISSAA +FYLCAAALFPGSNLVFDDLGMNPTRASLLDVLTALGAHIGVIDLEDKASELIGTVKVNAP +PDGLTGTTVSGALAAQLIDELPVLAAIGPYTNNGIRIRDARELRVKESDRIDLVVKNLRA +MGAEVEEFEDGLDVPGGQTLHGAEIDSGGDHRIAMAFSVAALRAEGETLIRGAECASISF +PEFFDLLDAIAQR* + +>NODE_4#PROKKA_00226 +MRPGEEDKDPLGDFLASLERLSALPENIRVLPSHGLVFEGLHQRLKALQRHHELQIDRLL +ERCEHPQSARDVLSLMFRRPLDEHAILFAMGESIAHLHHLRLQGKLSQVEQAPFRYIRN* + +>NODE_1#PROKKA_00080 +MHPKLFACCTALLACCAFPALGTPAPGSANASSSVSGVMTINADHSSMANTTGQGTEVTY 
+SGHVIVTRGALRLYGHSAVIHGRSNTIGKVVVTGTPARFELREPGKPHVLGEADSITYNG +KTDILQLDGQVHFSRPGEHFSAAHITYRIATRQLEASGNGNGRVHAVLSPAARTSP* + +>NODE_2#PROKKA_00125 +MKENLEWHKVLEKKEDLPENRIITVNAGSKQIALSHFEGKICALDNHCPHQGGPLGEGSI +ENGILRCPWHGWDYHPCTGKAPGFDDGVATYRVEERGNGIFVGIPPKKPHKTTLSDIMVE +TMVHWGVDTVFGMVGHSNLGLADAFRRQEEKGKLKYIAIRHEGAGAFAASAYGKLKGKPA +ACFSIAGPGATNMFTGLWDAKVDRSPILALTGQVATQVVGTGNFQEVDLVRAFQTVAAFN +HRVQKDSKHAELMSLAIKHALLKRDVSHLTFPDEIQEILEGKEESQTPEGRMGELQISPA +AGSMDKAVDFITKSKRPVVIVGHGARFVMEQIISFAEHLNAPVLTTFKGKGLIPDDHPLA +AGVLGRSGTPVASWFMNESDLLIVLGASFSNHTGITPKKPIIQVDFDPLALSKFHKIDVP +VWGELSTTVNILMKRLPVKPNTVDQRTELAQRWKIWRTEKQKRLLEDRGKGISSIAVFDN +LSKLIHPEAVVCVDVGNNAYSLGRYFESKNQSFLMSGYLGSIGFAFPAALGAWAATRGKR +QIVAVAGDGGFAQYMAELATSVKYNMNIKLVLLNNSELGKITKEQRSGGFKKFATDMHNP +DFAEYARGCGALGIKVSKRKDLKTKMKEFLDYKGTALLEIVTDVLLV* + +>NODE_6#PROKKA_00241 +MEKKRKEMAFRFPKPDGLYNPANEHDNCGIGFVAHIKGEASHDIVERGLEVLRNLDHRGA +KGSDNASGDGAGVMVQIPHEFIKKVLKIDVPAKGSYGTGLIFLPQLEAEANACVDILSNI +IQEEGLQLIGYRDVPTDSSIPGEIARTTEPRIKQVFIKANLEEDILEQKLYIVRKRAEKA +VQASDLSQKEVYYHSSLSAKTMIYKGMLTPDQMKDYFTDLQHPLFKSALILIHSRFSTNT +FPTWDLAQPFRLVAHNGEINTIKGNRLWTQAREGLLKSEVFGDDLPKILPVLEEGKSDSA +SFDNVLEFLHRTGRSLHHSLCMMIPESFNEKNPIPESLKAFYEYHSTIMEPWDGPASIVF +SDGRYIGGTLDRNGLRPSRYVITKNDLIVMASETGVQDFAAEEILEKGRLRPGKILLVDT +RLGIIIPDEEVKEQLSRRNPYGMWLKENRLLMEDIKVRQRVPSTMDDFLTYAKVFSYSKE +DMEFLIQSMSNTAVEPINSMGNDTPAAIFSRQPQRLFNYFKQTFAQVTNPPIDAIREGLV +MSLTNYIGSLNSNILKESPDHCKLIKFPDPIVTNTDLGKIKDLKDEMFSHEIISIVFPVD +QGFEGFKKAFDEMLERAEKAVDDKKNFIILSDRAIDSKHAPFPSLLAVSAVHHHLIQKKK +RMQVGIAVETGEAREVNHYALLLGYGASVINPYLAFAAVDHLVKEGKLDMEYKDARRNYI +KSIKKGLLKIFSKMGISTVRSYHGAQIFGAFGLSKELVDKYFKGTSSPISGIGLEEIYEE +YSQFHKDAFREEATQEKFRFETTGVYAWRKNREDHAWNPDSIGLLQWATRTNSYEKFKEY +SRTVDEYNRKPSFIRGCFQVKRNSISIEEVEPVEEIMKRFVTGAMSYGSISKEAHESLAV +AMNTVGGRSNTGEGGEDHNRFGTEKQSAIKQIASGRFGVTSNYLTNAREIQIKIAQGAKP +GEGGQLPGYKVNEVIAKLRNSTPGITLISPPPHHDIYSIEDLAELIYDLKATNPKAKISV +KLVSQDGVGTVAAGVAKAFADLIIISGGEGGTGASPISSIKHAGLPVEIGIAEAQQTLVK 
+NNLRGRVKIQVDGQLKNGHDIVTMACLGAEEFGFATSALITLGCVMMRKCHLNTCPTGIA +TQDETLRERFTGNPQLVINFFRFLAGEVRELLAEMGFKKFDDIIGRADLLEENKEVFGWK +MKNVDFSAVLNRPAEADKFDIRYVPGSASLNLDGHLDHTLIEESGKAIKGKEKVWLHHPF +ANTDRAIGAMLSGVISQKYGEFGLPEDTIHATFDGSAGQSFGAFLAKGVTFRLEGDSNDY +IGKGLSGGKIIVVPPTGSTFTPEENIIIGNSTFYGATGGEAYIQGVAGERFCVRNSGMEA +VIEGAGDHCCEYMTGGRVVVLGKTGRNFAAGMSGGIAYVLDEDGDFDFYCNKGLVELLEV +EDKKDIKELQGLISKHLTYTQSPKAAKILTQWEEYLPKFVKVIPYEYRKVLRERELRELE +QKMKMTEDANVMQE* + +>NODE_9#PROKKA_00310 +MNPIEAISHTGRSVRSRLKGFPRKKVLVLEGGGMRGIFTVGVLQAFSERGYAPWKTIIGA +SAGALSGVVYAAGQIHMARDAFFTELISGRFIRMSNIFRPEKHILNLDWLVDHIIGGDEP +LNIRRLRTTACPVLITVTRFSRDFPPDTLYLSTKTDSVPQALKATAAIPFFYRGFVHYRN +DLLLDGGVLDSVPFKKALSMGFPERDILVVLTRPKGYRKERDSFWIKTLYESYYKDSQYR +YLVNSLEHHFGNYNRMLDDLETNYDFDIIYPPDNFKVNRLTRSEDKIVDGFEQGVAAAKA +YLKPKS* + +>NODE_4#PROKKA_00206 +MLRFIVALMLLFPLVAQAEDAIKPGQWKQTIHVTIPGSSVKIPPHSSTNCVKPEQAGSIK +SIIEEAQQPGCKLNEYSRSGNKVHWKMTCTGKSQASTEGVFTLQSKTSYHIHMNALMQTP +NGPYKTVVDSDGKWVGPCK* + +>NODE_8#PROKKA_00301 +VKAIDGLEELQLTTNGHTVHFDNYYDQARKKEVTKYLVPADVELDKNLNQMEREAGVKEM +DDLQRGSYKGREMFVRFFCLGTTGSSFSIPCLQITDSAYVVHSEELLYRRGYEEFKRQNA +ADPNFEFFKYLHATGEVTERMTSKNVELNRVYMDYTRNCVRSVNTQYAGNTVGLKKLSLR +LAIRKADKEGWLAEHMFIMRCNGPNGRKTYLAGAYPSACGKTSTAMIPGENIVGDDLAYF +KVIDGEFRAVNVESGIFGIITDVNSKDDPVIWDVLHTPGELIFGNILVKDGKPYWQGMGE +DIPATGMNYCSTEWTEGMEGPDGKVASCSHKNARYTIRINDLANKDPEWDKPEGMPMGGI +IYGGRDSDTNAPVREAYSWEHGVCTMGAMLESETTAATIGAEGVRKWNVMSNMDFLSMSV +GRYIQNNLDFAKDIERPKVFGTNYFLKKDGSYTNGKLDKSVWVKWMELRIHGEADAIDAG +YGLIPKYEDLAKLFKQVLKEDYAKEDYAFQFRVNIPALIAKLDRMEEIYSTKVTDTPEIM +KAEMKAQRERLEAIKAAKGEIVSPFDLD* + +>NODE_2#PROKKA_00116 +MGVNLLDIILAVPLIFFGYHGYRKGLIIEVTSLAAFILGLYFAFYFSNFTAGILKEYFTI +QTKYMAAIAFVVTFIVVLLIVLAVGKIVEKFIDILLLGFLNKLAGGLFGVLKGALFLSII +IFVINYFDASHSIIKQKAKDNSVLYKPVESIAPALYSWLHLKNFDFHLPSEESVIKTITH +RANPD* + +>NODE_1#PROKKA_00089 +VTDRTVRTWIGEAVAAAAADGVTFSVPVTPHTFRHSYAMHMLYAGIPLKVLQSLMGHKSI +SSTEVYTKVFALDVAARHRVQFAMPESDAVAMLKQLS* + +>NODE_5#PROKKA_00232 
+MTADNASNIKVVHDPQTDEAPARPLGVDTPAGEAPGNDDRPAGPLEDALPGAEDDDSLLS +CLLFLVAYHGQPKSPSVLLSALPKPDGPIPVDLFKRAAARAGLSVQVVRRGLGRIHAWTL +PAVLLLRNRGAVVLTGKSDTGHFLVTAGDAGHGTTELSPEELERAYTGFAILVKPEVDLG +GERTGADLAKPRSWFWGTLAKNGWTYAQVGMASVVINIFAVANPLFTMNVYDRVLPNNAV +ESGIALAIGAATALLFDFILRNLRGWFIDFVGRRADVVLACRIFDQVLDLKASHRPQSSG +AFASMLREFETVRDFFTSATLAAFIDLPFSLFFLAVILMLSPEIGMVLGSAMALVLFWGM +FVQFPIAKSVKKLMVHGEHKHGVLVESLFGYETIKMVGAEGRMRAKWETVVGQSAAVGQR +MRLFNNLGVNFVQFVQQGTIIAMVVTGIFLVKDGTVTSGGLVASVILCGRALGPMAQVSQ +LMMRFHQTWTSLKSLDAVMRGPVERPPEANFLHRPRLRGQIEFQEVTFRYPGTDHDVLRD +ISFVIEPGERVGIVGRVGSGKSTIAKLLAGLYAPTSGTVLLDDTDLRHIEPSDARANVGF +VPQDVFLFKGSIKENIAISVPRATDDEITEVSQALGLHDFITQHPLGYDLEVGERGGGLS +GGQRQAVALARTLLKTPTILVLDEPTNSMDTGTEKKVVDTLARTTGGRTIILVTHRTSVL +SLVDRLIVMDAGRIVADGPKATILGGLAKGQVKTGK* + +>NODE_3#PROKKA_00189 +MAASAVRVSDTDRLTFTLFLALVLHAIVVLGVTFTAHKPQPSARTLDITLAQRDDQKAPK +HADYLAQTNQKGSGTLSKKAQITTRHRAPINASQVHKVKPIPRSQPQHSQAKPEKRHVVT +TVSQQATQQVDSDDKEQKAHQKHNHKSLMSRALEIASLEAKLDQETQRYAKRPRVLRVTA +ASTLKSTDAWYVQAWVNKVTRIGNLNYPEAARRRGIHGTLRLLVDILPNGHVKDIQVLQS +SGYKVLDQAAMRIVRLAAPFAPFPPELRKRKDVLEIIRDWSFEPRGLSTNG* + +>NODE_3#PROKKA_00172 +MENINRITTGNCSIDPIDNTEPQAYQLRVF* + +>NODE_3#PROKKA_00171 +MWLYGLFHHLPAWQLILIALAMTHVTIVSVTVYLHRHSAHNSVDLNPVVAHFFRLWLWLT +TGMVTKEWTAIHRKHHATCETEEDPHSPVVKGFSEIMWRGAENYRAAISDEICERYGQRT +PEDWVERNVYSRYRLGGVALMAVIDLLLFGVNGIWIWAVQMMWIPIFAAGVINGIGHFWG +YRNFECADNARNIVPWGILIGGEELHNNHHTFPNSSKLSRRWWELDIGWGYIRLLQLFGL +AKPKGYRPIAHQIPGKMDMDVETVQAIANNRFHVMRLYRKRVLEPVLRQQRSVVEKDIKP +LYRRVRKLVFREESLIKPQERQSLEQVLQNSAVVRLIYEKSHELQAIWQRRPGMRPQDKL +NALVEWCHQAEESGVRYLEEFAATLRSYSLRPQTA* + +>NODE_50#PROKKA_00228 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_9#PROKKA_00322 +MLIVMRQDASREQIDAVIRAIEARGYTARSIPGGDRVSIGILNNRTAIDAAWFQDMPGVK +ETIPVTRPYKLVSREIQPHDTIIRVGGVEIGNGHLVIIGGPCAVESEAQVMATAERVKKA +GADIFRGGAFKPRTSPYAFQGLGEEGLKILARAREQFGMPIVTEVMDLEYFDMVEAYADI +VQIGTRNMQNFSLLRRAGESKKPILLKRGMSATIDEWLMAAEYVLSQGNPNIILCERGVR +TFVRHSRNTLDLSAIPVVQRESHLPIIVDPSHATGFRDQVIPLSRAAAAARAHGLMIEVH 
+NAPDTAQCDGSQSLYPDQFETLCRQVRSIFRILGETDETR* + +>NODE_3#PROKKA_00194 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_8#PROKKA_00294 +MNIYVGNLPYSVNDDELRGVFEEYGSVDSARVIMDRDSGRSKGFGFVEMGNDTEANAAIE +ALNGQDFSGRPLTVNEARPRADRGPRRGGGGGGFDRRPRY* + +>NODE_6#PROKKA_00254 +MKKVVQAFMVLLALTITTGLMAQGTIKGTLKTTKGKTVPGVNILLKGTTTGTTSSLNGSF +VLKVPAGKHVLLVSFTGFKPINYSFTIKDGETLTKNFVLHEDLLALDQVVVTGVQNKQTK +LQSSVAITTLSPQKISQIAPRSAADLLKAIPGFYVESSGGKGNANVFARGLPSSGGLRYV +QFQEDGMPVFEYGDLMFGNTDIMVRIDQTMSRMEAVRGGSASVLTSDAPGGIINIISKTG +GPTTKGVFMQTIGLTYMHARTDFDIGGPVSKHLRYNIGGFYRADNGIRSPGFLANNGGQI +KANFTYTFNKGYVRFRTKILNDKTIAYLPFPMMGNPAKSIPGFNANYGTMKSLDLLHLHA +TTPTGNSVNESLADGMHPKIFAFGGEAFFDLGNKWSLKDNFEKTFTHIQFNSIFGVNAPE +SASAYATAQGLTNYHYAFADGYNAGKPITNMSSLNGNGLVATYGWWSVGLNLQEFGNDFK +LTKQSTNNTFTAGWYFSTNQVGGNWWWHNMLVDISGHNTRKLNLINDNTGESLTTNGYSQ +YGTLYADYNALTVINAPYVYDEIDLGRLTINAGLRWDMGTITGRVENTGSYSYDVNGDGI +ISPAEKNIQYGNGTYTPFHYDYSVLSYSLGLNYEFNKSTAIFARASQGHRSPADRAYVFG +ATTSTPNGFPSSAKDESIEQYELGLKYNSSKVALFATGFYSFFNHIDFTDFVNVGGNLTA +IQQYYNTSAMGLELEAAAQLGKLNLSLTGTAQSAKYHNWVYHDQSGNLHDFNNHFIQRLP +KLYFTFRPSYNFGKLNVSAAWEYFGKRYTNPENKQVLPQFSQINAYIDYTVSPHITISAA +GNNLFNVIGLTEGNPRSGLVSTGGSQYFYARSILGRSAILSFKYSF* + +>NODE_4#PROKKA_00215 +MTQNPDQKAPIWKRPATVEALNAHAKNTMVEHLAIEYLELGPDFLRARMPVDKRTHQPFG +LLHGGASVALAETLGSVGANLCIADPDKAGVGLEINANHIRSARSGWVYGTARPFHIGGA +TQVWEIRIQDEQDRLICISRITMAIVSAR* + +>NODE_8#PROKKA_00292 +MNIYVGNLPYSAGDDALRTAFEEYGSVDSARVIMDRDSGRSKGFGFVEMGNDDEAKAAIE +GLNGQDLDGRALTVNEARPRSDRGPRRGGGGFDRGPRY* + +>NODE_17#PROKKA_00046 +MRARLFKWAGVFFLSLAMAGPAWAASRATPHRRPVLKLHFADVSYDYELKRAMSYAVSGG +ADINECLTAARAITAGDGESWYRGWHRMARRLDQMADQALKAGHRQTARQFWLRASNYYR +AAEFFLHGNPKDPRILSAWGASRRCFRQAARLMDHPVEVIAIPYEGHKLPGYLVKPDASL +KPRKTLLLQTGFDGTGEELYMEVAWYAIQRGYNVLIFEGPGQGGALREQHLYFRPDWEKV +VTPVVDYALTRPEVDPKRLALMGLSMGGYLTPRAAAFEHRLAALVADPGDFDMMVGHRPT +PAEWAGMKKYPKQANQALRAKMKHDTGFRWLVNNGMFTTGRKTPLAFLEFFSRFELTPKI +AAQIKCPTLVVVGAGDHFASPKWQRLLYDNLTAPKTLLRFGPDNPARQHCQVGGLLWGNA 
+KIFDWLDQVLR* + +>NODE_4#PROKKA_00198 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_2#PROKKA_00122 +MPQFFKQLSTRELTERLNNKDVQLIDIRPVDAYNGWASRGESRGGHIRGAKSLPFAWTKY +VDWIEMVHRKKILPENEIVIYGYPDEGFRLVANRFKKSGFEKVSIYLNFLNEWVPDTTLP +MEKLFRFQNLVPASWVNELISGGKPQHFENDKYVIVHAHYRNRDAYLSGHIPGAIDMDTL +AVEDPETWNRRSPEELKQTFEQHGITVDTTVIVYGKFMFPDNSDEFPGSAAGDIGAIRIA +LIMMYAGVKDVRVLNGGFQSWLDAGYEVSYADEPKKPVADFGATIPAHPELAVDTPEAKE +MLASSRAELVCVRSWPEYIGEVSGYNYIEPKGRIPGAIFADCGSDAYHMENYRNFDHTTR +EYHEIEDIWKSNGITPDKHLAFYCGTGWRGSEAWFNAWLMGWPKVSVYDGGWFEWSADPA +NPVETGIPENYPNRN* + +>NODE_10#PROKKA_00006 +MTLMVDVALAAPLWQPLTYAVPAELAPLVKPLSRLLVPLRGGARLGFALGEPLAAGGGQD +ALKPVLDVLEDGKGPQVWPPELLPFFQRAAAYYHVPLGQVLAWCLPAGMGSARPAKALAP +KTQQVAVVSWRRGEDSRLPRPESQAARILRRLKARGPLPLPELREEFPRAAALCRDLEKR +GWVTISHRPLVKDLLGRPLLPEPEPEHYTPDQQRALDELLPAVHSGGFKSFLLHGVTGSG +KTELYMACVKAALEAGRTALLLTPEIGLCLRLEGLLRQRFGAGQVAVLHSGLSPAARRGQ +WLAIARGRARVVVGARSAVFAPLREPGVICVDEEQDEAYKQEDRFRYHARDLALLRGREQ +DCPVVLGTATPAVTTYHRAQEGNTVCLRLPRRVREAPLPRMELVDLRREGRLVGGFLSRR +LLAALEQTLEAGEQAILFLNRRGFAPAYLCTACGQTVGCPACAVSLTLHQGSDRLVCHVC +GHQRPRPRTCPACGAGEEKLRPLGLGTEAVAQKLGELLPGARIARLDRDTAGDPRRLGEL +LRAIAERRVEVVVGTQMITKGHDFPGIGLVGVLSADQALALPDFRAGERAYGLLTQVAGR +AGRQGGKSRVIVQAYDPDHHALRAALAQRPDEFYQTELAERRALGYPPFMRLVALRLEAV +DDRRCQRAAQALAAGLEEARRRLEPGARVLGPAPAALPRAKARHRWMILLKAPTAAAAGR +TLRLGLHRSPPLPAGVRLLVDVDPVSLI* + +>NODE_1#PROKKA_00071 +MNGHWAREKRRMRVINRLLGLSAVVMVLVLVSPTASASIASDAVAYPPVSLSNVPPAKAE +EIRKGEYLTKLSDCMACHTDHGNGKAGKPFSGGLAIKTPFGNIYSPNITPDKKTGIGNWT +FKQFDDAVRYGEGPNGYLFAAMPYNYYSMMNKDQVHAIWEYLKHVPAVNRRNKPLGMPPP +FRWRWLQFGWRFMFVKPTQGEFKYDPKHSKAWNRGRFIVEGPEHCGACHTPHNMLGGSEK +RFFLGGSDITGFWAPNISGLATKPHPIATIMRVFREGKGLGGGDLKGPMIDAIANSMRYM +TPADMRAVAVYIQSVQSEVPPGPRPVAMDEVNLARGEKTYQTDCAACHATGIGGAPRVGV +AKDWDALGKSPLFILFENVWHGVSIMPPKGGCKACTRDDVTSAIVYMLKRSTSRSSKPAV +QATTSKSGIPRDTVSLAVGDKIYHAHCAACHASGAAGAPRHGDIKEWASRLKLGLDKLHH +NALDGIGMMPPKGGCTSCSKDQILSAVDYLVDGSGGKALVEKSLSGKQGG* + +>NODE_3#PROKKA_00186 
+MTAQINVERLLETMCSQLEQTLEARGAVNPVLTGIRTGGVWLADYLHKRLRLEEPLGELD +ISFYRDDFSRIGLNPRVKPSNLPFATEDRHIILVDDVIMSGRTIRAAMNELFDYGRPASI +ILVTLLDLGARELPIQPDIVGQQMQLQRDQRVKLMGPDPLRVELRENVRENPAKDASTKS +H* + +>NODE_2#PROKKA_00134 +MNDLKILYIQSRLAWEDAETNRKHFEEIIQKEAQHHDLIVLPETFTTGFPVDPVPFAETE +DGESVLWMREMAAQTCAVVTGSMLLKNDGVYTNSLIWMRPDGTYERYNKRHVFRMGGEHE +KIHPGDKILLVELKGWKIRPMVCYDLRFPVWTKNHYEKDAFEYDLALFVANWPAVRAYPW +DQLLIARAIENEAYVLGVNRIGKDGLGNDYNGHSKVVDAKGNVISEAPENEEAAISVKLS +YEALQKFRAKFNVGQDWDSFTIQK* + +>NODE_3#PROKKA_00159 +MQDLPVAVKDEEAAHPIAASCRPMLRKVVSAFVRGDYQLSQLIEGVSPVPPDVAAHIQDY +IAGYGETLVELPEETWSTSCVQWMWSYWDVLVDLYTEREGASDLVLTRRMDEVEGKPQFT +VGLVYVPKPQPNHAFQFVPWLTATPPDVLKRAAEFRGRL* + +>NODE_5#PROKKA_00237 +MIGNYGGLEHGLLFNLFRRRAFRRQYEEALEEARGLVATFSRNLADRLFDPVGALPMIER +RRIEICRALIAHPKLLLLDEPSAGMTHDETHQLMDDILSVRDRLDGLAIIIIEHEMGVIE +RVSDHCVVLNYGRKIAEGSYQDVASDRLVQEAYLGSA* + +>NODE_9#PROKKA_00314 +MRKQVEIMSSIDNFWLYMDHPTNLMIITGFLQFDKPINFERLKQTIKNRLLCYDRFKKRV +IRPMTGVGNATWELDPRFDLRSHLHRVALPAPGDKETLQELISDLTATPLDPTKPLWQLH +YIENCENGGSVLFARIHHCIGDGISLIRLLLSLTDTEPNAVWSDCLNEPKIEKETSFNLF +PPLESAMKKVTRARRRAQKVTRFVSREIEKSFSNPYHIVKRTRTVTKFALDVATVMSKIL +LLPADRKTVFKGELGVRKSVAWSDPLPLDDIKVIGKYFNATINDILVALVTGALRRYLQQ +CNNLVGDLDIRVAMPINIRPIDGDIELGNQFSLILVALPVHIDDPVLRIREVQRRINDLK +EAPDAAVAYAVLNALGVSSAKLAKTAATMFANKTTGVFSNVPGPRQQLYFCGEKINNIMF +WVPRIGGLGIGISIISYNNEVSLGIATDSGLVQDPKAILDHFANEFRMLLGMYKAGQMEK +EPLVINDRSVEPPVFAFNTEKIASVQAIRCKAITRSGTQCHNRAATNSMYCTLHLSKYET +IASREENDMPAEADNTLPAEDQAAG* + +>NODE_7#PROKKA_00262 +VSEIQIVGVVGAGTMGNGIAHVFAKSGFQVRLCDVEQRFLDRGMDTIRKNLGREVTKGKL +MQEEADAAVKRIEGTLARAALADCDLIVEAATEQLEVKRQIFEDLDRVAKPEVILASNTS +SISITKLAAFTERPERVIGMHFFNPVPVMKLVEVIRGLATTQETFEMVKALAERLGKTAV +EVNDAPGFVSNRVLMPLLNEAMYAVMEGVATPEAVDQVFQLGMAHPMGPLTLADFIGLDV +CLDIMRVLQEGLGDPKYRPCPLLIRMVDAGWLGRKSGRGFFEYGNA* + +>NODE_8#PROKKA_00295 +MKPHRLAIFLGTFPMLFSLLLAGEGTMKEAGKSLRFGNGRLILTFDRDTGIWTGLEASGG +AVCFRRTADSPSLNVQVDGKPVFGADRKMSLREQKVVQLPTASRLELTMGQGDWAVTAAY 
+TLWDSGTLQRQATFVYSGPKPEGDHEVRNALFVLPDVGLAGTDAFWFATAEYPPRDHPFR +HTDSGRRFGFPFSESTFHGFLARDPQAKLSLVSAYYTEDERAKLLVQEGKGTATAFHTHL +LAETLRPGLRFEVGSQLLRVVPGTRQDALRALQGFYDLPGLRTKIGMPPDTGRQIFYSAH +PGGTIDSSFRDVGGFANFTKLLPSIRDLGVNTLWLMPFWYGPVYAPYDYYRLDPKRCGTP +AELKALTDKAHALGMRVLGDLIPHGPREEPGAKPSFAEQHGDLVCRDKDGKMIQWWGCHY +CDYANPGWQDYMAKHAAYWVRECGLDGYRVDVAAGGAPNWRPYDHNRPSFSGLHGGLALL +RKARAACLKENPNTIFLAESTGPTMYSAVEHGYHWAFSTLLEDHVLKDAPADFVQAMSGY +LENQTYAFPADAFPIRFLTNHDKLRARYRYGPNLHRTLLALCAFMKGAPLLYEEEEMGNE +DFIAKLYRIRQTYDELSVGTVSYRSIPVEPKHVFCIEREYKGKRSVVLINFSNQMSEVKL +SLPKSDLKNPGIYEAVSGQRVDYAQDLTQSLDPYAYAVLVIRQRDELPPSVPKERGESPA +APDGRAMDIKITQEDSLTRVSTPLYSAVIDSARGGLLQEVRGADGKLLVNGVELKEGRRK +LFVGHDSVDFADCSVPLRILARDRQFPDGGKVSLLRGRAELRDGDGHAWMDLTVLYSLRA +KSLSLNVSLTPQYRLSPSKSDLGMKIHFVPTTHWFAETAEGNLLGHVIRRHPASHGFSGR +YWHGAGEAFFNGSLYPVVGEFGVLDTNRRIALGSMALRLDGAPLPVRLLEDEPPGSPVVL +GGPAAATADIPLLRGSQRAVWQQGKAKGMVVTLDFRSVPARFTQYPDSFGVRGWDTGTPK +LCYRGGWCTFGPEYLFRGYGMRATVVRSHGGELTALTDAAGNGLRVTDARFYTDQGLFGD +WRDPRGVLRKMSASNVNDPEPDTQLLHLFEGPQDSAPLRFRSFFRHPHAGGRSLLNPRVE +YEISYTPPTEKGKGLRIDCGVRPHLVKIGTGGFLAYKISLGGCDQWQVDGGEWQPLPAKG +GRLWENKEAGHLPKTLLLRNSKTGLWTRFSDFVGGPDQVENVFLHAGQGQVHLFVAFYDA +EPTDVRPVWRRAAFTMQAGGKQ* + +>NODE_2#PROKKA_00110 +MCNLLIKFGAFNDRFGKDFDRISTGHYATRYNTDEGVFLSTAADRVKDQTYFLGQITPEQ +LAKTMFPIGHLQKKEVRKIASDMKLPSAHRPDSQGICFLGKINYTDFIKKYAGEKPGEII +ELETGKVLGTHKGFWFHTIGQRRGLRLGGGPWFVVKKDIEKNIVYVSNGYDPIAQYDDKI +WLEDLHFLNKVHDYSKLNEIKFKIRHQPEFNSGKLVRDEKGIRIVSENKISGIAPGQFAV +VYDEEERTCIASSVIAENPEIAV* + +>NODE_6#PROKKA_00255 +VKHIDISPIDIAIIVIYIVGIAVWGLMYSKKKSKGKGHEGYFLAGRNMTWPIVGITLYAA +NMGSPALVGLAGDAYSTGISVFNYEWMALVVLVFFAIFFLPFYLRSRVYTMPEFLQRRFD +IRSRYYFSFITLVGNIIIDTAGVLFSGALIVKMIFPAMALWHIIAVLAIITAAYTITGGL +SAVMYTEAVQGVLLMLGAVLLTFFALKRIDFNVARIFTETPHHMMSLIRPNSDKAMPWLG +LVLGVPLLGFYFWGTNQFMVQRVLSAKNTNHGRWGALFAGILKLPGLFIVVLPGIIGRLI +FPHLSDPDLIYPMMLFHLLPVGILGIVLAGLIAAISSSISATLNSASTLMTMDFVNNLKP +GLTPKQLVRIGQIFTGVFVVISAAWAPMIAGFPSLFKYLQQVLALISPPVVAVFLLGLFW 
+KRANAQGAFYGLMGGLLMTIFAVIVRYVNPDIFPWLGHIQFLLVAPVLLVGTMAIIVPVS +LMTPPPPEEAVAQFTWSFKFFNAESMELAGTPWYKNYRYQAIGALLATAILVYIFR* + +>NODE_2#PROKKA_00108 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_2#PROKKA_00114 +MKTFLDLVNQRQSDRKYIDKPVEKEKLMRCLEAARLAPSASNSQPWTFVVVNQPELCQDV +GKAAMGPLYSFNKFASQAPVILAIIMEKPKVITEVGGRIKKKEYPLIDVGITAEHFCLQA +AEEGLGSCMLGWFDEKKVKELLHVPEEKSIPLLITVGYTPENYKHRKKIRKPIDSAVKFN +TYG* + +>NODE_3#PROKKA_00191 +MEDNFENLKIMVIDDSKTIRRTAETLLKKVGCEVITATDGFDALAKIADSHPDIIFVDIM +MPRLDGYQTCALIKNNSAFKSTPVIMLSSKDGLFDKAKGRIVGSDQYLTKPFSKDELLNT +IRQHIPSREST* + +>NODE_7#PROKKA_00273 +MPNQCRHVRANGVQCRAHRVWKEDYCFFHLHHRTPNGTAKRSEDPPPPPPKNGIEIPLLE +DLASIQIAIGRVLTALAQGKITSAEARTYLYGLRLAASNVKQKDFAPVNTVETYVQYDNG +DTLGPEQFHAEKQPQHPLMDSGLLALRHLSNRLTYEATLDAYLTQGQEPPSTLRPPVAGP +PADKSELQNWIKTGWKACQSRAHALELARKAIDQPIPDPIDPKAAFSINANIQRTA* + +>NODE_21#PROKKA_00094 +VEPTVPSPLLDRRQRGDAFLRRYWKLAAPLLILFMLALFVLPWVWFSFVEALCLQVGGAG +LLYILGRLFTTAFNPAYHQAPEPQDGDAGRRDPSSSDPPPRA* + +>NODE_7#PROKKA_00265 +MSATRGIQHGVDMRVEVADIQLPNPVLAASGTFAYGIEFEDVVNLDHIGGFVTKGISREP +LSGNPAPRLIETAAGMINAIGLQNIGADAFVQQKLPALARYQCPVIVNIFGYQMQDYIAV +IRRLNEAEGIAAYELNVSCPNTHAGGIAFGIDRAALSDLVAHARHYSRRPLIVKLSPNVT +SIATMARSAESSGADAISLVNTFVSLAIDVETRRPRLSNITGGLSGPAIKPIALRMVWEA +AQAVKIPVIGMGGITTPEDAVEFLLAGASAVQIGTASYADPRAVEHIGQGLAHWCSRHHI +AKVSELIGGMQTGQ* + +>NODE_4#PROKKA_00201 +MASRIHWDWRAGLVLLALIVLAFLPFGVSGYILGVMTVAFYLAVYAMSWDLLFGYAGEVN +FGPTFLVGLGAYGAGLSNSVFNISVWPSVAIGTLAAVIGGLVLAGPALRLRGPYFGLVTL +VAVILLEKVIGLLSSYTGGEIGLTVMDVLTISQSGNYYYAFGFMVISAVILRIIARSSIG +LILEASGQDPVATEALGFNVTKFKFMAFTLSAFFSGLAGALTVFYLGSASPGTVVSVFVT +IQIIIATLVGGRRSIIGPILGAVFLIAAGEILRPLGQLSNAVVALIALLVVLFAPNGFIG +LFSRTGGAR* + +>NODE_2#PROKKA_00138 +MQQMEILERYLNENKTINEFKKVLWIYDFWGKLTEGKAAKKVLEFAGIKNGISVLDVACG +TGEMLEKVVKLNPDGQNSGIDLSPDMIAKARKKLSKTGHLNFNLKQGSALDLPFPDNSQD +LLINSYMVDLLPVDCFDKVATEFFRVLKPGGKVVMSTFSFGTKKVHRFWFWVARKFPALL +TGCRPVSFKHFLIKAGFEIVKDVEISQNTFPSQVLMALKKS* + +>NODE_5#PROKKA_00233 +VKSFSDAGVAANRRNGHRNATPSGRLGHSLSWGSLLLSGLVVFGGPFMADAQAQQLREAV 
+EMAVQSHPMVASTEAEYRAAERSVDQVEAGFYPSLDLTADSGYQHARRVNESSIKENQWR +NKQRLAMTQMLYDGEGTANRAESAKASAQSAHFDVLSAATKIAQRAIRAYLDVARDRKLV +QYAVDNIDLHRRILADVEEAARSGGGSETRVTQVKTRLYNAQSQRRRAEGNLRNSISDFQ +EAIGETPETLEDYPMPTVAIPASVDEARDEALKNNPSFQAAVETERARTLTANAERSGYF +PQVDVEVAHEQRDGVDGVSGFETDSTALLTLSWNLYGGGADQAKVRRALEQSSAAMYRIH +EVERKIRRELEVALTDYEVARDQVALLRERAATAKEVTAAYREQFRLGQRTLIELLDSGN +ELFLARSDLTTAEYRQISAAYDFLAVRGTLLKDMGVKVATGKAPKAP* + +>NODE_12#PROKKA_00026 +VSRDWFPAKVRFMSGGRAQQTPLAIRVEGRWLEVRLLGEELVAPESGLAYVRRYRLEDRR +GRRWELRQRQEGWFCRELH* + +>NODE_4#PROKKA_00209 +VVDDNSDLGLDAALDPQNGAKDRKFVTALARGLEVLRAFRPGDGFLGNQEIARRTGLPKP +TVTRLTYTLTKLGYLSYSQRLERYSLGTGALALGYATLSTFGIRQIARPLMQELADDVDA +SVSLGARERLSMIYLENCRGSGAVTLRLDVGSRIPIATTAVGRAFLAALPEGERNYLMDH +IKRHAGNRWPPVRRGIERAIRQYQETGFVKTVGTWERDVNAVGVPLVQSDNGNIYAFNCG +GPSFVLPEERLDAELGPKLKQLVQNVEAALRRL* + +>NODE_1#PROKKA_00063 +VSWEAILTLVVLGAVVLGLAWPRMPPDLPLVGGLAILAVTGCAPIDKVFSGFSNPGLIAI +AALYIVAAGLRHTGAVTAPARWLFGRSRRLWVAQLRIMLPTAVVSAFINNTPVVAALLPA +VLDWGKRHRFAASRLAMPLSFAAILGGTCTLIGTSTTIIVNGLLTSTTHGPGMGFFTIGA +VGLPVAIAGFIYILLFGRRLLPDRQGAMGEFTNPREYTVEMRVAAGSPLAGQTLEAAGLR +HLPGLYLVEIERGGNLIPAPGPEELLEENDQLVFAGIVESVADLQKMRGLIPTTGQIFKL +DTPRPDRRLIEAVIAPENPMVGRTVREGRFRSRYGAVVIAVARAGHRVTGKIGAITLIAG +DTLLIEAPSEFQRRYRHSREFLLLRPLEESVQPHYERAWIAWLILAAVIGLVTARIVPLA +PAAIFAAVAMVVTRCINLAAARRAIELQVILVIGAAFGIAAALVHTGAAALIAQPLLALA +EGSPLGMLVVVYALTTILTAFIGNNAAAVLVFPLAYAAATKLGQPFLPYAIAIAMAASAS +FTTPIAYQTNLMVYGPGGYRFSDFVRFGLPLNLIVGVISVVVIAWLWMP* + +>NODE_13#PROKKA_00043 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_5#PROKKA_00229 +VASLDLDVRQASLTRYLENRAAFAARRGVSLPMPSHRRGNDPTAPDDVAEMIRAQAAGHD +VLLVDTPGRVDAVSMAAHVVADTIVTPVGESHLDLDLIGNVDRNQGRAIRPGPYAEFVWA +VRQERARAGRTPTDWVVCHNRRRAPQTRVGREVERTLADLSKRFAFRLVPGFSERTIFQE +LFPDGLTLLDLRESETGVGLTLSHVAARNEIRHLVDALDFR* + +>NODE_9#PROKKA_00307 +MIRVGVAGATGYAGAELVRILAGHREVRITALTSRQYAGVPFAKVFPALAGVVSNECEAF +DVERVCGQTDVIFTALPHKLPMAIVPGLIKNGKKVIDLSADFRFSDVRLYESAYQPHSSS +DLLSRAVYGLSEVYTDDIRKADLIGNPGCYPTSTLLPLVPLLKNRLVDSGGIIVDSKSGV 
+SGAGRSPSLTVHFSEVNESFKAYKVAAHRHEPEIESILTKSAGTPVDITFVPHLVPMTRG +MATTIYAGLAGNVTKHDIAACLCDYYAGRQFIRIDRDGHPPDTRNVRGTNYCDIAFVVDE +NNRRLILMSVIDNLVKGAAGQAVQNMNLMMGFEETAGLSAPPFPV* + +>NODE_11#PROKKA_00024 +VSRPRYVRTQRHGPVTVVVMDNPATMNAMDQDMGPRLVGALESLAADRSVRAVVLTGAGG +RFSAGGNLTRAEEFLEENPGRGAAPVFAQYTIWVHRLLAVLTRLPQPVVAAVERAASGGG +LGWLLACDLVVLAEDARLSTGFLAIGLAPAAGVSWHLPRLVGLPRAAELLMLGRTLGADR +ALELGLADQLTPPGGTLEAALELAGELARGPAQALAATKQLLGGAARRGLFPQAEAERRA +VLHTADQEEFARRLERFRQRRRRS* + +>NODE_8#PROKKA_00299 +VKPASVLLATPDGEGFYRFSDPVRVVTAGSLEEVLPTVTAVEAAVAQEGVFATGFVSYEA +GPAFDRALAAYPPGEFPLVWFGLYRNREVVPKTEMQDVPPLAWRPCLDQDEYVAAIRRVR +EYIEAGDTYQVNYTFRLHAPFAGDPEALFARLASAQACRYAAYVDTGRYVVCSASPEMFY +TQNADVFRSRPMKGTRPRGMTLAEDRAHREELLESEKDRAENVMIVDMVRNDLGHIAEAG +TVHVPELFSAEPYPTVWQMTSLVEARSRAGFGQTLKALFPPASITGAPKPRTTEIIRELE +TTPRHIYTGTIGYLGPEDARFNVAIRTVLIDRQTQQAEYGVGGGIVWDSDPLAEWEECMT +KTRVLRTVRPEFSLLESLLWTPDEGYALLDRHLARLCDTAEYFGYPVDVVSVRQKLEELA +GNLEPVPNKTRLLVDRHGEITVEGSPLGPAPDALVWRVCVHPERVDSHDPFLYHKTTHRA +VYTQAAAAHPDCDDVILQNERGEITESCRANVVVEMPEGRFTPPVSCGLLAGTQRAELLA +RGEITEKVLTPEDLYAATKVFLINSVHGWVVAELSDSSD* + +>NODE_11#PROKKA_00018 +MDHAALPQTLAPGLYRLGSYHLACFLVETPDAALLFETGMSLVAPLILAQLDELGVPREK +IRWIVHSHAHSDHSTGQAALLEALPRAELLLSPTSRRHLAKPSTAEQFAKEDDSTRRALE +RIGALPPGSLPDPLPLLPARHRTVEPGDTLDLGGLTVELRSAAGHVPGGLLAWLPELGAF +LASDSAGFHMAARPNYPLYFTGYREYLRTLEEIRRTNPELLCLGHQGWFRGGEARRYLEA +LKAHLAFEHATIWEAHRRGEDEESQARRLVERYYHDELAIYPRDILWYCCRLLVRRSLEA +GA* + +>NODE_1#PROKKA_00086 +MILYSRPDDPAAHSIRLVLAEKAIGVKIVEVEPDSPPEDLLHLNPYGTLPTLVSREVVLY +DPRIIAEFIDERYPHPSLLPSDPVLRARARLFVSEIGGSWYELCDEVANGAGRGRTRARR +ELTEAVVSSDELFTGTAYLLGGDYGLADCVAAPVLWRLPHLGVRLPREAKAIRGYMQRVF +KRPTFVYALVASERAMIES* + +>NODE_9#PROKKA_00306 +MKISQLSIFLENRSGRLARIATVLGNAGINIRAMSLADTSDFGILRLIVSDTEQAEKTLK +DQGFTVLISAVVAVAIPDSPGALGNVLSIMEHAGLNVEYMYAFVEKDMGQAIVIFRFDDV +DRAISTLIENDIAVLESKRVLRL* + +>NODE_1#PROKKA_00076 +MNLILLGGLSGAGKTGALDMLEDLGYQIVDNLPLSLIEPAIDAMLGDDARHHSRLAIGIA +PHNTPEEFEALARQIEIWRTRPHGCTVIYLFCEPGTLVKRYRATRRRHPLTGPDTDLAAA 
+IEIETTLLEPLAQLADACIDTTHTNIHQLREIIRARVNEGGDHPMALQIESFGYRRGLAQ +DADLVFDMRCLPNPYWEPTLRELTGLDQPIAEYLETHGTVTRMLSNLVNFLNAWLPSYAA +SNRSYLTIAIGCTGGRHRSVYMAEQLAAQLAHGGWAVTVRHRDLDTPTRDVKPILADD* + +>NODE_1#PROKKA_00048 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_2#PROKKA_00143 +MKSEVNGRVEDILFKEGGSVKKGQPLYTINKSLYQAAYDQAAAQLNIAETNWATDTTDAR +RYKNLWAHNAVDKIQLDHAIAKVNVAKASVIAAKANLESAKTNLDHATVRAPFSGSTDVS +KVRLGDVVVAYQTPLVTIVDNSNMNADFFITENDYLQLGSSDKSIKEKLSHFRLVLPNGK +LYPYKGKLYAVDNRVDPTTGTLMVRLKFPNPEDLLKSGMNCVVRSTQNSAGKVVVIPQQA +VTQLLNEFFVYTVNSKGIVSQQKVELGAEYGNMQVIKSGLKPGTKVIVEGIESVRPGAKV +KTVPMKTGGMAKQSKPE* + +>NODE_6#PROKKA_00244 +VGQKLVSIVMGSDSDLPVMKPAAEMLEQLGVEYEIDIVSAHRTPEKLFDFASNAHKRGIQ +VIIAGAGGAAHLPGMVASMSPLPVIGVPVKSSNSIDGWDSVLSILQMPGGVPVATVALNG +AKNAGILAAQIISVSDSQVREKIIEYKAGLKEAVMKKAKNLKG* + +>NODE_10#PROKKA_00003 +MKKFKRLCLALGVAALGLAILAGPALAKKDVLVVIQEAEPVGLDLMTSSIQTTMSVCYNI +HDTLFAPQEDASVKPRLAESWEKVDDLTWKIHLRRDATFHNGEPVNAQAVKFSFERSFKP +SIKNPHKGKLSAFKEVKVLDDYTLLISTKEPYAPGLYILGYYLPIVPPGYIKKVGDAKYN +TNPIGCGPYKLEKWVRGEEIVLTAYDKYYGPKPAFKKVIFKGVPEEASRIAALLTGEADV +ISGVSIHQRKRILASGKAYLTNQMGVMPYLGLNTYKPPFNDVRVRQAMNYAVNRELINKA +LFGGKAILCAGPISPRTFGHDPNLKPYPYDPAKAKKLLAEAGYPNGFQTRLAYPTYMSQI +QEQAEAIAADLAKVGVKVRLEPYERAVMWQRYKARKHAMYIYWWDDAPEPDRYMYSLFNS +KVRDYYYKNPEVDKLLDLGRTILDRKKRAEVYHKIDRLLYNDAPWVYLYVIPEVFAVSNQ +VAYQGRRDGFLDMRTAKPK* + +>NODE_1#PROKKA_00069 +MGVKSLKDCELQNRRVLMRVDFNVPVNDGAIADDTRIRAALPSIHEALKAGARLMLMSHF +GRPEEGKPESRFSLHPVARRLGELLGFDVPLVTDYLARDPEPGSGRAVLLENVRFNVGEK +RNEETLARRYANLCDVFVMDAFGSAHRAQASTYGVARFAPQAVAGELLCAELKALGRALK +APDRPLIAIVGGSKVSDKIGVLDALIERCDGLVVGGGIANTFLAAAGHPVGRSLYEPGFV +DEAKRLMIAARERGVNFPLPVDAVVAEALAEDAEADVKPVHAVGAGDMVLDIGPETAVLY +RPLLANAATIVWNGPVGVFEIDQFAEGTRAVAEAVASSGAFSIIGGGDTIAALAKFGVTD +RVSYISTGGGAFLEFLEGKTLPAVDILEARASD* + +>NODE_6#PROKKA_00248 +MMIDAAKRASARKIVAVIPYFGYARQDRKDKPRVSIGAKMIANLLTTTGIDRLITMDLHA +DQIQGFMDFPVDNLYASVIFYPYLKKLNLPNLMMASPDTGGTRRAANYAKALDTGFVICY +KQRTRPNVVEQIQLIGDVAGKDVVLVDDIIDTAGTITKAARVILDKGANSVRAMVTHPIL 
+SGDAFKIIADSPFTEVVVTDTIPVKDDLGGKIKVLSTAQLFSEVIKRVENYKSISSLFNL +GNQSNK* + +>NODE_3#PROKKA_00154 +MKDIQLESTIICPACGHQKTEQMPTDACQYFYECESCHTLLKPRAGDCCVFCSFGTNPCP +PVQQGDDCCASD* + +>NODE_2#PROKKA_00119 +MSGKLQAEFRSDLKWQPAILRKILILPSKQVLIILSLMAVVVVQARPPLF* + +>NODE_9#PROKKA_00303 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_9#PROKKA_00302 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_9#PROKKA_00315 +MNAPAPTPNNPRIVVCCGSGGVGKTTISAAIGLCGALMGKKTVVLTIDPARRLADALGIS +ALNMEAQRVPLEASVPASGELYAMMVDAKRTFDRLIGRYSSAGLRDRILENRYYQHVSNN +MAGSHEYMAMERLYEIYHEKRFDLIVLDTPPSRRALDFLEAPQRVINLLGHPYFLKLFKP +YIKAGQLSGRLFNLLAMPVLRAVGQVVGGQTISDIFSFFQLFNDMLFDGFSKRASAVESL +LSDPMTTFFAVTTPQEYPIQEATYLFRQLQQRNMPFGGFIVNRVHSDTADSPFDSEAADR +KRVLMEKIADKPIFQRLEIADRMDRKLARSDAAAIDRISSISPGLAVFPILFADETVNDI +SGLRVISTQLMKHPEFKI* + +>NODE_7#PROKKA_00258 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_1#PROKKA_00054 +MARYIGPKCKLARREGTDLFLKSPIKALDQKCKIDRIPGQHGQATRRGRMSDYGLQLREK +QKLRRMYGVLERQFRRYYKEAARRKGATGALLLQLLESRLDNVIYRMGFASTRAEARQLV +SHKGVTVNGQLVNIPSFEVKGGDEVALTERARKQNRVEMALEISRQIERPAWVEVDEKAC +KGTFKAMPEREELLPDINENLVVELYSK* + +>NODE_21#PROKKA_00093 +VLFTQIIAFVLVMVVYQAYDPAPPDYGWGWGLLLFITGPLLEWLLASVIARSGLRRLARP +AADPARSLQRSEILLHLSALTVFFLFMVSYDLKAGLIATPLLAASETLSGLAALFYYALL +LIPVWGHCHRLERAAGRALALDRRRYILEQARFVAPVAFPWFLVSALRDLLTLAWPGLTA +WLETPAGDLAFLGFFLLVISWLFPPLVRSWWGCPPLPPGRAREICQMVLKVARVRVGGIL +SWDVLQGRLVTAGILGLFPRFRYLLLTPALLEALSPTELAGVVAHEAGHVRLKHIPAYLM +FFMAFFLLAYALAEPLDILLRLALLTLAQSDWGAGLLNSPDAGSTLSITFALPLLALMIV +YLRFVMGFFMRHFERQADLFALNLMGEAAPLVGALEKLALMSGQTRDLPSWHHFSVAQRV +SHLLTAQANPPAWLHRQGRLIKKALAVYLAGMVLVLGLGWGMAGLDWSRQVNQELALELV +RHQLAQHPDDPRLRFQAGMLCYQLGREDRALSHFRRAFLAAPDNPELLNAMAWIFATSQD +PRRRRPQVALVLARRAVSLSPLPHIWDTLAEAYFAAGQPVKALAAARAALEAGPKARLDY +YRAQLERFKRAVEDLKKKGPAGRRPRPAAPAPGGRQG* + +>NODE_50#PROKKA_00227 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_4#PROKKA_00224 
+MKGKFSLKSTRTGSMMAKGALAAALLTAGMGVANASTTLQETGSSLLYPLFNQWIPAYSK +AHSDIQVNAASTGSGTGIAQSIAGNVQMGGSDAYLSGAMMKKHSDMLNIPVAISSQMVNY +NVPGLNDKHLKLSGPVLSRIYEGTVKYWDNKEIKAMNPGVDLPHHRIVPVHRSDGSGDTF +LFTQYLSFSHPYWHKKLGYGTTVNWPAVQGEIGATGNPGMVQALKDNPYSVAYIGVSYKG +QIDKDNLGEAMLKNKAGNFVLPNSTTVPAAAAAMVPKTPKDERISLIFAPGAKSYPIINY +EYVIMHANQGDLAAPLKQFLNWAVSPNGGNASQYLGAVNFMPLPKKAEELTKAQIAKIHS +* + +>NODE_4#PROKKA_00213 +MGRSDVIAGLSLERLDNIERHIDRKYLKPKRLPGTLTLVARRGQVAYVKAQGLMDVERNK +PVARDTIFRIYSMTKPVTSIAMMQLFEQGRFLLNDPVHKYIPAWKNLRVYQSGVYPQFLT +TPTLRAMTIRDLFTHMSGLTYGFMCRTNLDAAYRELKLDGGKEMTLDLLVERLSQLPLEF +SPGSAWNYSVATDVLGYLVQLLSDRPLDEYFREHIFDPLEMADTGFMVPESKRERFAACY +QFDPEQGYALQDDPADSHFTRPIKFLSGGGGLVSTVDDYYRFAQALNNGGQLNGARIIGR +KTLDFMTMNHLPGNQDLPGLSIGPFSETPYEGSGFGLGFSVKVDVAKSQTNGSVGEYGWG +GLASTNFLVDPVEDLIMVFMTQLIPSSTYPIRQELRSIINGAIVD* + +>NODE_5#PROKKA_00236 +MTAVSSAVSDSAAVPQTADVAVEIEGLFTGYDKADVLLDVSLTVPKGQITCLLGSNGAGK +TTLIRSILGLTPPRQGTIRLFGEDTTGLPTHKVVARGVACIPEGRRMFSKLTVEENLRLG +AYQEPSEAKIRTSLEDVYQTFPRLAERRNQLSGTLSGGEQAMVSIGRGLMGAPRLLMIDE +PSLGLSPLYVQENFRIIENIRTRGITVFLVEQNVHQTLAISDYGYVVSGGRLVAQGAAAS +LQNDPEVHAAYFG* + +>NODE_11#PROKKA_00021 +MSRRQQENGAVWVFGDYRNYFQNRVTLQLLARARDLASHLDTKVAVVVMGYRVGRWVREY +VAHGADVVYVLDHPSLKYYLVQTYTRLMERLAGEHQPQIILVGATGFGKELAARLASRLG +TGLTADCVDLTVDDQGRFIQTAPSFGGNLLAQIMIPQARPQMATVRPGTFQELPHDADRR +GEIIKLPLPDDLPPEKARLIHSRRIKPRRRKLEKARVVICGGRGMGSKKKFKNLYALARL +LGAQVGATRPVVYQGWAPEDALVGQAGRDVHPEVLFSFGVSGAIQHTAGIHDAQFIVAVN +KNPAAQMMKMADVAIAADANQVCLALIRELKARLEKKK* + +>NODE_9#PROKKA_00311 +MRKKIHLFLVIGCIVFCFSPAVFAKTGGPKVLHYPREDAVVRAVKLVSPAVVNISTQYEV +RTRVNPFANFGANDFFNNFFDQGIERKEKLTSLGSGVIIDGRRGFILTNAHVVVRGAKIT +VVLKDGRKFHADIVGIDPESDLAVLKIKTKSPLPSIAMGNSSDLMIGETVIAIGNPFGFS +NTVTVGVVSAVDRSFRIKNRIYRDLIQTDASINPGNSGGPLLNIDGQLIGINTAIYKNAE +GIGFAIPINRAKKIISDLIKYGEVVPGWIGLSVQNLNSRLAAYLNLPQHSGVVVRSVDPS +SPAGAAGIREGDILLAIDGHKIESIDDYKTAMRGYRKGQHAVVKIDRNGRHLTLSVRIEV +FPESLAPELVQRLLGVKVVGIGQKVRFNQTINADKGVIISEIDPQSSLAGIGVRPGDVIR +KVDAEATNTVQSFYKAMIKDRWKQSIVILLQRGDQGYYITLKLS* + 
+>NODE_10#PROKKA_00001 +MMARPGRVTSHQASVMYARPSANMRPQVGWGGGTPMPRKESPLSQMMILATRTVPSTKSG +ERMLGRMWRRMMVRFRVPRLWAALTNSFSFRASTLPRTVRA* + +>NODE_1#PROKKA_00074 +MKKIVIANLKGGSGKTTVSTTLAAFWASEGYKTCLLDLDPQRAATSWLRRRPESLPSIHT +LSLPNQTSGVTLSYALRIPRDTERLVVDTPAGLSGIALADTVRGAAAVLIPVLPGTMDSD +AAARTVADLLLIAKLGRHSGRMAVIANRVRRGTLGAERLQKFISALDIPLIATLHDLQAY +SHAILSGLGLHELPRRRIGGERMAWVPLLEWLERRELEITAQTALGPRSLLTQSAGQTPS +DSME* + +>NODE_9#PROKKA_00323 +MILIDILRRNWYDLLPLNEIGNICAVKFGNKAADP* + +>NODE_2#PROKKA_00150 +MYKKLLLLSLTFFMFTSFTNIASAQSNNIEIARLKQIHKLLDYRFVGGFYGFEKLFFQTV +SYPDEARQNCTLGIMIASFTVNCDGDLVGIRIRNSLGKPLDNQVSKFLKATKGHWNPCQD +KKFTHFEIPIQFTLKGTETDSTAAALVYVGKSAGYSCYPDSYYSVPRLFPGGIL* + +>NODE_3#PROKKA_00167 +MIRFEHVTKRYEGGHVALRDVSFALERGEMAFLTGHSGAGKSTLLKLIMLMERASEGQVV +IGGQVLDKLPRRRIPYIRRHIGVVFQNHQLLFDRTVYDNVALPLEVMGIAPREVGRRVRA +ALDKVGLLSKERMNPMELSGGEQQRVGIARAVVNKPPLLLADEPTGNLDPELSASIMHLF +EAFNQVGVTVLVASHDISLIRHLGHRVITLDGGRLAQGDRMPDEEALYG* + +>NODE_5#PROKKA_00231 +MASPDIEYMSELRAAVSRRPTILANIILIAVLVFFILAILWASWAKIDQVSTGEGRVIPS +GHVQVVQNLEGGILSELYVAEGDHVKQGQVLMQLDDTQFSSDFMENRLKFLGLKAAVTRL +QAELEGTALKFPAEVEKQLPAVAQAERSLFEARRSEEDASLAKLQAQYQQKLHEVDETKA +KIEHLRTVIKLAKEEMAILEPLVKKGINAPIELIRLKREESQDAGDLAVARQQLLKLAAA +IDETKEAITEAKAQFRSRALKELNEAQVNMAALGQVVTSRDDRLRRTVIRAPVTGVVKQI +FLTTIGGVVRPGMDLMEIVPAEENLLVEAKIRPSDIAFMHPGLEATVRFTAYDYTIYGSL +KATLDQISADTIFDEAKKERFYMIRLRTKTNSLLDKTGKPLPIIPGMTVSVDVKTGRRTV +LQYLMKPFHKLSIRAFHER* + +>NODE_13#PROKKA_00036 +MFKVMIRDSMSPVAREILTATGKIEVVTDNDKAANAPEVLAEMIGEFDGLAIRSGTQVTR +AVMEKAGRLKIIGRAGIGVDNIDLEAATRQGIVVMNAPGGNTVTTAEHTVSMMMALARNI +PQATASLREGAWDKKKLIGVEIAGKTLGIIGLGHVGRIVADRARGLKMRVIAADPYVSCD +AAARINVRLVSLDELFSASDFISLHVPRLKETVNMINADTLSRMKPGVRIINCSRGDVVN +VDDLYRALESGRVAGAAIDVFPKEPPDASLPLLKHPRVVLSPHIGASTGEAQVKVARMIA +EQMAACLIDGVITNAVNFPSVSMEEMARVV* + +>NODE_4#PROKKA_00216 +MKQMQNRVAVITGAASGFGLEFARVGAARGMKLVLADVQAEPLEQARAEMEAAGAEVLAM +LCDVRKSEQVQALADKTMERFGTVHLVFNNAGVGSGGLVWENTEQDWEWVLGVNLWGVIH +GVRIFTPLMLEAARRETDYEGHIVNTASMAGLLCPPTMAVYNVSKHAVVALSETLYQDLK 
+LVNAPISASVLCPYFVPTGISDSHRNRPAELQNDSGPTASQMIAQAMSQKAVSSGKVSAA +EVAQRTFEAIGEDRFYIYSHPEALGNVKHRMEDIVAGRNPGDPFAEAPQIGQMLRDKLQG +* + +>NODE_10#PROKKA_00005 +VARAVDGVDLTVGRGEILGLVGESGCGKSALALSVLRLLPMPPAFFAGGQIRFKGRDLLK +MDPEELRRLRGNQISMIFQEPMTALNPVFTIGNQLGEVFRVHQGLARREARRRAVEMLEM +VGVPAPARRVREYPYQLSGGMRQRVMIAMALACRPALLLADEPTTALDVTIQAQILELIL +ELRDELGTAVVLITHDLGVVAETTERLAVMYTGRIVEQAPTVELFDHPLHPYTRGLLEAI +PSAEAELADKELHEIRGVVPSLLDLPPGCNFAPRCHLADERCARQEPELVEVRPGHRVAC +WRVDRG* + +>NODE_11#PROKKA_00015 +VQFVQSSYPLAGLWRPGQETPPASGDTSGSGGVSGGAGFARMLDGKLRQGEPADLSTGAT +RQTGGPVRYSPHSPLLVGASIGAAPGLFTAPVVWSDPTPAAPKGAYRMHSANPRSTFPLR +TVPPPPAPPPSSEHENEDVGLPSAISTYHGSRFRAEVQVGERQEVNADVEQFHFPHLVQK +NGLTYAYFIDHSHGSENDVGLAVSKDGVNFQYQGKVLTKGPEGFDAQMASFPAVQYDGET +NTWYMLYEAKADHDDLNTVCLATSPDGRNWTKHGPVIEPGDAGEISAVDVGTPTMFKEGG +QWHVYFHTLAKDGRVRIGYAHGENLQDLTVNQGPLLDVDPQGIEGGTVGARSNVVKVGDF +YYMAYEVCSPNTDFHRSQWGTNLARASSPGGPWVKMSGRPLLVNDRPGMGMDGPELSLQD +GKLYLYYRHGANATARVELSGLGDSSKMYLAHQSSPGVPV* + +>NODE_4#PROKKA_00205 +MTIKSYDSVYFDGRWQPVDGERLSVYESGTGEVMASIPGAAPAVMQQAIDAAHNAFDSWS +RRPLKERLKYIEALHGQLVARAEEIATTISREVGMPLKLSRNIQAGLPIAITDSYLKLLP +DFPFEEKVGSSLVQYTPVGVVGCITPWNYPLHQVILKVVPALAAGCTVVLKPSEVSPLSA +FMLAEMFDAIDLPPGVFNLVSGLGHVVGDSLTGSNKVRMLSFTGSPGTGRRIFHAAAEDF +KRLALEMGGKSASVILPDADLATAVKGSVNNCYLNSGQTCIAWTRMLVPADKHDEACELA +VAAAKKLTLGDPLDENTRLGPLASKEQLERVRNYIRVGIEEGAKLMTGGPDAPAGLDKGY +FVEPTIFANVDPQSRIAQEEIFGPVLCIIPYRDEEEAIAIANGTPYGLSGGVWSADQDHA +IAVASRLRTGQVTVNGGAFNPEAPFGGFGASGLGREFGRWGLEEFLEVRSLQL* + +>NODE_3#PROKKA_00180 +VKPSGKRFSLKTWSLASLTLAGMLALQPAANAGSLPKRSFCVFDPVGANGPLFNLMKSTK +PAALDWGVDLQMRAYTDEKIAAEDFKGGQCDSVLLTGTRAREFNKFTGTLEALGAITSNK +EERVLMDTLNQPKAAKLLTNGDYEVAGILPAGAVYLFTRNRNIDTVNKLQGKKIATLSYD +RASLTMVRHVGASVVGASSASFAGLFNNGSVDLAYAPAVAYTPLELYKGLSHDGGVLQYP +LAQMNFQIILHKSRFPKGYANHVREYAREHLNQAFSIINKATDEIKKKYWMYPTDKQTAS +YDQMLQSVRLSLRDKGVYDAKALKLMKIIRCKVQPSRGECSQNAE* + +>NODE_29#PROKKA_00105 +VEGLFDKLSAIQKFANDLRGNRSGVLSIASTPTLTYAFLANALKRFREERPGVRILLEVT 
+HTQRTLELASAGQIDLGFIHGPSENPLLQFERLAASEMVCVLPPDHPLAAQPALGPRDIS +AFPLITNIRNSIAPRIEEAFRKQGVERDFAIACNHTMTVYMLVEAGAGIGLVDPWVQAER +FPTLVRRPFRPRVEVSPRAVHSRSQSLSRLAEGFLAVVREEAAVSQ* + +>NODE_2#PROKKA_00129 +MSVLMHNNQDGAVRFVFNNLSNKQTKQKSAETLKKLDLYKTMSKFKSTGVVYFFDAKNKS +LISHISLARSNKQLAEALTNSEKMAK* + +>NODE_3#PROKKA_00164 +MTDLPRIAFLGIGLMGRPMATNLINAGYPVTVWNRSPEKARALAGQAGVAESAAQAVAQA +DRIITMLENGDAVQQVLVEQGVAEAIQPGAVFLDMSSIAPEMAKSHAGRLKARGVGYIDA +PVSGGTVGAEQATLSIMAGGSHEDLEAVRPLLETLGRVTHIGPAGSGQLAKLANQAIVGI +TIGAVSEALLLAAKGGADPEAVREALLGGFAGSRILELHGQRMLARDFEPGAPSRIQLKD +MRMILDQARAEDLTLPLAQQAFQSYRALIALGEGECDHSALLLQLEHLNQTRMSDPSDGQ +ER* + +>NODE_3#PROKKA_00195 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_3#PROKKA_00160 +MNTQKLINRALSGLDDLGYQLDSMGITSKVNRHNVIAYVMAEQKHWEGEYDSLVARIDQQ +RFRVEQIVGRVEGLVRGGAEFALKPVSGLRSLVKA* + +>NODE_10#PROKKA_00012 +MSAEDPIIEVRGLKAQFGEQVILRGVSFAVARGEVVVVAGGSGCGKSTLLKHMLGLYQPA +AGSVLIDGVDIAQADAAQLEWVRRRIGVLFQSGALLGSLTLLENVMLPLVGFTPLSRRGA +ELVARLKLSLVGLSGYENHLPSELSGGMQKRAGLARAMALDPQVLFFDEPSAGLDPVTSA +ELDLLIKRINRNLGTTMVIVSHELASIFEIAHRVILLDKQAKGIIAMGPPQELSGVPRLF +PGGIL* + +>NODE_3#PROKKA_00173 +VPELPEVETTRRGIEPHLVGHTVTQLQVRESRLRWPVPDKLDQMLPGQKVGQVARRGKYL +LVHLERGTLLVHLGMSGSLRVVTRAEALRKHDHIDLTTDAGTIIRFNDPRRFGAWLWTED +WQHHPLLASLGPEPLSPAFSGHYLHRQSRRRKAPIKQFIMDSHMVVGVGNIYANEALFIS +GIDPRRPAGRISAARMEALVLAIQQVLENAIAVGGTTLRDFVNSEGQPGYFRQSLQVYGR +EGQPCRRCGKPLRQLRLGQRSTVFCGHCQR* + +>NODE_8#PROKKA_00293 +MNIYVGNLPYSVSDDDLRTAFEEYGAVDSARVIMDRDSGRSKGFGFVEMGNDNEAQAAIE +GLNGQDLGGRPLTVNEARPRADRGPRRGGGGGGFDRRPRY* + +>NODE_8#PROKKA_00291 +MVGTSVTLLLRFAPWRGSFFLYFQALAIDTGERGTDTPGSAPMAQLQTRTTQVIVQNQLG +LHARPVTLIVKLAKTFSSRIAFERGGTVSDAKSVMALLLLAAGKGTELTITAEGHDAEEA +IEALERLFSDKFGEE* + +>NODE_7#PROKKA_00268 +MSMKVTTRQVDGVTILDLSGRILLGEGSVQLRDAVRDLLAKGQKKILLNLGDVTYIDSSG +IGEMASALTAVRNQGGDLKLLNLTKRVHDVLQITKLYTVFDIKDDEASAIASYN* + +>NODE_1#PROKKA_00088 +MEKKITGYTTVDISQWHRKEHFEAFQSVAQCTYNQTVQLDITAFLKTVKKNKHKFYPAFI +HILARLMNAHPEFRMAMKDGELVIWDSVHPCYTVFHEQTETFSSLWSEYHDDFRQFLHIY 
+SQDVACYGENLAYFPKGFIENMFFVSANPWVSFTSFDLNVANMDNFFAPVFTMGKYYTQG +DKVLMPLAIQVHHAVCDGFHVGRMLNELQQYCDEWQGGA* + +>NODE_3#PROKKA_00185 +MIDTCEAARQLQLNAAGSLRHFLTLDGLDRPLLTEILDTADSFIEVGERRIKKVPLLRGR +TVVNLFFEASTRTRSTFELAAKRLSADVLNLNISTSAASKGESLSDTLLNLEAMASDMFV +VRHAQSGAPHFIARHVTPGVGIVNAGDGRHAHPTQAMLDMLTIRQHKGGFEGRVVAIVGD +ILHSRVARSQIRALEILGADEIRVIGPNTLLPRDVESLGVKVFNDMQRGLKDVDVVIMLR +LQNERMEGALLPGEREFYRLYGLTTEKLRYAKPDAIVMHPGPINRGVEIESAVADSPRSV +ILNQVTNGIAVRMAVMSMVMSGQLAQLNQGDAAQEQSRTL* + +>NODE_6#PROKKA_00250 +MKYLIAGLGNIGVEYANTRHNIGFIVADALVNELKGKFETERLASVASVKHKGRTLVVIK +PTTYMNLSGKAIKYWIDKEKIPIERVLIVVDDIALPLGTLRMRKKGGAAGHNGLSDIIMK +LGTEKFPRLRVGIGDDFAKGYQVDFVLGQWTDKEVNVMIPRVQKAVEIVQSFVSAGIDNT +MNLYNNK* + +>NODE_1#PROKKA_00059 +VVFVRLNLDAVTSNNKASDPDYFFFPASIFEKSHRHERMPKFILKDLSDYQQYRDNRELI +SSFFENLPNKTLQLIAKPLCGSSAAKLIALVAFAVL* + +>NODE_7#PROKKA_00281 +MYPFIHIGHFTIPTYGIMMWLAAVAGCIVLYRNFKRWKVEGDAITIVAFATVIGIIGGKL +YHVLEKPVLLMHHPALLISRSGFAWYGGMIAGILALLFQAGTYNIRPLRMLDLCVPSAAL +GYGIGRLGCFFSGDGGYGPPTKMWFGMSFPHGTVPTTQKVYPTPLFEFVAAVIIFYILWR +RSRPAAERKLGHMTAEYLLLAGGARFLFEFIRINPKIFLGLSNAQWASIAEMLGGTALLW +WSRKYASTPQPGQQGRQPKEEPALVAAGDSGGPPTAEQTQ* + +>NODE_6#PROKKA_00251 +VFQGKHQLNVFINILTQIKTMLRIRNFNNFVSNFLKIFAKFYNQAKIISYRLIILLQASV +TGQPNHSIEMEKQRQQSGLLFKIK* + +>NODE_11#PROKKA_00016 +MSKLFGILAGLIGIFLLVASVISFGHLVEDAWVRGGMLASLLFALLLLLGSAAFLLTAVL +LFRMRSHYLPRLYELEELEGLEEPPPKTKDSGESNGPRLA* + +>NODE_2#PROKKA_00137 +VLIKKAMNRVERGKYGEVHSVLIYKDGKLVLDEYFKGHDYKWEAKKHYGPMVVWDADRAH +SAHSVSKSITSLCVGIAVDKGLIKDIHQSIFDYLPEKYQYLNVGDKKYITVENLLTCSSG +LLWQEWSAPLSSKRNDQVGIYFHKKGPLDFVLNRPFVAVPGQRFNYSGGGVEVLGEIVKN +VSGMAFDEFSQKYLFEPMGIKTASWALKYPTGEVHAAGSLKIRPRDMIKIGAMMLNNGIW +NGKRIVSEDWVEKSRKPWGNNRGIDLPGEDLRDMGYAYNWWTKNEKINGKAVHWFSANGW +GGQQIIVLPEINTVVVLTGANYNRKVKQYALLADYIFPAIK* + +>NODE_9#PROKKA_00318 +MAKTATKKGETAQTKITGKIQKAAESVTDKVKGYNEKYVAKNIEKGKATLKEYNEKYLVK +TVEKGKDTLKEYNDKYITKAVEKGRSYVDGPYKKLSGTMDQWLEKGRSFEKDAWKKMDGY +VENGKKFMYKLPLVETVEKKVTSSLNSVPSVVNLPGKGDIEKLTLAMEALNSNIEALRKQ +SAQ* + 
+>NODE_3#PROKKA_00163 +MPRLAANLSLLFNEVPFLERFEQAARAGFRAVECQFPYAWAPEAIAAQLQGQGLQQVLFN +LPAGDWDGGERGIACLPGREADFREGVERALRYAEIMKCRQINCLAGPLPTGAPPEPYWA +TFEANLRWAAPRLAEQDITLLIEAINSKVDVPGFLLDHSKLALDLIDRLNLPNLKLQYDL +YHMQIMEGDLLRTLGANLPQIGHIQFADNPGRHEPGTGEINFRRIFEQLDAWGYEGWVAA +EYVPEVGTFDGLSCLKSWL* + +>NODE_4#PROKKA_00208 +VIRDKETLNQLIDTISRFVRERLVPSEEQVAQDDAIPEDILQEMKDMGLFGLSIPEEYGG +LGLTMEEEALVAMEIGRTSPAFRSIFGTNNGIGSQGILIDGTDEQKRRYIPRLATGELIS +SFCLTEPDVGSDAGSLRTTATRDGDHYVLNGTKRYITNGPEAGLFTVMARTDPDNKGAGG +ITAFIVEGDTPGLHRGRPDRKMGQKGAHTCDIIFDNCRVPAENIIGGREGVGFKTAMKVL +DRGRLHISGVCVGVAERVLDDALHFAMERTQFGKPIAEHQLIQALLADSKSEAYAGRCMV +LDAARRKDAGENVSTLASCAKLFCSEMVGRVADRAVQVHGGAGYMAEYAVERFYRDVRLF +RIYEGTSQIQQLVIARNMVREAD* + +>NODE_25#PROKKA_00101 +MSLPKRITAPLLSGLVLPGLGQLINRQLGKGALLICLMSLFFMSFLFLTVYQVSHAMSAL +GEAAAQSADKWQALRAQLARQGTGWLWGLGTAGLGIWLFAVIDAARVGARLDRSAAEGGQ +GES* + +>NODE_12#PROKKA_00033 +MSNRDIHNATGDSVKMNALVLGAGLQGKAVIHDLSRSELVDRILVADLDLAAAQRFLDKG +GYHKVRAVQADALDPAVLRRLISENRSDIVVCMLPAHLSGRIAEVCVECGVPFVNTSYAQ +WLGELDQRARDKGVILLPEMGFDPGIDLIVGRMALDELDQVEGFYSYGGGLPDPAACDNP +LKYKITWTFDGVLKAYCRPARLLRQGRPVEIPGDEIFQEENIHFIEVPELGRLEAYPNGD +ATRFVEVFGLGPELKEMGRFATRWPGHSAFWRIMAKLGFLGDQPVELGEGVSVSPREFLV +KLLEPRLQFRENERDVVVLRVKVWGRRGGRKRTVTYDLVDYRDLATGLFAMNRTVGFAAS +IGAQMVLKGEITGAGVLSPVKVVDGQRFLDELAARGIKVQRRLEEE* + +>NODE_11#PROKKA_00023 +MESEVLQRVAEMLRSPGAVARNRNLLEFESEAGQRAWRCYRLFLSLLAELERAAQSPEVR +VSAQETEGGLQLVLVDPRVSYRRSCLVPPELVELFLDKLTALGLLGGEKT* + +>NODE_3#PROKKA_00176 +MTAHSDQQQQEARIRLDKWLWAARFYKTRTLAKEAIEGGKVHYNGQRTKPGKVVELGARI +RLKQGWAEKEVVIQGLSDRRGGAPQARELYQETDDSQQRREDEHWQRKMMQAAQMPPARR +PNKKQRRELQRLKSGQG* + +>NODE_2#PROKKA_00133 +MSQLANDHKAINLSQGFPDFPISEELIDLVHYYMRKGYNQYAPMQGVLPLRKAISTMFQK +NYGIHYDPVSEINVTAGATQALFSAISAFIKDGDEAIIFEPAYDSYAPAVKINGGMVKYA +HLEFPDFNINWEDFPRLITNRTKLIIINTPQNPTGSVLSEDDLQRLERITSGTDIIVLSD +EVYEHLIFDGITHQSVCRFPELAKRTLVIGSFGKTFHATGWKTGFVLAPERLMKEFRKVH +QFTVFASNTPIQHAIADFIGNEDNYKNLGKFYQQKRDMFVKSLNGSKFNVLPCYGTYFQL 
+LDYSNISDKNEMDFARWLVEKHNIAAIPIAPFYHKKDDHKVLRFCFAKKDETLVEAGEIL +SKI* + +>NODE_42#PROKKA_00197 +MNMVENMLDQAFKKLNPHEHPVLHSDQGWQYRMRRYQNILKEHGIKQSMSRKGNCLDNAV +VECFFGTLKSECFYLDEFSNISELKDAVTEYIEYYNSRRISLKLKGLTPIEYRNQTYMPR +V* + +>NODE_1#PROKKA_00064 +MDTIRIRGARTHNLKNINVELPRGSLTVITGLSGSGKSSLAFDTLYAEGQRRYVESLSAY +ARQFLALMEKPDVDSIEGLSPAIAIEQKASSHNPRSTIGTVTEIHDHLRLLFARAGTPRC +PHHGLTLDAQTVSQMVDTVLADPSERRVMLLAPVVHGRKGQYQELLEDLKSRGFIRARID +GTVYELDPLPRLDGHQPHDIEIVVDRFRIRSDMAARLAESFETALALADGSALIADIDKP +HTSEMVFSARHACPQCGWSIPELEPRLFSFNNPAGACPQCAGLGRESYFDPGRIITQPSL +SLAGGAIRGWDRRNPYYYRLIESLARHYEFDTEAPWSELSERTHRVLLYGSGEEEIDFTY +VSARGQKQQRRHTFEGVLNILERRYHETGSQAVRDELVRYQSSRACTACNGTRLGEIARN +VFIADTTLPDISNLAIDAVWRFFNDLDLPGRRGEIAKRIQHELHSRLGFLVDVGLGYLTL +ARSTETLSGGEAQRIRLASQIGSGLTGVMYVLDEPSIGLHQRDNRRLIDTLTRLRDLDNS +VIVVEHDEDAIRSADYLIDMGPGAGAHGGEVVATGTPEEVMNNPNSLTADYLSGRRTIPV +PQARRHPQPGQAIIIRDAHGNNLKHIEVSIPLGLFTCVTGVSGSGKSTLVLDTLQAAGER +LLNRARTEPAPHASIEGLDALDKVIAIDQSPIGRTPRSNPATYTGVFTAIRELFAQTPEA +RARGYKPGRFSFNVRGGRCEACQGDGLVKVEMHFLPDIYVPCDLCHGSRYNRETLQIHYK +GRTIEEVLNMTTEQAREFFANIPSIHHRLDTLVTVGLGYVKLGQSATTLSGGEAQRIKLA +RELSRRDTGRVLYILDEPTTGLHFHDIRQLLAVLLRLRSHGNTVIVIEHNLDVIKTADWI +IDLGPEGGHGGGQIIGEGSPEDIARLEHSHTGHYLMPLLKPHKLQKETTT* + +>NODE_22#PROKKA_00098 +LGEPLGSFEGETQTVMSVGDLAYWEPGNAFCIFFGPTPASSGPEPVAASQVYPLGRVEGD +WQALSALGASVTARISAA* + +>NODE_3#PROKKA_00156 +MSHTSSDHSIAFPLRQKSKLRHLAQELLAKADIRIDGDRPWDMRILKEGVLERILGEGSL +GLGESYMDGEWDAERVDEFVYHLIRAQLDREVRPWNLILHGLRYRLFNMQSLRRAWMIGQ +RHYDLGNDLYEAMLDPLMTYSCGYWKTATHLAAAQEAKLELICRKLQLKPGMRLLDIGCG +WGSLMAYAAQHYGVECVGVTVSEEQVKWARHQYKGLPVEFRLQDYRTLDEQFDCIASVGM +FEHVGHKNYREFMQVAHRCLDDGGLFLLHSIGNNSRDSGSDPWIDKYIFPNGELPSVGQI +GDAADDLFVIEDLHNFGADYDKTLMAWHANFEAAWPKLAYLGERFRRMWTYYLLSCAGTF +RARDIQLWQWVLSKRGVQSGYIRPYF* + +>NODE_4#PROKKA_00202 +MSVLKAQGLYKRFGGLQAVNNVSFSVDRGEVLGLIGPNGSGKSTTLSLLMGVTRPDRGSV +QLDGQEMAGWRTHRIAKQGLSMVFQHSRPLHRQTVLENIKLALLPDTLWQLFPPHTLDRR +AREIAERVGLHNVIDTLPGNLPFADLRRLEIAKALAQDPSVLLLDEPFAGLSPRETREFA 
+ELVHLFREEGRAVILVDHNVKEVAGLVDRIVAMHAGQVIAEGTPDEVTRDPKVREVYFGQ +SLENASGIHADGDRRSEGNGSEALLEIDLRSVRYGLAEALRDIQIQINQGECVSVVGING +AGKTTLFKSILDFQGYEGDVRWQGTSLTGQGPGQVASQGIALCPESRELFGFMTVRENLE +LGGHKLDRQAHESQMDRVFDLFPVLRQRQAQAAYTLSGGEQQQLTIGRALMQQPKLLILD +EPTLGLAPLVIENISEALHKLQQDSGMTLLLGEQNLTFALRHSQRIYLLETGNLRWHGPA +ERFIEEVGEDVL* + +>NODE_2#PROKKA_00126 +MQEYIKTDRQIHWLSQAIAKVNRDYVPAKKDDSHTNLFLDAAGKRLFGRWINTPKGKFIL +ALNLKTLSFEWLDNPLSVKTSISVFDKEGSSIEKEIREFPVSMGMSSKDISKPLHFEIPD +YGFSIIKSNRISSFGIKQWIYYRGLANFACLSVLGYLQSESEIRIWPHHFDTGVYAQVTD +SLGFGFGWAMADSMIGEPYYYLSGYKNSSIIIYNNLSKLNFGRWVTGEQWNGTVLPLHVL +ADNSTAKALEIINTYIKESIDWFLNL* + +>NODE_11#PROKKA_00019 +VNYEPCVKHKVVRNTVRDFAEAELRPIAHEVDQNSRFPWEVVEKMRGLQYFGLQAPRELG +GAGLDSISYAIAIEELSRVCAGIGLCVTVHNSVALYPLLKFGSPEQIERLALDLISGRRI +GAFCLTEAGAGSDAGAVETLALPCDEGYLINGTKIFVTNGGVCGLALIFAKTDLDHPRGA +PSVLMVEKERSGFAVGEIEDLSGMRCNPVSSLFLEDCLVPPENLLGRRGDGLRIGLSALD +TGRLGIAAQALGIAQGAFEAAVRYAKERQQFGKPIARFQTIQNYLADMATKIDAARMLLY +RACAAKDQGQPFSAEAAKAKLFCSATAREVCNLAVQIHGGYGYSKEYEVERYYRDAKVTE +LYEGTSEVQRMVIARAILSAPA* + +>NODE_8#PROKKA_00298 +MTAFPRTDVGGVSVSRLIIGTNWFLGYTHSTSAQSRTNSERVNHRDVVAGIVETFVEFGV +DSIMCPHTDTVIPEAIEEARQRTGKPLVVISTFALPVTKRTALDGFDLGEVERILDEQVA +RDVDIAMPHQSVTDIMLDKCSREIRQMAPVCALIRDRNMVPGLSTHAPETVIYSDETGLD +VESYIQPFNLMGFLMQVEVDWIARIIQNAKKPVMTIKSMAAGQVRPFQALTFSWNVIRPQ +DMVTVGTSSKHEARELCEMSLQILDRRATTQELQRTRSKASISPA* + +>NODE_7#PROKKA_00261 +VKKVWLSWSSGKDSAWALHILRQQKDVEVVALLTTLNEHFDRVAMHSTRRDLVEAQARAA +GLPLVRVPLPWPCSNEQYEAAMGKACAKAVAEGVEAVAFGDLFLEDVRQYRVEKLAGTGL +EPLFPLWGLDTRALAREMIAAGVKTRLVCVDPRKMPREFAGRDLDEAMLRELPEGVDPCG +ENGEFHSFVYGGPMLGEEIPVESGEVVERDGFVFADVKLKH* + +>NODE_17#PROKKA_00044 +MWQEHIAGRKIGLETGPRPFDPHKPCLLMVHGSGGRGETFRPQLSGLAPYLNPAAIDLPG +HGNTPGPGRDQVAHYADWLAEFIRRGPLRPALLGHSLGGAIVMQLALDHPDLAPALVLVG +TGSRLRVLPAILDGLLSDFDATLDLVLKYAYAPGADPRWVQAGREIMSQPGPRVVHDDFA +ACDRYDITDRLGEITAPTLLIYGDQDQLTPPKYGRFLAERLPDARLEIVAGAGHMVNLER +HAEVNRLIHPFISAFSPPASS* + +>NODE_2#PROKKA_00115 
+MAKQKTQEKIWYAIYVKSRAEKKVAIELEAEGIDFYLPLEKRLKQWSDRKKWVEEPLFRS +YIFVHISHKDYYRALVQNTVKYVTFEGKAVPVPPEQIEAVKVYLEEKEPIQPNDEDWETG +KEVEVISGKLTGLKGVLMEVKGRSRVKVEIEVVSSSIILHIPKSKLRLLE* + +>NODE_7#PROKKA_00264 +MKTCIDPVVGDILSSWRYDISGITPEMRIDYEEHLASCSVCRSRQRLHRAIDVVLIGLTT +LSTIVFVLALAVIHHVEPLRTFALFIFHIRDFSVVLTLQAVAFAGLVVSMFAWLLVAVAT +PVPTFLSGVAREQARELQSRIPEEWRNRFQRGAL* + +>NODE_1#PROKKA_00070 +VTETMARFRRTKIVATLGPAVDEGDVLARMIAAGVDVVRLNLSHGTHAEHRKRVKAVRKA +AAEQGRDVGVLIDLQGPKIRIECFRDGPIELKEDDAFTLDCGLGSNAGDSKRVGVAYKNL +PRDVQAGDMLVLADGEIVLEVREVIGEQVHCRVETGGALSDHKGLNRRGGGLSAEALTQK +DQVDIQLAAELEADFLAISFPRVAADVERARALLRAAGGTAAIVAKIERAEAVENLDEII +DASEVVMIARGDLAVEIGDAPLPGVQKRIVRHARARNTVVITATQMMESMVTSPTPTRAE +VLDVANAVLDGTDAVMLSEETAVGRHPVKVVEAMARVCLGAEAEPREDRDRRIGGDRFEL +VDEAISMAAMSVSQHTDVTALVALTESGRTPLYMSRVRSGIPIYALTRHECTRRLLTLYR +GVYPIAFEDEHESDEVLPDVAAALLERGLVTPHALIIVTRGKLRHVSGGTNSLHLVQVAD +VLPEQVL* + +>NODE_9#PROKKA_00312 +MFQFLLFVGSITAFIIGGLIVLIGIGAITGCAGGILAMCSGAIIAVLGAWSAITFFLPSP +DPSVPARETINLIRRNGRWM* + +>NODE_4#PROKKA_00217 +MGELVDVYRNSVQTWECDQMGHMNVQFYLDKADAGLLALTRMLGLNRRFLNERQARVRVL +ENHVRFLREQHAGSPLTLRAGLIDIRPDQLKLYFELTNPIQQAVAASFITQAVLESTAGK +DHLTLPQSALEKAQQYQIDWPRPEGPMGLESTPPRTPPTLQEADDLGMMPTYLGAVSAGM +CDADGHLAIRSYMGIVSDAVPHLLSRIRHDTREVPRPGGAALEYRWIYHQRPEQGDLVTL +RSAITHLGNKAYRLGHWLFDAETGHCLATTEAVAVMMDLDERKALVIPQTARASLEEMLV +KGFSI* + +>NODE_10#PROKKA_00011 +MAHGQEPAYEMKSEPAGNGELRVDLSGRLDMNALEGAVDQFGRLLKEQRPRRVELAVGGI +DYLDSGGALALTLMEEAARKAGTKFQLVQAGPEVRGMLALVDMDKIRRRPLRPAERGLGF +VEQVGQASLEVWRDFVELVTFLGDFLIALGRSLRRPRLVRWQETFFYMEQVGVNGLPIVG +LISFLLGLIIAFMSSLQLKTFGADAYVAALVSVAMVRELGPIMTAILVAGRSGSAFAAEI +GTMRVNEEVDALEVMGFDPTDFLAMPKVLAALAVVPMLTIYSCVAGILGGMVVGIWGLGL +TPYTYLHHTIDSLSAYGIVTALIKSVAFALIIAGIGCQRGFMVRGGAQAVGSATTSAVVT +AMFLIIVADSAFAILFYYVF* + +>NODE_12#PROKKA_00027 +MDQSKALAKYREFRPAWKSFLPYFLGVVIFTVGPRVNPQAPISPDLSHLIATCFLAFILI +TRFSNLYELADGRLGWRRSFPRALERQAPVEQITRIDLRRGIFHRLAGVAHVHVYLENQQ +EPYLKLFGVSEPEELRRLLLDLGASDQRVTGAWRK* + +>NODE_2#PROKKA_00123 
+MKAIWNNTILAESNDIVKIEGNAYFPINSVKKEYLKTSETHTVCPWKGTASYYSLEVNGK +ANPDAVWYYPEPSDLAKGIKGRVAFWKGVQVVKD* + +>NODE_2#PROKKA_00139 +MELNVQIDHVHLVVSVPPKVSVSRLMGILKGKLAIKLFKSYPSLKEKPYWGNHFWARGYF +VSTVGIDEDVIKRYVKYQEEEEKKIETQQQRFDF* + +>NODE_7#PROKKA_00276 +VTTANTFRANWPFVLIAAIFLGTALVGALTGRIWLQMRTITRTNDKVMYWATIWCSVIMA +GFMLLVAAHYAPVINLIKEL* + +>NODE_13#PROKKA_00042 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_7#PROKKA_00280 +MPDQTQFPPAESLRIRFTRDQIQSRVREMGRHIREDLHGESVVLVGVLKGAAIFLADLAR +AINLDCTFDFVAVSSYKSGTRSSGAVQLIKDLTEPIEGRHIIVVEDILDTGVTLSFLQSH +FERHNPRSIRVAALLDKPSRRIRPIQGHYIGFSIPNEFVVGYGMDYAERYRNLPDIRILD +SIPAS* + +>NODE_1#PROKKA_00077 +MQNTITGRHLDVTPALKDYVNTKLSRLGRHHEPPTSTQIILSVENLDHKAEGILQVRGGT +VYAEANETDMYAAIDILADRLDRQLVRHKERHASHHATPTARLNMEN* + +>NODE_1#PROKKA_00085 +LNRLLIRGGGRLDGEIRISGAKNATLPILAASLLAETPITIGNVPHLHDVTTTVTLLRRM +GVDVTVGEHMALEVDSNTIKDMVAPYELVRTMRASILVLGPLLARFGCAEVSLPGGCAIG +SRPVELHLKGLSAMGADIDVRNGYIYAKANRLRGARIFMDMVSVTGTENLMMAAVLADGE +TVIENAAREPEVVDLANCLNKMGAEIEGAGTETLHIQGVAKLEGCYYEVMPDRIETGTYL +VAGALTGGRVRVKRTRPDMMESVLEKLREAGAEITSKDDWIELDMKGRRPQAVTLRTAPY +PAFPTDMQAQFTALNAVAEGSGAVTETVFENRFMHVQELVRMGARIQLEGNTAMIQGVER +LTGAPVLATDLRASASLVLAGLVAEGETVVDRIYHIDRGYECIEEKLAQLGAHIRRVTS* + +>NODE_1#PROKKA_00082 +MSTPLAAIRLLALDVDGVLTDGRLWYSETAGEIKAFNAHDGAGIKRLMREGIAVALISAR +QSPIVTHRARELGIIQVHQGVKDKGHCLTETAQAVGVRLAFCAFMGDDEADLPAFAIAGL +RIAPANAVARVRDEADWCTQATGGQGAVREVCERLLAARQANAGGKS* + +>NODE_7#PROKKA_00269 +LRGQQETRVTYTLASSLDSVDKVEQTAEQMARNAGIDEDEAFRVSMAVREAAVNAVLHGN +AYAPDKRITVTFENNGSDLIIHIMDQGEGLDPAALPDPLKPENLLSGSGRGIFLIRSFMD +EVHFKQLHPGTELTLVKHLGTAKQSLQGGNSL* + +>NODE_1#PROKKA_00061 +METKTLSGNATPLTDEQWAGLARLGDLGNRLGALVDGPLSGPASAALDRIGALDGQYDLT +ALAEKLVGTLSALDRAGLLDLLRDNAQFIADNLNTLTPMLDQWLAHIAELPADEFKADAK +FALALLRKARLVTTFIQEKLAGELTTKAVEVTEFMQRNDTDEAVAEALVQLGRIYRSGLL +ARLGDLADTVAGLEEGTDLDDQIDVLIKSSTAGGLGTFLIFLKSVSIAMQKVGQEPEPKL +GGYIGMLHLLRDKEVQKGLRMLTVLPIYLEKRLEKSAS* + +>NODE_9#PROKKA_00308 
+MSRSFKSAGRMADPSDVAADICPVPAFIPDLMAVNGLADAGFSRLIFFPGMLFGSTEKWW +GDGGVRPSAHEGLDLCFFETSDGLRYRLDETVSVPAAFDGKIVRIMDDLLGRTVVVQSRC +APSDAPFYTFYAHIRPDNGLRQGDTLAAGTVFAAIARIVSPKIRLPAHLHITLARANDLP +PVDTLSWPVMNRLDRSVFLNPLDLLMCDYAIEDDTRFTPGSDAVKPVRRIRQDRKGA* + +>NODE_2#PROKKA_00148 +MLAISIVILLILSFQVMPVALGIDSHKMNVFHVASSIFLLILGQVLLFLLGILLGDKFMY +LMSGFKRFVLFIGFFIIATRMIMEALEIRKGKRTYLLDKAKQFILPSIAQAINTFLAGIL +FQLLIFNLSKDLIYLGIFALAFSVPFIFIKNEKQSMLAVSLLYMVGGGILSILSFYFLFI +* + +>NODE_1#PROKKA_00056 +MRHRNSGRALSRTSSHRAALMRNMSKSLIEHEQIRTTVPKAKELRRVVEPLITLAKSDSV +ANRRLAFSRLRDDAIVAKLFTDLGPRYRERPGGYLRILKAGFRPGDNAPVAIVQLVEEQE +TTSAAT* + +>NODE_12#PROKKA_00031 +LTTLETLGRFAAGLKADELPPRLGEAVNRCLIDLLGAACAGHGSGAARMVRAVAGPLFAA +GPAWLWFSGRRLASPGAALANAAAASAWDLDDGHRAAAGHPGAAIIPACLAAAQETGASA +RELEAALVLGYEVACRVAAARDLARLPTMASGRWVAYGVAAAAGRLHGLDAAGLAQAMAV +AGVLSPDLAAAGYSRLMGNLVKEGIPWATLTGLVAVKLAAHGFSGPLDILDHPDYYQAPG +ITAGLGGGQWAIEQVYFKPYACCRWCHAAIDALLALQDEQGLDADEIAEIQVHTFERALR +LSNETGPATLEGAQYSLPFTLAVAAVEGAAGLLPLRPELLGREDLVELAGRVRLEVDPEL +EAMFPERSPARVVAMTRSGRRHEHTVLDPLGDPANPLSTARLEEKFRALTAGLLPPSRQQ +ALLVRIHALEPEGLPPLLEELGRPLQPEK* + +>NODE_1#PROKKA_00053 +MAKPAAKARKRIKQQVVDGIVHVHASFNNTIITITDRKGNTLSWATAGGSGFRGSRKSTP +FAAQVAAERAGAVAQEYGVKNLEVEIKGPGPGRESAVRALNNLGFRVLSISDVTPIPHNG +CRPPKKRRV* + +>NODE_12#PROKKA_00030 +MIGKRIRLERVMDRGTRRTVLVPLVHGVGMGPIEGITDVLNTVDTISMGGANAVVLHKGI +VAAGHRRGGADIGLVIHLSATCADGSQTLVTEVEEAVCLGADAVSLRIEVGGADEDESLA +LLGAVSRVAADWGMPLLALMNPAPIPPAKMPKLLMRAARIGAELGADVVLVPYHKRFAEV +VAATPVPVVAIGGVKKTPPKQMLEMARAAVDAGAYGVSVGRTVFQYEKPGNMIKAICQVV +HRKATVKKAMEILAKKPIESTLYGGTVIW* + +>NODE_1#PROKKA_00065 +MAGSGKLRKDAGIIGLLYFSLGGIIGSGWLFGPFDAAKAAGPWSIASWIIGAAVVMLLAL +VFAELATMMPKSGALIHISHIGHGELIGRIWSWILFLSSVVTPPIEVMAVLTYLNNKIPY +FVDPSTHVLSTIGFFAAIVLLGVVVVVNFFAIRFVLWINNIATWWKMFIPAISIIVLMSY +SFHPGNFHLDLGSVNAAGMLTAVSTAGIVFSFLGFRLAINLGGETKNPGKYIPIAVIGSV +LIATLIYVGLEVTTITSVRPSDFANGWPSLAFKGDAGPFAALAVTIGAVWWSWVLYADAI +VSPFGTGLIYTTNTSRLGYAMAEVGSAPKKMQKLSRQGVPWISLLVTYVIACIFFFPFPS 
+WHQLVGYVSDITVLSYGIGPVVLLIMRKRRPEEPRPFRLKGAKVIAPMAFIASNWVIFWT +GCTTVTFLFGLLGSLFAVYAIWYYIIARKPSKEFGWKYAWWVFPYFIGMWLLSYIGPSTL +GPAHVSLFNVQPLDILPLGWDMIAVAVFSLAVLYTATSSALPREEADRYFDELKKLNLPE +EYSEGTESP* + +>NODE_1#PROKKA_00090 +MAETAVINHKKRKNSPRIVQSNDLTEAAYSLSRDQKRMLYLFVDQIRKSDGTLQEHDGIC +EIHVAKYAEIFGLTSAEASKDIRQALKSFAGKEVVFYRPEEDAGDEKGYESFPWFIKRAH +SPSRGLYSVHINPYLIPFFIGLQNRFTQFRLSETKEITNPYAMRLYESLCQYRKPDGSGI +VSLKIDWIIERYQLPQSYQRMPDFRRRFLQVCVNEINSRTPMRLSYIEKKKGRQTTHIVF +SFRDITSMTTG* + +>NODE_4#PROKKA_00211 +MKPFTWEDPLLLDLALDSDERMVRDSAHDYCQNKLMPRVLEANRHEVFHREIMNEMGELG +FLGPTIPEQYGGAGVNHVCYGLIAREVERVDSSYRSAMSVQSSLVMHPIYSFGSETVKQK +YLPKLASGEWIGCFGLTEPDHGSDPGSMITRAKKVDGGYRLSGAKTWITNSPLADVLVIW +AKLDDTITGFVLERGMEGLETPKIEGKFSLRASVTGQIMMDDVFVPEENRLDVTGLKGPF +SCLNKARYGISWGSMGAAEFCWHAARQYTLDRKQFNRPLAANQLIQKKLADMQTEITLGL +HGALRLGRLMDSGDWAPEMVSLLKRNNCGKALDIARTARDMHGGNGIADEYHVIRHVMNL +EAVNTYEGTHDVHALILGRAQTGLQAFTG* + +>NODE_6#PROKKA_00257 +MEGDIINLNDEHRSTARKIVAPLIEEIKNCQTIYTFSVAGESGAGKSITAAAIAEQLELA +GFSVKVFQQDDYFFLPPFTNDQKRRKDLEWVGIKEVDLALIDEHLKAAKDGVKTIKKPLV +IYGKNKITSEVFDMKGVNVCIAEGTYTSLLKNVDKRIFIDRDFFDTHNDRKKRGRDLIDP +FTEKVLEIEQCS* + +>NODE_10#PROKKA_00009 +MPIPGRLLTTAMAVMPHTDVDQALASALSLDIPFWPQLPRVNYYEDMYVQASEHFPGMVV +DHKERTLVFSMDKFMVELEETLAHLEEPEYFDISPEYSVVYHRFLELELADRPAIRGQLE +GPISFGLNVKDQDDRPILFDDTVRPFLLEVMARRVNVQLTRLKARNPNAFMFVDEPGLQF +IFSGLSGYSDRKAKEDLDQFFAAIERPRGIHLCGNPDWDFLLNLDLDILSLDVYSNGEVF +SSYARSIKRFLDRGGVLAWGLVPTNFEPFSAEDHVSLKARLKEIWSALESKGVDRELMLE +RSLLSPATCCLVNPDGEKTVDKAFALVRALSAELRDEYGLDG* + +>NODE_1#PROKKA_00066 +VQATDWLHQIAGPALVLNFADGTVIDINAAGRRLVGIEGQGLIGQDFCGFFVSSDADCCW +PTLQRSINLQGGFRYEGLHLRTPGGAMRRVNVSAELLQSEQERAVLMLLQPGTASSPQAT +DHEKELAQYATVGLYRLDAEGRLTHANHALARLLGYETVGQLLDSAAVQRSQWYVCDGVS +EERVSDVNDAAIYRCKVQLRRAHGAAFRAVEAIREIRDVRGQLMSRIGTLREISDQSSSE +QALAISEDKYRSLVEHSQDGVFVIRDGVYVFVSQVYSSMLDYAPEEMVGESFLRFFAPED +RQKIVDVWHERQAGHWEQGAYEAHLLKKDGTRVLVSVRAGPIRFAGAMASTGTVRDITAY +RDTQQQLSMAEQRYRDIFEHAVIGIYQTAPDGRLLAANPAMAQILGYDSVEELQEQVDDV 
+GELFFDRIERDTLIEKLEAEGRMYGAELRLRHRKGTQLWVQDSARVVYDANGKLVCYEGM +VADITARKIVEQALHRSEQLFRTLVEHTHVGVIMVREGVVTYANRALAHMLDYAESDLLE +QPLASLFAPESRDCVERLEQELKTAVGSNIYESSMLAADGTRRVRANLSVASVIFEDNPV +MIITAHDLTREKRAEARLRRLATHDPLTNLPNRVVLRERLAQVLKKTRETGNVDWAVLFL +DLDAFKLVNDSLGHAAGDELLRQVAVRLRRAVRHDDLVCHHGGDEFVVLAFNITHEIDAV +ELAEHIETAMAEPFRISDHEIYNQITIGIALGRQEYELPEEVLRDADSAVAAGKRLGKVC +HVVFSSSMHVAAMERLELETTLRAGLTRGEFDCYYQPIFNVKNNRIESLEALLRWHHPEQ +GVLRPHSFLQVAEESGAIVPLGWIGLRRALAACSQWQSLGLEREVSVAVNLSDAQFRLPQ +LPEQLAHELEQAQLPFHLLHLEVTERVFLETPGLARRTLGRLHALGVKLYLDDFGTGYSA +LSYLRELPFDALKDRS* + +>NODE_9#PROKKA_00324 +MTNATNDQPFRPARFTEQRLITAILDGTCPPGSVLPAERRLAEQFGVTRPTIRETLQRLA +AEGWITIRHGKPTRVNDFWETGGCS* + +>NODE_25#PROKKA_00099 +MMYGDSSAGAGIEAQSLLCPRCGKAQPVRKKLLLVLPEGDKYAYFCAVCGEEVGSKLEES +QGGPSFIPR* + +>NODE_9#PROKKA_00309 +MGLFSIFGGKPPEELERRGDFHYEAGAFGDAKMAFEKAIDRIERRFPEKKHLLPRIMEKY +HLARNALAKMHVENGDHMITVRDYEEAGALYRLAMELTSDEAFAAEIHGKIAKLKDLIAD +EDEPEMEWVGDAREEAYVDDWGGAEEDAGGAEDDDAYADDDMTADAEGMADETDADAQLY +DSPENLFHVLVSALPEAVQDAYLGYGEAFAAGYIALNHGEFRKAVKELSRALEENASAKT +LIPVELATAYMHLNDPDHARQILEDFLKENPGEIRGYQLLCEILWEAGNTADARNLLSGA +PNDIQTTRPMQMLQGETLFQVGLYDEAEKVFTRCLEIHGKDEIVNRGLAKTYEAKGQIEK +ARDLYADILNRCIMCGSAADPIIKRRYADLCIKSGDKSLKLLELYFGLAKEDPDNRADYF +FRIADLYEAQGKDIEARKYRKLSTQAGGGKRPQ* + +>NODE_4#PROKKA_00218 +MKAGMVAPLDATQQSRLRVLLALFGLVWLINAGFQAVAWLAAPNASTHFIHALAKSTTVV +PRWVQPLLMTGLHSAQSLGLGIVAAIMVLLAILLGLALLTQRKVAFAARVGIIYSIICWI +FLDGFGFPYANGQTDPGVFVAYAIAFLFVLSVAPVFDREGTKAPEIDERLWHWARIAFGL +LWLFDAVLKWIPPFLLHFSSQITSVIPGQPHWIAAWLSFVAELVHAIGPIPVAVVVALAE +TAIAIGLLSGRWMRLVIPVGMLYSVAVWTTAEAFGGPYSTAGTGVRGNVLGNVLIYLIPF +LFLWVGNSSQRSAAETTGRTLTD* + +>NODE_6#PROKKA_00240 +MGDPKGFMTVPRKEASYRPRNERIYDFGEVEQTLNEEDRKLQTSRCMDCGVPFCHWGCPV +GSKIPEWQDAYYRGNEEVAYEILHSTNSFPEITGRICPAPCEKSCVLSINEEPVTIRENE +AANVENAFTQGWIKANPPSIRVV* + +>NODE_8#PROKKA_00287 +VYFRQYKVEGLGCYSYLIGCPAAGTACVVDPERHTGQYIQTAEHQGLRITHVFDTHLHAD +HITGSAELAAATGATICVHPAIGAEYEHEDLLDGQHYRFGAAELEVVETFGHTPNSVSLA 
+LTDHGRSEDVFALLTGDLLFVGDVGRPDLAGADLLEEQIHNLYESLYTKLGRFPDWTEVY +PAHGEGSLCGKGMSAKPMTTLGFERLNNPLLADLEFAEFHRIMTEAFQVRPDNFAAIVAK +NQRGPQLLREAPAFMELSVLQAERALAAGAQIVDTRAQSAFGAAFLPGSLNIGVSPSSVN +WLGMLVPADTDIIIVADSKDVACQVADQFRRAGYDRLIGYVPDGVASWALQGKPMDHLPQ +LTPASLKHVVGRYGNHVILDVRTDAEWATGHIEGAIHLPLPRLVREGIDLGKDRHITTVC +RSGYRSNVAGSFLKSQGYEHVFSLIGGMTAWQAANR* + +>NODE_9#PROKKA_00320 +MPTATIRQQLIELLSENKYDARDLSQRLGVRETVVYDSIPHITRSVTSMGKKLKIVPSRC +TSCGYTFKDRKRAAKPSRCPTCKSERIAKPKFYIV* + +>NODE_8#PROKKA_00300 +MTQGLGPGLVSPAPFGAEEKPRGGLGEMGAFYAKGVTYWRSVTTP* + +>NODE_2#PROKKA_00112 +MDWIIVIILAILLVAFIILFLLIQKGFRNPVSEHTIPEDLPFDVQEVEYPTKNGKTIYGW +WIPADPKAATVVFVHGWGRNAQRMMPYLRKFCCGKFNLLAFDARGHGNSDHDGFSNMLQF +SEDIIASMNFIEQEHKAENNMFYLIGLSIGGAASIYAAGHDPRVKKVLTVGAFAHPASVI +TKQIKDRHIPYFPMIWFLYRYMKYVKNLDVDAIAPEKHIAKAQAHFLLVHGEIDQTVPVE +QGKRLKKAAGDKADLWLMPERGHSDCHLEHGFWEKLMEFFEAPKTKVQKS* + +>NODE_11#PROKKA_00013 +MTAPRWSATRASASSQSSRAPPGRERCSCRRPSSRRRRAGSSSRARPREPPLMAALRRAS +WGSSPRRVSSSTTSSGSRGEKFTRRQRLTRVGSTSRGSCTVSTRMLLGGGSSRVLSRAPE +LARLSRPASRITTMRQPPPKGAKLSVRITSRTCSTLIWAEGESEGGSTTKKSSWLPAARR +RQGPQAPQGVSPGSWQLAAANRARAVVVRPTPRGPTKR* + +>NODE_5#PROKKA_00235 +MPAKIPDPVELTRRLVAFETINPPGQEQACAAFLADLLAGAGFDCVLHPLGDDRASLVAR +RGRPTEQRLPLAFTGHIDTVPLGAVPWKHDPFAGEIVDGRLYGRGSSDMKSGVAAFVVAA +IAEAERIGDGAGVELVITAGEETGCDGARALADGGHLGRAGALVVAEPTANRICVGHKGA +LWLKAITHGVTAHGSMPEHGDNAVYKAARAIGRLADFDFNVARHPVLGRPTLNVGTVSGG +LNVNSVPDRATVGIDIRTIPGMDHAPLRDGLGGVIGEGADLETLCDLPGIWTEPELPWVQ +RAAAAVAAVTEQPFAPESVAYFTDASVITPAYGDIQTLVLGPGEPSMAHQTDEYCEVERI +VEATDIYRRLIADWMKPES* + +>NODE_8#PROKKA_00290 +LSDSSRTSSARNEPGETVLAGTGVSPGTVIGKAVVVGASVVSVREHELPVSQLEDEVGRF +RAALEKSRRELEELRDRARDEKNQDLVDILEMQVMVIEDGMLDQEVSDRIRDTRRNSGFV +LKNYVDEFCDQLVKAGSAFFAERTNDIQDLAGRILRQLLGSESVDLSELPEPCIIIAHDL +SPSDTAGMDRDNVLAFVTAMGSRTSHTAIMARALGIPAVVGLGESLAQVGDGVRLVVDGT +QGRVVVSPENATLVKYHERIEQEKAWRAKLEVNALLPAETRDGFHVSVAANVELPEEVER +IRRVHRVGIGLFRTEFLFVKGGSISDEEQQYAAYRRVAEDVAPHSVVFRTLDIGGDKFLS +HLDVPVEINPFLGMRAIRFCLRREDVFRSQLRAILRASAHGGVRILFPMITTMEELHATL 
+AILDDVKAELERQGIPHNPDLDVGIMIEVPAAALIADKLAPHVDFFSIGTNDLVQYMMAV +DRSNPDISYLYQPGHPSVVRLLDRVVRAANEHGRWVGICGEMAAEPLFVPLVLGLGIHEL +SMSPVAIPIVKDLVRDINMLEAEELVDQAMACGSAEEVTQLCRSFVERIAPELFLD* + +>NODE_3#PROKKA_00157 +MQTLNIPSLATVTAITLLTTACANNPTPETLQSRGLHPLDTTQLHQLYSKTLQFDWRNAR +SRSGSGEYQPNGEISIEWSGESFNGKWRILNNHFCATYASIHNGQEQCYMVYQTGARRYV +AFLNGDYSYSFNVKKVK* + +>NODE_7#PROKKA_00272 +VCEGLGFESGVDKKFSEAPFFYWLYTLLIVGGAAVVLIPHFPIIEFSIFSQMLNGILLPI +VVVFMLLLINRKDLMGEYTNSRWFNAVAWVTAVVVTVLSVVLMVQSIRQV* + +>NODE_25#PROKKA_00103 +LNRSLQNQISARYWSSHISSRNNIQNNENCACDPRLAPCLLVCRASLIPFLPFLEGAKLK +KLGAFYLGRRRRLQAGKHAGSSRHQSGPGAPRRFLFGGNCDPPWRATHPPPRRQGRPAGR +PLSTMQNKPGKGITLLSSRPPAVPPRRGRSPAAPPSPWRPAWP* + +>NODE_9#PROKKA_00316 +MAKADESDRTQLEALFSIGAGVRSLDELLTRRLIFLMGKGGVGKTTLSVALALTAEMMGK +RVLLTEIGDSQGIGRYFDAQPDVRPRQVSSAIWAARVDPKDELTAYLHYHMKSGFIANRI +TQSRLFDYLLAATPGLKEIMTLARIWRWEKAKNKAGTPLYDTIIVDAPATGHGLSLLRLP +KMLVEMIRVGPIASQVNGVQQMLLNPERTALTLVTLPEELPVNETREMIDIAVDEVGIPV +QAVFINGVHPVFVTPDEFSRIQELDRDCPDADPDCPDLRFALDVARRQIVRNAAQQVQMN +EVHAAAPGHVIHVPYYYTNDLGPEEIRTIAASLHRQISEAPRGGGR* + +>NODE_3#PROKKA_00166 +VDESREKSGRRGAGVAENPTRELLDAYASHHRKIARDSLIRLLRNPIGSLMTWLVMGVAL +ALPLGLMLLLASAQSLGEGWSDSSRINLYLKQNVDETAAMNLQGKLRSRGDVRDVQLVTR +KQALAQLRKDSGLSAAFDYLNDNPLPNTLIVAPALQDPGAVQSLSQSLKQLPQVAEVQVD +LAWLKRLRAMIGLVVNAVWALGVLLALAVLLVVGNTIRLAIENRRDEIVVAKLVGGTDAF +VRRPFLYTGAWYGLGGSIVAIILVALFEAWLDGPVNRLASLYGSHFQLQGAGFGDFLLVI +MVGVLLGWMGSWLAVKRHLDAIEPR* + +>NODE_7#PROKKA_00279 +VYESRPQTTPEQHSSSIEGSHPTDAAAAAGSRSVELAQTDFLLRLTDALNTTLNLQTLLQ +RTADLVRTVIDYRIFAILLLDNRTNDLRMRFQTGHRPEVERMRIRLGQGVTGQVALTRKP +MLIPDVRDVENYINANPDVHSEIAIPLIVKNRVIGVIDIQSEQPNYFQPDHLHLLTLTAS +RIAHAIDNARLYTRVSRQAQTLEVLNEISRDLSSILDTDRLFERISQLLRRLFDYQMFTI +WTVRPIEHVLENRFALRFGERYYPNETIPVERGIVGAAIAERRPMNIPDVRRDPRYHKVN +EETRSELAVPLMYKSKVVGVLDVEHTQPHYFSEDHVRALTTLAAQIAIAIENAQLYQRVI +QQEQRLDHDLQMAREVQLRLLPPSLPSRPHAEFAARFLPARTIGGDLYDYLNYDDQRGAL +AIGDVSGKGAAAALYGAVVSGTMRSQASLKPSPAAMLQALNASLHERRLDSQYVAMLYTV 
+WNDENLTLQIANAGSVQPIYCRSGEIETVPVEGFPLGMFPQAEYEEISLSMRPGDSVFFF +SDGITDGENEAGESFEERLTDSIARHHHLPAEEAVNAVFEELQEFQGDCDRFDDETLIAL +RVV* + +>NODE_9#PROKKA_00304 +MTTPDINFTDLLQFLFMGIQRGSIYAMVAMGVV* + +>NODE_10#PROKKA_00004 +VAEVLRVKELVKHFPVRQGFFGRRQGVVHAVDGVSFTLEENQTLGLVGESGCGKSTIAFC +LLRLIDPTAGEVWFQGRNLAAAGSEELRRLRRDIQIVFQDPFGSLNPRMTVAQIVEEPLL +NHLELSAARRRELVAEGLSMVGLLPEHAQRFPHEFSGGQRQRICLARALVLRPKVIICDE +PVSALDVSVQAQVLNLLSRLQRQLGLSYIFVSHDLAVIRYVSQRVAVMYLGRIVEQAGVK +ELYARPMHPYTQALLSAVPVPNPRRRRRRIILEGDVPSPLDPPSGCHFHPRCGRAMEICR +HQAPELRPLADGHLVACHLYDEVRSAPGGTVEGG* + +>NODE_2#PROKKA_00146 +VVLLLSLAIIIAFRKFSSNIHFLENIGGVLGTVVSASFLTLIGIINFFILKNLYRMFKLY +KKGEGAEKRIEEITENLLNKRGLLNRFFRFAYRSIDKSFKMYPLGFLFGLGFDTATEVAI +LGISATVAKDSQLPIWGILAFPLLFAAGMSLMDSLDGLIMMRIYDWAMVDAVRKVFFNMV +ITGTSVFVALAIGTIEWLQVVSIEAKESLSFFSFLNHLDFSVLGVGVVIIMLISWLSAFV +YYRKVLS* + +>NODE_3#PROKKA_00192 +MARILIVDDSPTEVKKISSLLEKHNHEVLTADNGADGVAKARAESPDLVLMDVVMPGLNG +FQATRQLTRSPDTADIPVVIVTTKDQETDRVWGTRQGAKGYLVKPVKEDELIKTIDDLLA +* + +>NODE_5#PROKKA_00230 +MITIDQTLTPKDLTEDLARFWTLSGQKIRQLASRWNAGDGAPVFTVEGRYTSRGWTEWTE +GFVYGSALLQFDATGEDWFLDYGRRGTREHMGGHVTHTGVHDHGFNCVSTYGNLRRLMDE +GKLPENADERAFLDMALASSGAVQAARWTNLGHGKGYIHSFNGAHSLFVDTMRSLRSLAV +AHMLGRDLKGEHDQSISLIERLAAHARTTAETAVFFGKGRDAYDEWGRTCHEAIFNVKDG +HFRCPNSQQGYSPFTTWTRGQAWITLGYAEQLEFFRALEAAGRPEADDCSDLMGIMRDGA +RATADHYIANTPTDGVCYWDTGAPGLAAMPDHKDRPADPFNEHEPVDSSASAIMAQGLLR +LAAVLAEEGDDSAERYRQAGLTVARALLKAPYLSEADGHEGLLLHTIYHRPNGWDHIPAG +RKVPCGESCQWGDYHVRELALMIGREAAGQEPYRFFNGLV* + +>NODE_2#PROKKA_00109 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_1#PROKKA_00092 +MKRAPVIPKHTLNTQPVEDTSLSTPAAPMVDSLIARVGVMARGNAITLPVCGRDVKFTLE +VLRGDSVEKTSRVWSGNERDQELLTEDALDDLIPSFLLTGQQTPAFGRRVSGVIEIADGS +RRRKAAALTESDYRVLVGELDDEQMAALSRLGNDYRPTSAYERGQRYASRLQNEFAGNIS +ALADAENISRKIITRCINTAKLPKSVVALFSHPGELSARSGDALQKAFTDKEELLKQQAS +NLHEQKKAGVIFEAEEVITLLTSVLKTSSASRTSLSSRHQFAPGATVLYKGDKMVLNLDR +SRVPTECIEKIEAILKELEKPAP* + +>NODE_6#PROKKA_00247 
+MIRYNLHQHSIFSDGAAEPEAYVQSALNLGFEAMGFSEHSPLPFPTKFSLKAERVEDYIR +ETERLKEKYNDRIDLYRALEMDFIPGYSENFTEWRKKAQLDYAIGSVHMVQPEDDGELWF +IDGPDRSIYDDGLQNFFGGDIKKAVKTYFHQVNRMVETQDFEVVGHVDKIKMHNQNRYFT +EEEKWYRDLVEETLHLIKEKDFIVEVNTRGLYKKRSNRLFPDDYALQRISELGIPVLISS +DAHKPEELNLLFETAEKRLLDMGLGAVVRFDHGKWKDFPLS* + +>NODE_39#PROKKA_00151 +MGKIVGIRFKKGGKVYDFDAGHFVLSVGDMVIVETEQGQALGEVVRPPVSHVLPELAPKN +RCCEGCEDSGDEPAQLKQVYRLATEEDLRQLVENAKLEKEAFRYCQERIAARRMDMNLVK +VECFFDRSKLMFYFTAEGRQDFRELVRDLVSRFRTRIEMRQIGVRHEAKLLGGLGSCGRE +LCCATFLRDFEPVSVKMAKEQNLSLNPTKISGLCGRLMCCLTYEFETYKGLKQGMPKLGK +RVSLNSGLEGKVIRQNVLKRQLTVILSDGREFTGTPEELEQLEPLAKPQAPPKPRGGQRQ +QRNQQQGKGQQQTNSGGKSRSRNRRRKKKGS* + +>NODE_13#PROKKA_00034 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_2#PROKKA_00120 +MKIIKLAMFSKTQTMLQHKDINKLSELKNGFTQSWVEPDFIFRSLKCFSFSSLNKGLSPL +KAKGYSFEWVMSLLISLPFMGISSVNRLAGVVEAKKDVFYRLKNNSSISWRYIQWLFACK +FNTITSESTGNNIQPRCLIFDDTVIEKTGRFIEKVSRVWDHVQNRGVLGFKLLVMGYWDG +TSFLPLDFSIHREVGKNKEYPYGLRKKDYRKQFKKKRSSQTHGYDRSKEAGQSKIDNMIK +MFKRALSHGFSIDYVLVDSWFTCEAIIQAVTQVKNQTVHLIGMYKIARTLFEYQGTKQTY +SQIRNRLGRPTRCRKLRLYYLQATVGFKGHQLQLFFTRQGKNGKWKVLLTTDCSIGFIRL +VEIYQTRWTIEVFFKESKQLLGLGHCQSNDFDAHIADLTITMIQHMLLTLRYRYDTYESK +GALFENVKETIAIRKLNERLWGLFVELLQILTDLFEIVDAMELLEHIITNGQALERLKLL +FDLVPENNEAA* + +>NODE_2#PROKKA_00142 +MFSKIFINRPITAIVISLFIIIVGIISIFKLPVAQLPKVTPPVVSVSGHYTGANASDVEK +AVATPVENSVNGATGMLYMNSTSANSGSFNLNVTFKIGTDVNVDAMEVQNRVNLATPILP +AEIRQTGLSVKKASTSMLEIVGLYSPHGTHDEKFLSNYAALYIQNALSRVDGVGDVHVFG +NSFAMRVWLNPQKMANLHLTTQDVINAVREQNAMIPAGSVGASPAPKGQTFQVTVQVKGR +LVTAKEFGNIVVGTNPATGSVIRLKDIARVKLGSSSYAGTPRLNGKVGCGLAVYQTPGGN +ALETADLVKAKMEQLSKNFPTDVAWTTMVDNTRFVQSSIDEVVKTLFEVLLLVIIVVFFF +LQTWRPTLITMLAVPVSIIGTFAIFTLIGFTINTLTLFAMVLAIGIVVDDAIVVVEAVQH +NIDRYGLTAKEAAIRAMSEVGGPVVAIALILTAVFIPVTFMPGITGMLYKQFAFTIAISV +LLSAFVALTLTPALCSIMMRPNPVNENSKGLNRMFYKFNIWFDKTVENYGATVRKTIKHA +PLMFILLGAIYIGTGLFSKYTSTSFLPNEDQGMVMAIAQLPPDASTQRTVKVLNQFGKIL +NHNKNVKRYFLAPGFSVLQGAQMSNFGTAFIRLTNWSKRKGKNSSIQAVIGQLMGASSQI 
+KGAKFMIIAPPPIRGLGRTNGFSFVLKQSTGSIQDLEKVQNKFLAALNKRPEIQMAYSTA +TFNYPDIRVTIDRVKAKKMGVSLSALDNTIQTFLGGYYINDFTLFNRTFRVYAQADSSYR +ANINDLSEYYVRNNQGNMVPVSALVNITRGTSAPVITHYNMDRNVNISGNAAPGYSSGDV +IKVLRQVAQQVLPEGYSYEFSGTTLQEIEGGKTSTFIFILAIVFVFLFLSALYESFAVPF +AVLLAVPIGIFGAYLSLHIGGLSSSIYAQIGIITLIGLAAKNAILIVEYCKMKYESGVPL +VQAAVEAAKLRIRPILMTSLAFDLGVIPLMIATGAGANARINIGYTVFGGMLTATLLAIF +FIPLFYVTIIKIRDRKKKPELVKTED* + +>NODE_1#PROKKA_00060 +MLVVIYALTTILTAFIGNNAAAVLVFPLAYAAATKLGQPFLPYAIAIAMAASASFTTPIA +YQTNLMVYGPGGYRLSDFVRFGLPLNLIVGVISVVVIAWLWMP* + +>NODE_2#PROKKA_00136 +MDKEKKQKRYQRLYKQIQDLIVKSSNNPLSNMATINAVLYHKMETFFWCGFYLYQDGKLQ +VGPYQGSLACINLAEGTGVCQAALTQQKTLTVPDVEAFPGHIACDSRSKSEIVIPVRNRE +NELVGVLDVDSKEHNSFDEVDEAELEKIVRLVYFPEG* + +>NODE_1#PROKKA_00052 +MARIAGINIPPHKHTVIGLTAIYGIGRTRAAEICATAGVDPTRKVKDLSESELEAIRQAL +TAYKLEGDLRRELNMNLKRLMDLGTFRGIRHRRGLPVRGQRTRTNARTRKGRVRRSAKR* + +>NODE_25#PROKKA_00100 +MRSIVIDELSSPDVDRLSEHLDQTLTPSGLSGVYWLELPEDLLLPLQQEHRQSCGPHRVA +VVVEEGCLRLELLVRAQESLRCNCTAYASSAQRDFLLDYLDRLIEELGLRT* + +>NODE_4#PROKKA_00223 +MKTNRLFRWSITGIASVIPLALLAIFIFLLINSWPAIKFSGWHFLTGSQWSLGNEYGDLV +TVNGQEVPPGADYGIGFLIAGTLLSSFLALLIALPISVAASAFLAEAVPKRLQNTLALFV +ELLAGVPSVVFGLWGLVVLVPFMNHYIYPGLVHVLGDVPFFQPPTGAGYGLLTSSVVLAV +MIAPLITSTVRGAIERVPMVQREAGLALGATRFEVLWKTVLPSVRRVVIGAGILALGRAL +GETMAVLMVSGNALGYLPHNIYSPISTMAAFIVSQLDSALEDASGMATHALSEIALILFF +ITLIVNVIARLLLWLARD* + +>NODE_2#PROKKA_00132 +MKTIDNYDFKGKKVIVRVDFNVPLNDQFEITDDTRIRATIPTIQKLRESGGAVILMAHLG +RPKSGPEDKFSLRHVVKNLSEKLQTEVQFANDCIGDEAREKAAALKGGDVLLLENLRFYK +EETAGDEAFAKKLADLADVYVNDAFGTAHRAHASTTIIAKFFPNDKMFGYLMENEVKSLD +KVLHHAERPFTAILGGAKVSGKIEIINHLLDKVDNLLIGGGMMFTFIKGDGGKVGSSLVE +DDLIETANAAREKADKLGVSLFIPKDAVTADKFANDANQKCRPSGEIPDGWMGLDIGVET +SETFRQVIENSATILWNGPMGVFEMDAFAEGTVDVAQAIVRATEKGAFSLVGGGDSVAAI +NKYNLQDKVSYVSTGGGAMLEYMEGKTLPGVAAIKDE* + +>NODE_11#PROKKA_00022 +VNYTDPQELLPVVDAQDRVIGTMTRQEIHAKGLLHRAAHVLLFDPAGRLYLQKRSAAKDT +YPGKWTSSASGHVDPAESYAQCAARELAEELGLEAELRPLGRLPAGPRTENEFVEIFTGV 
+SAEPPRPNPQEIETGRFFTPAQALKLAADPTRACPSLGAVLELWQELEGD* + +>NODE_9#PROKKA_00317 +MYQIKRYANGRFYDTVEKNYVTREQISKLLGAGKKISIIDTRTEKDITDDIVSRIKAKKQ +NPSKSKKAGKSNKAVDDSTGMLVQLFRKGGDALFDYGKRYASMWQNMVTMSRDEVDKLVN +MLVKDNKLTELEGSKLKKEIDRYRTNIQGWITRNIDNRVNEVLNRMNLANRDQILELTGK +IEELNKRINRLGKEKKGPAKTKKTS* + +>NODE_4#PROKKA_00200 +VIAWVVLLALWLFVTRSRPGKAMLAASMSRTGLALVGYDIGKVYLQVWGLYGLLAGIAGV +LLASFTGASASIAISLTVNAFIIVVLGGLGNVAGSLGAAYIIGLLGTLTAYLISPSVREI +PGLLVLILILYVRPQGLFGRH* + +>NODE_2#PROKKA_00145 +MLLKKLNQNKFFSAFLAVSFLLSGFSVSLKAQGQKAVYQFNLNDCIHYALQNQASVKNKI +LSEKISRENVKEAYSKLMPQVSAGAKYQYTIKRQVSFIQGNPVLFGVPHQLQGYLNVDQT +LFDPSVLGSAKAAHLSENLSKENTQLSKIDVAANVKKAFYGVLVFREQLNLLNANIKRDT +KSLADTKNQYKNGLAQKVDVDRIQVLVNNDVTARANASRNLNTLIQTLKYHMGMPIKDSL +VIKGTISDAMLTEILPENNPMFYKNRVEFQQAQTTLAATKLLKSNVIRSYFPTLSAFYTL +EAPYNSNTFPGLFKDKLYPTSFVGLQLSIPIFSGFNKHYQYQAAKMNIQISKNNISDLEN +NIKLEYGNYFRQYKSDIANLKTQKENTKLAKLNYDNLKYQYDNGVQPLIEVLNAETTLLQ +AQDNYINALYQALVDKVDLDKSLGKLKY* + +>NODE_42#PROKKA_00196 +MSKPKYPFEKRLEVVNHYFTTDDGYRIISARFGVPRTQVRTWVALYEKHGEKGLIPKPKG +VSADPELRIKVVKAVIEQHMSLNQAAAHFMLAGSGSVARWLKVYEERGEAGLRALKIGTK +RNIAISVDPEKAASALELSKDRRIEDLERQVRFLETRLMYLKKLKALAHPTKK* + +>NODE_12#PROKKA_00028 +MALLVSFFGKGRRGACLLGLLGLLLLAPPAVAAGAGYVVAPVNKGADLRLLPSEAWPPPE +GSRADILIKLAYLRGIMDALQYLELAPRSAERVLNAYQGLSLQDLAARIDAFYLTDPRRR +DLPPAVVLFRLLAPAGKDAPAVKGPARPGGQGGRSGK* + +>NODE_4#PROKKA_00207 +MRAPDELINGRWASEITAALPARIHQGVAQHWEADPEAEALVDHQVRWSYRELSAAVAAA +RQWLVGQGVRPGDRLMLVSENGRALVALLLAASGLDAWAAIINARLADNEIDAIRDNCDP +RLLIYTTEVSPDARNHAQRHDADIVHLDPLGEFAVGPAAAQSLPQPVVKDGTQVAAMIYT +SGTTGQPKGVMLTHRAILFIARVSGGLRNLRPGRHVYGVLPSSHVFGLSSVMLGSLANGA +CLHTVPRFEAGALLDALAGERISVLQGVPAMYARTLEYLHQHNRKLVAPALDYLSAGGAP +LDTDLKTRVEATFGTTLHNGYGLTEASPTISQTRIGEDSEASSVGRILPGLDYEVVHLKS +RQPVLQGEVGELRVRGPSIMRGYFRKPEATRAVLDDAGWLDTGDLARIDPDGQLHIVGRA +KELIIRSGFNVYPPDVEAVLNEHPAVTLSAVVGRQITGNEEVVAYVQLAPGHDMTESALS +EFAAARLSAYKRPSEIHILDQLPVTPAGKILKARLRALANQTPDR* + +>NODE_4#PROKKA_00214 +MHGLMMNRQLLISQILEYAAVNYPEQEIVSRTTEGPIHRYRYPELRDRSCQLAHALAGLG 
+VTQDDRVATVAWNNYRHLEIYYAVSGMGAICHTINPRLPAEQFQFIVDHAQDQYLFVDLT +FVPLLEKLHPQLNSIKGYIIMTDEAHMPETGLPNAHCYETLIKDQPTRYDWPEFDENQAS +SLCYTSGTTGNPKGVLYSHRSTLLHAFSVMAFPGVDFGEESSLLPVVPMFHVNAWGMPYF +ALITGSKLVFPGPRLDGASLAELINSEGVTDAWGVPTVWLGLLRHMNESGERFSKLEHVQ +IGGSAAPRAMINEFQERYGVEAIQGWGMTEMSPVGSVSQPTPFMRERMSAEEQLTVRGKQ +GRALFGVEMKIVDADGKALPRDGKARGELLVRGPAITSGYYRNDEANAKAFDDEGWFRTG +DVATIDPDGYMEIVDRVKDVIKSGGEWISSIDLENEAVGHPEVAEAAVIGVRHSKWAERP +LLVVVRNPDSAVTAEAIVEYLSERVPKWWLPNDVVFVDELPHSATGKLQKTKLRDDFKDH +RFSDDEA* + +>NODE_4#PROKKA_00220 +MQRRYLATLLAGLMAVPAVAVADSGSSSSMPDMNIKLHAHLHGSVDFSNTGGRAVPNTEA +YGEAAGTPARATNLSNNNSTIGFTGQHLVPGAFMAIFQVELALPGSETGVNNSYGKGNHV +SKNVGLHDTYFGIANPLGTLLFQPSFENQGAYLSRPFNMFKDTVGDFNSIIDTANFPNGG +PNGLPAISFAGQANYAISYASPKVKGFDAVLSYTEDANGGDFGTNNTYGSGYCAGTPNTN +HYPNCYGYPNANQHNNAWSFGVQYENEFSSLQSKVNGLINYSQINVQGNTSGVPLGGFTF +ADATQPNQTPPSSKLQLKALELAGKWDYEPTGTTAIAVWERSTGLYSRDAYSLGFSQAVP +GNNDLMVSWIHAGNLSSPLANICDPTKTVCSGSEVKQSGANEYVAGIKHHFDKQVSAYLI +YAYTRNNAEGLYGLGGPNHGQSVYPLNPGDNPQSLSLGMTWDF* + +>NODE_13#PROKKA_00041 +MRFKTLKYILVPVLAIGLSGCASTLTLLSPPSSRLVQGKNTAGAFNSYEYQYAVRGNKIY +IKRTPLCDEVKHVMRVEQKREIGYGPALLELPLFGLGLVDIANAHAISVNSKKVTPLADY +NTGKLMACGPLQPAANEKVIIENKNLNLYRMVRTDKNGVVNLDKVLSGIGNNVNLSVRLA +NNHNVAFSCMYIANR* + +>NODE_1#PROKKA_00084 +MNLPANPRAQRRIDGLMAGQASLPGESGIDQTGRKMYAILGLDLDFSVWERLFQELLQVC +PVHAIST* + +>NODE_8#PROKKA_00297 +MRWQRAMLFVAVLVACGGCSWRGKPRANHVRMYCLVDLEPVAEKLIQGFTESQGVRVDVR +YIRPDELNKRLQRDEYVGLFLYANTWSGSAEENLLRARGNGGGRPAELGRFTPCLIVPKG +NPKGIRSAFDLGKPGMVHGRTRQGACLLARISESGYRKPPKGPDTEPSNIRVRDTDYDVA +RLVAGGSIDGAVVWSFTQSAMADEVEEVESKGLKRYGNVQHMVVGTPMSTPQPGLVKAFI +EFATGPDGQAILRKTGLDVDPKCR* + +>NODE_7#PROKKA_00267 +MSENQVIIVSNRLPLSMTVKFGSLKVGRSSGGLVTALQPILKSRGGTWIGNGGTREDKRM +ARALEEEARRSGFDCVPVFVTEQEDRNFYEGFSNQVLWPLFHDFIGECRFEPEYWDFYRK +VNGKFADAVMRVYNGKQILWVHDYQLMHVAASLRERGCKGRVAFFLHTPFPSYDVFAKLP +WRRHLLLAMLEYDLIGLQTERDVRNLVSCLRRLVPEASMSTDHGVHRVSWHGRTVVIQDF +PISIDFDEFARAANQPAVEERMRTILARMGMGQVIFGVDRQDYTKGIPHRLRAYGELLRR 
+RPEMVGKVKLVQIVVPSRQNIPGYEALKSRIEHLVASINGEYTQPGWVPIHYIHRAIPRE +ELLALYRAAHVGLVTPLKDGMNLVAKEFCASRIDDRGVLVLSEFAGAAAEMYRGALLVNP +FDLEGVADALEQALQMPGAQQQERMRKLRRFLKHANVHRWVEDFMEEIESVKAPRSRRG* + +>NODE_22#PROKKA_00097 +LHLDQDQQDFLRHIQQVVETQLAPLALEIESQSRFPQQAREIFARAGLFTLAVPRSYGGQ +GADATRLALMVENIARVSPSAALLVFPSNAVLRTIALTGSEEQKERLFGELVQAGDQCLA +FCLTEPDYGSEAFNLQTRAERQGDHYVVNGTKTFITLGPNARYYLTFVRTGPAPKAGGIS +ALLIPHDAPGLGFGPPEKKMGLHGSVTTNMYMKDVPVPVANRLRGEGEGWQVLTRVCNPM +RVWGAAAMALGTAQGLFDQTLAYVKANAERLNPADRQSRDFALADMKMRIEACRSLIYRV +CRMVDDPRTPPQQVDAFVSMSKCYAADTGMETGELASRILGMDLMRPDCLAGRLYLDAKA +IQIFDGTNQIQRLVVAKSLALG* + +>NODE_11#PROKKA_00017 +MEARTLFNGALGVKAHVRGLESVSDNIANVNTYGYKATRAQFSDLLYQEMAGGAGFPQQV +GNGALTAVENMMMQAPLEPTENVLDMAINGRGFFTVKHPDRNEGNRYTRAGQFYLDKDYF +LVNSEGYRVQGFAVDADGNVNVNQVQDIQIDNQIQDATATTSVDLAVNLDASDTTEFRQA +VAIDPTDSGTYNFRMGFQVVDEDGDTQDIAVFYQKLESYTGDAPAGSQSVWKAATFHNDS +GTLTADPSYPDNTFFLHFDTNGQLVGVTTGTPATGDSYTSNAEVSSTSASVSDRLGETFA +YTGAGNTQTLRSTATITFSGTTTAGDTVTIGGTNYTFAALSPSDAAAWLADQINANSAGS +YYAQDDASGTVTLYAKDGTAAAEVSASSVVISTDDTMSLTELVNTVDSGRKATGSLFVNI +AGLTAGSSTVTVAGHTFTYGPAQDFTTLSELTTLINDLSEVDATSSGHNIYITAASVGTS +GNSLGLATNDAANVAVSASTLLNGLDDSDATNIDASATTGSGGGQALKLDRTDVGASATI +DVATTNTLGSNLGLDFTGGNFTQNSTASDGNGTSNTTGEVPLTFTFTKSGSTLTQQVTLD +YSPTDGDDSTMLAGDYETFYLKTDGRGTGYLKYLEIDDQGLITAHYTNGQGVPQAALALT +TFIAPQELLREGDNLWRATAAAGVPTVAQAGDAQTAMGEVKSYALELSTVDLAQEFVNLI +NYQRSFQANSKSIITGDEMLKTAINLKG* + +>NODE_4#PROKKA_00203 +MFCNGNQGYDFSINSHPGHAHQMQRRSHATNSQSGPVP* + +>NODE_1#PROKKA_00068 +MIKVGINGYGRIGRNVMRALYESGRRDQLQVVAINDLGDAQTNAHLTRYDSVHGRFPGDV +QVEQGHLVLNGDVIQVLAERDPSKLPWGKLGVDLVLECTGLFTSREKASLHLQGGAKKVL +LSAPAKDDVDATIVYGVNHKTLEPEKHVIVSNASCTTNCLAPIAQVMHELAGIEGGIMNT +VHAFTNDQNLLDVYHKDLRRARAATASMIPTSTGAAKAIGLVLPELDGKLDGFAIRVPTQ +NVSFVDLTLNLTREVKVEDINRAMREAADGRLKGVLAYNEVPLVSIDFNHNSHSSTYDAG +FTKVKGRLVKVCSWYDNEWGFSNRMLDTAAVMFGRG* + +>NODE_3#PROKKA_00155 +MNKGTVLRVARPTDQLEKIAQMYMEGLGFERLGEFREHDGFDGVMLGLRSHAYHLEFTQC +QHEKAGRAPTQDHLLAFYIPDAVEWVRTCEAMVKAGFVCKPSFNPYWDRLGKTFEDVDGY 
+RVVIQKEQWLD* + +>NODE_3#PROKKA_00174 +MRPKLSHALAAILAACVMSLAPMTQAETTQSTPSALAMTGDALFARPALLAMTLVGSAVY +VVSLPFSLLGGNASEAGKVLVVDPAKATFTRCLGCTMNQNRQNEQKNQNQVATADNTDTT +SN* + +>NODE_1#PROKKA_00073 +MNATAPLQEQEPEPGNRRFPGLLMGGARQVARWSWQHRRYGRWPLRILLVLLLVLIILVG +AGYGLLRGSLPQTEGTVRLPGLGGRVVVTRDAQGVPTIRAHNALDAWRVLGYLEAQDRFT +QMDFMRRVAAGDLAALVGPAALPLDRIHARFDLRARAERIYLDAPSVERARLEAYTLGVN +EGLDNLSVRPWAYLLLGERPRAWEPADSVLVIYAMGWMLQNPLGPRMRARAALRSLYPPA +VTAFLGAPDTHWAAPMAGQPPALPPVPGTQLINLSASGKSRATAPVPSTAMYADTVAKLI +LPQPFPGSNSFAVSGDLTGTGHALLANDPHLSLRVPATWYRARLIYPAPGATASQPVELT +GVFLPGVPALVIGTNGHIAWGLTNSGGDWTALVRVKATAAGSRGGPLVYATPSGTATLAI +QHVLLKVRGQTARPMSIRRTIWGPVIGTTADGALLVSHWALAQPGGVNLRFMQLDSQTTV +KQALMVAGSAGIPVQNFLVADDQGHIGWTLAGRIPVRKAGCDYAVPQSWADGSCGWTGWL +APGSYPSIVDPAQGYLATANNRVDARTAAVLALGDENFADGARAHQIVSDLKALAKRGKI +TANDLHDVQLDDRAQFLQRWHDLLLNVLSPSALEFHPHRQALREAVVNWGARAAVDSVGY +RMVRAFRNEVAASMFMPILKRLHTRDPGAGLPFSNQLEGPLWRLLQVRPHNWLNPAYPTW +NALLVHAADAVIHRFWNPVSGLADATWGARNTVRINQPLAVALGPLGHWLDMPPTQLPGD +SNMPRVQTPDFGASMRMVVSPQPSAPGLFELPGGESGHPLSPWYSDEFKAWAEGLLTPLA +PGPARKTLRFIPWSRRVSDRPTVSTGSVVPAQSGQ* + +>NODE_6#PROKKA_00239 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_22#PROKKA_00096 +MDFELSEELKMLREMAADFAKEQIAPYADKWDEEHYLPHEEVIKPMGELGFFGTVIPEEY +GGNEMGWLAAMILTEEIARASSSLRVQINMLELGCAFTIYRYSLHDEVKKKYIPKLVSGE +WLGGFGITEPNAGSDVMGMKSTAVDKGDYWLLNGSKTWISNADIANCIIYYAYTDREARG +KGLSAFVLDLKNEDGSRPEGISVTRLDKLGSHSSPTGEIYLDNVKVPKENILGKPGDGAK +IVFSSLNQTRLSAAAGGVGLAQACLDESIKYANEREQFGKPIAKQQMIQDQIAQMAIEVE +AARLLVYKAACQKDAGNLGNTLEVAMAKWKAGEVANFCAQQALRILGAYGYSTEYPVARF +YRDAPTYFMVEGSANICKWIVALDALGLRKANR* + +>NODE_3#PROKKA_00184 +MKLLIQNGQLLDSRTGQVRSGAVLIEDQKIVAVGEQVLDQAADRVFDADGAWISPGFIDL +CCFVREPGDDQKGTLASETRAAAHGGFTTVCASPESSPVNDSGAVTTLILERARKQGCVR +VLPVGALTRGLQGELLSDMASLARAGCVALSNGSLSRGNARVLRRCMAYAKTFGLTLFMR +PENPDLAADGYAHEGVVATRLGLPGIPEIAETIAVGELIQLAEDTGVRLHLSQLSAARSV +ALLRSARERGVPVTADVAIQQLAFNEGWLADFDSRFHCRPPLRTEADRQGLLAAVNEGWI 
+DAIVSQHQPHDPAAKQAPFGETEPGLSTVESLLGLGLKLVNAGELELPRFLQALTLGPAQ +VLNLPEPRLEAGSRADLTLFNPNGQWIPAPETLLSAGKHAPVLDQPLPGRVMLTLSRGKV +AYADPQTEFGL* + +>NODE_1#PROKKA_00057 +MRKVRVAIVGAGTAGLTALAQVRRRTDEFVIVNDGPYGTTCARVGCMPSKALIHIANDFH +RRRRFAEVGIAEGETLRIDLSKALAWVRAYRDSRTADSIKLTDPLGERNIPGRAELLSAH +ELHIRRADGGEERIAADAVILAPGSTPVIPKSWDGFSARILTTDTLFEQRDLPRRMAVLG +LGAIGLEMGQALARLGLQVHGFELRDRLGALTDPQLIAPAIEHFSREFDLHLGAPAELHP +TGEYWRVETADAQVEVDAVLAAFGRRPRLDGLGLERLGVPLDAKGLPPVDPHTQRVADLP +IFLAGDANARSPIMHEASDDGYIAAVNALDGPTPLNRRVPLVMAFTDPEMAIVGASFESL +PAGSFDAAGYDFSRQGRAIAMRHAEGRLRVYAERNSGRLLGAEIFAPEGEHLAHLLALAL +DRGLNVAELLRMPIYHPVLEEGLRSALRALARRVYDQPPQEFRRLPEGCPGMSSSSP* + +>NODE_29#PROKKA_00106 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_4#PROKKA_00225 +MGTTFRPYSPDQELLLPPSLNEWLPEGHLAYFVSDVVEELDLSALYARYDGDGRRNSPFD +PRMMLKVLIYAYATGTFSSRKIARKLEEDVAFRVLAAGNFPRHRTICDFRKQHLAAFKAV +FIQVVRIAQEAELITLGTLAIDGTKVRANASKHKAMSYGRMQEEEKRLSKEVDELCRQAR +RTDEEEDQQFGPDQRGDELPEELQHRQARLDKIRAAKEKLEADQKERDKARGRSPDDDRR +SPRGGRNFKRDYGVPDDKDQSNFTDPQSRIMKTSDGFQQCYNGQLAVDGEFQLIVANHQG +SNPSDNGCLLPLLNDVKDTLGTYPRQCLADAGYRKEGDLQTLEVNGIDGYVSLRREGRKP +GEIDATRYPATARMAEKLATAAGRSVYGQRKHLVEAVNGWIKHVLGFRQFSLRGLNAVQG +EWDLVCLSLNLRRMSTLMRMV* + +>NODE_13#PROKKA_00037 +MNDPIYQKLATVLDTLPNGFPATEDGKEIRLLKKIFSPEEAELFCDLKLTFETAEQIANR +TARPVEELKARLSVMQEKGQIFGIDMGGVGIYKMLPWAFGIYEFQLPHMDRELAELCEQY +GKTYGKQFFANKPQLMQVVPIESEIKAEHEALPYERVSTIIENSRSFMYFDCICKKEKGL +MDEPCDKPVQVCTAFAPIPGVFDDHPYGKTMTKEEAYQLLNKAEEAGLVHLTWNVKSGHF +FICNCCGCCCGVLRGINELGIDASKVINSYYYAQIDAEACVACGTCADERCQVNAIMEGD +DAYTVIAEKCIGCGLCITTCPGDAISLVRKPAAQIETPPDDEMDWYEKRAQLRGVDISDY +K* + +>NODE_3#PROKKA_00162 +MFGITATEAQSEYEPTEEPQGETKLRVLVIEDDQDVAAYLIKGLKESDYVVDHAADGKTG +LLMAAGEDYDMMIVDRMLPGMDGLNIIKTVRATGNTTPVLILSALGDVDDRVEGLRGGGD +DYLTKPFSFTELLARMEVLVRRTRSSNEPETVLKVADLEMDLLARTVKRAGQSIDVQPRE +FRLLEYLMRHAGQVVTRTMLLEKVWDYHFDPQTNVIDVHISRLRAKIDKGFEKPLLQTVR +GAGYMLREDT* + +>NODE_3#PROKKA_00177 
+MTGQDQLQRFLFENSNIRGSIVRLDDTFQQATGQQDYPTVVRNLVGQSLAACALMGDSLK +FQGSLSLQAQGEGPLRLLVSDSTDQLTLRGLAHWNPEAAEAETLPALIGNGHLVITITPD +AGQRYQGIVPLEQDTLAGCLEDYFRLSEQLATFMCLFADEKGAAGLLLQQLPGELAGPDT +DLWPRAIKLAQTLTTEEALQLPSEELIHRLYHQEQVRLFPARATRFGCSCSRERTRLALE +SLGQDDCMALLDEQEVIEIDCHFCGQRYRYDRADVRAVFGGPRLH* + +>NODE_6#PROKKA_00243 +MLIQEASKWDIITYVLDNDETCPANSLATHFVKGSNLDFDSVYRFGKMVDLLTYEMENIN +IEALKKLKSEGHQIIPDPDILELIQDKGKQKEFYQDNNVPTAPFKIYSSRQDIVQAIKNG +EIKFPFVQKLRTGGYDGRGVAVISDENDLDKLLDGASIIEDKVNIAKEIAVIAARNKQGE +IKCFPVVEMVFDPEANLVDKLICPSKITAEQSEKAIEIAGKIIGLLGMQGLLAVEFFVDE +NGEVIVNESAPRPHNSGHHTIESIITSQFEQHLRAIFNLPLGSTRPKLPAVMVNILGGEG +YEGPVRYEGLTEIMAIEGVKIHLYGKKITRPFRKMGHITVLSDSLETALEKAEKVKQLIK +VKSWDKN* + +>NODE_5#PROKKA_00234 +VATLELIVERASGDVLKVELKPGTNVPAIHPGDKVQIVTPAGETLNAVVVGQDVQITPVD +ASGTVGETIVFKNLALYLHDGQSEVAVVNADTGQTTEITDVASLADLGTVPLQMASGEGT +GPVSPGTSSPFQNSDAIDHGGETAGNAAGTLGDILNRGAAGTDGGRAQLAGTGETGAGTG +TGSSTTDHVETPISESTEGGGSGTGTGTGTTTSGHAVDGYIVGATVFADANANGVLDSGE +ASTTTSYNGTFELSGSSGQLVMTGGVDQATGESFKGTLTAPAGSTVVTPLTTLIQSLVEA +GQSAADAQAAVKSALGLTGNSIDLTTANPVEDVENGVSGADDVLAAAIKIQNTVVQAASV +LQGAGGSTVAMSTATNAVFAQLATTLQNNPGSNPITDATAVQNLITGAANSSSLGLSSTA +KTQVGNAASDAASVIDAGNSHINGLSSTGSSLLTDLASAARVAQNGAAEALHDALNAVQG +TSNSANLSTATSSYTGANLTSEIGNASSGLGTVGTASSVGTSGDDTIQGTSGNDTLNGGA +GNDTISGGTGNDILIGGAGNDTLKGEAGNDTLDGGAGDDSLYGGAGTDKALFDGNFSGYQ +IATDSGSSGVITVTGSGTDTIDTTEVLKFKDLTVRMVGDSGGTANGYTSLSSALSAASVG +ERILILDSATDPSTLTLSKKVSVQKIGEDPLISIASDGALVVDGSQLSAVTTLDLSSLPG +TTTVRFTSLGSIASISTASTETLNLSASQLDGLTVSGSGKIQTSGIVATSADLSNLSSDL +SVASGQSLELTAAQASGKTIAGAGNVTVNALGSSAVDLSGITASGTLTANVPSSATLNTN +TDLGTFGVSVASGQTLTLSATLADGTTIGGDGNVTVTGLAAATDLSSVSASGTVTATVTS +TVDISSNTHLGSVDAYQVTGALTLTAAQVGDSTISGSGNVTVSGLAATTDLSGIASSLSL +TASVTSSIDISANTNLTTVDTYQVSSGQTLTLSAAQAAGHAISGGTVTISGNITANTDLT +DISSTLSFDDGDSGAISVASGTKLTVTPSQAAALQTAGQTITGDGTVLIDGNVTADTNLT +NISAAVDFNGNSVSVDSGHTLTLTATQASDTTISGAGAVALSGSDTNADLSSITADITVA +SGQTLSLSTAQLATLDSNAIPIGGDGTVSLTGNATSALNSDLSTYLGSSLNLAVPSGESL 
+SLTAIQANGLTMEVAGTANITGPAGTTAADFSNISFTGSGAATFTVGADLVISEANADFG +SVSINIPAGRTLTIDAADASGVTISGDGTLAVTGTLDSSVNMSNWGTGAIDLTNVSASNF +SLTQLDLNGSADYHLTYAQVQALSDGIDGNNSDNTLIIDVSTAGGVTYTNNAATIDLDIS +LLGGADRVKFDFGGTTDSGNTLTIRGPLGFGDGSDTLESRHGTISLTDPGLTLSGGPEAL +VANSGFSLTATLFDLLYGSSGVTLQGEGTYIVSIDAGFASGASPTLDLTVLDNFVPAGGV +LPTLQIVATGYSVVGGDDGTGDGIATLSDGTHTITIKLPDDPDNAGTFNPGNTPVIIEID +NGTSQFFLGGLDDQRAYYESETTVYTGSQFADLATAIATDASSLGINAGDIQTIKLGDSV +ILDSDSAVDLSAFGGVIDYNGQTIQVTSGDSLTLTAAAADGGTISGAGSVTITDLGTTAV +DFSGLTAASVTLAISSGTVDLSAISGLDLGNVGITVTTPGAVTLTAAQADGLSISGNGDV +TLTGLGASEVDLSGLTATNATAEISADLTLSSLTNLGSLDLTLDDGVTLTLSRAQLDGRD +ISLASGSATLAFGGNASGLDLSQIASGIAFEVVAGRTLTLSTAQINDGHTITGAGSLVVV +LSGTEVDLASTAIQVTGTRTAVVSTSATLDGNTDLGDFSVSILSGQTLTLSTAQAAEHAI +SGAGNVTVTGIGSTAVDLSTITVTGTKTVSLSADATLDPDTNLGNFAIDTAGHSLQLTVA +QANGLSITGSGTAVVTGLGSDTVDLSGITATASTTVSDTVALAAGTNLGSVAVTVDGTGS +LTLSASQADGHTISGTGAVTVTGLTAGIDLSSLASSLDVTATVPGTVDITGNTAQLATVD +TYEVAGALTLSASQASGHTIEGSGSIVVSGLDGSAAYDLSGITASASTTVSGTVTLDSGS +NLGTVAVSVTGSLTLAAAQATGHTLSGDGSVTVTGLAATTNLSGLASTLSLTAAVTGTVD +ISSNTLLGTVDSYTVSQSTDALSLTAAQASGHAISGSGTVAISGLGAAVVDLSGIAATVD +ATATLSGTEVILTAGTDLGTVALSVGSGQTLTLGAAQASGHAISGGGDVVVNGLSASTDL +STVTVTGTVTATVSADLDISASSQLGSVDTFQVVAGKTLTLTATQANGQAVEGGGNVTVT +GLAAATDLSGITATGTLTATVTADTDITANTHLGAVDSFQVASSTTLTLTADQADGQAVT +GTGNVTITGLGTTAVDLSAIAATGTLSAAISGDITLAGDTNLGDVTLTVGAGNTLTLTAA +QADGNTITGTGAVVITGDVAGYDLTHIAGTLDLTLPVTGDVLTLTDGETVHLTVAEANAY +DSITGDGTIQLSGNATANFDHLTSILGDGVSLAVADGDTLYLTATQASGVTIGGIGTVDA +SGTVTDGDFSGISADLNLTDATLDGTTTLPTVGAGHTLSMTSDQINAASIALADSTATLH +VAVSFDALSSSNDALPEIDISEIRVDGSNSPEAVWNSVDVASGSIVDKFKLFWISADKQY +YDSTPLGQDVDANRAFVELGNLYAAYLAGADGELGTADDGTPILDVVQTKSGGVADYDAR +QQSLHDNLLGNLSDGAIAGRFGTDDPRSDLAKLFGDRPYLAGSVDGNGLYTNDDSVAAVV +GWDLYHGLDYTASLSGGYAVLDGDNSVTGTSGSDYIYAGGGDDTVSGGDGADVLYGGSGD +DTLNGGAGDDTLYGGSGDDTLTGGAGADTLSGGDGTDTASYAASTEGVTVDLETGVGTGG +DAEGDTLSGIENVTGSAEADALSGDANANVLDGGAGDDTLTGGAGDDTLSGGDGTDTAVY +TAALTTDNITFDTDHWVVATDGAEGTDQLSGIEVIDHGGSGNILLVGGDGFASIQDAIDV 
+ASDGDTIMVAPGTYAESLTIDKSLTLIGDPATGDAGAGTSAPQILGSTDWTLATVSIEAE +NVTFSGFDVTNDTGPYGIHIKAGDADVSDNYVHDINGALSGDGIRAIFINPVDNVTVSNN +IVEDFGNADNPSAASYTKTAAGIYYWARGGTLPGGTADIAELHNVTIENNVIHNDGLPTF +TGTSVLGIWVGSSQGGSVLDTVSIAGNQISDLHTDNADRLTGGILVNHGSNPDGVDPLAS +LDTPGVTTGLEISGNTIDDVSGASVFAVGLRGQTPDASVIDNVISNLALASGSSDGLLAS +ISFQYNTTTHSVSLSGNDLGGYDLLQVGHDTSDDTLTPATADDTLTAIDGYDNILVGQSG +DDTLTGGDGNDTLLGGNGTDTLTGGGGTDTLDGGDGTDTAIFTGTRADYTIAVDTDGHLT +VTDTGGTDGTDSVSNVETLEFTDTSVSVLTVTETGANGTYSSIQAAIDAAADGDIIYVAD +GTYTQTGTLNVDKAVTLIGQSEAGVVIDASAVHGYGILLTADGATLSDFTLNGPQGGDET +VWSSYRVDYGIKVSPNGTASSLSDITLQNLTVSGSHNTEIDFNGIHDSTLSNITVDGGTG +VAGNGISLTDSSNITVNDVTAANNPWGGVAIYTDGTHYAGGSDGVTFTGDYTYDAGSTGA +SPIYIQATGNTYPVTNLTLPDGYDFAVTNSEYRADGNEFTFFFTSESDATAFGNSLGAGS +FVSTPDADTLTGTANADYLYGGGGDDHLSGAAGDDRLVGGSGNDTLDGGDGMDTAAVEGN +RADFTFTDDGSGHLVMSDTQGTNGTDTVSGVETLSFTDGNVLVVGAGSEYATIQSAIDAA +ASGDTIVIAGGTYAESLSLDKALTLQAVSGADVVIDPASGNGLTVSGDLAGGDVTVSGLT +FTDGTMGIQVAANADVGTLTLDGVTVEDNLQYGLRTDSGSMAAVIVTDSTFGDNGTQNVN +GSAQMKLYNFDGDATFTRVDLVGAPAGTDQNSRPDYGIELTGLSNTGLAEGGTSPDLGTV +VFTDVTVSGEFHKIGVAVYNYGQIDGLDIQSLDLSGTETNWGPVFNIDGVEDSTVDARNY +NITYPAGDAIVAELQGEVPDQTATDTTIYGTDANERLMGKAGDNVLHGGGGNDELYGADK +PGNPAEDDSGNDKLYGEAGDDLLAGGAGADILDGGDGIDTASYARAGATEGVAVDLANGT +ASGGDAQGDVLSNIENLVGSSYDDTLTGDGNDNVLTGGAGADALDGGDGSDTVSYAGSSA +AVNVDLATNTVSGGDAEGDILSNFENVTGSSHDDTLSGTSGDNVLVGGEGNDTVVYTTTV +AASDVSFDTDHWVVTTASAGTDQLSGIETIEHGGGSNILLVGGGGYATIQAAIEAAGAGD +TILVAPGTYAPFATSFGGPANITVQAMGDPGDVIIDATGGAPSNGRILDLRADGMTLDGF +TIEGPGHAGVGISINGQGITVENNVISNVLTGIQTGTQYDTGNVTITGNTVDADYGISLQ +NTANTVTDNTVHATTEGLGVLDVAATLSGNSFTVDAGGEGLALYGGATSSTFTTSGNTVT +VGEGANLQHATDLAGTDGTLNIGAGTYEQVISIAKDGLTVNGSDATLVVDGSSSDVNGIA +RVDAVTIYGDNVTLQGLTIVDSLVDQSYVTYGWPETTRGIVVKNGAENFTLTGNTIESTR +NGILINGIDNTGSVTDNVIDNTKSGISVQYTDASGIIIAGNQEGTYGNEWGLNLHLNGYW +DGTTYTSNNADNYPILGTAPTADWQASLLGLSTGNDGWAVMDQAYALYNRTLVTVDPDGS +PSSFSNQGSQRSPISTIQNGVDLVVAGGTVHAHAGDYSGESVTVHVDNLILDGDAGATGI +TVQLADGLSNLTLAGEADYTATGNAADNTLIGGAGDDVLTGGGGADTLTGGDGSDTASYA 
+ASAAAVDVDLGSGTASGGDAAGDTLTSIENVIGSAYDDMLTGDAGDNVLQGGAGADSLIG +GAGSDTASYADSSAAVDIDLAAGTIAGGDAVGDTYSGIENLTGSAYADSLTGDSGDNVLT +GGAGNDTLTGGAGNDTIDGGAGTDTAVLSGNRATYTLGVNADGNITLSGEGTDVVKNVET +LQFADDSASILVVDPGTSGAYATIQAAVTAASAGDIILITGGTYTENVTLDKQVTLLGAQ +AGVDADGRTGVTESVIEGNITVSGAADNATIDGLTIHNGASVGGDLAGVYLASGATGTEI +TNTIFTRDGTVDGDSSRGILTTYNGGNTDVTIAHNSFSGWATGTYVNPGSQDIQITDNQF +DGNYVGLSVDGPNGAVVTGNSFTGNQFEGLGIGPGTGISGITLSNNSFADNASQVGVYTD +AIDVNALSGNTFDGAVVISGSDTVYASIQDAVDASSDGDTILVYPGEYSELANYNPTTGE +NSGTGNPLGLLINKSVTIQGVTADGTYITDAGDVAATVTSGAQSNWGTNFFVTADDVSIN +GLDLVATGSTGQPYVNKSIEVVGDGFTLNHSVLGAADGLPMYTAVYVNDWSVDSGFTASA +IASYAVLNSQLYGDMVVTNGPGTGYTADQLDMRIVGNSFLTIDGGIPNDGILVTGNDDNI +AWRNASAALPTEISGNDFGDASGVLWVRGDGTQDFPTTAEVNGILADNSVPAYAYAVDGN +GDLAAGTYGSSSIPSLAIRATAADFAPSELSGAGAESLMVQQAGETTPHSYSLIVGADGV +ADSLTGTSGDEALIGGSGDDSLSGGGGNDILVGGDGNDTLTVGDGSAVVYGGDGTDTTAY +STTVSADDISFDTDHWVVNTSSGTDQLTGVEAIDTGGSNKILLVGGDSGYATIMEAVDAA +SAGDTILVAPGTYEPFSMGYWSPSDLTIQGMPGAVIDATSISTPARIVDLTAEGTTFSGF +TIVGPGDVDDAGISVGISISAQGVTVSDNTISDITTGIQNHTPADQTGASSILDNTISGA +NVGISLQNVNNTVSGNTVTTVEAHTLGVGEVALGVLGGDNTITHNTFTVSNSGKAIGLPD +LPAVANLTTSENVVTVGEGADLQNAADLAGTNGTLHVGAGTYAQELTITTDGLTVTGDDG +ATIQVADPGVYSPSSDAFAARTIAFTIAATGVSVSGFEINGPLSAYTYTTTDFATLGYTY +GFFINDGVQNTTLHDITIQDIRTGMSFEGDNTATVYDNVIDNTRGAFLVRSDGVDLHDNS +FGSTGNEWDLTMLAGTPSDYFGDPLTDPGTYGDNMMALSAANNDMTIADRMYGEGGVLAR +AASDPDLADQYAAVANRSHVEVLAGADNDTSAGLGETRGNGFGTERLPVGTLQDGVNAVV +QGGSVHVQGGDYSGESVTVHSDNITINGEASAIGIDVHLGIGLSAITLEGSADFTATGND +LDNTITAGAGDDILSGGDGADILFGGDGNNTLTGGDGADKFMISAHTDGSKDTITDFGQG +DSLDFHDVLSDPTDVVFTDDGSGNTQITTNAAPTIVLAVVEHVEPASLTVDDHGNVTLAQ +TS* + +>NODE_3#PROKKA_00152 +LHACRSTLEDPTRKQPRNTSTAKAYLDEHFQHKLTLETLAGVAHLSVRQLNELFRRQIGM +TPHHYLTEVRMQQAWQLLEGTDLSVQAVAERVGYSSLAAFSDRFHQHFGHPPSHFRRTGK +TLRQNR* + +>NODE_4#PROKKA_00221 +MFDRHHQVWPEFAPLHLTLPETSICYNLEVTAHRYPHKDAIIFYDRRISYGEFQRQVEIL +AGFLAREMGVEKGDRVLLYMQNSPQWMIAYYAILRANAVVVPVNPMNRRGELEHYASDTQ +ARVILCAQELFDQVSPLLGEEGLSRAVVAAYSEYLPDQTDLPLPDAVSEPARAINQSGVV 
+PWRQALAGEPAAPKALVGPEDHCVFPYTSGTTGAPKGCIHTHQSVMATLVGAVAWNPATA +DSVTLVSLPLFHVTGMQVSMNAPIFVGATMVIMTRWDRRVAGALIERYGVTEWRNIVTMV +IDFLSDPEARNYDLSSLRAIGGGGAAMPKAIAERLHEMTGLTYIEGYGLSETIAATHVNP +VDNPRAQCLGIPVFDVDCRVLDVASGQQQDVGEVGEIVINGPQVFKGYWNRPQATAEAFT +EVDGKSFFRTGDLGYYDEQGYFYLVDRVKRMINASGYKVWPAEVESMMYQHPAIRESCVI +SAPDERRGETVKAVVVLTDDAPADVTEAAIQQWCQDNMAAYKVPRIIEFRDSLPRSATGK +IQWRVLQEEERERAAG* + +>NODE_12#PROKKA_00032 +VARLSEDERRIRRMVGSRIRELRQALDLRATELASRAGISQSQLSKLENGKAAISIPVLT +SLCRVLDRPLSYLFQKEEEIPRVLGTMTTVSGPENRGLEWFAAEVNRRSGGRMSIIPLWA +TMLGSAPDQVAMLRQGVIDLFIEELIFYQHIAPAVKLISLPYVFADDAHLLSFLESPFFQ +ERVHGPLTKSGIRILNRRWNWRRGLERVLVARQPVTRPEEVKGLKVRIFDSPALARFWEE +LGARPVVVPWPRVREAWEAGEFDLLPTHRSHLYPLGFCRQGRFVTLLGDVPPALAVTVNE +QKYLSLPPAVQAALEESCDAAGGFFSIEIRRAEVDNQAANLAEYGAVYLKVDLEPWRRAA +GRVVERMAREGAVDLDAWQAVQELRPAGEGA* + +>NODE_10#PROKKA_00010 +VEVDISLFSNQNQFVILRVGEQAYALPAAQVREMQVLPEVTEVPRAPAHLRGIISPRGEV +LPLFDLRRRLGMRSLAEEADELLKILEAREQEHKQWLEELESCIREEREFTLPTDPEKCA +FGQWYQNFTTEDLALASVLERLAAPHRRVHEVAGAALEALEKEGQAAAQEVIDRARRIIL +PKLLELFAELKRLIRETHQEIAVILESGRHTLALAVDNVDSVELLQPKDLQNLERFGPVD +GSQDLLESVGRRANGETVYILKTAEFFQAATDLTF* + +>NODE_10#PROKKA_00008 +MPLGKGRGGALPWDPRAEAALGRVPFFVRSLVRRKVEERVAEAGGRRVGLEDFQEAEAAF +RAVRAGKSQKELEAMLPAENRPGVEMVVVQACRSRLSNCPNPLIDTQKWLERVQAWVEEL +DLSERLRRRVADDKILFHHKLKIAIAGCPNGCSRPQIADLALVGMTRPRLVEPEVCTACG +ACAEACPDGAVSQDDGPPEFHRELCQGCLSCSRACPVGGIELDPPGVRVLMAGKLGRHPH +LARPVMEATGPEPVLAYWTRELEEYLASAPPGRRFSAWWLEQHPAG* + +>NODE_3#PROKKA_00183 +MSFLIFVALLVALWLLWTISRNTADALDKQTAVQYEIIALEKRMEELSEALKAQSGETAK +PARRSSSSRSKKEDEKEKKEGE* + +>NODE_6#PROKKA_00249 +MNTVSLSGSLRENVGKKDAKKQRRLGKVPCVIYGGSEQKHFTLDQLEFKPLVFTPEASVV +NLTLGEKTYECILQDVQYHPVTDEILHADFLEIHSEKPVNIALPVELTGTAPGVVKGGKL +RLKMRKLRVNGIIKLMPEHIVLDISKLDIGRSIKVRDINQANLTFLDPGNQVVVAVVAAR +GLSAEEEAEEAEEGEEGEAAEGGEEGGEKSAE* + +>NODE_2#PROKKA_00113 +MKKFHLLLLSLLSGLLLAAAWPLHGFTPLIFIALVPLFFVQQQMGDTGKRGMLLYAWLTF +LVWNGLTTWWIWNATPVGAIVAIVLDSLFLAIVFQVFHLSKKWLFNNKQGFFILIFYWIA 
+WEHFNANWDLSWPWLTLGNVFASKHLWIQWYEYTGVLGGSLWILSVNILIYNIIKSFLEK +RKQRALYTTILTVLFIAVPIIISLNIYHHYKETKNPVNVVVVQPNTDPYTEEFNLPPSAL +IKRNLKLAEQKVTDSTDYVVFPESTIQEQIWEGSLNRSQSIKTLRNYVMEHPNLSMVIGA +STFRWLKPGEHRTNAARFYKKGLYYYAYNTAFFIDHSPYIQVHHKSKLVPGVEKMPSWPI +LKPLEHLALNLGGTVGTLKEDDHVSLFTNDSSGTKIAPMICYESVYGDYVRQYVAHGAEL +IFVITNDGWWGNTPGYRQHFSFAILRSIETRRDVAQSSTTGYSGFVNQRGDVLQRTKYDE +KAALSQTLNLNDKLTYFMKKGDYLAHLAGFFSILILLAAIVQGFLKKRNLPH* + +>NODE_4#PROKKA_00212 +MVERKHGGLIVAEHLQAAGISHLFALCGGHISPILVQAKALGIEVVDVRHEASAVFAADA +MARLTGRPGVAAVTAGPGVTNTITALKNAQMAQSPVVVIGGATPTVLKNRGSLQDIDQLA +LMKSLVKWQTSVGTLAQLDEAMRYALEVAAQGVPGPVFVEAPIDLLYPRDLVHSLYADQA +GLDKMKGPVGRLLRGGLDLYLLRQERQPALSVHPNLKTLTEPAAEWHAARQLGEVVKRLA +SAQRPALVLGSQVLVNRTAEQAKDIADAVERLGLPVWTGGMSRGLLGAEHDLLFRHHRGR +ALAEADLVIVCGFPLDFRLKYGRGFAKGATLVSVNLSLHDLLLNRKPTVPVLAHPGDFLQ +ALADRMSSRAAQWRAWLGELGKRENAREEEIDQQAAAPADKVNPLHFFRTLDRHLGEQDV +LVVDGGDFVATGAYTLKPRGPLAWLDPGVYGTLGVGGGFTLGAAAARPGSRIWLIYGDGS +SAYSLAEFDTYRRLGLAPIAIIGCDASWRQIAREQVEMLGDPVGTDLRDTDYHLVAEGYG +GHGILVEHNHQIDAALAEAIKLSDAGTAVCINLRLAVSEFRKGSISM* + +>NODE_6#PROKKA_00253 +MHYNPNAFALSLRNSYSKIIGLIIPEITLYAFPSMIRGVSEFCYNAGYNVLILSSNESYK +REVQNTELMLSSQVDGLLVAITKETRNHKHFDQLEKEGIPVVFFDRVFNNYGTSKVIIDD +RRAAYEATEHLIKTGRKNIAYFGGNAALYITQQRLMGFRKALSDYNLEEHDLVFADDSHM +ARNKALQIFKRKNYPDGIMSISDEVLTGIIPALQELNIKIPNEVGVISFSDGPISQMYKP +AISIVHHSLARVGQVAVDLLIQRIEHPEDMHQQIHIIDTELIARGSTAIGK* + +>NODE_1#PROKKA_00078 +MNSSPRLQTGISQRLGLSPRQTEALRLLALPRPDLEQILETALEENVMLERLEPETGEGD +PEVATVMEQTEPAGEWDELSWSSSAGTGERPDMQTFEDIRPPDLRQHLIEQLVLERFSDR +DFLIALALVDSLDDNGYLREDLDTVSQELDALDPSPELIEIEAILHRVQRLDPIGIGARD +TAECLSLQLEALPPDTAGLVVARELIDGHCARLTQADMATLASLTCSDEDSVRRALSLIQ +SLNPRPGNDYSAQTAEYLIPELRTYRTPDGWQVELYPGNHPRISINATYVAWLSANRLNE +ASQSLTRQLEEARWLIRSLAQRENTLLRVARVLVRRQTAFLDQGVMHLAPLTLREVAKEL +DMHESTISRAVQGKAMSTPRGVITLRHLFSNALSNDNDEAISARAVHERLRHLLNHEDPA +APLSDAALAAALARDNMPIARRTVAKYREALGFASTRARKRPAHSVAISKG* + +>NODE_2#PROKKA_00130 +MMKKLMNILFLSCLKATELIEKKLYFKLSLKEKVQLKAHKMMCDACTNYEKQSIFLDKGI 
+SHLNQSKIKKEDLEEIKKSIQQKLNELK* + +>NODE_3#PROKKA_00188 +MSEANPYIRNQFLIAMPYMQDPNFNGTLTYICDHNDQGALGLVVNRPLDFSLGEILEQLD +IECGHLDVPVYSGGPVKVERGFVLHRSRGEWQSTLEISGDLSVTTSRDVLEAIAEETGPE +DYLVALGYAGWGAGQLEQELAGNFWLTCPADPDILFNVPWQQRLPAALARLGIDWSQLSD +SVGHA* + +>NODE_17#PROKKA_00045 +MSETPRRPRDSRYMWRGIRPSEEELKTILEDHAQWLERLRSWEYSWREFIEEIPPPHDLS +GADLLEADLSDADLTWAKLSNAILFEADLSNADLREADLSNAKLWWADLSDADLTWAKLS +NAKLLAADLSNAELWWADLSNAKLIKADLSNADLTGADLSNCDVTGVRYHGPWLGIPFIQ +IRKPNKLTCRGIRADTCYGSPRFRRDVMDQDFLEEMRETTGGRWLYRLWWLTSNCGRSFI +RWAFFSLSLAVAFACVFCSSLGPECFDLHRAEGSRWVAEVAGRYLEVDPAYLGTTAASRG +LPGDFWTMLYYSLVTFTTLGFGDVVPLTPWAAFWVTIEVVTGYIMLGGLVSILASKMARR +AG* + +>NODE_1#PROKKA_00067 +MPTRRELANAIRALAMDAVQKANSGHPGMPMGMADIAEVLYNDFLRHNPANPHWPGRDRF +LLSNGHGCMLQYAALHLSGYDLSMDEIRNFRQLHSKTPGHPEYGHTPGVEVTTGPLGQGV +ANGVGLALAEALLAAQFNRPGHKVIDHHTYVFCGDGCLMEGISHEAASIAGTLGLGKLVM +VYDDNGISIDGEVKGWFHDDTQKRFEAYGWHTIGPIDGHDAEALKKAFAEAQIETKRPSL +ILARTIIGFGAPDKQGTAEAHGSALGDAEVAKARKELGWKFPPFEIPESIYAGWDARARG +EQAETEWHERFAAYAKAHPQLAKELKRRLAGELPADWATTVEQHIAHVARNGKAQATRKA +SGATLAALAPTLPEIVGGSADLTPSNDTCWPEAKAVKPGTPEGNYLHWGVREFAMTAILN +GMAVHGGFVPYGGTFLTFSDYARNAVRLAALAHYPTILVYTHDSIGLGEDGPTHQPVEHV +ASLRAMPNLTLWRPADDVETAVAWRDAIERRDGPTMLVLTRQSVPHYERKAQQIEAIHRG +GYILHEPQNAPRALIIATGSEVDLAMQAARVLTEENLPVRVVSMPCQELFLAQDVDWQEH +VLPAQVTARVAVEAGVSMPWYRFVGIHGRVVAMERFGESAPAKQLFEEFSFTAERVAAAV +REAVAAAAG* + +>NODE_3#PROKKA_00170 +MHLWRIVWVLIVLVAVLGGVYFLFPGTIVNADKSFELWRAGLAVHDINVDDQHIHYVDSG +GQGRVVLMLHGFAADYYSWPRMARYMKAGYRVIAPDLPGFGQSSRIAADNYGISQQAQRM +HDFLRALNVDKVDIVGNSMGGWIAAEFAARFPAQTRTLTLIDTGGITAPHPSPFMQAVEK +GENPLVVHNRAQFNHLLTIVFHHQPFIPGPLKGYFAKQAVEHAAFNEKVFKDLTDDYVDL +EPLLPKLTMPTLVMWGRYDQILDPSCVEVLKAGLPNATIKWFDTGHAPMLEQPKASAEVL +KAFLQANRGD* + +>NODE_3#PROKKA_00187 +MPDLKPEGQRQVMAFDFGLRRIGVAVGQEMLGTASPVTMIGARDGIPRWDEVEALIADWK +PDFFVVGLPLNMDGSESEMCRRARKFARRLHGMYHRDYAMMDERLTSFAAKSAIVEREGG +RDFGVKGVDDLAAVLILEGWFLQQRDQQPKIPTP* + +>NODE_3#PROKKA_00169 +MPPTRRSPPRQQRVTAKKAGGLSRLRIIGGQWRSRQVPFPPVEGLRPTPDRVRETLFNWL 
+AGDIPASRCLDLFAGSGALGLEALSREARHLVFVDTASEVIRTLRENLRTLGCQQADVFQ +QDAEQFLQRPPATPYDVIFLDPPFRQGWLDKVIPLLQQPGWLKPGGWVYVEHEAELNARP +WPSHWHEHRQKEAGQVVYRLFHVADALKDTAEGVERAS* + +>NODE_7#PROKKA_00275 +MRKRKPKVDRIFHALGDPMRRVMVELLRKRPYSVSALAEPLGITLTAVGQHLKILEEAGL +VRTEKLGRVRTVQLEPEGFAVLEAWAAEHKGEWALRLDRLGDVLADDG* + +>NODE_6#PROKKA_00238 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_7#PROKKA_00263 +MVRMTVLASGSKGNSTVVSSSRTRLLVDAGLSCREIFRRMQAAGEDVESLDAILVTHEHQ +DHVQGLAVTARKLGIPVYFTEATHRAWMRWMTPRKRLTYAQWLEQQKAIVAAGKEPAAAG +AEAQDAAEEDISGAKPKKDPCALPAVEYFSSGTDFQVGDIAITPYTVPHDAADPVGFVFE +AEGVRLGIATDLGYVTPNVHLHLKKCDVLMLESNHDVDMLRDGPYPWSVKQRVMSRVGHL +SNDAAADFLENSYDGRAAFVVLAHLSESNNLPELARVSAERALRDRMNLLGNKLMLASQQ +TPLEPIVL* + +>NODE_2#PROKKA_00128 +MMVMSQLDQGKILSMTGLSVKSHSFFIGNPNVGNKAFSADPAAGLVIPVRVNVYEENGKT +YVSYFKPSDLFGSFKNAKVKMIGQMLDKKLGMMLKMVTR* + +>NODE_29#PROKKA_00107 +VLKASFLASVVSFLCFCPWNEQWKSELIANNFV* + +>NODE_6#PROKKA_00252 +MAVEAVRGLLEKTGTKPEEVDLVICATVTPDMQFPATANLVSYKLGINNAFSFDMNAACS +TFIYALITGSKYVESGEYKKVIVIGADKMSSIVDYTDRATCVIFGDGAGAVMLEPTASDE +PGIMDHRFYTDGAGWIHLHQKAGGSLKPASHATVDAKEHFIYQEGQPVFKFAVTRMADVA +AEIMERNQLKSEDIAWLVPHQANLRIIDATARRMGVSKDQVMINIQRFGNTTNGTIPLCL +YEWEPQLKKGDNIVLAAFGGGFTWGSVYLKWAYDGKK* + +>NODE_12#PROKKA_00029 +MQRSSGSITQLARLVESLAELLRLHYGPDGLPAAQEEDLCSQEVEGRRLGELLAELWPDS +GPWRGPCLGQGAIRETERLLRYRLQQAEAQNRALATRLERVRRQVAAQRRTLLEQLRAAR +LQVEQGQARLEALSAELGGLRAELERRRERARRRRRYLEGVVPRGMNRFADAGGRILDQA +TLRRATRELRELQALVVRTKDACARLEHRWARARLELARARSHEAGLRAELARLEPYWQA +KTHRLARAKVVLAARQEELGPLERNLHRLRVMGLAHAEVVSRGRAALEPLLAPLASGESP +DPVESLEESLTQAGEQARRGRRLTALMERLGRRLERRLEAIGPVLKEQRRLNKEITRLEN +ELPGLLEPLLAGDGADPRNRQEAGARFSLLIARLEDLIPQARATQEQLDELRQALTIGLS +RGKAWQAAWRRAGKAERAALHQAQALVEEVRLAARQAARQAEHLRRRAEPAVKALSPLRS +QDLLPSLAALAQGVSRGQLKARQLEARAAELEGRIPRPYFGNLSKPPVALKPVSAGLRRL +SGKQVELERLAALERAARRWQGLLDGPLVEEIRRPVEQVALRLARSLTLLERQKNLLASR +HQKQGRELSTLKAELDQRRRREELARRRLEQVRDRNRRQQRTIRNYETELKQAQTRAALA +QRLEDELARLGEHAQTLARRLERSDKLAAALKRKSLERHRLYRRSQYAVEWLDYWRERAL 
+EQEKLLSSARAELELARREYQQARSLLASAVSERDQALKELATERAARARQALDLLGGKA +LSVELAASRSEAGRWAKLAQDMALALAASGEHHRQETADLRAQVDQLSAEAAMLKRQLER +IAAMVEVQVPGLEELADLPPAPSWRRPVALRLVPLGPKQVAQALDRLSAARRRLQNLGRG +TLGHWALIAALTCGLVLTPPGTPSKATRADAPLKPPRPVLRHLAQGSPLTPIYQVPAQAR +LLGDKVARGSLELNLLPLRGQPVAVPQSVKRRLKELAREAGLSPKVLLTSARALYAGQAA +VDPSALEELAHTARQLARRHPLIFRELSRRGLPPAASAVAALAPEPEKAQHLFLDRLYRE +YRSLGFSAEEALGALAANQRAFHRLTRQWTPPRRFIGKVQPVEAVEKMGLREFLQKITPY +IQSKLKVFLRQRGMSYSGDLTLYAKNLAFDMYCAAKKFQVPVTLLLAIAHQETWYANVLG +DANRSASPFQIYEPTRELIIKSMAEAGFVPPPKRIKLQRHLTMATFMASFHLRELMQRAY +TPPRRGRQAVVNLDRVLQRYNGSSRYAAQVALRKRQLARFLRRQG* + +>NODE_7#PROKKA_00283 +VRLAQDAPVSFDPDTFKVTPPDPAVITPVLAQLEFNQLLNQFAAPPPKADYRRLSDPEEI +EDFLKPVARKKRLAIDTETTSIQPMLAELVGVSLCHQAGEAVYIPVAHNLTPGQSQADKE +AVLQTLAPVLADPAVTKIGQNIKYDLIVLGRCGMEINGPLFDTMVASYLLNPGKTSHNLA +SIAAEFLGRSVISYQEATGGKNRPFADTDLDQATDYAAEDADVAWQAAQVLEKKLAESHL +DGLFRDLEMPLVPVLARMERNGVGLDVQGLEDLGKELAAKLDEIERTCYRLAGHEFNLNS +PKQLAQVLFEELGLTPVKKTKKGKTSSTDVSVLTVLAAKHPLPAEVLNYRTLSKLKSTYI +DTLPKLVNPQTGRLHTSFNQAVTATGRLSSSDPNLQNIPVRSEIGERIRACFIAEKGNLL +VSADYSQIELRVLAHLSRDPLLVEDLTKGLDVHTQTAARLFDVMPELVTKPMRARAKTVN +FGILYGMSAFRLAREQGISRKEAQQIIDKYLGRYQGVARFQEENLRQAREKGYVTTLLGR +RRYLPAINAGDRLARQAAERMALNTPIQGTAADIIKLAMLAAHRLLEERFPQALMILQIH +DELLFEVPASQAEDLAQAVKQAMEGVIELAVPLVVDIGIGPDWAQAH* + +>NODE_2#PROKKA_00149 +MKKHLNINEPTRIRITKEFKFEMAHALKGYDGLCRNIHGHSYELMVTVSGFPIEEENHPK +LGMVMDFGDLKKIVKEEIVGQFDHALVLSKKMPVPLVDELKNQFERIILTDYNPTSEMML +IDFAARLKARLPENITLKHMLLRETVTSYAEWFAEDQD* + +>NODE_1#PROKKA_00062 +MNAKPHVLVLGGNFAGLGSAQKIREFAGDAVDITVIDRKNYLLFVPNIPADVFENKDPAV +GQRLDLPPVLVKDDIYFVQGEVTELDVDNRIVHYTPSERPGAAPQKIAYDYLVVALGNRL +AFDKIEGFDEFGDSVSDIYLGNKLRKKLWEGGYKGGPIAVGSAMFHQGDGAKGLEPYPGG +SIPDALAACEGPPVEVMLSAATYLKKTGQGGPEKITVFTPAELIAEDAGEKVVGQLLDIA +SGMGFNYVNNAKDITRVTAEGVELANGQTIEAELKILFPDWVAHDFMRGLPISDSEGFVI +TDLLMKNPKYPEVFAAGDAAAVTMPKLGAIGHQECDIVGRQIACAVGRMNEAAANTPLQP +VVYCIGDMGDNQAFYIRSNSWFGGDTQVLKMGHTPFLLKMQYKNLFFRTQGKMPDWGLDF +SELMAEKIAS* + +>NODE_3#PROKKA_00165 
+MGTSLQVMDKLIPGANVQAYIQGVNAIPMLTVEEERELAARLQQDNDLEAARRLVLSHLR +FVVHIARSYSGYGLAQADLIQEGNVGLMKAVKRFNPDYGVRLVSFAVHWIKAEIHEFILR +NWRIVKVATTKAQRKLFFNLRSAKKRLAWLNNDEVTAVAADLGVEPRVVREMEGRLAAQD +TAFDAPTDDDDDNAWQAPAYYLEDRRYDPAQQLEAADWTEDSNSRLLEAMDSLDERSQDI +LRERWLSESKSTLHELADKYGVSAERIRQLEKNAMKKIRKMMGEESIA* + +>NODE_7#PROKKA_00271 +MHPQRSRAGHNNEPRHHVGKERAHHHVGPRSRVVLHSNALLHHRRLQIELHPRRNRRADD +ADHHEQVAVLPPHRAVRQRHRLQYGQVPVGRGQHARRHISNVENRSHQKDLFHPLVIALD +RQQPHQQRAHRHRGVLRHVEQLQAARNSGKLANHVAKVHHHQQHHHHKGDAQPELLADQV +AQPLAGHHPHAGAHFLNHDQRQRHRQNRPQQLVAKLGSRLRIDKNSARIVIDVGRDDPRP +QHRKEQKNPGSPALQHRVDSSLVCQLNSERA* + +>NODE_3#PROKKA_00182 +LENSTQNALHKPELAGKIADQTQLTRAQAHEVITAFTDQVSAAMARGETVALAGFGSFNV +RERQARTGRNPRTGEALQIPAHKTVGFRPGKAFREAIE* + +>NODE_9#PROKKA_00321 +VKTVSISGQTGASKIVIGERLENLSNYLPDRRIVVITDTNVAGHYGKMFPDVEVITIGCG +ESIKTLDTAKMIYERLVSMAADRSVFIVGIGGGIVCDITGFIASTYMRGVRFGYVATTLL +AQVDASVGGKNGVNFMGYKNMVGVFNQPEFVICDPYVLGTLPPRELACGFAEIVKHAAIS +DKDYFADLEESHEKACARDPETLERIIRKSVVIKAGVVNADEKERGERRKLNFGHTLGHA +IEKTLGVPHGEAVSAGMVMAAELSANRGHLPRPDIRRLKDLLTHLDLPTALPIDPERIID +AMARDKKRQGEKIHFVLLSAIGAAFVDSISLAELEAVVTG* + +>NODE_2#PROKKA_00124 +MIKYYLLLAFEKKTLIRAIRVAILVGIILNLINNPDFIFHFSTNYLSLGRVLLTFIVPFL +VSTYSSVLSNSALRTGSVSHIDAILKCKSCNKTHIHVPIGHEVEECPICKKETRWRPVRI +FSGSGNRDELLKSLALFARYNPTPLFRINSDSIINEANQAAKDIFGSDELTGKNLAGIIP +EIKDINLNQLIHDGAIKKTIIHKEGRDYNITLKGIPELNSVHGYLNEVTNFVEPEQKG* + +>NODE_6#PROKKA_00246 +VAHAATTGNNETLLCRFPTLHNNTIVFEAGGNLWRVDRTGGVATRLTTDPGYDMMPRFSP +DGKTIAFTGQYSGNVDVYTIPADGGAVTRLTYHSDVVRKAPTRWGPDNMVMTWTPNGKDI +VFLSRRDTWNSWFGQPFEVSKMGGLPTHLPLPKGGVMSYSPDGSKIAYNRIFRNFRTWKR +YKGGLAQDIWIYDFKTKKIQRVTKWKGTDTYPMWYKNTIYFASDRGANHRLNIWAYSLDT +KTFRQITHFKNYDVDWPSLGNNGIVFQDGGSLYVLDLPSEQLHKINVKVPTDGTQTLPRW +INASKMIRSLDISPNGKRVLFGARGDIFTVPAKHGATRDITQTSDAQEQYPAWSPNGKWI +AYLTDASGVNELAIRPSDGSGHQTYITNAKTGYYYNPTWSPNSQMLAYSDNNHVLWYISL +KDKKPVRIAQDKYNAMRDYHWSPDNNWISYSKTNASGLSQIYIYSLADHKSYKVSDGIYS +DNDPVFGPNGKYLFFVSARHENPLFSESESNVATEKMDGIYMVTLQKNEKSPFAPVSDEG 
+MPEAKKASSSASKKTESAKDVKIDFNGLMNRVIMLPIKSGDYGNIQVTGNKVFYQTRPLI +TIEGFLHGTGQSSIMVYDLKSKKGHTVVANGARTYGLSADGKTLVYMRRGKFFLMPSASV +NAKGSEPVNTSHMKMKIYPHQEWSEMFHQAWRLFNNFFYNTKMNGVNWNEVGANYGKLVP +LLGCREDVNYLVGEMIGELDNSHCYVWGGDDNYLGKTNPTGVLGVDFGLNKSSGRYYFKK +IYAGDNSRPGYGSPLDRPGVNVKTGDYLLAVNGHQLKAPMNPYSLFVNTVGQQTTLTLAD +KPDGKGEHTVTVKPINNSLNLRLLNWIRTKRAYVNKKSDGKIGYIYMSDMESLGMTQFIH +QFYPQLSKQGLIMDDRFNGGGFIDQIVLERLRRVLIGMSTNRAHAAMRYPEQVLHGYKAC +LLNHYSASDGDMFPFYFRKYGLGPLIGERTWGGVRGYNRVWTLLDGGNLVVSQNSIYGLD +SKWAIENHGVTPDIKVDNLPGQVMEGKDPQLDTAINYIMKKLKEHPMPIPQPPAEIPAYP +SGNDAGGTN* + +>NODE_2#PROKKA_00127 +MNQEKKKLVLLAFVLLLAGISPNIFPAAQSGIASMSSLAVVLLIPSVVLIFILAILSQAL +GYNDLRKQILNGILAGLAGTVGLEIVREIGFHLGGMPGDMPKLLGVLLLNRFASGPDFWS +NVAGWSYHFWNGAAFGIIFSLIIGRGKIWMGIVYALLIGTGFMVSPATTSLGIGVFGLHF +KDGYQFLTTVYLAHIAFGSIVGLVVYKKNKDAPNIFKRLKLAFS* + +>NODE_2#PROKKA_00121 +MSQAEAPVIIKPNFKITNFLSKEPRSSLIHEIFSGLTAKQKYISSRFFYDRKGSALFEEI +TKLPEYYPTRTEKSILSAHAKEILGNPESLVIIELGSGDCSKISILFDNFPEQKMSNVKY +IPVDVSESAIIKSAEILSSRYEGLKIHGLLADFLKHLDLLPGATPRLICFFGSTLGNMTR +NQATDFLWNLKNIMNPGDRLLLGLDRVKGPEILYKAYNDKQGITAQFNKNILNVVNDVSG +TNFKTSDFGHLAFYNQNENRVEMHLKALYDMRITSKHFRDDIFIVKGESIHTENSHKFLP +EQIEQLALSSGLKFQASFTDVNKYFSLNLFEYPKLK* + +>NODE_2#PROKKA_00117 +MGLSYRINFKLGANAMVISDPSMPVPASLQFRQLKSDEYVMLDIPTDSLEVAYWGTKKPV +PAQYVLTEAQTAKVESAITSYNAEIKSLAKKYNLAFVDFNSIMKSIEHGGLTVDGIHFTT +AFITGNLFSLDGVHLTPQGNAVVANYFIQAINKQYGSHIPSVMVSDYPSVVFP* + +>NODE_1#PROKKA_00091 +MKLMETLNQCINAGHEMTKAIAIAQFNDDSPEARKITRRWRIGEAADLVGVSSQAIRDAE +KAGRLPHPDMEIRGRVEQRVGYTIEQINHMRDVFGTRLRRAEDVFPPVIGVAAHKGGVYK +TSVSVHLAQDLALKGLRVLLVEGNDPQGTASMYHGWVPDLHIHAEDTLLPFYLGEKDDVT +YAIKPTCWPGLDIIPSCLALHRIETELMGKFDEGKLPTDPHLMLRLAIETVAHDYDVIVI +DSAPNLGIGTINVVCAADVLIVPTPAELFDYTSALQFFDMLRDLLKNVDLKGFEPDVRIL +LTKYSNSNGSQSPWMEEQIRDAWGSMVLKNVVRETDEVGKGQIRMRTVFEQAIDQRSSTG +AWRNALSIWEPVCNEIFDRLIKPRWEIR* + +>NODE_13#PROKKA_00039 +MKIQQIIIREFEEMMAELKEVLAKMTCPLLGEDWLPM* + +>NODE_6#PROKKA_00256 +MEEAKNTYSKALDLLKNGLLKEGFVAALDQQANYRRVWARDSIITGLSALLADDTTLIEG 
+MKKTLISLKQHQHANGMIPSNVSFDADGNVTMVSYGTLTGKVDTNLWFIIGVMVYVRKTS +DTDLLKEMLPAIEKVFELLLSWEFNGRGLLYVPQGGNWADEFILEGYNLSEQLLYYWALS +EASAMDEKFSTKAKKLKDLIEINYWPTESNRSKVYHKTAFERQLEKGQTSHWLPGFKPAG +YHTFFDCFAHGLSFVLQFNSPEQEGEIIETLVRTTSETSGSLLPSFWPPVRETDAQWETL +QTNWIYKFRNQPGAYQNGGIWPISNGLLIAGLYRSGHKGMADKMKEALFLATALPENQFG +FYEYIDAFSWEPGGAKHQLWSAAGVIFAEKAAQNVFIV* + +>NODE_3#PROKKA_00193 +MSAQAAPFAVLSDIATRSRSQSRGLPAQEEAVELWNGIGFSLAGQLYVAPMGEVVEILHL +PRYTQVPGVRAFMVGVSNVRGRLLPLVDLGLFLDFPRSVV* + +>NODE_3#PROKKA_00178 +VSNTYTDLSSARLVELALERNEGKLAANGALVVNTGRRTGRSPMDRFIVEDPATAELIHW +GPVNRPFDAAKFDALWERVESHLEERDQFVSYVHVGADPEHYLPVKMTTETAWQNLFGRN +LFIRPDNYNPIDKGEWQILNAAGFVCEPERDGTNSDGCVILNFAERKVLIAGMRYAGEMK +KAMFSVQNFLLPEQDVLPMHCSANVGEDGDTCLFFGLSGTGKTTLSADEDRYLIGDDEHG +WGRGTVFNLEGGCYAKCINLSKKNEPIIWDAIRFGAIVENVVIDNDSREPDYDDVSLTEN +SRCAYPLEHVEKRVLENRGGEPRAVIFLTCDMTGVLPPVSILNKEGAAYHFLSGYTALVG +STEMGSSAKLRSTFSTCFGAPFFPRPAGVYANLLMKRMEEFGSRVYLVNTGWTGGPYGVG +KRFSIPTTRAIIRGIQTGALENVQTQHLDDLNLDVPVEVPGVDSNLLNPRNTWQDKEAYH +HKAQELIAQFVENFKKFDVSDAIVNAGPKLKD* + +>NODE_4#PROKKA_00219 +MVKKAPEVDVPSAKDRNFVTALARGLELLRAFGPEDDYLGNAELAERTGIPRPTVSRLTY +TLIELGYLRYCERLEKYRLGAGVLALGYRYLSRMGLRELARGPMQALADRTDCLVALGTA +DRLDMTYVETCQGAGPLVLRLEVGSRIPMATSAMGRAYLAALPDARRNEYREKIREVYTD +DYEAIWQGVEQGVEQYQKLGFCTALSDWNPHIAGVGVPLVLDGGSQIMAFNCGGAAMRLS +RSVLEKKLGPQLVEVVAEVQRQMHGRRLEAVS* + +>NODE_1#PROKKA_00081 +VRLSRGWAVASAWAALLGAAALTSWLFLRSHDHSPATDLASHAVERPDYLLHQAIVTRFA +KDGSRRYIIKARRIAHMPRNNIALLTRVDLDYFPVHGKPWHLQSDNGRLFANGTRLNLIG +HVRAHELDTPIPVHFLTTEVTVLLPEARLASRYRVILRQGHRETRGTGLAANLQTGTLSL +LKDVTSQYAP* + +>NODE_8#PROKKA_00286 +MERSSLALPCSGGSCLHAAVRAQMTMQRTAFSLSIALSAVLASALLVGIVPLLRGGDNYE +LAVKSALLVLVVGYAWLHWRAVTRRGRCLLKAAFCLNALVLVPLGATAALCHVFGGPKIV +PREAGAIGGAIAILAAAAAAMQVILLTRCRTIELTVPKGD* + +>NODE_11#PROKKA_00025 +MPMDWTPPPRGGGREPDINQVVQNLKNRLPVFKKARGLWLAVAVVLAIILGASSYYTCS* + +>NODE_1#PROKKA_00079 +MSQLRAENLHKRYRTREVVRGLNLNVNSGEIIGLLGPNGAGKTTTFYMILGLVPTDSGNI +YLDQRDITGLPIHARARAGLGYLPQEASIFRNLSVRDNLIAVLELGGHGTRAEQQRRADE 
+LLDELGVTHLAKDKGISLSGGERRRVEIARALANEPAFMLLDEPFAGVDPVSVADIKRII +DHLAKRGIGVLITEHNVRETLDICNRAYVMNRGTMLAEGSPKEIADNQTVREIYLGDKFT +L* + +>NODE_1#PROKKA_00083 +VNRPASATGLIESGREVIQIEAAAVSALESRLNETFAAACGLLLACRGRVVVTGMGKSGH +IGRKLAATMASTGTPAFYVHPAEASHGDLGMITSEDVMIALSNSGQTPEVVTIVPLIKRL +GVALIALTGEPDSMLARASDCHLDISVSREACPLNLAPTASTSATLAMGDALALAVSAAR +GFTPEDFARSHPGGRLGRRLLVRVADIMHTDDAMPIINESTRLGDALVTMSAKGLGMAMI +TDVQGRLAGVFTDGDLRRHLDQGVNLDTPMQHLITRECTVATPDMLAAEALRLMETRHIN +SLPVVTDDKPVGAFNMHDLLQAGVV* + +>NODE_4#PROKKA_00222 +MQYSSDEISNSGLDRPRAGGYRLRLALSGLGWGSTALMFALLALALIAIIAFVVIRGGAH +VNWATLSQTTQGYHGLLNAIEGTLLVTVGSLLIAAPVGVVTGIYLSEYQHRRSARFFSFL +CDVMIGVPSIVLGMFGYIAMVNFFGWQFSLLAGCITLSFMIMPYIARTSELALLQVPNSV +REAAYALGAGDRVVIFRVVLASCVPQILNGLLFAAAISMGETAPLIYTLGWSNYMWGGEF +FHHPVGYLTYVIWSFISEPSSAAHQLAYVAALLTTGFALLINILARSTIRKQSQHQSQ* + +>NODE_7#PROKKA_00274 +MEKPTVIHSSFTLERSYPYPAEKVFAALADPEKKRRWFADSPNHEVVKFAMDFREGGAER +MEYRFNEGTPFPGVMLVNSGHYEDIVPGRRVVLCSTMTIAGRKISVSLVTFDLLPQGEGS +ALLCTFQGVFFEGADGPEMRKAGWEFLVERLGEEVAREG* + +>NODE_1#PROKKA_00051 +MKVRASVKKICRNCKIIRRRGAVRVICSDPRHKQRQG* + +>NODE_7#PROKKA_00266 +MTKNQDCMTLPVRWEQAPAREDFWQELRTASRWLLLLDYDGTLAPFHQDRMKATPYAGVK +ERLEQLLKIEKGRIVVISGRQIEDLKQLLQLSQPVEIWGSHGREHLLHDGSYRLVDLTED +ERRVVEAVTARMSERGWAGQLERKPTAIAVHWRGLPVSEQKELREAAEQYFAEANPPDTL +EMMPFESGVELRSRSRTKGQVVAEVLAEEPADIPTAFLGDDWTDEDGFAELRGRGVGILV +RPEARESCADYHLTPPEELLEFLDRWLENAKESIR* + +>NODE_11#PROKKA_00014 +MKRAPFITLEGGEGCGKSTHASLLAQRIRELGLPALLTHEPGATELGGALRRLLADPAGP +DPCPQAELLLYLADRAQHLEQVIRPALAAGEAVVCDRFADSTQVYQGLARGLGADRVREL +NRWLCGDTWPDLTIVLDLDPALGLARARHRQGKQGLDRLEQAGGEFHRLVREGFLELARQ +EPERVRLIEAAGSRPEVARRIWEVARPLLESWRKTREA* + +>NODE_4#PROKKA_00210 +MLDAYLYDGLRSPFGRHAGALSPLRPDDLLATVIQALIARSGFAKEQIEDIVIGCTNQAG +EDARNVARHAGLLAGLPVETAALTVNRLCGSGLAAVADAARMITCGEGELILAGGVESMS +RAPFVMAKAESAYSRQLRTFDSTIGARFPNPKVLAEFGSDTMPETADNVARDLGISREAA +DAYALQSQQRYEAARQDGFYREEVLPVEVPQGRKQPPRWVSADEHPRPDTDTAKLARLSP +LFEEGVVTAGNASGINDGAAALLIGSRSVGQRLDIKPRARILSAAAAGVPPRVMGLGPVP 
+AAQKALARAGLSLNDMDIIEINEAFAAQVLGCCQQLGIAGDDPRLNPNGGAIAVGHPLGA +SGARLTLTAMRQLERINGRYALVSLCIGVGQGVAAVIERM* + +>NODE_7#PROKKA_00259 +MAKGGLGVVVRGIPGYRARIRPIVSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAG +VIAKRPAPIALPNSCAAEWRMAPDAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_4#PROKKA_00199 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_7#PROKKA_00270 +MPVRLMPMPEVDAKVTLTMLLGTPVTDATGKLRGKVRDVAVATGAEAGRVAGLVVKNRDG +LQVVTSVDLRRTPSGTLELRADAQMRPLTGEESFLLLRQDLLDRQIIDVHGRKVVRVNDV +ELDWWNQERGAAGQQESLRVTGVAVGLRGALRRLLLGLMPQATLDRLARKVPQRSIPWEF +VDIVEVDPARRVKLKIEHERLARLHPSDIADILEDLAPAEREALLRSLSEELAADALEEL +DPKLWRSLLQSMDSETAAGIVEEMDPSAAADLLADLSKADSEAILGEMDPEERQEVKELL +EFREDSAAGRMTTEYVAVPEDATVADCVAALREFEGDPETITEIYLLGEDDLLVGVVPLA +RLVLAREETRAQVLSEPETITCELEAHQNEVAELFDKYNLRALPVVDEQRRLAGVVEADH +VIAFLRERR* + +>NODE_1#PROKKA_00049 +VSRITIHWPSFYNVVTGKTLALPNLIALQHIPLSPAGVIAKRPAPIALPNSCAAEWRMAP +DAVFSPYASVRYFTPHMVHSQYNLL* + +>NODE_1#PROKKA_00075 +VLAGIAEWYGEDEAVPFRVISGTSAGAMNAAYLSANMENFAHGTQRLAQVWSQLEAQQVY +RPEYRKVFGALLHWAWSLLSGGLGDSNPRSLLDNSPLRALLAENIDFDAIARNIERGLLR +GVSVTVAGYSTERSLSYFQAETGVQSWWRQRREGRPVQMTLDHVMASLGLPIIFPAVKVA +GEWCGDGSTREFAPLSPAIHLGAKRVLVIDTQYPAPQHVLGQDQAYPSLSKIMGYLFDSV +FSDSLYADLERTKRINRTLDYIKRQSGHEPPELGLSHIDTLVIAPSRRPLEIASRYESHL +PKSMRWILRSLGGDVSSGDQLLSYMLFQSGYCSEMVALGRHDAHARREEIGQFLGLSKIK +VR* + +>NODE_3#PROKKA_00161 +VKIHSQLRNSTFQIALLYMVVFATSVFLLLAFIYWRTAGFMTAQTDETIEAEITGLAEQY +RSRGINGLIAIVRERVARDPNGKSLYLFTTSDYDKLAGNLSAWPQNVQASNGWINFTLND +SVGWRGEPHLARARVFKVQGGLRLLVGRDVQELTTLKHLIERAIDWGMGITLALSLFGGF +MISRSTAKRIEVINQTARKIMNGHLSLRIPARGTGDDFDQLADNLNQMLDRIVHLMEGIR +HVSDSIAHDLRTPLTRLRTQLENTLLTVEGDAARDQVARAVAETDQLLATFNALLRIARL +EMTGHSADKSPVQLGPLVHDACELYEALAEDKEQEFVLDIPQDVTIEGDRDLIFQVVSNL +IDNAIKYTPPEGNIRVLVTQEEDDAIFQVEDSGIGVPDSEKDKVFERFYRVAKSRSQPGN +GLGLSLVSAVVDMHQGRIELADRYTDGRENPGLKVTLRFPRLKPNRRKEIKPTSTTEPEG +SAS* + +>NODE_8#PROKKA_00296 +MRIATFLLPLLVAIAGAAPKVETGTDGLRVRTARYVATFSTDSGLLASLALVDGTPLLTS 
+PRLYADVLPDGRKNFSAKAKAAPKAKPQPDGSLLVEVAGALLDKDGKPHPTYPFTYTASY +RFDDTAQVRVSVSVIPGFDSDAVFGFLGQVLSTASQREFFVNTADGLISEMAATHSGRTY +QSESEPLDLKDPYLGVLLKTGQILQFRLVSGAESLLNVFFHDSGAGPTHLFLCPLSGSNP +RQAKTGKAWQQELVIEAMPLAEWTKSR* + +>NODE_6#PROKKA_00242 +MLEQKELILLNISGEDKPGLTASLTEILSQHNVNILDIGQSVIHKDLGLGILFEVPKKYR +SASILKDLLFKAYELKSHIKFTPIPIEEYEKWVAEQGKERFIITLLAHKLTALHLSKVSS +LIASQKLNIDTISRLSGRKSLNGNNKVTNSVVEFSVRGTPLDINAMKQSLMNIASETGID +IAFQEDNIYRRSRRLVCFDMDSTLIQTEVIDELAQKAGVGDEVKKITESAMRGEIDFKES +FKKRVSLLKGLDESVMKGIAENLPITNGAERLLSTLKQYGYRTAILSGGFTYFGNYLKTK +LGFDYVFANELEIKNGKLTGKHLHEIVDGKRKAELLELLAFKEDIHLEQVIAVGDGANDL +PMLEKAGLGIAFHAKPKVKASAQHAISATGLDTILYLLGFRDREINAS* + +>NODE_1#PROKKA_00072 +MSGDSILRIFWWLVLGAWMSGIGVMLGRELGLTVLLRYLGRNESERRELLAPHIERPSEG +HQVWLLLGGGALMAAWWPLFTATLFGGLWLVLLFMVLAVLVGPVGHGYRKRLSEHTRGPW +DLLWAGISLAALLVFGLAIGATVSGVPLHFDAHMDAMWGGFFSRFTPYSLLVPGLMAITF +GLWLAAARAAHECTGAVAARARALLLPVGGVTLLIFAGGAAWATQLPGYAVGGLPKVGAS +PLDGTTFAVGGAYLERFLSHLPLVIVPVLTALAIVGALFFSWRGRLQRVGPLVVIAVVGM +VATLGAMTYPVILPSFAEPAQSLTLWNAAAERPVLVAFLVWLGILVPVVLGYELWLRRRN +AQTVVAGSTAR* + +>NODE_2#PROKKA_00135 +MKRLGKHFAVLILAAGYSGRMGMPKAFLPYDANRTFLEKIVSEYLEFGCNLVGVVLNEEG +MKLYEKMQLEHKNNITAILNPAPEKERFFSLQTGLKRLKSEGAVFLHNVDNPFLTQDILQ +ALASAFKTQAYVVPTYHEEGGHPILLSQEIVKALIETSDYEQNLRVFMESYDQIQVPVSD +PNVLANINSPQEYERLFGRSF* + +>NODE_13#PROKKA_00040 +MAVDKNEFFRQATIRICGSLDIETALERCFHYLEQMLPVDEIGLYLYDPGLNVFQRIAGV +KSHGKNEFSPVSPLPEASKEKWSAIWADMGDITIINRVEERPEIQEVIEMYGLEHDISLM +SMRLELEGKRVGLLLLRTRGRDRYEEKHARLMLLLHEPFAIAMTNALQHQELIRLKDILT +DDNRYLRRQIRDLSISEIVGADLGLRHVMEMVQQVTQLDSPVLLLGETGVGKGVVAHAIH +DASPRKNAPFVSVNCGAIPESLFDSELFGHEKGAFTGAIAQKKGRFERADKGTIFLDEIG +ELPPHAQVRLLHVFQEKIIERVGGTTPISVDVRIISATHRNLEEMIRSGKFREDLWFRLN +VFPIHIPPLRQRKEDIPALVHHFIEKKTIDLKMQEQPRLSPGAMDQLMAYDWPGNVRELE +NIVERALIQFKGGMLRFDGLIFSPIASSRGGGHETTDRFLSIDEVNAIHIRRALKVTNGK +INGPGGAAELLGINPNTLRKRMNKLNIPYKKKEIGQGVD* + +>NODE_9#PROKKA_00305 +VKRTLLSTAIAVLFLLSAVSVYAAQKKPYKVGCVFAITGGASWLGGPERNTAEMLAKQIN 
+AAGGINGHKLELFIEDTQGDNTRAVNAVKKLIKKNHVCAIIGPSRTGTSMAVIPIVQQAK +IPMISCAAAESIIAPVSKRKWIFKTTQNDSDAVRRIYENMDKKGIHKIGIITGTTGFGAA +GREQLKALAPHYKIKIVADETYDPTDTDMTAQLIRIRNSGAQAVVNWSIVPAQSIVPQNM +RQLKMKIQLYQSHGFGNIKYVEAAGAAANGLIFPGGRLLAADTVSASNPQKAILMKYQKE +YEAAFKEPPSTFGGHAFDAISILAKALKKVGDNPAKLRNVIEHTNFVGITGVFHYTKTNH +CGLNQNAFEMLTVKNGKFVVLHQ* + +>NODE_1#PROKKA_00087 +MTEKPRPSRKPYLVRALHEWMGDASLTPQIIVDATVDHVDVPVEHVHDGKIVLNLSLEAV +RDLELGNDAITCTARFGGVARSLWVPMKAVLGIYARETGEGVAFACS* + +>NODE_4#PROKKA_00204 +MQRTARAALCREWNGPIQVETIRVDPPRRNEITIKLRACGVCHSDLSAATGVIPFPPPLV +LGHEGAGTVIAVGEGVTDFQEGDHVVSSFIYMCGKCRQCSRGRPVLCEQAHKALHHLPDG +TVRTHDGDGNPLNVFGACGVMAEYATLHVNNAVKIDPDVPLERAALVGCAVMTGAGSVFN +TAQLEPGSTAAVFGVGGVGLNAIQGCAIAGARVIVAVDTNEEKLAMARQFGATHTVNARE +HDDAGKAVKKMTGGGDYAFECVGSGVTVAQAYGSLGRGGTAVVVGVADVKDKTTFRTLSL +PADERTLKGSWLGSARPQFDFPRLLGLYQGGRLKLDELVTHTYTIDEAPQAFEDLKAGRN +ARGVILFD* + +>NODE_21#PROKKA_00095 +VVAFGFFHIETPMLLLEELFFFAPEFCDALVRLAQAPPERDWQGSWTGFEIPDPASRGDL +HGAIAGRRLEGFIGALYARWPFPRDPHQFRQRTRGAAPREVVEQEAESFGRRREVVLEAR +AGGGEFAIGQYRFHRTGFLALVDYVWRGGMPGWEGGRRPEWLLEAARRIQESDSPWLAGL +DWDPARLGFTI* + +>NODE_2#PROKKA_00111 +MNPLLLFILIVSGGLYLAGHYLHKPILKYIFKPFTTFIILFFAFMQLPDVSVQYKDYILI +GLLISLIGDIFLLWPEKRFIHGLGAFLLAHVLFILAMVSDFGPYYNWQYLIPIALYMVIF +LWIILPKSGKFVIPIIVYALVLMVFFWQAAGRAIYLAESSSMQAMFGATLFVASDSILAY +NKFVKNYKWAEFFIIITYWAALYFIALSV* + +>NODE_1#PROKKA_00050 +MFLAGALIVFRIGIHVPVPGVDPTAYAHLFNQNAGGILGIFNVFSGGALEQMSIFALGVM +PYISASIIIQMLTSVVPSLEALKKEGQAGQRKITRYTRYSTVALALFQSLGAAFALQSQG +VALTAGPGFIVTATVSLVTGTMFLMWLGEQVTERGLGNGISMIIFAGIVAGLPSAIASTL +ELVRNGELSSIVAILIFVGVLLITAFVVFVESGQRRITVNYAKRQQGRRMYAAQTSHLPL +KLNMSGVIPPIFASSIILFPVTLAGWLGQSSGFGWLNTLQLWLSPGQPLYVALFAVLIIF +FTFFYTALTFNSDETADNLKKSGAFIPGIRPGKQTAGYIDTVLTKLTLWGALYLTAVCLL +PEFLIAYAHVSFNFGGTSLLIVVVVAMDFMGQLQAHMMTHQYEGLLKRARMRGLQR* + +>NODE_7#PROKKA_00282 +VSACSHRPDARPLRIGQAQKDLRIPANKLHQEASPAGPQQILGRHLSQLPRRPRPAPPPE +DIEDNAGPDKLKNRRRIHFLCCRHNSVRIAHPEPHLRRNAVIPVSGQLASNAPDSVPQRR 
+RGHAQIQHPQGTNLIGPGLEQQRQNPGNHPAKPGKPGPADQQRWMMHQQHWLFQHMVELC +AHHSSHRRKGDDADRIGINLPALEVLVKEVAPDHRGKPHHDAVCANR* + +>NODE_2#PROKKA_00144 +MEFKFEKLIIWQKAMEFGEEINSIAHKFPKDEVYNLSSQIRRAVDSIALNISEGSIGQSN +LEFKKFMSYAIRSLAEVVSCLHKAKRRNYITEDEFKKQYEFAYNLMNMMVAFREKIK* + +>NODE_2#PROKKA_00141 +MNIEAFYSLSYGLYIIGTASKGKKNGYVANTAFQVTASPEQIAISCNKDNLSEQMIDESG +YFSLSVLEKDASKEIINRFGYKSGKTLDKFEGTKYFETNNGIPVVTEECVAWFECKVEQK +VDVGTHIIFIGRVLNGEYLDENKESLTYTYYRQVRHGLSPKNSPTYVDKSLLPEKEKKEE +KAEETPAEKPKGKSMQKWECIVCGHIYDPAVGDPEQNIPPGTAFEDLPDDWVCPDCGAEK +EDFEPIG* + +>NODE_2#PROKKA_00118 +VKWDDNWALKLLIQNISLQAIFYQPTISFNLGNKFGVGAGLVYATGNVKMNSALNYSGNS +GFNLNGKTHNFGFNVGVHYKISDQWSLGATYRSEIKMNVKNGNAAFFVPGSLSSIIPPSN +HFSASLPLPANFDFGVAYQATKKLLLAAELDWVRWSVYDSLSFHFATNPQLLNNSSPKLY +KDQWIPRIGAQYQVSKKLMVRAGAYYELSPANVSGYYKTNTVVPGIGINYHF* + +>NODE_10#PROKKA_00002 +MGRYILKRLWHTVYVVVGISVIAFFFIHLSGDPVMLMLPADASHQEIEELRQQLGFNDPL +YVQYWRFATKAVQGDFGESLYYHVPAMELILERLPASLELALAAMAIALVVAIPLGILSA +VKRGSFIDLGSMLGALFGLSMPHFWLGIMLILLFSVKLGWLPTSGRGGWEHLIMPSLALG +MSLMAMFARLTRSVMLEVLGQDYVRTARAKGLKERLVIGKHALKNALIPLVTVAGMQFGF +LIGGTVIIETVFAWPGVGRLVVQAIFNRDYPLVQATVLVLAVLFVLVNLLVDLLYVYLDP +QISYLEEK* + +>NODE_9#PROKKA_00313 +MHLVKKYANRKLYDTTDKQYITMEKLAELIKSGSEVMIIDNETGDDLTAQVVSQLLAREK +NEDDTALPSSVLMQMLRKGRGTLFGYGKKYISLWQSAVLMSRDENEKLINTLVKDKELSE +TEGRTLKKEITAYTNGLKTWIRENIDQRVNEALNMMNLASKEQVKELIDQVESLSLKVQS +LEREIRRKN* + +>NODE_7#PROKKA_00278 +VNVLRKSFVVAVVLLSFGASAAVAQTTVAASVYGAFRSSTRTGGISNFTVENPSNAAGFL +LELRHISNPLMGYELTYSYHRANEAYSNTLKVLCPISPGGSCPEQITTAGVSANAQEVTG +DWVVSFPLANLKPFVLVGGGVIVTSPATGSVTATITDFDPVTNMMSQTTSSMPTQTQTKG +VFQYGAGLDWTVLPHIGLRFQYRGNVYKAANLTKVFTSTDKFTQTAEPVVGVFFRF* + +>NODE_3#PROKKA_00158 +MLSRAIMAPMDLVLDTAEAQRMALFFDHILIWKLSRRTFNKEDNQRYSSELRYLRERGVA +LLCGLDIPNLISFGRADGTTWNPMEEMKKDCDLLLPFQVGTGVPDQAENEAHADRLIRHL +SSRLMYNDKPVVAHAEAVNLNTQGNELNALEITINNIPMPPENIPWEDLIQFRNEEETVA +KLRALRIWLKDRSSAGQSPREIQEELEHLLYEYRKYMEIQHKKFRQGILSTLISSTPEIV +ASVATLNFGAAIKSVFDIKGRYLGLSEAELSAPGREVSYIAKARDFLTS* + +>NODE_11#PROKKA_00020 
+MKLVVFLKQVPGVTEIPWDPASGHLRREKAPGMMNPACRHALEAALILKEQHGGELTAIS +MGPPAAEEILREALALGADRAVLLSDPRLAGADTPATSYTLSLAVRAVCPDCDLLLLGNQ +TSDSETGQVGPHLAEELDLPSAINVEELELDGEVLRVKRLCDNFLETLEMDLPALVTINT +QGHPPRQVPLGGVEDAFSRGEFLVLNAEDLKADLARVGMTGSAGRIVKVYPAGGERKGEL +IKGAPKRCVLELLERHGDLLGGYLRKDLGGGR* + +>NODE_13#PROKKA_00038 +MELTNDEREFFALVNRASLLNPFSDERNDVDLKLAGLPSAAPGTGRVKKAIQSVNERIRQ +LETDGRADISQYTGRDRELVEKAFLFELFYRFRKQFDELIESQIASDDVPARIPFYNDAF +SAMQKRGFTEEDFRRYFALAFQIRRAFYFIGRSLVGNSASMKSLRLNLWNNVFTHNMDLY +DRYLWNRMEDYSTLILGETGTGKGAAALAIGRSGFIPLKKKSFEESFTRSFISLNLSQFP +ETLIESALFGHKKGAFTGAIENYQGIFEQCSPYGAILLDEIGEVSKPIQIKLLQVIQDRV +FTPVGSQTRSRFNGRVIAATNRPLETLRGKGFFRDDFYYRLCSDIIVVPPLRQRVQEDPT +ELDVLLDFTINRLVGRSSPELVQIVREVIDRHLGNDYPWPGNVRELEQCVRRVLLKGIYT +GDAAVADIDLCRSLTTGIEQGNIDANSLTSGYCYLLYQRHRTFEEVARRTGLDRRTVKKY +IQDWTSSHSTDNPPETDIPG* + +>NODE_1#PROKKA_00055 +MEISVSEFLKPRIAGLTELGENRTRIVLEPLERGFGYTLGNSLRRVLLSSMPGAAVVEAE +IDGVLHEYTAIDGVQEDVVEILLNLKLLAIRMHAREEATLTLNATGAGVVTAGDIQVDHD +VEIVNKDLVIAHLAKNGKLSVRLKVMRGRGYMPVVKRYADESQGRKIGKLKLDATFTPIR +RVAYYVEAARVEQRTDLDKLILDIETNGTIGAEEALRRAAGILTDQLSVFADLSSVSSHT +PTESRSVKPILLKPVEELELTVRSSNALKAERIRFVGDLVQKSEDELLKTPNLGRKSLTE +IKDVLARHELALGMKLEDWPPAALAERRAS* + +>NODE_10#PROKKA_00007 +LALHTMAYLAAHPGRLISNRVIARDLGVSAAHLSKVLQRLARAGLLESLRGPTGGFRLGR +PAGEISLMEVYEAIDGKFQPSSCLLGRPVCRGGKCVLGELGRNLERQTREYLLNTKLSEF +EDFMCFEEGN* + +>NODE_2#PROKKA_00147 +MDAGEEKASPKKTILIAEDDETSFFFLKFVLAKENVNILYAQSGQEAVDICEAHPEIDLI +LMDIKMAGMSGIEATQLIKKRNPRVPVIAQTAFALSSDKENILKAGCDDYITKPIRKEEL +LEKVNFFLYSKKES* + +>NODE_1#PROKKA_00058 +MNPYERYLLPWLIDAVCALPAAARERAKIVPRARGEVLEIGIGTGHNLPYYAPRRVAGVT +GIDPGVLRRRIMRRAHAAGIEVKLLSLSAESIPAEDASFDTLVSTFTLCSIPDVERALAE +MRRVLKPTGRLLYLEHGTAPDPRVRRWQDRLTPWWKPLAGGCHLNRDIPRLITGAGFDIV +EQHSEYIRGPRILSYVFRGEAQPIAVAGSK* + +>NODE_3#PROKKA_00190 +MSVKLGIVMDPIGAIHYKKDTSLAMLLAAQRRGWELHYMEMQDLYLRDGEPRARTQALTV +AANPDDWYSLGEPSDRALASLDVILMRKDPPVDKEFLVTTWMLEAAERLGTLVVNPPQAL +RDCNEKLFATWFPQCTPPLVVSRDAARLRAFHAEHGDVVLKPLDEMGGRSIFRVREDGDN 
+LGVIIETLTKDGSHQIMAQKYLPEITQGDKRILLVDGEPVPYALARIPSQGEHRGNLAAG +GRGEGRLLTDRDRWIVEQVQPMVREKGLLFVGLDVIGDYLTEINVTSPTCVRELDREYDL +DISDQLMQVIADRLARR* + diff -r 000000000000 -r 68a3648c7d91 de_prokka/tbl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/de_prokka/tbl Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,6309 @@ +##gff-version 3 +##sequence-region NODE_10_length_13322_cov_17.9253_ID_19 1 13322 +##sequence-region NODE_11_length_12385_cov_59.906_ID_21 1 12385 +##sequence-region NODE_12_length_10380_cov_58.6117_ID_23 1 10380 +##sequence-region NODE_13_length_9239_cov_583.686_ID_25 1 9239 +##sequence-region NODE_17_length_5536_cov_76.8441_ID_33 1 5536 +##sequence-region NODE_1_length_50014_cov_374.697_ID_1 1 50014 +##sequence-region NODE_21_length_3078_cov_41.6478_ID_41 1 3078 +##sequence-region NODE_22_length_2891_cov_11.4392_ID_43 1 2891 +##sequence-region NODE_25_length_2759_cov_91.0869_ID_49 1 2759 +##sequence-region NODE_29_length_2081_cov_1172.16_ID_57 1 2081 +##sequence-region NODE_2_length_41759_cov_213.726_ID_3 1 41759 +##sequence-region NODE_39_length_1472_cov_66.0355_ID_77 1 1472 +##sequence-region NODE_3_length_39215_cov_155.142_ID_5 1 39215 +##sequence-region NODE_42_length_1523_cov_23.4108_ID_83 1 1523 +##sequence-region NODE_4_length_32829_cov_185.347_ID_7 1 32829 +##sequence-region NODE_50_length_1320_cov_1878.61_ID_99 1 1320 +##sequence-region NODE_5_length_30845_cov_61.6996_ID_9 1 30845 +##sequence-region NODE_6_length_29555_cov_90.739_ID_11 1 29555 +##sequence-region NODE_7_length_23944_cov_113.632_ID_13 1 23944 +##sequence-region NODE_8_length_21143_cov_115.55_ID_15 1 21143 +##sequence-region NODE_9_length_20531_cov_182.121_ID_17 1 20531 +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 51 356 . + 0 ID=PROKKA_00001;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00001;product=hypothetical protein +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 865 1791 . 
- 0 ID=PROKKA_00002;gene=gsiC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P75798;locus_tag=PROKKA_00002;product=Glutathione transport system permease protein GsiC +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 1853 3352 . - 0 ID=PROKKA_00003;gene=hbpA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P33950;locus_tag=PROKKA_00003;product=Heme-binding protein A precursor +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 3401 4405 . - 0 ID=PROKKA_00004;gene=oppF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P24137;locus_tag=PROKKA_00004;product=Oligopeptide transport ATP-binding protein OppF +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 4398 5318 . - 0 ID=PROKKA_00005;gene=oppD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P24136;locus_tag=PROKKA_00005;product=Oligopeptide transport ATP-binding protein OppD +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 5673 7919 . - 0 ID=PROKKA_00006;eC_number=3.6.4.-;gene=priA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P17888;locus_tag=PROKKA_00006;product=Primosomal protein N' +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 8063 8455 . + 0 ID=PROKKA_00007;gene=iscR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AGK8;locus_tag=PROKKA_00007;product=HTH-type transcriptional regulator IscR +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 8455 9315 . + 0 ID=PROKKA_00008;eC_number=1.8.1.-;gene=asrC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A1Y2;locus_tag=PROKKA_00008;product=Anaerobic sulfite reductase subunit C +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 9345 10373 . 
+ 0 ID=PROKKA_00009;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00009;product=hypothetical protein +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 10482 11309 . + 0 ID=PROKKA_00010;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK09894;locus_tag=PROKKA_00010;product=diguanylate cyclase +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 11401 12543 . + 0 ID=PROKKA_00011;gene=mlaE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P64606;locus_tag=PROKKA_00011;product=putative phospholipid ABC transporter permease protein MlaE +NODE_10_length_13322_cov_17.9253_ID_19 Prodigal:2.60 CDS 12543 13280 . + 0 ID=PROKKA_00012;gene=artM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P54537;locus_tag=PROKKA_00012;product=Arginine transport ATP-binding protein ArtM +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 92 748 . + 0 ID=PROKKA_00013;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00013;product=hypothetical protein +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 821 1477 . - 0 ID=PROKKA_00014;eC_number=2.7.4.9;gene=tmk;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9HZN8;locus_tag=PROKKA_00014;product=Thymidylate kinase +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 1691 3073 . - 0 ID=PROKKA_00015;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF04616.8;locus_tag=PROKKA_00015;product=Glycosyl hydrolases family 43 +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 3339 3641 . + 0 ID=PROKKA_00016;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00016;product=hypothetical protein +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 3860 6106 . 
+ 0 ID=PROKKA_00017;gene=flgE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A1J1;locus_tag=PROKKA_00017;product=Flagellar hook protein FlgE +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 6213 7121 . + 0 ID=PROKKA_00018;eC_number=3.5.2.6;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P52700;locus_tag=PROKKA_00018;product=Metallo-beta-lactamase L1 precursor +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 7197 8345 . + 0 ID=PROKKA_00019;eC_number=1.3.99.-;gene=acdA_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45867;locus_tag=PROKKA_00019;product=Acyl-CoA dehydrogenase +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 8374 9192 . + 0 ID=PROKKA_00020;gene=acrB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:G3KIM7;locus_tag=PROKKA_00020;product=Acryloyl-CoA reductase electron transfer subunit gamma +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 9189 10205 . + 0 ID=PROKKA_00021;gene=acrA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:G3KIM6;locus_tag=PROKKA_00021;product=Acryloyl-CoA reductase electron transfer subunit beta +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 10278 10790 . - 0 ID=PROKKA_00022;eC_number=5.3.3.2;gene=idi;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26173;locus_tag=PROKKA_00022;product=Isopentenyl-diphosphate Delta-isomerase +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 10787 11119 . - 0 ID=PROKKA_00023;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00023;product=hypothetical protein +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 11233 12027 . 
+ 0 ID=PROKKA_00024;eC_number=3.8.1.7;gene=fcbB2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9LCU3;locus_tag=PROKKA_00024;product=4-chlorobenzoyl coenzyme A dehalogenase-2 +NODE_11_length_12385_cov_59.906_ID_21 Prodigal:2.60 CDS 12140 12319 . + 0 ID=PROKKA_00025;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00025;product=hypothetical protein +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 274 513 . - 0 ID=PROKKA_00026;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00026;product=hypothetical protein +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 510 977 . - 0 ID=PROKKA_00027;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00027;product=hypothetical protein +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 987 1460 . - 0 ID=PROKKA_00028;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00028;product=hypothetical protein +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 1514 5431 . + 0 ID=PROKKA_00029;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00029;product=hypothetical protein +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 5657 6466 . + 0 ID=PROKKA_00030;eC_number=4.1.2.-;gene=griI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:A0JC77;locus_tag=PROKKA_00030;product=2-amino-4%2C5-dihydroxy-6-one-heptanoic acid-7-phosphate synthase +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 6537 7886 . - 0 ID=PROKKA_00031;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03972.8;locus_tag=PROKKA_00031;product=MmgE/PrpD family protein +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 7883 9058 . 
- 0 ID=PROKKA_00032;gene=yiaO;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P37676;locus_tag=PROKKA_00032;product=2%2C3-diketo-L-gulonate-binding periplasmic protein YiaO precursor +NODE_12_length_10380_cov_58.6117_ID_23 Prodigal:2.60 CDS 9065 10285 . - 0 ID=PROKKA_00033;eC_number=1.4.1.18;gene=lysDH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9AJC6;locus_tag=PROKKA_00033;product=Lysine 6-dehydrogenase +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 732 833 . + 0 ID=PROKKA_00034;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00034;product=hypothetical protein +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 926 1183 . - 0 ID=PROKKA_00035;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00035;product=hypothetical protein +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 1234 2226 . - 0 ID=PROKKA_00036;eC_number=1.1.1.95;gene=serA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A544;locus_tag=PROKKA_00036;product=D-3-phosphoglycerate dehydrogenase +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 2465 3550 . + 0 ID=PROKKA_00037;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P00198;locus_tag=PROKKA_00037;product=Ferredoxin +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 3589 5091 . + 0 ID=PROKKA_00038;gene=nifA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P03027;locus_tag=PROKKA_00038;product=Nif-specific regulatory protein +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 5369 5482 . + 0 ID=PROKKA_00039;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00039;product=hypothetical protein +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 5567 7126 . 
- 0 ID=PROKKA_00040;gene=fhlA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0CL46;locus_tag=PROKKA_00040;product=Formate hydrogenlyase transcriptional activator +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 7397 7984 . + 0 ID=PROKKA_00041;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00041;product=hypothetical protein +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 8057 8314 . + 0 ID=PROKKA_00042;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00042;product=hypothetical protein +NODE_13_length_9239_cov_583.686_ID_25 Prodigal:2.60 CDS 8407 8508 . - 0 ID=PROKKA_00043;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00043;product=hypothetical protein +NODE_17_length_5536_cov_76.8441_ID_33 Prodigal:2.60 CDS 179 964 . - 0 ID=PROKKA_00044;eC_number=3.1.1.85;gene=bioH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8GHL1;locus_tag=PROKKA_00044;product=Pimeloyl-[acyl-carrier protein] methyl ester esterase +NODE_17_length_5536_cov_76.8441_ID_33 Prodigal:2.60 CDS 1107 2195 . + 0 ID=PROKKA_00045;gene=pipB2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8ZMM8;locus_tag=PROKKA_00045;product=Secreted effector protein pipB2 +NODE_17_length_5536_cov_76.8441_ID_33 Prodigal:2.60 CDS 2298 3593 . + 0 ID=PROKKA_00046;eC_number=3.7.1.19;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q93NG6;locus_tag=PROKKA_00046;product=2%2C6-dihydropseudooxynicotine hydrolase +NODE_17_length_5536_cov_76.8441_ID_33 Prodigal:2.60 CDS 3695 5413 . - 0 ID=PROKKA_00047;eC_number=6.1.1.10;gene=metG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P23395;locus_tag=PROKKA_00047;product=Methionine--tRNA ligase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 732 833 . 
+ 0 ID=PROKKA_00048;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00048;product=hypothetical protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 926 1183 . - 0 ID=PROKKA_00049;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00049;product=hypothetical protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 1278 2528 . + 0 ID=PROKKA_00050;gene=secY;inference=ab initio prediction:Prodigal:2.60,protein motif:HAMAP:MF_01465;locus_tag=PROKKA_00050;product=Protein translocase subunit SecY +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 2542 2655 . + 0 ID=PROKKA_00051;gene=rpmJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A7Q6;locus_tag=PROKKA_00051;product=50S ribosomal protein L36 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 2738 3097 . + 0 ID=PROKKA_00052;gene=rpsM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A7S9;locus_tag=PROKKA_00052;product=30S ribosomal protein S13 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 3121 3510 . + 0 ID=PROKKA_00053;gene=rpsK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A7R9;locus_tag=PROKKA_00053;product=30S ribosomal protein S11 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 3529 4155 . + 0 ID=PROKKA_00054;gene=rpsD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A7V8;locus_tag=PROKKA_00054;product=30S ribosomal protein S4 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 4174 5166 . + 0 ID=PROKKA_00055;eC_number=2.7.7.6;gene=rpoA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A0Y1;locus_tag=PROKKA_00055;product=DNA-directed RNA polymerase subunit alpha +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 5180 5560 . 
+ 0 ID=PROKKA_00056;gene=rplQ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AG44;locus_tag=PROKKA_00056;product=50S ribosomal protein L17 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 5568 7001 . + 0 ID=PROKKA_00057;eC_number=1.8.1.4;gene=lpdA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A9P0;locus_tag=PROKKA_00057;product=Dihydrolipoyl dehydrogenase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 6998 7630 . + 0 ID=PROKKA_00058;eC_number=2.1.1.-;gene=ycgJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O31474;locus_tag=PROKKA_00058;product=putative methyltransferase YcgJ +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 7667 7957 . - 0 ID=PROKKA_00059;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00059;product=hypothetical protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 8151 8462 . - 0 ID=PROKKA_00060;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00939.13;locus_tag=PROKKA_00060;product=Sodium:sulfate symporter transmembrane region +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 8690 9526 . - 0 ID=PROKKA_00061;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00061;product=hypothetical protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 9601 10893 . - 0 ID=PROKKA_00062;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF07992.8;locus_tag=PROKKA_00062;product=Pyridine nucleotide-disulphide oxidoreductase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 11782 13551 . - 0 ID=PROKKA_00063;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03600.10;locus_tag=PROKKA_00063;product=Citrate transporter +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 13548 16400 . 
- 0 ID=PROKKA_00064;gene=uvrA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A698;locus_tag=PROKKA_00064;product=UvrABC system protein A +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 16503 18152 . + 0 ID=PROKKA_00065;gene=yveA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O07002;locus_tag=PROKKA_00065;product=Aspartate-proton symporter +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 18374 21304 . + 0 ID=PROKKA_00066;eC_number=3.1.4.52;gene=gmr;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P77334;locus_tag=PROKKA_00066;product=Cyclic di-GMP phosphodiesterase Gmr +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 21644 23653 . + 0 ID=PROKKA_00067;eC_number=2.2.1.1;gene=tktA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P27302;locus_tag=PROKKA_00067;product=Transketolase 1 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 23714 24724 . + 0 ID=PROKKA_00068;eC_number=1.2.1.12;gene=gap;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P00362;locus_tag=PROKKA_00068;product=Glyceraldehyde-3-phosphate dehydrogenase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 24728 25909 . + 0 ID=PROKKA_00069;eC_number=2.7.2.3;gene=pgk;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q3K5F2;locus_tag=PROKKA_00069;product=Phosphoglycerate kinase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 25902 27365 . + 0 ID=PROKKA_00070;eC_number=2.7.1.40;gene=pykA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P21599;locus_tag=PROKKA_00070;product=Pyruvate kinase II +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 27365 28957 . 
+ 0 ID=PROKKA_00071;gene=fdhC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:M1V1V5;locus_tag=PROKKA_00071;product=Fructose dehydrogenase cytochrome subunit precursor +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 28959 30074 . + 0 ID=PROKKA_00072;eC_number=1.10.3.10;gene=cydB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0ABK2;locus_tag=PROKKA_00072;product=Cytochrome bd-I ubiquinol oxidase subunit 2 +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 30088 32715 . + 0 ID=PROKKA_00073;eC_number=3.5.1.97;gene=quiP;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9I4U2;locus_tag=PROKKA_00073;product=Acyl-homoserine lactone acylase QuiP precursor +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 32636 33370 . - 0 ID=PROKKA_00074;eC_number=3.6.-.-;gene=soj_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q72H90;locus_tag=PROKKA_00074;product=Chromosome-partitioning ATPase Soj +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 33526 34614 . + 0 ID=PROKKA_00075;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01734.1;locus_tag=PROKKA_00075;product=Patatin-like phospholipase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 34623 35519 . - 0 ID=PROKKA_00076;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK05416;locus_tag=PROKKA_00076;product=glmZ(sRNA)-inactivating NTPase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 35624 35947 . - 0 ID=PROKKA_00077;gene=hpf;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AFX0;locus_tag=PROKKA_00077;product=Ribosome hibernation promoting factor +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 35952 37367 . 
- 0 ID=PROKKA_00078;gene=rpoN;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A171;locus_tag=PROKKA_00078;product=RNA polymerase sigma-54 factor +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 37399 38124 . - 0 ID=PROKKA_00079;eC_number=3.6.3.-;gene=lptB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45073;locus_tag=PROKKA_00079;product=Lipopolysaccharide export system ATP-binding protein LptB +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 38121 38651 . - 0 ID=PROKKA_00080;gene=lptA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45074;locus_tag=PROKKA_00080;product=Lipopolysaccharide export system protein LptA precursor +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 38641 39213 . - 0 ID=PROKKA_00081;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF06835.7;locus_tag=PROKKA_00081;product=Lipopolysaccharide-assembly%2C LptC-related +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 39210 39713 . - 0 ID=PROKKA_00082;eC_number=3.1.3.45;gene=kdsC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P67653;locus_tag=PROKKA_00082;product=3-deoxy-D-manno-octulosonate 8-phosphate phosphatase KdsC +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 39710 40687 . - 0 ID=PROKKA_00083;eC_number=5.3.1.13;gene=kdsD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9HVW0;locus_tag=PROKKA_00083;product=Arabinose 5-phosphate isomerase KdsD +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 40756 40959 . - 0 ID=PROKKA_00084;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00084;product=hypothetical protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 41007 42266 . 
+ 0 ID=PROKKA_00085;eC_number=2.5.1.7;gene=murA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q3KHZ4;locus_tag=PROKKA_00085;product=UDP-N-acetylglucosamine 1-carboxyvinyltransferase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 42352 42951 . + 0 ID=PROKKA_00086;gene=sspA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0ACA3;locus_tag=PROKKA_00086;product=Stringent starvation protein A +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 42962 43285 . + 0 ID=PROKKA_00087;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45206;locus_tag=PROKKA_00087;note=Stringent starvation protein B homolog;product=hypothetical protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 43712 44371 . - 0 ID=PROKKA_00088;eC_number=2.3.1.28;gene=cat;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P62577;locus_tag=PROKKA_00088;product=Chloramphenicol acetyltransferase +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 44644 44937 . + 0 ID=PROKKA_00089;gene=xerD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A0P0;locus_tag=PROKKA_00089;product=Tyrosine recombinase XerD +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 46332 47087 . + 0 ID=PROKKA_00090;gene=repE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P03856;locus_tag=PROKKA_00090;product=Replication initiation protein +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 47675 48841 . + 0 ID=PROKKA_00091;eC_number=3.6.-.-;gene=soj_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P37522;locus_tag=PROKKA_00091;product=Sporulation initiation inhibitor protein Soj +NODE_1_length_50014_cov_374.697_ID_1 Prodigal:2.60 CDS 48841 49812 . 
+ 0 ID=PROKKA_00092;gene=virB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A247;locus_tag=PROKKA_00092;product=Virulence regulon transcriptional activator VirB +NODE_21_length_3078_cov_41.6478_ID_41 Prodigal:2.60 CDS 254 2167 . + 0 ID=PROKKA_00093;gene=htpX;inference=ab initio prediction:Prodigal:2.60,protein motif:HAMAP:MF_00188;locus_tag=PROKKA_00093;product=Protease HtpX +NODE_21_length_3078_cov_41.6478_ID_41 Prodigal:2.60 CDS 2076 2384 . - 0 ID=PROKKA_00094;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00094;product=hypothetical protein +NODE_21_length_3078_cov_41.6478_ID_41 Prodigal:2.60 CDS 2397 2972 . - 0 ID=PROKKA_00095;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00095;product=hypothetical protein +NODE_22_length_2891_cov_11.4392_ID_43 Prodigal:2.60 CDS 77 1258 . - 0 ID=PROKKA_00096;eC_number=1.3.99.32;gene=Acd;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:C3UVB0;locus_tag=PROKKA_00096;product=Glutaryl-CoA dehydrogenase +NODE_22_length_2891_cov_11.4392_ID_43 Prodigal:2.60 CDS 1319 2467 . - 0 ID=PROKKA_00097;eC_number=1.3.99.-;gene=mmgC_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45857;locus_tag=PROKKA_00097;product=Acyl-CoA dehydrogenase +NODE_22_length_2891_cov_11.4392_ID_43 Prodigal:2.60 CDS 2621 2857 . - 0 ID=PROKKA_00098;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00098;product=hypothetical protein +NODE_25_length_2759_cov_91.0869_ID_49 Prodigal:2.60 CDS 55 264 . - 0 ID=PROKKA_00099;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00099;product=hypothetical protein +NODE_25_length_2759_cov_91.0869_ID_49 Prodigal:2.60 CDS 261 596 . - 0 ID=PROKKA_00100;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00100;product=hypothetical protein +NODE_25_length_2759_cov_91.0869_ID_49 Prodigal:2.60 CDS 627 998 . 
- 0 ID=PROKKA_00101;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00101;product=hypothetical protein +NODE_25_length_2759_cov_91.0869_ID_49 Prodigal:2.60 CDS 995 1765 . - 0 ID=PROKKA_00102;eC_number=2.4.2.10;gene=pyrE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9KVD5;locus_tag=PROKKA_00102;product=Orotate phosphoribosyltransferase +NODE_25_length_2759_cov_91.0869_ID_49 Prodigal:2.60 CDS 2032 2523 . + 0 ID=PROKKA_00103;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00103;product=hypothetical protein +NODE_25_length_2759_cov_91.0869_ID_49 Prodigal:2.60 CDS 2429 2620 . - 0 ID=PROKKA_00104;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00104;product=hypothetical protein +NODE_29_length_2081_cov_1172.16_ID_57 Prodigal:2.60 CDS 140 820 . - 0 ID=PROKKA_00105;gene=cbl;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q47083;locus_tag=PROKKA_00105;product=HTH-type transcriptional regulator cbl +NODE_29_length_2081_cov_1172.16_ID_57 Prodigal:2.60 CDS 899 1156 . + 0 ID=PROKKA_00106;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00106;product=hypothetical protein +NODE_29_length_2081_cov_1172.16_ID_57 Prodigal:2.60 CDS 1249 1350 . - 0 ID=PROKKA_00107;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00107;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 732 833 . + 0 ID=PROKKA_00108;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00108;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 926 1183 . - 0 ID=PROKKA_00109;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00109;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 1393 2184 . 
+ 0 ID=PROKKA_00110;eC_number=2.8.1.-;gene=mnmA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q931Q6;locus_tag=PROKKA_00110;product=tRNA-specific 2-thiouridylase MnmA +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 2297 2926 . + 0 ID=PROKKA_00111;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF07947.8;locus_tag=PROKKA_00111;product=YhhN-like protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 2936 3808 . + 0 ID=PROKKA_00112;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF12695.1;locus_tag=PROKKA_00112;product=Alpha/beta hydrolase family protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 3830 5428 . + 0 ID=PROKKA_00113;eC_number=2.3.1.-;gene=lnt;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P23930;locus_tag=PROKKA_00113;product=Apolipoprotein N-acyltransferase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 5438 5989 . + 0 ID=PROKKA_00114;eC_number=1.-.-.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q5XCB9;locus_tag=PROKKA_00114;product=Putative NAD(P)H nitroreductase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 5982 6494 . + 0 ID=PROKKA_00115;gene=rfaH_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8FBI4;locus_tag=PROKKA_00115;product=Transcription antitermination protein RfaH +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 6555 7112 . + 0 ID=PROKKA_00116;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF02674.10;locus_tag=PROKKA_00116;product=Colicin V production protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 7109 7630 . - 0 ID=PROKKA_00117;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00657.1;locus_tag=PROKKA_00117;product=GDSL-like Lipase/Acylhydrolase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 7846 8544 . 
- 0 ID=PROKKA_00118;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P80603;locus_tag=PROKKA_00118;product=47 kDa outer membrane protein precursor +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 9843 9995 . - 0 ID=PROKKA_00119;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00119;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 10338 11813 . + 0 ID=PROKKA_00120;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01609.15;locus_tag=PROKKA_00120;product=Transposase DDE domain protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 11917 12927 . - 0 ID=PROKKA_00121;eC_number=2.1.1.44;gene=egtD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:A0R5M8;locus_tag=PROKKA_00121;product=Histidine-specific methyltransferase EgtD +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 12948 14255 . - 0 ID=PROKKA_00122;eC_number=2.8.1.1;gene=ynjE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P78067;locus_tag=PROKKA_00122;product=Thiosulfate sulfurtransferase YnjE precursor +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 14286 14570 . - 0 ID=PROKKA_00123;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00123;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 14853 15569 . - 0 ID=PROKKA_00124;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00124;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 15745 17688 . - 0 ID=PROKKA_00125;gene=ydaP;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P96591;locus_tag=PROKKA_00125;product=Putative thiamine pyrophosphate-containing protein YdaP +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 17707 18507 . 
- 0 ID=PROKKA_00126;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00126;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 18637 19311 . - 0 ID=PROKKA_00127;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00127;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 19464 19763 . - 0 ID=PROKKA_00128;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00128;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 20185 20445 . - 0 ID=PROKKA_00129;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00129;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 20913 21179 . - 0 ID=PROKKA_00130;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00130;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 21183 21743 . - 0 ID=PROKKA_00131;gene=ylaC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O07627;locus_tag=PROKKA_00131;product=RNA polymerase sigma factor YlaC +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 21952 23145 . - 0 ID=PROKKA_00132;gene=pgk/tpi;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P36204;locus_tag=PROKKA_00132;product=Bifunctional PGK/TIM +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 23327 24418 . + 0 ID=PROKKA_00133;eC_number=2.6.1.88;gene=ybdL;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P77806;locus_tag=PROKKA_00133;product=Methionine aminotransferase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 24430 25224 . + 0 ID=PROKKA_00134;eC_number=3.5.1.111;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q93NG1;locus_tag=PROKKA_00134;product=2-oxoglutaramate amidase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 25365 25970 . 
+ 0 ID=PROKKA_00135;eC_number=2.7.7.76;gene=mocA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q46810;locus_tag=PROKKA_00135;product=Molybdenum cofactor cytidylyltransferase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 26176 26649 . + 0 ID=PROKKA_00136;eC_number=1.8.4.14;gene=msrC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P76270;locus_tag=PROKKA_00136;product=Free methionine-R-sulfoxide reductase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 26995 28020 . - 0 ID=PROKKA_00137;eC_number=3.5.1.46;gene=nylB';inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P07062;locus_tag=PROKKA_00137;product=6-aminohexanoate-dimer hydrolase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 28380 29045 . - 0 ID=PROKKA_00138;eC_number=2.1.1.163;gene=ubiE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P67062;locus_tag=PROKKA_00138;product=Demethylmenaquinone methyltransferase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 29296 29580 . - 0 ID=PROKKA_00139;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01797.10;locus_tag=PROKKA_00139;product=Transposase IS200 like protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 29866 30441 . - 0 ID=PROKKA_00140;gene=rbr;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9AGG3;locus_tag=PROKKA_00140;product=Rubrerythrin +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 30468 31211 . - 0 ID=PROKKA_00141;gene=hrb;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9FDN6;locus_tag=PROKKA_00141;product=High molecular weight rubredoxin +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 31492 34632 . 
- 0 ID=PROKKA_00142;gene=bepE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8G2M6;locus_tag=PROKKA_00142;product=Efflux pump membrane transporter BepE +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 34663 35616 . - 0 ID=PROKKA_00143;gene=acrE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P24180;locus_tag=PROKKA_00143;product=Multidrug export protein AcrE precursor +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 35905 36258 . - 0 ID=PROKKA_00144;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK12275;locus_tag=PROKKA_00144;product=hypothetical protein +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 36421 37767 . - 0 ID=PROKKA_00145;gene=tolC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q54001;locus_tag=PROKKA_00145;product=Outer membrane protein TolC precursor +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 38142 38885 . - 0 ID=PROKKA_00146;gene=nixA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q48262;locus_tag=PROKKA_00146;product=High-affinity nickel-transport protein NixA +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 39449 39853 . + 0 ID=PROKKA_00147;gene=divK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7BBW0;locus_tag=PROKKA_00147;product=Polar-differentiation response regulator DivK +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 39994 40536 . + 0 ID=PROKKA_00148;gene=mntP;inference=ab initio prediction:Prodigal:2.60,protein motif:HAMAP:MF_01521;locus_tag=PROKKA_00148;product=manganese efflux pump MntP +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 40553 41029 . 
+ 0 ID=PROKKA_00149;eC_number=4.1.2.50;gene=queD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O31676;locus_tag=PROKKA_00149;product=6-carboxy-5%2C6%2C7%2C8-tetrahydropterin synthase +NODE_2_length_41759_cov_213.726_ID_3 Prodigal:2.60 CDS 41193 41717 . + 0 ID=PROKKA_00150;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03544.8;locus_tag=PROKKA_00150;product=Gram-negative bacterial tonB protein +NODE_39_length_1472_cov_66.0355_ID_77 Prodigal:2.60 CDS 184 1179 . - 0 ID=PROKKA_00151;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF04468.6;locus_tag=PROKKA_00151;note=PSP1 C-terminal conserved region;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 22 402 . + 0 ID=PROKKA_00152;eC_number=2.1.1.-;gene=adaA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P19219;locus_tag=PROKKA_00152;product=Bifunctional transcriptional activator/DNA repair enzyme AdaA +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 462 1367 . + 0 ID=PROKKA_00153;gene=yddG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P46136;locus_tag=PROKKA_00153;product=Aromatic amino acid exporter YddG +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 1387 1605 . - 0 ID=PROKKA_00154;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00154;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 1703 2098 . - 0 ID=PROKKA_00155;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00155;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 2529 3689 . + 0 ID=PROKKA_00156;eC_number=2.1.1.79;gene=cfa_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A9H7;locus_tag=PROKKA_00156;product=Cyclopropane-fatty-acyl-phospholipid synthase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 3856 4269 . 
+ 0 ID=PROKKA_00157;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00157;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 4476 5345 . - 0 ID=PROKKA_00158;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00158;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 5359 5838 . - 0 ID=PROKKA_00159;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00159;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 6601 6888 . + 0 ID=PROKKA_00160;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00160;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 6942 8393 . - 0 ID=PROKKA_00161;eC_number=2.7.13.3;gene=yycG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q45614;locus_tag=PROKKA_00161;product=Sensor histidine kinase YycG +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 8380 9132 . - 0 ID=PROKKA_00162;gene=cusR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0ACZ8;locus_tag=PROKKA_00162;product=Transcriptional regulatory protein CusR +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 9334 10113 . - 0 ID=PROKKA_00163;eC_number=5.3.1.22;gene=hyi;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P30147;locus_tag=PROKKA_00163;product=Hydroxypyruvate isomerase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 10122 11030 . - 0 ID=PROKKA_00164;eC_number=1.1.1.60;gene=glxR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P77161;locus_tag=PROKKA_00164;product=2-hydroxy-3-oxopropionate reductase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 11133 11999 . 
- 0 ID=PROKKA_00165;gene=rpoH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AGB3;locus_tag=PROKKA_00165;product=RNA polymerase sigma factor RpoH +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 12280 13257 . - 0 ID=PROKKA_00166;gene=ftsX;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AC30;locus_tag=PROKKA_00166;product=Cell division protein FtsX +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 13288 13977 . - 0 ID=PROKKA_00167;gene=ftsE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A9R7;locus_tag=PROKKA_00167;product=Cell division ATP-binding protein FtsE +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 14018 15292 . - 0 ID=PROKKA_00168;gene=ftsY;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P10121;locus_tag=PROKKA_00168;product=Signal recognition particle receptor FtsY +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 15463 16119 . + 0 ID=PROKKA_00169;eC_number=2.1.1.171;gene=rsmD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0ADX9;locus_tag=PROKKA_00169;product=Ribosomal RNA small subunit methyltransferase D +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 16283 17215 . + 0 ID=PROKKA_00170;eC_number=3.1.1.3;gene=lip3;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P24640;locus_tag=PROKKA_00170;product=Lipase 3 precursor +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 17373 18560 . - 0 ID=PROKKA_00171;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00487.18;locus_tag=PROKKA_00171;product=Fatty acid desaturase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 18737 18829 . + 0 ID=PROKKA_00172;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00172;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 18845 19657 . 
+ 0 ID=PROKKA_00173;eC_number=3.2.2.23;gene=mutM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P05523;locus_tag=PROKKA_00173;product=Formamidopyrimidine-DNA glycosylase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 20002 20370 . + 0 ID=PROKKA_00174;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00174;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 20450 21187 . + 0 ID=PROKKA_00175;eC_number=3.1.3.5;gene=yrfG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P64636;locus_tag=PROKKA_00175;product=GMP/IMP nucleotidase YrfG +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 21284 21697 . + 0 ID=PROKKA_00176;gene=hslR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0ACG8;locus_tag=PROKKA_00176;product=Heat shock protein 15 +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 21761 22618 . + 0 ID=PROKKA_00177;gene=hslO;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A6Y5;locus_tag=PROKKA_00177;product=33 kDa chaperonin +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 23057 24595 . + 0 ID=PROKKA_00178;eC_number=4.1.1.49;gene=pckA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q3KJP4;locus_tag=PROKKA_00178;product=Phosphoenolpyruvate carboxykinase [ATP] +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 24776 26881 . - 0 ID=PROKKA_00179;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF06808.6;locus_tag=PROKKA_00179;product=DctM-like transporters +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 26992 28029 . - 0 ID=PROKKA_00180;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00180;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 28105 29163 . 
- 0 ID=PROKKA_00181;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00181;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 29400 29696 . + 0 ID=PROKKA_00182;gene=hup;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A3H0;locus_tag=PROKKA_00182;product=DNA-binding protein HU +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 29965 30213 . - 0 ID=PROKKA_00183;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00183;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 30339 31634 . - 0 ID=PROKKA_00184;eC_number=3.5.2.3;gene=pyrC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O66990;locus_tag=PROKKA_00184;product=Dihydroorotase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 31631 32653 . - 0 ID=PROKKA_00185;eC_number=2.1.3.2;gene=pyrB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P56585;locus_tag=PROKKA_00185;product=Aspartate carbamoyltransferase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 32684 33229 . - 0 ID=PROKKA_00186;gene=pyrR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q1I3U3;locus_tag=PROKKA_00186;product=Bifunctional protein PyrR +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 33244 33708 . - 0 ID=PROKKA_00187;eC_number=3.1.-.-;gene=yqgF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A8I1;locus_tag=PROKKA_00187;product=Putative Holliday junction resolvase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 33701 34258 . - 0 ID=PROKKA_00188;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK00228;locus_tag=PROKKA_00188;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 34309 35184 . 
- 0 ID=PROKKA_00189;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK10819;locus_tag=PROKKA_00189;product=transport protein TonB +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 35335 36288 . - 0 ID=PROKKA_00190;eC_number=6.3.2.3;gene=gshB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P04425;locus_tag=PROKKA_00190;product=Glutathione synthetase +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 36849 37244 . + 0 ID=PROKKA_00191;gene=phoP;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P13792;locus_tag=PROKKA_00191;product=Alkaline phosphatase synthesis transcriptional regulatory protein PhoP +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 37286 37648 . + 0 ID=PROKKA_00192;gene=pleD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9A5I5;locus_tag=PROKKA_00192;product=Response regulator PleD +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 37680 37982 . + 0 ID=PROKKA_00193;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01584.13;locus_tag=PROKKA_00193;product=CheW-like domain protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 38033 38290 . + 0 ID=PROKKA_00194;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00194;product=hypothetical protein +NODE_3_length_39215_cov_155.142_ID_5 Prodigal:2.60 CDS 38383 38484 . - 0 ID=PROKKA_00195;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00195;product=hypothetical protein +NODE_42_length_1523_cov_23.4108_ID_83 Prodigal:2.60 CDS 344 865 . - 0 ID=PROKKA_00196;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00196;product=hypothetical protein +NODE_42_length_1523_cov_23.4108_ID_83 Prodigal:2.60 CDS 942 1307 . 
- 0 ID=PROKKA_00197;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00665.20;locus_tag=PROKKA_00197;product=Integrase core domain protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 732 833 . + 0 ID=PROKKA_00198;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00198;product=hypothetical protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 926 1183 . - 0 ID=PROKKA_00199;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00199;product=hypothetical protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 1343 1768 . + 0 ID=PROKKA_00200;gene=livH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AEX7;locus_tag=PROKKA_00200;product=High-affinity branched-chain amino acid transport system permease protein LivH +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 1778 2707 . + 0 ID=PROKKA_00201;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK11301;locus_tag=PROKKA_00201;product=leucine/isoleucine/valine transporter permease subunit +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 2704 4182 . + 0 ID=PROKKA_00202;gene=livF_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P22731;locus_tag=PROKKA_00202;product=High-affinity branched-chain amino acid transport ATP-binding protein LivF +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 4172 4288 . + 0 ID=PROKKA_00203;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00203;product=hypothetical protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 4254 5360 . + 0 ID=PROKKA_00204;eC_number=1.1.1.1;gene=adhD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O53303;locus_tag=PROKKA_00204;product=Putative alcohol dehydrogenase D +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 5370 6791 . 
+ 0 ID=PROKKA_00205;eC_number=1.2.1.83;gene=ald;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:H8ZPX2;locus_tag=PROKKA_00205;product=3-succinoylsemialdehyde-pyridine dehydrogenase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 6879 7298 . - 0 ID=PROKKA_00206;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00206;product=hypothetical protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 7437 9014 . - 0 ID=PROKKA_00207;eC_number=6.2.1.3;gene=lcfB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O07610;locus_tag=PROKKA_00207;product=Long-chain-fatty-acid--CoA ligase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 9014 10165 . - 0 ID=PROKKA_00208;eC_number=1.3.99.-;gene=mmgC_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45857;locus_tag=PROKKA_00208;product=Acyl-CoA dehydrogenase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 10191 11012 . - 0 ID=PROKKA_00209;gene=pcaR_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q52154;locus_tag=PROKKA_00209;product=Pca regulon regulatory protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 11194 12396 . + 0 ID=PROKKA_00210;eC_number=2.3.1.174;gene=pcaF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q43974;locus_tag=PROKKA_00210;product=Beta-ketoadipyl-CoA thiolase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 12488 13657 . + 0 ID=PROKKA_00211;eC_number=1.3.99.-;gene=acdA_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45867;locus_tag=PROKKA_00211;product=Acyl-CoA dehydrogenase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 13749 15512 . 
- 0 ID=PROKKA_00212;eC_number=2.2.1.6;gene=ilvG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P66946;locus_tag=PROKKA_00212;product=Acetolactate synthase large subunit IlvG +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 15531 16748 . - 0 ID=PROKKA_00213;eC_number=3.1.1.-;gene=estB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9KX40;locus_tag=PROKKA_00213;product=Esterase EstB +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 16927 18570 . - 0 ID=PROKKA_00214;eC_number=6.2.1.3;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q5SKN9;locus_tag=PROKKA_00214;product=Long-chain-fatty-acid--CoA ligase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 18600 19049 . - 0 ID=PROKKA_00215;eC_number=3.1.-.-;gene=ydiI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P77781;locus_tag=PROKKA_00215;product=Esterase YdiI +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 19150 20052 . - 0 ID=PROKKA_00216;eC_number=1.1.1.340;gene=ptlF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q82IY9;locus_tag=PROKKA_00216;product=1-deoxy-11-beta-hydroxypentalenate dehydrogenase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 20210 21127 . + 0 ID=PROKKA_00217;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK07531;locus_tag=PROKKA_00217;product=bifunctional 3-hydroxyacyl-CoA dehydrogenase/thioesterase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 21359 22330 . + 0 ID=PROKKA_00218;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00218;product=hypothetical protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 22582 23400 . 
+ 0 ID=PROKKA_00219;gene=pcaR_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q52154;locus_tag=PROKKA_00219;product=Pca regulon regulatory protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 23458 24849 . - 0 ID=PROKKA_00220;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00220;product=hypothetical protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 24962 26632 . - 0 ID=PROKKA_00221;eC_number=6.2.1.3;gene=lcfB_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O07610;locus_tag=PROKKA_00221;product=Long-chain-fatty-acid--CoA ligase +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 27037 27933 . - 0 ID=PROKKA_00222;gene=pstA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P07654;locus_tag=PROKKA_00222;product=Phosphate transport system permease protein PstA +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 28016 28972 . - 0 ID=PROKKA_00223;gene=pstC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0AGH8;locus_tag=PROKKA_00223;product=Phosphate transport system permease protein PstC +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 29060 30142 . - 0 ID=PROKKA_00224;gene=pstS1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P15712;locus_tag=PROKKA_00224;product=Phosphate-binding protein PstS 1 precursor +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 30554 31879 . - 0 ID=PROKKA_00225;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01609.15;locus_tag=PROKKA_00225;product=Transposase DDE domain protein +NODE_4_length_32829_cov_185.347_ID_7 Prodigal:2.60 CDS 32377 32736 . - 0 ID=PROKKA_00226;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00226;product=hypothetical protein +NODE_50_length_1320_cov_1878.61_ID_99 Prodigal:2.60 CDS 137 394 . 
+ 0 ID=PROKKA_00227;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00227;product=hypothetical protein +NODE_50_length_1320_cov_1878.61_ID_99 Prodigal:2.60 CDS 487 588 . - 0 ID=PROKKA_00228;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00228;product=hypothetical protein +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 132 797 . + 0 ID=PROKKA_00229;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF09140.5;locus_tag=PROKKA_00229;product=ATPase MipZ +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 1047 2429 . + 0 ID=PROKKA_00230;eC_number=3.2.1.179;gene=ugl;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9RC92;locus_tag=PROKKA_00230;product=Unsaturated glucuronyl hydrolase +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 2574 3893 . - 0 ID=PROKKA_00231;gene=prsE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7ANN5;locus_tag=PROKKA_00231;product=Type I secretion system membrane fusion protein PrsE +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 3900 6170 . - 0 ID=PROKKA_00232;gene=apxIB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26760;locus_tag=PROKKA_00232;product=Toxin RTX-I translocation ATP-binding protein +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 6298 7701 . - 0 ID=PROKKA_00233;gene=bepC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8G0Y6;locus_tag=PROKKA_00233;product=Outer membrane efflux protein BepC precursor +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 8495 28303 . + 0 ID=PROKKA_00234;gene=cya;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0DKX7;locus_tag=PROKKA_00234;product=Bifunctional hemolysin/adenylate cyclase precursor +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 28356 29495 . 
- 0 ID=PROKKA_00235;eC_number=3.5.1.18;gene=dapE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99SN6;locus_tag=PROKKA_00235;product=putative succinyl-diaminopimelate desuccinylase +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 29500 30261 . - 0 ID=PROKKA_00236;gene=livF_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P22731;locus_tag=PROKKA_00236;product=High-affinity branched-chain amino acid transport ATP-binding protein LivF +NODE_5_length_30845_cov_61.6996_ID_9 Prodigal:2.60 CDS 30282 30755 . - 0 ID=PROKKA_00237;eC_number=3.6.3.-;gene=lptB_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45073;locus_tag=PROKKA_00237;product=Lipopolysaccharide export system ATP-binding protein LptB +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 732 833 . + 0 ID=PROKKA_00238;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00238;product=hypothetical protein +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 926 1183 . - 0 ID=PROKKA_00239;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00239;product=hypothetical protein +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 1234 1665 . - 0 ID=PROKKA_00240;eC_number=1.4.1.13;gene=gltB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O34399;locus_tag=PROKKA_00240;product=Glutamate synthase [NADPH] small chain +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 1701 6245 . - 0 ID=PROKKA_00241;eC_number=1.4.7.1;gene=gltB_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P55037;locus_tag=PROKKA_00241;product=Ferredoxin-dependent glutamate synthase 1 +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 6766 7992 . 
+ 0 ID=PROKKA_00242;eC_number=3.1.3.3;gene=serB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:A0QJI1;locus_tag=PROKKA_00242;product=Phosphoserine phosphatase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 8237 9340 . + 0 ID=PROKKA_00243;eC_number=6.3.4.18;gene=purK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A695;locus_tag=PROKKA_00243;product=N5-carboxyaminoimidazole ribonucleotide synthase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 9325 9816 . + 0 ID=PROKKA_00244;eC_number=5.4.99.18;gene=purE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9WYS7;locus_tag=PROKKA_00244;product=N5-carboxyaminoimidazole ribonucleotide mutase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 9839 10951 . - 0 ID=PROKKA_00245;eC_number=2.1.1.79;gene=cfa_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A9H7;locus_tag=PROKKA_00245;product=Cyclopropane-fatty-acyl-phospholipid synthase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 12207 15476 . - 0 ID=PROKKA_00246;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9RDE2;locus_tag=PROKKA_00246;note=Tricorn protease homolog 1;product=hypothetical protein +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 15757 16602 . + 0 ID=PROKKA_00247;eC_number=3.1.3.15;gene=hisK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O34411;locus_tag=PROKKA_00247;product=Histidinol-phosphatase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 17064 17804 . + 0 ID=PROKKA_00248;eC_number=2.7.6.1;gene=prs;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A717;locus_tag=PROKKA_00248;product=Ribose-phosphate pyrophosphokinase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 17827 18465 . 
+ 0 ID=PROKKA_00249;gene=rplY;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A7B3;locus_tag=PROKKA_00249;product=50S ribosomal protein L25 +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 18727 19290 . + 0 ID=PROKKA_00250;eC_number=3.1.1.29;gene=pth;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q6YP15;locus_tag=PROKKA_00250;product=Peptidyl-tRNA hydrolase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 19760 20014 . - 0 ID=PROKKA_00251;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00251;product=hypothetical protein +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 20798 21631 . + 0 ID=PROKKA_00252;eC_number=2.3.1.180;gene=fabH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O67185;locus_tag=PROKKA_00252;product=3-oxoacyl-[acyl-carrier-protein] synthase 3 +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 21788 22663 . - 0 ID=PROKKA_00253;gene=ccpA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P25144;locus_tag=PROKKA_00253;product=Catabolite control protein A +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 23170 25830 . + 0 ID=PROKKA_00254;gene=btuB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P06129;locus_tag=PROKKA_00254;product=Vitamin B12 transporter BtuB precursor +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 26077 27687 . + 0 ID=PROKKA_00255;gene=sglT;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P96169;locus_tag=PROKKA_00255;product=Sodium/glucose cotransporter +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 27696 28892 . + 0 ID=PROKKA_00256;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF04853.6;locus_tag=PROKKA_00256;product=Plant neutral invertase +NODE_6_length_29555_cov_90.739_ID_11 Prodigal:2.60 CDS 28910 29488 . 
+ 0 ID=PROKKA_00257;eC_number=2.7.1.48;gene=udk;inference=ab initio prediction:Prodigal:2.60,protein motif:HAMAP:MF_00551;locus_tag=PROKKA_00257;product=Uridine kinase +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 732 833 . + 0 ID=PROKKA_00258;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00258;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 926 1252 . - 0 ID=PROKKA_00259;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00259;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 1234 1539 . - 0 ID=PROKKA_00260;gene=engB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9X1H7;locus_tag=PROKKA_00260;product=putative GTP-binding protein EngB +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 1593 2258 . + 0 ID=PROKKA_00261;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01902.11;locus_tag=PROKKA_00261;product=ATP-binding region +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 2342 3202 . + 0 ID=PROKKA_00262;eC_number=1.1.1.157;gene=mmgB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P45856;locus_tag=PROKKA_00262;product=putative 3-hydroxybutyryl-CoA dehydrogenase +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 3308 4234 . + 0 ID=PROKKA_00263;eC_number=3.-.-.-;gene=yycJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:C0SP91;locus_tag=PROKKA_00263;product=Putative metallo-hydrolase YycJ +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 4333 4797 . + 0 ID=PROKKA_00264;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00264;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 4884 5828 . 
- 0 ID=PROKKA_00265;eC_number=1.3.1.14;gene=pyrD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P25996;locus_tag=PROKKA_00265;product=Dihydroorotate dehydrogenase B (NAD(+))%2C catalytic subunit +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 6018 6845 . + 0 ID=PROKKA_00266;eC_number=3.1.3.12;gene=otsB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P31678;locus_tag=PROKKA_00266;product=Trehalose-6-phosphate phosphatase +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 6842 8281 . + 0 ID=PROKKA_00267;eC_number=2.4.1.-;gene=otsA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:A0R4M9;locus_tag=PROKKA_00267;product=Trehalose-phosphate synthase +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 8386 8730 . - 0 ID=PROKKA_00268;gene=rsbV;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9WVX8;locus_tag=PROKKA_00268;product=Anti-sigma-B factor antagonist +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 8727 9185 . - 0 ID=PROKKA_00269;eC_number=2.7.11.1;gene=rsbW;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P17904;locus_tag=PROKKA_00269;product=Serine-protein kinase RsbW +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 9429 10718 . + 0 ID=PROKKA_00270;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q5SMG8;locus_tag=PROKKA_00270;product=Magnesium transporter MgtE +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 10756 11571 . - 0 ID=PROKKA_00271;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00271;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 11886 12218 . 
+ 0 ID=PROKKA_00272;gene=mntH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99UZ7;locus_tag=PROKKA_00272;product=Divalent metal cation transporter MntH +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 12270 12980 . - 0 ID=PROKKA_00273;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00273;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 13422 13901 . + 0 ID=PROKKA_00274;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF08327.5;locus_tag=PROKKA_00274;note=Activator of Hsp90 ATPase homolog 1-like protein;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 13907 14233 . + 0 ID=PROKKA_00275;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O53478;locus_tag=PROKKA_00275;product=HTH-type transcriptional regulator +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 14336 14578 . - 0 ID=PROKKA_00276;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00276;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 14727 16028 . - 0 ID=PROKKA_00277;eC_number=2.5.1.19;gene=aroA1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9KCA6;locus_tag=PROKKA_00277;product=3-phosphoshikimate 1-carboxyvinyltransferase 1 +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 16336 17046 . + 0 ID=PROKKA_00278;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00278;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 17066 18877 . - 0 ID=PROKKA_00279;eC_number=3.1.3.3;gene=rsbU;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P40399;locus_tag=PROKKA_00279;product=Phosphoserine phosphatase RsbU +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 19097 19654 . 
- 0 ID=PROKKA_00280;eC_number=2.4.2.8;gene=hpt;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99085;locus_tag=PROKKA_00280;product=Hypoxanthine-guanine phosphoribosyltransferase +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 19780 20622 . + 0 ID=PROKKA_00281;eC_number=2.4.99.-;gene=lgt;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P60962;locus_tag=PROKKA_00281;product=Prolipoprotein diacylglyceryl transferase +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 20873 21556 . - 0 ID=PROKKA_00282;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00282;product=hypothetical protein +NODE_7_length_23944_cov_113.632_ID_13 Prodigal:2.60 CDS 21579 23522 . + 0 ID=PROKKA_00283;eC_number=2.7.7.7;gene=polA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P00582;locus_tag=PROKKA_00283;product=DNA polymerase I +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 67 519 . - 0 ID=PROKKA_00284;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00284;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 744 3548 . + 0 ID=PROKKA_00285;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00285;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 3736 4218 . + 0 ID=PROKKA_00286;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00286;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 4221 5591 . + 0 ID=PROKKA_00287;eC_number=3.-.-.-;gene=blh;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8UAA9;locus_tag=PROKKA_00287;product=Beta-lactamase hydrolase-like protein +NODE_8_length_21143_cov_115.55_ID_15 Aragorn:1.2 tRNA 5694 5770 . - 0 ID=PROKKA_00288;inference=COORDINATES:profile:Aragorn:1.2;locus_tag=PROKKA_00288;product=tRNA-Arg(ccg) +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 5835 6365 . 
- 0 ID=PROKKA_00289;gene=rfaH_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q8FBI4;locus_tag=PROKKA_00289;product=Transcription antitermination protein RfaH +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 6377 8167 . - 0 ID=PROKKA_00290;eC_number=2.7.3.9;gene=ptsI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P08838;locus_tag=PROKKA_00290;product=Phosphoenolpyruvate-protein phosphotransferase +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 8130 8537 . - 0 ID=PROKKA_00291;eC_number=2.7.11.-;gene=ptsH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9EYQ9;locus_tag=PROKKA_00291;product=Phosphocarrier protein HPr +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 8687 8983 . - 0 ID=PROKKA_00292;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00076.16;locus_tag=PROKKA_00292;product=RNA recognition motif. (a.k.a. RRM%2C RBD%2C or RNP domain) +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 9080 9382 . - 0 ID=PROKKA_00293;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00076.16;locus_tag=PROKKA_00293;product=RNA recognition motif. (a.k.a. RRM%2C RBD%2C or RNP domain) +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 9477 9779 . - 0 ID=PROKKA_00294;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF00076.16;locus_tag=PROKKA_00294;product=RNA recognition motif. (a.k.a. RRM%2C RBD%2C or RNP domain) +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 10566 14414 . + 0 ID=PROKKA_00295;eC_number=3.2.1.1;gene=amyB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P14898;locus_tag=PROKKA_00295;product=Alpha-amylase 2 +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 14411 15214 . + 0 ID=PROKKA_00296;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00296;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 15244 16038 . 
- 0 ID=PROKKA_00297;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00297;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 16239 17096 . + 0 ID=PROKKA_00298;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00298;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 17104 18843 . + 0 ID=PROKKA_00299;eC_number=2.6.1.85;gene=pabB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P28820;locus_tag=PROKKA_00299;product=Aminodeoxychorismate synthase component 1 +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 18983 19120 . + 0 ID=PROKKA_00300;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00300;product=hypothetical protein +NODE_8_length_21143_cov_115.55_ID_15 Prodigal:2.60 CDS 19306 21012 . - 0 ID=PROKKA_00301;eC_number=4.1.1.32;gene=pckG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9AEM1;locus_tag=PROKKA_00301;product=Phosphoenolpyruvate carboxykinase [GTP] +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 732 833 . + 0 ID=PROKKA_00302;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00302;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 926 1183 . - 0 ID=PROKKA_00303;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00303;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 1234 1335 . - 0 ID=PROKKA_00304;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00304;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 1474 2625 . - 0 ID=PROKKA_00305;gene=braC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P21175;locus_tag=PROKKA_00305;product=Leucine-%2C isoleucine-%2C valine-%2C threonine-%2C and alanine-binding protein precursor +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 2723 3154 . 
- 0 ID=PROKKA_00306;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK11895;locus_tag=PROKKA_00306;product=acetolactate synthase 3 regulatory subunit +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 3194 4231 . - 0 ID=PROKKA_00307;eC_number=1.2.1.38;gene=argC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9X2A2;locus_tag=PROKKA_00307;product=N-acetyl-gamma-glutamyl-phosphate reductase +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 4476 5189 . - 0 ID=PROKKA_00308;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00308;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 5186 6547 . - 0 ID=PROKKA_00309;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK11788;locus_tag=PROKKA_00309;product=tetratricopeptide repeat protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 6580 7500 . - 0 ID=PROKKA_00310;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF01734.1;locus_tag=PROKKA_00310;product=Patatin-like phospholipase +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 7519 8913 . - 0 ID=PROKKA_00311;eC_number=3.4.21.107;gene=mucD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q4KGQ4;locus_tag=PROKKA_00311;product=putative periplasmic serine endoprotease DegP-like precursor +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 9020 9262 . + 0 ID=PROKKA_00312;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00312;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 9345 9914 . - 0 ID=PROKKA_00313;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF07879.5;locus_tag=PROKKA_00313;product=PHB/PHA accumulation regulator DNA-binding domain protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 10479 12176 . 
+ 0 ID=PROKKA_00314;eC_number=2.3.1.20;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06795;locus_tag=PROKKA_00314;product=Putative diacyglycerol O-acyltransferase/MT1809 +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 12271 13407 . - 0 ID=PROKKA_00315;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK10818;locus_tag=PROKKA_00315;product=cell division inhibitor MinD +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 13404 14444 . - 0 ID=PROKKA_00316;eC_number=3.6.3.16;gene=arsA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P08690;locus_tag=PROKKA_00316;product=Arsenical pump-driving ATPase +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 14531 15148 . - 0 ID=PROKKA_00317;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF07879.5;locus_tag=PROKKA_00317;product=PHB/PHA accumulation regulator DNA-binding domain protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 15353 15904 . - 0 ID=PROKKA_00318;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00318;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 16373 17275 . + 0 ID=PROKKA_00319;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK10279;locus_tag=PROKKA_00319;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 17361 17648 . + 0 ID=PROKKA_00320;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00320;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 17663 18685 . - 0 ID=PROKKA_00321;eC_number=4.2.3.4;gene=aroB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9KNV2;locus_tag=PROKKA_00321;product=3-dehydroquinate synthase +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 18710 19732 . 
- 0 ID=PROKKA_00322;eC_number=2.5.1.54;gene=aroF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q9WYH8;locus_tag=PROKKA_00322;product=Phospho-2-dehydro-3-deoxyheptonate aldolase +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 20103 20210 . + 0 ID=PROKKA_00323;inference=ab initio prediction:Prodigal:2.60;locus_tag=PROKKA_00323;product=hypothetical protein +NODE_9_length_20531_cov_182.121_ID_17 Prodigal:2.60 CDS 20207 20464 . + 0 ID=PROKKA_00324;gene=fadR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0A8V6;locus_tag=PROKKA_00324;product=Fatty acid metabolism regulator protein +##FASTA +>NODE_10_length_13322_cov_17.9253_ID_19 +GGGCGTCGCGCAAGCCGTCGCCCAGCAGGTTGATGGCCAGGATGGTTAGCATGATGGCCA +GACCGGGCAGGGTGACCAGCCACCAGGCGTCGGTGATGTATGCGCGTCCCTCGGCCAACA +TGCGGCCCCAGGTGGGCTGGGGCGGGGGCACCCCCATGCCGAGGAAGGAGAGTCCGCTCT +CCCAGATGATGATATTGGCCACCCGCACCGTGCCCAGCACCAAAAGCGGCGAGAGGATGT +TGGGCAGGATGTGGCGGCGGATGATGGTGCGGTTCCGGGTGCCCAGGCTGTGGGCCGCCT +TGACGAACTCCTTCTCCTTCAGGGCCAGCACGCTGCCGCGCACCGTGCGGGCGTAGGTCA +CCCAGTCGGAGACGCCCAGCACCAGGATGAGGTTGAAGATGGAGTTGCCCAGCACGGCGA +TGATGGAGATGGTGAGCAACATGAAGGGATAGGCCATGAACACGTCGGCGATGCGCATGA +TGATGGAATCGGTGCGCCCGCCGTAGTAGCCCGACAAGAGCCCCAGCACGATGCCCAGGG +CCTCGGCCACCAGCACCGAGGCGAACCCCACCAGCAGGCTGACCCGGCTGCCGTAGAGTA +TGCGGGCCAGCACGTCGCGCCCCAGGTTGTCGGTCCCCAAAGGATGCGCCAGGCTGCCGC +CGGCCTGCCAGGCAGGCGGCGTGAGGCGGGCCATGAGGTCCTGGGCCAGCGGGTCCACCG +GGTAGTAGAGGGGAAAGGTCAGGCTGATGCCCACGATGAGCGCCAGCATGATCCCCCCCA +CCACCGCGCTGCGGGAGCGTTTCAGCTCGGTCAGGGTGTCGGCCCATAGGCGCAGGGTGG +GGACCAGGGCGGCTACGCGCAGAGTCATTTCTCCTCCAGATAGCTGATCTGGGGGTCCAG +GTAGACGTACAGCAGGTCCACCAACAGGTTGACCAGCACAAACAGCACCGCCAGCACCAG +CACCGTGGCCTGCACCAGGGGATAGTCGCGGTTGAAGATGGCCTGCACCACCAACCGGCC +CACCCCCGGCCAGGCAAAGACGGTCTCGATGATCACCGTCCCCCCGATGAGAAAGCCGAA +CTGCATGCCAGCCACGGTGACCAAGGGGATCAGCGCGTTCTTGAGCGCGTGCTTGCCGAT +GACCAGGCGTTCCTTGAGCCCCTTGGCCCGGGCGGTGCGCACGTAGTCCTGGCCCAGCAC 
+CTCCAGCATCACCGAGCGGGTGAGCCGGGCGAACATGGCCATGAGGCTCATGCCCAGGGC +CAGCGAGGGCATGATCAGGTGCTCCCAGCCGCCGCGGCCCGAGGTGGGCAGCCAGCCCAG +CTTCACCGAAAAGAGCAGGATCAACATGATCCCCAGCCAGAAATGCGGCATGGAAAGCCC +GAACAACGCCCCCAGCATGGAGCCCAGGTCGATGAAGGAGCCGCGTTTCACCGCCGAGAG +GATCCCCAGCGGGATCGCCACCACCAGGGCGATGGCCATGGCCGCCAGGGCCAGTTCCAG +GCTGGCTGGCAGACGCTCCAGGATCAGCTCCATGGCCGGCACATGGTAGTAGAGCGACTC +GCCGAAGTCGCCTTGTACGGCCTTGGTGGCAAATCGCCAGTACTGCACGTAGAGCGGGTC +GTTGAACCCGAGTTGCTGGCGCAGCTCCTCGATCTCCTGGTGCGAGGCGTCGGCCGGCAG +CATGAGCATCACCGGATCGCCCGAGAGGTGGATGAAGAAGAAGGCGATCACCGAGATGCC +CACCACCACGTAGACCGTGTGCCAGAGCCTTTTCAGGATATAGCGTCCCATGCCGCTTCC +GCCGGGCGGGCCGGGGGGCGAGCCCCCGGCCCGGTACCGGGTCTTGAGGTTGTTACTTGG +GCTTGGCGGTACGCATGTCCAGGAAGCCGTCGCGACGGCCCTGGTAGGCCACCTGGTTGC +TCACCGCGAACACCTCGGGGATCACGTAGAGGTAGACCCAGGGCGCGTCGTTGTAGAGCA +GCCGGTCGATCTTGTGGTAGACCTCGGCCCGTTTCTTGCGGTCCAGGATGGTGCGGCCCA +GGTCCAAGAGTTTGTCCACCTCGGGGTTCTTGTAGTAGTAGTCGCGCACCTTGGAATTGA +ACAGCGAGTACATGTAGCGGTCCGGCTCCGGGGCGTCGTCCCACCAGTAGATGTACATGG +CGTGCTTGCGGGCCTTGTAGCGCTGCCACATCACCGCCCGCTCGTAGGGCTCCAGCCTGA +CCTTCACACCTACCTTGGCCAGATCGGCCGCGATGGCCTCGGCCTGCTCCTGGATCTGGG +ACATGTAGGTGGGATAGGCCAGCCGGGTCTGGAAGCCGTTGGGATAGCCAGCCTCGGCCA +AAAGCTTCTTGGCCTTGGCCGGATCGTAGGGATAGGGCTTGAGGTTGGGGTCGTGCCCGA +AAGTGCGGGGGCTGATGGGCCCGGCGCAGAGGATGGCCTTGCCGCCAAAGAGCGCCTTGT +TGATCAGCTCGCGGTTGACGGCGTAGTTCATGGCCTGGCGCACCCGCACGTCATTGAAGG +GCGGCTTGTAGGTGTTGAGCCCCAGGTAGGGCATCACGCCCATCTGGTTGGTGAGGTAGG +CCTTGCCCGAGGCCAGGATCCTCTTGCGCTGGTGGATGGACACGCCGCTGATCACGTCGG +CCTCGCCAGTCAGAAGCGCGGCGATGCGCGAGGCCTCCTCGGGCACGCCCTTGAAGATCA +CCTTCTTGAAGGCCGGCTTGGGGCCGTAGTACTTGTCGTAGGCGGTGAGCACGATCTCCT +CGCCGCGCACCCACTTCTCCAGCTTGTAGGGCCCGCAGCCGATGGGGTTGGTGTTGTACT +TGGCGTCGCCCACCTTCTTGATGTAGCCCGGCGGCACGATGGGCAGGTAGTAGCCCAGGA +TGTAGAGCCCCGGCGCGTAGGGTTCCTTGGTGGAGATGAGCAGGGTGTAGTCGTCCAGGA +CCTTCACCTCCTTGAAGGCGCTCAGCTTGCCCTTGTGGGGGTTCTTGATGCTGGGCTTGA +AGGACCGCTCGAAGGAGAACTTAACCGCCTGGGCGTTGACCGGCTCGCCGTTGTGGAAGG +TGGCGTCGCGGCGCAGGTGGATCTTCCAGGTCAGGTCGTCCACCTTCTCCCAGCTCTCGG 
+CCAACCGCGGCTTGACCGAGGCGTCCTCCTGGGGCGCGAAAAGGGTGTCGTGGATGTTGT +AGCAGACGCTCATGGTGGTCTGGATGGAACTGGTCATCAGGTCCAGCCCCACCGGCTCGG +CCTCCTGGATCACCACCAGCACGTCCTTTTTAGCCAGCGCCGGGCCGGCCAGGATGGCCA +GGCCCAACGCGGCCACGCCCAAGGCCAAACACAGTCTCTTGAATTTTTTCATCTGGTCCC +TCCTCCTGAGTATTGGTCCTGTGCCGGGCCCGGCCCGGCGTTATCCCCCCTCAACCGTTC +CGCCCGGGGCGGAGCGAACCTCGTCATACAGGTGGCAGGCCACCAGGTGGCCATCGGCCA +GCGGCCGCAGCTCCGGGGCCTGGTGCCGGCATATCTCCATGGCCCGGCCGCAGCGGGGAT +GGAAGTGGCAGCCGGACGGCGGGTCCAGGGGGCTGGGCACGTCGCCCTCCAGGATGATGC +GCCGGCGGCGGCGCCGGGGATTGGGCACCGGCACCGCCGAGAGCAGGGCCTGGGTATAGG +GATGCATGGGCCGAGCGTAGAGTTCCTTGACCCCAGCCTGCTCCACGATGCGCCCCAGGT +ACATCACCGCCACCCGCTGGCTGACGTAGCGGATCACCGCCAGGTCGTGGGAGACGAAGA +TGTAGGAGAGCCCCAACTGGCGCTGCAACCGGGAGAGCAGATTGAGCACCTGGGCCTGCA +CCGACACGTCCAGGGCCGATACCGGCTCGTCGCAGATGATCACCTTGGGCCGCAGCACCA +GGGCCCGGGCCAGGCAGATGCGCTGGCGCTGGCCGCCGGAGAACTCGTGGGGGAAACGCT +GGGCGTGCTCGGGCAAAAGCCCCACCATGGACAGGCCCTCGGCCACCAGCTCCCGCCGCC +GCGCCGCCGAAAGCTCCAGGTGATTCAGAAGCGGCTCCTCGACGATCTGGGCCACGGTCA +TGCGGGGATTCAGCGAGCCGAAGGGGTCCTGGAAGACGATCTGGATGTCGCGGCGCAGCC +TGCGCAGCTCCTCGCTGCCCGCGGCGGCCAGGTTGCGCCCCTGGAACCAGACCTCGCCGG +CGGTGGGGTCAATGAGACGCAGCAGGCAGAAGGCGATGGTGGACTTGCCGCAACCGCTCT +CGCCCACCAGCCCCAGGGTCTGGTTCTCCTCCAGGGTGAAGGAGACCCCGTCCACCGCGT +GCACCACCCCCTGCCGGCGGCCGAAGAAGCCCTGGCGCACCGGGAAGTGCTTGACCAGCT +CCTTTACGCGCAGAACCTCAGCCACGATCCACCCTCCAACAGGCCACCCGGTGCCCGGGT +CGCACCTCCACAAGCTCCGGCTCCTGCCTGGCGCAGCGCTCGTCGGCCAGGTGGCAGCGC +GGCGCGAAGTTGCAGCCCGGCGGTAGATCCAGCAGGCTGGGTACCACTCCCCTGATCTCG +TGCAGCTCCTTGTCCGCCAGCTCGGCCTCGGCGCTGGGGATGGCCTCCAATAGACCCCGG +GTGTAGGGGTGCAGGGGGTGGTCGAACAGCTCCACGGTGGGGGCCTGTTCCACGATGCGT +CCGGTGTACATCACCGCCAGGCGCTCGGTGGTCTCGGCCACCACCCCCAAATCGTGGGTG +ATGAGCACCACCGCGGTGCCCAGTTCATCGCGCAACTCCAGGATCAGCTCCAGGATCTGG +GCCTGGATGGTCACGTCCAGGGCGGTGGTGGGCTCGTCGGCCAAAAGCAGGGCCGGGCGG +CAGGCCAGGGCCATGGCGATCATCACCCGCTGGCGCATGCCGCCCGAGAGCTGATAGGGG +TACTCCCTGACCCGCCGCGCCGGGGCCGGTACCCCCACCATCTCCAGCATCTCCACCGCC +CGGCGCCGGGCCTCGCGGCGCGCCAGGCCCTGGTGCACCCGAAAGACCTCGCCGAGCTGG 
+TTGCCGATGGTGAAGACCGGATTCAGGGCGGTCATGGGCTCCTGGAAGATCATGCTGATC +TGGTTGCCGCGCAGCCGGCGCAGTTCCTCGGGGTCCATCTTCAGCAGGTCGCGCCCCTTG +AAGCGTATCTGCCCCCCGGCGAAGAAGGCCGGCGGCATGGGCAGAAGACGCAGCACCGAA +AGCGCCAGGGCGCTCTTGCCGCAGCCGCTCTCACCCACCAGGCCCAGTATCTCGCCCCGG +CCCACGGTGAGGTCCACACCGTCCACCGCCCGGGCCACGCCCTGGTTGGTCTGGAAACAG +GTCTTGAGCCCCTTGATCTCCAGGACGGGAGGGGAGGGCTCGGCTTTGGAAACGCCCGCG +TGACTGATGGCTGTCTCCCCGTTGCTGCTGGTTTTGCTGGAAAGGAGTCCCGCTGTGCCC +GGCGGCAGGATCCCTGCGGTCGCCGCCGGCAGCCCACCCGAATACGCACGCTTGTGAACA +TCATACTGTCAAGGGCGTCATATTCCTGTCAAGAATATTTTGCGGCCCGCAACGATTTTC +GGGCGCATTATTATTTCCAGATAGTGAACCCTCCGCCGGCCGGCGGGGACAAAGAACAAA +TTCATTCAACTAATTAGGGCCGAGCAGCAACATCAAATCAGGCTCACGGGGTCCACGTCC +ACCAAAAGCCGCACCCCAGCGGGCAGCGGCGGACTGCGGTGCAACCCCAGGCGCAGGGTG +CGCCCCGCCGCGGCGGCGGTGGGGGCCTTGAGCAGGATCATCCAGCGGTGGCGGGCCTTG +GCCCGGGGCAGGGCCGCCGGGGCCGGGCCCAGCACCCGGGCGCCGGGCTCCAGGCGCCGG +CGGGCCTCCTCCAGCCCGGCGGCCAGGGCCTGGGCCGCCCGCTGGCAGCGCCTGTCGTCC +ACCGCCTCCAGGCGCAGGGCCACCAGGCGCATGAAAGGCGGATAGCCCAGGGCGCGGCGC +TCGGCCAGCTCGGTCTGGTAGAACTCGTCGGGGCGCTGGGCCAGAGCGGCGCGCAGAGCG +TGGTGGTCGGGGTCGTAGGCCTGCACGATGACCCGGCTTTTGCCTCCCTGTCGCCCGGCG +CGGCCGGCCACCTGGGTGAGCAGGCCGTAGGCCCGCTCGCCGGCCCGGAAGTCGGGCAGG +GCCAGGGCCTGGTCGGCCGAGAGCACCCCCACCAGGCCGATGCCGGGGAAGTCGTGCCCC +TTGGTGATCATCTGGGTGCCCACCACCACCTCCACCCGCCGCTCGGCGATGGCGCGCAGA +AGTTCGCCCAGCCGGCGGGGGTCGCCGGCGGTGTCGCGGTCCAGGCGGGCGATGCGCGCA +CCGGGCAGGAGCTCGCCCAGCTTCTGGGCCACGGCCTCGGTGCCCAGGCCCAGGGGCCGC +AGCTTCTCCTCCCCGGCGCCGCAGGCGGGGCAGGTGCGGGGCCGCGGCCGCTGGTGGCCG +CAGACGTGGCAGACGAGCCGGTCGCTGCCCTGGTGCAGGGTGAGGCTCACCGCGCAGGCC +GGGCAGCCCACGGTCTGGCCGCAGGCGGTGCAGAGATAGGCCGGGGCGAAGCCACGGCGG +TTCAGGAAAAGGATGGCCTGCTCGCCGGCCTCGAGGGTCTGCTCCAGGGCCGCCAGCAGA +CGCCGGCTGAGGAAGCCCCCCACCAAGCGCCCCTCCCGACGCAGGTCCACCAGCTCCATG +CGCGGCAGGGGGGCCTCGCGCACTCGGCGCGGCAGGCGCAGGCAGACGGTGTTCCCCTCC +TGGGCGCGGTGGTAGGTGGTCACCGCCGGCGTGGCGGTGCCCAGCACCACCGGGCAGTCC +TGCTCGCGGCCCCGCAAAAGCGCCAGGTCCCGGGCGTGGTAGCGGAAGCGGTCCTCCTGC +TTGTAGGCCTCGTCCTGCTCCTCGTCCACGCAGATCACCCCCGGCTCCCGCAGGGGCGCA 
+AACACCGCCGAGCGCGCCCCCACCACCACCCGGGCCCGGCCCCGGGCGATGGCCAGCCAC +TGGCCGCGCCGCGCCGCGGGGCTGAGCCCCGAGTGCAGCACCGCCACCTGCCCGGCGCCG +AAGCGCTGGCGCAAAAGCCCCTCCAGGCGCAGGCACAAGCCTATCTCGGGCGTGAGCAGG +AGCGCCGTGCGCCCGGCCTCCAGGGCGGCCTTGACGCAGGCCATATACAGCTCGGTCTTG +CCCGAGCCGGTGACCCCGTGCAGCAGAAAGGACTTGAAGCCCCCGGAGTGCACCGCGGGC +AGCAGCTCATCCAGCGCCCGCTGCTGGTCCGGGGTGTAGTGCTCGGGCTCGGGCTCGGGC +AGGAGGGGACGGCCCAGCAGGTCCTTGACCAGGGGGCGGTGGCTGATGGTCACCCAGCCG +CGTTTCTCCAGGTCGCGGCACAGGGCCGCGGCGCGGGGAAACTCCTCCCGCAGCTCGGGC +AGGGGCAGAGGCCCCCGGGCCTTGAGCCGCCGCAGGATGCGCGCGGCCTGGCTCTCGGGC +CGGGGCAGGCGCGAGTCCTCCCCCCGGCGCCAGCTCACCACCGCCACCTGCTGGGTCTTG +GGGGCCAGGGCCTTGGCCGGCCGGGCCGAGCCCATCCCCGCCGGCAGGCACCAGGCCAGC +ACCTGCCCCAAGGGGACGTGGTAGTAGGCCGCGGCGCGCTGGAAAAAGGGCAGCAGCTCG +GGCGGCCACACCTGGGGGCCCTTGCCGTCCTCCAGCACGTCCAGCACCGGCTTGAGCGCG +TCCTGCCCCCCTCCTGCCGCCAGGGGTTCACCCAGGGCGAAGCCCAGGCGGGCGCCTCCC +CGAAGCGGCACCAGCAGCCGGGACAAGGGCTTCACCAGAGGCGCCAGCTCCGCGGGCACC +GCGTAGGTCAGCGGCTGCCACAGGGGCGCGGCCAGGGCTACGTCCACCATGAGGGTCATG +GCTGTAAAGGGTAGCAGAAAGGGCGGGGTGCAGAAAGCGGGGGATTGTTATTGACTGGGC +AGGGTATTTAGTTATAATGGTACTAAAATACTTATTAGGCAAGAACATGTCACATTGGTT +TAAGATATCAGAGGCATCCTCCTTGGCCCTGCACACCATGGCCTACCTGGCCGCCCACCC +GGGACGGCTCATCTCCAACCGGGTGATCGCCAGGGATCTGGGGGTCTCGGCGGCCCATCT +CTCCAAGGTCCTCCAGCGGCTGGCCCGCGCCGGGCTGCTGGAGTCCCTGCGCGGGCCCAC +GGGAGGGTTCCGCCTGGGGCGCCCGGCGGGCGAGATAAGCCTAATGGAGGTCTACGAGGC +CATCGACGGCAAGTTCCAACCTTCTTCCTGCCTGCTGGGACGGCCGGTCTGCCGGGGCGG +CAAATGCGTGCTGGGCGAACTGGGGCGCAACTTGGAGCGCCAGACGCGGGAATATCTTCT +GAATACCAAGCTGTCGGAGTTCGAGGACTTCATGTGTTTCGAGGAGGGAAACTGATGCCC +CTGGGCAAAGGGCGTGGCGGCGCGCTGCCCTGGGACCCCCGCGCCGAGGCGGCCCTGGGG +CGGGTGCCCTTTTTCGTGCGTTCGCTGGTGCGGCGCAAGGTGGAAGAGCGGGTGGCCGAG +GCCGGTGGCCGGCGGGTGGGCCTGGAGGACTTCCAGGAGGCCGAGGCGGCCTTCCGGGCG +GTGCGCGCCGGCAAGAGCCAAAAGGAACTCGAGGCCATGCTGCCGGCGGAGAACCGCCCC +GGTGTGGAGATGGTGGTGGTCCAGGCCTGCCGCAGCCGGTTGAGCAACTGTCCCAACCCG +CTCATCGACACCCAGAAGTGGCTGGAGAGGGTGCAGGCCTGGGTGGAGGAGCTGGATCTC +TCCGAGCGCCTGCGCCGGCGGGTGGCGGATGACAAGATACTCTTTCACCACAAGCTCAAG 
+ATAGCCATCGCCGGCTGCCCCAACGGCTGCTCGCGCCCCCAGATCGCGGACCTGGCCCTG +GTGGGCATGACCCGGCCGCGGCTGGTGGAGCCGGAGGTCTGCACCGCCTGCGGGGCCTGC +GCCGAGGCCTGCCCCGACGGGGCCGTGAGCCAGGACGACGGCCCGCCGGAGTTCCACCGC +GAGCTCTGCCAGGGCTGCCTCTCTTGCAGCCGGGCCTGCCCGGTGGGGGGCATCGAGCTG +GACCCGCCCGGGGTGCGGGTGCTCATGGCCGGCAAGCTGGGGCGGCATCCCCATCTGGCC +CGGCCGGTGATGGAGGCCACCGGGCCAGAACCGGTGCTGGCCTACTGGACCCGGGAGCTG +GAGGAATACCTGGCTAGCGCCCCGCCGGGGCGGCGCTTCAGCGCTTGGTGGCTGGAGCAA +CACCCCGCGGGCTGAACCCGGAAGTCCCAGCAAGGAGGCGCGCCATGCCCATACCCGGCC +GGCTTTTGACCACCGCCATGGCGGTGATGCCCCATACCGACGTGGACCAGGCGCTCGCCA +GCGCGCTTAGCCTGGACATCCCCTTCTGGCCCCAACTCCCCCGGGTCAACTACTACGAGG +ACATGTACGTCCAGGCCTCGGAGCACTTCCCGGGCATGGTGGTGGACCACAAGGAGCGCA +CGCTCGTCTTTTCCATGGACAAGTTCATGGTCGAGCTGGAGGAGACCCTGGCTCATCTGG +AGGAGCCGGAATACTTCGACATCTCGCCGGAGTACTCGGTGGTCTACCATCGCTTCCTGG +AGCTGGAGCTGGCCGACCGGCCCGCCATCCGCGGCCAGTTGGAAGGGCCCATCAGCTTTG +GGCTCAACGTCAAGGACCAAGACGACCGGCCCATCCTCTTCGACGACACGGTGCGCCCCT +TCCTGCTGGAGGTCATGGCCCGACGGGTCAACGTGCAGCTCACCCGGCTCAAGGCCAGAA +ACCCCAACGCCTTCATGTTCGTGGACGAGCCGGGGCTTCAGTTCATCTTCAGCGGGCTGT +CGGGCTACAGCGACCGCAAGGCCAAGGAGGACCTGGACCAGTTTTTCGCCGCCATCGAGC +GGCCCCGGGGCATCCACCTCTGCGGCAACCCGGACTGGGACTTTCTGCTCAACCTGGATC +TGGACATCCTGTCGCTGGACGTCTACTCCAACGGCGAGGTCTTCTCCTCCTATGCCCGCT +CCATCAAGCGCTTTCTGGACCGTGGCGGGGTGCTGGCCTGGGGGCTGGTGCCCACCAACT +TCGAGCCCTTCTCGGCCGAGGACCACGTCTCGCTCAAGGCCCGGCTCAAGGAGATATGGT +CCGCCCTGGAGTCCAAGGGGGTGGACCGGGAGCTGATGCTGGAGCGCAGCCTGCTCTCGC +CGGCCACCTGTTGCCTGGTGAACCCCGACGGCGAGAAGACGGTGGACAAGGCCTTTGCCC +TGGTGCGGGCGCTGTCTGCCGAGCTGCGGGACGAATACGGCCTGGACGGTTGAGGGCCAG +CGCCCTTGACGCCTGGGTCCCGCTGTGCCATTTAGATAAGGTGAGGCCTGCCCGCAAGCG +CCCGAAAACACGTTACTGGTAGATGGCCAGGAGGGAGCGCCGTGGAAGTGGACATCAGCC +TGTTCTCCAACCAGAACCAGTTCGTCATCCTGCGGGTGGGCGAGCAGGCCTACGCCCTGC +CGGCGGCCCAGGTGCGGGAGATGCAGGTGCTGCCCGAGGTCACCGAGGTGCCGCGGGCGC +CGGCGCACCTGCGGGGCATCATCAGCCCCCGGGGCGAGGTGCTGCCGCTTTTCGACTTGC +GGCGCCGCCTGGGGATGCGCTCCCTGGCCGAGGAGGCCGATGAACTCCTGAAGATACTGG +AGGCCCGCGAGCAGGAGCACAAACAGTGGCTGGAGGAGCTGGAGTCCTGCATCCGCGAGG 
+AACGGGAGTTCACCCTGCCCACCGATCCCGAAAAATGCGCCTTTGGCCAATGGTACCAAA +ACTTCACCACCGAGGACCTGGCCCTGGCCTCGGTGCTGGAGCGCCTGGCCGCCCCCCACC +GCCGGGTACACGAGGTGGCCGGCGCGGCCCTGGAGGCCCTGGAAAAAGAAGGGCAGGCCG +CGGCCCAGGAGGTCATCGACCGGGCCCGGCGGATCATCCTGCCCAAGCTGCTGGAGCTCT +TCGCGGAACTCAAGCGCCTGATCCGCGAAACCCACCAGGAGATCGCGGTGATCCTGGAAA +GCGGGCGCCACACCCTGGCGCTGGCGGTGGACAACGTGGACTCGGTGGAGCTTCTACAGC +CCAAGGACCTCCAGAACCTGGAGCGCTTCGGACCCGTCGACGGTTCGCAGGACCTCCTGG +AGTCGGTGGGCCGCCGCGCCAACGGCGAGACGGTCTACATCCTGAAGACTGCCGAGTTCT +TCCAGGCTGCCACCGATCTTACCTTCTAGGTGCGCCGGCGGGGCGGGACCGGCCACGGGT +TCTCCCTGGCGGCCGGCGCGACTTTCCGGACATGACAATATATCCACACGAAGGGCGGCC +ATGGCCCACGGGCAAGAACCAGCCTATGAGATGAAAAGCGAGCCCGCCGGTAACGGCGAG +TTGCGGGTGGATCTCTCCGGGCGGCTGGACATGAACGCCCTGGAAGGGGCGGTGGATCAG +TTCGGCCGTCTGCTAAAGGAGCAGCGTCCCCGCCGAGTGGAGCTGGCGGTGGGGGGGATA +GACTACCTGGACAGCGGCGGGGCACTGGCGCTCACCCTGATGGAGGAGGCCGCCCGCAAG +GCCGGGACCAAGTTCCAACTGGTCCAGGCCGGCCCCGAGGTGCGGGGCATGTTGGCCCTG +GTGGACATGGACAAGATCCGCCGCAGGCCGCTTAGGCCCGCGGAACGGGGGCTGGGCTTC +GTGGAGCAGGTGGGCCAGGCCAGCCTCGAGGTCTGGCGCGATTTCGTGGAGCTGGTCACT +TTCCTGGGCGACTTCCTAATCGCCCTGGGGCGCTCCCTGCGCCGTCCGCGGCTGGTGCGC +TGGCAGGAGACCTTCTTCTACATGGAGCAGGTGGGGGTCAACGGGCTGCCCATCGTGGGG +CTGATCAGCTTCCTCTTGGGGCTCATCATCGCCTTCATGTCCTCCCTACAGCTGAAGACC +TTCGGGGCCGACGCCTACGTGGCCGCCCTGGTCTCGGTGGCCATGGTGCGCGAGCTGGGT +CCCATCATGACCGCCATCCTGGTGGCGGGGCGCTCGGGCTCGGCCTTTGCGGCCGAGATA +GGCACCATGCGGGTCAACGAGGAGGTGGACGCCCTGGAGGTCATGGGCTTCGACCCCACC +GACTTCTTGGCCATGCCCAAGGTGCTGGCCGCGCTGGCGGTGGTGCCGATGCTAACCATC +TACTCCTGCGTGGCCGGCATCCTGGGCGGCATGGTGGTGGGCATCTGGGGCCTGGGGCTC +ACGCCCTACACCTATCTGCACCACACCATCGACAGCCTGTCGGCCTACGGCATCGTCACA +GCGCTCATCAAGTCGGTGGCCTTCGCGCTGATCATCGCCGGCATCGGCTGCCAGCGCGGC +TTCATGGTGCGCGGCGGGGCCCAGGCGGTGGGCAGCGCCACCACCTCGGCGGTGGTCACC +GCCATGTTCCTCATCATCGTGGCCGACTCGGCCTTCGCCATCCTCTTCTACTACGTGTTC +TGATGAGCGCTGAAGATCCTATCATAGAAGTGCGGGGGCTCAAGGCCCAGTTCGGGGAGC +AGGTTATCCTGCGTGGGGTGAGCTTTGCGGTGGCCCGGGGCGAGGTGGTGGTGGTGGCCG +GCGGCTCGGGCTGCGGCAAGTCCACCCTGCTCAAGCACATGCTTGGCCTGTACCAGCCCG 
+CGGCGGGTAGCGTACTCATCGACGGGGTGGACATCGCCCAGGCCGATGCGGCGCAACTGG +AGTGGGTGCGGCGTCGCATCGGGGTGTTGTTCCAGTCCGGGGCGCTTCTGGGCTCGCTGA +CCCTTCTGGAGAACGTGATGCTGCCGCTTGTGGGCTTCACCCCGCTGTCGCGCCGGGGGG +CCGAGCTGGTGGCGCGGCTCAAGCTATCCCTGGTGGGGCTTTCGGGCTACGAGAACCACC +TGCCCTCGGAGCTGTCGGGCGGCATGCAGAAGCGGGCCGGGCTGGCCCGGGCCATGGCCC +TTGATCCCCAGGTGCTGTTTTTCGACGAGCCCTCGGCCGGGCTGGACCCGGTGACCTCGG +CCGAGCTGGACCTGCTCATCAAGCGCATCAACCGCAACCTGGGCACCACCATGGTGATCG +TTTCCCACGAGCTGGCCTCCATCTTCGAGATCGCCCACCGGGTGATCCTGCTGGACAAGC +AGGCCAAGGGCATCATCGCCATGGGCCCGCCCCAGGAGCTGAGCGGTGTTCCTAGGCTGT +TTCCTGGTGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGAGTATTCTATAGTC +TC +>NODE_11_length_12385_cov_59.906_ID_21 +GCCCTGGTGGCCGACCACCGCGGCGCAGGCGGCCTGGTCGCGGAAGTACTGAGCCCACAG +CTCCAGGGTCTGGGCCAGCCCCTGGCGGTCGATGACCGCGCCGCGGTGGTCGGCCACCAG +GGCCTCGGCCAGTTCCCAGTCCTCCAGGGCGCCGCCGGGGCGGGAGAGGTGCTCTTGCAG +ACGCCCCAGCTCCCGGCGCAGGCGCGCCGGGTCCAGCTCCAGGGCGCGGCCCAGGGAGCC +GCCGCTCATGGCGGCCTTGAGGCGGGCGTCTTGGGGGTCCAGCCCTCGGCGGGTGAGCTC +GTCGACCACCAGCTCGGGGTCCAGGGGGGAGAAGTTCACCCGGCGGCAGCGACTCACCAG +GGTGGGCAGCACCTCCCGGGGGTCCTGCACCGTGAGCACCAGGATGTTGTTGGGCGGGGG +CTCCTCCAGGGTTTTGAGCAGGGCCCCGGAGCTGGCCAGGTTGAGCCGGCCCGCCTCGCG +GATCACCACCATGCGCCAGCCGCCGCCAAAGGGAGCGAAACTGAGCGTGCGGATCACCTC +ACGCACCTGCTCCACCTTGATCTGGGCCGAGGGCGAGTCCGAAGGCGGCTCCACCACCAA +GAAGTCCTCGTGGTTGCCCGCGGCCAGGCGGCGGCAGGGGCCGCAGGCGCCGCAGGGGGT +TTCGCCGGGGTCCTGGCAGTTGGCCGCGGCGAACAGGGCGCGGGCGGTGGTGGTGCGGCC +CACCCCCCGGGGGCCCACGAAGAGGTAGGCGTGAGCCAGGCGGCCGGCGGCCAGCTCGGC +CTCCAACTGGGCCCGGGCGCGGGCCTGTCCCAGTACGCCGTCAAGCCTCACGGGTCTTCC +TCCATGACTCCAGCAGGGGCCGGGCCACCTCCCAGATGCGCCGGGCCACCTCCGGCCGGG +AGCCGGCGGCCTCGATCAGCCGCACCCGCTCGGGTTCCTGCCGGGCCAGTTCCAAAAAGC +CCTCCCGCACCAGGCGATGGAACTCGCCGCCGGCCTGCTCCAGGCGGTCCAGTCCCTGTT +TGCCTTGGCGATGGCGGGCGCGGGCCAGGCCCAGGGCGGGGTCCAGGTCCAGCACGATGG +TGAGATCGGGCCAGGTATCCCCGCAGAGCCAGCGGTTCAGCTCCCGCACCCTGTCGGCGC +CCAGCCCTCGGGCCAGCCCCTGGTAGACCTGGGTGGAGTCGGCGAAGCGGTCGCAGACCA +CTGCCTCTCCCGCGGCCAGGGCGGGCCGGATCACCTGCTCCAGGTGCTGGGCCCGGTCGG 
+CCAGATACAGCAGCAGCTCGGCCTGGGGACAGGGGTCGGGTCCGGCGGGATCGGCCAGCA +GACGGCGCAGGGCCCCCCCCAGCTCGGTGGCCCCCGGCTCGTGGGTGAGCAGCGCCGGCA +GGCCCAGTTCCCGGATGCGCTGGGCCAGCAGGCTGGCGTGGGTGCTCTTGCCGCAGCCCT +CGCCTCCTTCCAGGGTTATGAAGGGCGCACGTTTCATGACGACTCCGGCTCTGGGGCGGC +TTCCGGCCGACGGCCGGCGGGACAACCCAAAAATGCCAGAAGCGGAGCCGGGAGTCGACC +TCTAACTTGAAGCGGGTGGTTGGCGGATCGCAGGGGGTTTGTGTCGGGGGTCTTCACCAG +GTCAGGGATGAAGCTCTCGCGTGTCGACGGCATCGCCCGACACGTTTTGCGAGCCGGAGT +TTACGGCGGCTCAGACCGGTACGCCAGGGGAGGACTGGTGGGCCAAGTACATCTTCGAGG +AGTCGCCCAGGCCGCTCAGCTCCACCCGGGCGGTGGCGTTGGCCCCGTGGCGGTAGTAGA +GGTAGAGCTTGCCGTCCTGGAGGGAGAGCTCGGGGCCGTCCATGCCCATGCCGGGACGGT +CGTTGACCAGAAGCGGCCTGCCGGACATCTTGACCCAGGGGCCACCGGGGCTGCTGGCCC +GGGCCAGGTTGGTGCCCCATTGGGAGCGATGGAAGTCGGTGTTCGGCGAGCAGACCTCGT +AGGCCATGTAGTAGAAGTCTCCCACCTTGACCACATTGCTGCGCGCCCCCACGGTACCGC +CCTCGATGCCCTGTGGGTCCACGTCCAGCAAAGGCCCTTGGTTCACGGTAAGGTCTTGCA +GGTTCTCGCCGTGGGCGTAGCCGATGCGAACCCTGCCGTCCTTGGCCAGGGTGTGGAAGT +AGACGTGCCACTGGCCGCCTTCCTTGAACATGGTGGGGGTGCCCACGTCCACGGCGCTGA +TCTCGCCGGCGTCGCCGGGCTCGATCACCGGGCCGTGCTTGGTCCAGTTGCGGCCGTCGG +GGCTGGTGGCCAGGCAGACCGTGTTCAGGTCGTCGTGATCGGCCTTGGCCTCGTAGAGCA +TGTACCAGGTATTGGTCTCGCCGTCGTACTGCACGGCGGGGAAGCTGGCCATCTGGGCGT +CGAAGCCCTCGGGTCCCTTGGTGAGCACCTTGCCCTGGTACTGGAAGTTCACCCCGTCTT +TGCTCACCGCCAGGCCCACGTCGTTCTCCGAGCCGTGGCTGTGGTCGATGAAGTAAGCGT +AGGTGAGTCCGTTTTTTTGCACCAGGTGGGGGAAGTGGAATTGCTCCACGTCGGCGTTCA +CCTCCTGGCGTTCACCCACCTGCACCTCGGCCCGGAAGCGGCTGCCGTGGTAGGTGCTGA +TGGCCGACGGCAGGCCGACGTCCTCGTTTTCGTGCTCGCTGGAGGGCGGGGGCGCGGGCG +GCGGCGGCACCGTCCGCAGGGGGAAGGTGGAGCGCGGGTTGGCGGAGTGCATGCGGTAGG +CGCCCTTGGGCGCGGCCGGGGTGGGATCGCTCCACACCACCGGGGCGGTGAACAGCCCTG +GCGCAGCGCCGATGCTGGCCCCCACCAGCAGCGGGGAGTGGGGGCTATAGCGCACCGGCC +CGCCGGTTTGGCGCGTAGCGCCGGTGGAGAGATCTGCGGGCTCCCCCTGGCGCAGCTTGC +CGTCCAGCATCCGGGCAAAGCCGGCGCCGCCGGAGACGCCGCCACTGCCGGATGTGTCGC +CGGAGGCGGGGGGCGTCTCCTGTCCCGGACGCCACAGGCCCGCCAACGGGTAGCTGCTTT +GTACGAACTGCACCGTCCAACCGCTCCTTGGGCTTTGGTTCCACTGCGCTAACATGCAAA +TCACCTGCCAAGCGCCCCCAGCCCCCCTTGCGGTCAGCGGCTATTGCCATTATTATAGGA 
+CGGCACAAAGCATTATGCCTTTGACCTGCCCTGCCTGCTCTTGCCTGCTCCTGCCTGCTC +CTCTGCCTGCTCTTGCCTGTTTAAGTCTGCACAAGTGAAAAAGAGGAAGTTTTTTCCTGC +TCACTGGGTGAGAATGGAGGTAGAGGTAACGGGGAAGTATGAGCAAGCTTTTCGGCATAC +TGGCGGGTCTGATCGGCATCTTTCTTCTGGTAGCCAGCGTAATTTCTTTTGGCCATTTGG +TGGAGGACGCATGGGTCAGAGGTGGGATGTTGGCCTCACTTTTGTTTGCTCTGCTGCTCC +TGCTGGGCAGTGCGGCCTTCCTCCTCACGGCGGTGCTCCTTTTCCGTATGCGTTCCCACT +ACTTGCCCAGACTCTACGAGTTGGAGGAGCTGGAAGGGCTGGAGGAGCCCCCACCAAAGA +CGAAAGATTCAGGGGAATCGAACGGCCCCCGCCTGGCCTGATCCCGCGGCAAGGGAAAAA +AATGCCCCATCCCCAAGTCCCGCTTTCCTGAAACCTCGTCCTGAATAAAGCCCTTTAATC +CCCACGGTTAGCCCATAAGGCCCGCCGTTGGCATCCGCTTTGCTTGAGTTGAGGCCGAAG +TGGTATCCCCGCTTGGCGGGGGCAACTGGATGAGTGCCCCGGGTTCCTCGTGGGACCCGG +CGGGAAGGGATGTCTCGGCATGGAAGCCAGAACCTTGTTCAACGGCGCCCTGGGGGTCAA +GGCCCACGTCCGGGGCCTGGAGAGCGTTTCGGACAACATCGCCAACGTCAACACCTACGG +CTACAAGGCCACCCGCGCCCAGTTCAGCGACCTGCTCTATCAGGAGATGGCGGGGGGCGC +CGGCTTTCCCCAGCAGGTGGGCAACGGCGCCTTGACCGCGGTGGAGAACATGATGATGCA +GGCTCCCCTGGAGCCCACCGAGAACGTGCTGGACATGGCCATCAACGGCAGGGGGTTCTT +CACGGTCAAGCATCCCGACCGCAACGAGGGCAACCGCTACACCCGCGCCGGCCAGTTCTA +CCTGGACAAGGACTATTTCCTGGTCAATTCCGAGGGCTACCGGGTGCAGGGCTTTGCCGT +GGACGCCGACGGCAACGTGAACGTGAACCAGGTCCAGGACATCCAGATCGACAACCAGAT +CCAGGATGCCACCGCCACCACCAGCGTGGACCTGGCCGTGAACCTGGACGCCTCGGATAC +CACCGAGTTCCGCCAGGCGGTGGCCATCGATCCCACCGACAGCGGCACCTACAATTTCCG +CATGGGGTTCCAGGTGGTGGACGAGGACGGCGACACCCAGGACATCGCGGTGTTCTACCA +GAAGCTGGAGAGCTATACCGGCGACGCCCCTGCCGGCAGCCAGAGCGTCTGGAAGGCGGC +CACCTTCCACAACGACAGCGGCACCCTGACCGCCGACCCCAGCTACCCGGACAACACCTT +CTTCCTGCACTTCGACACCAACGGCCAGCTGGTGGGGGTGACCACCGGCACCCCGGCCAC +CGGCGACTCCTACACCTCCAACGCCGAGGTGTCCAGCACCAGCGCCTCGGTGAGCGACCG +CCTGGGCGAAACCTTCGCCTACACCGGCGCCGGCAACACCCAGACCCTGCGCTCCACCGC +CACCATCACCTTCTCCGGCACCACCACCGCCGGCGACACCGTGACCATCGGCGGCACCAA +CTATACCTTCGCCGCGCTCTCGCCCAGCGACGCCGCCGCTTGGCTGGCCGACCAGATAAA +CGCCAACTCGGCAGGCAGCTACTACGCCCAGGACGACGCCTCCGGCACCGTGACTCTCTA +CGCCAAGGACGGCACCGCCGCCGCGGAGGTGAGCGCCTCCTCGGTGGTGATCTCCACCGA +CGACACCATGAGCCTCACCGAGCTGGTGAACACCGTGGACAGCGGCCGCAAGGCCACCGG 
+CTCGCTTTTCGTCAACATCGCCGGGCTCACCGCCGGCAGCTCCACGGTCACCGTGGCGGG +CCACACCTTCACCTACGGCCCGGCCCAGGACTTCACCACCCTGAGCGAGCTTACCACGCT +CATAAACGATCTCTCCGAGGTGGACGCCACCAGCAGCGGCCATAATATCTACATCACCGC +CGCCAGCGTCGGCACCAGCGGCAACTCCCTGGGGCTTGCCACCAACGATGCCGCCAACGT +GGCGGTCAGCGCCAGCACCCTGCTGAACGGCCTGGACGACAGCGACGCCACCAACATCGA +CGCCTCGGCCACCACCGGCTCCGGCGGCGGGCAGGCGCTGAAGCTGGACCGCACCGACGT +GGGCGCCTCGGCCACCATCGACGTGGCCACCACCAACACCCTGGGCAGCAACCTGGGGCT +GGACTTCACCGGCGGCAACTTCACCCAGAACTCCACCGCCTCCGACGGCAACGGCACCAG +CAACACCACCGGCGAGGTGCCGCTGACCTTCACCTTCACCAAGAGCGGCTCCACCCTCAC +CCAGCAGGTGACCCTGGACTACTCCCCCACCGACGGCGACGATTCCACCATGCTGGCCGG +CGACTACGAGACCTTCTATCTCAAGACCGACGGCAGGGGCACCGGATACTTGAAGTACCT +GGAGATAGACGACCAGGGCCTCATCACCGCCCACTACACCAACGGCCAAGGCGTGCCCCA +GGCCGCCCTGGCGCTCACCACCTTCATCGCCCCCCAGGAGCTCTTGCGCGAGGGCGACAA +CCTCTGGCGGGCCACCGCCGCGGCCGGGGTTCCCACCGTGGCCCAGGCCGGCGACGCCCA +GACCGCCATGGGCGAGGTCAAGTCCTACGCCCTGGAGCTCTCCACCGTGGACCTGGCCCA +GGAATTCGTGAACCTCATCAACTACCAGCGCTCCTTCCAGGCCAACAGCAAGTCCATCAT +CACCGGCGACGAGATGCTCAAGACCGCCATCAACCTCAAGGGCTAGCCCGCGGCACTTGC +CAAGCCCACCCTCCGCACGGTAAAGGGGCGGTATGGCCCCTGGGCCGCCGGCCCCGCCGG +GCGGCGCGTTTCACCCGCGCGGAGGAGTAGAGATGGACCACGCAGCCTTGCCGCAAACCC +TGGCCCCCGGGCTGTACCGCCTGGGCAGCTATCACCTGGCCTGCTTTTTGGTCGAAACCC +CTGATGCCGCCCTTTTGTTCGAGACCGGCATGTCGCTGGTGGCGCCGCTGATCCTGGCGC +AACTGGACGAACTGGGAGTGCCGCGGGAGAAGATCCGCTGGATCGTGCACAGCCACGCCC +ACTCGGACCACTCCACCGGCCAGGCCGCCCTGCTCGAGGCCCTGCCCCGGGCCGAGCTCC +TGCTCAGCCCCACCAGCCGCAGGCACCTGGCCAAGCCCTCCACCGCCGAGCAGTTCGCCA +AGGAAGACGACTCGACCCGCCGCGCCCTGGAGAGGATCGGCGCCCTGCCCCCCGGTTCCT +TGCCCGATCCCCTGCCTTTGTTGCCCGCGCGCCACCGCACGGTGGAGCCGGGGGACACCC +TGGACCTGGGCGGGCTCACCGTGGAGCTGCGCTCCGCTGCGGGGCACGTGCCCGGCGGGC +TCTTGGCCTGGCTGCCGGAGCTGGGGGCCTTTCTGGCCTCGGACTCGGCCGGCTTCCACA +TGGCCGCCCGGCCCAACTACCCGCTGTACTTCACGGGCTACCGCGAGTACCTGCGCACCC +TGGAGGAGATCCGCCGCACCAACCCCGAGCTGCTCTGCCTGGGGCACCAGGGTTGGTTCC +GGGGCGGGGAGGCCCGGCGCTATCTGGAGGCGCTCAAGGCGCACCTGGCCTTCGAGCATG +CCACCATCTGGGAGGCCCACCGCCGCGGCGAGGACGAAGAGAGCCAGGCGCGGCGGCTGG 
+TGGAGCGCTACTACCACGACGAGCTGGCCATTTATCCCCGGGACATCCTGTGGTACTGTT +GCCGCCTGTTGGTGCGGCGCAGCCTGGAGGCGGGCGCCTGAATCGTCCCGCCGGGGTTGC +CCTGGTGCGCTCTGGAAGGTAAAAAGGGCGATAACCCCGGGGGGAGAGGACCGCTGGTGA +ACTACGAGCCCTGCGTGAAACACAAGGTGGTGCGCAACACGGTGCGCGACTTCGCCGAGG +CCGAGTTGCGCCCCATCGCCCACGAGGTGGACCAGAACTCCCGCTTCCCCTGGGAGGTGG +TGGAGAAGATGCGGGGCTTGCAGTATTTCGGCCTCCAGGCCCCCCGCGAGCTGGGCGGCG +CCGGCCTGGACAGCATCAGCTACGCCATCGCCATCGAGGAGCTGTCCCGGGTCTGCGCCG +GCATTGGGCTGTGCGTCACGGTGCACAACTCGGTGGCGCTGTACCCTCTGCTCAAGTTCG +GCAGCCCCGAGCAGATCGAGCGCCTGGCTCTGGATTTGATCAGCGGCAGGCGCATCGGTG +CCTTTTGCCTCACCGAGGCCGGGGCCGGCTCAGACGCCGGGGCGGTGGAGACCCTGGCCC +TGCCCTGTGATGAGGGCTACCTCATCAACGGCACCAAGATATTCGTCACCAATGGCGGCG +TCTGCGGCCTGGCGCTCATCTTCGCCAAGACCGATCTCGACCATCCCCGTGGCGCGCCCT +CGGTGCTCATGGTGGAGAAGGAGCGCTCCGGCTTTGCGGTGGGCGAGATCGAGGACTTGT +CGGGCATGCGCTGCAACCCGGTCTCCTCGCTTTTCCTGGAGGACTGCCTGGTGCCGCCCG +AGAACCTGTTGGGCCGCCGGGGCGACGGGTTGCGCATCGGGCTCAGCGCCCTGGACACCG +GTCGGCTGGGTATCGCCGCCCAGGCCCTGGGCATCGCCCAGGGCGCCTTCGAGGCCGCGG +TGCGCTACGCCAAGGAGCGCCAGCAGTTCGGTAAACCCATCGCCCGCTTCCAGACCATCC +AGAACTACCTGGCCGACATGGCCACCAAGATAGACGCCGCCCGCATGTTGCTCTACCGGG +CCTGCGCGGCCAAGGACCAGGGCCAGCCCTTCTCGGCCGAGGCGGCCAAGGCCAAGCTGT +TCTGCTCGGCCACCGCCCGGGAGGTCTGCAACCTGGCGGTCCAGATCCACGGTGGCTATG +GCTACAGCAAGGAATACGAGGTGGAGCGCTACTACCGCGACGCCAAGGTGACCGAGCTCT +ACGAGGGCACCAGCGAGGTGCAGCGCATGGTCATCGCCCGGGCCATCCTCTCGGCCCCGG +CCTGAGCGCCGGCGCCGGCTGCGTGAGGGAAACATGAAACTGGTCGTCTTCCTCAAACAG +GTTCCGGGCGTCACCGAGATCCCCTGGGATCCGGCCAGCGGTCACCTGCGCCGTGAAAAG +GCCCCGGGCATGATGAACCCCGCCTGCCGCCACGCCCTGGAGGCGGCGCTCATCCTCAAG +GAGCAGCACGGCGGCGAGCTCACCGCCATCAGCATGGGCCCGCCGGCGGCCGAGGAGATA +CTGCGCGAGGCCCTGGCCCTGGGGGCCGACCGCGCGGTGCTTTTGAGCGATCCTCGTCTG +GCCGGGGCCGACACCCCTGCCACCTCCTACACCCTCTCCCTGGCGGTGCGGGCGGTCTGC +CCCGACTGCGACCTCCTGCTTCTGGGCAACCAGACCAGCGACTCCGAGACTGGCCAGGTG +GGGCCGCACCTGGCCGAGGAGCTGGATTTGCCTTCCGCCATCAATGTGGAGGAGCTGGAA +CTGGATGGCGAGGTGCTCAGGGTGAAGCGGCTGTGCGACAATTTCCTGGAAACCCTGGAG +ATGGACCTGCCCGCCCTGGTGACCATCAACACCCAGGGCCACCCCCCGCGCCAGGTGCCC 
+CTGGGCGGGGTGGAGGACGCCTTCTCCCGGGGAGAGTTCCTGGTGCTTAACGCCGAGGAC +TTGAAGGCCGACCTGGCCCGGGTGGGGATGACCGGCTCGGCCGGGCGCATCGTGAAAGTT +TACCCCGCCGGCGGCGAGCGCAAGGGCGAGCTGATCAAGGGAGCGCCCAAGCGCTGCGTG +CTGGAGCTCCTGGAGCGCCACGGCGACCTGTTGGGCGGCTATTTGCGCAAGGACCTGGGG +GGTGGGCGATGAGCCGCCGGCAGCAGGAAAACGGCGCGGTGTGGGTCTTCGGCGACTACC +GCAACTACTTCCAGAACCGGGTCACCCTCCAACTCCTGGCCCGGGCCCGGGACCTGGCCT +CCCATCTGGACACCAAGGTGGCGGTGGTGGTGATGGGATATCGGGTGGGGCGCTGGGTGC +GCGAGTACGTGGCCCACGGCGCCGACGTGGTCTACGTATTGGACCATCCCTCGCTCAAGT +ACTACCTGGTGCAGACCTACACCCGGCTTATGGAGCGCCTGGCCGGCGAGCACCAGCCCC +AGATCATCCTGGTGGGCGCCACCGGCTTCGGCAAGGAGCTGGCCGCCCGCCTGGCCAGCC +GCCTGGGCACCGGGCTCACCGCCGACTGCGTGGATCTCACCGTCGACGATCAGGGCCGCT +TCATCCAGACCGCCCCCTCCTTCGGCGGCAACCTCCTGGCCCAGATCATGATCCCCCAGG +CTCGCCCCCAGATGGCCACCGTGCGCCCGGGCACCTTCCAGGAGCTGCCCCACGACGCCG +ACCGCCGCGGCGAGATCATCAAGCTGCCCCTGCCCGACGATCTGCCCCCCGAGAAGGCCC +GGCTCATCCACAGCCGGCGCATCAAGCCCCGGCGGCGCAAGCTGGAGAAGGCCCGGGTGG +TCATCTGCGGCGGGCGCGGCATGGGCAGCAAGAAGAAGTTCAAGAACCTGTACGCGCTGG +CGCGGCTGCTGGGCGCCCAGGTGGGGGCCACCCGCCCGGTGGTCTACCAGGGCTGGGCCC +CGGAGGATGCCCTGGTGGGCCAGGCCGGGCGCGACGTGCATCCCGAGGTGCTGTTCTCCT +TCGGCGTCTCCGGCGCCATCCAGCACACCGCCGGTATCCACGACGCCCAGTTCATCGTGG +CGGTGAACAAGAACCCCGCCGCCCAGATGATGAAGATGGCCGACGTGGCCATCGCCGCCG +ACGCCAACCAGGTCTGCCTGGCGCTGATCCGGGAGCTCAAGGCCCGCCTGGAGAAGAAGA +AGTAAGGCTCGGCGTGGGGTGACGGCGTAATCGGGGGGTTGGGGATTGGGAGCCGGCCGG +GGTCCGCGCGGCGGAGCTCAGTCGCCCTCCAGCTCCTGCCACAGCTCCAGCACCGCCCCC +AGGCTGGGGCAGGCGCGGGTCGGGTCGGCGGCCAGCTTCAGGGCCTGGGCCGGGGTGAAG +AAGCGTCCGGTTTCTATCTCCTGGGGATTGGGACGGGGCGGTTCGGCGCTCACCCCGGTG +AATATCTCCACGAACTCGTTCTCGGTGCGGGGGCCGGCCGGCAGCCGGCCCAGGGGCCGC +AGCTCGGCCTCTAGCCCCAGTTCCTCGGCCAGCTCGCGGGCGGCGCATTGGGCGTAGCTC +TCGGCGGGGTCCACGTGCCCCGAGGCCGAGGAGGTCCACTTGCCGGGGTAGGTGTCCTTG +GCCGCGGAGCGCTTTTGCAGGTAGAGCCTTCCGGCGGGGTCGAAGAGCAGCACGTGCGCC +GCCCGGTGCAGCAGGCCCTTTGCGTGTATCTCCTGGCGGGTCATGGTGCCGATCACCCGG +TCCTGGGCGTCCACCACCGGCAAGAGCTCCTGGGGATCGGTGTAGTTCACGTCTTCTCCC +CTCCCAGCAGGCCCAGCGCCGTCAGTTTGTCCAAAAACAGCTCCACCAGCTCCGGCGGCA 
+CCAGGCAACTGCGCCGGTAGCTCACCCGGGGGTCCACCAGCACCAGTTGCAGCCCCCCTT +CGGTTTCCTGGGCGCTCACCCGCACCTCGGGACTCTGGGCGGCCCGCTCCAGCTCGGCCA +GAAGCGACAGAAACAGGCGATAGCAACGCCAGGCGCGTTGGCCCGCCTCGCTCTCGAACT +CCAGCAGGTTGCGGTTGCGGGCCACCGCCCCCGGTGAGCGCAGCATCTCGGCCACGCGTT +GCAGAACCTCCGATTCCATCTTCGGACCTCCTGCCCGGTCACCATATCTCCCTCTCTCTA +GCTTGCCCCAAGCGGCCCCTTGTGGCAACCTGGGCCGGCAGAGTTTTACCCGTAAGCCCG +GAGGTCCGCCCCGTGTCCCGGCCCCGCTACGTACGCACCCAGCGCCATGGCCCTGTGACG +GTGGTGGTGATGGACAACCCCGCCACCATGAACGCCATGGACCAGGACATGGGTCCCCGC +CTGGTGGGCGCCCTGGAGTCCCTGGCCGCCGACCGCAGCGTGCGCGCCGTGGTGCTCACC +GGCGCCGGGGGGAGGTTCTCGGCCGGGGGCAACCTGACCCGGGCCGAGGAGTTTTTGGAG +GAGAACCCCGGCCGGGGCGCGGCGCCGGTCTTCGCCCAGTACACCATCTGGGTGCACCGC +CTGCTGGCGGTGCTCACCCGCCTGCCCCAGCCGGTGGTGGCGGCGGTGGAACGCGCCGCC +TCGGGCGGCGGTCTGGGCTGGCTTCTGGCCTGCGACCTGGTGGTGCTGGCCGAGGACGCC +CGGCTCAGTACCGGCTTTCTGGCCATCGGCCTGGCCCCGGCCGCCGGGGTGAGCTGGCAC +CTGCCGCGGCTGGTGGGCCTGCCCCGGGCGGCGGAGCTTCTGATGCTGGGCCGCACCCTG +GGGGCGGACCGCGCCTTGGAGCTGGGACTGGCCGACCAGCTGACCCCGCCGGGAGGGACC +CTGGAGGCGGCGTTGGAGCTGGCTGGCGAGCTGGCCCGGGGGCCGGCCCAGGCGCTCGCC +GCCACCAAGCAGCTTCTGGGCGGGGCCGCCCGCCGGGGACTCTTTCCCCAGGCCGAGGCC +GAGCGCCGAGCGGTCTTGCACACCGCCGACCAGGAGGAGTTCGCCCGCAGGCTGGAGCGC +TTCAGGCAGCGGCGGCGGCGGAGTTAGGCTCCCGCCGGCCAGGGACTTATATACGGGCGC +CGGTTGCCCCCCGGCCCGTCAATTGGTATTATTCCTACGAGTCTTCAAACATTACCCGCC +AAAGCTTGAAGGAGCAGCCATGCCCATGGACTGGACGCCGCCCCCGCGCGGCGGTGGGCG +AGAGCCCGATATAAACCAGGTGGTGCAGAACCTGAAGAACCGCCTGCCCGTATTCAAGAA +GGCCCGGGGGCTTTGGCTGGCGGTGGCCGTGGTACTGGCCATCATCCTGGGGGCCTCCTC +CTACTACACGTGTTCCTAGGCTGTTTCCTGGTGGGATCCTCTAGAGTCGACCTGCAGGCA +TGCAAGCTTGAGTATTCTATAGTCT +>NODE_12_length_10380_cov_58.6117_ID_23 +CCTTGAGCAGCCAGATGAACTCGTCCCACCATCCTATGAGTCCGATGATTCCCAGAATCA +GCGCGGCCAGTCCTCCAACCAGCGCGACCATATCATGTACCTCCCGTGGGATATAAGGTT +TGTCGGCCGCGGGCCAGGGAGCCGGAAGGCGTCTTCATGCGCCCCGGCAAGGCGCCTTTC +TTTGCGCCCCTACCCCTTCCGCGGGTCCCTTTCCGTAAATCCTTAATGTTTTATGCCTTG +TTTACCACCTCTAGCACGGGGGGGACGGCGATGTCAATGCAACTCGCGGCAGAACCAGCC +TTCTTGGCGCTGCCGCAGCTCCCAGCGCCGGCCGCGGCGATCCTCCAGGCGGTAGCGCCG 
+GACATAGGCCAAGCCGCTTTCCGGGGCCACCAGCTCCTCGCCCAGCAGCCGCACCTCAAG +CCACCTCCCCTCAACTCGTATGGCCAGCGGGGTCTGCTGGGCGCGGCCGCCGCTCATGAA +GCGCACTTTGGCTGGGAACCAGTCGCGGCTCACTTGCGCCAGGCCCCGGTGACCCGCTGG +TCGCTGGCCCCCAGGTCCAGGAGCAGGCGGCGCAGCTCCTCCGGCTCGGACACCCCGAAT +AGCTTGAGATAGGGCTCTTGTTGATTCTCCAGGTAGACGTGCACGTGGGCCACGCCGGCC +AGGCGGTGGAAGATGCCGCGTCTCAGGTCGATGCGGGTGATCTGCTCCACCGGGGCCTGG +CGCTCCAGGGCGCGGGGGAAGGAGCGGCGCCAGCCCAGCCGGCCGTCTGCCAGCTCATAT +AGGTTGCTGAAGCGGGTGATGAGAATAAAGGCAAGAAAGCAGGTGGCGATGAGGTGGCTT +AGGTCGGGGCTGATGGGGGCCTGGGGGTTGACCCGGGGCCCCACGGTGAAGATCACCACC +CCCAGGAAGTAGGGCAAAAAGGACTTCCAGGCCGGGCGGAACTCACGGTATTTGGCGAGG +GCCTTACTTTGGTCCATACTGGTTCCTCACTTCCCCGACCGGCCGCCCTGCCCGCCGGGA +CGGGCAGGGCCCTTCACCGCCGGGGCGTCCTTGCCGGCCGGGGCCAGCAGCCTGAAGAGC +ACCACCGCCGGCGGCAGGTCGCGGCGTCTGGGGTCGGTCAGGTAGAAGGCGTCGATGCGG +GCGGCCAGGTCCTGGAGGCTGAGCCCCTGATAGGCATTGAGTACCCGCTCGGCGCTGCGC +GGGGCCAACTCCAGGTACTGCAATGCATCCATTATGCCACGCAAGTAGGCCAATTTTATC +AAAATATCTGCCCTGCTCCCCTCGGGGGGCGGCCAGGCCTCGCTGGGCAGCAGACGCAGG +TCGGCGCCCTTGTTCACCGGGGCCACCACGTACCCCGCCCCGGCCGCCACCGCCGGAGGG +GCCAAAAGCAACAGCCCCAGCAGGCCCAGCAGGCAAGCCCCGCGGCGGCCCTTACCAAAA +AAGCTGACCAAAAGCGCCATGTTATGCTATCCTTGCTGGAGTAAGCACCAGGGAAAGGCC +CAAAAACAGGCCCATGCAGCGCTCAAGTGGCAGCATAACCCAACTGGCCCGATTGGTCGA +GAGCCTGGCAGAGTTGCTAAGGCTTCACTACGGCCCGGACGGCCTGCCCGCCGCCCAGGA +GGAGGACTTGTGCTCCCAGGAGGTGGAGGGCAGGCGTTTGGGTGAGCTGCTGGCCGAGTT +GTGGCCCGATTCCGGACCCTGGCGGGGTCCCTGTCTGGGTCAGGGGGCCATCCGAGAGAC +CGAGCGGCTGTTGCGTTATCGCCTGCAACAGGCGGAGGCGCAGAACCGGGCCCTGGCCAC +CCGCCTGGAGCGGGTACGACGCCAGGTGGCGGCCCAACGGCGGACCCTCCTGGAGCAGTT +GCGGGCGGCCAGGCTACAGGTGGAGCAGGGTCAAGCCCGGCTGGAGGCGCTGAGCGCGGA +GCTTGGCGGCCTGCGGGCGGAGCTCGAGCGGCGGCGGGAGCGGGCCCGGCGGCGGCGGCG +CTACCTGGAGGGGGTGGTCCCCCGGGGGATGAACCGCTTTGCCGACGCCGGGGGCCGGAT +ACTGGACCAGGCCACCCTGCGGCGGGCCACCCGGGAGCTGCGGGAGTTGCAGGCCCTGGT +GGTGCGAACCAAGGACGCCTGCGCCCGCCTGGAGCACCGCTGGGCCCGGGCACGTCTGGA +GCTGGCCCGGGCGCGCTCCCATGAGGCCGGTCTGCGGGCCGAGCTGGCCCGCCTGGAGCC +CTACTGGCAGGCCAAGACCCACCGCCTGGCGCGCGCCAAGGTGGTACTGGCCGCGCGGCA 
+GGAGGAGCTGGGTCCCCTGGAGCGCAACCTCCACCGACTGCGGGTCATGGGTCTGGCCCA +CGCCGAGGTGGTGTCCCGGGGGCGGGCGGCCCTGGAGCCGCTGCTGGCCCCGCTGGCCAG +CGGGGAGTCTCCAGACCCGGTGGAGAGCCTGGAGGAGTCCCTGACCCAAGCCGGGGAGCA +GGCCCGGCGCGGCCGGCGGCTGACCGCCCTCATGGAGCGCCTGGGCCGCCGCCTGGAGCG +CCGCCTGGAGGCCATCGGCCCGGTCCTCAAGGAACAACGCCGTCTCAACAAGGAGATCAC +CCGCCTGGAGAACGAGCTGCCCGGGCTGTTGGAACCCCTGCTGGCCGGCGACGGGGCCGA +CCCCAGGAACCGCCAGGAGGCCGGGGCCCGCTTCTCCCTGCTCATCGCCCGCCTGGAGGA +CCTGATCCCCCAGGCCCGGGCGACTCAGGAACAACTGGACGAGCTGCGGCAGGCGCTTAC +CATCGGCCTGTCGCGGGGCAAGGCCTGGCAGGCCGCCTGGCGCCGAGCCGGCAAGGCCGA +GCGGGCCGCCCTGCACCAGGCCCAGGCCCTGGTGGAGGAGGTGCGCCTGGCCGCCCGCCA +GGCCGCCCGCCAGGCCGAGCACCTGCGCCGCCGGGCGGAGCCGGCGGTGAAGGCGCTCTC +CCCCCTGCGTAGCCAGGACCTGCTGCCCTCCCTGGCCGCCCTGGCCCAGGGAGTGAGCCG +AGGTCAGCTCAAGGCCCGGCAACTGGAGGCCCGGGCCGCCGAGCTGGAAGGGCGCATCCC +CCGCCCCTACTTCGGCAACCTCTCCAAGCCGCCGGTGGCCCTCAAGCCGGTCTCGGCCGG +CCTGCGGCGGCTCAGCGGCAAGCAGGTGGAGCTGGAGCGGCTGGCCGCCCTGGAGCGCGC +CGCCCGCCGCTGGCAGGGACTGTTGGACGGGCCCTTGGTGGAGGAGATCCGCCGTCCGGT +GGAGCAGGTGGCCCTGCGCCTGGCCCGCAGCCTCACCCTGCTGGAACGCCAGAAGAACCT +GCTGGCCAGCCGCCATCAAAAGCAGGGCCGGGAGCTATCCACCCTCAAGGCCGAGTTGGA +TCAGCGCCGCCGGCGCGAGGAGCTGGCCCGCCGGCGCCTGGAGCAGGTGCGGGACCGCAA +CCGCCGGCAGCAACGCACCATCCGGAATTACGAGACGGAGCTGAAACAGGCCCAGACCCG +GGCGGCCCTGGCCCAGCGCCTGGAGGACGAGCTGGCCCGGCTGGGCGAGCACGCCCAGAC +CCTGGCCCGGCGCCTGGAGCGGAGCGACAAGCTGGCCGCGGCGCTCAAGCGCAAGTCCCT +GGAGCGCCACCGCCTCTACCGCCGCTCCCAGTACGCGGTGGAGTGGCTGGACTACTGGCG +CGAGCGGGCCCTGGAGCAGGAGAAGCTGCTCAGCAGCGCCCGGGCCGAGCTGGAGCTGGC +CCGCCGCGAGTACCAGCAGGCCCGCAGCCTGCTGGCCAGCGCGGTGAGCGAGCGCGACCA +GGCCCTCAAGGAGCTGGCCACCGAGCGCGCCGCCCGTGCCCGCCAAGCCCTGGACCTGTT +GGGCGGCAAAGCGCTTTCGGTGGAGCTGGCCGCCAGCCGCAGCGAGGCCGGGCGCTGGGC +CAAGCTGGCCCAGGACATGGCCCTGGCTCTCGCCGCCAGCGGAGAGCATCACCGCCAGGA +AACCGCCGATCTCAGGGCCCAAGTGGACCAGCTCTCTGCCGAGGCGGCCATGCTGAAGCG +CCAACTGGAGCGCATCGCCGCCATGGTGGAGGTACAGGTCCCGGGGCTGGAGGAGCTGGC +CGATCTGCCGCCCGCCCCCTCCTGGCGACGGCCGGTGGCCCTGCGCCTGGTGCCCCTGGG +GCCCAAGCAGGTGGCCCAGGCCCTGGACCGCCTGTCCGCCGCCCGCCGTCGCCTGCAAAA 
+CCTGGGGCGCGGCACCCTGGGCCACTGGGCGCTCATCGCCGCGCTCACCTGCGGCCTGGT +GCTCACCCCGCCCGGCACGCCCTCCAAGGCCACCCGGGCCGACGCCCCGCTCAAGCCCCC +GCGTCCGGTGCTGCGCCACCTGGCCCAGGGCAGTCCGCTGACCCCCATCTACCAGGTGCC +GGCCCAGGCCCGCCTGCTGGGCGACAAGGTGGCCCGGGGGTCTCTGGAGCTCAACCTGCT +GCCTTTGCGCGGCCAGCCGGTGGCCGTGCCCCAGTCGGTCAAAAGACGCCTCAAGGAGCT +GGCCCGCGAGGCCGGGCTCTCGCCCAAGGTGCTGCTCACCAGCGCCAGGGCGCTCTACGC +CGGCCAGGCCGCGGTGGACCCCAGCGCCCTGGAGGAGCTGGCCCACACCGCCCGGCAACT +GGCCCGGCGCCATCCCCTCATCTTCCGCGAGCTCTCCCGCCGCGGGCTGCCGCCGGCCGC +CTCGGCGGTGGCCGCCCTGGCGCCCGAACCCGAGAAGGCCCAACATCTCTTCCTGGACCG +ACTCTATCGCGAGTACCGCTCCCTGGGCTTCTCCGCCGAGGAGGCCCTGGGCGCCCTGGC +CGCAAACCAGCGCGCCTTCCACCGCCTGACTCGCCAGTGGACCCCGCCGCGGCGTTTCAT +CGGCAAGGTCCAGCCGGTGGAGGCGGTGGAGAAGATGGGCCTGCGGGAGTTCCTGCAAAA +GATCACCCCCTACATCCAGTCCAAGCTCAAGGTCTTTCTGCGCCAGCGCGGCATGAGCTA +CTCTGGTGACCTCACCCTCTACGCCAAGAATCTGGCCTTCGACATGTACTGCGCGGCCAA +GAAGTTCCAGGTGCCGGTCACCCTGCTCTTGGCCATCGCCCACCAGGAGACCTGGTACGC +CAACGTCCTGGGCGATGCCAACCGCTCGGCCAGCCCCTTCCAAATCTACGAACCCACCCG +CGAGCTCATCATCAAGTCCATGGCCGAGGCCGGCTTCGTGCCGCCACCCAAGAGGATCAA +GCTCCAGCGCCACCTCACCATGGCCACCTTCATGGCCTCCTTCCATCTGCGCGAGCTCAT +GCAGCGGGCCTACACCCCGCCGCGCCGGGGACGCCAGGCGGTGGTCAACCTGGACCGGGT +GCTCCAGCGCTACAACGGCTCCTCGCGCTACGCGGCCCAGGTGGCCCTGCGCAAGCGCCA +ACTGGCCCGTTTCCTCCGCCGCCAGGGCTGAGGGGGCCAGGGAGGCGAGACCAGACGGGC +GCGGGACGCGGGGAGCAGGCGGGAGACGCGAGCCGAGGGGGCAGACCGGGGAGAGACAAG +GCGCCCCTGGCGAATCACCTGACGGCCACCGCGAGCAGGCAAGCGAGACGGCAGTGACGC +GCCGGCCGGCGCGAGGCGCTAATCAATCCAACCCCCGCCGCCCGGGGCGGGGCGCTTCAC +GGGAGGGAAGGGTGTCATGATCGGCAAGCGCATCAGGCTGGAACGGGTGATGGACCGCGG +CACCAGACGCACGGTGCTGGTGCCCTTGGTGCACGGCGTGGGCATGGGGCCCATCGAGGG +CATCACCGACGTGCTCAACACCGTGGACACCATCTCCATGGGCGGGGCCAACGCGGTGGT +GCTCCACAAGGGCATCGTGGCCGCCGGCCATCGCCGCGGTGGGGCCGACATCGGCCTGGT +GATCCACCTGAGCGCCACCTGCGCCGACGGCAGCCAGACCCTGGTCACCGAGGTGGAGGA +GGCGGTCTGCCTGGGGGCCGACGCGGTGTCGCTGCGTATCGAGGTAGGCGGCGCCGACGA +GGACGAGTCCCTGGCGCTGCTGGGTGCGGTGAGCCGCGTCGCGGCCGACTGGGGCATGCC +GCTGTTGGCCCTCATGAACCCCGCCCCGATCCCACCTGCCAAAATGCCCAAGCTCCTCAT 
+GCGGGCCGCCCGCATCGGGGCCGAGCTGGGGGCCGACGTGGTGCTGGTGCCCTACCACAA +GCGCTTCGCCGAGGTGGTGGCCGCCACGCCGGTGCCGGTGGTGGCCATCGGCGGGGTGAA +GAAGACCCCGCCCAAGCAGATGCTGGAGATGGCCCGGGCCGCGGTGGACGCCGGCGCCTA +TGGGGTGAGCGTGGGGCGCACCGTGTTCCAGTACGAAAAGCCGGGCAACATGATCAAGGC +CATCTGCCAGGTGGTGCACCGCAAGGCCACGGTGAAGAAGGCCATGGAGATCCTGGCCAA +GAAACCCATCGAGAGCACCCTCTACGGCGGCACCGTCATCTGGTGAGAGGCCGCCGGGGG +CTGCCTGGCTTGAACGCGCAGGCGGCGGGCCGGGGCCGACAGGCTCCGGCCCGTTTCTAT +TTCTCCGGCTGCAAGGGCCGGCCCAGTTCCTCCAGCAAGGGCGGCAGGCCCTCTGGCTCC +AGGGCGTGGATCCGCACCAGCAGCGCCTGCTGGCGCGAGGGCGGCAGCAGGCCGGCGGTG +AGCGCCCGGAACTTCTCCTCCAGGCGGGCGGTGGAAAGGGGATTGGCCGGGTCGCCCAGG +GGGTCCAGCACGGTGTGCTCGTGGCGGCGTCCGCTGCGGGTCATCGCCACCACCCGCGCC +GGCGAGCGCTCCGGGAACATGGCCTCCAGCTCCGGGTCCACCTCCAGCCGCACCCGCCCG +GCCAGCTCCACCAGATCCTCCCGTCCCAGCAGCTCCGGCCGCAGGGGCAAAAGCCCCGCC +GCACCCTCCACCGCCGCCACCGCCAGGGTGAAGGGCAGGCTGTACTGGGCCCCCTCCAGG +GTGGCCGGGCCGGTCTCGTTGCTGAGCCTCAGAGCGCGTTCGAAGGTATGGACCTGGATC +TCTGCGATCTCGTCGGCGTCCAGACCCTGTTCGTCCTGGAGAGCCAGCAGGGCGTCGATG +GCGGCGTGGCACCAGCGGCAGCAGGCATAGGGCTTGAAGTAGACCTGCTCGATGGCCCAC +TGCCCGCCGCCCAGCCCGGCCGTGATGCCCGGGGCCTGGTAGTAGTCGGGGTGGTCCAGG +ATGTCCAGGGGACCGCTGAAGCCGTGGGCGGCAAGCTTCACCGCCACCAGGCCGGTGAGG +GTGGCCCAGGGGATGCCCTCCTTCACCAGGTTGCCCATGAGCCGGGAGTAGCCCGCCGCG +GCCAGATCGGGCGAGAGCACCCCGGCCACGGCCATGGCCTGGGCCAGACCGGCGGCGTCC +AGGCCGTGCAGCCGCCCGGCGGCCGCGGCCACGCCGTAGGCCACCCAGCGCCCGCTGGCC +ATGGTGGGCAGCCGGGCCAGATCCCGCGCCGCGGCCACCCGGCAGGCCACCTCGTAGCCC +AGCACCAACGCCGCCTCGAGCTCCCGGGCCGAAGCCCCGGTCTCCTGGGCCGCGGCCAGG +CAGGCCGGGATAATGGCCGCGCCGGGGTGGCCGGCCGCCGCCCGATGCCCGTCGTCCAGG +TCCCAGGCGCTGGCCGCGGCCGCGTTGGCCAGCGCCGCCCCGGGGGAGGCCAGCCTCCGG +CCCGAGAACCACAGCCAAGCCGGGCCGGCGGCAAACAAAGGCCCGGCCACCGCCCGCACC +ATGCGGGCCGCCCCGCTGCCGTGGCCGGCGCAGGCCGCACCCAAGAGATCGATGAGGCAG +CGGTTGACGGCCTCGCCCAGCCGGGGCGGCAGCTCGTCCGCCTTCAGCCCGGCCGCGAAA +CGGCCCAAGGTCTCCAGGGTGGTCAAGCTCCTTCTCCCGCGGGACGCAGCTCTTGCACCG +CCTGCCAGGCGTCGAGGTCCACGGCCCCTTCCCGGGCCATGCGCTCGACCACGCGCCCCG +CCGCCCGGCGCCAGGGCTCCAGGTCCACCTTCAGATACACCGCCCCGTACTCGGCCAGGT 
+TGGCCGCCTGGTTGTCCACCTCGGCCCGGCGTATCTCAATGCTGAAGAACCCCCCGGCCG +CGTCGCAGGATTCCTCCAGGGCGGCCTGCACCGCCGGCGGCAGGGAGAGGTACTTCTGCT +CGTTTACCGTCACCGCCAAGGCCGGCGGCACGTCGCCCAGCAGGGTAACGAAGCGCCCCT +GGCGGCAGAAGCCCAGCGGGTAGAGGTGGGAGCGGTGGGTGGGCAGCAGGTCGAACTCGC +CGGCCTCCCAGGCCTCCCTGACCCGCGGCCAGGGCACCACCACCGGCCGCGCCCCTAACT +CCTCCCAGAAACGGGCCAGGGCGGGCGAGTCGAAGATGCGGACCTTGAGGCCCTTCACCT +CCTCGGGACGGGTCACCGGCTGGCGGGCCACCAGCACCCGCTCCAGGCCCCGGCGCCAGT +TCCAGCGCCGGTTGAGGATGCGGATGCCGCTTTTGGTCAGCGGCCCGTGGACCCGCTCCT +GGAAGAAGGGGCTCTCCAAAAAGGAGAGCAGGTGGGCGTCGTCGGCGAAGACGTAGGGCA +GGGAGATCAGCTTCACCGCCGGGGCGATGTGCTGGTAGAAGATGAGTTCCTCGATAAAGA +GGTCGATGACCCCCTGGCGCAGCATGGCCACCTGGTCGGGCGCCGAGCCCAGCATGGTGG +CCCACAGGGGGATGATGGACATGCGCCCGCCGCTGCGGCGGTTGACCTCGGCCGCGAACC +ACTCCAGCCCCCGGTTCTCCGGCCCGGAAACGGTGGTCATGGTGCCCAGCACCCGGGGTA +TCTCCTCCTCCTTCTGGAAGAGGTAGCTCAAGGGGCGGTCCAGCACCCGGCAGAGGCTGG +TCAGCACCGGGATGGAGATGGCCGCCTTGCCGTTCTCCAGCTTGGAGAGCTGACTCTGGG +ATATCCCCGCCCGGGAGGCCAGCTCGGTGGCCCGCAGGTCCAAGGCCTGACGCAGCTCGC +GGATGCGCGAGCCCACCATACGACGTATGCGTCTCTCGTCTTCGGACAAGCGGGCCACCG +CTCTCTACTCCTCCTCCAGCCGCCGCTGCACCTTGATACCGCGCGCCGCCAGCTCATCCA +AGAAGCGCTGGCCGTCCACCACCTTCACCGGCGAGAGCACCCCGGCGCCGGTTATCTCCC +CCTTGAGGACCATCTGGGCCCCGATGCTGGCGGCGAAACCCACGGTGCGGTTCATGGCGA +ACAGGCCGGTGGCCAAGTCGCGGTAGTCCACCAAGTCGTAGGTCACGGTGCGTTTGCGCC +CCCCGCGGCGGCCCCAGACCTTGACCCTGAGCACCACCACGTCGCGCTCGTTCTCGCGGA +ACTGCAAGCGCGGCTCCAGCAGCTTCACCAAAAACTCGCGCGGGCTGACGGACACCCCCT +CGCCCAACTCCACCGGCTGGTCCCCCAAAAAGCCCAGCTTGGCCATGATACGCCAGAAGG +CGCTGTGGCCGGGCCAGCGGGTGGCGAAGCGGCCCATCTCCTTCAGCTCCGGCCCCAGGC +CGAATACCTCGACGAAGCGGGTGGCGTCGCCGTTGGGGTAGGCCTCCAGGCGCCCCAGCT +CCGGCACCTCGATGAAGTGTATGTTCTCCTCCTGGAAGATCTCGTCACCGGGGATCTCCA +CCGGCCGCCCCTGGCGCAGAAGCCGCGCCGGCCGGCAGTAGGCCTTGAGCACCCCGTCGA +AGGTCCAGGTGATCTTGTACTTGAGCGGGTTGTCGCAGGCCGCCGGATCGGGCAGCCCGC +CGCCGTAGCTGTAGAACCCCTCAACCTGGTCCAGCTCGTCCAGGGCCATGCGCCCCACGA +TGAGGTCGATCCCCGGATCGAAGCCCATCTCGGGCAGCAGGATGACCCCCTTGTCCCGGG +CCCGCTGGTCCAGCTCCCCCAGCCACTGGGCGTAGCTGGTGTTCACGAAAGGCACGCCGC 
+ACTCCACGCAGACCTCGGCGATGCGCCCACTCAGGTGGGCCGGCAGCATGCAGACGACGA +TATCGGATCGGTTCTCGCTTATCAAGCGCCTAAGCACCGCCGGGTCCAGGGCGTCGGCCT +GCACGGCCCGAACCTTGTGATAACCGCCCTTGTCCAGAAAGCGCTGCGCGGCGGCCAAAT +CGAGATCGGCCACCAGAATCCGGTCCACCAACTCACTGCGGGAAAGGTCGTGGATCACCG +CCTTTCCCTGTAACCCGGCCCCAAGTACCAGAGCGTTCATCTTTACACTGTCTCCTGTGG +CGTTGTGGATATCCCGGTTAGACATTCCTCAACCTGGGATCCAGGGCGTCGCGCAAGCCG +TCGCCCAGCAGGTTGATGGCCAGGATGGTTAGCATGATGGCCAGACCGGGCAGGGTGACC +>NODE_13_length_9239_cov_583.686_ID_25 +CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG +TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +CACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG +CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGTTGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA +GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT +TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG +GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCACCCGTGCCATTTCCTCCAT +TGAAACGGACGGGAAGTTGACGGCGTTGGTAATCACCCCGTCGATAAGGCAGGCGGCCAT +CTGCTCGGCGATCATGCGGGCCACCTTGACCTGGGCCTCTCCCGTACTCGCACCGATATG +CGGGGAAAGAACCACGCGGGGATGCTTGAGAAGCGGAAGCGATGCATCCGGCGGTTCCTT 
+TGGAAACACATCGATGGCCGCGCCGGCCACCCGGCCGCTTTCCAGCGCACGGTAAAGATC +ATCTACGTTCACCACGTCGCCACGGGAACAGTTGATGATCCGGACTCCCGGCTTCATGCG +GGACAGGGTGTCTGCGTTGATCATGTTCACGGTCTCTTTCAGACGCGGGACATGAAGTGA +AATAAAATCCGATGCGGAAAAAAGTTCGTCCAGGGAAACCAGCCGCACATTGATTCTTGC +CGCCGCATCGCAGCTGACATACGGATCGGCCGCAATTACCCGCATCTTCAAGCCCCTGGC +CCGGTCGGCCACGATGCGTCCGACATGCCCCAGCCCGATGATTCCCAGGGTTTTGCCGGC +GATTTCGACCCCGATGAGTTTTTTCTTGTCCCATGCGCCCTCCCGTAAAGAGGCGGTGGC +CTGGGGGATATTGCGGGCCAGGGCCATCATCATCGAGACCGTGTGTTCGGCAGTGGTCAC +CGTATTGCCGCCCGGTGCGTTCATGACAACGATTCCCTGGCGGGTCGCCGCCTCCAGATC +GATATTGTCCACACCGATGCCGGCCCGTCCGATGATCTTCAGACGTCCGGCCTTTTCCAT +GACCGCGCGCGTGACCTGCGTTCCGCTGCGGATGGCCAGGCCGTCGAACTCACCGATCAT +CTCCGCCAGCACCTCCGGCGCATTGGCCGCCTTGTCGTTGTCCGTGACCACCTCGATCTT +TCCGGTCGCCGTCAGTATTTCCCGTGCGACCGGAGACATGGAATCCCGAATCATGACCTT +GAACATGTCGCCCCCACCAATATATAATGGTTAAATGCGGATGATCCCGATTCCATATAT +TCAGAAACGCTCGGCTACGTCAAATTGAAATGATCCGGGACCGGCCTTTCCGGCGGATAA +GCGTTGAAGAGTTGAATGAGGATATAAATGGGTGTATCGTAAATGTCATCCATCCGGAAA +CCGGGGCATGCGCCTTCCGCAGGTATGTGCTCCAGCTTATTCCTCTTATAAAAGGAGGTA +TTCCATGAACGACCCGATTTATCAAAAACTGGCCACCGTTTTAGACACACTCCCCAACGG +ATTTCCCGCAACCGAAGACGGGAAAGAAATCCGGCTCCTGAAAAAAATTTTCTCCCCGGA +GGAGGCTGAGCTCTTCTGCGACCTGAAATTGACATTTGAGACGGCCGAACAGATCGCAAA +CCGAACGGCGCGCCCCGTGGAAGAGTTGAAAGCGCGTTTGTCCGTCATGCAGGAAAAGGG +CCAGATTTTCGGCATCGATATGGGCGGGGTCGGTATTTACAAGATGCTGCCCTGGGCTTT +CGGCATCTACGAGTTCCAGTTACCGCACATGGACCGGGAGCTGGCCGAACTGTGCGAGCA +ATACGGCAAAACTTACGGGAAGCAGTTTTTCGCAAACAAGCCCCAGCTCATGCAGGTGGT +CCCCATCGAATCCGAAATCAAGGCGGAGCATGAAGCCCTTCCCTATGAGCGGGTTTCCAC +CATCATCGAAAACAGCCGGTCCTTCATGTATTTCGACTGCATCTGCAAGAAGGAAAAGGG +ACTGATGGACGAACCGTGTGACAAACCCGTCCAGGTCTGTACCGCCTTTGCACCGATTCC +CGGTGTTTTCGACGATCACCCGTATGGCAAAACCATGACAAAGGAGGAGGCCTACCAGCT +CCTCAACAAGGCCGAGGAAGCCGGACTGGTGCACCTGACCTGGAACGTGAAAAGCGGCCA +CTTTTTTATCTGCAATTGCTGCGGCTGCTGTTGCGGCGTATTGCGGGGCATCAACGAACT +GGGAATCGATGCCTCAAAGGTTATCAACTCCTATTATTATGCGCAGATCGATGCGGAAGC +CTGCGTCGCCTGCGGAACCTGCGCCGATGAGCGATGCCAGGTGAACGCCATCATGGAAGG 
+GGATGATGCCTACACGGTTATCGCGGAGAAATGCATCGGCTGCGGCCTGTGCATCACCAC +CTGCCCCGGCGACGCCATATCCCTTGTCCGTAAACCCGCAGCGCAGATCGAAACGCCGCC +GGACGACGAAATGGACTGGTATGAGAAACGGGCGCAATTGAGAGGCGTCGACATCAGCGA +TTATAAATAGTGCCCACCGGCAAATCATGAAGGATAAATTGGCTTAACATGGAGCTGACA +AACGACGAAAGGGAATTCTTTGCGCTGGTCAACCGGGCCAGCCTGCTGAATCCATTCAGT +GATGAACGAAATGACGTTGATCTGAAGCTGGCCGGTCTGCCGTCCGCAGCGCCCGGAACC +GGACGGGTCAAAAAAGCCATCCAGTCGGTAAATGAGCGGATCCGTCAGCTTGAAACCGAC +GGCCGGGCCGATATTTCACAATACACCGGCCGGGACCGCGAGCTTGTCGAAAAGGCGTTT +CTCTTCGAGCTCTTTTATCGGTTCAGAAAACAGTTCGATGAACTCATTGAAAGCCAGATT +GCGTCCGATGATGTTCCCGCCAGAATTCCCTTTTATAATGATGCCTTTTCCGCGATGCAG +AAGCGGGGTTTTACCGAGGAGGATTTCCGCCGCTATTTCGCACTGGCCTTCCAGATCCGG +CGTGCGTTCTATTTCATCGGCCGGAGCCTGGTGGGCAACAGCGCCAGCATGAAGTCCCTG +CGGCTCAACCTGTGGAACAACGTGTTCACCCACAATATGGACCTCTATGACCGGTATCTC +TGGAACCGGATGGAGGATTATTCGACCCTGATTCTGGGGGAAACCGGCACCGGCAAGGGG +GCCGCGGCCCTGGCCATCGGCCGGTCCGGATTCATTCCGCTCAAGAAAAAAAGCTTCGAG +GAGAGCTTCACCCGGTCCTTTATTTCCTTAAACCTGTCCCAGTTTCCCGAAACACTTATC +GAATCGGCGCTTTTCGGCCATAAAAAAGGCGCATTTACCGGGGCAATCGAAAACTACCAG +GGCATCTTTGAACAGTGCAGCCCCTACGGCGCCATCCTGCTGGACGAGATCGGCGAGGTG +TCGAAACCCATCCAGATCAAACTTCTCCAGGTCATCCAGGACCGCGTCTTCACGCCGGTG +GGAAGCCAGACCCGGAGCCGGTTCAACGGCCGGGTTATTGCGGCCACCAATCGTCCGCTC +GAAACACTCAGGGGGAAGGGGTTCTTCCGCGATGACTTCTACTACCGCCTCTGCTCGGAT +ATCATTGTCGTCCCGCCCCTGCGTCAGCGCGTCCAGGAAGACCCGACCGAACTGGATGTG +CTGCTCGATTTCACCATCAACCGGCTGGTCGGCCGGTCATCGCCGGAACTGGTGCAGATC +GTCCGTGAGGTCATCGACCGGCACCTGGGCAACGACTACCCCTGGCCCGGAAATGTCCGG +GAACTCGAGCAGTGCGTACGCCGTGTCCTGCTCAAAGGGATCTACACCGGGGATGCCGCT +GTCGCGGACATCGATCTGTGCAGGAGCCTGACAACCGGTATCGAACAGGGGAACATCGAC +GCCAACAGCCTGACATCCGGGTACTGTTACCTCCTGTACCAGCGGCACCGGACCTTCGAG +GAGGTCGCCCGCCGGACCGGACTGGACCGGCGGACCGTGAAGAAATATATCCAGGATTGG +ACCAGCAGCCATTCAACTGACAATCCGCCGGAAACGGATATTCCGGGATAAACACCATTT +TCGTTTTCATTCCCGCACCACCCCCTGTCGCCGCCGTTTTATTTTCTGTACCTGTTCTGT +ACACGCCACCGACACAAACTGATGATTTCCGCTTTGAGAAAAAAGGGCCCGCGGCTGTCA +TATTTTTTATTTTCATATAAAAAACAATATTTTAATCGTTTTTCAAAAAAGCTGGCATGA 
+TGCTTGTAATATCAATAGACGGTTATTGTCGCAGTCGGATCGCAAAGCCATCCGCAATTG +AAACCCGTTTTAAAATAAGGAGTGCATCATGAAAATCCAGCAGATCATCATCCGGGAATT +TGAAGAAATGATGGCGGAACTCAAAGAAGTCCTTGCAAAAATGACCTGCCCGCTTCTCGG +AGAAGACTGGCTCCCCATGTAGCCGCCGGCGGTTATCCGGGGATGGGCGGCCATCCCCGG +ACGGCTGCGCTGCAGGCCTCCGCGGACGGCCTGCAACCCCTGCCCTTCAATCCACGCCCT +GCCCGATCTCCTTTTTTTTATACGGGATGTTGAGTTTGTTCATCCGCTTCCGGAGCGTGT +TGGGATTGATGCCTAAAAGCTCGGCCGCACCACCCGGGCCGTTTATTTTGCCATTGGTGA +CTTTCAGCGCCCGCCGGATATGGATGGCATTCACCTCGTCGATGCTCAGGAACCGGTCGG +TCGTCTCATGCCCGCCGCCCCGGGACGAGGCGATGGGGCTGAAGATCAGTCCGTCAAAAC +GGAGCATCCCCCCCTTGAACTGGATCAATGCCCGCTCGACAATATTTTCAAGTTCCCGGA +CATTTCCCGGCCAGTCATAAGCCATGAGCTGATCCATGGCGCCGGGGGAGAGCCGGGGCT +GTTCCTGCATCTTAAGATCGATCGTCTTCTTTTCGATAAAATGATGGACCAGGGCCGGTA +TATCCTCTTTCCGCTGCCGCAGCGGCGGGATGTGAATCGGAAACACATTGAGACGAAACC +AGAGATCCTCCCGGAATTTACCGGAACGAATCATCTCCTCCAGGTTCCGATGGGTGGCCG +ATATGATCCGGACATCCACGGAAATGGGCGTCGTTCCGCCGACCCGCTCGATGATTTTTT +CCTGAAAGACATGCAGGAGCCGGACCTGGGCATGGGGCGGGAGCTCGCCGATTTCATCCA +GAAAGATGGTCCCCTTATCGGCCCGCTCGAAGCGTCCCTTCTTCTGGGCAATGGCCCCGG +TAAACGCCCCCTTCTCATGCCCGAAAAGCTCGCTGTCGAACAGGGATTCCGGAATAGCAC +CGCAGTTGACGCTGACAAACGGTGCATTCTTCCGGGGCGATGCGTCGTGAATGGCATGCG +CCACCACGCCCTTTCCCACACCGGTTTCGCCTAAAAGCAACACAGGGCTGTCAAGCTGCG +TGACCTGCTGCACCATTTCCATCACGTGCCGCAACCCCAGGTCGGCGCCGACAATTTCGG +AAATCGATAAATCCCGGATCTGGCGACGCAGGTAACGGTTGTCATCCGTCAAGATGTCCT +TGAGGCGGATCAACTCCTGGTGCTGCAATGCATTGGTCATGGCAATGGCAAACGGCTCGT +GCAGCAAAAGCATCAACCGCGCATGCTTCTCCTCATACCGGTCCCGCCCCCGGGTCCGGA +GCAGCAGCAGCCCGACCCGCTTCCCTTCCAGTTCCAGCCGCATCGACATGAGGGAAATAT +CGTGTTCCAGGCCGTACATTTCGATAACCTCTTGAATCTCCGGCCGTTCTTCCACCCGAT +TGATGATCGTGATATCTCCCATGTCCGCCCAGATGGCCGACCACTTCTCCTTGCTCGCTT +CGGGCAGGGGAGATACCGGGCTGAATTCATTTTTCCCGTGGCTCTTGACACCGGCGATCC +GCTGAAAGACGTTCAGGCCCGGATCATAGAGATAAAGCCCTATTTCATCGACCGGCAGCA +TCTGTTCCAGGTAGTGAAAGCAGCGCTCCAGCGCCGTTTCGATATCCAGGCTCCCGCAGA +TCCGGATCGTTGCCTGCCTGAAAAATTCATTTTTATCCACAGCCATATCGCTTTCCCTCC +CATCCGTTTTCTCACACCAAACCAGAAATACCCCATAGGATAACGATGGTCCTATTTTTA 
+CACTATACGACAATATATTTTATCCCATAGCACAATCAAAAAATCAATATTTTTATAACC +AACTGATATTCAACAATTTAAAAAGTTGGCACACTTTTCGCTATTATCTGACAATAAAAC +GTCAAGTTGATGCATTAAAAAATGGGATCGGCCACGGCCGGAAAAACCCTTAACCCGATT +TAGAAAGGATATAACGATGCGATTCAAAACACTCAAGTACATCCTGGTCCCGGTACTGGC +AATCGGACTCAGCGGATGCGCCAGCACATTGACTTTACTCAGCCCGCCGTCCTCCCGGCT +GGTTCAGGGGAAGAATACGGCCGGTGCGTTCAACAGTTACGAATACCAGTACGCCGTCCG +CGGCAACAAGATCTATATCAAGCGGACACCGCTGTGCGATGAAGTAAAGCATGTGATGCG +GGTGGAACAGAAGCGGGAAATCGGCTATGGCCCGGCGCTTCTGGAGCTGCCGCTGTTCGG +GCTGGGACTTGTCGACATTGCCAATGCCCATGCAATCTCCGTTAACAGCAAAAAAGTCAC +CCCGCTTGCCGATTACAACACCGGGAAATTGATGGCCTGCGGTCCGCTGCAGCCCGCTGC +GAACGAGAAAGTGATTATCGAAAACAAGAATTTGAACCTTTACCGGATGGTCAGGACGGA +TAAAAACGGTGTGGTGAACTTAGACAAGGTCCTGTCCGGCATCGGAAACAATGTCAACCT +CAGTGTCCGACTGGCGAACAACCATAATGTGGCGTTTTCCTGCATGTACATCGCAAACCG +GTAGTCGCCGCCGGATCGTGGTCTAGGTGTCGTTGTACGTGGGATCCCCGGGTACCGAGC +TCGAATTCGCCCTATAGTGAGTCGTATTACAATTCACTGGCCGTCGTTTTACAACGTCGT +GACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCC +AGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCTGA +ATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACC +GCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGAC +ACCCGCCAACACCCGCTGACGCGAACCCCTTGCGGCCGCATCGAATATAACTTCGTATAA +TGTATGCTATACGAAGTTATTAGCGATGAGCTCGGACTTCCATTGTTCATTCCACGGACA +AAAACAGAGAAAGGAAACGACAGAGGCCAAAAAGCTCGCTTTCAGCACCTGTCGTTTCCT +TTCTTTTCAGAGGGTATTTTAAATAAAAACATTAAGTTATGACGAAGAAGAACGGAAACG +CCTTAAACCGGAAAATTTTCATAAATAGCGAAAACCCGCGAGGTCGCCGCCCCGTAACCT +GTCGGATCACCGGAAAGGACCCGTAAAGTGATAATGATTATCATCTACATATCACAACGT +GCGTGGAGGCCATCAAACCACGTCAAATAATCAATTATGACGCAGGTATCGTATTAATTG +ATCTGCATCAACTTAACGTAAAAGCAACTTCAGACAATACAAATCAGCGACACTGAATAC +GGGGCAACCTCATGTCCGAGCTCGCGAGCTCGTCGACAGCGACACACTTGCATCGGATGC +AGCCCGGTTAACGTGCCGGCACGGCCTGGGTAACCAGGTATTTTGTCCACATAACCGTGC +GCAAAATGTTGTGGATAAGCAGGACACAGCAGCAATCCACAGCAGGCATACAACCGCACA +CCGAGGTTACTCCGTTCTACAGGTTACGACGACATGTCAATACTTGCCCTTGACAGGCAT +TGATGGAATCGTAGTCTCACGCTGATAGTCTGATCGACAATACAAGTGGGACCGTGGTCC 
+CAGACCGATAATCAGACCGACAACACGAGTGGGATCGTGGTCCCAGACTAATAATCAGAC +CGACGATACGAGTGGGACCGTGGTCCCAGACTAATAATCAGACCGACGATACGAGTGGG +>NODE_17_length_5536_cov_76.8441_ID_33 +TTTTTCAAGCAGAAGACGGCATACGAGATGCGCAGGAGTCTCGTGGGCTCGGAGATGTGT +ATAAGGGAGAGGGATTCGAACCCCCGGACCTTTCGGTCAGCGGTTTTCAAGACCGCCGCC +TTCGACCACTCGGCCATCCCTCCAACACCCCCCTATCTACCATCGGAGGGGGGGATAGTC +AACTTGAGGCGGGCGGGGAAAAGGCCGAGATGAAGGGGTGAATCAAGCGGTTGACCTCGG +CGTGGCGCTCCAGGTTCACCATGTGCCCGGCCCCGGCCACGATCTCCAGCCGGGCGTCGG +GCAACCGCTCGGCCAGGAAGCGGCCGTACTTGGGCGGGGTGAGCTGGTCCTGGTCGCCGT +AGATGAGAAGCGTCGGGGCGGTGATCTCGCCCAGGCGGTCGGTTATGTCGTAGCGGTCGC +AGGCGGCGAAGTCGTCGTGGACCACCCGGGGCCCGGGCTGGCTCATGATCTCGCGGCCGG +CCTGGACCCAGCGGGGGTCGGCGCCGGGGGCGTAGGCGTATTTGAGGACCAAGTCCAGGG +TGGCGTCGAAGTCGCTGAGCAGGCCGTCCAGGATGGCGGGCAGGACCCGCAGGCGGGAGC +CGGTTCCCACCAGGACCAGGGCTGGTGCCAGGTCTGGGTGGTCCAGGGCCAGTTGCATGA +CGATGGCGCCGCCCAGGGAATGGCCCAGGAGTGCGGGGCGCAGGGGTCCGCGCCGGATGA +ACTCGGCCAGCCAGTCGGCGTAGTGGGCCACCTGGTCGCGGCCGGGTCCGGGGGTATTGC +CGTGTCCGGGCAGGTCGATGGCGGCGGGGTTGAGGTATGGGGCCAGGCCGCTGAGCTGGG +GGCGGAAGGTTTCGCCGCGGCCGCCGGAGCCGTGGACCATGAGCAGGCAAGGCTTGTGGG +GGTCGAAGGGCCGGGGCCCGGTCTCCAGGCCGATCTTCCGGCCGGCGATGTGTTCTTGCC +ACATGGGGCGCGTCCTTTCGCTGGTGGTGATGGTCATAGGTTAATGCGAAACCGGGCGAA +GTCAACGGGGAAGGGGGTGGAAGGACGGTGGGCTTTGGGGTTATGATGTCGGGCGCCGCC +CCGCCATCCCACCGCGAGGCAGAACTATGAGCGAGACACCCCGCCGTCCCCGGGATAGCA +GGTACATGTGGAGGGGCATACGTCCCAGCGAGGAGGAGCTGAAGACCATCCTGGAGGACC +ATGCGCAGTGGCTTGAGCGGCTCCGATCCTGGGAGTACTCCTGGCGAGAATTCATAGAAG +AAATTCCTCCTCCCCACGACCTCTCCGGCGCCGACCTGCTCGAGGCCGACCTCTCAGACG +CTGACCTGACCTGGGCCAAACTCTCAAACGCCATACTGTTCGAGGCAGACCTCTCAAACG +CCGACCTGCGCGAGGCAGACCTCTCAAACGCCAAACTGTGGTGGGCCGACCTCTCAGACG +CTGACCTGACCTGGGCCAAACTCTCAAACGCCAAACTACTCGCGGCCGACCTCTCAAACG +CCGAACTGTGGTGGGCCGACCTCTCAAACGCCAAACTGATCAAGGCAGACCTCTCAAACG +CCGACCTGACCGGGGCCGACTTGTCCAACTGCGATGTGACTGGGGTGCGATATCATGGCC +CCTGGCTTGGGATACCCTTCATCCAGATACGGAAGCCCAATAAACTCACGTGCCGCGGCA +TCCGGGCTGACACCTGCTACGGCAGTCCGCGCTTTCGGCGCGACGTTATGGACCAGGACT 
+TCCTGGAGGAGATGCGGGAGACCACCGGCGGCAGGTGGCTTTACCGCCTATGGTGGCTCA +CCTCCAACTGCGGGCGCAGCTTCATCCGCTGGGCATTTTTTTCACTGTCTTTGGCGGTTG +CCTTTGCCTGCGTCTTTTGTTCCAGCCTGGGGCCGGAGTGCTTCGACCTGCACCGGGCGG +AGGGGAGCCGCTGGGTGGCGGAGGTGGCCGGCAGGTACCTGGAGGTTGACCCGGCGTACC +TGGGTACCACCGCCGCCTCGCGCGGACTGCCGGGGGACTTCTGGACCATGCTCTACTACA +GCCTGGTCACCTTCACCACCCTGGGCTTCGGCGACGTGGTGCCGCTCACCCCCTGGGCCG +CCTTCTGGGTCACCATAGAGGTGGTGACAGGCTACATCATGCTGGGAGGACTGGTCTCCA +TCCTGGCCAGCAAGATGGCGCGCCGCGCGGGCTGAGCAGGGGCCTGGACCCAACCAGCGG +CCGCCTTTACTAACCCTGGCCGGGCATGCTAGCTTTGCCACATCGTTACTACCAGGGGGG +AGTGGGGAGGTTTCGCCATGCGCGCCCGGTTGTTCAAGTGGGCCGGGGTGTTTTTCCTGT +CGCTTGCCATGGCCGGCCCGGCCTGGGCCGCCAGTCGCGCCACACCGCACCGCCGGCCGG +TGCTCAAGCTGCACTTCGCGGACGTGTCCTATGACTACGAGCTGAAACGGGCCATGAGCT +ATGCGGTCAGCGGCGGTGCGGACATAAACGAATGCCTCACGGCCGCCCGGGCCATCACCG +CCGGCGACGGCGAGAGCTGGTACCGCGGCTGGCACCGGATGGCCCGGCGCCTGGACCAGA +TGGCCGACCAGGCCCTGAAGGCCGGCCACCGCCAGACCGCCCGCCAGTTCTGGCTGCGGG +CCAGCAACTACTACCGCGCCGCCGAGTTCTTCCTGCACGGAAACCCCAAAGACCCCCGCA +TACTGAGTGCCTGGGGGGCCAGCCGGCGCTGCTTCCGCCAGGCGGCCCGGCTCATGGACC +ATCCCGTGGAGGTGATCGCCATCCCCTACGAGGGACACAAGCTGCCCGGCTACCTGGTCA +AGCCCGACGCCTCGCTAAAGCCGCGCAAGACGCTGCTCCTCCAGACCGGCTTCGACGGCA +CCGGCGAGGAGCTCTACATGGAAGTGGCCTGGTACGCCATCCAGCGCGGCTACAACGTGC +TCATCTTCGAGGGCCCCGGCCAGGGCGGCGCCCTGCGCGAGCAACACCTCTACTTCCGGC +CCGACTGGGAGAAGGTGGTCACCCCGGTGGTGGACTACGCCCTCACCCGCCCGGAAGTGG +ACCCCAAGCGCCTGGCGCTCATGGGGCTCAGCATGGGCGGCTACCTGACCCCCCGGGCCG +CGGCCTTTGAACACCGCCTGGCCGCCCTGGTGGCCGATCCCGGCGACTTCGACATGATGG +TGGGACACCGCCCCACCCCCGCAGAGTGGGCCGGCATGAAAAAGTATCCCAAGCAGGCCA +ACCAGGCCCTGCGCGCCAAGATGAAGCACGACACCGGCTTCCGCTGGCTGGTCAACAACG +GCATGTTCACCACCGGCCGCAAGACGCCGCTGGCCTTCCTGGAGTTCTTCAGCCGCTTCG +AGCTGACCCCCAAGATCGCGGCCCAGATCAAGTGCCCCACCCTGGTGGTGGTGGGCGCGG +GGGATCACTTCGCCAGCCCCAAGTGGCAACGCCTGCTCTACGACAACCTCACCGCCCCCA +AGACCCTGCTGCGTTTCGGGCCGGACAACCCCGCCCGCCAGCACTGCCAGGTGGGGGGCC +TGCTTTGGGGCAACGCCAAGATCTTCGACTGGCTGGACCAAGTGCTGCGCTGAGCGCGGC +CAGGGCCGCCGCGGGGAACCAGGCCAAAAGACCACCGGCCGGGAGGGACGCCCTCCTGGC 
+CGGTGTCATGTCTGTACCACAGGCGGGGGGAAGCTCAGCGCACCACGCTGCCGGGGGGCA +GCTCCTCCTCGGGCGCCACCAGGCGCACCCTGCCTTCGGCCACGCAGGCCAGCACCATGC +CCTGGCTGATCTCGCCGCGCAGCTTGGCCGGCTTCAAGTTGGCCACCACCACCACCTGGC +GGCCCACCAGCTCCTCGGGGCGGTAGTGCCGGGCGATGCCGGCCACGATCTGGCGGGGCT +CGGGCTCGCCCAAATCCACCTTGAGCTTGAGCAGCTTGTCGGCGCCCTTGATGGCCCCGG +CCTCCAGGACCCTGCCCAGGCGCAGCTCCACCTTGGCGAACTCGTCGATGGTGATGAGCC +CGGCCTTGTCCTGGGGCTTGGCCTTCTGCTTGCCGCCGCCCTTGGCCGCCGGCTGCTGCT +GGGCCTGCTTGGCCTCCTTGCGGGCGGCCTTGGCCTTGACCTTGTCGGTCTGCACCCGGG +GAAAGAGCGCCTGGGGCTTTTCAGCCTTGGCGCCGGGGGTCATGAGCTCCAGGGCGAACA +ACCGCTGCCAGTCCACCGGCACCACAAGCTCGAGCCCCAGCATGGCGGCCATCTTCTCGG +CGGTGGCGGGCATCACCGGCCAGGCCAGGTGGGTGACCGAGGCCAGAAGCTGCACCAGGA +TGTACAGCACGTTGTCCAGCTCGGCGGCGCGGTCCGGGTCCTTGGCCATGACCCAGGGCT +CGCGGGCCACGATGTACTTGTTGGCCCGGCGCACCTGGCTCCAGAGGTCGGCCAGGGCGG +TGTGGAAGCGGAACTCGCGCACCTGGGCCTGGAAGTCGTGCTCGGCGCCGGGGCCCAGGG +AGGCGGCCAGGGCGCCTTTCATCTCCCGATCGGCCTCAGTGAGCTCCTGGGGGTGCAGCT +CCGGCAGCAGGCCGTCGCGGTAGCGCGAGAGCATGTTCAGCACCCGGGAGAAAAGGTTGC +CCAGGTCGTTGGCCAAATCCGCGTTGTAGCGCTCCACCAGCAGGTCCTCGGAGAAGGAGG +CGTCCAGGCCGAAGGTCATCTCCCTGAGCAGGAAGTAGCGGAAGGGGTCCACTCCGTAGA +TGCCGGCCAGGTCCAGGGGCTTGACCACGTTGCCGCGGCTCTTGGACATCTTGCCCTCGC +CCACCTGCCAGTAGCCGTGCACGTTGAGGTGGCGGTAGAGGTAGTGGTCCAGGGGCCGGC +CCTCGGCCTTGGCCAGAGCCATGAGCATGGTGGGCCAGTAGATGCCATGCGGCTTGAGGA +TGTCCTTGGCGATGAGGTGCTGGGGGTCGGCCTTGGGTCCGGTCCAGAAGCGCTCCAGCA +GCTCGCCGTCCGGCCAATCCAGGCCGGAGAGGTAGTTGATCAGGGCGTCGAACCAGACGT +AGGTGACGAAGTTCTGGTCAAAGGGCAGCTCGATGCCCCAGGTGAGACGGGTCTTGGGCC +GGCTGATGCAAAGGTCCTCCAGGCCCTGGTCCAGTATGGCCAGGACCTCGTTCTTGTAGC +GTTCGGGCCGGATGAAGTCGGGGTTGGCCCGGATGTATTCCTTGAGCGGCTGGGTGTAGT +CGCTCATGCGGAAGAAGTAGTTCTCCTCCTCCTGGTAGACGGGCTCGGTGCCGTGGTCGG +GGCACTTGCCGTCGATCATCTCGTGTTCGGTGAGGAAGCGCTCGCAGCCCACGCAGTAGT +GGCCGCCGTACTTGGCGAAGTAGATGTCGCCGGCCTCGTGGACCTTGCTGAGTATGGCCT +GCACCACCTGCATGTGCTGGGGGTCGGTGGTACGGATGAAGTTGTCGAACTGGATGTTGA +GCTGGGGCCAGAGGCTGCGGAACTTGCCGCTGATGCGGTCGGTGAACTCCTTGGGCTGGA +TACCTTCCTTGGCCGC +>NODE_1_length_50014_cov_374.697_ID_1 
+CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG +TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +NACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG +CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGTTGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA +GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT +TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG +GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCACACATCTTTCGTTCGGGTG +ATCTGCGTCGTCGGCTGATGTTTCTGGCCGGCGCGCTGATCGTGTTCCGTATCGGGATTC +ACGTGCCCGTACCGGGCGTCGATCCGACCGCCTATGCGCACCTGTTCAATCAGAATGCAG +GTGGCATTCTGGGGATTTTCAATGTCTTCTCCGGTGGTGCGCTGGAGCAGATGTCGATCT +TTGCCTTGGGGGTTATGCCCTACATATCGGCCTCGATCATCATTCAGATGCTCACTTCGG +TAGTGCCCAGTCTGGAAGCCTTGAAGAAGGAAGGTCAGGCGGGTCAGCGCAAAATCACCC +GTTATACGCGCTACTCCACAGTTGCGCTGGCCCTGTTCCAGTCGCTGGGCGCAGCATTTG +CGTTACAAAGCCAGGGGGTCGCATTGACAGCGGGACCAGGCTTCATCGTTACAGCTACCG +TCAGTCTTGTCACGGGCACCATGTTCCTGATGTGGTTGGGTGAGCAGGTGACCGAACGTG +GCCTGGGCAATGGTATCTCGATGATCATCTTCGCGGGTATCGTGGCCGGGCTGCCTTCAG +CTATTGCCAGCACTCTGGAGTTGGTGCGCAACGGCGAATTATCCTCGATCGTGGCCATCC +TCATTTTCGTGGGCGTATTGTTGATCACTGCTTTTGTGGTGTTCGTGGAAAGCGGACAGC 
+GACGCATCACCGTGAATTATGCCAAACGCCAACAGGGGCGTCGCATGTACGCGGCTCAAA +CCAGTCACTTGCCGCTCAAATTGAACATGTCGGGCGTGATTCCGCCTATTTTTGCCTCCA +GCATCATCTTGTTCCCGGTTACCCTGGCGGGATGGTTGGGGCAGAGTTCTGGCTTTGGCT +GGTTGAACACTTTGCAGTTATGGCTTTCGCCTGGGCAGCCCTTGTATGTGGCTCTGTTCG +CAGTGTTGATTATATTCTTCACATTCTTCTACACCGCGCTGACGTTTAACAGTGACGAAA +CCGCGGACAATCTGAAGAAATCCGGCGCGTTTATTCCGGGCATCCGGCCAGGCAAGCAGA +CAGCAGGCTATATAGATACCGTGCTGACGAAACTGACTTTGTGGGGCGCGTTGTATCTGA +CGGCAGTCTGTCTGTTGCCGGAATTCTTAATTGCTTACGCACACGTATCGTTCAACTTCG +GCGGTACGTCACTTTTGATTGTCGTTGTCGTGGCGATGGACTTCATGGGTCAATTGCAGG +CCCACATGATGACGCACCAGTACGAAGGATTGCTCAAGCGGGCCCGCATGCGCGGTTTGC +AGAGGTAGTCGGGAGTTAACAATGAAGGTAAGGGCTTCGGTCAAGAAAATTTGCCGCAAT +TGCAAGATCATCCGCCGCCGTGGTGCGGTGCGCGTGATCTGCTCGGATCCGCGGCATAAG +CAACGCCAGGGCTGAGTCGGCTTGAGTCGACGGGTTTCTGTTGTTAAAATGAACAGTTCA +CTTCGCGATTCTTTTGAATTGCGCTGAGAGATAAGTAATGGCACGTATTGCAGGTATCAA +TATTCCGCCGCACAAGCACACGGTGATTGGCTTGACGGCAATCTATGGCATCGGACGGAC +CCGAGCAGCCGAAATTTGCGCCACGGCTGGGGTGGACCCGACTCGTAAGGTCAAGGACCT +TTCCGAAAGCGAGCTTGAAGCCATCCGTCAGGCGCTGACCGCTTACAAGCTCGAGGGCGA +TTTGCGCCGTGAGCTCAACATGAACCTGAAACGATTGATGGACCTCGGTACTTTCCGTGG +TATCCGTCACCGTCGTGGGTTACCGGTGCGTGGTCAGCGGACCCGTACTAACGCCCGCAC +GCGCAAGGGTCGCGTGCGTCGTAGCGCCAAGCGCTGAAGATTTCCGGAGAATACTGATTC +ATGGCAAAACCGGCGGCTAAAGCGCGTAAGCGCATCAAGCAACAGGTGGTGGATGGCATC +GTGCATGTGCATGCGTCGTTCAATAACACCATCATCACCATTACCGATCGCAAGGGTAAT +ACGCTCTCGTGGGCGACAGCTGGTGGCTCGGGCTTCCGGGGATCGCGCAAATCCACGCCG +TTTGCTGCGCAGGTGGCTGCCGAACGTGCTGGTGCCGTGGCCCAGGAGTACGGGGTCAAG +AACCTGGAAGTCGAGATCAAGGGGCCGGGGCCGGGTCGCGAATCTGCTGTACGTGCATTG +AACAATCTGGGGTTCCGCGTGTTGTCGATTTCCGACGTGACACCGATCCCGCACAATGGT +TGCCGGCCTCCCAAGAAGCGCCGGGTTTAATTAGGGGATTTGTAAAAAATGGCACGATAT +ATCGGTCCCAAGTGCAAGCTGGCACGCCGCGAGGGTACGGATCTTTTTCTGAAGAGCCCG +ATCAAGGCACTTGATCAAAAGTGCAAAATTGATCGGATCCCCGGGCAGCATGGTCAGGCA +ACCCGTCGTGGTCGCATGTCCGATTATGGTTTGCAGCTGCGCGAAAAACAGAAACTGCGC +CGCATGTATGGTGTGCTGGAGCGCCAGTTCCGACGTTATTACAAGGAGGCTGCGCGCCGC +AAGGGTGCAACCGGTGCCCTGTTGTTGCAGTTGCTGGAGTCTCGTCTGGATAACGTAATT 
+TATCGGATGGGCTTTGCCTCCACCCGGGCTGAAGCACGTCAATTGGTCAGCCATAAGGGT +GTAACTGTCAACGGTCAGCTAGTGAACATCCCGAGCTTTGAAGTGAAGGGTGGGGATGAA +GTTGCACTGACTGAGCGTGCGCGCAAGCAGAATCGCGTGGAAATGGCGCTGGAAATCTCA +CGCCAGATCGAACGGCCCGCTTGGGTCGAAGTCGATGAAAAGGCGTGCAAGGGCACCTTC +AAAGCTATGCCCGAACGCGAAGAACTTTTGCCAGATATCAACGAAAATCTGGTTGTCGAG +TTGTATTCGAAATAAACGTCGAGGTTCGAACACATGGAAATTTCCGTATCCGAGTTCCTC +AAGCCGCGTATTGCCGGTTTGACCGAGCTGGGTGAAAACCGTACCCGCATCGTTCTGGAA +CCCTTGGAGCGCGGCTTCGGCTATACGCTTGGGAACAGTCTGCGCCGGGTTTTACTGTCC +TCGATGCCTGGGGCTGCTGTAGTAGAAGCAGAAATCGATGGCGTGCTACATGAGTACACC +GCCATTGACGGCGTGCAGGAAGACGTAGTTGAAATCCTGCTCAACCTGAAGCTGCTCGCC +ATTCGCATGCATGCCCGTGAAGAAGCCACGTTGACTCTGAACGCAACGGGTGCGGGCGTC +GTTACGGCTGGCGACATCCAGGTTGATCACGATGTGGAAATCGTCAATAAGGATCTGGTT +ATTGCCCACTTGGCGAAAAACGGCAAACTGTCAGTGCGTCTCAAGGTGATGCGGGGACGT +GGCTATATGCCAGTGGTCAAGCGCTATGCCGACGAATCCCAGGGGCGCAAGATCGGTAAG +CTCAAGTTGGACGCCACGTTCACACCCATCCGCCGTGTGGCCTATTACGTTGAGGCTGCG +CGCGTCGAACAGCGTACCGATCTGGATAAACTGATTCTGGATATCGAAACTAACGGTACG +ATTGGTGCCGAAGAAGCTCTGCGTCGCGCAGCAGGCATCCTGACTGACCAGCTTTCGGTG +TTTGCCGATCTTAGTTCGGTCAGCTCGCATACGCCAACCGAGAGCCGCAGCGTCAAGCCG +ATCCTGCTCAAGCCGGTTGAGGAACTTGAATTAACCGTACGGTCTTCTAACGCCCTGAAA +GCCGAACGTATTCGTTTCGTTGGAGATCTGGTGCAAAAGAGCGAAGACGAGCTGCTCAAG +ACGCCAAACTTGGGGCGCAAATCACTGACCGAAATAAAAGATGTACTGGCCCGCCATGAA +CTGGCGCTCGGAATGAAACTTGAAGACTGGCCCCCGGCAGCGCTTGCCGAGCGGCGCGCT +TCCTGAGGAGGTAACTGTTATGCGTCACCGTAATTCAGGTCGTGCGCTCAGCCGCACCAG +TTCTCATCGCGCTGCCCTGATGCGCAACATGAGCAAGTCACTAATTGAGCACGAACAGAT +TCGTACCACCGTGCCCAAGGCCAAAGAATTGCGTCGGGTTGTGGAGCCTTTAATCACGCT +GGCAAAGAGCGATAGCGTGGCGAATCGTCGTTTGGCGTTCAGCCGTTTGCGTGACGATGC +CATCGTGGCTAAGCTGTTCACCGACCTCGGCCCGCGCTACCGCGAACGTCCGGGCGGCTA +CCTGCGCATCCTGAAGGCGGGTTTCCGGCCGGGCGACAATGCGCCCGTGGCAATCGTCCA +ACTGGTGGAAGAGCAAGAAACCACATCCGCTGCCACCTGAGCGCACCATGCGTAAGGTGC +GGGTAGCCATTGTCGGGGCCGGTACTGCCGGACTTACCGCTCTGGCCCAGGTGAGGCGCA +GAACCGACGAATTCGTCATCGTCAATGATGGGCCCTATGGCACGACCTGCGCCCGTGTGG +GGTGCATGCCTTCCAAGGCGCTGATACACATCGCGAACGATTTTCATCGCCGTCGCCGCT 
+TTGCGGAAGTCGGCATCGCGGAGGGCGAGACGCTGCGGATCGATCTGTCCAAGGCCCTCG +CCTGGGTACGTGCTTACCGTGATTCACGAACAGCCGACAGCATCAAGCTTACCGACCCGC +TGGGCGAGCGCAACATACCCGGACGGGCCGAGCTGCTCTCCGCGCATGAACTGCACATCC +GGCGCGCCGATGGTGGAGAGGAACGCATCGCAGCCGATGCGGTGATTCTTGCCCCTGGTT +CGACGCCGGTGATTCCGAAAAGCTGGGACGGTTTTTCCGCGCGCATATTGACTACCGATA +CGCTGTTCGAGCAGCGCGATCTGCCGCGGCGCATGGCCGTTCTCGGGCTCGGCGCCATCG +GCCTCGAAATGGGGCAGGCGCTCGCACGGCTCGGCTTGCAGGTACACGGCTTCGAGCTGC +GAGATCGCCTTGGAGCATTGACCGACCCGCAACTCATCGCGCCGGCAATCGAGCACTTCT +CCCGCGAATTCGATCTGCATCTTGGTGCGCCTGCCGAGTTGCACCCGACTGGCGAATATT +GGCGGGTCGAAACCGCGGACGCACAGGTGGAGGTTGATGCGGTACTGGCCGCTTTCGGGC +GCCGCCCGCGTCTCGACGGCCTGGGTCTCGAACGTCTCGGCGTTCCGCTGGACGCAAAGG +GTCTGCCACCGGTTGATCCCCATACCCAGAGAGTAGCGGACCTGCCGATTTTTCTGGCGG +GCGATGCCAACGCGCGCAGCCCGATCATGCACGAAGCCTCGGATGACGGCTATATTGCCG +CAGTGAACGCCCTGGACGGGCCGACGCCTCTGAATCGGCGGGTGCCACTGGTCATGGCGT +TTACCGATCCCGAAATGGCTATCGTGGGCGCATCGTTCGAATCCTTGCCCGCCGGCAGTT +TCGATGCTGCGGGATACGATTTTTCCCGCCAGGGCAGGGCGATAGCCATGCGCCATGCCG +AAGGGCGGCTGCGCGTGTATGCCGAGCGTAACAGCGGACGATTGCTGGGCGCGGAGATCT +TCGCGCCGGAAGGCGAGCATCTTGCCCATCTACTCGCGCTGGCCCTCGATCGCGGGCTGA +ATGTGGCGGAACTGCTGCGCATGCCCATCTACCATCCGGTCCTCGAAGAAGGCCTGCGCT +CCGCCCTGCGTGCCTTGGCGCGCAGGGTTTACGACCAGCCTCCCCAGGAGTTTCGTCGCT +TGCCTGAGGGGTGCCCAGGCATGTCTTCATCCTCTCCATGAATCCCTACGAACGCTACCT +TCTGCCCTGGCTCATCGATGCAGTCTGCGCCTTGCCGGCAGCGGCCCGCGAGCGCGCCAA +GATCGTGCCCCGCGCCCGGGGCGAGGTGCTGGAAATAGGCATCGGCACTGGGCACAACCT +GCCCTATTACGCCCCACGCCGGGTTGCGGGGGTAACCGGCATCGATCCCGGCGTCCTGCG +TCGCAGGATCATGCGCCGTGCCCACGCCGCTGGTATCGAAGTCAAACTGCTATCACTTTC +CGCGGAAAGCATCCCGGCCGAAGACGCTAGCTTCGACACGCTGGTCAGCACCTTCACCCT +GTGTTCCATTCCGGATGTCGAGCGCGCGCTGGCGGAAATGCGCCGCGTACTCAAACCCAC +TGGCCGCTTGCTTTACCTCGAGCATGGCACCGCGCCCGATCCGCGTGTCCGTCGCTGGCA +GGATCGGCTCACGCCGTGGTGGAAACCGCTGGCCGGAGGCTGCCATCTCAACCGCGATAT +CCCGCGGCTGATCACCGGGGCCGGATTCGACATCGTAGAACAACACAGCGAGTATATACG +TGGCCCCCGTATCCTGAGCTATGTATTTCGGGGCGAAGCACAGCCGATCGCTGTGGCAGG +TAGTAAATAAGGTCTACCGCCGGAATCGAGGTATGACTGTAATCTACTACAGCACTGCGA 
+ATGCCACTAACGCTATAAGCTTCGCCGCCGACGAACCGCAGAGCGGTTTGGCGATCAGCT +GGAGCGTCTTGTTAGGGAGGTTCTCAAAAAATGATGATATTAGCTCCCGGTTGTCTCGAT +ACTGTTGATAATCGGAAAGGTCCTTGAGTATGAATTTCGGCATCCTCTCGTGCCTGTGCG +ACTTCTCAAAGATAGAAGCAGGGAAAAAGAAGTAGTCTGGATCAGATGCCTTGTTGTTGC +TCGTTACCGCATCAAGATTCAACCTTACGAACACCACGAAATCACAATCGAAGCGTCTGA +TCAGGAATCCTCCAGCTTCCCGTGTCCATCTACTCTTGACTTGAATCCTGGCAGAGCCGT +ATAAATCTGTTGGAACGTTTCTAATACGCTGTTATTCTCAGCTGTTTTCCTTGTTTGGAG +GTTGAGGCGGCCCAAAATTTTCGGATGCGGTTTCGCGTGCAGAGATCATTTCAGGGCATC +CACAGCCAGGCGATGACGACGACGGAGATGACGCCCACGATGAGGTTCAACGGCAGGCCG +AAGCGCACGAAATCGCTTAAGCGGTAACCGCCGGGACCATACACCATGAGGTTGGTCTGG +TAGGCGATCGGTGTGGTGAAGCTGGCCGAGGCGGCCATGGCAATGGCGATGGCATAAGGC +AGGAAGGGTTGGCCGAGTTTTGTGGCTGCCGCGTAGGCGAGCGGGAATACGAGTACGGCT +GCGGCGTTGTTGCCAATGAAGGCGGTGAGGATCGTGGTGAGGGCATAGATAACGACCAGC +ATGCCTAGCGGAGAGCCTCAAGTGGCTAGCTTCGTATTCTAAAGGCAAGGTTTCGCGCGG +GTGGCAAAGTGTTCGCGGCCATTATCGAGTTGGTTGAAGGCAAGGAAACTGAAACCGCTT +AAAGTGGGCGGAGAATTCCGACACATAAAAAACCGCCTTGCGGCGGTTTTTTTACTTTAA +GTTTCAGCGTGGATTAAACGCCTTGCTTAGAATTTGAGTTTTGCGGGAGTCAGCTGGCGC +TTTTTTCAAGCCGTTTTTCCAGGTAGATGGGCAACACGGTGAGCATGCGTAGACCCTTTT +GCACTTCCTTGTCGCGCAGCAGATGTAACATGCCAATATAGCCACCGAGTTTGGGTTCCG +GTTCCTGGCCCACTTTCTGCATGGCGATTGAGACACTCTTGAGGAATATCAAGAATGTAC +CAAGACCACCCGCCGTGCTTGACTTGATGAGAACGTCAATCTGATCATCCAAGTCCGTGC +CTTCTTCCAGGCCGGCGACAGTGTCGGCAAGATCGCCGAGTCGTGCAAGCAGACCGCTGC +GATAGATACGCCCCAACTGAACGAGTGCTTCAGCGACAGCTTCGTCTGTGTCGTTGCGCT +GCATGAACTCCGTCACTTCGACGGCTTTAGTGGTCAGTTCGCCCGCCAGTTTTTCTTGAA +TGAAGGTGGTGACCAGTCGCGCCTTGCGTAGCAGTGCGAGTGCAAATTTGGCATCGGCCT +TGAATTCATCGGCTGGCAATTCAGCGATGTGTGCAAGCCACTGGTCAAGCATTGGCGTCA +ATGTGTTGAGGTTATCGGCAATGAACTGCGCATTGTCGCGCAGCAAGTCGAGTAGGCCGG +CGCGATCAAGCGCGGATAGTGTGCCGACAAGTTTTTCAGCCAGCGCAGTTAGATCGTACT +GGCCGTCGAGTGCGCCGATGCGATCAAGTGCGGCGCTGGCAGGGCCGGACAGTGGTCCAT +CAACCAGTGCGCCAAGACGATTACCGAGGTCGCCCAAGCGGGCGAGTCCAGCCCATTGCT +CGTCTGTGAGCGGGGTAGCGTTCCCCGATAGTGTTTTTGTTTCCATGGCATTCTCCTTGT +CTAATCGAGAGAGTTCAGGCGTCCGGCTGCGAGTTGCACAGCCGGACGTTCCCGATGCCA 
+TCAGGAGGCTATCTTTTCGGCCATCAGCTCGGAGAAGTCGAGGCCCCAGTCGGGCATTTT +GCCCTGCGTACGGAAGAAAAGGTTCTTGTACTGCATTTTCAACAGGAATGGTGTGTGGCC +CATTTTGAGGACCTGTGTATCGCCGCCGAACCAGCTATTGGAACGGATATAAAAGGCTTG +GTTGTCGCCCATGTCGCCAATGCAGTAAACCACCGGTTGCAACGGTGTGTTGGCGGCGGC +CTCGTTCATGCGGCCGACTGCACAGGCAATCTGCCGGCCGACGATATCGCATTCCTGATG +ACCGATGGCTCCGAGCTTGGGCATGGTGACCGCAGCGGCGTCCCCGGCAGCGAAAACTTC +CGGATATTTCGGGTTTTTCATCAACAGATCGGTGATAACGAAGCCTTCGCTGTCACTGAT +CGGCAGACCGCGCATGAAGTCATGGGCTACCCAATCGGGGAACAGAATCTTCAGTTCAGC +CTCAATGGTCTGACCATTGGCCAGCTCCACACCTTCGGCAGTGACGCGGGTTATGTCCTT +GGCGTTGTTGACGTAGTTGAAGCCCATACCCGAGGCGATGTCGAGCAATTGGCCGACCAC +CTTCTCACCAGCGTCCTCGGCGATCAGTTCGGCAGGGGTGAATACGGTGATCTTTTCGGG +GCCGCCCTGGCCGGTTTTCTTCAGGTACGTGGCGGCGGAGAGCATGACCTCGACCGGTGG +CCCTTCGCAGGCAGCCAGCGCATCGGGAATGCTGCCACCGGGGTAGGGTTCGAGGCCTTT +GGCTCCGTCACCCTGGTGAAACATGGCCGAACCGACCGCGATCGGACCACCCTTGTAGCC +GCCTTCCCACAGTTTTTTGCGCAGCTTGTTACCAAGGTAGATGTCGCTGACGCTGTCACC +AAACTCATCGAAACCTTCGATCTTGTCAAAGGCCAGGCGATTGCCGAGCGCGACTACCAG +ATAGTCGTAGGCGATTTTCTGTGGTGCGGCGCCGGGACGCTCGCTGGGTGTGTAGTGAAC +TATGCGGTTGTCTACATCCAGCTCGGTCACTTCGCCCTGAACGAAGTAAATATCGTCTTT +GACTAGCACCGGGGGGAGATCCAGGCGCTGTCCGACGGCAGGGTCTTTGTTCTCGAACAC +GTCGGCCGGGATGTTGGGCACGAACAACAGGTAATTCTTGCGGTCAATCACGGTAATATC +AACGGCGTCACCCGCAAATTCGCGAATTTTCTGCGCGCTTCCCAGTCCGGCGAAATTGCC +GCCCAGTACGAGTACATGCGGTTTGGCGTTCATGTTGTTGCCTCCTTGAGATTATTGAAT +AGCGCCCGGCGGAGCAGATCCGCGCCAGACGGATCGAATGCAGTATTTCAGATGCTTGCG +CCAGAGCCCTTGTTCCATCATCGGTGCGAATATCTGATGATACGGGCCGTGGTCGAGATG +CGCAAGTACACCGTTGCAGTGGATCGGATGTTTGTGACGTGCAACCCTGAGGCGGAGTGC +GCCGGCTTTATCCAGGCAGGGCAGAATGCGTCGATTTTCTCGGAATTGACAGCTATGATC +GGATCTCCGATCCGACATTGTTATCTCCTTGCTTCGTTGCGAATTGATGAACGAATGCGT +TTTGGCAGATATTGGTTCAGACAACGAAAATCGGCTTTAGTTCAATACGATTGCGTATCC +GGAGGGTTAGCCGTGTCTTTACGGACGGTCTTAAACGTTTGTCATTACAGATCCATTGTG +GCGGATCTTCTTCTCATACGGCGTAATGTCTACTGACCGGGATCTTCGGACACTGACACA +TCAGCAGAGTGGGGAACTCGCTGCGACAACGTCGGATCGCCTGATACCTCGGGACGACTT +CCCGGCAAAGTACGTTGCCGCTGCACGAAAAAGCGCTGTTCCTTCTGAGCCAGGGCTGGC 
+TCGCGCTTTATCCGCGCAGCCCCCGGCCACGGGCCACCTCGTGCTCTTGAAGACCCTGCC +GCCGCGCGTTCCGAGTGGCACGTGAGCAGGTTCTGTCCTAATGCCCGACTTTCGAGCAAT +CCGGGCACGATTGACTCCAGGCTGTACTTTTTCGCTTAGATATACACGCTTCTTTCGCCC +GGTATCGGGCTTCTTTGCAGGGCGGCAGAATGGGGTAGCTCCCCAAAATCTACGGATGCA +GTTTTGCGTGCAGGGATCATTTCAGGGCATCCACAGCCAGGCGATGACGACGACGGAGAT +GACGCCCACGATGAGGTTCAACGGCAGGCCGAAGCGCACGAAATCGCTGAAGCGGTAACC +GCCGGGACCATACACCATGAGGTTGGTCTGGTAGGCGATCGGTGTGGTGAAGCTGGCCGA +GGCGGCCATGGCAATGGCGATGGCATAAGGCAGGAAGGGTTGGCCGAGTTTTGTGGCTGC +CGCGTAGGCGAGCGGGAATACGAGTACGGCTGCGGCGTTGTTGCCAATGAAGGCGGTGAG +GATCGTGGTGAGGGCATAGACAACGACCAGCATGCCTAGCGGAGAGCCTTCGGCCAGTGC +AAGCAGCGGTTGTGCGATCAGGGCGGCTGCCCCGGTGTGGACAAGCGCTGCGGCAATACC +GAAGGCCGCGCCGATTACAAGAATGACTTGCAGCTCGATGGCGCGTCGCGCCGCCGCGAG +GTTGATGCAGCGGGTGACGACCATGGCGACGGCGGCAAAGATGGCGGCGGGGGCGAGTGG +CACGATGCGGGCGGTGACAAGGCCGATGACGGCGGCGAGGATCAGCCAGGCGATCCAGGC +GCGTTCATAGTGCGGTTGCACGCTTTCTTCGAGTGGACGCAGTAGCAGGAATTCACGGCT +GTGCCGGTAGCGACGCTGGAATTCCGAAGGGGCCTCGATCAGTAGTGTGTCGCCAGCAAT +CAGCGTGATGGCACCGATCTTCCCGGTCACGCGGTGGCCTGCGCGGGCGACGGCGATGAC +GACGGCGCCATAGCGCGAACGGAAGCGGCCTTCGCGCACGGTGCGCCCGACCATGGGGTT +TTCGGGTGCGATCACGGCTTCGATCAGCCGCCGGTCTGGGCGTGGCGTGTCGAGCTTGAA +GATCTGGCCGGTGGTGGGAATGAGGCCACGCATCTTTTGCAGGTCGGCTACCGATTCGAC +AATACCGGCAAAGACGAGTTGATCGTTTTCTTCCAATAGCTCTTCTGGTCCCGGGGCAGG +AATGAGGTTGCCACCACGTTCGATCTCCACCAGGTATAAACCGGGCAGGTGACGTAGTCC +GGCCGCTTCGAGTGTTTGTCCGGCCAGTGGGCTGCCTGCCGCCACGCGCATCTCCACGGT +GTATTCGCGAGGATTGGTGAACTCGCCCATTGCGCCCTGGCGGTCGGGCAGCAGTCGGCG +GCCAAAGAGCAGGATGTAGATGAAACCGGCAATAGCCACGGGTAAACCCACTGCGCCGAT +CGTGAAAAAGCCCATACCGGGGCCGTGCGTGGTTGACGTCAGCAGGCCGTTCACGATGAT +AGTGGTGCTGGTGCCGATCAGGGTGCAGGTGCCGCCGAGGATGGCGGCGAAAGACAGAGG +CATCGCCAGGCGTGAGGCGGCGAAGCGGTGACGTTTTCCCCAGTCGAGCACGGCGGGCAG +CAGGGCGGCGACGACCGGTGTGTTGTTGATGAACGCGCTGACGACAGCCGTCGGGAGCAT +GATGCGCAACTGGGCGACCCACAGGCGGCGACTGCGTCCGAACAGCCAGCGCGCCGGGGC +AGTGACGGCGCCGGTATGCCGGAGTCCGGCAGCCACGATGTAGAGCGCGGCAATCGCAAT +AAGGCCGGGGTTGGAAAAGCCGCTGAATACTTTGTCGATGGGCGCGCAGCCGGTGACGGC 
+CAGGATGGCGAGTCCACCGACAAGTGGCAGGTCCGGGGGCATGCGCGGCCAGGCCAGACC +GAGTACCACGGCACCCAGCACGACCAGGGTCAGGATGGCTTCCCAGCTCACGTGGTCGTT +TCCTTTTGCAGTTTGTGGGGTTTGAGCAGAGGCATGAGGTAGTGGCCGGTGTGGGAGTGC +TCCAGCCGGGCGATGTCCTCGGGGCTGCCTTCGCCGATGATTTGGCCTCCGCCGTGACCA +CCTTCGGGGCCGAGATCAATGATCCAGTCGGCGGTCTTGATGACGTCCAGGTTGTGCTCA +ATCACGATCACGGTGTTGCCATGGGACCGAAGACGCAGCAGAACGGCGAGAAGTTGGCGG +ATGTCATGGAAGTGCAGGCCGGTCGTGGGTTCGTCGAGGATATAGAGTACACGCCCCGTG +TCGCGTCGACTCAGTTCCCGGGCGAGTTTGATACGCTGAGCTTCGCCGCCCGAAAGGGTG +GTGGCGCTTTGGCCCAGTTTGACGTATCCGAGTCCGACCGTGACCAATGTGTCCAGGCGA +TGATGGATGCTGGGGATGTTGGCGAAGAATTCCCTGGCTTGCTCGGTGGTCATGTTTAGA +ACTTCTTCGATTGTGCGGCCTTTGTAATGGATTTGTAGCGTTTCCCGGTTGTAACGGGAG +CCGTGGCACAAGTCGCAGGGCACGTAGATGTCGGGTAGGAAATGCATTTCCACTTTGACC +AGACCGTCGCCCTGACAGGCTTCGCAGCGGCCTCCACGAACGTTGAACGAGAAACGGCCG +GGTTTGTAACCGCGTGCCCGGGCTTCGGGCGTTTGTGCAAACAGTTCGCGAATGGCGGTG +AACACGCCAGTGTAGGTGGCTGGATTCGAGCGTGGCGTTCGACCAATGGGGCTTTGGTCG +ATGGCGATGACCTTGTCGAGCGCGTCCAACCCTTCGATGGATGCATGGGGCGCCGGTTCG +GTGCGCGCGCGGTTGAGCAGCCGCTCGCCGGCGGCCTGCAGAGTGTCGAGTACCAATGTG +GATTTACCCGAACCGGAGACGCCGGTCACGCAGGTGAACAGGCCGAGCGGAATGCTGACC +TCGATGTGCTTGAGGTTGTTGCCATGCGCATCGCGGATGATGATGGCTTGACCCGGTTGG +GGATGGCGGCGAGCCTGCGGTACAGGGATCGTGCGGCGACCGGACAGATAGTCGGCGGTC +AGCGAATTCGGGTTGTTCATGACTTCTTCAGGCGTGCCGGTTGCTACCACTTCGCCGCCA +TGAGCACCAGCGCCAGGTCCCATGTCGATCAGATAATCGGCGCTGCGGATCGCATCCTCG +TCGTGTTCGACCACGATTACGCTGTTGTCGAGGTCGCGTAACCGCGTCAGGGTATCGATC +AGACGCCGGTTATCGCGCTGGTGCAACCCAATGGAGGGTTCGTCCAGTACGTACATCACG +CCGGTCAGGCCTGAACCGATCTGGCTTGCCAAGCGGATGCGTTGAGCTTCACCGCCGGAC +AGGGTTTCGGTACTGCGGGCGAGAGTCAGGTAGCCCAGTCCGACATCCACCAGAAAGCCC +AGGCGTGAATGCAGTTCGTGCTGGATGCGTTTGGCAATTTCGCCGCGTCGTCCGGGCAGA +TCGAGGTCGTTGAAGAAACGCCAGACCGCATCTATGGCCAGGTTGCTGATATCCGGCAGC +GTGGTATCCGCAATGAAAACATTGCGGGCAATTTCGCCGAGTCGCGTACCATTGCAGGCA +GTGCAGGCGCGGCTTGACTGGTAGCGCACCAGTTCATCGCGCACGGCCTGTGAGCCGGTT +TCGTGGTAGCGGCGTTCAAGAATATTGAGTACCCCTTCGAAGGTGTGACGGCGTTGTTGT +TTTTGTCCGCGTGCCGATACATAGGTGAAGTCGATTTCTTCTTCGCCGCTACCGTAAAGC 
+AGTACGCGGTGTGTGCGTTCAGACAGTTCGGACCACGGCGCTTCGGTGTCGAATTCGTAA +TGCCGGGCCAGACTTTCGATCAAGCGGTAATAATAGGGGTTGCGGCGGTCCCAGCCGCGG +ATGGCGCCGCCTGCCAGCGAGAGCGAAGGCTGGGTGATGATGCGCCCCGGGTCGAAATAA +CTTTCTCGCCCCAAACCGGCGCATTGCGGGCAGGCACCGGCAGGGTTGTTGAATGAGAAT +AATCGTGGTTCCAGTTCCGGGATGCTCCAGCCGCACTGTGGGCAGGCATGGCGGGCAGAG +AAGACCATTTCGGACGTGTGGGGCTTGTCGATATCAGCGATCAGTGCACTGCCGTCCGCA +AGCGCCAGTGCCGTTTCGAAAGATTCGGCGAGCCGGGCCGCCATGTCGGAGCGGATGCGG +AAACGGTCAACCACAATTTCGATGTCGTGGGGCTGGTGCCCATCGAGCCGGGGCAGCGGG +TCAAGTTCGTAGACGGTGCCGTCAATACGCGCCCGGATGAAGCCGCGGGATTTGAGATCT +TCCAGTAGTTCCTGATATTGCCCTTTGCGACCGTGCACCACGGGGGCAAGCAACATGACC +CGGCGTTCGGATGGATCGGCGAGCACCGTGTCCACCATTTGTGAAACGGTTTGCGCATCC +AGGGTGAGACCGTGATGTGGGCAGCGGGGCGTGCCCGCGCGCGCAAAGAGCAGGCGCAGG +TGGTCGTGAATCTCGGTGACCGTGCCGATAGTTGAACGCGGATTGTGTGAACTGGCTTTC +TGCTCGATGGCAATGGCGGGTGACAACCCTTCTATCGAGTCCACATCCGGCTTTTCCATG +AGCGCCAGGAATTGACGTGCATATGCGGAAAGAGATTCGACATAACGGCGCTGTCCTTCG +GCGTACAAAGTGTCGAAAGCCAGCGAACTTTTACCCGATCCGGACAAGCCTGTAATCACG +GTAAGACTGCCTCGCGGAAGCTCTACATTGATGTTTTTCAGATTGTGTGTGCGGGCGCCG +CGTATGCGGATTGTGTCCATCAGGCAGCGTTGCTCAGGCAAATGTTTTACTATACGGCGG +CGTGCAGGGCTTTGCAAAACACCGATCAGCCCTGTAGCTCAAAGCCAAAAAGGAGAGTAA +GCATGGCTGGATCAGGCAAGCTTAGAAAGGATGCGGGCATTATCGGTCTGCTGTATTTTA +GCCTGGGTGGCATCATCGGTTCGGGATGGCTTTTCGGTCCCTTCGATGCAGCCAAGGCCG +CGGGTCCCTGGAGTATCGCCTCCTGGATTATTGGTGCCGCCGTGGTGATGTTGCTGGCAC +TGGTGTTCGCGGAACTGGCGACAATGATGCCCAAGAGCGGGGCATTGATTCATATCAGTC +ATATCGGGCATGGCGAATTGATAGGGCGCATCTGGTCCTGGATTCTGTTTTTATCCAGTG +TGGTGACACCGCCGATCGAAGTTATGGCCGTATTGACTTACCTTAACAACAAGATTCCCT +ATTTTGTCGATCCCTCGACCCATGTGCTCTCTACGATAGGTTTTTTCGCAGCCATCGTAT +TGTTGGGTGTGGTGGTGGTCGTGAACTTCTTCGCGATCCGTTTCGTATTGTGGATCAACA +ATATCGCGACCTGGTGGAAAATGTTCATTCCGGCAATTAGCATTATAGTGCTGATGAGCT +ATTCCTTCCATCCGGGCAATTTCCATCTTGATCTGGGCTCGGTAAATGCCGCAGGGATGC +TTACGGCCGTGTCCACTGCGGGTATCGTTTTCAGCTTTCTGGGTTTTCGCCTCGCCATCA +ATCTGGGCGGAGAAACGAAGAATCCGGGCAAATACATTCCGATCGCTGTCATCGGTTCGG +TGCTGATTGCGACCCTGATCTACGTGGGCCTGGAGGTGACGACCATCACCTCAGTGCGCC 
+CTTCGGATTTCGCCAATGGATGGCCGTCGCTGGCTTTCAAGGGTGACGCCGGTCCCTTCG +CAGCTCTCGCCGTGACCATCGGCGCGGTGTGGTGGTCCTGGGTTCTATACGCTGATGCCA +TCGTTTCACCGTTTGGCACGGGACTCATTTATACCACCAATACCTCTCGGCTGGGTTACG +CCATGGCCGAGGTGGGCAGTGCGCCGAAGAAGATGCAGAAGCTGAGTCGTCAAGGGGTAC +CGTGGATTTCGTTGCTGGTGACCTATGTGATTGCCTGCATATTCTTTTTCCCGTTCCCGT +CCTGGCACCAGTTGGTTGGCTATGTCTCCGACATCACGGTGTTGTCATACGGCATTGGTC +CGGTGGTGCTGTTGATCATGCGCAAGCGCCGCCCGGAGGAGCCTCGTCCGTTCCGGCTCA +AGGGTGCCAAGGTTATTGCGCCGATGGCATTTATTGCTTCCAACTGGGTGATTTTCTGGA +CTGGCTGCACGACGGTGACCTTTCTGTTCGGGTTACTGGGTTCGCTGTTCGCGGTTTATG +CAATCTGGTATTACATCATTGCCCGAAAGCCGTCAAAAGAGTTTGGTTGGAAGTATGCCT +GGTGGGTGTTTCCCTATTTCATCGGGATGTGGTTGTTGTCTTATATCGGACCTTCCACTT +TGGGACCGGCGCATGTCAGTCTGTTCAATGTCCAACCGCTTGACATCTTGCCGCTGGGCT +GGGACATGATTGCCGTGGCGGTATTCAGTCTGGCTGTGCTCTATACTGCCACGTCCAGTG +CTTTGCCACGCGAAGAGGCGGACCGGTACTTCGATGAGCTTAAAAAGCTTAATTTGCCGG +AAGAGTACTCCGAAGGCACGGAATCGCCTTGATTCTGCGTGTGTCCGGGCGTGTGGCCGA +TAGCTTTGGCCACACGCCCGGAGTGTATCTGTGGGTCTGGGAAGCCCGGTATGAAAGCGA +TGTACGACTTTTCAGTGAATAATCATAAGCATTTTTAGCAGCGTGATACCCCCGAACAGA +TGGTTTTGAGCTACAGTTGGAGTTCGGGCGACCGCACACTCTGATTCTGACCTGAGCGTG +CCAAGGGGGAGCGGTGCAAGCAACCGATTGGTTACACCAGATCGCGGGGCCGGCGCTGGT +GCTGAACTTTGCCGATGGTACGGTTATTGATATCAATGCCGCAGGCCGACGGCTTGTGGG +CATTGAAGGCCAGGGATTGATTGGGCAGGATTTTTGTGGGTTTTTTGTGAGCAGCGACGC +AGATTGTTGCTGGCCAACTTTACAACGCTCCATCAATCTTCAGGGCGGCTTCCGTTACGA +AGGATTGCATTTGCGTACGCCGGGCGGCGCCATGCGACGGGTCAATGTCAGCGCGGAATT +GCTGCAATCCGAACAAGAACGCGCTGTCCTCATGTTGTTGCAACCGGGCACTGCGTCATC +GCCCCAGGCTACTGATCACGAAAAAGAGTTGGCTCAGTATGCAACCGTGGGGCTTTATCG +TCTGGATGCGGAAGGGCGACTCACCCACGCGAATCATGCATTGGCCCGGTTGCTGGGCTA +CGAGACGGTGGGTCAGTTGCTTGATTCCGCAGCGGTTCAACGCAGCCAGTGGTACGTCTG +CGATGGGGTTTCCGAGGAGCGGGTCAGCGATGTGAACGACGCTGCAATTTACCGATGTAA +AGTGCAACTGCGGCGTGCCCATGGAGCAGCGTTTCGGGCGGTTGAGGCGATACGTGAAAT +ACGTGATGTCCGGGGGCAGTTGATGAGTCGGATTGGCACGCTGCGGGAGATTTCCGATCA +ATCGTCTTCCGAACAGGCACTTGCCATTTCGGAAGATAAATATCGCAGTCTGGTCGAGCA +CAGCCAGGACGGAGTTTTTGTTATTCGCGACGGCGTCTATGTCTTCGTCAGCCAGGTTTA 
+TTCGTCCATGCTGGATTATGCGCCGGAAGAAATGGTGGGCGAGTCATTCCTGCGTTTTTT +TGCACCGGAAGACCGCCAGAAGATTGTCGATGTCTGGCATGAACGTCAGGCGGGGCACTG +GGAGCAGGGAGCCTACGAGGCGCATCTGCTCAAGAAGGATGGTACACGGGTGCTGGTATC +CGTGCGGGCAGGCCCGATTCGTTTTGCAGGTGCGATGGCCAGTACCGGTACCGTGCGTGA +CATTACCGCATACCGCGATACTCAGCAACAGTTGAGCATGGCCGAGCAGCGCTACCGCGA +CATATTCGAACACGCGGTGATTGGGATTTATCAAACCGCGCCCGATGGCAGGTTGTTGGC +GGCCAATCCGGCGATGGCACAGATTCTGGGTTACGACTCTGTCGAGGAACTGCAGGAGCA +GGTGGATGATGTCGGAGAACTGTTTTTTGATCGGATTGAGCGCGATACGCTGATTGAAAA +ACTTGAAGCCGAGGGCCGCATGTATGGGGCCGAATTGCGGCTACGTCATCGCAAGGGGAC +GCAACTATGGGTTCAGGACAGTGCGCGCGTTGTATACGACGCAAACGGCAAGCTGGTTTG +CTATGAAGGTATGGTTGCCGACATTACGGCACGGAAAATCGTCGAGCAGGCGCTGCACCG +TTCGGAACAATTGTTCCGGACACTGGTGGAGCACACACATGTCGGCGTGATCATGGTGCG +CGAGGGCGTCGTGACTTACGCCAATCGCGCGCTGGCTCACATGCTGGATTATGCGGAGTC +CGATTTGTTGGAACAACCGCTGGCGTCTTTGTTCGCCCCGGAGTCCCGCGACTGCGTAGA +ACGGCTGGAGCAGGAGCTGAAGACAGCCGTCGGATCCAATATTTACGAATCCTCGATGTT +GGCAGCCGATGGAACGCGACGGGTGCGAGCCAACCTGAGCGTAGCGTCGGTCATCTTTGA +AGATAATCCGGTGATGATCATCACGGCGCATGATTTGACCCGTGAAAAACGGGCCGAAGC +CCGTTTGCGGCGTCTGGCTACCCACGATCCATTGACGAATCTGCCCAATCGCGTGGTGCT +GCGGGAACGGTTGGCTCAAGTGCTGAAAAAAACGCGTGAGACCGGTAATGTCGATTGGGC +GGTGCTATTTCTCGACCTGGATGCCTTCAAACTGGTGAATGACAGCCTGGGACACGCCGC +CGGCGACGAGTTGCTGAGGCAGGTGGCCGTGCGTCTGCGTCGGGCTGTGCGTCACGACGA +TCTGGTCTGCCACCACGGCGGCGATGAATTCGTGGTGCTGGCCTTCAACATTACGCACGA +AATCGATGCAGTGGAACTGGCTGAGCACATTGAAACTGCAATGGCCGAACCGTTCCGGAT +CAGTGACCATGAAATTTACAATCAGATCACGATCGGTATTGCTCTGGGGCGACAGGAATA +TGAATTGCCCGAAGAGGTGCTGCGCGATGCCGATTCCGCTGTTGCTGCGGGTAAGCGCCT +GGGAAAAGTCTGCCATGTGGTGTTCAGTTCCAGCATGCATGTTGCGGCGATGGAGCGTCT +GGAGCTGGAAACCACATTGCGTGCTGGCCTGACGCGAGGCGAGTTCGATTGCTACTACCA +ACCCATCTTCAACGTCAAAAACAATCGCATCGAGAGTTTAGAGGCTTTGCTGCGGTGGCA +TCATCCGGAACAGGGCGTGTTGCGACCGCACTCATTCTTGCAGGTGGCAGAGGAAAGCGG +TGCCATCGTGCCGCTGGGTTGGATCGGTTTGCGGCGCGCGCTTGCGGCGTGCAGTCAATG +GCAAAGCCTGGGCTTGGAAAGAGAGGTCAGCGTCGCGGTCAATTTGTCCGATGCCCAGTT +TCGCTTGCCGCAACTGCCGGAGCAGTTGGCTCATGAGCTGGAACAGGCTCAATTGCCGTT 
+CCATCTGTTGCATCTGGAAGTAACCGAGCGGGTGTTTCTGGAAACGCCGGGTCTGGCACG +GCGTACATTGGGTCGATTGCATGCTTTGGGGGTCAAGTTATACCTGGACGATTTTGGCAC +CGGTTATTCGGCGTTGTCCTACCTGCGTGAACTGCCGTTTGATGCCTTGAAGGATAGATC +GTAGCTTCATTGCCGATTTGCCCCGGGATGCGCGTACGGTCGCCATCGTGAGAAATATCA +TTTCATTGGCCAATGACCTGGAGCTCTTGGTCGTTGCCGAAGGGGTGGAAAAGCCCGAAC +AGGCGCGCATACTCGCTGATATGGGGTGCTATCGTTTTCAGGGGTATTACTACGGTGAGC +CCATGGATGGCGAGACCAGTTGCAAACACCTCGATAACCGGAGGAGGCAACAAGTCGCTC +AGCATGCCGTTGATTGAGCCGGCTGCCAGCGGATTCGGCCTCCCCGGTTGCCGCAACGGG +AGCAGTCGGCGACAATATACGTCCTACTTCAGGAATCAGTCCCATGCCAACACGCCGCGA +ACTCGCCAATGCCATTCGGGCATTGGCCATGGATGCCGTACAAAAAGCCAATTCTGGACA +TCCGGGCATGCCGATGGGGATGGCCGATATAGCCGAAGTCCTGTACAACGATTTTCTGCG +CCATAATCCAGCCAACCCGCACTGGCCGGGGCGTGATCGGTTCCTGCTCTCCAACGGCCA +TGGCTGCATGTTGCAGTACGCAGCGCTGCATCTGTCGGGTTACGACCTGTCGATGGATGA +GATCCGCAATTTTCGCCAGTTGCACTCGAAAACCCCGGGTCATCCCGAATACGGTCATAC +GCCCGGCGTCGAAGTTACCACTGGCCCGCTCGGACAGGGCGTCGCCAACGGTGTGGGCCT +GGCGTTGGCCGAGGCGTTGCTCGCCGCACAGTTTAATCGCCCAGGGCACAAAGTCATTGA +CCATCACACGTATGTGTTCTGCGGCGACGGCTGTCTGATGGAAGGCATTTCCCACGAAGC +GGCCTCCATCGCCGGTACGCTGGGTCTCGGCAAGCTCGTGATGGTCTATGACGACAACGG +CATTTCCATCGATGGCGAGGTCAAGGGTTGGTTCCATGACGACACGCAAAAGCGCTTCGA +GGCATACGGCTGGCATACGATCGGGCCGATCGACGGTCACGATGCCGAGGCTCTCAAAAA +AGCCTTCGCCGAGGCTCAGATCGAAACGAAGCGACCGAGCCTGATTCTGGCGCGCACCAT +CATCGGTTTTGGCGCGCCCGACAAGCAGGGCACGGCAGAAGCGCACGGTAGTGCGCTCGG +CGATGCAGAAGTGGCCAAGGCCCGCAAGGAGTTGGGATGGAAATTCCCACCGTTCGAAAT +TCCCGAAAGCATTTACGCCGGCTGGGATGCGCGCGCTCGTGGCGAGCAGGCCGAAACCGA +ATGGCACGAACGCTTTGCCGCTTACGCCAAGGCGCACCCTCAATTGGCAAAAGAATTGAA +GCGTCGGCTGGCAGGTGAATTGCCGGCGGATTGGGCCACGACGGTAGAGCAGCATATCGC +GCATGTCGCCAGGAATGGCAAGGCACAGGCGACGCGCAAAGCCTCGGGCGCTACACTTGC +GGCGCTGGCGCCGACGTTGCCGGAAATTGTCGGCGGATCGGCGGATTTAACGCCGTCCAA +CGACACTTGCTGGCCGGAAGCGAAAGCCGTCAAGCCCGGCACGCCGGAAGGCAATTATCT +GCATTGGGGTGTGCGCGAATTCGCGATGACCGCGATTCTCAATGGCATGGCTGTCCATGG +CGGGTTTGTTCCTTATGGCGGTACGTTTCTGACTTTTTCCGATTACGCACGCAATGCAGT +GCGTTTAGCGGCCTTGGCGCATTACCCGACCATCCTTGTTTATACCCACGACTCGATCGG 
+TCTGGGCGAAGACGGCCCTACACATCAACCCGTGGAGCATGTGGCGAGCCTGCGGGCCAT +GCCGAACCTGACTTTGTGGCGCCCGGCTGACGACGTGGAAACCGCCGTTGCGTGGCGCGA +TGCGATCGAACGTCGCGACGGGCCGACCATGCTTGTGCTGACTCGCCAGTCGGTGCCGCA +TTACGAGCGCAAGGCACAGCAGATCGAGGCCATACATCGAGGTGGCTACATCCTGCACGA +ACCCCAGAACGCCCCGCGGGCGCTGATTATCGCGACCGGTTCCGAGGTGGATCTGGCGAT +GCAGGCTGCGAGGGTATTGACCGAAGAAAATCTGCCGGTGCGGGTCGTGTCGATGCCCTG +CCAGGAATTGTTCCTGGCACAGGATGTCGATTGGCAGGAACACGTATTGCCGGCACAGGT +GACGGCTCGTGTAGCGGTTGAAGCAGGGGTGTCCATGCCTTGGTATCGTTTCGTCGGCAT +CCATGGCCGGGTTGTCGCGATGGAGCGCTTTGGGGAATCGGCCCCGGCCAAACAGTTGTT +TGAGGAATTCAGTTTTACGGCAGAGCGTGTTGCGGCCGCAGTGCGTGAAGCGGTGGCCGC +TGCAGCCGGTTAACGGTGAGGTTGCGGGCAGAATGCCCGGTCTGTTTTGGTAATGGTTTC +TGAGGAGCAGAAAATGATCAAGGTGGGTATCAACGGCTATGGCCGAATTGGACGCAACGT +TATGCGCGCGTTATACGAAAGTGGACGTCGTGATCAGTTGCAGGTAGTTGCGATCAATGA +TTTGGGCGATGCACAAACGAATGCCCATTTGACCCGTTACGACTCGGTCCATGGCCGTTT +CCCGGGTGATGTACAGGTTGAACAGGGTCATCTTGTTCTCAATGGCGATGTGATTCAGGT +ATTGGCCGAACGCGACCCGTCCAAATTGCCTTGGGGGAAGCTGGGCGTGGATCTGGTGCT +GGAATGCACTGGTCTGTTTACTTCGCGCGAGAAAGCCAGTCTTCATTTGCAGGGCGGCGC +GAAAAAGGTGCTGTTGTCTGCGCCAGCCAAGGATGATGTGGATGCCACCATTGTTTATGG +TGTGAACCATAAGACGCTTGAACCTGAAAAGCACGTGATCGTTTCCAATGCGTCCTGCAC +CACCAACTGCCTGGCACCAATCGCGCAGGTCATGCATGAGCTGGCCGGTATCGAGGGCGG +TATCATGAATACGGTGCATGCATTCACCAACGATCAGAATTTGTTGGATGTCTATCACAA +GGATCTGCGTCGTGCGCGTGCCGCCACGGCGAGCATGATTCCGACATCGACCGGTGCTGC +CAAGGCGATCGGATTGGTGCTGCCCGAACTCGACGGCAAACTCGACGGCTTTGCCATTCG +AGTGCCGACCCAGAACGTGTCCTTTGTGGATCTGACACTCAACCTGACTCGTGAAGTGAA +GGTCGAAGATATCAACCGCGCCATGCGCGAAGCGGCGGATGGACGGTTAAAAGGCGTACT +GGCTTACAACGAGGTTCCGCTGGTTTCAATTGACTTCAACCACAATTCGCATTCGTCGAC +TTATGATGCCGGTTTTACCAAGGTCAAAGGCCGACTGGTGAAAGTCTGTTCGTGGTACGA +CAATGAGTGGGGCTTCTCCAACCGCATGCTCGACACGGCCGCGGTCATGTTTGGTCGCGG +CTGACGCATGGGAGTCAAATCACTCAAGGATTGCGAGCTGCAAAACCGGCGTGTGTTGAT +GCGGGTGGACTTCAATGTGCCCGTCAACGACGGAGCCATTGCGGATGACACACGTATTCG +GGCCGCGCTGCCCAGTATTCACGAGGCATTGAAGGCTGGGGCGCGCCTGATGCTCATGTC +ACATTTTGGTCGTCCTGAAGAAGGCAAACCGGAATCGCGCTTTTCGTTACATCCGGTCGC 
+CCGGCGTCTGGGTGAGTTGCTGGGATTCGATGTGCCACTGGTGACCGATTATCTGGCGCG +GGACCCGGAACCCGGATCGGGACGTGCCGTACTGTTGGAAAATGTCCGTTTCAATGTCGG +CGAAAAACGCAATGAAGAAACACTCGCCCGACGCTATGCGAACTTGTGCGATGTCTTCGT +GATGGATGCCTTTGGCAGCGCGCATCGTGCCCAGGCTTCCACGTATGGTGTGGCGCGATT +TGCGCCACAGGCCGTCGCGGGGGAGCTGCTCTGCGCCGAGCTCAAGGCACTGGGTCGTGC +GCTAAAGGCGCCCGATCGACCACTGATAGCTATCGTTGGCGGCTCCAAGGTTTCAGACAA +AATAGGGGTGCTCGATGCATTGATCGAGCGATGTGATGGGCTTGTCGTTGGCGGTGGTAT +TGCCAATACTTTTCTGGCTGCGGCAGGGCATCCCGTGGGCAGATCGCTTTACGAACCCGG +TTTTGTGGACGAAGCGAAACGTCTGATGATAGCGGCCCGTGAACGAGGGGTGAATTTCCC +GTTGCCGGTCGATGCCGTGGTGGCTGAAGCCCTGGCCGAAGATGCCGAGGCCGACGTCAA +GCCGGTGCATGCGGTGGGTGCAGGCGATATGGTTCTCGATATCGGCCCGGAAACCGCCGT +GCTGTATCGTCCTTTGCTGGCGAATGCGGCCACCATCGTGTGGAATGGTCCGGTGGGGGT +GTTCGAAATTGACCAGTTTGCCGAGGGTACCCGCGCGGTGGCCGAAGCGGTGGCTTCCAG +TGGGGCTTTTTCGATTATTGGCGGTGGCGACACCATCGCGGCGCTGGCCAAGTTCGGCGT +GACCGACCGGGTGAGTTATATTTCCACCGGTGGCGGGGCGTTCCTGGAATTTCTCGAAGG +CAAGACCCTGCCCGCCGTGGACATTCTGGAGGCGCGTGCGAGTGACTGAGACAATGGCGC +GGTTTCGGCGTACCAAGATCGTTGCCACGCTCGGACCGGCGGTGGATGAAGGGGACGTGC +TGGCCCGTATGATTGCTGCTGGTGTGGACGTTGTGCGCCTCAACCTTTCGCACGGCACGC +ACGCCGAGCATCGTAAGCGCGTCAAAGCGGTTCGCAAGGCTGCGGCCGAGCAAGGCCGTG +ACGTGGGTGTGTTGATCGACCTGCAGGGACCCAAGATCCGGATCGAATGTTTCCGTGATG +GGCCGATCGAGCTGAAGGAAGATGATGCCTTCACGCTGGATTGTGGCCTGGGTAGTAACG +CAGGCGACAGCAAACGAGTGGGTGTCGCCTACAAGAATCTGCCCCGCGACGTGCAGGCTG +GCGACATGCTGGTGCTGGCCGATGGGGAAATTGTTCTTGAGGTCCGTGAAGTGATCGGCG +AGCAGGTGCATTGCCGGGTCGAGACGGGTGGTGCCCTGTCGGATCACAAGGGGCTGAATC +GCAGAGGTGGAGGACTCAGTGCCGAAGCGCTGACGCAGAAAGATCAGGTGGATATCCAAC +TTGCGGCCGAGTTGGAAGCGGATTTTCTGGCGATCTCGTTTCCCCGTGTTGCTGCGGATG +TGGAACGGGCCCGCGCTTTGCTGCGTGCGGCGGGTGGGACGGCGGCCATCGTGGCCAAAA +TTGAACGTGCCGAGGCGGTCGAGAATCTGGATGAGATCATCGATGCCAGTGAGGTCGTGA +TGATTGCCCGCGGTGATTTGGCCGTCGAGATTGGCGATGCACCGCTGCCGGGGGTGCAAA +AGCGCATTGTTCGCCATGCTCGCGCACGCAACACGGTCGTGATTACTGCAACTCAGATGA +TGGAATCGATGGTGACCAGCCCGACGCCGACCCGCGCCGAGGTACTGGATGTTGCCAATG +CGGTGCTCGACGGTACCGATGCGGTGATGCTCTCGGAAGAAACCGCTGTGGGCAGGCATC 
+CGGTCAAGGTTGTCGAGGCCATGGCCCGGGTGTGCCTGGGGGCCGAAGCCGAACCACGTG +AAGACCGTGATCGACGAATTGGGGGTGATCGTTTCGAGCTTGTGGACGAGGCCATTTCCA +TGGCGGCCATGTCTGTCTCGCAGCATACCGATGTGACGGCTCTGGTTGCTTTAACGGAAT +CGGGCCGTACACCGCTGTATATGTCACGGGTACGTTCGGGCATACCGATCTACGCCCTGA +CGCGTCACGAATGCACCCGCCGTCTTTTAACCTTGTATCGCGGCGTGTATCCCATTGCTT +TCGAAGACGAGCATGAATCCGACGAAGTCTTGCCGGATGTGGCGGCGGCGTTACTGGAGC +GTGGTTTGGTCACACCACACGCGCTTATCATTGTTACCCGGGGCAAGCTACGGCATGTCT +CGGGTGGCACCAACAGCTTGCATCTGGTGCAGGTAGCTGATGTCCTGCCTGAGCAGGTTT +TATAATGAACGGTCATTGGGCCAGGGAAAAGAGGCGCATGCGAGTAATAAACAGGCTTTT +GGGACTTTCCGCAGTGGTGATGGTGCTGGTCTTGGTCAGTCCCACCGCATCAGCCAGTAT +TGCGAGCGATGCGGTGGCGTACCCACCGGTTTCGTTAAGCAATGTGCCTCCGGCCAAGGC +CGAGGAGATTCGCAAGGGTGAATACCTCACCAAATTATCCGATTGCATGGCCTGTCATAC +TGACCACGGTAATGGCAAAGCAGGGAAACCGTTCTCAGGCGGCTTGGCTATCAAGACGCC +TTTCGGCAATATCTACAGCCCGAATATTACCCCCGACAAGAAAACCGGCATCGGCAACTG +GACGTTCAAGCAGTTTGACGATGCGGTGCGTTATGGCGAGGGTCCGAATGGTTATCTGTT +CGCCGCCATGCCGTACAACTACTACAGCATGATGAACAAGGATCAGGTCCATGCCATCTG +GGAATATCTCAAGCATGTGCCGGCTGTAAACCGTAGAAACAAGCCACTTGGCATGCCGCC +GCCATTCCGCTGGCGTTGGCTGCAATTCGGCTGGCGATTCATGTTCGTCAAGCCCACACA +AGGTGAATTCAAATACGATCCCAAGCATTCCAAGGCATGGAATCGCGGACGTTTTATCGT +TGAAGGCCCGGAACATTGCGGTGCTTGCCATACGCCGCATAACATGCTGGGCGGTTCCGA +GAAACGTTTTTTTCTGGGTGGTTCGGATATCACCGGGTTCTGGGCTCCGAATATCAGCGG +TCTGGCGACCAAGCCGCACCCCATCGCAACCATCATGCGGGTCTTTCGTGAAGGCAAAGG +CCTGGGAGGGGGCGACCTGAAGGGGCCGATGATTGATGCCATAGCCAACAGCATGCGGTA +TATGACTCCTGCGGACATGCGAGCTGTGGCGGTTTATATCCAGAGCGTGCAGAGTGAAGT +GCCGCCGGGACCGCGGCCAGTGGCCATGGATGAGGTGAACCTTGCCCGGGGTGAAAAAAC +TTATCAGACCGATTGTGCGGCCTGTCACGCAACGGGCATTGGAGGGGCGCCGCGTGTGGG +TGTGGCCAAAGACTGGGATGCGCTTGGCAAAAGCCCGTTGTTCATATTGTTCGAAAACGT +GTGGCATGGCGTGAGCATCATGCCGCCCAAGGGCGGTTGCAAAGCATGTACACGAGACGA +TGTCACATCGGCCATCGTGTACATGCTCAAGCGGAGCACCTCCCGGTCAAGCAAACCGGC +AGTGCAGGCCACGACTTCGAAAAGCGGCATTCCACGGGATACGGTCAGCCTGGCGGTTGG +CGACAAGATTTATCACGCACACTGTGCCGCTTGCCATGCCAGCGGAGCTGCTGGGGCACC +GCGTCATGGGGACATAAAGGAGTGGGCAAGTCGGCTCAAACTGGGGCTGGACAAGTTGCA 
+CCACAATGCACTCGATGGCATCGGCATGATGCCGCCCAAGGGTGGATGCACCAGCTGTAG +CAAGGATCAAATCCTCTCTGCGGTTGATTATCTGGTCGACGGCAGCGGTGGCAAGGCGCT +GGTGGAGAAATCGCTCAGCGGCAAGCAGGGAGGTTGAGATGAGCGGGGATAGCATACTCA +GGATTTTCTGGTGGTTGGTGCTCGGTGCCTGGATGTCCGGCATCGGTGTGATGCTGGGCC +GTGAATTGGGCTTGACCGTTTTACTGCGATACCTGGGGCGTAACGAATCCGAACGCCGCG +AATTGCTGGCACCACATATCGAACGTCCCAGCGAGGGGCATCAGGTGTGGCTGCTGCTCG +GCGGAGGCGCTCTGATGGCGGCCTGGTGGCCCCTGTTTACTGCAACGCTGTTTGGTGGAC +TCTGGCTGGTACTGCTCTTCATGGTACTGGCCGTGCTGGTGGGCCCTGTTGGGCATGGTT +ACCGGAAACGCCTGAGCGAGCATACACGTGGGCCATGGGACCTTTTGTGGGCAGGGATTT +CTCTGGCGGCATTGCTGGTGTTTGGGCTTGCCATTGGTGCGACTGTCAGTGGTGTGCCGC +TGCACTTTGACGCACACATGGATGCGATGTGGGGCGGGTTCTTTTCCCGTTTTACGCCCT +ATAGCCTGTTGGTACCGGGACTGATGGCCATCACTTTCGGCTTGTGGCTTGCTGCGGCAC +GCGCGGCGCATGAATGTACGGGAGCCGTGGCTGCTCGGGCACGCGCGCTGTTGTTGCCGG +TGGGGGGGGTGACATTGCTGATTTTTGCCGGTGGCGCGGCTTGGGCCACGCAGTTGCCGG +GTTATGCCGTAGGCGGTTTGCCGAAAGTCGGAGCGAGTCCGCTTGATGGCACCACCTTCG +CAGTGGGGGGGGCCTATCTCGAACGTTTCCTGTCGCATCTGCCGTTGGTGATTGTGCCGG +TATTGACCGCGCTGGCCATAGTGGGTGCCCTGTTTTTCAGTTGGCGAGGGCGACTGCAAC +GAGTGGGGCCATTGGTTGTCATCGCTGTAGTTGGCATGGTCGCGACACTGGGTGCCATGA +CCTACCCCGTGATTCTGCCTTCGTTCGCTGAACCGGCCCAAAGTTTGACGCTCTGGAATG +CAGCGGCCGAGCGTCCTGTGCTGGTGGCTTTCCTGGTGTGGTTGGGTATCTTGGTTCCTG +TCGTCCTGGGTTACGAACTCTGGCTTCGTCGCCGGAATGCACAGACGGTAGTGGCCGGGA +GTACGGCGCGCTGAATCAAGTGCGGCCATGAACGCGACCGCGCCATTGCAGGAGCAGGAG +CCAGAGCCGGGTAACCGGCGCTTTCCTGGTCTTTTGATGGGTGGCGCACGGCAGGTCGCC +AGATGGTCCTGGCAACATCGCCGCTATGGTCGCTGGCCGCTGCGTATCTTGCTGGTGCTT +CTGCTGGTGCTCATCATTCTGGTGGGCGCAGGCTACGGTTTGTTGCGCGGTAGCCTGCCG +CAAACCGAAGGCACGGTGCGTCTACCCGGTCTGGGTGGACGGGTCGTAGTGACGCGAGAT +GCCCAGGGCGTGCCTACTATTCGCGCACACAATGCGCTCGATGCCTGGCGGGTGCTGGGG +TATCTCGAAGCGCAGGACCGTTTCACGCAGATGGATTTCATGCGCCGAGTCGCGGCAGGT +GATCTGGCGGCCCTGGTTGGGCCGGCGGCATTGCCGTTGGATCGTATTCATGCGCGGTTT +GATCTGCGCGCGCGTGCCGAACGCATCTATCTGGATGCGCCATCGGTCGAGCGGGCTCGC +CTTGAGGCTTACACCCTGGGCGTGAACGAGGGGCTCGACAATCTGTCTGTACGTCCCTGG +GCGTACCTGTTGCTGGGTGAGCGACCGCGTGCCTGGGAGCCCGCCGATTCTGTACTGGTG 
+ATCTACGCCATGGGCTGGATGCTCCAGAATCCACTCGGCCCCAGGATGCGCGCACGTGCG +GCATTGCGTAGCCTCTATCCACCAGCAGTGACGGCCTTTCTGGGAGCGCCGGATACGCAC +TGGGCGGCGCCCATGGCAGGGCAACCGCCAGCTTTGCCTCCGGTGCCCGGCACACAGCTC +ATCAATTTGTCAGCATCGGGAAAATCCAGGGCAACGGCACCTGTGCCGAGCACGGCGATG +TACGCAGACACGGTAGCCAAACTGATTTTGCCGCAACCTTTTCCGGGTTCGAACAGCTTT +GCCGTAAGTGGAGATTTGACTGGAACCGGTCACGCCTTGTTGGCCAACGACCCGCATCTG +AGTTTGCGTGTGCCGGCAACCTGGTATCGAGCCAGACTCATCTATCCCGCGCCCGGCGCT +ACGGCCAGTCAACCGGTTGAACTGACGGGCGTGTTTTTACCCGGCGTACCTGCACTGGTT +ATTGGCACCAATGGCCACATTGCCTGGGGGCTGACCAACAGTGGCGGTGACTGGACTGCG +CTCGTGCGCGTCAAAGCGACGGCGGCCGGGTCCCGAGGCGGGCCGCTGGTGTATGCCACG +CCGAGCGGTACGGCGACGTTGGCGATTCAGCATGTGCTATTGAAGGTGCGCGGTCAGACG +GCTCGTCCCATGTCGATTCGACGCACGATATGGGGTCCGGTTATCGGTACGACCGCAGAT +GGGGCGTTGCTGGTTTCGCACTGGGCACTGGCGCAACCGGGCGGAGTGAATCTCCGCTTC +ATGCAACTCGACAGCCAGACGACGGTTAAACAGGCGTTGATGGTGGCCGGCAGTGCCGGT +ATTCCGGTACAGAATTTTCTGGTTGCCGATGATCAAGGGCACATTGGCTGGACGCTGGCA +GGACGTATTCCCGTGCGCAAGGCCGGATGCGATTACGCAGTGCCGCAATCCTGGGCTGAT +GGCAGCTGCGGGTGGACCGGCTGGTTGGCGCCCGGCAGTTATCCGAGCATTGTGGATCCC +GCGCAGGGCTATCTTGCGACCGCGAACAATCGAGTTGATGCACGCACTGCGGCGGTGCTT +GCGTTAGGCGACGAAAATTTTGCCGATGGGGCCCGAGCGCATCAGATTGTTTCAGACCTG +AAGGCGTTGGCAAAACGGGGCAAAATCACGGCCAATGACCTTCATGACGTGCAATTGGAT +GACCGCGCGCAATTTCTGCAACGTTGGCATGATTTGTTGTTGAATGTGTTGTCGCCATCG +GCGCTCGAATTCCATCCGCATCGGCAAGCGTTACGCGAAGCGGTGGTGAATTGGGGCGCA +CGTGCCGCTGTCGATTCCGTGGGCTATCGTATGGTGCGCGCGTTCCGTAATGAAGTGGCT +GCTTCGATGTTCATGCCGATATTGAAACGGTTGCATACCCGCGATCCGGGTGCGGGTCTG +CCTTTTTCCAACCAACTCGAAGGGCCGTTGTGGCGTCTGCTTCAGGTGCGTCCGCATAAC +TGGCTGAACCCGGCATATCCCACCTGGAATGCCTTGCTGGTGCATGCGGCTGACGCAGTG +ATCCATCGGTTCTGGAATCCGGTCAGCGGTCTTGCCGATGCGACATGGGGGGCACGCAAT +ACCGTGCGTATAAATCAGCCGTTGGCGGTGGCGCTGGGGCCGTTGGGGCATTGGCTGGAC +ATGCCGCCGACGCAACTGCCCGGCGATAGCAACATGCCGCGTGTACAGACGCCGGATTTC +GGAGCTTCCATGCGCATGGTGGTGTCACCACAACCGAGTGCTCCCGGTTTGTTCGAACTG +CCGGGTGGCGAAAGTGGACACCCGTTATCGCCCTGGTACAGCGATGAATTCAAGGCGTGG +GCCGAGGGGCTGCTGACACCGCTTGCGCCGGGACCAGCGCGAAAGACCTTGCGTTTTATT 
+CCATGGAGTCGGAGGGTGTCTGACCGGCCGACTGTGTCAACAGGCTCCGTGGTCCCAGCG +CAGTCTGGGCAGTGATTTCAAGCTCGCGCCTCTCCAGCCATTCGAGCAGCGGCACCCAAG +CCATGCGTTCGCCGCCGATACGTCGGCGTGGCAATTCGTGCAAACCCAGGCCACTGAGGA +TTGCGTGGCTATAAGCCTGCAGATCATGCAGTGTGGCAATGAGCGGGATGTCGAGTGCGG +AGATGAATTTTTGCAGGCGCTCGGCGCCGAGTGTGCCACGGCGAACGCGGTTGGCGATTA +CGGCCATGCGGCCTGAATGACGGCCAAGCTTGGCAATCAATAATAGATCGGCCACTGTGC +GTGCGGCCGCATCGCTGTCCATGGTGCCGGGCAATACCGGGATCAACACGGCCGCTGCGC +CGCGTACAGTATCGGCCAGTGCGATTCCGCTCAGTCCTGCCGGCGTATCTACAACCAGGC +GTTCCGTGTCCCGTGGAATGCGTAGTGCGTAACTCAGGGTTACGCCCGAGGTTTGATTGG +GCAGACTCAGGGTGTGAATGGACGGAAGCGATTCAGGACGGCGTCGCAACCAGCTTGTGG +CTGCCCGTTGCGGGTCGAGGTCCAGCAGGCAGGTTTTATACCCTTCGCTTGCCCAGAAGG +CCGCAAGCGTTGTCGAGACGGTGGTCTTCCCGCTGCCGCCTTTCAAGTTGGCGATGACGA +TCTTTTTCATCATCGATTCCGATGAATTGTCCAGGTTAGTTTACTGCCTTGCCGCATGTA +CTGTGCGATCATCTGTTTGCATATGTGTTGAAGTACACTAAACCGATGTCAATAACAGCC +TTAATTCTGCAAGGTGGTGGGGCGCGCGCGGCTTATCAGGCCGGAGTGCTGGCGGGTATA +GCCGAGTGGTACGGCGAGGATGAGGCCGTACCTTTCCGGGTTATTTCGGGAACGTCGGCG +GGTGCCATGAATGCCGCCTATCTGTCGGCCAACATGGAAAATTTTGCGCACGGCACACAG +CGTTTGGCCCAAGTGTGGTCGCAACTGGAAGCGCAGCAGGTTTATCGACCAGAATACCGC +AAGGTGTTTGGTGCCTTATTGCATTGGGCCTGGTCGCTACTGTCCGGCGGGTTGGGGGAT +TCCAATCCGCGTTCCTTGCTGGATAATTCGCCGCTACGCGCCTTGCTGGCCGAGAATATA +GATTTTGACGCTATTGCGCGCAATATCGAGCGTGGCCTCCTGCGCGGTGTGTCGGTGACA +GTGGCGGGATACAGCACAGAACGTTCACTGAGTTATTTTCAGGCTGAAACCGGAGTACAG +TCCTGGTGGCGGCAACGACGGGAAGGACGCCCGGTGCAAATGACGCTCGACCATGTCATG +GCTTCACTGGGACTGCCCATAATTTTCCCTGCCGTGAAAGTTGCCGGCGAGTGGTGCGGG +GATGGTTCTACCCGAGAGTTCGCTCCGTTATCGCCAGCAATTCACCTGGGAGCAAAGCGG +GTGCTGGTGATTGATACACAGTATCCTGCGCCACAGCATGTGCTAGGTCAGGACCAGGCA +TATCCTTCATTGTCAAAAATAATGGGCTATCTGTTCGATTCCGTGTTTTCGGATAGCCTG +TATGCCGATCTGGAACGAACCAAACGCATTAATCGCACGTTGGATTACATCAAGAGGCAG +AGTGGGCACGAGCCCCCTGAGTTGGGTTTGAGCCACATCGACACTTTGGTGATTGCACCA +TCGCGTCGTCCATTGGAGATTGCCAGCCGCTATGAATCGCATTTGCCCAAATCCATGCGT +TGGATTCTGCGTTCTCTGGGCGGTGATGTGAGTAGTGGAGATCAATTGCTCAGTTACATG +CTTTTCCAGAGCGGCTATTGCAGCGAAATGGTGGCCTTGGGTCGGCACGATGCGCATGCT 
+CGTCGTGAGGAGATTGGCCAGTTTCTCGGCTTGTCGAAAATCAAGGTACGCTGAATGACG +TTTTAGTCGTCGGCAAGGATCGGTTTGACGTCTCGTGTCGGCGTATCCAGGTCACGATGG +CGAACGGTCACGGCCCAGCCGCCGTGCGCGAGCTGTGCGGCCAATTGTTCCGCCATGTAA +ACCGAGCGGTGGCGCCCGCCGGTGCAGCCGATGGCAATGGTGAGATAGCTGCGGTTGCTG +GCGGCATAGCTGGGCAGCCAGGCGTTCAGGAAGTTCACCAGATTGCTCAGCATACGAGTG +ACGGTGCCATGCGTTTCCAGGTATTCCGCGATGGGCTGGTCAAGCCCCGTCAATTCGCGC +AGGGTGGGTTCCCAATAAGGGTTAGGCAGGCAACGCATGTCGAAAACCAGATCGGCATCT +TGAGCCAGCCCTCGACGGTAACCGAATGATTCGATTTGCAATGCCATGGGATGATCGCCG +CCTTCGTTCACGCGTGCCCGGATGATTTCGCGTAGCTGATGGATATTGGTATGCGTGGTG +TCGATACAGGCGTCAGCCAATTGGGCTAAAGGTTCCAATAGCGTGGTTTCAATCTCGATT +GCCGCAGCCAGGTCCGTGTCGGGGCCTGTCAATGGATGACGTCGCCGCGTGGCCCGATAG +CGTTTGACCAGGGTGCCGGGTTCGCAAAACAAGTAGATTACGGTACAGCCGTGAGGGCGC +GTCCGCCAGATTTCTATCTGGCGGGCCAAGGCTTCGAATTCTTCCGGTGTGTTATGTGGG +GCAATACCGATCGCAAGCCGTGAATGGTGTCGGGCGTCATCGCCCAGCATGGCGTCAATC +GCAGGTTCGATCAGCGACAGAGGCAAATTGTCGACAATCTGATAGCCGAGATCCTCGAGC +ATATCGAGTGCGCCGGTTTTACCGGCACCTGAGAGTCCACCCAGCAAAATGAGATTCATT +CGGGTGAGTTTAGCGTATGAGCGTACGGGGTGCCGGTTGGCCTCGTTCCGACACCTTGCG +ACGGTGCCGGAACGAGGTTGGGTGAGGAAAGGAATGGATGAGTTCAGTTTTCCATATTCA +GCCGCGCGGTTGGGGTGGCGTGATGGCTGGCATGCCGTTCCTTGTGGCGCACGAGTTGCC +GGTCAAGCCTGTCAGCCAGTATATCGATGGCAGCATACATGTCTGTTTCGTTGGCCTCTG +CGTAAACGGTTCCTCCACGTACTTGCAGAATGCCTTCCGCCTTGTGATCAAGGTTTTCGA +CGCTGAGGATGATCTGCGTCGAAGTGGGGGGTTCGTGGTGTCGTCCGAGTCGCGACAGCT +TGGTATTGACGTAATCCTTCAAGGCTGGTGTGACATCCAGATGGCGGCCGGTGATCGTGT +TTTGCATAACGTTACCCCTTGCTGATAGCTACGGAATGAGCCGGACGTTTGCGCGCCCGA +GTGGATGCGAAACCGAGGGCTTCGCGGTATTTGGCGACGGTGCGTCGTGCAATCGGCATG +TTGTCCCTGGCCAGTGCCGCTGCCAGTGCAGCATCGCTCAGAGGAGCTGCAGGGTCTTCA +TGGTTCAACAGATGGCGCAGACGTTCATGCACGGCACGGGCTGATATGGCTTCGTCATTA +TCATTAGACAAGGCGTTGGAAAATAAGTGCCGCAAAGTAATGACCCCGCGAGGGGTGGAC +ATGGCTTTCCCCTGTACGGCTCGGGATATGGTGGACTCATGCATGTCGAGTTCTTTTGCC +ACTTCGCGCAACGTTAATGGCGCCAGATGCATGACCCCCTGATCAAGAAAGGCTGTCTGG +CGCCTGACCAGCACACGTGCAACACGCAGCAGGGTGTTTTCGCGTTGAGCAAGGCTGCGT +ATCAGCCAGCGGGCTTCTTCCAGTTGCCTGGTCAGGGATTGGGATGCCTCGTTGAGCCGG 
+TTTGCGCTGAGCCAGGCGACATAGGTTGCGTTTATCGATATGCGGGGGTGATTGCCCGGA +TAGAGTTCTACTTGCCAACCATCGGGTGTGCGGTAGGTGCGTAATTCCGGGATCAGGTAT +TCAGCTGTTTGCGCGGAATAGTCGTTTCCTGGCCTCGGATTCAGGCTCTGTATCAGGCTC +AACGCTCTGCGGACGCTGTCTTCATCGCTACAGGTCAGGCTTGCCAGCGTGGCCATATCT +GCTTGGGTCAGGCGGGCGCAATGGCCGTCTATCAGTTCACGGGCGACAACCAGTCCTGCC +GTATCCGGCGGCAAAGCTTCCAGTTGCAGGGACAAGCACTCGGCGGTATCGCGGGCGCCG +ATGCCGATCGGGTCAAGGCGTTGGACACGGTGCAGTATGGCCTCGATTTCAATCAGTTCG +GGGCTGGGATCGAGGGCATCAAGTTCCTGACTCACAGTGTCGAGATCTTCGCGCAGATAT +CCGTTGTCATCCAGAGAGTCGACCAGGGCCAGTGCGATCAGGAAGTCGCGATCCGAGAAT +CGCTCCAGAACGAGTTGTTCGATCAGGTGTTGGCGCAAATCGGGTGGACGAATATCTTCG +AAAGTCTGCATGTCGGGGCGCTCCCCCGTGCCTGCACTGCTCGACCAGGACAACTCATCC +CACTCGCCAGCAGGTTCGGTCTGCTCCATGACGGTGGCAACCTCAGGATCGCCTTCTCCA +GTTTCAGGCTCAAGCCGTTCGAGCATGACGTTTTCTTCCAACGCCGTTTCCAGTATCTGT +TCCAGATCCGGGCGCGGTAATGCCAGCAAGCGCAGGGCCTCGGTTTGGCGGGGTGAAAGG +CCCAGGCGCTGGGATATCCCGGTTTGTAGACGCGGACTGGAATTCATGAGTGGAAAATCG +TCATTCCCATGTCCCTAGTTATAGCGTAAATTTGTCGCCCAGGTAGATTTCCCTTACGGT +TTGGTTGTCGGCGATTTCTTTCGGACTGCCTTCGGCCAGCATGGTTCCACGGTTCATGAC +GTAGGCCCGGTTACAGATATCGAGTGTTTCACGCACGTTGTGTTCGGTGATCAAGACGCC +GATGCCGCGTTTGGCCAGATGATCGATGATGCGCTTGATATCGGCTACCGATACCGGATC +GACGCCTGCAAAAGGTTCGTCCAGCAGCATGAAGGCAGGTTCATTGGCCAGCGCCCGGGC +GATTTCCACCCGTCGTCTTTCTCCGCCCGACAGGCTGATGCCCTTGTCCTTGGCCAGGTG +CGTCACGCCGAGTTCATCGAGCAACTCATCGGCGCGACGTTGTTGCTCCGCGCGGGTGCC +GTGCCCACCCAATTCCAGCACGGCAATCAGATTGTCACGCACGCTCAGGTTGCGGAAGAT +CGAAGCTTCCTGAGGCAGGTAGCCGAGTCCGGCGCGAGCGCGGGCATGTATGGGCAGGCC +GGTTATATCGCGCTGATCCAGGTAAATATTGCCGCTGTCTGTGGGCACCAGGCCCAGGAT +CATGTAGAAAGTCGTGGTCTTGCCGGCACCGTTGGGGCCAAGCAGGCCGATGATCTCGCC +GGAATTGACGTTGAGGTTCAGCCCGCGCACGACCTCGCGGGTGCGATAGCGTTTGTGCAG +GTTCTCGGCGCGTAGTTGGCTCATGGTGAGGTACGCGCGGCCGGGCTCAGTACGGCGTGT +ACACGGCCGTTGCCATTGCCACTGGCTTCGAGCTGGCGGGTGGCGATGCGGTACGTGATG +TGCGCGGCGGAAAAATGTTCGCCCGGGCGCGAAAAATGCACCTGTCCGTCGAGCTGCAGG +ATATCGGTTTTTCCGTTGTATGTGATGGAATCGGCTTCGCCGAGTACGTGCGGTTTTCCG +GGTTCGCGCAATTCAAACCGGGCGGGTGTACCGGTTACAACGACCTTGCCAATGGTATTG 
+GATCGACCATGAATGACAGCGGAATGCCCGTACAGGCGCAGTGCGCCACGTGTAACGATC +ACATGTCCCGAATAGGTCACCTCGGTGCCTTGTCCGGTGGTGTTGGCCATGGATGAGTGA +TCGGCATTGATGGTCATCACGCCAGACACCGAAGACGAGGCGTTGGCCGACCCCGGAGCG +GGCGTGCCCAGTGCGGGAAAGGCGCAACAAGCCAGGAGGGCGGTGCAGCAGGCGAACAAC +TTAGGGTGCATATTGACTGGTAACATCCTTGAGCAAAGACAGCGTACCGGTTTGCAGGTT +CGCGGCCAAGCCCGTGCCTCGTGTTTCCCGGTGTCCCTGGCGCAGGATCACGCGGTAGCG +GCTGGCGAGGCGCGCCTCGGGCAGCAGCACGGTGACTTCGGTGGTCAGGAAATGCACGGG +TATGGGTGTATCGAGTTCATGCGCCCGCACATGACCAATCAGATTCAGCCGGGTACCGTT +TGCGAACAGGCGGCCGTTATCGGATTGCAAATGCCAGGGTTTGCCGTGAACGGGAAAGTA +GTCGAGATCGACGCGGGTGAGCAGGGCAATATTGTTGCGTGGCATGTGTGCGATACGGCG +TGCTTTGATGATGTAGCGCCGGGAGCCGTCCTTGGCGAAGCGGGTAACTATGGCTTGATG +GAGCAGGTAGTCGGGGCGCTCTACGGCATGGGATGCCAAGTCGGTCGCGGGGGAATGGTC +ATGACTGCGCAGGAACAGCCAACTTGTCAGGGCCGCAGCACCGAGCAAAGCTGCCCAGGC +GCTGGCGACGGCCCAGCCGCGCGATAGCCTCACGACTTGCCGCCCGCGTTGGCCTGACGG +GCGGCGAGCAAGCGCTCGCAAACCTCCCGTACCGCACCTTGGCCGCCCGTTGCTTGCGTA +CACCAATCGGCCTCATCGCGAACCCGGGCCACGGCATTAGCCGGAGCAATGCGCAAGCCC +GCTATGGCAAAAGCGGGCAAATCGGCTTCATCGTCGCCCATGAAGGCGCAGAATGCGAGT +CTGACGCCAACGGCTTGGGCAGTCTCGGTCAGGCAATGTCCCTTGTCCTTGACGCCCTGA +TGAACCTGGATAATACCCAATTCCCGAGCGCGATGCGTGACGATGGGAGACTGACGCGCG +CTGATCAGAGCCACGGCAATGCCTTCGCGCATCAGCCGTTTGATGCCGGCACCGTCATGG +GCGTTGAAGGCCTTGATTTCGCCAGCCGTTTCCGAATACCAGAGGCGTCCGTCGGTCAGC +ACACCGTCCACATCCAGTGCCAGCAGACGGATAGCGGCCAGTGGCGTACTCATACCACCC +CCGCTTGCAGCAAGTCATGCATGTTGAAGGCACCGACGGGTTTGTCATCCGTGACCACCG +GCAGGCTGTTGATATGGCGGGTTTCCATTAGCCGCAGGGCCTCTGCTGCGAGCATATCGG +GCGTAGCCACGGTACATTCGCGCGTGATCAGATGTTGCATGGGGGTATCGAGATTGACGC +CCTGGTCCAGGTGACGGCGCAAATCGCCATCGGTAAACACCCCCGCGAGCCGGCCTTGCA +CATCGGTGATCATTGCCATGCCGAGTCCTTTGGCGCTCATGGTGACCAATGCGTCGCCCA +GGCGCGTACTTTCGTTAATGATCGGCATGGCGTCATCGGTATGCATGATGTCGGCGACAC +GGACCAGTAGCCGTCGGCCGAGTCGACCGCCGGGATGCGAGCGGGCAAAATCCTCGGGAG +TGAAACCGCGGGCCGCGGATACGGCCAGAGCCAACGCATCGCCCATGGCCAGCGTGGCGC +TGGTGCTTGCCGTGGGGGCGAGGTTCAGGGGGCAAGCCTCTCGCGATACGGAAATATCGA +GGTGGCAGTCGGAGGCGCGCGCGAGCATCGAATCGGGTTCTCCGGTCAGCGCGATCAGTG 
+CTACGCCCAGGCGTTTGATCAGCGGCACGATGGTGACCACTTCGGGGGTCTGGCCGGAAT +TCGACAAGGCAATCATCACATCCTCAGATGTAATCATGCCCAGGTCGCCATGGCTGGCTT +CCGCAGGATGTACGTAAAAGGCAGGGGTTCCCGTGGAGGCCATGGTGGCCGCGAGTTTGC +GCCCGATGTGCCCGGATTTTCCCATGCCGGTGACCACGACACGCCCGCGGCAGGCCAACA +GGAGTCCACAGGCGGCGGCAAAGGTTTCATTCAGTCGCGACTCCAGGGCGGATACCGCAG +CGGCTTCAATCTGGATGACTTCGCGCCCGGACTCGATCAAACCGGTCGCTGACGCAGGAC +GGTTCACAAACACTTCTCCGATAGTGGTGCGCCCATTTTAGCCGCACGGCGCTCTGGTAA +ACTTGGTGGTCTCAGTCAAGTGGAAATTGCATGGACCGGACAGACTTGCAGCAGCTCCTG +GAACAGGCGTTCCCAAACGCTGAAATCGAGGTCCAGTCCGAGGATGGCGTACATTTTGCG +GCCCGTCTGATCGATGCCGGATTCTCCGGGCAGGGACGCCTGGCCCGCCATCAGACCGTC +TATGCGGCGTTGGGCGCGCGGGTTGGCGGGGAGATTCATGCATTATCCCTGCAGACGTTG +ACCCCCGAAGAAGCCGGAGCCCGCGCTTGAACCGGCTGTTGATACGTGGTGGGGGCCGAT +TGGACGGAGAAATCCGTATCTCCGGTGCCAAGAATGCCACCTTGCCCATTCTTGCCGCGT +CACTGTTGGCCGAAACACCGATCACCATCGGCAACGTTCCGCATCTCCATGACGTGACTA +CCACCGTAACCTTGTTGCGCCGGATGGGTGTGGATGTCACGGTGGGCGAACATATGGCGC +TGGAAGTCGATTCCAACACTATCAAGGACATGGTTGCGCCTTATGAACTCGTGCGTACCA +TGCGCGCCTCCATTCTGGTGCTGGGGCCGTTGCTGGCCCGTTTCGGATGCGCCGAGGTCT +CGTTGCCCGGCGGTTGCGCTATCGGTTCGCGTCCGGTTGAGCTGCACCTGAAGGGTTTGT +CCGCCATGGGGGCCGATATCGATGTGCGTAACGGCTACATCTATGCGAAAGCGAACCGTT +TGCGCGGTGCGCGGATATTCATGGATATGGTCTCGGTGACCGGCACCGAGAACCTGATGA +TGGCTGCGGTGCTGGCCGATGGTGAAACGGTTATCGAGAATGCTGCGCGCGAACCGGAAG +TCGTTGATCTGGCCAATTGTCTGAACAAGATGGGCGCCGAAATCGAGGGTGCCGGCACCG +AGACCTTGCATATCCAGGGCGTGGCCAAGCTCGAAGGGTGTTATTACGAAGTGATGCCTG +ATCGTATCGAAACCGGCACCTATCTGGTGGCGGGCGCCCTGACCGGTGGTCGGGTGCGCG +TCAAACGCACCCGACCCGACATGATGGAGTCAGTACTCGAGAAGCTGCGTGAAGCGGGTG +CCGAAATCACCAGCAAGGATGATTGGATCGAGCTCGACATGAAAGGCCGCCGCCCACAGG +CCGTCACCCTGCGCACGGCACCATATCCCGCGTTCCCCACAGACATGCAGGCCCAGTTCA +CGGCTTTGAACGCAGTGGCCGAAGGCAGTGGTGCAGTTACCGAAACCGTGTTCGAGAATC +GTTTCATGCACGTACAGGAATTGGTACGCATGGGAGCGCGCATCCAACTCGAAGGCAACA +CCGCCATGATTCAGGGGGTAGAACGTTTAACAGGCGCGCCGGTGCTGGCAACCGACCTGC +GTGCATCGGCCAGCCTGGTGCTGGCGGGTTTGGTTGCGGAAGGCGAAACCGTGGTGGACC +GCATCTACCATATTGACCGCGGTTACGAATGTATCGAGGAAAAGCTGGCGCAACTGGGCG 
+CGCACATCCGCCGCGTTACCTCATGAATATTGAACGTCTTCGATCCCGGTCTACCCCACG +CTTGCCATGGCTAGGCTCTATAATCAAGGCGTTCTTGCCACCGCGGCTCTCATGATCCTT +TACTCACGACCCGATGATCCTGCTGCCCACAGTATCCGTTTGGTACTGGCGGAAAAAGCA +ATCGGCGTCAAAATCGTGGAAGTCGAACCCGACTCGCCACCGGAAGATTTGCTGCACCTG +AATCCTTACGGCACCCTGCCGACACTGGTCTCGCGAGAAGTCGTCCTGTACGATCCGCGC +ATCATTGCCGAATTCATTGATGAGCGTTACCCGCATCCGTCGTTGTTGCCCAGCGATCCG +GTGCTGCGTGCGCGAGCACGTTTGTTTGTCAGCGAGATTGGCGGAAGCTGGTACGAGTTA +TGTGACGAAGTCGCCAACGGTGCCGGTCGCGGACGTACACGCGCCCGACGCGAGCTGACC +GAAGCGGTGGTCTCCAGTGACGAGTTGTTTACCGGCACGGCATATCTGCTGGGCGGAGAC +TATGGACTCGCCGATTGCGTGGCGGCGCCCGTGCTTTGGCGTTTGCCGCATCTGGGTGTG +CGTTTGCCACGCGAAGCGAAAGCCATTCGCGGCTATATGCAGCGGGTATTCAAGCGTCCG +ACCTTTGTGTATGCCCTGGTGGCTTCCGAACGGGCCATGATCGAAAGCTGACCTCTGCCG +CATGACCGAAAAGCCCCGTCCGTCCCGCAAGCCCTACCTTGTTCGCGCCCTCCATGAGTG +GATGGGGGATGCCAGCTTGACGCCACAAATCATTGTTGATGCAACCGTGGACCATGTCGA +TGTTCCTGTCGAGCATGTACATGATGGCAAAATCGTGTTGAACCTGAGCCTGGAAGCGGT +GCGCGATCTCGAACTGGGGAATGACGCAATCACCTGCACGGCCCGGTTTGGCGGTGTGGC +CCGATCATTATGGGTGCCCATGAAGGCTGTGCTGGGGATTTATGCGCGCGAGACCGGCGA +AGGCGTGGCGTTTGCGTGTTCCTAGGCTGTTTCCTGGTGGGATCCTCTAGAGTCGACCTG +CAGGCATGCAAGCTTGAGTATTCTATAGTCTCACCTAAATAGCTTGGCGTAATCATGGTC +ATAGCTGTTTCCTGTGTGAAATTGTTATCCGCTCACAATTCCACACAACATACGAGCCGG +AAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTT +GCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGG +CCAACGCGAACCCCTTGCGGCCGCCCGGGCCGTCGACCAATTCTCATGTTTGACAGCTTA +TCATCGAATTTCTGCCATTCATCCGCTTATTATCACTTATTCAGGCGTAGCAACCAGGCG +TTTAAGGGCACCAATAACTGCCTTAAAAAAATTACGCCCCGCCCTGCCACTCATCGCAGT +ACTGTTGTAATTCATTAAGCATTCTGCCGACATGGAAGCCATCACAAACGGCATGATGAA +CCTGAATCGCCAGCGGCATCAGCACCTTGTCGCCTTGCGTATAATATTTGCCCATGGTGA +AAACGGGGGCGAAGAAGTTGTCCATATTGGCCACGTTTAAATCAAAACTGGTGAAACTCA +CCCAGGGATTGGCTGAGACGAAAAACATATTCTCAATAAACCCTTTAGGGAAATAGGCCA +GGTTTTCACCGTAACACGCCACATCTTGCGAATATATGTGTAGAAACTGCCGGAAATCGT +CGTGGTATTCACTCCAGAGCGATGAAAACGTTTCAGTTTGCTCATGGAAAACGGTGTAAC +AAGGGTGAACACTATCCCATATCACCAGCTCACCGTCTTTCATTGCCATACGAAATTCCG 
+GATGAGCATTCATCAGGCGGGCAAGAATGTGAATAAAGGCCGGATAAAACTTGTGCTTAT +TTTTCTTTACGGTCTTTAAAAAGGCCGTAATATCCAGCTGAACGGTCTGGTTATAGGTAC +ATTGAGCAACTGACTGAAATGCCTCAAAATGTTCTTTACGATGCCATTGGGATATATCAA +CGGTGGTATATCCAGTGATTTTTTTCTCCATTTTAGCTTCCTTAGCTCCTGAAAATCTCG +ATAACTCAAAAAATACGCCCGGTAGTGATCTTATTTCATTATGGTGAAAGTTGGAACCTC +TTACGTGCCGATCAACGTCTCATTTTCGCCAAAAGTTGGCCCAGGGCTTCCCGGTATCAA +CAGGGACACCAGGATTTATTTATTCTGCGAAGTGATCTTCCGTCACAGGTATTTATTCGC +GATAAGCTCATGGAGCGGCGTAACCGTCGCACAGGAAGGACAGAGAAAGCGCGGATCTGG +GAAGTGACGGACAGAACGGTCAGGACCTGGATTGGGGAGGCGGTTGCCGCCGCTGCTGCT +GACGGTGTGACGTTCTCTGTTCCGGTCACACCACATACGTTCCGCCATTCCTATGCGATG +CACATGCTGTATGCCGGTATACCGCTGAAAGTTCTGCAAAGCCTGATGGGACATAAGTCC +ATCAGTTCAACGGAAGTCTACACGAAGGTTTTTGCGCTGGATGTGGCTGCCCGGCACCGG +GTGCAGTTTGCGATGCCGGAGTCTGATGCGGTTGCGATGCTGAAACAATTATCCTGAGAA +TAAATGCCTTGGCCTTTATATGGAAATGTGGAACTGAGTGGATATGCTGTTTTTGTCTGT +TAAACAGAGAAGCTGGCTGTTATCCACTGAGAAGCGAACGAAACAGTCGGGAAAATCTCC +CATTATCGTAGAGATCCGCATTATTAATCTCAGGAGCCTGTGTAGCGTTTATAGGAAGTA +GTGTTCTGTCATGATGCCTGCAAGCGGTAACGAAAACGATTTGAATATGCCTTCAGGAAC +AATAGAAATCTTCGTGCGGTGTTACGTTGAAGTGGAGCGGATTATGTCAGCAATGGACAG +AACAACCTAATGAACACAGAACCATGATGTGGTCTGTCCTTTTACAGCCAGTAGTGCTCG +CCGCAGTCGAGCGACAGGGCGAAGCCCTCGGCTGGTTGCCCTCGCCGCTGGGCTGGCGGC +CGTCTATGGCCCTGCAAACGCGCCAGAAACGCCGTCGAAGCCGTGTGCGAGACACCGCGG +CCGGCCGCCGGCGTTGTGGATACCTCGCGGAAAACTTGGCCCTCACTGACAGATGAGGGG +CGGACGTTGACACTTGAGGGGCCGACTCACCCGGCGCGGCGTTGACAGATGAGGGGCAGG +CTCGATTTCGGCCGGCGACGTGGAGCTGGCCAGCCTCGCAAATCGGCGAAAACGCCTGAT +TTTACGCGAGTTTCCCACAGATGATGTGGACAAGCCTGGGGATAAGTGCCCTGCGGTATT +GACACTTGAGGGGCGCGACTACTGACAGATGAGGGGCGCGATCCTTGACACTTGAGGGGC +AGAGTGCTGACAGATGAGGGGCGCACCTATTGACATTTGAGGGGCTGTCCACAGGCAGAA +AATCCAGCATTTGCAAGGGTTTCCGCCCGTTTTTCGGCCACCGCTAACCTGTCTTTTAAC +CTGCTTTTAAACCAATATTTATAAACCTTGTTTTTAACCAGGGCTGCGCCCTGTGCGCGT +GACCGCGCACGCCGAAGGGGGGTGCCCCCCCTTCTCGAACCCTCCCGGTCGAGTGAGCGA +GGAAGCACCAGGGAACAGCACTTATATATTCTGCTTACACACGATGCCTGAAAAAACTTC +CCTTGGGGTTATCCACTTATCCACGGGGATATTTTTATAATTATTTTTTTTATAGTTTTT 
+AGATCTTCTTTTTTAGAGCGCCTTGTAGGCCTTTATCCATGCTGGTTCTAGAGAAGGTGT +TGTGACAAATTGCCCTTTCAGTGTGACAAATCACCCTCAAATGACAGTCCTGTCTGTGAC +AAATTGCCCTTAACCCTGTGACAAATTGCCCTCAGAAGAAGCTGTTTTTTCACAAAGTTA +TCCCTGCTTATTGACTCTTTTTTATTTAGTGTGACAATCTAAAAACTTGTCACACTTCAC +ATGGATCTGTCATGGCGGAAACAGCGGTTATCAATCACAAGAAACGTAAAAATAGCCCGC +GAATCGTCCAGTCAAACGACCTCACTGAGGCGGCATATAGTCTCTCCCGGGATCAAAAAC +GTATGCTGTATCTGTTCGTTGACCAGATCAGAAAATCTGATGGCACCCTACAGGAACATG +ACGGTATCTGCGAGATCCATGTTGCTAAATATGCTGAAATATTCGGATTGACCTCTGCGG +AAGCCAGTAAGGATATACGGCAGGCATTGAAGAGTTTCGCGGGGAAGGAAGTGGTTTTTT +ATCGCCCTGAAGAGGATGCCGGCGATGAAAAAGGCTATGAATCTTTTCCTTGGTTTATCA +AACGTGCGCACAGTCCATCCAGAGGGCTTTACAGTGTACATATCAACCCATATCTCATTC +CCTTCTTTATCGGGTTACAGAACCGGTTTACGCAGTTTCGGCTTAGTGAAACAAAAGAAA +TCACCAATCCGTATGCCATGCGTTTATACGAATCCCTGTGTCAGTATCGTAAGCCGGATG +GCTCAGGCATCGTCTCTCTGAAAATCGACTGGATCATAGAGCGTTACCAGCTGCCTCAAA +GTTACCAGCGTATGCCTGACTTCCGCCGCCGCTTCCTGCAGGTCTGTGTTAATGAGATCA +ACAGCAGAACTCCAATGCGCCTCTCATACATTGAGAAAAAGAAAGGCCGCCAGACGACTC +ATATCGTATTTTCCTTCCGCGATATCACTTCCATGACGACAGGATAGTCTGAGGGTTATC +TGTCACAGATTTGAGGGTGGTTCGTCACATTTGTTCTGACCTACTGAGGGTAATTTGTCA +CAGTTTTGCTGTTTCCTTCAGCCTGCATGGATTTTCTCATACTTTTTGAACTGTAATTTT +TAAGGAAGCCAAATTTGAGGGCAGTTTGTCACAGTTGATTTCCTTCTCTTTCCCTTCGTC +ATGTGACTTGATATCGGGGGTTAGTTCGTCATCATTGATGAGGGTTGATTATCACAGTTT +ATTACTCTGAATTGGCTATCCGCGTGTGTACCTCTACCTGGAGTTTTTCCCACGGTGGAT +ATTTCTTCTTGCGCTGAGCGTAAGAGCTATCTGACAGAACAGTTCTTCTTTGCTTCCTCG +CCAGTTCGCTCGCTATGCTCGGTTACACGGCTGCGGCGAGCGCTAGTGATAATAAGTGAC +TGAGGTATGTGCTCTTCTTATCTCCTTTTGTAGTGTTGCTCTTATTTTAAACAACTTTGC +GGTTTTTTGATGACTTTGCGATTTTGTTGTTGCTTTGCAGTAAATTGCAAGATTTAATAA +AAAAACGCAAAGCAATGATTAAAGGATGTTCAGAATGAAACTCATGGAAACACTTAACCA +GTGCATAAACGCTGGTCATGAAATGACGAAGGCTATCGCCATTGCACAGTTTAATGATGA +CAGCCCGGAAGCGAGGAAAATAACCCGGCGCTGGAGAATAGGTGAAGCAGCGGATTTAGT +TGGGGTTTCTTCTCAGGCTATCAGAGATGCCGAGAAAGCAGGGCGACTACCGCACCCGGA +TATGGAAATTCGAGGACGGGTTGAGCAACGTGTTGGTTATACAATTGAACAAATTAATCA +TATGCGTGATGTGTTTGGTACGCGATTGCGACGTGCTGAAGACGTATTTCCACCGGTGAT 
+CGGGGTTGCTGCCCATAAAGGTGGCGTTTACAAAACCTCAGTTTCTGTTCATCTTGCTCA +GGATCTGGCTCTGAAGGGGCTACGTGTTTTGCTCGTGGAAGGTAACGACCCCCAGGGAAC +AGCCTCAATGTATCACGGATGGGTACCAGATCTTCATATTCATGCAGAAGACACTCTCCT +GCCTTTCTATCTTGGGGAAAAGGACGATGTCACTTATGCAATAAAGCCCACTTGCTGGCC +GGGGCTTGACATTATTCCTTCCTGTCTGGCTCTGCACCGTATTGAAACTGAGTTAATGGG +CAAATTTGATGAAGGTAAACTGCCCACCGATCCACACCTGATGCTCCGACTGGCCATTGA +AACTGTTGCTCATGACTATGATGTCATAGTTATTGACAGCGCGCCTAACCTGGGTATCGG +CACGATTAATGTCGTATGTGCTGCTGATGTGCTGATTGTTCCCACGCCTGCTGAGTTGTT +TGACTACACCTCCGCACTGCAGTTTTTCGATATGCTTCGTGATCTGCTCAAGAACGTTGA +TCTTAAAGGGTTCGAGCCTGATGTACGTATTTTGCTTACCAAATACAGCAATAGTAATGG +CTCTCAGTCCCCGTGGATGGAGGAGCAAATTCGGGATGCCTGGGGAAGCATGGTTCTAAA +AAATGTTGTACGTGAAACGGATGAAGTTGGTAAAGGTCAGATCCGGATGAGAACTGTTTT +TGAACAGGCCATTGATCAACGCTCTTCAACTGGTGCCTGGAGAAATGCTCTTTCTATTTG +GGAACCTGTCTGCAATGAAATTTTCGATCGTCTGATTAAACCACGCTGGGAGATTAGATA +ATGAAGCGTGCGCCTGTTATTCCAAAACATACGCTCAATACTCAACCGGTTGAAGATACT +TCGTTATCGACACCAGCTGCCCCGATGGTGGATTCGTTAATTGCGCGCGTAGGAGTAATG +GCTCGCGGTAATGCCATTACTTTGCCTGTATGTGGTCGGGATGTGAAGTTTACTCTTGAA +GTGCTCCGGGGTGATAGTGTTGAGAAGACCTCTCGGGTATGGTCAGGTAATGAACGTGAC +CAGGAGCTGCTTACTGAGGACGCACTGGATGATCTCATCCCTTCTTTTCTACTGACTGGT +CAACAGACACCGGCGTTCGGTCGAAGAGTATCTGGTGTCATAGAAATTGCCGATGGGAGT +CGCCGTCGTAAAGCTGCTGCACTTACCGAAAGTGATTATCGTGTTCTGGTTGGCGAGCTG +GATGATGAGCAGATGGCTGCATTATCCAGATTGGGTAACGATTATCGCCCAACAAGTGCT +TATGAACGTGGTCAGCGTTATGCAAGCCGATTGCAGAATGAATTTGCTGGAAATATTTCT +GCGCTGGCTGATGCGGAAAATATTTCACGTAAGATTATTACCCGCTGTATCAACACCGCC +AAATTGCCTAAATCAGTTGTTGCTCTTTTTTCTCACCCCGGTGAACTATCTGCCCGGTCA +GGTGATGCACTTCAAAAAGCCTTTACAGATAAAGAGGAATTACTTAAGCAGCAGGCATCT +AACCTTCATGAGCAGAAAAAAGCTGGGGTGATATTTGAAGCTGAAGAAGTTATCACTCTT +TTAACTTCTGTGCTTAAAACGTCATCTGCATCAAGAACTAGTTTAAGCTCACGACATCAG +TTTGCTCCTGGAGCGACAGTATTGTATAAGGGCGATAAAATGGTGCTTAACCTGGACAGG +TCTCGTGTTCCAACTGAGTGTATAGAGAAAATTGAGGCCATTCTTAAGGAACTTGAAAAG +CCAGCACCCTGATGCGACCACGTTTTAGTCTACGTTTATCTGTCTTTACTTAATGTCCTT +TGTTACAGGCCAGAAAGCATAACTGGCCTGAATATTCTCTCTGGGCCCACTGTTCCACTT 
+GTATCGTCGGTCTGATAATCAGACTGGGACCACGGTCCCACTCGTATCGTCGGTCTGATT +ATTAGTCTGGGACCACGGTCCACTCGTATCGTCG +>NODE_21_length_3078_cov_41.6478_ID_41 +GTAAGGGGGGATAGCAAAGAGGGGGGATGGGGCGGTGGCCCGGGAGGTTTCGGGTGCCGC +CTGCCTTGACATAACGAGCCCAAGCGGAGAGATGACCGAGTGGCCGAAGGTGCTCGCCTG +CTAAGCGAGTGAGGGGTGACAAGCTCCTCCGAGGGTTCGAATCCCTCTCTCTCCGCCATC +AGATATCACAGGGCCTGGGCCGTCCGGCCCGGGCCCTGTTCCTTTACCCTCTCACCCGCT +AGGGGCAAGCCGCGTGCTCTTCACCCAGATCATAGCCTTCGTCCTGGTCATGGTGGTCTA +CCAGGCCTACGACCCCGCCCCGCCGGACTACGGCTGGGGCTGGGGGCTTTTGCTTTTCAT +CACCGGCCCCCTGCTGGAGTGGCTCCTGGCCTCGGTCATCGCCCGCAGCGGCCTGCGCCG +CCTGGCGCGCCCCGCCGCCGATCCGGCCCGCAGCCTGCAACGCAGCGAGATCCTTCTGCA +CCTCTCGGCCCTGACCGTCTTCTTCCTGTTCATGGTCAGCTACGACCTCAAGGCCGGGCT +CATCGCCACCCCCCTGCTGGCCGCCTCGGAGACCCTCTCCGGCCTGGCCGCGCTTTTCTA +CTACGCCCTTCTGCTCATCCCGGTCTGGGGCCACTGCCACCGCCTGGAGCGCGCCGCCGG +CCGCGCCCTGGCCCTGGACCGCCGGCGCTACATCCTGGAACAGGCCCGCTTCGTGGCCCC +GGTGGCCTTCCCCTGGTTTCTGGTCTCCGCCCTGCGCGACCTCTTGACCCTGGCCTGGCC +CGGTCTCACCGCCTGGCTGGAGACCCCGGCCGGCGACCTGGCCTTTCTGGGCTTCTTCCT +CCTGGTCATCTCCTGGCTCTTTCCGCCCCTGGTGCGATCCTGGTGGGGCTGCCCGCCGCT +CCCCCCCGGCCGGGCGCGGGAGATCTGCCAGATGGTCCTCAAGGTGGCCCGGGTGCGGGT +GGGGGGCATCCTCTCCTGGGACGTGCTCCAGGGACGCCTGGTCACCGCCGGCATCCTGGG +GCTGTTTCCCCGCTTCCGCTACCTGCTGCTCACGCCTGCGCTTCTGGAGGCGCTCTCGCC +CACCGAGCTGGCCGGGGTGGTGGCCCACGAGGCGGGGCACGTGCGCCTCAAGCACATACC +CGCCTACCTCATGTTCTTCATGGCCTTCTTCCTGCTGGCCTACGCCCTGGCCGAGCCCCT +GGACATCCTGCTCCGCCTGGCGCTGCTGACCCTGGCCCAGAGCGACTGGGGCGCCGGCCT +GCTCAACTCGCCCGATGCCGGCTCCACCTTGAGCATCACTTTCGCCCTGCCGCTGTTGGC +GCTCATGATCGTCTACCTGCGCTTTGTCATGGGCTTTTTCATGCGCCACTTCGAGCGCCA +GGCCGACCTTTTCGCCCTGAACCTCATGGGCGAGGCCGCGCCTTTGGTGGGCGCCCTGGA +GAAGCTGGCCCTGATGTCGGGCCAGACCCGCGACCTGCCCTCCTGGCACCACTTCTCCGT +GGCCCAGCGGGTGAGCCACCTCCTCACCGCCCAGGCCAACCCCCCCGCCTGGCTGCACCG +CCAGGGACGCCTCATCAAGAAGGCGCTTGCGGTCTACCTGGCCGGCATGGTCCTGGTGCT +GGGACTGGGCTGGGGCATGGCCGGGCTGGACTGGAGCCGCCAGGTCAACCAAGAGCTCGC +CCTGGAGCTGGTCCGCCACCAACTGGCCCAGCACCCGGACGACCCCCGCCTGCGCTTCCA +GGCCGGTATGCTCTGCTACCAACTGGGCCGGGAGGACCGCGCCCTGAGCCACTTCCGCCG 
+GGCCTTTCTGGCCGCGCCCGACAACCCTGAGCTGCTCAACGCCATGGCCTGGATCTTCGC +CACCAGCCAGGACCCACGCCGCCGCCGTCCCCAGGTGGCCCTGGTCCTGGCCCGCCGGGC +CGTGAGCCTGTCGCCTCTGCCCCACATCTGGGACACCCTGGCCGAGGCCTACTTCGCCGC +CGGCCAGCCGGTCAAGGCCCTGGCCGCGGCGCGGGCCGCCCTGGAGGCCGGGCCCAAGGC +CCGCCTGGACTACTACCGCGCCCAACTGGAGCGCTTCAAGCGCGCGGTGGAGGATCTGAA +GAAGAAGGGTCCCGCCGGCCGGCGTCCCCGTCCTGCGGCTCCGGCGCCTGGTGGTAGGCA +GGGTTGAAGGCGGTGGTGAACAGCCTCCCCAGGATGTAGAGCAGCCCCGCCCCGCCCACC +TGTAGGCAAAGGGCCTCCACGAAAGAGAACCACACCCAGGGCAGCACGAAAAGCGCCAGC +ATGAAGAGGATCAAAAGCGGCGCCGCGAGCTTCCAGTAGCGCCGCAGGAAGGCGTCGCCC +CGCTGCCGGCGGTCCAGGAGGGGGGAAGGGACGGTTGGTTCCACGGGCGCGCCTCCTCAG +ATGGTAAAGCCCAGCCGGGCCGGGTCCCAGTCCAGGCCCGCCAGCCAGGGCGAGTCGCTC +TCTTGGATGCGCCGGGCCGCCTCCAGGAGCCACTCCGGCCGGCGGCCTCCTTCCCAGCCC +GGCATGCCCCCCCGCCACACGTAGTCCACCAGGGCCAGGAAGCCCGTGCGATGGAAGCGG +TACTGGCCGATGGCGAACTCCCCGCCCCCGGCCCGGGCCTCGAGCACCACTTCCCGGCGC +CGGCCGAAACTCTCCGCCTCCTGCTCCACCACCTCCCGCGGGGCCGCACCACGGGTGCGC +TGGCGGAACTGGTGCGGGTCGCGGGGAAAGGGCCAGCGGGCGTACAGGGCCCCGATGAAA +CCCTCCAGCCGGCGGCCGGCGATGGCCCCGTGCAGATCGCCTCGTGAGGCGGGGTCCGGA +ATCTCGAACCCCGTCCAGGAGCCTTGCCAGTCCCGCTCCGGCGGGGCCTGCGCCAGCCGT +ACCAGGGCGTCGCAGAACTCTGGGGCAAAGAAAAAGAGTTCCTCCAAAAGCAGCATGGGG +GTCTCGATGTGGAAAAAGCCGAAGGCCACCACTCCGTGGCTGCTGGAATGGAAGGCAAGC +GGTCTCTTATACACATCTCCGAGCCCACGAGACTCCTGAGCATCTCGTATGCCGTCTTCT +GCTTGAAAAAAAAAACAG +>NODE_22_length_2891_cov_11.4392_ID_43 +GTGGCGGGGGGCGCGCCAAAAGCAAAGCGGGGCCCCGGTGTTGGGGCCCCACCGGTGGTT +GCGAGGGTATGGCAGGCTAGCGGTTGGCCTTGCGCAGGCCCAGGGCGTCCAGGGCCACGA +TCCACTTGCAGATGTTGGCGCTGCCCTCCACCATGAAGTAGGTGGGGGCGTCGCGGTAGA +AGCGGGCCACGGGGTACTCGGTGGAGTAGCCATAGGCGCCCAGTATGCGAAGCGCCTGCT +GGGCGCAGAAGTTGGCCACCTCGCCGGCCTTCCACTTGGCCATGGCCACTTCCAGGGTGT +TGCCCAGGTTGCCGGCGTCCTTCTGGCAGGCGGCCTTGTAGACCAGAAGCCGGGCGGCCT +CGACCTCGATGGCCATCTGGGCGATCTGGTCCTGGATCATCTGCTGCTTGGCGATGGGCT +TGCCGAACTGCTCGCGCTCGTTGGCGTACTTGATGGACTCGTCCAGGCAGGCCTGGGCCA +GCCCCACCCCGCCGGCGGCGGCCGAGAGCCGGGTCTGGTTGAGGGAGGAGAAGACGATCT +TGGCGCCGTCGCCGGGCTTGCCCAGGATGTTTTCCTTGGGGACCTTCACGTTGTCCAGGT 
+AGATCTCGCCGGTGGGCGAGGAATGGCTGCCCAGCTTGTCCAGGCGGGTGACGCTGATGC +CCTCGGGGCGGGAGCCGTCCTCGTTCTTGAGGTCCAGCACGAAGGCGGAAAGCCCCTTGC +CCCGGGCCTCACGGTCGGTGTAGGCGTAGTAGATGATGCAGTTGGCGATGTCGGCGTTGC +TGATCCAGGTCTTGGAGCCGTTGAGCAGCCAGTAGTCGCCCTTGTCCACGGCGGTGGACT +TCATGCCCATGACGTCGCTGCCGGCGTTGGGCTCGGTGATGCCGAAGCCGCCCAGCCACT +CGCCCGAGACCAGCTTGGGGATGTACTTCTTCTTCACCTCGTCGTGCAGGCTGTAGCGGT +AGATGGTGAAGGCGCAGCCCAGCTCCAGCATGTTGATCTGCACCCGCAAGGAGGAGCTGG +CGCGGGCGATCTCCTCGGTAAGGATCATGGCGGCCAGCCAACCCATCTCGTTGCCGCCGT +ACTCCTCGGGGATGACGGTGCCGAAGAAGCCCAACTCGCCCATGGGCTTGATCACTTCCT +CATGAGGCAGATAGTGCTCCTCATCCCACTTGTCGGCATAGGGGGCGATCTGCTCCTTGG +CGAAGTCGGCCGCCATCTCCCTTAGCATCTTTAGTTCTTCGCTGAGTTCGAAGTCCATTT +TTTCTCCCTCGAAAAAGAGACCGTGGTTTATGGGGAGCGCCCAAGTCTTGGTGTGGGTTT +AACCCAAAGCGAGGGATTTGGCAACCACCAAGCGCTGTATCTGGTTGGTGCCGTCGAATA +TCTGGATGGCCTTGGCGTCCAGGTAGAGACGTCCGGCCAGGCAGTCCGGGCGCATGAGGT +CCATTCCCAGGATGCGGCTGGCCAGTTCTCCGGTCTCCATGCCGGTGTCGGCGGCGTAGC +ACTTGGACATGGAGACGAAGGCGTCCACCTGCTGCGGCGGGGTGCGGGGGTCGTCCACCA +TGCGGCAGACCCGGTAGATCAAGGAGCGGCAGGCCTCGATGCGCATCTTCATGTCGGCCA +GGGCGAAATCCCGCGACTGCCGGTCGGCGGGGTTCAGCCGCTCGGCGTTTGCCTTGACAT +AGGCCAAGGTCTGGTCGAAGAGCCCCTGGGCGGTGCCCAGGGCCATGGCCGCGGCGCCCC +ACACCCGCATGGGGTTGCAGACCCGGGTGAGCACCTGCCAGCCCTCGCCCTCGCCCCGCA +GCCGGTTGGCCACCGGCACCGGCACATCCTTCATATACATGTTGGTGGTCACCGAGCCGT +GCAGGCCCATCTTCTTCTCCGGGGGGCCGAAGCCGAGCCCCGGGGCGTCGTGGGGGATCA +AAAGCGCACTGATGCCGCCGGCCTTGGGAGCGGGTCCGGTGCGCACGAAGGTAAGGTAGT +AGCGGGCGTTGGGGCCCAGGGTGATGAAGGTCTTGGTGCCGTTCACCACATAGTGGTCCC +CTTGGCGCTCGGCCCGGGTCTGGAGGTTGAAGGCCTCGGAGCCGTAGTCCGGCTCTGTGA +GGCAAAAGGCCAGGCACTGATCGCCGGCCTGGACCAACTCGCCGAACAAACGTTCCTTCT +GTTCCTCGCTGCCGGTCAGGGCGATGGTGCGCAAGACCGCGTTGGAGGGAAAGACCAAAA +GCGCGGCGCTGGGGCTCACCCGGGCGATGTTCTCCACCATCAGCGCCAGGCGGGTGGCGT +CGGCGCCCTGGCCGCCGTAGCTGCGGGGCACGGCCAAGGTGAAAAGCCCGGCCCGGGCGA +ATATCTCCCGGGCCTGTTGGGGGAATCGGCTTTGACTCTCGATCTCCAGTGCAAGCGGCG +CCAGCTGGGTCTCCACTACCTGCTGGATGTGTCGTAGGAAGTCTTGCTGGTCCTGGTCAA +GGTGCAAATTTCACCTCCTGCCGGGATGGGTTCGACGGGGGATCTGGGCAGTTATAGCCG 
+ATTGCGCCTTTGTGAACAACCAAAAAGCATACTGCCGCCCTGGGGCGGGAATCTAACAAA +AAAAATCAAGGCTGCCCACGCCGGATTGGCGGCGGGAGCCTCAAGCGGCACTGATCCTGG +CGGTGACCGAGGCGCCCAGGGCGGAGAGCGCCTGCCAGTCGCCCTCCACCCTGCCCAGGG +GGTAGACCTGGGAGGCGGCCACCGGCTCGGGTCCGGAGGAGGCCGGGGTGGGGCCGAAGA +AGATGCAAAACGCGTTACCGGGCTCCCAGTAGGCCAGATCGCCCACCGACATCACGGTCT +GGGTCTCTCCCTCGAAGCTGCCCAGGGGTTCGCCCAAATCGCCGTAGTACTCCTCGCCCC +AACGGCTGAGC +>NODE_25_length_2759_cov_91.0869_ID_49 +CGCCCGCGTCCTCGGCCGCTCCCCCTGTCGCCCCCCTTGGCCCCCTTGGCCTTCCTAGCG +GGGGATGAAGCTGGGGCCGCCCTGGCTCTCCTCCAGCTTGGAGCCCACCTCCTCGCCGCA +GACCGCGCAGAAATAGGCGTACTTGTCGCCCTCGGGCAGCACCAGAAGCAGTTTCTTGCG +CACCGGCTGGGCCTTGCCGCAACGGGGACACAGAAGCGACTGGGCCTCTATGCCGGCGCC +AGCGGAAGAATCACCATACATCATGTCCTAAGCCCCAACTCCTCGATGAGCCGGTCCAGG +TAGTCCAGGAGGAAGTCGCGCTGGGCGCTGGAGGCATAGGCGGTGCAGTTGCAGCGCAGG +CTCTCCTGGGCCCGCACCAGAAGCTCCAGCCGCAGGCAGCCCTCCTCCACCACCACCGCC +ACCCGATGCGGTCCGCAACTCTGGCGGTGCTCCTGCTGCAAGGGCAGCAGCAGGTCCTCG +GGCAGCTCCAGCCAGTAGACCCCGCTGAGCCCCGAAGGGGTCAGGGTCTGGTCCAGGTGT +TCGCTGAGCCGGTCCACGTCCGGCGAAGACAACTCGTCGATCACGATGGAGCGCATATCC +TCTCGTTTCCCCTCTGGCGGGTGCCCTCAGCTCTCCCCCTGCCCGCCCTCCGCAGCGGAG +CGGTCCAGGCGGGCCCCCACCCGGGCGGCGTCGATCACGGCAAACAGCCAGATGCCAAGC +CCCGCCGTGCCCAGGCCCCACAGCCAGCCGGTGCCCTGGCGGGCCAGTTGCGCCCGCAGG +GCCTGCCATTTGTCGGCCGACTGGGCGGCGGCCTCGCCCAGGGCGCTCATGGCGTGGCTC +ACCTGGTAGACGGTGAGGAACAGAAAGCTCATGAAGAACAGGCTCATGAGGCAGATCAAG +AGCGCGCCCTTGCCCAGCTGGCGGTTTATGAGCTGGCCCAGCCCGGGCAGTACCAGCCCC +GATAAAAGCGGCGCAGTTATGCGTTTGGGAAGGCTCATGCCTCCTCCCGGCCGTAGAGTT +CCAAATACTCCCGGGCGATCTGCATGGTGAGCTCGTCCAGGGGACGCATCTCGCCCTGGA +TCAACACCGGGATGCCTTCTTTATAAAGATAGTCCAGGGCCTGGCGGATGCCCAAGAGCG +ACCAGACCTCAAGCCCGCTCTCCTGGCGGAAGCTCTCCATGGCGTCGGGGCCGCGCACCC +CCTCGCGCACCCGGCCCTGGGCGTCGTAGACCGCCTGGGTCTGCTCGCGGTCCACCGCCA +GCACCACGCCCAGAAGCTCCATGGGGCGCTCCAGGCGGGGCTTGAGCCAGGAAAGCTTCT +TCAAGAGCTCCAGCTTGGTGCTCATGGAGGTGCCCACGTCGTCGATGATGAGCACCTTGC +CCCCCTGGAGGGCGGCGCCGGTGACGAAGAGGTAGCCGTGGCCGGTGGCCTCGCCGTGGG +TCTTGGCCTCCTTGCGGTCGTAGTCGTAGGCCACCTCCTTGCCGTGCAGCTCGTAGAGGG 
+CGATGGCGGTGGCCTGGGCAATGGCCGAGCCCTTGTAGGAGGGCCCCACGATGCAGTCCA +GGTCCTGGTCCAGGCCATGGTGGTGGATCCACAGGCTGAAGCAGCGCCCCAGTTCCAGGG +CCAGTCGCCCGGTGCGGAAGACGCCCAGGTTCACAAAATAGGGGGTGGGGCGGCCGTCTT +TGAGGCGCAGCCCCTCCTGGAAGAACAAGCCGCCGCTCTCGGCCAGCAGGCGGGCCAGGG +TCTTTTGATACTCCTCCATGACCACTTCCCTTTCCTCGCGTCTGGCGCACCGCCCGGCGC +CCCTTGGGGCCCGGCGCGGTGAACCGTCTTGGCTGATTTGGTGAGGATCGCACCGCCAAT +TTAGTCCAGCCAGCCGGGGTTGTGCAAGCTTTCTTGTGGGATTTCGGGGGGTGCCGGCGG +GTACCAGTATCGTTACCCGTCGGTTCCAGGTTTTGAACCGTTATTTTTGTTTTTATTTCA +CGATATTAACGCCACAACGCCCTTGGGGGCGCCAGAAGGCGGTTCAAATTTTTGAACCGA +TCCCTCCAGAATCAGATCTCGGCCCGATATTGGTCTTCACATATATCATCTCGAAATAAC +ATACAAAACAATGAAAATTGTGCCTGCGACCCAAGGTTGGCACCCTGCTTGCTTGTATGT +AGGGCATCCCTAATCCCCTTTCTCCCTTTCTTAGAGGGCGCCAAGCTAAAAAAGCTTGGC +GCTTTCTATTTGGGGAGACGCCGGCGCCTCCAGGCTGGCAAGCACGCCGGGAGCTCCCGC +CACCAGTCCGGCCCTGGAGCACCCCGGCGCTTTCTATTTGGGGGGAACTGCGACCCACCT +TGGCGTGCAACCCACCCGCCTCCACGCAGACAGGGACGCCCAGCGGGGCGCCCCTTGTCG +ACAATGCAAAACAAACCAGGAAAGGGGATTACTCTTCTTTCTTCTCGTCCTCCTGCGGTG +CCTCCTCGGCGGGGGAGGAGTCCTGCTGCTCCTCCTTCTCCATGGCGGCCAGCTTGGCCT +TGAGTTCCTCCACCTCCCGCTTGTACTTGTCCACGTCCTGTTCCTCGGGGGTGAAGGGCT +CGCGGGCGGACTCGTCCTCGGCCTGGCGCTTGTCCTTCATCTCCTCCAGGCCCAGGTAGG +CGGCCAGGACGCCGCCCAGGATGAGGATGGGAGGGATGGCGCCCTTGAGCAGCCAGATGA +ACTCGTCCCACCATCCTATGAGTCCGATGATTCCCAGAATCAGCGCGGCCAGTCCTCCA +>NODE_29_length_2081_cov_1172.16_ID_57 +ACACTAGATCGCTCGTCGGCAGCGTCAGATGTGTATAAGAGACAGGGGGTGGCGAGTTGC +ATGGGCGGATCAGTCCGGTTGAAGGATGGGGCGGAAAGTGCCGCGAGCGGGGCCGCGCGT +CAAGAACCGCTATCCCCGTTTATTGGGAGACCGCGGCTTCCTCCCGGACGACGGCGAGAA +ATCCCTCCGCCAGCCGGGACAGCGACTGCGAGCGCGAATGGACGGCGCGCGGGCTGACCT +CGACGCGCGGCCGGAAGGGCCGGCGCACCAGGGTCGGGAACCGCTCGGCTTGCACCCAGG +GGTCGACCAGGCCGATGCCGGCGCCCGCCTCGACCAGCATGTAGACGGTCATCGTGTGGT +TGCAGGCGATGGCGAAATCCCGCTCGACCCCCTGCTTGCGGAAGGCCTCCTCGATGCGCG +GGGCGATGGAGTTGCGGATGTTGGTGATCAGCGGGAAGGCGCTGATGTCGCGCGGGCCCA +ATGCCGGTTGGGCCGCCAGCGGATGATCGGGCGGCAGGACGCAGACCATCTCCGAGGCGG +CCAGGCGCTCGAACTGTAGCAGCGGGTTTTCCGACGGGCCATGGATGAACCCCAGGTCGA 
+TCTGGCCGGCCGAGGCCAGTTCGAGGGTCCGTTGCGTATGCGTGACCTCGAGAAGGATCC +GCACGCCCGGGCGCTCCTCGCGGAACCGTTTGAGGGCATTGGCCAGGAAGGCGTAGGTCA +GGGTCGGCGTGCTGGCGATGGACAGCACCCCCGAGCGGTTGCCGCGCAGGTCGTTGGCGA +ATTTCTGGATGGCGCTCAGCTTGTCGAACAGGCCCTCCACCTCGGTATAGAGGTCGAGCG +TGGTCTAGGTGTCGTTGTACGTGGGATCCCCGGGTACCGAGCTCGAATTCGCCCTATAGT +GAGTCGTATTACAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGC +GTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAA +GAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCTGAATGGCGAATGGCGCCTGA +TGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCA +GTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGACACCCGCCAACACCCGCTG +ACGCGAACCCCTTGCGGCCGCATCGAATATAACTTCGTATAATGTATGCTATACGAAGTT +ATTAGCGATGAGCTCGGACTTCCATTGTTCATTCCACGGACAAAAACAGAGAAAGGAAAC +GACAGAGGCCAAAAAGCTCGCTTTCAGCACCTGTCGTTTCCTTTCTTTTCAGAGGGTATT +TTAAATAAAAACATTAAGTTATGACGAAGAAGAACGGAAACGCCTTAAACCGGAAAATTT +TCATAAATAGCGAAAACCCGCGAGGTCGCCGCCCCGTAACCTGTCGGATCACCGGAAAGG +ACCCGTAAAGTGATAATGATTATCATCTACATATCACAACGTGCGTGGAGGCCATCAAAC +CACGTCAAATAATCAATTATGACGCAGGTATCGTATTAATTGATCTGCATCAACTTAACG +TAAAAGCAACTTCAGACAATACAAATCAGCGACACTGAATACGGGGCAACCTCATGTCCG +AGCTCGCGAGCTCGTCGACAGCGACACACTTGCATCGGATGCAGCCCGGTTAACGTGCCG +GCACGGCCTGGGTAACCAGGTATTTTGTCCACATAACCGTGCGCAAAATGTTGTGGATAA +GCAGGACACAGCAGCAATCCACAGCAGGCATACAACCGCACACCGAGGTTACTCCGTTCT +ACAGGTTACGACGACATGTCAATACTTGCCCTTGACAGGCATTGATGGAATCGTAGTCTC +ACGCTGATAGTCTGATCGACAATACAAGTGGGACCGTGGTCCCAGACCGATAATCAGACC +GACAACACGAGTGGGATCGTGGTCCCAGACTAATAATCAGACCGACGATACGAGTGGGAC +CGTGGTCCCAGACTAATAATCAGACCGACGATACGAGTGGG +>NODE_2_length_41759_cov_213.726_ID_3 +CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG +TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +CACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG 
+CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGTTGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA +GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT +TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG +GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCAGTCAGAAGAAGACATTGAA +ATCACAACCTGGATTGCAAAAAAATATGGTTGCAAGTTCGATGTGGTTGATCTCCAAAAG +GAATATTGGGATACGGTAGTAAAATATACTATTGAAAGTGTAAAAGCCGGATTGACGCCG +AATCCTGACATTATGTGTAATCTCCTGATTAAATTCGGGGCTTTCAACGATCGTTTTGGG +AAAGACTTTGACCGGATTTCTACCGGTCATTATGCAACACGTTATAACACGGATGAAGGC +GTTTTTCTTTCTACCGCAGCCGACCGCGTAAAAGACCAGACATACTTCCTGGGACAAATT +ACCCCTGAACAGCTGGCAAAAACCATGTTCCCTATCGGCCATCTGCAAAAAAAAGAAGTC +CGCAAAATCGCTTCGGATATGAAACTTCCAAGCGCACACCGCCCTGACAGTCAGGGAATT +TGTTTTCTTGGGAAAATCAACTACACCGACTTTATCAAGAAATACGCAGGCGAAAAACCG +GGAGAGATAATTGAGCTGGAAACAGGAAAAGTGCTGGGAACGCACAAAGGTTTTTGGTTT +CACACGATTGGTCAGCGACGTGGACTGCGACTTGGTGGCGGCCCTTGGTTTGTCGTCAAA +AAAGACATTGAAAAGAATATTGTCTACGTCTCAAATGGCTATGACCCCATCGCTCAATAC +GATGACAAAATCTGGCTTGAAGATCTCCACTTTTTGAATAAAGTCCACGATTACAGCAAA +CTCAACGAAATCAAATTCAAAATCAGGCATCAGCCTGAATTTAATTCAGGTAAGCTGGTT +CGTGATGAAAAAGGGATAAGAATTGTTTCGGAAAATAAAATTTCGGGTATTGCTCCCGGC +CAATTTGCTGTAGTTTACGATGAGGAAGAAAGAACCTGTATCGCCAGCAGTGTGATTGCA +GAGAATCCTGAAATTGCTGTTTAACAGAAACATCAGACCACACAAAAATTTCTTCATTTC +ACCGAAGGAAAAACCTACTTTGTTTTTAAAACAGTAAGTTTTTATAACTTAGCTTGTCTA 
+AAAATTCTCGAGAACTATGAATCCTCTCCTTTTGTTCATCCTGATTGTTTCAGGTGGCCT +TTACCTGGCCGGCCATTATCTTCATAAACCAATTCTGAAGTACATCTTCAAACCGTTTAC +TACGTTCATAATTCTGTTTTTTGCTTTTATGCAATTACCGGATGTATCTGTGCAATACAA +AGATTACATTCTTATCGGTTTATTGATTTCCCTTATCGGGGATATTTTTCTGCTGTGGCC +GGAAAAAAGATTTATCCACGGACTGGGAGCATTTCTTCTGGCCCACGTCCTCTTCATCCT +GGCAATGGTTTCCGACTTCGGCCCTTACTACAATTGGCAATATCTCATTCCCATTGCTCT +GTACATGGTTATTTTCCTGTGGATTATTTTACCCAAATCAGGTAAATTTGTTATTCCGAT +AATCGTTTATGCCCTGGTCCTGATGGTATTTTTCTGGCAGGCAGCTGGACGAGCCATTTA +TCTGGCAGAAAGCAGCAGTATGCAAGCCATGTTCGGAGCCACGCTGTTTGTAGCCTCAGA +TTCCATTCTGGCTTACAACAAGTTTGTTAAAAATTACAAATGGGCGGAATTTTTCATCAT +CATTACTTACTGGGCAGCACTGTATTTTATAGCTTTGTCCGTTTAATTGAATATCATGGA +TTGGATCATCGTAATCATACTGGCAATCCTGCTGGTAGCCTTTATAATTTTGTTTCTTTT +AATACAGAAAGGATTCAGAAATCCCGTATCAGAACATACGATTCCTGAAGATTTACCTTT +TGACGTTCAGGAAGTTGAATATCCCACTAAAAATGGGAAAACGATTTATGGATGGTGGAT +ACCGGCAGATCCCAAAGCCGCTACTGTAGTTTTTGTCCATGGCTGGGGAAGAAACGCGCA +GCGCATGATGCCTTATCTCCGGAAATTCTGTTGCGGAAAGTTCAACCTCCTGGCTTTTGA +TGCCCGTGGACATGGTAACAGTGATCACGACGGCTTTTCCAACATGCTGCAATTTTCAGA +AGATATCATTGCTTCGATGAACTTTATAGAACAAGAGCATAAAGCAGAAAATAACATGTT +TTACCTGATTGGTTTATCCATCGGCGGTGCCGCTTCAATTTACGCAGCAGGTCACGATCC +CCGGGTGAAAAAAGTGCTGACCGTAGGTGCTTTTGCTCATCCGGCATCCGTTATCACAAA +ACAAATCAAAGACCGTCATATACCTTACTTTCCCATGATTTGGTTCCTCTATCGTTACAT +GAAATACGTTAAGAATCTTGATGTGGATGCTATCGCCCCGGAAAAACATATTGCAAAGGC +CCAGGCGCACTTTTTGCTGGTGCATGGGGAAATTGACCAAACCGTTCCCGTAGAGCAGGG +AAAACGGCTGAAAAAAGCAGCCGGAGACAAAGCTGACCTGTGGCTCATGCCTGAAAGAGG +CCATTCTGACTGCCATCTCGAACACGGATTCTGGGAAAAGCTCATGGAATTTTTTGAAGC +TCCCAAAACAAAAGTTCAAAAATCTTAATTTCGCCACAAATCATTGCATATGAAAAAATT +CCACTTGTTGTTGCTTTCTTTGCTCAGCGGTTTATTGCTGGCAGCAGCCTGGCCGTTACA +TGGGTTTACACCCCTTATATTTATTGCTCTGGTGCCCTTGTTTTTCGTACAACAGCAAAT +GGGCGATACCGGGAAAAGGGGAATGCTCCTCTATGCCTGGCTGACTTTTTTGGTGTGGAA +CGGACTGACAACCTGGTGGATATGGAATGCTACGCCTGTAGGAGCTATTGTTGCTATCGT +TCTCGATTCGTTGTTTCTGGCCATTGTCTTCCAGGTGTTTCATCTTTCGAAAAAATGGCT +TTTCAACAACAAACAGGGATTCTTCATTTTAATTTTTTACTGGATAGCCTGGGAACATTT 
+CAACGCAAACTGGGACCTGTCCTGGCCCTGGCTCACGCTGGGAAATGTCTTCGCCAGTAA +ACATCTCTGGATTCAGTGGTACGAATACACCGGCGTTCTGGGTGGCAGTTTGTGGATTCT +AAGCGTCAATATTCTTATCTACAACATCATCAAAAGCTTTTTGGAAAAAAGAAAACAAAG +AGCCCTTTACACGACCATACTTACAGTACTTTTCATTGCTGTTCCCATCATCATTTCCCT +GAACATTTATCATCATTATAAAGAAACCAAAAATCCTGTAAACGTGGTGGTCGTGCAACC +AAATACCGACCCATACACCGAAGAATTCAATCTGCCGCCATCCGCACTGATAAAGCGAAA +CCTGAAACTGGCAGAACAAAAAGTAACCGACAGCACAGATTATGTGGTTTTCCCGGAATC +CACCATCCAGGAGCAGATTTGGGAAGGAAGTCTGAACCGCTCCCAAAGCATCAAAACACT +CAGGAATTATGTCATGGAGCATCCGAATCTCTCTATGGTTATCGGCGCTTCAACCTTTCG +CTGGCTGAAACCGGGCGAGCACCGGACCAACGCTGCGCGTTTTTACAAAAAAGGACTTTA +CTATTACGCCTACAACACGGCATTTTTTATTGACCATTCACCTTACATCCAGGTGCATCA +CAAATCGAAACTTGTTCCCGGTGTTGAAAAGATGCCTTCCTGGCCCATTCTGAAACCGCT +TGAACATCTGGCACTGAACCTGGGTGGAACCGTTGGAACCCTGAAAGAAGATGACCACGT +ATCTTTATTTACCAATGACAGTTCAGGAACGAAAATTGCTCCGATGATTTGCTACGAGTC +CGTTTACGGAGACTATGTCCGTCAATATGTCGCGCATGGAGCTGAGCTGATTTTCGTTAT +CACGAACGATGGCTGGTGGGGCAACACGCCGGGATACAGACAACATTTTTCATTTGCCAT +TTTGCGTTCCATTGAAACACGTCGTGATGTAGCACAGTCTTCCACCACAGGATATTCAGG +TTTTGTAAACCAGCGCGGAGATGTGCTGCAAAGAACAAAATATGATGAAAAGGCTGCACT +GAGTCAGACGCTGAACCTGAACGACAAACTAACTTATTTTATGAAAAAGGGAGATTACCT +GGCCCATCTGGCAGGTTTCTTCAGTATCCTGATTTTGTTGGCTGCCATAGTTCAGGGATT +TTTGAAAAAGAGGAATTTACCGCACTAAATACTGACTATGAAAACATTTCTGGACCTGGT +AAATCAACGGCAAAGCGACCGGAAATACATCGACAAACCCGTCGAAAAGGAAAAACTGAT +GCGTTGTCTTGAAGCGGCACGTCTGGCCCCTTCAGCCAGCAATTCACAGCCGTGGACTTT +TGTGGTGGTTAACCAGCCGGAACTATGTCAGGATGTGGGAAAGGCAGCCATGGGTCCGCT +GTACAGCTTCAATAAATTTGCATCACAAGCTCCTGTCATTCTCGCAATTATCATGGAAAA +ACCCAAGGTTATTACCGAAGTTGGCGGTCGCATCAAGAAAAAAGAATATCCGCTCATCGA +CGTAGGAATTACAGCAGAACACTTTTGTTTGCAGGCAGCAGAAGAAGGCCTGGGGAGTTG +CATGCTGGGATGGTTTGATGAAAAGAAGGTGAAAGAACTATTACATGTTCCGGAAGAGAA +AAGCATCCCGTTGCTTATCACAGTCGGATACACCCCTGAAAATTACAAACACCGAAAAAA +AATACGCAAACCTATCGATTCGGCAGTAAAATTCAATACTTATGGCTAAACAAAAAACCC +AAGAGAAAATCTGGTATGCAATCTATGTTAAATCACGCGCAGAAAAGAAGGTTGCAATAG +AACTGGAAGCGGAAGGCATAGATTTCTATTTACCGCTTGAAAAGCGCTTAAAACAATGGA 
+GCGACAGGAAAAAATGGGTAGAAGAACCACTGTTTCGCTCCTATATTTTCGTCCATATTT +CCCACAAAGACTATTACCGGGCGCTGGTTCAAAACACGGTAAAATACGTGACCTTTGAAG +GGAAAGCCGTACCTGTACCACCGGAACAAATCGAAGCTGTTAAAGTCTATCTCGAGGAAA +AAGAACCCATTCAACCAAACGACGAGGACTGGGAAACCGGAAAAGAAGTGGAGGTCATAT +CCGGAAAACTTACAGGGCTAAAAGGGGTACTGATGGAAGTTAAAGGCAGAAGCCGTGTCA +AAGTGGAAATTGAAGTAGTAAGCAGCTCCATCATTTTACACATTCCTAAGAGCAAATTGC +GGTTGTTGGAATAAAACTTAACATTTCCAAAATTTGTATCTTTGAAATTCGTTACTTGTT +CAAACAACAACCACATGGGCGTTAATTTACTGGATATTATCCTGGCAGTTCCGCTGATAT +TTTTTGGCTATCACGGCTACCGAAAAGGGCTCATTATAGAGGTTACCTCGCTGGCAGCTT +TTATCCTCGGTCTGTATTTTGCCTTCTATTTTTCCAATTTTACGGCCGGAATCCTTAAAG +AATATTTCACCATTCAAACCAAATACATGGCCGCCATCGCCTTTGTAGTAACGTTTATTG +TGGTGCTGCTCATTGTGCTTGCCGTTGGAAAAATCGTGGAAAAATTCATTGATATTCTTT +TGCTTGGTTTCTTGAACAAACTGGCAGGAGGGTTGTTTGGCGTACTGAAAGGCGCGCTGT +TTTTGAGCATTATCATATTCGTAATCAACTACTTCGATGCCAGTCATAGCATCATCAAAC +AAAAAGCCAAAGACAATTCGGTTTTGTACAAGCCTGTAGAGTCCATCGCACCGGCACTCT +ATTCCTGGTTGCATTTAAAGAATTTCGATTTCCATCTTCCCAGCGAAGAAAGTGTTATAA +AAACCATCACGCATCGCGCTAACCCGGATTAAGGAAAAACAACAGACGGATAATCGCTAA +CCATGACCGATGGAATATGAGAACCGTATTGTTTATTAATAGCCTGAATAAAGTAATTAG +CAACAACGGCATTGCCTTGTGGGGTTAAATGAACGCCATCGAGTGAAAACAAATTCCCTG +TAATAAACGCTGTAGTGAAATGTATTCCATCAACGGTTAATCCTCCATGTTCGATGCTTT +TCATAATACTGTTAAAATCAACAAAGGCCAAATTATATTTCTTTGCAAGGCTCTTGATTT +CTGCATTATAACTCGTTATGGCCGACTCTACTTTTGCAGTTTGAGCCTCTGTCAATACAT +ATTGCGCCGGAACAGGTTTTTTTGTTCCCCAATAGGCAACTTCCAGTGAATCGGTAGGTA +TATCGAGCATTACATATTCATCACTTTTTAATTGTCTGAATTGCAGGCTGGCAGGCACGG +GCATGGAAGGATCGGAGATTACCATCGCGTTGGCGCCGAGCTTAAAGTTAATCCTGTAAC +TAAGGCCCATTTGTTTCATAGCAGCATTTAACTGATCAACTTCTGTTTGAGTAGTAAAGG +GGATGGCATTATAAGGTATCGTGTTAATATAGGGGATGGATGTAATATCCGGAATGCATT +GGGGGTAAAATTATTTATATTGGGCTTACAGGAGGCCAGAAAAACCAGAAATACAAAAAA +TAAAATTTTATAATATCGTCTCATGACCTTAATTTTATAAATAGATTAAAAGTGATAATT +AATTCCGATACCGGGGACTACCGTATTGGTTTTATAATAACCGCTAACATTGGCCGGAGA +TAATTCATAATAGGCCCCTGCCCGAACCATAAGCTTTTTAGAAACCTGGTACTGAGCCCC +GATTCTGGGAATCCATTGATCTTTATATAACTTTGGGCTGGAATTATTGAGTAATTGCGG 
+GTTGGTTGCAAAATGAAAGGATAAAGAATCATAAACACTCCACCTTACCCAATCCAGCTC +TGCTGCAAGAAGTAACTTTTTTGTAGCCTGATAAGCAACACCAAAATCAAAGTTGGCAGG +AAGTGGAAGGCTGGCACTAAAATGGTTCGACGGAGGTATTATACTGCTAAGGGAACCGGG +GACAAAAAATGCTGCATTTCCATTTTTCACGTTCATCTTTATTTCGGAACGGTATGTTGC +CCCTAACGACCACTGATCGGAAATTTTATAATGGACACCTACATTAAAACCAAAATTATG +AGTCTTCCCATTTAAATTAAAACCGGAGTTCCCACTATAATTCAAAGCACTGTTCATTTT +CACATTTCCCGTAGCGTAAACCAATCCGGCGCCAACACCGAATTTATTTCCCAAATTAAA +GGATATAGTTGGCTGATAGAAAATGGCTTGAAGTGAAATGTTCTGAATCAACAGCTTTAA +TGCCCAGTTGTCATCCCATTTCACTGAACTGCCATATGGTGTATAAATTGCCAGTCCGAC +AGCAATATTTTTATTCAATTTTTCAGCTGCATAAAAATAAAATGGTGTTCCTATAGGATT +ATTAGTCCTTGCCAGATAGTCACTTCCGTAACTCTCCTAAGACACATTGGAAAATATGGC +ACTTCCTCCCAATTCAACACCAAACTTTCCTTCCATAAATGACAAAGCCCCCGGGTTATA +AAACATCGAACTCGCCCCAAAACTGAGAGGGGCGCCAATTAACCCCATTCCTGTTTGTTT +ATTTCCCTGAAGGCGTACCTGGTAGCCTCCGGCAAAAACGGCTGTAGATATAAATAAAAA +CCCCACAACTAAAAAAATTGTTTTTTTCATGTGTATCTGGTTTTCTAAGTGTATAAATTA +TCTCATATAAAATTTCAAACCACCCTTCATTAATAAAGAATAGTTCAAACCTTTCTTATA +AATTGAATTTCAGGCACAACACCTTGCCCTTATACAAAATTAGATAAAAACAAACTCTTA +AACGTTAAAGAATGTTAAATCTAATTATGATGGGCAACAGGTAATAATTATTAATCTCGT +GAACGAAAAATCTTCACCATTCACACTCTGATTTTAATAGATAAACTCAGAAATCATAAC +GGTCTATCTATCGACAGAAAAAAAGAAATTCCTTGATGGGTTTTTAAAGGCATAAAGGCC +TCTAAAAATATCAAAAGGGACCCTGAAAAGAATCCCTTTGTTTTGACATTTTTGTCTGTC +ACCAAAGTTTATCCCTGGCGAGTTGCTACCCAGTAAAGCTCGCTTCCGTTTAACTTAATA +AATCAAAGGTAAGATAAAGAAAATGCTGAAGGATTTTTCTGTTAGCAAAATCACCAACTA +TTTTGTCCACATGAACAACTCGCGTTAAAAAAACCAACTATTTTGTCCATTACGCCCTAT +ATTGTCATATCCGCAGGCCCGGGCGATTACTTTGATCAGCTCATTTGTGGCGTCAAAAAA +ATTGAACAATTGCTTTGCCGATGACTCGATGATAAGCCTGCTTCTTAACTTTTCTTTTTG +GGTAGCCACGCCCACGGGGCAATTGTTGGTATGACAAGCACGCATTCCTAAACATCCAAT +GGCCTGCAAAGCAGCATTTGAAACAGCAACTGCATCAGCGCCAAGCATGAGCGCTTTGGC +AAAATCATCTGCAATTCGCAATCCTCCCGTGATAATTAACGAAACATTTTTTACGCCTGA +CTTGTCGAGATATCTTCTGGCCCTTGCCAATGCAGGGATAACAGGCACATTAATATTATC +CCTTAAAATAACGGGGGCCGCGCCTGTACCACCACTACGGCCATCAAGGATAATATAATC +AACACCTGTTTCGAGGGCAAAATCAATATCTTCCTCAATATGGCTGGCTGCCATTTTAAA 
+TCCGACCGGAACTCCGCCTGTAATTTCCCGGACATGGTTGGCCACCTTCCGGAAGTACCC +CGCCTTCTTCGCTGCAAATACCTGTGCCGGCCATTTCCGCCCCTTTGGCTAGAGCTATTT +TTTCCTCCTTTGAAAATGCACCAAAACTCATATCAGAAACAAATACCGGTTTCTCTAATA +CCAAGGGCCTTTTTGCCCCAGGCACTATTACTACCTGAGTGTTAACCCGTGTATCATCAA +ACAAAGGTTTTACGGCCAGTTGCGCCGGTAAAAATTGTATCATAATAATCATTTTACTAA +AGTTTCGCACAATCTAAAAAGCCTAAAAACATTGAAAAAATGAGTTAAAAAAAGTAGCAT +GATTGTTAGCTAATTTAATGAAAATCATTAAATTAGCAATGTTTTCCAAAACACAAACCA +TGCTACAGCACAAAGATATAAATAAGTTGTCAGAGTTGAAAAACGGATTTACCCAAAGCT +GGGTTGAACCGGATTTTATTTTTCGTTCTTTGAAATGTTTTTCATTTTCGAGCTTGAACA +AGGGGCTGTCACCACTTAAAGCAAAAGGGTACAGTTTTGAATGGGTAATGAGCCTGTTGA +TATCGCTACCATTTATGGGTATATCTTCAGTAAACAGGCTGGCCGGTGTGGTAGAGGCGA +AAAAAGATGTTTTTTACCGTCTTAAGAACAATTCTTCAATTTCCTGGCGATACATCCAGT +GGCTTTTTGCCTGCAAGTTCAATACCATTACTTCTGAAAGTACGGGTAACAATATCCAGC +CCAGGTGTTTGATTTTTGATGACACGGTCATAGAAAAGACAGGCCGGTTCATTGAGAAAG +TTTCCCGTGTGTGGGACCATGTTCAAAACCGCGGTGTGTTAGGGTTCAAACTCTTGGTAA +TGGGCTATTGGGACGGGACTTCCTTTTTGCCGCTTGATTTCAGTATTCACAGGGAAGTGG +GGAAGAACAAAGAATATCCCTACGGTTTACGGAAAAAGGATTACCGCAAACAGTTTAAGA +AAAAAAGGTCATCCCAGACACACGGTTACGATCGCTCAAAGGAAGCCGGACAAAGCAAGA +TCGACAACATGATAAAGATGTTCAAAAGGGCCCTTTCCCATGGGTTCAGCATTGATTATG +TTTTAGTTGACAGTTGGTTTACCTGTGAGGCAATCATCCAAGCGGTTACGCAAGTCAAAA +ACCAAACCGTCCATCTGATCGGCATGTATAAAATAGCCAGGACCTTATTTGAATATCAGG +GCACCAAACAAACCTACAGCCAGATAAGAAATCGTCTTGGGCGGCCTACACGCTGCCGGA +AGCTCAGGCTTTATTACCTACAGGCAACTGTTGGGTTCAAAGGCCATCAGCTCCAATTGT +TTTTCACCAGGCAGGGCAAAAACGGCAAATGGAAAGTACTGTTGACCACAGACTGCTCGA +TCGGTTTTATCCGTTTGGTTGAAATTTACCAGACACGCTGGACCATAGAAGTCTTCTTTA +AGGAATCGAAACAGTTGCTGGGGCTGGGACATTGTCAGTCGAATGATTTTGATGCCCATA +TTGCAGACTTAACCATTACCATGATCCAGCATATGCTTTTGACCTTAAGGTACCGCTACG +ACACTTACGAGTCAAAAGGGGCACTTTTTGAGAACGTTAAAGAAACAATAGCCATACGAA +AGCTCAATGAGCGGTTATGGGGGCTGTTTGTAGAACTCCTGCAGATACTGACTGATTTAT +TTGAAATAGTAGATGCAATGGAATTACTTGAACATATTATCACAAACGGCCAAGCCCTTG +AACGGTTAAAATTACTGTTTGACCTGGTGCCAGAAAATAACGAAGCAGCCTGACTGAAAA +TGGAAATTTTAAAACATTGAAAAATAATAAAATACAAAAATTTACTCCTGTTAACCTGTT 
+TTTTTACCGTGGGTTATGGAATGTGCGAAACATTAGTCATTTTAGTTTTGGATATTCAAA +CAAATTCAATGAAAAATACTTATTTACATCCGTAAAGGATGCCTGAAATTTCAGCCCCGA +TGATAAAGCAAGCTGTTCTATTTGTTCAGGCAAAAACTTATGTGAGTTCTCTGTATGGAT +GGATTCCCCCTTAACAATAAAAATGTCATCCCGAAAGTGTTTACTTGTTATTCTCATGTC +ATACAATGCTTTTAAATGCATTTCAACACGATTTTCATTTTGATTATAAAAAGCAAGATG +CCCAAAATCTGATGTTTTGAAATTAGTACCGGACACATCATTTACCACATTCAGGATATT +CTTGTTGAACTGCGCGGTAATCCCCTGTTTATCGTTATAGGCTTTATACAATATTTCAGG +GCCTTTTACCCTGTCCAACCCCAACAAAAGCCTGTCTCCCGGGTTCATAATGTTTTTCAG +GTTCCATAAAAAATCCGTTGCCTGATTACGGGTCATGTTGCCGAGCGTGCTTCCAAAAAA +GCAAATAAGCCGCGGTGTTGCTCCGGGTAACAAATCAAGGTGCTTCAAGAAATCAGCAAG +TAATCCATGAATCTTTAGCCCTTCATACCTGGATGATAATATTTCAGCCGATTTTATAAT +GGCCGATTCACTCACATCCACGGGAATATATTTTACATTACTCATTTTCTGTTCAGGAAA +ATTGTCAAAAAGAATGGATATTTTTGAACAATCACCGCTGCCCAGTTCGATGATTACCAG +GGATTCGGGGTTACCTAAAATTTCCTTTGCATGGGCAGAAAGAATTGATTTTTCTGTTCT +CGTGGGATAGTATTCCGGTAATTTGGTGATTTCTTCAAACAGAGCTGAGCCTTTTCTATC +GTAAAAGAAACGGCTGGAAATATATTTTTGTTTTGCGGTTAATCCTGAGAAAATTTCATG +GATTAAACTGCTGCGGGGTTCCTTTGACAGAAAATTTGTGATTTTAAAATTCGGTTTGAT +AATAACAGGGGCTTCAGCCTGACTCATAATTTAAATTTTAAGTATGATTAATTTCGATTT +GGATAATTTTCCGGTATTCCTGTTTCGACCGGGTTTGCCGGGTCAGCGCTCCACTCGAAC +CAGCCACCATCGTAAACAGAAACTTTTGGCCAGCCCATGAGCCAGGCATTGAACCAGGCT +TCACTGCCACGCCATCCGGTACCGCAATAAAAAGCCAGATGTTTGTCGGGTGTTATCCCG +TTAGATTTCCAAATATCTTCAATTTCATGATATTCGCGGGTGGTGTGGTCGAAATTGCGG +TAATTCTCCATGTGATAGGCGTCGCTGCCGCAATCGGCAAATATGGCGCCGGGGATGCGG +CCTTTGGGTTCAATATAATTGTAACCACTTACTTCGCCAATGTATTCCGGCCAGCTTCTT +ACACAAACCAGTTCGGCCCGGGACGATGCAAGCATTTCCTTAGCCTCCGGGGTGTCAACT +GCCAGTTCAGGATGGGCGGGAATAGTTGCTCCAAAATCTGCAACCGGCTTTTTGGGCTCA +TCTGCATAAGACACTTCATAACCGGCATCCAACCATGATTGGAATCCGCCATTCAGTACC +CTGACATCTTTTACTCCGGCATACATCATAATTAATGCTATGCGAATGGCCCCAATATCG +CCGGCAGCGCTTCCCGGAAATTCATCCGAATTATCAGGGAACATAAACTTGCCGTAAACG +ATGACTGTGGTATCGACTGTTATGCCGTGTTGCTCAAAAGTCTGTTTCAGTTCTTCAGGC +GAACGGCGGTTCCATGTTTCAGGGTCCTCGACAGCCAGCGTGTCCATGTCAATGGCACCG +GGAATGTGGCCGGACAGGTAGGCATCCCGGTTCCGGTAATGTGCATGAACAATAACATAT 
+TTGTCGTTTTCAAAATGTTGTGGTTTCCCACCGGAAATTAACTCATTTACCCAGGAGGCA +GGGACCAGATTTTGAAAACGGAAAAGTTTCTCCATCGGCAACGTTGTATCAGGGACCCAT +TCGTTAAGAAAGTTGAGATAGATACTCACCTTTTCAAACCCCGATTTTTTAAAACGATTT +GCAACAAGGCGAAAGCCTTCGTCAGGGTATCCGTAAATAACAATTTCGTTTTCCGGGAGG +ATTTTCTTCCGGTGTACCATTTCAATCCAATCTACATACTTTGTCCAGGCAAAAGGCAGA +CTTTTTGCCCCCCGGATGTGCCCTCCGCGGGATTCACCCCGGGATGCCCAGCCATTGTAT +GCATCCACAGGGCGGATGTCTATTAATTGAACGTCCTTATTATTTAATCTTTCTGTTAGT +TCCCTGGTAGAAAGCTGTTTAAAAAACTGAGGCATAGCATAATCCTTTCTTTAAAACAAA +ATTGATTAATCTTTTACTACCTGAACACCCTTCCAAAAGGCCACCCGTCCCTTAATACCT +TTTGCCAGATCACTGGGTTCGGGATAGTACCATACTGCATCGGGATTGGCTTTACCGTTT +ACTTCGAGCGAATAATAAGAAGCGGTTCCTTTCCAGGGACACACTGTATGCGTTTCACTG +GTTTTTAAATATTCTTTTTTTACAGAGTTAATGGGGAAATATGCATTTCCTTCAATTTTC +ACGATGTCGTTGCTTTCGGCAAGGATCGTATTATTCCATATAGCTTTCATTTTTTTTAAT +TTTATATTTTAAACTTGTTATTTTATGATTGGTGAACTTAAATCAACAGATTAGACGGAG +TTATTTCAAATCCTGACATCGCTTATTGATTTTTTGAGAAACAATTATAACAAAATATTT +CCAAAAATTTGGTAATACAAATTTTAAAACACAATTAAAGAATGTGAAATGATTTAGGTT +TTTTACTGTATTCGTATGCATTTGCTTAGTTTTTATAAGCTGTTTAAAATTCACAACATT +TAAGACTCTCAGTTTAAAGGAATTTAAAACTTTCACCCTTTCTGTTCTGGTTCAACAAAA +TTTGTCACCTCATTTAAATATCCGTGTACCGAATTTAATTCCGGAATTCCTTTAAGCGTT +ATATTGTAATCCCTTCCCTCTTTATGTATGATGGTCTTTTTAATTGCACCGTCATGTATC +AATTGATTAAGATTAATGTCTTTAATTTCGGGTATTATTCCTGCAAGATTCTTTCCGGTT +AATTCATCAGAACCAAAGATATCTTTGGCTGCCTGGTTTGCTTCATTAATAATACTGTCT +GAATTAATTCTAAAAAGGGGAGTTGGATTATACCGGGCAAAAAGTGCCAGACTTTTAAGG +AGTTCATCACGGTTTCCTGAACCGGAAAAAATTCTTACGGGGCGCCAGCGGGTCTCCTTT +TTACATATCGGGCATTCTTCCACTTCGTGGCCTATAGGCACATGGATATGAGTTTTGTTG +CAGCTTTTACATTTTAATATGGCATCGATGTGCGATACGGAACCGGTCCTAAGTGCACTA +TTACTTAATACCGATGAATAGGTTGATACCAAAAATGGGACTATGAAAGTAAGCAAAACC +CTTCCTAAGCTTAGATAATTTGTAGAAAAATGGAAGATAAAATCCGGATTATTGATCAGG +TTCAAAATGATGCCTACCAGAATAGCTACCCGTATTGCACGTATCAGGGTTTTCTTTTCA +AATGCCAACAATAAATAGTACTTAATCATTTATGATAGCTTATTTTAAATATCGAGTAGC +TTACCTGTCTGTGGGGCGAAGGGTATGGGGCTGTTTCGAATAAAAACATATCATTTGAAT +TTTACCAGGGAAGCTGGTTCTATTCAAAACTTAAAATCATGAAACTTTTGGTCCGTTGTA 
+ATTCATTTTACAACCAGGCCTTTTTTATACCAATAGTACATCAGTTACAATTTCCAATAA +AGCAGTCCCCTTATAATCCAGAAATTCCTTCATTTTAGTTTTCAAATCCTTTCTTTTGGA +AACCTTTATCCCCAAGGCTCCGCAACCTCTGGCGTATTCAGCAAAATCAGGATTATGCAT +ATCCGTGGCAAATTTTTTAAAGCCGCCTGAACGCTGTTCTTTGGTAATTTTTCCAAGTTC +CGAATTGTTTAACAAAACAAGCTTAATATTCATGTTATATTTAACAGAGGTGGCAAGCTC +GGCCATGTATTGTGCAAATCCCCCATCACCCGCGACGGCTACTATCTGGCGTTTCCCTCG +CGTTGCAGCCCAGGCTCCCAATGCAGCGGGAAATGCAAATCCAATGGAACCGAGATAACC +TGACATGAGAAAACTTTGGTTCTTACTTTCAAAATATCTTCCCAGGGAATAGGCATTGTT +ACCTACATCAACACAAACAACAGCCTCCGGATGAATCAGCTTGGATAAATTATCGAAAAC +AGCTATTGAACTTATCCCTTTCCCACGATCCTCCAACAACCGTTTTTGTTTTTCCGTACG +CCAGATTTTCCAGCGTTGCGCTAATTCGGTCCTCTGGTCAACTGTATTTGGCTTAACCGG +TAATCGCTTCATCAAAATGTTGACTGTAGTGGACAACTCACCCCAAACCGGGACATCAAT +TTTATGAAATTTACTCAATGCCAGCGGGTCAAAATCAACTTGTATAATGGGCTTTTTCGG +TGTGATTCCGGTATGGTTACTGAAAGAAGCACCCAGTACAATGAGTAAATCGCTTTCGTT +CATGAACCAGGAAGCAACCGGCGTACCGCTTCGACCAAGAACGCCTGCGGCCAGCGGATG +GTCATCTGGGATTAACCCTTTTCCTTTAAAAGTTGTCAAAACGGGGGCGTTTAGATGTTC +GGCAAATGAAATTATTTGTTCCATAACGAACCGGGCCCCATGACCAACAATAACTACCGG +ACGTTTTGATTTTGTGATAAAATCTACAGCTTTATCCATACTTCCGGCAGCTGGTGAAAT +TTGCAATTCTCCCATACGTCCTTCCGGGGTTTGGGATTCTTCTTTACCCTCAAGTATTTC +CTGAATTTCATCAGGAAAAGTGAGATGAGAAACGTCCCTTTTTAAGAGGGCATGCTTAAT +GGCCAAGCTCATCAATTCTGCATGTTTACTATCTTTTTGAACCCGGTGGTTAAAAGCTGC +CACGGTTTGAAATGCCCTAACGAGGTCAACTTCCTGAAAATTACCTGTGCCTACCACCTG +TGTTGCCACCTGGCCGGTCAGGGCCAGGATTGGGCTGCGATCAACCTTTGCATCCCACAA +ACCTGTAAACATATTGGTTGCCCCCGGTCCGGCAATTGAAAAACAAGCCGCAGGTTTACC +TTTTAATTTACCATAAGCGCTGGCTGCAAATGCACCGGCTCCCTCATGTCTGATGGCTAT +ATATTTCAGCTTCCCTTTTTCTTCCTGTCTTCTGAATGCATCAGCAAGTCCTAAATTTGA +ATGCCCGACCATTCCGAAAACTGTATCAACCCCCCAGTGGACCATCGTTTCTACCATAAT +ATCCGAAAGCGTCGTTTTATGCGGTTTTTTGGGAGGAATACCTACAAAAATCCCGTTGCC +CCGTTCTTCTACCCGATAGGTCGCAACCCCATCATCAAATCCGGGGGCTTTTCCGGTACA +GGGATGATAATCCCATCCATGCCAGGGACAACGTAAAATACCGTTTTCAATACTTCCTTC +ACCCAGGGGACCTCCCTGGTGGGGACAATGGTTATCCAATGCACAGATTTTTCCTTCAAA +ATGAGACAGTGCAATTTGCTTGCTTCCGGCGTTGACAGTAATAATCCTGTTTTCAGGCAG 
+GTCTTCTTTTTTTTCAAGAACTTTATGCCATTCTAAATTCTCTTTCATCTGTTGATGTTT +TATTGTTTACAAGTTTAAAAACCAGTCGATACTTTCTTTTATGTAAGTGTTAATAATTTC +CAAAGCTTTCGCCGTAGAATTATCAGCCAAGACATGCAAGGGGAGAACGGTGCCATTCCA +TTGTTCTCCGGTTACCCATCTGCCAAAATTTAATTTGCTTAGATTATTATAGATTATTAT +AGAAGAATTCTTATATCCGCTCAAATAATAGTATGGTTCCCCTATCATTGAATCTGCCAT +GGCCCATCCAAATCCAAATCCCAGCGAATCTGTAACCTGAGCATAAACGCCCGTGTCAAA +ATGATGCGGCCATATCCGTATTTCACTTTCCGACTGAAGATAACCTAACACAGATAAGCA +GGCAAAATTGGCAAGTCCCCGGTAATATATCCATTGTTTTATACCAAAGGATGATATTCT +GTTGGATTTTATGATGGAAAAACCGTAATCAGGTATTTCGAAATGTAACGGCTTCGATAT +ATCCTTGCTGCTCATCCCCATTGAAACCGGGAATTCCCGGATTTCTTTTTCGATGGATGA +CCCCTCTTTGTCAAAAACAGAGATGGATGTTTTTACTGATAATGGATTGTCCAGCCATTC +AAAACTAAGGGTTTTCAGGTTTAAAGCCAGGATGAATTTACCTTTTGGAGTGTTAATCCA +CCTGCCAAACAAACGTTTACCTGCAGCATCCAAAAAAAGATTAGTATGGCTGTCATCTTT +TTTTGCCGGAACGTAGTCCCGGTTTACTTTTGCAATGGCCTGGCTGAGCCAATGAATCTG +CCGGTCGGTTTTAATATATTCCTGCATAATTTATCCCTGCTATTGTCTATGATTTCTAAT +CGGGCAATGAATGGTAATTGTTTAATTTTATGCAAATCACAAATTTATTTTGTAATGGCT +ATAAATGGATTTTTAAAGACTCTGATTGGTTTAATTTTATGAAAAAGCCAGTTTTAACCT +TTTAAAAATATTCGGGGCATCTTTGTTCTTTTTATAAACCACAAGCCCAACAATACTTCC +AAAGGCAATATGGGCAAGATAAACCGTAGTGAGAAACTGGTATCCGTCTTTAAAATGCAA +TCCGAACACCCCGATACCTAAAGAGGTTGTAGCCGGACTCACCATAAATCCTGTCCCGAT +AAGTAAAGCGTAAACAATACCCATCCAGATTTTGCCACGGCCAATAATCAAACTGAAGAT +GATGCCAAAGGCCGCGCCATTCCAAAAGTGATAGGACCACCCGGCTACGTTTGACCAAAA +GTCAGGGCCTGAGGCAAAGCGGTTGAGCAGCAATACCCCGAGTAATTTGGGCATATCACC +CGGCATTCCGCCGAGGTGGAAACCAATCTCCCTGACAATTTCCAAGCCAACGGTACCAGC +CAGTCCGGCCAAAATCCCGTTTAAAATTTGTTTTCGTAAATCATTATATCCGAGTGCCTG +AGACAAGATTGCCAAAATAAAAATAAGCACAACGGATGGAATTAATAACACAACGGCCAG +GGATGACATACTTGCAATACCAGACTGTGCAGCCGGAAAAATGTTTGGGGAAATCCCAGC +CAGCAAAAGCACGAACGCAAGCAATACTAATTTCTTCTTTTCCTGATTCATAACAGCATG +TTTAAAAGTTTTAAAACAATTTATGAGGGTAGTCTAAAAAGCCTTCAAACTGCTGATTTT +TATGCTTTTTAGACCCGTCTCATTTATATTAGATCGTAAAAACGGCCGGATGGCATTTTG +CACACCCGGCCATTATAAATGGTTTACCGGGTAACCATTTTGAGCATCATCCCTAACTTT +TTATCCAGCATTTGGCCAATCATTTTGACCTTTGCATTTTTAAAAGACCCAAAAAGGTCG 
+GACGGTTTAAAGTAACTTACATAGGTCTTGCCGTTTTCTTCGTAAACATTGACCCTGACG +GGGATTACCAGTCCGGCAGCCGGATCTGCAGAAAACGCTTTGTTCCCTACATTGGGATTA +CCAATAAAGAATGAATGTGACTTAACGCTCAAACCGGTCATAGATAATATCTTACCTTGG +TCCAGTTGCGACATGACCATCATACCGCCGCCAGAGACAGCCTTTTTAATGGCATTAATG +GTTTGCTCAAAATTTTTAGAGGATTGAACCGTAACCGGTTTAACGGATTGGGCCTGTATG +TTTGTGGAAAAACTTAAAAGGCCTGCGACTAAAACTGCTACAAACAGTGTTGAAAGTCTT +GATTTACTCTTTTTCATAAAATTACTTTTTTTAGTTAAAAATTAAAACTGTTCACTTCCA +AAAGAAAAAGGTGCAATTGGTTGATGAACTTAATGTCATCATAATAGACGGAGCCTGTTC +CCAATCCTGACATCGCTCATTGTTTTTTTTTGAAATTATTTTCAGCCACCATCGTTTCAG +CACACAAAAGAGGCTTACCTCTTTGAGGCAGCCTCTAATCTGAATCCCATGGAATTCTTC +GAATCGTTCTAAACGTAATACTTTTTACTTTGCCATTTTTTCGGAGTTTGTCAAAGCCTC +GGCCAGTTGTTTATTACTTCGGGCAAGGCTGATATGACTAATCAGGGATTTGTTTTTGGC +ATCGAAAAAATACACTACTCCGGTGCTTTTAAATTTGGACATTGTTTTATAGAGGTCCAG +TTTTTTTAACGTTTCGGCTGACTTTTGTTTGGTCTGTTTGTTAGATAAGTTGTTAAAAAC +AAACCGGACAGCACCATCCTGATTGTTATGCATCAAGACAGACATGGCTCTTTTACCATT +CTTCTGACATACTGGGCACCAGCTGGCTTTGTTTACCACGGCTATTACCTTTGACCCTGA +CTGGGCCATTGCTGAAGCTGACACAAGAAGTACCAGCATTAAACTTATGACAATTGATTT +TGAAGTTTTCATTTTACCAAAAAATTTAAAGATTAATATTTAGAATCGTCGGGAAAATAA +ACAATACCTGACACCTAAATTTAATTTTAACATTTTTTTAGTACAAATTTTACGAATAAC +AATTAGTATCTACCTGTTTTTGAGGTTATTTAATTTCAAATCAACTTAGGTATTCCTGGC +AAAAAAAGTTCAGCAAAACTTATATTTAAAAAGTATTGCAAAAAATTAAACTTAACGGAC +GGGGAGAAAATACCTCAGTTTTCTAAGCGAAATAAGAAAGGCAAATTAAAACATGCTCCA +TTTCTTTTACAGGCAAGAAATTCTAAAATGCTTTATTTTAATTCATTCAGTTTCTGTTGG +ATTGATTTTTTTATTTCTTCAAGGTCTTCTTTTTTTATTTTAGATTGGTTTAAATGCGAA +ATGCCTTTATCCAGAAAAATACTTTGTTTTTCATAGTTTGTGCAGGCATCGCACATCATT +TTATGAGCTTTCAATTGTACTTTCTCCTTCAGGCTTAGTTTGAAGTATAATTTCTTCTCA +ATAAGCTCAGTTGCTTTCAAACAAGAAAGAAAGAGTATGTTCATTAGCTTTTTCATCATC +GTCTAACTCCTAAACCAGTTTTGATCAACACAATCCCGTAATTTAATTTTAGCCCTGTGA +ATGATTTGCCAGAAATTAGTCGGATTTAATCCCAATTCCTGACAAATTATCTCCCCCTTT +TTCTCCGATAAGTATTTCATTTTAACACAAGTACTCCATTTTTCCGGTAGCGATTCAAGG +CATTTTTGCAACACTTGCTGAAACGCACTGTCATCCAGAAGCTGTTTCTCTTCTTGATGC +CAATCTTTCGGCTCTTTTTCTTTTTTCCAATCTCCCCTTTCGTTAAAGTAAGTAGAAAAC 
+ACCTGACTATCAAAACTTACAGGCTGTTTCACTTTATTTCGGTAAACGTCTATGATTTTA +TGGTTGAGAATGGAAAACAACCAGGTTTTAGGAGAACTTTCTCCTTTGAAAGCATCCATT +TTTTCAGCCGCTGCCAAAAAGGTGTCCTGAACCAGGTCGCGAGCCAATTCAGCATCTGAT +ACTTTATACATAGCCCAGGAAACCATTTCTTCTGTGTAGGTTTCTACCAGATAAGTCAAG +TCAGTTGTTTTGGTCTGGGGCATTTATTTTTAATCTATTGACGTCCGCTAAATTTTAAAA +TGATAAAATTAAGATATTTTCCGCTCTCCTAAATTTCAGCTATAGTCAAATAGTTTCACC +TAACTTTAAAGTACCAAATCATTTTTCCACCTTTTCAACCTCCTCAATTAAAAAGCAAAA +GGCCATCCGCAATAAGCAGACAGCCTTCAATCTATTATCTCTGAAAAAATTCTATTCGTC +TTTAATAGCGGCAACGCCCGGCAGGGTTTTGCCTTCCATGTATTCCAACATGGCACCACC +ACCGGTAGAAACATAACTTACTTTATCCTGGAGGTTGTACTTGTTGATAGCAGCAACAGA +ATCACCGCCACCGACCAAAGAAAAAGCTCCTTTTTCAGTTGCGCGGACAATGGCCTGGGC +CACATCAACCGTACCTTCAGCAAAGGCATCCATTTCGAAAACCCCCATCGGGCCGTTCCA +CAAAATAGTAGCTGAATTTTCAATCACCTGACGGAACGTTTCGCTGGTTTCCACACCGAT +ATCCAGTCCCATCCAGCCATCCGGAATTTCACCGGAAGGTCTGCATTTCTGGTTGGCATC +ATTAGCAAATTTGTCAGCTGTCACAGCATCCTTTGGAATGAAAAGACTTACTCCTAGCTT +GTCAGCTTTTTCGCGGGCTGCGTTGGCAGTTTCAATCAAATCATCTTCAACCAGTGAAGA +ACCTACTTTTCCACCGTCACCTTTGATAAAGGTAAACATCATACCGCCGCCAATCAGCAG +GTTATCCACTTTGTCAAGCAGATGATTGATGATTTCAATCTTTCCGGAAACCTTGGCACC +ACCCAAAATAGCCGTAAAAGGTCTTTCTGCATGATGCAGGACTTTATCGAGACTCTTCAC +TTCATTTTCCATCAGGTAGCCAAACATCTTGTCATTCGGGAAGAATTTGGCTATGATGGT +AGTAGAAGCATGAGCCCTGTGAGCCGTTCCAAAGGCATCGTTTACATAAACATCAGCCAG +GTCGGCAAGCTTTTTGGCGAAAGCTTCATCACCGGCAGTTTCTTCTTTGTAAAAACGCAG +ATTTTCCAACAACAGGACATCACCGCCTTTTAATGCTGCTGCTTTCTCACGAGCTTCATC +TCCAATGCAATCGTTAGCGAACTGCACCTCTGTTTGAAGTTTTTCTGAAAGATTTTTTAC +AACATGACGTAATGAGAATTTATCTTCGGGGCCTGATTTTGGCCGGCCCAAATGTGCCAT +AAGTATAACAGCACCGCCACTTTCACGCAGCTTTTGTATGGTTGGAATGGTCGCCCGGAT +ACGGGTGTCATCTGTGATCTCAAATTGGTCATTTAAGGGCACATTAAAGTCCACACGGAC +GATGACTTTCTTTCCCTTAAAGTCGTAATTATCAATAGTTTTCATGCTAAATTAATTTAA +TTGGATTAACGTGTTACTTCATGCAAATCTAAAAAGATTCGATGTACGAGAATCCTAATG +ACTCTTAAAAAAGTATTTTTGTATCACAAACACAGCTTATATGGAATCATTTAACGGAAG +CGTCCAATCGAAATTACCGCAGACAGAGACATCCATTTTTGCGGTCATGTCACAACTGGC +CAACGACCACAAAGCCATCAATTTATCGCAGGGTTTCCCCGATTTTCCCATCTCGGAAGA 
+ATTGATTGACCTGGTGCATTATTACATGCGAAAAGGTTACAATCAGTACGCACCAATGCA +GGGCGTTTTGCCGCTTCGGAAAGCCATTTCGACTATGTTTCAGAAAAATTACGGCATCCA +TTACGATCCGGTTTCTGAAATTAACGTTACGGCAGGAGCCACGCAGGCACTGTTTTCTGC +AATTTCCGCTTTCATCAAAGACGGGGATGAGGCTATCATTTTTGAACCAGCATACGACTC +TTATGCTCCGGCGGTGAAAATTAACGGCGGTATGGTAAAATATGCGCACCTGGAATTTCC +GGATTTTAATATCAACTGGGAAGATTTCCCGCGCCTGATAACCAACCGTACCAAGCTGAT +TATCATCAATACACCGCAAAACCCGACCGGAAGTGTCCTCAGTGAAGATGACCTGCAACG +GCTGGAAAGAATAACATCCGGAACCGACATCATTGTTCTTAGCGACGAAGTTTACGAGCA +TCTGATTTTCGACGGCATCACCCATCAGAGTGTTTGCCGATTCCCGGAACTAGCCAAACG +CACTTTGGTGATTGGTTCCTTTGGGAAAACCTTTCACGCGACGGGCTGGAAGACTGGTTT +TGTGCTGGCTCCTGAACGCCTGATGAAGGAGTTCCGGAAAGTGCACCAATTCACTGTTTT +TGCTTCCAATACACCCATCCAGCATGCCATTGCGGACTTTATCGGGAACGAAGACAATTA +CAAGAACCTGGGTAAATTTTATCAGCAAAAGCGGGATATGTTTGTCAAATCCTTGAATGG +TTCAAAATTTAATGTTTTACCTTGCTACGGAACCTATTTCCAATTGCTTGATTACAGCAA +TATCTCCGATAAAAACGAAATGGATTTTGCCCGCTGGCTGGTGGAAAAACACAACATTGC +CGCTATTCCTATTGCGCCTTTTTATCACAAAAAAGACGACCACAAAGTGCTGCGTTTCTG +CTTTGCCAAAAAGGATGAAACGTTGGTGGAAGCAGGTGAAATACTGTCAAAAATCTAAAA +ATATACATCATGAATGATTTGAAAATATTATACATCCAGTCGCGGTTGGCCTGGGAAGAT +GCCGAAACCAACCGGAAGCATTTTGAGGAAATCATTCAGAAAGAAGCACAGCACCACGAT +TTGATTGTGCTGCCCGAAACATTCACCACAGGATTTCCAGTGGACCCGGTACCCTTTGCC +GAAACCGAAGACGGGGAAAGCGTTCTCTGGATGCGTGAAATGGCGGCTCAGACATGCGCC +GTTGTCACGGGAAGCATGTTACTGAAAAACGATGGAGTTTATACCAATTCTCTCATCTGG +ATGCGGCCCGATGGCACATACGAACGGTATAACAAACGTCATGTATTTCGCATGGGCGGG +GAGCATGAAAAGATTCACCCGGGCGATAAAATACTTTTGGTAGAACTGAAAGGCTGGAAA +ATCCGCCCCATGGTTTGCTACGATTTGCGTTTTCCCGTTTGGACCAAAAATCACTATGAA +AAGGATGCTTTCGAATACGATCTGGCTTTGTTTGTCGCCAACTGGCCAGCCGTCAGAGCC +TATCCGTGGGACCAGCTTCTGATCGCCCGCGCCATCGAAAATGAAGCATACGTTCTTGGT +GTAAACCGCATCGGAAAAGACGGATTGGGCAATGATTACAACGGCCATTCCAAAGTGGTG +GATGCAAAAGGAAATGTAATTTCGGAAGCCCCGGAAAATGAAGAAGCTGCTATTTCCGTG +AAGCTTTCTTACGAAGCACTGCAAAAGTTCCGCGCCAAGTTCAATGTGGGGCAGGATTGG +GATTCTTTTACGATTCAGAAATAAAGAACCTTACAAGTTCAGGGAAGAATTGTCATTCTG +AACGAAGTGAAGAATCTGCAATAATTGATAAGTATTAATTGATGAAACCCTTCGTTTCAC 
+TTAGGGTGACAAAAAAGTTTTACAAAAAAACCAAATAAACAAAGATGAAAAGACTTGGAA +AACATTTTGCCGTGTTGATTCTTGCCGCCGGTTATTCCGGGCGGATGGGAATGCCCAAAG +CCTTTTTGCCCTATGATGCCAACCGGACTTTTCTGGAGAAAATTGTCTCAGAATATCTGG +AATTTGGTTGCAATCTGGTTGGAGTTGTATTGAATGAAGAAGGCATGAAGTTGTATGAAA +AAATGCAATTGGAACACAAAAACAACATCACCGCCATTCTCAATCCAGCGCCTGAAAAAG +AACGTTTTTTCTCTCTGCAAACCGGGTTGAAAAGGCTGAAATCAGAAGGAGCGGTTTTCC +TTCACAACGTTGACAATCCATTTCTAACACAGGACATTCTACAAGCTTTGGCCTCAGCAT +TCAAAACACAAGCTTATGTTGTTCCAACTTATCATGAAGAAGGCGGACATCCCATTTTGC +TTTCGCAGGAAATCGTAAAAGCGTTGATAGAAACATCTGATTACGAACAGAATTTGAGGG +TTTTTATGGAATCTTATGACCAGATTCAGGTTCCGGTAAGCGATCCAAACGTCTTGGCAA +ATATTAATTCTCCTCAGGAATACGAAAGACTATTTGGAAGATCTTTTTAATAGTTTAAGC +ATGACAGGCTGAACGCATGTTTTTCTGTCATTTCGAACCTTTGGGTAGCCAGTGTGTTGG +CGTGAGAAACCTCCCGGGCAGCAGGTAGAAGCTTAGGGGATTTCTCAGTCGTCCCTCCTT +CGAAATGACAGGGTCTTTCTCTGGGAAACAATCCCAAAAAACTTTCCTTAATTTTGCACC +CTAATTTAAAATCTCATGGACAAAGAAAAAAAACAAAAGCGCTATCAGCGTTTATACAAA +CAAATACAGGATTTAATTGTCAAAAGCAGCAACAATCCCCTTTCCAACATGGCCACCATC +AATGCAGTATTGTATCATAAAATGGAAACTTTCTTCTGGTGCGGCTTCTATCTTTACCAG +GATGGAAAACTGCAGGTTGGTCCTTATCAGGGTTCGCTGGCCTGCATCAACCTGGCAGAA +GGCACGGGTGTTTGCCAGGCTGCACTGACACAACAAAAAACGCTGACCGTTCCTGATGTG +GAGGCCTTTCCCGGACACATCGCCTGCGATTCACGTTCCAAATCAGAAATTGTTATCCCT +GTAAGAAACCGGGAGAACGAACTCGTTGGCGTGCTGGATGTGGACAGTAAAGAACACAAC +TCCTTTGATGAGGTTGATGAGGCTGAACTGGAGAAGATTGTAAGGCTTGTTTATTTCCCG +GAAGGCTAATCTAAAAAGTGTCTTAAATGAACTAAAGTGAGCTTAAGTGACTAAAGTTAA +GAGAGAACCCCAACTTTACAGACTTTATGAACTTAAGTCACTTTATGAACTCATCTCAAC +CGGTTTACACAAATGATCCCCGGTATTGCTTACCCGGACACATTTCCGGCAAATAAAACG +GGGTTCATTGACCATTAAAATAAGCTCACCCAGTTGTTCTTTTATATCGTCTTTGCTTAA +ATCGCAAAATGTTTTTTTCTTTTTCATGACCTGATGCTTTTTCTGATCGGGTGACTAAAG +TACGAAACTTGTTTTATCATCCGTAACTTTAGGCATTTTAGGCACTCTATGAACTTACTT +AATCGCAGGAAAAATATAATCCGCCAGCAGGGCATACTGCTTCACTTTCCGGTTGTAATT +GGCTCCGGTAAGCACAACGACCGTGTTAATTTCAGGTAGTACAATGATTTGCTGGCCACC +CCAGCCGTTGGCGGAAAACCAGTGAACAGCCTTTCCATTAATCTTTTCATTCTTTGTCCA +CCAGTTGTAGGCATAACCCATGTCGCGGAGGTCTTCGCCCGGAAGATCGATACCCCGGTT 
+GTTTCCCCAGGGCTTTCTGCTTTTTTCAACCCAGTCTTCAGAAACAATGCGTTTACCATT +CCAGATACCATTGTTCAGCATCATGGCTCCGATTTTTATCATATCCCGAGGCCTGATTTT +CAAGCTGCCGGCGGCATGGACCTCGCCCGTAGGATATTTTAGCGCCCAGCTGGCTGTTTT +TATTCCCATAGGTTCAAACAGATACTTTTGAGAGAATTCATCGAAGGCCATCCCCGAAAC +ATTTTTCACAATTTCACCTAGTACTTCTACCCCGCCTCCCGAATAATTGAATCGTTGTCC +GGGTACGGCCACAAAAGGCCGGTTTAGAACAAAATCAAGAGGTCCCTTTTTATGAAAATA +AATGCCGACCTGGTCGTTCCTTTTACTGCTTAGCGGGGCACTCCATTCTTGCCAGAGCAA +ACCACTGCTACAAGTCAGCAGATTTTCAACGGTGATATACTTTTTGTCACCCACATTCAG +ATATTGATACTTTTCTGGCAGGTAATCAAAAATGGATTGATGAATATCTTTAATAAGCCC +TTTGTCAACGGCAATACCGACGCACAAAGAGGTTATGCTTTTGCTGACGGAATGGGCACT +GTGGGCCCTATCGGCATCCCAGACAACCATGGGGCCGTAATGTTTTTTAGCCTCCCATTT +GTAGTCGTGCCCCTTAAAATATTCATCAAGAACCAGTTTTCCGTCTTTGTAAATGAGCAC +CGAATGAACCTCACCGTATTTACCGCGTTCGACTCTGTTCATGGCTTTTTTTATGAGCAC +TGTGTCAATGCCTACTTCAGATAAGGTTCCTGTTTTCAGACCATCAGTGGTTTCTGCAGG +TTGATGATACGAGTACTGGCCTTGTGTGCAGCCCATAGAAATGAAAAGCACCAGCATGGC +TGCAAGTTGAAAGAGTTTGAGTGTTTGCTTAGTCATGATCGTAAACTTTAGTCGTGAGTC +AGAAAATACTTTTTGGGTTTGGAAAGGAATGTTTTTCCTTGTTTTTTCAAATATAAGAAA +TAATTTTAAGAAACACAATGTGAGGGTATTGGAAAGGAATGGGATAAGTGTCTTAAGTGA +ACTAAAGTGAGCTTAAGTGGCTAAAATTAGAGAACCCAAACTTTGCAGACTTTACGAACT +CAGCTCTTCTTGAGAGCCATAAGAACTTGTGACGGAAAAGTATTCTGACTGATTTCAACA +TCTTTGACAATCTCAAAACCAGCTTTTATCAAAAAATGTTTAAAAGACACCGGCCGGCAT +CCGGTTAAAAGTGCGGGAAATTTCCTGGCTACCCAAAACCAAAAGCGGTGGACTTTTTTT +GTCCCAAAAGAAAAGGTGGACATGACAACTTTTCCTCCCGGCTTCAACACCCTGAAAAAT +TCTGTAGCAACTTTATCAAAACAATCAACCGGCAGCAAATCAACCATGTAACTGTTTATC +AGCAAATCCTGTGAATTATCAGGAAAAGGCAAATCCAGCGCACTACCTTGCTTTAAATTA +AAATTCAGATGGCCGGTTTTTGATAATTTCTTGCGGGCTTTGGCTATCATATCCGGCGAA +AGATCAATACCTGAGTTTTGCCCGTCAGGATTCAATTTCACTACCTTTTCCAGCATCTCA +CCTGTCCCGCAAGCCACATCCAAAACAGAAATTCCATTTTTTATTCCTGCAAATTCCAGA +ACTTTTTTTGCGGCTTTCCCTTCGGTAAGTTTTCCCCAGAAATCATATATCCAGAGAACT +TTTTTGAATTCATTTATGGTTTTATTTTCGTTTAAATAGCGTTCTAATATTTCCATTTGT +TGCATTATTTGGCGGCTGCGATTCCATCACAACCAACTTAGTCGTTTTCCAAAAGTCGGA +AATAATTTTCAAACAACTGCAAATGGATAAAATTGGCATGGAACGGCACGCGTTCGGAAA 
+CACGCGACAGTATTTTGGTAAGAAAATAAGTGCCTTAAGTGAACTAACCTGCCAGCCGGT +AGGCTGGAGTGAGCTTAAGTGAGTAAATAATCCACCTTCTAAGAAGGTGGCTTTGATTTG +CCCCCACAGGGGGCTTTAAAAGTCAAACCTTTGTTGTTGGGTTTCTATTTTTTTCTCTTC +TTCCTCCTGATATTTTACATATCTTTTGATTACATCTTCATCTATTCCTACCGTACTAAC +AAAATACCCACGTGCCCAAAAATGATTTCCCCAATAGGGCTTCTCCTTTAATGATGGGTA +ACTCTTAAATAATTTGATGGCCAATTTCCCTTTCAATATCCCCATTAAACGGGACACTGA +TACCTTGGGCGGGACGGATACAACCAGATGAACATGATCAATTTGAACATTCAATTCCAT +TGGCTCACATCCTTGCCACTCACATAACATTTTTATATCATTTGTGACAATGTCTTTGAT +TTGACCCGTCAAAACCCGAAATCGATATTTGGGTACCCATACTATGTGATAATCACACTT +ATAAACTACATGTGATAACTTCTTATATTTGCTCATGGTACAAATATAAGGCAAAGCATC +AGTACATTACCACCGTCTAAGACGGTGGGTTTTCAAGTTGAAGTAAAGTTAAGAGAGACC +CCCAACTTTACGGACTTTATTAACTTAAGTCACTTTATTAACTTCTCAGTATCCAAAAGT +CTCTACTTCAAAAAAAGACTGCGGATGCAGGCAGGCGGGGCACTTCTTTGGAGCCTTTTT +ACCTTCATGCAGATAACCGCAGTTGCGGCATTTCCAAACTACCACACCGTCTCTTTGAAA +GACTTTGCCGGCTTCCAGGTTTTTATAAAGTTTTGAATACCGTGCTTCGTGTGCCTTTTC +AACGGTGGCAATCATTTTGAAAGCTGCAGCCACTTCTTTAAAACCTTCTTCTTCTGCAAC +CCGCGCAAACTCAGGATAAAGTTCTGTCCATTCCTCATGCTCGCCTTCTGCTGCTGCCCT +CAGGTTTTCCAATGTGGTACCGATAATTCCGGCCGGATAAGCGGCAACAATCTCTGTGGG +ACCACCTTCCAGAAATTTAAAGAACCTTTTGGCATGCTCTTTTTCATTCAGCGAAGTCTC +TGTAAAAAGCGCTGAAATCTGCTCCAACCCTTCCTTTTTAGCCTGACTGGCAAAATAATC +ATAGCGCATTCTTGCCTGGGATTCCCCGGCAAAAGCTTTCAAAAGATTTTGTTCCGTTCT +GGTTCCTTTTAATGATTTCATGGTTAATTTTTTTAGGTTATCGTTTATTATCCAATGGGT +TCGAAATCTTCTTTTTCGGCACCGCAATCGGGGCAAACCCAATCGTCGGGAAGATCTTCA +AACGCCGTTCCCGGAGGAATATTTTGTTCCGGATCCCCCACAGCCGGATCATAAATATGG +CCGCAGACAATGCATTCCCATTTCTGCATGGACTTGCCTTTGGGCTTTTCAGCCGGCGTT +TCTTCTGCTTTTTCCTCTTTCTTTTCTTTTTCAGGCAACAAGGACTTATCCACGTAAGTA +GGAGAGTTCTTGGGCGAAAGTCCGTGGCGCACCTGACGGTAGTAGGTATAGGTCAGGGAT +TCTTTGTTTTCATCGAGATATTCGCCATTAAGCACCCTGCCAATGAAAATGATGTGCGTG +CCAACATCCACTTTCTGCTCCACCTTGCATTCAAACCAGGCAACGCATTCTTCCGTTACC +ACCGGGATACCATTATTCGTTTCAAAATACTTTGTCCCCTCGAACTTATCCAGCGTTTTT +CCGCTCTTATATCCGAAACGGTTGATGATTTCCTTGGAAGCGTCTTTTTCGAGCACAGAG +AGGCTGAAATAGCCGCTTTCGTCAATCATTTGTTCACTCAGGTTGTCTTTGTTGCAACTG 
+ATGGCTATTTGTTCTGGAGAAGCCGTCACCTGAAAAGCCGTGTTGGCGACATACCCGTTT +TTTTTGCCTTTGCTGGCCGTTCCGATAATATATAATCCGTACGAAAGGGAATAAAAAGCT +TCAATATTCATGTCATTCTTATTTAATGTGTTACAAGTGTTGTTTGAGGTCTTAAAGGTC +CTAGAAGCTCTGTTTGCAGGCCTTTTCACCTACAAAGATAAAGGAAAAGTGTCTAAAGTG +TACTTAAGTTAATAAAGTGACTAAAGTTATTTTCTAACGACAGGGAACAACGTTTTAGTG +AAAAGGCACGCGTTTGGAAACGTGCGCCAGTTAATGGTTAATGGTTTTAAAAAGAAGGGA +CGCAAAATTTTGCGTCCCTGCATCTTCGGTCTTCTGACTTCGGTCTTCCGTCTAATCTTC +CGTCTTCACCAATTCCGGTTTCTTCTTCCGGTCACGGATTTTAATAATCGTAACGTAGAA +CAACGGGATGAAGAAAATGGCAAGCAGCGTGGCTGTTAACATACCACCGAATACGGTGTA +ACCGATATTGATACGGGCATTGGCGCCGGCACCTGTGGCAATCATCAACGGAATGACACC +CAAATCGAATGCCAGTGAGGTCATCAAAATCGGACGGATACGCAGTTTGGCTGCTTCCAC +CGCAGCTTGCACCAGTGGCACACCGCTCTCGTATTTCATCTTACAGTATTCCACAATCAA +AATGGCGTTCTTCGCCGCAAGGCCAATAAGCGTGATGATACCGATTTGCGCATAAATACT +GCTGGATAACCCTCCTATATGCAGTGACAAGTAAGCTCCGAAAATTCCGATAGGAACCGC +CAGCAACACAGCAAACGGCACCGCAAAACTTTCGTAGAGTGCTGACAGGAAAAGGAACAC +AAACACTATAGCCAGTATGAAAATGAAAGTACTGGTTTTGCCGCCTTCAATTTCTTGCAG +CGTGGTTCCGGAAAACTCATACGAATAGCCTTCCGGCAATACCTGTTGTGCCACCTGTCT +CAAAACTTTAATAACGTCTCCCGAGCTGTAGCCCGGCGCAGCATTTCCACTGATATTCAC +ATTCCTGTCCATGTTGTAGTGCGTAATGACAGGTGCAGAAGTACCTCGTGTAATATTAAC +TAAAGCACTAACAGGCACCATGTTACCCTGATTATTCCGGACGTAGTATTCCGACAAATC +GTTGATGTTGGCGCGATAGCTGGAGTCGGCTTGGGCGTAGACACGGAATGTTCTGTTAAA +CAGCGTAAAATCATTGATGTAATAACCACCGAGGAAAGTCTGAATGGTATTATCCAGTGC +ACTCAATGAGACACCCATTTTTTTGGCTTTTACCCGGTCGATGGTCACGCGGATATCCGG +ATAATTAAACGTGGCCGTTGAATATGCCATCTGTATTTCGGGCCGCTTGTTTAATGCTGC +CAGGAATTTGTTTTGAACTTTTTCCAAATCCTGAATACTTCCGGTACTTTGTTTCAGGAC +GAATGAAAAACCGTTGGTCCTTCCAAGCCCCCGGATCGGAGGTGGAGCAATAATCATAAA +TTTTGCACCTTTAATTTGAGATGAGGCTCCCATCAATTGCCCGATGACGGCCTGAATGGA +ACTATTTTTCCCCTTTCGTTTGCTCCAGTTGGTGAGGCGGATAAACGCTGTTCCGAAGTT +GGACATCTGAGCACCCTGCAGCACACTAAAACCGGGAGCCAGAAAATAACGTTTTACGTT +CTTATTGTGATTTAGAATTTTACCGAATTGGTTCAGCACCTTAACCGTCCGCTGGGTTGA +TGCATCCGGCGGAAGCTGGGCAATAGCCATCACCATGCCCTGGTCTTCGTTGGGCAGGAA +TGAAGTGGAAGTATATTTTGAAAATAATCCGGTTCCAATATAAATTGCTCCCAGTAAGAT 
+AAACATCAAAGGGGCATGCTTGATAGTCTTTCTAACGGTTGCTCCATAGTTTTCAACTGT +TTTGTCAAACCAAATATTGAATTTATAGAACATCCGGTTCAGGCCTTTACTGTTTTCGTT +TACCGGATTGGGACGCATCATAATTGAACAAAGTGCCGGCGTCAGTGTCAGCGCTACAAA +AGCAGAAAGCAAAACGGAAATAGCAATCGTAAATGCAAACTGTTTGTAAAGCATTCCGGT +AATGCCGGGCATAAACGTTACCGGGATAAACACAGCGGTGAGGATTAAAGCAATGGCAAC +AACAGGGCCACCCACTTCACTCATGGCACGGATGGCTGCTTCCTTCGCCGTAAGGCCATA +GCGGTCAATATTATGCTGCACGGCTTCTACCACCACGATGGCATCATCCACCACAATACC +AATGGCGAGCACCATTGCAAACAATGTCAATGTATTGATTGTGAATCCAATCAGTGTAAA +TATCGCAAAGGTTCCGATGATAGATACCGGAACGGCCAACATGGTAATCAATGTCGGGCG +CCAGGTTTGAAGGAAAAAGAACACCACGATAATCACAAGCAGCAACACTTCAAAAAGCGT +TTTTACCACCTCATCAATAGAAGACTGCACAAAACGAGTGTTATCAACCATGGTTGTCCA +GGCCACATCGGTAGGAAAATTTTTGGATAGCTGTTCCATTTTAGCTTTTACCAAATCGGC +AGTTTCCAATGCATTTCCTCCGGGCGTTTGGTACACGGCCAAACCACAACCTACTTTTCC +ATTTAATCGTGGGGTTCCGGCATAGCTTGATGAACCCAGTTTCACACGAGCAATATCTTT +CAGGCGAATCACAGAACCTGTTGCAGGGTTGGTTCCCACCACAATATTTCCGAATTCTTT +AGCCGTCACAAGTCTTCCTTTCACCTGCACAGTAACCTGAAATGTCTGTCCCTTTGGTGC +CGGAGAAGCGCCCACCGAACCGGCAGGTATCATCGCGTTTTGTTCTCTGACTGCATTTAT +AACATCCTGAGTGGTAAGATGTAAGTTAGCCATTTTCTGTGGATTTAACCACACGCGCAT +GGCAAAGCTGTTACCGAAAACATGCACGTCACCAACACCATCAACGCGGGACAATGCGTT +TTGTATGTATAAAGCAGCATAGTTACTCAGGAATTTCTCATCATGCGTACCGTGCGGTGA +ATAAAGCCCGACGATTTCCAGCATACTGGTGGAAGCTTTTTTCACAGAGAGTCCTGTCTG +CCTGATTTCTGCAGGCAAAATCGGCGTTGCAAGGTTGACACGGTTCTGTACTTCCATGGC +ATCCACATTCACATCCGTGCCGATTTTAAAAGTAACGTTCAAATTAAAACTACCACTGTT +GGCACTTGTAGAGTTCATGTAAAGCATGCCCGTAGCGCCGTTCACCGAGTTTTCAACCGG +TGTGGCCACCGCCTTTTCAACATCCGAAGCATTGGCACCGGTATAATGCCCCGAAACGCT +TACTACCGGAGGCGTTACTTTCGGAAGCTGGGCAACAGGTAGCTTGAAAATGGAAATAAT +CCCTACGATGATGATGAACAGCGAGATGACAATAGCTGTTATAGGTCTGTTAATAAATAT +TTTTGAAAACATTTGTTCCTGTTTTATTCTTTTTGAATCGTTTTACTCAGGCTTGGATTG +TTTTGCCATTCCTCCGGTTTTCATAGGAACCGTTTTGACTTTTGCTCCTGGTCTGACAGA +TTCTATTCCTTCAACAATAACTTTAGTTCCCGGCTTTAATCCGCTTTTGATGACCTGCAT +ATTACCGTACTCTGCGCCCAGCTCCACTTTTTGCTGAGATACAATTCCTTTACTATTCAC +TGTATACACAAAGAACTCATTCAACAACTGTGTGACAGCCTGTTGGGGAATCACAACAAC 
+TTTACCGGCGCTGTTTTGCGTGGAGCGAACCACACAGTTCATACCTGATTTTAGCAGATC +TTCCGGATTGGGAAATTTTAACCGAACCATTAAGGTTCCTGTGGTGGGATCCACCCGGTT +ATCAACAGCATACAACTTGCCCTTGTAAGGATACAATTTTCCATTAGGAAGTACCAGGCG +GAAATGCGAAAGCTTTTCTTTGATGCTCTTATCGCTGGAACCCAATTGCAAATAGTCATT +TTCCGTAATGAAAAAGTCAGCATTCATATTGCTGTTGTCCACGATGGTTACCAAGGGTGT +TTGATAGGCTACCACTACATCGCCAAGACGAACTTTCGAAACATCGGTTGAACCGGAAAA +TGGTGCTCTTACAGTGGCATGATCCAGATTGGTTTTGGCACTTTCCAGGTTAGCTTTGGC +AGCGATAACAGAAGCTTTTGCTACGTTTACCTTGGCGATAGCATGGTCGAGCTGGATTTT +ATCTACTGCATTGTGAGCCCACAAATTTTTATAACGACGGGCATCCGTAGTATCTGTAGC +CCAGTTTGTTTCAGCAATATTCAGTTGGGCTGCAGCCTGATCGTAAGCAGCCTGGTATAA +ACTTTTATTGATGGTATAAAGCGGCTGGCCTTTCTTCACAGAGCCGCCTTCCTTAAACAA +AATATCTTCCACCCTTCCGTTTACTTCCGATTTCATAGTAATTTGCCTGTTGGCGACAAG +TGTAGCGGAATATTGATCCTGTATCGGAGCAATTTTGTCTTGAGCAGTCGTCACCACCAC +GGTTGCCATTCTGCCGTACATGCCTCTTTTCTTGTTTGTTCCGCACGAAACCAGTAGAAA +TGAGAGAGCTATTGCTGATATTATTGTTATTCTTGATTGAATCTTTTTCATTTGTGATTC +CATTAAATTGTTTTTAAGAAGTCCGAAGACCGAAGTCGGAAGACCGAAGAGGTCTAAAGC +CTGGAGTTCGGAAAATATTATTCATTATTTAATTTTTTCTCTAAAAGCAACCATCATATT +CATCAAATTATAAGCAAACTCATATTGCTTTTTAAATTCATCTTCAGTGATGTAATTTCT +TCTTTTAGCTTTATGCAAGCAAGAAACAACTTCCGCCAGCGACCTAATAGCATAGGACAT +AAACTTTTTGAATTCCAGATTAGATTGTCCGATAGAACCTTCAGAAATATTTAAAGCAAT +TGAATCGACAGCCCTTCTTATCTGGGATGAAAGATTATACACTTCATCCTTTGGGAATTT +ATGAGCTATAGAATTTATTTCCTCTCCAAATTCCATGGCTTTCTGCCAAATAATTAATTT +TTCAAACTTAAATTCCATCTCTCATTAAACCTTATAGTTTTATTCGTCCCTTCTTCCGAC +TTCGGTCTTCCGACTTCCGACTTCTGACTTCCGACTTCTGACTTCCGTCTTCCGTCTTCC +GTCTTCCGTCTTCCGTCTTCCGTCTTCCGTCTTCCGTCTTCCGTCTTCCGTCTTCCATCT +CTAATACTTTAATTTCCCCAGACTCTTATCCAAATCAACCTTATCGACCAGCGCCTGATA +CAACGCATTGATGTAATTATCCTGCGCCTGAAGTAGCGTCGTTTCTGCATTTAAAACTTC +GATGAGTGGCTGCACACCATTATCATACTGGTATTTCAGATTATCATAATTCAGTTTGGC +CAATTTTGTATTTTCTTTTTGTGTTTTGAGATTGGCGATATCTGATTTGTATTGCCTGAA +GTAGTTTCCATATTCAAGCTTGATGTTGTTTTCCAAATCACTGATGTTATTCTTGGAGAT +TTGAATGTTCATCTTTGCAGCCTGATATTGATAATGCTTATTAAATCCCGAAAAAATCGG +AATACTCAGTTGCAATCCAACGAAAGATGTGGGATAAAGTTTATCCTTAAATAACCCGGG 
+AAAAGTATTGCTGTTATAAGGCGCTTCCAGTGTGTAAAATGCAGACAATGTTGGAAAATA +GCTTCTAATAACATTGCTTTTCAACAGTTTGGTTGCTGCCAGCGTAGTTTGTGCCTGTTG +GAATTCGACTCTGTTTTTATAAAACATCGGATTGTTCTCCGGCAAGATTTCCGTAAGCAT +AGCATCGCTGATGGTTCCTTTGATTACCAAAGAATCTTTTATGGGCATTCCCATGTGATA +TTTCAAAGTCTGAATCAGGGTATTAAGATTTCTCGAAGCATTGGCACGAGCGGTAACGTC +GTTGTTCACCAACACCTGTATCCTATCCACATCTACTTTTTGCGCCAGCCCGTTTTTATA +CTGGTTTTTGGTATCTGCCAAAGACTTGGTATCACGTTTGATATTCGCATTCAGCAAATT +CAGCTGTTCGCGGAATACCAGTACACCATAAAAAGCCTTCTTGACATTGGCGGCAACATC +AATTTTGGACAGTTGGGTGTTCTCTTTTGAAAGATTTTCGGACAAATGAGCAGCCTTAGC +GCTTCCTAATACAGAAGGATCAAACAAGGTTTGATCTACATTGAGATAACCTTGCAATTG +ATGAGGAACACCAAATAAAACAGGATTTCCCTGAATAAAAGAAACCTGCCTTTTGATAGT +ATATTGATATTTAGCTCCTGCACTCACCTGAGGCATCAGCTTGCTGTATGCTTCTTTGAC +ATTTTCCCGGCTGATTTTCTCAGACAATATTTTATTTTTTACACTAGCCTGATTCTGCAA +CGCGTAATGAATACAATCATTCAGATTGAATTGATACACTGCTTTTTGTCCTTGGGCCTT +TAAGCTCACGGAAAATCCGGAAAGCAAAAAAGAAACAGCTAAAAAAGCACTGAAAAATTT +ATTCTGATTTAACTTTTTAAGTAGCATTGAACTTCCTTTATTGTTTTTGACCATTTAGGG +GCTTCATTTTGTAATTGGTTTACCCTACAACTAAAAATGAAATGCAAAATAAACGAAATT +TAGTTTACCCCACAACTATTCCTATTAAGAAATCTTAAACTTTTTAACATACATTAATTT +TCCAAACCCGTTGCCGGTATTGGTTTTCAATAATATGTAAGAAGATCTATACATCAAATC +CTTTTCAATTAATCAATCTTACAAAACTACTTGTGGGGGATTAAAGCCGAAATGGCTTCA +GACTTCCATCTTTAAAAATTACAAAAGAAATTGAATCCTTTTTAGACAGATATTTGTTTA +AAATGTATATTAATAAAGGCTTACGCCTGATTTTTAATCACTTAAGACAGCACCTTTCTG +TAATACACAAATGCACTGAGCCAGCTTATCAGCATAATAATTACAACCCCCACGCCCAGC +ACACTGAAATCCAGGTGATTTAAAAAACTGAAAAAAGACAGACTTTCTTTCGCCTCAATA +GACACAACCTGCAACCACTCAATCGTTCCGATGGCAAGTGCCACAAAAACACTGGTTCCT +GTAATGACCATATTAAAAAACACTTTTCTTACGGCATCAACCATAGCCCAGTCATAAATT +CTCATCATGATTAAACCGTCCAGCGAATCCATCAGGCTCATACCTGCAGCAAATAAAAGC +GGAAACGCCAGGATACCCCAAATGGGTAGTTGCGAATCTTTCGCCACCGTGGCCGAAATT +CCAAGAATGGCCACTTCTGTAGCTGTGTCAAAACCCAGTCCAAAAAGAAATCCCAAGGGA +TACATTTTGAAACTTTTGTCAATGGAACGATAAGCAAACCTAAAAAATCTGTTTAACAAT +CCTCTCTTATTCAGTAAATTTTCAGTTATTTCTTCAATTCTTTTTTCTGCACCTTCACCT +TTCTTGTACAGTTTAAACATGCGATAAAGGTTTTTAAGAATGAAAAAATTGATGATTCCG 
+ATCAATGTCAGAAAACTGGCAGATACCACAGTACCAAGAACACCACCGATATTTTCCAAA +AAGTGAATGTTCGAAGAAAATTTTCTGAAGGCAATAATAATTGCTAAGGACAATAGTAAA +ACCACAGTTGAATGTCCCAGTGAAAAAAACAATCCTACCGATACAGGTTTCTTTCCATCC +TGCCTCAGTTTGCGGGTTACATTATCAATGGCGGCAATATGGTCAGCGTCAAAAGCATGA +CGCAATCCAAAAAAATAAGCCAAAGGCCCAAGACTTAAAAGCGGTAGTGAGAATTTTGAA +ACCACAATTAGAAAAGCCCATGCTGATAAGTTAAAAAGCACCAGAAAAGTTATCAGGGAA +TATGTTCTTTTCTTTGCAGACTCCATTGCCTTTCGATTATTGTTGCGGTTTTCTTAAAAA +GTTAAAAATAACAAAATTATATAAAATTAACAGGTAGAAATCATTTGGTCTAATTTAGAA +ACTGTTAAAATATTCTATCGATGCATTAATTGATATTATAAAAAACCAGAAAATCTTCTC +CTGAAGAAAAATATTATTTTTCGGAAAATTGCCTGATAATTTTATAACTTTGGAAACTGG +AAGAAATACCCCAGCGGGTAAAAGATACCACAGGCAATAGGTTATTGCCCAGGTCAAAAG +AAAGTAGATAGATCTAAATACAAGTACTATGGATGCAGGTGAGGAAAAGGCGTCTCCCAA +AAAAACAATTCTGATTGCTGAAGATGATGAAACCAGTTTCTTTTTTCTGAAATTTGTGCT +GGCGAAAGAAAACGTGAATATTCTTTATGCACAATCAGGACAGGAAGCTGTTGATATCTG +CGAGGCTCATCCGGAAATTGATCTGATTCTAATGGATATTAAAATGGCCGGCATGAGCGG +GATTGAAGCAACACAGCTTATAAAAAAACGAAATCCGCGGGTTCCGGTTATTGCACAAAC +GGCTTTTGCGCTCAGCAGCGATAAAGAAAATATCTTAAAAGCCGGATGCGATGACTACAT +AACCAAGCCCATTCGCAAAGAAGAACTTCTGGAAAAAGTAAACTTCTTCCTCTATTCTAA +AAAGGAATCATAACACGATTTTCTCTTTTTTTTCTTTAAGCCTGCTATCTACTAATCAGC +AATAAACATGCTGGTTTTTAAGCATTTTCATTGAAATGGAAAGCGCTGATACCGCAGGAA +TTTTTACTATTTTTGCCAACAAATTTTTTCAATATGCTTGCCATTTCTATTGTTATTCTG +TTGATTTTATCCTTTCAGGTTATGCCTGTCGCATTAGGTATTGATAGTCATAAAATGAAT +GTGTTTCATGTGGCATCCAGCATATTTCTGCTTATTTTGGGCCAAGTGTTGCTTTTCCTG +CTGGGTATCCTCCTGGGCGATAAGTTTATGTATTTGATGAGTGGGTTTAAACGTTTTGTT +TTATTCATTGGGTTTTTCATCATCGCCACGAGAATGATTATGGAGGCGCTTGAAATCAGG +AAGGGAAAACGCACTTACCTGCTTGACAAGGCGAAACAGTTTATTTTACCTTCCATTGCT +CAGGCCATCAACACTTTTTTAGCTGGAATCTTGTTTCAACTTCTAATTTTCAATCTATCG +AAGGATCTGATTTATCTCGGCATCTTCGCACTGGCATTCTCAGTTCCTTTTATTTTCATT +AAAAATGAAAAACAATCTATGCTTGCGGTTTCCCTGCTCTATATGGTCGGCGGAGGTATC +CTGAGCATTCTCTCATTTTACTTTTTATTTATTTGATTTTCAATTTCTTTCCATGAAAAA +GCATTTAAACATAAACGAACCAACACGCATCCGGATAACCAAAGAATTTAAGTTCGAAAT +GGCACATGCCTTAAAAGGATATGATGGTCTCTGCCGAAATATTCACGGCCATTCTTATGA 
+ACTAATGGTGACCGTTTCCGGTTTCCCCATCGAAGAAGAAAATCATCCGAAACTAGGCAT +GGTAATGGATTTCGGAGATTTGAAAAAGATTGTCAAAGAAGAAATTGTAGGTCAGTTTGA +CCATGCTTTGGTACTTTCCAAAAAGATGCCCGTTCCTTTAGTGGATGAATTGAAAAATCA +ATTTGAGCGCATCATTCTTACCGATTACAATCCCACCAGCGAGATGATGCTCATTGACTT +CGCCGCCCGGTTAAAGGCACGACTTCCTGAAAATATTACACTGAAACATATGTTGCTCCG +GGAAACGGTAACGTCTTACGCAGAATGGTTTGCTGAAGACCAGGATTAAGAATTCACTAA +AAAAAGGTAAGTGGCCCTGGTCAATTAAGACCATCCTTTTCTCAATGATGTTTTAAACAT +TCTGCAAATAGGTTTATTAAAAGATCAAAAACCACATTTACCGGGAAACATTTATTTAGC +TATTTTTATGGCGCTTAAAAATTGTGGAAGTTATGTATAAGAAATTATTACTGTTAAGCC +TGACCTTTTTCATGTTCACAAGTTTTACAAATATTGCTTCAGCCCAAAGCAATAATATTG +AAATTGCCCGGCTGAAACAAATTCATAAATTACTGGACTACAGATTTGTAGGAGGTTTTT +ACGGTTTTGAAAAACTCTTTTTTCAAACTGTTTCTTATCCCGACGAGGCACGGCAAAACT +GTACGTTGGGAATTATGATTGCCTCTTTCACTGTCAATTGTGATGGCGATTTAGTAGGTA +TCAGGATCAGAAATTCTTTGGGTAAGCCACTGGACAACCAAGTCAGTAAATTTCTTAAAG +CAACAAAAGGGCATTGGAATCCTTGTCAGGATAAAAAGTTTACGCATTTTGAAATTCCTA +TTCAGTTTACACTGAAAGGAACAGAAACAGACTCCACAGCTGCTGCCCTTGTTTATGTGG +GCAAGAGTGCGGGTTATTCATGTTACCCGGATAGTTATTACAGTGTTCCTAGGCTGTTTC +CTGGTGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGAGTATTCTATAGTCTC +>NODE_39_length_1472_cov_66.0355_ID_77 +GGCCTGGGCGATCTTGTCGCCGTGCTCGTCGGTGCCGGTCTGCATACGGACCTGGCGGCC +GGCCAGGCGCTGGTAGCGGCAGGCCACGTCGGCCACGATGGTGGTGTAGGCGTGGCCCAG +GTGTGGTTCGGCGTTGACGTAGTAGATGGGTGTGGTGAGGTAGAACGTCTCGGACATGGT +GTGTCAGGATCCCTTCTTTTTCCTGCGTCGATTGCGCGAGCGGGACTTGCCGCCCGAGTT +GGTCTGCTGCTGGCCCTTGCCCTGCTGCTGGTTGCGCTGCTGGCGCTGTCCGCCGCGGGG +CTTGGGTGGGGCCTGGGGTTTGGCCAGAGGCTCCAGTTGCTCCAGCTCCTCGGGGGTACC +GGTGAACTCGCGGCCGTCGCTCAGGATCACGGTAAGCTGGCGCTTGAGCACGTTCTGGCG +GATGACCTTGCCCTCCAGGCCGCTGTTGAGGCTGACCCTTTTCCCCAGCTTGGGCATGCC +CTGCTTGAGGCCCTTGTAGGTCTCGAACTCGTAGGTGAGACAGCACATGAGCCGGCCGCA +GAGCCCGCTGATCTTGGTGGGGTTGAGCGAGAGGTTCTGCTCCTTGGCCATCTTGACCGA +GACCGGTTCGAAGTCGCGCAGGAAGGTGGCGCAGCACAACTCGCGGCCGCAGGAGCCCAG +CCCGCCCAGGAGCTTGGCCTCGTGGCGCACGCCGATCTGGCGCATCTCGATGCGGGTTCG +GAAGCGGCTGACCAGGTCGCGCACCAGCTCGCGGAAGTCCTGGCGGCCCTCGGCGGTGAA 
+GTAGAACATGAGCTTGCTGCGGTCGAAGAAGCACTCCACCTTGACCAGGTTCATGTCCAT +GCGGCGGGCGGCGATGCGCTCCTGGCAGTAGCGGAAGGCCTCCTTCTCCAGCTTGGCGTT +CTCCACCAGCTGCCGGAGGTCTTCCTCGGTGGCCAGGCGGTAGACCTGCTTGAGCTGGGC +GGGCTCGTCGCCGGAGTCCTCGCAGCCCTCGCAGCAGCGGTTCTTGGGGGCCAGCTCGGG +CAGCACGTGGCTCACCGGCGGGCGCACCACCTCGCCCAGGGCCTGGCCCTGCTCGGTCTC +CACGATGACCATGTCGCCCACCGAGAGCACGAAGTGGCCGGCGTCGAAGTCGTATACCTT +GCCGCCTTTCTTGAAGCGGATACCTACGATCTTGCCCATGAGCGTCCCTTGGCGGGTGTG +GTGCCGTCTCGCGGCGGCCTGCCGGTCGTCGGCCCGGTTCCCCTGGCGCACGCCCCGGTG +GGGCGGCGGTTATATTCTACAGGCGGATTGCACGCTTGGCATCCCCTGCCCCTGCCGCGC +GCGGGGTGCGGAGGCTATTTGAGGCATGGTAGTGCAAAACCGGCCGCGGCACAACCCGGG +CCTGCGGGTTTCGCATTGGCGCCGGGTCTGCATCGGGGGGCGCACTTGGTTTTTCTGACA +AGCTGCCCGACCCCCTACGGGAGCCGAAGCCG +>NODE_3_length_39215_cov_155.142_ID_5 +GAGACTATAGAATACTCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCACCAGG +AAACAGCCTAGGAACACCAGCACCGCCAAGGCCTATCTGGACGAACACTTCCAGCACAAA +CTGACATTGGAAACGCTGGCCGGGGTCGCTCACCTGAGTGTGCGCCAGCTTAACGAACTG +TTCCGGCGCCAGATCGGCATGACGCCCCATCACTACCTGACCGAAGTGCGAATGCAACAG +GCCTGGCAATTACTGGAAGGGACCGATCTGTCCGTCCAGGCGGTGGCGGAGCGGGTGGGT +TACAGCTCGCTGGCCGCCTTCAGCGACCGTTTCCACCAGCACTTTGGCCATCCACCCAGC +CATTTCCGCCGAACCGGCAAAACACTCCGCCAGAATCGGTAAGACACGCACCGTTCCACC +CCCTAGACTGCCATCCGGAATGAGTCATCCGGGAGCCAGCCATGTCATCGCCTGTCAGAA +CTGCCACTTTCATTGGTGCGATTTCCGTTGTCCTCTGGGGCACGCTGGCCCTGTTGACCA +AGCTCACCGGTGGCCGGATTCCGCCTTTCCAGCTGATGAGCATGACCTTTGGCATCGCGT +TCCTGTTGATGGCCGTACGCTGGTGGAGCCGCGGCGAGTCCGGACTTGGCTATATCCGCC +AGCCTTTCCCGGCCTGGCTTCTGGGTGTCGGCGGCCTGTTTGGCTATCACCTGGCGTATT +TCAAGGCCATGACGCTGGCCCCGGCGGTGGACGTCAGCCTGATCGCCTACCTCTGGCCTC +TGTTCATCGTCCTGCTGTCCGCCCTGCTGCCCGGGCATTCGCTGCGCGCACAGCATCTGG +TCGGCGCGGTCCTGGCACTGGCCGGCTGCTGGCTACTGGTGGGCCGAAACAGCCAGGGCT +TTGATTGGACCTACGCTGACGGCTACCTGGTCGCTTTCGGCTGCTCGCTGATCTGGTCGA +GCTATTCAGTGCTGAGCCGCCTGGTGCGTTCGGTGCCGACCGATGCCGTAGGCTGGTTCT +GCGGCGTCACGGCGCTGCTGGCGCTGGGCTGCCACTTGCTTTGGGAGACCACGGTCTGGC +CCGTGGGCACCCTGCAGTGGCTGGGCGTGATCGGCCTTGGTCTGGGTCCGGTAGGGATCG +CTTTCTTTACCTGGGATCACGGGGTGAAATACGGCAACCTCCCGCTGCTTGGCACACTGG 
+CGTACAGCGCACCGCTGATCTCGGTGGTGCTGCTGCTTCTGGCGGGCTTTGGCCAGGCCA +GTGGAATGCTGTTCCTGGCGAGTGCACTGATTGTGGCAGGGTCGTTTGTGGCTGGCCGGG +CAAAGCATGCATCACCCGAGCTGGCAGAAGAACCGGTGCCCGAGTAGAGACGGGCTCAAG +GCGCGGTTAATCCGACGCGCAACAGTCGTCACCTTGCTGGACAGGCGGGCAGGGGTTGGT +GCCAAAAGAACAGAAGACACAGCAATCGCCGGCCCTCGGCTTCAGTAACGTATGGCAGGA +CTCACACTCATAGAAATACTGGCAGGCGTCCGTCGGCATTTGTTCGGTTTTTTGATGGCC +GCAGGCCGGACAGATGATCGTGGATTCCAGCTGGATGTCTTTCATTGCCGCCTACCTTAC +CAGGTGCGCCCTGTGTGGCCCTGAAAGATTCAGATGCCATGGTTGGCATGGGCGATAAGT +CCAGCTCCCGGGCCTTTCTCCATCAATCCAACCATTGCTCCTTCTGAATGACAACCCTGT +AGCCGTCAACGTCTTCAAAGGTTTTACCCAGTCTATCCCAGTAGGGGTTGAATGACGGCT +TACAGACAAACCCGGCCTTTACCATCGCCTCGCAGGTCCGCACCCATTCCACCGCATCGG +GGATATAAAAAGCCAGCAGATGATCCTGTGTCGGGGCCCGGCCTGCCTTCTCGTGCTGAC +ACTGTGTGAACTCAAGATGGTAGGCGTGCGATCTCAGCCCCAACATAACGCCATCAAAGC +CATCATGCTCCCTGAACTCTCCCAACCGCTCAAAGCCGAGGCCTTCCATATACATCTGAG +CAATCTTTTCCAGTTGATCGGTGGGTCTGGCCACCCTTAGAACAGTGCCTTTATTCATGG +GGAACTCGCAGTATTGGCTTTTGTCGGAGCGGACGTTCTCTAATTCCCCGAATAGCCGGG +AAAAGACGCAATATAGTCCCTGCCCGCAAATTTAACCCAAAGGTTGCCATTTCGGTATTT +GGCAATGAAGACCATATGCCCAAAACTCAATCCTGAACCACAGCCCGCCCAAGACCTGCC +TTCCGGAGGCGATTGCACCTCTCACCGCATGCTGTGGTCGAACCCGGCATTTAAAATCAT +CGGCAATACGTCCTGTTCCGTCAAACCGGCGCCCCCATTTATTCCAAGCTTTATTCCACT +CTCGCCCTGTTGGTTCGCGCTGCTGGATTGAACCTCTGAGTTTTACCAGATCAATCCACC +GAAAGAATCCCCGGCCGAAACGGCGTTAGACTGTTCAGTACATGGCTGTCTGAATCAGAG +GAGCCTGTATGAGTCATACATCAAGCGATCATTCGATTGCCTTTCCCCTGCGCCAAAAGT +CAAAGCTACGCCATCTCGCTCAGGAGCTTCTCGCTAAAGCGGATATCAGAATTGACGGAG +ACCGCCCCTGGGACATGCGTATCCTCAAGGAGGGCGTGCTTGAGCGCATCCTCGGGGAAG +GCAGTCTCGGCCTTGGCGAGAGCTACATGGACGGCGAGTGGGACGCCGAGCGCGTGGATG +AGTTTGTCTATCACCTGATACGGGCGCAACTGGATCGTGAGGTGCGCCCCTGGAACCTGA +TCCTGCACGGTTTGCGCTATCGCCTGTTCAACATGCAGAGCCTCAGGCGAGCCTGGATGA +TCGGGCAACGCCACTATGACCTCGGTAATGACCTCTATGAGGCGATGCTGGATCCACTGA +TGACCTACTCATGCGGTTACTGGAAGACCGCCACCCATCTGGCAGCCGCACAGGAAGCCA +AGCTGGAACTGATCTGCCGGAAACTGCAGTTGAAACCCGGGATGCGCCTGCTGGATATCG +GTTGCGGCTGGGGCAGCCTCATGGCGTATGCGGCGCAGCACTATGGCGTCGAGTGTGTCG 
+GCGTCACCGTTTCGGAGGAGCAGGTGAAGTGGGCGAGGCACCAGTACAAAGGGTTACCGG +TCGAGTTCCGCCTGCAAGATTACCGAACGCTCGACGAGCAATTTGACTGCATCGCCAGTG +TCGGTATGTTCGAGCACGTCGGGCACAAGAACTATCGGGAGTTCATGCAGGTGGCCCACC +GCTGCCTGGATGATGGGGGCCTGTTTCTGTTGCACAGTATTGGCAATAACAGCCGCGACT +CCGGTAGCGACCCCTGGATCGACAAATACATCTTTCCCAACGGCGAACTCCCATCCGTGG +GACAGATCGGCGATGCCGCGGACGATCTGTTCGTGATCGAGGATCTGCACAATTTCGGCG +CCGACTACGACAAAACCCTGATGGCCTGGCACGCAAACTTTGAAGCCGCCTGGCCCAAAC +TCGCCTACCTGGGCGAACGCTTCCGGCGCATGTGGACGTACTACCTGCTTTCCTGTGCCG +GCACATTCAGAGCGAGGGACATCCAGCTGTGGCAGTGGGTACTCTCCAAGCGCGGCGTCC +AGTCCGGGTATATTCGTCCGTACTTCTGAACCGGGCCGCATTTATCAAGTGTCCCGTCTG +ATCTGGCGGTAGTATGATTAAAAACGTACACCGCCATTACCTCGGGCACCGACGCCTTCA +AGTTCAGCGTTTTTGCATCAGCAGCCCTTACTGAACTCGCCGGATCAAGACAAGCTCTTA +CCGGGAGACCATCCAATGCAAACGCTCAATATCCCCAGCCTTGCCACAGTCACCGCCATT +ACACTTTTGACCACCGCCTGCGCAAACAATCCAACGCCGGAAACACTTCAATCCAGGGGA +TTGCACCCTTTGGATACGACCCAGCTTCATCAGCTCTATAGCAAAACGCTTCAGTTCGAT +TGGCGAAACGCTCGCAGCCGGTCAGGTTCGGGCGAATACCAACCCAATGGTGAGATTTCC +ATCGAATGGAGCGGCGAATCCTTTAATGGCAAATGGCGAATCCTGAATAACCATTTCTGC +GCCACCTATGCTTCGATTCATAACGGGCAGGAACAGTGTTACATGGTCTACCAGACAGGC +GCCAGGAGATACGTGGCGTTCCTGAACGGCGATTATTCCTATAGTTTCAATGTCAAGAAG +GTTAAATAGCTGGTGGTTTAGCGTTTACAGGGGATTTCTCTGAGACTCAACCAAGCATAT +TTAAAGCACCGGCCGGGCACGCCGCCATAAAACACGCATCGGGACAGGGGGAGGGGACCT +AACAGGCTCTGCCCCGCCCACACTACTTCTTGATGAAGTCCCCAGAACCCGCTGGCGATA +ACGTCTCTGTAGGATATCGATAGATGTTGGGCCCGTTACGAAGTTAAAAAATCTCTGGCC +TTAGCAATGTAAGAGACTTCTCTACCCGGGGCCGACAATTCCGCTTCAGATAAGCCTAGA +TATCGACCTTTAATATCGAAAACCGATTTTATTGCTGCTCCAAAATTTAGAGTAGCTACA +CTGGCTACAATTTCTGGAGTGGAAGATATAAGGGTTGATAGTATCCCTTGACGAAATTTC +TTATGTTGGATTTCCATATACTTTCTGTATTCATATAAAAGATGCTCTAGCTCCTCCTGT +ATTTCTCTCGGCGATTGACCAGCAGACGATCTATCTTTTAACCAAATTCTAAGAGCGCGC +AATTTAGCAACAGTTTCTTCTTCATTCCTAAACTGAATTAAATCTTCCCAAGGGATGTTC +TCTGGAGGCATAGGAATATTGTTAATTGTAATTTCAAGTGCGTTAAGCTCATTCCCTTGG +GTATTAAGATTGACCGCCTCTGCATGGGCTACAACTGGTTTATCGTTATACATTAGCCTG +CTTGATAAATGCCTGATGAGGCGATCGGCGTGAGCTTCATTTTCGGCTTGATCTGGCACT 
+CCCGTTCCCACCTGGAATGGTAAAAGAAGGTCACAGTCCTTTTTCATTTCCTCCATCGGG +TTCCATGTAGTTCCGTCTGCTCTACCAAAGCTAATAAGATTGGGAATATCCAAGCCACAT +AAAAGTGCAACCCCACGCTCTCTAAGGTATCGAAGTTCTGATGAATAACGCTGATTATCT +TCTTTATTGAATGTGCGCCTCGACAACTTCCAAATCAGAATGTGGTCAAAGAACAACGCC +ATTCTTTGAGCTTCCGCTGTATCCAAAACCAGATCCATTGGTGCCATTATTGCTCTACTG +AGCATCAGTATGATCTCCTTATAGCCTACCGCGAAATTCAGCGGCACGCTTTAGCACGTC +CGGTGGAGTGGCCGTGAGCCACGGAACAAATTGGAATGCATGGTTAGGTTGCGGCTTTGG +CACATAAACCAAGCCAACGGTGAACTGTGGTTTGCCCTCCACCTCATCCATTCGTCTGGT +AAGCACCAGATCACTGGCACCTTCCCTTTCGGTATAAAGATCCACAAGCACATCCCAATA +AGACCACATCCATTGCACGCATGATGTACTCCATGTTTCCTCAGGCAGCTCCACAAGAGT +CTCACCATAACCAGCAATATAGTCTTGGATATGCGCAGCAACATCAGGCGGAACCGGAGA +CACGCCCTCAATTAGCTGGCTTAATTGATAGTCACCTCTCACGAACGCGCTGACGACTTT +GCGCAGCATCGGACGACACGAAGCGGCGATTGGGTGCGCTGCCTCTTCATCTTTTACTGC +AACTGGTAGGTCTTGCATCAGTTATGATTGCCTAACAGGTAATTCACCGTAAACTTACAC +ATATCACTCACGTCCGCCATCCCCACCCGCATGCAAACTTCATAACAACCAACCCATTGT +TTCAATTGACTTAATCCAAGTTAAATCGACCAATCCATCCGGGAAACTCCGAGGCGTGAA +ACATTCCACACGTGTTTCACGGACACATCTCCAACTTCACCACATATTCAATTCCCGGGC +AAACGCAGGGTCGTCCACTCTTTGCATGGCATCTTCGCCACTGGATAGTCGACAGCGTTC +GGTGATCCACAGCACAGCCATCGCGATGGTGCGAATTGCATTAGGGATGAGGGATTGTGG +CTCAGGAGGCTGGTCGCTTCAGGCGCGCGCCCGTGCCGGGCGCACAAAAAAAACCCCGCG +GGAGCGGGGTTAAGGCTAGCGCTGTCTTGGAGGGAGCAAATAACAATCAGAGAGCACAGA +ACAGGGATCTCTCATTGCTCCACCGATCCCTGCCATTGTTATTCTACAGATCAGGCCTTA +CCTCCCAGTGGCCGCAGAATTACGTTTAGGTAATTGCCACGTTTCCGGTTTGAAGTGGCT +CATCCCCAGTGCCAGCCAGGTATGCGGGTGTTAGGTTGTGCACTCTATTTTTTGCCCTTA +ATATTCCGCCATTACTCAAAATGCCCTGGATAGACAAGCCCTTACGTATCCGCGCTATCC +GGTGGCCCGGCCTTGAGGTGGTGTCAGGCCGTTTGCGGATCAATGAATGGAGAGCGACAC +ATGAACACACAAAAGTTGATTAACCGGGCGCTGAGCGGTCTGGACGATCTGGGCTATCAG +CTGGATTCCATGGGGATCACCAGCAAGGTTAACCGCCATAATGTGATTGCCTATGTGATG +GCCGAGCAGAAACACTGGGAAGGCGAATACGACAGTCTGGTGGCGCGGATTGACCAACAG +CGTTTTCGGGTCGAACAGATCGTGGGCCGTGTGGAAGGACTGGTGCGCGGCGGTGCTGAG +TTTGCGCTGAAGCCGGTCAGCGGACTGCGCAGCCTGGTCAAAGCCTGAGTCAGCGGCTGG +GGCTCAGGCCTACTGACGCCTGAGCCCTGCACGCGCCCCGACTAGCTGGCTGAACCTTCC 
+GGTTCGGTTGTGCTGGTGGGTTTGATTTCCTTGCGCCTGTTGGGTTTCAGACGGGGGAAA +CGCAGGGTCACTTTCAGCCCGGGGTTTTCCCTGCCGTCCGTATAGCGATCCGCCAGCTCA +ATCCTGCCCTGGTGCATGTCCACCACGGCGCTGACCAGACTCAGCCCCAGGCCATTCCCC +GGCTGCGAGCGGCTCTTGGCCACCCGGTAAAAGCGCTCGAATACCTTGTCCTTTTCTGAA +TCCGGCACGCCGATGCCACTGTCTTCCACCTGGAAGATCGCATCATCCTCCTCCTGCGTC +ACCAGCACCCGGATATTACCTTCAGGCGGGGTGTACTTGATGGCGTTATCGATCAGGTTG +CTCACCACCTGAAAGATCAGATCCCGGTCGCCCTCGATGGTGACATCCTGCGGGATATCC +AGCACAAATTCCTGTTCCTTGTCTTCCGCCAGGGCTTCGTACAGTTCACAGGCATCGTGC +ACCAACGGGCCCAGCTGCACGGGCGACTTGTCGGCGGAGTGGCCGGTCATTTCCAGGCGG +GCAATTCTTAACAGCGCATTGAAGGTCGCCAGAAGCTGATCCGTCTCGGCCACCGCGCGG +GCAACCTGATCCCGGGCGGCGTCACCCTCCACCGTGAGCAGGGTATTTTCCAGCTGGGTG +CGCAGGCGCGTCAGGGGGGTGCGCAGGTCGTGGGCGATGCTGTCGGAGACGTGGCGGATG +CCTTCCATCAGGTGCACGATACGATCGAGCATCTGGTTGAGGTTGTCCGCCAGCTGATCG +AAATCATCCCCGGTGCCCCGGGCAGGTATGCGCAGCGACAGATGCCCGTTCATGATCTTG +CGGGCGGTCTGGTTAATCACCTCGATGCGCTTGGCGGTACTTCGGCTGATCATGAAGCCG +CCGAACAGCGACAATGCCAGGGTAATACCCATCCCCCAATCGATGGCACGTTCGATCAGG +TGCTTGAGGGTGGTGAGTTCCTGGACATCGCGGCCTACCAGCAAGCGGAGGCCGCCCTGA +ACCTTGAACACCCGGGCCCTCGCAAGGTGGGGCTCGCCACGCCAGCCGACGGAGTCGTTC +AGGGTAAAGTTGATCCAGCCGTTGCTGGCCTGCACATTCTGTGGCCAGGCGGACAGGTTG +CCGGCGAGCTTGTCGTAATCGGAGGTGGTGAACAGGTACAGGGACTTGCCGTTGGGGTCC +CGGGCAACCCGTTCACGCACGATGGCGATCAGTCCATTGATGCCGCGGCTGCGGTATTGC +TCGGCCAGGCCCGTGATTTCCGCCTCGATGGTTTCATCGGTCTGGGCGGTCATGAACCCG +GCCGTTCGCCAATAGATGAAGGCGAGCAAAAGGAACACGGACGTCGCGAAGACGACCATG +TACAGCAGCGCAATTTGGAAGGTGGAGTTTCTAAGTTGGCTATGTATCTTCACGCAGCAT +ATATCCAGCTCCCCTTACTGTCTGGAGCAGGGGTTTCTCGAAGCCTTTGTCTATCTTGGC +TCGTAAACGGCTGATATGCACGTCAATCACGTTGGTTTGGGGGTCAAAATGATAATCCCA +GACCTTTTCCAGCAACATGGTGCGGGTGACCACCTGGCCGGCGTGGCGCATCAGGTATTC +CAGCAGACGGAATTCCCGGGGCTGCACATCGATGGACTGACCGGCGCGTTTGACGGTACG +GGCCAACAGATCCATCTCCAGGTCGGCCACTTTGAGTACGGTTTCCGGTTCGTTGGAACT +GCGGGTGCGGCGCACCAGCACTTCCATGCGCGCCAACAACTCGGTGAAGGAGAACGGTTT +GGTCAGGTAATCATCGCCACCACCGCGCAGGCCTTCCACCCGGTCGTCCACATCGCCCAG +GGCACTCAGGATCAGCACCGGGGTGGTGTTGCCGGTGGCGCGGACCGTCTTGATGATGTT 
+GAGGCCGTCCATGCCCGGTAGCATGCGGTCGACAATCATCATGTCATAGTCTTCGCCCGC +GGCCATCAACAGGCCGGTTTTGCCGTCGGCTGCATGGTCCACCACGTAATCCGATTCTTT +CAGACCCTTGATCAAGTAGGCGGCCACATCCTGATCGTCTTCAATCACCAGTACGCGCAA +CTTGGTTTCTCCTTGCGGTTCTTCCGTCGGTTCGTATTCCGATTGAGCTTCGGTCGCGGT +TATACCAAACATTGTTCAAACCTTTCGCCAGTGATGGGTTTCACGGCAACACCTGCATCA +AAATCAGCGGCCCTGTTGGGCAGACACAGACGTCTAATTTTTCGCCAACATGCATAAAAG +ATTACTGTTTCCCTGGCGAATGGCCCAGTTACGATGTCGTAATGGCCCTTCTGGCACAAC +AGTTGATGTCAAGCTTTTCGCTGAAAATGTAATCTAGAGCCATGATTTAAGACAAGATAG +CCCGTCGAATGTTCCCACTTCCGGTACATATTCGGCGGCCACCCAGCCTTCATACCCCCA +GGCATCCAGCTGCTCAAAAATTCGCCTAAAGTTAATCTCCCCGGTGCCGGGTTCATGACG +ACCGGGATTGTCGGCAAACTGGATATGGCCAATTTGGGGCAGATTGGCACCCAGGGTTCG +CAGCAGATCCCCTTCCATGATCTGCATGTGGTAGAGGTCGTATTGAAGCTTCAGATTGGG +CAGGTTGAGGCGATCAATCAGGTCCAGGGCCAGTTTGGAGTGGTCGAGCAGGAAACCGGG +GACATCGACCTTGCTGTTGATGGCCTCGATCAGCAATGTGATGTCCTGTTCCGCCAGCCG +GGGCGCCGCCCAGCGCAGATTGGCTTCAAAGGTGGCCCAATAGGGTTCAGGCGGGGCCCC +TGTGGGCAACGGACCAGCCAGGCAGTTGATCTGCCGGCACTTCATTATCTCCGCATAGCG +CAGGGCACGTTCCACACCCTCCCGAAAGTCCGCTTCGCGTCCGGGCAGGCAGGCGATACC +CCGTTCGCCGCCATCCCAATCACCCGCGGGCAGGTTGAACAGGACCTGCTGCAGCCCCTG +GCCCTGCAGTTGGGCGGCGATCGCCTCAGGTGCCCAGGCATAGGGAAACTGGCACTCGAC +CGCCCGGAAACCGGCCCTGGCGGCCTGCTCGAAGCGTTCAAGGAAGGGGACTTCGTTGAA +CAGCAGACTGAGATTGGCAGCAAGTCGCGGCATGGTGTTGATCAGCGCTCCTGACCATCA +GACGGGTCACTCATACGGGTTTGATTGAGGTGTTCCAGTTGCAGCAGCAAGGCGCTGTGA +TCACATTCCCCCTCGCCCAGCGCAATCAGGGCGCGGTAACTCTGAAAAGCCTGCTGGGCC +AGAGGCAGCGTGAGATCTTCGGCACGGGCCTGGTCAAGAATCATGCGCATGTCCTTGAGC +TGGATACGCGAGGGCGCGCCGGGCTCAAAATCGCGCGCCAGCATCCGCTGTCCGTGCAGC +TCCAGGATGCGACTGCCTGCGAAGCCGCCCAATAAAGCCTCCCGTACCGCTTCCGGGTCG +GCCCCGCCCTTGGCGGCAAGTAACAGTGCCTCGGAGACCGCACCAATGGTGATGCCGACA +ATCGCCTGATTCGCCAACTTGGCCAGCTGGCCGCTACCGGCAGGGCCGATATGCGTGACC +CGCCCCAGTGTTTCCAGCAAGGGACGCACGGCCTCCAGGTCTTCATGACTACCGCCAGCC +ATAATGGACAACGTCGCCTGTTCGGCGCCCACCGTGCCCCCGGAGACAGGGGCATCGATG +TAGCCCACCCCCCTTGCCTTCAGTCTCCCTGCATGGGACTTGGCCATCTCCGGGGCGATG +GAACTCATATCGAGAAAAACCGCGCCCGGCTGGATCGCCTCGGCCACTCCCTGCTCAACC 
+AGTACCTGCTGCACGGCGTCGCCGTTCTCCAGCATGGTGATGATGCGGTCCGCCTGAGCC +ACGGCCTGGGCAGCGGATTCAGCCACCCCGGCCTGGCCGGCCAGCGCACGGGCCTTTTCC +GGCGAACGGTTCCAGACGGTCACCGGATAGCCGGCATTGATCAGGTTGGTGGCCATGGGG +CGGCCCATCAGGCCAATGCCCAGGAAGGCGATCCTTGGCAGGTCGGTCATGCGATGCGGC +CTCGCAGTCACAGTTTCAGGGAGAGGGATAGCTTAAACAAAAAGGGCCGCCCGAAGGCAG +CCCTTTTCGACATCCTGCTTGCCGAAGACACATCAGGCGATGGATTCTTCACCCATCATC +TTGCGGATCTTCTTCATGGCATTCTTTTCCAGCTGGCGGATACGCTCAGCGGAAACGCCG +TATTTGTCTGCCAGCTCGTGCAGGGTCGACTTGCTTTCCGACAGCCAGCGCTCGCGCAGG +ATGTCCTGGCTGCGCTCGTCCAGCGAATCCATGGCCTCCAGCAGACGGCTGTTGGAATCT +TCAGTCCAGTCGGCCGCTTCCAGTTGCTGGGCGGGGTCGTAACGCCGGTCTTCGAGATAG +TAGGCCGGTGCCTGCCAGGCGTTGTCGTCGTCATCATCCGTCGGGGCATCGAACGCGGTG +TCCTGGGCGGCCAGACGCCCTTCCATCTCCCGCACGACCCGGGGCTCAACGCCCAGGTCT +GCCGCCACGGCGGTGACTTCGTCGTTGTTGAGCCAGGCCAGCCGCTTCTTGGCGCTGCGC +AGATTGAAGAACAGCTTGCGCTGGGCCTTGGTGGTGGCCACTTTGACGATACGCCAGTTG +CGCAGAATAAACTCGTGAATTTCCGCCTTGATCCAGTGCACCGCAAACGACACCAGGCGC +ACACCATAATCAGGGTTGAAGCGCTTGACCGCTTTCATCAGCCCCACGTTACCTTCCTGG +ATCAGATCTGCCTGCGCCAGTCCGTACCCGGAATAACTGCGGGCAATATGGACGACAAAA +CGCAGGTGTGAAAGCACCAGACGACGCGCGGCTTCCAGATCGTTGTCCTGCTGCAAACGC +GCTGCCAGCTCCCGCTCTTCTTCCACCGTGAGCATGGGAATCGCGTTAACTCCCTGAATA +TACGCCTGAACATTGGCGCCAGGGATCAGCTTGTCCATGACTTGAAGACTGGTACCCATG +CGTAACCTCCGCAATATTGGACCGTATAAGATAGCACTTGGCTCTCAAGACATCCAGCTC +GAACAAAAGTTCCCTTTAATGCCCGCTTTTCATTAAACATCAATAGCTTAAGCGTGAGTA +GGGAGATCGCCGGAAACCGAAGTGCTGTTTAACCACTGGTATGAGGCTAGTTCAGACACC +CTTCCAACTTCAATACGCGCATGCCACTACAGTGGATGAATCCATCAGAAGTGGGGATGA +ATCGGTCAAATACAAGCCGATTGATGAAAGACAGGCGCCTCAACGCGGCTCAATGGCATC +CAGGTGACGTTTCACCGCCAGCCATGACCCCATCCAGCCCAGCAGTACCCCGACCATGAT +CACCAGCAGGAAATCGCCAAAACCGGCACCCTGCAACTGGAAATGGCTGCCATAGAGGCT +CGCCAGCCGATTGACCGGGCCATCCAGCCAGGCTTCAAACAGTGCCACCAGGATAATCGC +CACAATACTGCCACCGAGCCCATACCAGGCCCCGGTGTAGAGGAACGGGCGACGCACGAA +GGCATCGGTGCCTCCCACCAACTTGGCGACCACGATTTCATCCCGGCGGTTTTCGATGGC +CAGACGGATGGTATTGCCCACCACCAGCAGCACGGCCAGTGCCAGTAGGACACCCAGGGC +CCAGACCGCGTTCACCACCAGGCCGATCATGGCCCGCAAGCGCTTGAGCCAGGCCAGGTC 
+CACCTGAACCTCGGCCACCTGGGGCAGCTGCTTCAGGGACTGGCTCAGGGATTGAACTGC +ACCGGGGTCCTGCAGCGCGGGTGCAACGATCAGGGTATTGGGCAGCGGGTTGTCATTCAG +GTAGTCGAAGGCGGCACTCAGGCCCGAATCCTTGCGCAGCTGGGCCAGTGCCTGCTTCCG +GGTCACCAGCTGCACATCGCGCACGTCGCCACGACTGCGAAGCTTGCCCTGCAGGTTCAT +GGCCGCGGTTTCGTCCACGTTCTGTTTCAGGTACAGGTTGATGCGACTGGAATCCGACCA +GCCTTCCCCCAGGGACTGGGCACTGGCCAGCAGCAACATCAGGCCCAGGGGCAGCGCCAG +CGCCACGCCCATGACCAGCCAGGTCATCAGACTGCCGATCGGGTTGCGCAGCAGGCGAAT +CAGGCTGTCGCGGGCGATCTTGCGATGATGGGAGGCATAGGCGTCGAGAAGTTCCCGTGT +GGGATTCTCTGCGACGCCAGCGCCGCGCCGGCCACTTTTTTCCCGGGATTCATCCACCAA +GTTTCACCTCAACCATCCAGCTTCACCTTAACCATAAAGCGCTTCCTCATCCGGCATACG +GTCGCCCTGTGCCAGCCGCCCGCCATCCAGGGTGATCACCCGGTGACCCAGATGGCGAAT +CAGGGAAATATCGTGACTGGCCACCAGCACCGTAACGCCCACCTGATTGAAGGCTTCGAA +CAGATGCATGATGGAAGCGGACAGTTCCGGATCCAGGTTACCGGTGGGCTCGTCCGCGAG +GAGCAGCGGCGGTTTGTTGACCACGGCGCGGGCGATACCCACCCGCTGCTGCTCGCCGCC +GGACAGTTCCATGGGATTCATCCGCTCCTTGCTGAGCAGCCCCACCTTATCCAGCGCCGC +CCGCACCCGCCGGCCCACTTCCCGGGGCGCGATGCCCATCACTTCGAGTGGCAGGGCGAC +GTTATCGTAAACCGTGCGGTCAAACAGGAGCTGGTGATTCTGGAACACCACGCCGATGTG +GCGGCGGATGTAGGGAATGCGGCGGCGGGGCAGTTTATCCAGCACCTGGCCGCCAATCAC +CACCTGACCTTCGCTGGCCCGCTCCATCAGCATGATCAGCTTGAGCAGCGTGCTCTTGCC +GGCACCGGAGTGGCCTGTCAGAAAGGCCATTTCGCCGCGTTCGAGCGCGAAGCTGACATC +CCGCAGGGCGACGTGTCCACCTTCATACCGCTTGGTTACATGCTCAAACCGGATCATCGC +TACTGCACTCTTGAAAAAAATCAGGCTGTGAAAAGGATCAGGCCGGCTCGTCAAACAGGG +CTTCGACGAACTCTTCAGCCTGGAACGGGCGCAAGTCACCCACCTGTTCGCCCACACCGA +TATAACGGATGGGCAGCTTGAGCTGACGGGCGATCGCGAAAATGATGCCGCCCTTGGCGG +TGCCATCCAGCTTGGTGAGGGTAATCCCGGAGACACCCACCGCCTGCTGGAAGATCTGGG +CCTGGCTCAGCGCGTTCTGACCGGTGCCCGCGTCCAGAACCAGCATCACTTCGTGCGGCG +CCTCAGGGTCGAGCTTCTTCATTACCCGAACCACTTTCTCCAGCTCGGACATGAGGTTTT +CCTTATTCTGCAGGCGACCCGCCGTATCGGCGATCACCACGTCGAAACCCCGGGCCTGGG +CCGACTGCACCGCGTCGTAGATAACCGAAGCGCTGTCCGAGCCGGTCTGCTGGGCAATCA +CCGGCACGTTGTTGCGCTCACCCCAGACCTGGAGCTGTTCCACCGCGGCGGCCCGGAAAG +TATCGCCAGCGGCCAGCATGACACGCTTGCCCTGATCCTGGAACCGTCGCGCCAGTTTGC +CGATGGTGGTGGTCTTGCCCACGCCGTTCACGCCCACCATCAGGATGACATAGGGCTTGT 
+GCCCATCGTCGATATTCAGTGGGGCGGTGCTGGGCGCCAGGATGCCATGCAGTTCTTCAC +GCAGCGCCTGCTTGAGCGCATCGCCGTCCTTGAGCTGCTTGCGCTCCAGCTTTTCGGTCA +GCGTGTCGATGATCTCGGTGGTGGCGGTGACACCGACATCGGCGGTGAGCAGCAGGGTTT +CGATGTCCTCCATCAGCTCGTCGTCGATGGCCTTCTGCCCTTTCAGCAGGTTACTGAGCC +CCTCGGAGAAATTGCCACGGGTACGACCCAGCCCCTGCTTGATGCGGGCAAACCAGTTGA +CCGGCGCTTCTTCAACTTCAGGCTCGGCAGCCGGAGTTTCAGCCGGAGGCGCCTCGACCG +GCGGGGCTTCCTCGGCCACCGGTGGCGCGGGTGGGGCCTCGGCCTCCGGTTCGACCACTT +CAGCTTCAGGCTCGGCCTTCGCCGGCTTCTCTTCAGCGGCGGGGGCTTCCGGCCTGGGCG +GGGGTGCCTTCGCCTCGGGCACGGCTTCCCGCTGCTCAGGAGCAGCCGGCGGGCGCTCGG +CGACCGGAGGTTGGGGTTTTTGCGGCGGCTTGCGCAGCCTCAGGCCGATATCGATAACAA +AGGCCAGTACCAACAGGGCCAGTAGGCCAACGGAAATCCACTCTGCAGTCATTTACGCGC +TTCCGGTTTCAAATTTGGAGGCTTGGCAGCGCCATGTGAGGCCGAATCAGGACGGGCCAG +AAAAATAAGGCGCCAACGCCGGAGAAACGCCGTATTCTACCGGGCGGTTTGTGGGGCGTC +CAAACCCCCGCTAACATAGCGACATCTTCAAGGAGTCCATTGATGCCCCCAACCCGTCGT +TCCCCGCCCAGACAGCAGCGCGTCACCGCAAAAAAAGCGGGCGGCCTGTCGCGGCTAAGG +ATCATTGGGGGCCAGTGGCGCAGCCGCCAGGTGCCGTTTCCGCCGGTGGAAGGCCTGCGG +CCGACGCCCGACCGGGTACGGGAAACCCTGTTCAACTGGCTGGCTGGCGATATCCCCGCC +AGCCGCTGCCTGGACCTGTTTGCCGGCAGCGGCGCCCTGGGGCTGGAAGCCCTGTCACGG +GAGGCACGGCACCTGGTTTTCGTCGACACCGCTTCCGAGGTAATCCGCACACTGCGGGAA +AATCTGCGCACCCTGGGCTGCCAGCAGGCCGACGTCTTCCAGCAGGACGCCGAACAATTC +CTGCAACGACCACCGGCCACGCCTTACGATGTCATCTTTCTTGACCCGCCGTTCCGTCAA +GGCTGGCTGGACAAAGTGATCCCGCTGCTGCAGCAACCGGGCTGGCTGAAACCGGGCGGC +TGGGTCTATGTCGAACATGAAGCGGAGCTGAACGCCAGGCCATGGCCCTCCCACTGGCAT +GAACACCGTCAGAAGGAAGCCGGTCAGGTGGTGTATCGACTGTTTCACGTGGCCGATGCC +CTCAAGGACACAGCTGAAGGTGTGGAACGGGCATCCTGAAAACAGAAGCCGACAACCATT +GACCGGCTCATCACAATCGGTTTAAAACAACCGGTTTAATACAATCGATTTAATACGATT +AATTTAAAACGGCCGATTTAATGCGATCGATTCAATAAGGTCGACTCAATGCAGTCGATC +AATCACAACAGGGGGATATCCAATGCACCTTTGGCGCATTGTGTGGGTACTGATCGTTCT +TGTGGCCGTCCTTGGCGGCGTTTATTTCCTGTTTCCCGGCACGATTGTCAACGCCGACAA +AAGTTTCGAGCTCTGGCGGGCCGGCCTGGCTGTCCACGACATCAACGTCGACGACCAGCA +CATTCACTATGTGGATAGCGGCGGCCAGGGCCGGGTGGTGCTGATGCTGCATGGCTTTGC +GGCGGATTATTACAGTTGGCCGCGCATGGCGCGTTATATGAAGGCCGGCTACCGCGTGAT 
+CGCGCCGGATTTGCCGGGTTTCGGTCAGAGTTCACGCATCGCTGCGGACAACTACGGTAT +CAGCCAGCAGGCCCAGCGGATGCATGACTTCCTCAGGGCGCTGAATGTCGACAAAGTGGA +CATCGTCGGCAACTCCATGGGTGGCTGGATCGCAGCAGAGTTCGCCGCCCGCTTCCCCGC +CCAGACCCGTACCCTGACGCTGATCGATACCGGCGGCATTACCGCGCCTCACCCCAGCCC +CTTCATGCAGGCGGTGGAAAAGGGCGAGAATCCGCTGGTGGTCCACAACCGGGCCCAGTT +CAATCACCTGCTGACCATCGTATTCCACCACCAGCCGTTTATTCCCGGTCCGCTCAAGGG +CTACTTCGCAAAACAGGCGGTTGAGCACGCCGCGTTCAATGAGAAGGTCTTCAAGGATCT +TACGGATGACTACGTGGATCTGGAACCGCTACTGCCCAAGCTGACCATGCCTACCCTGGT +CATGTGGGGCCGCTATGACCAGATCCTCGATCCCAGCTGCGTGGAGGTGCTCAAAGCCGG +GCTGCCGAACGCCACCATCAAATGGTTCGATACCGGGCACGCACCAATGCTGGAGCAACC +GAAGGCGAGTGCGGAGGTCCTGAAGGCCTTTTTGCAGGCGAATCGTGGGGATTAGGTAGG +TGAGGAGTGAGGGGTGAGGCCGCGCGCTTTGCGCGCTGGTGAGGGGTGAGGAAATCTCTA +AAACCTCCTAACTCCTAACCGCCTTACCCCTAACCCACCCACAAAAAAGGGCACCCATTC +CTGAGTGCCCCTCTTCCAGTTTCCTACCCGGTTCAGGCGGTCTGCGGCCGCAGCGAGTAG +CTGCGCAGCGTGGCGGCAAACTCTTCCAGATAGCGCACACCACTTTCTTCTGCCTGATGG +CACCATTCGACCAGCGCGTTCAGCTTGTCCTGCGGGCGCATGCCCGGACGGCGCTGCCAG +ATCGCCTGCAGTTCATGGCTCTTTTCATAAATGAGCCGGACCACCGCACTGTTCTGAAGC +ACCTGTTCCAGGGACTGACGCTCCTGCGGCTTGATCAGGCTTTCCTCACGGAATACCAGC +TTGCGCACGCGGCGGTACAGGGGCTTGATGTCCTTCTCCACCACGCTGCGTTGCTGACGC +AGGACCGGCTCCAGCACGCGCTTACGGTACAGGCGCATGACATGGAAACGGTTGTTGGCG +ATGGCCTGTACGGTTTCCACATCCATGTCCATCTTGCCCGGAATCTGGTGGGCGATGGGT +CGGTAGCCTTTGGGCTTGGCCAGACCAAACAGCTGCAGCAGGCGGATGTAGCCCCAGCCG +ATATCCAGCTCCCACCAACGGCGTGACAGCTTGGACGAATTGGGGAAGGTGTGATGGTTG +TTGTGCAGCTCTTCACCGCCAATCAGGATGCCCCAGGGGACGATGTTGCGGGCGTTGTCG +GCACACTCGAAGTTACGATAGCCCCAGAAATGGCCGATACCGTTGATAACACCGGCAGCA +AAGATAGGGATCCACATCATCTGTACAGCCCAGATCCAGATGCCGTTCACACCGAACAAC +AGCAGATCGATCACCGCCATCAGGGCAACACCGCCCAGGCGATAGCGGGAATAGACATTG +CGCTCAACCCAGTCTTCAGGCGTGCGCTGTCCATACCGTTCACAGATTTCATCGGAGATT +GCGGCGCGATAGTTTTCGGCACCCCGCCACATGATTTCACTGAACCCCTTCACCACGGGG +CTGTGGGGATCTTCTTCAGTTTCGCAGGTAGCGTGGTGCTTGCGATGGATTGCCGTCCAC +TCTTTGGTCACCATGCCGGTCGTCAGCCACAGCCACAACCGGAAAAAATGCGCCACCACC +GGGTTCAGATCCACAGAATTGTGGGCCGAGTGCCGATGGAGGTAGACCGTCACGCTGACG 
+ATAGTGACATGCGTCATCGCCAGGGCGATCAGAATCAGTTGCCACGCGGGCAGATGATGG +AAAAGGCCGTAAAGCCACATTGAGCAGTTCCTCACTTTTTCACTGCACCCCGTTGGAGAC +CGGGTGCATATTATTGCCGATGGTAGCTTACACCTGTACGCCGAACAAGGCGTAGATACC +GCAAAATTACAAAAAGGCACAGCAGTACTGTTCTCAGTTGCAAGCCCCGTCACACGGCGT +TTCCGGAGGCCATTTGATGGAGAACATCAACCGCATCACCACCGGGAATTGTTCTATCGA +CCCCATTGACAACACCGAACCGCAGGCGTATCAATTGCGCGTTTTTTAACCCGGGACTGC +CATTGTGCCTGAATTACCCGAAGTTGAAACCACCCGGCGCGGTATTGAGCCGCATTTGGT +CGGTCATACCGTCACACAGCTGCAGGTACGTGAATCCCGTTTGCGCTGGCCCGTTCCCGA +CAAACTGGATCAGATGCTGCCGGGCCAGAAGGTCGGCCAGGTCGCACGTCGCGGTAAATA +TCTGCTGGTCCACCTGGAGCGGGGCACATTACTGGTTCACCTGGGCATGTCCGGCAGCCT +GCGCGTGGTCACCCGTGCCGAAGCCCTGCGCAAGCACGACCATATCGACCTGACCACTGA +CGCCGGCACCATCATCCGGTTCAATGACCCGCGACGCTTCGGTGCCTGGCTCTGGACCGA +AGACTGGCAACACCACCCCCTGCTAGCTTCCCTGGGACCAGAGCCGCTTTCGCCTGCCTT +CAGCGGCCACTACCTGCATCGCCAGTCCCGCAGGCGCAAAGCGCCGATCAAGCAATTCAT +TATGGACAGTCACATGGTAGTGGGCGTCGGCAACATCTACGCCAATGAGGCCCTTTTTAT +CAGCGGCATCGACCCGCGCAGGCCCGCCGGCCGGATCAGCGCCGCCCGCATGGAGGCATT +GGTCCTGGCCATTCAGCAGGTGCTGGAGAATGCCATTGCGGTAGGGGGCACTACTCTGCG +GGATTTCGTCAACAGCGAGGGGCAGCCGGGCTATTTCCGCCAATCCCTGCAGGTTTACGG +ACGCGAGGGCCAGCCCTGTCGCCGTTGCGGCAAACCATTACGTCAACTGAGGCTGGGTCA +GCGCAGCACGGTTTTCTGTGGTCATTGCCAACGCTAGCGGCCGGATGGTCAATCAAATTG +ATGCCCAACAAGGCAGGACACCAAAATACAACAAAAATCCTTTTGAAATTGACCAAATAT +GGACCATATTTAAAGAAGTACTAAAAATGTAATCCGGATTCCGGCGCTGGTAAGTTACAC +TGGCGCGCGCCGAGGACAGCCACAGTTAAACAGGGAAAGGACCGGAACAAAACGCTTTTC +CATTAACGGACTCAAAGGCTTCCTAACACAATTCGCAACGAGCAACTATACTCGTTTCGA +GCTACCCGAGATTCTTGGCCGAACCTGGACATCACAACGACGTCAGGTTGGCTTATGAGT +GGTTAAAAGCAGGAGAAAACCATGCGGCCCAAATTGTCCCATGCCTTGGCAGCCATCCTC +GCTGCCTGCGTCATGAGCCTGGCACCGATGACCCAGGCGGAAACCACCCAATCCACCCCT +TCGGCCCTGGCCATGACCGGTGATGCGCTTTTCGCCCGTCCGGCACTGCTAGCCATGACG +CTGGTCGGAAGTGCGGTCTACGTGGTGTCCCTGCCGTTTTCGCTTCTGGGTGGTAACGCC +TCCGAAGCCGGCAAGGTGCTGGTGGTGGATCCGGCCAAGGCAACCTTTACCCGCTGTCTG +GGCTGCACCATGAACCAGAATCGGCAGAACGAGCAGAAAAATCAGAATCAGGTCGCCACG +GCGGACAATACGGACACCACCAGCAACTGAGCAGGTGCCTGTACCCGACCTGGACGAACC 
+GGCACCAATGGTGCCCGGAATGACCGTTTAAACATCAAGGCCAGCCTCTGTGCTGGCCTG +TTTTTATTTTTTCGCTGATAGCCCCATGCTGAACTGGTCCCAAATCGACACCGCTTTGCT +GGATATGGACGGGACTCTGCTCGACCTGCATTTCGACAGCCATTTCTGGCTGGAGCATTT +GCCCCGCCGTTATGCCGAACTCAAACACCTGGATCCGGAGCATGCCCGCCAATCACTGCT +GAGCAAGATTGAACAGTTGCGCGGCAAGCTGGACTGGTACTGCATCGATTTCTGGAGCGA +CCTGCTGGATCTGGATGTGGTGGCGCTCAAGCGGGAAACACGGGATCGCATTGCCTGGCG +CCCCCACAGCAAGGCCTTCCTGGAGCGCCTGCGCGCCTGTGGTATCCGGCGGGTGCTGGT +GACCAACTCCCACCCCGACGGCCTCAACCTGAAAATCGAGACGACCGGCATTGACCAGCA +CCTGGACCGTCTGTTTTCCAGCCACAGCTTTGGCCAGCCCAAGGAAGGCCCCGATTTCTG +GGAGCAGCTGGCTCAACAGGAGCCGTTCGACCCCGAGCGCACCCTGCTGATCGACGACAG +CCTGCCTGTGCTGGAAAGCGCCCGCCGTTACGGCATTCGTCACTTACTGGCCATTCTCAG +CCCCGATAGCCAACAACCGCCCCGTCAACCGTCCCATCATCCCTGCGTCCATGATTTCGA +TGAACTGTTCCAGTCACTGGACCAGTTCGCCCACCAGAAAAACCGAATCGACGGTCTCAG +CGACTGAAAAACCGGCACGGCAGGGAAATTTCTTCGGCACGCCGGCCTTCGGCCCGAACC +CCAGGCGCGGTATAATGGGCCAAGGCCAACATGACGGAGCACCATGACAGCCCATTCAGA +CCAGCAACAGCAAGAGGCGCGTATCCGGCTCGACAAGTGGCTTTGGGCCGCGCGCTTTTA +CAAAACGCGCACCCTGGCCAAGGAAGCGATCGAAGGTGGCAAGGTCCATTACAATGGCCA +GCGCACCAAACCCGGTAAAGTGGTGGAACTCGGAGCCCGCATCCGGCTCAAGCAGGGCTG +GGCGGAAAAGGAGGTCGTCATCCAGGGGCTGAGCGACCGGCGCGGCGGTGCTCCGCAGGC +CCGCGAACTTTATCAGGAGACCGACGACAGCCAGCAGCGCCGGGAAGACGAGCACTGGCA +GCGCAAGATGATGCAGGCGGCCCAGATGCCACCGGCGCGACGCCCCAACAAAAAGCAGCG +CCGCGAGTTGCAGCGGCTGAAAAGCGGACAGGGCTGAAGCAGAACCGGGCCCGCCCTCCA +CCACCCGAACCCGACGACATTGCATTCAGGGGATCATGGCATGACCGGCCAAGACCAGTT +ACAGCGTTTTCTGTTCGAAAATTCAAATATTCGCGGCAGCATTGTGCGGCTGGACGACAC +CTTCCAGCAGGCAACCGGGCAACAGGACTATCCGACAGTGGTTCGCAATCTGGTCGGCCA +GAGTCTCGCTGCCTGCGCACTGATGGGGGACAGCCTGAAATTCCAGGGCAGCCTGTCCCT +TCAGGCCCAGGGGGAAGGCCCGTTGCGCCTGCTGGTAAGCGACAGCACCGACCAGCTCAC +CTTGCGCGGCCTGGCACACTGGAACCCGGAAGCCGCCGAGGCCGAAACCCTGCCCGCGCT +GATCGGTAACGGTCACCTCGTGATCACCATTACGCCGGATGCAGGCCAGCGCTATCAGGG +CATCGTGCCGCTGGAGCAGGACACCCTGGCCGGTTGCCTGGAGGACTATTTCCGGCTCTC +CGAGCAGCTGGCCACCTTTATGTGCCTGTTTGCGGACGAGAAGGGGGCCGCAGGCCTGTT +GCTGCAACAGTTGCCGGGAGAGCTGGCCGGCCCCGACACTGACCTCTGGCCCCGCGCTAT 
+CAAGCTCGCGCAGACCCTGACCACTGAAGAGGCCCTGCAACTGCCATCGGAGGAGCTGAT +ACACCGCCTTTATCATCAGGAACAGGTGAGACTGTTCCCGGCCCGTGCAACCCGCTTTGG +CTGCAGCTGCTCGCGCGAGCGCACGCGGCTGGCACTGGAATCCCTGGGCCAGGACGATTG +CATGGCCCTGCTCGACGAGCAGGAGGTCATCGAGATCGACTGCCATTTCTGCGGCCAGCG +CTACCGTTACGACCGGGCGGATGTCCGGGCGGTCTTCGGCGGTCCGCGTCTGCATTAGGG +AGCCTCTGATTAAGCCCCTTTCAGGCAAAGCGTGATCCTCATCAGACGTTTTGCGATGGC +CTCGAAGTCGAATTTTTTGACAGCCTCTGGCATAATGCGCGCCTGACCCGGCCGGACCGT +TTTCCAGGCACCCGATTTTCAGACGCATCAAGACAGGGTTCCAGGAAGACAGAGCGCGCC +TTTGCGAAGGCAAGGCAGTTCCGCCTGACGTCAATGAATTTTACGGCGCATGGTTGATCT +CCGTGCGCCGGCATTGAATTTTGAGCCCGTGCTGCAGGTAGAGCGTTCGACACAGGGCGG +TTGCAGGCATGAGAACCGCGCCATCTGGTTGAGCCCCTTTTTTTACCGCCCCGTCAACTT +CGGCAGGCATTCCGAATGACGCTTCCCGCACCAGGCCGCTGTGGATTCCACCATTTAAAA +TAGGGCGAGGTTGAGAGTGAGCAACACTTACACAGATCTGAGCTCAGCACGACTGGTCGA +GCTGGCACTGGAGCGTAATGAAGGGAAACTGGCCGCCAATGGCGCACTGGTTGTCAACAC +GGGCCGCCGCACCGGGCGTTCACCGATGGACCGCTTCATTGTTGAAGATCCCGCTACGGC +CGAACTGATCCACTGGGGTCCGGTCAACCGGCCTTTCGATGCTGCCAAATTTGATGCACT +CTGGGAACGGGTTGAAAGCCATCTGGAAGAGCGGGACCAGTTCGTGTCCTATGTCCACGT +AGGCGCCGACCCGGAACATTACCTGCCGGTCAAGATGACCACCGAAACCGCCTGGCAGAA +CCTGTTCGGCCGTAACCTGTTTATCCGCCCGGACAACTACAACCCCATCGACAAGGGCGA +ATGGCAGATTCTCAACGCTGCCGGCTTCGTCTGCGAACCTGAGCGTGATGGCACCAACAG +TGACGGCTGTGTGATTCTCAACTTCGCTGAGCGTAAGGTGCTGATCGCAGGCATGCGTTA +TGCCGGTGAAATGAAAAAAGCCATGTTCTCCGTGCAGAACTTCCTGCTGCCCGAGCAGGA +CGTGCTGCCGATGCACTGCTCCGCCAATGTCGGCGAAGACGGCGATACCTGCCTGTTCTT +CGGCCTTTCCGGCACCGGCAAGACCACCCTGTCCGCCGATGAAGACCGCTATCTGATTGG +TGATGACGAGCACGGCTGGGGTCGCGGCACCGTTTTCAACCTGGAAGGTGGCTGCTACGC +CAAGTGCATCAACCTGAGCAAGAAAAACGAGCCGATCATCTGGGACGCGATCCGCTTTGG +CGCCATTGTCGAAAATGTCGTCATCGACAACGACAGCCGCGAGCCGGATTACGACGACGT +ATCCCTGACTGAGAACAGTCGCTGCGCCTACCCGCTGGAGCACGTCGAAAAGCGCGTGCT +GGAAAATCGCGGGGGCGAGCCCCGCGCCGTGATCTTCCTGACCTGCGATATGACAGGCGT +GCTGCCGCCCGTATCCATCCTCAACAAAGAAGGGGCGGCGTATCACTTCCTGAGCGGCTA +CACCGCCCTGGTAGGCTCCACCGAGATGGGCTCCAGCGCCAAGCTGCGCAGTACCTTCTC +CACCTGCTTTGGTGCGCCTTTCTTCCCCCGCCCGGCCGGCGTTTACGCCAACCTGCTGAT 
+GAAGCGTATGGAAGAGTTTGGCAGCCGCGTCTACCTGGTCAACACCGGCTGGACCGGTGG +CCCCTACGGCGTCGGCAAGCGTTTCAGCATCCCCACCACCCGGGCGATCATTCGCGGCAT +TCAGACCGGCGCCCTGGAAAACGTCCAGACCCAGCACCTGGATGACCTGAACCTGGACGT +CCCGGTGGAAGTGCCCGGTGTCGACAGTAACCTGCTGAACCCGAGAAACACCTGGCAGGA +CAAGGAAGCCTATCACCACAAGGCGCAGGAGCTGATTGCCCAGTTCGTGGAAAACTTCAA +GAAATTCGACGTATCCGATGCCATCGTCAATGCCGGACCGAAGTTGAAGGACTGAAGCAA +AGACAGTCAGCGGCCGGTTGATTCAGCCGGCCTGCGAGCGAAAAGGGCGCCTCTTGGGGC +GCCCTTTTTTATGAGGTGGTGAGGTGGTGAGGTGGTGAAAAAGTATGACTCAGCTCGTCT +TACCCCTCACCAGCGCGCACCGCGCGCATCCCTAACCCCTCACCCGGTCTTATACTCACC +GCGCATAGAACAATAGCGGCACAAAAGCCACGATCAGCAAAGCGCTCCCCATCACCAGCA +GCGGCATCAGAACCCGTTCATGACGCTGATAGAAGCTACCACTCTCGCGATAGGACAGAT +TCATCTCGCTCTCACCCACGACCTGCCGTGTCAGCAAGTGGTTCAATGCCACGGGCGGGC +TCAGATACCCCAGCTCGAACGCCACCAGCGTCACCATCCAGAAGTGCACCGGTGCAATGC +CACTCTGATACGCCAGGTCGGCAATGGTGGCACTTACCAGGATGACCGCACCAAAGGGGT +CCATGATCATGCCCAGGATGACCAGGATCACCACCATCAGCATCATGGCGAGCCAGGGGC +TGGGCAGCGCCTGTGGGAACAGGCTCATGACCTGTGAGCGCTCAATCACACCACCAATAC +TGACCGACAGCCCAAACAGCAGCAGCAGGGCACCAATTTCCGCCGTGGTTTCAGAGGTGG +CGGTGCGGAGGCTCTTTTCCAGACCCTGATGATCCACTTCACCACTGACTTTGTTGCGGT +CGCCGCGAAAATGGACATGCTCATAGACCAGGATGCCCACCATGATAATCGGCAGGATGC +GTGGTGCGGAGAACTCGTTCATCGTCACACCCAGCAGCAGCCAGTAGAAGAACACCACAC +CGGCAATCACCAGGACGTAGGGAATCAGAGGCTTGAGGCGCATCACCATCTCCGGCAGCG +CCTCGTTCATCGGCGCGACCTTGAAGCCTTTCTGGCGATTAACGACCAGAGAAGTAATCA +GGAACAACGTCGATGTCAGCAAAAATACCCAGCCGCCCCAGTGGAACAGGGAGTCGGTGG +TTACCTCCCGGTTGAGGTAGGCAATGACCACCACCAGCAGACAGGGGTTCAGGACCACGC +CCAGCGAACCGGACATGGCGGTCGAGGCCAGGGCCAACTGGCGCCGGGCACCCGCCTTGC +GCATTTCCGAGTAGATGACAGCGCCGGCGGCGATCACGAAAATACCCGAGGCACCGGTGT +AGGCGGTCGGCACTGCCGCCACCATGACCGCGACCACAGCCAGCAGTTCGGGTGGCAGCT +TGAGCGGGCGCAGTACATTGAAGACCAGACTCGCCAGCCGGGTCTGCTTGAGCATCATGC +CTGCCCAGACATAAAGGCCCACATTCAGGAACTGGTCCGCCAGCTCCATCATCTTGTCCA +GGTAGATGCCAATTCCCGCGGGACTACCGACAAACGCGAAGAACGTGCCGGAAATCAGGC +ACATGGTGGTATAGAGCGGCACACACAGAAAGGCATGCCCCAGCGTGCCGCCTTCCTCGG +CATCCTCCGGCACCCGGAACAGTTGCACCAGACTGACGATGGTCAGCAGCAGGAAACCGG 
+CAATCCAGATGTCGTGGGAAATTTCCTGGCCCGTGGTCACCACGGCACCGGAGTTGTGAC +TCATCTGCCGGTACATGAAGCTCGAGATCAGCAGCATGGTATTGGCGATGAACTGCATGA +AATGAGAGACGATATGGTCCAGTCGCGTCTCCATCCCTCGCATGGCGATGTGGTGCCGGC +TGAAGGTTGCCGTTGCGGCACAGACCAGCACCAGTAACGCCAGTATGTAACGTTGTGATG +CCAGTCCAAACGCCACTACATCAGAGATGAATAACTCGACTGACCGGTAGACGCGCACCC +CTTCGGTGATCCGCCCCTTGGTCGCATTGTAATCCGCGAACTGGGCCTCACAGGCCTGCT +TGGCATTGATGACAGACTGGCGAACCGCTGCCGGATTAACCGGGCTTGAGCCCACCAGGC +TGCCCAGCATGCTGTCCTGTTGAGCAGCCTGCGCGGCTATCTGCCTCTTGACCTGGGCGT +CGACATTCACATTGGGGTTACAGTCCGGTTTGACCGGGTCGACCCGCAGCTTGTAATAGC +CACTCCAGAGCACCTGCCCCAATTGCAGCGCCTTGTTGTGGATATCGCTGCTGGTGGTGA +ACAGCACCACCGCCATCAGCAGGATGCACGCCGGCAGGGATGAAAACCATTCCCGGGCCG +AGCGACCGCCTATCGTGCGCTTTTGCAAATACATGGAACTCTGTTCGATGGCTTCTGACA +TGGCGCTATTCCGCAAAAAAGCCCATTGCCTGTCTGGCGGCAATGAGGCCTGGTTACTTC +TTGTCCTGGACTGAGCGGGCGGTCCGAACCAAACTGGACCGCCCCCAATCCCTATTCCGC +ATTTTGCGAGCACTCACCCCGACTGGGCTGAACCTTGCAGCGGATAATCTTCATCAGCTT +GAGCGCCTTGGCGTCATATACACCTTTGTCACGCAGGCTGAGGCGGACCGACTGCAGCAT +CTGGTCATAGGACGCGGTCTGCTTATCGGTGGGGTACATCCAGTACTTCTTCTTGATCTC +GTCAGTCGCCTTGTTGATGATGCTGAAGGCCTGGTTGAGATGTTCACGGGCATACTCACG +CACGTGATTGGCATAACCCTTGGGGAAACGCGACTTGTGCAGGATGATCTGGAAGTTCAT +CTGCGCCAGCGGGTATTGCAGCACGCCACCATCGTGTGACAGCCCTTTGTAGAGCTCCAG +GGGGGTATAGGCCACTGCCGGTGCGTAGGCCAGGTCCACGCTACCGTTGTTGAACAGGCC +AGCGAAGCTGGCGGAGCTGGCACCGACAACGGAGGCACCCACGTGACGCACCATTGTCAG +CGACGCCCTGTCGTACGACAGGGTGGCGATCTTCTTGCCCTGAAGCTTGTTCACGGTATC +GATGTTGCGGTTACGGGTGAACAGGTAAACCGCGCCAGCCGGGAGAATCCCCGCCACTTC +ATAGTCGCCGTTGGTCAGGAGTTTGGCCGCCTTGGGCTGATTCAGGGTGTCCATCAGCAC +ACGCTCTTCCTTGTTGCTGGTGATGGCCCCCAGCGCTTCCAGCGTACCGGTAAACTTGTT +GAATTCCCGCGCCCGGGTGCCGGTCAGCAGGACCGAATCACACTGGCCCCCCTTGAAGTC +CTCAGCCGCGATCTTTTCATCGGTGTAAGCCCGCATCTGGAGGTCGACGCCCCAGTCCAG +GGCCGCCGGTTTGGTGGACTTCATCAGATTGAACAGGGGGCCGTTGGCACCGACCGGATC +GAAGACGCAAAAACTGCGCTTGGGCAGGGAGCCCGCGTTGGCGGCCGGCTGCAGCGCCAG +CATGCCAGCCAGGGTCAGGGAGGCCAGGCTCCAGGTTTTCAGCGAAAAACGTTTTCCAGA +CGGCTTCACAACAAATTTCTCCTCAACATTATCCTGTCTCTGTTTCTTCCTTGGCGCCGG 
+GCCCGTTTCAGAACCCGGCGTTGTTTACATGACCGAATCCAGGTCGATCACTTCGCCGGA +ACCCTTGGAGTCGTCCCAGAAGGTCCCCAGACCGCCGATCGGGGTCCGGTGCCCGGTATG +CTCCACCCACATGCGGTCGGAAATGGACCGCAGATAGAGGGTGGAAAGCTTGTCGATCAT +CTTCCAGCGCGGATCGGCCGGATATTTGGCTTCCTGGGCCTGCTGCTCCCGGATCAGTTT +CTTCACCAGCTTGGTGTCGCCCTTGTTCCAGGCGGCAATCACGTGCAGCACGTAGGCCAG +GCGAACCCGTGCCTTGTTGGCCTTCTTGTCGGACTCGTCCAGACGCTGGAAGGGGTTCTC +GCCATCGGGCTTGGCGCCGGGAATCATGGACCACACGGTGGCGCGCAGTGCCATGGGCAC +CCCCCACCATTTGTCGTCACTCAGGCAGCCGCTGGCCTGGGCCACGGTGGAACCGATGTT +CTTGGGCACGCCCTCGTCGGAGGTGGACTGAATTTCGGCCGCCAGTGCCTGAACGCCGGC +CAGCAGGCCTGCCATATAGATAAACTGGCCTTCGTCATTCTTGAAGGTCGGGCATTGACC +GGTGGAAGGGTCACCGTAGTAGGCAACCAGGTGTTTCCAGGCGCTGTAGTAGCGATCAGC +CGCCTCGATGTAGTGGCGCTTCTCGACGATCATGTCGTCTTCGGCGTTTTGCGGATGCTG +GTTACGCAACTCCTTCATGTAGGCGAGCTCGGCGTTCCAGCCCTTCTCCTCGGCGCAGGT +GCCGGCGGACATCTGCATCATCACGCCCAACTGATCCGGCTTGGCGGTCACCTGGCCAAA +GGACATCAGCAAAGGCGTCAGAGCCTCGCTCATGGCACAGCTCATGCCCAGGTCATCACT +GGACAGCACATAGGGGACCGCATGGGCCTTGGCATAACCCACCATGACCTCACCCGTGGT +CTTGTACATCATGTTGTTGACCGCACTACAGCCGCCGAGACTGAACAGCAACAACAGGGA +AAGGGCCTTGACCGCGCTACGCCCGTGAGAGAACAAAAATGCTGAGTGAGAGAGCCACCG +CATGCTTACGTCATCCCTTATTTTTGTGTTTGCGTGTTTTTGTCCGGCAGCTCAATAGGA +CCACCGAAACCACTGTTACATTAAGTAACTGTTGGTAACGCGTCGCATTATCTACGAGTT +ACCAACAAAAATGTGAGACAGGCCCCACAAAGCGCTATTATCAAAAGAGGCTCAAAGCGC +CTTTATATCAACGTGTTCAATCACAATGACCCATCCCAAAACCACGATGGAGGCAGACCT +TGGAGAACAGCACCCAAAATGCCCTGCATAAGCCGGAACTCGCCGGGAAAATCGCCGACC +AGACCCAGCTGACCCGTGCCCAGGCCCACGAGGTCATTACCGCTTTTACGGATCAGGTGT +CCGCCGCCATGGCCCGTGGGGAAACCGTTGCGCTTGCCGGTTTCGGTTCCTTCAATGTGC +GCGAACGCCAGGCCCGTACCGGTCGTAATCCACGTACCGGGGAGGCGCTGCAGATTCCGG +CGCACAAAACCGTTGGTTTTCGTCCCGGCAAAGCCTTCAGGGAAGCCATCGAATAGGCTT +CAGGGTGAGCCACACCCCTGTTTGGCCCCAAATGAATGGTGACTCAAAGTATCGGGGAGG +CGGGTGGGAAGCGTTGGCATCAATGCCGCGAAAAACCGGCAGTGCGGCAAACCTCGTTAA +CCGGCCGGCAATACGTTCTGGCGTGACTGCAATAACGGCGGGCGATTAAGAGGAATCAGT +TCAGGTCGGCAAGGAAAACCGGCCCGATAACTCGGGCCGGCGTATTTTTTGAAAGTGTGA +AGACGGTCAACCGTGGGAAGACTGTCACTCGCCTTCTTTTTTCTCTTTTTCGTCTTCTTT 
+CTTGCTGCGCGAGGATGACGAACGCCGGGCGGGCTTGGCGGTCTCACCAGACTGGGCCTT +GAGTGCCTCACTCAACTCTTCCATGCGCTTCTCCAATGCAATGATTTCGTATTGCACTGC +CGTCTGCTTGTCCAGGGCATCCGCCGTGTTGCGGGAAATCGTCCACAGCAGCCACAGGGC +GACCAGCAGAGCCACAAAGATCAGGAAACTCATCAAACTCTCTCCTCCAAGCCTGTTGGT +TGAAAAGGTAGCGCAAAAAACGACGGAGCGCTCCCCAAAACGGCGCAAAAATCACCATGC +CCCATCGTCGCCGCCGCGGCAAGCTTCAGTTGCTCCGACTAAAGACCGAATTCCGTCTGT +GGATCGGCATAAGCCACCTTTCCACGGCTCAAAGTGAGCATGACACGGCCAGGTAGCGGT +TGATCCAGCACCGGTGCATGCTTGCCAGCTGAAAGCAGCGTTTCGGGTGCCGGTATCCAC +TGCCCGTTGGGATTGAAGAGCGTGAGATCCGCACGGCTCCCCGCTTCCAGGCGCGGCTCG +GGCAAATTCAGCACCTGCGCCGGGCCCAGCGTCAAAGCCTGCAGGAAACGCGGGAGTTCC +AGCTCTCCGGCATTGACCAGCTTCAGCCCCAACCCCAACAGGCTCTCGACCGTTGAAAGG +CCCGGCTCGGTTTCCCCAAAGGGAGCCTGCTTGGCAGCCGGATCATGGGGCTGGTGCTGG +CTCACGATAGCGTCGATCCAACCTTCGTTGACCGCCGCCAGCAAACCCTGCCGGTCTGCT +TCGGTCCGTAAGGGTGGACGGCAATGAAAGCGGCTGTCGAAGTCAGCCAACCAGCCTTCA +TTAAACGCGAGCTGCTGAATGGCCACATCGGCCGTAACGGGCACACCGCGCTCGCGCGCG +GAACGCAGCAGCGCGACGGATCGGGCAGCGGAAAGCTGGCTCAGGTGGAGCCGGACACCG +GTATCCTCTGCCAGCTGGATCAATTCGCCCACTGCGATGGTTTCAGCGATTTCCGGGATT +CCGGGCAAACCCAGCCGCGTTGCCACCACACCTTCATGGGCATAACCATCGGCCGCGAGA +TCAGGATTTTCCGGACGCATAAACAGCGTCAGTCCGAAGGTTTTGGCGTAAGCCATGCAA +CGGCGCAGCACCCGGGCGTTGCCACGGGACAGCGAGCCATTACTCAAGGCGACACAGCCG +GCGCGCGCCAGGGAGGCCATATCGCTTAACAGTTCCCCCTGCAATCCGCGGGTCAGAGCG +CCCACAGGCAGCACCCGCACGCAGCCCTGCTTGCGTGCCCGCTCCAGTATCAGCGTGGTG +ACGGCACCGGAATCATTGACCGGCGAGGATTCCGGCGAGGCGCAAACCGTCGTGAACCCG +CCATGAGCGGCAGCGCGGGTTTCACTGGCGAGCGTCCCTTTCTGGTCATCACCGGGCTCA +CGCACGAAGCAGCACAGGTCGATGAAACCGGGACTGATCCAGGCCCCGTCCGCATCGAAG +ACCCGATCTGCGGCCTGATCCAATACCTGCTCACCCACAGCGACGATCTTCTGGTCTTCG +ATCAGGACGGCACCGGACCGGACCTGCCCCGTACGGGAATCCAACAGCTGACCGTTTTGA +ATCAGTAGTTTCATAGTGTCCTGCTTTGCTCCTGGGCGGCATCACCTTGATTCAGCTGGG +CCAGTTGACCACTCATGACCATCGACATCACTGCCATGCGCACCGCAATGCCGTTGGTCA +CCTGGTTCAGAATCACTGACCGGGGACTGTCCGCCACGGCGGACTCAATTTCCACCCCGC +GGTTGATCGGCCCCGGATGCATGACAATCGCGTCGGGTTTGGCGTAGCGCAGCTTTTCCG +TGGTCAGGCCGTAGAGACGGTAGAATTCCCGCTCTCCGGGCAATAGCGCGCCCTCCATCC 
+GCTCATTCTGCAGCCGCAACATGATCACTACATCCACGTCCTTCAGCCCGCGCTGCATAT +CGTTGAAGACTTTGACGCCCAGGGACTCCACATCTCTTGGTAGCAGGGTATTGGGGCCGA +TCACCCGGATTTCGTCAGCCCCCAGGATTTCCAGGGCCCGAATCTGGGAACGGGCCACAC +GGGAATGCAGAATATCACCGACAATCGCTACCACCCGGCCCTCGAAACCGCCTTTGTGCT +GCCGGATCGTCAGCATGTCCAGCATGGCCTGGGTGGGATGGGCATGACGGCCGTCACCGG +CATTGACGATGCCCACACCGGGCGTAACGTGGCGCGCGATAAAATGCGGCGCGCCGCTCT +GGGCATGACGCACGACAAACATATCGCTGGCCATGGCTTCCAGATTCAACAGGGTGTCCG +AGAGCGACTCGCCCTTGGAGGCGGCGGAGGTACTGATATTGAGGTTGAGTACGTCAGCGG +ACAGTCGTTTGGCCGCCAGCTCGAAGGTACTGCGGGTGCGGGTACTGGCCTCGAAAAACA +GATTGACCACCGTACGCCCACGCAACAGGGGAACTTTCTTGATGCGACGCTCGCCCACTT +CGATAAAGGAGTCGGCGGTATCGAGAATTTCAGTCAGTAGCGGACGGTCCAGGCCATCCA +GGGTAAGAAAGTGCCGCAGGCTTCCAGCGGCATTCAACTGCAGCTGCCGGGCAGCTTCGC +AGGTATCAATCATTCCAGGCAACCTCTCTGGGGAGCCCCCTGATCAGTGACTTTTGGTTG +AAGCATCCTTGGCTGGGTTTTCCCGCACATTTTCCCGCAACTCTACACGCAACGGATCCG +GGCCCATCAGCTTGACCCGCTGGTCACGCTGAAGCTGCATTTGCTGACCCACGATATCCG +GCTGGATCGGCAGTTCCCGCGCGCCCAGATCAAGCAGAGTCACCAGAATGATGCTGGCGG +GGCGGCCATAGTCGAACAGCTCGTTCATGGCTGCACGGATGGTGCGCCCGCTCATAATCA +CATCGTCCACCAGAATGATGTGGCGGTCTTCGGTGGCAAACGGCAGGTTCGAGGGTTTGA +CCCGGGGGTTCAGGCCGATGCGGCTGAAATCGTCGCGATAGAAGGAGATATCCAGTTCGC +CGAGGGGTTCTTCCAGGCGCAGACGCTTGTGCAGATAATCCGCCAGCCAGACACCACCGG +TCCGGATACCTGTCAACACCGGATTAACGGCGCCGCGCGCTTCCAGGGTTTGCTCGAGCT +GCGAACACATGGTTTCCAGCAGGCGCTCGACATTGATCTGGGCAGTCATGGATACACCTC +GGTTCAGGGCGTGGGTATCTTAGGCTGCTGGTCCCGTTGCTGCAAGAACCAGCCCTCCAG +GATCAGCACCGCCGCCAGGTCATCGACCCCCTTCACACCGAAATCCCGTCCACCTTCGCG +TTCCACGATGGCCGATTTGGCGGCGAAACTGGTCAGTCGTTCGTCCATCATGGCGTAATC +GCGGTGATACATGCCATGCAGGCGGCGGGCAAACTTGCGCGCCCGGCGACACATTTCGCT +TTCCGAGCCATCCATGTTGAGCGGCAGGCCCACCACGAAGAAATCCGGCTTCCAGTCCGC +GATCAACGCTTCCACTTCATCCCAGCGCGGGATCCCGTCACGAGCGCCGATCATGGTGAC +GGGACTGGCGGTGCCCAGCATTTCCTGACCAACGGCGACACCAATCCGGCGCAAGCCGAA +ATCGAATGCCATGACCTGGCGCTGACCTTCGGGTTTCAAATCAGGCATGGCCCACGCTAT +CGGACAGCTGGCTCCAGTCAATACCCAACCGTGCCAGGGCCGCGGGCAGCCGCTGCTGCC +ATGGAACGTTGAACAGGATGTCCGGGTCGGCGGGACAGGTCAGCCAGAAATTGCCGGCCA 
+GCTCCTGCTCCAGCTGACCCGCGCCCCAGCCGGCATAGCCCAGGGCAACCAGATAGTCTT +CAGGCCCGGTTTCCTCGGCAATGGCCTCCAGCACATCCCTGGAGGTGGTGACGGAAAGAT +CGCCACTGATCTCAAGCGTACTCTGCCACTCGCCCCGGGACCGATGTAAGACGAACCCCC +GCTCTACCTTGACCGGCCCGCCGGAATACACCGGCACATCGAGGTGGCCGCATTCGATAT +CGAGCTGTTCCAGGATCTCACCCAGGGAGAAATCCAGGGGTCGGTTCACCACCAGCCCAA +GGGCCCCCTGATCGTTGTGATCACAGATATAGGTCAGGGTCCCATTGAAATTGGGATCCT +GCATGTAAGGCATGGCAATCAGGAACTGATTGCGGATGTAGGGATTCGCTTCGCTCATGC +GGGTAGTATTCGCTGAGCAAACGGCTTTGGCAAGGCTATAAAGCCGGATCAGCCGTTGGT +GGACAATCCCCGCGGCTCAAACGACCAGTCACGTATGATTTCCAGTACATCCTTGCGCTT +CCGCAGCTCCGGCGGGAAGGGCGCAAACGGCGCGGCCAGCCGGACGATGCGCATCGCGGC +CTGATCCAGCACTTTATAACCGGATGACTGCAGGACCTGAATATCCTTGACGTGACCATT +GGGCAGGATATCCACCAGCAGCCGCAAGGTTCCATGGATACCACGCCTCCGGGCCGCTTC +AGGGTAGTTGAGGTTGCCGATTCGGGTCACCTTGTTGACCCAGGCCTGAACGTACCAGGC +ATCGGTGGATTTGAGTGTGGAAGCGGCGGTCACGCGCAATACCCTTGGCCGCTTGGCATA +ACGCTGGGTTTCCTGATCCAGCTTGGCCTCAAGGCTGGCGATCTCCAGCGCCCGGGACAT +CAGACTTTTATGATTGTGTTTCTGATGCGCCTTCTGTTCCTTGTCGTCGGAATCCACCTG +CTGCGTCGCCTGTTGGGAGACGGTCGTCACCACATGGCGCTTTTCCGGCTTGGCCTGGCT +GTGCTGGGGCTGGGAACGGGGAATCGGCTTCACCTTGTGCACCTGCGAAGCGTTGATCGG +CGCCCGGTGCCGTGTGGTGATCTGGGCTTTCTTGCTCAGAGTGCCACTGCCCTTCTGATT +GGTCTGGGCCAGGTAATCGGCATGTTTCGGCGCTTTCTGGTCATCCCGCTGGGCCAGGGT +GATATCCAGGGTACGGGCACTGGGTTGGGGCTTGTGCGCGGTGAACGTCACACCCAACAC +CACGATGGCGTGCAAAACCAGGGCCAGGAAGAGAGTAAAAGTCAGCCGATCGGTATCACT +GACCCGAACTGCGGATGCCGCCATAAACGCTTTTCGTAACAGGGCTTAATCTTAAATGGA +CAGGCCGGAACCCGGTCCTCGTCGATCCTTTAGCGCCGTTGGCGCCCCATGAAGTTTGAC +TGTAAATGGCCAACCTGTCCATGTCAGCACATTACAGTTGCGTTCACATTTTGTTCAGCG +CCGGGCCAAACGGTCAGCAATCACCTGCATCAGCTGGTCGGAGATATCCAGATCGTACTC +CCGATCCAGTTCCCTTACGCAGGTGGGGCTGGTGACGTTGATTTCCGTCAAATAATCACC +GATCACATCCAGTCCCACGAAAAGCAGGCCCTTTTCGCGAACCATCGGCTGCACCTGCTC +CACGATCCAGCGGTCCCGGTCAGTCAGTAATCGCCCCTCGCCACGACCACCTGCGGCGAG +GTTTCCGCGGTGTTCACCCTGGGAGGGAATACGAGCCAGTGCATAGGGCACCGGTTCGCC +GTCAACCAGCAGGATACGTTTGTCACCCTGGGTGATTTCAGGCAGGTATTTCTGGGCCAT +GATCTGGTGGCTGCCGTCTTTGGTCAGGGTTTCGATGATCACACCCAGGTTGTCGCCGTC 
+CTCGCGCACCCGGAAAATCGAACGACCACCCATTTCATCGAGCGGCTTCAACACCACATC +ACCGTGTTCGGCATGAAAGGCCCGCAACCGGGCTGCATCGCGGCTGACCACCAGCGGCGG +CGTGCATTGAGGGAACCAGGTGGCAAACAGCTTTTCGTTGCAGTCTCGCAGGGCCTGGGG +TGGATTCACCACCAGTGTGCCCAGCCGCTCGGCGGCCTCCAGCATCCAGGTGGTTACCAG +GAACTCCTTGTCCACGGGTGGGTCCTTGCGCATCAGGATGACATCCAGCGAGGCCAGCGC +GCGGTCTGACGGTTCACCCAGCGAGTACCAGTCGTCCGGGTTGGCCGCCACGGTGAGTGC +CTGGGTCCGCGCGCGGGGCTCACCGTCGCGCAGGTAAAGATCCTGCATCTCCATATAATG +CAGTTCCCAGCCGCGGCGCTGGGCCGCCAGCAACATAGCCAGGGACGTGTCCTTTTTGTA +ATGGATCGCCCCGATGGGGTCCATGACAATACCGAGTTTCACACTCATGAGCTGAAGTTT +CCTCTACCAGGCGGGGTACGACGCAATCGCCGCTGCATTATCGGGCGGTCGCAGCCTTTC +AAATAGCTGCTATGCTGTGAGAATGAGCGGTTTTGTGCAAGCGGTGTTTTGTCCCGCTAT +ACGCTGCCGCCATGAGGATGGATTCGGTGGGCTTGGACGCCTGACTGCAGGCAGGACGAG +CGCGGCGCGGTCCGCCAGGCGGTCAGGTGACGGGACGCGGTGTCGATCACATTTAGACAC +AAACAAAATCTTTTGCGGTCCAAATCCATACACTATAACTAGTAAAGGCTTATAGAACAG +CTTCCGAATCTGTGTTAAAAAGGCGCGTCAACGTGCCAGCCGCGATAGGAAAAGGCCTCA +AGGAACAGACGGCATCAGTCGTTTTACGCGCATAGCGGAGCGCGTCAGATCCCTGCGATA +GCGTCGATCCGGTGCCCGACAAGGTGCCATCGCGAGCGACCCGCGGCAGGCGGCGAACGA +CTGCTGAGGCGGGCTGTCCCAGGGGCCCATTAACAACGAATCCCCGTTATGCGCAAGGCT +GTTGGACAATGGAAGACAACTTCGAAAATCTCAAAATCATGGTGATCGACGATAGTAAAA +CCATTCGTCGCACCGCGGAAACACTCCTGAAAAAAGTGGGGTGTGAAGTGATTACGGCAA +CGGACGGATTCGATGCCCTTGCCAAGATTGCAGACTCCCATCCAGACATCATTTTCGTCG +ACATCATGATGCCGCGGCTGGATGGCTACCAAACCTGCGCCCTGATCAAGAACAATTCAG +CGTTCAAGAGCACGCCAGTGATCATGCTGTCCAGTAAGGATGGTCTTTTTGACAAGGCCA +AGGGGCGTATCGTCGGGTCGGATCAGTATCTGACCAAACCCTTTAGCAAGGACGAATTGC +TCAACACGATTCGTCAGCACATACCCAGTCGGGAAAGCACTTGATTGCAATACGCTCAAG +ACGCCTGAAACGTACCGAGGATCCCATGGCTCGCATTCTTATCGTTGATGACTCACCTAC +GGAAGTGAAAAAGATTTCTTCGCTGCTGGAGAAGCACAATCACGAGGTGCTGACCGCAGA +CAACGGCGCAGACGGTGTCGCCAAGGCCCGTGCCGAAAGTCCGGACCTGGTGTTGATGGA +CGTGGTCATGCCCGGATTGAACGGCTTCCAGGCAACTCGCCAACTGACCCGCTCTCCGGA +TACCGCGGATATTCCCGTGGTCATCGTCACCACCAAGGATCAGGAAACCGACCGCGTCTG +GGGAACCCGCCAGGGCGCCAAGGGCTATCTGGTGAAGCCGGTCAAGGAAGATGAACTGAT +CAAGACGATTGACGACCTGCTTGCATAACGGGTCGCCCCTGTAACCGACGGAGTACCGCA 
+TGTCCGCCCAGGCAGCTCCTTTCGCCGTTCTTTCGGATATCGCCACGCGCAGCCGGTCGC +AATCCCGGGGGCTGCCCGCCCAGGAAGAAGCCGTCGAGCTGTGGAACGGGATCGGGTTCA +GCCTTGCCGGGCAACTCTACGTGGCACCCATGGGCGAGGTGGTGGAAATCCTGCACCTGC +CACGCTATACCCAGGTTCCCGGTGTCCGCGCGTTCATGGTCGGAGTTTCCAACGTTCGCG +GCCGTCTCCTGCCGCTGGTGGACCTGGGGCTTTTCCTGGATTTTCCCCGTTCCGTGGTCT +AGGTGTCGTTGTACGTGGGATCCCCGGGTACCGAGCTCGAATTCGCCCTATAGTGAGTCG +TATTACAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACC +CAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCC +CGCACCGATCGCCCTTCCCAACAGTTGCGCAGCTGAATGGCGAATGGCGCCTGATGCGGT +ATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAA +TCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGACACCCGCCAACACCCGCTGACGCGA +ACCCCTTGCGGCCGCATCGAATATAACTTCGTATAATGTATGCTATACGAAGTTATTAGC +GATGAGCTCGGACTTCCATTGTTCATTCCACGGACAAAAACAGAGAAAGGAAACGACAGA +GGCCAAAAAGCTCGCTTTCAGCACCTGTCGTTTCCTTTCTTTTCAGAGGGTATTTTAAAT +AAAAACATTAAGTTATGACGAAGAAGAACGGAAACGCCTTAAACCGGAAAATTTTCATAA +ATAGCGAAAACCCGCGAGGTCGCCGCCCCGTAACCTGTCGGATCACCGGAAAGGACCCGT +AAAGTGATAATGATTATCATCTACATATCACAACGTGCGTGGAGGCCATCAAACCACGTC +AAATAATCAATTATGACGCAGGTATCGTATTAATTGATCTGCATCAACTTAACGTAAAAG +CAACTTCAGACAATACAAATCAGCGACACTGAATACGGGGCAACCTCATGTCCGAGCTCG +CGAGCTCGTCGACAGCGACACACTTGCATCGGATGCAGCCCGGTTAACGTGCCGGCACGG +CCTGGGTAACCAGGTATTTTGTCCACATAACCGTGCGCAAAATGTTGTGGATAAGCAGGA +CNCAGCAGCAATCCACAGCAGGCATACAACCGCACACCGAGGTTACTCCGTTCTACAGGT +TACGACGACATGTCAATACTTGCCCTTGACAGGCATTGATGGAATCGTAGTCTCACGCTG +ATAGTCTGATCGACAATACAAGTGGGACCGTGGTCCCAGACCGATAATCAGACCGACAAC +ACGAGTGGGATCGTGGTCCCAGACTAATAATCAGACCGACGATACGAGTGGGACCGTGGT +CCCAGACTAATAATCAGACCGACGATACGAGTGGG +>NODE_42_length_1523_cov_23.4108_ID_83 +AAATCTCTTTGGAGAACATTAGGGGCGGTTTGCCCTACCTCTCCTCTGTAAGAGCGGTAT +CGCTTGACCTTAATCGCTGCTTTAAGTGAGAGGGTTCCCATCAGGCGCTGAACAGCTTTA +TGGTTAATCTGTTTCCCTTCTCGATGAAGAGACAGCGTTACCCTACGGTATCCGTATCGG +CCTCTATTCTCGTGATAAATCTCACTAATACGCTTTTTAACGTCCGCATACTTGTCAGGC +TTGCTGAGAGCCTTTAGATGATAATAAAACGTACTGCGCGGTATCTCCGCAGCCCTGAGA +AGCTCATCAAGAGGATAAAACTGCCTTAGCTCGTTGAGTACTTTCACTTTTTCGTGGGAT 
+GAGCTAAGGCTTTCAGCTTTTTTAGATACATAAGCCGCGTTTCAAGAAATCGAACTTGCC +TTTCAAGATCCTCAATGCGTCGGTCTTTTGACAGCTCCAATGCTGATGCCGCTTTTTCTG +GATCAACTGATATTGCAATGTTTCTTTTGGTGCCAATCTTGAGCGCGCGTAAACCAGCTT +CTCCGCGCTCTTCATAGACCTTCAGCCACCTGGCTACAGAACCACTACCAGCAAGCATAA +AGTGAGCAGCAGCCTGATTAAGGGACATGTGCTGCTCGATCACAGCTTTCACGACCTTAA +TACGCAACTCTGGATCAGCACTAACGCCTTTAGGTTTGGGAATTAAACCTTTTTCTCCAT +GTTTTTCATAGAGGGCAACCCATGTCCTGACCTGGGTTCGGGGGACACCAAAACGTGCCG +AGATGATCCTGTAACCATCATCAGTTGTGAAGTAGTGATTCACGACTTCAAGGCGCTTTT +CAAAAGGGTATTTTGGCTTTGACATATTAGGGGCTATTCCATTTCATCGTCCAACAAAAT +GGGTGCAGTACACCTTGTACTGACCCCAAAAAGTTGGACAGTTAAACACGAGGCATATAG +GTCTGATTCCGATATTCAATTGGAGTCAGACCTTTTAATTTCAGGCTAATTCTTCTGCTG +TTGTAGTATTCAATATATTCCGTAACAGCATCCTTCAGTTCGCTTATATTACTGAACTCA +TCAAGATAAAAACACTCCGACTTTAAGGTTCCAAAGAAACACTCCACCACAGCATTATCC +AGACAATTGCCTTTTCTGGACATGCTTTGTTTAATACCATGTTCTTTAAGGATATTTTGA +TATCTTCTCATACGATACTGCCATCCCTGGTCAGAGTGCAGAACAGGATGCTCGTGAGGA +TTAAGCTTTTTGAATGCCTGATCGAGCATATTCTCAACCATGTTCATCACTGGTCTTTCC +GAAAGGCTGTAAGAAATAACTTCGTTGTTGAAGAGATCTATTACTGGAGACAAATACAGC +TTGCGCCCATTGACTGCAAATTCAGTAACATCGGTAACCCACTTCTCGTTTGGCCGCGTA +GCCTTGAAATCTCTTTGGAGAACATTAGGGGCGGTTTGCCCTACCTCTCCTCTGTAAGAG +CGGTATCGCTTGACCTTAATCGC +>NODE_4_length_32829_cov_185.347_ID_7 +CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG +TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +CACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG +CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGTTGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA 
+GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT +TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG +GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCACCTATGTCTTCGGCTCCAG +CCCTATCACCACACCCTCGCTGCTGGACGGTGTCTGGCGGCTGGATGGCGTAGCCATTCC +CTACAGCCAGATCCTGATTGGCGTGATCGCCTGGGTCGTATTGCTGGCACTGTGGCTATT +CGTCACCCGTTCGCGCCCCGGTAAGGCCATGCTGGCCGCCTCCATGAGCCGGACCGGTCT +GGCTCTGGTGGGTTACGACATCGGCAAGGTCTATCTGCAGGTATGGGGACTTTACGGCTT +ACTGGCCGGTATCGCGGGTGTACTACTGGCGTCGTTTACCGGCGCCAGTGCGTCGATTGC +CATCTCGCTCACGGTCAACGCTTTCATCATCGTCGTGCTGGGGGGCCTGGGTAATGTCGC +GGGCTCACTGGGTGCCGCCTACATCATCGGCCTGCTGGGCACGCTGACGGCTTATCTGAT +TTCGCCGTCCGTGCGCGAAATCCCCGGGCTGCTGGTGTTGATACTGATTCTTTACGTACG +GCCCCAGGGCCTGTTCGGCCGGCATTGAGGAGGCATCATGGCATCGCGGATTCATTGGGA +TTGGCGCGCCGGTCTGGTTTTGCTGGCGCTGATCGTGCTGGCCTTTCTGCCTTTCGGGGT +ATCCGGCTATATTCTGGGCGTCATGACCGTGGCGTTTTACCTGGCGGTCTATGCCATGTC +CTGGGACCTGCTGTTCGGCTATGCCGGCGAGGTCAACTTCGGTCCCACCTTTCTGGTGGG +TCTGGGCGCCTATGGCGCCGGGCTCAGTAACAGCGTATTCAACATCAGCGTCTGGCCCAG +CGTGGCCATCGGCACCCTGGCAGCCGTAATCGGCGGGCTGGTGCTCGCCGGTCCCGCATT +GCGACTGCGCGGACCGTACTTCGGCCTGGTGACGCTGGTCGCTGTCATACTTCTGGAAAA +GGTCATTGGCCTGCTCTCCAGCTATACCGGTGGTGAAATCGGGCTGACGGTCATGGATGT +GCTCACCATCAGCCAGAGTGGCAACTACTACTATGCCTTTGGTTTCATGGTAATTTCCGC +CGTCATCCTGCGCATTATCGCCCGCTCTTCCATTGGCCTGATTCTGGAAGCCAGCGGCCA +GGACCCGGTCGCTACCGAAGCGCTGGGCTTTAACGTCACCAAGTTCAAGTTCATGGCATT +CACCCTGTCCGCGTTCTTTTCCGGACTGGCCGGCGCGCTGACGGTGTTCTACCTGGGTTC +GGCCTCACCGGGCACGGTCGTCTCCGTGTTTGTCACCATCCAGATCATTATCGCCACCCT +GGTGGGGGGCCGGCGCTCGATCATCGGGCCGATTCTGGGGGCGGTGTTCCTGATCGCCGC +CGGCGAGATACTCCGCCCGCTCGGACAACTGAGTAATGCCGTGGTGGCGTTGATTGCGCT 
+GCTCGTGGTGCTGTTCGCACCCAACGGCTTTATTGGCCTGTTTTCCCGCACGGGAGGTGC +CCGATGAGCGTATTAAAGGCACAAGGACTCTATAAGCGTTTTGGTGGGTTACAGGCGGTC +AACAACGTGTCGTTTTCGGTGGACCGCGGGGAAGTGCTGGGGTTGATCGGCCCCAACGGC +TCGGGTAAATCCACAACGCTTTCCCTGCTGATGGGCGTCACCCGGCCTGATCGTGGCTCC +GTCCAGCTGGACGGGCAGGAAATGGCGGGCTGGCGCACGCACCGGATTGCCAAGCAGGGC +CTGTCGATGGTGTTTCAGCACTCCCGGCCCCTGCACCGCCAGACCGTGCTGGAAAACATC +AAGCTGGCGCTGCTGCCGGACACTCTCTGGCAACTGTTTCCGCCGCATACGCTGGACCGG +CGGGCCCGGGAAATCGCCGAGCGGGTTGGTCTGCATAATGTCATCGACACCCTGCCAGGC +AACCTGCCCTTCGCCGACCTGCGAAGACTGGAGATCGCCAAAGCCCTGGCCCAGGACCCG +TCTGTACTGTTGCTGGACGAACCCTTCGCCGGCCTCTCCCCCAGGGAAACCCGGGAGTTC +GCCGAGTTGGTGCACCTGTTCCGGGAAGAAGGCCGGGCTGTGATCCTGGTGGATCATAAC +GTCAAGGAAGTGGCGGGACTGGTGGATCGTATTGTGGCCATGCACGCCGGGCAGGTGATT +GCCGAAGGAACGCCGGATGAGGTCACCCGCGACCCGAAAGTCCGCGAAGTCTATTTCGGT +CAGTCGCTGGAAAATGCCAGCGGTATTCACGCCGACGGCGATCGGCGCTCCGAAGGCAAT +GGCAGTGAAGCCTTGCTGGAGATCGATCTGCGGTCGGTCCGTTACGGCCTGGCCGAGGCC +CTGCGGGATATACAGATACAGATCAATCAGGGGGAGTGTGTTTCCGTGGTCGGCATCAAC +GGGGCCGGCAAGACCACCCTGTTCAAATCCATACTCGATTTCCAGGGTTATGAGGGCGAT +GTCCGCTGGCAGGGCACCTCACTGACCGGCCAGGGTCCGGGCCAGGTCGCCTCCCAGGGG +ATTGCCCTGTGCCCGGAAAGCCGCGAGCTGTTTGGTTTTATGACTGTGCGCGAAAACCTG +GAGCTGGGTGGCCACAAGCTGGACCGGCAAGCGCACGAAAGCCAGATGGATCGGGTTTTC +GACCTGTTCCCGGTATTGCGCCAACGCCAGGCGCAGGCCGCCTACACGCTCAGCGGGGGC +GAGCAACAGCAGCTCACCATTGGCCGGGCCCTGATGCAGCAACCCAAACTGCTGATTCTG +GACGAACCCACACTGGGGCTGGCGCCGCTGGTGATCGAGAACATCTCCGAAGCCCTGCAC +AAGTTACAGCAGGACAGTGGCATGACCCTGCTGCTGGGCGAACAGAACCTGACGTTCGCA +TTACGCCATTCGCAGCGTATCTATCTGCTGGAAACCGGTAATCTGCGCTGGCATGGACCG +GCGGAACGGTTTATCGAGGAGGTGGGCGAGGATGTTCTGTAACGGCAATCAGGGGTACGA +TTTTTCCATCAATTCACATCCTGGCCATGCCCATCAGATGCAACGGAGGAGCCATGCAAC +GAACAGCCAGAGCGGCCCTGTGCCGTGAATGGAACGGCCCGATCCAGGTGGAGACGATTC +GTGTCGACCCACCCCGCCGTAACGAAATCACCATCAAGCTGCGGGCGTGCGGCGTCTGCC +ACAGCGACCTGTCCGCCGCTACTGGAGTTATTCCCTTCCCACCGCCGTTGGTGCTGGGGC +ATGAGGGGGCCGGCACCGTAATTGCGGTGGGCGAAGGCGTGACGGATTTCCAGGAAGGCG +ACCATGTGGTCAGCTCGTTCATCTATATGTGCGGCAAATGCCGCCAGTGCAGCCGCGGCC 
+GCCCAGTCCTCTGCGAGCAGGCCCACAAGGCACTGCACCATCTGCCCGATGGCACGGTGC +GCACCCATGATGGCGACGGCAATCCGCTCAATGTCTTCGGCGCCTGTGGTGTGATGGCCG +AATATGCCACGCTGCATGTGAACAATGCCGTGAAGATTGACCCCGACGTGCCCCTGGAAC +GGGCCGCGCTGGTGGGGTGTGCGGTGATGACCGGCGCTGGCAGCGTTTTCAATACCGCGC +AGCTGGAACCGGGGTCCACGGCGGCGGTATTTGGTGTCGGCGGCGTGGGCCTGAATGCCA +TCCAGGGCTGCGCCATTGCCGGGGCCCGGGTGATCGTGGCGGTAGACACCAACGAGGAGA +AACTGGCGATGGCCCGTCAGTTTGGCGCCACGCACACGGTCAACGCCCGTGAGCACGACG +ATGCCGGCAAGGCGGTAAAGAAAATGACTGGCGGGGGGGATTATGCCTTCGAGTGTGTGG +GATCCGGCGTCACCGTCGCACAGGCCTACGGCTCGCTGGGCCGGGGCGGAACCGCCGTGG +TGGTCGGCGTTGCGGACGTTAAGGATAAGACCACGTTTCGCACCTTGTCATTGCCCGCGG +ACGAGCGCACTCTCAAAGGCAGCTGGCTGGGCTCGGCCCGGCCCCAATTCGATTTTCCCC +GTCTGCTGGGCCTGTATCAGGGCGGCCGGCTCAAGCTCGATGAGCTGGTCACCCATACCT +ACACCATCGACGAGGCGCCGCAGGCCTTCGAGGACCTCAAGGCCGGTCGCAATGCGCGCG +GCGTCATCCTGTTTGACTAGGGAGTTACCATGACGATCAAAAGCTACGACAGTGTCTATT +TCGACGGCCGCTGGCAGCCGGTGGACGGCGAGCGGCTCTCGGTTTACGAATCCGGCACCG +GCGAGGTGATGGCCAGCATTCCCGGCGCCGCACCGGCGGTGATGCAGCAGGCCATTGATG +CCGCCCACAACGCCTTCGACAGTTGGTCGCGTCGACCACTGAAAGAGCGGCTCAAGTACA +TTGAAGCCCTGCACGGCCAGCTGGTGGCCCGCGCCGAAGAAATCGCCACCACCATCAGCC +GTGAAGTGGGCATGCCGCTCAAGCTCTCGCGCAACATCCAGGCCGGCCTGCCCATCGCCA +TTACCGACAGCTACCTGAAACTGCTGCCGGACTTTCCCTTCGAGGAAAAAGTGGGCAGCT +CGCTGGTGCAGTACACACCGGTCGGCGTGGTGGGCTGCATTACCCCCTGGAACTACCCGC +TGCATCAGGTGATCCTGAAAGTGGTACCGGCGCTGGCCGCGGGTTGCACCGTGGTGCTTA +AGCCCAGTGAAGTGTCGCCCCTGAGCGCGTTCATGCTGGCCGAAATGTTCGATGCCATCG +ATCTGCCGCCCGGAGTTTTCAATCTGGTCTCGGGCCTGGGCCACGTGGTGGGCGACAGCC +TGACCGGTAGTAATAAGGTGCGCATGCTGTCCTTCACTGGCTCGCCCGGCACCGGCCGGC +GCATCTTCCATGCCGCCGCCGAGGACTTCAAACGACTGGCACTGGAAATGGGCGGCAAAT +CAGCCTCGGTGATCCTGCCCGACGCCGATCTGGCGACAGCGGTCAAGGGATCGGTCAACA +ACTGCTACCTCAATTCGGGCCAGACCTGCATCGCCTGGACCCGCATGCTGGTCCCCGCCG +ACAAACACGACGAAGCCTGCGAACTGGCCGTTGCCGCCGCCAAAAAACTCACGCTGGGCG +ATCCGCTGGACGAGAACACCCGACTGGGTCCGCTGGCATCCAAAGAACAGCTCGAGCGGG +TGCGCAATTACATCCGGGTAGGTATCGAGGAGGGCGCCAAGCTGATGACCGGCGGCCCGG +ATGCTCCGGCAGGGCTGGACAAGGGTTACTTCGTCGAGCCCACCATCTTCGCCAATGTCG 
+ACCCCCAGTCGCGTATTGCCCAGGAGGAGATCTTCGGGCCGGTACTGTGCATCATTCCCT +ACCGGGATGAAGAGGAAGCCATCGCCATTGCCAACGGCACGCCTTACGGTTTGTCGGGGG +GAGTCTGGTCGGCGGATCAGGATCACGCAATCGCTGTGGCCAGCCGGCTGCGCACCGGTC +AGGTAACCGTCAATGGTGGCGCTTTCAACCCCGAGGCACCGTTTGGCGGCTTTGGCGCCT +CTGGCCTGGGTCGGGAGTTTGGCCGCTGGGGCCTGGAGGAGTTTCTGGAGGTGCGGTCGT +TGCAGCTTTAAGCTGCAACGCCCTGTCTGAGGTCGCAGGCCTGACGTCGCTTGATCGTTG +GCCTGCGAATATCAGACGTGCGATGCCAGACGTTCAAATCACTTACAGGGCCCTACCCAC +TTACCATCACTGTCCACAACGGTCTTATAGGGGCCATTGGGCGTCTGCATGAGGGCATTC +ATGTGAATGTGATAGGACGTCTTGCTTTGAAGGGTAAACACGCCTTCGGTGCTTGCCTGT +GATTTGCCGGTACAGGTCATTTTCCAGTGGACCTTGTTACCGCTCCGGGAGTATTCGTTC +AGCTTACAGCCGGGTTGCTGGGCCTCTTCAATGATCGATTTGATCGAGCCAGCCTGCTCA +GGCTTTACACAATTGGTGGAAGAGTGAGGCGGAATCTTGACTGACGAGCCGGGAATGGTG +ACATGAATCGTCTGTTTCCACTGACCGGGCTTGATTGCGTCTTCGGCCTGGGCCACCAAC +GGGAAAAGCAACATCAGGGCGACAATGAAACGAAGCATGAGCTCTCCTTACTCTTGTGTC +GACAAATAGGACGGAACCGCAATTATAACGCGGATCGGCCACGAAAGTCCCTTATGACAC +CCCCGGCTCTGCGGGATTTTTCCCAGCCTGCGATCTCAGACTTTCGACCTCAGACGTTAC +CGGTCAGGCGTCTGGTTGGCCAATGCCCGCAAACGCGCTTTCAGAATCTTGCCTGCCGGC +GTCACAGGCAGCTGATCCAGAATATGGATTTCGCTCGGCCGCTTGTAGGCTGAAAGCCGC +GCGGCGGCGAATTCGGACAGCGCCGATTCAGTCATGTCATGGCCCGGCGCAAGCTGCACG +TAAGCGACGACCTCTTCATTGCCCGTAATCTGCCGGCCCACCACGGCAGAGAGCGTCACG +GCCGGATGCTCATTCAAAACGGCCTCCACGTCCGGTGGATAGACGTTGAAACCGGAACGG +ATAATCAATTCCTTGGCCCGCCCGACAATATGCAGCTGACCGTCCGGATCGATACGGGCC +AGATCGCCGGTATCCAGCCAGCCCGCGTCGTCCAGCACCGCCCGGGTCGCTTCCGGTTTG +CGGAAATAACCACGCATGATGGAGGGCCCCCTCACCCGCAGTTCTCCCACCTCGCCCTGC +AGCACCGGCTGACGGCTTTTCAGGTGCACCACCTCGTAGTCCAGTCCCGGCAGGATACGG +CCGACGCTGCTGGCTTCACTGTCTTCGCCGATCCGGGTCTGGCTGATGGTGGGGCTGGCT +TCAGTGAGGCCATAACCGTTATGCAGCGTGGTACCGAAGGTCGCTTCGACGCGGGTCTTG +AGGTCGGTATCCAGGGGCGCGCCACCAGCAGACAGATAGTCCAGCGCCGGTGCGACCAGC +TTCCGGTTGTGCTGATGGAGATACTCCAACGTACGCGCATACATGGCCGGCACCCCCTGC +AGCACCGAAATCCGCTCCCCGGCCAGGGCGTCCAGCAGCGCACCCGCTTCAAACCGGGGC +ACGGTATGCAGGCAGGCGCCGTTGGCCAGTGAGCCCAGCATCACCGAAGACAGGCCAAAT +ACATGGGAACTGGGCAGCACCCCGTAAACATGACGGCCCGGACGCAGATTGCGCAAGCCG 
+CCGGAGACGCGGGCAATGAACAGAATAGCCCGATGGGTCAGCATCACGCCCTTGGGTTGG +CCGGTGGTGCCGGAGGTGTAGATCATCGCCGCCACCTGGGTGCCATCCTTCACCACCGGT +TGCGGCAGGCTTTGCGCGGCGGCAGGACCTACCGCAAATTCGCCCAGGGGATCCAGGTGG +ACGATATCGGCATCGTGACGTTGCGCATGATTACGGGCATCCGGGGAGACTTCGGTGGTG +TAGATCAACAGGCGCGGGTCGCAGTTGTCCCGGATGGCATCAATCTCGTTATCCGCCAGG +CGGGCATTGATGATGGCGGCCCAGGCGTCCAGCCCGCTGGCCGCCAGTAACAGCGCCACC +AGGGCACGGCCATTCTCACTCACCAGCATGAGGCGGTCACCCGGGCGCACTCCCTGCCCA +ACCAGCCATTGCCGGGCCGCCGCGACCGCCGCAGACAGTTCCCGGTAGGACCAGCGGACC +TGGTGATCTACCAGCGCTTCAGCCTCCGGATCAGCCTCCCAGTGCTGGGCGACGCCCTGA +TGAATGCGGGCAGGCAGCGCGGCGGTGATCTCGCTGGCCCACCGACCATTGATCAGTTCA +TCCGGGGCCCGCATTAATCGGCCTCCCGCACCATATTGCGGGCGATCACCAGCTGCTGAA +TCTGGCTGGTGCCTTCATAGATCCGGAACAGCCGTACATCGCGGTAGAAACGTTCCACCG +CGTACTCGGCCATATAGCCGGCGCCACCGTGAACCTGCACGGCCCGGTCCGCCACGCGGC +CCACCATTTCTGAACAGAACAGCTTGGCGCAGGAAGCCAACGTGCTGACGTTCTCACCGG +CGTCCTTGCGTCGCGCCGCATCCAGTACCATGCAGCGGCCGGCATAGGCTTCGGATTTGC +TGTCCGCCAGCAGGGCCTGGATCAACTGGTGCTCTGCAATAGGTTTGCCGAACTGGGTCC +GCTCCATGGCGAAGTGCAGCGCGTCGTCGAGGACACGCTCAGCGACGCCGACGCAGACAC +CGGAGATATGCAGCCGCCCCCGATCCAGCACCTTCATCGCGGTCTTGAAGCCGACGCCCT +CCCGGCCGCCGATAATGTTTTCCGCCGGCACCCGGCAGTTATCGAAGATGATGTCGCAGG +TATGGGCACCTTTCTGGCCCATCTTGCGGTCGGGTCTGCCCCGATGCAGCCCTGGCGTAT +CGCCTTCCACGATAAAGGCCGTGATGCCACCGGCACCTTTATTATCGGGGTCGGTCCGTG +CCATTACCGTAAACAGGCCGGCTTCAGGGCCGTTGGTGATATAGCGTTTGGTGCCGTTCA +GCACATAATGATCGCCATCCCGGGTAGCGGTGGTGCGCAGGGATCCGGCGTCCGAGCCCA +CATCCGGCTCGGTCAGGCAGAAAGAGCTGATCAGCTCGCCGGTGGCCAAACGGGGAATGT +ACCTGCGCTTCTGCTCGTCCGTACCATCGATCAGAATGCCCTGGGAACCAATCCCGTTAT +TGGTGCCGAAAATGGAACGAAAGGCCGGGGACGTCCTGCCGATCTCCATGGCGACCAGCG +CCTCTTCTTCCATAGTCAGACCCAGGCCACCGTATTCCTCGGGGATCGACAGGCCGAACA +GGCCCATGTCCTTCATCTCCTGCAGAATGTCCTCGGGAATGGCATCATCCTGCGCTACCT +GCTCCTCGCTGGGAACCAGCCGTTCACGCACAAATCGGCTGATGGTATCGATAAGCTGGT +TGAGGGTCTCTTTATCACGAATCACGGTTGATCACCTGCTGCAATGAATTCTAAAGACGC +CGCAGCGCGGCTTCGACGTTCTGTACCAGTTGCTTGAGTTTCGGGCCCAATTCCGCATCC +AGCCGCTCTTCAGGCAACACGAAGGACGGGCCGCCACAATTGAAGGCGTAGATATTGCCG 
+TTATCCGACTGCACCAGTGGCACGCCCACGGCGTTGACATCCCGTTCCCAGGTGCCGACG +GTCTTGACGAACCCCGTCTCCTGGTACTGGCGTATGGCCCGTTCGATTCCTCGCCGGACC +GGTGGCCAGCGATTACCCGCGTGGCGCTTGATATGATCCATCAGATAGTTGCGCTCACCT +TCCGGCAGGGCTGCCAGAAACGCCCGGCCGACGGCGGTCGTCGCTATGGGAATGCGCGAG +CCCACGTCCAGCCGCAACGTCACCGCCCCGCTGCCGCGGCAATTCTCCAGATAGATCATG +CTGAGCCTTTCCCGGGCCCCCAGGGACACGGAGGCATCGACGTCGTCCGCCAACTCCTGC +ATCAGAGGCCGGGCGATCTGGCGGATGCCAAACGTCGACAGCGTCGCATAACCCAGGGCC +AGTGCGCCGGTGCCCAGGGAATAGCGTTCCAGGCGCTGGGAATAGGACAGGTAACCCAGC +TTGGTCAGGGTGTAGGTCAGCCGGGTAACGGTCGGCTTGGGCAAGCCTGTACGCCGGGCG +ATCTCCTGATTACCGAGAAAACCGTCGCCGGGGCGAAACGCCCTTAACACCTCCAGGCCC +CGAGCCAGCGCGGTTACGAACTTGCGGTCCTTGGCGCCATTCTGCGGGTCCAGCGCCGCG +TCCAGGCCCAGGTCACTGTTGTCATCCACCACGATTTGCCCCCACACCCCGCGTGCTTGC +CAATACCAACCCAAGCGGCGCCCATGGCGCCTTTCAGTTGCATTGGACTCTGTCACGCCC +TATGCTATCTGTCAAGTTTGCGAAACGTTGTTTCGCTTTGCGGAATACACGTGAGTCCAT +GGCCTGAGCCGCACCGTTCACGAGGACCCCACCATGCTCGACGCCTATCTTTACGACGGT +TTACGCTCCCCCTTCGGCCGACACGCCGGCGCACTGTCGCCCCTGCGTCCTGACGACCTG +CTGGCTACCGTCATTCAGGCTCTGATCGCACGCAGCGGCTTTGCGAAGGAACAGATCGAG +GACATCGTGATCGGCTGTACCAACCAGGCGGGTGAGGACGCGCGCAATGTCGCCCGCCAT +GCCGGGTTGCTGGCGGGCCTGCCTGTGGAAACGGCGGCCCTGACCGTGAACCGCCTGTGC +GGCAGTGGTCTTGCCGCCGTCGCGGACGCCGCCCGCATGATCACCTGCGGCGAAGGCGAG +CTGATTCTGGCCGGGGGCGTGGAGAGCATGAGTCGTGCCCCCTTCGTCATGGCCAAGGCG +GAGAGCGCCTATAGCCGGCAGTTGCGCACCTTCGACAGCACCATCGGCGCGCGCTTTCCC +AATCCGAAAGTGCTGGCTGAATTCGGTTCCGATACCATGCCCGAAACCGCCGACAATGTG +GCCCGGGACCTCGGCATCAGCCGCGAGGCTGCCGACGCCTACGCCCTGCAGTCCCAGCAG +CGCTATGAAGCCGCCCGTCAGGATGGTTTCTACCGAGAAGAAGTGCTGCCGGTGGAGGTT +CCCCAGGGTCGAAAGCAGCCGCCACGCTGGGTAAGCGCCGACGAACATCCGCGACCGGAT +ACCGACACCGCAAAACTGGCTCGCCTTTCCCCCCTGTTTGAAGAGGGCGTGGTCACCGCC +GGCAACGCTTCCGGCATCAATGATGGCGCCGCGGCACTGCTGATTGGCTCACGGTCTGTC +GGCCAGAGGCTGGACATCAAGCCCCGCGCTCGCATCCTGTCTGCCGCCGCGGCCGGTGTC +CCGCCACGTGTTATGGGCCTGGGCCCGGTCCCGGCTGCACAAAAGGCCCTGGCCCGCGCC +GGTCTCAGTCTGAACGACATGGACATAATTGAAATCAATGAAGCCTTCGCCGCCCAGGTG +TTAGGCTGCTGCCAACAACTGGGTATCGCGGGCGACGACCCGCGCCTGAATCCCAATGGC 
+GGCGCCATCGCCGTCGGCCACCCCCTGGGCGCCTCCGGCGCAAGGCTGACACTGACCGCC +ATGCGCCAACTGGAACGCATCAACGGACGCTACGCGCTGGTCAGCCTCTGTATTGGCGTT +GGACAGGGCGTCGCGGCCGTGATTGAGAGAATGTAAGGCGTGAGGAAGTCAGGAGTCAGG +CACAGAATTCACTACGCTTCTCCTTTCGCCTCACCCCCTAACCCCTAACCGTTGCAAAGG +AGTCACCATGAAACCCTTCACCTGGGAAGACCCCCTACTCCTGGACCTGGCCCTGGACAG +CGACGAACGCATGGTCCGCGACTCGGCCCACGACTACTGCCAGAACAAGCTCATGCCCCG +CGTGCTGGAAGCCAACCGCCATGAGGTGTTTCACCGTGAGATCATGAACGAGATGGGCGA +ACTGGGTTTTCTAGGGCCGACCATTCCCGAACAGTATGGCGGGGCCGGCGTTAATCATGT +CTGCTACGGCCTGATTGCACGGGAAGTGGAACGCGTCGATTCCAGCTACCGCTCCGCCAT +GAGCGTGCAGTCATCCCTGGTGATGCACCCGATCTACAGCTTCGGCTCCGAAACGGTCAA +GCAGAAATACCTGCCCAAACTGGCCAGCGGCGAGTGGATCGGCTGCTTTGGTCTGACCGA +GCCCGATCACGGCTCCGACCCGGGCAGCATGATCACCCGCGCCAAGAAGGTGGATGGCGG +CTACCGGCTCTCCGGCGCCAAGACCTGGATTACCAACAGTCCGTTGGCTGATGTTCTGGT +GATCTGGGCCAAGCTGGACGACACCATTACCGGTTTTGTCCTTGAGCGTGGCATGGAGGG +CCTGGAAACACCCAAGATCGAAGGCAAGTTCTCGCTGCGTGCCTCTGTCACCGGCCAGAT +CATGATGGATGACGTATTCGTGCCGGAGGAAAACCGCCTGGACGTCACCGGCCTGAAAGG +TCCGTTCAGCTGCCTCAACAAGGCCCGCTACGGCATTTCCTGGGGCTCGATGGGGGCGGC +CGAATTCTGCTGGCATGCCGCACGCCAGTACACCCTGGACCGCAAACAGTTCAACCGCCC +CCTGGCCGCCAACCAGCTGATCCAGAAAAAGCTCGCCGACATGCAGACCGAAATCACCCT +GGGCCTGCACGGCGCCCTGCGTCTGGGCCGGTTGATGGACAGCGGCGACTGGGCGCCGGA +AATGGTCTCACTGCTCAAGCGCAACAACTGCGGCAAGGCCCTCGACATCGCCCGCACCGC +CCGTGACATGCACGGTGGCAACGGGATTGCCGATGAGTACCACGTGATCCGGCATGTGAT +GAATCTGGAGGCGGTGAATACCTATGAAGGGACGCACGATGTGCATGCGTTGATTCTGGG +TCGGGCGCAGACGGGATTGCAGGCTTTTACTGGGTAAGGAGGTTAGGGGTAAGGAGGTTA +GGAGTGAGGCAGAGGGTACAGTCCAATCACCTCCTAACCCCTAACTTCCTAACTTCTCAC +CTCAAATCTCACATCGAAATACTCCCTTTACGAAACTCACTCACCGCCAACCGCAGATTG +ATGCAAACCGCTGTCCCGGCATCGGACAGCTTGATCGCTTCCGCCAGGGCCGCGTCAATC +TGATGGTTGTGCTCCACCAGGATACCGTGGCCACCGTAGCCCTCGGCGACCAGATGGTAG +TCCGTGTCCCGCAGATCGGTGCCAACCGGGTCGCCCAGCATCTCCACCTGCTCCCGGGCA +ATCTGACGCCAGCTGGCGTCGCAGCCGATGATGGCGATCGGGGCCAGGCCCAGACGGCGG +TAGGTGTCGAATTCGGCCAGGCTGTAGGCGCTGGAGCCGTCGCCGTAGATCAGCCAGATG +CGGCTGCCGGGTCTGGCTGCAGCCGCACCCAGCGTAAAGCCGCCGCCGACGCCCAGCGTG 
+CCGTAGACGCCGGGGTCGAGCCAGGCCAGGGGGCCGCGGGGTTTTAAGGTGTAAGCCCCC +GTAGCGACGAAATCGCCACCGTCCACGACCAGCACGTCCTGCTCCCCAAGATGCCGATCC +AGTGTGCGGAAGAAATGCAGGGGATTGACCTTGTCGGCCGGTGCCGCGGCCTGTTGGTCG +ATTTCCTCCTCGCGCGCGTTTTCGCGCTTACCCAGTTCACCCAGCCACGCCCGCCATTGC +GCCGCGCGGCTCGACATCCGATCTGCCAGCGCCTGCAGAAAATCGCCGGGATGCGCCAGC +ACCGGGACGGTGGGTTTGCGGTTGAGCAGCAGGTCATGGAGAGAGAGATTGACGGACACA +AGGGTCGCGCCTTTGGCAAAACCCCGACCGTACTTGAGCCGGAAATCCAGGGGAAAACCG +CAGACAATGACCAGATCCGCTTCGGCCAGTGCGCGGCCGCGGTGGTGGCGGAACAGCAGG +TCATGTTCGGCACCCAGCAGCCCCCGTGACATACCACCGGTCCAGACCGGTAGCCCCAGC +CGTTCCACCGCATCAGCGATGTCCTTCGCCTGCTCAGCGGTGCGATTCACCAATACCTGA +GAACCCAATACCAGGGCCGGCCGCTGTGCCGAGGCCAGCCGCTTGACCACTTCGCCCAGC +TGCCGGGCGGCATGCCACTCGGCCGCCGGCTCAGTCAATGTTTTCAGATTGGGGTGAACC +GACAGGGCTGGCTGACGTTCCTGGCGCAATAGATAAAGATCCAGCCCCCCACGCAGCAGA +CGGCCCACCGGCCCCTTCATTTTGTCCAGACCGGCCTGATCGGCATACAGGGAGTGCACC +AGATCCCGCGGATACAGCAGATCAATCGGCGCCTCCACAAACACCGGGCCCGGCACCCCT +TGGGCGGCCACTTCGAGGGCATAGCGCATGGCCTCATCCAGCTGAGCCAGGGTCCCCACC +GATGTCTGCCATTTGACCAGCGACTTCATCAGCGCCAGTTGATCGATGTCCTGCAACGAG +CCCCGGTTCTTCAACACCGTTGGCGTGGCGCCGCCAATCACCACCACAGGCGACTGGGCC +ATCTGGGCATTCTTGAGCGCGGTGATGGTATTGGTGACCCCCGGACCCGCGGTGACTGCC +GCCACACCCGGGCGCCCGGTCAGCCGCGCCATGGCATCAGCGGCAAACACCGCCGAGGCT +TCGTGACGCACATCCACCACCTCGATGCCCAGCGCCTTGGCCTGCACCAGAATCGGCGAA +ATGTGACCGCCACAAAGGGCAAATAGATGGGAAATGCCCGCCGCTTGCAAATGCTCGGCA +ACGATTAATCCGCCATGCTTGCGTTCAACCATCGCCAAACCTCCTCACCCTCAATCCACA +ATGGCCCCATTGATGATCGACCTCAACTCCTGGCGGATCGGATAGGTCGACGAGGGAATC +AACTGGGTCATGAACACCATGATCAGGTCTTCCACCGGATCCACCAGGAAATTGGTACTG +GCCAACCCACCCCAGCCGTATTCGCCCACCGAACCGTTGGTCTGGGATTTCGCCACGTCC +ACCTTGACCGAAAAGCCCAGGCCGAAACCGGAACCCTCGTAGGGCGTCTCACTGAACGGC +CCGATCGACAGTCCCGGCAGATCCTGGTTGCCGGGGAGATGGTTCATGGTCATGAAATCC +AGGGTCTTGCGGCCGATAATGCGGGCGCCGTTCAACTGGCCCCCGTTATTCAGCGCCTGG +GCGAAGCGGTAATAGTCATCCACGGTGGACACCAGCCCACCGCCGCCGGAAAGAAATTTG +ATCGGGCGAGTGAAATGGGAGTCCGCGGGATCATCCTGCAGAGCATAGCCCTGTTCAGGA +TCAAACTGATAGCAGGCGGCAAAGCGCTCACGCTTGCTCTCCGGCACCATAAAGCCGGTG 
+TCCGCCATTTCCAAAGGATCGAAAATATGTTCCCGGAAGTACTCATCCAGCGGTCGGTCC +GACAGCAGCTGCACCAGATAACCCAGCACATCGGTGGCCACCGAGTAATTCCAGGCACTG +CCGGGTGAGAATTCCAGCGGCAACTGGGACAGGCGCTCGACCAGCAGGTCCAGCGTCATC +TCCTTGCCCCCATCCAGCTTGAGCTCACGGTAGGCCGCATCCAGGTTGGTGCGGCACATG +AAGCCGTAAGTCAGCCCAGACATATGGGTGAACAGGTCACGGATGGTCATGGCCCGCAGG +GTTGGTGTGGTCAGAAACTGCGGATAGACACCGCTCTGATACACCCGCAGGTTTTTCCAG +GCCGGAATGTATTTGTGCACCGGGTCGTTGAGCAGAAAACGCCCCTGCTCGAACAACTGC +ATCATGGCGATGGAGGTGACCGGCTTGGTCATGGAATAGATGCGGAAAATGGTATCCCGC +GCGACGGGCTTGTTCCGTTCCACGTCCATCAGGCCCTGGGCCTTCACGTAAGCCACCTGG +CCCCGCCGGGCCACCAGCGTCAACGTCCCCGGCAGCCGTTTGGGCTTGAGATATTTGCGG +TCGATATGCCGCTCAATGTTGTCCAGCCGCTCCAGAGACAAACCGGCAATCACGTCAGAA +CGCCCCATTACCTTCCCCCTATCCAACAAACAGGCATGCCAGCTTTGCGTATTGCTCAAG +GGTCAGGGGGGTGGCCGATTCGGGACCGTCCGAGGCCAGGACGGCCGAGGTCGAACTTAC +AGGGACGTATTTACAGCGTGTCCCGAATCGGCCACCCCCCTGACTCGAGTTACGATACAG +GCACTCTCATGCCTCATCATCACTGAATCGATGATCCTTGAAATCATCCCGCAACTTGGT +TTTCTGCAGTTTCCCTGTCGCACTGTGCGGCAGCTCATCCACAAAAACCACATCATTGGG +CAGCCACCACTTCGGCACCCGCTCGCTCAGGTACTCGACAATTGCTTCCGCCGTAACGGC +CGAATCCGGATTGCGCACCACCACCAGCAAGGGACGCTCCGCCCATTTACTGTGTCGCAC +CCCGATCACGGCCGCTTCCGCCACTTCCGGATGCCCGACCGCCTCATTTTCCAGATCGAT +GGAACTGATCCATTCACCCCCGGATTTGATCACATCCTTGACCCGGTCCACGATCTCCAT +GTAACCATCTGGATCAATGGTGGCCACATCGCCCGTGCGGAACCAGCCCTCATCGTCGAA +CGCCTTGGCATTGGCCTCGTCATTGCGATAATAGCCGCTGGTAATCGCGGGTCCGCGCAC +CAGCAGTTCGCCACGGGCCTTGCCATCACGGGGAAGCGCCTTGCCGTCGGCATCGACGAT +TTTCATTTCCACCCCGAACAGTGCCCGGCCCTGCTTGCCACGCACCGTCAGCTGCTCTTC +CGCCGACATCCGCTCTCGCATGAAGGGCGTCGGCTGGGAGACGGAGCCGACTGGGCTCAT +CTCCGTCATTCCCCAGCCCTGAATCGCCTCAACCCCATAGCGCTCCTGAAACTCGTTGAT +CATCGCCCGAGGCGCCGCCGAACCACCAATCTGCACGTGCTCCAGCTTGGAGAACCGCTC +ACCTGACTCATTCATATGCCGCAGCAGCCCCAACCAGACGGTCGGCACACCCCAGGCGTC +GGTCACCCCCTCGCTGTTGATCAACTCCGCCAAAGAAGCACCGTCGAGCCTCGGCCCCGG +AAACACCAGCTTGGAGCCGGTGATCAACGCAAAGTAAGGCATGCCCCAGGCGTTGACGTG +AAACATCGGCACGACCGGCAACAGTGAACTCTCCTCGCCGAAATCCACCCCCGGGAAAGC +CATCACGGAGAAGGCGTGTAACAGTGTGGAGCGGTGGGAATACAGCACGCCCTTGGGATT 
+GCCCGTTGTGCCCGAGGTGTAGCACAGGGACGACGCCTGATTCTCATCGAATTCCGGCCA +GTCATAGCGGGTCGGCTGGTCCTTGATCAGGGTTTCATAGCAGTGGGCATTTGGCAGCCC +GGTTTCCGGCATATGGGCCTCATCAGTCATGATGATGTAGCCCTTGATGCTGTTGAGCTG +GGGATGCAGTTTTTCCAGCAGGGGGACAAACGTCAGATCCACGAACAGGTACTGATCCTG +CGCGTGATCGACAATGAACTGGAACTGCTCCGCCGGCAAACGCGGGTTGATGGTATGACA +GATGGCACCCATACCGGAGACCGCATAATAGATTTCCAGGTGACGGTAATTGTTCCAGGC +CACCGTGGCAACACGGTCATCCTGAGTGACGCCAAGACCTGCCAGGGCATGGGCCAGCTG +GCATGACCGGTCCCGCAGTTCCGGATACCGGTAACGGTGGATGGGGCCTTCCGTCGTGCG +GGAAACGATTTCCTGTTCCGGATAATTGACTGCGGCGTACTCCAGGATCTGGGAGATCAG +GAGTTGCCGGTTCATCATCAGTCCATGCATCATCTACCCCCTTTATTGGCAGGCCATCCT +CAGCGCGCGCTGACGATCGCCATGGTGATTCTTGAAATACAGATCAGTCGATCCTGCTCG +TCTTGGATACGGATTTCCCATACCTGGGTAGCGCCTCCGATATGGAACGGTCGGGCCGTG +CCATAGACCCAGCCCGAACGGGCTGAACGTATATGATTGGCATTGATTTCCAGGCCCACG +CCCGCCTTGTCGGGATCGGCAATGCACAGGTTGGCACCGACGCTGCCCAGGGTTTCGGCC +AGCGCCACCGAAGCCCCGCCATGCAGCAGACCAAAAGGCTGATGCGTGCGCTTGTCCACC +GGCATCCGCGCTCGCAGGAAATCCGGCCCCAACTCCAGATACTCGATGGCGAGGTGCTCG +ACCATCGTGTTCTTCGCATGCGCATTCAATGCTTCAACCGTTGCCGGGCGCTTCCAGATG +GGCGCCTTCTGATCGGGATTCTGGGTCATTCACTATTCCTCGTTACTACCTACCGTTACT +ACCTACCGTTACTACCCACCGTTACTACCACCGGCCCTCGTTAAAACCACCGGCTCAGTG +CGTACCGCCTTAACCCTGCAGCTTATCACGGAGCATCTGTCCAATCTGGGGCGCCTCGGC +GAAGGGATCACCCGGATTGCGGCCGGCCACGATATCCTCCATACGATGTTTTACGTTACC +CAGGGCTTCCGGATGGGAGTAGATATAGAAGCGGTCCTCCCCAATCGCCTCGAAAGTCCG +TTGCGCCACTTCCGCTGCACTCACCTTGCCCGAGGACACCGCCTTCTGGGACATGGCCTG +GGCAATCATCTGGCTGGCCGTGGGGCCACTGTCATTCTGCAGCTCGGCGGGGCGGTTGCG +GTGGGAATCGCTGATACCGGTGGGCACGAAATAGGGGCACAGTACCGAGGCACTGATGGG +GGCGTTGACCAGCTTTAAATCCTGATAGAGGGTTTCCGACAGGGCCACCACCGCGTGTTT +GGAAACGTTGTACACAGCCATGGTGGGCGGACACAGCAAGCCGGCCATGGAGGCGGTATT +GACGATATGCCCCTCATAATCCGTCTCGCGCCTGGCGGCCTCCAGCATCAGCGGCGTGAA +GATCCGTACCCCATGAATCACGCCCCACAGGTTGACGCCCAGGACCCACTCCCAGTCCTG +CTCGGTGTTTTCCCAAACCAGGCCGCCGGAACCGACACCGGCATTGTTGAACACCAGATG +CACCGTGCCAAAGCGCTCCATGGTTTTATCCGCAAGCGCCTGAACCTGCTCGCTCTTGCG +CACATCGCACAGCATTGCCAGCACTTCGGCACCGGCTGCTTCCATCTCCGCCCGGGCCTG 
+CTCCAGCGGCTCTGCCTGGACATCCGCCAGCACCAGTTTCATTCCCCGTGCCGCGCCCAC +GCGGGCAAACTCCAGGCCAAAGCCGCTGGCCGCGCCGGTAATGACCGCAACCCGGTTCTG +CATCTGTTTCATAGGGCCCCACCTCTTGTCATTTGTTCTGTAAGCGGGTCCTTTCACCCA +CATTGCCTGCATGGCAATCTGGCATGGACCTGTCTTGAGGTCAACTTTTTGGAATGCTAT +TTTGCTAAGCGAAATTCAGCAACATCAATCCACCAAAGGAAAGCTGTCCATGGGTGAACT +CGTCGACGTCTACCGCAACAGTGTCCAGACCTGGGAATGCGACCAGATGGGCCATATGAA +CGTTCAGTTCTACCTGGACAAGGCGGACGCGGGTCTGCTGGCCCTGACCCGGATGCTGGG +CCTGAACCGGCGCTTTCTGAATGAAAGGCAGGCACGGGTGCGGGTGCTGGAAAACCATGT +CCGCTTCCTCAGGGAGCAGCATGCCGGCAGCCCGCTGACGCTGCGCGCCGGGCTGATCGA +TATTCGCCCTGACCAGCTGAAACTCTATTTCGAACTCACCAATCCGATCCAGCAAGCCGT +CGCAGCCAGTTTCATCACCCAGGCGGTGCTGGAGTCCACCGCCGGCAAGGATCATCTCAC +CTTACCGCAAAGCGCACTTGAAAAGGCGCAACAATACCAGATCGACTGGCCGCGCCCTGA +AGGCCCCATGGGGCTCGAATCCACCCCACCCCGCACGCCACCCACCCTTCAGGAAGCGGA +TGACCTGGGCATGATGCCCACCTATCTGGGCGCGGTTTCAGCAGGCATGTGCGATGCCGA +TGGGCACCTTGCCATCCGCAGCTATATGGGCATCGTCTCCGACGCCGTCCCCCACCTGCT +CTCGCGCATCCGCCATGATACCCGTGAAGTTCCCCGTCCCGGCGGTGCTGCGCTGGAATA +CCGCTGGATCTACCACCAGCGCCCCGAGCAGGGTGACCTGGTCACCCTGCGCAGTGCCAT +CACTCATCTGGGCAACAAGGCCTACCGCCTGGGACACTGGCTGTTCGACGCCGAAACCGG +TCACTGTCTGGCAACGACCGAAGCGGTGGCAGTGATGATGGATCTGGATGAAAGAAAGGC +GCTGGTGATTCCGCAAACGGCCCGGGCGTCGCTGGAGGAAATGCTGGTAAAAGGCTTTTC +CATCTGAACACGTAATGGCGTTTGCATCGTCAGCCTCCGGGCGCGCCGATGATTCTGGCG +CTCCCGGTTACAAGCTCATCGTATAGATCAAGTCGCCCTTCTGCGCTGCCTGAACTTTGA +TTCCGGCCCTTGGTCTTTGATACAGGTCGTCGGTCATGCCTGCATTGCCGGTATGAGCAC +CTCCCGGACAGGTCGGGTGGGCGCGATCCTCCCGATGCCTGTATCTGCGGAGGTGAGCAT +GAAAGCTGGCATGGTGGCGCCATTGGATGCGACGCAGCAATCCAGACTGCGCGTTCTTCT +CGCACTGTTCGGCCTGGTCTGGCTGATCAACGCAGGTTTCCAGGCCGTGGCGTGGCTGGC +CGCGCCCAATGCCTCAACCCATTTCATCCACGCGCTCGCCAAGTCCACGACGGTGGTCCC +CCGGTGGGTTCAGCCCTTGCTGATGACAGGGCTGCACAGTGCGCAGTCGCTTGGTCTTGG +AATCGTCGCCGCCATCATGGTTCTTTTGGCGATCCTGCTCGGCCTGGCACTGCTGACTCA +AAGAAAAGTCGCGTTCGCAGCGCGGGTGGGGATCATTTACAGCATCATCTGCTGGATCTT +TCTGGACGGCTTCGGTTTTCCCTACGCCAACGGTCAGACCGATCCCGGCGTTTTTGTCGC +CTATGCCATCGCCTTCCTGTTTGTATTATCCGTGGCACCGGTATTTGACAGAGAAGGCAC 
+CAAGGCCCCTGAAATCGACGAGCGTCTGTGGCATTGGGCCCGTATCGCGTTTGGTCTGCT +CTGGTTGTTCGATGCCGTATTGAAATGGATCCCGCCGTTCCTGCTCCATTTCAGCAGCCA +GATTACGTCAGTGATTCCGGGCCAGCCGCACTGGATCGCGGCCTGGCTTTCGTTTGTGGC +CGAACTTGTCCATGCGATCGGGCCGATTCCGGTTGCGGTGGTGGTGGCGCTGGCCGAGAC +CGCAATCGCGATCGGTTTGCTGAGCGGCCGATGGATGCGCCTTGTGATTCCCGTCGGCAT +GCTTTATTCAGTGGCGGTATGGACCACCGCCGAGGCCTTCGGCGGACCTTACTCGACGGC +GGGGACTGGCGTGCGTGGCAACGTTCTCGGTAATGTTCTGATCTACCTGATACCCTTTCT +GTTCCTGTGGGTAGGCAATAGCTCCCAACGAAGCGCAGCGGAAACCACCGGCCGCACCCT +GACAGACTGACACTGTCTATTGCTGGCCGGACACCAATAAAGTGCAGTGACCGATCAATT +CCAGCCCATGGCTGTCTGCCATGACTTGATCCAGTCGGAATAACAATGCCCCCCATAGAT +GACGCCCCCGCCCAAGCGCTGCTATCATCATAAATAGCAACTATATCGGGTCATAAATAA +TATTTATTTAGGCTAGCCGTTCATCTAAGCATCCCCGATGGGCTTTCCATTGTTTGTCGC +CTGTCCGGGTTGGGTACTGTTATGGTCAAGAAAGCACCTGAAGTCGACGTTCCATCCGCG +AAGGACCGTAATTTTGTCACCGCCCTGGCACGCGGCCTGGAGTTGCTGCGTGCGTTCGGG +CCTGAGGATGATTACCTGGGCAACGCTGAACTGGCTGAGCGCACGGGGATTCCCCGTCCT +ACGGTTTCCCGTCTGACCTATACCCTGATAGAACTGGGTTATCTGCGTTATTGCGAGCGT +CTGGAGAAATATCGGCTGGGCGCGGGTGTGCTGGCACTGGGGTATCGCTACCTTTCGCGG +ATGGGACTTCGCGAACTGGCCCGTGGACCGATGCAGGCGCTGGCGGATCGTACGGACTGT +CTGGTGGCCCTGGGAACGGCCGATCGCCTGGACATGACATACGTGGAAACCTGTCAGGGC +GCCGGACCGCTGGTGTTGCGTCTGGAGGTGGGCTCACGCATTCCCATGGCAACCAGTGCC +ATGGGCCGCGCCTACCTGGCTGCACTGCCGGATGCACGCCGGAACGAGTACCGGGAGAAA +ATCCGCGAGGTCTATACCGATGATTATGAAGCCATTTGGCAGGGGGTCGAGCAGGGGGTC +GAGCAGTATCAGAAGCTCGGGTTCTGTACCGCGCTCAGTGATTGGAACCCACATATTGCC +GGTGTGGGCGTGCCTTTGGTGCTGGATGGCGGCAGCCAGATCATGGCCTTCAATTGTGGC +GGCGCGGCGATGCGCCTGAGTCGCTCAGTGCTGGAGAAGAAGCTGGGTCCGCAACTGGTG +GAAGTGGTGGCGGAAGTACAACGCCAGATGCATGGACGGCGCCTGGAGGCGGTGTCCTGA +TATAGCAGGAACGATGGAAAAAACGGCGGGCCAAGGCCCGCCGTTTTCATTGGTGGTTTA +GAAATCCCAGGTCATGCCCAGGGACAGGGATTGCGGATTATCGCCAGGATTCAGCGGATA +GACGCTCTGTCCATGGTTGGGTCCGCCCAGGCCGTATAGACCTTCGGCATTGTTCCGGGT +GTAGGCATAAATCAGGTAGGCCGACACCTGCTTGTCGAAGTGATGCTTGATACCCGCCAC +GTATTCGTTGGCACCGGACTGCTTGACTTCGCTGCCGCTACACACGGTCTTGGTGGGATC +ACAGATATTGGCCAGGGGCGAGCTGAGGTTGCCGGCATGAATCCAGCTGACCATCAGATC 
+GTTGTTACCCGGCACCGCCTGACTGAAGCCCAGGCTGTAGGCATCGCGCGAATACAGACC +GGTTGAACGCTCCCACACGGCAATCGCCGTGGTTCCCGTCGGCTCATAATCCCACTTACC +TGCCAATTCCAGAGCTTTCAGCTGCAGCTTGCTGCTGGGGGGCGTCTGGTTCGGCTGGGT +GGCGTCGGCGAAAGTGAATCCTCCAAGCGGCACTCCCGAGGTGTTACCCTGAACATTGAT +CTGACTGTAGTTGATCAACCCGTTGACCTTGCTCTGCAGACTGGAGAATTCGTTCTCATA +CTGAACACCAAACGACCAGGCATTGTTATGCTGGTTGGCGTTGGGGTAGCCGTAACAGTT +GGGGTAATGGTTCGTGTTCGGGGTACCCGCACAATAGCCGGACCCATAGGTATTATTGGT +ACCAAAGTCCCCACCGTTGGCATCCTCAGTGTATGAAAGGACCGCATCGAACCCCTTCAC +CTTGGGACTGGCATAGGAAATGGCGTAGTTGGCCTGCCCGGCAAAGCTGATTGCGGGTAG +ACCGTTAGGGCCACCATTGGGGAAATTTGCGGTATCGATGATGGAGTTAAAGTCACCCAC +GGTGTCCTTGAACATGTTGAACGGCCGCGAGAGGTATGCACCCTGGTTCTCGAATGAGGG +CTGGAACAGCAGCGTGCCCAGAGGATTGGCAATACCGAAATAGGTATCGTGCAGGCCCAC +GTTCTTGCTCACATGGTTGCCCTTGCCATAACTGTTATTGACGCCGGTCTCACTGCCGGG +CAACGCCAGTTCCACCTGGAAAATGGCCATGAAGGCGCCCGGCACCAGATGCTGGCCGGT +AAAACCGATTGTGGAATTGTTGTTGGAGAGATTAGTGGCCCGTGCCGGAGTGCCGGCGGC +TTCGCCGTAGGCTTCGGTATTGGGCACGGCACGACCGCCGGTATTGGAGAAATCCACCGA +ACCGTGCAGGTGTGCATGCAGTTTGATATTCATATCAGGCATGGAGCTGGAGCTGCCGCT +GTCCGCTACCGCCACCGCCGGTACAGCCATCAGCCCTGCCAACAGGGTTGCCAGGTATCG +TCTCTGCATTTTTGACATCCTCTTTTGTGTTTATTTTCGAGTCTGCTTCCATGCAGCTCC +CATCCCCCACGGATGAGAGTCCACTCGCCGTTCCTGGCCAGGTTCCAGCTGCGCAAGCCG +CTCAACCGGCGGCGCGCTCGCGTTCCTCCTCCTGCAACACCCGCCACTGGATCTTGCCGG +TCGCCGACCGGGGCAGGCTGTCACGAAATTCGATGATGCGGGGTACCTTGTAGGCGGCCA +TGTTGTCCTGGCACCACTGCTGGATCGCCGCTTCGGTCACATCCGCGGGAGCATCGTCCG +TCAGGACGACCACCGCCTTGACCGTCTCGCCCCGACGTTCATCTGGCGCGGAGATCACAC +AGGATTCGCGAATCGCCGGGTGCTGGTACATCATGGATTCCACTTCAGCGGGCCAGACCT +TATAGCCGGAGGCGTTGATCATGCGCTTGACCCGGTCCACCAGGTAGAAATAACCCTGCT +CATCGTAATAACCGAGATCGCCGGTCCGGAAGAAGGATTTGCCATCGACCTCAGTAAACG +CCTCGGCCGTGGCCTGGGGCCGGTTCCAGTAGCCTTTGAACACCTGCGGACCGTTAATCA +CGATCTCGCCCACCTCACCGACATCCTGCTGCTGGCCGCTGGCCACATCGAGCACGCGGC +AGTCCACATCGAAGACCGGGATACCCAGGCACTGGGCCCTGGGATTGTCGACAGGATTGA +CGTGAGTGGCGGCGATGGTTTCCGATAATCCATAGCCCTCGATGTAGGTCAGACCGGTCA +TCTCGTGGAGGCGCTCAGCGATCGCCTTGGGCATGGCTGCTCCCCCACCGCCAATGGCAC 
+GCAGGCTGGAAAGATCGTAGTTGCGGGCTTCAGGGTCGGACAGGAAATCGATCACCATGG +TAACGATATTGCGCCATTCGGTAACGCCGTAGCGCTCGATCAGCGCACCGGCGACCCGCC +GGTCCCAGCGGGTCATGATAACCATGGTCGCGCCCACAAAGATGGGGGCGTTCATCGACA +CCTGCATGCCGGTGACATGGAACAGCGGCAGACTGACCAGGGTCACCGAATCGGCTGTCG +CGGGGTTCCAGGCAACGGCGCCCACGAGGGTCGCCATGACGCTTTGATGGGTATGGATGC +ACCCCTTGGGCGCGCCGGTGGTGCCGGAGGTATAGGGGAAGACGCAATGGTCCTCCGGCC +CCACCAGGGCCTTCGGGGCCGCAGGTTCCCCGGCCAGCGCCTGCCGCCAGGGAACAACAC +CGGACTGGTTGATCGCCCGGGCCGGTTCACTGACCGCATCCGGCAAGGGCAAGTCCGTTT +GATCGGGAAGGTATTCACTGTAGGCCGCCACCACTGCCCGGCTCAGACCTTCCTCACCCA +GCAGTGGCGAAACCTGGTCGAACAGTTCCTGGGCACAGAGAATGACACGCGCCTGCGTGT +CCGAGGCGTAATGCTCCAGCTCGCCACGACGGTTCATGGGGTTGACCGGCACGACCACCG +CATTGGCCCTCAGAATCGCGTAGTAGGCAATCATCCACTGCGGGGAGTTCTGCATATACA +ACAGCACCCGGTCGCCCTTCTCCACGCCCATTTCGCGGGCCAGAAAACCGGCCAGGATCT +CGACCTGACGCTGGAATTCGCCGTAGCTGATACGGCGGTCGTAAAAGATGATGGCATCCT +TGTGGGGATAGCGGTGAGCGGTCACTTCCAGGTTGTAGCAGATACTGGTTTCCGGCAGCG +TCAAGTGCAGCGGAGCGAACTCGGGCCATACCTGGTGATGGCGATCAAACATCGAATTTT +TTCCTCCTCTGGGCCCAATAAGCGGGCACTTGTTCTTGTATCGAGGAAAGACTCTTGTTC +GTAAAAGTCTCGGGACGGGAAAAGTTCCGAGGGCGTTGTAAGGAAACGGTTTTTCCTTGT +GGCCCGGCCAACTCCCTGTCTATATTGGCCAATCTCGCATGGAATACTCAGGGCGTCAAT +ATTTTTGGAATTTAATTCCGCAGAGCAGAATAAGGGAAAAAAGAATAGAAACCAGATCAC +GGAAACGGCATTCCAGGCTCCGCCCTGAAGTGCAAGCTGGGTACGAAGACATTGAGTTGA +AGATGACGCTACAGGACCCCTGTAGACAAGGGGCCTGCTTAAGCAGGGTCAATCTGCCCT +GTCCCCGGACTCCTGCGGGGACCAGGGCAGAAGTCCCTATTGGGACTGATGCTGGGATTG +TTTGCGGATGGTGCTGCGGGCAAGGATATTGATGAGCAGGGCAAAGCCGGTGGTCAGGAG +CGCCGCCACATAGGCCAACTGGTGGGCCGCGCTGGAGGGTTCGCTGATAAAACTCCAGAT +GACGTAGGTCAGGTAGCCCACCGGATGGTGGAAGAACTCGCCCCCCCACATGTAGTTGGA +CCATCCCAGCGTGTAGATCAGCGGCGCCGTTTCGCCCATCGAGATGGCGGCGGCGAACAG +CAGACCGTTGAGAATCTGGGGAACACAGCTGGCCAGCACCACCCGGAAAATCACCACCCG +GTCACCGGCACCCAGCGCATAGGCTGCCTCCCGGACGCTGTTGGGGACCTGCAGCAGGGC +CAGCTCGCTGGTACGCGCAATGTAAGGCATGATCATGAACGATAGCGTGATGCACCCGGC +CAGTAACGAGAACTGCCAGCCAAAAAAGTTGACCATGGCGATGTAACCAAACATGCCCAG +CACAATGGAAGGCACACCGATCATCACATCGCACAGGAAGCTGAAGAATCGGGCACTCCT 
+GCGGTGCTGATACTCGGAGAGGTAAATGCCGGTGACCACACCCACTGGTGCAGCAATCAA +CAGCGAGCCCACTGTCACCAGCAGCGTCCCCTCAATCGCATTCAATAGCCCGTGGTACCC +CTGGGTGGTCTGCGACAGGGTGGCCCAATTCACGTGAGCACCACCGCGAATCACCACAAA +GGCGATGATCGCAATCAATGCCAAGGCCAGCAGAGCGAACATCAACGCCGTGCTTCCCCA +ACCCAGCCCGCTCAGAGCAAGGCGCAACCGGTAGCCACCGGCACGGGGCCGATCAAGTCC +GGAGTTGCTGATTTCATCGCTGGAATATTGCATGGTCAGGGCCTATTAAAATTTCCCGGG +CATTCTGCTTCGAGCCAGACTCCACTGCGGGTGCGCAGCTCGGGTTCATTTGCAATCAGT +CGCGTGCCAGCCACAACAGGAGCCGGGCAATGACATTGACGATCAGCGTGATGAAAAAAA +GGATCAGTGCGATTTCCGACAGTGCATGCGTGGCCATCCCCGAAGCGTCTTCCAAAGCAC +TGTCGAGCTGGGAAACGATGAATGCCGCCATGGTGGAAATCGGGCTGTAGATATTATGAG +GCAGATAGCCCAGGGCATTGCCACTGACCATCAATACCGCCATGGTTTCGCCCAGGGCCC +GGCCCAGCGCCAGAATGCCGGCACCAATCACCACCCGGCGCACACTGGGCAGCACCGTTT +TCCAGAGCACTTCAAAACGGGTGGCGCCCAGGGCCAGCCCCGCTTCACGCTGTACCATCG +GCACCCGCTCAATAGCGCCACGGACCGTGGATGTAATCAGTGGTGCAATCATCACCGCCA +GAACCACCGACGAGGTTAAAAGCCCGTAACCGGCGCCCGTCGGCGGTTGAAAGAAGGGGA +CATCTCCCAGCACATGCACCAATCCGGGGTAGATGTAATGGTTCATGAAGGGCACCAGTA +CCACCAGTCCCCAAAGCCCGAATACCACACTGGGTACGCCGGCGAGTAATTCCACGAACA +GGGCGAGCGTATTCTGTAAACGCTTGGGTACCGCCTCTGCCAGAAACGCCGAAGCCGCCA +CGGAAATCGGCAGCGCGATCAACAGGGCGAGAAAGGAACTCAACAGGGTACCGGCAATCA +GAAAGCCGATGCCGTAATCGGCTCCTGGCGGCACTTCCTGGCCGTTGACTGTCACCAGAT +CGCCGTATTCGTTGCCCAGTGACCATTGGCTGCCGGTCAGAAAATGCCAACCACTGAACT +TGATCGCAGGCCAGCTGTTGATCAGCAGAAAAATGAAAATGGCCAGTAGTGCCAGCGGGA +TAACCGACGCGATACCGGTAATCGACCAGCGAAAAAGCCGATTTGTTTTCATCAAACCAC +TCCACTCACTTGGGTATGCCAGATACAAACAGGGTCCGACTGCCCCAAAGCAGCCGGACC +CGGTACATCCCAAGGTGGATCAGCTGTGGATCTTGGCGATCTGCGCCTTGGTCAGCTCTT +CAGCCTTCTTGGGCAGCGGCATGAAGTTCACAGCGCCCAGGTACTGACTGGCGTTACCGC +CGTTGGGGCTGACAGCCCAGTTGAGGAACTGCTTGAGCGGCGCAGCCAGGTCACCCTGGT +TGGCGTGCATGATCACATACTCATAGTTGATGATCGGGTAGGACTTCGCACCCGGCGCAA +AAATCAGGCTGATGCGCTCATCTTTCGGCGTTTTGGGGACCATGGCGGCGGCAGCAGCCG +GAACCGTGGTGGAGTTGGGCAGTACGAAGTTGCCAGCTTTGTTTTTCAGCATGGCTTCGC +CCAGGTTGTCCTTGTCAATCTGGCCCTTGTAGCTGACACCGATGTAGGCAACGCTATATG +GGTTGTCCTTCAGTGCCTGAACCATGCCCGGGTTACCGGTGGCACCGATTTCACCCTGAA 
+CCGCCGGCCAGTTGACCGTGGTGCCATAGCCCAGCTTCTTGTGCCAGTACGGATGACTGA +ACGACAGGTACTGGGTGAACAGGAAGGTGTCGCCACTGCCGTCGCTGCGATGAACCGGCA +CGATGCGGTGATGGGGAAGATCCACACCCGGGTTCATGGCCTTGATTTCCTTGTTGTCCC +AGTACTTCACAGTACCTTCATAGATGCGGGACAGAACCGGACCGCTCAGCTTGAGGTGCT +TGTCGTTCAGGCCGGGAACGTTGTAGTTCACCATCTGGGAAGAAATGGCCACCGGGATAT +TCAACATATCCGAGTGCTTCTTCATCATGGCGCCGCTCAGGTAAGCATCAGAGCCGCCCA +TCTGCACGTTGCCGGCGATGGACTGGGCGATGCCGGTGCCGCTACCGGTGCTGGCCGCGT +TTACCTGAATATCAGAGTGGGCCTTGCTGTAAGCCGGAATCCACTGGTTGAACAGCGGGT +ACAGCAGGCTGGAACCGGTTTCCTGCAGGGTGGTGGACGCATTGGCAACGCCCATACCAG +CGGTCAGCAGTGCAGCAGCGAGCGCGCCCTTGGCCATCATCGAGCCGGTACGGGTGCTTT +TCAGACTGAACTTTCCTTTCATCTCGTTACCTCTATCTGTGCTTTGAGTCATTCATTCGG +GCAAAAACAACGACTGGCCGATCTCCGCTTCCCTACAGCCGACCTGGGTGGTTGCCTATG +GATTGAATCCGTTTGCAAAGCGCAACGCCGCTTCCTGAACGGCAACTTGCGGTCTCTGCA +ACGCGGCAAAGAATGCACCTCAGGCAATGACAGTTTCGTGACAGCACCGGGGCCAAATTG +ACCTAGGAGTCTGCGCGGCAGAAGTATTTTAAGCCAGCACTCCCACAACACCTGACAACG +GTGTGGGCTCATGGTCGCGGCCGCCCGAGATATCCAGCCAGGCTAAATGTCGGTTTACCG +CCGTCGCCTTGGCTACAGGCAAGGTGACAGCTGTACATTGTGCACTCTATACGTCCAGGT +ATTCTCAGATTTTCTACACCATTCTCATCAATGTGCTCATTCGCCTGAGGTTGAGCGACA +GGCAGACCAGATCCCATTCACCTTGTACCGCGTTGAGACCTCGCAGACTGAACTGCCGGA +ACCCCAACACATGCTTGATCCAGCCGTTGACCGCTTCAACCAAGTGCTTGCGCTGCCCAT +AAACGGATCGTCCGGCGGCGGTGGCCAGTTTCTCCGCCATCCGCGCCGTCGCCGGATACC +GGGTCGCGTCTATTTCTCCGGGTTTCCTACCTTCCCGCCGCAGTGACACATAACCATCAA +TGCCGTTGACCTCCAGCGTCTGAAGATCACCCTCCTTACGGTAGCCGGCATCGGCCAGAC +ACTGCCGGGGATAGGTGCCCAAGGTGTCTTTCACGTCGTTCAGTAAGGGCAGTAGACAAC +CGTTATCACTGGGATTGCTGCCCTGATGATTGGCAACAATAAGCTGGAACTCACCATCAA +CGGCCAGCTGGCCGTTGTAGCATTGCTGGAACCCATCCGATGTTTTCATAATCCGGCTTT +GGGGATCGGTGAAGTTACTCTGATCCTTGTCGTCCGGTACCCCGTAATCCCGCTTGAAAT +TCCGGCCGCCACGGGGTGAGCGGCGATCGTCGTCCGGGGATCGGCCCCGGGCCTTGTCTC +GCTCCTTCTGGTCCGCTTCCAGCTTCTCCTTGGCGGCACGGATCTTATCCAGTCGTGCCT +GGCGGTGCTGAAGCTCTTCAGGGAGTTCATCACCGCGTTGATCCGGGCCGAACTGCTGAT +CTTCCTCTTCATCCGTTCGACGAGCCTGGCGGCACAGCTCATCCACCTCTTTGGACAGGC +GTTTTTCTTCTTCCTGCATACGGCCATAACTCATGGCTTTGTGCTTGCTGGCATTGGCCC 
+GAACCTTGGTGCCATCAATCGCCAGCGTTCCCAGGGTGATCAGCTCTGCTTCTTGGGCGA +TCCGGACGACCTGAATAAACACAGCCTTGAACGCCGCCAGATGCTGTTTGCGGAAATCGC +AAATGGTTCGATGCCTGGGAAAATTGCCTGCTGCCAGTACCCGGAAGGCCACGTCCTCTT +CCAGCTTCCGGGCGATTTTGCGGGATGAAAATGTGCCCGTCGCGTAGGCGTAGATCAGAA +CCTTCAGCATCATCCGTGGATCGAAGGGGGAATTGCGCCGGCCGTCCCCGTCGTAACGGG +CATAAAGCGCACTTAAATCCAGCTCCTCGACGACATCGCTGACAAAATACGCCAGATGGC +CTTCAGGCAGCCACTCGTTCAGGCTTGGGGGGAGGAGCAATTCCTGGTCGGGAGAATACG +GGCGGAAGGTGGTTCCCATCGGTCTTCCTGATGCTGAAGATTTTGGTCTATTCTGCCAGA +AAGTCACTTCTGCCGCGCAGACTCCTAGGGCATCTTCGGAAAGCGCCAGGAAAGAAAAAT +GTGTTTGTGACCTAACGTCAAAGACGGGATCAGAGTGCGCAGTTCAGGCCCGGCCGGAGG +GCGACCGGGCGTTGCCAGGGACCCTGAGCTGGGCTCTGACATACGACGAACTTATGTTGA +GGGACACTTAGGGAAGCACTGATTAATTCCAGCGCCCTTCTGGCTTACAGAGGATTCCAG +AATCAAGGCGCGGTTTTCAAGGTGGGCTGGTTGCCCATCGAAAAACCGCAACGCGGAGTC +TGGTATCCTCTGTAAGCCCCTTCGGGCAAGGCCTGAGAAGCGCATTGGACTGCGTTGCGG +CTCTGGCAAAGGGCTGCGGCCATTCACTGCGAGCCACGCCTTGCCACTGCGCTTCTCAGG +TCTTGCAGAAGGGTGCTGGAATTAATCAGTGCTTCCTTAGTTGCGGATATAGCGGAAGGG +GGCTTGCTCGACCTGACTCAACTTCCCCTGCAGGCGCAGGTGATGCAAATGGGCAATGCT +CTCGCCCATGGCGAAGAGAATGGCATGCTCGTCCAGAGGGCGCCGAAACATCAGCGACAG +AACGTCCCGGGCTGACTGGGGGTGCTCGCAACGCTCAAGCAATCGATCGATCTGGAGTTC +ATGGTGTCGCTGAAGCGCTTTGAGCCGTTGGTGCAGACCTTCAAAAACCAAACCATGAGA +GGGCAGCACGCGGATATTTTCCGGCAATGCGGAGAGACGTTCCAGTGAGGCCAGAAAATC +GCCCAAGGGATCCTTGTCCTCTTCCCCCGGCCGCATACTGATGTTGGTGGAGGTGTTCCT +AGGCTGTTTCCTGGTGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGAGTATTC +TATAGTCTC +>NODE_50_length_1320_cov_1878.61_ID_99 +GGGCGTCGCGCAAGCCGTCGCCCAGCAGGTTGATGGCCAGGATGGTTAGCATGATGGCCA +GACCGGGCAGGGTGACCGTGGTCTAGGTGTCGTTGTACGTGGGATCCCCGGGTACCGAGC +TCGAATTCGCCCTATAGTGAGTCGTATTACAATTCACTGGCCGTCGTTTTACAACGTCGT +GACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCC +AGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCTGA +ATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACC +GCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGAC +ACCCGCCAACACCCGCTGACGCGAACCCCTTGCGGCCGCATCGAATATAACTTCGTATAA 
+TGTATGCTATACGAAGTTATTAGCGATGAGCTCGGACTTCCATTGTTCATTCCACGGACA +AAAACAGAGAAAGGAAACGACAGAGGCCAAAAAGCTCGCTTTCAGCACCTGTCGTTTCCT +TTCTTTTCAGAGGGTATTTTAAATAAAAACATTAAGTTATGACGAAGAAGAACGGAAACG +CCTTAAACCGGAAAATTTTCATAAATAGCGAAAACCCGCGAGGTCGCCGCCCCGTAACCT +GTCGGATCACCGGAAAGGACCCGTAAAGTGATAATGATTATCATCTACATATCACAACGT +GCGTGGAGGCCATCAAACCACGTCAAATAATCAATTATGACGCAGGTATCGTATTAATTG +ATCTGCATCAACTTAACGTAAAAGCAACTTCAGACAATACAAATCAGCGACACTGAATAC +GGGGCAACCTCATGTCCGAGCTCGCGAGCTCGTCGACAGCGACACACTTGCATCGGATGC +AGCCCGGTTAACGTGCCGGCACGGCCTGGGTAACCAGGTATTTTGTCCACATAACCGTGC +GCAAAATGTTGTGGATAAGCAGGACACAGCAGCAATCCACAGCAGGCATACAACCGCACA +CCGAGGTTACTCCGTTCTACAGGTTACGACGACATGTCAATACTTGCCCTTGACAGGCAT +TGATGGAATCGTAGTCTCACGCTGATAGTCTGATCGACAATACAAGTGGGACCGTGGTCC +CAGACCGATAATCAGACCGACAACACGAGTGGGATCGTGGTCCCAGACTAATAATCAGAC +CGACGATACGAGTGGGACCGTGGTCCCAGACTAATAATCAGACCGACGATACGAGTGGGA +>NODE_5_length_30845_cov_61.6996_ID_9 +CTCCCTCCGCCACCTCCGATGAGGGACAAAAGCCCGCCCGCGTCCTGGTCGTCGGCAACG +AGAAGGGCGGCAGCGGCAAGTCGACCGTCGCCATACACCTGACCGTCGGCTTCCTGCGAG +CCGGCCTCAAGGTGGCCAGCCTCGACCTCGATGTCCGCCAGGCGTCGCTGACCCGCTACC +TGGAAAACCGCGCCGCCTTCGCGGCCCGGCGCGGCGTCTCGCTGCCGATGCCGAGCCACC +GGCGGGGGAACGACCCCACGGCGCCCGACGACGTCGCCGAAATGATCCGCGCCCAGGCCG +CCGGGCACGATGTGCTGCTGGTCGACACGCCGGGCCGCGTCGACGCGGTATCGATGGCCG +CCCATGTGGTCGCCGACACCATCGTCACCCCGGTCGGCGAAAGCCACCTCGACCTCGACC +TGATCGGCAACGTCGACCGCAACCAGGGCCGCGCCATCCGCCCCGGCCCCTATGCCGAGT +TCGTCTGGGCGGTGCGCCAGGAACGGGCCCGGGCCGGCCGCACGCCGACCGACTGGGTGG +TCTGCCACAACCGGCGGCGCGCGCCGCAGACCCGCGTCGGCCGCGAGGTCGAGCGCACCC +TGGCCGACCTGTCCAAGCGCTTCGCCTTCCGCCTGGTGCCCGGGTTCTCCGAACGGACCA +TCTTCCAGGAACTGTTCCCCGACGGGTTGACCCTGCTGGATCTGCGCGAGTCCGAGACCG +GCGTCGGCCTGACCCTGTCGCACGTCGCGGCGCGCAACGAGATCCGCCACCTCGTCGACG +CGCTGGACTTTCGGTGATCGAGCCCCAATCTGGTGGACGGGGCACGCAATAATTCCGGTG +ACAGTGCACTTGATTCACTCGCGCGGAGCTTGCCGGCAATTCCGGTGACAGGCAATTCCG +GTGACAGTGCACTTAATTCCGTCCTCACGTCTCGATTGTCTTGGTAAAGTCGACGCTTGG +GAATTAAGTGCACTGTCACCGGAATTAGGAATTGACGCCGAAATTAAATCAATTAACCGA 
+AATCAGTTAATCAGGATTGCCGCCGCATGATCACCATTGACCAGACCCTGACCCCGAAGG +ACCTGACCGAGGACCTCGCCCGCTTCTGGACGCTGTCCGGGCAGAAGATCCGCCAGCTGG +CGTCCCGCTGGAACGCGGGCGACGGCGCGCCGGTTTTCACCGTCGAGGGCCGCTACACCA +GCCGGGGCTGGACCGAATGGACCGAGGGCTTCGTCTACGGCTCGGCGCTGCTGCAGTTCG +ACGCCACCGGCGAGGACTGGTTCCTCGACTACGGGCGGCGCGGCACCCGCGAGCACATGG +GCGGGCACGTCACCCATACCGGGGTGCACGACCACGGCTTCAACTGCGTCTCCACCTACG +GCAACCTGCGCCGCCTGATGGACGAGGGGAAGCTGCCGGAGAACGCCGACGAGCGGGCCT +TCCTCGACATGGCGCTGGCCTCCAGCGGCGCCGTGCAGGCGGCGCGCTGGACCAACCTGG +GCCACGGCAAGGGCTACATCCATTCCTTCAACGGCGCCCATTCGCTGTTCGTCGACACCA +TGCGCTCGCTGCGCTCGCTGGCCGTCGCCCATATGCTCGGCCGCGACCTCAAGGGCGAGC +ACGACCAGTCGATCTCGCTCATCGAGCGCCTCGCCGCCCATGCCCGCACCACGGCGGAAA +CCGCCGTCTTCTTCGGCAAGGGCCGGGACGCCTACGACGAATGGGGCCGCACCTGCCACG +AGGCGATCTTCAACGTCAAGGACGGCCATTTCCGCTGCCCCAACTCGCAGCAGGGCTATT +CGCCCTTCACCACCTGGACGCGGGGCCAGGCCTGGATCACCCTGGGCTACGCCGAGCAGC +TCGAGTTCTTCCGCGCGCTGGAGGCCGCCGGTCGTCCCGAGGCCGACGACTGCAGCGACC +TGATGGGCATCATGCGCGACGGCGCGCGGGCCACCGCCGACCACTACATCGCCAACACGC +CGACCGACGGCGTCTGCTACTGGGACACCGGCGCCCCCGGCCTGGCCGCCATGCCGGACC +ACAAGGACCGGCCGGCCGATCCCTTCAACGAGCATGAGCCGGTCGACAGCTCGGCCTCGG +CGATCATGGCCCAGGGCCTGCTGCGACTGGCCGCCGTGCTGGCCGAGGAAGGCGACGACA +GCGCCGAGCGCTACCGCCAGGCCGGCCTCACCGTCGCCCGCGCCCTGCTGAAGGCCCCCT +ATCTGAGCGAGGCCGACGGCCATGAGGGGCTGCTGCTGCACACCATCTACCACCGGCCCA +ACGGCTGGGACCACATCCCGGCCGGCCGCAAGGTGCCCTGCGGCGAGAGCTGCCAGTGGG +GCGACTACCACGTCCGCGAACTGGCCCTGATGATCGGCCGCGAGGCGGCGGGCCAGGAGC +CTTATCGGTTCTTCAACGGGCTGGTGTGATCGGGGTTCTCTGATTTAACGGACGACGCCC +GTCCTAATTCCGGTGACAGTGCACTTAATTCCTTGTTCCGCATGGCGGCCATCTTGGAGA +GATTGGCGCATGGGAATTAAGTGCACTGTCACCGGAATTGACCGTCCGCGGAATCACCGC +TCGTGGAAGGCGCGGATCGACAGCTTGTGGAAGGGCTTCATCAGGTATTGGAGGACGGTC +CGCCGGCCGGTCTTGACGTCGACGCTGACCGTCATGCCGGGGATGATCGGCAGCGGCTTG +CCGGTCTTGTCGAGCAGGCTGTTGGTCTTGGTCCGCAGGCGGATCATGTAGAAGCGCTCC +TTCTTCGCCTCGTCGAAGATGGTGTCGGCGCTGATCTGGTCGAGCGTCGCCTTCAAGGAG +CCGTAGATGGTGTAGTCGTAGGCGGTGAATCGCACCGTCGCCTCCAGCCCCGGATGCATG +AAGGCGATGTCGGAGGGGCGGATCTTGGCCTCGACCAGCAGGTTCTCCTCGGCCGGCACG 
+ATCTCCATCAGGTCCATGCCGGGGCGCACCACGCCGCCGATGGTGGTGAGGAAGATCTGC +TTGACGACGCCGGTCACCGGCGCGCGGATCACCGTGCGGCGCAGCCGGTCGTCGCGCGAC +GTCACCACCTGGCCGAGGGCGGCCATGTTGACCTGGGCCTCGTTCAGTTCCTTCAGGGCG +CGGCTGCGGAACTGCGCCTTGGCCTCGGTGATCGCCTCCTTGGTCTCGTCGATGGCCGCC +GCCAGCTTGAGAAGCTGCTGCCGGGCGACCGCCAGATCACCCGCGTCCTGGCTCTCCTCG +CGCTTGAGCCGGATCAGCTCGATCGGCGCGTTGATGCCCTTCTTCACCAGCGGCTCGAGG +ATGGCCATCTCCTCCTTGGCCAGCTTGATGACGGTCCGCAGGTGCTCGATCTTGGCCTTG +GTCTCGTCGACCTCGTGGAGCTTCTGCTGGTACTGGGCCTGGAGCTTGGCCAGCGAGGCA +TCCTCCTCGCTGCGCCGCGCCTCGAACAGGGACCGCTCGGCCTGGGCCACGGCGGGGAGC +TGCTTCTCGACCTCGGCCGGGAATTTCAGCGCCGTTCCCTCCAGTTCGGCCTGCAGGCGC +GTGACCGCCGCCTTGAGGCCGAGGAACTTCAGGCGGTTCTCCATGAAATCCGAGGAAAAC +TGCGTGTCGTCCAGCTGCATCAGCACCTGGCCCTGCTTCACATGGTCGCCCTCGGCGACG +TAAAGCTCGGAGAGGATGCCCCCTTCCAGGTTCTGGACCACCTGGACATGGCCCGAGGGG +ATGACCCGCCCCTCGCCGGTGCTGACCTGGTCGATCTTGGCCCAGGACGCCCAGAGGATG +GCCAGGATGAAGAACACCAGCACCGCGATGAGGATGATGTTGGCCAGGATCGTCGGCCGC +CGGCTGACCGCGGCGCGAAGTTCGGACATGTATTCGATGTCTGGCGATGCCATGGTTCTC +TACTTGCCCGTCTTGACCTGGCCCTTGGCCAGGCCGCCGAGAATCGTCGCCTTCGGTCCG +TCGGCGACGATGCGTCCGGCATCCATCACGATCAGGCGATCCACCAGGCTGAGGACCGAC +GTCCGGTGCGTCACCAGGATGATCGTCCGCCCGCCCGTCGTCCGGGCCAGGGTGTCGACC +ACCTTCTTCTCGGTGCCGGTGTCCATCGAATTGGTCGGCTCGTCCAGCACCAGAATGGTC +GGGGTCTTGAGCAGGGTGCGCGCCAGGGCCACCGCCTGGCGCTGGCCGCCCGACAGGCCG +CCGCCCCGCTCGCCGACCTCGAGGTCGTAGCCCAGCGGGTGCTGGGTGATGAAATCATGC +AGACCCAGCGCCTGGGAGACCTCGGTGATCTCGTCGTCGGTGGCGCGCGGCACGCTGATG +GCGATGTTCTCCTTGATCGATCCCTTGAACAGGAACACGTCCTGGGGCACGAAGCCGACG +TTGGCCCGCGCGTCGCTGGGCTCGATGTGGCGCAGGTCCGTATCGTCCAGCAGGACCGTT +CCCGAGGTCGGCGCGTAGAGCCCGGCCAGCAGCTTGGCGATGGTGCTCTTGCCCGATCCC +ACCCGGCCGACGATGCCGACCCGTTCGCCGGGCTCGATGACGAAGGAAATGTCCCTCAGC +ACGTCATGATCGGTCCCCGGATAGCGGAAGGTCACTTCCTGGAACTCGATCTGGCCGCGC +AGGCGGGGACGGTGCAGGAAGTTGGCCTCGGGCGGCCGCTCGACCGGGCCGCGCATCACC +GCGTCGAGCGACTTGAGCGAGGTCCAGGTCTGGTGGAAACGCATCATCAGCTGCGAAACC +TGGGCCATCGGCCCCAGGGCCCGTCCGCAGAGGATGACCGAGGCGACCAGCCCGCCCGAG +GTCACGGTGCCGTCCTTGACCAGGAAAATGCCGGTCACCACCATGGCGATGATGGTGCCC 
+TGCTGGACGAACTGGACAAAATTGACGCCCAGGTTGTTGAACAGCCGCATGCGCTGGCCG +ACGGCGGCGCTCTGGCCGACGACGGTCTCCCACTTGGCCCGCATCCGCCCCTCGGCGCCG +ACCATCTTGATGGTCTCGTATCCGAACAGGCTCTCCACCAGGACGCCGTGCTTGTGCTCG +CCATGCACCATCAGCTTCTTGACGCTCTTGGCGATGGGGAACTGCACGAACATGCCCCAG +AACAGCACCAGGGCCATGGCGCTCCCGAGCACCATGCCGATCTCGGGGCTGAGCATGAGG +ATCACGGCGAGGAAGAACAGCGAGAACGGCAGGTCGATGAAGGCCGCCAGGGTCGCCGAG +GTGAAGAAGTCGCGGACCGTCTCGAACTCGCGCAGCATGCTGGCGAAGGCGCCGGACGAT +TGCGGCCGGTGCGACGCCTTGAGGTCGAGGACCTGGTCGAAGATGCGACAGGCCAGCACC +ACGTCGGCCCGCCGGCCGACGAAATCGATGAACCAGCCGCGCAGGTTCCGAAGGATGAAA +TCGAACAGCAGCGCCGTCGCCGCGCCGATGGCGAGCGCGATGCCGGATTCGACGGCATTG +TTGGGCAGCACCCGGTCATAGACGTTCATGGTGAACAGCGGATTGGCGACGGCGAAGATG +TTGATCACCACCGAGGCCATGCCGACCTGGGCATAGGTCCAGCCGTTTTTCGCCAGGGTG +CCCCAGAACCAGGAGCGCGGCTTGGCCAGGTCGGCCCCGGTTCGCTCGCCGCCCAGGTCG +ACCTCGGGCTTCACCAGGATGGCGAAACCGGTATAGGCCCTCTCGAGCTCCTCGGGCGAC +AACTCGGTGGTGCCGTGACCGGCATCACCGGCAGTCACCAGGAAATGGCCGGTATCCGAC +TTGCCGGTCAGGACCACGGCGCCGCGGTTGCGCAGCAGAAGGACGGCCGGCAGGGTCCAG +GCATGGATGCGGCCGAGCCCGCGCCGCACCACCTGCACCGACAGCCCGGCCCGCGCCGCC +GCCCGCTTGAACAGGTCCACCGGAATCGGCCCGTCGGGCTTGGGCAGGGCGGACAGCAGC +ACCGATGGCGACTTGGGCTGCCCGTGGTAGGCGACCAGGAACAGCAGGCAGGACAGCAGG +GAGTCGTCGTCCTCGGCCCCCGGAAGGGCGTCCTCCAGCGGCCCGGCCGGCCGATCGTCG +TTCCCCGGGGCCTCGCCCGCGGGGGTGTCGACGCCGAGCGGGCGCGCCGGAGCCTCGTCC +GTTTGCGGATCGTGGACGACCTTGATGTTCGAGGCGTTGTCGGCGGTCATCTCCCTACCC +GTCGCAACCCAGACCCCGCGCGGCGGGGACGCGGATCCCCCGCCTTGCCAACGCCAAGCC +TGCCGTTCGGCGAGACGCTGGGCGCCATGAATGATAGGGGATTCCAGCCGCCCTTGGCTA +GGGAGCCTTGGGCGCCTTGCCGGTCGCGACCTTGACGCCCATGTCCTTCAGCAAGGTGCC +CCTGACGGCCAGGAAGTCGTAAGCCGCGGAGATCTGCCGGTATTCCGCCGTCGTCAGATC +GCTCCGCGCGAGGAACAGCTCGTTGCCGGAGTCGAGGAGCTCGATCAGCGTCCGCTGCCC +CAACCGGAACTGCTCGCGATAGGCGGCCGTCACCTCCTTGGCCGTGGCCGCCCGCTCGCG +CAGCAGCGCCACCTGGTCTCGGGCCACCTCGTAGTCGGTAAGGGCGACTTCCAGCTCGCG +GCGGATCTTGCGCTCCACCTCGTGGATCCGGTACATGGCGGCGCTGGACTGCTCGAGCGC +CCGGCGCACCTTGGCCTGGTCGGCGCCACCGCCGTAGAGGTTCCACGACAGGGTGAGCAG +GGCCGTCGAATCGGTTTCGAAACCGCTGACTCCATCCACGCCGTCGCGCTGCTCATGCGC 
+CACTTCGACGTCGACCTGGGGGAAATAGCCGCTGCGCTCGGCATTGGCCGTCAGCGTCCT +TGCCCGCTCGGTCTCGACCGCCGCCTGGAACGAGGGATTGTTCTTCAGCGCCTCGTCACG +CGCCTCATCGACCGAGGCCGGGATTGCCACCGTCGGCATCGGGTAGTCCTCGAGGGTTTC +GGGCGTTTCGCCGATCGCCTCCTGGAAATCGCTGATCGAGTTGCGCAGGTTGCCCTCGGC +CCGGCGGCGCTGCGACTGGGCGTTGTACAGGCGCGTCTTGACCTGGGTCACCCGCGTCTC +GGAGCCGCCGCCGGAACGGGCCGCCTCCTCGACGTCGGCCAGGATGCGCCGGTGAAGGTC +GATATTGTCGACCGCATACTGGACCAGCTTGCGATCCCGGGCCACGTCCAGATAGGCGCG +AATGGCGCGCTGGGCGATCTTGGTCGCCGCCGACAGGACATCGAAATGCGCCGACTGGGC +ACTCGCCTTCGCCGATTCCGCCCGATTGGCCGTTCCTTCGCCGTCATACAGCATCTGGGT +CATCGCCAGCCGCTGCTTGTTCCGCCACTGGTTCTCCTTGATCGACGACTCGTTGACCCG +GCGGGCGTGCTGATAACCGGAATCCGCCGTCAAATCGAGCGACGGATAGAAACCGGCCTC +GACCTGATCGACGCTGCGCTCGGCGGCCCGATACTCGGCCTCCGTCGAGGCCACCATCGG +ATGGGACTGGACGGCCATCTCGACCGCCTCGCGGAGTTGCTGCGCCTGGGCATCGGCCAT +GAAGGGGCCACCGAAGACCACGAGGCCGCTCAGCAGGAGGGACCCCCAGGAGAGAGAGTG +GCCGAGCCGGCCACTGGGCGTCGCATTACGGTGGCCGTTTCGTCGATTCGCCGCGACCCC +GGCGTCAGAGAATGACTTCACGATGTTCCCCCACTCGTTTCTGTTCCGAGAGATCGGCGA +TATGACAACCGCATACTTCAAGGCGTTCCGCCCTTCGTCTGTCGCCACTCCCAGGCACCC +GAGGCCGCCCGATTCCCCTCCTAAATCAAGCCCCTCGACTACCGATAATGGAACCAAGCC +ACGATAATACGGATAATTGGCGAATCAAAGCACAACCTTTCGTCCCGCCTCACCGCCGTC +CGCCCTTCGGAAACGCCCGGTTTCCCGGTTGCATCGCCCCTCGCCGCCATGCGGCAACAC +ATGCCAAATTGGCAGAAATGCCAGTCATCCTCCGGCTGCCGCTCGCGGACTTGACCGATG +TCCGAATCACCCTCGCCCTCGCTTGCCTGTCAGCACACCCTCATAACGCAGGATTTTCAA +CGAACACCGAAACAATCTTGAATTGATCGACCAGCGATTCCGCCAACCCACGATGCCTCG +CGGCCAATGGAAAGCAAGGGGCAGAAATGCCACTTGGCACCTATATTTCACTTTTGGCAT +TTCACAAGCTGGTCAGCCAACACCTTTCCGGGTATTGATCATTCATTGCTGCAACGGATA +AAATACAGACACTGGATTGATCGCCCCCTTAAACAGGGCCGATCGTGGTCGGGGGACCTC +AACCATAAGAAATATTTATTCAATGCCTAATAGCTGGAAGGTTGATACATCACCTCCCAT +ATCGATTTGTTGATGCATAGGGCGCACCCGGGGAGGGCGTGCCATTCGAGGTAGCGCCGG +TTTGGAGGGGCTTGGCGCAGAGGGGGAGAAAGCCGTGGCTACGCTTGAACTGATCGTCGA +GCGCGCGTCGGGGGACGTGCTCAAGGTTGAACTGAAGCCCGGAACGAACGTTCCCGCCAT +CCATCCGGGCGACAAAGTCCAAATCGTCACCCCGGCAGGCGAAACCCTGAATGCGGTGGT +CGTCGGCCAGGACGTGCAGATCACGCCTGTCGACGCCTCGGGCACGGTCGGCGAAACCAT 
+CGTCTTCAAGAACCTGGCTCTCTACCTGCATGACGGGCAGAGCGAGGTTGCCGTCGTCAA +CGCGGATACCGGCCAGACCACCGAGATTACCGATGTCGCCTCCCTGGCCGATCTCGGCAC +CGTCCCGTTGCAGATGGCCTCGGGCGAGGGAACCGGGCCGGTTTCCCCCGGCACCTCCTC +CCCCTTCCAGAATTCTGACGCCATCGACCACGGAGGCGAAACCGCCGGCAACGCGGCCGG +AACGCTGGGCGACATCCTGAACCGCGGCGCCGCCGGCACCGATGGCGGACGCGCTCAACT +GGCCGGCACCGGCGAGACGGGGGCGGGGACAGGAACGGGCAGCAGCACCACCGACCATGT +CGAGACTCCGATCTCCGAGTCGACGGAGGGCGGCGGCAGCGGTACCGGAACCGGCACCGG +CACGACCACCTCCGGCCATGCGGTCGACGGCTACATCGTCGGCGCGACGGTCTTCGCCGA +CGCCAATGCCAACGGCGTGCTCGACTCCGGCGAGGCGTCGACGACCACCTCGTACAACGG +TACATTCGAACTGAGTGGCAGCAGCGGCCAGTTGGTCATGACCGGCGGCGTCGATCAGGC +CACCGGCGAATCCTTCAAGGGCACGCTGACGGCGCCCGCCGGCTCGACGGTGGTGACCCC +CCTCACCACCCTGATCCAGTCGTTGGTGGAGGCCGGGCAAAGCGCGGCCGACGCCCAGGC +GGCGGTGAAAAGCGCCCTCGGCCTGACGGGAAACAGCATCGACCTGACCACCGCCAACCC +GGTGGAGGACGTGGAAAACGGCGTCAGTGGCGCCGACGACGTGCTGGCGGCGGCGATCAA +GATCCAGAACACGGTGGTCCAGGCGGCCTCCGTCCTTCAGGGCGCCGGCGGCTCCACCGT +CGCCATGAGCACGGCGACCAACGCGGTGTTCGCCCAACTGGCGACGACGCTTCAGAACAA +TCCGGGCAGCAATCCGATCACCGACGCGACCGCCGTCCAGAACCTGATTACCGGCGCCGC +GAACTCCAGTTCGCTGGGTCTGAGCAGCACCGCCAAGACGCAGGTCGGTAACGCCGCCTC +GGACGCCGCGTCCGTCATCGATGCCGGCAACAGCCATATCAACGGCCTGTCCTCGACCGG +CAGCAGCCTGCTGACCGATCTGGCCTCGGCGGCCCGGGTGGCGCAGAACGGCGCCGCGGA +GGCGCTCCATGACGCCTTGAACGCGGTCCAGGGGACCAGCAACAGTGCCAATCTGAGCAC +CGCCACCAGCTCCTATACCGGCGCCAACCTGACCTCGGAGATCGGCAACGCGTCCTCGGG +CCTGGGAACCGTCGGAACGGCCAGTTCGGTCGGCACATCGGGCGACGACACCATCCAGGG +AACCAGCGGCAACGACACCCTGAACGGCGGCGCCGGCAACGACACGATCAGCGGCGGCAC +CGGCAACGACATCCTGATCGGCGGAGCCGGCAACGACACGCTGAAGGGCGAGGCCGGCAA +CGACACCCTGGATGGCGGCGCCGGCGACGACTCCCTGTATGGCGGCGCGGGAACAGACAA +GGCCCTGTTCGACGGCAATTTCAGCGGCTATCAGATCGCCACGGACTCCGGATCGTCCGG +CGTCATCACGGTCACCGGCAGCGGCACGGACACCATCGACACGACCGAGGTGCTGAAGTT +CAAGGACCTGACGGTCCGCATGGTCGGCGACAGCGGTGGCACGGCCAACGGCTACACCTC +CCTTTCCAGCGCGTTGAGCGCGGCGAGCGTCGGCGAGAGGATCCTGATCCTCGATTCCGC +CACCGATCCCTCCACCCTTACCCTGTCCAAGAAGGTGAGCGTGCAGAAGATCGGCGAAGA +CCCGCTGATCTCCATCGCCTCCGACGGCGCCCTGGTGGTGGACGGCAGCCAGCTTTCCGC 
+CGTCACCACGCTCGACCTTTCAAGCCTGCCCGGCACCACGACGGTGCGCTTCACCAGCCT +GGGCAGCATCGCCAGCATCAGCACCGCCTCCACCGAAACCCTGAACCTGTCGGCCAGCCA +ACTGGACGGACTGACGGTCTCGGGAAGCGGCAAGATCCAGACTTCCGGCATCGTCGCCAC +CAGCGCCGACCTCAGCAACCTGTCGTCCGACCTTTCGGTCGCCTCGGGACAGTCCCTGGA +GCTGACCGCGGCCCAGGCCAGCGGCAAGACCATCGCCGGCGCCGGCAACGTGACCGTCAA +CGCGCTCGGATCTTCGGCCGTCGACCTGTCGGGAATCACCGCGAGCGGCACGCTGACGGC +CAACGTCCCGTCCTCGGCGACACTGAACACCAATACCGATCTCGGCACATTCGGCGTCTC +GGTCGCCAGCGGCCAGACTCTGACTCTGTCCGCCACCCTGGCCGACGGCACCACCATCGG +CGGCGACGGCAACGTGACCGTCACCGGGCTGGCGGCCGCCACCGACCTGTCCAGCGTTTC +GGCGTCGGGCACGGTCACGGCCACCGTGACGTCCACCGTCGACATCAGCAGCAATACCCA +TCTGGGCAGCGTCGACGCCTATCAGGTGACGGGCGCCCTGACCCTGACCGCTGCGCAGGT +CGGCGACAGCACGATTTCGGGCAGCGGCAACGTGACCGTATCCGGGTTGGCGGCGACGAC +CGATCTGTCGGGCATCGCCTCGTCCCTTTCCCTGACGGCGTCGGTAACGAGCTCCATCGA +CATCAGCGCCAACACCAACCTGACCACGGTGGACACCTATCAGGTCTCCTCGGGCCAGAC +TCTGACTCTGTCCGCCGCCCAGGCCGCCGGTCATGCGATTAGCGGCGGAACGGTGACGAT +CAGCGGCAACATCACCGCCAATACCGACCTGACGGACATCTCCTCGACCCTCAGCTTCGA +TGACGGCGACAGCGGGGCGATCTCCGTCGCCTCCGGTACGAAGCTGACCGTGACCCCGAG +CCAGGCCGCGGCCCTGCAGACCGCAGGGCAAACCATCACCGGCGACGGAACGGTGCTGAT +CGACGGCAACGTCACGGCCGACACCAACCTGACGAACATCAGCGCGGCGGTGGACTTCAA +CGGCAACAGCGTCTCCGTCGACAGCGGCCATACCCTGACCCTGACCGCGACCCAGGCCAG +CGATACGACGATCTCCGGCGCCGGCGCCGTCGCCCTTTCGGGCAGCGACACCAACGCCGA +CCTCAGCAGCATCACGGCCGACATCACGGTCGCCAGCGGCCAGACCCTGAGCCTGAGCAC +CGCCCAGTTGGCCACCCTCGACAGCAACGCAATCCCGATCGGCGGCGACGGCACGGTGTC +CCTGACCGGCAATGCGACATCGGCTCTGAACAGCGACCTGAGCACCTATCTGGGCAGCAG +CCTCAACCTGGCGGTGCCCTCCGGCGAAAGCCTGAGCCTGACCGCGATCCAGGCCAACGG +CCTGACCATGGAGGTTGCCGGCACGGCGAACATCACCGGGCCCGCCGGCACAACGGCCGC +CGATTTCTCGAACATCTCCTTCACCGGCAGCGGGGCCGCCACCTTCACGGTCGGCGCGGA +CCTCGTGATCAGCGAAGCCAACGCCGACTTCGGCAGTGTCTCGATCAACATCCCGGCCGG +CCGGACGCTGACCATCGACGCGGCCGACGCCAGCGGCGTGACGATCAGCGGCGACGGCAC +GCTGGCCGTCACCGGCACCCTCGACAGCAGCGTCAATATGTCGAATTGGGGGACGGGCGC +CATTGACCTGACGAACGTCTCCGCCAGCAACTTCAGCCTGACTCAGCTGGACCTCAACGG +CTCGGCGGACTACCACCTCACCTACGCCCAGGTCCAAGCCCTCAGCGACGGCATCGACGG 
+CAACAATAGCGACAACACCCTCATCATCGACGTCAGCACGGCCGGCGGCGTTACCTACAC +CAACAACGCCGCGACGATCGATCTGGACATCAGCCTCCTGGGCGGCGCCGACCGCGTGAA +GTTCGACTTCGGCGGCACGACCGATAGCGGCAACACGCTCACGATCAGGGGGCCCCTCGG +CTTCGGCGACGGCTCCGACACCCTGGAATCCCGCCATGGCACCATTTCGCTCACCGATCC +CGGCCTCACCTTGTCCGGTGGGCCGGAAGCCCTTGTTGCCAATTCGGGATTCTCACTGAC +GGCGACCCTCTTCGATCTGCTCTACGGCAGCAGCGGCGTCACGCTCCAGGGCGAAGGCAC +CTACATCGTCTCGATCGATGCGGGATTTGCTTCGGGCGCATCGCCAACTCTCGATCTGAC +CGTGCTCGACAACTTCGTGCCGGCCGGCGGGGTCCTGCCGACCCTGCAGATCGTTGCGAC +GGGGTATTCGGTGGTCGGTGGTGACGACGGAACAGGGGACGGGATCGCCACCCTGAGCGA +CGGAACCCACACCATAACCATCAAGCTGCCGGACGACCCGGACAACGCCGGGACCTTCAA +TCCAGGCAATACGCCGGTCATTATCGAAATCGATAATGGCACGAGCCAGTTCTTTCTCGG +CGGCCTCGACGATCAGCGCGCCTATTACGAATCGGAGACCACCGTCTACACCGGCTCACA +GTTCGCCGACCTGGCCACCGCGATCGCCACCGACGCAAGCAGCCTCGGCATCAATGCGGG +TGACATCCAGACCATCAAGCTCGGTGATTCCGTCATTCTCGACAGCGATTCCGCGGTCGA +CCTTTCCGCATTCGGCGGTGTCATCGACTACAACGGCCAAACCATCCAGGTCACCAGCGG +CGACAGCCTGACCCTGACCGCGGCGGCGGCCGATGGGGGCACCATCAGCGGTGCCGGCTC +CGTCACGATCACCGACCTCGGTACGACAGCCGTCGATTTCTCCGGCCTGACGGCGGCATC +CGTTACCCTTGCGATCAGTTCCGGAACGGTGGACCTCAGCGCGATCAGCGGCCTCGATTT +GGGCAACGTGGGCATCACGGTCACCACCCCCGGTGCCGTGACCCTGACGGCGGCCCAGGC +GGACGGCCTGTCCATCTCGGGCAACGGCGATGTCACCCTCACCGGTCTGGGCGCCAGCGA +GGTCGACCTTTCCGGCCTTACGGCGACCAACGCCACCGCCGAAATCTCGGCCGACCTCAC +GCTCAGCAGCCTCACCAACCTCGGCAGTCTCGACCTGACCCTCGACGACGGCGTCACCTT +GACGCTGAGCCGGGCGCAACTCGACGGACGCGACATCTCGCTCGCCAGCGGGTCGGCGAC +CCTGGCCTTCGGCGGCAATGCCTCGGGGCTCGACCTCAGCCAGATCGCCTCCGGCATCGC +CTTCGAGGTCGTCGCCGGGCGCACCCTGACGCTGTCGACAGCACAGATCAATGACGGCCA +TACGATCACCGGCGCCGGCAGTCTCGTCGTCGTGCTCAGCGGCACGGAAGTCGACCTGGC +CAGCACGGCGATCCAGGTGACGGGGACCAGGACCGCCGTCGTGTCGACGTCGGCCACCCT +GGACGGCAATACCGACCTCGGCGACTTCTCCGTCTCCATCCTCAGCGGCCAGACCCTGAC +GCTGAGCACCGCCCAGGCGGCCGAGCATGCGATCAGCGGCGCCGGCAACGTCACCGTCAC +CGGGATCGGCAGCACGGCGGTGGATCTGTCGACCATCACCGTCACCGGGACCAAGACGGT +CAGCCTGTCGGCCGACGCGACACTGGATCCCGATACCAACCTCGGCAACTTCGCCATCGA +TACGGCGGGCCACAGCCTGCAATTGACGGTGGCGCAGGCCAACGGCCTGTCGATCACCGG 
+CAGCGGCACGGCCGTCGTCACCGGCCTCGGCAGCGACACCGTCGACCTGTCGGGAATCAC +CGCGACGGCCAGCACGACGGTCTCCGACACCGTCGCCCTGGCGGCCGGCACCAATCTCGG +CAGCGTCGCCGTGACGGTGGACGGCACCGGAAGTCTGACTCTGTCCGCCTCCCAGGCCGA +CGGCCACACCATATCCGGCACCGGCGCGGTCACCGTGACCGGACTGACGGCCGGCATCGA +CCTGTCCAGCCTCGCGTCCTCGCTCGACGTGACGGCGACGGTCCCCGGCACCGTCGATAT +CACCGGCAATACCGCCCAGTTGGCCACGGTCGACACCTACGAGGTTGCGGGCGCCCTGAC +GCTGTCGGCCTCGCAGGCCAGCGGCCATACCATCGAGGGCTCCGGCAGTATTGTCGTCAG +CGGTCTCGACGGTTCCGCCGCCTATGACCTGTCGGGCATCACGGCGAGCGCCAGCACGAC +GGTCTCCGGCACCGTCACGCTCGATTCCGGCAGCAACCTGGGGACCGTCGCGGTCAGCGT +GACGGGCAGCCTGACCCTGGCCGCCGCCCAGGCGACCGGCCATACCCTCAGCGGGGATGG +CAGCGTCACCGTGACCGGCCTGGCGGCGACGACCAACCTGTCGGGCCTCGCCTCGACACT +TTCCTTGACGGCGGCGGTCACGGGAACCGTCGATATCAGCAGCAATACCCTTCTCGGCAC +CGTCGACAGCTATACGGTGAGCCAGTCGACCGACGCCCTGAGCCTGACGGCGGCGCAGGC +CAGCGGCCATGCCATCTCGGGCAGCGGCACCGTTGCCATCAGCGGTCTCGGGGCTGCTGT +CGTCGATCTGTCGGGCATCGCGGCAACCGTTGATGCCACGGCGACGCTCTCCGGCACCGA +GGTCATCCTGACGGCGGGCACCGATCTCGGCACGGTGGCCCTCAGCGTCGGCAGCGGCCA +GACCCTGACCCTGGGAGCCGCGCAGGCCAGCGGCCATGCCATCAGCGGCGGCGGCGACGT +CGTCGTCAACGGACTGAGCGCGAGCACCGACCTGTCGACGGTGACGGTGACGGGAACCGT +CACCGCAACGGTTTCCGCCGACCTCGACATCAGCGCCAGCAGCCAGTTGGGCTCGGTCGA +CACCTTCCAGGTGGTCGCCGGCAAGACCCTGACCCTGACGGCAACCCAGGCCAACGGCCA +GGCCGTCGAGGGCGGGGGCAACGTCACGGTCACCGGCCTGGCCGCCGCCACCGACCTGTC +CGGCATCACCGCGACCGGAACGCTCACCGCCACGGTGACGGCCGACACCGACATCACCGC +CAATACCCATCTCGGCGCGGTCGATAGCTTCCAGGTCGCCAGCAGCACGACCTTGACCCT +GACGGCCGATCAGGCCGATGGCCAGGCCGTCACCGGAACCGGTAACGTCACCATCACCGG +GCTGGGCACCACGGCGGTCGACCTCAGTGCGATCGCCGCCACCGGAACGCTGAGCGCGGC +GATCAGTGGCGATATCACGCTGGCCGGTGACACCAACCTCGGCGACGTCACCCTGACGGT +GGGGGCAGGCAACACGCTGACCCTGACCGCCGCCCAGGCCGACGGAAACACCATCACCGG +CACCGGCGCCGTGGTCATCACCGGCGACGTGGCGGGATACGACCTGACCCACATCGCCGG +GACGCTCGACCTCACCCTGCCGGTGACCGGCGACGTGCTGACCCTGACCGACGGCGAGAC +GGTCCACCTGACGGTCGCCGAGGCCAACGCCTACGACTCGATTACCGGTGACGGCACGAT +CCAGCTGAGCGGCAACGCGACCGCGAACTTCGACCATCTGACAAGCATCCTCGGCGATGG +CGTCAGCCTGGCGGTGGCGGACGGCGACACGCTGTACCTGACGGCAACGCAGGCCTCCGG 
+CGTGACTATCGGCGGCATCGGCACGGTGGATGCCAGCGGCACCGTGACGGACGGCGATTT +CTCGGGCATTTCGGCCGACCTGAACCTGACCGACGCGACCCTGGACGGCACCACGACCCT +GCCCACGGTGGGTGCCGGGCACACCCTGAGCATGACGTCGGACCAGATCAACGCGGCGAG +CATCGCCCTCGCCGATTCGACGGCAACGCTGCATGTGGCGGTCTCGTTCGATGCCCTGAG +CTCGAGCAACGACGCCCTGCCGGAGATCGACATCTCCGAGATCCGGGTCGACGGGAGCAA +CAGCCCCGAGGCCGTCTGGAATTCCGTCGACGTCGCCTCCGGATCGATCGTCGACAAATT +CAAGCTGTTCTGGATCAGCGCCGACAAGCAGTACTACGACTCGACGCCGCTCGGCCAGGA +CGTCGATGCCAACCGGGCGTTCGTCGAACTGGGCAATCTCTACGCCGCCTACCTGGCCGG +CGCGGACGGCGAACTCGGCACCGCCGACGACGGCACGCCGATTCTCGATGTGGTGCAGAC +CAAGTCCGGCGGCGTCGCCGATTACGACGCCCGCCAGCAGTCCCTGCACGACAACCTGCT +GGGCAATCTGAGCGATGGTGCGATCGCCGGCCGGTTCGGCACCGACGATCCACGCAGCGA +TCTGGCCAAGCTGTTCGGCGACCGCCCCTACCTGGCGGGTTCCGTCGACGGCAACGGCCT +CTACACGAACGACGATTCGGTTGCCGCCGTGGTCGGCTGGGACCTGTACCACGGCCTCGA +CTATACGGCCAGCTTGAGCGGCGGCTACGCGGTGCTCGACGGCGACAACAGCGTGACCGG +CACCAGCGGGAGTGACTACATCTACGCCGGTGGCGGCGATGATACCGTCAGCGGCGGCGA +CGGGGCCGACGTCCTCTATGGCGGTTCCGGCGACGATACGTTGAACGGCGGCGCCGGCGA +CGACACCCTTTACGGTGGTTCCGGCGACGACACCCTGACCGGTGGCGCCGGGGCCGATAC +GCTGAGCGGCGGCGACGGAACCGATACGGCCAGCTATGCCGCTTCGACCGAGGGCGTGAC +CGTCGACCTGGAGACCGGCGTCGGGACCGGCGGCGACGCCGAGGGCGACACCCTGAGCGG +CATCGAGAACGTCACCGGCTCGGCCGAGGCCGACGCCCTGAGCGGCGACGCCAACGCCAA +CGTGCTCGACGGCGGGGCCGGCGACGACACCCTGACCGGCGGCGCGGGCGACGACACCCT +GAGCGGCGGCGACGGCACCGACACGGCCGTCTACACCGCGGCGCTGACCACAGACAACAT +CACCTTCGACACCGACCATTGGGTGGTGGCGACCGATGGCGCCGAGGGGACGGACCAGCT +GAGCGGCATCGAGGTCATCGACCACGGCGGCAGCGGGAACATCCTGCTGGTCGGCGGCGA +CGGCTTTGCGAGCATCCAGGACGCCATCGACGTGGCGAGCGACGGCGACACCATCATGGT +CGCCCCGGGGACCTACGCGGAATCGCTGACGATCGACAAGTCCCTGACCCTGATCGGCGA +TCCGGCGACCGGCGATGCCGGGGCCGGTACGTCCGCCCCTCAAATACTCGGCAGCACGGA +CTGGACCCTGGCCACGGTTTCCATCGAGGCGGAGAACGTCACCTTCAGCGGCTTCGACGT +CACCAACGACACCGGCCCCTATGGCATCCACATCAAGGCCGGCGACGCCGACGTGTCCGA +CAACTATGTCCACGACATCAACGGCGCGCTCAGCGGCGACGGCATCCGGGCGATCTTCAT +CAACCCGGTCGACAACGTCACCGTTTCAAACAACATCGTCGAGGACTTCGGCAACGCCGA +CAACCCGTCGGCGGCCAGCTACACCAAGACCGCCGCCGGCATCTACTACTGGGCGCGTGG 
+CGGCACCCTGCCCGGCGGCACGGCCGACATCGCCGAACTGCACAACGTCACCATCGAGAA +CAACGTCATCCACAACGACGGCCTGCCGACGTTCACCGGCACGTCGGTCCTTGGCATCTG +GGTCGGAAGTTCGCAGGGTGGCTCGGTATTGGACACCGTGAGCATCGCCGGCAACCAGAT +CAGCGATCTGCACACCGACAACGCGGACAGGTTGACCGGCGGGATCCTCGTCAACCACGG +CTCCAACCCCGACGGGGTCGACCCCCTCGCCAGCCTCGACACCCCAGGGGTGACGACCGG +GCTGGAGATTTCAGGCAACACCATCGACGACGTGTCGGGCGCCTCGGTGTTCGCCGTCGG +GCTCCGGGGCCAGACGCCGGATGCTTCCGTCATCGACAACGTCATCAGCAATCTGGCGCT +GGCGTCGGGTTCCTCCGACGGGCTGCTCGCCTCCATCTCGTTCCAGTACAACACGACCAC +GCACAGCGTTTCCCTGAGCGGCAACGATCTCGGCGGCTATGACCTGCTCCAGGTGGGCCA +CGACACGTCCGACGACACCCTGACCCCCGCCACGGCCGACGACACCCTCACCGCCATCGA +CGGCTACGACAACATCCTCGTCGGCCAGAGCGGCGACGACACCCTGACCGGCGGCGACGG +CAACGACACCCTGCTCGGCGGCAACGGCACCGACACCCTGACCGGCGGCGGCGGGACCGA +CACGCTGGACGGCGGCGACGGCACGGATACCGCCATCTTCACCGGGACCCGCGCCGACTA +CACCATCGCCGTCGACACCGACGGCCACCTGACAGTGACCGACACTGGCGGCACCGACGG +CACCGACAGCGTCAGCAACGTCGAGACCCTGGAGTTCACTGACACCTCGGTGTCCGTCCT +GACGGTCACCGAGACCGGGGCCAACGGCACCTACAGCTCGATTCAGGCGGCCATCGACGC +CGCTGCCGACGGAGACATCATCTACGTCGCCGACGGCACCTACACGCAGACAGGCACCCT +CAATGTCGACAAGGCGGTCACGCTGATCGGCCAGAGCGAAGCCGGCGTCGTCATCGACGC +CAGTGCCGTCCACGGCTATGGCATCCTGCTCACCGCCGACGGCGCGACGCTCTCGGACTT +CACCCTGAACGGACCGCAGGGCGGCGATGAAACCGTCTGGTCCTCCTACCGCGTCGACTA +CGGGATCAAGGTTTCGCCGAACGGCACCGCCTCCTCGCTCTCCGACATCACCCTCCAGAA +CCTGACCGTTTCGGGCTCGCACAACACGGAGATCGACTTCAACGGCATCCACGATTCGAC +GCTGAGCAACATCACCGTCGATGGCGGAACCGGGGTCGCCGGCAACGGCATCTCGCTGAC +CGACAGCAGCAACATCACCGTCAACGACGTCACGGCGGCCAACAACCCCTGGGGCGGCGT +CGCCATCTATACCGACGGCACCCACTACGCCGGCGGCTCGGACGGCGTCACCTTCACCGG +CGATTACACTTACGACGCCGGCTCGACCGGCGCCTCGCCGATCTACATCCAGGCGACCGG +CAACACCTACCCCGTCACCAACCTGACCCTGCCGGACGGCTACGACTTCGCCGTGACCAA +CAGCGAATACCGCGCGGACGGCAACGAGTTCACCTTCTTCTTCACCAGCGAGAGCGACGC +GACGGCCTTTGGCAACTCGCTCGGCGCCGGTTCGTTCGTCAGCACCCCCGATGCCGACAC +CCTCACCGGCACCGCCAACGCCGACTATCTGTACGGCGGCGGGGGGGACGACCATCTTTC +CGGCGCCGCCGGCGACGACCGGCTGGTCGGCGGCAGCGGCAACGACACTCTCGACGGCGG +CGACGGCATGGATACCGCCGCGGTCGAAGGCAACCGGGCCGACTTCACCTTCACCGACGA 
+CGGATCCGGCCACCTGGTGATGAGCGACACCCAGGGAACCAACGGCACCGACACGGTCAG +CGGGGTCGAGACCCTGTCCTTCACCGACGGCAACGTCCTGGTCGTCGGCGCCGGCAGCGA +GTACGCGACCATCCAGTCCGCTATCGACGCGGCGGCCTCCGGTGACACCATCGTCATCGC +CGGCGGCACCTACGCCGAATCCCTGTCGCTGGACAAGGCGCTGACCCTGCAAGCGGTGTC +GGGCGCCGATGTCGTCATCGATCCGGCAAGCGGCAACGGCCTGACCGTCAGCGGCGACCT +CGCCGGCGGCGACGTGACCGTGAGCGGGCTCACCTTCACCGACGGCACCATGGGCATCCA +GGTGGCGGCGAACGCCGACGTCGGTACGCTGACGCTGGACGGCGTGACGGTCGAGGACAA +CCTGCAATACGGCCTGCGGACCGATTCCGGGTCGATGGCCGCCGTGATCGTCACCGACAG +CACGTTCGGGGACAACGGCACCCAGAACGTGAACGGCTCCGCCCAGATGAAGCTCTACAA +CTTCGACGGCGACGCCACCTTCACCCGGGTCGACCTCGTCGGAGCGCCGGCCGGGACGGA +CCAGAACAGCCGGCCCGACTACGGCATCGAGCTGACCGGCCTTTCCAACACCGGGCTGGC +GGAAGGAGGCACGTCCCCCGACCTGGGCACCGTGGTCTTTACCGACGTGACCGTCTCCGG +CGAATTCCACAAGATCGGCGTCGCCGTCTACAACTACGGCCAGATCGACGGCCTGGATAT +CCAGAGCCTCGACCTTTCCGGCACCGAGACCAACTGGGGCCCGGTGTTCAACATCGACGG +TGTCGAGGACAGCACCGTGGATGCCCGGAACTACAACATCACATACCCGGCGGGCGACGC +CATCGTCGCCGAGCTGCAGGGCGAGGTCCCCGACCAGACCGCCACCGACACCACGATCTA +TGGCACCGACGCCAACGAGCGCCTGATGGGCAAGGCGGGAGACAACGTGCTTCATGGCGG +CGGCGGCAACGACGAGCTGTACGGCGCCGACAAGCCGGGCAACCCGGCGGAAGATGACAG +CGGGAACGACAAGCTCTACGGCGAGGCGGGCGACGACCTGCTGGCCGGCGGCGCCGGTGC +CGATATTTTGGATGGCGGCGACGGCATCGACACGGCCAGCTATGCCCGCGCCGGCGCCAC +CGAGGGGGTGGCCGTCGACCTGGCGAACGGCACGGCCAGCGGCGGCGACGCCCAGGGCGA +CGTCCTGAGCAATATCGAGAACCTGGTCGGCTCGTCCTATGACGACACCCTGACCGGCGA +CGGCAACGACAACGTGCTGACCGGCGGCGCCGGGGCGGACGCCCTCGATGGCGGCGACGG +CAGCGATACGGTCAGCTATGCCGGCTCGTCGGCGGCGGTCAACGTCGACCTTGCCACCAA +CACGGTTTCCGGCGGCGACGCCGAAGGCGACATCCTGAGCAACTTCGAAAACGTCACCGG +CTCGTCCCATGACGACACCCTGAGCGGCACCAGCGGCGACAACGTGCTGGTCGGCGGCGA +GGGCAACGACACGGTCGTCTACACCACGACCGTCGCCGCCAGCGACGTCAGCTTCGACAC +CGACCACTGGGTGGTGACGACGGCCTCCGCCGGCACCGACCAGCTTTCGGGCATCGAGAC +CATCGAGCATGGCGGCGGCAGCAATATCCTGCTGGTCGGCGGCGGCGGCTACGCCACCAT +CCAGGCGGCGATCGAAGCCGCCGGCGCGGGCGACACCATCCTCGTCGCCCCGGGTACCTA +CGCCCCCTTCGCCACCTCGTTCGGCGGCCCGGCGAACATCACCGTCCAGGCCATGGGCGA +TCCGGGCGACGTGATCATCGACGCCACCGGCGGCGCGCCGTCCAACGGCCGTATCCTCGA 
+CCTGCGGGCCGACGGGATGACGCTGGACGGGTTCACCATCGAGGGACCGGGCCACGCCGG +CGTCGGCATCTCCATCAACGGCCAGGGCATCACGGTCGAAAACAACGTCATCAGCAACGT +GCTGACCGGCATCCAGACGGGCACCCAGTACGACACCGGGAACGTCACCATCACCGGCAA +CACGGTGGACGCGGACTACGGCATCAGCCTGCAGAACACCGCCAACACCGTCACCGACAA +CACGGTCCATGCGACGACGGAGGGACTTGGTGTTCTCGACGTCGCCGCCACCCTGAGCGG +CAACAGCTTCACCGTCGACGCCGGCGGCGAGGGCCTGGCCCTGTATGGCGGCGCCACGTC +CTCGACCTTCACGACGTCGGGCAACACGGTGACGGTCGGCGAAGGCGCCAACTTGCAGCA +CGCAACCGATTTAGCGGGCACGGACGGCACCCTCAACATTGGGGCTGGCACCTACGAGCA +GGTGATCAGCATCGCCAAGGACGGACTAACGGTGAATGGCAGCGACGCCACGCTGGTGGT +CGACGGCAGTTCCAGCGACGTGAACGGGATTGCCCGGGTCGATGCCGTTACCATCTACGG +CGACAACGTAACGCTCCAGGGTCTCACCATCGTCGACTCATTGGTGGATCAGTCCTACGT +CACCTACGGCTGGCCCGAAACCACCCGAGGCATCGTGGTGAAGAACGGCGCCGAGAACTT +CACCCTTACCGGCAATACCATCGAGAGCACCCGTAACGGCATTCTGATCAATGGCATCGA +CAACACGGGCAGCGTTACCGACAACGTCATCGACAACACCAAGAGCGGCATCTCGGTCCA +ATACACCGACGCCAGTGGCATCATCATCGCCGGCAACCAAGAGGGGACCTACGGCAACGA +ATGGGGCCTCAACCTGCACCTGAATGGTTACTGGGACGGCACCACCTACACCTCCAACAA +CGCGGACAACTATCCGATCCTCGGGACAGCGCCAACGGCGGACTGGCAAGCAAGCCTGCT +CGGCCTGAGCACCGGCAATGACGGCTGGGCCGTCATGGACCAGGCCTACGCCTTGTACAA +CCGCACTCTGGTAACGGTGGATCCAGACGGCTCGCCGAGTTCGTTCAGCAACCAGGGCAG +CCAGCGGTCGCCGATCAGCACCATCCAGAACGGCGTCGATCTCGTCGTTGCCGGCGGCAC +CGTCCATGCCCATGCCGGCGACTACAGTGGCGAGTCGGTCACCGTGCATGTCGACAACCT +GATCCTCGACGGTGACGCTGGCGCCACCGGCATCACCGTGCAACTGGCCGACGGGCTTAG +CAACCTGACCCTCGCCGGCGAGGCCGACTACACGGCCACCGGCAATGCGGCGGACAACAC +GCTCATCGGCGGGGCCGGCGACGACGTCCTGACCGGCGGCGGCGGAGCGGACACGCTAAC +CGGCGGCGACGGCAGCGACACCGCCAGCTATGCCGCCTCGGCGGCGGCGGTGGATGTCGA +CCTCGGGAGCGGAACGGCTTCCGGCGGCGACGCCGCCGGCGACACGCTGACCAGCATCGA +GAACGTGATCGGCTCGGCCTACGACGACATGCTCACCGGCGATGCCGGCGACAACGTCCT +GCAGGGCGGCGCCGGGGCGGACTCTCTGATCGGCGGCGCCGGCAGCGACACCGCCAGCTA +CGCCGATTCGAGCGCGGCGGTCGACATCGACCTGGCGGCCGGCACGATCGCCGGCGGCGA +CGCGGTGGGCGACACCTACTCCGGCATCGAGAACCTGACCGGCTCGGCCTACGCCGACAG +CTTGACCGGCGACAGCGGCGACAACGTGCTCACCGGCGGCGCCGGCAACGACACGCTGAC +CGGCGGCGCGGGCAACGACACCATCGACGGTGGTGCCGGCACGGATACCGCGGTGCTGAG 
+CGGCAACCGGGCGACCTACACCCTCGGCGTCAATGCGGACGGCAACATCACCCTGTCCGG +CGAAGGCACCGACGTGGTGAAGAACGTCGAAACCCTTCAGTTCGCCGACGACTCGGCGTC +GATCCTGGTGGTCGACCCCGGCACAAGCGGTGCCTACGCCACCATCCAGGCGGCGGTCAC +CGCCGCCTCGGCCGGCGACATCATTCTGATAACCGGCGGCACCTACACGGAAAACGTCAC +CCTCGACAAGCAGGTCACCCTTCTGGGCGCCCAGGCCGGGGTCGACGCGGATGGCCGCAC +CGGCGTCACCGAGAGCGTGATCGAAGGCAACATCACGGTTTCCGGAGCGGCGGACAACGC +CACCATCGACGGCCTGACCATTCACAACGGCGCCTCCGTCGGCGGCGACCTGGCGGGCGT +CTATCTGGCATCCGGCGCCACCGGCACCGAAATCACCAATACGATCTTCACCCGCGACGG +CACGGTCGACGGCGACAGTTCCCGCGGCATCCTCACCACCTACAACGGTGGCAACACGGA +TGTGACGATCGCCCACAACAGCTTCTCCGGCTGGGCCACCGGCACCTACGTCAATCCGGG +CTCACAGGACATCCAGATCACCGACAACCAGTTCGACGGCAACTACGTCGGCCTGTCGGT +GGATGGCCCCAACGGCGCGGTCGTCACCGGCAACAGCTTCACCGGCAACCAGTTCGAGGG +CCTGGGCATCGGCCCGGGCACCGGGATCTCGGGCATCACGCTGTCGAACAACAGCTTCGC +CGACAACGCGTCGCAGGTCGGGGTCTATACCGACGCGATCGACGTCAACGCCCTCTCCGG +CAACACCTTCGACGGCGCCGTGGTCATATCGGGCAGCGATACCGTCTATGCCTCCATCCA +GGATGCCGTCGACGCCTCGAGCGACGGCGACACCATCCTGGTCTATCCGGGCGAATACAG +CGAGCTCGCCAACTACAACCCGACGACCGGCGAGAACAGCGGGACCGGCAACCCGCTCGG +CCTGCTGATCAACAAGAGCGTGACCATCCAGGGCGTGACGGCCGACGGCACCTACATCAC +CGACGCCGGCGACGTGGCCGCCACCGTCACCTCGGGCGCCCAGTCCAACTGGGGGACCAA +CTTCTTTGTTACCGCCGATGACGTGAGCATCAACGGACTCGACCTGGTGGCGACCGGCAG +CACCGGTCAGCCCTATGTCAACAAGTCGATCGAGGTCGTCGGCGACGGGTTCACCCTGAA +CCACAGCGTCCTCGGCGCGGCCGACGGGCTGCCGATGTATACCGCCGTCTATGTCAACGA +CTGGTCGGTGGACAGCGGCTTCACGGCTTCGGCCATCGCCAGCTATGCCGTGCTGAACTC +CCAGCTTTACGGCGACATGGTCGTCACCAACGGCCCGGGAACCGGTTATACCGCGGACCA +GTTGGACATGCGGATCGTCGGCAACAGCTTCCTGACAATCGATGGCGGCATCCCGAACGA +CGGCATCCTCGTCACCGGCAACGACGACAACATCGCCTGGCGGAACGCCTCGGCCGCGCT +GCCCACCGAGATCAGCGGCAACGACTTCGGCGACGCCAGCGGCGTCCTCTGGGTGCGCGG +CGACGGCACCCAGGACTTCCCGACGACGGCGGAGGTCAACGGCATCCTCGCGGACAACAG +CGTCCCGGCCTATGCCTATGCGGTGGACGGCAACGGCGACCTGGCGGCGGGCACCTACGG +GTCCTCGTCGATCCCGTCGCTGGCCATCCGTGCCACGGCCGCCGACTTCGCCCCCAGCGA +ACTGTCGGGAGCCGGCGCCGAGAGCCTGATGGTTCAGCAGGCCGGCGAGACCACGCCGCA +CAGCTACTCGCTCATCGTCGGCGCCGACGGCGTCGCCGATAGCCTGACCGGCACCAGCGG 
+CGACGAGGCGCTGATCGGCGGCTCCGGCGACGACAGCCTGTCGGGCGGCGGCGGCAACGA +CATCCTGGTGGGTGGCGACGGCAACGACACGCTGACCGTCGGCGACGGCAGCGCCGTCGT +TTACGGCGGCGACGGGACCGACACGACGGCCTATTCGACGACCGTCTCCGCCGACGACAT +CAGCTTCGACACCGACCACTGGGTGGTGAACACCAGCTCGGGGACCGATCAATTGACCGG +CGTCGAAGCCATCGACACCGGCGGCAGCAACAAGATCCTGCTGGTCGGCGGCGACAGCGG +CTACGCCACCATCATGGAGGCCGTCGACGCGGCCTCCGCCGGCGATACCATCTTGGTCGC +CCCGGGTACCTACGAGCCCTTCTCGATGGGCTACTGGAGCCCAAGCGACCTCACCATCCA +GGGCATGCCCGGCGCGGTGATCGACGCGACATCCATTTCGACACCCGCCCGCATCGTCGA +TCTGACGGCCGAGGGCACCACGTTCAGCGGCTTCACCATCGTCGGACCGGGCGACGTCGA +CGACGCCGGCATTTCCGTCGGCATCTCCATCTCCGCCCAGGGCGTCACGGTGTCGGACAA +CACCATCAGCGACATCACGACCGGCATCCAGAACCATACCCCGGCGGATCAGACCGGAGC +CTCGTCGATCCTCGACAACACCATCAGCGGGGCGAACGTCGGAATCAGCCTGCAGAACGT +CAACAACACGGTTTCTGGCAACACGGTCACCACGGTCGAGGCTCATACACTCGGCGTCGG +CGAGGTCGCGCTCGGCGTCCTCGGGGGCGACAATACCATCACCCACAACACCTTCACCGT +CAGCAACAGCGGCAAAGCCATCGGCCTGCCGGACCTGCCCGCCGTTGCCAATCTCACCAC +GTCCGAAAACGTGGTGACGGTCGGCGAAGGCGCGGACCTTCAGAACGCGGCCGATCTGGC +CGGCACCAACGGCACGCTCCACGTCGGCGCCGGCACCTACGCGCAGGAACTCACCATCAC +CACCGACGGCCTGACCGTCACCGGCGACGACGGAGCCACGATCCAGGTCGCCGATCCGGG +GGTCTATTCGCCCTCGAGCGACGCCTTCGCCGCCCGGACCATCGCCTTCACCATCGCCGC +GACCGGCGTGTCCGTCTCCGGCTTCGAGATCAACGGGCCGCTGAGCGCCTATACCTACAC +CACCACCGACTTCGCGACGCTCGGCTATACCTACGGTTTCTTCATCAACGACGGCGTCCA +GAACACCACCCTGCACGACATCACCATCCAGGATATACGGACCGGGATGTCCTTCGAGGG +CGACAACACGGCCACCGTCTATGACAACGTCATCGACAACACGCGCGGCGCCTTCCTGGT +GCGTTCCGACGGTGTCGACCTGCACGACAACAGCTTCGGCAGCACCGGCAACGAGTGGGA +TCTGACCATGCTCGCCGGCACGCCCAGCGACTACTTCGGCGATCCGCTGACCGACCCCGG +CACCTACGGCGACAACATGATGGCCCTGTCGGCCGCCAACAACGACATGACGATCGCCGA +CCGGATGTACGGCGAGGGCGGCGTGCTGGCCCGGGCCGCCAGCGATCCGGACCTGGCCGA +CCAGTACGCGGCGGTCGCCAACCGCTCGCACGTCGAGGTCCTGGCCGGCGCCGACAACGA +CACCAGCGCGGGCCTGGGCGAAACTCGGGGCAACGGCTTCGGCACCGAGCGCCTGCCCGT +CGGCACCCTGCAGGACGGCGTCAACGCCGTGGTCCAGGGCGGCTCGGTCCACGTCCAGGG +CGGCGACTACAGCGGCGAATCCGTCACCGTCCATTCCGACAACATCACCATCAACGGTGA +AGCCAGTGCCATCGGCATCGACGTCCATCTGGGGATCGGGCTGTCGGCCATCACGCTGGA 
+AGGCAGCGCCGACTTCACCGCCACCGGCAACGATCTCGACAACACGATCACCGCCGGGGC +CGGCGACGACATCCTGTCGGGCGGCGACGGCGCCGACATCCTGTTCGGCGGCGACGGCAA +CAACACCCTGACGGGCGGGGACGGGGCCGACAAATTCATGATCTCGGCCCATACCGACGG +CAGCAAGGACACGATCACCGACTTCGGCCAGGGCGACTCCCTCGACTTCCACGACGTGCT +GAGCGACCCGACCGACGTCGTTTTCACGGACGACGGCAGCGGCAATACGCAGATCACGAC +GAATGCCGCCCCGACCATCGTCCTCGCCGTGGTCGAGCATGTCGAGCCGGCAAGCCTGAC +GGTGGACGATCACGGCAACGTGACCCTGGCCCAAACCAGCTGACGGCAAAAGGGGGGCGA +AAGCCCCCCCTTTTCCGTTTGGCCGTGCCCGGTCGTCAGGATTCGGGTTTCATCCAGTCG +GCGATCAGTCGACGGTAGATGTCGGTTGCCTCGACGATGCGCTCGACTTCGCAATACTCG +TCGGTCTGATGGGCCATGGACGGTTCGCCCGGGCCGAGCACCAGGGTCTGGATATCGCCG +TAGGCCGGCGTAATGACCGAGGCATCGGTGAAATAGGCCACCGATTCCGGTGCGAACGGC +TGTTCCGTGACGGCCGCGACCGCCGCCGCGGCCCGCTGCACCCAGGGAAGTTCCGGCTCG +GTCCAGATGCCGGGCAGATCGCACAGGGTTTCCAGGTCCGCCCCCTCGCCGATCACACCG +CCCAGCCCGTCGCGCAGGGGGGCATGGTCCATGCCGGGGATGGTGCGGATGTCGATGCCG +ACGGTGGCACGGTCGGGCACCGAGTTGACGTTGAGGCCGCCGCTGACCGTACCGACGTTG +AGGGTCGGCCGGCCGAGGACGGGATGACGGGCGACGTTGAAATCGAAATCCGCCAGCCGG +CCGATCGCTCGCGCCGCCTTGTAGACGGCGTTGTCGCCGTGCTCGGGCATCGAGCCGTGC +GCCGTCACGCCATGGGTGATCGCCTTCAGCCACAGGGCTCCCTTGTGGCCGACGCAAATC +CGGTTGGCCGTCGGCTCGGCGACCACCAGGGCGCCGGCCCGTCCCAGATGCCCCCCGTCG +GCCAGGGCCCGGGCGCCGTCGCAACCGGTCTCCTCGCCGGCGGTGATCACCAGCTCGACG +CCGGCACCGTCGCCGATCCGTTCGGCCTCGGCGATCGCCGCAACGACGAAGGCGGCGACG +CCGCTTTTCATGTCGCTCGATCCCCGGCCGTAAAGCCGCCCGTCGACGATTTCACCGGCG +AAGGGATCGTGCTTCCACGGCACGGCCCCCAGGGGCACGGTATCGATATGTCCGGTAAAG +GCCAGCGGGAGGCGCTGTTCCGTCGGCCTGCCCCGTCGCGCGACCAGGCTGGCCCGGTCG +TCGCCGAGCGGATGAAGGACGCAATCGAAGCCGGCCCCGGCCAGCAAGTCGGCGAGGAAG +GCGGCGCAGGCCTGCTCCTGTCCCGGCGGATTGATGGTTTCGAAGGCAACCAGGCGGCGG +GTCAGTTCGACGGGATCGGGAATTTTGGCGGGCATGGTCCTATCCGAAATAGGCGGCGTG +CACTTCCGGGTCGTTCTGCAGGGACGCCGCCGCTCCCTGCGCGACCAGTCGCCCGCCCGA +CACCACGTAGCCGTAGTCCGAAATGGCCAGGGTCTGATGGACGTTCTGCTCGACCAGGAA +GACGGTTATACCGCGGGTGCGGATGTTTTCGATAATGCGAAAGTTCTCCTGCACGTATAG +CGGCGACAGGCCCAGCGAGGGCTCGTCGATCATCAAAAGGCGCGGCGCGCCCATCAGTCC +CCGGCCGATCGACACCATCGCCTGCTCGCCTCCCGAAAGGGTGCCGCTCAGCTGGTTGCG 
+GCGCTCGGCCAAACGGGGGAAGGTCTGATAGACGTCCTCCAGAGAAGTCCTGATCTTCGC +TTCCGACGGCTCCTGGTAGGCGCCGAGGCGCAGGTTCTCCTCCACCGTCAGCTTGGAGAA +CATGCGGCGCCCCTCGGGAATGCAGGCAACGCCACGGGCCACGACCTTGTGGGTCGGCAG +GCCGGTGGTGTCCTCGCCGAACAGGCGGATGGTGCCCTGGCGCGGCGGGGTCAGGCCGAG +GATGGAGCGGATCAGCGTCGTCTTGCCGGCGCCGTTCGATCCCAGCAGGCAGGTGATCTG +CCCCTTCGGCACGGTGAGGGATACGTCGAGCAGGACATCGGCCTTGTCGTAACCGGTGAA +CAGCCCCTCGATCTCGACCGCCACGTCCGCGGTCTGCGGCACGGCGGCGCTATCGGAAAC +CGCGGATGAAACGGCGGTCATGGTTGCACTCCCTCCCATTCTCAGGCCGATCCCAGATAG +GCTTCCTGGACCAGCCGGTCGCTCGCCACGTCCTGGTAACTGCCTTCGGCGATCTTGCGG +CCATAGTTGAGGACCACGCAGTGGTCGCTGACCCGTTCGATGACGCCCATCTCGTGCTCG +ATGATGATGATGGCCAAGCCGTCCAGGCGATCGCGCACCGACAGGATGTCGTCCATCAAC +TGATGCGTCTCGTCGTGGGTCATGCCGGCCGACGGCTCGTCGAGCAGCAGCAGCTTGGGG +TGGGCGATCAGGGCCCGGCAGATCTCGATGCGACGGCGCTCGATCATCGGCAGGGCGCCG +ACCGGGTCGAACAGACGGTCGGCCAGATTCCTGCTGAAGGTCGCGACCAGTCCGCGCGCC +TCCTCCAGGGCCTCCTCGTACTGGCGTCGGAAGGCCCGCCGGCGGAACAGGTTGAACAGC +AGGCCGTGCTCGAGACCGCCGTAGTTGCCGATCATGATGTTGTCGAAGGTGTTCCTAGGC +TGTTTCCTGGTGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGAGTATTCTATA +GTCTC +>NODE_6_length_29555_cov_90.739_ID_11 +CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG +TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +CACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG +CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGTTGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA +GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT 
+TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG +GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCACCCTGATGCTGGGCGGATT +GGCCTTAATCCAACCTTGAGTAAACGCATTTTCAACGTTAGCAGCTTCATTTTCCCGGAT +GGTAACCGGTTCTTCATTGATAGACAGCACGCATGATTTTTCGCACGGTGCCGGACAAAT +CCTGCCGGTAATTTCCGGGAAGCTGTTGGTGGAATGCAGAATCTCATAAGCCACTTCCTC +ATTACCACGATAATAGGCATCCTGCCATTCGGGAATTTTACTCCCTACAGGACAGCCCCA +GTGGCAAAAAGGCACGCCGCAATCCATGCAGCGCGAGGTCTGGAGTTTGCGATCCTCCTC +ATTCAGCGTTTGTTCTACTTCTCCAAAATCGTATATCCGTTCGTTCCGCGGGCGATAAGA +AGCCTCTTTTCTCGGAACCGTCATAAAACCTTTCGGGTCGCCCATTTTATATCTTATTTA +ACTTATTATCAATCAATTTATTACTCCTGCATCACGTTAGCATCTTCCGTCATTTTCATT +TTCTGCTCAAGCTCTCTCAGCTCTCTTTCGCGAAGTACTTTACGATATTCATAAGGAATA +ACCTTCACGAACTTGGGCAGATATTCTTCCCACTGTGTCAGGATTTTTGCAGCCTTTGGA +CTTTGCGTGTAAGTCAGGTGCTTACTAATCAGTCCCTGAAGTTCTTTGATGTCTTTTTTA +TCTTCTACTTCCAACAGCTCAACCAGCCCTTTGTTGCAGTAAAAATCAAAGTCACCGTCT +TCATCCAGCACGTAAGCAATTCCACCGCTCATTCCGGCTGCAAAGTTTCTTCCGGTTTTT +CCCAGAACAACAACCCGGCCGCCCGTCATATATTCACAACAATGGTCGCCGGCTCCTTCG +ATAACCGCTTCCATTCCGGAGTTTCTGACGCAAAAGCGCTCACCGGCTACGCCCTGAATA +TACGCTTCTCCGCCTGTAGCTCCATAAAATGTGGAATTACCAATGATGATATTTTCTTCA +GGTGTAAAGGTTGAACCCGTGGGCGGAACCACAATAATCTTTCCCCCGGATAATCCTTTT +CCGATGTAATCATTAGAATCGCCCTCCAGGCGGAAGGTGACTCCCTTTGCCAGAAAAGCA +CCAAAACTCTGACCTGCTGAACCGTCAAATGTTGCATGAATGGTATCTTCAGGAAGCCCG +AATTCGCCATATTTCTGAGAAATCACTCCTGATAACATTGCACCTATTGCCCTGTCTGTA +TTGGCAAACGGATGATGTAACCAGACTTTTTCTTTTCCTTTAATGGCCTTTCCGGATTCT +TCAATTAATGTGTGGTCAAGATGACCGTCCAAGTTCAGAGAAGCGGAACCCGGAACATAA +CGAATATCGAACTTGTCAGCTTCTGCCGGACGGTTCAATACAGCCGAAAAATCAACGTTT +TTCATTTTCCAACCAAACACCTCCTTGTTTTCTTCCAACAAATCAGCGCGTCCGATAATA +TCATCAAACTTTTTGAAGCCCATCTCAGCAAGCAACTCACGCACTTCTCCGGCCAGGAAC 
+CGGAAAAAGTTAATCACCAGCTGCGGATTTCCTGTAAAACGCTCTCTCAAAGTTTCATCT +TGAGTTGCAATACCAGTCGGACAAGTGTTTAAATGGCATTTACGCATCATGACGCAGCCG +AGGGTGATGAGTGCAGAAGTAGCAAAACCGAACTCTTCAGCCCCCAGACAGGCCATGGTG +ACAATGTCGTGTCCGTTTTTGAGTTGTCCGTCTACTTGGATTTTAACACGACCGCGTAAA +TTGTTTTTTACCAATGTTTGTTGTGCTTCCGCGATGCCAATTTCTACCGGAAGTCCTGCA +TGTTTGATGGAACTGATAGGACTTGCTCCCGTGCCGCCTTCACCACCTGAAATAATGATC +AAATCAGCGAAAGCTTTAGCCACACCGGCAGCTACAGTGCCCACGCCATCCTGGGAAACC +AGTTTCACAGAGATTTTAGCTTTTGGATTGGTGGCTTTCAAATCATAAATCAACTCCGCC +AAATCCTCAATGGAATAAATATCATGATGAGGAGGCGGTGAAATCAAGGTAATTCCAGGC +GTTGAATTTCGCAGCTTGGCAATCACTTCATTCACTTTATAACCTGGCAACTGACCGCCT +TCACCCGGTTTTGCTCCCTGGGCAATTTTGATTTGAATTTCACGAGCGTTTGTCAAATAG +TTACTTGTGACACCAAAACGTCCGGAGGCAATTTGCTTAATAGCACTTTGTTTCTCCGTA +CCAAACCGGTTGTGGTCTTCACCACCTTCACCTGTGTTACTGCGTCCACCAACCGTGTTC +ATGGCAACAGCCAGCGATTCATGAGCTTCCTTGCTAATAGAACCGTAGGACATAGCGCCT +GTAACAAAGCGTTTCATAATTTCTTCCACAGGCTCGACTTCTTCAATAGAAATGGAATTG +CGTTTTACCTGAAAGCATCCTCTGATAAAAGAAGGTTTCCGGTTGTACTCATCCACAGTC +CGGCTGTATTCTTTAAATTTTTCGTAGCTGTTGGTACGCGTGGCCCATTGAAGCAGTCCG +ATAGAATCCGGATTCCAGGCGTGATCTTCTCGATTTTTGCGCCATGCGTAAACTCCGGTT +GTTTCAAATCTGAATTTCTCTTGGGTGGCTTCTTCACGAAATGCATCCTTATGGAACTGG +CTGTATTCTTCGTAAATTTCTTCCAGACCAATACCACTGATGGGCGAGGAAGTTCCTTTG +AAATATTTGTCCACCAGCTCTTTACTTAAACCAAACGCTCCAAAAATCTGAGCGCCGTGA +TAACTTCTTACTGTTGAAATTCCCATTTTCGAGAATATCTTCAACAGACCTTTTTTGATG +GATTTGATATAATTTCTGCGGGCATCTTTGTATTCCATGTCCAGCTTGCCCTCTTTCACC +AGGTGATCGACAGCAGCAAAAGCCAGATAAGGATTGATGACACTGGCTCCATAACCCAAA +AGCAACGCATAATGATTCACTTCGCGGGCCTCACCGGTTTCTACAGCAATTCCCACCTGC +ATCCTTTTCTTTTTCTGAATCAAGTGGTGATGTACGGCCGAGACCGCCAGCAATGACGGG +AAGGGGGCATGTTTTGAATCAATGGCCCGATCAGAAAGAATGATGAAATTCTTTTTGTCG +TCAACAGCTTTTTCAGCCCTTTCCAGCATCTCATCAAACGCTTTCTTAAATCCTTCAAAC +CCCTGATCTACCGGAAAGACTATCGAAATGATTTCGTGGGAAAACATTTCATCTTTCAGA +TCTTTGATTTTTCCAAGGTCGGTGTTGGTAACGATGGGGTCGGGGAATTTGATGAGTTTA +CAGTGGTCGGGGCTTTCTTTGAGGATGTTGGAATTTAAAGAACCGATGTAGTTGGTAAGC +GACATGACCAGGCCTTCACGAATAGCATCTATAGGCGGGTTAGTGACTTGGGCAAACGTC 
+TGCTTAAAATAATTAAACAGACGCTGGGGTTGTCGTGAAAATATCGCTGCCGGTGTGTCG +TTACCCATGGAATTGATAGGCTCAACGGCCGTGTTGGACATGGATTGAATCAGGAATTCC +ATATCTTCTTTAGAATAAGAGAAGACCTTGGCGTAAGTGAGAAAATCGTCCATAGTCGAC +GGCACCCGTTGTCGTACTTTGATATCTTCCATCAGTAAACGATTTTCCTTCAACCACATT +CCATAAGGATTGCGACGACTAAGTTGTTCTTTTACCTCTTCGTCAGGAATAATGATGCCC +AGCCGCGTGTCTACCAGCAATATTTTTCCGGGACGCAAACGTCCTTTTTCTAGAATTTCT +TCCGCTGCAAAATCCTGCACGCCGGTTTCCGAAGCCATCACAATTAAATCATTTTTGGTG +ATGACATACCGCGAAGGACGCAAGCCGTTCCGGTCGAGCGTTCCACCAATGTATCTGCCA +TCAGAAAAAACAATGGATGCCGGCCCGTCCCAGGGTTCCATGATGGTGGAATGGTATTCG +TAAAAAGCTTTCAAGCTTTCCGGGATAGGATTCTTTTCGTTGAAAGATTCAGGAATCATC +ATGCAAAGACTGTGGTGCAGCGAGCGGCCTGTACGATGCAGAAATTCCAACACATTATCA +AATGATGCAGAGTCTGATTTCCCTTCTTCCAGTACAGGAAGAATTTTCGGTAAATCATCT +CCAAAAACTTCAGATTTCAACAGCCCTTCACGGGCTTGTGTCCACAAACGATTTCCTTTG +ATGGTATTAATTTCACCATTGTGGGCCACCAGACGGAAGGGTTGTGCCAAATCCCATGTT +GGGAAAGTATTGGTACTGAATCGGGAGTGAATTAAAATCAGTGCCGATTTAAACAACGGA +TGTTGCAAATCGGTAAAATAGTCTTTCATTTGATCCGGCGTCAACATCCCTTTATAAATC +ATCGTTTTCGCCGAAAGGCTCGAATGATAATAAACTTCTTTTTGTGAAAGGTCGGAAGCT +TGTACTGCCTTTTCAGCCCTTTTTCTGACGATATACAATTTCTGTTCAAGGATATCTTCT +TCAAGGTTGGCCTTGATAAAAACTTGTTTAATGCGCGGTTCTGTAGTGCGTGCAATTTCT +CCCGGGATGCTACTATCAGTAGGAACATCCCGATACCCAATCAATTGCAGGCCTTCTTCC +TGGATGATATTTGACAAAATATCAACACAAGCATTTGCTTCTGCTTCTAGCTGAGGCAAA +AAGATCAAACCGGTTCCATAGCTCCCTTTCGCAGGGACATCTATTTTCAGTACCTTCTTA +ATAAATTCGTGAGGTATCTGCACCATGACACCGGCACCATCACCACTGGCATTATCCGAA +CCCTTGGCGCCCCTGTGATCCAAATTCCTGAGTACCTCCAGCCCTCTTTCTACAATATCA +TGCGATGCCTCTCCTTTGATGTGGGCAACAAAACCTATCCCGCAATTATCATGCTCATTG +GCGGGATTGTACAAACCGTCCGGTTTTGGAAATCTAAACGCCATTTCCTTCCTCTTTTTT +TCCATTCTAAAAAATTCCTTTCTTGTGAACCGACTTTAAGGTCAAGGATAAATCCGCCCG +GGTTTATCCTATTTCATCTCTACGCTACAAAACCACCTTCACACAAAATGGTTTCCAGTT +CCTTTTAGGTTTAAATCCTCTTAATCCTCCAAAATCTCTGCCAGGTCGGCTTTGAGATTG +TTACGTTTTCACATATTCAAAAATACATATTTCAATAGGGGTTGGTCAAGACGGGCGAAG +GTGACCAGAATTTCGCCCATAAAATTTTAAACCCTTTTTATCGCTATAATTATTTATTTA +TCAACTTTTTAGTAGAATTATTTGACTATTTGACAATGAATAGTACAAATGGCATCGTGC 
+GAAGGGAAAGATAATTTTCGCAAACGATTTACCTGCAACAAATAAAAGTGCCGGAAAAAA +TTTAAACGAATCGTTAAAAAAATATAAGCTTTAAGCGGGAAAGCCATCGAAAAGAGGGTT +AACTTGGCAATAAATAATACTTTTGGCATCTAATTATCATTCATCATGTTGGAACAAAAA +GAACTGATTCTTTTAAACATTTCGGGCGAAGACAAACCGGGATTAACAGCTTCACTGACT +GAAATTCTGTCGCAACACAACGTCAATATTCTTGATATTGGACAGTCTGTAATCCATAAA +GATTTAGGATTGGGTATTTTGTTTGAAGTCCCGAAAAAGTATCGTTCTGCCAGTATTTTG +AAAGATCTACTGTTTAAAGCCTATGAATTGAAAAGCCATATTAAGTTTACTCCGATTCCC +ATCGAAGAATACGAAAAATGGGTGGCGGAACAAGGAAAAGAACGTTTCATCATCACCTTA +CTTGCCCACAAACTAACAGCACTGCATCTTTCCAAAGTTTCAAGTCTGATTGCCAGTCAG +AAGTTGAATATCGATACAATTTCCAGGTTATCCGGCAGGAAGTCCTTAAACGGGAATAAC +AAAGTAACCAACTCTGTGGTTGAATTTTCAGTTCGCGGAACTCCTCTGGATATCAATGCC +ATGAAACAGTCGTTGATGAATATTGCATCCGAAACCGGCATTGATATTGCGTTTCAGGAA +GATAATATTTATCGCAGAAGCCGTCGTTTGGTTTGCTTCGATATGGATTCCACACTGATT +CAAACGGAAGTAATCGATGAGCTGGCACAAAAAGCAGGCGTTGGCGATGAAGTGAAAAAA +ATTACGGAAAGTGCCATGCGCGGTGAAATTGATTTTAAAGAAAGTTTTAAAAAACGGGTT +TCATTGCTCAAAGGTCTGGATGAATCTGTGATGAAAGGAATTGCTGAGAATCTGCCGATT +ACAAACGGTGCAGAAAGATTACTCAGCACGCTGAAACAATACGGTTACCGGACAGCTATT +CTCTCCGGAGGTTTTACTTATTTCGGAAACTACCTGAAAACCAAGCTCGGTTTCGATTAT +GTTTTTGCCAACGAACTGGAAATTAAAAACGGGAAACTAACAGGAAAACATTTACATGAA +ATCGTGGATGGAAAACGAAAAGCTGAGTTATTAGAACTGCTGGCTTTCAAAGAAGATATC +CATTTAGAACAAGTGATTGCTGTGGGTGATGGTGCCAACGATCTTCCCATGCTGGAAAAA +GCCGGGTTAGGAATTGCTTTTCATGCCAAACCCAAAGTAAAAGCATCGGCACAACATGCT +ATTTCCGCTACCGGATTGGATACGATTCTGTACCTTCTGGGTTTCCGTGATAGAGAAATT +AATGCCTCATGAAGTTCATAAAGTTGGAGTTCCTTACAAAAAGTGTATGGGGGATTTTTC +TATTTTCGTGCTGGAATTCTGTTGAAAATAATTGAAGTTTTTTTCGCTACAGGATTAACT +TGTATTATGCGAAAATCAGTAGTTAATCAGGCATTCTTTTAAAAAGTAAGGAACTAATAT +CCAATATATTAAGATGGAAAAACTAGTTACATCAGATTTTAAACTGGGAATTATAGCAGG +CGGACAACTTGCCAAAATGTTGATTCAGGAAGCCAGCAAATGGGATATCATCACTTACGT +TTTGGACAATGATGAAACCTGTCCGGCGAATAGTCTGGCAACGCATTTTGTCAAAGGAAG +TAACCTGGATTTTGATTCGGTGTACCGGTTTGGGAAAATGGTTGATCTCCTGACTTATGA +AATGGAGAATATCAATATTGAAGCTTTGAAAAAGCTCAAGTCCGAAGGCCATCAGATTAT +TCCCGATCCGGATATTCTTGAACTCATTCAGGATAAAGGAAAACAAAAGGAATTTTATCA 
+AGATAACAACGTACCTACGGCGCCTTTTAAAATCTATAGCAGCCGACAGGATATTGTACA +AGCCATAAAGAATGGCGAAATCAAATTCCCGTTTGTTCAGAAATTAAGAACCGGAGGATA +CGACGGGCGTGGGGTGGCTGTTATTAGTGATGAAAATGATTTGGATAAATTACTTGACGG +TGCTTCTATTATTGAAGATAAAGTCAACATAGCCAAAGAAATAGCGGTGATTGCCGCCCG +TAATAAACAAGGAGAAATAAAATGTTTCCCCGTAGTTGAAATGGTATTTGACCCTGAAGC +AAACCTCGTGGATAAACTGATTTGTCCTTCAAAAATTACCGCCGAACAATCGGAAAAAGC +CATTGAGATTGCTGGTAAAATCATCGGTTTACTCGGAATGCAAGGATTGCTGGCCGTTGA +GTTTTTTGTTGATGAAAACGGGGAAGTCATTGTAAATGAATCTGCTCCGCGTCCTCACAA +CAGCGGGCATCATACCATTGAAAGTATTATTACTTCACAGTTTGAACAACACTTACGTGC +CATTTTTAATTTACCTTTGGGTAGCACCAGACCCAAACTTCCTGCTGTCATGGTAAATAT +TTTAGGAGGTGAAGGTTATGAAGGTCCGGTCAGGTATGAAGGATTGACTGAAATTATGGC +CATCGAAGGTGTGAAGATTCACCTTTACGGGAAGAAAATAACCCGGCCGTTCAGAAAGAT +GGGACACATTACCGTGTTATCAGACTCCCTGGAAACTGCGCTGGAAAAAGCTGAAAAAGT +TAAACAATTAATAAAGGTAAAATCGTGGGACAAAAATTAGTAAGTATCGTCATGGGTTCA +GATTCCGATCTACCCGTCATGAAGCCGGCTGCTGAAATGCTGGAACAATTGGGTGTCGAA +TACGAAATTGACATCGTATCAGCGCACAGAACCCCGGAAAAATTATTTGATTTCGCTTCC +AACGCACACAAACGTGGTATTCAAGTTATCATCGCCGGTGCGGGCGGGGCTGCTCACCTG +CCGGGCATGGTTGCATCCATGTCACCCTTGCCGGTTATCGGCGTACCGGTAAAATCTAGC +AACTCCATTGACGGCTGGGATTCAGTACTTTCCATCTTACAGATGCCCGGTGGAGTTCCT +GTTGCTACGGTAGCCCTGAATGGTGCCAAAAACGCGGGCATTCTTGCAGCTCAAATCATT +TCTGTATCCGACAGCCAGGTAAGAGAAAAAATTATTGAATACAAAGCCGGACTCAAAGAA +GCCGTCATGAAAAAAGCTAAAAACCTGAAAGGATAGAACAGCTTTTTAGAGAAACTTTTC +AGGCAAAACGGCAATCAGGTTGTTGACGACCGGTTCGTGTCATCACAAATTGCCAGAGTT +GTGTGGCACGTGAGCGAAAACCACCGGCGCTGCTCAACAAATAATAGCGCCACATCCTGT +AAAACCGTTCATCGTATTGGTTTTTCAATTCCTTCCATGCTTTATTAAAACGGTCGTACC +AGGCCATTAATGTTTTATCATAATCGGGGCCAAAGTTGTGAAAGTCCTCAATGACAAAAT +AAGGCTCGGCTGCTTTGGCAACCTGGGCAATGGAAGGCAGCATTCCGTTAGGAAAAATAT +ATTTATTGGTCCACCGGTTCACATAATGCGTCGTTGTATTATTTCCAATTGTATGCAGCA +GCGAAATGCCGTTATCCGTCAGACAACGGTCAACAATTTTCATGTAAGTATCATAATTTT +TGTAGCCCACATGTTCAAAAAACCCGACGGAAACAACGGCATCAAATTTACCTTCAACAT +CCCGGTAGTCCTTGTAAAGCAAAGTCACCGGCAAACCTTTACAAAGTTCATTACCCAGCT +CGATTTGTCTCTTTGAAATGCTCACCCCCAGCACTTCCACGCCATATTTTTCGGCTGCAT 
+ATTTGGCAAAGCTTCCCCAACCGCAACCGATATCCAGCACTTTCATGCCCGGTTTCAGAT +TTAGCTTTTTACAAATTAAATCCAGTTTGGCTTCCTGGGCCTGATCGATGTTTTTGGCTC +TTTGCCAGTAACCGCAGGAATAAACCATGCGCCTGTCCAGCATTTTACTGAACAACTCAT +TTCCCAAATCGTAATGCTCGCGACCCACCCGCTTCGATTTGACTCTGGTTTGTTGATTAA +AAAGCCGTTTGGATAAAACATAAAAAGCCGTACTGAAATTCCTTTTGACTTTTTCTTCCA +GTCCGGCCCGTAAAATCCTTTCAATGAATTCATCCAGGGCTAAACAATCCCACCAACCGT +CCATGTAAGATTCGCCCAAGCCTAATTCGGCTTCGGAAAGCCATCTTTCATAGGCTCTTT +CATCATGAACTTGTACGTCATAAGGATGCGGTCCGTTCAGGTGAACACCTGTACCGGTCA +GTAATTCCCGGATAACAGTTTTCGGATCCATGGGTTGGATATTTTGCGGTTCTCTTCAAA +GGTATGAAAATATGGAGTTCATAAAGTGACTAAAGTGCGCTAAAGTCCATAAAGTTGAGG +ATGCTAGCTTCAAAAAAGCTTACTCCCCAAAGGCACTTCCTTATCGGGAACACAAAGCGC +TACACGGCCTTTTTCATCTGCAAATCCGGTGGTAAGACATTCGGAAAAGATGGGTCCGAT +TTGCTTTTTCGGGAAATTTAAAACGCCAACTACAAGTTCACCAACCAATTCTTCAACAGT +ATATAATTCCGTGATTTGTGCGCTGGATTTTTTGACACCGAATTCAGGGCCAAAGTCCAC +ATGCAAAATGTACGCAGGTTTCCGGGCTTTTTCAAAAACTTCGGCCTGGACAATTTTTCA +CACCCGAAGCTCGACCTTTTCAAAGTCCTTCCATTCAATGAGTTCCATAAATATTCATTC +TGAAATTTTTGGTGATTCTTTTTCTTCAGCAGCTTTAGATTTGACTTTTTCAGTACCACA +TTTATATAAAAGCCAGCCTACAAAAGCCCAGAAAAACGGAAGTAAATAAAAACGCACATA +TGTCCAGGTATAAAAACCCAATTTAGACCCTGTGTGATTAGCAGGCGGCTGCATAACCGG +AGGTTTTCCATAAGCTAAATTTATAAAACCATTTGCTGCTAATAAAAAGAAAATAACAGC +AACTATTATCATTATTATTCCGTAGCGTTTCGAATGTTCCACACTTTTTAATCTTAAAGG +ATTAATAAAATTTGCCGACCAATTTGTTCGGAGAAACAAAATTAACGTTTAGCAAACAAT +TTCCAAGATCGGCTGATGCAACATTTGCTGAAAAACTTTTAAAATGATTTTGCTTTTATG +TATTTGGTGTTTCTTCTGAGGCCTCATTTACTTTTTCTATCCCAAATTTCAATAATAACC +ATCCTCCAATAGCAATAATAACAGGGGTAACATAAAACCCAAAAAATTCACCTGTTTTAT +AAGGAATATCATCATATTTAAATGGAGCAGTAAATGATGGATGGCCTGCCGGAAGTTTTG +AAAAGATTATATGATAAAAGCCAACAGCGGCAAATAAAAAAAGTAAGCCTGAAATTAACA +TGGTAAAAGTACCATACCTTAATGGATGTTTTTTCATAATTTTTGTTGTTTCAAGAGTGA +CTTGTTAAAGACGTTTGCAAATTAAAATACCAAAAAACAAAAGGCTGTCCGCCTGAGGCG +GACAGCCCTTTGTTAAAAATCATTTATTAATTAGTTCCGCCTGCGTCATTTCCGCTTGGA +TAAGCCGGAATTTCTGCCGGTGGTTGTGGGATTGGCATCGGATGCTCTTTGAGCTTCTTC +ATGATGTAGTTGATGGCAGTATCAAGCTGAGGATCTTTTCCTTCCATTACCTGACCCGGA 
+AGGTTATCCACCTTGATATCTGGTGTAACACCATGATTTTCAATAGCCCATTTCGAATCA +AGTCCGTAGATACTGTTCTGCGAAACCACAAGGTTTCCACCATCCAGAAGTGTCCATACG +CGGTTGTAACCACGAACACCACCCCAGGTTCGTTCGCCAATTAACGGCCCTAAACCATAT +TTCCTGAAGTAGAACGGGAACATATCGCCGTCTGATGCCGAATAATGGTTCAGCAGGCAA +GCTTTGTATCCGTGCAGCACTTGCTCAGGATAACGCATAGCTGCGTGAGCTCTGTTGGTG +CTCATACCAATCAATACGCGGCGCAATCTTTCCAGCACAATCTGGTCGATAAATCCACCA +CCATTGAAACGGTCGTCCATAATCAAACCTTGTTTGCTAAGCTGCGGATAGAATTGATGG +ATAAACTGAGTCATACCCAAAGATTCCATATCGGACATGTAGATGTAACCGATTTTTCCG +TCAGATTTCTTGTTGACGTAAGCACGTTTCGTCCTTATCCAATTGAGCAAACGCAAATTC +AGAGAATTATTAATCGGTTTCACGGTAACGGTATGTTCTCCTTTACCGTCAGGTTTATCA +GCCAGGGTCAGCGTAGTTTGCTGTCCCACAGTGTTCACAAACAAACTGTAAGGATTCATC +GGTGCTTTCAGCTGATGACCATTCACAGCAAGCAGATAATCACCGGTTTTCACATTAACA +CCAGGCCTGTCAAGCGGAGAGCCATATCCCGGACGGGAATTATCACCAGCATAAATTTTC +TTGAAATAATAACGACCGGAAGATTTGTTTAATCCGAAATCAACACCAAGCACGCCGGTT +GGGTTTGTTTTGCCCAGATAATTATCATCACCACCCCAGACGTAACAGTGTGAGTTATCA +AGCTCACCAATCATCTCACCTACAAGATAATTCACATCTTCGCGGCAACCCAATAATGGT +ACCAGTTTACCGTAGTTTGCTCCCACTTCATTCCAGTTCACACCGTTCATTTTGGTGTTG +TAGAAGAAGTTATTGAAAAGCCGCCAAGCCTGATGGAACATCTCACTCCATTCCTGATGA +GGATAAATCTTCATTTTCATATGGGAAGTGTTTACAGGCTCACTGCCTTTTGCATTTACA +GAAGCCGAAGGCATCAGGAAGAATTTACCTCTTCTCATATAAACCAAAGTTTTTCCATCA +GCACTCAGTCCGTAAGTTCTGGCACCGTTTGCCACTACGGTATGGCCCTTTTTACTCTTC +AGGTCGTACACCATAATAGAAGACTGACCGGTTCCATGTAAGAAACCTTCAATTGTTATC +AATGGACGTGTTTGATAAAAGACTTTGTTGCCGGTTACCTGAATATTTCCATAATCACCC +GATTTAATCGGAAGCATGATAACCCGGTTCATCAATCCATTGAAGTCTATTTTAACATCT +TTAGCACTTTCTGTTTTTTTCGAAGCGGAAGAAGAAGCTTTCTTTGCTTCAGGCATTCCT +TCGTCGGAAACCGGGGCAAAAGGAGATTTCTCATTTTTTTGCAAGGTTACCATATAAATA +CCGTCCATTTTTTCAGTAGCAACATTGGATTCGCTTTCGCTGAAAAGCGGATTTTCGTGA +CGTGCAGAAACAAAGAACAGATATTTTCCATTGGGGCCAAACACGGGATCATTATCAGAA +TATATTCCGTCACTTACTTTGTAGGATTTGTGATCGGCCAGACTGTAAATATAAATCTGT +GAAAGACCACTAGCATTGGTCTTGCTGTAAGAAATCCAATTGTTATCAGGTGACCAGTGA +TAATCCCTCATAGCGTTGTATTTATCCTGTGCAATCCGAACCGGTTTTTTATCTTTTAAA +CTGATATACCAAAGAACGTGATTGTTATCAGAGTAAGCCAGCATTTGGCTGTTTGGAGAC 
+CAGGTAGGATTGTAGTAATATCCTGTTTTAGCGTTTGTTATATAAGTTTGATGTCCTGAA +CCATCAGAAGGACGGATGGCCAGTTCATTGACACCGCTGGCATCTGTCAGGTAAGCAATC +CATTTGCCATTGGGTGACCAGGCAGGATATTGTTCCTGAGCATCAGATGTTTGTGTAATA +TCGCGGGTAGCGCCGTGTTTGGCAGGTACGGTGAAGATATCACCACGTGCACCAAAGAGA +ACGCGTTTTCCGTTAGGAGAAATATCCAGGCTGCGGATCATCTTGCTGGCATTAATCCAG +CGCGGGAGTGTCTGAGTTCCGTCAGTAGGAACTTTGACATTGATTTTGTGAAGTTGTTCT +GAAGGAAGATCCAAAACATACAAAGAACCACCATCCTGGAAAACAATGCCGTTATTTCCC +AGACTCGGCCAATCCACATCATAATTTTTAAAATGTGTTATTTGACGGAACGTTTTTGTA +TCTAAGCTATAAGCCCAGATATTCAAACGGTGGTTTGCGCCTCTGTCAGAAGCAAAATAA +ATGGTGTTTTTATACCACATGGGATACGTGTCGGTACCTTTCCATTTGGTAACCCGCTGG +ATTTTCTTTGTTTTGAAATCATAAATCCAGATGTCCTGTGCCAAGCCACCTTTATAGCGT +TTCCAGGTACGGAAGTTCCTAAAAATACGATTATAAGCAATCTTGGAGCCGTCAGGGCTA +TAGGACATCACGCCGCCTTTAGGCAAAGGCAGGTGCGTTGGCAGGCCACCCATTTTACTT +ACTTCAAACGGTTGGCCAAACCAGGAATTCCAGGTATCTCTTCTGGAAAGGAATACAATG +TCTTTTCCGTTGGGTGTCCAGGTCATAACCATATTATCTGGTCCCCAGCGGGTGGGAGCT +TTTCTCACCACATCCGAGTGATAAGTTAAACGGGTAACAGCGCCTCCGTCAGCGGGAATT +GTATAAACATCCACATTTCCGGAATACTGACCGGTAAAGGCAATGGTTTTTCCGTCGGGG +GAAAACCGGGGCATCATATCATAGCCCGGATCGGTTGTCAATCGTGTGGCAACACCGCCT +GTACGATCAACTCGCCAGAGATTTCCGCCAGCTTCAAAAACAATGGTATTGTTATGAAGG +GTTGGAAACCGGCAAAGCAAAGTCTCGTTGTTTCCTGTTGTGGCAGCATGAGCCACGAAT +GGTGCCCCGCCTATTAATAATATCAACAGCAAGGACAACAAATTATAATGACGATTCATA +ATAAAAAATTTAATGAGTTTATTAAATAAGCTGATCCATTAAGGTTTCTTATTTCTTGAG +AACCTTCAAAATTAATCAAATTACCTTTCTGACAGGTAACTTTGCATGAGATTTAACGAT +TAATAACGCTTTAAAGGGACAAGTCCCCTTTTTTTATCGATAAATGCCCCCATAGTTTCT +ACGTAATGGTTTATTTTGCATAAATATTTTTTGCACATGATTCGTTACAACCTACATCAG +CACAGTATTTTTTCAGATGGCGCAGCGGAACCCGAAGCCTACGTACAAAGCGCTCTAAAT +CTGGGGTTTGAGGCTATGGGATTTTCGGAGCACAGCCCCTTGCCATTTCCCACAAAATTT +AGTTTGAAAGCAGAACGGGTGGAAGATTATATCCGGGAAACAGAACGGCTCAAAGAAAAG +TACAACGACCGCATTGATTTATACCGGGCACTGGAGATGGATTTTATTCCGGGGTATTCA +GAAAACTTTACCGAGTGGCGTAAAAAGGCTCAACTTGATTATGCCATTGGTTCGGTGCAT +ATGGTACAACCGGAAGATGACGGAGAATTATGGTTTATTGATGGCCCCGACCGCTCTATA +TATGATGACGGACTTCAGAATTTTTTCGGCGGAGACATTAAAAAAGCAGTGAAAACATAT 
+TTTCATCAGGTGAACCGGATGGTGGAAACCCAGGATTTTGAAGTTGTCGGTCATGTGGAT +AAAATCAAAATGCACAACCAGAACCGTTATTTCACAGAAGAAGAAAAATGGTACCGCGAT +TTAGTGGAAGAAACACTGCACCTGATAAAAGAAAAAGATTTCATTGTAGAAGTGAACACG +CGGGGGCTTTATAAAAAGAGGTCCAACCGGCTTTTTCCTGATGACTATGCTTTGCAACGC +ATCAGCGAGCTGGGCATTCCAGTGTTGATTTCTTCGGATGCCCATAAACCCGAAGAACTG +AACCTGCTTTTTGAGACAGCCGAGAAACGACTATTGGACATGGGATTGGGTGCTGTGGTT +CGTTTCGACCATGGAAAATGGAAAGATTTTCCACTCTCCTGAAACGCACATTTATACCTT +AAAATATATAGCTACTGTGTTGTGAAGGAGACGCGTACTGAAGTGCTAAAAAACCCCTTT +AGAAAAAACATCAAGTTTACAGCTGAATTAATTTCGTAAACAGCATTTTTTGTCAATTTT +TCAGAATTCGACGCAAAAATTTTCTAATTTGAACAATATTATCAAATGGAAGATTTTCCA +TTTATTTTTTTACTTTTGCAGCGCTTTCTGAAACCTAAATTGTCGTCATGAGTGAACCAA +TCGTTAGTATCTTTTCAGGTCGTACAAGCCGCTACCTGGCGGAAAAAATCGCCCAAGCAT +ATGGTACCCCATTGGGAAAATCTGAAGTAATCAATTTCTCCGACGGAGAGTTTCAAACAT +CTTACGAGGAAAATGTAAGGGGTCGTGATGTGTTTATCATCATGTCAACACCCCCACCAG +CTGAAAATATTTTGGAGCTTTTGATGATGATCGACGCTGCCAAAAGAGCTTCGGCGCGGA +AAATTGTGGCGGTGATTCCTTATTTCGGCTATGCCAGACAAGATCGTAAAGACAAACCAC +GGGTTTCTATTGGAGCAAAAATGATCGCCAATTTACTGACTACTACGGGAATTGACAGGC +TAATAACCATGGATTTACACGCCGATCAAATTCAAGGTTTTATGGATTTTCCGGTGGATA +ATCTTTATGCATCGGTTATCTTTTATCCATACCTGAAAAAGCTGAATCTTCCCAATTTGA +TGATGGCTTCTCCGGATACGGGTGGCACACGCCGGGCAGCCAACTATGCCAAGGCTTTGG +ATACCGGTTTTGTAATTTGTTACAAGCAAAGGACAAGACCCAATGTGGTTGAACAAATAC +AATTGATTGGTGATGTCGCAGGAAAAGATGTGGTTTTGGTTGATGATATTATTGATACGG +CAGGCACCATCACCAAAGCAGCCCGGGTAATTCTGGATAAAGGTGCCAACAGCGTACGCG +CTATGGTTACGCATCCTATTCTTTCGGGAGATGCTTTTAAAATTATAGCCGATTCACCAT +TTACCGAAGTAGTGGTTACAGATACCATCCCGGTAAAAGATGATTTAGGCGGAAAAATTA +AAGTGTTATCAACGGCACAGTTATTTTCTGAAGTTATCAAAAGAGTTGAAAATTATAAAT +CGATTAGTTCCCTTTTCAATTTGGGGAACCAATCCAATAAATAATCATTTTATCAAATAT +TAAGTTATGAACACAGTATCATTGAGCGGTTCTCTTCGCGAGAACGTAGGGAAAAAAGAT +GCTAAAAAACAGCGCAGGCTGGGTAAAGTTCCTTGTGTCATTTATGGCGGAAGCGAACAA +AAGCACTTTACATTAGATCAATTGGAATTTAAACCTTTGGTTTTCACTCCTGAAGCTTCT +GTGGTGAATCTCACCCTGGGAGAAAAAACTTACGAATGCATTTTGCAAGATGTTCAGTAT +CATCCTGTTACGGACGAAATATTGCATGCGGACTTTCTGGAAATTCATTCTGAAAAGCCG 
+GTTAATATTGCATTACCTGTTGAATTGACAGGAACAGCTCCCGGAGTTGTAAAAGGTGGA +AAATTACGGTTGAAAATGAGGAAACTCCGTGTAAACGGAATCATCAAACTCATGCCCGAA +CACATCGTTCTGGATATTTCAAAACTGGATATCGGACGTTCCATCAAAGTAAGGGATATC +AACCAGGCAAATCTTACCTTCCTTGATCCCGGTAACCAGGTTGTGGTAGCTGTTGTTGCT +GCCCGTGGTCTGAGCGCCGAAGAAGAAGCTGAAGAGGCTGAAGAAGGTGAAGAAGGCGAA +GCTGCTGAAGGTGGCGAAGAAGGAGGCGAAAAAAGCGCTGAATAATTTTAGAAAACTTTT +AATAGAAAGGCTGTCTCAAAAACTTCTGAGACAGCCTTTTTTGTCTACGATTGTCATACA +ATAGTCAGACTGTAGTCAAATGATGGTCAGACAATTGTTTGATAACCGCTTGAAAATTGT +TTGAAGATTGTCTCTTCTCCTTTTACCCTGCAACCTTTAAACCATTAAACCCTTAAGTCC +AAAAAAGAAACTTCCATTTCACGCCAATCCCTCGTATCTTTGCCGAATCTTTGATAATTC +AAAAAAATGAAATATCTAATCGCCGGGCTGGGAAACATTGGGGTTGAATATGCCAACACA +CGCCACAACATAGGTTTTATTGTGGCCGATGCGCTGGTGAATGAGTTGAAAGGAAAATTC +GAAACCGAACGGCTGGCGTCGGTGGCTTCCGTGAAACATAAAGGAAGAACACTGGTGGTT +ATCAAACCCACAACTTATATGAACCTCAGTGGGAAAGCCATAAAATACTGGATAGATAAG +GAAAAGATTCCTATTGAACGTGTGCTGATAGTGGTGGATGATATTGCTTTACCGCTGGGC +ACTTTGCGCATGCGAAAAAAAGGTGGGGCCGCCGGTCACAACGGATTATCTGACATCATC +ATGAAGCTGGGAACGGAAAAATTTCCCAGGCTGCGTGTGGGTATTGGTGATGACTTTGCC +AAGGGCTACCAGGTTGATTTTGTTTTGGGACAATGGACAGATAAGGAAGTCAATGTGATG +ATTCCGCGCGTGCAGAAAGCCGTGGAGATTGTTCAGAGCTTTGTGAGTGCCGGCATTGAT +AATACGATGAATCTTTATAATAATAAATAGTATCATTTTTTCCTGGTTGTTTTCACGAAA +AGTATCATTTCATTAACTAATTTTAGGCGATAGACACCAACCCAGGGAAACAACATGTCA +TTTCGAACCACCAGGCAGGCCGGTGTGTTAGCGTGAGAAATCCCCCGGGAGAGAACCTGC +TGCTCAGGAGATTTCTCCTCACTCCGTTCCTTCATTCGTCGAAATGACAATCTGGTTTTT +TGGCGAAAAACCACCCCCAAAATACCAACGTCATGAAATCCCACAAGCATTTTAAAACGA +AACTGTTAAAAAATTTGGTGAAACAGATAATTATGAATAATTTTGCCGCCCAATTTACAG +TAAGGTGAATAAACATAGTGAGCTTATTATCCCGTTTAAAGGGTTAAGCATCGGAAATCA +TCATTACGATTTTGTGATAAGTGATTCGTTCTTTGAGAGTTTTGAGCCTCTTAACATTCA +GAAAGGGAAGATTGACTTATCACTTGATCTTGAAAAGGAGTCCGGATTGCTGTCGCTGCT +TTTCCATTTCAATGGAATGGTTAGGCTGACCTGTGACCGATGCCTGGAGGAGTATGATCA +GCCTGTAGCTGATGATTTTCGCCTGATTGTAAAATTTGGCGAAGATTTTCAGGAAATTAC +TGACGAAATTGTTGAAATTCCGGATACGCAACATCGTTTTGATTTGAGTCAGTATATTTA +TGAATACATTCAGCTGATGCTTCCCCTGAAACACGTGCATCCTGACGACGAGTTGGGAAA 
+CAGCACCTGTAAACCGGAAATGCTGGAGAAGCTCAAAGAACTTTCCAAACCGGCAACCGA +TCCAAGATGGGATGCTTTATCAAAATTGAAGGAAAAATAAAACAAATATAAAACTGAAGG +AAAATGGCACATCCAAAAAGAAAAACTTCTAAAACCAGAAGAGATAAAAGAAGAACGCAT +TTGAAAGCAAATGCCCCTGTTTTGACCACTTGTCCTACCACGGGCGCAGTGCATGTTTAT +CACCGTGCTTATTATGTGGACGGCGATCTCTATTATAAAGGTAAACTGGTTGCTCAGGCT +GCTGAAGCTTAAGAACTTTTTAGTCTGCTGAAAAGTTTTTTATCTGTTTCGAGCGACCCA +AAAACACAGCCTTTGTTCAAAAAAAGACAAAGGCTTTTCTTTTTGGTTACGCTCCGGTTA +AAAACGCAAAACTTATTGGCATTATGAGTTTTGAGGATTTTAAACAAATCGTATTTACCG +GAAATTTCATCCGGAAAAAAGTTAGTAAAGGATAGTTCTGTTAATTCAGATCGTGTCCTT +TTTTTAAAAATGCTATTTTTGTCTCCATCATAATTAAGAAAAGTTCACTATGACCAAAAT +CAGAGCAGCCATTACCGGCATACACGGATGGGCTCCCGAAGACGTGCTCACGAACAGTGA +TCTCGAAAAAATGGTTGATACAACTGATGAGTGGATTACCACCCGAACAGGAATCAAAGA +AAGACATATTTTAAAAGGAGAAAAAGGAACTTCTGATATGGCCGTGGAAGCGGTGAGAGG +TCTCCTGGAAAAAACCGGGACAAAACCCGAAGAAGTAGACCTGGTAATTTGCGCTACGGT +GACTCCTGACATGCAGTTTCCTGCTACCGCGAATCTGGTGAGTTATAAATTAGGAATCAA +TAATGCATTCAGTTTTGACATGAATGCCGCGTGCTCCACTTTTATTTACGCTTTGATTAC +CGGTTCAAAATATGTTGAATCAGGTGAGTACAAGAAAGTAATTGTAATTGGAGCCGATAA +GATGTCCTCCATTGTGGACTATACCGATCGGGCTACTTGCGTGATTTTTGGTGATGGTGC +CGGTGCTGTGATGCTGGAACCTACTGCTTCAGACGAACCGGGCATCATGGACCATCGTTT +TTATACCGACGGTGCCGGATGGATTCATCTGCATCAAAAAGCCGGCGGTTCGTTGAAACC +CGCTTCTCACGCTACTGTTGACGCTAAGGAGCATTTTATCTACCAGGAAGGACAACCGGT +ATTCAAATTTGCCGTAACCCGCATGGCAGATGTTGCTGCTGAAATTATGGAACGTAATCA +ACTGAAATCGGAAGATATTGCCTGGTTGGTACCCCATCAGGCTAATTTGCGTATCATCGA +TGCCACAGCCAGACGCATGGGCGTATCGAAAGATCAAGTCATGATCAACATTCAGCGTTT +TGGAAATACCACCAACGGAACCATCCCACTCTGTCTTTACGAATGGGAACCGCAGTTAAA +AAAAGGTGATAATATTGTGCTGGCTGCCTTCGGCGGTGGATTTACATGGGGCTCAGTTTA +TTTGAAATGGGCTTACGACGGGAAGAAATAAATTCCTTTAGGAAATACCAATATTTAAAT +CACAAATTTCAAACAAATTCCAACTTCAAAAATTCAAATTCCAGGACCGAGGCATTAGGA +GTTTGATATATTGAAGTTTTGGTAATTATTTGATATTTGAATATTGTTATTTGGAATTTT +AGGTATTCTACTTTCCTATAGCAGTACTTCCCCTTGCAATCAACTCCGTATCAATAATAT +GTATTTGTTGATGCATGTCTTCAGGATGTTCGATCCGCTGAATCAGCAAATCCACAGCCA +CCTGACCTACCCGGGCTAATGAATGATGTACAATGGAAATGGCGGGTTTATACATCTGAG 
+AAATCGGACCATCGCTGAAACTGATAACGCCCACCTCGTTCGGAATCTTGATATTCAATT +CCTGTAAAGCAGGAATAATGCCTGTCAGTACCTCATCACTGATACTCATAATTCCATCCG +GGTAATTTTTCCTTTTAAAAATTTGCAACGCTTTGTTTCTGGCCATGTGCGAATCATCGG +CAAACACCAAATCATGTTCTTCCAGATTATAGTCGCTGAGTGCTTTTCTAAAACCCATCA +GCCTTTGCTGTGTGATGTATAAGGCGGCATTCCCACCAAAATAAGCAATGTTTTTTCTTC +CCGTTTTAATAAGATGCTCCGTAGCTTCATAAGCAGCCCTTCTATCATCAATAATCACCT +TTGAGGTTCCGTAATTATTAAAAACACGGTCAAAAAACACCACCGGAATTCCCTCTTTTT +CAAGCTGATCAAAATGTTTATGGTTTCGGGTTTCCTTCGTGATTGCCACCAGCAAACCAT +CCACTTGACTGGAAAGCATCAACTCTGTGTTCTGAACCTCCCGCTTATAAGATTCGTTGG +AAGATAAAATCAATACATTATAACCGGCATTATAACAGAACTCTGAAACCCCGCGAATCA +TTGAAGGAAAAGCATAAAGTGTGATTTCAGGAATAATCAGCCCGATAATCTTGCTGTAAG +AGTTTCTCAAACTCAAAGCAAAAGCATTGGGATTATAATGCATTTTCTGAGCCAGCTTTC +TGATTTGTTCTTTTCTTTCTTCACTGATATCAGGATGATTTCTCAGCGCTCTTGAAACTG +TGGTATGCGATACTCCAAGCTGTCCGGCAATATCTTTCAAAGTAACACGTCTTCTCCTAT +TCTGCGTCATTTTAAGCAAATTATGTGCGTGCTAATATAAAGAATATCTGGCAGTTCTGA +GAAAAGATTCTTTCAAGACACCATTTTCATAAAAGTTATTAACAATATCACACGACTTTT +CAACAATTCAGGTGTGTGTTCACGTGTGCGGAAAAAACACGGAATAACGACCTTGTGTTT +CAGTTAATTAATGATATATCTTTACACGACCATTGTCCTATATTCATGAATAATAATGGG +AATTAAAACAACATGAATTTTGCACCGGCATATCGGATTTTTTCGAGAAAACCTTTCTGA +GTTGACCAAAAAGAAGGGAAGGTAGAGCAGGTGTTTAGTAAGTATTTTAACCATTAAAAA +CTTAAGAGAATGAAAAAAGTTGTACAAGCCTTCATGGTATTGCTGGCCCTTACCATAACA +ACGGGGCTCATGGCGCAGGGGACTATAAAAGGTACCCTGAAAACGACCAAAGGAAAAACT +GTTCCGGGGGTCAACATCTTACTGAAAGGAACAACTACCGGAACTACTTCCTCACTAAAC +GGAAGTTTTGTTCTTAAAGTTCCTGCCGGCAAGCACGTCCTACTGGTCTCTTTCACCGGG +TTTAAACCGATAAACTATTCGTTTACCATTAAAGACGGTGAAACCTTAACTAAAAATTTT +GTCCTCCACGAAGACCTGCTGGCACTAGACCAGGTTGTGGTGACCGGGGTACAGAATAAG +CAAACCAAACTACAATCCAGCGTAGCCATTACCACATTATCGCCTCAGAAAATTTCACAA +ATTGCTCCGCGAAGCGCTGCTGATTTGTTAAAAGCCATTCCGGGATTTTACGTTGAAAGT +TCAGGAGGTAAAGGAAATGCCAACGTATTTGCCCGCGGACTTCCAAGTTCGGGCGGATTG +CGTTACGTTCAGTTTCAGGAAGACGGGATGCCTGTATTTGAATATGGCGACCTGATGTTT +GGTAACACAGACATTATGGTACGTATCGATCAGACCATGTCACGTATGGAAGCTGTCAGA +GGTGGTTCTGCTTCAGTTCTTACCAGTGACGCTCCCGGTGGTATCATTAATATCATATCC 
+AAAACCGGTGGCCCGACAACAAAAGGTGTATTTATGCAGACTATCGGTTTGACCTACATG +CATGCCCGTACCGATTTTGATATCGGCGGACCTGTTTCGAAACATCTTCGTTATAACATC +GGAGGTTTTTACCGTGCTGATAACGGAATCCGTTCTCCCGGTTTCCTCGCCAACAACGGA +GGTCAGATTAAAGCAAACTTCACTTACACGTTTAACAAGGGTTATGTCCGGTTCAGAACC +AAGATTTTGAATGACAAAACCATTGCTTATCTTCCGTTCCCGATGATGGGGAATCCTGCT +AAAAGTATTCCAGGATTCAATGCCAATTACGGGACCATGAAGAGTTTGGATCTTTTGCAT +CTTCACGCAACAACTCCCACAGGAAATTCTGTAAATGAGAGTCTTGCAGACGGAATGCAT +CCGAAAATTTTCGCTTTCGGAGGTGAAGCATTTTTTGACCTTGGCAACAAATGGTCATTA +AAAGATAACTTTGAAAAAACTTTTACTCACATCCAATTTAACTCCATTTTTGGGGTGAAT +GCTCCTGAAAGTGCCAGCGCTTATGCTACCGCCCAAGGACTTACCAATTATCACTATGCT +TTCGCAGACGGTTATAACGCCGGTAAGCCAATTACCAATATGAGTTCACTGAACGGGAAT +GGTCTTGTAGCCACTTATGGATGGTGGTCAGTGGGCCTGAACCTCCAGGAATTTGGCAAT +GATTTTAAACTCACCAAGCAGTCAACAAACAATACTTTTACTGCCGGTTGGTATTTTTCA +ACCAATCAGGTGGGTGGTAACTGGTGGTGGCACAACATGTTGGTGGACATTTCCGGGCAT +AACACCCGCAAATTGAACCTCATCAACGATAACACCGGTGAGTCGCTCACAACCAATGGT +TATTCACAATACGGAACGTTGTACGCCGATTATAATGCATTAACCGTTATCAACGCACCT +TATGTATATGATGAAATTGACTTGGGACGTTTGACAATCAACGCCGGTCTGCGTTGGGAT +ATGGGAACAATTACCGGTCGTGTTGAAAACACCGGAAGTTATTCTTATGATGTGAATGGT +GATGGTATCATCTCACCTGCTGAAAAAAATATTCAGTATGGTAATGGAACTTACACCCCT +TTCCATTATGATTACAGTGTGTTGTCCTATTCACTGGGGTTGAATTATGAATTCAATAAA +AGCACAGCCATTTTTGCCAGAGCCAGCCAGGGACACCGCTCTCCGGCAGACCGTGCTTAT +GTTTTCGGTGCAACCACTTCTACACCCAACGGTTTTCCGTCCAGTGCAAAAGATGAAAGC +ATCGAACAATACGAGCTGGGATTGAAATATAATTCGTCGAAAGTGGCTTTGTTTGCCACC +GGTTTCTATAGCTTCTTCAACCACATTGACTTCACTGATTTCGTGAATGTCGGTGGAAAT +TTAACAGCCATCCAGCAGTATTACAATACTTCTGCTATGGGTCTGGAATTGGAAGCTGCT +GCACAATTGGGCAAACTGAACCTGTCCTTAACAGGAACCGCACAGAGCGCCAAATATCAT +AACTGGGTATATCACGATCAAAGTGGTAATCTGCATGATTTCAACAATCATTTCATTCAG +AGATTACCTAAATTGTACTTCACCTTCAGACCTTCTTATAACTTTGGAAAACTGAATGTA +AGTGCTGCCTGGGAATACTTTGGCAAACGATACACCAATCCTGAAAACAAACAGGTATTG +CCTCAGTTCTCACAAATCAATGCTTATATCGATTATACTGTAAGCCCGCATATTACAATT +TCCGCTGCCGGTAACAATCTGTTTAATGTCATTGGTCTGACTGAAGGAAATCCGCGTTCA +GGACTTGTAAGCACAGGTGGAAGCCAGTATTTCTACGCCCGTTCTATACTGGGCCGTTCT 
+GCAATTTTGTCTTTTAAGTATAGCTTTTAATTAATAAAAACTGTCCTTTTCATTTTTATC +GCTTTATTTCACAAGCTGTTAACTTTGTAAATAAAAAATAAATTTCACAACGATTTATTT +TAGAAAAAGTGAAATAAAGCAAAGACAAATAACATGTTGATTCTTTTCATAGAGAAGCCG +GGTAGGTTTCTCTATGAAAAGGATTTTATAAAAAGTCAGTGGCTGGTTGAGAACATGAAA +TGATGCCTGGTTCATCTGATAAATAAAAATAAGCACGTGAAACATATTGATATAAGCCCC +ATTGATATTGCCATCATTGTAATTTATATCGTGGGAATCGCCGTTTGGGGATTAATGTAC +AGTAAGAAGAAGTCGAAGGGCAAAGGCCATGAAGGCTATTTTCTGGCTGGAAGAAACATG +ACCTGGCCCATTGTGGGAATCACGTTGTACGCAGCTAACATGGGTAGCCCGGCGTTGGTT +GGGCTGGCTGGAGATGCTTACAGCACAGGGATTTCGGTATTTAACTACGAATGGATGGCC +CTGGTAGTACTCGTCTTTTTTGCCATTTTCTTTTTGCCGTTTTATTTGAGGAGCCGGGTT +TATACCATGCCCGAGTTTTTGCAGCGGCGGTTTGATATTCGTTCACGTTATTATTTTTCA +TTTATCACGCTGGTCGGAAATATTATCATCGATACAGCTGGAGTCCTCTTTTCAGGAGCC +CTAATCGTCAAGATGATTTTTCCGGCGATGGCATTGTGGCATATTATTGCTGTTCTGGCA +ATTATTACGGCAGCTTATACCATAACAGGAGGACTTTCCGCAGTGATGTACACCGAAGCG +GTTCAGGGTGTTTTACTCATGCTTGGCGCCGTATTATTGACCTTCTTCGCGTTGAAACGA +ATTGATTTCAACGTGGCACGCATCTTTACTGAAACACCTCATCACATGATGAGTCTTATC +AGGCCCAACAGCGATAAAGCCATGCCCTGGCTTGGACTTGTGCTGGGCGTTCCACTGCTG +GGCTTCTATTTCTGGGGCACCAACCAGTTTATGGTTCAGCGTGTTCTCAGTGCAAAAAAT +ACAAATCACGGCCGTTGGGGCGCGTTGTTTGCAGGTATCTTAAAATTACCCGGTCTTTTT +ATTGTTGTTTTACCCGGAATTATTGGACGCCTCATTTTTCCTCATCTCTCCGACCCTGAT +TTGATTTATCCTATGATGCTATTCCACCTGCTTCCGGTAGGTATTCTGGGAATTGTGCTG +GCGGGTTTGATTGCCGCAATCTCATCGAGTATCAGTGCGACACTAAACTCTGCAAGTACA +TTGATGACCATGGACTTTGTCAACAACCTAAAACCGGGGCTAACCCCAAAACAACTCGTA +AGAATCGGGCAGATTTTTACAGGAGTTTTCGTGGTGATTTCCGCGGCATGGGCACCGATG +ATAGCAGGATTCCCTTCATTGTTTAAATACCTGCAACAGGTGTTGGCACTTATTTCGCCA +CCCGTTGTGGCTGTCTTTTTACTGGGTCTTTTCTGGAAGCGGGCCAATGCTCAGGGTGCT +TTCTACGGTTTGATGGGAGGACTGCTCATGACCATTTTTGCAGTCATTGTCAGGTATGTA +AATCCGGATATTTTCCCGTGGCTGGGACACATCCAGTTCTTGCTGGTAGCTCCGGTTTTG +CTGGTGGGAACCATGGCCATTATCGTTCCGGTTTCACTGATGACACCGCCGCCACCGGAG +GAAGCTGTGGCACAGTTCACCTGGTCGTTTAAATTCTTCAATGCCGAAAGTATGGAACTG +GCCGGAACACCCTGGTATAAAAACTATCGTTATCAAGCCATTGGCGCATTATTAGCAACG +GCTATTTTGGTTTACATTTTCAGGTAGAAACTTCAATGGAGGAAGCAAAAAACACATATT 
+CCAAGGCCCTTGACCTGTTAAAAAACGGATTGCTGAAAGAAGGGTTTGTCGCTGCACTGG +ACCAGCAGGCTAATTACCGGCGTGTGTGGGCCCGCGACAGCATTATTACCGGCCTGAGTG +CCTTACTTGCAGATGACACTACATTAATAGAAGGAATGAAAAAAACGTTGATTTCATTGA +AACAACACCAGCATGCCAATGGTATGATTCCTTCCAACGTGTCTTTTGATGCTGATGGAA +ATGTCACCATGGTGAGTTACGGTACCCTTACGGGGAAGGTTGATACAAACCTTTGGTTCA +TCATCGGGGTAATGGTTTATGTAAGAAAAACTAGTGATACGGATCTCTTGAAAGAAATGC +TTCCGGCTATTGAAAAGGTCTTTGAGCTTTTGCTTTCCTGGGAATTTAACGGGCGTGGAC +TACTGTACGTTCCTCAAGGTGGAAATTGGGCTGACGAATTCATTTTGGAAGGTTATAACC +TTTCTGAGCAGCTCTTGTACTACTGGGCGTTGTCAGAAGCATCTGCCATGGATGAAAAAT +TTTCAACCAAAGCCAAGAAACTGAAAGACCTCATAGAGATAAATTACTGGCCCACAGAAT +CCAATCGTTCTAAAGTATATCACAAAACCGCTTTCGAACGCCAATTAGAAAAAGGGCAAA +CCAGTCATTGGCTGCCCGGATTCAAACCGGCGGGTTATCATACTTTTTTCGACTGTTTTG +CCCATGGCTTGTCTTTTGTTTTGCAATTCAATTCGCCGGAACAAGAAGGGGAGATTATTG +AAACGCTGGTCAGGACCACTAGTGAAACATCAGGATCCTTGCTTCCGTCTTTCTGGCCAC +CTGTCAGGGAAACGGATGCACAGTGGGAAACCTTGCAAACCAATTGGATATACAAATTCA +GAAATCAGCCCGGCGCTTATCAAAACGGTGGAATCTGGCCGATCTCCAACGGTCTTTTGA +TTGCCGGATTGTATCGTTCGGGCCATAAAGGCATGGCCGACAAGATGAAAGAGGCATTAT +TTCTGGCAACAGCATTGCCGGAAAATCAGTTTGGATTTTACGAGTACATCGACGCATTCA +GCTGGGAGCCCGGTGGGGCCAAACACCAACTGTGGTCTGCGGCCGGTGTCATCTTTGCCG +AAAAGGCTGCTCAAAATGTTTTTATTGTTTGAAAGGAGATTTGAGATTTATGGAAGGAGA +TATTATAAATCTAAATGATGAGCACCGGAGCACGGCAAGAAAAATAGTAGCTCCGCTTAT +CGAAGAAATAAAAAATTGTCAAACCATATATACGTTTTCCGTTGCCGGTGAATCAGGTGC +CGGGAAATCCATCACAGCAGCCGCCATTGCGGAACAGTTGGAACTGGCCGGATTTTCCGT +GAAAGTTTTTCAGCAGGACGACTATTTTTTCCTGCCTCCGTTTACCAACGACCAGAAACG +CAGAAAAGATTTGGAATGGGTGGGTATCAAAGAAGTGGACCTTGCCCTGATTGACGAACA +TCTGAAAGCCGCAAAAGACGGCGTTAAAACCATCAAAAAACCACTGGTCATTTATGGTAA +AAATAAAATTACTTCAGAAGTATTTGACATGAAGGGCGTGAATGTTTGCATTGCAGAAGG +AACCTATACTTCGCTTTTGAAAAACGTTGACAAACGTATTTTCATCGACCGTGATTTTTT +TGACACCCACAACGACCGGAAGAAACGGGGTCGCGATTTAATCGACCCTTTTACCGAAAA +AGTGCTTGAAATTGAGCAGTGTTCCTAGGCTGTTTCCTGGTGGGATCCTCTAGAGTCGAC +CTGCAGGCATGCAAGCTTGAGTATTCTATAGTCTC +>NODE_7_length_23944_cov_113.632_ID_13 +CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG 
+TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +CACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG +CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGATGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA +GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT +TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG +GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCACCTTTGGCCATTCGGCGGA +GATGGATTTGGAGATTTTGGCGTAGCCGTAGCCGGGAAGGTCCGCGAAGAGGAAGTTTGG +CTGCTGGCGGTTGGGCGAGGTAGTGAGGCTGAAAAAATTGATGGCGCGGGTGCGTCCGGG +CGTGGAGGAGACCTTGGCTTGGCGTGAGCCAAGCAGCGCGTTAATGAGGCTGGATTTGCC +AACGTTGGAACGGCCCAGAAAGGCGAACTCGGGTGCCGATGGCGGAGGGAAATGAGCCGG +GGCGGTGGCGGAGAGAAGGAACTTGGGATAGACCTGCATGCAGGTTTCAGTATAGGGCTC +GGGCGCGGAACCGGTAGTCTGGAGCGATGAAGGTGAAAAAGGTCTGGTTGAGCTGGAGCA +GCGGAAAAGACAGTGCGTGGGCACTGCACATTCTACGGCAGCAAAAAGATGTGGAAGTGG +TGGCGTTGTTGACGACGTTGAATGAGCACTTCGACCGCGTGGCCATGCACAGCACCAGGC +GCGACTTGGTGGAAGCGCAGGCGCGGGCAGCGGGGCTGCCGCTGGTGCGGGTCCCGCTGC +CATGGCCTTGCTCGAACGAGCAGTACGAAGCGGCGATGGGTAAGGCGTGCGCGAAAGCTG +TGGCGGAAGGTGTGGAAGCGGTAGCCTTTGGGGATTTGTTTCTGGAAGACGTGCGGCAGT +ATCGCGTGGAGAAACTGGCTGGAACGGGACTAGAGCCGTTGTTCCCGTTATGGGGGCTGG 
+ATACGCGGGCACTGGCGCGGGAAATGATCGCCGCAGGTGTGAAGACTCGACTGGTGTGCG +TGGATCCACGCAAGATGCCTCGGGAATTTGCAGGGCGTGACCTAGATGAGGCGATGCTTC +GGGAGCTGCCGGAGGGAGTGGATCCGTGCGGCGAGAACGGGGAGTTTCACAGCTTTGTGT +ATGGCGGACCGATGCTTGGGGAGGAAATCCCGGTAGAGAGCGGCGAGGTGGTGGAGCGCG +ATGGGTTTGTCTTTGCGGATGTGAAGCTAAAGCACTGATGGGCGTCGGTTTCGGCTCGCG +CGAGGAGGCTGGGCGCTGGTAGACTCTCCTGTTTTGAGTTTTGTGCAGAAGGAGAGGAAT +CGTGAGCGAAATCCAGATTGTAGGTGTAGTGGGAGCGGGCACGATGGGCAATGGGATTGC +GCACGTGTTTGCAAAGAGCGGCTTTCAGGTGCGGCTGTGCGACGTGGAGCAGAGGTTTCT +GGATCGTGGCATGGATACGATCCGGAAGAATCTGGGGCGCGAGGTGACGAAGGGCAAGCT +TATGCAGGAAGAGGCCGATGCAGCGGTGAAGCGGATTGAGGGGACGCTGGCCCGCGCGGC +GCTGGCCGACTGCGACCTGATTGTGGAGGCGGCGACCGAGCAGTTGGAGGTAAAGCGGCA +GATCTTTGAGGATCTGGACCGGGTGGCCAAGCCGGAAGTGATTCTGGCGAGCAATACCTC +GTCGATCTCGATTACCAAGCTGGCGGCGTTTACCGAGCGGCCGGAGCGGGTGATCGGGAT +GCACTTTTTCAATCCCGTGCCGGTGATGAAGCTGGTGGAGGTGATTCGCGGGCTGGCGAC +GACGCAGGAGACCTTCGAGATGGTGAAGGCGCTGGCCGAGCGGCTGGGAAAGACTGCGGT +GGAAGTGAACGATGCTCCGGGATTCGTTTCGAACCGAGTGCTGATGCCTCTGCTTAACGA +GGCCATGTACGCGGTGATGGAAGGAGTGGCGACGCCGGAGGCGGTCGACCAGGTTTTTCA +GCTGGGGATGGCGCATCCGATGGGGCCGCTGACGCTGGCAGATTTTATCGGCCTCGATGT +GTGTTTGGATATCATGCGCGTGCTGCAGGAGGGGCTGGGAGATCCCAAGTATCGGCCCTG +CCCGCTGCTGATCCGGATGGTGGATGCCGGATGGCTGGGGCGTAAGAGCGGACGCGGATT +TTTTGAGTACGGCAACGCGTAAGGTATTGGCCTGAAGGCGGATCTGCCCGGCCGAGGTGG +CCGGTATGGTTCGGCGGCGGGCTTTTCGCCGAGTCTTCCACATCGTGTCTGGCGTATTCT +CGAAGCTATGGTTCGGATGACAGTGCTCGCCAGTGGTTCGAAAGGCAACAGCACCGTTGT +CTCCAGCAGCCGGACACGCCTACTGGTGGATGCGGGGCTGTCGTGCCGGGAGATATTCAG +GCGAATGCAGGCGGCAGGCGAGGACGTGGAGAGCCTGGACGCCATTCTGGTGACACACGA +GCATCAGGATCACGTGCAAGGGCTGGCGGTGACGGCGCGGAAGCTGGGGATTCCGGTGTA +CTTTACCGAGGCAACGCATCGGGCGTGGATGCGCTGGATGACGCCGCGCAAGCGGCTGAC +CTATGCGCAGTGGCTGGAGCAGCAGAAGGCGATTGTAGCGGCCGGTAAAGAGCCGGCGGC +AGCGGGAGCCGAGGCCCAGGATGCGGCCGAGGAGGATATCAGCGGAGCGAAGCCGAAGAA +GGATCCATGTGCACTGCCGGCGGTGGAGTATTTTTCCTCCGGGACAGACTTTCAGGTTGG +AGACATTGCGATTACGCCATACACGGTGCCGCATGACGCGGCCGATCCGGTTGGATTTGT +CTTTGAGGCCGAAGGCGTAAGGCTGGGGATTGCGACAGATCTAGGTTATGTAACTCCTAA 
+CGTGCATTTGCATCTTAAGAAGTGTGACGTTTTGATGCTCGAGTCAAATCATGACGTCGA +TATGCTGCGGGACGGGCCGTATCCGTGGTCGGTTAAGCAACGGGTGATGTCCCGCGTGGG +GCACTTATCGAACGACGCCGCGGCGGATTTCCTGGAGAATTCGTATGACGGACGGGCGGC +GTTTGTGGTGCTGGCGCACTTGTCGGAGAGTAATAATCTTCCGGAGTTGGCCCGTGTCAG +TGCTGAAAGAGCACTGCGGGACCGAATGAACCTGCTGGGGAACAAGCTGATGCTGGCCAG +CCAGCAGACACCTCTGGAACCTATCGTTTTGTAGATAAGTGTGTTAAGCTTCCGGTAAGC +CCCCTTTGAACCATGGGAAGCTTTCAAAGCTGAAAGAGGGACTCCAAGGGTCGCAGGGGA +AGGTCAAAAAGAATGAAAACTTGTATTGATCCGGTAGTTGGCGACATTCTTTCGAGTTGG +CGCTATGACATTTCCGGGATCACTCCCGAGATGCGCATTGACTACGAGGAGCATCTGGCA +AGCTGCAGCGTATGCCGTTCGCGGCAGCGGCTGCATCGGGCTATTGACGTTGTGCTGATT +GGCCTGACAACGTTGTCAACAATTGTGTTTGTGCTGGCGCTGGCCGTGATCCACCACGTG +GAACCATTGCGGACTTTCGCGCTGTTTATCTTCCATATCCGGGATTTCTCGGTAGTGCTG +ACGCTGCAGGCCGTGGCCTTTGCCGGACTGGTGGTCTCGATGTTTGCCTGGCTGCTGGTT +GCGGTGGCAACTCCGGTGCCGACGTTCCTGTCCGGAGTAGCGCGTGAGCAGGCACGGGAG +CTGCAGTCGCGGATTCCCGAGGAATGGCGGAATCGCTTCCAGCGGGGAGCGTTGTAGGCG +AGAAGATTCGGCAAGTCATGGCGAAGGGCCGGATGGAAATCCGGCCCTTTGCCTTTGCTG +TGGACCCGGTTTGCGGCTGGGGGTTATTGCCCGGTCTGCATGCCGCCAATCAGCTCTGAG +ACTTTGGCGATATGGTGGCGGGAGCACCAATGGGCCAGCCCCTGGCCGATGTGCTCAACG +GCGCGCGGATCTGCGTAACTGGCGGTGCCTATCTGGACGGCAGAGGCTCCGGCGAGAAGA +AATTCAACGGCGTCTTCAGGGGTGGTAATGCCGCCCATGCCAATGACGGGAATCTTGACG +GCCTGGGCGGCCTCCCATACCATGCGCAATGCGATGGGCTTGATGGCCGGGCCGGAGAGT +CCTCCGGTGATGTTACTGAGGCGGGGACGGCGGGTTTCAACGTCAATGGCGAGCGAGACG +AAGGTGTTGACCAGAGAAATCGCGTCGGCACCGGAAGACTCGGCGCTGCGGGCCATGGTT +GCTATGGAGGTTACGTTGGGGGAAAGTTTGACGATGAGCGGGCGGCGCGAATAGTGGCGG +GCATGGGCTACGAGGTCGGAGAGTGCGGCCCGGTCGATGCCAAAGGCAATGCCGCCAGCG +TGGGTGTTCGGGCAGGAGACGTTGAGCTCGTAGGCCGCGATGCCTTCGGCTTCGTTAAGG +CGGCGGATGACAGCAATATAGTCTTGCATCTGGTAGCCGAAGATGTTGACGATGACGGGG +CACTGATAACGGGCCAGGGCTGGGAGCTTTTGTTGGACGAAGGCATCTGCTCCAATGTTC +TGCAGGCCAATGGCGTTGATCATGCCGGCGGCGGTTTCAATCAGGCGTGGTGCCGGGTTG +CCGGAGAGGGGTTCGCGCGAGATGCCCTTAGTGACAAATCCGCCAATATGGTCGAGGTTG +ACGACGTCTTCGAATTCAATGCCATAGGCGAAGGTTCCGCTGGCGGCGAGGACCGGATTG +GGCAACTGGATGTCGGCAACTTCGACGCGCATATCCACGCCGTGCTGGATGCCGCGTGTG 
+GCGCTCATGATCGCCTCCGGTAGATGGCGGCGGAGCAGGCCAAACAAGAGATGGATGGAT +AGAAGAGGAGATGGTTTGTCGAAAGCATTGCAAATTGAGTGTAAATCGGAGAGGGAAATT +GTCCCTATGGGCATGTTGAAGGTTTTGAATAAGATCGAGGCATTCCACGGCGAAAAAACT +GCATCTAAGGCGTATAAATGACGAAAAATCAAGACTGCATGACACTGCCAGTGCGCTGGG +AGCAGGCTCCCGCGCGGGAAGATTTCTGGCAAGAGTTGAGAACCGCATCACGTTGGCTGC +TTTTGCTGGACTATGACGGCACACTGGCGCCGTTTCATCAGGATCGGATGAAAGCTACGC +CCTATGCGGGGGTGAAGGAGCGGCTGGAGCAGTTGCTGAAGATTGAGAAGGGCCGGATCG +TGGTGATCAGCGGGCGGCAGATTGAAGATCTGAAGCAGCTGTTGCAACTGAGCCAGCCAG +TGGAGATCTGGGGATCGCATGGGCGGGAACATCTGCTGCATGACGGCTCGTACCGCCTGG +TGGATTTGACCGAAGATGAGCGGCGCGTAGTGGAAGCAGTGACAGCGCGGATGAGCGAGC +GTGGATGGGCCGGGCAACTGGAGCGGAAGCCGACGGCGATCGCCGTTCACTGGCGGGGAT +TGCCAGTGAGCGAGCAGAAAGAGCTGCGCGAGGCCGCAGAGCAGTATTTTGCCGAGGCGA +ATCCGCCCGATACGCTGGAGATGATGCCGTTTGAGTCCGGCGTGGAGCTGAGGTCGCGTT +CTCGGACCAAGGGACAGGTGGTGGCCGAGGTTCTAGCCGAGGAGCCCGCGGATATTCCGA +CAGCCTTTCTGGGAGATGACTGGACAGATGAGGATGGATTCGCGGAGCTTCGCGGACGCG +GTGTGGGCATCCTAGTGCGTCCTGAGGCGCGCGAGTCGTGTGCCGACTATCATTTGACGC +CGCCGGAAGAGTTGCTGGAATTTCTAGACCGGTGGCTGGAAAACGCGAAAGAGAGCATTC +GATGAGTGAAAACCAGGTCATTATTGTTTCCAACCGGCTTCCGCTGTCGATGACGGTGAA +GTTTGGATCGCTGAAGGTGGGAAGAAGCAGTGGAGGATTGGTGACGGCATTGCAGCCGAT +CCTGAAGAGCCGGGGAGGGACGTGGATCGGAAACGGCGGGACGCGCGAAGACAAGAGGAT +GGCGCGCGCGCTGGAGGAAGAGGCCCGGCGGAGTGGGTTTGATTGTGTTCCGGTATTCGT +GACCGAGCAGGAAGATCGCAATTTTTACGAAGGTTTTTCTAACCAGGTACTGTGGCCGCT +CTTCCATGACTTTATCGGCGAGTGCCGGTTTGAGCCGGAGTACTGGGACTTTTACCGCAA +GGTGAATGGCAAGTTTGCAGATGCGGTCATGAGGGTCTACAACGGCAAGCAGATCCTGTG +GGTGCATGACTACCAGTTGATGCACGTGGCGGCCAGTTTGCGGGAGCGGGGATGCAAGGG +GCGCGTTGCATTCTTTCTGCATACTCCATTTCCTTCCTACGATGTATTTGCGAAGTTGCC +GTGGCGGCGCCATCTGCTGCTGGCTATGCTGGAGTACGACCTGATCGGGTTGCAGACCGA +GCGCGACGTGCGGAATCTGGTGAGCTGCCTGCGGCGGCTGGTGCCGGAGGCCTCGATGTC +GACGGATCATGGCGTGCATCGGGTGTCATGGCACGGGCGCACGGTTGTGATTCAGGACTT +TCCGATTTCGATTGATTTCGATGAGTTTGCGCGAGCGGCCAACCAGCCGGCGGTTGAAGA +GCGGATGCGCACGATTCTGGCACGTATGGGCATGGGACAGGTGATTTTCGGGGTAGACCG +GCAGGACTACACGAAGGGAATTCCACACCGGCTGAGAGCCTATGGTGAGCTGTTGCGGCG 
+CAGGCCTGAGATGGTCGGCAAGGTGAAGCTGGTGCAGATTGTGGTGCCCAGCCGGCAGAA +TATTCCCGGATATGAGGCGCTGAAGTCTCGAATCGAGCACCTGGTGGCGAGCATCAATGG +CGAATACACGCAGCCGGGTTGGGTGCCGATCCATTACATTCACCGGGCGATTCCGCGCGA +GGAACTGCTGGCGCTGTACCGAGCCGCGCATGTGGGGCTGGTGACACCGCTGAAGGACGG +GATGAACCTGGTGGCCAAGGAGTTCTGTGCTTCGCGGATTGACGACCGAGGTGTGCTGGT +GCTGAGTGAATTTGCCGGTGCGGCGGCGGAGATGTACCGCGGCGCTCTGCTGGTGAATCC +CTTTGATCTGGAAGGCGTGGCCGATGCTCTGGAACAGGCGCTGCAAATGCCAGGCGCCCA +GCAGCAGGAGAGGATGCGCAAACTTCGCCGTTTCCTGAAGCATGCCAATGTGCATCGCTG +GGTGGAGGATTTTATGGAGGAGATTGAATCGGTCAAAGCGCCACGCTCGCGGCGGGGATG +ATTAGTGGGGACGCGACTCAGGATGGGTAATTTCCGAGGAATACGAAAAGAGGCTGCCTT +AAAGGCCGCCTCTTTTTCGTGCAGGAAAACTGATTAGGGAAAACCTTAGTTGTAGGAGGC +GATGGCCGAGGCCTCATCGTCCTTGATGTCGAAGACGGTATAAAGCTTGGTGATCTGGAG +CACGTCATGAACGCGCTTGGTGAGATTCAGGAGCTTAAGGTCGCCGCCCTGGTTGCGCAC +CGCAGTCAGAGCGCTGGCCATTTCGCCTATGCCCGAGCTGTCGATATAGGTCACATCCCC +AAGGTTGAGGAGAATCTTCTTCTGGCCCTTTGCCAGGAGATCACGGACTGCGTCACGGAG +CTGAACGCTGCCTTCGCCAAGAAGAATTCGGCCGCTAAGGTCGAGAATTGTCACACCGTC +CACCTGACGGGTTGTCACTTTCATGCTCATAAAGAGTTTCCTCCCTGGAGACTCTGTTTG +GCTGTCCCCAAATGTTTAACGAGTGTAAGCTCCGTTCCGGGATGCAGTTGTTTGAAGTGT +ACTTCATCCATGAACGAACGGATGAGAAAGATACCGCGGCCGGATCCACTCAGCAGATTC +TCAGGCTTGAGGGGATCGGGCAGGGCGGCAGGATCTAAACCTTCGCCCTGATCCATGATG +TGGATGATGAGATCGGAGCCATTGTTCTCAAAGGTGACCGTAATGCGCTTGTCAGGGGCA +TAAGCATTGCCGTGGAGAACAGCGTTGACGGCGGCTTCGCGCACGGCCATGCTGACTCGA +AAGGCTTCGTCCTCATCAATACCTGCGTTGCGGGCCATCTGTTCGGCGGTTTGTTCGACT +TTGTCAACGCTGTCGAGCGAGGATGCAAGCGTGTATGTGACGCGGGTTTCCTGTTGTCCC +CTCAAAAAGCTCTCCCTCGCATGGCGTTTGTGGCTTTGGTCTTGTCGGCAGTTTGCTTGC +TGGTGTGGCTGATGTCAAGCAATCCAGTCCCGACGAGCCGTCCAACCATTTACGGAAACC +AATATACCTGAGATGAGGCACGACGTGCGCGAAGACTGCGATGGCCTCTGGCCCAGGGCC +GTGATGCTGCTATTGAGTGATGACCACGGCTGGGACAGGGGAGCGGATTCTGATAGGGAG +CGGGCGGAATGCCTGTTAGACTCATGCCGATGCCAGAGGTGGATGCCAAGGTAACGCTGA +CCATGCTGCTCGGCACTCCGGTGACCGACGCCACGGGAAAGCTGCGCGGCAAGGTACGGG +ACGTGGCCGTGGCCACGGGCGCCGAGGCCGGCCGGGTGGCGGGGCTGGTGGTGAAGAATC +GCGACGGGCTGCAGGTGGTGACGTCGGTGGATTTGCGGCGGACGCCGAGCGGAACCCTGG 
+AGTTGCGAGCCGATGCGCAGATGCGGCCGTTGACCGGGGAAGAAAGCTTTCTGCTGCTGC +GGCAGGATCTGCTGGACCGGCAGATTATCGACGTGCACGGGCGCAAGGTGGTGCGCGTGA +ATGACGTGGAACTGGACTGGTGGAACCAGGAGCGGGGCGCGGCCGGGCAGCAGGAATCGT +TGCGGGTAACCGGAGTAGCGGTGGGCTTGCGGGGCGCGTTGCGGCGGCTGCTGCTGGGGC +TGATGCCGCAGGCGACACTCGACCGGCTGGCGCGGAAGGTGCCGCAGCGGAGCATTCCCT +GGGAGTTTGTCGACATAGTAGAGGTGGACCCGGCGCGGCGGGTGAAGCTGAAGATTGAGC +ATGAGCGGCTGGCGCGGCTGCATCCTTCTGACATTGCCGACATTCTGGAAGATCTGGCTC +CGGCCGAGCGCGAGGCGCTGTTGCGGAGCCTGAGCGAGGAGTTGGCCGCCGATGCGCTGG +AGGAGTTGGATCCGAAGTTGTGGCGGTCGCTGCTGCAGTCGATGGATTCGGAGACGGCAG +CCGGGATCGTGGAGGAAATGGATCCTTCGGCGGCGGCCGATCTGCTGGCGGATTTGAGCA +AGGCGGATTCGGAAGCGATTCTGGGCGAGATGGATCCGGAAGAGCGGCAGGAAGTGAAGG +AGCTGCTCGAGTTTCGGGAGGACTCGGCGGCAGGCCGCATGACGACCGAGTATGTTGCCG +TGCCGGAGGACGCGACGGTGGCTGACTGCGTTGCGGCGCTGCGGGAGTTTGAAGGGGATC +CGGAGACGATCACCGAGATTTATCTGCTGGGCGAAGACGATTTGCTGGTGGGCGTGGTTC +CGCTGGCGCGGCTGGTACTGGCGCGCGAGGAGACGCGGGCGCAGGTGCTGAGCGAGCCGG +AGACGATTACCTGCGAGCTGGAAGCGCATCAGAACGAGGTGGCCGAGCTGTTCGACAAAT +ACAATCTGCGGGCGCTGCCGGTGGTGGATGAGCAGCGGCGGCTGGCGGGCGTGGTGGAGG +CCGACCACGTGATTGCGTTTCTGCGCGAGCGGCGCTGATGGCGCGTGATATGTTCGCTGA +GGGAGCTTAAGGCTTTTAGGCCCTCTCTGAATTCAACTGACAGACAAGTGATGAGTCGAC +CCGATGCTGAAGCGCTGGCGAACCCGGATTCTTTTGTTCCTTGCGGTGCTGGGGCCGGGG +ATCATCACGGCCAACGTCGATAACGATCCGAGCGGAATTTTTGTCTATTCGCAGGCGGGA +GCCAAGTTTGGCTACGAGCTGCTGTGGACGATTTTGCCGGTGACGCTGGCGCTGATCGTG +ATTCAGGAAATGTGCGCCCGCATGGGGGTGGTGACCGGCAAGGGGTTGAGCGACCTGATC +CGCGAGGAGTTCGGGCTGCGCATCACCTTTGTGATGATGGTGCTGCTGGTGGTGGTGAAC +TTTGGCAACGTGATTGGCGAGTTTGCCGGAATTGCGGGCAGCCTGGAGCTGTTCCACATG +ACGAAGTACGCCTCGGTGCCGGTGTGCGCGCTGCTGGTGTGGCTGCTGGCGGTCAAGGGC +GATTACAAGCGGGTGGAAAAGATCTTTCTGGTGGGATCGGTTTTCTACATTGCTTATGTG +GCGACGGGCGTGCTGGCCGCGCCCAACTGGCACCTGTCCATACTGAAGACGGTGACGCTG +CCGCACCGCTCGGTGTGGCGGCAGGACGGCTACCTGTTCATGGTGATCAGCATCATCGGC +ACGACGATTGCGCCGTGGATGCAGTTCTATCTGCAGTCGTCGGTGGTGGAGAAGGGCATT +CGAGTGCAGGACTACGCGGCTTCGCGGGCCGACGTGGTGGTGGGCTCGTTCTTTACCGAC +GTGGTGGCGTGGTTCATTATTGTGGCCTGCGCGGGAACGCTGTGGGTGCATGGGCTGGGC 
+AAGATCAACCTGCCTTCGGACGCGGCGGTGGCGATGCGGCCGCTGGCGGGCAATTACGCA +TTTCTGCTGTTTGCACTGGGGCTGTTTAACGCGGGATTTTTTGCCGCCTCGGTGCTGCCG +ATTTCCACCGCCTACACGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCCTCGGGGCTGCCGATTTCAACCGCCT +ACACGGTGTGCGAGGGGCTGGGGTTTGAGTCGGGCGTGGACAAGAAGTTCAGTGAGGCTC +CGTTTTTCTACTGGCTTTACACGCTGCTGATTGTGGGTGGCGCGGCGGTGGTACTGATTC +CTCATTTCCCGATCATTGAGTTTTCGATCTTTTCGCAGATGCTGAACGGGATTCTGTTGC +CGATTGTGGTGGTGTTCATGCTGCTGCTGATTAACCGCAAGGACTTGATGGGCGAGTATA +CGAACTCGCGCTGGTTCAATGCCGTGGCGTGGGTGACGGCGGTGGTGGTGACAGTGCTCT +CCGTGGTGCTGATGGTGCAGTCAATCCGGCAGGTGTGAGGACTGGCCGTCCAGGCAGCTT +CTAGCTCTTAGCCTCTAGCTGTTAGCTGTTCATGCGGTTCGCTGGATGTTTGCGTTGATG +GAGAAGGCGGCCTTGGGATCGATGGGGTCGGGGATGGGTTGGTCGATGGCTTTGCGTGCG +AGTTCGAGGGCATGGGCGCGGGACTGGCAGGCTTTCCAGCCGGTTTTGATCCAGTTCTGG +AGTTCGGATTTGTCGGCGGGAGGGCCGGCGACGGGCGGACGGAGGGTTGAAGGCGGTTCT +TGGCCCTGGGTGAGGTAGGCGTCGAGGGTGGCTTCATACGTGAGCCGGTTGGAGAGATGA +CGGAGGGCCAGTAATCCCGAGTCCATGAGGGGATGCTGGGGCTGCTTTTCGGCGTGGAAC +TGCTCGGGACCGAGGGTGTCTCCGTTGTCGTACTGGACGTAGGTTTCGACGGTATTGACG +GGGGCGAAGTCCTTTTGCTTGACGTTGCTGGCGGCGAGGCGGAGGCCGTAGAGGTAGGTG +CGGGCTTCGGCAGAGGTGATTTTGCCCTGGGCGAGCGCGGTGAGGACGCGGCCGATGGCG +ATCTGGATGGAGGCGAGGTCTTCGAGGAGCGGGATTTCAATGCCGTTTTTGGGCGGCGGC +GGGGGAGGGTCCTCGGAGCGTTTGGCTGTGCCGTTGGGCGTGCGGTGATGGAGGTGGAAG +AAACAGTAGTCCTCTTTCCAGACGCGGTGGGCNCGGCATTGGACTCCGTTGGCGCGGACG +TGGCGGCATTGGTTGGGCATGGATGGCTCCTTGTTTTGGGTCGGCGCTTTGCGCCGATGC +AGTCCCACACTTTCTCCTCGAACAGCGCGAGGAGAAAGTATGGGGCACCCGGATTTTTCC +GTTCGATTAGTACTTTGTAAGTGATTGATTCGGGGAGGGTTGCGGGAGGATTAGTCTCGT +AAGTGCTGCGGAATCAGTAAGTTATGTTTTGAGTGTTGCGGACAGCAAAAAGGCCCAGAA +AAAGACCGCTGGTGCGGTCCTTTTCTGAGCCTCTATTTATAGGGTAGCAATTTGAGGGTG +AATTCCCGGCAAGAAAGTTGGCCGGTACTGGCCGCGGAACGGCTGCGGGGCCCCGTCACT +TGGCATCCTTCATGATCTGAAATAGTGAAGTAAAAGCTTCACTATTTATACTGGGTGCTA +TATGGTGAAGTTTATACTTCAGTTTTTAGAGGAGATTCGAAATGGAGAAACCGACGGTGA +TTCACAGCAGTTTTACCCTGGAGCGCAGTTATCCGTACCCGGCGGAGAAGGTGTTTGCGG 
+CTTTGGCCGATCCGGAGAAGAAGCGGCGGTGGTTTGCCGATTCTCCGAACCATGAGGTGG +TGAAGTTTGCAATGGATTTCCGTGAGGGCGGCGCGGAAAGGATGGAGTACCGATTTAACG +AGGGGACGCCGTTTCCGGGCGTGATGCTGGTGAACTCCGGACACTACGAAGATATTGTTC +CGGGGCGGCGTGTGGTGCTGTGTTCAACGATGACGATTGCGGGGCGGAAGATTTCGGTTT +CGCTGGTGACGTTTGACCTGCTGCCGCAGGGGGAAGGCTCGGCGCTGTTGTGCACGTTCC +AGGGAGTGTTCTTTGAGGGCGCGGATGGACCGGAGATGCGGAAGGCTGGATGGGAGTTTC +TGGTGGAGCGCCTGGGCGAGGAAGTGGCACGAGAGGGCTGAGACGCATGAGGAAGCGGAA +GCCGAAGGTGGACCGGATTTTTCATGCACTGGGCGACCCGATGCGAAGGGTGATGGTGGA +GCTGCTGCGGAAGAGGCCGTACTCCGTTTCAGCTCTGGCGGAGCCTTTGGGTATTACGCT +GACGGCGGTGGGGCAGCACCTGAAAATTCTGGAGGAAGCCGGGCTGGTGCGGACGGAGAA +GCTGGGGAGAGTTCGGACGGTTCAGCTGGAGCCGGAGGGTTTTGCCGTGCTGGAAGCCTG +GGCGGCAGAACACAAGGGAGAGTGGGCGCTTCGACTGGACAGGCTGGGAGATGTGCTGGC +GGATGACGGGTAGCGCAGAAACGGGAAGGGGAGATTTCAATCGTTTGGGGGCTGAAGTTG +CGTTGTTGACGCTGTCCGAAATGGGAGCCCTTTTCCCGGTTGGCCTGGGCTGTTCTCAGA +GTTCCTTGATGAGATTGATGACCGGCGCATAGTGAGCTGCTACCAGCAGCATGAATCCGG +CCATAATTACAGAGCACCAAATAGTTGCCCAGTACATGACTTTGTCATTGGTTCGAGTGA +TCGTGCGCATTTGCAGCCATATGCGCCCGGTCAAGGCTCCCACCAGCGCGGTGCCAAGAA +AGATTGCGGCAATGAGTACGAAAGGCCAATTCGCTCTAAATGTGTTCGCGGTGGTCACAA +GTTTTCTCCTCGGGTATCAGCAATTGTGTCCAGGCGTCACAGCTTTTGCGACTTCCTCCG +AATTTAACTCTCGTTGCCCCTTTTGTCATCCGTGCGGAATGCCTTCCGGCTAAAGCAAGC +CGGAAGGCATTCTGCTTGTTCCGGCGTCAGCGCTGGGCGATGGCGTCGAGCAGGTCAAAG +AATTCAGGGAAGGAAATGGAGGCGCATTCGGCGCCGCGGATGAGGGTTTCTCCTTCGGCG +CGAAGCGCGGCCACGGAGAAAGCCATGGCGATGCGATGGTCGCCGCCGGAATCGATTTCC +GCGCCGTGGAGGGTCTGGCCGCCAGGGACATCGAGGCCGTCTTCGAACTCCTCGACTTCA +GCGCCCATGGCGCGAAGATTTTTGACGACGAGGTCGATGCGGTCCGATTCCTTGACGCGC +AGCTCGCGGGCGTCGCGGATGCGAATGCCGTTGTTGGTGTAGGGGCCGATGGCGGCGAGG +ACGGGCAGCTCGTCGATGAGCTGGGCGGCGAGCGCGCCGGAGACGGTGGTTCCGGTGAGG +CCGTCCGGCGGGGCGTTGACTTTGACGGTGCCGATGAGCTCGGAGGCCTTGTCCTCAAGG +TCGATGACGCCGATGTGGGCGCCGAGAGCGGTAAGGACGTCGAGCAGCGAGGCGCGGGTG +GGGTTCATTCCGAGATCGTCAAAGACGAGGTTGGAGCCGGGGAAGAGCGCGGCGGCGCAG +AGATAAAATGCCGCCGAGGAGATGTCGCCCGGAACCTTGGCTTCGATGGCGGAGAGCTTT +TGACCTCCGGCGATGGTGATGGAGTCGATGGTGCGTTCGAGTTCGGCACCAAAGGCGCGG 
+AGGGCCAGTTCGCTGTGGTCGCGGGTGCGGACGGATTCACGCACGGTGGTGGTTCCGCTG +GCCTGCAGACCGGCGAAAAGAATGGCGGACTTGACCTGCGCGCTGGGCACGGGCGTGGCG +TATTCCATGGCTTTGAGCGGGATGCCGTGGATGACCATGGGCGCATGGCCCTCGGTGAGG +GAAATGTTGGCGCCCATTTGCATCAGCGGCTTGCGGATGCGCTCCATGGGGCGGCGGCTG +AGGGATTCGTCGCCGACGAGGGTGAATTCGCCTTGCTGGGCGGCGAGGAAGCCGGCGAGC +ATGCGCATGGTGGAACCAGAGTTTCCGCAGTCGAGCGGGGAGGATGAGGGCTGGAACTGG +CCGCCAACGCCGGTGACTTCGACCGAGCCGTCCTCGCCGCGGACGACCTTTGCGCCGAGG +GCTTCGACGCAGGCGAGGCTGCTGGAGGGATCGGCTCCGGTGGAGAAATTGGTGAAGCGG +CTGGTGCCTTCGGCGAGACCGCTGAGCAGGGCGTAGCGGTGGGAGATGCTCTTATCGCCG +GGCAGGCGCAGGCTGCCCTCAATGTTGCGCGCGGGCGCAATAGTGCGTTCTGAAACAGAA +AGATGCAAGATAATCGGCTCCTGTTAACAGGATAAAGCAGGGAGACCAGCCGGTGCTGGC +CGCACGGGAGAGCCTACGCGTTGCGTGCCAAAAGTCTGCCTAATCCGCGCTTCAGGCTTA +CTTAGGAGTGGCGCGGGCGTGGCTACGGCAGAGTGGGGAATTGCTGCTACCGGCCGCCTG +TGGGTAGATGGCAGAAATTTACGGTGGAGCGGGAAACCTCGGGGGGGGGGGGGGAATTTG +CATCTAAGTGCGCTGGAGTCATCAAGCCGTAAACCTTAAGTGCCATAAGAATCTTTTTCG +CGGAGGATTGCACTGGTGAATGTGTTGAGAAAGAGCTTTGTGGTGGCGGTGGTGTTGCTG +AGTTTTGGAGCTTCGGCTGCCGTGGCGCAGACGACGGTCGCGGCCAGCGTGTATGGGGCG +TTTCGGTCTTCGACGAGGACGGGCGGGATTAGCAATTTCACCGTTGAAAATCCGTCGAAC +GCGGCGGGATTTCTGCTGGAGCTGCGGCATATTTCGAATCCGCTGATGGGATACGAACTG +ACGTACTCGTATCATCGGGCGAATGAAGCGTATTCGAACACGTTGAAGGTGCTGTGCCCG +ATTTCACCGGGAGGGTCTTGCCCAGAGCAGATCACGACGGCTGGGGTTTCGGCGAACGCG +CAGGAGGTGACGGGAGACTGGGTGGTTTCTTTCCCGTTGGCGAATTTGAAGCCGTTTGTG +CTGGTAGGCGGTGGGGTGATAGTTACGTCTCCTGCGACTGGCAGTGTAACTGCAACAATT +ACGGATTTCGATCCGGTGACAAATATGATGTCTCAGACCACCAGCAGCATGCCGACGCAG +ACGCAGACGAAGGGTGTGTTTCAGTACGGAGCAGGGCTGGACTGGACAGTGCTGCCGCAC +ATCGGGCTGCGCTTTCAATATCGCGGCAACGTGTACAAGGCTGCGAATCTGACCAAGGTG +TTTACCTCTACAGACAAGTTCACGCAGACGGCAGAGCCGGTGGTGGGAGTGTTTTTCCGG +TTCTGACGGAGGTTGCGCGGAGGATTTAGACGACGCGGAGGGCGATGAGGGTTTCGTCGT +CGAAGCGGTCACAGTCACCCTGAAATTCCTGCAGCTCCTCGAAGACGGCGTTGACGGCTT +CTTCTGCGGGCAGGTGATGGTGACGGGCAATAGAGTCGGTGAGGCGCTCCTCGAAGCTTT +CTCCGGCTTCGTTTTCGCCGTCGGTAATGCCGTCGCTGAAGAAGAAAACGGAGTCGCCGG +GGCGCATGGAAAGGCTGATTTCCTCGTATTCGGCCTGGGGGAACATGCCGAGCGGGAAGC 
+CTTCGACGGGGACGGTTTCGATTTCGCCGCTGCGGCAGTAGATGGGCTGGACCGAGCCGG +CGTTGGCGATCTGGAGGGTGAGGTTCTCGTCGTTCCAGACGGTGTAAAGCATGGCCACGT +ACTGGGAGTCGAGGCGGCGCTCGTGGAGCGAGGCGTTGAGAGCCTGCAGCATGGCCGCGG +GTGAGGGTTTCAGGCTGGCCTGGGAGCGCATGGTTCCGCTGACGACAGCGCCGTAAAGGG +CGGCGGCGGCGCCTTTGCCACTGACGTCGCCGATGGCGAGCGCGCCGCGCTGATCGTCGT +AGTTGAGATAGTCGTAGAGGTCGCCGCCGATGGTGCGCGCGGGCAGGAAGCGGGCGGCGA +ATTCGGCGTGGGGGCGGCTGGGCAACGAAGGCGGCAGCAGGCGGAGCTGGACTTCGCGGG +CCATTTGCAGGTCGTGGTCAAGACGCTGTTCCTGCTGGATGACACGCTGATAGAGCTGGG +CGTTCTCAATGGCGATGGCGATTTGCGCGGCCAGGGTGGTGAGCGCACGGACGTGATCCT +CGCTGAAGTAATGGGGTTGGGTGTGTTCGACGTCGAGGACGCCGACGACCTTGCTCTTGT +ACATGAGCGGGACGGCCAGCTCGGAGCGGGTTTCTTCGTTGACCTTGTGGTAGCGGGGAT +CGCGGCGAACATCGGGGATGTTCATGGGGCGGCGCTCGGCAATGGCGGCGCCGACAATGC +CGCGTTCGACGGGGATGGTTTCGTTGGGATAGTAGCGTTCGCCAAAGCGAAGCGCGAAAC +GGTTTTCGAGCACGTGCTCGATGGGGCGGACGGTCCAGATGGTGAACATCTGGTAGTCAA +AGAGACGCCGCAAGAGCTGGCTGATGCGCTCGAAGAGACGATCGGTGTCGAGGATGGAGC +TGAGGTCGCGGCTGATTTCGTTCAGGACTTCAAGGGTTTGCGCCTGGCGGGAGACGCGGG +TGTAGAGGCGCGCGTTATCAATGGCGTGGGCGATGCGCGACGCGGTGAGAGTGAGCAGGT +GGAGGTGGTCTGGCTGGAAGTAGTTGGGCTGCTCGGACTGAATGTCGATGACGCCGATGA +CGCGATTCTTGACGATGAGCGGAATGGCGATCTCGGAGTGAACGTCGGGGTTGGCGTTGA +TGTAGTTTTCGACGTCACGGACGTCGGGGATGAGCATGGGCTTGCGCGTGAGTGCCACCT +GGCCGGTAACGCCCTGGCCGAGGCGAATGCGCATACGCTCGACCTCGGGCCGGTGGCCGG +TCTGGAAGCGCATGCGAAGGTCGTTGGTGCGGTTATCGAGCAGAAGGATGGCGAAGATGC +GGTAATCGATCACCGTGCGGACAAGGTCGGCGGTGCGCTGCAGCAACGTCTGGAGATTGA +GCGTGGTATTGAGGGCGTCGGTGAGCCGCAGAAGGAAGTCAGTCTGGGCCAGCTCGACGG +AGCGCGAACCTGCAGCCGCAGCCGCATCCGTGGGGTGGCTGCCCTCAATGGACGAAGAGT +GCTGTTCCGGGGTGGTCTGGGGGCGCGATTCGTACACGGTTCTCCTGTTGAACTGCCATG +ATAGAGCATCTTCGAGGCTTCTGTAGAGTGACCGGGGATAGCCCTCCCTGGACGCGGATC +AAGGGAAGGGCCACATTTATTTTGGAACAGCTGGGTTTGCCGCTGGAATTTTTCTGAATG +CTTTGTTCCCGTGGCGATAAAAGCTCTGTCCCCCACGGCGCAAAGTTGCCACAAAAAAGA +CTGGCCGCTGCAGGGGTCAGGATGCGGGGATGGAATCGAGGATGCGGATATCGGGGAGAT +TGCGGTAGCGTTCAGCGTAGTCCATGCCATAGCCGACGACAAACTCATTGGGGATGCTGA +ACCCGATGTAATGGCCCTGAATGGGACGGATGCGGCGCGAAGGCTTGTCGAGCAGGGCCG 
+CGACGCGAATGGAGCGCGGGTTGTGCCGTTCAAAGTGGCTTTGCAGAAAGCTGAGCGTGA +CGCCGGTGTCGAGGATGTCTTCGACGACGATGATGTGACGGCCTTCGATTGGTTCGGTGA +GGTCTTTGATGAGTTGGACGGCGCCGCTGGAACGGGTTCCGGACTTGTAGCTGGAGACGG +CGACGAAATCAAAGGTGCAGTCGAGGTTGATGGCGCGGGCCAGGTCGGCCAGAAAGATGG +CCGCGCCCTTGAGGACGCCAACCAGCACGACGCTTTCGCCATGGAGGTCTTCGCGGATGT +GGCGGCCCATTTCCCGGACGCGGGACTGGATCTGATCGCGAGTGAAGCGAATGCGAAGAC +TCTCGGCGGGCGGGAATTGGGTTTGATCGGGCATGATGTTCGAACTCAGGTTTCCAGATT +ATCGCGAAAAGCATGAGGAAAAAGCTGTGGTGGCCAAAGATCGGTGATGCCATTGAGCGT +TTCGTGCGTCCAATACGAAGACAGCTGTCGTTACTATCTATGTATCCATTTATTCATATT +GGTCATTTCACTATCCCCACATACGGCATCATGATGTGGCTCGCCGCCGTGGCTGGATGC +ATCGTCCTTTACAGGAACTTCAAGCGCTGGAAGGTGGAAGGCGATGCCATCACCATTGTG +GCGTTTGCGACGGTGATCGGCATTATCGGCGGAAAACTCTATCACGTGCTGGAAAAGCCG +GTGCTGCTGATGCACCATCCCGCGCTGTTGATCAGCCGGTCCGGATTTGCCTGGTACGGC +GGGATGATTGCGGGGATTCTGGCGCTGTTGTTTCAGGCCGGAACCTATAACATTCGTCCC +CTGCGGATGCTGGACCTGTGCGTGCCCTCGGCGGCGCTGGGTTACGGGATCGGGCGGCTG +GGATGTTTCTTTTCCGGCGATGGCGGCTATGGTCCCCCGACCAAGATGTGGTTTGGCATG +AGTTTCCCACATGGGACCGTGCCCACGACACAGAAGGTGTATCCGACGCCGCTGTTTGAA +TTTGTCGCCGCGGTCATTATTTTCTATATTTTGTGGCGGCGCAGCCGTCCAGCAGCGGAG +CGGAAACTGGGGCACATGACGGCGGAATACCTGCTGTTGGCCGGCGGAGCGCGCTTCCTG +TTCGAATTTATTCGCATCAATCCGAAGATCTTTCTTGGGTTGTCGAATGCGCAGTGGGCG +AGCATCGCGGAAATGCTGGGTGGAACCGCCCTGCTGTGGTGGTCACGCAAGTATGCTTCG +ACTCCGCAACCCGGACAGCAGGGAAGGCAGCCGAAAGAAGAGCCGGCGCTGGTTGCCGCA +GGAGATTCCGGTGGGCCGCCTACAGCGGAACAGACTCAATAAATCGTGGAAAAACGACAG +ATTTTATTCAGGAAACAGCTCTTCCCAATGGAGGGGCTGTTTTTTGCGTGAAAGCGTTCA +GGGCAGACTGCGCTGGAGGCTGGAAACGGATCTGTCGTGGTGGAGTTAGAGCGGAAAAAG +TGCATGGGGGCGAAAAGCGAGAGACGGAGAATTTGCGAGCGTAGGAGCAGCGTTTTCGTC +TATATACTCTAGATTACGGTTTTCCATGTATCCATTTATCCATGTTGGTCCCTTAACGAT +TGGCACATACGGCATCATGATGTGGCTTGCCGCGGTGATCGGGTGCTACCTCCTTCACAA +GAACTTCAAGCGCTGGAAGATTGATGCCGATGCGATCAGCATCATCGCCTTTGCGACGGT +GGCTGGAGTGGTGGGCGCAAAGCTCTACCATGTGCTGGAAAAGCCAGTGCTGCTGATGCA +TCATCCAGCGCTGCTGATCAGCCGGGCCGGGTTTGCCTGGTTTGGCGGGATGATTGCCGG +GATTCTGGCGCTGCTGTTCCAGTCCGGGACCTATAAGATTCGTCCCCTGCGGATGCTGGA 
+TTTGTGCGTGCCCTCGGCGGCGCTGGGGTACGGAATCGGGCGCATTGGATGCCAGCTGGC +CGGAGACGGGGATTACGGCATTCCGACGAAGATGTGGTTCGGGATGAGCTATCCGCACGG +AATTGTGCCGACAACACAGAAAGTGTATCCGACGCCGATTTTTGAGTTTATCGGGGCCTG +CATTATCTTCTATATCCTCTGGCGGCGGAGCCGGCCGGGGACGCCGCGGAAGCTGGGACA +GATGACGGCCGAGTATCTGCTGTGGACCGGCGGGGCTCGCTTCCTGGTGGAGTTTATTCG +CCGGAATCCGAAGATCTTTCTGGGCCTGACCAATGCGCAGTGGGCGAGCATCGGGTCGAT +GGGAGCACGCCGACACCGCCCGCCTGGCCAGGAAGCTGGTGCGGCTGGCCCAGGACGCGC +CGGTCAGCTTCGATCCCGACACCTTCAAGGTAACCCCTCCCGACCCAGCCGTGATCACCC +CGGTACTGGCCCAACTGGAGTTCAACCAACTGCTCAACCAGTTCGCAGCCCCACCACCCA +AGGCCGACTACCGCCGCCTCAGCGACCCGGAGGAGATCGAAGACTTCCTGAAGCCCGTGG +CCCGGAAGAAACGCTTGGCCATCGACACCGAGACCACCTCCATACAGCCCATGCTGGCCG +AGTTGGTGGGCGTCTCCCTCTGCCATCAGGCGGGCGAGGCGGTCTACATCCCCGTGGCTC +ACAACCTGACCCCCGGCCAGAGCCAGGCCGACAAAGAAGCCGTGCTCCAGACCCTGGCCC +CGGTGCTGGCCGATCCGGCCGTCACCAAGATCGGCCAGAACATCAAGTACGACCTCATCG +TGCTGGGCCGCTGCGGAATGGAGATCAACGGGCCGCTCTTCGATACCATGGTGGCCAGTT +ACCTCTTGAACCCCGGCAAGACCAGTCACAACTTGGCCTCCATCGCCGCCGAGTTCCTGG +GCCGCTCGGTCATCTCCTACCAGGAGGCCACCGGCGGCAAGAACCGCCCCTTCGCCGACA +CCGACCTGGACCAGGCCACCGACTACGCCGCCGAAGACGCCGACGTGGCCTGGCAGGCCG +CCCAGGTGCTGGAGAAGAAGCTGGCCGAAAGCCACCTGGACGGGCTTTTCCGCGACCTGG +AGATGCCCCTGGTGCCGGTGCTGGCCCGCATGGAGCGAAACGGCGTGGGCCTGGACGTGC +AGGGCCTGGAGGATCTGGGCAAGGAGCTGGCCGCCAAGCTGGATGAGATCGAGCGTACCT +GCTACCGCCTGGCCGGACACGAGTTCAACCTCAACTCGCCCAAGCAACTGGCCCAGGTGC +TCTTCGAGGAGCTGGGGCTCACCCCGGTCAAGAAGACCAAGAAGGGCAAGACCTCCTCCA +CCGACGTCTCAGTGCTCACCGTCCTGGCCGCCAAGCACCCCCTGCCGGCCGAGGTGCTCA +ACTACCGCACCCTGAGCAAGCTCAAGTCGACCTACATCGACACCCTGCCCAAGCTGGTGA +ACCCCCAGACCGGCCGCCTGCACACCTCCTTCAACCAGGCCGTCACCGCCACCGGCCGCC +TCTCCTCCAGCGACCCCAACCTCCAGAACATCCCCGTGCGCAGCGAGATCGGCGAACGCA +TCCGCGCCTGCTTCATCGCCGAGAAAGGCAATCTCCTGGTCAGCGCCGACTATTCCCAGA +TCGAGCTCAGGGTCCTGGCCCACCTGAGCCGCGACCCGCTTCTAGTGGAGGACCTCACCA +AGGGCCTGGACGTGCACACCCAGACCGCCGCCCGGCTTTTCGACGTCATGCCCGAGCTGG +TCACCAAACCCATGCGCGCCCGCGCCAAGACCGTGAACTTCGGCATCCTCTACGGCATGA +GCGCCTTCCGCCTGGCCCGCGAACAGGGTATCAGCCGCAAGGAGGCCCAGCAGATCATCG 
+ACAAGTACCTGGGACGCTACCAGGGCGTGGCCCGTTTCCAGGAGGAGAACCTCCGCCAGG +CCCGCGAGAAGGGCTACGTCACCACCCTGCTGGGCCGCCGCCGCTACCTGCCTGCCATCA +ACGCCGGCGACCGCCTGGCCCGCCAGGCCGCCGAACGCATGGCCCTGAACACCCCCATCC +AGGGCACCGCCGCCGACATCATCAAGCTGGCCATGCTCGCCGCCCACCGCCTCCTGGAAG +AACGCTTCCCCCAGGCCCTCATGATCCTTCAAATCCACGACGAGCTGCTCTTCGAGGTCC +CCGCCTCCCAGGCCGAGGACCTGGCCCAGGCGGTCAAGCAGGCCATGGAGGGCGTCATCG +AACTCGCCGTGCCCCTGGTGGTGGATATCGGCATCGGCCCCGACTGGGCCCAAGCCCACT +AGGCTAGGCCTAGAGCAATATTTAGAACGGGCGATCTTCTGACTGGGGCAGCGTTTTTTA +GGGGTATCGCCGGATGTCGCGCGCCACGCGCCACGGTGTCCCCCGCTAGGCGGGGGGCAG +TGGGGGGAACGGGCGCTCTTCTGTTTGGGGTTACGCTTTTTAAGTGTATCGCCGGATGTC +GCGCACCACGCGGGGCGTCGACCCGAAGGGAGCCTATGACCTTACGGCGAGGAGAAACCG +CGCCAGCGATTTCATCCAGGTTTCTTTGAAGGGGACGGGGACACCGCTACTGGGGCGGGT +TCTTTCCGAGCTAGACCTCAAAGGCCCATCCCCGGCCAGACACCGTCCCGCATCCCTCTA +TTGCTCCAGGCCTAGCCTGGCGGGGAACACTTTCTTTTTTAAAAGAAAGTGTTCCCCGGC +GGTG +>NODE_8_length_21143_cov_115.55_ID_15 +AGACTATAGAATACTCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCACCAGGA +AACAGCCTAGGAACACGGACTCACGGCCTGTTTGCCATCGGGCGAGACCATGTAAACAGT +CCCAGCCAGTCGACAAGAGGTCCCATTGAGTGGGTTGTAGCAGACACCGAACGGCTGCCA +GTCGGCAGGATCCATGTCGTTGAAGAAGAGTTGCTCGTCGGTAGCGCCCCACTGGACCTG +CGCGCCGAGCTGGGTGTCCCAGCCCCGGCTTTCCCAAATGACTTGGCTCTTGCCGGTGTG +CAGATCGACCAGAACCACTTCTGCCGCTTCGCCGGGCTCCGGCAGGCGTTCCGGCTGGTA +ACGGGTGACAGCCAGGTAGCGCCCCGAAGGACTGAGCGGCGAGGTGTCGAAGAAGCGGTG +GAACGCGCCGGGACACTGCGGTGTGAGGTCCCAGACAGGGACCTGAGGGTCGTACTCCGT +GTAACGGGGGAAGTGGCGGTCCAGCAACGCGTGATCCACGGGCATTTCTCCTTGTGCAAT +CGTGCTCCCATGACCGTAGTGCCGCGATGCAACCGTGCCAAGGGGAATCGCCGCCCGGGA +CCGGGCAGGGAGCGAGAACTTGCACCTTCAGTGATGCGAAGCGATAGTCCAAATCTCGAT +TTGATCCCCCAACCAATCGCAGAACTAGGAACATGCCTGTGAAGAACGTCCTTCTCCTCA +CCCTTGCAATCGGAGCTGCTGCCATGGCTCAGCCATACGAAGGTCTTGTCGGCACCAGTG +GGCGTGTCCAGGTAAACCGCAATGGCCGACCCGCATTCTCGATCTCAACTGGAGCCTTTG +CCGATGGCTGGAGAGGTGCGTCTCTGACGCCGGCGAAGGTGGGAGAAACGACGGACGGAG +TCTGCCTCGGGAAGATCTCCCTACCCGACAAGCTGACCATTGCCTCTGCCCTTCGGGCCA +CCGCGGCCGGGAAGGCGATGGAACTCCGCTATACACTCACGCCGAAAGCCGATGCCAAGC 
+TGAACAGCCTGCATGTGTCCTTCGGACTGCCAGCGTCCTTCCTGAAGGGTGCCTCCTACA +CCATCGAGGGAGAGACCAAAGAGGTCCCGGCCGTGCTTGGTGCCACTCACCTCCGGGCCG +GGGACCATGTTCCCAGCGTTCGGTTCACTTGGCCTAACGGCGACTGGCTACAGGTGGATA +TTCTCTCCAAGACCCCTGTTCTCTTCCAGGACAACCGTCAATGGGGCGACAGCTTCGATC +TGCGTCTTGGCCCGCAGATGGTCCCGGCCCAGACGCTTCCGGCCAACCAACCGGTCGAGA +TTGCCATGCGCGTGAGCGCAAAGGACGGCATGAAGCTGGACTTCGACCGGCCGGTCACGA +TCACGGCCGGCAAAGACTGGGTGCCACTGGATCTGGAGCTGGACATCGAGCCGGGATCGG +CTCTGGACTTCTCCGGCCTGGGACAGTTCGACGCGCCGTCCGGCAAGCACGGCTGGCTGC +AGGCGACGCCCGACGGCAAGTTCGCCTTTGCCGACAGCCTGGACACGCCGCGCCGGTTCT +ACGGGGTCAACCTCTGTTTCACCGCGCAATACCTCTCGCACGACGAGGCGGAGCGGCTGG +CCGAGAGGTTCCTGCGCCTCGGCTACAACACCGTCCGGTTCCACCACCACGAGTACCCGC +TGATCGACCGCAAGAACGGCTGTTCCACCGACCTCAAGCCCGAATCCATTGACCAGCTCG +ACTACCTCTTCGCCCAGTTCAAGAAGCGGGGCATCTACGTGACGACCGACTGCTACGTCT +CCCGCCCGGTCTACGCCAGCGAGATCTGGGATGGGGCCAAGGGCAACGTGGAGATGAACG +AGTTCAAGATGCTCGTTCCGGTCAACGAGCGGGCGTTCGAGAACTGGAAGACCTACAACC +GCAACTTCCTCACCCATCGCAATCCTTACACGGGCATGCGCTACGCTGACGACCCCACCC +TCGCCTGGCTGTCGATGATCAACGAGGCCAACTTCGGCAACTACATCCGCAGCGTGTCGG +ACCGGGCCCGGCCCGACTGGGAGCGGGCTTGGGGCGCGTGGCTGAAGGCACGGTACGGCT +CTGCCGAAGCCATCACGAAGGCTTGGGGCAGCACCTTTGACGGCGATCTGAGCAAACCGA +CGGCTAAGCTCGCCAAGTCGTTCACCGACGACAACCGGCAGAGCCGGGACTTCGCGGTCT +TCCTCGCCGATACCGAGCGGACCATGTTCCTGAAGATGAAGAAGTTCCTGCGCGAGGAGA +TCGGCACGAAGGCGATGCTCACCAACATGAACGGCTGGACGAACACGCCCCAAAGCCAGC +TGGCCCGGGCTGAATTCGACTACGTGGACGACCATTTCTACGTGGATCATCCGCAGTTCA +TCGAGAAGTCCTGGCGGCTGCCCTCCCGTTGTCCGAACACCAGCCCGGTCTTGGCGGGCG +CTCCCGGCGGGCGTGGCACGGCCTTCAATCGACTGATGAACAAGCCGTTCACGATCAGTG +AGTACAACTACTCCGGCCCCGGCCGTTACCGTGGCGTCGGCGGCATCCTGACCGGCTGCA +TGGCCGCGCTCCAGGACTGGTCGGTCGTCTGGCGCTTCGCCTACAGCCATCGCCGTGAGA +ACGTCTTGAAGCCGAGCACGGCCGGCTACTTCGATATGGCCACGGACCCGCTGAACCAGG +CCGCCGAGCGCGCCAGCATGTGTCTGTTCCTGCGCGGGGACCTGGATCCCGCTCCCCGAA +GCGCAGCCATTACCCTCAATCCAGAGACGTTGGAGAAGGGCGACTCGCACCAGGGACGCA +CGCCACCGAGCTGGGATGAACTCGTCCCGGTCATTCAGGTGGGTACGTTCCTCGGCGACC +GCCAGAGCAAGGTCCCGGCCGACATTGCCCTGCCTACGACCGATGCCGCACCGGCGGCGG 
+CCGACGTGGTCATGCCAAAACCCTACGACTCGGGCAAAGGCTCAGCCATCCTCAAGGAAC +TGCGCGCGAAAGGATGGCTGGACGCTGCCAACAAGACTGATCTGGACCGCAAGCGCGGGC +AGAGCGCGAGCGACCAGTTCCTCATGGACGGCGAGAAAGACATGATGGTCCTGGACACGC +CCCGGACGGCAGGTGGCTACGCCGAGGCGGGCCAGACCATCCATACCCAGGCCGCAGACT +TCTCGATCCTCGACACGGGAGCAACCGTCTGGATCTCCAGCTTGGACAAGCAGCCGATCA +CAAGTAGTAAGCGGCTGTTGCTGACCCACCTCACCGACCTGCAGAATACCGAGGTCCGCT +ATGCGGAACGGGGTCGGAAAACGCTCCTGGCCTGGGGCAAACTGCCGCATCTGGTCCGGG +TGGGAGAGGCCAAAATCTCCCTGCATCGCAGCGGTGCGAAGCTGCCGAAGGTCTATGTGC +TGGCCACGAGCGGACACCGCCTCGGCGAGGTCCCCGTGACCAAGGGCAAGAACGGCACGC +TTGAGCTCGCCATCTCGACCAAGGGCGAAGCGGGTGCCCAACTGATGTACGAACTGGATT +TCCGGTAGCTCTCTCTCGGAGAGTCGTTGCGCCCCGGCCGACAATGGACCTCGGCCGGGG +CGTTTTCGTCTGGCACCAACCGGGGCTCAACCAGCAAGCGTTTCCCAGACGAAGGGAAGG +CGCGGCTGGCTTGCCGGCCACCGTCCTGCCTGCTTGCTTGCCCCTTGGCGGCAACGCTGG +CGGAGTGTACCTTCTATGGAGAGAAGTTCCCTCGCGCTGCCATGTTCTGGCGGCTCTTGC +CTTCACGCCGCAGTCAGGGCCCAGATGACCATGCAACGGACTGCCTTCAGCTTGAGCATT +GCTCTTTCGGCCGTCCTTGCGTCGGCCTTGCTGGTGGGAATTGTGCCTCTGTTGCGAGGC +GGCGACAACTACGAGCTTGCCGTGAAGTCGGCGCTTCTCGTGCTCGTGGTCGGCTATGCT +TGGCTGCATTGGCGGGCTGTGACGCGGCGCGGCCGGTGCCTCCTGAAGGCAGCGTTCTGC +CTCAATGCGCTTGTGCTCGTCCCGCTTGGAGCTACGGCTGCCTTGTGCCATGTGTTCGGC +GGACCCAAAATCGTGCCACGTGAGGCGGGCGCAATAGGTGGCGCGATCGCGATTCTCGCC +GCTGCAGCTGCCGCCATGCAGGTCATCCTGCTGACGCGTTGCCGCACGATCGAACTCACT +GTCCCAAAAGGAGACTGAGCGTGTATTTCAGACAATACAAAGTCGAGGGGTTGGGCTGCT +ATTCCTACCTGATCGGCTGTCCGGCAGCAGGAACAGCCTGTGTGGTCGATCCGGAACGGC +ACACCGGGCAATACATCCAGACCGCCGAGCACCAGGGCCTGAGGATTACGCACGTATTCG +ACACGCACCTGCATGCCGACCACATCACCGGCAGCGCCGAGCTCGCTGCCGCTACGGGGG +CCACGATCTGCGTCCATCCTGCCATCGGGGCGGAGTACGAGCACGAGGACCTGCTGGACG +GGCAGCATTACCGGTTCGGCGCGGCTGAACTCGAGGTGGTCGAGACGTTCGGCCATACGC +CGAACTCCGTCAGTCTAGCGCTGACCGACCATGGCCGGTCCGAGGATGTGTTTGCCCTCC +TGACCGGAGACCTGCTCTTTGTTGGCGATGTCGGCCGTCCCGACTTGGCGGGCGCAGACC +TGCTCGAGGAACAGATCCACAACCTATACGAGAGTCTCTACACCAAACTGGGGCGCTTCC +CCGACTGGACCGAGGTCTATCCCGCCCATGGCGAAGGTTCCCTTTGTGGCAAGGGCATGA +GTGCCAAGCCCATGACCACTCTCGGCTTCGAACGCCTAAACAACCCGTTGCTTGCTGATC 
+TGGAGTTCGCGGAGTTCCACCGCATTATGACCGAGGCGTTCCAGGTGCGCCCCGACAATT +TCGCGGCCATCGTGGCCAAGAACCAGCGTGGCCCTCAGCTTCTGCGGGAGGCACCCGCTT +TCATGGAACTGTCCGTGCTGCAAGCGGAGCGCGCCCTGGCCGCAGGGGCACAAATCGTCG +ACACCCGGGCCCAGAGTGCCTTTGGCGCCGCATTCCTGCCCGGATCACTCAACATCGGAG +TCAGTCCCTCTTCGGTGAACTGGCTCGGCATGCTTGTCCCCGCCGACACGGATATCATCA +TTGTGGCCGACAGCAAGGACGTTGCCTGCCAAGTGGCAGACCAGTTCCGGCGAGCGGGCT +ACGACCGCCTGATCGGCTATGTTCCCGACGGCGTGGCCAGCTGGGCTCTACAGGGCAAGC +CGATGGACCACCTGCCCCAGCTAACGCCCGCCAGCCTGAAGCATGTTGTTGGCAGGTACG +GCAACCACGTGATTCTGGACGTGCGCACCGATGCCGAATGGGCCACCGGCCATATCGAGG +GAGCCATTCACCTCCCCCTCCCGCGACTGGTCAGAGAGGGCATCGATCTCGGCAAGGACC +GCCACATCACCACAGTCTGCCGCTCCGGGTACCGCTCCAACGTCGCTGGCAGCTTCCTGA +AATCCCAAGGCTACGAACATGTGTTCAGCCTGATCGGTGGCATGACCGCCTGGCAGGCCG +CCAACCGTTGACACCGGTACCAGCCGGGTCCTTTGCCCTCCTTTCCCCATCAAGCAAAAA +CCCCCGCCTAGGCGGGGGTCAAAGCGGTTCCTCAGGGGAACACGGAACAGTTCTGGTATG +CCCGGCGCGATTCGAACGCACGACCTTCTGCTCCGGAGGCAGACGCTCTATCCAGCTGAG +CTACGGGCACCCATGAAAGGGAATCATACTGTACAGCTTGCCCAACCTAACGCAAGTGGT +GCAGCTCGCCGTTTTCAGGCGTCCCCGTCCAATTCGAGCTCACCGAGATCCATTTCGGCG +ACCACCGAACGCCCAACCAGTTCGACGTTGACGGTGACCCGGGTCTTGCCCTTCCGCTTC +TCCACAACCCCGGTGATGCCCGCCAGGGGGCCATCGGTGATGGTGACCTGCGTGCCCGGC +TTGATATCGGGCATAACTTCCAGGTCCACCGTCGCCTGGGCGGTCTCCAAGTGCTGCAGC +TCGGTCAGTTCGGCGACCAGCGTCCTCTCCTGGGCCTCCCTGACCTCCACAATGTGGACA +ATGCGATGGCACTCGAGGAAGGTGGTGCGGGTGTCGGGGCCGAGCTGGACAAACACGTAG +CCGGGGAACATGGGCAGGTAGGTTTCGACCGTGCGGCGCTGGTAGCGTTTGCGTTGGCGC +AAGAGAGGCAGGTAGCAGGGAATGTCGTGCCGCTTGCAGTAGTCGTCGACGACCTTCTCG +GTGCGGGGACGGCAATAGGCGGGATGCCACGTAAGCCCGGGCTGGCTAGCTATGGAAATG +GACATGGTAAGAGGAATCAGTCGAGAAAAAGTTCAGGGGCGATGCGTTCCACAAAACTAC +GGCAGAGCTGGGTGACTTCCTCAGCGCTGCCGCAAGCCATGGCTTGGTCCACCAATTCCT +CAGCTTCCAGCATATTGATATCGCGTACCAGATCCTTGACGATCGGGATGGCGACGGGAC +TCATGCTCAATTCGTGGATGCCAAGGCCGAGGACCAACGGCACGAAGAGGGGTTCTGCGG +CCATCTCGCCGCAGATGCCGACCCAGCGCCCATGCTCGTTGGCAGCACGGACGACACGAT +CCAGCAGACGGACGACGGAGGGATGGCCGGGTTGGTAGAGGTACGAGATGTCGGGGTTCG +AACGGTCCACAGCCATCATGTACTGGACGAGGTCGTTGGTACCGATGCTGAAGAAGTCCA 
+CGTGCGGCGCCAGCTTATCCGCGATGAGGGCGGCGGCAGGAACCTCAATCATGATGCCAA +CGTCGAGGTCCGGGTTGTGGGGAATGCCCTGGCGCTCGAGCTCCGCCTTGACGTCGTCGA +GAATGGCCAGCGTGGCGTGCAATTCCTCCATGGTGGTGATCATGGGGAACAGGATGCGGA +CGCCACCGTGGGCGCTGGCGCGGAGGATGGCCCGAAGCTGACTGCGAAAGACGTCTTCGC +GTCGCAGACAGAAGCGGATGGCCCGCATGCCGAGGAAGGGATTGATCTCCACCGGTACGT +CGAGATGGGAGAGGAACTTGTCACCACCAATGTCTAGCGTGCGGAAGACGACCGAGTGGG +GAGCAACATCCTCGGCGACCCGGCGATAGGCCGCGTACTGCTGCTCCTCGTCGGAGATGC +TGCCGCCCTTGACGAAGAGGAACTCCGTCCGGAAGAGCCCGATGCCGACCCTGTGCACCC +GACGAATCCGTTCCACTTCCTCGGGCAGTTCGACGTTGGCGGCCACGCTGACGTGGAAGC +CGTCCCGTGTCTCCGCGGGAAGCAGGGCGTTGACCTCCAGTTTGGCGCGCCACGCTTTCT +CCTGCTCGATACGTTCATGGTACTTGACGAGAGTGGCGTTCTCGGGACTGACGACAACCC +GTCCTTGGGTGCCGTCCACGACCAGCCTGACCCCGTCCCCGACTTGGGCCAGGGACTCGC +CCAGCCCCACAACGGCCGGAATGCCCAAGGCACGGGCCATGATGGCGGTGTGGCTGGTGC +GACTGCCCATGGCGGTAACAAAGGCGAGAACGTTGTCGCGATCCATGCCTGCCGTATCGG +AGGGCGACAGATCATGGGCGATAATGATGCAGGGCTCGGGGAGTTCGGAGAGATCGACAC +TCTCGCTGCCCAGCAACTGACGGAGGATACGTCCGGCCAAGTCCTGAATGTCGTTGGTGC +GCTCGGCAAAGAAGGCGCTGCCAGCCTTGACGAGCTGGTCGCAGAACTCATCGACGTAGT +TCTTCAGCACGAATCCCGAGTTGCGGCGCGTGTCTCGGATACGGTCGCTGACCTCCTGGT +CGAGCATCCCGTCCTCGATGACCATGACTTGCATCTCCAGGATATCGACCAAGTCTTGGT +TCTTCTCGTCGCGGGCGCGATCGCGCAGTTCCTCCAGCTCCCGGCGGGATTTCTCGAGAG +CCGCACGGAACCGGCCCACCTCGTCCTCCAGCTGGGAGACGGGAAGTTCATGCTCCCGGA +CACTGACCACGGAGGCGCCAACCACGACGGCCTTGCCGATCACGGTACCGGGGGACACCC +CCGTTCCGGCCAACACGGTCTCGCCGGGTTCATTCCTCGCCGAACTTGTCCGAGAAGAGT +CGCTCAAGGGCCTCTATGGCCTCCTCTGCGTCATGCCCTTCGGCGGTTATGGTCAGCTCC +GTTCCCTTGCCTGCGGCAAGGAGCAGCAGAGCCATCACGCTTTTGGCGTCACTCACGGTT +CCGCCCCGTTCGAAGGCGATCCGGGACGAGAAGGTCTTGGCCAGCTTCACGATCAACGTC +ACAGGACGGGCGTGCAGCCCCAACTGGTTCTGCACGATAACCTGGGTTGTTCTGGTCTGT +AACTGTGCCATCGGAGCCGAGCCGGGGGTATCTGTACCTCGCTCCCCCGTGTCTATCGCT +AGTGCCTGGAAATATAGAAAGAAAGAGCCCCGCCAAGGGGCAAACCGCAGCAAGAGCGTG +ACCGAAGTCCCTACCATCACTTGCGGTACCGAGTACTATAGGATTGCGCGGAGGAATGTC +CACCAACCCACGATGTTGTCGCTGGTTGGTCCCTGGTGAGAGAAAAGGCGCGGCGGCACC +CACCCGAAGGCAGGCGCCGCCGCGCCAGAGAGAACTAGGCTCGGTGCTAGTACCGGGGAC 
+CCCGGTCGAAGCCGCCACCGCCGCGACGGGGGCCGCGGTCGGAGCGCGGACGCGCCTCGT +TGACCGTCAGGGCACGACCGTCGAGATCCTGGCCGTTGAGGCCCTCGATCGCGGCCTTGG +CCTCGTCGTCGTTGCCCATCTCCACAAAGCCGAAGCCCTTGGAGCGGCCGCTGTCGCGAT +CCATGATCACGCGAGCCGAATCAACGCTACCGTATTCTTCGAACGCGGTACGGAGGGCGT +CGTCGCCGGCGGAGTAAGGGAGGTTGCCAACGTAGATATTCATCGTGTGGACTGTTCCTG +ATTCATCTAGGAGTGAGGACTCGGTGCCGCCCCGTTTTGGACGGCACACGAGACCGGGGT +CAACTGATTCTGGTCTGGCCTAGTACCGGGGACGACGGTCGAAACCGCCACCGCCACCGC +CACGACGGGGGCCGCGGTCGGCGCGGGGACGCGCCTCGTTGACGGTGAGAGGACGGCCGC +CGAGATCCTGGCCGTTCAGGCCCTCGATGGCGGCCTGGGCCTCGTTGTCGTTGCCCATCT +CCACGAAGCCGAAGCCCTTGGAGCGGCCGCTGTCGCGGTCCATGATGACGCGAGCGGAGT +CAACGGCACCGTATTCCTCGAAGGCGGTACGGAGGTCATCGTCGCTGACGGAGTAGGGAA +GATTGCCGACGTAGATATTCATGGTCTGAATGAATCCTGATTCGTTCTGGACTGGTGGTC +CGGTGCCGCCCAGATGGTTGCGGACAGCACCGAGCCGGGGTCAATTCTGGTCTCGACTAG +TACCGGGGACGACGGTCGAAACCGCCACCGCCACCGCCACGACGGGGGCCGCGGTCGGCA +CGAGGACGAGCCTCGTTGACCGTCAGGGGACGACCGCTGAAATCCTGGCCGTTCAGGGCC +TCGATGGCGGCGTTCGCCTCGGTGTCGTTGCCCATCTCCACGAAGCCGAAGCCCTTGGAG +CGGCCGCTATCGCGATCCATGATGACGCGGGCGCTGTCGACGCTGCCGTATTCCTCGAAC +ACACCGCGCAGCTCGTCGTCATTGACGGAGTAGGGAAGGTTGCCTACGTAGATATTCATC +GGGGGGAGATCCTTGCTCTCGCCAAATGGTACTCGTACGTCCCGGCGTCCACTTTGGCGA +CCATGGACAGGCGGGGGGAGTCCGCAGACTCACAGAAAGGGAATTGTGGAGGGAAGATGG +GTAGAACCCGTCTTGCAGGATGCCTGAGCGACCTGCGTTGCTCAAGGGTCGCTGCTGCAG +AATCCCGCGAGAGCCATCAAAGCCTCGCGATCACTGTGTACACCAACCGGCGGTCAACAA +CATCTCGCTGCACTGTCTGTCCGCAGGGCAGAACAGCCAGCCCGATTTCTACGCCTCTGT +CCCCTATCGGGCGTCCGAGCAGAGACGTCGCCACTAGTGCGTGGCGAATTCTGGGGCGCC +GTTCGGGTGCGGCGCGTAAGAAACCAGCCTTCGACCTCCGGATGTGTACTGTAGAGCTTA +GTCGGCGGCGCCGACTCCTGCATGCAGCGGGTTCGCAAGAACCTGCCTTCGGCTAAGAAT +GTACAGTCTGCAATAGGTCGTGCAAGTGCGACACCCAAACAACCGCAGATATTTGTGAAA +ACCGAGGAGCCCAGACCAGCCAACACGAGGGCCAAAAGCCCCGGTAAACGGCCGCGCAAA +CTGGGCAACCAACCACTCTTCGGACCCTGATTGGGGGCCTGGAAATCGCGCCTGCTCGGC +AACCGAAAACAGGCACCGAAACTTGCCCTGCCCGACGCCCGAACCAGAGGCAAAGTGGCA +GCGCAAATGATGGGCCGCTAGGTTCCCAACGTAGCTTGTTTGTGTATCGCCAACGAGGAA +CCCGCATGAAACCGCATCGGCTCGCCATCTTCCTGGGCACGTTCCCCATGCTGTTCTCTC 
+TCCTGCTGGCCGGGGAAGGCACCATGAAGGAGGCCGGCAAGAGCCTTCGCTTTGGCAATG +GTCGCCTGATCCTGACCTTTGACCGGGATACCGGCATCTGGACTGGGCTGGAGGCGTCCG +GCGGCGCGGTCTGCTTCCGGCGAACGGCGGACAGCCCCTCGCTGAACGTGCAGGTGGACG +GCAAGCCGGTGTTCGGAGCGGACAGGAAGATGAGCCTGCGCGAGCAGAAGGTCGTCCAGC +TCCCGACAGCCAGTCGACTGGAGTTGACGATGGGACAGGGCGATTGGGCGGTCACGGCCG +CCTACACGCTATGGGATTCAGGCACGCTGCAGCGCCAAGCCACGTTCGTGTACTCCGGTC +CCAAGCCGGAGGGCGACCACGAGGTCCGGAACGCGCTCTTCGTTCTGCCCGATGTGGGTC +TGGCCGGTACCGACGCCTTCTGGTTTGCGACGGCGGAATACCCCCCTCGGGACCACCCGT +TCCGCCACACCGATTCGGGCCGACGCTTCGGCTTCCCCTTCTCCGAGTCCACCTTCCACG +GCTTTCTTGCGCGCGATCCCCAAGCCAAGCTGAGTCTGGTTTCGGCCTACTACACGGAAG +ACGAACGGGCCAAACTCCTAGTACAGGAGGGCAAAGGGACCGCAACCGCGTTCCACACTC +ACTTGCTGGCAGAAACGCTCCGTCCCGGGCTGCGGTTCGAGGTGGGCAGCCAGTTGCTGC +GGGTCGTTCCCGGCACACGTCAGGACGCACTTCGAGCCCTGCAAGGATTCTATGACCTGC +CCGGGCTGCGCACCAAGATCGGCATGCCCCCCGACACGGGACGGCAGATCTTCTACTCGG +CTCATCCCGGCGGCACCATCGATTCGTCGTTCCGGGACGTCGGCGGCTTCGCCAACTTCA +CCAAGCTGTTGCCGAGCATCCGGGATCTTGGCGTGAACACCCTCTGGTTGATGCCCTTCT +GGTATGGCCCCGTGTACGCTCCCTACGACTACTACCGCCTCGATCCCAAACGATGCGGCA +CCCCGGCGGAACTCAAGGCCCTGACCGACAAGGCCCACGCTCTCGGCATGCGCGTGCTGG +GCGATCTGATCCCACACGGGCCGCGCGAGGAGCCCGGTGCGAAGCCGAGTTTCGCCGAAC +AACACGGGGATCTGGTCTGCCGCGACAAGGACGGGAAAATGATCCAGTGGTGGGGTTGCC +ACTACTGCGACTACGCCAATCCGGGCTGGCAGGACTACATGGCCAAACACGCCGCGTACT +GGGTGCGCGAATGCGGGCTGGACGGCTACCGGGTCGATGTGGCGGCGGGCGGCGCGCCCA +ACTGGCGGCCCTACGACCACAACCGGCCGAGCTTCTCCGGCCTGCACGGCGGGTTGGCGC +TGCTGCGCAAGGCCCGCGCGGCCTGCCTGAAGGAGAACCCAAACACCATCTTTCTCGCCG +AGAGCACCGGCCCGACCATGTATTCCGCCGTCGAGCACGGCTACCACTGGGCCTTCTCGA +CTCTGCTGGAAGACCACGTGCTCAAAGACGCTCCCGCCGACTTTGTCCAGGCCATGTCCG +GCTATCTGGAAAACCAGACCTATGCGTTCCCGGCGGACGCGTTCCCGATCCGCTTCCTGA +CCAACCACGACAAACTCCGTGCCCGCTACCGCTACGGCCCGAACCTGCACCGGACGTTGC +TTGCCCTCTGCGCCTTCATGAAGGGCGCTCCCCTGCTCTACGAGGAGGAGGAAATGGGCA +ACGAGGACTTCATCGCCAAGCTGTACCGCATTCGGCAGACCTACGACGAGCTATCCGTCG +GCACGGTTAGCTACCGGAGCATCCCGGTCGAACCCAAGCATGTCTTCTGCATCGAGCGCG +AGTACAAGGGCAAGCGCAGTGTCGTTCTCATCAACTTCTCCAACCAGATGTCCGAAGTGA 
+AGCTCTCGCTTCCAAAGTCAGACCTGAAGAATCCCGGCATCTACGAGGCAGTATCGGGCC +AGCGAGTCGATTACGCACAGGATCTCACCCAGTCCCTTGATCCCTACGCCTATGCTGTGC +TCGTCATCCGCCAACGGGATGAATTGCCGCCCTCCGTCCCCAAGGAGAGAGGCGAGAGTC +CGGCAGCTCCCGACGGAAGAGCGATGGATATCAAGATCACCCAAGAAGATAGCCTGACTC +GCGTCTCGACCCCGCTCTACAGCGCGGTCATCGACAGCGCGCGTGGGGGCTTGCTACAGG +AAGTGCGTGGTGCCGACGGCAAACTGCTCGTGAATGGTGTGGAACTAAAGGAAGGTCGGC +GCAAACTCTTTGTGGGGCACGATTCTGTGGACTTTGCCGACTGTAGCGTCCCGCTCAGGA +TTCTCGCCAGGGATCGGCAGTTCCCGGATGGCGGGAAGGTCTCCCTTCTCAGGGGCAGAG +CAGAGTTGCGCGACGGCGATGGGCATGCCTGGATGGACCTCACGGTTCTCTACTCTCTCA +GGGCCAAGAGTCTGAGCCTTAATGTTTCCCTTACTCCCCAATACAGGTTGTCACCAAGCA +AGTCCGACCTCGGGATGAAGATCCACTTTGTGCCGACCACCCACTGGTTTGCGGAGACTG +CGGAAGGGAACCTGCTGGGACATGTCATCCGGCGGCATCCTGCTTCGCATGGGTTTTCCG +GGCGGTATTGGCATGGTGCCGGGGAGGCGTTCTTCAATGGCTCTCTGTATCCAGTCGTTG +GCGAGTTCGGCGTTCTGGACACCAATCGACGGATTGCCCTCGGCAGCATGGCGTTGCGAC +TTGATGGAGCCCCGCTTCCCGTGCGTTTGCTGGAGGACGAGCCCCCCGGTTCCCCGGTGG +TTTTGGGGGGCCCGGCAGCAGCCACCGCAGACATCCCGTTACTTAGGGGGTCTCAGCGTG +CTGTGTGGCAGCAAGGAAAAGCGAAAGGCATGGTGGTCACACTGGATTTCCGGTCCGTGC +CCGCACGGTTCACTCAGTACCCAGATAGTTTCGGCGTTCGGGGATGGGATACCGGAACGC +CGAAGCTGTGCTACCGCGGCGGCTGGTGCACGTTCGGCCCTGAGTACCTGTTCCGTGGCT +ATGGAATGCGCGCGACCGTGGTCCGATCCCACGGTGGGGAGTTGACGGCGCTGACCGATG +CCGCTGGGAATGGCCTGCGCGTGACGGATGCGCGGTTCTACACCGACCAAGGCCTTTTCG +GCGACTGGCGCGACCCACGCGGCGTTCTGCGCAAGATGAGCGCGAGCAATGTGAACGACC +CGGAACCGGACACTCAGTTGCTGCACCTCTTCGAGGGACCACAGGATTCCGCTCCTCTCC +GCTTCCGCAGCTTCTTCCGCCATCCGCACGCAGGAGGCAGGTCGCTGCTGAATCCGAGAG +TCGAGTATGAGATCTCGTACACCCCGCCGACCGAGAAGGGCAAGGGGCTGCGAATCGACT +GCGGCGTCCGGCCGCACCTGGTCAAGATCGGGACCGGGGGATTCCTGGCCTACAAGATCA +GTCTGGGCGGTTGCGACCAATGGCAGGTGGACGGCGGCGAGTGGCAGCCGCTGCCAGCGA +AGGGAGGGCGACTCTGGGAAAACAAGGAGGCCGGGCATCTGCCCAAGACGTTGCTCTTAC +GCAACAGCAAGACAGGTCTGTGGACCCGGTTCTCCGACTTTGTGGGCGGTCCCGACCAAG +TGGAGAATGTGTTCCTGCACGCGGGGCAGGGCCAGGTGCACCTGTTTGTCGCTTTCTACG +ATGCCGAACCCACCGACGTTCGCCCGGTCTGGCGGCGCGCCGCCTTCACCATGCAAGCCG +GAGGCAAGCAATGAGAATCGCTACCTTCCTTCTACCATTGCTGGTCGCCATCGCCGGAGC 
+CGCCCCCAAGGTCGAGACCGGAACCGATGGTCTGCGTGTGCGCACGGCGCGCTACGTGGC +TACCTTCAGCACGGATTCCGGCCTGCTGGCGTCTCTGGCGTTGGTCGACGGTACGCCATT +GCTGACCAGTCCCCGCCTCTACGCCGATGTCCTGCCGGATGGGCGCAAAAACTTCTCTGC +CAAAGCCAAAGCCGCACCCAAGGCGAAGCCCCAGCCGGATGGCTCTCTGCTGGTCGAGGT +GGCAGGGGCGCTGCTCGACAAGGACGGCAAACCACACCCGACCTACCCCTTCACCTATAC +AGCCAGCTACCGGTTCGACGACACGGCACAGGTCCGTGTATCTGTCTCGGTGATCCCCGG +CTTCGACAGCGACGCGGTGTTCGGTTTCCTGGGGCAGGTGTTGAGTACGGCGAGCCAGCG +GGAGTTCTTCGTGAACACGGCGGACGGGCTGATCTCCGAGATGGCGGCCACCCATAGCGG +CCGCACGTACCAGTCGGAGAGCGAGCCGCTGGATCTGAAGGACCCCTATCTCGGCGTGTT +GCTGAAGACTGGTCAGATCCTGCAGTTCCGTCTTGTCTCGGGGGCGGAATCCCTGCTGAA +CGTGTTCTTCCACGATTCCGGAGCGGGACCGACCCACCTTTTCCTCTGCCCCCTGAGCGG +CTCCAACCCCCGCCAAGCCAAGACGGGCAAAGCCTGGCAGCAGGAACTGGTGATCGAGGC +CATGCCGCTGGCCGAGTGGACCAAGAGCCGCTGAGCCGGCGGGCGGATCGCCGAGCATCG +TGCTTATCTGCATTTGGGGTCCACGTCCAGCCCTGTCTTGCGCAGGATGGCTTGCCCGTC +AGGTCCGGTCGCGAACTCAATGAAGGCCTTGACCAGTCCAGGCTGTGGCGTGCTCATGGG +CGTTCCGACTACCATGTGTTGAACATTCCCATAGCGCTTCAGGCCTTTGGACTCCACTTC +TTCCACCTCGTCCGCCATCGCGGACTGCGTGAAGCTCCAGACCACGGCACCGTCGATGCT +CCCACCCGCCACGAGCCGCGCCACATCGTAGTCCGTGTCCCGGACCCGGATGTTGCTGGG +CTCCGTGTCCGGGCCTTTCGGTGGCTTCCGGTACCCAGACTCAGAGATCCGCGCGAGCAG +GCACGCGCCCTGCCTGGTCCTACCATGCACCATGCCCGGCTTGCCGAGATCAAAGGCGGA +CCGGATTCCCTTGGGGTTGCCTTTCGGCACGATCAGGCACGGGGTGAACCGCCCAAGCTC +GGCAGGCCTTCCGCCGCCATTTCCTCTGGCCCGAAGCAGGTTCTCTTCCGCGCTTCCGCT +CCATGTGTTGGCATACAGGAACAGACCGACATACTCGTCCCGCTGCAGACGTTTGTTCAG +TTCGTCGGGGCGTATGTATCTCACATCCACTCGGACGCCCTGGCTTTCGGTGAAGCCTTG +GATCAGCTTCTCCGCCACCGGTTCCAGGTCCACCAGGCAGTACATACGGACATGGTTTGC +CCGTGGCTTTCCCCGCCAGCTGCAGCCACCACACGCCACCAACACGGCGACGAACAGCAT +TGCGCGTTGCCATCTCATTCCCAGCACTCCCTGTCTGTCTGCCTGCGCCACTCACGCCAG +AACCAACCGGCGACGGACGATAGCCGTTCCCAGACCATGTTGCCAGCCGTTGGCGGCCTC +GCAGCGCAATCGTCGCTTGTCCTGCCGTTTGCCAAGCGCAAGACGTCTCCGAAGGACTAC +AGTACGCGTCATGTTTCACCCGCAACCGGAGGCCTACCATGACTGCATTCCCCAGAACTG +ATGTCGGTGGTGTTTCTGTATCCCGCCTGATCATTGGCACAAACTGGTTCCTCGGGTATA +CCCATTCGACTTCCGCGCAGAGCCGCACGAACTCCGAGCGGGTGAACCACCGCGACGTCG 
+TAGCAGGAATCGTCGAGACCTTCGTCGAGTTCGGCGTGGACAGCATCATGTGCCCCCACA +CCGACACGGTGATCCCCGAAGCGATCGAGGAAGCCCGGCAACGAACCGGCAAGCCGTTGG +TGGTCATCAGCACCTTCGCTCTGCCCGTGACCAAGCGCACCGCGCTGGACGGCTTTGACC +TCGGCGAAGTGGAGCGGATTCTGGACGAGCAGGTGGCGCGGGACGTGGATATTGCCATGC +CGCACCAATCCGTCACCGACATCATGCTGGACAAGTGCTCCCGCGAAATCCGGCAGATGG +CCCCGGTCTGTGCCCTGATTCGCGACCGGAACATGGTCCCCGGACTCAGCACCCACGCCC +CCGAGACCGTGATCTACTCGGACGAGACCGGCCTGGACGTGGAGTCCTACATCCAGCCCT +TCAACCTCATGGGTTTCCTCATGCAGGTGGAAGTGGACTGGATCGCCCGCATTATCCAGA +ACGCCAAGAAACCGGTGATGACGATCAAGAGCATGGCGGCGGGCCAAGTGCGTCCCTTCC +AAGCTCTGACCTTCAGCTGGAACGTGATCCGGCCGCAGGACATGGTCACGGTCGGCACGA +GCTCCAAGCACGAGGCGCGCGAGCTATGCGAGATGTCGCTGCAGATTCTGGACCGGCGCG +CCACGACCCAGGAACTGCAGCGCACCCGCTCCAAAGCCAGCATCTCCCCAGCGTAGGGAC +TACGTGAAACCTGCCAGCGTACTTTTGGCGACCCCGGATGGCGAAGGCTTCTATCGCTTC +TCCGATCCCGTGCGCGTGGTGACGGCGGGCTCCCTTGAGGAGGTATTGCCTACCGTGACG +GCAGTGGAGGCGGCGGTGGCGCAGGAGGGGGTGTTTGCGACGGGCTTTGTTTCCTATGAG +GCCGGGCCTGCTTTCGACCGGGCGCTGGCCGCCTATCCCCCCGGAGAATTCCCGCTGGTC +TGGTTCGGCCTGTACCGCAACCGGGAGGTCGTGCCGAAAACCGAGATGCAGGATGTGCCG +CCTCTGGCTTGGCGACCTTGCCTCGACCAAGACGAGTACGTGGCCGCGATCCGGCGCGTG +CGCGAATACATCGAGGCGGGGGACACGTACCAAGTCAACTACACATTCCGTCTGCACGCC +CCGTTCGCTGGGGACCCAGAGGCCCTGTTTGCTCGTTTGGCATCGGCTCAAGCGTGCCGC +TACGCCGCCTATGTGGACACCGGGCGATATGTGGTCTGTTCAGCCTCGCCGGAGATGTTC +TATACGCAGAATGCAGACGTGTTCCGCTCGCGACCGATGAAGGGGACGCGGCCTCGCGGC +ATGACCTTGGCCGAGGATCGAGCGCACCGCGAAGAGCTGCTTGAGTCGGAGAAGGACCGA +GCGGAGAACGTGATGATCGTCGACATGGTCCGCAACGACCTCGGCCACATCGCGGAGGCG +GGGACTGTCCATGTTCCGGAGCTGTTCTCGGCGGAGCCGTACCCCACCGTCTGGCAGATG +ACCTCGCTGGTGGAGGCGCGGTCACGGGCTGGTTTCGGACAGACGCTCAAAGCTCTGTTC +CCGCCCGCCTCGATCACCGGCGCTCCGAAACCGCGTACGACCGAGATCATCCGCGAACTG +GAAACGACCCCGCGCCACATCTACACGGGCACCATTGGCTACCTTGGCCCCGAGGATGCC +CGGTTCAACGTCGCCATCCGTACGGTCCTGATCGACCGACAGACCCAGCAAGCCGAGTAC +GGCGTAGGGGGCGGAATCGTCTGGGATTCCGACCCGCTCGCCGAATGGGAGGAATGCATG +ACCAAGACACGTGTGTTACGGACGGTTCGCCCCGAATTCTCTCTGCTGGAGAGCCTGTTG +TGGACTCCCGACGAGGGCTATGCGTTGCTTGATCGGCACCTGGCACGTCTATGCGACACG 
+GCCGAGTACTTCGGCTATCCTGTAGACGTTGTATCCGTTCGGCAGAAGCTTGAGGAGTTG +GCCGGGAACCTGGAACCTGTGCCGAACAAGACCCGGTTGCTCGTGGATCGTCACGGGGAG +ATCACGGTCGAAGGCAGCCCGCTGGGACCGGCGCCCGACGCACTCGTCTGGCGGGTATGT +GTGCACCCCGAACGGGTCGATTCCCATGATCCCTTCCTGTATCACAAGACAACCCATCGG +GCGGTCTACACCCAAGCCGCTGCCGCCCACCCCGACTGCGACGATGTGATCCTCCAGAAC +GAGCGCGGGGAGATTACGGAGTCTTGTCGCGCCAACGTGGTGGTCGAGATGCCTGAGGGA +CGATTCACGCCACCGGTCTCCTGCGGCCTTCTGGCGGGCACGCAACGGGCAGAACTTCTG +GCTCGTGGCGAGATCACCGAGAAGGTGTTGACGCCGGAAGACCTCTACGCGGCCACAAAG +GTGTTCCTCATCAACTCCGTGCATGGCTGGGTTGTCGCAGAGTTGTCCGACTCGTCGGAC +TAGTCGGACCAGTCGGACCCGAACTCACCGCCCCGGGCACGAGCGTCGCGCAACCTGAAC +GCCCTGATTCAGCCGCCTTGTCGTTTTGGGGAACCCAGAGGAAGGCAAGGCCGGGGACAG +GTCAGGACAGGAGGGGAGGACCATGACCCAGGGCTTGGGCCCTGGGCTGGTATCTCCCGC +CCCATTCGGGGCTGAAGAGAAGCCAAGAGGAGGGCTGGGGGAGATGGGTGCCTTCTACGC +CAAAGGCGTTACCTATTGGCGGTCCGTGACGACGCCCTGACCTGCCACAGTGCTCCGCCA +ACGTGCCATGCCTTGCACCATGGTCGAGCAACCGGGCGTTCAGCCTGCGCGTGCCGCTGA +GCTCTGGTCTGACTGGCCTGTGCGGCAACAGAAACGCCCCGCAACGATTGCTCGTTGCGG +GGCGCTGAATCACCAGTCGCTAGTGGCTAGCGACGGATCGGTCGACTAGTCGAGGTCGAA +GGGGCTGACGATCTCGCCCTTGGCGGCCTTGATGGCCTCGAGGCGCTCGCGCTGGGCCTT +CATCTCGGCCTTCATGATCTCGGGAGTGTCCGTGACCTTCGTGGAGTAGATCTCTTCCAT +GCGGTCGAGCTTGGCGATCAGAGCAGGGATGTTGACGCGGAACTGGAAGGCGTAGTCTTC +CTTCGCGTAGTCCTCTTTCAAGACCTGCTTGAAGAGCTTGGCGAGATCCTCGTACTTCGG +GATCAGGCCGTAGCCGGCGTCGATGGCATCGGCTTCGCCGTGGATCCGCAGTTCCATCCA +CTTGACCCAGACGGACTTGTCCAACTTGCCGTTGGTGTAGCTGCCGTCCTTCTTGAGGAA +GTAGTTGGTGCCGAAGACCTTCGGGCGCTCGATGTCCTTGGCGAAGTCCAGGTTGTTCTG +GATGTAGCGGCCGACGGACATGGAGAGGAAGTCCATGTTGGACATCACGTTCCACTTACG +GACGCCTTCCGCGCCGATGGTGGCGGCGGTGGTTTCGGACTCAAGCATGGCACCCATGGT +GCAAACGCCGTGCTCCCAGCTGTAGGCTTCGCGCACGGGGGCGTTGGTGTCGCTGTCGCG +ACCGCCGTAGATGATGCCGCCCATGGGCATGCCTTCGGGCTTGTCCCACTCGGGATCCTT +GTTGGCCAGGTCGTTGATGCGGATGGTGTAGCGGGCGTTCTTGTGGCTGCAGCTGGCGAC +CTTGCCATCCGGGCCTTCCATGCCCTCGGTCCACTCGGTGGAGCAGTAGTTCATGCCCGT +GGCCGGGATGTCTTCGCCCATGCCCTGCCAGTAGGGCTTGCCGTCCTTGACGAGGATGTT +GCCGAAGATCAGCTCGCCGGGCGTGTGCAGCACGTCCCAGATCACCGGATCGTCCTTGCT 
+GTTCACGTCGGTGATGATGCCGAAGATGCCACTCTCGACGTTGACCGCGCGGAACTCGCC +ATCGATGACCTTGAAGTAAGCAAGGTCGTCGCCGACGATGTTCTCGCCGGGGATCATGGC +CGTGGAGGTCTTGCCGCAGGCGCTGGGATAGGCACCGGCGAGGTAGGTCTTGCGGCCGTT +GGGGCCATTGCAGCGCATGATGAACATGTGCTCGGCCAGCCAGCCTTCCTTGTCGGCCTT +GCGGATGGCGAGGCGCAGGGAGAGCTTCTTCAGACCGACGGTGTTACCGGCGTACTGGGT +GTTGACCGACCGCACGCAGTTCCGGGTGTAGTCCATGTACACACGGTTCAGCTCGACGTT +CTTGGAGGTCATGCGCTCGGTGACTTCACCGGTGGCGTGCAGGTACTTGAAGAACTCGAA +GTTCGGGTCGGCGGCGTTCTGGCGCTTGAACTCTTCGTAGCCGCGACGGTACAGCAGCTC +TTCGGAGTGGACGACGTAGGCGCTGTCGGTGATCTGCAGACAGGGGATCGAGAAGCTGCT +GCCGGTGGTGCCCAGGCAGAAGAAGCGCACGAACATCTCGCGGCCCTTGTAGGAGCCACG +CTGAAGGTCGTCCATTTCCTTGACGCCGGCTTCGCGTTCCATCTGGTTCAGGTTCTTGTC +CAGCTCGACATCGGCAGGCACCAGGTACTTGGTGACTTCCTTCTTGCGCGCTTGGTCGTA +GTAGTTGTCGAAGTGGACCGTGTGGCCGTTGGTGGTGAGCTGGAGCTCTTCGAGACCGTC +GATGGCCTTCACGCGGCAGTAGGCGGCATCGCTGTCGCTGTCGTCGCCGACGAAGACGGT +TTCGGGGTGTTCCTAGGCTGTTTCCTGGTGGGATCCTCTAGAGTCGACCTGCAGGCATGC +AAGCTTGAGTATTCTATAGTCTC +>NODE_9_length_20531_cov_182.121_ID_17 +CCCACTCGTATCGTCGGTCTGATTATTAGTCTGGGACCACGGTCCCACTCGTATCGTCGG +TCTGATTATTAGTCTGGGACCACGATCCCACTCGTGTTGTCGGTCTGATTATCGGTCTGG +GACCACGGTCCCACTTGTATTGTCGATCAGACTATCAGCGTGAGACTACGATTCCATCAA +TGCCTGTCAAGGGCAAGTATTGACATGTCGTCGTAACCTGTAGAACGGAGTAACCTCGGT +GTGCGGTTGTATGCCTGCTGTGGATTGCTGCTGTGTCCTGCTTATCCACAACATTTTGCG +CACGGTTATGTGGACAAAATACCTGGTTACCCAGGCCGTGCCGGCACGTTAACCGGGCTG +CATCCGATGCAAGTGTGTCGCTGTCGACGAGCTCGCGAGCTCGGACATGAGGTTGCCCCG +TATTCAGTGTCGCTGATTTGTATTGTCTGAAGTTGCTTTTACGTTAAGTTGATGCAGATC +AATTAATACGATACCTGCGTCATAATTGATTATTTGACGTGGTTTGATGGCCTCCACGCA +CGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTTTCCGGTGATCCGACA +GGTTACGGGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGC +GTTTCCGTTCTTCTTCGTCATAACTTAATGTTTTTATTTAAAATACCCTCTGAAAAGAAA +GGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCCTTTCTCTGTTTTT +GTCCGTGGAATGAACAATGGAAGTCCGAGCTCATCGCTAATAACTTCGTATAGCATACAT +TATACGAAGTTATATTCGATGCGGCCGCAAGGGGTTCGCGTCAGCGGGTGTTGGCGGGTG +TCGGGGCTGGCTTAACTATGCGGCATCAGAGCAGATTGTACTGAGAGTGCACCATATGCG 
+GTGTGAAATACCGCACAGATGCGTAAGGAGAAAATACCGCATCAGGCGCCATTCGCCATT +CAGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTG +GCGAAAGGGGGATGTGCTGCAAGGCGATTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCA +CGACGTTGTAAAACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAG +CTCGGTACCCGGGGATCCCACGTACAACGACACCTAGACCACCCCCATGGCCACCATGGC +GTAGATGCTTCCCCTTTGAATTCCCATGAACAGAAACTGAAGGAGATCGGTAAAATTGAT +ATCGGGTGTCGTCATGTTCCCACTGCTGTATTCTTGTGAATGTGTTCCGGTGCCGGGTCC +CATCCGATAAAATATTGTCCGCTGTCGCAGCGGAATTGCGACAGCGGACAATATGACCGG +TTCAACAGATTGCCGTCTTCATGATCCGGCAGATTATTGGTGTAAAACGACAAACTTTCC +GTTCTTTACGGTCAGCATCTCGAATGCATTCTGGTTCAGCCCGCAGTGATTGGTCTTGGT +ATAGTGAAAAACACCGGTAATCCCGACAAAGTTGGTATGTTCAATTACATTGCGGAGTTT +TGCGGGATTGTCTCCGACCTTTTTCAGGGCCTTTGCGAGAATTGAGATCGCATCAAAGGC +ATGCCCGCCGAATGTGGAGGGCGGTTCTTTGAAGGCCGCCTCGTATTCTTTTTGGTATTT +CATCAATATGGCTTTCTGGGGATTGCTGGCGGAAACAGTGTCTGCCGCCAGAAGACGCCC +ACCCGGAAAAATGAGGCCGTTTGCCGCCGCACCCGCAGCCTCGACATACTTGATGTTGCC +GAATCCATGGCTCTGGTAGAGCTGGATTTTCATTTTAAGCTGGCGCATGTTTTGCGGAAC +GATCGACTGGGCCGGCACGATGGACCAGTTCACGACCGCCTGGGCGCCGGAATTTCGGAT +CCGGATCAGCTGGGCCGTCATGTCCGTATCGGTCGGATCATAGGTTTCATCCGCCACAAT +TTTGATCTTGTAATGGGGGGCCAGGGCTTTGAGCTGCTCACGGCCGGCAGCGCCGAAACC +GGTCGTGCCGGTGATGATCCCGATCTTGTGGATCCCTTTTTTATCCATGTTTTCATATAT +CCGTCGAACGGCATCACTGTCGTTCTGCGTGGTTTTGAATATCCATTTCCGTTTCGATAC +CGGAGCGATAATGCTCTCGGCGGCAGCGCAGGAAATCATGGGGATCTTTGCCTGCTGGAC +AATCGGGATTACCGCCATCGAAGTCCCGGTTCTGGAAGGGCCGATAATGGCGCATACATG +ATTTTTTTTGATCAGTTTCTTGACCGCATTGACGGCGCGGGTGTTGTCGCCCTGGGTATC +TTCAATGAAAAGCTCCAGCTTGTGGCCGTTGATACCGCCGGCCGCATTGATCTGTTTGGC +CAGCATCTCGGCCGTGTTTCGTTCCGGTCCGCCCAGCCAGGACGCCCCGCCGGTGATGGC +GAATACACAACCGACCTTGTACGGTTTTTTTTGTGCGGCATATACGCTTACCGCCGATAA +AAGGAATAAGACTGCAATTGCTGTGGATAATAATGTCCGTTTCACGTAACCCCCCTCACT +TCTTCAGGTTTAGAGTTAAAGGTCATTAACCCGGATTGAAGTTAATGCCATCGGATGCCT +CGTGGTGTCGGCGGTCAGCCCGTTACAGGCGCAGCACCCGCTTCGATTCCAGCACGGCGA +TGTCGTTTTCGATCAGGGTCGAAATGGCCCGGTCAACATCATCAAAACGGAAGATCACGA +TCGCCTGTCCCATGTCCTTTTCCACGAAGGCATACATGTATTCCACATTCAGGCCGGCAT 
+GTTCCATGATGGAAAGCACGTTCCCCAGCGCACCCGGGCTGTCGGGAATCGCAACCGCGA +CAACCGCCGAAATCAGAACGGTAAACCCCTGGTCTTTTAAAGTTTTTTCTGCTTGTTCGG +TATCGGACACGATCAGGCGGAGGATCCCGAAATCGGAGGTGTCCGCAAGAGACATGGCCC +GGATATTGATCCCGGCGTTTCCCAGAACCGTGGCGATTCTGGCCAGCCGGCCGGACCTGT +TCTCCAGAAAGATGGATAACTGTGAAATTTTCATGCTCCCCCCCTCACCCCCCGGTCGGA +AAGAAACCGGCCGCTATACCGGAAACGGCGGCGCCGACAGCCCGGCGGTCTCCTCGAATC +CCATCATGAGATTCATGTTCTGAACCGCCTGGCCGGCCGCCCCCTTTACCAGATTGTCGA +TCACGGACATCAGTATGAGCCGCCGGTTGTTTTCATCCACTACAAAAGCGATGTCGCAAT +AGTTGGTTCCCCGGACGTTGCGGGTGTCCGGCGGATGACCGTCCCGGTCGATCCGGATGA +ACTGCCGGCCGGCATAATAATCGCACAGACACGCCGCGATGTCGTGCTTCGTCACATTGC +CGGCAAGCCCCGCATAAATCGTGGTGGCCATTCCCCGCGTCATGGGGACCAGGTGCGGTA +CGAACGTAATGTCCACAGGTGTGCCGGCCGATTTTGTCAGAATCGATTCGATCTCCGGTT +CATGCCGGTGTGCGGCGACCTTGTAGGCCTTGAACGACTCATTGACTTCCGAAAAATGGA +CGGTTAAAGACGGAGACCGCCCGGCGCCGCTGACCCCGGATTTCGAATCCACGATGATGC +CGCCGGAATCGACCAGTCGATTTTTCAGCAACGGCACCAGCGGCAGCAGTGTACTGGTGG +GATAACAGCCCGGATTCCCTATCAGATCGGCTTTCCGGATATCATCGGTATATACCTCGC +TCAATCCGTAAACCGCCCGGGACAGCAGATCCGACGATGAATGGGGCTGGTAGGCGGATT +CATACAGCCGGACGTCGGAAAACCGGAAATCCGCGGACAGGTCGATGACTTTTTTCCCGT +TTTTGATAAGACCCGGAACGATGGCCATGGGAAGCTTGTGCGGCAGGGCCGTGAAGATCA +CATCCGTCTGCCCGCAGACCCTTTCAACGTCAAAAGCCTCGCATTCATTGGAAACGACGC +CGGCCAAGGCCGGGAAAACCTTTGCAAACGGAACGCCGGCATACTGCCGGGATGTCAGAG +CCGTAATCCGAACCTCCCGATGGCCGGCAAGAATCCGGACCAGTTCCGCACCCGCATATC +CGGTGGCGCCAGCCACTCCGACCCTGATCATATCGCCTCCTGTATCCGTATCCGATCCCG +GAATCCCTCCGGGCATAAATCCATTCATAAATCTTTAAGGCGCGCGGCCCGCGCCCTTCC +TGCCGTGGCATACAATTCACCTGCGCTGCGGCCCGCCGCGGATTACCGCAACACGCCGGT +CGCAAATACGCTTATTGTGATTAGCAGAGCTTTGGTGCGATTTCAAGGCGTTTCATTCAT +TTCACCGACGTTTTCCGCAAAAACCGGTTCCGGTGTCATGCCCCCTTCCGATCCTGCCGA +ATGCGCCTCACCGGTTTAACGGCATCAGAACCCGGTGTAAAGCGCGTATCGTCTTCGATG +GCATAATCACACATCAGCAGGTCAAGGGGATTTAAGAACACCGACCGGTCGAGACGGTTC +ATCACCGGCCACGACAGGGTATCGACCGGTGGCAGATCGTTTGCCCGGGCCAGGGTAATA +TGAAGGTGTGCCGGCAGCCGGATCTTCGGGGATACGATCCGCGCGATGGCGGCGAATACC +GTCCCCGCAGCAAGGGTATCGCCCTGCCGGAGCCCGTTGTCCGGGCGAATATGGGCATAA 
+AAGGTGTAAAAAGGCGCATCCGATGGAGCGCACCGGCTTTGAACGACTACCGTGCGCCCC +AGAAGGTCATCCATGATGCGGACGATTTTCCCGTCAAAGGCCGCTGGGACCGACACGGTC +TCATCCAGGCGGTATCTCAGCCCGTCGGACGTTTCAAAAAAGCAAAGGTCCAGGCCTTCA +TGGGCAGAGGGTCTTACGCCGCCATCCCCCCACCATTTTTCAGTCGATCCGAAGAGCATG +CCCGGGAAAAAAATCAACCGGGAAAAGCCGGCATCCGCCAGGCCGTTTACGGCCATCAGG +TCGGGAATGAAGGCCGGTACGGGACAGATATCTGCGGCAACATCCGATGGATCCGCCATC +CTGCCGGCCGATTTGAACGAGCGGCTCATTGCGGTCGTTTCCCGCCGCCGGCCTGGGTCG +AAAGCTTGCGGTATTTCCGGGCTTCGATGTCCTTTCCCTGGGCTTCATACAGATCCGCGA +TCCGGAAGAAATAGTCCGCCCGGTTGTCCGGATCTTCCTTGGCCAGGCCGAAATAAAGCT +CAAGAAGCTTCAGCGATTTGTCGCCGCTTTTGATGCACAGATCGGCGTACCGGCGTTTGA +TGATCGGATCGGCCGCACTGCCGCACATGATGCAGCGGTTTAAAATATCGGCATACAGAT +CCCGGGCCTTTTCAATCTGACCTTTCGCCTCATAGGTTTTGGCCAGCCCGCGGTTGACGA +TTTCATCTTTTCCATGGATCTCGAGGCATCGGGTAAACACCTTCTCCGCCTCGTCATAAA +GTCCCACCTGAAACAGGGTCTCGCCCTGGAGCATCTGCATCGGCCGGGTCGTCTGGATAT +CGTTCGGGGCGCCGGAAAGCAGGTTCCGGGCGTCAGCGGTATTGCCCGCCTCCCAGAGGA +TTTCGCACAGCAACTGGTAACCCCGGATTTCCCCGGGGTTTTCTTTTAAAAAATCCTCGA +GAATCTGCCGGGCGTGATCCGGATCGTTCAAATGCATATAGGCCGTGGCCAGCTCGACCG +GTATCAGGGTCTTTGCAGACGCATTCTCCTCGAGCGCCCGCGACAGCTCTTTCACTGCCT +TCCGGAATTCACCGTGGTTCAGTGCGATGTAGCCGGCCGCGAAAGCCTCCCCGTATCCGA +GATAGGCATCCTGCACCGCTTCGGGCAATGCGCTGACCAGTACGTGAAACAGATTTTCCG +GCGAATCATACAGTTGTGCATCCGCATCCGTTTCATCCGCCATACCTTCGGCATCGGCCG +TCATATCGTCGTCTGCGTATGCATCATCATCTTCGGCCCCGCCGGCGTCCTCTTCCGCGC +CGCCCCAATCATCGACGTACGCCTCTTCCCGGGCGTCACCGACCCACTCCATTTCGGGTT +CGTCCTCATCGGCGATCAGGTCCTTCAGCTTTGCAATCTTCCCGTGGATCTCAGCGGCAA +AGGCTTCATCCGACGTGAGTTCCATGGCCAGCCGGTAAAGCGCCCCGGCCTCCTCATAGT +CCCTGACCGTGATCATGTGGTCGCCGTTTTCCACATGCATTTTGGCAAGCGCATTCCTGG +CGAGGTGGTACTTTTCCATGATGCGCGGCAGCAGGTGCTTCTTTTCCGGAAAGCGCCTCT +CAATCCGGTCGATGGCTTTTTCGAACGCCATTTTGGCATCCCCGAACGCGCCGGCTTCAT +AATGGAAATCCCCCCGCCTTTCAAGTTCTTCCGGCGGTTTCCCCCCAAAGATGCTGAACA +GACCCATAGTTTTCCTCTTTGTTTGAATTGATGTGACCGCTAAGATTTCGGTTTCAGATA +GGCTTTGGCCGCGGCAACTCCCTGCTCGAATCCGTCAACGATCTTGTCCTCGCTCCGTGT +CAGCCGGTTGACCTTGAAATTATCGGGCGGATAGATGATGTCGAAGTCATAGTTGGTTTC 
+CAGATCGTCCAGCATCCGGTTGTAGTTACCGAAATGGTGCTCCAGTGAGTTCACCAGATA +ACGGTACTGGGAATCCTTGTAGTATGACTCGTAAAGCGTCTTGATCCAGAAGGAGTCCCT +TTCCTTGCGATATCCTTTCGGCCGGGTGAGCACCACCAGGATATCACGCTCGGGAAAGCC +CATGGAAAGGGCCTTTTTAAAGGGAACCGAATCCAGCACTCCGCCGTCTAAAAGGAGATC +GTTGCGATAATGGACAAACCCTCGATAGAAGAAGGGAATAGCGGCCGTGGCCTTCAATGC +CTGCGGCACACTGTCGGTTTTCGTGCTCAGATAGAGCGTATCGGGCGGGAAATCCCGGCT +GAATCGGGTAACCGTAATCAGCACGGGACAGGCGGTCGTTCTGAGCCTGCGGATGTTTAA +CGGCTCATCACCGCCGATGATATGGTCCACCAGCCAGTCGAGATTCAGGATATGCTTTTC +GGGTCGGAAGATGTTGCTCATCCGGATGAACCGGCCGCTGATCAGTTCGGTAAAAAAGGC +GTCCCGGGCCATGTGAATCTGGCCCGCCGCATAGACGACGCCGCTCAATGCACCGGCGGA +CGCACCGATGATTGTCTTCCACGGCGCATATCCGCGTTCGGAAAATGCCTGGAGCACGCC +CACGGTAAAGATGCCCCGCATGCCGCCCCCTTCAAGGACCAGCACTTTTTTCCGGGGAAA +GCCCTTGAGCCGGCTCCGAACAGATCTTCCGGTATGGGAAATGGCTTCAATCGGATTCAT +CTTCCGTCGGTCCGTCTGCTATGACAACTTCAGCGTGATATAGTATCCCTGGTCTCCGCG +CTGCAGCAGGATGACAATCGACTGTTTCCACCTGTCCTTGATCATCGCTTTGTAAAAGGA +TTGGACGGTATTGGTGGCTTCCGCATCGACTTTCCGGATCACATCCCCGGGCCGGACACC +GATGCCGGCGAGGGATGACTGCGGATCGATTTCGGAAATAATGACACCCTTGTCCGCGTT +TATGGTTTGATTGAAACGGACCTTCTGCCCGATACCGACGACCTTGACCCCCAGCAGTCT +TTGAACCAGTTCCGGCGCAAGCGATTCGGGGAACACCTCGATCCTGACCGAAAGGGTCAG +GTGCCTGCCGTTCCGGTCGATCTTCACAACCGCATGCTGCCCTTTCCGATATCCCCGCAT +GGCGGTCTTGTAATCGTCAATCGATTCGATTTTGTGGCCGTCGATGGCCAGCAGGATATC +CCCTTCCCGTATGCCGGCGGCTCCGGCCGGACTCGACGGATCGACCGAACGGACGACGAC +GCCGGAATGCTGCGGCAGATTGAGATAGGCGGCAAGCCGGGAATTGAGGTTCTGAACCGA +CAGGCCGATCCAGCCGGGGACGACCTCGCCGTATTTGATCAGATCAGAGATGATCTTTTT +CGCGCGGTTGATGGGGATGGCAAATCCGATCCCCTCGGCATTCTTGTAAATGGCCGTGTT +GATGCCGATCAGCTGACCGTCGATGTTCAGCAGCGGCCCGCCGCTGTTTCCCGGATTGAT +CGAGGCATCGGTCTGGATGAGATCACGATAAATCCGGTTCTTAATCCGGAAACTCCGGTC +GACGGCGCTGACAACGCCGACGGTGACGGTATTTGAAAAACCGAACGGATTGCCGATGGC +GATGACGGTCTCCCCGATCATCAAATCGGATGAATTGCCCATTGCGATGGACGGAAGCGG +GGATTTGGTCTTGATCTTCAACACGGCGAGATCCGATTCCGGATCGATTCCGACAATGTC +GGCATGGAACTTACGGCCGTCTTTCAATACCACCGTAATCTTGGCCCCCCGGACAACCAC +ATGTGCGTTGGTCAGGATAAATCCGCGCCTGCCATCGATGATCACGCCGGATCCCAGGCT 
+GGTCAGCTTTTCCTTGCGCTCGATGCCCTGATCGAAAAAATTATTGAAAAAATCGTTTGC +CCCGAAGTTGGCAAAAGGATTCACCCGGGTCCGGACTTCATACTGGGTGCTGATATTGAC +GACCGCGGGGCTGACCAGCTTGACGGCCCGGACGACGGCATCTTCACGCGGATAGTGCAG +GACTTTGGGCCCCCCGGTTTTGGCGAACACGGCCGGGGAAAAGCAGAAAACGATACATCC +GATGACAAGAAACAGGTGAATTTTTTTACGCATACGGGTCCCGATATTTCTGTTGATTAT +CAAATGGGGTCATGATATTAAATTTTAACATCGTCGCTCCATTCCATAATATAAAATTTG +AGGAATTGAAAGCGTAAACATGTTCCAGTTTCTATTGTTTGTCGGAAGTATCACAGCCTT +TATCATCGGAGGGCTGATCGTTCTGATCGGGATCGGCGCCATTACCGGTTGTGCCGGGGG +AATACTGGCAATGTGCAGCGGCGCCATCATCGCGGTTCTCGGCGCATGGTCCGCCATCAC +CTTTTTTCTTCCCTCGCCCGACCCCAGTGTTCCCGCCCGGGAAACGATCAATCTGATCCG +CCGAAACGGCCGGTGGATGTAACCGCGTACCGGTGAATTCCGAAAAGGCGCCCCCGCCCC +TCCCCTGACGGGATACGACGGACCGGTATCCACCGGATCCGGCGCTAGTTCTTTCTCCTG +ATTTCACGCTCCAGCGACTGAACCTTCAGGGAAAGCGATTCAACCTGGTCGATCAGCTCC +TTTACCTGTTCCTTTGAGGCGAGATTCATCATATTGAGGGCCTCGTTGACGCGCTGATCG +ATATTTTCCCTTATCCATGTTTTCAGGCCGTTGGTATAAGCGGTAATTTCCTTTTTGAGC +GTCCGGCCTTCGGTTTCGGAAAGTTCCTTGTCCTTGACCAGTGTGTTGATCAGCTTTTCG +TTTTCATCCCTGGACATGAGGACCGCACTCTGCCACAGGGAGATATATTTTTTGCCGTAG +CCGAACAGTGTCCCGCGACCCTTCCGCAGCATCTGCATCAGAACGCTCGACGGCAGCGCC +GTATCGTCTTCATTCTTTTCGCGGGCCAGAAGCTGCGAGACCACCTGGGCCGTCAGGTCA +TCGCCCGTTTCGTTGTCGATAATCATGACCTCGCTTCCGGACTTGATCAATTCGGCGAGT +TTTTCCATGGTGATATACTGTTTATCGGTCGTATCATACAATTTGCGATTGGCATATTTT +TTGACGAGATGCATGTCCCCTCCTGAAAGTTCAACAATATAATGCTGCTTTAATATGACG +CATTGCGTTATCCTTGTCAAGGGATTTGCCGCCGTTGCACCAACTGACAGAATACATTGA +AATGTTTAAATAATCCAATGTATCGCATCAAACAAAGCAGACACCGCGTTGTCCGTATGC +GACGAAAGGGATACCCTTATCTTATTGCTGGAAATAAAAAAAATAAGAGATTGAAGAGAT +GAAGGGAAAAAGCAGAGAGCGCCGGCATCAAGGACCTCCGGTTCAGGTATGCAGTAGACG +AAAAAAGGCATATGCCGCCGTCCGGAAAAGCCAGAAGAAAAAAGCGGACAAATCGAAAAT +CCCCGCTTCCGGAGATCATCGGCACTGCGACGCAATGATATATTTTTACATTGCATTCGG +CCTTCGGATCTGTTAATTGTAGTTTAAAAGTCACATCTGAACCACAATCCTTGCACAGTT +CAAATCCAGTTGCCATATTTAAGATATGGACGGGTACCCCGCATGATTGAAGGATACAGG +CGGGGAGTGGTATTTTATTTTCCCAAGGAGGCGCTACGATGAGAAAACAGGTTGAAATCA +TGTCCAGTATCGACAATTTCTGGCTCTACATGGATCATCCCACCAACCTCATGATTATCA 
+CCGGATTTCTCCAGTTCGACAAGCCGATCAATTTTGAGCGTTTGAAACAGACCATCAAAA +ACCGGCTTCTCTGTTATGATCGCTTCAAAAAAAGGGTGATCCGGCCGATGACCGGTGTGG +GAAATGCAACGTGGGAACTCGATCCCAGGTTCGATCTCCGCTCCCACCTGCATCGCGTCG +CGCTTCCCGCGCCCGGTGACAAAGAGACGCTCCAGGAACTGATCAGCGACCTGACCGCCA +CCCCGCTGGACCCCACAAAACCGCTGTGGCAGCTGCATTATATCGAAAACTGTGAAAACG +GCGGATCGGTTCTGTTCGCCCGCATCCACCATTGCATCGGTGACGGCATCTCATTGATCC +GGCTGCTGCTGTCCCTGACCGACACCGAACCGAACGCGGTCTGGAGTGATTGCTTAAATG +AGCCGAAGATCGAAAAGGAAACCAGCTTCAACCTGTTTCCGCCGCTGGAATCGGCGATGA +AGAAGGTCACCCGCGCCAGGAGACGGGCCCAGAAAGTGACGCGGTTCGTGAGCCGGGAAA +TCGAAAAAAGCTTTTCGAATCCCTACCACATCGTCAAGCGGACCCGAACCGTTACCAAAT +TCGCCCTGGATGTCGCAACCGTCATGAGCAAGATCCTCCTGCTGCCCGCCGACCGGAAAA +CCGTTTTCAAGGGGGAACTGGGCGTGCGGAAAAGCGTGGCATGGTCCGACCCGCTCCCCC +TGGACGACATCAAGGTGATCGGCAAATATTTCAACGCCACGATCAATGATATCCTGGTCG +CTCTCGTGACCGGGGCGCTTCGCCGGTATCTGCAGCAGTGCAATAATCTGGTGGGAGACC +TCGACATCCGGGTGGCCATGCCGATCAACATCCGCCCGATAGACGGGGACATCGAACTCG +GCAATCAGTTCAGCCTGATACTGGTCGCCCTGCCGGTGCATATCGATGATCCGGTCCTCC +GTATCCGGGAAGTGCAGCGCCGGATCAACGATCTCAAGGAAGCGCCGGATGCCGCCGTGG +CCTACGCGGTCCTGAATGCCCTCGGGGTGTCGTCCGCCAAACTGGCCAAAACAGCGGCCA +CCATGTTCGCCAACAAAACCACCGGCGTGTTCAGCAACGTACCCGGCCCCCGGCAGCAGC +TCTATTTCTGCGGGGAAAAGATCAACAATATCATGTTCTGGGTCCCCCGCATCGGCGGAC +TGGGGATCGGGATCAGCATCATCAGCTACAACAATGAGGTTTCACTCGGTATTGCGACGG +ATTCCGGACTCGTGCAGGACCCCAAAGCGATTCTGGACCATTTCGCGAACGAATTTCGGA +TGCTTCTGGGGATGTACAAAGCCGGACAGATGGAAAAGGAACCCCTGGTCATCAACGACC +GGTCCGTGGAACCGCCCGTTTTCGCCTTTAACACGGAAAAAATCGCCAGCGTTCAGGCCA +TCCGCTGCAAGGCCATCACCCGGAGCGGCACCCAGTGCCACAATCGCGCCGCCACCAATT +CCATGTACTGCACCCTGCATCTGTCAAAGTATGAAACCATCGCCAGCAGAGAAGAAAATG +ACATGCCGGCAGAGGCCGACAACACCCTGCCGGCCGAAGACCAGGCCGCCGGATAAATCC +GGAAGATCAGGCGGGTTGATGAAAAATTTCAAGATTTGAAAACGCAAAAGAGGGGATCTG +ATCGGATCCCCTCTTTTTTTATGGCAAGGATTAAATCTTAAATTCCGGATGTTTCATGAG +CTGTGTACTGATGACCCGAAGCCCTGAAATATCATTGACGGTTTCATCGGCAAAGAGGAT +CGGGAATACGGCCAAGCCGGGACTGATGGATGAAATGCGGTCGATGGCCGCGGCATCGCT +TCGGGCGAGTTTCCGGTCCATGCGGTCGGCGATTTCCAGACGCTGAAAAATAGGTTTATC 
+GGCTATTTTTTCCATCAGGACGCGCTTCCGGTCCGCCGCTTCCGAATCGAACGGCGAATC +GGCCGTGTCGGAATGGACCCGGTTGACGATGAACCCGCCGAAAGGCATATTCCGCTGCTG +CAGTTGCCGAAAGAGATAGGTCGCCTCCTGAATCGGATATTCCTGCGGGGTCGTGACAGC +GAAAAAAGTCGTCATCGGATCGGACAGCAATGATTCGACCGCCGACGCCCGTTTGCTGAA +ACCGTCAAACAGCATATCGTTGAAGAGCTGGAAAAAAGAAAAGATATCCGATATCGTCTG +CCCGCCGACGACCTGCCCTACAGCGCGCAGCACCGGCATGGCCAGGAGATTGAAAAGGCG +CCCGCTCAGTTGACCGGCCTTGATATACGGCTTGAACAGTTTTAGAAAGTACGGGTGTCC +CAGAAGGTTGATGACCCGCTGGGGGGCTTCCAGAAAATCGAGGGCGCGACGGCTCGGCGG +CGTATCTAGAACAATCAGATCGAACCGCTTTTCATGGTAGATCTCGTAAAGCCGCTCCAT +GGCCATATATTCGTGGGAACCGGCCATATTATTGGACACGTGCTGGTAGTAGCGGTTTTC +CAGAATCCGGTCGCGCAGTCCGGCTGATGAATACCGGCCGATCAGACGGTCGAAGGTCCG +TTTGGCATCCACCATCATGGCATACAACTCACCGGAGGCAGGTACCGATGCCTCCAGCGG +AACCCGCTGCGCCTCCATGTTCAGGGCCGATATGCCGAGTGCATCGGCCAGCCGCCGGGC +CGGATCGATGGTGAGCACCACGGTTTTTTTGCCCATCAAAGCGCCGCATAAGCCGATAGC +GGCGGAAATCGTCGTCTTTCCGACGCCGCCGCTGCCACAGCAGACGACAATCCTGGGATT +GTTCGGTGTCGGCGCCGGAGCGTTCATCGCCCGCCTCCACGGGGAGCTTCGGATATCTGA +CGATGCAGCGACGCGGCAATCGTGCGGATCTCCTCCGGCCCCAGATCATTTGTATAATAG +TAAGGGACATGGATCACGTGACCGGGAGCGGCCGCGTGAACCTCGTTCATCTGGACCTGC +TGGGCGGCATTGCGGACGATCTGCCGCCGGGCGACATCAAGCGCGAACCGCAGATCCGGA +CAATCCGGATCTGCATCGGGACAATCCCGGTCAAGCTCCTGGATCCGGGAGAATTCATCG +GGTGTGACAAACACCGGGTGTACGCCGTTAATGAATACCGCCTGAACGGGAATCCCCACC +TCATCGACGGCAATGTCGATCATTTCACGGGTCTCATTGACGGGTAATTCTTCGGGAAGG +GTCACGAGGGTCAGGGCCGTGCGCTCCGGGTTCAAGAGCATCTGCTGGACCCCGTTGACC +TGGGATGCGATCGGTCCCACCCGGATCATCTCCACGAGCATTTTCGGCAGCCGAAGCAGG +CTCAACCCGTGGCCGGTCGCCGGCGCATCAACGATGATAGTGTCATAAAGCGGCGTGCCG +GCCTTGTTCTTCGCCTTTTCCCATCGCCAGATCCGGGCCAGCGTCATGATCTCCTTGAGA +CCGGGAGTGGCGGCCAGCAGGTAGTCGAAAAGCCGGCTCTGGGTGATGCGGTTGGCGATA +AAACCGGACTTCATGTGGTAATGAAGGTAGGCCGTCAGTTCATCTTTCGGATCAACACGG +GCAGCCCAGATGGCCGATGAGACCTGCCGGGGACGGACATCGGGCTGTGCATCGAAATAG +CGGCCGATGCCCTGGCTGTCGCCGATTTCGGTCAACAGCACCCGCTTGCCCATCATTTCG +GCCGTCAATGCGAGAGCAACAGACAGGGTGGTCTTGCCCACCCCCCCCTTGCCCATCAAA +AAGATCAACCGACGGGTCAGCAGTTCGTCGAGTGAGCGCACGCCGGCGCCGATCGAAAAC 
+AACGCCTCAAGTTGTGTCCGGTCAGATTCATCCGCTTTTGCCATGGCATGTTCCCGAAGA +CACAATAATCATTTACAGTGCCGGGTGAAGCGCGCAACACCCTGTAATGGCCCGGGCGGG +CGATGCCGCATCACGAAGTTTTTTTGGTTTTAGCGGGCCCTTTTTTTTCTTTTCCCAATC +GATTGATTCGTTTGTTCAGTTCCTCGATTTTTCCGGTCAGCTCAAGGATCTGATCCCGGT +TGGCGAGGTTCATCCGGTTGAGCACCTCGTTGACCCGGTTGTCGATGTTGCGGGTGATCC +ATCCCTGGATATTGGTGCGATAGCGGTCAATCTCCTTTTTGAGCTTGCTGCCCTCGAGCT +CGGTGAGCTTGTTGTCCTTGACCAGCATATTGACCAGTTTATCGACCTCGTCTCTGGACA +TGGTCACCATGTTCTGCCACATCGAGGCATACCGTTTTCCGTAATCGAACAGGGCGTCGC +CACCTTTTCGAAAGAGCTGTACCAGCATACCGGTCGAATCGTCAACCGCTTTGTTACTTT +TTCCGGCCTTTTTGGATTTTGACGGATTCTGTTTTTTGGCTTTGATCCGGGAAACGATGT +CGTCGGTGATATCCTTCTCGGTCCGGGTATCGATAATCGAGATTTTTTTACCGGCGCCGA +GAAGCTTTGAGATCTGCTCCCGGGTCACATAGTTTTTTTCCACCGTGTCATAAAACCGGC +CATTGGCGTAGCGCTTAATCTGGTACATAACTCCCTCCTGTGTTTGATGGCAGCCCCTTT +ACGGAGCCTGTGTGGAAATGGTCAGAGACATACCTTTTCGGCACGGGAGAAGATCAATCG +GAGATGGATGTAAGGGAATGTCCTGCCTGACTGTATGTCGTTTCGGAAGAAATACCGGGC +GATTCACGTTCAATCCGCAAATCGCCCGGTACCGGTGTTGCTTTTGTTCGAATTACTGGG +CGCTCTGCTTTCTCAATGCTTCAATATTGGAATTCAAGGCTTCCATTGCCAGCGTCAGTT +TCTCGATGTCGCCCTTGCCGGGCAGATTGACCACCGAAGGAACGGAATTCAAACTGGAGG +TCACCTTCTTTTCGACCGTTTCGACCAGGGGGAGCTTGTACATGAACTTTTTGCCGTTTT +CGACATAACCGTCCATCTTCTTCCAGGCGTCTTTTTCAAAGGACCGGCCTTTTTCAAGCC +ACTGATCCATGGTGCCGGAAAGCTTCTTGTAGGGTCCGTCGACATATGATCTGCCTTTTT +CAACGGCCTTTGTGATATATTTGTCGTTGTATTCCTTAAGGGTATCTTTGCCCTTTTCAA +CGGTTTTTACCAGATATTTTTCATTGTATTCTTTCAGGGTCGCCTTTCCTTTTTCGATAT +TTTTGGCGACGTATTTTTCATTGTAGCCTTTTACTTTATCCGTCACCGATTCGGCGGCCT +TCTGAATTTTTCCGGTGATCTTGGTCTGGGCGGTTTCGCCCTTTTTGGTTGCGGTCTTTG +CCATGGTACGGTCCTCCTTGGTTAAATTTGATTTCACATCCTGTTCATATCCGGGTCATC +AGAAAAATGGTATATATGGAAAACGCAAAACTGCGATCAGAAAGAATCGGCAACCGGTTA +TATTGCATTGCGTCATGACTGAAGAATATGACGCAATGCGTCATATGTCAAGAAAAAAAA +TCTCCGCGCAGCTGAAATTTGAAACGAAATCCGGATAACGCATTGCGTTATCTACTTGTG +ACGCATTGCGTTATTTTTGTCAAGCTTTTTTCCCCTCAAAATCCGTCCGATTCCCTTAAA +CCCGGGATGAGGCGAATACCCCATGCCCCTCATCCCGCACAATCGATCTGCACGATATGA +TTTTCCATGTTGCGATTTTATCAAACCGGATCTATTTTAGTTGTAAAGCAGGAAGCGAAA 
+CCAGATTGAAGACAAAAGAGGGGACCAAACCCCTCCTCAAGCGGAGCAGATAATGGGCAA +AACCATTCGACGGGCGCTTGTGCTGTCCGGTGGCGGAGCGCGGGGCGCCTTTGAAGTCGG +TGTGATGCGGTACCTGAATGAAGTGAACTGGCAGCCGGATCTGATCTGCGGCACGTCGAT +CGGCGCGATAAACGGCGCGGCCTTCGGTTCCGGCATGTCGGTGGATGAGCTGGCTCACCT +CTGGAAAACATACCACCGCAAACAGATGTATAAGATCACCTTTCCGGCATTTTTCCGCAC +CCTGTTGAGCGGCCGCAAATTCTCCCCTCTATCGGATAACCGGCCGACACGGTCCCTGCT +GGAAAAGACCATCGACATCGACGCCTTGAGGAACAGCACCACTGAAATTATCATTTCCGT +CCTGAATATGCGGACATCACAGGTTCGTTATTTCACCCACAAAGCGATCGGTATCGAGCA +TCTCATGGCGGCCGGCGGGATTCCCATGATGTTCCCGTGGCAGTACATAGACGGGGATCC +TTACTGGGATGCCGGCGTCATGGTCAATACGCCGATCATGCCCGCGTTTGAACGGGGAGC +GACGGAAATCATCGTGGTGCTGCTGTCGCCCCTCGGCGCCATTCCCCAGCGTCTGCCCAG +CACCCATCGGGAAGTGAGCGAGCTGGTGTTCGAACAGTTTTTGATCGGTTCGTATACCGC +CTGCCTGCCCAATGCCGGATGGCGGACGAATCCGGAGGCGGACGTCTACGATACGCCCCT +GCCGGACTCCCCCCAGCTTCAGCTTTCGATGAAAGGCGTCCGGATGGCGACCGTATATCC +GACCCGGATGCTTGGATTTCGCTCTTTGCTTGACTTTTCTCCCCGGCAGGCCAAAACCCT +GCTCCGGGATGGTTATGTCAATGCGCGCATGCAATTGAAGTCTTTTTTTAAATAAGACGG +GATATTGCAGAAACGAGCGCGACCGGATTGACATCAAAGTGGCATTAAAATCCGAAAATC +ATCAGCAAAAGGAGAAAAAAATGCCGACAGCAACGATCCGACAGCAACTGATCGAACTTT +TGAGCGAAAACAAATATGATGCCCGGGACCTGTCCCAGCGTCTGGGCGTCCGTGAAACCG +TCGTATACGACAGCATCCCCCATATCACCCGGTCCGTGACATCCATGGGCAAAAAACTGA +AAATCGTTCCATCACGCTGCACGTCGTGCGGATATACGTTCAAGGACCGCAAACGCGCGG +CAAAACCCAGCCGATGCCCGACCTGCAAGAGTGAGCGGATCGCAAAGCCGAAGTTTTATA +TCGTCTGACCGGAAAAGTCCCGTCAGCCGGTAACGACCGCTTCCAGTTCGGCAAGGGAGA +TGGAATCGACAAATGCGGCCCCGATGGCGGAAAGCAGGACGAAATGGATCTTTTCCCCCT +GGCGCTTTTTATCCCGGGCCATGGCGTCGATGATGCGCTCCGGATCGATGGGGAGCGCGG +TCGGCAGGTCCAGGTGCGTGAGCAGATCCTTTAACCGCCGGATGTCCGGGCGGGGGAGAT +GCCCCCGGTTTGCGGACAACTCCGCCGCCATGACCATTCCGGCGCTCACCGCCTCGCCGT +GCGGCACACCGAGGGTTTTCTCGATGGCATGGCCGAGGGTGTGCCCGAAATTCAGCTTTC +GGCGCTCTCCCCGCTCCTTTTCGTCGGCGTTGACCACCCCGGCCTTGATCACGACCGATT +TGCGGATGATCCGCTCCAGCGTTTCCGGGTCCCGGGCGCACGCCTTTTCGTGGCTTTCTT +CCAGATCGGCAAAGTAATCTTTGTCGGAAATGGCGGCGTGTTTGACGATTTCGGCAAAAC +CGCAGGCCAGCTCCCTGGGCGGAAGCGTACCGAGCACATACGGATCGCAGATGACGAACT 
+CGGGCTGGTTGAAGACTCCGACCATGTTCTTGTAGCCCATGAAATTCACGCCGTTTTTCC +CGCCCACACTGGCGTCCACCTGGGCCAGGAGCGTGGTGGCCACATAGCCGAACCGCACCC +CCCGCATGTAGGTCGAGGCGATAAAGCCGGTGATGTCGCAGACGATGCCGCCGCCGATGC +CGACGATAAACACCGACCGGTCCGCAGCCATTGAAACGAGCCGCTCATAAATCATCTTCG +CGGTATCGAGCGTCTTGATCGATTCGCCGCACCCGATGGTAATGACCTCGACGTCGGGAA +ACATTTTGCCGTAATGGCCGGCCACATTCGTGTCCGTGATCACCACGATCCGGCGATCCG +GAAGGTAGTTAGACAGGTTTTCCAGTCGCTCGCCGATGACGATTTTCGATGCACCGGTCT +GGCCGCTGATTGAAACGGTTTTCACAGGTATTCTCTCCAATAAGGAATATCAACGGGTTT +CATCGGTTTCGCCGAGTATCCGGAAAATGGACCGGACCTGGCGGCACAACGTTTCGAACT +GATCGGGGTACAACGACTGCGATCCGTCGCACTGGGCGGTATCCGGGGCATTATGGACTT +CGATCATCAATCCGTGGGCCCGTGCCGCCGCCGCCGCCCGGCTCAGGGGAATCACCTGGT +CCCGGAATCCGGTGGCATGACTGGGATCGACGATGATCGGCAGATGGCTTTCCCGCTGCA +CCACCGGAATGGCCGACAGGTCCAGCGTATTGCGACTGTGCCGGACAAACGTGCGGACCC +CCCGTTCACACAGGATGATGTTCGGATTCCCCTGCGACAATACATATTCCGCCGCCATCA +GCCATTCGTCGATGGTGGCGGACATCCCGCGCTTTAAAAGAATCGGCTTTTTCGATTCGC +CCGCCCGCCTGAGCAGGCTGAAATTCTGCATGTTGCGGGTACCGATCTGAACGATATCGG +CATACGCTTCGACCATGTCGAAATATTCCAGATCCATTACCTCGGTCACGATCGGCATCC +CGAACTGCTCCCGGGCCCTGGCAAGGATCTTGAGCCCTTCCTCGCCGAGTCCCTGGAACG +CATAGGGAGAGGTCCGGGGTTTGAAGGCCCCGCCCCGGAAAATGTCGGCGCCCGCTTTCT +TGACCCGTTCGGCGGTGGCCATGACCTGCGCTTCGCTTTCCACCGCGCACGGCCCGCCGA +TGATGACCAGATGACCGTTTCCGATTTCGACGCCACCGACCCGGATAATGGTGTCATGCG +GCTGAATCTCCCGGCTGACGAGTTTATAGGGACGTGTCACCGGAATCGTCTCCTTGACAC +CGGGCATATCCTGGAACCAGGCCGCATCGATCGCCGTGCGATTGTTTAAAATTCCGATCG +ATACCCGGTCCCCGCCCGGTATGGAGCGGGCCGTATATCCCCTGGCCTCGATCGCCCGGA +TCACAGCATCAATCTGTTCCCGGCTGGCATCCTGACGCATCACGATGAGCATGGTCTCTC +CTCCTGCAAAATCATTGTCGAATGGGTCATCCTTTAAAATAAAAAACCGTGGGGTCCTCG +CGGGGCCCCACGGTCTCAGATACTTTTCGATACCTCAAAAACGGATCAATCCGGTATCGG +CCGAAGGGAAAACCCCGCTGATACGGGCTTTCGCCATCCGCGCCAGAATCCGTAAATCAA +TACTGCAAATCCATATTGTGGCATTTTAAAAACCGCCCGATTGAACCGGTTTGAATCATT +TGGCAATTTTACATCAATTTTTATTTCAATAGGCCTTTATACGCGGCATCGTCAAGAAAA +ATATTGTTTACTCTGGCGATTCATGGCCGACACATCAGAAAAATATGCGACAAGGTACGA +AAATGATTCTGATTGACATTTTGCGGCGAAACTGGTATGACCTCTTACCATTGAATGAGA 
+TCGGCAATATCTGCGCGGTAAAATTTGGCAACAAGGCGGCGGATCCATGACGAATGCGAC +AAACGACCAGCCGTTTCGACCGGCCCGGTTCACCGAACAGCGGCTGATCACGGCCATTCT +GGACGGCACCTGCCCGCCGGGGTCCGTGCTGCCGGCTGAACGCCGGCTCGCCGAACAGTT +CGGGGTCACCCGCCCCACTATCCGGGAAACCCTGCAGCGGCTTGCGGCCGAAGGGTGGAT +CACGATCCGCCACGGGAAACCGACCCGGGTCAACGACTTCTGGGAAACCGGCGGGTGTTC +CTAGGCTGTTTCCTGGTGGGATCCTCTAGAGTCGACCTGCAGGCATGCAAGCTTGAGTAT +TCTATAGTCTC diff -r 000000000000 -r 68a3648c7d91 filter_Fb/filter.fb.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_Fb/filter.fb.pl Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,22 @@ +#!/usr/bin/perl -w +# +$f=shift; +$out=shift; +open(IN,$f); +open(OUT,">$out"); + while() + { + if ($_=~/^#/) + { + print OUT; + next; + }else{ + $vl=(split())[5]; + $gt=(split())[-1]; + $gt=(split(/\:/,$gt))[0]; + next unless $gt eq "1/1"; + print OUT if $vl>20; + } + } + close(OUT); + diff -r 000000000000 -r 68a3648c7d91 filter_Fb/filter.fb.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_Fb/filter.fb.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,13 @@ + + filter low quality variants from vcf file" + /home/inmare/galaxy/tools/filter_Fb/filter.fb.pl $f1 $o1 + + + + + + + + + + diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/4mers.list --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/4mers.list Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,256 @@ +AAAA +AAAC +AAAG +AAAT +AACA +AACC +AACG +AACT +AAGA +AAGC +AAGG +AAGT +AATA +AATC +AATG +AATT +ACAA +ACAC +ACAG +ACAT +ACCA +ACCC +ACCG +ACCT +ACGA +ACGC +ACGG +ACGT +ACTA +ACTC +ACTG +ACTT +AGAA +AGAC +AGAG +AGAT +AGCA +AGCC +AGCG +AGCT +AGGA +AGGC +AGGG +AGGT +AGTA +AGTC +AGTG +AGTT +ATAA +ATAC +ATAG +ATAT +ATCA +ATCC +ATCG +ATCT +ATGA +ATGC +ATGG +ATGT +ATTA +ATTC +ATTG +ATTT +CAAA +CAAC +CAAG +CAAT +CACA +CACC +CACG +CACT +CAGA +CAGC +CAGG +CAGT +CATA +CATC +CATG +CATT +CCAA +CCAC +CCAG +CCAT +CCCA +CCCC +CCCG +CCCT +CCGA +CCGC +CCGG +CCGT +CCTA +CCTC +CCTG +CCTT +CGAA +CGAC +CGAG +CGAT +CGCA +CGCC +CGCG +CGCT +CGGA +CGGC +CGGG +CGGT 
+CGTA +CGTC +CGTG +CGTT +CTAA +CTAC +CTAG +CTAT +CTCA +CTCC +CTCG +CTCT +CTGA +CTGC +CTGG +CTGT +CTTA +CTTC +CTTG +CTTT +GAAA +GAAC +GAAG +GAAT +GACA +GACC +GACG +GACT +GAGA +GAGC +GAGG +GAGT +GATA +GATC +GATG +GATT +GCAA +GCAC +GCAG +GCAT +GCCA +GCCC +GCCG +GCCT +GCGA +GCGC +GCGG +GCGT +GCTA +GCTC +GCTG +GCTT +GGAA +GGAC +GGAG +GGAT +GGCA +GGCC +GGCG +GGCT +GGGA +GGGC +GGGG +GGGT +GGTA +GGTC +GGTG +GGTT +GTAA +GTAC +GTAG +GTAT +GTCA +GTCC +GTCG +GTCT +GTGA +GTGC +GTGG +GTGT +GTTA +GTTC +GTTG +GTTT +TAAA +TAAC +TAAG +TAAT +TACA +TACC +TACG +TACT +TAGA +TAGC +TAGG +TAGT +TATA +TATC +TATG +TATT +TCAA +TCAC +TCAG +TCAT +TCCA +TCCC +TCCG +TCCT +TCGA +TCGC +TCGG +TCGT +TCTA +TCTC +TCTG +TCTT +TGAA +TGAC +TGAG +TGAT +TGCA +TGCC +TGCG +TGCT +TGGA +TGGC +TGGG +TGGT +TGTA +TGTC +TGTG +TGTT +TTAA +TTAC +TTAG +TTAT +TTCA +TTCC +TTCG +TTCT +TTGA +TTGC +TTGG +TTGT +TTTA +TTTC +TTTG +TTTT diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/clusterF.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/clusterF.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,19 @@ + + /home/inmare/galaxy/tools/fosm_cluster $f1 $l $o1 $o2 + k-means clustering of assembled fosmids. + The tool was designed to tentatively assign contigs from incomplete fosmid assemblies to clusters, ideally corresponding to single fosmids. Clustering is performed based on tetra-nucleotide frequencies of the contigs and coverage. The current version is only compatible with SPAdes output as coverage is recovered from the fasta headers. Future version migth require a different set of input files. Full details are in Chiara et al. #paper id. Clustering of contigs is performed by a custom script based on the R implementation of the K-means algorithm, using 1500 starting positions for the centroids. The clustering is performed on metrics based on coverage, GC composition and tetra-nucleotide composition of each contig, which are computed directly from the fasta file. 
The user must input the desired number of clusters, contigs are partitioned accordingly." + + + + + + + + + + + + + + + diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/compute.stats.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/compute.stats.pl Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,81 @@ +#!/usr/bin/perl -w +$file=shift; +$ncluster=shift; +$ofile=shift; +$fasfile=shift; +$flist="4mers.list"; +open(IN,$flist); +while() +{ + chomp; + push (@let,$_); +} +close(IN); +open(OUT,">$file.stats"); +open(IN,$file); +%c=(); +$name=(split(/\./,$file))[0]; +while() +{ + chomp; + if ($_=~/^>(.*)/) + { + $id=$1; + $M{$id}=$name; + }else{ + $c{$id}.=$_; + } +} +close(IN); +foreach $s (keys %c) +{ + ($len,$cov)=(split(/\_/,$s))[3,5]; + $seq=$c{$s}; + $at=0; + $gc=0; + $le=length($seq); + $tt=0; + %DD=(); + @seq=(split('',$c{$s})); + + foreach $l (@seq) + { + if ($l eq "A" || $l eq "T") + { + $at++; + }else{ + $gc++ + } + } + for ($i=0;$i<=length($seq)-4;$i++) + { + $subs=substr($seq,$i,4); + $rc=reverse($subs); + $rc=~tr/ACGT/TGCA/; + $tt+=2; + $DD{$subs}++; + $DD{$rc}++; + } + $gc=$gc/$le; + next unless $len > 1000; + print OUT "$s\t$gc\t$cov\t"; + foreach $L (sort @let) + { + $val=$DD{$L} ? 
$DD{$L}/$tt : 0; + print OUT "$val\t"; + } + print OUT "$M{$s}\n"; +} + +system("./kmeans.R $file.stats $ncluster $ofile ")==0||die("no kmeans"); + +open(OF,$ofile); +$l=; +open(FAS,">$fasfile"); +while() +{ + ($id,$cluster)=(split())[0,1]; + $NC{$cluster}++; + $id=~s/\"//g; + print FAS ">$cluster\_$NC{$cluster}\#$id\n$c{$id}\n"; +} diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/kmeans.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/kmeans.R Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,11 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE); +data<-read.table(args[1],sep="\t",row.names=1); +data<-data[,1:258]; +data[,2]<-data[,2]/sum(data[,2]) +file<-(args[3]) +num=as.integer(args[2]) +K<-kmeans(data,num,nstart=1500) +clust=K$cluster + +write.table(clust,file); diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/res.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/res.fasta Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,38 @@ +>9_1#NODE_2_length_40000_cov_63.1617_ID_3 
+CCAGCGGCCAGGTCGATCGACAGCACGCACTGCCCGGTGTAGGGCAACAGGCGCTCGCGGTCGTCCAGGCTGCCCGCGCAGGGCTTGACCACCATGACATCGTTGGCACCGGTTTCCAGCAGATGGTCGATCACGCCGAGCAACTGCCCGCCCTGGTCGATCACCTTCAGGCCTTCCAGCTGGTGCCAGTAGTACTCACCTTCCTCGAGAGAGGGCAACTCGCTACGCGGGATGCAGATCTCGTAACCGGTGAAGGTGCGGGCCTCTTCGCGATCGTCGAGCCCCTTGAGCTTGGCGGCCAGGACCTTGCCATGCAGGCGCCCCCTGACCAGCTCGGCCTGCCGAATCTCGCCGTCGCGCCGGAGCGTCCAGCGGCGATAGTCCAGCAGGTTGTCCAACGGGTCGGTAAAGGAATACACCTTCACCTCACCGCGGATGCCGTACACCGAAACGATCTTGCCGATCACGACCAGGTCGTCGGCGGGTGTCGGCATTTCACTCATGACGGCTTAGGCGTTTGCCTTGGCAGCGTCCTTGAGCAGCTGAGCAACGCGCTCAGACGGCTGTGCACCCTGGCCGAGCCAGTAGGTAGCACGCTCCTGGTCGACGGACAGACGCACTTCGCCACCAGTCGCAACCGGGTTGAAGAAACCGATGCGCTCGACGAAGCGACCATCGCGCGCATTGCGGCTGTTGGTCACGGTCAGGTGGTAGAAGGGGCGCTTTTTGGAGCCGCCACGAGCAAGACGGATGGTTACCATGTGAACTTCGTTCCTGTAGTCGGTGCTTGCAAAATGAATGCACGCTGGGCCCACGGCCCGAAAGGCCGCATATTCTAAGGATTATCGGGGAATTTGCAAATCTCTTTTTCGGCCACCCATCGGCCGGCGCGGAGAGCGCGGGAAGCACACGGTCCGCCCGCCGGCCGACGGCGCGAAACCGCCGGCGTGGCCAGCGGCCGCGCGTCCGATCAAAGCTTGGGCATGCCCCCCGGGAACATACTGCCCATGCCTCGCATCATCTTGGCCATGCCGCCCTTGGCGGTGACCTTCTTCATCATCTTCTGCATCTGCTTGTGCTGCTTGATCAGCCGGCCGACGTCCTGCACCTGGGTACCGGAGCCAAGGGCGATGCGGCGCTTGCGCGAGCCGCTGATCATTTCCGGATCGCGCCGTTCGCCGGGCGTCATCGAGTTGATGATCGCCTCCATCTGCTTGAACTGTTTCTCCGCAGCACCCTGGGCATTGCCCATCTGCGCCAGGTTGACCCCGCCGAGCATCGGCAGCTTGTCCATGAGCCCGCCGAGGCCGCCCATGTTCTTCATCTGTTGCAACTGGTCGCGGAAGTCTTCCAGGTCGAAGCCCTTGCCCTTCTTGATCTTCTTCGCCAGCTTCTCGGCCTTGTCGCGGTCGAGGTTCTGCTCGGCCTGTTCGATCAGGCTGAGCACGTCGCCCATGCCGAGGATGCGCGAGGCCACGCGGTCGGGATGGAACGGATCGAGCGCTTCGCTCTTTTCGCCCATGCCGAGGAACTTGATCGGCTTGCCGGTGATCGCCCGCACCGAGAGCGCGGCACCACCACGCGCGTCGCCGTCGACCTTGGTCAGGACCACGCCGGTCAGCGGCAAGGCGTCATTGAAGGCCTTGGCGGTGTTGGCGGCGTCCTGGCCGGTCATGGCATCGACCACGAACAGGGTTTCCGCCGGCTTGATCGCCGCGTGCACCTGCTTGATCTCGTCCATCATGTCGGCATCGATGTGCAGGCGGCCTGCGGTATCGACGATCACCACGTCGATGAACTTCAGTCTCGCCTCGCGGATCGCCGCCTCGGCGATGGCCACCGGCTTCTGGCTGACGTCGGAAGGGAAGAAGGTTACGCCAACCTCGCCCGCCAGGGTTTCCAACTGCTTGATCGCGGCGGGTCGGTAGACGTCGGCGGAAACCACCATCACCGACTTCTTCTTGCGTTCCTTAAGGAAGCGCGCCAGCTTGCCCGCGGT
GGTGGTCTTGCCCGCGCCCTGCAGGCCGGCCATCAGGATCACCGCCGGCGGCGCGACGCTCAGCGCCAGGTCCTCGTTGGCCGCCCCCATCAGCTCCTCGAGCTCGGCGCGGACGATCTTCACGAACGCCTGTCCCGGGGTCAGGCTCTTCGAGACCTCGGTACCGACGGCGCGCTCCTTGACCTTGTTGACGAAGTCCTTGACCACCGGCAGGGCCACGTCGGCCTCGAGCAGGGCCATGCGCACTTCGCGCAGAGTGTCCTTGATGTTGTCCTCGGTCAGCTTGGCCTTGCCGGTGACATGGCGAAGCGTCTGCGAGAGGCGGTCTGTAAGGTTTTCGAACATGCGCGATCCTTCCACGGGGGTTGCGGCAAGCGGCGGATTATAACCAAGAGCGCGTCGCGGGACACGCGATGAAAGACGGAGTGTGTGTCCGGCGACGACCGGAGGCCAGCCTCAAGTCCTTGATTTGCCGATGCGTTCCGTCTTTGCCGAAGCGACCGCCGGACCATGAGGTCGCGGCGCCCGCGGCAGGCGGTCCGCCAACCCGGCCACAGGTACGAGAGAACTGTTCGCAACGGCTTTCTCGCGACGCTCGATCTATGCCAAACTCCCATCCTTTCGGGCCCGCGATTACAAGGACTTATGCATCCCCTGCTGCCCAGCCTCATCGCTGCTGTTCTCTATCTCGGCACCGCCGCCTACCAGGGTGCCTGTGTGTCCAAGCGCACCGCGCCGGGCAAACCCCTGCTGCTCCTGTTCGGACTCCTGGCACTGGTCGCCCACGCCTTCAGCCTCTACCAGCAATTGCTGACCCCGGCCGGCCTGGTGCTGGACTTCTTCAACGCCGCCAGCCTGATCGCCGCCGCGGTAATTCTCCTGACCCTGCTGGCGACCTTGCGGATACCGGTGGAGAACCTGCTCCTCCTTCTATTCCCGCTGGGTGCCCTCACCACCCTGCTGGCGGTACTCATCCCCCACGGCACCGTCGAGCCGATCAACGAACAGCCAGGCATTCTGGCCCACATCCTGCTCTCGATCCTGGCCTACGGCCTGCTCACCATCGCGGTGTTCCAGGCGCTCTTGCTGCTGCTCCAGGACTACCGCCTCAAGCACAAGCATCCGTCCGGGCTGATCCGCAACTTCCCGCCCCTGCAAACCATGGAAAGCCTGCTGTTCGGCTTCCTCTGGGGCGGCTGGTCGCTGCTTTCGCTGTCGCTGCTGTCCGGATGGCTGTTCGTCGACAACCTGTTCGCCCAGCACCTGGCGCACAAGACCATCCTGTCCTGCTTCGCCTGGGTGGTCTTCGCCGTGCTCCTCTGGGGTCGCCACCAGCTCGGCTGGCGGGGCCACAAGGCGATCCGCTGGACCCTGGCCGGTTTCTGCCTGTTGATGTTGGCGTACTTCGGCAGCAAGCTGGTGCGGGAATTCATCCTGCACATCTGATGGGCCTCCTTCATGGATGAGCTGCACCCCGGGTACCTGGTCGGCCTGCTGGTTCTCCTGGTCGCCTGCTCGGCGTTCTTCTCCTGCGTCGAGACCGCCCTGCTCAACCTCGACCGTTACCGCCTGCGCCTGCAGGCCAAGCAAGGCCTGCGCGGGGCCCGGCGCAGCAGTTGGCTGCTGCTGCACGACGACCGCCTGCGCGGCACCCTGTTGTTCGGCCGCACCCTGGTCAACGTCAGCGCCGCCGCCCTGGCCAGTTGGGCCGCGCTGCGCCACTGGGGCGTCATCGGCCTGGCCGTCGCCATCCCCGGCATGACCCTGCTCCTGCTGCTGTTCGGCGCCCTGCTGCCGCGCCGCTACGGCGCCCTTCGCTCGGAACGCGTCGCCCTGCCGCTCAGCCTGCCGCTCCTGATCCTGCAGCGCCTGTGCTGGCCCCTGCTGTGGCTGCTGACCCTGCTGAGCAACGCGCTGCTGCGCCTGCTCGGCGTCGCCGCGGCGGAACAGGACGACAGCGGACGCAGCCGCGACGAAGAGGCCCTGCATCCGGCCGACAA
TCCCCAGGCCAGCGAGGTCAACCGGCATGACATGCTGCTCGGCCTGCTGGACCTGGAGAAGGTCACGGTCAACGACCTGATGATCCCGCGCAACGAAATCGAAGGCATCGACCTCGACGACGAACTCGAGGTCATCGTCGAGCAGTTGCGCACCACCAGCCACACCCGCCTGCCGGTCTACCGCGACGACGTCAACCAGATCGAAGGGGTCGTGCACATGCGCCAGATCGCGCGCCTGCTGACCCAGGGCCGCCTGACCAAGGAGAATCTCCGCCAGGCCTGCATGGAGCCGTATTTCGTGCCGGAAAGCACGCCGCTGTCGACCCAGTTGGTGAACTTCCAGAAGGAGAAGCGCCGTATCGGCGTGGTGGTCGACGAATATGGCGAGGTGATCGGCATCGTCACGCTGGAGGACATCCTCGAAGAGATCGTCGGCGACTTCAACGACCTCGACAGCCTCGACAACCCGGATATCCAGGCCCAGGAGGACGGCAGCTTCGTCATCGACGGCAGCGCCAACCTGCGCGAGCTGAACAAGTCGCTTGGCTGGCAATTGCCCTGCGACGGCCCGAAGACCCTCAACGGCCTGGTCACCGAAGCCCTGGAGCAGATCCCCGATTGCGCGGTGTGCCTGCGCATCGGCCCCTACTGCCTGGAAATCCAGCAGTCGGCGGAAAACCGTGTGAAAAGCGTGCGCGCCTGGCATCCCCGCGCGCTGACCCCGCTGGTCGAAAGCGACGCCAGCGTCTGATCCGGGATTGCCGAACCGGCCCCGCGGCGCTCTATAATCGACCCCAGCTTATCCAGCCCTGCCGCCGGTTCCGCCGCTACCCGCAGCCCGAGCCCCGGCCAGTCACCCGCCAGTCCCGCCTGCCCGCCCGGCCCCGATGCCGACGGTACCAGCCGGCGCACGGCGCGCAGCCCCATCCTCCCTGCGCGACCGATCTCGTCCGCCCCACCCGGGCGCCGCGCCGCCGCATCGGCGCACTGGCCATAACCATCCGGCCGCACGGAAGCACTCCGTCGTGCCCCGACTGCCAGGGACCTTCCCACCATGACCGCCGCCACCCTCGCCGCCGACGCCGCTCCGGAACCGGCCAACTCGACCACCCGGGTCGCAGTCGCCAGCTTCATCGGCACCGCCATCGAGTTCTACGATTTCTACGTCTATGCCACTGCCGCCGCCCTGGTGATCGGTCCGGTATTCTTCCCGCAGACATCCGGCACCGCGCAGATGCTCAGCGCCTTCCTCACCTTCGGCATCGCCTTCCTCGCCCGTCCGCTGGGCTCGGCGCTGTTCGGCCACTTCGGCGACCGCATCGGGCGCAAGTCGACCCTGGTCGCCTCGCTGCTGCTGATGGGGGTGTCCACCACCCTGATCGGCGTCCTCCCCGGCTACGACAGCATCGGCTACTGGGCGCCGTTGCTGCTCTGCGTGCTGCGCTTCGGCCAGGGCCTCGGCCTCGGCGGCGAGTGGGGCGGCGCCGCGCTGCTGGCCACGGAGAACGCGCCGGCCGGCAAGCGCGCCTGGTTCGGCATGTTTCCCCAGCTTGGCCCGTCGATCGGCTTCCTCGCCGCCAACGGCCTGTTCCTGGCCCTGGCGATGCTGCTCAGCGAGGAGCAGTTCCGTGAGTGGGGCTGGCGGATCCCGTTCCTGCTCAGCGCGGCGCTGGTTGTGGTCGGCCTCTACGTACGCCTGAAGCTGGCGGAAACCCCGGTGTTCGCCAAGGCCATGGCCAAGCACGAGAGGGTCCGCCTGCCGATCGCCGAGCTGTTCGCCCAGCACTGGCGGCCGACCCTGCTCGGCGCCCTGGCGATGGTGGTGTGTTATGCGCTGTTCTATATCTCCACGGTGTTCTCGCTGAGCTACGGGGTGGCCAGCCTCGGCTTCAGCCGCGAGGAGTTCCTTGGTCTGCTGTGCCTCGCGGTGCTCTTCATGGCCGCCGCCACGCCGCTGTCGGCCTGGCTCAGCGACCGCTTCGGACGCAAGCCGGTACTGC
TGCTCGGCAGCCTGGCGGCGATCGCCTCCGGCTTCGCCATGGAGCCGCTGCTCAGCCAGGGCTCGACGTTCAGCGTCGCCCTGTTCCTCTGCATCGAGCTGTTCCTGATGGGGGTCACCTTCGCCCCGATGGGCGCGCTGCTGCCGGAAATATTCCCCACCCACGTGCGCTACACCGGCGCGTCGGCGGCCTACAACCTGGGCGGCATTCTCGGCGCCTCGGTGGCGCCCTATATCGCCCAGAAGCTGGTCGGCATCGGCGGCCTGGGCTGGGTCGGCGGCTACGTCTCCGCAGCGGCGCTGCTCAGCCTGCTGGCGGTGCTGTGCCTGAAGGAAACCCGCGACAACGACCTCGGCGCAGTGTCCTGAGTCTCGCTCAGGCTACCGCGACGCCGCCCTTCTCCCTCGCCGCCAACCAGTCGGCGAGGGCCCCGGCCCAGACCTGGTAGCCCAGCTGCGACGGATGAAAGCCGTCGATGGCCAGGTAGCCCGGAAGCAGGTCCAGCGAGATCGGGCAGTAGCCGGCGCCGGTCGCGCCAGCGAGATCGCACAGCGCCTCGTCGAGCAGCCGCCCGCGCCACCCCAGCAGGGCCCGCAGCAACCCGGGCAAGGCGGAAAAATGCTGCAGCGGCGGCACCGCGGTGCAGGTCACCTGCGCGCCGGCCGCCTGGAAGCCGTCGATCAGGGCCGCGCAGTCGGCGCGCCAGCGTTCGAGGGAGCGCAGGCTGGTGGTGTCATTGACGCCGAAGACCAGGATCACCTGGTCGAAGAAACGTCCCGCCACCTGCGGTAGCAAGCGTTCGCGCGCCTCGCCGCTGGTGATGCCGTTCTCGCCCAGCGCCTGCCAGGCCACCGGGCGCCGCAGACGCTCGCCCAGCGCCGCGGCCAGGCGGCCGGCCAGGGCGTAGTCCAGGCAACTGGCGCCGACCCCGGCCACTGTCGACTCACCGAACAACAGCAAGCGCAGCGGCTCTTCGGCACCGGACCCACCGACCAGCCCTTCGCGAGGTCCCGCCGCGGGCGCCAGGCGCAAGGCTCGGCGGCGGGTGCGGATAGCCAGCGGCACCGCCAGCGGCAGCAACGGCAGAGCCGCCGCCCACCAGGCCAGCGTTGCCAGCCGGCGCATCTCAGAGTTCGACGCGAACCGCCTGGGCGGCCCGGGTCGCCTTGGCGCGGGCGGCGTCGATAGACTCGTCGCGCGCCAGGGCCACGCCCATCCGCCGCTGGCCATCCACTTCCGGCTTGCCGAACAGGCGTAGCGCCGTATCCGCCTCGCTCAGCGCGGCACCGAGGTTGGCGAAAGCGACCTGCCGGGACTTGCCTTCCACCAGGATCACCGCCGAGGCCGACGGGCCGAGCTGGCGGATCACCGGGATCGGCAGGCCGAGGATCGCCCGCGCGTGCAGCGCGAACTCGGAAAGATCCTGGGAAATCAGGGTCACCAGGCCGGTATCGTGCGGGCGCGGCGACACCTCGCTGAACCACACCTGGTCGCCCTTGACGAACAGTTCGACGCCGAACAGCCCGCGTCCGCCGAGGGCCTCGGTCACTGCCCGGGCGACCCGCTCGGACTCGGCCAGGGCCTGCGCGCTCATCGCCTGCGGCTGCCAGGACTCGTGGTAGTCGCCCTTGACCTGGCGGTGACCGATCGGCGCGCAGAAAGTGGTGCCGTCGACGTGGCGCACGGTGAGCAGGGTGATTTCGTAGTCGAAATCGATGAAGCCCTCGACGATCACCCGGCCCTTGCCGGCGCGCCCGCCTTCCTGGGCATAGTCCCAGGCCGCCTGCAGGTCGTCGGGGCCTTTCAGGACGCTCTGGCCCTTGCCCGACGAACTCATGATCGGCTTCACCACGCAGGGGTAGCCGACGCGCTCGACGCCACGGCGGTAGTCCTCGAAGGTATCGGCGAAGTGATAGGGCGAGGTCGGCAAGCCGAGCTCCTCGGCAGCCAGCCGGCGGATACCCTCGCGGTTCATGGTCAGTTGCGCGGCGCGGGCGGTG
GGCACCACGGTGTAGCCTTCGGCCTCCAGCTCGACCAGGGTCGCGGTGGCGATGGCCTCGATCTCCGGCACGATGTAGTGCGGCTTCTCCTGCTCGATCACCGCGCGCAGGGCGGCGCCGTCGAGCATGCTGATCACGTGGCTGCGATGTGCCACCTGCATGGCCGGCGCGTTGCCATAGCGATCGACGGCGATCACTTCGCAACCCAGGCGTTGCAGCTCGATGGCGACTTCCTTGCCCAGCTCGCCGGAGCCACAGAGAAGGACGCGGGTCGCGCTCGGCGACAGGGGGGTACCGATACGGGTCATGGAAGAACCTCGGAAATGAAAGAAAACGGAATCAGCTTCAGGCGGAACCGCTGACCAGCCCTTGCGCCTTGGCGCGCTCGAAACAGCGACCGAGCACCTCGCGGCGCTCGTCGTTGCTCATCAGGCCCCAGCGGGTGATCTCGGCCGCCGTGCGCTGGCAGCCGATACAGATGTCCTGCTCGTCCAGCGCGCAGACATGCACGCAGGGCGAAGCCACCGGTCGCTCGTTCACGCCGGGTCCTCGACGAGGTCGCGGGCGTAACGGCGGGCATTGTGCACATAGTGGGCGGCGCTGGCCTCGAGCATTTTCTTCTGCGGCTCGCTCAGCTCGCGCACTACCTTGCCCGGCGAGCCCATCACCAGCGAGCCGTCGGGAATCTCCTTGCCCTCGGGAATCAGCGCGTTGGCGCCGATGATGCAGTACTTGCCGATCTTCGCCCCATTGAGGATCACCGCGTTGATCCCCACCAGGCTGTAGTCGCCCACGCTGCAACCATGCAGCATGGCGTTGTGGCCGACGGTGACGCCCTTGCCGAGGGTCAGCGGATAGCCCATGTCGGTGTGCATCACCGAGCCGTCCTGGACGTTGCTGTGCTCGCCGATATGGATCAGCTCGTTGTCGCCGCGCAGCACCGCGCCAAACCAGACGCTGGCGCCGGCATCCAGGCGAACCTTGCCGATCACCGCTGCGCTGGGCGCGATCCAGCTGTCGGGATGGGTTTCGACGCGGGCATCGCCCAGGCGGTATTTCATCGCGATCTCCTCATGGCCGGGGTCTGCGCCCCGTCGGTCAACTCAGTTTGATGAACGAAGCCGGCGGCGTGTGCAGGTCGATGGGGTCGTCGTACAGCAGGTTGACCAGCTCCACCACCATGATGGCCGTCAGGCCCCAGATCTTGAACTCGCCGAAGCGATAGCTGGGCACGTACCAGCTGCGGCCGAAGTAGTCGATACGGTGGGTGACCTCGCGCGGGTCGTCGCGAAAGAAACTCAGCGGCACGTTGAACACCGCGGCGATCTCGCCGTCGTTGGGCTGGTACTCGACGAAGTCGGGAATGAAGGCGACATAGGGAGTGACCTCGATGCCATGCCGCGAGACCAGGGTGCTCAGCGGCCCGACCACTTCCACCAGGCCCGGCGGCAAGGCGATCTCCTCCTCCGCCTCGCGCAGGGCGGTGCGCACCAGGTCGGCATCTTCCGGGTCGCGCCGCCCGCCGGGGAAGGCGACCTCGCCGCCATGGGTGGACAAACCGGCGGCGCGCAGGGTCAGCACCAGCTCCGGATCGTCGCTGCGGGTGATGGGCACCAGCACGGCCGCTTGGGGGAAGCGCTGGTCGGTATCCAGTTGGCGGGGGCGGTGCGCCTCGATGCGTTGGCGCAGCTCGTCGAGCGTGCAGTTCATACTGCCGGGGTTTCCGAGGTCTTGTCGTTTCCCCGAGATCATGGCACGAAAGCCCCGGCCAGCCCACCCCTCCCGCACGGGGCCTTGTCGCCTCCCGGCCAGCCGAGCAAGATAGGCCTTTCCCCGAAGAAGAACCTGGCATGAAATTCTGCAGCCTGTGCGGCGCCACGGTGGTCCAGCGCATCCCGGACGGCGACAACCGCCTGCGCTACGTCTGCGACGCCTGCCACACCGTGCACTACCAGAACCCGCGTATCGTCGCCGGCAGCCTGCCGGTGTGGGACGGCCAG
GTCCTGCTCTGCCGTCGCGCCATTGCGCCGCGCCTGGGCTACTGGACGCTGCCGGCCGGCTTCATGGAGAACGGCGAGACCCTCGCCCAGGCGGCGGCCCGCGAGACAGAGGAAGAAGCCAACGCGCGGATCGGCGACCTGCAGCTCTATACCCTTTTCGACCTGCCGCACATCAGCCAGGTCTACCTGTTCTTCCGCGCCGAGCTGCTCGACCTGGATTTCTCCGCCGGCGATGAAAGCCTGGAGGTGCGGCTTTTCGACGAAGCGGAGATTCCCTGGTCGGAGCTGGCTTTTCCGACCATCGGCCGTACCTTAGAATGCTACTTCAGCGACCGCCGCGAAGGCTGCTTCCCGGTGCGCAACGAAGCCATTGCACCTATGCTGGCTTCCTATAAGAAAGACTGAACGCCAAACACTCGAACACCGCCGCCGTTGCGGCTCAAGGGAACATCGGGGATTTCACTTCCATGCGCTGGTTGCTTGCTTTGCTGTGCTTGACCTTCGCCAGTCTGACCCAGGCCAATGCCGCGCCGAGCCTGGAAGGCAAGGTGGACAAGGTCCTGGTGCTCAAGTCCGAGCGAAAACTGCTGCTCCTCAACAAAGGCAACGTCCTGAAAAGCTACCGTGTCTCGCTGGGCAAGCGCCCGACCGGCCCGAAGCTGGCCGAAGGCGACAACCGCACCCCGGAAGGCTTCTACTGGATCGACTGGCGCAAGACCAGCAACAACTACAACCTCTCCATGCACATTTCCTACCCCAACGCCCGCGACGTGGCCAAGGCCCGGGAAAAGGGCCTGCCGGCCGGTGGCATGATCATGATCCACGGCACCCCGCTGGATGACGAATACCCGGAGTGGTACTTCTCCACCCTGGACTGGACCAATGGCTGTATCGCCATGAACAACACCGACATGCGCGAAGTCTGGAGCGTGGTGAAAGACGGCACGCTGATCGAGATCCGCCCCTGACAGAACCAGGCTGATACCACTCGTCAGCCGTCAGCCACGCCAGGAACGCCGCCGCGAGATTTTCCGGCGGCGTTTTCTTTTATCCGTCAAACCCTTGCATCCCTTACTCAGGCCGAGTCAATCGACGGGACGGCTGGCTGACGCGTGCGCGACGCACAACTGGCATTGACGTGGTATTCAAGTGGTATTAGTTTTCCCAGCATTACCGGGCGACAACAACACAATCCGCATCGGAACCTCTCATGAAGACAGCCCACGACCTGCTCCTGGCCCTGCGTCCCGACGAGGCGCAACCCACCCCCCTCTATCTGCAGCTGGCGCGCAATCTCGAGAGCGCGATCCATGCCGGCCAGTGGAAAGCGGAGGAAGCGCTGCCCTCCGAACGCAACCTCAGCGAAACCCTGAACATTTCCCGCGTCACCGCGCGCAAGGCCCTCGAAGTACTCTTCGAGCAGGGCCTGATCCGCCGCAACCAGGGCTCCGGCACCTTCATTACGCCGCGTCTCGAACAACCGTTGTCGCGTCTCTCCAGCTTCAGCGAGATGCTCCGCCTGAAAGGCTTCACCCCCGGCTCCACCTGGCTGGAGCGCGGCATCGCCCTGCCCACCCACGACGAACTGATCCGCCTCGGCCTCTCGCCGACCGAGAAGGTCACGCGCATGAAGCGCCTGCGCAAGGCCGACGGCACAGTGATGGCGATCGAGAACAGCACCCTGCCGGCGCGCCTGCTGCCGGACCCGACAGCGGTCGGCGATTCCCTCTACGAATACCTGGACGGCATCGGTCGCCCAGTGGTCCGCGCCCTCCAGCACGTGCGCGCGATCAACGCGTCCGCGTCCGATGCCGCGCTGGTCGGCATCGCGCCGGGCACCGCGATGCTGCTGATGACCCGGATCGGCTACCTCGAGGACAACACCCCGATCGAGCTGACCGACACCTACTGCCGCAACGACTACTACGACTTCGTCGCCGAACTGCGACGCTGACCGACACGGAGCCGCCATGCTCGAAGGCAACATTCTCACCCCGCAA
GGCTGGGTCCTCGGCCGCCTACACATCCAGGACGGGCGTATCCAGCGGATCGAAGGCGAGCCCTGCGACCCGGCCGGCAACGCCGATCCGTACCTGCTGCCAGGCTTCATCGACCTGCATGTGCACGGCGGCGGCGGCCGCGACATCATGGAAGGCGGCGACGCCTTCGCCACCATCGCCCGCACCCACCGGCGCTTCGGCACGACCTCGCTGCTGGCCACCACCATGACCGCACCGCGCGAGGAGATCGCCGACATCCTCAGCCAGCTCGGCGCTTACTGCCGCCGCTCGCTGGAGGGCGGTTCACGGATACTCGGCGTGCACCTGGAAGGCCCCTACATCAACTCCGGCAAGCTCGGCGCGCAGCCCAACTTCGCCCATGCCGCGGTGCTCGAGGAAGTCGAGGACTACCTGCGCCGCGCGCCGATCCGGGTGATCACCATCGCCCCCGAGATCGCCGGACACCGGCCGCTGATCCGCGCCCTCGCCGAGCGCGGCGTACGCCTGCAGATCGGCCATACCCTGGGCAGCTACGAGGACGGCGTGGCCGCACTGGAAGCCGGCGCCAGCAGCTTCACCCACCTCTACAACGCGATGACCGGCCTGCACCACCGCGAACCGGGTATCGTCGGCGCGGCCCTGGCCCACGCACGCTACGCCGAGCTGATCCCCGACCTGCTGCATGTCCATCCCGGCGCCATCAGGGTGGCCCTGCGTTCGATCCCGTGCCTGTACTGCGTGACCGACTCCACCGCCGCCGCCGGCATGCCCGACGGCCAGTACAAGCTCGGCAGCCACACGGTGACCAAGTGCCTGGGCGGCGTGCGCCTGCCCGACGGCACCCTGGCCGGCAGCACCCTGACCATGGACCAGGCGCTGCGCAACCTGGTGAAGATCGGCCTGCCCCTGGCCGAAGCCTCGCAACGCCTTTCGCAATTCCCCGCCGATTACCTCGGCCTCGCCGAGCGCGGCCGCCTGGCGCCCGCAGCCTGGGCCGATGTGGTGCGCCTGGATCGCTCCCTGGAACTCGACGCCGTGATGGTGGAAGGAGAACTCGAATGACTTCGCTGATGCTCGAGGAAGCGCTCTCCGCCGCCGCGGTGGCGAGCCGTCAACTGGCCGTGCTCGACGCTTGCCTGCCGGCGCTTGGCCAGCGCCTGCGGGAGGTCGATCCAAACCTGGCGCTGACCGTCGCGCGCGGCAGCTCCGACCATGCCGCCAGCTACTTCGCCTACCTGGCCATGCAGCACGCCGGCTTGCCGGTGGCCTCGCTGCCCATGTCGGTAGTGACCCTGAACCGCTCGCCGCTGCGTGTCGCCGGGCAGGCCGTCTTCGCTTTCTCGCAGTCAGGACAGAGCCCCGATCTCGTGGACAGCCTGCGCATCCTGCGCGAACGCGGCGCCCTGGGCATCGCCCTGGTGAATGCCGAAGATTCGCCGCTGGAAGCCGCCAGCGAATTCGTCGTGCCGCTCTGCGCCGGCACCGAGCGCAGCGTCGCAGCGACCAAGAGCTTCATCGCCACGCTCAGCGCCAGCGCCCGCCTGCTCGCCCACTGGCAGCGCGACAATGCCCTGCTGGCCGCCGGCCAGGGCCTGGCGGCCGGGCTGGAGCAGGCCGCCAGGCTCGACTGGTCGCCGGCCATCGAGGCCCTGCGCGACTGCCAGCGGCTGATGGTGATCGGTCGCGGCGCCGGCTACGCCATCGCCCAGGAGGCCGCGCTGAAATTCAAGGAAACCTCGGCGATCCAGGCCGAGGCCTTCAGCAGCGCCGAGGTCCGCCATGGACCGATGGCGCTGGTCGAGGAGCGCTATCCGCTGCTGGTGTTCGCTCCCCGCGGCCCGGAGCAGGAAGGCCTGCTGGCGCTCGCCGAGGATATGCGCCAACGCGGTGCGCAGGTATTGCTCGCTGCCCCGGACGACATCGCCGAACGCGACCTGCCGCTGCAGCGCGCCGCGCATCCGGCGCTCGATCCGATCCTGGCGATCCAGAGCTTCTACGTC
ATGGCCGCCGGCCTGGCCGAAGCCCGCGGCATGGACCCGGACCAGCCACGGCACCTGAGCAAAGTCACCCGAACCCACTGAGCCTGAGCGCGCGCCCATGAACAACAAGAATCTCGCCCTCAAAGCGCCCCTGAGCGGCCCCGTGATGCCGCTCAACCGAGTACCCGACCCGGTGTTTTCCAGCGGCACCCTGGGCGAAGGCATCGCCATCGACCCACTCAACGACTGCCTGCACGCGCCCTGCGCCGGGCTGGTCAGCCACCTGGCGCGAACCCGCCACGCGCTCAGCCTGCGCGCCGACAACGGTGCCGAGCTGCTGCTGCACGTCGGCCTCGATACAGTACAGCTGCAAGGCGAAGGCTTCGAGGCGCTGGTCGAGGAAGGTGCGCGGGTGATCGAGGGCCAGCCGCTGCTGCGCTTCGACCTGGACCGCGTCGCCCGCGGCAGCCGCAGTCTGATCACGGTGATGATCCTGACCAACGGCGACGGCTTCCAGGTACGCCCGCTGACCACCAACCCGGTGGAGGTCGGCGCGCCGCTCCTGCAACTGAGTCCGGAGAAGGCCGAGCAACGTCCGGCCAATCCCGCGCCTGGCGAAGGCTCCGCGCAGCGCCAGGTCCGCGGGCGGGCGCGGGTCGCCCATCATGGCGGTCTGCACGCACGCCCGGCAGCGCTGCTACGGAAGACCGCGCAGGGCTTTTCCAGCCAGGCCGAACTGCACTTCGCCGGGCAGGTGGCCAGCGTCGACAGCCTGGTCGGCATCATGGGCCTGGGGGTCGCCGAACAGGACGAGGTAGAGGTGATCTGTCGCGGCGAAGACAGCGAGGCGGCCCTCGACGCCCTGCTCGCGGCCCTCGCCAGCGCCACCGCGGGGGCTCCGAAGGAGGCCCCGCGCGCCATCGCCTCGGGCGAGCCGGCGCGACCTGCGGCTGTCGCCGGTACCCTGGCCGGCGTCTGCGCTTCGCCCGGCCTGGCCAGCGGTCCCCTGGCGCGGCTCGGCGCCATCAGCCTGCCGGCGGATGACGGCCGGCATCGTCCCGAGGAACAGCACCTCGCCCTGGACCAGGCCCTGCAACGGGTGCGCGACGACGTACAGGGGAGCCTGCAGCAGGCCAGGCTCGGCGGCGACGAGAACGAAGCGGCGATCTTTTCCGCGCATCTCGCGCTGCTGGAAGACCCGGGTCTGCTGGACGCCGCCGACATGCTGATCGACCAGGGCGTCGGCGCCGCCCACGCCTGGCACCGGGCGATTCAGGCCCAGTGCGAGATTCTCCAGGCGCTGGGCAACCTGCTGCTGGCCGAGCGCGCCAACGACCTGCGCGACCTGGAAAAACGCGTGCTGCGGGTGCTGCTCGGCGACACCGCACCGTTGCGGGTGCCTGCTGGGGCCATCGTCGCCGCCCGGGAGATCACCCCCTCCGACCTCGCGCCGCTGGTGGATGCCGGCGCGGCTGGCCTGTGCATGGCCGAAGGCGGCGCCACCTCCCACGTGGCCATCCTCGCCCGTAGCAAGGGCCTGCCGTGCCTGGTGGCGCTCGGCGCCGGGCTGCTGGAGCTGGAGGAAGGCCGGCAGGTAGTACTGGACGCCGGCCAGGGCCGGCTGGAACTCAGCCCCGACGCTCGACGCCTGGAGCAGGTCGCCTTGCAAGTGGCGCAGCGCGAGGAACAACACCGCCGCCAGCAGGCCGATGCGCAGCGCGAGGCGCTCACCCGCGACGGCCGGCGCATCGAGATCGGCGCCAACGTCGCCTCGCCGCGCGAGGCCGCCGAAGCCTTCGCCAACGGCGCCGACGGGGTCGGCCTGCTACGCACCGAGTTTCTCTTCCTCGAGCGCCGCGCCGCGCCCGACGAAGAGGAGCAGCGCAACGCCTACCAGGAGGTCCTGGACGCCATGGGCCAGCGCAAGGTGATCATCCGCACCATCGATGTCGGCGGCGACAAGCACCTCGACTACCTGCCGCTGCCGGTGGAAGAAAACCCAGCACTGGGCCTGCGCGGCA
TCCGCCTCGGCCAGGCGCGCCCGGAGCTGCTCGACCAGCAGTTGCGCGCGCTGCTGAGGGTCGAACCACTGGAACGCTGCCGGATACTGCTGCCGATGGTCAGCGAGGTCGACGAACTGCGCGCCATCCGTCGCCGCCTCGGCGAGCTGGCCACGCAGTTGGGGATCGAGCGCCTGCCCGAACTCGGGGTGATGATCGAGGTGCCCTCCGCCGCCCTGCTCGCCGATCAACTGGCCGAACACGCCGACTTCCTCTCCATCGGCACCAACGACCTGTCGCAGTACGCCCTGGCCATGGACCGTTGCCACGCCGGCCTGGCCGACCGCATCGACGCCCTGCACCCGGCGCTGCTGCGGCTGATCGCCCAGACCTGCGCCGGAGCAGCCCGCCACGGCCGCTGGGTCGGCGTCTGCGGTGCGCTGGCCTCGGACCCGCTGGCGACACCGGTACTGGTCGGCCTCGGGGTCGAGGAATTGTCCGTCGGGCCGAACCTGGTCGGCGAGATCAAGACCCGGGTGCGCCAGCTCGACGCCGCCGAATGCCGCCGCCACGCCCAGGCGCTACTGGACCTGGGCAGCGCCCGGGCGGTGCGCGACGCCTGCCTGCAACACTGGCCGCTGGCCTGACAAGAACAATCGGAGACCACCGCCATGCCCCCGTTCCTGATCGAAAGCCTGCAACGCCTGGGCCGCGCCCTGATGCTGCCGATCGCGATCCTGCCGATCGCCGGCCTGCTGCTGCGCCTGGGCGACGTCGACCTGCTCGACATTCCGCTGGTCCACGACGCCGGCAAGGCGATCTTCGCCAACCTGGCGCTGATCTTCGCCATCGGCATCGCCGTCGGTTTCGCTCGCGACAACAATGGCACCGCCGGCCTCGCCGGCGCCATCGGCTACCTGGTGATGATCTCGGTGCTCAAGGTGATCGATCCGGGCATCGACATGGGCGTGCTCTCCGGCATCATCAGCGGCCTGGTGGCTGGCGCCCTGTATAACCGCTTCAAGGACGTGAAGCTGCCGGAATACCTGGCGTTCTTCGGCGGCCGCCGCTTCGTGCCGATCGCCACCGGGATCAGCGCGGTCTGCCTGGGCCTGCTGTTCGGGGTGATCTGGCCACCGCTGCAACAGGGCATCAACGGCCTCGGCCAACTCATGCTGGAAAGCGGCAGCTTCGGCGCCTTCGCCTTCGGCGTGCTGAACCGCCTGCTGATCGTCACCGGGTTGCACCACATCCTCAACAACCTGGTGTGGTTCGTCTTCGGTAGCTTCACCGACCCGGAAACCGGGCGCGTCGTCACCGGCGACCTGGCGCGCTATTTCGCCGGCGACCCGAAGGGCGGCCAGTTCATGGCCGGGATGTTCCCGGTGATGATGTTCGGCCTGCCCGCCGCCTGCCTGGCGATGTACCGCAACGCCCGTGCGGAGCGGCGCAAGCTGATCGGCGGGCTGCTCCTGTCGATGGCCTTGACCGCCTTCCTCACCGGCGTCACCGAGCCGGTGGAGTTCGCCTTCATGTTCCTCGCGCCGCTGCTCTACCTGCTGCATGCGCTGCTCACCGGCCTGTCCATGGCCCTCACCGACCTGCTCGACATCCGCCTCGGCTTCACCTTCTCCGGTGGCGCCATCGACCTCGCCCTGGGCTGGGGCCGCTCCACCCACGGCTGGATGCTCTGGCCGCTGGGCCTGCTCTATGCCGGCATCTATTACCTGGTGTTCGACTTCTGCATCCGCCGCTTCAACCTGAAGACCCCGGGCCGCGAAGACGACGCGAGCAGCGAATCGGGCGACAACGCCGAAGCCGAGCGCGCCCCGGCATTCATCCGCGCCTTGGGTGGCGCAGCCAACCTCGAAGTGGTGGACGCCTGCACCACGCGCCTGCGTCTGCGCCTGGTCGACCGCGACAAGGCCTCGGACGCCCAACTCAAGGCCCTCGGTGCGATGGCCGTGGTACGTCCGGGCAAGGCCGGCAGCCTGCAGGTGGTGGTCGGCCCACAGGCCGAC
AGCATCGCCGACGAGATCCGCCGCGCCCTGCCCTTCGATACGCAACCGGGCGAAGCCGTACCGCCGCTGGGCAGTCCGCACACAGCCGAAGAGGTGGTGGCGATGCAGGCCACCGTCGACGCCGCCGAAGCCCAGGCCTGGCTCGGCGCCCTCGGCGGCGCCGGCAACCTGCGCGAGGTACGGGACGTCGCACTGACCCGCCTGCGGGTCAGCGTGGCGGACGAACGCAAGCTGGCCACCGAGCAACTGCGTCGTCTCGGCGGACAGGGCGTCAGTTCCCTCGCCGGCGGCATCTGCCATATCCTGGTGGGCCCCCGCGCCGCGGCCCTGAGCCAGGCCCTGCAACCGCTGCTGCGGCGCTGACGGCAGGAGGGCTGGACCTCGGCCAATCGGCGGATAACCGCCAAGCGGTTATTCGCCCCACGCCACAGCTCCCCCGTTCGGGTTCGATAATCGCGAAGCGGTTGTTCGCGCCACGCCACAGCCCCGGGTCGGGCTCTCCCCGTGAGGCGGATAACGAAAAGGGCGGCCACTGGCCGCCCTTCTTGCTTGTCCGAGGAAAACTCAGAAGACCATGTCGGTCAGGCGCCAGACTTCGTAGGCCGGGGTCTCATAGGGATGCGCCGCCTTCAGCGCCTTGACGCTGGCGTGGATGAGCTCGTCGGCCACCACCAGCTCCACTTTCCACTCCGCCACGTGCTCGACCTGGCCGACCTGGCCCAGGTAGGGTTGGCTACCGTCCAGGGGCCGGAACTGGCCCTGGCCAAGGGACTGCCAGCAGCAACTGTCGTACGCCCCGATGCGCCCGCCACCCGCGGCGAACACCGCTTGCTTGACGACATCCAGATGGCTCTCAGGCACATAGAAACACAGCTTGTACATCGTTTTCTCCGCACGAATCGAATGCTGCACGAGGCGGTCGGTGGTTGTGCGACTGCCGACGCCAAATAATGGCACTTTGCCTTGACATGATGAATATTTGACGCTGCCTTTACGACAGATAAGCGCAACATTTGTTCGCTCGTCACAACACCTTCGTGCGGGCGTTCACAGATTTACGTTCAGCCGCGGAAACGCCGCCACCTGGCCTTCAGCCCGGAGGCGGAAGGCGCATGGCTGCCGTCGCAATACGGCAGGTCGGCGGAGCGCCCGCAGCGGCACAACAGCAACAGGCGCTCGCGGGTCGCGTGGAGCGTCAGGCCGTTGCGGCAGTCGGCGGGACAATCGGGAAGCGAAGCCGAAGCCCCGCAGGTACAGAGCCGCAAGGTGTCGCCGGGAGAGACCCGGCGAACCTCGGGAAGAAGGTCGGGCGACGAGTTGGAGTCCGCATCGCCCGCCATGCCGGATCAGTCCACCCAGACCCGGGCGTTGCGGAACATGCGCAGCCAGCCGCCGTCTTCCTGCCAGTCGTCCGGACGCCAGGAATTCTGCACGGCGCGGAACACCCGCTCGGGGTGCGGCATCATGATGGTGACCCGGCCGTCGCGGCTGCTCAGCCCGGTGATGCCGCGCGGCGACCCGTTGGGGTTGGCCGGGTAGGCTTCGGTGACCTTGCCGTGGTTGTCGACGAAACGCAGCGACACGCAGCCGGAAAGGTCGGCCTCGAGCAACGCTTCCTCCGACTCGAACTCCGCATGGCCTTCGCCATGGGCGATGGCGATCGGCAGGCGCGAACCGGCCATGCCCTGCAGGAAGATCGACGACGACTCCTGCACCTGGACCATCGCCACCCGCGCCTCGAACTGCTCGGAACGGTTGCGCACGAAGTGCGGCCAGAACTCGGTGCCGGGAATCAGCTCGTGCAGGTTGGACATCATCTGGCAGCCGTTGCAGACGCCGAGGGCGAAGCTGTCCTTGCGCGCGAAGAACGCCTGGAAGCCGTCGCGGGCGCGGGCGTTGAAGAGGATCGACTTGGCCCAGCCCTCGCCGGCGCCGAGCACGTCGCCGTAGGAGAAGCCACCGCAGGCCACCAGGCCCTTGAAGGCGTCCAGGTCGACCCG
CCCGGCGAGGATGTCGCTCATGTGCACGTCGATCGCGGCGAAGCCGGCGCGGTCGAACGCCGCGGCCATCTCCACCTGGCCGTTGACGCCCTGCTCGCGGAGGATCGCCACTTTCGGCCGCACGCCCTTCTTGATGTAGGGCGCGGCGATGTCGTCGTTGACGTCGTAGCTGAGCTTGATCGACAGCCCGGGGTTGTCTTCGTCCAGCAGCGCGTCGAACTCCTGCTCGGCGCAGTCGGCGTTGTCGCGCAGACGCTGGATCTGGTAGCTGGTCTCGCTCCAGATGCGTTGCAGGATGCGGCGCTGGGCGCTGTAGACGGTCTCGCCGTTGTAGTTCAGGTTGATCTCGTAGCCGTTGACCGGCTGGCCGATCACCGCCACGCAGTCGTCGAGACCGGCGGCGCTGAACTGCGCGAGGACTTCCGGAGTGGCACCCTCGCGAACCTGGATCACCGCGCCCAGTTCCTCGCTGAACAACACGGCCGCCAGCTCTTCGCGGCTATCGGCCAGGGCATCGAGGTTCAGCTCGACGCCGCAGTGACCGGCGAAGGCCATCTCCAGCACGCTGGTGATCAGGCCGCCGTCGGAACGGTCGTGGTAGGCGAGGATATGGCCGTCGGCATTGAGCCCCTGGATCACCGCGAAGAAGGCTTTCAGGTCCTCGGCGTCGTCGACGTCCGGCACGGCGCGGCCGAGCTTGCCGTGGACCTGGGCCAGGATCGAGCCGCCGAGACGGTTCTTGCCGCGGCCGAGATCGATCAGGATCAGGTCGGTCTCGCCCTTGTCCAGGCGCAGTTGCGGGGTCAGGCTCTGGCGCACGTCGGCGACCGGGGCGAAGCCGGTGACGATCAGCGAGACCGGAGAAGTGACGCTCTTGTCCTCGCCATTGTCCTGCCAGCGGGTCTTCATGGACATCGAGTCCTTGCCCACCGGGATGGTGATGCCCAGTTCCGGGCACAGTTCCATGCCCACGGCCTTGACCGTGTCATACAGGCGCGCGTCCTCGCCGGGGTGGCCGGCGGCGGCCATCCAGTTGGCGGAAAGCTTGATGTCGGACAGCTTGCCGATACGCGCGGCAGCCAGGTTGGTGACCGTCTCGCCGATGGCCATGCGTCCGGAAGCCGGGGCGTCCAGCAGCGCCAGCGGGGTACGTTCGCCCATCGCCATGGCCTCGCCGGTGTAGACGTCGAAGCTGGTGGCGGTGACGGCGCAGTCGGCCACCGGCACCTGCCAGGGCCCGACCATCTGGTCGCGGGCCACCAGCCCGGTGATGGTGCGGTCGCCGATGGTGATCAGGAAGCTCTTGCTGGCCACGGCGGGATGGCGCAGGACGCGCTCGACGCTTTCCTGCAGCTCCAGCCCGGCGGCGTCGAAATCGTCGCCCAGCTCGGCCTCGCGGGTGACCGCGCGGTGCATGCGCGGCGCCTTGCCGAGCAGGACTTCCAGCGGCATGTCCACCGGCTTGTTGTCGAAATGGCTGTCGGCGACGGTCAGCTGGCGCTGCTCGATGGCCTCGCCGACCACCGCGAACGGGCAGCGCTCGCGCTCGCAGATGGCCTTGAAGGTCTCGAAGTCGGCGGCGTCCACCGACAGCACGTAGCGCTCCTGCGACTCGTTGCACCAGATTTCCAGCGGGCTCATGCCCGGCTCGTCGTTGGGCACCGCGCGCAGCTCGAAGCGACCGCCGCGGCCGCCGTCGTTGATCAGTTCCGGCAGGGCGTTGGACAGGCCGCCGGCACCGACGTCATGGATGAAGCTGATCGGGTTGCGCTCGCCGAGCTGCCAGCAGCGGTCGATCACTTCCTGGCAACGTCGCTCCATTTCCGGGTTGTCGCGCTGCACCGAGGCGAAGTCGAGGTCGGCGGAGCTGGCGCCGGTGGCCATCGAAGAGGCGGCGCCGCCGCCCAGGCCGATCAGCATGGCCGGGCCGCCAAGGACGATGAGCTTGGCGCCGACGCTGATCTCGCCCTTCTGCACGTGTTCGTCGCGAATGTTGCCCA
TGCCGCCGGCGAGCATGATCGGCTTGTGGTAGCCGCGCACTTCCTCGCCGTGAGGGGTAGCGATCTTCTGCTCGAAGGTACGGAAGTAGCCGGTCAGCGCCGGACGGCCGAACTCGTTGTTGAACGCGGCGCCGCCCAGCGGGCCCTCGATCATGATGTCCAGCGCGGTGACGATGCGCTCGGGCTTGCCGTAGGGCACTTCCCAGGGCTGTTCGAAACCGGGGATCTGCAGGTTGGACACGGTGAAGCCGGTCAGGCCGGCCTTCGGCTTGGCGCCGCGGCCGGTGGCGCCCTCGTCGCGGATCTCGCCGCCGGAACCGGTGGAGGCACCGGGGAACGGCGCGATGGCGGTCGGGTGGTTGTGGGTCTCCACCTTCATCAGGATCTGCACCGGCTCGCGGCTGGCGGCGTATTCGCGGGTCTGCGGATCGGGGAAGAAACGCCCGGCGACATGGCCGACGATGACCGCGGCGTTGTCCTTGTATGCGGACAGCACGCCTTCGCGGTTCATCTCGTAGGTGTTCTTGATCATGCCGAACAGGCTCTTGTCCTGGGCCTGCCCGTCGATGTCCCAACTGGCATTGAAGATCTTGTGCCGGCAATGCTCGGAGTTGGCCTGGGCGAACATCATCAGCTCGACGTCGTGCGGGTTGCGCCCCAGTTCGCCGAAGCTCTTCAGCAGGTAGTCGATCTCGTCCTCGGCCAGGGCCAGGCCCAACTCGACGTTGGCCTTCTCCAGCGCGGCGCGACCGCCGCCAAGCACGTCGACGGCGGTGAGCGGGCGGGGCTGCGCATGGCTGAACAGCTCGGCGGCGCCTTCCAGGCGGTCCAGCACCAGTTGGGTCATGCGGTCATGCAGGCGGGCGGCGACCTGCTGGGCGTCGCTCTCGGACAGTTCGCCCTGCACATAGTAGGCAATGCCGCGCTCCAGCCGGTCGATCTTCGCCAGGCCGCAGTTGCGGGCGATGTCGGAGGCCTTGCTCGACCACGGCGAGATGGTGCCGAAGCGCGGCACCACCAGGAACAGCCGTCCGCTGGGCTCCTGCACCGGCACGCTCGGGCCGTATTTCAGCAGCCGCGCCAGCACCTGCTCCTCGTCGGCGGTGAGCGCGCCGGTGACGTCGGCGAAATGCGCGAACTCAGCGTACAGCCCGGTAACGGCAGGTACGTGCTGGGTCAGTTGCTCGAGCAGTTTGCCGTGGCGGAAAGCGGAAAGGGCGGGAGCGCCGCGCAGGATCAGCATTGTCGGAACAGCCTCTGGAGACGGGAAGGTGTGGGGGCCGGCCGGGCCGGCCCGCAGAGGCCGTGCATTCTACCGTAAAGCACCGTCCGGCGGCACCCGCGCCGATGGCTATCGCATAGCCCTCCATCGGATCCGCGGCGGCCCGCTTCGAAGCACTTCGAGAACGGCGCCGCAGCGCTCTGCGACGGGGCAAGCGAAACGGCGGACGAACGGTATCCCGGCGATAGCAGAGAAGGTCCCGGCTGTCGAGATATGGCGCCGTCTCGCCTTTGCGTATACTGCGCCGATGTTTGCCCTGACCGCGTACCGCTTACGTTGCGCCGCCTGGCTGTTGGCGACCGGCATCTTTCTGCTGCTTGCGGGCTGTAGCGAGGCGAAAGCCCCTACCGCCCTGGAGCGCGTGCAGAAGGAGGGCGTACTGCGCGTGATCACCCGCAACAGCCCGGCCACCTACTTCCAGGACCGCAACGGCGAAACCGGCTTCGAATACGAACTGGCCAAGCGCTTCGCCGAGCGCCTCGGCGTCGAGCTGAAGATCGAGACCGCCGACAACCTCGACGACCTCTATGCCCAGCTTTCCCGCGAGGGCGGCCCGGCGCTCGCCGCGGCCGGCCTGACCCCGGGACGCGAAGACGACGCCAGCGTGCGCTACTCGCACACCTACCTCGACGTCACCCCGCAGATCATCTACCGCAACGGCCAGCAGCGCCCGACCCGCCCGGAAGACCTGGTCGGCAAGCGCATCATGGTGCTCAAG
GGCAGCAGCCACGCGGAGCAGCTCGCCGAGCTGAAGAAGCAGTATCCCGAACTGAAGTACGAAGAATCCGATGCTGTCGAAGTGGTCGACCTGTTGCGCATGGTCGACGTCGGCGACATCGACCTGACCCTGGTCGACTCCAACGAACTGGCGATGAACCAGGTGTACTTCCCCAACGTCCGCGTCGCCTTCGACTTCGGCGAAGCCCGCGGGCTGGCCTGGGCCTTGCCGGGGGGCGACGACGACAGCCTGATGAACGAGGTCAACGCGTTCCTCGACCAGGCCAAGAAGGAAGGCCTGCTGCAACGCCTGAAGGACCGTTACTACGGGCATGTCGACGTACTCGGCTACGTCGGCGCCTACACCTTCGCCCAGCACCTGCAGCAACGCCTGCCGCGCTACGAAAGCCACTTCAAGCAGAGCGGCAAGCAGCTGGATACCGACTGGCGCCTGCTCGCCGCCATCGGCTACCAGGAATCGCTGTGGCAGCCCGGCGCCACCTCCAAGACCGGCGTGCGCGGCCTGATGATGCTGACCAACCGGACCGCCCAGGCGATGGGCGTGTCCAACCGGCTCGACCCGAAGCAGAGCATCCAGGGCGGCAGCAAGTATTTCGTGCAGATCCGCAGCGAACTGCCCGAGAGCATCAAGGAACCGGACCGCAGCTGGTTCGCCCTGGCCGCCTACAACATCGGCGGCGCGCACCTGGAAGACGCGCGCAAGATGGCCGAGAAGGAAGGCCTCAACCCGAACAAGTGGCTGGACGTGAAGAAGATGCTGCCGCGCCTGGCGCAGAAGCAGTGGTACGCCAAGACCCGCTACGGCTATGCGCGCGGCGGCGAGACCGTGCACTTCGTACAGAACGTGCGGCGCTACTACGACATCCTCACCTGGGTGACCCAGCCGCAGATGGAAGGCAGCCAGATCGCCGAGAGCGGGTTGCACCTGCCCGGCGTGAACAAGACGCGCCCGGAAGAAGACAGCGGCGACGAGAAACTCTAGCCGCCCCCTCCCCTCAGGATTCTCCGTCGCTTGCCAGCCAGGCGGCGTGCTCGCGCACATCGGTCGGCCAGCCGGCGATCCGCTCGGCGAAGCCCTCGCGATCCCCGGCGAACAGCGCACGCGTCGCCTCCTCGAAGCCCGGCAGGTCGCCGGCCATCGCATTCATGAAATGGTAGGCGCGCTCCTGCGCCTGCCGCGCACGCCCGGACTGGCTGCGGCGCGCCTCCTCGACCAGCTTGCGCAGCGCCACCGAGGCGCCCCCGGGCTGCGCGGCGAGCCATTCCCAGTGGCGCGGCAACAAGGTCACCTCGCGCGCCACCACGCCCAGCTTCGGCCGCCCGCGCCCGCGCGGCGCGGCCTCCTCCGCCGGTTGCCGCTCGCCTGCGACGACCAGGCGCGCCAGCTGGTCCTCGCTGGAACCACGGATATCGAGATCGACGGAGCGCCCAGTGGCATTGTCGAATACCAGCAGCGGTCCGGCTGCACCGCCGGCGACCGCTTGCTTCAAGGCCAGCGCGACCTCCGGCAGCGCCGCCGTCAGCAGCCGGCGCGTGCCGTCGAAGCAGGTGAAGGATTGGATGTATGGATCATTGGCCATGACAGGCTCCCATGGATGGAACGCCCGAATAATACCCGGATATAAATAGAGCGACAATATTATCCGGATAAAATAAATCCCATGAAAAAGGCCGCCCTATGGCGGCCTCTTCCGGCAGCGATGACCTACCAGCCGGCCGGCAGCAGGTTCATGGCGCTGCAAATGGCGTAGGCGGCGCTGGCCAGGTAGATCGTCACGATCACCGCCTGGATCGCCGGATGGGCGATCCAGCCGCATTTCCAGGCCGGCTCGAAGTCGCCGTTCCTACGCGCCGAGCGCAGCATCAGGATCGGCATGATCGAGAGAATCACCCCGCTGAAGGCCCCGGCGAAGTACAACGCGTTGACGAAGCCGACCAGCCCGCTGTAGGCCAGGACGAACGGCGGCAGCGCGACGATCG
CCAGCACCAGCAGGCGGGTCTTCGGCCGGTTCTCCGGACCGAGCTTGTGGAACTTGTCGAAGATGTTGGTCAGGAAGCTACCGCCCAGGCCCCAGTAGGACGTCAGCATGGCGCACAGGGCGAAGGTGTTGGCGGTGAAGAAGGCCCATTCGCCGAGCGCCTTGCCCCAGGCCAGGGTGGCGACTTCGGACTGGTTCTCCAGGCCGGTCAGGGCGATCACCGAGAGCGGCACGATGCTGAGCAGGACGAAGGTGGTCAGCATCCCGGTGATCACCGCCTTCGGCAGGCGCTCCGGGGCGTGGCTGAAACCACGGGCCATCTCCGGGACGATGTACTGGGCGGAGAAGCAGAACACCGCGATGTTGAATACCGGCACCATGTAGATCCAGTCGCCGTCGAGCAGACGGGCGAACTCGGTATTGTCGTTGAGCAGGGTGGCCGCCACCAGGATCAGGATCATGCTGACCATGCCGATGCTGATGAACTTCTCGCCCTTGCCGATGGCGCTCAGCCCCAAGTACAGCACGCCGGCGGCCGGAATGAAGAACAGCACGCTGCCCAGGGCCGGGCTGATGCCGAAGAAGGCGCTGAGGATCTTGCCGCTGCCGCTCATGTAGGCGATCAGCGCGCCGATGCTGTTGACCGCCACGGACAGGAAGATCGCCCAGGCGCCGAAGGAGCCGACATAGCGCTGGGCAAGGCCGCTCAACTGGTTGTGGGTGCGCGTGCGCAGGGCGGTTTCCGAGACGTAGAGCATGGAAATGGTGGTGAACAGGCCGGCCACCGCCAGCCAGAGCAGCAATGGCATGAAGCCGGCCTTGCGGCTGGCGTAGGCCATGGACAGCACGCCTGCGCCGATGTTGGTTCCGACGATCATCGCCACCGCCTCGAGGAACGTCAGGCGCTTGACCTCCAGTCCCGAGGAATCGACGGCTTCCGCCCCGGCGTGATCCTGATACTGCTGGACCCGGGTATCACTCATGACAACAACTCCCTGCATTTCGAATTGGCATTGTGCGGGAGGCGGCCAGGACCGCCGTACCGATGGCCTTCTTCGAGGACAATGCAGGGCCGGGAATCGCGGACGCCTCCGGACCGGCCGCGCTTTTCGGCTGCGCGGTCCGGCGGGGGTATCCGCAGGCGGGCTCGCTCTCACCTGAAGCCATTGCCTGCGGGCCGCTTGCTCAAGCGGCCGTGCAAAATATCACACAATTCGACGAAGGAAGCGCGCGGCGACTACGACTTTACGCCTATGCCGATCGTCCGAAATATCCGGATAAATAATTGATGCAGATGGCTTTTGAAGGAGAGGAAGGGGCAAGGACGTGCTTTAACGTTTCAGCCGGAGCGTTCGAAAAGAGAAACATCCGTCAGTCTTCGCGAGGCTTCTTCGCCGCTTCGCGCCGCGCGCGGAAGAACGCCGAGAGCGCCTGGCTGCATTCTTCCGCCAGCACGCCACCCTCGACCATGACCCTATGGTTGAGATGCTCCTGCTCGAAGAATCGCCCACGGCTTTCCACTGCGCCCGACTTGGGCTCGACCGTGCCGTACACCAGGCGCTGGATGCGTGCGTGCACCAGCAGTCCGGAACACATGCTGCAGGGTTCCAGGGTCACATACAGGGTGCTGCCTGGCAGCCGGTAGTTGCCCGCCTCCGCCGCTGCCATGCGGATCGCCAGCATTTCGGCGTGGGCACTGGGATCGTGGCTACTGATCGGCCGGTTGAAGCCCCGCCCGATGATCTCGCCCTCGCGCACCAGGACCGCGCCCACCGGCACTTCGCCGAGGGCCGCGGCCTTTTCCGCCTCTGCCAGGGCCTCGCGCATGAAATGCGGGTCCCGGCTGCGATCGATGATCGGACGCACGCCTTTTACCGTCGGGTAGCCCTTGCCGTTGCTGATCAGCTTAGTCACCCGCACCAGGCCTCGCCGACCGCGATGGTCCCCATCAGGCCGGTCTCCATATGGTCGATGACGTGGCAGTGGAACATCCACAGGCCC
GGATTGTCCGCCACCAGCGCCACGCGGGCAGTCTCGTTCTTGCCCAGCAGGTAGGTGTCGGTGAAGTACGGGATGATCTCCCGGCGATCCGAATCCAGCACCTTGAAGGCCATCCCATGCAGGTGGATCGGGTGCTGGTACTGCGCCATGTTGCGCAGCTCGAAGATGTAGCTCTGGCCTTCCTTGAGCTTGGCCAGCGGCGGCGCGTTGTGCTTGTGCTCCTCGCCGCCTTCCCAGGCCTTGCCGTTGATCTGCCAGAACGACGGATAGGGATTCTTGCCGCTGGTATCGGACATCGCCCCGACCCACTCGAAACGGAAACCGATCTTCTCGGCATTGGCCAGGTCCGGTTCGCTCACCGGATTCGCCGGCAACGGCTTCGGCCAGTCGCCGGCAGGTGCCTCGGCGCTGGCCACGCTGCGGATCGTCGCCAGGCGTACCGGCCCGTCGCGCAGGGACAGCTCGGTGCCCGCTTCCGGCACCTTCAGCGCCAGCTCCAGGCGCATCCCGGGACCGATCCAGTACTGCCCCTCGAAGCCGCGTGGTTCCACCGGATGGCCATCGATGGCATAGATCCTCGCCTCGCCGTTGGGCAGGTTGAGACGGTAGGTGACGGTGTTGTCGACGTTCAGCAGGCGCACCCGGACGATCTGCCCGGCCGGCAGGTCGATGGTCGGCACATGCTTGCCGTTGATCGTCGAATAGCGCCCGCGGGTGCCTTCGCGCGCGGCCTGGCGCGGCACGCTGAAGGGGGTGAAGGCGCCCTGCTCGTCCACGTGCCAGGTCTTCAGGCAGAGAACCTTCTCGTGGCGGAACCCGGTGGGTTCGCGCTCCTCGATGATCAGCGGGCCGACCAACCCGCGGCCGAGCTGCTCGCTGCTCATCAGGTGCGGGTGGTACCAGTAGCTGCCGGCGTCCTGGGTCTTGAACTGGTAGATGAAACTCTCGCCCGGCTGCACCGGCGGCTGGGAAATGTACGGCACGCCGTCCATCTCGATCGGCAGGCGGATGCCGTGCCAGTGGATGGTGGTCGGTTCGTCCAGCCTGTTGGTGAAGCGCACCCGCAGCCACTCGCCCTGCTTCGCCCGCAACTCCACGCCGGGGCACTGGCCGCCATAGGCCAGCGCCGGACTGCTGAAGCCCGGGACGATCTCCAGGTCCAGCGGCGCGGCGATCAGTTCGTAGTCGTACTCCTGCGCCACCTGCGGACGCGCCAGCCAGAAGCGCGCGCCGCCGGCGCCGAGGCCGACCACCGCCAGCCCGGCGAGGCCGCCGAGCACTTGTCTACGGGTAAATGTCATGGAGCCTTCACCTTCGCGGTACTGCGGCTGGGAAATGATAGATTTTCTCATTTCCACGCCCACAAGGCGAGTTCTCGCTCGGAACAAATACACTCCCGCTGACCTGCAGCAAGCAAAAGAGCTCCCCAGCGCCAGCGGCGGATCGCTGGTCGCGGACTACGCTCAGGCTACCTTCACGATGCGCTTGCCGAAGTTCTCGCCATTCAACAGGCCGACCAGAGCCGCCGGCAGGCTTTCCAGCCCGTAGAGCACATCCTCGTATACCTTGATCTGCCCGGAGTCGACCCATGCCTTGAGATCGGAGAGCGCCCGCTCTCTGCTCTCGAGAAAGTCGCCAAGAAGGAATCCGCGCAACGTCAGGCGCTTGATGATGAGTAGCCCGGGGATCCCGGCAGGACCGGCGGCGGGTCGGTCCAGGTCGTATTGCGAAATCGCGCCGCAACAGGCGACCCGTCCATGCTCCGCCATGTTCGGCAGGCAGGCATCGAGGATTGCACCGCCAGTGTTGTCGAAATACGCGTCGATCCCGTCCGGACAGGTCCGCCGCAAGTCATCGGCCAGCGTCCCCGTCTTGTAGTCGAGTGCCGCGTCGAAACCCAGTTCACGAACCAGCCAGGCGCATTTGCTGGCGCCTCCCGCAATGCCGATGGTACGGCAGCCCCTGATCCTGGCGATCTGCCCGACGATAGAGCCGACGGC
ACCGGCGGCGGCAGACACTACCAGCGTTTCGCCCTGCACCGGCCTGGCGCATTCGAGAAGGCCGAAATAGGCCGTCAGGCCTGCTACGCCGTAGACGCTGAGCAGATGCGTGAGGGGCTCCAGGCGCGGCAGCCTGATGAGCCCGGCCGCCGGCACGACCGCGAAAGTCTGCCAGCCGGTCTCGGCACAGACCAGGTCGCCGGGCTGGAACGACGCCGAGCACGATTCCACCACCTCGGCCAGTCCCAACCCCGCCATCACCTCCCCGGCCACGAGGCCGGGGCGATAGGTCGCGCCTTGCATCCAGGCCCGGCTGGCGGCGTCCAGCGGTATGTACAGGACACGAAGCAACAGCTCACCGTCGGCCGGAACCGGACAACGCACCTCGCGCCCCTGAAAATGCCGGACAGCGAGCGCAGTCCGGGGAAGCTCGCTCAGAACGATCTGACGGTTGATCATCGCTTCTCTCCCGCCGCGAGAGCGACACTGTCGCTGGCCTCACCGCCGAAAGCGGGAGGCATGTGAAACTGTCTGTCGTGCATGCTGTTCTCCGAGTCGATGGCAACCTGATGCAATCTCATTGTGAGAAAACATCTCCGGACTTAGAATGCAAAAAACTCCTATCTTCTATCGCGATCGTCCAGCATCAATGGATCCTCTCTCCGAAGTCCTTTCCCTGGTCAACAGCCAGGACTCCTCCTTCGGCGCCCTGAAGACCGGCGGCGATTGGGCACTCCGCTTTCCGGCACCGGAAGGTGTGAAGTTCAACGTGGTAGTCCGTGGCGCTTGCCTGCTGGCCACCGACGGCATGGAAGAGCCGATCCGACTGGAAGCGGGCGACTGCTTTCTGGTGTCCTGCCGCAGTCCGCTGCTGGTCGGCAGCGATCTATCCCTTCCCGCCGCCGACGCCACGCTGCTCTACCGAGATGCCCCTGACGGCGTCGCGCACTATGGGGAAAGCGAGGATTGTTTCCTGATCGGCGGTCGCTTCGCCTTTGGCGAAGAAGCCAACCTGCTGTTCGACGGGCTGCCTCCGGTGACCGTCGTGAAAAGCGATTCGGACCAGGCATCCGTACTGAGCTGGGCGCTGCACCGACTGGCCCATGAGTTTTCCTGCCCGTCACCCGGTAGCGCGCTGATCGCTCACCACCTGGGCCACATCATGCTGGCGCAGGTGCTGCGCCTCTATCTCGCCGGCAAGGGCAGCGACACGCCCAGTTGGCTGCTGGCGTTGTCCGATCCGCGCATAGGCGCGGCGATCCGGGCGATCCACGCCGAGCCGGCCAAAGTCTGGACGGTCGAGCGCCTGGCCGACGTAGCGGGTACCTCGCGCTCCACGCTGGCCCTGCGTTTCAAGCAAACCGCGGGACTGGCGCCGCTGGAATATGTATCGCACTGGCGTATGCAATTGGCGGCTCGCGCCCTGCGCGACAGCAAAGCGACGATTTCCTCGATCGCGCAAACCCTGGGCTACGGCTCGGACAGCGCCTTCAGCAACGCCTTCAAACGCATCATGAAATGCTCGCCCAGAGACTATCGCAGCCGGCAGGCCAGCAGGGCATAGCGCTCGGCGAACCGCCCATGCGGCAGGTTCCCCGAGCGGTCGAGCGCATGGTCAAACGACCGACGGAGCTTGCCACGGTATCTTTCCCGCCCGAAGGATGATCCCGATGGCCAGGACCACGCACGCGCCGAAGGCATAACGCAGGCCGAACGCTTCGGAGAGAAAGCCGATCAGCGGCGGGCCGATCAACGTGCCCAGGAATCCGATCGTCAGAACGGCTGCGATAGCCGAACTCGGCGCCATGCTCGAGGAGCGGGCGGCGGCCCCCGCCACCAGCGGAATGACGGTACAGATGCCGAGACCGACAAGGCAGAAGCCAATGATCGACGCCAGCAGGCTCGGATAGGCGATCGTCGTGACCATGCCGATCAAGGCGAGCCCCCCGCTCCACTGCAAGGTGCTGCGCGTACCGAAACGGTCGGCGACTCGGTTCAGC
AGAAGCCGGCCGACAGTCATGGCGCCCATGAACACCATGAAGCCGACTCCGACCAGTTGCCGCTCGGCCGAGACCACGTCCTGGAAATAGACGACGCTCCAGTCATACATGATGCCTTCGCAGGCCATCGAGAAAAAAACGATCAGCCCGTAGTTGAAAAGCCTCCTGTCCGGCATCGTGAAGGCCCGCCCGACCCGCTCCGCGCCAGGCTGGTCGTGGAGAAAGCGCCAGCAGGCCAGGGCGGCCAGGAAGGCGATCACCGATATCACGCCGAAATGCGTGGACAAGGCAAAGTCCTGCCCGATCATCAACGCCCCGAACCCGGCGCCGACGAATCCCGCCAACCCCCATATACCGTGGAAAAACGGTAACTCGGTCTTTCCGATGAGCGCCTCCGACAGGCTTGCCTGGGTATTGTTCGACACATTGACCGCGCTATAGGAGAACCCCGATACGAACAACAGAGCAGCAAGCTGGTAGACGCTAGAAGCCAAGGGAACCAACGACAGCAGAACGGCGTTCGCGAGAATCGCCGTTACTCCAACGTTTCTGCTGCCGAGCTTCGCGATCATCCAGCCGGATACCGGCAACGAGAGCACCACCCCGGCCGACATGGCGAAAAGCGCACTGCCGAACACCCCGTCGGAAAGCTGGAGTCCGTCTTTTATCGTGGCCATCCGGGAGCTGAGGCTCCCGAAACACAATCCGAGCATGAAGAACATGCCGGAGATGGCAATACCTTTCCCTACGCTGCTGGAGGTCATTCGATCGATCCCGGTGCTTGTTTGATGGACAGAAAGCGCGGTATCCGCGACGTCTTCCGTGTGCTCGACTAGGACGCGAATGCGTCGATCCCGGTGAGCGTCGCCGAAAGCCTCCAGAGACGGCGGGCCTGCTCCGGATCGATCGCATGCTCGCGTACGCCGGCCAGGGTGCGCGAGTCGTCCGGCGCGATGCCTGCGATGTCGCAATCCTCGCAGTAGAGCCCGCCCATCCCCGCCAGCAGCGGAGAGGTCGCAGCCCACAGCTGGGTGGCAGCCCCCTGCTGCGGGGTCTTGAGCGTGTCGGGGCTGGCCGGATTGCCGGCTTCGTCCATCCAGCCCAGGGCGATCATCTCCGCCCGTGGAACATGGCGTTGCAACGGTGTGGCGATGCTGCCCGGATGCAATGCGAAGGCATGCACACCCCTCTCGCGACCCAGCGCATCCAACTGGATGGCGAACAGGGCGTTGGCAGTCTTCGACTGCGCATAAGCCAGCCACTTGTCGTAGCCCCGTTCGAACTGCACGTCGTCCCAGCGGATCGCCGAGTAGCGGTGCCCGGCGGAGGACACGGCGACCACTCGCGCGTCATCCGCGAGCGACGGCCACAAGAGGTTCACCAGGGCGTAGTGGCCAAGGTGATTGGTCGCGAACTGCGCTTCCCAGCCCGGGCCTACGCGGGTCTCGGGACAGGCCATCACACCGGCGTTGTTGATGACGATATCGGCACGACGGCGCGAGGCGGAGAAGGCCTCGGCGAACCGACGCACGCTGGCCAACTCGGCCAGGTCGAGCGCCTGCACCTCGACTTCGTCGAGCCCGAGCAGCGCCTCCCTTGCCACATCCGGTCTGCGCGCCGTAACAATGACCCGCGCGCCCGCGTCGGCCAGCGCCTTCGTGGTTTCCAGGCCAAGACCGGAGTAGCCGCCGGTCACGATGGCGGTCTTTCCGGAGAGGTCTCGCCCCGCCATCACCTCGCTGGCCTGCGTTTTCGCTCCGAATCCCGAATTGATGGGGGTTTGCCTGATAGCCATGAGAACTCTCCTCGCATGCGGAAGCGGCCAGGCGGATAGCGCTGGCTCTTGGGACGAGGGTCATTATCGAAGTGGAAAACTGGATTCCGAATGCAGCGGAATCCAGTTTCCTATCGAGATCGTCCGATCCCGTAAGCAGCTGTTTCATCACCGCTGCAAGAAAAGCACGGGACGGCGCAGAATTTGCTGTACGTTGGCGAACTCG
CCAACAGCAGCTGAAGCAGGAAAAATTGATATCAACCCCAGGGCCTACTAACTAGGCAGCTGTACTAAAACTGGGGGCAGGTCACTCCACCGAACACTTCCGGGAAGGCATGGAACACAATGACGGAAAGCACCGCCACTGCGCGCAAGCCATCAATATCAGAGCGATACTTTGGTGGATAACCGTTCGACATGAGCCTGACTTGTACTTGCTTGTTGAAGCCGCTCCCTCGCTTGCTCGGGAATAGGTAAAACCACAGAAGCTTCCGCCGCCGAGCGAAGAAAATCGATGGCTAACATCACAAATTCGCCGGCCTGCCAAGACGCATACATAGAAGACGGCGACCTGTCGGCCACCCTGCCCCATGTGCGTCTCTACGACCAGTAGTCGCTCCGCCCAGTTCGTGGGTAACTTGCTGGCGACGCGGAAATGGCTCAGCCCTTGCTGAGCAGCCTGGCCACCAAACGGCGATACCCCGCCAGAAGCGAAAATCTGTCCTGGTAGAAATCGAGGATCATGGTTTGCCCATGAATGTCGATGGTATGCGGACCGCGTGCGTCCTGGAACGCCAGGTTTGCTACACGCTTCTCCGAATAATGCTCTGGATCGAGCACCTCGATTTCGTTTACGGCCAGACTGCTTCCATAACCATAGGAGTTGTTCTGACCGAACCGGTAAAGCTTTCCTCCCTCCTTGAAGATACGCCCGCCCATTCGTGCGGAGCCGGGATTCATCACCACAGGGTTACAGGGATGGGAAGTGTAGGGTCCTTCCAGGCTCTCGCCGACATAAAGATAAAGGTTGTCCGAACCTGATACCGCCTGGCCGCAGAACAGGTAGTACCTGCCCCCGTGTTCGAAAAGCGTGCCATCCAGAATCCGCTCACCCTCCATTCCCTGGAATAGCTTCTTGCTCTCCAGAGCGAAAGGAGGAGGCAGAAGACAGGGAGCGGAATGAGAGGCGACTTCCGGTATCAGATATTCGACACCAGACGCCTCGAACGAATAGGGATAGGAAAAATGATTCCCCTTCAAGATCACACGTGAAAAGTCAAGACTCTGGGCCTTCAATTCAATGATTTCACCCAGACCGTTGCTAGCATTCAAGGCTTCGAGACGTATCAGCTTACCGTCCGCACTAAAGAAAGGATCGGCGTAGAAAGTATACCCCTTCTCCACCTTCGGAATCTTTCCTGCTGAGACGGACAATTCTTGCAACGAAGGAATATCGTTGTAAGGAAGCGCAACGACATTCCATTTTTTTTCGAAAAACGCCCCGTACGATAGCCGGGCAAGCCCCCGGAAGATAGTCTTGCAGAAAAACTTGAAGACGGTGAAATTGCTGGGCAGACGGTAGTTTTTGCCCAGCTTTTCTAATACCACCTGCTCTCCCCGAGAGTAATTGACAAGCGCCTTGCGAAGCAGCACGACCGAATTCAGATAAAAGTTTCTTGAGGTTTTCTTGTAGGAGTGGTGATGAACCTTGGAGTACCCGCGCACCAAGACCTCGCCGGCGTCCAGTTTGTTGGAGAGCTTCTGGACTATGATCCCGACGCTGTCTGCATTTTCATAGATCTCATAAAAGCCCGCGGGCCGCCCCCTGTAGTACTCTGGATCGCCATGGTGGTAGGAAAGAATGTCCAGCCTCTGCAATCCTCCATCGATGCGGAGCAGAGACATGCCAAACTTGATGACCAGCTTTATTCCCTTATCCAGAATTCGGGCGCAGACATCTTCCGGGATGCGCTGCCAGGCCCCTTCGTAATCAGAATCGAAATGAATGACTTCTGAGCCACGAGAGTCTAACTGCACTCGACGAGTCATATCATTCTTCAGGGAAAGAATATTGAGAAAATAATACCCGCAATGCTTGATGACACTTTTCTTGGTCGCGCTATTACGACAACTGAGAACCAATTGAATATCAAGATACTCAGAAGAGTCTTCAATCGCCCGCTTTTGCCACTCACTCAATGAAAGATCATCAACAATCAACGCGGC
CTTTATTTTCATCCCGAAACTCGCCTCTGAAACAATGAAACCCCAAAAATGCTGCGAAATTTCGACAGCCCCGAGAAACCGACACTTTTCACTGAAAAGCTGCCATGACAATTAGCAAAGCATCATTAAAGCGAAGCAGTATATCTCATGATCGACGGGCCGATTCAACCGTAAAACGGCCGTTAAACAGTTGCTGGACCACCAGCCAACGCTATACAGACATCATTTGGCGGAGCCACAAGGCTTTCGGCGGAGCCATAAAAGACAACGGCCTTACGGCCGTTGTCTTTCGGATGAATCACCAGGGAAGAAGTGGCTTCACTCCCACTCGATAGTCGCCGGCGGCTTGCTCGACACGTCGTAGGTGACCCGGGAGATACCGGCAATCTCGTTGATGATGCGGTTCGAGACCTTCTCCAGCAGTTCGTAGGGCAGGTGCGCCCAGCGCGCGGTCATGAAGTCGATGGTCTCCACCGCGCGCAGGGCGACGACCCAGGCGTAGCGACGGCCGTCGCCGACCACGCCGACCGATTTCACTGGCTGGAACACCACGAACGCCTGGCTGGTCTTGTGGTACCAGTCGAAGGCGCGCAGTTCCTCGATGAAGATGTGGTCGGCCTGGCGCAGCAGGTCGGCGTACTCCTTCTTCACCTCGCCGAGGATGCGCACGCCCAGGCCCGGGCCGGGGAACGGGTGGCGGTAGACCATGTCGTAGGGCAGGCCCAGCTCCAGGCCGATCTTGCGCACTTCGTCCTTGAACAGTTCGCGCAGCGGCTCGACCAGTTCGAACTGCATGTCCTCCGGCAGGCCGCCGACGTTGTGGTGCGACTTGATCACGTGGGCCTTGCCGGTCTTGGCGCCGGCCGACTCGATCACGTCGGGGTAGATGGTGCCCTGGGCGAGGAACTTCACGTCCTGCAGCTTGGTGGCTTCTTCGTCGAAGACTTCGATGAAGGTGCGGCCGATGATCTTGCGCTTCTCTTCCGGGTCGGCGACGCCGGCCAGGCGGCCGAGGAACTTGTCCTCGGCGTTGGCGCGGATCACCTTCACGCCCATGTTCTCGGCGAACATGGCCATCACCTGGTCGCCTTCGTGCAGGCGCAGCAGGCCGTTGTCGACGAACACGCAGGTCAGTTGGTCGCCGATGGCCTTGTGCAGCAGCGCGGCGACCACCGAGGAGTCCACGCCGCCGGAGAGGCCTAGCAGGACCTTGGAGGAACCGACCTGGGCGCGCACGGTGGCGATGGCGTCGTCGACGATGTTCGAGGGGGTCCACAGCGCGGCGCAACCGCAGATGTCGAGGACGAAGCGCGAGAGAATGCGCAGGCCCTGCTTGGTGTGGGTGACTTCCGGGTGGAATTGCACGCCGTAGTAGGCGCGGGCATCGTCGGCCATGGCGGCGATCGGGCAGCTCGGGGTGCTGGCCAGGATGTGGAAGCCGGCCGGCATCTCGGTGACCTTGTCGCCGTGGCTCATCCACACGTCGAGGCCGAGCACGCCGTCGTCGTCCACGTGGTCCTCGATGCCGTCCAGCAGGCGCGCCTTGCCGACCACGTCGACGCGGGCGTAGCCGAACTCGCGCAGGTCGGAGCCCTGCACCTTGCCGCCCATCTGCTCGGCCATGGTCTGCATGCCGTAGCAGATGCCGAACAGCGGCACCTTCAGGTCGAACACCGCCTGCGGCGCGCGCGGGCTGTCGGCTTCGTGTACCGACTCGGGGCCACCGGCGAGGATGATGCCGCGCGGCGCGAAGGCGATGATCGCCTCGTTGCTCATGTCGAAGGGATGGATTTCGCAATACACGCCGATCTCGCGCACGCGGCGGGCGATCAGTTGGGTGTACTGGGAGCCGAAATCGAGGATCAGGATACGGTGGGCGTGAATGTCTTGGGACATGGCCATCTCTCGCTACGGAATTCGAAAACGACACGGGGCTGAATCGAACAGCCCCGTGCAGCACTCATTTCAATCAGCCTCAACCAACCCGGTAGTTGGGGGCT
TCCTTGGTGATCTGGACGTCGTGGACATGGGACTCGGCCATGCCCGCGCCGGTGATCCGGACGAACTGCGGCTGCGTGCGCATCTGCTGGATGTCGGCGCTGCCGGTGTAGCCCATGGCGGCGCGCAGGCCGCCCATCAGCTGGTGGACGATGGCGGACAGGGCGCCCTTGTACGGCACGCGACCCTCGATGCCTTCCGGCACCAGCTTCTCGGCGCCGGCGGAGGCGTCCTGGAAGTAGCGGTCGGACGAGCCCTGGGAGCCGGACATGGCGCCCAGCGAACCCATGCCGCGGTAGGACTTGTAGGAACGGCCCTGGAACAGCTCGATCTCGCCCGGCGCTTCCTCGGTACCGGCGAACATCGAGCCCATCATCACACAGTAGGCGCCGGCGACCATGGCCTTGGCCAGGTCGCCGGAGAAGCGGATGCCGCCGTCGGCGATCAGCGGTACGCCGGTGCCTTCGAGGGCGGCGGCAACGTTGGCGATGGCGGAGATCTGCGGCACGCCGACACCGGCGACGATGCGGGTGGTGCAGATCGAGCCCGGGCCGATGCCGACCTTGACCGCGTCGGCGCCAGCCTCGGCGAGGGCCTTGGCGGCCTCGGCGGTGGCGATGTTGCCGCCGATCACCTGGACGTCCGGGAAGGTCTGCTTGACCCAGCGCACACGCTCGATCACGCCCTTGGAGTGGCCGTGGGCGGTGTCCACCACCACCACGTCGACCCCGGCGGCGACCAGCGCGGCAACGCGCTCGCCGGTATCGGCGCCGGTGCCGACGGCGGCGCCGACGCGCAGGCGGCCCTGCTCGTCCTTGGACGCCAGCGGGTAGGTCTTGGCCTTCTCGATGTCACGGAAGGTCACCAGGCCACGCAGGTAGAAGTTCTCGTCGACCACCAGCATCTTCTCGATGCGGTTCTCGTAGAGCTTGGCCTTCATCTCTTCCAGCGGGGTGCCTTCGCGGGCGGTGACCAGCTTGTCCTTCGGGGTCATGATCGCGGCGACGGTATCGCCGGCGTTCGGCTTCACCCGCAGGTCGCGACCGGTGACGATACCGACCAGCTCGCCCTGCTCCACCACCGGGAAACCGGAGAAGCCGTACTCGCGGGCCATCTGCAGCAGTTCGATGATCTTGGTCGAGGGGGTCACGGTGACCGGGTCGCGGACGATGGCCGTTTCGTGCTTCTTGACCTTGCGGACTTCCGCGGCCTGCTGCTCGATGCCCATGTTCTTGTGGATGATGCCGATGCCGCCTTCCTGGGCCATGGCGATGGCCAGGCGGGCTTCGGTCACGGTATCCATCGCGGCGGATACCAGCGGGATGTTCAGTTCGATGCCGCGGGTCAGGCGAGTTTTCAAACTCACGTCCTTGGGCAGGACTTCGGAATAACCGGGGATCAGAAGGACGTCGTCGAAAGTCAGGGCTTCTTGACTGATTCGCAGCATGGCGGGCGCTCCCAGACGGGAAAAATGGAAGCGCGACATTATACCCAGCCACGGGCTTCCGCTCAACGCAGTTGATCGCCGAAAACGGCGAATGGGCAAATATCTTGCAAGGCGCCCGCCAGGCCCCGTTCCGCCTGTCCTATCCCGCATTTGCAGGATGGCAGCGCGGCGGCGGATGGCCGGATACTGCGTACGTCACCCAGCGACCGCCCAGGCGGCGGGACGCCGATTCCAAGAACTATAAAAGTGGGACGGGGGAGAGAATGAAAGGGAGCACTGACCCGCATTCGTCATCCGCAGCCTTGCTCGCGGCGTTCGGCGCCGCCGTGCTGGACATCAACCGGCTGGCGCGCGACAAGACCCTGGAGCGTTTCCATCGCACCGCGCTGGAACGTCTGCAGCAACTGGTGCCGTTCCAGCGTGCCTGGTGGGGCCGCGCGGCGCTGATCGACGGCGTGCCGGTGGAGCACAGCGCACACCTGTTCAACCTCGAAGAGCACTATGTCGAGGACTGGAAATCGATCAGCCACGACGACATCACCGTCGGACTGGTAC
A +>6_1#NODE_3_length_40000_cov_63.0619_ID_5 +CATATCCCCAAACTAATTCTAAAAGCTTATCTCTTGAAAAAACTTGTTTTGGTTTTTGAGCCATCGTGTAAAGTAAGTCAAATTCCTTTGGAGTTAAGCCTTCAACAATATTATCATCAAAGCGCACTTCTCTTCGATCTTTTGAAATCTTTAGATGCTTAGTAATTACATCATACTTTACGCCATTAGAAGCATGCATATCTTTTTCTATACGACTTCTTCGATATAAAGCTTTAATGCGGGCAATTAGTGCTAATGGACTGAATGGTTTAGTAACATATTCATCTGCACCAATGCCCAAACCCAAAATTTGATCAGTTTCTGAGTCACGTGCAGTTAACATAATGATTGGTATTGAAAGAGAAATAGCCCTTATTTCTTTAGCTACTTCCATTCCATCCTTTCTAGGTAAGTTAAGATCTAACGTTACAATGTCATAAGAATCTGGATGCGCTTTAAACATCTCAACAGCTTCTATACCATCCTTAGCTATATCTACTTCCCATTGTTCTTTACTGAAGAACATCTTCATCATTTCTGCAACAGAATTGTCATCTTCAACCATTAATATCTTTAGCATTATTTATCCTTAAATCCTTTTACTCTAGTATGTTTAATTTTATCTGGTTCAATATAATCATGTGGTAATTCATGCCGGGCACGTAAGAAAGCCTGCAATTTTTTCTCAGTTAAGATTATCGGAGCCAAAATAAACAAATAAAAAGTAAGCAACCAGATCAAGAAATAGCGATCCCAGTGTAAGAAATGAATTCCTAATCCCAATAGAAATTGAACTAAGCCAAAAATCATGAAATAATTTCGTGCAACTTTTTGCGCATACTTGTAACTTGTTTCATTAACTGAAGCCAAATACGACATATATCCATAAACTTGATTCGGATTAGGTGAAGGAGCGATCAGCCAAATGACTCCGATTACAAACATTATTACCCCGCAACCAATATATATCATTTGACCAACTCCATTCACTTAAATTTATTCTTTACCAGGAGCTGAAATTACTAAACGTAAGTCACCGCTAAAACTAAAGTTCTTCATCTCATTAGGAATAATAAAATTAGTACCCAATTTGATATTATATTCTTTACCGTCAGCAATAAACTTACCTTTACCTTTAATTACAGATACTAAAAGATAAGGATGATCATTTAGACTCCAATCAAAACTCCCATCAACATCGATCTGCCATAAATAAAAATGCGGTGACATTGGTGGTTGAGCTAAAGTAGTAATCTTTGCATCTCCCAATGTTTCAGATGTAATTTTCAGCTTCGGATCTTTATGTGGTACTTGAATAGTATCAAGAGATTGTTTAATATGCAACTCTCTCTTTTTACCAGTTTTCTTATCTACTCGATCCCAATCATACAATCTATAGGTTACATCACTTGATTGTTGTGTTTCAATAACCATAATTCCTTTAGTTAATGCGTGAACCGTTCCAGAAGGTACATATAAAAAGTCTCCTTCTTTCACTGGTACATAACGTAAAAGCTTCGACCACTTTCCATTTTTGATCCAATCTTCTAGTTCTTCTTTACTAGTTGCATTGTGCCCATAGACAAGTTTAGCACCAGGTTCGGCATGAAGCACATACCAACTTTCAGTTTTTCCAGAATCATTTTCATGAATTCTAGCGTATTCGTCATCTGGGTGTACTTGAACAGACAAATTATCATTTGCATCTAAGAATTTAACTAATAGTGGAAATTCCTTAGCCTTGGGATTACCAAATAATTCTGGATGCTCTAAATATACTTCTCTTAACGTCTTTCCTTTTAATGGACCGGCATTAACTGTCGAAGCGTCATCTTTGTATCCAGATATTACCCAAGCTTCTCCAACTTTTCCATCTGGAATATCATATCCTAAAACTGTATTTAACTTACGTCCACCCCAAATTTTGGGTCTGAAATATGGTGTTAAAAATAAT
GGTTCCATAAAAAAATCACCTCAATAAAAGTATATATCTTTTTTAGATGATTTTGAAATCGATTTCAGCTATTTTTTTGAATATTTTTTTATTCCACTAATTAAACGTTTAATTCCATCCTCTACCATAGAAATTGGTGAAGCTAGATTAAGCCGTAAAAATTTATTACCATTACCGCGATAAATACTACCAGGTGAAACAATTAGACCAGTTTCTTTTCTAATATAATCAGCTAATTCTTGACTATTTGACGAAACTTTACTTACATCAAGCCACAACAAGTAAGTTGCTGGACCTGATACAATTTTTACATCTTCCAGATTATTAGTTATTTCTTTTTCAGCATAATTAAAATTACGATTAATTATCACTAACAATTCATGTAACCAGCTAAAGCCTTCAGTATATGCAGCAATTGTTGCTGGAATAGCTAAAAGATTTGGCTCAGCCAATTCATCGCTGTTAAGACCTCGATTGACGATATAACGTAAATTTTCATCTGGAATAATTACGGTTGCAGCATGAAGAGCAGCTACGTTAAAAGTCTTACTTGGAGAAACTAAACTAATTATATTATCCCTCAAAGCTTCAGAAACTGAAAATGCAGGGGTGTATTTAACGTTATCTCTGACCAAGTCTCCATGAATTTCATCTGATAGAAGAATTACGTGATATTTTTGACATAAAGATGCAATTCTCTCTACTTCCGTAGGTGTCCAAACTATCTTTACCAAAATAATCACCATTAATTTCAGCATTTTTAATTTTGACGTCCTCACAAGTCCACATTGTTTCTTCTGCATTAGAAAAGAAAACATGATCTAATGAAATATGCTTACACCGTCTAAATAACTTAGGAGCTTGTAAATCTGAATTTTTAATTGAAATATTATTAGTATACCAAATACCGCTTCGCGACATTGTTTCAAAGGTACTGTCAGCAACTTTAATATTATTCGAATACCATAAAGGATATTTATATTTAAAAATAGTAGATTTTATTTCTAAACTTTGCGTTTCTTTTAATGGTGATTCTCCTTCACCAAAAGTAATATTCTCTAAAATTGTATCCGAAAGGCCATATAGCGGTCTTTCTCCTTCAAAATACTTTTCTTTAATTATTTGCATAGATTTCCTCTTTTATTAACTCTTTCAATATTTAGTCTAAATTATGAAAAATAAATGTCTAATATTTGCTAACTATAACGAAATATATTTTTATGCTATAGATTAGTTTTGCACCAAGATTACTTTTCAAATTTATTATAGAAAAAATAAAATTTTGAGAAAATTTACAGTTTTTCATACTAGAATAAATTAAATTTATAGCAAAAAAAGGATGAGTTTCAAAGCTCATCCTTTTTAATCAATTTAGTTATTACGTTTACGCTTAGAAACACCAGCTAAGCCGATAACGCCAAGCATTGAACTTACAGCACCTACTATAGCTGCAACAATACCATTTTTAGCTCCAGCTTTTGGAAGTTCTGATTGAGTTGTCTTCTTAACTGAAGTTGTTGGAACTTCTGAAGTTGGTTTAGTTGGATTATGCGGAGTATCAGGAGTATTTGGTGTTGTTGGTGTATTAGGTTTCTCTGGAGTAGTTGGAACATTTGGAATTGGAGTTACAGGTTGATGTGGCTTCAAGTTATATCCACCGATTAAGTCCCAATCAACTGGTTTGCCCCATTCACCAGGAAGAGCAGCACTATTTGGTGCATCTTTTTTAAGAACACCCATCATATCATCGTTTTCGCTAGTGTCATCCCAAATCCAGTCACCTTGGTTAGTCATTTTAGCTAAAACAGTAGTAGTATTATCTGGATTAATTTGTAACAAGAAACTTGGTGCCCAAGTTGCGTGCATACCCTTTCCAGCAACTTCACCACGGTTAGTGATATAAGAAGTAATCAATAATTGATCATCTCTTCCTTCTACTGGAACTGCATAGTATGAGTAAGTTGCAGTACGCCAGTTTGCTGGAACAGAAGCAGTTAAAA
CTACTCCAGATTCATTTAATGGAACGTATCCATGAGTTAGGTTATCAGAAACGTAACCAATCATTGCAACGTTATCACCAACTGCTTTATTAGCAGCCATCCATGCATCGTCGTTACTTCCACGGTTTAATCTAGTAGCAGCAAATAAGTAATACTTGTTGCCCAAACGAACTACATCAGGACGTTCAATTTCATCACTTACCATTGGAGAACTAATAAGTGGTGTGTAGACCTTCTCAACACCAGGGTTCTTAGTATCATTGTTTAACTTAATAATACCGATTGCAGCATTAGACCATTTTGCTCTATCTTTAATATCAGAGTTAGACAAGATTTGGAAGAAATCACCTAAATTATCTTTGTTAGTGCCACCGTAATTTAACCATTGATAAATTTGGTCAGCACCTTGATAATTCTCTGTACCAGTACTTGCTTCAAAAACGATGTGGTCATTATCGACGTGTGCAATAGAAATCTTGTCTTGATCCTTTTCAAGATTTAAGTAGACAGTTGCACTTGCAATCTTTTGGTGGTTAGTGTTGTTATCACTTGTATCAACCTTAGTATAGTAAAGTTGGATTGAACCATCTTTATTTAAAGTTGCTGAGCCAGACCATTGTTGAATAACTGGAGTACCTAAGCCAAAAATAGGACCAGCATTCTTCCAATTATTAAAGTTATTGTCACCGTACTTGTTGTAAAGAAGATAAATATGATTGTCATTAGTATTTGGAACTCCCATCATTCCAACTACTAATTGATAACCATTCCAGTTAGAAACATAACCAGTTTTGCGTCTTGAACTGGCCATGAATCCCAAATTTCTAAGTCTTCTACTTTTCCTGTTTGGGCATCCATAGTTTTTGCTGCAGGCATGTTCTTAATTTTACTTGCATTAAAGAAAGGAATAGCGTAACGAGCATCTTGTTCAATTAAAGTTTTAGCAATTTTCTTAAAGTCACTGTAAGTTAATTGAGTACCGCTCTTTGCTTCTTTAGAAAAGTTAATCTTATTTAATTCATTAATTTGATCATTGGTTAATTTACTGGTATCTACATCAGATTCTTTAACAATATCTTTAATTTGACCCTTTAATTCAGCTTTGTTATCGTTTGATGCTGTTTTTGTTGAAGTTTCGTTAGCTTTTTCTTCAGTATTTGTCTTAAATGAAGTAGCTGCTTTTTCATCTTGTTTTACTGCATCGTTCTTTTTAACATCAGCAGAAGAATTAACAGGTGTCTTCTCAGTACCATCTACTGTTTCTTTAGTAGTTGTTGTAGAGGTGGTAGCAGCTTTAACATTATTTACTGGTGAAGTATTAACAGCATCTGCTTTAACTTCTTTAGTTGCTGCTGAAGTCACAGTAGTAGCTTTTTCTGCTTTAGTATCAACACTCTTTTCTGAGCTTGAATTTACTTGATTATTATTTTTATTATTTGCTGAAGTAGTAGTACTTACTGCTACAGTTTTGTTTTCATTAACGTTGTCTGCATTAGTAGTAGCAGCATTTACAGTGGAAGCACTTAATACAATTGCTGCTGTAGACAAGGTTCCCATTAATAAAGATTTTCCGCTTAAAGACATCTTCTTATGATTTTTATTTTCCAACATCTTTTCTCCTCCTACATGCTAAGCGTTTAACATTTCATCTTACAGACTATCACCTTTTGTTATAAAAATAAATATTATTTTAAAAAAATTGTATAGATTTGGCAAAATTTACTGTTTTTGTATTCAATTGCCACTTGTCAATCGTTTATCACATTAATACTAAAAAAGCCACACCAGATAATTACTCTAGTGTGGCTTTTCCAACTATTTATGGAGATGAGGGGGTTAGAAATTTACCCTTGCAAGTCCTTTAATATCAATGCTTTTAGCTATTTTAACTTTTACGTGAAGCCCAATATGAAGCCTTAAGATTGCATGTAATTAGCAAAGTTGGTGGCTGTTTCTTTGGCCTTATCCTTAGTTACATGAGTATATATATTCATAGTTGT
AGTAATATCTGAATGCCCTAGCCTGTTCTTAACATCTTTCATAGATACACCAGCATCAAATAATAAGGAAGCGTGTGTATGCCTGAACCCGTGAACCTTAATCCGTCTTAATTCATACTTATCACAAATTGCAATATTCCAACGACGTGGCTTAGATAATGCTGTATGTTCTCCCTTAGTATTAGCAAAAACTAAATTACTATCATCTAAAAAGTTTAGTCCAAGTTTAAACATTTCTTTCTGTTGTTCTGTTCTCCACTCTTTCAAGTAATTCATTGTTTCGCTATCCATATCTAGCACCCTTACACTATTAGCCGTTTTTCCTGGTGTTATGATTAGTCGATTATTTTCACCCGATGATAAAGACTTATCTATATTAATGGTATTATTTTTAAAATCAATATCAGCCCATGTAAGAGCTAGACACTCACCGCGTCGCATCCCTGAATAGGCAAGCAAGCGAAAGAACATAAAATATTTAAAATTATATTCTTTGGCATCTTTCAAAAAGGCTTTTAATTCTTCCTTACTATAGTAATCAGTAAACTTTTTATTATCCCTTTCAATTTTTGGTATTCTTACCTTTTGCATTGGATTTCTTTTAACTAGCTCCATATCCATTCCATACTTAAGTAATTTTGAAGTATAGAATTTGAACCGTTTAAAAGTCTTAGGTGCTTGCTTAGACCATTCATTTACTACCTTTTGACATTCAGCAACGGTTAATTTATCCACGTAGATATTACCTAAATCTTTTAAAATATGGTTCTTGAATATCTTTTCAGTGGTCACGAAAGTACTTTCTTCTACTTCCATGCGATACTGTTTAACCCACATATCATGCAAATCTTTAACCCGTAGCTTTTTTTGTTCAACTGGTTTATACTCACCTTTAAGAACTTTCAATTTATAATTCATAAAACACTCTAAGGCTTTATCTCGACTGGTAAAGCCTTGTTTTTTTATCTGGATTGTATGCCCTGTTTTTTCATCCTTACCAGCATAAATTAGAAACTTATATCTTTTCTTACCACTTGGTGTGGTATATTCCTTAATTTCATCATTCATGTTAAATTTACCTTTCACTAATAAAAGGCGTACTGATTGGCTGATATTAAACATTTAACAAAACTGTGGAAGTTATAGGAATAATTCAAATGATTAGCTATTAGTAAAGATGATTTAAATGGTCTTAGAATTGCAATCTCAGCATTAACAAGGGAACTCATAAGTTACCATGTACATTTACACTTTAAGGTGTATTTGTAACTACACGTTGATAGCCTGTACTTTTAGGTACTCCCTACCAGCATATGTAAAAGCATGGAGGGATGTCCGTAACTGTAGTTACACTCATAGATTATACATATGCAGTTACAACTGCGGTTGTTAATTCCTAAAGGGATGCTAGCAAGCCAAATTATTATTTAACCACCACCCTGATTCAGGTTACTGGTTTGAGGGATGAACTAAGCGCTCAGCCCCTGCACTACCGAACTAGTACCGCAGATTAGGGATGCTAGTTAAACACGTCCACAGGGAGTGTTATATATATTTTTGATTTATTGCAATTTGTCCTATTTCATTTTTAGCATTAGTTAACGCATCAATTATTTTATTATATGCTTGCTCATCCATATCCTTACGAACTTTAAATGACTGCACTTTTAAACCGTTTTTCTCTTCTGGTGGCTCTAAAAAAGCTCTATTAACATTACCTATTAAACGCGTTAGCTGAAACACTATTTCTTTTTGTACTCCCTGCGTAGTCGAATAAGAAGCACCATCTAAACTTTTAACTGCTTGGTCTATTTGATGCTGAACATCGCTAGAAGCGTCTAATCTATCGGTATCTATTAGCCTTTTGATTTCATCTTTTATTTGTTTAACTGAGTAGCCCGTGTTTTTAGCCCACTCATCCCAACCGTTAATATCATTAGATAAACCCATCAAATAGGAAGTCGGAACTTGAAAGAACATTGCTAGCTTATCCCAAGTTT
CTATTTTTGGCTCTCGCTTTCCACGTTCATATTGGCTAATAGTTACATATGACATATTTACTTGTTCACTCATATCTCTTAATGACAAGCCCTTTGCTTGTCTTAATTTTTTAATTCTATTCATGCTTTCACTTCCATTCTTTCACTAATCTTAACATAATGTATTCGTTTTGAAAAAATATTTGCGTTTTCATTTTGAATACTTTATCATGTATTCATAAAGAATACAAAAAAGAAGGCGAAAATATGCAACTAACAGAAAACCAAGTAACAGCGATTAAGCGAAAACGTGGCGAACTTGACATGTCTATTACAGTTTTAGCCAATACTACCAAGGTTAGTAAACGTACTTTAATAGATATTTTCAAACACAACCACAGAAATGTGACTAAAAACACGTTTAAGAAGCTAAATGATTGGCTGATTGATGAATATGAAAGGAAGCAATAGCATGAAGCGAATTTATAGAGCATTCAAAGACTGGCTATATTTACCAGATAGCAATGAAGCCGAATTATACAACTTGCTACTTTTCGGCGTTCTATTCTTAAGTGCAATTGTAGCAATCGTAAGTTTTACCAGAACACTTGCAATGTTTTTGGCATTGATTAGATAAGAATGGAGTTTTAGAAATGATGAAGAAGATGAATATTGTTTATCTACCACTGGTAGAACAAGCTTATCAGATGATGAAAAAGCTTAATAAGAACGTATCTAAGCAAGAGATTTACAAGAAACTAATTGAAACTAATATGATTGACCAGCAAGGTAATCCTACCAAGTGGGCGCTTGATAATGGGCTAGTTAGTGAATTTAACACAATTGAGGAAGCAAGAAATAAGCTTAATCAAATAAGCCCACAAAAAATAGAAGACCAAGTGGACGCAGATATAAACAACGTATTTAGTCGCGTGCCAGTTAGTGCCTTTAGATGGAAGAATGAACACGATGGATATGCAATTGATAGTGCTGAACTTAAAAAGGCTATTCTATCAGCATTAAAAGATGGTAGCTTATCCCCTATCGGTCGTAAGCACTGGCTAGAAGTATTAGCAGATATAAATTCACAGGAGAATTAAGAACAATGAAATATATTGAAGTTAAAACCTTACCTATTAATGAAGATGAAGTAATCACATTAGCAATGAATGCAAACAATCTTACCGCTGGACTTGACTACCTTGAAAGCATGGCTTTAAACCTAGAGGGCAAAAACCAGCAAGAAATAGAACAACTATCAGGATTAATAGCAGGTATGAAAGAATTATCAGCAAAGAACGCTAGAATTTTAGCAAATATCAATCTTTAGGAGGTTACACAATGGATATTAAGCTAGACGCATCCCAAGAACAGGATTTAAGCAATCACATTATTAATTTAGTGAAAGACAGCGTAAAACAAGCAGTAAACAATGAAAGCAAGCCCTATTTAAATCGTAAAGAGATAGCTAAGTATTTTGGAGTTGCTGACAGTACAATAAGTTACTGGGCTACTTTAGGGATGCCTGTTGCAGTTATTGATGGACGCAAGTTATACGGTAAGCAAAGTATAACTAACTGGCTTAAGAGCAAAGAAAAAGCCGTGAAAAGCTGAGACAATTCACGACATAAAAAATAAAAACACAATTAAATATTAACACATATAAGTGTACTGGTTGGCTGATATTAAATATTTAATATTAAAAAAAGAAAAATGGAGGTTAAAAATGAACAGCAATTCAGACTGGGCTCTTAACTATGCTAAACAGGGCTTTTCTGTCGTCCCTACTAACCCAAGTAATGTAAAAATACCCGCGATAGAACACGCAGGAAAACCACCGCTAACTCAGGAAGAAATAAAACAGCTATGGACTGAAGAACCTAATTATGGCATTGCTCTAAAAATGACTAATATTTTTAGCATTGACGTAGATACTCCACAGCATGCAGGAACAACTAAAATAGACGGCTTTAAGTCGTTAAAAGAGTGCATCCCGTCAGAATGGTTACCAGATACGTTAA
ACGCATTTACCCCAAGCGGTGGGATGCACTTTTATTATATGAAAGTAAACGGCTTGCCTAATAAATCAGGTGCAGCAATCATCCCTGGCGTAGACGTTCAAGCATCCCCTAATAGTATCTCTGTAGTACCACCAACTAAGCGCCAAGATGGAATTTATGAATGGAATCTAGTACCTGGGTCTAAAAATCCCGTTGCTATTCCACCTAGAGAATTAATTGATTTTATTCAAGAGAAAACAAACCAAAACGCATCAAAGCCTATTCTTTTTAAAACATTCAAAGCTAAAAATTACGCAGGCAAGTTATTAGACGCTTTATGCTTTCAACAAGTCAAGGGACAACGTAATAGCTATTTAACTTCATTAATTGGCAAAATGCTATTCTGTGGTGCTGAAGAAGAAAACTGCTACACCCTTGCAATGTTTGCCAATAGTCAATTTCAAGAACCATTACCAGAAAAAGAAGTTACTTCCATTTTTAACAGCATATTAAGGAAGGAACTAGCTAATGAAAAATAAAAAAGTAAAAATGAATAAAGAAACAAAAGAGCTAATTAATTCTCACAAGAAACAAAAAGAAAAAGCGCCTAAATTGCCAGCTTGGTGTTATGTAGATGACAATGGCAATATAAAGGTCAATATTGGATCACTCGGCGAATTAATACAGAAAGAACGCAATTACTTATTCATTATTAATAAGGATAAGGAAACACTTTACGAATATAATTATAAGCTTGGTTATTGGTTACCTATCTCTAAAGGATCAATTTCAAAAGCTATTCATGATAAATTAACGTCGGTTGGTAAATGGACGTCACAAAATCAAAGAAAAACCTATGAATTTATAAATAGTGGTATTCCAAGAAAACAGTTCCAAGATACAATCGGACACTCGCCAGAAATGGCATTTAATTTTCTTAACGGCGTTTACAACTGGTCAACGGGCAAATTAGAACCCCATAATAAAAAATATTACTTTGAGGGATGCACCAGCTACCCACTAGACATGGCAAACAATCAAACTTCTGAAACTAATAAATATTTTAAGTTGCTTTTAGGCGAAAATGCTAAAACCATGATGGAATTTATTGGTTACTCATTTTACCCAAGTTATGAACCTATTCAATGCATCGTGATATTGAAGAATGAAGGTGGAGACGGTAAAACTTGGTTTACAAACCATGTTATAAATAAAATGCTTGGAATAAATAATGTTTCTAATATTAGTTTAAATCAATTAGCGGACGTAAAAAATAACAAATTCAAACCAGCGGAACTATTTCACAAATATGTAAATGTTTCCAGCGAATTGAGTGAAAGCGAAAGCTCATTATTACCTACTGACTCTTTAAAAAAGTTATCTGGTAATGACTATATAAATGTAGATAATAAAGGACAACGTGACACACGCTTTCAAAATTACGCAAAGTTATTAATTATAACTAACACCCTTGTTCATTTTAGAGATGATAGTGATGGCTTTAGTCGTCGTGTTTATATTATGCCAGCTCATAAAATACCAGACTTTGAAAACACTATAGATGTTAGGAAAATGGAGGCAGAACGTGGAGCATTTGCGTATAAGTGCATCGAATTAGCTAAAGACGCAATGAAGCGCAAGCCTGTTAAAGGGAAAAGATACCTTACTAAAACCAACAGTATTAGCCGACTAGTAAACAACTGGATGCTAGACAATGACCCAGTGCAACAGTTTATTAATGATTGCGTTACCAAAGCACAAGGTAAAAGAGAAAAAGCGCTAGATGTAATTCAAGCTTGGAAAAATTGGTGTAGTGATAACGATTATAAGCCGTTAGGCAAAACCAACTTTAAAAATAAAATGATTAAAAAGGGGTTTAATTATCATGAAGAGAGAAAACGCGGTTCAGATGGCAAGCAACTACCGAGAAAGTATTATTTTACTAATATGACGCTTAACGAAAATGCTAACCCAGAGGGAAAAATAAACCCAGTTACTAATAACTATGACTTTAATAAATA
GAAAAAAACTGAAAAATTGGATTTATGCCACTTTTTAAAATCGGGTTGAGCCCTAGGGGCTGTAGTAATATGCCACTTATCCACTTTTTTTATTAATAAAAATTAAAAATATAAATATATATAATACACATAATATATATTTATTACGTAAAAATATATTACACAGAAACCCAATGGAAAGAAAAAAAGTGGCTTTTCAGCGCATCCCTTGTGGCTCTAAGCATGAATCCACTTTTAAAAACTGGATCAAACTGGAACATAACCACTTTTTGCAATTAATTTAAGGAATTTACAGAATGAAACAAGATAGATTAAAATACGAGCTTAAAAAATACCTAAATAGTAAACTGACCTACCCACTACACACTGTGGAAGATATTAGCATAGAAAGAATATTAAAGGTTTATGGCAGCGAAAAGCCTAGCGGTTATGTAATTCAACTAATAATAACTGGGCAAGTAGGTACAATACGAAAGGATAAATAGAATGGTAGACAAAAATCTTGAACATACCGTGTTCTTTCACAGCATGGACAGCTTGAACGAATGGCAAAGAAAATACTACAAAAAGATAATTATTACTGAACGCATGGAGACAGCTAATAACGATTACTTAATTACTTTCTTTTTAAATCGGAAACATTTAAGCGTAAAGCCTAGCTATACCGACCAGCTTATGAATATTGCACTGGATGCGCAAACTATAGACTACTATCACAACGGAGTTAACATAGTAGATTTACTTTAACGCTGGTATTATAATATAGACATACATCCATATAAGAAGAGATCAGTAGCTTAATTGTCTGGCTGCTGGTCTTTTTTTGTTCGCTCTCATCCCTACCAGATGGCTTATATTAGCTTGCTGGCTAGGCTAATTGCGGTTCTCTTATGATTTATCATTTACACCTGTGGAGGTCTCACGCTGGCTTATATGCTTGTCTGCTTGGTCTTCTGGCTGTCCTGTCTTTTCGTTTTTGCTTTCTCTTTCACTGGTTTCATGCCTTGAGTTTCAGCAAGCTCAGCTAAAGAATTTAAATTAGCAACTAATTAATTAAGAAAGCTAGCCGTCCATACAATGGCTCTCTCAGCATATCGAGGGATGCGTACGAGCGTAGTTACAACTACAGACATGCTAGCACTAATAGATTATTCATACCTGTCTGCTTTATGGTCACTCAATGTAACCGCCGAAATAACTCGTTATCAATCTAATTGATTTAGAAAATTAAACCGCAATCAAAAATAAGCAATGAATGAATTTAATTTCACTCACTGTTTTTTTATTTTGCACCCCGCCCTACCTTTCAACGTTGCAGAGCCACACATTGCTGTCGCCTCGTGTGAAACATCAGTTGTTTCACAATTAATTGAGGGGGGTTATTTATAAACCTTATTACTACAGTATTTATATATGTATTCCTGCTATTTCTAGACGGAAAAAAAAGGCTATTCAAACCTTTCTGGAATGAATAACCTTGACTGAATTGATTCTTACTAGTTTTTATTATGCCGCTTCTTTATATTCGTGCAAGTCTAGAACATCTTTTACACCCTGTTTTAACTGTTCCATCAGATAAACATCGGCAAACTGCAAGAGCGCCCTTTTACGCAATACATTATATTGAGTGTGACCATATCCAAGGCAATTAGCAATGTCCCAGCTAGCTAAATGCTTAATATATCTAAGCTCTAGAATGCTCCTGTATGGCTTAGTTGGTTCATTGGTGCAATTGCCTATGGCTGGCTTTAAACACGCTAGACGGTCGTTATATGCAATTATTAAGTTATATCTGCTCGCCACTGCATCGGGATTAATTGAACTATTTACCTTGATATTATCAAAATTCAAGCCGTGGACGTTGTACCGACCAGCGCACATTGTCTCATACCATTCTAAATCCTTAGCAAAAAATTTCTCCACGTTTTTAACTGTTTCTGGTTCATTGATTTCTAGCATACCTAGCAACTCCTATATTTATATATTATTGGAATATCTAA
CATAATTATATTTTACTATGTTTTACCAGCTGGCTTATTAACTGCAAAATAAAAGAGCTACCTTAAAAAGGCAACTCAAGCCGACTAATTAAATATCTAAAATCAACCAATCAATACGCTATTACACTTTTAAGAATATGAAGCCTAATATGAAGCCCGAAAATACTATTTTTTATTAATTTGATTTAATCATAGAAAACAAAAAGCCTTTAATATCAACGTTTCTAATCCATTGACATTAAAGACTTTTAAATTTATGGAGATGAGGGGAATTGAACCCCTGTCCAAACGTATTCCGTCGTTAACCTCTACGATCATAGTTATATTACTTAAACTTCGCTTTGATAAAACGCCATATAACAGGGCTAAACTATCAAAACTAACCTAAGAATCTTATTTCTAACTATTAAGGTAAGTAGTTAAACGTAGCTCGTTAAATGGTAAGACCCGCAGACAGACACGAGCAATCCGTCATTGGATCACGCAGCCTGACTAAGCAGCTACTGCGTAAGAATTTTCGTTATTTGCAGTTATAATTTAACTGTGACGTTTTAACGTAGACGTGACCTACGAATCGCAGTCAACGCGAATCTACGCCTGTCGAATCCCAAAACATCCCCAGATGTTTCAATGATATCACAGTCACTGTTAAAATGCTATAACTTTGCTTTCACAGCTATTTATCTAATTTAGCTAAACGAACAACGTCACGTGCAATCATTAACTCTTCATTTGTTGGAACGATCATCGCAGTAATCTTTGATTTCGGTGTAGTGATAATTCCTTCCTTGTTAGCTTTATTAGCTTTTTCGTCATATTCAAGACCAAGCCAAGTAAGACCATCCATAATCTGTTTTCTTACACTTGCATCATGTTCACCGATTCCTGCAGTAAAGACTAAAACATCTAAGCCACCCATTTCAGTCATATAAGCCCCAATATAGCGAACAATTCGGTTAATAAAAATGTCTTTTGTAAGTTTAGCCTGTTTATCACCATTTTTAATTGCTTTTTCAATGTCTCTCATATCTGGGGAGATTCCTGAAAGACCGAGTAAACCAGATTTAGTGTTTAACATCTTAATAATTTCATTAAAGCTGGTAATGTTGCCTTTTTTCATAATAAATTGAAGCAAGGAAGGATCTACATCCCCACTTCTAGTACTCATTGTAATTCCTGCAACCGGACTGAATCCCATTGAAGTATCAAAAGACTTTCCATCTTTAATAGCAGTAACTGATGCACCACTACCTAGATGACAAAGAACCATCTTCAAATCTTCAACTGGCTTCTTTAATAAGTCAGCTGTCCGACGTGATACATAGCGAGCAGAAGTACCATGTGCACCATATTTTCTAGCACGGAACTTTTCATAATACTTATATGGTACTGAATATAAGTATTGAACAGGATCTAATGATTGGTGAAAAGAAGTATCAAAAACAGCAACCTCAGGAACGTTAGGTAAAACTTTCATAAAAGCATAGATACCGTCGGCTTCAGCTGGGTTATGCAATGGTGCATAGTCACTCATATTATAAATCTTCCAAAGATTATCATCTGTAATGACTGTACTATCAGTAAATTCTTCACCACCAGCAACAACACGATGTCCTACCCCAGCAATATCTGCTAAAGAATCGATCACATTGTATTCTTTAAGCCAGCTAAGCAACTTTGGGACTGCTTCTTCTTGATTAGCAATATCACTTTGTTCATCATGTTGACTGCCATCTGCTAAAGTCATTGTGAAAACAGATCCTGGCAAACCAACACGGTCAGCCATACCAGATGCAATTACTTTTTCATTATCTAGAGAAAATAATTTGTATTTAAATGATGAACTACCTGAGTTTACTGCTAAAACTTTTTTCATTTTATATACCTCTCGTATTATATAAATTAATCTTCACTCTTATGATACCAGTCATTTAGTTTCATATTAAATGCTACTAAAGATTCTGGTTTCTTTAATGAATCTAATTTAGCCACCAGAACCT
CACGCTTAACAGCATGTTCTCCATGATTTTGAAAAACTAAAATAGATTTTTGTTGAATCTGACTTGAAAACATATCATCGGGCAAATCGACAATTGCCTGAATATTAACTTTTTTAGCTAACCAAGTCATGAATTCAGTAGATCCTTTACCAGTAAACAGTAATCTAGGTACTACTAAAAATGCAAAACCATCACGTTTAAGGTTATTTACTATTTGTTCAATAAATAAAGTATGAGCAAAAGAATGTCCCTCTTTTGCGTGATTTTCGAAGCGCTCAGCATTATTATCCAATGGATAATAACCTACTGGCACATCACTTACCACTATATCCGCCTTTTCAATCATCCATGGATCTAAAGCATCTTGACAGTATAAATCAATTTTTAAATCTTCAAGATGAGCACCAATATCTGCTAAATCCAATAATGCTTCTTCATTATCAATTCCAATTAATTTGTAATTGTTTTGTGAATGATTTTCCTGAATCAACTGTCTAATAACCGAATAAAGTAAATTTCCAGTTCCAATTGCGGGGTCAACCACAGTTTTTTTACCTTTAGAAACGATTCTTTGCCAAATTAAAGCAATTATTGTAGCAATCACTGAAGGTGTTGGCATCAAGTTATAGTCACTGGCATCTTGAGTAACTGCTTTTAAAGTTAATAGAGTAAATATTTGTACTTTTAGAGCACGTGGCAAATTATCATAATCTAATTGACGATACTCTTCAGTTAATTCAGCTACAGTTTCTTTATCTGGGGCACCCGATTCGACTTTGATCTTACCATTTTCTAGATTATCAAAGGTTTCCGTTAATGCCGATGAAAAAGAGACATTCAGAGCTTTTTGTAAATGCTCAATAGCCTTCTGAAACTTTGGATATAATTCTTCTACTTTTTGCATTTTTCTTCCTTCTTTAATATTCCTTCTAATATTTTATAGGTATCATCAAATACTAGCAAGCAAACAATGTTACTTGGAAAAATAATTTATTAAAGAAAGATTACTTTCCATATTTTCAGTAAAACTATGTTGATACATCATTACGAAAGCAATACAAATATTAATAAGCAGTAAACTACTAATTAAGGCACTTCCCTTAAGTCTTTTTACTATTATCTTTTTTCTCATCCTTTATTTTATCTTCCTTCTTCTCTGGTAAATCTGTTTTGAAAAACATCTCACTTTGCCTACCATCTTTTTCAGTTACTAAAATAGTAAAAGAATCTTTTGCAGTAGAACATTTTATTCTTTTAACTTGAAAAAGCAGTGGCATGTGCCCTCCCTCATCTGTTCTCATTCTCAAAACATCATTTGAACTCATATCAATTACATATGTATCGTAAATTAAATCACCATTCTTTTTCTTCTTATCCGATAACTTTCTAAGAACAATTTTAGTTGGATCTGAACCAGTTGTACTTACTTCAACATGTCCACTTTCTTTTAAGAAATTATTTATTTGTACATAACTATACGTAATCTCATTAACTCCGGTTTTTTGGCGATTAATTTTTCTAAGTGTACCTACAAGTCCAAATAATATTTGTGCACATAAAATAGTAACTGCGATTGCAAACACAGCTTCTAATAAGGTAAAGCCCTTTATCTTTTTCATTATTCTTACCTACGATAGAGATGATCATGCACTAATAACGTTTTTTTATCACTATGTCTAAAAATATATTCCGCATATGCCTTATCTACTCGTTTTTCAATTTGTTTCTCAGTCTGCCTATTTTGTTTAATTGTCATCCCTAAAAGCAATACTCCTAGACAAGCTATCGTAAAGCCTACACATGCTTCCCACAAAATGAAGCCCTTTATTTTTCTATTCTTCAACCATCCTACCCCATGTCATCTGTATTTTTATCTTTTTCTCTTTACTTCCACTCTTAACAGTAATTGTTCTTGGCGATATCGTTCCGCGATTGCTAATGTATAAATTATTAAAATTATAAACTCTAACATGTCGATTCAAATATACTTTTGATACTTG
ATGTGGCTCCATTATCTGAATTGAAGAATTATTGTCAAAATATGATAAAAAATATGACTTCTTTTCTACTGTTGAACGGTGTAAATACTTATTTATAGTTGATCTAATTTGACGCGTAGTACTATCCAAAATTATTTGTTCCTTAATATTTTTCACGTAAAGCGTTGGCATTAACACAAGTAAACAAACTATTGCTAAAGTAACTATCGTTTCAACTAAAGTAAAAGCCTTTATTCTCTTAAAAATCATTTTTGTGGTAATTTTTCATATCTCTCTTTTTGTTTTTGAGAAATATATTCTTTCTTTACCAGCTCTTCTAATGAAGTACCATCTCCATTATCTGCAGCTAATTGTCTTTGAGTCTCAACCGTTGTTCTTAAGGCTTCATCTGTTTTACTAGTTGCTCTCTCTTTTTGCTTACTTAGTCCTGGAACAATTAGCAAAATCAACATCGCAATAATGGCAATTACAATTACCATTTCAACTAAGGTAAATCCTTTTACCCGTTTATTTTTCATAATAAATTGTTTTAACTTTTTCATTTTTCCCTCTACATTGTTTCCATTAAATGATACATTGGCATTAAAATTTTCAAATACATCCCCAGAATACAAATTCCAATAAAAATAAAGCATAGTGGCTGCAAATTAACGATTAAACCATTTAATTTGAGATTTAGTTCATAAAAAAGTGTCTTGCTAAGTAATAGCGCTCTTTTCCCAATTTGCTCTCTTGTTGTTCCGGTTTCTAAAAGCATAACTAAGTTATTAGAAATAAAGGCTTCATTCTCAATAATTGTTTTAATCTCAGTTCCTTTTTCTAACTGATTCTTCACTTTTGTTCCTAAGACTTGCTGTAATGAGTTCTTTGGTTGCTGATCAGTAAGTTGGCAAATTTTTTGTAATGAAAAGCCGTTGATTAATAAAACAGCTAAGTCAGACACGATAAGATAGTGAACATATAAATTTAATGTTGGTCCAATTACAGGTATCCTAGTTAATTTCTTCAGTGCATAATAATCTTGCCTATTAAGTAGCCTGATTACCTTACTAATAAAAAATGCACCTATAATAACTAACATCACAAGTCCACCCAGCATTACATTACTCGTCCAGTCATTATCTGACATTTCTGTCTTTAAAAAAGTTTGCATACAAATTAATAGCGTAACCATCATTCCAATTAAAAGTGCTGGATAAGCTAATTCTCCTCGCAGCTTCTTTAACTGCTTATTTTTCAAGCGAATAAGTTTACTTAATTGGTCCAAACAAGACAATAAATTGCCATCAATAATTGCCATATTTATTTGTGCAGCTAAAGTACTAGAAAATCCTACTTCATGCAATACATCCCCAAGTTGCCTACCTTCTTCAACCTGTTTACTTATATAAACTAATTGATTAGAATCACCACTCCATATTTTAGGCAGCAATTTTAAACTAGCATTAAGCGAATAACCGTTAATCAGTGCTTGTCTTAAATAATCAATAAAAATTAATTGAGCAGCGCTATTTAGCTTATCCTTCTTCATAAAGATGCTTCGTATCTTCACTAATCTTTCCTCTTTCAACCAACTGCTCTAAAGCACTTTGCCAGTTGCGCAAATCATGACGCTTATCATTAGACATAATAGCTTGATCAAGTATTTTTCCATATCCAATATCCATTAAACAGGCAACTTTATTTCGATCTTTTATCGGTAACAATCTTTGATAAGAAGCAGCCGTCAAGCAATTGCACAATTCGTCTTTAGTGATTCCTAATCCTTCAAGCCGTGAAATTGTTTGTAAAGCTGTTTTTGCATGAATAGTTGCTAAAACCAAATGTCCACTTAGAGCTGCGTTAATACTGATTTTGGCAGTTTCTTGGTCTCGAATTTCTCCAATAATTAAAATATCCGGTCTATGACGTAAAGCAGCCTTTAGTAAATCTGGGTAAGTAATTCCCGCAACAAGATTCACTTGTGTTTGTAAAAAACTTGGATTCCAAACCTCGACTGG
ATCTTCAATAGTCATTACAACCTTTTTTTCTCCAACTAATTGCGCTAACTCATACATGGTTGAGGTCTTTCCTGATCCTGTCGGACCGCTAGTCACTATCAGCCCACGTTGATTGGTGAGTCTTTTTAGCAAATCAAATTGTTCGGGGAGAAAATAATTATTACTTTTCCCTTCATAAATTAATCGAACTACTAAAGACTCATCTCCTTCAAATTCCCCTACACTTGAGAAGCGTAAAAAGACCTTTTCACCCCGAAATTCAGTCTGATACGCCCCCACTTGTGGTCTACGACGCTCAGATATATCCATTTGCGCCTGAAACTTGAAATAATTAAGTAATTCCTTTCCTACTTTCTGGCTTAATTCTTTTATTTTACTCAAACCTAGGGCAGTTCGAATCTTTATTTCATATCCTCTAATAGCGGGAAAAATAAAGATATCACTAGCATGGCTTTCAATTGCTTTTTCTAAAATAGCTTCTGATACTTCGTTCATAATTAATCCCTCCTTTGCCATACACTTATTAGTACGCGAAAATAGGCCATATTTTTTATCATTTTAAAAATTAAGAAAAAAAAGACTGACACAAAGGTCAGCCATAAAAAATAGACCTAACAAAATTAGGTCTACTTCTTGATAAATTATTATTCATCATCATCTGCAGCAGCAGTATAAACATTAGAAACATCATCGTTATCTTCTAATGCATCAATTAAATGAGTAAATTGCTCTTTTTTATCTGCTGGAACTGGAGTGGTGTTTTGCGGAATCATAGTTAATTCAGCGTTAGCTAACTTGTAGCCAGCTTTTTCTAAAGCATCACGTACAGCAGTAAATTGCTTAGGATCAGTGTAGATTTCGAATGCATCATCACTAGTTTCTAAATCATCCCCACCAGCATCCATAACATCAAGTAAAACTTGGTCTTCATCAGCATCAGTAGTGGAACGATCAATTACGATGTAACCCTTACGGTCAAACATGTAAGCAACGGAACCAGTTGCACCGAGTGAACCACCATTACGAGTAAAGGCAACACGGACATCAGAAGCAGTACGGTTCTTGTTATCAGTTAAAGCTTCAACTAAAACTGCAACACCACCTGGTGCATAGCCTTCATAGGTAATTTCATCATAATGTTCATCTGAATTACCTTCAGCCTTCTTAATAGCACGTTCAATGTTAGTCTTAGGCATGTTTGCTGCACGTGCTTTATCCATAACCATACGTAAAGTAGGATTTCCTGAAGGGTCAGGACCACCACTCTTTGCAGCCATATAAATTTCACGAGATAATTTTTGGAAAACTTTACCTCTCTTAGCGTCTTGCGCATTCTTGCGGCCTTGAATATTGTGCCATTTTGAATGTCCTGACATAAGAATCCTTCTTTCTTTTTATTTAATATAGTTAACCTTGTCAATCATAACGCACTCAAGGTTTAAATTCAATCTATGATTAATTAGGAATTGCTTTTTTCTTGATGGTGTCTAATCCAGTAAACAAAAATACAGAGTAAAATAGCTAAAAGCGCTCCAGCTGAATCAAGAGCAACATCGTGAAGACTTGGGGTTCTATCTCCTGTCAAATATTGATGAAATTCATCTAATCCAGCTAATCCAATAATTCCTAACCAGATAAACAAAGGTCCACACCATTTTTTAAATAAACGATCTAATCCTAAACAAGCAAATAAACCAACTAAAAAATACGATGAAAAATGGGCAAATTTTCTCATGACAAATTGTGTCATTCCAGCTTCCCCATTGTCTAAAAATGCATTATGCCAGCGACCAGCATAATAAATATTCCATTTTCCAACTATTCTTTCAATAATCGGAAAATGTCGATGAATAAATCCTGGTGACATCTCTTGTTCGTGATAAGTCATCGAACTGGAAATAAATAAACCTAAAAGCACTAAGAGCATCAAACAGACAAAGATCATTTCTCGCTTAGTTAATACTGTTTTTTTCATTTTTCAACTTTCCTACTTTCAAGTT
CTTCAGCAACCAAAACAATTATTTTTTTAGCAATTTCAACTTTAGTTGTTTCTGTAATATTTTCAGTTGTTTTATCTTGCCTTAAAACCATGACTTTATCTTCATCACTACCAAATACTCCATGACTGACATCATTTGCAACAATCATGTCAGCTTTTTTCTCTTGCAATTTTTTCGAAGCATTTTCTAATAAGTTATTTGTTTCAGCAGCAAAACCAACAACAACTTGATTCGCTTTTTTTATACTTCCCATTTTTTTCAAAATATCAGGAGTTTCTTTCAAATAAATTTTTAACTCATCCCCCTGATCCTGCTTTTTGATTTTATGAGCAATATAATTTACCGGTTCATAATCTGCTACAGCTGCTGCCATAATTAAAGCATCTGCACCTAAAAAAGCTGTCTTAACTGCTGAGAGCATCTGTTCTGTAGTTTCTACCTTAATGTTTTTTAAACTAGGCGAATTAGGTAATGAGACAGCAATATGACCCGAAATTAAAATCACCTCGGCTCCAGCAACAAGAGCAGCTTTAGCTAAAGCTATTCCCATCTTACCACTTGAACGGTTACCTAAAAAACGAACCGGATCAAGCGGAGATATTGTTCCACCTGCAGTAATTACAATTTTTTTACCAGCTAATATTTGTTTAGCTTGAAAAGAATCATCTATCCAAGCCATAATGTTATCTGGCTCTGGCATCCTTCCCTTACCACTATATCCTTCAGCTAAGCGCCCAGTAGCAGAATCCATTACCGCAATTCCATCTTGTTTTAATAAGGCTAGATTTCTTTGAAAGGCTGGATTACTCCACATATGACTATTCATAGCTGGAACTACATATTTAGGTGAAGCTGTTGCTAGGAAAGTTGTACTTGCCGCATCATCAGCAATACCATTTGCAATTTTAGCAATAAAATTAGCCGTTGCTGGAACTACAACAGCAATTTCTGTCCAATCGGCTAATTCAATGTGCTGAATCTGATCAGCTCTTTCTTTTTCCCAGAGATCAGTCAACACTGGATACTTAGTTAAAGCAGCTAATGTTTGCGTACCGATTAAATGGACAGCTTCTTTTGTCATAACTACTCTTACTTCATGTCCCTCTTTTTGAAAATTTCGTACAACGTTAATTGCTTTATAGGCGGCAATACTACCTGTAATATAAACTGTAGCTTTCATTTTTCTCAACCTTAATTTCATAATTTCTTTCTACTATAACACAAGCTAACTAAGACAAATTATTAAAAAATAAATTATCAAATTAAAAATCATTAACGGATTCTGATATGATTTATATTGAATAAAGATATCGATCACATACCTTGAAAGGATAAATATACAAAATGAAAAAATATTATCAAAAGATTTTTTCTCTCTTTATTGTAATGGGGGCTTTATTTCTAGTTCTTACTGGCTGCTCCAAAAAAGAAAATTCATCGTCTAACAAAATCTCGATTGTTACCAGCACCAATGTTTATGCTAATATTGCACAAAATGTTTTAGGTAAGTACGGCAAAGCAACAGCCATTATTACTAGTAGTTCCACCGATCCCCACGATTTTGAACCAACTACTGCAGATGCAAAAAAAGTTCAAGATGCCAAAATTGTTGTAGCTAATGGTTTAGGATATGATTCCTGGCTAGCAAAACTGGCTAAATCAAGTAATAAGTCTGCTGTTTTAGTTGGTGAAGACTTAATGAATTTGAAAAGCGGTGATAATCCTCATATCTGGTTTGACTTAAATATGCCAACAAAATATGTCAACTATTTGGTAAAGCGCTTATCTAAAATTGATAAAAAACACGCAAACTATTATAAAGAGAATGGAATTAAATATTTAGCAAAAATCAAAAAAGTCAAAAAAATAGCTGATTCAATTGATGGTACAAAACAAAAACCAGTATATGTTAGTGAACCAGTATTCGACTATGCTTTGAAAGCCACCCACTTTAAGATTGGCGATAAAGATTTTGAAGAGGCAATCGAAAATGAAACTGA
TCCGAGTGCCAAAATTGTTCATCAAATGAATCAAACTATTAATAATCGTGGTATCTCTTTCTTTGTTAAAAACTCCCAAGTAAGTAGTTCTACCGTTAACAATTTCGTTAAAAGAGCCAAATCAAAGAAGATTCCAATCTTGCAAGTTCGTGAAACTATTCCAAACAATACAACTTATTTGAAATGGATGACAGAAAATTATCAAAATTTAGCAAATATCGCTAAAAAGTTAAAATAATTACCAAAAAAGAGGCATGAGCCTCTTTTTTATTACCCTAAAATATCCTTTAATTTGTTATTAAATTGCTTAGGAATTTCTGCCATAATTACGTGACCAGTTGGTGACATAATAAAACTATCTACTTGATCATTTTTTTCTTCAAGCAACTTTTCATATCCTTCACGATAATACGGACTCTCCTTAGCAATAAAAAAAGTAATCGGAATTGTTGCGTTTTCTATAACTTTTCGCCAATCTAAGGACATATGATTTTCTAACAAGTCCACATTATCTTTTCTATTAAAGGGGTTTTCTTCTTTTGCTTTCATAAGTTTGGCATAAACACCATCATCTAGTCCTGCGAAAGTTTCATGAACATGGGGTCTTTCCTGACATTTTTTATGATAATTTTCACTTGTATAGTCCATAAAGCCATACTTCCAATTCTTATCATTAAGCATCTTAGGTGATTGATCTACCACTAATGCACGCTTAACAACAGTTGGCCATCTTTTTATTAGACAGAAAATAATCGAAGCGCCCATTGAATGTCCCATGAAAATAGCCTGTTTAATTTTCAGAAAAGAAATTAATTCTTCTAAATCTTGTGTTAAACGAACAATATTATGTCCTCTTTCTGTGCGTTGGCTCTTCCCCATATTTCGGTGGTCATAAGTAAGTACTTGATACCCTAAATTATTAAGAAATGGTACTTGAGCAGACCATATTTCTTGATATGCACCAAAACCATTTACTAGGACAATCGTCTTCCCTTTCCCAGTTAATTCATAATTAATTTTTACATTATCACTTGTTTCAAACAGCATTTTTACTCCTTATTCTGTTTAATTCTAATGAATAAACATCCAACTGCAGCAATAATCGCAATAATCAAAGCTTGACTTAATCCACTACTTAAGCTTACGTTGATCCCGTCATAAACGCTTTGCTGGAGCGAGCTTGTGGCAATTCCAGCCGCGTATGATAAAACACCATATGAACCAATTGCAGCAATCATAAGCGGTAAAATTGCAATCTTTTTACCAAATAAAACCATTACCATTGCCAAAATGTAGAGACCAATTAGTAGGAAATAAATCAAATGATAAATCTCAGCTGCTTGTTTTACATCCGCAGAATTCTGTCTTAATTGTTCATTTACTAATTGCAATATTAACTGATTAAAAACTTGTTCTTGTTGCGTATCATTATTTAGTTTTAGATCAGATGCAGCTAATTTACCATTGGCTAAATAATTAGTACTCAATTGATTTAGCCCTTGATATGATGTTTTTAATTCAATATTCTTGGGTAATTGTGATAAAAGCGTATCTTCGATTCCCGATTCTTGCAAAAAACTGACTCCCATTTGCAAATCACTATTATTCGCTTGGTCAACTACCTTATTAATTGAGCTTTTCACAAGACCAGCAGTACTATCAACTTTCAATTCAACTGGACTTGTTAAAGTCAAACTTAGTGAGATAAAGATTAATCCAATTACAACTAATCTCCAAATCCATTTTATTAATTTCATTTTTTCTCCTAACGAAAATTTTAATAGTCTTTTATAATCGCTAAAATACTACATAAAGTTTTTTTTAGTGCTATATAATAAAAGTGTAACAAATATTAAAGGAGTTCAGCATGATTGATGAAAAACTAAAAGAATTTTTCAATAAATTTCCTCATATTAAGGAAAATAAGCCTTTACAAAATACTCTGAGCAAGAAAATCAACAACCTAATAAAAAAAGAAATCCAACATGGTGCAAGTCAAG
TTGAAGCGGAACAGAAAGCACTTTTAAATTTAACAGATCTAGACACTCTATTATCACAACTAAAAAATTTAGAAGAAACTGACTACAGTAAATATAATGATTTCTTTGCTAAAATCTTTGATTCAAAATTAGTTAATGAATTAAAACTTAAATTAAATCAAATTGATGAAATTCACCTTAATTACCGCTTGGGTGATATTTTAGTATTACCCACTAATTCATCAAACTTAATTGTGCATGATTTTATGTCTAGAGATATTGAAAATCTTCATTCAACGGTCGAAAAAATTGGTAACGTCTTAAAAATTACTCAAGGTCCACGTAAGCTGGTAGGAATTTTCAAAAATAAAACTTTATTATTTTTACCTAAAAATTTTACTGGCTTCCTAACAATCAGAAGTCAAAGTGGTGATATCTATATTAATAAAGTTCCAAATTATTGTATGCTGGATGTTACTGCAGTTTCTGGCAATTTATTATTGGCTCATTCTAAACTTAAACGTGTTCAAGCAGACTTAAAGTCAGGCGACATTGCTGTTTCTAATACATCTGCAAATGTATTTCACATTAATGCTCATTCTGGAATTCTAACTAGCGATCATGTTTGTGCAAAAGAAGAGATTTCTTATCATACAAGTAGTGGAAATATAGACTTAGAAAGTATTTCTACTAAAAACTTTTTTATCGATACTAAGAGTGGAAAAATTACTATTAATGACCTTAATAGCGAAAGAATGGATTTAGTTACTGATACAGGAAATATGAGCTTAAATAAGGTTAACGGTAGCGGTGATATTAAGTCTGGAATTGGGAAAATATCCTTATCAATTGCTGATAAATCTAATTTTAATTTTTCAATCACAAGTAAAGTTGGAAGTATCCGCGTAAATGTACCAAAGAAAGAATTATTTAAATTCAAAATCAATACTAAAAAAATCGGCCCTACTGATCTTCCACTTGACTCCATTATTTATGGTAATAATGAATATGATGAAATTGAAGGATATGTTGGTAATGAAAATTCAAGTAACTCACTAACGATCACAAGTGAAATTGGAAAAGTATCAATTAAGAACGAAAATTAAACAACTTCAAGCGAAAATGCTAGCAAACCTAAAATTTGCTAGCATTTTTTCTAAATTATTAAGCCTGCTTTATATCCTGCAAAAACTAAAATTAAGCCGCCACCATAGGATAATAAAGCATAAAGAAGGAATGTTTTGTAATTATGATCCTGCCAATGAGAAAGCAACTCAACATTTAATGTGGAAAAGGTCGTATAACCTCCCAGTACCCCGGTTCCAACTAAAGCATAAACAAATGGCGAAAAATTGCGCGAAAAAACTAGCCCTAACAAAAATGCCCCAGTAAGATTTATTAATAAAGTAGCATAAGGAAAGTTGCTAGACCAATGTTTTTTACCATAATTAGTAATTCCGTACCTAAGAATTGCTCCCCAAATTGCTCCAAAACCAGCTGTTACAAGTGTCGTTATCATGCTCTTATTGCCAACTTTCTACCCAATATTTTTCCTGTAATCATTCCTAAATAAGCAAATAAAAATCCAATAAAGATTGAACTAAAGAAATATATCAGCGCCTGATTATTCATTCCGCTTTCTAATTGCTTGAATGTATCCAAATGGAAACTAGAAAAGGTTGTAAATGCTCCTACAAAACCTGTACTTAATCCTGTAACTAGCCAATCTCGTCCTTCCCTAAATTCAATAAAGAAATATGTTAGAAAGGCTAATAAAAAGCAACCAATAATATTAGCTACAAAAGTTCCAGCCTGTGACCAAATCACATTTAAATAAGCACGTAATGCTCCTCCACAAAATGCAAATAAACCAACACTGATATAATTTTTCAACTTTTTATTCATTAAAAATCAACCTTCTTATTGCATACGCAATTCCATCTTCATTATTTGTTCGAGTAACATAATTAGCTCGCTTCTTTATCTCTAATACCGCGTTTCCCATAGCTACTTTATAAAAA
TCTGGAACGTCAAACATGGAAATATCATTCTTTTGATCACCAAAAATCATTATTTCATCTTGTCTAAGATTAAGTTTCTGCGCCAATTCTTTTAAAGCGTTTCCTTTTGAAGCTTGCTTAGAATTAATTTCGATTAATGTTTTATCAGAGGTAGAAACGTTATATTTTTCAATAACTTCTTTGGGCAGGTGCCGATAAACTCTTTCCATTTCTGATGGAGCACCTGAAAACATGGCTTTTGTAAATTTTAAATCTTTAGCTATATCTTTAAATCCACAAACCTCAATCGACATTCTAGTCAGCCAGCTTTCACGCGATAAATAATAATTAATAAAGTGATCTAATGTAATAAATTTATCACTCACTTCAATATGAAAGTTAGCCTTCAATTGTTGAGCTATTTTTTCAAAAAAAAGGAAATCTTGATAACTTAATAATTCTTCAACTACTGCATTTCCTTGTGCATTCAAAACTAGAGCACCATTAAAAGCCACAACATATTGATTAGGATCTTGCAGATCTAACTGATCTAAATACTCTTTAATTCCTGGAAATGGTCGCCCCGAACAAGGAACAATTTTAATTCCTAATCGAGATGCTTTTTGTAAAGCTTCCCTTGTTTTAACACTTATTTCTTGTTTATCATTCAATAAAGTTCCATCAAGGTCAACGGCAATTAATTTTGTCTTCATCAATTTTCTCCTATGTAATCGTTCACTTTCCTTTCTATTATACAAAAAAAGACGACACTACTCCCTTATTACACATAAGAAAGTAGGCGTCGTCAGATCTTAATCGGTTAAATTGGTGAACGCCATCACCTATTATAATTTAATAAAAACAGTTTACTTTGAGCACATCTCTTCTTCAACGACCTTAGCAACTTCTTCACAATACCCATCAGCTAATTCTTGTGTTGGTGCTTCAGTCATAACACGGAGTAAGCTTTGAGTTCCAGATGGACGAACAAAAATCCGTCCTTCATCGCTCAACTCTTCCTCTACCTTTTTAATAGCTTCGGTAATTCGCTTATGTTCTTTCCAGTCTTTCTTATCTTTGACAGGAACATTGATTAAGCGTTGTGGATATTCTTTAAAGTCGCTTAATAATTCACTTAATGACTTACCAGTATCCTTCATTACGTATAATAAATGCAATCCGGTAAGCATTCCATCACCAGTATTATGATAGTCACTGATAATAACGTGACCAGATTGTTCACCGCCTAGATTATAACCATTAGCTCTCATTTCTTCTGAAACATAACGGTCACCAACTTGAGTTCTGACATTCTTAAGGCCGCGTCTTTCTAAAGCCTTAGTAAAGCCTAAGTTACTCATTACAGTTGTAACAATCGTATCTTTCTTTAAACGGCCATGATCGGCAAGATAGGAACCAATTACATACATAATATGGTCGCCATCAACTTCATTACCGTTTTCATCAACAGCGATACAACGATCAGCATCCCCATCGAATGCTAAGCCCAGTTGAGCACCTTGCTTTACAACTTCTTCTTGTAACTTTTTAGTATGAGTTGCACCTACATGATCATTAATATTTAAGCCATTGGGATGAGTTGCGATTGTAGTAAAATCAACGCCCATATCAGCAAATAATCTTGAAATCAAAGCACTAGCTGCACCATTTGCACCATCTACAACAACTTTAATTCCACCTAATTCTTCAGGTAAAGTATTTTCAATAAATTGCAAGTACTTTGAAGCACCTTCGTGGTAATTAGTTACAGTACCTAAGCCTTCAGCTGAAGGACGTGGAAGCTTATCTTCTGGTGCATCAATTAATTTTTCAATTTCTTCTTCTTTAGCATCAGATAGTTTCAAACCATCACTACCAAAGAACTTAATTCCATTATCTTCAACTGGATTATGAGATGCTGAAATTTGAACCCCAGCATCTGCACCTTGAGCACGAACTAAGTAAGAAAGGCCCGGTGTTGTAATCACACCAACTTCTAAAACTTCAATTCCGACGGAAAG
TAAACCAGAAATCAATGCATACTCTAACATTTGTCCAGAAATACGAGTATCGCGTGAAACTAATACTTTCGCTCTTTCTCCATCTTTTTTATCTTTTGTAAGAACATATCCTCCATCACGACCTAATTTAAAAGCCATTTCTGGAGTTAAGCCAGCATTAGCAACCCCCCGTACACCATCTGTTCCAAAATATTTAAGCATAATTTAACAACCTTTCCTTGTTTTATTCTTTTTTAGCAGTAGTATCAGTTACCTTAATATGAACTGGTATTACTGATGGTGAAGCCTTAACTACGCCCTTCGGCAATTTAATTGTTACATCTTTTGTAACATCTCGATTGATACCACTTAAATCAACTTTTAAGGGAAGAGATGTTATCTTCTTTAGCGTATTCTCATCACCATATATCTCAACTGTCTCTACTTTTGCGGTTAATGAGTAAACATGAGTTGATGATTCATTTTTGGACGTCACGTTTATTTTAACCTTCTTTTTCGACAAGTTAATTGGGATAGTAATGTGCGCTGTTGCTGGATCAATAGCAACATTGAGTTGATGTCCCTCTTTATCAAGTGCTACCAGCATTTCTTCACGCTCAAATGTGCTGTCTATTCCCTTTGGTAAATTTGCGTGAGCTACAACACGATCAACTTGGTTAACCTCGCTCTTAGCGCCAGTAATGTTCACAACTTCTGGATCACTCTTTGCTGTGCCAAGATTATACCCATGAGATACAGCACTCTTATTATACTCTATTTGCACAGGAAGGGTACGAGATTTTCTCTTTTGAATGTTTACACGTACTTTACTTGGACTCACACTATAAGTTAATTGAGTACTTAAACCATTTACCTTAATTGGCACCGTATGTTCTCCAGTTTTTAAATGGCTTAAATCAATATAAACACGGAAATTTTGAGTATTAATAGTAGAAGTAACCAGCGCATTTGATCCTTCTAAAGTAAGATTTATTTTTTCAGGATAACCAGTTACATAATATTTATCCGTATTAACAGAAACTTGTAGAGGAGCTTTAATTACTTGCGTCTTAGTTGCAGTTTTCAAAGTTTCTTCTCGACGTCCTTGAGTTACAAATCCCTGTTGATTAGACGAAACATAAATAGCCAGTAAAATTGCCAATATCAATGCAACAATGCGATAGAACCAAGGTTTATCAAAAAACTTTTTCATTTTTTATTTGACCCCCAATTCCAAACATGGTTTACAATTCGTTGATACCATTTCGGCTTTTCTTCTTCTTCCTTTGGAACTAGTTGAGCATTTAAGTACTTCAAATACTCTTCTCTAGTCAAATCAAGCATAAACTGGCTATTACGAGTAATTGTTACCCCACCAGTTTCTTCAGATACAACAATTGTAATCGCATCAGTAACTTCTGAAATACCTACCGCAGCCCGGTGACGAGTTCCAAGTTTTTTAGGAATCATACTATTATCTGAAAGTGGTAAATACGCAGCAGCAACTGCAATCCGATTATTTCTAATAATTACTGCACCATCATGCAATGGAGTGTTAGGAATAAAGATGTTGATTAATAACTCGCCAGTAATATCTGCATCAATTGGAATACCTGTTTCAATATAATCCTCTAAACCAGTATTTTGCTGAATTGTAATTAAAGCACCGATTCTTCTTTTGGACATATATTGAATCGCCTTATCTAGTTCTCCAATCATTTTTTCAGCGGCCTGTTTTTCAGTCATAGTTGTGCCACCAAAAATTGGCGATCTTCCTAAATGTTCAAGTCCACGTCTAATTTCTGGTTGAAAAATAACGATTATTCCAATTACAGACCATGAAAGAATTTGATCAACGAAATACGTTAGTGTATGTAAGTGCAATAAACCGGCTATAATTCTAACTATGATAATTAGTGATATTCCTTTAACTAATTGAACAGCCTTGGTTCCTCTTACCAACATAATTAAGCGATAAATAATATACCAAACAATCAGAATATCAATAATATTCATTAAAT
TTTGCCAAGTAAATATTTGCTCGATGTTAAATTTCACTACCCATCCCTCCTAAACCTTAATTATACCTAAACTACTGTCTTATCGACCATACATTTTAAATTCAAGAAATAGATCATTATATTCCTGAATTTTCTTAGGACCTAAATCCTGATAGACTTGTAAATTTTTCATTGCTTGTTTACTTGGATAAAATTGCTTATCGTTTTTTATCTCTTTAGGTAATAATTTTTGAGCTTTGATATTTGGCGTTGCATAACCAATATACTCCGCATTTTGAGCAGCATTTTTTGGATCAAGCATGAAATTAATAAAAGCATATGCTCCTGCTTTATTTTTGACAGTGCGAGGAATAACAAAATTATCAAACCACAAGTTTGAACCTTCTGGTGGAACCACATAATGTAAATGCTTATTATCACTTAACATTGTCCGCGCTTCGCCAGACCATGTTACACCAACTGCTGCCTCATTCTGAATCATATACATTTTTAGCTCATCAGAAATAATCGCTTTTACATTGGGACCTAATCCATCTAATTTAGTTTTAGCTAATTTAAGATCTAATGAATTAGTAGTATTTAACGACTTTCCCATCGATGCTAATGAAAGTCCCATAATATCTCTAGCAGAATCAACTAATAAAATATTATGACGATACTTTTTTGACCAAAGATCGTTCCAGTGCTTTATTTGTCCCGGTTTTACAAACTTATCATTATATACAATTCCTAAAGTTCCCCAAAAATACGGGACTGAATAAGTATTCTTAGGATCAAATGAGTGATGCAAAAATTCACTCCCAATATTTTTATAATTTGGAATTTTTTTGGTATCAATTTTCTCAAGTAACTTAGCCTTTCGCATTTTCGAAATCATGTAATCCGAAGGAACACAAATATCGTAAGCGGTGCCTCCTTGCTTTATCTTCGTATACATAGCTTCATTAGAATCAAAAGTTTCATAAATTACATGATAGCCAGTTTGCTTTTCGAATTTTTTTATCAATTTAGGATCAATATAATCACCCCAATTATAAATAATTAAATTCTTATTATTGGCGCTAACACCGCTATTATCTAACTGCTTAGCCCAAGCCTCTAATCCCAAGCAAACTGCTAGAATAGCAAAAATTCCAATCAATAACTTTTTCATTGTGCACGCCCCTTACCGATATGGGATTTATGATTAGTAATTACATAATAAATTAATACCAAAACCATTACGAAGATAAACATTAAGGTACTTAAAGCATTAATTTCTAAATTAATACCCTGCCGTGCGCGTGAATATATTTCTACAGATAATGTTGAAAAACCATTTCCAGTCACAAAAAAAGTAACTGCAAAATCATCTAATGAATAAGTCAAAGCCATAAAAAAGCCAGCGAGAATTCCTGGTGTAATAGCTGGAATCATTACTTTACTATAAACTTGCCAAGTAGAAGCACCTAAATCTCTAGCAGCATCGACCAAAGAGTAATCAAACTCCTTTAAACGCGGCAAAACCATTAAGACAACAATCGGAATCGAAAATGCAATATGACTTAATAATACTGAACCAAAGCCCAAACCAATTCCTAAAAAAGTAAAAAAGATTAAAAAGCTGGCACCAATAATTACATCTGGTGAAACCATCAAAACGTTATTTAACGCTAATAATGTTTTTTGGCCTTTTTTATTTTTAGTCTGACTAATTGCAATTGCTCCAAAAGTTCCAATTACTGTTGCAATCAAACTAGATAAGAGAGCTAATAAGATTGTTTCTAAAAAAATGGCTAGTAGTCGATTATCGTTAAATAAGTCTTGATAATGACTCAAAGTAAATTTTTCAAAATGATCCATATTATGACCACTCGAAAAAGAATAATATATTAAATAAAAAATAGGTAAATACATCGAAACAAGGACAAAAGCAAAATAAATCCGAGACCATTTTATTTTTTTCATAGCTTGATCTCCTTTTTACGACGATGATCAGACGAAGTAAATAGCATAACAAT
TACCATCAATACTATTAATACAACTCCAATTGTCGATCCCATTGACCAATTCATCGTAGTCATAAAATATTCTTCAATTGCAGTACCAAGCGTAATTACTCTGTTTCCGCCAATCAATCTTGTCAACATGAATAAAGAAAGTGACGGAATAAATACTGCTTGAATTCCCGACTCAACTCCAGATTTAGAAAGTGGCCATAAAACCTTAATAAACGTTTGCCACTTACTTGCTCCTAAATCATAAGCAGCTTGAATTACAGCCGGATTAATATCACAAATTGCATTATAAATTGGCAAAATCATAAATGGAATTTCAATATAAGCTGCTACAAAAATAAAAGCAAAATCAGTAAATAAAATATTCGCTGGAGCGATGCCAAAAAGCCTTAAAAAGTTATTTAGTAATCCATGTTTGCCGAAGATTCCAATAAATGCATACGCTTTTAAAAGTAAATTAATCCAAGTAGGTAGAATAATCAAAAGCAGCCAAAATTGCTGATTTTTCATTTGACTTAAAATATATGCAATTGGGTAGGAAATCAACAACGTAATTAAGGTAATTAAAAACGCATACCAAAAGGAATTTAGCGTCATTCTCAAAAAAGTTCCATTGACAAAAAACTGCTGGAAATTTTTCAAGGTAAATCCATCATCACCTTTAAAGGCATTAATAGTAATTAAAATAATCGGAGCAATCACAAATAAACTAAGCCAAAGTACATAGGGTACAAGAAAAAATAGCCTGCTCTTTTTCATATCTTACTCGTCTCCTTCATATGCCTCTAAGCGTTTATCAAATTCTGCTTCGCTTTCACCAAAACGCATCACATGAATATCTTCAGGATCAAAATATACTCCTACTTCTTTTCCAATATTAGTTGGATTAGTTGAGTGAATCAGCCATTCATTTTCATCACTATCAATGGCTTTAATTTCAAAATGATCACCCAAAAAGAGCTGACTTTCAACCATAACTCTTAATTTTCCATGCTCAATATCAGTAATATCTAGATCTTCTGGTCTTAAAACAACTTCTACTTTTTCTCCTGGTTTAATTCCAGCGTCAGCACATTCAAAACGATGATTACCAAATTCTACTTCATAATCTTTAATCATTCTTCCACTCAGAATATTTGAATCACCTATAAAGCGAGCTACGAAATCATTAACTGGCTCATCGTAAATATCAACTGGACTTCCGCTTTGTTGGATTTTTCCTTCATTTAAAACAAAAATCTCATCACTCATAGCTAAAGCTTCTTCTTGATCATGAGTAACAAAAATAAATGTAATCCCCAGTTTCTTTTGAATTTCACGTAATTCAAATTGCATGTCTTTTCTTAAACGCTTATCTAAAGCCGATAAAGACTCGTCTAAAAGTAGAACTTTTGGCTGATTAACAATTGCTCTTGCAATTGCAACACGTTGCTGTTGTCCACCACTTAATTCAGAAATTTCGCGATTTGCAAAGCCATCTAACTGAACCATATGCAGAGCTTCTTTAACGGCCGATTTTATTTCTTGCTTATCCTTTTTCTTTATTTGCAAGCCAAAAGCTACATTTTCAAAAACATTCATATGCGGAAATAAAGCATAATTCTGAAAAACCGTATTAATTTTTCTCTTGGCTGCATCTAAGTTAGTAATATCTTTTCCATCAAAAAATACTTGACCACTTGTTGGTTCACTAAATCCAGCAATTATTCTTAAAATAGTGGTCTTTCCTGACCCTGATGGTCCTAGCAATGAATAGAATTTTCCTGATTCAATCGTCAAATTAATATCTTTTAGAGCCACAAAACCATCATCATACTCTTTACGTACATGCGTTAATTTAATAATATCGCTCAACTTTTTCACCATCTCATCAAAAAAGCTACAAGCCGTTACTTGTAGCTTTGTCGTAGTATCTAATCTTTTTCTTTTCCTATAATTCTTACTTCAGTCTGCAAATCAACATCAAAATCTTTTTTGATTGTTTTTTGAATTAAATGAATTAA
ATCTAAATAATCAGTAGCCGTAGCACCACCAACATTAACAATAAAGCCAGCATGCTTCTTTGAATCTTCCGCTCCACCAATTCTTTTTCCTTGAAGACCAGCTTTAATTATCATTGGGCCTACAAAATGACCTGTTGGACGTTTAAAGACACTTCCACAAGAAGGATATTCAAGTGGTTGCTTTGCTCTACGTAAGCCATTGAAGTATTCCATTTTAGCCTTAATTGCCCACTTATCTCCTGGTTCAAGTCCAAAAGTAGCACTAATTACAATATCTCCAGTTTCCTGAACTAGTGAATGACGATAGCCAAATTCCATTTCATCGTGAGTATAAGTTTTAAATTTGCCTTCACGAGTTAAGACCCGAACAGATTTAATGACAAATTCCGTCTCGCCGCCATAAGCACCAGCATTCATGAAGACGGCTCCGCCGACGCTGCCCGGGATTCCTGCTGCAAACTCTAAGCCACTTAAACTAGCTTCACAGGCTGCTTCAGACGTATCGATAATTCTCGCTCCAGCATCCGCAGTAACTGTTGCTTCTTCTTGATTTGCAACAATTTTATCCATTTTAGTAAGAATTAAAACTAGACCCGCAATTCCTCCATCTCTAATAATTAAGTTAGAAGCATTTCCAATAACAGTTAAAGGTAAGTTATTAGTTTTAACTGTTTCCACTAAGATTTTTAATTCTTCTAAATTTTTAGGAAAAGCAAGATATTGCGCAGGCCCACCGGTCTTAGTAAACGTAAAGCGGCTCAATGGAATATTTTCTTGAATATCAATTCCCTGTTTTTTCAAATCCATCAGTTGCATTTTCAATCTCTCTTTTCCTTAAATAATCGTCCCTTTAATCATACCGCAATAAATAGCTAGTTTTCTACCTTAAGTGTTATACTAAAAAGAAAAATAATAATTTATTTAGAGGATAAAAATGAACTTTACTGCAATGGATTTTGAAACGGCAAACAGTCATCCTGAAAGTGCATGTTCTCTTGCCTTAGTTATGGTTAGAAACAATGAAATTGTCGACCGTTTTTATACAGTTATTAATCCGCAAATGCCTTTTGATAGTCGAAATATCAGAGTTCATGGTATTACTGCCGAAGACGTTAAAAATGCACCAACAATGGCTGAGGTTTGGCCTAAAATTAAAAAATTATATCAGCCGGGAATGTTAGTAGCTGCTCATAATGCACGTTTTGACTGCCGGGTGATGGAAAAGTCACTTGCTCGCTATAATATTCCAGCCCCTCACTACTTTGCAATTGATACTTTAGCAACTAGCAAAGCATTTGAACCCAATCTTCCTAATCACAAATTAGACACAGTTTCAGAAGCTTTAGATATCAACTTATGGCATCACCATAACGCTTTAAGTGACAGTGAAGCTTGCGCAGGAATTTTAATTGAAGAAAATAAACGTGTTGGGGATGATCCAATCAAAAAAATGGTTAAGCAAATTTAAGCTTAACCATTTTTTATTATTCAAAAGATTTTTTTATATCTGATAACCAGACTTGGTTTCTCTCACCAACAGTTTTAAATTCAACTATTCGTTTGGTTGAATCCCACGAATCATCAACACGCTTAATCGTTAATTCAAGATAATCATTTGGCAGATCATCAATAATAAATTGTGGCCATTCAATTACCACTAATCCATCTTCTGCTAGATATCCAGGCATATCAATACTTGATAAATCACCATCTTCTAGTCGATACATATCCATATGAAAAAGTGGCATTTTACCTTCACGATACTCTCTAACAATTGTAAATGTTGGACTTTTTACAGGACGTCTAATACCTAATGACCGAGCAATTCCTTTAGTCAAAGTGGTCTTTCCAGCACCTAAATCGCCAGATAAGAGGAGCAGATCATGCCCCTGGCTATTTTTTCCAATTGCCTGGCCTAATTTTTGCATTTGTTCATCAGAATTTATTTCTAATGATTCCATTTTACTTCTCCATATTTGCTTGAGCAGCAGTTAAAATAG
CTAACAAGTATACATCTTCTGTTTTACATCCACGTGATAAGTCATTAATTGGAGCAGCTAATCCCTGCAATACAGGTCCAATTGCACTAAAGCCACCTAAACGCTCAGCAACTTTATATGAAATATTACCCGATTGAAGTTCTGGAAAAATGAAAACATTAGCATGTCCAGCAACTGCAGAACCTGGAGCTTTTGCTTCTCCTACTCGTGGAACAACAGCAGCATCAAACTGTAATTCACCATCGCAAACTAAATCTGGATACTTTTGATGTACTAAATCTGTTGCATCTTGAACTTTAGTTACCATCGGTCCCTTTGCTGATCCCTTAGTTGAAAAACTAAGCATTGCAATTTTAGGATCAAGCCCAACCATTTCAGCTGTTTTACTGGATTGGTAAGCAATTTCAGCTAATGTCTCACTATCAGGATCAATATTAATTGCACAATCCGCAAATATATATTTTTCGTCTTTGCGTTCCATAATCATAGCGCCAGAAACACGGCTCATTCCTTTTTTAGTTTTAATAATCTGCAAAGCTGGGCGAACCGTATTAGCTGTTGAATGAGTAGCACCGGAAACCATTCCGTCAGCCTTTCCTTCATACACTAACATTGTGCCAAAATATGAAACATCTTGTAGAATTTCCTTTGCTTCTGCCAAAGTATTCTTTCCTTTTCTTAATTCAACAAAATCTTGGCACATTTTTTCAAAATCTGAATAGACAGCTGGATTAATTATCTCAATTGAACCTAGATCAACATTTAATTTCTCAGCAGATTTTATTACTTCATCAGGTTTTCCTAACAAAAGTGGTTCAACAATTCCCTCTTCTGCGAGACGAACAGCAGCTTTAATTATTCGCTCATCATTTCCTTCAGGAAAAACAATCACTTTTTTTGATTCTTCTGCTTTTTTCTTAAGTGAGTCAAATACTTTCATAACAAATTAAGCCTTTCTTTTTATCTCTAGCTAATATTTTACAATTTCAATGCTTTTTCTCAAAACAAAAACTATGCCATATCTGATTGACTTACTTCCTGTGGCAATTGCCAATCAATTGGACTCTCACCAAATTTAGTCAAGGCATCATTACATCTAGAAAACGGACGAGAGCCAAAGAAACCACGGTCTGCTGAAAAAGGACTAGGGTGTGCAGACTTGATAATTACGTTCTTTTCTTCATCAATTAAAGGAATTTTATTTTGGGCAAATCTTCCCCACAAAATAAATACAACTTTTCCGCGATCACTAAGAGCTTTAATAGCTGCATCAGTGACTATTTCCCAGCCTTTTCCCTGATGACCATTAGCATTTCCATAAGGCACTGTTAATACTGCATTAAGCAATAAAACTCCTTGATCCGCCCACTTTTTCAAATAGCCATGATTTACTGGGATGGCACCAACATCATCATATAATTCTTTATAGATATTTTTTAACGATGGTGGCAAAGCAACCCCAGGATTAACGCTGAAACTCATCCCTGTGGCTTGGCCAGGGTTATGGTAAGGATCTTGACCTAAGATCACAACCTTCGTATCTGCAAAAGAAGTTAACTTAAAAGCAGTAAAAATGTGATACATATCTGGAAAAATCTGCTTTGTTCGATATTCACTTTTTAGAAAATCATGTAATTTTTGATACTGTTCACTCTCAAAAGCGGGTGCCAAAATTTCATCCCAATCATTTCCAATTAATTTTTTCATATTTATTCTATCCTCTTGTATCTCTAAGAAATCCAAGTTCATGATATCATATAGATAAAAGATCGGAGGACTTAAAATGATTAAACTTATTGTAAGCGATATGGATGGCACACTATTAAATAAACAAATGCAGATTTCTTCTGAAAATATTTCTGCTATTAAAGAAGCACAAGCTAAAGGAATCGAATTTTTAGTAGCAACTGGACGAGCACCTTCTGAATCTCAAGGAATACTGGCTAAAGCTGGACTTCACACTGGCTTCATTAACTTAAATGGCGCAATGGTTTTTAATA
CAGAAGGTAAACTAATTGTAAACGAGCCAATTCCTAAAGAAGAGAG +>1_1#NODE_10_length_39995_cov_63.156_ID_19 +TAGATGCTCAATGTGACATATACCACATCCACAATATACCATAATATGACAAACACCACAAAAAGTTCCATCCACAGAAACGCATTGCTGCGGCGCTCGTTCCATATCTGTTTCAATAAATTCCTGTTCATGATCTAACGTTGGTTTAAAGCGTCCGTAATATTCATGCGCGATGCCCTCCAGGCAGGGATGCACGCTGACAGCAAATTCAAGAGAAGGCAGAAAATGAATGCGGCCAAGAATATCCAGGGACTGAACAACATATCTGCCGAAAGACTGGTTTCTCCTATTTGGGCACGGTTCTCGCTATTGGAAAATAAGAAATCATTCAATAAAAAAGTACATGCATAACTGAAAAGCATTCCTACTGCTCCGGCTATGAGCGTCAATAATAAATTTTCATAAAACACTTGCCTCAACAAGACATTGGCAGTAGCACCAAATGCCTTCCTCACCCCTATTTCCGACATGCGCTTCCGCATACGGGACAAAGTCATGCTACTCAAATTTATAGCCGGTACCAGCAATAAAATAACAATCACCAACAGGTAGTGCAAATAAGCTTCCTTCGCCTGTAACTCTCTTCCCCAGTGACGAAAGATAAAAGAGAAACGATCATCGGGTTGTCCACGATAAAAAATTTCGGAATCCTGCAATCCGTCATTGAATTTCTGACGCAGGCGTTCCGCCTCTTCTCTGATGGCAGGAAAATCTTTCTCCGAATGTGCCAGAATGACCACACGAAAAGTTCCCATCGCATTGTCATACCAGCACTTCCTGGCTATTTCTGTTGAATTGAAAGGTATCCAAATCTGCGCATAAGAAGAAACAGCCAGTTTGGATACATCTTTCACAACACCCACTACCGTATAATCGGCCAAGTTGAGTTGGACCGTACGTCCTACCGCTTCTACCGTACCAAACAAATTCCGGGCCACAGAAGCACTAAGTACAACCTTAGGAAGTCCGCTTTCACTATCGGCAGCCGTAAACGGCTTTCCGGCAATAAAATCAAAAGAGAACACTTTCCAAAAGGTATCGTCAGTTTCAAGGTTATCCACTGTCATCATTTTTCCAGCAGGTACAGAAATACGCATCCTGCCTCCTGTAGACACTATAGTTACCGCCTCAGGAACAGTAAGTGCCTTAAAACACTCTTTAGCCACTGTGAGTGACATACAGCCGTTTGATGAATTATCACTGGTATCTCCTTTCTGATGAATAGACATCGCTTTCATATAAAGAGAACGTGAACGGTTCACCTCGGGCACACAATCCGTAATGCGTACTTCCAACACCAGGACAATGACCATGATCATACAAATAGCCAATGCCGTTCCTATGATAGAAACCAAGCTGATGAGCCTATTCTCACGAAGTTGGTAAAAGCCTGTTTAATATATAGTTTTATCATGTTTGTGCTATGCTCTAAAACCTTTTAACAAAAAAGAGATTACTCATTATGCAATGCTTCCGCCGGCTGAACTTTCATAGCCTTACGTGCGGGAATCCAAATGCCTACTACAATCATCAATGCAATCAAAATAAAGGAAATAAGAACTGTTATTATGAAACGTCCACCTTCAATAGTAGTGCCATTCATCCAAGCATTCAATTCACTATTTGCCAGATTCCAATCAATAAAAATCGCAGGAATTGTGGCCACCGCCAATAATATCAATCCCTCTGCCAACTGACGGACAAAGATACTATGATCTGTTCCACCCAATGATTTCATCAATGCAATCTCACCACGTCGTTGCTGAGTACGGAACCAGAAAGTACCTAGCAATCCCAAGAATATATTCAACAACAAGAAACCCATTCCGAAAAGGTAACTGTTCCAGGCATTCGTATGTGACTGCTGAAAGTTACGACGGATATCGGTAAAAGAACGCACATCCGCGA
TAAACACATTGCCCACGCGATACAGTTTCTCACTATCCGCTTTCAAACGGGCTATAAAATCGCGATCTTGATCCTCTTTTACGCGTACACAAAACTCCGTGCCCAAATCATACCATTCTTCCGGCATAGGAGCCACCATGCAGTAACTGTTCCATGCCTCAAAATAATCCCCATAACGGACAACCTGCAAAGAAGCCGCTAAATTATATGTATTCGTTGTATCCCCGAACAAATAAAATTGTTTGTCTACCAATGAAGTCAAATCACATCCATAGCGCCGTTTATAAAGATTATCTGAAGCTAAGAAATTCTTCGGATGTTTCAGCATCTCAGCCAGTTGTTCAGGGGTCTCCCCACGGGTGCCACGATAACGGAACACACGGACAAAATCGGGAGACACCAATCTGCGTATAGTCCATCCCGGCGAACGTAATGTATCATACACCACTGACGCACTGCTGTTACTGCCATTATACGGGTAAGAATTTTGTCCTAATCCGGCTGCCTCTATGTCCGGTCTATGCTGAAGACGACTTAGAAGTTCCTTGATATCCTGCCGCTTTTCCTCGTCTGTCTGATTGGGAATAAAGTCTGGACTTTTGTCCGTAAGACGTCCCATTTGCACCAAATAGCAATGAGAAATATCAAATCCCCGTGGTTCATTATAAATTGAGGTCTGTACATACATATAGTCCACAATATACCATAAAACGACACTTACCAACAATAATTCGGTGACAAGCCAAAGATTAGACCGCCATTCATTTTTTATCTGAGTAAATAGTTTCTTATTCATCTTGATTCTTTACTTTTAATTCGTTATCACCTTAGTGAATCCTTCCTCCCAGTGCATTGACAATGCTAGTCCTTGAAGCCCTCCAAGCAGGAATGCCGCTACTCAGCAAATTCAAAACAAAGCAAAACAGCAATGCATAAAGGAAAGTGGAAGGGTGCAATAAAATACTGGAATCAACTGTAGGGGCATTCAATGTCGCACTATAAGGCTGTGCAAACAGGATATCGGTACCTAAATAAGCCATAACAATACTGATAAACAAACCTACGGCACCAGCCAACAAAGTCACGACCAGGTTTTCCATGATTATTTGTCCAACCATTTCCATGCGGGTGCTCCCAAAAGCACGGCGTACTCCTATTTCCGCCACACGTTGGCGCAACCGGCTTTGCGTCATACTGCTCAGATTTATAGCAGGTACTAACAACAGAATGATAAAAATGATAGCACGTTCACGACGAGCCGACTTCAAATCCGGCTCCAAGTTAGCTGCAAATGAAATAGACTGAGTTTCCTGATCGTATGGCCGGTTACGGTCTATCAACGTATACCCCTGATCGGCAAGAATAGAATTATACTCACTCATACGCCGCTTGGCTTCAGCACGGATTTCTCCAAAATCATCATGGCTTCGAGCCAAAATAGTGACACTCATCATCCCCATGTGCTGGTCACTCCACGTATCATTAGGCAAATCCGTCGAAGTAAAGGGAATCCATACTTGGCCATAAGAAGCATCGGCAAGAGTAGATACATCTTTCACTACCCCTACCACACGATAGGGAGCATGATTCAACAAAAACTCTTTGCCCGCAGATTCTGTGGAACCAAACAAAGCCCGCGATATGCTCTCAGTAATCACAGCCACAGGAGTGCCTGCATTGAAAGTAGCTTCATCGTATGGTTTTCCATTCACAAAACGAAAATCGAACACTTTAAAAAAAGTATCATCCGTTTCCCGCACATCCGCTCCAATAGCCGGCATAGCGGGTAATGAAACAGGAGTCGATACCGGCATACTGCAATAAATAGTGACAGCTTCAGGGGTTTTCAATGATTTATATATCTCACGCGCTGTCCGCACACTCATCGGTCCGTTACTTGTTCCGTCCCCCCAATCCTTATGACTAATACTCATATAATGCACATGCAGAAAACGATCCCGGTTTGACTCGGGAGAAAAAGGAGCGACCTTCACTTGTTG
TAGCATCACTACCAACATAATAAGGAAAATGCTAAGAGCAGTTCCTGCAATGCTGATGACACTGATAAGCGGATGCTGGCGAAGTTGCGCCAAAGCTTGCTTAAAATACTGTTTAATCATCTCTGTTATTTCTTAATAAATTCCTTTTCATAGTAATCAGTAGTTATTGCACCTGACGCCCATCAAAGAAACGAATGGTTCTGGAAGTCTGTTTGGCTTGTTCTTCATTATGGGTTACCATCACAATGGTCCGTCCATCCTCTTTATTTAATTTATGCAACAATTCCATGACCTCTGCACCCATCTTTGAGTCCAGATTACCGGTAGGCTCATCCGCAAGAATAATTTCAGGATTACCCACAATAGCACGGGCTATCGCTACACGCTGGCACTGTCCTCCGGAAAGCTGTGTGGGCATGTGCCGCATACGATGACTTAAACCGACACGATCCAACACTTCTTTTGCCAGACGGGTACGCTCCTTGGCAGCCATCTTTCTATAGAGCAAAGGCAACTCCACATTGTCAATCACATTCAGCGAATTAATCAGGTGGAACGACTGGAATACAAAGCCCAAGGTCTTGTTACGGAAAGCAGCCAGTTCCTTGTCCTTCATGCTTTCTACAGACGTGCCGTTTATTTCAATCTTTCCACTGCTGGGAGCGTCCAGCAGCCCCATAATATTCAGCAAGGTAGACTTGCCGCAACCGGAAGGACCCATAATGCTGACAAACTCCCCTTTAGCCACATCCAAATTTACATTTTCCAGCGCTAATGTTTCAATCTCATTCGTACGGTAGATTTTATTGATACCGGTTAATTTAATCATTGCCATATTTTTTTCTTATTTAAATTGTTAATCTTATACACTTTCTATTTTCATGAATCAAAAGACATTCCAAAGCTATAATCATTTAATATTCAACGAATTTTAATTCTACATACTGTCCGATAACAGACGGTGTGACCGTTTTTACCGGACTAATACGACTGATGCTGTATGCCGGAAAAAAATGTCAACAAGCACTATGAATAATATAAAAAGGTAAACGATCAAAAAAACGACATAGCCTAAGAACAAATTAAATTATGTTCGGGAAATGAGTGAAAAGGATGAAAAAACACTTCATTTCCCGTAACAATCAATGTATTAATTAAGAGAGAGTTTTATCTTGTTGTTTATGTCATAGCTGCTTCTTATTTCAACTTCAGCTTATTCTTGTTTTTGTAACTGCTCATATCGCTGACAACCACCTTGTCACCCGGTTTCAACCCCGATATGACTTCCACATATTCAAAATTACTATCACCCAATTGAACCTTACGCTTTACTATTTCATCCTTACTATCCTGAACAAATAATTCATACTCACCGCGACCGACATAGTAAGAAGCATTCGCCAATCTTAAAACCCCTTCTTTCACGGCATTCATTACATATACATCCGTCTTCAAGCCGGAACGCAATCGCTTGTTATTATCTTCATTCAATTGTACAATAAAGGAAATCACCCCATTCTTACTCAACGGTGTCACACTACTTACCGTTCCTTCCAGTTTCTCATTGCCAATCTTGACTATGGCACGTCCACCTGCTGCCACACGGTCACCGTATGTATCGGCTATTTCACCTTCCACTTTAAAGTGGCTCAAATCAGAAATGATAGCTACCTGGCTGCCTTCCGCTACCTGCGCACCTACCTGATTATTTATATAGGTAAGAATCGCTTTACGCGGTGAACGAATTTGAGCGTCATCCAATGTACGTTTCATTTCAGCAAGACTCTTTGAGAAAATATTGAATTCCAATTCCTTCACTTTCAAATCAGCCTCTTTTACTTTCGATTCATTCGCGTATTGCTGACGGAGCTGTTCCAATTCCAATTTTCCGGTATTAAAGTTCAGTTCAGCCTGACGAACCTTGTCCGTTGTTCCAGAGCCCAAACTATCCAAATAACGTTCGTTACGAAGTTCCACCTCCATACGGTTCAATTTCATGG
CGGAAATTTTCACTTGCATCGATAAATCACTCAGATACGTATTATTATTCACTTTCAACTGCTCTAACTGATAACGTTTCATCTGTTCTTCATCCAGTAATTTCTTGTATTCAGTTTCCGTACTCTGCAAATCAAGTTTCAAAATCGGCGTACCTACATCCACACTGTCTCCTCCCTTACGGTAAACTTCCACGATACGAGTATTAATAGGTGAGTTGATAATCTCTTCAAAAGCCGGAACAACCTTACCGGAAGCACTGACACTTACTTCAATAGTTCCATTGTCCACTTCTGAAAACACCAAATCTTTCCTATTGACGCTGCTTCGCATAAACGAAATCAATACAGCTATACATACCACTGCCGCCACACCTATTGCACCATATTTAATGAACTTCTTTTTGCGCTCCTTATCACGCACTTCTTTAGGAATTTCTCTGTCCATAACTTATTGTAATTTTATTATTATCATTTTTGTGCTAAAGAATAAACAACATCCGTGCCAAGGAAACAACTTATTGATAATCAATAACAAATAAAGCGTACAAGGTGTCCGATATCGGACACCCTGTACGCTTTTAAACATCAGTTACCGTACAATAATGATTAACGCAAACGCAATATATCACCTACTTTAGGAGAAGGGTCTTCCGGTTTCATCTTATTCATCTTATATAAATTTTTCAGACGAATACCATATTTTTGGGAAATGGAATACATAGATTCTCCCCCACGGACAACATGAACTATGTGTTCCTTATCCGCACGACGATGCTTTTTATCCAAATAAATAATATCACCTGGCTTCAATACATATCCCTTATAAAGATCATTGTATTTCCTCAACTTGCGTTGACTGATATCAAATTCTTTAGAAAGTTTTTTAAATGTATCCCCCGGACGTACCACAATATAGAGTAAATCATTAGCCAAATAAGGCTGATGCGGGTTTGGGAATTCCTTCATCCACTTTATACCATCCTTGGTATCATACTTATGTAATTCATATAATTCAATGATATCAATTAAACGATATGCATAGCGAGGGTCAGTAGCATATCCCGCTTTCTTCAGCCCATGCGCCCATCCTTTATAATCGGTGATTTTTAATTTAAAAAGAGATGCATAACGTGAACGTCCTTTTAAAAATTTAGAATGATCTTCATATGAATCACGAGGGTGCTTATAGGCACGAAAACATTCATTACGGGCATCATCATCATGCCGTACAGTACGACCAGTCCAGTCACCACCACATTTTATACCGAAATGATTATTGGATTTACGAGCCAGTGTACTTTTTCCCGCTCCTGATTCCAATAATCCTTGAGCCAGAGTGATGCTGGCAGGAATACGATACCGTTTCATCTCATCGATAGCGAGATCTTTATACTTATGAATATATTCCTCATACTGCCGGTTACGGGTTTGCGCTTGCACTGTAAAATTACATAGACAACAAAACAACCCGATTAATATAAATTTCAACGTATGTTTCATCATATAAAACTGTTTCAAAATCGCGACTATACTTTTTCCGCTATATATAGTTGCAAACTTACCATTTTTATCCGGCTGTATGCAATCTTTTTAAATTAATTACGAATTTCATTCGAATTCCAAAAGTCCAAAGAAATCCGGACGATGAAAATCCGGCTTCTCTATTTTAATAGGATTCCATGATAAGAAATGAGGCTTTTGCAATTCATCTCCACATTTATAAAAATTAGCCCGTATGCTCATTCCATCCAGCCCGGTAATGGCATGCTTAAAAAATACCTTATACGGAATCAGCAAAGCCACCTCCCAAGTACATTCTCCTATCTTTTCTTCAAAAGTTTCTCTGCCCAAGCTAGCCCAACGCTTCACCTGATCCGTTATTTCAGAAGGAGCCATTTCACGATTATTACGCTCCGAACCTGCTGCTAAAAGAATCGTAGCGATACAGTTACATTCCAAATTATAATAAATGCCATCTCCCGCAGGAATAGAGAAAAATT
CCACGCAGGAATCTGTCCAAACACTACCGTTATCTTCACCGTATTTGGCACGAACACTGTCTTCTACTACTTTATAATGGACTAATATAGCATCATTTGTATAGGCTATACGAAAAGAGACTTGAGGTTGATAAGGATATTCTTTCCAGTTCACTATATTAATAGGATGATACGTAATGTTCTCATTATCAAACAAGGTCGGTATTTGTCGTGCATCTTGAACTTTACCACTTAATTTCTTTACTTTCATCGTATTGTTTTTATTAGTTAAATAGCTACTATTAGCTTCACAAAAATATATACTTCCACTGAGAAAACAAAATAGTGGCTACCATAAACAGCAGCCACTAAAAGAATAAAGCAATAAAGACAATGAAAGCACAATTTTTACATTTTCATCCATATTTATTTCTTCACATCGCCCTGTGGCATATGTCTTCTCCCATCATGCTGCTTTTTCATTCCTTGTCTTTTTCTCATCTCTTTACGGAATTTATCGCCATTGTGTTTCTCCATATTATACATTTTCTGAATTTGTTTAGGAGAAAGAAATTTCCGGAACTCATTATAATACTTCTCACGAACATCCAGTATCTTTCTACTCTGCGCGAAACGTGCCTTGATAGCCTGCTCCACTTCAGCATCTGTAGGAAGCGGTTTGGGGGTCTGTTTATCTGCAGCCGTTCTATTTGCAATATTTCGACAGGCTCCCATATGACGAGTAGCCCGCATTTCTTCCATGTATTGTTTATACACCGGAATAAATTTAGCAGTCGTTGCGTCATCCAATGCCAATCCTTTTATAATTTGATTGCACTGTATTTCCAGCATCTGCTCTTTATTAAACTGCCTTCTTTCAGATTTAGCTCCTTTCTTTTCTTGTGCAAAAAGTGTTACCTGACTTCCCATAACAATGGCTGCCAACATCATTAAAAAAAATTTTGTTCTCATCATTTTTTTGTTTTTAGTGATTAATCAATTTATAAATATATCATTTTCAGACAATTCCACCCATCCTGCTAATTCCTCATCAGACATAGATTCTATATAACGATCCAAAGTATCTGAATAAGAAGAGTCTAAATCCGTAGAAACAATAAGTGAATGAGAAGGCAACTCCTCTGCCACAGGTTGGGATATCGGAAAGAAAAGAACTCCCAACATGACAGCCGCAATAGCTAAAGCAGCCCCAATAATCAATTTTATTCTATGCTGCCTATTAAACCTCTCAGCCTGTGTACGCTCCAATACTTGCTTCTGCATTTTTCCGAAAAAGCCATCAGGGGTACGATAAGGCATCCGCTTCCCAATATTCTTAAAATCAAACTCCTTTTCCATCCTGTCTTTAATTATTTATCATGTACTCTTTTATCTTCTCTTTTGCATAATGATAATTCACTTTCAACGTATCCACTTTCGTGTCCGTAATCCTGCTAATTTCCTCATATTCCAGTTCATCATAATAACGAAGATTGAAAACTACCCGCTGCTTTTCAGGAAGCATCAAAATAGCCTGCTGAAATTTCACCGCCATTTCGTTCTCATAATCCACATAATCTGACGCCATCAACTTATTCATTAACTCTTCCTGAACTTCTTCCGCCGAAACAGCTTGTTCCTTACGCATATTCAGAAAACGGAAACACTCATTGGTCGCTATCCGGTAAATCCATGTTCCCAATGAACTTTCCTCCCTAAACTGAGGAAGATGGCGAAAGACGCGAATAAAGACTTCCTGCAGAATATCCTCTGCATCCTCATGTGATACAACCAATCTCCGGATATGCCAATATATGGGCTGTTGAAAAAAATCTATCAACAGTTTAAATCCCCGTTCCGGGTTGGAAGCCCATACTTCCCTAATTTTATCTTCGTTTATCATCTATTCGTCAAAAGTTTATAGGTTAGAACCACCCGAGAACAAGAAGTTAAAAGTGAAAATGAAGTTTTTACATTAATTATAGATGAAGTTAAAATAATCAATCACATTTCCTACAAGA
GAAAAATACAACCAATCTTCCGGAATATGAATTTAAAGAATTCAAACTCAATCTTCCTATCAATAAATTTCAGTTCTTAGAAGAATTACCATATAAACAATAGGGAAATCCCTAAAAATAAATAGGGGAAAAGATAGAAGAATATACCTTTTCCCAACTTATCTTTGTTACAAACAAAAAAACAAAAAAATGAGAAAATTGAAACAAACATTATTAGCGTTAACCATTTGTACTTTAGTCATCAGTTGCAGTTTGACTACTAGTAGCACTATTATGAAAGTACAGAGAGGGATGTCCCAAGAAGAAGTCAGCCATTTACTTGGGAAACCCGATTTCCGTAGATTCGACAACGGCTCGGAACAATGGGAATATACCAAGACCAATGTGTCCACAGCTGCAAACACGGTAATTATTATCGATTTTGTAGACGGAATGGTAACAAACATGGATTCTTTTGAGTCCAACATTACTCCGCCTCCAGTTGCAGTGTGTCCGCCAAATGAAATCATTACGGTAGTTCCACCCAACCACCCTGATCATTCTGGCCCGCACAGACCAAAACACAAAGCCATGAATCCACATGATTTTGAAAACCTCTACAAAAAAGTAAAAAACAAAGCATTTAAGGACGACCAAATGGAATTATTATCTGTAGGAGTTGTAAACAACTACTTCACCTGTAAACAAACTGCCCGACTCATGTCCATATTTACATGGGACGATGAAAAAATGAAAGTATTAAGAATGGTTTCCAACCGCATTGTAGACCGTGAAAATGGAAAAGAAATCATCAAGACACTGGATTCTTTATTTAAACAAGATGATGCACGTAAAATATTGGGAATCACTAACCAATGGTAATTCAACCTAAACAACACACAAAAACATAAATATATGAGAAAATTCAAACAACTATTATTGCTTGTTATCAGTATAACACTTGTCAGCTGTCATACCACAGGAAGTATCAAACAGAAAGCATGGAAAGTACAGCAGGGAATGACAACAGAAGAAATAGGCCAACTTCTTGGAAAACCCGATTTCCGACGTTTCGATGGTTCTTTGGAACAATGGGAATATCAGAGCGGAGGAATTGCAACGTCTTGCAAATTTCTAATTATCGAATTTCGAAACGGAAAGGTAACAAGCATGGATTCCTACAATGAAATAGCTAAAGAGACTTCAGTAGGAGACCTATATTCCAGCAAAATATCCCTCCATACCGTTGGTTCAATAGACGATAACGAATTTGAAAAGATATACAATGAAACAAAAAATTCCGTATTCAAAGATTCAACTCTGGAAAAAGCCATTATAAATAAAAAATTAAGTTGCGCACAATGCCTTAAACTTATGTCACTTTATACTTTTGATAATGACAAGTTAAAAATGTTACAAGTACTGAAAGACCATATAGCTGACACAACAAATTACGATAATATTGTCAACTCACTAGATTTCATTTCAAGTAAAAACAAGGCCAAAGAGATATTAGGAATACCCTAATACAGTAAAAGCAAACGTACCTGTTGCAATAAACCTAAAAAATATTGCAACAGGCACATCTGCTTTACCTCAATCTTCTATTCTGACAGCTGTGCCGCTTGCGGTTACCATCAGCATACTACCACTGTCCCCCACAGTTTCATAATCCAAATCCACCCCAACTACAGCATTAGCTCCCAATAAAGCAGCCTGATCCTGCATTTCTCGTAACGCAGTATCTTTAGCCTGTCGAAGTACTTCTTCATAAGAACCGGAACGACCGCCTACAATGTCACGAATACTAGCAAAAAAATCACGAAACAGATTAGCACCGATAATAGTTTCACCTGTTACAATGCCATAATATTTGGTTATACGTTTTCCTTCGATAGTAGGTGTTGTTGTTGCTAACATAATCTTTCTCTTTTTTAATTATATATTCATTAGACGCACAAATAAAAGAAAAAGTTGCAACTTTCTCCTTTTTTATCGGTAACTATATTATTCTTTTCACA
TTCAACTCTGACAAGCAATAAGAAAACAAATAAATTTAAAACCAGATTTTAGAAAAGAAACATTGGGAGATTAAAAACAAAACATTACATTTGCAAGAAAACAAAGAGGAACGTATCATGAAACAAATCAAACTGGAAATAAACATAGAGGCTTGCCATTATGACGAACTAACAGAGAAGGACCGCAAACTGATAGACGCCGCGTGTGAAGCAACGAAAAGAAGCTATGCTCCTTACTCACATTTTGCTGTTGGAGCCGCCGCACTGTTGGAAAATGATATCGTCATTACCGGAACCAATCAAGAAAATGCAGCCTACCCATCTGGTCTTTGTGCAGAACGTACCACTTTGTTTTATGCCAATTCTCAATATCCGGATCAAGCAGTAAAAACACTCGCCATTGCTGCCCGCACAGAAAATGGTTTTCTTGACACACCTATTCCTCCTTGCGGAGCCTGTCGGCAAGTATTATTAGAAACTGAAAAACGATATGGAAAACCTATGCGGATACTGCTTTATAGTAAGACTGATATTTATATATTAGAAAACGTAAGCGGACTTTTACCTTTATCGTTTGATGGAAATTATCTGAAATGATACCTACCATCCGTCTGCACAGCCAACAGTTAATAAATCCGGTTTTCAATAATCCGAAAGATCTTGTATCATGGATGGGAGGCATTCAAGCGCAAGATTACACCATGTCCAAATGGGCTATTGGTATCCGGCTAAAGGCAGGAAATCTGCAAACAGTCAATGAGGCACTGGCAAAAGGAGATATATTGCGCATTCATGTCATGCGTCCTACTTGGCATTATGTAGCTGCAGAAGATATACGCTGGATGCTGAAGCTTTCATCCCGACGCATCATTACTGCCAATGACTCTTTTGCCAAGTCCAGAGGACAAGACATTTCAGTTGACATTTACAACAAAGCAAACCGATTATTGGAAAAGGCACTAGCAGGACACAATCATCTGACCAAGCAAGAAATTGACAACGTATTTAAAGAAGGAGGGCTGGAAACCAATGAAAGATTATCCAACCGTTTCCTGATTCACGCTGAAGCTGAAGGACTTATTTGCAGCGGAGCAGATAAAAATAATAAAATCACCTTCGCACTTTTGGATGAGCGTGTTCCTCCAATACAAGAATTACATAAAGAAGAGGCGCTAGCTATCTTGGCACGCAAGTATTTTAGAAGCCACTCTCCTGCCAGTCTGAAAGACTTTGTATGGTGGTCCGGACTTTCGGTAACGGAAGCTAGACAAGGTATAGCCGCCATTGAACAAGAATTGCTTACTGACCGTTTCCTAGCACAAAAATTATATGTTCATCAGTCTTATAAAGAAGAAAAAACAACCGACATATTGCATATCCTTCCTTCATACGATGAATACCTAATCAGCTACAAAGACCGTACTGATGTATTGAACAAAGAATACCAACACAAAGCATTCAATTCTTTCGGGATATTCCGTCCGGTTATTCTGTACAACGGGCAAATAGTTGGAAATTGGAATAAAGTCATACAAAAACAAACAACACATATAGAAATGAACTGGTTCAAGAAAAATACAAAAATCAGGTACTGTCCAAAATTTTGTGTAAATGGAAACAGGATTCAGTTGTAAGTTTGTTCTTATATCTGGATTCTGTTTTCAAATATAAGCATAAACTGGTTCATAATCAATCCCCAGTTTGAAATAGGCATTGTCCATTTCTTTTCAATCTCCATAAGTGAAAGATACACGGTCTTTTTTACGGCATCGTCCGACGGGAATGAAAGCTTTGATTTGGTGTACTTTCTGATTTTTCCGTTCAGATTCTCAATAAGATTTGTGGTATAGATTATTTTCCTGATTTCCAATGGGAACTGGAAGAAAACAGTCAGATCATCCCAGTTGTTTCTCCATGAAAGTATGGCGTATGGATACTTTCCTCCCCATTTCTTTTCCAGATTGTCAAGTTCTGCGGCAGCAACTTCCTTATTAGGTGCA
TTGTAGATATTCTTCATATCCGCCGTAAACTCTTTCTTATCCTTATAAACGACATATTTACAGGAGTTCCTGATCTGATGTACCACACAGATCTGAGTGGATGACTGGGGGAATACGGTACGGATGGTATCTGTAAATCCATTCAGATTGTCAGTACAGGTAATCAGTATATCCTGCACTCCACGAGCCTTCAAGTCGGTCAGGACACCCATCCAGAAAGAGGAACTTTCCGATTTGCCGACCCACATGCCAAGGACTTCCTTCAGGCCGTTCTGTTTCAGACCGACACAAAGATAGACGGTCTTGTTTATAATCTTGCCGTTATCCCGTACCTTGAAGACGATACCATCCATCCAGACTATCAGATAGACCGGATCCAGAGGACGGTTCTGCCATTCCTGCGCAGCCTGGCTTACCTTGTTTGTAATAATGGAAATAGCTGATGTAGAGAGCTCTATTTCATAAATCTCACGCATCTCCTCCTCTATATCGGAAACACTCATTCCTTTGGCATACAGGGAGATAACAAGCTTCTCTATAGAAAGTCCCCGGCTTTCATGCTTGGGGACTGCTATCGGTTCAAACTGCCCGTTGCGGTCACGCGGAATGGAGATGACAGACTCTCCATGTCCGGTCTGAATTTTCTTCGGATAACTGCCATTCCGGGAGTTGCCGGTGTTGTTCCCTGCCACGGAATTCTTCTCATACCCCAAATGGGCATCCATCTCACCTTCAAGCATCTTTTCCAATACCTGCGCATGCAACTGATTCAGAAACCTGCTCACATCCGCTTCTGTCTTGAACTGGCTAAGGAACTCCTTGCTTAATACCTCATCAGGCACTACTTGATTCTTTTCTTTCATAATCTTTTTCATTTGGTAAATGTATAAAATAAAAAATACGGAACTCGATTTTGAATCCCGTATTTTCCATTTACACAAAATATTTTATAGTGCCAAAAATCAAAAAAGAATTACTTTCCCTAGCGGAGAGAAAATACCTTACTTTTTTCTCCGAACTGTAAGAAAATTGTAAATGGATTATTTCTGCGTCATCACATTTTAAAAATAATCCATTACATTTACATTATTCAGTATTGCAATATAAAACTCTCAGCCACTTTCAAGAAACGTTCATGACCTTTTTCATTCAAATGTGCAGTATCTGTATTATTCTTGCTGTTCTGAAAATATATTTTCCTAAAATGATCATTACTTGCATAAATTCCTCCTTTTCGGGCGCTGTCAAAAATAGGAATACTATAATTTCCACAAACCTCAATCATAGCATCCACCACTTTCTCAGCATCACTTCCTGCAAAATTTTTACAGTTCCATCGGGTAAAGAAAAAAATCTTAGCTGTGGGATACTTTTCAATAAGCCCTTCACATAGCATTGCCAGCCTCTCTTTAAATACATCAATACCGCCAATTGAATCCAATTTAAAACCATCATTGTGCCCACCCACAACAATAACATAATCCAAATCATCCGGCATTTCCTTATATCTTACATACATCGCTTCCCCCCAACGCGGGCTACTATAAGCAATGCTGCTGCCGTTCTTGCCATAATTTAAATATTCCATACCATGTTTCTCGGCAAATTTATAATGCCAGGTATTCTTAACAGGTTCCTTATGATTCTTCACATAACTATCTCCAATAATTCCCAAACGTTTTCCTTTCAATTCATCAGTCCTGACGGTAACTCCTTTAATAATTTGTTCCAACTGCTTATATACCTGTTGTCCCAAATACTCTGCCGAATGAGCCGTGAAATGAAGTCTGTCATTTAACAATTCCGCGCCTGACATATCTATCAGGTGCATGTTCGGATCTTCAGCAGCCAATTGCTTCATCGCATTCTCCACTTCTCTGCTGAAATATTTATTACTTCGGGCAACAGTACCAAAAATAAAAGGCAAACGAGAATAATCCTTTCCGGTTTTTTCTGTCAGATGCATACGCACATAAGCCACCATTGTCTTCAAGTTACGG
TAATAATCTTTACTTTTAGCATAATCACTTTCCCCCTGATGCCAAAGAAATGCATCTATCTGATAGCCATCCTTCAGGCGGGAAAGAGTCTTGTCTATACACATGTCTATCTCTTGAATGAAAGAAAGCAACAAGGAGTTTCCTCCATCAGAAGTTGGCTTAGCCTGTGCCAGCCATTCAGGCGCCGCAGACCAAAAACGTCCCTTGGAAGCATTATAATCAGGTGCTATAGAAGTTCCGCCAACAGCCCATTTCACCACATAAAATTTCTCCTGCAACAACTGCTCCAACCAATAATATGTCACTGCATCAAAAGCCCACATATTATTTTTTCCGCTACGTTTGGCACGAGGCCAGAAAGGAATAAATTCTCCCTTTCCGTCATTCTGAGCTATCTGACAATAACGGTAGGCACCTTCGGCATACGTCAGTGTATCCGTAGCCAATGCTTTAATATATGCCGGCAAATCTTCATTAGGAGTTCGTCCATCGGTATTAGACTGTCCGGCAGTTATAATCACATGTACAGGATTGGAAGCCATCGTATTCAAGCCAACAGCTGCAAAAATCAAAATCAAAATTAAGTTTTTCATAAGTATCATTTCATGTTTTTAATGGACACAAAAATAATCCTAATTCTAATATAACGACATGGATTATCGTCCATTATCATAGATTTTTGAACTCATTATGGTTCATACAGAATGAGAATTGCGGCTAAACCATAAAAAAAGAGCCACAGGAACAATGGCTCCCAATGACTCTTTCTTTTTCAGCTCATATTTATAATTTAATTATCTCCATACATACGCGCACGCATTTCTTTAATATGATCAGAAGTGATGTATTCATCGTATTCCATCATCTTGTCAATGATACCATTCGGGGTCAATTCAATAATACGGTTCGCCACCGTTTCAATAAATTCGTGGTCGTGGGAAGAGAACAGCACATTTCCCTTATAAGTCTTCAGATTATTATTGAAAGCCTGAATTGATTCCAAGTCCAAATGGTTGGTTGGAGTATCCAAAATCAGACAGTTCGCATTACGCAACTGCATACGGGCAATCATACAACGCATCTTCTCACCTCCGGAAAGCACATTCACTTTCTTCAATACTTCCTCTCCGGAAAACAACATACGCCCCAAGAAACCTTTCATATAGACTTCGTTACCTTCGCCAAACTGGCTCAGCCAATCTACCAAATTCAAATCACTTTCAAAGAAATCCGTATTATCCAAAGGCAAATAAGCCGTAGTAATAGTAACGCCCCACGCGAATTTACCAGCCTGCGGAGTACGATTTCCATTGATTATTTCAAAAAAAGCAGTCATGGCACGCGGATCACGGCTCAAAAAGACAATCTTATCTCCTTTTTCTACATTAAAATTCACATCATTGAACAATACCATACCGTCTTCAGTTTCCGCACGCAAGCCGGATACTTCCAAAATCTGATTGCCCGGTTCACGATCCGGAGTAAAGATGATACCGGGATATTTACGGGACGACGGCTTAATTTCATCTACATTCAGTTTTTCCAACATCTTCTTTCGGCTGGTTGTCTGCTTGCTCTTAGCCACATTGGCACTAAAACGACGGATAAATTCTTCCAACTCTTTCTTCTTCTCTTCAGCTTTAGCCTTCTGGTTCTGCTGCTGGCGAAGTGCCAACTGACTTGATTCATACCAGAAGCTATAGTTACCGGCAAACATATTCACTTTACCGAAATCAATATCTACAGTATGAGTACAAACAGAGTCCAAAAAGTGACGGTCATGACTTACCACCAACACCGTGTGTTCGAAATTAGAAAGATATTCCTCCAACCAAGTCACCGTATCCATATCCAAATCATTGGTAGGTTCATCCAACAACAAGTTGTCAGGATTACCGTAAAGTGCCTGCGCCAACATGACACGCACCTTTTCCTTACCACTCAGTTCCCCCATCAACATATAGTGCTTGTCTTCCTTTATGCCCAATCCGCTCAACAAA
GAAGCAGCATCACTCTCGGCATTCCAACCATCCAGTTCGGCAAACTTCTCTTCCAATTCAGAAACTTTCAAGCCATCTTCATCTGTAAAATCAGTTTTCGCATACAGTTCTTCACGCTGTTTCATAATGTTCCACAACACAGTATGTCCCATCATAACCGTATCCATGACTGTATATTCATCCCACTTAAAGTGATCCTGGCTCAATACAGACAGACGTTCGCCCGGTCCTAATACCACTGAGCCCTTAGTCGGTTCCAGCTCACCTGAAATTGCTTTCAAGAAAGTAGATTTTCCGGCGCCGTTCGCACCGATAACCCCGTAAATATTACCATTGGTGAACTTCATATTTACGTCATTATACAACACTCTTTTACCAAATTGAATGGCCAAGTTCGAAACTGTAATCATCCTAATATACCTTATTTATATAATTATTCTATTTTGGAATTGCAAAGGTAGGCATTTTAATTCAGAAAAGTATGCCAGTGTGGCAGAAATTCCGTACTTTTGCAAAATTATTACTCTAAGAAAACAACTAGGCACGAAATTTGTTAGCAGTAAATTAACCTAAAAGACCTGACTGTTATGAGTAAAAATAAGAAAAACAAAAAGTTTAATAAGAATATGAACCCAACAGAGAAGAACCAACCTCAAGACGAGGAAGTTTTGAAAAATCAGGAAGCGGCAGAAGCTGCCATTGATGAAGAAACTCAGAAAGAAGCAACAGAAGAACTGAACGCTGAAGAGAAGGTGAACAAGGAATTGGCGGAAGCTCAGAAAACAATAGAAGAGCAGCATGACAAATACCTGCGTCTTTCAGCCGAATTTGACAATTACCGTAAACGCACCATGAAAGAGAAGGCCGAATTGATTAAGAATGGTGGAGAAAAGGCCATTACTGCCATTCTTCCTATTTTGGATGACTTGGAGCGTGCAGTAAAGACTTCAGAAACTTCGGATGATGTAAAAGCAATGCGTGAAGGAATTGAGTTGATCTACAACAAATTCCTGAAAGTATTGAACCAGGAAGGACTTCAGAAAATAGAGACTGACGGTGAGAACTTTGATACCGATTATCACGAAGCAATTGCATTAGTTCCAGCTCCTTCCGAAGAGAAGAAAGGAAAAATATTGGACTGTGTACAAACCGGTTATAAGCTGAATGACAAAGTAATACGCCACGCCAAAGTAGTTGTGGCTCAATAAAACAGTATCATAAAACGACATGGCAAAAAGAGACTATTACGAAGTTCTGGAGGTGGACAAAACAGCCACCCTTGATGTTATAAAAAAAGCATACCGCAAAAAAGCAATACAATATCATCCGGACAAGAATCCGGGAGATAAAGAAGCGGAAGAAAAATTCAAGGAAGCTGCCGAAGCTTATGATGTGTTGAGCAACCCGGATAAACGCGCCCGTTACGACCAATTCGGACATGCAGGAATGAGTGGAGCTGCCGGTGGCGGTTTTGAAGGATTCGGACAAGGTATGTCCATGGATGATATTTTCTCTATGTTCGGTGACATCTTCGGTGGACATGGAGGAGGTTTCGGAGGCTTTGGAGGTTTCGGTGGTGGCGGACGTTCCGCACAACGCAAGTTCCGTGGTTCAGACCTCCGTGTAAAAGTAAAACTGAACTTAAAAGAGATTTCTACGGGAGTAGAAAAGAAATTCAAACTGAAAAAGTACGTAACATGTGACCATTGCCATGGTTCAGGAGCCGAAGGAGAAGGAGGAACGGAAACGTGTCCCACTTGTCATGGAACAGGAAGTATCACCCGCACCCAACAAAGCATTTTTGGAATGGTGCAATCACAAAGCGTATGTCCACAATGTAACGGAGAAGGGAAAATAATTAAGAACAAATGTAAAGCTTGTGCAGGAGAAGGTATTGTATACGGAGAAGAAGTCGTGGAAGTGAAAATTCCTGCCGGTGTGGCTGAAGGAATGCAACTTTCTGTCAATGGGAAAGGAAATGCGGGCAAACATAACGGTGT
TCCCGGTGATTTGCTTGTTGTCATAGAAGAAGAATCCCATCCAGACCTAATACGCGATGAAAATGATTTGATTTACAATCTGTTACTAAGCGTTCCGACTGCTGCTTTAGGAGGCACTGTAGAAATACCAACTATTGATAGCAAAGTAAAAGTAAAAATTGAACCAGGCACCCAACCAGGTAAGGTCTTACGCCTAAGAGGTAAAGGATTGCCCAATGTCAATAGTTATGGTTACAGTAACGGTACAGGTGATTTATTAGTCAATGTAAGCGTATATATTCCAGAAACGTTGAATAAAGATGAAAAACAAGCACTAGAGAAAATGCAAGAATCGGATAACTTCAAACCGAATACAAGCATTAAAGAAAAAATATTCAAGAAGTTCAAAAACTTCTTCGATTAATTTTTATACTACAAAGTGTCACAGAAAGAATACCAGGTTTTATATCTGCCTGGCATTCTTTTGTGACACTTTGCGGTAAATAAAAACATTTGCCATGAATTACGAAGAAACATTGGATTATTTATATAATAGTGCCCCATTATTCCAACATATAGGAAAAGATGCATACAAGGCAGGATTAGAAAACACTTATCTTTTAGACAAGTATTTCAACCATCCCCATCGCCAATTCCGAACCATTCATATAGCCGGGACCAATGGAAAAGGCTCTTGTTCACATACTTTAGCCGCCATTTTACAATCAGCCGGATACAAGACAGGACTTTACACTTCTCCACATCTGATAGATTTCCGTGAACGAATCCGCGTCAACGGAATTCCTGTATCCAAAGAATATGTCATAGACTTTGTAGAAAAACACCGTGCCTTTTTTGAACCTCTGCATCCTTCTTTCTTTGAACTGACTACCGCAATGGCTTTCCACTATTTTGCCCAAAGCCAAGTAGATGTAGCCATTATAGAAGTCGGCCTGGGAGGACGAATAGATTGTACAAACATCATCCGTCCGGATCTATGTGTCATAACCAATATCAGTTTCGACCATATACAATTCTTAGGTAACACATTAGCCAAAATAGCCACAGAGAAGGCCGGAATCATCAAAGAAAAGACCCCGGTAGTTATCGGTGAAACCACACCCGAAACCAAACCTATATTTACAACCCGTGCTAAAGAAGTAAACGCTCCTATCTACTTTGCAGAAGAAGAACAGTTATTACACTCCTCCAGCATAAATGAAAAAGGTAAACGAATATATCAAACAACCGACTACCTCAATCTGGAAGGCGAACTGGAAGGGCTTTGCCAACTTAAAAACACCAATACACTTTTATCTGCCATCCGCCTATTAAAACAAGCAGGTTATCAACTTACCGAAAGCAATATACGCAAAGGATTCTCACAAGTATGTGAACTCACCGGCCTGATGGGAAGATGGCAAAAATTAGAAAGTGAACCGACTTTAATATGCGATACAGGACATAATGTAGGAGGTATTTCGTATATCATAGAACAATTAAAACATCAGAAATATGAACGATTACACATTGTAATAGGTATGGTAAACGACAAGGATATCAGTGGAGTATTGTCCATGCTTCCTAAAAATGCCACCTATTATTTTACCAAAGCCAGCGTAAAGCGTGCTTTATCCGAAAAAGAATTACAAAGCTTAGCGATGCAATCCGGGCTGCACGGAGATACTTATCCTGATGTAGAAACAGCTGTAACAGCCGCTAAAGAAAAGGCTAACAAAAATGACTTTATTTTTGTTGGAGGAAGCAGCTTTATCGTTGCGGATTTATTAAAATTTCACGTTTAGTCCGTTCACTTTTATAGAAAAAGTTTGGGAATACCGAAGAAAATCTTTTTATTTGCATTAGTTTTATACATAAAATAAAAAGATTTTCAATAACATGATAAACACATCTACTTCCGATGAGAATTTATGCGGCCTGAAACGGGCCGATTTTCAGACAACAGTGAACGGGAAACAAACAGACCTCTTCATTCTGAAAAATGAAAACGGAGCTGAAAT
AGCTGTCACCAACTATGGTGGAGCAGTATTAGCTATTATGGTTCCGGACAAAAACGGTAAATATGCCAATGTCATTCAAGGACATGATAGTATAACCCATGTTATCAATAGCCACGAACCTTTCCTCAGTACTCTTATCGGTCGTTACGGAAACCGTATAGCAGGAGGCAAATTCATTTTAGAAGGGAAAGAATATTCACTGACTATCAACAATGGTCCCAACTCGCTGCATGGCGGTCCTACCGGATTTCATACCCGCATTTGGGATGCAGAACAGGAAACTCCCCAAAGCTTGAAATTGCATTATTTATCTGCTGATGGCGAAGAAGGATTTCCGGGAAATTTGGATATTCACGTAACGTACACTTTAAGTAATCAAAATGAATTCATCATTACTTACCACGCTACAACAGACAAAACAACATTGGTAAATCTTACTCACCACGGTTTTTTCAGCCTATCAGGCATTGCCAATCCTACGGCAACCGTTGATAACAACATCGTAACCATTAATGCTGATTTTTATACTCCGATAGATAATGTATCCATCCCTACAGGCGAAATTGCCAAAGTAGAAGGTACTCCTATGGATTTTCGTACTCCTCAAAGAGTAGACAGTAGAATCAATGACCCATTCGAACAGCTAGAATTCGGTGCCGGATATGACCACTGCTATGTATTAAACAAACGTGAAGCAGGCACACTGAGTTTTGCAGCAAAATGTGTGGAGCCCGAAAGCGGTCGTAGCATGGAAGTATACACTACAGAGCCCGGTGTACAAGTTTATACTTCAAACTGGCACAATGGTTTCGAAGGTGCTCATGGAGCAACTTTCCCAGCAAGAAGTGCCATCTGTTTTGAAGCACAACATTTTCCTGATACGCCCAATAAAGGCCATTTCCCTTCTTGTGTTCTACATCCGGGGGAAACTTACAATCAAGTAACCATCTACAAATTCGGTGTAGAAAAATAATTTTATGAAATAACCTAATTAATAAACTTATAATTTTTTCAAAACCATGAATCAACAAAAACAGAACGGTAATATCATCGCTATCATTACAATGTTCTTCCTTTTCGCGATGATTTCCTTCGTTACTAATCTTGCTGCGCCTTTCGGCACAATATGGAAAAACCAATATGCAGGTGCCAATACCTTGGGTATGATGGGAAATATGATGAATTTCCTCGCATATCTGTTTATGGGGATTCCTTCAGGTAATATGCTTGTAAAAATCGGATATAAAAAAACAGCCCTCATTGCCATGGCAGTAGGTTTCATCGGTTTGTTTATCCAATATATTTCCAGTTTGTTCGGTGCAGATATAGACGTATTCAATTTAGGAGAATATGCCATTAAAATGAATTTTATCATCTACCTGCTTGGTGCTTTCGTCTGTGGTTTTTGTGTATGTATGCTGAACACAGTAGTTAACCCGATGCTAAATCTTTTAGGTGGTGGTGGTAACAAAGGTAACCAATTAATCCAAGCCGGTGGTGCTCTAAACTCATTGTCAGGTACTTTGACTCCGCTTTTCGTAGGTGCCTTGATTGGTTCTGTTACCCCTCAAACAGCTATGTCAGATGTAGCTCCTCTGCTTTTCATCGCAATGGGTGTATTCGTATCAGCGTTTATCGCTCTTTCATTCATCGCCATTCCAGAGCCTCATCTAAGAAAAGCAGGTCATGAAAAAGAGAAATTCTCTCATAGTCCTTGGAATTTCCGCCATACTGTATTAGGTGTAATCGGTATCTTTGTGTATGTAGGTATTGAAATCGGTATTCCAGGCACATTAAATTTCTATCTTGCCGACCCAACGGAAAAAGGTGCGGGTCTGCTTGCCAACGGTGCCGCTATCGGTGGTGCTATTGCTGCCATCTATTGGTTGCTCATGTTAGTGGGACGTTCTGCAAGTAGTGTCATCAGCGGTAAAGTAGCTACACGCACACAATTGATTGTTGTTTCCGCAACAGCTATCTGTTTTATATTAATCGCAATC
TTCACTCCAAAAGAAATCACTGTCTCTATGCCGGGATATAGTGTAGAAAATGGATTCGAAATGGCCTCTGTACCTGTCAGTGCCCTATTTTTGGTACTTTGTGGTTTATGTACGTCCATTATGTGGGGAGGTATCTTCAACCTCGCTGTAGAAGGTTTAGGCAAATATACCGCACAAGCTTCAGGTATTTTTATGATGATGGTTGTCGGTGGCGGTATTTTCCCATTATTGCAGCAATTCATTTCTGACGCTGTGGGATATATGGCCAGCTATTGGTTAATTATCGCTTTGCTTGCTTATCTGTTGTTCTATGGTTTGGTAGGATGCAAGAACGTAAACAAAGACATTCCTGTAGAATAATTATAAAAACATATAGTATTAACTTTAAACTATTAATATCATGGATATAGAATACGTAAGAAGTCGCTTCATCAAACATTTTGATGGCACAACAGGTTCAGTATATGCATCACCCGGACGTATTAACCTTATTGGAGAACATACCGACTATAACGGTGGTTTTGTCTTTCCCGGAGCAGTAGATAAAGGTATGATTGCTGAAATCAAACCTAATGGTACTGATAAGGTACGTGCTTATTCTATTGATCTAAAAGACTATGTAGAATTCGGCTTGAACGAAGAAGATGCCCCTAAAGCCAGTTGGGCAAGATATATTTTCGGCGTTTGCCGTGAAATGATCAAACGTGGTGTAGATGTGAAAGGTTTTGACACTGCTTTTGCGGGTGACGTACCTTTGGGTGCAGGTATGTCTTCATCTGCTGCATTGGAAAGTACGTATGCTTTCGCTATCAACGAACTTTTCGGTGACAACAAAATAGATAAATTTGAATTAGCCAAGGTAGGTCAGGCAACAGAACATAATTATTGTGGTGTAAATTGCGGTATTATGGATCAATTTGCATCTGTATTCGGAAAAGAAGGTAGTTTGATCCGTTTGGACTGTCGTTCATTGGAATATCAATACTTCCCGTTCAAACCGGAAGGTTACCGATTGGTATTGGTTGACTCGGTAGTCAAACACGAATTGGCTTCTTCTGCTTATAATAAACGTCGCCAAAGCTGCGAAGCTGCTGTAGCCGCCATCCAGAAAAAACATCCTCATGTAGAATTTTTACGTGATTGTACAATGGAAATGTTGCAGGAAGCAAAAGCCGAGATCAGTGAAGAAGACTATATGCGTGCAGAATACGTCATTGAAGAAATCCAACGTGTACTTGATGTTTGCGATGCTTTGGAAAGAGGTGATTACGAAACCGTAGGACAGAAGATGTATGAAACCCACTATGGCATGAGTAAATTGTATGAAGTAAGCTGTGAAGAACTTGACTTCTTGAATGATGTTGCTTTCGACTGCGGTGTCACCGGTTCACGTGTCATGGGTGGAGGTTTCGGTGGATGTACAATCAACCTAGTTAAAAATGAACTATACGAAACATTCATCACCACTGCCAAAGAAAGATTCAAAGAGAAGTTCGGAAGAAGTCCCAAAGTTTACGATGTAGTCATCAGTGACGGTTCTCGCAAGCTGGTATAAAAAAGATCTCAAATTCGCTAACATATGAAAAAACCGCTTTGGATTCAAAGCGGTTTTTTCTTCTTTATGCTATAAAAGTGTAGGAAATACACTTTTATGTTGCACAACATACTCTTTTCATATCCAATGTATTAGTAATAGTGTTACTTTTACACCCAAAATCATATACTCTATTACTAATTAGTACTTTTATATCATGAAAAGCATGAAAAAAACCTTTCTGGGAACCGGTATCGCACTGACCTTGTTAAGTGCTTGCGCTCCCAAACAATCTCAAGAAACACTTACAAAGTCCGGATTAAACCCCACTAATTATGAAACAATAGTGGATGGCGTAAAACCTGTTAAACTGTATACGCTAAAGAATGCAGCCGGAATGGAAGTATGCGTAACTAATTTTGGCGGACGTATTGTATCTATCATGGTCCCCGACAAAAATGGAAAT
CTAAAAGATGTAGTGCTGGGTTTTGACAGCATTGCTGACTACCAAAATATACCAAGCGACTTCGGTGCTTCTATCGGACGCTATGCCAACCGCATTAATAAAGGAGTCATCGTAATGGATGGAGAAACCATTCAATTACCCCAAAATAATTTTGGTCACTGCCTGCATGGAGGTCCTAAAGGATGGCAATACCAAGTTTATGAAGCAAACCAATTAAACGACAGTACTATGACACTGACTATGAAGTCACCGGATGGAGATGCCAATTTTCCGGGAAATGTAACCGCTACTGTTACTTATGCACTGACAAGAGACAATGCCATCGACATAAATTACGAAGCGACAACTGACAAAAAGACTGTTATCAATATGACCAACCACTCTTATTTCAACTTGAGTGGAAACCCTGCCAATCCGGCCACCGACCATATTCTTTATGTAAATGCAGACAGCATCACTCCCGTTGACAGCACATTCATGACCACTGGAGAAATGATGGCAGTAACAGAAACTCCATTTGATTTCAATACTCCTAAAACCATTGCCCCCGATGTGACTAACTTTGAAAACGAACAAATAAAATTCGGTAACGGATTCGACCATAATTGGGTACTTAACACGAAAGGCGATATCAACCAGCTGGCTGCCAAACTGACCAGCCCTACTAGTGGTATCACTTTGGAAGTATATACTAATGAACCGGGAATACAAGTTTATACAGGCAACTTTTTGGATGGAACCGTAAAAGGGAAAAAGGGAATTACTTATCCGCAACGCGCGTCTGTTTGTCTGGAAACACAGCACTATCCCGACAGCCCCAACAAATCACAATGGCCTTCTGTAATTCTGGAACCGGGACAGACCTATCACAGCCAATGTATCTTTAAATTCGGTGTTGAAAAATAATTGTTAAACTTTAAATATTTAATATCATGAATTGGAATTCACATGAATTCATCTGGCTGGACTGGACAATACTGGCAGTCGGCATTGTAGCTGTGATATGGGCGGTATGGCGCTCTGTACAAAAAGACAAACGCTCGCAACAAGGAGCAAGCAGTGAAGATTATCTATTTGGCAAAGGTGAGCCATGGTACATCATTGGTGCTGCTATCTTTGCAGCCAATATCGGTTCGGAACATCTGGTAGGTTTGGCAGGAACCGGTGCCAAATCCGGAGTAGGTATGGCACACTGGGAAATGCAAGGTTGGATGATTCTTCTTCTAGGATGGCTTTTTGTTCCATTCTACCAACTATTAAACAATAAAATGGGCAAAATCATTACCATGCCCGATTTCCTTAAATACCGTTATACCCCGCGTACCGGTTCATGGCTTTCTATCATCACACTGATAGCCTACATTCTTACTAAAGTGAGTGTAACCGCCTATACCGGAGGTATCTTTTTGGAGTTCTTACTTGGACTTCCTTTTTGGTATGGGGCAATCGGACTTATTGTCCTGACCGGCATCTTCACCGTACTGAGTGGAATGAAAGGAGTAATGACCCTCTCAGCTATTCAAACCCCTATTTTAATCATCGGTTCTTTCTTGGTTCTTTTCCTGGGACTGTCAGCCTTGGGAGACGGTAATATTGCTACAGGATGGACAGAAATGATGGATCATGCGCGTAGCGCCATGAATGTAGGAGCAGACGGACACGCTTATGGTGCCAACCATATGTTTCACTGGACCGAAGCAGACCCGATGTATCAAGATTATCCTGGATTCTGGGTATTTATCGGAGCTTCCATCATCGGTTTCTGGTATTGGTGTACCGACCAGCATATCGTACAGCGTGTACTTGGACAGCGTAAAGGCGAAGATAATGATGTGGTAATGAAGCGTGCCCGCAGAGGTACTATTGCAGCCGGTTATTTTAAAATCCTGCCTGTCTTCATGTTTCTTATTCCGGGGATGGTAGCTGCCGCCTTGGCTGCAAAAGGTGAATTTGATATGTCAAATACAGATGCGGCTTTTGCTGTAATGGTAAAGGATGTT
CTGCCTGCCGGTGTAAAAGGTATTGTAACTATCGGATTCATCTGCGCATTGGTTGCTTCACTGGCTGCTTTCTTTAATTCATGCGCCACACTTTTTACAGAAGACTTTTACAAGCCCATGTTTAAAAACAAAAGTGAAGCTACTTATGTTATGGTAGGACGTATCGCCACCGTAGTTGTAGTTATCCTCGGCATGGCATGGATTCCTGTAATGATGAGCCTCGGCAGCCTTTACGACTACCTGCAAGGCATACAGTCCCTCCTTGCACCTGCAATGGTAGCCGTATTTGCACTTGGTATATTCTCCAAGAAAATCACTCCGAAAGCCGGTGAAACAGCCATGATTGTAGGTTTCCTAATTGGTATGCTCCGTTTGCTGACTAATATCCTCACCAATACCGGAAAAGATGTGATGACCGGCTGGTTTTGGGAAAATACAACTTGGTTTTGGCAGACAAACTGGCTTATTTTTGAAATATGGCTACTTGTATTCCTCTTGTTGTTAATGGTTGTTGTATCATGCTTCACTCCGGCTCCAACCGCCAAACAAGTGGAAGCCATCACCTTTACCGGCAGTTACAAGGAACTTATCAGAAAGAGTTGGAACAAATGGGATGTTATTACCTCTTTAGGTGTAGTTCTGCTTTGTGCACTATTCTATGCTTATTTCTGGTAAACCATAAATTAAAGGCTGTATGCTTTGAATTATCTTGAAGAATACAGCCTGTTAATTTTAAACCGACGCTAAACCATGACCACCTATTATAACATAAACCCTAAATTCTATGTATCTGTAGACTGTATCATTTTCGGTTTTGACGAAGGAGAATTAAAACTGCTTCTACTAAAACGAAATTTTGAACCGGCTATGGGAAAATGGTCTTTGATGGGAGGATTCGTACAAGAAGACGAAAGTGTAGACGCTGCCGCCAAACGGGTATTGGCAGAACTGACCGGATTGGAAAATGTATATATGGAACAAGTAGGCACATTTGGCGACTTAGAGCGCGATCCGGGAGAACGAGTCATATCAGTAGCCTATTATGCTTTAGTCAATGTAAATGAATATGACAGGGAACTAGTGCAGCAGCACAATGCCCATTGGACAAAAATAGACGAACTGCCACAGCTCATTTTCGACCATCCGATCATGATTTCCAAAGCACGTGAACTGATGAAGCACAAAGCATCATACAATCCAATTGGTTTTAACCTGCTGCCCGAACTGTTTACCCTGACACAGTTGCAAAATCTATATGAGGCAATCTATGGCGAACCGATGGACAAGCGGAATTTCCGCAAGCGGGTAGCCGAAATGGATTTCATTGAAAAAACAGATTTGATAGACAAATCAGGTTCCCGACGAGGTGCTTATCTTTATAAATTTAATGATAAGGCCTACAGAAAAGATCCGAAATTTAAACTTTAAAATAATGTGCCGATGCAATAATTTGCCAATTGACAAATTAGCAGATTGCCACATTACCCAATTATTCATTATGTTAGAAAAATTAAAAGAAAAAGTATTCCGTGCCAATCTGGATTTGGTGAAACACGGACTAGTTATTTTTACATGGGGAAATGTTTCTGCCATCGATCGTGAAACAGGTCTGGTAGTAATCAAACCCAGTGGCGTATCATACGATGATATGAAAGCTGAAGACATGGTTGTAGTAGATCTGGACGGCAACGTGGTAGAAGGTTCTCTTCGTCCGTCTTCAGATACTCCCACCCATGTAGTTCTATATAAAGCATTCCCCGAAATAGGTGGAGTGGTACATACCCACTCCACCTATGCTACCGCCTGGGCACAAGCCGGAATGGATATCCCCAATATCGGAACGACTCATGCCGATTATTTCCATGACGCCATTCCTTGTACCGCAGATATGACTGAAGAGGAAGTAAAAGGTGCCTACGAACAGGAAACAGGCAATGTGATTGTGAAACGTTTCAAGAACCTGAACCCTGTACATACTCCGGGAGTACTAGTAAAAAATC
ATGGTCCTTTTGCATGGGGAAAAGATGCCAATGATGCTGTACATAATGCAGTAGTCATGGAACAGGTAGCCAAAATGGCAAGTATCGCTTTTGCCGCCAACCCTCATTTAACGATGAATTCTTTATTAATAGAAAAGCATTTCAGCCGCAAACACGGCCCCAATGCCTATTATGGACAGAAATAAGAAACAAAGAAATTAATAACCCTTTTAAAATATAACATTATGGAAAAAGCATTTGATCAGTATGAAGTATGGTTTGTAACAGGAGCACAGCTTCTGTACGGAGGTGACGCAGTTATAGCAGTAGACGCTCACTCCAATGAAATGGTAAACGGACTGAACGAATCAGGGAAACTTCCTGTAAAAGTAGTATATAAAGGAACAGCCAACTCTTCTAAAGAAGTGGAAGCAGTGTTCAAAGCAGCCAACAACGATGAAAAATGTATCGGTGTCATCACTTGGATGCACACTTTCTCCCCAGCTAAAATGTGGATTCACGGTCTGCAACAGTTAAAGAAACCACTGTTACATCTACACACTCAATTCAATAAGGAAATTCCTTGGGATACCATGGATATGGACTTCATGAATCTGAATCAATCCGCCCATGGTGACCGCGAATTCGGACATATCTGTACCCGTATGCGCATCCGCCGCAAAGTAGTGGTAGGTTACTGGAAAGACGAAGACACCCAGCACAAGATTGCCGTTTGGATGCGTGTTTGTGCAGGTTGGGCAGACTCTCAGGATATGCTGATCATCCGTTTCGGCGATCAGATGAATAATGTAGCTGTAACTGACGGTGATAAAGTGGAAGCAGAACAACGTATGGGGTATCACGTAGATTACTGTCCGGCAAGCGAACTGATGGAATACCATAAAAACATCAAGGATACAGATGTAGAGGCACTTGTAGCAACTTACTTCAACGAATACGACCACGATGCTTCATTAGAAGATAAATCAACCGAAGCTTATCAGAAAGTATGGAACGCTGCAAAAGCCGAATTAGCTCTTCGTGCCATCCTTAAAGCCAAAGGCGCCAAAGGATTCACTACTAATTTTGATGATTTGGGCCAAACAGACGGCAGCTACTTCGATCAGATTCCGGGACTGGCTTCCCAGCGTCTGATGGCTGAAGGCTACGGATTCGGTGCGGAAGGTGACTGGAAATCAGCAGCTCTCTACCGTACCGTATGGGTGATGAACCAAGGACTTTCCAAAGGTTGTTCTTTCCTGGAAGATTACACATTGAACTTCGATGGTGCAAACAGTGCTATCCTGCAATCACATATGCTGGAAGTTTGCCCTCTTATCGCCGCTTCCAAACCACGTCTGGAAGTACACTTCCTAGGCATAGGTATTCGCAAGAGCCAGACAGCCCGTCTTGTATTCACTTCAAAAGTAGGCTCAGGTTGTACCGCCACTGTAGTAGACTTGGGTAACCGTTTCCGTCTGATCGTGAACGACGTAGAATGCATCGAGCCGAAACCGTTGCCCAAATTACCGGTTGCTTCCGCCCTGTGGATTCCGATGCCTAATTTCGAAGTAGGTGCAGGCGCATGGATCCTGGCAGGTGGAACACATCATTCTTGCTTCTCTTATGATCTGACAGCAGAATATTGGGAAGACTATGCTGAGATTGCAGGTATCGAAATGATCCGTATCGACAAAGATACTACCATCAGCAACTTCAAGAAAGAGCTTCGCATGAACGAAGTCTACTATATGCTGAACAAAGCACTTTGCTAATTCATCCATGAAGAGGGGATGTGTCAAGGCTAATAAGATTCTTGGCACATCCCACCTTATTTTATCATCATTATTGTTGTACCTTAAATTATAGGTTTTAATATGAAATCAGATGCAAAATCAACCATCCAAGCAGGAAAAGCTATTCTAGGCATAGAATTCGGATCAACACGAATCAAAGCTGTTTTGATTGACCAGGAAAACAAGCCCATTGCCCAGGGAAGCCACAGCTGGGAAAACCAACTG
GTAGACGGACTGTGGACTTACAGTGTGGAAGCTATCTGGCATGGCCTGCAAGACTGCTATGCAGATCTCCGTTCCAACGTAAAAAAGCTATATGACACAGAAATAGAAACCTTGGCGGCAATCGGTGTCAGTGCCATGATGCATGGTTATATGGCATTCAATAAAGAAGAAGAAATCCTTGTACCTTTCCGTACATGGAGAAATACCAATACCGGTCCGGCAGCCGCCGCTTTATCTGAATTATTCGTATATAACATTCCTCTGAGATGGAGCATTTCTCATTTATACCAAGCTATTTTAGACAATGAAGAACACGTAAGTAACATTGACTATCTGACCACCCTTGCAGGTTTCATTCATTGGCAAATTACAGGTCAGAAAGTTCTGGGCATAGGTGATGCATCAGGAATGCTTCCCATAGACCCGGCTACCAAGAATTATTCTGCCGAAATGATAGCCAAGTTCGACAAGTTGGTAGCTCCTAAAGGATACCCTTGGAAACTGACAGATATCCTGCCCAAAGTCTTACCCGCCGGCGAGAATGCAGGGTTCCTTACCCCGGAAGGTGCCAAGAGGCTGGACGTGTCGGGCCACTTGAAAGCAGGTGTACCTGTCTGCCCTCCGGAAGGAGATGCCGGAACCGGCATGGTAGCAACCAACGCTGTCAAGCAACGCACCGGGAATGTATCAGCAGGCACTTCCTCATTTTCCATGATTGTATTGGAAAAAGAGTTGTCCAAGCCATACGAAATGATTGACATGGTTACCACTCCCGACGGAAGCCTTGTAGCTATGGTACATTGCAACAACTGCACCTCCGACCTCAATGCCTGGATCAATCTGTTCAAAGAATACCAAGAACTGCTGGGTATACCCGTAGATATGAATGAATTATATGGAAAACTTTACAATCATGCGCTCGCAGGCGATGCAGATTGCGGTGGTCTTATTTCGTATAATTATATTTCAGGTGAACCCGTGACAGGACTTGCCGACGGAAGGCCATTGTTTGTACGTTCTGCAAATGACAAATTCAACCTCGCCAACTTTATGCGTACCCATTTATACGCTTCAGTCGGAGTTCTTAAGATAGGTAATGACATTCTTTTCAATGAAGAGAAAATCAAAGTAGACAGAATCACAGGACACGGCGGATTATTCAAAACGAAAGGTGTAGGCCAGAGAATACTTGCAGCAGCTATAAACTCCCCTATTTCCGTAATGGAAACTGCCGGTGAAGGTGGCGCATGGGGAATCGCCCTGCTAGGTTCTTACCTTGTCAACAATGAAAAGAACCAATCCCTGGCAGATTTTCTGGAAGACAAAGTATTCGCCGGTGATGCCGGTATTGAAATATCGCCAACAGCCGAAGATGTAGCCGGATTCAACACATATATCGAAAACTATAAGGCAGGACTGCCTGTTGAAGAAGCAGCGACCCGATTTAAGAAGTGAGTCAAACGGGCTTAAAAAAGAGCCTCTTTACATGTTGCGGTTTACACATAAAGAGTTATCTTTGCAGATATTAAATAAAATAGCAAAGATAAACCGACTCACTAATAGCATTCAAATTATAATATTATTTTAATATGAACGAAAAGAAACTTATGAACCGTGCAGCGGACAACATCCGTATTCTGGCTGCTTCGATGGTAGAGAAAGCTAATTCAGGACACCCGGGTGGAGCTATGGGTGGCGCTGATTTTGTAAACGTGCTTTTCTCTGAGTTTTTAGTTTACGATCCTGAAAATCCGCGTTGGGAAGGGCGTGACCGTTTCTTCCTTGATCCAGGCCACATGTCACCGATGCTGTATTCCACTTTGGCACTGACCGGCAAGTTCACCATGGAAGAACTGGCACAGTTCCGCCAATGGGGCAGTCCGACTCCGGGACACCCTGAAGTGGACATCATGCGCGGTATTGAAAATACTTCCGGCCCGTTAGGACAAGGACACACTTTTGCAGTAGGTGCTGCTATCGCTGCCAAATTTCTGAAAGCC
CGTCTGGGTCATGTCATGGACCAGACCATCTATGCCTATATATCCGATGGGGGTATTCAAGAAGAGATTTCTCAGGGTGCAGGCCGTCTGGCAGGTCATTTGGGGCTGGACAACTTGATCATGTTTTATGATTCAAACGACATCCAGCTGTCTACCGCTACCGATGCTGTTACCAGCGAAGATGTTGCCAAGAAATACGAAGCATGGCACTGGAAAGTAATTACCATCGACGGTAATGATCCCGATGCAATCCGCACGGCTCTGACAGAAGCGAAAGCCGTAACCGGCCAGCCCACATTAATTATCGGTAAGACCATTATGGGTAAAGGCGCACGCAAAGCGGATGATTCCAGTTATGAACGTAACTGTGCCACCCACGGTGCTCCTTTGGGAGGTGATGCTTACATCAATACAATCAAGAACTTAGGCGGCGATCCAACCAATCCTTTCCAAATCTTCCCTGAAGTAAAAGAGTTGTATGCCAAACGTGCAGAGGAACTGAAAAAAATTGTTGCAGAAAAATATGCGGCTAAAGCTGAATGGACTAAAGCTAACCCCGAACTGGCTGCTAAATTGGAACTATGGTTCTCTGGCAAAGCTCCGAAAGTAAATTGGAATGTTATCGAACAAAAAGCCGGAGATGCTACACGCAGCGCTTCTGCCAAAGTTCTCGGTGTACTGGCTACAGAAGTAGAAAACATGATCGTTTCTTCTGCCGACCTGTCAAATTCAGACAAGACCGACGGTTTCTTGAAGAAAACACATGCATTCACAAAAGACGACTTCACCGGTGCATTTCTGCAGGCAGGTGTTTCCGAATTAACCATGGCTTGCTGCTGCCTGGGTATGGCACTTCACGGAGGTGTGATTGCTGCATGCGCTACCTTCTTCGTATTCTCAGACTACATGAAACCCGCCATACGTATGGCTGCCTTGATGGAACTTCCCGTCAAATTCATCTGGACACACGACGCATTCCGTGTAGGTGAAGACGGTCCTACTCACGAACCGGTAGAACAAGAAGCACAAATCCGCCTGATGGAAAAACTGAAAAACCACAAAGGACACAATTCTATGTTGGTACTCCGTCCGGCAGATGCGGAAGAAACCACTGTGGCATGGAAATTGGCCATGGAAAATACCAGCACCCCCACCGCACTGATCTTCTCCCGCCAGAATATCGCGAATCTGCCTGCAGGAAACGACTATTCACAGGCTGCCAAAGGCGCTTATATTGTTGCAGGCTCCGATGAAAATCCGGATGTAATCCTAGTCGCTTCAGGTTCCGAAGTTTCCACTTTGGAGGCAGGTGCAGAACTTCTCCGCAAGGATGGCATAAAAATACGTATCGTATCCGCTCCTTCTGAGGGATTGTTCCGCAGCCAAAGCAAGGAATACCAAAACAGTATCATTCCCACAGGGGCTAAAGTATTCGGTCTGACTGCCGGTCTTCCTGTAAACCTTGAAGGTTTGGTAGGTGCCAACGGTAAGGTATTCGGTCTGGAATCATTCGGATTCTCCGCTCCTTACAAAGTATTGGATGAAAAGCTGGGCTTTACCGCACAGAACGTGTATAACCAAGTAAAAGAAATGCTGGCATGAAAAAAGTAGGACTGGCATCCGATCATGCAGGATTTGAACTGAAAGAGTTTGTAAAGACATGGTTAACTGAAAAAGGCTATCCATGCAAAGATTTCGGTACATACAGTACAGAAAGCTGTGACTATGCAGATTATGCCCATCCGCTTGCACTGGCTATAGAAGCCGGAGAATGCGGACCCGGAGTGGCAATTTGCGGAAGCGGTGAAGGCATCAGCATGACATTGAACAAACATCAGGGTATCCGTGCGGCACTATGCTGGATGCCCGAGATAGCCCATCTGTCACGTCAGCACAATGACGCCAATGTTCTGGTAATGCCGGGACGATTCATTGACCATGAAATGGCCGAAAAAATATTGGATGAATTCTTTAACAGCGGCTTCGAAGGCGGACGTCAC
CAAAAGCGCATTGAAAAGATACCTGTAAAGTAAAATTCACGCAATAATAATAAAAGTAAAAAAAGAGAGGCATTATAGCTTCTCTTTTTTTTTATTCCTATCTTTGCTTAGATTAAAATATAAAATAAAAAATATTACTATGAAGAAGATTCTTTTTTTAATGCTCCTATGTCTGCCATTCATCGCTATGGCACAGACAGATCCCAAATATCTGGCGGGAGCGATAACTATGGATGACGGTAAAGTTTCTTTCAAAACAGAGATACAGGCACCGTCTTTAACGAAAGACCAGCTATATGGCACCATGCTGAAATGGGCCACAGAACGTTTCAAGCCCGAAGGCAAGTTCAATGCACGTGTTCTTTACACCAATGAAGATGAAGGAACCATTGCAGCAGGTGGCGAAGAATATCTGGTATTCTCCTCCTCTGCATTGTCATTGGACAGAACCAGAATTTATTATCAGTTGTTTATCACCTGCGAAAACGGAAAATGTGATATCGAAATGACCCGTATCCGTTATTGGTATGATGAAGCTCGTGACGGAGGAGAAAAATACAGTGCAGAGGAATGGATTGTAGACGATATGGCTTTAAACAAATCGAAGACCAAGCTGGCTCCCATCTGCGGCAAATTCAGAAGAGAGACTATTGACTTGAAAGACACACTGTTCAAATCCATCCAAGATACATTGGGCAACAAGGTCTTGAACAACTCACAAATTGCAGTTGCTCCTGCTCCGGGCGTGACAGCTACTCCGATATCAAATGCAACAACAATCGTTACCGCCACTCCGGTAACTCCCCCTGCCCAACCGGCTGTCATCGGTGGTTCTGAGGGTAATACCGAAATAAAAGCAGCCAATAATGCAACTCCCAGCAAAGAACAAAGCATAGATGACCAAATCAAGGCATCCTCACGTATGACAATTACTGCCGGAAACGATGAACAATTCGAAATCGGTAAAGAATGCTGGGGAGGTTTCGGCCAGTTATTCGGCAAAGAAGTAGCATTCTGCGTAATCGACCAAGCCAAATCAATGGGCAATATGCTGATGGATCAGAGCGATAATTATAAAATTTCGTTCTACAAGCAAGGTAATAGCGAGCCGTGGTTGATAGTAAACTGCAAGAAACTGATGAAACAGACCGTTACCGGTGAGGAAGCAAAAAAGATGAACCCCAGTAATGACGGTCAGAAGGCATATAATATGTATGTAGGTGAAGTAATAAAATAGTCTTTTCTCCCATATATTTATACCGTGACAGTTTATTCTGATAATACATAAAACTTTTTTTAGGGGGAGATAACCTGACAACTCCCCCTAAAAAACATACAAACGATCTACCATATACTTCCCCAAAAGCACCATGACAAATACCATTCAAACATGTCTCCCCTTTAAGAATAAATAACACCGTTCCTCTATCAACCGTTTCACTTGAAACCATTTCTATAATAATCAAACCATTAGAAGAAATCATTCTTCCATTTTTTCCCTATCTTTCTCTCCTTTTTTTAAATGTTTTTAGCAAGAAAGCCGTTTTTTCTGCTCCAATCTTCCCTTTCATAACAAATAAAGAGTATCTTTGTAAACAACAATGAGACAATCATGCCTTATGACGGAACAATTACAGGACATAAAGACACTTATCGAACAGGGTGATACGGAAAGAGCCATTCATGCGCTAACTAATTTCATCCGAAACGATGCGCACGTCAACGATGAACCCTATTACTTACTGGGTAACGCCTACCGCAAAATGGGAGACTGGCAACAAGCTTTGAACAATTATCTGGAAGCCATCGAGCGTAATCCGGAAAGTCCGGCCGTTTCCGCCCGTGACATGATTATGAATATTCTGAATTTCTATAACAAAGACATGTATAATCAATAATAACTAACCCATGGCAAAGATTAAAGGTGCTATTGTAGTAGACACAGAGCGTTGCAAAGGTTGCAATCTATGTGTAGTGGCATGTCCGCTGCATGTAATCGC
GCTCAATGCCAAACAAGTGAATAAAAAAGGATACACATTCGCCCACCAGGTATTAGAAGATACCTGCAATGGTTGCGCATCATGTGCCACAGTCTGTCCGGACGGATGCATCACTGTGTATAAAGTAAAACAACAATAAATGTAACAAATATGGCAGAAGAAGTCGTTTTAATGAAGGGAAATGAAGCCATAGCCCACGCCGCTATCCGTATCGGAGTAGACGGTTACTTTGGCTATCCCATCACTCCCCAATCGGAAATATTGGAAACGCTGGCCGAAGAGAAACCTTGGGAAACTACCGGTATGGTGGTGTTGCAGGCAGAAAGTGAAGTCGCAGCAATCAACATGGTATATGGCGGTGCAGCGAGCGGTAAAATGGTAATGACCTCATCCTCCAGTCCCGGCGTCAGTTTAAAGCAGGAAGGCATCTCGTACATTGCCGGTGCCGAACTCCCCTGCCTGATTGTCAATGTCATGCGCGGAGGTCCCGGATTAGGAACCATCCAACCTAGTCAGGCCGACTATTTCCAAACAGTGAAAGGCGGCGGTCACGGAGACTATCGCCTCATAGCCCTGGCTCCTGCATCAGTACAGGAAATGGCAGATTTCGTAGGTATAGCTTTTGATCTGGCTTTCAAATACCGCAATCCCGCCATTATTCTGGCAGACGGCGTTATCGGACAGATGATGGAAAAAGTAGTATTGCCCGAACAGCGCACACGCCTGACAGATGAAGAAGTCATAGCCCGTTGTCCGTGGGCCACTACCGGCAGAACCCACCACCGTACTCCCAACATCATCACCTCATTGGAACTCGATCCTGCCGAGATGGAAAAACGTAATATCCATCTTCAGAAAAAATATGCAGAAATAGAAGAAAACGAAGTACGTTTCGAAGAACTTCACTGCGAAGATGCCGAATATTTGATCGTAGCTTTCGGTTCTTGTGCCCGCATTGCCCAAAAAGCAATGGAAATGGCTCGGGAAGAAGGCATCAAAGTTGGTTTGCTCCGTCCTATTACCCTATGGCCGTTCCCGTCGAAAGCTATTGCAGCACGCGCCGCACAGGTAAAAGGTATCCTCACTGTAGAATTGAACGCCGGACAAATGGTAGAAGATGTCCGTCTGGCCGTAGAATGCAAAGTGCCTGTAGAACACTTCGGACGTCTGGGTGGTATTGTTCCCGATCCGGATGAAGTAATCACTGCACTGAAAGAGAAACTAATAAAATAAGACCGTATGAATATCGATCTGATCAGAAACATATTGAACATACTTTTCATGGCACTGGCACTAGCGGCTGTCATCACTTACTTCATGGCAAGTGACTTCAAAGTATTCATCTATGTCTGTGCAGCAGCTATTTTTCTCAAGCTGATGGAATTCTTTATGCGGTTCATGTTATAAGCAATAGGAGGAGAAACTATGACAAAAGAAGAAATCATAAAACCCGAAAACCTGGTTTATAAGAAACCGACACTGATGAATGACAATCCGATGCATTACTGCCCCGGATGCAGCCACGGTGTGGTTCACAAGTTAATAGCCGAAGTTATTGAAGAAATGGGCATGGAAGATAAGGCAATCGGTGTGTCACCGGTAGGATGTGCCGTTTTTGCATACAACTATCTAGACATCGATTGGCAAGAAGCCGCCCACGGACGCGCACCAGCCGTAGCTACCGCCATCAAGCGTTTATGGCCCGGACGTCTGGTATTTACCTACCAGGGAGATGGTGACCTGGCATGTATCGGCACTGCCGAAACCATCCATGCTTTAAATCGTGGCGAAAACATTACTATCATCTTTATCAACAACGCCATTTATGGAATGACCGGTGGGCAAATGGCTCCTACCACGCTGGTTGGCATGAAGACAGCCACCTGCCCCTATGGTCGTGATGTAGCCATTCATGGCTATCCGCTGAAAATGACAGAAATTGCCGCCACTTTGGAAGGCACCGCCTACGTTACCCGCCAGGCCGTACATACGGTTCCTGC
CATCCGCAAAGCAAAAAAGGCTATCCGCAAGGCATTCGAAAACTCAATGAATGGCAAAGGATCCAATTTGGTGGAAATTGTATCGAC +>7_1#NODE_12_length_39995_cov_63.3136_ID_23 +TAAATTCTTTTACTATGTTCCTTAAGATTAATAATTGAATTTATAGTTAGGTGAATTTATGAATTTTAATAATAATATATCCATTAAAAAATTAACATTACTTATTTTTGTAATTATGTATATTATATTTTTTCTAATAACAAGTTTAAATTTATATTCGTACTCAAAATACGAATTTAATAAAACAGAAAAACTTATAGAAAATTTCAATGTCAGTCTGAGTCAGCAAGTTGTAGAAAAATTAAATAATATTTCTGATGTATCAAAATATCCATTACTTATTCCTGAAGTGAGAAACTTATATTCTATACTTGCTGCTGATAAACCTTATGATATAAGTGAATATAATTATTTAAAGTACATTTGTGATATGATGCTAATCCAAAACACCTCAATAAATGGTGCTTACATATATGATTTGAGCGGAAGAGGCGTATCAAGTACACGTAATAGTTCTAATGATAAACTTAAAAATCCAAAGTCAGAAAAATGGTTTATAGATTCCTTAAATTCTAATGAATTTACATCAATTTTTCCTAATATAAATGCAAGCGATATATTTGAATTTACTTCGCAAGACTCTAAACAGTTAATAGCATTGGCTAGAAAAATAATAGATATTAAAACAAAGAAAGTAACTGGAGTATTATTAATTACTATTCCAATTGATGAAATTCGCAATTTATTAGTAAAAGATCATCTTCCATTTAATAATCAGATAGTTTCTATCTATGATATTAATGGAAATTTAATTCTAACCACAGGCGAGGAATCCAATGTATTTATTCCTACTTATGACCAGTTACACAATGCAACCACTACTCCTAGCATACAGTATATGGATAGTAATATTGAATATATAGTTTCCTACAATACTATACCTTCAACAAGCTGGATAATTGTAAATTCAATTCCTAAGTCAAATGCATATCATATAGACAACTTATATATTTTTTCGTTTATTATTAACATAACTTTCTTCTTAATTTTATTTATTGTGCTATATATATTTTTTATAAAAAGAATATTTAATCCTTTAAAGTTTTTAATCAAGAATATGGAGAGCAATGTTGAAAATAATTTAAATTATAAAGTATCTTATACTAAAAATGACGAAATTGGAATATTAATGAAATCCTATAATGAAATGAAAAGCAGAATAAGCGATTTAATTAATATTAATTATAAGAGCCAAATTGAACAGAAAGAACTAGAACTTAAGCAACTTCAAAATCAAATTAATCCTCATTTTATATATAATACCTTAGAATCTATTCGCATGATGGCAGAAATAAATGACGATATAGAAACTTCAACCATGTCTGAATATTTTGGCTCTATTACAAGGTACAGTATGAATAGAAAAATAAACACCGTCTTATTAAAGGAAGAAATAAGCATTATTGAAAATTATATTTACCTTCAAAAGATTAGATTCGATCAGCTATTCACAATTGAAAATTTGATTACCTCCGAAATTCTAGACTGCGAAATAATTAAAATGATAATTCAGCCATTAATTGAAAACTCTATCTATCATGGCTTAAGTGAATGTAGCGGTGATGGGAAAATAGTAATTAAAGGTGAACATATAACTGAAAATCTAGTACTAACCATTTCTGACAACGGTATAGGTATGGATCATGTAAAATTAAAAAAATTAAATGATTATATTAATGATAAGAATAATGATTTTAGCGGTACCGCCTTAAGGAATATAAACAAAAGATTAAAGTTGAATTATGGCAATGACTATGGATTAGAAATTCATAGTATTCTCGGAAAAGGAACAACTATGGTGCTAACTATTCCATACATTGTAAAGTGAAACTAAGTCCATGGAGCCCCCTATGGACTTAGCCTTAT
TTTTCTTATTTACTTATTCCCTATAAACATCTATATTCCTATTTCAGAGTTGAAAATTAAAACCTTTTATATCAATTTTACTATTTACATAACTACTCCTTTATTTCCAAAATATCACTTTACCTCGTTTATTACAAACAATTGCGACATAAAATCTTCACGTTTTCTCGACCAATATTCACTAGCTTTTTCTGTATAATCTTTTGATAATACTATTCCTAAATTCATAAGTTCATCACCGTAGTAATTTCCTTCTCTACCTTCTATTGAATACAGTTTATCCGCGTCAAGTCCCTGTAATTTTATACGATTATATTTTTCATTTGGCCTGCATAAAGCTTGATAGTAGCCCACAATGGCTTGTGACTTATCATCAGACACTACCATCCATGAAGTTTCATTGCTTTCAAAAGGATTTAGCATTCTATAAAAATCACCTTTTTGAATCAACGATTTATTTTTCTTAAAGAATTTAACTTGTTCTTTGACTATTTCCTTTTCCTCATCACTCATAGTATTAACATTAAGCTCATACCCAAAAGTTCCAAAATATGCTACATTGGCTCTTGTGTTAAGCGGAGTAAGTCTTCCTACCTGATGATTTGGTACTGCTGATACATGACTTCCTATTGCGCTTAATGGATAAATCATTGAAGTACCATATTGTATCTTTAATCTTTCTACAGCATCAGTATCATCGCTTGCCCAAGTCTGTGGTGCATAGTATAGCATTCCAGGATCAAATCTAGCCCCGCCGCCTGCGCATGATTCAATTAAAAGCTCTGGGTATTTAGTTATTATCTTTTCAGCCAACTTATATACTCCAAGAATATATCTATGGAATACTTCACCTTGTCTATCTTCTTCCAGCGCTAATGAATACGGTTCTGTTATATATCTGTTCATATCCCACTTAATATAAGAAATTTTAGAATTTCCCATTATGTTATCCATCAAACTAAATACATATTCAACAACTTCATCCCTTGAGAAATCTAGGATGAATTGGTTTCTTCCATGAGACATGTTTCTGTTAGGTGTTTGAATAGCCCATTCTGGATGTTCTTTAAATAATTTAGTATCCTTACATACCATTTCTGGCTCAAACCACAATCCAAATTTAAGTCCTATCTCTTCTACTTTTTCAGATAAACCAGCTATTCCATTTGGAAGTTTCTCTTTGTTTTCGAACCAATCTCCAAGAGAACTTGTATCATCATTTCTTTTTCCGAACCAACCATCATCAAGTACAAACAGCTCTATTCCCAGGTCCTTTGCAGCCCTTACTATAGAAAGTATCTTTTCTTCATCAAAGTCAAAATATGTTGCTTCCCAATTGTTTACAAGAATAGGACGTTCTTTATCTCTCCATTTACCTCTTGCAAGTCTTGTTCTATAAAGTTTGTGGTAAGTTTGACTCATTGCATTTAAACCATCATTAGTGTAGACAATTACACATTCTGGCGTTTGAAATTCTTCTCTTGAATCAAGTTTCCATTTGAACTCATAAGGATTAATACCCATCATAACTCTTGATACTCCATACGAATCAACTTCAACTTGTCCTAAGAAATTTCCACTGTATACTAAACTAAAGCCATACACTTCTCCTACATATTCAGTTGTATTTGGACGTTTTAATATCACAAATGGATTATGAACATGACTACTTGCACCTCTTAAACTGTGAATAGATTGCACACCTTCTGTTAACTTTCTCTTTTTAAGATGACACTCTCTAGCCCAAGCACCTGATGTATGAATCATTTCAAAATCTGAATCTGCTAAATCAACGCTACAGCTCATAGCATTGTCTATATAAAATGCCTCTTCCCCTTCATTAGTTATTTTTGCATTTCTGCATATTGCATCAAGACCTTCGTATATTGTATAACTTAGTATTAGTTTACTATTTAATATCTCATCAAATAATGCAATTTCTAATGTTTGAGCTTCTGAAGCTGCTTCAATATAAGTCGATGGTAATCCATTTAACTTAT
TCTTTCCTTCAAATATTTTATATCCAACATATTTATAGTTACTAATTCTACTTCCATTTTTACTTATAATCGTATGAGCCGGATATCTGTAGTCTGTAATTCCGTAGCTTGGATATTCCTGCTTTATATGTTCTAATGACGAAGTATGATTTCCCTCAAACATATTGCATGAAGGTTTAAGTTCTCTTTCCATTATATGTTTAAATGACTCTCTATGCCTTAATCGTTTACCATAATAAACATTTTCCATCTGTTCTATGCCATCGATTACTCTAATAATATAACTTATATTATTATTATAAAGATGAAACTCTCTAGTATTTTCATTAAATTTAATTCCCATCTTATCTTTCTCCTTCTTAAGTATTTACAAAATAATACTTTAGTGTTTAGTACACTGTAGATAGTGCTAAAATTCACTTAAACAATCATTTGAAGTTGGTTTACTTTTTTAAATATTCTGAAGAATATATATAGTAATATTCTTCAGAATATTTGTTTTAATTTTAAACTAAATATTTTTTATATAAATAGTTTAATAGCAATTATTTATTAATTTATAATTACCACCTAAACTACAGGATAACTTTTCAAAACTTTAAGCTTTCCTTTTATTTCATAATTTCCAAGTGCTGCAGGTATTAAATAGCTATCTCCCATTTTTATCTTTTCTAAATACCCATTTCCTTCTATTATTCCTTCTCCATCCACACAAGTTAAGATGTCAAATTTATCTTCATCACTGCTATCTTGCAATGTTTCTTCTATAGTTAATTTTTCTATTCCAAAATATTCATTTTTGCATAGTAAACTCTTTTTATAACCATCAAATGCTATTTCTTCTCCTTTAAGATTTTCACATTGAAGTTCAAAGTTTGTCACGTCTAATGCTTTTTCTACATGTATTTCTCTTGGTCTACCGTAATCATATACTCTATAGGTTACATCACTATTTTGTTGTATTTCAGCTATTATAACACCTTCACATATAGCATGAATAAGTCCACTATTTATTAAAAAGCAATCACCTTTTTTTACATCTATCTTATTTAAATATTTTTCAACTTCTCCTGATTTTATTGCCGCTTCAAATTCTTCCTTTGTACATCCTTTAGTACCAACTATCAAACTAGCATCTGGTTTAGCATCTACAACATACCATGCTTCTGTCTTACCATAATCACCTTCATATTTAGCTGCATATTCATCTCCAGGATGAACTTGTACTGAAAGTTTTTCTCTTGAATTAATTAATTTCACTAGTAAAGGTAACTTTTCTAGGCTTACCTTAGCACCAACGATGCTAGTTCCGTATTCTTTTATCAGTTCATCAAATCTGATTCCCTTGAATTCACCATTTGCTACAATTCCTGTTCCATTTGGATGACAAGCTATATCCCAGCTTTCTCCGATTTTTCCCTCTGGAAGGTTATCTCTAAAAGTTTCTAGGTCTCTTCCTCCCCATATTTTTTCGTAGTATAAATTTTCAAATCTTATTGGATACATACTTCCTCCTACTCCCTCACTCGATATATAAAAAACATATCATATATTATTAAAAACCTCACTTTAAATATATATTCTTTTCGGCAACTTGTCTATACTTTTGCAGTTAAAAGCATACTTTATTCATTTAAGATAAAATTTATTAACTTATATGTACTCACATAAACTTATGACTCTTCTCTCGAAACTCCATTCAATTAATACAGCGACTCAGCACACTCTCTCTTAATTACATATTTAGTATTAAATTTCCACTAAGGCATTTATATAAATCACTTTCAATATCCAAAATTTACATACTTCCTTAGACGAAAAAGATGCTGCGATAATGTATAATATTACTGCAACACCTTTAATTTATTTATATATTAATTTATTTATATAACTGTTATAGCTTTAAATTCAAAATTGTCATATCTGTGTCTAGAAAACGAGTACTCGAACGCTCTTCCATTTTCTAAGTATACAACCTTTTCCACTTCTGCTATTGGATC
TCCATCCATTAACAATAAATATTCTTTATCAAATTCATCTGCTTTACTTACCCTAACCGTCGAATGAGCACTTTGAATCTTTAATCCTAGCTTTCTCTGTAAAAAATTATATATAGACCCATTAACATCTTCTTTTTTTATTCCCTCTGCAATTGATAACGGCATATAAGTTTTTTCTATTACAATAGGTGCCTCATCTACATACCTTACTCTATGTATGAAGTAGACAAAATCACCGTTATCTATCTTAAATGTACTTGCAATTTTCTCATCGGCATTAATAATCTTAAAATCTAAAACTTTGCTTGTGACCTCATGCCCTAAATTAGTATTAGTTAATCCCGAAAACTGTTTTTTTCGAATAATCTCTTGAATCTCTCCTTCGGTTATGTCCTTTATAAAACTTCCCGATCCCCTTCTCTTTACAACCAATCCTTCTGCAACTAACAAATCTAGAGCTTTCTTCACTGTCATTCTACTAGCATTAAATCTTTCGCATAGTTCTTTTTCAAACGGTAATTGTTCATTAGGCTTGTACTTGCCACTCAATATATATTCTTTAATCTGGGATGCTATTTTCTGATACTTTATCATAACTATTCCTTTCATATCTAATTCTTCTTATTATGCAGACTTATAATTTAATTTTTTAAATATTTAAATATCTCACATGCTCTTGACTTAAAGGGAATTATGTGGTGGTTTTCCATTGCATTTGTTATTATAACTTATTATTCCTTATACAACTTTCTTTATCAGTAATTATTGACCATTTCACTAAAAACCCCCACCATATTCCATATATTTTTTATACAACCTGCACAAATATACTCTACCCTTAATATATAATATATGTCTTTCATGCACTCCTGTCTATACTTTTATTTAGTACTTGTTTATCTCACCAATAAATAAAGAATAGGCTGATTAATACCAACCTATTCTTCGTCATTACTATTTTACTACTATATAAATAAAAATTATTTAAAATTATAATATATCCCCCTATAAAAAGACTTTTATCAAATAATCTATCTTATAGGGGATATTTTAATTGAGATTATAAGCTTTATTAATGATCTTGTTGAGCTGCTAATTCATCAGCATAAGCCATAGCATCTGCTTTCTTAAAAAATGGGAAGTAAATTACAGTAGAAACTATTAAGAATACAAATTGTGCTAAAGCGTAACTAGGTCCACCAAGAATAAATCCTGAAATTACAGTTGGAACTGTCCATGGTACTAATATTGCCCCCATTGGTGTTAGTAAGCCTGATGCAATCGCGAAGTAAAGTAATACTGCATTTATTACTGGACCAAGTATAAATGGTACACCCATGATTGGATTCATTACTACTGGAATACCAAATAATATTGGTTCATTAATATTAAATAAATTTGGAGTAAGCGCTAACTTACCTAATTGCTTAAATTGAGCCGATTTTGCAAGGAATAGTAGTAGTAGTGTAAGTCCCAATGTTTGACCACTACCTGTTAAATTAACAAAACTATCATAGAATTGTTGAGTCACTATATGAGCACCATGAGCAAGGTCTAAAGTCCCCGCTTTTTGTAATTCTGCATTGAATGCAGTATTAGCAGTTAATAATGGGCCTACCATTCCTCCTACTGTAATACCTCCATGAATTCCAAAAAACCATAAGAATGGAACAGCAAATGCAATAAGCATAGCTCCAGGTAATGAATCTGATGCTGCTTGTAATGGTGTTTGAACCACTTTATAAATAATCTCTACAAAAGAAGTTTGGAATCCAAATTTGCATAATGCATAAACAACAGCAGCTCCACTAAATATAACAGTAGCCGGAATTAAAGCTGAGAATGCATTAACAACACCTTGTGGAACACCCTCTGGCATTTTTATTGTAATGTTTCGCTTAATCATTGCGCAATAAACAAATGAAACAATCAAACTAACTACAATTGCAGTAATCATACCTGCTCCACCGGTCCAAGATGTAGGAATAGCACCGCCCACT
TGAACTGGAGTCGCGTCTGGTGTCGGCTGATAAGTAACAAAGTTATTAGTAACTATAACAAATACACTTAGAGCAATTACTGATGGTGAAAACGGCTCTACTCCCTCATTCTTGCAGTATGAGTAAGTCATTCCTATAACTCCAACCAATGCCATAATGCCCATTGTTGATCCTTGTACTTTGTATAATGGATCATTCCAACCTGCACCAAAAGTTGAAGTCATAAAATCTGTAAATGCTTTTAATGGAAAAGCTGCAATTAATAAAAATACAGATCCAATAATATTCAATGGTAATGTAAACATCATACCATCTTTTAACGCTTGGACACCTTTAAGATTAACGAATTTCATAATTACTGGAACTATTTTTTCGTTAAGTGTTTGATTAAATGACATAATTATTCCTCCTTTGAATTAAAAATATAATATAAATAATTACAAATATAAATTTGCTATTATTGCCTTATTGAATTTTTATTACTTTAAATGTTTTTAAACTTAATATTATCTAAGTACTATAGCGTCTAGATTCAATATATATGCTTAAAGTACTATTTCTTAATTAAACTTAATGCTAAATCTAATACTTTAGCTCCATTCATCATTCCATAATCAACCATAGGTATCACTTCTACTGGAACTCCTTTTGGTTCACAAAGACTTTTTGCTTTTCCTAAAGTATATCCAACTTGTGGTCCTAAAAGTGCAACATCCATGCTATCTAGATGTCTATCCATTTGTGATTCTGGGAATGCTATAATTTCTGCATCTATCCCCTTAGCCTTAGCTGCTTCCTTCATCTTATTAACTAAAACACTTGTTGACATCCCTGATGCGCAGAATAATTTAATTGTAATCATAATTGTTCCTCCTTAAAATCTTTCTATAAATAAAATAATGTATTAAATACTTAAGTACGTTTACATCAACGCACTAGTAATATGTTGATAAGCAAATAATATATTTATTTATTAAGTAAAGTATTTACAACTTTTCTTAATTCTATAATTTGTTCTATTAAAGTTTTCTCAGATATAGCTGTCATTAAATGATCTTGTGCATGAACAAACAAAACTGATAATTCTATTTTTTCACCTGCGGCTTCTTTCTGAAGCATTTCTGTTTGTGCATCGTGTGCCTTAGCTAAAGCATCATTAGCCAAACCCATCTCTTTTTCAGCTTCTTCATACTTACCTTCATTTGCCATTCCTAGCGCCATATAAGCATGATTTTTACAATCTCCTGCATTAATTATAATATTCATTATTGCCATTTCTAGTTGATCCATTTTTACTCCTCCAATTTGCGTGTTATTTTTTATGCAATTATATAGAGCAATAAATATGCCAAACACTATTAATGATATAAGTTTTTATAATTTTCATCCCCAAACTTATATATATTTATAAAACAAAATGAATGAATATTATATATTATAAGTATATACATTTATTTTAAAAAGATTATATATTTTATTTTCCAAGCACACAATATTTATCAATATATGTAGTGTTTAAGCAAAGCATACAGTGTTTATATAAATATTTATAGTGTTTGAAAAAATAATATCTGGTGTTTCATTTTTTCAAACACTGAACCTTTTATTATAAAACAATATTCGATACTAAACCCTTTATCCATTATCATATATGCATCTATTACTGCTGACTTATTTTTCACTAGCCAACAATCTTCTGATATAGTCTCGTAAATCAATTTACAGAATCGAAAATTAACATATAAAAAGAGAAAGTCAAATGATTCCTTCATCTAAACTTTCTCTTAAAAACATGTTCTAAACTCGCTACTTTAATTTTTATATTTCATTATACCCAAGCACCACTTGCATCTACTGTATAACCGTCAATAACTGTATCACTAGCCATAAATCCATCACTATATAAATAGTACCATTTATCATCTACTTTCTGCCAACCTATTTGCATTTGGCCAAATGAAGCCAATAAGTACCACTTACCATTTACTAGCTGCCAGCCT
GTTTGCATTGCACCAGTTTTAGCTAGCAAATACCAATTCCCCTTTACTTGTTGCCAGCCCGTTTGCGCTGTACCAGTTTCATTAGCTAAATACCAAGTACCATTTATTTGTTGCCAACCCGTTTTAGGTTGTCCATTGCTATCGAGCAATTTTACAGTCCCATCAGCACTTGTGTTTAAAATTGCTGTAGTTACACCTGTTGTAACTGAATTTTCAGTTGTTGTGCTCGTTGTTGTAGTTGACCCACTGTTCGATGAACTACTATTTGATGATGTACTTGAACTACTACTTGATGAATTTCTCGCAATAGAAGACGTAGTACTTTCAAATATCTTACCATTATAAGTAACTAATAATTTAATATAATATCCTTTATCACTGCCAATTAACCTATAAGTTTTATCAGTTCCCACTAATGCACCGTTAGTGATACTATCATCTTTATTAAATAACCTATACCAAGCATATGTTACACCTTGAGATGGAACAAACTCTGTAGAATCTGCCTTTAATAATTCAGCAGTTAGAGTTCCACCAACCCTTTCAGTTCCATTAATTGCAACTCTTGAAGGCATTACTACTTCAGCTGTTCCACCTGTTGATCCTCCTGCATTTGTATTTCCTCCTGTTTGTTCTCCACCAGTATTATTGCTACCTGAATTTGAAGAACTTACCCCTTTAATATCATCAAAATACATTACTGAGTTAACAGTATCTGTTCCAATTGTATTGCACCATATTCCCATCTTCTCTATATGAGCTTGGTCAAATGTTCCATTGTTTTTTCCTTTAAATTGGCTAAAAGGTATAGTTAGAAGTTTAGCTTCAGTTGTTGCTGCAAAATCCGGCATCCAAACTTCAAAATCTTCACCGTTTGATGTAAGTTGTATAACGAGTTTTTGACCTTTGCCATCTGGCTTAACCCAAATTTGTAATGCATCGCAACCAGACCAATCCGCATCTAATGATTGTGTAATACCAGTCCATCCGTTTGGAGCAATCTTATAATTAAACGCAAGTCCATAATCGCCACTATTTTTATTATTTGTATCTAACTTTGGTGTAACACTGCAGCCTGATCCAACATTAGTAGACCAAACGCCTTGAAGCAATGCATCTTCTCCTGAATATGATTCAAAATTATCTACCAATTTAGGATCTTTCTCTGCCTCTTTTATATTAAATATAGCTTTTATTGTATCTAATTTATTATCATCTGAATATAAATCAATAGACCCAACAGCTTGTCCTATTTTATCGAGAATAGTTTGTGAAATTGCAGCTGAATATATACCATTTTCATCTTTTGTAGCGTTAAGAGTTTCAATAGCATCTCCTGCTTTATTTTTCAATACAAACTTAACTTGTCCATCATATTCTTTTACGCTTGCTGTTACTATAGTTGGTTCAAGAATACGGCTGCTTGAAACTGGTCCTGTTATGAATCCATAAGAATATGCTGACTCTTTAGTTATTGCAGAACTAGAGTAATCTTCCACTCCATCAGCAAATATTGATTTATCTTCATTATAGTAATTTATAAAGTTATTAATCATTTCATGACCTTTTGTATCACTTACCATATATGGAGCAAAGAAACCATCCGTTTCACCAAAATTCGCCCAAGTCATATAGTACGGCATATCCGAACTTGATATTATATCAGCTACATGCTTAAACCAATCTTTATCGGCATTTCCTGATAGAGCCATAAAACCATTATTTTCATTGTTACGCAATCCTACTTCAGATACTGCTGAGAGTTTACCTTTATTCTTTGCTATATTTTGAACTAGGCTAATAGTGTCTTTGAAACTCGCCATCCATGGATCTGTAGATGCATCTTCAGTAGGATTATCATCATAATAATCAAAAGCCAAGATATCAACAAATTCATCACCTGGATAACGTGATAAGTAATCTTCCTCATTTTCAAACGGCCCATTTGGTGAATATACATATAAGAAATTATGAACTTTATCAGTATCCCTTAAATATTC
AACTGTATAACGATACATATTCTTGTACGCTTCCTCGTCACAATATGCTTTACCCCACCAGAACCAGCTTCCATTATTCTCATGGAATGGCCTGAATATCACCGGAACTCCAGCATTCTCTAGTTTATGTGCATAAGTTGCAAGCATATCTAAATACCCAACATATACATCATTCAAATCTCCACCTGGCATAATCCTTGACACTATATTACCAGTTGTTGTTCCTGGAGTATATCCTGAATAATCATAATTTCCGTTTTTATCTTTTCCTTTTTCTTTAACTAATTCAAAATTTGGCATATGAGCTGACAAAGTTATTATACCGCCTTCATTTGACGCCTCTATGCTTAAATCTGCTGCTTTTGAAATCAAATCCTTTCCAGCAGCTTTCTCTTCATCAGTTAATGATAATTCGGATCCCGTTAATGACAGGGCATCAATTCCAACAATCGCAGAAATAGAACTTGTTATATCCTTTGTGTCTGAATTAGTTGGACCATTTCCAGCCTTATGATGTGTATCATTTTGATGTCCATATAAAACATAATCCGATTTACCTACACCAGTTAAATATGAGTATAAATTTGCTGTTTTTGATGTTGCACTCGAGTCAACTAACTTTACTTTTGAAGGCATTAAATCGCTTACATCAATTATATTTTGTGTTGTAGTAGTTGTTGTCTTTTCAACATATTTATCTGCTACTTGTTCTTGACTAAGTTTAATATTATCTATATATATATCACCATTATAGTTAGTGCTTGCTCCTATAATTCCAATAGTAATTGAACTGGCATCTACATCCTTAGAAGTAAACTTAAGAGTTACCTGTACTTTCTTTAATCCAGTATTACCAACATCTACCGAATTATTTAAGTCAATTGAGCCATAGGTATTTAAGCTATCTGTAATGAATAATTTTGTTTGAAATCCTCCAGTAGTCATCTTACTTGGGTTATAAATAAAATCATATGTTAATATATTATATCCTTTAAATGAAGTTGCTGCACCCAAATCTTTGTTAATTTTAAATTCACTCCAGCCACTTGCAGAATCCTTAGAATAATCAAGGGAAAGCTTCAAAGCCCCACTGCCAATCGTAGTGTTATCATAATTTACAACGTTATCAGAAGATCCATGATAATCATAGGCTCCTCCATAAAACCATCCATCACTATCATTCTCAGAATTATCAAAATTCCATTTTAAATCAGAAACACTCGGAAGTGGTGCCGTATCACCTGAAGCTTGGTTGTTTACAATTTTAATGTTATCTATAAATAGCTTACCATTGTAATTGCATTGATTTCCTACAATTTTAATATAGAAATTCGATAATCCCACAGTATCTTTGTCAGTTTTTATAGATACAGTTTTTCTTATATAACCATTAACAGCGCTTTCATTTTTGAAACCAGTTGAATCTACCGATCCCGAACCAATACCTGCATCCTTCCAATTGGAGTCTTGCAATGTGCTTTCCAAGTGAATCTCGCCAGTAAAATTAGCATTTGTTTGTGGAATTATAATATCAAATTGAATCTCAGAACCTGCTTTAATTTTTTCTGAGCTTTTAGCAATAAAATCAAGACAAGTTTCAGAATTCCATGTACTTACTCCGTCATAATTTGCATTAAATCCTAACGCATTATTTCCATTTATATCTTCTATTGCTACACTTGATATATCTGCCTTCCAATTAACTTGATCCGGCAACTTCCCATCTTCAAAACCATTATAATAAACAGCAGAAGGGGTTGTACTAATTTCAGACAGCGCTGCGACTGTGTTAGTCTCATTAGTAGCTGCGGTAACTGCTGCACTACTTGTACTGCTATTTAAATTTGTTGCATCGTTACTCCCCCCCTCCATAGTAACTGTTGTACTAGAGATGCCTGTACTATTTTCTAAAACTGTTGCATTATTATTTTCCGCTATTCCTATTGCTGAAACCGGTAAGATAGTAGTTGAAGTTGAGATTACCATAACTGTCGCTAATA
GTTTTTTAATTTTTTTTGTTTTCATTAATTTTCCTCCTAATACCAAGTTATCGTTTAATTAACATTGCAAACTTTTAAAATTCACTTTTCTGATTTAGAGAATTCTTATTAGTTATTTATTTATTTTTTAATTGTAAATCTTTGGTTAAACTTTATCAGTTAAAGCATAATTGATACTATTACTTATTTGATTAATTTGCCTTGTTTATATTTTACTTTAACGATTAATTAAAGCATTTACATTATATAATAATTTAATAAGCTTTTCAATATATGGAAAAATGTTTTTATATATGAAAAAAAACTCTTTATTATACATAAAACTTATCTTGTTAAAAATTCATTACCAGCAAGACAAGCTTTTTCCATAACACATAATTGTTTAATAGTAATAAGAAAATATCTTTCTCTTAACTACTTAAAATGAATTCTTTCTAAAGAAATAACCATGAATAATCAAAAAATTATATTTCTAACTATCCATGGTCCTCATTTATAATAATTTTTAATTGAAATATTTTAGAAAGCTATTTGCATTACTCACCTTATTTAGATAATATTCCCCTTAAATATTGAGTCAACTCTTTCGCTGCTTTCTGATGCGATAGAAATCCAGGATGTTGTCTTGATCCTACTGTTTCTTCTGTTGTATTTGGAAGCTGGAATACAGAAACCTTTTTATCACCAGTCTCTTTAATATATGCATCCACAGCACGATAAATAGACGGAAGCATTGGAATTCCTAACATCCCATATACCCATACTATATTAGCCTTTTTATTATATTTTCTAAGTTTAACTAGAAAGTTATAAGCCGCTTTTTCAAAAGCTTCTAAATCTTCCTCATTAAATGTGCCATCCTCATTTAATCTTTGTTTATGAGTCTCTCCTGTAACTTCATCTCTCCACTCAGGTGAAGTAAAAGCTCCTTCATCATTTGTTCCAAGGTTAACTACTACAACATCTGGCTGCCAAGATTCAAAGTGGTTATCTTCGAAAGCTCCTAGAGATTCATTTTTATCACCAGTAAGAAGTCCGCACACCTTTTCATAATACTCTGGAATATTATAATGAGGATTATTATCCCAACTTGTAAGTATCCCCCATCCACTTTGAGAAATAATTCTATGTTCTGCATTTAAGGCTTCTGCAGTCATTGTGCTATAATTATCTATCGCACTAAACCACATAGAAATCCAATCTTCTTCTTTCTTAGCCCCTATTACTCCTTCTCCTGATGTAATACTATCTCCAATAAATTCAATCTTGTACGTTTTTTCTTCTATAGGTAGACATTCCCCATCAAATTTCACAGCATGTATTTGCATATAACTTCCTGGGTCGCCATTCATAGCCTGAACATCTCTTATAATTTTAACATTTTTTATTTCATGCTCTGTCATCCCTCTAAATATACAAACCCAGTACCTTCCGGCAATTAACATTTGTCTGCTTACAGTAGCGGAATTAACAACTATACTAATCCAAGGCTCATACATATCATAGTCCACTTCTACTTCAACCCAAAGTTCAGAGCCTTTCGCATTTAGCTCAATACCACTTCCTGTCCAAAATACTGTTAGTGGTGAAAGACATCCAGTCGTTCTCCCATGAACTTTTAAATTTTCAATATCTGATAATGGGTATATTTTTAGTTTTTCATTTTCTCTCATTATTTTCCCTCCCACAAGTTTTCAATATTCAAACTATATTTTTATTTAATCCTCATTCTAATTTAAATTATATTTATTTTTTACTCACTCTCCCTGGCTACTAATCACTTTTTGTTTCTAGTAGGCCCTCTTCATTACTTTTATATTACATCTGTAATTTCTTTAATTTATAGAACTCACCTTTTTTCTCCATCAGTTCATCATAAGTCCCAAACTCTTCTAATCCACCATTTCCTACAACTGCAATCTTATCTGCATTTCTAATTGTAGATAGCCTATGTGCAACTACTAAGGTTGTTCTATCTTTCACTAAGCTTTCAATTGATT
CTTGTATCTTTTTTTCCGAAACGCTATCTAAAGCAGATGTTGCTTCATCAAGAACCAATATCTTCGGATTTCTCACAAAAGCTCTTGCAATTGATATTCTCTGACGTTGTCCACCAGAGAGATTACTTCCATGTTCAGTAATTATGGTATCTAATCCATTTGGGAGTGATTCTATTAACTCCTCCAAATTTGCCGCTTTAATTATTTTACTTAACTCTTCATCAGAAATGTTTTTTATTCCATAAAGGATATTTTCCCGAATAGTATCAGAAAATAATATTGCATTTTGTGGTACAACCGCAATCTTGGAACGATAGCTTTGAAGATTAATATTTTTAATATCCTGATTATCAATCAGTACTTGTCCGTCTGTGGCTTGGAAAAATCCTATTACTAGATTTAGAATTGTCGATTTCCCGGCTCCTGAAGTGCCTACAAACGCTACTGTTTCTCCTGGATTTATAGAGAAATTTAGATTTTTTAAAATTGGACCAGTACTATCTTTGTACTGAAATTCCACATCCTTAAATGTAATTTTCCCTTCTACATCCTTAATTTTCTTTTTCTTACGGTTATTTTCTACATCGTCAGAAAGTAATATATCTCCAATCGAATCCACAGATTCCAGTCCTTTTGCAATAGTCGGCAATAGTGTAATAATACCCGAAATCTGTGCTACTATAGACCCAAAATAAGTTTGATATAATACAACTTCACCAATTGTAATGTTTCCTTTTAATGCTATATAACCTGTAAAACCAAGACAAATTATTTGGAACACTTGAAACGAAACCCAGCTTATGGATGAGAAATATGCCTGAATAAGATCTAACTTCAAACCCTTTTTAGCAACATTCACCAACTGACTATCTATTTTCTTTGTTTCTTGCTTTTCTAACGCATGAGCTCTGGTAACAGGAATTAATTCCACCATTTCCATTACACGAACTGAAGTCTCTTCCATCTCTTTCCTATAGTCTGTATTATACCTTTTTATCTTTCCTCCAAAAGCAACTATAATAAGTACTGCTACAGGTATAGTTGCAAGAAAAAACACAAACACTGTTAAACTCTTAAAAATAACAACTCCTAAAGCTGCTACAATATTTAATACAATACCTAATATTGTTATAAAAACTTGAGATGATAGATTCTCAATCTGTTCAACATCACGCATAATCTTTGACTGTAATCTTCCTGATTGCATTTGATTATGATATGCAATAGAAAGCTGCTGAAGTCTTTTTACAAGAGTACTTCTGAGTTCTCTTTCTACACTTCTAATAGTTTTTGCATATAGAACTGTATGTACATAATTTGTTGGTATATTTTGCAGTACCATAACAAGAATTATTATAATATTAATTACAATAATATGAACTGCATTATCACCTTTATCGGTTGCTGCATTGATAATATTCGCGGATACAACTGGCAGAACCCATACAGGAGAACTTTTTAATAGGAAAAATATAATTGAAAGAAATAACTTTAAGTAATGTCCTTTATATATACCAACAAGAGTTTTTAAAGTACTATTTTCATTCTGTTTAAAAATTTTCAAAAGAGCTTCTTCGCCTGGTTCTAACTTGTCATTTGTTTTATCTTCTTCACCTATCACTTTTCCACCACCATATCATAATACTTCTTTATAACTTTTTCTGCTGGTTTTCCATATACATCATAACCTTTATAATTTTCAGCTCTATTTAGAGAATATTGATTGCACGCCCAATCCCACATAGCAAAACCAGATACCCATTCACGTTTAAGCGAAGATTCAAACATAGCCTTGTACCATTCTGCTTGTTCATTTAAATCTATATCACCTTCAAGGCTCCAATCGTTTGGAACTTCTGAAGAACCTTTAGTGGACATACAACCAGCTTCAGCAAAGAAAAATGGCTTATCGAAATTTTTGACTACTTTTTCGATTCGATCTAACTCTTTTTCCCAATCCTTAAGTGGATAATAACCACTTGAAGATATAACATCAACAC
AATCCCACCATTTTACATTATGCTCTTGATATTTATCAGTATTATAAGATACAGCTCCATGATAAACATCTCTAATATCAGCAATTAGTTTACGCCATTCATTTTCTCTACGCTCCGACATAACCATTTCACAGCCTGCAATAAACATTTCACAACCTGTTTTTTCTGCTATACGAGCATAATGAAGTTGGAAATCCGTGTAAGAAGCAAACCATTCACTCCATTTTGGCTCACAATGAACGTCTTCATCAAAGAAATTTATATGAGCCCTCCAAGTTCCATTTTTACAATTTACAGTCGGCTTAATCGCTATTCTAAGACCAATATGCTTTGCATAATTTATGATATTTTCTAACTCTGTATCTGAAATAGTAGAATTAGAACTATAACAAATCTCTTCTGATTGAGGAGTATCCTGAAGACCATTTGGTACCAATATTATAAAATTAATTCCAACCCTTTCTCTTAAATTATCTAAACTCTTATATGTTTCTTTTTTTAAATAGCTCCCTTTCCTCGCAAAAGGCGCAAAAGTAAACCCTTTAATAAATTTCATAAATCCCATTCCTTTCTTTAAATTAAGCGCCATGAAGAAAATAACAGATCAAAATGTAATGTATATTTTCTCAAAACAAATCAACTCTTTTGCTGTCATTTAACAAAAGATTTATAAAACAAAAGTTCTAATTCTTGTACGAAGGTTTTATATACGCTAGGCAGAATAAGGCAAATCTTGTTTGTAGCAGATTTTATATAAAAAATAACCGACGTAATATGATATTAAATAAACAAAAACATGCTGATCTATTATTTTTTTCATTGCGCCTCCGCTTACATATACTATATCAATTTCATTTTTCATCTGATATAATACAATTGTTTAAAAATAGAACAAATCTATTTTACGGAGGCTATTATGAATTATTTATTTGAAGAAATTAATGTTTTAACCTCACCCTATGAAGCCTTTTTAGCTGGCACTAAACATGGTGGCTTCCCAATAAAGGCTCACTGGCATTATTATATGGAAATTATTTTTATCATTAAAGGATCTTCATTAATAAATTGTAATAATAAGGAATATGTCCTCGAGCCTGGAGATTTAATTTTATTTCATCCACAAGCGGTGCACTCTATTTTTTCAGCTTCAAGCGAACCTTTAAAGTATGGTGTACTTAAATTTGATATAAATAATCTTCATATAAATAGTAACTATACTCCAAAACTTGGCTCAATATTCAAATGCGCTATAGACGATCCTTATGCACCTATATTACTTCCATCAAAATTATTTAAAAATTCACCAGAACGTTTATTTAATAGCTGCATTGAAGAAGTAGTAATTAAAGATTATGGTTATGACATTCGCTTTCAGTCTTTAGTTTCATCTCTACTGATAGAAATTCTTAGAATATGGAGAAAAACTGGATTTGATACGAATAACATCACTTTAATGCCCTATGATACTGATTCGTTACATACAATATTACAATATATAGATGAGCATTCACATGAATCAATAAAAGTAGAAGACCTTGCTGAAAAATGCCATATGAGTTATTCGTATTTTGCAAAAAAATTTCACGAATTGTATGGTCAATCCTGTAAGGACTATATTGAATTTATAAGATTGAGCAAAGTTAAAGATTTATTATTATTTACTAATTTTGATTTAAATTATATTAGTCAAGAGACGGGATTTGCGGACTGCAGTCATTTAATTAGAACGTTCAAAAAAAAGACAGGTATAACACCAAAACAGTTTAGAATGCTGCACATTATAGATAAATAACATGTAACTTATAGTTATGCGTATAACTAGCAAAAAAAGAAAACCCCCTGTTACTAGGCTGCCAGAAAAATTAATTGTTAATTCATTCCAATAAAAGTTGTGTCAACCATGAATGCTCCCGGTGCAAGCTCGTGACAAGCGGCGATAGAACAACTTTTATTGAAGAAATACATACAATTATTTTTTCAACACAACACTTCGTAAAAGG
GCACCTTCTCTTTCTAGTATAGTATATATATCAAAGCATAAGTATGATTTTTAACTAGCTTATCTACAGTTTCTATAAATTACTACTTAAGTCTAGTAAATGTTAGAACTTGTATATCTGTAACCCAAAATTCCTACAAGCTAGATTTCAGTTTAGCTAGATCTTCTAAATTTATAACATATTCATCGTTGTAGAATTTCATCAATTCTTCTTTGGTATTCCATTCTAATTCTTTAACAAAGTTATTCCAAGTCATAAACCATAGCCAATGAGTTTCTGTTTCCTTAAGTATTTTAGGATCTGGAATCGGTCCATTTTCACTTAGAGCTACTGGTTTTACACTATTTGGAATAGCTTCTGTATTTAGAAATTCAATTGTTAGTGGGCCATGATTTTCAAGAGGAGCATAATAATCATTACTGTTAATATCTACTACATCATCTCCCGGATACCACTCTACATTGGGCGAGTTCCACACCCAAATTAAATTATTTAATTCATGATAACTTACATATCTATCATACATCAACCTATATAGCTTTATGTAGGCTTGGGGACCTTTATCTCCCCACCAAAACCATCCTCCAGAAGCTTCATGCAACGGTCTCCATAAAATCGGAATATTGTTGTTTTTAAATATCTTTAACTGTTCAGCTATTGTATCAAGATCTCTTATCATATTAATATTTTCTTCAGTACCTTCAACTAGTGCTTTTTCTAAATCAAAGTTAGTATTTTCAGTATAAAAGCTCTTCCCTCTTCCATTCATTGGAGAGAACCAGTGCCAGCATAAAGTTATAATTGCATCTGTATTTTTTCCCCAAAATAGAGCTGTTTCTATAGTCCCCTTATTATTAGCTAACTCATCTATACACTCAAAATTACTATCAGGTGTTTCAGTTGCTAGTGAGTAACTTAGTAAATCAAAGCCGACTATAGCTGGAGTCTTTCCTGTCATTCTCTTTATATATTCAAAATCTGTTCCACTTGCCTTATTGCAATGTTGACCCGATAAAATTCCTTTACCATATATTTTAGAGAATATTTTCATTAATTCTATGCATTCTTTACTTGCATCTTTATTACTTAATACAAAATTGGGGTTATCCAAAATACAATTAGTACCCTCCGTAGCTATTATGCTATCAATATTCAAATATCCAATTTGCTTTGATATTTTAATCTCATTAACTCCACTTTTTAATTTTAAGCTTCCGATTTCCTTGATAGCAAATTCACTAGCTATATCAAATACTACACTTCCATAGCATTCATCGTTAATAAAAATTCTGTGTACAGATTGTTTCTGAAATGCTGATTCAGAGATTTCTTCAGGAACCGATAATTCGGCTTTTGAAGGTAGAGCATACTTAATTCCTAATCTGTAAAATCCATTATGTGATAAATTAATTTCAGCAGTTATATAATCATCATCATTTATTAATGCAATTAAGTATCCTGAACCGCTATATCCTTTAATTATATTATTTGTTTTACCATTTATATGCTTAATCTCTGTAGAATCTATTATTATAAGGTTATTTATCATAGATTTTCTCCCTAATTCATAAATAATTAGTCTTTTAAAATTTCAGCGCTACATCACTGCCTTAAGTGTCCATGACCTAGATGAAAAATCCCCTATAACTCCTATAGTATCAATTCCAGAGTACATTAACTCATCGCCACCTGCCTCAATATCAGTACCTTCAATTATATAAGTATATTCTGGATTTAGTCCCTTAAATCTTATTTTTTTCGGAAATCTATTAGGTACTTCCAAGTCTTTAAAATAGAATGCAACCGCTTCAGTCTTATCTTCTGTAACATAAATCCAAGCAGTTTCATTCCCCTCAAACGGGCTTAGAATTCTATAAAATTCTCCGTATTGAATAATATGACGTATTTCTTTATAGCGAGCTACTTGAGATTTAACAATTTCTTTTTCCTCGTCTGAAAACTTTGTTAAATCAAGTTCGTACCCAAAATTTCCCGACATA
GCCACATTACCTCTAGTTTCCAAGCTTGTTGTCCTTCCAACTTGGTGATTAGGACATGCTGATATATGAGATCCCATTGAACTTGCTGGGTATACTAAACTTGTTCCATATTGAATTTTTAATCTTTCTATCGCATCTGTGTCATCACTAGTCCAAGTTTGCGGCATATAATATAGCATTCCTGGATCAAATCTTCCACCTCCAGAGGCACAACTTTCAAATAGCACTTCTGAGAACTTACTTGTTATGGTCTCAAGAATATAATAAAGCCCTAGTACATATCTATGAGATATTTCCATTTGCTTGTTACTAGAAAGTCTGGCTGACCCTAAATCAGATATCCCCCTATTCATATCCCACTTAACATAAGATATAGGCGCACTTTCTAATATATTAGAAAGCATTTTTATAACTTCATCACATACATCCTCTCTACTTAAATCAAGAACAAGTTGAGATCTCTGAGCTCTTGATTGTTTTGGTTCTCTATTTGGTACATGAATACACCAATCCGGATGCGCACGATATAAATCACTATCAGGAGATACCATTTCTGGTTCAAACCACAATCCAAAACGGATCCCTTTATCAGTAATCTTATTTACAAGACTGTCTAACCCATCTGGTAATTTTCTCTTATCTACTACCCAATCACCCAAAGAGCAGTTATCCGAGTCTCTCTTCCCAAACCATCCATCATCTAATACAAAAAGTTCCATTCCAAATTCTTTCCCTGCCATAGCAATTTCTTCAATCTTCTCAGTAGTAAAATCAAAATAGGTAGCCTCCCAATTATTGATAAGTATTGGTCTTTCTTCAAACTGATATTTACCACGACATAATCTTTCACGATATAGTTTATGATATGTTCTCGACATTTCACCTAATCCAGCATCAGAATATACCATAACAACTTCCGGCGTTTCAAAAATATCTCCCGCAGATAATTCCCAATTAAAATCAAATGGATTTATTCCCATCTGTACTCTTGTGCAGCTAAATTGATCTACCTCTACTTCTGCTAAAAAGTTTCCACTATATACTAAGCTAAAACCATATACTTCACCATTATCTTCATTTGCATTCTTTGTTAAAAGAGCTAAGAAAGGATTCTGCTGGTGACTGCTTATACCCTTCTTACTATCTATAGACTGCATCCCCGGTACCAATGCTCTCTTATAAATATGCTTCTCACGACACCACGCACCTGATAATTGTAACAAATCAAAATTAGAATGTTCTAAATCGATGCTAAAACTTAAAGCTTTTCGCAAATTTATCTTCTCTCCATTAGACTTGCTATTTACAAACTTAGCATATCTAGTTATAACATTATAACTATTGTAAATAGTATACGATAAGATAACCGTAAGCCCCGCAATATCATCACTTAATTTTACTTCTAGTGTTTCAGCTTCCTCTTTATTATTAACATAAGTTGCAGGTAAAGACTCCATTTTAGATTTACCAGGTATAATATCATGTTTCACGTATTTTAATTCGCTAGTACTAGCTCCATTCTCAAGAATTGCTTCATAACTCGGCGTCCTAAAATCCCCTGAACCAAATGCCGAAAATTCTTGTGGTACTACATCTAAATTGGTTTCTTCAACACTAGATGAACCAATTAGTTCATAGCTATTAACTCTTTTCCCATAATATCTATGAGCTAAATAGCCCTCATTGGTTATCTGCATCACATAACTAGTTTCGTCATTGCATAAATGTAATGTTTTACTCTTTTCATTAAAACTAATAGGCATAAAATTTCTCCTCAAATATACTCTAATTTCAAATAATTTTGTATAAAATCATTTACATATACTATACCAACTTCACTACTAAGACGATATAACACAACTTACTAAAAATAGAACAAATCTATTTTGAGAATTTACTAAAACTTTAAGTTAATTTTTACACTTTCAATAAGCCATATGTTATTTACGTATAAAAATAAGGTATTACCTCCACTTCCGCAAGTAGCTGTAATACCTTAT
ATTCTTTCTTTATGAAACTAATATTCCTAAAATAGTTATTTCTTTAGCTATATCTTCTTTAACCACATCAATCTCAATTTTATATCTCTTAACTTCATCATTCAATTCTAGTATTTCTGTTGCCGAATAATCCCCCCAACCATCTTTAAAATAAGTATCTAAAAACGCCTTGCTCTTACCATTTACACTTATATTGATTTTCCCCGAGTTTATAGAAATGCTCTTTTTATATAGTAAGATAATGTTTCTCGCCTCTAACTCAAAAACTAGTTTTCCATCTTTATTATCTTCTATAGTATATTTCCACCCATTTTGAAACACTTGAAATCCTTCATCATAATTATCAAATCCTAACATTTCTTTTGGTTGAATATTAGCATTATTAATTATTATTCCATTTATATATCTATCTTCGAACACACAAGGAGTATTTAAAACATTATCTGTTATTTTTATAATCTCTCCATCTTTATAATCATTTTCGTAGATATTATTAATAAAATTTATCAATAGTTTAGATATTATATAATGACCATCATCATTAGGATGAACTTCATCAGTTAGTACATCAGCAAATTTCATTTTGTTTTTCACAACCTCACACTTTAGAGCATCTCTAAAGCTTATCATCGGTAAGTTATATCGCTTTCCTATTTCTATTTGTTGTTCTTGAACATTAAACCCTGTATCCATCGTCATAAATACTTCTACTACTGCCGGTTTATTTTCGCTAGTTAATAATTTTCTAATTAAGCTTTCGTAAGCAACCTTACACGAATAATCTTCATGGTCATTAACTGCGGCATCAACAAAAACTATATCTGGATTTTGACTTATCACCTGCTTTTCTACTCTGTGAACTCCTATTAAAGATCCAGTTGCTCCAACTCCTGCATTTATATACCTAACATTTACCCTATTGAATTTTTCTTTTAACCAAGCATAAGTTAATTCAACATAACATTTTTCCTGAACTGTTGAGTTACATCCCTGAGTTATAGATCCTCCTAAAAATGCTACTATTAAATCTTCTCCCATCTCAGCTTTTCTGATTAACTTTAATATTCTACTTATATCCCCTTTATTAACTAATGAACTTTTAAACATTTCTTCTGTTATTTTAAATCCCATGTGTCATCCTCCTAACAACTTTTTATATAATTAGTTAATTTAATAGTATTTTAACTTAATCGTTTAAATAATAATTGAATAATTTTATTAATATAAGTATAATGATTTTTACAAACATAAACTTGCATTATTTCAGCAAATAATAACACTATTTTGATATTAAATTGAATTTATAATAAAGAGGTATTATTTATGAATAAAAATATTTTTAAAGAAACTATGTATTCTATTGATTCTATGTTCCCTTTATATAGTTCTGATATTAATATTGGGGATAAAACAAACCCGCTTAATTGCCACTGGCATGATGAAATTGAGTTTATTTTAATAACTTACGGAAAAGCCATATTTCAAATAGAAAATTCCTCCTACGAAGTAAACGAAGGCGATATCATCATCATTGGATCTGGCGAACTTCATTCTGCTTACAGCCAAAGTTTTAGTGATTCGTGTATTTGCAAATCATTGACTTTTAATTGTGATATGCTAAGCAGTAAAAGTAGTGACTCGATTCAGATTAAATTTATTAATCCGTTAATAAATAATCAATTGAACTTGCCTCATCATTTAAAATGCATCAACGATAACGAAAGAATGATAAGATCATTTTTATTAGAATTAATGTCTACACTGAGTATTAGAGAAAGCAACTTTGAATTAACAGCTAAATCATATCTATATATGATTTTTTCAAAAATAATGCTTATGGTTACTCATAAGAATGCTAATGAGTTATTAAATGTTAGTAATTCAAATAAGATAGATAACTTTAAACATGTTCTTAACTATATTCATATGAACCACAATAAATCCCTTACAATAAAAGAGCTATCTCTGCAACTTAATATGAGCGAAGGGCATTTCTGTCG
TATCTTTAAATCCCTAACTTTTAAAACTCCTATTGATTACATTAACTACTATAGAACGACAAAAGCGCAGGAGTTTCTAATAAATAGCGATAAAAAAATCTTAGAAATCAGCATTGATGTTGGTTTCAATAATCTTAGCTACTTCATAAATATTTTCAAGAAGAACACTGGCTTTACGCCATCTGAATTTAGAAAAAAAATACATCTTCGTGGAGCAGACATTGAAAAATGAGCTACTATATATCTTATATAATATTTTTAAGACATATTGTAATTATTTACACAGTTGTGTTATTATTTATTATATAAGCTAAACGTTAATTCTTAATTTAATCAATCTTTTTTGCTAATACATACTTCCAATGCAAGCTACATCAAAATTTCATAATACTTAAATTTACGTATAATACTGTGAATATATAGGGGGGACAAACAAATGATTAAAAAAATACTGAAACCAGAGAAAATTAAAGTAAAAATCTTCAATTTTAAGCCAACAAATCTCAAATATATTAGCTTAAATTTTGTAAAAGAAAAATTCAAAAATCTATCTATCCGCAAAAAGTTATTTTACTCTTTTATGTTAGTATCATTTATCGGAATAATTTCCGGATTAATAGGTTTGACATTTATACAAAAAACAACCAGTGAATATAATTCTGCTTTAAAAAATTACGGTTTTTCTCAAGGTGACATAGGAAAACTTGGAATAGAAATAGAAAAGTCTAATTCTTTAGTTAGAGATACTTTATTTTTAACTGATGCTAATGAACAAAAGGATGCCAAAAATTCTCTTAATAAATCTTTAGATGAAATCGAAGAATTACTAAATACTGTTACAAAATCTGTTACTTCAAACGAAGAAAAGGAAATATTAAACAGAATAAAGATCAATCTAGCAGCATATAAACAAATCAGAACCACTGTTGTTGTAAAAGGTTTAGCAAACGATAAAGAATCTGGGCTAAAAATCTTCAAATCTGACGGATCCATCTTGATGAATAAAATCAGCTCTGATATTTCACTCTTATTGCAAACTAAAATAGATACATGTAATCTACTTTCTACCAAATTAAATGTTTTAAAATTTGTTAGTATAATAATTGTTATTACAAGCATGATAATTTCTTTAATTCTTGGCATTTTCTTAGGCAAAAATATTATAAAATCGATCAGCAACCCTATTGATCATATGAAAAATGTGGCAAGCGAAATGGCTAATGGTAACCTTGAAGTTTCAATAGATATATCTTCAAATGATGAAATAGGTGAATTAGCTTTATCTTTCTCTCAAATGATTAAAACTCTAAAAAAATATATCACTGAAATATCTACCGTTTTAGGCAGCATTTCTAATAGAAATTTTGATATCCATATTATAGAGGATTACAAAGGAAATTTTATTCAAATAAGGAGTTCACTGGACAATATAGTAGCATCCCTAAGCAATGTATTCTTAGAGATTAAAGATGCAACTATGCAAGTCAATAGCGGAGCTAGTCAAGTAGCTAGCACTTCGCAGATTATATCAGAAGGGGCAACCGAACAAGCTAATTCTATCGAAGAATTATCTGCCTCTATAGAAACGATTTACAACCAAGTTCAAAATACTGTGACTACTGCGGATAATACTAATTTAATTACCATGAACTTAGTTAAAAGTATACAAAATAGCAATTCCCAAATGAATCAAATGCTTTCTGCCATAAATGATATAGAAAGATCTTCTAAATACATCAGCAATATAATTAAGGCAATTAGCGATATAGCAACCAAGACGAATCTTCTTGCTTTAAATGCTGCCATCGAAGCCGCACGAGCTGGTGAGGCCGGTAAAGGTTTTTCCGTAGTAGCCGATGAAGTAAGAAAATTATCTTTCCAAAGTGCAGATGCAGCTAAACAAACATCATTACTTATTACCGATTCTATCAAAGCTGTAAATAAAGGTAGAGATTTAGCCAATAGCACTGCTAAAACCTTATTGGAAGTAGTTAATAGTG
CGACTAATGTTACCGAATTAATATCAAATATTACATCAGTCTCTAAGGCCCAGGCCAATTCAATCGATCAAATACATGATAGTATTTTAAAAATATCTGATGTTGTTCAATCAAATTCAGCAATAGCTGAAGAAAGTGCAGCTTCTAGCGAAGAATTGACTGCCCAAGCTGAAACTTTAAATACAATGGTAGATAAATTTAAGCTTAAATCCTAAATAAAAAATAAAACAGCCGATAGCTTTACTGAATGTAAAACTATCGGTTAAATAATATAATAAAACTTATATTCCTTTTTCACTTTTATTCATAAGAATACCGGTGCAAATGGGCTAATTTCTCTTATGAATTTTTCTCTAATTTTCTAACGGTATCTTTTATAACAATTTTTCCACTTATAACTCTTCTTGCCGGCTTATAATCGCTCTTATGCACTTTCTTTATTATAGCACCAACTGCCGTTTCAGCCATTGTCTTCATATCTACTTCAAAAGTTGTGATCCCTAGGTCTGATAGTTCTGTTATTAAATAGTTATCAAATCCAACTACAGAAACATCTTCCGGAACCCTATATCCTTTATCCTGTAATTTCTTAATCAAAATATATGCTACTGAATCACAATTACATGCAAAAGCAGTTGGCATTTTCTCTGGCAACTCAAAATTTATTTCTTTCTTGCCAACTTCTCTATCTGAAATATAACTATTTGAAGAATAATCAATACCATTTTCAAGTAAAGCCTTTGTAAATCCTAAAAAACGATCTTGAATGCTGCTTGTAGCACGTGGATTTCCCACAAACGCAATATCCCTATGACCCATTTCAATTAAGTAATCAGTTAATAAATACATCCCATAATAATTATCCGAAGTTACAGTATCATAGTTACTATTTTTATCGTATGTATCTAACATCACTACAGGAACTTCATTAAGATCATTTAGATATTCTATATAATCTTTTTTCAATTCTCCCATAACTATAATTCCATCTATTTTACTATCTTGTATCATTCTTGGAATTACTTTATTATTCTCATCATCTTTGCTTATAATCTCCATAATCCCATAATATGATTTTTTAGTTAATGCATTTAATACATTTTGATACAACTCCCAATAAAAGGAATGATACATTTCAACAAATCCTTCTGGAATTAGCACCCCTACATTATAGGTATGACCATCTCTTAGAGATCGCGCAATAGAATTTTGACGATATCCCATTTCAGCCGCAACTTTCTTAATTTTTTCCCTTAATTCATCACTAACTCCCTCTTTGTCAGATAGTGCTTTAGAAACTGTAACTTTACTTACATTTAAAGCTTCAGCAATGTCAGCAAGTTTAATAGCTTTTGCCATATTATTCATTCCCTTCTTGTTGTACGTATAACTAACTATCATAGATGCTGCTTGTTTACTAACTATGATAACATACGGAATTATCAAAATATTTCAATGTTGTTGTTAAACTAATCCACTTAATAAATTTTATAACTTTACACATCTGCATTTAATATATTATAGAATGATGGGTATTATTTAATATTTTCATATGATAAAAAAGTTTTTAACTATAATTATTGTCTATAGTCAAAATTTTATCCTATTCTGTTTAAAAAATTTTCTGCTCACATTCAATTCTTTATAAATAAACGGTTAGTTTTCATTGTTCTCTAATATTATACAATAAAAAGCTGTATTTCGCTATTAGTATATACTCCAAAAGAGTTTTAATTTTAGCTAAAATACAGCTTAATAATTTTATTTTTTTATATTCTTACTTCACTTTTTAAAGTTAACGTTTAAGCGGTTTATAAAAAAAATTATAATTTCCCCTCTTGCCCGTCTATCGTTCCAAGTTCATTATGTTCTTTAATAAACTTAACAGTTTCATCAATAGTAGTATACGCCAAACCTACATACGTATCCGCAGCACCATAGTAAATAGCAATTCTTCCTGTTTCAGCATCAGTTAACGCCGCACATGGGAATACTACATT
GGCTACAAATCCACGTTCTTCATACCATTCTTCTGGTGTTAACATTATACTTCCAGCCCTATATAACACTTTCGATGGACAATTCTTATCTAATATTGCTGCACTCATAGAGTAAACCAAACCGTTGCATGTTCCAGTTACACCATGATAGAACATTAACCATCCTTCATCTGTTTCAATTGGTGCAGGTCCACATCCAATTTTAATTGACTGCCACCATCCTGATCCACCTTTTTGCATTACTATTCTATGTTCTCCCCAATATTTTAAATCAGGACTTTTGCTTAAGAATACATCCCCAAATGGAGTATGTCCATTATCACTTGGTCTAGATAACATAACATATTTTCCATCAATTTTCTTAGGAAATAAAACTCCATTACGGTTAAATGGCAAGAATGGATTTTCTAGCCTTATAAATGTCTTAAAATCAGTTGTTTTAGCTATTCCAATTGCGGCACCATCAAAATCCCCACACCAAATAATATAATACGCGTCCTCGATTTTTAGTAATCTCGGGTCATATGCATAAAGTGGTTGATAATCACTTCCTTCTTCATCTACAAATTTTATTTTTTCTTTTTCAAACTCCCAATTAATTGCGTCTTTACTATACCCTAAATAAATATGTGGTCTTCCATTTATAGTTTCGCCACGGAACACTCCTATAAATTCATCCCCGTAAGGCATAACTGCACTATTAAATATTCTAGCTACACCCTCTACAGGATTTCTTTTAATAATTGGATTTTCAGTATGTCTCCATATTGGACCATTAAAGTTAGCTGGCTTCTCTTGCCATGGAATATTAGGTAGATTTTCGCCTAAAATTTTACTCATATTAATTCACCTTTCTTAAGAAATAGCCAATCTTATGTATTATTATATAATTCTAAAGAATACACAATTTAAATAGCATTTCTTTCTATATTTATTATTATTTATTAGCTTTGTTATAAATATAATTAACTTATTCTTTTACTGACCCCGATACTAATCCACTATAAATATGCTTTTGTAAAGTAACAAATACAAGAAACGTTGGAATCATAATCAGAACAATACCCGCAGATATTATTTCCCAATGGGCACCAAATGGTCCTTTAAACTTAAATAACGTTGTTGATAACGTTAATAAGTTTGGGCTAGGTGTATATAAAAATGGTGTGTAGAAATCATTATAAACACCAACACCTTTTGTGATCATAACTGTTACTATTGCTGGGGAAAGTAGCGGAAGAATAATTTTATAATAAATTTGAAAATAGTTTGCACCGTCAATAATCGCAGATTCATCTAACGAAACTGAAATATTCTCTAAAAATTGTAGGAATATATATATAGTCATTATATCAGTACCGCAATATAGAATAATCGGAGCTAATCTAGTATTAAATAAACCTAACGCATGTACAATCTGGAATGTTGCAACTTGTGTTGCTATCCCTGGGATCATTACAGCAACTAGGAACATACCATTTACAAATTTACTAAACTTTGTTTTAAATCTGCTAAATATATATGCTGCCATAGATCCAGTAAGTATTGAACCTATTAATGAAAATACTAAAACAAATAATGTATTTTTAAAACCATTTACCATATTACCTTTTTCAAAAACAGCTTTAAAATTATCTAGGTTAAGGAAAGACTTAGGTAATGCTAACGGGCTTGTAGTAGTATATTCAGCACCAGTCTTAAACGCTGTAAAAAATACACTTAATATAGGAATTATAAATACTAGTACAAAAAAGATTAAGATGGCATATTTAATTACTTCATAAACAATCTCTTTTCCACTCATTTTTTTTATTTTCATACTCTTACATTCCTTTCCTTTTCATAACTGCATTTTGTATACTAGTGATAATTATACACATCACCAATAATACCATTGCCATTGCTGAAGCCAGTCCCACTCTATGATTTTGGAATGCATACTGTACTGTTTGAACTATGAAAGTCATACTTCCATTTCCACCATTGGTCATTATATATGG
TGTTTCAAACGCTGATAATGCTCCCGTAAGCGATAAAAATAATTGTAAACCTAGAATTGTTGAAATACCTGGTATTATTATATATCTAAGTTGTTGGAATCTATTAGCTCCATCTACTCTTGCCGCTTCTAATACATCTGATGAAATAGATGCCATTGCAGCCGAAAACATTACGATATTATACCCAATATATCTCCATACCGATACAGAAGCTAATGATATATTAACAATACTCGGATCCTGTAACCAATATTTCACGAAACCACCTAAACCAACTGCCTTTAAAACAGTATCTAAAGTACTATTAGGTTGGAAAAAGAATATAAAAACAAAACTAATTGCAACACTATTTATTAGCGATGGGAAGAAATATACACCTTTAAATAAATTTGAAAATTTACACCCAAATGAAACTAAGTATGCAATTGCAATACCTAATGCGATTTGAATAAATGATGCAACAAAATAATATATACTTACAAATAACGGCTCAAAATATTTAGCATCACTCATTATAAGCTTATAATTTTTAAGCCCTACAAATTCCTTAGTCTTAGAAATACCATCCCAGTCAGTAAAACTATATCCAGCCATTGCTATTGCCGGCAAATAAGTAAAGACCCCTAATAGCACTACTGGTACCATTAAAAACAGGACTGAAATTATAATTTTTTGTTTGTTATAAGGCAATGAGGAGAAGCTAATTTTTTTCTTCATATTGCTGGTGCTAGTTGCTGCATTTTTAGTATTTGCTGTGGCATTTTCAGCATTTGTTGTCATAATAACACTCCTTTACTCTTAAATTAAAAAGGCGATATTTTATTTTATTTTATATCGCCTCTTTTCAAATTAATGACTACATTAAAATACTATTTACCAATTGAGTCTACTCCAGTAGCCCAGCTCTTATTTAATGAAGCCATATATTCATCAAAAGTTTGTTTTCCATTTCCTAAACCAATCTCGATTACTGTTTTAACCCATTTACTATCATTAAGGTTAAGTAATGATTCTTTTTGAACTTTATCTAAATCAGTAGCCATTTGTGTAGTACCCATCTTTTGTTGAACTAATGTTGCTCCTGATCCCGCTAAGAAGTCTGGTAATTTTGCACCAACAAGAGAAGAAACCATATCCGAATCGTTTGGATATTTTTCTACGAAGTATTTTACGAATTCTTTTGCAATTTCTTTGTTTTTACTATGAACATTTACACCCATCATATAATCTGGGGCAATTTGCATAACTTGTTTTCCATCATGTGATATAGGTGATGCCATAAATTTAATGTCTTCAGGTGTTTTAGATTTAGCCTTCATTTGACCAACTGCCCATGATCCTAAAGCCATTACACCTATTTTACCATCAGCCACCATTTGCTTTGAAGTCTCCCAATCTGTTGTCATCGGATCTTCTTCAACTAACTTATTAGCAACTGCATCATTTAATAACTTTAATGATGTATAAAGAGGTTGCCCTTGTGCAAATACATTTTTGTCATAAATCATTTTATTTGGGTAGTCTACATCACCTGACGCGTTAACTAATTGCGCCGCAGACCAGTTAGTTAAGGCCCAGTCATCTTTATAGTTTGTATACAACGGAACTGCAGATGTCTTTTCTTTAATGGCTTTTAAATCAGCTATAAATTCCTCTGGTGTTGTTGGAGTTTTTGTAATTCCAGCTTCTTTAAAAACCTTTTCATTATAGAGAAAACCATTAGCGTTAGCACCAGTTGCTAAACCATAAACAGTTCCATCTACATCTGCATTATCAAGGAAATCATATTTACCTTTAAGTTCTTCTCTAGTTCCAATTGGTTCATAATAATCTTTATATTGATTTTTAGTTATGTTAGCAGGCAGCATTAAAACATCACCATAATCTTCTGTTCCCATTCTAGTGTTCATTGTATTTTGATAATCGTTTAACGCTTCAAAATTAACTGTGGTTCCAGGATGTTTTGATTCAAACTCTTCTTTATACTTTTTAAAAGTTTC
GTCCATGTCAGTTCTATGTGTTAATACTGTAATAGTTCCACCTGATTTTCCATCACTTTTTTCTGATGTACTAGTACTACTGCTACCACAACCAGTTAGCGATCCAACAGCTACCGCACCAGCTAATAATAATGCTAATTGTTTACGGATTTTCATTTTTCATACCCCTCTTCTTTTTTAAGTTAACGTTTAATCGTTTTTAGTATATATTCAAATTTAATAATTGTCAATATTGTTAACCATAATTATATTACTTTTTAATTAATTTAACTTAAATTTTAATTATTTTTTTAATTTTTAATTAACTGATTTTTATTACTCTCTATCCTCACTATTCCTCTCCTTTTCACTTTTTAATATATATTTTTTTATTCCATTCCAAGCACAAAAACTGCTCAAAAAACTGCAAAAAGAAAATGTGAACAAAAAAAGAACAGGTATAGTGAATGGACAAAATAAAATACTTATTTCTATCAGAGATATGCATAGTATGAATGTTACGAAAGAATATTTCATATACACAACACTTAATAACATTGAATTTTTAACAATTATCCTTAATGGTAAATTTATACCTATACTCATTGGATATATGTATATCAATATAATACAGAAGAATACTCCTATTATGATAATAATTGAAGTTGCTATACTATAAATTAATCCTTCTGTGGCAATACAAAAAGCTAACAACCTTCCTAGAATCACCATAAATGCAATCACAAATAACCAAACAAGAAAGGATTGCTTCCAATTTTTCTTAAAAGCCTCCTTAAAATCATAAAATAAATCTGAAGGTTTATCCTGCACCATTTTAACTGTTACTGATGTCATCGCTCCAATAGCGGGGCCAATAGATATAATAGGAATACAGCAAATTATAAACAGTAAGTTCAATTTAAGCAGCGACGTAAATTCACGCGCAAATATATCCCCAAATAAGGCCAACCCCTCTTTAGGTGGTGCATCTTTGGAAACACCTCGCCCCTCTTTATTGTAATCAAAGAAAAATAATCTCATAATGTACCTCCAAGTTAGGTTCCAACCTTAAAGCATTAAATTTATAATAAAGCTACTTTACCCGTTATAAGTTCTATACAGCCAAAGATTGTCTACCAATTATCCAAGGATGTTCAATATAGAAAAATAGTTGGTAGCACGAACAAAAAATACCACATTAAATTCATCAAGCTAATCTATACACTTAGATTTTATCAACCACTCTATTACTTTTTCCAACTAGATTAATTTCAAATTGTTTTTCCTGTCTCTTTATCAAATAAATGCATCTTATATTCATCAATTGCTATCTTTATCATATCTCCAGGTTCTACTATAGTATCTGCTGATGCGCGAACAGTTAACTTTTTGTTATCGAAATTAACATAAAGATACACCTCCGCTCCCATTAATTCATGAAGGGCTATTTGAGCATATAGTGTACTTTTTTCATGTTCTTCTAAGAAATCATTTTCTATGCTGACATGTTCTGGCCTAATTCCAAGAACAACATTTTTTCCAATATACGCTTCAAAAGTTTCCTCTCCACCTTTACTTTTAGGAATAGGAATACTATAGCTATTGAATTTTACATAGTATTGGTTATCTTTCTTTTCAATAGTTGAATCTATAAAATTCATTTGTGGCGAGCCGATAAATCCAGCAACAAATAGGTTAGCCGGCGAATCATACAACACCATTGGCTTATCTGCTTGTTGTATGTATCCATCTTTCATAACTACAATTCTATCTCCCATTGTCATTGCCTCAATTTGGTCATGAGTAACATATACAAAGGTAACGCCTAATCTCCTATGGAGTTTAGTAATCTCCCCTCTCATTTCAGCACGAAGCTTTGCGTCTAAATTCGAAAGCGGTTCATCTAAAAGAAAAACTGCTGGTTCACGAACCATGGCTCGTCCTAAAGCTACACGCTGTCTTTGACCTCCTGAAAGCTCCTTAGGCTTACGGTTAAGTAAATACTCAATATCTAATATTTT
AGCTACTTCTCTAGTTTTCTTATCTATTAATTTCTTATCTTCTTTTCTTAAATTTAGTCCAAATCCAATATTTTTGTATACAGTCATATGAGGATATAATGCATAATTTTGGAATACCATTGAAATATCTCTATCCTTAGGTGAAACCTCATTAACTAAATTATCCCCAATATATAGTTCCCCTCCACTTATATCTTCTAATCCTGCAATCATTCTTAACGTAGTAGATTTACCACATCCTGATGGTCCAACAAGAACAATGAATTCACCATCATTAATTTCTAAGTTTAGATTTTTCACAACGGTTGCATTACCAGGATAAGTTTTTTCTATATTTTTAAGTGAAATACCAGCCATATTACGCCTCCTTATTTACTTGCGTTGTTATTAAAACTATTTTTAGGTTAACGTTAATCTAAGAATAAATATAATATACCATTGTTTTTTAATCCTTACAAGATTTAAAATATTATTTTTTGTAGTTTTTAGCCCTTCAATAATTTTTAATCTTTATTGCTTACTATAGAATTTTCCGCTATCACTCTTAGTAAACGTTCTCTTATAAATAGTTTATTTATTCTATCAATGCACGTCAATGTTATAGTAATAATAGTTTATTTTTCGGTATTTTTGCATATTTCTATGTTAACTTTATAATCTAATTAACTTAATTTTTACAGAACAAAATTATTTAATTTGTTAATAATATTTTTTTACTGTAAGATCTTTATAAGACCTAATGATTAATACATTTTAATTTAAAATTACCTTATCGTTATACTTTTTTTAACTTTTTTCTATTGACTTTCATAGTGATACTAAATTATAATTTGTCTGTAAACGATATATCATAAAAGGGGGAAAAGTTATGATTAAAAAATCTAAATTCAAACTAATGCTATCTTTGTTACTGTTATCTGCATCGATGTTTACTTTCCAAACTCCAGCTAGTGCAGCCGAAGACTCTTCATGGATAAGAGGCACGAACGTACCTGCAATCTGGTATGATAGTCAATCCTATTCTTCTCTTAACAAAATTAATAGCGAAGGATTTAATACTGTTCGTCTTGTTTGGAGCACTAGCGGATCAACTTCTAGATTAAATGATTACCTTACAAAATGTGATAATTTAGGCCTTAAAGCAATAGTTGAACTTCATGATGCAACTGGTGGTACCACTACAGACTCGTTGAATACTTGTGTGAATTATTGGGTTCGTAGTGATGTATTAACAGTAATGAAGAATCACCCTAAGGCTTGGTTAAATATCGCTAATGAATGGGGTCCAGCTAACAGTAGTGTTTGGAGAGATGGTTACAAAAGTTCTATAAGTAAAATACGTACTGCTGGTTATACTGGTACTATTGTAATCGATGCAGGCGGATGGGGACAAGATAGCAGTGATATACTAAACTATGCTCTTGATGTGTACAATTCCAATACTAATAAGAATGTTATATTCAGCATACACATGTATGGATCATGGAATAGCAATTCAGATATAGATTCATTCTTAAGCAGTTGTAAAAGTAAAGGTATTCCAATTATAGTTGGAGAATTTGGTTATAACTATAATAGCGGTAATAATAATCTTGGTTGCAAAGTTGATGTTGCTCATTTATTAAGTTATTGCAAACAAAATAAAATCGGATTTATCGCATGGTCTTGGGCTGGAAATGATTCTGCAAATTCATGGCTAGATATGACTAATTCTTGGGGAAGTTATACTTGGTGGGGACAATATGTAAAAGATAATATGTGGTAATATATTATAATCACATATGTTGCTTTGTCTCGGATATAAAGAGATATTATTCTATAGATATTCCATGAGAAAGGAAATCATATTTTGAGCCAATAGCAATTTTAAACTACAAAAAAGAATTAGTGAGCTAACTAAAAAAATACTTATTTTCTATGAACGTCTCTATGATACAGATATCTTCCATTGAAATTTAAAATAATATTTAAAGAAAAATTACCGAAACGTTAGCAATATA
GTTTATCCATTTTGTGAAATTACCTTGCTTCCATGAAAAACAAATATATTTCTTCATTCAAAAATTAATTGCTTAATTATATTATAAAAAATTTAAGTAAAACCTTATAATTTGCAACTATATAAAATTCATAAACATATTTTACAAAAGGAAGTAAAAGTAAGTTAATCAAATCTTATTCTTTTCATTTCCCAAAATATACCTTTCTATTCAACAAAAAAATCAGAACTTTTTCAACAAGTCCTGATTTTTTATTTATATAGGCTTAAATATCTTATAAAACTGTAACTATTTCAAAGATCAACCGTTTTAGTCAATTTGTGCTTAGCATAAATACAATTACAATTTTTCTCTTCTCTTTGTAGACTTTATAAATATCTCTTTTAAGGTTTCTTTGTCATCATATTTTATTGCATCTTTTATTTTATCCAATTCTACTTCAAAATTTTCAATGCTTTTTAATAGATTCTCTTTATTTCCAAGGAAAAGTTCGCTCCAAAGGTCTTCATTTATATTAGCAATTCTTGTTAAATCACGATAGCTATCTCCAATGAAACTTCCTGTTTCTCTTCCTTCAACATCGCTATTTACCAGTGCTACAGCTAATGAGTGCGGAAGCTGACTTGTGTAGCCAATCATTTCATCATGATATTCTGGAGTTATTCTTTTAACTCTTTTAAATCCAATTTTATAAATCAGATCTTCCACCATGTTTAAATTTTCTTCTTTATTTCTAGAAACCGGTGTTAAAATATAATTTGCACCTTTAAAAACTTGGCTACTTGCAAAATCTATTCCTTTTTTCTCCCGTCCTGCCATTGGATGGCCAAAAACAAAATCTATATTCTCCGGTAATATATTTACAATATCTTCGATAAACATTTTTTTAATTCCAGTAGCATCTGTTATTACTGCACCATCTTTAAAATTGCTTTTATTATCTATAATAAATTGCTTAACTAATCTAGGATATAATGAAATAATTATAAAATCTGCACTCTTAATTACTTCTTCTCCGTTTTTAAAACCTTCTCTTATTAATCCAAGTTTTTTAGCTTTTTCTAAAGATTCTTCATTAATATCTATTCCATAAACATCATTATACCCAGCTTCTTTTAAAGCCATAGTAAATGATCCCCCAATTACTCCAAGACCTACAACTACTATTTTCATATCTATAGCTCCCTTATATAGAATTAGATTTGCTTATCTTCAATTTTTGCTACAGCTTTAACTTTACTCATTAGCACATCAAATTGATCTGGAGTAAGTGATTGTTGTCCATCGCTTAGTGCATTTTCAGGATCGTTATGAACTTCTATCATAAGTCCATCAGCCCCAGCTATAATTGCTGCTTTTGCCATTGGTTCTACCAAGTAAGCATAACCTCCTGCATGACTTGGATCAACAATTATAGGTAAATGTGATAATTTCTTAATTACTGGAACTGCTTGCAAATCTAATGTATTTCTTGTAATAGTTTCGAATGTTCTTACTCCTCTTTCACAAAGAATTACATTTTCATTTCCACCTGCCATGATATATTCTGCTGACATAAGCCATTCTTCTATAGTTGCAGATAAACCTCTCTTTAATAGTATTGGTTTATTTGTTTTACCTATTTGCTTTAATAGATCGAAATTTTGCATATTTCTAGCACCAATTTGAATCATATCCACTTCTTCAACAAAAGTATCTACGTAATCAGTAGACATAAGTTCTGTTACTATAGGAAGTCCTGTTTCCTGTTTTGCTGTTTTTAAAAGTTTTAATCCTTCAAGCTCTAAACCTTGAAAGCTATAAGGTGAAGTTCTTGGTTTAAATGCCCCACCTCTTAAAAAGTTGGCTCCTGCTGCTTTTACTCTTTTTGCGATTTCAACAATTTGTTCTTCACTTTCTACAGAACACGGTCCAGCCATTATTCCAAGTCTCCCACCGCCAACTATTGATCCCTCAATATTAACTACTGTATCTTCAGGCTTAAATATCCTGTTTGCTTT
CTTAAAAGGTTCTTGAACCTTCATAACCCTATCTACACCTTTTAATACTTGTAATTTTTTAGGATCTAATATTGAAGTATCACCAACAGCTCCTACTATAAAATATGTGTCTCCTTTAGAAAGATGAGC +>8_1#NODE_18_length_37282_cov_67.8523_ID_35 +ACAATCACAGTGCCTGTAGTCTTGCCTGGCTTAGTTACATCTGGAGTCTTCTCCCAAGTGTACTTAGTGCCGCTTGGCATATCATCCTTGTTCTTAATGCCCTTTTCTGCTGGTGGTACTACACCTGGGGTAGTGTGAACATCTTGACCTTCTGGGGTGTACTTGTCAGCATCAGTTGGAGTTGTAGTCTTAGTGTAAGCAACTACCACATTGGTATTTGGTGTGTTAGCATTTACATTCTTATCAGCTAAAACAACTTTTGCATCTTTGCCATCAACGGTTGAAGTATAGTCAGTTACTTGTGGCACATTGTATTCTGGCCAAGTGCCTTCAGTTGCTCCAGTTGACTTCCAAGTATTGCCATCTTTTACGCCAACAGTCCATGGAGTATCAGCTTCTACATTTTCACCTTCACCGCCACGAACAAAGTGAACAGTTTGAACAATCGTTTCAGTTGTTCCATCAACCTTATTAACAGTAATCGTACGAACAACGTCTTTGTTCATGTCCTTATTATTTGGATCATATGGTGTTGGAGTTACTGTATTACTCTTTTGGTAAGTAATATGAACTGCTTCACCATCTATTCAATTGCGGAAGCTGATTGAATTTCAGTTGCTTTAGTGCCATTTACGTATGAATCGTAACTAGAAATTTGTTCTACTGCTGCAGTTGGATATTCTGCTTCGCCGCCTAAAACGAACTTGTTGTTTTCGATCTTACCAACTTCCCAAGCACCAGTTCCAGTAACTTTTCCAGTTACTAAGTCTTCAACACCGTTTCTACCAAAGTTAACAGTTTGAGTTGCAATTAATTGCTCTGCTTTGCCTGGCTTAGTTTGGTAAATCTTACGGCTAACGCTAGTAAACATGTCCTTGTACTTGTACTTAGGATCAGTTGGATTTACACCTGGAGTTGTTGGAGTAATGTCCTTTGTTTCGTGAACTAAGTTAATAACAACTGCTTCATTCTTAGCAGGAATAACTACATTTTCTGGTAATGTTTGACCTGCAGCTAGCTTGTAACCTGCTGGAATGGTTAAGTTAGTCTTTACATTAACGCCTGGCTTACCACTTACAACAATTGGGTTACCAACATTCTTCTTGTCAACAGTGTCATAGAATTGATAACTTACAGTTACATCTGCAACTGGAGTTTCACTTTGTTGGTAAGTAATGTGAACTGCTTCACCATCTACTGGCTTGCCGTTTTCAACTGCGGAAGCTGACTGAATTTCAGTTGCTTTAGTTCCATTTACATATGAATCGTAACCAGAAATTTGTTCTACTGCTGCAGTTGGATATTCTGCTTCGCCGCCTAAAACGAACTTGTTGTTTTCGATCTTACCAACTTCCCAAGCACCAGTTCCAGTAACTTTTCCAGTTACTAAGTCTTCAACACCGTTTCTACCAAAGTTAACAGTTTGAGTTGCAATTAATTGCTCTGCTTTGCCTGGTTTAGTTTGGTAAATCTTACGGCTAACACTTGTAAACATGTCCTTGTACTTAGGATCAGTTGGATCTACGCCTGGAGTTGTTGGAGTGATGTCCTTTGTTCCGTGAACTAAAGGAACAATGATTGTATTATCTGTTGTTTGATCAACTGGATAAACGAAACTTGGATCAACTTTCCAATTGGTTGGAACTTCACTATCAATATCAGCCTTCAAAGTAGAAGCCTTTAAGCCTTGATTCTTATTGAAGTTAAAGGTTAACTTGTCTTTGCCATTCTTAGTTTGCATTGGAGTATTAGTTTCAGCATCAATGTACTTAACAATTACAGTTGCATTGTAGTCTGAATCATCATGTTGCTGTTTACTTTGAG
TTACTGGAACAACTAAAACTGCAGGTTGACTAGTAATATTGCTTTCAGCCGCTGGATAACTATAGCCATCTACAATACTGTAGTTAGCTGGTAAATTACTGTTAATAATATTCTTTAGTTGCTCAGCACTTAAACTTGAACCCTTGGCAATATTACCAATAGTGTAAGCTGCAACTTCTTTGCCATCATATTGGTAAGAAACTTGTTGGTTAAAGCTAGTCTTTTGATCATCTGGCTTACCTGAATTTCCATCTTTTTCAACGTTGATATTAGCCTTAACGTCAAGATAAGTACCATCGTTAAACATAATTCTTACAGTACCTGGAATATAGGCTAAAACTTCTGGTGCATTAGAATTAGTATTTTGACCATTCCAACCTTCTCTACCAAGGCTATTAACGTTAATTAAAGTACGATATTGTTATTGAGTTAAATCTGAGATATTACTATTTTGCTTAAAGTTCAATGTTGAACCTGTAGTAGCACCGTAGAAGTTGAAGTAATTATTTAGCCAATTGTTATATGCATTAACACCTAAACCTAATTTAGAAACAACATCAGCATCGTTAATTTTGAAGTTATAACGCCACTTAGAATTTCCTGGTAAACTTTCGCCTGTAGTAGTCTGCTCTGCTGGATTTACTTGACCGTTAGCTAATTGATACAAAGTATCAGCTGAACCATTAGCAAAATTACTTACATTAGTGTTTGGTGCAGAAACGCCTGGTAAATTAGTAATCCAAGAATAAGTAAATGCACTAGCTGCAAATTTATCGTTCGCATCTTTTTCGTTGACATAGTTCTTACCATCTGTACTTAGCTTATAAGTAGTTATCTTCTTGTAATCACTTGCTGAATTACCAGTACCATTCCAGTCACTGTAGAATTTAATAGTACTAATAGCACTATCATTAACCGATGGAGTGAAATCAACACTAGTCTTATGAACACTGGCTACTGGAGCAGTAAAACTAGTCATCGTTTGATCGCCATAATAAGTATTATGCTCATGATCCATACCAGTTACACTAGCTGGAACATTAACAATCTTGCTTGAGCCATCTTTGAATGTAACAACAACTGGTACATTAAAGGAACCAAGTTGAGCATTTGAATCTACTGTTTCAGTTACTTTACCTGTTGTTTGGTCAACATCCACGGTAACACCAGTTGGAGCTGTGAAAGTTCCAATCCTATAGTTAGAAACTTCATCGTTTGGCATGTCTGCTGGAATTGTTGGGTTAACAGAACCAGTATTTTGTCCTGAATCTGGACGTTCAACATTCAAATGTGGATAAGTAATGTGATATTTTTCAGAATCTTTTTCTTCTGTAGTTACATTAATAGAACTTGCTGGAATATTAATATCTAGATAAGTATTTTGTCCTTGAGCATCTTTATCATTGAAAGTAATTCTAATTACTCCGCCTTGATCTTGAACAGGCTTAGTTTGCCATGCGGTGGACTGAATTTCACTATCTGGAATATTGTTTTGAACTAAAGCCTTAAATTGTGAGCTAGTTAAATCTGAACCAGCCAAAATATCAATTGGCTCAGCTAAAGTCTTAGCACCAGCACCTTTAGCATCAATATCAAATGGGTTAGTCTTGTCAGTCTTTCCATCAATAGTGATATCAATTACGTTATTAGCAATATTCTTACCGGCAGCTGTAGCTATTTCTACAACAGTATTAGGTGTATTTTCCCAATCTGCAGTGTAATTTGTAATAGTTGTTGGCGTAGTAGCAATTTGGTGATTTTGGTCAATACTGTAAGATTGAACAGATGTAATCATGCCACTGGCAGAAATAACTTGTGAATTGTCACTTGTTTCATGTGCGTTAGCATTAGCACGACTAATTGTTACTACAGTTGCATTATCATTATTCCAAATAACAGTTTGAGTACCTTTAGTTACAACAACTGGTACATTAGCATTTTGCTTGCTGCCATCTGCGTATGTAATAGTAACAGGAATGTTGTAAGCGCCCGGTGTAGTG
CTAGTTGTAGGAGTTAAAGTAACTCTACCTGTTGAAGGATCAATTTTGCTCCAAGTGGGGGTTGCTTCAGTTCCTGCAAAATGAGTACCGTCTGGAGTAGTAATTACTTCGCCAATCTTATTAGTAAAGGTTGGATCAGTTGTAGCTGTCTTTCCTTGTTCAACACTAACTGGCTTATATGAAGGAGTATATTGGTTATTTGTTTGTAATTCAGAGCCAAATTTTACACCAGAGGCATTCCACCAACTAAAGTGATTAATAAAGCTATTCAAAGCATTGGAAGCTGCAGTTTCAGTTCCTTCAGGCACAACAGAACCATCAGAGAAACTGTTCGAACCCGTCTCAGAAGGATTTGCAGTTAAAATTACTTCGTTAACACCTTTAGCTGGAGTATCAGCATTAACAGAATCAATCATATATTCTGAGCCTGGTTGATTCCAAGCACCATATTGCTTGTGGGAACGAAAGGCATAGGCAAAGTCCCCACCTTGACTAGTATTGTTTAAGTACTTGATGTTCCAAGTTACTGGATCACTAGCACCTGCTGGCACCATGGTTAATGGAGTATTATAGTTGCTATCAGCTGAATTAATCACAATGTGAGTTTGTTGGTTAGCGACATCACCAGCACCATCAGTCTTAATTAAGTAGCCTGGCTTGTTTGCTGCAGTACGTTGTAAGTTAACTAACTTAGCATTAGTGAAGTTAATGTAGTTTTGACTACTTGAACCCCACATAGTTAAAATAGCAGGCCAGCCATTTCTATATGAGCCTTTAGTTAAAGTATAGGTATCTGGTAAGTAACTTGAATATGAGGAATCTTCCAAATCTAAGCTACCACCATTCACATCAAGATGATAAGTTGCACCAAGTCCGTCCTTTGGTCCCATTGCAAGCAATGGTGAATCAGAATCAGTCACTTTAGTTCTAATAATTGAAAGTAAAGCATTTTGACCAACTCTCACCGTGCTATCAGCAGTCTTGCCATCATAATCTAAGCCAATAAGAGCAGCTGATCTCCAACCAGTGGTATTCATCTTGCTAGTTGTAACTTTTAATGAAGAATTCTCTTGTAAATCAAGATCGCTAGCATTCATAATTCCCATTGAGGAACCAGAGCCCAACTCCATCGTTACTTTAGCATTAGGCATTAAGCGAACAACACCTGAGGTTGCATTATCAATAGTTTGATTTCCTGCACCGGCACCATTAGGAATAGTAATTCCAGCCATGTTAGAAGTATTAGCACTGTTGAAATCAACTGTAGTACCATTACTTACTACAACATTGCCTGAAACGTTAATATTGCTCAAGTTATTTGAAGTACTGTTATTAGCAGCTAAAGTCGTTGATCCGTTAAGGAAGTTAACATTATTAGCTTGAATCAAGGCATTCCCAGTTGTATTTGAACTAGTTAAATTGCCTTGCAAATTATTCTTACCCTCGAAGTTAACATTAGTACCTTTATTATTAAGGAGTTGACCTGAGTCTTGACTCGTAGTAACGCCGTTAAAGGTAATTGCATCATTATTAGCAGCAGAATTCTTAAACCAGAATGGGCCATAACCACTAACAGTTTGCAAACTTAGGTCTTTTAAGACTACGTTCCAGCTATGACTAGCAGTGCTTGCTCCTTGATCATAGTGCGTATTGTCACCTAAGCTAATGTAGCGATTGCCCATGTCTAAACCATATTGATTTTGACCATCAATGGTAACTTGACGCGCACTACCATAATTATTAATTGTTTGGTAAGTATTATTACTCAAGTTAGTATTACTAAAGTCAATATTTTGATTTAATTTAATTGTATTTACGTTAGCATCTTGTAATGCATTGATGAATTGAGAATAATCACTAACTGATTGTTCTTGATTATTTGATGTGTTTTGAGCTTGAGTAATTGAGTTTGTCTGAACAGTAGACTTTACTTGCTTAGTTGCAGCTGTTTTTGCTACAGCGTTAGTGTTAGTAGTTTGCTTTACAGACTCATTTTTATT
AGTTTGAACCTTATTATCAGAAACTACCTCTTTCTTTTGTACATTAGTCGTTTCTGCAGTTCTATTAGTGTCTAAAGTAATGTTTTGATTATTAGTTTGATTTTTAACTAGGCCTGGGGCTTCTTTGGAAGCTTTTGCAGTATTAGTAACATTTACTTGATTTGGATTTAAGGTATCAGCTTTTACGCTTTTACTACCTGCACCAAGACCGAAAATAGTAAAGCCAATTAAAACTGAAGCAGCTCCCACTGAAAATTTTCTAATTGAGAAACGCTCTTTTTTATTTTCCATCTTCCGAATTTTTTCTTGATAATTATTCTTAGATAACGTTATTGAATCTCCTCCGTCAATTCTACAGACCTTTCAGTGTTAACTATAGTTCTTCTTTTTATAATATTCAAGGATTTAATAACATATATTGTTTAATGCAGAACTATAAATTACGCTCTAAATATCAATAGCAAAAATTAACAGAAAATCAATATGCTATTCAGTTATTTTATGCTTAAAATCCTTGTTATAGCATAGTGTTTATACGTATAAGCATTTTATTATTGAATAGACAATTCAAAATAAAATATTGTATTTTCATTTGCAAAATATTAATTAATATTAGCCAGGGTATAAACGCCTGTTATTTTAAAACAAAAAAATAAAAATAATTAAAAAAGGAGCCGTATACTACACGACTCCCTCTTTAAATCTCTTTATCTACTAATACTATCAGCACGGCCTTTATAATCACCGTTTTCAGTTCCGATAATTAATTCTTCGCCTTCTTTAATAAAATCTGGAACAGTTACAACTAAACCAGTATCCATCGTTGCAGGTTTACCACCGCCAGCAGCAGTTGCACCCTTGATTTCTGGTTGAGTTTCTTTAACAGTCATCTTTACAGTAGATGGTAAGTTAATTCCGATTAATTTACCTTCATCAGTAAACTTAAGATCAACATCAATATTAGGCATCAAGAACTTAGCTTCTGCTGCAAGGTGTTCCTTAGGAATTAGATATTGTTCATAAGTATCAGTATCCATAAAGATAAAGTTAGCGTCATCATCATAAAGGTATTGTGCCTTCTTTAAAGCAACGTTAACTAACTCCACCTTCTCACTTGGACGCATTGTTTTGTGAACAACAGCTCCACTCATCACGTCACGTAAGTCCATTTGCATAACCGTGTTACCTTTACCAGGCTTGTGGTGGTTACTCTTTAGTACTTCAATTAACTTACCATCTTGGCTAAAAATCATACCCTTTTTTAATTCGATTGCTTGCATGTTTTCTCCCTAAAATTTTCTAATATACATATCGCTGATCTCATGATTACCATACTTTTTAATAATCATATCAGCTCTCTTCTTAGTTGGTTCGATATAGTCATGTAAATTCTTCAAGTTAACATCACGCCAGACTCTTTTGGCAAAATTATCTGCTTGTTCAAGCGGGACGTTAGCCCACTTATAAAAGAAATTATCTGGATTATTGCGATTAATCTCGAGCATTAAATGGTAGCGATCAAGATACCATTTCTCTAAGTCTTCTTCGCTTGCATCCAGATAAATTACATAATCAAAAAAGTCACTTGGGGGAGCTTGCCCTTCTGGAGGCACCTCCAGCAGGTTAATTCCCTCAACTACCAAGATATCAGGAACGCTGACATACCCCAATTCATTAGGCACCAGATCAGAAATTTCCTGCGAGTATAGTCTATATGGTACTCTTTCTTTACCAGCTTTAACGCTTGCTAAAAAAGTATAAAAGGCATCCCAGTTAAATGACGATGGGAAGCCCTTTTGATCCATTAAGTTTTTAGCTTTAAGTTCTGCATTCGACATTAAAAAACCATCAGCAGAAACTTGGGCAATTGTTTTATCTGGTTCAAGGCGTTCGAACAAGCGAGTTATTTTCTTTGCAAAAGTAGATTTCCCTACAGCAACAGACCCAGTTACTCCGATGATAAAAGGTATTTTTTGCCATTTTTCGTTATAAAAGGTCTG
CTTAGTGCGGTAGATTGCATTTTTTTGGCTCATATTGTACTGAAGATTTTTCATCATGAAAATCTCCGCACTAAATTCATCGCTTGGAGGGATTAGTGTCTGCCACTTTTCAGATGTCAGATGTGTAAATTGCTCTTGCATAAAAAAGCCGCCTTCCTTCTGCCTTGTTCCTAGTATTTTTACATTGTTCAGCCTATCATATATATTGAGAGAAAGAAAAGAAGATATCTATGAAAAAATTAATTCTTTTTGGTGATTCCCTACTTGCAGGCTATATTGATGGACGCGCAACTAATATTGTAACTCAAGGACTGCAAGAAAAATTACCTAAATTTACAATTATTAATAATTCTGTACCTGGGACAACTACTGAAGAGGCAATTGATTTTTATGAGTTACGAATCAAACCTTTTAAGTATGATCTAGTTATTCTGGCCCTAGGCACTAACGACGCCAACATGCAGTTTGGTTTGAGCGCTGGACGATATGCGCATAACTTACAAGTATTGGTCGATTTAATTGGGGCTGATAAAACATTATTAATGGGACCTTCCTATACTAATTGGAAAATTGCTCAAGATCAAGCTTGGCCTAAAACTTTGCAATTTGAATTAGTTGCTCAAGAATGCCATGTTGAAAATAAAATTCCATTCTTAAATTTTGCCAAAGTAATGCGGCAGACTGGTCACCCCAATAAACTATTGCAAAAAGATGGTATTCACCTAAATAAAGCAGGAAATAAGTTATTGATTGAAAGATTAGCAGACCTAGTTGAAGAAAAAGAATTGGTTACTGCATCGTAAAAAAGCATCCCATTGTTGGGATGCTCTTTTGTACTCCGGATATTTTATTCAGTTACTTCTTGCTTTCTTAATTCATTTTGTTCAGCAACTCTAAAGAATGGTAAATAAATAAAAATACCGATAATGATTAAAATCACTTCAAAAATAATATTTCTCCAATCCATTGAACTCATATATGCCTGGAAGAAAAACGGTGTAAATGAAGGATCTACAATATACCCCATTCCAATCCAGTGTAAACTTTCAGCAATATAAGTAAGAATAATATTAATAATTGGAGCAATCAAAAATGGAATACCTAAAATAGGATTAAAACAAATTGGTGTACCAAAAATTACTGGTTCATTAATACTACAAATTCCAGGAATAATTGATAGCTTACCTAATGCACGATATTTTTCTACTTTGCTTCTCATCATTAGAATTACTAAAGCAAGTGAATTACCAGATCCACCTAAAACAGCAATTCTAAACATCTGTAAATTCATTAAATGAGTAGGTGCTAATCCCTTTGCTACTAAAGCAGCATTAGCACCGGTAGCTGCAATACCAGTTGCAAAGACAATTGGAAAAATTACATTACTTCCGTTAACACCAATTAACCATAATAGATTTCCTAAAGTTACGATCAAAATATAACCCCATAAGCTATTAGCCAAAAATGTAGCTGGAGTCAAACACTTCATAATGAAGGTAGTAAAATCAGCATGCGTTAAGTTGACTAAAATTAAATTAGCACCATAGAAAATAAAGATATTTGCTAATAGCGGTACTAAGGAATTAATAAAGTTGGCAACCATTGGGGGAATCGATGCTGGCAACTTTATCTTTAACTTACTCTTTTCAATCCCGCGGTCAATTTCGACAGCTAATAATCCAATGATAATTGCAACAAATAGTCCATTTGTTCCTAAATTATTGAGACTAATCTTATTATTCTTATCAACAGTAGTTGCAACTATTAAAAAGGTAACTAATGAGATCATCCCATCAGTAGCAGGATGAAGCCTACGATATGACTTACTTAATTCATATGCAATACCAAAAGCACAAATCATACCAAAGATTCCCATAGTCATATTGTATGGGATAGTTATTTGAGCATAATGTGCGACAGCCCAATATTTCCAACCAGCAAGCCATTGCATAAAAATATTAGCTGTTCGTGGATTATATCTATCAAGTACAATTGGTGGG
TTAACAACAATTAAGAAGAATGAACCAATTACCAAAAATGGCAATCCGAAAAGCATACCATTTGATATAGCTTGTAGATGCTTTTCATTTCCAAGTTTTTGTCCAATAGGCGTTAAAACTTTATCCAATTTAGTAATCAGTGAATCTTTTTTAGCAGCAGTTTCAGCCATTAGAGATCACCTATTTTCCTTACTAAAGATAAATACAACTTAAATTTGATATGGATTGAACTTGAAAGCATTAGGTATAACTTTACTGTCATCCCAATTTTCTCTAATTTTGTGGTATTCGTCCCAATAGTCATTGTCCACTTTTAATTGTGGAGTAACCTTAGAATCGACTTCTAGAAAATGCATACTTCCTCTTCCCATCTTTTTGCCACGAGTGGTGTGCTTGTCTAATGCATAATCTGGAATTTCAGGAACATAACCTAGAGCAAAGTTCTTAATAATGATATTCTTAAGTAAATCAGAAGACCGATCCTTAGTTGATTCACATAAGTAACGAATTGCATGAACAAACATCATGGCACGATCCGGCTCATTGTAACTAAAGTTTTGTCTCATTTGATTTAGACTATTAATCAAAATTGGAGCTTGTGGATTTCCCATTCCTATATCTTCAACTGAAATAGCTTGAAGTCTACGCCATAGTTTTTCTTCCATTTGCGGAGAAGAAATATACATTTCATAAGCAAATTCACATGCTGCTCGTTCTTTGCCTTTTCTGATTGATTTTTGCAATGCAGAGATAACTTCATCAGCTGCTAATCCATTTCTAGTCCTAGTTCTGGCCCATGGATCAGCTAAAAATTCCTTATCTCTATTTTGTGGTACTTCATTTTTCTTAATATCTTTTTCTGACATCGTGAGTCCTTCTTTCAATATATAGGAGAAAACAATGAATATTCAAACTATTAGCGAAAAGTTTCAACTGAATACTGATGAACAAAAAATATTAATTTATATGAATCAACACCGTAATGAAATTAAAAATATCAATATTCGTGAACTCGCTAAACGAACATTTACATCTCCTAGCTTTATCGTTAAGACCTGTAAAAAGATGAAGCTTTCAGGATATTCAGAATTGGTCTTTCTAATTGCAGATGCGCCAAATTTTCCTAATAATACTGAAAACGACTTAAAAGTAGAGTCATACGTAAAGCCTTTTTCCAACTTGATGGATAAGCATAAAGACTCAATGATCATGATATTGGGCAGTGGATACTCTCAAAATATTGCTAATTACATGAGTGAATATCTTAATTTGAATGGCTTTCGTTGTACATCCAATTCGCATCTTGAGATGCTTAGAAAGCATAAGAATACTTTAATAATTATCATCAGCAATTCTGGAGAAACAAAGCGATTAGCTGAACTTTGCATACAAGCTCAGAAAAATAATCGTGATGTTATTTCCTTTACAGGAGATAAGAACTCAACAATTGCAAAACACTCAACGCTTGCAATAAGCTCAGATACTTTTAACCCCACTTCTTTTGACAGCCATTACCCTCAGCTGTTTTTTGGTCTAACACTCATCTATTTTGAATTATTAATGAGTAACTTTTTGTCTAATTAGGTACCTTCTTAAGACAATTCTTATACTACTTTGGTTTTTAACAATTTTGAATACTTTTCCTTGTTTTGAAAACATGTTTCCATTTTAAAAAACATTTTTATTCAGGCATCAAAAAAGGCATCCCTAACGGGATGCTTTTTTTACACACACTACTCTACGTACAAGGCTTGCTTACCTTGCTGATTATAGCTAGCCTCTATTTTCTTTAAACTTACTTTTTTATCTTTAACGCCATAAGGATCATTTACGTAAACAATCCCCTTCTTTTTATTATATCCAGTAACCACACAAGCATGAGAAGAAGGTGTCACATTAACCTTTCCCTGCTTAGTCTGCCATGTCCGCATATCATTTACCCGGTCATATTTTAAAGTCGTAATAATCATTACTGGGTGTCCTTTAGAAACTAACTTCAA
CACCCTAACAAAATCATTGCCAGTATAATTACGAATTCTCTTAGTATACTTCTTAGCAACATCATAGAGCGGCTCATTATAAACACACCACCCCGCGTTTTCGATACTCATATAACCAACAAAACCGACATGAGGATTTCCGCGATAATCATTGCCAATAAAAGAGGAAACATGCTTAATGTTAGAAGACAATTCTAATTTACTTACATTAATATCATAATAATTTAGGAGCATTGACAGTGACGTAACTTCACAGCCATTTGGCAGGTCCGGTAATTGCCTTTCTAAGGGGACATTCAATTTTTGCTCAGGTCTTAAAGTAAGCCAGTCATATTCATTTTGAATTTTTTCAAAATTAAGGATAATCCCAACTACTCCAGCTGTAATCAGTATTAAAATTCCATATAAAATCTTTCGTCGACGTCGATATCTCTTTGCTCTGCTTATTCTTTTCATGCTGAATATTCTATATCAGAAATGTCAAAACGCAAATTAAGTTGGCAATGTAAGAATTAAATAGATTTCATTTCTTTTGTGCTTTTAGCATAAGAATTAAATTAACTTTATTTATACTCGAAATATGGAACAAGAATTAAGTTAATTTAATTTATATGCTAAAATTAAATTAAGAATTAAATTAGTTAAAATCTTGTATCAACTATTATTAAAATTAGAGGTTAACAATGGTACCACGTGATTCTTACTTAAATATTTTAGATAAACTACGTAATAAACAAATCATTAAAGTCCTAACTGGAGTACGTCGCTGCGGTAAATCTACTATTCTTCAGTTATATCAAGAGCGATTGCTCAAGTCTGGAGTAAATACTAATCAAATTCAAACGATAAATTTTGAAGATTTAGACTTAGTATCAATTAAGACCTACCTAGACCTATATAACTACATCAATGAACATCTCATTCCTAACAAAATGAATTACATCTTTATTGATGAAATCCAAAGCATTCCTAATTTTGAAAAAGCACTCGATAGTCTTTATATAAAAAATAATGTAGATTTATATGTCACAGGATCTAATGCCTTTATGCTTTCTGGCGAATTAGCTACTCTTCTTTCGGGACGCTACATAGAAATCCCTATCTATCCTTTTTCTTTTAAAGAATTTTTACAAACAACTGAACTTTCAAAAGAAGAAGCCTTTTCAACTTACTTAGAACGAGGTGGATTTCCATTTGCTACTGAATTAAACGACAATAATACTTATCTTTCTTACATTCAAGGAATTATCAATACAGTATTAATAAAAGATATCCTAACGAGAGTGAATCGCGGTAATGCAACTTTACTTGAAGCAATTGCTAGTTTCTTAACTGAAGCCAACGGCAGTCTCGTTACTCCTGCTAAAATTGCCAATACTCTAACCTCTAATGGAATCAAGACTAGCAACGCCACAGTAATTTCATACCTCGAAAAATTAGTAAATTCCTATTTATTCTACCAATGCAATCGATATGATATTGCAGGCAAAAAATATCTACAAATTAATAGCAAGTATTACCCCGTTGACCCCGCCCTCAGACGTGCTCTTTTAGGTCAGAAACGTCCAAACATGGGCAGCAGGCTTGAAAATATCGTGTTTATGGAATTAAAGAGACGCGGTTATGAAGTATATGTCGGTAGTCTTAAAAATAAAGAAATTGATTTCGTTGCAATCAAAGACGGTGTTAAGCAATATTATCAAGTTAGTCTCACAGTCCAAGACGATAAGACTTATAATAGGGAAATTGCGCCCTTTCTTGAAATTACTGATAACTATCGGAAAATTCTTCTCACTCAAGACCCCGGTAGCTACAACGACAATGGTATTGAACAAATAAACGTTATCGACTGGCTACTAAAAGAGCAATAAATTTAAATAAACCCACAAAAAATGGAATGCCAAATTTGGCACTCCATTTTTTTATTTATTCACATAAGTCTTCTCGTCTAAATCATCAAACAGTGGGCTCTTCTCGCCTGTATAAATCAAGTCAAGTTT
ATACATTGCACGTGAAGTAATTGTGTAAAGCAATTGCGTTTCATCAAGTTGGTGATAATTATCCCTTGAAACATTCCAAGCAATTACTGCATCAAATTCAAGGCCTTTAGCTAAATATGATGGCATAACTAAAGTTCCAGGCACTAAACGCTGGTTAGCTGACCCAATTAAAGTTGCCTTAATTTTTCTTTCTTTCAAAACCTTAGCCACTTCTTTTGCTTCAGCCAAAGTCTTAGTAATAATGGCAGTAGTTAACTTTTGCTCATCATTTTCTACTAAGATATCTTCTAAAGCATTATATTCTTTTTCAAGACTGTCACGCTTGTAAAAAGCAGGCTTAGGACCACGTCTATCAAAGGCCTCAATCTTTTCACCTTGCCGCAGAATTTGCTTAGTAAAATTAGTTAATTCCTTAGTTGAACGATAAGACTTGGTTAATTGAACAACATCAGTCTTTTCAGGATCAAACAACTTGGAAATCTGACCTAAAAGAGTCTTACTTTCATCTTTAGTAAAAATAGCCTGGTTCAAATCTCCGAGCATCGTAAATTTAGCTCTTGGGAAGTTGTATTTAAGGTAAGCTAACTGGAATGGAGTGTAATCTTGAATTTCATCAATAAAGGCATAACGCATTTCATAGTCAGTTCTACGCCCTGTAACTAAATCGTACAAGTACAAATATGGTGAAATATCAGCCATCTTGATCTTGCCGTTTCTGAACCTATCTTTAACATTTTCAATATGTGCTTCCCACTCTTCTTGGCTAATCTCCCACTTGCTTAAATCAATCAACTTAGGAACAGCACGCAAGAAGCGCAAGTATTGCGCACGAATATTCAAGAAACGGTTCTGATGAATCTTACGACTAACTTGCTTTAATTGCTTAATTACAATTTTACGAGCTAAGAAGTGCTCTTCTTTAGCACTTGATTCAAATTCTTGGTCTGGACGATCATATAACTTATTCAATTGCTCTTGATCTAACGATTCAATCGTCTTATCAACCCAAGCCTTCTTAGTTTCTGGCTCAATTCTTCTATTTAAGCTATTAATTAAAGCTTCTTTGGTAGCTTCAATTCTGTTTCTTAAATGGTAATTTTCATTAAAACCATAGTAAATTTCCTTGATCTTTTCTTTATCAAAAAATGGTTTCTTCTTATTCTTGAAATAAATATTTTTAAAAATTAAACCATTCTTTTCAAGGTGCTTAGCATAGCGAGTTACCGCATTAAAGAATTGTAAGGAATCTTTTAAGTTAACGATCTTGTCATTTTGCTCATTATCTTCGAATTGTTCAAATAAGTTTTCAACATTCATCCCTGGAACACGACGAGAAACAAATTGCCAGTAAGTCATCTGCACCATATTTTGTTCACCCATTTCAGGTAGAACATTTTTAACATAATCGTTAAACAATTGATTAGGGCTAAACATAATAACATCGCTAGAAGTTAAATTCCCGTGATAACGGTAAAGTAGGTACGCCACACGTTGCATAATAGCTGAGGTCTTACCTGATCCTGCTGCCCCTTGAACAAATAAAAGATCTGCACTAGTATTTCTAATAATCTTATTTTGCTCACGCTGAATCGTTGTTACGATCGACTTCATCTGTGTAGAAGATTTTTCGTTTAAGACATTTAAGAGCATTTGATCCCCAATTGATTCATTGGTATCAAACATATTGGTAATCTTACCATCTTCAATTAAGAATTGCCGTTTCTTAGTCATATCAACGGTTTGCACACCATCGGGCGCATTATAAGAAACTTTACCTAACTTACCATCATAATAAATAGAAGATATCGGTGCACGCCAATCATAGATTAAAAAATGATCTTCTTTATCTGCAAAAGACCCTAAGCCAATATAAATCGTTTCAGGATCTTCATTAGGTTCTTGGAAATCAACTCTGGCAAAGTACGGTCTCTTTTCTAGACGCTGCAAAGTTGATAATTGCTTAGCAGAATGTTGCCAGGCATTTTGACGTTCTGCTGACA
TTTGTTGCTGCTGATGAATTGACAAGGCAGTATCCATTGAAGTTGAATAGCCATCATAATCTAATTTCACATCATCAAAAAAGTGCGAATTAATATTTTGCGCTTCATTTTGCGCATGTTCAATTGAATGATCTAATTCGCTTTCTTTCTTTTTAATTAAGCCTAGAACTTTATCTAAATGTTTTTGTTCTTTTTGTTGTTCAGTTTCTTTTACCAAAATTAAGTCACCTATTTCTAGTATTAGTAAAAACTTTGCATACTATTTTATCATATGTATACGCTTTCTAAAAGGATTTACAACTTTGAGAAAGTCTCAATTATTATTTAGTCTTTTCTTAATTAATTAGTTTTATAATTAAATAAAATTACCTAGCGAGATGAAAAAGTGAAACGATTCTTTAAAAGTATTCTAGTTCTAGCTCTTTTATTTATTGGACTTAACTTTGCTTATCAAAAGACTGCGCCAGAAATTGAAAAAACTTTTGGAACTAGAAACCCACTCCCATATTTAACAGCCAAAGTACAACAATTTATTTCACCTGAAAAAATTCAAAATGATGATACAAATGCCGACAGTTCCAAAGGACATACTTTTGAAACTAACTCTGCATCTGTTTATCTTGACCTCTCTGATCCCACCCTCAGGCAAGCCGCAATTGATGGAATCAATATTTGGAACAATACAGGGGCCTTTAATTTCAAAATTACGAATAACAAAAATAATGCCAAAATCATCATTAAAGCAATGAATGATGGACAGACTAACGCGGCTGGTTTAACTGATACGCAATATAATTCCCTAACAGGGCACTTAATTAAAGCAACTGTCCGTTTAAATTCATATTACCTTTTAAAATCCAAGTTATGGCTACAACCACGGTCGAATCGTCAACACAGTTGAACACGAATTAGGCCATGCAATCGGGTTAGGCCATAAGAACGGCATTTCCGTCATGTATCCACAAGGATCTTTCTATACCATTCAGCCAAGCGATGTTGAAGATGTTAAAAAATTATATCAAGAACAATAAGAGCGATTCTATTTTTAAGAACCGCTCTTATTTTGTATTTTTCTTTTTTAATTGGTGTTGCTTTTTAAAATAAGGAATCCAGAAAGTTATGACATAACCTACTGCAATACCTACAAAAGTCCATACCATAATATTTTTAACAAATAAACCGATAAACAAACCAATTAAGCCAAAAATCCCAATAATAAATGATTGATCTAGTTGCTTCATACTTATTTCTCCATTTCTATCTTAACTATAACAAAATTATCTTATTATAAGCATATTTTTAGACTTACTTTGCCTTGCATTTTATACTATATATCATAAATGTTTTTAGATATATTTGCTTATATGCAAACAAATATCAACACTATACAAATAAAGAAATGTATTTTAATAAAAGAAAGGAATGGTATATCCTTTATGGATAATCAGAAACAACCTCCCTCGTATTCTCAAACAACAAACGAGTTATTTTCAGATTTAGAAACAAGTTCTACTGGCTTAAGTGAGCCAGAAGCTCAGCGGCGGTTAAAGAAATATGGACCTAACGCTCTAGCTGAAAAGCCACCAAAATCAACCTTAATGATGCTCAAAGAGCAAATCATTGATCCGATGATTTTGATCTTATTAGGAGCAGCTGCTTTTTCTGCAATTTTAAATGAATGGGTAGAAGCCAGCGTAATTTTCTTTATTGTTGTCGTTAACTCAATTATTGGAATTATTCAGGAGAAAAAAGCTCAGTCTTCTCTAGCGGCTCTCAAAACGATGAGCGCGCCAACTGCTACAGTTATGCGTGGCGGTATTGAAAAGATCATCCCTGCTAGCGAATTAGTTATCGGGGATTTAGTTATTCTAGCTAGTGGCGACATGGTTCCAGCTGATTTGCGTTTAATTCAATCTGCTAACTTAAAAATTGCAGAAGCCTCTTTAACCGGTGAATCAATTGCTAGTGAAAAGAATGCGAAAGCTGTTCTTTCAGTAGAT
TGTCCTCTAGGCGACAGAAAGAATATGGCTTATACTTCTTCAATCGTTACTTACGGACGAGGAAGCGGGATTGTAACTAAAACTGGAATGGACACCGAAATTGGCCAAATAGCCGGTATGCTTGAAAATGATGATGCTGGTGATACTCCATTAAAGCGAAAACTTAATACAGTAGGTAAAGTCTTAACAATCATTGGCTTAATTATCTGCGTTTTAATCTTTGCAATTGGTGCCTTTTACGGCCGCCCACTTTTACCCCAATTTTTAGTAGCAATTTCTTTAGCAATTTCAATTATTCCAGAAGGTCTACCAGCAACTGCAACCATTATTATGGCGCTAGGCGTGCAACGAATGGCAAAACAACATGCCTTAATTAAGAAGTTGCCTGCTGTTGAAACTCTAGGAAACGCGACGGTTATTTGTTCTGATAAGACAGGAACATTAACGTTAAATAAAATGACTGTAACTCATTTAGCTAATGGTGATGATTTCCTTAATAAAAAAGTTCTAAGTGTAGAAAAAGCAAGTAAGGATTCTAATTCATATAAGCAATTAATCTATGCATCTAGTCTTTGTAACGATGCTAGTTTTAATTTAGAAAATCCTAAAGAAGTAATTGGAGACCCGACTGAAGGTGCACTTCTTCCTCTAGCACAAGATTTAGGTTACTCGGCTCTTAACTTAAGAAAAGAGTATCCAAGACTTAGCGAATATCCTTTCGATTCAATTAGAAAGAGAATGACTACGGTTCATGAAATAAACAATGAATACGTCGCTTACACTAAAGGTGCGCTCGATGAGTTACTGCCACTTTGTGACTACATCTACACAAATAATGGTATGCGTAAATTAACTAAGTCAGATAAAGATAATATTCTTACCTTATCGCACAAGATGTCTGATCAAGCATTAAGAGTCTTAGGCTTTGCCAGCAAAAATATGCTAAACCTGCCTCAAGAAGGAGAAAATATTGAACAGCATTTAGTTTTCTTAGGTACAGTCGGTATGATTGATCCTGCCAGAGATGAGGTTAAGGCATCAATTAAGATGGCTCGCGAAGCTGGAATTAAGACCATTATGATCACGGGTGACCACAAAAATACAGCAGTCGCTATTGCTAAAAATTTAGGTATTTATACTAATGGAAACACTGTTATTTCAGGTACTGAATTAAACGAAATGACAGATAATGAACTAGATCAAGCAGTTAAATCCGCCACTGTTTTTGCCCGCGTTTCGCCTAATGATAAATTAAGAATCATTCAAAGCCTAAAACGAAATAATGAAGTAGTAGCCATGACAGGAGATGGGGTCAATGACTCACCCACCCTAAAGGCTGCTGATATCGGTGTTGCTATGGGTATTGGTGGTACTGATGTTGCTAAAGATGTTTCAGATATGATTTTACTTAATGATAGCTTTACAACAATTACGGCAGCAATTAGAGAAGGTCGAAAAGTATATCGCAATATTCAGAAAGTAATTCAATTTTTACTAGTTGGTAATATTGCCGAAATTACTACTTTATTTGTAGCTACAATCTTCAACTGGGATGCACCGCTACTTGCTGTGCATATTCTCTGGGTTAACTTAGCTACAGCTAGTTTGCCTGCTTTAGCTTTAGGAGTAGACCCAGCAAGTAAAAATATCATGAAACATAAGCCAGTAAAGACTGGAACTTTGTTTGAAAAAGACCTTGTTTGGCGTGTTATCAGTCAAGGAATTTTCGTCGCCTTAATGACCTTAATAGCTTACTGGATTGGAGAGTCGTTTGATAATCCAATTGCTGGTCAAACAATGGCATTTTGCGTTTTAGCCTTATCCCAAATGCTTCGCGCATTTAACCAACATTCTAATACTGATCCAATCTGGGTTAGAGGCAATAAAATAAATGTTTGGTTGATTATCTCCTTTATTGTTTCAGCGGTCCTAATGGGAATCATTCTTTTCACTCCTAACTTACAAACTCTCTTCCATTTGACTAGTCTTACTTCAAGACA
ATGGTTAGTAGTAATTATTCTTTCTCTCTTCTCTATCCTGCAAGTTGAGATAAGCAAGTGGATTAAGAAGCTAATTAAAGCTAGACAAAAAGAAAACAAGCTGCAAACAACCAGTGACTAACATAATATAATAATTTAATTATTATGAGCATATTTTTCGACTTATGATGATTTAATTTATATAATATAAGTATAAATTTAACACTTACATCTGGCATAAAGACTTAAACAACCTTTATGCCTTCCACTTTTTGGTAATCAAAATAATCGGTATCGTAGATACCAAAACAGCCAAATTGTAGTGACCCCCGAACTTCGGACACGGATTCATCCTTACACAAACTATTTTACGGTCTCAAAGAAAGAGCTTTCAATGTCTATATACAATATTTTATATACTGTTTTAGCATAATATATATTTATGGAATATTATTTTTTTAAGATTAATTGTTGACTAAAAGTAAGTTATAGGATATTATAACTTGTGAAAAGTAAGAAAGGAGCTAATGATTATGTTTAATGTACGTCAAGCAAATTCTTGTTGTTGTAATTGTCAAAAAATTTGGCAATTATTTGGCGTACAGTCATTAGTTCACTTCAGGAATTTCTCTAGTTAGAAACTAGCTTTTCAAAGTTATAACTATGATTTTTCTCAAGAGCTGTTGGCACTAGGCCAATGGCTCTTTTTTGTTTGCTTAGAATCGGGGTTGAATAAATTGCAAACTACTCTCATTGTTTTAGTAGCCGTCGCTATTCTAGCTGGCATGGCTTACTTACATAAAAAGAATTGGGGATTTACCAAGCTAGTATTTCTCTCATTAGTCGTTGGAATCGTCTTTGGAGTTTCTATTCAATTAATGTTTGGTGCCAAAAACGAGATTGTAAAAAATAGTATTGATTGGATTTCCATTGTTGGAGATGGGTATGTCTCCTTATTACAAATGTTGGTTATCCCACTTCATTAGTTGGTGCCTTTACGCAATTAAAAATGACTACTAAAATCCGAAAAATTGCTACAAGTGTCTTAGCTATCTTGCTAGGAACTACTGCAGTTGCTTCATTCCTGGGCTTTAGCAGTGTTGCAATTTTTAATTTAGGCGGAGCTGGCTTTGCTAAGGGAATGACTGCTTCTTCAACTGCTCTTAGTGCAATTAAGGACCATCAAGAACAATTGAAAGGCCTAACTTTACCGCAACAAATTACTTCGTTTTTCCCGCAAAATATTTTTGCCGACTTTGCAGGAATGCGTTCAACTAGTACAATTGCAGTTGTAGTTTTCTCTATCTTTGTCGGAATTGCATTCTTACAAATTAAGAAAGAAAAAGCTGAAGTCGCTGCTACCTTTGCACGTGGTATTCAAGCATTACGTGCAATTATCATGCGGATAGTTAAAATTGTACTCGAACTCACTCCATACGGAATCTTTGCTCTAATTGCTAGAACTACTGCAACTAACAGTTTCGCAACAATGAGCAAATTATTGGTCTTCATCGTTGCCGCTTATGTTGCAATAATAGTTATGTTTATTGTTCACGCTGTTTTACTTTTAATTAATGGAATTAATCCTATTACTTACTTTAAGAAGGCGTGGCCAGTTTTAGTTTTTGACTTCACTTCCAGAACTAGTGGTGGTAGTTTGCCACTTAATGTTCGTACTCAACGTGAATCAATGGGCGTTAGCGACACAATTGCCGATTTCGCCGCTAGCTTTGGTTTAACTATTGGTCAAAATGGATGTGCAGGTATTTACCCATCGATGGTTGCAGCAATTACTGCTCCCCTTGTTGGAGTTAATATTTTCTCATGGCAATTTGTCTTAACCTTAGTTGTAATTGATGTTATTTCAAGTTTCGGCGTTGCCGGTGTTGGTGGCGGTGCAACATTTACTACTTTAATGGTACTTGGAGCACTTAACCTGCCAGTTACAGTACTTGGAGTTTTAATTGCTATTGACCCAATCGTTGACATGGCAAGAACTGCTCTTAATGTTAATGATTCGA
TGGTCGCTGGTGTAATTACTGCTAAGCGTACTGGTGAATTAGATTGGAATATCTTTAATAATCAAAAAGATGATGTAGATACTGAAATTGAATAATTTTTATAAATATTCTTGACATTATTATTTAGTCATTGTAGTATTATGACAACAAAATTAATTAAAGCTTAGAAAAGATGAGTAAGCACTTAATGCATCTCACAGAGAGTCGGAACAGGTGTAAGCCGATGTTGCTAGAGTGAAGAAGATGGTCTTGGAGCTAAATTAAAATGCGAGCTATTTAAAGTAAGCAATTTACGATTGACGTACGTTATCACGTCTGAGTCTTCCATATTTTGGAGTACTTATTAAGGAACTGATTGTGAAATCGGTTTGAATATAGGGTGGTAACACGGTATTAAGCGTCCCTAGTGTAGTGTAATAGCTATACTAAGGACGCTTTTTCTTTTGCCGATTTTTTTTTGTCAAAGGAGGGATCATTATGAAAAATAAGAATATTATTTTTAACGAACGGGTATTTAATCGAATGTGCTACTTAAATATGATCCTTCTTGTTAGTGGATCCTCAAAAAACATTTAAGTGAAAAATTTAGTTAAAATTAAGAGGTAGATTACTATGACAGAATTAGCACATTTCGACATTGTTGGTAGTTTTTTAAGACCGGAAGAATTAAAACAGGCTAGAGATAAATTTAATCACGGAGATATTTCGCAAGCTGAACTAATTCAAGTAGAAAACCAAGATATTGAAAAGTTAATTCATACAGAAGAAAACTTAGGCTTAAAAGCTGTAACTGATGGCGAATTTCGTCGCAGTTGGTGGCATCTTGAATTTTTATGGGGTTTAACTGGCGTTAAAAAATATGATTATCACGAAAGCTACAAGTTCCATGGCGCTAAAACTAGAACAGATAATGCAGAATTAGCTGGCAAAGTAGCTTATAATCCCGAGCATCCATTCTTTAAAGCGTTTGAGTTTGTTAAAGAGCATACTAATGTGACTCCTAAACAAACTATCCCATCCCCTACACTGCTTTTTAGAGATAACCGATCAGATAATTGGCCTAATTTTTATAATAATAAACGAGCTTATCTTGATGATCTGGCTAAAGCGTACCATGAAACAATCAAGCATTTTTACGACTTAGGTTGTTGCTATCTTCAAATTGATGACACAACTTGGGCTTTTTTAATCAGTAAATTGAACGAATCCAAAAATGATCCCAAGGAACATGAAAAATATATTCAATTAGCGGAAGATTCAGTGTATGTAATTAATAAGTCACTAGAAAATCTACCAGATGATCTGACTGTTGCTACACATATTTGTCGTGGTAATTTTAAATCTACATTTCTTTTTTCAGGCGGATATGAACCAATTGCTAAGTATTTGGCGCAATTAAATTATAATCGCTTTTTCTTAGAATACGACAATGACCGTGCTGGAGATTTTGCTCCAATTAAAACAATCTGGAACAACCGGGATGACGTCACAATTGTTTTAGGATTAATTACATCAAAAGATGGACAGTTAGAAAATCCTGCTGCAATTATTCAAAGAGTTAATGAAGCAGCTAAATTAGTTCCATTGTCTAACCTCGCGCTTAGTACGCAATGCGGCTTTGCCTCAACAGAAGAAGGTAATATTCTCAGCGAAGCTGATCAATGGAAAAAGATAAAATTAGTCGTTGATACTGCTAACAAAATTTGGAAATAAAAGATATTTTTATAAGGAGGAAATTTATTATGGGTAAAGTTGAAAGTTTTGAATTAGATCACACTAAAGTTAAGGCACCTTATGTTCGCCTAATTACTGTTGAAGAAGGTAAAAAAGGTGACAAGATTAGTAATTTTGATCTTCGCTTAGTTCAGCCAAATGAAAATGCCATCCCTACTGGAGGCTTACATACTATCGAGCACTTGTTAGCTGGATTATTGCGTGACCGTATTGACGGCTACATTGATTGTTCACCATTTGGTTGCCGAACGGGCTTTCATCTTTTG
GTTTGGGGTACCCCATCAACTACAGATGTTGCTAAGTCTCTTAAGGAAGCACTTGAAGAAATTCGCGATAATATTCAATGGGAGGATGTTCCTGGCACTACTATTGAATCTTGTGGGAATTATCGCGACCATTCTCTATTTTCTGCTAAACAATGGTCAAGAGATATTTTAGAAAAGGGTATTTCTGATGATCCATTTGAAAGAAACGTTGTAGAATAATTATTTTTGAAAAATAGGACAAAAAAGATGCATATTACTGCTTCAAACAGTAAAAATGCATCTTTTTTCGTATTTATTTAATTTTTATCTTTTTCTTCTCTTCCTTTCTTTACCACCAATTAGTCCAATTATTGCTCCGACAGAAGCTAATGCTAATCCTGCTAATGATAAATCTGTCTTTTGACTACCAGTTTGCGGTAGTGTTGTTCTAGCACGATTAATTTGCTTAGCTGATGTGACATTTCTTGAAGTTCCAACTGCATAAGTTGTATGAGTTAAGCTTGTAGAAGTGTTTGAAGCTTCAACATTCTTGGACTTGTTTAACTTATTAATTTGCGTAACAATCTTAGATGATTCTGTTTCATTCGATTGATTTGGCTTGTTAGGTATTGTTTGTTCTTCCGGCTTTGTTGGTTCACTCGGTTGTGCTGGCTTACCTGGTGTTGTCGGTTCTTCCGGCTTCGTTAATTATTCTTTTTATATCTCTTCCACAAGACTATATTCCAAATTATCAACAAGCACACCCAAAAGATGGCTAAAATCATCGTATTCCGATAACCACTGCCTTTTTGAATCTTATTCCAAATTTCGCTAAAGTATGTTCCACTTAAAAATGCACTTAAAGTTATTATCGATAAAGTCAAAGACATCTTTTTAATTTTATCCTTAACCTTCATCAACTGAATCCAGTTATAGACAACTTCACAAATTACGATTAGTAAAACTAGTAAGTCATACCACCATTCACGTTGTTTAACAAATTCATCAATGGTAGTAATAAAATAGTAACCTGCAAGGATTGCAAATATGTTTAAGATCAACAGCTGTCCCACAACTGTACTTTGTTCTTTTTCTTTCATGATCTTCCGCACTTTCTACTTTTTCTCTTCATATAAATTATAAAACATTCGCTTTTTTTCTTGCTTAACCTAAGGCTCTATTTTTTAAACTGCCGCCAATAATTAGCAAGGTATTCTGTTGTTAAACTCACGGGATGGTTAATCAAATCTTGTGTTTGTCCTGCAGCAACGACTTTTCCACCATTTTTTCCGCCACGTGGCCCTAAATCAAGAATATTATCTGCATTAACTATCATATTTAGATCGTGCGTAATCGTAATAATAGTTGCCCCTTGATCTAGCAATTTCTGCATTACTTGAACTAAAACTTTCACATCAAGCGGATGTAATCCAATTGTAGGCTCATCAAATACAAATAAAGTATCGTCCTGTTTATGACTTAAATGTGTCACTAACTTCAAACGCTGTGCCTCTCCACCAGATAAAGTTGGTGTACTTTCACCTAAATGAAGATAATCTAGTCCAACTTCTTTTAGTAAAAGTAAGTCACGCTCAATTTTAGGTTCTTTCTTAAAAACAGGAATTGCTTCATTAATATCTAAATTTAAAATATCAACAATTGAGTAGCCGTTCCACTTTACTTTTTGAACTTCTGGATTATAGCGATTACCCTCACAGGTCGGACAAATCTGCTGCATATCAGGTAAAAATTGAATATCTAAAGTTACAATTCCAGTTCCACCACAAGTTGGACAAGCTCCCTGTTTATTATTATAAGAAAAGTAAGTTGGCGTATAATGCTTGTTTTTTGCAAGAGGCTGCCGAGCAAATAATTTACGTAAATTATCCATAATTGAAGTATATGTAGCAACAGTTGACCTGGTACTTTTACCAATTGGTGAAGCATCAACGCTAACTACTTGATTAATTGGCGATTCAAAGTTCTTAACTTGTTTAGGTAAATTTTCACCTTTTGCTTGT
GCTTGAATTGCTGGAACTAAACTATCTAGAATTAAACTTGTCTTACCTGCTCCAGAAAATCCAGTCACAGCAGTTAATTGATTAACAGGAATATTCGCATGAACATCTTGCAGATTAAAATAATGATCCACATTAAATGAAATTTTTACAGAGTTAACTTTTTCTGCTCGCTTTCTCGCCATTAATTCAGCAGTTCCATCAAGATAAGGTCTAATTAAAGATTGCTTGTCTTTTTTTATTTGATCAACGCTACCTTGATCCAGAATTTCACCGCCTTGTTCACCTGATCCTGGTCCAATCTCAATTATTTCATCAGCTGCTTTAATAATATCCACATTGTGATCCACAACTACTAGTGAATTTCCTTGTGCTACTAATTTATGCAAGACCTTAATTAAGCCGTCAACATTAGCAGGATGTAGCCCAATTGAAGGTTCATCTAAGACATATAAGACGCCAGTTGTTTCTGTTCTCAACGTGCGTGCTAACTGAATTCGCTGTAATTCACCTGTGGATAAGGTGTTACCATTTCGAGCCATTGTTAAATAATCTAAGCCTAAATCAAGTAGTGGTTGCAAGTTCTCCACAAACTCTGTGAATAAGGCGCTGGCCATTTTATGCATCTCAGCTGGTAAACTCTTAAGCACTTGACCTTTCCAAGCAATCAAATCATCTAACGGCATTTCAGCTACTTCGTTAATATTTAAACTACCTACTAATTGCTTCAATAATCCTGGCTTTAATCTTGAACCATGACACACAGGACAAGTCTGATATGAGAAAAATTCTGAAATCCGCTTTTGCGCACGTTCGCTCTTACTAGTTTTGGCAGAACGTAAAACAGCTTGGTGAGCATTTTCATAAAGGGCATTAAAATCATGAAAAACTCTCCCCGTTCCTGATAAAAAGTCCATTTTATACTTCTTTTCAGGACCATTAAGAACAAAATCTTTTTCTTTATCAGTTAAATCCTTATAGGGAATGTCAATCCTAACCCCCGCATGTTCTGCTACATTGGGCATAAAGTTTCTACCAGGTAGCGACCACGAAGCAACTGCTCCCTCTTTAATAGAAAGACTAGGATCAGCAATAAACTTACTATCATCCAATTGACGTACTTTTCCTGTCCCGCCACATTCTTCACAAGCTCCATCAGAATTAAAGGCAAATTGCTCAGCACTTGGCACATAAAACTTAACACCGCAAACTGGACAAGTAAGTTGACCCATTTCTTCACCTGACTTTGCCATTGCTTCTGCAATTTCTAGACTTGGCTTTAAACGGTGACCATTAGGACAAACAGGCGAGCCTAATCTAGAAAAGATCAACCGCAAAATATTAAAAGTTTCGCTCATTGTTCCAACCGTTGCTCGTTCAGATGGAATAGTTGGACGCTGTCTTAAGGCTAAAGCTGAAGGAATATGCTTGACGCTAGTAACATTTGCCTGATTTCCCTGTTTGATTCGTCTACGCATATAAGTAGACAAGGCATCTAAATATCTTCTGGAGCCTTCTTCATATAAGATACCCATGGCTAAAGAAGATTTTCCTGAGCCTGACAAACCAGAGATTGCAACAAATTTATGAAGTGGGATATTAACATCAATATTTTTCAGATTATGAACACGTCCCCCACGCACTTCAATCTGAGTTGGTAACTTTGCAGTCAAAGTCAGTTCCTCCTTACTTTTTATCAACTACTATCTTACAACAATTATTATAAAAAATAGCCATTCTATCGTTGCCTATACAAAAAGAAGACTCTGCTTTATAAAAGTAGAGTCTTCTGAAGGAATATAACTATGAAAATTTAATTTAAGGAAATATTATTTAAGAATTGTGTAATTTGTGTTGATATATGAGGATTATGGAAAATTAAAATTTTAAGTTATTTTGTAAAGCCAACATCGTACCATGAGTTTGCTGATGGCTTAAGTGACCAGCCAGTTACCTTGTCGTTAACTGCAGTTACTGACCAGCTGTTTGTAGTAAGTACAACG
TAGGCATTGTCATACATCCATTGTTGCCACTTATCAAATGCTTGTACACGGTACTTGTGGTTGAAGGCCTTAGTTGAATCAATGTTAGTAAGAAGCTTAGATTGAGTTGGAGAAACAAAACGAGCCATGTTGTATGGAGCACCTTCACTGTAAAGGTCCATTGGTGATGGTTCAGATGATAGGCTCCAACCACCTTCAAAGACATCAATCTTAGGACTATCTGCTTGAACATCTTGTACCCAAGAGTTGAATTCCATTGGACGGCCACCAACAAATTTAGCATCTAAGCCAATCTTTTGCCATTGTTGGATGTAGTTAGTCCAGATCTTTTCAGCGTTTGGTTGTGTATTACGAACAGCAACGTTAATAGTTAACTTCTTACCATTTGGTTGACGACGGTATTTTTCACCCTTACGCTTCTTGTATCCAGCCTTGTCTAACAATTCATTGGCTTTCTTCAAGTTGTATGGGTAGCCCTTGATACTCTTGTCGGAGAAGTCTCCAAATTGTGCTGGAATTAAAGTATTAATACGGAAAGTCAAGCCATTGCTGTAGCGCTTGTTAACAGCATCAATGTTCATTGCATAAGCCATCGCTTTACGTAATGAAACATTGTTCATCTTAGCGTTCTTATCTTCAACGTTCTTACCAGTCTTCTTGTCAAACTTACCTACCTTAAAGCCTAAGTAGTTGTAAGATAGAGGAATCTTACCAATGAAGTTAACGCCCTTAGTATCCTTAACATTGTTCCATTGAGAGTTGAGCACACCAGTAATATCAAACTTGTGACTCTTAATAGCTTGAGAAACAGAGTTAGTTCCAATTACTTCCATAGTAATCTTGTCTAAGTTTGGCTTACCACGCCAGTAATGTTCGTTTGGTACGTAAGTAACTGATTGGCCACGAACAACCTTTTGAACCTTGTAAGGGCCAAAGAATAGTGGTTGCTTCCGAACTTTATCATCAGATAAAAGTTTCTCAAATGGAACATCTTTCAAGTAGTGGTATGGTTCTGCACTTTCAAGGAAGTAACCGTTACCTGATTGAAGCATACCTGGCTTCATTTCTTTGAAGTGAAGTACAACTTTTCGACCATTTTCGCCATTTGGCATTTCAATACCAGAAATCTTGTCGGTCTTACCCTTATGATATTCTTCTAAACCAACAAGGTTAGCTAAAGAATCTGTATATCTTGAAGTCTTGGTCTTAGGGTTACCAACAATTTCGTAGGCGTATTCAAGGTCCTTAGCAGTTACTTGCTTACCATCAGACCACTTAACACCCTTTTTAACTTCAATGGTAATTGTCTTTGCATTCTTATCTAACTTAAAGGTAGCTGCACCCTTATTGTTAATCTTATAGCTATCGTCAACACTAAAGAGTGATTCTAAACCTGGACTTTGAATTTCTGTGTCAGTAGACGTATCAGATAATTCTGGTAAGAAAATACCTGTGAAAGGTGAGTCACTTTCAATTGCATACTTTAATGTTCCACCCTTTTTAACTTGTTTCTTAGGAACAGCTTCTTTAAAACTAACTTTCTTATCGCTAGCGTTATTATTGTTGTTATTCCCACAAGCAGTTAAAGTCAATGCCGCACCGGACAATACTGTAATTGCTCCTAACCACTTAACTTTTTTCATTTTTTCTCCCCCTTTGTGAGATGACTTAAATTTCTTGATTTTTATTATATGATATTAAAATGAAAAAAACAATACAAAATTAAAATATTTTTTGGCTAAATTTGTGTTTTATTAAAAATCTAGTATTAATACACTTGTTTTCTTTAAGGTGTAATCTGATAGTTATAAAAATCGCCAAAAGTTATAAAGAGTTATAAAAAAAGAAAGATGATACACATGAATAATAATCCTTTCAATCCTAGTTTTGGCAAAATACCAAGTATCTTTCTAAAACGCGATAATTTATCTCAAAGAATAATCGACGAATTAAATAGAGAAAATTCTCCATTTCAAACTTCACTCATTTATGGTCAAAGA
AGGTCTGGAAAAACCACCCTTATGTCTGAAATTTCTTCTAAACTTAAAGAAAATAAAGACTGGATCATTATTGACTTAGTCTTTGATAATGATTTACTCATTTCTCTAACTAATCAACTTCAAGAGCATTTATTAAAATTAAAACTAATTAAAAATTTAGATATAAAAATGAACTTTCTCGGCATTGATATTAATGCTTCCTTAGCTCAAAACATCGATGCCAATTTTCAACAAATTTTGCAAACTAGTCTTGAGAAATTAACAAGAAAAGGCAAAAATGTGCTAATTAATATTGATGAGGTTCATTTAACTCCTTTACTAAAGAAATTTGCCAATTGTTATCAGATCATGATTAGAAAAAATCTTAAGGTTTCTTTATTAATGGCTGGTCTTCCAGAAAATGTTTCTGAAATTCAAAATGACGATGTCTTAACTTTTCTTCTGCGATCTAATCGGATTGTCCTCAATCCATTGAACTTAGAAACTATTAAACTTAGCTACAAGCATATTTTTCAAAATGCTAACTTCAATATTGATGATAGAACTATTTTATATATGACCAAGCAAACACAAGGATTTGCATATGCCTTTCAACTTTTGGGTTATCATATCTGGAGATATGCTACAGAACAGAATAAAAAAACTATTTCTCTTTCTTTAGTAGACGAAATCTTAGACATTTATTTGAGTGATTTAAATCGCAATGTTTACTTTAAGGTTTACAATGATCTTTCTTCTAAAGAAAAAGAGTTCGTGCAAGCAATGGTAAAAGTTGGAAAACAAAAAGTCAAAAGCCAAGAAATCGGGAAAATAATGAATAAAGGTGCTAATTATTTAGCTGTTTATCGGAGAAAATTAATAGATGATCAAGTAATCAAACCTGATGGTTATGGCTATGTCAGTTTCTTACTCCCGCATTTTGACAAGTTCATTGAACAAGAAATGATCTTAAATGAATTCTAAAATTTCAAGCAAAAAAAGCTGGTAATACCAACGCCATTTCAAGCGAAGATATTGCCAGCTTTTCTTTATATTCTCTTTTCAGAAGTTTTTTTAATTTTTCTCTTAGGTGTGAAATTCAAAGCTAAAACTGCCCCAATTACTAAAAATGTTCCTAGCCAATCCATCCCTGACATCACCAAACCAAAGATTAAAACTGAACCAACTGTAGCTGATAAGGGCTCAAAAGCATCAAGCAAACTAACCGTTGACGGCTTAACATAACGTAATGCATTCGCCATAATCTGAAAAGGCACTATTGTTCCCAGAATAATAATTGCTCCTACCCAGAGCCAAACTTTTGGTGTGTTTGGAACAGCGGGAAAGCTAGGATGAAAAATCACCAAACCAAGTCCAGCAAAAATCATCCCCCAACCAGTCAAAACTAAACTCGATATTCTTTTAACAATTTTAACTGGAATCAACGTATAGCTTGCTTCACCTAGAGCAGATAATAAGCCGAAAAATAATGCCACTGGAGTAATTGTTAAATGATTTAAATTTCCATGAGTTGACAGCAAAAATACACCAATAAAAGCCATCAGTGCTGCTAAAATATCTAACCTTCTTAAAACTTGCTGATGAGTTAAAGCAAGATAAGCAAGAACAAAAAACGGTCCAATAAATTGTAAGATAGTCGCAATCGAAGCATTCGCCATTTCAATGACAATAAAGTAAAAAATCTGTACTGGTAATAAGCCAAATACCCCATAAGCAATAATGTGCAACGTATTTTTCTTGTCTTTTAAAACTGAAATTGGCTTTTGATGTAAAATTGTCGCAATGATTAATAAGACAATCCCAGAAATAATTAGTCTTACTTGTGTCAGCCAAATAGGTGTAATTTTAGAACTGATCTTAAATAAAGACTCCGCAAATAAACCTGATATTCCCCACATCACAGCTGCTAAAGCTGCCAAAACTGTCCACAGTCTGCTTCTAGTAACTTGCTTATCCATCCAGCTTCTTCCTCACAAATAAAAATCAATCTAAATCATCAT
AGCATAGTTACTCTAAGCAATCATTGTAATTTCGACAAAAAAATACTGTTCAATTAGGTAAGCAACCTGAACAGTATTTTTTAATTCAAATTAATCTAAATCTTTTCCATCAGATTCAATAACTTTCTTGTACCAATAGAATGAATCCTTGGGCATTCTCTTTAAAGTACCGTTGCCTTCATCATCTCTATCCACATAGATAAAGCCATAACGCTTACTCATTTGACCGGTACCTGCAGAAACTAAGTCAATACAGCCCCAAGTAGTGTAACCAATTAAATCTACACCATCATCAATTGCACGCTCCATTGCCTTAATATGCATTCTCAAGTAATCAATCCGGTAATCATCATGAATCTTGCCATCATCGCTGATCTTATCAACTGCGCCAAGACCATTTTCAACAACCATCATTGGAATGTCGTAGCGATCATACATTACTTCAAGATAATATTGAAGTCCATCTGGATCAGTTGCCCAGCCCCATTCGGAATACTTCAAGTATGGGTTCTTAACACCAGCAGCAAAATTACCACCAACTTTATCTTTTACTTCATGGGTAGTAATAATGTTAGACATGTAGTAAGAGAAGGTATAAATGTCAACTTTTCCTTCAAGCAAGTCTTTTCGATCTTGCTCGGTAATATTTAAATGAACATTATGCTCATTCCATAAACGCTTAGCATAAGTTGGGTATTTACCTTTTGCTTGCACATCACCGCAGGTTCCCACATATGACGGTTAGCCAAAATATCCTTAGGATCTGGTGTTAATGGATAGTCTACAATACCACAAATCATATTACCAACTACATAGTTAGGATCTAATGCATGAGCAATCTTAACTGCGCGAGCACTAGCAACAAATTGATAATGCAACTTTTGGTAGGCATGTTGATATACCTTATCATCAGTTACATTATTGCCAAATGCACTCAGCATTAAAAGCGTTGAGTTAATTTCATTAAATGTAAGCCAGTACTTAACCAGTCCTCTGTATTCTTCAAACAAAGTAGTTGCATACTTAACATACATATCGATCATTTTGCGATCTCCCCAGTCATGGTATTTTTCGCTGAGGTATAGGGGATCTTCATAGTGAGAAATAGTAACTAAAGGTTCAATCCCGTACTTTTTACATTCTTCAAATACACGGTGATAAAAATCTAACCCAGCTTGATTTGGCTTTTCTTCATCGCCTTTAGGGAAAATTCTAGTCCAGGCAATTGAAAGGCGGAAGATCTTAAAGCCCATTTCAGCAAACATCTTAATGTCTTCTTTATAGTGGTGATAAAAATCAATTGCCACGTGGTTTGGGTAATATTCATTTGGATCAATTGCACCAACTGCGCCTTCTGGCAAGCCCGCCCCAGGAATACCTGGAGTTTTTTCTAACTTACCATTCAACTTATAAGTCAACATTCTTGGTGCATCTAAACTACCAGCAGTTGTAATATCAGTGACTGATAATCCTTTTCCATCTTCATCATACGCACCTTCAATTTGATTTGCGGCAGTAGCTCCGCCCCATAAAAAGTTTTTTGGAAATGACATTTATATTTCTCCTCTTTTTCAATACTAGTCATTTTTATTATATATGAAATCGATTACATTATCTAAGCCATTTGAGGATTTGAACTAGCTGCTTCTGTTTCACTAGCTCCTTCTAAGTCTAAAATCTCTTGTCCTACTTTTACCTTGCCTTGACTTACAACAGTGATCTTTTCATAATCTTTTGAGTTAGTAACTACGATTGGTGTCGTTACAACATAGCCCTTAGCATGAATTGCATCGATATCAAACTTAACTAATTCATCACCTGCTTTAACTTCGTCGCCTTTTGCAACTAAGGTTTCAAAACCATGTCCTTGTAAATTAACAGTATCCATTCCGATGTGCATCAAGATTTCTGCACCATCGCTTGTCTTCATTCCGATAGCATGTCCTGTTGGGAAGGTCATTACAACTTTACCATCTGCTGGTGCATGTAGAACTCATTCG
CTAGGCTCAACAGCAACGCCTTCACCCATAGCACCGCTTGAGAAGACTTCATCTTTTACTTCTGATAGTGGCAATAATTCACCAGCAAGTGGACTAACTAGTTCAGTTGAAACATTTAAGTCGCTAGTACTTGCTAAGCTTGGGGCAGCTTTCGCAATAGTATCTGCTTGGTTTGCAACTTCTTGAACAGTCTTTGCTTGTTTTTCATCGTAAGCTTGATCAACACTCTTCTTGCCAAGAACCATTTGTAAAATAAATCCAAGAACGAATGAGACAGCCATTGCAATCATTAAACCGTAAACACTCATGTCAACACCAGTCTTTGGACCAATAGCAGCAGGGATTACGAAGACTCCCATACCACCCATCATGTACATCTTAGTACCGAAGAATCCAATCAAACCGCCACCAATACCAGCTGCAATACAACTTAAGACAAATGGCTTTTTACGTGGAAGAGTTACACCATAAATAGCTGGTTCAGTAACACCGAAAATTCCTGATAAGAATGCTGGAAGAGCAATACCTTTTAATTTTTGATCTTTAGTTTGTAATAAGATTGCTAAAACAACACCAATTTGTGCAAATGAAGCACCTAATGATAACCCTAAGATTGGGTCATAACCTAATGCAGCAATATTTGACATCATCACTGCAACAAATCCCCAGTGAACGCCGAAGATAACGAATACTTGCCAGAAGCCACCAAGTAAAATGCCAGCTAATACTGGACTAAAGTTGTAAACAGCTGAAGTAATGGCAGCAAGTGCATTACCAATCCAAGTAGCAAGTGGCCCGATAATTAAGAAAGTTAAAGGAACTACAATCAGTAAAGTAACAAATGGAACTAAGAAAGTTTTTACCACAGTTGGAATCCATTTCTTGCACCATTTTTCAACAACAGAAGCAAACCAAACCGCTAAAATAATTGGAATAACTGTTGAAGTATAGTTCATTGAAATAATTGGAATACCTAAGAATGTAGCATGAACTTGTGAATGTAAGAACGTTCCGTTAAATAAATCAAACAAAACTTTTTTGCTGCTATTCATTGCTACCATTGCTGGATAACACATTGCAGCACCAATTGTAATGGCTGTAAAGCGGTCTACCTTAAACTTCTTAGCACTAGTAATTGCTAGAATAAGTGGCAAGAAGTAAAAGAAACCATCCCCAATCGCATATAAAACTTCATAAGTACCAGAGGTTTTAGCAAGCCAGCCAAATGAAGCACACATTGCAGTTAAGCCTTTAATCATACCTGCTGCAGCCATTGGCCCCAAGATCGGAGTAAAGATACCTGAAATAAGATCGATAGCTTTATCCATTAAACTCATATTGCTGTCATCTAAATCATCATCAGCAACTTGGCCACCACCAGAAAAGCCACCTTCTTTTAAGACTGCCTCGTATACGTCAGCAACTTCATTACCAATAACTACCTGATATTGTCCACCAGCTTTAACAACTGTAACAACACCATCAGTATCTTTTAAGGCATCATCATTAGCCTTTTTTTCATCTTTTAATTTAAAACGCAAGCGTGTTGTACAGTGAACAACGCTGATTACGTTGTCTTTTCCGCCAACGTCCTTAATAATCGTTTTTGCTAGAGCATCATAGTTCTTAGCCATAGCTTTTTTCCTCCCAATAATAAAACCCAAGCTACTCAAAAACTAGAGAACGTAAATGTTCAATGTCTTTCAGTAACTTGGGTTCTTGCCTAATCGAATTAGTAACATACCTAAATATGAAAATATTTAATTTTCTTGGCGGTTTGTCACGCGCCAAATATGTAAATAAGTTGCGATTCTCTCTGCCGTATCATAGGCATGATTGTACTTAATCTTCATAAAAGCTAATAGCGAATCATCAAGTTCGCCACTTTTTTGATGCTTATTTCTTAAAAGTCTTACCAATAATACTCTTAAGTGAGTAATGAAGCGACTATAATTAAACGACTCTGTATCTAAAGTCATTTGGTATTGATACTGAATAAT
ATCAATAATCCCGCTAATCAACTCAGTGATCTCAACTGTTTCTTGAACCTGATCATTATCAGAAGCTGCATTAACGAAGTGATAAGTCATAAAAATACTTTCACTCGGTGGTAAGTTGACTTTCATTTCTTTATTAATTAAAGCAATGACTTTTTCGCTAATCTTATACTCCTTAGGAAAAAGATTCTTAACTTCCCAGCGTGTATTAGCAGCACTCATATCAATATGATCTTCAATTCTTGAAAGAGCAAAATCAATATGGTCAGCTAAAGCTAAGTATTGAAAATCATTGAACTTCACTTTTAGAAGTGGCTCCACCATCTGGATCACCTTATTGGTTAAATCGATGGTTGAAGCATTGATTTCTTTAACTTGATTGACTTCATCTTGATGAGAAGTAGCGGTAAACCGCCTTTCAATTTTATTCTCATCAATCTTATCGCCTTTCTTTAAGCCAAAGCCCACGCCTTTGCCCAGGACTACTTGTTCTTGGCCTTGATCGTCCTGAACCAAGGCTGCACTATTGTTAAATGTTTTAAGAAATATCATCCGACGAATCACCTTTATATAGAAAAAAGCTACCACAAACTACAATATACACTTTAAGTATATCATCATTATTTGTAGTAGCTCAGACCTGATCGCATCAGTAATCCGCTTATTTACAGTAATTATAATAGCGCTTTCACATATTTGTGTCAACAAGCATTTGCAGAAAAATGCTTATTTTCATGTCAGAAAAAAAGGCTAGACCTTAATGTCTAGCCTCTTTTAGGGATTTCAAACTAACTTTTATTATTTTTGACGCTTTTTTCTATCTACGCCTAATCCGAATAAACTACCTACTGCGGCAATTGCTAAGCCTAAGATACCAGCTGTATTTTCTGACTTAGCACCAGTTTGTGGCAATGTATTCTTTTCAGCATGGTGATTTTCATTTACAGTAGCTGCTTTTGCAGTATTGGTTTCAGAATTAGTTTCTGTAGTCTTACCACCATTGTTGTCACTTGTTCCAGTAGCGTCTACATAGACCTTAACTGTTACATCTACGCTAGAGCCATCTGGATAAGTTACCGTAACAACGCCTGTATGTTCACCAATTGTAGAAACATTTGGCACTTCTTTCCAAGTGTACTTAGTACCTTCTGGCATATCGTTCCTGTTCTTAATACCTTCTGATGGGTTTGGAACAACACCTGGAGTAGTGTGAATTGGTTGTGGCTCTGGAGTTACATGGTTCGTACCGATAATTACTGTTACGTTAACCTTATCTTTTGAACCATCTGGGTAAGTCACAATCACAGTGCCTGTAGTCTTGCCTGGCTTAGTTACATCTGGAGTCTTCTCCCAAGTGTACTTAGTGCCGCTTGGCATATCATCCTTGTTCTTAATGCCCTTTTCTGCTGGTGGTACTACACCTGGGGTAGTGTGAACATCTTGACCTTCTGGGGTGTACGC +>5_1#NODE_5_length_39999_cov_63.277_ID_9 
+CGATCAATTGGTAACAGATATTCGTGAAATGAACCAGAAACCAGTTAAAAAGCAATTGCGTTTAGGCTTAACTACTTTATTTGCTGTACAATTTATGAAAGAAATTTCACGATTTTTAACAACCCATCCACATGTGAATTTAATATTACAGCAAGATGGCTCGCCAAAATTACAAACGATGTTAGCAAATAAAGAAATTGATATGGGACTAATTTCTTTCCCAAATACCTTACCTGAAATTATTCATATTGAACCTTTAGAAACGACTACCAAAGGCTACCATGTTTATGTCGTAGTACCAGAATCAAATCCTCTCTCCCAATATGAAAAATTAACTTTTAAAGATTTGAAAGATCAACGATTTTCATCGTTAAGTGATAATTTTATGATTGGTCGCCTACTACTAGATCGGACTCGGAGCTTCGGTTATGAGCCGAATATCATTTTGCATAACGATGATTTACAAGTACTTCTTTATAGTTTACAAAAAAATAATTCGATTTGTTTGCTGCCGATTGAATATTATGAAGTGGGAAAAAGTCAGGGACTGAAATGGATTCCTTTAAAAGATAAGTTTGACTATTTCCCCATCGGCATTGCTTTGCGCCGCGATTTTAGTATGACAGAAGATGTTAGAGATTTTATCCAAATCATTAAAGAAAATTAATGGATAATCTAGCCTTGATAAGCAAAGACGACTTTACTATCATAAAGTTTCAACACTTTTTCAAATAAGAATTTTGAGAATAAACCCGCGACAATTGGAACAACAAACCAAGAAAAGAATAAGGGTAAGATCCCTAAGCCCGCATCCAATGAAGCAAGTGGTCCCACGAGTCCAACCAAGCCAAATCCAGCCGATTGTGGGGTTCCTGAAATGTTGAATAGAACCACAGGAATTGCTGAAATAGTCGCTGTAAATAAACAAGGAATTAAAATAATTGGGTATTTAAATAAATTAGGCATCATCATTTTCATGCCGCCCAAAGAGACAGCCAACGTCACTCCCGATTGGTTGACATTCCAAGAATTAATCACAAGAACAACAGTTGTGGCTGCAATTCCCATTGCAGCAGCACCTGCAGACAAGCCATTCAACTGAATCGCCAAACCGATTGCCACAGTAGTGATTGGTGAAATAATCAGTGCAGCAAATGAACAAGCAATTAAAATACTCATGATAATTGGCTGGAAATCAGTAAAATTGTTAATCACTTTCCCAATGGCGACCGTAATTTGTGTCACATATGGATAAATCAACATGCCAAACAAACCTGCACCTACTCCAACAACGATCGGTAAAGCAATGATTTCCACTGAACCGAATTTTTTATCGATGACTAATAAAAGTAAGACTGCAACAGAAGCGGTAATCATAATATTAATGATGTCACCAGTGCCTGCACCAATAAAGCCTTTCACTTCTGAATTAAATTTAATCACGCCAGAGCCAGCAAATGCTGCACCGCCAGCAATCATCATTTTCTGTGGCGTTAAGCCAAATTGAAAAGCAATCAGTCCACCAATAATTAAAGGGGTGGCTAGTTGGAAAATTTGCGCCGCATGAATAATCATTTCGATAATCTTGTATTCTGCAAAATATTTTAAAATGGCTCCTAATACAGCATTGGGAATTAAGGCGATAATGGTGCCAGAAGCTGTTCCAGCTAACACTTTGTTAAAAAATATTCTTGGGGTTAATTGAGCATCCAGTTTATCAGTGGGCATTTTCAACACTCCTATTGATTATTTTTGTTTTTTTAGAACAAGTTTGAGAATGATTGAACTTTCTATTAGATAAAGAGCGGCCTCAAAGAAGAGCGCTTATTTCTTTGAGGCGCTTTTTATTTAACTAAAGCAAGGAACGATTTTTAATATACATCCGTATCCGCATTATATTTCGCTAAATTATCTTTAACATCTCGCAAGAACTTACCTGCTTGTTGTCCATCTAAAATTCGATGATCAATAGAAAGACATAAATTAACCATATCTGCCA
CTTTAAAGCCTCCATCTGCGGTTGGCACAAGGCGTTTATTAATTGACTCTACTTGTAATATCGCAGCTTGTGGATGGTTAATAATTCCCATCGATTGGACGGAACCTAGTGTCCCAGTATTATTAAGAGTAAAGGTCCCTCCTTGCATTTCCTTGCTTGCTAGCGTTCCTTGTCGGACTTCTTGTGCCAAGCGATTGATTTCTTTAGCTAGCCCTGCAATCGAATAGTTATCCGCTTGTTGAATCACTGGCACGTAAAGATGCTCGTCTGTCGTTACAGCTATGGATAAATTAACATCTTTATGATAAATAATTGAGCCATCATCCCAAGATGTATTGATTTTTGGATTTTTCTTTAACGCTTGAATGACTGCTTTGGCAAAAAATGGGAAGAAGCTTAGAGACAGGCCTTCTTGTTGTTTAAATTCATCTTTTAATGAATTTCTAAGTTGGACTAAGTTGGTCACATCCGCTTCCACCATTAACCAAGCGTGAGGGATTTCGTTGACACTTTGGACCATTTTTTTAGCAATCGCTTTACGTACAGGATCTGCAGAGACAATTTTATCTGGGCTTGTTTCAGTTGGACTAGCTGTTGCGGCGCTTTCATTTTGCGAAGCAACTGGCTCCTCTGAAATTGAAGGGCTAGTTCCTGGACTCACTGTTTTCTCAGGTGTTCTTCCCTGTGTTGGTGTAAAGTTTGTAACATCTTTTCTAGTAATTCGACCATCACGACCTGTCCCTGTAACTTGTGTTAAATCAATCTTTTTTTCTTGGGCAATTTTAAGGACTGCTGGGGAATAACGACCATTATTTTTTTGATGGGACGTCGCAGTGCTTGTTGTTGCAACCGTTTCATGTTCTTGCGCTTGTTCCGCACTTGCTTCTTTCACTGGTGCTAAAGTAGCTACTTCTGTTTTTTCGGTTGTTTCTTCTGTCTCTAACGTCATCACCGCTGTTCCGATGGGCACATCTGTATCTAGAGAAATCAGAAATTCTTTTACTACACCATCAAAATCTGATGGGACTTCCGTGGTGACTTTATCAGAAACGACTTCCATTAAGGGATCATAGCGTTTCACTGAATCTCCTGGTTTAACTAACCATTGGACAATGGCTGCCTCTGTGACACTTTCGCCTAGATGAGGCATTTTGATTTCTTTTGTGGCCATTTATTTTTTCCTCCTTGCTTCAATTAAAATTCTGCTAATTCTTTCATTGCTGCTAATACTTGTTCTTCATTGATTAAAAATTCTCTTTCTAAAGGTAATGCATAAGGCATGCTTGGACAATCAGGTCCTGCAAGTCGTTGAATCGGTGCATCTAAATCAAAGAGAGCATCTTCTGAAATCATTGCCGCAATTTCACTCATCACGCTGCCTTCTTTATTGTCTTCTGTTACGAGTAAGACCTTCCCTGTTTTCTTAGCTGCAGCTACTAATGTTTCGCGATCTAAAGGATATAATGAGCGAACATCGACGATTTCGGCATCAATTCCTTCAGCGACTAGTTTTTCCGCCGCAGCTAAAGCTAATTGCAAGGTCATGCCGTAACTAATCACGGTTAAATCACTACCCGTTCTAACGACATTGGCTTTGTCAATTGGTACGATATAATCATCTGCAGGCACTTCATCTTTTAGTAAACGATATAAACGCTTATGTTCGTAAAAAATGACTGGATCATCGGAACGAATCGCTGCTTTAATCATCCCTTTTGCATCATAAGGATTAGAGGGGGTCACAACTCTTAATCCTGGTTGTCCACAAAAAACTTTTTCTGTAGACTGAGAATGATACAGCCCACCACGAACACCGCCGCCATAAGGGGTCCGATAAACGATCGGCGCAGTCCAATCGCCTTTTGTTCTGTAGCGCATTGTCCGAGCCTCTGATAACAATTGATTGGTTGCAGGCAAAATGTAATCCGCAAATTGAAATTCACCAATTGCGCGATAGCCCATTAAACCTAAGCCGACAGCTAAACCACCAATTAAACCTT
CTGTTAACGGTGTATTAAAACAACGCTCGTCACCGTATTTAGCAGCCAAGCCCTTTGTTACACCGAACACGCCGCCTTTGTCGCCGCCGACATCTTCTCCAAAAATAACTACTTTTTCATCACGAGCCATTTCTTCAGAAATTCCTAAGTTAATTGCTTCTAAATAAGTCATCTCAGCCATTATATTTCTTCTCCTCTTCTCTTATTTTGCATACACTTCTTCTAAAATTGATGTCGGTACAGGATCTGGCATTGCTTCTGCTTCATCCGTTGCTTGATTGATTTCTGCACGAATTTCTTCATCAATTTTGGCAATGTCTTCGTCTGTTAAATAGCCCTCTTCTAATAATTGTTTTTCAAAAAGCTTCACTGCATCGTTCTTTTTCATTTCTTCAATTTCTTCTTTTGAACGATAAACAGATTGATCATCGTCAGCGGAATGAGAAGTCAAGCGCGAAACCATTAATTCAATCAATTTTGGTCCTTTTTTCCCGCGAGCCGCTTTTACTGCTTCTTTAAATGCTAGATAGACTTCAGTAAAATCACTACCATCAACGGTTACACCTTCAAAGCCATAAGCTTTCGCGCGATCGGCCATTCGTTTATTGGCATACTGTTCTTCAATTGGGACAGAAATCGCATATTCATTATTTTCAACAACAAAAATGACTGGTAATTTTTTTACGCCTGCAAAGTTCATAGCTTCTTGGACTTCTCCTTGATTGGCAGAGCCTTCCCCAGTGGTGGTCAATGCAACAAAATCAGCTTTTTGAAGTTGCGCTGCATAACCAACACCTGTTGCTAATGGCATTTGTGTACTTACTGTTGAAGAGAAGGAAACAATATTATGCTCTTTTGAACCATAATGATTCGGCATTTGACGACCATGGGAAGAAGGATCCGCTTCTTTTCCAAAAGAACCCATTAAAATATCTTTGGAGGTCATGCCCCAAACCAAGCACGCGGTCATATCACGATAATACGGTAAAAAATAATCTTTTTGAGGATCAAAAGCCATCGCCATCGCTACTTGTGCAACTTCTGCCCCTTGACCAGAAATATTGAAAGAGGTCTTACCAATCCTTGTTAATTGCCACAACCGTTCGTCTAAACGACGTCCTCGTAGTACTTGACGATATGCCTGAATCAATTCTTCTTTTGATAAACCCGATTTTTTAAGCGTTTTCATTTTCATCAACCTTTCTCTATTTGTGAATCGCTAAGCCATAAGTATCTAAGGCTGCTTCTTGCAATACTTCTGTCATTGTTGGATGCGCATGGATTGCTTCACCAATTTCAATGGGCGCGGCATCTAAATACATGGCAGTACTTGCTTCGGCAATTAAATCCGTGACATGTGGCCCAATCATAGAAACCCCTAATAAATCATCGGTCTTCTTGTCACGAATTACTTCTATAAAGCCATCTGTTTCTCCATAAACAAGTGATTTACCATTACCATTAAAATTAAAGGTGCCAATCACTACTTCTTTATCTGCTGGTAAAGTTTCTCTAGTATAGCCGACACTTGCTATTTCAGGATTTGTATAAACACCTCGAGGCACGTTTGTATAATTTAAAGGTTCAACTGTCTCGCCTAAAAGATGCTGAACAGCTAACTCTCCTTCTTTCATAGCCACATGTGCTAGTTGGAGTGTATCGATACAATCACCGATCGCATAAATATGTCCTTCTGTGGTTTGATAAAATTCATTTACTTCAATGCCTTTGTCAGTATATTTCACTGAGGTATTCTGTAACCCTAACTTATTGATATTGGGTTGACGCCCAATTGCTACCATGACTTTGTCAACAGCGAGGCTTTCTTGTCCTGCAACTTCTATTTGAACCTTCTGCCCGGTAACTTTCGCTTCTTGGACTTTGCTACCTAATAAAATGTTAATTCCTCGTTGCTCTAAACGTTTCTTTAATTCTTTAGAAATTGTTGCACTCTCATTTATAAGTAACCGATCCAAAAATTCAATGATAGTTACGTTGAC
ACCTAAACTATTTAATAAAGAAGCCCACTCGACACCAATAACGCCACCACCAATAATTGCAATTGATTCTGGTAGCTCTTCTAATTCAAGCATCCCATCAGAAGATAAGATAAATTCTTCATCTAGTGGCAAATTAGGTAACGTTTTAGGACTAGAACCTGTCGCAATAATGACATTTTTAGGAACGATAATTTCTTCTTCCCGTGTTGGATCATTAAACGTAACTGCCACTGCGCCAGAGACTGGTGAAAAAATGGAGGGACCTAAAATCGCACCTTCGCCTGCTAACATTTTGATTTTATTTTTTTTGCATAAGCCTTCAACACCTTTGTGTAACTGCTCAATAATTCCTTCTTTCCGTTGTTGTATTTTAGAAAAATCAATGGACGCTGCCTCTGTTTCAATCCCAAAAGAAGCCGCTTGTTTTAAGGTATCAAAAACCTCTGCGCTTCTTAATAAAGCTTTTGTAGGGATACAGCCTTTGTGCAAACAAGTGCCACCTAATTTGTATTTTTCAACAATTGTGACGTTTAGCCCTTTTTGTGCGGCTCGAATTGCTGCTACATAGCCTCCTGTTCCGCCACCTAAAATCAGTAAATCTGTTTGTTCCGCCATTTTGATCACTCCTGCTCAATTTTTGCTTCACTATAATTTAAAGCTTGTTCTTCCCCTGTTAATACTCGATTGACGCCTTCATATAAAGCAGCCATCTCCATTTCACCTGGGTAAACCTTAATTGGTGCAATCCAAGTGACTTTTTGACTAATTTCTTGAACAACGGTTTGCGAATAGGCTGCGCCCCCTGTTAAAATAATTGCATCAATTGTGCCTTCAAGAACCACGGCCATCTCGCCAATACTTTTTGCGATTTGGTAACACATTCCTTTTAAATAGTAATTTGCTGTTTGATCACCTGCAGCTATTTGTGCTTGGATATGCCGTAAATCTGTTTCACCTAGGTATGATTTAAGGCCGCTATTACCCGCAATTAGCTTTTTCACTTGGCTAATCGTTAACTCCTGTTCAAGTATCCATTGCGCAAATTCAACGAGCGGTAAGGCACCACTGCGTTCTGGCGTATAAGGACCTTCACCATCAAGCCCATTTACCACATCCACCATGCGGCCTTTTTGATGCGCCCCCAAGCTAATGCCCCCACCTAAATGAACAACGATAAAATTGCTTTGCTCATATGTTTTCCCTAAATCCTCAGCGATTTTTCGAGCGACCGCTTTTTGATTTAAGGCGTGACCAACGCTACGCCGGTGAATGCCTTTTAATCCGGAAATCCTGGCTAATGGTTGTAGCTCATCTACAACAACAGGATCGACAATAAAGGCTGGCACGTGATATTTTTCTGCAAACTCATTAGCTAAAATCGCCCCTAAATTTGAAGCATGGGTATTAAAGCGTTCTGTCCGCAAGTCTTCTAGCATTTGTTGATCGACAAGATACGTACCGCCAGGAATAGGCTTAAGCAAACCACCACGCCCTACGACAGCGGCTAATTGTGTGATGTTATGTGTTTCTAAAAATTCCGCGATCATTTGTTTTCGAAATGGTGTTTGACTGACAACATTTTCAAAAGGTGCCAATTCTTGGACGCTATGTCTAAGCGTTTCTTCCGCCAAGCAATCATGATTAGCAAAAAGCGCTAACTTCGTGGAGGTCGATCCAGGATTAATAACCAATACTGTTTCCATTGTTTTCTGCTCCTTTTTTTAAACTTGTCTCATGGCAAATCTTAATGAATGAAATTTGCTTTCTGTCGAATCACTTCTAGATGTTAAAACGACTGGGACTTTTGTGCCAACAATCGTTCCTCCCACCTTTGCGTGCCCAAACAAGGTTAATGATTTGTATAAACAATTTCCAACATCAATAGTTGGGACCACTAAAATATCTGCATCCCCCATGATAGGACCGCTGTAACGTTTATGCGCAACCGCTTCTTCAGAGGTCGCTAAATCAAGCGAAAGTGGCCCAAAAACCGTAGCCTC
TTGTTGATCATTAAAATGTGCCGTGACTTCTTTTGCTAAAACAGACGAAGGCATTTTAGGATTGAAATTTTCCGCTGCGCTTAACAAAGCAATTTTCGGATGATGCAGTCCCAATTTTTGGGCGACTTCTTTAGCATTTTCAACAATTTCAATGAGGGTCGCTTGAGTGGGGGCGATATTCATCGCACAATCGGTTAACAAGAAGGTTTTTCCCGCAGGCAGCTCCACCATTGCTACATGGGAAAGAATCGGTTTATTTTTTAATTGATGCTCACTTTTCAACATTTCTTTTAGTAATGTGTGAGTCTGAATAATTCCTTTCAATAAAATTTGTGCTTGACCGGTTGCAACTAGACTGACAGCTTCCTGTGCTACCGCGGCCTCATCTGAGCAATGAACATATTTCCAGAGATTTTCAGTATCAAGATTTTCATTTGTATCAAATACAATAAACTGCAACGGTTGCTCGGCTTCTTTTAGCGCTTTTTTAACTAGCTGTAAAATCTCTGGTTGTGAACCTCCTGCAATTGAAACAGTGATCATTTTCTCACCTCATTTTTCTTCTTTACACTTTAAATCTACCTCAGTATGTAAGCGCATACAAATTGTTAATTTTTATGTCCTTATTAATATTTTTTATAGAGAAGGAAAAGCTTAGTAAATCAAGCTTTTCTTTCTCTAAACAATTGTTATCTAATTTACTTTTCATGAAAAAACTTCTTTTCTAAGAATGAAAGGAAAACTTATGCTTCGGCACAGCTGTTTATTTATGTCAGATTCTAAAGCATGTTATACTTTAGATACTTATTTTTAGGAGGAATTAAATGGCACTACTACTATTCTTTTTGTTTATCGCCCTACTAGGATTTGGTATATTAAAAATTAATAACCGCAGTATCCTCGGCGGTATCACTCTGGCTTCCGGCACCTTATTGTCATTAGTCACCTTACTATTTATCGGACTAGACAAAATTTATTTACATTTTAAGAATGGCGACCTAATTACCTTGGCCATTGCTTATCTATTAATTCCCGCTGTGTTTATCGGCATTTGCCTTTACTTTATTTTTAACTCACGTACGATGCAAACGAAAGAAGGCAAAAGTGTCACGGCTAAATTGTCGGCGGGCTTAGGGTTGAACTTATTAATTGTTTTACCAGCCTTTCTATATTTATTATCAGTTGGCACAGCGCAAATACCTTATGTGCTTTTTCTGTTCTTACTTTTTCTATTGTTAATGGATCTTTTGCTGACCTTTCTATTTGCTGCTTACGTCTTGTATTCATGGATGTACCAAATGATTCCTTTAAAAAAAGCGGTTGATTATATCATTGTTTTAGGTTCGGGAATTCGTAGCGAGGAAGTACCTCCACTTTTGAAGAGTCGGTTAGATAAAGGGATTGAATATTATGAAAAAAATCCCACCGCTAAATTTGTAGTCAGCGGTGGTCAAGGTCCTGACGAACCTGTGGCCGAAGCTTTCGCAATGAAAAAATATTTGCTTTCACAAAATATTCCTGCAGAAGCAATTTTGATGGAGGACCAATCCACAACCACTTATGAAAATATGTTGTTTTCTAAAGCAATAATTCAGGCAGATTGGCAAAAGATGCCGTCTGATTCTAAACAACCCTCTGTAATTTTTTCAACAAATAACTATCATGTCTTACGAGGAGCTATGTATGCCCATCGTGTCGGCTTAAAAGCTGAAGGTGTCGGGGCGCCAACTGCCTTATATTTTTTACCAACTGCTCTAATCCGAGAATACACTGCCTTACTGGTTCATGATAAGCGAATTGTGCTTTTTGTTTTTCTACTTGTCACTCTTCTTTTAGGAATCAGTATCTTACCCATCTAAAAAAACAGCGACTCACATTTTTGTGAGTCGCTGTTTTTATTCTTTTAATATAAAAAGGTTTAATGAACGTAATTGGCTATCAATGCAATCAGCCATAAATGAAGCGGATAGAATCCGTAAAATAAATATTTGAAAAATGGT
GCTTTGCTCCCTCTCTCACCATTGTAAAGCGATATAAATGGTAACACAGTGATAAACATAAAATCAGAATTATAAGCTAACATTTCAATTGTCGTTGGCCAATCTCCCAACCATTGAAAACTTGTAACTAAGAAAAAGAGCGCTAATGCACCATATAAACAGTTCCGTAAGACAAGTCTCTTTCTGGCAAGATACGTAATCAACATAAAAGGTAACATCACAACGCCACCTTCAGCAAACATTGCTCCTATTCCTAAAATGGCTAATATACTAATCAATAAAACAATTTTTAAACTGATTTTTGGCATCTCTAGCATCGCTTTTGTCACAATTAACATACTGACACCAAGGGCTAAGGTGAAGAAAATATTGTTGTGAACAGCGACAGCTGGATTATTCACTAAATGATTTAGAAGCGTATTTCCAACAAACATGATTGCTGCCCAAATATACAAACGACCATTGTAACGATAAATATTCCGTGTATAGTTGAACCCTTCTACAGCCATGTAACCAAAAAACACACCGACACAACGAGTAATTACATGAAAAATCAGTGCCCATTCTGGCGGAACAAAATAACTAATATGGTCAAGAACCATTAACCCCATCATCAATAATTTTAAGCGATTTGCGTTCATGAATCGTCTCCTTTATTTTTTCTACAGAAACTATCATATCTTTTTATATAACAGAAAACAACGAACATTCTTGTTAGATTCAACCATTCTTTTTGTCACTTTTGTTAAACTTGGTACCGCAAAGATAAATCTTCAAATAAATAATGCTGGTATTTAAATTGTTCAAAATGAATTTTTCGAATTTCTTGCTTAATGGCTATTAAGCATTCATCGACCGTCATCAATGATTCTTCTTTCAGAAGCGCCTCTTCTATTAATAATTTGCATTGGCAGTCAATCAATAAAAGTTGACTATAGAATGTGTAAAATGTTTCTTTATTTAATTTTTCAATCAAATTTTTAGCTTGATTCTGCAGCTTTTGATAACATTTGGTCAATTTTTGATAATCCATTAAATCAACTAACTGACCTTGTTTTAACATGTTACTAAAGTCTAAATCCCAAAGCATTTCTTTAGTCGGCTGTTGCAATAAATAATACATAGAATGACTACTAATAATTTCAGTGGTCGGTTCTTCAATTGTTTCATAAAAAGTCACAAAATATTCTCGCTCAATCACAGTAATAATTTCTTCTTCTGAAAGCTCACAATCTTCTAAATTTTGATAGAATATCAATAATTTAATCCGCTCATCTATTTTATTCAGCGAAGCGACTAATTCAGGAAAAAGAACCGACTGACAATTCTCTACTAATTCTCTTGCCTTATTTTGTTCATTTTTATAAAAGCTGTATAACTTTTTCCGCTGTTTTAACGAGTTTAATAAGTTATCGGGTATATTCCAGTAATATTGTTTCTTCATAGCATTCCCACTCTTCTTTTTTATGTACAATTTTACAAAATTGGTCTTAGCCAATTTCGTCTCTAGTTATTCTATCAAAAAAGTGACGCTTTTTTACTGACAATAATTACTCGTTTTACGCCCAAAAAGTGAAAATCTAGCTGTTAAGTAATTATTGGGGCTTGTTTCCCGCGGGCCGAAAAAACTTTTTCCTTTTAAAAGTCCAGAAAAAGACCTACACTAGAGATAGACAAACGTTGGAGGGATTTAAATGGATTTACACTTAACGAATAAATTAGCATTAATTACTGGTTCAACCAAAGGAATTGGCAAAGCAATTGCGATTGAAATGGCTCGCGAAGGGACCGATGTCATTATCAATGGGCGTAATGAAGCCGAAGTAATCAAAGTTGTTGAAGAAATACAAACAATGTTTCCAGACACTCATCCTCAAGCAGGAACTGCCGATATTTCCATTGAAAGTCAACGAACTACTTTGCTTGAAAAATTCCCTAAAGTCGACATTTTAGTGAACAATATGGGGATTTTTGAACCAATGGAATACTGGGACATCGAT
GACGCCACTTGGGAAAAATTTTTTACTGTGAACGTGTTGTCAGGCAATGCATTAGCAAAAGCTTATCTACCTAAAATGCTTGCACAAGATTTTGGTCGCATTATTTTCATCGCTAGCGAAGAAGCGGTGATGCCTTCTGGCGAAATGCCCCAATATAGCATGACAAAAACGATGAATCTTTCCTTAGCTAAAAGTTTATCCAACTTAACTGTCGGCACACATGTCACCGTTAACACGGTTATGCCTGGCTCAACCCTTACCGAAGGTGTAGAAAAAATGTTGGAAGATATGTACGCTGATTCAGACATTCCCAAAGAGGATTGGGAAAAAGATTTCATGAAAAATCATCGTTCTCGTTCACAAATCCAACGGCTCATTCGTCCAGAAGAAATTGGTCGTTTTGTTACCTTTGTGGCCAGCCCGGATTCTTCTTCCTTCTCAGGCGAAGCCTTAAGAATCGATGGCGGCTTAGTTCCAACGATCTTCTAAATTAAACAAAAAAGACAGCCAGCAAAAATTCACATTTTTTTGCTGGCTGTCTTTTTAAGCATATTAATTGATTACTTACCAAAAGTAAGTTATATTTATCTCGAATTCGAAATAAAAAGAGGTGAAACAATGAACCAACAGCAAGAAGCTTTAAAAGCCTATATCGGTTTATTAAGAACCAGCCATCGACTAGAGCAACTTGCCAAGCAAGATGTTACTTGTTATGACTTAAACATTACAGAATTTTCAGTGTTAGAGCTGTTACTCCATAAAGGTCCTCAGACCATCCAAAAAATCAAGGAGAAAATTTTAATCGCTAGCAGTAGCACCACTTATGTTATTGACCAATTACATAAAAAAGGCTATGTAACGCGCACTCCCAGTGAAAAAGACCGACGCATTACTTACGTCGAATTAACAGAAGCTGGAAAAACATTAATTAAAGAAATTTTCCCGACGCATGCAAAGCGAATTGCAGAAGCATTTGAACAACTCTCTTCCGAAGAATTAACACTTCTTCAAAAAACTTTACGAAAAATAACAAATGAAACGAAATGAGGAAATAATGATGAAAAAAGAAGATCAATTATTAGGAATCCACCACGTTACAGCTATGACAAGTGATGCAGAAAAAAACTATCACTTCTTTACAGATGTTTTAGGGATGCGTTTAGTCAAAAAAACAGTGAATCAAGATGATATCTATACTTACCATACCTATTTTGCTGATGATTTGGGTACACCAGGTACAACCATGACCTTTTTCGATTTTCCCAATAACCCTAAAGGATTAAAAGGAACCAATACAATTTCAAGAACAGGGTTCCGGGTTCCTTCAGATGCAGCTTTGACTTATTATGAAAATCGCTTCAATGAATTTGCTGTCAAACACACAGGTATTTCTGAAGAATTCGGGAAAAAAGTCCTTCGCTTTTGGGATTTTGATGATCAAGCGTATCAATTAATCTCTGATGAATTAAATCAGGGCGTTGCAGCGGGCACCCCTTGGAAAAAAGGACCTGTTCCAACAGAATTTGCGATTTATGGATTAGGACCTGTCGAAATAGCTATTTCCTATTTTCATGAATTCAAAGAAGTCTTTGAAGAAATTCTAGGCTTTCACCTAGTGGCACAAGAAGGCAATCGCTATTTACTAGAAGTTGGCCAAGGTGGCAATGGTGCCCAAGTCGTTTTGGTAGACGATGATACTAGCTCACAAGCGCAACAAGGATATGGTGAAGTACATCACGTTGCATTCCGCCTAGCGGATCGTAAATCACTTGGGACTTGGCAAGCGCTCTTTGATCATTTAGGCTTACAAAACTCTGGCTATGTCGATCGTTATTACTTTGAATCATTGTATGTTCGCATTGGACATATTTTAGTCGAATTAGCCACCGATGAACCAGGGTTTATGGGGGATGAACCTTACGAAACATTAGGAGAAAAGTTATCTCTTGCGCCATTTTTAGAAAACCGTCGTGAGTATATTGAGAGTGTTATCAAGCCTT
TCAATACAAAACGAGCCTAAGGAGGAAAAAAAACATGCATTCAATTTTAAAAAAAGGACATCCTGAAGCACCTGTCTTTGTGCTACTTCACGGTACAGGTGGTGATGAAACATCTCTCCTACCAATTGCCCAAGAACTAAATAAACAAGCTACTGTGCTAAGTATTCGTGGTGATGTTTCAGAAAATGGAATGAATCGTTATTTTAAGCGCCTAGCGGAAGGTCATTATGACTTAGAAGATCTAGAAAAACGCGGCGAGGCGCTTCATAAGTTTATTCAACAAGCCGCTAACGAGCATCAATTTTCATTGGATAAAATTATTTTTATTGGCTATTCAAATGGGGCCAATATCGTTATTCAATTATTGCTTACTCATCCCGATAGTTACCATCAAGCTGTCCTCTATCATCCCATGTTTCCTGTTGAATTGACCAATCAACCAGACTTGACCGACACTTCTGTTTTATTATCTCTAGGAGAGCATGACCCGATTGTTCCACTTCCTGAAAGTATGCGTGTGATTCAATTATTTCAGAATCATGGAGCAACCGTACAAGAGGTTTGGACACAAAGTCATCAATTAACTTATCAAGAAATTAAGGAAACACAAACTTGGTTGGCACATCTGTCCTCTTAACAAAAAAGTGGGTCCTAAGTCGAAATGACTTAGGACCCACTTTTTCTATTTTAAGATAACTCACTGTCTGAGCTTCTTTTTATTCTGCCATTTGAACAACTATTTTACCAACTGCATGGTGCGTTTCACTGAGTGCATGTGCATCATAAATCCCTTGTCTAGAAAAAGGGAAAACTTCACCAATAATTGATTTCACTTTTCCAGCTGCCATCAAATCAGCAATTTTTTGTAATTGTTCTCCATTTGGTTGAAGCCAAATACTTTCAGCAGAAACATTTTTTTCGGCTGCCAATTGTTTATCTTCAATGCCCACAATTGAAACAAGACGACCTGTGTTTGGTTTTAAGACGGCAAAACTATTTTTTTGAACCTCACCACCCATTGTATCAAAGACCAAATCAACATCAGCTAATACCTCTGCAAAATTCGTTGTATGATAATCAATCACTTCATCTGCACCAATTTTTTTCAGTAAGGCATGATTTTTGGCGCTAGCGGTCGTGATGACATGTGCGCCCGCTTCTTTTGCTAGTTGAATCGCATAAGTACCGACCCCACCTGCACCAGCATGAATTAAAACAGTTTCTCCTTCTTTAAGGTGACCATGATCAAACAATGCTTGCCAAGCGGTTAAACCAGCCAACGGAACGGCAGCCGCTTCTTCAAAGCTAATTGTTTCAGGGATTTTTGCTAATAAATGATCATCCACAATCGTTACTTCTGCATAGGTACCAAAACGAGTAGTTTCAGGACGAGCAAAAACTTTATCGCCAACTTGCCAATCCGTTACTTGACTCCCCACCTCTGTAATCACACCAGCGACATCCCAACCAAGAATAATCGGAAAGGACCAATCAAACATCTGTTTTAAATATCCTTCACGCAATTTCCAATCAATCGGATTAATTGATGTCACGTATTCTTTTACCAACACTTGATGTTCCGATAATTCTGGCAGTGTTACTTCTGCTTCTTCAAGTACCTCTTTACTTCCGTACTGATTAATCACAACAGCTTTCATAAACCAACTTCCTCTCATTTGTTTCATTTTTCACACTTTTAGTATACGCCTATTGCCTTCTTTTTATATAAGAATATGCTTTAGTTAAATTGAGAGAAAGTAGCTGTTTTTAGTCTATTATTCATAACGTAAAGATTCAATTGGATCTAATTTTGCTGCTCTTCGAGCTGGTAAAGTTCCTGCTAAAAAAGCAATAAACATAATAACTAAGATAATCGTTAGTGAAGATGGCAATGAAAATTGAATTAATTTAAATCCTGTTAAAGCCTTTAAAAAAGAATCCGTTGCCAAACGATTAACTAAATTTCCAACCCCAACAGCTCCTAAAATGCCTAATATTGA
GCCAAAAAAGCCAATTAAAGCCGCCTCAACACTAAAAATTGTAAAGACTTTCCCATTGCTAAGTCCCATGGCTTTCATTAAACCAATTTCTCGCGTTCTTTCTTGGACAGACATATACAGCGTGTTAATAATGCCAAAGCTTGCAGCTAACAAGGCAATTGCACCAAACATGGTTAAGACACCAGTGATTGCATTGATTATATTACGAATCATGCCAATTTCATCTTCGACAGTAGTCGCTAAATAGCCAGCTTTGTCTAAATCTTTTTTGATGTCCTTAATCTGCTCTGGCGTGCTGTCTTTTTTAACTTCAGCAATAATCATCGCGTATTGATTTTTTAAATGTTCTGGAAGATCTGCTTGATTGATTGAGACAACTTTATCAATCAATGCTTTATTCATCAGCGAGAGCCCATTTTGAATGACACTGGCATTTCTAACGCCAACAATTTTTGCTTCAATAACTTGCTCTTGTCCTTTCAAGGAACTTGAGATACCTAATTGAACTGTTTCCCCCACGGCAGCTTTACTAGATGTATAGCCGAGCGCTTTTACATATTCTGGTGACAAATTAATTTCGAAGTCCTGGCTCGTTTGCGAAACTTTACGGCCAGCCGCTAAATCAATGGTCATCTCATCTAGCGCAGAAGTTGCTGAAAATACATATTTATGCTTATCTGCTCCTTTTATATAATCGATAGCAACGGATTTCATTGGTTCCACTGATGTAACATCCGAGATTTTTTTAATCTTCTCAATATCTTTTTCAGCAAGCATACTTTGTTGTTGAATCGTACTGGTTTTTTTCTCTGGATTATATTTACTTGGTTCTGTTCCGTTACCAACATTCATCTCCATTTTTGGCTGAATAAATAACTGATTTGCACCACCGACACTGCCGACTTGTTTGTCAATATAATCATTTACACCAATATTGACTCCTGTAGTTAAAGCTATGGTAAATGCTCCAATAAAGATTGCAATAATGGTCAATACGGTTCGACCTTTATTACGCATTAAATTGGTACTAGCTGATTTTAAAATATCTCTAAATTTCATTTACTCATCTCCTCCTACAATCAAGCCGTCTCGAACATGAACTTGCCGATCACAACGTGCTGCTAAGTCTGGATCATGTGTCACAATAATTAAGGTAATTCCTTTATTTTTATTTAAATCAAATAACAATTCTTCAATTTTCTTCCCAGTGGCAGAATCTAAATTCCCTGTAGGTTCATCTGCAAAAATAATTTGCGGATTATTAACTAAAGCACGCGCTATACATACCCGTTGTTTTTGGCCACCAGATAAATTATTCGCTTTATTTTGAACTTTATCTTCCAAACCGACTGCTTTTAACGCATCTAAAGCCATCTTTTTCCTCTTGCTACCGGAAATCCCACCAATTTTTAATGGTAAAAGAACATTATTTAAAACTGTATCCTTGGCATTCATAAAGAATTGTTGAAAAACAAAACCAAATTCTTCATTTCTCGTTTTATTCAGCACTTTCTTTCTAATACTAGTAACATTTTTACCATTTAAATAAATATCTCCTGAAGTTGGTTGATCTAACAATGCCAAAATGTGCATAAATGTTGATTTCCCAGAGCCACTTTTACCAATTATGGCCACAGATTCACCTTTTTCAACTTTTAAATCAACACCTTTTAACGCATCAAATTTTGTTTCGTTCTTTCCATAACTTTTTTTGATATTTTTTGCTTCAATTACCGCCATGTTACCCCTCCATTTTTCATTCTTTGTCGTTTACTTATTTACAAAGTTTTTTACTCTTTTCATACTCCTTTCTTCTCTTTCTATTTTGACGGAAGCCCTTTCCTTTGTCATGGTAATTCCGATGTATCTCTCAATTAAAACAATAAGAACATGTTCCAAATTCACCTCATTGAACATGTTCTTATTAATTAGTTATGCATTTACGCGTTTAATTTCACGATGTTTATTCAATAAATAGGCCAAACCAAGATTAATGCCAA
CAAAGCAACTGATTGTCAGAAGAAGTGGTAGGACAGTAAAACCAGTGGATTGATAACCAACCATTGCTTGCAAGACATGGAGCATTCTCAGGCGCATTGGTGCCCCCCCTAATTGTGCAATCGTGCCCAGTAAAATAGGAATCATTAATAACAGCAATAAAACAATGAGTTTTTTTACGCCATCTATCCGATCATTAAAGACACCTGCTAACAAGCCAAGTGAGCCGCTAAGCATTAGTAAGATAAAAAAGAGCAACCACGACATGAAAAAGTTTCCTTGCGCATAGACATCAATTAATTTCATTGATAATTGAAAATGAGAAATGAAATTGCCGCTAAATACTTTAATCAGTACAAGTACAGCCAAGGATCCAACCAAAGATAAAATAGCATTACTTACAAAATTCACCAAAAAAATTGTCCAGCGGGAAAGCCCGTTTTGAATAAACAGTTTAAAATCTGTGTTCATTCCCAAAAATGATAGTATCCCCATAAAAACCAGACAAGGAATTACTGCATCAGAGCTGACAGTATTCACGTCGTTAGAAAAGAGCAACCCAATAAGAGGAAAAAGAATGCCGAACAAAGCATAAAAGCCAAAATAAATTGCTAAGGAACGGACTTGATAAATAACACGATACCGCAAAGCTGTTTTAAATTTCATTTTACTTTTCCTTCTTTCTATTGGTTAATTGCACAAAATATGTTTGTAAATTTAGCGGCGCAATTTGAACCTCTGCTTTTTCAGTCGGCAAGTCGCCGTACACGTAAGCCGTAACTGCCCCACCTAAAGTGTCCATGCCTAAGATTTCCAATGACTGAGTATAGCGCTCTACTTGTTCTTTCGGCCCAGAAACAATGCGACCGTTTTTCAAAATTGTTTCAATAGATTCTGCGCGAATCAATTTTCCTTGATCAATGATGATAATATCTTCTAATAAATTCGCAATTTCTTCAATCAAATGGGTGGAAATCACAAACGTACGGGGACGTTCTTGGTATGTCTCAATTAAATAAGTATAAAATAATTCCCGATGATTGGCGTCTAAACCTAAGACTGGCTCATCTAAAAAAATATACTCACAGGGGACGGATAGCGCCACTATTAGTTTAGCAATACTCCGATAACCAGTTGATAATTTTTTGAATGTCTTTTTACCATCTAAACCAAAATCGCTTAACATTTGTTCGGCTAAAGACCAGTCAAAGGATCCATAAAAACCTTCAGTCGTTTTAAAAATATCTTTAATTTTTAACTGAGGAGGAAATAAATTATCTTCACTCATTAAGTAAATATGATTCAAAGCAGTCTCATTATCGGTCACTGTTTCTCCCGCCAATTGAACGGAACCTGAAGTGGCAAAACTTCGATTATTAATGATATTTAATAACGTACTTTTACCAGCCCCGTTTCTGCCTAAGAGCCCATAAATCGTTTCTTTTTCAAAAGTAATTGAAATATTATCTAGTGCTTTTTTTTGATGATATTTTTTACTTACTGATTCCACCCGCAAACTCATGCGTCATACCCTCTTTCAATTAGTTGTTTTAATTGTTCAGCTGTAATTCCTAATTTTTTAGCTTCTGCGACAACTTCTAAGACTTCTTTATTTAAAAATTCTTCTTTCCGTGCACTTCTTACTCTCTCTTGAGCGCCTGGTAAAACAAACATGCCAATCCCCCGTTTCTTTTCAATCAACTGTCGTTCTACTAATAAATTCATTCCTTTTAAAACGGTGGCTGGATTGATTTGATAACTTTTTGATATTTCTGTGGTTGAGGGAATTTGTTCGCCTTCTAAGTAAGCGCCATTAAAAATTCCTTCGGCAATCTGATCGGCTACTTGCTGAAATAAAGGCTTTTCCCCCGAAAAATTAAATTCCATTTTTACACCTCGCTTAAATAGTTAACTACTTATGTAACTAACTATATATCCTAGAAATTCTTTTGTCAATAAAAAAACCATTGTTTCCAAAGAAATTTTTCTTTGGAAACAATGG
TGATACTTTTGAGTTTATTGAATGATTTCGTTCTTATAGGGTTCCATTGTATAACAATTCATCGGTTGTTTCGTTGACAATTTTTAAGCGATTTTGTTTTTTTATCAAAATACTTTTATAGGTCGTTCCTGTAGCCTTTTCTTCCATCACAAGAATGCGTTTATACTCATTATCAGTGACTTTCTTAATCGTTGTTTGATTTAAGGACGGAATTTTTTCTTTTAGCGTTTTTTCTTCCGCCAGACCTTTTAAGATTTCTGATTCTGTAGTGGAACTTGTGGGTTTATTAGAATCGCTTGATACACCTTCGTTATTAGACGCACTTTCAGATGACTTGCGACTTGATTCAGTTACTTTAGGCTGTGATGATTCACTGGTATTGGTTGAATTCATTTGAGTTTGGGTTTGATTCCCACATGCACCAATTAGCAACCCAACGCCTAAGATAGAACTTGCTAAAATTGTTTTTCTAATCATTTGCATGACCTCCTACATCTATCATTACTTTAACCTTAACATTTTTCAGCTAAGAGAACAATTTTTCAGATTAGAAAAGACCACTAGTCGTTATTTAAATAAACGAACTAGTGGTCTTACTTATTTAGGAATGATCTTCATATGAGTGAGTGGGTAATAAACAAATTGTGCTTTCCCTAAGATTTGATCTGCATGTATTGCACCAAAAGAACGGCTGTCTTTGGACATACGGCGATTATCACCAAGCACAAAATAGCTATCTTTAGGTAATTTTTCTTGCATTAACAATTCTTTTGAATCAAAATTCGTAGTATAAGGCATCGTTTCATGATCTTTTTTTCTGTTTTTAGTTAAATACGGTTCAGCGATTGGCTGATTATTGACATATAATTGATCGTTTTCGTAACGCACAGCTTCTCCTGGTAAACCAATCACACGTTTAATCAGAATCGATCCTGTGTCTGTTTTAAAGACCACCACATCAAACCGTTTAATGGCGGAGAATTTTTCCATCACAATCATATCTCCTTGATTCAAGGTTTTTTGCATAGAATGGCCATCCACCCGAACAGGAATCAGGAAAAATCCTCTTAAAATAAAAACGGCTACGATTGCTGGTACTAAAATTTTCAGAAAGTACATTAAATACCCAACATAATCACGTTTCTTCTTCAACTTCGTGCCACCTTTTTTCTAGTTTTTTTGAATTTCTTTGACTGCTGTATCGTAGGCAGCGTCATTTTCAAGAATTAATTTACCTAAAGTGGCTTCAATTTTTGTGGCTGTTTCATTGTCGATTTCTCCAGTTACAGGCAAGCCATTTTTCTGTTGAAGATCGCTAACCGCTGCTTTTGTTTCTGCGGTGTAGTTTGCATTGTTTTCATCAATAGGATACGCTAATACAGCTAAAATAGCATTTAAGTTCTGAATATCTTCAGATTGATCTCCTTCTTTTAGCGTTTTATCGCGAGGAATCAATTTTAAATAGGCATACTCGGGGTAATCAGCTTTGATGGTTGGTTCAATTCCTTTTTCGTTGATCCATTCACCTTTTGGTGTTAACCATTTTAAAACAGTCAATTTTATTTCGGTTTGGTCATTTAAATCTTTCACCGTTTGGACCGTTCCTTTGCCAAATGTTTTCGTTCCAATTAGTGGGACATTCGCAGATTCATGCAGTGCTGCAGCAAAAATTTCGGATGCACTGGCACTATTGCCATCAATGATAACTGCAACAGGTTCTTTTACTTTAAAGCCGCCGTCTAATTCTTTGGAAGCAACTTCTTTCATCGTACGTCCTTTTTTGTCTTCAAATTGAACAATTGTTTCGCCATTTTTTAAAAACATACTGGCCATTCGCTCTGCTTGATCTAGCAGTCCACCAGGATTTTGGCGTACATCAATTACAAAGGATTTAGCTCCTTTTTTACGTAAATTCGTGATTGTTTCTTTCAATTCTTGATATGTTTTTTTACCAAAGGACGTAATTTTAATCGAGCCAATTTGTGCATCTTTTTTGTCTA
ACTCACCTGTCACTGTTTTAACGGGAATTTTCCCACGTTTGATTGAGATATTTTTAGTTTCTCCCTCTCTTTGGATTGTCAATTCAACGGAAGTTCCTTTTTTACCTCGGACTTTTGAGACGACTTCTGCTAGTTTCATCCCTTTTGTAGCTGTACCATCAACTTTTTCAATAATATCGCCTTCTTTGATTCCCGCTTTTTCAGCTGGGGAATCTGCTACAGGGGCCTCTGCTACAACTGGTTCGCCATCTTTCATTGTCATAGTGGCACCAATTCCTTCAAAATTGCCAGATAAGCTTTCATTTAAATCATTGGCAGCAGATTCATTTAGATAAGTTGAGTATGGGTCTCCAATGGCTTCAGACATGCCTTTTAAGGCGCCTTCAACTAATTCATTTTTATCTACTTCACCTACATAATTGGTACTAATTTCATTATATAAATCCTGCACTTTGCTTAAATCAGCATTGGTAATTGCCCCTTCTTGACTCATCTTTTTAACACGATGATCAAAATAAATATAACTACTTCCTCCAGCTAAAAATGCGACACAGAGGAGCGAAATAATATATTGATAAAAAGGAACAGTTCGTTTGTTCTTCATAGATTTCATCTCATTTCTAACAAATTCGTCACTACTTTTGTTTACTATAACATGAATGAAAAAAGAGGAAAAGCTTTGTTTGCTTTTCCTCTGGATTTATTTATTGTTTTCAAGGTACTTTTCCCATAACTCATCAAAGATGTCCATGTTGCTTAAATAGTCAGCATTCATTTCTAAATAAGAAGAAAGTTCATGATAATCCTCGGTTTGTTTTGGAAACTGAATATCTTTGGCCGCTTCATTGGCAAAATCAGTTTCAGGATCTTTAGCCGGACCTTTTAATGTCATTAAGTAATGGTAAAAGCTTCGTCTCATGTAAATTAATCACTCCATTTTTCTTCGATAAATTCACTTCTACGTTGATGGGACAATCCATAACGCAAAGCGTCTTTTTTATAAAAGTCTTGATGGTATTCTTCAGCAGGATAAAATGGTGCTGCTGGTTCGATGGTGGTAACAATTGGTTCTGTAAAACGACCACTATTAGCTAGACGTTCTTTACTTTTCTCAGCAATTTCTTTTTGTTCTTGGGAACGATAAAAAATAACTGGCCGATAATTATCGCCGCGGTCTTGAAACTGACCGAAAGCATCAGTAGGATCTGTTTGTTGCCAATAAATATCCACTAATTGTTCGTAAGAAATAATCGCTGGATCAAAAGTGATTTCTACTGCTTCCGTATGGCCTGTTGTGTGTGTTAGAACCTGTTCATATGTTGGATTAGGCACGTGCCCACCTGTATAACCCGAAACAACTGAGATAATCCCTGGTTGTGTATCAAAAGGCTGCACCATACACCAGAAACAGCCGCCTGCAAAAATTGCTTTTTCTTCCATCTCTAACACTCCTTTTTTATTTCTCTTTTGGCAAGTATATATTCATCCGAATGTCATCGTCAACTAAATTAATCTTTTCTGCTCGAACAAATAGACCATTTTGCATTCTAAACTGATCTAAACGTAATAAAACAGTTTGATCATCTGGATTGATTTCAACCCATTTAGGTAGTTTGTAAGAACGTTTAGCAAATTTCAATACTTCTTTAATTGGCAAGCCTAACGTTCCTATAGATAAACTTTTTGCTTTTAATTGTACATTACCATTCGCCATCACGTAAGGATCAAAATATAAGTAAAAAGGAATGTCATGCCCTAACACCTGAAATGTTCCATTTAATAAAGCATCATTTTCTAAATAAAATTTATATTTAATCTCCGAGCCTTTTTGAAAGTCGGCTAAATAAAAGTCAATCAGTTTGTTCACTTGTTGTTTTTTTGACTGAATGGTGACGACTGGCTCGCCTTCTTTTTCAACAATTGCTGGTATTTTCTTTAAATCTGGTTCGCGAACTTGTGTTGCTCGAAATGTCACGAAAGCTACACTACCAATGACTAAA
CCCACTAGAACAAGAAAAGCGATTTTCCAGGGATTTCGTTTAAGATTGGTTGGTTGTGTCTTTTTAATTGATTTACTTGTTTTTGGCTTTTCTTCATTCATTTTATTCACTACCTTCTGATTTGGTTATCCATTCTTTCTCTGTCTTGACCATTTCATCCCGGACAGCTCCGGCCATAATTTGGTAGCCAAGATTATTCGGATGGAAGCGATCTTCTTCATATAACAAATTGTTTAAATCCTCTTTGCTAGCACTACTTCCTGTAGTTTCTGAGTCACCACCAGTTACGCCAACTTCATCGCCACGACCTTTATAGAGTAAATCATTGATAGGAATGAAATAGGCTCGTTTTTGCTCTTGAACCATTTCTTCGGTTGCTTGATTCCAGTTATCAACGATTTCTTGCATTTCTGTAATTTCGGAAAAGTTTAAGTAGAAGGGATTGTAAATTCCTAAGACATAAATAGGGGCCTTCTCGTTGTACTCTCGGATTTCTTCAAGTAGTCGTCTTACTCGGCGTTGATAGGCTTTTTGTGGACGATTGAACGAGCTGACTTTCAAGTCGAAAATATTACTACTAATTACTTTCATTAAGTCATTTCCGCCAACCGTTAGCGTAATCACGTCCGCAGAGGCAAGGCCTTTTTGAATTTCAGGTTTTTCTTTGATTCGTTTTAAAATTTGATCACTACGATCCCCATTTTTCCCAAAATTGTCTGTTTGAACACCGTTCAAGTTGTAGTGTTCTTTTAAATCATCTGCCACAATAGGAACAAAACCTCCACTATTCGTCAAATCGCCAATTCCTTCAGTTAAAGAATCACCGATTGCTGTATAATGAATGACTTCTTTTTGATTTTTTTGAGCAGTTGTGGCCACTTTCTCTTGCTTTAATAGTGGCTTTGCTTTGGGGATGGCCACACTTAACAATGTAAAAACACCTAGCGCAATGAGGATAGGTGTTAAGACGGTCAGCAAAATATGCTGTGTTTGTTTTTTCATCTTCGTCACTTCCTTTTAAATAAACCAACAAGAAATTTTCCTTTTCTTGTTAGAAAGAGGTTGCGTCTACTCATTTAAGAGTGAGACCCAACCTCGTTATTTCTTAGTCTGTATAGTACATAATAGCAAAGGCATTTTTTCCTGTATGTGTTGCAATGACTGGATTCGTATGTAATACGGGAATATCCATGTCTTTAAAGATTGCTTGTAATCCTTCTTTGAATCCATTTGCTAGTTCTAGCCCATCCGCATGAGAAATGCCAATTTGTCGAACATTTGGAATCTTACTTAATTCTGATTTCAATTCGTCAAACCATTTATTAAACGTTTTAACGCCGCGGCCTTTCGCTACAGGAATCAATTCCGTATTTTCAAAGTCCATGACAACTTTCATATTAAAAATGTTTGATAATAATCCTGTTGTACGGCTGATTCGTCCACCTTTAACCAAATTATCCAATGTTGAAATGCCGATGTATAATTTGGTATTTTGTTTGACGCGTTCAATTTCAGCTAAAATTTCTGGAACACCCGCTCCCGCTTGTGCCAATTTTGCTGCTTGAATGACTTGGAAAGACAAACCTTGGTCTGTAAAATCGCTATCAATCACCGTTACTTTACTTGATGATAAATTGCTAGCTTGACGGGCCGCTTCAACCGTTCCACTTAAGCCTTTTGTCATGTGAATCGAAATTACTTCACTGCCATCTTCACCTAAGCGATCATATAATTCTACAAATTCACCAATTGGTGGTTGACTCGTTTTGGGTAAGGCCTTCGCATTAGCCATCATGTCCATAAATTTTTCGCCTGGCAAATGATCATCATCTGGATAAACAACGCCATCAACCATAATTGATAAAGGCATCATATGGATATTTAATTCATCTCTAAGACTTTTTTCCATCGTACATGAAGAATCCGTTACGATTTTAACGTTTGTCATAAATTTATCACTCTTTCTTTAAAAGGAACCTGTTTTCATGGTAGAATACCAT
TAAACGGTTAATTATCTAGTTATAGTATAACAGACACTGCCGAATTTTTCATCAAATTAAAATAAAGGAAGTGAATTTATTGGAAAAAACGCATTTCTCAAAAAAATACCTAATCGTGAACGAAGTCTTGAATGCAGTTACACATGGCATAGGCGCAGGTTTAAGTATTGCTGGCTTAGTCATTTTACTTGTTAAAGGAGCTCGTTTAGGCTCACCGATTCACGTGGTATCTTATGCTATTTATGGCTCCATGTTGATTCTACTTTTTTTATCTTCAACGTTATTTCATAGTTTAATTTTTACAAGAGCCAAAAAGGTCTTTCAGGTCTTCGATCATAGTTCTATTTTCTTATTGATTGCTGGCAGCTATACGCCGTTTTGCTTAATTAGTATTGGCGGTTGGTTAGGTTGGACCTTATTTAGTTTAGTCTGGTTAATCGCCATCGTCGGTATTGTCTATAAATCCCTCACGTTGCATAAACAAGAAACAGTGAAAAACATTTCAACGATTATTTATATTGTTTTAGGTTGGCTCTGTATCATCGCTGCTCGTCCGTTATATGAATCTCTTGGGTTTACAGGAACAGCGTTATTGGTCGCAGGGGGTGTGTCTTACACATTAGGTGCAGCCTTCTATTCATTGAAAAATGTGCGGTTTATGCATGTGGTTTGGCATTTGTTCGTCATGCTTGCAGCGATCCTCATGTATTTTTCTGTTCTCTTTTATACGTAATCACAGCACAATAAGAACACGATGCTCAAAGCATCGTGTTCTTTTATTGCCCTAAATCTCTTTATTATAAAGGATTATTCTTCTTTTCGAATTATTTTAACATAAATTAGTAGAAAAGACTTTACAAAACGAACATACATTCGTATAATAACAATACGAACACTCGTTCGAATCAAATGAAGAGGTGATACACATGGAAGCAATTCGTCGTGTTGGATTTTTATTTTTTGTATTGGTTATAGGTATTTTTTTGGGAACATTAGGATTACGTCTTGCTTTTATGATCGTGACGCCGTTATTTATTCTTTGGTTTATGTCATGGGATGAGAAGCGTTATACACGTACACGTAAACAACAGCAAGCAAGATACGTTTATCGTAAGTTCCCTTAATAAATAATAAAACGGTGTGTTCCCCCCAAGCTACGCTCCGTTTTTATAAATATGTCAAAGAAAGAGCGAAGGATTAAATCCTTCGCTCTTTCTAATTAAATTAGATTAGTATTCCATTAAGGTAACAATATCGTAGCCATCAATTTTATCGCGGCCATGTAAATCCATTAATTCAATTAGGAACGCACAACCAACCACAATACCACCTAATTGTTCGACTAATTCGATGGTTGCTTTAATTGTACCACCAGTTGCTAACAAGTCATCACAAATCAAAACACGTTGGCCTGGTGTGATGGCATCTTTGTGCAACGTTAATGTATCTGAACCATATTCTAAGTCATAAGTCACTTCGATGGTTTCACGAGGAAGTTTTCCTTTTTTACGAACCGGAGCAAAGCCAACCCCTAGTTCATAAGCCACTGGACAGCCGACAATAAAGCCACGAGCTTCCGGTCCTACAACCATATCAATTCTTTTTTCTTTCGCGTAATCCACGATTTGTTTCGTTGCTTCACGATAGGCATCCCCATTAGCCATTAATGGAGAAATGTCACGGAACACGATCCCTTTCTCAGGATAATCTGGAATGCTTGCAATGTAATCTCTTAAATCCATTTTTATTTGTCCTCCTCATTCCACAGCCATTGTTGTATTGTTTGACAATCACTGTAAAGTAGAAATTCTTCTGTTTTTATTTTTTTCAATCGTTGTTGGTACACTTGACTTTCAGTCAATGGTCGATTGTCGGGTTTCTCAATACTGTTTAAAACACCGCTTTCTATTGTAACAAATCCTAAGTCAAAAAACACCTGTATCATGAAAATTAATAATTTTTCTTGGATATTTAAGTAATTTGCCACCTTCGATAGCTGG
GAACGTAGATTTACTTCTTTTTGCTGCAAAATAAATTTATACAATGTTGCAAATTGTTCACGTGAAGCCATACCGTTTAAATAAGCTTCTTCTGGTGAAATAAACATCATGTATATACGTTGTATCTCAGTCGCTTCAACAATTTCTTTAACCGTGATTGCTTCCACAGGACAGTCGACAAAAACCAGTTGTTCGATTTGATTTTGAGAGACAGCTTCCACCAATTCTTCTTGGTTTGCCCAAACAATAATATTAGCGGTTGGATCTGAAATGAATTTTTGATTTTTTTCATCAAACAAAAGATAAGCTGTTGCTTCTGAAGGAATTGGTTTCGTTTGATTATTTTTGCCACGAAAATCAAATAATTGTCTTCCCAATACAGCAAAATCAGTCACCATTAATTGTGGTTTCTTACGACCATTCCATTCATTGATGGAAAGTTGGCCTGCCACATCGGCGGTTCCTTGTGCTAGTTCATCCGCTTGAGGCCCCATTTGAAAAGCAATTGCATCCAATTGTGCCCCTTCTTGATTCATTTGAAATTTCAGATGTGCATTATCTGCACCAATTTGCCGAATTTGCGTAGGCGTGATTTCCTTAAATACAAAAGTTGGAACAGTATTATCGGTCCCAAAAGGCGCCAAAATACGCAATTGATCAATAAAAGTAGTCGTGGCCTGCGAAACCGCTAAGCTCTCACTAATGAGCAACTCTTGTCCATTAGCCATATCAATTTGATTTTTTTCAATAAAGTGTGCCAAATGTTCTTGAACAAAAGGAATATTTTCAACAGGTAAAGTCATTCCCGCTGCCATATGATGCCCACCAAAATGAGTGAATTGCTCACGCACTTCGTTCAACGCCTCATAGAGATTCAATGCGCTAATGCTACGTCCAGAGCCTTTAGCGGTCGTACCAGATTCATCTATTGCTAAAATAATGGTTGGTTTCCCCGTTTCTTGCATAATGCGGCCCGCGACAATCCCTAAGACGCCTTCATGCCAGCCTTGTTTCGCTAAAATATGGACCGGCGCATTCGGGTCGATTAAATCCAGGGCTTCTTTGGCAATCGTCGTTACAATGTCTTTTCGTTCATTGTTTTGTTGATCAATATATTTAGCGATTTCCAGCGCTTGCTCTTCATCAAATGTAGTCATTAATTCAACACCTGGTGCGGCTTCCCCTAATCGACCTAACGCATTTAAACGAGGTCCGATGGTAAAGCCAATTGATTCTTCTGAAACAGCTTCCTTTTTCACACCCGCTTCTTGTAGTAAAACGTCTAAACCAATTCGATCACCAGTTTGAATCATTTGAAGGCCCATTTTGACAAACGTTCTATTTTCATCGGTTAATGAAACCAAGTCAGCAATTGTCCCAATTGCTACCAAATCTAATAACTCAATTGGCAATTCACCAAGTAACGCAGTAGCGACTTTGAAAGCCACGCCAACACCAGCTAAATCTCCAAAAGGATAGTCTCCTTGCGGATGTCTAGGGTGGACAATCGCATAGGCTTCTGGTAATTGTTCAGGCAATTCATGATGGTCAGTAACGATTACATCTACCCCTTGAGCCATAGCGTAGTTAATTGCTTCATGTCCAGCTACACCATTATCAACGGTTACAATTAATTGGACTCCTTGTTCAATTTGTTCAGCAAATACGTCTTTATTAGGCCCATATCCATGAACAAAACGATTCGGTAGAAAGTACTGAACCATGCCACCGACTAATTCGATTGCTTCTTTCATAACCGTGGTACTAGTGATGCCATCTGCATCATAATCCCCATAGACAAGGATTTGTTCACCCGCTTCGACGGCTTGCTGGATGCGGGCTACCGCTTTCTCCATATCATGCATTAAGAACGGATCATAAATATCTTCGATAGTCGGATGTAGAAACTTACGTAATGCTTCTTCTGTCCGGATATTGCGGTGCCATAAAAGTTGGCCAATTAATGGATTAATTTGTTCTTTTTTTAATTGTTCTATAAA
TTCTACAGGTAATTCTGTCTTTGTCTGTAACTGCCATTGATAGTTTGATTTTTTCACGACGTCACTCCTAACCAAGTAATTATAGCAAAATTTTTCCAAAAACTAAAGAAAGCATTTTTTTAAATCTGAGAAAAAAACTGTGATATCTGTTAGATAAATAGCAGATATCACAGTTTTATTTTAAAAGAAACTTAGTGATAAAGTCTCTTCCTTATTTTTGTTCCTCTTCATTCACACGAGGTTTCACATAACGATCAATACGACTACCTGACACAGGCGTTGCGTCTACTTCTGTGTTTGATGTTTGTTCGTTGACCACTGGTTCTGCGACTTGTGGTGCTGCCGCTTGTTTGGCTTGCAAATCAGCCAATTGATTATCAAAAGAACGTTGAACTTTTTCCGTTTCTTCTTTGACTAATTTATCCATATCATTTTTATAAATATCCAACTCTTTTTGTACAGCTTTTAATTCCTTACGTTGGTTCCAAATAGTGGTTGTGGAGGTTAGTAACCCAACTAGTGCGCCAATAATTGCAGAACCTAAAATAATTAAGATTAACGGTCCACTAATTTTAGTAAAACCAAAGTTTACTGGAACAGCTTGATTATTTAAAACAGCAAAAATAACAACAATTAAGACCAACACAAGGCCCAAAATAACACGCCATTGATTTTTCATGACTTGATCGACTCCTTTATTTTTTATTTAAAATATTTCCCGCTATGAAATCTCCTAAATGTGGAAACAATGTATAAAAGCGGGCAGCCGCTTCCATGACAAATGGACGATTAATCTCACGTCGAGAAGTCCCCATACTCCCAACCACTTCTTTCGCTAATTTCGTAGGGTCCAAAACAATTTTATCCACCGCAGCTAGATATGTGCCTGTCGGGTCCGCTTTATCAAAGAATTCTGTTTGGATTGGTCCTGGATTGACAGTCGTTACTGCTACACCCAACGGTTTCAATTCTAAACGTAAAGCATTTGAAAAACCTAACACAGCAAATTTCGTTGCAGAATAAACGGTTGATTTAGCTGTAGCCATCTTCCCAGCCATTGAAGCAACGTTGATAATATGTCCTTGCCCAGCTTCAATCATTTTAATAGCTACTTTTTGAGTAAACGTCATCATTCCTAGAACGTTGACATCAAACATTTGGCGTGCAACAGCCAAGTCAATTTCAACAAAATTTTCAAACAAGCCAAAACCAGCATTGTTGACTAAAACATCTATAGGACCAACTTCTGCTGAAATAGCTTCAACCACACGTTCAACACTTTCTGGATCGGCAATATCTAATTGATAAGAAAATGCTTCTCGTCCGCTTAAAACAGCACACTGCTCGCGAACTTTACCAATCAAATTAATTCTGCGGGCGCAGACAACGACGACCGCCCCTTGTTTAGCTGCTTCATAACAAATTTGTTCACCTAAACCAGCTGAACCTCCAGTTACGACAACGACTTTATTTGTTAAATCCATCTAAGCTCCTCCTTCATTGGCAAATGGAATCTCAATGATATCCATATCTTTTACGATTTTTGTATTGGGGAAAATTTCTTGGGCTTCTTCTTGAAGCTGATAGGCCTCTTTGGTTAAATAACGGGCACTAATATGCGTTAAGATAAGTTGTTTTACTTGCGCTTCTTTTGCAACTTCAGCTGCTTGTTGACTGGTTGAGTGGAAGTAGGCTTTTGCCATTTTTGCTTCGTGTTTATTAAACGTACTTTCATGGACTAAAACATCCGCTCGTCGTGCTAAGGTTACGCTATTTTTGGTTTTGCGAGTGTCTCCTAAAATCGTCACGATTCGTCCAGGTTTACGCTCTCCCACAAACGCTTGTCCATTAATTTCCTGTCCATCAAAAACAATCGTTTCACCACGTTTGAGTTTGCCATACAACGGCCCAGAAGGAATCCCTAAAGCTTGAAGTTTCTCTACTTGTAATTCTCCTTCATGCGCTGCTTCCTCAATTCGATAGCCAAAGCTCGTGATGCC
ATGGTCCAAGATATTACAGCGAACAGTGAATTGTTTATCTTTAAAAATAACGTCATTTTCTTTCGTTAATTCAATAAATTTCAGTGGATAGGACAACCGCGATTGCGAGACCCGTAAAGAAGTCTTGACAAAGTCAGCAATCCCAACTGGTCCATAGATTTCTAAAGGTTCTGTCCCGCCCTGAAAAGAACGACTACTTAACAAACCAGGTAAACCAAAAATATGATCCCCATGTAAATGTGTAATAAAAATCTTTTCAATTTTTCTTGGGCGAATACTACTTTTTAAAATTTGCAATTGAGTCCCTTCACCACAATCGAAGAGCCAAACTGCATTTCGTTCATCTAATAATTTCAACGCAATCCCTGTTACATTGCGATGTTTTGCTGGTACACCAGCGCCTGTTCCTAAAAATTGTATTTCCATCTGCTACCATACTTTCTTTTAAACTTCCTTTTTATTTTAGAAGAAGTTTGCTCCTTTAGCAAATAATAATTTATCTATTTTCTCAAAGTTTTCTTCTTAGTTTACCGCATTTTACGATAAAATAACAAAAAATTAACAAACTACCATAAAAAACCGTGATTTCTTAGTATTTTAAAAGACTGTTTCCCTTTATTCAGAAAACAGTCTTTGCTCCTTATTCTTCTTTCGATGTCGCTTCTTTGTGTTGTTCTTTTTTTGCTTGCCACGATTGTTTCGCTTCATTTGCAACTTCGTTCGCATCGTTAATTTCTTGTTGGACTTCCTCTAAATCAAGCCGCGTGATTTCGCCACCTAAGTCAAACATAACCAATTGGTTGAGCGGACGATTGTCTTCTTCTTCTGCAATTAATAATAATCCCGTTTGTCCTTCATCAATCTTTTGAATCACATGTTCAAAAACCGTTTGTGCTTCTTGAATTTCTTTGGCATCTTTGCTTGCACCATACATACTTCCAGCAAACCAGCCAAAAAGAATGCCCAGAGGACCACCTAATATGCCTACTAGCATCCCAATCATGCTATCTTTTGATGTATGATTGTTGCCTGTAAAATCGATAAAATCATTGATTTTAAATTGATGCTGGCCATCATTAACATGCGTAACAACCGCCATTTGCTCACCTTTAAGCTGTCTTTCTGCCTGCATCTTTTTTATTTCGGAAAATGCTTGGTACGATTTACTTTCAATATCAAAGTTCATAATGATAATTCGTTTTGTCATACAAATCGCTCCTTTATTCTGGATACATCTATTTTAGCAAGAAAGGCTTGTCTCTAGCAATGAAAACTTTTTGGTTATGTATAAAAAAAGAAGATAGCTAGGCTACCTCCTTGTTAATGCTTATTTACCATCAAGAATATTTAAACGGACCTTTTTCGGACCATCGATACGTACACTATAAACAATTGTCCGAATTGCTTTTGCAACACGCCCTTGTTTACCAATGATACGTCCAATATCTTCTTTGGCCACAGTCAAATTATACTCGAGGAAATCAGTTGATTCCTCAATTTCTAATGAGACTGCTTCAGGTTGACTGACTAACGGACGGACAATAGTTAAAATTAACTCTTTCACATCTGCCATATTCAATCACCTTATTTCTTAACGTTTTTAGCTTCATGATGTTTTTTCATAACGCCTTCTTTTGAAAGGATGTTACGAACTGTATCAGAAGGTTGCGCACCTTTAGATAACCAATCTAAAACTAAATCTTCTTTTAAAACTACTTCTGCAGGGTCTTTCAAAGGGTTGTAAGTACCTACAGTTTCGATGAAACGTCCATCACGAGGAGAACGAGAATCAGCTACTACAATACGGTAAAAAGGACTCTTTTTAGAACCCATACGTTTTAAACGAATTTTAACTGCCATTATTAAATACACCTCCATTAACTTAATCACAAGAGTTAGTTTAACAGATTAACAACAACCTGTAAAGAGTTTTTTCTTGACACCTTTACTTTTTTTCAATTTTTTTAGCCAATTTGCAGGAAAACAAACGAAATAATGAA
GAATTTTTACAAAATTTGATAATAGTTGTACTGGTTTCTCACTTGAAAGCCTTCTGTTTGATAGAGATGAAGCGCTGCTTGATTTTCTGTTTCAACTTCTAAAAAAATAGTCGCTGAAGCTGAATTTTCTAAAATCATTCGGAGCGCAGACTGTAACACTTGACGGCCAAGACCGCGACCTCGTTGCGTTTTTGTCACAACAAAGCCATAAATCCCCCATTCGTTTTCAAAATGATCCAAACGTAGCGTGGCTAAAAGTTGATTGTTTTCTTTATAAATAAGTGTTCGTTGAAGGTCCTCCGGCAAAGGCGTACCTTCTAGTAACCTAGCCAAGGAATGCAAATCAGCCACTTGTGGTCGGAGAAGTGTCAGCTTCGCCAATTCCTGTGAGTAAGTTTCTCTTTGAAAAACCAGATACTGTTCTGAAAAAGCTGGCACAATTCCTTGTCTATTAAAATAATTCAGGCCAACAGATTGCTTGTCGTCCATCACGAGAAAGACTTCTTTCATTCCCCACAAAGCTGCTTGCTTTTCGAGTTCTTGAACAAGTTCTTCAAAAAATGGCCCTACATTTGGCGCAACAATCGTCGCTTCCAAGGTTTCCCCATCAAAACAATACCCTACGATATACGCTTGTATGGTTTGTTCCTTCCTGTATAAACCATAATAAGTATAAGCTGATTCCTCAGGAAATGTTTGTTCCAATTTATAAAAAGTTCCTTGTTTAATCATTTCCTTGTTTTTTAATTCAGTCAATGTTTGTTTCTCCAACTGTGTTAACTGTCGGCGCCAATTTATTTTTTTATTCAAGCAGTTGCTCCTTTACTCATCCTTTTACTAGCTATTATACGTGAAGTTACGAAGTTCAACAATGTACAAGGATAAAACGTTTGCGCTTTTTTTGAAAAAGTACTATCATAAAGTAAGTAAAAGAAAAATGGAGGGATTTACATGACAAAAAAAATTGGTATTTTTGTAGGGAGTTTAAGAAAAGATTCATTTAACAAATTGGTAGCAAAAACAATGGCGGACTTATTTCCAGCTGATTTCGAACCTGTTTTTATTAACATTGGTGATTTAGAATTATACAATCAAGATTTGGATGATGAAGGAACCCCCACCGAAGCTTGGACAACATTCCGTGAAGAAGTCAAACAAGTAGATGGTGTGATGTTTGTAACACCAGAATATAACCGTTCAGTACCAGCTGTTTTAAAAAATGCATTGGATGTCGGTTCACGTCCGTATGGCGAAAGCGTTTGGGATAAAAAACCAGGACTTGTGGTGAGTGTTTCACCAGGTGCAATTAGCGGATTTGGTGCGAACCATCATTTAAGACAATCATTAGTCTTCTTAAATGTTCCAACTTTACAACAACCTGAAGCCTATATTGGCGGTATTACGAACTTAATTGGTGAAGATGGTAAAATTATTGATGGCACTGTTGGCTTCTTACAATCAATTGTAGATGCTTACGTGGATTTCTTTAATCGCTTAACTGCCTAATTGAATCAATCGAGAAAAAGTGAGGCAACTTTTTAAAGGTTGTCTCACTTTTTTTATTTTTTTGCTTGACCCTCTCGTTACGTAAAGGTTTATGATAGATTTTGTCAGGAGGAATGAATGATGGAATATACAATTAAAAAAATGGCTTCACTATCTGGCGTCAGTGCACGGACGTTACGATATTATGATGAAATTGGTCTTCTTCAACCAGCCAGAATTAATTCTTCTGGTTATCGAATTTACGGGCAAGCCGAGGTGAATCGTTTGCAACAAATTCTTTTTTACCGTGAATTGGATCTGAAACTAGATGAAATTAAAGAAATTTTGGAGCAACCTGACTTTAATGTTGAACAGGCTTTATACGAGCATCAACAAAAACTATTGGAAAAGCGCAATGAAATTGATCGTCTTTTAGCCTCTGTTCAACAAACCTTACACCATTACAAAGGAGAGATAAACATGTCAGATCAGCAAAAATTCGAAGCATTCAAACA
ACAAAAAGTTCAAGAAAACGAAGAAAAATATGGTAAAGAGATTCGGGAAAAATATGGCAACGAAACGATTGAACAAGCCAACAAAAAATATTTAAACTTGACGGAAAAAGACATGCAAGCAATGCAAAACGTGGAAAAAGATTTATTTTCTAAGTTAGCCATGTATCAAAAATCACCCAAGTTGACTAGCCAACTTGCGCAAGAAATTTTCCAATTACACAAAGATTGGTTAATGTACTCTTGGTCAAGCTATTCGCCAGAAGCACATAAAGGGTTAGGGCTTATGTATGTTGGTGATGAACGATTTACGTCATATTATGAACAACACGGCGCTGGTTTTGCCGAATCGTTAAACGCAATTATCCAAAACTACGCTTAAACAGAAAGTGCGCCATCAGTCGAAGAGACTGATGGCGCACTTTCTGTTTCTTACGATAATTAACTAAAGCCTGTTTATCTTTTTTTCTTTTTCTTCTTTTTGTTTTTCTTCATCATACGATTCATAGCCATTTTACCTAGCTTGCCTTTAACGCCGCCACCTAACATTTGATCCATACCAGGAATGTTCATATCCCCTTTGGACATTTGTTGCATCATTTTTTTGGATTCTTTAAATTGTTTAATCATACGATTGACTTCAACCACACTATTTCCTGAACCAGCTGCAATTCTGCGGCGGCGACTAGGATTTAATAGATCAGGATTTTCACGTTCTGCAGGGGTCATTGATAAGACCATCGCCCGTTTCCGTGCCACATCTTTTGGATCGACTTTGACATTTTCAATACCAGGCATGTTACTCATTCCAGGGATCATTTTTAATAAGTCCTCAATCGGTCCCATGCCCATAACTTGATCCAATTGCTCAATAAAATCGTTAAAGTCAAAACTGTTTTCTTTCATTTTTTGAGCAAGTTCTTCTGCTTTTTTCTCATCGTAATCTTGTTGCGCTTTTTCAATTAGCGTCAACATGTCCCCCATACCTAGGATACGACTCGACATACGATCGGGATGGAAAATTTCTAAATCGGTTAATTTTTCACCAGAACCGACAAATTTAATCGGAGCGCCCGTTACTGCCCGAATTGACAGCGCAGCCCCCCCACGAGTATCGCCGTCCATTTTGGTAATAACAACCCCAGTAATTCCAAGCTGTTCATTAAAACTATCTGCAACGTTGACAGCATCTTGCCCCGTCATCGCATCAACAACTAACAGAATTTCATTGGGATTAGCCAACTCTTTAATTTGTTTCAATTCGTCCATTAAAGCTTCGTCAATGTGTAAACGGCCGGCCGTATCAATTAAGACATAATCATTTTTCTTTTCTTTTGCTAATGCTAACCCTTGACGAACAATTTCCACTGGATTAGCATCTGTTCCCATATCAAAAACGGGAACTTCTAATTGTTGACCTAAAACCTTCAACTGATCAATCGCTGCTGGACGATAAACGTCACCAGCGATTAAAAGCGGACGAGCGTTTTCAGTTTTCATTAAGTGTTTTGCTAATTTACCAGTAAAAGTTGTTTTACCAGCCCCTTGTAACCCTGTCATCATAATCACTGTCGGGATTTTTGGAGATTTATTCAGTTCAACCGTTTCTGAACCTAACGTTTTCGTTAATTCTTCATCAACAATTTTTACAATTTGTTGGGCTGGTGATAAGCTTTCTAATACTTCGACTCCTACTGCCCGTTCTCTGACGCGTTTTGTGAAATCTTTGACCACTTGTAAATTAACGTCGGCTTCTAATAAAGCCAAACGGATTTCTCGCATCATTTCTTTTACGTCGGCTTCGGAAACTTTTCCCTTACGACGGATTTTACTCATTGCCTGTTGTAGGCGGTTTGTTAAACTCTCAAAAGCCATAGTTTCATTCCTCTATTTCTTGAATTTGTTCAATATATTTTTTGATTTCACTATCTTTGGGATAAGTTTCTTGAACATACTGCTGTAGTTGTTCAAGATATTCTTTCCGTACAACATAGTTTGAATAA
AGATGTAACTTTTTTTCGTATTCTTCTAAAATTTTTTCGGTTCGTTTAATGTTATCATAAACCGCTTGGCGACTAACCTCATATTCTTCCGCAATTTCTCCTAAAGAAAAGTCATCAGCATAGTAAAGCTCCATATAATTCATCTGCTTTTCCGTTAATAATGTCGAATAAAATTCAAATAACGCATTCATGCGGTTTGTTTTTTCCATTTCCATGAGCCGTGCTCCCATCTTCAATCCCTTATAGGTTACCGATTTCATAGGGAATAGTCAATCTTTTTTTACTAAGTTTTTACCTTTTAAAAAGAAATGTCCGCAAGTTGCCATGTTTCTTCAGAATAAACGGTCATTCCTTGTTGAATAAAATAAGCAGTTGCGACTCCAACTCCAGCTTTTTTAATGCCAGAAAATTGTCCATCATAAATCAGCTGGCTGCCACAGCTGGGACTTTTTTCTTTCAAAATCAAGGTTGTAATTTGTTGAGTTTTTAATTTTTGATACGCGCGTTGTGCCCCTGCTTTAAATGCCTCAGTCACATCTTCCCCGTTATCCGTATATACATAAGCAGTATTCGCCCAAACATCAAAGCCATCCCCGCCAATGATTTCTGCTGGCGGACGTGGAATAGGCAACTCTCCTAAAACTTCGGGACAAATCATTATTGCTTTTCCATTAGTTACAAGTTGTTTTAACGCTTCTTGTTCTTGAGATTTTCCATCGTAACGGCAACAAATCCCACCTAAGCAACTACTAATACCAATCATTCGCTTCTCCTCATTTCATTTCTCTATTATACCATGAGTTGACTTTTCTCAAAAGTAGCATTCTCGCTTACTTTAAAGTACGTTCTTCCTCTAAAAATATGTTAAACTAACAATAGTGAAATATACACTTATTTTCAGAAATTTCTTGAAGGAGTTATGTAAATGAAAAAAGTTCTTGTCGTCGATGATGAACCATCGATTTTAACTTTGTTAACCTTTAATTTGGAAAAAGAAGGCTATCAGGTGACTACTTCCGAAGATGGTAAAAACGGCTTCGAATTAGCTTTGTCTAATCAATATGATTTTATAATTTTAGATGTAATGCTTCCTGGTATGGATGGCTTAGAAATTACCAAAGCACTTCGTCGAGAAAAAATTGACACGCCTATTTTGATTTTAACTGCCAAAGATGAACAAGTAGATAAAATCATCGGCTTAGAAATTGGGGCAGATGATTATTTAACAAAACCTTTTAGTCCCAGAGAAGTCCTAGCACGCATGAAAGCTATTTTTCGTCGTTTAAAACCTACCACGACCGAACCGCTTCAAGAGGACACACCTAAAGCTCCGCTTGTGATTGGCGAGATTCGTGTGGATGAACAAAATTATGAAGTCTTTGTGCGCAATCAGCCCATTGAACTAACACCGAAAGAATTTGAATTACTCGTTTACTTTATGAAACGCAAAGATCGGGTCATTAATCGGGAAACCTTGTTGGAACGAATTTGGCAATATGACTTCGCTGGACAAAGTCGCATCGTCGATGTCCACATTAGTCATTTACGTGATAAAATCGAGCCTGATCCCAAACGGCCCGTTTACTTAGTAACTGTCCGTGGCTTTGGCTACCGTTTTCAGGAGCCAAAACGATGAAAAAGAGACTGCGGATTGAATATTTTTTAGTTGCGGCAGTCATGCTTTTATTATTTGTCGGAAGTATTGCAGCGACCAATTTTTTCTTTCAAAAAGAAATGGTTGCTCAGCAAGAAACCTATTTAAGAAGAAAAAATACCCTTTTAACAGACCAGTTGCCCCCTTCCGTTTTCGAAAAAGGGCAACTGACAAACCAACAACAACTCCTTGTCACCCATGCGTTAGATGATGCAGAAGAACGGGTAACTTTGTTGCAAAAAAATGGTACCGTCTTTTTTGACAGCAGCCAAAATGAACCGCTAGAGTCGCACAAAAAACGACCTGAAATCGCTGCGGTTCTATCAGGAGCTACCTATGGTTCAGC
 +>4_1#NODE_16_length_39898_cov_63.337_ID_31 +CGCGCGCGCCGTTATTCAGTAGCTCTTCTGCCAGCGAAATCCCCATTTGTTCGGCATCTTGCGGCGCACCGCGGCGTTCACCGCGAATAATCTGCGAACCGTCCGGCGCGCCGACCAGCGCACGCAGCCAGATTTCGCCATCAATAAGCTCGGCGTAGCTACCAATTGGCACCTGACATCCGCCTTCGAGACGGGTATTCATGGCGCGTTCTGCGGTAACGCGCAGTGCAGTTTCGTGGTGATTCAGCGCGGCAAGCAGCTCGCGAGTGCGTGAATCATCAAGGCGGCATTCAATACCCACCGCACCTTGTCCTACCGCCGGAAGAGAAATCTCGGGTGGCAACGCGGCGCGAATACGTGACTCCAGACCTAAACGTTTTAGTCCGGCTACGGCAAGAATGATGGCATCGTATTCGCCGTTATCCAGTTTGCTCAGGCGAGTGCCGACGTTGCCGCGCAGGGAGCGGATAATCAGATCCGGACGGCGTTCAGCCAGTTGGCACTGGCGACGTAAACTGGACGTCCCGACGATACTGCCTGCCGGTAACGCATCCAGACTGTCATAGTTATTGGACACAAAGGCATCGCGAGGATCTTCACGCTCACAAATAGTGACCAGTCCCAGACCTTGCGGGAATTCAACCGGCACATCTTTCATTGAGTGTACGGCGATATCGGCGCGATTTTCGAGGAGCGCGACTTCCAGCTCTTTTACAAATAAGCCTTTTCCGCCTACTTTCGCCAGCGGCGTATCAAGAATCACATCGCCGCGCGTCACCATCGGTACCAGTTCAACGACCAGGCCCGGATGGCTCGCCATCAACTTGTCTTTGACATAGTGTGCCTGCCAGAGTGCAAGTGGGCTTTGGCGTGTGGCAATTCTTAAAACATTGTCTAACATGCTTGTTACCGTCATTATCATCCGTGGTCCATCCTAACATCCTTGCCAGAGTGATGTCAGTGTTGTGGTGAAACGTAGACGCCTGCGCAAACCGTAAAATGAGGTCTGGCAGTGGATCCTGACAGGCGTTTCACGCCGTTGTAATAAGGAATTTACAGAGAATAAACGGTGCTACACTTGTATGTAGCGCATCTTTCTTTACGGTCAATCAGCAAGGTGTTAAATTGATCACGTTTTAGACCATTTTTTCGTCGTGAAACTAAAAAAACCAGGCGCGAAAAGTGGTAACGGTTACCTTTGACATACGAAATATCCCGAATGCCGCGTGTTACCGTTGATGTTGGCGGAATCACAGTCATGACGGGTAGCAAATCAGGCGATACGTCTTGTACCTCTATATTGAGACTCTGAAACAGAGACTGGATGCCATAAATCAATTGCGTGTGGATCGCGCGCTTGCTGCTATGGGGCCTGCATTCCAACAGGTCTACAGTCTACTGCCGACATTGTTGCACTATCACCATCCGCTAATGCCGGGTTACCTTGATGGTAACGTTCCCAAAGGCATTTGCCTTTACACGCCTGATGAAACTCAACGCCACTACCTGAACGAGCTTGAACTGTATCGTGGAATGTCAGTACAGGATCCGCCGAAAGGTGAGCTTCCAATTACTGGTGTATACACCATGGGCAGCACCTCGTCCGTAGGGCAAAGTTGTTCCTCTGACCTGGATATCTGGGTCTGTCATCAATCCTGGCTCGATAGCGAAGAGCGCCAATTGCTACAACGTAAATGTAGCCTGCTGGAAAACTGGGCCGCCTCGCTGGGTGTGGAAGTCAGCTTCTTCCTGATTGATGAAAACCGCTTCCGTCATAATGAAAGCGGCAGCCTGGGGGGCGAAGATTGTGGCTCCACCCAGCATATACTGCTGCTTGACGAATTTTATCGTACCGCCGTGCGTCTCGCCGGTAAGCGTATTCTGTGGAATATGGTGCCGTGCGACGAAGAAGAGCATTACGACGACTATGTGATGACGCTTTACGCGCAGGGCG
TGCTGACGCCAAATGAATGGCTGGATCTCGGTGGCTTAAGCTCGCTTTCTGCTGAAGAGTACTTTGGTGCCAGCCTTTGGCAGCTCTACAAGAGTATCGATTCCCCATACAAAGCGGTACTGAAAACACTGCTGCTGGAAGCCTATTCCTGGGAATACCCGAACCCACGTCTGCTGGCGAAAGATATCAAACAGCGTTTGCACGACGGCGAGATTGTATCGTTTGGTCTCGATCCATACTGCATGATGCTGGAGCGTGTTACTGAATACCTGACGGCGATTGAAGATTTTACCCGTCTGGATTTAGTACGTCGCTGCTTCTATTTAAAAGTGTGCGAAAAGCTCAGCCGTGAACGCGCCTGCGTAGGCTGGCGTCGCGCAGTGTTGAGCCAGTTAGTGAGCGAGTGGGGTTGGGACGAAGCTCGTCTGGCAATGCTCGATAACCGCGCTAACTGGAAGATTGATCAGGTGCGTGAGGCGCACAACGAGTTGCTCGACGCGATGATGCAGAGCTACCGTAATCTGATCCGCTTTGCGCGTCGCAATAACCTTAGCGTCTCCGCCAGTCCGCAGGATATCGGCGTGCTGACGCGTAAGCTGTATGCCGCGTTTGAAGCATTACCAGGTAAAGTGACGCTGGTAAACCCGCAGATTTCACCCGATCTCTCGGAACCGAATCTGACCTTTATTTATGTGCCGCCGGGCCGGGCTAACCGTTCAGGTTGGTATCTGTATAACCGCGCGCCAAATATTGAGTCGATCATCAGCCATCAGCCGCTGGAATATAACCGTTACCTGAATAAACTGGTGGCGTGGGCATGGTTTAACGGCCTGCTGACCTCGCGCACCCGTTTGTATATTAAAGGTAACGGCATTGTCGATTTGCCTAAGTTGCAGGAGATGGTCGCCGACGTGTCGCACCATTTCCCGCTGCGCTTACCTGCACCGACACCGAAGGCGCTCTACAGCCCGTGTGAGATCCGCCATCTGGCGATTATCGTTAACCTGGAATATGACCCGACAGCGGCGTTCCGCAATCAGGTGGTGCATTTCGATTTCCGTAAGCTGGATGTCTTCAGCTTTGGCGAGAATCAAAATTGCCTGGTAGGTAGCGTTGACCTGCTGTACCGCAACTCGTGGAACGAAGTGCGTACGCTGCACTTCAACGGCGAGCAATCGATGATCGAAGCCCTGAAAACTATTCTCGGCAAAATGCATCAGGACGCCGCACCGCCAGATAGCGTGGAAGTCTTCTGTTATAGCCAGCATCTGCGCGGCTTAATTCGTACTCGCGTGCAGCAACTGGTTTCTGAGTGTATTGAATTGCGTCTTTCCAGCACCCGCCAGGAAACCGGGCGTTTCAAGGCGCTGCGCGTTTCTGGTCAAACCTGGGGGTTGTTCTTCGAACGCCTGAATGTATCGGTACAGAAACTGGAAAACGCCATCGAGTTTTATGGCGCGATTTCGCATAACAAACTGCACGGCCTGTCAGTGCAGGTTGAAACCAATCACGTCAAATTACCGGCGGTGGTGGACGGCTTTGCCAGCGAAGGGATCATCCAGTTCTTTTTCGAAGAAACGCAAGACGAGAATGGCTTTAATATCTACATTCTCGACGAAAGCAACCGGGTTGAGGTATATCACCACTGCGAAGGCAGCAAAGAGGAGCTGGTACGTGACGTCAGTCGCTTCTACTCGTCATCGCATGACCGCTTTACCTACGGCTCAAGCTTCATCAACTTCAACCTGCCGCAGTTCTATCAGATTGTGAAGGTTGATGGTCGTGAACAGGTGATTCCGTTCCGCACAAAATCTATCGGTAACATGCCGCCTGCCAATCAGGATCACGATACGCCGCTATTACAGCAATATTTTTCGTGATGAACGTGCCGGAAAGCGAGGCTTATCCGGCATGCAATCTTAGCGGAAACTGACTGTTTCACCCGCCTGCTGCGTCGCCGCCTGTTCCAGCAAATCCCAGAAGGTTTCGCCGCTGCGATC
ACAAATCCACTCATCGCCTTTCAGGTCAAAATGGTAGCCGCCCTGTTTGGTTGCCAGCCATACCTGGTGCAGCGGCTCCTGGCGGTTGATAATGATTTTGCTGCCATTCTCAAAGGTAATGGTCAGTACGCCGCCGTTGATTTCGCAGTCGATATCGCTGTCGCCATCCCAGTCGTCCAGGCGTTCTTCAATGGTCAGCCAGAGTTGATCAGCCAGGCGATGAAATTCACTGTCGTTCATTGTTGTATCCTGTTTTTAAGTGATGGCGGCAGTATAGCGGCATGGGGTCAGGGCTTCAAAGTTTGCACCTCTGCGGCTGCGTTCCGGCACGATTCATCCGTCACCGGAATAATGATGTCTCTGTGTAGCGAAAGATTTGTCTCTTCATTAGGGCGCAGTTACACCACGTCTTTCCCTGTTTCTGGTAAACATTATGATCAGGTTTACCGAGCGAGCATCCTCACGCTGACGGAACTAAAAAAGACAACAAACAAACCACATTGCGATAGTGCATAAAGCCATCCTGGCGCGAGGTGCCGATCACGAAACTACCAGCAAAACATAAATCCCCACGAGTAAGCGTTATACTCGCAGCATTTCCTCACTTTTCAGACTTCATAAAGAGTCGCTAAACGCTTGCTTTTACGTCTTCTCCTGCGATGATAGAAAGCAGAAAGCGATGAACTTTACAGGCAATCCATAATGAAAAACGTGTTTAAGGCACTCACTGTATTACTTACTCTCTTCAGCCTGACGGGCTGCGGTCTGAAAGGTCCGCTCTATTTCCCGCCTGCAGATAAAAACGCACCGCCGCCGACCAAACCGGTAGAGACGCAAACGCAATCCACGGTGCCGGATAAAAACGATCGCGCCACTGGCGATGGTCCATCCCAGGTGAATTACTAAAAGTCAGTTTCTGTACCCGCGTGATTGGAGTAAATGATGCAGTTCTCGAAAATGCATGGCCTTGGCAACGATTTTATGGTCGTCGACGCGGTAACGCAGAATGTCTTTTTTTCACCGGAGCTGATTCGTCGCCTGGCTGATCGGCACCTGGGGGTAGGGTTTGACCAACTGCTGGTGGTTGAGCCGCCGTATGATCCTGAACTGGATTTTCACTATCGCATTTTCAATGCTGATGGCAGTGAAGTGGCGCAGTGCGGCAACGGTGCGCGCTGCTTTGCCCGTTTTGTGCGTCTGAAAGGACTGACCAATAAGCGTGATATCCGCGTCAGCACCGCCAACGGGCGGATGGTTCTGACCGTCACCGATGATGATCTGGTCCGCGTAAATATGGGCGAACCCAACTTCGAACCTTCCGCCGTGCCGTTTCGCGCTAACAAAGCGGAAAAGACCTATATTATGCGCGCCGCCGAGCAGACAATCTTATGCGGCGTGGTGTCGATGGGAAATCCGCATTGCGTGATTCAGGTCGATGATGTCGATACCGCGGCGGTAGAAACGCTTGGTCCTGTTCTGGAAAGCCACGAGCGTTTTCCGGAGCGCGCCAATATCGGTTTTATGCAAGTGGTTAAGCGCGAGCATATTCGTTTACGCGTTTATGAGCGTGGGGCAGGAGAAACCCAGGCCTGCGGCAGCGGCGCGTGTGCGGCGGTTGCAGTAGGGATTCAGCAAGGTTTGCTGGCCGAAGAAGTACGCGTGGAACTCCCCGGCGGTCGTCTTGATATCGCCTGGAAAGGTCCGGGTCACCCGTTATATATGACTGGCCCGGCGGTACATGTCTACGACGGATTTATTCATCTATGAAGCAACCAGGGGAAGAACTGCAGGAAACACTCACGGAGCTTGATGACCGGGCGGTTGTCGATTATCTGATTAAAAATCCTGAGTTTTTTATCCGTAATGCGCGCGCAGTAGAAGCGATACGTGTGCCGCATCCGGTACGCGGCACCGTTTCGTTGGTCGAGTGGCACATGGCCCGCGCACGTAATCATATTCATGTTCTGGAAGAGAACATGGCGCTGTTGATGGAACAGGCT
ATCGCCAACGAAGGCCTGTTTTATCGCCTACTCTACCTGCAGCGCAGTCTCACCGCCGCCAGCAGTCTCGACGATATGCTGATGCGCTTTCACCGCTGGGCGCGCGATCTCGGCCTGGCAGGTGCGAGTCTGCGCCTGTTTCCGGATCGCTGGCGCTTAGGTGCGCCGTCGAACCACACTCATCTGGCATTAAGCCGTCAGTCTTTCGAACCGCTGCGTATTCAGCGTTAGGGGCAGGAACAGCACTATCTTGGGCCGCTTAACGGACCAGAGCTGCTGGTGGTGCTACCGGAAGCGAAAGCGGTGGGATCGGTGGCGATGTCGATGCTGGGAAGCGATGCTGATTTGGGTGTCGTGCTGTTTACCAGTCGCGATGCCAGTCACTATCAACAAGGGCAAGGAACGCAGTTACTTCATGAAATTGCGCTGATGTTGCCGGAGCTTCTGGAGCGTTGGATTGAACGCGTATGACCGATTTACACACCGATGTAGAACGCTACCTACGTTATCTGAGCGTGGAGCGCCAGCTTAGCCCGATAACCCTGCTTAACTACCAGCGTCAGCTTGAGGCGATCATCAATTTTGCCAGCGAAAACGGCCTGCAAAGCTGGCAGCAATGTGATGTGACGATGGTGCGCAATTTTGCTGTACGCAGTCGCCGTAAAGGGCTGGGAGCAGCAAGTCTGGCGTTACGGCTTTCTGCGCTACGTAGCTTTTTTGACTGGCTGGTCAGCCAGAACGAACTCAAAGCTAACCCGGCGAAAGGTGTTTCGGCACCGAAAGCGCCGCGTCATCTGCCGAAAAACATCGACGTCGACGATATGAATCGGCTGCTGGATATTGATATCAATGATCCCCTCGCTGTACGCGACCGTGCAATGCTGGAAGTGATGTACGGCGCGGGTCTGCGTCTTTCTGAGCTGGTGGGGCTGGATATTAAACACCTCGACCTGGAGTCTGGTGAAGTGTGGGTTATGGGGAAAGGCAGCAAAGAGCGCCGCCTGCCGATTGGTCGCAACGCTGTGGCGTGGATTGAGCACTGGCTTGATTTGCGCGACCTGTTTGGTAGCGAAGACGACGCGCTTTTTCTGTCGAAACTGGGCAAGCGTATCTCCGCGCGTAATGTGCAGAAACGCTTTGCCGAATGGGGCATAAAACAAGGGCTGAATAATCACGTTCATCCGCATAAATTACGTCACTCGTTCGCCACGCATATGCTGGAGTCGAGCGGCGATCTTCGTGGTGTGCAGGAGCTGCTGGGTCATGCCAACCTCTCCACCACGCAAATCTATACTCATCTTGATTTTCAACACCTTGCCTCGGTGTACGATGCGGCGCATCCACGCGCCAAACGGGGGAAATAATGCGTTTTTACCGGCCTTTGGGGCGCATCTCGGCGCTCACCTTTGACCTGGATGATACCCTTTACGATAACCGTCCGGTGATTTTGCGCACCGAGCGAGAGGCGCTTACCTTTGTGCAAAATTATCATCCGGCGCTGCGCAGCTTCCAGAATGAAGATCTGCAACGCCTGCGCCAGGCGGTACGGGAAGCGGAACCCGAGATTTATCACGACGTGACGCGCTGGCGTTTTCGTTCGATTGAACAAGCGATGCTCGACGCCGGGCTGAGTGCCGAAGAAGCCAGTGCAGGCGCACACGCAGCAATGATCAACTTTGCCAAATGGCGCAGCCGAATCGACGTCCCGCAGCAAACTCACGACACCTTAAAACAGCTGGCGAAGAAATGGCCGCTGGTGGCGATCACCAACGGTAACGCCCAGCCGGAGCTGTTTGGTTTGGGGGATTATTTTGAGTTTGTGCTGCGCGCTGGCCCGCACGGGCGCTCAAAACCGTTCAGCGATATGTACTTTTTGGCTGCGGAAAAACTCAACGTGCCGATCGGCGAGATCTTACATGTTGGGGACGATCTCACCACTGACGTGGGTGGGGCAATTCGCAGCGGAATGCAGGCTTGTTGGATCAGACCGGAAAATGGCG
ATCTGATGCAAACCTGGGACAGCCGTTTACTGCCGCATCTGGAAATTTCCCGGTTGGCATCTCTGACCTCGCTGATATAATCAGCAAATCTGTATATATACCCAGCTTTTTGGCGGAGGGCGTTGCGCTTCTCCGCCCAACCTATTTTTACGCGGCGGTGCCAATGGACGTTTCTTACCTGCTCGACAGCCTTAATGACAAACAGCGCGAAGCGGTGGCCGCGCCACGCAGCAACCTTCTGGTGCTGGCGGGCGCGGGCAGTGGTAAGACGCGCGTACTGGTGCATCGTATCGCCTGGTTGATGAGCGTGGAAAACTGCTCGCCATACTCGATTATGGCGGTGACGTTTACCAACAAAGCGGCGGCGGAGATGCGTCATCGTATCGGGCAACTGATGGGCACGAGCCAGGGCGGTATGTGGGTCGGCACCTTCCACGGGCTGGCGCACCGTTTGCTGCGTGCGCACCATATGGACGCCAATCTGCCGCAGGATTTCCAGATCCTCGACAGTGAAGACCAGCTACGCCTGCTTAAGCGTCTGATCAAAGCCATGAACCTCGACGAGAAGCAGTGGCCGCCGCGGCAGGCAATGTGGTACATCAACAGCCAGAAAGATGAAGGCCTGCGTCCGCATCATATTCAAAGCTACGGTAATCCGGTGGAGCAGACCTGGCAGAAGGTGTATCAGGCGTATCAGGAAGCGTGTGACCGCGCGGGCCTGGTGGACTTCGCCGAGCTGCTGCTGCGCGCTCACGAGTTGTGGCTTAACAAGCCGCATATCCTGCAACACTACCGCGAACGTTTTACCAATATCCTGGTGGACGAATTCCAGGATACCAACAACATTCAGTACGCGTGGATCCGCCTGCTGGCGGGCGACACCGGCAAAGTGATGATCGTCGGTGATGACGACCAGTCAATCTACGGCTGGCGCGGGGCGCAGGTGGAGAATATTCAGCGTTTCCTTAATGATTTCCCCGGTGCCGAAACTATTCGTCTGGAGCAAAACTACCGCTCTACCAGCAATATTCTGAGCGCCGCTAACGCCCTGATTGAAAACAATAACGGGCGTCTGGGTAAAAAACTGTGGACCGATGGCGCGGACGGTGAGCCTATTTCCCTCTATTGCGCTTTTAACGAACTCGATGAAGCGCGTTTTGTGGTTAACCGCATCAAAACCTGGCAGGACAACGGCGGAGCGCTTGCCGAGTGCGCCATTCTCTACCGCAGCAACGCCCAGTCGCGGGTGCTCGAAGAGGCGTTATTGCAGGCCAGTATGCCGTACCGTATTTACGGCGGGATGCGCTTCTTCGAACGCCAGGAAATCAAAGATGCGCTCTCGTATCTGCGCCTGATTGCCAACCGCAACGACGACGCGGCCTTTGAGCGTGTGGTGAATACGCCAACGCGGGGTATTGGTGACCGGACGCTGGACGTGGTACGTCAGACATCGCGCGATCGCCAGTTAACACTCTGGCAGGCATGTCGTGAGCTGTTGCAGGAAAAAGCCCTCGCCGGGCGAGCTGCCAGCGCCTTGCAGCGATTTATGGAATTAATCGACGCCTTAGCGCAGGAAACTGCCGATATGCCGCTGCATGTACAGACTGACCGGGTAATTAAAGACTCCGGCCTGCGTACCATGTATGAGCAGGAGAAGGGCGAAAAAGGTCAGACGCGTATCGAAAACTTAGAGGAACTGGTGACGGCAACGCGCCAGTTCAGCTACAACGAAGAAGACGAAGATTTAATGCCGCTGCAGGCGTTCCTCTCCCATGCGGCACTGGAAGCAGGTGAAGGGCAGGCGGATACCTGGCAGGATGCGGTGCAGTTGATGACGCTACACTCGGCGAAAGGCCTGGAGTTCCCGCAGGTGTTTATCGTTGGTATGGAAGAGGGCATGTTCCCAAGCCAGATGTCGCTGGATGAAGGCGGGCGTCTGGAAGAAGAACGCCGTCTGGCCTACGTTGGCGTAACCCGCGCGATGCAGAAACTGACGCTGA
CCTACGCGGAAACCCGCCGTCTGTATGGTAAAGAGGTTTACCATCGCCCGTCGCGCTTTATCGGCGAGCTGCCGGAAGAGTGTGTGGAAGAGGTGCGCCTGCGCGCCACGGTAAGCCGCCCGGTCAGCCATCAGCGGATGGGTACGCCGATGGTCGAGAACGACAGCGGCTACAAGCTCGGCCAGCGCGTACGCCACGCTAAGTTTGGTGAAGGCACCATTGTCAATATGGAAGGCAGCGGTGAGCATAGCCGTTTGCAGGTGGCATTTCAGGGCCAGGGTATTAAATGGCTGGTGGCGGCATACGCCCGGCTGGAGTCGGTGTAACGTTGCCGGATGCGGTGCTGCGCACCTTATTTGGCCTAAAAAATCATTCAGATTCAATAAATTGCAACGTCATGTAGGCCGGATAGGGCGTTTACGCCGCATCCGGCATCTGCGCCATCTTCAGTATCTGACACAAAACTATCGTTTTAACCTTTCCGCTCGACGGAAATCATGGTGACAAAAGGATAGCGTTGCCACGGGATTGCCCCGCCTTTCATATACATATGTGAAATCGTGCCATCAAGATAAAGCAGCTGCTCAACGTTCAGTTTCGCTTTGGCATAACAGGCAAAATCATAAAAATTTGTTGCCTGCTGGCTCAACAAAAACACGGCGTTCCCATGTTTATTAATCCCAACACCGTTACGAATTTTGCTTGAGGCGACGTTGGGATGAATACGCGGATTAATTACACCGTTTTCCATCAACATTGGCCCTGACTGCACCGCAAACTGAATCTCTTTACTGGTTTTGAAGGCATCCAGACGAACGATGCCGACTTTATCTCCCGCGACATAAAACACGCCGCCAGGACGGATAAAGAAATTCCCTTCACCTGAAGCGAGATTTAACGCCACCTTCTGCTGACCGTTTTCGATGTACAAACCGAGCGGCGCATAGCTTTCATCATAGATGCCGCCGTTCATCGCCATCTGCACCTGACCCTGACTATTAATATCCGCCAGCAGAGCATGTAACGTTCCCCACGCTTCGCCATTGGCTTTTTGCCAGTACATTTTCACCCGCTCTGTTTGAGGATTAACGGTATACGCCTGTACGGTCAGCGTCGGATCTGAGAGTGCGCAATCATCAGCGGCAACAGCAAACAAGGGAAGTAAGGTGAGGGCGAGAAAAATCCGTTTGAGATTCAAGGTGATCATTCCTTTACCAATGAGTAGCTGATGCGCCATTATAGGTCCTGGATGTGGGATTTTTTTATCCTGTTAGCGACCTTGACGAGTACCAAAAAGCGCGAAGTTCAACTATTGTTCTGTGGTGTTCTGTTGCGTGTTGACGGCAAAATTTTGCTGGCGTAACATGCGCGCACGATCACTCTAAGAGGACATTCGCCTTGGACACACCCAGTAGATACTGGCTCACTATCCTGTCATCCAGGATCAACTCCTAAGGCTATCCCTTTTTGCTGATAGCCTTAGCGGTTGTCAGCGACCTCAATTTTTCCCGTCGCGCTGAGTCAGGCTGTTTAATGGTCTGAAACCCAATTTGTTTCTGTGTGCCCACCGAACTGTCCGATATTTTAAGCATTGGGAGTCCCGGTCATGCTGAGCGCATTTCAACTGGAAAATAACCGACTGACCCGGCTGGAAGTCGAAGAGTCACAACCCCTTGTAAATGCAGTATGGATTGATCTTGTCGAACCGGACGACGACGAGCGACTGCGCGTACAATCTGAACTTGGCCAGAGCCTGGCAACCCGCCCGGAACTGGAAGACATCGAAGCATCGGCACGTTTCTTTGAAGACGACGACGGCCTGCATATTCACTCCTTCTTCTTCTTTGAAGATGCGGAAGATCACGCCGGTAACTCCACTGTGGCATTTACCATCCGTGATGGTCGTCTGTTTACTCTGCGTGAGCGTGAACTGCCCGCTTTTCGTCTGTATCGTATGCGTGCCCGTAGCCAGTCGATGGTAGACGGTAACGCCTACGAGT
TGCTGCTGGATCTGTTCGAAACCAAAATCGAACAGTTGGCAGATGAAATTGAAAATATCTATAGCGACCTGGAGCAGTTGAGCCGGGTGATTATGGAAGGGCATCAGGGCGATGAGTACGACGAGGCGCTCTCCACTCTGGCGGAACTGGAAGATATCGGCTGGAAAGTTCGCCTGTGTCTGATGGATACCCAGCGCGCGCTCAACTTCCTGGTGCGTAAAGCGCGTTTACCGGGTGGGCAACTGGAGCAGGCGCGTGAAATCCTGCGAGATATCGAATCCCTGCTGCCGCATAACGAATCCCTGTTCCAGAAGGTGAACTTCCTGATGCAGGCGGCAATGGGTTTTATCAACATCGAGCAGAACCGCATCATCAAAATCTTCTCGGTGGTATCCGTGGTATTCCTGCCGCCGACGCTCGTTGCTTCCAGCTATGGCATGAACTTTGAGTTTATGCCAGAACTGAAGTGGAGCTTCGGCTACCCTGGCGCGATTATCTTTATGATCCTCGCGGGCCTGGCACCGTATCTGTACTTTAAGCGGAAGAACTGGTTGTAAAAAACGAGAGCGGTGGCTTAGTCTGGCTAAGCCACCTGTTATTCAAAGGCTCCAGGTATTTAACCCTTTTACCTCTTTCTCATAGAACCATTTGTTCGTGTTAACAGCAACATAGGCTGCTACGGCAATTCCCAGAATGTTAACGCCAATTAGTGCACCAACAAATAGAGATAGAATACCAAGTAATAGAACTATAATTGCTTTTTTCCATAACCCCAGGACAAATAAATATATCCAGGAACAGAAGAAAGCAATGAAATTCATTTGAATAGTTAAGCGTTGTCTTACTTTTAATGCTTTAAATGCTGCTTTATATTCTGGTGTTGCCCCCCAGAATCCAGGAAAACCATGTTGATCATAAAAATTAAATCGGTATTTCCATTTTTCACTTAATGAACCATCGTTCATATATTCCTTACTCATAAATACTCCATAACTATTGTTTTGATGAATCAGTAGGTGCAAGCATTAGCATACTGAAAGTGGAAAAATAACAAATCAAAAAAATCATCGAACCATTGCCTGAACAGGCAAAATCTTCGGCTATCATTGTGATGATAGAGATGATATATACTGCTAATGTACCAAAAACATAAGTTTTTATATAGATGAAACCACTATCACGGAGTCGCTGGCAATTCATGTTGATGACGAGATAATGGAGTACGATGGTAGAGACTATAACAAGAAAGCCTGCTTCTCCATCGTTAAAAAAGATAATAAGAAAGGCAAAAATGAAATTTATTAAAATAAATGAAAATATATAACGACGTCTGGAAATCTTACCGTTAGATGTTGGGATAAATATACGTAACATAAATTTTACATCCTTGTATGAGTCTCCGGTCAGCATGGCAATATGCCCACTCTCATGCAAGAGTCGGCATATTTTTTCAGAATATATTTATTTTTTATTTGGACGTTCTACGCTGCGTATAAATCGCATCCATCACAAAAATTGCCAGCGCCACCCAAATAAAGGCGAAAGTCACCATCTTATCGGCACCCGGTTTTTCACCATAAAACGTCACAGCCAGCAGGAACATCAGCGTCGGGCCAATGTACTGGAAAAAGCCTAACGTTGAGAGACGCAAGCGCGTGGCAGCGGCGGTAAAACACAACAGCGGTACGGTAGTGACAATACCGGCGGCGATCAGCAGTAAATTCAGCGACATCGGGTTTTGCCCCATATGGCTGGTTGAGCTGTCGGCAATAGCAAACAGGTAAATTGCCGCCACGGGCAGCAGCCACATGGTTTCGATTAACATGCCGGTTTGCGCTTCAACGGCAATCTTCTTGCGTACCAGACCGTAGAAGGCAAAACTAAATGCCAGTCCCAGCGCGATAATAGGTAGCGAACCAAAAGTCCACAGCTGGACTAACACGCCACATATCGCCAGAATCACCGCCAGCCATTGCATCCGGCGGAATCGCTCGCCGAGG
AAAATCATCCCCAGCACAATGTTCACCAGCGGGTTAATAAAGTAACCAAGGCTCGCTTCCAGCATATGGTGATTGTTCACCGCCCAGATAAACAGTAGCCAGTTGCCACCAATCAGCACGGCAGAGACTGCCAGCATAAAAATTTTCTGTGGCGTCTGAATCAGCGTTTTTAAATAGGACCACTGGCGGCAAATGCTCATCAGCACCACCATAAAGAAAAACGACCAGATCACGCGATGCGTCAGGATTTCATCGGCGGGCACGTAGTAAATCAACTTGAAGTACGCTGGCGCTATACCCCAAATAAAATAAGCGGCAAGAGCGAGTAATACGCCCTGCCGCGTTTGTTTTGCATCCATCGGGAATACTCATTTTTAATTGGTAACAGCAGTTTACCTGCTTTTATGTCTTCAACCTACCATATAGGTGGCGGTGGCACTGGCAATATAAAGCTGTTCTTCATTGTGTAATTCAACGCGGGCGACGGCGACTTTATTGCCTGCACGCAACAGGCTACTAGTAGCAGTAAAACGCTCGCCCCTGCCTGGGCGCAGATAATCAACGCGAAGATCAATGGTCCCCATCCGCGATAGCCGCTGGCGTAGTTCATCTTCACTGATGGTTTCGTGGCGGGTTAAGGTACTTCCCACGCACACCAGACCGGCGGCGACATCCAGCGCCGACGCAATGACCCCGCCGTGCAAAATGCTTTGCGCCCAGTTGCCCACCATCATTGGCTGATTTTTAAAGGCCAGCTGTGCGAACTCTTTTTCGTAACGCTCCAGTTCCATCCCCAATGCGCGGTTAAATGGCATGTGATAAACAAACATCTCACCCACTAATTTCAGGGCTTGTTCAGCGGTCAGTACGGCAGACATATCATCCTTACACTTCATTGGTTAATGAAATGTTGATTTTATGCTTCTTTGTTGTTGGTTTCTACTTTAGGAAGGGATAACTAACGGCTATGGAGTTAAGTATGTAAAATAGCCCGCAGAAAAATATTCACCTTATCAATAATTCGTTACGGAGAACACGACCGATGCGGACTCTGCAGGGCTGGTTGTTGCCGGTGTTTATGTTGCCTATGGCAGTATATGCACAAGAGGCAACGGTGAAAGAGGTGCATGACGCGCCAGCGGTGCGTGGCAGTATTATCGCCAATATGCTGCAGGAGCATGACAATCCGTTCACGCTCTATCCTTATGACACCAACTACCTCATTTACACCCAAACCAGCGATCTGAATAAAGAAGCGATTGCCAGTTACGACTGGGCGGAAAATGCGCGTAAGGATGAAGTAAAGTTTCAGTTGAGCCTGGCATTTCCGCTGTGGCGTGGGATTTTAGGCCCGAACTCGGTGTTGGGTGCGTCTTATACGCAAAAATCCTGGTGGCAACTGTCCAATAGCGAAGAGTCTTCACCGTTTCGTGAAACCAACTACGAACCGCAATTGTTCCTCGGTTTTGCCACCGATTACCGTTTTGCAGGTTGGACGCTGCGCGATGTGGAGATGGGGTATAACCACGACTCTAACGGGCGTTCCGACCCGACCTCCCGCAGCTGGAACCGCCTTTATACTCGCCTGATGGCAGAAAACGGTAACTGGCTGGTAGAAGTGAAGCCGTGGTATGTGGTGGGTAATACTGACGATAACCCGGATATCACCAAATATATGGGTTACTACCAGCTTAAAATCGGCTATCACCTCGGTGATGCGGTGCTCAGTGCGAAAGGACAGTACAACTGGAACACCGGCTACGGCGGCGCGGAGTTAGGCTTAAGTTACCCGATCACCAAACATGTGCGCCTTTATACTCAGGTTTACAGCGGCTATGGCGAATCGCTCATCGACTATAACTTCAACCAGACCCGTGTCGGTGTGGGGGTTATGCTAAACGATTTGTTTTGATGAACGGTTGAGTGGTTGGCAAATCTGGAATCCAGCATCCAGGATTACCCTCTCAGAGACTAAAAGCATTGCAGTTTCTCGCGCAGGC
GCTGAAAATAGCGCCTGTTTTTATTTCAGGCAATCGGGGTGAATGTGGCGCAGGCGGAAGTGTTGAATCTGGAGTCCGGAGCTAAACAGGTTTTACAAGAAACCTTTGGCTACCAACAGTTTCGCCCCGGCCAGGAAGAAATTATCGACACTGTGCTTTCCGGCCGCGATTGCCTCGTCGTCATGCCCACTGGTGGCGGAAAATCCCTTTGCTATCAAATCCCTGCCTTATTGCTAAACGGCCTTACCGTGGTTGTTTCACCGCTGATTTCGTTGATGAAAGATCAGGTGGATCAACTGCAAGCCAACGGCGTGGCGGCGGCGTGCCTTAACTCGACGCAAACCCGCGAACAGCAACTTGAAGTGATGACAGGCTGCCGCACCGGGCAAATTCGTCTGCTTTATATCGCCCCGGAACGCCTGATGCTGGATAACTTTCTTGAGCATCTGGCGCACTGGAATCCGGTGTTATTAGCCGTTGATGAAGCGCACTGTATCTCCCAATGGGGCCACGATTTCCGCCCGGAATATGCCGCGCTCGGTCAGTTGCGCCAGCGGTTCCCGACGCTGCCGTTTATGGCGCTGACCGCCACAGCCGACGACACCACGCGCCAGGATATCGTGCGCCTGCTGGGGCTGAACGATCCGCTGATTCAAATCAGCAGTTTTGACCGTCCGAATATTCGCTACATGCTGATGGAGAAGTTCAAACCGCTCGATCAGTTGATGCGCTACGTGCAGGAACAGCGCGGTAAGTCAGGCATTATCTACTGCAACAGCCGCGCGAAAGTAGAAGACACCGCTGCGCGCCTGCAAAGCAAGGGAATTAGCGCGGCGGCCTATCATGCCGGGCTGGAAAATAATGTTCGCGCCGATGTGCAGGAAAAATTCCAGCGCGATGACCTGCAAATTGTGGTGGCGACGGTGGCGTTCGGCATGGGCATCAATAAACCAAACGTTCGCTTCGTGGTCCACTTTGATATTCCGCGCAATATCGAATCCTATTATCAGGAAACCGGACGCGCCGGGCGTGATGGCCTGCCCGCGGAAGCGATGCTGTTTTACGATCCGGCTGATATGGCGTGGCTGCGCCGTTGTCTGGAAGAGAAGCCGCAGGGGCAGTTGCAGGATATCGAGCGCCACAAACTCAATGCGATGGGCGCGTTTGCCGAAGCGCAAACTTGCCGTCGTCTGGTATTGCTGAACTATTTTGGCGAAGGGCGTCAGGAGCCGTGCGGGAACTGCGATATCTGCCTCGATCCGCCGAAACAGTACGACGGTTCAACCGATGCTCAGATTGCCCTTTCCACCATTGGTCGTGTGAATCAGCGGTTTGGGATGGGTTATGTGGTGGAAGTGATTCGTGGTGCTAATAACCAGCGTATCCGCGACTATGGTCATGACAAACTGAAAGTCTATGGCATGGGCCGTGATAAAAGCCATGAACATTGGGTGAGCGTGATCCGCCAGCTGATTCACCTCGGCCTGGTGACGCAAAATATTGCCCAGCATTCTGCCCTACAACTGACAGAGGCCGCGCGCCCGGTGCTGCGCGGCGAATCCTCTTTGCAACTTGCCGTGCCGCGTATCGTGGCGCTCAAACCGAAAGCGATGCAGAAATCGTTCGGCGGCAACTATGATCGCAAACTGTTCGCCAAATTACGCAAACTGCGTAAATCGATAGCCGATGAAAGTAATGTCCCGCCGTACGTGGTGTTTAACGACGCAACCTTGATTGAGATGGCTGAACAGATGCCGATCACCGCCAGCGAAATGCTCAGCGTTAACGGCGTTGGGATGCGCAAGCTGGAACGCTTTGGCAAACCGTTTATGGCGCTGATTCGTGCGCATGTTGATGGCGATGACGAAGAGTAGTCAGCAGCATAAAAAAGTGCCAGTATGAAGACTCCGTAAACGTTTCCCCCGCGAGTCAAATGTATGTTGATGTTATTTCTCACCGTCGCCATGGTGCACATTGTGGCGCTTATGAGCCCCGGTCCC
GATTTCTTTTTTGTCTCTCAGACCGCTGTCAGTCGTTCCCGTAAAGAAGCGATGATGGGCGTGCTGGGCATTACCTGCGGCGTAATGGTTTGGGCTGGGATTGCGCTGCTTGGCCTGCATTTGATTATCGAAAAAATGGCCTGGCTGCATACGCTGATTATGGTGGGCGGTGGCCTGTATCTCTGCTGGATGGGTTACCAGATGCTACGTGGTGCACTGAAAAAAGAGGCGGTTTCTGCACCTGCGCCACAGGTCGAGCTGGCGAAAAGTGGGCGCAGTTTCCTGAAAGGTTTACTGACCAATCTCGCTAATCCGAAAGCGATTATCTACTTTGGCTCGGTGTTCTCATTGTTTGTCGGTGATAACGTTGGCACTACCGCGCGCTGGGGCATTTTTGCGCTGATCATTGTCGAAACGCTGGCGTGGTTTACCGTCGTTGCCAGCCTGTTTGCCCTGCCGCAAATGCGCCGTGGTTATCAACGTCTGGCGAAGTGGATTGATGGTTTTGCCGGGGCGTTATTTGCCGGATTTGGCATTCATTTGATTATTTCGCGGTGATGCCAGACGCGTCTTCAGAGTAAGTCGGATAAGGCGTTTACGCCGCATCCGACATTATTTTTCACGCATGCCTCGCCGATGCTAACAGCGCTCCCACCAGCATAAACAACGAGCCGAAAATCTTATTCAGCGCCTTCATCTGCTTTGGTCCTTTAATCCATAGAGCAATCCGTTGAGCAAGGGTGGCGTAACCGATCATCACAATAATATCGACCACAATAGTGGTGACGCCGAGCACGATATACTGCATCAGTTGCGGCTGTTGCGGCATGATGAATTGCGGAAATAGCGCCGCCAGAAACACAATACTTTTGGGATTGGTGAGATTCACAAAAACTGCGCGCTGGAACAAATGTCGACGCGATTGAGTAGAGGCCAGCGATTTAAGGTCAATTGCACCAGCGGCGCGCCACTGCTGGATTCCCAGCCAAATCAAGTAAGCCGCGCCTGCCCACTTCAACACTTCAAACGCAATCACTGAGCGGGAAAATAGCGTCCCCAACCCCACGCCAACCAGCACAATATGAATCGCCAGTCCGGTCTGAAGCCCAGCAATAGACGCCACCGCGCCGCGATAACCGTGGTTGAGCGAGGTGGTCATAGTGTTGATTGCACCAGAGCCTGGCGACAGGCTTAAAATGATCGATGTCAGCAGGTAGGCAAACCACCATTCTAAGGTCATGATGAACTCCCGGTGTGTCTATTTTTGTGCCACAATACGCTACTGTCGCAGCGTTGTGTCAGGCACGCTAAAAAAAACGATTTTACGTGGTTTAAGAGGCAGATTACCCGATGTTTCAGCAGCAAAAAGACTGGGAAACAAGAGAAAACGCGTTTGCTGCTTTTACCATGGGACCGCTGACTGATTTCTGGCGTCAGCGTGATGAAGCAGAGTTTACTGGTGTGGATGACATTCCGGTGCGCTTTGTCCGTTTTCGCGCACAGCACCATGACCGGGTGGTAGTCATCTGCCCGGGGCGTATTGAGAGCTACGTAAAATATGCGGAACTGGCCTATGACCTGTTCCATTTGGGGTTTGATGTCTTAATCATCGACCATCGCGGGCAGGGACGTTCCGGTCGCCTGTTAGCCGATCCGCATCTCGGGCATGTTAATCGCTTTAATGATTATGTTGATGATCTGGCGGCATTCTGGCAGCAGGAGGTTCAGCCCGGTCCGTGGCGTAAACGCTATATACTGGCACATTCGATGGGCGGTGCGATCTCCACATTATTTCTGCAACGCCATCCAGGTGTATGTGACGCCATTGCGCTAACTGCGCCAATGTTTGGGATCGTGATTCGTATGCCGTCATTTATGGCACGGCAGATCCTCAACTGGGCCGAAGCGCATCCACGTTTCCGTGATGGCTATGCAATAGGCACCGGGCGCTGGCGCGCGTTGCCGTTTGCTATCAACGTACTGACCCACAGCAGACAGCG
ATATCGACGTAACTTACGCTTCTATGCTGATGACCCAACGATTCGCGTCGGTGGGCCGACCTACCATTGGGTACGCGAAAGTATTCTGGCTGGCGAACAGGTGTTAGCCGGTGCGGGTGATGACGCCACGCCAACGCTTCTCTTGCAGGCTGAAGAGGAACGCGTGGTGGATAACCGCATGCATGACCGTTTTTGTGAACTCCGCACCGCCGCGGGCCATCCTGTCGAAGGAGGACGGCCGTTGGTAATTAAAGGTGCTTACCATGAGATCCTTTTTGAAAAGGACGCAATGCGCTCAGTCGCGCTCCACGCCATCGTTGATTTTTTCAACAGGCATAACTCACCCAGCGGAAACCGCTCTACAGAGGTTTAAATTTCTTATGTACCAGGTTGTTGCGTCTGATTTAGATGGCACGTTACTTTCTCCCGACCATACGTTATCCCCTTACGCCAAAGAAACTCTGAAGCTGCTCACCGCGCGCGGCATCAACTTTGTGTTTGCGACCGGTCGTCACCACGTTGATGTGGGGCAAATTCGCGATAATCTGGAGATTAAGTCTTACATGATTACCTCCAATGGTGCGCGCGTTCACGATCTGGATGGTAATCTGATTTTTGCTCATAACCTGGATCGCGACATTGCCAGCGATCTGTTTGGCGTAGTCAACGACAATCCGGACATCATTACTAACGTTTATCGCGACGACGAATGGTTTATGAATCGCCATCGCCCGGAAGAGATGCGCTTTTTTAAAGAAGCGGTGTTCCAATATGCGCTGTATGAGCCTGGATTACTGGAGCCGGAAGGCGTCAGCAAAGTGTTCTTCACCTGCGATTCCCATGAACAACTGCTGCCGCTGGAGCAGGCGATTAACGCTCGTTGGGGCGATCGCGTCAACGTCAGTTTCTCTACCTTAACCTGTCTGGAAGTGATGGCGGGCGGCGTTTCAAAAGGCCATGCGCTGGAAGCGGTGGCGAAGAAACTGGGCTACAGCCTGAAGGATTGTATTGCGTTTGGTGACGGGATGAACGACGCCGAAATGCTGTCGATGGCGGGGAAAGGCTGCATTATGGGCAGTGCGCACCAGCGTCTGAAAGACCTTCATCCCGAGCTGGAAGTGATTGGTACTAATGCCGACGACGCGGTGCCGCATTATCTGCGTAAACTCTATTTATCGTAATCGTTCTTTATTTGGTCAGTTGTCAACCTGATACTTCGCTACAATGGATACCCGTTAATCAAAGAGTTTTCCATTGTGGCGCTACTTATCATCACCACGATTCTGTGGGCCTTCTCCTTTAGCTTTTATGGCGAGTACCTTGCGGGGCACGTCGATAGCTATTTTGCGGTGCTGGTGCGCGTTGGCCTGGCGGCACTCGTTTTTCTGCCGTTTCTGCGTACCCGTGGCAATAGCCTGAAAACGGTCGGCCTGTATATGCTGGTGGGCGCGATGCAGCTTGGCGTGATGTATATGCTGAGTTTCCGCGCTTATCTCTACCTGACGGTTTCCGAGCTGCTGCTGTTCACCGTGCTGACGCCGCTCTACATCACGCTGATTTATGACATCATGAGTAAGCGCCGTCTGCGCTGGGGCTATGCCTTTAGCGCCTTGCTGGCGGTGATTGGTGCCGGGATTATTCGCTATGATCAGGTCACCGACCATTTCTGGACTGGCTTGCTGCTGGTGCAACTCTCCAATATCACTTTTGCCATTGGCATGGTGGGTTACAAACGCCTGATGGAAACTCGCCCGATGCCACAGCATAACGCCTTTGCGTGGTTCTATCTTGGCGCGTTTCTGGTGGCAGTGATTGCATGGTTCTTGCTGGGAAATGCGCAGAAAATGCCGCAAACCACGCTGCAATGGGGCATTCTGGTGTTTCTTGGCGTGGTGGCTTCCGGGATTGGCTACTTTATGTGGAACTACGGCGCGACGCAGGTGGACGCCGGAACGCTGGGCATTATGAATAATATGCACGTTTCGGCAGGGCTGCTGGTA
AACCTGGCTATCTGGCACCAACAGCCGCACTGGCCAACGTTTATTACAGGCGCGCTGGTGATCCTGGCCTCACTGTGGGTGCATCGTAAGTGGGTCGCTCCGCGCTCTTCACAAACGGCAGATGATCGCAGGCGTGATTGCGCGCTGAGCGAATAAACGCTTCCGTAACTGGCTGACGCTGCTCGCCATCGCGCACGGCGGCGTACAGTCGGCTCCACAAGCCTTCGCCCAGGGTTTTGGTCACCACCAGACCCTGGCGCTCAAAACTCTCTACTACCCAATGCGGTAGCGCGGCAATACCCATCCGCGCGGCAACCATCTGAATCAACAATAAGGTGTTATCGACGCTTTTCAGTGACGGGCTGACGCCTGCCGGCTGAAGAAAATGCCGCCAGACATCCAGTCGACTACGCTGCACCGGATAAATTAATAGCGTCTCGCTGGCGAGATCTTCCGGTGTAATTCGCGTTTTCGCCGCCAGTGGATGGTCAGGTGCTAACACCAGACGCACTTCATAGTCGAACATCGGCGAATAATGCAGGCCACTGCGCGGCAGAATATCGGACGTCATTACCAGATCCAGCTCTCCCTGTTGCAAGGCGGGCTGCGGGTCAAATGTCACGCCCGATTTAAAATCCATCTCTACCTGCGGCCAGTTCTTATGGAAATTTTCTAACGCGGGTGTCAGCCACTGAATACAGCTATGGCACTCAATGGCAATGCGCAGACGCGTCTGCTGCGGTTCATTGCAGGCTTGCAGGGCCTGGCTAATTTGCGGCAGTACCTGGTTTGCCAGTTGCAACAGGATTTCTCCCTGCGGTGTAAAGCGTAGCGGCTGGCTCTTACGCACAAATAGCCGGAAGCCAAGGCGTTGTTCCAGATCGCTAAACTGGTGAGACAGGGCGGATTGCGTCTGATGCAACGTCGCCGCAGCGGCTGCGAGCGAGCCGCAGTTCCGCAACGCTTGTAGCGTTTTCAGGTGTTTTACTTCGATCATGAAAGTCCTTCACTTCGGCATGAATAATTTGCGCTTGAGGAATATACAGTAACCGCCAATTATGGATGTGTAAACATCTGGACGGCTAAAATCCTTCGTCTTTTAAATTTATGGTGCGTTGGCTGCGTTTCTCCACCCCGGTCACTTACTTCAGTAAGCTCCCGGGGATGAATAAACTTGCCGCCTTCCCTAAATTCAAAATCCATAGGATTTACATATAATTAGAGGAAGAAAAAATGACAATATTGAATCACACCCTCGGTTTCCCTCGCGTTGGCCTGCGTCGCGAGCTGAAAAAAGCGCAAGAAAGTTATTGGGCGGGGAACTCCACGCGTGAAGAACTGCTGGCGGTAGGGCGTGAATTGCGTGCTCGTCACTGGGATCAACAAAAGCAAGCGGGTATCGACCTGCTGCCGGTGGGCGATTTTGCCTGGTACGATCATGTACTGACCACCAGTCTGCTGCTGGGTAACGTTCCGGCGCGTCATCAGAACAAAGATGGTTCGGTAGATATCGACACCCTGTTCCGTATTGGTCGTGGACGTGCGCCGACTGGCGAACCTGCGGCGGCAGCGGAAATGACCAAATGGTTTAACACCAACTATCACTACATGGTGCCGGAGTTCGTTAAAGGCCAACAGTTCAAACTGACCTGGACGCAGCTGCTGGACGAAGTGGACGAGGCGCTGGCGCTGGGCCACAAGGTGAAACCTGTGCTGCTGGGGCCGGTTACCTGGCTGTGGCTGGGGAAAGTGAAAGGTGAACAATTTGACCGCCTGAGCCTGCTGAACGACATTCTGCCGGTTTATCAGCAAGTGCTGGCAGAACTGGCGAAACGCGGCATCGAGTGGGTACAGATTGATGAACCCGCGCTGGTACTGGAACTACCACAGGCGTGGCTGGACGCATACAAACCCGCTTACGACGCGCTCCAGGGACAGGTGAAACTGCTGCTGACCACCTATTTTGAAGGCGTAACGCCAAATCTCGACACGATTACTGC
GCTGCCTGTTCAGGGTCTGCATGTTGACCTCGTACATGGTAAAGATGACGTTGCTGAACTGCACAAGCGCCTGCCTTCTGACTGGTTGCTGTCTGCGGGTCTGATCAATGGTCGTAACGTCTGGCGCGCCGATCTTACCGAGAAATATGCGCAAATTAAGGACATTGTCGGCAAACGTGATTTGTGGGTGGCATCTTCCTGCTCGTTGCTGCACAGCCCCATCGACCTGAGCGTGGAAACGCGTCTTGATGCAGAAGTGAAAAGCTGGTTTGCCTTCGCCCTACAAAAATGCCATGAACTGGCACTGCTGCGCGATGCGCTGAACAGTGGTGACACGGCAGCTCTGGCAGAGTGGAGCGCCCCGATTCAGGCACGTCGTCACTCTACCCGCGTACATAATCCGGCGGTAGAAAAGCGTCTGGCGGCGATCACCGCCCAGGACAGCCAGCGTGCGAATGTCTATGAAGTGCGTGCTGAAGCCCAGCGTGCGCGTTTTAAACTGCCAGCGTGGCCGACCACCACGATTGGTTCCTTCCCGCAAACCACGGAAATTCGTACCCTGCGTCTGGATTTCAAAAAGGGCAATCTCGACGCCAACAACTACCGCACGGGCATTGCGGAACATATCAAGCAGGCCATTGTTGAGCAGGAACGTTTGGGACTGGATGTGCTGGTACATGGCGAGGCCGAGCGTAATGACATGGTGGAATACTTTGGCGAGCACCTCGACGGATTTGTCTTTACGCAAAACGGTTGGGTACAGAGCTACGGTTCCCGCTGCGTGAAGCCACCGATTGTCATTGGTGACATTAGCCGCCCGGCACCGATTACCGTGGAGTGGGCGAAGTATGCGCAATCGCTGACCGACAAACCGGTGAAAGGGATGCTGACGGGGCCGGTGACCATACTCTGCTGGTCGTTCCCGCGTGAAGATGTCAGCCGTGAAACCATCGCCAAACAGATTGCGCTGGCGCTGCGTGATGAAGTGGCCGATCTGGAAGCCGCTGGAATTGGCATCATCCAGATTGACGAACCGGCGCTGCGCGAAGGTTTACCGCTGCGTCGTAGCGACTGGGATGCGTATCTCCAGTGGGGCGTAGAGGCCTTCCGTATCAACGCCGCCGTGGCGAAAGATGACACACAAATCCACACTCACATGTGTTATTGCGAGTTCAACGACATCATGGATTCGATTGCGGCGCTGGACGCAGACGTCATCACCATCGAAACCTCGCGTTCCGACATGGAGTTGCTGGAGTCGTTTGAAGAGTTTGATTATCCAAATGAAATCGGTCCTGGCGTCTATGACATTCACTCGCCAAACGTACCGAGCGTGGAATGGATTGAAGCCTTGCTGAAGAAAGCGGCAAAACGCATTCCGGCAGAGCGCCTGTGGGTCAACCCGGACTGTGGCCTGAAAACGCGCGGCTGGCCAGAAACCCGCGCGGCACTGGCGAACATGGTGCAGGCGGCGCAGAACTTGCGTCGGGGGTAAAATCCAAACCGGGTGGTAATACCACCCGGTCTTTTCTCATTACAGCGACTTCTTCCCACCATACTGCTTAAACCATTCCAGCATACGCTGCCAGCCATCTTCTGCAGATGCGGCATGATAGCTCGGGCGATAATCAGCGTTGAATGCATGCCCGGCGTCCGGGTACACGATAATCTCTGCTTTCGCATTAGCAGCCCGCAGCGCCTGGCGCATGGTTTCAACGCTCTCCTGCGGAATGCTGTTATCCTGACCACCATATAAGCCGAGAATCGGCGCGTTAAGATCGGTTGCGATATCAACAGGTTGTTTCGGTGAATTCAGCGACTTGTCGCCCGTCAGTTTGCCGTACCACGCCACTGCGGCTTTTAGCTGTGGATTATGCGCGGCATACAGCCAGGTGATACGTCCACCCCAGCAGAATCCGGTGATCATTAAACGATGAACATCGCCGCCGTTGCGGGAAGCCCAACTGGCGACATGATCGAGATCGGCCAGCACCTGCGAG
TCAGGCACTTTTGCTACCAGACCGCTAAGCAACGTGGGGATATCGGCAAAATCATTCGGATCGCCTTCGCGGAAGTAAAGTTCAGGTGCGATAGCCAGATACCCCTCCAGCGCCAGACGGCGACAAATGTCGCGGATATGTTCATGCACGCCAAAAATTTCCTGCACTACAATGACCACTGGCAGTGGGCCATCGCTTTGCTTTGGTCTGGCATGGTAAGCAGGCATGTTATCCCCTTGTGAAGGGATAGAGGTGAAGCCCGCCACAATCGCGTCGTCCGGGGTCTGAACGATGGTCGAAGCGAGAGGCGATGCAGCAGGTGCAAATCCAGATTGTTGTGTTGTTGCCATGGTATTCTCCGTACCCTTATAAAAATGTTGCGCAATGTTAACTATAGTCAGCATGCAACAAATCACATTGCCTGAATCGGCTCATCTTTTATGCAGTCCTGCAGAATGAAGGGTGATTTATGTGATTTGCATCACTTTTGGTGGGTAAATTTATGCAACGCATTTGCGTCATGGTGATGAGTATCACGAAAAAATGTTAAACCCTTCGGTAAAGTGTCTTTTTGCTTCTTCTGACTAAACCGATTCACAGAGGAGTTGTATATGTCCAAGTCTGATGTTTTTCATCTCGGCCTCACTAAAAACGATTTACAAGGGGCTACGCTTGCCATCGTCCCTGGCGACCCGGATCGTGTGGAAAAGATCGCCGCGCTGATGGATAAGCCGGTTAAGCTGGCATCTCACCGCGAATTCACTACCTGGCGTGCAGAGCTGGATGGTAAACCTGTTATCGTCTGCTCTACCGGTATCGGCGGCCCGTCTACCTCTATTGCTGTTGAAGAGCTGGCACAGCTGGGCATTCGCACCTTCCTGCGTATCGGTACAACGGGCGCTATTCAGCCGCATATTAATGTGGGTGATGTCCTGGTTACCACGGCGTCTGTCCGTCTGGATGGCGCGAGCCTGCACTTCGCACCGCTGGAATTCCCGGCTGTCGCTGATTTCGAATGTACGACTGCGCTGGTTGAAGCTGCGAAATCCATTGGCGCGACAACTCACGTTGGCGTGACAGCTTCTTCTGATACCTTCTACCCAGGTCAGGAACGTTACGATACTTACTCTGGTCGCGTAGTTCGTCACTTTAAAGGTTCTATGGAAGAGTGGCAGGCGATGGGCGTAATGAACTATGAAATGGAATCTGCAACCCTGCTGACCATGTGTGCAAGTCAGGGCCTGCGTGCCGGTATGGTAGCGGGTGTTATCGTTAACCGCACCCAGCAAGAGATCCCGAATGCTGAGACGATGAAACAAACCGAAAGCCATGCGGTGAAAATCGTGGTGGAAGCGGCGCGTCGTCTGCTGTAATTCTCTTCTCCTGTCTGAAGGCCGACGCGTTCGGCCTTTTGTATTTTTGCGTAGCGCCTCGCAGGAAATGCCTTTCCAACTGGACGTTTGTACAGCACAATTCTATTTTGTGCGGGTAAGTTGTTGCGTCAGGAGGCGTTGTGGATTTCTCAATCATGGTTTACGCAGTTATTGCGTTGGTGGGTGTGGCAATTGGCTGGCTGTTTGCCAGTTATCAACATGCGCAGCAAAAAGCCGAGCAATTAGCTGAACGTGAAGAGATGGTCGCGGAGTTAAGCGCGGCAAAACAACAAATTACCCAAAGCGAGCACTGGCGTGCAGAGTGCGAGTTACTCAATAACGAAGTGCGCAGCCTGCAAAGTATTAACACCTCTCTGGAGGCCGATCTGCGTGAAGTAACCACGCGGATGGAAGCCGCACAGCAACATGCTGACGATAAAATTCGCCAGATGATTAACAGCGAGCAGCGCCTCAGTGAGCAGTTTGAAAACCTCGCCAACCGTATTTTTGAGCACAGCAATCGCCGGGTTGATGAGCAAAACCGTCAGAGTCTGAACAGCCTGTTGTCGCCGCTACGTGAACAACTGGACGGTTTCCGCCGTCAGGTTCAGGACAGCTTCGGTAAAG
AAGCACAAGAACGCCATACCCTGACCCACGAAATTCGCAATCTCCAGCAACTCAACGCGCAAATGGCCCAGGAAGCGATCAACCTGACGCGCGCGCTGAAAGGCGACAATAAAACCCAGGGCAACTGGGGCGAGGTAGTATTGACGCGGGTGCTGGAGGCTTCCGGTCTGCGTGAAGGGTATGAATATGAAACCCAGGTCAGCATCGAAAATGACGCCCGCTCGCGGATGCAGCCGGATGTCATCGTGCGCCTGCCGCAGGGAAAAGATGTGGTGATCGACGCCAAAATGACGCTGGTCGCCTATGAACGCTATTTTAACGCCGAAGACGACTACACCCGCGAAAGCGCGCTACAGGAACATATCGCGTCGGTGCGTAACCATATCCGTTTGCTGGGACGCAAAGATTATCAACAGCTGCCGGGGCTGCGAACTCTGGATTACGTGCTGATGTTTATTCCCGTTGAACCCGCTTTTTTACTGGCGCTTGACCGCCAGCCGGAGCTGATCACCGAAGCGTTGAAAAACAACATCATGCTGGTTAGCCCGACTACGCTGCTGGTGGCGCTGCGCACTATCGCCAACCTGTGGCGTTATGAGCATCAAAGCCGCAACGCCCAGCAAATCGCCGATCGTGCCAGCAAGCTGTACGACAAGATGCGTTTGTTCATCGATGACATGTCCGCGATTGGTCAAAGTCTCGACAAAGCGCAGGATAATTATCGGCAGGCAATGAAAAAACTCTCTTCAGGGCGCGGAAATGTGCTGGCGCAGGCAGAAGCGTTTCGCGGTTTAGGAGTAGAAATTAAACGCGAGATTAATCCGGATTTGGCTGAACAGGCGGTGAGCCAGGATGAAGAGTATCGACTTCGGTCGGTTCCGGAGCAGCCGAATGATGAAGCTTATCAACGCGATGATGAATATAATCAGCAGTCGCGCTAGCCCATTGGGAGTAGTTAAGCCGGGTAGAAATCTAGGGCATCGACGCCCAATCTGTTACACTTCTGGAACAATTTTTTGATGAGCAGGCATTGAGATGGTGGATAAGTCACAAGAAACGACGCACTTTGGTTTTCAGACCGTCGCGAAGGAACAAAAAGCGGATATGGTCGCCCACGTTTTCCATTCCGTGGCATCAAAATACGATGTCATGAATGATTTGATGTCATTTGGTATTCATCGTTTGTGGAAGCGATTCACGATTGATTGCAGCGGCGTACGCCGTGGGCAGACCGTGCTGGATCTGGCTGGTGGCACCGGCGACCTGACAGCGAAATTCTCCCGCCTGGTCGGAGAAACTGGCAAAGTGGTCCTTGCTGATATCAATGAATCCATGCTCAAAATGGGCCGCGAGAAGCTGCGTAATATCGGTGTGATTGGCAACGTTGAGTATGTTCAGGCGAACGCTGAGGCGCTGCCGTTCCCGGATAACACCTTTGATTGCATCACCATTTCGTTTGGTCTGCGTAACGTCACCGACAAAGATAAAGCACTGCGTTCAATGTATCGCGTGCTGAAACCCGGCGGCCGCCTGCTGGTGCTTGAGTTCTCGAAGCCAATTATCGAGCCGCTGAGCAAAGCCTATGATGCATACTCCTTCCATGTGCTGCCGCGTATTGGCTCACTGGTCGCGAACGACGCCGACAGCTACCGTTATCTGGCAGAATCCATCCGTATGCATCCCGATCAGGATACCCTGAAAGCCATGATGCAGGATGCCGGATTCGAAAGTGTCGACTACTACAATCTGACGGCAGGGGTTGTGGCGCTGCATCGTGGTTATAAGTTCTGACAGGAGACCGGAAATGCCTTTTAAACCTTTAGTGACGGCAGGAATTGAAAGTCTGCTCAACACCTTCCTGTATCGCTCACCCGCGCTGAAAACGGCCCGCTCGCGTCTGCTGGGTAAAGTATTGCGCGTGGAGGTAAAAGGCTTTTCGACGTCATTGATTCTGGTGTTCAGCGAACGCCAGGTTGATGTACTGGGCGAATGGGCAGGCG
ATGCTGACTGCACCGTTATCGCCTACGCCAGTGTGTTGCCGAAACTTCGCGATCGCCAGCAGCTTACCGCACTGATTCGCAGTGGTGAGCTGGAAGTGCAGGGCGATATTCAGGTGGTGCAAAACTTCGTTGCGCTGGCAGATCTGGCAGAGTTCGACCCTGCGGAACTGCTGGCCCCTTATACCGGTGATATCGCCGCTGAAGGAATCAGCAAAGCCATGCGCGGAGGCGCAAAGTTCCTGCATCACGGCATTAAGCGCCAGCAACGTTATGTGGCGGAAGCCATTACTGAAGAGTGGCGTATGGCACCCGGTCCGCTTGAAGTGGCCTGGTTTGCGGAAGAGACGGCTGCCGTCGAGCGTGCTGTTGATGCCCTGACCAAACGGCTGGAAAAACTGGAGGCTAAATGACGCCAGGTGAAGTACGGCGCCTATATTTCATCATTCGCACTTTTTTAAGCTACGGACTTGATGAACTGATCCCCAAAATGCGTATCACCCTGCCGCTACGGCTATGGCGATACTCATTATTCTGGATGCCAAATCGGCATAAAGACAAACTTTTAGGTGAGCGACTACGACTGGCCCTGCAAGAACTGGGGCCGGTTTGGATCAAGTTCGGGCAAATGTTATCAACCCGCCGCGATCTTTTTCCACCGCATATTGCCGATCAGCTGGCGTTATTGCAGGACAAAGTTGCTCCGTTTGATGGCAAGCTGGCGAAGCAGCAGATTGAAGCTGCAATGGGCGGCTTGCCGGTAGAAGCGTGGTTTGACGATTTTGAAATCAAGCCGCTGGCTTCTGCTTCTATCGCCCAGGTTCATACCGCGCGATTGAAATCGAATGGTAAAGAGGTGGTGATTAAAGTCATCCGCCCGGATATTTTGCCGGTTATTAAAGCGGATCTGAAACTTATCTACCGTCTGGCTCGCTGGGTGCCGCGTTTGCTGCCGGATGGTCGCCGTCTGCGCCCAACCGAAGTGGTGCGCGAGTACGAAAAGACATTGATTGATGAACTGAATTTGCTGCGGGAATCTGCCAACGCCATTCAGCTTCGGCGCAATTTTGAAGACAGCCCGATGCTCTACATCCCGGAAGTTTACCCTGACTATTGTAGTGAAGGGATGATGGTGATGGAGCGCATTTACGGCATTCCGGTGTCTGATGTTGCGGCGCTGGAGAAAAACGGCACTAACATGAAATTGCTGGCGGAACGCGGCGTGCAGGTGTTCTTCACTCAGGTCTTTCGCGACAGCTTTTTCCATGCCGATATGCACCCTGGCAACATCTTCGTAAGCTATGAACACCCGGAAAACCCGAAATATATCGGCATTGATTGCGGGATTGTTGGCTCGCTAAACAAAGAAGATAAACGCTATCTGGCAGAAAACTTTATCGCCTTCTTTAATCGCGACTATCGCAAAGTGGCAGAGCTACACGTCGATTCTGGCTGGGTGCCACCAGATACCAACGTTGAAGAGTTCGAATTTGCCATTCGTACGGTCTGTGAACCTATCTTTGAGAAACCGCTGGCCGAAATTTCGTTTGGACATGTACTGTTAAATCTGTTTAATACGGCGCGTCGCTTCAATATGGAAGTGCAGCCGCAACTGGTGTTACTCCAGAAAACCCTGCTCTACGTCGAAGGGGTAGGACGCCAGCTTTATCCGCAACTCGATTTATGGAAAACGGCGAAGCCTTTCCTGGAGTCGTGGATTAAAGATCAGGTCGGTATTCCTGCGCTGGTGAGAGCATTTAAAGAAAAAGCGCCGTTCTGGGTCGAAAAAATGCCAGAACTGCCTGAATTGGTTTACGACAGTTTGCGCCAGGGCAAGTATTTACAGCACAGTGTTGATAAGATTGCCCGCGAGCTTCAGTCAAATCATGTACGTCAGGGACAATCGCGTTATTTTCTCGGAATTGGCGCTACGTTAGTATTAAGTGGCACATTCTTGTTGGTCAGCCGACCTGAATGGGGGCTGATGCCCGGCTGGTTAATGG
CAGGTGGTCTGATCGCCTGGTTTGTCGGTTGGCGCAAAACACGCTGATTTTTTCATCGCTCAAGGCGGGCCGTGTAACGTATAATGCGGCTTTGTTTAATCATCATCTACCACAGAGGAACATGTATGGGTGGTATCAGTATTTGGCAGTTATTGATTATTGCCGTCATCGTTGTACTGCTTTTTGGCACCAAAAAGCTCGGCTCCATCGGTTCCGATCTTGGTGCGTCGATCAAAGGCTTTAAAAAAGCAATGAGCGATGATGAACCAAAGCAGGATAAAACCAGTCAGGATGCTGATTTTACTGCGAAAACTATCGCCGATAAGCAGGCGGATACGAATCAGGAACAGGCTAAAACAGAAGACGCGAAGCGCCACGATAAAGAGCAGGTGTAATCCGTGTTTGATATCGGTTTTAGCGAACTGCTATTGGTGTTCATCATCGGCCTCGTCGTTCTGGGGCCGCAACGACTGCCTGTGGCGGTAAAAACGGTAGCGGGCTGGATTCGCGCGTTGCGTTCACTGGCGACAACGGTGCAGAACGAACTGACCCAGGAGTTAAAACTCCAGGAGTTTCAGGACAGTCTGAAAAAGGTTGAAAAGGCGAGCCTCACTAACCTGACGCCCGAACTGAAAGCGTCGATGGATGAACTACGCCAGGCCGCGGAGTCGATGAAGCGTTCCTACGTTGCAAACGATCCTGAAAAGGCGAGCGATGAAGCGCACACCATCCATAACCCGGTGGTGAAAGATAATGAAGCTGCGCATGAGGGCGTAACGCCTGCCGCTGCACAAACGCAGGCCAGTTCGCCGGAACAGAAGCCAGAAACCACGCCAGAGCCGGTGGTAAAACCTGCTGCGGACGCTGAACCGAAAACCGCTGCACCTTCCCCTTCGTCGAGTGATAAACCGTAAACATGTCTGTAGAAGATACTCAACCGCTTATCACGCATCTGATTGAGCTGCGTAAGCGTCTGCTGAACTGCATTATCGCGGTGATCGTGATATTCCTGTGTCTGGTCTATTTCGCCAATGACATCTATCACCTGGTATCCGCGCCATTGATCAAGCAGTTGCCGCAAGGTTCAACGATGATCGCCACCGACGTGGCCTCGCCGTTCTTTACGCCGATCAAGCTGACCTTTATGGTGTCGCTGATTCTGTCAGCGCCGGTGATTCTCTATCAGGTGTGGGCATTTATCGCCCCAGCGCTGTATAAGCATGAACGTCGCCTGGTGGTGCCGCTGCTGGTTTCCAGCTCTCTGCTGTTTTATATCGGCATGGCATTCGCCTACTTTGTGGTCTTTCCGCTGGCATTTGGCTTCCTTGCCAATACCGCGCCGGAAGGGGTGCAGGTATCCACCGACATCGCCAGCTATTTAAGCTTCGTTATGGCGCTGTTTATGGCGTTTGGTGTCTCCTTTGAAGTGCCGGTAGCAATTGTGCTGCTGTGCTGGATGGGGATTACCTCGCCAGAAGACTTACGCAAAAAACGCCCGTATGTGCTGGTTGGTGCATTCGTTGTCGGGATGTTGCTGACGCCGCCGGATGTCTTCTCGCAAACGCTGTTGGCGATCCCGATGTACTGTCTGTTTGAAATCGGTGTCTTCTTCTCACGCTTTTACGTTGGTAAAGGGCGAAATCGGGAAGAGGAAAACGACGCTGAAGCAGAAAGCGAAAAAACTGAAGAATAAATTCAACCGCCCGTCAGGGCGGTTGTCATATGGAGTACAGGATGTTTGATATCGGCGTTAATTTGACCAGTTCGCAATTTGCGAAAGACCGTGATGATGTTGTAGCGTGCGCTTTTGACGCGGGAGTTAATGGGCTACTCATCACCGGCACTAACCTGCGTGAAAGCCAGCAGGCGCAAAAGCTGGCGCGTCAGTATTCGTCCTGTTGGTCAACGGCGGGCGTACATCCTCACGACAGCAGCCAGTGGCAAGCTGCGACTGAAGAAGCGATTATTGAGCTGGCCGCGCAGCCAGAAGTGGTGGCGAT
TGGTGAATGTGGTCTCGACTTTAACCGCAACTTTTCGACGCCGGAAGAGCAGGAACGCGCTTTTGTTGCCCAGCTACGCATTGCCGCAGATTTAAACATGCCGGTATTTATGCACTGTCGCGATGCCCACGAGCGGTTTATGACATTGCTGGAGCCGTGGCTGGATAAACTGCCTGGTGCGGTTCTTCATTGCTTTACCGGCACACGCGAAGAGATGCAGGCGTGCGTGGCGCATGGAATTTATATCGGCATTACCGGTTGGGTTTGCGATGAACGACGCGGACTGGAGCTGCGGGAACTTTTGCCGTTGATTCCGGCGGAAAAATTACTGATCGAAACTGATGCGCCGTATCTGCTCCCTCGCGATCTCACGCCAAAGCCATCATCCCGGCGCAACGAGCCAGCCCATCTGCCCCATATTTTGCAACGTATTGCGCACTGGCGTGGAGAAGATGCCGCATGGCTGGCTGCCACCACGGATGCTAATGTCAAAACACTGTTTGGGATTGCGTTTTAGAGTTTGCGGAACTCGGTATTCTTCACACTGTGCTTAATCTCTTTATTAATAAGATTAAGCAATAGCATGGAGCGAGCCTCACCATCGGGTTCGGTGAAAATGGCCTGAAAGCCTTCGAACGCGCCTTCGGTAATAATCACCTTATCACCCGGATAAGGGGTTGCCGGATCGACAATGTCTTTCGGTTTATATACCGATAGCTGATGAATAACCGCCGATGGGACTATCGCTGGCGACGCGCCAAAGCGCACGAAGTGGCTGACACCGCGGGTCGCGTTGATAGTCGTGGTATGAATCACTTCTGGGTCAAATTCCACAAACAGGTAGTTGGGGAACAATGGCTCACTGACTGCAGTACGTTTTCCACGCACGATTTTTTCCAGGGTGATCATCGGTGCCAGGCAATTCACAGCCTGTCTTTCGAGGTGTTCCTGGGCACGTTGAAGTTGCCCGCGCTTGCAGTACAGTAAATACCAGGATTGCATAATGACTCTTATCCGTTTAATCGGGGCGCAAGGATAGCAAAAGCTTTACGCTAAGTTAATTATATTCCCCGGTTTGCGTTATACCGTCAGAGTTCACGCTAATTTAACAAATTTACAGCATCGCAAAGATGAACGCCGTATAATGGGCGCAGATTAAGAGGCTACAATGGACGCCATGAAATATAACGATTTACGCGACTTCTTGACGCTGCTTGAACAGCAGGGTGAGCTAAAACGTATCACGCTCCCGGTGGATCCGCATCTGGAAATCACTGAAATTGCTGACCGCACTTTGCGTGCCGGTGGGCCTGCGCTGTTGTTCGAAAACCCTAAAGGCTACTCAATGCCGGTGCTGTGCAACCTGTTCGGTACGCCAAAGCGCGTGGCGATGGGCATGGGGCAGGAAGATGTTTCGGCGCTGCGTGAAGTTGGTAAATTATTGGCGTTTCTGAAAGAGCCGGAGCCGCCAAAAGGTTTCCGCGACCTGTTTGATAAACTGCCGCAGTTTAAGCAAGTATTGAACATGCCGACAAAGCGGCTGCGTGGTGCGCCCTGCCAACAAAAAATCGTCTCTGGCGATGACGTCGATCTCAATCGCATTCCCATTATGACCTGCTGGCCGGAAGATGCCGCGCCGCTGATTACCTGGGGGCTGACAGTGACGCGCGGCCCACATAAAGAGCGGCAGAATCTGGGCATTTATCGCCAGCAGCTGATTGGTAAAAACAAACTGATTATGCGCTGGCTGTCGCATCGCGGCGGCGCGCTGGATTATCAGGAGTGGTGTGCGGCGCATCCGGGCGAACGTTTCCCGGTTTCTGTGGCGCTGGGTGCCGATCCCGCCACGATTCTCGGTGCAGTCACTCCCGTTCCGGATACGCTTTCAGAGTATGCGTTTGCCGGATTGCTACGTGGCACCAAGACCGAAGTGGTGAAGTGTATCTCCAATGATCTTGAAGTGCCCGCCAGTGCGGAGATTGTGCTGGAAGGGTATATCG
AACAAGGCGAAACTGCGCCGGAAGGGCCGTATGGCGACCACACCGGTTACTATAATGAAGTCGATAGTTTCCCGGTATTTACCGTGACGCATATTACCCAGCGTGAAGATGCGATTTACCATTCCACCTATACCGGGCGTCCGCCAGATGAGCCCGCGGTGCTGGGTGTCGCACTGAACGAAGTGTTTGTGCCGATTCTGCAAAAACAGTTCCCGGAAATTGTCGATTTTTACCTGCCGCCGGAAGGCTGCTCTTATCGCCTGGCGGTAGTGACAATCAAAAAACAGTACGCCGGACACGCGAAGCGCGTCATGATGGGCGTCTGGTCGTTCTTACGCCAGTTTATGTACACTAAATTTGTGATCGTTTGCGATGATGACGTTAACGCACGCGACTGGAACGATGTGATTTGGGCGATTACCACCCGTATGGACCCGGCGCGGGATACTGTTCTGGTAGAAAATACGCCTATTGATTATCTGGATTTTGCCTCGCCTGTCTCCGGGCTGGGTTCAAAAATGGGGCTGGATGCCACGAATAAATGGCCGGGGGAAACCCAGCGTGAATGGGGACGTCCCATCAAAAAAGATCCAGATGTTGTCGCGCATATTGACGCCATCTGGGATGAACTGGCTATTTTTAACAACGGTAAAAGCGCCTGATGCGCGTTTGTTTTGCCCTATTTATCGATCCGACAGAGAAAGCGCATGACAACCTTAAGCTGTAAAGTGACCTCGGTAGAAGCTATCACGGATACCGTATATCGTGTCCGCATCGTGCCAGACGCGGCCTTTTCTTTTCGTGCTGGTCAGTATTTGATGGTAGTGATGGATGAGCGCGACAAACGTCCGTTCTCAATGGCTTCGACGCCGGATGAAAAAGGGTTTATCGAGCTGCATATTGGCGCTTCTGAAATCAACCTTTACGCGAAAGCAGTCATGGACCGCATCCTCAAAGATCATCAAATCGTGGTCGACATTCCCCACGGAGAAGCGTGGCTGCGCGATGATGAAGAGCGTCCGATGATTTTGATTGCGGGCGGCACCGGGTTCTCTTATGCCCGCTCGATTTTGCTGACAGCGTTGGCGCGTAACCCAAACCGTGATATCACCATTTACTGGGGCGGGCGTGAAGAGCAGCATCTGTATGATCTCTGCGAGCTTGAGGCGCTTTCGTTGAAGCATCCTGGTCTGCAAGTGGTGCCGGTGGTTGAACAACCGGAAGCGGGCTGGCGTGGGCGTACTGGCACCGTGTTAACGGCGGTATTGCAGGATCACGGTACGCTGGCAGAGCATGATATCTATATTGCCGGACGTTTTGAGATGGCGAAAATTGCCCGCGATCTGTTTTGCAGTGAGCGTAATGCGCGGGAAGATCGCCTGTTTGGCGATGCGTTTGCATTTATCTGAGATATAAAAAAACCCGCCCCTGACAGGCGGGAAGAACGGCAACTAAACTGTTATTCAGTGGCATTTAGATCTATGACGTATCTGGCAAACCATGCCCGATGCGACGCTGTCGCGTCTTATCGTGCCTACAAATAGTCCGAACCGTAGGCCGGATAAGGCGTTTACGCCGCATCCGGCAATTGGTGCATGATGCCTGATGCGACGCTGGCGCGTCTTATCAGGCCTACATTGGTGCCGGATCGGTAGACCGGATAAGGCGTTCACGCCGCATCCGGCAAGTGGTTAAACCCGCTCAAACACCGTCGCAATACCCTGACCCAGACCGATACACATCGTCGCCAGACCAAACTGAACGTCTTTGCGTTCCATCAGATTCAGCAGCGTGGTGCTGATACGCGCACCGGAACAACCCAGCGGATGACCCAGCGCGATCGCGCCACCGTTGAGGTTGATCTTCTCGTCAATCTGCTCAATTAGTCCCAGATCTTTAATACATGGCAGGATCTGCGCGGCAAAGGCTTCGTTCATTTCAAACACGCCGATATCGCTGGCAGAAAGCCCCGCTTTTTTCAGCGCCAGTTTCGAGGCCGG
AACCGGGCCGTAACCCATAATCGATGGGTCACAACCAACGACCGCCATCGAACGCACACGAGCGCGCGGCTTAAGACCTAATTCATGGGCGCGGCTTTCACTCATCACCAGCATGGCAGCTGCGCCATCGGAAAGTGCAGAAGATGTGCCCGCCGTTACCATACCGTTTACTGGATCAAACGCCGGACGCAGCGTGGCGAGGGCTTCCACGGTGGTTTCCGGGCGAATCACTTCGTCGTAATTAAACTGCTTCAGGACGCCGTCGGCATCGTGACCACCGGTCGGGATGATTTCATTTTTAAATGCGGCCGACTGCGTGGCGGCCCAGGCGCGGGCGTGTGACCGCGCGGCAAAGGCATCCTGCATTTCACGGCTGATACCGTGCATACGCGCCAGCATTTCTGCCGTTAAGCCCATCATGCCCGCCGCTTTGGCGACATTGCGGCTCAGGCCGGGGTGAAAATCGACGCCGTGACTCATCGGCACATGGCCCATATGCTCCACGCCGCCAACCAGACATGCCTGCGCATCGCCAGTCATGATCATTCGTGCTGCGTCATGCAGTGCCTGCATGGATGAACCACACAAGCGATTAACGGTAACCGCCGGGACAGAGTGTGGTACTTCTGCCAGCAGCGCCGCGTTACGGGCGATATTAAAACCCTGCTCCAGCGTCTGCTGCACACAACCCCAGTAAATATCGTCGAGGGCCGCCGCTTCCAGCGCCGGGTTACGCGCCAGCAGGCTACGCATTAAATGAGCGGAGAGATCTTCTGCACGCACGTTACGAAAAGCACCGCCCTTCGAACGGCCCATCGGGGTGCGAATTGCATCGACAATGACAACCTGTTCCATTGTGACTCCTTAAGCCGTTTTCAGGTCGCCAACCGGACGGGCTGGCTCAACCGGAGGATAGTACGGTTCGTTATGACGCGCTTTATTACGCAGACCTTCCGGCACTTCATACAGCGGGCCGAGGTGCTGATATTGCTGTGCCATATCGAGGTATTTTGCGCTACCGAGGGTGTCCAGCCAGCGGAACGCGCCGCCGTGGAACGGAGGGAAGCCCAGGCCGTAGACCAGCGCCATATCCGCTTCCGCCGGAGTGGCGATAATGCCTTCCTCCAGACAGCGCACCACTTCGTTGACCATCGGGATCATCATGCGGGCGATAATCTCTTCTTCGCTGAAATCGCGCTTCGGCTGGCTCACTTCTGCCAGCAGGTCTTCAACGGCGGCGTCTTCTTCTTTCTTCGGCTTACCTTTGCTGTCTTCTTTATAACGCCAGAAACCGAGGCCGTTCTTCTGACCAAAGCGGTTGGCATCAAACAGCGCGTCGATGGCATCGCGGTAATCTTTCTGCATCCGCTGCGGGAAGCCTGCTGCCATGACAGCCTGAGCGTGATGCGCGGTATCAATGCCCACAACGTCCAGCAGATATGCCGGGCCCATCGGCCAGCCAAACTGTTTTTCCATCACTTTGTCGATCTTGCGGAAATCCGCGCCGTCGCGCAGCAGCTGGCTGAAACCGGCGAAATACGGGAACAGCACGCGGTTAACAAAGAAGCCGGGGCAGTCGTTAACCACAATCGGCGTCTTGCCCATCTTGCTCGCCCAGGCGACAACTTTCGCGATGGTTTCGTCGGAGCTTTTCTCGCCGCGAATAATTTCTACCAACGGCATTCGGTGGACCGGGTTAAAGAAGTGCATCCCGCAGAAGTTTTCCGGGCGTTCCAGCGCGTTGGCCAGTTCGCTGATAGGAATGGTTGAAGTGTTAGACGCCAGCACGGTATCCTGGCGTACTTTTTGTTCGGTTTCTGCCAGTACGGCTTTTTTCACTTTCGGGTTTTCAACAACCGCTTCTACCACAATATCCACGCGGTCAAATCCGGCGTAGTCGAGCGTTGGGTGGATTGTGGAGATCACGCCAGCCAGTTTCAGACC +>7_2#NODE_1_length_40000_cov_62.8079_ID_1 
+TGTTATGGCTCAAGCTGTTAAAAGAATTTATAAGGATTCAAAGCTTGCTATAGGACCTGCAATAAATAATGGTTTCTATTATGATTTTGATATTGAAAACTCATTATCCAATGAAGATTTAGATAAAATTGAAGCAGAAATGAATAAAATAATAAATGAAAATCTTAGCTTTGAAAGAATCGATATTTCTAGAGACGAAGCAATAAAATTAATGGAGGAAAAGGGAGAAACATATAAAGTAGAACTTATTAAGGATCTTCCAGAAGCTGAAAAGATATCTTTATATAAACAGGGCGATTATATAGATCTTTGCAGAGGTCCTCATATTCCATCAACAAAGTATATTAAAGCATTTAAACTATTAAGCGTTGCTGGAGCATATTGGAGAGGAAATGAAAAAAATAAGATGCTTCAAAGAGTTTATGGAGTTGCATTTTCTAGTAAGAAAGAATTAGAATTACATTTGCATAATTTAGAAGAAGCAAAGAAAAGAGATCATAGAAAATTAGGAAAAGAATTAAAGTTATTCACTTTTGCAGAAGAAGGTCCAGGATTTCCATTTATGCTTCCTAAAGGAGTAATATTAAAAAATACCCTAATAGATTTTTGGAGAAAATTACATTACGAAGATGGTTATGTTGAAATTGAAACTCCAATAATGCTTAATAAGAAACTATGGGAGACTTCAGGGCATTGGTATCATTATAGAGAAAATATGTATACTTCAACAATTGATGAAGAAGAATTTGCATTAAAGCCAATGAATTGCCCAGGTGGAATGTTAGTTTATAAATCAGAATCACATTCATATAGAGATTTTCCAATGAGAGTTGGAGAACTGGGACGAGTTCATAGACATGAGCTTTCTGGAGCACTCCATGGTCTTATGAGAGTAAGAGCATTCACACAAGACGATGCACATATATTTATGTTACCAGATCAAATAAAGTCAGAAATAAAAGGTGTTATTAATTTAATTGACAAGGTATATTCAAAATTCAGATTTAAGTACAATTTAGAACTTTCTACAAGGCCAGAGGATTCAATGGGAAGCGATGAAGAATGGGAATTAGCAGAAAGCTCATTGAAGGGTGCCTTAGATGAATTAAATCTTGAATATAAAATAAATGAGGGTGACGGAGCTTTTTACGGACCTAAAATAGATTTTCATCTTGAAGATAGCATAGGCAGAACTTGGCAATGTGGAACAATTCAGTTAGATTTTCAATTGCCTCAAAGGTTTGAGTTAGAGTATGTAGGTAGTGATGGAGAAAAGCATAGACCAATAGTAATCCATAGAGTAGTCTTTGGAAGTATAGAAAGATTTATAGGAATATTAATAGAACATTTTGCTGGAAAATTCCCAACATGGCTTTCTCCAGTTCAAGTAAAAATACTTCCTATATCAAATAAATTTAATAGCTATTCAGAAAAGATTAAGGATAAATTAAGCTCAGAAGGTATAAGAGTTGAAATTGATCAGAAAGATGAAAAGATAGGCTATAAAATAAGAGAAGCTAGAAATGAGAGGGTTCCTTACATTATTATTGTTGGAGAAAAGGAAGAAGCAGAAAATAATATATCATTACGCAGCAGAAGTAATGGCGATGAGGGAACATTAAATTTAGAAGATTTAATAGAAAGAATAAATAATGAAGTTAAAAATAAAGCTCTATAATTAAGTAGAGATAATACTGAAATATTAAGATATTAAATTTAAGGAATAATTAAAGTAAATATCACTTTAATTATTCCTTTTAATATTGTATATGGACAACATCATCGAAATTTAAAATATTATTTTAAATTTCTGCAGTTTTTAATTTATTAAACAAGCAGTTATATATTATAATGTTATATATCACTTGATGAAGGAGAAGGAAAATATGTTTTTATTTGGCGGGAATAAATTAAAGGTAAGAAGCGAAGAAGATGAGGATGGAGTTAAAACTTTCGTGGAATACTATGGAATGAAGAAAAGCGATGAATTTAAAGTAAA
GGTTTTAAATGAAATGATTGGAGAAAACCTTTGTTTAATAGTTTTAGACTCTAGAATGTTGTATTTTGGAAATCAAGTGGAACATGAGGTGCCAGTTGAAGAAATAATAGAGCATTTAGGAATTTCAAGAATAGCATATAAGAAGTTTGAAATAAAGAAGGTACCAGAAGTATCTATGTTTGGTATTAGCATAAAAAAGGGAACTAAAAAAACTGATAAAGATTATGTAATAGGATTTGTTGTAAATAAGTATAATTTTAAAAGAATTGAAGAATATGTAAATAGAATGAATCTATATTATTTCATAGATAACGCTGGACTTAGTGAAGAGGATTTATTACAGAAGTTGAGTGAAAATTATGAAGAAATTGATGAAATGAGTAAGGAATTTTACTGTGAGATATTTAATAATAATTACATAAGTCAGTTAGTTATTTCATCAGAAAATGAGAACGCTATGGCAATAAAGGAAACGGTTGGCAGGTGCCATTCTGAGTTAAAATAAATAGTTTCACTCTAAGAAGAATGTATGCAAATTTTATTATTTATTAATTTACTAGTGAATTCAACGTCTAATAGAATTAGCAAACATTCTTTTGCCTTGACAACATTATTAGATTATGGAATGATAATTAGATGTCGGAAAACTACTAATTGATTAATTATTTTACAAATGAAAAATTAGTTGAGATTAAAAAATGATATAAATATTATCTAGATTAAATATGAGGGAGGTATAGGAATACTTTTTTGATAGAATTATTTCTATGCGTTTTTTTGAAGCTTAACAAATTGCAAGTTTAATCTATATAATAGTAGCATACTGAAAATTTTTTAGAAAAGGTTAATGAGTAGTATGAAGAGAATAAATGAAGCTTTTAGTGATTATGAATCGGCTGGCAGCATAAATACTGCCGTTATACAATCCGTAGTCTTGCGTAAGAAAACTAAAGTGCTGGAAATGGAAATTAGTTCAGATAATTATATTGAACTAGGGGAAATTGAAAGTTTTAATAGTTTCATAAAAGATAAATTCGGATTAAATGATTCTAAAATTATTGTAAAGTATAGTGACGAGGTTGAAATAAGACCTATAGAAAAAGAATTGAAAAATATCGTATATTCCTTATCTACAAAGTATCCTGCATTAAAGGCAGCGGTTAATAATAGTGACTATGAAATAGAAGGAAATACTATAAATTTTAATTTTAAAATTCCTGTAGCGGGTTTTCTAAAAACTATGGAATATGATAAGCAAATTAATAAAGCTATAAAACATATGTATGGGAAACATTATAATATCAATTTTATTGATCAAATAGATAGCGAAGAATTGGTGAAAATCGCAGAAGATAAACGTGCTAATGAGATGAAAGTTATTAAAGAAATAAAAATTACTCAAAGCAATAATCCTCCTGAAGTACCTAAAGTTGAAGAAGGTAAGGCAGAAGTAAAAGCTGAGGGCGATGGCAAAAAGGCAAGTAATCCATTCTTGATTTTAGGAAGAAATGCTAATATTAAGGAAAACATAATTAAAATTAATGATATTACACCTGATGAAGGAAGAGTGGCTTTAGAAGGTGAAATATCAAATTTAGAAGCAAAAGAATTAAGAAGTGGAAAAATGCTAATTTCCTTTGATTTATATGATGGATCAAATTCGATGACTTGTAAGATTTTTGCAAAACCTAATGAGTATAATGAAGTGTTTTCTAGAATTAAAAAGGCTAAGGGTCTTAGACTTGCTGGAAATGCAGGCTATAGTAATTTCTCTCATGAAGTTGAACTTATTGCTAATACTGTTATTGAAACAAATGGTATAAGGAGATATAAGAGGCAGGATAATTCAGAGGTGAAGAGAGTAGAGCTTCACATGCATACTCAAATGAGCCAGATGGATGCAATGTCTAGTGCTAGTGATTTAATTAAGAGAGCTATGAGCTGGGGAATGAAGTCTATTGCTATAACTGATCATGGAGTAGTTCAGTCATTTCCAGAAG
CGCACAAATTGCTTGGAAGAGATAATCCAGATATGAAAGTCATATATGGAGTTGAAGCTTATCTAGCTCCAGATAAAAAGCCATCTGTAACAAATGTTAGGGAAGAAAGTATTGATACAGTATACTGTGTTCTTGATTTAGAAACTACAGGTTTTTCTCCTCAGACAGAAAAGATTACTGAAATAGGAGTAATGAAAATTAAGGATGGTAAGGTTATAGATAAGTTCAGTACTTTTGTAAACCCTCAAAAGTCAATTCCTATGAGAGTTGTAGAGGTTACAAAGATAACTGATGATATGGTAAAAAATGCAGAAACTATTGATAAAGTTTTCCCTAAATTGCTTGAATTTATTGAAGGAAGTGTTTTAGTTGCACATAATGCTGACTTTGATATTGGATTCTTAAAGCATAATGCGAAAGTTTTAGGCTATGAGTTTGATTTTACCTATATAGATACTTTAGGATTAGCGCAAGATGTATTCCCTGATTATAAATCTTATAAGCTAGGAAGAATTGCTAAAAATCTTGGAATAAAGGTTGAAGTTGCCCATAGAGCTTTAGATGATGTTGATACAACTGTTAAGGTATTTAACATAATGATTGAAAAGCTAAAAGAAAGAGGCGCGCAAACTTTATCAGATATAGATTTATATGCAGCCGATGAAGAAGCTAAGAAAGTGGCATATAAAAAGGTTAAAACTCACCATGCAATAATACTAGCAAAAAATTACGTGGGATTAAAGAATTTATATAAATTGGTATCATATTCTCATTTAGATTATTTTTATAAAAAGCCACGTATATTAAAGAGCATGTTTAAGAAATATTCTGAAGGTTTAATTATTGGAAGTGCTTGTAGTGAAGGGGAATTATATCAAGCCATACTTCTTGGAAAACCAGAAGAACAAATTGAGGAAATTGCTAATTTCTATGATTACTTAGAAATTCAGCCTTTAGGAAATAATGATTACTTAGTAAGACAGGAGCAAGTTCCAAGTAAAGAATATTTAAAAGAAATTAATAAAAAGATCGTAGAACTTGCAGAAAGATTAGGTAAGCCTGTAGTGGCTACTGGAGATGTTCACTTCCTAGATCCTGAAGATGAAATATACAGACGTATATTAGAAGCAGGACAGGGATTTAAGGATGCAGATAATCAAGCACCATTATATTTAAGAACTACTGAAGAAATGCTCGATGAATTCTCTTATTTAGGAAGAACAAAAGCTTATGAGGTTGTAGTTACAAATACTAATATAGTAGCAGATATGTGTGAGCAAATAAGCCCTATTTCTCCTGAAAAATGTCCACCTCACATAGAAGGCTGTGAGCAGACAATAAAAGATATAGCTTATGAAAAAGCCCATGAACTTTATGGAGATCCACTTCCAGAAATAGTTCAGGCGAGACTTGATAAAGAGCTAGATTCTATTATAAAAAATGGATTCTCAGTAATGTATATCATAGCTCAGAAGCTGGTATGGAAATCAAATGAAGATGGATACTTAGTAGGTTCCAGAGGATCTGTTGGTTCATCCTTCGTTGCAAATATGACTGGTATAACAGAAGTAAATGCGCTTCCACCTCATTACAGATGCCCTAAATGTAAGTATTCTGATTTTGAGGATTATGGCGTTCTAAATGGCTTTGACTTGCCAGATAAAGTATGTCCTGTTTGTGGAGAAAATCTGTATAAAGATGGTATAGATATTCCGTTTGAAACATTCCTGGGCTTTAATGGAGATAAAGAGCCGGATATAGATTTAAACTTCTCAGGGGAATATCAGGCAAAGGCCCATAGATATACAGAAGTTATCTTTGGAAAGGGAACAACATTTAAAGCAGGGACTATTGGTACTATAGCAGAAAAAACAGCGTTTGGTTATGTTAAAAAATATTATGAAGAAAAAAATATTACAATAAACAAAGCTGAAACAATAAGAATTTCAGTAGGGTGCACTGGTATAAAGAGAACTACAGGTCAGCATCCAGGAGGAATT
ATAGTTGTACCAAAGGGAAGAGAAATATTTGAATTCTGCCCTGTACAGCATCCTGCGGATGACCCTAACTCAGATATTATAACAACACATTTTGATTATCACTCTATTGACCAAAATCTATTGAAGCTTGATATACTAGGGCACGATGATCCGACGGTTATAAGAATGCTTCAAGATATAACAGGGGTTGACCCACATGAGATTCCTATGGATGACAAGGATACTATGTCCTTATTCTTCTCAACAAAGGCTCTTGGGGTAACTCCAAATCAGATAAATTCAGAGGTTGGAACCTTTGGAATTCCTGAGTTTGGTACTAAGTTCGTAAGAGGAATGCTTGTGGATACAAAACCAAAGACTTTTTCAGATTTATTATGTATATCAGGACTTTCACATGGTACAGATGTATGGCTTGGAAATGCTAAGGACTTAATTGATAATGGAGTAATTACCAGCATAAGTGATGCGGTATGTACCAGAGATGATATTATGGTTTACTTAATTAGAAAAGGACTTCCACCTAATACCGCGTTTAAAATAATGGAAACTGTTCGTAAAGGTAAAGCCTTAAAAGAACCTAAATTTCCAGAATATGAAGCTATGATGAGAGAACATGATGTACCAGAATGGTATATAGAGTCTTGTAAAAAGATAAAATACATGTTCCCTAAAGCCCATGCAGCAGCTTATGTAATGATGGCATTTAGAATAGCATGGTTTAAAGTTCATATACCTCAGGCTTATTATGCAACATACTTTACTATAAGAGCAAAAGCCTTTGATGCAGAATTTATGATCTTTGGAAAAGAAAAGGTTAAAGCAAAGATGCAGGAAATTCAGGCTCTTGGAAATGAGGCTGGTCCTAAGGATAAGGATATGTATGATGACCTTGAAATAGTTTTGGAAATGTATGAAAGAGGATTTAAATTCCTTCCAATTGATTTATATAAATCTAGTGCTACAAAATTCCAGTTAGAAGAAGAGGGAATAAGACCGCCATTAAACAGTATAGCAGGTATGGGAAATGTAGCTGCAGAAGGTATAGCAAGTGCAGCAAAAGAAAAAGAATTTAATTCTGTAGATGATGTAAAGAAACGTTCAAAGATTGGAAATGCTGCCATAGAGCTACTTAGAAAATTTGATTGTTTAAATGGTCTTCCAGAAAGTGATCAAATGAGCTTCTTTGACGCAGTTTAGATTTATGACTATAGATAAAATAACTTTGAAACTAAACTTAAGTGGTAACTTACCGAAATCATAAATATTTTGTTCCGAAAAACTATGAAAATATTGCTGAAAATCCTAAGTTGCAGATTGCACCACTTCAGCATGCTCCCACTTTCAGTGTGACAAGCAAAAGTGGAACAACCTACAACTAAGAATTTTTAGCAGCTCATTTTCAAATGTTTTCTGCACAAATATATTTATGATTTCTAGTGAAGATATTACCTCAAAGTTTTAGCAAGTTTGTAAATATGTTTATATTATAGATTTAAATATTAATTTTGATATTATAGATAAACTAACTAAGATCTGAACTGGAGTATTCTCTCTTTATAGTATGGTCTAAATTATAAGTGGAATTTAGATTTTCTAGTGAAGTTGTACTTTTATTTTATTAAGTATTTTAAAATTTGAACTTAAAAATTATATAAAAAATAACGTAAAAAACACAGCCTAGTGATTAAATATATAGGCTGTGTTTTTTGTTATATATTTATTGTTAATTTAGTTATAGTATATTTAAGTAAAATAGATTATTTTCCATTATCTGTTGAGCCAACTTGATTGTTGCCTTTATCACGATTATTCTTGCCTTTCATCTTATCAACATTGCTCTTATCATTCTTATTTCTATTGTATTGAGAAGATCCCATTTTATATCACCACCTTACTTTTTATTTATTATTCGCGGAAAGAGTTTGTGATATTCATATTGAGATTAGTTTTTCGCTCTTTTCGTAAATATTGTGGGCAAATATAAAATTAGATTTTGAGCA
AATAATCATTTTCTGTTTTCAAAATGCTCAATTCTAATATAATAATATTTGACATTTATAACTATAAGTATATAAACTTAAAACAAATAAGGTTATATACATTATATTTACAATTAGGAGGAATAGATTATGGCAAAACTATTAGATAATGAATATTCAATGGTATCATTTAAGGCTCGTAAAGGAGATAAGGAAGCCAGGAGAAACACAATTATAAATATTATATTAAATATTTTTGTATTAGAAGGTAAAGGTAGCAGTGGTACTAAGGAATTTATGTTTACACTTACAAATAATCATCTATACATAGATGATATTGGGTACGATTTAACTGGTCAGGTAGACATCTATCTCACAGAAAAATTTGATAGAAAAGATATAAAATCTTTTAAGGTTAGAAAAGAAGGCAATAAAGAAATAATCTCTCTTGTAACCACTAATGGCAAAACTGTAACTTATGTAAGGGACAATGAAAACGCTTCAGATTTAGCAACAGAAATGGCTAAACTAATAATTGAAAATGCAGGAAATTAAATAATATTCAATAGTCAAAGGGGGTGCTTAGATAACTCCCTTTTAAATATTGTTATATTTTTCACTATAATACATTAATAGGGTGTAAATCCTTAATAATTATTGATTTATTCATTGCGAGTACGTTTGTCAGTAAAGTTTTCTTCAACTCAACTTTATCAAGTGATTTATTTAGTTCAAGAAATCTATGCCAAAATAAATAATTATCCATATATTTAGTTGATACACCATTAAAGTGAAGATTAATCCACTTTCCAAGCCTTTTATGATATTCATTAACATGTTGCATATGATATAATTTTTCGATTACATATTGTTTTTTAGAAGCATTTACTTGCTTATGCTCTAAACCCTTCATTTTTGCAAATGATAAATAATTCTTTGCCGAGTCACTTATTAATAAAGCATTAGGGGCAATATAATTGCCTATGACATTATCTATTTGTGTAGCAGTAATACGCCCCATACCAGCATTTCTACTAATTATGTGACCATTTCTATCCATAGCAACTAAAATACAAACCTGTTCATGGCTTATGCCTCTAAATTTAGATTTACCACCTCTTTTGTTGGGTTTGCGAGTTTTACATTGGTTTTTCCCTTTAAATGGCTCTATGAAGAAGGTTTCATCTGACTCAATGATACCAGACAATTGTTCTATATCCATTGAACGTAAAGAATTAAGCACTTTATGTCTCCAATAAAAGGCTGTTGAAATACTTATATTAAGAGCCTTTGCGATTTTGGGTAGAGTTGTTCCCTCTGATATAAATTGAAAATATTTAGCCCATTTTTGAGGATAATGTGTTCCTGCCATTGGACTACAAGATACATCATTAAAGGTTTTGCCACAATCTTTACATTTATAACGTTGTCGGCTACGATATTTTCCATGACCAATAATATGTTCACTATTACAATGAGGACAAAACGTACCCTTTGAAAAACGTGTTTCTCTAATGGTTTCTATATAATCATTAACTCTATCAGAACTGTTCTTAACAAGAGGATATATCAAGTTGAAAAACATATTTTGAACCACTGGTGATAACTTTTGAAATTCATCAAATAAGATATTTATATCCATGAATGATATTCACTTCCAGAGCTGGTAAACTCAATAAAGTCATCTGACAGCATATCATTTATTTTATCTTCTGACTTTCTAACTTCTGATTTTAATAACTCATGTTCTAATTGTAATAGGTGATCTTTAATACTATTCGTGCATTCACCTCATCTTATGATATATATTTCAATATCAATCTTTCTATCAATAATGTTATTAATAATGAGTATTTAAATAATTTGCTATTTCTTCAATAAGTTCATTTCTATCTAAATCCAAAGCCATGATTGAGCAGCCATCCATATGATCGCAAGAGATATTGATTTTTAGTAATAGGAATTAAATTATGAAGAGGATTAAGACAAGTTTAGAATACATTTTTGTAAGTTCATAAGTA
TTGAAAAAGTAAACTATGATTTTTAGTTGTAATAATTTTAAAAAATAGGTTTACAATTTGAACTTAAAAATACTTAGAGTGTATATAAAGTAAAAATTTTCATGTTAAATTTTATATTCCAGATGAATTTAGTTAAACTTACCCCCACAAGGGGCACAATGTAGCCACAAGGTGAAAATGTCTGAGGATATATGTTATTAGCTGGTACTCCTCAGAAGTGATTGGAGTATTAGAATGGCTATATATCAGATGGATGTACATCCATGAGATTGATGCCCTCATGGAAAGATTAAAAAGGTTTAAAGTATTTCTTTTAAAGGAGAGTGAGAAACATGTCAGATATTAAATGGATTAAGTTATCTACTAACATGCATGATGATGAAAAGATGAAATTAGTAGATGCAATGCCAGAGAGATACTATTCATTATTTATGGATAAGATTACTTATACAAGCTGGCAAAACTAATGATAATGGACTTATATATCTAAATGAGAATATCCCATATACTGATGAAATGTTGTCTACAATATTCTCAAGGCCTCTAGCTTCTATTAGACTTGCACTTAAAGTATTATCAGATTTTCAAATGATTGAAATAGCTGAAAATAACGTAATTAGAATAGTGAATTGGGAGAAACACCAAAATGTTGAGGGAATGGACAGAGTTAGAGAGCAAAATAGAAAAAGAGTTCAAAACCATAGAGAAAAGAAAAAACAACTTGAAGCTGTAGTTAATGAAAGTGAAGAAACTAGTGTTGAAATTGAAGAACATAGCTTTGCTGAAATTGAAAAATATAGTATTGAAGTTACAAATGATATTTGTGATACTTCAAAAAAATCTTGTAGCGTTACAGATGATAAAAATAACGTTACTGTAACGGTGCAGAATAAGAGAGAGAATAAGAACAAGATTAAGAATGAGAATAATAAAGATAGAGAGATAAAGAAAGAAGAGAATACAGGTTATGATGCAAATGAAACTCCTACTTCTAATGCAGCGTACGTAAGTGATTTGCACAACAGACAAGCTTTTAGATCTCTGGTTAAGGAAACTTATATTCCTAATGCCATGGCAGTAAGTGATCCACATGAAACCACAGATTTTAGTTCTCTGCTTAAATCAGAATCAGCAAAGTCTTCTATCTCTCAATCTAAAGACTCAATTGCTGAGCAGGATAATAATTTAGAGGACATAAACTCTAAATCCATTGAACTTGCTAAGTATTGTGAACTAATAGCTGGAATACCTAATGTTCTTAATTTAGGCGCACTTAGGTTAGCTATAGGAATGCATGGTCAAGAATATGTAAAGATGGCTATAGATATAGCATTAAAAGCCAATAAACCTAATATGACCTACATTGATGGCATACTGAAAAATTGGAGAAGAGAAGAATATCCAGATGACAAGGAGGTAAACAAAAATGTCAATAGGAGCTATGGAAAGAATAGCAACTCAGATAAAAACAAATTTGCAGGATTCAAACCAAAGGAAACACGACGAAGCCTTACAGATGAGCAACGAAAGAAAGCTCTAGAAAACCTCATATAAGTGTGAGAAGTATTGAAATACTTCTTAGAAGAAAAGACAAGAGTAATTCAAGGCAAGAAATCATCTCAATATATTGTGGTTATGATGGCTTAGTAAATATATAAAATTTGAGATATACAAGAAATGATTTAATCATATAAAAGAAAATAATTTTGTATAGGGGAATGTAAAAATGAGAGCATCAGGAATAGTGAGAAAGCTTGATCCACTTGGAAGGATTGTAATACCAAAAGAAATAAGAAAGGTACTTGGAATTAATGATGGAGATTCTATGGAAATAATTAAAGTTGATAATGAGGTAGTTGTTAGGAAATATAGTAAAGGCTGCATATTTTGTGGAAATGATAAAGGGGTTTTTAACTTTAAAGATATACTGATTTGTAACGAGTGTAGGGAGGCATTAAAGGGAAATTAATTGAGAAAATTTAAGATGAGTTGATTAAGCT
AAGCCATTAAATTTAAATTTTGAATATGTAAAGTTGACTTAGTAATAAATAAAAAAGATGGTCTTTGAAAATTTAATAATTCGATATTTACAGTTTATGCTATAATTGATTTATATTAGAAAAAGGCATGTGTTAAATAAAATAAATCGGAGAAGTAAAATGAAGAGATTAAAATTAAAAAAAGTAATATCTAGTTCATTATTAGTAATTTTAGTATTTGTATTAAATCCAATAGTTGTAAGTGCTGAATGGAAACAAGATTCTAATGGATGGTGGAATGCAGAAGGAAATTCATGGTCAGTAGGTTGGAAAGAGATTGATGGAAAGTGGTACTATTTTGGACAAGATGGATATATGAAAATGGGATGGATACAGTATGAAGATAAGTGGTATTACTTAGGTGATGATGGTGCTATGCTTAAAAATACTACTATTAACGGTTACAAATTAGATTCTGACGGAGTGTGGATTCAATCATCACAAAATGATTCATCAAATGTTGAGTCGAAAGATAAGAATGTAAGTTATGAAACCGAAGTAAGTAATAGAAAGCCAATTATCAAAGATGGAAACTATCTATATGAATCTGGTAATTTATCCAAAGTTAATATTGATGGAACAAATAAAACATACTTTGAGCAGGCATATGGAAGCATTATTGGCGTTTCTGATGGCTGGGTTTACTATGTTGATGGAAATACTGAACACCATGGCAGTAAAGGTATATACAAAATAAAGGATAATGAGAAAGTTAAGTTAACAAGTGAGGATTATATTGTTGATGCTGTCTTTTATAAAGGATCAATATATTATGCATTAGGACCTGAAATGGTTGAAAATAATGTGGTTAAGGGTGGATTATATAAAGTTGATATTGATGGAAAAAATAAAGTTCAACTTTCTGATCAGTTGGTTGAGAATATAAATATATATAAAGATTGCATATATTACAGCGGTACAAGCTCTGTTACTACTGATAATACAGTTAGCAACGAGAATTCAGGTATATTTAAAATGAACACTGATGGCACAAGCAGAACAAAAATCAGTGATCATTATGCTTCATTCTTAAAATTAAGTGGTAATAATATTTATTTTAGCAGTTTTGATTATAACTACAAATTATATAAGATTAGTATTGATGGGACAAATGAAAGAAAATTAAATGATGATATCAGCTGGGATATTCAAGTGGATGGAAATTGGGTATACTATAGTAATAATCCTTACGATGGGCATACTGATATGATTCTACCACTTGATAAGATGAGAGGAAATATTTATAAAATTACTATTGATGGAATGAATAGAACTAAGATTAATAGTGAGGCAAGTAGGTTATCTGAAGTATTAGATGGGTGGATATATTATAATAATTGGTCCGATAATAAATCTTATAAAATGAAATTAGATGGTAGCAATAAATCAGTTGTGGAATATTAGTATTATTATAATAATTCCGCTAAATAATTGATTATGTTTTATTATAGTGTTGAAATACAGCCTAAAATATATACATGGAATTGTTTATTATAATACTCAATATTCAAACAGTATAAATCACTAAAAATAAATAGTAAATTGTAAAATCGTATTATTCAAGAAATTGAATTTGCGATTTTTTTTATTCTCAAAGAGATTATATTATTGGACAACTGTGTGGACATTGTGCCCCTTGAGGGTATAACCTATGAAAATAGTTATTTTATTTTTGAAAAAATATATGCTGCAATTCATAGTGAATAATTATGGATTAAATAATTGTAAAATTTTTAAAATTTACAATGAAGAATTATTTGAAGAATTATTTTGGGAACATATATATAAAATCTATAAAAAGGGGATAAACAAAATGGAAGGGTTACGAATTTTTAAAGATGAAAGATTTGGAGAAATAAGAGGGTTAAAAATTAATAATAAAGATTATGCAGTTGGAATTGATATAGCAAAAGCTTTGGGATACAAAAACCCAAGAGATGCAATTTTAAGGCATTGCAAGG
GTGTCGTGAAACACGACATAGGGGTAGTTACTGGAAAGAGGAGAGATGGAACAGAGGTTATTCAAAATATAGAAATGAGTGTAATTCCTGAAGGGGATATTTACAGGTTAGCAGCCAAATCAGAATTACCTGTGGCAGAGAAATTTGAAGCATGGATTTTTGATGAAGTATTACCTAGCATACGAAAAACAGGGATGTATGCTACAGATGAATTATTAGATAATCCTGACTTACTTATTGCTGCAGCGACTAAGTTAAAAGAAGAGAGAAAAGCAAGGCTAGAGGCTGAAAATAAAGTGAAATTATTAGAACCTAAAGGACAATTTTATGATGATGTCGCAGGATCTAAAGATAGCATTGAAGTGGGACATGTTGCAAAGGTTCTTGCTATAAGAGGAATGGGAAGAAATAATTTATTCTCACTCTTAAGAGAAAAGAAAGTTTTGGATAAAAACAATATTCCTTATCAACAGTTTGTAGATTTAGGCTATTTTAGAGTATTGGAGCAGAAATACACAGTACCTAGTGGTGAAACAAAGATAAACATTAAAACAATGGTATTTCAAAAGGGGATAGAGTTTATAAGGAGAAAAATTGGAGAGTAATAAACCCCATTCTCCTTATTTTAGGATATGCAAATATAAGAGCTTGAATATTTACATATATTTAATATAAAGCACCATGAAATTTATTAATATAAATTATATTTTAAATTTTAAAAGCAGTAGTTAGGAAGCCTTTAACTACTGCTTTTTATATTAAAAGATAAATATGAAAGGTGATAATAATTTTTTCTTATTCATGACAAAAAAATAATATGCATGGGAAAAAATGCAGAATATATTAATTAATATATTCTGGTTAAAAAATCTAAACTTAGTACTGAAAAATTTACATTTAATGTGTGGATTTGGACGTTTAGAGAAAATGCTATTTAATTAATATGAGTATTATGAGAATATGTAATTGTAAATATAAAGAATGGAAGTGAAGTTGATTTATGTCAAAAGTACTTTTTTTAAGTATACCTGCACATGGTCATATAAATCCTACATTAGGATTAATAAATGAATTAGTAAAACAAGGAGAAGAAATTACATATTTCTGCTCAGAAGATTTTAAAGAAAAAATTGAAAAAACAGGTGCCAAATTTAAAAGTTATAGAGTGGAACTGAGCCTTTTTAAAAGAAAACATAAGACTTCTAATGATATGGGGCCAGATAAACTTCTTGATTATATAAATGAAACGCTTAAATCAAGTGATAAAATTATAAAAGATATTTTAAATCAAATTGAAGGCGAAAAATTCGATTATATTATGTATACAGCCATGTTTCCTTTTGGAAGTATCATTGCTCAAATACTGAAAATACCTTCAGTTTCTTCCTTTGCAGTGTTTGCAACTCCTAAAGAATTAAGGTCGCAGCACAAAGAATTAATGAGTGAAAATTTAATAAAAAATCATCCAGTTATAGAGACTTATAAAAAAGTTTCAAGACAATTAAAAGCAGAATTTAATGTGGAAATGTCCCATAATATCTTTGATTTATTTTTTAATAAGGGTGATATAAATATTGCATATACTTCTAAATATTTTGTTGCACATCCAGAATATTATGATGATAGCTTTAAATTTATAGGGCCACCAATATATGATAGAAAAGAAAATTTAGATTTTCCTTTTGAAAAATTAGAAGGAAAGAAAGTTGTTTATATTTCACTAGGTACAGTGTTTAATAATACAGACAGCAATCTCTATGATATATTCTTTAAAACATTTGGTAATACCGATGAAGTTGTTGTAATGGCAGCATACAATGTAGACCTATCTGAATTTGATATACCTAATAACTTTATTGTAAGAAACTATGTTCCTCAATCAGAGGTTTTAAAGCATACTGATGTAGCAATAACTCATGCAGGAATGAACAGTACCAGTGACTTATTGTATAACAATGTGCCTTTTGTAGCAATACCTATAGGTGCGGATCAACCATATA
TGGCAAAAAGAGCTGAAGAGTTAGGTGCAACTATTTCTCTTGATAAAGATAATATTACTCCAGAGATTTTAAGGGAATCTGTTGAGAAAGTTTTAACTGATCCAAGTTATCTTGAAAATATGAGAAAAATAAGTGATTCTTTTAAAGAATCTGGTGGTTATAAAAAAGCAGTTGAAGAAATTTTTAAATTAAAACGAGATAAGATAGATCAATAAGATTTGTTAATACTAATGCATGAAAAAATAAAAGTTGCAATAAAGGTAGATTAGGTAAAAATACTTAGTCTGCCTTTTACTATGTATGTGGAAAAATCTTTGGAGCAAAGGGTTTCCTTTTTAAGAAGAACTTGTAATACGGAATACTCAAGAAAGCTATGAACGTTCTTATTATTCATGTTTACTATGCAAGCAGAAAGTAGATTGATTATTATCGCAAATTCCAAGTTGTAGTTAAATTATTTCAAATATACCTAATAAAGAAAATATTTAAGAGATTTGTAATGTAGCAGCATGATAAGTGCTGTTACATTTATTTTTTGCAGAAGTAGATAATTATTTTGTACATGAAAAAATTGTATTATAATGATATATATGGATCTATAGATAATAATAAGATTTTATTCTTATAACGTCTATAAAAGGAAGCGAGAGAGCTTCAAAAGATGTGTGTTAAATTAAAATGATAATTAATAAAATCAGTTGTATAAAAAAATGTGATTTGTGAGTAGATAAAATACTTTATTTTGTAAGCTAAAAAACAATTAAAATAAAAGTGGTGAAAATATGAAAATATCTATTGAGGAAATTGGAAGAGAATTTGAAGAAGAAATTATTGTTAAATGTCATGAGGTTAATGATGATATTTTAAAATTAATAAGCAAATTAAAAACAGAAAAAGCTGTGGTTCTTGGTTATGATGGATGTAATATTCACAGGATTCGTATTTCAGATGTGTATTATTTTGAGGCAGTAGACAACAAAGTATTTATCTACTGCAAAGACAAGGTGTTTGAGTCTAAGCAAAAGCTTTATGAATTGGAGGAAATGTGTCAAGGAAAAAAGTTTTTCAGAGCATCCAAGTCGACTATTATAAACATGACAAAAGTATTATCAGTACGACCATCTTTAAGTGGCAGGTTTGAATCAAAGCTGGATAATGGAGAAAAGGTAGTTATCTCTAGACAGTTTGTACCTATATTAAAAGAAAGGTTAGGTTTATAGGAGGAAATAGCATATGGAATTAATGGATTATATAAAAAAGTTAATAAGAAATTATTTTACTATATTTTCACTTATAGTAATATGTGTAACAGTATCAAGTCAGATTTTTTTACCAAATAAATCATTAGAATTAAAAGATATATATATTTATATGATATGCTCATTAATCATTGATTTGCTGAGCCTAATATTATATTCCACAGAAAAATTATCAGAAAAGCAAATGTGGATAAGGAGAATTATTCACTTTATTATACTCAATGGAGTGCTATTAATTTTTACTAATGTGATTGGATTTATGCATGATGCTTTAGATATAATTATATTAGAAATTCAGATAATAGTGATATATGCTATATTTCAATTTTTAGTATGGATGTACGATAAGAAGGACGCTAATGAAATTAATGAAAAATTAAATATAATGAGAGAAGAATTAGAGATTAAGAAGGAAGAAGAATAAAATGTAATTTTCAGCTTACCTCTTCTATTTTACAACTTACATGTCGTTAGTTACAGGATACTGCTTAAGCTATGTACATTTGGCTTAGTAAGTATTATTTTAGTTTTGTAGATGAAGTATGAAATTAAAGTATACTTTTAAGATTAAGGCTGGTTGTAATTAAAAGAATAAATTGAAGAGGTGGAGGAAGAATGGGAATAATAGTAGTTTCTATTGCAGCAGTTATAGAAATTGTTTTTGGAATATACTGCTTTAAGACAAAATCAAATCAGAAGAAAATAAGAAGTTTAATTCACATTAGTGTATTTATAGCATTT
GTTGTATTTACATTTCTATCAGTAATACAATGGAGTTTCCGGTGGATGCTTTTGGGTGGGGTATTGCTTATTTTTGCAATTACTTCGGTAATATCACTTATACTAAAAAGGGAAGATAAGAAAAAATACAAAATTAGAAAAATGGTTGGTAAGTCTATCGCAATGTGCATAGTTGTAGTTTTAGCTGTCAGTCCTGAATTAATCTTTCCACAGCATAAGATGCCTGAATTAACAGGAAAGTACAAAGAGATAAAAACGGCAACTTATACATTCAAGGACAATAATCGTATTGATATATATAGCCCCAAAGGTGAAAATAGAGAAGTTACAGTAGAATTTTGGTATCCAGAAGATGATGAAGTCACATCTCATTATCCTCTTGTAGTATTTTCCCATGGTGCATACGGAATGAAAACTAGTAACATGTCTACTTTTAGAGAGCTAGCTAGTAATGGATATGTGGTCTGCTCAATAGATCACCCATATCAATGTTTATTTACCAGAGATGCTGATGGAAAGATAACAACAATAGATAAATCATTCATGGACGAGCTTAATGGTGCAAATAATGGAGCTTATGATGAAGAGACTGAATATAATCTTGGAGTAAAATGGTTAAATGTACGCACCGAAGATATGAATTTTGTACTTGATACAATTATTAAGAACGTAAAGGAAGGTAATACAGAAAAAGTTTATAGTCTTATTGATGCTGGTAAAATTGGATTAATAGGTCATTCATTAGGAGCTTCAGCAAGTGCACAGCTTGGAAGAGAACGCAGTGATGTAAGTGCTGTAGTTAATCTTGATGATGACTTAATAGGAGAATACACTGGTTTTAATAATGGCAAGTTGCAAATTAATCATAATATTTATCCTATACCCATTCTTAGCATTTACAGTGATGATTTAAAGCGAATATATGAAAAAACTGATCCAAGCATTATACCTCAAAAATTAATTTCAGCGACTGCTTTAAAGTCCTTTGAAGTATATTTTGATGGAACAAATCACATGAGTTTTACTGATCTTCCATTAGCATCTCCAATTCTTACAAAATTGTTATGCAATACAGCAAACAGTAAGATTGGTATACAAAAAGCAGACAAGTACTATGTAATAGAGAAGATGAATAATTTAGTATTGCAGTTCTTTAACTGTTATCTTAAGAATGAAGGAGATTTTCGACCAGAAGATAAATATTAAATCGTAGTAGTAAAATCTAATTTTGAATAGCATGGCAATGAAAAAAGCGCTATCATGTGCTATTGATATGGTGAATATTATAAAAAAATATAAGTTGTAATTTAATGAGGTGGAGAGATGAAGGTTTTAGGAATTAATGGTAGTTCAAGAAAAGATGGAAATACAGCATTGATTATGAAGATTGTTTTTGATGAATTAGAAAAAGAAGGAATTGAGACGAAAATAGTTCAATTCTCTGGTAATATAATTGAGCCTTGTAAGGGGTGCTTTGCATGTAAAGGAAAGAATAATTGTGTTTTTAAGAAAGACATATTTTATGAATGTTTTGAAAAAATGAAGTTAGCAGACGGAATTCTTTTGAGTTCTCCAGTCTATTCAGCAGATGTAACAGCAAATATGAAAGCTTTTTTAGAAAGGGCAGGGGTTGTTGTTGCAACAAATCCAGGTCTACTTAAACATAAGGTTGGTGCATCTATATCTGCTGTTCGCAGAGCTGGAGGTATGACAGCAGTAGATACAATGAATCATTTTTTATTAAACAAAGAAGTTGTTATAGCTGGTTCTACATATTGGAATATGGTTTATGGAAAAGATGTAGGCGATATATTTAAAGATGAAGAAGGAATCAATAATATGCATAATTTGGGACAAAATATGGCGTGGCTGCTAAAGAAGATTAATTAATGCTATTAACAGGATCTAAAATGGTATTTCAAAAGGGAATAAAGTTTATAAGAAGAAAAATTAATAATAAATAAAGTTGTCGTAAGCATGAAAATAATTAATATATATTG
CAAAAATAATCCTTCATCATAATTCCAAAAATATTACAAGAATTTTAGCTGTAATTGTGAAATGTGAATTGCAACATATATATACTATTTTCATGCTTATATTTATAAATTAAATGGGAATTCGTCATGAGAGGAATTATATTAGGTTTGAAAATTGCGGTGTGTAAAGAGGAAAATCAATAGTGTGTGTAAAATATATATAAGTAAAGGAAAAAGTATCAATCATTTAGGGGAGACGTACTTATGACGAGAGAAAGTAATTTTGAATATGTAAAGGGGACAGATATAGAAGAATGCTACGAAAATTTAAAGAAGGCGGAATTTGTATATCAAAGTTTTCCTGATGCAACAATTATGTGTTTAAGATCAATAGCGGAAACTGTTTTAATAAAAGTCAGGAATGAATTTTACAAAAGTGCAGATAATAGTGGAACTTATTATGGAATAATGGGTGAACTCATAACTGTTTCTGAAGTAAAGTTCTCAAAAGTGTATAGAGAATTTAATATTATAAGACTTAATGGGAATTATGTACTTCATCCTGAGGCTGTAATAGAAGAAGAAACAAAAAGGACGCCCACTGAACTTTTAGAGATTATGCATAACATTCTTATCTGGTATCTTCAAGATATAAAAAATAAAAGTTCTATTGATAGTAGTAAAATCAAGTTTATAAATCCAAATAATTTTAATAATGAAAAAAAAGAATTAACTGAGGTAAAAAAGAAGATTTCAGAAAAGAATAAGAAAATATCTGAATTGAAATTAAAGATAAAGCAGCTTAAGGATTCAGAGAAGGATAAGGCTGATCAGTTATTTAAGTTAAACAAAGAAATAGATAATATAAATGTGGAAAGACTTGAATTAAAAGAAAAAGATGAGTTATTAGAAAAGAAGATTAAAGAGCATGATAAAGAAATAGAGACTATAAAAAAGAAATATAAAATTGATCTCCAAGAAAAAATTGAGAAATTTAAAAAGGAACATAGAGAATCTGAAAAAGCTTCTAATGAGGCTGAATTAGCAGCAGTTAAGAATGAAATTTTAATAAAGAATAATGAAATAGCAGAATTAAAAAGTAAGATAGATGATTTCAAAGAAGAATCTAAGGATAAATTAGAATTACAAAAGGAAATTGAAAAAATAAAAGCTGAAAAAGAAAAACTAGAATCAATGGATTCTATTTTAACTAAAGGGATTAAAGAGTATGATATTGAGGTAAAAAGAATTAAGGAGCAGCACAAAAGGGACTTAGATGACAAAATTAATGAGTTAAAAAGTAAAGAAGATACATATTTAAAAGAAAAAGAAGAATTAAATAATATTAGACAAAAAATACATTCAAAAGAAAATGAAATAAATGAACTAAAAGCAAAAGTAGATGAGTTAAGGCAACAGTCTAAAGAAATAAATTTACTTAAAATAAGTATAGATGATTTGCAAAGTGAAAAAGGTTATTTAGAAAATCGTGATAGAATATTAACTAATACGATTCTTGAAAAGAATAAAGAACTAGAAAATATAAAAGAAGCTTATAAAGCTAATGTTGAAAAGATTGAAATATTACGTAAAGAGCGTAATGAATCAAATGCGTCTCTTAAAAATAAGGAAGAAAAGCTTATTAAGGTTGAAAAAGAGAACTTTGAACTAAAAAAACAACTTAAGGAAATAGAAGAAACAGCTAAAATTGAAGCTATAAAGAGAGATGAAGAGTTAAAGAGAAAAGAAAAAGAGCTGCATGAAGGGATAGAAAAATTAAGACAAGCTTATAAAAATTCTTTTGAATTAACCAGGGAGTATCAAGATGTTCTAGAAAAAAGTGAGTATTCTTATGATAAAGAAGAAGAAAGATTATTAAATATTCAAAAGACAGATGTTAAAGAAAAATTAGTTGAAGAGGATAAGAGCTTTCATAATAATTTAAATGAATATAGTGAGGGCGTGAAAGAAACTAATGAAAGAATAAGAACCTTCAAGAAGGTATTAAAAGAGAAGAGCAT
TAAGGAAGGTAAACACGTAGCATTTTATAGAGGCTTTTTAGGATTAGAAGCAGAGCAACTTAGAATTTTATATACAATGCTTACTAAGACCAATATTTCCTCAATTTTAATTAGTAAGTCTAAGGAATTACTATCACAATCTAATGAAGATAAGTTCATGGAATTTATACATAAAAAAGCACGAGAATTGAATCATTTATCAGATGAGGAAGTAAGATTAAAAATTTATTATAGATTAATTAAATTAACTGATATTGAATATAAAAACATTTATGAAAGAAGAAGTTTTGTTGAAACTTTAGATGAAATAGTAAATGCAGGATATTCAATATTAGAAGGAAAAAAAGATTTCAAAGGTGGAGGCTCAAAGTTAGAAGCCATAGGTACATATTATTTAGAAAAAGTACTAGAGGATTTTAAAAATAAATATGATAGTGGAGATATAAAAGTACAGCAGGAATTAATTGATAACATTTATAATAATTTTCAAAGGTTAAGTGAAAAAGCTAAAAAAGAAATTTATGACGAGTTACATTTGAAAAGTACTTCTGAAAGTACAGTTAAAGCAGCCATAAGATCAGCCCCCTTTGTATTTTTATCTACTACTATAACTGTTGGGGGATTTTCAGCATTTAGTGCAGTTTCAAGTATAATTTTCGGAATTTCACATTTACTAGGAACAACTTTTTCATATGGGGTATATACTGGGGCTTCATCATTATTAAGCTTTTTCAGCGGGCCTTTTATGGTAATACTTTTTATTGTAAATGGAGGATTTTTGCTTACTCAACATAAAAAACAGCAATTAGAGTTAGTACCTTTATTTATAATGCAAACCATAATTACTAATGTAGCTATAGAAAAAACAAAAATAAACTTTGATAACTATGATACTATGATTGAATTATGGAAAAGAGAAAAAGGGAATTTTGACAAAATTACCATACAAAAAAGATTGCAGGAAAATATCTTAAATAATTATACAAAAGAAAGAGATGAACTTGCTGCTAAATCAGAACAAGTGTCTAGAGCTATTAATACATTATGGGAAGAACATGCAAAATATAATTATAAGTTTAAAGATATTGTGTTAAGCTCTGATAAAAAAGTATATCTAAACTCATATGCGGAGTATAGCAGTACTATAAATAAACTGGAAATGGTTCAAAAAGAAATAGATGAAACTAAAAACAAGGTAGGAACCTTAAAAAGCATATTTAGTTTAGATATGTGGAAAGGGCAATCTTCAAAGCTTGTAAATGAAATGAACATAATAAATATGGAAAAGTCATTAGTTGAAGAAGCAAAACAAAGCCAATACTTTAAGGAAGAAAGTGCTTTATTTTCAGAATTGCAGCAAAAGATAGATGTATTAAATGAAATTCAAAATAAAACTAAAGAAAAGATAAAAGATAAAAGATAAAAATAACCTAATCAGCGTTTTGAAAAATAAAGTTAACTTACTCGAAAATGATTTGGTTAAAATAAATTCAGAATATCCAGATATAAATAACGCTGAATTTAGGGCAATTGGATAGTCAGGATAAGGATAGCAAATTCATTTATTTGGGCTATAATAAATATAGATGTGGAGTAATACAGTCAGAATCTAATTACTTTTGTATGTTTCCAAGACAAGCTCGTGATATGCAGAAATAGAACAACCTTGTAAAAATACACCTTAACTTTTAAGAAGTTTCATAAATTTAAGGACTGCAATAAGATAAAAAATATCAATCTTATTGCAGTCCTTATTTTTAAGCTACTTCATATTTAGTAACTTGTCTTTGAGTAACCTGAGCACCGGATTTCTTAACTAGATCGCCAGCCTTAGTTTTAAAAACATTTTTTGAAATTATAGTGTCCATAAGTGCGTTAACTTCATCTTTTGTAAGAGTAGTTTTAACACCAGAAACACTTAAAGTGCTTTTTTCACCAGCAGAAGTTAAAAAAGTCATAGCTAAAGTATATTCCATTTAATTCACCACCTTTCATTAAGT
TTAGAGTTAAGGAAACTACTCCTACTCCTATAGTTTTAGGAGTACTCACAGAGTAAGAGATCATTATCAAATCATAGATGCCTACAGGGAAAGTTCGAAGAGCGAAATATAAAATGCTCATAGAGAAAGTTCTGCTTACCAAATGTAAAATTTGGAGCATCACTTTTCGGTTCTCACTTTTGATATGCCTGCTTAACCGATTTAACTAAGTTCGAGAAGTCCAATCATAGATTTGGACTCTCACTCTTATCTCACTTATCTGCTTAAGCATTTGCTAAACTAGAAGATTCATTAATAAAGTAATCTCTAGTGTTAGCTTCTAATACACCTTTAATTGCATCTGCAATAGCATAGACATTTTCAGGTGTAGCATCTGTTTTGATACCTGAGAAAGTTTTCTTGGTATAAATTGGGTCTCCAGCCTTGTCTAGACCTTTTTGAACCTCAATACTAAGAGAAGCAGAGTCAATAGTTTTTGTTACAGCCATGTTTCATTACCTCCTTTACTTTGTTTTCATAAAGGATATATGGATGAATTTTATTTATTACATAAATAGATGAAAATTATTTGTCAATTTAGCACTATGCTGTTAGATGGATGAGTTCTATTAGTGGCAAGAATCATTTATGCCGTATGAGCCATGAAGTATTGCAAAAAAACACATGATTCTTAGTGAAGAATTAGAGAAGATAGAGGTTAATAATGCTAGTGATAAAGAAAATATTTAAGAGATTTATAATGTGGTAGCATAAACGAATATTGTCATATTTATTTCTTGTATAATTAGATAATTATTCCATGTATGAAAAAATTAGATTATAATGGGTTATATAGTATTATGGATAATGATAAGCCTGTATTCTTACAATGTTTATAATGAGAAGTTCGTGGCATGAATTTAATGATATAAAGAGGTGGAATAATTATGCTATTTGGATTATTTAATAGTAGAAAAGATAACGATGAAATTCATAATATAAAGCAGTATGCAGTAGATATAGCGACTAATTTTAATGACGAAGACAGGGAATATTTGAAGCCAATTTATACAATTAAGTATTATTCTAAATTAAATAATTCAGTTGTTAAAGAAGCAATTTTTTCTAAAAATATAGAAAATGGTGATATTACTTTAGGAGAAGCATTTTTAACGTTAGAGGAAATATTAAAACAGAGTATAGAGAGTAAGTTGTTAATTAATACAATGATTAATAAAAATTATGTGCTTACCAATGAGGAAGAAATTACAATTAATAAACTTGAAGATATAATAAGATTTGGGTTGCTATAATCAATCCAAATTTAAAAAGTGTGAAGCTTAATATAATAGATAATGTGTGTATATTCTAAAATGGATATGGAAAGAGCAATAATTTAACTTATTGCTCAGATATTAATTATGCCAATACCACGAATTTGGGATTCAGGTGGACAAAAACAACTTGTTCTTTGAGAATTGAACAGTATTATATTTTGAAAATATGTTGTTAGGATGAATTAAAATATGAATAAAGGAATGGAATAAGCTGTTATGCATATGAAGGTGAAATAGATGAATAGTTGTAAATTGTTAGAGGTTAAAGCTATAGTTCAAAATCCATATTATAAGGCTCCAAGTTGGATTGATGACAAGATCCAGAGGAGAGAATTTGTTGTTTTAGATGGGGATTCATCAGAAAACGATGTGGAATTATTTTTAATTGAATTATTAGGCTATAATAATATTAACATTGAGCAGGATACAGAATTAGTTATGAAAGAAGTTTTAGAGGAAGTTGAAATAGTAATAGCAGGAGGAATATTATTTGTTGGAGAAAATAGAAATATATTTCCAAGCTGTTGTTGCGGTTTGGAAAGTTGGAATGAAGTATTAGCTGCAGTTATTGATAAATCATCACCATGGTTAGGACACGATCCATATCCCTGCTTTGAGTATGTTGGAAACAACATTCGTATATGGTCTGATGATTTTAAGACAAAACAGTCAAGTGAGATTTAC
TTTGTTGAGTGTGATAGGGATATTTTAATCGAAAAACTAAAATTAGTTAGAAATGATTTAATTAAATTCTCTAAAGGGCCTTTATATAATCATATCAGTAAACATGCAAGAACATATACTGATTTGATGGTACAGCAATTTGAAAAATGGTTTTCTTTAAACCTTGTGTAATATAATCTGAATGACATGTAGCTAATTGATGAACTGATATAAATCATTATGGGATTTTAATATAAAATGATTTAATTTAATTTGTGGAGGAGGAGAATTTTAAGCTTGACTAAAAGAAATCATAAGTTTGAAATAGCGATATTAACATAGTAGTTATTGTAAATTATTTGCTGAAGGTGATGAGTTGAAGAGTATTATTTTTGAAGGTTGGAAAATAGAAATTGATAAGGAAAAAACTAAAAATTATTATTCACAGTTATATATAGATAGTTTACGTGAATGTTGTTCAGATTGTAGAAATTATTATTATGGAATAAGGCATTTGCCTAAAGTTTTTATAGATTTTTTAGAGAGCTTATTTATTGTTCCAGAAACACCTATATGCGCTTATAAGCTGGATAAAGTTGCTGATAATATATTCCTATATGATGTAAATTATTTAATTAGGGGTAACATATTAGAAAAACCTGATAATGCTGAAACTGTTTCAGAAAATGGAAAGAAGGTATTTAACACAAGTACGTTAACTGATGAGGTAGAGTTTTATTTTACAAAAGAATTACATCCAAGTTTGCGAAATCTTTCTGGTTATGAATCGTATTTTGAACTTCAATTATTTTGTGAAATTCCTTGGGTTATAGATGAGCCGTATGAATAATTGCTGGTTGAAAATTATTGCATCGCACATCTATATAATAAAAAGTTATAAATATTTAAAATAGATGAAAAACTATTAAACAATATAATTGACACTTAACAATATAAGGTATATAATTATACAAAAATAAAAGCTATGAAGAGAAGAGTAATTATGAGGGAATACTCAGAGAGTTTCCATTTGGTGTGAGGAAATTATGAAATCATAATGAAACAAGTCTCGGAGCTGCATACGGATCTTATAAAGTAAAACTTTTCAGGTGGAATTTTATTTTCGCTAAATTTAGTTTAACTTATCTTAGAGAGTGCAACCTATAACTCACATTTGAAGAAGATATGATGTTACGGATGCCTAGCTATAAGATAAAAATTAATTATAAGTGATGCTATGACGTAAGTTCACGTTACAGAACCAGAGTATGATTCAACCTATAATTTTAGGGAGATGGTGTACTTGAAGAGCCTAAAATGGTGACATTTTAGGGAAAAGGGTGGCATCGCGAAATAGAAGTTTCGCCCCTGTAGATATATTTTCTACAGGGGCTTTTTTGACTTTAAAATTAAGCATATAAAAAATTATAAGTTTAAAATATCATGGATATTTTATTTTTCGATTATCTCAAGTATGAAATTTAGAAACTTAAGGAGTGAAAAGTTTATGATAAGTAAAAGAATAGAAAAAAACGAAAGACCAACATTTCCAAAGAAGGCTGTAGTAACAGCTGGAATGCCTTATGGAAATAAGAATCTTCATTTTGGCCATGTTGGAGGAATGTTCATTCATGCAGATATATTTGCAAGATTTTTAAGAGACAGAATAGGGAAAGAAAATGTTATTTTTGTATCTGGTACAGATTGTTATGGTTCACCTATTCTTGAAAGTTATAGAAAGCTAAAGGAAAATGGATACGAAAATACTATGGAGGACTATGTTAAGGCAAATCATTTGAGTCAAAAGAAAACCTTAGAAGATTACAATATCAGCTTAAATATTTTTGGAGCTTCAGCCATTGGAAGGACAGGAGAATTTCATAATGAAGTTTCAGAAGAAATATTTAATACTTTATTTAAAAATGGTTATATAAAAAAGATGTCAGCACTGCAATTTTATGATGAAGATAAGAAAATCTTTCTTAATGGAAGGCAAGTAATAGGAAAATGTCCAATAGCCG
GATGTAACTCTGATAAAGCTTATGCAGAGGAATGTTCATTAGGGCATCAATATATGGCATCAGAATTAATTAATCCCATTAGCACTTTATCAGGAAATAAACCAATATTAAAAAGTGTAGATAATTGGTATTTTACATTAGATGAATCCATGGATATAATGAAGGAACTTAATGAGTTTTTAAAGAAGAATACTAATAGACGAAAGTATGAAATTAACACTATAGATGAATTTTTAAAGAAGCCATTAATATATGTTCCAAGAAAATATATAAAGGATGTAAATGAGCTAGAAGCTAAATTTCCAAGCCATGAAACTATAGATGAAGAGAAAAAATCTTCACTAGCGTTTATTTTCCAAACACTAGAGGATAGAGATAAAGCAAAAGAAATATTGGATAATTTAAATATTAACTATACTAGTGGTAAGACATTGGTTCCTTTTAGATTGTCAGGAAATATAGAGTGGGGAGTAAAAGTACCAGATAAAGAAGATTTAAAAAATTTAACATTCTGGGTGTGGCCAGAATCCTTATGGGCTCCAATTTCTTTTACAAAAGCATATTTGGAGTCAATAGATAAGAGTCCTGAAGAGTGGAGTAATTGGTGGGATGCTGATGATTCAATGGTATATCAATTTATAGGGGAAGATAATATATATTTTTATTCAATTGCTGAAATGGCCATGTTTATTGGCTTAAAGGTGGCTAAGGGTGAAAATGTAGATGTTACTAAGCTAAATTTGCCACACATTGTTTCTAATAAGCACATTTTATTTATGGATAAAAAAGCAAGCAGCAGTTCAGATATTAAGCCACCAATGGCAGATGAATTATTAAAGTTTTATACGAAGGATCAATTACGCATGCACTTTATGAGTCTTGGATTATCTTCAAAAAGCGTTGGCTTTAAGCCACAAGTTTACATGAAGGAAGAAGAAAAAGTAGGAGCGGATCCAGTATTAAAAGAAGGAAACCTTTTAACCAATGTATTTAATAGACTCATTCGCTCATGTTTTTATACTCTACAAAGTCTTAATGAAGATATTCCTAAAGAAGAGGTAAGCAAAAAAATTAAAGAATTAACAGAAAAAGCAGTATTAGAATATGAAAGACATATGTATAATCAAGATTTTCATAGAATAGCATATGTTCTCGACGATTATATAAGGGAAGTAAATAAACATTGGGCTAGTAATATTAAAAATGAGGAATTAAAAAGAAACGTAATAGCAGATTGTTTTTACGCTTGCAAAGTTATAGCAGTACTAATACACCCTATAGCACCAGAAGGATGTGAAATGTTTAAAGATTATCTAAATATAGATGATGAACTATGGAATTGGGATAAAGTATTTGAACCTATTACTTCTTATTTTGAAGATGCTGATAACCATAAGTTTAAATTTCTTGAACCCAAAGTAGATTTCTTTAAGAAAATGGAATATCAATATTAATGGAAAAATGATATGGTTTTTACAATTTATGTTGTAATTAATCTAATTCAGATATTGTTTTATTAGAGTGTGTTGGAACAATATCGGATTAGATTAATTATTTGTTAGGGTGGTTTCTAGTGAAATATTGTTGTACTGAGTCAAATAGAAAAGGCACATGTTATCATGAATTTCGAAAAGGAAAATTTACTGGCTCTTTTTGGAATGAAGATTCGCTTTTAATACATGATGATAATTTGTATAGTTTACATCTAGCAGATATATTTCGTAGTGTTGTTCCTTGTATGAAAAAAAGATTATAATGGGTTATATAGTCTTATAGATGACAATAGGATTGTATTCTTATAATGTCTATAATAGGAAGTTGATGTGAGTAGATAAAAGTACAAAGAATACTGATTAAGGTTTAGAATAATTAAGGGTGAACCGTACCACAAGTAACGGAGGTTTTTTTGAAGCATAATTATAATCATACTATATAAGCAAATATAAATATTTAAGAGTGTAGAGCTTCATACTAATTAAAGCCAATATGAAGCTCTATAC
TCTATAATAAATAGACTAAATCAATAAGAAAATAAATATTTTAAAATATTCCACAAAAAAATTGTAACTGCAATGCAAGCTGAACATTGATAAATTGTTTGATAGAATAAATCATTACGACCTTTTGCTTCTGACCAGTTAACAAATTTCCGTATTAATATAATCACGATAGGTAGAAGCAGTAATTGTAAAAGAAATCAACTGTAACAGAGATTATTTAAATCTTGAAAATGGAATGCTCAAAGGAAACTGTTAAAGTACTAAGAAAAGCAATAGAAAAATAATCAAATAAGAAGATTATGAATTGAGAAATTGAAGATGTGAGAGATATGTATAAGTGACACAGCAGGATAATATATAAAAATAAATGGATGAAATACCAGGAGAAAGAAAAGTACCTGGTATTGTTTGTAATTCTAATATTCCTTGCAAACAACATCTTTAAAGAGCTTATGATTATTAGGAGCATCATGTTCACTGCATGCTTGCTGAAGTTCGGGGATATCAGTAGTTCCAGCTTCTAAAGCAGTTTTGTAGTAATTACATTTATAATTTATTAACTCCAAAGATTTCTGAAGAATTTCAATTCGTTTTTCTGCTTCTGCCTTTCTTTCTAAGAACATTTTATATCGTTTTTCAATACTAGAATCGCCTTCCACGTACCCTTCAAAAAATGTCTTGATATCTTTAATCTGCATTCCTGTATTTTTCAAACATTCAATCATATTGAGCCAGCTTATATCGTTGTCATTGAATCTTCGGATACCACGTTCTGTTCTTTCTAATAAAGGAAGAAGTCCTTCTTTATCATAATAGCGCAGTGTTGGTACTGAAAGGTTCATGATTTTTGCCACTTCACCTATAGTATATTCCATTTGTAGGCTCCTTTCTAATTTTAGGTATTGACCTAAAGTTAACTTTAGGGTGTATTATATGAATAAGCTTAATCTAAATTTATTGAAAAGTCAAGAAATATGGATTAAGAAAATATGAAACATAGCATAACAAATAAAATTTAGAGCCGATTTAAGGAGGGAATAAAACGACCAGTTCAAATGATTAGTTATGGAAATTATAAAAAGTTTAACAGAGAGGAAAGAGTTGTTATGATGAAACATGTAACATTTAAAAATGCTAATATAGAAATGAAGGGGGATTTATACACTCCAGAAAACTTTGATGAAAGCAAGAAATATCCAGCAATTGTGGCTGTTCATCCAGGTGGAGCAGTAAAGGAACAAGTATCAGGTCTTTATTCGAAGAGAATGGCAGAAATGGGATACATTGCACTAGCGTTTGACGCATCACATCAGGGAGAAAGCGGTGGAGAACCTCGCTATCTTGAAAATCCTACAGAAAGAGTAGAAGATATTCGCAGTGCTATAGACTATCTTAGTACACTTTCTTATGTTGATATGGAAAAGGTAGGAGTACTTGGTATCTGTGCAGGTGGTGGATACTCCATTAATGCTGCTTTGACAGAAAAAAGAATTAAGGCAGTTGCTACAATAAGTGCATTTGATATTGGTGCTGGTTTTAGAGGAGATGGTATGCCAGGTGGCTTAGAAGCAACATTAGAAACATTAAATAAGGTTGCAGCTATGCGTAGCGCACAGACCAACGGAGAAGATTCTTTATATATCACATATGTTCCTAATACAGAAGAGGAAGCAAAAGCTAATCCAATGGTGCTTATGAAAGAAGCATATGATTATTATCGTACACCTCGTGCACAACACCCAAATTCAACAAATAAATTACTATTTACTAGTCTTGATAAGATTATAGCATTTTCAGCATTCGGATTTGTGCCAACATTATTAACACAGCCTATTTTAGCAATAGTAGGAAGTGAAGCTAATACAAAAGGATTTAGCGAAAATGCAATTGAATTGGCAAATAGTCCAAAAGAATTATTTGAAGTAAAAGGTGCGACACATATCGCAATGTATGATGTACCTGAGTATGTTAATCAAGCAGTTAATAAACTTGGAGAGTTT
TTTGGGGAAAATCTATAATTAAGGACAGCTTTAAAAAGGAAGTACTTATAATATTTAATACATTGGCTATTATAAAGTTATCAGATGATAGAAAGTGTCAGTTCTGAAATATTTCTAAATTTAAAAGATTATGTGATTGGACAACAAGCCTTAATTACCGGTGAAACAGTGATTAAGGCTTTTTCTAGTGTAAAAAATATGGAAGATTACTATGCTGAGAAAAAATGCTTTAAGACACTAAAATATCGGAAATATCAGCAATACATTAGAAGGCAGTATTGTTTAAAATTAATGATTAGTTTGCATTTCTTTTATTATTTCTAAAAACTCAATCACATCCTTGGATGGATTTAAGGGATAGAACAAGCCATAAGGTATAGTGAAATCCAAATCGCTAGGGATGGTCACTAATGAGGGATGAATATCTGACCAGGCATCTAATGTTAAGAGGATAAGGTTATTTTGCCCGCAGCGATTGAATACATCAATATCATAAAGACGAGGAACACTTTTAATATGAATTTCAGGATGGTTCTTTATGAAAAATTCACTGATTTCATCAATGGATGCACTAGTTCCACCTTTTATTACAACCAACTTTTCTCCATACAAATCTGTTACAGAAAGTTTTTTCTTAGAAGCCAGCCGATGTTTGCGAGATACAGCAAAGCAAAAATGGTAAGTTCCTAGCTTAAGTTTATGGGAATAATCATTCCAATTTTCTGTGAGGAATGGTCCTACTATGAAATCAAAATGTTTGCCAATGTTACGATAAGTGTTTGGTAATGTATATGCATCATCCTCAAAAGGGACAATTTTTATCTCGAACTGGGGATGTTTATTACTGATTTCATTCCAAAGATCCAGTAAAACCTTACAGGGATTAAGTATAGAGGTCCCAACCCGAATAATATGCTTATCTACATGCGTTGTATTACGTGTACGCAGTAGTGCTTCTTGAAAATACTGCAAAATAAATTCTGCATCCTTATTGAAGGATTTTCCTGCATCGGTCAGTTCAACACCATGGTTGGTGCGAATGAAAAGAGGTACACCCACTTCCTTCTCAAGGGAATTAATCTGTTTCATCACAGCCGTTGGGGAAACAAACAGTTTTTCAGCAGCCTTGGAAAAGCTACCAGACTCTGCTACCTGAATAAAAGCATTCAATTGTTCATTCATTTAAAGAATCTCCTTTTTGCATAAACTTTTAGTTAATGCCATGCTAACCTATTTGAACTTCCAAATCAAGCGATTATACATTAATATTAGAACTATACTAAAGAAACACAAGTTAAAAAAAGGAGATGCTTACTATGAAAACAGCATTAGTTACTGGTTCCAACAAAGGAATAGGTTTTGAAATTTCTAAAAAATTGTTAAAAGAAGGATATCACGTTTTAGTTGGTGCTCGAGATGAAAGCCGTGGCAAGAATGCAATAACAAAACTCGCTAAGTATGGTCCTGTTGATTTAATAAAAATAGATTATTCAGATAGTGAAAGTATAAAAGAGGCAGTACAGCGAGTTTCAAATGAATATAAAAAGCTTAATTTATTGGTTAATAATGCAGGCATACCTGGACCACTTATTAAACGCCCTAGCTGGGAATTTACAAAAGAAGAGCTTTTAGAAACTTATACAGTAGATTTTCTTGGTCCATTTGAGCTGTCGAAAGGATTATTACCCATTCTTATAGAGAACCATGGCAAAATAGTCAATGTTTCAATTCCAATTGAACCAATGCCATTTCCTAATATGAACCCTTTTGCATATCTAACTGGAAAAGCACCACTTAATATAATGACAAAGTCTTGGGGAATTAGCATTGATGAGATTGCTCTTCCGGTACAGATATTTGCAGTGATGCCCGGTGCCGTTTCAACTGATTTGAATAATCATACAACAGGAAAGGGTGTAAAAATGCCTGAAGAAGCTGCAGAATGGATTGCAGGTTTAGCTTTGGATGAAAAAAATCACAATGGTCAAGTTATCAATTTT
GAAGGTAAGCTAGCTGACTATAAAAATTTAGTATAGTTTATTTTGTCAATAAAAAAGACACTGGCAAATGGACTTGTTATTTGATTGTGCCTAAATATAATAAGAGAGTAAGGAAAATGGAGTATCACTTATTAATGGATACGGTGAATCATAATTCCTTCTCTTATTTTCTCTTCATTAATATTTGTATAGTTCAAGCGTAAGGTATTTTTATGACCGCCATTTGGGAAAAATGCTTCGCCTGAAATAAAAGCTATACCATTTTTTTAATGCACATTTTAATAATTCAAAGTGGTACTCCGTCTAGACATCAGATAAATAAATGAATTAGAAGTTTAAAATCGGGAACTGATAATGCACAATATATTAGGAGAGATGCCATAACAATATTGAATAACTTTCTATGATGGCTAAGAATTTTTTGAAATAATGCACCAAAAAGAGTCCAGCAGGACATTGATATAAATCCTACGAATGCAAGGAATATAGCAAATAAGTATAATATTATGCTCGATTTATAATAGGGGATTATAAAAATTGAAGTTGCCGTTATACCATAAAGTATTGTTTTGGGATTCATGAATTGCATAAGAATTCCAGATTTAAATGTGTAGATGCTGTAATCATTAATTGCATTTGGGTTAGAATTACTTTTGAGTATCTTGATAGCTAAATATATTATGTATGCTGAACTAAATACCTCCATAGACAACTTTATGTTAGGAAGAAGTTTAAATAAAAATAAGTTAAGATAACCACACAATAACATGATCACAGCCATACCAGTTGTAGCACCTAGAACAAATTTGAATGTTTTTTTATATCCAAAGTTAGTACCATTTGTCATAGACATAATATTATTTGGACCAGGTGTAAAAGTTGAAACAAGAACATAAGATAAAAGTGCAGATAAATTGAACATAGCTGACCTCCTTAATGATTTAATTTGAAAAGATAGTAGTAATATGATATCATGCTACTAACAATTAGTGAAATCAATAGTTTAAATTGTAACAATCAAAATAAATGATTGATTTGGAGGCATATACAAATGGAATTTCGTCAATTAAATGCATTTATTACCGTAGCTAAGCTGAGTAATTTTACAAAAGCAGCATTTGAACTAGGATATTCACAATCTGCTATCACTGCTCAGATACAACAGTTGGAAAAAGAATTAGGTGTTAATTTGTTTGAAAGATTAGGGAAAAATATATCTTTGACTTCAGAAGGAGAACAGTTTCTTGTTTATGCTAAACAAATAATTAAGCTTTGTGATGAAGCAAAAAGTAATTTAAGTACATCAGATGTAGTAAAGGGAACTTTGACAATAGGGGCTAATGAATCACTTTGCGCTGTTAGGCTTCCTCCTCTATTAAAAGAATTTCATGACCGCTATCCGGAAATTGAAATCCTTTTAAAAATGGAAGGTAATAATAAATGTAAAACATTGATAAGAGAAAATCAGATTGATGTTGCCTTTATTATTGGTCAGAAAATAAATGATTCTGATTTAATCACAGAATTGGAATTTCCTGAACCTTTAGTTTTATTAGCAATTCCAGGACATCCACTTGCTTTCAAGAAACATGTTTATCCTGAAGATATTGCTGACTATAACATAATTGTTGCAGAAAAAGGCTGCGGATACCGAAATCTTTTTGAACGAAGTCTCAATGATGCTGGGGTAACTCCGAAATCAATAATGGAAATGGGAAGTATCCAATCAATCAAACAATTGACAATGAGCGGGCTTGGAATAACCCTGCTTCCCAAGATTGCTGCTCAGGAGGAGTTGAAACGAAAGCAATTAAGAGAACTTCAGTGGTGGCAAGATTCCTTTTATTTGACTACGCAAATGGTATATCATAGAGATAAGTGGGGTTCAAGGGCATTAAGAGCTTTTATAAATTTGAGTAAAGAAATGATGAATAGTTAATATAATATTTTGAATTCAAATAACAAAGGTATGTATTTTGTGTTCTTAGTGTTTTCTTATT
TAATTAAACATTAACATTTTTATATGACGTAACAGTTAAAAATGGAATGATAAGAAGCATCTAAATAAGAATTCTTTATTTTTCAACGAAAGAACTTTTTATATAGATGATAAAGGAAAAAGAGTTATCACTTGCTATTGATACGGTGAACCATGTCATAAGTGAGTGATAAATTTATTAAGGCCAAATAATGAAAATGAATTAGAATAAAGGAACTTACCATATAAGAAAGTAATATGGTAAGTTTTATTTAATTTATAGACAATATAGAGACAAACAGAATAGAAAATTCATTGCAAAGAGCCTATAAAAAATGAAAAGAACACTATTTGATATAATGTTCTTTTCATCTTTTTAAAGTACCCATACCTGTAGAAACAAACCCAATAATCTCAGAAATTAGAACTTCTTGAGGGAGTGGTCTATTCTGAACTTCCCATTTGTAAGTTACATTATAAATGGAAGTGCTCATCATTGTTGCAACTAATTGGGCTCGTATTTGTTCATTTTCAGCTAATTCATGGTCTTTTATTAGCATCTCATAGATTATGGAATATAGCTCACAAGATATTTTATCTCCTATTAAAGGTAAAATAGAGGCAAAACCGCACTTGCAGATGCTTTTTACCATTTCAAAGTACTCACAAACGCAAAGAATGAGGCTACGTATAGTATTCTCATTTAGGCGCATCTCGCCTAGTAGTTTATGGGAAAAGATCTCAGAGAATTTCTCTACTATGACGGTGTCCAGTAGTGCATATTTATCTGTAAAATGTGCATAGAAGGTTGCACGGTTTATGGTGGCTGCGGTAGTAATATCCTTAACTGTAATGGAGTCAAAGCCTTTTTTAGTAACTAGTGAAATAAAAGCGTTGATAATAAGATCATGGGTTCTTTTTACTCTAGGATTATTTATATTTACTGACATATCATTACCTCATTTAAAAGAATTTTAGTTAAACTCATATTGCAGATTTAGTTTTATGTAAGCAACACTTTATCAATTTTGTTGGTTGAAAAATGTATAAGCTAATTTTATAATCATTATAAGCAACACGAGTTGGCTTTGCAATATGTGTAGCTATAATAATTGTTTGAAATAAATATTTATTTCTAGGGGGTTAAATATGTCAACTATAAATCCAAACTGTAAATGCATAAATAAAAGCTGTAAAAATCACGGGAATTGTAAAGCATGTCGAGAGTTTCATAAATCACAATCATATCTTAATGAAACATATTGTCAGGCTGGTGCAGTTAAATTCATAACTAAAGGATTAATATCTAAGTTAAAGTTACATTCTTAGCACGCAACCTAGCAATTCAAATGCGGACTTACTATAAAATAAAAAGGGTGGGAGTGCTGAAATATGAATTTAGATAATCCGATTTTCAAGAAGCCAATTACAGAAGTTATTAAAGCAAGAACATCTATGAGGTCATATAATGGGGTGCCTTTGGACAAAACAATAGGCGATAGCATAATGGATGTCATAAATCAGGTTAAAGCACCTTTTTGCACAAATATACGAGTTAAGATGATAAACTCTAAAGATTCAGATTTAAAGCTTGGAACCTATGGTATTATAAAAGGAACTTCTACCTTTATAGTCTCATCAATTTCAAAATCAGATAATGCATTAGTTGATTTAGGATATTTATTGGAAAGAGCAGTGCTGTATGCAACTGATTTAAATTTGGGTACATGTTGGCTAGGCGGTACCTTTAATAAAGGTCAATTTGCAGAGGTTATGGAGCTTAAAGATAATGAAATCCTTCCAATAGTTATACCTGTAGGGTATCCTAGTGAAGCAAGAAGAGGTATGGATACTTTCGTTAGGTTTATGGCAGGTTCTAAAAATAGAAAAGCCTGGTCTAAACTATTCTTTGATGTAGATTTCAATACACCTTTAAAGGAGCTGGAAAGCTTAGAGTATTTTATTCCCCTTCAAATGGTAAGGCTTGCGCCTTCAGCAGCAAACAAGCAACCTTGGAGAATA
ATTAAAAGTTCTAATTCATACGATTTCTATTTAGAACGAAGTAAAGACTCAAAAAGTGATAGTTATAATGATATGCACAAGATAGATATTGGCATAGCTATGTGTCATTTTGAGTTAACTTCAGCAGAATTAGGGTTAGAGGGACATAGGGAGAAACTTAAGCTTTCATCTCAAGATAATAAAAAATATATTATTTCTTGGGTTAGAAAATAGGGAAGCGAATTTGCTATAAATGAAATATATGTTGAATATGTAAGAGATATTGATTACTATAGCAAGTTATTTTCACATTTTTCTTCATGAACTTTTTTTCTTTCTTCGTTAATAATTTGCGCCGCGTTAGGAGATGTATACTTCACTATTTTGGTATGAATAAGGTCTAAATTTTCATTGAGATCATTTTAAAGGAGTTTTCTTTTTGACACAGAAACTAATTCCGCTTATTGCTGACAAAGGTGGGATTGTGAACACTTCAACAGGATTAACTCGTTTTACATCTCCAGGTTCCAGACTTTATGCTGCTGCGAAAGGTTCAGTAGAAGTCTTCACTAGATATTTAGCAAAAGAATTAGGAACTAGAGGTATAAGAGCGAATACGATAGCGCCTGGAGCTAGTAAAGGTGCCCCTTATTTTAGAAATCGCTTCATTGATTTAGTTACTTATAAATTAGAGGATGAAGTAAATGTTACAGAAGGAGTAAATTCAGGATTAAGGAAAGATGCTGGCAAGCAATTTCTTGGGCAAGCTATAGTAGGATTGATAGAATGGTGGTTTATAAATGGAATGCCTTTTTCTCCTGATGTCTTGGTGGAACATTTAGGAGAAGTGATAGAGAGAAATTTGTAATCATTAATTTAGACTGATTCAAATCCATTTAATTATAGATAGGAAATATTAAAGGAGGTTTAGAATAATTAATAGTGAACCATATCACAAATAAAGGATAATGAGTAAGTTTGGGTAAAATTAGGGATAAGAAATAAATTTAATCCTAGTAAAGCTTATTGTTGCATTACTAGGATATTTTAATACGTTCAATATTATAAAAATAGGTGCCAAAATTTTGACTCTTACGTTTAAATTGAAAAATTGTAATTGCTAGACTAAAGTATATTAAGGTTAAAAAAATATAATTAGTGAGGTGAAATCAAATGTATATTATACTGATTGCAGGAATGCCAGCAGTTGGTAAGACAACATTTGCTAACAAGGCATAGAGGTCACAGAATATCCTGAATCAATAAATCACAATGCTATTCCTGATCAAATAAAATGGGTTTCAGTGCAAATCACAAGCACTGAAACCCATTTTAATTTGAAGCTTAACGTATATATTAAAATATCATTACTAATATTTTAGGTAACAGCATGGAAACACCTACTGCATAAAGTAAATCAAGCCACATTACAGAACCAACCGCGCCAATATAAATAAGACCAATGACAGGCGCTACTAATATTTTTATTAAAAATGATACTTCTTTATTCTTTAAAATAGCATTTACAATACATTTTGCATCACCTGTACTTGGGAAGGCATGCATTAAAATAGAAAATCCAATCCAACAAATTAATAGATTAATTAAGTTTGAATATTGTCCAAATTCTACTATTTCAATTGATACTGGTAATGTAATTAGAGCTCCTATGATAGTATTTATTAAAAATGGACCTACTGAAATAAAAAATACTTTTAATGGATTATCACTAGGTTCGTGCAAAACATATCCACAAGGATTTGATAATTGGAAATATTTAACTTCATAGACTGGCACTCTCATTAGTCTGCAAAATATCTGATGTGCTAGTTCATGAACAATTACTCCAGGAAAGGTTACAATAGAAATTAAAAAACCAGGTATTATCATTAATTTTGCCCTCCATTACTTAGGTAATAATCATTTATATTTATTTTTCCATTAAAATAATCTAATGTAGTATTATCAAATTTGTTTCCTGTAGAAAGATATTCATCTTTGATTTTATTAACTTCTTCAGT
T +>6_2#NODE_17_length_39819_cov_63.5061_ID_33 +TTCAGCTTTCTTCGTTTCATTCCATTCCAATAAAATTTTATGATTTTCTTCAGGAAGAGTAATGTTTATTTTTCCAATCGGCTCGTCTTTATGATAATTATTTATCACTAATTCTAATAGATTCATAAATCTTTCTTTATGCAATGCTAGTTCCGCGCCGTTATATACTTCCGGATTCGCATCAAAATGAATCATTAACTCATTTTCATCGAAGCGCTTATATACATTAATCGATAAGTCATCGACAGGTCCTGCTGATAAATTATGTGTAATACCACGATTTCCAGCAAAGTTAAGCCCGTAATCAAAAGGCATAACATTCACTAACGGCCCAAACAACCTTTGATTTTCACCTAGTAACTTTAAGTCTCTTCTTAATTCCTCATGACGATATTTATGATGACGACGTACGTCTCGAATTTCTTTAGAAACTTGCTGTATTAATTCTACAAAAGTTATATTTGGAGTTAACTTTAGACGAAGTGGAACTAAATTCATTACCATGCTAGGTGTATGAATAGATACAGAACCAAGACGCCCCATCATCGGTAAACCTAAAACAATATCATTTGCACCTGTTAATTTATGCATATAAATACTTGTTACAGCTACAACAAATTCGGGCCAACTCGTTAACGAAATATTAATATCTTCTAGTAAAGTTTTCGTACTAGCATTTGATAAATAAGCAGTTTCTCGTAAAAATCCATTTGATGTTCTCGGCGCTTGTTCTGCTAAACTTATAACTTCCGGTTCATCTGCAAACTTTTCTAGCCAAAAAGTACGATTTTCCTGAAATTGTTTAGATTCACGATACTCATTATCTTCTTGTACAATTTTGGCAAGAGAACCAAATGGCTTTTCATTCTTATTTGTTTCTTCTATAAGTGATGTATACTCATTCGCTACCTTTTGGCTAAGAAGTGAAAATCCATAGCCATCCATCACAATGTGATGGATGCGCTGATACCAAAAGAAACGATTATTTTCAACTTGAATAAGTGCTTCAGTAAATAATTTATCTTCTTTTAAATCTATCGGTACAGATAAATCATTTTTCATCCATACTTTAGCAGCTTCTTCAGGATTTCCCTCTTTACGAACGTCAATAAAATGCATATGAAACGTTGACGATTCCTCAATAACTTGCCATGGGCCAATTTCATCCTCTTCGAAACGAACATGAAGTGCTTCTGCTTCTTTTACCACTTTACGTACAGCTAACTCGAAAATTTCATGATTAATATTTCCGTTTATTTCTATATATTCCCCAGTATTATAAATCGGATTAAGCGGATCTAATTGTTGCGCAAACCACATACCAGACTGCGCAGATGATAACGAATGGCGAATCTTTTGACTATTAGGCATTCTTTACTACCCCTTTATTATGTAGTCTATTTGTTACAGCACTTGTGCTGCTTGTGAAGATAGTAAACGGTACCAATCATCAACAGTTGGACGTTCTGCTAAATCTGCAAAAGTGATTTCTTTCCCTTCGCGGCGCCACTTCTCTACTAAACTCATAATTCTGACCGAATCAAGTCCTCTATTTAATAAGTCTTCATCAACTTCAATACTCTCTACTGGCTCACGGAGTAGTTGTGCAACTAGTTCATGTACTTTCTGTAAAGTGATTCCTTCACTCTCATCGCTTTTTACACTTTGTAAATCTTTTAATAGTAAGTTTGTTGATGTCGTTACTGCACATCTATTAGATGCATACTGCAATGCTTGTTTATGATGCTCTAGCGAAAAATCAGCAACTGCATCTGCTACAAAAAATGGCTCTATACCATCCATAAATGCTTCACAAGCTGTTAAAAGGCAGCCAATATGCGCATAAATACCGCAAATAATAAGTTGATCTCTTCCCTGCTCATTTAAAATTTCTAATAGATTCGTCTTTTTAAATGCACTATATCTCCATTTGGTTAGGAATATATCATCCT
CATCTGGAGTAAGCTCATCGACAATTTTCTTTTTATCTGGTCCAGCAGGAATACCGTCACCCCAAAAGTCTTGTAATAACCCTCGTTGTTCTAACGTTTGTCCACCAGGTTGTGCTGTATAAACAACTGGTATACCAAGTTCCTTGCATCTTTCTCTTATCACCTTAATATTTGAAATTAGTTCTACTTTTGGCGATTCTTTATCGCTATATGCATCAAGAAAATATTCTTGCATGTCATGGATTAGAAGAACCGCACGTTTCGGATCTGGCGTCCAATTCACTTTATTTTTTGGTAGTTCTGATTCAATTGGCATTTTATATACTGAAATAGATGGGATAGCCATCTAATCACTTCCTTCCGATTTTTATTATTGTTTTACTGTAATAAGTTTTTCAGCAATGACTTTACGTAATTCTTTTTTGCTTACTTTTCCAACACCTGTTTGTGGGAATGATTCAATAAATTCAATTCGATCTGGAATCTTATAAGCTGCTATACCACGTTCTCTTAAGAACATTTTTAATTCGTTTGCAGTTAGGACTTGTCCACGAGCGATAACAAAAGCGCAAGTGCGTTCTCCTAAATAATCGTCAGGCATAGATACAATTGCTACATCATGTACTGCATCATGTGCTAATAGATGATTTTCAACCTCTTCCGCAGCAACCTTCTCACCGCCACGATTAATTTGATCTTTATCTCTTCCTTCTACAATGATGTAACCTTGTTCATTTACTTTCACAAGATCTCCTGTACGATAAAATCCATCCTTTGTAAATGATCGTGCATTATGCTCTTCCGCTTTATAATAACCACGAATTGTATATGGCCCTCTTGTTAATAAACTTCCTACTTCACCGGGTTTTACATCGTTATCATTTTCATCAACAACCCTTACTTCATCGAATGTAGACATCGGTCTACCTTGCGTATGAATAATGATTTCTTCAGGATCATTTAATCTTGTGTAATTTACTAACCCTTCTGCCATACCGAACACTTGCTGTAACGTACATCCAAATGTAGGGCGAATACGCTTCGCAACTTCGGCACTAAATTTCGCACCTCCTACTTGAATAACTTGCAGGCTCGATAAATCGTCGTTACGGGAAGATGCTGCATCAAGCCAAATCATTGCTAATGGTGGAACGAGCGCTGTAATCGTAACTTTTTCTTTTTCTATAAGAGCAAATGCCTCATCTGGACTACCTCCAGTTGCCAATACTACTTTTCCACCCGCATAAAAAGTTCCAAATGTCCCTGGAGAACTCATCGGGTAATTGTGTGCTACTGGAAGAACTGCCATATAGACACTTTCTGCATTTAAATTACAAATGTCAGCGCTAACACGTAAACTATAAATATAGTCATCATGTGTTCTAGGAATTAATTTAGAAAGTCCTGTCGTCCCTCCTGATAATTGGAGAAACGCAACATCACTTGGCTGAACTTCTGGTAACGGAATCGGATCCATATAAAGATCAGTTATGTTCACGAATTCTTCTTCTTCCCCCACTACGATTACATGTTGTAAAGCGGGAACTTTCTCTTTCACTTCTCTTGCTAGTTTTCGGTAATCAAAGCCGAGAGCCTTATCTGAAATAATGTAAGCACTCGCCTCACCGAACTCGCAAAAATAACTAATTTCACTACTGCGATGAGAAGGTAGTGCAAAAACAGGAAGTGCTCCAATGCGAAATAGCGCAAAGCATATTTCGAAAAACTCTATAATATTAGGTAACTGGATTACAACTCGGTCCTCTTTCTTTATTCCTAAATTCAGTAAACCTGAAGCTAAGCGATCTACCTTTTTATCAAGTTCACTATACGTTATATGCGTATTACTACTTACAACCGCAATTTGATCCCCATATTTTTCAGCACGTTCTTTTAACATCGAACCAAATGTTTCTCCAAGCCAACACCCTTCTTCTCGATAGCGATTGGCAAATTCTTTTGGCCATTCCGTATAACCTATTAACATTCTTTCTTCCCCC
TATTTTTCATTCAGCAAATTATCTCTTAACCCTAAAGCCTTCAACATTGTTTGGAACTTAGCTGATGTTTCTGCTAACTCATCTTCTGGCTTTGATTCAGCAACAACTCCAGCACCTGCATATAAGCGAAGTGTATTCTCTTGCACTTCAGCACAACGAATTGTAACAATCCATTCGCCATCTCCATCTAAATCGCTCCACCCTAACATTCCTGTAAAGAATTCACGGTCAAATGGCTCAATTTTTTGAATAGCCTCTCTCGCTTCTTCCATTGGAGTTCCGCAAACTGCTGGCGTAGGATGAAGAGCAATTGCTAATTCTAAAGAAGACGTATTTGGGTCCTTTAGTTCACCTTTCACTTCCGTAGACAAATGCCACATTGCTTCACTATGAATAACTGAGGGTTTTTCTGGAACATGTAATATATGACAGTAAGGGCGAAGCGCAGCGGCAACCGCTTCAACTACTACCGCATGTTCATGTAAATCTTTTGGAGAAGAAAGTAATTCTTCTGCCCTTCTTTTATCTTCTACTGGATCCTCACTTCGTGGTCTTGAGCCAGCCAACGGATTAGAAATTACTTGCATACCATGACGTGAAACAAGTAATTCAGGACTTGCTCCAATTAACGTCTTACTGTACTCGTTTTCATCTTTCGGCAAATTAACAGCAAATGTGTAGCCGTGCTTATTATGTTCCGCTAATTCACGAAGTAGTTTTTGTTGATCAATTTTCTCGGAAGACTTAACATCCAATGATCTAGATAGAACGATTTTCCTTAAATCTCCGTCCTGAATTTTTGCTATTCCTTGCTTCACACCATCCATATAAACTTCAGGAGCTGGAACCGGTGTCATTTCAAATGTCAGTTTCTCATTTCGTTTTATCTCATTTGTCGTCTCTAACTGTAAACGCTCAGTAATTCTACTATGTTCTGGTACGATAAGTTGAACTTCTTTTCTACGATCAAAGGGCAAAGCACCAACAGCGATAGGATTCGGATTTCCGGCTTGTTTTGCATTACTTAATACCGCTTGTACAAGTTCTGGAAAGCTTTCAATTTCACGATGTTTTACTGTAGTAAACTCTCCTTCTGCTAATATTGTTCGAGTTGGTGAAGCGAAAAAGAATGAAGATTCTGTCTTATAATCTTCTAAAAGTTTTTCTGACAGTTCCTTTACGGCTGTAAGTTCATTCATAGTATAATAACCTCCTGAATTTTTTATTAAACTCCTAATGTAGCTCCGCCATCGACACATAAATTGTGCATTGTAATATGACTCGCTTTATCAGAAGCTAAAAACAATACTGCTTCAGCAATTTCTGAAGGTTGTGCAATTTTTTGTAACGGAATACCGAGTCTATATGTATTTTGAGAACCAGCAATTATATTTTCAGCTCCATTCTCATCAGCCCATAGTAATCTTTGCATTTCAGTTTCAGTAGAACCAGGAGAAACTAAGTTGCAGCGAATATTGTATGCTGCAAGCTCTAACCCTAAACACTTCATAAACATTGTCGTTGCGGCTTTTGATGCAGCGTACGCAGCCATTTCCATTCTCGGAGTATTTGCCGCATTTGAACCAACTGTAACAATTGCCCCTGACTTTCTTTGCATCATATGTTTACTTACTGCTCGAGACATATAGAAAACTCCTGTAGAATTTACAGAGAAGGTTTTATTCCAATCTTCATCACTTAAAGAGTGAATTGGTCCCATACGTAATATCCCTGCAACATTTATTAATATATCTATTGGTGCTATATCATTTTCAATACCGTTTACCATATCTTCAACAGCGGTACTATCACTCACATCTAAATGAAATATTTTCATACGTGTTTCATTTAATTCATTTTCATTTAAGAGTACGTTTAACCCCTCTTCATTTTGATCAACCGCAATAACTGTAGCTCCTCTTTCTAAAAACATTTTGGCAACAACACTGCCTATACCTTGAGCTGCACCTGTTACTAAAACCGTTTTTCCATCAAATTCC
CCTAAGTTCATTTCCTCTATCCTTTCATTTGAAAATTAATCTAATTAAACAAATAGCAATAATGATAACGATTATCACTATCAATGTAAAAAATAATACACTTACTAAACTAATTGGTCAAGTGTACTTTTCATCAAAATATTATTCATTGATAATGTTTTTCATTTAATATTCTTACATCGAAATGATAATCTTTCTCAACAAACTTGTCAACCTATTTTTATTTTTCAGATAAAATATTTTAGTACTTCTCAATACAATTACATCTAAAAGCATTAATAAGTACTTACATTAAAAAATTTAATTCGTACTCCTCTTTTTAAAATTGAAGAAGATAAACTTTAGATACCTTCATTTACAAAGCCTTACCAAAAACTCAAGTATATATTTTCTCAACATTTTTCTGAAAATATCAAGAAAGTATATCTTGCAATTACCGATGCATGTCGTGTTTATAATTTAAGATTTAATAAGTATTTCGAATATAATATATCAATTTATGATATTCCAATATAATCCAAACGGATACTTTACTCATTTTTTTGCCTCTAGAAAATTCCACATGTTGTAATTCATAAAAAAGCACCCTATTAAATAGGGTGCTTTTTAGTAAACATAGTGTTAAAGAAGATTTATATACTTACGCTACACTGCGATACTCTTCTTCTTTTTCTTTTTTCTGCTTAGGCACTTGGATTAAAAGTGCGATAATAAATGATACGACACATAATACGCCGATCACCATGAAAGTTGGTTTGAATCCACCTAGAACTGCACCGATAAAGGAACCAGCGAGCGCTCCAAATCCAAATCCTTGATACACAATTCCGTAGTTCTTACTATGGTTTTTCATACCGTAGAAATCACCAACGATAGCTGGGAAAATAGTGATATTTCCACCAAAGCAAAACGCTACACTTGCTACACATACGAAGTAGATACCATAATTTAAATCTACGAAACTTAAAACCAAGACTGACGCTGCCATAACAACAAATGTACCAGTAACGATTTTTAAACGGCCGATTTTATCTGATAACGGTCCTAAAATAATACGACCTAATGTATTGAAGATTGCAACCATAGCCACCGCATTAGCTGCTGTCGCTGCGCTAAGCCCTACAAGTTGAACACCAATGTCTTTTACCATACCAATTAAGTATAAGCCACTCATACATGATGTAAATAACATAATAAATAATAAGTATACTTGTTTTGTGCCCAACATTTCTTTCGTTGTATATTCCTGAGTTTTTGTTTCATGAACTGCTCCTTGATCTGCAGCTTGATGGATTAAACAAGCGCCAAGGACAATCATAGCTGTAACAATTAAACCCCAGTATATAAACGCTTGTGATACACCAACTGATTCAATTAACTGTGCGTTAATATATTTAAAAATTAAGCTACCTGAACCATATGCAGAGACAGAAATACCAGCAATTAAACCTTTACGCTTTGGAAACCACTTTATTAAATTGGATAATGAAGTGATATATGCTGTACCATCTGCATAACCTACAACAACCCCTGCTAGTACATAAAGCAATATTAATGAGGAAGCTTGTGAACTAAGTATTAATCCAATTCCTAATGCTAGTCCAGCTATCATAATAAGTTTACGAAGTCCCCATTTTTCTTGCAATTTACTCGCAAACAAAGTTGAAAACGCTAAAGACAGGCTAGTAATTGAGAAAGTTATAGCAACAGCGTTAAGACTCCAACCGTATTTACTCACTAAGGGCTGATTGAATAAACTCCATGTATATATCGTACCAAGCCCCATTTGTACTATGACTGTTCCAAGGACAACAAGCCATGGATTAACCGATGATTTTTTCATACCATCTCTCCTCCTCTTTCTATTTCAATGCTATTCGCTAAATATGCTTTTTTTATTTTTATTTTCGCCACCTCATTCGACTTAACAACTTCATTGTATAGGATGTTTTCGCTTATGTAAGCACTTACAAGATGGAGCGTCAAAAATATGGAATCAG
TTACAGAAAACGAAGGGTGAAATACAAAAAACTACTTGTTAAATACGTAGTAGTTTTTATAATGTTTTACATCAATACGACTTATGTGTGTTTTCATATCCTCTTTTATACAAAGTTATGCGCCCTGTTACACCATGGCTGGACTTCAAAATTTTGATTATCCTATATAAACAAAAAAACACCATATTCTTTTTAACTCCCCAGCTTTATAACATGTTGCGTCTATATTTCTTAATACAAATCATTTAATAGTCTTGTAATCTGGCTTCTCGTTACAATGCTTGTAATCAAAAATCCCACCATTTCCACTTCCGTATATCTTGTTTTTCTAACGAAACATACAAAAGTACAACGTTTTTTACAATTTTCTACATAAAAACAGAAAATATATAATTCATTTACATAATTTTGCACTTTAATTTTTATTAAACAACAAATATTTTATTGATAAATAAAACTAAACATTCACTAATATATCTATACGTGGTTTAATATATATAATAAACAAACATTTCTGACTTTTATAAGGAGGTACTATGAAAGCAAACGTAGGCGATACTATACTATTTCAACGAAATAACTTAAAGATCACAGCATCTGTACTAAAACTATACACTGAATCGGTCTTAGTTGAAGTCACAGATGTAAGCGGTGGTACTTTTGAATTTGACCGAACAATTGTAAATCATAAAAACTATAAAATTTTAAATGCAAATAAATAAAACAATACAATACAAAATAGCCCCTGCTTAACAGCGGAGGCTATTTTGTATAATAACAATTCATTAATTTACATATCCTAAAACACTAACAATCTTGCTTGCTTTTAAATTATTAATATAATAATAAATTTCTAACCCTCTTCTCTCAGCTCTTAGCACTTTACCTTTCATTTTTGATAAATGCTGCGAAACAGTCGATTGAGGAATATTTAATATTTCTGTTAATTGTGTTACATTACAAGTTTTGCGTGCACTTAATTCATTTACAATTTGTAATCGTATTGGATGCGCCATTATTTTTAAAATATCCACATCTTCTTCAGTTACTAAACTTTTTTTTATCTCGCTCGTATATGTTGTCATAAGACATTCCCCTTTTATTTGAATTCCTTTTTATTAAAATCTGCTTCTAAATAAAATATTATCTCTAAATATACAATTAAGACCCTTATATAGAAATAATAGGATTTTGATTATTTAAAAACAGCAATACTATACAATTTCATCTATTTGGTACATATACATAATACAATTTCAATTTGTGCTCTATGTGAAGTTAGCATAAATTTATAATGAAGATTTTTCAAACAATTCACATCTAACTATATACTTCCTCATAACACTATCCAGTGATTTTAACATATTTTAGGCGTTTGTATATACTATTGCAAAACACATTAATATTGAGGGGGACTTTATATGCCAACACCGTCACAAGAACAAGTTGAAGAAGCTCGTGAAACATATGTCAGTCGTATTATTTTAGTTGGTGGTTCAGCTTTATTTTTAACAGGTGGTGTTATATCTGCAATTTCAGCCTTTAAAAGTTATAATCGTTTGGCAAATACGCCACCTTCTTCAAATGATTCATAATTGCATCATTCATTCCTTTAAATAATAAAGATGCCGCCTCTACTATAGAGACGGCTTTATATAAAAACATTATTAAATTCATTTGAACTTAATTGAAAAAACGTTATAACAATTTGGTACAATGTTCAAAGTAAGGATAGATACTTTCAATTTGACAAGCTTATTTCACCACAAACCTCGTCTGACTACACCCATCCTTCTGCTTCGTTACTTCTAATACAGCTGGCATTGCATTTTTCAGTTCTTGAACGTGTGAAATTACACCGATAAAGCGACCTGATTTTTGCAAGTCGATTAGGGCGTCAACTGCTTTCGTTAATGACTCTTCATCTAATGAGCCGAATCCTTCATCGATAAACATCGTTTCGATGGAAATACCACCCTCATACGCTTGAATTACATCTG
CCATTCCAAGTGCTAAGCAAAGTGATGCGTTAAATTTCTCACCGCCAGATAATGTTTTTACATCTCGGGTTTGACCTGTATATGCATCATATACATCTAATCCTAATCCACTTTGTCGATTCCTCTTTTCAACTCGTTCGCTTCGTTTTAAATAAAATTGTCCATTTGATAATTTACGTAGTCGTTCGTTCGCAATTTGAACAATTTGTTCTAAATACTCAATTAAGATGTAACGTTCAAAGGATATACGACTATCGTTATCCCCTTTCATTACTTCATATAAATCAACAAGTTCTTGGAATGCTTTTTCTTCCTCATGAATTTGTTCATCAATTCGTTTAATATTTTCATGTAAATCCGTAATATATGATACGGCGTTTTGAGCACGTTGTCGTTTTTCTTTAATAATATCAAGGCTAATTTCCAATTCTTTCACTTGCTCATCTAATGAAGAAATGTCCATATACTCTTTATCTTTTAAGTCTGCTTTTAATTCTTCAATTTGTTTTGTAAGTACTTCTAGAGATGAGTAATAACTTTGGATTTCTTGTTGTAACTTATCCATCTCAGCATCATTTAATTTGGATTCTTTATATGTTAATTGATCTGTAAATCCGCTTTGTTCTAGCTCTTTCATAAAGCGTGTGAACGTCTCTTCTTTCTTCTCTTTTGCACTATTAAATTGATTAGAAGCATTATCATGTTCTGCTTGAATACGTATATTTTCATTTTGCCAATGTTGATACGCCTCTTGTACTTTCTCCCATTCATCTTCCATTAACCTAAGTTCTTGCAGAGCTTGGTCAAACTGTACTTTCCAAGCTTGTACCGTTTGCAAGCTTTCCGGAATATTTTGCTTATCATGTTCATATGACGTACGAAGCTGCATACACTCCATTTCAGTACGGTGCTGCACAGTTTCTACTTCACGCTTTTGTTTCTGGAGTTCTTCTACTTTTTCTTCTACGCTTTTTATATTCACAGCAATTTGCTTACGCGTTTCTTCACTTGCTTTTAACGCATTAACTTCTGCTGCTAATTGCTTTCCTTTTTGAACGAGCGCACTATATGTTTCTACTAATTCTTCTGAGCGATAGCCTCGCTTTAGCACTTCTTCTATCACTTGTTCATATTGAACTCGATAGAAATTCCACTTTTCTTCTAATTGAACATGTAATTTTTCCGCAATAGTTTTCTTTTCTCTTAGCTCATTTAACTCTTTTTCATCAATTGCATTACTTTGCTCTGTAGCTTTTTTCGGGTGGTCCATACTACCACATACTGGACAAGATTCACCATCATGTAAATGAAGAGCTAATATACCAGCTTGTTCACTTAACCAGCGGCGTTCCATATTTTCATATGTACTAACTGCCTGTTGCATCTTATGAAACGCACTTTCTTTTTCTAGCTCGTATTTTTGTTTTTCTTGCCATACATCATATGCCTGCTTTAATACTTTTGCATCTTCTCGCATATTCGTTAGTTCTTCTACTTTAGCTACATATTGTTCAAGCGCTGCTTCTAATTGCTGTAATTCGCCAGACATTCTTTGTTTTTCATTTTTATACGCTTCTAGTTGTTCATCTAACTTTTGCATACCTTCTTTTAATTTTCCTATGTGAATTTCAGCATTTTGCAAATTTAACTTTTTCTCAGCTAATGATGCAATAATTGGCTGTAACTCTTCTAGTCTTTGAACTTGTTTTTTAGCATTTTCTCGCTCGGATGCCTTATTCTTTAATGCTTCATATTTCTCCTGAGCAAGTGCAAAACTGTTCATTATATGCTCTTTTTTTGCGATTATCTGTTTTAACAAACTTTCAGACTGCTGTTCATATTGCATCGCTTCCTCATGCCATTGCTCAAATGGTAATAATCGCTTCGCCTGTTCCGCATGTTTAAAAGATTTTTCTTTCATTTCGATTACGGCACGGTTTTCTTGTAAAATACTGTACTTCTCATTCTTTTGTTCTAAATCTTTGAATTTCTCATTTACA
GCTTTTGCTGCATGAAAACGTGCCTCTGCTTCTTTTAATTGTTTCGTTTTGTCATTTTGTTCTAAGTGTAATTGCTCAACCTCTGCATTGTACCAATTTGTTTCTTGTTCTAATGCCTCTACTACTTGATGTGTATTTACATGCTCTTGCTCCACTAATGTCTCTAATAATGCCCCATCACGTATTGGTAATTTAAATACATTACGGAAATATAGCTCACGTTCTTTTTGTTTTTCTTGTAAGACGTCTTTCCATTGTTTACGCTTTTGATCTAATATCTCACGCATTAATTTATAACGATCTGTTTTAAAAATGCGGCGTAGTATTTCTTCTTTATTTTCCGTTTCAGATGTTAATAATTTTCGGAACTCTCCTTGCGGTAACATGACAATTTGGCTAAATTGATGTTTACTTAATCCAATTAAATCTTCAACTTTTTTATTTACATCCGTTACATGAAAACGATCGACACATGGAACTTTCTCCTCATCAATTACTTCATATAATTCGACTGCATGTCCTGTAATTGTTTTATTTCCTTGTTTTTTATGGCCCAGTTGTCGTTTAATTTCATAACTTTTTCCTTTTAATTGAAAAGTTAATTCCACACTTGTATAAACATCATCATCAGCAAATTGGCTACGAAGCATGTTAGTATCACTGCGCTCTTCTCCACTAGCTTCTCCATATAATACATAACAAATTGCATCAAAAATCGTTGTCTTTCCAGCTCCTGTATTTCCAGAAATAGCGAAAATACGATGCTCACCAAGATCTTTAAAATCGATTACTTCTTTCTGTTTATATGGACCAAACGCGGTCATTATAAGCTGAATCGGTCTCATCCTCGCTCACCTTCCCGTTCTTGCACTGTTTGCAAAACATCTAAAAATAGACGTTCTTTCTCTTCTGGTAGATCTAGCCCTTTCATTTCTTTATAAAATGCTTTTAAAAGAGATAAATCATCCGTTTTATGTCTTGAAACAGTTACTTCATTTTCATCTGTGAACTCTCGTCTTTGAATAGATCTTTCAACATGCATTGCATTTGGGTATACAGAACGTATTTTTTCCATTGGCTGCAAGACAGGATTTTCATCTAATAATTTCACAAATACATAATCTTCACTCACTGGATGAAGCAATAAATCGTCTATTTTCGCTTCTACTGTACGCATTTTACGACGTGGTGTAAGTAAACGTTTTTCAATTGTTGTTTCACCTTTTTCATCCAGTTCCACAATATAATATCCTTTTTTATGCTTTTCTTCAGAAATAGAATATGCAAGTGGTGAACCTGAATAACGAATTGTCTCATTACGTACAAAATGCGCTTGATGTAAATGGCCAAGCGCCGTGTAATGAAATTTATCAAAATAATGGCTATTTACATATTCAGCACCACCAATTGAAAGTGGTCGTTCCGCATCACTCGTATTTTCCTCTGCCTCTCCTGAAGAAGTTACAAATGCATGTCCTACAAATATGTGTCTCGCTTCTTTATCCATCGTTTCAGAGAGTTCGTTCATAAAAATACGCATCGCATCATCATGAGAACGAACATCTTCATTTTTCAATATATGTCTAACAATACTTGGATCTGCATACGGAACGAGATGGAAATGAACCTCTCCGTATTCATCATTTAAAACAACGGGATTGTATGGAAACTGGAATTGTCCAACAATATGTAATCCTTGTTTTTTCATTAAATTACTACCAAAATGTATGCGGTCCGGACTATCGTGGTTTCCTGCAACTGCGATTATTGGTGTTTGTAAATCAATAACTATCTTTTGTAATACATCATTTAATAAGTCTACTGCTTCTGTAGGTGGAATTGCTCGGTCATATAAATCTCCTGCAATAATTACAGCATCCGGTTTTTCCTCTTCAACAGCTTGTACAAACTGATCTAACACAATTTTTTGATCTTCAGTCATATATACACCATGAACAAGCTTACCTAAATGCCAATCCGCTGTATGAAAAAACTTCATA
TTACCCTTCTCCTTTTAGTTTACTGATTGTAGTGAAACAAGCTGATAGTTTAGTGTAGTTTTTCCGTTCTCCCATTCTAACTTTTGGACAATTGCACTACCGATTCTTTCTTCGTTTGATTTATATAAAGGCAATATTTCTTGAATAGTAAATAGATGATAACCATCTAAAATTAATTGAAATAAATTATCTTCTATATGTAATCTTACTTCTTTTTCATTTGAAATAATTTTTGTATGCATCTCAAATTTCATGCTAGATACAACCCCTTTTTTCTAATCTCTACATTTTAATAGTATAGTGGTCATGCAAGATTGACAAGTAAATACACCGGGATGCACATACTTTTTTAATTATAAATATTTTATGTTTTTTTATATGAAATACGGTATGTGAAATTATTAATCTACAAAAATAAAAATGCCCTTTCAGGCATCCTTATTTTTGTAAGTACTCTTTCATAACGTAGTACAATACAGTTATTAAGACAATCGCTACAAAAAACGCAAATACGGCCACAACATACCCATCTTTATTTAATAAAAAGCATATGGAAATAGATATTATAAAAATCGGAAACACGAGACGTACCTTAGCCTTCACTTCTTCTGTCGTATCTGTATGATGAAAATACTGTGTAATTCCTACCATAAGAGATGACAAAATAATAACCGACAATAGTAAAGGTCCTGGCATTCCGATCTCTCCTTTCTCTATTTCTAATTATATACAAACCCCGCGCTCTTTAAAAGTTAGATAATTCAGCCTTTTAACTTTTTCTACATCCACTCTTTAATTTACACCACGTATACTAACTCAATATAAGTTATACATATAATAGCGCCTTCAACACGACATTTTCAAATAATAGCACTTACAGTAATGTTGTAATATTCCTCTTCATTCTCTGAAATCCTGCAAATATAACTCGTCAAAATATGCGATAAGTACTAACACTAAGCACCTAATTTAGACAAAAATTATAGAGGACATTTCAGCATGTATCAAGAAATAAGAATAGAAAGATAAGTAAGGGAGGATTTATAATGACTGAACACGTTTTATTTTCTGTTAGCGAAAACGGCGTTGCATCTATTACTTTAAACCGTCCAAAAGCACTTAATTCTTTATCCTATGACATGTTACAACCAATTGGGCAAAAACTTAAAGAATGGGAGCAAGATGAGCGTATTGAACTCATCGTTTTAAAAGGAGCTGGGACGAAAGGTTTTTGTGCAGGTGGTGATATTAAAACGCTATACGAAGCACGTTCTAACGAAGTTGCATTACAACATGCAGAGCAGTTTTTTGAAGAAGAATATGAAATTGATACATTTATTTATCAATACAAAAAACCAATTATCGCTTGTTTAGATGGAATTGTAATGGGCGGTGGTGTCGGTCTGACAAATGGAGCGACGTATCGAATTGTAACAGAGCGTACGAAGTGGGCAATGCCTGAAATGAACATCGGTTTCTTCCCGGATGTCGGTGCTGCTTATTTCTTAAATAAAGCGCCTGGATATACTGGTCGATATGTTGCTTTAACGGCATCTATTTTAAAAGCTCCTGATGTATTATATATTAACGCTGCTGATTACTTTATGACATCAGATTCATTACCAAATTTCCTTACCGCACTTGAAAATGTAAATTGGAAAAAAGAAGATGTACATACTCATTTAAAAGAAGTTATTCGTACATTTGCAACTGCTCCAAACTTAGATGGCAATCTTTCTTCTTCAGTAGAAGAAATTAATTCGCATTTTGCATTCGATACAATTGAGGGAATCATTCAATCGTTAGAGAAAGATCAAAGTCCATTTGCCCAAACAACGAAAGAAAAGCTATTATCCAAATCCCCTGTTTCATTAAAGGTAACATTAAAACAGTTTATTGATGGCCAAGAAAAGTCAGTTGAAGAATGTTTCGCGACAGATCTTATACTCGCTAAAAACTTCATGCGACATGAAGATTTCTTTGAAGGAGTACGCTCCGTT
GTAGTTGATAAAGACCAAAATCCAAATTATAAATATAAACAATTAAGTGATGTTTCAGAAGAAGATGTAAATCGATTCTTTAACTTACTTAACGCTTAAGCAAAGAGGCCATATATTGTAGCACACTCTCAGCTACTCTATATGGCCTCTTCTTTTTAGTTAAATAATTTTAAATATAATTTAATTTTATTTAACTAAAATGTATGTTAAACTAACGTTAATCGAATTTTCTAAAAACAAAAAAGGAAGGCGGATAACATTATGGAAGATTTTTTTCTGTTTATTATTATGTCTATATGTCTTATTATTCTACCCGGTCCTGATACAGCAATGGCTACGAAAAACACATTGATTGCCGGCAAAATCGGCGGCGTAAAAACTGTTTTTGGTACTTGTGTTGCGCTTTTAATTCACACTTTAGCTGCTGTAATTGGTCTTTCAGCACTTATTGTAAAGTCTGCTCTTTTATTTTCTATTTTTAAATATGTTGGTGCTGTATATTTAGTCTATATTGGTATTAAAGCCCTTTTAGCAGTAAGAAACACAGAAGACTTAAATACAAATGACGTTCCAATAAATAATGACAATAAACATACTTCTTGCTTTCGCCAAGGGTTTCTTACAAATTTACTAAACCCTAAGATTGCAGTCTTCTTTTTAACTTTTTTACCACAGTTTTTAAATCCAAATCATAATACATTTATACAACTTCTCGTTATGGGACTTACTTATCTCATTTTAACCGTCATTTGGTTTGCTTTTTATATATTTTTAATTGATAAAATTAGTGCTTTTATGAAAAAACCGAAGACACAGCGTTATATTCAAGGGATTACAGGAATCGTCTTAATTGGGTTTGGTATTAAATTAGCCTTTGAAAGAAATAGTTAACACAATACAAACGAAAATACACTACTCGATTATGAGTAGTGTATTTTCGTTTGTATTATTTTTATAATGAGTGGGAATGCACCCCACTCATTATCATTTCACTTTATTAAAACCAGCGCTCAGTTACAACTTTTTTACGTGTATAAAATTGAACGCCATCCGTACCATTTGTACCAAGATCTCCAAAGAATGAAGCTTTGTTTCCTGCAAATGCGAAGAATGCCATTGGTGCTGGAACATTAACGTTTACACCAATCATACCAGCATCAATATTGTCTCGGAATGTTTGTGCATGTTTTCCATTTGATGTATAAATAACAGCACCATTCGCAAATTTCGATTGATTTGTCAGCTTAATACCTTCTTCTAAATCTTTTACTCTTACAATACTTAATACCGGAGCGAAAATTTCATCTTGCCAAATTTTCATTTCTTGATTTACACCATCAAAGATTGTTGCACCAACAAAGTAACCTTCTCCAACTTCTTCATTGATTTTACGGCCATCTACTAGTAAAGTTGCTCCATCTGCTACACCACTATTAATATAGCCTAAAACACGTTCTTTATGAGATTCACGGATTAATGGTCCAACATAATTATCTTCGTTGAAACCGTCACCTACTTTTAACTTCTTCGTTTCCGCTACTAATACATCAATGAATTCATCAGCAATTTCATCAACTACTGCTACTACTGAACATGCCATGCAGCGCTCTCCACTACTTGCGAACGCAGACCCAATTACACCTTGTACTGTTTTCTCAAGGTTGCAATCTGGCATAACAACCGCATGGTTTTTCGCACCTGCTAACGCTTGTACACGTTTACCATTTTTTGTACCAGTTTCATATACGTAGCGTGCCACTGGCTCAGAGCCAACAAACGAAACTGCTTGAATATCTTTATTTTCTAAAATACTATTTACAACATCTTTTCCGCCTTGTACTAAATTTAATACCCCTTTTGGAAAACCAGCTTCATAGAATAGCTCTACAAGTCGCTCAGCTAAAAGTGGCGTTCTTTCAGATGTTTTTAATACGAATGTATTACCGCAAGCAATTGCAAGTGGGAACATCCATAATGGAATCATCATCGGGAAGTTAAAC
GGTGTAATACCAGCAACAACTCCGATTGGGTAGCGCCAAATCGATCCATCAATTCCACTAGCAATATTCGGAAGGGCTTGTCCCATCATTAAATTTGGTGCTGATGTTGCAAGTTCTACCGCTTCAATACCACGCTGTACTTCACCAGTTGCATCCGTTAGCGTTTTACCGTTTTCTAGCGTAATGATTTTTGCAAGCTCTTCTTTATTTTCTTGTAACAGTTGTAGATATTTGTATAGTTGTCTTGAACGATTTGGAACTGGCACTTTAGACCATGTTTTGTATGCCGCTTTTGCCGCTTCAACAGCTTTTTCAACATCTTCTTTTGGAGAAAGTGGAACGTAAGCGATGATTTTTCCGGTTGCCGGATTCGGAACCGCTTCTACTTCCGTACCAGTAGATTCTACCCATTCGCCATTAATATGATTTTTCACTCGTTTAATTTCAGTTGTAATCATTATATTCTCTCCTTTTTTATCATGGTTTTTATAAGTGCACTATTTTTGATTAGAAATTAATTGTTCGCTAACTTTCTTATATAAAGCAGCCATATCGTTTTCACCATATCCTGCCTCACTCGCTTCTTCATATACGTTTAATAGCATCTCACTTACTGGTAAGTGAAGTTCGCTTTCTTTCGCTAAATCTACCGCAAATCCTAAATCTTTCTTTAATAAATTCACAGTAAAGCCTGGCTCATAATTTTCTGATGCAATGAAACTTTTATAATTACGCTCATAAATTCTACTTTGACCGTAACTTACATTTAAAATTTCAAACATTTTATCTAAATCCATATTGTTCTTTTTCGCTAATGTTAAAGCTTCACTCACACCAGCTGTATAAAAACCAATTAATAGATTATTAATTAATTTAACAGTTGTACCACTATCAATCTGCTCACTAACATGGAAAACATTCGCTCCAAGCACTTCCATGACAGATTCAGTTTTCTCATACACTTCTTTCGATCCACCAACCATAAACGTTAATGTACGGTTTTCTGCACCAATTACCCCACCGCTAACAGGTGCTGCTAAAAAGTCTACTTTCTTTTCCTTCGCCGATTCCTCTAATTGTTTGTTTAATTGTGGAGATACTGTACTTGTATCAATTAAAGCTACATTCGAGTGGCTATTTTCAAATAATCCTTCTTCCCCAAAATATACCGCTTCAACAGCACGAGGTGATGGTAAGCTTGTAAAAATCACATCGCATGTCTCTGCTAGTTTTGAGATTGATAGGCCGATAATTCCTCCTTCTTTTTCAAAGGAAGCTTCCGCCTCTTTATTCAAGTCCACTCCATATACTGTGTAGCTAGATTTAACTAAATTTTTAGACATTGGAAGACCCATGTTACCTAAACCGATAAAACCAATCTTTTTCATATCGCTTTCCTCCTATTTACACAATATATTTCCCGCGATTTATAATAACTTTCGCAATATCTCTCTTTTTCGTAAATAAGTTTGAGTATAAAGGCACTAATAATTGACGGATTTCAGCTAATATTTCTTGTCTATTTTGTTCTTCTGTGACAGCTGCTGAAAGAATGGAAATCGCGGATTCTTCTACTTTACGATATCCTTCTTCACAAATAACATCTGTTATCATTTGTTTCGTACGCTCTTTTTCCTCACCATTTTTACTAACTGCTTTCCTCGTACGTAAAAATGCTGATTCCATGACGTACACGTCCGTCAACATATTTGATAATACGCGTGAATATTCTTGCTCTTGCTCAATTTTTAAACCTGGAGTTTTAGAGAGCGTTTTCAAAGATTGTTTCAACAATTTTTTCGCTAATAAAATGTAACGATGGTTTCTTTCTACATTTGCCACATCAATTTCTACTTCAGCATCTTCTATTTGCTCGATTTGCTTCATTAACATTTTTGCAACTGTTAATCTATTAATTTCATTCGTTCCTTCAAAAATACGACTAATCCTAGCATCACGATATAATCGTTCTACTTCGTATTCTTGCATATAACCGTAAC
CACCATGAATTTGTACAGCTTCATCTACGATATAAGCAAGTGTTTCAGAAGCGTTTACTTTATTCAGTGCACATTCAATTGCAAATTGGGACATTTTTTTCATAAGATCTTCATCACTCTCATGAATTGCTTCATCAATTACACCCGCTGTACGGTAAGCTGCACTTTCTGCTCCATATGTAGAAATAATCATATTGGCAATTTTCTCTTGAATCATCGTAAAATCTACTAATTCCGTTTGGAACTGTTTTCGCTCTTTTCCATATTGAACCGATAAACCAATTGCTTGTTTTGCTGTTCCAATATTTCCAAAAGCCAGTTTTAGTCTAGCAAAGTTAAGGATATTAAGAGCTACGTGATGCCCTTTCCCAACTTCCCCTAAAACATTTTCAGCAGGGATGACAACATCTTCTAAAATAAGCGTCGCTGTTGAAGAACCTTTAATCCCCATTTTCTTTTCTTCTAGTCCAATGGATACACCTTCACATGTTCTTTCGACAATAAACGCTGTCATTCCTTTATTTGTCTTCGCAAAAACAACGTATACATCTGCCATATGAGCATTTGTAATCCACTGCTTCTCACCATTTAACTTCCAAGCAGTTCCATCTTCATTCAATACTGCACTCGTTTTTGCACTTAATGCATCAGAACCAGCATTTGGCTCAGTTAAAGCATAAGCCCCAATCCATTCTCCAGACGCAATTTTCGGCAAATATTTTTCTTTCTGTTCTTTCGTTCCATAATATATGTAAGGCAACGTACCTACACCGGCGTGTATATTAAAAGAAACGCTAAATGCACCAGCGTAACCCATTTTCTCTGCTACAAGACCTGAGACAGCCTTTCCTAACTCGAATCCGCCATAATCTTCTGGTACCTCAATGCCTAATAATCCAAGTTCCCCAGCTTTCTCAAATAATTGACGAGAAACTTTATAGTTATGTTGTTCAATATTCTCAATTTGCGGGACAATTTCTTGTTTAACGAATTGTTCTGTCGTTTTTGCAATTAAATCTTCATCCCCTGAAAAATCTTCTGGTGTAAAGAAAGTATTATTCACATCTTTATTGAGTGAAAAAAATTCATCCCATGGTAACTTCGTTTTCTCCATCTGAATCCCCCTATCATTTCGGCCTTCGTTGATTTTTGCATATGCTCGTCTACGATGAGTGGAATATCTTATTATTGAAAACAGAAGGCGATGATTTTTCACATCAAGCAGTTCGTTTTGCCAAAGATGTATTTTCATCGCCTAAATCCTTCTACCTATTGATCTTCAGATAATACTGTTTTTGCAATTCCAGTATCTAACTCTTCAAAGTCATGATATGAAATTGTTTCATATAATTCACTTCTCGTTTGCATATTAGAAAGTGCATCTTTTTGAGAACCCGTTTCCTTAATTAGCGTGAACACATTCTCATATGCTTTTGCAGCAACACGAAGTGAAGTTACAGGATAAATTACCATTTGGAAGCCCATATTCGCAAATTCCTCTGCACTATAATATGGTGTTTTTCCGAACTCAGTCATATTTGCTAGTAAAGGTGCATTCACTTTGCTAGTAAATAAACGGAATTCTTCTTCCGATTGAAGCGCTTCTGGGAATATTGCATCTGCCCCTGCTTTTACATATGCATTCGCTCTTTCAATCGCTGCATCTAATCCTTCCACGCCGCGAGCATCTGTGCGTGCTACAATATATAAACTTGGCGCAACTTCTTTAATCGCTTTAATTTTTTGAACTAATTCTTCTGTAGTAACAAGTTTCTTACCATTTAAATGTCCACATTTCTTTGGTAATTGTTGATCTTCAATCTGAACAGCCGCAACTTTCGCTTCCACCATTTCTACAGCTGTCCTCGCTACGTTTAGTACTCCACCAAATCCTGTATCAATATCAACAAGAACTGGTAAATCTGTAGCTCTAACTAGATCCCTTGCTCTCTCTGCTACTTCAGTAGACGTCACGATTCCTAAATCTGGTAGTCCTTTAC
TTGCAGTGTAAGCAGCTCCCGATAAATATAGAGCTGAAAAACCTGTGTTTTTCGCAACAAGAGCCGCCATTGCATCATGAGCACCTGGAATTTGCAAAATTTCATTTGCTTCTACTAAAGCTCGGAAGCGATTCGCAAGCTCCTCTTGTGTTGACTGTTTATTCACAACCCAAGCCATTCTAAATTCCCCCTATTATTAAATTAAGAATAGATCTACAAATTCGTTTACATTCATATTTTCTAGTTTTTCTTCATTTAGACAAGCATCATGGATTTTTTCTTGTTGCTTACTAGAATAATGACCTGCCATATTTGCAGTGAATTTTCGAACAACTTTTGGAATTGCTTCGTCTCTACGGAAACGGTGGCCAAGTGGGTATTCACATTCCACATTTTCTGTTACAGTACCATCTTTAAAATGAACTTGAACAGCGTTGGCGATTGAGCGCTTGTTCGGGTCAAGGTAATCTAAACTGTACTGTTTGTTTTCAACAACAACCATCTTATTACGTAATTCATCTACACGTGGATCATTCGCTACTACATCCTCATAATCATCCGCAACGATATCTCCTTTTAATAAACCTATTGCCGTAATGTATTGTAGGCAGTGATCGCGATCAGCTGGGTTATTTAATGGACCTTCTTTATCGATAATACGAATTGCTGACTCATGAGTTGTAATTGTAATACGATCAATTTCATCTAATCTTTCTTTAAGTTCCGGATGTAATTTCACTGCACATTCTGCAGCTGTTTGTGCATGGAATTCTGCTGGGAATGACACTTTGAATAATACATTTTCCATTACATAAGATTCTAGCGGTCTTGCTAATTTTAATTCTTGCTTGTTAAATAACACATCTTGGAATCCCCATCCTGGTGCAGATAATGCCGTTGGGTAACCCATTTCACCTTTTAATGCAGTCATTGCAAGATGAACACCGCGGCTCGTCGCATCACCTGCTGCCCATGATTTACGTGAACCTGTATTTGGAGCATGACGATATGTACGAAGACTAGAATTATCAATCCACGCATGTGATAATGCATTAAAGATTTCCTCACGCGTTCCGCCAAGCATTTTTGCAACTACTGCAGTCGTCGCTACTTTTACGTATAATACGTGATCAAGACCAACACGGTTTAAACTGTTTTCTAAAGCAAGTACACCTTGAATTTCATGTGCTTTAATCATCATTTCTAGTACTTCACGTACTTTTAATGGTTCTTTTCCTTCTGAAATACGAACGCGGCTAATATAATCTGCAACTGCTAAAATTCCGCCTAAGTTATCCGATGGATGTCCCCACTCTGCTGCAAGCCAAGTATCGTTATAGTCTAACCAACGGATCATACATCCGATATTAAATGCGCCTTTTACTGGATCTAGCACATAAGATGTTCCTGGTACACGTGTACCATTTGGCACGATTGTTCCTGGTACAACTGGTCCTAATAATTTCGTACACTCTGGGTATTGTAGTGCTAAAATTCCACATCCAAGTGTATCAAGTAATACGTAGCGAGCCGTACTGAATGCCTCTGCACTTGTAACCTCTTTATTTAATACATAGTCCGTAATTTCTTCTAATAATGCATCTTTTTGTTTAATTTCATTTGTTTTAATCATGCTATTCTTCCCTTTCTGTCGAAAATTCGTTATTATTAATGATGGGTGTCGGTCAAACACCCATCTGGCGGGCGGCGCTAACAACTCCCCAGTTGTTTTTATTTACTAAATGCATGTCGCTCGCCAATATAATTTACACGTGGACGGAACAAACGATTGTTCGCATGTTGCTCAATTACGTGTGCACATAGCCCTACTGTTCTAGAACTAAAGAAGATTGGTGTGTAAAGTTGAATTGGAATACCAAGCATCCAATATACTGGAGCAGCATAATAATCAAGATTCGGATAAATTCCTTTTTCCTTCTCCATAATTTTTTCTCCAGCTTCACACATTTCATATAGTGTATAATCGCCTTTCACATCAC
ATAACTGCTTTAATGCTTCCTTCATCATAAGTGCTCTTGGATCCATCTTTTTCATATAAACACGGTGTCCAAAGCCCATAATTTTTTCTTTGTTATATAGTTTCTTCTGTAATAACTCTTCAAATTTCTCAACATTACCAGCTTCTAAAAGCATGTACATAACCGCTTCATTCGCACCGCCATGAAGACTTCCTTTTAAGGATGCAACTGCTCCTGTTAAAGCGCCGTATAAGTCGGATTGCGTTGATGCAATAACGCGTGCTGTAAATGTAGAGTTTGGCATTTCATGTTCACTATATAAAACAAGGGAACGATCAAAGATTTTCTCTTCAAGTTCAGTTGGTTTCTTTCCAGTTAACATATAGAAGAAATTCGCACTATATGACAATTCTTGAAGAGGCTCAATTGGTTCCTCATTATTTAAAATATGGTAGCTATTCGCTACAATATTTGGCACTTTACTTAAAAGCTTGTATCCTCGACTTTTATTCACTTCTAACGAGCGGTTTTCGATATCACTATCGTAACCAGCTAATGCAGACACACCTGTACGTAGCCCATCCATAGGATGCGTTTCCTTAGGCAATGCCTTTAGAACATTGAATACACCTTCTGGCACTGCATATTCTTCTTTTAATTTCTTTTCAATTGTTGCTTTCTCATCCTCATTCGGTAAACGTTCTTCTAATAAAAGGTGCACAATGTCTAAATACTCTTTTGTTTTTGAGAGTTCGATTAAATCATACCCTTGAATTACAATTTCACCTTTTACTGTGTCAAGAAACGAAATTTTCGTTTCCGCTGCTACTACACCATCTAATCCCGGGGAAAATTTTTCTTCAGCTTTCATCATTATTTCCTCCAATCCTAATAGATCTTCGTGCACATAACTGTAAGCCTTTACAATAATATGAAACTACATTTCTATTTCAATGTATCAGAGTTTTTAGAAGATTTCAATTTATTTTATAATGATAAGTTTCCTATACTTTTACCGATGTAATAATACAATTTTTCAAAAATAACTTTCATCTTTTATAAAGAAAGGACAGACATCCTTTTACAATGTTTTAAAACGAAATATAACAATTATCCATATAATCCCACTAAATATACCTTTAATCCCTTTTTCTTCAACAAACATAAAAAACCGAGCTGCACTCATGCACAGCTCGGTTTTTTATAATATTTATTTCATATTCCCATAAATATAATTCTTATATAAAATTTATTTATTTTTAATAAAATTATTATTTTACATAATTAAATTAAATTTCTTCCGTTTCTTTACGATAGCTTACAACTAATTCATAGTTTTGAAAGTACGAAAGATCATCAAAATCTATAAAAATAAACTACCGTCTTTCTCAACTGACACTATTTCCCGAACTGATTTCTCAGAAACAATCCCTAAAGAATATCCGGGCTGAATTGAGCTATTTCCTCCGTACCTTACAATACATCAAATAGATTCATCATGCTTTATACGCAAAATCTTTTTTGCCTAATCGTTGTTTTACAAAAATGACAAATATAATTAAAGCTATTAATCCTGAAATTGCACTACCTGCAAATAAAATACGGTAACCAAACATTTGTGAAACTACTCCAAGTAAAATCGCACCTAATCCAATCCCTAAATCAAAAGCGGTAAAGAATGAAGCGTTAGCAACTCCTCTTTTACTAGGATCAACAATTGTCAGCATCGCTGCTTGTAAAGCGGGCTGTGCTGAACCGAATCCAACACCATATAATGCCGCTGCAATCATTACACCTATCAAACCATTTGAAATCGTTAGTACAACTATCGCTAAAATAGTAATACATAGTGCCGGAAGTATGATAAAAACTTCTCCATACTTATCTAATAGTTTTCCTGAAATAGGTCGTACAATTGTTAAAGCAATTGCATATACAAGAAAGAATGTCCCAGGATTCACATCAATTGATGACGCGAATAACGGTAAAAATGTTGTTATTCCTCCATATGCAAATGATAA
AAAGAAAACAACAACCGTTATTGATAAAACGGATTTTTCAAACAGCTGAATTTTCCCTTTTTCTTTTTGTGGCGTGAATGGCATTTTCGTTATTAACGATAATACGACTGCCATAAAGGATAAAAGAGTTGCTAACAAGAATAGACCATGAAATGAATAATTTTGTACAACCCATAATCCAATCATCGGGCCAATTGCCATCGCGATTGTCATCGCCATGCCATACCAGCCCATCCCTTCACCACGACGTGAATCTGGAATAATATCAGTTATCGCTGTTCCAACTGCTGTTGTAGAAACAGCCCACGTCACTCCGTGAATAACGCGTAAAACAGCTAAAAGGACAATAGTCGATGCTAAATTATACGAATACATCGTTATCCCAAAAAATATGAGTCCGAAAATAATAAAAGATCTTCTACCATATTGATCTAGCATACCTCCAATAATCGGTCGTATTACAACTGCCGCTATTGTAAACATTCCCATCATCAGTCCAACTTGCGATTCATTTCCACCAATCTCTTTAATAAAGAGCGGGAGCGTTGGAACAAGTAAATAAAACCCCGTAAATAAAAATAACATCGCGATAGTCATTTGTATAAATGATTTCGTCCATAATCGTTCCATTAAAATCCCTCCATAAACACGTAAAATATTACAATCAACACATCAATTGTATATTTTTCATGTTCCCTTGCCCTCCGCTAATAGACGTATCTTTTCATAATAAATGGTCTACCATCAATTTAAGTTGGTCTTACGACTTTATGCCTATGTTTTAAGCAAAAAACCTATATATGTACCTTTATATGTTTTAGAAGAATATACTATTTTATTATTAAACTTAGGAGTGATCGTTTTCATGTGGGTATCACCAAGAATAACCGCCGTTGTCCCATCAGCTCCCTCAATACATGTTCCTGTCGTTCATACAGCTAGTTCTGATGTTGGTATCGAACTTCCTGCCGCTTGGCAACAATATCAATCGACTGACCAAGTCACCCCTTCATTTTGGCATCATGGTACACACCCGAGTAGTACTACTTTTTATGGCCCTAGTTATCCAATACCGCCAACTCAAATTTTTTATCATTTCCCATCAATTTACTTTCAAAATTTCTATGGTACTTTTAATATTTAAAAATAATATTACTCTTTATAATACAAAAAGCACCAAAGAAACCGTTTTTCCCGGTACCTTCGGTGCTATTAAAATGCATTATTTATTGTCCATTATTGGAAGGAGCCTCTCCTGTGTTCCCGCCAGTAGTTGGTGGATTTGGAGTTCCTTGGCCACCTCCAGTGGTTGGTGGATTTGGAGTTCCTTGGCCACCTCCAGTGGTTGGTGGATTTGGAGTTCCTTGACCATTTCCATTGTTTTGTTCATTTTGCTTATTCTGTTCATCTTGTTTCTTTTGCTCTTCTTGCTTCTTAAGTTCATCTTGTTTCTTTTGCTCTTCTTGTTTCTTTTGCTCTTCTTGTTTCTTAAGTTCTTCCTGTTTCTTTAATTCCTCTTGATTTTTCTCTTCTTCAGTTTTTTGCTGTTGATCTTGTTTTGGTTGTTCCGTTGTATCCGGTACGCTCGTATTTGGAGATGAATCACGTTTTTCACCTTTTATACGCAATTCACTACCTTCTTGAATAACACTACTTGGCATTTTAAAGCGTGATTTATCTGTAGCCATCTCGCTCATCATTTCTTTAAAGATCAACTGTGCAATTTTCGTATTTTTACTACTAATATACTCGTCTTTACCATCTTTCGTATATCCAGTCCATACTGCCATCGTATATTGCGGTGTATATCCTGCAAACCAACTATCACGAGTTGCACTTTCTGGAATTCCATATTGTGCTAGTTGTTTTGAAGAATAGTTAGTTGTACCTGTTTTACCAGCTACATCTAAAGAGGCCACATTTGCCGATGTACCAGTACCTGATGTAACTACAGAACGAAGCATATCAGTAATCATATATGCTGTAGAATCCGCCATAAC
TGATTTTGGTTTTTGTCCAAAACTTTGTGACTTGCCGTCTGGATAAACTACTTTCTTAACAAAATGCGGCTTCGTATACTTACCGTCATTCCCAAATGCCGCATAAGCACCTGCAATTTCAGTTGGTGACGCTTCATTTGTACCGATCGCTGTTGATTCTGTCGGTGCTACATTAAATGTAATACCTAATTTCTCAGAGAACTCTTTCGATTTACTAATACCTACCTCTTTAGCTGTTTTAATAGCTGGGATATTACGTGACTTTGTTAGTGCTTCACGCATTGAGATTGGCCCTAAATGACTTCTATCTGCATTTCTAACTTCTTGGCCAGTTGAATACTTAAATGGAGAGTCATCAATTTGATGATAAGTAGCCCATTTTAAATATTCAATTGCAGGAGCGTAATCAAAAATTGGTTTCATCGTTGAACCAGCTGCACGATCTAATTCAATCGCCATATTATGCCCTTTAAATACTGCTTTATTTTCACCGCGCCCACTACCTATAGCGCGAACTTCTCCAGTTTTCGTATCCATAAATGTGAAAGCACCTTGGAATTTATCATTTGGATAATTAATAATATTTGTATTCAAAATATTATCAGCTAATTTCTGTGCTTTCGGGTCTAATGTTGTATAGATTTCTAAACCATCTGATCCAATATTAACATCTGGTATTTCCTTTTCTACTTCTTTCACAACTGCATCCATAAATGCAGTATGAGGCATTGCTTGAACAGTTGCAGTCTTAAGTCCATCTGTTACTTTAACTTTCGAAGCTTCTTCCATTTCTGCCTTTGTAATATAACCATGTCTATTCATTAAGTTTAGTACAGTATTTCTTCTTTCTGTTGCCCTTTGAACATTTTCTTTTTTCGTTGGGTCATAGTTATTCGGTGCTTTTGGTAAGCCAGCTAGCATCGCAACTTCTGGTAATGTTAAATCTTTCAATTCTTTACCGTAGTAGTTTTGTGCTGCTGTTGCGATTCCATATGAACGGTTTCCTAAATTAATTTTATTTAAATACATCTCTAATATTTCATGTTTTGAATATTGTTGTTCTAGCTTATACGCTAAATATATTTCCTGAACTTTACGTTTTGACGTTTTTTCCATCGATAAGAAGTAGTTTTTAATAACTTGCTGCGTTATCGTACTACCACCTTGAGAGCCATAATCTCCCTTAAGACTAACTAAAACTGCACGAGCAGTACCTTTAAAATCTACTCCGCTATGCTCATAAAAACGCGAATCTTCTGTTGCTAAAAATGCATTTTCTACCAATTTAGGGATTTGATCATACGTAACATTCGTCCGCTTTTCTTTCCCATATTCATATACCAAATTCCCGTCTTTATCATAGATTTTTGAGGATAACGGATTAACAAGTTTTGCTTTCTCAAGTTTTGGTGCATCCTTAATCATTACGAAAAAGGTAGCAACCCCTGCCACTAAACCTACAATACCAAGTAGTAAACAACTGATTAAAAATTTACGAAAAAACGATGTTTTACCTTTTGGTTTTGTTTTTTTAGAAGCTGGCTGTTTTTTCTTTTGAACTTGTCGTCGCTCCTCTCGAGAACGATAATTTTCTGACATGTTACTTTCTCCTGCCTTTCAATTCTCCCGTTGATTTACTTGAAAATATTGAGGTCACTTTCCCAAATACATTTAATCAAGAAAGTATATTGACCTGGAGCATTCAATATTTTCCCAATTCACAGATTATACATTTTAGAATACTTCTTATACAAACTATTTTCACTTCTATTTTTTCTAATAATACTGCCCCTTAAAGCATAGAATGTAAGAAGTGAATTTTTAATACCTCGTATAAATAATATACCTTTTTTTCAAATGGTCAATATTTTCTTATTTATAAATCAAAGGTGAAATAAAAATAGCACAACGATATGTACTTCGTGTGAACTTCCTTATAATTACGTATAACAAACATTTAATATATTGCATATAAGTATAATACTCCTTTAATTATTAGG
ATTTCAACACGACAATCCCCTTCTCTACAAGAAACCATCTAAAATGAAAAATAAGGAGTTGCGTAATGATGGAGCTATCTCCCGTTATTTCAAAAAAATTAGTTGCTGTTGGCTATAATCCTTTTTCTATGATTTTACGTATTCAACTAAAGAATGGTATGTATGATTTCTTTAATGTGCCAGAAAGCATTTACACCGGTTTATTAAACGCACATTCTAAAAGTTATTATCATAATACTTATATTAAAAATTCTTACCGCTATACTAAAATTTAAACTTGATTGGCATACAAAAAAGAAAGGCGAATCAAATCCCCTTTCTTTTTTGTATGTCAATCGCAATTTACCTTCTATTTCTTTTATTAATTTCATCATACGCCTGAACTTGTTTTTTAAGTCGCCTGTCGGCTTTTTTATCATGTACGACAAGAACTGCGTGTATCGCTCCTGGAACCCAAAAAATTAAGGTTAAAATGAAATTAATAATCGCTTGAAACGGCTTTCCGCAAAATAAAACAGCTACCGGCGGAAGTAGAATTGCTAATAAGTACATCATGTCTGAAAAACTCCTTAATGTTTATTTTTCGATTCTTCCCCATCCATCTCCTACACTATCAAACCTATGAAATCATGTTATTTCTAAAAGTGTATATAAGAACTCATCATTACAAAAATCACATATAATAGTATAACTAAATGAGGAACCTCTCACTAACGTTAACAAGTTATTTTCACCTGTTCGTTTATATGTATATTCTACGCCTATATGAAAAAGCCTACAAAAACAATTAACACATCCATCTTGTCCTACTTCTAAAATTGAAATACTCCAAACTTCATTGAAATGAGTATTTCATCACCTTTTCTTAGATTCCAATCGTTGTTCCTCTACTATTTCGGTTAATCCAATTGCTGAAATTACAGTACCTATTGCCTGTATCCAAATACCTATTAGCCCTATTATACGTTGATCTTCTTTATCTATATTACTGTCTTCTTTTCCCTTTTCTTCTTTTGGTATATTTTTAATACCTTCATATGCTTGTAATCCCGCACCTAAACTTTGCATTGAATTCCCTAATACGATAATCCACTGTATTCCATTTATCTCATTCAATGCTACTTCTACTCCTAAAAAAGCGCCAATTGATTGCAAACTATTTCCAGTAATTATAAGGAAATCATTTTCTTTCATTTGTTCATTGAGATTAAAATACGTTCCTATTACATTTGAAATATTACCTAAAGCTAATAACTCAATCCCCGTTTTCGCTAGCGCTTTATTTTTATTATCTATAGTTACCTCTTGCTTCTCACATTTATCTTCATCTTTCGCATCATTAGTTGCTATAATTTGTAATATATATCCGAGAGCTTGTAATGAACTCCCTACAATAACGAGATCCGACTCAATCTTTTCTTCCCCTGCAAACCCTCTAGTTGTTCCAATTGCTGCTATAAGATTTCCTCCTACTTGAAACCAGGCACCTGTCAATTCTAAATCGCGTAGGTTCATTTCCCATCACCTATATTTAAATTTTCTAGTACATTCAAAACAAACAAATATCACCTTTAATTCATTATTCTATTTTTCATATGTAATTTTGTGCATCACATAATGTACTAATAATACAAAAGAGCACCTTTCAGGCGCTCTTTTTGACAAGGAGTTTCTCAATGAATGTATGCCGGACATGTCAATGACTCACCGTTTTTATCTATTGATATATTTAATATATAATGAATTGCTTTATGTGACCACATTTCAGGTGTAGGAGATTCTTTTACATATTGAGGTGCACATAAGTGAAGCATTGGAGTGCTAATACTGCCACCTGGATCTAAAGCAACTACTGCCATACCATTTGGCAGTTCCATTGCCATTGACTGCGTGATACCTTCAACAGCAAACTTCGAGGCACAATATGGCGCAAGTTCAGCCTCGCCTTCTCTTCCCCAACTAGAGCTCATGTTAATAATAATTCCTTCTTTCCTAGCTAT
CATCGCCGGGACGAACGCCCTAATGACATTTACTACCCCGTTCACATTTACACTCATTACACTTTCAAATTCTTGTGCGGTAACTTTCCATAGAGGTGCATTTTGATTAACAATCGATGCATTATTTATTAACATATCGGGAGCCCTATACTTATTAAGAATACGACTTGCCCATTTACTAACTTGGTGATGATTTGAAACATCAATTATTTGAAAATCATGTGAAGCACCGTAATATTTATTAAGTTCTTCAATTTTATTTTTTGAGCGTCCACACCCTGCTATGTTCCACCCCAATTCATGAAACCTATCAACCATTGCACGTCCTAACCCTTGCGTTACTCCAGTAATAATTACAAGTTTCCCAATGTCGTTTCGCTCCAACTTCATCTCCTCCTTTCCATTAATATTACTCTTTTTATTCTTATTCAACTTCTCACTTAAGTATCAATTCATTTCATACTTTAGAAGTATTTTTGATATTACATATTACAAACAAAAAGAGTAACCATCATAGTTACTCTTTTTGTTTGTATCATTATTATTTTTAAATTGTATCTTCTATTCCCTACATTTCTCACATCGATAAATATAGGTTACACCTTAATGAAAGTATATTTATGTTTACGGTTCGATTTATCCAAATACTTTTCAATCCACTTTTTCAACTTAACAGAGCCCACCTCTATCAATAAATTTCCACCTAGTCTTATAACATACTTTTATTTAAATTCTCAATCTCATTTTGCAACTTTTTAAAATTCAGCTCTTCATTAATTTTAACATACTTTTTGCTCGATTGATTAAACAAAGCTTCAAACATTTTTCTATTCCGGAATATTTGTCAAATGAACCAATTAGACTATACCTTCATATAGTAATGCAAACAATTACTTATAAAGCAGGTGAAAATTATGCCCTCAGTTGTTGGGAATTTAGTTGTACAAAATAGTAACGGATCTTTCAACTTAGGTGATTTCTACAATGTTTCTCCAAAAGAGAATACGAAAGCTTATAACGGCTCTGGCGCATCTAACGTTGGTTTCGTAGTGAACACTTTTAGTGGGGTTAGCGCGACTAATACATTTGATTCTGATCTAGCAGATCAAAACCAAGTTGGTACGGCCTAACCAGATCTGTAATCATTTTTGTATAACATTTTAAAATTTGTCTATACTATACTCAGGCTGATACACAGCTATCATTGATTTTTTGAGTGCCAATCTTTTTCTCCCTTTCCCTGGGACTAAGCAGCTAGCTTGCAGCTAGCTGCTTTTTCATTTAGTTCTTCACCTCTTCAAAAAATTAGGTTTTAACAGGTAGCCTGACTCACAGCCTAATTTTGAGGTGTGAATCTGCCTGTCCGTAAATAGCTAGATAAATTAAAGAAGCTAAACTTCAAAATTCAGCTTCTTTTAGGTTACATCAATTATGTCCATTGCAGAAAATATATATGTTTTATAAAATGCATCTGTACATTTTACAATTTGTTGAATTGGATTTATATGAACCACTGTCATATAACTCGTAAGTATGTATCCTTCTTCAAAATATATCACTAACATCTCTTCTTCAGACAATAATGAAGTTAATAATTTGTCTGAAATGCTGTCTTTCGCTTCATTTGTTATTAGCGGTCTTTGGACTTTTAATTTCTCTCCAATAATTTTTTTCATACTTAATAATTGTTCCGACATTACTGTAAACGGAACCCACTCTCTTCCTTTTTCCTGTATCTTTACATAATTCATTTTCTTTACTCCTCAATTTTCTATATCTATCCATTCTTGAACTACCCCACTTTCACTTCGTTTAGAAGTGGGGGATTCCTAAGTAAAGAGTTCTATCGAACTCTAATTTATTAGGCTATCTCCGCAGCCCTTGCGGTTAGAAGCCTTATCGCTTCATTCTTTATCCATCTCTTATTTATACGTAGCGTAGCTTTCCAATTCAAAACTATTTATGAATTTGTAGACATCTCCACTTCAAAAGAACATTCG
TTCTAATTATACACGAACGAACGTTCTTACGGAAGTGCAACATGTAAATTTTTCAAACAAAATGCAACTTTAATATATGAAGTTTATTCATACTCATTTATTACTCCCTTTTATTAATCAAATTTCTTTGCACTTCCTATCGCATCATAGAATACGTGTAGCAACAATAAAAGCGACTCCACCTAATGTGAAGTCGCTTTTATTGAACTAATTATATTTCTAATTTTACTTTTCTATCCTAACTAACGTTCTTTCTGTAATACAAATTTCAGCTACATCTCATTAACCATCCTTCAACAAAAGCATACACTTTTATTATGAACGAACTCACAAAGCTTTAGAAACTTATGATGTAAAACAATTCATTCCTCATAAGTTTCATACATATCGACAATTACTACCATTTATCCTGCTATTTGCAGGCAGTAAGATTCACACTCATCAATTTTTCACTTTCTTTATTGAATATGCTTGGGTTGCATTATAATATGTAATTCTCTCTCTGTTTCTGTTTTTTTCGGATTCCGATAATATTCCAGTTTATATTCTAATACTTCATACTCAACCATCCCAATCTTTATCCCTTTTGTTTCTTTTAATACAGCTAATAGAACGCTTATATTTTCATTAGATGATTCAAAAATTTTCCCTCCGCCTTCTCTCTTAAAAATAACTTTCATATATTCTCCCCAAAATTAATTTAATAATCTAGTAAATATAAAAATATTCATCCTATTTTAATAAAGTGAAGGCTAATAATCAGTGGGGGTTTTACGAGCAGTAAGACTCCCACAAATAGCGGGATAAATAATTCTTATCATCATTAAGAACATCTTTTTGTACTAAATGTTCAACATGTCACTTCCTATAAAGTTAATTTATACTCCAATAAATATGATATGTACCATTCTTTAAATTATGTCCTCAATTCACTCTCCAAGAAAATATATTTAGTTTATATTTAACAAAATTAATTAACTAAAACATTATCATTTCAACATTTTGCGAATTTAATTGCTTATTCCTTTTATCTACATCCAAATTTTCCTATTGTAATTCTTAATATTTAAATGTAATATTGTATTGTCTTACTATCTATTTATAACTCACACACTAACGCGAAAACAAAAAAAACAGAAATATAGCATTATGCTAAATCTCTGTTTTTCTTCTAAACCTTATTTCTCATCTAAGAATTTAAATGTATCTTTAAATTCTTTATCTGTTACTTTAATATCTGCTTCTTTCAATAGATCATTGATTACTTGTTGTTTCCATTTACCTGTCGCATCTTGCAGTCTTTGTTGTTCTAAATCTTTTCGAATCGTATCTTTTACTTCATCAAATGGTTTCAATTCTTTTTTATCCGTCACTTTTATTATATGATAACCATAAGATGTTTTTATTGGCTCACTTACTTGTCCTGCATCTAATTTATACGCAGCTTCCTCAAACTCTTTCACAGTTTGACCAGGAGCAAAACCAGCGATTTCTCCGCCCTGTTCCTTTGAACCAGTATCTTCCGAATACTGTTTCGCTAAAGCAGTAAAATCTTCACCATTATTTATTTTCTCTTTTACTTCTTTCGCAGTTTTTTCATCTTTCACTAAAATGTGACTTACCTTCATTTCAGGTTTATGGTTATCTTTCACATCTTTATCTGTGACAGTCGCTCTAATCGCTTTCTCAAGTGCAATTTCTGGCTTCATTCTCTCTTTTAATTCATCTTCATTCTTTAATCCTACTTGCTCTAAAGCGACTTTAAACTTGTCACCCATTTGTTCCTTCGCTGCTTCTACTTGTTTTTTAGCTTCTTCATCTGAAACTTTATATTTATCTAGCAATGCCTTATTTAACATCATTTGAGATAAAGTACTTTTTCCATATGTTTGCCTTAATTCTTTACTTAACTCTTTCTCTGTCACGTTTCCTACTTTTGATGTTGCAACATTTTCTGAGGAACCACACGCAGACAATGTTAGCGCCACACATGCAATAAT
AGTTCCCATAAATAGCTTTTTCTTTTTCAATTCAAATACCTCACTTTGATAAATGTATATTAGGAACTATACAGCGCTATTGTGAAATCAATGTGTTTTCTGCAAGTATTTCTCTTCATGCCAGAGACAAGCTTTATTAAATAAAAAAGCCGGCATAAATCCGACTTTTAACATCATTTAATCTGCACATGTTCTTCTTTGATCCATTTGTTCCCACCTATATCTATATATCCATCTTGACGATTCCAAATACGATAAGGAACCCTCCCATCGACCGCACCTGTATCATTCCCGTTTGGCCCATCGTACGTTCTAATTTCATATCCAGGTGTATACTTTGAATATGCATAAAACCATTGTACATTAAAATGTTCTAACTTAGCCCACTGTTGTTCACCACCTAAACAAATCATATCTTTTTCGCCACCTCCCCAATAACCTTTATAGATTAAATATGGGATTTTTTTCGTAAGTCTTCCTATAAATTGTGGATTCGCAGGATTTTCATATAAATTAATTCCGTATCCATCATCATATTTTGAAACTGCAATACCAATTCCATTTGGTCGCTTACTGTCTCCAATACTAGATTGATTTGTTTGATTTGAACCTATAAACCAAGAAAGATTTTTATTCCCAATTAATTGATTCAAATCACATTTTCCAATACCAGGAACATTTCCTGAATCAGTATATTGCCAAATATCACAAGGATATGCTGGTTTATTCCCTCCATAACGAGGAATCCAAATAAAATCAGCATCAATATTGCGAGCTCCAAATGATACATACGTGTGATGTCCTACATATAAACCAATTTTTTTTGCACCTAATCGGCGTAATTCATCAATAAATGCTTGAGTCCCGCCTTGCATATCTATCATTGTTTGAACTTCTACATCTGCCACCCAAAATTTAGCGTTTTTATCGCCACGATTCCAAAAATCTTGTGCTTCTTTTTTTGCATCTGAAATAGATATAAAACGGCAAAATGCATAATTCCCAAATGGGATACTACGTTTCTTCATTTCTTTCACATAACCTTGATACATAAAATCGATCGTATTCGAACCATCTTGAACTCTAGCAATAACTAAATCTAGCTGAGACGCTGCAATATCCCAATTAATATTTCCATTCCATTTCGAAATATCTACAATATAACCCATTACTAATCAACTCCTTTTTATTATCTTATTCAAAGCTTGTTTTATTGCTAGACCCATACGCCTAATAGCAAGCTTCTATAAGACAAATAAATATATTTTCATCATTTTTTAAAACTAAATAATTTCTTTTTATCCATAATAAATAAGTCCTTCTTTTTTTGCATTTGTAAACGATAAACTTGCGTTTATTTCATACTAAATATTGGAATAAAACTATAAATAATGTACATACTACATATAAGCAATTTTTGCTTATACTCATGAATGACCTCCAAGATTTTTCAATCTCCCTGTAAAAAGGCAGCTAAAAAAAGCTGCCTTTTTACAATTTATTATATAGAGACAAGTTAATTAACGCTTATTCCCTCATATTAGTCTGTTTTTAAGCTTTTTATAAACATGATATATATTTTTTCACAACACTATTTTGATACGTAATAGAAATTACATCAACTCATACAGTATATGTAATAATCAACTACAATAAAAATTACATATTATATTAATATGTCTTCCTAATTAAAAGGAAATAAACATATAACCTCTGTTATGTAAGGAGTTTATTTAATGAAAAAAATCCCTATCGTATTCAAAGTTCCCCCGAATTCAAAGCTAAAAGTTACATTCTATGGTCCTTGTAACGAAGTAATTACAAATGTATCTTTAATTAATCAACTACTCACCACTACCTGCCAAACTGTATCTCAATATCCAGACTTCAAGAAATATATAACTGAAGTCCGATCATTATCAAATTGTTAATCGTATAATTTTATGATTTATCATTTCGAATAGTACGATAACAAAATAATAAAAAGAGCCGCATCTTTC
AAAAGAAACGGCTCTTTTTCATTTCACAAAACATGACTTCATTAAAAAACTAAAATAACTCATTAATTGATATTCTGTTGATATTTTATTGATATTTTACACATATTAATGAAATGCGCATATATTATCGTATGGAGATTCTCCACTCATAAAACCTATCTTTTTTTGAGCATACTTTATTTGTATGCTCCTTTTTTATTCTTTACTTAACGGCTTACCCTGTTAATATTTCCAAATATCATCTGAACGAACATTGAATATTTCAATCTTTTCTTATTATGTGCTATGAATATAAGTTTAATAATTTATCTGTAGCTACATTTTCAGTATCACTTAAATCGACAAAATAATGAAGTTGCCACAATTGACTCTTCTTAGCTTGATTACTAATAGGTTTATCCCAAATAGGGTAAACCCTCATAGCCATTTTCCCTTTTTGATTTTGAGTTTTTAATATTTTTTCTTTTAGAAGAATCTCTTTTGGAAAAATAAATTGTCCTAGGTTATTATCCGCTATACAAGTGATAACTAAAAGGTTCGGAGCTGAATCATAAGAAAATGCCTGATTGCTCATGGAGGCATTCTTCTCCCAAAAAGAAACAAATTGTCCAATCTTATTAGGCGTAATTTTTGATACCCTAAATCGGATGCTTCTATTATTTAGCTGGAATACCCCTCCTGCATATTCTGAATTTTGCTTTTCTTCTTTTCGTTTAGTAATTAATAGATTATTAGGTTTGTAAATCATTTCATCTAGTTTTTCAATTATATTACTAAAATTTGTCATGTAAACGCACCTTTTTTATTTTCCAGTATACCAAGAACGTTCGTTCTTGTATATTAAACTTAAACAAATGTTGGCTTTGAATAGTCTTTCAAAGATTATTTTATCTACTCCATCCATAACTTCAAATTTACTTTTATAGTATTTATTACATTACTCTTCCAATTCCTTATATTCTTCATTATTTCATTAATATATTAATAAAAAACGAGGTACATATTACATGAATCCATCCAAAATTGACTTCGGCTCCATAGAGCAACCTTTATTCCTATTAATTTTATATCTTATCGCAATTCTTTTAACACTCGGCATTACATTTAGTATATGTCATTCCCTTTTACTAAATTTAAATGCACCGAGATGGGTAGCAAAACTGTTAGCTATCATATTAACTCTAGGTGTTGCTTATCAAGTGTTTATGAACTTATTTTAATATATTACCAATTTAACACCTTTAACTAAAAGGCCCTACCATAAGGTAGAGCCTTTTTCAAAAAAATAGTCCATAAATCACAAACATGTCTTTTATTTTATTGGAGATGGGAATCTTGCAGCTTCAAAATGCGTACTAATAAATCCTGGATCATTTTGTATAATCAGTCCATCTACATCACTCTCAGCAAAAATTTCAACTACATCTCCTGCATGTAATTGAAGTATTGTTGAAACAGCTACGACATTTGCAAAATTGGGTGGACCAAAAAAGTCATTATCTATCGCTATAGCTGGATTATCATTTACCCGAATTTCTACACGCGCTCTATAATTCGTATTACTATCATTAGGTGCAAAGCTGATTGTACCAATAATAGAATAAACCCCTCTAGTCTTCGGAATAAAATCTGATGTTACTGGATTATATTCATTCGCTAAATCAAACTGTTCATTTTGGAATAATACTTTAACAAAAGTATTTGCAGTGACATTTTGATTTACTGTACTAACCGCTCTAAAAGCAGATGCCCTTACAAGTTTATCTTTCTCATTATCACAACACTTATCGTAATAATCTGTACTCTTCTTACAACAAGTATGCTTTGTCTTTTTATAGCCATTGCAATGCTTACAATAATAATAAGC +>9_2#NODE_9_length_39996_cov_63.0617_ID_17 
+GCGGGATGCGCCTGGGCGTGCCGTTGCGCGAGCAGGAGTTGCGCGGGGCGCTGTGGCGCTTGCTCGAGGACCCGGCCATGGCGGCGGCCTGTCGGCGTTTCATGGAATTGTCACAACCGCACAGTATCGCTTGCGGTAAAGCGGCCCAGGTGGTCGAACGTTGTCATAGGGAGGGGGATGCGCGATGGCTGAAGGCTGCGTCCTGAACGGTGCTGGCATAACAGATAGGGTTGCCATGATTTTGCCGTATCGGCAAGGCTGCGCGCTTGACAGCGTCATACCCCGGGCCAATTCTGCTGTGATGCATTTTATCGATCAGGGCTTACTGCAATGAGGAATGACGGAGGCTTTTTGCTGTGGTGGGACGGTTTGCGTAGCGAGATGCAGCCGATCCACGACAGCCAGGGCGTGTTCGCCGTCCTGGAAAAGGAAGTGCGGCGCCTGGGCTTCGATTACTACGCCTATGGCGTGCGCCATACGATTCCCTTCACCCGGCCGAAGACCGAGGTCCATGGCACCTATCCCAAGGCCTGGCTGGAGCGATACCAGATGCAGAACTACGGGGCCGTGGATCCGGCGATCCTCAACGGCCTGCGCTCCTCGGAAATGGTGGTCTGGAGCGACAGCCTGTTCGACCAGAGCCGGATGCTCTGGAACGAGGCTCGCGATTGGGGCCTCTGTGTCGGCGCGACCCTGCCGATCCGCGCGCCGAACAATTTGCTCAGCGTGCTTTCCGTGGCGCGCGACCAGCAGAACATCTCCAGCTTCGAGCGCGAGGAAATACGCCTGCGGCTGCGTTGCATGATCGAGTTGCTGACCCAGAAGCTGACCGACCTGGAGCATCCGATGCTGATGTCCAACCCGGTCTGCCTGAGCCATCGCGAACGCGAGATCCTGCAATGGACCGCCGACGGCAAGAGCTCCGGGGAAATCGCCATCATCCTGAGCATTTCCGAGAGCACGGTGAACTTCCACCACAAGAACATCCAGAAGAAGTTCGACGCGCCGAACAAGACGCTGGCTGCCGCCTACGCCGCGGCGCTGGGCCTCATCTGATGCTTAGGGCGCGCCGGCTGGCGCGCCCTACCAGATCTGGCAGGTTGCCTGCCGTTCATCCTCCTTTAGTCTTCCCCCTCATGTGTGTGCTGGTATGTCCTCCGACTGAGAGGGCCCAGGAGTATCAGGGTAGGGATGCCGCCTTTTTTTCTCGGCCGGCACGACACGGGGACTTGGTCATGATCGAATTGCTCTCTGAATCGCTGGAAGGGCTTTCCGCCGCCATGATCGCCGAGCTGGGACGCTACCGGCATCAGGTCTTCATCGAGAAGCTGGGCTGGGACGTGGTCTCCACCTCCAGGGTCCGCGACCAGGAGTTCGACCAGTTCGACCATCCGCAAACCCGCTACATCGTCGCCATGGGCCGCCAGGGTATCTGCGGTTGTGCCCGCCTGTTGCCGACGACCGACGCCTACCTGCTCAAGGAAGTCTTCGCCTACCTGTGCAGCGAAACCCCGCCCAGCGATCCGTCGGTATGGGAGCTTTCGCGCTACGCCGCCAGCGCGGCGGACGATCCGCAACTGGCGATGAAGATATTCTGGTCCAGCCTGCAATGCGCCTGGTACCTGGGCGCCAGTTCGGTGGTGGCGGTGACCACCACGGCCATGGAGCGCTATTTCGTTCGCAACGGCGTGATCCTCCAGCGCCTCGGCCCGCCGCAGAAGGTCAAGGGCGAGACGCTGGTCGCGATCAGCTTCCCGGCCTACCAGGAGCGCGGCCTGGAGATGCTGCTGCGCTACCACCCGGAATGGCTGCAGGGCGTACCGCTGTCGATGGCGGTGTGAGGTCGTCAGCCATTTCGCGCACTTTTTTCCGCTTCTCCTGCCGCATGCTCGGCCCGCGCCCCGGCGTCATCGGGCGTTCCCCTGCATTCCGGGATTTGGCCGCGGCTGCCGACTTGCGTAGTCTCTCTGCGGTCCGCCATCCCGAGGAGTCGCCATGC
CGAAGTCATTCCGCCATCTCGTCCAGGCCCTGGCCTGCCTTGCGCTGCTGGCCAGCGCCAGCCTCCAGGCGCAGGAGAGCCGCCTCGACCGCATCCTCGAAAGCGGCGTGCTGCGCGTCGCCACCACTGGCGACTACAAGCCCTTCAGCTACCGCACGGAAGAGGGCGGTTACGCCGGTTTCGACGTGGACATGGCGCAGCGCCTGGCCGAGAGCCTGGGGGCCAAGCTGGTAGTGGTGCCGACCAGTTGGCCGAACCTGATGCGCGATTTCGCCGACGACCGCTTCGACATCGCCATGAGCGGCATCTCGATCAACCTGGAGCGCCAGCGCCAGGCGTATTTCTCGATTCCCTACCTGCGCGACGGCAAGACGCCGATCACCCTCTGTAGCGAAGAAGCGCGTTTCCAGACCCTGGAGCAGATCGACCAGCCGGGCGTGACGGCCATCGTCAACCCCGGCGGCACCAACGAGAAGTTCGCCCGGGCGAACCTGAAGAAGGCCCGGATCCTGGTGCATCCGGACAACGTGACGATCTTCCAGCAGATCGTCGACGGCAAGGCCGACCTGATGATGACCGACGCCATCGAGGCCCGCCTGCAGTCGCGTCTGCACCCGGAACTCTGCGCCGTGCATCCGCAGCAACCCTTCGACTTCGCCGAGAAGGCCTACCTGCTGCCGCGCGACGAGGCCTTCAAGCGCTACGTCGACCAGTGGCTGCACATCGCCGAGCAGAGCGGCTTGTTGCGCCAGCGCATGGAGCACTGGCTCGAATACCGCTGGCCCACCGCGCACGGCAAGTAATACAGGGCCGGCGAGGGTGGCCGCGGGCCCGCGCGGCCTTCCTTGGCGGCGGCAAAAACGTTATGGTCGGCGCCCCATCCTGGTGCCTGGTCCATGCGTTATCTACTGTTCGTCACCGTCCTCTGGGCGTTCTCCTTCAACCTGATCGGCGAGTACCTCGCCGGCCAGGTCGACAGCTACTTCGCCGTGCTTACCCGGGTCCTTCTCGCTGGCCTGGTGTTTCTCCCGCTGACCCGCTGGCGCGGCGTCGAACCGCGTTTCGTCGGCGGGGTGATGCTGGTCGGCGCGCTGCAGTTCGGCATCACCTACGTCTGCCTGTACCTGAGCTTCAACGTGCTGACGGTGCCCGAGGTGCTGCTGTTCACCGTGCTGACGCCGGTCCACGTGGCCCTGTTCGACGACCTGCTCAACCGCCGCTTCAACTTCTGGGCCCTGGCCGCCGCGCTGGTGGCGGTGCTGGGCGCGGCGATCATCCGCTACGACGGGATCACCGGCGAGTTCCTCCAGGGCTTCCTGCTGCTGCAACTGGCCAACGCCACCTTCGCCGCCGGCCAGGTGCTGTACAAGCGCCTGGTGCGCAAGTACCCGTCCGAGCTGCCGCAGCGCCAGCGCTTCGGTTATTTCTTCGTTGGCGCGCTGCTGGTGGCGTTGCCTGCCTGGCTGCTGTTCGGCGATCCGCAGCGCCTGCCGGCCGGCGAGCTGCAATGGGGCGTACTGGTATGGATGGGGCTGCTGGCCACCGCCCTCGGCCAGTTCTGGTGGAACAAGGGCGCCACCGAGGTGGACGCCGGTACCCTGGCGGTGATGAACAACCTGCACGTGCCGGTCGGGTTGCTGCTCAACCTGCTGATCTGGAACCAGCACGCCGACCTGCCGCGCCTGGCCCTGGGCGGCGCGGTGATCGTCGCTTCGCTGTGGGTCAACCGGCTCGGCCGGCGCGAGGTGCGTGCATGAGGATTTCCGGACAGGGCGTGCTGCTGTCGCTGGCCGCCTCGGTGCTGTTCGTTACCCTGCCGGGCTACGTCCACCTGCTGGAGCCGCTGGACAGCCTGCAGGTGGTGGCGCATCGGGTGGTCTGGTCGATCCCGATGGTATTCCTGCTGGTCGTCGCCACCCGCCAGTGGCCGACCCTGCGCGCTGCCTGGCGCCGGCTGTTCGCCGAGCCCTGGCTGCTGGCCTGCTTCCCGCTGACCGCGG
CGATGATGCTGCTGCAATGGGGCATCTTCATCTGGGCGCCGTTGGCCGGGAAAACCCTTGAACTGTCGCTGGGCTACTTCCTCCTGCCGCTGGCGATGGTGCTGGTGGGGCGGGTGTTCTACGGCGAGCGCCTGACGCCGCTGCAGGCTATCGCCGTGGCCTGCGCGCTGGCCGGGGTGCTCCACGAGTTCTGGCTGACCCGCGCGTTCTCCTGGGTCTCCCTGGTCACCGCGCTGGGCTATCCCCCGTACTTCATGCTGCGGCGCAGGATGGGCGTGGACGCGCTGTCCGGGTTCGTCTTCGAGATGCTCTTCCTGCTGCCGCTGGCGTTGGCCGCGCTGTACTGGCTGGGCGACGAGAGCCAGGCCTTCCGCGAGGCGCCGCGCCTGTGGCTGCTGCTGCCGATGCTGGGGCTGATCAGCGCGCTGGCCTTCGGCGCGATGATGGCTTCCAGCCGGCTGCTGCCGATGGGGCTGTTCGGGATTCTCAGCTACGTCGAACCGGTGCTGCTGTTCCTGGTGGCGGTACTGTTCCTCGGCGAGGCGTTCCGTCCCGAGCAGCTATGGACCTACGCGCCGATCTGGCTGGCGGTGCTGTTGACCGGCTGGGACAGCGCGCGCCTGCTGAGGAAACAGGCGCGGCGGGGCATCTGAGCGAGCCGCAGGCGCCGGAATCAGCGCTTGCGGGTGCTTTCGACCTGGGCGAGGGTGGTCGGCAGGATGCGCTTGCCGGCCAGGTAGTGGCGTTTCCAGTAGCTGCTGTTGAGGTCGGAGACGCGGACCTTCTTGCCGCGCCGCGGCGCGTGCACGAAGCGGTCGTTGCCGACGTAGATGCCGACGTGGTCGACGCTGCGGCTGCGGATCCGGAAGAACACCAGGTCGCCCGGTTGCAGCTTGCCGCGGGAGACCTTGTTGTTGTCCATGTTGTAGATGGCGCGGGCGGTCCGCGGCAGGTCGACGTCATCGACGTCCTGGAACACATAGTTGACCAGGCCGCTGCAATCGAAGCCTTTCTTCGGCGTGGTGCCGCCCCAGCGGTAGGGGGTGCCGATCATGCTGAAGGCGCGGTCGGTGACTTCGGAAGCGGCGGCCGCGACCACCGAGCGGTTGGCGGACTGGCGGGTGCTGACGACCCGGCTGGGCGACAGGGTGGAATTCTTCGACGGCTTGAAGGTGTGGGTGACCTGGTTGGAAGCCAGGCTGGGCGTCGAGAGGGCGGCCAGAAGGGCCGCGAAGCCTACAGACAGGCATGTCAAAAAGGGTGAACGCATTTCGGCATGTCTTCGCGTGCGTGAATAACGGTCCAGGCTTCCGCTGGTTCGTGGTTACTCTCTGTTCATCGGAGTGTCTTCTGGGTGACCGGAACGAGCCCTCCTGACTCTTCCGACCAGGCCGTCAAACGAAACGGCCAACTGCTTCCGGGCCTGGATGCATTCAGGTTGTCCCGGCGATAACGGGCTTCACGAAGGAAAATCGGGGACTCATCGGCGTTCCCAGCGTATTCCCCTACGAAAGCGGCGGCTATTGTGACCTATTTTGACGCCGGCCTTCTGACCATTCGTCGAATGATCGGTTCCGGATGTGACGCGCCGGTTTCGCCGTGCAAGTGGCGATACTTGCGGGTCCGGCAATGACCTGTTGTTCGCCGTGGCCTTGGAAAGCCTCTGCGCCGGGGGTATGTTCGTGTTCCCCGTACCCGAGTGAAGCCCGTAGAAGCGAGTCCAGATATGACCGAAACAGCCAAGCGTCCCTTGTACGTTCCCCATGCCGGCCCATCGCTGCTGGAGATGCCGCTGCTGAACAAAGGCAGCGCGTTCAGTACCCAGGAACGCATCGATTTCAACCTGCAGGGCCTGCTGCCGCACAACATCGAGACCATCGAGGAGCAGACCGAGCGCGCCTACAGCCAGTACAACCTGTGCAACACCGATCTGGACCGCCACATCTTCCTGCGCTCGATCCAGGACAACAACGAGACCCTGTTCTTCCGCCTGCTCGAGG
AGCACCTGGAAGAAATGATGCCGATCATCTACACCCCCACGGTCGGCCAGGCCTGCCAGGAGTTCTCGAAGATCTACCGGACCCACCGCGGCCTGTTCATCTCCTACCCGGACCGCGAGCGGATCGACGACATCCTGCGCAGCGCCACCAAGAACAACGTGAAGATCGTGGTGGTCACCGACAGCGAGCGGATCCTCGGCCTGGGCGACCAGGGCATCGGCGGGATGGGCATCCCGATCGGCAAGCTGTCCCTGTACACCGCCTGCGGCGGTATCAGCCCGGCCTACACCCTGCCGGTGGTGCTGGACGTAGGCACCAACAACCCGGACCTGCTCAACGACCCGATGTACATGGGCTGGCGCCACGAGCGGGTGAGCGGGGCGCAGTACGAGGAGTTCGTCGACCTGTTCATCCAGGCGATCAAGCGCCGCTGGCCCAACGTCCTGCTGCAATTCGAGGACTTCGCCCAGACCAATGCCATGCCGTTGCTGGAGCGCTACAAGGACGAGCTGTGCTGCTTCAACGACGACATCCAGGGCACCGCCGCGGTGGCCGTGGGCACCCTGCTGGCGGCTTGCAAGGCCAAGGGCGAGAAGCTCAGCGAGCAGACCGTGACCTTCGTCGGCGCCGGCTCCGCCGGTTGCGGCATCGCCGAACAGATCATCGCCGCCATGCAACTGGAGGGCCTGGACGAGGCCCAGGCGCGTCGGCGCATCTTCATGGTCGACCGCTGGGGCCTGCTCACCGACGACATGAGCAACCTGCTCGACTTCCAGCACCGCCTGGCGCAGAAACGCGCCGATCTCGGTGCCTGGGGCGGCCAGCAGGGCGACGACCTGGCGTTGCTGGAAGTGATCCGCAATGCCCGGCCGACGGTGCTGATCGGCGTCTCCGGGCAGCGCGGGCTGTTTTCCGAAGAGGTCATCCGTGAGCTGCACAGCCATTGCAAGCAGCCGCTGGTGATGCCGCTGTCCAACCCGACCTCGCGGGTCGAGGCGACTCCGCAGGAAATCCTCAACTGGACCGACGGCCAGGCGCTGGTCGCCACCGGTAGCCCGTTCCAACCGGTGCAGGTGGGCGACAAGCGGATTCCCATCGCCCAGTGCAACAACGCCTATATCTTCCCCGGCATCGGCCTGGGGGTGATCGCCGCGCGGGCCAACCGGGTCACCGAAGGCATGCTGATGGCCGCGGCCAACGCCCTGGCCAACTGCTCGCCGATCGTTACCCAGGGCGAGGGCGCCGTGTTGCCGGCGCTGGGCGACATCCGCGAGGTCAGCAAGCGCATCGCCGTCGCCGTGGCCAAGCAGGCCCAGGCCGAGGGCAAGGCCCTGCATACCAGCGACGAGGTACTGAACGACGCCATCGAGGCGAACTTCTGGTTCCCGCGCTACCGTGCCTACCGCCGGACTTCGTTCTGAGGCTCAGCCGCAGGCTGCGCACGGGGGAGAGGCGTTAACCGCCGATATTCCGCGCGATCGGTGCGAGAACGCGAAAAGGCCACGAGGTGGCCTTTTTCGTTGACGGTAGCGCTAGCGCTAGCGCACATGATCGGGGCGCGTCCGGTGTTCGGCCTGGGTTTCCGGGTTTTCCGCCGCGCGCCGCGCCAGGGCCGGCAGGACATGATCGCGCAGCAGCGGCGCGAGATTGTCCGGCTGCGCTTGGCCGGGGACCAGCCAGGCCAGTTCCTCCAGCTCCGCCTGCGCACAGACGGCGTGGGGCAAGCGGGCGACGTAGATATCGGCATCGACGCGGGTGTTCGCCTCGTTGGCCGCCGGGGCCTGGAAGCTGCCGAGGTGCTCGAAGGTCGAGGCGCCCATCGGCAGGCGCAGTTCCTCCAGCAGTTCGCGCTGTAGCGCGGCGAGGGGCGTTTCGCCCGGCTCGCGCTTGCCGCCCGGGAGCATGAAGGCCTGGGTGCCGCGCTTGCGCACCAGCAACAGGTTGCCCTGGTCGTCGAACAGGCAGGCGGCGGAAATGCTCAGCAGGTTGTCGGTCATTGC
GGGTCGTCCTCGGCGATCACCTGGTAGCGCATCTGTACGATGCCGCTGTTGTAGCCATGTTGTTCCAGCAGTTGCAGGCGCCGCTCGCGGCCGCCGGCGAACAGCGGGATGCCGGCGCCGAGCAGCTGCGGAATGACGCTGACGATCACCTCGTCGAGCAGCCCGGCCGCCAGGCAACTACCGGCGAGGCTGCCGCCGCCGGCCAGCCAGACCCGCCGGCAGCCCTGTTCGCCGAGGCGTGCGAGGCCTTCCTGGGGCGTGCCATGGCGCAACTCGACGCCTTCCACCGCGCTCTCCCGGGGATTGCGGGTGAGCACCTGGCAGGGCTTGCCCGGATACGGCCAGTCGCCGAAGCCGCGCACGATATCGTAGGTGCCGCGGCCCATCAGCAGCCCGTCGATGCCCTGGTAGAAGCCGTTGTAGCCATGGTCGTTGCCGCCCTCGGCGAAACGGTCGAGCCAGTCGACGCTACCGTCGGGACGGGCGATGAAGCCGTCCAGGCTGGTGGCGACGTAGTAGATCAGGTGCGGTTTCATGGACGGCGCTCCCCTCGCTTCAGACAGTGGCTTCGTCGCTGCGGCTCGGTGCGGCCAACGCCTCGAACGAACGGGCCTCGGCGTCACGCCCTTCGAGCATGCCCTGCAGTCCCTGGCGCAGGTCCTGGCCGCTGGGTTGCTGGTAGGCGCCGAGGCCGAACTCCGGCATCACCGCCAGCAGGTAGTCGAACACGTCGCCCTGGATGCGCTCGTAGTCGACCCAGGCGGTGGTGCGGGTGAAGCAGTATATCTCCAGCGGGATGCCCTGGGCGGTCGGCTGCAACTGGCGGACCATGCAGGTCATGTGCGGATGGATGTCCGGATGGTTCTGCAGGTAGGCGAGGGCGTAGGCGCGGAACGTACCGATGTTGGTCAGCCGGCGGCGGTTGGCGGCCAGCGGGGCGACCCCGCCGTTGGCGGCATTCCACTCCTGCAACTCGGTCTGCTTGCGGGCCATGTATTCGGTGAGCAGGCGCACCTGGCTCAGGCGCCGCTCTTCCTTCTCGTCGAGGAAACGCACCTGGCTGGAGTCGACGTAGATCGCCCGCTTGATCCGCCGTCCGCCGGACTGCTGCATGCCGCGCCAGTTCTTGAACGACTCCGACATCAGGCGCCAGGTCGGCACCGAGACGATGGTCTTGTCGAAGTTCTGCACCTTCACGGTGTACAGGGTGATGTCCACCACGTCGCCGTCGGCGCCGACCTGCGGCATCTCGATCCAGTCGCCGACTCGCAGCATGTCGTTGCTGGTGAGCTGCACGCTGGCGACGAAGGACATCAGGGTGTCCTTGTAGACCAGCAGGAGGACCGCCGACATCGCGCCGAGGCCGGACAGCAGCAGGAGCGGCGAGCGGTCGATGATGATCGAGACGATGATGATCGCGCCGAACACGAACACGGCGATCTTCGCCAGTTGCACGTAGCCCTTGATCGAGCGGGTGCGGGCGTGCTCGGTACGCGCGTAGATGTCCAGCAGGGCATTCAGCAGCGCGGTGAGGGCGAGGATCAGGAACAGCACGGTGAAGGCCAGTGCCAGGTTGCCGAGGAAGTGCGCGGCCTTGTCCGGCATCTCCGGGATCCATGGCAGGCCGAATTGCACCAGCAGCGAGGGCGTGGTCTGCGCCAGGCGCTGGAACACCTTGTTGCGGATCAGGTCGCCGAGCCAGTGCAGCGCGGGCTGGCGGGCGAGCAGGTTGGCGGCGTAGAGGACGATGTAGCGGGCCACCCGGCCGAGGATCAGGGCGCCCACCAGCAGCAGGCCGAAGGCCAGGCCGGCGTGCAGGATAGGGTGTTGGTCGAGCGCCGACCAGTACCCGGAGAGCTGGTCGAGAAGGGATGTGGAGTCCATGGACAGGCGACTTCCTTTGTTGCGGGCGGGGGGGACGGGCCGGCCCCGCGGTTGAACGGACATCCCGACGCGGGGGCCGGGAACGCACAAAGGAACCAAGAGTGCCCCATCGGGCG
CCGGATCGCATCCCCGACGTGTCTACAAAGGTTTTCCGCCGACGCGCGGGCTCAGCCGGCAAGCTCCTCCAGGGTGCGCCCGCGGGTCTCGATGCCGAAGGCCCAGACCACCAGGGCGGCGACGCCGAAACAGAGGGCGCCGAGGGTGAACACCCCGCCCTGGCCGGTGAGCGGCAGGACCAGCCCGGTGACCAGCGGGCCGAGCAGCGAGCCGATCCGCCCGACCGCCGAGGCGAAGCCCGAGCCGGTGGCGCGCGCGGAGGTCGGATAGAGTTCCGGGGTATAGGTGTAGAGCACCGCCCACATGCCGAACAGGAAGAACTGCATGGCCAGGCCGAAGCCGATCAGCAGTGCCAGGCTGCCGCCGAACACCGCGGTCTGGCCGTAGGCGTAGGCCATCGCGCCGCCGCCGAGGAGCATCAGCACGCAGCTCGGCTTGCGGCCCCAGCTTTCCACCAGCCAGGCCGCGCAGAGGAAGCCGGGTATCCCGGCCAGCGAAATCAGCACCGTGTAGTACACCGACTGGGTGACGGCGAAGCCGGATTGCTGGAGCAGCGTGCTGAGCCAGGAGGTCAGGCCGTAGAAACCGAGCAGGGCGAAGAACCACAGGCCCCAGACGGTCAGGGTGCGGCGCCGGTAGGCTGGCGACCAGAGTTCGGCGAAGGCGCTGAAGAAGCCCGGACGGCTGCGTTCCCGCTGCGGCTGGCGCAGCGGCGGCGGCAGTTCGGACAGGCCCAGCGAACGCATCACCCGTGTTTCTATGTCGCGCAATACCCGGTCCGCCTGTTCGCGCCGGCCGGCCTGCTCCAGCCAGCGCGGCGACTCGGGAATCAGGAAGCGGATGGCGAGGACGAACACCGCCGGCAGCGCCAGCACCAGGAAAATGCTGCGCCAGCCGGTCAGCGGCAGCAGGAAGTAGGACAGGCAGCCGGCAGCGACGAAGCCCAGCGGCCAGAAGCCGTCCATCAGCGCAATGTACTTGCCGCGGCGGCTGGCCGGGATCATCTCCGACAGCAGCGACTGGGCGATGGGAAACTCCATCCCCATGCCGATCCCCAGCAATACCCGGTAGAAGGTCAGGCTGTCGAGGTCGCCGGCGGTGGAGCACAGGTAGCTGGCCAGGCCCCAGAGCACGATGCTGGCCTGGAATACCGGCTTGCGCCCGAAGCGGTCGGCGAGCATGCCCGACAGGGCCGCGCCGATCACCATGCCGAAGAAGCTCGAGCTGGCCAGCAGCCCGGCCTGCGCCGAGTCGAGCCCGAACTCGGCTTTGATCGAGCCGAGCAGGAAGGTCATCATCGCCAGGTCCATCGAGTCGAAGAAGAACGCCAGCGCGATGATCACGAAGACCAGTCGGTGATACGGACTCAGCGGCAGCCGTTCGAGGCGTTCGGCGGCGGATGCGGGCGCGTCGGGCATGACCTGGTTCCTCTGGGCGGCGGGCGTCGAACGAGTCTGCGATCGCCGCCCGCGCCGTGCTTGCCTGGCGGCGACCCGGCGTCGAGCCGATTGCGACGCCGGCGCTGGCCGTCGCGCCGCGCGAATAAAGCCTGCGCGCCGGCGCGGCAACCGTTAGGCTATGCACCTGTCCATTTTCACGTGCAATGCCGAGGTTTCCCTTGTTCAGCCAATTTCCCCTCCACGAACGCCTGCTGAAGGCACTGGAAAGCCTGTCCTTCAGCGAGCCGACGCCGGTCCAGGCCGCGGCCATTCCCAAGGCGCTGGAGGGGCACGACCTGCGGGTCACGGCGCAGACCGGCAGCGGCAAGACCGCGGCCTTCCTGCTGCCGCTGCTGCACCGCCTGCTGGCCGAGGACAAGCCGCGCTCCCTGGCGCGCGCGCTGATCCTCCTGCCGACCCGCGAGCTGGCCCAGCAGACCCTCAAGGAAGTCGAGCGCTTCGCCCAGTTCACCTTCATCAAGGCCTGCCTGATCACTGGCGGCGAGGACTTCAAGGTGCAGGGCGCGCGCCTGCGCAAGAACCCGGAGATCATCATCGGC
ACCCCCGGCCGCCTGCTCGAGCAGCGCAACGCCGGCAACCTGCCGTTGCAGGACATCGAGGTGCTGGTGCTGGACGAGGCCGACCGCATGCTCGACATGGGCTTCGCCGACGACGTGCTGGCCCTGGCCAATGCCTGCCCGGCCGAGCGCCAGACCCTGCTGTTCTCCGCCACCCACAGCGGCGCCGGGTTGAACAAGGTGATCGCCGAGGTGCTGCGCGAGCCGCAAGTGCTGCGCCTGAACCAGGTCGGCGAGCTGAACGAGAATGTCCGCCAGCAGGTCATCACCGCCGACGACGTGGCGCACAAGGAACAGTTGCTGCAATGGCTGCTGAGCAACGAGACCTACACCAAGGCCATCGTCTTCACCAATACCCGGGTCTCCGCCGATCGCCTGACCGGGCGGCTGATCGCCAACCAGCACAAGGTCTTCGTCCTGCATGGCGAGAAGGACCAGAAGGACCGCAAGCTGGCCATCGAGCGCCTGAAGCAGGGCGCGGTGAAGATCCTCGTCGCCACCGACGTCGCCGCACGCGGCCTGGACGTCGAAGGCCTGGACCTGGTGATCAACTTCGACATGCCGCGCAGCGGCGACGAGTACGTGCACCGCATCGGCCGGACCGGTCGCGCCGGCGCCGAGGGCCTGGCGATCTCGCTGATCTGCCATGGCGACTGGAACCTGATGTCGAGCGTCGAACGCTACCTCAAGCAGAACTTCGAACGACGCAACATCAAGGAACTGAAGGCCGCCTACCAGGGGCCGAAGAAACTCAAGGCGTCGGGCAAGGCCGCCGGCAGCAAGAAGAAAAAGCAGGACAGGAAGGGCGCCGCCGCCAAGCCCGCGGCCAAGCGCAAGCCCGCCGCGCGGCCGAAGGCCGGGCCGTCGGCGGTGGTCAGCGCCGACGGCATGGCGCCGCTCAAGCGCAAGAAGCCGACGGCGGAGTGATCGCGCGCCGCCGACCGCTTCGGAGGGCGGATAACCGTTTGCGGTTATCCGCCGCGCCGGCGTTGGATGGCGGATAACGCCGCTGGCGCTGTCCGCCCCGGGGGACTGCCTGGCCCCGTTCTCAGCTCCCCGCCGTCGCGCCCGCCTCGTCGTTGGCCGGCGGCGTGGCACTCTTCACCAGCGGCGTATCGTAGCTGCTCGCCTCGTAGCTCACGCAATGGCGGATCTGCGGCGGACCGTCGCCGGCATGCAGGGCGGTGACGTTGTCGATGACGATCTTGTCGGCCAGGGTCATGCGGTCGAGCATGCGCAGGTGCTGGATCCAGTTGTCCACCAGGAACAGTTCCTGCCAGACCTCCGGGTTGCTCACGTCGCGGAACAGCGACCAGCGCTCCGCGCCGTTGCGCAGGCGCAGGCGGCGCAGCGGCTGGGCGGCGCGAACGAAGTCGCGGGTGCGCTCGGCGGGAATCCGGTATTCGATGGAGACCAGCACCATGCCCCGCCGCGTGTTGAAGACGAAGCTCGGCTGCCCGGGCATGCTCGCCGGCGCGCGGGAAATACTGGCGGCGTCCATTTCCGGCAGGCGCGAGTTGTACAGGAGGATCACCGAGGCCAGCAGCAGGCAGCCGGCGGCCAGCAGCGCGCCGTGCACGGTCATGGTCTCGGCGAGATGGCCCCAGAGGAACGAACCCAGCGCCAGGCCACCGTACAGCGCGGTCTGGTACAGCGCCAGGGCGCGCGCCTTGATCCAGTCGGGGACGAGGATCTGCACCGCCGAGTTGTAGGTGGCGAGGGCGCCGATCCAGCAGCCGCCACCGAGGATCAGCACCGGGAACAGGACCCAGAGGTTGTCCACCAGGCCGAGGGTCAGCAGGATCAGCGCCAGGGTGAAGCCGGCCAGGCTGATCAGCCGGCTGCTGCCGATCCGCTGGCGCAGACGCGAGACCTGGGTGCTGCCGAGGATCGCGCCGAGGCCGAGGGCGCCGAGCATGTAGCCGTAGATCGCCGCGTCGCCGTCCGGGTTGCGGTGCGCCAGCAGCGGCAGCAGGGCCCAG
ACCGCGCTGGCGGAGAGGCCGAAGGCGAAGGAGCGCATCATCACCAGGCGGGTGACGGTTGAGTACTGGGTGAAGCGCAGCGCCGCGGTGACGCCTTCGAGGATGCCTTCCGGCGGCAGGCTGCGCTTGGGCACGTCGCGCCGCCACTGCCAGATCGCCCAGATCAGGGCCATGTAGCAGAAGCTGTTGAACAGGAACACCCAGGCCGGTCCCACCGCGCTCAGCAGCAGGCCGCCGAGGGCCGGGCCGGCGGCGCGGGCGACGTTGTAGTTGACGCTGTTGAGCAGCACCGCGTCGCTGACCATGCGCGCCGGCACCTGCTCGTTGACCGCCGCCTGCCAGGCCGGGATGGTCACCGAGCCGCCGAGGGAGATCCAGAGGATGGAGATGATCAGCAGCACCGGGTCGAGGTAGCCGAGGAAGGCCAGCAGGGTGGCGAACATCGCTCCGGTCATCTCGAAGCTGAGCCCCCAGAGCATGATCTTGCGCCGGTCGTGGTTGTCGGCGATGACCCCGGAGAGGATCGACAGCAGCACCAGCGGCAGTGCCGCGGCGACCTGGATCATCGCCACCATCAGCGGGCTGGCGTGGGCGTCGGTGACCACCCAGGCGGCGGCCACCGACTGGGCCCAGGTGCCGAGGTTGGCGAACAGGTTGCAGATCCAGATGATGCGGAAGGCCTGGATCGAGAACGGCGCCCAGGTGCCGGTGCGTTCTGGCTTGGCCGCTTGGCCTTCGGGTTTGAGGGGCAGGTCGTGCTTGGGCGAAACGGACTGGAGCATCAGATGGGTGCCTTGTATCAGCGTGCCTGATGTCGGGAAAGTCTAGCCGCCTGGCGCCTGGGCGCCATTGATGGCGGTCATGGGGCCGGCCCGGCGTGGATAGACATTGGTCCGATGGCGCTATCGGAGAGTTCCGATGTCCAGGCAGGACAGTGCCCTGGGTGTCCCGCATTGGTAGGCTTGCCGGCACCGAGTCGAGACAAGAGCGAAAAGCCGATGAAGTTGCTGTTGCGTTGCCTGCTGCTGGGGGCGTGGGTCGTCTCGCCGAGCCTGTGGGCCTGGTCCAACCATACGGTGGGCAGCTACCTGGCATTGCGCGAGCTGGCGGCCATCCGCGAGGCGCCGGAAGTCGAGGTGGAGCCGCTGGAGGCGTTCCTCGCCGCCGAGCGCGGCGGACTGGCCGCCCTGCTGGACGAGCAGGAAGCCTACGCCCGAGCGCATATCGGCAACTACCCGGCGCGTCCGGACGCCCTGCGCTTCGCTGCCGAGGGCGAGGCCGGCGACTTGCGTCAGGCGTTCCTCGCGGCGCTGCGGGTCAACCCGGAGATCCGCCTGGCGCTGGCGCTCCAGCCGCTTCCCGGGCAGGACCAGCCGCAGCGCCCGCACCTCAAGCCACAGGAGGTGCTGGTGTTCCAGAACCTCTCGCCGTGGACCGCCTGGCGTTTCATCCGCCTGGAGCCCGGCGAGCGAGTCGCGCCCCTGGCAGTGCTGGCCACCGCCGCCGACGAACCGGACTACGGCCACGACATCAACCTGTTCAGCGACAACCCCGGCGAAGCCGGCCAGCGCTACGGCTTCGGCACCCAGCCATTCGGCGACCCGCGCTTCGAGTTCAGCTCCCAGGCGCCGTTCCACATGGGCTTCTATCACGAGGCGGCGGTGATCTACAGCGGCGCGCCGTTCCTCGCCCGGGCCTGGCCGGAATGGCGTGCCTACCAGTACTTCGGCCTGTCCCGCTTCGCCTTCGCCAACGGTCATCCCTACTGGGGCTACCGGTTCCTTGGCTGGGGCATGCACTACATCCAGGACATCACCCAGCCCTACCACTCCACGCCGCTGCCCGGTGCCAGCCTGGCGGCGATGCTACAGATGGAGGGCAAGGCATTGCTCGGCTACCCGGAGGAAAAGCAGGCGGCCATCGAGCGTGTCGCGAACCGCCATACCGCGGTGGAGAAATACCAGTTCGACTGGCTGCGCCAACTGCTCCGCGACGGCCGC
CCGCAGCCGATGCTCGACGCCTACGCCGACACCCGCCGCGACGGTGCCTATCCGGCCTATTCGCCGACCTACTTGCGCGAGGTGGTGGCCGCCGAGTCGAACGCCCACGCCGCGGCCTTCGACGCCGCCATCGGCGAATGGCTGGCCGCCCGCCCGGCCTCCGCCGCGCAGGACTTCAGCGAGAGCAACCAGCCACGCCCCGAGGCCCACGACAATGCGGGGCTGAACGCGCAACTGATCGAGCTGATCGGGCATTTCGGCGCGCATAGCCGGAACATCGTCAGGGCGGCGTTGGAGACGGAGGAGGGTGGGGCGAAGGAGTGAGGGGGCGGCGGATAAGCGGCGGATAACCGCGAGCGGTTATTCGCCCTACGGAGCGGGGGCACTGTCCGGCGTTTCCGGGGCGTGTAGGGCGAATAACGCCATGGGCGTTATCCGCCGATGTCGCGGATAAGCGGCGGATAACCGCGAGCGGTTATTCGCCCTACGGAGCGGGGACGCTGTCCGGCGTTTCCGGGGTGTGCAGGGCGAATAACGCCATGGGCGATATCCGCCGATGTCGCGGATGAGCGGCGGATGACCGCGAGCGGTTATTCGCCCTACGGAGCTGGGGCGCAGTCCGGCTTTTTCGGGGTGTGTAGGGCGAATAACGCCATGGGCGTTATCCGCCGATGTCGCGGATGAGCGGCGGATGACCGCGAGCGGCTATTCGCCCTGCGGAGCAGGGGCGCTGTCCGGCGTTTCCGGGGTGTGCAGGGCGGCGAACCCGCCCCGCCAGGCGGGTCGATGCCGCGACTCAGTCGCGCAACAGCTCCGCAGGCACCTCGCTGCGCAACATCAACTGGCACTGCTGGCTCTCCGGGTCGAACAGGATCACCGCCTCGCCCCGCCGCAAGGCATGCCGGGCGCGTTCTACACGCACGTCCAGCGGGGTCTCGTCGCCATTGTCGGTGCCTTCGCGGGTGACGAAGTCTTCCAGCAGGTTGTTCAGGGTGTCGGCTTCGAGCAGGTCGTGGGGGATCAGCATTGGCGGTACTCCGGGATGGCGGCGAATGCTAGCGCGACGCATCGCGCACTGTCATGCCTGTCGCGAAGGCTGGCCGAGCAGGCGTTCGAGGGTGGCATAGAGTTCCGCCCGGTCCACCGGCTTGCCGAGGTAGGCGTCCATGCCAGCCTCGATTCCGGCCCGGCGGTGTTCGTCGAGGATGTGCGCGGTCAGGGCGACGATCGGTACGCGTGGCCAGCCCTGGGCACGCTCCTCCCGGCGGATCAGGCGGGTCGCCTCGAAGCCGTCCATTTCCGGCATCTCGCCATCCATCAGGATCAACTGGATGCCGTTCGGATCGCGCAGGTACTCGTCCAGCGCGAGCCGTCCGTTGCCGGCCAGGCGCACCGCGTAGCCGCGCTTGGCGAGGAAGCCGCGCACCACCAGCTGGTTCACCGGGTTGTCCTCGGCGACCAGGATGCATGGGGCGTCCAGGCGTTCGTCCGGGGCCGCCTCGCTGCTCCGCCCTGGCTGTCGGCGCCGCTCCCGGTACAGTTCCAGCAGGGCTTCGCGCAGGGCCTTCACCGCCACCGGCTGGGCCAGGGCCAGCAGGCGCAGGCCTTCGTGCGGCGGCAGGTCCTGGCAGTGCTGCGGCGGGCATAGCAGGAGGATGCGCTGGCCTTGCTCCAGTTGCGCATAGAGGGTGTCCAGCCAGATCGACGGCGGCCCCGGCCAGGGCGCCGCCAGGACCAGCAGGGGCGGTGCGCTGAAGTCCTCCAGGTAGGCGTTCAGCCGTCTCGGCTGCAGGCAGCGTTCGACCCGCAGCCCCCAACGCTCCAGCAATGCCTGCAGGCAGTCCAGGGTCAGGTTGTCCTCGCTGGCCAGCAGGGCCGGGCGGTGTTGCAGCAGTTGCGCCAGTTCATCCGCTTCGCCGGCGTCGAGGGCGGGGCTGAGCGGCAGGTCGACGCTGAACCGGGTGCCCTTGCCCGGCTCGCTGCTCACCTCGATGCGCCCG
CCCATCATCTGCACCAGCTCCCGGCTGATCGCCAGGCCCAGGCCGCTGCCGCCGTAGCGGCGGGTGGTGCTGGAGTCGGCCTGGGAGAAGGATTCGAACAGGGTCTTCTGCGCCTGGGCGGATATCCCGATGCCGCTGTCGCTGACGCTGTACAGCAGGCGCTCGTGTCCGCCCTCGTCGAAGCGCCGCTGCACGCGCACGGCGACATGGCCTTCGGCGGTGAACTTGAGGGCATTGCTGAGCAGGTTCATCAGTACCTGGCGCAACCGCGTCGGGTCGCCGTTGAGGCGGCGCGGCACGCCCCTGTCCAGGCCGAGATGCAGGCGCAGGCGTTTTTCCACGGCCTGGGCGCTGAACAGCGCCAGGGTGTCGGAGAGCAGTTCCTCCAGGTCGAAGTCGATGCGCTCCAGGTGCAGCTTGCCGGACTCGATGCGAGCGTAGTCGAGGATATCGTTGATCACCGACATCAGCGCGCTGCCGGAGCTGGCGATGGTCTCCACGTAGGCCGCCTGGCCGCGGTCCAGCGGGGTGTCGCGGAGCAGTTGCAGCATGCCCAGCACGCCGTTCAGCGGGGTGCGAATCTCATGGCTCATCTTCGCCAGGAAGCGGCTCTTCGCCTCGTTCTCCACCCGCGCCTGCTCGGCCGCCTGGCGCGAGCGGAAGCCTTCCTCCTTGAGCGCATTGATGCGGTCGGCGAGGCCGATGGACAGGGTCACCAGTTCCACGGTCATGCCGATCTTGACCACGCTGCTGCCGAACAGGCCGAACAGTTCGAAGCCCAGCGAGGCGGCGGTGGTGACCAGGAACGACAGCAGCAGCGCGCCCCAGGCCAGGATGTAGTAGAGGCCGTAGCGCAGGCCCTGGCGCCAGACGTGGACGCCGGCCAGCAGCAGGCTGAGGGAGACCAGCATCACCGTCAGGCTGGCGAGGACGTTCCAGGCGCGCAGCCCCACCAGCGGCTCGCTGGCCAACAGGACCACGCAGGCCAGCAGCAGGCCGCGGAGGAAGCGGTCGAGGCGCGGGAAGTCGCGGCGGGTGTAGAGGTAGCCGCGGCTGAACTGGATCGACACCAGGCAGCTCAGGTACATCAGCAGGTAGATGCCGGCCGACTCCAGCGCCACGTGTCCGGGCAGCAGCTTGAACAGCAGGCCGTCGAAGCTGGCGGAAAACAGGCCGAGGCTGAGGTTGTACAGCAGGTACCAGGCATAGGTGGCCTCGCGTAGCGAGACGAAGAGGAACAGGTTGTAGCAGAACAACCCGAACAGCACCCCGTAGAAGGCCCCGTTGAAGCCCATCAGGGTTTCCTGGCTGGCCGCGCTGGCCGCGTAGGTGCTGAAGTACAGCGGCACGTAGACGGTGCTGGTGCTCTGCACGCGCAGCAGCAGGGTGCTCTCGCCGGGCGGCAGTCGCAGCGGGAACCAGAAGTTGCGCACTTGCACCGGGCGCTGGGAGAAGGCGAACAGGTCGCCGCTCTCCTGCTGTTCGATGCGCCCATCGGCGGTCAGCAGGTAGACCTTGAGGTCGTCCAGCAGCGGGTAGTTGATCTCCAGGAAACCGGCCAGGTCGGTGCCGTTGCGGTTGTCCAGGCGCACCCTGAACCACCAGGCTGCGGCGTTCTTGCCGAAGTTCGCGTGGTCGCCGCGCAGGGCGGCGAAGGCCTGCGCCGGCAAGGCGAGGATGTCGCCCAGACGCGCCTTGCCGCCGGCGTCGCGGTAGTACTCGGCGTACTCGCCCAGGGACAGGCGCAGGTCCTCTCCGTCCAGCGGCGCGGGCGGCTGCGCCCGGGCCGGCAGCGCGCAGCAGAGCAACAGCAGGACCAGGCAGAGTCGAGACATGCAACTACTCCATGGACACGAGTCGGCACGCATGATGCTACGGAGGAAACGAGACAGGCGAGGGAACCTGCCATCAGCCTAGCAGTTGGCTGGAGAGGAGGGCGGCCTCCCCGGAGGCGGGGAGGCCGGCGCGGGCTCAGCCCTTGTCGCTGTTGTTGCTGCTGT
CGAGCACTTCGTCCACCGGCGGGACGTGGGTGCAGCTTTCCATGTGCACCGGATGCTCGAGCTGCTTGTTGAAGCGTTCCAGCGAGGTTTCCCGGGGCTTGGCGTCGCTGGCGAAGACCGGCGGGCTGAGGATGTAGGCGGTGAGCAGCCGGCTCAGCGCGGCGAGACTGTCGATGTGCGTGCGCTCGTAGCCGTGGGTGGCGTCGCAGCCGAACGCCAGCAGCGCGGCGCGGGTGTCGTGGCCGGCGGCGATCGCCGACTGCGCGTCGCTGTGGTAGTAGCGGAACAGGTCGCGGCGCAGGGCGATGTCGTGGCGCTCGCCGAGCCGCAGCAGGTGCCGCGACAGATGGAAGTCGTAGGGCCCGCCGGAGTCCTGCATGGCCACGCTGACGCTGTGCTCGTTGGAATTCTGGCCTTCGGCCACCGGGGCGATGTCGATGCCGACGAACTCGCTGACGTCCCAGGGCAGGGCGCCGGCGGCGCCGGAGCCGACTTCCTCGGTGATGGTGAACAGCGGGTGGCAGTCGATCGGCGGCTCCTGGCCGCTTTCCTTGACCGCCTTCAGCGCCGCCAGCAGCGCCGCGACCCCGGCCTTGTCGTCCAGGTGGCGGGCGCTGATATGGCCGCTCTCGGTGAACTCCGGCAGCGGATCGAAGGCCACGTAGTCACCCACCGAGATGCCCAGGCTCTCGCTGTCGGCGCGACTGGCGGTATAGGCGTCGAGACGCAGCTCGATGTTGTCCCAACTGATCTTCAGGTTGTCCACCTCGGTATTGAAGGCGTGGCCCGAAGCCAGCAGCGGCAGCACGCTGCCACGGAACACGCCCTGTTCGGTGAACACCGTGACCCGGCTACCCTCGGCGAAGCGGCTCGACCAGCAGCCTACCGGGGCCAGGCCGAGGCGGCCGTTGGGCTTGATCTCGCGGACCAGCGCGCCGATCGTGTCGAGGTGCGCGGAAACCGCCCGGTCGGGGGTATTCAGGCGGCCCTGGAGGGTCGCCCGGATGGTTCCCCGGCGAGTCATCTCGAAGGGCACGCCAAGTTCGTCGAGCTGCTCGGCGACATAGCGCACGATGGTATCGGTGAAGCCGGTGGGGCTGGGGATGGCGAGCATCTCCAGCAGCACCCGTTGCAGGTAGTTGAGGTCCGGTTGTGGCAGAGGGGTCATGAAGGCACCTCGCGGCTGAGGGGGAAGAGAAGATCGACGAAGCGTTCGGCGGTCGGCTGCGGCTCGTGGTTGGCCAGGCCGGCGCGCTCGTTGGCTTCGATGATCACGTAGTCGGGCTGGTCGGCGGCCTCGACCAGGAAGTCCAGGCCCACCACCGGTATTTCCAGGGCCCGCGCGGCCTGGATTGCGGCCTCGCGGAGGGCCGGATGGAGGGCGTCGGTGACGTCCTCGAGGATGCCGCCGGTGTGCAGGTTGGCGGTGCGCCGCACGGCCAGGTGTTCGCCGCTGGGCAACACGTCGTCGTAGCCATAGCCGGCGGCCCGCAGGGTGCGCTCGGTCTCGTGGTCGAGCGGGATGCGGCTTTCCCCGCCGGTGGCGGCCTGGCGCCGGCGGCTCTGCGCCTCGATCAGCTCGCGGATGCTGTGCCGGCCGTCGCCGATCACCGCGGCGGGACGACGGATGGCGGCGGCCACCACCTCGTAGCCAATCACCAGCACGCGCAGGTCGTGGCCGGCGTGGTAGCTCTCCAGCAGCACGCGCGAATCGAAATGGCGGGCGGCCTCGATGGCCTCCTCGACCTCTTCCGCGGTGCGCAGGTCCACCGCCACGCCCTGGCCCTGCTCGCCGTCCACCGGCTTGACCACCAGCGAACCGTGCTCGGCGAGGAACGCGGCGTTTTCCTCCGCGCTGCCGGCCAGGCGCTGCTGTGGCTGGCGCAGGCCGGCGTGCTGCAGGGCGCGGTGGGTGAGGACCTTGTCCTGGCAGAGGGTCATGCTTACCGCCGTGGTCAGGTCGGAGAGCGATTCACGGCAGCGGATCTGCCGGCCGCCCT
GGGTCAGGGTGAACAGGCCGCCCTCGGCGTCGTCGACGCGCACCTCGATGCCGCGCCGGTGGGCTTCGTCGACGATGATCCGGGCGTAGGGGTTGAGGTCTTCCTCCGGGCCGGGGCCGAGGAACAGCGACTGGTTGATGCCGTTCTTGCGCTTGATGGTGAAGGTCGCCAGTTCGCGGAAGCCGAGCTTGCGGTAGAGCGCCTTGGCCTGCTGGTTGTTGTGCAGCACCGAGAGGTCGAGGTAAGCCAGTTCGCGGCTCATGAAGTGCTCGACCAGGTGCCGTACCAGCGCCTCGCCGACGCCCGGCCGGCTGCATTGCGGGTCCACCGCCAGGCACCAGAGGCTGGAGCCGCCCTCCGGATCCTGGAACGCCTTGGCGTGGTTGAGGCCCATCACCGTGCCTACCGCGACGCCAGTGTCGGCGTCCTCGGCCAGCCAGTAGATCGGCCCGCCCTGGTGGCGCGGGCTGAGGCGCTCGGTCTGCACCGGAAGCATGCCGCGCGCCAGGTACAGGCGGTTGATCGCCGCCCAGTCCTCCTCGGTATGCACCCGGCGGATGCGGAAACCGCGTGGCGGACGGCGCGCCGGGCGGTAGTCGGTGAACCACAGGCGCAGGGTATCCGAGGGATCGAGGAAGAGCTGCTGCGGGGCGTGGGCCAGGACCTGCTGCGGCGCCGCCACGTAGAGCGCGATGTCGCGTTCGCCGGGGGCTTCCTGCAACAGGTCGCGGGCCAGTTCCTCGGCGTCCGGGTAGGTCTGCCCGATCAGCAGGCGGCCCCAGCCGCAGTGGATGCTCAGCGGTTTCTCGGTGGGCTCGCTGTGATCTTCGGCGAGACGCGCCTGGAGGCGTTCGTAGGTGGGCGCCTGGATACGTTGCAGGCGCTGGTTGTGCGGGTGAGGCAGATGCAAGTGGCCTTTCATGATCGCTCCTGGTTCGGGCCCCTGCGTTGGGGGCCCATTTTCAAAGTCCCTGTTCGCTGAGCCAGAGGTTGAGGGCGGCCAGCTGCCACAGCTTGGAGCCGCGCAGCGGTGTGAGGTCGCCGTCCGGATCGCTCAGCAGGCGATCGAATATCGCCGGCTGGAACAGGCCGCGGTCCTGGCTCGGGTCGAGCAGCAGTTCGCGTACCCATTCGCGGGTGCGGCCCTGCAAGTGCTTGAGGCCGGGCACCGGGAAGTAGCCCTTGGGCCGGTCGATGACTTCGCTGGGAATCACCTTGCGCGCGGCACCCTTGAGCACCTGCTTGCCGCCGTCGCCGAGCTTGAAGCGGGCGGGAATGCGCGCCGACAGCTCGGCCAGGCGGTAGTCGAGGAACGGCACGCGGGCTTCCAGGCCCCAGGCCATGGTCATGTTGTCGACCCGCTTGACCGGGTCGTCGACCAGCATGATCGTGCTATCCAGGCGCAGCGCCTTGTCCACCGCGGCCTCGGCGCCGGGGCTGGCGAAGTGGTCGCGGACGAAGCGCCCGGCGACGTCCTCGACGCGGAAGCGCTCGCCGACGGTGGCCAGGTATTCCTCGTGGTCGCGGTCGAAGAACGCCGCGCGGTACGCGGCGAAGGCGTCGTCGGCGCCATCCACCTTCGGATACCAGTGGTAGCCGGCGAACAGCTCGTCGGCGCCCTGGCCGCTCTGCACCACCTTGCAGTGCTTCGAGACTTCCCGCGCGAGCAGGTAGAAGGCGATGCAGTCGTGGCTGACCATCGGCTCGCTCATGGCGCGGAAGGCCGCCGGCAACTGCTCGATCACTTCGTGCTCGCCGATGCGCAGGCGATGGTGACGGGTGTGATAGCGCTCGGCGATCAGGTCGGAATACTGGAACTCGTCGCCGCGCTCGCCGCCGGCATCCTCGAAGCCGATGGAGAAGGTCAGCAGGTTGTCCACCCCGGCTTCGTGCAGCAGGCCCACCAGCAGGCTCGAATCGACCCCGCCGGAGAGCAGCACGCCGACCTCGCGGGCGGCGCGCTGGCGGATCGCCACCGCTTCGCGGAGGCCGTCGAGGAC
GCGCTCCTGCCAGTCGTCGAGCGTCAGCTCGCGCTCGTCCGGGCGTGGGCCGTAGTCCAGCGTCCACCAGGTGCGCTGCTCGCAGCTGCCGTCGAGGTCGACGCTCATCCAGGTCGCCGGCGGCAGCTTCTTCACGCCCTCCAGCAAGGTGTGCGGGGCGGGAACCACGGCGTGGAAGTTGAGGTAGAAATTCAGCGCCTGCGGATCGAGGTCGCTGGCGATGTCGCCGCCCTTGAGCAGGGCCGGCAGGCTCGAGGCGAAGCGTAGGCGCGAGCGGTCGAGGCTGTAGTAGAGCGGCTTGATGCCGAGTCGGTCACGGGCCAGGAACAGCCGCTGGCGATCGCGCTCCCAGACGGCGAAGGCGAACATCCCATTGAGCCGGGGCAATAAATCGGCGCCCCAGGCATGGTACCCCTTCAAGAGAACTTCGGTGTCGCCGCCGGAGAAGAACCGATAGCCAAGGCTTTCCAGTTCGCCACGCAGTTCGGGGTAGTTGTAGATGGCGCCATTGAACACCATGGCCAGGCCGAGATCGGGATCGATCATCGGTTGGCCGGAAGCTTCCGCCAGGTCCATGATCTTCAGCCGTCGATGTCCGAGGGCGATCGGACCCTGGGCGTGGAAGCCCCAGGCGTCGGGGCCGCGAGGGGCCAGGTGGTGGGTGATGCGTTCGACCGCTGCGAGGTCGGCTGCCTGGTTGTCGAAACGAAACTCTCCTGATATGCCGCACATATTCCTTACCGGTTCTCCGTTGGGGAAGTGGGTTGCGTACCCCTTCCGGGTACATGACTTACCTAACAGAGGACCGTTACAGTTGGAAGGAGTTCTATACGAATTTTCCGGGCCTATCAGGACCCTTAACGAAGAATTTATTATGGTTGAAAGTAATAAGACCGCTGCCGATACCTATTCGCTCGCTATTTTCCGCGCCATCCGCCGTCTCCAGCAGGCCGCCGAAATCCACTCCAAGCGTCTCAGCCGATACGGTGGCCTGACCCCGCTGCAATTGCTGATCCTGCATGTCCTGGCGGTGGAGGGAGAACTGACCGCGACGCAACTGGCCAAGCTGGTCAGCCTGTCCCAGGCATCGCTCTCCGGCGTGCTCGACCGCCTCGAAGGACGCGGCCTGCTCTATCGTCGGCGCGACGAACAGGACCGGCGCAAGTCCTGGCTGCACCTCGACCCGGCCGGCCACGAAGCCCTGGCGGAGGCGCCGCCGCTATTGCCGGAGTACGTGATCGAGCGCTTCGCCGCGTTGCCCGAGTGGGAGCGCCACGGCCTGCTGGCCGCGCTGCTGCGCGCGGCCGACCTGTTCGGCTTGCCGGAAGAGGACGTCGAAGAAGAGTGAGGGCGCCAGGTTCGCTACGAAAGACAGTCGCGTCGGCGGTCTTTACCTGGCGAACCTGCCGATGCCTGGTGGACAAGCGCGTTGTACGCCATGGCGTGACCTTCGCGTAGGGCGCATAACGCCGCTTCCCCGGTGCCCCGGCCGCCGAGGAAAGCTACCCGCCGTAGAGCTTCTGCGCCGCCAGGGAGAAGCTCACCGCCACCAGCATCAGCGCGAACAGGCGTTGCAGGGTCGGGCCGCCGAGGCGCTGCGCCAGCAGGTTGCCGCCGAGCACGCCGACCGCGCCGCCGGCGGCGAGGCCGCCGAGCAGCGGCAGCGGCGGGTGGGCGCCGGTCAGGTAGATCAGGAAGCCGCCGCCGGAGACCAGCGCGATCACCGCCATCGAGGTGGCGGTGGCGGCCATCATCGACAGCGGGGTGAACCAGAGTAGCCCCGGCACCACCAGGAAGCCGCCGCCCACGCCCATCAATCCCGACAGCAAGCCGACCGCCAGGCCGATGCCGAGCAGCGGCAGGCTCCGTGCCTGGTCGCTGGCCTCGCGTTTCATCCCGGCGCCGCGCCACATGCGCCAGGCCGACCAGAGCACCAGCAGGCAGAAGGCGACGATCAGCACGCCTTCCGGGACGAAGCGGCCGAGCCATTGGCCGACGGC
GTTGCTCGGCAGGCCGGCCAGCGCCAGGACCAGCACCGGCCGCCAGGCCACCTGGCCCTGGCGCGCGCGCGGAATGGCGCCAATCAGCGCCGACAGGGCGACCGCGCCGAGGCTCACGCCGATGGCGTCGCGCAGCGGCAGGTGCAGGCTGAGCAGCAGGGGCAGGGCGACCAGCGAGCCGCCGGCGCCAGTGAGGCCGAGCAGCAGGCCGAGAACGGCGCCGATGCCCAGGGATTCGAAGAGCAGTACATCCATCGTCGATCAACTGTTTCCGCGGGTGAGCCAGCCGGTGGTCCGGTATGGCCTGCGCGTGCGGCAGGCCCGAAGGGTAGCGCAAATCGGTGCGCCCTGGGCTTCAGCCGCGCTGCCCGCGCACCAGGTCGCGGAGCAGGAAGCGGTTCGGGTGGCAGGCCTCGGCCACCGCGCGCGGCAGCGGCAGCGGCTCGCCGCAGATCCACGCCGCCAGCAGTTCGCCGGACAGCGGCGCACTGATCAGCCCGCGCGAGCCGTGGCCGCTGTTCAGGTACAGGCCTGGCAGCCAGGGGCAGGCACGCTCCGGCGCCTGGCGGGCGTCGCGGGCGAGCACCGCGTAGGCCTCGTCGAAGGCCGCGCGCTCGGCCAGCGGGCCTACCAGGGGCAGGTAGTCGGGGCTGGTGCAGCGGAATGCCGCGCGGCCTTCCAGGCGCTCCAGCGGCAGGTCGTCGGCGCCCAGGCGCTGCAACAGGTCGGGGGATATTTCCCGCAGCAGTTCGAGGTTGCCCTGGTGTTCGGCGAGGGTCGGCGCCAGGTCTTCGCTCTTGAAGTCGAAGCTGGCGCCGAGGGTGTGTTCGTCGCCACGCGGCGGTGCGACGTAGCCCTCGGCGCAGACCACGGTGCGCAGCGCCCGGCTTTGCGGGGTGGCCGGCAGGCGGGTGACCTGCCCGCGAATGCGCTTCAGCGGCAGCTCGGCGGCGGGCGGGAAGTCGCGGATGTCGGCGGCGGTGGCGAGGATCGCGAGTGGCGCGCGGGCCAGGCATTCGTCGCCGGCGTAGGCGCACCAGTCGTCGCCCTCGCGGCGCAAGCGTACTGCCCGGCCGCTCAGCAGGGTGATCCCGGGGGTGGCGGCCAGGGCCTGGCACAGCGCGGGCGGATGGACCCAGCCGGCTTCGGGATAGAACAGGCCGCCGGCGGGCAGGGCGACGCCGGCGAGGCGCTCGGCCTGCTCGCGCTCCAGGCCGTGCAGCAGGTCGGCGGGGAAGGCTGCGGCCAGTTGCGCCTGGCGTTGCGCTTCCTTGGCGTCGAAGGCCAGTTGCAGTACGCCGCAGGCATCCCAGTCGTGGCCGCGCCGCAGGCGTTCGAGCAGCCGGCGGGTATGGCCGAAGCCGCTGAGCACCAGGCGCGACAGCGGCGTGCCGTGGGCCGAGAGTTTCAGGTAGAGCACGCCTTGGGGGTTGCCCGAGGCTTCCCGGGCGAGGCCGGGATGGCGTTCGATCAGGGTCACCTGCCAGCCGCGCGCGGCGAGGCTGGCGGCGCTGGCGCAGCCGGCCAGGCCGCCGCCGACCACCAGTGCCGCGCGGCGCCCGGCGTGGGGCGCGGGGCGTGCGTACCAGGGCTTGCCGGCGTTCGCCGGCGGCCCCTGGTAGGTGCCGCTGAGCATCTCGCGCTTCTGTCCGTAGCCCGGTACCCGTTGCATGGCGAAACCGGCCTCGACCAGCCCGCGGCGAACGAAACCGGCGCTGGTGAAGGTGCCCAGGGTCGCCTGCGGCGCCGACAGTCGCGCCAGTTCGGCGAACAGTACGGGCGACCACATGTCCGGGTTCTTCGCCGGGGCGAAGCCATCGAGGAACCAGGCGTCGACCCGCGCGTCGAGCTGCGGCAGGCACTCCAGGGCATCGCCCAGCAGCAGGGTCAGGCCGACCCTTCCGCCCGCGAACGCCAGGCGCTGGAAGCCCGGGTGCACGGCAAGGTACTGGCCCAGCAGGGCCTCGCTCCAGGGCGCCAGTTCCGGCCATAGCGCCAGG
GCGCGGCGCAGGTCGGCGGCGGCGAGGGGGAATTTCTCCACGCTGACGAACTCCAGGCGCGCGCCGGCCGGCGCGACCCGCTCGAACAGCTGCCAGGCGCAGAGGAAGTTTAGTCCGGTGCCGAAGCCGGTCTCGCCGATACACAGCACCTCGCCGTCGCCCAGCGCGGCGAAGCGCTCGGCCAGGCGGTTGGTGGCGAGGAACACATGGCGCGTCTCGTTGAGCCCGGAATGCCGGGAAAAATATACGTCGCCGAAGGCGCGGGATAGCGGCTGGCCGTTTTCGTCCCAGTCGAGCTGGGCATGCTGGAAGTCGGACATGGCAGAACCTGAAAAAGCGGATGGCGCATTTTACGGGGCTTTGCCGGCCAGCGGCGAGGCGCTGCGCGAACATGCCGCGGCGCCGCGCAGACAGCCGCACGCCGATCCGCTACCTTGAAGGACTCAGGTTAAGGGAGACCTCCATGTTCGAATCCGCGGAAGTTGGCCACAGCATCGACAAGGACACCTACGAGAAGGCCGTCATCGAGTTGCGCGAAGCGCTGCTCGAGGCGCAGTTCGAGCTCAAGCAGCAGGCGCGCTTCCCGGTGATCATCCTGATCAACGGCATCGAGGGCGCCGGCAAGGGGGAGACGGTCAAGCTCCTCAACGAGTGGATGGACCCGCGCCTGATCGAAGTGCAGAGCTTCCTCCGTCCCTCCGACGAGGAACTGGAGCGGCCGCCGCAGTGGCGCTTCTGGCGGCGCCTGCCGCCCAAGGGGCGGACCGGTATCTTCTTCGGCAACTGGTACAGCCAGATGCTCTACGCGCGGGTCGAGGGGCATATCAAGGAGGCCAAGCTGGATCAGGCCATCGATGCCGCCGAACGCTTCGAGCGCATGCTCTGCGACGAAGGCGCGCTGCTCTTCAAGTTCTGGTTCCACCTCTCCAAGAAACAGCTCAAGGAACGCCTCAAGGCGCTGGAAAAGGACCCGCAGCACAGCTGGAAGCTCAGTCCGCTGGACTGGAAGCAGAGCGAGGTCTACGACCGCTTCGTGCATTACGGCGAGCGCGTGCTGCGCCGCACCAGCCGGGACTACGCGCCCTGGTACGTGGTGGAAGGCGTGGACGAGCGCTACCGCGCCCTGACCGTCGGTCGCATCCTCCTCGAAGGGTTGCAGGCCGCGCTGGCCACCAAGGAGCGCGCCAAGCGCCAGCCGCACGCCGCGCCGCTGGTGTCGAGCCTGGACAACCGCGGCCTGCTGGACTCCCTGGACCTGGGCCAGTACCTGGACAAGGACGCCTACAAGGAGCAGCTCGCCGCCGAGCAGGCCCGCCTGGCCGGGCTGATCCGCGACAAGCGCTTCCGCCAGCATTCGCTGGTCGCGGTGTTCGAGGGCAACGACGCGGCCGGCAAGGGCGGCGCCATCCGCCGTGTCACCGACGCGCTGGACCCGCGCCAGTACCATATCGTGCCGATCGCCGCGCCGACCGAAGAGGAGCGTGCGCAGCCCTATCTCTGGCGCTTCTGGCGGCACATTCCGGCGCGTCGCCAGTTCACCATCTTCGACCGTTCCTGGTACGGCCGCGTGCTGGTGGAGCGCATCGAGGGCTTCTGCGCACCGGCCGACTGGCTACGCGCCTATGGCGAGATCAATGACTTCGAGGAGCAGCTCAGCGAGTACGGGATCATCGTGGTGAAGTTCTGGCTGGCGATCGACAAGCAGACCCAGATGGAGCGCTTCAAGGAACGCGAGAAAACCCCCTACAAGCGCTACAAGATCACCGAGGAAGACTGGCGCAACCGCGACAAGTGGGACCAGTACGTGGACGCGGTGGGCGATATGGTCGACCGTACCAGCACCGAGATCGCGCCCTGGACCCTGGTCGAAGCCAACGACAAGCGCTTCGCCCGGGTCAAGGTGCTGCGCACCATCAACGACGCCATCGAGGCGGCGTACAAGAAGGACAAGTGAGGCCTGCCGGAATGCGGCGCCGTCCTCTCCAGGGGATGGCGACGCA
TACGCCGGATGAATGATCGGTGCCTCGCGCGATTGCCTCGCCTTATCCTCGCTCGATCCGGCCGCTGCCCCGGCGGCCGACCGACAATAACAGCGAGGGTACCCCCATGCGTGAAGTGGTGATCGTCGACAGCGTCCGGACCGGCCTGGCCAAGTCCTTCCGCGGCAAGTTCAACCTGACCCGGCCGGACGACATGGCCGCCCACTGCGTCGACGCGCTGCTGGCGCGCAACGACCTCGACCCGCTGCTGGTGGATGACTGCATCGTTGGCGCCGGCTCCAACGAAGGCGCCCAGGGCCACAACATCGGGCGCAACGTGGCGGTGCTCTCCGGCCTCGGCATCCAGGTGCCGGGGATGACCCTCAACCGCTACTGTTCCTCCGGCCTGCAGGCGATCGCCATCGCCGCCAACCAGATCGCTTCCGGTTGCAGCGAGGTGATCGTCGCCGGCGGCGTCGAGTCGATCACCCTGACCCTGAAGAGCGTCAACACCGACCACCTGGTGAACCCGCTGCTGCAAAGGGAGGTGCCGGGCATCTACTACCCCATGGGGCAGACCGCCGAGATCGTCGCCCGTCGCTACGGCATCACCCGCGAGGCCCAGGACGCCTACGCCCTGCAGAGCCAGCAGCGGATGGCGCGAGCCCAGGCGGACGGGCTGTTCGCCGACGAGATCGTGCCGATGACCACCCGCTACGCGGTGGAGGACAAGGCCAGCGGCGAGAAGCAGGTGCTCGACGGAGTGGTCGACCGCGACGACTGCAACCGCCCGGACACCACCCTCGAAGGCCTGGCCTCGCTGAAGCCGGCGTTCGCCGAGGACGGTTCGGTCACCGCCGGCAACGCCTCGCAACTCTCCGACGGCGCCTCGATGACCCTGCTGATGAGCCTGGAGAAAGCGCTGGCGCTGGGCCTGGAGCCGAAGGCCTTCTTCCGTGGCTTCACCGTGGCCGGCTGCGAGCCGGACGAAATGGGCATCGGTCCGGTGTTCTCGGTGCCGAAGCTGCTCAAGGCGAAGGGGCTGAAGATCGCCGACGTCGATCTCTGGGAACTCAACGAAGCCTTCGCCTCGCAGTGCCTGTACTGCCGCGACCGGCTGGAGATCGACAACGAGAAGTACAACGTCAATGGCGGCTCCATCGCCATCGGCCACCCGTTCGGCATGACCGGCTCGCGCCAGGTCGGTCATCTGGTCCGCGAACTGCACCGGCGCAACCTGCGCTACGGCGTGGTGACCATGTGCGTCGGCGGCGGCATGGGCGCCAGCGGGCTGTTCGAAGCCGTACGCTGAGCCCGTGCGCGACGGCGGGCCCCGCAGGTCGGGGCTCGCCTCATGGGTTTTATTTACATGACGCTCCGTGCCATCCCCGGCGACTCGCGCTAGGCTAGGCGCCTTTTTCCTTGCATAGGGATGTAGCGATGTCCACCGAGCCCAACTCGCCGTTCGTCGACGACCCGCTGAGCGCCGTCGATGCACGGGTTCTCGGCAGCCTCGTCGAGAAGCAGGCGACCACCCCGGAAACCTATCCGCTGACCCTCAATGCCCTGGTCCTCGCCTGCAACCAGAAGACCAGCCGCGATCCGGTGATGAACCTCACGCCCGGCCAGGTCGGCCAGAGCCTGCGCCAGCTCGAGGGGCGCGGCCTGGTCAGGCTGGTGATGGGCAGCCGCGCCGACCGCTGGGAGCACACCCTGGGCAAGGGCCTGGAATTGGTGGCGCCACAGGTGGCGCTGCTCGGCCTGCTGTTCCTGCGTGGTCCGCAGACCCTCAACGAACTGCTCACCCGCAGCAACCGCCTGCACGATTTCGACGACGTCGAGCAGATCCGCCACCACCTGGAACGCCTCGCCGGCCGTGGCCTGGCCGTGCACCTGGAGCGTCGCGCCGGGCAGCGCGAGGAGCGCTACATGCACCTGCTCGGCAGCCAGGCCGACCTCGAGGCCGCCGTGGAGGCGATGGGCAGCGACCCGGAACGCGCCGCGCCGGCCGCTCTGTCGGC
CGATGCGGAAGCGCGGATCGCCGAGCTGGAGACGCGCCTGGCGGCGCTGGAGGAGCGCCTGGCCAGGCTCGAAGGAGGGGCGTGATATCGCGGGAGGGCGCAATCGGTGACATGCAGGTTCCGGCGGATAACCGCGAGCGGTTGTTCGCCCTACGGCCCGCAGTGACTTTGTGGGCGGCGCTGCCTGTCGGGTAGGGCGGATAACGCCAGCGGCGTTATCCGCCGATGCGCCGCCCCGCAAGGCGAACGCCAACCCAAAAAAGCCCGGCTATTGCCGGGCTTTTTCATGCGCGCGCGTCGGTGGCCGATCAGGCCTCCGGCTGTACCGGCACGTGGATCAGTTGCAGGCGCTCGCTGCTCCAGGCACGGGTGCTGTTGGTCAGCTCGATGTCGTTGTTCAGCTTGCGCCCGTAAGACGGCACGATTTCCTTCAGGCGTGCCTGCCACTCGGGAGTAGCGACCTTGTCCTTGAAGGCCTTCTCCAGCACCGACAGCATGATCGGCGCGGCGGTCGAGGCGCCCGGCGAGGCGCCGAGCAGGGCGGCGACGGAGCCGTCGGCGGCGGTCACCACTTCGGTGCCGAATTGCAGTACGCCGCCTTTCTCGGCGTCCTTCTTGATGATCTGCACGCGCTGGCCGGCCTGCACCAGCTTCCAGTCCTCGTCGCGGGCTTCCGGGAAGTACTCGCGCAGCGAGGCCATGCGGTCGTCCTGGCTGAGCATCAACTGGCCGATCAGGTACTGGCTGAGATCGAAGTTGTCGATGCCGGCGTTGAACATCGGGCCGATGTTGCCGCTGGTCACCGAGCCCGGCAGGTCCCACAGCGAGCCGTTCTTCAGGAACTTGGTGGAGAAGGTGGCGAACGGACCGAACAGCAGAACCGGCTTGCCGTCGATCATGCGGGTGTCGAGGTGCGGCACCGACATCGGCGGCGAACCCACCGAAGCCTTGCCGTAGACCTTGGCCAGGTGGCGCTTGACCACGTCCGGGTTGGTGGTGGCGAGGAACGAACCGCCCACCGGGAAGCCGGCGTAGCCTTCGGCCTCGGGGATGCCGGACATCTGCAGCAGCTTCAGCGCGCCGCCGCCGGCGCCGATGAAGACGAACCTGGCCTTGACGCTGGTTTCCTTGTCGCCATTGGCCAGGTCGGCCATGGTCACGGTCCAGGTGTTGTCGTCGTTGCGCTTGAGGTCGCGGACCTCATGTTGCAGGCGCAGCTTGAAGGTGTCCTTGGCCGACAGCGAGCCCACCAGTTGGCGAGTGATCTCGCCGAAGTTGACGTCGGTGCCGATCGACATGCGCGTGGCGGCGATCTTCTGCCCCGGCTCGCGGCCTTCCATCACCAGCGGCACCCACTGCTTGATCTGCTCGGGGTCTTCCGAGTACTCCATGCCGCGGAACAGCGAGCTGTGCTGCAGGGCGGCGTGGCGCTTCTTGAGGAAGGCGACGTTGTCGTCGCCCCAGACGAAGCTCATGTGCGGGACGTTGTTGATGAACGACTTCGGATCGTTCAGCACCTTGCGGTCGACCTGGTAGGCCCAGAACTGCTTGGAGATCTCGAAGTTCTCGTTGATCGCCACCGCCTTGCTGATATCCATCGAGCCGTCCGCGGCCTCGCTGGTGTAGTTCAGTTCGCAGAACGCCGAGTGACCGGTGCCCGCGTTGTTCCAGCCGTTGGAGCTTTCTTCGGCGACCTTGTCCAGGCGCTCGACCATCTCGATGGTCCAGCCCGGTTCCAGTTCGTTCAGATAGGTACCCAGGGTGGCGCTCATGATGCCGCCACCGATCAGGAGCACGTCGACCGGTTTTTCCGATTCGACGCTGTTTTTGGAGCAACCCAGCACGCTGACGCACAAAAGCATCAATAAGATTTTTTTCATGGATTTACCGCTAGAAAACGTGGAGAAGCGTGGATACGCACGACATGGCAGGTATTCACCGGGTGAATGATACTGGAACTCACCCCCGATGTGAGTGCGTTCACATGCGGTGACATTTGC
CTCGGCGTTCCGCCGAGAATACCTGCCTTCCCTTTCTTGAACGTTTCAGCCACCGGGAAATAAATTCAGCGCCTGTATCGTTTCAGCGGGAACCCGGCGCGTGGATTTCCGCCGATTGGCGCATCCTGCGGATATACGCGTCGATTTCCCGCTCCGCGTCGAAACGGGTGAAATAGGGACCTTCCAGGGTCCCTTCGCGGGTGGAGAAGAAGTACTGCCCATTCACCGAACTTATGCGATCGCTGCGAAAGCGGGTGGCCGGGGTGGGATCGGTGGAACGTTTCCCCAACATGACGGCGACTCCTCGATAGCGGGCCAAGGTGTTTTCAGTGTAGGCGGGGGTTGGCCGGGCGGCAGGCCGGCGGACGAAAGGAGACGGATTTCCAGGCGAAGGGTTGGGTATTCTATTTTTGGTTATAAGAAAATCTGAATTTATTCTTTTTAACGTCAGCCGGCCTTGGGCATAGTGGGCTCCAACACGAACAAGGAGTGACCCCATGAGCCTCAGACTCGGCGACATCGCCCCCGATTTCGAACAGGACTCCAGCGAAGGGCGCATCCGCCTCCACGAGTGGCTGGGCGACAGCTGGGGCGTGCTGTTCTCCCACCCCGCCGACTTCACCCCGGTGTGCACCACCGAGCTGGGTTTCACCGCCAAGCTCAAGGACCAGTTCGCCCAGCGCGGGGTCAAGGTCCTGGCGCTGTCGGTGGATCCGGTGGAGTCGCACCTGAAGTGGATCGACGATATCAACGAGACCCAGGACACCCGGGTCAATTTCCCGATCATCGCCGACGCCGACCGCAAGGTTTCCGAACTCTACGACCTGATCCACCCGAACGCCAACGACACGCTGACCGTGCGTTCGCTGTTCATCATCGACCCGAACAAGAAGGTGCGCCTGATCATCACCTACCCGGCGAGCACCGGGCGCAATTTCAACGAGATCCTCCGGGTGATCGATTCGCTGCAACTGACCGACGAGCACAAGGTCGCCACGCCAGCCAACTGGGAGGACGGCGACGAGGTGGTAATCGTGCCTTCGCTGAAGGACGAGGAAGAGATCAAGCGGCGTTTCCCCAAAGGCTACCGCGCGGTGAAGCCCTACCTGCGCCTGACGCCCCAGCCCAACCGCTGAGGCCGGATTCGTCCCGCCTGCATGGGCAGGCACGGCAGGCGGGACCTTTTATTCGACGGTGCGTGCCGTCATGCAACGCTGTTTAGCCACGAGCAGGGTTTTTGGCCGGTTTCGACCGGCCCTCTTTTTTATGTGCGCCAGCGCGTCGCCGCGACGCTTTTTATCCGCCATCCATTTCCGAAACCGGCTCTGACTTGCCCGCCAGACGGGGAGAAGGCTCGCTCATCCACCTCCTTCCAGGAGCAGACGCATGATCGCCGCTCTCCGTCGCTGCCGGGCCTGGCTGGCCCTGCTCGCCGTTCTTGCCGTGTTTCCCGCGGCAGAGGCCGTCGCCCCCGCTGAAATCCGTCTCGACTATGCCTACTACGCCCCCACCAGCCTGGCGCTCAGGAAGTTCGGCTGGCTGGAGAAAAGCCTGGACGGCAGCGGCACCCGGGTGCGCTGGGTGTTCAGCCAGGGCAGCAACCGTTCCCTGGAGTACCTCAACGCCGGCAGCGTCGATTTCGCCTCGACCGCCGGCCTCGCCGCGGTACTCGCGCGGGCCAACGGCAGCCCGGTACGTACCGTATACGTCGCCAGCCGCCCGGAGTGGACCGCGCTGCTGGTGCGCAAGGATTCGCCGATCCGCAGCCTCGCCGAGCTCAAGGGGCGCAAGGTGGCTGCCACCAAGGGCACCGATCCCTACCTGTTCCTGCTCCGCAGCCTGCACAGCGTGGGCCTGGACAAGAACGACCTGCGTATCGTCCACCTGCAGCATCCCGACGGCCGGGTAGCCCTGGAGAAGGGCCAGGTCGACGCCTGGGCCGGACTCGATCCACACATGGCGGCCAGCGAGTTGCAGGCCGGCTCGCGGCTGCTCTACCG
CAACCTCGGCTTCAACAGCTACGGCGTGCTCAATGTCCGCGAGGATTTCGCCGAACGCCATCCGCAACTGATCCGCCAGGTGCTGGCGGCCTACGAGCAGGCGCGCCACTGGGTGATCGGGCATCCCGACGAGGCCGCGCAACTGCTTGCCGAGGAAGCCGGCCTGCCGCTGGAAGTGGCTCGCCTGCAACTGTCGCGCACCGATTTCAGCCAGCCGCTGCCGGGCGCCGAACAGGTCGCCGCGCTCAAGGCCGCGGCGCCGATCCTGGCGGACGAGCGCCTGGTGCGGCCGGGCGTGGACGTGCAGAAGGTGGTCGACGAACTGATCGCGCCGCAGTGGGCCGCCGAGGTCATCGGCGGCGCCCCGTTGGCACGCACGGAGCCCTGAGCCATGGTCAGCCAGAGCCTGCGACTGGCCCGGCGAGTCTCGCGCCGGGCGTGGCGACTGCGCGTGCCGGGCGCCTGGCGGGCCTGGGCCTTGCCGCTGCTGGCACTATCGTCGTGGGAGGCGCTGGTGCGCCTGGGCTGGCTGCCGGCCTACCAGATGCCGGCGCCGAGCGGCATCCTGCTGACCCTGGTGGAACTGGCTCGCGGCGAGCTGTGGGGGCATGTCGGCGCGAGCCTGGCGCGGGTCGCCGCCGGCTTCGCGATCGGCAGCGGCCTGGCCCTGGTGGTCGGTACCTGGGTCGGTCTCAGTCGTCGCGCCGAAGCCTACCTGGAGCCGAGCTTCCAGGCCCTGCGGGCGATCCCCAGCCTGGCCTGGGTGCCCCTGCTGCTGCTCTGGTTCGGCATCGACGAGACGCCGAAGATCGTCCTCATCGCCCTCGGCGCCTTCTTCCCGGTCTACCTCGCGCTGGTCGCCGGGGTACGCGGGGTCGATCGCAAGTGGGTGGAACTGGGGCGGCTCTACCGTTTGTCGCGCTTCGCCCTGGTCCGCCGCATCCTCCTTCCGGCGGCGCTGCCGAACCTGTTCACCGGCCTGCGCGGGGCGCTCAGCCTGAGCTGGATGTTCCTCGTCGCGGCGGAGCTGATCGCCGCTACCCAGGGCCTCGGCTACCTGCTCAGCGACGGTCGGGAAACCTCGCGTCCGGACTTGGTGATCGCCGCGATCCTGGTCCTGGCGGCCCTCGGCAAGCTCAGCGATGGCCTGCTCCGCTCGCTGGAGCGCCGGGCCCTGCGCTGGCGCGACAGCTTCGACGGGGAGGGTGGCGCATGAGCGGCCTGCTCGACCTGCTGGAGATTCGCAAGGCCTATGGCGATACGCGGGTGCTGGAGGGCGTGGCGCTGTCGCTGGCGCCCGGCGAGGTGGTCAGCCTGCTCGGCCCCAGTGGTTGCGGCAAGAGCACCCTGCTGCGGATCGCCGCCGGACTGGACGATGACTTCCAGGGCACTGTCGAACGCAACCCGATCCTCGGCTTCGGCCCGGACGGCGAGAACGGCCGCAGCGGCGGAATCGGCGTGGTGTTCCAGGAGCCGCGCCTGTTGCCCTGGCTGACGGTGGCGCAGAACGTCGGTTTCGCCGACGGCTGGCTGGAGGACGAGCACTGGGTCGAGCGCCTGCTCGCCGATGTCGGACTGGCCGGCTGCGGAGGATTGCTGCCCAAGCAACTGTCCGGCGGCATGGCGCAGCGCGCGGCGATCGCCCGCGGTCTCTACGGACGGCCGCAGGTGCTGTTGCTCGACGAGCCGTTCAGCGCGGTCGACGCCTTCACCCGCATGCGCCTCCAGGACCTGCTGCAGGACGTGGTGCAGAACTACGAGATCAGCGTCCTGCTGGTCACCCACGATCTCGACGAGGCGTTCTATCTCGCCGACCGCGTGCTGCTGATGGGCGGCCGTCCCGGACACATCCGCCGCGAGTTCCACGTACCGCTGGCGCGTCCCCGCGATCGCCGGGCGGTGGAGCTGGCCTACTTGCGCGGCGAGGCCCTGACCGAAATGCAGCGGGCGCACGTGCTCTGAGGCAAGACCCCGAACCGGGTTTTTATATGCTTTATGGA
ATAAATAAATGAATAAAAAACATTTATTGGAATATGAGGCCGCCTGTTAAGGTCTATGCAACTTGGTTAGCAAGCCAAACGAATATTCTGTTTTCCGGTTATAAGGAATCCCCCCATGCTCGTCGTTTCCATCGCCGGAAGTCCCAGCGTACGCTCCCGCTCCGGGGTGTTGCTGGAGCGTGCCAGGGACTGGCTGAGCCGTCGCGGCGTCGAGGTGGCCAGCCACCAGGTGCTGGATTTTCCCGCCGAAGACCTGCTGCGTGCCCGCCTCGACAGTCCGCCGGTACTGGCGCTGGCCGAGCAGATCGGCCGCGCCGACGGCCTGCTGGTCGCCACCCCGGTGTACAAGGCCTCGTTTTCCGGCGCGCTGAAGGTCCTGCTCGACCTGTTGCCCGAGCGCGCCCTGGAGCACAAGGTGGTGCTGCCGTTCGCTACCGGTGGCAGCAGTGCGCACATGCTGGCCGTGGACTATGCATTGAAGCCGGTCCTGGCCGCGCTCAAGGCCCAGGAGGTACTGCATGGCGTATTCGCCGTGGACAAGCAGATCGCCTACGCCACCGAGAGCGACCCGGCGCGCCTGGAACCGGTCCTCGAGCAGCGCCTGGAAAACGCCCTGGAAACCTTCCACCTGGCCCTCTCGCGACGGCCGCAGCCGATCGACCCGCAGTTGCTCAACGAGCGCCTGGTGAACGCCCGCTGGAGCATCTGATCCGTCCGCCGCCAGCGCACGCGCGCCGCGGCCCGATTGCACAACGCACACACGACGACCTCACTGTTCCGTCAACGGCGCGGCAGGTTCGGCACCCACAACGACAAAGGAGAGCGCCATGCGCACCATCGCTTTGCGTCGTGGACTGGCGGCCCTGCTGGTGGCGGCCCTGTCCTACGGCGTTCAGGCCGACGAGAAGTCGGCCAACACCCTGCGGATCGGCTACCAGAAATACGGCACCCTGGTCCTGCTCAAGGCCCGCGGCACCCTGGAGAAACGTCTCGCCGAAGACGGCGTGAAGGTGCAATGGACCGAGTTTCCCGGCGGCCCGCAACTGCTCGAGGGACTCAATGTCGGCAGCATCGACTTCGGCGTGACCGGGGAAACCCCGCCGGTCTTCGCCCAGGCCGCCGGCGCCGACCTGCTCTACGTGGCCTACGAACCGCCGGCGCCGACCAGCGAGGCGATCCTCCTGCCGAAGGACTCGCCGATTCGGTCGGTGGCCGAGCTGAAGGGCAGGAAAGTCGCCCTGAACAAGGGCTCCAACGTGCATTACCTGTTGGTCCGCGCCCTCGAGCAGGCCGGCCTGAAGTACAGCGACATCCAACCCGTCTACCTGCCGCCGGCCGACGCCCGCGCCGCCTTCGAGCGTCACAGCGTCGACGCCTGGGTGATCTGGGACCCCTACCAGGCCGCCGCCGAGAAGCAGTTGTCCGCGCGGGTCCTGGTGGATGGTCGCGAGCTGGTCGACAACCACCAGTTCTACCTCGCCACCCGGACTTACGCGCAACGGCATCCGCAGGTGCTCGACAAGCTGGTGGACGAGATCCGCGAGGTCGGTGATTGGTCGCGGGCCAACCCGCAGCAGGTGACCGAGCAGGTCGCGCCGCTGCTCGGGCTGCCCGCCGACATCACCCTTACCGCGGTGAAGCGCCAGGGTTACGGCGCGCAGTTGATCACCCCGGCGGTGGTCGAGGCGCAGCAGAAGATCGCCGACACCTTCACCCAGCTGAAGCTGATTCCCAAACCGCTGAGCATCAAGGACGTGATCTGGACGCCACCGGCCGGCAAGGTCGCCAGCGCACCCTGAGCCCGTACCGCCCACATACCGACACGATCATCGCGCGGCTACGCCACGCCGGCGCGCTCACCCTGGAGTCAATTGCGATGAGCCTCGAGATTTTCTGGTTCCTTCCCACCCACGGCGACGGCCACTACCTGGGCACCACCCAGGGCGCCCGTGCCGTCGACCACGGCTACCTGCAGCAGATCGCCCAGGCCGCCGACC 
+>8_2#NODE_7_length_39998_cov_63.2738_ID_13 +CTACTTGCTGTAGCAGCAGGACAAAAGGTAATTGATCAAGTTACTAATCATTCACCTTTTATGGAAATGATCATTGTTTGGATTGTGGCGACGACATTTCAACAATTTTTACCATCCCTTTCGACAATGGTTCAAGGAGTTCTAACTGATAAATTAACAGGCTTTATCAATATTAGCTTGATGAAAAAATCAGCAGACTTACAATCGATCAGTATTTTTGATGATAGTAAGTATTTTGATGATTTACAAATGCTCAGAGATGATGCAAGCTGGCGTCCGGTTAATTTAATTGTTTATGGAGTATCAGTCTTACAGTCATTTCTAACGCTAGCTTTCATGCTAATATATTTGGCACGATACAATTGGTGGCTAGCCTTGCTTTTATTAGTAGTAATGGTACCGCAGAGTCTTTCTTACTATCGTATTCAGCAGCAGTCATTTGAAACAATGGTTGAAAGAAGCAAGAATGCGAGATACTTGCACTACTATAGTGGATTGTTGCTTGATCGCAGGGATGCTAAAGAAGTTAGACTTTTCAACATGTTTCCTAAGATCATCGAAAAGTATACAAGCTTATTTGAACAAACGAAAAAAGACGTTAACCAAATTCGTAAAAAGCAACTTGCGACTAGTTCACTGTTTGTTGTATTGACTGTCGGAGTGTTTGGCTATGGCTTTTATTGGTTTACTAATTCAGTAAGAACAGGTGCATTAGAAGTTGGCGTATTATTGATGTTTGTTTCAGTAATTGGCTATATTTCTACTAGCATGGCTCGGGTAGTAGAAGACAGCAGCTTGTTATACGATTCATTATTATGGGTTGAAAAATACTTTAAGTTTCTTGAGTATCAAGATGATTTCAAAAACGGTGGTCAGAACTTTCCTGATGATTTTGATGATATTAATATTAAGAATCTGTCTTTTACATATCCGTTTTCTGATACTGAAATTTTGCACAATGTTAGTTTCTCAGTTAAAAGTGGAGAAAAAGTTGCAATTGTTGGAGAAAATGGGTCAGGAAAATCCACTCTGGTAAAGCTATTAATGCGTTTTTATGACCCGACTAATGGAAAAATTTCTGTTGATAATTATGATTTAAAAGACTTTAATATCTTCGACTTACATAAAAATTTATCAGCTACTTTTCAGGATTTTTCTCGCTTTAAATTAACGCTTAAAGAAAACGTGATTACCGGATATTCATTCAATAAAGGTAGGGTAAATAATGTTCTTAAAGCAGCAGGATTGGGTGATTTGCTAGCTAATGATCATCTTAATCTGAATACGATGCTGGCTAAAGATTTCGAAAATGGAACTGATTTGTCAGGTGGTCAATGGCAAAAAATAGCTTTAGCACGAGACTTATATGCTAATGGCAAGATTGAATTTTTAGATGAACCAACGGCAGCCTTAGATGCTAAGAGCGAATCGGAAATTTATCAACGCTTTTTGAAAGAAAATGATAAAAAGACAATTTTCTTTGTTACTCACCGTTTGTCAGCAGTTAGATTTGCTGATAAGGTATTATTTCTTGACGGTGGAAAAGTTAGTGGATTTGACACGCATACTAATTTATTGCAAACTAATCCAAAATATAAAGAAATGTACGACTTACAGAAAAATGCATATCTGTAAAGTTAAATTTTGATAGATAATAAAATTCTTCATGCAAGCAGGCATGGAGAATTTTTTTGCCTTTTAACAGTTAAGATTTTTCAGTGATTTTTACGTTATAATAGAACATAGGTTTTTGGAAGAAAGGATATTTGGTAACGAATGGCAAAAAAAGATACGACACCAATGATGAAGCAATACTACGAAATTAAGGAACAATATCCCGATGCGTTTTTGTTTTATCGTGTGGGCGATTTCTATGAATTATTTGAAGACGATGCAGTTAAGGGTGCACAAATTCTAGAGTTGACCCTAACTCATCGTTCAAATAAAACTAAAAA
TCCAATTCCAATGGCAGGAGTTCCTCACCTAGCTGTGGATACTTACGTTAATACTTTGGTGGAAAAGGGATATAAAGTTGCGCTTTGTGAACAACTTGAAGATCCAAAAAAAGCTAAAGGAATGGTAAAGCGTGGAATTATTCAGTTAATTACGCCAGGTACCATGATGCACGAACGTCCTGACCAAGCAAAAGACAGTAACTACTTAACGTCAGTTATTTCAACAAATTCTGGCTTTGGCCTAGCTTACAGCGACTTATCCACTGGGGAAACTTTTTCCACACACTTAACTGACTTTGAAGGTGTCGCAAATGAATTATTATCCCTTCAAACAAGAGAAGTTGTGTATAACGGTCATTTAACAGAAGCAAACAAGGACTTTTTAAAGAAAGCTAATATTACTGTTTCTGAACCGGTTAAAGTAGAGGGCGAACATGCTGAGATTTCCTATGTAACGCAAAATTTAACTGACGATGCAGAAATTAAGGCTACCAAGCAACTAGTAGCTTATTTGCTGTCGACGCAAAAGCGTAGTTTGGCTCACTTACAAGTAGCACAAAGTTATGAGCCTACGCAATACTTGCAAATGTCGCATACTGTGCAAACGAACTTAGAGTTGATTAAATCAGCTAAGACTTCTAAGAAGATGGGATCTTTATTCTGGCTCTTAGATAAAACAAGCACGGCAATGGGAGGCAGACTTCTTAAATCTTGGATTGAACGTCCGCTTTTATCAGTTACTGAAATTACTCGTCGTCAAGAAATGGTTCAAGCGCTTCTTGATGATTACTTCACCAGAGAAAAAGTTATCGACAGCTTAAAGGGTGTGTATGACTTAGAAAGATTAACTGGTCGAATTGCTTTTGGTTCAGTTAATGCTCGTGAAATGCTACAACTAGCGCATTCTTTAGGTGCAATTCCTGATATTTTGAATTCCTTGCTTGAAACAAATAATCCACATCTGCAAAACTTTGCTAAGCAAATTGATCCGTTAAAGGGAATTCATGATTTGATTGTTAACACTATTGTGGATAATCCACCACTTTTAACGACGGAAGGTGGCTTGATTAGGGAAGGAGTTTCAGACCAATTAGACCGCTACCGTGATGCCATGAACAACGGTAAAAAGTGGCTATCTGAAATGGAAAGTCACGAGCGTGAAGTAACTGGAATTAACAACTTAAAGGTTGGCTACAACAAGGTCTTTGGCTACTATATTGAAGTTACGAATTCCAATAAAGATAAGGTGCCAACTGATCGCTACACTAGAAAGCAAACATTAACTAATGCAGAGCGTTATATCACGCCTGATTTAAAGGAACATGAATCCTTAATTCTAGAAGCGGAAGCTAAGTCAACAGGCTTAGAGTATGACTTGTTTGTAAAATTAAGAGAAAATGTTAAAAAGTATATTCCAGCTTTACAAAAATTAGCTAAACAAGTTGCCAGTTTAGACGTTTTGACGAATTTTGCGACAGTTAGTGAACAAAATAATTATGTTCGTCCAGACTTTACGGTTGATAAGCAAGAAATTAATGTAGTTAATGGTCGCCACCCCGTTGTTGAACAAGTGATGACGGCTGGTAGTTATATTCCTAATGACGTGAAGATGGATCAAGATACCGATATTTTCTTAATTACTGGACCTAATATGTCTGGTAAGTCTACTTATATGCGTCAAATGGCCTTAATTGCAATCATGGCGCAGATTGGCTGTTTCGTTCCAGCAGATAGTGCAACGTTGCCAATTTTTGATCAAATCTTTACTCGAATTGGTGCAGCAGATGATTTGATTTCTGGTCAGAGTACTTTTATGGTAGAAATGAGCGAAGCTAATGACGCTTTGCAGCATGCGACAAAACGTTCTTTAGTTTTATTTGATGAAATTGGACGAGGTACTGCTACCTATGATGGGATGGCCTTAGCCGGAGCAATCGTGAAGTATCTTCATGATAAGGTCGGGGCAAAAACGCTCTTTGCGACTCACTACCACGAATTAA
CTGACTTAGACCAGACTTTAAAGCATTTAAAGAATATTCACGTTGGGGCAACCGAAGAAAATGGCAAGTTGATTTTCTTACATAAGATCCTACCAGGACCAGCAGATCAATCTTATGGTATTCACGTTGCTCAATTGGCAGGCTTACCGCATAAGGTTTTAAGAGAAGCAACTACGATGCTTAAGCGATTAGAGAAGCAAGGAGCAGGAGAGTTGCAGCCAGCTAGCGAGCAACTTGATCTCTTTACTGCTGAAGAAGCGAGTGTACCTGCAATTTCAGATGATGAAAAAGATGTCTTGGATGATATTCAAAATGTATATCTTGCTGACAAAACTCCTTTGCAGGTTATGGAACTTGTAGCTCAATGGCAGCAAGAGTTGAAAGATAAGGATTAATAATCATGAGCAAAATTCATGAACTTTCACCTGAATTAACTAATCAAATTGCTGCTGGTGAAGTTATTGAACGGCCAGCTAGTGTTGTTAAAGAATTATGTGAAAATTCCCTTGATGCCGGTAGCAGCCGCATTAGAATTAACTTTATTGATGCAGGCTTAAAGCAAATTACGGTGCAAGACAATGGGAGCGGAATTGCAAAAGACCAGCTTAATTTAGCTTTTACTCGGCATGCAACAAGTAAGATTGCAACAGAGCGAGATTTATTTAATATTTCTACTTTAGGCTTTCGCGGAGAGGCTTTAGCGTCAATTGCGGCCGTTTCTCATGTGGAAGTAATGACTAGTAGCGATAATTTAGGCGGAGTTCGGGCAATTTTTGTAGGTAGTGAAAAAAAGCTCCAAGAAGATGCTGCTTCACCAAAGGGAACCAAGATTTCAGTTAGTGATTTATTTTTTAATACGCCAGCTAGGCTGAAATATTTACGCTCTGAAAGAACTGAAACTCTAAAGATTGTCGATATTGTTAACCGACTTGCTTTAGGACATCCTGATGTTTCTTTTACTCTTACTAATAATGGTAAGATCTTATTAAAGACCAACGGACGTAATGATTTAAGACAAGATATTGCAAATATTTATGGCCGTCAACTTGCAGAAAAGATGGAGGTTTTGAAAGGAAGCAGTCCTGATTTTAAGATTACCGGCTTACTTTCTGATCCTAATACAACTCGTTCAAGTCGAAACTTTATTTCTTTATTATTGAATGGCCGTTATATAAGAAATTACCGTCTAACTCAGGCGATTATAGCTGGATATGGCAGTAAATTAAGACCACGCCGTTATCCAATTGCTGTTGTAAACATTGAGCTAGATCCACTTTTAGTGGATGTTAACGTCCATCCAACTAAACAAGAAGTGCGCTTGTCAAAGGAGCAGGAATTAGAGCGCCTGTTAACTACAAGTATTTCAGAAGCCTTAGATCAGAATAGTCAGATTGATTCAGGTTTGAATAATTTATTAACTCCTAAAAAGTCGACAAATATTGACCAATTAAAATTTAACTTAAATAAAGATGTGGTTGATACTGTTCGTCCAGTCGAATTTACTCCGCAAGTTGAAGCTGATAAAAGTGCTGAAGTTCATGAAACAGCGGCAAGTTTTGTTAGCTTAGATAAGGTTCGTAATGACGATAAATATGTGATTACTGCAACTTGGGATGAAAATGTGGATAAGCAGGTGCAGCTAAGTCCATTTGATGAGGAAAAAGATCTGCAAGGAAAAGATGATAGCATTATTTCTTCAGGGGATGAAATCTTAGCTAATAGCCTACCTGAATTAATTTATTTAGGTCAGACTAAATCATATTTAGTTGCACGTCATGATGAGGACTTATACTTAATTGATCAAGTAGCAGCTTATCGACGCTTAGCTTATGATCAAATTTTTCGTGATTTAAATAGTGAAAATATTTCTCAGCAAGGTTTACTTAGTCCTTTAATTTTAGATTTTAGTAATGTTGACTATTTGAAGTTAAAAGAAAATCTTAATAATTTACAAGAATTAGGGATATTTTTAGAGGATTTTGGTCAAAATAGTCTGATCT
TAAGAACCTATCCAATTTGGCTCCAACCTGACGTAGAAAAAAATGTCCGCATGATTTTAGATTTATATTTAAATCAAAATGAACAAGATTTTACCAAGCTTAAAGCACAAATTGCTGGTGAAATAACGCTGCGTCAAAGTACTAGAAGAAGAAATCTAAATCCAGTTGAAGCACAAGAATTATTGAAAAATTTGAGAAATAGTAGTGATCCTTATCAAGATTTTGAGGGAAAAATTATTATTATTCAGTTAAGTGAAAATGATCTCAATAAGATGTTTAAAAAAGATGAGTAGGTAAAATGTTTGAATATCTCAAAGGAATAGTAGCAAAAATCGATCCAGCTTATGTAGTATTAGATGTAAATGGCATTGGGTATAAGATCCTTTGTCCCACTCCATATAGCTATCAAGAAAATCAGCCAGCAACTATCTATGTTGAACAAGTTGTTAGAGATACAGGGATTACCTTATATGGCTTTTTATCTTTAGAAGACAAAGAATTATTTTTAAAATTATTAAGCGTTTCAGGAATTGGACCAAAGTCAGCTGTGGCGATTATGGCGGCTGAGGATACAGATTCGTTAGCTAGCGCAATTCAGAATGGAGAGGTGAAATATCTAACTCGTTTTCCAGGGGTTGGAAAAAAGACTGCTTCTCAGATTGTCTTAGACTTAAAGGGAAAGTTAGGCGACTATGTAAAGAAATCAGCAGTTGCTACCGATCTTACACCGAGCCTGCAAGATGCTTTGCTTGCCTTGGTAGCACTTGGTTATACCCAAAAAGAAGTTGACCGAATTACTCCAAAGTTAGCAAAATTGCCTGAAAATACTGCTGATGGTTATATTAAAGAGGCTTTAGCTTTACTACTTAAGAAGTAAAAGTAAAAATTAGGTAAGGATAAGGAAGTGAAAGATGTGAATGATGAAGAACGAATTATAGGCGCTGAGAGTAATGAAGAGGATGAAACGATTGAATTGTCTTTGCGTCCGCAACTACTTGCTCAATATATTGGACAAGATAAAGTAAAGAGCGAGATGAAAATCTATATTAAAGCGGCTAAGCAGCGAGACGAAGCTTTAGATCATGTTTTGTTATATGGCCCACCTGGTTTAGGTAAAACTACTTTGGCATTTGTAATCGCTAATGAAATGGGCGTTCACTTAAAGAGCACCTCGGGTCCGGCAATTGAAAAGGCGGGCGATTTGGTGGCTCTGCTTTCGGAATTAAATCCAGGGGATGTCTTATTTATTGATGAGATTCACCGTTTAGCAAAGCCAGTCGAAGAAGTCTTATATTCGGCAATGGAAGATTTTTATATCGATATTGTGGTTGGAGAAGGACAAACAACTCATGCTGTACACGTGCCACTTCCTCCATTTACTTTGATTGGAGCAACTACCAGGGCTGGTCAATTATCTGCGCCGCTAAGAGATCGTTTTGGAATTATTGAGCACATGCAGTACTATTCTATTGATGATCTAGAAAAAATAATTCAAAGATCAAGCGTAGTCTTTAACACTAAAATTGATCCAGAAGCTGCTATTGAACTAGCTCGACGTTCTCGTGGAACGCCGCGTGTTGCTAACCGTTTACTTAAGCGGGTTCGCGATTTTGCGGAAGTAAAGGGAGAAGAAGCTATTTCTTTAGCAACTACTAAGCACTCACTTCATTTGTTAGAAGTTGACGATGAAGGTTTAGATCAAACTGACAGAAAACTTTTAAGAATGATGATTGAAAATTATGGTGGCGGTCCGGTCGGGATTAAGACTATTGCAGCTAATGTTGGTGAAGATACAGATACAATTGAAGAAGTTTATGAACCATATTTACTTCAAAAAGGATTCATTACTCGAACTCCTAGAGGGAGAAGCGTTACGCAAAAAGCTTATTTACAATTAGGATATCCACCAAAAAAAGCAGAGTAAGTTTTTAGTTGGACCAAAAATACGGTATAATTATTATTTGAAAGTTAATTAATAAAAATAATGAGGTGTAATACTT
TGAATACATTTTTCTTAGCACAAAGTGCTGCTGGTATGAATAACATGTTCATGATTATTGCAATGATTGCAATTTTTGTCTTCTTCTACTTCTCCATGATCAAGCCGCAAAAGAAGCAACAACAAGAACGGATGAAGATGATGTCTGAACTTAAGAAGGGCGATCAAGTAATTATGGTTGATGGCCTTCATGGTAAAGTTGATTCAATCAATGACGCAGATAAGACTGTTGTGATTGATGCCGATGGAATTTTCTTAACATTTGAAAGAATGGCTATTCGTCGCGTTCTTCCAACTGCTGCCGCTCCTGCTAAAGATGTTGAAGCTAATGAAGCAAAAGAAGAAAAAGTTGAAACAGAAGAAAAGCCTACTGAAAGCAGTAAACCGGAAGAAACAGCTTCTGATAAAACTGATGATACTAATTCTGAAGATAAATAAAAAAACGTCCGCAAGGACGTTTTTATTTTTTCATTGTCATGGCTTAGTATTGTAAAATATGAATTGGTAAAGTAAATTTTAATAAGTTTAATAGGTAGGTAGTTAGAATGGAAAATATTCCAGTTGTCATGATTATTTTTGGTGGCTCGGGTGATTTAGCTCATAGAAAGCTTTATCCAGCCTTATTCAACTTATATCAAAAGGGATTAATTCATGATCATTTTGCCGTAATTGGTACAGCTAGAAGACCATGGAGTCATGAATACTTACAAGAACAAGTTGTTGAAGCAATTAAAGAAAGCAACAGCAGCTTTGATGAGAAAGATGCAAAAGAATTTGCTTCGCATTTTTATTACCAATCACATGATGTAACTGATGTTGATCACTACATTGCTCTAAAAGAATTAGCTACAAAGCTTGATAAGAAATATCACGCAGAAGGCAACCGTATTTTCTACATGGCAATGGCACCAAGATTTTTTGGCACAATTGCAACCCATATTAACGATCAAAAGCTAGTTGGGAGTGGTTTTAACCGCTTGGTAATTGAAAAGCCATTTGGTCATGATCTAGCTTCTGCTGAAAAATTAAATCAAGAAATTAGCGAAAGTTTTGCTGAAGATTCGGTATATCGAATTGACCATTATCTCGGCAAAGAGATGGTCCAAAATATTATGCCGCTTAGAATGACTAATCCAATTGTTAATAATATTTGGTGCAAGAAATACATTGCTAATATGCAAGTTACCCTTGCTGAAAGCTTAGGCGTTGGTACTCGTGGCGGATATTATGAGACTTCTGGTGCTTTACGCGATATGGTTCAAAATCATATTTTTCAAATTATTACCCTTTTAGCCATGCCAGAACCTAAAGCCTTAGATTCAGATCATATTCATGAAGCAAAGCAAGAACTACTCGATAGTTTGGTCATTCCGACTCCAGAAATGGTAAAGCAGCATTTCTCACGTGGTCAATATTTAGCAAGTGATGATGAAGTGGAGTATTTAAAGGCCGATCAAGTTGCTCCTGATTCTAAAGTAGAAACTTTTGTTGCTGGCGAAGTTAACTTCAAGAAGGGTCCCGTAGCTGGTGTTCCAATCTACTTTAGAACCGGTAAGAAGATGAAAGACAAAGTTTCTAGAATCGATATTGTACTCCACCACATGAATAATTTATACGGCAATGCACATTCAAATAATATTTCAATTATTATTGATCCAAGAAGCGAGATTTTCTTTACTATTAACGGAAAGAAGATTACTACTGAGGGCTTAAGAAGAGAAAATCTCAGCTATAAGTTCTCTAAGGAAGAAATGGCTCAAGTCCCAGATGGCTACGAAAGATTACTTCACGATGTCTTTGTAGCTGATCGTACAAACTTTACGCACTGGTCTGAATTAAAGCAATACTGGAAGTTTGTTGATGCGGTTGAAGCTGCATGGCAAGATGAAAATAAAGATATTAAGCAACTTGAGCAATATCCAAGTGGTGAATTTGGTACTGAATCGAGTAACCATATTTTTGAAAAAGATACTGAACACTGGATTTACCGTTAATGGATTT
ACTGCCTAAAAATAATACTAAGCGTAAGATTATTCATCTGGATATGGATGCTTTTTATGCGTCAGTAGAGATTCGTGACAATCCTGCTTTAAGAAATAAGGCTGTTTTAATTGGCGGCGATCCTAAGAAAAATAGGGGACACGGGGTTGTGGCAACTGCTAATTATGTGGCTCGTCAATATGGTGCTCATTCAGCTATGCCGACAGCTAAGGCAATTAGGTTAATTCCAGCAGAAAAACTGGTAATTATTCAACCACATTTTGAAAAGTATCGGGCTGTTTCAGCTGAAATTCATCGTTTAATGCATAAAATGACTGATCGAGTAGAATCGGTTGCCTTAGATGAAGCATACTTAGATGTAACTGAGAATAAACTTCATCTAGCTGATCCTGTTCGGATTGCGACTATACTTCAAGAGCAAATTTATCAAAAGGTTGGATTAACAAGCTCATTCGGCGTTTCTTACAATAAGTTCTTAGCCAAAATGGGATCTGAATATGCTAAACCCTTTGGCAGAACCGTAATTCGGCCTGAAACTGCACTTGATTTCTTGGCAAAACAAAAAATTGAAAAGTTCCCTGGAATCGGTCCTAAGACGCAAGAAAGATTAGCAGAGATGGGAGTCTACACTGGAGCTGACTTAATAAAAATTCCAACGGATGTGTTGATTAAGAAATTTAATCGCATGGGCTATTTGATCGCCCAACATGCACATGGCATCGACTTACGAGCGGTAGTTACTGATTCAGAAAGAAATCGAAAGTCGATTGGGATTGAGCGAACTTTTAATCAGAGTTTGTTTGATGAAAATGAAGCTTTAACTAGACTGCGAGCTTATAGCGGTGAACTTGAGAACCGGTTGAAAAAGCGACATTTTCTTGCTAATTGTGTTGTCTTAAAGATTCGCGATGCTAATTTTAAGACAATTACTAAAAGAAGAAAGCTAAAACAAGGCACGAATGATAAAATTGTTATTTACGACACTGGTCGTGTTTTGTTTGAAACAGAAAAAGGGATGCTCACAACTGGAGTACGTTTATTAGGTCTTACGGTAACCGATTTTGAAGAACATCCAGTTGAGAATTTATCGTTAGATATTTTTGAAAACAAGTAATGATAAAATAAATTTAGTAAACAACAAGGAGAAAAGATTAATGGCAACTTTTGATGAAATTTATGAAAAAATAAAAGAATACCCAACAATTATTTTGCACCGTCACACTAGTCCTGACCCAGATGCAATTGGTTCTCAAGCTGGTTTAGCAAGATCATTGAGATTAGCTTTTCCAGATAAAAAGATTCTTTGTGCTGGTGAAAATGACGAAGGTGACCTGATCTGGATTAATAAGATGGATGAAGTTACACCTGAAGATTATCAAGGTGCTTTAGTAATTACTACTGATACTGCTAATACTCCACGTATTTCTAACAAGAATTACGATAAGGGTGATTTTTTGATCAAGATTGACCACCACCCAGATGTAGATCCATATGCAGATATGAGCTATGTAGATCCAGACGCACCAGCTGCTTCAGAAATTGTTTTTGACTTTTTGAAAGCAGAGAACTTGCCAGTTACTAAGGAAGTTGCAGCAGCTTTATATGCAGGTATTGTTGGCGATACAGGTAGATTTATGTATCCTGAAACTTCAGCTCACACTTTTGACGTTGCAGCTGAATTAACTAAGACAGGAATTAACATTACTGATATTGCTAGAGAAATTGCGGATGTTACTTTTGATGAAGCAAAATTGCAAGCTTTAGCAATGGATAAGATGGAAATTAACCCAGTTGGAGCAGCTTACACAATTTTGATGCAAGACGATTTGAAGAAGATGGGCTTAACTGATGACCAAGCAAACGTTGCTGTTTCAACTCCAGGTCGGATTAAGGACGTGTTAGCTTGGAATGTATTTGTTGAAAAGCCAGATGGTACTTTCCGCGTTCATTATCGTTCAAAGGGTCCTGTAATTAACCATTTAGCTGAAAAACATGATGGT
GGTGGCCATGCATTAGCATCTGGTGCCAATGCCAAGGATATGGATGAAGTAAAACAAGTCTTTGACGAAGTAGTAGAAGTAACAAAGAAGTACAATGAAGAACATGGCACAAACAAATAGCATATTTCAAAGTGAAAAGATTAGACCAGAATTGCGAGCTGGTCTAGAAAAAATTAATTTTAGCAAACCAACTAAGGTTCAAGCTGCAGTCATTCCGGCTCTTTTAAGTAATAAAAGTGTTGTGGTTCAAGCAGCAACTGGTTCAGGTAAGACTCATGCCTACCTGCTCCCACTTTTGAACATGATTGATGAAAATGCCCCAGTAACGCAAGCAGTAGTTACCGCGCCTAGCCGAGAGTTAGCAAATCAATTGTATAAGGTAGCGCGTCAATTAAGGGATGCGAGTGGTCTAAATATTTCCATTGAATATTTGGGCGGTGGTAATGATCGTAATCGTCAAATTGAAAAAGCTGAAAATAGAGCTCCACAATTAATTATTGCTACACCAGGTCGTTTACATGACTTTGCCTCAAAGAAAGTCATTAATTTAGATAACGTCAAAGCCTTCATTATTGATGAAGCAGACATGACGCTTGATATGGGCTTTTTAAGTCAAATGGATGAGATCATCTCTAAACTAGATAAAAAGGTCGTTTTAGGCGCTTTTTCAGCTACTATCCCAGTTAAGCTAGAGAACTTTCTCAGAAAGTATATGTCAAAGCCAGACTTTATTGTTATTGATAATCCAGCAATTATTGCTCCTACAATTCAAAATGACTTAATTGATGTTGGTTCAAGAGATAAAAAAGAAATTTTATATAAGCTCTTAACGATGGGACAACCATATTTGGCCTTAGTTTTTGCTAATACTAAAAAGACTGTCGACGAATTAACTAATTATCTTGAAGAACAAGGGCTTAAGGTGGCTAAGATCCACGGCGGGATTACTGAACGTGAGCGCAAGCGTATTATTCGCCAAGTTAGAGAAGGGCAGTACCAATATGTTGTAGCTAGTGATTTAGCCGCTAGAGGAATTGACGTACCAGGAGTTAGCTTAGTAGTTAACTATGAAATTCCTAAGGATCTTGAATTTGTCATTCACCGAATTGGGAGAACTGGTCGAAATGGGCTTGAAGGTCATGCAATTACCTTGATCTATGATGAAGAGATGCCTCAAATTGAAGACTTAGAAAAGCTAGGTATTCACTTTGACTTTAAAGAAATTAAAAATGGTGAATTAGTTGAAAGAACACACTACCATCGTCGTGATAATCGCCAAGCTCGGAGTCATAAGCTTGATAATCGCATGATTGGAATGGTTAAGAAGACTAAGAAAAAAGTAAAGCCTGGTTATAAGAAGAAAATTAAGCAAGCTATTCAAAAAGATCGCCAGCAAAAGCGTAAGATCGAAGAGCGTCATCAGATTAGAAAAGCTAAACGTAGACGTAAGCGTGAACGCGAACAAGCACGCGGTAATTTTGACAACTAGAAAATTTGTAGTAAAATAAATCACGTTTAGGACATATTACTGAAGTATTTTCTTCAAAGAGAGGGCCAGGCAGGTGAGAAGGTCTAGAAATACTTAGTAATGCTACCTAATTTAACTAAAGAATTAAATATTTAAAGAGGTAACAAACACTGTTGCAATCAAGGTGGTACCGCGCTGGGAGCGTCCTTGTTTTATGCAATAGTGTTTTTTTATTTGTAGGAGACTTTTTATGAAACAATTGACTAGTTCACAAGTACGTCAAATGTTCTTGGACTTTTTTAAAGAGCATGGCCACATGGTTATGCCAAGTGCATCATTAATTCCACAAGATGATCCAACCTTATTGTGGATTAACTCTGGTGTTGCTACGATGAAGAAATATTTTGATGGTTCTGTTGTGCCTAAGAATCACCGAATTACTTCTTCTCAAAAATCAATTAGAACTAATGATATTGAGAATGTTGGTAAAACTGCACGTCACCAAACTTTCTTTGAAATGCTTGGTAACTTC
TCAGTTGGGGACTACTTTAAGAAAGAAGTTATCCCTTGGGCATGGGAATTTTTAACTAGTCCAAAATGGTTAGGCTTAGATCCAGATAAGCTTTATGTAACTGTTTACCCTAAGGATACAGAAGCATATCATATGTGGCATGATGTTGTTGGCTTACCGGAAGATCACATTGTGAAGTTAGAAGACAACTTCTGGGATATTGGTGAAGGTCCATGTGGTCCTGACTCAGAAATTTTCTATGATCGTGGTCAAGAAAATAACGATGTTGCGGAAGACGATCCTGAAAACTTCCCAGGTGGGGAAAATGCTCGCTACCTTGAAATTTGGAACATCGTCTTTTCACAATTCAATCACTTACCAAATGGTAAATATGTTGATCAACCACATAAAAACATTGATACCGGTATGGGATTAGAGCGTGTTGTTTCAATTATTCAAGATGCACCAACTAACTTTGAAACTGACTTATTTATGCCAATTATTAAAGAAACTGAAAAGCTAAGTGATGGCAAGAAGTATGCAGCAAACAAGGAAGATGACGTAGCATTTAAGATTATTGCTGACCACGTTCGTGCTGTAAGTTTTGCGATTGCTGATGGGGCTCTTCCTTCAAACTCAGGTCGTGGATATGTTTTACGTCGTTTAATTAGACGTGCTGACTTGAATGGTCAACGTTTGGGTATTAAGGGCGCATTTTTGTACAAATTAGTACCTGTAGTTGGCGAAATTATGAAGAGTCACTACCCAGAAGTTGTTGATCAACAAGCATTCATTCAAAAAGTAATTAAGAATGAAGAAGAAAGATTCCAAATAACGCTTTCATCTGGTTTGAACTTGCTTGATAATATTATTGCTGAAGCTAAGAAGAGCGATGATAAGACTGTTTCTGGTAAAGATGCGTTTAAGTTATTTGATACTTATGGCTTCCCATACGAATTAACTTTTGAAGCTGCTCAAGATGCAGGTCTTAAGGTTGATAAGAAGGGCTTTGATGAAGAAATGAAAGCCCAAAAGGAACGCGCACGTAAGGCTCGTGGTAACTTACAATCAATGGGTTCACAAGATGTTACTTTGATGAATATCAAAGACAAGAGTGAATTTGAGTACGGTACTTTAGAAGAAAAGCATGCTAAGTTAATCGATATTGTTGTTAATGATAAGTTAGTTGATAAAGCTGACGGTGAACACGCAACTTTAATTTTTGATAAGACTCCATTTTATGCAGAACGTGGTGGACAAGTTGCCGATCACGGTGAAATTTTGAATCAAAATGGTGAATTGGTTGCTCGTGTAACAGATGTACAACATGCACCAAACGATCAAAACCTACACTTTGTTGATATTATTTTGCCACTTGAAAAGGGACAAGAATATATCTTGAAAGTTGATCAAAAACGTCGTCGCGGCTTAAAGCACAACCATACTGCTACTCACTTATTGCATGCTGCTTTACGTGAAGTTTTGGGTACTCACACTCACCAAGCTGGATCTTTAGTTGAACCAGATTACTTACGTTTTGACTTTACTAGCTTAGAGCCAATGACTAAGAAGGAAATTGCTAACGTTGAAAAGATCGTTAACGAAAAGATTTGGGAAGAAATTCCAGTTAAGACAACAGTTACTGATCCAGATACTGGTTTGAAGATGGGTGCTTTAGCCTTATTTGGTGAAAAATATGGTGACACAGTTCGTGTTGTTCAAATTGATGACTTCTCAACTGAATTCTGTGGTGGTACCCACTGTGAAAACACTGACCAAATCGGAATGCTTAAGATTGTTTCTGAATCTGCTGTTGGTGCTGGTACTCGTAGAATTATTGCTGTTACTGGTCCAGAAGCTTACAAATATGTAACAGACCGTGACGAAATTTTGAAAGAAGTTCAAGATGAAGTTAAGGCAACTAAGGCTGAAGATGTAACTAACAAGATTTCTTCTCTTGAAGAAGACTTACGTGCAAGTCAAAAAGAAGCTGAACAATTAAAGGCACAAATTAACAAGGC
TAAAGCCGGCGACTTGTTTAATGATGTTAAGCAAGTTAAAGGTTTAACTGTAATTGCTGCTCAAGCTGATGTAGAAGGCATGAATGATTTACGTGAACTTGCTGATAACTGGAAGAGCAGTGATAAATCTGATGTGTTAGTTTTAGCTGCTGAAGTTAATGGCAAAGCCAACATGGTTATTAGTTTGAACGATAAGGCAATTAAAGCGGGTCTTAAAGCCGGCGACTTAATTAAGACTGCTGCTCCAATCTTTGGTGGTGGCGGTGGCGGTCGTCCAAATATGGCACAAGCTGGTGGTAAGAACCCAGCAGGCTTAAAGGACGCTATTGCTAAAGTGTTACAAGAAGTTGAAGAAAAACAAAATTAATTGAGTTCGTTCGATATTTCAAGTAAAATTAAGAAGATAGGAGGAATTTTATTATGAGTTCGCTAGATAAAACGATGCATTTTGACTTTAACCAAAATAAGGGTAAGAATGTATACGATACTCTACAAGACGTATACAATGCACTTGAGGAAAAGGGCTACAGCCCAATTAATCAAATTGTTGGTTACTTACTCTCAGGCGACCCTGCATATATTCCTCGGCATAATGATGCCCGTAATTTAATCTTGAAACATGAACGTGATGAAATTATTGAGGAATTGGTTAAGAGTTATTTAGGTAAAAATAAATAATGCGATTACTTGGACTAGACGTTGGCTCTAAGACTGTGGGGGTTGCAATTAGCGATCCTTTGGGAATCACTGCTCAAGAGCTTGAAACAATCAAAATTGACGAGAGCAAGTTTAGTTTTGGCATGCGCCAAATCAGAAAGCTTGTCCGTAAATACGACGTCGAAGGTTTTGTTTTAGGGCTACCCAAAAACATGGATGGCAGTAGTGGACATTCTGTAGAAAGAAGCAAGCAGTACGGTGAACGCTTGAAAGAGAAGTTTGACTTGCCTGTTCATTATATGGATGAGCGGCTTACAACCGTACAGGCTGATCGTATTCTGGTTCAAGAAGCCGGCGTACATGATCGTGTTGAAAGAAAAAAAGTTATTGACCAAATGGCTGCTGTTTTGATTTTACAAAGTTATCTAGAAGCAACCAGAAAGGATAAGTAAAATGAGTGAAAAAATTAACGCTAACCAAGATAATGATCGTCAAATCACTTTAGTTGATGATCAAGGCAATGAAGAACTTTTTGAAATATTGTTCACTTTTACGTCTGAAGATTATGGTAAATCTTATGTTTTACTATATCCAGCAGCCGTTAGCGACGATGATGATGTCGAAGTACAAGCTTTTAGCTATGATGCAGATGAAGACGGTGATGTAACCAGTTCTGACTTACACGAAATCTCTGATGATGACGAGTGGAACATGGTACAAGGCGTTTTAAATACATTTTTGTCAGACGATCGCTTAAGTGGCGAATAATCTTCAAAAAAGACTGGCGCAGAAAGGCTAGTCTTTTTTGCTTAATAAGGATGAACAATGATTTCTATTCTCATTTTATTAATTTTTATTTATTGTTGTTATGTAGGTTATCGACGTGGCATAGTGTATGAAGGCTTAGCTGCAGGTGGATATGTAGTAGGGTTAATCTTGGCAACATTATTATATAAACCTTTTAGTAACTTCTTAAATCTTTGGGTGCCATATCCCTCAGCAAGTGATCGAAGTAGCTTTGCTTTCTTTGATAAGACTACAGGATTAACATTAGATAAGTCATTTTATGCAGCAATAGCTTTTTCAATTGTTTTGGTGCTTGTTTGCTGTATATGGCGTTTGGTGATGTTAGGTTTTAATCAGTTAAGATATGTTACTGTAGATGCTAGATTAAATACATGGGGCAGCATTATTATTGCTTTTATTGTGACCCAAATTAGTGTTTATTTGCTTTTATTTATCTTAGCTACGATTCCTAACAATAGTTTGCAAAATATGCTGGGGCATTCTATTTTAGCAAGTGGTATCTTGCATTTCTCTCCAGGAATTTCGCAAATTTTCAT
AAAATTATTCATTACAACAATATAAAAGTGGGCCCAGCTACGGGGCCCTTTTTGCTAGGTGGAAGATATGAACTCTAAAATTATTGAAAAATTAGAATATAATCGCATAATTAGACAATTAAGTGACTTGGCAATTACTGCACCTGCAAAAGCTCAGGCTCTGAAATTAATGCCAAGTAGTGACTTTGATGAAGTAAAAAAATCAATTGATCAAACTAGAGTACTTTCGAATATCTTGCGAGTTAAGGGGCCAATGCCGATAACTGACTTTAAAGATGTTCGACCTAGTTTGAAGCGATTGAAAGTTAAGGCTAACTTAAATGGTGAAGAATTAGGTAATATATTTCTAGTTCTTAGTTTGGCTAAAGATGTAGGACAATTTGCATCGGACCTTGAAGAGCGTGAAATTGATACACGTCCTATTGAAAAGTACCTCAAAAATTTAGCAGTTCCAGAAGATTTGTTTAAGAAGTTAAATCAAGCAATTGAATATGATGGAACAGTTAAAGATACAGCTTCTTCTAAATTGATGCAGCTTAGACATGATATTCAGAGTAATGAAACTGATATTAAGAACCACATGAATGACTATATTAGTGGCAAGCATACGCAATATTTGTCAGAAAATATTGTGACTATCAGAGATGGACGTTATGTTTTGCCAGTAAAACAAGAATATAAAAATAAATTTGGTGGAGTTGTTCATGATCAAAGTGCTAGTGGTCAAACTTTATTTGTTGAGCCACAAGCAGTCTTGGTACTTAATAACCGTCAACAAAATTTGCTGGCTCAAGAACGCCAAGAAATTCACCGTATTTTAATCGAGTTATCCGAACTTGCTGGTGCTTATCAAAAAGAAATTAATAATAATGCATTAGCATTAACTCAATTAGACTTTTTAAGTGCTAAAAGCAAATTAGCTAAGAAGATGAAAGCGACAGAGCCTGTCTTAAATCAAGATCATATCATTAAGTTGAGAAAGGCACGCCACCCCTTAATTGATCCTAAAAAGGTAGTACCAAATAATATTGAATTAGGAACTACTTTTGATACTATGCTAATTACAGGACCAAACACTGGTGGTAAAACTATTACCTTAAAAACTTTAGGATTGCTACAATTAATGGCTCAAGCTGGTTTATTTATCACAGCTGAAGAAGGTAGTCAATTAACGGTCTTTAATGAAATCTATGCTGATATTGGAGATGAACAATCCATTGAGCAATCTTTGAGTACATTTTCATCGCATATGGATCAGATTATTAAAATTATGAATAATGTTACTGAAGATGATTTAGTTTTAATTGATGAACTTGGTGCTGGAACTGATCCAGAAGAAGGTGCAAGTTTAGCAATTGCAATCTTAGATGATTTACGCCAAACACAAGCAAAGATTGCGATTACTACACACTATCCAGAATTAAAGTTATATGGTTATAACCGCAAGCGAACTACTAATGCCTCAATGGAATTTGACTTAAAGAAATTGGCACCAACTTATCGCTTGAGAATTGGAATTCCTGGTCAAAGTAATGCTTTTGCGATTGCACATCAATTAGGAATGAATGAAGCTGTTGTAGATAAGGCTAGGGACCTAATGAATGATGAAGACAGCGACATTAATAAAATGATTGAGCGTTTAACAGAACAAACTAAGGCAGCTGAACAACTTCATGAAACTTTGAAGCAAAATGTTGATCAAAGTATCACCCTTAAACGTCAACTGCAAAATGGGCTTGATTGGTATAACCAACAAGTACAAAAGCAACTCGAGAAGTCTCAGGAGAAAGCTGATGAAATGCTTGCTAAGAAACGTAAGCAAGCTGAAAAGATTATCAATGATCTTGAAGAGCAAAGAAGAGCTGGCGGACAAGTTAGAACCAATAAAGTAATTGAAGCTAAAGGCGCTTTGAATAAACTTGAGCGTGAAAATCAAAACTTGGCTAATAATAAGGTGCTACAGCGTGAAAAAAAGCGTCATGATGTAAGTGTAGGCGACA
ATGTTAAAGTCCTGTCATATGGTCAACAAGGTGTAATTACGAAGAAGTTAGGCGAGCATGAATTTGAAGTCCAAATTGGTATTTTGAAGGTGAAGGTAACTGATCGTGATGTTGAAAAGATCGCTGCACAAGCCAGTCAAAAAAAGCCAGAGAAATCTGTTCGCTCTAGTCGTGGTCTTCGTTCTAGCCGTGCAAGTAGTGAACTTGATTTAAGAGGGCAGCGTTATGAAGAAGCTTTGACTAACTTAGATCGCTATCTTGATGCTTCACTTTTGGCTGGGTTGAATACAGTAACCATCATTCATGGTATTGGTACAGGCGCAATTAGAAATGGTGTTCAACAATATTTAAAGCGTAATCGACATGTAAAGAGTTATAATTATGCTCCAGCTAATCAAGGTGGAACAGGGGCTACAATAGTTAATTTGCAATAAGCAATATACTTGCAAAAAGTAAGTAATTATACTAAACTATACTTATAATTTAAAGAATTTATTTTGTGAGGTAATAACTATGGTTGATGAAATTACAGATGCAACTTTTGAAGATGAAACTAGCGAAGGTGTTGTTTTAACTGATTTCTGGGCAACTTGGTGTGGTCCATGTAAAATGCAATCACCAGTAATTGACCAACTTTCAGAAGAAATGGACGATATAAAGTTCACTAAGATGGATGTTGACCAAAATCAAGAAACAGCTCGAAACTTAGGAATTATGGCAATTCCAACTTTACTGATTAAAAAAGATGGTAAAATTGTTGACCGCTTAACTGGTTATACTCCTAAGGAAAAGCTTGAACAAATCTTAGATCAATACACTGATTAATTTATAGTCAAAAAGGGTAAACCGCGATTGCGATTTACCCTTTTTGATATTAGTCTTTTTGTATTCATTAAGTTGATCAATCAGACTGAGCTGTAATTTGGACGACATGCTTTTTAGTAATCGTAGCTGAGGCCTCAGTAGGAGTGCCATTTTGGCGTTCTAAACATTCTGCAATAATTCCACTTTCCAATGAAAATTCATCACTTTGGCTATCTAGTCGATCAGTAACAACTTGTCCTGAAAGTTCGAAAACTGCAGTGTGTTTACGATCCTTGAGTACTTTTAAAGTTCCAAATTGAGCCATGTCAAAAAATTCGATCAAGTCATCGATGTCTGATAAGTCATATTTGCGACTGACGTGTTTACCGGCCCAGTAAAGAATTTCTTTATCTTCGCTACCTAAGATAGTAGGTAAGATAAAGTCACGGTATAATGAATTTAAAAAATAAAGATGTTCGTTTGTGTGTTCCATGCTTTCATTTCCTTTTGTCTTCTATAACATTTTAACTTAAAATAGGTGGAGGAGAAAAGAGATTTTAATCATCATTTGTTTAAAGGAGTTTTTTTAATGGATAATCGACCAATTGGAGTTTTAGATTCAGGCTTAGGTGGCTTAACTGTTTTAAAAAAAGTAATTGAGAAGATGCCTAACGAATCAACTATTTTTATTGGTGACCAGGCTAATATGCCCTATGGAGATCGCTCAAAAGAAGAGATTATTTCTTTAACTCGCGACAGTGTAAACTTCTTATTAAGTAAAGATGTAAAGATTATTATTTTTGGTTGCAATACTGCCACTGCGGTTGCAATGTCTACTATTAAAAAAGAAATTCCTTTGCAAATAATTGGAGTAGTTCAATCTGGTGCCTTAGCTGCAGCTAGAACCACAGAAACAAAGAATGTTGCGGTAATTGGTACTAAGGCAACTGTTAATAGTCATTCTTATTTAAAAGAAATTCAATATCGTGATCCAAAAATTCAAGTAAGCGAATTTGCACAACCTAAATTAGCGCCTCTTGCTGAGGAGGATCCTGCTGAAGAGATTAAGCAGGCTGTGGTCAGCGAAAGCTTAGCGCCGTTGAAAAAGGCTGATTACGATACCCTTGTTTTAGGATGCACCCATTATCCATTATTAAGAAAGGAAATTGTTGCAGTGGTTGGTCAAGATAGAAAGA
TTGTTGACCCAGCTGATCAGGTGGCACAATATACCTATAATGTTTTACGACGTGATGGTTTATTTGCGGCTGGTAATTCTGATACAAAACACGAATACTATACGACAGGTGAAGCTAAGAAGTTTACTGAAATAACACGTCAATGGATGAATGATGAAACAATTGTTGGTCATCATGTAGATGCTGAGGATTAAAAATGGATACTTTATTATTTGCAACTAACAATAAAAACAAGGCTAAGGAAGTTGAAGAAGCCTTAAAAAAGAATAACTTTCCGATTCATGTGATTACTAATCAAGATTTGACTGACCCACCACATGTGTTAGAAACTGGTACCACATTTTTAGCTAATGCCAAGCTCAAGGCTCACCAGATGGCAGAATTTAGCAATCTACCGACTTTAGCTGATGATTCAGGATTATCGGTCGATAAATTAAATGGTGCTCCTGGCGTTTATTCTGCGCGGTATGGCGGGGAAGCTCATAATGATGCTCTGAACAATGCTAAATTATTAGCAGAATTAGGCGGGGTGCCAAAAGAAGCGAGAAAGGCTACTTTTCATACAACGATGGTTGTTTCTTGGCCAGGTAAATTTGAGGATGATCTAGTAACTGAAGGCGAAATTCGTGGTGAAATTTTAACTTATCCACAAGGTGAAGGCAATTTTGGTTATGATCCGCTTTTCTTTGTTTCTGATAAAGGAAAAACATTTGCTGAAATGACGGTGGATGAAAAGAATGCAATTTCTCACCGAGGTCAAGCTTTGAGAAAATTACTTGCGGAACTACCGGCCTGGTGGAAAAAGATGGAAAACAAATAAATTTCGAAGAACTTATTCTGATTAGGGGATAAGTTCTTTTTGTTTTTTCTGCTTTTAATTATAAGTATGAAAGCGCTATACTAAAATTGTGAGTTTACTTAATAAATGTAAGTTTAATTGAAATTGAGGTTATAGCGTATGAAAAAAGTTTTAGCAATTAATTCAGGAAGCTCGTCCTTTAAATATAAATTATTTTCTTTTCCTGATGAAAAAGTTATCGCTTCAGGGATGGCTGATCGAGTTGGGATGGAAAATGCTGTCTTTAAAATTAAGTTGAGTAATGGGCGAGAATATATCAAAAATATGCCGATTCATGATCAAGAAGAGGCAGTGAAGCTTTTAATTGAGGATTTGAAAAAATTTCATGTAGTTGAAGATCTGAGAGAAATTACTGGAATTGGTCACCGAATTGTAAATGGCGGTGAAATATTTAAAGAATCAGTTCGTGTTGGTGACAAAGAGTTACAAGAAATATTTGATTTAGGTGAACTTGCGCCTTTGCACAACATTCCTGAGGCAAATGGAATTAAGGCCTTTATGAATATTGTTCCTAATGTTCCACAAGTAGCAGTTTTTGATACTTCATATCATCAAACTTTAGATCCAATTCACTATCTATATTCAATTCCTTATGAATATTATGAAGATTATGGAATTAGAAAATATGGTGCGCATGGAATTTCTATTTCTTATGTTGCACCGCGTGCAGCCAAGATGCTTAAGAAAAATCCTAACTTAGTTAACTTGATTGTTTGTCACTTAGGATCAGGTGCTTCAGTCACTGCGGTTAAACGCGGAAAGTCTTATGATACTTCAATGGGAATTAGTCCGTTAACTGGTGTAACAATGGGGACGAGAAGCGGAGATTTTGATCCTTCAGCACTTCAGCGTTTAATGCATAAGACCGGAATGAATATTGATGAAGCAATTGATGTTTTGAATTATAAGTCAGGTTTATTAGGTATTTCAGGTGTATCTTCTGATATGCGTGACTTAATTGAAAGTAAAGATAAGCGTGCCAAGTTAGCACGTAAGATATTCATTAACCGTGTTGTTCGCTATGTTGGTGCTTATGCAGCAGAGTTAGGTAGAATTGATGCAGTTGTCTTTACTGCAGGAGTAGGTGAACATGATCCTGGTATTAGAGCTGGTATTATGTCCTCATTAAGATATTTGGGGT
TAGAACCAGATTTTAAGGCTAATCGAACAGATGGTGAAAAGTTTATTTCTAAACCTAAGTCTAGAGTAAAGGCAATTATTGTCCCAACTAATGAAGAATTAATGATTGCTCGCGAAGTAGTTCGCGTTGTGCAATAAAAAAGGATTAAGCAATAGATAAAAGTCTGGTTGCTTAATCCTTTAAGATGCAGAGCTATTTTCTTGTTTTGTTGGCATGGGACTGAATTTAAGCCCGCGTTGATTGAATTCTTTGATATAAGCACTGAGATAGGCATTTTTAATCGTACTTTGGCTATTTGCAATTACTTGAAAATGAATTTGATAGGTAATACTTGAGCCGGTTTGATTAATGATTCCAATAATTTGAGGGCCCTTTTTGATTTGTTTTTTAAACTTATCTTTTAAAGCGGCGTTTACCGTATCAACGGCATGATTAATTTCAGTAAGATTATTTTCTACTGAAAGATTTAAATTAATATCCATGCTCCAGCCAGAGTGTGAACCATGACTCAAATTTTCAACTACGGTAATGTTGCGATTAGGAATATAGATTACAGCGCCATTAGTAGTTTTAAGGCGGGTATTGCGCAAGGTAAAGGAGAGAACAGTTCCGGTGTAGTTGCCAATTTTGACAGTATCGCCGATATTGTATTCACCTTCGCTTAAGATGTTCATCCCAGTTATGACATCGCTAACTAAGCCTTGTGCGCCCATCCCTAAAGCTAGAGATACTAATCCTACACTAGCTAATAGCGTTCCGACTGGAATTCCCATTAACGTTAAAATACTATATAAATAAAATAAGAGAATAGTATATTGAAAAAGAGCAATTCCTAGTTGAGCTAAGGTTCTTTTTCTTCCAGTCATGCGTTCCTGAAATTTAGGATTTTTTAATAGGTATTTTGTTAATAAGCGCTTTCCTATATGCCAAATTAAAAAGAAAATAATTGTAGTAATAATAATTTGGATGCCACGGCTTAACAAAGTGTGCAGTACATTGTCCCAGTTAATTAGGTAATCGATATTATTAGTTTTTTTAGTTGTAGAAGTTCCTTTTAGAAGAGTAGACCAGTTCAACTCATGCCCTCCTTATCTAGTTTCTTTAAAAATAGTATATCAAAAACTTGGGTACTATTTCATTGCTTGAGGGGAGGAATAATGCTAGACTTTTTTATGAGAGGAATAATTAATGGAGGATAGATTATGTCAATTTCATATGGTGCATTAGCTGGCTTAATTGCAGCAGTTGCCTTTTTAATCTTGGTCTTATTTACTTTACCTTTGATTGTTAGAGTTACTAAGACAATGAAAAAAGTTGACTCAACTATGGACAGCGTTAATACGGCTGTTGATGATTTAACGAAACAAACTTCTGTATTAATGAAGCAAAGTGAAGATTTATTAGAAAAAAGTAATGCTCTTTTAGCTGACGTAAATGGAAAAGTAACAGAATTAGAACCAGTTGTAAAGGCAGCAGCAGATTTAGGTGAAAGCGTGTCAGATATCAATTCTTCATCAAGAAGAATGGTTGAACGCTTCTCTGGTATGGGTATTAGAGGAGCTGGAATTGGCATCTTTTCATCTCTAGTTAGTCGCATGTTTGCAAGACGTAAACGTCGTCGCGGTGAAGACTAATATTTATTAAAGGAGAACAATTATGCGTAAACTTGGTGGATTTTTATTAGGTAGCGTTGTTGGTTTAGGTGTTGGCTTAATTGCTGGCTCATTATTGTTGCCAGAAGACGCAACTGACGATGTAAAGAAAAAACTTGCTGAAAATGAAAAATTGCAAGATTTGAAAGAAAAGTATGATAAAGGAACTGAAGCTATTAAGAATCAATTAGCTTCTTTCCCTAAGTCAGTTGAAGATGATTCCGAATTAAGAGATTTTGATGATATCGTAATTGATGACACTAACAAAGATCTTGGCGAAGATGAAAAATCTGACAAGGATGCTGTTAGCGATTTAAACAATGCTGAAGATACTGAAAATAACTAAAA
TTAAATATTTATAAAGAGAATCAAGTTCTACTTGGTTCTTTTTTTGCAATAAAAAACCTGTTGATAAAATTTATCAACAGGTAATTTTAGGCCTAGTCTTTAACTGGAATATATTTTAATTCCTTGCTTGTCTTAGTGAAGGTTTCAAAACCGTTTTTAGTGACTACACCACAATCTTCAATTCTAACTCCAGCTACGTTAGGGATATAAATACCAGGTTCAATTGAGAAGCACATGCCTTCCTCGAGAACTACGTCGTTTCCTTCCATGATTTGTGGAAATTCGTGAACGTTCATTCCAATACCGTGGCCAAGACGATGAATGAAGTATTCACCATAACCAGCCTTAGTGATAATATTTCTAGCAACAGCATCTAATTCGCTAGCAGTAATGCCTGGTTTAGCTGCTTCAATAGCTGCTTGTTGCGCCTCACGGTCGACTTCATAAATTTCTTTTTCTTTAGCAGTTGGTTCGCCGTAAGCGACAGTTCTACTTGAGTCAGAAGCATAGCCGTTGTGCATGGTTCCAAGATCGAATAAAACTAATTCGTTTGGTTCAATTTTGTTCATTGTTGGACCAAGGTGAGGGTTAGCAGCATTTGTACCAGCTTGGACAATTGTCTCAAAACTAGTGTGCATTACACCTTTTTGAAGCTTTAGTTGGTAATCAATTTGTCCAGCAACATAACGCTCAGTTACGCCGTTTCTTAGGGCATTAAAACCAATTTGAAAGGCAAAATCAGCTTCTGCACCAGCAGCCTTTAATTGTTCCACCTCTTCTGGAGTTTTACGTAAACGTGCTTCAGCTACGAATGGAGAAACATTATTGGTAAATGAAGAATCAGGAAAGGCAATGCGTAATTGTTCTAAACGGTCAACTGAAAGATGCGATTTTTCGATTGCAATATTATTTGGTTTACCAACACGTTGATTTACTAAATCAGCAATTTTTGCCCATGGGTTTTCATGGTCAAGATAGCCATAAACATCGCCATCCCAAGCAGAGTTTTTAGCTTCTTCAACATTTAATTCAGGTGCAAAAATAAATGCAGGCTTGTCTTTAAAAGCAATTAAAGCAAAAATTCTCTCATGTGGATCCATGCTATAGCCGGTGAAATAATTAATACTGATTGGATCAGAAATATAAGCAACATCATTATTGGTTGAAATTAACCAATTTTGTAACTTGTCTAAGTTCATAATTTTGGCTCCTTTTTTAATATATACTGGGAGTATACCATATAATATGGTAAAATAATGAGTTTTTAGTCGTAAACGGTTGCAATGTGTGAATATTCTTGATAAGTTATAAAAGAGTGTTTTTACAGGAGTGTGAGAAATTTATGCAAAAGCAAGAAGTAACAATTTATGATGTTGCTCGTGAAGCAAAAGTTTCTATGGCTACTGTTTCAAGAGTAGTAAATGGAAATAATAATGTACGTAAAGAAACACGTGAAAAAGTATTAGCTGTCATTGATCGTTTGCATTATCAACCTAATGCAGTTGCTCAAGGATTAGCTTCAAAGAGAACTACAACAGTTGGATTGATTGTCCCTGATTTGACAAATATGCACTTTGCTGAATTATCTAAGGGAATTGACGATATTGCTACAATGTATAAGTACAATATCTTACTTTCAAGTGTCGGAAATACCTTGTTAAATGAAGATCAAGTTATCCAAAATTTACTTAATAAGCAAGTTGATGGTGTAATTTACATGTCAAACTTGATGAATGAAAAGGCACAAGAAATTTTCAATCGCACTAACACACCAGTTGTTTTAGCTGGTACTGCTGATGCTAACCAAGAATTTTCTAGTGTAACTATTGACTATAAGGCTGTTGAAAAGGAAGCTTTAGGTCTTCTATTAAAAAATGGTAAGAAGAACTTGGCCTTAGTCGTAGGTGACGGAAAAGCTTATGTTAACAAGGACAACCGTTTCGTTGCCTATAAGGATTTCATGGAAGAACATAATTTAAAGAACGTTCACATCTATGAAG
ATGCCAAGACTTATGAAGATGGCTATGAATTATTTAAGCAAATCAAAGCTGATAATGTAGATGGAATTATTGCTACTCGTGATGTAACAGCTGCAGGTATCGTTAATGCTGCTGCTGATGCTGGTGTAAAGATCCCAGAAGATTTAGAAATTATCTCCGCTGCTTCAACTAATGTCGCTAAAATTGTTCGTCCTCAATTAACAACTGTTCAACAACCGCTTTATGATATTGGTGCTGTTGCAATGAGAATGTTAACTAAGTTGATGAATGATGAAGAATTAGACGATTCACATGTTATCTTGCCATATACTTTGAAGAAGTCTCGTTCTACTAAGTAGATCTAACTATAGAAAATCCCTTTAGGTAATTTACCTAAAGGGATTTTTTAATATATAATTTTAATTTTTCTACGGAGCAGTCGGACCACTATCAGTGGCTGTATTGCCTTCACTAGTTTCGCTGCCAGTATTTGTATTAGCAGCTGAACCTTCATTAGAACTAATATTTGAATTTACATCATTTGCGGCATTGTTGCTCTCGGAATTACTATTTTCAGTTGAAGAAGATGATTCACTTGTGGAACTTGCACGACTCCGTCTGGTAGAAGTAGATGAAGAAGAACCATTACCATTTCCGCCTTGACCAGTACTTCGAGCTTGCTGATCATTAGCTCCGACAGTCCGATTAGTCGAAGATGAGGAATTATTATTTGATTCACGATATACTTCGTGGTTTTTACCATAGTAATTTTTAGCATCTTCTGAAGTTGAGACATCAGCTACAGCAAACAGACTGTTGATATTTTTCTTCTTAGAAATTGTCTTACCAGTGTAGATTAAGACTTTACCATAATCATTATTCTTGCCTTCTAAGTAGTCGTAGAAGAGGTTGTAGTCTTTAGTTGATCCACCGATACCAAATCTTGCAGTAGCAGCTGGAGCAGGGGATAAACTTAATGCGGTAGTCTTTTTACCAGAAAGTTCAATATTATCGCCATCATAGCTTACAGTTCCTGGCAATGTTCCAGTTCTTGCTAAGACTTTATTTTTATAAACAGAGCTTGGACGACTTGGTGCATCATTTAACTTAAAGATTGACGGATCTTCTTCGTAAAGTGCATTGGCGATATTTGCCCAAAGGTTTAAGTTGGTTTCACTGGAATTAGAATCTAGGTTATATGAGTGACCATAGAAGTTGTCATACCCCATCCAACTAGAAATTGTGATTCCAGGAGTAGACCCATTGAACCAAATATCACGATAATCATTACTGGTTCCAGTTTTACCGATTAAGTTCTTGTAGTTAAACTTCAAAGTACCAGTTAAGCTTGAAGCAGTACCCTTAGTTACTACTTGGTGAAGCATCTTTTGCATGATGTAAGAGGTGCCGGTAGAGAAGACTTTTTGTGGATTTTGTTTATGCTTATAAATTACTCTACCTGATGGATCTTGAATTTCTTCAATGTAGTAAGGATCAGCTCGCTTACCATTGTTATAGAAGTTAGAGAAGGCACTAGCGTTATCAGCAACTGAAAAACCATAATCTGTACCACCGAGGGCCAAACCTAGGTTTTCAAATTCGCTTTTACTCAAGTTTAAGCCTAGTTTTTTCATATCCTGACGTAAATTAATGTTCTTATCGTTGACTAATTTATTGTATAAGTTTACAGCAGGTAAGTTATAAGATTTACTTAGAGCTTCTTGAGCTAAGATAAACCGGTTTTCAACAGTGGAATTGTAGTCAGTTGGAATATAGTTGCCAAATCTAGTAGGGAAGTCTGCTAATGCTGTTTGACTAGAAATAAACCTGTGTTCAATTGCTGGACCATAAACTAGATATGGCTTAATAGATGAACCAGGTGAACGATAAGTATCAAAGGCGTGATTAATTTGAGAGTTCTTAAAGTCAACTCCGCCGCTAAAAGCTAATACCTTGCCAGTTGCATTATCAATTACAACACTACCGTTTTCCACATGCTCGGTAGTATTAACCCACTTGTT
AGTAGATGAATCAAAATCACGAGAAGTCTTATCTTGACCATATTTATTTTGCTTAACTATGCGTTGCATAGTTTGATAAAGTGGCTGTCTAATCGTGGTCTTAATATGATAGCCCTTTTGATGAAGCAGTTCAGTTGCACTAGTTAAGTATTGATTATAACGATTAGTATCTTGTTTAACATCTGATACCTTTAAACCATCTTGCTCAATTAATTTTTCTGCTAAAAGACTAGTACTCTTATTCATTACTAAATTATATAAGTAGCCACTTTGTTTCTTTTGTTTAGATGCTTTTTCAGGAGCAAGGAAGTCAGCACGTAAATCATATTTTTTAGCTGCAAGATAGTCTTTTTTACTGATATCGCCGTTTCGATACATCCGAAATAGAACAATATCTTTTCTACGCATCGCTAAATCAAGGTCTTTTTTAATTTTTCCGTTTAAACGATATGGGGTATAAACAGATGGACTTTGAGGTAAACCAGCAATAAAGGCAGCTTGAGGAAGGTTAAGTTCAGAAATAGACTTACCAAATATCCCTTGAGCTGCAGTTTTAATACCTACGATGTTTTCTCCACGGTTGTTTTTACCATAAGGAGCAGCATTTAGATATGAACGGAGAATATCTTCTTTTGAGAAATGCTTTTCAATCTTATGAGCATAAAACATTTCGGTAACTTTTCTTCTCCAAGTGGTCTGACTAGTTAAAAACTGCATTTTAACTAATTGCTGAGTTAAAGTCGATCCACCAGTTTGTACACCAACACCAGTTAGCTCAGAAAGAACAGCTCTAACTAGGGATTTAGGTAAAACCCCTTTATGAATATAGAAATTTTCATCTTCAGTAGCTGTCACAGCATTCTTAACTAAAGGAGTTAGTTCGCTTTCACTTGCCTTTTTAGTGACGGTATCTGGACGAACTGTAGCAATTTTTTTATTGCCAGCATAGTAAAGTGTGGCTGAATTTTGTGCATGGTTTATTTGCTGATTTAATTCGCTAATTGTTGGAATGGGCTGCTGGCGAACGATGCCAAGAGCATAACCACCACCAAAACCGATTAGGAGAAACAAACCAAAAACTGCAATTAAGATCAAATAATGAAAAACACGACGTAAAGTTAAATAAACGATGCCACTGTAAAATTTCCATTGGCTTTCGTCTGATTTGCTTTGAAGTGTTTTAACTTCTGGCCCCGCAGTTAGGAACTCTATGATTTTTTCTTTTAAATTTTTCACAGATCAGGATCCTTTGTTAGTTACTTTCCCCCTATTATAGCAAAAAAAGGACTAGCAATGACGCTGTCCTTTTTTACGAATTTATTAATATTTTGATTTATTTAGTTAATTCATAGATTGCTTCGGCATAAATAGCAATTGAATTAATTAAATCGTCAACCTTCATGTATTCATTTGGTTGGTGCATAACAAGTGGAGCACCTTCTGGTTGAGCACCAAAGGCTACACCATGTTTGAATAAACGGCCATATGTTCCGCCACCAATGATAACTTCGTGACCTTTCTTACCAGTTTGCTTTTCATAAACTGAAAGCAGGGTCTTAACAATTGGATCGTCTCCAGGAACGTAATGAGGAGCTTCTGCTGAACCATCAATTCTGGCGTCTAAGATATCGCCGAATTTTTCGTTAATATTTTTAACCATTTCTTCTGGCTCAATACCTTGTGGGTAGCGAACATTGTTTAATAGGTATGCGTTTTGACCTTCAAAGTTAAACATACTTGGTGAACTAGTTAGGTCGCCCATTAAGTCGTCATGATGGAAAATACCTAATTTCTTACCGTTAAAGTCTTTGTGTTCAACAGTTGCAAGGAAGTGTAAGAAATTCTTCGCTTGACCGTCAAAGTTAAGGCTATCAAGGAAGAGAGCAAGGTAAGTAGCAGCATTTCTACCAGTTTCAGGAGCAGAAGCATGAGCACCATGACCGGTTAAAGTTAATGAAAGACGGCCGCTAAGCATTTCTGCCTTTCCTTCAAGCTTATGC
TCTTTCAGGAATAAGTTGAACTTTTCTTTGATGCCATCGATATCGCCTTCAAGTTGAGCATGAGCTGTTTGAGGGATAACATTAGTTGCAATTCCTGATTGGAATGTCCAAAGTTTAACATCGCCTTGGCTTGGGTCATCTTTAAAGTCAAGTTTTAAGGTAACAATGCCTTGTTCACCATTGATAATTGGAAATTCTGCGTCAGGAGAGAAAGCAACATCTGGAGCTGGTTGATGCTTTAAGTAATAGTCAATTCCAACCCAGTTAGTTTCTTCATTAGTTCCAACTACGAAGTCAATTTTCTTTTTAGGTTTAAAGCCATGCTCTTTTAAAATCAGCATGCCATAGTAAGCTGCTAAGGATGGGCCTTTATCATCAGCACTACCACGCCCATAAATTTTGCCATCTTTAATAGTCATCTTAAATGGATCAGTTTTCCAGCCTTCGCCAGCAGGAACGACATCCATGTGGCCGATAATACCAACGCGTTTGTCGCCAGAACCCATGTTGATTCGACCAGCATAGTTATCAAAGTTTTCAGTATCAAAGCCGTCACGTTTAGCAAATGATAAAAACTTCTTCATAGCCTTCACTGGGCCAGGACCTACAGGATATTCTTTGCTAGTATTGTTTAAATCTTCAGAAGAATCAATTGCAATCAATTCTTTTAAATCATTTAAAATAGCATCTTTCTTTTCTTGTGCTAATTTTTTATAATTTAGTTCTTCCATAACGTACTCCTTTAATTTTTATCTCTGATTAAATATTACAGCATTTACATCTGAATAAGTCGACCAAAAGCTTTTATTTGGGACTTTTTCAACTGGTAAATAAGGAAATAGGGCAACGTTTGTTTGACGAAAAGCAGGATCGAGTTTTCGATAATTCTTATAGGCAAATGTGTTTTTATTAAATAAGCCATAATATAAGACTTTAGACGACTTAACCCAGCCATCATCAGTAAGAAGCCATAAATCTTTAACTCCGCCACTTACACCTAGTATTTTATAGTGTTTTCTTGGCATAAGTTGATTCTTTTTACCAGATTGCTTATCAGGTTTATTATAAGTGAAAATTGGATTTTGAGTAGGGTTAATAATACTTAAATTCCAATATGGTAAAATCCATTCAGGTTGTAGGTCCAAACTAACAAGGTCAGCGGGAACAAATTCCTTATCGCCAATTTGGTAAAAAGTCTGTCCTGAAGCAATAACAGCTGAAGAAACATGAACTGGGTCTAAAAAATTAAGTTCTTTTGATGTTGTCACCTTTAGATATGGATTCTCACTAATTGTAGTAGGTACACGGCCAAAGTAAATTCCGTCCTTGTTAACGACATATTTTTTAGTGCCAGTTTTAGCTAATTTGTATTTATAGCCTCTTGCGGGGAAATTAGTGACAACTCCATCAACATCATTGTTGATTAACCAATTCCATAACTTAGGCGACTCATTCATTTCGGCCCAAACAAATACCTTTTGATGTAGCGCGTGAAGTTGACTAATTAGCTTGGGATTTTGGGTAACTAGATCAGATGAGATATTGATTCCGTTTACGTAACTTAGTACTTCAAAATTAATTCTTTGCAAAGAACCAACAATAAAGATTCTTGGAACATCTGGTAACAGTTGACTCATAGTTTTTAAACTTGGTGCAGAAAAGCTGTGAATCATTACTCGATCTTGCATATGGTATTTCTTGATACTGGCAGCAAGGAGTTCTTCCATATTTTTAGGACTATTATGCTTAGTTTTTTTTGTTTCAAGCAAAAATTTAGTGTTAGGTTTATCTTTGTAGTAATCAAATAATTGATCTAAAGAAATAATTGATTCACCATTAGCTTGTTTTAAGGTGTTTAAATAGGAAAAATTATTTTGTGATACAATAACCGATGATCCAACAACTCTGCTTAGATCGCGGTCATGAGAAACAACTAAGACGTTATCTTTGGAAACATGCAGGTCGAGCTCTACATAGTCCGCGCCATCTGCAAAAGCTGA
GTTATCACTTTGAATTGTTTCTTCTGGATATTTACTGGGATTACCGCGGTGACCAATGACTAGGAATCCCGATTCAAAAATAAAAATCAAACTAAAAAAAAGGGCTAGGAAATAAAGCCTGCTTGTTTTCATGTGTACACCTCTATTCTTTTTAACAATAGCATGGATGTATACTTTTTTCTATAATTAATATAGTAGTTAATAATCATTTTTAAGAAAGTAGATGCTGCAATATGGCTTATGAGCAACTAGATTTAATCGAAGAAGTTACTCGCAATGATGGATCTAAATATTATGAAATTTCAAATATTGATCAAAATGGTATCGCTGAATTAGCCGTAGATCGTGGATTGATAAAGAAGGTGCGGATACTTCAGCTAAATTTGGCTAGAACAAAAGCACTTCAATTATATGAAGAATATATTAATAAAACGTACCAACTAGAGACTTTAACCAATGAGGATGATTGGAAAGATCCGCAGTGGGTTGAGTGGGAAAAGCCTAAAGGAAAAGTTTTAGACGCCTTTAATACAGTATTAAAAGCCAATCATATTGGTTAGAAGAGAGTAAAGTATGGAAAAATTACGTATTGTTCATACCAATGACCTACATTCTCACTTTGAACAATTTCCTAAAATAAAAAGATATTTACATCAAGCACAAAATGACAAAAGTGTTGATCAAACTTTTACCTTCGATGCTGGAGACTTTATGGATCGTTCTCATCCCTTATCTGATGCAACTGAGGGCCAAGCTAATATTAAGCTGATGAATGAATTTAACTATGATGCGATCACAATTGGTAATAATGAAGGAATCTCTAATTCACATGCAGTTTTAGAAAAATTATTTGATCATGCGAATTTTCCAGTTATTTTAGCTAACTTACGTGAAGAAGATGAAAGTATGCCTAAATGGTGTACTTCATATAAGATTTTTGAAACTAAGAAGAAAACGAGAATTGCAGTTGTTGGCTTAACTGCTGCATATCCTATGACCTATGGTCCTAACCACTGGCATGTGAAGTTGATTTCTGATACTTTAGATAAGTTACTTCCTACAATTAACGGGCAATATGATATGTTAATTATCGTTAGTCATATTGGCTTAAGAATGGATCGGTATATTGCACATCATTATCCAGAAGTGGATTTGATTGTTGGTGGGCATAGTCATGATTATTTACCAAAGGGCGAGAAAGTAAATAAAACATGGATTACCCAAACTGGAAAATGGGGTCAGCATATCGGCGATATTGCGATTCAATTAAATGATGATCACCAGGTAGTGTCAATTGTACCAAATACTGTCAAAACCGCTGATTTACCAAGTTCGGAATCTGATCAAGCTAAAATCGATGCTTGGCGTAAGCAGGGTGAACAAATGCTGCAATCACGTCAAATTGCAAAGTTACCTGCTAAATTTAACGATGATAAAATGGCAGCAATTCAAGTTTCTCTAGATGCAATTAGTGATTTTGCTGGAACTGATATTGCGGTTTTAAATAGTGGCTTATTTTTGACGCCATTTCATCAGGGAATTTTAACAGAAGCTGATTTACAAGCTGCTTTACCTCACCCAATGCATGTAGTGCAAACTAAATTATTTGGAAGTGATGTTTGGCGCATGGTAATGGAGATGGAAAAGAACCGTCATTTTCTAAGCAAGTTTGCCCTCAAAGGAATGAGTTTTAGAGGGAAAATTTTTGGCGATCTTGTTTATAAAGGCATTCAGGTTGATCATGCAAGTAGAACAGTCTACATTAACGGAAAAGAGATTGATCCTGAAAAAGAGTATACAATTGCCTTGCTTGATCATTATGTTTTAATTCCATTTTTCCCTACCATTTCAATTATGGGTAAAAACAAGTTTTTATTCCCTGATTATTTACGAACTGTAATTGGAAACTACTTAAAAGAGAAATATCCAATAGATAAATAGAAAGAAAAATAATGGAGTATAAAGAAGCAGATAAAGAACAACCGCTTTATCATCCTTTG
GCTCATGTAATCGTTAATGAAGATAAGATAATGATTAATGGGCGAAAGTATGAAATTTTAGCTAATGTAAAAGATGCTCTAGATATTGAAATGCTAAAAGAGAAATATGATCCTTTCCTTGACCAGTACGATTATTTAGTAGGGGACATTTCAAGTGAGCATTTAAGATTAAAAGGATTTTACGATGAAAAAGATCGCGTTGCAATTGATAAAAAAGCTAATGCGATTGTGGATTATTTAGAGGAATATTGTAATCCTGGCAGTGCATATTTTGTTTTACGACTAGCTCAAGAAAATCAGATTAAAAAGGTTGCATTTGGCAGACAAAATAAGAGAAAAAATATGTCTAATAATCGCTATCATTCTAAACGTCCCTACTTTAAAGAAAGACGTGTCCACAAAACAAGAATTGGTGGCCATAAGACAGCTGTAAAATTTCAACGTGGAAAAGGAAGTCACAAGAATAAAGGCTTCGTAATTAAGAAAAGAAAGGGCTAATTGTGGAACATAAAGATTATAAATGTTACTTAATTGATTTGGATGGGACTATTTATCGCGGTAGCGATACGATCGAAAGTGGCGTTAGATTCATTCATCGTTTGCAAGAGAAAAATATTCCCCACTTATTTTTAACTAATAATTCTACTCGTACGCCGCGAATGGTAGTTGATAAATTGCGCGGACATGGGGTTAACACAGATATCTATCATATTTATACGCCGGTTTTAGCTACGGAGTCTTACTTGCTTGCTCAAAATCCAGATACTGCTAAGATACCAGTTTATATCATTGGACAGACAGGATTAGTGCAAGGTCTGTTAAAAAATGAACGTTTCTACTACGATGATCGAAATCCTAAATATGTTGTAGTTGGAATGGATACGGATTTAACTTATCATAAAATTCGCGTTGCTACTCGCTCAATTAGAAATGGCGCAACTTTTATTGGAACAAATGCTGATAAAAATTTGCCTTCTGGGGATGAGCTATTGCCTGGTAACGGTGCACTATGTACAATGCTAGAAGTAGCAACAGGTGTTAAGCCCATTTATATTGGTAAACCTTCATCAATTATTGTGGCTAGTGCTTTGAAGATGCTAAATGCTCAAGGCAGGGATGCAATTTTGGTTGGGGATAACTATGATACTGATATTATGGCAGGCATTAACTGTAATATTGACTCTTTGTTAACTTTGACCGGTGTAACTACTAAGAAGCAATTGGCAGAAAGAGATAAGCAGCCAACTTACGTTGTAGAAAATCTGGATGAGTGGAAACTATGAAGAAGAAACCAATCGTCGCCGTTGTTTACAATTGTTTGATGGCTCTTGCTACTGCTACTTTAGGTGCAGTAGTTTTAAGTTGGCCATTATTAGCAGTTTTTGTAAAAATTCAAAAAACGAATTTGATCGTAAAGACAACCTTAGCAAAACTATACACAACTCTTAGATTATTTATTGTCGCCATTTAATGATAGACTTCAGATGTCAAATTTCCCGACTTCTCCAAGTGCAGCGCGACATTTTTATGAGTGTAAGTTACTATTTGAATTAGCGATAATTGTTTTTGTAGTTGGATTAATTATTCTTATTTTCTTAAAAATGAGAAAAAGAATGAATTATATTTATATTTCGAGAACAACAGCGCTAATTTTCATGATTTTGCCTGTGATTATCTTACCGTTTGCTTTAATGAATTTTGATGAATTTTTTGTAGCATTTCATCATCTATTGTTCAACAATGGGGATTGGCTATTTAACCCTGCGACAGACCCAATTATTAATGTATTAACCGAAGAGTTTTTTGCTGGTTGCTTTGCAACTGGCGGAATAATTTATGAGTTATATTTTTCTTGTTTTATTTTGTCAAAAAAATAAAGGAGCTTTAACGAAGCTCCTTTTTTAATACTAATTTTTTCTTTAACCATATAATCAAGACTGGTAAAACTGAAATTAAAATGATTGCAATCACGATCAATGAGAAATGTTCCTGGACGATTG
GAATATTTCCAAAGAAGAATCCTAATGCGGAAAATAAGGTTACCCAGAGAAAACCACCAATGAAGTTATAAATGATGAAAGTTCGATAATGCATCTTACTTGCACCTGATACAAATGGAACGAATGTACGAATAAATGGAATAAATCTTCCGATTACAATTGTAATACCGCCGTGTCGATCGAAGAAGTTTTCAGCGGCTTTTCGTTTATCTTGATTAATTAACTTGTTGAACCAACTGTGTTTAGTCCCTTCAGCTTGTGACCAATGTCCGATTTCATAGTTAATCGTATCACCTAAAACAGCAGCTGCTATGAAGATTAAATAGCAAACCCAGAAGTGAAGATTATATTTGGGCGTTGCTGCCATAGCACAACATGCAAATATAAGTGAATCTCCAGGTAGAAATGGAAAGATAACCAGACCTGTTTCGATAAAAATAATTGCAAATAAGATTAAGTAAGTCCAGTTTCCAAACATATTCACAATGGTGACTAAATGGTCATCAATATGAAGAATAAAATCGATAAGTCCCATAAATAAAAATAGATTCCTTTCACAAATTAAATACTGGTAGAATGAAAAGTCATTGTTTTTTCTGGAAATAGGTCTTGCATAATGTTGCTAATTGCAATTTGTGCCTCGCCAAATCCTAAAGCAATCATTGGCACTCTTCCAGGATAAGTAATAGCATCGCCAATTGCATATACATGAGGAAGGTTGGTATGCATTTTTTGAGAAACAGCGATTAGATTATGGTCTAATTTGATCCCCCATTTACGTAATTGTCGATTATCGCTCTTGAAGCCATATGCAACTAGGATTTCATCAACATTTTTAGTAACAAAATCTTGACTTGCACCAACTTTATGCAAAACTAATTCCATTCTATTATTGTTTAACTGCAGATCTTTTGGAAGATAAGGAGTTAAAAGTTCAACATTTTTTAAACTTTTAAGCTTATTGACGGAGCTTTCAAGACCGCGAAATTCATTTCTCCGGTGAATTAAGAATACATCTGATGTGTTTGCTAATTCTAGTGCCCAGTCAAGTGCAGAATCTCCACCTCCAAGAATGGCTACTTTTTTGTTAGCAAAGATTTCAGGATGCTGCATTGAGTAATGGATATGTTGGTTTACGTCTAAAGTAGTTGATAAAGGAAGAGTTTTGGGCTTAAACGCGCCAAGTCCCGTAGCAATTAATAAACTTTTTACTTCATATTTTTTGTCAACTAGGATATTTTCATTTTCTAAAAAAGAAATCTCTTTTACTTTATGAGAAAGAATGAATTTAGTATTGATGTTTTGCTCAAGTAGCTGAGAAACTAGCTTTTTACCTTTAATAGAAGGAAAAGCGGGAATATCCTTAATATTTTTTTGTGGATAAAGCATATTAATTTGACCGCCAATTTCGTCTAATGCATCAAAAGAAATTGTCTTTAGTCCATGTAAATTAGCAAAAGAGGCTGCAAAAAGTCCAATAGGACCTGCACCAATAATAGCTAAATCATATTTTTCCATCTTTATAAGAAATCCTCCATAAAAATTAAGCATAGAAGTCATGTCTTTAGCGTTAATTAAAAAGACAGTTATAATTAATATTAGTAAGGGATGAGATAGTATGCAAGTTAAAATTGGCGATATAGTTCATGGAAAAATTAGTGGGATACAACAATATGGGATTTTTGTAAGGCTAGATAGCAAAGTAGAAGGCTTGATCCATATCTCTGAAATACATGGTGGATATGTTAAAGATATCGGAAGAGAGTATCAAGTTGGTGAAACAATAAAGGTACAAGTAATTGATATAGATCCATATTCTAATCAGATTAGTTTATCTAGAAGGGCAGTCCTTCCTGAAGTAAAAGAAGCAAGAAAAAAGCGAGTCCATTTTTGGACTTCAAAACGAGTTAAGAAAGGTTTTACTCCTTTAAAAGAAGTATTGAATACTCAAATTAAAGAAGCAAAAAGTAGATATTCTAAATAAAGAGTAAATAAACGTG
TTAAGGAGTAGTGCTTAACACAGTTTTTTTATATCCGAATTTT +>1_2#NODE_6_length_39999_cov_63.2183_ID_11 +GGAGGTCACTGAGCTGGGGACGTTTTCCGTCAATACCCTCACGACTTTCCACACCACGGTTCAGCTGGCTCAGTGCGATGATGGGGATGTTCAGTTCTTTGGCTAACCCCTTCAGTGAACGTGAAATGGTACTGACCTCTTCCTGACGGCTACCGAATGACATTCCGCTGGCATTCATTAACTGAAGGTAGTCAATGATAATGATTTTTACTCCATGCTCGCGTACAAGCCGTCGTGCTTTGGTGCGTAATTCAAAGACGGACAAGGATGGTGTGTCATCCACATACATCGGAGCATCGTATAATTCTTTTATTTTATAATCCAATTGTCCCCATTCATACGGAGCCAACTGACCACTCTTGATTTTCTCACCCGGAATTTCGCACACGTTGACAATCATACGGTTGACCAACTGTACGTTGGACATTTCAAGAGAGAACAAAGCCACAGGGATTTTCGCATTTACCGCCATGTTCTTTGCCATGGAAAGCACAAAAGCCGTTTTACCCATAGCGGGGCGGGCGGCAATAATCACCAGGTCGGAATTTTGCCAGCCCGAGGTCATTTTGTCCAATGCATGGAAACCGCTTTCCAGACCACTCAGCCCGTCCGTTCGTGCCGCCGCTTTTTGAAGCATCTCATACGCTTCCTGAATGACAGGGTTGATTTGCGTATAATCCTTCTTCATGTTCTGTTGCGAGATTTCGAACAGTTTTCCTTCCGCTTCCTGCATCAGGTCGTCCACGTCTTGGGTTTCGTCGAAAGCCTTTGTCTGGATGTTGCTGGTAAAGGTAATCAGTTCGCGTGCAAGAAACTTCTGCGCGATGATACGTGCGTGATATTCTATGTGTGCGGAAGAGGCTACCTTGCCGCTTAGCTGTGTTATGTAAAAAGGACCGCCTGCATCTTCCAGTTCACCGGTGCTGCGTAATTGTTCGGCCACAGTCAAGATATCGACTGGTTGCTGGCGTAAAGCCAAAGAGGTAATAGCCGAATAAATAAGTTGGTGACGGCGTTCATAAAAAGATTCGGGACGAAGAATCTCACTTACTAATGAGTAAGCATCTTTTTCTATCATTAAGGCTCCCAATACAGCTTCCTCCAGCTCAGGTGCCTGGGGCTGTAAATGTCCGTATTCGTCAACAGGCTTTGCTTTCGAAGTTTTGGGAGTACGTGTCATTTTTCTCGTTTCTGCCATTGTCGTCTGTTTAAAGTAGGGCAAAGATAGAACTTTTTGCTGAGTTGGACACTCGTCCATTCCCAAAAATAATAAAAGAAATATGAACCTGAATCTACGGAGAAATGAACAATCTGGACAGTAAATGGTTTTTTTATATGGTAAAATGAAAAGTTTGTTCTATATTTGGCGCTCTGTCCGGATAAAAGATGGCGTGCATCTTTGTAGAACGGGCAACAATCTAACGAAACACACTAAAAATAACTATTTAAAGAACAGAATATGATTACATTTCCTAATGCAAAGATTAATTTGGGACTTAATATTGTAGAAAAGCGCCCGGATGGATACCACAATCTGGAAACCATATTTTATCCGATAAACCTGCAAGACGCTCTGGAAGTGACGCGGCGGGAAAACAATGACAAAGAATACACTTTACACATAAGCGGATCTCCTTTGGAGGGTGAACCCGAAGATAATCTGGTAGTAAAAGCCTATAAGTTATTAAAAAAAGACTATCCCGGACTGTTGCCGGTAGATATACACATGTACAAGCATATACCGGCGGGTGCCGGACTAGGTGGCGGATCATCAGATGCCGCTTGTATGATAAAGCTTCTGAATGATAAATTTTCATTGGGACTCAGTACGGAACGGATGGAGGAATATGCGGTAAAACTAGGGGCAGACTGTGCTTTTTTCATTCGGAACAAACCTGTTTTCGCTACCGG
AATAGGGAATCTGTTCGAGCCTGTGGAATTGTCATTAAAGGGATACCATATAATTTTGATTAAACCTGATATCTTTGTGTCCACCCGGGATGCGTTTGCCGAAATAAAGCCTGTGCGTCCGGCTGTTTCTTTGAAAGAAATTGTAAGGCAACCTATGGAAACATGGAAAAACAGTATGAAGAATGATTTTGAAGATAGTGTCTTCAAAAAGTTCCCTGAGATTGCTGCTATCAAAGATGAATTGTATGATTTGGGGGCAGTGTATGCCGCCATGAGTGGCTCGGGGTCTTCCGTCTATGGTATTTTCAAAGCACCGATAGAGAATGTGGAAGATAAATTCTGTGGATGCTTTTGCAGACAGAGAGCGTTGGAATAGTGTGGCAGATCAGTGGTTAGTGATTAGTGGACTGCGCTGTCATGCCGCAGGTATTAACCACTAACCACTATAGTCTATCTTTTACATAGTGCCTTGAAATCACAATAGGTACATTTTTCTATGATTTCTGTTTGGGTAAATGATTTTTCCGGATTAAATATCTCTTCTAATAATCCTTGAAGTCGTTCACGGTATTCTTTTTCATATTTACTGAAATCTTCCACAGCCTCTTTGGGCTTTCGTGGCTCGCCCATTTGTATAACGGGAGAATAGGTTTCTGTAGCGGCCCGATGAATATAGAGAAGGGCCGGAGCTATCTTCATCGTTGGTTGCTTGCGGCACATGATGGCGGCATACAGGAACGTCTGGAATACATAGTTGGAACGTTTCTTGTCCGGAATAAACAGAGATTCCACATGGGGTGGGGTGTCTGCATCACCGCCTGTCTTGTAGTCCACAATACGGAGTGTGCCATCCTTGCTGTCCATACGGTCAATAATTCCCCCGATACGTGATTTTATGACTCCTTTGGGTGTTTGTATGTCGATGGGTTCATCCACTTCCATTTCCGATGCAATAAAAGTGAAGGGAGCGTAACGTAAATCATTTTGTAAAAGTTGTTTTAAATATCTTGCAATAACGGCAGAGTTGATAAGTTGTATGCCGTTATATTCCGGTTTCTCGTTTTGCGGAACATTAAAGAATAATTTCTTGAATGCTGTATCCACATAATCTTGCAGCTTCACTTCATTGCGTAGCAACGTTTCCAATGCTTCCTTATTAATGACTTTGCCATGAGTGGTCAGGTCTTTATAAATATGTTCGGCTGCATAATGGAAAATACTTCCGAATGTGGCAGAATCTATTTCCGCACTCACTTCATCGGGGGCGGAGAGGCCGGCTACATACCTATAATAAAATTTTAAAGGACAATCCAGGTAATAATTCAAGGCTGATGGAGAAAATTTTGCTTTGGGATTGGCACGGACATCAAACAGGCTTTGCATCCGGCGCATGACATCCGGGGTCTTTTCTACAGTGATAGGCGAGGTGCCTTGGGGGGACTGTCCGGCCTCCAGAAACTGGCGTGTGATGGGGTGAGGCCATTCAATAAGGAATTGCAGCATGAAACGGCTCCATTCGCCGCGGTTCAGTCCGTCCGAACTGGTATTGTACATCAAGGTAATCCTTTCTGCCCGTTGCAGCAGACGATAGAAATAGTATGCATATACGGCAATCTTATGCTCTATAGTAGTCATTCCGAAAGCTTTGCGCAGGTTGTAAGGAATAAAGGAAGAGTCTCCTCCCGATTTGGGAAGCTGCCCTTCGTTGACCGATAGTAATACCAAGTGGCGGAAATCCAGGTTACGTGTTTCCAATACCCCCATGACCTGCATACCAATGGCCGGTTCGCCATGGAATGGAATATTGGTCGCTGACAAGACCTTGACCAGCAATCGCCGGAATGTTTCGCTCTGTACGGTTAATTCATCTTCTTCTATCAGTGTGCGGAACCGGTTGATGGTGGTATAGGCTTTGAAAAGCGACTCCCGGTAAAGTTGGTTGAATGCATCTGTGTCCTCGGTACCGGAAGTGTTTGCCTGGTAAATGCCTGCCACTTGCT
GCAAAGTTTCGGACAGGCGTATGCACAGATTCAGGTTTCCGGAAAGCGGAGTGAAAAGCCGGGTAAGGAATTCATCTTTACCCAATTCGCCGGGTAACGGATAGAAACGGTTGTTCCGGGTCAGTTCTTTTTCCAACAATTCGGCTTGACCGGTGAGTTGCCGGGTATAGGGGTGTTTTAACAGTGTAACAACCGATTGAAACGTGTAGCGGCCGCTTTTGAAATTAAATCCGTGGGTATGCAATTCCAGTAAGGCGATGAGGAAACTATATACAGGAGTTTGCGATAACGGAAATCCCATGGTGATATTGACATGTTTTACTTCTGCCGGCAATGAATGGAGGACCGGCTGAAGCAATGCTTCGTTACACAATACGACTGCGGTTTCCTTTTCCGGTGTAGTCAGATTGTTTCGTATCCATTGAGGCAGGTAACGGGCTTGTGCATTTTCAGTAGAAGATGCGATGTAGTGTACCTCTTTAGGTTTCGAGAGATTTTTGAATAACTCACCGGACAAAGGAGAAGGAAAGTCGCGTAAGTTCCGGCGGATAAACTCTCCGGCTTCATGGGTGACGGCTTGTCTGTTTTCCTTCATATAAAATTCGTCGTAATCCCAATAAAAAACAGCTTTCCCGGCATCTTTCAATTGGGTGAACAACGTATGTTCTACTTTGTTCAGTACATTAAATCCTACAAAAACATATTTCTCGTATGGAAGTTTGTCCACATTCAGATGCTCTATGACGTGGCGGTACATCATTCCTTCGTAGGCAATGTTTTGAGAAGCTAAGGATTCACGGAACCCTTTGTAGATGTTTCCCAATACGTCCCATAAAGAGATAAACCGTTCTTTCAGCGCTGTTCTCCGTTCGATGGAGAAGTTCTGGAAGAACTGCCGGATAGCTTCTTCCTGCTCATCGTCAATGAAGGTGTAGTCGTCCATTATGTTGCGCAGATCCTGTAAATTGGAAAACAGTTTGTCGGTATCCACTTTATTTTTGTCGGCATCGTCAAAATCACTGATCAGCATTTCTCCCCAAAAATAGAAATCATCCAGTGTTTCCGTACTTTGAGTTTCCCGTCGGAATATCTTATAAAGTTCGCATACCAGTTTTACCGGATCACCCACTTCCCATGGAGAAAGGCTACGGAACAATTCGCTGATGCTGACGTATGCGGGCGACCAAATAGGTGAATCCGACTCTTGGGCGAGATACTCATTAAAGAAAAGTCCGGCACGTTTGTTAGGAAAAACCACGGCTGTATGTGCCAGGTTTCCTTCCGTATGTTTATATAAATCGGCTGCTACCAGTTTCAAAAAGCTTTCCATTATTCTATTTCCTCCAGTTCGTTGTTAAATACATACCATAAATAGCCACGGATATGCTCATATCCCATATCCGATAATAAATCCATATATTCTTTCACCTGCTTGTTGTAAGCTTTCCGCTTTTTACCGAATTTAAAGTCCACCACAATCACCTCTCCGTCTTTCATCATCACGCGGTCCGGACGGCGGGTTTGCAGCACTCCTTTTTCCCGATAAATAATGGCACACTCATTGTATAGTTCCCAGCGGCCCGAATACCATTCTTTGACCAGCGGATGCTTCAAAGCCCATTCGGTGAGTTTTCGTATTTGTTCTTCCTGTTGTGCCGACTCTATGATTCCTTCGAAACGCAGGCGTTCTATGGCGGGAGGCACATCGTCCGTGGTACGGATTACGGAGAACAGATTATGCAACAACTGTCCTTGCCGGATGTATTTGTCTTCCGCTTCCTCTTCTCCCCGGATAAATTCGGCAGAACGGTTGGACTGCTTGAATTCGATGTTGGTTTCCAGCGTTTCCAGATGGATAGGCAAACGTCGGGCTACAGTCAGCAGCTTGTTGCCGGATACTTGTTTTTCCTCTTCGTGAGATGAGAGATAGAGCGTTCCCAGTTCGTATATTTCTTCTCCGGTGGCATAGCTCTTTCCGGTCATGTCGGATAAGGCACTTT
CCAACAATTCGGATACCGTTCCTTTCTGTTCCGCCTTTCCATATATAATCAAGTTCTTTTTGGCACGGGTAAATGCCACATAAAGCAGGTTCAGATTATCCACCCATAATTGCAAGCGTTCATTCAGGAACTCTTCCCGATAAATGGATTGTTGCATGGCTGTGGAATAGTTGATGGGAACAATGTCAAGATCGCTAAACGGAGCTTGCCGGGGAGCACACCATACCAGATGATTGTAAGTCTCATTCTCCATTTTCCAGTCGCAGAAAGGTAGCAGTACGGTATGGTATTCCAATCCTTTGGACTTGTGGATGGAGAGAATGCGAATTCCTTCCACCTCGCCGGAAGGGATGGTCTTGCTGCCTAGTTTTTCTTCCCAGTAGGTGATAAAAGCGGATAATTCGGAAGAATTGCTTTGCAGATATTCCGTCACGGCATCAAAGAAGGCGCACAGATAGGCATCCTGTTGTTCGATGCAGGACATCTGGAACAGATTGAAAAGTTTTTCCATCAGCTCATACAGTGGCATGAGGCGAAGTTGCTCCGCTTCTTTTATGAAATCGAATGGCAGATAATCATCTATCTCGTTCAGCAATAGAGTGTTGAGGTCTATTCCTTTATGTAATACTTCATTCTGATAGGCAGTAGCCAGTTGTGCCTTGGCAATCCGGTTTTCGGGTTGTGACAAGTAGCGCAGGCCATCCATTATCATACATACCGCCAGTGAGGCATCCAGTCGGAATGCCTCATCGGATACGATTTTGTAAGAGGTGTTCTTGTCGAAGTAATCGGCTATTAAAGGAATGCTTCTGTTCTTGCGCACCAGGATGGCAATATCTTTCAGTTGCACGCCCTGCGCCACAAGCAGCTCCACTTCTTCTCCTAGGTGGTGCAGGGTGTTTTCCATATAGGTCATGTCTTCCGTATCCGATAGAAATTCCACTTTGACATACCCTTTTCCGGGATCTTTGTATGTTTCCTGACACACGTCGTTGTAGGCTTCCTTCAGTTCCTTGCATTCCTTTTTTTGTTCTTCCTTGTAAATGTTGTTCAACACCTCGCAGGCGGCTGTAAACACTTCGTTGTTGAAGTGAATGATGTTTGCGGCGCTCCGGCGGTTGGTGGTCAGTGTCTTTACTTTGACGGGAAAGGCTTCGATGTTTGTTTTCAATCCGTTCAGTATGCCCCAGTCACCGTTTCTCCAGCGGTAGATGGATTGTTTCACGTCTCCTACAATCAAGCTGTCTGCTCCCTGAGACAAACCTTCGAGCAGCAACAGACGGAAATTGTCCCATTGCATACGCGAAGTGTCCTGAAATTCATCGATCATTACGTTGCGTATGGTTGTTCCTATCTTTTCGAATACAAACGAGGAATCTCCTTCTTTTACCAGGTTATGAAGCAACGCGTTGGTATCCGACAGCAGGAAGCGGTTGTTTTCATGGTTCAATTCGCGTACCTCTTCGTCTATATTGGCCAGCAGACGGATATTGTTTACGTGGCGCAGGGAGAGTTGGCAGGAATTGGCCAGCATATTGTTCCGGCTTCTGTATTTTTCCGATTCATTCAGCAGAGGAATCAGTTCTTTTTCTGCCAGACCTTGGATAGCGTTGCGTATGGGGGAGGTTTTTTTCACCCATTCGTCGGGGCAGTCCAGACATTTTTCCACTGTTACATTGCGGACACTGTCATCCAGTTTACCCGATTGTAATTTATTAAAATAGCTGGCTATACCTCGGGAGCCGTTTTTCAAATCTTCTACTTTCACCCCGTTTGTATCCAATATTCCGAAGAATTGGTCGGCAAATCCTTTCATTTGTTCCTGCACCTCTTCCAAAATGGCTTGCAGCGTTTCGCGGTAATTCTTGATACAGTCTTTGTCCCGTAATTTTTCCCGTAGTCCGTTTCCTTTTTCTATATATCCTTCATCGAAGATATTTCGTCCGAAATTCTTTATCTCACCGGAAACATTCCACCTCTTGTCATCGGCTATTCTTTCT
TCTATGTATTCCAACAGCCAATAGAGTACGGGAGATTGTCGGTCCAGCTTTTCTATCATGGAGTCCACGGCATCACTCAGTACCTCCATATTGTTCAGTTCGATATTCAGATTGGCACCCAATTCCAGCTCCCGTGCCAAATTGCGCATCACCGACTGGAAGAATGAGTCAATGGTTTCCACCCGGAAGCGGCTGTAATCGTGTATCATGTAGTGCAGTGCCGTACCGGCGGCTGTCCTTATGTCTTCTTGGGGCCTTTCCAGTTCTTCTGTGATTTTCTGTAAATAGGGATCAGAATCCTTATCCTTTATCCATATTCCGTATAGCTGGCTCAGAATCCGCTCTTTCATTTCGGTAGTGGCCTTGTTGGTAAAAGTCACAGCCAGTATATTACGGTATGCCCGTGGGTTCTGTATCAGCAGTTTTATATATTCCACGGCCAGTGTGAATGTTTTTCCCGAACCGGCAGAGGCTTTATAAACTAATAATTCAGGAGAATGTTTCATTTTAAATAATTTGCTAATTTGCTAATGGCTGCGCTGTGTACCGGATGGTAATTGGCTGACACATTAGTACATTTTTATTTTTAATTAAAAACCAGCACAAAAACAGTGGCCTGTCCCTTTTCCTTGTAAGGCAGCAGGGTCAGTGTGCCACCGCTCAGTCTCATGATTTGTTTGCTCAGGCTCAGTCCGATGCCGCTTCCTTCCTCTTTGGTCGTGAAGAAAGGTACAAAAATTTGTCCGGCTGCATCGGGAGGAATGGCGGGACCGTTGTTGGCTATCTCTATACGGATACTTTCTTGCGGATCACAATAAGCTTTCAAGGTGATCTTTCCGTCGGGAGCATTACCTATGGCTTGAATCGCATTTTTCAGCAGATTCGTAACCACTTGGGCTATCAGGTTCTCGTCGGCAAAGACTATCAGGTCTTCCCGGGCTTCGAGGATGGAAAGGGTGATGTTGGGGCATGGATGCTGGTGCTGTGCCAGTCGGATCATCCGTTCCAGGAAAGGACGGACATAGAACAGCGATGGCTCAGGTGATGGCAGTCGTGTCAGTTTGCGATAAGACATGACAAAATTTATCAATCCTTTACCTGTAGAGTGAATCGTTTCCAGCCCTTGTTTCATTTCTTCATTTTCTGCTCCGGGAAGAGCCAGCAGTGTTTCGCTGAGGGAAGTCACCGGGGTCAGGGAGTTCATAATTTCGTGTGTCAGCACGCGGGTAAGGCGTATCCATGAATCTATTTCCCGTTCATCCAATTCCCGATTGATATCACTCAAGGCAATAATGCGGAGTTCCTCGTCTTTGATACGGATGCCGGACACCCGTAAGGCAAGATGCACCGTTCCCCGTTCCGTATTGAACTGAATTTGCAGCTTGTCACCCGGAACTGCTTTTTCCAAAGCAGTCATCAGTTCATCCGAGATGCGGGAGAGTTGTTTGACATGAGTCAGAACATCCAGCCCCAACAGATTCATGGCCTCTTTGTTCTTTTGATAAACGGAGCCTTTGCTGTTCAGTACCACTATGCCGGTTTCTACAAAGTCCATGATGAGCTCATAATACTTCTCGCGTTGTGCCGTTTCCTGTTTTACATTGTACAAAATACGTGCGATACGGTTCAGCATGACATTGACCATGGAAGAATCGCCAGGGGAGGCATGTTCGTAAAAACGAACAGCGGGGTCGTCGTTTTCGATGGCGTCGAGCAGAAAGGCTATCTTCCGGGTATTAAAAGTGTACAGGCGGTAAAACCAGCCTATGGAAACGAGGAATGAAGGTACGATTACGCATAGCCACAACCAGTTGCGCTGCTGGATTAGTAAGGTGCAGGCAATGGCTAGAATCAGTACGGTAAATAGGCGGAAGAAGAATTGTTTCACAGCTTAATTATTTATTATTTCGTTTGAATGTTCCTTTAGTTCCTGTTTAAAAATAGATTATTGTTATTTGTTTTGTCATCCTGAACCATCGTGAAGAATGAGATGAGATAAT
AGGTCATAATCCGAATTTTTTCATCTTATTATAAAGCGTCTGCCGGGTAATTCCCAACTGTGCCGCTACAGCCGAGAGGTTTCCGTTGCATTTCTCTATCGCTTTCTGAATCATTTGCAATTCCATCTCCTCCAGAGTTGAGACACTTGTTTCTGTCGCAGGAGCCGCTATTTTTCGGGGAAAATGAAAATGTTCCTCACTCAATATACCATCTTCATTGATAATGACCGCTTTCTCTACGGCATGTTCCAGTTCGCGTATGTTTCCATACCACGGATATGTCCGTAACTTTTCTTGCGCACCGGTAGAAAGAAGGATATTTCCTTTGTCATATTGTTTGCAGAACCGGTCTATAAAGCGTTCGGCCAAAGGGATAATATCTTCTTTGCGTTCCCGCAGAGGGGGAATCTCGATGTGGATGGTATTGATGCGGTAAAGCAAATCTTCGCGGAATTTGCCTTTGGCCACCATCTCTTCCAAATTACAATTGGTGGCGCAAATCAAACGTATGTTGACAGGTATCGGTTCGTTGCTTCCTACTCGCACCACGCTCCGGCTCTGAATGGCGGTGAGTAGTTTGGATTGCAGGTGATAGGGCAGGTTGCCTATTTCGTCCAGAAACAAAGTCCCTTCATGGGCGGCTTCAAATTTACCTGCACGGTCGGTATGGGCATCGGTAAACGAGCCTTTTTTATGACCGAACAGTTCGCTTTCGAACAGGGATTCGGTGATGGCACCCATGTCCACTGTTATCATATCCCGCCGGTATCGGTTGGACAAGGCGTGTATTTCCCGCGCCAGCATTTCTTTTCCTGTGCCGTTCTCTCCTGTAATCAGAATATTGGCATCCGTTTGCGCCACTTTCTCTATCAGCGTCCGCAGTTGCTTCATGGCATTGCTTTCTCCCCAATACATAGAAGATACGGTTTTAGGGACTTCTGCCCTCTTCTTGTTCTTTGAGGAATTGCGGCAGGCAGACAGCAAGGTTTCTATCAACCGGGTATTGTCCCAGGGTTTGACGATGAAGTCTGTCGCCCCTTCCTTGATGCCGCGCACTGCCAAGTCTATGTCAGCGTATGCTGTGAAAAGTACTACGGGCAGGGAGGGGTGTATTTTCTTTATCTCGTGTAACCAGTACAGACCTTCATTCCCGGTATTCAGTCCGCTGCTGAAATTCATATCCAGCAGCACCACTTGCGCATTTTCTTCACGTAACGCAGCGGGCAGTGTGATGGGTGAGGGCAGTGTGACGATATGTTCGAAATGATTTTTCAGCAATAGCTTTACTGCCGACAGCACTCCTTTGTTATCATCTACCACAATGATGGTTCCTTGTTTGCTCATAAGTCGTTGTAGCTTCTTTTTTCTTTAAAGATATCTTTAGTTGTACGTTTCATGCAAAGGTACTATCTCTTCACACACTCTCATTTCCATTCCTTTATAGACCACTACTATTTTGTGTAGCTGTGTGGTGCCCACAGCATTCTTCACCCGGTCGGAATCAGCATAACGATTTGCCTGAGCGATCCCTTCCAGCCTCAGTTCTTCCACACGACTTTCAGAATCTTTGTATTTGGCATATTTCAACTCTATGATATAACTGTGCTTCATGTCCGGATAGATTTCCAGCAAAGGACACAGGAAAAGATCAGCGTATCCTTCTTGTGTATCCGCTTCAGAAATAGGGCGGTAGAAGCGGTTTTGCGCTGTCATGGCCAGTGTGAATCCATGTACGAAAAACTCTCCTTTCTGTTTGTCACGTTGTGAAGCGTAACGTTTCAGACAGTCGGCTATGTAACCGAAGTAGGCCTGCCAGTTACCATCGTATGCCAAAGCCGAGGCCAGTTCGTCTTTCTCATGACTGCTGAAACTAAGGTCGGCCTCATTGTATGTATTCAGCAAATAGGTGTATAGTTGCTCTTCCACTACCTGATTGGGAATGGTGAGTTTGTTTTTTCCTTTATGCATTCCGCTGATGGTAAGCATACCGAAATAATAGA
GCAGACTTATGAAATTGTCCGGATCGACAATATTCGCAGCCGGAAAGCTATCCTTCAGTTCTCCGGTGATATATCCTTGACTTACCAGGGTTTGTATGATGGATGCATCATGAGCAAACTCCTTGTCCTTGCGAATCAGCATCCGGAGTTTTTCGTAGTCTATGCGTATGTTGCTTTCTATCATTTTCTGTGGCGCTTTGCCGCGCAGAATATAGTTTTTGACAAAATAGAGAACCATGTTGGAATTGTACAGGGTAGTTTCTCCGTAGCATTCCGGTGCGAAGCAGTAGTTGTCATACCACGGCTTCATGATATCTATCAGTTGGCCGACGGTATGGTTGAAAGGACTGTTTGTGGAATAATACGTCAGCATCTCACGCACTTCCTTTTCTGTGAATCCTGTCATTTCGTTGAACTCTGGTGAGAGGGAGTAATTGGTACCTATATTGAATCCGCTGGTGAGATCATCCATGGTTACGGGGCTTACTCCGGTGATGAAGCAACGCTTGATGCAGGAATCTGTTCCGGCTTTCACTTTGTTGAAGAATGCGCGCAGGTAACCCTCTTTGTGTGTCTCTTCAGTGTATCGGTGCAGACTTTCGGCATCGGAGAGAATGGCATTGGTAAAGTGGTCATACTCATCGATAAAAAGATAAATTTTCTGCTCAGTCTTGTTGCACTCCGTAAACAGATATTCCAATTGTTCTACCGCTCCTGATTTTTCATCCAGCTTCTCTTTGATACCTTGTGGGAGATAGTCGGCATAGATATCGCAGAAATAGTCGAACATGGTCTGGCAGTGTGCGTCCAGTCCTTGACGGTAGTTGTGCAATTCCCCGCTGATACCGGAAAAATTGAGTTTCAGCACTAGATAGCTGTTGCGGTCCGGCGTGGGGTGCTTTCCTATGTAGAGGTCTCCGAACAGGGCGTCAAACTTATCGCGGGTGCGTACATCATAATAATGTTGCAGCATGCTCAAGGTCAGACTTTTGCCAAATCGCCGGGGACGGATGAAGAAGAAAAACCGGTCCGACTGTTCTATCAGTGGGATGAAGGCGGTTTTGTCCACATAATAATAATTGTCGAAACGGATGTCTGCAAAGTTCATCATACCGTATGGCAATCGTTTCCTGTTTGAAGGTGCATATTCCATTAGTTATATTATTTTTTTTATGAATAAACTCTGGTAAAAGAGATATGTATGGCATACAAAGATACTTTCTTTTCCCTGCTTCTCCAAAAAACAAAGGTTCTTTCTTTTTGGTAACATGGAGGAAGCTGTCCGATAGGTATCCGCCAATCTTTTCTTTCAGAGAAAACTTTTCAATTTCATGAATTGTTCTATTATAAAATAGCATTTCAAATGAAAACATTTATATTAAGCATAATTGTAACCATGATTACATCGGTAGCTGGTGCACAGCAAAAAAGTTTTTATGACTTTACGGTCAAGACTATTGACGGGAAGGACTTGCCACTTTCCACATTCAAGGGTAAGAAAGTATTGGTAGTCAATGTGGCATCTAAATGCGGTTTCACTCCCCAGTACGCCAAGTTGGAGGAGTTGTATGAGAAATATGGAAAAGATGATTTTGTCGTTATCGGATTTCCTGCCAATAATTTCCTGCATCAGGAACCCGGAACGAATGAAGAAATCAAGGAGTTTTGTACGTTGAATTATGGAGTGACTTTTCCTATGATGGCTAAAATATCTGTAAAGGGAAAAGATATAGCTCCCCTCTATCAATGGCTTACGCATAAAAGTGAAAACGGGGTTTCGGATGCTAAGATAGGGTGGAACTTTCATAAGTTTCTGATTGATGAAAATGGTAGATGGGTTGCCTCAATCGGCTCTACTACCAGTCCGCTTTCACAGGAAATTGTAGGATGGATTGAAGAATAAAATGACCAGAAGATAAATAATGCCCTATTGTTATTGCACCAGCTTTCCGTTTTTCAGTTCCGACATCAGCAGGTCATAGATGTATGCCGGGCTTTCG
ATATACAGGCCGGTGGAGAAGTCTTCCAGTTTGACGAATGTCTCCGAATTGTGGAATGTGGAGATGGCTTCCTGCAAATCCATATCATTACGTTCCACCAAGTAACGGACTACATCGGCGGATATTCCTTCAAAGAGGAATTTTGCGGTATTCGTGTTCATAGTCGTTTCAGCATTTTAATCGCTGCCTCTGAGTGAAAGAAATATTGTGAAGTCACTTTCGAAAATGTCAGTTCGTGCAATAGTTGTTCTTCGTTAATAAAATTCTCGGTGTATAAACGTAGCAAGCGCGCAATGGTATCATCGGCAACAGGGCCAATTACGATATCGTAATCGTGGCAAGGCTGAGTAGTGTTTATATCGCGGTTCGACATGACAAAACGTGCCCATTCCCAGTCTGGCTCCTTGAAAACCTTGACTTTCAATTCGCTCGACTGCGCTTCTTCGAGGTCAAACTCAAACATAGTCAGAGTGGGGTTTCCGCTGAACATCCGCGCTGTTCTTGCGGCCATCCTTTGGGCCTGTTCCCTAATGTCGGTCAGGTAAAATCCTCTGCCGAAATCCTTATAGGGGCGGCATCTGTTGAGGTCGATGCCGTCGATTGCCGTATTTGAACCGTGGTAAAGTATCATCGCAATGTTCCTCCATTGTTTTGGCAGACGATGGTCAGGTCATCCACACAGTCGTTAAAAGAAAGCGTGTGTTCCACATCGTAAAATTCGGTGAGAAAATCAATGCCTTTGAAACGTCTGATATAATTGCTTGCCTCCCGGATTGTCAAATCGTGACGGCGCGCAAATTCGACAATGACCGTCATCACATAATCGATTAGGTTCTTGTCCGCTTTCTTTTTGGACTTGAGCGCCACTATCGGTTCGACACCCAACGCCTCCGCTATGCGGTTGACAGTGCCAAAATTAACACACAGACCGTTTTCCACTTTGGAAACCATTGCTTTTTGGACGCCAATCATCACCCCGACTTCTTCTTGTGTCAATCCTTTGTTTTTTCGGCATTGGGCCAAAAGTTCTCCTATTTTTTGCAAATTCGTATTCATACCACAAAAATAGATAAAGTTTCTTGTGTAGGAAACTTTTTAACTACAAATATATTCTTTAGATAATTATTATCTTTCCAATTAAATGGCAGCTCTGCTCTGACACGCAAGAACCCGTGTGCCGGATGATGATTGTTAGGAAAACAGGAGGGAAATCAGTTCCTTTGTACATCATAGGTTATGGAACGGTGGTTCGAAAAGAGTTGTCGCTATTTTTTTGCTTATTTATTTGTGGGGATAAATATTTATCCCTATATTGTATTAGTAAAATTAAAAATATCATGGAGAATGATTATTAAAACTGAAAGTTATGGATACAATAAAAGACATTTGGTTTGATGCTAACCGCATTTATATGAAAACGGATGGTGGAGAAACTTTCAGCCGTCCCTTAGAGGCATTCCCTTTGCTGAAGGATGCAAGTGATAGAGAGCGCCTTGATTTCAAGATAGGAAAGTTTGGTGACGATGTTCGTTGGGAATCATTGGATGAGGATATTCATATATCCAGTTTTTTCGATACAGCGGAGCCGGATTATGAAAATGAGATAGCAATGATATTTAAACGTTTCCCTCAACTAAATGTGTCAGAGGTTGCACGTAGTATGGGAATAAACAAGAGTCTTCTTTCCAAGTATATCTATGGTATTAAAAAGCCGAGTGATATAAGGAAGACGCAGATTAAAGAGGCTCTCCATCTTTGGGGAAAAGAATTACTTGCCGTTTGATTAATCTCTTAATAATAGTTTAGGTGTGATTCCATTTGGTTTCACGCCTTTTTTATGTTAATTTTTCCACAATCATAGTTTCAATAGAAAGATAATTAAATATAAGGAAGATATGGCTAGAATAAACAAATACGGCTATGTTGCTGCGTGACGTTTTTGCATCCGTCAGGGAGATGCGGTGAACAAGCCAAGCTCATACCAAGAACCGGGCAAGCAT
GTGATGCAGGCAGGGTAGGCGTGTACCGAGTACCAGGTAGCCTTGTTTGCTGTATCTTATATCTTACCATCTGGGCGATGATGCGCCCGGAAGAATTTGCATCAAGGTAAGTATACCGATAATCTGTTTCTTGTTTGTTTTTTGAGTAATCAAGAAACATCACTAAAAATAGCGGAATATATTTTCGTCCTATTTATTAAATTGCTATCTTTGCCACGTTTTCCACGCCGGTACTTCCGCAAGGAAGTTAGGGAGGTGGGAAATATACATATATAGAAAAGACGTTTACTTGACGTTTTGTTCTTGGCTCTACAGAACTTCGCAAATTCTAAAATCCAGCCAAAACAAAGCAACAGTAGGTGTCTGCGGGGTATGTTTATATCCTTGGTGCGCCTTTGCCGGTCAAAGGTGCATTCAAGTGTCATTATACATATCGGCGTGGGCTTCTGCGTTGCTCGTTCAGACTGGATTGGGCAATGCGAAGGCCTTGTAGGGCGGGACGAGTAACAGGTACAGATTCCCGCGCTTTTTTTATATATAGGTATTATAAATTCTGATTTAACACATAAGAACCGTCCGGTTCGGGAATCTGCGGAAGGTCATCGGTTGACCCGACAGGCTGCATCTACATCGTGCGGATTCCCATCCTCCATTTGTTGAAATAGCTTTTCTTTGGTCAGCTTAATCACAAGAGGCCGATTCCTCTTGTATTTTACGAAACTTCTCGTAATTGATTGAATTGCAGATGTATATCTGTACTTCAAAGCTCCATAACTATGCTTTCAAGTGAAAAAATGCAATGGTTTGCCATGCGTGCCACTTATCGCCGGGGGATGCAGATCAAGGCTTTGTTGGATAAAGAAGGAATCAACAATTTCATTCCCATGCGTTACGAAGTTCGTATAAGGAACGGGTGCAAGAGGCGTGAGCTGGTTCCCGTTATCAGTGATCTGATATTTGTTCATTCCGTCCAGTCCGAACTTCAGAAGGTGAAGTTCAAACTTCCGTATTTTCAATATATGATTGATATCCGCAACGGGCAGAAAATCATTGTGCCCGATGATCAGATGAGGCAGTTTATCGCTGTGGCTGGAACGTATGACGAGCATCTTATCTTCTTCAGTCCCGATGAGGTGAACCTGCGTAAAGGTACGAAAGTCCGTATTACGGGTGGTGACTTCGAGGGCTACGAGGGGGTGTTTGTAAAGGTGAAAGGTGCGAGAGACCGTCGTGTGGTTATCAGTCTGCAAGGGGTGATAGCCATGGCCATGGCGACGCTTTCGCCGGACTTGATAGAAGTGATAGAAGAGCCGAAGAAGAAATAACTTCATTTGCTACGTTAAACATTCTTATTCCTTGATTAATGGGGAAGTAAACGATCTGATAATATAAATCTTATGTATAATAATGTATAGAACCACATTCATGCCGTCAGGCATCCGGCTCCAGCATCTGATTCAGAATCAGTATAAGGAAATTTTAGCCAGGGAAAAGAATAATGATAAATTCATTCATCTGTATGACATTGGTGCGTATTGGGTGGCTTTCGAGTGTTCCGCCTGTAGGTTGAGCGGTTTGTTTTCTAAAAGTGAGCTGACTTTGTTTCGTGTCCCGGATTGTGTGGAATATGTGGTGATGGTTTCTGTTCCTGCCGATGAGGCGGAGGGCTGTTTGGGTGAGTACATTATTTTGCACGATGGAATTTACCGGAAAGTGTGGTCGGAGCATGTTTTGCCGATGGGGGATTATCGTCATTGGCATGAGATGGCTGTGAGGTCTGTTTTGTTGTAAAGAAAAGTATGCGGACTTCGTGGACCAAGTAAGTGGTGTTACTTTGCTTGGGAACATATCTTACTTTGCTTACGAAGATATCTTACTTTTTTCAATGGAATATGGTCATTCGTCATGCTCTATAGGGAATACCCTGAAAAATCCTTCAAAAACTTCAGCCTGCTCGCAAACGCCCTACCCGTGGGCATTCCGGCTGAAGGATTGGGCTC
AGGAATCCTTCAGCTGGGGGTTGGAATGTTTCAGCCGAACTGATCTTCCTCCGATTCTTAGGGAATCGTTTTTCCTGAAGGATATTTTCCGGCTGAAACATTTCACAATGCATATCCTTCAGCCGGAATGCCGACAGGCAGGGTGTTTGCGGAGTGGCTGAAAGATCTGAAGGAAAAAAAGGCGTTTCGGCATGTAGATACCGTTATGGAAAAAACAACAGGTCAAATTTTAAAAAGCTCTTGTTCTTTCAATGAAAACGTCCTGTTCTTTTTTTTAAAGACCTTGTTCTTTTCGGAAAAGAACAGGATCTTTTAAGGGTTAGTAACGTGTATGCCGGAAGGTTGGTAACGTGCCACCCCACACGTTACTAATGTGTGGGGTGTAAGATTAGTAATGTTGCAGGTGGAAGATTGCATATCTTGCTAGTGATACAATCATAAACGTATGAAGTTCCAGTATATAAAAGATAAAGTTAGTGCTTATTTCACTAAGGGGAATGAGCGTAGTGTTGCGGTGAAGAAAAATATCGCCGTTTCATTAGTGTTGAAATGTATAAGTATTTTAGTTTCCTTGCAGGTCGTTCCATTGACCATCGGTTATGTTAATCCTACGAAGTATGGCATTTGGCTGACGTTGAGTTCCATTATTGCCTGGCTGTCTTATTTTGATTTGGGGTTTGCCCACGGATTTCGTAATCGTTTTGCTGAGGCAAAAGCAAAAGGGGATATGAAGTTGGCAAAAGAATATGTCAGTACTACATATGCAGTGCTCTTCTTGCTCTTTTCCGTAATTTTATTGATAACCTTGGTGGTGAATAATTATTTGGATTGGAGTCGTATTTTAAATATTGATCCTGTTTATAAGGATGAGCTGAGTCTGGTTTTCGGTTTGTTGGCATGCTTCTTTTGCCTGAACATTGTGGCAAGTGTCTTTACTACCATGCTGACGGCAGATCAAAAGCCTGCTTTAGCTTCGTTGATTCAAACGGGTGGACAGGTATTGGCATTTGTCTGTATTTATATATTGACAAAGACGACTTCAGGAAGTTTGAGCGCGTTGGCTTTTGTCTTTTCGGGGGTACCTTGTATACTGGTAATCACTATTTCTGTTATTATGTTTCATGATAAAAGATACAGTTTAGTAGCTCCTTCATTACTAAGTGTCCGTTTAGTGTTGACACGGAATATTTTGGGACTGGGTGGACAATTTTTTGTCATTATGGTTTCTACATTGTTCATTTTTCAATTCGTCAATATCATTTTATCTAGAACACAAGGACCGGAGGTCGTAACCCAATACAATATTGCCTATAGGTATTTTAATGTGGTAAATATGATATTTATCATTATCCTTAATATTTTTTGGTCTGCATTTACAGATGCATATATTAAAAAAGATAGTGGATGGATGAAGAGAATCATTAAAAAATTGGATACATTATGGCTTTTATGTATTCCTGTACTAATTTTAATGATATTGTGTTCTGAAATTATTTTTCAATGGTGGATTGGTAATTCTGTATCTGTTCCTTTTTCATTATCTGTTTGCATAGCTGTATATGTTTTTCTTCAAACAGGAGGTAATATTTATATGTACCTTATTAATGGAACAAGTAAAGTGCGTATACAATTGATTGTCTATTTATTATTTGCTTTAACAGCTATACCGTTGATGACTTTTTTTGCGAAACGTTTTGGAGTGGAAGGTGTTTTGATAGTTCCTGCTGTAGTTTTTGGCTTACAAGCTTGTATTGGAAGAATACAGATCTTGAAGATTGTAAATGGTACAGCTAAAGGTATTTGGCTGAAGTGATGAGAATAATAATGTTTAAAATAAGATTTTCAATGAGACATTGTTTGATTATGACAGCTTATAAGGATGCAGAGATGATAAATTCAATTATAGATGAAACACCTGTAAGTTGGGGAGTCTATATACATATTGATGCAAAATCATCACTTTTATCTTCCATGATAAACAATAGAGCAATTGTTATAAA
AAAATATCGTATATATTGGGGTGGAATAGAACATTTATATGCATTTATAGAGCTCATGAGTATGGCGTTAAATAGTGGGGAAAATTATGATTATTATCATTTAATAACAGGTCAGGATTATTATGCTATACCTCCGCTGCAATTTGATACTATATTAGGTGGTGATGGGATGAATTATCTTGACATATTTCCACTGCCAAGACAAGGGTGGTGGGGAGATGGCTTGGATATTTTAAGGTATAGAACATTTTCCTCAAGAACAGATATTCGTAAAGGTATATATCGAAAATTGGATTCATTATGGCGTATAACTCAGAAGATGTTAGGTTTGCAGCGATCACTTCCTTCATATTCTATATATGGGGGTAGTGTGTATTGTTCGTTGACAAAAAACGCTGTAAACGAGGTCGTTAATGGAGAGACTTCAGAAGACTTATTGCAAAGGCTTAAAAATACGACTTGTGGGGAGGAAGTGTATTTCCAAACGATTTTAATGAATTCAAACTTGAGAGATACGATTTTTAATAATCAGTTACGCTATATAGATTGGAATGTGAAAAATGCACCAGGAGTATTGATTGACGAGGATTTTGATAAAATTGTTAAGGGAAAAGCTCTCTTTTGTCGGAAACTCGATTCAACAGTATCCAAATCACTATTAATTAAACTGAAAAAATATATGAGTGATTTTTCCGTTAGATAAAACGATAGGAAACTTAATAGGTATAAGCTATTTTCAAGAAAGGTGTTGGTACAATTAGTAATCATTCTTATCTGTATTGGATGCAGTATACAGATATTGAGAAATAATGGAGATAAACGGTTGGTTTATTTTTTATGGGGAATAATGTTTTGTAATTATGGCGTAGAAGTAATTCCTCATTTAAGTGTGTTGCGTGTTCTGACCATTGCTTTTTATATATCAGTGTTTATGAGAAAGAGTAACAGGCTTATTTTATTGAATATTCCATTAAAAAGACAATTCATTCTACTTTTTATTGCATATTTATGTACTGGGCTATTTGATGATAGAGGGTGTAGTGTAGGATTGTATAAAGCCATTTTGGAGTATATGGAAACTTTTGGATTTATATTGTTAGGATATATAAGTTTTTATCATACAAACGATTTACATAAATTGCTGCATGCAATATTACAGATAGCTTTATGTGTATCTATTTATTCGATAATTACATATTTGATAGGAGCGGATATTGTAAATTCACAAATACCAGGAACAGATTTTAGTATGAATAGTGGAGCGAGAGTCAGAATACCCTCTTTTTATTACAGTTCTCACATAGCAGGAGCTGCTATATCATCAATGTTGATTATAATTATTTCTCCTGAAAATAACAAAAATATAAAACATAGATTATTACTTGCATTTTTGTTATTTATAGCATTGTTATTTACGGGTTCCAGATCTTCCATTTTAGCTTTATTTGTAGGTTTGATATTTGTATATTATTATTTTTATTCTTCAAAAGTGCAATCACACAAAGTATTATTACTTGTTCTAGGATTGGGTGTTGTTGGGGTGGTGGCATCTTTTATAAGCAGGATTCCTTTTCTTTCTGATATGTTTGAGGAGGGAGGGGGAGATACTGGCGGAAGTAATATTTTTATGAGATTGCAACAATTAGTATATAGCTATGAACTTTTCTTACAATCTCCATATTTTGGAAATGGCTTTAAATATTTTTGGGAAGTTGTAAAAGTTGATGATAGCTTTTTAAGTAGTATGCTTTTGGGAGCGGAAAGTTATGTATTTGTTTTATTGATAGAGAGAGGACTAATACAAATTGTTACTATCGTTTATTTCTTCTTCTCATTAATAAAAATTTTCATGCAATATAAAGACAAAATCTTTATCATAACGTGCATGGGTTTAACATGGGCTTTCCTTTTTAATTCAATAGTTACCGGAAATGGAGATAAATGGTCGTATATGATGATATATATAGGCTGTGGCTTATCGGTAATTAGAAATTGCAGAA
AGAATATATCATGTACTAAAAAACAATTATAAGATAATGTTTTCTATTATTATTCCTCTATATAACAAGGCTGATTATATTGCAGAAACACTGAAGTCAGTTCTGAATCAAACATATTGTGATTATGAAGTAATCGTGGTGAATGATTCATCGACAGATAATAGTTTGGAAGTGGCTTCTAGTTTTCAGGATGAACGAATTCATATTTATACAAAAGAAAATGAAGGTGTATCAGCAGCACGTAACTATGGTATTATGCGTGCTAAATATGATTACATTGCTTTTCTGGATGCAGATGATATTTGGGAATCCGATTATTTGGAATGCCAAAAGAAGTTGATAGAAATCTATCCTAATGCTGGGATTTATTCTACTGCTTTCTACTCTTTGGAAAAGGGAAAACGGAAACTTCGTAATGTCTTGATTAATGAACACACACACTTCCTTGTTCATGACTATTTTAAAGAATCCGTAATGAATGGATTATCCATTTGCTGGACTAGTTCCTTGTGTTTAAAGAAAGAAATTATAGAGCGGATACCAATGTTTCGAGTAGGTATTAAGCGTGGTGAGGATTTGGACCTTTGGCTTAGAATAGCGTTGAATTATGATGTAGCCTATTTGAACCTTCCGAAAGTCTTTTATAAAACAGGGTTATCTGATAGTCTTACATCAGATTATTCTAAAAATGGAGAATTTCCTTATTATGAGTGGCTTAACTTTACATCGGAAAGTTCTTATTATAGAAAGTATGTGATTTTAGCTATGTATATACATGCAAAAAATGCATTTATACATCATGATTACGATACATGTTTGGCCGAGTTGTGGAGTGTTAAAACTATTGCTTGGAGATTCAAATGGGCAAAGCGATTATTATTACTAGTAATTTCATGGATAAAAAGTAAATAATGAATATCATTTATTTTGTTCCCAATATTACCATTGCAGGAGGTATATTCCGTATAGTGTCAGATAAGATGAATTATTTGGCGGAAAATATGGAAGGACGGCTTTTTTTGGCATATTATGGTAATGGACAGGAAAAACCTATATATCCTTTGCACCCGAACATTCAACTGTTACCTATTGATATAGAGTGGAAGGTAGGCTTTGGAAAGAAAATTAATAGAGTATGGAAAAATATAAGTATTATACGTCACATATTTAAAAAATATAAGATAGATATTGCTGTCAATGCCAATGCTCCTTTATTGATATGGATATTACCATTTATTTGTCGGCGGATAAAGAAGATCCATGAGTTTCATTTTTCTTATAAAGGACAACAGATATTAGATGAGGAAATTTTTAAATCGCGAGGAAAGAAATTTTTAGTTCAATATTTAAGAAAGTGTTGTTTGACAAAATTCGATAAGGTAATTGCATTAACAGAGTCTGACAAGAAAATGTGGAATTTACCGAATATATTTGTAATACCTAATTTTAGCAATATACAACTACATGAAAGAAATGGTAGAAAAAGCAAAGTTGCTATATCAGCAGGTAGATTAGAATCTGTAAAAGGTTATAATCGATTGATTGCAGCTTGGGTGATAGTTGCACAAAAGTGTCCGGACTGGCAATTGGAAATTTGGGGTGAAGGAAGTCTGCGAGAAAGTCTTCAACGACAAATTGATGCTTTGCATCTTTCATCAGTTGTTCATCTGAAAGGTGTTTCACCTAGTATAGGTGAAGTATATTCTCATTCTTCTTTTTTTGTCATGTCATCTTTGTATGAAGGTTTCCCGTTGGTATTGGTGGAGGCGATGAATTGTGGCTTGCCTTGTGTTAGTTTTGACATTACTGGAGCAAATAGTATCATAGACAATGGTAAGAATGGATTTTTAGTGCCGGATAATGATGTAAATGCCTTGGCGGAAGCTTGTATAAAATTGATAGGAGATAGAACGTTGTTGGAAAATATGAGTAAACAAGCTTACATTTCTAGTGCTCGTTTTTCTAAATTAAAGGTGATGCAGAAATGGTTGGATTTATTT
CATGAGTTGTCGGAGAAATAAATGTTATAGTTGAGATTAGATATAGCTTTTATTTTTGTAAAAATGAATTATTAAAGAATGAATTATGCTTTTATAGACTCCATGGGGTCATTGACATACAATAATGGTATTAAAATTCAAGCCATAATGTGGAAGAATGGACTTGAAAAATTGGGGCACAACGTTAAATTGGTTAATTTGTGGGAGAATATTGACTTCTCAAGTTATGATGCTGTAATCATCTTTGCTATGGGGGCAAATATTTATAAACTGATAAAAGGTCTTTCTAGGATTAATGAAAATATCATAGTAGCTCCTATTATAGATCCTAACAGAAGTGATAGGTTTTATAAATTCTTGTTTAAGTTTTATGGTAGTACAAGATTAGCGTTATCCAATCATTACCATGATATGTGGTCTGTAAAGGAAAAGGTAAAGCTATGGCTTGTTCGTTCAGAACAAGAAAGACATTATGTGAGTTATTGCCTTGATATACCGAACGATAAAATAGCTAAAGTACCATTGAATTATAGAATTCCTGAGATTGGGCAATTAGGTGAAAAAGAGGATTTCTGTTTACATGTGTCTAGATTAGATGCACCTAATAAAAATGTTCCAAGACTTATTGAAGCGGCAAAAAAATATGGTTTTGATTTGAAATTGGCAGGTCATATATCTGGAGAAAAAGAAGAGAAAAAAATCATTTCACTTATTGGTAGTACAAAGAATATAGAGTATTTGGGAGAAGTGAATGAAGAGGAATTGGTATCACTTTATAAAAGAGCTAAAGTATTTGCTCTTCCTTCTTTAAGAGAAGGAGTAGGTATGGCAGCACTTGAGGCGGCTGCTTATGGATGTGAGATAGTTCTTACTTGTGTAGGTGCGCCTAAAGAATATTATGAGGGTCGTGCTTTGTTAGTTAATCCGCAAAGTATAAATGAAATAGGTGCTGCCATAATAAAAGCTATTAATAAAGGGTATTCACAGCCTGAGTTAAAATCTTATATAGAAAAGCGTTATAGTGAGAACGCTTGTTTGAAATTATTAAATGAAAATCTGTTGAAGTTGAATGAGTGTTCACTGATATGAAAACTGTTAAGTGTTTATATAACATTCTTATATCGTTTTAATATTGAATTTATAGTTATGGAAAATAAACTGGTTTCAATAATTATACCTGTCTATAATGTTGAAAAATATATTAAAAAATGCCTTGATACTGTTTTATGTCAATCACATCATAATCTTGAAATAATTTTGGTGGATGATGGTTCTTCGGATGCTTCAGGAATTATTTGTGATGAGTATGCCCAGAAAGATAAACGTGTAAAGGTGATTCATAAAGAGAATGAAGGTGTAAGTGTAGCACGAAATACAGGCATTGATATAGCTACAGGTGAATACATTTGCTTTTCTGATGCTGACGATTATTTGCAATTGGATTATGTTGAATACCTTTTGAAGATCGCAAATGAAAATGATGCTGATATAGCAGTAACAACTGATTGGTTTATTACCTTTTTAGGAAAAGAATTTCCGGAGGATTTAAGAGATATACCACTAGTTCTGAGTCCTGAGCATGCAGCAGCAGCCATTTTATATTATCATATTCCAATAGGATGTAACAGTAAAATCTTTAAGCGAAGTTTGTTAAATAATAAGATTAGATTCTATCCTCAGCTATCCGTAGGTGAAGGCTTTAATTTTAATGTAAAAGCTTTTTTATCTGCTAATAAAGTTGCTGTAACCAAAAGGCGAATATATTATTATCGTCGTAATAATCCTGCGAGTTGTATGACAAAGTTTAAATTAGAAAAATGTAATACAGCATTGTATGCCATTCAAATGATTAAGGATAATCTTATGATTAAATCAAATAGTTTAAACAGAGCATGTGATTTTGCGGATTGGCATACACATGGTGATATGTATAATTGGATGGTATTAGCAAAAGTGAAAGATATGTATCCAGATATGTATATGAGATGCTTCAATAAAGT
ACGTTCTTATTCATTTAAAGCGATATTTGCTCCAATAAGTAAAAAAGAAAAATTTAGAGCGATACTTCAGTTTATACATCCTAGATTGTTGGCATTTGTATTAGAGCTTAGAAGAGCTTATTATCAGCGGTAACTATATATTTATCTGAAATAAAGCGATGAAAGTAGCAATTCTCTCCATGCAGCAAATTAAGAACTACGGTTCTTTTCTTCAAGCCTTTTCTTTAAAAAAGAATATTGAGTCATTAGGTCATACCTGTGAATTTATAAATATTATTCCCGGAGAACAATTAGAAGGATATAAGACCAGTTGTTTTTACAATATTAAATTACTTTTCCAGCGCTTATGGGGATGGGATTTTTACAAACGTTTCCAAACGATATTTGTTTTTCAGAATCGTTTTCGAAAAGAGTTCCTACCTTATTTGGGAGTAAAAAAAGGAATAAATACAAAACATTATGATGTAGTGGTTATTGGAAGTGATGAAGTATTTAATTGTACTCAAAAGACTTGGTTTGGTTTTTCGTCTCAATTATTTGGGAAAGGACTGAATGCAAGTCGGATCATTACGTATGCAGCATCCTTTGGTGCGACTACTATAGACAAATTACAACTTGTCGGCAAAAAGGAAATTGTCTCAAGTTTATTGCATGATTTAGATGCCATTTCTGTGCGTGACGAGAATAGCATGAAAGTTATTGAAGAACTAACCGGTAGAACTCCTTGGCTTCATGTTGATCCAGTGTTGATGTTTGATTATAATCAATTTATACCTGATAAATTCAATAGGAATGAATATATAATTGTTTATACATATCCGGGAAGAATTACTGATAAAAAGGAAATAAGTTCTATCAGAAATTTTGCTAAATCAAAAGAACTGAAGTTGATATCAATAGGACATTATTTTTCATGGTGTGATGAAGTTGTTATTCCTACTCCGTTTGAAGTACTAGCTTATTTTAAAGGAGCATCTTATATTATTACTGATACATTTCATGGTAGTGTATTCTCTATAAAATTTAATAAAGAATTTTGTACTATAGTACGAGATATGAATAGTAATAAATTGGTGTCTTTATTAAAACAGTTTGGACTAGAGAATCGTATAGTTACCGATATGAATAAAATGCAAAAGATTTTGGAAACTCCCATTGATTATGCAGGTGTAAATAAAATAATAATGGAGGAAACAAAACGGTCTATTACTTATTTGACACAAAATATCAGATAATGAACTCTTTGTTAGTTACCTACATCTTGCAAACAGAGAAATGGACGTCTCCCGAACTTTTCGAAACGAAAAACAGAATTTACACCTTTCTCAACCCCGTATCTTACCTCATAGCTTTAGAAAACAAATCTTTATTTGAACAATTCGATGGTATTTTTGCCGATGGCTCTTTACTGGTTTCTGCCATAAGATTAGTATACGGAAAGCGAGTGACCAGACGTAGTTTTGATATGACCTCCCTTGCTCCCGAATTGTTGAATTACCTCATGGAAAGCCGTAAAACGCTCTATATCGTAGCCTCGGGTCAAGAACAAGTGGAGTGTTCTGTGAGGATATTTCAAGAGCAGTATCCGGAGTTACGCATTGCAGGGTTCAGGAACGGATATTTCTCTTCGAATGCTGAAATGAATAAGGAAGCTTCTCATATAGTCGAATTAAATCCGGATTTTCTGATAGTAGGTATGGGGGCCTTGATGCAGGAAAAGTTTCTGCTTAAGGTAAAAAAGATGGGTTACCAAGGCATCGGCTTTACGTGTGGTGGCTTTGTTCACCAAACGGCAATGAATAGGATGCACTATTATCCGAACTGGGTTGATAAGATGAATTTGCGTTTCGTTTATAGAATGTATAAGGAGAAACATACACGTACAAGATATCTGCAAGCTGCATTCTTGTTTCCGGTACGCTTTATCGGGGAAAGAATTTTCGGATGAAAGAGCAGACTTTGCTTATTGCCATGTAAATAATGGAGAATGGCAACAACAG
GACGAAGATAGCAGTAGGAATAATTTTTCCGAGTATTTTTAATACATCTGTTTTCATGCTGCAAATATAAATTAAAATTCATAGAATAGTGAAATTAAGAACTCTAATATTTCTGGGGGGACTCTTTGGAGGAATGAATGTTATCAAAGGGCAGGCTTTTTATGGTACTACAGGACTACTTCATGCACCTACTGCCGTTATGCAGAAAGATAAAACCGTTATGCTGGGAGGAAATATGCTTGATGTAAATATACTTTCCCGATATTGGGTGAGAAGTGAATATCATCCCTATACCTATAACTATTATATAAATTGTACTTTGTTTCCTTGGCTGGAAGTAGCATATACTTGTACGTTGGTAAAGGGGATACATGGTTCTTCTTATTGGCCTCAGCAAACTTGGGGCAGGTTTACCAATCAGGACAGATCGTTTCATTTCCGGTTGAGGGCATGGAAAGAAGGCTGGTGGAAGGCTTGGACGCCTCAGGTAGTGATTGGTGCGAATGATCCGGGAAGCCATTCTTCAAATGGAGGTGGAGACATAGATTGGGGGGGCGGTGGTAGCGGAAACCACAATTACTTGACACGTTATTATCTGGCGGCAACGAAACATGTTGAATTTAGTGGAATAGGTACAGTGGGGGTTCATGTAGCTTGGGTTATCGGTAAAGCTATGAGTGATGTGCATTATAGCCGTCCTGCTGCAGGGGTAAACTTTCATTTTGGCATGAAGGGCGAAGGTTTCTGGCAGAAAGCTCTGAATGGATTTAATTTGATGGCGGAAGTGTGTCCCGGACATGCGGAAGATCTGCATACAGCAACTTATACTGTGAATGTGGGTGGAACTTATTCAATATGGAAAGACCATATCAATCTTATTGCTGAACTGAATGACGGTAAATTTTTTAGCGGAGGTATATTCTTTAAATTACATCTTAAATAACTGACTTGATTTCTAGTGGATGAAGCATTATAAGAATAGCCGGAATCACTTGGAAAAAGTAGTTCCGGCTATTCTTATATAACGATATTTGTATGTCCTGTTTTTATTATCCTACAATGTATTTAGCATTCTTGCCGCTTATTTTACGAATGAATACCACAAGCAGCCACGATACGCCAAATGCCACGATTGCGGCAGCGGGAATTTGTAAAGGCACGGGAACACCGATATTGCGCATCAGGACTACACCCGGACCTGTGAAAAAATAATGTATCATATAGATGCCAAAACCACATACGGTCAAGTTTGCCAGCATACGCTGGATCAGTTCTGAGCGTACTTTTACTTTCTTGGCTATGAGGAAGAAGGGGATAGTCATCATAATCACATTCAAAGAATTATAGGTGAAGAACAGTTCCAACATTTCATCAGTGCATTGTGGTAATGCGGTCATGTGGCGGAAACCGAAGAATGTGACCATGTAACCTGCCAGAAACATGGGGATACAGACTGACAGTGTCTTACTCCATGACCAGTCGTGGTTGCGCAGATAGTGTCCCAATAGCAAGTACCCGTTGAATCCGGCAAAATAATAGAGCATATTGAACGAGTTCCACGCACAGCCTCCCCAAATGTATGGGGATACGAACTGATAATAATAAGGTAACAAGCCGGTTATTCCCCATGCGACAAGAAACCATAGTTTGGCTTTCTCCGAGGCCTTTTCTACCCATGCGGAGAAAATAGGAAGGTATAGATATAATCCGATAAGCAGATAGATATACCACATATGTACGTCTAATAAAGAGAAATTGAGAGGCATTTCTGCTATATAACCCAATGATACGTTGAGTGACTGACGGGCGGCTTCTTCACCGCTATAAGGGAAAAAGTCAAGGATTATTTCAGGATTAAGTCCCAACAATCCTGTAAACCAGGGAAATAAATTATAAATGACACTCCAGATAAGGAACGGCCAGAATACACGGCTGATTCTTTTTTTGTAGAAAGCGGATATTTCGCCACGTACGGGGAGCAGCAATGCTCCTGTAATCAT
GACAAACAGTGGGACACACGGACGGAGAAAGGCTCCGTAGGCGGCTCCCCAGAATTTGATTTCATCAATGTTGGCTGGCGGTTCGCCGGGGTAAAAGTTGAACGGGTCTGCACTATGGCAGCAAACTACGGTGAACATGGCTACCAAACGAACGACATCCAGCCATACCACATGTTTTCTTGTGGTGTTCATAGCAATAGGTTGATTCATAATTTAATTTTTGATTTGACAGGCACAAAGGTACGCTTTCTCTGAGGATTATTTCTTGCTTTGGAACAAAAAATAAAAAGAACTATCCGCAATCATACTACAAAATGAGAATCCTCCCACTGCAAGAGTCACAATCATTTGTCTTTTGCAGGGGGAGGAATTCTATTTAGGGTTAGAGGGTTATTTTAGTCCTCTGTTCTTCAACAAGGGATCAATACCGGGTTCTTTGCCACGGAAATTGACATACATGTCCATCGCATCGTCAATGCAGCCGGGAGTGAGGATATATTTGCGGAACTTTCCTGCTACTTCTTGATTAAAGATATCGCCTGTTTCCTTGTATGCCTCGTATGCGTCACAATCCAGCACTTCTGCCCAAATATAACTATAGTAACCGGCAGTATATCCACCACCCATGGTGTGGTTGAAATAAGTGGTGCGATAGCGTGACGGAATTTGTTTCAGCAATCCGCGCTTACCTAATGTTTCTGCTTCGAACTGCATAACGTCCATATCATCGGGAATTTCTTTTAATACATGATAATCCATATCCAGCAGGGAAGCAGCTAAATATTCAGCCGTAGCGAAGCCTTGTCCGTATTTGCCACTCTTGTCCATTTTTTCAACCAGTCCGGCAGGAATAACTTCACCTGTCTGATAATGTTTGGCATATACATTCAATACCTCCGGTTCAAAAGCCCAATGCTCATCTATCTGTGACGGCAGTTCTACAAAATCACGCGGAACACTAGCCACACCATAATAATGTACATCTTTGAAAAGATTATGTAAAGCGTGTCCGAATTCGTGGAACAAGGTATTCGCTTCGTCTGCACTGAGCAAAGCGGGTTGTCCGGCTGCCGGTTTGGTAAAGTTGCATACCACGGTTACTACAGGAGCCACTTTCTTGCCGTCCTTGTAGGTCTGTGAACGATAGCTTCCGCACCATGCGCCACCTTTCTTGCTGGCACGCGGAAAGAAGTCCATATAGAGTACACCTAGATGAGAACCGTCTTTGTCCTTACATTCGAATGCCTGTGCATCGGGATGAGGGAGGGGAAGATTGTCCAGCTGGGTGAAAGTGATACCGTACAGTTTGTTGGCAACATAGAACACACCGTCACGTACGTTTTCCAGTTTCAGATAAGGACGTACCTGATTTTCATCCAAGTCGAATTTGGCTTTCTTGGCTCGGTCTGCATAATAACGCCAGTCCCAACCTTCGGCTGTGAAGGTTTTTCCGTCTTTCTTTATTTCGGCATTGATATCTGCCAGTTCTTCTTTGGCTTTGGACAGAGTAGGTGTCCAAATCTGATCTAATAATTTATATACGGCATCCGGTGTTTTTGCCATACGTTCTTCCAGTGCAAAGGAAGCATAGTTTTCATATCCCATCAGCTTGGCTTTTTCCAGACGGGCCTTGAGCAGTTTGCGCACCACTTCCTTGTTGTCGTATTCATTTCCGTTATTTCCTCTGTTAATGTATCCTTTGAATATCTTTTCACGCAGGTCTCGGTTATCGGCATATTGCAGAAAAGGCATGACGCTGGGATTATGCAGGGTGAAAATCCATTTGCCTTCCATGCCTGCTTCTTTAGCTGTTTCGGCAGCACTGGCAATCAGGTTTTGGGGTAATCCGGCTAAGTCTTCTTCTTTGTCCACGATCAATTGGAAAGCATTGGTTTCCTTTTGCATATTTTGGCCGAAGGTGAGCTGTAACATGGAAATCTCACTATTTAATTCCCGCAGTTTTTTTTGACTGTCGGCATCCAGGTTGGCACCTCCACGTA
CAAAATCTTTATAGGTTTCTTCCAACAACTTCTTTTGCTCTTTGTCCAAATTGCCGGGATTTTCATAGACAGCTTTGATGCGTGCAAAAAGTTTTTCATTTAAGGCGATGTCATCGCTGTGTTTTGATTGTAGTGGAGAGAGTTCACGGCTCAGTGCGTCCATTTCATCATTGGTATTGGCGCTTTTCAGTCCGTAGAATACGGTGCTGACTTTACGAAGTAACGCTCCGCTTTGATCCAGTGCTGCAATGGTATTCTGGAAAGTGGCGGGTTCGGGGTTGTTCACTATAGCGTCAATCTCTTTCTGTTGTTCTTCCATGCCCTGCAAAAAGGCGGGTTTGTAATGTTCCATCTTGATTTGGTCGAAAGGAGGAACCTGGAAGGGAGTTGTGTACTCTGAAAGGAATGGATTTCCGGTAGGGGCTGCCTGTTTCCCTTGCGAACCGCAACCGCTTAATATGGCAACCGCACAGGCTGCCAAAAGAATGTGTTTCATAATAATTTGTTTTATATAGGTGATAATGCAAAGATAGTAAAATAAGGCAAATTCCAATAAGTTTGAAGCGTAGAACTATTTGTTTGTTTTCATAGTCTGTCTAATTATCATACATCATTGTCTAATAATTAGACAGACTGTTTCTTTTTGTTATTTTGCAATGTGTTGTTAATCAGTTATTTGTTTGCGTGGCACGGTTTTGGCATTTATATTATAGAGAACGAATTATAGAAACTTTAGTTTCTTATACACTTTATGGGAAGTAACGTTGTGAAACGGGACTTCTCTCTTTTAAAATAATAATAATTAAAAACACGAAATATGAAACAGATTTATTATGTCATTCAGGCTTTGATACATGGACGTGGAGCTAATATTATAAAAGTCGTTTCGTTAGGCTTGGGCCTGACTATGAGTATTCTGCTTTTCTCAAGAGTTGTGTATGAGCAGAGTTTTGATACTTGTTTTAAGGATCATGATAAATTATACCAATTGTGGAATATATGGACAGTAAATGGAGAGCCTTTTCCTCCGAGTGAGTTCATTATCGGTGCAGCTGCCGGAGGCATACTGGATGCAATGCCGGAAATTGTGGAATCGGCGGCAAGTACAGGCTCGTGGCCGGTTTCGGCACCTATTTATAACGGGAGTGTCCGCTTTGATGATTTTAAAGTTGCTGCTGATTCATTGTTTTTTCAGACAATGGGTATTGAGGTCTTGAGTGGTGATCCGGTACGTGAATTGCAGCAAAAAGATGTAATTTTCCTGAGTAAAGATTTGGCGGATAAAATGTTTGGCGGTGAGAACCCGATTGGTAAAATAATTAGTTTCAATAAGGAAATTGAGTTGACAGTCAAAGGAACGTATGCTGCTTTACCTGAGAATTGTACTATGCGACCGAAGGCTGTCATTTCCTTGCCTTCTATCTGGAGCCGCCGGATAGGAAATTATTCATGGAACGGTGGGGACAGCTGGAAGGAATATATCCGGTTGAAGCAGGATATTGATTTGGATGAATTGAACAAGCGTATTGATATGGTAGTACAGCAACATATTCCCAGAAGTGATAAGTTGGGAATCACTGTCATGGCAAAACCTGTTCGGGATACGTACCGTGGATATGATGAAGTGAAAAGAATGCGTAATATTATGCTTATTTTAGGTATATCTATTTTGTTTATCACGACATTGAATTATGTGTTGATTTCTATTTCTTCATTGAGCCGGCGTGCCAAAAGTATTGGTGTTCATAAATGCAGCGGAGCGGGAACGGGTACTGTTTTTGGTATGTTTATGTGGGAAACCGGTATTATTATTTTGCTTTCATTGTTTTTGATGGTTTTTTTAATGTTTAATTTCCGGGAATTTGTGGAAGATACTACAGCTGCTAAACTGGAGTCTCTTTTTGCGGTGGAACGTATTTGGGTGCCTTTTGGAGTGACTGCTGTCTTATTCCTCATTGGTGGTGTGCTGCCGGGACGAATTTTTTCAAAGATACCGGTCA
CACAGGTTTTCCGGCGTTATACAGAGGGTAAGAAAGGATGGAAACGTCCGTTATTGTTTATCCAATTTGCAGGAGTGGCTTTTATATGTGGATTGATGTGTGTAGTGATGCTCCAGTATCATTATGTTATTAATAAAGATCCGGGTTATAATCCTGAGCGGGTAGTGATTGGCGTTAATAATGCACCGGATGCTAAAGCCCGGCTTGCTGCCCGTCACTTTTATGAGGGGCTTCCATACGTGGAAGCATTGACTTCGGCAACTAGCTATCCGTCTAATGGATATAGCGGGCAGATGATACCGGATGAGAAAGGAACGAGTCTTTTTTCCAGCCGTTATGATTTTACTCAGGAGAATTATGTAGCTTTTATGGGAATGGTCATACAGCAAGGGCGTGTGCCTCGTGAATCCGGTGAGGTGGCAGTGAATGAGGAATTTGTACGGAGAATGCATTGGGGAAAAGATGTTTTAGGTAAGAGTATACAGACAGAAGAAGGCCGTGTCAAAATAGTAGGTGTGATTAAAGATTTTAATATTGGCGGATTCTACTCAGAGCTGAAACCGTTTGTCCTGCATCATCATCCCAAAGATTTGGCTGATCTTGTTTATCTCCGTCTGAAGGAACCCTTTGGTGAGAATCTTCAGAAACTGAAGCGTGATGCTGCCGAAGCTTTTCCTAATCAAACAGTCGGCTTTGAAAGCTTGGAACAAAAAATGGCGGATAGTTATAATTCCGTTCGTGTATTTCGTAATGCGACTTTATTGGCGGCAATTGCCATCTTGTTTATTACTTTGATGGGACTGATAGGCTATATTAATGATGAATTACAGCGACGTTCGAAAGAAATTGCTATTCGTAAAGTGAATGGAGCTGAGTCATTTGCGATATTGGAAATGTTGGTGCAAGATGTTCTTTGGATTTCTTTCCCGGCTGTAGTCGCAGGTACACTAGGGGCATGGTATGTAGGTGGTTTATGGATGGAACAGTTTGCTGTGACTGTAGGTTCGCTTGTTCCTTATTATGTATGTGTAGCTATCGTGGTCTTGATATTGATTGTCAGTTGTGTCATTTCCAAAACATGGCGCATTGCCAATGAGAATCCAGTAAAGAGTATTAAATCAGAATAAAGATCAATAGAACAAACAATTAAAATTTATTATTATGATACAGATTGAAAATTTAAGTAAGGTATTTCGTACTTCAGAAGTAGAAACAATTGCGTTGAATCATGTAAACATAGAGGTGAAAGAAGGAGAGTTTGTAGCTATTATGGGACCATCAGGTTGTGGAAAATCCACCTTATTGAATATCCTTGGTCTGCTGGACAATCCTACTGAAGGCAGTTACAAGTTGCTGGGCAACGAAGTAGCCAATCTGAAAGAAAAAGAACGCACCCGCCTGCGTAAAGGTGTAATAGGTTTTGTATTTCAAAGCTTTAATCTGATAGACGAACTGAATGTTTTTGAAAATGTGGAATTACCGTTGACTTATCTTGGAATCAAGGCCAGTGAACGTAAGGAGCGTGTTCTGGAAATTTTAAAGCGAATGAACTTGAGTCATCGTGCCAAGCATTTCCCGCAGCAGCTTTCCGGCGGTCAACAACAGCGTGTGGCTATTGCCCGTGCTGTGGTTACTAACCCTAAATTGATCCTCGCCGATGAGCCTACCGGTAATTTGGACTCAAAGAACGGTGCTGAAGTAATGAATCTATTGACCGAACTTAACAAGGAGGGGACTAGTATTGTCATGGTAACTCACTCTCAGCATGATGCCGGATTTGCTCATCGTGTCATTCATTTGTTTGACGGTAGTGTAGTGGCGAACATTAAAGAATAATAATGAAAGGGTGGTTGTAATGAAACAAATCTATTATGCAATACAATCGACATTACACGGCCGTGGAAGCAATGTGACAAAAATCATATCGCTTTCATTAGGCTTGACTATTGGTGTTCTGCTTTTTTCACAAATAGCATTTGAACTCAATTATGAAAAATGTT
ACCCTGATGCCGACCGATTGGTGTTGGTACGTGGAGGAGGAGAGAATGTAAAGACTGGAGAGAAAGGAGAAGGATATGATGATAGCTTGTTTGCTCCGATGGCAGAAGCATTCCGCAGTGATTTGTCACAATGGATTGAAAATGCTACAGTCATTTTCAATTTTGAGACATTAAATGTATTCAAGGATGGTCATAAGCTGAAAGACGTAAACTACGCTTATGTAGATACTTGCTATTTCCGTACTTTCGGCATCAAAGTATTGAAAGGGAATCCTGAGGAATTGCAAAGGGCAGGGAGCATATTCGTTTCTGAAACTTTTGTTCGTGATGTCTTTGGTGGACAGGACCCTGTGGGACAAAAATTATCTTTGGATAAACAGCATGAACTGACAGTCCGTGGTGTTTATCAGGATACTCCGGAAAATACGGCCTATCATTTTGATTTTGTAGCTCCCATTTATGCCGGAGGTGGTTATATAGGGGGTGGAACATGGGGGCGTAATGACATTTATTATACTATTCTCCGTTTGCGCGATGGGGTGGACAGGGAAGAAATCAATCGTCAGATATACAAGGCTATGCAAAAATATTATCCTGATTCTGCTGATGATGAATGGAGAAATTTTTATGATGCACAGCCATTGCCCGAAATTCATCTGGATGACTCAAATACCCGCACTCGCCTTTACATTTATGGTTTTTTGGGCTTTGCCATATTTTTTGTGGCAATCATGAACTATGTATTAGTAGCTATTGCTACCATGAGCCGCAGGGCAAAAAGTATAGGTGTGCATAAGTGTAGTGGTGCCAGTGCAATCAATATTTTCAGTATGTTCCTTTTTGAGACGGGGATTGTGGTATTGATGTCCGTTATAGTGGCCCTCTTTATTATTTTCAATACCAAAGATCTGATAGAAGATTTACTTTCGGTGCAGTTATCCTCACTCTTTACATTGGAAACTTTATGGGTTCCGATGCTCATTGTTTTTGTACTGTTCATAGTAGCCGGAGTGCTGCCGGGAAGATTGTTTTCACGCATTCCTGTCACTCAAGTTTTTCGTCGTTACACAGATGGGAAAAAGGGGTGGAAGCGTTCTCTTTTATTTGTACAATTTATGGGGGTATCTTTTGTGATGGGGATTTTGTTAGTATCTTTGATGCAGTATCATCATCTGATTAATAGTGATATGGGTATCCGTACTCCAGGATTGGTGGAGGCGGAAACATGGATGTCACCCGAAGAAGCAGAGAATATGGTTAACGATCTTCGTCGTCAACCCATGGTGGAAAATGCCACCCGTTCTATGCATGGTGTTCTGGGAGAATATTGGACTCGTGGGCTGATTGATAATAGTGGCAAGCGAATAGAAACCTTGATGTATAATCCTTGTGATAAGAATTATGCTGAAACCATGGGGATTACTATTATTGAAGGGAAAGATATGCAGAATGAGGGAGATGTATTGGTCAACGAAGAAGTAGTGCGGCTGATGAAATGGACAGATGGGGCAGTAGGCAAACGGTTGAATGATTTTGATAAAGCCGGAACCATTGTTGGTGTATTCCGTAATGTGCGCAATACGTCTTTTCTTTATAAGCAGTTTCCCGTGGCTTTGGTTTATAGTCATAATACCAGTCACACTTTTGATGTACGTTTAAAACAGCCATACGATGAGAGTTTGAAAAAGCTGAACGAATACATGGAGCAGGTGCATTCCACTAAGGCATTGGAATTTATTCCGATAGATACCATGCTGAAGGAAATCTATCGCAATGTATATCGTTTTCGTAATTCTGTATGGATTACTTCTACTTTTATTTTGCTTATTGTTGTTATGGGGTTGATAGGTTACGTGAATGATGAAACACAACGTCGCAGTAAGGAAATAGCTATCCGTAAGGTGAATGGTGCTGAAGCTTCGACTATTCTTCGGTTGCTGTCCCGTGATATATTATATGTTGCTGTCCCATCGGTCTTGATAGGAATTGTAGTATCGTAC
TTTACAGGAAAGGCTTGGTTGGATCAATTTGCCGAAACGATAGATATGAATGCGTTGTATTTTGTCGGAACAGCATTGGTGATTATCGCTCTGATAGTGGTGTGTGTCGTTGTTAGAGCTTGGCGTATTGCTAATGAGAATCCGGTAAAGAGTATTAAAATTGAGTAACTAAATAGATAGCGATGAAGCAATTATATTATGCAATACAAACGATATTACACGGTCGTGGAAGTAATGTGACGAAAGTCATATCGCTTTCATTGGGGCTGACTGTTGGCATCCTGTTGTTCTCGCAGATTGCATTTGAGTTAAGTTATGAAAAATGTTATCCTGAAGCTGGAAATCTAGGAATAGTCCGCGCATATTACCATAATTTGGAAACAGGTGAGTCGATGGGGGACGATGGGGATATCTATGATTATTCCGTTTTTGCTCCGATAGCGGCGGCTTTGGCTGAGAATATGCCAATAGAAGTGGAAAAAGCTACTTGTATAAATTCTTACACAGCCAACGGTAATTATTATTATGAAAATCAATTGTTGTCGGATGACGAACAGTGTCTTATGGTGGATACTTGTTTTTTCCAAACCTTTGGCATTCCGGTTTTAAAGGGAAATCCTAACGATTTGATTAACTCCAATACTTTGTTTGTGTCCGAGAGTTTTGCACGCCGTTTTTTGGGTGATGCAGATCCTATAGGTAAAGTGTTAATGTTGGAGCGGAAAACTGAAATGATTGTCCGTGGTGTTTATCGTGATATTCCGGAGAATACCATGTTTAAGGCAGACTTTGTAGTAAGTGTACATAAAGAAGGAGGTTATAAGGATGGAGCCGGATGGGGAGGAAATGATATTTTTTATGCAGTGTTCCGGACCGGAGAAGCTTCGGATATAGAGGTGGTAAACGATAACATCCAGCGGATGGTGGAGAAATACATGCCGACTGAACATAACGGTTGGAAACTGGAACTCAGTGTCATTCCATTGGCAACAAAGCATCAGACTTCTTCAGAAACAACAAAGCGTCTAGTTATCTATGGCTTTTTGGGTTTCTCTATCTTCTTTGTCGCTATTATGAACTATATGCTTATTGTAATTGCTACTTTGGGACGTCGTGCCAAGAGTGTCGGTGTCCATAAGTGCAATGGTGCCAATAATGGGAATATCTTCAGTATGTTTATGATGGAAACGGGCATTATTGTTCTGGTATCCATTCTGGTCTGTGCATTTATCATTTTCAATGCGCGGGATTTGATAGAAGATTTGCTGTCGGTGCGACTTGCATCACTCTTTACATGGGGAACTTTGTGGGTACCGCTGCTGGTTGTTGTACTTTTATTTTTGTTGGCAGGTGTATTGCCCGGACGTATTTTTTCACATATTCCTGTCACACAGGTATTCCGTCGTTATACTGATGGTAAAAAAGGATGGAAACGGTCACTGTTGTTTATACAGTTTACCGGTGTGTCTTTTGTCTTGGGATTATTACTGGTTACGTTAATGCAGTATAGTCATTTGATGAACAGGGATATGGGGATCAATACTGCCGGACTGGTAGAAGCCGAAAGTTGGCTGGACATTGAAATTGTTCCTCATATGCGCGATGAGATCCGGCGTCAGCCTATGGTAGAGAGCGTGGCTACAGCCAGCCATAGTGTATTGGGCCAATATTGGACAAAAGGATTGATGGGCAGTGACGGCAAGCGTATAGGTGTTCTTAATATGAACTATGTGGATTTTAATTATCCGGATGTGGTAGGCATTACGATTATAGAAGGAAAGCCGATGACCCATGCCAAAGATCTGCTGGTGAATGAAGAGATAGTCCGGCTTAAGAAATGGACGGATGGAGCCGTAGGAAAACCTTTTGATGAGATTAAGGAAGGAACTATAGTAGGTGTGTTCCGTGATGTGCGCAATCAAAGTTTCTATCAGGCGCAGCAACCCATTGTGCTGATAGGTGGTAATGCGTTTAATCATACCTTCAATGTACGGTTGAAA
GAACCCTACGATGAAAATTTGAAGCGCCTGAATGAATTCATGGACAAAACTTTTCCACAAGTGGCTTTAACATTCCATACAGTGGATG +>3_1#NODE_8_length_39997_cov_62.8364_ID_15 +GGGACAAAAACGCCGTGTTTATCGCAGCTTGGTTAGCTGAATTCCAAAACTGCACGGACTATTGTCTTATGTCAAATGCTCTAATTACGCCAGAGCGAATTGCTCTTGCGCCGCGAGCAGGCCGCTGAGCTGCTCGGCGGTGAGCGGGTAGGAGAGCGCGTAGCCCTGCCCGAGCGGGCAGCCGAGTTGGCGGGCGGCCTCGACGACCACTCAACCGCTCTTATCTCGCATCCTGCCCCGCAGGATGCACCGCACTACCCTTCGCCCTTCACGCCTCGTCTTGCATTCTGTTGATTTTGAGCAGCATCACAAATGTAGACACCCCCGTGCCCATAGCACTGTGGAACCACCCCACCCCATGCCGAACTGGGTCGTGAAACACAGCAGCGCCAATGATACTCGGACCGCAGGGTCCCGGAAAAGTCGGTCAGCGCGGGGGTTTTTTCGTTGCGGGAGTAGCTCAGCTGGTAGAGCACTACCTTGCCAAGGTAGATGTCGCGAGTTCGAATCTCGTCTCCCGCTCCATCATCCCCCCTTCGCCTCTTCCGAGGTGGAGGGGCTTTTTTGTTGGCTTGCCGCATTTCTATGAACCAGTGCGGCACAATAACAGCTGTGTTTGAATTTGAAATCAAGCAGCGTGATGGACGTGCCCGCACGGCGACGTTCCAGACGCCGCGCGGAGCGGTCACCACACCTATGTTTATGCCCGTCGGCACCCAGGGCACCGTAAAGGGCATCAGCCCACAGGAACTGCTGGAAATCGGTTCGCAGATGATCCTGGCCAATACCTATCACCTGATGCTGCGGCCAGGAGAGCAGCTGGTCAAGGCGCATGGTGGTTTGCCGGGATTCACCGCCTACCCAGGCCCCTTTCTGACTGACTCAGGCGGTTTTCAGGTCATGAGCCTGGGGCACATGCGCAAAATCAGTGAGGAAGGGGTCGTGTTCAAGAACCACCTTGATGGCAGTCGCGTCGAGCTGACGCCGGAGCGCAGCATTCAGGTGCAGGAGGCCCTGGGAGCAGACGTCATCATGGCCTTTGACGAGTGCCCTCCCTATCCTGCCGAGCGGCCTTACATCGAGGCGAGCCTGGACCGCACGGTGCGCTGGCTCGAGCGCTGCCACGCGGTGAAGACTAAAGATGACCAGGCCCTCTTCGCCATCGTGCAGGGTGGCGTGCATGAGGATTTACGTCTCAAGAGTTTAGAGGCCACCCTGCCTTTCGCCACGCCTGGGTTCGCGGTCGGCGGCCTGGCGGTCGGGGAAAGCAAGGAAGAGATGTATCCGGCGGTGGCGTTTACAGCCGGCCGTCTTCCCGAAAATAAGCCGCGTTATTTGATGGGTGTGGGCCACCCCGAAGACCTTGTGGCAGGAGTGGCACTGGGAATCGACATGTTCGACTGTGTGTATCCGACACGGACCGGGCGCTTCGGCTACGCGCTGACGGACGACGGACGGCTCAACCTCAACTCCAGTGCGCCGCGCACCCAGCTCCAGCCCATCGACGCGGAATGTGACTGCTACGCCTGCCGTCATTACACCCGCGCTTACCTGGCGCATCTGCTGAGAGCGGAAGAAATGCTGGCTCCGCGCATGTTGTCGCTGCATAACCTGCGGTATCTGCACCGGTTGGTCGAGCGAATGCGAGTGGCGATCAATGGGCAGCAGTTCCATCCCTGGGCAGCAGACTGGAGCGAGCGTTATTTTCACGGCAATGTCCCAGGCTGGTTCACCAGTGCGTTTGAGCGCAGTACCCAGTCCGAAATTTAATAAGACGGACTCGCTGCGGCCTTTTCGGCGTCACTTAAACGGTCAAAGTTGAAAATGAGAAGAAAATCTGATGGTCAACTGACGGAGAAGGGGCGA
AAACAGTTTATTTGACGGTATTTAGCTCGCTCTTAGGGTTGACCCGCCCATCTTCATGCTTGTATCATCTGCACGATCTGCCCTCTTCTGTAGCCACAGGCTGCGAAGGGTGGGTTGCGCGAAGAAGGCGGAGTAAGGCCCAACTTCTCCTGGGAGAGGAAACATGGCAACTCGCCCCATCAACATCCTGCGTGCTGGAAACGCTGCTCGACGCGCACTGAGATTGGGGGTTCCCTATGAAGAAAAGTCTGATCGCTCTTACCACGGCGCTGTCGTTCGGCCTCGCTGCCGCCCAGACCGCCGCACCGGTGAGTGCACCCCAGGTTCCCGCCCTGACCGACGTTCCTGCCGGCCACTGGGCGAAGGACGCCATTGACCGTCTAGTGAGCCGCGGCGTCATCCTGGGCTACCCCGACGGCACGTTCCGTGGCACCCAGAACCTGACCCGCTACGAAGCCGCCATCATCATCGCCCGTCTGCTCGACCAGATGCGCGACGGCGAAACCCCCGCTGGCATGACCGCCGAGGACATGACCGCGCTGCAAAACGCCATTCAGGAACTGGCCGCCGACCTGGCCGCCCTGGGCGTGCGCGTCAGCGACCTCGAAGCGAACGCCGTCAGCAAGGACGACTTTGCCCGCCTGGAAGCCCGCATCGAAGAAGTGGCGGCTGCTGGCGGCGAGCAGGGCGCGACCGAAGCCCTCCAGGGCCAGATCGACGACCTGACCGCCCGTGTGGACGAGTACGACGCGCTGCGTGCCGATGTCGACGACAACGCCAGCAGCATCGCTGCCCTCAACGACCTGACCGTGCTGCTCAACCAGGACATCCTGGACCTGCAAGACCGCGTCAGCGCCGTGGAAGCCGCGCAGGCCGACTTCGTCCAGCGCAGCGACTTCGACGCCCTCGGTGGCCGCGTGACCACCGTCGAAACTCGCGTCGAAACCGTCAACAACTCGCTGACGGGCCGCATCGCTGCCCTGGAGCGCAACGCGTTTAGCGTCAAGCCCAGCCTGACCATCGGCTACAGCGTGAGCCGCACCAGCCGCAACTTCGACGTTGACCGCCTGTTCCCCCTGAACGCGGACGGCACCGTGGCCAACAACGCCTTCACCAGCGGCGGCATCGACACCGACACCGGTGCTCAGCGCCGTGACTTCGGTGACTTCGGCAACGCCTCTGACCCCGTGGTCGCGGGTGCGGCGGGCCTGTACGGCTTCGCGGACGGCGTGAGCTACACGGTGTACTTCACCGACGGCTCCACGGCGACCTTCGACGGCCTGAACCCCGCCGATTACAAGGTCCCCACCGGGAAGGTCATCGATACCACCAAGGGTCGCAACGGCTTCGGCTTCAACAACCTGGCCCGCTACAAGGAAGGCAGCACCGACATCGGTATTAGCCTGGGCTTCGACACCAGCGGCCAGTTCAGCCAGGTCACCAGCGGCACCGGTGGCAGCCTGTTCAGCACCGCGGGCCGTCTCCAGGTCAACCAGATTGACCTGAACTTCGGTCTGGTCACCGGCCTGCCGAGCGACGCTTACGTCGACACCAACGGCAACGGCAAGAAGGACGACGGCGAGGCGACTGGCCGCGGCACGTACCTCGGCAGCGGCGGCACGGCGGCCATCCTCCGCGATCCCGCTGGGAACGTCTACCGCCCCGTGTTCTTCCGCTTCAAGAACGCCACCACCCAGTTCAGCGTGGGCAACAACCCCGTTATCGTGACCTTGGGCCAGCAGCAGAAGTTCTACTTCAGCGACTACGTCTTCGACAACAACTACGATGGCCGTGGCGACGGCTTCACCGTGACCGTGGACGGCAGCAACGTGCCCGTGATCGGCGCCTGGAAGCCCCAGATCAAGGGCGTGTACGGCAGCCGCAGCGGTCTCGACGGCACCGCCGAAGCCGGCTACGGCGTGTACTACCGCGGCGTGCGTGCGCAGATCACCCCCGTCGGCACCCTGACGGCGGGCATCCACTACGCGCAGGAAGGCCGC
GACATGTTCGGCGCAGCTCAGAACACCACCAGCACGCCTTCCGACGTCACCACCTACGGCGCCGACCTGCACGGCAAGGCCTTCGGTGTGGAACTGCACAGCGAGTACGCCACCAGCCGCGTGCGCCCCAACACGGCCAATGCTGCCGTTCAGACCAGCAACGCGTTCTACGCCCGCGTGGCGACCCGCAAGGACAACCTGGCGTTCGACCTGAACACGCCCGCCGCCAAGTTCGGCAACGACACCTTCGGCGTGTCGCTGTACGACCTGAACTACCGCAAGATCGACGCGGGCTACAACAACGTGGCTGGCATCAGCGAGTATGGCTACGGCTCGTACAGCCGCACCTCGGCCCAGAACATCGCCTACAACCCCGACACCGGCGTGACGGCTCCCTTTGCCAACCTCGACCGTCAGGCCTACACCGACGCCAATAACGACGGCACGTCTGACCGCAACGCTGACGGCACCGTCGTTGCAACCAACACCAAGATCGGTCAAATGGGCTTCGGCGTGAAGGCAGCGGCCAACCTCGGTCCCGTGGCCATCGGCGGCTACTACGACACCAGCACCGGCGCCAACGGCGACAATGCCAACCGCATGACCGAAGCGGGCGGCTCGGCCAAGGTGGCCTACAGCATCTTCTCGCTGCGTGGCACGTACAACACCCTGGACAGCAACCGTCCCCAGATCTACCGTGACGCCGCCGGCACCCAGATCATCGGCGACGCCAAGGTGCGCCGCTACGCCGTGCAGGCGGACGTGACCCCCGGCCTGGGCCTGTTCGTGGGCGCTTACTACCGCGACGTGAACGTCAACGGTGTGCGCTCGACCACCGACCGTGGTCTGCTGGGCCGCGGCTACCTGGCTTCCAGCTTCGAGCCCGGCGTGGGAAACAATGCCTACCGCACCGGTCTGCGCTGCGCCGACAACAACTTCGGCACCGGTACCCGGGACATCGACGGTGTGGGCGGCGTGCTCAACCCGGCGGTCAACCTCGACCAGAGCCGCACCGCCACCTGCTTCACCTCCTACGGGGTCGAAGCGGGCCACGCGGGCGACAACGCCAACGCCCTGGTCAAGGACCTGTTCTTCCGCGTGGGCTACTCGCGCGTGTACGTGCCCACCACCGCCACCGCGACGACGGGCGATTTCAGCGGCTCGGTGACCTACGGTGACGCCCGTTACGACCGCAAGGTCGGCGTGGCGAACGTGCGCCTCGCGGGCTCGTTCTCGACCACCAACACCCAGCTCGACAGCCGTCCTGCCGGGACCCGTGGCGCGGTCGGCCTGATTGTGCGCACCGATCCCCTGGAAAACGTGCCCTTCCGTCCCCAGTTCAACGGTCAGGTGGGCTACTACACCGCCGACAACCGTGTGGCCGCTGGGAACTACAACGCCAACGCGACCAAGTACGGCGCGGGCGTGGTCCTGAACGACTTCCTGCTGCCCCAGACCAAGATCGGCGTGCGCTATGACGGCTATATGGCTCAGAACCGTCAGTACACCCCCTTCGACGGCGACGGCACCCAGGGCTACTTCAGCGACGCCAACAACAACCGCCGGACCAACCTGAACGGCGTGTACGTGGAAGGCGCCTACCAGGACCTGATCTTCAGCTACGGCACCTACACCCTGAGCCAGAAGGATCTCAACGGCGTCGAGTACGGCAGCGGCATCAACAACGGCCAGCCCGCTCGCGGCCAGACCTTCAAGATCAGCTACAAGGTCAACTTCTAAAGCCCCTAGTTCCCGGCATTGCCGGGCAACGGCCCCCGCTTCGGCGGGGGTTTTTGTTGTTCTGTAGTTAGTTCCGGTGCGGCGCGGCCAAAGTCGGTAGACTGACGCCATGCTCGCTGTTTTTGGACACCTCAACCCCGATACCGACGCCATTTCGGCGGCGATGGTCTACGCCCGGCTGCTGACGCGGCAGGGCACCGAGGCCCAGGCTTACCGGCTGGGCGAACCGAACTTTGAGACGGCCTATGTGCTGCGTGAAC
TGGGCCTGGAAGCGCCGCCACTCCTGACCGAACTGCCCGCCGGCAGCAAGGTGGCGCTGGTGGACCACAACGAAAGCGCGCAGTCGCTGCCCGCCCTCGGCGAACTCGACGTGACCCGCGTGGTGGATCACCACAAGCTGGGCGACCTGACGACCATCAACCCGCCCTACCTGCGCTTCGAGCCGGTAGGCTGCACGGGCACCATCCTGCTGAAGCTGCACCGCGAGGCTGGCCTGAGCGTGGAACCCCAGGACGCCAAGCTGATGCTGAGCGCCATTCTGAGTGACACCCTGCACTTCCGCAGCCCGACCACCACCCAGGACGACCGCGACGCGGTGGCGTTCCTGGCCCCGGTGGCCGGGGTAAACGACGTGGAGGCCTACGCACTGGCCATGTTCGCCGCCAAGAGCGATCTGGGCAACACGCCCGCCGAGACTCTGCTGCGGATGGACTACAAGGTCTTTCCCTTCGGGGACCCTGTGCAGCCGCAAAACTGGGGCATCGGCGTGATCGAGACGACCAACCCGGCCTACGTGTTCGGGCGGCAGCAGGAACTCCTCGCAGCGATGGATCAGGTCAAGGCCGAGGACACCCTCTCGGGCATGTTGCTGAGCGTGGTGGACATCCTGAACGAAACCAACCGCACGCTGGTGCTGGGCGCCACCGAGGCCAAAGTGCTGCGCGAAGCCTTCGGCGCCGAAGCTGAGGGACAGGTAGCCGATCTGGGCAACCGCATCAGCCGCAAGAAGCAGATCGTGCCGACGCTGGAAAAATACTTCGCGCCCGAAGCCTGAGCGGGCTCTTTCAACAGGGGAGACGGGGAGCACCTCGTCTCCCTATTTACTTGCACAGTCCAGAGCGGCGGTATGCTCCGCGCGTGACGCAGGACACGAATGTGGAATGGCTGTTCGCCCGGCAGCGGTTCGGAGTGCACCCCGGCCTGGACCGGGTGCGCGAGCTGCTGGCCCGGCTCGGCGACCCGCAGCGGCAGTTCGGGGCCGTGCTGGTCGGCGGAACCAACGGCAAGGGCAGCACCGCCGCCACGCTGGCCGCCATGCTGCGGGCGGACCGCAGACGCACCGGCCTCTTCACCAGCCCGCATCTGACCCGGCTGAGCGAGCGTTTCGTGGTGGACGGGGAGGAAGTCTCGTCGGCGCAGGTCACGGCAGCGCTGGCCCGCGTGCGCCCGGTGAGCGAAGCGGTGGAGGCGTCTTTTTTTGAGGTGGTCACTGCGCTGGGGTGTGTGCTGTTCGCCGAGGCCGGAGTAGAAATCGCCGTGATGGAAGTCGGGTTGGGAGGACGGCTGGACGCCACCAACGCGCTGGAGCCCCGGCTGAGCGTGATTACCAATGTGGCACTCGACCACACCGCCATCCTGGGCGGCACCGTCGAGCAGATCGCGGCGGAGAAGGCGGGCATTCTGCGGGCCGGGCGACCTGCCGTGACGGCTGCCGCGCCCGCCGTGCTGCCCGTGCTGGAGCAGCGGGGCGCCGATCTCTGGGCGCTGGGTCGGGACTGGACGGCGCAGACCCGCAGCCTGGGCTGGCAAGGTACGGCGGTCGACGTGCAGTGGCCCGGCGGTGCGGCGCAAGTGCAGACCCCGCTGCTGGGCGAACATGGGGGCCTCAACGCGGCGCTCGCGGCAGTGGCGGCGGCGCGGCTGGGCGTCTTTAATGAAGCCATCCGGCAAGGAGCCGCGCAGACCCGCTGGCCGGGGCGGTTGGAGGTTGTCCCCTGGCAAGGGCGGCGGGTACTGCTAGACGGGGCCCATAACCCGGCGGGAGCACAGGCACTCGCGCAGGCGCTGCGTCCATTGCTGGCGGCGAGTGGCCGTGCCCAGTTGCCCATCATCTTCGGGGCAGCGGAAGACAAGGACCTGGGCGGGGTCGCCGCAGAACTGCTGCCCCTCGCCTCACGGGTCATCCTGACGCGGGCTGCGCTCAGTCCCCGGGCCGCCGACCCGGCGCAGCTCGCGGCTCTGTTTCCAGGCGTGCCATTGG
AGTTGACCCAGACGCCCGCCGAGGCGCTAGCAGTACTGCGGTCTGACGATGACCTCGTCCTTATCTGCGGGAGCCTCTATCTGCTCGGCGAACTGCGTCCTTTGCTGCTGGGCGAGCAGTCGGAAGGCCACGAACGCTGGCAATAGCCACAGCAGCGGCCAATTTCTGGTTTCGAAACAGGAATAAGGCAAGTTGACTGCCCTCATTCTGTATTGACCGTAGCAAGGAGCGGTGTTACACTCCGACCAAGGGTGCCTGATTCTGCCCACAAGCAAACGCCAGGCGGGTTCCACATCATCTGTCTCTCGGGCATCCCCGACGGGTGACGGGTGGTCCGCCGCAGCCCAGGAGGTGAATGACATTGAAACTGCACGAACGACTTCGTGAATTGCGCAGCGAACGTGGGCTGCGGCTCAAGGACGTGGCCGAGGTTGCCGACATCAGCGTGCCGTACCTGAGCGACCTGGAGCGCGGGCGCACCAACCCCAGCCTCGAAACCCTCCAGACCCTCGCCGGGGCGTACAACATCACGGTTCACGACCTGCTCGAAGGGGTCGAGTTCTACGGTGAATCCACCGACGGCGCCCTGCCCAAGGGCCTCGCGGACCTGATCGCCGACCCCACGCTGGGCCCGCAGATCACGCCCGACTGGGTGCGGACCCTCTCGCGCATCGAGTTGCGTGGCAAGCGCCCGCGCGACAAGCAGGACTGGTACGAGATCTACCTGCATCTCAAACGCATCCTGAACTGATTTCCGCACAACTCCGGCCCCGTCGCCGGGGTTTTGTTCTTGGGAGATTCAAAAATGGCGCCCCCTCGCTGACCTTTCCGTCGAGGGGGCGCCGTTCTGTGAGGAGGCGTGTCAGGTCAGCGGTAAGACCTGCCGGTGCTCGGCTTCTCGCCGTTGCAGCACCAAGGTTCCCGCTCCCCAGGCGCCGCCGACGAGGGCGAGCGCGAAGGCCAGCGGACTCCAGCTCAAGCTGCCCGCAAAGAGGCTGATGCCCAGGAAAGCGCCGACCGTATCGGGCACTGGCAGGCGCAGGCGGCAGGCGAGGGCGCGGCCCAGGTCGTAGACGCTGACGCTCAGGCCAGTGGCGATCAGCAGCCCCGCCAGCGCAAGCACCAGCAGCGCCGGGCCGAGCAGACCCGCAAAGGCCAGCGCTAGTGCCGGCAACAGCAGCGCCGCGAGCAGCAGCACGCCGAGCGCCAGCGTCCGCATCGGCGCGTGCCGTTGCTGCCGGGCGAGATCTGGGGCTGTGCCTGCCACGAAGAGCAGCAGCAGCAGGCCCGCCGTGAGCGTCACGAAAATCTGCGGCCACGCGGCGGCGCCCAGCCAACCGAGCAGGGGCCGGAAGGCCGCCGCCGTTGCCAGCCGCACCCCCTGCGGCGCCGGCAACGCCCCGACCTCAGCGGCCTCGCCGGGGGTGCGGCCCAGCACCGCGCTCATTTCGCCGCTGACCTGGGCGCCGGGCGCCTGTTTGATGTCGCCCAGCAGAGTGACCACCCGGCCATTGACAGCGGCGTCGGGCGCCAGCACCACGTCTCCGCCGATGGCGACGATATTGCCCTCGACGGGTCCATGAACCTCCACCGGGTGGCCGAAGCTCAGGCCGGGTTCGCCGTTGACCAGGCTGGCGAGCGGCGGCAGCGACAGCGCGCCGGCCAGCACGAAGGCCCCGGCCCCGAAGCGCCGCACGGCGGGCGTGGGCATCCAGGTGACCAGCGCACTCGTGAGCAGCAGCAGCAACAGGCCCAGGCCAGCGACGGGTGCCACGTGCTCCAGCACCGTCTGCATGACCAGGGCGCCCGCCGCCAGGTTGGGCCACGCCGCCGTGACCGTCAGCAGGGTGAGTCCGGTCAGCAGCCCACCGACGAGCAGCAGCGGAGCGTGGCGGCTGTGGGAGGCAGAGACGGATCTGGAAGCTGGGACTTGGGCTGGTGGCGTCACTTCCAGTGAAGGTGGTGCGGAAACAACAGCCTCGGCCTGCTGCCCCTCCTGC
GCGATGCGGCCCGCCAAAGATGCCGCCAGCGAGCGGGGCGGCGGGGGCACCGGCGCCTGCTGCAACACATTTCGCGTGCGGGCGTCGCTGGCAATCTCGGCAGCTAAAGATGCGGCGACAGGACGTAGCATTGGAGGCGGCGTCTGCAACAGCGCGGCGGCCCGCAGGTCGCTGAGGACGGCAGGTGCCACCGAAGCAACTGAAGGCAGAGGTGCGGCCCCCAGTTGCCGGGCCAGCGCGATGTCCTGCGAGACACTGGCCGCGAGTGAACGTGGGAGGGCTGGCGGAGCAAGAACACCGCGCAGGGTGGCACTGCTGGCAATTTCACTGCTCGTTTCGGCAGCCAATTGGTGAGCGAGTTCAGGTGCCAGCGTCGGCGCGGTGAGCTGTTTCCCCAGGCGAAGATCGGCCAGCACCGCGTCGGCCACACTGCGGGGAGCAGGCGGATTGGGGCCGGTCAGCAGCGAGATGGCCCGGTGCAGTGAGCGCCGCGCCTCCTGCACCTGCGGGTCAAGGCAGGCGGCGTCCAGGGCGGCCTGTTCGTCCGCGCTCAGGTCGACGTCCGCCTCACGGTGCAGCAGCGCCCGCAGCTCCGGTGTCAGAACCTGCGTCCATTCTGGTGTGTTCCACCCCCCCTCTTGCCGTGCGCCCATCGTCGGTCCCTCCTCGTCCCTTCCTTTACGTCAAATCCGCCTGCAAAGTTCCCGGCGGTGGGAGAACGGTCGCCGGAAATTATCTAGACAAATTAGGGATTAAGGCTGGGGCGGACGCTCGGCGGCCTGGTAGTCGCCGCTTTTGCCGCCGGTCTTGGACAGCAGCCGCACGCCGCTGACCTCGATGGCCTTGCTCGTCGCCTTGAGCATGTCGTAGACGTTGAGCGCGGCCACCGTGACGGCGGTGAGCGCTTCCATCTCGACGCCAGTGGGGGCAGTGGTTTTGACGAGGGCGGTGATGTGGATGCCCGCGTCTTTCAGCGTCACGTCCACCTCGGCACTCGTGACCGGAATCGGGTGGCACAGAAAAATCAGGTCGGCGGTGCGCTTGCTGCCTGCGAGCCCGGCGAGGCGCGCCACGCTCAGGGGGTCGCCTTTGGGCGTTTGCCCGGCGAGCAGCGCGGCCCGCGACTCGGGCGGGAGCAGCACCCACGCTTCGGCGCGGGCGGTGCGGGTGGTGGGCGCCTTGGCCGACACGTCTACCATGCGCGGCTGGCCGCCGACGAAATGGGTGAGCTGCGGGGCGTCGCCTTGCGGGTCGCTCATTCGTCGGGCAGCTCCAGGTCTTTCAGGCGCGCGAAGGGGTTTTGCTTGGCGTGGCTGCTCCCCTCGGGCGAACCGAGTTCGACGACCTGCTCCTCGATGGGCACCTGGGCGCTGTGTTCGCAGGGGCCTTCGTTGAGGTCGTGGCCGCAGACCTGACACAGGCCCTTGCACGCGGGGTCGTGCAGCACGCTCAGCGGCGCGGCGAGCAGGGTGCTTTCGGCCAGGTACGCGCTGAGGTCAAGGTCGGGATTGCCGAAGACGAGGACTTCCTCGCCAGTTTCGGCTTCTTCGATGTAAGGCTCCTCGGCGGAGGGTTCGTAGCGCATCAGCGTGCCGAGGTCGATGTCGAGCGGCACCTGCACCTCGCGCAGGCAGCGGGCACATTCCATGACCAGCGTGGGCGAAAACTGGCCTTGCAGGTACATCTCGTCGCCGCCAAGCGAGTTGATGTCCACCTCAAAAGCGGCAGGCTCGGCAAAACGCAGGGTTTGCGGCTGCTTGCCCTGAAGGTACTGGAGGTGGTCGAGTTCGCCCTCGGCGTGCGCGTCGTCGGCGGTTCGCAGCAGGGCTCCCAGGTGAATGCGGGGTTGATCGTTCATGGGCGCCATGATAGAGGGCTGGCCTGCGGGCAACTGCGGCGGCGTGGGGGTTGCGCCCTGGTGAGCGATACGGGGGAAGGGCACCGCTGCCGGTGTTCCGTTCAGGGTTCATCTGTCCTGCCCGGCGCGGCGGCATACTGCG
GGGCAGTGCTCGCCCCCTCCCGTGACCAGGCCCCTGTGACTGACTTCCTTTCCTCTACCCCCCTGACCGCCTTCCGGCGTGCGCTGGCCCGTGTGCTCGCGCTGCTGGTGGGTTTCGGTCCGCTGGCGGCGCACGCTCAGGCGGCGGGCACAGCGGCGGTGAAGCCCCTGGTCGCCACGGTCGCTACGCCGTCGGCCTCGCGCCCAAACGCGGCGCTGCGCTGGACGCGGCTGGGGAACCCCAGGTCGCCCTCGCTGCTGCAAGTGCCCGCCGATTGCCAGGTGCGCAGTTGCCCGCTGGTGGTGGTGTCGCACCCCCGCGCCCAGGACGCCGCCCGACTGCGCGACAGTGCCCAGGTCGGCAAAATCTCGCAGGCGCTGCTGGCCGCCGATTTCGCGGTGCTGCTCAGCGGCGACGGCGGCGTAAACACCTGGGGCAGTCCGGCAGCGCTGCGGGAAGTCGCGCAGGTTCACCGTGAGGCCACCAGCCGATTTCGCTGGAACAGCCGGACCTACGCCCTGGGCCTGAGCATGGGCGGGCTCCTCTCGCTGCGCAGCGCCCTGCCCGATTCGCCCTATACCGTCCGGGGCGTAGCTCTTATTGACGCCTGGGTCAGCCTGCGTGGCGCGTGGGGCAGCGCCCTGTCGCGCCAGCGTGAAATCGGCAAGGCGTATGGCCTGACCGTGCCGCCCACCCCCACCCTCGACCCCCTGCCCCTCGCGCAGCGCCTGGCTCCGGTGGCGCCGCTGCCGCTGTTTCTGGCCTACAGCCCCACCGACACGGTGGTGTCCTCGCGCAAGAATGCCGAACTGCTGATTCCCAAGGCCGAACAGGGCGTGAGCGAGATCGTGCGCCTCGACGGGCCCCACCTCGGCGGCAACCGCTTCTCACCGCAGATGATTGCCCGGCTGGTCGGCTTTTATCAGGGCCTGGAGCAGCGCGCCATTGCCCGCCAGCACAAGGAATTTCCGCCTGGTCTCCCGGCTCCGGCGGTGCAGCGGGCTCAGACGGAGACGTCGAAAAGCTAAAGGCGAGGGGGCAGAGAAACCGACTTCTCTGTCCCCTCGCTTTTAATGGGTTTATACGGATTTCGTCCAATTCCCCCACCTCCGGAACGGCACCGGAGGCGGGACCATTTCCCGAAATCCTTTTTACTGCTTCTCACTCCGTTCGGATGGAATCCCCAGAACCGGGGGATCCCATCGCAGCCGGTTTACATCGCCGCCGGAATCTCGATGCCGATGAGGTCCAGGGTGTCCTCGAAGGCTTTGCGCAGCCGCACGATCAGCGCCAGCCGCGCTTCCCGCAACCCTTCTTCCGATTGCAGGACGTTGGTGGCGGGTTTGCCCTGCTTGGTTTTGGCGTTGTACCACGCGTTGAAGGATGTGGCGAGATCGAGCGCGTACTGCGCCACCACGTGCGGCGAGTGGATGCGGGCGGCCTGCGCGGCGACTTCGGGCAGCTTGGCGATCTGCTTGGCGAGCACGAGGTCGATGTCGGGCAGCGCGTCCCAGTCGGCCCCGGTGCCGTCGGTGGCATAACCGGCTTCCTCGGCCTTTTTCAGGATGTTGGCGGCGCGCACGGCGGCGTACTGCACGTAGGGCGCGGTGTCCCCGTTCAGCGCCAGCGCCTGCTCCCAGCGGAAGTCGATTTTGCGGGTCGGCTCGGCCTTGAGCATGGCGAAGCGGATGGCGCCCAGGCCGATGCGCCGGGCGATTTCGGCGGCGTCCTCGCGCGCGGCCAGGTCGGGATTGATGCCCTGCAAGACGCTCAGCGCGCGCTTCTGGGCCTCGTCCATCGCGTCGTCGGCACTGACCGCGATGCCCTTGCGCCCGCTGATGGTCTGGCCTTCCAGCGTCACGAAGGCGTAGGAGAGGTGGATGCTGCGCTCCTCCTTCTCCTGCTCGCCCGCCACACCCAGCGCCGAACGCACCACCGTCTGCGGGTGGTCCTGGCGCGAGTCGATCACGTTGATGACCTCCTGCGCGTGCCCGAAGCGC
CGCTCATCGTCGGGCTGGCCGTCCGGGGCACTCGTCCAGATGGTGTTGCCTTCGGGGTCCTGCATGAAGGGCTTGAATTTCATCCCCTCGAACAGGCCGAATTTCCAGAACTGGTAGCCGATGTCCTTGGCGGCGTACATGGCGGTGCCGCCCGAGCGCACGAGGACCACGTTCGGTTCTTCCAGCCCCGGCATAAATTCCGACACGTCCATGATGAACGCCCCGGCGTACTTGCCCTCGGTGGGGCGCGAGGTGTACCTGCTGCCTTCCAGAATGTTCATGGCCTGCGCCAGAAACCCGCTGCCCACCACGTCGGATTCCCAGTTCAGCAGGTCGTAGCGCGCGCCGAGGCGAAAGCAGGTCTGAAGCTGCGCCTTCACCGTCTGCTCGACCAGGGGCCGCAGCTCCCCGGCTTCGAGCTTGTGCATGATCTCCATGATGCCGCTTTCCAGCTCGGGCTTCTGGGGGTCGGCGTTGAGCTGCACGTAGCCTTCACCGAGCCACTGGTCGTACTTCTGCACGCCGTCCCAGACGCGGCCGTAATGCTGGGTGGCGAACAGCGACTCGGCGGCCTGTCGCCCGGTGTCGTCGATGTAGTTCTGTACCTCGACGGTGTGCCCCGCCGCCCGCAGAATCCGCGCCATGGAGTCGCCCAGCACCACGTTACGCAGGTGCCCGACGTGCAGCTCTTTGTTGGGGTTGACCGAGGTGTGCTCGATGACGACCTTGCCCTCGCGCTTGGGCAGCTCGAAGGGACGCTCGACCACGCCGCGCACGAAAGCGCCCGCGTCGAGGAAAAAGTTCAGGAACGGCCCCGTGGCCTCCACCCTGCGGATGCCGGCGGGCAGCACCACCGTCTGCGCGAGCTGCGCGGCGATCTGGGCGGGGTTGCCCCCGGCCGCCTTCGCCATCTGGAAGGCGGCGGGCGTGCCGTAGTCGCCGGGTTTATTGGCGGGCGTTTCCTGAATGGCGGCGTCGACGGGCATTCCCATCTGGTGCGCGGCCTGCTCGACGGCTGCTTTGAGTTGGGCCTTCAAATCCATTCGGGCAGTTTACGGGGCGCGGGCGCTGACAGGGTCACGGGGCAGGAAAAGAGGCCATGCGCCACTTCCCGCCCCTGTGCTGAAGGCCAAAACTGTCCGGAATCGGGTATTATACGGCTTTAATCCGATTCCCGAACATCCGGAAAGGCGCCGGATGTCCGTCTATCTCCTTAAAACCGTATTTTTCCATGCGCTCCGCGCAAAATTGCGCCTGGACATGTCCGGGACTCAATTTGAAACCGTATTACCGCTGTTTCAGCGCCGCGTCCCGGTAAAAGCGCTCCAGCCCCAGCGCAATGGCCTGCGCCAGTCGCTCGCGGCCCGTGTCGCTCATCAGGGTGCGGAGGTTGCTCTTGTCGGTCAGGAAGGCCGTCTCGATCAGGATGGAAAGCTGCGTGGTGGGCCGGGTCAGCGCGAGGTTCTGGTAATGCACGCCGTCGTTGCCCACGTCGGGCAGTTTTTCCACCAGGGCACCCTGCACCGCGTCGGCCAGGGCGCGGGCCTGTGGATTGTAAAAGTACACGCCGCTGCCGCGCTTGGTCCGGGGGTCCACGCCGTCAGGCAGCGCGTTGGCGTGGATGCTGACGAGCAGCTCGGCGTTCTTCGCCTCGGCGAGCAGTGGGCGGTTGTAGATAGGCACGGTGGTGTCGGCCTCACGGGTCAGAATCACGTTGGCGCCCTTCTCGCGCAGCAACTCGGCAAGGCGTAGCGTCAGCGGCAGCGTGAGGTTCTTCTCCGGCACCCGCAGCGGTCCCGCTCCGCCGAACTCGTCGCCGCCGTGACCGGGGTCGAGGACGATGGTGCGTCCGGCAAGCGGCTGGCGGGCGTTGATGGCCGGAGCATTCCGCACCCGCAGGGTCAGGGTGTTGCGCTCGGCGGAGTCGTCGGCGCCGTAGGTCGCGTCGTAGCCCCAGGGCGCCCCGTTCAGGTCCACGTGCAGGCGGACCACGCCGTCGG
CGTCCTGCACCCAGCGCACGTCGCGCACCGCGCCCGTCGGGACGTCGGAGACGATGTACTCCACGTCGGACACCGCGTGGAACAGCCGCAGGTCGAGGCTGCCGCCCTGTCCGGCCTGCTGCTCGACGGTAAAGGGCACGCGCTCGGGGAGCAGCACCTGCACTTCGCTGTGGGTGCCGCTGTTTTTCACATTGATGTTGGCGAACACGGCGCGGGGAAGGGGCGTGCCTTCCGGGCGCAGCGTCAGGGAGCTCTTCGGCGCGTTGAGCGTCAGGCCATCCGACACCTGCACGGTGTAGGTGCCCCCTTCCTCGCCCACCACGATGGTCCGGGCGCCGGGGCGGGGGTAGACCACGAAGTTGCGGCCCGCGCCGTTGCGCCACACCTGCCCGGCGGCTTGCAGGCCGCGCCCAGCGACGGTGGCGGTGACCTCGGCCACGCGGGGGCCGGTGCCAGTCACGCTGAGTTTGCCGGTGCTCTCGGCGGTGGCCGTGGTGCCGTCCGCGCCGGTCAGCGTGAAGGACACGGGCGCAGCGGCGAGTTGGGCGGGCAGCAGGAAGGTGCCCTCGTACTCGCCGGGCGACGTTTCAGCCAGGGGAAAGGGGCCGAGGTCGCCGACCTTGTAGCTCGCCTTCCCACCGGGCGTGCCGCTGAAGCCGACCGGCACGCCGCGCGTTTCCAGGTTCGCCGGTTGCAGGTAGGCCACCCGGTTCTCGTCGGGCAGCACGCTGTCGGCCACGATTTGTGCCGCGCCCCTCAGGACTGTCTGCGGGCGGCTGGTCACGCGCAGTTCTTTGGTGCTGGTCACGCCGCCGAGGGTGGTTTCGAGCTTCAGCACATTCTCGCCGGGGGTCAGCGGCACCCACTCGATAAACAGGCCGTCGGCGCCCACGTCCACCGGCTGTCCGCCGAGGCTGAAGGTCGCGCCGGGCTTGACGCTGCCTTCGAGCAGCACGTGGTCGAAACCCACGCTGTACTTGTCGGGCGGGTACGCCACAAAAATCGGCTCGTCGCTGATAGGCGGCAGCAGTCGGGTCGCGCTGGCGGGCGCCGGCGTAGGCGAGGGAGCTGCGGGCGCGGCGGTTTGCGCGGCAGCGAACGAGGCGAGCAGCAGGGCGGGGAGCAGGGTACGTTTCAAGATAAAGACCTCCCTCGAATTGTGGTTCGGGGCGCAGTGTACGTCAGTTGCCCGACTTGAGGCCAGCGCGTCTGAGTTCGTTTCTGAGTTCCGCCACCGAGTAGCGGCGCGAAGGCAGCCGCAGCAGCGTGAGGCCCCCGGAGCGGAAAATCACGTCCTTGACCGAGTCGCGGTACTGCTGCTCGGCTTTCTGGTGCGACTTGCCGTCGAGCTCGATGGCCAGCCGGGGCCGGAAGTGGTCGGCGGCGTCCACGATGATGAAATCCACGTGCTTGTCGCGCAGCCGCCCCAGCACCGCGCCGCGCTGGGCGGGGTCGTCGATGGTAAAGAGGTCGTTGAGCCGCACATTGGGAAAGGCCCGGTAGCTCGTGCCGCGCAGCGCGTCTTCCAGAGCGCGAAAAAAAGCGTTCTCGTCGGCGGAAAAGAAGTAGCGCCGCGCCGAGACGGGCAGCGAAGTCGGCACCCGTTTTTCGCCGGCCAGAGGAGATGGAGGTGAGGAAGAAGCCGGCGCGGGCGCGGCGTCCCGCAGGCCGAACAGTGAGGCGAGGCATCCCAGGGGCATGGGAGCAGTGTAGGCAGGCATGGAATCGCGGTGGATTTGGGCCTCTGCATGAGACGCGGGTCAGCGGCGGCTGGCCTATGCTATAGGGAGCCGCCAACTCGGTATACTCTCCCGAGTCTTTGTACTGAGTTCAATATTCCGCCTGCCCCGCTGCGCCGCGCCCCTGCGCTTCCCCCGGAGGTTCCCGAGTTGCTCCCCCTTGCCCATCAAATCGCCTTTTTCATCTTCGCTCTGGTGACCGGCGCTGTCGGCGCTTACGGCTTTTACCGGCTGTTTTTGCGGATTCGCCGGGGCGTCCCT
GCCAGCGAGTGGCGCTGGAACGACGCCCCGCAGCGCATCGGCTACGCGCTGGTCACCTCGCTGACCCAGGAACGCACCTTCCGCAAGCGGCCCTGGATTAGCGTCCTGCACGCCTTTATCTTCTACGGCTTTACCTACTACTTGCTCGTCAACGTCGTGGACGGGCTGGAAGGCTACCTTCCCTTCTTGCACATCAGCAGTGCCACTCCGCTAGGCGCGCTCTACAACGTCCTGGCCGACATCCTGAGCCTCGGCGTGCTGATCGGCGTGATCGGGCTGGTCATTCGCCGCCTGTGGGGCAAGAGCCGGCGCGACTTCCGCTTTACCGACAAGACGCTGCTGCATCCCCTCGTCAAAGACAACTACATCAAGCGCGACTCGCTGATCGTCTCGGCCTTCATCACCTTCCACGTCGGGAGCCGCATTATCGGCAACGCCGCCAAGATGGTGCAGGAAGCCCGGCTGGACGCGGGGCACTACGACGCCTTCCAGCCGTTCTCCTCGGCGGTGGGCAGCGCGCTGTTCGGCGGCCTGAGCGACTCGGCCTTGCAGGGCTGGCGCCTCTTCGGCTTCTGGGGGGCGCTGGGCAGCATTCTGCTGTTCCTGAGCTACTTCCCCTTCTCCAAGCACATCCACATCTTCATGGCGCCGGTCAACTACGCGCTCAAGCGCCCCGTCAACAGCGGCACCCTGCCCCCCATGAAGGGCCTGGAAGAAGCGATGGAGGCCGAGGAACCCAAGCTGGGCGTGGAAAAACTGGAAGACCTGGAATGGCCCCGGTTGCTCGACGCCTACGCCTGTATTCAGTGCAACCGCTGTCAGGACGTGTGCCCGGCGAACGCCACCGGCAAGGCCCTCTCGCCCGCCGCGCTGGAAATCAACAAGCGCATGGAACTGAACGTGATCGCGGCGCAGCACAACCCCTTCGTGCTCAGGCCCGTGCCGTTCGAAGCCGGCGAAAGCACCGCCCGTCCGCTGCTGGAATACGCCATCAACGAGGAATCGGTGTGGGCCTGCACCACCTGCGGCGCCTGCATGCAGGTGTGCCCGGTGCAGGACGAGCAGATGCTCGACATCGTGGACATCCGCCGCAACCTGGTGATGGTGCAGGGCGAGTTCCCGCCGCAGCTCCAGACCGCCTTCCGCGGCATGGAACGCGCCAGCAACCCCTGGGGCATCTCGCGCGACAAGCGTATGGAGTGGGCCGAGGGCCTGAAAGTCCCCACTATCGACGAAAACCCCGAACCCGACGTGATCTACTGGGTGGGCTGCGCCGCGAGCTACGACCCCGGCGCTCAGAAGGTGGCCCGCTCCTTCGTGCAACTGCTCGACAAGGCGGGCGTGAACTACGCCGTGCTGGGCAAGAAGGAAGCCTGCACCGGGGACAGCGCCCGGCGCGCCGGGAACGAATTCTTGTACCAGCAACTCGCGCAGGAAAACGTGGAAACGCTCAATCAGGTGGCGCCCAAGCTGATCGTGGCGACCTGCCCGCACTGCATGAACGCCATCGGCAACGAGTACCGGCAGATCGGCGGCGACTACCGCACCATTCACCACACCGAATACCTCGAGCAACTCGTGGCGGCGGGCAAGCTGCCGACCGCGCAGCTGCACGACAACGTGGTCTACCACGACCCCTGCTACCTGGGCCGCCACAACGGCGTGTATGAGGCGCCCCGCCAGCTGATTTCGCAGATGGCTGGGCAAATTCTGGACATCGAACGCCAGCGCGAGAACTCGTTCTGCTGCGGTGCGGGCGGCGCTCAGTTTTGGAAGGAAGAGGAAGAAGGCCGCGAGCGCGTCTCCGACAACCGCTTCCGCGAATTGCAGGCGCGGCTCGACACCGCCGCCGAGGCGAGCGCCGAATTTGAACGCACCGGCAAGGTGCTGGCGGTGGGCTGCCCCTTCTGCAAATCCATGATGAACTCCACGCCCGAGAAGCAAAAGCGCGACGACATCATCGTGAAAGACGTGGCCGAGCTGATGCTCGAAAGCGTGCA
GCGCGCCAGTGGCGAGTGGGTCGAACCTGCCGTGGCGCCCTCGCCCGAAGTCGAAGTGCCCAACGCGGCGCTGCCGATGGAACGCACCGGCGACGCCCCCTCCGCCGACGCCCCGCGCGACGACGTGGTGGGCACCACCAGCGCCGACGTGGAAAACGCCCAGCCCGGCAGCCCGGTGGCGAACGCCGGCACCCAGCCCGAGGCTCAGGCGGCGGCCCCCAGCCCTGCACCCAGCGGCGAAGGCACGCCTGTTCCGGCGGCCCGCAAGTCGTGGAAGCCCAAGGGCGGCGACGACGTGAGCGCGGCGGCTCCGGCGCCCGCCCCTGCCGCCTCCACTGCCGAGGGGGCCGCTCCCGCCCGCAAGAGCTGGAAGCCCAAGGCGAGCGCGGATGATGTGGCCTCCACCGCGACGGTTATCGAAGCCGCACCCGCAGCTCCGGCCCCCGAAGCTACCGGCACCCGCAAGGCCTGGAACCCCAAAGCGAAGGCCGACGACGTGGGCACCGCGCCCGCCGCGCAGCCTGCCGTTCCTGCTCATGCCCCTGCCGCTGAAGGTGCCGCCCCCGCCCGCAAAGCCTGGAATCCGAAGGCGAAGCCCGAGGACGCTGCCCCTGTGGCGCCCGCCCCCATGCGCGACGACGTGAACCCGGCGCCGGCAGCTCAGGCCACGGCATCTGCCGCCCCCGCCGAAACGGGCGCCACGGGACGCAAGGCCTGGAACCCCAAGGCGAAAACTCCGGCAGCGGCGGAAGGGTCGGCCCCGGTTCAGGAAGCGGTGCAGGAAGTTGCTCCTCAGCCTGCCGTCGCTCCTGCCCCCGTGCAAGGCGATATTCAGGATCCGCTGCCCCAGCCTCCGGTCCAGCAGGCCAGTGCCCCCGCCCAGAGCGGTGAGCGCAAGAAGTGGAACCCCAAGGCGAAAGCCGAAGCGGCGCCCGCTCCTGCTGCCCCCGCCGCGCCCATCACCGAACACGTCGTGCTCGACGAAGTCGGCGTCAACGGGCTGGAAGAAGGCCCCGAGGCGACCTCCGCCCCCGACGCGGCCCCGGTCACGACCCAGCCCGCCACCCCCGAGCCCTCTGCCTCCCAGACCGGCGAAGTCAACGCCGAGACGGGCCGCAAGAAGTGGCAGCCCAAGAACAAGGGCTGACACCCACTCCTGAGTCGAGTCCACTGAGTGAAGTACGCCCCCGCCGGGAAACTGGCGGGGGTTTTGCTTTGCCTGCCCTCTACAATCCGGGCGTGAACCTCCTGCTCATTCGGCACGCGCAGTCCACCAACAACCTGCTCTACGCCCAGACCGGAGGCAGCGAGGGCCGCTCCGCCGACCCGCCCCTGACCGAACTCGGCCACGCGCAGGCACGGGCGCTCGCCGAGTTCGCCCGCACCGACGAGACCCTGCGCGGCCTGACCCACCTGTATTGCAGCCTGACCACCCGCGCGGTGCAGACGGCGGCGCCACTCGCGGCGGTGCTGGGGCTCGGCGTGCAGGGCCTCACGCACGCTCACGAGACCCAGGGCCTCTTCCTGCGCGACGAGGCCGGGGTGCCCCGGCCGGTGCCGGGCCGCACCCACGCCGAGCTGCTGACTGAGAATCCCGCGCTGCTCTGGCCCGCCGACCTCGCAGCGGAGGCGGCCTGGGAAGGCGGTTTCGAGCCCGAGGACCACGCGGCTTATCTGGCCCGCGCGGCCCGCGTGGTGGGTGAACTGCGCGCGGCGCACGGCCCTCAGGACACGGTGGGGCTCGTCACCCACGGCCACTTCACCCAGTTTCTGCTGCGCGAGCTCATCAGCCACGGCACGGCGTTTTTCCGCGTCGCCAACACCTCGACCACCCTGCTCACCCTGCCCGGCCCGCAGGACCCGCCCGAGTTCGGCCCGCTGGTGGGCTGGGTCAACCGGCACGACCACCTGACGCCGGAGCAGGTCACCGTTTGACCGGAGCTGTAGTTTTGCTTACAGAGACGCACTTTTGCGGCCCTTCCCGTTAGAAATACTAAGGA
CGGCTGCCATCCGGTCAGGGTTTCTGCGGGTCATGCTGAGGCACTGCCACTTCGGAACCCGAGGTGGACAAAGGAGCACCCCATGAAACTGAAGATGTCCGACCTGATGATTCTGCTGGGCTACGCCTCCATCGGCTATAGCGCCTACCGTTACTTCACTGCCAGCGACGACGACTCCAAGCGCGACGCCCTGTTCGTCGGTCAGTGGGCGCCCACCTTCTTCATCCTGGGTGTGGGGGCCGAAAACCGTGAGTACCGCAAGCAGAATACCCTGGCCCTCGACGCCAACGCCTGAGCCCCGAACCTCTCCGCAGAGGCCGTCCATCCGGGCGGCTTTTTGCTTTTGCGGCCCTCCGCCCCTTTTCGATGTCAAGGTGACTTATGAACCTGACTCCCGCTCAACGCAGCAATTTCTTTCCCCTGACCGCCTGGGGGTATCCCCTGTGGCGCGCCCGTTCGCTGACGTGGCTGGCCGGAGAACCGTTCGGGCTGGCGCGGGAAGCGCGGCTGTTTCTCGGGCTGTGCCGCCCTCATGCTGGGGAACGCTGGCTGGACGTGGGCACCAGCACCGGGTTTTACGCCGGGGTGCTGGCGCGGGCGGGCTGCCGGGTGGTGGCCGCTGACCTCAGCCCCGCCATGCTGCACGCGGCGGCGCGGCGGGAACCCCGGCCCCAGATCGAGTGGGTGCAGACGAACGTGGAAGAGACCGGCTGGCGCACGGCGAGCTTCGACGGCGTGGTCGTTGGGGCGACGTTGAACGAAACCGCCGACCCCGCGCGGCTGCTCGGCGAGTGCGCGCGGCTGCTGCGTCCCGGTGGCAAGCTGTGGCTGATGTTCGTGCCGCGCACGGGCGGGCCGCTGCAAGGGCTCCTCGCCCGGCTCGGTGGGCTGACCTTTCCTGACCTGAGCGCGGTGGAGACTGCCTTGCCCGGCGGCGCCATCGTTCACGCCCGGCGGGCGGGGGCCGTGCAGTTCGCTCTGTTCGTCCGGGGCAACGGGTAGGCTGGCAACATGCCGCCCCTCCGCTCCCAGATTCGGCCCGGCCTGACCGTGGACATCGTGCAAAAGCAGGACCAGCCCACCGGCAAGCTGACCCGTGGCGTGGTCGCGGCGCTGCTCACGCGCTCGCCCTCGCACCCGCACGGCATCAAGGTCCGGCTGACGAGCGGGCAGGTGGGCCGGGTGCAGGCGGTGGTCGGCGGGGAATAAAGAAAGTCCGACTTTCTGGCGCGGGGCCGCGAGGCTTACGTTAACGCCGGAGGTGCGCCATGCCCAAAGCCTGGAGCAACAAAGACGAACGTCAGTACGAGCACGTCAAGGACAGTGAGGTCAAGCGCGGCGAGTCGCCAGACCGCGCCGAGGAAATCGCCGCCCGCACCGTCAACAAATCCCGCCGCGAAGAGGGCCGCACCCCGAACAAGCGCACCCAGGGCACCGGCAACCCGGACGCCGCGCTGAGCGACCTGACCCGCGACGAGCTCTACAACCGCGCCAAGGAAAAAGGCATCGCGGGCCGCAGCCGCATGAGCAAGGCGGAACTGGTCAGGGCGCTGAGCTGACCCCCGCTGCACCATAAAAAAGCCCCTCCCAGCCGGAGGGGTTTTCTATTTGGCCGGTGGCTCAGCGCTTGGGCATGCTCAGGCCGAGGCCGGTGAGAAAGCCGCCCGCGACGGCGAGCGCGGTCATGAGCATGACGTTGTTGACGGGGTTGGGGCCCTCGTTGCTCTTGTTGGCGTAGGCGATGGCGTGCAGCTGGTTGATGTCGATGTAGTTCTTCGCGAGATAGAACAGGCTGGCCACGATGACCACCAGGCCGATCAGCACCACCACCTTGGAGAGAATCTGCATGGTTCAAATTGTAAGTCTCCGCCCGTTTGTGCCTCTTGGCCGTTCGGAAAGATTGTGGCGGGGCAGGGTCATCAGGGTCAGTGGGTCAGCGTCCGTGCGGCGTCGCGTCCAGCCAGGCGGTCAGCGCCTGAGCCACGCCCGCCGGGTCGGTG
ACGACCACCGGCACGCCGAGCGAGGCTGCCGCGCTCGCTGCCGCGCACTCGCCCTGTTCCCCCATTACGGTCCCGGCGCCGGTCCAGGCGGGAAACGGGCCGCCTTCCTGGTAGGGCGCGCCGCTGAGGTCGTCCACCACCCGCGCGGGCACCTCCACCGCTGGGCCACCTGCGAGGGCCCGCTCGGCCACCACCAGCACGCCCCGGTGTTCGCCTTCTTCCAGCGCGCCGAGCAGCGGGGTCAGTCCGTCGTGGACGAGCAGCTCGAAGGTGCCCTTGCCGTCCACTGCCGCGAGGGCGGCGGGCAGCGCAGCGGCGAGTTCGGGCGGCGCGGCGACCAGCCAGCGGTGGCCCCCGGCGCGGCCCTCGAAGCTGGCGCGGGGCGAGCCGTACACGTCCGTCCAGGTGCGGGTGTGTTGCATGGCGTCATCATCGGGCATGGGGGCCAGAGGGGCAAGCGGAGTCGCCCCGCTATGCTGCGCTCATGACGCTGCCTGCTCCCCGCACCCACGGTTCCGTTCGTCTGTGGTCGCTCCCCACCGGCCCCCTTCAGGAAAACGCGCTGCTCGTGGCGGGCTCGGACAACCAGGGCTTCTTGATTGACCCCGGCGACGAAGCGGCGCGCATTCTGGACCTCGTGCGCGCGGCGGGGGTGGACGTGCAGGCGATTTTGCTCACCCACGCGCACTTCGACCACATCGGCGCGGTGCAGCCGGTGCGCGAGGCGCTCGGCGTGCCGGTTCACCTCCACGCGGACGACCTCGCCACCTATCACCTCGGCGCGGCGTCGGCGGGGCGCTGGAACCTGCCCTTCGTGCAGCCCGCCGACCCCGACCAGCAGATTGCCCAGGACCAGACCTTCACGGCAGGCGACCTCACCCTGACCGCCCGTGAGCTGCCGGGGCACGCGCCGGGGCATGTGGTGTTCGTGGGCGACGGCTTCGTGATTGCGGGCGACACCCTCTTTGCCGGGGGCATCGGGCGCACCGACCTGCCGGGCGGCAACCACCCGCAGCTCATCGCGGGCATCGAGCGCGAACTGCTGAGCCTGCCGGACGACACCTACGTCTACCCCGGCCACGGCGGCTTCACGACGATTGGACGGGAGAAGCGGAGCAATCCGTTCCTGTAGAGCCTTTGACAAAAAGATGGCACAGCTTTTTGGCGAGCGGAGCGAGTGCAAAACACGGAGCAGAACGGACTTGCAAAGCTGCGAAGCAGAGAATGGAGCGGGTGGCGGTGCTGTTCCGACGCACGCGTCATTCGGAGAACTGCTCTAAGGCAGGAAAAAAGGCGCCAAGGTGAGTTGACCTTGGCGCCGTTTCTTTTTTCTCTTCCCTCAGTCGAAATCCATCGCCCACCACGCCTCGCGCTGTTCGGCCATGCGGGCGCGGGGGTCGCCGAGGGTGTCGAGCGACGCTTGCAGGCCCTTCCAGTCCTGTTCGCTGAGGGGGTCGAGCGTGAGCGCGCGCTGGTGGTATTGCGCGGCCTCCTTGGGCTTGCCGGCCAGGCTCGCGGCGCGGGCGGCGAGGCCGAGCAGGTTCATCTGGGTCTGTTCGAGCCGCGAGCGCACGTCGTCCACCCAGGGGCTGTCGGCGCCGGGCAGGAACGTGCCGTACTGCCCGACGAGTTCCTTGAGCTCCTCGTAGCCCAGACTGCCCGCTTCGGCCTGTCCGGCGAGCAGTTCGTAGCGTTGCACGTCGTACTCGGGGTTCAGGCCAGCGGCGAGCGCGTACTTGCGGTTCTGGCTGGTCACGGCCTCGTTGTTCAGGCTCTTGCGCAGGCGGTGCAGCGTGGTGTGAAAGAGGCTGCTCGCCCGCGACTCGTCTTTTTCGGGCCACAGCGCCTCGGCGGCTTCCCAGCTGGTGACTTCCTTGTGTTCGAGCAGGTAGAAAAACAGTTCGAGCGCCTTGCGCGACACCCACGACACCTGCTGCCCCTGCCACAGCACCTGCGCGGTGCCGAGCCCCTTGGCCTGGGTGCCGCTTTCGGCCTGCGCG
GTCAGGCCCACCCGGCGCAACCGGGCGTCCACAGCGGTGGTCAGGCCCTGCGGCGTAAAGGGCTTGGGCAGGTAGTCGTCGGCGCCGAGGTTCATGCCCCGGCGCACATCGGTCCGCTCGGCGTGGCTCGAGAGCAAGATGAACGGCATCGCCGAAAGGTTCTCGTGGTCGCGCACCTGTTCCAGAAATTCCAGGCCAGTCATGTACGGCATCACCACGTCGCTGATGATCAGGTCAGGGGTAAACACTTTGAGCAGGTCCAGCGCTTCCACCGGATGCGAACAGCTCCGCACCTCGTGCCCGGCGCGGCTGAGAATGACGCTGATGAGCTTGACAATGGCGGCGTCGTCGTCCACGACCAGAATGCGCGGCATATGAGAAGTCTAACAGTTCGTCTCCGCCGCCGGCCGCCCCCACCCTGACGAATAAGGGCCCCCCTTACAATCTCGTGAGAAGCGGCCTGAAGTCGGTTTGTGCGGGCGTGTGTCCTGTTGTCCTGTTTGCCCCGCTGCCGCTGTCTGCCCGCCCTCCAGATGCGCAGCGGTCAGGGCGCTATCCTTTTTCCCATGACGTATCAGGCGGTCATCGGGCTCGAAGTGCACCTGCAACTGAACACCCGGTCCAAAATCTTCAGCGCCTGCCCCGCCGACTACCACGGCGCGGGGCCGAACGAATTCACCGACCCCTTGACCCTGGGCCTGCCGGGCACGCTGCCCACCCTCAACCGCCGGGCGGTGGAACTCGCCATGATGTTCGGTCTGGGGCTGGGCTGCGACGTGTCGGGTTTTACGCAGTTTCACCGCAAGAACTACTTTTACCCCGACGCCCCCAAGAACTTTCAGCTCTCGCAGTACGATCGCCCGATTGCGCGTGACGGGTATCTGGACGTGCCGGGAGAGGGCGGCCCCGAGCGCATCCGCATCAAGCGGGCGCACCTTGAAGACGACGCGGGCAAGCTCGTGCACCCGACCTACGCGCCCTACTCGCTGCTCGACCTCAACCGGGCGGGCTCGGCGCTGATTGAAATGGTCACGGAAGCCGACATCACCGGCCCCGAGCAGGCCCGCGCCTTTCTGGAAAGCGTGCAGGCCATTGCGCAGTCGCTCGGCGTGTCCGACGCGACCCCGGAGGAAGGCAAGATGCGCTGCGACGTGAATATTTCCATTCACAAGCCCGGCGAGCCTTGGGGCACCAAGGTGGAAGTCAAGAACCTCAACTCCTTTCGCTCGGTGGCCCGCGCCATCGAGTACGAGGCCGCGCGGCAGGCGAAGGTGCTGGACGCGGGCGGCATCATCACCCAGGACACGCTCGGCTGGGACGAGGGCGGCCAGAAGACCTTCCTGATGCGGACCAAGGAAGGCGAGGCCGACTACCGCTACTTCCCCGAGCCCGACCTGCCGCCGCTCGACATCACCCCCGAGTGGATTGCCGAGGTGCGCGCCCGGATGCCCGAACTGCCCGCGCAAAAGCTGGAGCGTTACCGGGCAGCGGGCGTACGCGAGAGCGACGCGCAGACCCTGAGCCTGAGCGTCAGCCTTTCGAAGTTCTACGACGAGGCTCTCAAAAGCGGCTCCGACACCCAGAAACCGGATGCCCAGAAGCTCGCCAACTGGCTGCTCACCGACGTGGCCGGGGCACTCGCCGCGCAGGAAAAGGGAGTTGAGGACAGCGACCTCCAGCCCGCACACCTCGCTGCGCTGGTCGGCCTGATTGACGCCGGAACCATCAGTGGCAAGATTGCCAAAGACCTGCTGCCCGACGTGCTGGCGGGCCACGACCCCGCGCAGCTGGTGCAGGAGCGCGGGCTGAGCGTGGTGACTGACACCGGGGCGATTGACGCCGCCATTGACGCCGCGATGGAGGCCGACCCCGCGACTGTAGAGAAGGTGCGCGGTGGCAACGCCAAGGCGATGAACGCGCTGTTCGGCCCAGTCATGAAGGCGATGGGCGGCAAGGCCAAACCCGAAGTCGTCCGCGAGCGCCTGACCGCCAAGCTGGGC
CTGTGACCGCGCCGGCAGGGGCTTCCGGCAACCGCTGGCGCCTCCCGGCTCTCACCGCGCAGTGGCTCTACGTGGCCGCCAGCTTGGGGCTGACCGCGTATACGCTGCTGCCGGGGCTGAATGAAGGCACGGTCGTCACCGGCCTGCGCTCGCTGCTCGCCGCGCTGGTGGGGGCGTGGTGGGCGGTCATTTTCGGGCGTTATCTGCTCGGCCAGGGCACGCCGGACACCGACGGCACGCTCAGGGCGCTGCGGATTTTCTTTCCCTGGCTCACGGCGCTGCGGCTGTCTTTGTGGCTCATCGGGCTGCTGGCGCTGAGCGGCAACGTGACGGCGGAGGTCAACCCGGTGGCGCTGACGGCGCTGCAAACCCTGTCGTTCGGGTACATCTTCGCCAAGAACGCGGTGTACGGCACGCTCGCCCGCTACGCGACCGACCCGGCGAACGCGCTGGGGCGCCGGCGCCTGGGCGAGTGGCTCAACGTGGCCGCGCCGCTCGCCCTTGCCATCGGGGTCATCAACACCGTGCCGCTGGGCGGCGTGAGCGAGGGCTTCCGGACGCAGGACGTGGTGATGTATGGCGTCCACGCCGCGCTGGACCTGCTCGCGCTGGGGCTGAGCCTGCTCGCACTGCGCGAGATGCAGACCCCGGTGCGGGAGCGGGAGGCCTGAGCTGGGCCGAGTGCGGCAGCCCCCTTACTCTCCTGGCCCCTTGCTATCCTGAGCCTTACGCATGACAGACGCCTCTTTTCCGGCCTTGCCGCCGCCGCTGGTGTCCCTGGGTGACCTCGCCTGGGACGTACTGGCCAAACCCGACACCCTGCTGCTGCCGGGCGGCGACACCACCGGGCGCCTGGAACTCTCCGGCGGGGGCAGCGCCGCCAACCTGGCCGTGTGGGCCGCGCGGCTCGGCGCCCCGACCACCTTCGTCGGCAAAATCGGCCAGGACCGCTTCGGCGAACTGGCGACCGCCGAACTGCGCGCCGAGGGGGTGCGCGCCGAGGTGCTGGCAAGCGCCGCGCACCCCACCGGCGTCATCCTCGCCCTGATTGACCGCCGGGGCCAGCGCGCCATGCTGACCGGGCAGGGCGCCGACTGGGAACTGCTGCCGGAGGAGCTGCCACGCGACGTGCTCTCCAGTGCCGGACACCTGCACCTGACCGCCTGGAGCCTGTTTCGTGACCCGCCCCGCGCCGCCGCGCTCGAAGCGGCCCGCATCGCCAAGGCGGGGGGCGCCACCCTCAGCTTGGACCCCGGCAGCTTTCAGATGATTCAGCAACTTGGGCGCGAAGCCTTCCTGAACATCGTGGACGCGCTGCCCTTCGACGTGATGTTTCCCAACGACGACGAGGCCCGCGCCATGAGCGGCGAGCGCGACAACGAAGCGGCGCTCACCTGGCTGCGGGCCCGCTACCCCCGCGCCCTGATTGCCCTGAAGATGGACGAGGACGGTGCCCTGATCGAAGGCCCGCAGACCGCCCGCGTGCAGGTGCCCGCCACCCGCGACCCGCTGGTGGACGCCACCGGGGCCGGGGACGCCTTCGGGGGCGCTTTCCTGTCGCAGTGGCTGCGGCACCACGACGCCGAGCGCGCCGCCCGCGTCGCCGTGCAGGTGGGCGGCTGGGTGGTCTCGCGCTTCGGGGCTCGCCCGCCCGCTGACCCTGACCTCACCCGGCGGCTGGCGAGCGTGGGCGCCGATCTGTTGACCCCTGACCCCGCAGCTGCCCCTGACCCTTCCCAAGACTCTCCTGAGGTAAACGCATGACGACCCTGAATTCCCGCCGTGGCCTGCCGCTCTGGGCCAAACTGCTGCTCGGCTTGATCGTGCTGGGCCTGCTGGCCGTGCTCGGCGCCTTTGCCTATTTCCGCAGCCTCTTCGGCCCGGCGGGCGGCGCCCCCTACACGCTGGAAGTCACGCCCGGCGAGACCGTGCCGCAAATCGCCCGCGAACTCGAAAACAAGAAGATCGTCAAAAACGCCCGCATCCTGCGCTACGCCATG
CAGCAAAGCGGCGCGGCGGCGCGGCTCAAGGAAGGCGCCTACGACCTCAACGGTCAGATGACGGTGGATGAGGTCGTCAAGACGCTGGACGGCCCCGCCCGCGTTCCGGTGGTCAACGTGACCGTGCCCGAGGGCCGGCGCATCAAGGACCTGCCCGAGATTTTCCAGAAGGCGGGCTTCGACGCGGCGGCCATCACGGCGGCGCTCAACGACGCGTCGCTCAGCCGCTACGCCCGGGGCAAGCAGAAGAACCTCGAAGGCTTCGTGTTTCCCGCCACCTACGAGTTCCGCCCCAAGGACAGCGCCACTGACGTGGTGAAGAAGATGGTGGAGCGCATGGAAACCGAGTTCACGCCGGGCAACGTCGCCAAGGCGAAGGCGCTCGGCCTCGACGTGCGCGACTGGGTGACGCTGGCGAGCATGGTGCAGGCCGAGGCCGCCAACAACGAAGAGATGCCGGTCATTGCCGGGGTCTTCCTCAACCGCCTGCGCGACGGCATTCCGCTCGGCAGCGACCCCACCGTGGCCTATGGGCTGGGCAAGGACCTGCCGGAACTCGACCGCTCGGCGGGGGACTTCAAGGTGGATACGCCTTATTCCACCTACACCCGCCAGGGGTTGCCCGCTGGCCCCATCAACAACCCCGGCGAGGCCGCGCTCCTGAGCATCGTCAACCCGCAGCGCAAAATGGCGGATGGGCGCGACGCCCTGTACTTCCTGCACGCGGGCGGCAAAATCTACGTGAACCACACGTATGCGGAGCACCTGCGCGACAACGACCGCTACCGCTGAGCCGACAAAAGGGAGGGGGTGGAGTGCCCGGGACACCGTTCCCGCCGCGCTCCACCCCTTCGCTGCCCTTCTCTTTCAGCCCGTTACTTCTGCGCTTTGAGCCACGTCGTCAGGTCGTTGAGCGGCTGGGCGTCCATCCAGCCCAGAGTCGGCCTGCCGTCTTTGAGCGGCGAGAGCGAGTGCCCCAGGCCCGGATAGACGATGAGCTTCACGCGCTTGTTGCCCGCTTTTTCGGCGGCGGCCTGCGCCAATTTCGCGCCGGGGAGGATGGTCTGCGGATCGGCGGCGCCGTGCAGCATCAGCACCGGGCCGCCGTAGCCGGGCAGGTTCTGCGTGACCGAGCCGTAGGCATCGGCATCCTTGAAGTAGCGGGCGGCAGCGGGGCCGAAATCCGGCAGGTTGGGGTACGGGGCAAACGCCGCCTCCAGCCGGTCCTTGATGGGGCCGTCAATCCGGACGCGGCCCCCGGCGTCGGCCTTCATCTCGGGAAGAAAGGTCCACGTCTTGTCCTTTTGCACCACGCCGAAGGCGGACGCTTGATCGGCGAGGTCGAGCCCGAAGCGCTCGAAGCCGGCGGTCAGCTCGGCAATCGTCAGGAAGCCGTCGTGGTCGGCGTCCATCAGCTCGTGAACCTGCGCGAGGGGCCGGTCGACGAGCTGGAAATGCAGCGTTTCGCGGAAGGTGTAGCCCACCGTGCCGATGAGCACCAGCCCGCGCACCAGCTCCGGGTGCTCGCGGGCGATGCGGGCGGCGAGCATGGTGCCTTCCGAATGACCGAGCAAAAAGACCTCACCGGGGTTGACCTCGGGCAAAGTCCGCACGGTCTGGAGCACGCCCAGCGCGTCGGCGGAAAACTGCGAGACGGTGGCCTGTTCGGGGCGGGCCGCCGGGTCGATGCGGGGGCCAGCGCCCAGCACGCCGCGCTTGTTGTAGCGCACCACCGCGAAGCCCGCCTCGTTGAGCTGCCGGGCGAGTTGCAAAAAGACCTTGCTGCCGCCGGGCACGCCGCTCACCTGTTCGGGCAGCGTTTCGTTGAGGTCGTTGGGGCCGCTCCCGTGCAGCAGAATTACGGTGCGGAACTTCTGACCCGGCTTGGCCCCCGGCGGGAGGCTGAGTTCGCCGGTCAGCGCATAGCCGTCCGCGAGCTTGAAGGTCAGCGGGCGAGTGTTTTGCAGAACGATCTGCGCGGGTGGGGCCGGAGGTGGGGT
GGTCATGCCCAGAGTGTCGGGCTAGGCTACTTGCCATGCCAGAGCAAAATGCAAGTAGCGTACTCGTCCAGTCCGAGGGCGCGGCGCTGGCGCTGCTGGGCGGCATCGTGAGCGGCGAACTGGGGCCTTTTCGCCAGGGGCCGCTGAGTCCCGCGCAGTGGGCCGCGCAGCGAGGGGAACCGCTCTACCGCACGCTGCGCCGGGTGCGCCGCTGGCAGGCGCTCGGCGTGCTGGACGTGCAGGACACCAAAAAAAGGGCGGGGCGGCCCGTGCGGCTCTATGGCCTGACCTCCCCCTCCTTCTACATTCCGCACGCCGTGTTGCCCGTTGACGAGGTGCTGGCGCGCATCGGTGAGCCGATGGAACGCCAACTGCGCGCCGACATCGCTCGGGCCTTTGCCGACCTGCCCGACATCGGCGGCACCCAGATCGTGATTCACGGCGAGACCTACGGCGCGTTTCTCGCTCACTCGCCCGGTCAGCCCTGGGGCGGCAAAGACGACCTGGTGCTGATTGACCGCTGGGCGAACCTGCGCCTGACCCACGCCGACGCCCTCGCCTTTCAACACGAACTCGACGAGCTGTTTGCCCGCTACGAGCACCGGTCGGGGCCACAAAGCTACTCGTTGCATCTGCGGCTGGTGGCCTCGGCGGGGGGGGGGGCGAGCCGTTAGAGAGGCACGAGCCGCGTGGAAGCAAACAAAATGCGCTGCGGCTATTCCAGAAATGCCAGTAGCTCTTGATTCACCCGCAGCGTTTCGTCACGCATGACCCAGTGGGTGGCGTGGGGAAACTTGACGGTTTTCAGGTTGGGCACCCACTTGCCGAGGTTGTCGGCGAGCTCGGGCACCAGAGCCGAATCCTTTTCGCCCCACAGCAGCAGGGTGGGGGCGTGAACCTGCCGGCCCTTGACGTTCCCGAAGCGGATCAGGGCGCGGTAATAGTTGATCATGGCGGTGGCCGCGCCGGGCTGTGTCCAGGCGGTTTCGTAGTGGTGCATGTCCTCGGCGCTGTAGCTGCCGGGCCGCGCTCCGCCCAGGCCCTTGCGTCCAAAGGGCACCAGCAGGCGCTCGGGCAGCCAGGGGAGCTGGAAAAAGCCCACGTACCACGAGCGCTTTCTCTGCTCGGGACGGCGCATTTCCCGTCCGAAGGCGCCGGGGTGCGGGGCGTTGAGAATCACCAGTTTGTCCACCACGGCGGGCCGCGAGATGGCGACCGCCCAGGCAATCACGCCGCCCCAGTCGTGCCCGACGATATGGGCGCGCTCGTGCCCGAGGTCGTGGATGAGCGCCGCCACGTCGGCCACCAGCGTATCGATGTGGTACGACTCCACGCCCTGCGGCTTTTCGCTGATGTTGTAGCCGCGTTGGTCGGGCACCACCACCCGGAACCCCGCGCGGGCGAGCGGCCCGATCTGGCGGTCCCAGCCGCGCCAGAACTCGGGAAAGCCGTGCAGCAAGATGACCGGGGGGCCCTGTTCGGGGCCGGCCTCCACGCAGTGCAGCCGCACCCCGTTCACGAGGCGTTCGGTGTGCCGAAGTTCGCTTGACATGGTCCCAGTATCCTCCCCGGCGTGCGCGGCGCCGTGACCGCTGCCGGTGCCATTGTCTTTAGTTCGGCCGCGCTAGAGCATTTGACATAAGAATGTTTCGCATTTTTGACCCTCTACCCTGGTAGGACTCGAAGAGCTGCACCAAGGGGTCTTTTTCTGTCAAATGCTCTAGAATGTCCGGCGCATTTCGCGTTCCCCCGGACCCTGACGGCCCGGCGCGCCGGACGCACGCTCTACCTTTTTTTCGCGCAGGTTCGCACAGCACAGGAGACTTCTGAAACATGGCCGGTCATAGCAAGTGGGCGCAGATCAAGCGCAAGAAGGGTGCCAACGACAAGAAACGCAGCGCGATGTATTCCAAGCACATCCGCGCCATTCAGGCCGCCGTCCGCTCGGGCGGCAGCGGTGACCCGGCGGGCAACCTCAGCCTGAAAAACGCGATTGCGGC
GGCCAAGACCGACACGGTGCCCGCCGACAACATCGAAAACGCCATCAAGCGTGCGGTGGGCGCGGGCGAAGGCGCCGCCGAGTACAAGGAGCAGACCTACGAGGGCTACGGCCCCGGCGGCACCGCCATTTTCATCGAGACGCTGACCGACAACGTCAACCGCACCGTGGCCGACATCCGCGCCGTGTTCAACAAGCGCGGCGGCAGCATGGGCAACTCTGGCTCGGTGGCGTGGCAGTTCGAGAAAAAGGGCATCATCCTGCTTCGCGATGCCTCCGAGGCCGCGCAGGAAGTCGCCATCGAAAACGGCGCCGAGGACATTCAGGAATCCGACGAGGGCCTGGAAATCAGCACCGCGCCGAACGACCTCTACGCCGTGCAGGACGCCCTGAGCGCCGCCGGCTACGCCGTGGAAAGCGGCCAGATCACCATGCTGCCGACCAACACGGTGGCGGTGGCTGGCGACGACGCCCGCAAACTGCTGACGCTGGTGGAATACCTCGAAGAACTCGACGACGTGCAGAACGTGTACACCAACGCCGATCTGCCCGAGGACGAAGAGGACTGACCGAAACGAGAAAGAACAAAAAAGCGGAGGCGTAGGCTTCCGCTTTTTGCTTTGGGTGGGGATGCGTTTTCCTGAGCCGTCGCTTCAATCACACGCCAAGGCCAGCTCCGCTTGCCGGGCGAGCTGCGTCTCACGGGTCAGATACCAGCGGCTGAGGTCCGAGCCGAGTTCGCGGATGATCGCTTCGGCCTGCGGGAGCGCGGCGCGGCGGCTCTGCATATTGCGCTGCACGACGGCGGTCAGGTCGTCGAGGTTGTAGAGGTGCGCGCCGGGCACGCTGGCAATCTCGGGGTCAAGGATGCGCGGCACGCTGATGTCAATCAGGAACATGGCGCGGCCCGGACGCCCGGCGAGCGCCTCGCGCACGTTCTGGGCGGTGACCACGTAGTGCGGCGCGGCGCTCGATGCGATGACCACGTCGACTTCGGGGAGCGCGGCGGACAGTTCCTCGGCGGGGCAGGCGCGGCCCCCCAGTTTTTCGGCGAGGGCGCGGGCCCGCTCGGCGGTGCGGTTGACCACCAGCACGTCCTGCACGCCTGCCGCCCGCAGGTGGGTCAGAGTCAGCTCGGCGGTTTCGCCCGCCCCCAGAATCAGGGCGCGGCGCTGCGAGAGCTCACCGAGCGCGGCCTGCGCGAGTTCCACGGCGGCGCTCGACACGCTGACCACCTTGTCGCTCAGGCCGGTGTGCGAGCGCACCCGCTTGCCCGCCGCCAGTGCGCCCTGCACCACTTTGTTCATCAGGGTGCCGCTCAGACCGCGCTCGCGGGCACTTTGCCAGGCACGTTTGACCTGTCCCTGAATCTGGGTTTCGCCGATGACGAGGCTGTCGAGCCCCGCCGTCACGCGGTAGAGGTGCCGCACGGCGGCCTCGCCCCGGTACACGTAGAGGTGATCGAGCAGCGCGTGGCCCCAGGCGCCCTCGAAGGCGGCGAGCGGGTCGCCGTGCACCCCGGCGAGATAGACCTCGGTGCGGTTGCAGGTGGCGAGCAGCAGCACCTCGTCGGCGTGGCGCGCGAGGTGGCCGAGCAGCGCGCCTTCTTCCTCGGGCCGCACGGCGACGCGCTCGCGGACCTCGACTGGCGCGGTCTGGTGATTGAGGCCGACCACCACAAAATCGAGCGGTGCGGGCTGCGGCACCGTACGCCGGGCGAGCAGGTGCTGGGCAGTCGGACACGCGAGCGTCATGCCTGCGCCTCTGGCCGGGCCGTCCGCAGACCGCGCGGCGTGAGGGAAGCAGAGAACAGGAGCATGTTGGGCAGCATAGTCTTCCGGCGCGGGGCGCGCGTCCCGTACCGAACGGTAAGAAAGCTGTCTGGGTCGCCTTTTTTCGTTTCCGGGATTCCTGGGATGCGGGCCCCTGACATGACGACACGTCCTTCTGACGCTACCCTGCAACCGACCTATGACTGCTCCTCCCGCCTTGCAAGACGTG
TATGGCGTGCCCATTCCACCCGGCAGGCCGCTGGCCCAGGGCCGGCACGCCGACCTGCTGACCCTCGGCGAGGTGCGCGGGCAGGTAGCGAAGGTCTACCTGGCGCCGCCGCCGCCCGCCCAGGCCCAGAAGGTCGAGGAACTGGTCAAGGTGCCGCCCCACCCGGACGTGGCCCGGCGCCTGGTGTGGCCGCTGGCCGCGCTCTACGGCCCCAACCGGCAACTCGCGGGGCTGCTCTTGCCCCTGCTGCCGCCCCAGCAGTTTCAGGGGGTGCGGGCGCTGCTGCGGGCCGACACGCGCCGCCGCAGTTTTCCGGAGGCCGACTGGCCCTTCGCCGTGCGGGTCGGGCGCGAACTGGCGCTCGCCTTTGCCGAACTGCACGCCGCCGGGCACCACATGGGCGACGTGCGGCCCGAGCACGCGCTGGTGTCCGACGCGGGCGAGGTGCGGCTGGTTGGGGCCGACGACTACGCGCTGCACCTCGCTGGGCGCGACTTTGCCGGGCCGGTGGCGAGCGCGGAATATCTGCCCCCCGAGCGTCAGCGCGTTGGGAGCGCCACTGGCGCTGGGGCCGACGCCGAGAGCGACGCCTTCGGGCTGGCCGTCCTGCTGTTCGAGCTGCTGCTGGGCCGCCACCCCTACGCCGGGATTCAGGCACGCGGGGCGGCGCCGGGGCCGGGCGGGGCGATTGCCGCCGGGCTCTTTGTGGACGCGCCGCAGGCCGGGCCAGGGCGGCGGACCGCGCCGGGCGAGTGGCCCTTCGCGGCGTTGCCCCCGGCGGTGCAGGCGCTGTTCGTGCAGGCGTTCGCGGTGCCGGTGGTCCCCCGCCCATCTCCCGAAACCTGGGCAGCGGCGCTCGGCGCTCTGGCCGCCGAACTGGTGCCCTGTGCCCGCCGCGCCGGTCACTGGCAGGTGCCGGGGCTCCCTTGCCCGAGTTGCGCCGCCGAGCGCGAGGGGCCGGCGCCCAGCAGCGGGGAAGACGTAACGGCGCGGGTGCAGCGGCTGTGGAACGACGTGCAGCGAGTGGTCGCGCCGCCGCCGTCGCCCCCGGTGGCCCCGGTCATCGAGGCGCCGCCGCTGCCCCCGCTGCCGCTGGCGCTGCCCGACAAACCACGGGGTTTAAACCACCGCCAGCAGTTGCAACTCCTGACCTGGACGCTGCGGGTCGCCGTCCTCGCCGCGCTGATGCTGGGGGTCGGGCTGGTGCAGCGCTCGGTGCTGGCGGGGCTGGTGGTGCCCGCACTGATCGTCTTTGCCCTCACGCTGGGGCGGCGCTTTGCGGTGGACTGGGACGGCCTGATCGACCGTTACGAGCGCTGGGAACACGACCTCGTGCGGGGGCTGTTGCCGCGCCGGGGGCCGCAGCAGCGCTACCGCCGGGCGGTGCGGCGGCGCCGGGCCGAGGTGAAGACCGAGCTCGCCGCCCGCGTCGCCCAGCGCGAGGAACTGCGTGAGCGCTACCACGTCGAGAACGCCGCCGCCCTGCACCTGCGCGAGCAGCAGGCGTTGGAGCAGCGCCGGGCGCACCTGCTGAGCCTCAGCGCGGGCGGGAGCCTGGAGGCGCTACTGGGCCGCTGGCGCGAACGCTCCCGGCAAGACTATTTGCGCCAGCAGCAGCTCAGCAGCAGCGGCGTGCCGGGCGTCGGCCCCCGCGAACTCGGGCTGGCGGTGGCGCAGGGCATTCGCACCGCGTTGGACGTGACCGCCGAGCGGGTCCGCGCTCTGCCTGCCCCGCTGGGCCGCGAGTTGCTGGCGTGGCGGCGCGGCCTGGAAGACTTTTTTCAGTTCGACCCCGGCGCAGTGCCCCGCGCCGAGCTCAACGCCGTACAGCAGCGCGGGCAGCAGCACCTCGGCGACGAAATCGAGACCTTCGAGCGGGCCGTTCATGCCTACACCAATGCTCGCTGGGACCGCCACGAGGCCGAAATTTCCCGGCAACTTGGCGTCGTCGAGCGCGAAATCGAGCAGTACAAAAAGGCGCTGAGCGAACTGCGGGCCAT
CAAGGTCTGAGCCTGTGCTGTTTGGCACTTTTTCGCCTTCTTTGCACCCCGTCTCAACATCGCGGGGGCGAGCACAAGGGCGCGAAATACTCTCTTGACCCCGTGTCGCGCCTGATTGGTCCGGCTCGCGGGGCACAAGGTGAACCATGACTGACAAGGCACAGACTTCTCCGGCGCGCCTGCCCCGCATCATTCAGGGGGGCATGGGCGTGGCCGTTTCCGACTGGCGGCTGGCGCAGGCGGTGTCGCGCACCGGCGAACTCGGCGTGGTGTCGGGCACCGGCATCGACAACGTGCTGGTGCGGCGCCTTCAAGACGGCGACCCGCAGGGCCACGTCCGCCGGGCGCTCGCCGAGTACCCCAACCCGGCCAAAGCGCAGGAATTCATCGCCAAGTATTTCCTCGAGGGAGGGCGTGCCGAGGGGCAACCCTACGCCCGCGTGCCGCTGCCCAGCCACCGCAACCACCAGCTCGCCTGGGAACTGTCTATCGCCGGGGCCTTTGTCGAGGTCTGGCTCGCCCGCGAGGGGCACAGCAATCCGGTGGGGCTCAACCTGCTCACCAAGCTCGAACTGATGACCCTGCCCTCGCTCTACGGCGCCATGCTCGCCGGGGTGGACACGGTCATCATGGGCGCGGGCATTCCGCGTGAAGTGCCCGCCGCGCTCGACAACTTCGCGGCGGGGCGGCCCGGCACCTTCAAACTCAGCGTGAAGGGCGACCCACAGGGCGACACGCCGGGCGTGACCCTCGACCCCGCCGCTTACGGCTTTGACGGAGTGACCACTGGGCGCCCGAAGTTCTACCCCATCGTGACCTCGCACGTGCTCGCCGGGGCGATGCTGCGCAAAAAGACCGGCGGCATCGAAGGCTTTATCGTCGAAGGCCCCACGGCGGGCGGCCACAACGCGCCCCCGCGCGGCAACTACGCGCTCGACGACCTCGGCCAGCCGGTCTACGGCGAGCGTGACGTGTGCGACCTCGGCGAAATGCGCAAATTCGGCGTCCCCTTCTGGCTGGCGGGCGGCTACGGCAAGCGCGGCGGCCTGCAAGACGCGCTCGCGGAGGGCGCTGCCGGCATTCAGGTCGGCACGCTGTTCGCCTACGCCCAGGAAGCGGGCATCCGCGACGAGTTGCAGCGGCGGGTGCTCGGCGAGGTGCAGCAGGGCGACCTCAAGGTCTACACCGACCCGCTCGCTTCGCCCACCGGCTTTCCCTTCAAGGTGGTGCAGCTCCCCGAAACACTTTCCAACCCTGAGGTCTACGCCCGCCGGATGCGGATTTGCGACATCGGCTACCTGCGCGAAGCCTACTGGGACATGGCCCCCGGCAAGGAAGGCAAAGTGGGCTGGCGCTGCGCCGCCGAGCCGGTGGACCAGTACGTCGCCAAGGGCGGCAAGGTCGAGGACACCGTGGGCCGCAAGTGCCTGTGCAACGCCCTGATGTCCGACGCGGGGCTGCCACAGATTCAGAAAAACGGCGACGTGGAAAAGTCCCTGCTCACCAGCGGCGACGGCCTGACCGAACTCGGCAGCTGGAAGCCGGGGTATACGGCGGCGGACGCTATCGAGTTTTTGCGCGGCTGAGGTGCGGCGAAGCAGGCTGGTCTCCTTCCTGCTGCTCGCCGCTGCCGTAGTGGGAGCAACGCTCAAGGTGGTGAAGGATGACCGAAGCGCCGTCCGCCTACCCCCTGCCGTGTACGTTCCGCCAAGCGGTCCCTTGATTGAGCCCAAGGAAAATGCCGGACGTTGCCTTGCCACCGCCAAAATCACGCTGCCCAAGTCCTACAACGCCTTTGCCAACCACTGGCGTTTTACGGAAGCTAAACAACTGACCGCCGAAACCTGGCAGATTCGCGGCGAACTGAGCGAGTATTCGGCGACGGGCGTCAAGGTGTTTATTTTTCAGTGTGTGGATAGCAATGAAGCGACGGGGGTAGAGCTTCACCCGAAGTAAAAATTGGGCAAGTTGGGGGCCAGGGCTGCGGCGTTGGCCCCTTT
TTCCTATTGTCCGATCTCCCCCAAAATCTCCCCAATCACCTCTTCCAATCCCAGCGGTCCGGTGTCGATCACGCGGGCGTCGGGCGCGGGGGCACTCTGGGCGGCGTCCTTCCTGTCCCTGGCGATGAGGG +>2_1#NODE_19_length_33054_cov_76.4496_ID_37 +GGCCTGTCCGGCTGACGTGCTGGCTGCGCGTGCTGAGTCCGCAGCATCAGCCGCATGGGTTGCCGCCTCACGGGCTGATGTGCTGGCATCACTGGCTGACTTCTTCGCGGCTGCCGTGTTCTGTGCCACCGCGGACGCGTTACGCGCCACCTCTTCCACCATCAGTTCAAAACGGCGCAGTGCCTCCGGACGGGCATCATCCTCCGTCATGGCACCGAGAAAATCATTCAGCGTACCGGGTTGAGAATCTTCATACACGGTAATGGTCCCGGCATGTGACGGCGGGAATCCTTCCACCAACAGAATAACGCTGTACTGACCGTACTCAACGTCCATGCTGTAACGCCCGGCTTCATCCGGATTTTCTGAGGCCAGCGTGTTCACCACCACCGTGGTGCTGTTACGTTTTGCTTTCAGCTGGATTGTGCAGTTCTGTACCGGTTTTCCTGTGCCGTCTTTCAGTACACCTGAAATCTTTACTGCCATATTCACCCCACAAAAAAGCCCGCCTGAACCGGCGGGCTGTCATAACACTGTGTTACCTGGCTAATCAGAACTTATAACCGACACCCACGATGAAACCGTCAGTGCGCCAGTCGCCACTGCCGGGGCCTTCATAAGCAATATCAATGGCCACGGATTCGGTCGGGTTAAACTGCACGCCAGCTCCCCACGCCAGAGACGTGTTGCTGTGGCGACCGTCATCACTTCCGGTCAGCACGTCGTGCGTTTTCCCCTTGGGAAGGTGCGAACAAGTTCCTGATATGAGATCATCATATTCATCCGGAGCGCATCCCAGAGGGACATCATGAGCCATCAACTCACCTTCGCCGATAGTGAATTCAGCACTAAGCGCCGTCAGACCCGAAAAGAGATTTTCCTCTCCCGCATGGAGCAGATTCTGCCATGGCAGAATATGACCGCTGTCATCGAGCCGTTTTATCCCAAGGCGGGCAATGGCCGACGGCCCTATCCGCTGGAGACCATGCTGCGTATTCACTGCATGCAGCATTGGTACAACCTGAGCGACGGTGCCATGGAAGATGCCCTGTACGAAATCGCCTCCATGCGCCTGTTTGCCCGATTATCCCTGGATAGCGCCCTGCCGGATCGCACCACCATCATGAATTTCCGCCACCTGCTCGAGCAGCATCAACTGGCCCGTCAATTGTTCAAGACCATCAATCGCTGGCTGGCCGAAGCAGGCGTCATGATGACCCAAGGCACTTTGGTGGATGCCACCATCATTGAGGCACCCAGCTCTACCAAGAACAAAGAGCAGCAACGCGATCCGGAGATGCATCAGACCAAGAAAGGCAATCAGTGGCACTTTGGCATGAAGGCCCACATTGGTGTCGATGCCAAGAGTGGCCTGACCCACAGCCTGGTCACCACCGCGGCCAACGAGCATGACCTCAATCAGCTGGGTAATCTGCTTCATGGAGAGGAGCAATTTGTCTCAGCCGATGCCGGCTACCAAGGAGCGCCACAGCGCGAGGAGCTGGCCGAGGTGGATGTGGACTGGCTGATCGCCGAGCGTCCCGGCAAGGTAAAAACCTTGAAGCAGAATCCGCGCAAGAACAAAACGGCCATCAACATCGAATACATGAAAGCCAGCATCCGTGCCAGGGTGGAGCACCCGTTTCGCATCATCAAGCGGCAGTTCGGCTTCGTGAAAGCCAGATACAAGGGGCTGCTGAAAAACGATAACCAACTGGCGATGTTATTCACCCTGGCCAACCTGTTTCGGGTGGACCAAATGATACGTCAGTGGGAGAGATCTCAGTAAAAACCGGAAATAACGCCAGAAATGGTGGAAAAA
ATAGCCTAAATAGGCTGATTCGATGTGTTTGCGGGAAAAAAATCGGCCCAGATCCGCGAAATTTTAATCAGCGAGTCAGCTTGGGAAGAAATGACCTGCTTATTCGCACCTTCCCTTGTTGTCAGTTACGCGGAGATAATCCCCGGAGAAAGTCGACACACGGCTGTAAGCCATACCCGCCATCGCATACGCGCTGAACCATTCATTCACGCGCACAGACGGCCCCACCATCACGCTGAACCAGCGGTTACGCACGGAATCTTCATGCCAGCGGGTATCGCTGTAACGGGTAAGCTGGCGATTCTTGTCTCCTGCATAGCTGAATGGCCGCCCTGCTGCATTAACACCATAAATGGCGACTGACCAGTGGACAACCCAACAACAATATCCGTCATTTGTGCAGGCAACATGCGCATAGCAAAAGCCGTTTGTTTTGCCGACATTCCGGTTTTGCTTAATTGTGATTGAGTAACCTCAAGCTCACTCCGCATAGCACGAAGTTTTCCAGAAAGCTCCTCATACATTTCAGGAGAAAGCATCCCCTTAGCTTTTGCTTCATTGAGCTGTTTCTGTTGTTCTACCAGACGATTAAAAGCAGTTCCGACAGGATCAAGTTGAGCAATCAGACGTTGCAAAGCAACAACCTGTTCATCATGCGCTTTTGCTGCTTCTCGCTCTGCCTGAGCCTCTCCGGTAAGCTCTCGCCGTGTTTCCTGTATTTTTCGGCTATAATTCTCAAACTGAGAACCATTTATTTTCCCGGATGCAAACGCAGCATTAAGTTCATCATGCTGTTGTTCAAGATTTCTTAGCGCCGCAGCCAGAGGGTCGATCTTGTCCAGCATTCTTTGAAAGGCCTGAGCCTGCGCTTCCTGCTGAGCGGCAGCAAGTTTTCCGGCCTTCTCGGCTTCTCTCTGCGCTTGCGCAACCCCGCTCAATTCCTCTGTGGTTTCATTAAGTTTACGGACAAGAAATTCATATTCTTCTTTATCAATAAGCCCTTTATCGAAAAATTTCTTTAATTCAGAATAGCGTCGACCGACAGTATCAATTGCGGCACCAACTGGATCAATAGCTGCTTTTAATTTTGCGAGCGCGTTCTTCTCATCTTCTGTTGCCTTAGTCACTTTCCCTGCGCTATTTGCAGCAGTTTCCCCAGCCTGCGTCATTTTGACTAATGAGGAGGTCAGATTGTCAGCATTATTTTTCGCTCCAGTGCTATCAATAATTATTGCGAGACGCGAGGTTTGCTCTGCCATTTATTAAAACTCCTGACAACAAAAAACCCACCGCGAAGTGGGTTTCAGGCGACATAATAGTAGATATAGCGATTACGAGGCCACGCAATGCTTTTCTCCAGGAGCATCATCGATTTAATTAAAGACACCATCACATCTCTGTAACAGAGTGTACGTAATTAACAACTACACACACTGCTCCTGAAAATACTGGTCATCCAGTGCAAAGATCACTGCTTCAAATTCATCGCGCTCAATCAATACCGGATGAGTGGCTAAATATTCATTTATCTCTGTCAGAGATAAAGGCAAAGGCACCCCAGCCATTCCAGCATAACGTCGGGCACGGGATATTACCGAATAGGCGTACAACAACTCCTTAAGCACCGGGTCTATTTCTGGTTCCGGTATCGGTGGCAACCTGAGTTTTTCTCGCTTCCATCTTGCCTTTTCCCCCCTTTCTCCCCCGAACTCCGATAACCACCGCTGGGCAGCTATGGCTTTTTTATCGTATCCTGCTTCTGCTGCTCTTTACCCTGGGCGATGCTGGCTGCTTCTGCAAGGATCTGCCAGTACAACTCTGGATTCTGCTTAAGCAGCGCGATCCCTCGTTCTGCCGTATATTCCAGTGCAACCTCAACACCATTAACCAGTTCACCAACCCCTTTCCAGTCTTTCAGCAGATAACGAGCGGCATTATCAATGAGTAAATCATCAACAGAATCCACCTCGGAAACCTTTGAAATATCAAACTCCTTCGTT
CCGACGTGCAAACTGGCATCCATTTTCTCAATGTGGCGACGGATTAATGCATTACGGGAGCGATACTGATCGTTATCGCTGCTTCTGCTCATCCGTCAGGCCAGACAGAACAATGACCGGAACAGAATCCATTTTGAGCATTTCAGCCGCCATAACACGACCGTGACCCGCAATAATTTCGCCCTTTTCGTCAATCAGCACCGGATTAGTCCAGCCGAATTGCTTAATACTTTCTACCAGTTGTGCCACCTGCTCAGTACTGTGCGTCCTGGCGTTGTGCGCATACGGTGACAATTCTTGTAATGGGCGATAGACTATCTTTAATTTCTCGCTCATACAGCCTTGCTTTATGAATAAAACGCACCCCAGCAGCCAGTGCTACTGGGGACGGAGGTGTTGCTGGTAAAGTTAGGTATTGGATCAATGAGTGAGTCAACATAATATTAAACTCACAATTATAAATCAGCCATATATTAGGAGCGCCAAAAAAAACCTGAAAACAATATAATAACAGGATAAATTTCAAGGCGACCAAGAATCATAGCTATGCACATTAAGCATTTTGCAATGTCATTAAGCACTCCGAATGACGATGCAGTAGCCCCAAAACCTAATCCCATATTATTAATACATGCAGCCACTGTTGCAAATGATGTAAGAAAATCATATCCCATACCATTTAACACCAGTATAAAAAACACCGTGAAGAGAGTATAAAGAAAAAAGAAACTCCATACAGACCTCATTACACGATCTGTAACTATCTTCCCTCCTACATTTACACTCAACAACGCTCTGGGATGAGAAAGCTGATTTATCTCGTGTTTGCTTTGTTTGAAAAGTATAAGAAATCGAAGTGACTTAATTCCACCACAAGTTGAACCTATACATCCCCCAAAGAAACTTGACAACAGCAAAAACACTATCGTGTGCGTGGGCCAACTTGCATAATCCTGCGTAGCTAAACCATTATCAGTGAGCATGGAGCTGGCAAGAAAAAACGAATGAATAAAACTTCCATGCAAGTCATACATACCTATATGCCAGACCTGGAAAGAGGTAACAATGATCACCCCTAAGGCTATTAACAGAAAGAAACGAAGTTCAATATCTCTGATTAAAGGTTTTATCGTTTTCCTGCTAATAACAATATACCAAAGAGTGAAGTTGAAAGCCGATAGCAGGGAAAAAGAACCAGCCACCAGCTCAACCAAATAGTTATTAAAATATCCGATACTCTCGCTATGAGTTGAGAAACCACCAAGCGAAACTGTGGAAATCCCGTGACAAATAGCATCAAACAAAGGCATTCCTGCAAGTCTATAACAGACAATACAAGCAATACCTAATAAAGAATAAGTTATCCACAGTGTCCGTGACGTATCGGCCAGGCGGGGAGTGAGTTTGTCATCCTTAAATGGCCCCGGCATTTCTGACTGATAAAGCTTTGCACCACCAATACCCAATAATGGCAATACAGCAACCGCCAGAACAATAACTCCTAAACCACCTATAAAATTTAACTGTGACCGATAGTACAAATATGCCCGAGGTAATGAACTAACATCATCAATTACAGTTGCTCCTGTTGTTGTTATTCCAGAAACCCCTTCAAACAGAGCATCAATGAACGTTAAATTAAGTTCTGAGTCAATCCATAAAGGGAATGCACTAATAACAGAAAACAAAATCCAAAACATTACAATTATAATAAACCCATCACGGGTACGTAATTGAATGCCAGATTTCTTAGTTGTATACCACGCTCCGCCACCAATGCAAAAAAATATAACGAAAGTTATAAAGAAAACAAACAGGCTTTTTTCTTTATAAAACAATGCTACAACCATTGGTGGCAACATTGAAAGACTATAGAGCCAAACCAGGAACCCACACATATGAGTAACAACTCTTACATGAGATGTATTCATATCTAAATATTCTTTCAATTATAACCACCTTGCTGCAATATTATGATTATACTGTATAAAATTTAACTC
CTCTTAGATCTTACTTCACTGTTCCTTATGAAACAATCATCAAAATGAATCATATTGTAGTTAAGATTTTACTTTAAACACTGCTCGGTTATGTATTGCTGAGCACCTTCAAGTTGGGCCTGCATCATTACCAGTCGTTCCCGGAGGGTGAAATAATCCCGTTCAGCGGTGTCTGCCAGTCGGGGGGAGGCTGCATTATCCACGCCGGAGGCGGTGGTGGCTTCACGCACTGACTGACAGACTGCTTTGATGTGCAACCGACGACGACCAGCGGCAACATCATCACGCAGAGCATCATTTTCAGCTTTCGCATCAGCTAACTCCTTCGTGTATTTTGCATCGAGCGCAGCAACATCACGCTGACGCATCTGCATCTCAGTAATTGCCGCGTTCGCCAGCTTCAGTTCTCTGGCATTTTTGTCGCGCTGTTCTTTGTAGGCGATGGCGTTATCACGGTAATGATTAACACCCCATGACAGGCAGACGACGATGCAGATAACCAGAGCGGAGATACCGATGCTGCTTTCGGCTCTGCTGGTAAATTATCGCCCGGTATGCAGTAACGAAATTTACCGCCCTGATTTACGCGAATCAGACGACCTTTGCTGATTGCCATTGCCAGCGTTGAAGCCACTTTGCGTGATGTGGTACCAAACAATGTAGCCAGCTCATCAGCCGTTTGTGGTCCTCGTTGTTCAATCGTCGCGGTTAAATCGCACTCTGAGATTTTCGCTACTGTTGCTGTGGTGATTTCTTCCGGCAGTTCTGCCTGCGCTGGCTGTTCCTGCTGAACATTGTTATCAGCCACACGCCAGGTGTACGCGCTTTTATCAACAAAACCAGCCTTTTTCAGTTCCCATAGTTCGTTCAGCACTTCTTCACGACTGATATCAAGTCGCGCAGCAAGTTCTATGGATGTGGCTTTTCCCATTGCTTTCAGTGCGTCAAAAACAGTCTCCATTAAATTTTTCTCCCGGTAAAAATTACTTCGCAATTCCTGGCTGGACGACATTCGGACGCCAGCTCTCCCAGTTAAAATTCACCCATCGCCCGCCGTTCATGGTCATGCGATCCATAATCCGCTCGCCGAGCAATGTTTTCATGGCCTCATAGTTCAGGTTTGTCAGCATCCCCACGCTGCGCATCGACGCTGTCCGGCGATCAACAATCTGGTGCAGTACCACCTGCTCGTTTTTCGTCTCGCGCTGAATGCCAATTTCATCAAGAACCAGCAGATCCACTTCGCACAGTTCCCGCAAAAATTTTTCGCCTGACTGCCCATCGTCATAGCTGGCGTGCAGGGCACTCATAACATCAGCCACGGTAACCACAATCACTGTCTGACCGTCTTTCAGCAGGCGATTCCCGATAGCTGCCGCTAAGTGGTTCTTCCCGGTACCAGGTTTTCCGCTGAACGCAAAATTTGTACACCCGGTCATCAGTTCATCAGCGATGGATTTCGCCTGACTCAACGCGTATCGCTGCCCTTCGTTCTGCACCTGGTAATTCGAAAACGAGCATTTGCGGTGCAATGGCTGGATGCCAGAGCGATTCAGAATTTTTTCCACCCGCAACTGACGATTCTGACGGTTGATCTCCTCACAACGTTTCTGGCCTTCGGAAAGTTGCCACTCGCGCCACTCCGCTACCGTCTTGAATGGCGCGGTTACATGTGACGGGGCCAGTCTGCGGATACGTTCAAGAACATCGCCTGTCGCAATATTTTTCATGGTCAGTTACCCCCTGAAGCCTGGCGGGATCGCACTATCCGGTAACGAGACGGTGTTAACCTGTCGGAGTAACGTCTCAGGTCGAACACCTTTCGGCGCGAACAAGCCCTGGTATTCATTGGCGATGCTGTGTCGAATCACCTGCTCAGGTGAAAAACCCTGCTGGCGGAATTTTTCCAGCTCCCGTATCGCCCCGTTAGCGCCCTGCTCCGTTCGAATCGGTTTTCGCAATGCCTGGCGAAATTCAACCCACTCACGCCAAAGCGAGAC
AGAAATCCAGTTCGGCAAAGTAATATCCAGAGGGTCAAACTTTTTGACACCTCGATTCCCCCGGGGGGGATTTAGGGGGGGATCTGTTTTTAGATCTTTATCTGTATCTTTATTAGTTGCCTTTGTGTTGACATCATGTTCAAACACCACTTCAACATCTGTTTGAACACCTGTTAAATTTCTCTCTTGTTTTGTTTGAACATCTGCTTCCTTTCTGCTTCTTCTGGCCTGAACAGATGCTTTTCCTGCGGCTGATTTTTTGGTTAATTTTTCCCTGACTGATGCCAGATCTTCCTCAATCCGAAGATGCACCCATTCCTCGCCGTTATCGCAAAAAAACTCCTGCAAGGATGGTTCAACATCAGCCCATCGCTCGTTAGTCAGACGGGCAATTTTTGCCAGCCTGTTTTTAGGTATTGGCTTTCCTGTTTGCCAGTAATTGAACATCAGCAACAAATACGCACCATGCTCCTCTGCTGACAAATGCATGGTGTCAGCCAGGTAATCAGCTATGTACAGTTGCATGTATGGTAATGCGGCCATAATTGCCCCGTATGATGCTGCCCGGTGGCTTAGAATAAGCACAAACAGCATGGAAACTTTTGCTTAATGAACAATGACAGAATCGTCGGAAGAACCGCCGCCGCTGAAATGCGCTTTCCGGTAAACGGCTTGGACTGCATCATCATGCGCATCAATTGCCGTACTTAACGCTTCCTGCGCCGCCAGTAATGCACGGCGTTCCAGGGTATCGAAGATGCAGAGTCGGTGACGCAGCTCGCGCGGAAGGATTGCCAGAATTGCTGGGATCAGCTTCTGAATTTTTTCTCTTTGCGTTTTCGTTTCACCTTTCAACCAACGGTGATAGATATTCTGCTGATTGTTCCAGTCCTTGCCTGGAACCAGGGGCAATTCGCCGCCCCCCTGGCGCAGATATTCTTCAGTAATTGCATTGGCTACCCATGCCTGCCCTTTTTCGGCTGCTAGGGCAAACAACACTGATTCGATGTGCTCATGCTTGATTTTCATGAATCATTTGCCTCTTGATGTTTCAGGTATGATCAAATGAGGATTTGTTACTGTCATTTAGTTGCTTCACTGACATATTCTGCGAACAACATGCCGAACGTCGTAAATATGACCAGTCAATATCAGGACGAAGTTCTTCGCACAGAACCTCACCTCTTGTTGCACGTTCAATTGCTGGACATCTCTCGGCAGGCAATTGACGTACCCCTTTGATCCATTGATTTACGCTTGGAGGTGATACACCTAAAAGCCTAGCCATTGCTGATTGCCCACCGACAACAGCACAAGCTTGCTTGAATGAATAGTTCTCTTTTTTCATCGAATGAACTCCAAAAACACACAGAAATATTAGGCGACGCCTAACGCAATTGTCAATAGGCTGTGCCTAATGCAGTAAGGGTAGGGATTGCCTAATGTAATGCGCATAGGAGAATATTAAGCAATGCTTAGTGGTAAAGACTTAGGCCGAGCGATAGAGCAGGCCATTAACAAAAAAATCGCATCGGGATCCGTCAAATCAAAGGCGGAGGTCGCACGCCACTTTAAAGTCCAACCACCATCAATTTATGACTGGATTAAGAAAGGCTCTATAAGTAAAGATAAACTTCCAGAATTATGGCGTTTCTTTTCTGATGTTGTTGGTCCAGAGCATTGGGGGCTTAACGAATACCCCATACCAACCCCCACCAATTCAGATACAAAAAGTGAACTTTTAGATATAAACAACCTTTATCAAGCAGCCTCTGATGAAATAAGAGCGATTGTAGCTTTCCTGTTATCTGGAAATGCTACAGAACCAGATTGGGTTGACCACGATGTTCGCGCCTACATAGCAGCGATGGAAATGAAAGTGGGTAAGTATCTGAAAGCTCTTGAATCTGAACGGAAAAGCCAGAACATCACAAAAACTGGAACTTAAACTTATATGGTCTGACGGAAAACTCCTGGATTCCGTTATTTAACCCCCCCAT
CACTTTCTGCTGTCGCCATCACCTATTAGGTTACGCTCAAAACATTAGGCATAGCCTATTGACAATCAATTAGGCATTACCTATAGTTCCAGCATACCACCCACCCCGCCCCACAGAACGCCGGGCAATACTTCGAGTTACCAGGCAGTGGTAAGGGGTTAAGTAGCCAGCCCGAGGCGTATGAACATGACGGCGGGATTCAAATTTTGCAGTGCAGCAGTTAGTTCCGCCACCCGGCGTTAAGGGGAGAGATAAGATGGTGCATTACGAAGTAGTTCAGTATTTGATGGATTGTTGCGGTATCACTTACAACCAGGCTGTGCAGGCTTTACGCAGCAACGACTGGGATCTCTGGCAGGCAGAAGTCGCTATACGTAGCAACAAGATGTGAGATTCGCAAAATGCAAAAAATCGACCTCGGCAACAACGAATCCCTGGTGTGCGGCGTGTTCCCCAACCAGGATGGAACGTTCACTGCCATGACGTATACCAAAAGCAAAACATTTAAAACCGAAACTGGTGCGCGCCGATGGTTGGAGAAGCACACAGTAAGCTAACGATTAAAACGTCTACTCCTGCTGTTCCAGAATAACTTCATAAAATGGGAGTATTTTTCGGTGACGAGATAATAAGAACAGTTTGCGCTATCACTCTGATGTTGAATGATGCCCTTCCGTTCTAATTTTTTCATAACCGGGTTACGGCAAGGAGAAGTGATAATAAGATTTCCTGTTTTAAGGAAATCTTTAAATACAGCGATTTCTTTCTCAGATAAACGAAGCAATACTCGTTGCTCTGGTAGTAATGAATAATGCTTTTGAATATGTGCTCGCAATCTTGAGAAGGAAATGGCGACCACGAAAGAAAAGGCAAAAACGATAATCTGAAAGAGCCAAGGTATTTCAGTATAAGCATTGAATGCGACAGTAAACTCTTTCGGTATCAGCCAGAGAGTGAGACCAAAAATGATAATCGTATACATAAGTCTTTCGAGTGGCTCGTTAGCAAAAAGTTTCAACAATGGAGTAAATACATCCAACATATCAATAACTCTCAACTGTAAGGGTATTGAAATGTTAACACAAGCTCTCGCTGTAGGGGTATAGCCGAGACCACCGAAGCCCGGAGGTGGTGAAATAAAACCGGGCACAACACGAAGGCGCATTTCCGATATCCATAAAGAGTCGGTCTTGTCTGTTAAATTTAAATGGTGGGAGTGCGCCTCCGGTTGTAAATAACGACATTGCTGTGTGTAGTCCTGGCGGCATCAGTTTTTTTCTTGAAGTTCGGCTGATGTCCGCCCTTTTTAAAGTGAATTTTGTGATGCGGTGAATGCGGCTAAGCGCACGTGGCACAGTTAAAAGTCATGTTAGTCCTTATTGGTTTGGGTGGGAAAGCCGACTGTAATTGTTAACTGGTTGCAGTCACCTGGAGGCACCAGACACCGCATCAACAAAGTTCATTTGTAAAAATGGAGATAATTATGATTGCACATCACTTCGGAACTGATGAAATACCACGTCAGTGTGTGACTCCTGGCGATTATGTTCTTCATGAAGGCCGGACATATATTGCCTCGGCAAACAATATTAAAAAGCGAAAACTATATATTCGTAACCTGACCACAAAAACATTCATTACTGACCGCATGATTAAAGTCTTCCTCGGTCGTGATGGTTTACCTGTAAAGGCGGAGTCATGGTGATGACTAAGAAAATAAAATGTGCTTACCACCTTTGCAAAAAAGACGTTGAAGAAAGCAAAGCTATTGAAAGAATGCTTCACTTCATGCACGGGATTTTATCAAAAGACGAACCGAGAAAATATTGCAGTGAAGCTTGTGCCGAAAAAGACCAGATGGCACATGAACTTTAATTAATTGACTATTCGAAACTGAATTTATGCCAGAAATGGCAGGTATTCGCTCAACCTTAATTAAGGAGAAAAACATGATTACCAATTATGAAGCCACTGTTGTAACTACCGATGACATTGTTCAC
GAGGTGAATCTGGAAGGAAAGCGCATTGGCTACGTAATTAAAACAGAAAATAAAGAAACCCCATTCACTGTGGTTGATATCGATGGTCCATCAGGCAACGTAAAAACACTTGATGAAGGTGTCAAAAAAATGTGCCTGGTGCATATCGGAAAGAATCTGCCCGCAGAAAAAAAAGCCGAATTTCTGGCAACTCTAATTGCAATGAAATTAAAAGGTGAAATCTGAAAGAAATAGCCTGCGTATGGCGCAGGCTATGAACAGTGTGTATCCGGCAAGATCATTCACTGAACAAAACGAATTTTAATCTGAGTTGAGGTTAAAAAACAATGAGCACAAAACCACTCTTCCTGTTACGGAAAGCGAAAAAATCATCCGGTGAACCTGACGTCGTCCTGTGGGCAAGCAACGATTTTGAATCGACCTGTGCCACTCTGGACTACCTGATCGTTAAGTCAGGTAAAAAACTGAGCAGCTATTTTAAAGCTGTTGCCACGAATTTTCCTGTCGTTAATGACCTGCCCGCTGAAGGTGAGATCGATTTTACCTGGAGTGAACGCTATCAACTCAGCAAAGACTCCATGACATGGGAACTAAAACCGGGAGCAGCACCAGACAACGCTCACTATCAAGGCAATACCAACGTCAACGGCGAAGACATGACTGAGATTGAGGAGAATATGCTACTCCCAATTTCTGGCCAGGAACTGCCCATTCGTTGGCTTGCTCAACACGGCAGCGAAAAACCGGTAACGCACGTTTCACGCGACGGACTCCAGGCATTACACATTGCTCGGGCTGAAGAACTACCGGCTGTTACTGCCCTGGCTGTTTCCCACAAAACCAGCCTGCTCGACCCGCTGGAAATTCGCGAACTCCACAAACTGGTTCGTGACACTGACAAAGTTTTCCCTAATCCTGGTAATTCAAACCTGGGACTGATAACTGCTTTTTTCGAAGCATACCTGAACGCTGACTACACCGATCGAGGACTGCTGACAAAAGAGTGGATGAAGGGTAATCGTGTTTCACACATCACTCGCACGGCTTCCGGTGCTAATGCTGGCGGCGGAAACCTCACCGATCGCGGCGAAGGTTTCGTACACGATCTGACGTCACTGGCGCGCGACGTAGCCACTGGCGTACTGGCCCGTTCAATGGATCTGGACATCTATAACCTTCATCCGGCACACGCTAAACGCATTGAGGAAATTATCGCTGAAAATAAACCGCCCTTTTCTGTTTTCCGCGACAAATTCATCACCATGCCTGGCGGGCTGGATTATTCCCGCGCCATCGTGGTTGCGTCCGTAAAAGAAGCACCAATTGGGATCGAGGTCATCCCCGCGCACGTCACTGAATATCTGAACAAAGTACTGACTGAAACCGATCATGCCAACCCTGATCCGGAAATCGTGGATATTGCCTGCGGTCGCTCCTCTGCCCCGATGCCGCAGCGAGTAACAGAAGAAGGAAAACAGGATGATGAAGAAAAACCGCAACCATCTGGAACAACGGCAGTTGAACAGGGAGAGGCTGAAACAATGGAACCGGACGCAACTGAACATCATCAGGACACGCAGCCGCTGGATGCTCAGTCACAGGTAAATTCTGTTGATGCGAAATATCAGGAACTGCGGGCAGAACTCCATGAAGCCCGGAAAAACATTCCATCAAAAAATCCTGTCGATGACGATAAATTGCTTGCTGCATCACGTGGTGAATTTGTTGACGGAATTAGCGACCCGAACGATCCGAAATGGGTAAAGGGGATCCAGACTCGCGATTGTGTGTACCAGAACCAGCCAGAAACGGAAAAAACCAGCCCAGATATGAATCAACCTGAGCCAGTAGTGCAACAGGAACCGGAAATAGCCTGCAATGCCTGCGGCCAGACTGGCGGGGATAACTGCCCTGACTGTGGTGCGGTGATGGGCGACGCAACATACCAGGAAACATTCGATGAAGAGAGTCAGGTTGAAGCTAAGGAAAATGATCCGGAG
GAAATGGAAGGCGCTGAACATCCGCACAATGAGAATGCTGGCAGCGATCCGCATCGCGATTGCAGTGATGAAACTGGCGAAGTCGCAGATCCCGTAATCGTAGAAGACATAGAGCCAGGTATTTATTACGGAATTTCGAATGAGAATTACCACGCGGGTCCCGGTATCAGTAAGTCTCAGCTCGATGACATTGCTGATACTCCGGCACTATATTTGTGGCGTAAAAATGCCCCCGTGGACACCACAAAGACAAAAACGCTCGATTTAGGAACTGCTTTCCACTGCCGGGTACTTGAACCGGAAGAATTCAGTAACCGCTTTATCGTAGCACCTGAATTTAACCGCCGTACAAACGCCGGAAAAGAAGAAGAGAAAGCGTTTCTGATGGAATGCGCAAGCACAGGAAAAACGGTTATCACTGCGGAAGAAGGCCGGAAAATTGAACTCATGTATCAAAGCGTTATGGCTTTGCCGCTGGGGCAATGGCTTGTTGAAAGCGCCGGACACGCTGAATCATCAATTTACTGGGAAGATCCTGAAACAGGAATTTTGTGTCGGTGCCGTCCGGACAAAATTATCCCTGAATTTCACTGGATCATGGACGTGAAAACTACGGCGGATATTCAACGATTCAAAACCGCTTATTACGACTACCGCTATCACGTTCAGGATGCATTCTACAGTGACGGTTATGAAGCACAGTTTGGAGTGCAGCCAACTTTCGTTTTTCTGGTTGCCAGCACAACTATTGAATGCGGACGTTATCCGGTTGAAATTTTCATGATGGGCGAAGAAGCAAAACTGGCAGGTCAACAGGAATATCACCGCAATCTGCGAACCCTGTCTGACTGCCTGAATACCGATGAATGGCCAGCTATTAAGACATTATCACTGCCCCGCTGGGCTAAGGAATATGCAAATGACTAAGCAACCACCAATCGCAAAAGCCGATCTGCAAAAAACTCAGGGAAACCGTGCACCAGCAGCAGTTAAAAATAGCGACGTGATTAGTTTTATTAACCAGCCATCAATGAAAGAGCAACTGGCAGCAGCTCTTCCACGCCATATGACGGCTGAACGTATGATCCGTATCGCCACCACAGAAATTCGTAAAGTTCCGGCGTTAGGAAACTGTGACACTATGAGTTTTGTCAGTGCGATCGTACAGTGTTCACAGCTCGGACTTGAGCCAGGTAGCGCCCTCGGTCATGCATATTTACTGCCTTTTGGTAATAAAAACGAAAAGAGCGGTAAAAAGAACGTTCAGCTAATCATTGGCTATCGCGGCATGATTGATCTGGCTCGCCGTTCTGGTCAAATCGCCAGCCTGTCAGCCCGTGTTGTCCGTGAAGGTGACGAGTTTAGCTTCGAATTTGGCCTTGATGAAAAGTTAATACACCGCCCGGGAGAAAACGAAGATGCCCCGGTTACCCACGTCTATGCTGTCGCAAGACTGAAAGACGGAGGTACTCAGTTTGAAGTTATGACGCGCAAACAGATTGAGCTGGTGCGCAGCCTGAGTAAAGCTGGTAATAACGGGCCGTGGGTAACTCACTGGGAAGAAATGGCAAAGAAAACGGCTATTCGTCGCCTGTTCAAATATTTGCCCGTATCAATTGAGATCCAGCGTGCAGTATCAATGGATGAAAAGGAACCACTGACAATCGATCCTGCAGATTCCTCTGTATTAACCGGGGAATACAGTGTAATCGATAATTCAGAGGAATAATTCAGCCTGGCGGTGTAATGCACCGCCAACTTGAAATATTTTTTATGAGAAAAATTATGAGATATGACAATGTTAAACCATGTCCATTTTGTGGTTGTCCATCAGTAACGGTGAAAGCCATTTCAGGATATTACCGAGCGAAGTGTAACGGATGCGAATCCCGAACCGGTTATGGTGGAAGTGAAAAAGAAGCACTCGAAAGATGGAATAAACGAACCACTGGAAATAATAATGGAGGTGTTCATGTATAAAATTACCGCCACTATTGAAA
AGGAAGGTGGCACTCCTACTAACTGGACAAGATATTCAAAATCTAAACTAACGAAATCAGAATGCGAAAAAATGCTCTCAGGTAAAAAAGAAGCAGGCGTTTCCAGAGAGCAGAAAGTAAAACTGATAAATTTTAATTGCGAGAAACTTCAGTCCTCGAGAATTGCATTGTATTCAAATTAAAACTTCATAGCTGATTATTAATAATCAACATCGGGCGTCAATTTCAGTCTAACATTGGCGCCTGCCAGAGGTGATGCGATGGCACAAGTAATCTTTAATGAAGAGTGGATGGTTGAATACGGCCTGATGCTTCGCACTGGTCTGGGGGCCAGACAAATTGAAGCATACCGCCAGAACTGTTGGGTGGAGGGCTTCCACTTCAAACGAGTATCTCCTTTAGGTAAGCCAGACAGCAAACGAGGGATTATCTGGTACAACTATCCAAAGATAAATCAGTTTATCAAAGACTCATGATATGTCTAAATTACCAACAGGTGTCGAGATTAGAGGTAGATACATTCGCATCTGGTTCATGTTTCGAGGAAAACGATGTCGGGAAACATTAAAAGGCTGGGAGATTACAAACAGTAATATTAAAAAGGCCGGAAATTTAAGAGCGCTGATAGTTCATGAAATAAACTCCGGTGAATTTGAGTATTTAAGACGTTTTCCCCAGTCCAGCACTGGGGCAAAAATGGTGACAACGAGAGTCATAAAAACGTTCGGAGAGCTTTGTGATATCTGGACAAAAATTAAAGAGACAGAGTTAACAACAAACACAATGAAGAAAACGAAATCACAATTAAAAACACTCAGAATAATAATTTGTGAAAGTACCCCGATATCACATATTCGTTATAGCGATATCTTAAACTACCGGAATGAACTGCTGCATGGAGAAACGCTTTACCTGGATAATCCAAGATCCAACAAAAAAGGAAGAACCGTGCGCACAGTTGATAACTATATCGCCCTGCTCTGTTCGCTGTTGCGTTTTGCGTATCAGTCGGGATTTATATCAACCAAACCATTTGAAGGAGTAAAAAAATTACAGCGAAACAGAATAAAGCCTGATCCGTTATCTAAAACAGAATTCAATGCATTAATGGAAAGTGAAAAAGGACAGAGCCAGAACTTGTGGAAATTTGCCGTTTACTCAGGACTTCGTCACGGGGAACTGGCAGCTCTGGCGTGGGAGGATGTGGATCTCGAAAAGGGAATAGTGAATGTCAGAAGAAACCTGACGATACTTGATATGTTCGGTCCCCCAAAAACAAATGCCGGGATCCGAACAGTAACACTACTGCAGCCTGCTCTTGAAGCACTGAAGGAGCAATACAAACTGACCGGGCATCATCGCAAAAGCGAAATCACCTTTTATCATCGGGAGTACGGCAGAACCGAAAAGCAAAAACTGCATTTTGTTTTCATGCCCAGGGTGTGTAACGGAAAACAAAAACCTTATTACTCGGTAAGCAGTTTGGGGGCAAGGTGGAATGCAGCAGTAAAACGTGCTGGTATTCGCCGCCGTAATCCGTACCATACGCGGCATACTTTTGCCTGCTGGCTGTTGACGGCAGGAGCGAACCCGGCATTTATAGCCAGCCAAATGGGGCATGAAACTGCGCAGATGGTGTATGAAATTTACGGTATGTGGATTGATGACATGAACGACGAACAGATAGCCATGTTGAATGCGCGGTTATCGTAGTTGCAAAGTTTGCCCCCAATTTGCCCCATTTAGTACCAGAGAACTGAAATAATGCAAGAAAATCAACAAATTACAAAGAAAGAACAATACAACCTGAACAAATTACAAAAACGTCTGCGTCGTAACGTGGGCGAAGCCATTGCTGACTTCAATATGATTGAAGAAGGCGATCGCATCATGGTTTGCCTCTCCGGGGGTAAAGACAGCTATACCATGCTGGAGATTCTGCGCAATTTGCAGCAAAGCGCGCCAATCAATTTTTCGCTGGTGGCTGTTAACCTCGATCA
AAAGCAACCGGGCTTCCCGGAACACGTTCTGCCCGAGTATCTTGAAAAGCTGGGCGTTGAGTACAAGATTGTTGAAGAGAATACTTACGGTATCGTGAAAGAGAAGATTCCAGAGGGCAAAACCACTTGCTCACTGTGTTCTCGCCTTCGTCGCGGTATCCTTTATCGTACCGCAACGGAACTGGGGGCGACGAAGATCGCGTTGGGTCACCATCGTGACGATATCCTGCAAACGTTGTTCTTAAATATGTTCTACGGCGGTAAGATGAAAGGTATGCCTCCGAAACTGATGAGCGATGATGGCAAACATATCGTTATTCGTCCGCTGGCCTACTGCCGCGAGAAAGATATTCAGCGATTTGCCGATGCAAAAGCGTTCCCGATTATTCCGTGCAACCTGTGCGGTTCACAGCCTAACCTGCAACGTCAGGTGATTGCTGACATGTTGCGTGACTGGGATAAACGTTATCCAGGGCGTATCGAGACGATGTTCAGCGCGATGCAGAATGTGGTGCCGTCGCATCTGTGCGATACCAACCTGTTCGATTTCAAAGGCATCACCCACGGTTCTGAAGTGGTTAACGGGGGTGATCTGGCGTTTGATCGCGAAGAGATCCCACTACAACCGGCGTGCTGGCAGCCAGAAGAAGATGAAAATCAGTTGGATGAGTTACGGCTGAATGTGGTTGAAGTGAAATAACCAGGATAGCGCCCGATGCGCAAGCGTATCGGGCTACTCTTATGGAGGCCGGATAAGACGCGGCCAGCGTCGCATCCGGCAATCCCGAATAAGATGTTTACTCTTGCACCCGGCAATTCAACATTTCATTATTTTAATAACCGCACCCGGCACGTTTTTCCTTTAATCTTCCCGCCCTGTAACTGTTTCCATGCTTTATGAGCAACAGCCTGACGGACCGCGACATAGACATGCGCCGGATGCACGGCGATTTTGCCAATATCTGCGCCATCAAGCCCGATATCTCCTGTCAGTGCACCTAATACATCACCCGGGCGCATTTTGGCTTTTTTCCCGCCATCGATACACAACGTTGCCATTTCTGCTTCCAGCGTCGCAATGGAACTATTAGCTGGCGGCGTTTGCCAGTTAAGTTTTATCTGCAACATGTCAGAAATGATATTGGCCCGCTGTGCTTCTTCCGGAGCACAGAAACTGATCGCCAGACCGCTATTTCCTGCACGAGCTGTACGACCGATGCGATGTACATGAACTTCAGGGTCCCACGCCAGCTCAAAGTTCACCACCAGCTCAAGCGATTTAATATCCAGACCACGCGCAGCAACATCAGTCGCGACCAGTACACGGGCGCTACCGTTAGCAAAACGTACCAGGGTCTGATCGCGATCGCGTTGCTCCAAATCGCCGTGTAATGACAATGCACTTTGCCCTACTTCATTCAGCGCGTCGCAGACAGCCTGGCAATCTTTTTTGGTATTGCAAAACACCACGCAAGAGGATGGCTGATGCAAGCTTAATAACCGTTGCAACAGAGGAATTTTGCCTTTGCTGGATGTCTCATAAAATTGTTGTTCAATGGGTGGCAAAGCATCTGTTGAGTCAATTTCAATCGCCAAAGGATCGCGTTGCACTCGTCCGCTGATTGCAGCGATGGCTTCCGGCCAGGTTGCCGAAAACAGAAGCGTCTGTCGAGATGCAGGCGCAAAACGGATGACATCATCAATGGCATCGCTAAATCCCATATCCAGCATGCGGTCGGCCTCATCCATCACCAGCGTATTCAACGCATCCAGTGATACCGTGCCTTTTTGCAGGTGATCCAGCAAACGCCCCGGCGTTGCCACGATAATATGCGGCGCATGTTGCAACGAATCACGCTGCATACCGAACGGTTGACCACCGCACAACGTCAAAATTTTGGTATTTGGCAGAAAACGCGCCAGCCGACGCAATTCACCTGCCACCTGATCCGCCAGTTCACGCGTAGGACACAGCACTAAAGCCTGGGTTTGAAATAGCG
ACGCATCAATTTGCTGTAACAAGCCGAGGCCAAAAGCCGCCGTTTTGCCGCTGCCGGTTTTCGCCTGCACGCGAACATCTTTTCCGGCAAGGATCGCCGGAAGCGCGGCGGCCTGCACCGGCGTCATGGTTAAATAACCCAACTCATTAAGGTTCGTGAGTTGGGCGGGAGGCAAAACATTCAGGGTAGAAAAAGCGGTCACAATCTATTCTCGTGGTCATCGACGCAAAGTTAGCAGGCGCGTATCCTCGCAGATCTACGCTCACGATGCGACAATTTAATCGGTTCTTCATCGGGTGGTGGGTCAGGCATGGGTTGCGGGCGAGGGATCGGATCGGGCACTGGAACAGGATCGCCAGGAATCGGTTCAGGGACAGGAATTTGCAAATAAATAAGTGTCGTCATATTTCCCTCTGGTCATTGGGTGGACTCTTAAAGGGTAGACGCTGATAAATAACAGGCAAAAAAAAGCCGACTCATCAAAGTCGGCGTCGTACGAATCAATTGTGCTATGCAGTAATTCAAAAAAGGAAGTAAGACAATATGGAGCGCAACGCCCATCGCTTGACGTTGCATTCACCTGCAAGAGAGATATTGCCCTGAATGGGTAGAGAGTTTATTGACTTCGCTCAAACTTTGCGGCGTTTTTGTATACAGACAGCCGGAAAAATTGCTTTTGTTACAACCATTTACTACGATGCAACCATAAAGCAACACCACCAATAAGAACAACTAACAGAATACAAAAAATTGAAAATCCGAATTGCCACCCGCCGCCAGGGATCCCACCAAGGTTGACGCCAAATAACCCGGTCAGAAAGGTACTGGGTAAAAAGACCATTGCCATCAACGACATTGTATAGGTACGACGAGCTAAATTTTCCTGCATCACCTGAGCGATTTCATCCGCCATCACGCCAGTCCGTGCTATACAGGCGTCGATTTCGTCAAGGCCGCGCCCAAGGCGATCGGCAATATCCTGCATCCGACGGCGTTGGTCATCGCTCATCCACGGCAAACGTTCACTGGCAAGACGAGCATAAACATCACGTTGCGGTGCCATATAGCGACGCATCACAATTAATTGTTTGCGCAGCAGAGCCAGGAATCCACGCGGTGGAATTTGCTGATCAAGGAGATTATCTTCAAGGTCGATAATTTTATCGTGCAGCTGCTCGATAAATTCACTGGAATGATCGGTCAACGCATCGCACACATCCACCAGCCATCCCCCGCAATCGGTCGGACCCGTGCCCTCTTCCAGATCGCTCACCACATCGTCCAGCGCCAGCACTTTGCGTTGTCGGGTCGAAACAATTAACCGCCCGTCCATATATACACGCATGGCGACCAGTTGATCGGGGCGTTCATCGGTGCTGCCGTTTATACAGCGCAATGTAATCAGCGTGCCTTCACCGAGACGGCTGACTCGGGGACGCGTGCTCTCGCCCGCCAGCGCATCACGTACGTTATTGGGAAGCAGCGGTGTTGTCGCCAGCCATTGGGCGCTATCATGGTGTACATAATTAAGGTGGAGCCAACAGGGATGCGCTTCATCAATCACATCTGTATTTTCCAGCGGTTTAACGCCGCCTCTACCATCCAGCATCCAGGCAAATACTGCATCCGGGACATTAACGTCCGATCCCTTAATCGCTTCCACAGTGCCTCCATCATCAACGCATTATTTTGTAGTCTAGCCTTCTGGCCCTGTTACGCAACATCTCATCACCCCATTACCCTGAAATGATTAATAAAATTCTGTCTAAATTGAATACAAAAAGCAAAATGCTTTTCCGTATACAAACCGTGTGAAGTGTTAAATAGCGTCTATCATTATCAGAATTATCTGATCATATGACGTGGCTTTTTTGCGATCGGATAGCAACAAAAATTGATAAAAATAACGGGATCTCAATGATTACGCACAACTTCAATACCCTGGACTTACTCACCAGTCCTGTCTGGATCGTTTCGCCCTTTGAGGAACAGTTAATTTA
TGCCAATAGCGCGGCGAAACTGTTGATGCAAGACCTCACGTTTAGTCAGCTACGAACCGGACCCTATTCCGTCTCCTCACAAAAAGAACTGCCGAAATACCTCTCCGATCTGCAAAACCAACACGATATTATCGAAATCCTCACTGTTCAGCGTAAAGAAGAGGAAACAGCATTGAGCTGTCGGCTTGTTTTGCGAAAGCTGACAGAAACAGAACCGGTGATTATTTTCGAAGGTATCGAAGCGCCGGCAACGCTGGGTTTAAAAGCCAGTCGCTCGGCAAATTATCAGCGCAAAAAACAAGGTTTTTATGCGCGCTTTTTTCTGACTAACTCTGCACCAATGTTGTTGATTGACCCGTCACGAGATGGACAAATCGTCGATGCTAACCTCGCCGCGCTCAATTTCTATGGTTATAACCATGAAACGATGTGCCAGAAACATACCTGGGAAATAAATATGCTCGGGCGTCGCGTCATGCCTATCATGCATGAAATCTCGCATTTACCCGGTGGTCATAAACCTTTGAATTTTGTTCATAAACTGGCGGATGGTTCGACTCGTCATGTGCAGACCTATGCCGGACCGATTGAAATTTATGGCGACAAGCTCATGTTATGTATTGTGCATGATATTACTGAGCAAAAACGGCTGGAGGAGCAGCTGGAACATGCTGCTCACCATGACGCGATGACCGGATTACTGAATCGGCGACAGTTTTATCACATTACGGAACCAGGCCAAATGCAGCATCTCGCCATCGCTCAGGATTACAGCTTGTTGCTCATCGACACCGATCGTTTTAAACACATTAACGATCTCTATGGGCATTCTAAAGGTGATGAGGTGTTATGCGCCCTCGCCCGCACCCTCGAAAGTTGCGCTCGCAAAGGCGATTTGGTGTTTCGTTGGGGAGGCGAAGAGTTTGTCTTATTGCTACCAAGAACCCCACTGGATACCGCGCTTTCGCTGGCTGAAACTATCCGCGTAAGCGTGGCAAAAGTGAGTATTTCGGGCTTACCACGCTTTACCGTCAGCATTGGTGTGGCGCATCACGAAGGAAATGAAAGCATCGATGAACTGTTTAAACGCGTTGATGATGCTTTGTATCGGGCGAAAAATGATGGACGCAACCGCGTGCTGGCGGCATAAGCCGCGGATGCGTCTCGAGATCAACGACTGCGCTTAGCGTGGCGCTCCCAGTTTTCTTGCTTCGCCTGCGCCGTTTTACGTAGTGCGACGTAACACGCCCCGCTGCCGCCATGATGCGGTAGCGCGGTGCAATATGCCTGAACATCATCAAATTCGGTCAGCCAGCGCGCCACATAGCTGCGGACAATATTGGCATGCGATTTATCATCCCGCCCTTTACCATGAATAATCAGCACGTTACGCAAACCATCCGCCAGGGCTTGTTGAATAAAACTGAACACCATTTTGCGGCACTCTTCCACCGGCTGGCGCAAAAGATTCAGGCTCGCCTGTTGCGGATATTTACCACTGCGCAGCTTATCCAGCACCCCATGTTGCAACCCTTCCCGCCGAAACTCCAGCGGCTGACTTAGTGGGATGATGTCGAGAAATCCGGTGGTGAGGAAATTATCAAGCTGCAGCGTGTCGATACGCTGCGGCGCACGTTGGTTACGCGTTGGATGCCAGTGGACATCGGTAGCACGTTTCAGCGGCTGGACATCTTCCATGGCGTCAAGAAACAGCGATTTGTCGTCAAGGTTCATGTTACATCCTCCCGCAATTAAGAGCGCGATATGATAACCAGACCGGGTCGGTCCAACAACGTATTACCCAAATTTCCAGTAATAAGTTCCAAATATTGCCGATATTTTAAGCAAAATACTTATGCATGATTATTCATTCACGATATTAATAATGTAACTTATATTTTCGTGAAATCTGTCACTGAAGAAAATTGGCAACTAAAGGTTAAAACCGTTATAACACAGTCACCGGCGCAGAGGAGACAATGCCGGATTTAAGACGCGGA
TGCACTGCTGTGTGTACTGTAGAGTCTGGCGGATGTCGACAGACTCTATTTTTTTATGCAGTTTTAACTTTGCAGATAGCCGCATTCTCGCCGGAATTGCGTGATTAATGACGCGGTCAGTGGTGTCTGGCGACTATCACGCCGCTGAATCAAATAATAGGCCGCTTTCGGTAAAATTTCGCTAACCGGCAACATCACCAGCCCCTGTCCGTGCAAGGGATCGCAGCCCATTTCTTCAGGCAGTTTGCTGAGAAAATCGCTTTTTGCCACCAGACTGATACAGGCTGAGAACGTCTCGCAGACTACACCGACCTGTGGCGTTTGCGCCTGATCGTCAAGCAATTCACTCAACTGTTTGTAGTAGCTGCCGTGTGGCGTCGGCATTGTCCAGCTGTAATCCAGTAACTGTTTGATCGAACGGGCACCAATGGCGGGGTGTCCCGGGCGGCAAAAGATCGCGAATTGCTTTTCCAGTAATTTCTCAAAAGTAAATTCGTGGTCGTACGGTCCCTGATAATAGGTATTGATGGTGAAATCCAATTCTCCCTGACGCAATTCATTAATCATCGACACCAGTTGCCCTTCCATAATGCGTACTTTTACCTGCGGATGCTGCTGATGAAAACGAGATATGACAGCTGGCATCAGACTGCGGGAAATACTGGCCCCCATGCCGATATTAATCTGCCCTGCCAGTTGCCCTTGTCGTTGGCGAATATCCTCTTGGGCTGCGCGCAGCTCTTCAAGAATTAGACTGGCGTGCTGATAAAAACTTTCACCGGCATCAGTTAACGTCACGCCTTTACTACGGCGAAAAAAGAGTTGCGCCGCTAACCCTTCTTCTAGCTCCTGAATAGATTTACTCAGTGCCGGTTGCGACATATTCAACATTCGGCTCGCTCCGCGAATGCTGCCCTGACGAGCCACTTCAACAAAAGCCCGAATTTGATGAATTTTTACCTGAAAAGCCATGACGCCACCGATAACCGTTATTTATCAGACCAAAGAAACTGGCATCTACTTTAATGCAGATGATTGTGTCAGGGTAATTTATGAACGGTTAAAACTGTGAAAAATCAGTTAGTGATAAGTAAAAACTATCGCTACGTGAACCGGGTCACACTTTTTACTGATGACGGGAAAGGTTATGGAGTCTTTGAATCAATTTGTTAATTCGCTTGCCCCAAAATTATCGCACTGGCGACGTGATTTTCATCACTATGCAGAGTCTGGCTGGGTGGAATTCCGCACTGCCACCCTTGTTGCGGAAGAATTGCACCAGCTCGGCTATTCACTGGCGCTGGGTCGCGAAGTAGTTAATGAAAGTAGCCGGATGGGATTACCTGATGAATTCACTCTACAACGCGAATTCGAGCGCGCTCGTCAACAGGGTGCGCTAGCACAATGGATTGCGGCTTTTGAAGGTGGTTTCACTGGTATCGTCGCCACCCTGGATACCGGTCGCCCCGGTCCGGTGATGGCTTTCCGTGTCGATATGGACGCGCTGGATCTCAGTGAAGAGCAGGATGTCAGCCATCGCCCCTACCGCGACGGTTTTGCGTCATGTAACGCCGGAATGATGCATGCCTGTGGTCATGATGGACATACCGCCATTGGGCTTGGGCTGGCGCATACCCTTAAACAGTTCGAGTCCGGACTACATGGCGTCATCAAACTGATTTTTCAGCCTGCAGAGGAAGGTACGCGTGGCGCGCGGGCGATGGTCGATGCAGGTGTCGTAGATGATGTTGATTATTTTACTGCCGTGCACATTGGCACTGGCGTACCTGCGGGCACCGTGGTGTGCGGCAGTGATAATTTTATGGCAACCACCAAATTTGACGCGCACTTCACCGGTACCGCCGCTCACGCAGGCGCAAAACCAGAAGACGGTCACAATGCCTTGTTGGCGGCAGCACAAGCCACTCTTGCACTGCATGCAATCGCCCCGCACAGCGAAGGAGCTTCCAGAGTAAACGTGGGCGTTATGCAGGCAGGAA
GCGGTCGTAACGTTGTTCCTGCCTCGGCGTTGCTGAAAGTGGAAACACGCGGGGCCAGCGACGTCATTAATCAATATGTTTTTGACCGTGCACAACAAGCGATTCAGGGCGCAGCAACCATGTATGGTGTCGGCGTTGAAACTCGTCTGATGGGTGCAGCTACCGCCAGTTCTCCTTCGCCGCAATGGGTCGCATGGTTGCAAAGTCAGGCGGCTCAGGTCGCGGGGGTCAATCAGGCCATTGAACGTGTTGAAGCGCCTGCGGGTTCCGAAGATGCCACATTAATGATGGCCCGCGTGCAGCAACATCAAGGGCAAGCCTCCTACGTGGTGTTTGGCACACAGCTGGCGGCAGGTCATCACAACGAAAAATTCGATTTTGACGAGCAGGTTCTCGCTATTGCCGTCGAAACGCTGGCGCGCACCGCGCTCAATTTTCCCTGGACGCGAGGTATCTGATGCAGGAAATCTATCGTTTTATCGACGATGCGATTGAAGCCGATCGCCAACGTTATACCGATATTGCCGATCAAATCTGGGATCATCCAGAAACACGTTTTGAAGAGTTCTGGTCAGCGGAGCATCTGGCTTCGGCGCTGGAATCTGCAGGCTTCACCGTTACCCGCAACGTAGGCAATATCCCAAATGCCTTTATTGCTTCGTTTGGTCAAGGCAAACCGGTTATCGCCCTGCTGGGAGAATATGACGCCCTGGCAGGTTTAAGTCAGCAAGCAGGTTGCGCGCAACCTACATCCGTGACGCCCGGTGAAAATGGTCACGGTTGCGGACACAATTTGCTGGGAACCGCCGCCTTTGCCGCTGCAATAGCCGTCAAGAAATGGCTGGAACAATATGGGCAAGGCGGCACGGTGCGCTTTTATGGTTGTCCTGGCGAAGAAGGCGGCTCGGGTAAAACGTTCATGGTTCGCGAGGGGGTATTTGATGATGTGGATGCGGCACTCACCTGGCACCCGGAAGCCTTTGCCGGTATGTTCAATACCCGCACGCTGGCAAACATTCAGGCATCATGGCGCTTTAAAGGGATCGCAGCACATGCCGCGAATTCCCCTCATTTGGGACGCAGCGCCCTTGATGCCGTAACGTTGATGACCACTGGCACCAACTTCCTCAACGAACATATTATTGAAAAAGCGCGCGTACACTATGCCATCACAAATAGCGGCGGGATCTCGCCCAACGTGGTCCAGGCGCAGGCAGAAGTGCTTTATCTTATCCGCGCCCCCGAAATGACCGACGTGCAGCATATTTATGATCGGGTCGCCAAAATCGCCGAAGGTGCGGCATTGATGACCGAAACCACGGTTGAATGCCGCTTCGACAAAGCCTGTTCCAGTTATCTCCCGAATCGCACCTTAGAAAATGCCATGTACCAGGCCCTATCCCATTTTGGTACCCCGGAATGGAACTCCGAAGAACTGGCTTTTGCGAAACAAATTCAGGCTACGCTCACCTCCAACGATCGGCAAAACAGTCTGAATAATATCGCCGCAACCGGTGGCGAAAACGGCAAGGTTTTTGCACTACGTCATCGTGAAACGGTACTGGCGAATGAAGTCGCTCCATATGCCGCCACCGATAACGTGCTTGCGGCATCGACTGATGTCGGCGACGTCAGTTGGAAACTGCCTGTTGCCCAGTGTTTCAGCCCCTGTTTTGCCGTCGGTACACCGCTACATACGTGGCAACTGGTTAGCCAGGGGCGAACATCTATTGCTCATAAAGGAATGCTGCTGGCGGCGAAAACTATGGCAGCAACCACAGTCAATCTCTTCCTTGATTCAGGGCTATTGCAAGAATGCCAACAAGAGCATCAGCAAGTAACGGACACGCAACCGTATCACTGCCCTATCCCGAAAAACGTGACACCGTCACCTTTAAAATAACAACAACAACGCAAACACAACAACCGAGGAATGCCCATGAGTATGTCATCCATACCGTCGTCCTCCCAATCCGGGAAGCTCTATGGCTGGGTCGAAA
GAATTGGTAACAAGGTTCCCCATCCTTTTCTGCTCTTTATCTATTTGATTATCGTACTCATGGTGACGACGGCAATTTTGTCGGCCTTTGGCGTCAGTGCGAAAAACCCGACCGATGGTACGCCGGTCGTGGTGAAAAACCTGCTCAGTGTGGAAGGATTACACTGGTTTTTACCCAATGTTATTAAAAACTTTAGCGGTTTTGCTCCACTTGGTGCGATCCTGGCGCTGGTTTTAGGTGCCGGTCTGGCGGAGCGCGTCGGCTTACTGCCAGCACTAATGGTTAAAATGGCATCGCATGTTAATGCCCGCTACGCCAGTTATATGGTGCTGTTTATTGCTTTTTTCAGCCACATTTCTTCCGATGCGGCGTTAGTGATCATGCCACCGATGGGTGCGCTGATTTTTCTGGCGGTGGGCAGGCATCCAGTTGCAGGTTTACTGGCTGCCATTGCAGGCGTAGGTTGCGGCTTTACGGCTAATTTACTGATTGTCACAACCGACGTGTTGCTGTCGGGGATCAGCACGGAAGCGGCAGCTGCGTTCAATCCGCAAATGCACGTCAGTGTAATTGATAACTGGTATTTTATGGCCAGCTCCGTAGTCGTACTGACGATTGTTGGCGGCCTGATAACCGACAAAATCATCGAGCCACGGTTAGGTCAATGGCAGGGAAACAGCGATGAGAAACTGCAGACATTGACCGAAAGTCAGCGTTTTGGTTTACGCATAGCAGGTGTCGTATCGCTACTTTTTATTGCTGCGATTGCGCTGATGGTGATCCCGCAAAACGGGATATTGCGCGATCCGATTAATCACACCGTGATGCCATCACCCTTTATTAAAGGTATCGTGCCACTGATCATTCTTTTTTTCTTTGTTGTCTCGCTGGCTTATGGCATCGCTACCCGCACAATTCGACGTCAGGCGGATTTACCGCATTTAATGATTGAACCGATGAAAGAGATGGCGGGATTTATCGTGATGGTTTTTCCCCTCGCCCAATTTGTCGCCATGTTTAACTGGAGCAACATGGGGAAATTCATCGCCGTGGGGCTGACCGATATACTGGAAAGTTCAGGGCTTAGCGGCATCCCGGCGTTTGTCGGTCTGGCGTTGCTTTCCTCTTTCTTATGCATGTTTATTGCCAGCGGTTCCGCAATCTGGTCGATTCTCGCCCCCATTTTCGTACCAATGTTTATGCTACTTGGCTTTCACCCGGCATTTGCGCAAATCCTCTTTCGTATTGCCGACTCATCCGTATTGCCTTTAGCGCCGGTATCTCCTTTTGTTCCACTGTTTCTTGGATTCCTGCAACGCTACAAACCAGACGCGAAACTGGGTACTTACTATTCGTTAGTCTTGCCCTATCCACTTATCTTTTTGGTGGTATGGCTGCTGATGTTGCTGGCGTGGTATCTTGTCGGTCTGCCGATAGGTCCGGGTATTTACCCACGTTTGTCTTAAGAGAGAACGGATGCTGAGATTACTTGAAGAAAAAATTGCCACGCCACTGGGTCCACTGTGGGTGATTTGCGATGAGCAATTTCGCCTGCGGGCGGTTGAATGGGAAGAGTACAGCGAACGCATGGTGCAGCTGCTGGACATCCATTATCGCAAAGAAGGCTATGAGCGCATTTCTGCCACCAATCCAGGCGGTTTAAGCGACAAGCTTCGTGAATATTTTGCCGGTAATCTTAGCATTATTGATACGCTTCCCACTGCTACGGGGGGGACGCCATTTCAGCGCGAAGTCTGGAAAACACTACGCACTATCCCCTGCGGGCAGGTAATGCATTACGGCCAACTGGCTGAGCAATTGGGCCGTCCTGGCGCGGCGCGTGCCGTTGGTGCGGCAAACGGATCGAATCCCATCAGCATCGTCGTACCTTGCCATCGGGTTATTGGCCGAAACGGCACCATGACCGGATATGCAGGCGGAGTTCAGCGAAAAGAGTGGTTATTGCGCCATGAAGGTTATCTTTTGCTGTAAACATTAAA
CAATTTGTGCCAGCTTGTTCACACTTTTATGTAAAGTTACCCTTAACAACTTAAGGGTTTTCAAATAGATAGACATATATTTACATCTAATATCGGAATTCTCTGCTGTTAAGGTTTGCTTAGGGAAGGTGCGAATAAGCGGGGAAATTCTTCTCGGCTGACTCAGTCATTTCATTTCTTCATGTTTGAGCCGATTTTTTCTCCCGTAAATGCCTTGAATCAGCCTATTTAGACCGTTTCTTCGCCATTTAAGGCGTTATCCCCAGTTCACAATATAGTTAAATGCGATGTTTTTGACGGTGTTTTCCGCGTTACCAGCAGCGTTAACGGTGATGGTGTGTCCATGTGAACCAATCGCAACGGAGTGCGTATGAGCACCAATACCGACAGTATGCGCGTGTGCACCTGCGCTTGCAGCAGTGCCGGACAGTGAGTGGGTATGTGCGCCAGCAGATGATGTTGCATAGTTTTGATTATGCACAACAGACAATCTTGTTGATGCACTACCAGCACCGGAGTTAGCACTAGCCGTGTTCACGTTGGCTAGTGAGTGTGTGTGTGCTCCAGCCGAGTTTGTAGAGCCGCTCACACTGTGTGTATGTGCCCCGGTGTTATTCGTGGATTTAGTGCCGTAATCAAACGACGATGTGGTTTTCGTCCCCAAATCTGTACTGGATGCGCTGGCGCTGTGGGTATGCGATTTAATGCCGTCCTGTTCCTGAGACAATACGGCCCGACCACTGGCAGGTTTGCCCTTAATCGTCCAGCCACGCATATCAGGGATCACGCCTGACGGATAAGCGGCTGCAAGTTTCGGGTAAGCAGATTTGTCAAAAGCCTGCCCCTGCATCAGGGCATAACCAGACGGAACGGTATCTGATGGCCACGGGATTGGTGCGCCGACTGGGTAGCTTTCTGGTGGAAGATTTTTCGAGGTATAAACTTCTGCCCAGTCTTCCTCAAAACCATAACCGTCTCTTGAAGAACGGTAGAACAGACCACCATTTCTGTAATGCGCCTTCATCTGCAGGGTCCGGCAACTTCCGACTCCGGTATAGAAGTTAACCAGAATATAGCTGTCGCCAGAGCGGGTGACATTGTAAGCGCCAGATTCGGCATTCCATGGAACGCCACCATCCGCATCGGCATATGTATCCGTTGCCCTTCTGGCAAAAGCAGCCACATGCGCGGCGGTTAAAGTAATATCTTTGGAACCATCAAACTCAACACCAGAAACCCGTCTTGGCGTTTGCAGCTTTGTTGCTGTTAATGCATTACCGTTCAGACTTGCGGACAGTTTGGTTCCAATAACCAGTTCGCCGGTTGCGTTATCAATAGCAAACGGTCTTAATGTATTCCAGCCACCATAAACATCACCTTGATTGGTAAGCAGCAGGTAAGTTTTAGCGCCATCATTACGCCATAATGCACCATACTCCCCACCTATCATTCGAATCTGATTACCACCACGCGCTACAATTTCGTCTGTGGCAAAAAGTTTTTTGCACGACAAGTTATCGTTAACGATTAACGAATGAGACTCATAAAAACCACGCCCACTCTTAAAATCAAGGATAACGTCCGCCGCGATACATTCAGTCGCCGGATTTGTTGCCCCAAACTTATAGGTCGTATCATTAACAACGAGATCAGCACCAGGTGCGGATATTGACAGGCCATCTTCAATAAACGCAAAAACAGGGAAAGCAGCGCCATCAACATAGAACACAGAGCGCAAATCATCGCCCTTATTACTCATCATTATTGAGTGAATGGCTCGTTCATTGTTTTGATATTGCCAGAACATGCCATAAGCATAACGCCCCCTGTCAGTCCAGCCACCAGGCATAACAAATCCGTTAAACTCGCAGTTATTCATCGGATCGCCTGCGGTTCGCGTTGCCGTGGTGATAATGACTCTTGATGCCAGTTCGCTTACTGAGCCAGCAGAACGCATAACAACAACAGGGTAATATTTTCCAGATGTTGCACCTGCAGGA
GCGTTAACCCGCACATAACGCATACCACGCTTATCAGCAAAGTCTGTTTTACTGACCGCGTTAATGTTGTTCAGGAAGCATCCCTTATCGGGTATATCAGCGCCGTTCTGGTCTTTCTGCAGACGTTTCTCTGCATTGTCATAGGCTGATTTTACTGCCTTTGGCGTTGCCGCCAGCGTTTCAGACGTACTGTTGGTCGCACTGCTGAGCTGTACTATCCCCTTTTTCGTCGTACTTGCATCCTCAAGCGCCACGGCGGATGCAATATCCTCTGCCCGTTTAGCTGCTGTCTCGGCGCGCGTTGCCGCGGATTCCGCCGTACTTTTGCTCTGAGCTGCCGCCGTCGCACTGCCAGCAGCCTCTGTCGCCTTCGTGGATGCCGTCGTGGCGCTGCTCTTCGCTGCTGACGCTTGTCTGGTCGCCTCATCTTTTGAAGCAGACGCCGATGATGCCGATGACGCCGCCGAACTGGCTGACGATGCGGCAGCCGTTTTTGAGGATTCTGCGCTTGTTTCCGACGCTTTCGCGTTCGTTTCGGATGTCTTCGCTGCGGAAGCAGACCTCGCTGCTGCGCTGGCCTGTTCAGTGGCTTCGCCAGCCTTCGTTGTGGCTGTTGAAGCAGACGATGCGGCGCTTTCTGCCGATTTTCCGGCGGCGGTGGCACTGGCTGAGGCCTGCCCGGCACTTGTTGACGCTGCACTGGCAGACGACGCAGCCGCTGTTTTTGAGCCTGCCGCAGCCGAGGCGCTCTGTCCCGCTGCCGTTTCAGAAGACCTGGCGTTCGTCTCGGACGTTTTTGCCGCCTTCGCGGAATTTCCTGCCGCCGTTGCCGAGGAAGCTGCACTACTGGCGCTTGATGATGCGTTCGTTTCTGATGATTTTGCCGCTTCTTTTGAGGCCGCCGCATCCCGGGCCGAGGTCGCAGCTTCTGATGCCTTCGTGGTCGCGGTGGATGCAGATGTGGCTGCTGATTGTAGTGACGCTGAAGCATTCGTTTCTGACGTTTTCGCCGCACCGGCACTGGTGGCCGCCGCGCTTTTTGAGGACTCTGCAGCGGCAGCACTTTTTGATGCTTCAGTGGCCTTTGTTGATGCCGTTCCTGCGCTGGAAGACGCTGACTGAGCCGACGACGCGGCCTGTCCGGCTGACGTGCTGGCTGCGCGTGCTGAGTCCGCAGCATCAGCCGCATGGGTTGCCGCCTCACGGGCTGATGTGCTGGCATCACTGGCTGT +>5_2#NODE_15_length_39993_cov_62.9495_ID_29 
+TCTTGATTATTGGCGATAAAGTCCCAAAAAGTATCGTGTGCTGTTTGTCCTTGGGGAATCTCATTATGTGGCTCTGGTTTAAGAGCATGAATAAGATCCGGAAATTTAATACCATCTTGAATGAAGAAAATCGGAATGTTATTACCCACTAAATCGAAATTTCCTTCCTCTGTATAAAACTTAACAGCGAATCCTCGTACATCTCGGTTCGTTTCATTTGCACCTTTTGATCCAGCAACTTCAGAAAAACGAATAAAAATAGGTGTCTTTTTAGATGGGTCTTGTAAAAAATGAGCCATTGTTAAGTCTTCTAAAGAATCATATAGCTCAAAAACACCATGTGCTCCGTAACCGCGGGCATGAACGACTCTTTCAGGAATTCGTTCACGGTCAAAATGAGCGAGTTTTTCTCGCATCAGGAAATCTTCTAAAAGTGTAGGACCACGATCGCCGGCGGTTAAAGAGTTTTCATCATTTGAAATTTTGACACCCGTATTCGTTGTCATCTCTTTTCCTTCATTGTTTTTCGTAAAGGATTGTAACTGTTCTATCTTCTTATTTTCACGACCGTCCATATAAAGCACCTCCCATATTATACGTATTATTGTTCATGGTTTTTATGCTTACATATGGTATGGGGAAGTATATTATCTATGGATTGATTTTCCTCATTTGTTCAATTTATCTTTGTTTGTCTATGAAAATCATTTATTATAATAAATGGTATTCTTTTGAAGGGGTGTTGAAGTGGAAGAGAGTTTTCTATCAGCTAAAGAAGAAAAGAAAAACGCTAAGATTTTCTTATGGGTAATACATGTTATTTTAATTGTATACGAGGTTGCATACGCGATTATATTAGAAGATACAATGCCTTTAGCAAATTGGCATAAAGGGATATGGAAGCTTGCATATATTATGGCTATATTAGGTATTAGTGTTTACTTATTTGAGAGAGAAAAAGCATATTTAGTTAAATATACATATTTATTTGCATACATGATCGCAGAGACTTTTAATATTGGATGGTATGCTTTTCATAATACAATAGCATTTGATGAAGGGAATGTAATTGAATATATTTTCATTTTCTTCGTACCGATCTTTTTGAGTAAAAGATATTTATTTGTCTTAGCGCCATTCCTTATAGGGAAATATATGATATACCTATTTGTATTTGGAGAACTTAACCTGTTTATGTCTCTTGTTATAAATATGGTGTTACTTTTCGTGTCATTTATTATTTTAAATCGATTTTTACAATATCTTTCAGCAGTAAAGGGACGTATTGCAGAGGCAAGTCATTCACAAAAATTGGCGGTTATCGGAAAAATGGCAGCGACAGTCGGACATGAAATTAAAAACCCACTTGCTTCATTAAAAGGGTTTACGCAATTACAAAAAGAGAAACACGAAAAAGATGCAACATATGAACAAATGATTCTCGAAATCGAAAATATGAATAATATGATTAGTGAATTAATGGAGGTTGCTGCATGTAAACCTTCTGTTTATGAAAAACACATTGTAAGTGATGTTTTAGTACAAGCGGTAGAAAACATGCGCGGAAAAATGAACGAGTTAAATATAAATTGTACCTTTAATGAAGAGCAAAATAGAAGCGAAATTGAATGTGATAAACGCAAATTAAAAGGAGTATTTTTGTATGTTATTAAAAATGCTTTAGAGGCAATGGAACATGGCGGAACATTAAAAATACAAGTTGAAAATAAAAAAAGAGATTATGTAATAGTAAGTATAGTAGATAGTGGTTTTGGGATAAAAAAGGATAATTTAGGACGAGTTAAGGATGCTTTTTATACAACGAAGCAAGATAGAATTGGATTAGGTCTTACGGTAGCAGAGCGAATTGTGACAGAGCACCTTGGAGAATTACACATTTCTAGTGAAGTAAAGAAAGGAACGAGAGTAGAAATACTGCTCCCGAAAAAATGTGAGCGCAATGTGACACAAGATTAAGAGGTGTTGTTAAAGGAGCTAT
CATATGGAAAAAGGAAATATATTTGAAAAAGAAGAGATAAAGGCATTAATAATATTTTTAAGCTTATTCTTCGTTATATTTTTTGCGTATGATTTTGCTGAAAAAGCTATTGTCCTTTTATCAGATAAAAATCAAAAACTAGCAGATGCTTTTGGAGAAGGATTAGGTTTATGGCTATATAGTTTTATGGTTGGATTATTCTTTATAGGACTTTATTTTATGAAATGGAAAAAGCCGTATATTGTGAAGTATATTATTTTAATTGGCTATAATATATTGGATTTTATTAATAACTTCATTATTTATTACGGAAGTGATGCGGAATTTGATGGCGGGAATATAGTAGAAGGATTCTTTATTTTATTTGCACCAATATTTGTGAATAAGAGGTACTTTTGGTTAGTTGCGGGAACTATTGTTGGAAAATATGCACTTATGGGATTCGTTGTTCAATCCTTTATTGTTCTTATCCCAATAGCATTATATAGCGTGTTTGTTATTATATGTTGGATTATATTTTTAAGATTCCAATCTTACGTTCGTACACTTGAAATGATGGATAAAGAAATACGAAATGTAGAGAAATTAGCAATGGTTGGAAAAATGGCGACAGTAATTGGCGATAAGATTAGAAGACCGTTAGAAAAATTGAAAAAACTTGTGAATAAGCAAGCGAAAAAATATCCAGAAGATAAAATTTATAGTGAAATTATGAGACAGGAAGTAGAGCGAATTCATACAATTGCTACAGAACTAAATGGGTTTGAGAAATCTAAATCGGTAGAATCCGAAACGTATAATATTAAAGAAATCATCTCTTATGTTATCAGGGTTATGGAAAAGCCAGCTTTAGAACAAGGAATAAAAATGCATGCTATTTATAGTAAAGATATACCATCAATTACATGTGAGGAAAAACGATTGAAACAAGTATTTTTTAATTTAATAAAAAATGCGATTGAAGCAATGTCAGTTGGCGGAACTATTACTGTAAAAGTTATAGTGGAAGATGTGATCATTGTTCAAATTATAGATGAGGGATGCGGCATTCCGAAAGATAAAATTCCGAAGCTAAATGAAGCTTTTTACACAACGAAAGAAACTGGAACAGGCTTAGGATTAGTAGTTACGGAAAAAATTATTAAAGATCACCACGGTAAATTGAACTTTGAAAGTGAAGTTGGAGTTGGAACGACTGTTGAGATTATGTTGCCGATTTAATTATAGTGGGATATTTTAAAAAGGGGAAATCACTTTCTTTAGAGTGATTTCCCTTTTCAGTGTGGAAGGAAGTACGGAATTTTAATAAACGGGATGTTTACAACATTGACAAACTGTACAAATTCACAATCCAAAATATGGGTGTTTTAAAGCGAGAGTATATAATAAATTTGTCTGTTATTTGTTTTAATTTATTTGCAATTTTTTTGGCCTCGGCATCGAGGTCCTTTTTGTAAATGAATGATTTGTAAGTGTGGATTATATATCAGTGATTAGAAAAATAGAATATACAGAACTTGCCGTAATCCCGGGGGTAATATAAAGAGATATCGCATCCCCCCTCGAAAACAGGTTTCCGGTTGTAATACTAGTCCAAGTTACAGTGGCAGGTCCAGGTGCTGGAGTAGGGTTATACACTTGAGGTTGTCTAGAGCCAATATCCGTAGGTTTAATTGAAAATGTCATAGTTCCCGTAATTTTATCTGTAGTAGTTAAAGTAATAGTAGATAGGATGTAACTAGAGGTAGGGGCAGAGAGGTTTGTTGGGACATTCCTGCAAATATCAATCGTATAAACAGCAGCGGGTAAGTTATTAATATTTATAGATGTAGCAAAACCAACTATATTTCCAGCTCCAGCAGTTACATACGGAATAACGTTAGAATCAATTCCTGGAGAACCTGCTATACGTTGAAACCCAGCATTCGTCCCTTGAAAAACAATACTTTTCGCTGTTGAACTACCAGCAGGTCCAGTAGTCCCGGTAGAACCAGTGGCAC
CAATAGTTCCGGTAGCTCCGGTAGAACCAGTGACTCCAGTAGAACCAGTAGCTCCGGTAGAACCAGTGACTCCAGTAGAACCAGAAGCTCCGGTAGAACCAGTGACTCCAATAGTCCCGGTAGCTCCGGTAGAACCAGTGACTCCGGTAGAACCAGTAGAACCAGTAGTTCCGGTAGAACCAGTAGAACCACCTTCTACCAATACGATATAATCTTGAGACACATTCGGAAATCCTTGAGGTGCATCTTTTTTTACAACATAGCCGCTACTCGTGTAAGTGACAACTTGTCCCGTTTTATAGTTAGGAGCGGCTGCTAGACTAAAAGCGGTAGTACTTTGCAAACCGGCCCCGGCATTGCCAGTAGGTCCAGTAGCACCAGTGGGTCCAGTAGCACCGATAGAACCGCTGTTGCGTCTACCGTCACAACTAAAGCAATTGCGTTGGTACATATATTATCTCCTTTCTATATTTTTGAAAATCTAGTTTTAAAAGTAGAACTACTATATATATATTGTAGATTCTATATTTTTGTGTGGTCGATCGTATTGTGTATATAGATTTTAAAAATAGGGGAGGTGTTATTAGTTTTATAACCACCAGGTCTCCTGCGGAATCAAAGTAACATCTACATCAAAATTCTCTTTAAACCAATCATACATCAATGTCTCTTTCACTAAATATTCAGAGGCAGAGTGTGACAAACCGATGAGTGACATATTTGTTTCTTTCGCATAATCCATAATGAGTGAGTATTTATGCCTACCATAATCATTGTCGATATGGCAATGGATTTCTCCTGTAATAAATGATTAAGTTATTGTTCTATTTCTTTTAAGGCTGGCATGTTTGGTAATCCTCCTTAAGATAGTTCTTACTTTGTTAGTTGTTTTTGTTTTTTTATGGAAAAATAAGAAGCGGATAGGAAACAAATAAAACCTAGAATCCAGCCAATAGAAGGAGAATAGGGAAGTATATGTTGTAATACCATTCCGCTAGAAATAAAAAAGCTACCTAATAGACCTAAGAAAATGAGATATCGCTGTAGTTTGAACATCAGTGATACAACCTCCTTTATCACGCATTAACTGCCCGTAACCTGATTGGCGAGGGCTAATAATCAGAGGGGGATGGATACCTCCCTGATTAAAGTTTCACTTTATCACATATATTTCAAAGTAGTCTTAATTTTCTGCATATAAGATTCGAGAAAAATGGTGGTACAATATATAAATGATACGAGAAGAGAGGATGTAGGTTAACACATGTACGTTACTGTAACAGAGGCAGCATATAAAAAGATTATGGATACGATTCCAAGTGAAACGAAATATATAAAGTTATTTTATGATAATGAAGGTTGCGGTTGTGTTATGAGCGGGATTATCGATTTAGTAGCCGTTTCAGAGAAAGATGAGCGCGATGTAGATATCGAATCAAGCGTACTAAACTTTATTGCAGATCGCACAAAGCTTGTATTTATGGATGATAAATTAACAGTTGATTGGCATGAAGTTGGAGGGACTTTCCAGCTGAAGAGCCCAAGCCAGTTTTATAATCCGAATATGAAGTTACATGTTCGAGTATAATGAAGAGAGAAGGAGTATATAATATGCTCCTTTTTCTTATGGATTTTTTCACATAATGAAATGTACATATAGTTGAATTTTCTGTATAATTCTCTTTGCGAAGGGAGTTGCGCATGAAAAGATGGGGAATTGAGTTATTAATTTTAAGTGTCGTTATTATTTGGGGGATTAACTATACGATTGCGAAATATGGACTTTTAGAATTTACAGCAATTGAGTTTACTGCAGTTCGAATGATGACAGCAGCACCACTACTGTTATTACTTACGTTCTTTATCGAAAAGTCACTTTATATGGAGCGAAAAGATATACCAAGATTAATCATCGTTAGCGTTGTAGGTATTGTACTGTATCAAACGTTATTTATGGAAACTGTAAAATACACATCCGCTACAAATGCTTCTTTACTCAT
ATCTATTTCACCTATTTTTACAACTGTATTTGCGATTTTCTTGAAACAAGAAAAATTTTCTTCTCGAAAGTTAGTTGGTTCCATGATTGCCTTTATTGGTGCTACATTAGTTTTAGTAGCGGGGCATTCACTCGCTAGTTCTTTCTACGGAAATGGGATTGGACTTATTACATCAATATGTTGGGGACTTTACCCCGTTTTAGCAGGGCCGCTTATTAAAAAATACTCAGCATTACGTGTTACTGCATGGTCGACATTAGTTGGAGCGATTCCGCTCTTATTGTTAAGTGGCCCACATGTATTTGTCATGCCATTTCACATTACACACGGAATGACACTATTTGCTTTACTATATTCTGTTTTCTTTGTAACAGTATTCGGTTTAGTCATGTGGTATGTTGGTGTTCAAAAAATTGGTGCGTCACATACGATGGTATATATGTATATTACGCCGCTTGTAGCTGTTTTATTTGCAGCTGTATGGGCAAATGAATATGTATCGTTTCAACAAATAATCGGTGGAATCATCATTTTCTTCGGTCTATGGTTTGTGAAATCAGAGAAAGTAGAAGCTAATTCTATCGTGCAAGAACCTATATCAAAATAGGAAAACAGATCACCTATGATCTGCTTTCCTATTTTTTTATTTTTGGTAAAAATACGAGGAATAAAATGGCAGTAATAAGTGGGATTAAATTAAGAAAAGGAATGCGGAAGAGTGTAATAAGAATAATACCTGGAAGAAGAACACCGAGGACAGCATAGAAAATACTATGTTTTTTCGTGTACCAATACAAGGAAAGTCCAATAAGTGCAGGAACAATGAGATGAATGAGAGCCATTAAAAAATAAATCATTAGACGCCCCCTTTATTTATGTGCTTATTACATCATATCCGTATGTTCATAGATTGATATCAATTTTTTCATAATTGGACGGAAATGTTTTGAAAAACGTCACATTTATGAGGGATTCTATATTAAAAATCTTGTATAATATTATTTCCTAAAAGCATTTTAACAACATAAACATACCGTTTATGTTGTTTTTAATTTGAATGAAATAGATTTTAAGTAGTATGGACTCGAAAGAAATGAGTTATACAGCTTTTGTTGGATAAATAGAAAAAACATTGAAGGGGAGTACAGCATGATTAGTTTGTCATTAAAATCGTTTAAAGTTCAATCATATTATCTACTAGGTATTCTGTTATTAGGCTGGATGTTAACACCGTTTTCAGCACATTTTCTAGGTGCAGGAATTGGACTTATTGTAAGCATGTATTGTGTTTGGATTTTAGGAAGGCGTATCGAAAAGCTTGGAGATAGTATAGTAAAGAAGACGAAAGCACCGACGCTTGGTATGTTTAATCGCTTTGCAGCTGCAATTTTGGGTGCTATTATCATGTACGAAATTGAGCATCATATGGTAATGTGGGCATTTGCAATAGGTATTCTGGGTGGTTATTTCTTAATCGTTATTAATTTAGGGTATTATAGTATGAAGGATGAGGAAGAATTAACGAAGGGCTGAAAAAGATAAAAAGATGAAAAGCCTCTTTTTAGATGGGATAGAGGATCCAGCGGTGGATAGAAGCCGTTAGGGCCTTTTTTAGCCACATGCGGTGCGAAACCAGAGCGAGGTAGTGAGCAGGAATATAATTTATATGTTCGAAGCCGTAATTTGTATATATGAAAATCTGGTTATTTAAATTGAAAATTGGCTTACTCTGGAATAAGGGATCCGGCGATGGATAGTACAACAGGAAGCGTAACTTTGTATAAAATATAGGTTTCCGCTTCCAATAAGTTAATATGAAATCACTTAGTAGTATGACTCTTAGGTTCTTTAAAAAAGTTGGATACAATACAACAAACTATTCCGGTAATTAGAATAATGTATTTTATCGTTTCACTTGTATGCTCAAATAAATTAACGTTAAGCATTACAAGTCCCCAAAACATTTGAATCGCTCCATAACTGAATAAAATTTTTTCTGT
ACGAGAAAAATTTTTCATGGGCCTCACCCTTTCCTTTATATGTAAAATATATCATAGGAAACTACAAACGAATGATTATAAAATGAATTTTCTGTAAATTTATAGGTATTGGTATGCAATAGATGAAGAGATTAAAGTTTCACTTTATATAAGATATAGAAAGAAGCTTACGTTTTTAACGTAAGCTTCTTTTTTATGTAAGTGTGTGTAACCGTTAAGCCGCATTGGGTTTTGTCGGTGAGTACGCGGAACTTTACGGTTACACGACACTTATTCATATGAGTAAAAGGGTTATTCCAACAGAAAATTTGTAATTAAGCGATTGGACCGCCTAAGTTAATAATAGCTTCAGAAACGCTATCGAATTTTTTGAAGTTTTCTTTAAATTCGTTTGCAAGCTCAATTGCTTTCACTTTGTAAGCATCTTTATCAGCCCAAGTTTGTTCAGGCATTAATACTTCATCAGGTACACCTGGAACATGACGAGGTACTTCAAGGCCAAAGATGTCGTGTTTTGCAGTTTCAGCTTTAGCAAGTTCACCGCTTAGTGCTGCTTGAATCATTGCACGAGTGTAACCTAAGTTCATACGTTTACCAACGCCGTATTCGCCACCAGTCCAGCCAGTGTTTACTAAGAATACTTTCGCATCATGTTTCTCGATTTTTTCACCAAGCATTTCAGCATAACGAGATGCATCAAGCGGTAAGAACGGTGAACCGAAGCAAGTAGAGAATGTAGCTTGCGGAGATGTAACACCGCGCTCTGTTCCTGCTAGCTTACTAGTGTAACCGCTTAAGAAATGGTACATAGCTTGCTCTTTTGATAACTTACTGATTGGAGGCAATACGCCAGATGCATCAGCAGTTAAGAAAATAATTGTATTTGGATGTCCTGCAACACTTGGCAGTACGATATTGTCAATCGCATGCATAGGGTATGCAGCACGTGTATTTTCTGTTAAAGTAGTATCGTTATAGTCAGCGATGCGTGTTTGACCATCAATGACAACGTTTTCTAAAACTGAACCAAATTTGATTGCATCGAAGATTTGTGGTTCTTTCTCATGAGAAAGGTTTACACATTTTGCATAGCAACCGCCTTCAATATTGAATACGCCGTTATCAGACCAACCGTGCTCATCGTCACCGATTAATTTACGGTTTGGATCAGCAGATAATGTTGTTTTGCCTGTTCCAGATAAACCGAAGAATAGTGCTACGTCGCCTTCTTCGCCTACGTTTGAAGAGCAGTGCATAGAAAGAATGTCTTGTTCAGGTAGTAAGAAGTTCATAATAGAGAAGATTGATTTTTTCATTTCTCCAGCGTATTCTGTACCACCGATTAGTACGATACGTTTTTCGAATGAAACCATAATGAATGCTTCAGAATTTGTACCGTCAATTGCTGGATCTGCTTTGAAGTTTGGTGCAGAAACAATTGTGAACTCTGATTCGTGAGTTGCTAATTCTTCTTCATTTGGACGAATAAATAATTGATGTACGAACAAATTATGCCATGCATATTCGTTAACAACTTGAATTGGTAGGCGATAGTTGCGATCAGCGCCAGCAAATCCTTTGAAGATGAATAACTCTTCTTTTTCTTTTAAGTATTCTAAAACTTTTATATATAATTTATTAAAATGTTCTTCAGAGATCGGTTGGTTCACGGCTCCCCAAGCAATTTTGTCAGCAACCGATGCTTCCTTCACAATAAATTTATCCTTAGGAGAACGTCCTGTGTATTTTCCTGTTGAAGCAGAAACGGCACCAGTAGAAGTTAATTTCCCTTCGTTTCGCATTAATACTTTTTCCACTAATTGCGGAACACTTAATTGAATCTGTGCATTGCTTCCGTTCAATAATTCATGTAAACCAATTTGGACATTCACAGTACTCATATTTATATACCATCCTTTTCATTTAATGAAATATTTCTCGTAATCCCCATCAATAGTATAACACAATTATATAAATAATGTATACTATTTATTTATTTT
TGTTTGTGTGAATGTATCATTTCCCTATTAATATTTCATTCTATGGGTATTGAGAAAAACTTTCAGGAAAATGTGAATATTAATTGACAAATGAATATCATTTCTTTAGTATAGGTTGGGACGGATACTCTCTTATCCCGAGCTGGCGGAGGGACAGGCCCGATGAAGCCCAGCAACCTCATTTGTAGTGGTAAATACAGGTGAATAGGTGCTAAAACCTGTGCGAGGCTAACGGTCTCGAACGATAAGAGCAAAGGGCAAAAAGCAGTATGCAAGTAGCAAATTAAACCTTTCCTCTATGTTAAGTAGGAAAGGTTTTTCTGTATGCTTGTGTGGGAGAATAAATGTATGTCGCAGTTTGTGGCAAATTAAGGATGAGTTCCGTACAATATATACAATTACTGTAGGGAGGTTTACCACATGACAAAAAAACGTCATCTGTTCACATCTGAGTCTGTAACTGAAGGACATCCAGATAAAATTTGTGACCAAATTTCTGATTCAATTTTAGATGCGATCTTAGCAAAGGACGCAAATGCACGTGTAGCTTGTGAAACAACTGTAACAACTGGTTTAGTATTGGTAGCGGGGGAAATTACGACTTCTACTTACGTAGATATTCCAAAAATCGTTCGTGAAACAATTCAAGGCATTGGTTACACACGCGCAAAATACGGATTCGATGCAGAAACTTGTGCAGTTTTAACATCTATCGATGAGCAATCTGCTGACATCGCTATGGGTGTTGACCAAGCACTAGAAGCACGCGAAGGTCAAATGACTGACGCTGAGATTGAGGCAATTGGTGCGGGAGACCAAGGTTTAATGTTTGGCTTCGCATGTAATGAAACACAAGAATTAATGCCACTTCCAATCTCGCTTGCTCACAAATTAGCTCGTCGTTTAACTGAAGTACGTAAAGATGACACATTATCATACTTACGTCCGGATGGAAAAACGCAAGTTACAGTTGAGTATGATGAAAATGGTAAACCTGTACGTGTGGATACAATTGTAATTTCTACACAACATCATCCAGATGTTACGTGGGAAGAAATCGATCGCGATTTAAAAGAGCACGTAATTAAAGCTGTAGTTCCAGCAGAATTAATGGATGGAGAAACGAAATTCTTCATTAACCCAACTGGCCGCTTCGTAATTGGTGGACCACAAGGTGATGCTGGTTTAACAGGACGTAAAATCATCGTTGATACTTACGGTGGATACGCTCGCCATGGCGGTGGTGCATTCTCTGGTAAAGATGCAACGAAAGTTGACCGTTCTGCAGCATATGCAGCTCGTTATGTTGCGAAAAACATCGTAGCAGCTGGTCTTGCTGACAAAGCAGAAGTACAACTTGCATACGCAATTGGCGTAGCACAACCAGTATCAATTTCAGTTGATACATTAGGCACTGGTAAAGTATCTGAAGACGTATTAGTAGAACTAGTTCGTAACAACTTCGATCTTCGCCCAGCTGGTATTATTAAAATGCTAGACTTACGTCGCCCAATTTACAAACAAACAGCAGCTTACGGCCACTTCGGACGTACTGATGTAGATCTTACATGGGAACGTACAGACAAAGCAGCTACTTTAAAAGAGCAAGCTGGTCTATAATATATGAAAAAAAGCTTTGCGCTGTGTGCGCAAAGCTTTTTTTATTTGGTTTTATCCCGCATTAACTGCCCGTAAAAGCCCGATTGGTTCAACTAATAATCAGTGGGGAAGAACAAAATCCCCACTGATTAAAGTTTCACTTTATTTACTACTTGAATGCAAGAACTACAATAAACTTCCCCGTCTAGCTCAATATACTTCTTCATATTTGTCATACCACTTGCCGGAAATGGCATATGAATCCACGCTTTTTCATACGTTTGTATTTCCTTTTCACATGATTTACAAAGGATTGGTTTCTTACGGAACATAGACGGTCACCCTTTCAGTTGGCGTGCTTTTCCTGCGCAAAAAGCACTGCTCAGTAAAAATAGCGTTGC
GAATATAATACTCACTAATGTAGCGTATGGAATCGCGCCTTTTAAGGAAAAGCCGACAGTGATGGAAGCAACAAATAAGAGAATGAATGTTGTTGTTAATTTTTTGTAAAGTTCCATAGTGAGTGAACCTCCAAATTATTTTGTAATTTAAAATGAAATCTACTATGTAGAAAATATAAGGGGATTTAAAGAGTAGCTAATGGTGTGTAGTCCAGGCGTATAAGAAAGTATATTTTCCTTTTTGTGGTCGTATATACTTATATTGTATCACGGTGCATCATTGGCTTCATACATAATTGTGTAAAGATATGTATGATAAAATGAAGAAAAGTCTTTAGGAAAGGAAAGATGAAAAATGAGCGTAACTGAACATAAAAAACAAGCTCCAAAAGAAGTGCGCTGCAAAATCGTGACGATTTCCGATACACGTACGGAAGAGACGGATAAGAGCGGACAACTATTACATGAATTATTAAAAGAAGCAGGACATAAAGTGACCTCTTATGAAATTGTGAAAGATGATAAAGAGAGTATTCAGCAGGCTGTGTTAGCTGGTTATCATAGGGAAGATGTTGATGTCGTGCTAACGAATGGTGGAACTGGTATTACGAAACGTGATGTAACGATTGAGGCAGTATCAGTGTTATTAGATAAAGAAATTGTTGGATTTGGTGAGTTGTTCCGTATGATCAGTTATTTAGAAGATATCGGAAGTAGCGCAATGTTAAGTAGAGCGATCGGTGGTACAATCGGGCGTAAAGTTGTCTTTTCGATGCCAGGGTCTAGCGGAGCGGTTCGTCTTGCGATGAACAAGTTAATTTTACCGGAATTAGGTCATATTACATTTGAGTTGCATCGTCAATGAGTAAGTGTGCTGGAATTGTATTAGCAGGAGGTATGTCGAGCCGATTCGGTGAGCCGAAAGCGTTAGTAGGCTGGAAAGAAAGTACTTTTATTGAGCATATTGTGAAAGTAATGGAAAGCGCTGTGCAAGAAATTGTAGTCATTAGTCATACTGATATAAAAGAGAGAGTAGAGCAATTAGTACAAGTTCCAGTTATAGAAGATATGTCGCACTATAAAGGAAATGGGCCACTTGCTGGAATAGTGTCAGGAATGGAATACATAGATTCAGATTGGTATATTATCATGCCTTGTGATGCACCGAACGTTTCAAATGAATGGATTACCATTTTATTAGAGCAAACGAGCGATGAATATGATGCTGTTGTCCCTATTATTAATGGAAGAAAACAACCGTTACTTGCAGCGTATAATAACCGCGTGAAAGAAAAGATTTATGCTTTGCTTCAAGAAGAAAAAAGAAGTATGGGCCAGCTTTTATCACAATGTAATGTGAAATATATTGCTGGTGAAGATGTACAAGCGAATGTAGATTGGTTTATAAATGTGAATACGAAAGAAGAATATGTGCAGGCTCAAAAAGACCTTTCAAATGAATGAAAGGTTTTTTGGTATACAGAAATTGGAGAACAATTGCTGTATAAAGGTATGTGAGTAAGTTGATTTTAAGGGGATTTCGTGAACTACCCGCCACTTAACGTCCTAAAGGAGTGTTTGAAGTGAGGGCTTCTCGGTTAATCATTACTTCTGATAGCTAACGAATTCGTCCTAAGACAGCCGAGCTAACTCCCTTGTTCCAAAGGTTATTTTATTACAATTTATGCTGACGCTAAAAGACGTATTGCTTCATTTTTGATATTGATGGCTGAGTTATAATCTCTATCGAGATTTACACCGCATATACAAGAATAGATTCGTTCAGACAATGTCAGACTTTTCACATTTCCACAGCTACTACATGTTTTTGTTGATGGAAACCATTTGTCTATTTTCACAAGTTGTTTTCCTTGTTCGTTTAGTTTATAAGTTAAGAAAGAAGTGAACATACCCCACCCATTATCAGCGACACTTTTTCCAAAATGAAGTGCTTGCGATATTCCTTTCATGTTTAGATCTTCAATAGCTACAACATCAAA
ATTGGCAGCTAACTTTTTAGACTCATGATGAAGAAAATTCTTACGTTGGTTAGCTACTTTCTCATGTAACGTAGCTACACGAATACGTTGTTTATTCCAACGTTCAGAACCTTTATTGCGTCTTGATAATACTCGTTGCGCCTTTGTTAATCGGTCTAACATGTTACGATAGAACTTAGGATAATTGGCTTTCTCATCCTCAGAACTGACGTATAATCCATCCATTGCAAAGTCTAATCCAACAACTGTTTCTACTTCTTTTTGTACAATCTCTTTTTCGTATTCAGTCAAGATTGACACATAGTATTTACCAGTAGGTGTCATAGAAATCGTACAAGACTTGATCATATAATCTTGTGGTATCTCTCTATGTTTTTTGATACGTACCATTTTCAGTTTTGGCAATTTGATATGACCATTAAGCAACAGAATATTACCGTTTACTACATTCGTTGTATAAGACTTTCTGTCCTTTTTACTTTTGAATGTTGGGAAGTTATTTTGACCACTAAAGAAATTCTTATATGCAGTTTGCAAGTGTAGTTGAGCATTTGCAAAAGCTAATGAATCAACTTCTTTTAACCACTCAAATTCTGCTTTATATTTCGCAGGGGTTGGAGGCTTTTGTTTCTTTAGTTGTTCCTTATCATCTTTGTATTTTTCGTACGCTTCTTTTCGCTCAGCTAACATTCTATTGTACACGAAACGTACACAACCGAAGGTTTTACGTATAAGATTTGCTTGTTCTTCTGTTGGATACAACCGGAATTTATATGCTTTATTCTGCTTTGTCATATCATTTCACCTCACTTTTTACCTTGATTTTCAATATGTTTCTTTACTACGTCTATTGGCGAACTACCAGTAGTTAGTAAGCAAAAACTTCTTGACCAAAACATTTCTTTCTAAAGTTTTTTTCACTTGTGGAAAGTCTTTCTTGATTAGTTGAGAACTTGCACTTTTATAAGCATTAATAAATTTTGTCATTTCTTTTTAAGCGAGTAATTCTGATGCGGTGTGCTGAAGGACTGTTTCATATTGGTCAAGTAAGCCACGAAAGATTCCATCCCAGCTTTTTGATTGGGCATAAGACGAAGCAGCTATACCCATTTGCATAAGCTGCTCTTCATTTTGTAATAAAGAATGGATGGATGATAGAAATGAATCTGCATGTTTGGGTGGGCAAAGAACTCCTGTTTTTCCATCTGTAATAATATTTTTAACTCCGCCACTATTTGCACCAATGACAGGTGTACCGCATGCAAATGATTCAAGTACGACATTTCCGAACGTTTCAGTAGCTGATGGAAATACCATCATATGAGAACAAGCATATGCTTCAGCTAAATCCGTACCTTGTAAATATCCAGTAAAAGTGACATTTGTTTTCGGAACAGCTTCACGCAAATTTGTTGCTAGAGGCCCGTCTCCAGCGATGAGCCAATGAATATCGTCACGTTCTTTGTTTGTCGTTTGAATAAGTGTTTGAAGAGTATCGATATCTTTTTCAGGAGCAAGGCGTCCGACGTAGGAAAGAATATGTTTCGCTGTAATATTATATTTTTTTCGGAATAGGTCTTTATTGTAAGTTGGATGAAAGAGAGTACAATCTACACCACGTCCCCAAATATAGAGCTGTTGAAATCCTTTTTTCTTTAATTGATGTAATGTTTCAGGAGAAGGAACAAAGTTTTTTTGCATATGACTATGAAACCAGCTTAAATAGTTCCATAGCATATTGGAGAGAAATTCGATTTTGTAATAGCGTAAATAGGCGTCAAAATCAGTATGATAAGAACCGACAACTGGGATATTTAACTTTTTGGCATAATATAACCCACAAAGTCCCATATTGAAAGGCGTAGCGATGTGAATGATATCAGGTTTAAAGGCAAGAAGTTCCCGCTTAATGCGCGGAGTAGGAAAGGCAAAGCGACATTCTGGATATAATATTGTTAACGGGATACTTCTCATTTTGTTCACATTCGCTACGAAATTAT
CTTCTGCCGTATGCTGAGGGGCGAAAACAGAATAGGCGATATTTTCTTTTTGAAAATATTTAGTTAAACGTTCCAAAGTTTTCGCCACACCGTTGACTTGTGGTGTAAATGTATCGGTAAATATGGCGACTCTCATCATATCGCTCCTTTACATGAGTGGAATGAGTTGATAAAAAGAGATGATGCCAGAGCAAGTGCCAAGGCACATGCCTACAAATACATCTGACGGATAATGAAGCCCTAAATAAATACGGGAAATACCGACGCATAATGCTAACGGTAATAGAAAAGCAAGCAAGTTTGGATTATAGCAAATAAATGGAATGAGGACAGAGAAAACAGCCGTTGTATGTCCGGATGGAAAAGAATGATCTTTTAATGGATGAACTGGATATTTCGCATCCTGAATTGTTAAATAAGGGCGTTTTCGTGGATACCATCTTTTTAATATTTGCACAGGAATATGGCTAATTGTTAAAGAAATAGCAGTTGCAATTGCAGCTTGATGCAAATTCCCTGTTGCGAAAATTAAAAAGAATAGGGTAAGTGCAATGGAAAAACTTGCACCACCGATATGGGTAATATTGCTGAAAAAGATGTTTAATGTTTTTTGATCAAAGTAGCGATTAATTCCTTTGAAGATGTAACATTCTATTTTATATAGTCCACTGACCTTCATGAGATTTTTCCTCCCTCATTTACATATATATGGAGTTATTAACTTCTATTTTAATAAGATTTTATTGAGGGAATAATAATGTTTTGTAAAGAAAAAGTTAGATTTTTTCATAAAAAAAGCTGCCAGTCTTATGGCAGCGATAAAATCACTTTTGTAACGATTTATAATATTGTCCTTTTTCAACGTATTGTGTGCGGATACGTTCCATATCTTTACGGTCTTCTTCTGTTAATTCGCGAATGACCTTCGCAGGGCGACCGAAAGCTAACGTATTAGGAGGGATTTTCTTTCCTTGTGAAACGAGACTACCAGCACCGATGAAAGCTCCTTCACCAATTTCAGCGCCATCTAATATGATAGATCCCATTCCGATTAAAGCATCTTTTTCAATTTTGCAGCTATGTAAAATAACTTGATGCCCAACGGTAACGTCATCTTCTAAAATAAGAGGATACTGGGGGCTTTGGTGAAGTGTACATTGATCTTGTACATTTACTCTATTTCCAATTATCGTTGGTGATACATCACCGCGGATGACTGTATTAAACCAAATGCTAGATTCTTCGCCAATTGTCACATCGCCTGTAATGGTAACATAGTCAGCGATAAAAGCACTACTCGCAATTTTCGGATTTTTTTCTTTGTAAGGATATATCATGTAAAGCCTTCCTTTCCTAGAGACTAGTTTTAGTGTATCAAATTATGAGAAAGAGTGAAATGGAGGAGGTCCGTATGTGGAATTATGAAGCAGAGGAAGCGAAAGCTGTAGTCGTTATCGTGCACGGCGCAATGGAATATCACGGACGTTACGAAGCTATAGCAGAAATGTGGAATCATATCGGCTACCACGTTGTGATGGGAGACCTTCCATCACATGGAACGACTTCGAGAAATAGAGGACATATTGATTCATTTGATGAATACATAGAGGAAGTTAAATTATGGGTGAAAGAGGCAAGGAAGTATCGATTACCTATTTTTATATTTGGTCATAGTATGGGTGGTCTTATTGTCATTCGTATGATGCAAGAAACGAAGAGAGAAGATGTAGATGGTATTATTTTAAGTTCACCATGTTTAGGTGTATTAACTGGACCTTCTGCTCCGCTTCGAGCTGCCTCAAAAATATTAAATGTTGTTGCTCCAAAATTGCAATTTGCAACGAATCTTACAGTGGAAATGTCAACTCGTAATCATGAAGTGAGAGATGCGATGGAGAATGATTCATTGTTCTTGCGCAAAGTATCAGTACGTTGGTATAGTGAATTGATTAAGTCTATCGAAATTGCGCATAAAAAAATAGGCGATTTTCCAGACGTTC
CACTCTTGCTAATGCAGGCTTGTGAGGATAAACTTGTAGATAAAACACGTGTCCGTACTTGGTTTGATAATGTTAAAATAAGTGATAAGGCTTTTAAAGAATGGCCGAATTGTTATCATGAGTTATTAAATGAGTATGAGCGTGATGAAATTTTGAATTATATTCAGTCATTTACTGAAATACGCATCAATAACATAATAGAAACAAATAAGTAAATTATTTGTACATTGAATAGAGGAGATGAAGGAAGAAGTGAACGTACCGAGTAATCCCATAACGCTCATGGCGAAAGTATACCGTGATGTGTTTCCGGTAGTACACCATGAGCTAGCGATGTGGAAAGAGCGTGCCTACCATATTCCGAATGACGAGCTTCATAGTCAGGCAATCGCAAGTATTGAGAATAAAACGTTTCATTGCGAGGGCGGTGGCATTTTAGCGCTACTAGCAAATGAACATCGTGAGGAATGTATTCGTTTTATCGTAGCTTATCAAACGATTAGCGACTATTTAGATAATTTATGTGATCGTAGTACATCACTTGACCCGAAAGACTTTGCGGCTCTTCATGAGTCGATGGTAATGGCATTAAGCCCTGAAGTTGAAGGGGGCGGTAACTATTATCGTTATCGTGATGATCAAGATGATGGTGGTTATTTAGATGAGCTTGTTGAAACATGTCAGGATGTTTTAAAGAAAACGAAGCATTATGATAAAATTGCTCCAATTCTTCATGAACTTGCTTGTTATTATTGTGATTTGCAAATTCATAAACACGTGAAGTTAGAAGAAAGAGAACCACGCTTAAAAACATGGTTTGAAGCGCATAAAGAAAATTTACCACCGATGAGTTGGTTTGAGTTTTCAGCATGTGCAGGTTCTACGCTTGGAATTTTCTGTCTTGTCGCATATGCATTTCATGATGAATTACATGATGAAGATATTGCGAAAATTAGACAAGGATACTTCCCTTACGTACAAGGACTTCACATTTTACTTGATTATTTCATCGATCAAGAAGAAGACCGTATAGGCGGGGATTTGAATTTCTGTAGTTATTATGAAAACGAGCAAGCTATATTAGATCGTATGAAACATTTTGTAGAAGAAGCAGAGAAGAGCATTGGTGATTTGCCTCATGCGAAGTTTCATCGTCTCATAAGCCGAGGATTACTTGGTATTTATTTATCAGATCAAAAAGTATCAGCGCAAAAGAATATGCATAAAATGGCACGGCGCATTGTAAAATACGGAGGGCTCACTTCACGATTCTTCTACTGGAACGGGAAGATGTACCGAAAGAAAATGGCGCAGTGATGAAGAGACCATCTCAAAAGGTATTTTGGGATGGTCTTTTCCTATTGTCGTATTTTGTAGTTAAGTCGATATAATTTCATGTGCTGTTGCAATCTTGCTCCATAATGAAAAAGGGACAGTCCAAAATTAGATTTTTTGGACTGTCCCTTTTTACTATGATCGTATTTTTAAAAATACATATATGAGAAGTTCAATCGCGAAAATTACTAAAACAGATAGTCCGAATAAAGGCATGACAGCGCCTAGTATAACCATCATGATAAAGAAAACGATTATGCTTTTCTTATCTCTTTGCTTTGGCGGTGCTGCTAATTTTCCTTTTGGCTTTCTTGCTAACCACATTTTTATTCCGTAATAAATGAGAAGTAATAAAGATAATGTCGTTAGTAAGCATAATATTTTATTTGGCCATCCGAATAAATGTCCTTCGTGAAGCGGGATACCGTAAGTGAACCATTGTGCAAATAATCCATAATCACGATAGTCCGTTTTCGAAATGAGTTCTCCGCTATATTGATCAAAGTAAGCTGTTATTTCTTCATTTGGTGCGACATGCATACCCGTAATACCGGAACCGCTTGATTTCGAAACGGTGAATACACCTTTCGGATCAGCCGGTAGGGAGATAACATATGGCTTCTTTATTTCAATTCCTTTTTGTAATTCATCGACAGAAATTGCTT
TTGGCTCATTTGAATTCGATTCAGGAGGAGCTTCTTTTCTTGTTGCCCACGGCAATTCTTTTACCTTTGATTCAGGCGGTGCCATATACAACTTTGGATATCCAATCGATTCATTCGACGATGCGATTTTATAAATTTGGTTACCCATAAATCCTGACCAAGGCAATCCAGATGCGACTAGCAGAACGAGTGGAATTGTAAATATAATACCGATAATAGAATGACGTCGTTTTGCTTTTTCTCGCTTATTGGATGCTGGTGTGTTTTTGAATTGGCGTATACTCATATACAACCCAGTCACAATTAAAAAGATTGTCCAGCACGCTGCAAGTTCTACAATATAATTGACGACAGTGCCACCGACTAAAAGGGAACTATGTAATTCCCGCATTATATTGGAAAACGTTTCAGCTGTATTTTGATCCCCAACAATTTGATTATTGCTATCTAAATACACAAATTTTTGCTTTCCTGTATATTCATTAGCAATCGTAAGCCTAGTATTATAATCCCCATTAAACTCACTAATTTTCGTCACGCTATAATGCGGATATTTTTTCTCTGTTAAGAAAAGCGAATCAGCCATCGAAATAGATTCTGTTTGGGCGCTCTTCCCAAAATATAAATCTTTATAGATGAAATCTTCAACTTCCTCACGAAATAAATACCCAATTCCGCTCAGTGACAAAGTAATAAGAAGCGGCGTAATAAAAAGCCCGGCATAAAAATGCCAACGCCAAAAAATATAATGAAGCGAACGATTTTGTTTCATACGACAGTTCCCCTAACCTTCCCTATATGAATAAACTACTTATTTAATATAGCGAGATTAAGTGTGATTACTTTGAAAGAATTGTGAAAAAAGTTAAAAGTTTTGGGGATTTTTATCAAACACAAAGCACATTTCAAAAGGGATGTGTTTTATGTTTATGAAGACCTTTTTATATTTGATGCATAGATCCTAACAACTGCTTCCGCAAGACATTGAGCGGAATCAACAAAGTCCTCACTCACCACCACATTCAAATCGGTACATGCGATAATAGCGGTATCTACTTCGTCTTTTAACTGTAAAACGAGCAGACTCCATAATTTGCGAGCTTCCTCAATTTCTCCGCCTTTAATACAAGTAATAATTTGATTAATCATTTCTTGCCATTGTTCATTATGAATGTACTCTATATTACGTTTTGTAATCCCGTCTTGGTAAATACCTGCTTGAACAGTCGCTTCTGTTGCAAGAAGAGCGACTCTTTTTGTATTTTCAGGAATTGCTTTTAACGTCTCATCGACTATATTCAAAATAGGAATGGTAATAGAACGCTGTAATTCTTCGAAATAAAGATGCGCTGTATTACATGGCATTGCGATGAAATCTACACCAGTACTTTCAAGTCTTTGCGCTCCTTCAATAATCGCTTTTTTCATCGCCGCATGATCAATAGGGCGATCCATATAAAACGGTGTTGGGCACGAATAAATCATCATATGAGGAAAGTCCATATCATGCTTTGCTCCATATATTGTTTGGCACCCTGCTACAACTGTATCGACGAATGGTCCGGTTGATTTTGGACCCATTCCTGCTAGTATTCCAATCATTGTTTAGTCCCCTTTAATATGTAATCAAAAACGTTTTCTGCGAACGAGAAAAAGAAATAAAGTAAGTAATAAAAATGAATATACGATAGCCCAAGCTGTAAGCCATAGAGTATGAGTAAGTATATTATATTTGTATTGTCCAAGCATTATAAAGCTTTCGACAGGTGGTAAAAACCATAGTAACTCTTTTTTTAACCGGATTGAGGCAATCGTCATAAGTAATATAAATGTAAGACTAAGCCAAGCGGTGCTTGCTTTTTGAATAATATTTCTAGTAAATAATGTTGCGATTGAAATTCCTAGTATAGATAAAGACATGTGAGCGAGAAATCCGACTAGAAATAAAGAAGCTGTCATTTTTTCATTAAACATTTGCAAAATAATTGGATAGATTACGG
ATATAAAAGATAAAACAGTACAAATAAGAAGAGCCGTAACATATTTACCAACGTATAATGCCGAAATATTATTTGTATGTGAAATAGTGATTTGTTCTTGTACAAGATCTTCAGTATGAAAAATTGTGACTGTAATCCAGGCAGATAATAAATAAAGTGCGAGTGCTGTTTCTAAGTATGTCGGTACAATGGGAGTTGGTTTATATGTATATACAAAAAGTAAGCTTATGCAGTACATCGCAATAGGAGGAATGTACTTATACGATTTTGTGTAGTCGAGAAAATGGTAACGAATAAGCGCAAACATAATAAACCTTCTTTCGATTTAAAAATTAGGCTGTAGCAGTGTGATAGATGCTTTTTTATGTAATAAAAACTGAAGCATTTCGTTTGTATATTCCTTTTCGATTTGTAATTGAATGAGGTTTTGATTTGAATTGTGTGTAACTTGTATAAATCCTGATTGCTTTTGTAGTTCTATAGATGAAAATGTTTCGTGAACAATTGCCTCAATATAGACCTGTTCTGTTGATTTTTGCACAGAGGTATCTTCCGCAATTGTATGATTTGCTAATGTCACAATTCTATCGGCGAAGTTTTCTAGTAGTTGTTTTTCGTGACATGTGAAGAGTATAGATATGCCTTGTTGTTTTAATGAGAGTAAAATATGCTCTAGTTCTTGCTGAGAGTTAGGGTCGAGTCCGGAAAGAGGTTCATCTAAAATTAATAAATGGACGTCTGTAAGTAACGCTTGCATAATGCCTGTTTTTTGTTTCATACCTTTTGAAAAGTTACGTACAATGGAATGCTTTGCATGATGTAAATGAAAAGATTCAAGGAGGATCGGAATTTTATTTCTTAAATATTTTGTTGATAAACCGTGAATGTGGCCGAGATGATATAAATAATCCTCTAATGTAAAACGAATTTCTTCTGGAAAATGTTCAGGAACATATCCGATTTGTATATGTTCTTTTCTTTGAAGTGTCCCTGCCGTAGGTGAAATAAATCCTGCAATTATTTTGAGTAGAGTACTTTTTCCAGTCCCGTTCCCGCCAATAATAGCGAGTGCTTCTCCTTCTGGAATGGATAAATCGATGTTGTCTAGTATGAGTGATTTGCTGTACTTCTTTTGAATTCCCCTTAGTTCTACTAGCAAAGTTCTATCCTTCTTTCTTCAAATATTGTATACATCCATTATATAGGAGAATAAGAAAAAGACGTGCAATGAGTGCACGTCTTTTTTTGTTATGTTTTGTCGAACAATCGATATATCTGAAAAATCGTTGATATAATTATCCCGCTATTTGCCGGACAGTAAACCTCACTGATTAAAGTTTCACTTTATCGCTTCTCAATCGCCACAATAAATGGTGGGTTATTTTGCTGGTTAATGAAGCCGTATCGCAAAACGTGAGCTTGTTTTTGGTCGAGTTCTTCTGCAAATTTGAGAACAGCGTCGCGTTCTACTTGTCCTTCTGGATGTCCGTGGTAAATGACAAGGACGATGATACCTTCAGGTGCCATTACTTCTAATAATTGCTCGATCGCCGAGATTGTTGAGTTCGGTTTTGTAACGATATGTTTGTCACCGCCAGGAAGGTAACCTAAGTTGAAGATAGCGCCTGTTACTTTTCCTTTTGCTTCTTCTGGTAGTACGGATAAAAGTGTATCGTGACTATCGTGAACTAAAACAGTGCGTTCGAAAAGTTCTTTTTCTTTTAGGTGGATAGTAGAACTTTCGATTGCTTCTTGTTGTATATCAAATCCAAATACTTTTCCGTTATCTCCAACGATTTCAGCTAGGAAGCAAGTGTCATGCCCATTGCCGAGTGTTGCATCTACAGCGTAATCGCCTTCTTTTACTGCCGTTTGCAGCAGCGAGCGAGCAAACGGTAATACACGTTCTAATTTCATTTTTGTTTCTCCTCATTTGCATATTTTCCTTGCCAGCTTCCGCGGCGTACAAATTCTGCATCGATGGAATTTAATACTTCCCA
TTTATTTAAGCTCCACATTGGACCAATCATTAAGTCAGGCGGACCGTCACCTGTGATGCGGTGCACAATTACGTCTTCTGGAATCATTTCAAGTTGGTCAACAACGAGACTTACGTAATCTTCAAGAGAAAGGAATTCTAGTTGTCCTTTTTCATATTGCTTCACCATTGGTGTTCCTTTTAATAAATGAAGTAAATGAATTTTAATTCCTTGTACATCAAGCTTCGCTACTTCACGAGCTGTTTCCATCATCATGTCGTAATCTTCAAGTGGAAGACCGTTAATAATATGAGAGCAAACTCTAATGCCATGTTTGCGTAATTTATTTACGCCTTCCACATAAGATGGATAATCGTGAGCACGGTTAATAAGATTTGCAGTGCGTTCATGAACAGTTTGTAGTCCGAGTTCAACCCAAAGGTATGTACGTTTATTTAAGTCCGCTAAATATTCAACGACATCGTCTGGTAAACAATCTGGACGAGTTGCGATAGAAAGACCGACAACGTCTTTTTCTGCTAGAAGCGGTTCGAATTTTTCTTTTAACACTTCAAGTGGTGCATGTGTATTTGTGTAAGCCTGAAAATAAGCGATACATTTTCCGTCTTTCCACTTTGAGTGCATTTTTTCTTTCATTTCATGATATTGCGTTATAACATCATCGCGGCGATCGCCAGCGAAGTCACCAGATCCAGCAGCACTACAAAACGTGCAGCCGCCGTAAGCGACTGTACCGTCACGGTTCGGGCAATCGAAGCCAGCATCTAATGAAACCTTAAAGATTTTTTCACCAAATTCATTTCGTAAGTGGTAATTCCATGTATGATAACGTTTATTGTCATTTGTATATGGAAAAGGGTTTTGAACCTTCATTATTTTCCCTCCTAAGACGAGTCAAAATGAAACAAAATCCATTATAACATACTCATAAGGTTCATATGGAAGGGACACACTAAAGGGGAATTGAAGCAAGGAGGGACAATTATGGCAGAGCGTCAATCACTTGAAGAGTATATTACACAGGCAGAACAAGCGGTGGAATATGCGAAAGAACAATTAGACCAAGGCATGAGACAGGAGCATTACAATACGATGGAGTATTCGGATGCTCAGTTGCAATTAGAACAAGCATATAATGATTTACAAACGATGCAACAACATGCGAATGATGAGCAACGTGAGCAATTAAATAGAGCACGTATGGCAATTCGCCAATTGCAACATCAAATGATTATTACACCGCACTAATAAGGAGTGAATGTAATGGCGAAACGTTCAGATCAAAACAATCCAGAGCAAAAAACGCAAAACGGACATAACGCTGAGTTTTCGAATGAACTTGATCCAGTTGTTCAAGTGAAACAGCGTAATAGTAAAAAAGGACAACCTCAAAGATCAAAGCAATCAGAGTAAACCAGGTCGCATTATGACCTGGTTTTATTTTGCCTCGTGATTGGTGCGGGCTAATCAAGGGGCAAAATAATACTGATTAAAGATTTAATTCATTATATTACAATAATGTAAGATTACTGTAAGGTTTGCCGATAGTTACGATTGTCTTTTTCCAGTAGAGTAAGGGAGGATTTAATTTTGGACAAGGAGTTGTATTATGGGGAACAATATTACAAACAAACGAATTGATGAGTTAGATTACATTCGTGGCTTCGCATTACTGGGGATTATTTTAGTAAATATTCTTGCACTACTGAATATTAAAGTTCCAGATCCTAGTACAGTGGATGCAAGTTATCAAAGGTTTCTATACTTATTTGTAGAAGGTCGTTTCTTCTCAATCTTTTCATTCTTATTTGGAGTAGGATTCTATATCTTCATTACAAGAGCAATTGCGAAAGGGAAGAATGGATATGTTCTATTTCTACGCCGCTTAGTTGCACTATTTATTTTTGGTTTGATTCATCAAATGTTTCAGCCTGGGGAAGCACTAGCGTTATACGCAATTTGTGGATTAGTCACTTTACCATTTTATAAAGTGAAGAAGC
AAATAAATTTAGTAATCGGTCTTATTCTTACGATAGTTTTTAGTATTATGGCAGCTAAGGAATTATTACCATTAGGTTTAATTTTATTAGGTCTTGCTGCAGGACAGTACAAAGTATTTGAAAATCTTTCAGCGAAAATAAAGCAAGTCGCTATTTTTACAGGAGTTATGTTTGTTTTAAGTGTGATAGCTTTATGGTATCAATATGGGCACGTTCCTGCTAATCCATTTGTAAATATGATACTTATGACTGAAGATGGAACAATGGACGCTGCAGGTCAATTCTTAAAAATTGGTGTTACAGTTGGACCAATTATTTCAGCTTTCTATGTAGGAGCATTAATTTTATTACTTCAGTTAAAACCAGTTCAAGCATTGTTAGCGCCACTGAAATACTATGGTCGTATGGCTTTAACAAATTATGTTGGACAAACTGCAATGATTTTAATTGCAGGCAGTGTATTTAATTTTGCAGGAAACTTAACGTACATGCAGACGCTATATGTGTGTATCGCAATTTATGCAATTCAAATTATTTTCAGTGTGATTTGGATGAAAATCTTTAAAATGGGTCCATTAGAATGGATTTGGCGTGTTATTACGTATTGGACGGTAACGCCTTTAAAGAAATAAGAGTAGGAAATAGGCTGTTCCACATGCGAAACAGCCTATTTTTATTGTCATCTTTTGTCGGTAAGTCGATATATTTGAAAAATCGCTGATAAAATTGCATTTACTAATGAAAAAACTAAGCTTACATGCTCAGTTTTTTTTCTTACAAAAGTGTAAGGTAATTGTAATGTTAATAAATAGCAGTTCACCTCCAAAAGGCGCAAAATAGGTAATGGAAAAACAAGCGTAGGAGGATATATATGACGAAACCAGTTGTAGACGTGAAAAACGTTCAAAAAGTGTACGGTAAAAAAGGTGAGAACCAATCACACGCGTTAAAAGGTGTTTCATTCTCAATTCAAGAGGGTGAGTTTGTTGGTATTATGGGACCATCTGGTTCTGGTAAAACGACATTATTAAATGTAATTTCAACGCTTGATAAAGCAACGGGCGGCGTTGTTGAAATTGCGGGTACGGATATTACGAAAATGAAGCAAGGTGAGCTTTCTGATTTCCGTTCACAAAAATTAGGATTTATCTTCCAAGACTTTAACTTATTAGAGAACTTATCTATTTATGAGAACATTGCACTTCCACTTTCCCTTCAAGGTGTTTCATCACGTAATATTGGACCGAAAGTAGAGAAAGTAGCGGATATGTTAGGGATTACAGAAATTCTTCAAAAGTATCCAACTGAAGTATCCGGTGGACAGAAACAACGTTCAGCAGCAGCACGTGCACTAGTGCATGAACCGGCAATTATTTTAGGAGACGAGCCAACAGGAGCTCTTGATTCGAAAAATGCAGCAAGTTTACTTGATGCGATGACAAACTTAAATGAAGAACAAGGCGTATCTATTATGATGGTTACGCATGATCCATATAGTGCAAGTTACTGTCAGCGTATTTTATTCATTCAAGATGGTGAGCTATATAAAGAAATTCACCGCGGTGGTACGCGTGAAGAATTTTATAAAGAAATTTTAGATGTGCTTGCGGACTTAGGCACACAAAAAGCGTAAGAAGGGAGGTCTAGGGCATGTTATTTAAACTTTCCATGTCAGGGCTAAAGAGTAAGCTGAAAGATTATATTGTCTTACTTGTTGGTCTTGTCATGTCGATTTCAATTTTTTATATGTTCCAAACGTTAGCGCTGAATAAAGCGTTTATCGAATCCAATTCTGTTATTAAGTCTATTGGTTTCGTATTCCAAGCAGGTTCATTTTTATTAGCAATTATAACGTTCTTCTACATTTTATATGCAAACTCTTTCTTATTATCTCTTCGTCAAAAAGAGTTTGGTATGTACATGATGTTAGGTGCAAAAAAGCATAAAGTTACATTACTTATGTTTATTGAAACGATTGTATTAGGTGCTGCGTCTCTTGC
GATTGGACTTACAGTTGGTGTAGGACTTGCAGAAGGTATCGGACAGTTATTAATGAAACAATTAGAATTTGCTGGTGAAGGCTATAAGGCATTTTATCTACCATCTATGACTGTTACTTGCATTTTCTTCTTTGCATTATTTGTATTATCAGCAATTATGAACAGTATTAAGTTATCACGTATTTCAGTACTGCAACTTGTACATGCAGATGCACAAACAGAACGTGTTGCGGTAAAAGGAAAAATGACAGGTGTCGTTGCAATCCTTGCTGTTATTTTATTAGGAATTGGCTATGCATCAATGATTTACATGGAAAAACTAAGAGAAATGGGAATCCTTATTGCATTAATTACAACAACAACTGGTACTTACATGCTATTTGGATCACTTCTCCCAGTTATCATTAGAAAGTTAAAGAGTAATAAAAAGCGTAGTGAAAAAGGACTTAATGCTTTTACGTTTGCACAATTAAATTTCCGTATTAATAGCTTAACGAAAGTACTTGCAACAGTAGCGATGTTAGTTGCTCTTGGAGCGGGTGCAATTTCAGGTGGTATGGCGTTTAAAAATAACGTTATAAAAATGGTTGATGGTTTAGTAATATATGATTCAGTTATTCATAACCCAACAGCTGAAGAAAAGAAAATTTTAGATGGTATTACATTTAAAGAGAAAAATGAATATCGTTACAAAGTTGATAATAAATACGTTTATTATGTAAAAGAAGATTTAGAGAAAAAACCTCCTTTCGTACAAGATACAAAACAGATGGAAAGTATGAAGGATTTAGGAAAAACAAAGAAAGTTTCAGAGGAACTACCAGTAGGTGCAGTTTCTAGAGAAATGAATGAAAAAGATGCGAGTGCTAAAGAACTTCCAGAAGAATGGAACGAGGCTTTCAGAACAATTCAGCCATTTTATCTATATGAAGATCATGCAATTAAAATTGTAGATCAGAAAATGTACGATACTGTAAATGGTAAAGAAGGAATCGTAGTTACTGGAAAAACAGACGATTTTGTAGCATACACAAAAGAATGGAAAAAACTTGACGAGTTGCAGCTAGTTAAATATAAAAATGTAAAAGCTGAAAGATTAGATAGTAAATATCAATCTTACAATGGATTCTACGGCATTGCGAGTGGAACAGTATTTATGGGATTCTTCCTTGGAATTGCATTCTTAGCAATGATGGCAAGTTGTTTAATGTTTAAAATTCTTTCTGGGGCATCAAAAGATATTACGCGTTATCAAATGCTGCGTAAAATCGGTGTGCGCCGTGAATTATTAACGAAATCAATTTATAAAGAGTTATTCTTAGTATTCTTATTCCCGGCAATTGTGGGTATTGCTCACGTATTAGTTGGTATGAATATTTTCGGATTTATTTTAGTCGATCCGTACTTCCGCATTTGGGTTCCAATCGTAATTTTCGTAGTAATTTATGCGATTTATTACTTCATTACAGTTCAATTGTATAAAGGGATTGTTCTTCCGAAAGAAGATTAATGAAAAACCGAGCGAAACTGCTCGGTTTTTTATATTTTCAGAAAAAACACTTTAATGTTACTGAAATATATAATATAATGTAAATAAGTAACATAATATGTGAGTATCAGCAACTGGAAAACGAAAGGAGGAATTCGATCTATAGAAAGAAGCTGAACTTGTATCCCCTATTTTTCTACCCACACTGTATTTGTACAATCATAAATTTCCAAATGCAAATTGCCTGGACCTAAGCAGTATATTGTGAAAATAATGGATAATACATGGAAATCTCATAGCATGATAAGGTAAAATGGACAGGATAATGAGAAAATTACCGTTCATAAGATGAGGAGATACATGTATGGAAGTTGTAGAGGCATTAAAAGATATAAACCAAATTGAGGCTATGAAAAAGTATTTAAAAGAGCACTCGCAGCGAGATTATCTTTTATTTGTTATTGGGATTAACACTGGATTAAAGATTACTGAACTATTGAGTATTAAGTTT
GAAGATGTATTAAATGAAGATGGAACTGTTAAAGAGTTTTATTCTCTTCCTGTGAAAGATGAAAAGTTTAAACAAGATATTTATTTAAATACAAAAGTAAAAGAGGCGCTTTTAGCGTACGTACAATCTTTTGATATTCAAAGAGAAAACTACGTATTTCAATCTAACAAAACAACAAATTCAATCTCACGCCAACAAGCGTATCGTGTTATCCATAGCGCAGCTGAAGCGGTCGGAATACTTGGTAAGATTGGAACGAACTCAATGCGAAAAACATTTGGATTTCATGCGTACAAAAGAGGAATAGCGATTGCGCTATTGCAAAAGCATTTTCATCACGCAACTCCATCAGAAACGTTAAAGTATTTAGGAATCTTAAAAGATGAGAAGTTTAAAACAGAGATTGATGTAGATTTGTAAAAGGAAAGAGCCGTAACTAAAAAATTAGGAGGCGGGAATATGAATATTAGAGAGAGTGAACTACCGGGCATTGGATGTAAATTTGAAGTGATAACAAAGGGTAATGAAAAAATGGTTATTGTTATTCATGATGATGGAAGAAGGGAAATGTACCATTTTGATGCGGATCACGATGAGAGTATTTCAAGCATTTCTCTTCGTGATTCTGAAGCGAGACAAATTGCGGCTATATTAGGCGGAATGGTATATAGGCCGCAAGCGCTAGACACAATTGAGATGGCTTTTGAAGGATTATCAATTGAATGGTTTAAGGTGGAGAATAATGCACCAGTAGTACAACAAACAATTGGGAGCTTACACGTTAGAAAAACATATAACGTAACAATCATTGCTATTTTGAAAAAGAATATGAAGAAATTCTTTAATCCAGGTCCAGACTCTATTATTGAAGCTGGCGATATGCTCGTATTATCGGGTGAAAGACATGAAGTAAAAAGAATTATTAATGAATTGCTTTCAGCAGGAGGTGATTCCTAATCGATGGATACTTTAATTTTTGAAGTTGGAACTGCGTTAGTATTAGTAGCTTTTGCAGCTATCCTCGCTGCGAAGTTAAAGTTCTCGATTATTCCGTTTCTCATTATACTCGGTATGTTAGTGGGGCCTCATGCCCCAGATTTAGGACTTATCGATTTAAGGTTTATTGAAAGCGGAGAAGTTATTTCCTTCCTCGGCCGTGTTGGCGTCATATTCCTCTTATTCTACTTAGGTTTAGAATTCTCAATAAAAAAATTAATTAAATCAGGAAAGTCGATTGCTTTTGGGGGTAGTGTTCATATATCGCTTAATTTCATATTAGGTTTACTTTATGGATATGTTATGGGTTTCCCCTTATTAGAAACATTAATTATTGCTGGAATCATTACAATCTCATCGAGTGCAATTGTTGCAAAAGTAATTGTTGATTTAAGAAGATCTGGTAATAAAGAGACAGAACTCATTTTAGGGATCATTATGTTTGATGATATCTTTTTAGCTGTATATTTATCAGTCGTTTCAGGATTAGTACTCGGAGGTGCAACATCATTTGTAGGTGCTCTTACATCCGTACTAATCGCAGTAGGATATATGTTACTATTCTTTGTAATCGCTAGAAAAGCTACGCCGTTTCTAAATAAAGTATTAGATATTTCGTCAAACGAAATTTTTATTATCGTAATATTCGCTATTTTATTCTTTGTAGCAGGATTTTCAGAAACAATTCATGTTGCGGAAGCGATTGGGGCTTTATTGTTAGGACTCGTCTTTTCAGAAACAGAGCATAGTGATCGAATCGAGCATCTCGTCGTCCCGTTTCGTGATTTCTTTGGAGCTATATTTTTCTTCAGCTTCGGTTTAAGTATAGATCCGTTTTCGCTTGGAGGAGCAGTATGGTTGGCATTAGGAGCAGTTTTCATTACTCTCATCGGTAATTTTACAGCTGGAATGATTGCAGGGCGTAAAGCCGGGTTATCGCATAAGGCTTCTGCGAATATCGGTTTAACACTTATATCACGTGGGGAATTCTCCATTATTGTCGC
GAATCTCGGAATAGCGGGTGGCTTAATGGCAACGATTAAACCATTCTCAGCTTTATATGTTTTAATATTGGCGTCGTTAGGGCCATTATTAACGAAGGAATCTGGGAGAATATACTCTCTACTAGACAAAATATTTAAATGGAGTGCTAAAGAAAGTGCAAAACGTAAAAAGGAAGTTGGATAACCTTCCGCTAGGGAACGATTCCATTATTTATAGATTAATATTTTACAAAGACGATTAGAATACATTCTAATCGTCTTTGTGCTATTATCTGAATTTCATCAACTATTAATCAGTTGCCTTCCTATTTAAAAACGACTACAATTTTACTGTTATATATATGAACGAAAAAAAGAGAATAGGAGGGGAGACTATGCCAAGGAAAGTATGGCTATTAGTAGCTGGGATGATTATTAATGTCACGGGTGCTTCTTTTTTATGGCCTTTTAATACAATTTACTTACATGATAGTTTAGGGAAATCTTTATCAGTAGCCGGAATGGTATTAATGATCAACTCGCTTACTGGTGTAATCGGAAACTTGCTCGGCGGTGTTTTATTTGATAAATGGGGCGGTTATAAATCAATTTTAGTAGGGATTGTCATTACACTTGTATCGATTTTAGGTCTTGTATTTTTCCATGGTTGGCCATTATATGTTGTGTGGCTAGCATTAATCGGATTCGGTTCTGGAATGGTATTTCCATCGATGTATGCGATGGTCGGTACGGTTTGGCCAGAAGGCGGGAGACGAGCGTTCAATGCAATGTATGTTGGACAAAACGTTGGGATTGCGATTGGAACAGCGTGCGGTGGATTAGTTGCGTCTTATCGTTTTGATTATATTTTCTTAGCGAACTTTATTTTATATTTTATTTTCTTCTTAATTGCGTTTATTGGATTTCGTGGTATGGAAGCAAAAAAAGAGCAAGAGGTACAAAAAGAAGCCGAAACGAAAAAAGGTTGGTCACTTACACCTGGCTTCAAAGCGCTTCTAATCGTATGTGTAGCATATGCTTTATGCTGGGTTACATACGTACAGTGGCAAGGGGCAATTGCGACGTATATGCAAGAGTTAAATATTAGCCTTCGTCACTACAGTTTATTATGGACGATAAACGGGGCGATGATTGTTTGTGCACAGCCGCTTGTTAGTATGCTAATTCGCTGGATGAAGCGTTCTTTAAAACAACAAATTATGATTGGAATTATCATTTTTGCGGCGTCGTTTATTGTTTTAAGCCAAGCGCAGCAATTTACGATGTTTCTCGTTGCGATGGTGACATTAACAATTGGTGAATTATTCGTATGGCCGGCAGTTCCGACCATCGCAAATATACTTGCACCGAAAGATAAGCTAGGATTTTATCAAGGTGTTGTAAATAGCGCAGCGACTGTAGGGAAAATGTTCGGACCGGTCGTTGGCGGAGCGATTGTTGACTTATACAATATGGAAGTATTGTTTATTGCGATCATGGTAATGCTTGTAGTAGCGCTTATAGCAACGAGTATTTATGATAGACGAGTAAAAGTAGAAGAAACAGTTGAAGAAAAAATTGCAGTTTAGTTTGACGGAACATGCAACATGTTTTAGAATATATTTAAATAAATAGTAATAAAATATCCTTTGGAACAAGGGAGTTAGCTCGGCTGTCCTAAGAAAAATTCGTTAGTTAGCAAGAGTAACGATTAACCGAGAAGCCCTCGCTTCAAGTAAGCTCTTAAGAGATACTAAGTGGTGGGTAGTTCACATTAACTCATATTTGTAATAACAGTGAGAAGGAGTAGTAGTAATAGAAGCTGGTTTAGAGAGTTGACGGTCGGTGCAAGTCAATCCACGTTTCGTTATGAACTCGCCTTTGAGTTGCAGTTGTGAAATCATTAGTAGCAATTGCCGTTAATCCACGTTACGGATCTAAGCGAATGTATTTATTACATTAATTTAGGGTGGTACCGCGGGAATCTATAACCTCTCGTCCCTTTCTAGGGATGAGAGGTT
TTTTGTATTTTGGGCGGTGAAAATAAATAGAATTTCAAGGAGGGTATCTCATGAGCTTTAATCATCAAGAAATTGAGAAGAAGTGGCAAGGGCATTGGGAAGAGAATAAAACATTCCGTACGCCAGATGAAACAGAAAAACCAAAATTTTATGCACTAGATATGTTCCCATATCCATCAGGTGCAGGCTTACACGTAGGTCATCCAGAAGGTTATACAGCGACAGATATTTTATCTCGTATGAAGCGTATGCAAGGATATAACGTTCTTCATCCAATGGGATGGGATGCATTCGGTCTTCCAGCAGAGCAATATGCACTTGATACTGGGAACAGCCCGGCTGAATTTACAGAGCTTAATATTAATACGTTCCGTAATCAAATTAAAGCATTAGGCTTCTCTTACGATTGGGATCGTGAAGTAAATACAACAGATCCAACCTACTACAAGTGGACACAATGGATCTTCCTAAAACTATTTGAAAAAGGTTTAGCTTACGTTGATGAAGTACCTGTAAACTGGTGCCCGGCACTTGGTACAGTACTTGCAAACGAAGAAATCATTGACGGTAAGAGTGAGCGCGGTGGACATCCAGTTGAGCGTCGTCCGATGAGACAGTGGATGTTAAAAATTACAGCTTACGGAGATCGTCTATTAGAAGATCTAGATGAGCTTAATTGGCCAGAAAGCTTAAAAGACATGCAACGTAACTGGATCGGTCGTTCTGAAGGTGCAGAAGTACACTTCAACATCGACGGTACAGATGAGAAGTTCACAGTTTTCACAACGCGTCCTGATACACTATTTGGATCAAGCTACTGTGTACTGGCTCCAGAGCATGCACTTGTTGCTAACATTACAACACCAGAACAAAAAGAAGCTGTAGAAGCTTACATTAATTCTGTAAAAATGAAGAGTGACCTAGAGCGTACAGAACTTGCGAAAGAAAAAACTGGTGTATTTACTGGTGCTTACGCAGTTAACCCAGTAAACGGTGAGAAATTACCAATCTGGATCGCTGATTATGTTCTTGCAACTTACGGAACAGGTGCTGTAATGGCAGTTCCAGCTCACGATGAGCGTGACTATGAATTCGCATCAACGTTCAATCTTCCAATGAAGGAAGTTGTAAAAGGCGGAGACATTTCGAAAGAAGCATATACAGGTGATGGTGCGCACGTAAACTCAGCATTCCTTGATGGTTTAAATAAAGAAGAAGCAATTGTAAAAATGATTGAGTGGCTTGAAGTAACAAGCGCAGGAAATCAAAAAGTAACGTACCGTCTACGTGACTGGTTATTTAGTCGCCAACGTTACTGGGGTGAGCCAATTCCAGTAATCCATTGGGAAGATGGTACAATGACAGCTGTGAAAGAAGAAGAATTACCATTAGTTCTTCCGAAAACAGAGAATATTCGTCCTTCAGGTACAGGTGAATCACCACTTGCAAACATTGAAGAGTGGGTAAATGTTGTTGATCCTGAAACTGGTAAAAAAGGTCGTCGTGAAACAAATACAATGCCACAATGGGCTGGTAGCTGCTGGTACTACCTACGCTACATCGATCCAAACAACAGCGAAGCACTTGTAGACCCTGAAAAAGTAAAACAATGGCTTCCAGTTGATATTTATATCGGTGGAGCAGAGCACGCTGTACTTCACTTACTATATGCTCGTTTCTGGCATAAAGTATTATATGATATCGGTGTAGTTCCAACGAAAGAACCGTTCCAACAATTATTCAACCAAGGTATGATCTTAGGTGAAAACAACGAGAAAATGAGTAAATCAAAAGGTAACGTTGTAAATCCTGATGATATCGTAGCAAGCCACGGTGCAGATACACTTCGTCTATACGAAATGTTCATGGGACCATTAGATGCTTCAATCGCTTGGTCTGAAAATGGTCTTGACGGAGCTCGTCGTTTCCTAGATCGCGTATGGCGCCTATTCATTCAAGATAACGGTGAATTAAGTGAGAAAATTACTGATGCACCAAAC
AAAGATCTTGAAAAAGCTTACCACCAAACAGTGAAGAAAGTAACAGAAGACTATGCAGAGCTTCGCTTCAACACAGCGATTTCTCAAATGATGGTATTCATCAACGATGCATACAAAGCTGAAACACTTCCGAAAGAATATGTAGAAGGTTTCGTAAAAATGATTGCACCAGTTGCACCTCACATCGGGGAAGAACTATGGAGCAAGCTTGGATACAATGAAACAATCACATATGCAAGCTGGCCAACATTTGATGAGTCTAAACTTGTAGAAGATGAAGTTGAAATCGTTGTTCAAATTATGGGCAAAGTTCGCGCAAAATTAACAATGAGTAAAGACGCATCAAAAGAAGAAATGGAACAACTTGCACTTGAGGAAATCAAAGAACAAATTGAAGGGAAAACAGTTCGTAAAGTAATTGTTGTTCCTGGAAAACTTGTTAACGTTGTTGCAAACTAATTAATTATTAATAAAAGCTCAGAGCGTTTATGCTCTGAGCTTTTTTGCGTGAAAAATAAGCTCAGGATCACCTTCATCTAAGTTTTCGATCATTCCACTACGCATAAATCCATTTGCTTTAAAAACTTTTTGCATATTTGTATTCGATTTGTTCGTTGAAGAAAATATTTTTTGAGTAGGAGAATGTCTTAACATATATGAGATTAATGAGCTTGCATGACCTTGTCTTCTTTTCGTTGGCGAAACAATGATTAATGAGAGGAAAGTATGACCAAAGAAATTTGTATCGTATGTTAGAAAACCAAAAATGGAATTGTCCTCTTTTGCAATTATGCAGTTTCCTTCATCAATAGCGTGTTTAATATAATCTCGTCTACTATCGTCTCCGATTACATCAACATCTATATGTAAGATTGAATCTAAATCGTCGATGGAAGCTTTTGCTATGTTTTCCAATGGAAATGCCCCCTTTCAAGTGAAAGAAAAATAATCAGTCAATTAATCCTATCATGAATGGAGAACGAAGAAAAAGGTGCGGAATTCCTATATAGACAAACACCTATTCTTCTATAGGAAATATGATAAAAAAGGTAGGTTAAATTGAGCAGAAGAGGTGAAAGGAATGAAAGGGATGGGCAATAATGCACCACATGGCTTCTTCGGAGGTGGATCGGATAGTTATGGACAAATGATGTTTATGGGAGGAGATGGTCAACACGGATATGGCGGAATCCCAAGTTGGATGGGAGGAGCAGCTGGAGGATTTCCGACGTCAGTGGCGGGTGTACAAACAGGATTCCCAACATCAGTAGCTGGAGTGCAAACAGGATTTCCGGCAGTAGTAGGAGGTACACAAGGAGGAGTGCCTGTAGGTATGCCAACTACATTTCCATCTTTCGTTGGCGGTGTACAAACAGGATTTCCTGTACCTGGCGTTGGTGTTGTAGCTGGTGGAATCGGCGGATTCCCTCAAGGTGTTCACGGTCATCATGAGCATCACGGCCATCATCATCATCATCAACACGGTCATCATCAGCACCACGGTCATCATCAGCATCATGGCCATCATCAAATTCATCCGCAGGCTGTCCTTTATCAAACACACCAAGGCCACCATCAGCACCAAGGCCACCATCAGCACCACGGTCACCATCAGCACCAAGGTCATCAAGATCATCACGGCCACCATCAGCAACAAGTACATCACCAAGGCCACCACCAAATTCATCCGCAAGCTGTTCTTTTCCAAACACACCAAGGCCAACACCATCACCAAGGCCAACATCATCACCAAGGCCAACACCATCACCAAGGCCAACATCATCACCAGGGCCAACACCATCATCAAGGTCAACACCATCATCAAGGTCAACACCATCATCAAGGTCAACAGCATCATCAAGGTCAACAGCATCATCAAGATCAACACCATCATCAAGGTCAACAGTATCATCAAGGTCAACAACAATACCAACAGTATCAGCAGCAACAACAGCCTTGGGCAGGTGGAATTGGAGCAGGAACGGCAGCAGGAGCAGCAG
CTGGTACTGCAGGAGCAGCAGGACACGCGGGACACGTGGGTCATTAATAGACCAAAGGAAAAAAGACATAATAAAAAACTGCTAGGAGATCCTAGCAGTTTTTTTCTGCCTCTTGACGTTGGCGTTCGATTTCCGCACGAAGTTGTGGTTCTGGTGCTTGCCATCTTTCTGGTTTCATAATTTTGCCGTCGCCCTCGCGGAATCGAGGCTTTCCATCAGGGAATAGTTTTGCCATGTTAGCATTGTTTACAATTTCGAATCCTTTATCTGGACGTACGCCCATTTCTGCAAACGTTCCGAATGCAAAGTAAATAAGGTCAATAAGTGCATCATATTGATCTTCAACAGTTGTTGCTTCTAGAAATTCTTCTAGCTCTTCTTGCATAAAGCTAGCACGAATTTTTGCACGCTTTTCTGTTAGTTTTGTTGGAGTACTTGTCACAGGATGTCCGAATTCTTCATGCATTTTTGCAACAAGTTCGTATCCTTTATCTAAACCTTTTTCGTTTGTCATGTTGTTTCATCCTTTCTTACGTTATGCCGAATTTTATCCCGCATTAACGGGCAGTAAGACTCCCACCTCAAAAGTCGCAAATGCGAGGAAGTGAGGTGGGAGATAACTGCCCGTAAAAGCCCGATTGGTTCAACTAATAATCAGTGGGGAATGGACAAAATCCCCACTGATTAAAGTTTCACTTTATTGTAACATAAGGTAGTTATTAAATAAAAAGAACATCTTATGAGAAATAAGATGTTCGCTCGTTATTCTTCTTTCTTAATTAAAACGGATAAAATAAGATCGATGCCAAACATTCCGATGAGTAGCATTGGTATAACTACAAGTAAATAACGTGTGAAAGAAACATTTTTTAAGTATGTATGAGCAAATAAAAAGACGCCGAGAAATAAAATACCATTTAAAATAGGTTTTACTAATTTTTTAGACATAAGCTCCCCCGTAAATATCCCTTTTCTCTATTTAAAAATAAAAACTCTCCTCTCCGTTGTACCATGCCAGAGAGGGGAGTTCAATCATTAACTTATTCAATTACACCAGAATTTGTAATATTGACAGTGTATGTCACCTTAATAGGAACATCTTTATACATTCTTCGCCACTCTTCTAATTTGAATGGACGGTAATGTTCTTTATACCTTGCGCCGAGACCGAGTGGATCCACATTCAAAGATTTAAACTGTTTAATTAGTTTATTAGCTTTTGCACCTAATTGTTTTTCAACTGATTTTTCTATTTTTTTTGTGTTCTTTTTATTTTCTAAGTTGAACTGTTTAGATAGTTCTTGAATACGTGCTTCTAGTTTGACGTGTATGAAAAATGATGGTTTACCATCTTTAATACTTACTTGGTAAGTTGGGACGGATCGAATATTATTAATGATTGTGTATCCTGAGTTTGCTTTAAATTCATGTGAATCTAATCGGTGTTTTTCCAATAAACCTTTAAAGGCAAACATATCATGATAACTAATTTTGCCAATATATTTATCTCCTTTTAAAAGGCCGATGCCAGTAATTTTTATTTTATCTTTATCTTTTTTGAGAATTGGTAAATAGGAATCTTGGCCTTCACGATAATATCGATAGGCTCCTAAATGTAGATTGTCAGTAGGTAAAGGACCCGTTTTCATATTATGGTCTAACATTTTCTTTATATAAATTGCAACGTTAGATGATGTTGTATATTTACCCTTTAGTAAGTCTAGCCCTGTTCCCTCTAATAAACCGACATACATAGAGTTCCCGATATTCACATCACGAATTAACGTGTCAAAGGAAGTTGACAACCCTTTTTTAGCAATTTTTGTAGTAAATAAAGCAATACGCATTTGTCCACTTGCAAACGGTTGAGCGGATTCTAAAGAAGTATCCGCTTTTACTTGTTTTACTGCGTTTCCTACGCCTTCAAAGACTTGAACTTTATTACCTTTTTTCTGTATAGGACATACGAATGTGACTTTTACTTTATTATCTTT 
+>8_3#NODE_4_length_39999_cov_63.2942_ID_7 +AAGGTTTAAGAAGGGGCAATAGAATTATGAAATCAGGAAAGTATGAAGTTAGAGCTAAGGGCCATGGCGCAAGTTTCATGCCTATGGAAGTAACCCTCTCTGAAGATGAAATTGAAGATATTAAGGTAGATGCTAAGGGCGAAACTAAGGGTGTGGCAGATGAAGTCTTTAGACGCTTGCCAGAAGAAATTGTTAAAAATCAAACTTTAAATGTTGATACCGTTAGTGGTGCCACTATTTCTAGTCATGGTGTGATTGATGGAGTAGCCAGTGCAATTAGTAAAGCTGGTGGAGACCCTGATGAGTGGAAAAAACGTGCCAAGCCTGCAGAACAAAGAGAAAAAGACGAAACATATACTACTGACGTTGTAATTGTTGGTGCTGGTGGTGCAGGTCTAGCTGCAGCAGCAAGAAGCATTCAACATAATAAAAAGGTTATTGTGCTTGAAAAATTCCCACAAATTGGTGGTAATACCAGTCGTGCTGGTGGCCCAATGAATGCGGCTGAACCTGATTGGCAAAAACAATTTAAAGCTTTAGCTGGTGAAAAGGAAACTCTTGAAGAACTGGCAGCAACTCCAATTGAAAAAATTGATCCAGAATATCGAACAGATTTTAAGGAGTTACAAAAACAAATTAAAGAATATGTTGCTTCTGGAGCTAATTACTTATTCGACTCAAAATTACTTCATGAAATTCAAACTTATTTAGGTGGTAAACGTACAGATTTAAAGGGTAATGAAATTCATGGTAATTATACCTTAGTTAAAGAATTAGTTGATAATGCTTTAGATTCAGTTCACTGGTTGGCAGATTTAGGAGTAGTTTTTGATCGCAGCCAAGTAACAATGCCAGTTGGTGCCTTGTGGAGAAGAGGACACAAGCCGGTTGAGCCAATGGGCTATGCCTTTATTCATGTTTTAGGTGATTGGGTAAAAGAACATGGTGGCACAATTTTAACTGATACTCGTGCTAAGCATTTAATTATTGAAAACGGAAAAGTTTGTGGTGTAATTGCCAAAAAGCCTGATGGCAGTAAAATTACTATTCACGCCAAAGCAGTAATTTTAACTGCTGGAGGATTTGGCGCAAATACGCCAATGGTTCAAAAATATAATACTTATTGGAAACATATTGATGATAATATTGCGACTACTAATTCTCCAGCTATTACTGGTGACGGGATTGGTCTTGGTAAAGAAGCAGGTGCAGATTTAGTTGGAATGGGCTTTATTCAAATGATGCCTGTTTCTGATCCTAAGACTGGTGAACTATTTACTGGTCTTCAAACTCCTCCGGAAAACTATATTATGGTCAATCAAAAGGGAAAGCGTTTCGTAAATGAGTTTGCGGAACGTGACGTTTTAACTAAGGCAGCGATTGATAATGGTGGTTTATTCTACTTAATCGCTGATGACAAGATCAAAGATACTGCTTATAACACAACGCAAGAATCACTTGATGCTCAAGTTAAAGCAGGTACCTTATTTAGAGCAGATAGCTTAGAAGAGTTAGCTGAACAAATCGGTATGGATCCTGATACTTTAGTCGATACAATCAAGAAATATAATTCATATGTCGATGCAGGAAAAGATCCTGATTTTGAAAAGTCTGCCTTTAACTTAAAATGCGAGGTGGCACCATTTTATGCTACGCCAAGAAAGCCTGCTATTCATCATACAATGGGTGGTTTAAAGATTGATACTGGTGCACATGTTTTAGATAAAGATGGTAAACAAATTTCTGGTTTATATGCGGCTGGGGAAGTAGCTGGCGGAATACATGCTGGCAACCGTCTAGGAGGGAATTCTCTTGCTGATATCTTTACTTTTGGTAGAATTGCCGCAAACAGTGCTATTGATGAATTAGAAAAATAGTATAGATAGATTAAAACGTTGTAAACACTAGTATTTGCAATGTTTTTTTGCTAAAATACTAAAAATACCTAATTTTTTG
GAGGACTTGAATTGAGAGAAGCGCAGGAAAAAAAGTTAACTACAAAGCAATACTTAATTGTAGCATCGATGATTTTTGCACTGTTTTTTGGTGCAGGAAACTTAATTTTCCCACTTCATTTAGGCCAGCTTGCTGGTAAAAACTGGGGACCAGCAGCAATTGGTTTTTCAATTACAGGAGTAGTCTTACCCTTACTTTCACTTCTTGCAGTTGCAATTACTCGCAGTAATGGTGTTTATCAAATCGGATTACCAGTAGGAAAGGTCTTTGCTCTTTCTTTTATGACTTTGATGCAGCTAGCAATTGGTCCTTTATTTGCAGCACCAAGAAATGCCACTGTATCTTATACTGTGGGGATTGCACCTCTTTTGCCTAAGCAATTTCATGGAATCGGTTTGATTGTCTTTACAATTATCTTTTTTGCCATAGTTTATGTGATTGCCTATAACGAGAGTGATATTCTTTCTTCTTTAGGTAAAATTCTGAATCCGATCTTTTTAATTTTACTTTTTGTTGTTTTTGTAATTGCGTTTGCTCGCCCACTTGGTAATCCTGAGATGGCCGCACCTACTAAAGAATATATGAACGGCGCAATAGTTAAGGGCTTTTTAGAGGGCTATAATACTATGGATGCTTTGGCAGGCTTAGCCTTTGGGGTAACTGTGGTAACAGCAATTAAAGAGCTAGTTAATAATGACAAAAAGAAAACTGCTAAAATGACTGCTAAGTCGGGGTTAATTGCAGTTTTTGCAATTGGTATAATCTATACTTTATTAATCGTTATTGGTGCGATGTCATTAGGGCATTTTAAAATAGCGAGTGACGGCGGTATTTTATTTTCAGAGCTCGTTAAGTATTATGCTGGTATTTTTGGACAGGCTTTGTTAGCTGTTTTAATCTTTTTAGCCTGTTTAACTACCGCAGTTGGAGTCTTAGCAGCTTTTGCCTTAGATTTCAGTGCCCACTATTCAAAGATTAGCTATAAAGGTTGGCTAACAATCGGTTGTTTAGGTTCTTTAGCTACAGCAAATTTAGGTTTAGAAAAGATCATTCACTGGTCACTTCCTGTTTTAATGTTTTTGTATCCCTTGGCTATTGTGTTAATTATTTTATCTGTATGTTCACCACTATTTAAAGGTGACATGGTAGTATACAAAATTACGATGCTTTTAACTCTAGTTCCAGCGATTTTCGATCTAATTACCAACTTGCCTGCACCAATTTCTGGAACGCAGTTTTATAAAGTAGTAAGTCAGATTCGCTTGCAATATCTACCTTTAGCTAATATTGGCTTATCTTGGGTTGTACCAACAATTTTAGGATTAGTTATCAGTGTCTTGATTCATTTTGGGCGTCGTAAAGCTAATAAAATATAGTAGTTCGGCTATGGTTCATTTGAATCATGGCTTTTTTGTTGGGATAATGAAATTAGGTGATAAAAAATGAGTTTAACCAATAAAGTGCTCGAAAAAGATGTGCATGATTTATTTACTCGGGTAGCGCCGCATTATGATCAGATGAATAATTTAATTAGCTTAGGTACACAAAAAAGCTGGCGCAAGAGATTTTTAAAAGAATTAAAAGTAGCACCGGGTGATTTTGCCCTAGATTTATGTTGCGGTACAGGCGATATTACGATTGCTTTGGCAAAACAAGTTGGTCCGTCAGGAAATGTAATTGGGCTTGATTTTAACCAAGAAATGCTTGATTTAGCTGAGCAGAAAATACGTCAGCAAAATTTACAAAAAGAAATTCAGTTAAAGCAAGGGGATGCAATGCATTTGCCTTACCCAGATCAGAGTTTTGATATTGTAACGATTGGCTTTGGCTTACGGAACGTTCCTGACGCAAATCAGGTCTTGAAAGAAATTTATCGAGTATTAAAGCCAACTGGTAAAGTTGGAGTATTAGAAACATCGCAGCCAACTAATCCGCTTATTAAGCTAGGGTGGAAGAGCTATTTTAGGCTGTTTCCTAGCTTTGCGAAATTACTAGGAGCC
AATGTATCTGATTATCAATATTTGTCTCATACAACCGCTGAATTTGTTTCTGCGCAGCATTTAAAAGAAATGCTTGTAAAAAATGGATTTAAAAATGTGCAGGTAAATAAATTAAATCTAGGTGCGGGTGCAATTCATGTCGGAATTAAGAAGTAAATGAGCTAAAATATAAAATAAAAACTAAAAAAAGGAGAATTTTATGAAAACTGGTACTAAAATTATTACTTTAGACAACGGTTATCACTTGTGGACTAATACTCAAGGTGAAGGCGATATTCACTTATTAGCGCTTCATGGCGGACCTGGTGGCAATCACGAGTATTGGGAAGATGCTGCAGAGCAACTGAAGAAACAAGGATTGAATGTACAAGTTACAATGTATGATCAGTTAGGTTCATTATATTCAGATCAACCAGATTATTCTGATCCAGAAATCGCTAAAAAATATTTAACTTATGAATATTTCTTAGATGAAGTTGATGAAGTTCGTGAAAAACTTGGTCTAGATAATTTTTACTTAATTGGCCAAAGCTGGGGCGGTCTTTTAGTTCAAGAATACGCTGTTAAATATGGTAAGCACTTAAAGGGTGCAATTATTTCGTCAATGGTTGATGAAATTGATGAATATGTTGATTCAGTTAACAGAAGAAGACAAGAGGTTCTTCCACAAACTGAAATTGACTTTATGCACGAATGTGAAAAGAATAATGATTACGATAATCAACGCTATCAAGATGATGTGCAAATTTTAAATATTAATTTTGTGGATAGAAAGCAACCATCAAAGCTTTACCATTTAAAAGACTTAGGTGGCAGTGCAGTTTATAATGTCTTCCAAGGTGATAATGAATTTGTTATTACTGGTAAGCTTAAAGACTGGCATTTTAGAGATCAACTTAAGAATATTAAGGTACCAACATTAATTACTTTTGGTGAAAATGAAACAATGCCAATTTCTACTGCTACAATTATGCAAAAAGAAATTCCAAATTCCCGTCTTGTTACTACGCCTAACGGTGGACACCATCATATGGTTGATAATCCTGACGTTTATTATAAGCACTTAGCTGACTTCATTCGTGAAGTTGAAAATGGTAATTTTAAGGGTGAATAAAGATAAAAAGTCCATGCGATAGTTTCTATATTGAAGCTCTAGCATGGACTTTTTAAGCATATAAATCGCTTTCATGACATATATTCTATATAATTAATAACAAATATAGTTTTATTATTTTAAGGAGAAAAGCCATGAAGAGAAAAATAAGTAAACTACTTGCAGCGACGGCTTTAATGGGGACACTAGGAATTTCAGTAGTGGCTTGCGGCAATAATGATTCAACCAGTAAAACTAAACAGTCAAGTGTAAAGAAGACTGCTAAAGATAAGGCAAGTAATACAAAAAATGCAACAAGAAAAAGTCAGATTAAACTTAGTCAAACTGAAGCAATAAATAAGTTTGATAAAAAGTATTCTGATAAAAAATTAAAAGAAATAGATCTAAAATTAGATGGTAATAAATATCTTTATGAAATTACAGGATTTGATAAGGATAAAGAATATGAAATGACCATCAATGCCACAAGTGGAAAAGAAGTCAAATCTAGTTCCGAAAAATTAGACTTAGATGAACGTTTGCAAAAGGGACTTGATTTAGATAAAGTGATATCTCGTGATCAAGCAAGTGAAATAGCAGAAAAAGAAGTTAAAAATAGTACTGCTAAAGAATGGACTCTAAAAATGGATCAAGATAAAGCCATTTGGGATGTAACTGTTGAATCAGGTTCAAGCAAACATGAAGTAGAAATTGATGCTATTTCTAAGAAAGTTATTAAATCAGAAAAAGATGATTAAACAAAAAAATTCAGAGTTTTTAAGCTCTGAATTTTTTTATTAGTATTTTTATAGAAATTAATTTAATAGGTGATTAGCTTGTCAGCGTTCATCGTTTCATCGTCTAACTTATAAGTCTTTAATTCAAAATCATTTTCAAAGTTGCAGTA
ATAAGTCTTAGTAGCAGCAGAGTAGCAAGCGGTGTAGACAGTGTATTCATCGCTACCCAGTTTGTTAACTACGCTACCCTTAATCATCGCAACAGACTTTAAGATGTTAAAGAACTTAGCCACGTTAGCTTTTTCACCCTTAACAGTTGGGTAATTAACATTTAAGTAAGCTGCTTTAACAAAGCGATCTGCTGGAATGCTATCACCTGGTAAACCTAAGCTGCCAGTGCCAACGCCCCATGGAGCAACTTTTTGACCGTTCCAGCTTTGAGCTGTAGCATCATGTGGATCTAAACCAGTATAGTTGCCAAGGTTAGTAAGATGCCAGTTAAAGTCTGGACTGTTAGTTAAAACGCCAAGCTTATCATCGAAGACTTTCATTCCGTATTGTTTTGAAACTTCAACAATAATAGCTTCATCTTTGTCACTAATAATCCAGTGAAGAGGAGCAACTGCAAATGATGAGTTAAGAGCTTCATTAACTAAGTTAACGTTCTTTAAAGCTTCTTTTACATCGCTAACTTTAGTAAAGTTCTGGGTGACCCAGAGCATAATTTCGTAAGAAGCTAAATTGATTTTTCCATCAATTGGACCATCGCTAAATTTAGCAAAATGTGGGAAGTTTAGACCAGCAATTCCCAAACCATCTTCATTGAAACAATCAAAGTAAGAAGGATAGCCATCAACTACAATTCCCATACCGATAACGGCTTTTTTAGTAGTCGTATTATCTAAAAACTTATATGGAAGAGGATAGTTGCGTGGTGTGATAATTACACCTTCACCATAATCTTGTCCAACATCTAAGTTACGTCCAAAGTAGAGATTTCCTTGATCATCTGTAAATCTTAAACCAGTACACATTTTAAAATCCTCCTTAACTATTTAGCAGTAGTTTTAGCTTCTTGCTGAATGATGTATTTCTTTTGACTTCTCTTAATTTGAATAACGGCAAATACTAAGATGATGAAACCAATTGCTACAAAGGCAATCATGAAGTACCAAGCAACTGTAAAGCTGGCTTTTCCTTTAATAATTCCGAATAATGGAGCACCAATAGCATATCCAATTGCATAAGCTAAACCCACATATCCTAGCATTACACCTTGATCTCTTGAACCAAAGAGATCCTTGGCCATAAATGCAGGACCAGACATGTAGCTAAATACAGCTAAACCACAGAAAATAGCATAAGCAACACCTGCGATTTTACTTACGTGAGCACCATAAGGTTGAAAACTGATCAAAACCATCATTAAGATGGATAAAACATACATGCAGCCGGCATAAGTCATTGATTTGGCAGTACCAAATTTATCAAATAAGAAGCCACCGGAAACATTTCCAATTAAACAACCAACACCGTACATTGATCCAATGAGGCCAACATCTGTTAGTGAAAGCTTAGTATCAAGAAAGGCCGCATAATCTTCATTTAAGGAAGCTAGGCCTAAGCCAATAATTAGAAAACCAAGACTGAAAATCCAGAACCATTTCATTTGTAGAACTTGTTTACTAGTCCAGCCCTTAAATTCTTTCGCCTTGGCCAATTCTGCTTCTTTTTTGCTTTCAGCTAATTCAGCATCAGAAACGACGATTTCATCTTTCTTAGGAGTTCTAATAAAGCAAGCAATAACTATACCAATTACTAATAGAGCAACTGCGAAGATGAAAAATGGTGCCATTGAGGTCAAGTGACCTGTCTTAGTGTTACCAGTCATAAAGTGCTTTAAAATAGCTTGCGTTGCAGGTTGTAAGAAAATATTACCAATCGAACCACCGCAGAAAGCAATTCCTAAAGCAGCTCCACGTCCTTTTGCAGGGAACCAGTGGTTAATAACCCAAGGAACACCTTGGCCGGAATAAAAAGTTGAACCGATCATACAAATGATAGCAGCAATATAGAATCCTGGCAGTTTTGTGCTAATTCCAAAGATTATATAGGCAATAGCAGAAAGGCCAATACCGATTAAATACATAAGTCTGAAATTAACTTTCTC
TAATGCTTTACCAATAAATGGCGAAGCAACTGAAGCAAAAACTGCCCCAAACGTAAAGATTAATGTATACGATGCTAAGGTAAAGTGAAAAGTATTAACTAACGGGTGGATAAACAAAGGTTGAATATTTTGAGCAATCCCATAAGGAATGGCTTGGGTTAACATACAAAGGAAAACCATAAAATATTTATAGCCTTTGCTAACGACTTTATCTTTAGTAGCGACATCAGTAGACATGTAATTACCTTCTTTCTAAACTAATTACTTATCTAAAATAATATTTGGTTTGCTGACGACAATATTTTTTTGCATTTGCTTAATTTTAATAGCTGCAAAAATTAATAATATAAAACCAATTGCTACAAAGAAAATAGTGCAGCACCAAGCAGTTGTGAAACTGGTAGCGCCTTTTATGACGCCAAATAGTGGGGCGCCAATTGCAAAACCAACAGCATAGGCCAGGCTAATGTAACCTAAGTTAACACCTTGAGCTTTTGCACCAAATAAACTTTTCGACATAAATGCAGGACCAGACATATAGCTAAAGACAGATAGACCACTTGTAAAGGCCCAACCCATACCAGCGTAGAAATTAATGCGATCGCCGTAAGGATGAAGGCTAATAAAGATCATCATTAGGATAGCTATAATTAACATTATTCCTGCATATGCCATTGATTTTGCTGTACCGAACTTATCAAATAAATAACCCCCAGAAATGTTTCCGATGATGCCAGCAAGTCCAAAGACTGAGCCAATTATTCCGACCTCAGTTAAGGATAATTTAGTATCAAGAAAGGCCGCATAGTCTTCATTTAATGAGGCTAGGCCTAAACCGATAATTAAAAATCCAATGCTGAAAATCCAGAACCATTTCATGTGTAGAACTTGTTTGCCACTCCAGCCTTGAAATTCATGGGCTTGCTTTTGAGCAGCTTCGTGTCGGTTTTGTTCAACTTCTTGAGCAGAAGCTAAAATTTCATCTTTCTTTGGTACCCTAATAAAAGCTGCAATAATCAATCCAACTACTAATAGAGCAACAGCAAAAATAAAGAAAGGTGTCATCGAAGTTAAATGACCAGTCTTGGTATTACCAGTCATGAAATGCTTTAAAATTTCCTGGGTAATAGGTTGCAAGAAAATATCACCAATCGATCCACCACAAAATGCTAGACCTAAAGCAACACCGCGTCCTTTAAATGGAAACCAGTGATTGATAATCCAAGGGACACCCTGACCAGAATAAAAGGTTGAACCAACCATACAAATAATTCCAGCCAAATAAAAGCTAGGTAGCTTTGTACTAATTCCAAAAATTACATATGCTCCAGCTGAAAGACAAATACCTATCAGATATAAAATCTTAAAGTTTACTTTTTGCAAAGCCTTACCAACTAATGGTGAAACTAAAGACCCTACAACAGCCCCAAAGGTAAAAATTAATGTATAAGAAGCTAATGTAAAATGAAAAGTATTAACCAGAGGGTGAACAAATAAAGGCTGAATTAATTGGGCGATTCCATATGGAACAGCTTGAGTTAACATGCAGAGAAATACCATAAAGTATTTGTAGCCTCGACTGACCACCTTTTGTCTATCGTTTGACATCGTATCTCTCTTCTTTCTGACAAAATACTATTAGCTTGATAAGTTATATATTATTAAACACGTGAGCTTTGACTATAGCCACCTTTATTGTGGCTCACTAAATCGAGTAAATCAAAAAATAACGACCTATAATGTTATTTAATAGGCATAATAAAAAGCCTATCTTTTTGAGATAGACTTTAGAAAATTATTTTTTTACATATCCTGCGATGTCATCATCGTCAAAATCATAGGTTAACTTCTTATCATATGCTTTTTCATATGCTTTTTTCTTTTGTAGATAATCAATATCACGCATTCTTTTGGCATTTTGATCAACTGTTAGACGGTCAACTGGATAAATTGTTGGTAAAACGTGAAGAATGTAGCGAGTTTTGTGGAATTCGCTGCTA
TTTTTCTCCATTTCAGGGAGGTCCTGAATTTCTACAAAACAGGAAATAATTGGAACGTTGGCCTTAGCTGCGTAGTAGTAGGCGCCCTTTTGAAGAGGGCGAGGTTTTCGATAGTTATACCACATTTCTTGCTCCGGATAGATCAAGACCCAGCCCTTTTTATCAAAGACTTTTTGCAAATGCTTAGGAAATTCACGCCCCAAATATTGGTAGCTTTGAACAATTGGAATTGCATTAATGTTGTTCATTAGAAAACCAAAGAAACCGGGTAATTTTAAATTAGAATCTTCAATCACAACATAAAGTTTTTTGTGACATTTGTTGGCTAATTTCTTGATTGGAAGACTATCGGTTTGCTTGAAGTGATTACTGGTAATAATTGCGCCATATTTAGGTCCTTGAGCCCGCTTCTTATTTTTGATTTGCGTGTGAGCCATCAAAATTAAAGTGTCAATATTAATAATACCTTGAGCAATAATATTCTTTAGTTTATGACTAAACTTCTTTTGTTGTTTCCAATAATCGTTAACGAGTTTGATTCTTTCGCTTTTAGTCAGGATAGGATCGCCTAATTCGACTTTGGCATGCATTTTTCCCTTTTTAACGGCTGTTTTGATGTTTTCGATAACTTGCGCGCGATTTTCACCAATAATCATAATTTCACCCTTTCGCCTTTTCTTTTAATTTGAGCCCAGTTATTTGGATCAAGTGCCGTGCTCGCCTCTTTTTCAAGCATATGATCTAATTTTGCCCGATTTTCAGCTCGCTCTTGATTAGTATAGTTGTCTAACTCTGATTTTAATTCCTCATAAAAAGGTGTGCTAGAGGCATATTTCCAGAAATAATCATTATATTGGACGTCTTTAAAGTGCCAAGGTTTGAAAAATAAATTATAGTGAATTAGCCCTGGTTCAGGAATGGGGGCGGTATTTTCATTAGGCATTGCGTCCCAACGTGGATCAAGATGTAAAATTCTTCCGTCACAAATCTCATTTAGGTAGTCTTGATCAGGAGCAATACAATCAAAGTGATATTGCTCAAGTAAATGCATAAAGTGATCGATAAAGTGTTCATCCCTAAAAGCTTTAGAATTTAAGACTAACATCCCAGAATTAATATATTTCTTTGGATCGAGAGCTAGAACTTCTTTAATATATTTCACCATTTTATCTACATATTGAATCGATGAATCTGTACAAGCGGCGAATAGATTATTACCTAAGTCGTTATTATACAGTTTAGCAATGTCATCAACGACGACTGTGTCACTATCAATGTAGATTGCTTTATCATATTGTGGAAATAAGTCAGGGATAAAGAGACGATAAAAAATAGACATTGTGAAAAAGTCTGCCCGGAGATAGTTTTCTTTACGATTTTGGATCGGTTTTACCAGTTTTTCATCAATATGGAAAAATTTAACGTGCACATGGTCATTAGCAAAAGCTGCAAGTTGCTTTTGGTGTTTTTCAGACAGGTCTTGATTTAGAAAAGTAACTGTGTAATCTTTTTGCGGATTAGCATTTTTAACCAAGGAGTTTAGCGAAACTGCCGCATACTTGGCAAAATCATCACTAATACTATAAAAAACAGGGATAGTCATTTTTAAGCCTCTTTATATTTTTACAGATGTCGTTTTCTTAACTTTCGCCCAAGTTACATCTTCATCTTTGATTTCATCAACTTTTTTAATCATCCAATCAAGGTCGGCACGTGCTTTTTTACGGTCTTCATCGGTGAAGTTAGCTAATTGTTCTTTTAGTTCTCCATAATATGGAGACTTTTTAGCAACATCCCAGAAGTATTTGACATATTGAACATCTGCGAAATGCCAAGGCTTAAAAAAGAGATTATAATGGACAATTTTAGGATTTTTAATTTCATCCATGTGTTCATTGGGCATTGCATCCCACTCAAGGGGCAAATGATAAATTTTGTCTTCACAAATTTCGTTCATATAAGCTTGATCAGGATCGATATTATCGAAATGATACTTTTCA
ATTAATGAATAGAATTTATCAACAAATTTCTTATCTCTAAAGGCTTTCATATTGAAGAGAATAACGCCGTTATTGATATATTTTTCTGGTGGAAAAATACCTTGGCATTCCTTGATATAAACTTGAAGTGGCTTAATAAATCTGATTGACATGTCTGGTACACTAGCAAACATGTTGTCGCCGATTTCGGTGTTGTAGAGTTCAGCAATGTCAGTACAAATAATAGTATCAGCGTCTAAATAAACAGCCTTATCATACTGAGGAAAAAGGTTAGGGATAAATAAACGGTAGAAAATAGACATCGTGAAAAATTGTGCTCGTAAGTAATTCTCTTCACTGTTGTGAATCGGAGCTACCATTTCATCATCAATATGAAAAATATTAACATGGACATTTTTAGTAGAAAGATCTTCTAAATCTTTTTTGTGCTTATCGCTAATGTTTTGAACTAAAAGAGTAATGGTATAGTCTTTGTTTTGATCCACATGGTCAATTAGTGACTGAATTGAAACGGCCGCGTAGGGTGTGTAGTTGTCGCTAATTGTGTAAAAAACAGGTATAGTCATCAAAAAATAAACTCCTTATATATTAAATGTTTCTAGGCCCGCTTTAATTGTGACCTAAGCTCTACGTACTGCTTAAGTAAATCATCATATTCATGTAATTTCAAGACGCTGTGCACTCGCTCTATATCCCATGGTTTAACAGTTAAAGTATGGAAGTATGGCTTAAAACGAAAGCTAGTCGTGAAATGTTGAATTTTGGTATCTGGTCGTAAACGATATTGTTCATTGTAGCAACGTGGAGCAATTTTCTTCTCAGTAGCAAGTTTATTTAGAGCAGACTGATCTGGAAGAAGCATTTTTTTATCACTCATTTTTTCTCTTACTTTGGCAAATAATTGTGTTTTTTTAATTTCAGCCATGTTAAGAAGCAAGACGCCGGAGTTTAGGTAATCAAAAGTCTTGTGGGTATGGATATTGTGAAAGAAGAACCGACCCCAGAAGTCTAAAACACCGACTAGTTCAATATTAGCTAGATCTTGATTATAAAAGTCGCTAATATCTTTACGCACAATAATATCATCGTCTAAATATAAGATACGATCAGGAATCTGTGGTAGTTGGTCCGCAAAGAGACGCAGCATTGCATATGGCGTAAAACGAGTTTCCATATTTGCGCGTGGAGGTTCTTTTTTAAATAATTCAGTACAATCGATTAATTCAGCAGTATTGTTAGGATCCTTTTCTTTAAGCAATTTTTTAATTAGATTAAAAGCTTGTTGGTTAAAAGCTGTGTATCCCTCTGCGTGCATCGTCAAAATATAAAAGTGCAGGGGAGTTGATGTGTTTTTTAGTAAAGACAGCGTTGTAATTAAAATACCATCTTCGGCATGAGAATCGCCGCAAAACATAATATTCATTTTTGTCCTCCAATTTTTCGCTGCTGGTAGCGATAATATTCACAGTTACTCAATTTTGAACGGTTCTTCAATTGAGTAAATATTTGATCATGTAGCTTTTTTTGTTGCAGTTTTTTAGGTAGAGAAGTATCTGGAAAAAAGGGGCCATCAATATAAACAGTAATTCTAGGGTGCTTGCTATATTTTCTTTTCTGGTAGGTCGTTGTCATAACAAAGCTTGGCTTAGTTGTTTGAACTGGAAAGTTAAAACTAGTTGCTGGAAATGGACGAATATCAGTATAGTAGGGCCAAACATGAGCTTCTGGATAGATAATGATATGGGCGTTTTCCTTAATTAGAGTATTAACTGCTTTAAGCAGATGAATTGCTTGCTTAATATTTTTGCCAACTGGTAATCCTCCATAAGGCAAGAGAATTTTGCCAATTATTGGAATTCCCCAGTTTGCTTGATTAGCAATTGCATAATAATCTTTCCAGCCAAAGAGGGTTAAAGGCATAAAGACGTCGTTAACCATTTGAGTGTGATTTGCATAGACAAAATAGCCCTGATGCTTATATTTAAATAACTTATCTCTCC
CTATTACTTTGACATGCATTCCGCCATAAGTAAAAAGATAAGCAAAGCCAGTAGCTAAAAAACGCATTAAATAGTTAAGAGGTGTGCGTTTAATTATTTGATAATCGCTAGGTAAGGTAAAATCTTGTTGCTTACTTTTGACAATGTCGTCAGTTAAAGAATTATAGTAAATAACATTACGTTGAGATGGTTTAGTATTCATGATATTAATTATAAAAGAAGTTTAAAAACAAAAAAAGACATCGCTTCAGCGATGTCTTATAAAATATGGGTGGTCAGGGGATCGAACCCTGGACCCACGGATTAAGAGTCCGTTGCTCTGCCAGCTGAGCTAACCACCCAATTGGATAACCAACAAATAGTATTATGCCAGGAAAAGAACTAAAGTGCAAGCTTTTTTTGCATAAGTTTAAAAAAAGTGTGGAAAACAGTATAATATAGTTATTAATATTGAGGAGGCATCATTTATGCCAAAAAAGATTTTAGTCGTTGACGATGAAAAACCAATTTCTGACATTATTAAATTTAACTTGACTAAGGAAGGCTTTGATGTCGACACTGCTTATGACGGCGAAGAAGCGGTTAAAAAAGTTGATGAATATGATCCAGACTTGATGATTCTAGATCTAATGTTGCCTAAAAAAGATGGCCTAGAAGTTGCACGGGAAGTACGTCAAACTCACGATATGCCAATTATTATGGTAACTGCGAAAGATACTGAAATTGATAAGGTATTAGGACTCGAGATGGGTGCAGATGACTATGTTACTAAGCCTTTCTCTAATCGAGAACTAGTTGCTAGAGTAAAGGCAAACCTACGTAGACGTGATTTAACCCAAAAAGCTACTGAAGATGATGAAGATAAAAATATTACTATTAGCAATTTGGTAATTATGCCTGAAGCCTATATGGTCGAAAAAAATGGCGAAAAAATTGAATTAACACATCGTGAATTTGAATTACTTCATTATTTAGCTCAGCATATGGGTCAAGTGATGACTAGGGAACACTTGCTGCAAACCGTTTGGGGCTATGATTACTTCGGGGATGTTAGAACTGTTGACGTAACTGTTAGACGTTTACGTGAAAAAATCGAAGATAATCCGAGTTCGCCAACAATTTTAGTTACACGGCGTGGAGTAGGATATTACGTTAAAAACCCATCAGATGAATAAGGATCAAAGTCACTAGTAATAGTGGCTTTTTTTGCAATTAGGACAAATGAAGAAAATAAAAAGTGTATTAAATTCTATCAATTTTAAAATCGCAGTAATTTTTATGCTGCTTTTATTAGCAACGATTGAAGTGGTTGGTGCGTCTTTTACAAGGCAACTCGAACAAAACTCAATTCAAAATTTTGAATCATCAATTCAAGTTCCAAATATTATTACTAACCAAATTTCTAGCCAATTAAGTAGGGCTAATTCTAAAAAGGCTAATCAGCAGTTAAGCCAGATTATCTCAAACTATAATTTGGGCGATATTAGCCAATTAATGGTAGTTGATAATAAGGGAGTAATTAGAGCTGTTTCAAATGTGAATGATCAAAACCGTATTGGACAGCGAACTAGTAATGTAGATATCAAGAGTGTACTCTCAAATGGAAAGCAAGTTTCAAAAGTTATTAATGACAATGGAAATTATATGGTGCAAATTTCACCTTTGACTTCTGCTAATGGAACTAATACTCCAGTAGGAGCAATTTATGTTCGCGCTAGCTTACAAGGTGTCTTTAATAACTTGAGACAGGTATCGATTTACTTCCTGATTGCTTCGCTGATTGCAGCTGTTTTGGGAGCAATTGTAGCCTTGGTTATTTCCCGGGCAATAACTAGACCTATTGAAGAGATGCGTAAACAAGCATTAAGAGTTGCAAATGGAGACTATTCTGGGCATGTTAGAGTTTATGCGCAAGATGAGTTAGGTCAACTAGCCGAGGCTTTCAATACTTTATCAGTAAGAATTGAAAGAACTCAAGAAATTTCAGACAGTGAGCGAAG
ACGACTAGATAATGTCTTAACACACATGACAGATGGTGTTATTGCAACAGACAGACATGGAAATATCACAATTATTAATGAAACAGCACTCGATTTTCTAGGAAAAACTGAAAAAGACGTTATTGGAAAGCCAATAACTAATTTACTTGGATTAAAAGATGTTACTATTCAGGACTTATTAAGTACTCAGCAAGAATTAGTTGTTCGAGTTAATGATAATACGCGCGATGAAATGATTTTGCATGCTAACTTTTCTTTGATCCAGCGTGTAACGGGGTTTGTTTCCGGCTTAGTTTGCGTGCTTCATGACATTACCCAACAACAAAAAAATGAACGTGAACAACAACAATTCGTTTCAAATGTTTCACATGAGCTTAGGACGCCGTTAACAAGTTTAAGAGCATATGTTGAGGCATTGAATGATGGGGCATGGAAAGATCCAAATATTGCACCACAATTTCTGCATGTTATTCAAGATGAAACTGAGCGAATGATTCGAATGATCAATGATTTGCTTAGCTTATCTAGAATGGATCGAGGAGTAGCGAGGATGGATTTAGAATGGGTTAACTTAAATGACTTTGTTAACCACGTTTTAAATCGTTTTGATATGATGCTCAAGTCTGATTCTGATAAGATGCACAAAAAGAAATATACGATTAAGCGTGAATTTCCTCATCAAGCCTTATGGGTTGAAATTGATACTGACAAGATGATGCAAGTAATTGACAATATCATGAACAATGCTATCAAGTATTCACCTGACGGTGGCGTCATCACAGTTCGCCTATTGCAGGCTCAAAAGCACGTTATCTTAAGTATTTCTGATCAAGGATTAGGAATTCCAAGAAAAGATTTAAACAAAATCTTTGATAGATTTTATCGTGTTGATAAGGCGCGTTCTCGTAAACAAGGTGGAACTGGTCTAGGACTGGCTATTTCTAAAGAAATAGTAGAAGCACACCATGGTCGAATTTGGGCAGATAGTGCTGAAGGAGCCGGCTCAACTTTCTACATTTCTTTGCCATATGAAGCAATTAGCGAGGAAGGAGAAAACTGGGATGAGGTTTAAAGATAAGTTTTCTAGAATTGCATTACGAGTTAGCTTAATTGCAATGGTTGCACTATCAATTATCTTGTCAGCTATTATTTGGGGTTCTGATGCACGATTCTCAAGGATTGAAGAGACATCTAATCAAACTCAAATTAAAGATTTAGGCCAACGTTCTTTAAGAGATATTTATTTACCTACTCAAACTTTTTATTTTAAAGATAAGCAAATGTATCAGGTTTACGATACTAAAAATAATCTTCCGCTAGAGTTTTCAAAATTAACTCAATCAGTTAAGCCACTATTGCCAATCAGAGTTTGGTCTAGTCAAAGTAAATATGAAAAGTTATTAAAAAATCCTGACTATGTTCAGTTAACATATCCGGATCAGATAACGATTTCCTTATTCTTAACTAATGTAAGAAAAACTGATAGTCGGGAATTTAATCGCTTTTTTGTGCCAGCTCGATCCAGTAAGTATATTTATTTAGGCAATGACGAAAACTATACTATTTATCGAGTTCGTCTAAATGATGTATCGTTTGACAATTTGGTGGAACATATTCAAAGCGCTAAAACACAGATGCCAGTTACGCTTCAAAAAGTTCATGATGATTATTTACTTTTCTATGAAAAGGATTTAAGTTTACCAACATACAGCTATTTAACTAATGAAGAGTCAGATTCATACTTTGTGTATCGCTTGTTAGGCTCTAATAATCCTACCCAGCATAGTTCTGGCGAAAGCATTACATATTCTAATGGAGTATACGAGCGGTTGATTGCGGCAAAACATACTCATAATTATGAATATGTTGATTATCAACAGGATCAGGTTCCGAAGACTATTAGTCGAAAATTAAATGATAGTTTATATTTTGTTCGTAAAATTGGATTATCTGAGCCAGATTTAAGGTTCTTTGATGCTGATAATAATACAGTTATTTAT
CAAAATTATGTTGAAGAATATCCGATCTTTTTACCAGGGAAATATAAGATGCGGGCGCAAGTTAAGTTTGCTTCTAATGGAATGACAATTAACTTTAATAGCTTAGACTTGCAGATTCCAATTCCAACTAATGGTGAAAAGAAGACGCTTATTCCGACTAATGAAGCGATGGATGAATTATATCAAAAGGGATATCACCAAAAAGATATTGAGCGCATTGTTATTGGCTATACAGCTAAGTCTGATAATAGTAAAAATAAAAAATTAGTAGATCTAGAACCAGCCTATTATGTAAAGATTAATAAGCAGTGGAAGACCCTAGATGAATGGCTAAATATTAATAATCAAATTACAAATTCTAGAAAGGAGGGGCTAGTAGATGGACTTTAAGAGAATTGAATGGATTTTTTTAGTCGTATTTATTGGAATTAATATCTTTTTGGGTATTGAGCTTTGGCAAACACCGACGCTTTTATCTGCTGGCTCTACTCCAATTCAAACTGATATTAAAAGCGAAACGAGTGCAGATCAGATAACTATCCCTAAAGTTGATGATAAGCAAGATGATGGGTATTATTTAGCTACCAAAGTGGACAATTCTTGGACAAAAAAAGCAGCAGCACAGGTAAATCAACAAGTAGAAACTAATACAAGTGAAAATAGTATTTCAGTTAATTTGAACAAACCAATTACTCTGTCGAAAGATTCCAAAAAAGCCCTGAAAGAAATTATTCACTTTAAAAATAATAGTCAAAACGTGTATGAGGGTAAAGATTATGTCTATCTTTCAGAATTATCCGAGGGAGATGATTATGTCTTTAATCAGAAAACAAAGTATGGAGAAGTTTTTGCAGCAACTGCTCTATTGCACATTATTGTTAAAAATAATCAAATTGTATCGTATTCGCAACGCTACGTTAGCAATTTGAATCCAGTACGTGAAAGACAAAATACAATTAGTTCAAAGGCTGCTGTAAATTCTTTATACACTTATAGTGAATTGCCAAATAATTCTAAGATTATCTGGTTAAAGCAAGTTTATACTAAATTGATCACTGTTCGAGGGAGCGAAATTTATATTCCTACATGGGTAGCTGCGATTGAAAACAATAATTCTCATACGATGACCCTTAAGCGCGTAAACGCATTTACAGGGACAATTATCCAAAATAATATTGCTGCTGATGATTCAAAGGAGTAGAGAAATTTGAAAGTATCAGTTTTATCAAGTGGTTCAACAGGTAATTCTACATTGATTGAGACTCCGCAGCATAAAATCTTAATGGATGCAGGTTTATCTGGAAAGAAAACTAAGGAATTACTAGCACAGGTAGGCGTTGATATTAAAGATATTGATATGGTATTTATTAGCCATGATCATACTGACCATTCTGGTGGATTAGGCGTATTGATGAGAAGATATCCTAAAATTTCAGCCTATGCGAATTCAGGTACTTGGAATTATTTAATTGAAAGCAATAAGATCGGTAAATTACCTGTTGAACAAATTAATACGTTTGAGTCTGGTATAACTAAGACTTTTGGTGATTTGGACGTGACAAGTTTTGCGACGAGTCATGATGCGGCGCAGCCACAATATTATGTCTTTACAAGTGGTGGGAAAAGATTTGCGTGTTTAACAGATACTGGTTATGTTTCAAGTACTGTTAAAGGTGAAATAAAGGATGCAGATGGCTATTTAATGGAATTTAACTATGACGATATGATGCTTAGAAATGGCCCTTATTCTTGGTCTTTGAAACATCGAATTATGTCTGATGTTGGACATCTTTCAAATGATCAAGCAGCAGACACCTTACTTGATGTAGTTTCTCCCAAAACTAAACATATTTTTTTAGCGCATCGCAGCCAGCATAATAATACTCAGTATTTAGCGCATGAAACGGCTGAAGACTTATTGGTAGCTGGGGATGCAAATTTACCAGCTGATGTTAAAATTATTGATACTAATCCAATGCAAGCTGGTTCTTTAACGAAAA
TATAAATTTTTAGCAAATAATTTGACTTGAATTCATAAAAAATTCAAAATTTGTGATTAGGCTAGAAATGAAGAGAAAAGGAGAAATTTTTATGGCAGAAAATAATACTAAACACCCTACTAAGCATAATGCTTTAATTAAAACTGGTATTGTTGGAGTGGTTGCCGGACTTCTTGGGGGTGGAGTTGCTTATGCAGGTTTGTCTCAAGTTAATGGGCAAAATGCACCACAAACAAGTGTTGTTCCAACGACAAAAGTTGAAAAGTCTAGCAGTAAAAATAGTAGCCAGATGACTAATGCCTTTAATACGGTAAAGAAATCAGTTGTTTCTGTTGTTAACTTAAAGAGACAAAGTTCATCTTCAAGTAGTGATCCGTTTGGTATTTTTGGCTCTGACAGTTCTAGTTCTTCAAAGAAAAATTCTAAGTCTGATTTGGAAACTTACAGCGAAGGTTCAGGTGTTATTTACATGAAGTCTAATGGCAAAGGCTACATTGTAACTAATAACCACGTTGTATCTGGAAGTGATGAGATTCAGGTCATTTTAAGCAATGGTAAAAAGGTAACTGCAAAAAAAGTAGGAACTGATTCAGAAACAGATTTAGCTGTTTTAACCATTGATGGAAAATATGTTACTCAAACTGCGCAATTTGGCTCATCTAAGAATTTAGAGCCAGGACAGCAAGTTATTGCTGTTGGTTCACCTTTAGGTAGTGAATATGCTACAAGTGTAACTCAAGGTATTATTTCAGCTAAGAATAGAACTGTTGATGTAACTAATTCTGCTGGACAGGTTACTAACCAGGCAACCGTTATCCAGACTGATGCTGCTATTAATCCGGGTAACTCAGGTGGTCCACTTGTGAATATGTCTGGTCAAGTAATTGGGATTAACTCAATGAAATTATCCTCTTCTAGTGATGGTACAGCTGTTGAGGGAATGGGATTTGCAATTCCAAGTGATGAAGTTGTTTCTATTATTAATCAATTAGTTAAGAATGGAAAAATCATTCGTCCAAAATTAGGTGTAAGAATTGTATCAGTTGACGAATTAACAGAGTATGGCCGCAAGAAACTTGGTTTGCCTGATAGTGTTAAATCTGGTGTCTATGTAGCTAGCGTTACTAAGAATGGTAGTGCAGATAAAGTAGGAATTAAGTCACATGATGTAATCACCAAGATTGATGGTAAAGATGTTGATAGTGTTGTTTCACTACATACTGCGCTTTATACACATAAAGTTGGTGATACAGTAACACTTCAAGTCGTTAGAGACGGTAAGTCACAGAATATCAAAGTTACTTTAAGTTAAGTGAATATTATTAAATAGAGATAAAAAAGCTATGATTATGCGTTTTGTAAGCGGTGATCATAGCTTTTTCTTAATATTTAAAATGTGAAACGTAATAAAGTTCGTCTTTTTGAATTGTCAAGAACTGAAAGTTATCCAGAAGATTGTGGGAAAATGTGGAAAACTCTGTGGATTGTGCATAACTCGGTCAAAGTAAAAATAAATAGTGTGGAAAACCTGTGAATTGTGTGTAAATAAAATTCTAAAATTAGCTGTTCTGAACGGCTGTTCAAGTTGATTATGTAGGGAATAGACCCAAAAGTTTGGAAAATTAGTAGTATTTATAAAAATGTGGAATAAATTTGCTAAAAAAGTTATCCACTTTTTTAGAAACCACAATTTATTGATAATTAATTAAGTCAACCACTAAATATAGTTTTCGATAATCCACAGCTAGGGGAAAAGTGATGTGGACGCTATGTCAATTAGGGGATAAAATTAAAAACTAGATGCGTGAAAGGTTAGATAGTTACAATGAATATCAAAATTGTTTGTGTAGGTAAACTAAAAGAAAAGTATTTTAAGGATGGTATTGCTGAATACGTAAAGAGAATGGGGCGTTTTGCTAAAGTAAAAATAATTCAAGTACCAGACGAAAAAGCCCCCGAAAAGCTAAGTCCTGCTGAAATGGAGCAGGTCAAAGAAATCG
AGGGAAAAAGAATTCTAGATAAGATAAAAGATAAAGAATATGTTTATGTAACGGCAATTAAGGGTAAAGAAAGAACTAGCGAAGACTTTGCTAAAGAGCTAGCTGACTTAACTACTTATGGACATTCAGACATTACCTTTGTGATTGGAGGAAGTTTAGGAACAAGTAATGCAGTAAACAAACGAGCTGATGATCTAATTAGTTTTGGTAAATTTACAATGCCGCACCAATTAATGCGGTTAGTTTTAGTTGAACAAATTTACCGTGCCTTTATGATTAATAGCGGTAGTCCGTATCATAAATAATTATTTTATAAAAAATAAGATTGGTTGCAGTAGGATTAAAATACTACTAAAAATAAGCCAGTCTTTTTTTGTTAGGGCAATTGGCACGATGACATTTCGTGGTTTATCTTCACGATACCCATGTGAAATCATTCCTTCAGCAAGGCTGTTTGACCAAGAAATAGCTGCTAAAATCGCTTTAAAATATAATGTTGGCGAATAAAAAGAGAGATGATAATGCCTCATGTCTCCAACTAAGCGAATGCGATTAACTTCAGCATGGATTTTAGGAAGTACGTTGAATGCGGCTAAAGTCCCATAGGCGAATTTACTCGGTAGTTTAAAATTTTGCTCTAATGAGCGAGCGAGTGAAAGAATACTAGTGGTTTCAGTAAATGTAGCTCCTAAAAATACATAAGCGTAAATTCGTGTAAATAAAACGCTGGCATGGTAGAGACTATGACCTGGAGTAAAGTAATAGATCGTAATAAAGACTACTAAAGCTGCAAATAATGGGATTAAAAAGAGCAAGAGTAGTTTTTTAGGGCTGAGATGCTTAAACGTGAGATAGCCAAGACTAGCTATAATAATTATTATATTTCCAACCAAATTAGGAATAAGCGATATTTCAAGTGAAATAATTAAAATCAAAAATAGTTTCAATCCGGGATTCATGTTGTTTACCTTTTTAATTAGTGTAGAAAAGTTTCTGATCTTTGATAGTTAAGTGATAGTCACAGAGATGCTGAACTTGTGCAAGTTGGTGACTAATGACAAGCAAGGTGTGGTTAGAATTAAGAAAGTACTTTTTAAGCAAATAAATGACCTCTGAGACGCTTTCTTGATCTAAACCACTAAATGGTTCATCTAGAAGTAAAACCTCGGGATAAGCCATTAACATGACAAGAATTTGCAATTTTCTTTTTTGTCCTCCAGATAGAGAATAAACAACTTGTTCGCCTAAATTTTCTAAGTTGAGTTCAGTCAGCAGGTGCTTGAGCTCAGTTTTATTCAAGGAAGGGTTTTGATTGTGTTTTAGGCTAAAGTCTAATTCTTCCTTTACTGTAACATTTAAGAATTGGTCATCTGGATTTTGAAAAAATTGTCCTACTTTTGCTAGATAGGGATGCCGGCGCCATTTCTTAATGTCTTTACTTTTAAATATTAATTGTCCTTGATACGGAATAATTTTGGTTAGAGCTTTAAATAAAGATGTCTTACCGCTTCCGTTTTCGCCAGTTAATAAAGTGGCTCTTCCGGAGTAGATTTTTAGATCAGTTGGAGAAATTAGTTTTCTTTCTGGAAAAGATAATGAAAATGATTCAAGTGAAAATACTGGATTTTGTTTTAAGTCAGGAAGAGAGAAAGTTAATGCTTGATTTTTTAGCGTGTTTTCAGGCAAAGTAATAGTTTTAATATTTTTGTCTTTGATAGCCAAGACCGTGGAAACTAGCTTTTCATAGCCAGAGAAATTATGGTCGCTAATTAAAATTGTTTTTCCTTCTTTGTGAAGTGCAGCTAAGCAAGTTAGTAGAAACTTCCGATTATGAGAGTCGCAATTAGCAAAAGGTTCATCTAATAAAAATAAGTCGCTTTTCATGGCAATCAGGATAGCTAAGGCAACGCGCTGCTTTTCACCGCCTGAAAGCTGAAGAAAAGGTTGGTCTAATAAAGATGAAATATTAGTTCTAGCACTTGCGTAATTAATACGCTTTA
AAGCAGCAGTACGATCAACTAGTTCATTTTCTAAGGCAAAAATTAGTTCTTCTCGAGGAGTCGCCATCGTAAATTGTCGGTTAGGGTCTTGAAATACCATGCCCCAATTAGTGAAAGGCTGTTTTAAAGAACCAGCAGTAATTTTGCCATTGAAAGTTGGGTCAAGGCCGGAGATGAGCTTTAACAGAGTTGATTTGCCACTACCGGTTTGACCAGTCAATAGAACAAAATCACCTTGATTTATGGTTAAATTTAAATCTTTAAAAATTGGCTGATTTTGATATTGAAAAGATAGCTGATTAATTTTAATTTGTTTGGACGACATGGGCTTTTACCAAGAGGTTAATGATCAATTTAACTAAAACGCAGGTAAACAAAAGCATTGAAAGCCAACGAACTACGAAGTAGAAAATCATGTTAAAAGTTGAAAAATAGCTATAGCCATTCTTAATGAAGTCATACAGGTAGGTAACGATTGTTGTCGTTGTAGCAGATAAGAAAAGGGTGAACCAGTTGTAGCGACGGTAGCTAGTAAGAGTAAAACCTAATTCGCTACCTAAGCCTTGAACAGCTCCAGAGATCAGATTTACGGCTCCCCATTGTTCTCCAGCGATAAGTTCAACGCAAGAACCTAAAAATTCTCCTAGAAATGCAGCCCCAGGTAGTCGAACTAAGAATCCCGCTAAAGGACCTGCCATACACCAGATTCCCATCATAATATCATTGGCTAACATTCCATAACCAACCGGTGTAAGGAGCGCAGTTAAGACATTATAGAGGCCGTCACTAACTAAGTAAATAAAGCCAAAAATAATAGCAATTAAAGCAATTAGGATAATATCTCGAACATGTAATTTTTTCATTTTTTCCTCCCGAAATAAAAAAGCCACCTCAAAGAGGTGACTAAATAAAAGTCTAAAAATAAATCATTTTCCCTACGCTGATGTTAATCAAACAGGTTCAATGGGTATATTCTCAGCGATCTGGCACCCCAGTTATATTTCTAGGTTTAGCTTACGCTTAATAAGCCGAAATGACAATTAAAAATAGCAAAAAGGTTATAATTAAAATAGAGTGAAAGCAAAGGGGACCGGTTGAGATGGCAACAGCGACATTAAAAAATTATTTATCAAGTTTAATGCAGAATACGCAAGGTACTGAAATAGAGATCCGTACTGAAAATCTTCTTTTGCCATATAGTCTTTATTTAGAAACAGTGCATTATGATTTTACTGAATATCAATCTAGTACTAGTCAATTAATTTACTGCTTTGCGGGTAAGTGTGAGGTTGAAATTAATAATAAAACTTTTTATTTGACGGCAGGAAATATTTTGCTAATTGAAAAAGACACTGACTATATAATTAAAGTTAGTGATAAAAACGCAATTATTGTTAAGTTTAAGCTAAAGAGTAATTTTTCATGGCAAAAACAAGTAGAACAGTTAGATGCCAATACACCCACCGAACAAAGATTGAGCACATTATTTTTAAAAAAGTTAAATCAAGATAGTGCTTTTTTATTTACAACTACCTCTGTAACGTGGGGTAGTCAAAACTTGAAAGGTATAATTCAAGACTATCTTAATAATGTCGCTTTTAATGGAACAATTGGCTTAGAGCTACTTAAAATTATAATCTTACGCAATTTGCGCGAACAAAACTTTAAAGCAAATGAAGTAAAAGAAAGTACATTTAAAGATGAAGCGTTAGATCAATACATCGACCAACACTATAATGATATTAGTTTAGCGCAAGCTGCCAAATATTTTGGCTTTAATCGTAATTATTTTTCTACAATGGTAAAAGAAAAAACGGGAAAGAGCTTTGTTGAGCATGTCGATGAAAGAAGGATGAAAGAAGCTAGAAGGTTATTGGCCAAGCCAAATGTTTCATTAAAAGAAATAATTGAAACTATTGGATATTCAAGCAAGTCGTTTTTCTATAAGAAATTTAAGCATTATTATGGCATGACTCCAGCAGAAATGAGAAAGAGACTAT
TTAGAGAGGCTCATATCAATTTGAAATAATAGAGTACACAAAAAGGGATGCCTTGTGCATCCCTTTTATTACATGTTTTCCAATCTCTTAATTCTGTCTGCTGTAGGCGGATGAGTATCAAATAATGAGCTCAAGCCACGCTTATGAAACGGATTTTCGATGTATAAACCAGCGCTTGACGGATCTGGATCTTTCATTGGCTCGCTATTTGAAATCTTTTCTAAAGCTGAAATTAACCCTTGAGGGTTTCTAGTAAGCTCAACTGAACTAGCATCAGCTAAATATTCACGATTACGTGAGAGAGCCATCTGAGCTAGTGCAGCAGCAAGAGGTCCTAAAATTAATGTGAAAACAATGGCCACTATTTTAAAAATTGTTTCTAGAGAGCTAGAGTCATCGTCATCACGGTCAGAATTGCCGCCCCACCACCAAATACGGGACGCAAAACTAGAGATAAATGAAATTACAGCAGCTAAAGCTACTCCAATTGTAGATACTAAAATGTCATAATTTCGAATATGAGAAATTTCATGGCCAAGAACACCCTCTAATTCACTTCGATTAAGTCGCTTTCTTAATCCAGTAGTAACAGCTACGAAACTGTGCTTAGGATCGCGCCCAGTTGCAAAGGCATTAGGACTTTCATCGTTGATAATAAAGACTCTTGGCATTGGAACCTGTCCCGCAAGTGCCATATCTTCTACAATATGCCACAATTCTGGATCATCTTCTTCATGAATCTCTCGGCCATGATTCATGCTCATGACTAAATTTCCCGGATTTTGAAGAACAATAAAAAGATAAATTAAGCTACCGATTAGGGCAATTACAATTCCTGATAGCGGTTCACCGTTAATCAAATATCCTAACCCACCACCAACAAGAGCTAGAATAATAACGAATATAACAAGCAAAAATGCTGTTTTACGCTTATTGCGTGCTATTTGTTGATAGAGCATTGATAGACCTAGAATTTAACTTTAGGCACAGCTTTTTCTTCAGTTGGTGTTTGAAGGTAGTCCATATTCTTAAAGCCGTGAATCTTAGCGACTAAGTTAGATGGGAAAGTTAATAGTTTCTCGTTAAACATAGCAACAGTTGAATTGTATAACTGACGTGAATATGCAATCTTATTTTCAGTGTTTGTAAGTTCTTCTTGAAGTGACATGAAGTTTTGATTTGCTTTTAAGTCCGGGTAATTTTCAGCTAGAGCAAAAATACTCCTCAACGAATCAGTAATTTGGTTAGAAAGTTTAATAGCTTCTTCGTGATCGGAATTAGGAATATTTACTAATTGATTTCTTAGAGCAACTACTTTTTCTAAAGTGCCGCTTTCATGTTTGGCATAACCCTTAACTGTTTCAACTAAGTTAGGAATTAGGTCGTTACGACGTTTTAATTGGACATCAATTTGACTCCACGCCTCGTCAGCATGAACTTTAGCTTTTTGCAAGCCATTGTAGATACCAATATAAACGGCCACCAGTAAAATTAAAATGATAATAATAATCCATGTTAAAGTCATAGAAATAAATCCTTTCTATTATCATAAAAATATGGTTAGAATGTTAGGAATATTGTACCAAATAAAGAAAGCTTAGAATACCAAAAAAGCGCAAAATTGCGCTTTTTTTATGATATTTAAATAACAAAATCTAATACTATCCAACCTAGAACTGTCAAAATAATTAAACCTGCAAATTCTAGAACTGAAAATACCCAGAAAAACTTTCGTTTTGAAACAGTTCCGTTTTCAACAAAACTCTTGAAAACAAGCATATTGGCCATTGAACCAACGATTGAACCGAATCCTCCGATATTTGAGCCAAGGAATAAGGCTTCCGCAAAGTTAGTAAATTTACCAATTAAAATTGTTGAAGGAACGTTAGAAATGAATTGACTGGAAATAATAGAAGTCAAGAATACAGAATGTTCGGAGAACATTGTCTTAGAAATTAAAGTAACAATAAATGGAATTTGCTGGATATCGCTAATAAAAA
TGAAAAAGCATGTGAAAGTTAGAAGTAGAGCATAATCCACATGTAACATAATAGAAGGATTAATAATAAGAGCAAGGATCACTGCAACAATTGCAGGAACATATGCAGGTACAATATTGAAAACGCCAAAGAAGAAGAAAATTGAAACAGCAATCGTTAAAAGCATTGGTCGAATGCTGATTCTAATGTCTTCTAAAGGCACAGTAGGGATTGGCTTATCTTTAACAAAAAAGATAAACACTAAAACAATAATTAAACTAACTAGTAATAATGGAATTGACCAACTAAAAAACTTAATTGGTGAAACATTATAACGGTTAACAACAAAGATGTTATGAGGATTCCCCCATGGCGTAAAAGCAGCGCCAATATTTGCGCCCATCCCAATTAAGGTGACAGGCAATATCTCAGGTAAGTGATGGCGTCTAGCAATCGTTAAATATAACGGTATAAGAGTTAAAACAGTTATATCATTAGTTAAAAACATCGCAGAGATAATTGACAAAATTGTGAAGATTGCAGTTAATCTTCGAGTACTCCTAGCGCTTGCAGTTAGTTTATAGGCCAAGACATCCAAAACATGTAAGTAAGAAAAAATTTGGATGATTGTAAGCATTGCTAGAATTGAGTAAAGTGTATGGAAGTTAATATCTTCAATACGCGGTCTTGCGAAGAATAAGCTTATGATCGTTATGACAACCGTAATTTGTAAAATACGGTCTTTAGCAATATTTTTGATGACGGTCATTAAGGTCCCCTCTTAAAATTAATTCAACACTACTATTTTGAACTCTTTACGGCAAGTTCGCAACGCTTTTTTCGCATTAGTCCTTCCTATTAGTAAGATTTTGAGCATTTTATTAGGAAAAACGGTAATTAACCTGCTATATTTAAGTTAGTAAAAGAGGACGGAGGAAAATAAATATGGTTGATTCCAAGAATCTTGAATTAAAAATTAAAGATAGTGCTAAAGAAGTTTTAGAAAAGAAAATTAAGCCAGGACAAGTCGTTTTGCTGGCTTTAAATGATGGTTCAAATGGCTATTCTAAATTAGGTGGAACTTGTACAATCGGTGCTAATTTTCAACTAGTGTTGCTAGATCATAAGGATCCAGAATTTTCAATTAAGGTTAACAACAACATGGGTTTAGATATGTATACATCTGATAAAGAATTAGCCTTTTTAGACGATGGTTTAGTTCTTAATGCTCGTAATGCTACTCTATCCTTATCTTCTAATGAAGGAATCATCGATGGCGGAGTTACTATTTCTGAATTCAAAGGAGAAAAACTTTCTGCTGATGAGATGAAAAAATTAGGTGGAAAGATCTGCTAGGTATTGAAGTGACGTGCTTCTTCGTGGTATGATAATAAAAGATTTTCGAAAATTTTATTTGAGAGGAAGTATAGATCTATGCGTAAAGGTGAAAACTACAACACTGGTGTTACACCAAACTTAAGACCTAAGAACAAGAAGAACTCTAAGGCTCGTGTTAAGAGAGCTGCTGAAGTGGTTGCTTTCTTAAACAAAGCTGCTAAAGATGAAAACAAGTAATTAAATTTACTGCATAAGATAAAGAGAGACGCTGGTTAGCGTCTCTCTTTTTTTGCATGTAAAAGTTATATAAATTCTAATTTATTTAATAACATTTTGTTGTAAATGAATTTATATTATTATATAAAAAGCCTTATAAAATATGAAAAATTGAACATAGGATAGAATTTTAGATGAATAACATTTCGAGAAAAGGGCTAGAAGTAAAGGCAGTTTTTGCCCTTGTTTTACCCTCTCTTTTTTTAGGACTAGCAAGTAACTTTTCCCGCAATATTTTTTTAGGAGTTATGCTGCTCTTAATTTTGGGAGCAGTCGTCGGCTTGAGCATACCAACTATTGTTAGTACCTGGCTTATTTTGATTTTGACTGTACTAGGGATTAGCTCTCTAACTCTTGGATATGCGGCGATAGATTTTTATGGAAAGACCCTGCTGCTTATTTCTTTT
CCCTTAGAAACATATTTAACTAGTCAAATTAAAGAGTGCGTTTTTAGATGGCAAATATTTAAGAAAAATCAGTCTAGTGCTTACCGTTATCTTAAGCACTACGATCAAAATGTAAAATTACAAACGACATATAATGCAGAGAAGTTATATAAAAAGATTAAGAGAATTTTAACGGAGAAAAATTATTTACCATTGTGGTGTGATTTTACGATTATTGATTGGGAACATGATCAGCAGTTTGCGCAATTTAATTTAGAAGATCATGACAAAATTTTGCAGCAAATAGCAAGAGTACTTAAAACAAGTAGATTAGTTGATGAAAATTTATATTACCTGGGAAATGGTCAGTTTCTAATTATCTCTAATACGATCGCACCAGGTACATTAATGGTTTTAAATGATGAACTAAAAGCTGAGTTAAAAAGGCTACAGTATGGGGAATATCAGCCTGCTTTTAAAATGGCAACGCAGCACATTACAGAACAAGATTTTCTTGAGTATCCTAATTTTAATGCAATTCTTAAACGTCTAAAAAGAAAGCTTGAAACGGACTTAGTTGTTGAATATTTAAAGGGAGTAGAAGCATGAGTAATTGGTTCTTTTTCTTGGTAATTTTGGTTACTTTAACATTTCTAGTGGCATTTTTTGCTATGTTTCGTAGCCTCTGGACTATTTATCATCAAACTGAGCATACAGTGCGTCATTTAGGAAGAAAGCATAATGTTAAATGATTTTTTAGCTGTTTCAACTTTAGTTTCAATTTGGTTTTCGCTTTTAGCTTCGTTAGTTACTCTTTATGGTGCAACTCGTTTTTGGCTTAAGCATAGTAAAAAAGTCATTTCAATTACGCCACTAAAGCATTACCCCGCAATTACGATTGTTGTTCCAGCACATAATGAAGAAGTGGTAATTGCAAATACCACTAAGGGAATTCTTAACTTGAATTATCCTGCATCAAAGGTCGAGCTACTCTTATATGCCGATAATTGCCAAGATAAAACTGCAGCTGAAATGCGTAAGACGGTAGATTTACCTCAATATAAATACAGAAATGTACAAATAATTGAACGGCGTGGCTCAGGCGGTAAAGCAGGTGTCTTAAACGATGCCTTAAAGATAGCGCAGGGAGAATATATTTGCGTTTATGATGCAGATGCATTGCCCGAGAAAAATGCTTTATATTTTTTAGTGAAAAAAGTTTTAGAGAATCCCCAACGATATATGGCAACTTTTGGCAGAAATAAAACACGGAATGCGAAGCAAAATTTTTTAACTAAATGCATTAACCAAGAAGTTATAGTGTCTCAAAGACTTCAACATGTAGGTGTTTGGAATTTATTTAAAATTGGGCGAATTCCAGGAACCAATTTCATCATCAATACTGAATATGTAAAAAAGATTGGTGGTTGGCAAAGTGGCGCCTTGACTGAAGATACCGAAATTTCTTTTCGCATTATGGAAGATGGCTATTTGATTGCTCTAGCGTATAATTCAGAGGCTTTTGAACAGGAACCTGAGCATTTACGTGATTATTATTATCAACGATTACGTTGGGCTAAAGGAAATTATCAAGTAGTGATGAATAACTTTAAACACTTATTTGATACTAGTAACTGGCGTGTAAAGCTAGAGACTTTTTATTTAGCATGTACTTTCTTTTGGTTTAATTTAGCTGTTGTTCTTTCAGACATTATCTTTTTTGTGGATTTGGGATGTATAATTACAAGATTTTTCAATCCAGAAATCCCAATTATCTTTGCAATGAATTCTAATATTCTCTTAATGCAGCTACTATTAATTAACTGGCTATTAATGATCTTGCTCTATGTAATTCAAATAAATCTCGCTTTAGCTACTCAATATGGTCAAGCAACTAGTGATCAAATCTGGCTAGCTTTAGTGTCATATTTTACTTATTCGCAACTCTTCATTGCTATTTCTCTGCAAGCAGTCTGCTCAGTAATTGGAGATAAGGTGTTTAAACGAGATGGTACTAAGTG
GGTGAAAACCAAAAGATTCGCAGATTAGGAATAAGAGATGAAAGTAAAGAATTTAATACTGATTTTAGGAACTATCATTGCTTATTTAGGAATCATCGCCTATGTGAGGATGAGCAATGATCGAACGCTAGAAGAGCGATGTTATCATTCGTGGCGTGAAGATTATATAAAAAATAAAAATCAAAATGAGCAATATGTGAATGCAGCAGGAAGAAATAATCCTTCATTTGCCTTATCTGAAGCACAAGGTTATGGAATGTTACTGGCTGCTAAAGCGGGAGAGAAACATTTAGGAAGTCGGATTGATTTTCAAAAACTAGATAATTATTACCTGGCTCATCGCTTAACAAATTCAAATTTGATGAGCTGGAAACAGAAAGATAGAAAAAATATTTGGCGTGATAATCCTGTGAGTGCAAGTGATGGTGACATAATAATTGCGCAAGCTTTGCTGCATGCTGATAGAGTTTGGCCAGGACATGGTTATAAGTCACAAGCTGTTAACTTAATTAATGATATTAAAAGGCTTGAAATCAATCAGAAAGCTAAAATGGTAACTGTCGGAAACTGGGCTAATAAAGATTCACGCTTTTATAATGTACTGAGAACATCAGATGTAATGCCAAAAGCGTTTGAAGAATTTTATCAAGTAACTGGGGATCAAGCTTGGCTAACAATTAAAAGAAAAATGCTTAACTATTTGCAAAAGTTAAGCAAGAAGTACAATACTGGTTTAGTTCCTGATTTTGCCTGGATTTCTAATAATTATGTTAGGCCCGCACAAGCTAATGAAGTGGCGATTAATGATGATGGTCACTACAGTGCAAATGCTTGCCGAGTTCCAATGTTGTTAGCTGAAAGCAATGATAAAGAAGCAGCTCAAGTAGTAAAGAAGATGCTAAAATTTTTTAAGAAAAAAGGGACTTCAGCTGGTTTTACCTTAAAAGGTGAAAAATTACATCATTATCAATCAGCGAGCTTTAGTGCGCCAATCTTTGTAGCGGCTAGTAAGTATCGTAATCAGGGTTATGATACTTTGATTGAACAAGAAAAATATATCTTTTCAAGACCACTTCCGAAAGATAACTATTACGATGCTACTTTAACGGTTTTAGCTGCTTTAAATACAAATGAGTTGACGGGATTAGAAAAATAGAGAAAGATTAAGTTGTTGAATCTTTCTCTATTTTTGATTTGCAAGAGTAGAACGGTTTTGCTATGATAGTAGAAATTTTAGACATCTTTATATTTCTGGAAGGGGTAAGTTTTTATGGGATTAAATGCATATATTCAAGGTTTTAATAGTTTAGAGTCAATCGATCGTGCACCAGGTTATTTTAAATATCAGCATCATTCTGTCGCTGATCATAGTTTTAGAACAGCTGAACTTGCTCAAATGATGGGAGATATTGAAGAAGTTGTTGGTAAACAAAAAATAAATTGGAAAGCCCTCTATGAAAAGAGTCTTAATCATGATTATACCGAGCGTTTTATTGGTGACATTAAGACTCCAGTTAAATATGCAACTCCACAATTGCGGAAGATGATTGGAGACGTTGAAGAAACAATGACTGCCAAATTTATTAAGGACGAAATTCCAAAAGAATTTCAAAAAATTTATACTAAGCGTTTGTCTGAAGGGAAAGATGATACTTTAGAAGGTAAAATCTTATCTATTTGTGATAAGTTAGATTTACTTTATGAAGCTTATGGTGAAATTGAATTAGGAAATCCAAATCCAGTTTTCATGCAAATGTTTAAAGAAAGCCTTGAAACGATTAAAAAATTTGACGACTTAACTTGCGTTCAATACTTTATTAAGAAGATTTTGCCAGACTTATTTAAAGGTGACTTTGCTGGTAAAGATAAGATGCAAAGAATTGCTTTTAGTATTTTACTAATGGGGGAAGAATAGTTTGAAATTTCAGTGCGCATCCTGTGGGTTGCGACTTGACAGCCTTCATTTTAAGCAAGAAGGATTAATGAATCCCAAGTTAACA
AGTATTTGTGATATTTGCCTAACACGCAATCTAAATGCAGAAGATTATGAAAATCCAGTAATTGAGAATATTTCCCAGGTAATGCTCTATGGCTTTGTAGGAAAGAAAAATACGGGTAAGCTTAATCTAGAGTCTTTAAATCAATATCTTGTAGGAAGCGATCATAGACGCCAATATGAATCTTTTATTCAGGATTATGATGGTAATGAAGTAGCTCTCCAGCAATTGTCACGATCTGAATTTAATCAGGCAATTATTAAGAGCGAAGATAAAGAAATAAATTATATTCCTAATAGCAATGTTGATTTTGCGGTTAATATGAAAAACATGGGAATTGAAGTTGACTTTTCTTTAAATGAAGTCGATTTTGTTGACCGTGAAAGAATTCGTCATAAATATAATTATCGCTGCCAATATTGCGGTAGACGCGGGACGAGCGTAGATCACAAAGATCCAGTTTCTTTATCGCATGATAATTCTTTTGATAATCTGATTTTGTCATGTAGCGAGTGTAATCGAATTAAATCTAATATGCCTTATAATTTGTTTACTAAGCTGAATAATCAACTTACTGTTGTAAATAGAAAATTGGTTAAGTACGAAGATACTCTTGCTAATTTAAAAGAAGAATTTCAGCAGGCAAAACGTGATTTGGCTGGACAAGTTCATTTAAAAGGGGTAGTTAATGACCCTGAATTAAATGCGATGAGAAAGCAAAATAAAAAGCTTCAGGATGCAATTGATAGTTTGCAGAGCGATTATGATGCTTTAAGGAAATTGCGTAAGACATATTTTGAAACTGGTTGGAAACTAGCACAAGAAAAAGAAAATAGTGAGATTATATAGTAAAAAAATGCATTGGACATAAAAATCCAATGCATTTTTTGCTATCTTAAAGTTCATTACTTCTAAAGTTACGTAAGAATTTCTTAGTCTTCTCTTCTTGAGGATCATTAAAGATCTCTTGTGGTGTACCTTGTTCAGTGATAACTCCATCACTCATAAAGATAACTTGATCAGAAACGTCACGAGCAAATCCCATTTCGTGAGTAACAATGACCATTGTTAAGCCAGTTTTAGCTAGTAACTGCATAGTATTAAGTACTTCACCAACCATTTCAGGATCTAGCGCACTAGTTGGTTCATCGAAAAGTAAGATTTCAGGATCCATCGAAATTGCCCGCGCAATAGCTACACGCTGCTGTTGTCCACCAGATAATTGTTGAGGCTTTGCAGTTAAGAAAGGTTCCATGCCAACTTTCTTTAAGTTCTCAATTGCAACTTTTTTGGCTTCTTCTTTAGAGCGTCCAAGAACCATTTCTTGACCAATCATACAGTTTTGAAGCACGTTTTTATTATTAAATAAATTAAATTGCTGGAACACCATACCAACCTTTGATCGGAATTTATTGCGATCAAAATGAGAATCTAAAATATTGTTACCATGGAATAAGATCTTACCATTGCTTGGTTCTTCAAGAAGGTTAATACATCTAAGCATGGTCGATTTACCACCACCAGATGGCCCAATAATGGTCATTACTTCACCTTTATTAATATCAAAAGAAATATCTTTTAATACTGTGTGGTCGCCGTATTTCTTTTGTAAATGTTGTACTTGTAAAATGTTTTCTTCGTTATTATTGTTCATGTTCAAATCCAGTCTCCTTTGGATCTTCTACTTGTAATTGATTAGCCATTAAGTTGTAGTTCTTTGGACCTTCAAGCTTCTTTTCAATGAAATTAAAGATTCTAGTAATTGAGAATGTCAAAATTAAGTAGATCATTGAAATAATGAAGTAAGTTTGGAAGAATTGGAAGGTTTGACTGGCAACAGTTGTACCAACAAAGAACAATTCTGATACAGAGATAATACTTAATACTGAAGTATCCTTAATGTTAACAATAAATTCGTTAGTAATTGATGGCAAACAGTTTCTAATTGCTTGTGGTAAAATGATATGCCACATTTGTTGTGAGTGAGTCATACCGATAGCAGA
AGCTGCTTCAAATTGACCTTTAGGGGTAGCGTTAATACCACCACGAATAACCTCGGCTAAGTAAGCACCAGTATTAATTGAAACAATTACTAGAGCAGCTACTGTACGGTTTAAGTTAAGGTGCCATAATTGGGCAATTCCGTAGTAGATAACAGCTGCTTGAACCATCATAGGAGTTCCACGGAACACTTCAATATAAACTGCTAAAATCCAATCAACAATCTTTAAGCCCCACTTCTTACCAGTAGACTTAGGAGTTGGGATAGTTCTAACAATACCCACAAGTAAACCAATGAAGAAACCAACAATAGTACCAACTAAAGCTAAAAGAAGAGTCATTCCAACACCGCCAAGGATCATTGAACCATAGCGATGCATAATTGACATAAACCAGTTTTCTTTCTTGCTGCCACCTGCTTGAGGCTGTTCCTTTACAGCATCAGCCATTAATTGGTCGCGCTTTTTCTTAGAAATACCATTTAAAATGCTATTAACTTGGTTAAGTAATTCAGTGTTGCCTTTCTTAACACCAATTGATGTAATTGAATCATCGTGGTCAACTTTGAATCCTTGCATCTTGTTTAAGTTAACGGCAACGATGTTAGGGTTAACTGCCTTGTAACTTTGGTATTCAATATCTTCTGCTACATAACCATCAATAGTACCTGATTGTAAGCTTTGACGCATTGCAGAAAAGCTTCTCATTGCTGGTTCACGCTTAGCGCCATGTAATTGCTTAATTAGATCATAGTGCAATGTACCTTGTTGAGCAGTTAATTTTGCGCCTTTAAAATCATTTAAGCCTTTTGCTTGAGCAAATTTTCCAGTCTTGCTTGTAATAACTACGAAAGTACTCTTTCTGTATGGTACAGAGAAGTTGATTGCCTTTCTACGTTCAGCAGTAGGAGACATACCAGCAATAATTAAATCAATTTTGCCAGAAGTTAAAGCTGGTAATAGACCATCCCATTCGGTCTTTTCAACGACAACTTTACGATGAAGCTTTTTACCGATAATTTTTGCGATTTGTACATCATATCCATTTGCATATTGCTTAGATCCATCAATTGGAACTGCACCATTTGCATCGGTTGTTTGAGTCCAATTGTATGGTGGGTAATTAGCTTCCATACCTATTTTTAGGGGTGCTTCTTTTTTAGCCGCTTCGGTTTGACTAGAATTAGAGCTAAAACCGATAACTAAACTTAAAATTAGGGCTAATAAAGCACTAAACCATCTTATTTTTGACTTCACTTTGAAATACCTCCAAAAATTATTTCAAATAAAGCCAATACAATAAAAGCGTAAATAAAAAGCCTCGCTGTTTACCAAAAACAACGAGGTTTGAAAATCATCAGTAATTAAACCAAATCATATAGCGCTCCCTGGGGACTACCAGGACAGTATGTAAGATATGCTCTCACATCCCAACAAGCTTAATTCGCGAAAGTAAGCTTGTTTCGGCGATAATCCTAAAAACTACTGTCAATGTATTCGCGTACTCAAGTAGATTTTGTGGTTATCGCAACCTCTATTGCATTGGGGATTTATTTAACTGATGAAAAGATTAAACGATTTTTAATATTTTGTCAAATAAATCCGTGATTTTTTTTGACATCTTTTTTCTTAATATCTGAAACATAGGGATTACCAGCAACTATAAATCTTAGCTTTTTCTTGGCCCATTTAGGATCTGACTGATTGATCCCAACTCTTGGAAGAGCAGTAATTTTTTCAATTTTTCGTTTATGATTAATATCAATATCAAAAGGAGAATCTTCTAAACGAGCTAAGTCCCATTTTCTACTAGTAATTCCAAAAGCCTGCATCATCTTTCCAGGGCCATTAGTTAAAAGTGGACCAGTTTTGCCGTTGCGATTTTTAATCATGGTATCAATTCCGGTCAGTGGGTCAATTGCCCTAATTAAAACTCCTTGTGGTTCACCTTCTGCTTGGCACGCAACATCAAAGAAGAAGTATTGTCTCTGAGAATAGATATA
CAAACTGCCACCAGGACGATATAAGCCTTCATTTGCCTGACTGCGTCTGCCACCATAAGAATGGGCAGCCCGGTCTTTTACACCAACATAGGCTTCTGCTTCAACTATTGTGCCTGATAAAAGTTCATTTCCATTGTTGAAAGATAAAGTTCTACCCAACAGATCTTTACTAATTTCACTAGTAGACCGATTAGTAAAAAATTCTTCATAGTTCATATTACTACTTCTTTCTAAAAATGTTGTACACTCGTACTTGAAAGTAAATATAAATTGTAGGGGGAATCAAAAGATGAATGAAATAAAAGTTGTTAGAATTTATGACCATGAGCAACCCGCAGGTTATAGAATCTTAGTAGATCGCTTATGGCCTCGAGGAATGAGCAAGGTAAAAGCTCATTTAGATGAATGGGATAAGGAAATTGGGCCAACTAATGAATTAAGAAAATGGTTTAATCATGAAGATGACAAGTTTCCTGAATTCAAGACTAAGTATATAGCGCAATTGAAAGCTAATCCAGTTACAACCGAGTTTGTTAAAAATGTAAAAGAAAAGCTAGCTCAAGAAGACGTTATCTTCTTATATGGTGCTAAAAATAAAAAGCACAACCAGGCAGTTGTGCTTAAAGATTTTATTGATAGTCAATTAGGTTAAGGCATTTTTTTGCCATGCAAGTTTTGTCGAACGTAATGAAAAATAATTGTTAAAATAACTGTACCAAAAACGATGCCGGCAAAGATGGCATCGGGCAGTTTGAAGTTTAGCGGTGGCAATGAAACAAGTAGCTTAACTGCAATGATTGTGATTAAAACATAGGCCATTGTTTGAAGTTCAGGAATAATTTCCATTAATTTGATAATAACTTCAGCAACTCCTCGCATGCATAGAATACCAATCATTCCGCCAACTAAGACGACAACTGGATTATTTGACATGGCTAAGGCTGCTAAAACAGAGTCAATTGAAAAGACGATATCCATTGACTCAATTGAAATAACAGTTCTCCAAAATAATGAAAGATGGTGCTTTCTTGTCTTGGAGCTCTTTTTCTTATGGGCTTCTTTTTTTGCCGCTTCTTTCTTAGCAACTTGTGCAGGGTGGCGAAGATCATAAAAATACTTATAAACTAAGTAAAGTAAATATAAACCGCCTAACAGTTTGATTTCCCAGAAATTAATTAGGTAAGTACCAATCCCAATTACAATAAAGCGAAATAGATAAGCCCCCCAAAGTCCGTAAACTAACGATTTCCTTTGCTCATCTTTAGTTGGTAAAACTTGCGTTTGTGCGGCTAAAACCACCGCATTATCAACTGATAGCAAACACTCCATTAAAATTAGAGTCAGAATCATCATCCAATCTTGTCCGCTAGTTAGGACATGAGCCCAGTTATTAGCGTCAAAAAAGGGTTGATAAAGTTTAATCAATTGCATTCAAATAGCCTTCTTCAAGTTTTATTAAGCGTTGCTATTCTTTCTCTCCCATTCTATAACTTCCAACCTTATCTGACGCTAAAATTACGCGATGACTTGGAATAAAAATTAGGGATGGATTTAGGGCTACCGCATGTGCAACTGTTCGAACCGAAGTTGAGCCATTAATAGCAGAAGCTAAATCATTGTAAGAAATGGTTGTTCCATAAGGAATATTTTCAATCATTTTAACAACTTGATTTTGAAATGGAGTACCAAAGCTTGAGTAGTCGATAGGAACGGTAAAGGAGCGACGCGTGACTGCAAAATATTCTTTTAATTGTTGAACATAAGGAGCCAATTTTTTGGAATCTTGGACCAGAATGTGGTTAGGATAAAATCCTAAAATAGAAGATTCAATATTACTATTCTTGGAACCGACGAATGCTAATCCCAAGTTACTAATGACTAATTCATAGGTCCATGGCTTAATTTCAACAAAATCGTAATAAAAAACTTTTGCTTGGGCAAGTGGCATTTAATCCTAACCTTTCTTTTAATTAATATATTAACTCCATCTTATATTGTAC
AACAAAAAATAACGAAACTGTCAAATTTTTTGTGTAAATAGAT +>5_3#NODE_14_length_39993_cov_63.1132_ID_27 +ATTTCGGTACGGCCTAAACATGGCAAAATAAGCGCATCTTTACCACACACCAAGTGGCTACGATTTAACTTGGTTGCCACATGTACGGTTAAATTACATTTTCTAAGCGCTTCTTGTGTATAAGCGGTGTCAGGTGTTGCAACGGCAAAGTTTCCGCCCAAACCAATAAAGACTTTGATATCACCTTCGGCCATGGCTTTAATGGTTTCAACTACACCATATCCATTTTTTCTTGGTGATTTAATACCGAACACACGGTCAATATTGTCGAGAAGTTTCGGGCTAGGGAGTTCATTAATGCCCATAGTGCGGTCGCCTTGCACGTTACTGTGCCCGCGAACCGGACAAAGCCCTGCACCCGGTCTACCAATTTGACCACGCGCCAACAATAAATTTGCCAGCATATGTACGTTAGCAGTTCCGTGACGATGCTGGGTAATCCCCATACCCCAACAGAAAATTGAACGCTCTGACTCTAAAAAGAGTTTAGCCAGTTTTTCTAAATGTTCAGGTGACAGACCTGAGTGCTTATAAAGATCTGCCCACTCCGTTCTTTCGACCTCAGCAATCATTTCGTCGAAACCAACGGTATTGACTGCAATAAAGTTTCGGTCAAAAACGCTTGGTTTACCTGAGGCAAGGGCCTTTTTATCCCACTCATGTAAGTGCTTTAAAATACCTAACATCAGTGCATAGTCGCCGCCAATTTTAGGCTGGAAATAATAGCGACTAATTGGCGTACTGCCATTGGTCATCATTTCAAGCGGTGCTTGCGGGTCTTGGAAACGTTCGAGACCTCGCTCTTTAATTGGGTTAATCGCAATAATGTTACCGCCGCGTTTAGACACTTCACGCAAAGTACCTAACATACGTGGATGGTTGGTGCCTGGGTTGTGCCCAAAACTAAAAATGGCGTCGGCTAAATCAAAATCTTCTAGGGTGACTGTACCCTTTCCTAAACCAATCGAGTCGAGCAAACCAACACTTGTGGTTTCATGGCACATGTTTGAGCAGTCGGGAAAGTTATTTGTACCAAAGCTTCGCACAAACAATTGATATAAAAATGCTGCTTCGTTACTGGCACGGCCCGAGGTATAAAATGCAGCTTGGTCTGGATGATCTAGCGCATGTAAATGTTGAGCGATGAGTTTAAAGGCATCGTCCCATGAAATAGGCACATATTTATCGGTTGTAGCATCATAACGAACTGGGTCGGTTAAGCGGCCTAGGTCTTCTAGGAAAAAGTCACTTTGTTCTGATAACCAACTGACGGTATGCCCTGCAAAATATTCGGGAGTGACGGTTTTACTGGTTGCTTCAAAAGCAACCGCCTTTGCACCATTTTCACAAAAGTTAAATGCGTGAGCGTCTTTCTTTTCTGGCCATGCACAGCCGGGACAGTCAAAACCTGTAGGCTGGTTAATGTTGAGTAAGGTAATTGAACCTTTACCTAAAATATCTTGTCGCTTTAAGTTACGAGCAACACTTAGCAGTGCACCCCACCCGCCAGCAGGATGAGTATATGGCTCAATTCGCGCAAAACCGTTATTTTCCTGTATGTGCTTTTGTTCTTCTGAATTATCCACTTTTTCATCCTCTTATCTTTTTTACACTCAACAGGCTCAGTGAAATCACAATTTAATTATGGTTTTATCAAAGCTACCCGCTGTTTACCGATGAGTAGAGTCCACTTTTTATATTATTGCCACTATGCTAAAAGCCGGCTCAACCGTTCTTATAATATGTGCCATATTCTTTACAATTACAATAATTCAAGCCAACCACGTTATTTATATAAAAACATATAAATCAAATAGTTAAATAGATATTAATTCTATCTTTTACAGATAAAAACATTATTGGGTTTCATTTTTAACCGACTGAAAGCTTTTTAAAATTGACACTTACAC
GACTTTATCTATGTTTTTTTCGCATTAAGCAGTATAAAACAATTAAAAATTTATCTGCATTAGAGCATTTATGAAGGTTGTGATCGCCCCTGATTCATTTAAAGATAGCTTATCTGCTTTAAAGGTAGCTCAAGCCATTGCCAAAGGATGGCAAGCGGTTTTTCCCGATGCAGAAACGATTTTGTGCCCCATGGCAGATGGCGGTGAAGGAACCATTGAAGCAGTTTTAGAAGTCTGCGATGGACAATGGCGTGTAAAAACAGTGGTTGGACCATTAGGTCAGCCTGTTCAGGCCAAATGGGGTTGGTTAGAAACACAAAAAATTGCCATGATTGAAATGGCTCAAGCGAGCGGCATACAGTTAGTTCCACCGTCTGAGCGTGATGCCTGCCACAGCACAACGTTTGGCACAGGTCAGCTCATTTTGGAGGCTTTAGACGCAGGCGCAAAAGACATTATTTTAACTGTGGGCGGTAGTGCGACCAATGATGGCGGCACAGGCTTACTTAGTGCTTTAGGTGCAGTTTTACTCGATGCTAACCAAAAGGTTTTACCCGCTGGCGGCCTTGCCCTTAGCCACCTATCCAAAATTGATTTAACACATTTCGACTCACGTATTCAGCACACACGGTTTTTACTAGCTGCCGATGTGACCAATCCACTTTGTGGCCCAAACGGCGCATCACATATTTTTGGCCCACAAAAAGGGGCCTCTCCTGCTCAAGTCCAGTTACTCGATGCAGCCCTTGCCCATTTTGCTGATGTCACGGCACAATTTTTAGGTTTCGACAAACGAGATGAAGCGGGTTCTGGGGCTGCTGGTGGTTTGGGCTTTGCCGCCAAAAGTTATTTAAATGCAGACTTTAAAGCTGGAGTTAAAGTTGTGGCTGAACTGAATCAGCTTGAGCATAAAATTTCTAACGCAGATTGGGTAATTACGGGTGAAGGTAAATTTGACCAACAAACCTTAAGCGGTAAAACCGTTTTTGGGGTAAGCCAAATTGCCAAAGCCCACAATGTTCCGGTCATTGTGATTGCAGGGACTTTAGGTGAAGACTACCAAGCACTTTATGAGCACGGCGTAAGCGCTGCTTTCTCACTAGCCAATGGCCCCATTACCCTAGAACACGCCTGCGAACATGCAGCAGAACTCATTTATGAGCGAACAGTCGATATAGCGAGATTAATTCAGTTTAGCCAAACCAGCTTAAATGCTAAAAAAGTCTAATCGGCTAATTTTAAAAGACTCTCTCTACAACTTTCCAGGATTTCATCGCTAAGGTTTGATTGGTCTGGGCATGCACGAGACAATGCAATTGCCCCTACTGACTGAGCAAGAATATTAATAGCTTGCTTTCTCAGCTCTTCTGTTTCTAAAGAAACTTGTTTTGCGCGTTCATTCATAATGAACTCAATAAGCTGCTCAATACCCTCTTCAAACTTGACTTTTACTTCGTCGGGCTGTCTAGCGGCATCACAAGATAGCGCTGTTAAGGTACAACCCTGATCTCGGTTGTCTCGATGTGTACGAGACACATACATATGAATAAATTGGGTTAAGTTTAAACCTTCTATTCTTTTTAAAACCTGCTCAAGACCATGTTTGGCAGTAATAGTAACAAGGTCTGTTTTCGAGCTGAAATGTTTATAAAACCCACCATGAGTAAAGCCTGCGCTAGACATTAATTCGGCAATGCCCACCCCGTCGTAGCCTTTACTACGAAACAGTTGAGTCGCCTTTTCAACAATTTTATCTCGGTTTTCTTTTACCTGAGTTTTACTGACTTTCATGTTGGTGCGCTCACTTTTCTTCGCCCTGCTCGTCATTATTATACATTGATTATGATTGACATCAAAACCTGTATATTTTAGATTATAATCATCATCTAAAAAATTAAAGCGTGGAGACATCACATGTCATCTCAATCAAAAGTACTTATTACAGGGGCTTCGTCAGGAATTGGTTCTGTTTATGCAGATCGATTTGCTCAACGTGGCTA
TCACTTAATCTTGGTGGCACGAGACACCCACCGTTTAGATAAAATTTCAAAAGATCTTCAAGAAAAATACGGTGTACAGGTCGAGTTCATTCAAGCCGATTTATCAAATGATCAAAACATTTGCAAAATTGAAAATGTACTCAAAAACGATGCTGATATTGAAATTTTGGTAAATAACGCTGGGATTGCCCTGAACGGAAACTTTTTAACTCAAGATCGTAATGAGATTGAACAACTTCTAACTTTAAATATGACTGCGGTAGTGCGGTTGTCTCATGCAATGTCGCAATCCCTTATACGCAAAGGTAAAGGCGCCATTATTAATTTAGGCTCTGTACTTGGCTTAGCGCCTGAATTTGGTTCTACCATTTATGGCGCAAGTAAATCTTTCATCCAATTTTTTAGCCAAGGCTTACATTTAGAGCTAAAAGATCATGGGGTACACGTTCAAGCTGTATTACCTTCTGCCACTAAAACCGAAATTTGGGAACGTTCAGGTATAGATTTAAGCCAAGTTCCGCCATTAATGGATGTCAATGATTTGGTCGATGCAGCCCTGATTGGTTTTGATCGTAAAGAAACAATTACAATTCCTGTGCTGAAAGATGAAAATCAATGGAATAATTTTGAAAAATCGCGTATGACATTATTACCAAACTTCTCTTCGGCTGAAGTAGCCCAACGTTATAAAAACTAACGACTAGGGCTACCCAACTCTCAAGTAGGAAAATCTTATGAATGTTATTGATGCTCTCCGCAAACGCCGTGCAGTTAAACGCTTCGACCCTGCTTTTCAACTCTCTGAAGATGACAAGAAGCAGTTATTACAAGAAGTTTTAGCAAATGCACCAAGCGCTTTTAACTTGCAGCATTGGCGTCCAGTCATTGTTGAAGATGCAGAGTTAAGACAAAAAATTCGCGCGATTGCTTGGGACCAACCACAAGTAACAGAGTCATCATTACTGATTGTGTTATGTGCCAAAGTAAATACATGGGAAGTTGACGCTAAACGTGTATGGGATGGAGCTTCTCCAGAAGTTCAAGATATTATGGTTGGTGCAATCGACCAGTATTACCGTGACCGCCCACAAACCCAACGTGACGAAGTGATGCGTAGTGCGGGTATCTTTGCTCAAACCCTTATGTTACTTGCTCAAGAGCATGGTTTAGATAGCTGTCCAATGGACGGTTTTGACTTTGATGCAATGGCAAAGCTCATTAACTTACCTGAAGATCACGTGGTGTGTTTAATGATTGCGGTTGGTAAATCAGCTTCTGAACCTTACCCGCGCGTAGGCAAACTCCCTTACGATGATGTCATTATTAAAAATACATTCTAAGTTGTAATTCAAATTTTAGGCTTTCATTCTATTTTTGTTTTTATACTAAATAGATGGAAGCCTTTAATTTTGCATTAATTATAAAGCTGACTTTGACTCGGCTATAAACTGTCTCAAAAAGCAATTTAATAATCTCTCCCTTTCATCCGCGTACTCTGCACTCAAAAGCTGAATGATTGTGCCTTCCACCATATATAAAAATAGTTTGGCATCGGTAAAAGAGACATCGGGTTTAAGCAATCTAAGCTGGCTATAAATCTCATTTTTTAGCCATGTTCTATAGCGCACTGCGGTTTGATAAGCGTTGGGATAGCTATTTTTAATTTCAAAAACGGCTTTAAATAATAAGTAATACGGCCCTTCTACATCGGTATGTAAGTCATAAAGCTTTTTGAGTTTATCGATGGCAGGAGTGCTTAAGTCATATTCGACCATTGCGACTACTTGTTCTTGGAGCTTTTCTTTTTGCACCATCAGGCAGATTTCAATAAGCCTTTCTTTAGAGTGAAAGTAATTATAAAAAGTCGCTTTTGTAATCTCTGATTCTTTCACTATTCTGTCGACCCCAATAAGGTGAAACCCACGGTGGTGAAACAGATCAATTGCTTTATTTACAACGTATAAGGCACGTGTTGGGAGAACTAAATTTGGCATAGTTTT
ACCGTTATAAATTCTTGTTGTTTGTAAATGAGATACTTTTTTGGGAGAAAAAAAGGCACAGCAAAGCCTATAAAGACTGTGCTTGGCACATCTCAAGTTTTATTGTTGCTAAGTTTTTGGTTGTGACGATTTAAAGCCTGAAAGCGCTAATGGCCCTCAAACTGCTTTAAAGCTTATGTGGTGTCGTTATAGCTTTTGGCATTGAGAGCCAAGAAATAATGTATGTGCAAAGCCAACTCCTTTTTATTGGGAGTTCTGCCAGACATGAATAGGAATTATGGTGGCAGAACGGAAGAAGGTTGACAGACTAGTATTATCTCAAACTAGCACACCCGTAGGTGTCCCTCTCCCGTCCTACCGCTACGGGAAAGGGACGAGTAGTACACCACACAAGACTGTTCCTCAGAAGGAAAAGCTCTGATGCGGGAGATAATATTCAGCCTGTCAAAGCCGGCTGGCAATGTGGCCAGCAGGCAAAGAATAGTGCTCTGCTCTTTAGCAGTCAAGCACACAAAATGACATTTGTTTTAAATTAGATCATTAAAAAGTAAGGATAAATAATGAGATAGTTATTGTTAAATGGGGGTTTATAAAGGGGAAATAAAAACAGCCTAGAAGACAACGATCAAAAAAATGAGGCTTATTATAAGCCCCGTTCCGTTAATTATCTTGTATCTTCACTAGGGTTCGATGTTGATGAAGTACTATTCTTTGATTCTATTTCAGTTGCTTTTGGAGTGATTGCTATTACAGCTTTACTAGCAGCATCTTTAATTTGTTCGGCCACTTGACCCGCAAAACCTGGTACTGTTTGGGTCGCCTGTTTTACTATATCTGATGAAAACAATTCATGCCACGGACTTCCATGATTTTCATTCTCTACAAATCTCAGTGGCTGCTCATCTAGGCGTGATAAAGCCGATGCCAAAAGTCTTGTTTCCATATTCTTATCAACTCTAGCAGCCTCTCTACTATAGCCTTCATATGCACGAGATACAGAAGCTTTAAATGCATAATCTTCAGATAATCTAAAACGCTGGCCTATTTGTTTTGTTGCTAACCAAGAAAACCAAATTGGAGCGCCTAAAGATAAAAGAGAAAGAAGTAAATTAATAAAAATAATGATCGTTGAGCTATGAGTATCTTTTAAAAGCTCAGATAGGCTTGATAAACGTTGAGAACCAAAATAACCTCCAACTCCTAAAGCAAAAATTAGGCCTGCAACCCACCATTTTATAGACCTAGACAAGATATTCGAACGTTCAGTAAATGCCGCTGCCAATCCAACACTTGTTGCTGCTGAATATGCAGTTTCACAGCGTTCTAACACATTTTTAGCATTCAAAGCATAATTATTAATTTCTTCATCAAGTAATTTCGAATTTTGTTTTAATAGCTCAATTTCTTTATTTTCTTTCATTGAATCTGAAAACAAATTATTAATTTGTTGTCTCGCTTCCTTTAAAGAGGCTAAATCAGTCGGTAATTGTTCAGCTGCCTCCGATGCTTGTTCAATTCGTTCAACCATTGAAGTCAAAGAGTCTGTGCGAGGTTCTACTTCATGTATTTTTGCTTCAATAGAGCGAATACGAGCATTTAATTTTTTTAATTTTGTTAAAGCTTCAACTTGGTCAGCAGCAAGTTTATTTGCATATGGAACTAAAATAGACTCAAGTAAATTTTTAAGGCTATCAATAGTGAATAAATATGCTGGAATTGCTAAATTGACGTTAGATGACCACATTTGGGGAATGGTATGACTAACAAGATATTCTAAGCGTTTAATATAATTAGATATAAGAGCATCTTGATTCTGATCCAGTTCATCAGTTTCATAATTTTTAATAAAATCAATAATATGTTTTGATTTAGAAATGAGTTCTGTTTTGTTTAAGTTAGGACTACCCCAACTATTATGTGTTTGTGCTAAAGGCTCATTATTTTGAATAACGTTATTAAGTTGATTATTAACTGCTTCTAATTGCTGTAATATGTTTTCT
AATGTAGCGTGCATGACTAACCCTTTCATTGGCTTTAAATTATTTTATTTAAACAATTATAATACATATGTAGGCTAGAAAGTGAACAAAAGAAAATAATGTGACATGTCATTCAATAGTTTCTATCAAATATAATAGACAATATAGATAAAACGGGATTTTCATATTAAGGTCAAATAACCTTTTAGTGTTGTCTTTAACTTTATATTTATTAGACAGCAAAATTTATTTTTTATACAGCAACAAACTCATTTTTTTAGTACTTATGTGATTTATATATTTTTATAGACAACACTAATATTTAGTACATTCTTTATTAATGTCCAAAATTAACTATAGCTTAGCTACAAAAAAGCCCCTGTTCTACAGAGGCTTTTTTCAATTCAACAACGTATTATTTATGAACCCAAGGCACAGCCGGTGAACCAATCCACGCAGATTTAGCCGGAATATTTTCACCTTTCATGACCAGTGTTAATGGCCCTAACACGGCATGATTGCTCACGTTTGCGTTATACAAAATAATACTGCGCGCATTGACCACTACATCATGCCCCACGTTCACTTGCCCGATTTTCATAATACGGTCTTCAAACAAGTGAGTTTGTGGGCCTGAAAAACTATTAAACTCGGCACGGTCACCAATGCTGACACAGTCAAACTCGGTAATATCAGCCGTATCCATATAAACGTCTTTACCAATACGAACCCCTAAAATACGCAAGAAGAATGGCAACATTGGCGTTCCTCTTAAGTAGTTTAAGAAGTTAGGAATCGCAACTGATTCATATAGGCTGGTAATCCCTTCACTTAACCATACGAACATGGTCCACATAGGTGCCGAACACGGTTGATAGCGACCAATCAAAATCCATTTTAATAGCGCAACAATAAGGAAACAGCCAACACCATAAAGCAGCCCTGCGAGCGTTAATGCAACCAGTCCTGTCTCAATATTGTAGTTATTAATCACATCAATAACGTCTAGCACGATCATATAACCCACCCCAATCGCAAGTGCGGCCGGCAATACAATACGAAGCCCTTCAATAAAACCGCGCATTAAACGGCGCTTAATACTTGGTTTAAAGGTTAAATGGTCAGGATATTTTTCAGCAGCTTCACGTGCAGGTAACAGCAACGCCGGAGAACCAAACCAAGTTTGTCCATCATACATTTCACGGTTATCTGGTGTTTTTGATTGCACGCCAATTAAAACATTGTCTGGTAACACTGCCCCATCAGCAATATAAGCACTATTTCCCACAAAACTACGGTTACCGATTTTGGTTGCTTTTAAAGACATCCAACCGCCTTTAATCTCTTCATCACCCAGCATTACCGCATCGGCAATAAAGCTTTCTTCACCCAAAGTCAGCATTTCTGGAATGACACCAGTTGCGGTAGAAATTTCGGTATTTTTACCTACTTTTGCTCCCAGCATACGAAACCATGTAGGCGCATAAATAGTGGCAAACAAGCCATGTAAGGTTTGTAAACTCGTCTCTAAAATTTGGGCCGCAAACCATTTACGATAATACGTGCTGCCATGAACTGCATAGGTTCCGGTTTCAAGACGCGGCAATGCAATTTTACGCAGACCTGAAGAAATCACCGCAGTAATCATCATCATCATTGCACTAGCCGGAATCGCTAAAATGAAATAGTAAAGCGCGATTTGCAAATGGTTGTTCGGGTTAATATTAAATACATTTACATCTAACCAGTCGACCAAAAGGAAGCTTGGAAAAATTGGAATAAAGAAAAGACAGGCAATAATTAATGCGCTAACGCCATAGTATCCATATTCAGCAATTTTACGTATAAACGATAGTTTAGGACGCTCTGGCAATTTAGCTTGCTCATCTATATGCCCTATTTTCTGAGCAGGTGTTCCATCCCAAATTTCGCCCTCAGGTACAACCGTGTCATATTCAATTGAAGTTAAGGCATTTACATGAGCTTGTTTTTCTAACACTGTATTTTCTTCTAATACAGCA
TACGAACCTACATAACTGTCTTGTTTAAGGTGAATTGAACCTAATACCAAGTGACCATGTTCAACTTTGGCATTTTCTAAATTCACCTGAGAACCGATACTCACGCCATCTTCAATAGTCAGTAAAGATGGCATACGAATATGTACAGAACTAATGGTAACGTCGTGACCGATTTTTGCACCAAGTGCTTTCAAATATAAGTTAAGTAAAGTCGAACCCGACAAGAGATAGACAGGTGAAATATTACTAATACGGTCCGCTAGCCACCAACGGAAATAGGTTAATCCCCATAATGGATAACGCCCTGCGCCAATTCCTAACATTAATAGTCGTTTTACCGTAATCGACAGTACAAAACTGCTCATAATGACACTGACATACACCAATAAAGACAACGCAATCGCATACGGAATACTGTCGCGAGTTCCACCGGTAAAGTAGTGATAGGTAAAGAATGGCGCTAACCACTGCAAAATATTAATCGAGATTAAAACCGGAATAGTTACAAGCTGTGCAATACCACATAACCATTTATAGGTCTGATTACAAGGGTTATCTTGTCCAATTTGGCTATCAAATAAAGTAGGTTCAGGCTGCTCTAACATGAGTGCAGCAATCGCGCCTACTCTTCTCGCTTGATATAAGTTTTGAATGGTCAGATGGCTATATTCCGCATGCTCACGCAGATTTGAAATTAAAACAGCGGCAAGTAGTGAATGCCCGCCTAAATCATCAAAGAAATCAGAATCGAGTTTAATCGGCATGTTTGGGAACAAACGATTCAAAATTTCAAATAGGATTTCTTCGGCTGGGTTCTGTGGCTGATCAGATTCACTGCGATCTACCACACTTGTGAGCGGTCTTGCTTTTAAAGCTTTACGGTCAATTTTCCCAGATAATAGACGTGGAACTTCTTCAATAATCTCAAAACGATTTGGCACCATATAAGGCGGTAATCGCTGACTCAAGTTATGGCGTAGTTCTTTAATTTCTATAGCTTGCTTTGCATCAATTTCTGGTGCAATAAACGCAATAAGCTGATCAATGCCGTCTTCAGGACGAAGAATAACGGCTGCCGTACCAATTCCGTCAATATCGCAAAGCGCAGCTTCGATCTCGCCAAGCTCTACACGGAAACCACGGATTTTAACTTGGTCATCGGCACGACCTAAACAGTGTACTTGACCAAACTCATCAATTTTAGCCAAATCCCCAGTACGGTAGAGTAATTCTTCCTCTATACTCATTGCCCAAGGGTTCTCAATAAACTTATCTGCGGTTAAATCAGGTCGTCCTAAATAGCCTTGTGCCACACTTGGACCAAAAATACATAACTCCCCTGTTTCACCTTGCTCGAGCAACTCTCTTTCAGAGTTAATGACCAACATACCGTAGTTTGGTAAGGGCTTGCCAATGGTGACAGGTTTGCCGCGTTCCAATAACTCAAGACTTGCAGAAACCGTGGTTTCGGTTGGTCCATAGGTGTTAAACATTTGATGATGAGGTAATGCCCAGCGGTCAACCAGTGAGTCCGGACACATTTCCCCACCTAAATTAATAATTCTTAAATTAGGTACGTCTTCAGGAAATAAGGCAAGTAACGTTGGAACAGCATGCAGTACTGTAATTTGTTCCTGCTTTAAGGTTTGGCATAACCGCTCAGGGTCACTGACAAGTGACTTCGGTGCAATCCACAACGTTGCACCTACCAAATAAGAAAGCCAAATCTCTTCAAACGACATATCAAAAGCGACTGAAAAGCCTTGATAGACCTTGTCTTGCTCTTGAATACCCAAGATACTATTTTCACTACGTAAAAAGTGACAAATATTTTTTTGAGTAATCACAATTCCTTTGGGTTTACCTGTCGAACCCGAGGTATAAATAATATAGGCAGGTTGATCTGGAGTGGTTTTCGCTAAACTTACACTTTCGCTTAATGGTTTTTGCAGCTCGGTGTTGGTCCATTTTGTTTGCGGCACTTCAGCTAAATGTTCATA
CCATTCATCTGTAGTAATCATTCCCACCGCTTCGGCATCTTCAAGACATACCGCAATACGGTCCGCGGGCGTATCCATATCAAAAGGAAGCCAAGCTGCACCGCTTAAACAAATGGCTAGTTGAGCTTTTAAAAGCTCAATACCACGCGGCAGCCATAGCCCTACAATATGACCTGGCTTTACCCCTTTTAAAGCCAGATGTTGAGCCATAATTAAGGCTTGCTGATACAGCTCACCATAACTCAGCGTTTTATCTGCTTCAATTAAAGCCGTTTTATCAGGAAGAGTTTGTGCAGTATGAGCAAAAATATCGGCTAAGACTTCATTCTGCAAAAATTCCGGATGATACTTTCCACGAATTACATTTTTCGTGTTCGTTACAAACTGGTTCATTTTTATACTCTCTTAATTTTCTGATTTGGCTTAAATGAAGTCATAATTTATTTTTCTGACCCATTAACGAAATCTAGATAGGCCATAAATTAATTAAGAGAACTTAAAGCAAGATTAAATTTTTTAGTTTTGATGTTTATTCAACAGAGTAAGAACCTTAGCCTCTAATTTTAAATTATAAATAAAAATCAAATATTTAATATTATTATTCAAATTTTATGATTATCTTTAAGATAAAAACGTACAAAACTAATGAAGTAAAAAGCTAGAATTCACAAGATTTATATAGTTCTTGTTTTAATTAAACTAATAAAAGCCATCCTGTATACAAATTCTCCATATTTTAAACTTAATTGTTTAGTTCTGAATTATTAAAAGAGTTCTTAAAAATAAAAGAGTTCATTTAAATTTATGCATGATGTTTGATTATTAATATTATATTAAAATTGTATGCATCAATTCTTTTTAACTTATATCTTTATCTGGATTTTATAAAAAGTTAACTCCATTAAGCGGATTGAGCGAGCAGTTATTTACACGTTCTACTCATAACTCAACCTACCCTATTTGGAGTGGTGAGTTGTGTGGGTTAAAACGAAGATACTCTTGAAAGGCGAGCTAAGATCACCTGTTTCATTTAAGCATGACGAAGTATCTTTATTTATAATATTTGAAAATGGACTTAAAAAATTAAACAGTGGTGCTGGTATGAGTCAACACATAATAACCCCAAAAGAAATACCTTTAGCATTTCAAACCGCTTGGAATAAGCATGATATGCAGGCCTTTGCTGCTCTTTTTGATAAAGATGCGACTTTCGTTAACCGTTTCGGTCACTACGTTAAAGGAGTTGATGAAATTATTGCAATGCATCAACCTATTCATGGAACCATTTATCGTGATTCAACACTAGAGAATGAGCTGATAGACCTCATACCTATGAGTGAGGATATATGTATTTCACATTTTTGGAGCCGCCTCACTGCAGGCGTAGCTCATCCTCAAGGGCCTCATCAGATTGATACTCTCATTCTTACTGTGCTCACCAAGAAGAATCATTCTTGGTACATTCAGGCGCTAGAAAATGTCACTTTGACGAACCCTCGTACGGGAGAGACCATACTTAGAAATATATAGAAAGGAGCACGATCATTTTTTTAGTAATTTATATGCATAAAAAAAAAGCCCTCGTCATTAACAAGGGCTTTTTAAGAATTTTGGCGGAAGCGGTGAGATTCGAACTCACGGAGGACTCACACCCTCGTCGGTTTTCAAGACCGGTGCATTAAACCGCTCTGCCACGCTTCCAACGAGCGCTATAATATAAACAAAAATGCAAGTTGGCAAATCTTTTATTTAATTTTTAATTCAAACGCTCAAAATAAAAACAAATCTATCTAAATTTCATCATTTTTAGGTTTTTCATAGCGAATTGCATTGATATACCACAGCTTTTTACCCAAAGGCGTGTGTACTTCTACTTCATCATCGACTTCTTTACTCAAGAGTGCACGTGCCATAGGTGACTCAATTGAAATATGCTGAGGATGGTGATCATAAATTTCATCTACCCCGACAATTCGTAAAGTTTTTTGCTCCCCTTCTTC
ATTTTCAATGTCGACCCAAGCACCAAAGTAAACTTTGCCTTCTTGCTCAGGTGAGTAATCAACAATTTTTAACTCTTCTAAACGCTTTCCTAAATAGCGTACTCGTCGATCAATTTTTCTAAGGAGCTGCTTGTTATATTGATAATCTGCATTCTCACTACGGTCACCGAGGCTTGCAGCCCAATTTACTTTTTTCGTAATTTCTGGTCTTTCTTCATGCCAAAGTTGTTTTAATTCAGCAACTAATTTGTCATGGCCTGACCGAGTGATTAAGTTAGATTTCATGATTAAAAACCACTGATTAACAACATAAGGGTTACAAGATGTAGCATTTTATACCCTATTTTCTCACATCAGAAGTGTATAAATTTTCTTCCTTAATAAATTCATGTTGTTATGATGATATATCGGATATTTAATTGTACAAAATTTGACAATACTTTTTCTCAATATTATGATGCGCGCATCTTAAATGTTGAGTATTTTTTAACCCACTCAAAATCTATGTAGATTTTCTACACAGTTTGCTTTTTTCATTTCATGTCATTTTGAATGACGTCATGACTGCCCACCCCTTATTGAATTTATCAACTTTTAGAAGTTATGGCAGCTTTTTGTCTGACTTTTAGGGCATAAATTACCTGTAGCGGAGACAACATGCACAGTAATTCAAGTTCTGGGTCGAGGCTTGGCCTTAGCTCTTTATTTTCTTCTCTCCCATTGAAAGTTCTTGCGCTCAGCACTATGTTTTTTCCCTTTCATAGTATCAATGCAGGAAAAACCCTACAGTACAATACTGTAGTTAACACAAACACATTAACAGTAGTTGCCGTAGAAAGCCCAACGACTGTTTTTAAAGAAGACCAGTTTTTACACGGCTTCGGTTATGACTTAGCGCGTAACTATGCACAAAGCTTAAATGTAAAGTTGGACTTCAAAATCGTGACAGACAATGCCACGGCACTTAAGTGGGTTCAGCAAGGTAAAGCAAATCTTGCCATGACAACCGCAAGCTTAAGTTCAATCGAGAACAAAGGTTTAATGTCTTTTTCTGCAAGTTGTGGTGATATCGTAAATTTGCAAAAAAATGGATTAAATCCGAATCTAAGCTGGGTGTTTAAACAAGCAGATGACCCGCTTACACAAACAGCAAGCGGTTTTGTTTGTCAAAGCAAACAAAATGGTCTAACTCAGCAACTTGCTTCTTTTTATAACCGTAATGTTGTAAAACCAGAAGCTTGGTCTACGATTCAACGTGACTTAAGTGCACGTATACCGATTTACAAAGCAAGCTTTAAACAAAGTGCTGCTCAGTACGATTTAGACTGGCATTTGCTTGCAGCGATTGGTTATCAAGAATCGTACCTAAAGCCAGAGTCTGTTTCACCAACAGGTGTACGTGGGTTAATGATGTTAACCAATAGTACAGCTCGGGCAATGGGTGTGAGTAACCGTAATGATCCAGCACAGAGTATTCAGGGCGGTGCGAAATATTATGATCTCATGTTAAGTGAGTATGATGATATTCCTTTCCCAGATCGCAACTGGTATGCGCTTGTGGCTTACAATATGGGTCCAGGTGCAGTGAACCAGATTCAAAAGCGCTTGCAAGCCCAAGGGAAAGACCCGAACCAATGGGTTAATCTCTATAATTATTTGCAGAGTAACAAAACTCGCAATGGTCGATACAAACAAGCAGTTCAGTATGTGACTCGTATACGTGCTTATCTTGAGCACATTAAAACGGCACAAACGCGAATTAATATCTAGTTTTTATCATCTAAAAGTTGATAAATTCATTGTTTGTATAAATAAAAAGCTTACCTCTCGGGTAAGCTTTTTATATGAATTCTTTTTCAGTTTAAGCGGTTTGCTCTAATACTTTTTTGAACAAATCTTTTTGTTCTTGGCTTGGTTTAGCTGTTTGTAAAGCCATTAACTGAGACATATCGCCTTGAACTTTTATTTTACCAGTCATGAATGCCTGCATAGCAGCTGCC
ATATCAAACTCTAGGAATACTTTACGTAAAGTTTCAGCATCCATGTTTAATGTAGTTTTAGCATTTGAAGACAAACCTTTTTGGATTTTACCCCCATCTAACGCTAATTCGGTATTTCCAGAAGCATCTGTAACTACTAAATTAATTGCCAAATTCGCAAGAGCTGGTGGCAAATTAAGATCACCTGCTTCAGCAGTTAATTTTTCGACAGTCGCAAACCAATCATCAGTTAAAAATGCAGGCATGATTCTTCCTCAAATTTATTTGTTCAATTTCTCTAGTGTTGCCTATCCAGACAACATCGTGTGAAGCACTTCTGCTTGAGCGTTGTTATAGCATGTAATTTTAAGGTTGAGCTATGACTTTTTGTACGGAACATTCATTTTTTAGTATTTTGATACATTAAAGGCACTTGTTTATAAAGTGCCTTCAATTGACTTATTAATTCGTTTTATTCAGCAGCAAAGCTTAAGTTCACTACATCTAAACGTTTTTTCAGTTCTTCCGGATCTTTAAAGTCTAATTCGCGCTGCGAGTCAAGAACTTCGAAATCGAACAAATCACGGTCAGCAAGTTGTGAAGGTGAAACGTTCTGTAACGCACCAAAAATGCTGTGTAAACGTTTTGGATACTGTTTGTCCCATTCACGAAGCATTTCATTGATCATGGCACGTTGCAAGTTCTCTTGTGAACCACAAAGGTTACACGGAATAATCGGGAACTTACGTAACTCTGCATATTTGATAATGTCCTTTTCTTCAACATAAGCCAAAGGACGAATCAAAATATTTTTCTTATCTGACGATAAAAGCTTAGGTGGCATTGCCTTTAAGCTACCGCCATGGAATAAGTTCAAGAAGAAAGTAGCAATGATGTCATCACGGTGATGCCCTAGCGCAACTTTGGTTGCACCAATTTCTTGAGCAAAGCCATAAAGCGAACCACGGCGTAAACGTGAACACACAGCGCAGTACGTTTTACCTTCAGGAGTTAAACGTTTGGTAATGCTGTAAGTGTCTTTTTCTAAAATGTAATACGGAATGTTATTCTCTTCCATATAACGTGGTAAGACATCTTCAGGGAAACCAGGCTGCTTTTGGTCAAGGTTAACCGCAACAATATCAAAATTGATCGGTGCAATACGTTTGAACTGCAACAAAATGTCGAGCAAGGTATAACTGTCTTTACCACCAGACACGCACACCATGACCTTATCACCATCTTCAATCATATTGAAGTCACGGATGGCATGTCCAACTTGACGGCGAAGCTTCTTAAGCAAACGATAATAGGCCGAGCTTGTTGGAAGTTCAGGCTTAAAATTAAATCCTTCGTTGGACTCAACTGGCGCGTACATAGACGAGATATACCCATAAATAAAAATACCGCGCAATTTTATATGATTCGGATGAGAATCGCATCAAAAGAATAGTTTCAATTCTTGAAGTCGTCATAATGCTCAGGCATGATGCAAAAGCAGTGCGCTATTTTGACCGATGAGTTCATTTTTTGCGCAAAATGCTTACTTTTTGGACTTTTCCACAGTTTTCCACAAAACCTGTGGATAAAATTGTGGATTTCTTAATACTTGACAAACTGTTTTGACCTTCTGTTAAGGGTTTTATTTAAATTGATCATTTTTTAACCAATGATTTTTACCCTTATTTTTCAAATAGTTATTCAAGTCAATAGAATCATTTTAAAAATTTTTTTTAATTTTTTTTGATGATCATTCGAGCAAAACTGCTTGATTTTTTCAAAGAGTCAAAATAAAAAAAATTACACAAAACTTCTTTAAGTAATATTTTTTGATAAACTCTTTGACAAGTTTTGAGACCCTAAAATTGTGAATAACATAAAATGAAAAATTCGGTTCAATATCTTTTTTCTTGTTTACTGGGGATGACCACTTTTTCGGTAGGTATAACGCCGACTTCAGCAGGTCAGATGTACATTTACCAAGACAAAAACGGCAGTACTTTACTGACTAACCGTAAAAG
TTATGACCATTCACTCAAAAAATTAAAAGTCACCTACTACCCGGATAGTAATATTCATAGTTATAGCAACTGGGGGACTTCAGAAGCTTCGGTTCTACCAAGCTATAGCAAAAACAAAAATGCTTTTGATCATATTATTAAGCAGGCAGCACAACAGCATGGTGTTTCGGAAGGATTAATCAAAGCCGTTATGCATACCGAGTCTGGCTTTAACGTAAATGCCCGCTCTCCGGTTGGTGCCCAAGGTTTAATGCAGCTTATGCCAGCTACTGCTCGTCGTTTTAACGTATCTAACGCTTATGATCCTCAGCAAAACATCTTCGCTGGTGCTAAATATTTAAGTTGGTTGCTCAAACGTTTTAACGGTAATACGCAAATGGCGCTTGCCGCTTATAATGCAGGAGAAGGAAACGTCGATAAATATGGCGGTATTCCCCCATTCCGTGAAACCCAAGATTATGTTCGCCGAGTCACAAGCCGCTACCAAAACTTATATTCTTCTGGTGTAGGCCTTTCTTCTTTTAGCAATTCGAGTATTTCTGCCCAAGCGATAAACCAGCCAGCCATACCGCACAGTACTTCGACGCAAGTCTCTGCTCAGCCTATAAAGTATTCTTCCTCGCGCCAAATCGTGACATTGCCAGATGGTACATATACAGACGCACCTACGGGAACTTATGTCACCAATAATGCAACTGCTATTGCACATATCCGGATTGAGTAAGCTTAAAAGGCCTCATACCTTACGGTTACTTATTTAGCGGGTTCTATTATCAAAAAATTGTAATGATCTATTACCCCCACTGTCGTGTTCAGAGGGGCTCTCTAATAACTTGTTTAATTTAAATACATTCTTCGATCGCAAAGAATGTATTGATTCCTTATTTTTTTCTTGAATTTTTCCCCTAAGCGCCGCATATTAGCTGAGTGATTTATAATTTATAAATCAGATAATATTTTATTTATAAGAGGATTTTCTCAATGATGCGGATTGGTTTGTTTTTGCTTACCAACCTCGCGGTACTGGTTGTAGCTGGCATTATTTTGTCACTCTTCGGTGTCGGTAGTTACCATGGCGCGGGTGGCTTAAATCTAGGCAACCTTTTAGTCATCTGTTTTGTGTTCGGTATGGTGGGCTCTTTAGTCTCTCTATTCATGTCCAAATGGATGGCTAAGAAAACTACTGGTACAGAACTGATTGACCCAAATGCTCCTCGTAACCAAGCTGAAAGCTGGTTATTGCAAACAGTCGCTGAACTTTCTCAACGTGCTGGTATTAATATGCCAGAAGTTGGTATCTTCCCTTCATATCAGTCTAATGCCTTTGCAACAGGCTGGAATAAAAATGATGCCTTAGTTGCCGTTTCAAGTGGTCTACTTGAGCGTATGAACAAAGATGAGCTACGTGCTGTGCTCGCGCACGAGATCGGCCACGTTGCAAATGGTGATATGGTCACATTGGCACTCATCCAAGGTGTTGTAAACGCCTTCGTTATGTTCTTTGCTCGTGTAGTAGGTGACTTTATTGACCGTAATGTCTTTGGTCGTCAAGACAATGAAGCCCCAGGTATGGGTTATTTCATCATTACAATGGTTCTAGATATCGTGTTTGGTATTCTTGCCTCTGCCATTGTGATGTGGTTCTCTCGTTACCGTGAATACCGTGCAGATGAAGCCGGTGCGCGTTTAGCTGGTAAACAAGCGATGATTTCTGCTTTATTACGTTTGCAAGCTGAAACAGAGTTACCTGACCAAATGCCAAAAGAAATGAAAGCGTTCGCAATTGCGGAAGGTAAAGAACAAGGCTTTAGTTTAGCTGCATTGTTCCAAACTCACCCTACAATTGAACAACGTGTGGCAGCTCTACACCAATTAGATTGTCCATAAAACGTTTAATGTAAAAATAAAGCCTCCAAATGGAGGCTTTATTTTTTTGATTACATAGTTTGATAAAGTCCCCAAAGGTAACTTGCACCCCACCACACAGCTAAGCCAATTAA
TAGCAGTACAAACGCACCTAATAAATCGGGTATGTGCAACCATAACCAACTCACAACAGGGTTACGCCAGAATGCCGACTGTTGTTGCTTCTTTTCGAGCTTTTCATGTAAATACGGATGAACGACATGAATATCACTTTGGATCGCATATTCTTCACGTATATGCGTATGCACCACATCTAATGCTTTTCGACTGGGACGTATAAAAATATCGAAAATAGTCCCTGCAACCGGGACAAAACCGACTACAGCATCGACTACAGCAAGTTTGATCACTGGAGTGAGCTTATGCTGAGGCACACCTATTTGTTTAGCCTTATAAATGGCATAACACGTCAGTGCGAACCCAGCGACATCACCGGCAATTGGAATAGTACTTAAGGCAGCATCAGCGCCAATACCCTGCTTTGTAAAAGGAATACGGACCGCACTATCCATCATGGTTGCGAACTTGGCAAGATCTCGTTCTAGGGCGATAACCTGCTGCTGTGTTAATTTTTTTTCTTGAGGCATCAAAATCAAATATAAGAAGAAAAGTTAATCTTAATATACTTGATTTGATGCCCGTTTTTAAATTGTTTACTTTTAGGTTGAAGCTTTCGATTTAAACAGTGAACGTAATAATACGTACATGACCGGAATAAAAAACAGAACCAGCACAGTACCAAACATCACGCCACCTAAAATACTAATACCAATCTCTTGACGACTAACCGCACCAGCACCTTGAGCAAACACAAGCGGAATTACGCCTGCCCCAAAGGCTAAAGAGGTCATTAGAATCGGTCTTAATCGTAAGCTCGCACCTTCTAAGGCCGCCTGAATGGCATTCTTACCTTTTTCTTGCGCTAAGGCCGCGAACTCAACAATTAAAATGGCGTTTTTACATGACAAACCAATAGTCGTTAATAGCGCAATTTGGAAATACACGTCATTTGGCAAACCAAAAATATAAGAGAAAATCACACTTCCTCCTACACCAAGCGGAATAGATGTCATTACGGCGGCAGGAATACTTAAGCTTTCATATAAAGCAGCTAAACATAAGAAAATAAATCCAGCCGAAATTAAATATAACCACACCGCCTGATTAGTCGACTTCTGTTCTTCAAAAGATAAACCTGTCCACGCTAAACCAATATCTTTTTGTTGGTTAACAAGTTGTTCTACATCTTTCATCGCTTGGCCAGAACTGCTGCCACTCGCAACATCTGCTTGTAGTTGTAAAGCACTATATCCCATATAGCGTTTTACAATTTCCGGTGCCCCGCCCCAGCTAAAGTTGGCAAATGAACTAAACGGAACCATTTCATTTTGGTCATTACGTACAGACCAGTTATATAAATCTTCCGGTTTAGATCTAAACTCGGCATCACCTTGAATCATGACACGTTTAATACGGCCCCGATCAATAAAGTCATTTACATAAGTTCCGCCCCATGCGCTAGATAAAGTATTATTAATTGCCGATAGCTGTAGTCCATTTGCTAGTGCCTGTTTCTGGTCAATCTTAATATTAAGATTTGCCTTACTATTGGTTGACTGCTTATCAAAGTTTTCGAAAGTTGAATAATTTTTACTTTGAGCCTGCAATTGGCGGAAGGCACTATCTAGAAAATCTTGCCCTTGCCCATTCAAATCCTGAATCCATAAATCCAGACCATCTGTTTGACCTAAGCCATTAACTGAGGCAGGTAAAGTCACATTAATCTGTGCATTATTAAAATGACTAAAGTATTTCATTGCACGCTTTTGTATAGCCTCAGCCGAGTTTTCTTTTCCTGTTCGGACATCCCACGGTTTTAGAGCAATAAACCCTTGTGCCAAGTTTTGTCCTGTGCCCGAATAATTTCGTCCATAGCGGATTAAAACCAAATCTACGTTTTTATCTTCTTGAGTTAAGAAATATTGGCGGACTTGCTCACCAATTTTCTGACTTTGAGAAATTGGTGCGCTGTCTACGAGCTTAATTTGAACACTTAAAATCCCTTGGTCTTCTT
TAGGAATAAAACCGCTTTTTAAACCGTTATAGAACAGCGTAAAAACGGCAATTAAAGCCACAAAAATCACAATAACTGATTTACTGTAATGAATACTCGTCTGAACAAGCTTGATATATTGATTTTTAAGTTGCTCAATCTTTTGGTTAAACCATACGGCCCAGCGCTGAGGTTGAGGGTTTGGTTTTAAAATTAATGCACATAAAGCCGGTGTTAAAATGAGCGCAACAATAAGGGACAACGCCATAGCGGCAACTAAAGTAATAGAAAACTGACGGTAAATCACCCCAATTGAACCGCCTAAAAAGGACATTGGAATAAAAACAGCAGTTAAAACCAACGTAATTCCGACTAAGGCACCACTAATTTCTCCCATCGACTCAATAGCAGCTTCTTTAGGAGATAAGTGCTGTTCATGCATGAGCCGCTCAACGTTTTCTACGACCACGATTGCATCATCGACCAACAAACCAATCGCGAGTACCAGCGCAAATAACGTTAAGGTGTTAATACTAAAACCAAGCACATATAAGACCGCGAAAGTTCCTAAAATTACAACTGGAACGGTAATACTCGGAATGAGCGTAGCACGCCAGCTTTGTAAGAACAGGAACATGACCAAAATAACCAGAATAATCGCCTCTACCAGAGTCTTTACTACTTCCTTAATTGACTCTTGAACAAAGGGCGTATTATCTCGTGGATAAACGATTTTATAACCCGCTGGTAGTTTCGTTGTAAGCTGATCTAGAGTTTGGTGGATGAGCTTAGAGGTCTGAATTGCATTTGCACCTGAAGATAAAGAAATACCCAAACCTGCGGCAGGATAGCCATTAATAGTGTTAAAAGACTGATAGTTTTCTGCACCTAGCTCAACTCTGGCAATATCTTTTAAATACACATAACTCGCCGTTTTATTCGACTTCACGACAATATTTTTAAAATCCTCAACTGTTTTTAAGCGAGAACCTGCTGTGACTTTTGTATTTAAATATTGACCGTCAATTACGGGTAAGTCACCAATTGCCCCTGCGGCGACCTGAGTATTTTGTGCGGTGATTGCATTCGCTACATCACTTGGCATTAAATTATATTGTTTTAATTTATCCGGATTTAACCAGATACGCATGGCATATTGTGAACCAAATACATCAGTTTCACCCACCCCTTCAATACGGTTCAGGTTATCTACCACATGCGTAGTTAAATAGTCCGATAGCTCAATGTTTCCTGTTTTACCAGTCGAGTCATATAAGCCAATTACCATAAAAGTGTCACCCAGTGACTTACTTACCGTAACACCTTGGCGTTGAACTTCATCCGGTAAGCGACGTATTACGCCACTAATACTATTTTGTACTTGGACCTGAGCAGTATCCGGATTTGTTCCGTTATCAAAACTTATAGTAATTCGACTACGTCCAGATGAGTCACTCGATGAACTAAAATAGAGTAAGTGGTCAATCCCTTGTATTTGCTGTTCTAAAATTTGAGTAACACTTTGCTCAACCGTTTGTGCATCTGCACCACTATAGTTGGCCGACACAGTAATTTTAGGTGGAGCAATGTCTGGATACCGTTCTACAGGCAAATTCATAACCGAAAAAATACCGAAAGCCATAACAATGATCGCCAATACATTGGCAAAAATGGGGCGTTGAATAAAGAATTTAGATAGCATGGCTGCTCGGTATGATGTCGTTAATTAACAGAAGTAAATGCCGCTTCGAGACCTTGTGATGATCAACGAGCGGTATAAAAAAGACAAGTAGAATTAAACAATAAATGTAATTAAGACAAAAACAAAGCGTGATTGAATATTTCCAACAAACCAATAAAAAACCAGTTAACGAGTGTCACTTAAAATTCATGAATTCTTCATGATCATGTCAAACAAAGCACCGAAACTTGAGTCGTTTCAACTAAAACAATATGAGTAGAATTATGTTTAGAAAAGCACTTTTATGCTTAAGTTTAATTAGCTTGGTTGGTTGTAATGA
TGACGATAAAACTGAAACGACGCCAACCACGCCAGAATATCAACTTCCTAAAATTCTAGTAGTAGGACACCGCGGCGCTAGCGCTTTACGTCCTGAACATACTTTAGCTTCATATCAAAAAGCGATTGATGACGGCGCAGATTTCATTGAACCGGATCTAGTCTCTACAAAAGATGGCGTATTGGTTGCCCGCCATGAAAATGAGATTGGTGGAACAACCAATGTAAGCACTTTAAGTCAGTTTGCAGACCGTAAAAAAACAAAAAATATTGATGGCGTCGACTTAACCGGTTGGTTCACGGAAGACTTCACTTTAAGTGAATTACAGCAGCTTAAAGCGCGTGAACGTATTCCTGAGTTTCGACCAGCCAACACAGCTTATAATGACCTTTACCCTGTCCCAACTCTAGAACAAATCATTGAGCTTGCCGAAGCTAACTATAAAAAGACGGGGAAAATTATAGGTTTATATATTGAAACGAAACATCCGACTTATTTTAAAAATCAAAATCTGGCAATGGAAGATACTCTTTTAAAAACCTTAGCCAAATATAAATATACACGTGATATTGCACCTGTCTATTTACAGTCTTTTGAAGTGAGTAATTTAAAATATTTAAAAAATGAGCTTGATCTTCATAAGACGCTTAAACATGCACAAATTATTCAGCTATACGATTCAAAAACATCTCGACCAGCAGACTTCGTAGAGTCTGGTGACACTAAAACTTATGCTGATTTAGCCACAGCTCAAGGGTTAAAAGATGTTGCCAAATATGCAAATGGTGTAGGACCAAGTAAAGGTTACATACTGACCTTTAATAACGATGGCTCTTATAAAACTAGTACGTTTATTTCTGATGCACATACGGCTGGCTTAAAAGTACATCCTTATACTTTCCGACCAGAAAATAACTTCTTACCAGCGCCGTTAAAGTGCAGCCCAGATAAACCTGCTGAACGTTGTCCATCTGGTGCGTTAAAAGAGTTTGAAGCCTATTTCAAGGCAGGTGTTGATGGCGTCTTTACAGATGACCCAGCACTCGGTCGTGAAGCTGTCACTAATTTTGAAAAAGCTGCAAAATAACTCAATCTTCATAAAAAAACAGGGCAATGAATGCCCTGTTTTTTTTATTTATCAAGACTGTTATTGAAATATAAAGGAATCCACTGATACTGCTCAGCATTCACTTTATACACATGACCAATTCCCGGGAACGGTAAATGCGGGGCAGCTACCCATTGTTGCTTATTTGAAATTTCAGCAAACATTTTTAAACGAGTATTAATTGCCTGCTCTGAATTTACATCAAAATCTACCCCAGTTTTTGGAGCATCAAACTGTAAAGAATGTGAGTGGACAATATCACCAACAAAGACAATTTGTTGCCCTTTACTCTTCAAACGGAAGCTATGATGTCCCGGTGTGTGCCCTTGTGTATTGATTACTTCAAAACCTTGAATAACATCATCATCTTTAAAGGTTTTGAATGCTTTTTTTGCTTGATAAGGTGCAAGTGCAGCTTTTACGTTTTTAACAGTACCTAAATAATTTTCTTTTTTATCCGCTGGTACAGTTTTTTCATTTGCTGGATTTAACCAATAATCTGCTTCACGTTCATGAGCATAGATCGTTGCATTAGCAAATACGGCTTTTCCATTTTGAGCAATACCGCACACATGGTCTGGATGTAAGTGGGTCAATAAAACAGTTTTCACATTGGCAAGTTGATAGCCCGCTAATTCAAGATTTTTAGCAATTGAACCTAACTGTGGACCAAAACAACTCGCAGCTCCACTGTCAACAAGCGTCAAACTTTTACCATCATCAACTAAAAAGGCATTTACTGAAGTCTGAATCCCCTTCTCATTTACAGCAGCATATTTCGTTAAAATTTTAGTTTTTTCAGCCGGACTTAAATTTTTAAACAATTTAGGATCTAGATAAATAGTGCCATCGAGCAAGGAAGTTATCCGATAATTACCAAATTGATGATGGT
AATATCCGGGTACTTGTTGAGCTGAAGCTGGTTCTGCATAGCTTATATGCAAGCTACCCATGATAAGTCCTAAGGCTACAAATAGTTTTTTCATCTCTTCTTCTCTATGGAGTTATAAAGTCAAAATCTTTAAAAAAGATTTAATATTTTTTTAAGGGCTGCACTATATAATGCATGAAAAGTGTCAAAAATATAAGTGAATGGATAGTTATTCTCCCTATAAAGGTAAAAGTGGCCTAAAGCGCATCTTGAACGCTACCAGTTATTCAATTTCAGGATTTAAAGCTGCTTATCAAAATGAAGCTGCATTTCGGCAAATTGTTTTAATTAATCTTGTACTCATCCCTGTAAGCTTTTTCCTGGATGTAACTCGCGGCGAACATGCCCTGATGATCATTGTCTGTTTATTTGCCATCATTGTTGAGCTCTTCAACTCTGCCATTGAAGCAGTGGTTGACCGAGTTTCACTCGAGAAACACCAACTTTCCAAAAATGCAAAAGATATGGGTAGTGCCGCTCAGTTTGTTGCACTTTCTATTATTGTCGCCACTTGGCTTATTATTTTATTTGGATAAAAAAAACCATGCTTATAAATAGATAAGCATGGTTGTAACAAAAGAGGAACTTCTAGCTGCTGTTCCTGTTTTAATTTCAGGGTTCTGAACTTAACTTATCCTTAAAATTACTGGAAAAATAAAAAAAATCCCTGCGAATGCAGGGATTTTTTTGGGTTAAAGCGTTGATGCGTTATTACATCATTCCGCCCATACCACCCATACCGCCCATATCTGGAGCAGCTGGTTTGTCTTCAGGAATGTCAGTAATCATACATTCTGTAGTTAACATTAAGCCAGCAACAGAAGCAGCGTGCTCAAGTGCAGAACGAGTTACTTTAGCTGGGTCAAGGATACCCATTTCTAACATATCACCATATTCGCCAGTTGCAGCGTTGTAACCGAAGTTACCTTCACCATTCTTAACAGCGTTGATAACTACAGATGGCTCATCACCAGCATTCGCAACGATTTGACGAAGTGGAGCTTCGATCGCACGGCGTAAAATGTTGATACCAGCTGTTTGATCTTCGTTAGCGCCTTTTAAGCCTTCAAGAGCATTTACAGCGCGAACAAGAGCAACACCACCACCAGCAACAACACCTTCTTCAACTGCTGCACGAGTTGCGTGAAGTGCGTCGTCTACGCGGTCTTTCTTCTCTTTCATTTCAACTTCAGTTGCTGCACCGATTTTAATTACAGCAACACCGCCTGCTAACTTAGCAACACGTTCTTGTAATTTTTCACGGTCATATTCTGAAGTAGACTCTTCGATTTGAGCACGGATTTGTTGAACACGCTCAGCGATAGCAGCAGCATCACCAGCACCGTCAACAATAACTGTGTTTTCTTTAGAAACAGTGATTTTGTGCGCTGTACCTAAATCTTGAAGAGTTGCTTGTTCTAAAGACATACCAACTTCTTCAGAAATAACTGTTGCGCCAGTCAAGATCGCGATATCTTGAAGCATTGCTTTACGACGGTCACCGAAACCAGGAGCTTTAACAGCACATACTTTGATGATACCGCGCATGTTGTTTACAACAAGAGTTGCAAGCGCTTCACCTTCAACATCTTCAGCGATGATAAGAAGTGGTTTACCAGTTTTAGCAACTGCTTCTAAAACAGAAATCAATTCACGAATGTTGCTGATTTTTTTATCAACAAGAAGAATGAACGGATTTTCAAGTTCAGCAGTTAAAGTATCTTGTTTGTTTGCAAAGTACGGAGAGATATAACCACGGTCAAACTGCATACCTTCTACAACGTCTAATGCGTCTTCGAAGCCAGAACCTTCTTCTACAGTGATTACGCCTTCTTTACCTACTTTTTCCATTGCTTGAGCAATAAGTTTACCAACAGTAGTATCAGAGTTAGCAGAGATTGAACCTACTTGTTCAATTGCTTTGAAATCATCAGCTGGTTTAGCAATAGAACGGATATTTT
CAACTACAGTTTTTACTGCAATGTCGATACCGCGTTTTAAATCCATTGGGTTCATACCAGCAGTTACTGATTTGATACCTTCATTTAAAATTGCTTGAGCAAGTACAGTTGCAGTTGTTGTACCGTCACCTGCGATGTCGTTAGTTTTGCTTGAAACTTCACGAACAAGTTGAGCACCCATGTTTTCAAACTTGTCTTTTAATGAAATTTCTTTTGCAACAGTTACACCGTCTTTAGTAATGTGCGGCGCACCGAAAGAGCGGTCGATCACAACATTACGGCCTTTAGGACCTAAAGTCACTTTAACCGCATCTGCAAGTACGTTTACCCCTGCAATCATTTTTGAGCGAGCTGAATCACCAAATTTTACGTCTTTAGCTGACATATTAAACTCCGAATCTTTTTAAATACTGAATCTGATAATGAATTGAGTTATGGATCGATTAGCCTTCTAATACAGCTAAGATGTCTGACTCTTTCATGATTAAGAGTTCTTCACCATTTACTTTAACTGTTGTACCTGCATAAGTACCAAATAACACCTTGTCACCAACTTTAACATCCAAAGCACGTACGCCGTTATCAGTGATTTGACCATTGCCTACTGCAATTACTTCACCTTGAGATGGTTTTTCAGCAGCAGAACCTGGCAATAAAATACCGCCAGCAGTTTTGGTTTCTTCTTCTACGCGACGAATCACAACGCGATCATGTAATGGACGAATGTTGCTCATAATTAACTCCATCAGACTTAGTCTTTTTGATTAATCGTTATTGCTTTAATCCAGAGCAATAACAAAATATTTAGATGTCACTTTTGTGGGGATGAAAAAAATGGCTTCAAGGGGAAATAAGAAAAATTTTTCTCTTTTTTGAATATTTTTTAAACAATCAATTATTTAAGCTCATCTAAACCAAATCAATATGAAGTGAATTCAGCCAGCTGTTCATTAAACCAATAATGTCGAATTTTTCCTTCATTTGAAATTGTATTAAAGCTATTTGGATTATGATGATATAAGCGAGTAGAAGTTGCTGTACCTGCATGTATATCAAAAATAGGATGATCTATTTTTAAAGAATAAATCTGAGTTAAATCATATATAGCTGTTTTATGGAGATGCCCATGTAACATTCCAAATAGACCAGTTGTACTCCACTTTTCTAAAGCGATTTTGCCCAGAACCGGACAATCTTTTATTCCATGCTTATTATCAGGCGGCGTATAAAAGGGTTGATGAAAAACAACCAGCTTTATTTTATTTTTAGGTCCCCTGTCTAAACGCTCATAAGTAGCTTGAATTTGTTCAATCGAAATATGCCCACGCGTATGATAACGACGACGTATACTATTCACCCCAACAATATAAAAATGTTCAGTTTCTAAAGTCGGCTCTAATTCACCAAAAAAATATCTATAACGGGTAAATGGAGAAAAAAAACGGTTCCAGACATGATACAAAGGTATATCGTGGTTACCAGGCACCACAAGATAAGGAATATTGAGGCTATCTAAATACTGTCGACATTTAAAAAATTGTTCATATTTGGCGCGTTGAGTAATATCGCCGCTCACAACAATCACTTCAGGCTGTTGCTGAATACAAAAATCTCTTATTGCCTCTAAACACTCTTTTTTTTCTGTCCCAAAGTGTAAATCAGACAGATGTAGTAACATTCGGCACCATAATATTTAAAGCATTTTTTTCTACAGTGAAGTTTAAAGGAGGTTTCATCTCTATGATTTCTCCATCTAAGGCCACTGTGAGTTTGGTTTTCTTTGCACATTCAACAATGACATGATCAGCACAAAAACTATATACATCTTGTGCATCTTCAACTTTACCTTGAACCCATTGCCAAAGCATATTTAATAAACTCAGCTTATCACTTTTTGTAATTACAACTCCAGCGACTCTTCCCTGTGCTGCACATTCTGCAATTCTGAGCTTCATATCACATAGTTGCAATTGGTTATTTCCAAAGAAAATAAGCGGCGC
TTTAACTGGATATTTCTTGCCATCTACAGTTATAGAAAGCTTCATTGATTTATTTTCTCTCAATAAAACATCTAAAGCTGAGGTATAGGCATGTAAAGGCAGTCTTCCTAAATATTTATTATAAAGCTCACGTTTCTTTATAAATAATGGATAAAGTCCTAGACTCGCATTATTTAGATAAATATGATCATTAATAACTGCTACATGTACTGACCTAGGTTTACCTGTTGCAATAACCTCTGCTGCCTCTAAAAGGTCTAACGGAATCTCTAAAACTTTAGCAACATAATTAAAAGTCCCTAAGGGCAGAATTCCCATGGGAATAGGAGTATTTTTAAGTTTAGTTGCAACCGCATTTAAAGTACCATCTCCTCCTGCCGCAACAACCACTCCTGTATTTTCATTTTGTGAATGTCGATGAATAACATTATTTATCAAATCATCAAATAATGTATTTTCATTTAATTCAAATACTTGTATTTCAAAACCATATTCTGTAAATACAGTCATGAGCTGCTCATAGACGTCTTCATGTTTTGAAGCATGAAATCCTGATTTTTCGTTATAGATGATTGAAAGAGGTTTCAAAGGCCGCATTTTATTATCGTAAAGTCAGTTCTTTATTCTATTTTGTATATAAAATCGCCTATTAAAAGTCTCTTTATGTAAATTTTGAATGAGCTTCATTTGTATAATCAATAATAATTCTCAAAAAAATTTTTCAAGCCTAATTAATTGATTTTAAATAAAATAATTTAGATAAATAACATACATATAACATAAAAATATAAAAGCAAAAACCTTATAAAAACATATAAACATATGATTTTAATTAAAAAATAAAGAAAACTCCAAATATCTGCTAGTTAGTCTTTAGTCTTATTTTTTTTATGATTTTATGCTTTAATACAGCCACTTTTTTATTTGATCTTTCATTGTATCCCAATGAATGACTTGTACGTTGTACATATTTTGAATAGGCCTCACCATGACCACAGTGAACGCACCAGAATTCGTTCGTCATCCTAAGCTTATAGCATGGGTTGAAGAAATTGCAAACTTAACCAAACCAGCAAAAATCGAATGGTGTGACGGAAGCGAAGAAGAGTATCAACGTCTAATCGACTTGATGATCGCTAACGGCACCATGCAGAAATTAAACCAAGAAAAACATCCTGGTTCTTATCTTGCAAATTCTGACCCATCTGACGTTGCGCGTGTTGAAGATCGTACTTACATCTGCTCTCAAAATAAAGAAGATGCTGGTGCGACAAACAACTGGGAAGATCCAGCTGTTATGCGTGAAAAATTAAATGGTTTATTTGAAGGTTCAATGAAAGGCCGTACCATGTACGTTGTTCCTTTCTCTATGGGTCCTTTAGGTAGCCATATTGCTCACATTGGTATCGAGTTAACTGACTCTCCTTATGTAGCTGTTAGCATGCGCAAAATGGCACGTATGGGTAAAGCAGTTTATGATGTATTAGGTACAGATGGCGAGTTTGTTCCTTGTGTACATACAGTAGGTGCTCCACTTGCTGAAGGTCAAAAAGATGTTGCTTGGCCTTGTAACCCAGAGAAATATATCGTTCATTACCCAGAAACTCGCGAAATCTGGTCTTTCGGTTCTGGTTACGGCGGTAACGCGTTACTTGGTAAAAAATGTTTAGCTCTTCGTATCGCTTCTGTCATGGGACGCGAACAAGGTTGGTTAGCTGAACACATGCTTATTCTTGGTGTAACTAACCCTCAAGGTGAAAAACACTACATCGCTGCTGCATTCCCGTCTGCTTGTGGTAAAACAAACTTTGCAATGTTAATTCCACCAGCAGGTTATGAAGGTTGGAAAATCGAAACTGTAGGTGACGATATTGCTTGGATTAAACCAGGTGAAGATGGTCGCTTATATGCGATTAACCCTGAAGCTGGTTTCTTCGGTGTAGCACCTGGTACAAATACTAAAACTAACCCGAATTGTATGGCAACTCTTCACAAAGACG
TTATCTATACAAACGTAGCAGTAACTGACGATGGTCAAGTATGGTGGGAAGGTCTTTCTAAAGAAGTTCCAGCAAACTTAACTAACTGGAAAGGTCAACCTCACGTAAACGGCGAAAAAGCAGCACATCCAAATGCTCGTTTCACTGTTGCAGCAGGTCAATGTCCATCTATCGATGCTGATTGGGAAAACCCAGCAGGTGTTCCAATTTCTGCATTCATCTTCGGTGGTCGTCGTGCAGATACAGTACCTTTAGTTTCTGAAGCTTTCGACTGGGTTGACGGTGTATATAAAGCGGCAACTATGGGTTCTGAAACTACTGCTGCTGCTGTTGGTCAACAAGGTATTGTTCGCCGTGACCCATTCGCGATGCTTCCATTTGCTGGCTATAACATGGCTGACTACTTTGACCACTGGTTAAACCTTGGTGCGAAAGTAAGTGAAAAAGCTGAAGCTTCTGGCAACAAATTACCAAAAATCTTCAACGTAAACTGGTTCCGTCGTGATGCGGAAGGCAACTTCGTATGGCCTGGTTTCGGTCAAAACATGCGTGTTCTTGAGTGGATCATTGATCGTTGTGAAGGTCGTGCGAACGCTGTTGAAACACCTATCGGTTTTGTTCCAACATATGAAGACTTGAATTGGGAAGGCACTGAGTTCACTAAAGAACAATTTGACCTCATCACAAATCAAGATAAAGACCAGTGGGTTACTGAAATTGAAAGCCACACTGAGTTATTCAATAAACTTGGCGAACGCTTACCTAAAGCATTAAAAGAACGTCAAGCAGCTTTACTTGAAGCTGTAAAAACTGGCTTCTAATTACTCTATATAAAAAAGGTCGCCATGTGCGACCTTTTTTATTATGCGGCATCGTCTTTTAATCGACGTTCTGATGAATAGGCTTCTAAAACTTTTACCTGTTTTACATCAAATGCCAAACCTAATTTAGATCTGCGCCAAAGTATATCTTCTGCTGTGTGAGCCCATTCATATTCACACAAATATCTGACTTCACACTCAAACAAATCATGGCCAAAATGTTGACCCAATTGTTCTATAGCATTTCGCTCTTTAAGCATATTCCACACTCTTGTGCCATATGCATGAGCCCAACGGTTGGCAAGTGCGTCAGAAATTCTACTTACACGTGTTTTAATCTGATTGATTAAATCATCTAGTGTGGTCCAGTTTTCGGCGCCAGGTAATGCTTCATCTGCAGTCCACTCCTCTGCCATGTCATTAAAAAAAGGAGCCAGATGTTCTAAAGCAGCTTCGGCTAACTTTCGGTAAGTTGTAATCTTGCCCCCAAACACTGAGAGCAATGGTGTGGTTTTATCTTCTGCCTGTAGAGCCAAAGTATAGTCGCGTGTAATTGCAGATGGATTATCCGACTCATCATCACATAAAGCACGCACACCCGAGTACTGACTTACAATATCAGCTCGCGTCAACTGCTTTTTAAAATGTGAATTTGTCACTGTCAAAAGGTAATCAATTTCCACATCAGTAATCTCTACTTTTTGTGGGTCGCCTATGTATTCCTGATCTGTTGTACCAATGAGGGTATATTTTTCTAAATAAGGAATTGCAAAAACGATCCGTCGATCTTCGTTTTGCATAATAAAGGCCTTATGGCAGTCATATAATTTTGGCACAACAATATGACTTCCTTGTATAAGCCTAATTTGATAAGGCGAACTTAAACCCAGGTTTTTACTAATAATTTCTTCAACCCAAGACCCCGCCGCATTTACAATAGCTTTCGCACGTATCTGATAGAACTCAGCTCCGCTTTGTAACTCCAGATGCCACAATTCCTGTTGTCTATAAGCCTTAACACAACGTGTACGTGTAACGACTTTTGCCCCTTTCTCTTTAGCTTGTAGTGCATTTAACACCACAAGACGCGCATCATCTACAGTACAGTCAGAATATTCAAAACCACGTGTTATCGCTGGTTTTAAAGGGCTGTCTTCTTTGAAATAAATGAGATTTGAT
CCTAATAATTTTTCTCGTTTTCCCAAATAATCATAAAAAAATAGACCTGCTCGAATTAGCCATGCAGGGCGTAAATGGGATCGATGAGGCATGATAAAACGCATTGGTTTAATAATATGTGGCGCTTTAGCTAGCAATACTTCACGCTCTGCTAATGCTTCTCTGACCAGCCTGAATTCTTTATGTTCTAAATAACGTAGGCCGCCATGAATTAATTTGCTGCTAGCAGATGAGGTATGACTGGCTAAATCATCTTTTTCACATAAAAATACCGATAATCCGCGCCCTGCTGCATCATTGGCAATACCAACACCATTAATACCGCCACCAATTACGGCAAAATCATATATTTTTGAATAATCATTAGGTTGTACTTTCATTTCTCATGTTCTTTTTATTGTTGATCTTATAAAAATTAAAACATCAAATTGATGAAACAACAATCAAAAAAGTAGAATAATCAATTTTACATCCTCTAGTATTTAACTATCCATTGACTACCCTACTATTGAATTCAACTAGTTAAGAAAAAGCTCAATCCTCTGCCCAATTTTGGCTACGCTTAACTGCTTTTAGCCAACCTTTATAAATGAGTTCAGCTTGTTCAGAAGGCATTTTCGGTTCAAATACCTTTTCTATGGCAGATTTATTTCTTAACTCATGAAGATCTTGCCAAAAACCAGTGGCCAACCCTGCCAGGAAAGCTGCACCCAATGCTGTAGTTTCTTTCATTATTGGACGTTCTACCGGTGTCGTTAAAATATCGGCCTGAAATTGCATCAAGAAATTATTTTCCGTTACTCCTCCATCAACACGGAGTGTACGAAGCTCTTCTTCTGCATCTTGTTGCATGGCATCTAAAACATCTCGAGTTTGAAAGGCTATAGACTCTAGAGTTGCGCGGATAATATGTTCAATACTAGCCCCACGCGTCAGACCAAAAATTGCGCCGCGAGCTGTCGGGTCCCAATACGGTGCTCCTAACCCGGTAAAAGCAGGTACTACATATACACCATTATTATCCTTTACACGTGTTGCATAAAGCTCGGAATCCTTAGCATTCTTAATAACCTTTAATTCATCGCGTAACCACTGTACGCAAGAACCACCGTTAAATACGGCACCCTCTAAGGCATAGTTCACCTCACCACTCGCACCACAGGCAATAGTTGTGAGTAATCCATGCTCTGAACGGACAATCTTTTTACCCGTATTCATGAGTAGAAAACAGCCGGTACCGTAGGTATTTTTAGCCTGTCCCGACTCAACACACATTTGACCAAAAAGCGCAGCCTGCTGATCACCCGCGATTCCGGCAATTGGGATCCCTACTTCTTGTCCACTAATGGTATGTGTATATCCATAAACCTCAGATGAACTACGAACCTCCGGCAACATTGCTCTAGGAATGTCTAAAGCTTGTAAAAGCTTTTCATCCCACTCAAGCTTTTCAGTATCAAACAGCATGGTTCGAGATGCATTGGTAAAATCAGTGACATGGACGGCCCCATTAGTTAGTTTCCAAATGAGCCAAGTGTCCACCGTACCGAATAATAACTCTCCCCGCTCAGCACGCTCACGGCTGCCTTCAACATGATCCAAAATCCATTTAATTTTTGTGGCCGAGAAATATGGATCAATGACTAAACCGGTGGTCTTACGGATATATTCTTGCCAACCTGCTTTATACAATTGATTGCATATTTCGGTTGTTTGTCGGCTTTGCCAAACAATCGCATTGTAAATAGGTCTTCCTGTTTTTTTGTCCCAGACAATGGTGGTTTCCCGCTGGTTTGTAATTCCAATTGCCGCAACTTGTTCACTCTTGATGCCAGCTTGAGCCAATGCTTCGACCCATACAGCACTTTGAGTTGCCCAAATTTCCATAGGATCATGCTCAACCCAACCTGGCTGAGGGTAAATCTGGGTAAATTCTCTTTGAGCAATACTGACAACGTTCGCATCATGATCTAAAACAATTGCTCTTGAGCTTGTTGTTC
CCTGATCAAAAGCGACAATATATTTTTTCGGGCAATTTGACATCTGAATGTCCCTTTCATTTTCCACACACATCTAGCCCATAATGCAAAAACATCATGCCTAACTGTATTTATTTAAAATTTTCTTTGGAATATATCATTATTTTTAAGTGGCATAGGCTTTCTTCGTCCAAACATTACCCATTCTTTATCAATTTTTTACTGCATGGAAAAAAGTAATAAGAAAAATATTGAAATTAGAAGATAAAAATATGCTGATCGGCTAATTTGAAATCACATAAATATTCAACTTATTCCTTTGCTTAAATAATAAACATTGTTTTACAGACATAAAAAAAATAAAGTGAAATACTCCTGTAGATAATATGTAAAAACGGAATAAATCTTCTCTTTTTTAGCATTCAACCCAAGCGAAACATGGAGCGAATCATGGTTGATCAACCTTCTACCGCAACAACTCCACATTCTAATTTAGATACAAAAACCCGTCTAAAATCAATTTTGGGCGGTTCTGCCGGTAACCTTGTCGAATGGTACGACTGGTATGTATATGCCGCATTTACGCTCTATTTTGCTCATGCATTTTTCCCAAAAGGAAGTCAAACCGCTCAACTTCTTCAAGCTGCAGCTATTTTTGCAGTGGGTTTCCTTATGCGTCCCATTGGCGCATGGATTATGGGGATCTATTCTGACCGTAAAGGACGTAAAGCGGGCCTTACACTTTCCGTTACTTTAATGTGTATCGGTTCGCTACTGATTGCTGTTACACCCTCCTACGAAAGTATTGGTGTATTTGCTCCATTGCTTTTGGTTATTGCACGTTTAATTCAAGGCTTAAGTGTAGGCGGTGAATATGGCGCAAGTGCAACGTATTTAAGTGAAATGGCAGAAAAAGATCGTCGTGGTTTTTTCTCAAGCTTTCAATATGTCACTTTAATTGCAGGACAGCTTACAGCTTTATGCGTACTACTTATCTTGCAAATGATACTCACCGAAGAACAACTGCATGACTGGGGATGGCGTGTTCCGTTTTTCATCGGAGCCCTTTTAGCTATCGTGGTGTTCCGTATTCGTCGTGGCTTATTGGAAACTCAATCCTTTAAAAACGCTCAAGCAGAAACAGATCAGCCGAAATCAGGAATGTTTGCTTTATTTAAACATTACCCTAAAGAAGCCTTCACTGTATTATTCCTAACTGCTGGTGGTACTTTAGCTTTTTATACTTACACCACCTATTTACAGAAATATTTAGTGAATACTTCTGGTTTTACCAAGCCTGAGGCTACTCAAATTACCACTTTAGCCTTATTCATCTTTATGTGCTTACAGCCATTGGCAGGTGCCTTATCAGATCGAATTGGCCGTAAACCGCTTATGATCGCCTTCGGGGTTACAGGTGTTTTATTCACTTATATTTTGTTTGATACACTTGCAAACACGCATAACTACTGGACTGCTTTCTGGCTATGTTTAGGTGGACTGGTTATGGTAACCGGCTACACATCAATTAATGCTGTAGTGAAAGCTGAACTTTTCCCTGCACATATTCGAGCTTTAGGTGTCGCATTACCTTATGCAATTGCGAATACTTTATTTGGTGGTACAGCCGAGTTTTTTGCATTAAGTTTTAAAGAAGCCGGGCACGAATCTTGGTTCTTTATTTACGTCAGTATCATGATTTTCATTTCATTGCTGATTTATATCTTCATGAAAGACACCAAACATCATTCAAAAATTAAAGAACATTAAAATCCTTCAATTAGAGCTAATATCATAAGTGAATGTTAGCTCTAACTTAGTGAATAAGCTCCAACAACGTTTCTATTTTATGAGATACTTCATATAAGAAAATAAGGACGATACTGATGAATAAAAGTATTCTATGTTTGGCATTAAGTAGTATGTTCATTTTAACTGCCTGTCAGACTACGCCTCGACAATATAATGGTTCAACTGGCTACCAGATTGAAAACCAAACAAAAACTTCAGCGACTTTAGCT
TACACTTTAGCTGGTCGTAGTAACCAGCAACTTGATGAGCGTAAATTGCAACGCGCTTGTCAAAATGTGTTAGGTGCTCAGAAAGTTTATAAATTATCAATTTTAAGCATCAATGAAATTCCTAATCCAGCTAAAGATGAGCATTACGGAATTCAACTAGGTGAAACCCGCGCTTCTTTTGGCTTATCAAATACACCTAGCTTAAATAATGGTGAAGATTATGCGACACGCCAAGCACTTGAAGCACGTCCAAGTACATTAAAAGTCGTTCGTTATACTTGTTCATAACATAAAAAAGACGCTGTTTTTACAGCGTCTTATAAAAACTTACATTTCTAAATTCTTTCGCTTCATCCAGATCTGGCCAAGTTGTTGCACTTCTTTCTTCAATTTTTTCATATTTTGGATGACTAAAATTTTTAACACGGCTATCTGCTACCCATACTTCTGGTGCAAATTTTAAAAATTCGTCTAAGAAAAAGCGATTACATTGGTCATATAACACATCGGCAGCAAGTAAAACATCGACTTGTTCAGCCTTATATAAATCATCCAAATATTCGAGTTCTACATCATTAAGTAAAGCATTTTCACGACAAGCATTTAAACTGACCTGATCAATATCACAGCAGATTACGCGTTTTGCTCCTGCCATTTTTGCAGCAATTGCCACTACACCAGAACCTGCTCCGAAATCTAACACCACTTTATCTTTAACATGGTGCGGCTCTGCAAGTATCCACTGAGCCATCGCCAAGCCTGATGCCCAACAGAAAATCCAATACGGCGTGTCATTCCAGATACGGCGAATAACTTCGTCATCCAACCGATCAGTTGGAAAAACAGGTGGAATCAACCATAAAGAAATTGGAGTTTCTGGCAACTGTTGTGCCATTAATTCACAATGAGGAATGACTTCATGTAAGGCTTGAAGTAAATGTTCAGGGGCTTTGGTCAATTGAAAGGTGCAGCTCATAAATAAGCTCAACAATTTATAATTACTATTTGATCATCTGCATCGCGAGCAAGACTAAAACAGTAGCTTAAAGCATGATATTGGCTAGTTACTAAAAATACGTTAGTTCAATTCCACTGCTTTAATTAGTGCAACCGTTTTGGTCTTGCGTAGCATTCGCTACTCATCCCGAAAGAAAAGTAACATACGAGCTCCTAACATTTTCCTTAAACGGTAGGATGCTGCCGTGAATAAATTAAATTAACCTAAAGCTTTTTCAGCAGCTTCTACAGTTTGACGAATCAAAGTTGTAATGGTCATTGGACCAACGCCACCCGGTACTGGTGTATAAGCAGAAGCAATTTCTTCAATACCTTGTAATTGAATATCGCCTACACCACCGCCATCACGTGGATGGAAACCAGCGTCAACAACAACTGCACCTTGTTTAATCCAATCTTTTTGAATAAGTTCAGCTTTACCTACAGCACCAACAATAATGTCAGCTTGTTTCACAAGCTCAGGTAAGTTTTGTGTACGTGAATGACAAATCGTTACTGTCGCATTTGCTTGCAACAACATCATTGCCATTGGTTTACCTAAAATTGCTGAACGACCAACTACAACCGCGTGTTTGCCAGCAATTTCGATATTGTTTTCTTTAAGAATCGTCATAATGCCAGCAGGAGTCGCTGAACCATATGCAGCTTCACCCATTGCCATACGGCCAAAACCAAGGCAAGTTACGCCATCTACATCTTTTGCTAAAGAGATTGCATCAAAACATGCGCGCTCATCAATTTGTGCAGGAACTGGATGCTGTAAAAGAATACCGTGAACATCTGGATTGGCATTAAGCTTTTCAATTTCAGCTAATAATTGTTCTGTTGTGGTTTCTTGTGGTAATTCGATTTTTAATGAATCCATACCTACTCGGCGGCAGGCATTCCCCTTCATACGTACATAAGTTGCAGAAGCACCATCATCCCCAACCAAAATAGTCGCTAAAATTGGGGTACGACCTGTTTTCGCTTTTAAAGCTTCA
ACACGTACCAACAAATTTTCTTCAATTTGCTTTGCTAATGCACGACCGTCTAAAACCAATGCCACAGCACATCTCCAAGGTAG +>3_2#NODE_11_length_39995_cov_63.1687_ID_21 +CTGCCCAAACTGGTGCAGGCGGCGAACGGGGCAGCCCTCGCGCAGCTTCAGGTCAAGCTGTCGAACAACGACTACACCCCCGCCCGCACCCTGCAAGACGCCCAGCTCGCGCTCGCCAACGCCCAGCGCACCCTGAACGACGCGACGCGGGCGAGCAGCACCGGGGTCAGCGACGCCTACCGCGCCGTGCAAAACGCCCAGCAGCAAGTGAACATCGCCCGGCAGCAGGCGACCAACGCGCAGACCGCGCTCACCCAAGCGCAGGCCCGCCTCAAGGCCGGCACCGCCGCCGCCGTCGAAGTTCAGCAGGCGCAGGTGCAGGCGCAACAGGCGCAACTCGGCGTGCAGCAGGCGCAAGACGGCCTGTGGCAGGCGCTCGCCGCGCTCGGGGCCGCGAGCGGGACGGACGTGACCGGGCTGGTGAAATAGTGGGGACTGGTGAAATAGTGGGGGCTGGTGAAGTAGTGGGCGGCCCTCTTCAGATGGCGCGGCGGGTGACCAGAGGAGCCGCGCCGCGCCTGGCCGGTGCCCTGCTTCTCACGGCGCTGCTTGCCGGGTGCTCGCCCAAGACCGAAACGAAAACCAACGACCTCGACGCCGCGCTTCCCAAGACGACCACCCTCAGCGTGCAGACGGTGACGGCGAGGGCGGGCACGCTGACCGCCCAGCGCAGCGCGAGCGCGACCATTCAGGCCGAGCGCGACAGCCAGGTGGCGACCCAGTCCTCCGGCGCCGTGCAGAGCATTCCCGTGAGTCAGGGCGAGGCGGTGGCGAAAGGCGACGTGCTCGTCAAACTCGACGACACCGCTCAACAGCAAGCGCTCGACAACGCCCGGCTGCAACAGCGCCAGGCCCAGATCAGCCTCGACCAGACGCGGCAAAGTACGTCCCAGGGGACCGGCGCCCTGCAGGCGAGCGTGACCTCGGCGCAGGCGGCGCTCGCGCAGGCCGAGCAAAACGCCCAGAGCGCCGAGAAGCTCTACGGCCTCGGCGGCATCAGCCTCGCGGACGTGCAGGCGGCCCGCTCGCAGCTCGCGCAGGCCCAGGCGCAACTCGCGCAGGCCCGCAATACCCTGGAGCAAAACGGGCGCAGCGCCGGCAACTCGGTGCCGCTCGCGCAGGTGCAGCTCGACACGGCCCGCACTGCCGTGCGCCAGGCCGAGCAAAACCTCAGCCGCACCGCCGTCCGTGCCCCCTTCGCTGGTACGGTGGCCGACGTGCTGACCGAGGTCGGCGAGTTCGCCGGGCAGGGCACCCCGGTCATCCGCCTCGTGGACCCCGGCAGCGTCCGCGCCCGCGTGGGCGTGCCCACCGCCGACGCCGCCGCGCTCACCGAGGGCGTCAAGTTCAACCTCAGCTACGGCGGCAAAAGCTACGTGGCGACGGTGGTAGACAGCTCGGGTATCGCCGGCAAAGACCGCCTGGTGCCGATTACCGCGACCATTGAGGGCGGCAATGCTCTGCCCGTCGGCGCCGCCGCCCGCGCCAGCTACCGCGCCACACTCGGCAGCGGCCTGCTGATTCCGGCGAGTGCCCTTCAGGTGGAAGGCGGTGAAAACGCCGTCTACGTCGCCCGCAGTGGCAAGGCCGAGCGCGAAGTCGTGCAGGTCGTCGCCGAGAGCGGCAACCGGGTGGTGGTGTCGGGCCTTCAGGACGGCGACGCGGTCATCAGCCCCCTGCCCGCCGGGGTGCAGGACGGGGCGAAGGTGGTGGTGAAGTGAGGGCCATAGCGCACAGAGTCGAGGGCCGGGGGGGAGAAAACCCCTCACCCCTTGCTGCGCAAGGCCCGCTGCTTCGCAGCTTTGCAAGTCTCCCCTTGGGAGAGGGTCAACAGCGCAAAAACCTTAAGCCTTCTATGACCCTCGCCCCT
TGCGGGAGAGGGCCTGCCGCAGGCAGGGGTGAGGGGGCCAGCCCGTCCTGTCTTTCCCCACACGCCCCCCAGGTCACCCCATGAGCACCCACTTCGACGAAGCCGAATTCAGGTCCGGCGGCACCCTGCCCGACGGCACACCCGAGCCGCAGATTCACCCGCTGGTGCGCTTCAGCGTCAAGAACTACGTCTTTTCCATCGGCATCTTCGTGATGGTGGTCCTGCTGGGGCTGGTGGCGACCTTCCGGCTGGGGGTCGAGCTGCTGCCCAACTTCGAGGTGCCGGTGCTGGCGGTGAGCACGTCCTACCCCGGCGCCAATCCCGACCAGGTGGACCGCGAGGTCAGCCGCCGGGTGGAAGACGCGGTGAGCACGCTCTCGGGCGTCACCGACATCAACACGACCTCGGTCAGCAATCAGTCGGCGGTGGTTATCACCTTCAGCGACTCGACCAACATTGACTCGGCGGCCAACTCGGTGTCGCAGGCGGTGGCGGCGATTCGCGGCACCCTGCCCGACGGCGCGGAGGCTCCGGTGGTGCAGAAATTCGACCCCAACGCCCAGCCGATTCTGACGCTGGCGCTGCTCGGCGGCGCGGCGCGGCCCAGCGAGGTGACGACCTTTGCCGAGGACACGCTGGTGCCCCGCCTGCAACGGGTGGAGGGCGTGGCCGACGTGACCGTGACCGGCGGGCCCGAGCGGCAGGTGCAGGTCTTGCTTGACCCGGCGCGGCTGCAAGGCTTTGACCTCGCGCCCGCACGAATCAGCGGGGCCATCGGTTCCTCGGCGCTCGACCTGCCTGCCGGGACGCTCGACCGGGGCGGCTCGACCACCTCCTACAGCACCCGCAACACCCCGCGCAGCGCCGCCGACGTGGCGCGCATCGTGGTGGACCCGTCCACCGGCCTGCGCGTGAGCGACGTGGCGACGGTGCGTGACGCCAGCGCGGCGGCCAACAGCTACGCCCGCGTCAACGGGCAGCCCGCCGTGCTGCTCGGCGTCCGCAAGGCGAGCGGCACCAACTCGGTGGCCGTGACCGACAACGTGCGCGCGGCGATGGAAGCCCAAAAGCTCCCGCCGGGCTACCGCCTCACCCTCGCCAGCGACACCACGACCAGCACGCGGGCCACAGTGAACGACACCTTCCGGGAATTCCTGATTGCGGTGGGCGCGGTAGGTCTGATTTGCCTGCTGTTCCTGGGACGGCTCAACACCGTGTTCGCCGTCATTCTCGCCATTCCGATTTCCATCAGCGCCGCGCCGCTGCTCTTCGGGACGCTCGGCTTTACCTTCAACATCATCACGCTGCTCGCCATCATCGTCGCCATCGGCATCGTGGTGGACGACTCTATCGTGGTGGCGGAGAACGTGCAGCGCTACCGCGACCTGGGCTACTCGCCGCTGCGGAGTGTGCTGCTCGGCGGCTCGGAAGTGTTTTCCGCCGTCACCGCCGCGAGTTTCTCGCTGCTGGCGGTCCTCATTCCGCTGAGCCTGATGCCGGGCATCCTGGGGCAGTTCTTCAAACAGTTCGGGCTCGGCATCGCCGCCGCCATCGTGCTGAGCTGGCTGGAAAGTCTGCTGTTCCTCACCGTCCGCATGGCGTACACCCGCGAACCCGCCCGCATCACCTGGGCTGACCTGCCCGCTGTGCTGCGCCGGCTGCCGCTCACTTTCCGCGAGTCGCTGAGCGGCGTGAAGACCTTCTGGGGCCTGCTCGGGCTGGCGCTGGCCGGGGCCGCGACGTACTTTGGCCTGCACCGCGCCGGGCTGCCCGCTGCCGCCGCCGGGGTGCTGTGCGTCCTCCTCGCGCCGGTCGTGCTGACTGTCGTCCGCTACCTCCTCACGGTGCTGTATGCCCTGCTCGAAGCGCTGACCGAGACGCTCCACACCCTCACGCTGAGTGGCGTCAACCGCGCGGCCAAAGCCTACGCCCGCTCGCTGGCAGGCGCCCTGCGGCGGCCCGGCGTGGTGATGCTGGTCGCGGGCCTCTTTCTGCTCAGT
GCGCCGCTCGCCCTGCGCGGGCTGGGCTTTGCCTTCGTCCCGGCGAGCGACAGCGGCATTGCGACCATCAGCCTGACCCTGCCGGTGGGCACGCCGCTGGCCGTGACGGATGAGCTGACCCGGCAGGTGGAAGACAAGCTGCTCGCGCACCGTGAAGTCAAGCTGGTGCAGACCGCTTCGGGCAGCAGCGGGGTCCTCGGCGGTGCTAACGCCAACACCTCCGACCTCACGCTGACGCTGATTCCCAAAGCCGAGCGTCCCGGCATCGACGAGTTGCTGGAACGCTACCGCCGTGAACTTGCCCCGCTGGTCGCCCGCTACCCCGGCACCGAATTGCTGGTGGCGGGGCAGGCGGTGGGGCCGGGCGACAGTTCGGACATCTCGCTCGCGCTCACCGCCCCGAGTCAGTCGCTGCTCGAAGAACGCAACCGCGCGGTGGTCCGGCTGCTGAGTGCCGACCCCAACATCCGCACGGTGAAAAGCAGCCTCTCGGCCACCCGGCAGGAGCGCACCTTCGTTCCCGACCCGCTGCGGCTGAGCGGCACGGGCCTGAGTGCGAGCGACGTGGCGCAGGCGCTGCGAACCTACAACGACGGCACCGTGGCCGGGCAGGTCCGCGACGGCGACCGCAGCGTGGACATCGTGGTGCGGCTCGACCCCGCGCTGGTGTCGGGCGAACAGAGTCTGCTCTCGCAGACGCTCTACTCGCAGGCCCTCGGCGCCAACGTGCCGCTCTCCAGCCTGGGCCGCTTCGAGGTCGCGCAGGCGCCCGCCACGCTGCGGCGCTTCAACAAGGCGTACACCGCCACGCTCGACATCAACCTCGTGTCGGGCGGTCCCAACCCCTTCGCCTATCAGAAAGACGTGCAGCAGCGCGTCGAAAAGGCCGGGCTGCTCGCGGGCGGCGTGACCCTCGGCAACGCCAACTCCTTCGGCAGCGCGGGGCTGACCGGCGACCTGCTGTTCTACGGCCCGATTCTGATGGTGGCGGCGGTGCTGCTCACTTATCTGGTGCTGGGCAGTCAGTTCAACTCGTTCCGTTACCCGATTTATCTGTTGCTCCCGATTCCGCTCGCCATCGTGGGGGCGCTGTGGACGCTGCACCTGTTTGGCGTCAATCTGGACGTGATTACGGTGCTGGGGATGGTGATTTTGCTCGGCCTCTCCACCAAAAACTCCATCCTGTACCTCGAATTCGTGACCGAACGCGCCCGCGTCCTGCCGCTGCACGAGGCGCTGCTCGAAGCCGCCGAACTGCGGTTTCGCCCCATCCTGATGACCACGCTGACGGTGCTGGTGATTTCCATTCCCCTCATCCTCGGCCACGGCGAGGGCGCCGAATTCCGCCGCGGCCTGGGCATCGTGATTCTGGGGGGCGTGGTGACGAGCACCCTGCTGACCTTCTACGTGGTGCCGAGCGTCTTCTGGCAGTTCGAGAAAAAGCGCATGGCCACGCCACAACCGAAATTGGAGCCGGTGGCTGGAGACTGAACCCTCGCTGAAACCGTGAAGAAATGGCGCATGTGCGGCCCTCTCCTACTAAGGAAGAGGGCCGCACGCTGTAGGGACGGGCTCATAAAGTTCTCTAAAGCATTTGACAGAAAAAGACACCCTCACCCCTGCCTTCGGCAGGCCCTCTCCCATCAAGGTAGAGGGTCAAAAATGCTCTAGAAAAGGCGAATCAGTGAACTTTATTGCGGCTGCAAGGGAGCCAGTACAGCCCGGCGTTGGCCTCGTTCGGGTTGGTCGAGCCCCAGCACCAGCCCTTGGCCCGGATGCGGCGCAGCGTGGCGTCACGTTTGTCGCACATGCTCTGGGTGCTTTTGGAAGAGTGCGCCGACTGGCAGACCTTCTCCTGGGCGATGGCCTGATGAATCAGCGACCGGACGGCGGGGTCGTGGGGCTGGTGGCTGGTGCCGATTGCGCCTGCACCAGCGGTTGAGGTGAGCAGGGTCGCGGCGAGCAAGAGGGGACGCAGAGTGTTCATGGCTGCACTGTGC
GGGGGCAAGGTTGGAGGCGCGTGAAAAGTGACTTCAGGGGTTAGCGACCGCCGCTCATGTGCGGGAGGTCTTCCCGTTCGGCCCTCAGCCCCCTCGTTACTGCTCACGGAGGGGCTTTTGCATTTGCCCCCCGCCGTGTATATGCATAGAATATTCAGAATGCTGGGCAAGGAAATCGGAAAAGACCAACGACAAAAGCGCATTCAGGACATCATCCTGCGGGAGAGTGTGTCCACGCAGGCCGAACTGGTCAAGCTGCTGGCGAAAGAAGGCGTGCAGGTCACGCAGGCCACCGTCAGCCGCGACATCAACGAGCTGCGGCTGGTGCGGGTGCCCATCGGCAAGGGGCGGCACCGCTATGCCCTGGCGCAGTACGGCGGCGACAGCGACATCGAGGAGCAGCTCGCCCGCCTCTTTCAGAGCTTCGTGCAGGACGTGGACCGGGGCGAGAACATCCTGGTCATCCGCACCGCCGACGGGCACGCCTCGGGGGTCGCGCTGCTGCTCGACCGCTGGAAGCGCGACGACATCGTGGGTACGCTGGCGGGCGAGGACACCATCATGGTTGTGGCCCGCTCCACCCATGACGGCGAGAGCCTGATGGAAGAATTCAACGCGCTGATGCTGGGGTAACAGCTCTCACAGCCCAGTCCAGAGCACAAAAAAGGCCCCCTTCCACACCTGGGAAGGGGCTTTTTCCTGGGCTGTTTCGGGCCGCTTATTCTTCGCGCAGCACGTAGCCGACGCCGCGCACCGTGTGAATCAGACGCCGCTCTCCGCCTTCTTCGAGCTTGCGGCGCAGGTAGCCGATGTACACGTCCACCACGTTGCTGCCGCCGGTATACTCGGGCCAGACCTTTTCCTCGATTTCAAAGCGCGAAAAGACCTTACCGGGGTTGCGGGCGAGCAGTTCGAGCAGTTCGAATTCCTTCGCCGAAAGTTCGACCCGGCGTCCGCCCCGGAAAATCTCGCGTCCATCGAGGTTCATCACCAGGTCGGCCACCCGCACTTCGCCCGTCACGGCGGGGTTGACTCGCCGCAGATGGGCGCGGACGCGGGCCAGCAGTTCCTCGATGGAAAAGGGCTTGATGAGGTAGTCGTCGGCGCCCGAGTCCAGGCCCTCCACCTTGTCCTGAATGCCGTCCTTGGCGGTCAGGATGATGATGGGGGTGTTGCTCGTCTTGCGGATGCGGCGGGCGACCTCAAGGCCGTCGAGGACCGGCAGCATCAGGTCGAGAATCACCAGATCGGGGTTGACTTCGCGGAATTTCGACAAGCCGGTCACGCCGTCAAAGGCCACCTCGGTGGCGTAGCCCTCGGCGGCCAGTTCCAGCTCGATGAAGCGGGCAATATCTTTTTCGTCTTCGATGACGAGGACGAGCGGCTTGCGTTCCATGCCCGCAGTGTAATGAGTGCTCTCATGAGAAGGCGGCGCCCCGTCCTTAAGTTCTATTCATCACAAATGCTGGCGGGGGCGGGCAGCGCTGGGCCGTCACGCGGGTCACAGGTGAGCGCCGTACACTGCCGGACGTGACCACGGGCCGTTCCACTTCTTCGCCTTCCGCCTCGCCGGGCCAGGGCACGTCGGCGCAGGACCCGGCCCCCCGGCGCTGGCTGCTGCGGCCCTCGTCGCTGCAATTGCGGCTGACGTTGCTGTACGCGGCGCTGCTGGCGCTGATGCTCGCCTCGGTGTACGGTGCCGCGCTGGTGCTCATGCGCAGCAGCTTGATTACCGGGCTTGACGAGGGGATGCGCAACACTTACAGCCAGTTCAGCGAGCTGGTGGCGCAACTTGCCCTTGACTCGCCCACCACCGAGCAAGAGCGCGACAAGGAAGGCGTGCTGCCCCGCGCCCGCACGCTGTTTCCCAACGACGCGATTCAAATCGAGAAGCTGCCTTTCGTGGACCACGACCGGCTCCTGACCCGCCTGAGCGACGCCCAAAAGCCCGCGCAGCAGCGTCAGGAATTGCAGGTGGTGCGCTCGCAGCTCACGAAGTA
CCGCTACCCGGTCACGGTCAACCGTGCTTCTCCGCTCGAACTCAGCGACGCCGAGCTGCTCGACCTTATCGAGTCGCCCACGGGCCGCATCTTCATCGCCCGGCAGATTCGTGAGCCCTACAGCGACAAGACGGTGCCCTACCGCATCCTCGTGACATTGGCGGAGGTGCAGTACGCGCCGCGTCCCCTCGCTTCGCTACGCGACGGCGACGTGACGGTGGATTTCATGCCGCCGCCGGTGCTTTCGATTATTTACGTAGGGCGCAGCGTGGCGGGCATCGAGGACACGCTGGGGCGGCTGCAACGGGTGTTCGCCATCGTGATGCTCTTCGGTGCTCTGCTCGCGGGCACGCTGGCTTACGTGCTGGCGGGCCGGGCGCTGCGGCCCCTGCAAGAAGTGCGCCAGGCTGCCGAGCGCATCGGGGGACAGACGCTGACCGAGCGCGTGCCCGAGCCCCAGACCGGCGACGAGGTGCAGGCCCTCGCCCGCTCGCTCAACGCCATGCTGGGGCGGCTGGAAGCGAGTTTCGAGGCGCAGCGCCGCTTTACCAGTGACGCCAGCCACGAGCTGCGGACGCCCGTCACTGCCATCAGCGGGCACGCGAGTTACCTGCTGCGCCGCACCAATCCCGGCGGGCAGGAGCGCGAGAGCCTCAACATCATCCGTTCAGAATCCGAGCGGCTGACCAACCTGATTACCAGCCTGCTGCAACTCGCCCGCTCCGACAGCGGGGCCCTGACCCTGAACCCCGCGCCGATTTTCTCGCGCCTCTTTCTGGACGACGTGAGCCGCGAACTCGCTCCGCTGGCGACCGGGGGCTCCGAGCTGCGGGTCAGCGGTCCCGACATTCCTTTCGAGGGCGACCCCGACCGGCTGCGGCAGGTCATTATCAACTTGGTCGGCAACGCGCTCAAGGCCGGGGCCAAGACGGTCACGCTGGAGAGCAGCTCGCAGGAAGAGGGCCGCGAGGTCCGCCTGAGCGTGCGCGACGACGGCCCCGGCATCCCGGCCGAGCACCTCTCGCGTCTCTTTGACCGCTTCTACCGCGTGGAAGACAGCCGCAGCCGCGACCAGGGCGGCGCGGGCCTGGGCCTGAGCATCGCGCACAGCATCGTGGACGCCCACGGCGGGCGCATCTGGCTGGAGAGCGAGGTCGGGCGCGGCACCGTGGCGCACGTCCAGCTCCTGGTGGGCGACGTGCCGGTGCTGGACGAAGACGACGTGCCGTAGGGAGACGGGGGAGAGGCAACTTCCCAGCTTCTCCATTGGTACCTCTGCGCCCCCGGCCACCTTCCCCCGCCCGCGCCCGCCCTAGACTCGGCGGGTGCGGCGTTTTCCTGTCCTGTTTCCTCTGCTTTTGCTGCTGGCGCTGACGGCCTATCTGCTGCCCGCCCGGACGCCGCAGAACGTGGAGGCCCCGCCGCCCGCCGCCACGACGACGCTGCCACAGGCGCTCCCGACCGCCGAGCGCGAGCTGTTCGCCTCGCTGCGTCCGGCGGTGGTGCGGGTGGACAGCGTGAACACGGCGACGCGCACCGGGGGCCTGGGCACCGGCTTTTTCATCAGCGGGGAGGGGCAGGTCCTGACCGCCTACCACGTCGTCAAGGCGGGGCAACTCTTCGCAGTGACGACCCTGGCGGGCAAGACCTACCCGGCGCGGGTCGCGGCTTTCGACGAGGCGGCGGACGTGGCCCTCTTGCAAGTCGCGCGCGGCGGCCCCTTTCCCTACCTTGAACTGGCGAGCGGGACACCGCAGGTGGGCGAACGGGTGCTCGCCATCGGCAACAGCGGCGGCGACTTTTTGCAGCCCCGGCGTGGCGAACTGCTGCGGTTGAACGCCGAGGCGGGCCGCAGCGATTTCCCCGAGGGCACGCTGGAAATGAACGCGCCGCTCGCGCAGGGCGACAGTGGCGGGCCGATTTTCAATGAGCGCGGCGAGGTGCTCGGCGTGGTGAGTTACATCCGGGTGAGTGGCGACGGCGTGACGCGGGCGAGCTAC
GCGGTGCCGGTGCCGGGCGGCGGCGACCTCATTCGCGGCCTGCAAGCGGGGGAGAGGCGCGAGTCGCCGCTGACGGCCCTCGTCGGCCTCGCCTTCGACCAGATGCACAGCGGCCTGACCGACCCGCCCGGCGCCGTGGTGCTGCGCGTCACCCCCGGCAGCGCCGCCGCCCGCGCGGGCCTGCGCGGCTGCGTGGCCGACCGCCAGGGCCAGCTCACCGGCCTGGGCGACGTGATTCTCAGCATCGGCGGCGTGCGGACGCCCGACAGCGGCACGGCGCTCGACCAGGTCAAGCGGCTCAAGGTGGGCGACCAGGTGGACGTCGAATACCTGCGCGGAGAGCAGCGGGCGCACGCCACCCTCACCCTGCGCGCGCAGCCGGTGGGCAAAACACCGCTGAATGCCGAGCCCTGCACCCGTCAGTAATTAATTCAGGCGGCAAAGGTAGAGAATCCCACAAGCCTTTTCCTATCCACGTTTGGCGCTGACCAGGCAAAATACAGAGATGTCGAGAGCTTTTGTCAAGGAAGACGGCGGCGAACGCTGGACCCCACCCGCAAAGGCCGCCGATTACCGGGTGGTCTTTGAGACGCTGGACGGCCCCGAGACGGTGTACGAGGCCGATGACCTGCTCGGCGCCCTGCGCTGGGCGGCGGCCCGCCCCGGCGCCGGCTTCGAGGTGCGCGGACGAGACGGGCGGCTGCTGGCCGTGAGCTGAGCAGAAATGAAGTAGGCGGGGAGCTGAACACACGGCTCCCCGCCTTTCTGTTCTCTGCTCTTGCCTTACCAGTCGCCCTCCACCGTGAAGCGCACCGCCCCCGCCAGCGCTTGCCCCGGCTCCAGCACCCGCAGGTCCACGCCGCCGACGCCCCGCGCCGCGAGATTGAAAGCGTCGGTGGCGTGGCTCGTCGGCTCCAGCGCGAGGCTGCCGTCGGGCGCGGCGAAGACGACGAGGTGCGAAAACACGTTGTCCGCGGTCAGGGTGAGGGCGCGCTCGCCCCAGTCCAGCCGGGCGACGCCGTCCCACGCGGTGTACGTAGCGTCGGGCGAACGCGTGCCCACCGCTGAAGGCTGGCGGAAGTCTTCCTCAGGGTTGACCGGGCGGGCTCCCCCCGTCGGCAGTTGCCGCTCGTCGGTGTCGTAGACGAGCGCGGCGTCAAAGTGCAGCGTCGGGTCCACCCCGTCCTGAATCCGCTGGAAATACGGGTGCAGGCCCATTCCAGCGGGCATCGGGCGGGCGTCGGCGTTCGTCAGGGTCACGCTGATATCGCAGTGCGGCCCATGCAGGTGGTACTCGACCTCGGCGCTGAGGTGCCAGGGCCAGTTGAAGTCAGGAAATTCGCGGCTGTCGAAGGTGCAGCGCAGGTGCGCGCCCGTGACCCGCGTGACCTGCCAGGGCCGGTTCCTCACGTCGCCGTGCTGGGTGAGGCCGTCTTTGGTGGTGACGCGCAGTTGAACGTCCTCGCTGCCGAAGGTCAAGCGGGCGTCCCGAATCCGGTTGGAAAACGGCAGCAGCGTAAAGCAAGCGCACTGGCTGCTCGTTTCGACCTGTTCCAGGTCCACTGGACGCAGCACCGGGCGACCAGATGCGGAACGCAGATTCAGGATGCTGGCGCCGAGGTCCGGCAAAATCTCAAGTTCGAGCGCCTCGCTGCGGATGGTTTCGGTGCGCCGGGTCACTTGTTCCCCCCGCTCTTTCCCCGGCTGCGGGTCATTTCATAGAGCAGGATGCCTGCCGCCACCGAGGCGTTGAGGCTCTGGACCTGTCCGCGCACCGGGATGCTGACCAGCACGTCGCATTTCTCGCGCACCAATCGGCGCATCCCTTCGCCCTCGGCCCCGATGACGAGCGCCGCCTTGCCGGAAAAGTCGGTGCGGGTCACGTCCTGCGCCGCCTCGCCCGCCGCGCCGTAGACCCACACGCCGTCGGCCTTGAGCTGGTCGATGAGCCGGGGCAGGTTCTTGGTCTGGGCGACGGGCAGGTACGCGGTGGCCCCGGC
AGCGGTTTTGGCGACCACCGGGGACAGGGGAGCCGAGCGGCGCTCCTCCACGACCACGCCGTGCGCGCCGAGCACCTCCGCCGAGCGGATGATCGCGCCGAAGTTGCGCGGGTCGGTCACGCCGTCGAGCAGCACCATCAGCAGCGGTGCGCCACTCGCCTCGGCGCGGTCGAGGATGTCGTCCACCGTCGCCCATTGCAACTCCTCGACCTCGGCCACCACGCCCTGATGCTGTGTGGTTCCGACGAGCTGGTCGAGTTCGATGCGCGGCAGCCAGCGCACCCGCACCCCGGCGTCGCGGGCGAGGTCCTGCACCTGCCGGGCAAAGGCGTCCTCGACCCCGCGCGCCAGTACCACCTCGGCCACCCGTCCGGCGGGCAGGGCTTCGAGCACCGGGTTTCTTCCGTAAAGCAACATGTTCAGGAGTCTAAGCTGCCAGGGGGGGGAGCCGGCCGAGCCGTTACTCTGTGCTCGTGAACGGTCTTGCCCTCAACCTGATTCTGGCGCTGGTCTGGGCACTGTTTCTGGGCGAGGTCAGCCTGCGGAGCCTGAGCATCGGCTTCCTGCTGGGCTTCGCGGTCCTGACCCTCTTTCACCGGGCGCTGGGCAGCCGCGCCTATATCCGGGCGGTGGGCGGGGCGCTGCGGCTGACGGCGTACTTCGTGGCCGAACTGGTGCGGGCGAACTTCCAGATGGCGCGGCTCGCCCTGCGGCCCCACCCGCAACTTCACCAGATGATCGTCGCCGTGCCGTTGCGGCTGCGCGGCGAGGGGGCGCTGACCCTGCTCGCCGCGCTGACGGGGCTGCTGCCAGGCACCGTCACGCTGGGCTTCTCGCCGGACCAGGGCACCATGTATATCCACGCGGCGGGTCTGGAGCAGCGCAGCGCCGTGCGCCGCAGTGTGCAGGAAATCGAAACGCGGCTGTTGCAGATACTGGGGGAAAGGCAGGGCTCCAGCTGAAGACCCCCGCATGAGGAACGCGCGACGAGGTGCCCCGGAGCGGAGAGAAGCGGAAGAGAGTTTTCCAGCCCTAGCGCCCCTGCACCCACGTCACCAGGTCGCCCGCGCTTAGCGGCGGGGTCAGCGCGTAGCCCTGCGCGGCGTCGCAGCCCAGGTCACGCAGCATGTCGAGCTGGGCGTAGGTTTCGACCCCCACCGCCGTGACCGTCAGGCCCAGGCTGTGCGCGAGGCTGACCGTGCCCTGCACCAGCTTGAGCGATTTCTGGTCGCCCGGCAGCCGGGTGGTCAGGGTGGGGTGCAGCTTGACGCCGTAGAGCGGAAACTGGGTCAGCGCGGTCAGGCTGCTCGCGCCGTCGCCGAAGTCGTCCACGATCAGCCGGGCACCGCGCGCGCGCAGGGCGTCGAGCAGGCCCAGGGTTTCCTCGCTGTGGTCGAGCAGGCTGCTCGCACTCACCTCAATGTCCAGCGCACTCGGGCGGGTAATCAGCGGCAGCAGGCGCTCGATGGCCCCTTCGCGCCGCAGCTCTTCCAGGCTGAGGTTGACGCTGGTGCGCCAGTTGCGTTGCCCGGTGGCGTCGCGCAGCTGCGTGCGGCCCTGCACCGCCTCGCGCACCACCCATTCGCCGATGGAGGTGATGAGGTCGCTGCGCTGCGCGAGGGGCAGAAAGCGGGCGGGCGAGACGTTGCCGAGTTCGGGGTGTGTCCAGCGCAGCAGCGCCTCGGCGCCCAGCACCCGGCCGTCTTTGAGGCTGACCGCCGGCTGGTAGAGCAGCGAGAACTGCTCGTGTTGCAGGCCGCGGTCCAGCACCGCCCGCAACTGGTCTTCGAGCTCGAAGGTCTGGGCTTCCTCGGCGCGCATGTCCGGCTCGAACACCGTCAGTTGCCCGCGTCCCTGGCGCCGGGCGTGTTGCAGCGCCACCTCGGCGTCGGCCAGGGCTTGCTCGGGGGCCGCGGCCAGCAGCGGCGCGGCCCCCAGCGAAAAGGTCACCGGCACCAGCCGCTTGCCGGTCCGCAGCGGTTCGCGCAGCGCCGCCTGGA
GCTTCTGGCCCCCCACGGCGGCCCCGAGCTGCGGCAGGTACACGGCGAAGGTGTCGTCGGCCAGCCGCGCCGCCTGCCCGCCTTCCAGCTCCGCGAGGTCGTTGAGCCGCGCCGCCACCTGAATAAGCAGCAGGTCGCCCGCCGTGCGGCTCAGCGCCGCGTTGAGGCCCCCGAAGCCGTCAATGTCGAGGCAGGCGACCATCCCGCCCGTTTGTGCTGCCGTGGCCTGACCCAGCGCCGCGCGCAGCCCGCTGCGGTTGAGCAGGCCGGTCAGCCCGTCGTGCGAGGCGTCGTGGCGCAGTTTGGCCTGGGCGCGGCGCAGGGTGGTCACGTCGCGCAGCGAGAGCAGCAGCTCGGGCGCGGCGCTCCGGCCCGGGGAAGCGGCCGAGGAACCGGTGGACTGACCGCTCCCCGGCAGGGCCGAGAGGCTGATTTCCAGATGCCGGGTCACGCCGCCGGGCAACACGGCCAGCACCTCGCCGTTGTGCGGCAGGGTCAGCGCCGAGAGGGGGGGCACCGTCAGCGGGTTGCCCTGCGGGGTGTGCAGCTTGATGCCGAGTTGCTGGTGCAGGCGCGGCAGCGGCAGCCCGGTCAGGGCGATGTTGTGGGGGTCGAAGCCCAGCAGCCGCGCCGCCGGCTCACTGATGAGCTGCACCCGGCCCTGCGGGTCGAGCAGTATGGTGGCCGCGCCGCTCAGGTGCAGCAGCCGCGCCGCGCCGTTGTCCTCGCTCACCGCGGCGGGTCGGACACCCTCGGCGAGCAGCAGCCCCGGCACGGTGTCGCTGCGGCGCACCAGCAGCGGCAGTTGCCCGCCGCGCGCCAGTGCAATCTCGCTCGGCACCGGGCTGGGCCGCCCCGCCTCTGCGCTCTGAATGAGGTCCTGAAGCAGCCGCGCGCCCGCCGGAGCCCAGCAGGGCCACTCGTGCAGGGGCCGCTGGGGGTGAGCCTGGGCTTCGGCGAGGTCTTCGGGTTCCCCCAGCAGTTGACGCATCCCCCGGCTCGCCCCCTGCACCGCGCCGCCCGGTCCCAGCAGCGCCAGCGGCAAGTCGGTGCCGAGCAGCGCGGCGAACTGGGTCGTCTGGGTGTGCTCGGTGCTCACGTCCTGCACGGTCCACAGCACGCCCGCCGCCGCGCCGCCGAAGTAAGGCCGCGCCGACCCACGCAGCCAGCGCTCCCCGCTGTGCCCCGGCAGGGACGGCACCCGCTCGTCGCCGAGTTGCACGGCCCGCCCAGCCGCCGCGCTGTCGAGCTGCTGCGCGAGCGCCGCGTGGTCGGGAAAAACCTCGTGGACGGTGCGGCCCAGCACATCCGCCCCGGTCAGCCCGAACAGTTCCAGAAAGGTGCGGCTGACCTGTCGGAACTCCAGCTCGGCGCTCAGCCACGCCGTTGCGGCGGGAAGCTGGGTGATCAGCATGTCGGCCTCGCGGCTCGCCCCCTCGGCACTCGCCGCCGAGAGCAGCAGCGTGAGCACCTGCACCGCGCCGTCGGGCACCGGCGCCCCGTCGGACCACAGCAGTCCCAGCAGCGCTCCGTCGCGCGTGATCCAGGCGAGGTCGCCGTGTTCGAGCCAGTCGTCGGGCGGCACCAGTTGCGCTTCCTCGGTGGGCGCGCCCTCGGCCTGAATGTTCAAAATCCGTTGCCCGATGGCGGCCAGCAGGGTGGCCTGCGGAGCGTGCAGCCGGATCAGGTCACGCAGGGCAATGGGCAACGCGGCATGGGCATCGGTCGGGGCAGCGGGCATGGGGTTCAACCTCGGAAAAAGCGGGAAGCGGGAGAAACGGGCGCGTCGGGGCGCCTTGAAATGAACTGCTGTTCTGAGCTTGCCACGTCGCCTCTTACGGTCTTCATGCAAGCCAAAGAAATAGGAGGAAACTTGCTGCTGGGCGCTTAGAGCATTGGATACAAAAAGACCCCCTCACCCCTTGCTTCGCAAGGCCCTCTCCCACCAGGGTAGAGGGTCAAAAATCCAAAGCATTCTTATGTCGAACACTCTAGGGACA
CCCATCGATCAACAGCAAAAGGCCCACCGCCGCGTGTGCAGGGTGGGCCTTCTGAGGAGAGGAGAATTCAGGCGCTCGGTTTGTCCGACTTGTCCTCGGCGGCGCTCTCTTCTTCGGTGCTGAGCGAAGCCTCTTCGATGGAGATACTCGCCATCAAGACCACCTTGTCACCGTCCTGCTCGACCTCGACCTTGGAGTTGCCCGAGGGGAAGTACTTTTGGACGACTTCCAGCAGGTCGTTGCGCAGGGCCTCGACCTTGCCGGGCGCGATCTTGGCGCGGTCATAGGCGAGCACCAGTTCGAGGCGGTCCTTGAGCGTTTCCTTGGAGCGGCGTCCCTTGAACCAACCGAACATCAGCTCCCACCCCCGAACAGGCGGCGAATGGCGGCCCAGATGCCTTTTTCTTCCTCGGTCAGCTTGGGAAAGGGCACGTCCTGACCCTGAATGCGCTGCGCGGTCGCCATGAAGGCGTCGCCCGCCTTGGTCTTGCCGAGCACCGCCGGCTCACCGACGTTGGTCGAGACGATGATGCCCTCGTCCTCAGGCACGATTCCGATGGGCTTGACGCCCAGGATGTCCACCATGTCGTCGATGGAGAGCATGTTGCCGCTCGCCACCATCTTGGGACGCAGGCGGTTGACGACCAGCCGAATCTCGGTGATCTGCTGCGCTTCGAGCAGGCCGATGATGCGGTCGGCGTCGCGCACCGACGACACTTCGGGGTTCACGACCACCAGGGCGCCCTCGGCGGGGGCGGCGGCGGTGCGGAAGCCCGACTCGATCCCGGCGGGCGAGTCGATCAGCACGCGGTCGAAGCCCTCTTCTTCCAGCAGGCCCTTGACGACTTCCTTGAAGACCTCGGGGTCCAGGGCGTCCTTGTCGCGGGTCTGCGAGGCGGGCAGCAGGTGCAGGTTCTCCACGCGCTTGTCGCGGATCAGTGCCTGATTCATGCGGCACTTGCCTTCCAGCACGTCCACCAAATCGAACACCACGCGGGACTCTAAGCCCATCACCACGTCCAGGTTCCGCAAACCCACGTCCACGTCGATCACAACGACTTTTTCACCCAGTCGGGCCAGGGCCGCGCCGATGTTGGCGGTGGTCGTGGTCTTGCCCACGCCCCCCTTGCCCGACGTGACGACAATCACTTTGGCATCCATGCTTGGCGCAGCATAGCAAGGCAACGTGAAAATGTAAGCCTGTGGGGGTGGAGGCCGGGGGAGGGGTTGCGGCAGGTGTGCCGGGTGGGGACAGCGGCCCCGAATGCAGGCGCCGGGAAGCCGGTAGACTGCCGGGCAGCATGACCCGACTTTCCGGAAACAATCAGAACCGCAGCGTCCTGATCATCGGCACCCTGGTCGCCGCCGCGCTGATCGCCCTCGCCCTGTTCGCCGTGCGCGGCAAGAGCACGACCACGACTGGCGAGGCGCAGACCTTCACCTACGCCAACTTGCCCTACGCGGGGCAGGCGAACGCGCCGGTCAACGTACTGGTCGTCGAGGACTTCAAGTGCCCCAACTGCAAGTCCTTCGAGGAAACGGTGGCGCCCGAACTGCGGACCAAATACGTGGGGACCGGCAAGGTCAAGATGTACTCGCTGGTCTACCCCTTCCTCGCCGACCGTCTACCCGAAGACGACAGCAAGTACGCCGCCCAGGCCGCCCGCTGCGTCTACGCTCAGGGCAAGAACGACGCTTTCAATACCTACAAGGAAATCCTGTTCCGCGCCCAGGGCCCGGAAACCGAAGTCTGGGCCACCAAGTCGCGCCTGAAGGAGCTGGCGACCAGCCTCGATATCGACCAGGCCAAGTTCGCCACTTGCCTCGACAACGACGAAACCGCCGCGCAGGTCGAGACCGACAAGCAAGAAGCCCTCAAGGCGGGCGTGGGCGGCACCCCCACCGTCTTCGTCAACGGCAAGCTGGTGAATGTCCAGAGCGATTACGTCAAGGACATCTCGGCGGCCATCGACGAAGCCCTCAAGCCGTGAGCAGCGGGG
CATGAACCGCGACACCCGGTTGTACCTGGCCTGGTTGGTGGCCCTCGCCGCCACCCTTGGCAGCCTGTATTTCAGTGAGATTCGGCACTTCAACCCCTGCCCGCTGTGCTGGGCGCAGCGCATCTTCATGTACCCGCTGGCCGTCATTCTGGGCATCGCGGCGTTTGTGGGCGACCACGGCGTGCGGCGCTACGTGCTGCCGCTGGCGGCGCTGGGGCTGGGCTTCGCCATCTTCCAGAACCTCGAAACCTGGGGTTTCGTTCAGTCCATCAAGGCGTGCACCGTCAACGCTGCCGCTGCCTGCAACACCCCCTGGCCGGTGTGGGGAACCAGCCAGGACACGCTGAACCGTGCGCTCACCATCCCGGTGCTGAGCATGATCGCCTTTGCGCTGATTCTGGCGCTGCTGAGCTGGCCCCGGCAGCGCGTGACCGTGCCCGAAAGCGCGGCAGTCCAGGGCTAAAGTTGCAACTCGAGCGCCCGGCCCCCTATACTTCTCCGGCCCGGTTTGCCTGCCAGCGTGCAGCGCGAGCCTGAAAAGGAGTACGAACGTTTCTGCCCGTGAAGAGCGGGACCTCGGAGACGGTCCGCTGCCCGAAGGAGACCCCCCATGCAGACCCACATCAAGATCAACCGCGGCGAGCTGCTGCGCGGCATCGAGCAGGACCACACCCGCCAGCTTCCCGACTTCCGCCCCGGCGACACCGTGCGCGTGGACACCAAGGTGCGCGAAGGCAACCGCACCCGCAGCCAGGCCTTTGAAGGCGTCGTCATCGCCATCAACGGCTCGGGCAGCCGCAAGAGCTTTACCGTGCGCAAGATTTCGTTCGGTGAAGGTGTGGAGCGCGTGTTCCCCTTCGCTAGCCCCCTGGTCAATCAGGTGACCATCGTGGAACGCGGCAAGGTCCGCCGCGCCAAGCTGTACTACCTGCGCGAACTGCGCGGCAAGGCCGCCCGCATCAAGAGCGACCGCAGCCGCGTGATGAAAGACGCCGCCCGCGCCCAGCAGGACAAGGCGAACGCCAGCGCCAGCCAGGCCGCTGCGGCTCAGGCCGACGTGACCGTCATCAGCGCCGCTCCCGAAGTGGCCCCCGAAACCCAGGGCGAATAAGTCCAGGGCTCCCAAGCCCGCCTGCCTCCGCGTGAGGTGGGCGGGTTTCTTTTGGCTGTGGGGTGGGCGTTTACAGGTAAGCCAGTTCCCAGTCCTTGGCCCCGTGCGCCAGCCCTGCTCGCAGCAGCGCGAGGCCACTCGGCGCCGGGTCGCGCCAGTGGGGGAGGCCACTCAACTCAGCCTCGAACTCGGCCAGCGGGCGGCGGGCGACGGGCTGCACGGTGTCTGTTACGGTGCCTGCGCTGACCTCATAGACCGCGCTATACACGTTGCCTTTGCGGGCGTCGAGGCTCACCGCCTGCGGGCCGTCGCCGCGCACCAGACTTTCCAGGGTGGACACGCCGCGCACTTCGGCGCCCCACACGCGGGCGAGCCCCAGCGCGTAACTCGCGCCGACGCGCACGCCGGTATAGGAGCCGGGGCCGGTGCCGATAACGATGGTCTGGGCCTGAAACGGCAGCCCCGCCGCCCCGAACAGGGCACGGGTCTCGTCGGCCAGCCGCTCGGCGCTCGCCCGCCCGACCTCCTGCACCCGCTCCAGCTCGCCGCCGGGCCAGCTCACGGCCAGCGTGAGGAAGGGGGTGGCGGTGTCCAGGGCGAGGGTCACAGAAAGAGGGGTGACAGGGGCGGCGGAGACGGTCATCGGGCGTATTAAAGCAGAGGCCGGGGCAGGCGCCCGTGTGCCCTGCCCGCTCTCATCTCTTTGTCACCCCGGCAGGGACGTTTGACCGCCCCTTTTTCGCCCCCTGGAAGAAGGGCAGGTGTTATCCTCTGCTCACTATGTCGAACATTGCCAAAGGGCTTGAAGGTGTTCTCTTTACCGAGAGCAAGCTGACGTTTATCAACGGCTCGGAGGGCATTCTGACCCACCTGGGCATTCCGATTCAGG
AATGGGCCGAAAAGAGCACCTTCGAGGAACTCAGCCTCGCGCTGCTCGACGCCAAACTGCCCACCGCCGAGGAACTCGCCAAGTTCGACGCCGAACTCAAGGCCAACCGCGCCATCCCCGATCAGCTCGTGGGCATCATCCGCGACATGCCCAAGGGCGTACACCCCATGCAGGCGCTGCGCACGGCGGTCAGCTACCTGGGCCTGCTCGACCCTCAGGCCGAGGACATCACCCCTGAGGCCCGGCGCGCCATCAGCACCCGCATGATCGCGCAGTTCTCGACCATCATCGCGGCGATCAACCGCGCCCAAGAAGGCCAGGACATCGTGGCCCCCCGCGCCGACCTGACCCACGCGGGCAACTTCCTGTACATGCTGACCGGCAACGAGCCCACCCCCGAGCAGGCCCGTCTGTTCGACATCGCGCTCGTGCTGCACGCCGACCACGGCATGAACGCCTCGACCTTCACGGCGATTGCGACCAGCTCGACCCTCAGCGACATGTACTCCTGCATGGTGAGCGCCATCGGCGCGCTCAAAGGCCCGCTGCACGGCGGCGCCAACGAAGCCGTGATGACCATGCTCGACGAAATCGGCACCGTGGACAAGGCCGAGGCGTACATCACGGGGAAGCTCGACAACAAAGAGAAGATCATGGGCGTGGGCCACCGCGTCTACAAGTACTTCGACCCCCGCTCGCGCGTGCTGCGCGACTACGCCGAGCACGTCGCCAACAAAGAAGGCAAGAGCAACTACTACCAGATCCTCGAAGCCATCGAGAAGATCATCGTGGACCGCATGGGCGCCAAGGGCATCTACCCCAACGTGGACTTCTACTCCGGCACCGTGTACAGCGACCTGGGCATCAAGAAGGAATACTTCACCCCCATCTTTGCCCTGGCCCGCATCAGCGGCTGGTGCGCCAGCGTCATCGAGTACAGCCAGGACAACCGCCTGCTGCGCCCCGACGCCGAGTACACCGGCGCCCGCGACCAGCACTACGTGGACATCAAAGACCGCCAGTAAGCGGAGAAGGTAAGAAAGGGGGCCGGAGCATGGCGCTGCGGCCCTTTTTCTTTGCTGCCTGTTTCGCTTTACAGCAGCCTGCCACCATCATGCGCTTCGGGCGTCTGCACCGGCACCCCCCGCGCATTCAGGGCCTCGCGCAGCATGGGGATGTTTTTGAGGGCATGCCCGCCAGTCGTGTTCTGGAAAAACACGTACAGCTCGGAGAGGTCACCGGCGACTGCCGCAATTTTCTCGGCCCATTCGTCCATCTCGGCGCGGTTGTAGAGGTAATCGTGGCGCTCGCCCGCGCTCTGGCCTTCCCACCAGGTTTCGCGGTTGCGCCCGTGCAGGCGCAGGTAGCCCACGTCGCCGGTTACGTGGACCTGGGGTTCGGGCATCCCACCCACTGGCGGGTAATCCGGGCTGACCCAGATGATGCCGAATTCGCCCATGCCCTCGCGGACTTCGGGTTTGTCCCAGCTCGCGTGCCGCAGTTCCACCGCGAGTTCGTGCCCAGCAAAGCGCTCGGTGAGCAGCGCCAGATACTTGCGGTTGGCGGCGGTGCGGTGAAACGAGTAGGGAAACTGCGCCAGATACGGCCCCATCATGCCGGCCTCGCGCAGTGGCTCGGGGCTCTGGAGCATTCGGTCAAAGTCGGTGTCGGTGGGAGCGCGTACGTGGGTAAAGACCTTGTTCAACTTGACGGCGAAGCGCACCCACCCGCCCGATTTGCGGACCATGCCTTCAAAGGCCTTGAGGCCGGGAATCGCGTAGAACGATGAATTGAGCTCCACCGCGTCGAAGTGGTGGGCGTAGGTCTCCAGAAAGGCGTCTTTCTTGACGCCCTCGTAAATCAGGCCCGGAGCCGTCCAGTCCTCGTCGGTGTAGCCGCCGCAGCCGATGTAGACGCGCATGGGGGAGAGGGTAGCGGGTGGGCGTGGGAAACATATTTACAGTCCGCAGAGAATCCCAGCAGAGACTTGGAACTCCAGT
AGCTCTCAGGGCAAGCAGGAGCAGGCGCTGCCTTGCATTGGCGTCCTGCGCCCTGAACGTCTGACCCCGTTCCGCCTACAATCTCCCCCGTGACCCGCTCCGTCAGCCACCCGCCCTACGCCGACCCCTTCGCCCACCCGCCCGAGACTTCGCCACAGGAACTGACCGCCCAGCTGACTCCCGGCGCCCGTTTTCAGGACGTGCGCTTTGAGAACTACCGGCCCAACCCGGAGTTTCCCAGTCAGGCGCAGGCGCGCGACCAGCTTGCGGAATTCGTGCGGCAGGCGGGGCAAAAGGCGAGCGGCGGGGGCTTCTGGCCCTTCAAGCGGAGGGCCCCGGAAGGGCGCGGATTTTACCTCGACGGCGGCTTCGGCGTGGGCAAAACGCACCTGCTGGCGAGCGCTTACCACGCGGCCAAAGGGCAGTCTGGGGGCAGCGGGGGCGACGTGGCCTTCATGTCGTTTCAGGACCTGATGTACCTGATCGGGGCGCTGGGGATGCCGCAGGCCATCGAGACTTTCCGGAACTACAAGCTGCTGCTGATCGACGAATTTGAACTCGACGACCCCGGCAACACCCACATGGCGAACACCTTTCTGGGCGGGCTGATGCCGAGCGGCGTGAGCGTGGTGGCGACCTCCAACACCGAGCCGGGGGCGCTAGGCGCAGGGCGCTTCAATGCCGAGGATTTTCAGCGGCAGATTCAGGGGATCGCCGACCGCTTCGGGCCGCACCGGATAGACGGCCCCGACTACCGCCAGCGCGGCACCGAGCCCGCCCAGCCGCTCAGCGAGGCCGAGTTCGCGGCGTGGCTCGCGCGCCAGAACCCGCAGACGACGGCGCTGATGACGCACGCCGAACTCAACCGCGCTCTGCTGAGCGTGCACCCGAGCCGCTTTGCCCGCGTGCTGGAGCAGGTGAGCGGCGTAGCGGTGACGAACCTGGAACCGATGCCCGAGCAGGGAGAAGCGCTGCGCTTCGTGCATTTCGTGGACAAGGTGTACGACCTCGGGCTGAGCGCCGCGTTTACCGGGGTGCCGCTCAATGCCCTGTTTGACGACATCTACCGCAACGGCGGCTACGCCAAGAAGTACAGCCGGGCGCTGAGCCGCCTCTCGGAAATGCTGCGGGAGGCGCGGGGGTAAGGGACTTTTCCCTGTCCTCCAGCTCCCATGTTTCGCCGTGACCCCCTCATGGCCCAGTGGCGCGCCGTGGCCCGCTTGCTTGAACTGGACGACGCGCAGGCCCGCGCCCTGCTGGGGCTGGTGCGGCAAACCTTGCGGCGCGGCGGGGTGCTCGGCGTAGCGCGGGGAGACGAGCGGCAGCTGCTGACCTTCGGCGGCGTACCGGAAAATGGCGTCTTTGAGCTGGCGAGCGTGACCAAGCCCTTCACGGCGGCCCTCGCCTCGGCGCTGGTGCGCGATGGGCGGCTGGACTGGAACGCGCCCCTCGCCGCGCTCGGCGGGCCGCTGCGCCGCTTGCCCCGCGCGCTGACGCCTTATGCGCTGGCGACCCACACGGCGGGGCTGCCTCCGCAACCGGCGCGGGCTGCGCTGACCACCTTCACCCGGTTTGCCGACCCCTACGGCGGCATGTCACCGGCGGACGTGCTGGCGAGTGCCCGCCGCTGGGCCAATCCGGGGCAGGCGGGGCGCTTCGGATACTCCAACCTCGGCGCGGGGGTGCTGGCGCTGGGGCTGGCGCACGCGGCGGGGGAGGAGACGTCGGCAGCGGGCTATAAGCGGGCGCTGCGTCGCCTGGTCACCTTTCCACTCGGACTGCCGGGCGTGGGTCTGACCCCGGCGCGGGATGTCGTGCCGCCTTACGGTCTGCTCGGCGGGCAGGCGGTGACGGGCTTTGCCGAACTCGCGGGCGCGGGGGGCCTGTTCGGGAGCGCGGCGGAGTTGCTGCACTTCGGCGAGGCGCACCTGAGCGGGGCAGCAGGGCAGCACTGGCGGCAGGCGCAGGCTTTTCCCGGCTTGCCGCCGCTCTACGCCGGG
GCCGCGCCGGGGTGGTTTCAATCGGGGAGCACCGTCTGGCACGACGGCATTGCCCGTGGCACCCGCACGGCGCTCGGGTTCTCGCCGCGATCCGGCGCGGTGGTCACGCTGCTGGTGCGCGGAGCGGTGCCGCTGGTGGGGGTGCGGGCGGGGGTACCGCTTTTGCTGCTCGGGTTGCTGGGCGGGACAGACCGTTAGAGCATTTGACAGAATGATCAGATCTGTCTTTGACCCTCTACCAAGGGGAGAATTGTCAAGCTGCGAAGTAGAGGGCCTTGCGGAGCAAGGGGTGAGGGGTCTTCACGTCCAGTCCTGCGCGAACTGCTCCCAACTCGCCGTGTCGCGGGGGCACCCGCCCGTCTCCGTCCATTCCCAGTAGGGGGTGTAGAGGCTGCGGGCGGTGACCAGCTCATCGCGGAAAATCCGGGCGCTGAGGCCGGTGTCCAGCAAACCGCTCGTTTCAAAGCGCTGAAGCACGCCGCTGCGGTCGTAGGGCACCCGGCGCAGCTCGGTCTGCCAGCCCTGCGGCGTGGCGGTGAGCAGCAGGTACTGCGCGCGGGGGTCGCCGTCCGCCGGAGAACCGACGGCCCCGGTGTTCAGCACCCTCACGCCGCCCACCAGCGCGTCGGCCTGGCGGTGGATGTGCGAGCCCACCAGCACGCCCGCGCCCGACTCGTCGGCAATGCGGCGAACCAGCGCGGGGTCGGTGCGTTCGCTCAGGCCCCGGCGGTAGTCGTCGGGGGTGCCGTGCGCGAGCAGCACGTCGGGCAGGCCGGGCACGCTCAGCCGCGCGGTCAGCGGCCAGTCTCCCGGTACGTCGAGCAGCCCTGCCGCGTGCAGTTGCTCGGTGCTCCAGGCCGTCGCGCCCCAGAAGGGGTCGTCGTACCAGTCCTGCGGCAATTTATCGCTGCGCGTGTGCCACAGCCGCAGCAGGTCGTCGTGGTTACCGAGGACAAACGACGTGTCGTCGCGGGCGAGCAGTTCTTCCATCACCTGCACCGAATCCGGCCCCCGGTTCACCACGTCGCCGTTGACGATCAGCCGCTCGGCGCCCTGCTCCGCCGCGTCTTTCAGCACGGCCCGCAGGGCGTCGGCGTTTCCGTGGATGTCGGCCAGCACAGCAATTCTCACGGGGGCATTGTAGGCGCCGCTTCCGAAGAAAATACTGAAGAAAACCCCATACCCGTTCAGGCCCCGTTTGTGGTTACAGTAGGGCCATGAAGCCACTGAAGATGCTGACTGTTGTTCTGCTGGGTGCTGGAACGTTCGCTTCGGCGGCTTCGGTGACTTCGGCGGTCAAGACCGAGATCAACCAGCTCGCCCGCCAATACACGGGCCGCCCGTGCCTGAGCAATGGCGAATACGGCCTGGACACCGAGCCTTTTGGCCGCAAGGTGTCGCCGAGCGAGCTGAGGACGGCGCGGGATATCCTGTACCGTCAGTTGCTGGAAGGTTTCGACCAGAGTGCACGCAAAAACCACTTCACCTACGAGGTGCGCTACAACCAGAACCGCGTCTGGGTCTGGGCCAAAGCCCCCAACTCCTCCCGGCAATATTTCACCTTCGCGACCCTCAGCCCGACCGGCATTCGAATGGTGAGCTGCCTACAGTAAGCGGACAAGAAGAAACAGGCCCCTTCCCGCTGTGGAGGGGGCCTTTTGCTGTTTAAGGTTGCCGAATCCAGTACCGCCGAATGGGCTGGTCATGATACTGGGGCACCTCAAATTCCCCTTCCAGTTCGCCGCCGTTCGCCTCAATGACGCCGCGTGAGCCGAAGTTGTCCACATCGCAGGTGACCAGCACCGGGTCTATCCCCAGTTCGCGCGCCCGCTCCAGCGCCAGCCGCAGAATCAGGGTGCCGTAGCCTTTGCGCCGCGCGCTGGGGCGGATTTCGTAGCCGATGTGGCCGCCGAACTCACGCAGGCGGTCGTTGAGGCTGTAACGGATAGAGACGCGTCCCAGGTACTCGCTCCCCTCCACCAGCCAGCGGTATTCGGAGT
GGACAAAGCCCTCCGGCAACTCGTTGCCCGGTTCGTAACGGGTCAGGTCGCGCAGGACTTTGCCGAAGTCGGCGCTGATGTCGTCGAGCTTCCAGACCAGCGTCTCGCCCAGGCCACTGCCGGTTTCCTGTGCCTCGCGCACGGCGGCGAGGAAGCTGTCTTTGTACTGTTCGGAGGGGCGTACGAGGTGTGGCATGGGTCAGGGTAAAGGTTGGCGGTGGCCTGGGCATCGGCCACCCGGCCTACCCTCACCCACCAACCGGGTTGGTCAATCTGTCCCGAAATGGCGTCCGCAGCCCCATAAAGCGGCGGGTGGGGGGCGTACAATCCGGGATGTGAAAGAAGCGGCCTTCGACGTGCTTGACCTCGGCCTGCTGCCCTACCCGCAGGCGTGGGCGCGGCAGAAGCAGGAACTGGCCCGCGTGGCGGTGGGCGGGCGGCCCACGCTGCTGCTGGTGGAGCACCCCGCCGTGCTGACGCTGGGGCGCAAGGCGCAGGAGGGCGAGAACATCGTGGTCACGCGCGAGTATCTGGCTGCGCAGGGCATCGACGTGTTCGCGGTGGAGCGCGGCGGCGACGTGACCTACCACGGCCCCGGCCAGCTCGTCGCCTACGCCATTTTTCCAGTCGGGCGCCGGGTGCGCGACTTCCTGCGGCTGCTGGAAAACGCAGTGGTGACGGCGCTGGGCACCCTCGGCCTGCCCGATGCCCGGCCCAATCCCGGCTACGCGGGCGTGTACGTGGACCCGCGCGAGATCAACGGCAAGACCTACGACCAGAAAATCTGCTCCATCGGCGTGGCCATCAAGCAGAATGTCGCCCTGCACGGCATCGGCCTGAACGTCTGCACCAATCTGGACCACTTCGACCTGATCGTGCCCTGCGGCCTGACGGATACCCAGATGACCAGCGTGCAGCGTGAGTACGACCTGCGCGGTCTGGGCAGCGTCAGCATGGAGCAGGCGAAAAAGGCGCTGACCGACGCTTTCGCCTTGACCTTTGCCGACTATGATTGGAGCCTTCCGGGGGTGGCCGCCGGGCAGGAGGCGCTCTCTGTGGCCTCTCCCTAGCCTCCTTGACCAGCCGTCACCCTCCAGCGACCCTTTCCAACCGAGGAAACCGAAATGACCCAGCAAGACCCGTCCACCAAGGAACCTAAATTCATCAAAAACGGCATCTACCGCAAGGACAGCGTGCCGGTGCGCGAGAAGAAACCTGAGTGGCTGAAAGTCACCATCCCCACCGGGCAGGTGTTTACCGAAGTCCGCAAAATCGTGAAGGAGCACCGCCTGCATACGGTGTGCGAGGAAGCGATGTGCCCCAACATCGGCGAGTGCTGGTCGCGTGGGACGGCGACGTTCATGCTGATGGGCCACATCTGCACCCGCGCCTGCCGCTTTTGCGCGGTGGACACTGGCAACCCGATGGGCAAACTGGACCTCGACGAGCCGCGCAGCGTGGCCGACTCGGTGCGGCTGATGGACCTGAAGTACGTGGTGCTGACCTCGGTGGACCGCGATGACCTGCCCGACGGCGGCGCCTACCACTTCGCCAAGACGGTGAAGGCCATCAAAGAGGTCAATCCGCAGACCCGCGTGGAAGCGCTGACGCCCGACTTCGGCGGCAACACGGCCTGCGTGGACCTCGTGCTGGACAGCGGCGTGGATACCTACGCCCAGAACCTCGAAACCGTGCGGCGCCTGACCCACCCGGTGCGCGACATTCGCGCGAGCTACGACCGCACCCTGAGCGTGCTGGCCCACGCCAAGCAGGCCCGCCCCGACGTGATTACCAAGACGAGCCTCATGCTGGGCCTGGGCGAAACCCGCGAGGAAATCCGCGAGGCGATGGCTGATTGCCGCGCCGCCGGGGTGGACGTGCTCACCTTCGGGCAGTACCTGCGCCCCACCATGCACCATCTACCGGTGGAACGCTACATTTCGCCCGCCGAGTTCGACGAAATCCGCGAAGAAGGCATGCAACTTGGTTTCCTGGAAGTC
GTCTCCGGTCCGCTGGTGCGCTCGTCGTACAAGGCTGAGCAGATCGTGATGGACCGGCCCGGCAACCTGCCCGAGCACCTGAGCCACTTAGACGGCGGCAGCGAATTGACCTTGATTTGAGATTGAGTGGGAAAGGGGCCGGGGCCATGCGCTCTGGCCTTTATTCTTTGGGTCGGTACTTGGCTTCGAGGCTCCACCAAAAGTCGGACGCGCCGCTGCCTACCGTCCGTTACCCTGACCTCAATGTCAAAATTCCGGCGAATACTGCTGCCCCTGGCGCTGCTGGCTGCCCTGGCTCCAGCACAGGCGGCCCCACCTACGAGCTATCAGGGGCCGGTGTACGGCGCTGGCGTGCCGAACGTCAAGGTGGTTCGCCCTCTCTGGACGTTGACGGTAGATAAGGCCGAGTACGGCGACAGGGCCGTGCTGCTCGCTGAAAACCGCGTGCTGGTGAAGGTGGGCGGGGCGTTGCAAGCCCGCGACGTGGCGACGGGCCGCGTGCGGTGGACGCTGCGGCAGCCGGGAAACCTGGGACTGGCCGACAAAAACGCTGTCTTTCTCACGTCCGGCCCCACCCTGAGCGCTTACCGGCTCAGCGACGGGCGGCGCCTGTGGACCCGCGACCTGGGCGGCGCGGTGCGCGATGTGGGCGAGTCGGGCGGCGTGCTGTACGCGACGACGGAACACGGCGGCATAGCCCTGAGCGCCGCGACGGGTCAGACACGCTGGGCCTTTAAAGAGCACGAAATGACGGGCTTCCGGACGGTTCTGGGCGACACAGGGTCAGGGGGCGTGGTGTTCTGGGACGCTTACCAGGGCGAGCCGCATTTTCCGGCCACCTACGCCTTTGACGCGGCGACGGGCAAACAGCTCTACCGCCTCGGCGGCACGACGGGGCCGCTGGGCGTGCGCGGCAAGGCGGTCCTGATGGCCGACACGAGCTTTATAGGAAGCGATGACAATGCCACCCTGACCTGGGTAAACCTGCGTTCCGGCGTCACTGAACAGGTCTTGAAGCTGGCCGCCGATTTCCGCTGCCCTGGCAGAGGCACCCTGGAGCGCCGCACGTCCGAAACCTTCTCTGCCCCGCCGCACATTTATGTCAATGACCAGTGCGGCACGCGGCTGCGGCAGTTCTGGGCGAATGACCCGGCGCTGGCTGGAAAAACGCCGTCCACGCCGCTGCCACCCGCCCGCACCTTCGCCGTGCCGGACGACGGTCGCTTTCGCCTGGGGCCAGTGGGGGAACTGTTGGTGTTTGAAAGCCGCATGGGAGAAGTGCGCTTGATTCCAACCACGGGCCGCGCGCCTATCAACTACAACGGGGTGGATATGCCGACCGGGACGGGCCTTGTCCTGCCGGGAGCGGGGCCGGTGTCGCGGTTGGACGCCCTGGGGAGCGTGCTGTATGTCGGGCGAGTGAATGGTGAGTTTCTCGCCTACGACGCCGCGAAGAAAAAGCCCCTCTACGCGGCGCAGCTTCCCTGGCGCGGCTTCGGGCCGACCTTCCGCAGTGGCAAGTACGCGGTGCTGACCACTCCCGGCGCACTGGCTGTGGTGCGCGAGCCCTGAAAACGCTGGTGGTCGGCGGCACCGGCATGCTGCTGGGACTGGTGCGCGAGCTCCTCGCGGCGGACGACGAGGTGTGGACGCTGGCCCGCCACGCCCCGGCGCTTACGCACCCGCGCCTGCATCCCCTGCTGGCGGACTACCGCGACGCAGCGGCCTTGCGCGCGGCCCTGGCCTCCGCCACGCCGTTTGACCGGGCGGTGGTCTGGATTCACTCTGCGGCGCCGGACGCGCCCTTCGTGGTGGCCGAAGCGGTGCGCGGGCCGTTTTTTCACGTCCTGGGCAGCGCGGTGGCCGACCCGTCCAGACCCGACGACGGGCGGCGGGCCCGTTTCGCCGCGCTGGGCACCGATGCGCGTGACGTAGTGCTGGGCTTCGTGCGGGAAGGCGAACACTCACGCTGGCTGACGAACGCGGAAATCTCGGCGGGG
GTCTGGGAGGCTGTGCAGGGTAATGTTCAGCGGGCAGTGGTGGGAACGGTGACGCCGTGGGCCGCGCGGCCCGGCTGACCGGCGTGTTCCTGCTAGCCTCGCCTCATGCTTGACCCCCGTTCTGTTCCTTTTGCCGCTCAGATTCACCCCCAGGCCCGCCCGGCGCGGCGGCTGACCTGGGACTCGCGCGAGGCGGGGCCGGACACGGCGTTCGTCGCGCTGCCCGGCGAGAAGATGCACGGCAACAGGTTCGTCGAGCGGGCGCTCGCGGCGGGGGCGCCCTTCGTCCTGACCGACCTCGACGTGCCCCGCGCCGTGCGGGTAGATGATGCGCGGGGGGCCCTGTTCGCCTGGGCGCGGAGTGAACGGGCGAAAAACCCGCTGGTGGTGGGCATCACCGGCAGCGCGGGCAAGACGACCGCCAAGAGTTACGCGGCGGCGGCCCTGGACGCCCACTTCATGCCGGTGTTCAACACCATGCCCGCCATCGCCTGTTTTCTGGTGCAGTACGGCGCGAGCCAAAAGCCGCTGGTGGTGGAGATGGGCATCGACCACATCGGCGAAATGGCCGAGCTGATGGACCTCGTGCGGCCCGATGTGGGCGTCGTGACGACCATAGGCCCGGCGCATCTGGAGCAGTTCGGCATGGTGGAGACCATCGCCCACGAAAAGGGTCAGATTCTGACGGCGCGGCGGGCATTGGTCGGCTCGCAGGCCGCCGCCTTTTTCCCCAAGGCCCAGTTTCCCCACGTGGACAGCTACGGCTTCGGCGACGTGACGTTCCGGGGCGAGGGGCTGGAACTCTCGCCGCAGGCCGCTCGCTTCCGCTTCGGCGGCATGGGCGTGATTTTGCCGCTCGCCTCGCGGGTGCAGGCCGAGGCCGCCGTGCTGGGGATGGTCCTCGCCCGCGAAGCTGGCATCGCGCTGGCCGACGCCGCCGTGCGGATGAGCGCGGTGGAAGTGCCCGGCGGGCGCTACCGACTGCATCCGGGCCGGTTTACCGTCATTGACGACGCCTACAACGCGTCCCCGGTGGCGGTGCGCGCCGCGCTCGACGCCCTGCACGCGCTGAAGCCGGAGGGCGAGGTAGGCCGCCGCATCAGCGTGCTGGGCCGGATGCTGGAACTCGGCCCCACTGAGCGTGAGCTGCACGCCGAAGTCGGCAGCTATGCCCGCGAGCAGGCCGACCTGACCTACGGGGTGGGCGAGTTCGCGCAGGAGCTGGGCGAGCGCGCCTTCGCCACCGTGCCCGAGCTGCTCGCCGACCTGCTGAATGAAGTCCGTGACGGCGACATCGTGCTCGTCAAGGCGAGCCGGGGCATTTCCCTGACCCCGGAGCAGCGCGCGGTGACGGGCGTGGGGCTGGACGTGGTGGTGGAGGCCCTGCTCGACCAGCGCGACCGCTGACCTGCCGCTCACCCGGACCCGGCACAATCGGACCATGCCCCTCCCGCCCCGCTGGACCACACCGCGCCGCTTGCTTCGCCTTCCCGTGCTGGCCCTGCTGGGGGCCTCGCTGCTCGCCTGTGCCCCCGCGCCGTCTGCCGGGCGGGGAAGTGAGGCGGGAAGCACTGCCTCCGCCACTGGCGGGGTCCGCGCGGCCTTCAGCGACGATGGGGTGGCGTGGGTCAGTGGGGGCCGGGCGTGCGTGGCCCGCGTGCCGAGCTTTCAGCCGAGTTGCCCGCGCCTCGCCCCCGCCTCCGATGTGGGCTGGCAGCAGAGCGGTGGGCAGGGAGCCGACGCCTGGGCCGCGCTGCCGGGGCCGGGGCTGGTGGTCACGCTCGACCGGGCGCCGCGCAGCCTGAATGTGGGCGCGGTGGTCCTGCTGAGTAGCACCCGCATCTACCGCGAGGACGGCAGCGCCCTGACCTACAGCGGCGAAGCGGGTCGTGGCGTGGCCGGAGCGCCGCTGGCCGCCGTGACCGGGGGCGACGGGCGCGATTACGTGGTCCTGGGCCGCGAACTGCGCCGGGTGGACGACGGGGCAGTGCTTGACCGGG
CGGCGCAGCCTTTCCTGTATGCCACCCCCAGCGGGGCCGCCAGTGCCGCCCTGCCCACCGCCAGTGACGGCCTGAGCCTTTACCGGCTGACCGGCAGCACCCTGGAACGCTTCAGTGCCGGGCAGGTGGTCGCTCGCGTGTCGCATGGCCCAGGGCAGGTGGGGCTGGTGCGCGGCGAGGTGGTTACGGTGGACGCGGCGGGCCGGGTGCGGCGATTCACTCTGCAACTCGAGCCGCTGAACTGAGCCTGGGACGCCGGGTTTCGGTGCCCTGTCCGTAGCTTGCGAGGGGCCAACGAGCGGCTCACCGACGCGCTCCGGCAGGGCCTGGGGGTGTCAGAGTTCCGTAAGCGGGGGGATGGTTCCATGGGCAGCGGTATGTCCGCCCTTGCCCTCACTCGCCGGCTTATTCCCGCAGCAGCTTCTGCCCGTTTCCGGAGCGTGGCCCAGGCCCCCGCATCGTTGCCGGGGGGGCCGGCCCGGGTAGATGAGTGAATGCTAAAATTCGCCGGACTCTGGAGGGAGCATGACGAGCCTCATTAACCGTTTTCGCAGTCGTTCCGCCGCCATCGGCGTGGAGATCGGCACCAGCACCATCAAGGTGGTGGCCCTTAAGGCAGGGGCCCCGCCTTCCCTGCAACACGCGGTGATGGTCCCCACCCCCATCGGCAGCATGCGTGACGGTCTGGTCGTCGAACCCCAGGCAGTGGCGAGCGAACTCAAAAGCCTGCTGGCCGAGCACCGCATCACCACCCGCCACGCCGTCACCGCCGTGCCCAATCAGGTGGCCGTGACCCGCAACATCATGGTGCCGCGCATGGACCGCAAGGACCTGCAAAGCGCCATTCGCTGGGAAGCCGAGCGCTACATCCCTTACCCCATCGACGAGGTCACGCTCGACTTCGACCTGCTCGACGACCCCGCCAACGTGCCCGAGGACGGGCAGATGGAAGTGGTCATCGCGGCGGCGCCCACCGAGGCCGTGCACCGTCAGATCGAGGTGCTGCGCCTCGCGGGCCTCGAACCCACCGTGGTGGACCTCAAGAGCTTCGCGGCGCTGCGGGCGCTACGCGGCAACCTGCTCGGCGAGCACCTCACCAAGAGCACCCTAACCGGCACCAACTACACCGAGGCCGGCGAGGTGGCGCTGGTGATGGAAATCGGCGCGAGCAGCAGCGTCATCAATCTGGTGCGCGGCGACCGCATCCTGATGACCCGCAACATCAACGTGTCCGCCGACGATTTCACCACTGCGCTGCAAAAGGCCTTCGACCTCGACTTTGCCGCCGCCGAGGATGTCAAGCTCGGTTACGCCACCGCCACCACCCCCACCGAGGACGAGGAAGACCTGCTCAACTTCGACCTCAGCCGCGAGCAGTACAGCCCGGCGCGCGTGTTCGAAGTGGTGCGCCCGGTGCTCGGCGACCTGATCACCGAAATCCGCCGCTCGCTGGAGTTCTACCGCGTGCAGAGCGGCGACGTGGTCATCGACCGGACCTTCCTCGCCGGGGGCGGCGCCAAGCTGCGCGGCCTGGCGGCGGCCATCGGGGACGCGCTGGGCTTCGGGGTCGAAGTCGCGTCGCCCTGGCTGACGGTGCAGACCGACCAGGCGGGCGTGGACACCGGCTACCTCCAGACCAACGCCCCCGAATTCACCGTGCCGCTGGGACTGGCGCTGCGGGGGGTGATGGGCCGTGGTTGAAATCAACCTCTTGCCCAAGGAGTATCGCCGGCAATCGCAGCCCAGCGTGTGGAAATACGCGTCCTGGGCGGCGGCTGGCCTCACGGCGGCGGTGCTCGGCGGCTGGTTCCTCTCGGTGTCGGGCGACACCAATCAGTTGCGCGAACGCTCGGCGGCGCTGCAACAGCAGATCGACGCGGTGGCCCCCCAGAAGTCCCGGTTCAACGACCTCACCGCCCAGCAGGGCGAACTCGAGCGCGTCACCCAGGTGGCGACGCAACTGCGCGACCAGAAGACCTACTGGTCCAACGACCTCGCGT
CCTTCGTCGAGCGCGTGCCGGGCAACGTGGTGTTCAGCAACGTCAACATGTCCACCGTCGCGCCCGGTGCGGAGCCCAGTCTCGCCTACGCAGGCAAGCCCGTAACCCGGCAGCTTGACCTCACCGGCAGCGCCCGCAGTCAGGAAGCGATTGTCGGCTTCCTCAACGCCTTTGAGACCGACAGCAATTTCGGCGTCGATTTCAAGGGGATGCAGCATGACGCTACCAACGGCATCTACACCTTTACCGCCAGCATCGGGGTTGTGGGCGATCAGCCGAGCGCCGCGCCGGGGGCTGACCCGACAGCGCAGGGCGCGCCTGCCACTGGAACGGCACCGGCTGCCCCAGCGGCTCCTGCGCCGACGGCCACCACCCCCGCTGCCGCGCCGGCCCAGGGAGGTGCCCAGTGAAAGCCATCAAGCTCGCCCCCCACTACATCTTCGCGCTGGTTCTGACCCTGTGCTTGGTGCTGGGCTACCTCTTTTACACCATGGCGATTCAGCCCCGGCAACTCGAAATCACTTCCCTCAACGACGAGATCACCAACAAGGAAACCACGCTCGCCGCCGATCAGGCCAAAGCGGCGCGGGTGCCGACCTTGACGGCTGAAGTGGCCCGCCTCGAAGTCGAGCGCGAGAAGTTCCTGCGGGCGCTGCCGCCCACCGCCAACTTCGGTCAGGTGGTCGCCAACCTGCGCCAGACCGTGAGCGCCGCCGGGGGAGACCTCAAAACCCTCAATTTCGCTGGGAGCGGCGCGGCGGGGGCCAACCTGCCGGCGGGCGTGCGCCCCATCGGCATGACCATGTCGGTCAATGGCCGCTTTCCGCAACTGTTCCAGATTCTGCGCAGCCTGGAACTGCAAAACCGCTTTACCACCGTGGACAACGTGAGCCTGCAAACCCAGGGCGACGCCGGCACGGGCGGGGGCGGGACGCTGGGCAGCACCCTCGGCCTGACGGTCTATACCTTCGATGCGTCGGGCGCGACGAGCACCCCTGACGCGGCGGCTCCGGCAGCGGGCACGCCGGCCCCCGCTGCACCGGCCGCTGGAGGCACGCAACCATGAGCCGTTTTCCCACTGAAGACCAAACCCAGACCCAGACCATGACCGAGGTCACCAACGTCTCGCAGGAGAAATCACCGCTCAACCTCAACTTCTCGCGCGAAGTCAAGCTGGTGCTGGTGGTGCTCGGCATGGTCGCCCTGATCGGCGGCTGGTTCGTCCTGACCGGCCAGCCCGACGACCAGACCGTCACCGCGCCGGTCCCCGTGACTACCCCAGCCCCGGAGACGGCCACCACACCTGAAACCGCGCCGGCAGGAAATGCGGCCCCGGACGGTCAGACGGCGGGCACGGCCACAGAGACGACGACCACCCCAGGCACTACAGATGAGTCGTCCGGCAAGAAGGCGACAGGAACAGCCGCCAAGTCGCAGAAAGGCACGCAGTCTGGCGACGGCTCCGACGAAAGCCTGGTGGCGACGGTGCCGCCGTTGGAGGGTGAAGGCACGGCGACCCCACCGGTGCCCGTGCCCGGCGGCATCAACCCGGACCGCCCGCTCAAGACCCTCAGCGGCGCGGACCCCTTCGGCTCTCTGACGCCCAGTCCGGGAACCGGCACGGCTGGCGGGACGGCTCCAGCCACCCCGGTCGAAGCCCAAACTCCGGAAACTCAAAGCCCGGAGACGGCGCCCGAAACGCAGGCACCCACCGCGCAGAGCACTACCAATAGCCCAGCCCCGGTGGTCCTCAGCAGCCCGGCAGGCAGCAGTGTCAGCGGGCAGGGCGGCGTGCTCGATTACTCCTCGCGCCTCGATCAAGGGGCCCTGCCGACCCCTGTGATCCCAGTATCTCCGACGGAAGTCCGCAGCCCGGTGACCAGCGCTCCGGTCAGTGGCAGCTCAGTCAACGGCGACAGTGTCGGCAGCGGCATGGCGACAGCGCCCACGGTTGTGGTGAGCACTCCGGCTCAGCCCACCACCACTCCCTCGGCTTCAAG
CCCAGTGGCTCGGAATACAGCGGGCCAGAGCGGAGCAGGACAGAACACGGGCGCCTCCCGCCCGGCCAGTGGCACCCCGTCGCGGGCCAGTGCTCCCAACGTGGTCGTCGTGCGTCCTCCGGCTCGCGTGTCCGGTACTTCGGCGGCGGGCGCGACGGGCAGCTCACGCACCGGCGCGGCGGGCACGCCCGCAGCAACGCCGACGCGCAACCAGTCTGCGTCCGGCACCCGCCCCGCGTCCGGGCAGACGCCGCCCCGAGTCATTGCGGTGGCCCCGGCGGGGGGCGCCCGAACTCCGGCAGCCCCCGGGAACAGACCGGCGACCGCCGCACCGACCCGGCCCAGCCCTTCCAGCACCGCTCCCAGCAAGACCGTCGTGGTGCGGCCCCCGGTCCCGGTCAGCGGGCAGCAACCGCCCCGCGTCGTGACCAGCCCCGCTGCGGGCGTCCCGGCGGGCAACGCAGCCCCAGCGCCGACCCCGCCTCCAGTCGTGGCGGCGCGGGTACCCCAGGCGAACACCGTGCCTGAGCTGGTGGCCCAGGCGGGCACGTCGGCGGCCCCGGCCACTTCCGGTGCGTCCGCCGGAACGGGCACGGCTGGTCAAGCGGGCAATGACGCCGCTGGCACGGCGGCCCTACCTCCCCTCATCACGGCGGTGGGCAGCGAGGCGGCTCCGGCCAGCACCGCTACGGCGGACGCAGGAAGCAGCACCCCGGCGGCGCCGGCAGCGACTGCCCTCGACCGCCTGCTCGCGCCGGGTCAGGTGGCCCTCAGCTCGGTGGTGCTCGGCCCGACCAACACGGCGGTGTTCCGCACGCCGCAGGGGTTCGTTGTGGTGGAGCAGGGGCAGACCGTCCCCGGCCTCACCGGCGAAGACGGCACCCCGGTCGTCTTGCAGAAAGTAGAAACCGGCGCCGTCACTGTGGCGCTTGGCACCAAGGTCAAATCTCTGGAACTGGAACAAAGGTGAGCCATGAATAAACGACACGCCCTCCTCTTGACTGCCGTGCTGGGCATGGCCACCGCTTATGCTCAAACTGCACCGACCACCACGACCGTCAATACGTTGCAGACGGTTTACCGTGACCCTAGTCTGACCTCTGCTCCAATTACAGCAAATGTAGGTAAATACGTCGGCCCCCTGTCTACTTTCTTGGCAAGCATCGCCAAATCGGCGGGCTACGAAGTGGTTTTCAATTTCAATATTGATGCGCTGGCCCTCATCAACGGTGAAATTGTTTTTGGAAATAGTACCGCTTCAGTGACGACTTCCTATGCAACTCCTTTAGGTAGGCCCCAGGAATTACCAGCCAAACCTGTAGTCCACAATTTCTCGAACGCTCCTTTCAACGAAGCTTGGCCTTTGCTTATGGATGTTTATGAGTTGGATTATCAACTAGTTAAAGTTGGTAGTGCAAATGTCATACGAATTGGTCAAAGGCCTAAGCAGTTGGCTCTGCCCCTAAAATTTATTTCGGCAGAGAGTGCCTTAACTGCCATTGAGAAATTTTTTGGTGAAGAAAAGTTTGAGACCGTGATAAGTTTGGATAGCAATAATAAACCTTTTCAAACTACTCGGCCAACAGGTAAATTTGGATTGCCTAATAGCATAAAAGTAATCCCCGATTCCAGCAATAAGCGTTTGATTATTGGTTCCAATAGTGAGGATGGAATAAGAATACGTAGTTTCGTAGAAACAATAGATGTACAATCCTCTGGTAAAGTAATATCTACTGATTCCATAAGCGAAATATATATTGTAAGAGGCCAGAAAGAAAGCGTATTACAGTTTTTGAGAGATAGTTTTCCGGAATTAATAGTTACCGATTATGCATCTGGAGGCCTTGCTATTGAAGGCCCACGAACAAGTGTTAACAGGGCTATAATCTTGCTTGGACAAGTAGACCGTGCACCTGAAATTCCTATTGTGCAGCGCATCTACACCGTGCGCGGCCAGGCCGCCGACATCACGGCGCTGCTCGCTGCCCAGTACCCTACCC
TGCGCGTGACCCCGGTGGGGCAGACCGGCCAACTGGTGCTCAACGGCGCTCAGGCCCAGCTCGACACGGCGCTCGCCCTGCTCGAACAGGTGGACCGCCCCGCCCCGGTGGCCGAGTCCCGTACCGTTCAGCGCGTCTTCCAGCTCGTCAACGCCAGCGCTGAGGAAGTCAAGGCGACCCTGGAAGGCACCCTGGCGCGTGACCTGACCGCCGACAGCAATAACGACGTGCTGCCCAACGTGCCCGTCACCGCCACCGACGCCAACGGCAACACGACCGTGGTGAGCGTGCCCAACGCGCTCGGCAAGACGGCGAACCAGGGCACGGCGAATGCTCAGGCTCAGACTGCCCAGACTCCGGCCAACACCCAGCAGGCGACCCTGATCGCCGACAAGCGCACCAACAGCCTGATCGTGCGCGGCACGCCCGAACAGGTGGCGCAGGTGGCCGAGCTGGTGCCGCAGCTCGATCAGGTGGTGCCGCAGATCAACGTGCAGGTGCGGATTCAGGAGGTCAACGAGCGAGCGCTGCAATCGCTGGGCCTCAACTGGCGGGCGACCTTCGGTGGCTTCAACGTGGCGGTCTCGGGCGGCACCGGGCTGGCGGCCACCTTCAACCCCACCCAGAGCTTCCTCGGTTTCAACATTTTCCCGACCCTCACGGCGCTCGAAACCCAGGGCCTGACCCGGCGCGTGTACGACGGCAACGTGACCATGCAAAGCGGCCAGCGCTCGCTGAGTGCCACGGGCGGCGCCCAGAACGCTTCGAGCGGCGCGGCGGCCAGCGTCAAGAGTGGGGGCCGACTGGAAATCAACATCCCCTCGGCGGCGGGCAACATCGTGCGGCAGATCGATTACGGTCTCAACCTCGATTTCTTCAGCCCGCAGGTGGCGCCCGACGGCACCATCACCCTGCGTATTCGCGGTCAGGTCAACCAGCCGGCAACGGCGATCACCGCCGACAGCCTGCCCAACCTCATCGACTTCACCAACAGCGAAGCGCAAAGCACCATCACCTTCAAGAACGGCCAGACCATCCTGATGAGCGGCCTGCTCGGCAGCACCGAGACCACCAACCGCAGCGGCGTGCCCTTCCTGAGCAGCCTGCCCGGTGTGGGCGCGGCCTTCGGGGAAAAACGCACCGAGAAGACCCAGTCGCAACTGCTCGTCATCATCACCGGCACCGTCGTCAAATAACGCCTAGAGCAGTTCTCCGAATTACGCGTGCGTCGGAACAGCACCGCCACCCGCTCCATTCTCTGCTTCGCAGCTTTGCAAGTCCGTTCTGCTCCGTGTTTGTACTCGCTCTGCTCGCCAAAAAGCTGTGCCATCTTTTTGTCAAATGCTCTACGTCATACCGGCGCCTTTCTGCGAAGGGGCGCCGCTTTTTTGACTCTCGCGGCGGAGCCTGTTTGCTCAAGCCGCCGCGCCGGGGCCGCCAATCCCGCTACAGTCGCCCTATGAGGTACCTGACCGCCGGAGAATCGCACGGGCCACAGCTCACGGCCATTATCGAGGGGCTGCCCGCGCAGTTGCCGCTGGGCAAGGCCGACATCGACCCCTGGCTGAGAAAACGGCAGGGCGGCTACGGACGCGGGCGACGCATGGTCATCGAAACCGACGAGGCCGAGCTTCTGAGCGGCGTCCGTGCGGGCCGCACCACCGGCGCCCCCGTCACCCTCGCCATTCAGAACAAGGACCACCGCAACTGGACTGAAATCATGTCGCCTGAGCCGGGAGGCGAGCCGCGCAAGAAGGCGCTGACCGACGCCCGCCCCGGCCACGCCGACCTGACCGGCGGCATCAAGTACCGTCACAAGGACCTGCGCGACGTGCTCGAACGCGCCTCGGCGCGCGAAACGGCGGCGCGGGTGGCGGTAGGCAGCATTGCCCTCAAGCTGCTTTCTGAACTCGGCATCGAGGGGGCCAACTATGTCTTCAACCTTGCTGGCATTGAAACGCGGCAGGCCTTCTCCTGGGACGCACTCGACGCCATCGAGG
ACTCCGACCTGCGCACCCCCGATGCCGACGCCGCCGCGCAGATGCGCGAGCGCATCGACCAGGCCAAGAAGGACGGCGACACCCTGGGCGGCATCCTCGAAGTGCGCTTCCGGGGCCTACCGGTC +>9_3#NODE_13_length_39994_cov_62.9723_ID_25 +GCCGGGCCTGCCACCCGTCGAAAGTCCGGTTCTCCGGGAGAACGGCCTCAGGCCGGGCGCGGCACCTTGGCCAGCGCCGGCGCCGGCGCCGGCATCAGCCGGCGCACCAGCCACTTGCGCACCGGCACCACGCCGCGCTCCTGGAACAGCACGCAGAACGCCACCAGCAGGACCAGGAACAGCGGGTAGTAGAGCAGCGACGATGGCAGCGCGGCCGCCGCCTCGGTGCAGGCGCGCCAGTCGTCGAGACAACGCCCCGGCACGGCGATAAGGCGCTCGCTACGGGAGAACAGGGTGAACATCGGCACATGCAGGGCGAACAGCGACAGCGAGGCGGCCCCCAGGCGCGGCGACCAGTGGCGGACGAAAGCGCTCTTCGGGTCGGCGGGCAGCGCGCAAAGGCACACCAGCAGCAACTGGGCCGGTAGCAGCAGGCCGTTGTGCAGCAGGAAGTACCAATGTTTGGCGCCCTGGGTGAACAGGTAGTCGGCCACCAGGAAGGCCGCCACCACCAGCGCCACCAGCGCGGCCAGCAGGCCGCGTCCCGGCACCATGCCACGGTCGCGGTAGCCGCGGAACAACCCGTAGGCCAGGATGCCGCCAAGGAATTCCGGCAGGCGCAGCAACGGATTGCGGTGCAGCAGGCCGGTCCAGGGGATCCCGTAGCTCTCGCTGGCCACTACCAGCGCCGGCGGGATCAGGTAGACCAGCCAGACCAGCGCCAGCACCTTCCACTTGTGCCGGCTGCGCATCAGCCGCGGGGCGACGAAGGGAAACGCCAGGTAGAAGAACATCAGCGTCGACAGCGACCACAACGGCGCGTTGAAGGTCAGGTAATAGGGATTCCAGGCCTGCAGCAGGAGGATCTGCAGGATGCTGTTGACGAACAGTTCGGTGTTGCTCATCCAGTGCAGGAAGAGTTCCGGGTGGGTGCGGCCGAGCACTTCGTTGGTGTCGTAGATGACGAAGCGCGGGGTGGCCTTGTCCAGCTCGGGACCGATTCCCAGGTGGCTGAGGGCCAGCAACACCAGGATCGACAGCAGCAGGGAGAACAGGTGCAACGGATAGAGATTGGACAGTCGCTTGGTCCAGAAACTGCGGGCGCTTTCGCGCAACCGGCCGCTGCGTGCGTAGACATGGGCCAGGAGGAAGCCGGAAAGCACGAAGAAGGTACTGGTGGCGAAGAACCCCAGGCTGGTCAGTTCGTCGAGACCGGGAAACTTCTGCTCTTTCGGATAGGTATGCAGGGTGTGGTACACCACCACGTACAGGCCGAGCAGGAAGCGCAACCATTCCAGTCCGATGAAACGTTCCTTGGAGTCGGGCGTTGCCACGCATCTCTCCTTTGTCATTGTAATGGGCGGTCCGGGGTAGACCCGCAAGCTCCCCGGCAAGTTTCACCGGGCCGGCCAGCGGCTACGAACGGCCGCTCCCTAGCGCCGGCGGTCGAGGAAATTCACCACTACCCGGTCAAGCCAACCCCACAGGCGCTGGTGCAGGCGCATCCACAGGGGTCGCGCGCGCCACATCGCCAGGGTCACCTCGCGGCTGTCGGCGAAGTCCGCCAGCAGGCTCGCCGCCGCCTCATTGGTGAAGTCCGGGTCGAGGGCCTCGAGATTGGCGTCGAGGTTGAAGCGTAGATTCCAATGATCGAAGTTGCAGGAGCCGACGCTCACCCAATCGTCGACCATGACCATCTTCAGGTGCAGGAAGCGCGGCTGGTATTCGTGGATGCGCACGCCGGCGCGCAGCAGGCGCGGGTAGTAGCGCTGCCCGGCATAGCGCACCGGGGCGTGGTCGGTGAGCCGCCCGGCCAGCAGCAGGCGCA
CCTCCACGCCGCGCTGGGCGGCCTTGCGCAGGGCCCGGCGAACCTTCCAGGTCGGCAGGAAGTACGGCGTCGCCAGCCAGATCCGTCGGCGCGAGCCGTTCAGCGCGCGGACCAGCGATTGCAGGATGTCGCGGTGCTGGCGGGCGTCGGCGTAGGCCACCCGGCCCAACCCACGGGCCGCCCCCGGCTGCGGCGGAAGGCGGGTAAGGGTCATGCCCTCGCGGGGCTTCCAGGCTTTCTCTTCGAGACAGGCCAGCCATTGCCGCTCGAACAGCGCCGCCCAGTCCGCCACCACCGGGCCGTCCATCTCGACCATCACCTCGCGCCAGGCGCTGACGTCGCTGACCGGTTCCCAGAATTCGTCGGTGATCCCGGCGCCGCCGACATAACCGAGGCGCCCGTCCACCAGCAGCAGCTTGCGATGGTCGCGGTAGAGATTGCCGCCAGTCAGCTTCCAGCGCAGCGGGTTGTACAGCCGCAGTTCGCCGCCGGCCTCGCGCAGGCGCTGGATCCAGGCGCTGCCCAGGCCGAGGCAGCCGAAGCCGTCGAACAGGCAGCGCACGGCGACGCCCCGGCGGCGGGCATCCAGCAGGGCGACGAGAAACAGTTCGGCGCAATGCCCGTCTTCCACCAGGTACAGCTCCAGCTCGATGCTCCGCTGCGCCGCCTCGATGGCCAGCAGCATGCGCGGGAAGAACCGCACGCCGTCATTGAGCAGGGCGAAGCGGTTGTCGCTGCGCCAGGGAAAGACCGGGCCGCTCACCGCGAAGTGAAGATCAGCACGGCGCCGACCGGCACCGAGAGCCCGATGCTGCCGGCGCCGGCCAGCTTGCGCAGGGTCTCGATGCCGGGGGCGAGGTCGAAGTCGGCGGCGTCCAGCACCAGCGGCGACAGGGTCACCACCTGGAAGCGATGCTCGTCCAGGCGGGTCACCAACAGGTCGCTGGCGTAGCTCTTGCGCTTGCCGTGCAGGCTGACGGTCACCGGCTGGCGCATTTCCAGTTGCACGCCGTCGGCCAGCTCGACGATCGGCGCCAGGTCGAGCTGGGCGGTGATCTCCGCCTCGGCGAAGCGCTTGGTCTCGAACAGATGCTCGCGGATGCGTTCGTCGCGCAGCGGGATGTCGGTGCTCACCGAGTCCAGCTCGACGCGCAGGCGCGCCTGCCCCTTGTCGTCGACCTTGCCGTGCATGACCAGGAAGCGGCCGACTTCCGAAAGGGTAGCGTTCTTGGTGGAGATGAAACTGAGTCGCGACGACTCGTAGTCCAGGTACCAGGCGGCCTGGACCGGCAAGGCCAGGAGGGCCAGGCCGAGGCAGATCAACTTGTGCATGCGGGCTCCCTTTGCGTCCGGCGCCACCTTAAACCGGCCGCCTGGGAGCAGGCAAACTCCCGCGTCAGTCGAGCGCGTGCTGCAACTGACTGACGGTATCGGCCAGGGTTCCCAGGTGCCGGCGGCGCACCCGTTCGACGCAATCCTGGTCGCGCGGCCAGTGCAGCGCCACGCTGCCGAGCAGGCGACCCTGGGCGCGTACCGGCAAGGCCACCGCGCGGATCAGGAACGGCAGGCGCACCGGATACTCCCAGTGTCCCTCGGTACGCTCGCCGTAGCCCTGGCCGTGGGCCTGCTCGCGGGTGCGCAACCAGAGGTCCTCGTCTACCTGGTGCTGGCGCGCCAGGCGGCGCACCTCGCTGTCTGCCAGTTCGCCCAGGCAGGCCTTGCCCATGGCCGAATGGAACAGGCTGGCGTGCTGGCCGACCACCAGCCGGTTGGCCGGGTAGCGCTTGCGCAGGACGCCGGGCACCGAGCTTTCCGCCACCAGCAGGCGGTCGCCGTCGAGAATCGACAAATCGGCGACCAGCCCGGTGCGCTCGCTCAACTCCACCAGCAGCGGCGCGGAACGCTCCACCACCTGGCAACGGAAGCGCTCGGCGGCGTCACCGTAGAGGCGGGTGGAACGCAGCCGGTAGCGCCGGTCGGTCAAGCCGCGGTACAGCCAG
CCCTGCTCCAGCAGGGTATGCAGCAGCCGCGACACCGTGGCCTTGGGCAGTTCGGTCAGGTAATGCAGTTCCTCGAGCCCCAGCGCCTGGTGCTGGCCCAGCAGATCGAGGATCGCCAGCGCCCGCTCCACCGAACGCACGGTTCCCGCCTCTTCTCTTTCCGGCATGACGACACCTCCACCGTCGACAGACCGTTATGGCAAAGCAAGATGCAGGCCCAGGTCATGTAGGAAGAATCTGCGAATACGCCAGCATTGCTCCGCGGCCTAGTCGATGCCCCCTGGAGCCTCGAGCATCGCCGGCGGACGGCGGCTGACCCAACGGGTCAGGCCCTCGTGGGCATGGGCCAGTTGCAGCACCGCCAGGTCGGCCTGCGCCGGGCCGATTATCTGCAATCCCATCGGCAGGCCCGCCGCGCCGAAGCCGATCGGTACGCTGATCGCCGGCAAACCGGCCAGGGTCGGGCCGATCACCACCTCCATCCAGCGGTGATAGGTGTCCATCGGCCGCCCGGCGACCTGCCGCGGCCACGCCGTTTCTGCATCGAAAGGAAACACCTGGGCGCTGGGCAGCAAGAGGAAATCGTAACGTTCGAACAGACGCGCCAGCGCCCGATACCAGTCGCTGCGATCCAGCGAGGCGCGATAGACCTCGGTGGCGCCGAGGCCGAGCCCGGACTCCACTTCCCACTGCGCCTCCGGCTTGAGCCGGACGCGCCGTGCGGGATCGGCGTAAAGCTCGCCAAGCGAGCCCTGCACCAGCCACTGGCGATGGACCAGCCAGGTGCGCCACAGGCGCTCCAGGGGATAGTCCGGCAGGCACGCCTCGACCTCGCAGCCCAGCTCGGCGAAATCGCCCAGCGCGGCTTCGCAAAGCTCCAGCACGCCCTCCTCCATCGGCAGGTAGCCGGCGTAGTCACCGAGCCAGCCGAGCCGGGCTCTGCGGAAATCGCGCCCGAGGTCGTCGGCGAACCTGCGCGGATCGTCGCGCAACGACAGCGGACAGCGCGGATCGTAGCCGGCCTGGGTGGCCAGCAGCCGGGCCAGGTCCGCCACGCTGCGCCCCATCGGGCCTTCGGTGGCCAGTTGCTGGACGAACAGTTCCGCTTGCGGGCCGTGCGGCACCCGTCCCTGGGACGGGCGGAAGCCGTAGACGTTGTTGTAGGCGGCGGGGTTGCGCAGCGAACCCATCATGTCGCTGCCGTCGGCCACCGGCAGCATGCGCAGCGCCAGCGCCACCGCCGCCCCACCGCTGCTGCCGCCGGCGATCCGCGCCGGATCGTAGGCATTGCGGGTGGTGCCGAACAGCGGGTTGTAGGTCTGCGAGCCGAGCCCGAACTCCGGTACGTTGGTCTTGCCGATGACGATCGCACCGCTGCTCTTCACCCGCTCGACGACGATGGCGTCGTGCTCCGGCACCTGCCCGGCGAACAGCGGCGAACCCAGGGTGGTAGGAATGCCGGAGGTGGCGGCGAGGTCCTTGATCGCCTGCGGCATGCCGTGCATCCAGCCCAGCCATTCGCCGCGCGCCAGTTGCCGGTCGCGCTCGTCGGCCTGGGCGAGCAGGCGGCTTTCCGCCTGCAGCGAGACGATGGCGTTGACCCGCGGGTTGAACCGTTCGATCTGCGCCAGGTAGGCCTGCATGACCTCGCGGCAGGACAGTTCGCGTCGGCGGATCGCCGCGGACAGCGGCAGCGCGTCAAGCGCGACGATTTCCTCGTGGCCGTCGCTTCGATGGCGGGTGGCGCTCATTCACAGACTCCTTGCCGGCTTGCCCAGGGCCGGTTCACGGGGGCGCGGGGCGGGCGTCGCGCCCTCCCGCAGATAGAGGGTGGCGAGCAGCCCGAGGAAGGCCGCGCCGAGCACGTAGAACGCCGGCGCCACTGGCGAGCCGCCGACCTCGGTGAGCCAGGTGACGATGAACGGAGCGAAACCGCCGAACAGCATCACCGCCAGGTTATACGCCACCGCCAGGCCGGTGGAACGCACCCGCACCGGAAA
CTGCTCGGCCACCGCGGTCGGCGCCGGGCCGAAGAAGCCGCCGATCGCCGTGCACAGCAACAGTTGCATCAGCAGCAGGCGCCCCAGGCTCGGCGCCGCGGCGACCCAGGCGAACAGCGGATAGACCATGAGCATGAAGGCCAGGGTCGCGACCAGCAGCACCGGGCGCCGCCCGACCCGGTCGGAGAGTCCCCCAGCCAACGGGATCACCAGGGTCATCAGCGCCACCGCAGCCATCTGCACCATGAACACCTCGTCCAGCGGCAGGCCCAACTGCTTGTGGGCGAAGGTCGGCATATTCACCAGCACCACGTAGAAGGACACCGTGCCGATCACCGTCAGGCCCATGCTCACCAGCACGCTGCGCCGGTATTCGCGCAGCACGCCGAGCAGCCCCTGGCGCTCGCCGGGCTCACGGATGGCTTCGAGGAAGGCCTCGGTCTCGCCCATGTAGCGGCGGATCCACAATCCCACCGGACCGATCAGCAGACCGAACAGGAACGGCACCCGCCAGCCCCAGCTCTCCAGGGACTCGGCGTCCAGGCAATGGGTGACCAGCGCGCCCATCCCCGCACCGGCGAACACCGCCAGGCACTGGCCGAACAATTGCCAGGAACCGTAGAGACCGCGCCGGTGCGGCGGCGCGCTTTCCACCAGGAAAGCCGTGGCGCTGGCGTACTCGCCGCCGGTGGCGAAACCCTGCAGCATTCGCGCGATCACGATCAGCAACGGTGCGCCGACGCCGATCGCCGCGTAGGTCGGGGCGAAGGCGATCATCGCGATGGACAGCGTCATCAGCAGGATGATCAGTTGCATCGCCGCCTTGCGTCCCCTGCGGTCGGCATACAGGCCGAGCAGGACCCCACCGACCGGACGCATGAAGAAACCGACACCGAAGGTGGCCAGGGCCATCAACAGCGAGGTGTACTCGTCGCCGGAAGGGAAGAACAGCCTGGCGATCAGGCTGGAAAGGAAGCCGTAGACGATGAAGTCGTACCACTCCAGGGCGTTGCCGATCACGGCAGCCGCCACCTGGCGGGTACGCGAGGTACCGCTCGGGCAAGCGTGCATGGGAATCTCCTGGTAACCGGGGGAATGCCGGCGCGGCTCAGGCCGCGCCGTTATTGGCGGCAGTCAGTCGAAGGCCGGACGCCGCGGGGGCGGCCAGCCAGGTCTCGGCGAGCGCGCCCCAATAGGCCGCGCCCCGGACCAGGATGTCGTCGTTGAAGTCATAGGCCGGGTTGTGCACCATCGGCCGTCCTCTGCCGTTGCCGATGAACAGGTAGCTGCCGGGGCAGCGCTGTAGCATCCAGGCGAAGTCCTCGCTGCCCATCAGCTTCGGCGTGTCGCCGTCGACCTGCTCCGCGCCGGCCAGCTCGACCCCGACCTGGCGGGCGAACTCGGTTTCCTCGACGCTGTTGACCAGCACCGGATAGGCCGGGTAGTGCTCGATGCTGGCCTGGCACCCATAGCTGGCGGCCTGCAACTCGATGATCTGGCGAACCCGCTGCAGCACCTGCTCGCGCACCTGGCCGTCGAGCGCCCGCAGGCTCAGGCGCAGCACGGCGCGCTGGGGTATGACGTTGGCCGCCTCGCCGGCCTGCAATGCGCCGACCGTGACCACCGCGGCCTTCTGCGGATCGACGTTGCGCGCCACCACGCTCTGCAGGGCCATCACCGCGCTCGACGCGGCGAGCAACGGATCGACGCTCAGGTGCGGCATCGAACCGTGTCCACCGACGCCCTCCAGGGTGACGCTCAGCAGGTCCTGGGAAGCCATCATCGGTCCCGCGCGGAAGCCCAGGTGCCCGGCCTCCAGTCCCGGCATGTTGTGCATGCCGAACAGCGCATCGCAGGGAAAGCGCTCCAGCAGCCCGTCGGCGAGCATCGCCTCGGCGCCGCCCTGGCCTTCCTCGGCGGGCTGGAAGATCAGTACCAGGGTGCCGTCGAAACGCCGCGTCGCCGCCAGGTAGCGTGCCGCGCCGAGCAGCATCGCGGTG
TGTCCGTCGTGGCCGCAGGCGTGCATGCGCCCGCCGTGGCAGCTGCTGTAGCCGAGCCCCGTGGCCTCGACGATCGGCAACGCATCCATGTCCGCCCGCAGGCCGAGCCGCCGCGTGCCGTCGCCCTGGCGGAGGACGCCGACCACGCCGGTGCGGCCGATCCCTTCGTGCACTTCGTAGCCCCAGCCGCGCAGGCATTCGGCGACCAGCGCGGCGGTGCGTCGCTCCTCGAAGCCCAGTTCGGGATGGGCGTGGATGTCCTGGCGCAGGCTACGCAGGTCGTCGGCGACCTCCTCGAGCCAGGCCACGATGTGTCGATGTCGAGCCATGCCGTCTCTCCTCTGGCCGGATTGCGCTCCGGTCTTGTTGGCGGGAGGCCCGGACGCAAGGCCGGCCGGGCGTGGTCACAGCTAAGCGCGGCACGCGGCGAAGGACAACGGAAGGCGGTTCCACTGGGTGGAACCCAAGGCCCGCGCCGCCCTCCGCCGGTACGTCAGAACTGTTCCGGCGGCACTTCGCGGAAGTTCAGCACGGCGAACACCGGTACCTCGGGAGTGATCGAGGGAATATTGGCCATCTCCTTCAAGGGCTTGAGCTTCTCTTCCAGGTCGAAGTCGAGCAGCTTCAGCAGCACCGGCTCCAGGGTCGCCACCTGGACCAGCCCTTCGCCCTGGCGGGTGACCAGCACGTCGGCCTTCAGGCGGCGCTGCTGGCCGTGCAGGTCGAGGGTGAAGTCGAGCTTCTCGACCCGCGACTGGCCGACCCGCAGGTCGTCGAAGCCGGCCATGTCCAGGCGGCTCTCCACGGTGGCCTCGGGGAAGCGCGAGGTTTCGAAGAAGTTGTCCTTCATTCGCTCGTCGCGCAGGGCCAGGCCGCTGTCCAGGGTGCCCAGCGGCACCACGATGCGCGCGACGCCCTGCTTGTCGATCTGGCCGCTGAGACGATCGAAGCGCTGCACCTCGGCCATCTTCGCGCGTTTCACGGAAACGAAGCTGATCCGCGAGTAATCCGGCTCCAGTTGCCATTCCGCCGCCGACAGCAGAGGGCTGAGGAGCAACAACAGGACACACAATACAACAGGCATTCCACCTCCGGCTTCGGGCCGGCGCCGTCCCTCGCCCCGCGCCCCGACATGCGCCGCCGGTCAGTCCGGCAGCCTGTCCGCCTCGGGCATGAGCCGGCATCCCTCGCGCGGGCCCAGCCAGGCGGCCGTCTGCGTACTGCCGAGGCCGCGGGCGGTCACCCGGCGGCGGGCTTCGTCGTCGAAGAAGCCGCTGATCGGCACGTACTGTTGGGTATAGGCCCGGCAATAGTCCGCGGAGTTCCCCATCACGTAGCGGCAGGAGCAGTACTCCTTGGCCGTATAGGCGCCGATGATGGCGGGAAAGGCCGCCAGGTGGATGCGGTTCTGCCAGGCCCACGCGAGGGCGGCGAGCAGCCCCAGGACGAACAGGAAGAGCAAGGGATGGCGACGGATCATGGCCGCGCCTCCCCGGCGAACGCCGCCTGTGCCAGGCGCAGGAATTCGTTGTGGCGGAAACGCCCGTCGCGGTCGTCGGCGTAGCGCACGATCACCAGCTTCTCCTCGGGCAGCACGTAGAGCGCCTGGCCCCAGTGGCCGAGCGCGGCGAAGGCGGTCTCGGCGACGTCCGGCCAGGGCGCCGGCGCGCCTTTCACCGCACGGTTCAACCACCACTGGCCGCCCGGCACCGCCTCGCCCTCCTCTTCCGCGCTCGGCCGGTAGCGGGCGAAGGGTGTCAGGTTGAACTCGACCCATGCCAGCGGCAGCAGTTGCCGCTCGCGCCAGCGCCCGTGGCGCTGCATCAGCAGGCCGACGCGCGCCAGGTCGCGGGCGCTCATGTAGACGTACGACGAGCCGACGAACGTGCCGGCGGCGTCTCGCTCCCATACCGCCGAACGGATGCCCAGCGGCTCGAACAGCGCGCGCCAGGGGTACTCGGCATAGTCCGCGCCGAGCATGCCGTGGAGGGCCGCCGCCA
GCACGTTGCTGTCGCCGCTGGAGTAGCGGAAACGCCGGCCCGGCGGCCGGGCCTGCGGGGTTTCGGCGACGAAGCCGGCCATGTCGTCACGCCCACGGGTGTAGAGCATCGCCACCACGGAGGATTTCAATGGCGCGAACTCGTAGTCCTCCTGCCAGTCCAGCCCGGAAGACCAGTTCAGCAGGTGGCGCAACGTCACCTCGGGATGACGGGCGAACGGCGGGTAGTAGCGCGCCACCGGGTCGTCGAGCTGGAAGCGCCCTTCGCCCTCGGCGACGCCAAGCAGCGTGGCGAGCAGGCTCTTGCTCACCGACCAGGTAAGGTGCGGGGTCTCTGCACGGCTCGGGCCGGCGTAGCGCTCATAGACCAGACGACCGTCGCGGATCACCACTACCGCGTCGGTGCGGATGCCTTTACGACGAACATCGTCGCGACGACCGAAGGCATAGCGCTCGAATGCCTCCAGCGCCGGCCCCGCGGGCTGTTGGGCACGGGGCCAGTCGGTGTCGGGCCAGTCTTCGGCCAGGGCAGGGAGGGCGAATACCAGGGCCAGCGCGGCGAGGGCGCGGCGGGACAACGGCAAAGCGGGCATCGGCGGGCCCTCTCGACGGTCGAGGGGCAACCCTAGCATGGCCTCGATGACGCCAGGAAGCGCGGACTCAGCCTGGCATGCGGTAGTCGTCCGGGCCGAGCAGGTCGATGCCGCACTCGAGGTTCTCGATCCAGTAGAGGAAGGCGCTGATCATTTCCTTGCCGACGAAGGGCCGCCCGTGTTGCGGCACGATCATCTCCACATCCATGTCCCGGACCATCCGCGCCCACAGGCGGCAGATCTTGTTCGCCGCCATGTAGCGGCGATGGAATCCGTCCATGTACTGGATATGCGCGGCGAAATCGCTGACCGGCGCCGCGTCGTCCACCAGCGAGGCGCCCATATCGCCCGAGAAAAGGATCTTGCTCACCGGGTCGTAGAGCTGGAAGTTGCCCACCGAATGCAGGAAATGCGCGGGGATCGCCTTCAACGAGCAGTCGCCGAGAGCGATCGCCTGGCCGCGGTCGGGCAACGGCATGACCCGGTCGTAGGTGTTGATGCCATGGCTGACCGCCAGGTAGTTGGCCGTCAGGTGCGGCAGGAAGCGCGCCCACAGCTTGGAGCAGATCACCCGGGCGCGGGTGTGCAGCAGCCACTTGTCGAGGGCGGCGATGATGTCCGGGTCCTGGTGCGAAGCGAAGATGTAGTCCAGCTCCTGCAGCGGAAAGCGTTTCGACAATTCCAGCGAGAGCGGCGTGTAGGTCAGGTCGCCGCCGGGATCGAGCAGCAGGTACTGGCCGTTGTCGACGATCAGGAACTGGTTGGACTGGACCCCTTCGCCGCTGACGAGGTCATCGAAACACAGGCACTGGTGGCTGCCGTTGTCGAACAGCACGATCGGTTCGCGTCGCATGGTCGGTGAAATCTACTCTGTGCAGGCGCCGAGGAGCATTCCTCGCCGGATGCTGCGCAGGGTAAGCGCCTGCGGCGCGGCGCCTCACTGACCTGGGTCAAGCCCGAGGCGGGTCAAGGCCTTCTCCAGGCGAACTTTGCGGGCCCCGTTGGCAATCGCCAGCAAGCCGCGATCGCGTTGCCACAATGCCTCCCAGGTCGAGGCGTTGGCGGCGAAGGCGGCATCCAGTTGCCCCTCCAGCGCCGAGAACTGTTCGAACAGCCCAGCCAGTACCGCCTGGGTGCCGACGCAGGCGACCGGATGCCGCGCGACCTCGGCGCAGCCGCCGAGGATGCCGGCCAGGCGCAGCAGCAGCGGCGAGGGCCAGCCGCATTCATGGCCGACCCCATAGAGCCAGCCGACCAGCGCCGCCAGCACATGGACATCCTCCAGCAGGCGGAACGGCCTGACGTAGTCGTCCCAACCATCGCCGGGCAGGCGCTCGCAATGGGCACCGTCGAGCAACAGGCGGGAATGCGGCACATCCGGCATCAGCGGCAGCGCCGGCAACGCCTC
CAGGCGCGCCCCCGGGGCGCCGGCCAGGACCACCGTCATCGACAGTCGCGGCGGCTGCCCCGGCGCCTCGTCGCGGGCGGCCACCAGCCACCAGGAGGCGCTGTCGCCAGCGGTGACGAAATCCTTTTGTCCATGCAGGACCAGCCCCTCCAGGCGGGTCTGCAGGTCCGCCGGGCGCACGCTGCGCTTCTCGGTCACGCACAGCGCGCCGAGACCTTCCGGCGCCGCCGGCCAGAGCGCGCGCAGGGCCGCCTGGTAGCCGGCGAGGAAGGCCAGGCCCGGACTCGCGGCCAATCGTCCGCCGAGCACCGCCCGCTCCAGGGTGCCAAGACCGTCGGCGCGCGCCAGGGCGGCGGCGTGCCAGTCTTCCAGGGAGCCTTCGGTGGGCAGGCGGTCGAGCGGGCCGAGCAAGCGTTGCCAGGACATGGCGGACTCCAGTGTGGAATGTGTCGGAACCGGCATCCTGCCGAGGGTGCCAGCGTTGAACAACGGCGCGTTACATCCGGCGACAATGTCATACAAGCATCACGCCCCTGTCACAGGGGTGACACCGCGCCTTCCTAGCCTGAGCTTGCACAACAAAGCCGAGCGGCCGCAACCCGCGGCGCCGGCCCAAGGAGGCCCAGGCATGACCCAGACCGCCATTACCCGCGAACCCGTCGCCGGACGCCGCCTGAAGGCCGAGCGCCTGAACGGCGCCCGCGCCCTGCGCGAGGCCCAGGCCCTGCGCTACCGCGTATTCAGCGCCGAGTTCGACGCCAAGCTCGAAGGCGCCGAAGACGGCCTCGACCGCGACGACTACGACCGTCACTGCGCGCACATCGGCGTCCGCGACCTCGACAGCGGCGCCCTGGTGGCCACCACCCGGCTGCTCGACCATCGCGCCGCCGAACGCCTCGGGCGCTTCTACAGCGAAGAGGAGTTCCACCTCTCCGGCCTCGACGCCCTGCACGGCCCGGTCCTGGAGATCGGCCGCACCTGCGTCGCTCCCGAATACCGCAACGGCGCCACCATCGCGGTACTCTGGGGCGAACTCGCCGAGGTCCTCAACGAGGGCGGCTACCGCTACCTGATGGGCTGCGCCAGCATTCCCATGCGCGACGGCGGTATGCAGGCCAAGGCGGTGATGCAGCGGCTGCGCGAACGCTACCTGTGCACCGACTACCTGCAGGCCGAGCCGAAGAACCCGCTGCCGCCGCTGGACGTCCCGGAAAACCTCACCGCCGAGCTGCCGCCGCTGCTCAAGGCCTACATGCGCCTGGGCGCCAAGATCTGCGGCGAGCCCTGCTGGGACCCGGATTTCCAGGTCGCCGACGTGTTCATCCTGCTCAAGCGAGACGAACTCTGCCCGCGCTACGCCCGCCACTTCAAGGCAGCGGTCTGATGGCGCGGCTGCGCCTGCTGCTGCGCAGCGCCCGCCTGCTCGGGCTGGTCGCGCTCGGACTGGGCCTGGCCGCTTGGGTCAGCCTGCGCGAGCGGCTGCCCGGCGCCGACGTCACGCCGTTGCGCCAGCGCCTGACCCGCTGGTGGCTGGCGCGCCTCTGCGCGGCCCTGCCCTTCGAGGTCAGGGTCAGCGGCGAAGCGCCCCGGCAACCCATGCTGTGGGTAGCCAACCATGTGTCCTGGACCGATATCCCGCTGCTCGGCGCCCTGGCGCCGCTGACCTTCCTGTCCAAGGCCGAGGTGCGTGCCTGGCCGCTGGCCGGCTGGCTGGCGGAAAAGGCCGGCACCCTGTTCATCCGCCGCGGCTCGGGCGACAGCCGGCTGATCAACCAGCGCTTGGCCGAACAGCTGCACCGCGGACGCAACCTGCTGATCTTCCCGGAAGGCACCACCACCAACGGCGAAAGCCTGCGCACCTTCCACGGTCGCCTGATGGCCAGCGCCCTGGAAGCCGGCGTGGCGGTGCAGCCGGTGGCGATCAGCTACCGCCGCGACGGCGTGCCCGATGCGCAGGCGCCGTTCATCGGCGACGACGACTTGCTCAGCCACCTCGGCCG
CCTGCTGCGCGGCGAGCGCGGCAGCGTGCATATCCAACTGCTGGAACCGATCCCCAGCCAGGGCCTGGACCGCGCCGAACTGGCCCGCCAGGCCCAGCAGGCGGTGCGCCTGGCGCTGTTCGGCACTGCCGCCACTACGCAAACCCGGCGCGCCGCCTAGTCGCGCTTGGGCGGCACCGCCAGCACGTCGCACGGCAGGTCGCCGAGGGCCGCCTCGGCCACGCTGCCGATCAGCGCGCGGCGGAAGCCGGTCAGGCCCTGGGTGCCGAGCACCGTCAACTGCGGCGCCTGGCGGCGTACCTCGGTCATGATCACGTTGATCGGCAAGCCTTCGCCGACGCGCAACTGGATGTCCTCGCGGCCCAGCCCCAGGCCGGCGACGTAGTCGCGCAGCTCGACGTTGGCCTTGAGTTCCTCCTGGTCGACGTAATGCTCGACGCGGTCTTCCTTGATCCCCGAATAACGCATCATGCCTTTGGCGAACGGCTCGAAGGCATGCAGCGCGGTGAGGCTGGCGGCGTCGAGGAAGCCGAGCTGGCCGGCCATCTGCACCGCGCGCGTCGAGGTCGGCGAGAAGTCCAGCGCCAGCAGCACCCGGCGGTACTCCTCTTCCGGCGCACCGGCCGCGCGCAGCACCGGGATCTTCGCATTGCGTACCACCCGCTCCAGGGTGGTGCCGATGAACAGGTCGCGCAGCGGCGTGTGCCGGTGCGCGCCCATCACCAGCAGGTCGGCGCCGATCTCCTCGGCCACCGCGTTGATCACCACCACCGGATCGCCGACCCGCACGATCGCCTTGGACTCGACGCCGAGGCGCTCCTTCAGGGCCTGTCCGTTCACTTCCAGCAGGTTGGCGATGGCCAGCCGTTGCGGCGCGATCAGCACCGGCGGCTGGTCGTCGTCGAGCACGTTCAGCACGGTCAGTTCGGCATTCCTCGCCTTGGCCAGCGCCGCGGCGCGCATGACCGCCAGCTCGGAACGCGAGGACAGGTCGGTCGCCACCAGGATTCGTTTCATCTTCCACCTCCATTGGCAGAGCGTTGAACGTCGGCCCCATTGAACGATCGACGCGACCGCTGCCGGTTGATGTGCATCAAGCGGATGCTTCGGACTGAAACAATTGATAGGTGATTCCGCCGGCCGGGCCAATGGCCGCTGCGCCAGGCTCAGCGCTGGCTCAGGGCGAAAGCCTGGAGTTGCGGGTAGAAGGCGCGGAAATCCGCGCTCAGGTCATCGTAGCGCTGGGCCAGTTCTTCCCAGGAACCGTCCAGCAGGTGCGGTCGCGACAGCCTCCGCGACATGCCGCCCAGCACTTCGCGCAAGACCGCGAACTCGCGGTAGCTGCCCAGCCAGTCCTGGGCCGCCATCCGCGGCGCGATGCGCGCCAGGCGCTCCGGCAGCGGCGAGGCGGTGCGCAGAGTGCCGTAGACCCGCTCGACGAACTGCGGCAGCGGTTCGTCGGCGTAGTCGTTCCAGTCGCGCGCCAGGCAGTGGTCGAAGAACACGTCCAGCAGGACGCCGGCGAAACGCCGGCGCTCCAGCGGGAAGCGCCGCTTGGCCGCATGCACCAGTGGATGGCTGTCGGTGAAGGCGTCGATGCGCCGGTGCAACTGGATGGCCCGCTCGATCTCGCCCGGCCATTGTCCCTGCAGGCGGCCCTTGACGAAGTCGCCGTACAGGCTGCCGAGCAGTTGGGCCGGTTGCGGGCCGCCGAGGTGGAGATGCGCGAGGTAGTTCATGCGGGCTAGCTTACCCCAGGGACGGAACCGTCGTTTCGATCATCGTCATCAGCCCTGTCGATAGCGACTATCGTCGCAATCGATCCCCATATCGTCCCACACCGATATATAGTTCGTCCCAACCCGATATGGACGGCCAACGGACCACCGCCATGCCACTGGACATCGACGAAATCATCAAGGCTCTCTCCCACCCGGTGCGACGCGACATGCTGCGCTGGCTGAAGGAACCGGAGAAGTACTTCGTCGAGCAGGACCACC
CGTTCGAGATCGGCGTCTGCGCCGGCAAGTTCGACCAGCGTACCGGTCTCTCCCAGTCCACCGTGTCGGTACACCTCGCCACCCTGCAACGCGCCGGTCTGGTGACCAGCCGCCGGGTCGGCCAGTGGAATTTCTTCAAGCGCAACGAGGAGACCATCCAGGCCTTCCTCGACCAACTCGGCGACGAGCTGTAAGCAGCACTCGCTTCCACCCCACCACCTCGCAGTCCAGGAGCTCGACCGATGCCGGCGTCTCTTCTCGTCCTCGCTTTATCCGCCTTTGCCATCGGTACCACCGAATTCGTCATCATGGGCCTGCTGCCGCAGGTCGCCGGCGACCTGCGGGTATCCATTCCCAGCGCCGGCTGGCTGATCAGCGGCTACGCCTTCGCCGTGGCCTTCGGCGCCCCGTTGATGGCCATGGCCACCGCCCGCCTGGAACGCAAGAAAGCCCTGCTCGCGCTGATGGGCATCTTCATCGTCGGCAACCTGCTCTGCGCGGTGGCCGCCAACTACGGCCTGCTGATGCTGGCGCGGATCGTCACCGCGCTCTGCCATGGCGCCTTCTTCGGCATCGGCTCGGTGGTCGCCGCCAGCCTGGTGGCGCCGAACCGCCGCGCTTCCGCCGTGGCCCTGATGTTCACCGGCCTGACCCTGGCCAACGTGCTCGGCGTGCCGCTGGGTACCGCGCTCGGCCAGGAAGCCGGCTGGCGCGCGACCTTCTGGGTGGTGACCCTGATCGGCGTGGTCGCCTTCGTCGGCCTGGCCAGGGTGCTGCCAAACGACCGCGAGGAAGAGAAGGTCGACCTGCGCCAGGAAATGTCCGCGCTGAAGAACCCGTCGCTGTGGCTGGCCCTGGGCACCACCGTGCTGTTCGCCGCCTCGATGTTCGCCCTGTTCACCTACGTCGCGCCGCTGCTCGGCGAAGTCACCGGGGTCAGCCCGCGCGGCGTGACCTGGACCCTGCTGCTGATCGGCGTCGGCCTGACCGTCGGCAACGTGATCGGCGGCCGCCTCGCCGACTGGCGCCTGGGCACCACCATGGCCGCGGTGTTCGCCGCCATGGCGCTGGTCTCGGCGCTGTTCAGCTGGACCAGCCAGGCCCTGCTGCCGGCGGAAATCACCCTGTTCTTCTGGGCCGCCGCCGCCTTCGCCGCGGTGCCCGCACTGCAGGTCAACGTGGTCCGCGTCGGGCATGCCGCGCCGAACCTGGTGGCGACCCTGAACATCGGCGCCTTCAACGTCGGCAACGCGCTCGGCGCCTGGGTCGGCGGCAGCGTCATCGACCACGGCCTGGGCCTGACCCGCGTGCCGCTGGCCGCCGCCGCGCTGGCCGCGCTGGCGCTGCTGGCCACCCTGATCGCCTTTTCCGGAAACGGCCGCGCCCAGGCGCAGCCGGTCCTCGATTGATCCCCTGTCCGTCTATTCCCTAGTGGAGGATGTTCCATGGCTACTCTGTTCGATCCCATCGTGCTCGGCGACCTCGAGCTGCCCAACCGCATCGTCATGGCGCCGCTGACCCGCTGCCGCGCCGATGAAGGCCGCGTGCCCAACGCCCTGATGGCCGAGTACTACGCCCAGCGCGCCGATGCCGGCCTGATCCTCAGCGAGGCCACCGCGGTCACGCCGATGGGCGTGGGCTACCCCGACACCCCCGGCATCTGGTCCGATGACCAGGTCCGTGGCTGGAGCAACGTGACCAAGGCGGTGCACGCCGCCGGCGGGCGCATCTTCCTGCAACTCTGGCACGTCGGGCGGATTTCCGATCCGCTCTACCTGAATGGCGAGCTGCCGGTGGCGCCGAGCGCCATCGCCGCCGAGGGGCACGTCAGCCTGGTGCGCCCGAAGCGTCCCTACGTCACCCCGCGCGCGCTGGACACCGAGGAGATCGCCGACATCGTCGAGGCCTACCGCCAGGGCGCCGAGCGGGCCAAGGCCGCCGGCTTCGACGGCGTGGAGATCCACGGCGCCAACGGCTACCTGCTCGACCAGTTCCTCCAG
GACAGCACCAACAAGCGCACCGACCGCTACGGCGGCTCCATCGAGAACCGCGCGCGCCTGCTGCTGGAGGTCACCGACGCGGCGATCTCCGTATGGGGCGCCCAGCGCGTCGGCGTACACCTGGCGCCGCGTGCCGACTCCCACGACATGGGCGATTCCAACCGCCTGGAAACCTTCAGCCATGTCGCCCGTGAGCTGGGCAAGCGCGGCATCGCCTTCATCTGCGCCCGCGAAGCGCAGGCCGACGACAGCATCGGCGTAGCCCTGAAGAAAGCCTTCGGCGGACCCTACATCGCCAACGAGCAGTTCACCCTCGACAGCGCCAACGCCATCCTGGCCAAGGGCGACGCCGATGCGGTGGCCTTCGGCGTCCCCTTCATCGCCAACCCGGACCTGGTCGAACGCCTGCGCCAGGGCGCCGAACTGAACCCGCCGCGCCCGGAAACCTTCTATACCGGCGGTACCGAAGGCTACCTGGACTATCCGACCCTGGCCTGATCGCGCCAGCCAGGAAGAGCCCGCCACGCTTGTCGTGGCGGGCTCTGTGCTTTGCCCGTCTAGCGCAGGCGGAAATACGGCTCCGGCTCCGCGCACGGATCGCGCTGGGCGCAGCCCTTGCAACCGCCGCCGCAACCGCCCGAGCTTTGCGCAGCCACCCGCTCGACCTTGCCGAGCGCGATCAGCCGCTCCAGGATCGCCTCCACCAGCGCCGGCGGCGCGTCCACCTGGCGGCTGAGCTGTTTCAGCCCACGCGCCTGGCCGTCTTCCAGGGCGGCGCGGAGCTGCATGAGTGTCGCCATCAGTGACATCCTCCCTGCCCGCTCGGGGCATCGGCCATTCGGCCTGGCAGTTGCAGTACCTGCTCACGGCCGAACAGACGCAGGCAGGTCATCAGGATCAGGTTGAAGAGCAGCACCACGGCGATGGTCAGGACGCTACGCTCGGGGTGCGCGGCGAAGGTGGCGACCTGGTAGCAGAGGGTCGCCAGCGAGTAGGCGACGTTCAGCCCCCAGAGCACCGAGAAAGCCATCCAGCCCTTGCTGCTTTCCCGGGCGATCGCGCCCATGGCGGTCACGCAGGGCACGTAGAGCAGGACGAAGACCAGATAGCTGTAGGCCGCGATGGGGCTGCCGAACTTGCTCGCCATGGTGCCCATCGAGCCGGTCTCCATGTCGCCGTCGGCCATGCTCGCCTCCACCGGGTTGGCCAGCACGCTGAGGCTGAAGGTATCGACCAGGCCGTCCCAGGTCTCGACCAGGGCCTCGCGCAGTTGCCCGGGCAGGTCGTAGCCTTCGTAGTCGAAGGCCTCGCCCTGGATCTGCTCGGCGGTGTAGAGGGTGTTCAGGGTGCCGACCACCACTTCCTTGGCCATCGCCCCGGTGACCAGGCCGACGGTGGCCTGCCAGTTGTCCGGCTGCACCCCGAGCGGCGCCAGCAGCGGAGTGAGACGCTGGCTGACGCTGGCCAGCGCGGAATGGCCGATGTCGCCCTGCACCGGCTTGCCGTCGAGGGTGATGCTGTTCAGGCCGCCGATCACCAGGCTGACCAGGATGATCACCTTGCCGGCCCGCACCACGAAGCCGCGCAGGCGCGACCAGGTCTGCAGCAGCAGGCTCTTAAGGTGTGGCACATGGTACAGCGGCAGTTCCATGACGAACGGCGAGGCCTCGCCGCGCATCAAGGTGTGCTTGAGCAACAGCCCGGTGAGGATCGCCACGACGATGCCGAGCAGGTACAGGGAGAAGATCACCAGCGCGCCGCCCTGGCCGAAGAAGGCCCCGGCGAATACGGCGAAGATCGCCAGGCGCGCGCCGCAGGACATGAACGGCGCCATCATGATGGTAATCAGCCGCTCGCGCTGCGCGTCCAGGGTCCGCGCGCCCATGATCGACGGTACGTTGCAGCCGAAACCGACGATCAGCGGCACGAACGACTTGCCCGGCAGGCCCAGCGCCTGCATCAGGCGGTCCATGACGAAGGCCGCGCGTGCCATGTAGCCG
GAATCCTCGAGCAGCGAGAGGAACAGGTACATCAAGCCGATCTGCGGCACCAGCGGCAACACCGTGTTGACCCCGCCACCGATGCCCTGGGCGAGGAACACCGTGAGCCAATCGGGCAGGCCGAAGCGGATACCCAGCCACTGGATGCCGTCGATGAAGATCGCCGACGAGCCCTTGTCGAAGATCGGCTGTAGCGCCCCGCCGATGTTGATGGCGAAGAAGAACATCAGGTACATCACCAGCAGGAAGATCGGCAGACCCAGCCAGCGATTCAGGACGACCCGGTCGAGCCATTGGGTCAGCCGGTGCGGCTGAGCCTGCTGGTGGTCGCAGACCGCCGCACAGATCTCGCCGATCAGTCGGTAGCGCGCATCGACGATCGCCAGCTCGGGCTCCTCGCCGCAGCCCTGGCGCGCCTGTTCCAGGGTGGCGGGCGGCAAGCCCAGCGCCGGGCCGTTGAAGATGTCGCCTTCCAGCGCCTGTAACGCCAGCCAGCGCGGCTCGATGGCCGAGGCCGCCGGAGCGCGCGTTTCCAGAAGGTAGCCGACCTGCGCCTGGATGGCCGGTGGGTAATCCACCGCCAGCGCCGCCTGCGGGAGCTGTAGGGAGTCGATGGCCGCCTTCAGTTCGTCGATGCCATCGGCGCGGGTGGATACCAGCGGCACCACCGGGCAGCCGAGACGCCGCGCCAGACCGTCGATATCGATGCGGATGCGCTGGCTACGGGCGATGTCGAGCATGTTCAGCGCGACGATGCAGGGGATGCCCATCTCCCGCAGTTGCACCGTGAGGTACAGGTTGCGCTCCAGGTTCGCCGCATCCACCACGTTGACCAGCACGTCCACTTCGCCGCTGGCGATGTAGCGGCAGGCGATCTGCTCGTCCAGCGAGGCCTGGGCGGAAACGCTGGTCAACGAATAGGTGCCGGGCAGGTCGACCAGGCGTACCGCGTGACGGACGGTATGGAACGCACCCTCCTTGCGCTCCACGGTGACGCCGGCCCAGTTGCCGACCCGCTGGCGCGAGCCGGTGAGCTGGTTGAACAGGGTGGTCTTGCCGGAGTTGGGATTGCCGATCAGGCCGAGGGTCAATGCGGTCATGGGGATTCCGGGCGGGGTCAGTCGAGCGGGACGAGGGTCAGCAGGGCGAGGTCCTTGCGCCGCAGCGCCAGGCTGGTCTGGCGGGTCTCGACCTGGATCGGGTCGCCCAGCGGGGCGATGCGCACCACGCGCAGCGCCGCGCCGGGCAGAAGCCCCATGGAGAACAGGCGTTGGCGATAGCCGTTGCTGATGGCGGGGGAATAACCGGTGATGCGGTAGGAACGGGACGGTTGCAATGCGCTCATGGCGGATTCCAGGCAGAGGAAGGGACCAATGGCCTCCCTGGCCGGTGACCGAGAATCTAAACGACTCTCACTTAAGGCTCGCTTAAGGACGCAGTATCGGAGTGGAGGAGCAGCGGATTCGCTGATCCAGAGCAATGGAAGCGGGGATTTTCGCCGGGCGCGGCGAACTGTCGCCGCCAGCGCCCGGTGCGACGGGCGGCTGGCGGCGCAAGGGGTCAGCGGGTATTGAGCTGCTGCTGCAGGTTCTGCACCTGGGCCTGGAGGGTGTTGATGTTGCGCATCACCTGGGTGCGGAAGGCGTCGAACTCGGCGGTATTGCCGCCCTGGGCCGGGCGGTTGTCGAGCTGGCTGCGCAATACCAGCAGGTCATCCTGGATACCCTTGATCGTCTGGCTCGGGTTGCCCTGCTTCTTCAGCGCCGCGACATCGCCGCCGAGGGCCTTGAGTTGCTCGTTGGTCTTGCCCAGTTCGACCTGCAGGGCTTTCAGCTTCTCCTGTTCGGCGGCCAGTGTCTGCAGCTTGCCGTCCAGTTGCGCGACCAGTTGCGCCGCGCTTTCCTGCTGGGCCTTGAGATCGCTGGCCAGTTGGTCGAGGCGTTTGCCCTGGCCGCCGAACTGGTTGCTCACATCGGCCTGCTGGCGTTCCTGGCTGCCGA
GTTTCTCCTGCAGTTGCTTGACCTGCAGGCGCAGCGCCTCGCGTTCGCTGGTGACGCTGGACTCGCTGGCGACCACCTTGCCGCGAATGTCGTCGAGGCGCCCGGCGGCTTCTTCGCTGATCCTTACGAAGCTTTCCTGGGTCGCCACCAACTGCTGTTCCATCAGGCTGATTTGCTGGTGGCTCCACCAACCCACCCCGGCGAGGGCGATCAGCATCGCCCCGAGCAGGGCCCAGAGCGGGCCGGTAGAGGCCGGACGGCGCTCCTTGGTGTCGACGACCCGGCGCTGCTCCGGTTCGTAGACGTGCCGCGGGTGGTGGCCGAATTCATCACGGTCGGCAGCATCGGTGGTCAGGCTGGGCACATCGTCGAGTTCGTCGAAAGCATCGTTACGCATGGTGAAAACCTGTGAAGGGGAACGGCATCCTGCCGGAGGCCTGGAGAAAGGGCCGAATGGCGCATTATATCGGTTCGGCCCCGCCCCTTCAGCGTCTGCTCGAAGCAAATTGCCGGACCGGGGCGCTCCACCGCGAAGCCGTGCGCCCCAAGACGGAGCGAACGCTGTCCGGCACAGACCGCCGCAGGCCGCCAGCGGTTCCCCGCCCCGCACGCCGGCGCTGGTCCGCGGCCCGCATGGCACGTCGCTTGCTTGGGCATTGCCGGGCAGATTCGAGAGCCGGAGGATGCATGGAGTTGAACAAATCCTTGCTGGATTGCATGCGCGCCGTGCGCCGACGACTACGCGAGGAACAAGCGCTGGATATCCACTTCCAGCAACGCGATGCGATCGCCGCGATGCAGGCGGCCTGCGCCCGCTCCGGCGATGCGACGACGCGGGAGCTGGGACAGCGCCTCGGCAGGCTCAGCGGGGTCGCGCTGCCGCCGGCGGAACCTTCCCTGTTGCCCGCGCAGGCGCCGAGCCGACAATACGCCGGCCCGCTGCGCGGCTGATCAGAGTCCCTGGGCGCGCCACCAGGCGCAGAACTCGTCGAGCGCGGTCCACAGGCTGACCCGCGGGTCGTAGTCGAGGTATTCGCGGGCGCGGTTGATGTCCAGGGTGAAGTTCTTCGCCATCACCGCCATGCCCAGGCGGAACAGCACCGGCTCCGGGCGCCCCGGCAGGATCCGGCAGACGCCTTCGTTGAGCGCCGCCAGGCCATAGCCGACGGCATACGGCAGGTGACCGCCGACCGGCGGCAGGTCGAGCTGGCGCATCACGTAGTTGACCGCATCCCAGAACGGTACCGGCTGCCCGTTGCTGATGTTGTAGACCTTGCCCAGCGCCGGCTCGCCGGCCAGCAGGCAACTGAACAGCGCGTCGTTGAGATTGTGCACGCTGGTGAAGTCGACCCGGTTCAGGCCGTTGCCGAGGATCCGCAGGCGTCCCTTGCGGTGGGCCTGGATCATTCGCGGGAAGATGCTGGTGTCGCCGGCGCCGACCACGAAGCGCGGACGCAGCGCCAACACTTCCAGGCCCAGGTCGCGGGCGCTGAGCACCAGTTGCTCGGCCTGGTACTTGGTCGCCCCGTAATGGTCGGAAAAACGCCGGGGTACGTACTCTTCGTTCAGGTCCAGGTGGTCGCGCCCGTCGAAATAGATCGACGGCGACGACAGATGCACCAGGCGCCGCACCTTTTGCCGCATGCAGGCCTCGACGACGCTCTCGGCAAGCCCGACGTTGGCCGCCAGGAAGCGTTCGCGCGGTCCCCAGACTCCGACCGCGCCGGCACAATGCACCACCGCCTCGACATCCTCGCAGAGGCGCAGGACCAGCGCCGGGTCGGCGAGGTCGCCGGGAACGAACTCGGCGCCCCGCGCCACCAGATGTTCCACCGCGTCCGCACGACGACCGCTGACCCGTACCGACAGGCCCTGCTCCAGGGCGAAGCGAGCGAAACGCCCGCCGATGAAGCCCGTCGCCCCCGTTACCAGAATCCGCATTGCGTTTCTCCCGTTGTTCTTCTTGTGCCGCGACTCTAGCAACCCGGAGCCGCGCCACC
ACTGGCACAGCATGCCAGAACAACGACCTTGCGGGCCGCCGGCACGACTTGCGTTGGGCGACAGAAGCGGCGCTGCGCGCTACAAGGTCTAACCTGAAAGCCACCTGCACGCTGGGAGACGCACCATGGCCAGTCACTGGATGATCTACGGCGCCAACGGCTACACCGGCCGGCTGGTCGCTGAACAGGCGCAGCGCGAGGGCCTGACGCCGCTCCTCGGCGGACGCAACCCGGCCGCCCTGCACGCCCTCGGCAGTCAGCTCGGGCTGGAGTGCCGGGTCTTCGACCTGGGGGATCCGCAGGCCTGCCGCGAAGCCCTGGACCAGGTGAAGGTGGTGGCCCATTGCGCCGGTCCGTTCTCCGCCACCAGCACGCCGATGATCGCCGCCTGCCGCGCCGCCGGCACCCACTACGTGGATATCACTGGAGAGATCGCGGTGTTCGAGCAGGCCCACGCCGGCGACGCCGAGGCCCGCGAGGCCGGCATCGTGGTCTGCCCGGGCGTGGGCTTCGATGTGATCCCCACCGATTGCCTGGCCGCCTGCCTGAAGGAGGCGCTGCCGGACGCGCAACGGCTGGCGCTGGGCTTCGTCACCGGCAGCGGGCTTTCGACGGGAACCGCGAAGACCAGCGTCGAAGGCCTCAAGTTCGGCGGCAAGATCCGCGAGAATGGCCGCCTGCGCGACGTGCCGTTGGGCTACAAGCGCCGCGACATCGATTTCGGCCGCGGCCTGCGGCACGCCGTGACGATTCCCTGGGGCGACGTGGCCACCGCCTACTACAGCACCGGCATCCCGGACATCGAGGTCTACCTGCCGGCGCCGCCGCTGCTGGCCCTGGGCATGCGCCTGATCGATCCGCTGCGTCCGCTGCTCGGTCGGCAACGGGTCCAGGACTGGCTCAAGGGGCAGGTCGACAAACGCATCGCCGGCCCCGACCAGGCGGCCCGCGAGCGCCTGCGCACCTGGGTCTGGGGCGAGGCGCGCAACGCCCGCGGCGAACGCCGCACGGCGCGCCTGGAGACGGCCAACGTGTACGACCTGACCCTGCACGGCGTGCTCCTGGCAGTGCGCCACCTGCTGGACTACCAGGGTCCCGGCGGCTATTTCACGCCATCGCGGTTGCTCGGCGCGCGCTGCGTCGAATCGTTGCCCGGCTCCGGACGAATCACCGTCATCGGCTGATCAGCCTGACACCCGCACCAAGCCGTCGCCGGCGCGGGCGAGCAGGTGCTCGGTGAGCGACGCGAGCAGTTCGCCGCCGTTGCGCCAGTAGTGCCAGTACAACGGTACGTCGATGACCTGGCCGGGCAGCAGCTCGACCAGTTCACCGCGCGCCAGCTCGCCTTGCACCTGGCGCTCCGGCACCAGGCCCCAGCCGAGACCGCCGGCGGTGAGGCGGACGAAACCTTCCGAGGACGGGCAGAGGTGGTGGATGAAACCGCCCTCGACGCCGAGGTCCTTGAGAAAGCGGTGTTGCAGCAGGTCGTCCGGGCCGAACACGATCGCCGGCACGCCGGCCAGGGCGGCCGCCTCGACCCCGCGGGGAAAGTGCCGGGCAATGAAATCCGGGCTGGCCAGGCCGCGATAACGCATGGCTCCGAGCAGCAGCGAACGCGCGCCGGCCACCGGCCGCGCGCTACCGCAGACGCAGCCCGCCACTTCGCCCGCGCGCATGCGCTTGAGCCCGACCTCCTGGTCCTCCACCACAAGGTCCAGCAGCACCCGCCGCTCGGCGCAGAAATCGCCCACCGCGGCGGCCCACCAGGTCGCCAGGCTATCGGCGTTGAGAGCGATGCGCAGGCGTTCCGGGGCACCGCCCTCATCCAGGTTCGGCACCCAGCGCTGCAGGTCGCCCTCCAGCAGCCGCACCTGCTGCACATGGTTGAGCAGGCGGCGCCCGAGATCGGTGGGATGCGGCGGCGTCTCGCGCACCAGGACCGGCTGGCCGACCCGCGCCTCGAGCAGCTTGATCCGCTGCGATACCGCCGACTGCGACAG
CCCCAGCGCCTGTGCGGCGCGCTCGAAACCGCCCTGCTCCACCACTGCGGCCAGCGCGGCGAGCAACTTGTAGTCGAACAAAATCAGTTTTCCTAATGAGAGATCAGCAGGATTCGTTTTCCTTATAAACCGCCCGCGGCCACACTCGCCAGCACTTTTCCGATCCACCCGACGCGGACCGACCATGGCTGGCGAAACCTCGCTGCGCACCCTGCTGCGCACGCTCACTCCCGAACTCAATCCCGGCGACTATGTGTTCTGTACCTGCGCCGCTGGCGCGCTGCCGAACGGCGCCGAGCCGCTCGCCAGCTTCCGCGAGCGCGAGGGCCTGACCCTGGTGCTCGAACGCCGGCAGGCCGAACGCCTGGGCCTGCCCTACGAATACGTGGCGGCCTGGATCACCCTGACGGTGCACTCCTCGTTGGCGGCGGTGGGCCTGACCGCCGCCTTCGCCACGGCGCTGGCCGAAGCCGGGATCAGTTGCAACGTGATGGCCGGCTATTTCCACGACCACCTGTTCGTCGCCCGCGACGAGGGCCGGCGCGCCCTCGCCGTGCTCCAGCGGCTGGCGGCGGAGGCACACTGACATGTGGCAGAGCTATCTCAACGGCATCCTGGTGGCTGCCGGCCTGATCATCGCCATCGGCGCGCAGAATGCCTTCGTCCTCGCGCAGAGCCTGCGCCGCGAGCATCACCTCTCGGTAGCCGCGCTCTGCGTGTTCTGCGACGCGGTGCTGGTCAGCCTCGGCGTGTTCGGCCTGGCCAAGCTGCTGCTGGAAAACCCGACGCTGCTGGCCATCGCCCGCTGGGGCGGGATCGCCTTCCTGACCTGGTACGGGCTCAAGGCGCTGCTTCGCGCGTTGCGCCCGGACGCCCTCGGCAACGCCGCGGAAACCGGGCCACGCTCGCGCAAGGCGGTGCTTCTGGCGGCACTGGCGGTCACCCTGCTCAACCCCCACGTCTATCTCGATACCGTACTTCTTATCGGTTCGCTCGGCGCCCAGCAGGCCGCGCCGGGCGCCTATGCCCTCGGCGCGGCCAGCGCCTCGCTGATGTGGTTCTTCGCCCTCGCCCTCGGCGCGGCATGGCTGGCCCCCTGGCTGGCGCGCCCGGCCACCTGGCGCCTGCTCGACCTGATGGTGGCGGCCATGATGCTGGGCATGGCCGCGCAACTGCTGTTCCGGGGATAACGCGGTTTCTGTCTTCCCGAGCGAACCCTCGCGCCGCCAGGCAGTCCAGAGAGAATGTGGCGGTGTTCGCCGGCGAGCTTGCAGAAGCCCGTCGCAGCGCGCCGCCGCCGGCCTTTGCCCCTACAGTTGCTGCGTGGATATGCCCCGACACGGGTGCTATGATCCGGAGTTCGCGGCACTGGGAGCCTAGGCTCCGAGTCGCACCCGGCGCCCTTTCCGGAGCGCTCCGCCGAACGTCCCCCCCGGGGATCCAGGACGGGCTCCCGGACAGGCATCGGACCGGCCCCGTGAACCGGTCGCGCGCTAGCCGCGCCAGTCCTGACCTGAGGAAGAATAGGAGAGACACCATGGCTTTCGAATTGCCGCCGCTGCCTTACGAAAAGAACGCCCTTGAGCCGCACATTTCCGCAGAAACCCTGGAGTACCACCACGACAAGCACCACAACACCTACGTGGTGAACCTGAACAACCTGATCCCGGGCACCGAGTTCGAAGGCAAGAGCCTCGAAGAGATCGTCAAGAGCTCCTCCGGCGGCATCTTCAACAACGCCGCCCAGGTGTGGAACCACACCTTCTACTGGAACTGCCTGAGCCCGAACGGCGGTGGCCAGCCCACCGGCGCCCTGGCCGACGCCATCAACGCCGCCTTCGGCTCCTTCGACAAGTTCAAGGAAGAGTTCACCAAGACTTCCGTCGGCACCTTCGGTTCCGGCTGGGGCTGGCTGGTGAAGAAGGCCGACGGCTCCCTGGCCCTGGCCAGCACCATCGGTGCCGGCAACCCGCTGACCAGCGGCGACACCCCGCTGCTGA
CCTGCGACGTCTGGGAACACGCCTACTACATCGACTACCGCAACCTGCGTCCGAAGTACGTCGAGGCGTTCTGGAACCTGGTCAACTGGGACTTCGTAGCGAAGAATTTCGCTGCCTGAGTCTGATCAGAACCACGAAAAAGCCCGGCATTCGCCGGGCTTTTTCGTTTTAATCTGCGGTCCAGGAGCCATCGTCTACCCTCAGGAACTGGTAGACCTTCGCTCGTGATAGCCTCTTGAGGTAAACAAGCTCTTCCTACCGGCTTGTCTTTCGCCCTGGCGACGAACAAGGAAGGCCCCTTGAAACTGGACTCCCGACACAGCCTCTCGCTCAAGCTGCTGCGTGTCGTGCTACTGGCGGCGCTAGCCGTTGGCGTAGTGCTCAGTTGCGCGCAGATCGTCTTCGACGCCTACAAGGCCAAGCAGGCGGTGAGCAGCGACGCCCAACGCATCCTCGCGATGGTCCGCGACCCTTCCACCCAGGCGGTCTACAGCCTCGACCGGGAAATGGCGATGCAGGTCCTCGAAGGCCTGTTCCAGCACGAGGCGGTGCGCCAGGCGAGCATCGGCCATCCCGGCGAGCCAATGCTGGCGGAGAAATCCAGGCCGCTGCTGGACCTGCCGACGCGCTGGCTGACCGACCCGATCCTCGGCCAGGAACGCACCTTCAGCATTCGACTGATCGGCCGCCCGCCCTATAGCGAATACTACGGCGACCTGAAGATCACCCTGGACACCGCGCCCTACGGCGAGAACTTCGTCACCACTTCGGAAATCATCTTCATCTCCGGCATTCTCCGCGCCCTGGCCATGGGCCTGGTGCTGTTCCTGGTCTACCACTGGATGCTGACCAAGCCGCTGTCGAAGATCATCGAGCACCTGGTCAGCATCAATCCCGACCGCCCCAGCCAGCACCAGTTGCCGCTGCTCAAGGGGCACGAACGCAACGAGCTGGGGCTCTGGGTGACCACCGCCAACCAGTTGCTCGCCTCGATCGAAAGCAACAGCCACCTGCGCCGCGAGGCCGAGGACAACCTGCTGCGCATTTCCCAGTACGACTTCCTCACCGGCCTGCCGAACCGCCAGTTGCTGCAGCAGCAACTCGACCAGATCCTCGACGGCGCCGGCCGCCAGCAGCGCCGGGTGGCAGTGCTGTGCCTGGGCCTCGACGATTTCAAGGGGATCAACGAGCAGTACACCTACCAGCTCGGCGACCAGCTGCTGATCGCCCTCGCCGACCGCCTGCGCGGGCACAGCGCGCGGCTCGGTTCGCTGGCGCGCCTGGGCGGCGACCAGTTCGCCCTGGTCCAGGCCGACATCGAGCAACCCTACGAGGCGGCCGAACTGGCGCAGAGCATCCTCGACGGCCTGGAAGCGCCGTTCGAGATCGACCAGCACGAGGTGCGCCTGCGCGCCACCATCGGTATCACCCTGTTCCCCGAGGACGGCGAGACCACGGAGAAACTGCTGCAGAAGGCCGAGCAGACCATGACCCTGGCCAAGACCCGCTCGCGCAACCGCTACCAGTTCTACATCGCCAGCGTGGACAGCGAGATGCGTCGCCGCCGGGAACTGGAAAAGGACCTGCGCGACGCCCTGCAGCGCCACGAGCTGCACCTCGTCTACCAGCCGCAGGTGGACTACCGCGACCACCGCGTGGTCGGCGTCGAGGCGCTGCTGCGCTGGCAACATCCGTTGCACGGCTTCGTCCCGCCGGACCTGTTCATCCCGCTGGCGGAACAGAACGGCAGCATCTTCAGCATCGGCGAGTGGGTGCTCGACCAGGCCTGCCGGCAGTTGCGCGAATGGCACGACCAGGGCTTCGACGACCTGCGCATGGCGGTCAATCTTTCCACCGTGCAGCTCCACCACAACGCCCTGCCACGGGTGGTCAGCAACCTGCTGCAGGTCTACCGCCTGCCGGCGCGCAGCCTGGAGCTGGAAGTCACCGAGACCGGCCTGATGGAGGACATCTCCACTGCCGCCCAGCACCTCCTCAGC
CTGCGCCGCGCCGGCGCGCTGATCGCCATCGACGATTTCGGCACCGGCTATTCCTCGCTGAGCTACCTGAAGAGCCTGCCGCTGGACAAGATCAAGATCGACAAGAGTTTCGTCCAGGACCTGCTGCAGGACGAGGACGACGCGACCATCGTTCGCGCCATCATCCAGCTCGGCAAGAGCCTGGGCATGCAGGTGATCGCCGAGGGCGTGGAAACCGCCGAGCAGGAGGCGTACATCATCGCCGAAGGTTGCAACGAAGGTCAGGGCTACCTGTACAGCAAGCCGCTGCCGGCCAGGGAACTGACCCAGTACCTCAAGCAGGCGCGACGCCTGAGCCAGGCCACCAGCAGCGAACGGCCCTGATCAGAACAGGCGGCTGAACAGCCAGCCCGCCAATAGCGCGCTGCCCAGGCAGAGCAACGCGGCCCCCGCCGCCTGGCGGCGCAGTTGCACGGCGAGATCGTCCTCGCCCTGGCTGGAGAGAATGAAGGGATGCGCCTCCCCCGGCTTGCCGAGCCGGTGGCGGGTCGGCGCGGCGCTGCTTGCCCGATGCCGATCCTCGGCCTCCAGCCGTGCCGCCAGGCGCACCCGGTTCCATTCGCGCTCGTCCAGCTCGCCGTTGCCGTCGCTGTCGAAGCGCGCCAGCAGGCCGTGGAAATCGCCCTTCCATTCACGGATCACCGCACCCTGCGCCCGTTCGGCATCGAGTCCCTGGCGACCGCCACCGCTGCTGCGGAATTCGCCCAGCGCATAGAGCGGCTCGCCGGCATGCAGGCGCTCCTCGGTGTAGCGGTAGAGCCGCTCTCCGCCGACCAGGGTGCCGAAGCTCTCCAGCAGTCGCAGGGTATCGATCGGTGCTTCGCGAAAAGCCTCCCAGCGCTGCAGGGTCAAGGGTCGCACCTCGGCGCCGCGTGGGTCGACCAGGCAGGCATCGGTGGCATCGCGCAAGCCGAATGGCGACTCGCTGGCACCTTTGTCGACGGTGCGCCAGGCCTTCTCGCGGCCACTGCGCTTTTCCTCGACTTCGATGCGGTAGCGCCACCACAGGCACGGCTTGCCGGTCAAGGGAGCCTGCAGCGGGCCTTCCGGGCCCTCCTCGAGCACCCCGTAGAGTTCGACGAAACCCTGCGCCGCCGAACGGATCCGCGAGGTCGGTACGTCGCCGAGCAGGCGCGCCCGGGCAAGGCGGTGCACGAACTGCCAGCCGGCCCAGGCGCAGAGCCCCAGGCTGGCGGCGATGAACAGCCAGCGCAGGTCGAAGTCCATCTCAGCCGAACAGGGCCTTGAGATCGACGTCGGCCTTCTCCGCCTCGCTGAACTGCAACAGCTCGGCGGCCTTGAAGCCAGCGACACGAGCGATCAGCAGGTCGGGAAATTGCTCGATGCGCACGTTGTTCAGGTTCACCGCCTCGTTGTACAGCTCGCGGCGATCGGCGATCCCGTTCTCCAGGCCGCTGATGCGCTGGGAAAGGAACTGGAAACTTTCGTTGGCCTTGAGCTGCGGGTAGTTCTCGGCCAGCGCGAAGAGCTGCCCCAGGCCGGCGCGCAGGCCGCTCTCGGCCTTGCCCAGGGCATTCACGTCGGCGCGCTCGCGGGCGCTGGCGACGGCGTTGCGGGCGGCGATGACCCGCTCCAGGGTGGCGCGCTCGTGCTGCATGTATTGCTTGCAGGCCTCGACCAGCTTCGGCAACTCGTCATGGCGCTGCTTGAGGAGCACGTCGATGTTGGCCCAGGCCTTGCCCACGGCGTGCTTCAGCCGTACCAGGCCGTTGTAGAGCACCACCGCGTAGCCGGCGAGCAGCAGCAGGACAACCCAGAAAGCGATAGCGGTCAGACTCATGGACGGAGCTCCGGACGAAGATACTGGCATTCTAGCTGCGCCCGACATGTCCAGGCACGACGGCGATCACGCTGCGCCCTTTTCAAAAATGCAAATCTTTCGCATTATGTGGCGGTTTTTTAACGCCCTCGCGGGTGTTTCCGACAAGCGCACGCCGCTT
CGGCACGCCACCAATAAAGCAAAGGAATCCGCCATGACTCGTATGCCCCTGGCCACCGCCAGTCTGCTGGCCCTCGCCATCTCCCTCGCCGGCTGCGGCGACGACAAGAAAGCCGAAGCGCCCGCAACACCGGCGGCCAGCACGCAGCCCGCCGCGCCCGCCGCCGCCCCGGCCGCCAAGGTCGACGAGGCCGCCGCCAAGGCGGTGATCAAGAACTACGCGGACCTCGCCGAAGCCACCTTCGCCGATGCCCTGAGCACCGCCAAGGACCTGCAGAAAGCCATCGACGCGTTCCTCGCCAAGCCTGACGCGGAGACCCTGAAAGCCGCCAAGGAAGCCTGGTTCGCCGCTCGTACCCCCTACTCCCAGAGCGAAGCCTTCCGCTTCGGCAACGCGATCATCGACGACTGGGAAGGACAGGTTAACGCCTGGCCGCTGGACGAAGGCCTGATCGACTATGTCGCCAAGGACTACCAGCACGCCCTGGGCAACCCCGGCGCCACCGCCAACATCGTCGCCAACACCGAGATCCAGGTCGGCGAAGACAAGATCGACGTCAAGGAAATCACCGGCGAGAAACTGGCCAGCCTGAACGAGCTGGGCGGTTCCGAAGCCAACGTCGCCACCGGCTACCACGCCATCGAGTTCCTCCTCTGGGGCCAGGACCTGAACGGCACCGGTCCCGGCGCCGGCAATCGTCCGGCCACCGACTATGCCCAGGGCAAGGACTGCACCGGCGGCCATTGCGACCGTCGCGCCGCCTACCTGAAGGCCGTCACCGACCTGCTGGTCAGCGACCTCGAATACATGGCCGGACAGTGGAAAGCCGGCGTCGCCGACAACTACCGCGCCAAGCTGGAGGCCGAACCGGTGGATACCGGCCTGCGCAAGATGTTCTTCGGCATGGGCAGCCTGTCCCTCGGCGAACTGGCCGGCGAGCGCATGAAGGTCGCGCTGGAGGCCAACTCCACCGAAGACGAGCACGACTGCTTCAGCGACGACACCCACCACACCCTGTTCTTCAACGGCAAGAGCATCCGCAACATCTACCTCGGCGAGTACAAGCGCATCGACGGCAGCGTGGTCAAGGGCCCGAGCCTGGCCGACCTGGTCGCCAAGGCCGACGCCGCCGCCAACGACACCCTGAAGGCCGACCTGGCCGACACCGAGGCCAAGCTGCAGGCCATCGTCGACAGCGCCGAGAAGGATGGCGTGCACTTCGACCAGATGATCGCTCCGGACAACAAGGACGGCCAGCAGAAGATCCGCGACGCCATCGCCGCCCTGGTCAAGCAGACCGGCGCCATCGAGCAGGCCGCGGGCAAGCTGGGTATCCAGGACCTGAAGCCGGACAACGCCGACCACGAGTTCTGATTCCTTCCTGCGCCAACAGCGTGCGGCCCATGGCCGCACGCTGCGTTTCAGGGCCTCGAAAGCGGACTTCGCGGGCCTTTACAAACGCAAATCCCTCTTATTTTGATTAGCTCCAACTGGTAAGCTTGCGGGCTGTTTCTACGCAGTCCGGGATGTTCGCCGATGTCGTTGCCGTCACCTTCCATGCCGCTCGCCTGTCTCCTGACGGCGCTCCTTCTGGGCGGATGCGGCGCCGACGACGAACCGCTGCGCGCCGAGCCCGGCGAACACCTGTCCGGGGGCGCGACCACGGTGCTGCAAAGCGACCGCAACGCCTTCTCCCTGCCCTCCGCCAACCTCGCCCCGAGCCGGCGCCTGGACTTCAGCGTGGGCAACAGTTTCTTCCGCAACCCCTGGGTCATCGCCCCTTCCTCCACCACCGCGCGCGACGGTCTGGGACCGCTGTTCAATACCAACGCCTGCCAGAACTGCCATGTGAAGGACGGCCGCGGCCACCCGCCGGGAGCGGACGCGGTCAGCGCGGTGTCGATGCTGGTACGCCTGTCGATCCCGGCCGGCCCTGCGGACGGCAAGACCCTGTTGCACCAGGGGGTGATTCCCGAGCCGACCTACGGCGGCCAGTTGCA
GGACGTCGGCATTCCCGGCGTCGCCCCGGAAGGCAAGGTGCGGGTGGACTACGAGCCGCTGAAGGTGAGGTTCGAGGACGGCACCGAGGTCGAACTGCGCAAGCCGATCCTGCGTATCAGCCAGCTCGGCTATGGGCCGATGCATCCGCAGACGATGTTTTCCGCCCGCGTCGCCCCGCCGATGATCGGGCTCGGCCTGCTCGAGGCGATTCCCGAAGAGGCGATCCTGGCCAATGCCGATCCGGACGACCGCAACGGCGACGGTATCCGCGGGCGCGCCAACCAGGTCTGGGATGCAGCGCGGCAGCGCACCGCGCTCGGCCGCTTCGGCTGGAAGGCCGGCCAGCCGGATATCCCGCAGCAGAACGCGCACGCCTTCGCCAACGACATGGGCCTGACCAGCAGCCTGCTGCCCCACGACGACTGCAGCGCCGCCCAGGTCGAGTGCCGCCGGGCGCCCGACGGCGGCGAGCCGGAAGTCAGCGACAACATCTTCGCCCAGGTGCTGTTCTACAGCCGCAACCTGGCGGTCCCGGCGCGGCGCAAGGTGGACGACCCGCAGGTGCTGGCCGGCAAGCGCCTGTTCGCCCAGGCCAACTGCGTGGCCTGCCACGTTCCCGCCTTCACTACCGGTTCCGACGCCAGCGAGCCGGAGCTGGCCAACCAGCGGATTCGTCCCTATTCCGACCTGTTGCTGCATGACATGGGCGACGGCTTGGCGGACAATCGCCCGGAATTTCTCGCCAGCGGGCGGGACTGGCGAACCCCGCCGCTATGGGGCATCGGCCTGACCGAAACGGTCAACGGCCACACCCAGTTCCTCCACGACGGGCGCGCCCGCAATCTGCTGGAGGCGATCCTCTGGCACGGCGGCGAAGCCGAAGCGGCCAAGCGGCACGTACTCGGCTTCGACGCCGACCAGCGCAGCGCCCTGCTGGCCTTCCTGAATTCACTCTGAGGAGCCCACATGTTCCGCCCACGACTGCTCTTGACCAGCCTCGCCATCGCCCTCGGCGCCTGCTCGCCGCAAGACCCGCAGGCCGTCACCAGTGCCGCCCTGGCGCAACAGGTGATCCTGCCGACCTACAGCCGCTGGGTGGAGGCCGACCAGGCCCTGGCCAGCAGCGCCCTGGCCTACTGCCAGGGCAAGGAAGACCTGGCCAAGGCCCGCGACGCGTTCCACGCCGCGCAGAAGGCCTGGGCCGAGCTGCAACCGCTGCTGATCGGCCCGCTGGCCGAGGGCAACCGCGCCTGGCAGGTGCAGTTCTGGCCGGACAAGAAGGACCTGGTCGGGCGCCAGGTCGAACAGTTGCTGAAGAACAACCCGCAGGTCGACGCCGCGGCCCTGGCCAAGGCCAGCGTGGTGGTGCAGGGGCTCTCCGCCTACGAATACATCCTCTTCGACAGCAAGATCGACCTCGCCGACGCCGCCACCAAGGCCCGCTATTGCCCGTTGCTGGAAGCCATCGGCACCCACCAGCAGCAACTGGCCCAGGACATCCTGGCGCGCTGGAAGAACGACGGCGGCATGCTCACCCAGATGAGCAAGTTCCCCAACGACCGCTACGCCGACGCCCACGAGGCGATCGCCGAGCTGCTGCGGGTCCAGGTCACCGCCCTGGACATGCTGAAGAAGAAACTCGGTACTCCGCTGGGCCGGCAGAGCAAGGGCATTCCCCAGCCCTACCAGGCCGAGGCCTGGCGCAGCAACGCCTCGCTGGCCAGCCTGGACGCCAGCCTGAGCGGCGCCCAGGCGCTCTGGGAAGGTATCGACGGCAAGGGCCTGAAGACCCTGCTGCCGGCCGAACAGAAGGACCTGGCCGGCAAGATCGACGCCGCCTACGCCGACAGCCACGCGAAGCTGGCGGCGCTGGAACAGAAACCGCTCTCCGAACTGCTCGCCAGCGAAGACGGGCGCAACCAGCTCAACGCCCTCTATGACAGCCTGAACGTGGTCCACCGCCTGCACGAAGGCGATCTGGCCCGCGCCCTCGGCGTGC
AACTGGGCTTCAACGCCAACGATGGCGACTGAGGTCGGCACCATGCTGCGACGTCATGTGATCGGCCTGGGCAGCCTGCTGCTCGGCGCCTTGTCCTTCGGCGGCTGGAGCTTCAGCCGCCTGGGTAGCCAACCGCTGGTGCTATCGGCGCGCGACGACGCCGATGGCCAGCATTACGCGGTGGGCTACCGCCTGGACGGCAAGTGCCAGTTCGCCACCCGCGTGGCCCAGCGCTGCCACGACATCGTCCAGCACCCGAGCCTGCCGCTGGCGCTGTTCGTCGCCCGCCGGCCAGGCACCGAAAGCTACCTGATCGACCTGAACGACGGTCGCCTGCTGCAGACCCTGGTCTCGCAGAAAGACCGCCACTTCTACGGCCACGGCGTGTTCCACCAGAGCGGCGAGTGGCTCTACGCCACCGAGAACGACACTACCGATCCCGGTCGCGGCGTGCTCGGCGTCTACCGTTTCGACGGCGAGCGACTGCAGCACAGCGGCGAGATCTCCACCCACGGCCTCGGGCCGCACCAGGTTTCCTGGATGCCCGACGGCGAGACCCTGGTAGTGGCCAATGGCGGCATTCGCACCGAGGCGGAAAGCCGGGTCGAGATGAACCTCGACGCCATGGAGCCCAGCCTGGTGCTGATGCGCCGCGACGGCAACCTGCTGTCCAAGGAAACCCTGGCGCAGCAGATGAACAGCGTCCGCCACCTGGCGATCGGTCGCGACGGCACCATCGTCGCCGGCCAGCAGTTCATGGGCGATGCCCACGAACATGCCGACCTGCTGGCGATCAAGCGCCCCGGCCGTCCCTTCGAAGCCTTCCCGGTGGCCGAGGAGCAGCGCCTGGCGATGGCCCAGTACACCGCCAGCGTGGCGATCCACGACGACCTGCGCCTGGTGGCCCTGACCGCCCCGCGCGGCAACCGTTTCTTCATCTGGGACCTGGACAGCGGTGCCGTACGCCTCGACGCGCCGTTGCCGGACTGCGCCGGCGTGGGCGCGGTGAAGAACGGCTTCGTCGTCACGTCCGGACAGGGCCGTTGCCGCTTCTACGACTGCCAGGGCGAGCGCATCGCCGCGCAACCGCTGGAGCTGCCTGCCGGCCTCTGGGACAACCACCTGCACCTGGCCTGAGAGCACGGCATCAACCTCTCGCCATACCGGTCCGCGAAAGTCGGAAGCGACCTGCAACACGCTTGAAATCTTTCGCCAATCCTGTGCTTTGACAAGCCCTGAAAGCTGCACGTAATGTGTCGGATTCGCCCCGAAACGGGGCGCTCTCGGCATACCAGGGACCAGGATTATGTTGCTCCGCCGCATGTTGATCATGCTCGCCGCGGTGATCGCCGTGGTGGCGATTCTCGCCGGCTACAAGGTCTACTCCATCCGTCAGCAGATCGCCCTTTTCAGCGCACCGAAACCGCCGATCAGCGTGACCGCCAGCCTGGCCGAAAAGCGTCCCTGGCAGAGCCGCCTGCCAGCCATCGGCAGCCTCAAGGCATTCCAGGGCGTGACCCTCACCGCCGAAGTCTCCGGCACGGTACGCGACGTACTGTTCCTTTCCGGCGACCAGGTGAAGCTGGACCAACCGCTGATCCAGTTGGAAAGCGACGTCGAGGAAGCCACCCTGCGCACTGCCGAGGCCGATCTCGGCCTGGCCAGGGCCGAGTACCAGCGCGGCCGCGAACTGATCGGCAGCAAGGCCATCTCGAAAAGCGAATTCGATCGTCTCGCCGCGCAGTGGGCCAAGACCAGCGCCACCGTCGCCGAGCTGAAGGCGGCGCTGGCGAAGAAGCGCGTGCTCGCGCCCTTCGCCGGGACCATCGGCATCCGCCAGGTGGACGTCGGCGACTACGTCTCGCCCGGGACGCCGATCGCCACCTTGCAGGACCTTTCCACCCTGCTCCTGGATTTCCACCTGCCCGAGCAGGACTTCCCCCTGCTCAGCCGCGGCCAGCTGGTGAAGGTCCGGGTCGCCGCCTACCCCGGCCAGGTGT
TCGACGCCGAGATCGCCGCCATCAACCCCAAGGTCGACAACGAGACCCGCAACCTGCAGGTCCGCGCTGCCCTGGAGAACCCGGACGGCAAGCTGCTGCCGGGCATGTTCGCCAACCTCGAGGTGATGTTGCCTGGCGAGGAACAACGCGTCGTGGTGCCGGAGACGGCGATCACCTTCACCCTCTACGGCGACTCGATCTACGTCGTCGGGCAGAAGAAGGACGAGCAGGGCCAGGTGTCGAAGGATGACAAGGGCCAGCCGCAACGGGTCGTCGAGCGCCGCTTCGTCAGGATCGGCGAACGCCGCGAAGGCCTGGCGGTGGTGCTCGAAGGCCTGGAGGGCGGCGAGCAGGTAGTGACTTCCGGGCAACTGAAGCTCGACAACGGCGCCGCGGTGGCCATCGTCGCCGAGCGGGACCTCCAGCAAGAGCACTGAGTCGCGCGCCTTCCCACTCCGTGGCGGAAGGCTTGCCAAGGGACTGAAACATGGCTTTTACCGATCCGTTCATCCGTCGTCCGGTCCTGGCGAGCGTGGTCAGCCTGCTGATCGTCCTGCTCGGCATGCAGGCCTTCAGCAAGCTGGTGATCCGCGAGTATCCGCAAATGGAGAACGCGCTGATCACGGTGACCACGCTCTACGCCGGCGCCAACGCGGAAACCATCCAGGGCTACATCACCCAGCCGCTGCAGCAGAGCCTGGCCAGCGCCGAAGGCATCGACTACATGACCTCGGTGAGCCGGCAGAACTATTCGACCATCTCCATCTACGCGCGGATCGGCGCCAATACCGATCGCCTGGTCACCGAGCTGCTGGCCAAGTCCAACGAAGTGAAGAGCCAGCTGCCGCCGGACGCCGAGGACCCGGTGCTGCAGAAGGAGGCCGCGGACGCCTCGGCGCTGATGTACATCAGCTTCTACAGCGAGCAGATGAACAACCCGCAGATCACCGACTACCTGTCGCGGGTGATCCAGCCCAAGCTGGCGACCCTGCCCGGTATCGCCGAGGCGGAGATCCTCGGCAACCAGGTGTTCGCCATGCGCCTGTGGCTGGACCCGGTGAAGATGGCCGCGTTCGGCGTCACCGCCGGCGAGATCAACCAGGCGGTGCAGCAGTACAACTTCCTCGCCGCCGCCGGCGAGGTGAAGGGCCAGTTGGTGGTCACCAGCGTCAATGCTTCCACCGACCTCAAGTCGCCCCAGGCCTTCGCCGCCATCCCGGTGAAGACCGACGGCGACCGCCGGGTGCTGATGGGTGATGTCGCACGGGTCGAGCTGGGCGCCGCCAGCTACGACGCGATCAGTTCGTTCAATGGGATTCCCTCGGTCTACATCGGCATCAAGGGCACGCCCAGCGCCAACCCGCTGGACGTGATCAAGGAAGTGCGGGCGAAGATGCCCGAACTGGAAGAGCAATTGCCGCCCAACCTCAAGGTGTCCATCGCCTACGACGCCACGCGCTTCATTCAGGCCTCCATCGATGAAGTGGTGAAGACCCTCGGCGAGGCGGTGCTGATCGTCATCGTGGTGGTGTTCCTGTTCCTCGGCGCGTTCCGTTCGGTACTGATCCCGGTGGTGACCATTCCGCTGTCGATGATCGGCGTATTGTTCTTCATGCAGGCCATGGGCTACTCGATCAACCTGCTGACCCTGCTGGCGATGGTCCTGGCCATCGGGCTGGTGGTGGACGACGCGATCGTGGTGGTGGAAAACATCCACCGCCACATCGAGGAGGGCAAGCCGCCCTTCGAGGCCGCCCTGGAGGGCGCGCGGGAGATCGCCGTACCGGTGGTCAGCATGACCATCACCCTCGCCGCGGTCTACGCGCCGATCGGTTTCCTCACCGGCCTCACCGGCGCCCTGTTCAAGGAGTTCGCCTTCACCCTGGCCGGCGCGGTGATCATTTCCGGGATCGTCGCCCTGACCCTGTCGCCGATGATGTGCTCGCGCCTGCTGCGCCACGAGGAGAATCCCTCGGGCCTGGCGCATCGCCTCGACCTGA
TCTTCGAGGGCCTGAAGCAACGCTACCAGCGCGCCCTCCACGGCACCCTGGACACCCGTCCGGTGGTCCTGGTGTTCGCCGTGCTGGTACTGGCGCTGATCCCGGTACTGCTGATGTTCACCAAGAAGGAGCTGGCGCCGGAAGAGGACCAGGGCATCGTGTTCCT diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/sim1_galaxy.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/sim1_galaxy.fasta Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,12526 @@ +>NODE_19_length_33054_cov_76.4496_ID_37 +GGCCTGTCCGGCTGACGTGCTGGCTGCGCGTGCTGAGTCCGCAGCATCAGCCGCATGGGT +TGCCGCCTCACGGGCTGATGTGCTGGCATCACTGGCTGACTTCTTCGCGGCTGCCGTGTT +CTGTGCCACCGCGGACGCGTTACGCGCCACCTCTTCCACCATCAGTTCAAAACGGCGCAG +TGCCTCCGGACGGGCATCATCCTCCGTCATGGCACCGAGAAAATCATTCAGCGTACCGGG +TTGAGAATCTTCATACACGGTAATGGTCCCGGCATGTGACGGCGGGAATCCTTCCACCAA +CAGAATAACGCTGTACTGACCGTACTCAACGTCCATGCTGTAACGCCCGGCTTCATCCGG +ATTTTCTGAGGCCAGCGTGTTCACCACCACCGTGGTGCTGTTACGTTTTGCTTTCAGCTG +GATTGTGCAGTTCTGTACCGGTTTTCCTGTGCCGTCTTTCAGTACACCTGAAATCTTTAC +TGCCATATTCACCCCACAAAAAAGCCCGCCTGAACCGGCGGGCTGTCATAACACTGTGTT +ACCTGGCTAATCAGAACTTATAACCGACACCCACGATGAAACCGTCAGTGCGCCAGTCGC +CACTGCCGGGGCCTTCATAAGCAATATCAATGGCCACGGATTCGGTCGGGTTAAACTGCA +CGCCAGCTCCCCACGCCAGAGACGTGTTGCTGTGGCGACCGTCATCACTTCCGGTCAGCA +CGTCGTGCGTTTTCCCCTTGGGAAGGTGCGAACAAGTTCCTGATATGAGATCATCATATT +CATCCGGAGCGCATCCCAGAGGGACATCATGAGCCATCAACTCACCTTCGCCGATAGTGA +ATTCAGCACTAAGCGCCGTCAGACCCGAAAAGAGATTTTCCTCTCCCGCATGGAGCAGAT +TCTGCCATGGCAGAATATGACCGCTGTCATCGAGCCGTTTTATCCCAAGGCGGGCAATGG +CCGACGGCCCTATCCGCTGGAGACCATGCTGCGTATTCACTGCATGCAGCATTGGTACAA +CCTGAGCGACGGTGCCATGGAAGATGCCCTGTACGAAATCGCCTCCATGCGCCTGTTTGC +CCGATTATCCCTGGATAGCGCCCTGCCGGATCGCACCACCATCATGAATTTCCGCCACCT +GCTCGAGCAGCATCAACTGGCCCGTCAATTGTTCAAGACCATCAATCGCTGGCTGGCCGA +AGCAGGCGTCATGATGACCCAAGGCACTTTGGTGGATGCCACCATCATTGAGGCACCCAG +CTCTACCAAGAACAAAGAGCAGCAACGCGATCCGGAGATGCATCAGACCAAGAAAGGCAA +TCAGTGGCACTTTGGCATGAAGGCCCACATTGGTGTCGATGCCAAGAGTGGCCTGACCCA +CAGCCTGGTCACCACCGCGGCCAACGAGCATGACCTCAATCAGCTGGGTAATCTGCTTCA +TGGAGAGGAGCAATTTGTCTCAGCCGATGCCGGCTACCAAGGAGCGCCACAGCGCGAGGA 
+GCTGGCCGAGGTGGATGTGGACTGGCTGATCGCCGAGCGTCCCGGCAAGGTAAAAACCTT +GAAGCAGAATCCGCGCAAGAACAAAACGGCCATCAACATCGAATACATGAAAGCCAGCAT +CCGTGCCAGGGTGGAGCACCCGTTTCGCATCATCAAGCGGCAGTTCGGCTTCGTGAAAGC +CAGATACAAGGGGCTGCTGAAAAACGATAACCAACTGGCGATGTTATTCACCCTGGCCAA +CCTGTTTCGGGTGGACCAAATGATACGTCAGTGGGAGAGATCTCAGTAAAAACCGGAAAT +AACGCCAGAAATGGTGGAAAAAATAGCCTAAATAGGCTGATTCGATGTGTTTGCGGGAAA +AAAATCGGCCCAGATCCGCGAAATTTTAATCAGCGAGTCAGCTTGGGAAGAAATGACCTG +CTTATTCGCACCTTCCCTTGTTGTCAGTTACGCGGAGATAATCCCCGGAGAAAGTCGACA +CACGGCTGTAAGCCATACCCGCCATCGCATACGCGCTGAACCATTCATTCACGCGCACAG +ACGGCCCCACCATCACGCTGAACCAGCGGTTACGCACGGAATCTTCATGCCAGCGGGTAT +CGCTGTAACGGGTAAGCTGGCGATTCTTGTCTCCTGCATAGCTGAATGGCCGCCCTGCTG +CATTAACACCATAAATGGCGACTGACCAGTGGACAACCCAACAACAATATCCGTCATTTG +TGCAGGCAACATGCGCATAGCAAAAGCCGTTTGTTTTGCCGACATTCCGGTTTTGCTTAA +TTGTGATTGAGTAACCTCAAGCTCACTCCGCATAGCACGAAGTTTTCCAGAAAGCTCCTC +ATACATTTCAGGAGAAAGCATCCCCTTAGCTTTTGCTTCATTGAGCTGTTTCTGTTGTTC +TACCAGACGATTAAAAGCAGTTCCGACAGGATCAAGTTGAGCAATCAGACGTTGCAAAGC +AACAACCTGTTCATCATGCGCTTTTGCTGCTTCTCGCTCTGCCTGAGCCTCTCCGGTAAG +CTCTCGCCGTGTTTCCTGTATTTTTCGGCTATAATTCTCAAACTGAGAACCATTTATTTT +CCCGGATGCAAACGCAGCATTAAGTTCATCATGCTGTTGTTCAAGATTTCTTAGCGCCGC +AGCCAGAGGGTCGATCTTGTCCAGCATTCTTTGAAAGGCCTGAGCCTGCGCTTCCTGCTG +AGCGGCAGCAAGTTTTCCGGCCTTCTCGGCTTCTCTCTGCGCTTGCGCAACCCCGCTCAA +TTCCTCTGTGGTTTCATTAAGTTTACGGACAAGAAATTCATATTCTTCTTTATCAATAAG +CCCTTTATCGAAAAATTTCTTTAATTCAGAATAGCGTCGACCGACAGTATCAATTGCGGC +ACCAACTGGATCAATAGCTGCTTTTAATTTTGCGAGCGCGTTCTTCTCATCTTCTGTTGC +CTTAGTCACTTTCCCTGCGCTATTTGCAGCAGTTTCCCCAGCCTGCGTCATTTTGACTAA +TGAGGAGGTCAGATTGTCAGCATTATTTTTCGCTCCAGTGCTATCAATAATTATTGCGAG +ACGCGAGGTTTGCTCTGCCATTTATTAAAACTCCTGACAACAAAAAACCCACCGCGAAGT +GGGTTTCAGGCGACATAATAGTAGATATAGCGATTACGAGGCCACGCAATGCTTTTCTCC +AGGAGCATCATCGATTTAATTAAAGACACCATCACATCTCTGTAACAGAGTGTACGTAAT +TAACAACTACACACACTGCTCCTGAAAATACTGGTCATCCAGTGCAAAGATCACTGCTTC +AAATTCATCGCGCTCAATCAATACCGGATGAGTGGCTAAATATTCATTTATCTCTGTCAG +AGATAAAGGCAAAGGCACCCCAGCCATTCCAGCATAACGTCGGGCACGGGATATTACCGA 
+ATAGGCGTACAACAACTCCTTAAGCACCGGGTCTATTTCTGGTTCCGGTATCGGTGGCAA +CCTGAGTTTTTCTCGCTTCCATCTTGCCTTTTCCCCCCTTTCTCCCCCGAACTCCGATAA +CCACCGCTGGGCAGCTATGGCTTTTTTATCGTATCCTGCTTCTGCTGCTCTTTACCCTGG +GCGATGCTGGCTGCTTCTGCAAGGATCTGCCAGTACAACTCTGGATTCTGCTTAAGCAGC +GCGATCCCTCGTTCTGCCGTATATTCCAGTGCAACCTCAACACCATTAACCAGTTCACCA +ACCCCTTTCCAGTCTTTCAGCAGATAACGAGCGGCATTATCAATGAGTAAATCATCAACA +GAATCCACCTCGGAAACCTTTGAAATATCAAACTCCTTCGTTCCGACGTGCAAACTGGCA +TCCATTTTCTCAATGTGGCGACGGATTAATGCATTACGGGAGCGATACTGATCGTTATCG +CTGCTTCTGCTCATCCGTCAGGCCAGACAGAACAATGACCGGAACAGAATCCATTTTGAG +CATTTCAGCCGCCATAACACGACCGTGACCCGCAATAATTTCGCCCTTTTCGTCAATCAG +CACCGGATTAGTCCAGCCGAATTGCTTAATACTTTCTACCAGTTGTGCCACCTGCTCAGT +ACTGTGCGTCCTGGCGTTGTGCGCATACGGTGACAATTCTTGTAATGGGCGATAGACTAT +CTTTAATTTCTCGCTCATACAGCCTTGCTTTATGAATAAAACGCACCCCAGCAGCCAGTG +CTACTGGGGACGGAGGTGTTGCTGGTAAAGTTAGGTATTGGATCAATGAGTGAGTCAACA +TAATATTAAACTCACAATTATAAATCAGCCATATATTAGGAGCGCCAAAAAAAACCTGAA +AACAATATAATAACAGGATAAATTTCAAGGCGACCAAGAATCATAGCTATGCACATTAAG +CATTTTGCAATGTCATTAAGCACTCCGAATGACGATGCAGTAGCCCCAAAACCTAATCCC +ATATTATTAATACATGCAGCCACTGTTGCAAATGATGTAAGAAAATCATATCCCATACCA +TTTAACACCAGTATAAAAAACACCGTGAAGAGAGTATAAAGAAAAAAGAAACTCCATACA +GACCTCATTACACGATCTGTAACTATCTTCCCTCCTACATTTACACTCAACAACGCTCTG +GGATGAGAAAGCTGATTTATCTCGTGTTTGCTTTGTTTGAAAAGTATAAGAAATCGAAGT +GACTTAATTCCACCACAAGTTGAACCTATACATCCCCCAAAGAAACTTGACAACAGCAAA +AACACTATCGTGTGCGTGGGCCAACTTGCATAATCCTGCGTAGCTAAACCATTATCAGTG +AGCATGGAGCTGGCAAGAAAAAACGAATGAATAAAACTTCCATGCAAGTCATACATACCT +ATATGCCAGACCTGGAAAGAGGTAACAATGATCACCCCTAAGGCTATTAACAGAAAGAAA +CGAAGTTCAATATCTCTGATTAAAGGTTTTATCGTTTTCCTGCTAATAACAATATACCAA +AGAGTGAAGTTGAAAGCCGATAGCAGGGAAAAAGAACCAGCCACCAGCTCAACCAAATAG +TTATTAAAATATCCGATACTCTCGCTATGAGTTGAGAAACCACCAAGCGAAACTGTGGAA +ATCCCGTGACAAATAGCATCAAACAAAGGCATTCCTGCAAGTCTATAACAGACAATACAA +GCAATACCTAATAAAGAATAAGTTATCCACAGTGTCCGTGACGTATCGGCCAGGCGGGGA +GTGAGTTTGTCATCCTTAAATGGCCCCGGCATTTCTGACTGATAAAGCTTTGCACCACCA +ATACCCAATAATGGCAATACAGCAACCGCCAGAACAATAACTCCTAAACCACCTATAAAA 
+TTTAACTGTGACCGATAGTACAAATATGCCCGAGGTAATGAACTAACATCATCAATTACA +GTTGCTCCTGTTGTTGTTATTCCAGAAACCCCTTCAAACAGAGCATCAATGAACGTTAAA +TTAAGTTCTGAGTCAATCCATAAAGGGAATGCACTAATAACAGAAAACAAAATCCAAAAC +ATTACAATTATAATAAACCCATCACGGGTACGTAATTGAATGCCAGATTTCTTAGTTGTA +TACCACGCTCCGCCACCAATGCAAAAAAATATAACGAAAGTTATAAAGAAAACAAACAGG +CTTTTTTCTTTATAAAACAATGCTACAACCATTGGTGGCAACATTGAAAGACTATAGAGC +CAAACCAGGAACCCACACATATGAGTAACAACTCTTACATGAGATGTATTCATATCTAAA +TATTCTTTCAATTATAACCACCTTGCTGCAATATTATGATTATACTGTATAAAATTTAAC +TCCTCTTAGATCTTACTTCACTGTTCCTTATGAAACAATCATCAAAATGAATCATATTGT +AGTTAAGATTTTACTTTAAACACTGCTCGGTTATGTATTGCTGAGCACCTTCAAGTTGGG +CCTGCATCATTACCAGTCGTTCCCGGAGGGTGAAATAATCCCGTTCAGCGGTGTCTGCCA +GTCGGGGGGAGGCTGCATTATCCACGCCGGAGGCGGTGGTGGCTTCACGCACTGACTGAC +AGACTGCTTTGATGTGCAACCGACGACGACCAGCGGCAACATCATCACGCAGAGCATCAT +TTTCAGCTTTCGCATCAGCTAACTCCTTCGTGTATTTTGCATCGAGCGCAGCAACATCAC +GCTGACGCATCTGCATCTCAGTAATTGCCGCGTTCGCCAGCTTCAGTTCTCTGGCATTTT +TGTCGCGCTGTTCTTTGTAGGCGATGGCGTTATCACGGTAATGATTAACACCCCATGACA +GGCAGACGACGATGCAGATAACCAGAGCGGAGATACCGATGCTGCTTTCGGCTCTGCTGG +TAAATTATCGCCCGGTATGCAGTAACGAAATTTACCGCCCTGATTTACGCGAATCAGACG +ACCTTTGCTGATTGCCATTGCCAGCGTTGAAGCCACTTTGCGTGATGTGGTACCAAACAA +TGTAGCCAGCTCATCAGCCGTTTGTGGTCCTCGTTGTTCAATCGTCGCGGTTAAATCGCA +CTCTGAGATTTTCGCTACTGTTGCTGTGGTGATTTCTTCCGGCAGTTCTGCCTGCGCTGG +CTGTTCCTGCTGAACATTGTTATCAGCCACACGCCAGGTGTACGCGCTTTTATCAACAAA +ACCAGCCTTTTTCAGTTCCCATAGTTCGTTCAGCACTTCTTCACGACTGATATCAAGTCG +CGCAGCAAGTTCTATGGATGTGGCTTTTCCCATTGCTTTCAGTGCGTCAAAAACAGTCTC +CATTAAATTTTTCTCCCGGTAAAAATTACTTCGCAATTCCTGGCTGGACGACATTCGGAC +GCCAGCTCTCCCAGTTAAAATTCACCCATCGCCCGCCGTTCATGGTCATGCGATCCATAA +TCCGCTCGCCGAGCAATGTTTTCATGGCCTCATAGTTCAGGTTTGTCAGCATCCCCACGC +TGCGCATCGACGCTGTCCGGCGATCAACAATCTGGTGCAGTACCACCTGCTCGTTTTTCG +TCTCGCGCTGAATGCCAATTTCATCAAGAACCAGCAGATCCACTTCGCACAGTTCCCGCA +AAAATTTTTCGCCTGACTGCCCATCGTCATAGCTGGCGTGCAGGGCACTCATAACATCAG +CCACGGTAACCACAATCACTGTCTGACCGTCTTTCAGCAGGCGATTCCCGATAGCTGCCG +CTAAGTGGTTCTTCCCGGTACCAGGTTTTCCGCTGAACGCAAAATTTGTACACCCGGTCA 
+TCAGTTCATCAGCGATGGATTTCGCCTGACTCAACGCGTATCGCTGCCCTTCGTTCTGCA +CCTGGTAATTCGAAAACGAGCATTTGCGGTGCAATGGCTGGATGCCAGAGCGATTCAGAA +TTTTTTCCACCCGCAACTGACGATTCTGACGGTTGATCTCCTCACAACGTTTCTGGCCTT +CGGAAAGTTGCCACTCGCGCCACTCCGCTACCGTCTTGAATGGCGCGGTTACATGTGACG +GGGCCAGTCTGCGGATACGTTCAAGAACATCGCCTGTCGCAATATTTTTCATGGTCAGTT +ACCCCCTGAAGCCTGGCGGGATCGCACTATCCGGTAACGAGACGGTGTTAACCTGTCGGA +GTAACGTCTCAGGTCGAACACCTTTCGGCGCGAACAAGCCCTGGTATTCATTGGCGATGC +TGTGTCGAATCACCTGCTCAGGTGAAAAACCCTGCTGGCGGAATTTTTCCAGCTCCCGTA +TCGCCCCGTTAGCGCCCTGCTCCGTTCGAATCGGTTTTCGCAATGCCTGGCGAAATTCAA +CCCACTCACGCCAAAGCGAGACAGAAATCCAGTTCGGCAAAGTAATATCCAGAGGGTCAA +ACTTTTTGACACCTCGATTCCCCCGGGGGGGATTTAGGGGGGGATCTGTTTTTAGATCTT +TATCTGTATCTTTATTAGTTGCCTTTGTGTTGACATCATGTTCAAACACCACTTCAACAT +CTGTTTGAACACCTGTTAAATTTCTCTCTTGTTTTGTTTGAACATCTGCTTCCTTTCTGC +TTCTTCTGGCCTGAACAGATGCTTTTCCTGCGGCTGATTTTTTGGTTAATTTTTCCCTGA +CTGATGCCAGATCTTCCTCAATCCGAAGATGCACCCATTCCTCGCCGTTATCGCAAAAAA +ACTCCTGCAAGGATGGTTCAACATCAGCCCATCGCTCGTTAGTCAGACGGGCAATTTTTG +CCAGCCTGTTTTTAGGTATTGGCTTTCCTGTTTGCCAGTAATTGAACATCAGCAACAAAT +ACGCACCATGCTCCTCTGCTGACAAATGCATGGTGTCAGCCAGGTAATCAGCTATGTACA +GTTGCATGTATGGTAATGCGGCCATAATTGCCCCGTATGATGCTGCCCGGTGGCTTAGAA +TAAGCACAAACAGCATGGAAACTTTTGCTTAATGAACAATGACAGAATCGTCGGAAGAAC +CGCCGCCGCTGAAATGCGCTTTCCGGTAAACGGCTTGGACTGCATCATCATGCGCATCAA +TTGCCGTACTTAACGCTTCCTGCGCCGCCAGTAATGCACGGCGTTCCAGGGTATCGAAGA +TGCAGAGTCGGTGACGCAGCTCGCGCGGAAGGATTGCCAGAATTGCTGGGATCAGCTTCT +GAATTTTTTCTCTTTGCGTTTTCGTTTCACCTTTCAACCAACGGTGATAGATATTCTGCT +GATTGTTCCAGTCCTTGCCTGGAACCAGGGGCAATTCGCCGCCCCCCTGGCGCAGATATT +CTTCAGTAATTGCATTGGCTACCCATGCCTGCCCTTTTTCGGCTGCTAGGGCAAACAACA +CTGATTCGATGTGCTCATGCTTGATTTTCATGAATCATTTGCCTCTTGATGTTTCAGGTA +TGATCAAATGAGGATTTGTTACTGTCATTTAGTTGCTTCACTGACATATTCTGCGAACAA +CATGCCGAACGTCGTAAATATGACCAGTCAATATCAGGACGAAGTTCTTCGCACAGAACC +TCACCTCTTGTTGCACGTTCAATTGCTGGACATCTCTCGGCAGGCAATTGACGTACCCCT +TTGATCCATTGATTTACGCTTGGAGGTGATACACCTAAAAGCCTAGCCATTGCTGATTGC +CCACCGACAACAGCACAAGCTTGCTTGAATGAATAGTTCTCTTTTTTCATCGAATGAACT 
+CCAAAAACACACAGAAATATTAGGCGACGCCTAACGCAATTGTCAATAGGCTGTGCCTAA +TGCAGTAAGGGTAGGGATTGCCTAATGTAATGCGCATAGGAGAATATTAAGCAATGCTTA +GTGGTAAAGACTTAGGCCGAGCGATAGAGCAGGCCATTAACAAAAAAATCGCATCGGGAT +CCGTCAAATCAAAGGCGGAGGTCGCACGCCACTTTAAAGTCCAACCACCATCAATTTATG +ACTGGATTAAGAAAGGCTCTATAAGTAAAGATAAACTTCCAGAATTATGGCGTTTCTTTT +CTGATGTTGTTGGTCCAGAGCATTGGGGGCTTAACGAATACCCCATACCAACCCCCACCA +ATTCAGATACAAAAAGTGAACTTTTAGATATAAACAACCTTTATCAAGCAGCCTCTGATG +AAATAAGAGCGATTGTAGCTTTCCTGTTATCTGGAAATGCTACAGAACCAGATTGGGTTG +ACCACGATGTTCGCGCCTACATAGCAGCGATGGAAATGAAAGTGGGTAAGTATCTGAAAG +CTCTTGAATCTGAACGGAAAAGCCAGAACATCACAAAAACTGGAACTTAAACTTATATGG +TCTGACGGAAAACTCCTGGATTCCGTTATTTAACCCCCCCATCACTTTCTGCTGTCGCCA +TCACCTATTAGGTTACGCTCAAAACATTAGGCATAGCCTATTGACAATCAATTAGGCATT +ACCTATAGTTCCAGCATACCACCCACCCCGCCCCACAGAACGCCGGGCAATACTTCGAGT +TACCAGGCAGTGGTAAGGGGTTAAGTAGCCAGCCCGAGGCGTATGAACATGACGGCGGGA +TTCAAATTTTGCAGTGCAGCAGTTAGTTCCGCCACCCGGCGTTAAGGGGAGAGATAAGAT +GGTGCATTACGAAGTAGTTCAGTATTTGATGGATTGTTGCGGTATCACTTACAACCAGGC +TGTGCAGGCTTTACGCAGCAACGACTGGGATCTCTGGCAGGCAGAAGTCGCTATACGTAG +CAACAAGATGTGAGATTCGCAAAATGCAAAAAATCGACCTCGGCAACAACGAATCCCTGG +TGTGCGGCGTGTTCCCCAACCAGGATGGAACGTTCACTGCCATGACGTATACCAAAAGCA +AAACATTTAAAACCGAAACTGGTGCGCGCCGATGGTTGGAGAAGCACACAGTAAGCTAAC +GATTAAAACGTCTACTCCTGCTGTTCCAGAATAACTTCATAAAATGGGAGTATTTTTCGG +TGACGAGATAATAAGAACAGTTTGCGCTATCACTCTGATGTTGAATGATGCCCTTCCGTT +CTAATTTTTTCATAACCGGGTTACGGCAAGGAGAAGTGATAATAAGATTTCCTGTTTTAA +GGAAATCTTTAAATACAGCGATTTCTTTCTCAGATAAACGAAGCAATACTCGTTGCTCTG +GTAGTAATGAATAATGCTTTTGAATATGTGCTCGCAATCTTGAGAAGGAAATGGCGACCA +CGAAAGAAAAGGCAAAAACGATAATCTGAAAGAGCCAAGGTATTTCAGTATAAGCATTGA +ATGCGACAGTAAACTCTTTCGGTATCAGCCAGAGAGTGAGACCAAAAATGATAATCGTAT +ACATAAGTCTTTCGAGTGGCTCGTTAGCAAAAAGTTTCAACAATGGAGTAAATACATCCA +ACATATCAATAACTCTCAACTGTAAGGGTATTGAAATGTTAACACAAGCTCTCGCTGTAG +GGGTATAGCCGAGACCACCGAAGCCCGGAGGTGGTGAAATAAAACCGGGCACAACACGAA +GGCGCATTTCCGATATCCATAAAGAGTCGGTCTTGTCTGTTAAATTTAAATGGTGGGAGT +GCGCCTCCGGTTGTAAATAACGACATTGCTGTGTGTAGTCCTGGCGGCATCAGTTTTTTT 
+CTTGAAGTTCGGCTGATGTCCGCCCTTTTTAAAGTGAATTTTGTGATGCGGTGAATGCGG +CTAAGCGCACGTGGCACAGTTAAAAGTCATGTTAGTCCTTATTGGTTTGGGTGGGAAAGC +CGACTGTAATTGTTAACTGGTTGCAGTCACCTGGAGGCACCAGACACCGCATCAACAAAG +TTCATTTGTAAAAATGGAGATAATTATGATTGCACATCACTTCGGAACTGATGAAATACC +ACGTCAGTGTGTGACTCCTGGCGATTATGTTCTTCATGAAGGCCGGACATATATTGCCTC +GGCAAACAATATTAAAAAGCGAAAACTATATATTCGTAACCTGACCACAAAAACATTCAT +TACTGACCGCATGATTAAAGTCTTCCTCGGTCGTGATGGTTTACCTGTAAAGGCGGAGTC +ATGGTGATGACTAAGAAAATAAAATGTGCTTACCACCTTTGCAAAAAAGACGTTGAAGAA +AGCAAAGCTATTGAAAGAATGCTTCACTTCATGCACGGGATTTTATCAAAAGACGAACCG +AGAAAATATTGCAGTGAAGCTTGTGCCGAAAAAGACCAGATGGCACATGAACTTTAATTA +ATTGACTATTCGAAACTGAATTTATGCCAGAAATGGCAGGTATTCGCTCAACCTTAATTA +AGGAGAAAAACATGATTACCAATTATGAAGCCACTGTTGTAACTACCGATGACATTGTTC +ACGAGGTGAATCTGGAAGGAAAGCGCATTGGCTACGTAATTAAAACAGAAAATAAAGAAA +CCCCATTCACTGTGGTTGATATCGATGGTCCATCAGGCAACGTAAAAACACTTGATGAAG +GTGTCAAAAAAATGTGCCTGGTGCATATCGGAAAGAATCTGCCCGCAGAAAAAAAAGCCG +AATTTCTGGCAACTCTAATTGCAATGAAATTAAAAGGTGAAATCTGAAAGAAATAGCCTG +CGTATGGCGCAGGCTATGAACAGTGTGTATCCGGCAAGATCATTCACTGAACAAAACGAA +TTTTAATCTGAGTTGAGGTTAAAAAACAATGAGCACAAAACCACTCTTCCTGTTACGGAA +AGCGAAAAAATCATCCGGTGAACCTGACGTCGTCCTGTGGGCAAGCAACGATTTTGAATC +GACCTGTGCCACTCTGGACTACCTGATCGTTAAGTCAGGTAAAAAACTGAGCAGCTATTT +TAAAGCTGTTGCCACGAATTTTCCTGTCGTTAATGACCTGCCCGCTGAAGGTGAGATCGA +TTTTACCTGGAGTGAACGCTATCAACTCAGCAAAGACTCCATGACATGGGAACTAAAACC +GGGAGCAGCACCAGACAACGCTCACTATCAAGGCAATACCAACGTCAACGGCGAAGACAT +GACTGAGATTGAGGAGAATATGCTACTCCCAATTTCTGGCCAGGAACTGCCCATTCGTTG +GCTTGCTCAACACGGCAGCGAAAAACCGGTAACGCACGTTTCACGCGACGGACTCCAGGC +ATTACACATTGCTCGGGCTGAAGAACTACCGGCTGTTACTGCCCTGGCTGTTTCCCACAA +AACCAGCCTGCTCGACCCGCTGGAAATTCGCGAACTCCACAAACTGGTTCGTGACACTGA +CAAAGTTTTCCCTAATCCTGGTAATTCAAACCTGGGACTGATAACTGCTTTTTTCGAAGC +ATACCTGAACGCTGACTACACCGATCGAGGACTGCTGACAAAAGAGTGGATGAAGGGTAA +TCGTGTTTCACACATCACTCGCACGGCTTCCGGTGCTAATGCTGGCGGCGGAAACCTCAC +CGATCGCGGCGAAGGTTTCGTACACGATCTGACGTCACTGGCGCGCGACGTAGCCACTGG +CGTACTGGCCCGTTCAATGGATCTGGACATCTATAACCTTCATCCGGCACACGCTAAACG 
+CATTGAGGAAATTATCGCTGAAAATAAACCGCCCTTTTCTGTTTTCCGCGACAAATTCAT +CACCATGCCTGGCGGGCTGGATTATTCCCGCGCCATCGTGGTTGCGTCCGTAAAAGAAGC +ACCAATTGGGATCGAGGTCATCCCCGCGCACGTCACTGAATATCTGAACAAAGTACTGAC +TGAAACCGATCATGCCAACCCTGATCCGGAAATCGTGGATATTGCCTGCGGTCGCTCCTC +TGCCCCGATGCCGCAGCGAGTAACAGAAGAAGGAAAACAGGATGATGAAGAAAAACCGCA +ACCATCTGGAACAACGGCAGTTGAACAGGGAGAGGCTGAAACAATGGAACCGGACGCAAC +TGAACATCATCAGGACACGCAGCCGCTGGATGCTCAGTCACAGGTAAATTCTGTTGATGC +GAAATATCAGGAACTGCGGGCAGAACTCCATGAAGCCCGGAAAAACATTCCATCAAAAAA +TCCTGTCGATGACGATAAATTGCTTGCTGCATCACGTGGTGAATTTGTTGACGGAATTAG +CGACCCGAACGATCCGAAATGGGTAAAGGGGATCCAGACTCGCGATTGTGTGTACCAGAA +CCAGCCAGAAACGGAAAAAACCAGCCCAGATATGAATCAACCTGAGCCAGTAGTGCAACA +GGAACCGGAAATAGCCTGCAATGCCTGCGGCCAGACTGGCGGGGATAACTGCCCTGACTG +TGGTGCGGTGATGGGCGACGCAACATACCAGGAAACATTCGATGAAGAGAGTCAGGTTGA +AGCTAAGGAAAATGATCCGGAGGAAATGGAAGGCGCTGAACATCCGCACAATGAGAATGC +TGGCAGCGATCCGCATCGCGATTGCAGTGATGAAACTGGCGAAGTCGCAGATCCCGTAAT +CGTAGAAGACATAGAGCCAGGTATTTATTACGGAATTTCGAATGAGAATTACCACGCGGG +TCCCGGTATCAGTAAGTCTCAGCTCGATGACATTGCTGATACTCCGGCACTATATTTGTG +GCGTAAAAATGCCCCCGTGGACACCACAAAGACAAAAACGCTCGATTTAGGAACTGCTTT +CCACTGCCGGGTACTTGAACCGGAAGAATTCAGTAACCGCTTTATCGTAGCACCTGAATT +TAACCGCCGTACAAACGCCGGAAAAGAAGAAGAGAAAGCGTTTCTGATGGAATGCGCAAG +CACAGGAAAAACGGTTATCACTGCGGAAGAAGGCCGGAAAATTGAACTCATGTATCAAAG +CGTTATGGCTTTGCCGCTGGGGCAATGGCTTGTTGAAAGCGCCGGACACGCTGAATCATC +AATTTACTGGGAAGATCCTGAAACAGGAATTTTGTGTCGGTGCCGTCCGGACAAAATTAT +CCCTGAATTTCACTGGATCATGGACGTGAAAACTACGGCGGATATTCAACGATTCAAAAC +CGCTTATTACGACTACCGCTATCACGTTCAGGATGCATTCTACAGTGACGGTTATGAAGC +ACAGTTTGGAGTGCAGCCAACTTTCGTTTTTCTGGTTGCCAGCACAACTATTGAATGCGG +ACGTTATCCGGTTGAAATTTTCATGATGGGCGAAGAAGCAAAACTGGCAGGTCAACAGGA +ATATCACCGCAATCTGCGAACCCTGTCTGACTGCCTGAATACCGATGAATGGCCAGCTAT +TAAGACATTATCACTGCCCCGCTGGGCTAAGGAATATGCAAATGACTAAGCAACCACCAA +TCGCAAAAGCCGATCTGCAAAAAACTCAGGGAAACCGTGCACCAGCAGCAGTTAAAAATA +GCGACGTGATTAGTTTTATTAACCAGCCATCAATGAAAGAGCAACTGGCAGCAGCTCTTC +CACGCCATATGACGGCTGAACGTATGATCCGTATCGCCACCACAGAAATTCGTAAAGTTC 
+CGGCGTTAGGAAACTGTGACACTATGAGTTTTGTCAGTGCGATCGTACAGTGTTCACAGC +TCGGACTTGAGCCAGGTAGCGCCCTCGGTCATGCATATTTACTGCCTTTTGGTAATAAAA +ACGAAAAGAGCGGTAAAAAGAACGTTCAGCTAATCATTGGCTATCGCGGCATGATTGATC +TGGCTCGCCGTTCTGGTCAAATCGCCAGCCTGTCAGCCCGTGTTGTCCGTGAAGGTGACG +AGTTTAGCTTCGAATTTGGCCTTGATGAAAAGTTAATACACCGCCCGGGAGAAAACGAAG +ATGCCCCGGTTACCCACGTCTATGCTGTCGCAAGACTGAAAGACGGAGGTACTCAGTTTG +AAGTTATGACGCGCAAACAGATTGAGCTGGTGCGCAGCCTGAGTAAAGCTGGTAATAACG +GGCCGTGGGTAACTCACTGGGAAGAAATGGCAAAGAAAACGGCTATTCGTCGCCTGTTCA +AATATTTGCCCGTATCAATTGAGATCCAGCGTGCAGTATCAATGGATGAAAAGGAACCAC +TGACAATCGATCCTGCAGATTCCTCTGTATTAACCGGGGAATACAGTGTAATCGATAATT +CAGAGGAATAATTCAGCCTGGCGGTGTAATGCACCGCCAACTTGAAATATTTTTTATGAG +AAAAATTATGAGATATGACAATGTTAAACCATGTCCATTTTGTGGTTGTCCATCAGTAAC +GGTGAAAGCCATTTCAGGATATTACCGAGCGAAGTGTAACGGATGCGAATCCCGAACCGG +TTATGGTGGAAGTGAAAAAGAAGCACTCGAAAGATGGAATAAACGAACCACTGGAAATAA +TAATGGAGGTGTTCATGTATAAAATTACCGCCACTATTGAAAAGGAAGGTGGCACTCCTA +CTAACTGGACAAGATATTCAAAATCTAAACTAACGAAATCAGAATGCGAAAAAATGCTCT +CAGGTAAAAAAGAAGCAGGCGTTTCCAGAGAGCAGAAAGTAAAACTGATAAATTTTAATT +GCGAGAAACTTCAGTCCTCGAGAATTGCATTGTATTCAAATTAAAACTTCATAGCTGATT +ATTAATAATCAACATCGGGCGTCAATTTCAGTCTAACATTGGCGCCTGCCAGAGGTGATG +CGATGGCACAAGTAATCTTTAATGAAGAGTGGATGGTTGAATACGGCCTGATGCTTCGCA +CTGGTCTGGGGGCCAGACAAATTGAAGCATACCGCCAGAACTGTTGGGTGGAGGGCTTCC +ACTTCAAACGAGTATCTCCTTTAGGTAAGCCAGACAGCAAACGAGGGATTATCTGGTACA +ACTATCCAAAGATAAATCAGTTTATCAAAGACTCATGATATGTCTAAATTACCAACAGGT +GTCGAGATTAGAGGTAGATACATTCGCATCTGGTTCATGTTTCGAGGAAAACGATGTCGG +GAAACATTAAAAGGCTGGGAGATTACAAACAGTAATATTAAAAAGGCCGGAAATTTAAGA +GCGCTGATAGTTCATGAAATAAACTCCGGTGAATTTGAGTATTTAAGACGTTTTCCCCAG +TCCAGCACTGGGGCAAAAATGGTGACAACGAGAGTCATAAAAACGTTCGGAGAGCTTTGT +GATATCTGGACAAAAATTAAAGAGACAGAGTTAACAACAAACACAATGAAGAAAACGAAA +TCACAATTAAAAACACTCAGAATAATAATTTGTGAAAGTACCCCGATATCACATATTCGT +TATAGCGATATCTTAAACTACCGGAATGAACTGCTGCATGGAGAAACGCTTTACCTGGAT +AATCCAAGATCCAACAAAAAAGGAAGAACCGTGCGCACAGTTGATAACTATATCGCCCTG +CTCTGTTCGCTGTTGCGTTTTGCGTATCAGTCGGGATTTATATCAACCAAACCATTTGAA 
+GGAGTAAAAAAATTACAGCGAAACAGAATAAAGCCTGATCCGTTATCTAAAACAGAATTC +AATGCATTAATGGAAAGTGAAAAAGGACAGAGCCAGAACTTGTGGAAATTTGCCGTTTAC +TCAGGACTTCGTCACGGGGAACTGGCAGCTCTGGCGTGGGAGGATGTGGATCTCGAAAAG +GGAATAGTGAATGTCAGAAGAAACCTGACGATACTTGATATGTTCGGTCCCCCAAAAACA +AATGCCGGGATCCGAACAGTAACACTACTGCAGCCTGCTCTTGAAGCACTGAAGGAGCAA +TACAAACTGACCGGGCATCATCGCAAAAGCGAAATCACCTTTTATCATCGGGAGTACGGC +AGAACCGAAAAGCAAAAACTGCATTTTGTTTTCATGCCCAGGGTGTGTAACGGAAAACAA +AAACCTTATTACTCGGTAAGCAGTTTGGGGGCAAGGTGGAATGCAGCAGTAAAACGTGCT +GGTATTCGCCGCCGTAATCCGTACCATACGCGGCATACTTTTGCCTGCTGGCTGTTGACG +GCAGGAGCGAACCCGGCATTTATAGCCAGCCAAATGGGGCATGAAACTGCGCAGATGGTG +TATGAAATTTACGGTATGTGGATTGATGACATGAACGACGAACAGATAGCCATGTTGAAT +GCGCGGTTATCGTAGTTGCAAAGTTTGCCCCCAATTTGCCCCATTTAGTACCAGAGAACT +GAAATAATGCAAGAAAATCAACAAATTACAAAGAAAGAACAATACAACCTGAACAAATTA +CAAAAACGTCTGCGTCGTAACGTGGGCGAAGCCATTGCTGACTTCAATATGATTGAAGAA +GGCGATCGCATCATGGTTTGCCTCTCCGGGGGTAAAGACAGCTATACCATGCTGGAGATT +CTGCGCAATTTGCAGCAAAGCGCGCCAATCAATTTTTCGCTGGTGGCTGTTAACCTCGAT +CAAAAGCAACCGGGCTTCCCGGAACACGTTCTGCCCGAGTATCTTGAAAAGCTGGGCGTT +GAGTACAAGATTGTTGAAGAGAATACTTACGGTATCGTGAAAGAGAAGATTCCAGAGGGC +AAAACCACTTGCTCACTGTGTTCTCGCCTTCGTCGCGGTATCCTTTATCGTACCGCAACG +GAACTGGGGGCGACGAAGATCGCGTTGGGTCACCATCGTGACGATATCCTGCAAACGTTG +TTCTTAAATATGTTCTACGGCGGTAAGATGAAAGGTATGCCTCCGAAACTGATGAGCGAT +GATGGCAAACATATCGTTATTCGTCCGCTGGCCTACTGCCGCGAGAAAGATATTCAGCGA +TTTGCCGATGCAAAAGCGTTCCCGATTATTCCGTGCAACCTGTGCGGTTCACAGCCTAAC +CTGCAACGTCAGGTGATTGCTGACATGTTGCGTGACTGGGATAAACGTTATCCAGGGCGT +ATCGAGACGATGTTCAGCGCGATGCAGAATGTGGTGCCGTCGCATCTGTGCGATACCAAC +CTGTTCGATTTCAAAGGCATCACCCACGGTTCTGAAGTGGTTAACGGGGGTGATCTGGCG +TTTGATCGCGAAGAGATCCCACTACAACCGGCGTGCTGGCAGCCAGAAGAAGATGAAAAT +CAGTTGGATGAGTTACGGCTGAATGTGGTTGAAGTGAAATAACCAGGATAGCGCCCGATG +CGCAAGCGTATCGGGCTACTCTTATGGAGGCCGGATAAGACGCGGCCAGCGTCGCATCCG +GCAATCCCGAATAAGATGTTTACTCTTGCACCCGGCAATTCAACATTTCATTATTTTAAT +AACCGCACCCGGCACGTTTTTCCTTTAATCTTCCCGCCCTGTAACTGTTTCCATGCTTTA +TGAGCAACAGCCTGACGGACCGCGACATAGACATGCGCCGGATGCACGGCGATTTTGCCA 
+ATATCTGCGCCATCAAGCCCGATATCTCCTGTCAGTGCACCTAATACATCACCCGGGCGC +ATTTTGGCTTTTTTCCCGCCATCGATACACAACGTTGCCATTTCTGCTTCCAGCGTCGCA +ATGGAACTATTAGCTGGCGGCGTTTGCCAGTTAAGTTTTATCTGCAACATGTCAGAAATG +ATATTGGCCCGCTGTGCTTCTTCCGGAGCACAGAAACTGATCGCCAGACCGCTATTTCCT +GCACGAGCTGTACGACCGATGCGATGTACATGAACTTCAGGGTCCCACGCCAGCTCAAAG +TTCACCACCAGCTCAAGCGATTTAATATCCAGACCACGCGCAGCAACATCAGTCGCGACC +AGTACACGGGCGCTACCGTTAGCAAAACGTACCAGGGTCTGATCGCGATCGCGTTGCTCC +AAATCGCCGTGTAATGACAATGCACTTTGCCCTACTTCATTCAGCGCGTCGCAGACAGCC +TGGCAATCTTTTTTGGTATTGCAAAACACCACGCAAGAGGATGGCTGATGCAAGCTTAAT +AACCGTTGCAACAGAGGAATTTTGCCTTTGCTGGATGTCTCATAAAATTGTTGTTCAATG +GGTGGCAAAGCATCTGTTGAGTCAATTTCAATCGCCAAAGGATCGCGTTGCACTCGTCCG +CTGATTGCAGCGATGGCTTCCGGCCAGGTTGCCGAAAACAGAAGCGTCTGTCGAGATGCA +GGCGCAAAACGGATGACATCATCAATGGCATCGCTAAATCCCATATCCAGCATGCGGTCG +GCCTCATCCATCACCAGCGTATTCAACGCATCCAGTGATACCGTGCCTTTTTGCAGGTGA +TCCAGCAAACGCCCCGGCGTTGCCACGATAATATGCGGCGCATGTTGCAACGAATCACGC +TGCATACCGAACGGTTGACCACCGCACAACGTCAAAATTTTGGTATTTGGCAGAAAACGC +GCCAGCCGACGCAATTCACCTGCCACCTGATCCGCCAGTTCACGCGTAGGACACAGCACT +AAAGCCTGGGTTTGAAATAGCGACGCATCAATTTGCTGTAACAAGCCGAGGCCAAAAGCC +GCCGTTTTGCCGCTGCCGGTTTTCGCCTGCACGCGAACATCTTTTCCGGCAAGGATCGCC +GGAAGCGCGGCGGCCTGCACCGGCGTCATGGTTAAATAACCCAACTCATTAAGGTTCGTG +AGTTGGGCGGGAGGCAAAACATTCAGGGTAGAAAAAGCGGTCACAATCTATTCTCGTGGT +CATCGACGCAAAGTTAGCAGGCGCGTATCCTCGCAGATCTACGCTCACGATGCGACAATT +TAATCGGTTCTTCATCGGGTGGTGGGTCAGGCATGGGTTGCGGGCGAGGGATCGGATCGG +GCACTGGAACAGGATCGCCAGGAATCGGTTCAGGGACAGGAATTTGCAAATAAATAAGTG +TCGTCATATTTCCCTCTGGTCATTGGGTGGACTCTTAAAGGGTAGACGCTGATAAATAAC +AGGCAAAAAAAAGCCGACTCATCAAAGTCGGCGTCGTACGAATCAATTGTGCTATGCAGT +AATTCAAAAAAGGAAGTAAGACAATATGGAGCGCAACGCCCATCGCTTGACGTTGCATTC +ACCTGCAAGAGAGATATTGCCCTGAATGGGTAGAGAGTTTATTGACTTCGCTCAAACTTT +GCGGCGTTTTTGTATACAGACAGCCGGAAAAATTGCTTTTGTTACAACCATTTACTACGA +TGCAACCATAAAGCAACACCACCAATAAGAACAACTAACAGAATACAAAAAATTGAAAAT +CCGAATTGCCACCCGCCGCCAGGGATCCCACCAAGGTTGACGCCAAATAACCCGGTCAGA +AAGGTACTGGGTAAAAAGACCATTGCCATCAACGACATTGTATAGGTACGACGAGCTAAA 
+TTTTCCTGCATCACCTGAGCGATTTCATCCGCCATCACGCCAGTCCGTGCTATACAGGCG +TCGATTTCGTCAAGGCCGCGCCCAAGGCGATCGGCAATATCCTGCATCCGACGGCGTTGG +TCATCGCTCATCCACGGCAAACGTTCACTGGCAAGACGAGCATAAACATCACGTTGCGGT +GCCATATAGCGACGCATCACAATTAATTGTTTGCGCAGCAGAGCCAGGAATCCACGCGGT +GGAATTTGCTGATCAAGGAGATTATCTTCAAGGTCGATAATTTTATCGTGCAGCTGCTCG +ATAAATTCACTGGAATGATCGGTCAACGCATCGCACACATCCACCAGCCATCCCCCGCAA +TCGGTCGGACCCGTGCCCTCTTCCAGATCGCTCACCACATCGTCCAGCGCCAGCACTTTG +CGTTGTCGGGTCGAAACAATTAACCGCCCGTCCATATATACACGCATGGCGACCAGTTGA +TCGGGGCGTTCATCGGTGCTGCCGTTTATACAGCGCAATGTAATCAGCGTGCCTTCACCG +AGACGGCTGACTCGGGGACGCGTGCTCTCGCCCGCCAGCGCATCACGTACGTTATTGGGA +AGCAGCGGTGTTGTCGCCAGCCATTGGGCGCTATCATGGTGTACATAATTAAGGTGGAGC +CAACAGGGATGCGCTTCATCAATCACATCTGTATTTTCCAGCGGTTTAACGCCGCCTCTA +CCATCCAGCATCCAGGCAAATACTGCATCCGGGACATTAACGTCCGATCCCTTAATCGCT +TCCACAGTGCCTCCATCATCAACGCATTATTTTGTAGTCTAGCCTTCTGGCCCTGTTACG +CAACATCTCATCACCCCATTACCCTGAAATGATTAATAAAATTCTGTCTAAATTGAATAC +AAAAAGCAAAATGCTTTTCCGTATACAAACCGTGTGAAGTGTTAAATAGCGTCTATCATT +ATCAGAATTATCTGATCATATGACGTGGCTTTTTTGCGATCGGATAGCAACAAAAATTGA +TAAAAATAACGGGATCTCAATGATTACGCACAACTTCAATACCCTGGACTTACTCACCAG +TCCTGTCTGGATCGTTTCGCCCTTTGAGGAACAGTTAATTTATGCCAATAGCGCGGCGAA +ACTGTTGATGCAAGACCTCACGTTTAGTCAGCTACGAACCGGACCCTATTCCGTCTCCTC +ACAAAAAGAACTGCCGAAATACCTCTCCGATCTGCAAAACCAACACGATATTATCGAAAT +CCTCACTGTTCAGCGTAAAGAAGAGGAAACAGCATTGAGCTGTCGGCTTGTTTTGCGAAA +GCTGACAGAAACAGAACCGGTGATTATTTTCGAAGGTATCGAAGCGCCGGCAACGCTGGG +TTTAAAAGCCAGTCGCTCGGCAAATTATCAGCGCAAAAAACAAGGTTTTTATGCGCGCTT +TTTTCTGACTAACTCTGCACCAATGTTGTTGATTGACCCGTCACGAGATGGACAAATCGT +CGATGCTAACCTCGCCGCGCTCAATTTCTATGGTTATAACCATGAAACGATGTGCCAGAA +ACATACCTGGGAAATAAATATGCTCGGGCGTCGCGTCATGCCTATCATGCATGAAATCTC +GCATTTACCCGGTGGTCATAAACCTTTGAATTTTGTTCATAAACTGGCGGATGGTTCGAC +TCGTCATGTGCAGACCTATGCCGGACCGATTGAAATTTATGGCGACAAGCTCATGTTATG +TATTGTGCATGATATTACTGAGCAAAAACGGCTGGAGGAGCAGCTGGAACATGCTGCTCA +CCATGACGCGATGACCGGATTACTGAATCGGCGACAGTTTTATCACATTACGGAACCAGG +CCAAATGCAGCATCTCGCCATCGCTCAGGATTACAGCTTGTTGCTCATCGACACCGATCG 
+TTTTAAACACATTAACGATCTCTATGGGCATTCTAAAGGTGATGAGGTGTTATGCGCCCT +CGCCCGCACCCTCGAAAGTTGCGCTCGCAAAGGCGATTTGGTGTTTCGTTGGGGAGGCGA +AGAGTTTGTCTTATTGCTACCAAGAACCCCACTGGATACCGCGCTTTCGCTGGCTGAAAC +TATCCGCGTAAGCGTGGCAAAAGTGAGTATTTCGGGCTTACCACGCTTTACCGTCAGCAT +TGGTGTGGCGCATCACGAAGGAAATGAAAGCATCGATGAACTGTTTAAACGCGTTGATGA +TGCTTTGTATCGGGCGAAAAATGATGGACGCAACCGCGTGCTGGCGGCATAAGCCGCGGA +TGCGTCTCGAGATCAACGACTGCGCTTAGCGTGGCGCTCCCAGTTTTCTTGCTTCGCCTG +CGCCGTTTTACGTAGTGCGACGTAACACGCCCCGCTGCCGCCATGATGCGGTAGCGCGGT +GCAATATGCCTGAACATCATCAAATTCGGTCAGCCAGCGCGCCACATAGCTGCGGACAAT +ATTGGCATGCGATTTATCATCCCGCCCTTTACCATGAATAATCAGCACGTTACGCAAACC +ATCCGCCAGGGCTTGTTGAATAAAACTGAACACCATTTTGCGGCACTCTTCCACCGGCTG +GCGCAAAAGATTCAGGCTCGCCTGTTGCGGATATTTACCACTGCGCAGCTTATCCAGCAC +CCCATGTTGCAACCCTTCCCGCCGAAACTCCAGCGGCTGACTTAGTGGGATGATGTCGAG +AAATCCGGTGGTGAGGAAATTATCAAGCTGCAGCGTGTCGATACGCTGCGGCGCACGTTG +GTTACGCGTTGGATGCCAGTGGACATCGGTAGCACGTTTCAGCGGCTGGACATCTTCCAT +GGCGTCAAGAAACAGCGATTTGTCGTCAAGGTTCATGTTACATCCTCCCGCAATTAAGAG +CGCGATATGATAACCAGACCGGGTCGGTCCAACAACGTATTACCCAAATTTCCAGTAATA +AGTTCCAAATATTGCCGATATTTTAAGCAAAATACTTATGCATGATTATTCATTCACGAT +ATTAATAATGTAACTTATATTTTCGTGAAATCTGTCACTGAAGAAAATTGGCAACTAAAG +GTTAAAACCGTTATAACACAGTCACCGGCGCAGAGGAGACAATGCCGGATTTAAGACGCG +GATGCACTGCTGTGTGTACTGTAGAGTCTGGCGGATGTCGACAGACTCTATTTTTTTATG +CAGTTTTAACTTTGCAGATAGCCGCATTCTCGCCGGAATTGCGTGATTAATGACGCGGTC +AGTGGTGTCTGGCGACTATCACGCCGCTGAATCAAATAATAGGCCGCTTTCGGTAAAATT +TCGCTAACCGGCAACATCACCAGCCCCTGTCCGTGCAAGGGATCGCAGCCCATTTCTTCA +GGCAGTTTGCTGAGAAAATCGCTTTTTGCCACCAGACTGATACAGGCTGAGAACGTCTCG +CAGACTACACCGACCTGTGGCGTTTGCGCCTGATCGTCAAGCAATTCACTCAACTGTTTG +TAGTAGCTGCCGTGTGGCGTCGGCATTGTCCAGCTGTAATCCAGTAACTGTTTGATCGAA +CGGGCACCAATGGCGGGGTGTCCCGGGCGGCAAAAGATCGCGAATTGCTTTTCCAGTAAT +TTCTCAAAAGTAAATTCGTGGTCGTACGGTCCCTGATAATAGGTATTGATGGTGAAATCC +AATTCTCCCTGACGCAATTCATTAATCATCGACACCAGTTGCCCTTCCATAATGCGTACT +TTTACCTGCGGATGCTGCTGATGAAAACGAGATATGACAGCTGGCATCAGACTGCGGGAA +ATACTGGCCCCCATGCCGATATTAATCTGCCCTGCCAGTTGCCCTTGTCGTTGGCGAATA 
+TCCTCTTGGGCTGCGCGCAGCTCTTCAAGAATTAGACTGGCGTGCTGATAAAAACTTTCA +CCGGCATCAGTTAACGTCACGCCTTTACTACGGCGAAAAAAGAGTTGCGCCGCTAACCCT +TCTTCTAGCTCCTGAATAGATTTACTCAGTGCCGGTTGCGACATATTCAACATTCGGCTC +GCTCCGCGAATGCTGCCCTGACGAGCCACTTCAACAAAAGCCCGAATTTGATGAATTTTT +ACCTGAAAAGCCATGACGCCACCGATAACCGTTATTTATCAGACCAAAGAAACTGGCATC +TACTTTAATGCAGATGATTGTGTCAGGGTAATTTATGAACGGTTAAAACTGTGAAAAATC +AGTTAGTGATAAGTAAAAACTATCGCTACGTGAACCGGGTCACACTTTTTACTGATGACG +GGAAAGGTTATGGAGTCTTTGAATCAATTTGTTAATTCGCTTGCCCCAAAATTATCGCAC +TGGCGACGTGATTTTCATCACTATGCAGAGTCTGGCTGGGTGGAATTCCGCACTGCCACC +CTTGTTGCGGAAGAATTGCACCAGCTCGGCTATTCACTGGCGCTGGGTCGCGAAGTAGTT +AATGAAAGTAGCCGGATGGGATTACCTGATGAATTCACTCTACAACGCGAATTCGAGCGC +GCTCGTCAACAGGGTGCGCTAGCACAATGGATTGCGGCTTTTGAAGGTGGTTTCACTGGT +ATCGTCGCCACCCTGGATACCGGTCGCCCCGGTCCGGTGATGGCTTTCCGTGTCGATATG +GACGCGCTGGATCTCAGTGAAGAGCAGGATGTCAGCCATCGCCCCTACCGCGACGGTTTT +GCGTCATGTAACGCCGGAATGATGCATGCCTGTGGTCATGATGGACATACCGCCATTGGG +CTTGGGCTGGCGCATACCCTTAAACAGTTCGAGTCCGGACTACATGGCGTCATCAAACTG +ATTTTTCAGCCTGCAGAGGAAGGTACGCGTGGCGCGCGGGCGATGGTCGATGCAGGTGTC +GTAGATGATGTTGATTATTTTACTGCCGTGCACATTGGCACTGGCGTACCTGCGGGCACC +GTGGTGTGCGGCAGTGATAATTTTATGGCAACCACCAAATTTGACGCGCACTTCACCGGT +ACCGCCGCTCACGCAGGCGCAAAACCAGAAGACGGTCACAATGCCTTGTTGGCGGCAGCA +CAAGCCACTCTTGCACTGCATGCAATCGCCCCGCACAGCGAAGGAGCTTCCAGAGTAAAC +GTGGGCGTTATGCAGGCAGGAAGCGGTCGTAACGTTGTTCCTGCCTCGGCGTTGCTGAAA +GTGGAAACACGCGGGGCCAGCGACGTCATTAATCAATATGTTTTTGACCGTGCACAACAA +GCGATTCAGGGCGCAGCAACCATGTATGGTGTCGGCGTTGAAACTCGTCTGATGGGTGCA +GCTACCGCCAGTTCTCCTTCGCCGCAATGGGTCGCATGGTTGCAAAGTCAGGCGGCTCAG +GTCGCGGGGGTCAATCAGGCCATTGAACGTGTTGAAGCGCCTGCGGGTTCCGAAGATGCC +ACATTAATGATGGCCCGCGTGCAGCAACATCAAGGGCAAGCCTCCTACGTGGTGTTTGGC +ACACAGCTGGCGGCAGGTCATCACAACGAAAAATTCGATTTTGACGAGCAGGTTCTCGCT +ATTGCCGTCGAAACGCTGGCGCGCACCGCGCTCAATTTTCCCTGGACGCGAGGTATCTGA +TGCAGGAAATCTATCGTTTTATCGACGATGCGATTGAAGCCGATCGCCAACGTTATACCG +ATATTGCCGATCAAATCTGGGATCATCCAGAAACACGTTTTGAAGAGTTCTGGTCAGCGG +AGCATCTGGCTTCGGCGCTGGAATCTGCAGGCTTCACCGTTACCCGCAACGTAGGCAATA 
+TCCCAAATGCCTTTATTGCTTCGTTTGGTCAAGGCAAACCGGTTATCGCCCTGCTGGGAG +AATATGACGCCCTGGCAGGTTTAAGTCAGCAAGCAGGTTGCGCGCAACCTACATCCGTGA +CGCCCGGTGAAAATGGTCACGGTTGCGGACACAATTTGCTGGGAACCGCCGCCTTTGCCG +CTGCAATAGCCGTCAAGAAATGGCTGGAACAATATGGGCAAGGCGGCACGGTGCGCTTTT +ATGGTTGTCCTGGCGAAGAAGGCGGCTCGGGTAAAACGTTCATGGTTCGCGAGGGGGTAT +TTGATGATGTGGATGCGGCACTCACCTGGCACCCGGAAGCCTTTGCCGGTATGTTCAATA +CCCGCACGCTGGCAAACATTCAGGCATCATGGCGCTTTAAAGGGATCGCAGCACATGCCG +CGAATTCCCCTCATTTGGGACGCAGCGCCCTTGATGCCGTAACGTTGATGACCACTGGCA +CCAACTTCCTCAACGAACATATTATTGAAAAAGCGCGCGTACACTATGCCATCACAAATA +GCGGCGGGATCTCGCCCAACGTGGTCCAGGCGCAGGCAGAAGTGCTTTATCTTATCCGCG +CCCCCGAAATGACCGACGTGCAGCATATTTATGATCGGGTCGCCAAAATCGCCGAAGGTG +CGGCATTGATGACCGAAACCACGGTTGAATGCCGCTTCGACAAAGCCTGTTCCAGTTATC +TCCCGAATCGCACCTTAGAAAATGCCATGTACCAGGCCCTATCCCATTTTGGTACCCCGG +AATGGAACTCCGAAGAACTGGCTTTTGCGAAACAAATTCAGGCTACGCTCACCTCCAACG +ATCGGCAAAACAGTCTGAATAATATCGCCGCAACCGGTGGCGAAAACGGCAAGGTTTTTG +CACTACGTCATCGTGAAACGGTACTGGCGAATGAAGTCGCTCCATATGCCGCCACCGATA +ACGTGCTTGCGGCATCGACTGATGTCGGCGACGTCAGTTGGAAACTGCCTGTTGCCCAGT +GTTTCAGCCCCTGTTTTGCCGTCGGTACACCGCTACATACGTGGCAACTGGTTAGCCAGG +GGCGAACATCTATTGCTCATAAAGGAATGCTGCTGGCGGCGAAAACTATGGCAGCAACCA +CAGTCAATCTCTTCCTTGATTCAGGGCTATTGCAAGAATGCCAACAAGAGCATCAGCAAG +TAACGGACACGCAACCGTATCACTGCCCTATCCCGAAAAACGTGACACCGTCACCTTTAA +AATAACAACAACAACGCAAACACAACAACCGAGGAATGCCCATGAGTATGTCATCCATAC +CGTCGTCCTCCCAATCCGGGAAGCTCTATGGCTGGGTCGAAAGAATTGGTAACAAGGTTC +CCCATCCTTTTCTGCTCTTTATCTATTTGATTATCGTACTCATGGTGACGACGGCAATTT +TGTCGGCCTTTGGCGTCAGTGCGAAAAACCCGACCGATGGTACGCCGGTCGTGGTGAAAA +ACCTGCTCAGTGTGGAAGGATTACACTGGTTTTTACCCAATGTTATTAAAAACTTTAGCG +GTTTTGCTCCACTTGGTGCGATCCTGGCGCTGGTTTTAGGTGCCGGTCTGGCGGAGCGCG +TCGGCTTACTGCCAGCACTAATGGTTAAAATGGCATCGCATGTTAATGCCCGCTACGCCA +GTTATATGGTGCTGTTTATTGCTTTTTTCAGCCACATTTCTTCCGATGCGGCGTTAGTGA +TCATGCCACCGATGGGTGCGCTGATTTTTCTGGCGGTGGGCAGGCATCCAGTTGCAGGTT +TACTGGCTGCCATTGCAGGCGTAGGTTGCGGCTTTACGGCTAATTTACTGATTGTCACAA +CCGACGTGTTGCTGTCGGGGATCAGCACGGAAGCGGCAGCTGCGTTCAATCCGCAAATGC 
+ACGTCAGTGTAATTGATAACTGGTATTTTATGGCCAGCTCCGTAGTCGTACTGACGATTG +TTGGCGGCCTGATAACCGACAAAATCATCGAGCCACGGTTAGGTCAATGGCAGGGAAACA +GCGATGAGAAACTGCAGACATTGACCGAAAGTCAGCGTTTTGGTTTACGCATAGCAGGTG +TCGTATCGCTACTTTTTATTGCTGCGATTGCGCTGATGGTGATCCCGCAAAACGGGATAT +TGCGCGATCCGATTAATCACACCGTGATGCCATCACCCTTTATTAAAGGTATCGTGCCAC +TGATCATTCTTTTTTTCTTTGTTGTCTCGCTGGCTTATGGCATCGCTACCCGCACAATTC +GACGTCAGGCGGATTTACCGCATTTAATGATTGAACCGATGAAAGAGATGGCGGGATTTA +TCGTGATGGTTTTTCCCCTCGCCCAATTTGTCGCCATGTTTAACTGGAGCAACATGGGGA +AATTCATCGCCGTGGGGCTGACCGATATACTGGAAAGTTCAGGGCTTAGCGGCATCCCGG +CGTTTGTCGGTCTGGCGTTGCTTTCCTCTTTCTTATGCATGTTTATTGCCAGCGGTTCCG +CAATCTGGTCGATTCTCGCCCCCATTTTCGTACCAATGTTTATGCTACTTGGCTTTCACC +CGGCATTTGCGCAAATCCTCTTTCGTATTGCCGACTCATCCGTATTGCCTTTAGCGCCGG +TATCTCCTTTTGTTCCACTGTTTCTTGGATTCCTGCAACGCTACAAACCAGACGCGAAAC +TGGGTACTTACTATTCGTTAGTCTTGCCCTATCCACTTATCTTTTTGGTGGTATGGCTGC +TGATGTTGCTGGCGTGGTATCTTGTCGGTCTGCCGATAGGTCCGGGTATTTACCCACGTT +TGTCTTAAGAGAGAACGGATGCTGAGATTACTTGAAGAAAAAATTGCCACGCCACTGGGT +CCACTGTGGGTGATTTGCGATGAGCAATTTCGCCTGCGGGCGGTTGAATGGGAAGAGTAC +AGCGAACGCATGGTGCAGCTGCTGGACATCCATTATCGCAAAGAAGGCTATGAGCGCATT +TCTGCCACCAATCCAGGCGGTTTAAGCGACAAGCTTCGTGAATATTTTGCCGGTAATCTT +AGCATTATTGATACGCTTCCCACTGCTACGGGGGGGACGCCATTTCAGCGCGAAGTCTGG +AAAACACTACGCACTATCCCCTGCGGGCAGGTAATGCATTACGGCCAACTGGCTGAGCAA +TTGGGCCGTCCTGGCGCGGCGCGTGCCGTTGGTGCGGCAAACGGATCGAATCCCATCAGC +ATCGTCGTACCTTGCCATCGGGTTATTGGCCGAAACGGCACCATGACCGGATATGCAGGC +GGAGTTCAGCGAAAAGAGTGGTTATTGCGCCATGAAGGTTATCTTTTGCTGTAAACATTA +AACAATTTGTGCCAGCTTGTTCACACTTTTATGTAAAGTTACCCTTAACAACTTAAGGGT +TTTCAAATAGATAGACATATATTTACATCTAATATCGGAATTCTCTGCTGTTAAGGTTTG +CTTAGGGAAGGTGCGAATAAGCGGGGAAATTCTTCTCGGCTGACTCAGTCATTTCATTTC +TTCATGTTTGAGCCGATTTTTTCTCCCGTAAATGCCTTGAATCAGCCTATTTAGACCGTT +TCTTCGCCATTTAAGGCGTTATCCCCAGTTCACAATATAGTTAAATGCGATGTTTTTGAC +GGTGTTTTCCGCGTTACCAGCAGCGTTAACGGTGATGGTGTGTCCATGTGAACCAATCGC +AACGGAGTGCGTATGAGCACCAATACCGACAGTATGCGCGTGTGCACCTGCGCTTGCAGC +AGTGCCGGACAGTGAGTGGGTATGTGCGCCAGCAGATGATGTTGCATAGTTTTGATTATG 
+CACAACAGACAATCTTGTTGATGCACTACCAGCACCGGAGTTAGCACTAGCCGTGTTCAC +GTTGGCTAGTGAGTGTGTGTGTGCTCCAGCCGAGTTTGTAGAGCCGCTCACACTGTGTGT +ATGTGCCCCGGTGTTATTCGTGGATTTAGTGCCGTAATCAAACGACGATGTGGTTTTCGT +CCCCAAATCTGTACTGGATGCGCTGGCGCTGTGGGTATGCGATTTAATGCCGTCCTGTTC +CTGAGACAATACGGCCCGACCACTGGCAGGTTTGCCCTTAATCGTCCAGCCACGCATATC +AGGGATCACGCCTGACGGATAAGCGGCTGCAAGTTTCGGGTAAGCAGATTTGTCAAAAGC +CTGCCCCTGCATCAGGGCATAACCAGACGGAACGGTATCTGATGGCCACGGGATTGGTGC +GCCGACTGGGTAGCTTTCTGGTGGAAGATTTTTCGAGGTATAAACTTCTGCCCAGTCTTC +CTCAAAACCATAACCGTCTCTTGAAGAACGGTAGAACAGACCACCATTTCTGTAATGCGC +CTTCATCTGCAGGGTCCGGCAACTTCCGACTCCGGTATAGAAGTTAACCAGAATATAGCT +GTCGCCAGAGCGGGTGACATTGTAAGCGCCAGATTCGGCATTCCATGGAACGCCACCATC +CGCATCGGCATATGTATCCGTTGCCCTTCTGGCAAAAGCAGCCACATGCGCGGCGGTTAA +AGTAATATCTTTGGAACCATCAAACTCAACACCAGAAACCCGTCTTGGCGTTTGCAGCTT +TGTTGCTGTTAATGCATTACCGTTCAGACTTGCGGACAGTTTGGTTCCAATAACCAGTTC +GCCGGTTGCGTTATCAATAGCAAACGGTCTTAATGTATTCCAGCCACCATAAACATCACC +TTGATTGGTAAGCAGCAGGTAAGTTTTAGCGCCATCATTACGCCATAATGCACCATACTC +CCCACCTATCATTCGAATCTGATTACCACCACGCGCTACAATTTCGTCTGTGGCAAAAAG +TTTTTTGCACGACAAGTTATCGTTAACGATTAACGAATGAGACTCATAAAAACCACGCCC +ACTCTTAAAATCAAGGATAACGTCCGCCGCGATACATTCAGTCGCCGGATTTGTTGCCCC +AAACTTATAGGTCGTATCATTAACAACGAGATCAGCACCAGGTGCGGATATTGACAGGCC +ATCTTCAATAAACGCAAAAACAGGGAAAGCAGCGCCATCAACATAGAACACAGAGCGCAA +ATCATCGCCCTTATTACTCATCATTATTGAGTGAATGGCTCGTTCATTGTTTTGATATTG +CCAGAACATGCCATAAGCATAACGCCCCCTGTCAGTCCAGCCACCAGGCATAACAAATCC +GTTAAACTCGCAGTTATTCATCGGATCGCCTGCGGTTCGCGTTGCCGTGGTGATAATGAC +TCTTGATGCCAGTTCGCTTACTGAGCCAGCAGAACGCATAACAACAACAGGGTAATATTT +TCCAGATGTTGCACCTGCAGGAGCGTTAACCCGCACATAACGCATACCACGCTTATCAGC +AAAGTCTGTTTTACTGACCGCGTTAATGTTGTTCAGGAAGCATCCCTTATCGGGTATATC +AGCGCCGTTCTGGTCTTTCTGCAGACGTTTCTCTGCATTGTCATAGGCTGATTTTACTGC +CTTTGGCGTTGCCGCCAGCGTTTCAGACGTACTGTTGGTCGCACTGCTGAGCTGTACTAT +CCCCTTTTTCGTCGTACTTGCATCCTCAAGCGCCACGGCGGATGCAATATCCTCTGCCCG +TTTAGCTGCTGTCTCGGCGCGCGTTGCCGCGGATTCCGCCGTACTTTTGCTCTGAGCTGC +CGCCGTCGCACTGCCAGCAGCCTCTGTCGCCTTCGTGGATGCCGTCGTGGCGCTGCTCTT 
+CGCTGCTGACGCTTGTCTGGTCGCCTCATCTTTTGAAGCAGACGCCGATGATGCCGATGA +CGCCGCCGAACTGGCTGACGATGCGGCAGCCGTTTTTGAGGATTCTGCGCTTGTTTCCGA +CGCTTTCGCGTTCGTTTCGGATGTCTTCGCTGCGGAAGCAGACCTCGCTGCTGCGCTGGC +CTGTTCAGTGGCTTCGCCAGCCTTCGTTGTGGCTGTTGAAGCAGACGATGCGGCGCTTTC +TGCCGATTTTCCGGCGGCGGTGGCACTGGCTGAGGCCTGCCCGGCACTTGTTGACGCTGC +ACTGGCAGACGACGCAGCCGCTGTTTTTGAGCCTGCCGCAGCCGAGGCGCTCTGTCCCGC +TGCCGTTTCAGAAGACCTGGCGTTCGTCTCGGACGTTTTTGCCGCCTTCGCGGAATTTCC +TGCCGCCGTTGCCGAGGAAGCTGCACTACTGGCGCTTGATGATGCGTTCGTTTCTGATGA +TTTTGCCGCTTCTTTTGAGGCCGCCGCATCCCGGGCCGAGGTCGCAGCTTCTGATGCCTT +CGTGGTCGCGGTGGATGCAGATGTGGCTGCTGATTGTAGTGACGCTGAAGCATTCGTTTC +TGACGTTTTCGCCGCACCGGCACTGGTGGCCGCCGCGCTTTTTGAGGACTCTGCAGCGGC +AGCACTTTTTGATGCTTCAGTGGCCTTTGTTGATGCCGTTCCTGCGCTGGAAGACGCTGA +CTGAGCCGACGACGCGGCCTGTCCGGCTGACGTGCTGGCTGCGCGTGCTGAGTCCGCAGC +ATCAGCCGCATGGGTTGCCGCCTCACGGGCTGATGTGCTGGCATCACTGGCTGT +>NODE_17_length_39819_cov_63.5061_ID_33 +TTCAGCTTTCTTCGTTTCATTCCATTCCAATAAAATTTTATGATTTTCTTCAGGAAGAGT +AATGTTTATTTTTCCAATCGGCTCGTCTTTATGATAATTATTTATCACTAATTCTAATAG +ATTCATAAATCTTTCTTTATGCAATGCTAGTTCCGCGCCGTTATATACTTCCGGATTCGC +ATCAAAATGAATCATTAACTCATTTTCATCGAAGCGCTTATATACATTAATCGATAAGTC +ATCGACAGGTCCTGCTGATAAATTATGTGTAATACCACGATTTCCAGCAAAGTTAAGCCC +GTAATCAAAAGGCATAACATTCACTAACGGCCCAAACAACCTTTGATTTTCACCTAGTAA +CTTTAAGTCTCTTCTTAATTCCTCATGACGATATTTATGATGACGACGTACGTCTCGAAT +TTCTTTAGAAACTTGCTGTATTAATTCTACAAAAGTTATATTTGGAGTTAACTTTAGACG +AAGTGGAACTAAATTCATTACCATGCTAGGTGTATGAATAGATACAGAACCAAGACGCCC +CATCATCGGTAAACCTAAAACAATATCATTTGCACCTGTTAATTTATGCATATAAATACT +TGTTACAGCTACAACAAATTCGGGCCAACTCGTTAACGAAATATTAATATCTTCTAGTAA +AGTTTTCGTACTAGCATTTGATAAATAAGCAGTTTCTCGTAAAAATCCATTTGATGTTCT +CGGCGCTTGTTCTGCTAAACTTATAACTTCCGGTTCATCTGCAAACTTTTCTAGCCAAAA +AGTACGATTTTCCTGAAATTGTTTAGATTCACGATACTCATTATCTTCTTGTACAATTTT +GGCAAGAGAACCAAATGGCTTTTCATTCTTATTTGTTTCTTCTATAAGTGATGTATACTC +ATTCGCTACCTTTTGGCTAAGAAGTGAAAATCCATAGCCATCCATCACAATGTGATGGAT +GCGCTGATACCAAAAGAAACGATTATTTTCAACTTGAATAAGTGCTTCAGTAAATAATTT 
+ATCTTCTTTTAAATCTATCGGTACAGATAAATCATTTTTCATCCATACTTTAGCAGCTTC +TTCAGGATTTCCCTCTTTACGAACGTCAATAAAATGCATATGAAACGTTGACGATTCCTC +AATAACTTGCCATGGGCCAATTTCATCCTCTTCGAAACGAACATGAAGTGCTTCTGCTTC +TTTTACCACTTTACGTACAGCTAACTCGAAAATTTCATGATTAATATTTCCGTTTATTTC +TATATATTCCCCAGTATTATAAATCGGATTAAGCGGATCTAATTGTTGCGCAAACCACAT +ACCAGACTGCGCAGATGATAACGAATGGCGAATCTTTTGACTATTAGGCATTCTTTACTA +CCCCTTTATTATGTAGTCTATTTGTTACAGCACTTGTGCTGCTTGTGAAGATAGTAAACG +GTACCAATCATCAACAGTTGGACGTTCTGCTAAATCTGCAAAAGTGATTTCTTTCCCTTC +GCGGCGCCACTTCTCTACTAAACTCATAATTCTGACCGAATCAAGTCCTCTATTTAATAA +GTCTTCATCAACTTCAATACTCTCTACTGGCTCACGGAGTAGTTGTGCAACTAGTTCATG +TACTTTCTGTAAAGTGATTCCTTCACTCTCATCGCTTTTTACACTTTGTAAATCTTTTAA +TAGTAAGTTTGTTGATGTCGTTACTGCACATCTATTAGATGCATACTGCAATGCTTGTTT +ATGATGCTCTAGCGAAAAATCAGCAACTGCATCTGCTACAAAAAATGGCTCTATACCATC +CATAAATGCTTCACAAGCTGTTAAAAGGCAGCCAATATGCGCATAAATACCGCAAATAAT +AAGTTGATCTCTTCCCTGCTCATTTAAAATTTCTAATAGATTCGTCTTTTTAAATGCACT +ATATCTCCATTTGGTTAGGAATATATCATCCTCATCTGGAGTAAGCTCATCGACAATTTT +CTTTTTATCTGGTCCAGCAGGAATACCGTCACCCCAAAAGTCTTGTAATAACCCTCGTTG +TTCTAACGTTTGTCCACCAGGTTGTGCTGTATAAACAACTGGTATACCAAGTTCCTTGCA +TCTTTCTCTTATCACCTTAATATTTGAAATTAGTTCTACTTTTGGCGATTCTTTATCGCT +ATATGCATCAAGAAAATATTCTTGCATGTCATGGATTAGAAGAACCGCACGTTTCGGATC +TGGCGTCCAATTCACTTTATTTTTTGGTAGTTCTGATTCAATTGGCATTTTATATACTGA +AATAGATGGGATAGCCATCTAATCACTTCCTTCCGATTTTTATTATTGTTTTACTGTAAT +AAGTTTTTCAGCAATGACTTTACGTAATTCTTTTTTGCTTACTTTTCCAACACCTGTTTG +TGGGAATGATTCAATAAATTCAATTCGATCTGGAATCTTATAAGCTGCTATACCACGTTC +TCTTAAGAACATTTTTAATTCGTTTGCAGTTAGGACTTGTCCACGAGCGATAACAAAAGC +GCAAGTGCGTTCTCCTAAATAATCGTCAGGCATAGATACAATTGCTACATCATGTACTGC +ATCATGTGCTAATAGATGATTTTCAACCTCTTCCGCAGCAACCTTCTCACCGCCACGATT +AATTTGATCTTTATCTCTTCCTTCTACAATGATGTAACCTTGTTCATTTACTTTCACAAG +ATCTCCTGTACGATAAAATCCATCCTTTGTAAATGATCGTGCATTATGCTCTTCCGCTTT +ATAATAACCACGAATTGTATATGGCCCTCTTGTTAATAAACTTCCTACTTCACCGGGTTT +TACATCGTTATCATTTTCATCAACAACCCTTACTTCATCGAATGTAGACATCGGTCTACC +TTGCGTATGAATAATGATTTCTTCAGGATCATTTAATCTTGTGTAATTTACTAACCCTTC 
+TGCCATACCGAACACTTGCTGTAACGTACATCCAAATGTAGGGCGAATACGCTTCGCAAC +TTCGGCACTAAATTTCGCACCTCCTACTTGAATAACTTGCAGGCTCGATAAATCGTCGTT +ACGGGAAGATGCTGCATCAAGCCAAATCATTGCTAATGGTGGAACGAGCGCTGTAATCGT +AACTTTTTCTTTTTCTATAAGAGCAAATGCCTCATCTGGACTACCTCCAGTTGCCAATAC +TACTTTTCCACCCGCATAAAAAGTTCCAAATGTCCCTGGAGAACTCATCGGGTAATTGTG +TGCTACTGGAAGAACTGCCATATAGACACTTTCTGCATTTAAATTACAAATGTCAGCGCT +AACACGTAAACTATAAATATAGTCATCATGTGTTCTAGGAATTAATTTAGAAAGTCCTGT +CGTCCCTCCTGATAATTGGAGAAACGCAACATCACTTGGCTGAACTTCTGGTAACGGAAT +CGGATCCATATAAAGATCAGTTATGTTCACGAATTCTTCTTCTTCCCCCACTACGATTAC +ATGTTGTAAAGCGGGAACTTTCTCTTTCACTTCTCTTGCTAGTTTTCGGTAATCAAAGCC +GAGAGCCTTATCTGAAATAATGTAAGCACTCGCCTCACCGAACTCGCAAAAATAACTAAT +TTCACTACTGCGATGAGAAGGTAGTGCAAAAACAGGAAGTGCTCCAATGCGAAATAGCGC +AAAGCATATTTCGAAAAACTCTATAATATTAGGTAACTGGATTACAACTCGGTCCTCTTT +CTTTATTCCTAAATTCAGTAAACCTGAAGCTAAGCGATCTACCTTTTTATCAAGTTCACT +ATACGTTATATGCGTATTACTACTTACAACCGCAATTTGATCCCCATATTTTTCAGCACG +TTCTTTTAACATCGAACCAAATGTTTCTCCAAGCCAACACCCTTCTTCTCGATAGCGATT +GGCAAATTCTTTTGGCCATTCCGTATAACCTATTAACATTCTTTCTTCCCCCTATTTTTC +ATTCAGCAAATTATCTCTTAACCCTAAAGCCTTCAACATTGTTTGGAACTTAGCTGATGT +TTCTGCTAACTCATCTTCTGGCTTTGATTCAGCAACAACTCCAGCACCTGCATATAAGCG +AAGTGTATTCTCTTGCACTTCAGCACAACGAATTGTAACAATCCATTCGCCATCTCCATC +TAAATCGCTCCACCCTAACATTCCTGTAAAGAATTCACGGTCAAATGGCTCAATTTTTTG +AATAGCCTCTCTCGCTTCTTCCATTGGAGTTCCGCAAACTGCTGGCGTAGGATGAAGAGC +AATTGCTAATTCTAAAGAAGACGTATTTGGGTCCTTTAGTTCACCTTTCACTTCCGTAGA +CAAATGCCACATTGCTTCACTATGAATAACTGAGGGTTTTTCTGGAACATGTAATATATG +ACAGTAAGGGCGAAGCGCAGCGGCAACCGCTTCAACTACTACCGCATGTTCATGTAAATC +TTTTGGAGAAGAAAGTAATTCTTCTGCCCTTCTTTTATCTTCTACTGGATCCTCACTTCG +TGGTCTTGAGCCAGCCAACGGATTAGAAATTACTTGCATACCATGACGTGAAACAAGTAA +TTCAGGACTTGCTCCAATTAACGTCTTACTGTACTCGTTTTCATCTTTCGGCAAATTAAC +AGCAAATGTGTAGCCGTGCTTATTATGTTCCGCTAATTCACGAAGTAGTTTTTGTTGATC +AATTTTCTCGGAAGACTTAACATCCAATGATCTAGATAGAACGATTTTCCTTAAATCTCC +GTCCTGAATTTTTGCTATTCCTTGCTTCACACCATCCATATAAACTTCAGGAGCTGGAAC +CGGTGTCATTTCAAATGTCAGTTTCTCATTTCGTTTTATCTCATTTGTCGTCTCTAACTG 
+TAAACGCTCAGTAATTCTACTATGTTCTGGTACGATAAGTTGAACTTCTTTTCTACGATC +AAAGGGCAAAGCACCAACAGCGATAGGATTCGGATTTCCGGCTTGTTTTGCATTACTTAA +TACCGCTTGTACAAGTTCTGGAAAGCTTTCAATTTCACGATGTTTTACTGTAGTAAACTC +TCCTTCTGCTAATATTGTTCGAGTTGGTGAAGCGAAAAAGAATGAAGATTCTGTCTTATA +ATCTTCTAAAAGTTTTTCTGACAGTTCCTTTACGGCTGTAAGTTCATTCATAGTATAATA +ACCTCCTGAATTTTTTATTAAACTCCTAATGTAGCTCCGCCATCGACACATAAATTGTGC +ATTGTAATATGACTCGCTTTATCAGAAGCTAAAAACAATACTGCTTCAGCAATTTCTGAA +GGTTGTGCAATTTTTTGTAACGGAATACCGAGTCTATATGTATTTTGAGAACCAGCAATT +ATATTTTCAGCTCCATTCTCATCAGCCCATAGTAATCTTTGCATTTCAGTTTCAGTAGAA +CCAGGAGAAACTAAGTTGCAGCGAATATTGTATGCTGCAAGCTCTAACCCTAAACACTTC +ATAAACATTGTCGTTGCGGCTTTTGATGCAGCGTACGCAGCCATTTCCATTCTCGGAGTA +TTTGCCGCATTTGAACCAACTGTAACAATTGCCCCTGACTTTCTTTGCATCATATGTTTA +CTTACTGCTCGAGACATATAGAAAACTCCTGTAGAATTTACAGAGAAGGTTTTATTCCAA +TCTTCATCACTTAAAGAGTGAATTGGTCCCATACGTAATATCCCTGCAACATTTATTAAT +ATATCTATTGGTGCTATATCATTTTCAATACCGTTTACCATATCTTCAACAGCGGTACTA +TCACTCACATCTAAATGAAATATTTTCATACGTGTTTCATTTAATTCATTTTCATTTAAG +AGTACGTTTAACCCCTCTTCATTTTGATCAACCGCAATAACTGTAGCTCCTCTTTCTAAA +AACATTTTGGCAACAACACTGCCTATACCTTGAGCTGCACCTGTTACTAAAACCGTTTTT +CCATCAAATTCCCCTAAGTTCATTTCCTCTATCCTTTCATTTGAAAATTAATCTAATTAA +ACAAATAGCAATAATGATAACGATTATCACTATCAATGTAAAAAATAATACACTTACTAA +ACTAATTGGTCAAGTGTACTTTTCATCAAAATATTATTCATTGATAATGTTTTTCATTTA +ATATTCTTACATCGAAATGATAATCTTTCTCAACAAACTTGTCAACCTATTTTTATTTTT +CAGATAAAATATTTTAGTACTTCTCAATACAATTACATCTAAAAGCATTAATAAGTACTT +ACATTAAAAAATTTAATTCGTACTCCTCTTTTTAAAATTGAAGAAGATAAACTTTAGATA +CCTTCATTTACAAAGCCTTACCAAAAACTCAAGTATATATTTTCTCAACATTTTTCTGAA +AATATCAAGAAAGTATATCTTGCAATTACCGATGCATGTCGTGTTTATAATTTAAGATTT +AATAAGTATTTCGAATATAATATATCAATTTATGATATTCCAATATAATCCAAACGGATA +CTTTACTCATTTTTTTGCCTCTAGAAAATTCCACATGTTGTAATTCATAAAAAAGCACCC +TATTAAATAGGGTGCTTTTTAGTAAACATAGTGTTAAAGAAGATTTATATACTTACGCTA +CACTGCGATACTCTTCTTCTTTTTCTTTTTTCTGCTTAGGCACTTGGATTAAAAGTGCGA +TAATAAATGATACGACACATAATACGCCGATCACCATGAAAGTTGGTTTGAATCCACCTA +GAACTGCACCGATAAAGGAACCAGCGAGCGCTCCAAATCCAAATCCTTGATACACAATTC 
+CGTAGTTCTTACTATGGTTTTTCATACCGTAGAAATCACCAACGATAGCTGGGAAAATAG +TGATATTTCCACCAAAGCAAAACGCTACACTTGCTACACATACGAAGTAGATACCATAAT +TTAAATCTACGAAACTTAAAACCAAGACTGACGCTGCCATAACAACAAATGTACCAGTAA +CGATTTTTAAACGGCCGATTTTATCTGATAACGGTCCTAAAATAATACGACCTAATGTAT +TGAAGATTGCAACCATAGCCACCGCATTAGCTGCTGTCGCTGCGCTAAGCCCTACAAGTT +GAACACCAATGTCTTTTACCATACCAATTAAGTATAAGCCACTCATACATGATGTAAATA +ACATAATAAATAATAAGTATACTTGTTTTGTGCCCAACATTTCTTTCGTTGTATATTCCT +GAGTTTTTGTTTCATGAACTGCTCCTTGATCTGCAGCTTGATGGATTAAACAAGCGCCAA +GGACAATCATAGCTGTAACAATTAAACCCCAGTATATAAACGCTTGTGATACACCAACTG +ATTCAATTAACTGTGCGTTAATATATTTAAAAATTAAGCTACCTGAACCATATGCAGAGA +CAGAAATACCAGCAATTAAACCTTTACGCTTTGGAAACCACTTTATTAAATTGGATAATG +AAGTGATATATGCTGTACCATCTGCATAACCTACAACAACCCCTGCTAGTACATAAAGCA +ATATTAATGAGGAAGCTTGTGAACTAAGTATTAATCCAATTCCTAATGCTAGTCCAGCTA +TCATAATAAGTTTACGAAGTCCCCATTTTTCTTGCAATTTACTCGCAAACAAAGTTGAAA +ACGCTAAAGACAGGCTAGTAATTGAGAAAGTTATAGCAACAGCGTTAAGACTCCAACCGT +ATTTACTCACTAAGGGCTGATTGAATAAACTCCATGTATATATCGTACCAAGCCCCATTT +GTACTATGACTGTTCCAAGGACAACAAGCCATGGATTAACCGATGATTTTTTCATACCAT +CTCTCCTCCTCTTTCTATTTCAATGCTATTCGCTAAATATGCTTTTTTTATTTTTATTTT +CGCCACCTCATTCGACTTAACAACTTCATTGTATAGGATGTTTTCGCTTATGTAAGCACT +TACAAGATGGAGCGTCAAAAATATGGAATCAGTTACAGAAAACGAAGGGTGAAATACAAA +AAACTACTTGTTAAATACGTAGTAGTTTTTATAATGTTTTACATCAATACGACTTATGTG +TGTTTTCATATCCTCTTTTATACAAAGTTATGCGCCCTGTTACACCATGGCTGGACTTCA +AAATTTTGATTATCCTATATAAACAAAAAAACACCATATTCTTTTTAACTCCCCAGCTTT +ATAACATGTTGCGTCTATATTTCTTAATACAAATCATTTAATAGTCTTGTAATCTGGCTT +CTCGTTACAATGCTTGTAATCAAAAATCCCACCATTTCCACTTCCGTATATCTTGTTTTT +CTAACGAAACATACAAAAGTACAACGTTTTTTACAATTTTCTACATAAAAACAGAAAATA +TATAATTCATTTACATAATTTTGCACTTTAATTTTTATTAAACAACAAATATTTTATTGA +TAAATAAAACTAAACATTCACTAATATATCTATACGTGGTTTAATATATATAATAAACAA +ACATTTCTGACTTTTATAAGGAGGTACTATGAAAGCAAACGTAGGCGATACTATACTATT +TCAACGAAATAACTTAAAGATCACAGCATCTGTACTAAAACTATACACTGAATCGGTCTT +AGTTGAAGTCACAGATGTAAGCGGTGGTACTTTTGAATTTGACCGAACAATTGTAAATCA +TAAAAACTATAAAATTTTAAATGCAAATAAATAAAACAATACAATACAAAATAGCCCCTG 
+CTTAACAGCGGAGGCTATTTTGTATAATAACAATTCATTAATTTACATATCCTAAAACAC +TAACAATCTTGCTTGCTTTTAAATTATTAATATAATAATAAATTTCTAACCCTCTTCTCT +CAGCTCTTAGCACTTTACCTTTCATTTTTGATAAATGCTGCGAAACAGTCGATTGAGGAA +TATTTAATATTTCTGTTAATTGTGTTACATTACAAGTTTTGCGTGCACTTAATTCATTTA +CAATTTGTAATCGTATTGGATGCGCCATTATTTTTAAAATATCCACATCTTCTTCAGTTA +CTAAACTTTTTTTTATCTCGCTCGTATATGTTGTCATAAGACATTCCCCTTTTATTTGAA +TTCCTTTTTATTAAAATCTGCTTCTAAATAAAATATTATCTCTAAATATACAATTAAGAC +CCTTATATAGAAATAATAGGATTTTGATTATTTAAAAACAGCAATACTATACAATTTCAT +CTATTTGGTACATATACATAATACAATTTCAATTTGTGCTCTATGTGAAGTTAGCATAAA +TTTATAATGAAGATTTTTCAAACAATTCACATCTAACTATATACTTCCTCATAACACTAT +CCAGTGATTTTAACATATTTTAGGCGTTTGTATATACTATTGCAAAACACATTAATATTG +AGGGGGACTTTATATGCCAACACCGTCACAAGAACAAGTTGAAGAAGCTCGTGAAACATA +TGTCAGTCGTATTATTTTAGTTGGTGGTTCAGCTTTATTTTTAACAGGTGGTGTTATATC +TGCAATTTCAGCCTTTAAAAGTTATAATCGTTTGGCAAATACGCCACCTTCTTCAAATGA +TTCATAATTGCATCATTCATTCCTTTAAATAATAAAGATGCCGCCTCTACTATAGAGACG +GCTTTATATAAAAACATTATTAAATTCATTTGAACTTAATTGAAAAAACGTTATAACAAT +TTGGTACAATGTTCAAAGTAAGGATAGATACTTTCAATTTGACAAGCTTATTTCACCACA +AACCTCGTCTGACTACACCCATCCTTCTGCTTCGTTACTTCTAATACAGCTGGCATTGCA +TTTTTCAGTTCTTGAACGTGTGAAATTACACCGATAAAGCGACCTGATTTTTGCAAGTCG +ATTAGGGCGTCAACTGCTTTCGTTAATGACTCTTCATCTAATGAGCCGAATCCTTCATCG +ATAAACATCGTTTCGATGGAAATACCACCCTCATACGCTTGAATTACATCTGCCATTCCA +AGTGCTAAGCAAAGTGATGCGTTAAATTTCTCACCGCCAGATAATGTTTTTACATCTCGG +GTTTGACCTGTATATGCATCATATACATCTAATCCTAATCCACTTTGTCGATTCCTCTTT +TCAACTCGTTCGCTTCGTTTTAAATAAAATTGTCCATTTGATAATTTACGTAGTCGTTCG +TTCGCAATTTGAACAATTTGTTCTAAATACTCAATTAAGATGTAACGTTCAAAGGATATA +CGACTATCGTTATCCCCTTTCATTACTTCATATAAATCAACAAGTTCTTGGAATGCTTTT +TCTTCCTCATGAATTTGTTCATCAATTCGTTTAATATTTTCATGTAAATCCGTAATATAT +GATACGGCGTTTTGAGCACGTTGTCGTTTTTCTTTAATAATATCAAGGCTAATTTCCAAT +TCTTTCACTTGCTCATCTAATGAAGAAATGTCCATATACTCTTTATCTTTTAAGTCTGCT +TTTAATTCTTCAATTTGTTTTGTAAGTACTTCTAGAGATGAGTAATAACTTTGGATTTCT +TGTTGTAACTTATCCATCTCAGCATCATTTAATTTGGATTCTTTATATGTTAATTGATCT +GTAAATCCGCTTTGTTCTAGCTCTTTCATAAAGCGTGTGAACGTCTCTTCTTTCTTCTCT 
+TTTGCACTATTAAATTGATTAGAAGCATTATCATGTTCTGCTTGAATACGTATATTTTCA +TTTTGCCAATGTTGATACGCCTCTTGTACTTTCTCCCATTCATCTTCCATTAACCTAAGT +TCTTGCAGAGCTTGGTCAAACTGTACTTTCCAAGCTTGTACCGTTTGCAAGCTTTCCGGA +ATATTTTGCTTATCATGTTCATATGACGTACGAAGCTGCATACACTCCATTTCAGTACGG +TGCTGCACAGTTTCTACTTCACGCTTTTGTTTCTGGAGTTCTTCTACTTTTTCTTCTACG +CTTTTTATATTCACAGCAATTTGCTTACGCGTTTCTTCACTTGCTTTTAACGCATTAACT +TCTGCTGCTAATTGCTTTCCTTTTTGAACGAGCGCACTATATGTTTCTACTAATTCTTCT +GAGCGATAGCCTCGCTTTAGCACTTCTTCTATCACTTGTTCATATTGAACTCGATAGAAA +TTCCACTTTTCTTCTAATTGAACATGTAATTTTTCCGCAATAGTTTTCTTTTCTCTTAGC +TCATTTAACTCTTTTTCATCAATTGCATTACTTTGCTCTGTAGCTTTTTTCGGGTGGTCC +ATACTACCACATACTGGACAAGATTCACCATCATGTAAATGAAGAGCTAATATACCAGCT +TGTTCACTTAACCAGCGGCGTTCCATATTTTCATATGTACTAACTGCCTGTTGCATCTTA +TGAAACGCACTTTCTTTTTCTAGCTCGTATTTTTGTTTTTCTTGCCATACATCATATGCC +TGCTTTAATACTTTTGCATCTTCTCGCATATTCGTTAGTTCTTCTACTTTAGCTACATAT +TGTTCAAGCGCTGCTTCTAATTGCTGTAATTCGCCAGACATTCTTTGTTTTTCATTTTTA +TACGCTTCTAGTTGTTCATCTAACTTTTGCATACCTTCTTTTAATTTTCCTATGTGAATT +TCAGCATTTTGCAAATTTAACTTTTTCTCAGCTAATGATGCAATAATTGGCTGTAACTCT +TCTAGTCTTTGAACTTGTTTTTTAGCATTTTCTCGCTCGGATGCCTTATTCTTTAATGCT +TCATATTTCTCCTGAGCAAGTGCAAAACTGTTCATTATATGCTCTTTTTTTGCGATTATC +TGTTTTAACAAACTTTCAGACTGCTGTTCATATTGCATCGCTTCCTCATGCCATTGCTCA +AATGGTAATAATCGCTTCGCCTGTTCCGCATGTTTAAAAGATTTTTCTTTCATTTCGATT +ACGGCACGGTTTTCTTGTAAAATACTGTACTTCTCATTCTTTTGTTCTAAATCTTTGAAT +TTCTCATTTACAGCTTTTGCTGCATGAAAACGTGCCTCTGCTTCTTTTAATTGTTTCGTT +TTGTCATTTTGTTCTAAGTGTAATTGCTCAACCTCTGCATTGTACCAATTTGTTTCTTGT +TCTAATGCCTCTACTACTTGATGTGTATTTACATGCTCTTGCTCCACTAATGTCTCTAAT +AATGCCCCATCACGTATTGGTAATTTAAATACATTACGGAAATATAGCTCACGTTCTTTT +TGTTTTTCTTGTAAGACGTCTTTCCATTGTTTACGCTTTTGATCTAATATCTCACGCATT +AATTTATAACGATCTGTTTTAAAAATGCGGCGTAGTATTTCTTCTTTATTTTCCGTTTCA +GATGTTAATAATTTTCGGAACTCTCCTTGCGGTAACATGACAATTTGGCTAAATTGATGT +TTACTTAATCCAATTAAATCTTCAACTTTTTTATTTACATCCGTTACATGAAAACGATCG +ACACATGGAACTTTCTCCTCATCAATTACTTCATATAATTCGACTGCATGTCCTGTAATT +GTTTTATTTCCTTGTTTTTTATGGCCCAGTTGTCGTTTAATTTCATAACTTTTTCCTTTT 
+AATTGAAAAGTTAATTCCACACTTGTATAAACATCATCATCAGCAAATTGGCTACGAAGC +ATGTTAGTATCACTGCGCTCTTCTCCACTAGCTTCTCCATATAATACATAACAAATTGCA +TCAAAAATCGTTGTCTTTCCAGCTCCTGTATTTCCAGAAATAGCGAAAATACGATGCTCA +CCAAGATCTTTAAAATCGATTACTTCTTTCTGTTTATATGGACCAAACGCGGTCATTATA +AGCTGAATCGGTCTCATCCTCGCTCACCTTCCCGTTCTTGCACTGTTTGCAAAACATCTA +AAAATAGACGTTCTTTCTCTTCTGGTAGATCTAGCCCTTTCATTTCTTTATAAAATGCTT +TTAAAAGAGATAAATCATCCGTTTTATGTCTTGAAACAGTTACTTCATTTTCATCTGTGA +ACTCTCGTCTTTGAATAGATCTTTCAACATGCATTGCATTTGGGTATACAGAACGTATTT +TTTCCATTGGCTGCAAGACAGGATTTTCATCTAATAATTTCACAAATACATAATCTTCAC +TCACTGGATGAAGCAATAAATCGTCTATTTTCGCTTCTACTGTACGCATTTTACGACGTG +GTGTAAGTAAACGTTTTTCAATTGTTGTTTCACCTTTTTCATCCAGTTCCACAATATAAT +ATCCTTTTTTATGCTTTTCTTCAGAAATAGAATATGCAAGTGGTGAACCTGAATAACGAA +TTGTCTCATTACGTACAAAATGCGCTTGATGTAAATGGCCAAGCGCCGTGTAATGAAATT +TATCAAAATAATGGCTATTTACATATTCAGCACCACCAATTGAAAGTGGTCGTTCCGCAT +CACTCGTATTTTCCTCTGCCTCTCCTGAAGAAGTTACAAATGCATGTCCTACAAATATGT +GTCTCGCTTCTTTATCCATCGTTTCAGAGAGTTCGTTCATAAAAATACGCATCGCATCAT +CATGAGAACGAACATCTTCATTTTTCAATATATGTCTAACAATACTTGGATCTGCATACG +GAACGAGATGGAAATGAACCTCTCCGTATTCATCATTTAAAACAACGGGATTGTATGGAA +ACTGGAATTGTCCAACAATATGTAATCCTTGTTTTTTCATTAAATTACTACCAAAATGTA +TGCGGTCCGGACTATCGTGGTTTCCTGCAACTGCGATTATTGGTGTTTGTAAATCAATAA +CTATCTTTTGTAATACATCATTTAATAAGTCTACTGCTTCTGTAGGTGGAATTGCTCGGT +CATATAAATCTCCTGCAATAATTACAGCATCCGGTTTTTCCTCTTCAACAGCTTGTACAA +ACTGATCTAACACAATTTTTTGATCTTCAGTCATATATACACCATGAACAAGCTTACCTA +AATGCCAATCCGCTGTATGAAAAAACTTCATATTACCCTTCTCCTTTTAGTTTACTGATT +GTAGTGAAACAAGCTGATAGTTTAGTGTAGTTTTTCCGTTCTCCCATTCTAACTTTTGGA +CAATTGCACTACCGATTCTTTCTTCGTTTGATTTATATAAAGGCAATATTTCTTGAATAG +TAAATAGATGATAACCATCTAAAATTAATTGAAATAAATTATCTTCTATATGTAATCTTA +CTTCTTTTTCATTTGAAATAATTTTTGTATGCATCTCAAATTTCATGCTAGATACAACCC +CTTTTTTCTAATCTCTACATTTTAATAGTATAGTGGTCATGCAAGATTGACAAGTAAATA +CACCGGGATGCACATACTTTTTTAATTATAAATATTTTATGTTTTTTTATATGAAATACG +GTATGTGAAATTATTAATCTACAAAAATAAAAATGCCCTTTCAGGCATCCTTATTTTTGT +AAGTACTCTTTCATAACGTAGTACAATACAGTTATTAAGACAATCGCTACAAAAAACGCA 
+AATACGGCCACAACATACCCATCTTTATTTAATAAAAAGCATATGGAAATAGATATTATA +AAAATCGGAAACACGAGACGTACCTTAGCCTTCACTTCTTCTGTCGTATCTGTATGATGA +AAATACTGTGTAATTCCTACCATAAGAGATGACAAAATAATAACCGACAATAGTAAAGGT +CCTGGCATTCCGATCTCTCCTTTCTCTATTTCTAATTATATACAAACCCCGCGCTCTTTA +AAAGTTAGATAATTCAGCCTTTTAACTTTTTCTACATCCACTCTTTAATTTACACCACGT +ATACTAACTCAATATAAGTTATACATATAATAGCGCCTTCAACACGACATTTTCAAATAA +TAGCACTTACAGTAATGTTGTAATATTCCTCTTCATTCTCTGAAATCCTGCAAATATAAC +TCGTCAAAATATGCGATAAGTACTAACACTAAGCACCTAATTTAGACAAAAATTATAGAG +GACATTTCAGCATGTATCAAGAAATAAGAATAGAAAGATAAGTAAGGGAGGATTTATAAT +GACTGAACACGTTTTATTTTCTGTTAGCGAAAACGGCGTTGCATCTATTACTTTAAACCG +TCCAAAAGCACTTAATTCTTTATCCTATGACATGTTACAACCAATTGGGCAAAAACTTAA +AGAATGGGAGCAAGATGAGCGTATTGAACTCATCGTTTTAAAAGGAGCTGGGACGAAAGG +TTTTTGTGCAGGTGGTGATATTAAAACGCTATACGAAGCACGTTCTAACGAAGTTGCATT +ACAACATGCAGAGCAGTTTTTTGAAGAAGAATATGAAATTGATACATTTATTTATCAATA +CAAAAAACCAATTATCGCTTGTTTAGATGGAATTGTAATGGGCGGTGGTGTCGGTCTGAC +AAATGGAGCGACGTATCGAATTGTAACAGAGCGTACGAAGTGGGCAATGCCTGAAATGAA +CATCGGTTTCTTCCCGGATGTCGGTGCTGCTTATTTCTTAAATAAAGCGCCTGGATATAC +TGGTCGATATGTTGCTTTAACGGCATCTATTTTAAAAGCTCCTGATGTATTATATATTAA +CGCTGCTGATTACTTTATGACATCAGATTCATTACCAAATTTCCTTACCGCACTTGAAAA +TGTAAATTGGAAAAAAGAAGATGTACATACTCATTTAAAAGAAGTTATTCGTACATTTGC +AACTGCTCCAAACTTAGATGGCAATCTTTCTTCTTCAGTAGAAGAAATTAATTCGCATTT +TGCATTCGATACAATTGAGGGAATCATTCAATCGTTAGAGAAAGATCAAAGTCCATTTGC +CCAAACAACGAAAGAAAAGCTATTATCCAAATCCCCTGTTTCATTAAAGGTAACATTAAA +ACAGTTTATTGATGGCCAAGAAAAGTCAGTTGAAGAATGTTTCGCGACAGATCTTATACT +CGCTAAAAACTTCATGCGACATGAAGATTTCTTTGAAGGAGTACGCTCCGTTGTAGTTGA +TAAAGACCAAAATCCAAATTATAAATATAAACAATTAAGTGATGTTTCAGAAGAAGATGT +AAATCGATTCTTTAACTTACTTAACGCTTAAGCAAAGAGGCCATATATTGTAGCACACTC +TCAGCTACTCTATATGGCCTCTTCTTTTTAGTTAAATAATTTTAAATATAATTTAATTTT +ATTTAACTAAAATGTATGTTAAACTAACGTTAATCGAATTTTCTAAAAACAAAAAAGGAA +GGCGGATAACATTATGGAAGATTTTTTTCTGTTTATTATTATGTCTATATGTCTTATTAT +TCTACCCGGTCCTGATACAGCAATGGCTACGAAAAACACATTGATTGCCGGCAAAATCGG +CGGCGTAAAAACTGTTTTTGGTACTTGTGTTGCGCTTTTAATTCACACTTTAGCTGCTGT 
+AATTGGTCTTTCAGCACTTATTGTAAAGTCTGCTCTTTTATTTTCTATTTTTAAATATGT +TGGTGCTGTATATTTAGTCTATATTGGTATTAAAGCCCTTTTAGCAGTAAGAAACACAGA +AGACTTAAATACAAATGACGTTCCAATAAATAATGACAATAAACATACTTCTTGCTTTCG +CCAAGGGTTTCTTACAAATTTACTAAACCCTAAGATTGCAGTCTTCTTTTTAACTTTTTT +ACCACAGTTTTTAAATCCAAATCATAATACATTTATACAACTTCTCGTTATGGGACTTAC +TTATCTCATTTTAACCGTCATTTGGTTTGCTTTTTATATATTTTTAATTGATAAAATTAG +TGCTTTTATGAAAAAACCGAAGACACAGCGTTATATTCAAGGGATTACAGGAATCGTCTT +AATTGGGTTTGGTATTAAATTAGCCTTTGAAAGAAATAGTTAACACAATACAAACGAAAA +TACACTACTCGATTATGAGTAGTGTATTTTCGTTTGTATTATTTTTATAATGAGTGGGAA +TGCACCCCACTCATTATCATTTCACTTTATTAAAACCAGCGCTCAGTTACAACTTTTTTA +CGTGTATAAAATTGAACGCCATCCGTACCATTTGTACCAAGATCTCCAAAGAATGAAGCT +TTGTTTCCTGCAAATGCGAAGAATGCCATTGGTGCTGGAACATTAACGTTTACACCAATC +ATACCAGCATCAATATTGTCTCGGAATGTTTGTGCATGTTTTCCATTTGATGTATAAATA +ACAGCACCATTCGCAAATTTCGATTGATTTGTCAGCTTAATACCTTCTTCTAAATCTTTT +ACTCTTACAATACTTAATACCGGAGCGAAAATTTCATCTTGCCAAATTTTCATTTCTTGA +TTTACACCATCAAAGATTGTTGCACCAACAAAGTAACCTTCTCCAACTTCTTCATTGATT +TTACGGCCATCTACTAGTAAAGTTGCTCCATCTGCTACACCACTATTAATATAGCCTAAA +ACACGTTCTTTATGAGATTCACGGATTAATGGTCCAACATAATTATCTTCGTTGAAACCG +TCACCTACTTTTAACTTCTTCGTTTCCGCTACTAATACATCAATGAATTCATCAGCAATT +TCATCAACTACTGCTACTACTGAACATGCCATGCAGCGCTCTCCACTACTTGCGAACGCA +GACCCAATTACACCTTGTACTGTTTTCTCAAGGTTGCAATCTGGCATAACAACCGCATGG +TTTTTCGCACCTGCTAACGCTTGTACACGTTTACCATTTTTTGTACCAGTTTCATATACG +TAGCGTGCCACTGGCTCAGAGCCAACAAACGAAACTGCTTGAATATCTTTATTTTCTAAA +ATACTATTTACAACATCTTTTCCGCCTTGTACTAAATTTAATACCCCTTTTGGAAAACCA +GCTTCATAGAATAGCTCTACAAGTCGCTCAGCTAAAAGTGGCGTTCTTTCAGATGTTTTT +AATACGAATGTATTACCGCAAGCAATTGCAAGTGGGAACATCCATAATGGAATCATCATC +GGGAAGTTAAACGGTGTAATACCAGCAACAACTCCGATTGGGTAGCGCCAAATCGATCCA +TCAATTCCACTAGCAATATTCGGAAGGGCTTGTCCCATCATTAAATTTGGTGCTGATGTT +GCAAGTTCTACCGCTTCAATACCACGCTGTACTTCACCAGTTGCATCCGTTAGCGTTTTA +CCGTTTTCTAGCGTAATGATTTTTGCAAGCTCTTCTTTATTTTCTTGTAACAGTTGTAGA +TATTTGTATAGTTGTCTTGAACGATTTGGAACTGGCACTTTAGACCATGTTTTGTATGCC +GCTTTTGCCGCTTCAACAGCTTTTTCAACATCTTCTTTTGGAGAAAGTGGAACGTAAGCG 
+ATGATTTTTCCGGTTGCCGGATTCGGAACCGCTTCTACTTCCGTACCAGTAGATTCTACC +CATTCGCCATTAATATGATTTTTCACTCGTTTAATTTCAGTTGTAATCATTATATTCTCT +CCTTTTTTATCATGGTTTTTATAAGTGCACTATTTTTGATTAGAAATTAATTGTTCGCTA +ACTTTCTTATATAAAGCAGCCATATCGTTTTCACCATATCCTGCCTCACTCGCTTCTTCA +TATACGTTTAATAGCATCTCACTTACTGGTAAGTGAAGTTCGCTTTCTTTCGCTAAATCT +ACCGCAAATCCTAAATCTTTCTTTAATAAATTCACAGTAAAGCCTGGCTCATAATTTTCT +GATGCAATGAAACTTTTATAATTACGCTCATAAATTCTACTTTGACCGTAACTTACATTT +AAAATTTCAAACATTTTATCTAAATCCATATTGTTCTTTTTCGCTAATGTTAAAGCTTCA +CTCACACCAGCTGTATAAAAACCAATTAATAGATTATTAATTAATTTAACAGTTGTACCA +CTATCAATCTGCTCACTAACATGGAAAACATTCGCTCCAAGCACTTCCATGACAGATTCA +GTTTTCTCATACACTTCTTTCGATCCACCAACCATAAACGTTAATGTACGGTTTTCTGCA +CCAATTACCCCACCGCTAACAGGTGCTGCTAAAAAGTCTACTTTCTTTTCCTTCGCCGAT +TCCTCTAATTGTTTGTTTAATTGTGGAGATACTGTACTTGTATCAATTAAAGCTACATTC +GAGTGGCTATTTTCAAATAATCCTTCTTCCCCAAAATATACCGCTTCAACAGCACGAGGT +GATGGTAAGCTTGTAAAAATCACATCGCATGTCTCTGCTAGTTTTGAGATTGATAGGCCG +ATAATTCCTCCTTCTTTTTCAAAGGAAGCTTCCGCCTCTTTATTCAAGTCCACTCCATAT +ACTGTGTAGCTAGATTTAACTAAATTTTTAGACATTGGAAGACCCATGTTACCTAAACCG +ATAAAACCAATCTTTTTCATATCGCTTTCCTCCTATTTACACAATATATTTCCCGCGATT +TATAATAACTTTCGCAATATCTCTCTTTTTCGTAAATAAGTTTGAGTATAAAGGCACTAA +TAATTGACGGATTTCAGCTAATATTTCTTGTCTATTTTGTTCTTCTGTGACAGCTGCTGA +AAGAATGGAAATCGCGGATTCTTCTACTTTACGATATCCTTCTTCACAAATAACATCTGT +TATCATTTGTTTCGTACGCTCTTTTTCCTCACCATTTTTACTAACTGCTTTCCTCGTACG +TAAAAATGCTGATTCCATGACGTACACGTCCGTCAACATATTTGATAATACGCGTGAATA +TTCTTGCTCTTGCTCAATTTTTAAACCTGGAGTTTTAGAGAGCGTTTTCAAAGATTGTTT +CAACAATTTTTTCGCTAATAAAATGTAACGATGGTTTCTTTCTACATTTGCCACATCAAT +TTCTACTTCAGCATCTTCTATTTGCTCGATTTGCTTCATTAACATTTTTGCAACTGTTAA +TCTATTAATTTCATTCGTTCCTTCAAAAATACGACTAATCCTAGCATCACGATATAATCG +TTCTACTTCGTATTCTTGCATATAACCGTAACCACCATGAATTTGTACAGCTTCATCTAC +GATATAAGCAAGTGTTTCAGAAGCGTTTACTTTATTCAGTGCACATTCAATTGCAAATTG +GGACATTTTTTTCATAAGATCTTCATCACTCTCATGAATTGCTTCATCAATTACACCCGC +TGTACGGTAAGCTGCACTTTCTGCTCCATATGTAGAAATAATCATATTGGCAATTTTCTC +TTGAATCATCGTAAAATCTACTAATTCCGTTTGGAACTGTTTTCGCTCTTTTCCATATTG 
+AACCGATAAACCAATTGCTTGTTTTGCTGTTCCAATATTTCCAAAAGCCAGTTTTAGTCT +AGCAAAGTTAAGGATATTAAGAGCTACGTGATGCCCTTTCCCAACTTCCCCTAAAACATT +TTCAGCAGGGATGACAACATCTTCTAAAATAAGCGTCGCTGTTGAAGAACCTTTAATCCC +CATTTTCTTTTCTTCTAGTCCAATGGATACACCTTCACATGTTCTTTCGACAATAAACGC +TGTCATTCCTTTATTTGTCTTCGCAAAAACAACGTATACATCTGCCATATGAGCATTTGT +AATCCACTGCTTCTCACCATTTAACTTCCAAGCAGTTCCATCTTCATTCAATACTGCACT +CGTTTTTGCACTTAATGCATCAGAACCAGCATTTGGCTCAGTTAAAGCATAAGCCCCAAT +CCATTCTCCAGACGCAATTTTCGGCAAATATTTTTCTTTCTGTTCTTTCGTTCCATAATA +TATGTAAGGCAACGTACCTACACCGGCGTGTATATTAAAAGAAACGCTAAATGCACCAGC +GTAACCCATTTTCTCTGCTACAAGACCTGAGACAGCCTTTCCTAACTCGAATCCGCCATA +ATCTTCTGGTACCTCAATGCCTAATAATCCAAGTTCCCCAGCTTTCTCAAATAATTGACG +AGAAACTTTATAGTTATGTTGTTCAATATTCTCAATTTGCGGGACAATTTCTTGTTTAAC +GAATTGTTCTGTCGTTTTTGCAATTAAATCTTCATCCCCTGAAAAATCTTCTGGTGTAAA +GAAAGTATTATTCACATCTTTATTGAGTGAAAAAAATTCATCCCATGGTAACTTCGTTTT +CTCCATCTGAATCCCCCTATCATTTCGGCCTTCGTTGATTTTTGCATATGCTCGTCTACG +ATGAGTGGAATATCTTATTATTGAAAACAGAAGGCGATGATTTTTCACATCAAGCAGTTC +GTTTTGCCAAAGATGTATTTTCATCGCCTAAATCCTTCTACCTATTGATCTTCAGATAAT +ACTGTTTTTGCAATTCCAGTATCTAACTCTTCAAAGTCATGATATGAAATTGTTTCATAT +AATTCACTTCTCGTTTGCATATTAGAAAGTGCATCTTTTTGAGAACCCGTTTCCTTAATT +AGCGTGAACACATTCTCATATGCTTTTGCAGCAACACGAAGTGAAGTTACAGGATAAATT +ACCATTTGGAAGCCCATATTCGCAAATTCCTCTGCACTATAATATGGTGTTTTTCCGAAC +TCAGTCATATTTGCTAGTAAAGGTGCATTCACTTTGCTAGTAAATAAACGGAATTCTTCT +TCCGATTGAAGCGCTTCTGGGAATATTGCATCTGCCCCTGCTTTTACATATGCATTCGCT +CTTTCAATCGCTGCATCTAATCCTTCCACGCCGCGAGCATCTGTGCGTGCTACAATATAT +AAACTTGGCGCAACTTCTTTAATCGCTTTAATTTTTTGAACTAATTCTTCTGTAGTAACA +AGTTTCTTACCATTTAAATGTCCACATTTCTTTGGTAATTGTTGATCTTCAATCTGAACA +GCCGCAACTTTCGCTTCCACCATTTCTACAGCTGTCCTCGCTACGTTTAGTACTCCACCA +AATCCTGTATCAATATCAACAAGAACTGGTAAATCTGTAGCTCTAACTAGATCCCTTGCT +CTCTCTGCTACTTCAGTAGACGTCACGATTCCTAAATCTGGTAGTCCTTTACTTGCAGTG +TAAGCAGCTCCCGATAAATATAGAGCTGAAAAACCTGTGTTTTTCGCAACAAGAGCCGCC +ATTGCATCATGAGCACCTGGAATTTGCAAAATTTCATTTGCTTCTACTAAAGCTCGGAAG +CGATTCGCAAGCTCCTCTTGTGTTGACTGTTTATTCACAACCCAAGCCATTCTAAATTCC 
+CCCTATTATTAAATTAAGAATAGATCTACAAATTCGTTTACATTCATATTTTCTAGTTTT +TCTTCATTTAGACAAGCATCATGGATTTTTTCTTGTTGCTTACTAGAATAATGACCTGCC +ATATTTGCAGTGAATTTTCGAACAACTTTTGGAATTGCTTCGTCTCTACGGAAACGGTGG +CCAAGTGGGTATTCACATTCCACATTTTCTGTTACAGTACCATCTTTAAAATGAACTTGA +ACAGCGTTGGCGATTGAGCGCTTGTTCGGGTCAAGGTAATCTAAACTGTACTGTTTGTTT +TCAACAACAACCATCTTATTACGTAATTCATCTACACGTGGATCATTCGCTACTACATCC +TCATAATCATCCGCAACGATATCTCCTTTTAATAAACCTATTGCCGTAATGTATTGTAGG +CAGTGATCGCGATCAGCTGGGTTATTTAATGGACCTTCTTTATCGATAATACGAATTGCT +GACTCATGAGTTGTAATTGTAATACGATCAATTTCATCTAATCTTTCTTTAAGTTCCGGA +TGTAATTTCACTGCACATTCTGCAGCTGTTTGTGCATGGAATTCTGCTGGGAATGACACT +TTGAATAATACATTTTCCATTACATAAGATTCTAGCGGTCTTGCTAATTTTAATTCTTGC +TTGTTAAATAACACATCTTGGAATCCCCATCCTGGTGCAGATAATGCCGTTGGGTAACCC +ATTTCACCTTTTAATGCAGTCATTGCAAGATGAACACCGCGGCTCGTCGCATCACCTGCT +GCCCATGATTTACGTGAACCTGTATTTGGAGCATGACGATATGTACGAAGACTAGAATTA +TCAATCCACGCATGTGATAATGCATTAAAGATTTCCTCACGCGTTCCGCCAAGCATTTTT +GCAACTACTGCAGTCGTCGCTACTTTTACGTATAATACGTGATCAAGACCAACACGGTTT +AAACTGTTTTCTAAAGCAAGTACACCTTGAATTTCATGTGCTTTAATCATCATTTCTAGT +ACTTCACGTACTTTTAATGGTTCTTTTCCTTCTGAAATACGAACGCGGCTAATATAATCT +GCAACTGCTAAAATTCCGCCTAAGTTATCCGATGGATGTCCCCACTCTGCTGCAAGCCAA +GTATCGTTATAGTCTAACCAACGGATCATACATCCGATATTAAATGCGCCTTTTACTGGA +TCTAGCACATAAGATGTTCCTGGTACACGTGTACCATTTGGCACGATTGTTCCTGGTACA +ACTGGTCCTAATAATTTCGTACACTCTGGGTATTGTAGTGCTAAAATTCCACATCCAAGT +GTATCAAGTAATACGTAGCGAGCCGTACTGAATGCCTCTGCACTTGTAACCTCTTTATTT +AATACATAGTCCGTAATTTCTTCTAATAATGCATCTTTTTGTTTAATTTCATTTGTTTTA +ATCATGCTATTCTTCCCTTTCTGTCGAAAATTCGTTATTATTAATGATGGGTGTCGGTCA +AACACCCATCTGGCGGGCGGCGCTAACAACTCCCCAGTTGTTTTTATTTACTAAATGCAT +GTCGCTCGCCAATATAATTTACACGTGGACGGAACAAACGATTGTTCGCATGTTGCTCAA +TTACGTGTGCACATAGCCCTACTGTTCTAGAACTAAAGAAGATTGGTGTGTAAAGTTGAA +TTGGAATACCAAGCATCCAATATACTGGAGCAGCATAATAATCAAGATTCGGATAAATTC +CTTTTTCCTTCTCCATAATTTTTTCTCCAGCTTCACACATTTCATATAGTGTATAATCGC +CTTTCACATCACATAACTGCTTTAATGCTTCCTTCATCATAAGTGCTCTTGGATCCATCT +TTTTCATATAAACACGGTGTCCAAAGCCCATAATTTTTTCTTTGTTATATAGTTTCTTCT 
+GTAATAACTCTTCAAATTTCTCAACATTACCAGCTTCTAAAAGCATGTACATAACCGCTT +CATTCGCACCGCCATGAAGACTTCCTTTTAAGGATGCAACTGCTCCTGTTAAAGCGCCGT +ATAAGTCGGATTGCGTTGATGCAATAACGCGTGCTGTAAATGTAGAGTTTGGCATTTCAT +GTTCACTATATAAAACAAGGGAACGATCAAAGATTTTCTCTTCAAGTTCAGTTGGTTTCT +TTCCAGTTAACATATAGAAGAAATTCGCACTATATGACAATTCTTGAAGAGGCTCAATTG +GTTCCTCATTATTTAAAATATGGTAGCTATTCGCTACAATATTTGGCACTTTACTTAAAA +GCTTGTATCCTCGACTTTTATTCACTTCTAACGAGCGGTTTTCGATATCACTATCGTAAC +CAGCTAATGCAGACACACCTGTACGTAGCCCATCCATAGGATGCGTTTCCTTAGGCAATG +CCTTTAGAACATTGAATACACCTTCTGGCACTGCATATTCTTCTTTTAATTTCTTTTCAA +TTGTTGCTTTCTCATCCTCATTCGGTAAACGTTCTTCTAATAAAAGGTGCACAATGTCTA +AATACTCTTTTGTTTTTGAGAGTTCGATTAAATCATACCCTTGAATTACAATTTCACCTT +TTACTGTGTCAAGAAACGAAATTTTCGTTTCCGCTGCTACTACACCATCTAATCCCGGGG +AAAATTTTTCTTCAGCTTTCATCATTATTTCCTCCAATCCTAATAGATCTTCGTGCACAT +AACTGTAAGCCTTTACAATAATATGAAACTACATTTCTATTTCAATGTATCAGAGTTTTT +AGAAGATTTCAATTTATTTTATAATGATAAGTTTCCTATACTTTTACCGATGTAATAATA +CAATTTTTCAAAAATAACTTTCATCTTTTATAAAGAAAGGACAGACATCCTTTTACAATG +TTTTAAAACGAAATATAACAATTATCCATATAATCCCACTAAATATACCTTTAATCCCTT +TTTCTTCAACAAACATAAAAAACCGAGCTGCACTCATGCACAGCTCGGTTTTTTATAATA +TTTATTTCATATTCCCATAAATATAATTCTTATATAAAATTTATTTATTTTTAATAAAAT +TATTATTTTACATAATTAAATTAAATTTCTTCCGTTTCTTTACGATAGCTTACAACTAAT +TCATAGTTTTGAAAGTACGAAAGATCATCAAAATCTATAAAAATAAACTACCGTCTTTCT +CAACTGACACTATTTCCCGAACTGATTTCTCAGAAACAATCCCTAAAGAATATCCGGGCT +GAATTGAGCTATTTCCTCCGTACCTTACAATACATCAAATAGATTCATCATGCTTTATAC +GCAAAATCTTTTTTGCCTAATCGTTGTTTTACAAAAATGACAAATATAATTAAAGCTATT +AATCCTGAAATTGCACTACCTGCAAATAAAATACGGTAACCAAACATTTGTGAAACTACT +CCAAGTAAAATCGCACCTAATCCAATCCCTAAATCAAAAGCGGTAAAGAATGAAGCGTTA +GCAACTCCTCTTTTACTAGGATCAACAATTGTCAGCATCGCTGCTTGTAAAGCGGGCTGT +GCTGAACCGAATCCAACACCATATAATGCCGCTGCAATCATTACACCTATCAAACCATTT +GAAATCGTTAGTACAACTATCGCTAAAATAGTAATACATAGTGCCGGAAGTATGATAAAA +ACTTCTCCATACTTATCTAATAGTTTTCCTGAAATAGGTCGTACAATTGTTAAAGCAATT +GCATATACAAGAAAGAATGTCCCAGGATTCACATCAATTGATGACGCGAATAACGGTAAA +AATGTTGTTATTCCTCCATATGCAAATGATAAAAAGAAAACAACAACCGTTATTGATAAA 
+ACGGATTTTTCAAACAGCTGAATTTTCCCTTTTTCTTTTTGTGGCGTGAATGGCATTTTC +GTTATTAACGATAATACGACTGCCATAAAGGATAAAAGAGTTGCTAACAAGAATAGACCA +TGAAATGAATAATTTTGTACAACCCATAATCCAATCATCGGGCCAATTGCCATCGCGATT +GTCATCGCCATGCCATACCAGCCCATCCCTTCACCACGACGTGAATCTGGAATAATATCA +GTTATCGCTGTTCCAACTGCTGTTGTAGAAACAGCCCACGTCACTCCGTGAATAACGCGT +AAAACAGCTAAAAGGACAATAGTCGATGCTAAATTATACGAATACATCGTTATCCCAAAA +AATATGAGTCCGAAAATAATAAAAGATCTTCTACCATATTGATCTAGCATACCTCCAATA +ATCGGTCGTATTACAACTGCCGCTATTGTAAACATTCCCATCATCAGTCCAACTTGCGAT +TCATTTCCACCAATCTCTTTAATAAAGAGCGGGAGCGTTGGAACAAGTAAATAAAACCCC +GTAAATAAAAATAACATCGCGATAGTCATTTGTATAAATGATTTCGTCCATAATCGTTCC +ATTAAAATCCCTCCATAAACACGTAAAATATTACAATCAACACATCAATTGTATATTTTT +CATGTTCCCTTGCCCTCCGCTAATAGACGTATCTTTTCATAATAAATGGTCTACCATCAA +TTTAAGTTGGTCTTACGACTTTATGCCTATGTTTTAAGCAAAAAACCTATATATGTACCT +TTATATGTTTTAGAAGAATATACTATTTTATTATTAAACTTAGGAGTGATCGTTTTCATG +TGGGTATCACCAAGAATAACCGCCGTTGTCCCATCAGCTCCCTCAATACATGTTCCTGTC +GTTCATACAGCTAGTTCTGATGTTGGTATCGAACTTCCTGCCGCTTGGCAACAATATCAA +TCGACTGACCAAGTCACCCCTTCATTTTGGCATCATGGTACACACCCGAGTAGTACTACT +TTTTATGGCCCTAGTTATCCAATACCGCCAACTCAAATTTTTTATCATTTCCCATCAATT +TACTTTCAAAATTTCTATGGTACTTTTAATATTTAAAAATAATATTACTCTTTATAATAC +AAAAAGCACCAAAGAAACCGTTTTTCCCGGTACCTTCGGTGCTATTAAAATGCATTATTT +ATTGTCCATTATTGGAAGGAGCCTCTCCTGTGTTCCCGCCAGTAGTTGGTGGATTTGGAG +TTCCTTGGCCACCTCCAGTGGTTGGTGGATTTGGAGTTCCTTGGCCACCTCCAGTGGTTG +GTGGATTTGGAGTTCCTTGACCATTTCCATTGTTTTGTTCATTTTGCTTATTCTGTTCAT +CTTGTTTCTTTTGCTCTTCTTGCTTCTTAAGTTCATCTTGTTTCTTTTGCTCTTCTTGTT +TCTTTTGCTCTTCTTGTTTCTTAAGTTCTTCCTGTTTCTTTAATTCCTCTTGATTTTTCT +CTTCTTCAGTTTTTTGCTGTTGATCTTGTTTTGGTTGTTCCGTTGTATCCGGTACGCTCG +TATTTGGAGATGAATCACGTTTTTCACCTTTTATACGCAATTCACTACCTTCTTGAATAA +CACTACTTGGCATTTTAAAGCGTGATTTATCTGTAGCCATCTCGCTCATCATTTCTTTAA +AGATCAACTGTGCAATTTTCGTATTTTTACTACTAATATACTCGTCTTTACCATCTTTCG +TATATCCAGTCCATACTGCCATCGTATATTGCGGTGTATATCCTGCAAACCAACTATCAC +GAGTTGCACTTTCTGGAATTCCATATTGTGCTAGTTGTTTTGAAGAATAGTTAGTTGTAC +CTGTTTTACCAGCTACATCTAAAGAGGCCACATTTGCCGATGTACCAGTACCTGATGTAA 
+CTACAGAACGAAGCATATCAGTAATCATATATGCTGTAGAATCCGCCATAACTGATTTTG +GTTTTTGTCCAAAACTTTGTGACTTGCCGTCTGGATAAACTACTTTCTTAACAAAATGCG +GCTTCGTATACTTACCGTCATTCCCAAATGCCGCATAAGCACCTGCAATTTCAGTTGGTG +ACGCTTCATTTGTACCGATCGCTGTTGATTCTGTCGGTGCTACATTAAATGTAATACCTA +ATTTCTCAGAGAACTCTTTCGATTTACTAATACCTACCTCTTTAGCTGTTTTAATAGCTG +GGATATTACGTGACTTTGTTAGTGCTTCACGCATTGAGATTGGCCCTAAATGACTTCTAT +CTGCATTTCTAACTTCTTGGCCAGTTGAATACTTAAATGGAGAGTCATCAATTTGATGAT +AAGTAGCCCATTTTAAATATTCAATTGCAGGAGCGTAATCAAAAATTGGTTTCATCGTTG +AACCAGCTGCACGATCTAATTCAATCGCCATATTATGCCCTTTAAATACTGCTTTATTTT +CACCGCGCCCACTACCTATAGCGCGAACTTCTCCAGTTTTCGTATCCATAAATGTGAAAG +CACCTTGGAATTTATCATTTGGATAATTAATAATATTTGTATTCAAAATATTATCAGCTA +ATTTCTGTGCTTTCGGGTCTAATGTTGTATAGATTTCTAAACCATCTGATCCAATATTAA +CATCTGGTATTTCCTTTTCTACTTCTTTCACAACTGCATCCATAAATGCAGTATGAGGCA +TTGCTTGAACAGTTGCAGTCTTAAGTCCATCTGTTACTTTAACTTTCGAAGCTTCTTCCA +TTTCTGCCTTTGTAATATAACCATGTCTATTCATTAAGTTTAGTACAGTATTTCTTCTTT +CTGTTGCCCTTTGAACATTTTCTTTTTTCGTTGGGTCATAGTTATTCGGTGCTTTTGGTA +AGCCAGCTAGCATCGCAACTTCTGGTAATGTTAAATCTTTCAATTCTTTACCGTAGTAGT +TTTGTGCTGCTGTTGCGATTCCATATGAACGGTTTCCTAAATTAATTTTATTTAAATACA +TCTCTAATATTTCATGTTTTGAATATTGTTGTTCTAGCTTATACGCTAAATATATTTCCT +GAACTTTACGTTTTGACGTTTTTTCCATCGATAAGAAGTAGTTTTTAATAACTTGCTGCG +TTATCGTACTACCACCTTGAGAGCCATAATCTCCCTTAAGACTAACTAAAACTGCACGAG +CAGTACCTTTAAAATCTACTCCGCTATGCTCATAAAAACGCGAATCTTCTGTTGCTAAAA +ATGCATTTTCTACCAATTTAGGGATTTGATCATACGTAACATTCGTCCGCTTTTCTTTCC +CATATTCATATACCAAATTCCCGTCTTTATCATAGATTTTTGAGGATAACGGATTAACAA +GTTTTGCTTTCTCAAGTTTTGGTGCATCCTTAATCATTACGAAAAAGGTAGCAACCCCTG +CCACTAAACCTACAATACCAAGTAGTAAACAACTGATTAAAAATTTACGAAAAAACGATG +TTTTACCTTTTGGTTTTGTTTTTTTAGAAGCTGGCTGTTTTTTCTTTTGAACTTGTCGTC +GCTCCTCTCGAGAACGATAATTTTCTGACATGTTACTTTCTCCTGCCTTTCAATTCTCCC +GTTGATTTACTTGAAAATATTGAGGTCACTTTCCCAAATACATTTAATCAAGAAAGTATA +TTGACCTGGAGCATTCAATATTTTCCCAATTCACAGATTATACATTTTAGAATACTTCTT +ATACAAACTATTTTCACTTCTATTTTTTCTAATAATACTGCCCCTTAAAGCATAGAATGT +AAGAAGTGAATTTTTAATACCTCGTATAAATAATATACCTTTTTTTCAAATGGTCAATAT 
+TTTCTTATTTATAAATCAAAGGTGAAATAAAAATAGCACAACGATATGTACTTCGTGTGA +ACTTCCTTATAATTACGTATAACAAACATTTAATATATTGCATATAAGTATAATACTCCT +TTAATTATTAGGATTTCAACACGACAATCCCCTTCTCTACAAGAAACCATCTAAAATGAA +AAATAAGGAGTTGCGTAATGATGGAGCTATCTCCCGTTATTTCAAAAAAATTAGTTGCTG +TTGGCTATAATCCTTTTTCTATGATTTTACGTATTCAACTAAAGAATGGTATGTATGATT +TCTTTAATGTGCCAGAAAGCATTTACACCGGTTTATTAAACGCACATTCTAAAAGTTATT +ATCATAATACTTATATTAAAAATTCTTACCGCTATACTAAAATTTAAACTTGATTGGCAT +ACAAAAAAGAAAGGCGAATCAAATCCCCTTTCTTTTTTGTATGTCAATCGCAATTTACCT +TCTATTTCTTTTATTAATTTCATCATACGCCTGAACTTGTTTTTTAAGTCGCCTGTCGGC +TTTTTTATCATGTACGACAAGAACTGCGTGTATCGCTCCTGGAACCCAAAAAATTAAGGT +TAAAATGAAATTAATAATCGCTTGAAACGGCTTTCCGCAAAATAAAACAGCTACCGGCGG +AAGTAGAATTGCTAATAAGTACATCATGTCTGAAAAACTCCTTAATGTTTATTTTTCGAT +TCTTCCCCATCCATCTCCTACACTATCAAACCTATGAAATCATGTTATTTCTAAAAGTGT +ATATAAGAACTCATCATTACAAAAATCACATATAATAGTATAACTAAATGAGGAACCTCT +CACTAACGTTAACAAGTTATTTTCACCTGTTCGTTTATATGTATATTCTACGCCTATATG +AAAAAGCCTACAAAAACAATTAACACATCCATCTTGTCCTACTTCTAAAATTGAAATACT +CCAAACTTCATTGAAATGAGTATTTCATCACCTTTTCTTAGATTCCAATCGTTGTTCCTC +TACTATTTCGGTTAATCCAATTGCTGAAATTACAGTACCTATTGCCTGTATCCAAATACC +TATTAGCCCTATTATACGTTGATCTTCTTTATCTATATTACTGTCTTCTTTTCCCTTTTC +TTCTTTTGGTATATTTTTAATACCTTCATATGCTTGTAATCCCGCACCTAAACTTTGCAT +TGAATTCCCTAATACGATAATCCACTGTATTCCATTTATCTCATTCAATGCTACTTCTAC +TCCTAAAAAAGCGCCAATTGATTGCAAACTATTTCCAGTAATTATAAGGAAATCATTTTC +TTTCATTTGTTCATTGAGATTAAAATACGTTCCTATTACATTTGAAATATTACCTAAAGC +TAATAACTCAATCCCCGTTTTCGCTAGCGCTTTATTTTTATTATCTATAGTTACCTCTTG +CTTCTCACATTTATCTTCATCTTTCGCATCATTAGTTGCTATAATTTGTAATATATATCC +GAGAGCTTGTAATGAACTCCCTACAATAACGAGATCCGACTCAATCTTTTCTTCCCCTGC +AAACCCTCTAGTTGTTCCAATTGCTGCTATAAGATTTCCTCCTACTTGAAACCAGGCACC +TGTCAATTCTAAATCGCGTAGGTTCATTTCCCATCACCTATATTTAAATTTTCTAGTACA +TTCAAAACAAACAAATATCACCTTTAATTCATTATTCTATTTTTCATATGTAATTTTGTG +CATCACATAATGTACTAATAATACAAAAGAGCACCTTTCAGGCGCTCTTTTTGACAAGGA +GTTTCTCAATGAATGTATGCCGGACATGTCAATGACTCACCGTTTTTATCTATTGATATA +TTTAATATATAATGAATTGCTTTATGTGACCACATTTCAGGTGTAGGAGATTCTTTTACA 
+TATTGAGGTGCACATAAGTGAAGCATTGGAGTGCTAATACTGCCACCTGGATCTAAAGCA +ACTACTGCCATACCATTTGGCAGTTCCATTGCCATTGACTGCGTGATACCTTCAACAGCA +AACTTCGAGGCACAATATGGCGCAAGTTCAGCCTCGCCTTCTCTTCCCCAACTAGAGCTC +ATGTTAATAATAATTCCTTCTTTCCTAGCTATCATCGCCGGGACGAACGCCCTAATGACA +TTTACTACCCCGTTCACATTTACACTCATTACACTTTCAAATTCTTGTGCGGTAACTTTC +CATAGAGGTGCATTTTGATTAACAATCGATGCATTATTTATTAACATATCGGGAGCCCTA +TACTTATTAAGAATACGACTTGCCCATTTACTAACTTGGTGATGATTTGAAACATCAATT +ATTTGAAAATCATGTGAAGCACCGTAATATTTATTAAGTTCTTCAATTTTATTTTTTGAG +CGTCCACACCCTGCTATGTTCCACCCCAATTCATGAAACCTATCAACCATTGCACGTCCT +AACCCTTGCGTTACTCCAGTAATAATTACAAGTTTCCCAATGTCGTTTCGCTCCAACTTC +ATCTCCTCCTTTCCATTAATATTACTCTTTTTATTCTTATTCAACTTCTCACTTAAGTAT +CAATTCATTTCATACTTTAGAAGTATTTTTGATATTACATATTACAAACAAAAAGAGTAA +CCATCATAGTTACTCTTTTTGTTTGTATCATTATTATTTTTAAATTGTATCTTCTATTCC +CTACATTTCTCACATCGATAAATATAGGTTACACCTTAATGAAAGTATATTTATGTTTAC +GGTTCGATTTATCCAAATACTTTTCAATCCACTTTTTCAACTTAACAGAGCCCACCTCTA +TCAATAAATTTCCACCTAGTCTTATAACATACTTTTATTTAAATTCTCAATCTCATTTTG +CAACTTTTTAAAATTCAGCTCTTCATTAATTTTAACATACTTTTTGCTCGATTGATTAAA +CAAAGCTTCAAACATTTTTCTATTCCGGAATATTTGTCAAATGAACCAATTAGACTATAC +CTTCATATAGTAATGCAAACAATTACTTATAAAGCAGGTGAAAATTATGCCCTCAGTTGT +TGGGAATTTAGTTGTACAAAATAGTAACGGATCTTTCAACTTAGGTGATTTCTACAATGT +TTCTCCAAAAGAGAATACGAAAGCTTATAACGGCTCTGGCGCATCTAACGTTGGTTTCGT +AGTGAACACTTTTAGTGGGGTTAGCGCGACTAATACATTTGATTCTGATCTAGCAGATCA +AAACCAAGTTGGTACGGCCTAACCAGATCTGTAATCATTTTTGTATAACATTTTAAAATT +TGTCTATACTATACTCAGGCTGATACACAGCTATCATTGATTTTTTGAGTGCCAATCTTT +TTCTCCCTTTCCCTGGGACTAAGCAGCTAGCTTGCAGCTAGCTGCTTTTTCATTTAGTTC +TTCACCTCTTCAAAAAATTAGGTTTTAACAGGTAGCCTGACTCACAGCCTAATTTTGAGG +TGTGAATCTGCCTGTCCGTAAATAGCTAGATAAATTAAAGAAGCTAAACTTCAAAATTCA +GCTTCTTTTAGGTTACATCAATTATGTCCATTGCAGAAAATATATATGTTTTATAAAATG +CATCTGTACATTTTACAATTTGTTGAATTGGATTTATATGAACCACTGTCATATAACTCG +TAAGTATGTATCCTTCTTCAAAATATATCACTAACATCTCTTCTTCAGACAATAATGAAG +TTAATAATTTGTCTGAAATGCTGTCTTTCGCTTCATTTGTTATTAGCGGTCTTTGGACTT +TTAATTTCTCTCCAATAATTTTTTTCATACTTAATAATTGTTCCGACATTACTGTAAACG 
+GAACCCACTCTCTTCCTTTTTCCTGTATCTTTACATAATTCATTTTCTTTACTCCTCAAT +TTTCTATATCTATCCATTCTTGAACTACCCCACTTTCACTTCGTTTAGAAGTGGGGGATT +CCTAAGTAAAGAGTTCTATCGAACTCTAATTTATTAGGCTATCTCCGCAGCCCTTGCGGT +TAGAAGCCTTATCGCTTCATTCTTTATCCATCTCTTATTTATACGTAGCGTAGCTTTCCA +ATTCAAAACTATTTATGAATTTGTAGACATCTCCACTTCAAAAGAACATTCGTTCTAATT +ATACACGAACGAACGTTCTTACGGAAGTGCAACATGTAAATTTTTCAAACAAAATGCAAC +TTTAATATATGAAGTTTATTCATACTCATTTATTACTCCCTTTTATTAATCAAATTTCTT +TGCACTTCCTATCGCATCATAGAATACGTGTAGCAACAATAAAAGCGACTCCACCTAATG +TGAAGTCGCTTTTATTGAACTAATTATATTTCTAATTTTACTTTTCTATCCTAACTAACG +TTCTTTCTGTAATACAAATTTCAGCTACATCTCATTAACCATCCTTCAACAAAAGCATAC +ACTTTTATTATGAACGAACTCACAAAGCTTTAGAAACTTATGATGTAAAACAATTCATTC +CTCATAAGTTTCATACATATCGACAATTACTACCATTTATCCTGCTATTTGCAGGCAGTA +AGATTCACACTCATCAATTTTTCACTTTCTTTATTGAATATGCTTGGGTTGCATTATAAT +ATGTAATTCTCTCTCTGTTTCTGTTTTTTTCGGATTCCGATAATATTCCAGTTTATATTC +TAATACTTCATACTCAACCATCCCAATCTTTATCCCTTTTGTTTCTTTTAATACAGCTAA +TAGAACGCTTATATTTTCATTAGATGATTCAAAAATTTTCCCTCCGCCTTCTCTCTTAAA +AATAACTTTCATATATTCTCCCCAAAATTAATTTAATAATCTAGTAAATATAAAAATATT +CATCCTATTTTAATAAAGTGAAGGCTAATAATCAGTGGGGGTTTTACGAGCAGTAAGACT +CCCACAAATAGCGGGATAAATAATTCTTATCATCATTAAGAACATCTTTTTGTACTAAAT +GTTCAACATGTCACTTCCTATAAAGTTAATTTATACTCCAATAAATATGATATGTACCAT +TCTTTAAATTATGTCCTCAATTCACTCTCCAAGAAAATATATTTAGTTTATATTTAACAA +AATTAATTAACTAAAACATTATCATTTCAACATTTTGCGAATTTAATTGCTTATTCCTTT +TATCTACATCCAAATTTTCCTATTGTAATTCTTAATATTTAAATGTAATATTGTATTGTC +TTACTATCTATTTATAACTCACACACTAACGCGAAAACAAAAAAAACAGAAATATAGCAT +TATGCTAAATCTCTGTTTTTCTTCTAAACCTTATTTCTCATCTAAGAATTTAAATGTATC +TTTAAATTCTTTATCTGTTACTTTAATATCTGCTTCTTTCAATAGATCATTGATTACTTG +TTGTTTCCATTTACCTGTCGCATCTTGCAGTCTTTGTTGTTCTAAATCTTTTCGAATCGT +ATCTTTTACTTCATCAAATGGTTTCAATTCTTTTTTATCCGTCACTTTTATTATATGATA +ACCATAAGATGTTTTTATTGGCTCACTTACTTGTCCTGCATCTAATTTATACGCAGCTTC +CTCAAACTCTTTCACAGTTTGACCAGGAGCAAAACCAGCGATTTCTCCGCCCTGTTCCTT +TGAACCAGTATCTTCCGAATACTGTTTCGCTAAAGCAGTAAAATCTTCACCATTATTTAT +TTTCTCTTTTACTTCTTTCGCAGTTTTTTCATCTTTCACTAAAATGTGACTTACCTTCAT 
+TTCAGGTTTATGGTTATCTTTCACATCTTTATCTGTGACAGTCGCTCTAATCGCTTTCTC +AAGTGCAATTTCTGGCTTCATTCTCTCTTTTAATTCATCTTCATTCTTTAATCCTACTTG +CTCTAAAGCGACTTTAAACTTGTCACCCATTTGTTCCTTCGCTGCTTCTACTTGTTTTTT +AGCTTCTTCATCTGAAACTTTATATTTATCTAGCAATGCCTTATTTAACATCATTTGAGA +TAAAGTACTTTTTCCATATGTTTGCCTTAATTCTTTACTTAACTCTTTCTCTGTCACGTT +TCCTACTTTTGATGTTGCAACATTTTCTGAGGAACCACACGCAGACAATGTTAGCGCCAC +ACATGCAATAATAGTTCCCATAAATAGCTTTTTCTTTTTCAATTCAAATACCTCACTTTG +ATAAATGTATATTAGGAACTATACAGCGCTATTGTGAAATCAATGTGTTTTCTGCAAGTA +TTTCTCTTCATGCCAGAGACAAGCTTTATTAAATAAAAAAGCCGGCATAAATCCGACTTT +TAACATCATTTAATCTGCACATGTTCTTCTTTGATCCATTTGTTCCCACCTATATCTATA +TATCCATCTTGACGATTCCAAATACGATAAGGAACCCTCCCATCGACCGCACCTGTATCA +TTCCCGTTTGGCCCATCGTACGTTCTAATTTCATATCCAGGTGTATACTTTGAATATGCA +TAAAACCATTGTACATTAAAATGTTCTAACTTAGCCCACTGTTGTTCACCACCTAAACAA +ATCATATCTTTTTCGCCACCTCCCCAATAACCTTTATAGATTAAATATGGGATTTTTTTC +GTAAGTCTTCCTATAAATTGTGGATTCGCAGGATTTTCATATAAATTAATTCCGTATCCA +TCATCATATTTTGAAACTGCAATACCAATTCCATTTGGTCGCTTACTGTCTCCAATACTA +GATTGATTTGTTTGATTTGAACCTATAAACCAAGAAAGATTTTTATTCCCAATTAATTGA +TTCAAATCACATTTTCCAATACCAGGAACATTTCCTGAATCAGTATATTGCCAAATATCA +CAAGGATATGCTGGTTTATTCCCTCCATAACGAGGAATCCAAATAAAATCAGCATCAATA +TTGCGAGCTCCAAATGATACATACGTGTGATGTCCTACATATAAACCAATTTTTTTTGCA +CCTAATCGGCGTAATTCATCAATAAATGCTTGAGTCCCGCCTTGCATATCTATCATTGTT +TGAACTTCTACATCTGCCACCCAAAATTTAGCGTTTTTATCGCCACGATTCCAAAAATCT +TGTGCTTCTTTTTTTGCATCTGAAATAGATATAAAACGGCAAAATGCATAATTCCCAAAT +GGGATACTACGTTTCTTCATTTCTTTCACATAACCTTGATACATAAAATCGATCGTATTC +GAACCATCTTGAACTCTAGCAATAACTAAATCTAGCTGAGACGCTGCAATATCCCAATTA +ATATTTCCATTCCATTTCGAAATATCTACAATATAACCCATTACTAATCAACTCCTTTTT +ATTATCTTATTCAAAGCTTGTTTTATTGCTAGACCCATACGCCTAATAGCAAGCTTCTAT +AAGACAAATAAATATATTTTCATCATTTTTTAAAACTAAATAATTTCTTTTTATCCATAA +TAAATAAGTCCTTCTTTTTTTGCATTTGTAAACGATAAACTTGCGTTTATTTCATACTAA +ATATTGGAATAAAACTATAAATAATGTACATACTACATATAAGCAATTTTTGCTTATACT +CATGAATGACCTCCAAGATTTTTCAATCTCCCTGTAAAAAGGCAGCTAAAAAAAGCTGCC +TTTTTACAATTTATTATATAGAGACAAGTTAATTAACGCTTATTCCCTCATATTAGTCTG 
+TTTTTAAGCTTTTTATAAACATGATATATATTTTTTCACAACACTATTTTGATACGTAAT +AGAAATTACATCAACTCATACAGTATATGTAATAATCAACTACAATAAAAATTACATATT +ATATTAATATGTCTTCCTAATTAAAAGGAAATAAACATATAACCTCTGTTATGTAAGGAG +TTTATTTAATGAAAAAAATCCCTATCGTATTCAAAGTTCCCCCGAATTCAAAGCTAAAAG +TTACATTCTATGGTCCTTGTAACGAAGTAATTACAAATGTATCTTTAATTAATCAACTAC +TCACCACTACCTGCCAAACTGTATCTCAATATCCAGACTTCAAGAAATATATAACTGAAG +TCCGATCATTATCAAATTGTTAATCGTATAATTTTATGATTTATCATTTCGAATAGTACG +ATAACAAAATAATAAAAAGAGCCGCATCTTTCAAAAGAAACGGCTCTTTTTCATTTCACA +AAACATGACTTCATTAAAAAACTAAAATAACTCATTAATTGATATTCTGTTGATATTTTA +TTGATATTTTACACATATTAATGAAATGCGCATATATTATCGTATGGAGATTCTCCACTC +ATAAAACCTATCTTTTTTTGAGCATACTTTATTTGTATGCTCCTTTTTTATTCTTTACTT +AACGGCTTACCCTGTTAATATTTCCAAATATCATCTGAACGAACATTGAATATTTCAATC +TTTTCTTATTATGTGCTATGAATATAAGTTTAATAATTTATCTGTAGCTACATTTTCAGT +ATCACTTAAATCGACAAAATAATGAAGTTGCCACAATTGACTCTTCTTAGCTTGATTACT +AATAGGTTTATCCCAAATAGGGTAAACCCTCATAGCCATTTTCCCTTTTTGATTTTGAGT +TTTTAATATTTTTTCTTTTAGAAGAATCTCTTTTGGAAAAATAAATTGTCCTAGGTTATT +ATCCGCTATACAAGTGATAACTAAAAGGTTCGGAGCTGAATCATAAGAAAATGCCTGATT +GCTCATGGAGGCATTCTTCTCCCAAAAAGAAACAAATTGTCCAATCTTATTAGGCGTAAT +TTTTGATACCCTAAATCGGATGCTTCTATTATTTAGCTGGAATACCCCTCCTGCATATTC +TGAATTTTGCTTTTCTTCTTTTCGTTTAGTAATTAATAGATTATTAGGTTTGTAAATCAT +TTCATCTAGTTTTTCAATTATATTACTAAAATTTGTCATGTAAACGCACCTTTTTTATTT +TCCAGTATACCAAGAACGTTCGTTCTTGTATATTAAACTTAAACAAATGTTGGCTTTGAA +TAGTCTTTCAAAGATTATTTTATCTACTCCATCCATAACTTCAAATTTACTTTTATAGTA +TTTATTACATTACTCTTCCAATTCCTTATATTCTTCATTATTTCATTAATATATTAATAA +AAAACGAGGTACATATTACATGAATCCATCCAAAATTGACTTCGGCTCCATAGAGCAACC +TTTATTCCTATTAATTTTATATCTTATCGCAATTCTTTTAACACTCGGCATTACATTTAG +TATATGTCATTCCCTTTTACTAAATTTAAATGCACCGAGATGGGTAGCAAAACTGTTAGC +TATCATATTAACTCTAGGTGTTGCTTATCAAGTGTTTATGAACTTATTTTAATATATTAC +CAATTTAACACCTTTAACTAAAAGGCCCTACCATAAGGTAGAGCCTTTTTCAAAAAAATA +GTCCATAAATCACAAACATGTCTTTTATTTTATTGGAGATGGGAATCTTGCAGCTTCAAA +ATGCGTACTAATAAATCCTGGATCATTTTGTATAATCAGTCCATCTACATCACTCTCAGC +AAAAATTTCAACTACATCTCCTGCATGTAATTGAAGTATTGTTGAAACAGCTACGACATT 
+TGCAAAATTGGGTGGACCAAAAAAGTCATTATCTATCGCTATAGCTGGATTATCATTTAC +CCGAATTTCTACACGCGCTCTATAATTCGTATTACTATCATTAGGTGCAAAGCTGATTGT +ACCAATAATAGAATAAACCCCTCTAGTCTTCGGAATAAAATCTGATGTTACTGGATTATA +TTCATTCGCTAAATCAAACTGTTCATTTTGGAATAATACTTTAACAAAAGTATTTGCAGT +GACATTTTGATTTACTGTACTAACCGCTCTAAAAGCAGATGCCCTTACAAGTTTATCTTT +CTCATTATCACAACACTTATCGTAATAATCTGTACTCTTCTTACAACAAGTATGCTTTGT +CTTTTTATAGCCATTGCAATGCTTACAATAATAATAAGC +>NODE_15_length_39993_cov_62.9495_ID_29 +TCTTGATTATTGGCGATAAAGTCCCAAAAAGTATCGTGTGCTGTTTGTCCTTGGGGAATC +TCATTATGTGGCTCTGGTTTAAGAGCATGAATAAGATCCGGAAATTTAATACCATCTTGA +ATGAAGAAAATCGGAATGTTATTACCCACTAAATCGAAATTTCCTTCCTCTGTATAAAAC +TTAACAGCGAATCCTCGTACATCTCGGTTCGTTTCATTTGCACCTTTTGATCCAGCAACT +TCAGAAAAACGAATAAAAATAGGTGTCTTTTTAGATGGGTCTTGTAAAAAATGAGCCATT +GTTAAGTCTTCTAAAGAATCATATAGCTCAAAAACACCATGTGCTCCGTAACCGCGGGCA +TGAACGACTCTTTCAGGAATTCGTTCACGGTCAAAATGAGCGAGTTTTTCTCGCATCAGG +AAATCTTCTAAAAGTGTAGGACCACGATCGCCGGCGGTTAAAGAGTTTTCATCATTTGAA +ATTTTGACACCCGTATTCGTTGTCATCTCTTTTCCTTCATTGTTTTTCGTAAAGGATTGT +AACTGTTCTATCTTCTTATTTTCACGACCGTCCATATAAAGCACCTCCCATATTATACGT +ATTATTGTTCATGGTTTTTATGCTTACATATGGTATGGGGAAGTATATTATCTATGGATT +GATTTTCCTCATTTGTTCAATTTATCTTTGTTTGTCTATGAAAATCATTTATTATAATAA +ATGGTATTCTTTTGAAGGGGTGTTGAAGTGGAAGAGAGTTTTCTATCAGCTAAAGAAGAA +AAGAAAAACGCTAAGATTTTCTTATGGGTAATACATGTTATTTTAATTGTATACGAGGTT +GCATACGCGATTATATTAGAAGATACAATGCCTTTAGCAAATTGGCATAAAGGGATATGG +AAGCTTGCATATATTATGGCTATATTAGGTATTAGTGTTTACTTATTTGAGAGAGAAAAA +GCATATTTAGTTAAATATACATATTTATTTGCATACATGATCGCAGAGACTTTTAATATT +GGATGGTATGCTTTTCATAATACAATAGCATTTGATGAAGGGAATGTAATTGAATATATT +TTCATTTTCTTCGTACCGATCTTTTTGAGTAAAAGATATTTATTTGTCTTAGCGCCATTC +CTTATAGGGAAATATATGATATACCTATTTGTATTTGGAGAACTTAACCTGTTTATGTCT +CTTGTTATAAATATGGTGTTACTTTTCGTGTCATTTATTATTTTAAATCGATTTTTACAA +TATCTTTCAGCAGTAAAGGGACGTATTGCAGAGGCAAGTCATTCACAAAAATTGGCGGTT +ATCGGAAAAATGGCAGCGACAGTCGGACATGAAATTAAAAACCCACTTGCTTCATTAAAA +GGGTTTACGCAATTACAAAAAGAGAAACACGAAAAAGATGCAACATATGAACAAATGATT 
+CTCGAAATCGAAAATATGAATAATATGATTAGTGAATTAATGGAGGTTGCTGCATGTAAA +CCTTCTGTTTATGAAAAACACATTGTAAGTGATGTTTTAGTACAAGCGGTAGAAAACATG +CGCGGAAAAATGAACGAGTTAAATATAAATTGTACCTTTAATGAAGAGCAAAATAGAAGC +GAAATTGAATGTGATAAACGCAAATTAAAAGGAGTATTTTTGTATGTTATTAAAAATGCT +TTAGAGGCAATGGAACATGGCGGAACATTAAAAATACAAGTTGAAAATAAAAAAAGAGAT +TATGTAATAGTAAGTATAGTAGATAGTGGTTTTGGGATAAAAAAGGATAATTTAGGACGA +GTTAAGGATGCTTTTTATACAACGAAGCAAGATAGAATTGGATTAGGTCTTACGGTAGCA +GAGCGAATTGTGACAGAGCACCTTGGAGAATTACACATTTCTAGTGAAGTAAAGAAAGGA +ACGAGAGTAGAAATACTGCTCCCGAAAAAATGTGAGCGCAATGTGACACAAGATTAAGAG +GTGTTGTTAAAGGAGCTATCATATGGAAAAAGGAAATATATTTGAAAAAGAAGAGATAAA +GGCATTAATAATATTTTTAAGCTTATTCTTCGTTATATTTTTTGCGTATGATTTTGCTGA +AAAAGCTATTGTCCTTTTATCAGATAAAAATCAAAAACTAGCAGATGCTTTTGGAGAAGG +ATTAGGTTTATGGCTATATAGTTTTATGGTTGGATTATTCTTTATAGGACTTTATTTTAT +GAAATGGAAAAAGCCGTATATTGTGAAGTATATTATTTTAATTGGCTATAATATATTGGA +TTTTATTAATAACTTCATTATTTATTACGGAAGTGATGCGGAATTTGATGGCGGGAATAT +AGTAGAAGGATTCTTTATTTTATTTGCACCAATATTTGTGAATAAGAGGTACTTTTGGTT +AGTTGCGGGAACTATTGTTGGAAAATATGCACTTATGGGATTCGTTGTTCAATCCTTTAT +TGTTCTTATCCCAATAGCATTATATAGCGTGTTTGTTATTATATGTTGGATTATATTTTT +AAGATTCCAATCTTACGTTCGTACACTTGAAATGATGGATAAAGAAATACGAAATGTAGA +GAAATTAGCAATGGTTGGAAAAATGGCGACAGTAATTGGCGATAAGATTAGAAGACCGTT +AGAAAAATTGAAAAAACTTGTGAATAAGCAAGCGAAAAAATATCCAGAAGATAAAATTTA +TAGTGAAATTATGAGACAGGAAGTAGAGCGAATTCATACAATTGCTACAGAACTAAATGG +GTTTGAGAAATCTAAATCGGTAGAATCCGAAACGTATAATATTAAAGAAATCATCTCTTA +TGTTATCAGGGTTATGGAAAAGCCAGCTTTAGAACAAGGAATAAAAATGCATGCTATTTA +TAGTAAAGATATACCATCAATTACATGTGAGGAAAAACGATTGAAACAAGTATTTTTTAA +TTTAATAAAAAATGCGATTGAAGCAATGTCAGTTGGCGGAACTATTACTGTAAAAGTTAT +AGTGGAAGATGTGATCATTGTTCAAATTATAGATGAGGGATGCGGCATTCCGAAAGATAA +AATTCCGAAGCTAAATGAAGCTTTTTACACAACGAAAGAAACTGGAACAGGCTTAGGATT +AGTAGTTACGGAAAAAATTATTAAAGATCACCACGGTAAATTGAACTTTGAAAGTGAAGT +TGGAGTTGGAACGACTGTTGAGATTATGTTGCCGATTTAATTATAGTGGGATATTTTAAA +AAGGGGAAATCACTTTCTTTAGAGTGATTTCCCTTTTCAGTGTGGAAGGAAGTACGGAAT +TTTAATAAACGGGATGTTTACAACATTGACAAACTGTACAAATTCACAATCCAAAATATG 
+GGTGTTTTAAAGCGAGAGTATATAATAAATTTGTCTGTTATTTGTTTTAATTTATTTGCA +ATTTTTTTGGCCTCGGCATCGAGGTCCTTTTTGTAAATGAATGATTTGTAAGTGTGGATT +ATATATCAGTGATTAGAAAAATAGAATATACAGAACTTGCCGTAATCCCGGGGGTAATAT +AAAGAGATATCGCATCCCCCCTCGAAAACAGGTTTCCGGTTGTAATACTAGTCCAAGTTA +CAGTGGCAGGTCCAGGTGCTGGAGTAGGGTTATACACTTGAGGTTGTCTAGAGCCAATAT +CCGTAGGTTTAATTGAAAATGTCATAGTTCCCGTAATTTTATCTGTAGTAGTTAAAGTAA +TAGTAGATAGGATGTAACTAGAGGTAGGGGCAGAGAGGTTTGTTGGGACATTCCTGCAAA +TATCAATCGTATAAACAGCAGCGGGTAAGTTATTAATATTTATAGATGTAGCAAAACCAA +CTATATTTCCAGCTCCAGCAGTTACATACGGAATAACGTTAGAATCAATTCCTGGAGAAC +CTGCTATACGTTGAAACCCAGCATTCGTCCCTTGAAAAACAATACTTTTCGCTGTTGAAC +TACCAGCAGGTCCAGTAGTCCCGGTAGAACCAGTGGCACCAATAGTTCCGGTAGCTCCGG +TAGAACCAGTGACTCCAGTAGAACCAGTAGCTCCGGTAGAACCAGTGACTCCAGTAGAAC +CAGAAGCTCCGGTAGAACCAGTGACTCCAATAGTCCCGGTAGCTCCGGTAGAACCAGTGA +CTCCGGTAGAACCAGTAGAACCAGTAGTTCCGGTAGAACCAGTAGAACCACCTTCTACCA +ATACGATATAATCTTGAGACACATTCGGAAATCCTTGAGGTGCATCTTTTTTTACAACAT +AGCCGCTACTCGTGTAAGTGACAACTTGTCCCGTTTTATAGTTAGGAGCGGCTGCTAGAC +TAAAAGCGGTAGTACTTTGCAAACCGGCCCCGGCATTGCCAGTAGGTCCAGTAGCACCAG +TGGGTCCAGTAGCACCGATAGAACCGCTGTTGCGTCTACCGTCACAACTAAAGCAATTGC +GTTGGTACATATATTATCTCCTTTCTATATTTTTGAAAATCTAGTTTTAAAAGTAGAACT +ACTATATATATATTGTAGATTCTATATTTTTGTGTGGTCGATCGTATTGTGTATATAGAT +TTTAAAAATAGGGGAGGTGTTATTAGTTTTATAACCACCAGGTCTCCTGCGGAATCAAAG +TAACATCTACATCAAAATTCTCTTTAAACCAATCATACATCAATGTCTCTTTCACTAAAT +ATTCAGAGGCAGAGTGTGACAAACCGATGAGTGACATATTTGTTTCTTTCGCATAATCCA +TAATGAGTGAGTATTTATGCCTACCATAATCATTGTCGATATGGCAATGGATTTCTCCTG +TAATAAATGATTAAGTTATTGTTCTATTTCTTTTAAGGCTGGCATGTTTGGTAATCCTCC +TTAAGATAGTTCTTACTTTGTTAGTTGTTTTTGTTTTTTTATGGAAAAATAAGAAGCGGA +TAGGAAACAAATAAAACCTAGAATCCAGCCAATAGAAGGAGAATAGGGAAGTATATGTTG +TAATACCATTCCGCTAGAAATAAAAAAGCTACCTAATAGACCTAAGAAAATGAGATATCG +CTGTAGTTTGAACATCAGTGATACAACCTCCTTTATCACGCATTAACTGCCCGTAACCTG +ATTGGCGAGGGCTAATAATCAGAGGGGGATGGATACCTCCCTGATTAAAGTTTCACTTTA +TCACATATATTTCAAAGTAGTCTTAATTTTCTGCATATAAGATTCGAGAAAAATGGTGGT +ACAATATATAAATGATACGAGAAGAGAGGATGTAGGTTAACACATGTACGTTACTGTAAC 
+AGAGGCAGCATATAAAAAGATTATGGATACGATTCCAAGTGAAACGAAATATATAAAGTT +ATTTTATGATAATGAAGGTTGCGGTTGTGTTATGAGCGGGATTATCGATTTAGTAGCCGT +TTCAGAGAAAGATGAGCGCGATGTAGATATCGAATCAAGCGTACTAAACTTTATTGCAGA +TCGCACAAAGCTTGTATTTATGGATGATAAATTAACAGTTGATTGGCATGAAGTTGGAGG +GACTTTCCAGCTGAAGAGCCCAAGCCAGTTTTATAATCCGAATATGAAGTTACATGTTCG +AGTATAATGAAGAGAGAAGGAGTATATAATATGCTCCTTTTTCTTATGGATTTTTTCACA +TAATGAAATGTACATATAGTTGAATTTTCTGTATAATTCTCTTTGCGAAGGGAGTTGCGC +ATGAAAAGATGGGGAATTGAGTTATTAATTTTAAGTGTCGTTATTATTTGGGGGATTAAC +TATACGATTGCGAAATATGGACTTTTAGAATTTACAGCAATTGAGTTTACTGCAGTTCGA +ATGATGACAGCAGCACCACTACTGTTATTACTTACGTTCTTTATCGAAAAGTCACTTTAT +ATGGAGCGAAAAGATATACCAAGATTAATCATCGTTAGCGTTGTAGGTATTGTACTGTAT +CAAACGTTATTTATGGAAACTGTAAAATACACATCCGCTACAAATGCTTCTTTACTCATA +TCTATTTCACCTATTTTTACAACTGTATTTGCGATTTTCTTGAAACAAGAAAAATTTTCT +TCTCGAAAGTTAGTTGGTTCCATGATTGCCTTTATTGGTGCTACATTAGTTTTAGTAGCG +GGGCATTCACTCGCTAGTTCTTTCTACGGAAATGGGATTGGACTTATTACATCAATATGT +TGGGGACTTTACCCCGTTTTAGCAGGGCCGCTTATTAAAAAATACTCAGCATTACGTGTT +ACTGCATGGTCGACATTAGTTGGAGCGATTCCGCTCTTATTGTTAAGTGGCCCACATGTA +TTTGTCATGCCATTTCACATTACACACGGAATGACACTATTTGCTTTACTATATTCTGTT +TTCTTTGTAACAGTATTCGGTTTAGTCATGTGGTATGTTGGTGTTCAAAAAATTGGTGCG +TCACATACGATGGTATATATGTATATTACGCCGCTTGTAGCTGTTTTATTTGCAGCTGTA +TGGGCAAATGAATATGTATCGTTTCAACAAATAATCGGTGGAATCATCATTTTCTTCGGT +CTATGGTTTGTGAAATCAGAGAAAGTAGAAGCTAATTCTATCGTGCAAGAACCTATATCA +AAATAGGAAAACAGATCACCTATGATCTGCTTTCCTATTTTTTTATTTTTGGTAAAAATA +CGAGGAATAAAATGGCAGTAATAAGTGGGATTAAATTAAGAAAAGGAATGCGGAAGAGTG +TAATAAGAATAATACCTGGAAGAAGAACACCGAGGACAGCATAGAAAATACTATGTTTTT +TCGTGTACCAATACAAGGAAAGTCCAATAAGTGCAGGAACAATGAGATGAATGAGAGCCA +TTAAAAAATAAATCATTAGACGCCCCCTTTATTTATGTGCTTATTACATCATATCCGTAT +GTTCATAGATTGATATCAATTTTTTCATAATTGGACGGAAATGTTTTGAAAAACGTCACA +TTTATGAGGGATTCTATATTAAAAATCTTGTATAATATTATTTCCTAAAAGCATTTTAAC +AACATAAACATACCGTTTATGTTGTTTTTAATTTGAATGAAATAGATTTTAAGTAGTATG +GACTCGAAAGAAATGAGTTATACAGCTTTTGTTGGATAAATAGAAAAAACATTGAAGGGG +AGTACAGCATGATTAGTTTGTCATTAAAATCGTTTAAAGTTCAATCATATTATCTACTAG 
+GTATTCTGTTATTAGGCTGGATGTTAACACCGTTTTCAGCACATTTTCTAGGTGCAGGAA +TTGGACTTATTGTAAGCATGTATTGTGTTTGGATTTTAGGAAGGCGTATCGAAAAGCTTG +GAGATAGTATAGTAAAGAAGACGAAAGCACCGACGCTTGGTATGTTTAATCGCTTTGCAG +CTGCAATTTTGGGTGCTATTATCATGTACGAAATTGAGCATCATATGGTAATGTGGGCAT +TTGCAATAGGTATTCTGGGTGGTTATTTCTTAATCGTTATTAATTTAGGGTATTATAGTA +TGAAGGATGAGGAAGAATTAACGAAGGGCTGAAAAAGATAAAAAGATGAAAAGCCTCTTT +TTAGATGGGATAGAGGATCCAGCGGTGGATAGAAGCCGTTAGGGCCTTTTTTAGCCACAT +GCGGTGCGAAACCAGAGCGAGGTAGTGAGCAGGAATATAATTTATATGTTCGAAGCCGTA +ATTTGTATATATGAAAATCTGGTTATTTAAATTGAAAATTGGCTTACTCTGGAATAAGGG +ATCCGGCGATGGATAGTACAACAGGAAGCGTAACTTTGTATAAAATATAGGTTTCCGCTT +CCAATAAGTTAATATGAAATCACTTAGTAGTATGACTCTTAGGTTCTTTAAAAAAGTTGG +ATACAATACAACAAACTATTCCGGTAATTAGAATAATGTATTTTATCGTTTCACTTGTAT +GCTCAAATAAATTAACGTTAAGCATTACAAGTCCCCAAAACATTTGAATCGCTCCATAAC +TGAATAAAATTTTTTCTGTACGAGAAAAATTTTTCATGGGCCTCACCCTTTCCTTTATAT +GTAAAATATATCATAGGAAACTACAAACGAATGATTATAAAATGAATTTTCTGTAAATTT +ATAGGTATTGGTATGCAATAGATGAAGAGATTAAAGTTTCACTTTATATAAGATATAGAA +AGAAGCTTACGTTTTTAACGTAAGCTTCTTTTTTATGTAAGTGTGTGTAACCGTTAAGCC +GCATTGGGTTTTGTCGGTGAGTACGCGGAACTTTACGGTTACACGACACTTATTCATATG +AGTAAAAGGGTTATTCCAACAGAAAATTTGTAATTAAGCGATTGGACCGCCTAAGTTAAT +AATAGCTTCAGAAACGCTATCGAATTTTTTGAAGTTTTCTTTAAATTCGTTTGCAAGCTC +AATTGCTTTCACTTTGTAAGCATCTTTATCAGCCCAAGTTTGTTCAGGCATTAATACTTC +ATCAGGTACACCTGGAACATGACGAGGTACTTCAAGGCCAAAGATGTCGTGTTTTGCAGT +TTCAGCTTTAGCAAGTTCACCGCTTAGTGCTGCTTGAATCATTGCACGAGTGTAACCTAA +GTTCATACGTTTACCAACGCCGTATTCGCCACCAGTCCAGCCAGTGTTTACTAAGAATAC +TTTCGCATCATGTTTCTCGATTTTTTCACCAAGCATTTCAGCATAACGAGATGCATCAAG +CGGTAAGAACGGTGAACCGAAGCAAGTAGAGAATGTAGCTTGCGGAGATGTAACACCGCG +CTCTGTTCCTGCTAGCTTACTAGTGTAACCGCTTAAGAAATGGTACATAGCTTGCTCTTT +TGATAACTTACTGATTGGAGGCAATACGCCAGATGCATCAGCAGTTAAGAAAATAATTGT +ATTTGGATGTCCTGCAACACTTGGCAGTACGATATTGTCAATCGCATGCATAGGGTATGC +AGCACGTGTATTTTCTGTTAAAGTAGTATCGTTATAGTCAGCGATGCGTGTTTGACCATC +AATGACAACGTTTTCTAAAACTGAACCAAATTTGATTGCATCGAAGATTTGTGGTTCTTT +CTCATGAGAAAGGTTTACACATTTTGCATAGCAACCGCCTTCAATATTGAATACGCCGTT 
+ATCAGACCAACCGTGCTCATCGTCACCGATTAATTTACGGTTTGGATCAGCAGATAATGT +TGTTTTGCCTGTTCCAGATAAACCGAAGAATAGTGCTACGTCGCCTTCTTCGCCTACGTT +TGAAGAGCAGTGCATAGAAAGAATGTCTTGTTCAGGTAGTAAGAAGTTCATAATAGAGAA +GATTGATTTTTTCATTTCTCCAGCGTATTCTGTACCACCGATTAGTACGATACGTTTTTC +GAATGAAACCATAATGAATGCTTCAGAATTTGTACCGTCAATTGCTGGATCTGCTTTGAA +GTTTGGTGCAGAAACAATTGTGAACTCTGATTCGTGAGTTGCTAATTCTTCTTCATTTGG +ACGAATAAATAATTGATGTACGAACAAATTATGCCATGCATATTCGTTAACAACTTGAAT +TGGTAGGCGATAGTTGCGATCAGCGCCAGCAAATCCTTTGAAGATGAATAACTCTTCTTT +TTCTTTTAAGTATTCTAAAACTTTTATATATAATTTATTAAAATGTTCTTCAGAGATCGG +TTGGTTCACGGCTCCCCAAGCAATTTTGTCAGCAACCGATGCTTCCTTCACAATAAATTT +ATCCTTAGGAGAACGTCCTGTGTATTTTCCTGTTGAAGCAGAAACGGCACCAGTAGAAGT +TAATTTCCCTTCGTTTCGCATTAATACTTTTTCCACTAATTGCGGAACACTTAATTGAAT +CTGTGCATTGCTTCCGTTCAATAATTCATGTAAACCAATTTGGACATTCACAGTACTCAT +ATTTATATACCATCCTTTTCATTTAATGAAATATTTCTCGTAATCCCCATCAATAGTATA +ACACAATTATATAAATAATGTATACTATTTATTTATTTTTGTTTGTGTGAATGTATCATT +TCCCTATTAATATTTCATTCTATGGGTATTGAGAAAAACTTTCAGGAAAATGTGAATATT +AATTGACAAATGAATATCATTTCTTTAGTATAGGTTGGGACGGATACTCTCTTATCCCGA +GCTGGCGGAGGGACAGGCCCGATGAAGCCCAGCAACCTCATTTGTAGTGGTAAATACAGG +TGAATAGGTGCTAAAACCTGTGCGAGGCTAACGGTCTCGAACGATAAGAGCAAAGGGCAA +AAAGCAGTATGCAAGTAGCAAATTAAACCTTTCCTCTATGTTAAGTAGGAAAGGTTTTTC +TGTATGCTTGTGTGGGAGAATAAATGTATGTCGCAGTTTGTGGCAAATTAAGGATGAGTT +CCGTACAATATATACAATTACTGTAGGGAGGTTTACCACATGACAAAAAAACGTCATCTG +TTCACATCTGAGTCTGTAACTGAAGGACATCCAGATAAAATTTGTGACCAAATTTCTGAT +TCAATTTTAGATGCGATCTTAGCAAAGGACGCAAATGCACGTGTAGCTTGTGAAACAACT +GTAACAACTGGTTTAGTATTGGTAGCGGGGGAAATTACGACTTCTACTTACGTAGATATT +CCAAAAATCGTTCGTGAAACAATTCAAGGCATTGGTTACACACGCGCAAAATACGGATTC +GATGCAGAAACTTGTGCAGTTTTAACATCTATCGATGAGCAATCTGCTGACATCGCTATG +GGTGTTGACCAAGCACTAGAAGCACGCGAAGGTCAAATGACTGACGCTGAGATTGAGGCA +ATTGGTGCGGGAGACCAAGGTTTAATGTTTGGCTTCGCATGTAATGAAACACAAGAATTA +ATGCCACTTCCAATCTCGCTTGCTCACAAATTAGCTCGTCGTTTAACTGAAGTACGTAAA +GATGACACATTATCATACTTACGTCCGGATGGAAAAACGCAAGTTACAGTTGAGTATGAT +GAAAATGGTAAACCTGTACGTGTGGATACAATTGTAATTTCTACACAACATCATCCAGAT 
+GTTACGTGGGAAGAAATCGATCGCGATTTAAAAGAGCACGTAATTAAAGCTGTAGTTCCA +GCAGAATTAATGGATGGAGAAACGAAATTCTTCATTAACCCAACTGGCCGCTTCGTAATT +GGTGGACCACAAGGTGATGCTGGTTTAACAGGACGTAAAATCATCGTTGATACTTACGGT +GGATACGCTCGCCATGGCGGTGGTGCATTCTCTGGTAAAGATGCAACGAAAGTTGACCGT +TCTGCAGCATATGCAGCTCGTTATGTTGCGAAAAACATCGTAGCAGCTGGTCTTGCTGAC +AAAGCAGAAGTACAACTTGCATACGCAATTGGCGTAGCACAACCAGTATCAATTTCAGTT +GATACATTAGGCACTGGTAAAGTATCTGAAGACGTATTAGTAGAACTAGTTCGTAACAAC +TTCGATCTTCGCCCAGCTGGTATTATTAAAATGCTAGACTTACGTCGCCCAATTTACAAA +CAAACAGCAGCTTACGGCCACTTCGGACGTACTGATGTAGATCTTACATGGGAACGTACA +GACAAAGCAGCTACTTTAAAAGAGCAAGCTGGTCTATAATATATGAAAAAAAGCTTTGCG +CTGTGTGCGCAAAGCTTTTTTTATTTGGTTTTATCCCGCATTAACTGCCCGTAAAAGCCC +GATTGGTTCAACTAATAATCAGTGGGGAAGAACAAAATCCCCACTGATTAAAGTTTCACT +TTATTTACTACTTGAATGCAAGAACTACAATAAACTTCCCCGTCTAGCTCAATATACTTC +TTCATATTTGTCATACCACTTGCCGGAAATGGCATATGAATCCACGCTTTTTCATACGTT +TGTATTTCCTTTTCACATGATTTACAAAGGATTGGTTTCTTACGGAACATAGACGGTCAC +CCTTTCAGTTGGCGTGCTTTTCCTGCGCAAAAAGCACTGCTCAGTAAAAATAGCGTTGCG +AATATAATACTCACTAATGTAGCGTATGGAATCGCGCCTTTTAAGGAAAAGCCGACAGTG +ATGGAAGCAACAAATAAGAGAATGAATGTTGTTGTTAATTTTTTGTAAAGTTCCATAGTG +AGTGAACCTCCAAATTATTTTGTAATTTAAAATGAAATCTACTATGTAGAAAATATAAGG +GGATTTAAAGAGTAGCTAATGGTGTGTAGTCCAGGCGTATAAGAAAGTATATTTTCCTTT +TTGTGGTCGTATATACTTATATTGTATCACGGTGCATCATTGGCTTCATACATAATTGTG +TAAAGATATGTATGATAAAATGAAGAAAAGTCTTTAGGAAAGGAAAGATGAAAAATGAGC +GTAACTGAACATAAAAAACAAGCTCCAAAAGAAGTGCGCTGCAAAATCGTGACGATTTCC +GATACACGTACGGAAGAGACGGATAAGAGCGGACAACTATTACATGAATTATTAAAAGAA +GCAGGACATAAAGTGACCTCTTATGAAATTGTGAAAGATGATAAAGAGAGTATTCAGCAG +GCTGTGTTAGCTGGTTATCATAGGGAAGATGTTGATGTCGTGCTAACGAATGGTGGAACT +GGTATTACGAAACGTGATGTAACGATTGAGGCAGTATCAGTGTTATTAGATAAAGAAATT +GTTGGATTTGGTGAGTTGTTCCGTATGATCAGTTATTTAGAAGATATCGGAAGTAGCGCA +ATGTTAAGTAGAGCGATCGGTGGTACAATCGGGCGTAAAGTTGTCTTTTCGATGCCAGGG +TCTAGCGGAGCGGTTCGTCTTGCGATGAACAAGTTAATTTTACCGGAATTAGGTCATATT +ACATTTGAGTTGCATCGTCAATGAGTAAGTGTGCTGGAATTGTATTAGCAGGAGGTATGT +CGAGCCGATTCGGTGAGCCGAAAGCGTTAGTAGGCTGGAAAGAAAGTACTTTTATTGAGC 
+ATATTGTGAAAGTAATGGAAAGCGCTGTGCAAGAAATTGTAGTCATTAGTCATACTGATA +TAAAAGAGAGAGTAGAGCAATTAGTACAAGTTCCAGTTATAGAAGATATGTCGCACTATA +AAGGAAATGGGCCACTTGCTGGAATAGTGTCAGGAATGGAATACATAGATTCAGATTGGT +ATATTATCATGCCTTGTGATGCACCGAACGTTTCAAATGAATGGATTACCATTTTATTAG +AGCAAACGAGCGATGAATATGATGCTGTTGTCCCTATTATTAATGGAAGAAAACAACCGT +TACTTGCAGCGTATAATAACCGCGTGAAAGAAAAGATTTATGCTTTGCTTCAAGAAGAAA +AAAGAAGTATGGGCCAGCTTTTATCACAATGTAATGTGAAATATATTGCTGGTGAAGATG +TACAAGCGAATGTAGATTGGTTTATAAATGTGAATACGAAAGAAGAATATGTGCAGGCTC +AAAAAGACCTTTCAAATGAATGAAAGGTTTTTTGGTATACAGAAATTGGAGAACAATTGC +TGTATAAAGGTATGTGAGTAAGTTGATTTTAAGGGGATTTCGTGAACTACCCGCCACTTA +ACGTCCTAAAGGAGTGTTTGAAGTGAGGGCTTCTCGGTTAATCATTACTTCTGATAGCTA +ACGAATTCGTCCTAAGACAGCCGAGCTAACTCCCTTGTTCCAAAGGTTATTTTATTACAA +TTTATGCTGACGCTAAAAGACGTATTGCTTCATTTTTGATATTGATGGCTGAGTTATAAT +CTCTATCGAGATTTACACCGCATATACAAGAATAGATTCGTTCAGACAATGTCAGACTTT +TCACATTTCCACAGCTACTACATGTTTTTGTTGATGGAAACCATTTGTCTATTTTCACAA +GTTGTTTTCCTTGTTCGTTTAGTTTATAAGTTAAGAAAGAAGTGAACATACCCCACCCAT +TATCAGCGACACTTTTTCCAAAATGAAGTGCTTGCGATATTCCTTTCATGTTTAGATCTT +CAATAGCTACAACATCAAAATTGGCAGCTAACTTTTTAGACTCATGATGAAGAAAATTCT +TACGTTGGTTAGCTACTTTCTCATGTAACGTAGCTACACGAATACGTTGTTTATTCCAAC +GTTCAGAACCTTTATTGCGTCTTGATAATACTCGTTGCGCCTTTGTTAATCGGTCTAACA +TGTTACGATAGAACTTAGGATAATTGGCTTTCTCATCCTCAGAACTGACGTATAATCCAT +CCATTGCAAAGTCTAATCCAACAACTGTTTCTACTTCTTTTTGTACAATCTCTTTTTCGT +ATTCAGTCAAGATTGACACATAGTATTTACCAGTAGGTGTCATAGAAATCGTACAAGACT +TGATCATATAATCTTGTGGTATCTCTCTATGTTTTTTGATACGTACCATTTTCAGTTTTG +GCAATTTGATATGACCATTAAGCAACAGAATATTACCGTTTACTACATTCGTTGTATAAG +ACTTTCTGTCCTTTTTACTTTTGAATGTTGGGAAGTTATTTTGACCACTAAAGAAATTCT +TATATGCAGTTTGCAAGTGTAGTTGAGCATTTGCAAAAGCTAATGAATCAACTTCTTTTA +ACCACTCAAATTCTGCTTTATATTTCGCAGGGGTTGGAGGCTTTTGTTTCTTTAGTTGTT +CCTTATCATCTTTGTATTTTTCGTACGCTTCTTTTCGCTCAGCTAACATTCTATTGTACA +CGAAACGTACACAACCGAAGGTTTTACGTATAAGATTTGCTTGTTCTTCTGTTGGATACA +ACCGGAATTTATATGCTTTATTCTGCTTTGTCATATCATTTCACCTCACTTTTTACCTTG +ATTTTCAATATGTTTCTTTACTACGTCTATTGGCGAACTACCAGTAGTTAGTAAGCAAAA 
+ACTTCTTGACCAAAACATTTCTTTCTAAAGTTTTTTTCACTTGTGGAAAGTCTTTCTTGA +TTAGTTGAGAACTTGCACTTTTATAAGCATTAATAAATTTTGTCATTTCTTTTTAAGCGA +GTAATTCTGATGCGGTGTGCTGAAGGACTGTTTCATATTGGTCAAGTAAGCCACGAAAGA +TTCCATCCCAGCTTTTTGATTGGGCATAAGACGAAGCAGCTATACCCATTTGCATAAGCT +GCTCTTCATTTTGTAATAAAGAATGGATGGATGATAGAAATGAATCTGCATGTTTGGGTG +GGCAAAGAACTCCTGTTTTTCCATCTGTAATAATATTTTTAACTCCGCCACTATTTGCAC +CAATGACAGGTGTACCGCATGCAAATGATTCAAGTACGACATTTCCGAACGTTTCAGTAG +CTGATGGAAATACCATCATATGAGAACAAGCATATGCTTCAGCTAAATCCGTACCTTGTA +AATATCCAGTAAAAGTGACATTTGTTTTCGGAACAGCTTCACGCAAATTTGTTGCTAGAG +GCCCGTCTCCAGCGATGAGCCAATGAATATCGTCACGTTCTTTGTTTGTCGTTTGAATAA +GTGTTTGAAGAGTATCGATATCTTTTTCAGGAGCAAGGCGTCCGACGTAGGAAAGAATAT +GTTTCGCTGTAATATTATATTTTTTTCGGAATAGGTCTTTATTGTAAGTTGGATGAAAGA +GAGTACAATCTACACCACGTCCCCAAATATAGAGCTGTTGAAATCCTTTTTTCTTTAATT +GATGTAATGTTTCAGGAGAAGGAACAAAGTTTTTTTGCATATGACTATGAAACCAGCTTA +AATAGTTCCATAGCATATTGGAGAGAAATTCGATTTTGTAATAGCGTAAATAGGCGTCAA +AATCAGTATGATAAGAACCGACAACTGGGATATTTAACTTTTTGGCATAATATAACCCAC +AAAGTCCCATATTGAAAGGCGTAGCGATGTGAATGATATCAGGTTTAAAGGCAAGAAGTT +CCCGCTTAATGCGCGGAGTAGGAAAGGCAAAGCGACATTCTGGATATAATATTGTTAACG +GGATACTTCTCATTTTGTTCACATTCGCTACGAAATTATCTTCTGCCGTATGCTGAGGGG +CGAAAACAGAATAGGCGATATTTTCTTTTTGAAAATATTTAGTTAAACGTTCCAAAGTTT +TCGCCACACCGTTGACTTGTGGTGTAAATGTATCGGTAAATATGGCGACTCTCATCATAT +CGCTCCTTTACATGAGTGGAATGAGTTGATAAAAAGAGATGATGCCAGAGCAAGTGCCAA +GGCACATGCCTACAAATACATCTGACGGATAATGAAGCCCTAAATAAATACGGGAAATAC +CGACGCATAATGCTAACGGTAATAGAAAAGCAAGCAAGTTTGGATTATAGCAAATAAATG +GAATGAGGACAGAGAAAACAGCCGTTGTATGTCCGGATGGAAAAGAATGATCTTTTAATG +GATGAACTGGATATTTCGCATCCTGAATTGTTAAATAAGGGCGTTTTCGTGGATACCATC +TTTTTAATATTTGCACAGGAATATGGCTAATTGTTAAAGAAATAGCAGTTGCAATTGCAG +CTTGATGCAAATTCCCTGTTGCGAAAATTAAAAAGAATAGGGTAAGTGCAATGGAAAAAC +TTGCACCACCGATATGGGTAATATTGCTGAAAAAGATGTTTAATGTTTTTTGATCAAAGT +AGCGATTAATTCCTTTGAAGATGTAACATTCTATTTTATATAGTCCACTGACCTTCATGA +GATTTTTCCTCCCTCATTTACATATATATGGAGTTATTAACTTCTATTTTAATAAGATTT +TATTGAGGGAATAATAATGTTTTGTAAAGAAAAAGTTAGATTTTTTCATAAAAAAAGCTG 
+CCAGTCTTATGGCAGCGATAAAATCACTTTTGTAACGATTTATAATATTGTCCTTTTTCA +ACGTATTGTGTGCGGATACGTTCCATATCTTTACGGTCTTCTTCTGTTAATTCGCGAATG +ACCTTCGCAGGGCGACCGAAAGCTAACGTATTAGGAGGGATTTTCTTTCCTTGTGAAACG +AGACTACCAGCACCGATGAAAGCTCCTTCACCAATTTCAGCGCCATCTAATATGATAGAT +CCCATTCCGATTAAAGCATCTTTTTCAATTTTGCAGCTATGTAAAATAACTTGATGCCCA +ACGGTAACGTCATCTTCTAAAATAAGAGGATACTGGGGGCTTTGGTGAAGTGTACATTGA +TCTTGTACATTTACTCTATTTCCAATTATCGTTGGTGATACATCACCGCGGATGACTGTA +TTAAACCAAATGCTAGATTCTTCGCCAATTGTCACATCGCCTGTAATGGTAACATAGTCA +GCGATAAAAGCACTACTCGCAATTTTCGGATTTTTTTCTTTGTAAGGATATATCATGTAA +AGCCTTCCTTTCCTAGAGACTAGTTTTAGTGTATCAAATTATGAGAAAGAGTGAAATGGA +GGAGGTCCGTATGTGGAATTATGAAGCAGAGGAAGCGAAAGCTGTAGTCGTTATCGTGCA +CGGCGCAATGGAATATCACGGACGTTACGAAGCTATAGCAGAAATGTGGAATCATATCGG +CTACCACGTTGTGATGGGAGACCTTCCATCACATGGAACGACTTCGAGAAATAGAGGACA +TATTGATTCATTTGATGAATACATAGAGGAAGTTAAATTATGGGTGAAAGAGGCAAGGAA +GTATCGATTACCTATTTTTATATTTGGTCATAGTATGGGTGGTCTTATTGTCATTCGTAT +GATGCAAGAAACGAAGAGAGAAGATGTAGATGGTATTATTTTAAGTTCACCATGTTTAGG +TGTATTAACTGGACCTTCTGCTCCGCTTCGAGCTGCCTCAAAAATATTAAATGTTGTTGC +TCCAAAATTGCAATTTGCAACGAATCTTACAGTGGAAATGTCAACTCGTAATCATGAAGT +GAGAGATGCGATGGAGAATGATTCATTGTTCTTGCGCAAAGTATCAGTACGTTGGTATAG +TGAATTGATTAAGTCTATCGAAATTGCGCATAAAAAAATAGGCGATTTTCCAGACGTTCC +ACTCTTGCTAATGCAGGCTTGTGAGGATAAACTTGTAGATAAAACACGTGTCCGTACTTG +GTTTGATAATGTTAAAATAAGTGATAAGGCTTTTAAAGAATGGCCGAATTGTTATCATGA +GTTATTAAATGAGTATGAGCGTGATGAAATTTTGAATTATATTCAGTCATTTACTGAAAT +ACGCATCAATAACATAATAGAAACAAATAAGTAAATTATTTGTACATTGAATAGAGGAGA +TGAAGGAAGAAGTGAACGTACCGAGTAATCCCATAACGCTCATGGCGAAAGTATACCGTG +ATGTGTTTCCGGTAGTACACCATGAGCTAGCGATGTGGAAAGAGCGTGCCTACCATATTC +CGAATGACGAGCTTCATAGTCAGGCAATCGCAAGTATTGAGAATAAAACGTTTCATTGCG +AGGGCGGTGGCATTTTAGCGCTACTAGCAAATGAACATCGTGAGGAATGTATTCGTTTTA +TCGTAGCTTATCAAACGATTAGCGACTATTTAGATAATTTATGTGATCGTAGTACATCAC +TTGACCCGAAAGACTTTGCGGCTCTTCATGAGTCGATGGTAATGGCATTAAGCCCTGAAG +TTGAAGGGGGCGGTAACTATTATCGTTATCGTGATGATCAAGATGATGGTGGTTATTTAG +ATGAGCTTGTTGAAACATGTCAGGATGTTTTAAAGAAAACGAAGCATTATGATAAAATTG 
+CTCCAATTCTTCATGAACTTGCTTGTTATTATTGTGATTTGCAAATTCATAAACACGTGA +AGTTAGAAGAAAGAGAACCACGCTTAAAAACATGGTTTGAAGCGCATAAAGAAAATTTAC +CACCGATGAGTTGGTTTGAGTTTTCAGCATGTGCAGGTTCTACGCTTGGAATTTTCTGTC +TTGTCGCATATGCATTTCATGATGAATTACATGATGAAGATATTGCGAAAATTAGACAAG +GATACTTCCCTTACGTACAAGGACTTCACATTTTACTTGATTATTTCATCGATCAAGAAG +AAGACCGTATAGGCGGGGATTTGAATTTCTGTAGTTATTATGAAAACGAGCAAGCTATAT +TAGATCGTATGAAACATTTTGTAGAAGAAGCAGAGAAGAGCATTGGTGATTTGCCTCATG +CGAAGTTTCATCGTCTCATAAGCCGAGGATTACTTGGTATTTATTTATCAGATCAAAAAG +TATCAGCGCAAAAGAATATGCATAAAATGGCACGGCGCATTGTAAAATACGGAGGGCTCA +CTTCACGATTCTTCTACTGGAACGGGAAGATGTACCGAAAGAAAATGGCGCAGTGATGAA +GAGACCATCTCAAAAGGTATTTTGGGATGGTCTTTTCCTATTGTCGTATTTTGTAGTTAA +GTCGATATAATTTCATGTGCTGTTGCAATCTTGCTCCATAATGAAAAAGGGACAGTCCAA +AATTAGATTTTTTGGACTGTCCCTTTTTACTATGATCGTATTTTTAAAAATACATATATG +AGAAGTTCAATCGCGAAAATTACTAAAACAGATAGTCCGAATAAAGGCATGACAGCGCCT +AGTATAACCATCATGATAAAGAAAACGATTATGCTTTTCTTATCTCTTTGCTTTGGCGGT +GCTGCTAATTTTCCTTTTGGCTTTCTTGCTAACCACATTTTTATTCCGTAATAAATGAGA +AGTAATAAAGATAATGTCGTTAGTAAGCATAATATTTTATTTGGCCATCCGAATAAATGT +CCTTCGTGAAGCGGGATACCGTAAGTGAACCATTGTGCAAATAATCCATAATCACGATAG +TCCGTTTTCGAAATGAGTTCTCCGCTATATTGATCAAAGTAAGCTGTTATTTCTTCATTT +GGTGCGACATGCATACCCGTAATACCGGAACCGCTTGATTTCGAAACGGTGAATACACCT +TTCGGATCAGCCGGTAGGGAGATAACATATGGCTTCTTTATTTCAATTCCTTTTTGTAAT +TCATCGACAGAAATTGCTTTTGGCTCATTTGAATTCGATTCAGGAGGAGCTTCTTTTCTT +GTTGCCCACGGCAATTCTTTTACCTTTGATTCAGGCGGTGCCATATACAACTTTGGATAT +CCAATCGATTCATTCGACGATGCGATTTTATAAATTTGGTTACCCATAAATCCTGACCAA +GGCAATCCAGATGCGACTAGCAGAACGAGTGGAATTGTAAATATAATACCGATAATAGAA +TGACGTCGTTTTGCTTTTTCTCGCTTATTGGATGCTGGTGTGTTTTTGAATTGGCGTATA +CTCATATACAACCCAGTCACAATTAAAAAGATTGTCCAGCACGCTGCAAGTTCTACAATA +TAATTGACGACAGTGCCACCGACTAAAAGGGAACTATGTAATTCCCGCATTATATTGGAA +AACGTTTCAGCTGTATTTTGATCCCCAACAATTTGATTATTGCTATCTAAATACACAAAT +TTTTGCTTTCCTGTATATTCATTAGCAATCGTAAGCCTAGTATTATAATCCCCATTAAAC +TCACTAATTTTCGTCACGCTATAATGCGGATATTTTTTCTCTGTTAAGAAAAGCGAATCA +GCCATCGAAATAGATTCTGTTTGGGCGCTCTTCCCAAAATATAAATCTTTATAGATGAAA 
+TCTTCAACTTCCTCACGAAATAAATACCCAATTCCGCTCAGTGACAAAGTAATAAGAAGC +GGCGTAATAAAAAGCCCGGCATAAAAATGCCAACGCCAAAAAATATAATGAAGCGAACGA +TTTTGTTTCATACGACAGTTCCCCTAACCTTCCCTATATGAATAAACTACTTATTTAATA +TAGCGAGATTAAGTGTGATTACTTTGAAAGAATTGTGAAAAAAGTTAAAAGTTTTGGGGA +TTTTTATCAAACACAAAGCACATTTCAAAAGGGATGTGTTTTATGTTTATGAAGACCTTT +TTATATTTGATGCATAGATCCTAACAACTGCTTCCGCAAGACATTGAGCGGAATCAACAA +AGTCCTCACTCACCACCACATTCAAATCGGTACATGCGATAATAGCGGTATCTACTTCGT +CTTTTAACTGTAAAACGAGCAGACTCCATAATTTGCGAGCTTCCTCAATTTCTCCGCCTT +TAATACAAGTAATAATTTGATTAATCATTTCTTGCCATTGTTCATTATGAATGTACTCTA +TATTACGTTTTGTAATCCCGTCTTGGTAAATACCTGCTTGAACAGTCGCTTCTGTTGCAA +GAAGAGCGACTCTTTTTGTATTTTCAGGAATTGCTTTTAACGTCTCATCGACTATATTCA +AAATAGGAATGGTAATAGAACGCTGTAATTCTTCGAAATAAAGATGCGCTGTATTACATG +GCATTGCGATGAAATCTACACCAGTACTTTCAAGTCTTTGCGCTCCTTCAATAATCGCTT +TTTTCATCGCCGCATGATCAATAGGGCGATCCATATAAAACGGTGTTGGGCACGAATAAA +TCATCATATGAGGAAAGTCCATATCATGCTTTGCTCCATATATTGTTTGGCACCCTGCTA +CAACTGTATCGACGAATGGTCCGGTTGATTTTGGACCCATTCCTGCTAGTATTCCAATCA +TTGTTTAGTCCCCTTTAATATGTAATCAAAAACGTTTTCTGCGAACGAGAAAAAGAAATA +AAGTAAGTAATAAAAATGAATATACGATAGCCCAAGCTGTAAGCCATAGAGTATGAGTAA +GTATATTATATTTGTATTGTCCAAGCATTATAAAGCTTTCGACAGGTGGTAAAAACCATA +GTAACTCTTTTTTTAACCGGATTGAGGCAATCGTCATAAGTAATATAAATGTAAGACTAA +GCCAAGCGGTGCTTGCTTTTTGAATAATATTTCTAGTAAATAATGTTGCGATTGAAATTC +CTAGTATAGATAAAGACATGTGAGCGAGAAATCCGACTAGAAATAAAGAAGCTGTCATTT +TTTCATTAAACATTTGCAAAATAATTGGATAGATTACGGATATAAAAGATAAAACAGTAC +AAATAAGAAGAGCCGTAACATATTTACCAACGTATAATGCCGAAATATTATTTGTATGTG +AAATAGTGATTTGTTCTTGTACAAGATCTTCAGTATGAAAAATTGTGACTGTAATCCAGG +CAGATAATAAATAAAGTGCGAGTGCTGTTTCTAAGTATGTCGGTACAATGGGAGTTGGTT +TATATGTATATACAAAAAGTAAGCTTATGCAGTACATCGCAATAGGAGGAATGTACTTAT +ACGATTTTGTGTAGTCGAGAAAATGGTAACGAATAAGCGCAAACATAATAAACCTTCTTT +CGATTTAAAAATTAGGCTGTAGCAGTGTGATAGATGCTTTTTTATGTAATAAAAACTGAA +GCATTTCGTTTGTATATTCCTTTTCGATTTGTAATTGAATGAGGTTTTGATTTGAATTGT +GTGTAACTTGTATAAATCCTGATTGCTTTTGTAGTTCTATAGATGAAAATGTTTCGTGAA +CAATTGCCTCAATATAGACCTGTTCTGTTGATTTTTGCACAGAGGTATCTTCCGCAATTG 
+TATGATTTGCTAATGTCACAATTCTATCGGCGAAGTTTTCTAGTAGTTGTTTTTCGTGAC +ATGTGAAGAGTATAGATATGCCTTGTTGTTTTAATGAGAGTAAAATATGCTCTAGTTCTT +GCTGAGAGTTAGGGTCGAGTCCGGAAAGAGGTTCATCTAAAATTAATAAATGGACGTCTG +TAAGTAACGCTTGCATAATGCCTGTTTTTTGTTTCATACCTTTTGAAAAGTTACGTACAA +TGGAATGCTTTGCATGATGTAAATGAAAAGATTCAAGGAGGATCGGAATTTTATTTCTTA +AATATTTTGTTGATAAACCGTGAATGTGGCCGAGATGATATAAATAATCCTCTAATGTAA +AACGAATTTCTTCTGGAAAATGTTCAGGAACATATCCGATTTGTATATGTTCTTTTCTTT +GAAGTGTCCCTGCCGTAGGTGAAATAAATCCTGCAATTATTTTGAGTAGAGTACTTTTTC +CAGTCCCGTTCCCGCCAATAATAGCGAGTGCTTCTCCTTCTGGAATGGATAAATCGATGT +TGTCTAGTATGAGTGATTTGCTGTACTTCTTTTGAATTCCCCTTAGTTCTACTAGCAAAG +TTCTATCCTTCTTTCTTCAAATATTGTATACATCCATTATATAGGAGAATAAGAAAAAGA +CGTGCAATGAGTGCACGTCTTTTTTTGTTATGTTTTGTCGAACAATCGATATATCTGAAA +AATCGTTGATATAATTATCCCGCTATTTGCCGGACAGTAAACCTCACTGATTAAAGTTTC +ACTTTATCGCTTCTCAATCGCCACAATAAATGGTGGGTTATTTTGCTGGTTAATGAAGCC +GTATCGCAAAACGTGAGCTTGTTTTTGGTCGAGTTCTTCTGCAAATTTGAGAACAGCGTC +GCGTTCTACTTGTCCTTCTGGATGTCCGTGGTAAATGACAAGGACGATGATACCTTCAGG +TGCCATTACTTCTAATAATTGCTCGATCGCCGAGATTGTTGAGTTCGGTTTTGTAACGAT +ATGTTTGTCACCGCCAGGAAGGTAACCTAAGTTGAAGATAGCGCCTGTTACTTTTCCTTT +TGCTTCTTCTGGTAGTACGGATAAAAGTGTATCGTGACTATCGTGAACTAAAACAGTGCG +TTCGAAAAGTTCTTTTTCTTTTAGGTGGATAGTAGAACTTTCGATTGCTTCTTGTTGTAT +ATCAAATCCAAATACTTTTCCGTTATCTCCAACGATTTCAGCTAGGAAGCAAGTGTCATG +CCCATTGCCGAGTGTTGCATCTACAGCGTAATCGCCTTCTTTTACTGCCGTTTGCAGCAG +CGAGCGAGCAAACGGTAATACACGTTCTAATTTCATTTTTGTTTCTCCTCATTTGCATAT +TTTCCTTGCCAGCTTCCGCGGCGTACAAATTCTGCATCGATGGAATTTAATACTTCCCAT +TTATTTAAGCTCCACATTGGACCAATCATTAAGTCAGGCGGACCGTCACCTGTGATGCGG +TGCACAATTACGTCTTCTGGAATCATTTCAAGTTGGTCAACAACGAGACTTACGTAATCT +TCAAGAGAAAGGAATTCTAGTTGTCCTTTTTCATATTGCTTCACCATTGGTGTTCCTTTT +AATAAATGAAGTAAATGAATTTTAATTCCTTGTACATCAAGCTTCGCTACTTCACGAGCT +GTTTCCATCATCATGTCGTAATCTTCAAGTGGAAGACCGTTAATAATATGAGAGCAAACT +CTAATGCCATGTTTGCGTAATTTATTTACGCCTTCCACATAAGATGGATAATCGTGAGCA +CGGTTAATAAGATTTGCAGTGCGTTCATGAACAGTTTGTAGTCCGAGTTCAACCCAAAGG +TATGTACGTTTATTTAAGTCCGCTAAATATTCAACGACATCGTCTGGTAAACAATCTGGA 
+CGAGTTGCGATAGAAAGACCGACAACGTCTTTTTCTGCTAGAAGCGGTTCGAATTTTTCT +TTTAACACTTCAAGTGGTGCATGTGTATTTGTGTAAGCCTGAAAATAAGCGATACATTTT +CCGTCTTTCCACTTTGAGTGCATTTTTTCTTTCATTTCATGATATTGCGTTATAACATCA +TCGCGGCGATCGCCAGCGAAGTCACCAGATCCAGCAGCACTACAAAACGTGCAGCCGCCG +TAAGCGACTGTACCGTCACGGTTCGGGCAATCGAAGCCAGCATCTAATGAAACCTTAAAG +ATTTTTTCACCAAATTCATTTCGTAAGTGGTAATTCCATGTATGATAACGTTTATTGTCA +TTTGTATATGGAAAAGGGTTTTGAACCTTCATTATTTTCCCTCCTAAGACGAGTCAAAAT +GAAACAAAATCCATTATAACATACTCATAAGGTTCATATGGAAGGGACACACTAAAGGGG +AATTGAAGCAAGGAGGGACAATTATGGCAGAGCGTCAATCACTTGAAGAGTATATTACAC +AGGCAGAACAAGCGGTGGAATATGCGAAAGAACAATTAGACCAAGGCATGAGACAGGAGC +ATTACAATACGATGGAGTATTCGGATGCTCAGTTGCAATTAGAACAAGCATATAATGATT +TACAAACGATGCAACAACATGCGAATGATGAGCAACGTGAGCAATTAAATAGAGCACGTA +TGGCAATTCGCCAATTGCAACATCAAATGATTATTACACCGCACTAATAAGGAGTGAATG +TAATGGCGAAACGTTCAGATCAAAACAATCCAGAGCAAAAAACGCAAAACGGACATAACG +CTGAGTTTTCGAATGAACTTGATCCAGTTGTTCAAGTGAAACAGCGTAATAGTAAAAAAG +GACAACCTCAAAGATCAAAGCAATCAGAGTAAACCAGGTCGCATTATGACCTGGTTTTAT +TTTGCCTCGTGATTGGTGCGGGCTAATCAAGGGGCAAAATAATACTGATTAAAGATTTAA +TTCATTATATTACAATAATGTAAGATTACTGTAAGGTTTGCCGATAGTTACGATTGTCTT +TTTCCAGTAGAGTAAGGGAGGATTTAATTTTGGACAAGGAGTTGTATTATGGGGAACAAT +ATTACAAACAAACGAATTGATGAGTTAGATTACATTCGTGGCTTCGCATTACTGGGGATT +ATTTTAGTAAATATTCTTGCACTACTGAATATTAAAGTTCCAGATCCTAGTACAGTGGAT +GCAAGTTATCAAAGGTTTCTATACTTATTTGTAGAAGGTCGTTTCTTCTCAATCTTTTCA +TTCTTATTTGGAGTAGGATTCTATATCTTCATTACAAGAGCAATTGCGAAAGGGAAGAAT +GGATATGTTCTATTTCTACGCCGCTTAGTTGCACTATTTATTTTTGGTTTGATTCATCAA +ATGTTTCAGCCTGGGGAAGCACTAGCGTTATACGCAATTTGTGGATTAGTCACTTTACCA +TTTTATAAAGTGAAGAAGCAAATAAATTTAGTAATCGGTCTTATTCTTACGATAGTTTTT +AGTATTATGGCAGCTAAGGAATTATTACCATTAGGTTTAATTTTATTAGGTCTTGCTGCA +GGACAGTACAAAGTATTTGAAAATCTTTCAGCGAAAATAAAGCAAGTCGCTATTTTTACA +GGAGTTATGTTTGTTTTAAGTGTGATAGCTTTATGGTATCAATATGGGCACGTTCCTGCT +AATCCATTTGTAAATATGATACTTATGACTGAAGATGGAACAATGGACGCTGCAGGTCAA +TTCTTAAAAATTGGTGTTACAGTTGGACCAATTATTTCAGCTTTCTATGTAGGAGCATTA +ATTTTATTACTTCAGTTAAAACCAGTTCAAGCATTGTTAGCGCCACTGAAATACTATGGT 
+CGTATGGCTTTAACAAATTATGTTGGACAAACTGCAATGATTTTAATTGCAGGCAGTGTA +TTTAATTTTGCAGGAAACTTAACGTACATGCAGACGCTATATGTGTGTATCGCAATTTAT +GCAATTCAAATTATTTTCAGTGTGATTTGGATGAAAATCTTTAAAATGGGTCCATTAGAA +TGGATTTGGCGTGTTATTACGTATTGGACGGTAACGCCTTTAAAGAAATAAGAGTAGGAA +ATAGGCTGTTCCACATGCGAAACAGCCTATTTTTATTGTCATCTTTTGTCGGTAAGTCGA +TATATTTGAAAAATCGCTGATAAAATTGCATTTACTAATGAAAAAACTAAGCTTACATGC +TCAGTTTTTTTTCTTACAAAAGTGTAAGGTAATTGTAATGTTAATAAATAGCAGTTCACC +TCCAAAAGGCGCAAAATAGGTAATGGAAAAACAAGCGTAGGAGGATATATATGACGAAAC +CAGTTGTAGACGTGAAAAACGTTCAAAAAGTGTACGGTAAAAAAGGTGAGAACCAATCAC +ACGCGTTAAAAGGTGTTTCATTCTCAATTCAAGAGGGTGAGTTTGTTGGTATTATGGGAC +CATCTGGTTCTGGTAAAACGACATTATTAAATGTAATTTCAACGCTTGATAAAGCAACGG +GCGGCGTTGTTGAAATTGCGGGTACGGATATTACGAAAATGAAGCAAGGTGAGCTTTCTG +ATTTCCGTTCACAAAAATTAGGATTTATCTTCCAAGACTTTAACTTATTAGAGAACTTAT +CTATTTATGAGAACATTGCACTTCCACTTTCCCTTCAAGGTGTTTCATCACGTAATATTG +GACCGAAAGTAGAGAAAGTAGCGGATATGTTAGGGATTACAGAAATTCTTCAAAAGTATC +CAACTGAAGTATCCGGTGGACAGAAACAACGTTCAGCAGCAGCACGTGCACTAGTGCATG +AACCGGCAATTATTTTAGGAGACGAGCCAACAGGAGCTCTTGATTCGAAAAATGCAGCAA +GTTTACTTGATGCGATGACAAACTTAAATGAAGAACAAGGCGTATCTATTATGATGGTTA +CGCATGATCCATATAGTGCAAGTTACTGTCAGCGTATTTTATTCATTCAAGATGGTGAGC +TATATAAAGAAATTCACCGCGGTGGTACGCGTGAAGAATTTTATAAAGAAATTTTAGATG +TGCTTGCGGACTTAGGCACACAAAAAGCGTAAGAAGGGAGGTCTAGGGCATGTTATTTAA +ACTTTCCATGTCAGGGCTAAAGAGTAAGCTGAAAGATTATATTGTCTTACTTGTTGGTCT +TGTCATGTCGATTTCAATTTTTTATATGTTCCAAACGTTAGCGCTGAATAAAGCGTTTAT +CGAATCCAATTCTGTTATTAAGTCTATTGGTTTCGTATTCCAAGCAGGTTCATTTTTATT +AGCAATTATAACGTTCTTCTACATTTTATATGCAAACTCTTTCTTATTATCTCTTCGTCA +AAAAGAGTTTGGTATGTACATGATGTTAGGTGCAAAAAAGCATAAAGTTACATTACTTAT +GTTTATTGAAACGATTGTATTAGGTGCTGCGTCTCTTGCGATTGGACTTACAGTTGGTGT +AGGACTTGCAGAAGGTATCGGACAGTTATTAATGAAACAATTAGAATTTGCTGGTGAAGG +CTATAAGGCATTTTATCTACCATCTATGACTGTTACTTGCATTTTCTTCTTTGCATTATT +TGTATTATCAGCAATTATGAACAGTATTAAGTTATCACGTATTTCAGTACTGCAACTTGT +ACATGCAGATGCACAAACAGAACGTGTTGCGGTAAAAGGAAAAATGACAGGTGTCGTTGC +AATCCTTGCTGTTATTTTATTAGGAATTGGCTATGCATCAATGATTTACATGGAAAAACT 
+AAGAGAAATGGGAATCCTTATTGCATTAATTACAACAACAACTGGTACTTACATGCTATT +TGGATCACTTCTCCCAGTTATCATTAGAAAGTTAAAGAGTAATAAAAAGCGTAGTGAAAA +AGGACTTAATGCTTTTACGTTTGCACAATTAAATTTCCGTATTAATAGCTTAACGAAAGT +ACTTGCAACAGTAGCGATGTTAGTTGCTCTTGGAGCGGGTGCAATTTCAGGTGGTATGGC +GTTTAAAAATAACGTTATAAAAATGGTTGATGGTTTAGTAATATATGATTCAGTTATTCA +TAACCCAACAGCTGAAGAAAAGAAAATTTTAGATGGTATTACATTTAAAGAGAAAAATGA +ATATCGTTACAAAGTTGATAATAAATACGTTTATTATGTAAAAGAAGATTTAGAGAAAAA +ACCTCCTTTCGTACAAGATACAAAACAGATGGAAAGTATGAAGGATTTAGGAAAAACAAA +GAAAGTTTCAGAGGAACTACCAGTAGGTGCAGTTTCTAGAGAAATGAATGAAAAAGATGC +GAGTGCTAAAGAACTTCCAGAAGAATGGAACGAGGCTTTCAGAACAATTCAGCCATTTTA +TCTATATGAAGATCATGCAATTAAAATTGTAGATCAGAAAATGTACGATACTGTAAATGG +TAAAGAAGGAATCGTAGTTACTGGAAAAACAGACGATTTTGTAGCATACACAAAAGAATG +GAAAAAACTTGACGAGTTGCAGCTAGTTAAATATAAAAATGTAAAAGCTGAAAGATTAGA +TAGTAAATATCAATCTTACAATGGATTCTACGGCATTGCGAGTGGAACAGTATTTATGGG +ATTCTTCCTTGGAATTGCATTCTTAGCAATGATGGCAAGTTGTTTAATGTTTAAAATTCT +TTCTGGGGCATCAAAAGATATTACGCGTTATCAAATGCTGCGTAAAATCGGTGTGCGCCG +TGAATTATTAACGAAATCAATTTATAAAGAGTTATTCTTAGTATTCTTATTCCCGGCAAT +TGTGGGTATTGCTCACGTATTAGTTGGTATGAATATTTTCGGATTTATTTTAGTCGATCC +GTACTTCCGCATTTGGGTTCCAATCGTAATTTTCGTAGTAATTTATGCGATTTATTACTT +CATTACAGTTCAATTGTATAAAGGGATTGTTCTTCCGAAAGAAGATTAATGAAAAACCGA +GCGAAACTGCTCGGTTTTTTATATTTTCAGAAAAAACACTTTAATGTTACTGAAATATAT +AATATAATGTAAATAAGTAACATAATATGTGAGTATCAGCAACTGGAAAACGAAAGGAGG +AATTCGATCTATAGAAAGAAGCTGAACTTGTATCCCCTATTTTTCTACCCACACTGTATT +TGTACAATCATAAATTTCCAAATGCAAATTGCCTGGACCTAAGCAGTATATTGTGAAAAT +AATGGATAATACATGGAAATCTCATAGCATGATAAGGTAAAATGGACAGGATAATGAGAA +AATTACCGTTCATAAGATGAGGAGATACATGTATGGAAGTTGTAGAGGCATTAAAAGATA +TAAACCAAATTGAGGCTATGAAAAAGTATTTAAAAGAGCACTCGCAGCGAGATTATCTTT +TATTTGTTATTGGGATTAACACTGGATTAAAGATTACTGAACTATTGAGTATTAAGTTTG +AAGATGTATTAAATGAAGATGGAACTGTTAAAGAGTTTTATTCTCTTCCTGTGAAAGATG +AAAAGTTTAAACAAGATATTTATTTAAATACAAAAGTAAAAGAGGCGCTTTTAGCGTACG +TACAATCTTTTGATATTCAAAGAGAAAACTACGTATTTCAATCTAACAAAACAACAAATT +CAATCTCACGCCAACAAGCGTATCGTGTTATCCATAGCGCAGCTGAAGCGGTCGGAATAC 
+TTGGTAAGATTGGAACGAACTCAATGCGAAAAACATTTGGATTTCATGCGTACAAAAGAG +GAATAGCGATTGCGCTATTGCAAAAGCATTTTCATCACGCAACTCCATCAGAAACGTTAA +AGTATTTAGGAATCTTAAAAGATGAGAAGTTTAAAACAGAGATTGATGTAGATTTGTAAA +AGGAAAGAGCCGTAACTAAAAAATTAGGAGGCGGGAATATGAATATTAGAGAGAGTGAAC +TACCGGGCATTGGATGTAAATTTGAAGTGATAACAAAGGGTAATGAAAAAATGGTTATTG +TTATTCATGATGATGGAAGAAGGGAAATGTACCATTTTGATGCGGATCACGATGAGAGTA +TTTCAAGCATTTCTCTTCGTGATTCTGAAGCGAGACAAATTGCGGCTATATTAGGCGGAA +TGGTATATAGGCCGCAAGCGCTAGACACAATTGAGATGGCTTTTGAAGGATTATCAATTG +AATGGTTTAAGGTGGAGAATAATGCACCAGTAGTACAACAAACAATTGGGAGCTTACACG +TTAGAAAAACATATAACGTAACAATCATTGCTATTTTGAAAAAGAATATGAAGAAATTCT +TTAATCCAGGTCCAGACTCTATTATTGAAGCTGGCGATATGCTCGTATTATCGGGTGAAA +GACATGAAGTAAAAAGAATTATTAATGAATTGCTTTCAGCAGGAGGTGATTCCTAATCGA +TGGATACTTTAATTTTTGAAGTTGGAACTGCGTTAGTATTAGTAGCTTTTGCAGCTATCC +TCGCTGCGAAGTTAAAGTTCTCGATTATTCCGTTTCTCATTATACTCGGTATGTTAGTGG +GGCCTCATGCCCCAGATTTAGGACTTATCGATTTAAGGTTTATTGAAAGCGGAGAAGTTA +TTTCCTTCCTCGGCCGTGTTGGCGTCATATTCCTCTTATTCTACTTAGGTTTAGAATTCT +CAATAAAAAAATTAATTAAATCAGGAAAGTCGATTGCTTTTGGGGGTAGTGTTCATATAT +CGCTTAATTTCATATTAGGTTTACTTTATGGATATGTTATGGGTTTCCCCTTATTAGAAA +CATTAATTATTGCTGGAATCATTACAATCTCATCGAGTGCAATTGTTGCAAAAGTAATTG +TTGATTTAAGAAGATCTGGTAATAAAGAGACAGAACTCATTTTAGGGATCATTATGTTTG +ATGATATCTTTTTAGCTGTATATTTATCAGTCGTTTCAGGATTAGTACTCGGAGGTGCAA +CATCATTTGTAGGTGCTCTTACATCCGTACTAATCGCAGTAGGATATATGTTACTATTCT +TTGTAATCGCTAGAAAAGCTACGCCGTTTCTAAATAAAGTATTAGATATTTCGTCAAACG +AAATTTTTATTATCGTAATATTCGCTATTTTATTCTTTGTAGCAGGATTTTCAGAAACAA +TTCATGTTGCGGAAGCGATTGGGGCTTTATTGTTAGGACTCGTCTTTTCAGAAACAGAGC +ATAGTGATCGAATCGAGCATCTCGTCGTCCCGTTTCGTGATTTCTTTGGAGCTATATTTT +TCTTCAGCTTCGGTTTAAGTATAGATCCGTTTTCGCTTGGAGGAGCAGTATGGTTGGCAT +TAGGAGCAGTTTTCATTACTCTCATCGGTAATTTTACAGCTGGAATGATTGCAGGGCGTA +AAGCCGGGTTATCGCATAAGGCTTCTGCGAATATCGGTTTAACACTTATATCACGTGGGG +AATTCTCCATTATTGTCGCGAATCTCGGAATAGCGGGTGGCTTAATGGCAACGATTAAAC +CATTCTCAGCTTTATATGTTTTAATATTGGCGTCGTTAGGGCCATTATTAACGAAGGAAT +CTGGGAGAATATACTCTCTACTAGACAAAATATTTAAATGGAGTGCTAAAGAAAGTGCAA 
+AACGTAAAAAGGAAGTTGGATAACCTTCCGCTAGGGAACGATTCCATTATTTATAGATTA +ATATTTTACAAAGACGATTAGAATACATTCTAATCGTCTTTGTGCTATTATCTGAATTTC +ATCAACTATTAATCAGTTGCCTTCCTATTTAAAAACGACTACAATTTTACTGTTATATAT +ATGAACGAAAAAAAGAGAATAGGAGGGGAGACTATGCCAAGGAAAGTATGGCTATTAGTA +GCTGGGATGATTATTAATGTCACGGGTGCTTCTTTTTTATGGCCTTTTAATACAATTTAC +TTACATGATAGTTTAGGGAAATCTTTATCAGTAGCCGGAATGGTATTAATGATCAACTCG +CTTACTGGTGTAATCGGAAACTTGCTCGGCGGTGTTTTATTTGATAAATGGGGCGGTTAT +AAATCAATTTTAGTAGGGATTGTCATTACACTTGTATCGATTTTAGGTCTTGTATTTTTC +CATGGTTGGCCATTATATGTTGTGTGGCTAGCATTAATCGGATTCGGTTCTGGAATGGTA +TTTCCATCGATGTATGCGATGGTCGGTACGGTTTGGCCAGAAGGCGGGAGACGAGCGTTC +AATGCAATGTATGTTGGACAAAACGTTGGGATTGCGATTGGAACAGCGTGCGGTGGATTA +GTTGCGTCTTATCGTTTTGATTATATTTTCTTAGCGAACTTTATTTTATATTTTATTTTC +TTCTTAATTGCGTTTATTGGATTTCGTGGTATGGAAGCAAAAAAAGAGCAAGAGGTACAA +AAAGAAGCCGAAACGAAAAAAGGTTGGTCACTTACACCTGGCTTCAAAGCGCTTCTAATC +GTATGTGTAGCATATGCTTTATGCTGGGTTACATACGTACAGTGGCAAGGGGCAATTGCG +ACGTATATGCAAGAGTTAAATATTAGCCTTCGTCACTACAGTTTATTATGGACGATAAAC +GGGGCGATGATTGTTTGTGCACAGCCGCTTGTTAGTATGCTAATTCGCTGGATGAAGCGT +TCTTTAAAACAACAAATTATGATTGGAATTATCATTTTTGCGGCGTCGTTTATTGTTTTA +AGCCAAGCGCAGCAATTTACGATGTTTCTCGTTGCGATGGTGACATTAACAATTGGTGAA +TTATTCGTATGGCCGGCAGTTCCGACCATCGCAAATATACTTGCACCGAAAGATAAGCTA +GGATTTTATCAAGGTGTTGTAAATAGCGCAGCGACTGTAGGGAAAATGTTCGGACCGGTC +GTTGGCGGAGCGATTGTTGACTTATACAATATGGAAGTATTGTTTATTGCGATCATGGTA +ATGCTTGTAGTAGCGCTTATAGCAACGAGTATTTATGATAGACGAGTAAAAGTAGAAGAA +ACAGTTGAAGAAAAAATTGCAGTTTAGTTTGACGGAACATGCAACATGTTTTAGAATATA +TTTAAATAAATAGTAATAAAATATCCTTTGGAACAAGGGAGTTAGCTCGGCTGTCCTAAG +AAAAATTCGTTAGTTAGCAAGAGTAACGATTAACCGAGAAGCCCTCGCTTCAAGTAAGCT +CTTAAGAGATACTAAGTGGTGGGTAGTTCACATTAACTCATATTTGTAATAACAGTGAGA +AGGAGTAGTAGTAATAGAAGCTGGTTTAGAGAGTTGACGGTCGGTGCAAGTCAATCCACG +TTTCGTTATGAACTCGCCTTTGAGTTGCAGTTGTGAAATCATTAGTAGCAATTGCCGTTA +ATCCACGTTACGGATCTAAGCGAATGTATTTATTACATTAATTTAGGGTGGTACCGCGGG +AATCTATAACCTCTCGTCCCTTTCTAGGGATGAGAGGTTTTTTGTATTTTGGGCGGTGAA +AATAAATAGAATTTCAAGGAGGGTATCTCATGAGCTTTAATCATCAAGAAATTGAGAAGA 
+AGTGGCAAGGGCATTGGGAAGAGAATAAAACATTCCGTACGCCAGATGAAACAGAAAAAC +CAAAATTTTATGCACTAGATATGTTCCCATATCCATCAGGTGCAGGCTTACACGTAGGTC +ATCCAGAAGGTTATACAGCGACAGATATTTTATCTCGTATGAAGCGTATGCAAGGATATA +ACGTTCTTCATCCAATGGGATGGGATGCATTCGGTCTTCCAGCAGAGCAATATGCACTTG +ATACTGGGAACAGCCCGGCTGAATTTACAGAGCTTAATATTAATACGTTCCGTAATCAAA +TTAAAGCATTAGGCTTCTCTTACGATTGGGATCGTGAAGTAAATACAACAGATCCAACCT +ACTACAAGTGGACACAATGGATCTTCCTAAAACTATTTGAAAAAGGTTTAGCTTACGTTG +ATGAAGTACCTGTAAACTGGTGCCCGGCACTTGGTACAGTACTTGCAAACGAAGAAATCA +TTGACGGTAAGAGTGAGCGCGGTGGACATCCAGTTGAGCGTCGTCCGATGAGACAGTGGA +TGTTAAAAATTACAGCTTACGGAGATCGTCTATTAGAAGATCTAGATGAGCTTAATTGGC +CAGAAAGCTTAAAAGACATGCAACGTAACTGGATCGGTCGTTCTGAAGGTGCAGAAGTAC +ACTTCAACATCGACGGTACAGATGAGAAGTTCACAGTTTTCACAACGCGTCCTGATACAC +TATTTGGATCAAGCTACTGTGTACTGGCTCCAGAGCATGCACTTGTTGCTAACATTACAA +CACCAGAACAAAAAGAAGCTGTAGAAGCTTACATTAATTCTGTAAAAATGAAGAGTGACC +TAGAGCGTACAGAACTTGCGAAAGAAAAAACTGGTGTATTTACTGGTGCTTACGCAGTTA +ACCCAGTAAACGGTGAGAAATTACCAATCTGGATCGCTGATTATGTTCTTGCAACTTACG +GAACAGGTGCTGTAATGGCAGTTCCAGCTCACGATGAGCGTGACTATGAATTCGCATCAA +CGTTCAATCTTCCAATGAAGGAAGTTGTAAAAGGCGGAGACATTTCGAAAGAAGCATATA +CAGGTGATGGTGCGCACGTAAACTCAGCATTCCTTGATGGTTTAAATAAAGAAGAAGCAA +TTGTAAAAATGATTGAGTGGCTTGAAGTAACAAGCGCAGGAAATCAAAAAGTAACGTACC +GTCTACGTGACTGGTTATTTAGTCGCCAACGTTACTGGGGTGAGCCAATTCCAGTAATCC +ATTGGGAAGATGGTACAATGACAGCTGTGAAAGAAGAAGAATTACCATTAGTTCTTCCGA +AAACAGAGAATATTCGTCCTTCAGGTACAGGTGAATCACCACTTGCAAACATTGAAGAGT +GGGTAAATGTTGTTGATCCTGAAACTGGTAAAAAAGGTCGTCGTGAAACAAATACAATGC +CACAATGGGCTGGTAGCTGCTGGTACTACCTACGCTACATCGATCCAAACAACAGCGAAG +CACTTGTAGACCCTGAAAAAGTAAAACAATGGCTTCCAGTTGATATTTATATCGGTGGAG +CAGAGCACGCTGTACTTCACTTACTATATGCTCGTTTCTGGCATAAAGTATTATATGATA +TCGGTGTAGTTCCAACGAAAGAACCGTTCCAACAATTATTCAACCAAGGTATGATCTTAG +GTGAAAACAACGAGAAAATGAGTAAATCAAAAGGTAACGTTGTAAATCCTGATGATATCG +TAGCAAGCCACGGTGCAGATACACTTCGTCTATACGAAATGTTCATGGGACCATTAGATG +CTTCAATCGCTTGGTCTGAAAATGGTCTTGACGGAGCTCGTCGTTTCCTAGATCGCGTAT +GGCGCCTATTCATTCAAGATAACGGTGAATTAAGTGAGAAAATTACTGATGCACCAAACA 
+AAGATCTTGAAAAAGCTTACCACCAAACAGTGAAGAAAGTAACAGAAGACTATGCAGAGC +TTCGCTTCAACACAGCGATTTCTCAAATGATGGTATTCATCAACGATGCATACAAAGCTG +AAACACTTCCGAAAGAATATGTAGAAGGTTTCGTAAAAATGATTGCACCAGTTGCACCTC +ACATCGGGGAAGAACTATGGAGCAAGCTTGGATACAATGAAACAATCACATATGCAAGCT +GGCCAACATTTGATGAGTCTAAACTTGTAGAAGATGAAGTTGAAATCGTTGTTCAAATTA +TGGGCAAAGTTCGCGCAAAATTAACAATGAGTAAAGACGCATCAAAAGAAGAAATGGAAC +AACTTGCACTTGAGGAAATCAAAGAACAAATTGAAGGGAAAACAGTTCGTAAAGTAATTG +TTGTTCCTGGAAAACTTGTTAACGTTGTTGCAAACTAATTAATTATTAATAAAAGCTCAG +AGCGTTTATGCTCTGAGCTTTTTTGCGTGAAAAATAAGCTCAGGATCACCTTCATCTAAG +TTTTCGATCATTCCACTACGCATAAATCCATTTGCTTTAAAAACTTTTTGCATATTTGTA +TTCGATTTGTTCGTTGAAGAAAATATTTTTTGAGTAGGAGAATGTCTTAACATATATGAG +ATTAATGAGCTTGCATGACCTTGTCTTCTTTTCGTTGGCGAAACAATGATTAATGAGAGG +AAAGTATGACCAAAGAAATTTGTATCGTATGTTAGAAAACCAAAAATGGAATTGTCCTCT +TTTGCAATTATGCAGTTTCCTTCATCAATAGCGTGTTTAATATAATCTCGTCTACTATCG +TCTCCGATTACATCAACATCTATATGTAAGATTGAATCTAAATCGTCGATGGAAGCTTTT +GCTATGTTTTCCAATGGAAATGCCCCCTTTCAAGTGAAAGAAAAATAATCAGTCAATTAA +TCCTATCATGAATGGAGAACGAAGAAAAAGGTGCGGAATTCCTATATAGACAAACACCTA +TTCTTCTATAGGAAATATGATAAAAAAGGTAGGTTAAATTGAGCAGAAGAGGTGAAAGGA +ATGAAAGGGATGGGCAATAATGCACCACATGGCTTCTTCGGAGGTGGATCGGATAGTTAT +GGACAAATGATGTTTATGGGAGGAGATGGTCAACACGGATATGGCGGAATCCCAAGTTGG +ATGGGAGGAGCAGCTGGAGGATTTCCGACGTCAGTGGCGGGTGTACAAACAGGATTCCCA +ACATCAGTAGCTGGAGTGCAAACAGGATTTCCGGCAGTAGTAGGAGGTACACAAGGAGGA +GTGCCTGTAGGTATGCCAACTACATTTCCATCTTTCGTTGGCGGTGTACAAACAGGATTT +CCTGTACCTGGCGTTGGTGTTGTAGCTGGTGGAATCGGCGGATTCCCTCAAGGTGTTCAC +GGTCATCATGAGCATCACGGCCATCATCATCATCATCAACACGGTCATCATCAGCACCAC +GGTCATCATCAGCATCATGGCCATCATCAAATTCATCCGCAGGCTGTCCTTTATCAAACA +CACCAAGGCCACCATCAGCACCAAGGCCACCATCAGCACCACGGTCACCATCAGCACCAA +GGTCATCAAGATCATCACGGCCACCATCAGCAACAAGTACATCACCAAGGCCACCACCAA +ATTCATCCGCAAGCTGTTCTTTTCCAAACACACCAAGGCCAACACCATCACCAAGGCCAA +CATCATCACCAAGGCCAACACCATCACCAAGGCCAACATCATCACCAGGGCCAACACCAT +CATCAAGGTCAACACCATCATCAAGGTCAACACCATCATCAAGGTCAACAGCATCATCAA +GGTCAACAGCATCATCAAGATCAACACCATCATCAAGGTCAACAGTATCATCAAGGTCAA 
+CAACAATACCAACAGTATCAGCAGCAACAACAGCCTTGGGCAGGTGGAATTGGAGCAGGA +ACGGCAGCAGGAGCAGCAGCTGGTACTGCAGGAGCAGCAGGACACGCGGGACACGTGGGT +CATTAATAGACCAAAGGAAAAAAGACATAATAAAAAACTGCTAGGAGATCCTAGCAGTTT +TTTTCTGCCTCTTGACGTTGGCGTTCGATTTCCGCACGAAGTTGTGGTTCTGGTGCTTGC +CATCTTTCTGGTTTCATAATTTTGCCGTCGCCCTCGCGGAATCGAGGCTTTCCATCAGGG +AATAGTTTTGCCATGTTAGCATTGTTTACAATTTCGAATCCTTTATCTGGACGTACGCCC +ATTTCTGCAAACGTTCCGAATGCAAAGTAAATAAGGTCAATAAGTGCATCATATTGATCT +TCAACAGTTGTTGCTTCTAGAAATTCTTCTAGCTCTTCTTGCATAAAGCTAGCACGAATT +TTTGCACGCTTTTCTGTTAGTTTTGTTGGAGTACTTGTCACAGGATGTCCGAATTCTTCA +TGCATTTTTGCAACAAGTTCGTATCCTTTATCTAAACCTTTTTCGTTTGTCATGTTGTTT +CATCCTTTCTTACGTTATGCCGAATTTTATCCCGCATTAACGGGCAGTAAGACTCCCACC +TCAAAAGTCGCAAATGCGAGGAAGTGAGGTGGGAGATAACTGCCCGTAAAAGCCCGATTG +GTTCAACTAATAATCAGTGGGGAATGGACAAAATCCCCACTGATTAAAGTTTCACTTTAT +TGTAACATAAGGTAGTTATTAAATAAAAAGAACATCTTATGAGAAATAAGATGTTCGCTC +GTTATTCTTCTTTCTTAATTAAAACGGATAAAATAAGATCGATGCCAAACATTCCGATGA +GTAGCATTGGTATAACTACAAGTAAATAACGTGTGAAAGAAACATTTTTTAAGTATGTAT +GAGCAAATAAAAAGACGCCGAGAAATAAAATACCATTTAAAATAGGTTTTACTAATTTTT +TAGACATAAGCTCCCCCGTAAATATCCCTTTTCTCTATTTAAAAATAAAAACTCTCCTCT +CCGTTGTACCATGCCAGAGAGGGGAGTTCAATCATTAACTTATTCAATTACACCAGAATT +TGTAATATTGACAGTGTATGTCACCTTAATAGGAACATCTTTATACATTCTTCGCCACTC +TTCTAATTTGAATGGACGGTAATGTTCTTTATACCTTGCGCCGAGACCGAGTGGATCCAC +ATTCAAAGATTTAAACTGTTTAATTAGTTTATTAGCTTTTGCACCTAATTGTTTTTCAAC +TGATTTTTCTATTTTTTTTGTGTTCTTTTTATTTTCTAAGTTGAACTGTTTAGATAGTTC +TTGAATACGTGCTTCTAGTTTGACGTGTATGAAAAATGATGGTTTACCATCTTTAATACT +TACTTGGTAAGTTGGGACGGATCGAATATTATTAATGATTGTGTATCCTGAGTTTGCTTT +AAATTCATGTGAATCTAATCGGTGTTTTTCCAATAAACCTTTAAAGGCAAACATATCATG +ATAACTAATTTTGCCAATATATTTATCTCCTTTTAAAAGGCCGATGCCAGTAATTTTTAT +TTTATCTTTATCTTTTTTGAGAATTGGTAAATAGGAATCTTGGCCTTCACGATAATATCG +ATAGGCTCCTAAATGTAGATTGTCAGTAGGTAAAGGACCCGTTTTCATATTATGGTCTAA +CATTTTCTTTATATAAATTGCAACGTTAGATGATGTTGTATATTTACCCTTTAGTAAGTC +TAGCCCTGTTCCCTCTAATAAACCGACATACATAGAGTTCCCGATATTCACATCACGAAT +TAACGTGTCAAAGGAAGTTGACAACCCTTTTTTAGCAATTTTTGTAGTAAATAAAGCAAT 
+ACGCATTTGTCCACTTGCAAACGGTTGAGCGGATTCTAAAGAAGTATCCGCTTTTACTTG +TTTTACTGCGTTTCCTACGCCTTCAAAGACTTGAACTTTATTACCTTTTTTCTGTATAGG +ACATACGAATGTGACTTTTACTTTATTATCTTT +>NODE_14_length_39993_cov_63.1132_ID_27 +ATTTCGGTACGGCCTAAACATGGCAAAATAAGCGCATCTTTACCACACACCAAGTGGCTA +CGATTTAACTTGGTTGCCACATGTACGGTTAAATTACATTTTCTAAGCGCTTCTTGTGTA +TAAGCGGTGTCAGGTGTTGCAACGGCAAAGTTTCCGCCCAAACCAATAAAGACTTTGATA +TCACCTTCGGCCATGGCTTTAATGGTTTCAACTACACCATATCCATTTTTTCTTGGTGAT +TTAATACCGAACACACGGTCAATATTGTCGAGAAGTTTCGGGCTAGGGAGTTCATTAATG +CCCATAGTGCGGTCGCCTTGCACGTTACTGTGCCCGCGAACCGGACAAAGCCCTGCACCC +GGTCTACCAATTTGACCACGCGCCAACAATAAATTTGCCAGCATATGTACGTTAGCAGTT +CCGTGACGATGCTGGGTAATCCCCATACCCCAACAGAAAATTGAACGCTCTGACTCTAAA +AAGAGTTTAGCCAGTTTTTCTAAATGTTCAGGTGACAGACCTGAGTGCTTATAAAGATCT +GCCCACTCCGTTCTTTCGACCTCAGCAATCATTTCGTCGAAACCAACGGTATTGACTGCA +ATAAAGTTTCGGTCAAAAACGCTTGGTTTACCTGAGGCAAGGGCCTTTTTATCCCACTCA +TGTAAGTGCTTTAAAATACCTAACATCAGTGCATAGTCGCCGCCAATTTTAGGCTGGAAA +TAATAGCGACTAATTGGCGTACTGCCATTGGTCATCATTTCAAGCGGTGCTTGCGGGTCT +TGGAAACGTTCGAGACCTCGCTCTTTAATTGGGTTAATCGCAATAATGTTACCGCCGCGT +TTAGACACTTCACGCAAAGTACCTAACATACGTGGATGGTTGGTGCCTGGGTTGTGCCCA +AAACTAAAAATGGCGTCGGCTAAATCAAAATCTTCTAGGGTGACTGTACCCTTTCCTAAA +CCAATCGAGTCGAGCAAACCAACACTTGTGGTTTCATGGCACATGTTTGAGCAGTCGGGA +AAGTTATTTGTACCAAAGCTTCGCACAAACAATTGATATAAAAATGCTGCTTCGTTACTG +GCACGGCCCGAGGTATAAAATGCAGCTTGGTCTGGATGATCTAGCGCATGTAAATGTTGA +GCGATGAGTTTAAAGGCATCGTCCCATGAAATAGGCACATATTTATCGGTTGTAGCATCA +TAACGAACTGGGTCGGTTAAGCGGCCTAGGTCTTCTAGGAAAAAGTCACTTTGTTCTGAT +AACCAACTGACGGTATGCCCTGCAAAATATTCGGGAGTGACGGTTTTACTGGTTGCTTCA +AAAGCAACCGCCTTTGCACCATTTTCACAAAAGTTAAATGCGTGAGCGTCTTTCTTTTCT +GGCCATGCACAGCCGGGACAGTCAAAACCTGTAGGCTGGTTAATGTTGAGTAAGGTAATT +GAACCTTTACCTAAAATATCTTGTCGCTTTAAGTTACGAGCAACACTTAGCAGTGCACCC +CACCCGCCAGCAGGATGAGTATATGGCTCAATTCGCGCAAAACCGTTATTTTCCTGTATG +TGCTTTTGTTCTTCTGAATTATCCACTTTTTCATCCTCTTATCTTTTTTACACTCAACAG +GCTCAGTGAAATCACAATTTAATTATGGTTTTATCAAAGCTACCCGCTGTTTACCGATGA +GTAGAGTCCACTTTTTATATTATTGCCACTATGCTAAAAGCCGGCTCAACCGTTCTTATA 
+ATATGTGCCATATTCTTTACAATTACAATAATTCAAGCCAACCACGTTATTTATATAAAA +ACATATAAATCAAATAGTTAAATAGATATTAATTCTATCTTTTACAGATAAAAACATTAT +TGGGTTTCATTTTTAACCGACTGAAAGCTTTTTAAAATTGACACTTACACGACTTTATCT +ATGTTTTTTTCGCATTAAGCAGTATAAAACAATTAAAAATTTATCTGCATTAGAGCATTT +ATGAAGGTTGTGATCGCCCCTGATTCATTTAAAGATAGCTTATCTGCTTTAAAGGTAGCT +CAAGCCATTGCCAAAGGATGGCAAGCGGTTTTTCCCGATGCAGAAACGATTTTGTGCCCC +ATGGCAGATGGCGGTGAAGGAACCATTGAAGCAGTTTTAGAAGTCTGCGATGGACAATGG +CGTGTAAAAACAGTGGTTGGACCATTAGGTCAGCCTGTTCAGGCCAAATGGGGTTGGTTA +GAAACACAAAAAATTGCCATGATTGAAATGGCTCAAGCGAGCGGCATACAGTTAGTTCCA +CCGTCTGAGCGTGATGCCTGCCACAGCACAACGTTTGGCACAGGTCAGCTCATTTTGGAG +GCTTTAGACGCAGGCGCAAAAGACATTATTTTAACTGTGGGCGGTAGTGCGACCAATGAT +GGCGGCACAGGCTTACTTAGTGCTTTAGGTGCAGTTTTACTCGATGCTAACCAAAAGGTT +TTACCCGCTGGCGGCCTTGCCCTTAGCCACCTATCCAAAATTGATTTAACACATTTCGAC +TCACGTATTCAGCACACACGGTTTTTACTAGCTGCCGATGTGACCAATCCACTTTGTGGC +CCAAACGGCGCATCACATATTTTTGGCCCACAAAAAGGGGCCTCTCCTGCTCAAGTCCAG +TTACTCGATGCAGCCCTTGCCCATTTTGCTGATGTCACGGCACAATTTTTAGGTTTCGAC +AAACGAGATGAAGCGGGTTCTGGGGCTGCTGGTGGTTTGGGCTTTGCCGCCAAAAGTTAT +TTAAATGCAGACTTTAAAGCTGGAGTTAAAGTTGTGGCTGAACTGAATCAGCTTGAGCAT +AAAATTTCTAACGCAGATTGGGTAATTACGGGTGAAGGTAAATTTGACCAACAAACCTTA +AGCGGTAAAACCGTTTTTGGGGTAAGCCAAATTGCCAAAGCCCACAATGTTCCGGTCATT +GTGATTGCAGGGACTTTAGGTGAAGACTACCAAGCACTTTATGAGCACGGCGTAAGCGCT +GCTTTCTCACTAGCCAATGGCCCCATTACCCTAGAACACGCCTGCGAACATGCAGCAGAA +CTCATTTATGAGCGAACAGTCGATATAGCGAGATTAATTCAGTTTAGCCAAACCAGCTTA +AATGCTAAAAAAGTCTAATCGGCTAATTTTAAAAGACTCTCTCTACAACTTTCCAGGATT +TCATCGCTAAGGTTTGATTGGTCTGGGCATGCACGAGACAATGCAATTGCCCCTACTGAC +TGAGCAAGAATATTAATAGCTTGCTTTCTCAGCTCTTCTGTTTCTAAAGAAACTTGTTTT +GCGCGTTCATTCATAATGAACTCAATAAGCTGCTCAATACCCTCTTCAAACTTGACTTTT +ACTTCGTCGGGCTGTCTAGCGGCATCACAAGATAGCGCTGTTAAGGTACAACCCTGATCT +CGGTTGTCTCGATGTGTACGAGACACATACATATGAATAAATTGGGTTAAGTTTAAACCT +TCTATTCTTTTTAAAACCTGCTCAAGACCATGTTTGGCAGTAATAGTAACAAGGTCTGTT +TTCGAGCTGAAATGTTTATAAAACCCACCATGAGTAAAGCCTGCGCTAGACATTAATTCG +GCAATGCCCACCCCGTCGTAGCCTTTACTACGAAACAGTTGAGTCGCCTTTTCAACAATT 
+TTATCTCGGTTTTCTTTTACCTGAGTTTTACTGACTTTCATGTTGGTGCGCTCACTTTTC +TTCGCCCTGCTCGTCATTATTATACATTGATTATGATTGACATCAAAACCTGTATATTTT +AGATTATAATCATCATCTAAAAAATTAAAGCGTGGAGACATCACATGTCATCTCAATCAA +AAGTACTTATTACAGGGGCTTCGTCAGGAATTGGTTCTGTTTATGCAGATCGATTTGCTC +AACGTGGCTATCACTTAATCTTGGTGGCACGAGACACCCACCGTTTAGATAAAATTTCAA +AAGATCTTCAAGAAAAATACGGTGTACAGGTCGAGTTCATTCAAGCCGATTTATCAAATG +ATCAAAACATTTGCAAAATTGAAAATGTACTCAAAAACGATGCTGATATTGAAATTTTGG +TAAATAACGCTGGGATTGCCCTGAACGGAAACTTTTTAACTCAAGATCGTAATGAGATTG +AACAACTTCTAACTTTAAATATGACTGCGGTAGTGCGGTTGTCTCATGCAATGTCGCAAT +CCCTTATACGCAAAGGTAAAGGCGCCATTATTAATTTAGGCTCTGTACTTGGCTTAGCGC +CTGAATTTGGTTCTACCATTTATGGCGCAAGTAAATCTTTCATCCAATTTTTTAGCCAAG +GCTTACATTTAGAGCTAAAAGATCATGGGGTACACGTTCAAGCTGTATTACCTTCTGCCA +CTAAAACCGAAATTTGGGAACGTTCAGGTATAGATTTAAGCCAAGTTCCGCCATTAATGG +ATGTCAATGATTTGGTCGATGCAGCCCTGATTGGTTTTGATCGTAAAGAAACAATTACAA +TTCCTGTGCTGAAAGATGAAAATCAATGGAATAATTTTGAAAAATCGCGTATGACATTAT +TACCAAACTTCTCTTCGGCTGAAGTAGCCCAACGTTATAAAAACTAACGACTAGGGCTAC +CCAACTCTCAAGTAGGAAAATCTTATGAATGTTATTGATGCTCTCCGCAAACGCCGTGCA +GTTAAACGCTTCGACCCTGCTTTTCAACTCTCTGAAGATGACAAGAAGCAGTTATTACAA +GAAGTTTTAGCAAATGCACCAAGCGCTTTTAACTTGCAGCATTGGCGTCCAGTCATTGTT +GAAGATGCAGAGTTAAGACAAAAAATTCGCGCGATTGCTTGGGACCAACCACAAGTAACA +GAGTCATCATTACTGATTGTGTTATGTGCCAAAGTAAATACATGGGAAGTTGACGCTAAA +CGTGTATGGGATGGAGCTTCTCCAGAAGTTCAAGATATTATGGTTGGTGCAATCGACCAG +TATTACCGTGACCGCCCACAAACCCAACGTGACGAAGTGATGCGTAGTGCGGGTATCTTT +GCTCAAACCCTTATGTTACTTGCTCAAGAGCATGGTTTAGATAGCTGTCCAATGGACGGT +TTTGACTTTGATGCAATGGCAAAGCTCATTAACTTACCTGAAGATCACGTGGTGTGTTTA +ATGATTGCGGTTGGTAAATCAGCTTCTGAACCTTACCCGCGCGTAGGCAAACTCCCTTAC +GATGATGTCATTATTAAAAATACATTCTAAGTTGTAATTCAAATTTTAGGCTTTCATTCT +ATTTTTGTTTTTATACTAAATAGATGGAAGCCTTTAATTTTGCATTAATTATAAAGCTGA +CTTTGACTCGGCTATAAACTGTCTCAAAAAGCAATTTAATAATCTCTCCCTTTCATCCGC +GTACTCTGCACTCAAAAGCTGAATGATTGTGCCTTCCACCATATATAAAAATAGTTTGGC +ATCGGTAAAAGAGACATCGGGTTTAAGCAATCTAAGCTGGCTATAAATCTCATTTTTTAG +CCATGTTCTATAGCGCACTGCGGTTTGATAAGCGTTGGGATAGCTATTTTTAATTTCAAA 
+AACGGCTTTAAATAATAAGTAATACGGCCCTTCTACATCGGTATGTAAGTCATAAAGCTT +TTTGAGTTTATCGATGGCAGGAGTGCTTAAGTCATATTCGACCATTGCGACTACTTGTTC +TTGGAGCTTTTCTTTTTGCACCATCAGGCAGATTTCAATAAGCCTTTCTTTAGAGTGAAA +GTAATTATAAAAAGTCGCTTTTGTAATCTCTGATTCTTTCACTATTCTGTCGACCCCAAT +AAGGTGAAACCCACGGTGGTGAAACAGATCAATTGCTTTATTTACAACGTATAAGGCACG +TGTTGGGAGAACTAAATTTGGCATAGTTTTACCGTTATAAATTCTTGTTGTTTGTAAATG +AGATACTTTTTTGGGAGAAAAAAAGGCACAGCAAAGCCTATAAAGACTGTGCTTGGCACA +TCTCAAGTTTTATTGTTGCTAAGTTTTTGGTTGTGACGATTTAAAGCCTGAAAGCGCTAA +TGGCCCTCAAACTGCTTTAAAGCTTATGTGGTGTCGTTATAGCTTTTGGCATTGAGAGCC +AAGAAATAATGTATGTGCAAAGCCAACTCCTTTTTATTGGGAGTTCTGCCAGACATGAAT +AGGAATTATGGTGGCAGAACGGAAGAAGGTTGACAGACTAGTATTATCTCAAACTAGCAC +ACCCGTAGGTGTCCCTCTCCCGTCCTACCGCTACGGGAAAGGGACGAGTAGTACACCACA +CAAGACTGTTCCTCAGAAGGAAAAGCTCTGATGCGGGAGATAATATTCAGCCTGTCAAAG +CCGGCTGGCAATGTGGCCAGCAGGCAAAGAATAGTGCTCTGCTCTTTAGCAGTCAAGCAC +ACAAAATGACATTTGTTTTAAATTAGATCATTAAAAAGTAAGGATAAATAATGAGATAGT +TATTGTTAAATGGGGGTTTATAAAGGGGAAATAAAAACAGCCTAGAAGACAACGATCAAA +AAAATGAGGCTTATTATAAGCCCCGTTCCGTTAATTATCTTGTATCTTCACTAGGGTTCG +ATGTTGATGAAGTACTATTCTTTGATTCTATTTCAGTTGCTTTTGGAGTGATTGCTATTA +CAGCTTTACTAGCAGCATCTTTAATTTGTTCGGCCACTTGACCCGCAAAACCTGGTACTG +TTTGGGTCGCCTGTTTTACTATATCTGATGAAAACAATTCATGCCACGGACTTCCATGAT +TTTCATTCTCTACAAATCTCAGTGGCTGCTCATCTAGGCGTGATAAAGCCGATGCCAAAA +GTCTTGTTTCCATATTCTTATCAACTCTAGCAGCCTCTCTACTATAGCCTTCATATGCAC +GAGATACAGAAGCTTTAAATGCATAATCTTCAGATAATCTAAAACGCTGGCCTATTTGTT +TTGTTGCTAACCAAGAAAACCAAATTGGAGCGCCTAAAGATAAAAGAGAAAGAAGTAAAT +TAATAAAAATAATGATCGTTGAGCTATGAGTATCTTTTAAAAGCTCAGATAGGCTTGATA +AACGTTGAGAACCAAAATAACCTCCAACTCCTAAAGCAAAAATTAGGCCTGCAACCCACC +ATTTTATAGACCTAGACAAGATATTCGAACGTTCAGTAAATGCCGCTGCCAATCCAACAC +TTGTTGCTGCTGAATATGCAGTTTCACAGCGTTCTAACACATTTTTAGCATTCAAAGCAT +AATTATTAATTTCTTCATCAAGTAATTTCGAATTTTGTTTTAATAGCTCAATTTCTTTAT +TTTCTTTCATTGAATCTGAAAACAAATTATTAATTTGTTGTCTCGCTTCCTTTAAAGAGG +CTAAATCAGTCGGTAATTGTTCAGCTGCCTCCGATGCTTGTTCAATTCGTTCAACCATTG +AAGTCAAAGAGTCTGTGCGAGGTTCTACTTCATGTATTTTTGCTTCAATAGAGCGAATAC 
+GAGCATTTAATTTTTTTAATTTTGTTAAAGCTTCAACTTGGTCAGCAGCAAGTTTATTTG +CATATGGAACTAAAATAGACTCAAGTAAATTTTTAAGGCTATCAATAGTGAATAAATATG +CTGGAATTGCTAAATTGACGTTAGATGACCACATTTGGGGAATGGTATGACTAACAAGAT +ATTCTAAGCGTTTAATATAATTAGATATAAGAGCATCTTGATTCTGATCCAGTTCATCAG +TTTCATAATTTTTAATAAAATCAATAATATGTTTTGATTTAGAAATGAGTTCTGTTTTGT +TTAAGTTAGGACTACCCCAACTATTATGTGTTTGTGCTAAAGGCTCATTATTTTGAATAA +CGTTATTAAGTTGATTATTAACTGCTTCTAATTGCTGTAATATGTTTTCTAATGTAGCGT +GCATGACTAACCCTTTCATTGGCTTTAAATTATTTTATTTAAACAATTATAATACATATG +TAGGCTAGAAAGTGAACAAAAGAAAATAATGTGACATGTCATTCAATAGTTTCTATCAAA +TATAATAGACAATATAGATAAAACGGGATTTTCATATTAAGGTCAAATAACCTTTTAGTG +TTGTCTTTAACTTTATATTTATTAGACAGCAAAATTTATTTTTTATACAGCAACAAACTC +ATTTTTTTAGTACTTATGTGATTTATATATTTTTATAGACAACACTAATATTTAGTACAT +TCTTTATTAATGTCCAAAATTAACTATAGCTTAGCTACAAAAAAGCCCCTGTTCTACAGA +GGCTTTTTTCAATTCAACAACGTATTATTTATGAACCCAAGGCACAGCCGGTGAACCAAT +CCACGCAGATTTAGCCGGAATATTTTCACCTTTCATGACCAGTGTTAATGGCCCTAACAC +GGCATGATTGCTCACGTTTGCGTTATACAAAATAATACTGCGCGCATTGACCACTACATC +ATGCCCCACGTTCACTTGCCCGATTTTCATAATACGGTCTTCAAACAAGTGAGTTTGTGG +GCCTGAAAAACTATTAAACTCGGCACGGTCACCAATGCTGACACAGTCAAACTCGGTAAT +ATCAGCCGTATCCATATAAACGTCTTTACCAATACGAACCCCTAAAATACGCAAGAAGAA +TGGCAACATTGGCGTTCCTCTTAAGTAGTTTAAGAAGTTAGGAATCGCAACTGATTCATA +TAGGCTGGTAATCCCTTCACTTAACCATACGAACATGGTCCACATAGGTGCCGAACACGG +TTGATAGCGACCAATCAAAATCCATTTTAATAGCGCAACAATAAGGAAACAGCCAACACC +ATAAAGCAGCCCTGCGAGCGTTAATGCAACCAGTCCTGTCTCAATATTGTAGTTATTAAT +CACATCAATAACGTCTAGCACGATCATATAACCCACCCCAATCGCAAGTGCGGCCGGCAA +TACAATACGAAGCCCTTCAATAAAACCGCGCATTAAACGGCGCTTAATACTTGGTTTAAA +GGTTAAATGGTCAGGATATTTTTCAGCAGCTTCACGTGCAGGTAACAGCAACGCCGGAGA +ACCAAACCAAGTTTGTCCATCATACATTTCACGGTTATCTGGTGTTTTTGATTGCACGCC +AATTAAAACATTGTCTGGTAACACTGCCCCATCAGCAATATAAGCACTATTTCCCACAAA +ACTACGGTTACCGATTTTGGTTGCTTTTAAAGACATCCAACCGCCTTTAATCTCTTCATC +ACCCAGCATTACCGCATCGGCAATAAAGCTTTCTTCACCCAAAGTCAGCATTTCTGGAAT +GACACCAGTTGCGGTAGAAATTTCGGTATTTTTACCTACTTTTGCTCCCAGCATACGAAA +CCATGTAGGCGCATAAATAGTGGCAAACAAGCCATGTAAGGTTTGTAAACTCGTCTCTAA 
+AATTTGGGCCGCAAACCATTTACGATAATACGTGCTGCCATGAACTGCATAGGTTCCGGT +TTCAAGACGCGGCAATGCAATTTTACGCAGACCTGAAGAAATCACCGCAGTAATCATCAT +CATCATTGCACTAGCCGGAATCGCTAAAATGAAATAGTAAAGCGCGATTTGCAAATGGTT +GTTCGGGTTAATATTAAATACATTTACATCTAACCAGTCGACCAAAAGGAAGCTTGGAAA +AATTGGAATAAAGAAAAGACAGGCAATAATTAATGCGCTAACGCCATAGTATCCATATTC +AGCAATTTTACGTATAAACGATAGTTTAGGACGCTCTGGCAATTTAGCTTGCTCATCTAT +ATGCCCTATTTTCTGAGCAGGTGTTCCATCCCAAATTTCGCCCTCAGGTACAACCGTGTC +ATATTCAATTGAAGTTAAGGCATTTACATGAGCTTGTTTTTCTAACACTGTATTTTCTTC +TAATACAGCATACGAACCTACATAACTGTCTTGTTTAAGGTGAATTGAACCTAATACCAA +GTGACCATGTTCAACTTTGGCATTTTCTAAATTCACCTGAGAACCGATACTCACGCCATC +TTCAATAGTCAGTAAAGATGGCATACGAATATGTACAGAACTAATGGTAACGTCGTGACC +GATTTTTGCACCAAGTGCTTTCAAATATAAGTTAAGTAAAGTCGAACCCGACAAGAGATA +GACAGGTGAAATATTACTAATACGGTCCGCTAGCCACCAACGGAAATAGGTTAATCCCCA +TAATGGATAACGCCCTGCGCCAATTCCTAACATTAATAGTCGTTTTACCGTAATCGACAG +TACAAAACTGCTCATAATGACACTGACATACACCAATAAAGACAACGCAATCGCATACGG +AATACTGTCGCGAGTTCCACCGGTAAAGTAGTGATAGGTAAAGAATGGCGCTAACCACTG +CAAAATATTAATCGAGATTAAAACCGGAATAGTTACAAGCTGTGCAATACCACATAACCA +TTTATAGGTCTGATTACAAGGGTTATCTTGTCCAATTTGGCTATCAAATAAAGTAGGTTC +AGGCTGCTCTAACATGAGTGCAGCAATCGCGCCTACTCTTCTCGCTTGATATAAGTTTTG +AATGGTCAGATGGCTATATTCCGCATGCTCACGCAGATTTGAAATTAAAACAGCGGCAAG +TAGTGAATGCCCGCCTAAATCATCAAAGAAATCAGAATCGAGTTTAATCGGCATGTTTGG +GAACAAACGATTCAAAATTTCAAATAGGATTTCTTCGGCTGGGTTCTGTGGCTGATCAGA +TTCACTGCGATCTACCACACTTGTGAGCGGTCTTGCTTTTAAAGCTTTACGGTCAATTTT +CCCAGATAATAGACGTGGAACTTCTTCAATAATCTCAAAACGATTTGGCACCATATAAGG +CGGTAATCGCTGACTCAAGTTATGGCGTAGTTCTTTAATTTCTATAGCTTGCTTTGCATC +AATTTCTGGTGCAATAAACGCAATAAGCTGATCAATGCCGTCTTCAGGACGAAGAATAAC +GGCTGCCGTACCAATTCCGTCAATATCGCAAAGCGCAGCTTCGATCTCGCCAAGCTCTAC +ACGGAAACCACGGATTTTAACTTGGTCATCGGCACGACCTAAACAGTGTACTTGACCAAA +CTCATCAATTTTAGCCAAATCCCCAGTACGGTAGAGTAATTCTTCCTCTATACTCATTGC +CCAAGGGTTCTCAATAAACTTATCTGCGGTTAAATCAGGTCGTCCTAAATAGCCTTGTGC +CACACTTGGACCAAAAATACATAACTCCCCTGTTTCACCTTGCTCGAGCAACTCTCTTTC +AGAGTTAATGACCAACATACCGTAGTTTGGTAAGGGCTTGCCAATGGTGACAGGTTTGCC 
+GCGTTCCAATAACTCAAGACTTGCAGAAACCGTGGTTTCGGTTGGTCCATAGGTGTTAAA +CATTTGATGATGAGGTAATGCCCAGCGGTCAACCAGTGAGTCCGGACACATTTCCCCACC +TAAATTAATAATTCTTAAATTAGGTACGTCTTCAGGAAATAAGGCAAGTAACGTTGGAAC +AGCATGCAGTACTGTAATTTGTTCCTGCTTTAAGGTTTGGCATAACCGCTCAGGGTCACT +GACAAGTGACTTCGGTGCAATCCACAACGTTGCACCTACCAAATAAGAAAGCCAAATCTC +TTCAAACGACATATCAAAAGCGACTGAAAAGCCTTGATAGACCTTGTCTTGCTCTTGAAT +ACCCAAGATACTATTTTCACTACGTAAAAAGTGACAAATATTTTTTTGAGTAATCACAAT +TCCTTTGGGTTTACCTGTCGAACCCGAGGTATAAATAATATAGGCAGGTTGATCTGGAGT +GGTTTTCGCTAAACTTACACTTTCGCTTAATGGTTTTTGCAGCTCGGTGTTGGTCCATTT +TGTTTGCGGCACTTCAGCTAAATGTTCATACCATTCATCTGTAGTAATCATTCCCACCGC +TTCGGCATCTTCAAGACATACCGCAATACGGTCCGCGGGCGTATCCATATCAAAAGGAAG +CCAAGCTGCACCGCTTAAACAAATGGCTAGTTGAGCTTTTAAAAGCTCAATACCACGCGG +CAGCCATAGCCCTACAATATGACCTGGCTTTACCCCTTTTAAAGCCAGATGTTGAGCCAT +AATTAAGGCTTGCTGATACAGCTCACCATAACTCAGCGTTTTATCTGCTTCAATTAAAGC +CGTTTTATCAGGAAGAGTTTGTGCAGTATGAGCAAAAATATCGGCTAAGACTTCATTCTG +CAAAAATTCCGGATGATACTTTCCACGAATTACATTTTTCGTGTTCGTTACAAACTGGTT +CATTTTTATACTCTCTTAATTTTCTGATTTGGCTTAAATGAAGTCATAATTTATTTTTCT +GACCCATTAACGAAATCTAGATAGGCCATAAATTAATTAAGAGAACTTAAAGCAAGATTA +AATTTTTTAGTTTTGATGTTTATTCAACAGAGTAAGAACCTTAGCCTCTAATTTTAAATT +ATAAATAAAAATCAAATATTTAATATTATTATTCAAATTTTATGATTATCTTTAAGATAA +AAACGTACAAAACTAATGAAGTAAAAAGCTAGAATTCACAAGATTTATATAGTTCTTGTT +TTAATTAAACTAATAAAAGCCATCCTGTATACAAATTCTCCATATTTTAAACTTAATTGT +TTAGTTCTGAATTATTAAAAGAGTTCTTAAAAATAAAAGAGTTCATTTAAATTTATGCAT +GATGTTTGATTATTAATATTATATTAAAATTGTATGCATCAATTCTTTTTAACTTATATC +TTTATCTGGATTTTATAAAAAGTTAACTCCATTAAGCGGATTGAGCGAGCAGTTATTTAC +ACGTTCTACTCATAACTCAACCTACCCTATTTGGAGTGGTGAGTTGTGTGGGTTAAAACG +AAGATACTCTTGAAAGGCGAGCTAAGATCACCTGTTTCATTTAAGCATGACGAAGTATCT +TTATTTATAATATTTGAAAATGGACTTAAAAAATTAAACAGTGGTGCTGGTATGAGTCAA +CACATAATAACCCCAAAAGAAATACCTTTAGCATTTCAAACCGCTTGGAATAAGCATGAT +ATGCAGGCCTTTGCTGCTCTTTTTGATAAAGATGCGACTTTCGTTAACCGTTTCGGTCAC +TACGTTAAAGGAGTTGATGAAATTATTGCAATGCATCAACCTATTCATGGAACCATTTAT +CGTGATTCAACACTAGAGAATGAGCTGATAGACCTCATACCTATGAGTGAGGATATATGT 
+ATTTCACATTTTTGGAGCCGCCTCACTGCAGGCGTAGCTCATCCTCAAGGGCCTCATCAG +ATTGATACTCTCATTCTTACTGTGCTCACCAAGAAGAATCATTCTTGGTACATTCAGGCG +CTAGAAAATGTCACTTTGACGAACCCTCGTACGGGAGAGACCATACTTAGAAATATATAG +AAAGGAGCACGATCATTTTTTTAGTAATTTATATGCATAAAAAAAAAGCCCTCGTCATTA +ACAAGGGCTTTTTAAGAATTTTGGCGGAAGCGGTGAGATTCGAACTCACGGAGGACTCAC +ACCCTCGTCGGTTTTCAAGACCGGTGCATTAAACCGCTCTGCCACGCTTCCAACGAGCGC +TATAATATAAACAAAAATGCAAGTTGGCAAATCTTTTATTTAATTTTTAATTCAAACGCT +CAAAATAAAAACAAATCTATCTAAATTTCATCATTTTTAGGTTTTTCATAGCGAATTGCA +TTGATATACCACAGCTTTTTACCCAAAGGCGTGTGTACTTCTACTTCATCATCGACTTCT +TTACTCAAGAGTGCACGTGCCATAGGTGACTCAATTGAAATATGCTGAGGATGGTGATCA +TAAATTTCATCTACCCCGACAATTCGTAAAGTTTTTTGCTCCCCTTCTTCATTTTCAATG +TCGACCCAAGCACCAAAGTAAACTTTGCCTTCTTGCTCAGGTGAGTAATCAACAATTTTT +AACTCTTCTAAACGCTTTCCTAAATAGCGTACTCGTCGATCAATTTTTCTAAGGAGCTGC +TTGTTATATTGATAATCTGCATTCTCACTACGGTCACCGAGGCTTGCAGCCCAATTTACT +TTTTTCGTAATTTCTGGTCTTTCTTCATGCCAAAGTTGTTTTAATTCAGCAACTAATTTG +TCATGGCCTGACCGAGTGATTAAGTTAGATTTCATGATTAAAAACCACTGATTAACAACA +TAAGGGTTACAAGATGTAGCATTTTATACCCTATTTTCTCACATCAGAAGTGTATAAATT +TTCTTCCTTAATAAATTCATGTTGTTATGATGATATATCGGATATTTAATTGTACAAAAT +TTGACAATACTTTTTCTCAATATTATGATGCGCGCATCTTAAATGTTGAGTATTTTTTAA +CCCACTCAAAATCTATGTAGATTTTCTACACAGTTTGCTTTTTTCATTTCATGTCATTTT +GAATGACGTCATGACTGCCCACCCCTTATTGAATTTATCAACTTTTAGAAGTTATGGCAG +CTTTTTGTCTGACTTTTAGGGCATAAATTACCTGTAGCGGAGACAACATGCACAGTAATT +CAAGTTCTGGGTCGAGGCTTGGCCTTAGCTCTTTATTTTCTTCTCTCCCATTGAAAGTTC +TTGCGCTCAGCACTATGTTTTTTCCCTTTCATAGTATCAATGCAGGAAAAACCCTACAGT +ACAATACTGTAGTTAACACAAACACATTAACAGTAGTTGCCGTAGAAAGCCCAACGACTG +TTTTTAAAGAAGACCAGTTTTTACACGGCTTCGGTTATGACTTAGCGCGTAACTATGCAC +AAAGCTTAAATGTAAAGTTGGACTTCAAAATCGTGACAGACAATGCCACGGCACTTAAGT +GGGTTCAGCAAGGTAAAGCAAATCTTGCCATGACAACCGCAAGCTTAAGTTCAATCGAGA +ACAAAGGTTTAATGTCTTTTTCTGCAAGTTGTGGTGATATCGTAAATTTGCAAAAAAATG +GATTAAATCCGAATCTAAGCTGGGTGTTTAAACAAGCAGATGACCCGCTTACACAAACAG +CAAGCGGTTTTGTTTGTCAAAGCAAACAAAATGGTCTAACTCAGCAACTTGCTTCTTTTT +ATAACCGTAATGTTGTAAAACCAGAAGCTTGGTCTACGATTCAACGTGACTTAAGTGCAC 
+GTATACCGATTTACAAAGCAAGCTTTAAACAAAGTGCTGCTCAGTACGATTTAGACTGGC +ATTTGCTTGCAGCGATTGGTTATCAAGAATCGTACCTAAAGCCAGAGTCTGTTTCACCAA +CAGGTGTACGTGGGTTAATGATGTTAACCAATAGTACAGCTCGGGCAATGGGTGTGAGTA +ACCGTAATGATCCAGCACAGAGTATTCAGGGCGGTGCGAAATATTATGATCTCATGTTAA +GTGAGTATGATGATATTCCTTTCCCAGATCGCAACTGGTATGCGCTTGTGGCTTACAATA +TGGGTCCAGGTGCAGTGAACCAGATTCAAAAGCGCTTGCAAGCCCAAGGGAAAGACCCGA +ACCAATGGGTTAATCTCTATAATTATTTGCAGAGTAACAAAACTCGCAATGGTCGATACA +AACAAGCAGTTCAGTATGTGACTCGTATACGTGCTTATCTTGAGCACATTAAAACGGCAC +AAACGCGAATTAATATCTAGTTTTTATCATCTAAAAGTTGATAAATTCATTGTTTGTATA +AATAAAAAGCTTACCTCTCGGGTAAGCTTTTTATATGAATTCTTTTTCAGTTTAAGCGGT +TTGCTCTAATACTTTTTTGAACAAATCTTTTTGTTCTTGGCTTGGTTTAGCTGTTTGTAA +AGCCATTAACTGAGACATATCGCCTTGAACTTTTATTTTACCAGTCATGAATGCCTGCAT +AGCAGCTGCCATATCAAACTCTAGGAATACTTTACGTAAAGTTTCAGCATCCATGTTTAA +TGTAGTTTTAGCATTTGAAGACAAACCTTTTTGGATTTTACCCCCATCTAACGCTAATTC +GGTATTTCCAGAAGCATCTGTAACTACTAAATTAATTGCCAAATTCGCAAGAGCTGGTGG +CAAATTAAGATCACCTGCTTCAGCAGTTAATTTTTCGACAGTCGCAAACCAATCATCAGT +TAAAAATGCAGGCATGATTCTTCCTCAAATTTATTTGTTCAATTTCTCTAGTGTTGCCTA +TCCAGACAACATCGTGTGAAGCACTTCTGCTTGAGCGTTGTTATAGCATGTAATTTTAAG +GTTGAGCTATGACTTTTTGTACGGAACATTCATTTTTTAGTATTTTGATACATTAAAGGC +ACTTGTTTATAAAGTGCCTTCAATTGACTTATTAATTCGTTTTATTCAGCAGCAAAGCTT +AAGTTCACTACATCTAAACGTTTTTTCAGTTCTTCCGGATCTTTAAAGTCTAATTCGCGC +TGCGAGTCAAGAACTTCGAAATCGAACAAATCACGGTCAGCAAGTTGTGAAGGTGAAACG +TTCTGTAACGCACCAAAAATGCTGTGTAAACGTTTTGGATACTGTTTGTCCCATTCACGA +AGCATTTCATTGATCATGGCACGTTGCAAGTTCTCTTGTGAACCACAAAGGTTACACGGA +ATAATCGGGAACTTACGTAACTCTGCATATTTGATAATGTCCTTTTCTTCAACATAAGCC +AAAGGACGAATCAAAATATTTTTCTTATCTGACGATAAAAGCTTAGGTGGCATTGCCTTT +AAGCTACCGCCATGGAATAAGTTCAAGAAGAAAGTAGCAATGATGTCATCACGGTGATGC +CCTAGCGCAACTTTGGTTGCACCAATTTCTTGAGCAAAGCCATAAAGCGAACCACGGCGT +AAACGTGAACACACAGCGCAGTACGTTTTACCTTCAGGAGTTAAACGTTTGGTAATGCTG +TAAGTGTCTTTTTCTAAAATGTAATACGGAATGTTATTCTCTTCCATATAACGTGGTAAG +ACATCTTCAGGGAAACCAGGCTGCTTTTGGTCAAGGTTAACCGCAACAATATCAAAATTG +ATCGGTGCAATACGTTTGAACTGCAACAAAATGTCGAGCAAGGTATAACTGTCTTTACCA 
+CCAGACACGCACACCATGACCTTATCACCATCTTCAATCATATTGAAGTCACGGATGGCA +TGTCCAACTTGACGGCGAAGCTTCTTAAGCAAACGATAATAGGCCGAGCTTGTTGGAAGT +TCAGGCTTAAAATTAAATCCTTCGTTGGACTCAACTGGCGCGTACATAGACGAGATATAC +CCATAAATAAAAATACCGCGCAATTTTATATGATTCGGATGAGAATCGCATCAAAAGAAT +AGTTTCAATTCTTGAAGTCGTCATAATGCTCAGGCATGATGCAAAAGCAGTGCGCTATTT +TGACCGATGAGTTCATTTTTTGCGCAAAATGCTTACTTTTTGGACTTTTCCACAGTTTTC +CACAAAACCTGTGGATAAAATTGTGGATTTCTTAATACTTGACAAACTGTTTTGACCTTC +TGTTAAGGGTTTTATTTAAATTGATCATTTTTTAACCAATGATTTTTACCCTTATTTTTC +AAATAGTTATTCAAGTCAATAGAATCATTTTAAAAATTTTTTTTAATTTTTTTTGATGAT +CATTCGAGCAAAACTGCTTGATTTTTTCAAAGAGTCAAAATAAAAAAAATTACACAAAAC +TTCTTTAAGTAATATTTTTTGATAAACTCTTTGACAAGTTTTGAGACCCTAAAATTGTGA +ATAACATAAAATGAAAAATTCGGTTCAATATCTTTTTTCTTGTTTACTGGGGATGACCAC +TTTTTCGGTAGGTATAACGCCGACTTCAGCAGGTCAGATGTACATTTACCAAGACAAAAA +CGGCAGTACTTTACTGACTAACCGTAAAAGTTATGACCATTCACTCAAAAAATTAAAAGT +CACCTACTACCCGGATAGTAATATTCATAGTTATAGCAACTGGGGGACTTCAGAAGCTTC +GGTTCTACCAAGCTATAGCAAAAACAAAAATGCTTTTGATCATATTATTAAGCAGGCAGC +ACAACAGCATGGTGTTTCGGAAGGATTAATCAAAGCCGTTATGCATACCGAGTCTGGCTT +TAACGTAAATGCCCGCTCTCCGGTTGGTGCCCAAGGTTTAATGCAGCTTATGCCAGCTAC +TGCTCGTCGTTTTAACGTATCTAACGCTTATGATCCTCAGCAAAACATCTTCGCTGGTGC +TAAATATTTAAGTTGGTTGCTCAAACGTTTTAACGGTAATACGCAAATGGCGCTTGCCGC +TTATAATGCAGGAGAAGGAAACGTCGATAAATATGGCGGTATTCCCCCATTCCGTGAAAC +CCAAGATTATGTTCGCCGAGTCACAAGCCGCTACCAAAACTTATATTCTTCTGGTGTAGG +CCTTTCTTCTTTTAGCAATTCGAGTATTTCTGCCCAAGCGATAAACCAGCCAGCCATACC +GCACAGTACTTCGACGCAAGTCTCTGCTCAGCCTATAAAGTATTCTTCCTCGCGCCAAAT +CGTGACATTGCCAGATGGTACATATACAGACGCACCTACGGGAACTTATGTCACCAATAA +TGCAACTGCTATTGCACATATCCGGATTGAGTAAGCTTAAAAGGCCTCATACCTTACGGT +TACTTATTTAGCGGGTTCTATTATCAAAAAATTGTAATGATCTATTACCCCCACTGTCGT +GTTCAGAGGGGCTCTCTAATAACTTGTTTAATTTAAATACATTCTTCGATCGCAAAGAAT +GTATTGATTCCTTATTTTTTTCTTGAATTTTTCCCCTAAGCGCCGCATATTAGCTGAGTG +ATTTATAATTTATAAATCAGATAATATTTTATTTATAAGAGGATTTTCTCAATGATGCGG +ATTGGTTTGTTTTTGCTTACCAACCTCGCGGTACTGGTTGTAGCTGGCATTATTTTGTCA +CTCTTCGGTGTCGGTAGTTACCATGGCGCGGGTGGCTTAAATCTAGGCAACCTTTTAGTC 
+ATCTGTTTTGTGTTCGGTATGGTGGGCTCTTTAGTCTCTCTATTCATGTCCAAATGGATG +GCTAAGAAAACTACTGGTACAGAACTGATTGACCCAAATGCTCCTCGTAACCAAGCTGAA +AGCTGGTTATTGCAAACAGTCGCTGAACTTTCTCAACGTGCTGGTATTAATATGCCAGAA +GTTGGTATCTTCCCTTCATATCAGTCTAATGCCTTTGCAACAGGCTGGAATAAAAATGAT +GCCTTAGTTGCCGTTTCAAGTGGTCTACTTGAGCGTATGAACAAAGATGAGCTACGTGCT +GTGCTCGCGCACGAGATCGGCCACGTTGCAAATGGTGATATGGTCACATTGGCACTCATC +CAAGGTGTTGTAAACGCCTTCGTTATGTTCTTTGCTCGTGTAGTAGGTGACTTTATTGAC +CGTAATGTCTTTGGTCGTCAAGACAATGAAGCCCCAGGTATGGGTTATTTCATCATTACA +ATGGTTCTAGATATCGTGTTTGGTATTCTTGCCTCTGCCATTGTGATGTGGTTCTCTCGT +TACCGTGAATACCGTGCAGATGAAGCCGGTGCGCGTTTAGCTGGTAAACAAGCGATGATT +TCTGCTTTATTACGTTTGCAAGCTGAAACAGAGTTACCTGACCAAATGCCAAAAGAAATG +AAAGCGTTCGCAATTGCGGAAGGTAAAGAACAAGGCTTTAGTTTAGCTGCATTGTTCCAA +ACTCACCCTACAATTGAACAACGTGTGGCAGCTCTACACCAATTAGATTGTCCATAAAAC +GTTTAATGTAAAAATAAAGCCTCCAAATGGAGGCTTTATTTTTTTGATTACATAGTTTGA +TAAAGTCCCCAAAGGTAACTTGCACCCCACCACACAGCTAAGCCAATTAATAGCAGTACA +AACGCACCTAATAAATCGGGTATGTGCAACCATAACCAACTCACAACAGGGTTACGCCAG +AATGCCGACTGTTGTTGCTTCTTTTCGAGCTTTTCATGTAAATACGGATGAACGACATGA +ATATCACTTTGGATCGCATATTCTTCACGTATATGCGTATGCACCACATCTAATGCTTTT +CGACTGGGACGTATAAAAATATCGAAAATAGTCCCTGCAACCGGGACAAAACCGACTACA +GCATCGACTACAGCAAGTTTGATCACTGGAGTGAGCTTATGCTGAGGCACACCTATTTGT +TTAGCCTTATAAATGGCATAACACGTCAGTGCGAACCCAGCGACATCACCGGCAATTGGA +ATAGTACTTAAGGCAGCATCAGCGCCAATACCCTGCTTTGTAAAAGGAATACGGACCGCA +CTATCCATCATGGTTGCGAACTTGGCAAGATCTCGTTCTAGGGCGATAACCTGCTGCTGT +GTTAATTTTTTTTCTTGAGGCATCAAAATCAAATATAAGAAGAAAAGTTAATCTTAATAT +ACTTGATTTGATGCCCGTTTTTAAATTGTTTACTTTTAGGTTGAAGCTTTCGATTTAAAC +AGTGAACGTAATAATACGTACATGACCGGAATAAAAAACAGAACCAGCACAGTACCAAAC +ATCACGCCACCTAAAATACTAATACCAATCTCTTGACGACTAACCGCACCAGCACCTTGA +GCAAACACAAGCGGAATTACGCCTGCCCCAAAGGCTAAAGAGGTCATTAGAATCGGTCTT +AATCGTAAGCTCGCACCTTCTAAGGCCGCCTGAATGGCATTCTTACCTTTTTCTTGCGCT +AAGGCCGCGAACTCAACAATTAAAATGGCGTTTTTACATGACAAACCAATAGTCGTTAAT +AGCGCAATTTGGAAATACACGTCATTTGGCAAACCAAAAATATAAGAGAAAATCACACTT +CCTCCTACACCAAGCGGAATAGATGTCATTACGGCGGCAGGAATACTTAAGCTTTCATAT 
+AAAGCAGCTAAACATAAGAAAATAAATCCAGCCGAAATTAAATATAACCACACCGCCTGA +TTAGTCGACTTCTGTTCTTCAAAAGATAAACCTGTCCACGCTAAACCAATATCTTTTTGT +TGGTTAACAAGTTGTTCTACATCTTTCATCGCTTGGCCAGAACTGCTGCCACTCGCAACA +TCTGCTTGTAGTTGTAAAGCACTATATCCCATATAGCGTTTTACAATTTCCGGTGCCCCG +CCCCAGCTAAAGTTGGCAAATGAACTAAACGGAACCATTTCATTTTGGTCATTACGTACA +GACCAGTTATATAAATCTTCCGGTTTAGATCTAAACTCGGCATCACCTTGAATCATGACA +CGTTTAATACGGCCCCGATCAATAAAGTCATTTACATAAGTTCCGCCCCATGCGCTAGAT +AAAGTATTATTAATTGCCGATAGCTGTAGTCCATTTGCTAGTGCCTGTTTCTGGTCAATC +TTAATATTAAGATTTGCCTTACTATTGGTTGACTGCTTATCAAAGTTTTCGAAAGTTGAA +TAATTTTTACTTTGAGCCTGCAATTGGCGGAAGGCACTATCTAGAAAATCTTGCCCTTGC +CCATTCAAATCCTGAATCCATAAATCCAGACCATCTGTTTGACCTAAGCCATTAACTGAG +GCAGGTAAAGTCACATTAATCTGTGCATTATTAAAATGACTAAAGTATTTCATTGCACGC +TTTTGTATAGCCTCAGCCGAGTTTTCTTTTCCTGTTCGGACATCCCACGGTTTTAGAGCA +ATAAACCCTTGTGCCAAGTTTTGTCCTGTGCCCGAATAATTTCGTCCATAGCGGATTAAA +ACCAAATCTACGTTTTTATCTTCTTGAGTTAAGAAATATTGGCGGACTTGCTCACCAATT +TTCTGACTTTGAGAAATTGGTGCGCTGTCTACGAGCTTAATTTGAACACTTAAAATCCCT +TGGTCTTCTTTAGGAATAAAACCGCTTTTTAAACCGTTATAGAACAGCGTAAAAACGGCA +ATTAAAGCCACAAAAATCACAATAACTGATTTACTGTAATGAATACTCGTCTGAACAAGC +TTGATATATTGATTTTTAAGTTGCTCAATCTTTTGGTTAAACCATACGGCCCAGCGCTGA +GGTTGAGGGTTTGGTTTTAAAATTAATGCACATAAAGCCGGTGTTAAAATGAGCGCAACA +ATAAGGGACAACGCCATAGCGGCAACTAAAGTAATAGAAAACTGACGGTAAATCACCCCA +ATTGAACCGCCTAAAAAGGACATTGGAATAAAAACAGCAGTTAAAACCAACGTAATTCCG +ACTAAGGCACCACTAATTTCTCCCATCGACTCAATAGCAGCTTCTTTAGGAGATAAGTGC +TGTTCATGCATGAGCCGCTCAACGTTTTCTACGACCACGATTGCATCATCGACCAACAAA +CCAATCGCGAGTACCAGCGCAAATAACGTTAAGGTGTTAATACTAAAACCAAGCACATAT +AAGACCGCGAAAGTTCCTAAAATTACAACTGGAACGGTAATACTCGGAATGAGCGTAGCA +CGCCAGCTTTGTAAGAACAGGAACATGACCAAAATAACCAGAATAATCGCCTCTACCAGA +GTCTTTACTACTTCCTTAATTGACTCTTGAACAAAGGGCGTATTATCTCGTGGATAAACG +ATTTTATAACCCGCTGGTAGTTTCGTTGTAAGCTGATCTAGAGTTTGGTGGATGAGCTTA +GAGGTCTGAATTGCATTTGCACCTGAAGATAAAGAAATACCCAAACCTGCGGCAGGATAG +CCATTAATAGTGTTAAAAGACTGATAGTTTTCTGCACCTAGCTCAACTCTGGCAATATCT +TTTAAATACACATAACTCGCCGTTTTATTCGACTTCACGACAATATTTTTAAAATCCTCA 
+ACTGTTTTTAAGCGAGAACCTGCTGTGACTTTTGTATTTAAATATTGACCGTCAATTACG +GGTAAGTCACCAATTGCCCCTGCGGCGACCTGAGTATTTTGTGCGGTGATTGCATTCGCT +ACATCACTTGGCATTAAATTATATTGTTTTAATTTATCCGGATTTAACCAGATACGCATG +GCATATTGTGAACCAAATACATCAGTTTCACCCACCCCTTCAATACGGTTCAGGTTATCT +ACCACATGCGTAGTTAAATAGTCCGATAGCTCAATGTTTCCTGTTTTACCAGTCGAGTCA +TATAAGCCAATTACCATAAAAGTGTCACCCAGTGACTTACTTACCGTAACACCTTGGCGT +TGAACTTCATCCGGTAAGCGACGTATTACGCCACTAATACTATTTTGTACTTGGACCTGA +GCAGTATCCGGATTTGTTCCGTTATCAAAACTTATAGTAATTCGACTACGTCCAGATGAG +TCACTCGATGAACTAAAATAGAGTAAGTGGTCAATCCCTTGTATTTGCTGTTCTAAAATT +TGAGTAACACTTTGCTCAACCGTTTGTGCATCTGCACCACTATAGTTGGCCGACACAGTA +ATTTTAGGTGGAGCAATGTCTGGATACCGTTCTACAGGCAAATTCATAACCGAAAAAATA +CCGAAAGCCATAACAATGATCGCCAATACATTGGCAAAAATGGGGCGTTGAATAAAGAAT +TTAGATAGCATGGCTGCTCGGTATGATGTCGTTAATTAACAGAAGTAAATGCCGCTTCGA +GACCTTGTGATGATCAACGAGCGGTATAAAAAAGACAAGTAGAATTAAACAATAAATGTA +ATTAAGACAAAAACAAAGCGTGATTGAATATTTCCAACAAACCAATAAAAAACCAGTTAA +CGAGTGTCACTTAAAATTCATGAATTCTTCATGATCATGTCAAACAAAGCACCGAAACTT +GAGTCGTTTCAACTAAAACAATATGAGTAGAATTATGTTTAGAAAAGCACTTTTATGCTT +AAGTTTAATTAGCTTGGTTGGTTGTAATGATGACGATAAAACTGAAACGACGCCAACCAC +GCCAGAATATCAACTTCCTAAAATTCTAGTAGTAGGACACCGCGGCGCTAGCGCTTTACG +TCCTGAACATACTTTAGCTTCATATCAAAAAGCGATTGATGACGGCGCAGATTTCATTGA +ACCGGATCTAGTCTCTACAAAAGATGGCGTATTGGTTGCCCGCCATGAAAATGAGATTGG +TGGAACAACCAATGTAAGCACTTTAAGTCAGTTTGCAGACCGTAAAAAAACAAAAAATAT +TGATGGCGTCGACTTAACCGGTTGGTTCACGGAAGACTTCACTTTAAGTGAATTACAGCA +GCTTAAAGCGCGTGAACGTATTCCTGAGTTTCGACCAGCCAACACAGCTTATAATGACCT +TTACCCTGTCCCAACTCTAGAACAAATCATTGAGCTTGCCGAAGCTAACTATAAAAAGAC +GGGGAAAATTATAGGTTTATATATTGAAACGAAACATCCGACTTATTTTAAAAATCAAAA +TCTGGCAATGGAAGATACTCTTTTAAAAACCTTAGCCAAATATAAATATACACGTGATAT +TGCACCTGTCTATTTACAGTCTTTTGAAGTGAGTAATTTAAAATATTTAAAAAATGAGCT +TGATCTTCATAAGACGCTTAAACATGCACAAATTATTCAGCTATACGATTCAAAAACATC +TCGACCAGCAGACTTCGTAGAGTCTGGTGACACTAAAACTTATGCTGATTTAGCCACAGC +TCAAGGGTTAAAAGATGTTGCCAAATATGCAAATGGTGTAGGACCAAGTAAAGGTTACAT +ACTGACCTTTAATAACGATGGCTCTTATAAAACTAGTACGTTTATTTCTGATGCACATAC 
+GGCTGGCTTAAAAGTACATCCTTATACTTTCCGACCAGAAAATAACTTCTTACCAGCGCC +GTTAAAGTGCAGCCCAGATAAACCTGCTGAACGTTGTCCATCTGGTGCGTTAAAAGAGTT +TGAAGCCTATTTCAAGGCAGGTGTTGATGGCGTCTTTACAGATGACCCAGCACTCGGTCG +TGAAGCTGTCACTAATTTTGAAAAAGCTGCAAAATAACTCAATCTTCATAAAAAAACAGG +GCAATGAATGCCCTGTTTTTTTTATTTATCAAGACTGTTATTGAAATATAAAGGAATCCA +CTGATACTGCTCAGCATTCACTTTATACACATGACCAATTCCCGGGAACGGTAAATGCGG +GGCAGCTACCCATTGTTGCTTATTTGAAATTTCAGCAAACATTTTTAAACGAGTATTAAT +TGCCTGCTCTGAATTTACATCAAAATCTACCCCAGTTTTTGGAGCATCAAACTGTAAAGA +ATGTGAGTGGACAATATCACCAACAAAGACAATTTGTTGCCCTTTACTCTTCAAACGGAA +GCTATGATGTCCCGGTGTGTGCCCTTGTGTATTGATTACTTCAAAACCTTGAATAACATC +ATCATCTTTAAAGGTTTTGAATGCTTTTTTTGCTTGATAAGGTGCAAGTGCAGCTTTTAC +GTTTTTAACAGTACCTAAATAATTTTCTTTTTTATCCGCTGGTACAGTTTTTTCATTTGC +TGGATTTAACCAATAATCTGCTTCACGTTCATGAGCATAGATCGTTGCATTAGCAAATAC +GGCTTTTCCATTTTGAGCAATACCGCACACATGGTCTGGATGTAAGTGGGTCAATAAAAC +AGTTTTCACATTGGCAAGTTGATAGCCCGCTAATTCAAGATTTTTAGCAATTGAACCTAA +CTGTGGACCAAAACAACTCGCAGCTCCACTGTCAACAAGCGTCAAACTTTTACCATCATC +AACTAAAAAGGCATTTACTGAAGTCTGAATCCCCTTCTCATTTACAGCAGCATATTTCGT +TAAAATTTTAGTTTTTTCAGCCGGACTTAAATTTTTAAACAATTTAGGATCTAGATAAAT +AGTGCCATCGAGCAAGGAAGTTATCCGATAATTACCAAATTGATGATGGTAATATCCGGG +TACTTGTTGAGCTGAAGCTGGTTCTGCATAGCTTATATGCAAGCTACCCATGATAAGTCC +TAAGGCTACAAATAGTTTTTTCATCTCTTCTTCTCTATGGAGTTATAAAGTCAAAATCTT +TAAAAAAGATTTAATATTTTTTTAAGGGCTGCACTATATAATGCATGAAAAGTGTCAAAA +ATATAAGTGAATGGATAGTTATTCTCCCTATAAAGGTAAAAGTGGCCTAAAGCGCATCTT +GAACGCTACCAGTTATTCAATTTCAGGATTTAAAGCTGCTTATCAAAATGAAGCTGCATT +TCGGCAAATTGTTTTAATTAATCTTGTACTCATCCCTGTAAGCTTTTTCCTGGATGTAAC +TCGCGGCGAACATGCCCTGATGATCATTGTCTGTTTATTTGCCATCATTGTTGAGCTCTT +CAACTCTGCCATTGAAGCAGTGGTTGACCGAGTTTCACTCGAGAAACACCAACTTTCCAA +AAATGCAAAAGATATGGGTAGTGCCGCTCAGTTTGTTGCACTTTCTATTATTGTCGCCAC +TTGGCTTATTATTTTATTTGGATAAAAAAAACCATGCTTATAAATAGATAAGCATGGTTG +TAACAAAAGAGGAACTTCTAGCTGCTGTTCCTGTTTTAATTTCAGGGTTCTGAACTTAAC +TTATCCTTAAAATTACTGGAAAAATAAAAAAAATCCCTGCGAATGCAGGGATTTTTTTGG +GTTAAAGCGTTGATGCGTTATTACATCATTCCGCCCATACCACCCATACCGCCCATATCT 
+GGAGCAGCTGGTTTGTCTTCAGGAATGTCAGTAATCATACATTCTGTAGTTAACATTAAG +CCAGCAACAGAAGCAGCGTGCTCAAGTGCAGAACGAGTTACTTTAGCTGGGTCAAGGATA +CCCATTTCTAACATATCACCATATTCGCCAGTTGCAGCGTTGTAACCGAAGTTACCTTCA +CCATTCTTAACAGCGTTGATAACTACAGATGGCTCATCACCAGCATTCGCAACGATTTGA +CGAAGTGGAGCTTCGATCGCACGGCGTAAAATGTTGATACCAGCTGTTTGATCTTCGTTA +GCGCCTTTTAAGCCTTCAAGAGCATTTACAGCGCGAACAAGAGCAACACCACCACCAGCA +ACAACACCTTCTTCAACTGCTGCACGAGTTGCGTGAAGTGCGTCGTCTACGCGGTCTTTC +TTCTCTTTCATTTCAACTTCAGTTGCTGCACCGATTTTAATTACAGCAACACCGCCTGCT +AACTTAGCAACACGTTCTTGTAATTTTTCACGGTCATATTCTGAAGTAGACTCTTCGATT +TGAGCACGGATTTGTTGAACACGCTCAGCGATAGCAGCAGCATCACCAGCACCGTCAACA +ATAACTGTGTTTTCTTTAGAAACAGTGATTTTGTGCGCTGTACCTAAATCTTGAAGAGTT +GCTTGTTCTAAAGACATACCAACTTCTTCAGAAATAACTGTTGCGCCAGTCAAGATCGCG +ATATCTTGAAGCATTGCTTTACGACGGTCACCGAAACCAGGAGCTTTAACAGCACATACT +TTGATGATACCGCGCATGTTGTTTACAACAAGAGTTGCAAGCGCTTCACCTTCAACATCT +TCAGCGATGATAAGAAGTGGTTTACCAGTTTTAGCAACTGCTTCTAAAACAGAAATCAAT +TCACGAATGTTGCTGATTTTTTTATCAACAAGAAGAATGAACGGATTTTCAAGTTCAGCA +GTTAAAGTATCTTGTTTGTTTGCAAAGTACGGAGAGATATAACCACGGTCAAACTGCATA +CCTTCTACAACGTCTAATGCGTCTTCGAAGCCAGAACCTTCTTCTACAGTGATTACGCCT +TCTTTACCTACTTTTTCCATTGCTTGAGCAATAAGTTTACCAACAGTAGTATCAGAGTTA +GCAGAGATTGAACCTACTTGTTCAATTGCTTTGAAATCATCAGCTGGTTTAGCAATAGAA +CGGATATTTTCAACTACAGTTTTTACTGCAATGTCGATACCGCGTTTTAAATCCATTGGG +TTCATACCAGCAGTTACTGATTTGATACCTTCATTTAAAATTGCTTGAGCAAGTACAGTT +GCAGTTGTTGTACCGTCACCTGCGATGTCGTTAGTTTTGCTTGAAACTTCACGAACAAGT +TGAGCACCCATGTTTTCAAACTTGTCTTTTAATGAAATTTCTTTTGCAACAGTTACACCG +TCTTTAGTAATGTGCGGCGCACCGAAAGAGCGGTCGATCACAACATTACGGCCTTTAGGA +CCTAAAGTCACTTTAACCGCATCTGCAAGTACGTTTACCCCTGCAATCATTTTTGAGCGA +GCTGAATCACCAAATTTTACGTCTTTAGCTGACATATTAAACTCCGAATCTTTTTAAATA +CTGAATCTGATAATGAATTGAGTTATGGATCGATTAGCCTTCTAATACAGCTAAGATGTC +TGACTCTTTCATGATTAAGAGTTCTTCACCATTTACTTTAACTGTTGTACCTGCATAAGT +ACCAAATAACACCTTGTCACCAACTTTAACATCCAAAGCACGTACGCCGTTATCAGTGAT +TTGACCATTGCCTACTGCAATTACTTCACCTTGAGATGGTTTTTCAGCAGCAGAACCTGG +CAATAAAATACCGCCAGCAGTTTTGGTTTCTTCTTCTACGCGACGAATCACAACGCGATC 
+ATGTAATGGACGAATGTTGCTCATAATTAACTCCATCAGACTTAGTCTTTTTGATTAATC +GTTATTGCTTTAATCCAGAGCAATAACAAAATATTTAGATGTCACTTTTGTGGGGATGAA +AAAAATGGCTTCAAGGGGAAATAAGAAAAATTTTTCTCTTTTTTGAATATTTTTTAAACA +ATCAATTATTTAAGCTCATCTAAACCAAATCAATATGAAGTGAATTCAGCCAGCTGTTCA +TTAAACCAATAATGTCGAATTTTTCCTTCATTTGAAATTGTATTAAAGCTATTTGGATTA +TGATGATATAAGCGAGTAGAAGTTGCTGTACCTGCATGTATATCAAAAATAGGATGATCT +ATTTTTAAAGAATAAATCTGAGTTAAATCATATATAGCTGTTTTATGGAGATGCCCATGT +AACATTCCAAATAGACCAGTTGTACTCCACTTTTCTAAAGCGATTTTGCCCAGAACCGGA +CAATCTTTTATTCCATGCTTATTATCAGGCGGCGTATAAAAGGGTTGATGAAAAACAACC +AGCTTTATTTTATTTTTAGGTCCCCTGTCTAAACGCTCATAAGTAGCTTGAATTTGTTCA +ATCGAAATATGCCCACGCGTATGATAACGACGACGTATACTATTCACCCCAACAATATAA +AAATGTTCAGTTTCTAAAGTCGGCTCTAATTCACCAAAAAAATATCTATAACGGGTAAAT +GGAGAAAAAAAACGGTTCCAGACATGATACAAAGGTATATCGTGGTTACCAGGCACCACA +AGATAAGGAATATTGAGGCTATCTAAATACTGTCGACATTTAAAAAATTGTTCATATTTG +GCGCGTTGAGTAATATCGCCGCTCACAACAATCACTTCAGGCTGTTGCTGAATACAAAAA +TCTCTTATTGCCTCTAAACACTCTTTTTTTTCTGTCCCAAAGTGTAAATCAGACAGATGT +AGTAACATTCGGCACCATAATATTTAAAGCATTTTTTTCTACAGTGAAGTTTAAAGGAGG +TTTCATCTCTATGATTTCTCCATCTAAGGCCACTGTGAGTTTGGTTTTCTTTGCACATTC +AACAATGACATGATCAGCACAAAAACTATATACATCTTGTGCATCTTCAACTTTACCTTG +AACCCATTGCCAAAGCATATTTAATAAACTCAGCTTATCACTTTTTGTAATTACAACTCC +AGCGACTCTTCCCTGTGCTGCACATTCTGCAATTCTGAGCTTCATATCACATAGTTGCAA +TTGGTTATTTCCAAAGAAAATAAGCGGCGCTTTAACTGGATATTTCTTGCCATCTACAGT +TATAGAAAGCTTCATTGATTTATTTTCTCTCAATAAAACATCTAAAGCTGAGGTATAGGC +ATGTAAAGGCAGTCTTCCTAAATATTTATTATAAAGCTCACGTTTCTTTATAAATAATGG +ATAAAGTCCTAGACTCGCATTATTTAGATAAATATGATCATTAATAACTGCTACATGTAC +TGACCTAGGTTTACCTGTTGCAATAACCTCTGCTGCCTCTAAAAGGTCTAACGGAATCTC +TAAAACTTTAGCAACATAATTAAAAGTCCCTAAGGGCAGAATTCCCATGGGAATAGGAGT +ATTTTTAAGTTTAGTTGCAACCGCATTTAAAGTACCATCTCCTCCTGCCGCAACAACCAC +TCCTGTATTTTCATTTTGTGAATGTCGATGAATAACATTATTTATCAAATCATCAAATAA +TGTATTTTCATTTAATTCAAATACTTGTATTTCAAAACCATATTCTGTAAATACAGTCAT +GAGCTGCTCATAGACGTCTTCATGTTTTGAAGCATGAAATCCTGATTTTTCGTTATAGAT +GATTGAAAGAGGTTTCAAAGGCCGCATTTTATTATCGTAAAGTCAGTTCTTTATTCTATT 
+TTGTATATAAAATCGCCTATTAAAAGTCTCTTTATGTAAATTTTGAATGAGCTTCATTTG +TATAATCAATAATAATTCTCAAAAAAATTTTTCAAGCCTAATTAATTGATTTTAAATAAA +ATAATTTAGATAAATAACATACATATAACATAAAAATATAAAAGCAAAAACCTTATAAAA +ACATATAAACATATGATTTTAATTAAAAAATAAAGAAAACTCCAAATATCTGCTAGTTAG +TCTTTAGTCTTATTTTTTTTATGATTTTATGCTTTAATACAGCCACTTTTTTATTTGATC +TTTCATTGTATCCCAATGAATGACTTGTACGTTGTACATATTTTGAATAGGCCTCACCAT +GACCACAGTGAACGCACCAGAATTCGTTCGTCATCCTAAGCTTATAGCATGGGTTGAAGA +AATTGCAAACTTAACCAAACCAGCAAAAATCGAATGGTGTGACGGAAGCGAAGAAGAGTA +TCAACGTCTAATCGACTTGATGATCGCTAACGGCACCATGCAGAAATTAAACCAAGAAAA +ACATCCTGGTTCTTATCTTGCAAATTCTGACCCATCTGACGTTGCGCGTGTTGAAGATCG +TACTTACATCTGCTCTCAAAATAAAGAAGATGCTGGTGCGACAAACAACTGGGAAGATCC +AGCTGTTATGCGTGAAAAATTAAATGGTTTATTTGAAGGTTCAATGAAAGGCCGTACCAT +GTACGTTGTTCCTTTCTCTATGGGTCCTTTAGGTAGCCATATTGCTCACATTGGTATCGA +GTTAACTGACTCTCCTTATGTAGCTGTTAGCATGCGCAAAATGGCACGTATGGGTAAAGC +AGTTTATGATGTATTAGGTACAGATGGCGAGTTTGTTCCTTGTGTACATACAGTAGGTGC +TCCACTTGCTGAAGGTCAAAAAGATGTTGCTTGGCCTTGTAACCCAGAGAAATATATCGT +TCATTACCCAGAAACTCGCGAAATCTGGTCTTTCGGTTCTGGTTACGGCGGTAACGCGTT +ACTTGGTAAAAAATGTTTAGCTCTTCGTATCGCTTCTGTCATGGGACGCGAACAAGGTTG +GTTAGCTGAACACATGCTTATTCTTGGTGTAACTAACCCTCAAGGTGAAAAACACTACAT +CGCTGCTGCATTCCCGTCTGCTTGTGGTAAAACAAACTTTGCAATGTTAATTCCACCAGC +AGGTTATGAAGGTTGGAAAATCGAAACTGTAGGTGACGATATTGCTTGGATTAAACCAGG +TGAAGATGGTCGCTTATATGCGATTAACCCTGAAGCTGGTTTCTTCGGTGTAGCACCTGG +TACAAATACTAAAACTAACCCGAATTGTATGGCAACTCTTCACAAAGACGTTATCTATAC +AAACGTAGCAGTAACTGACGATGGTCAAGTATGGTGGGAAGGTCTTTCTAAAGAAGTTCC +AGCAAACTTAACTAACTGGAAAGGTCAACCTCACGTAAACGGCGAAAAAGCAGCACATCC +AAATGCTCGTTTCACTGTTGCAGCAGGTCAATGTCCATCTATCGATGCTGATTGGGAAAA +CCCAGCAGGTGTTCCAATTTCTGCATTCATCTTCGGTGGTCGTCGTGCAGATACAGTACC +TTTAGTTTCTGAAGCTTTCGACTGGGTTGACGGTGTATATAAAGCGGCAACTATGGGTTC +TGAAACTACTGCTGCTGCTGTTGGTCAACAAGGTATTGTTCGCCGTGACCCATTCGCGAT +GCTTCCATTTGCTGGCTATAACATGGCTGACTACTTTGACCACTGGTTAAACCTTGGTGC +GAAAGTAAGTGAAAAAGCTGAAGCTTCTGGCAACAAATTACCAAAAATCTTCAACGTAAA +CTGGTTCCGTCGTGATGCGGAAGGCAACTTCGTATGGCCTGGTTTCGGTCAAAACATGCG 
+TGTTCTTGAGTGGATCATTGATCGTTGTGAAGGTCGTGCGAACGCTGTTGAAACACCTAT +CGGTTTTGTTCCAACATATGAAGACTTGAATTGGGAAGGCACTGAGTTCACTAAAGAACA +ATTTGACCTCATCACAAATCAAGATAAAGACCAGTGGGTTACTGAAATTGAAAGCCACAC +TGAGTTATTCAATAAACTTGGCGAACGCTTACCTAAAGCATTAAAAGAACGTCAAGCAGC +TTTACTTGAAGCTGTAAAAACTGGCTTCTAATTACTCTATATAAAAAAGGTCGCCATGTG +CGACCTTTTTTATTATGCGGCATCGTCTTTTAATCGACGTTCTGATGAATAGGCTTCTAA +AACTTTTACCTGTTTTACATCAAATGCCAAACCTAATTTAGATCTGCGCCAAAGTATATC +TTCTGCTGTGTGAGCCCATTCATATTCACACAAATATCTGACTTCACACTCAAACAAATC +ATGGCCAAAATGTTGACCCAATTGTTCTATAGCATTTCGCTCTTTAAGCATATTCCACAC +TCTTGTGCCATATGCATGAGCCCAACGGTTGGCAAGTGCGTCAGAAATTCTACTTACACG +TGTTTTAATCTGATTGATTAAATCATCTAGTGTGGTCCAGTTTTCGGCGCCAGGTAATGC +TTCATCTGCAGTCCACTCCTCTGCCATGTCATTAAAAAAAGGAGCCAGATGTTCTAAAGC +AGCTTCGGCTAACTTTCGGTAAGTTGTAATCTTGCCCCCAAACACTGAGAGCAATGGTGT +GGTTTTATCTTCTGCCTGTAGAGCCAAAGTATAGTCGCGTGTAATTGCAGATGGATTATC +CGACTCATCATCACATAAAGCACGCACACCCGAGTACTGACTTACAATATCAGCTCGCGT +CAACTGCTTTTTAAAATGTGAATTTGTCACTGTCAAAAGGTAATCAATTTCCACATCAGT +AATCTCTACTTTTTGTGGGTCGCCTATGTATTCCTGATCTGTTGTACCAATGAGGGTATA +TTTTTCTAAATAAGGAATTGCAAAAACGATCCGTCGATCTTCGTTTTGCATAATAAAGGC +CTTATGGCAGTCATATAATTTTGGCACAACAATATGACTTCCTTGTATAAGCCTAATTTG +ATAAGGCGAACTTAAACCCAGGTTTTTACTAATAATTTCTTCAACCCAAGACCCCGCCGC +ATTTACAATAGCTTTCGCACGTATCTGATAGAACTCAGCTCCGCTTTGTAACTCCAGATG +CCACAATTCCTGTTGTCTATAAGCCTTAACACAACGTGTACGTGTAACGACTTTTGCCCC +TTTCTCTTTAGCTTGTAGTGCATTTAACACCACAAGACGCGCATCATCTACAGTACAGTC +AGAATATTCAAAACCACGTGTTATCGCTGGTTTTAAAGGGCTGTCTTCTTTGAAATAAAT +GAGATTTGATCCTAATAATTTTTCTCGTTTTCCCAAATAATCATAAAAAAATAGACCTGC +TCGAATTAGCCATGCAGGGCGTAAATGGGATCGATGAGGCATGATAAAACGCATTGGTTT +AATAATATGTGGCGCTTTAGCTAGCAATACTTCACGCTCTGCTAATGCTTCTCTGACCAG +CCTGAATTCTTTATGTTCTAAATAACGTAGGCCGCCATGAATTAATTTGCTGCTAGCAGA +TGAGGTATGACTGGCTAAATCATCTTTTTCACATAAAAATACCGATAATCCGCGCCCTGC +TGCATCATTGGCAATACCAACACCATTAATACCGCCACCAATTACGGCAAAATCATATAT +TTTTGAATAATCATTAGGTTGTACTTTCATTTCTCATGTTCTTTTTATTGTTGATCTTAT +AAAAATTAAAACATCAAATTGATGAAACAACAATCAAAAAAGTAGAATAATCAATTTTAC 
+ATCCTCTAGTATTTAACTATCCATTGACTACCCTACTATTGAATTCAACTAGTTAAGAAA +AAGCTCAATCCTCTGCCCAATTTTGGCTACGCTTAACTGCTTTTAGCCAACCTTTATAAA +TGAGTTCAGCTTGTTCAGAAGGCATTTTCGGTTCAAATACCTTTTCTATGGCAGATTTAT +TTCTTAACTCATGAAGATCTTGCCAAAAACCAGTGGCCAACCCTGCCAGGAAAGCTGCAC +CCAATGCTGTAGTTTCTTTCATTATTGGACGTTCTACCGGTGTCGTTAAAATATCGGCCT +GAAATTGCATCAAGAAATTATTTTCCGTTACTCCTCCATCAACACGGAGTGTACGAAGCT +CTTCTTCTGCATCTTGTTGCATGGCATCTAAAACATCTCGAGTTTGAAAGGCTATAGACT +CTAGAGTTGCGCGGATAATATGTTCAATACTAGCCCCACGCGTCAGACCAAAAATTGCGC +CGCGAGCTGTCGGGTCCCAATACGGTGCTCCTAACCCGGTAAAAGCAGGTACTACATATA +CACCATTATTATCCTTTACACGTGTTGCATAAAGCTCGGAATCCTTAGCATTCTTAATAA +CCTTTAATTCATCGCGTAACCACTGTACGCAAGAACCACCGTTAAATACGGCACCCTCTA +AGGCATAGTTCACCTCACCACTCGCACCACAGGCAATAGTTGTGAGTAATCCATGCTCTG +AACGGACAATCTTTTTACCCGTATTCATGAGTAGAAAACAGCCGGTACCGTAGGTATTTT +TAGCCTGTCCCGACTCAACACACATTTGACCAAAAAGCGCAGCCTGCTGATCACCCGCGA +TTCCGGCAATTGGGATCCCTACTTCTTGTCCACTAATGGTATGTGTATATCCATAAACCT +CAGATGAACTACGAACCTCCGGCAACATTGCTCTAGGAATGTCTAAAGCTTGTAAAAGCT +TTTCATCCCACTCAAGCTTTTCAGTATCAAACAGCATGGTTCGAGATGCATTGGTAAAAT +CAGTGACATGGACGGCCCCATTAGTTAGTTTCCAAATGAGCCAAGTGTCCACCGTACCGA +ATAATAACTCTCCCCGCTCAGCACGCTCACGGCTGCCTTCAACATGATCCAAAATCCATT +TAATTTTTGTGGCCGAGAAATATGGATCAATGACTAAACCGGTGGTCTTACGGATATATT +CTTGCCAACCTGCTTTATACAATTGATTGCATATTTCGGTTGTTTGTCGGCTTTGCCAAA +CAATCGCATTGTAAATAGGTCTTCCTGTTTTTTTGTCCCAGACAATGGTGGTTTCCCGCT +GGTTTGTAATTCCAATTGCCGCAACTTGTTCACTCTTGATGCCAGCTTGAGCCAATGCTT +CGACCCATACAGCACTTTGAGTTGCCCAAATTTCCATAGGATCATGCTCAACCCAACCTG +GCTGAGGGTAAATCTGGGTAAATTCTCTTTGAGCAATACTGACAACGTTCGCATCATGAT +CTAAAACAATTGCTCTTGAGCTTGTTGTTCCCTGATCAAAAGCGACAATATATTTTTTCG +GGCAATTTGACATCTGAATGTCCCTTTCATTTTCCACACACATCTAGCCCATAATGCAAA +AACATCATGCCTAACTGTATTTATTTAAAATTTTCTTTGGAATATATCATTATTTTTAAG +TGGCATAGGCTTTCTTCGTCCAAACATTACCCATTCTTTATCAATTTTTTACTGCATGGA +AAAAAGTAATAAGAAAAATATTGAAATTAGAAGATAAAAATATGCTGATCGGCTAATTTG +AAATCACATAAATATTCAACTTATTCCTTTGCTTAAATAATAAACATTGTTTTACAGACA +TAAAAAAAATAAAGTGAAATACTCCTGTAGATAATATGTAAAAACGGAATAAATCTTCTC 
+TTTTTTAGCATTCAACCCAAGCGAAACATGGAGCGAATCATGGTTGATCAACCTTCTACC +GCAACAACTCCACATTCTAATTTAGATACAAAAACCCGTCTAAAATCAATTTTGGGCGGT +TCTGCCGGTAACCTTGTCGAATGGTACGACTGGTATGTATATGCCGCATTTACGCTCTAT +TTTGCTCATGCATTTTTCCCAAAAGGAAGTCAAACCGCTCAACTTCTTCAAGCTGCAGCT +ATTTTTGCAGTGGGTTTCCTTATGCGTCCCATTGGCGCATGGATTATGGGGATCTATTCT +GACCGTAAAGGACGTAAAGCGGGCCTTACACTTTCCGTTACTTTAATGTGTATCGGTTCG +CTACTGATTGCTGTTACACCCTCCTACGAAAGTATTGGTGTATTTGCTCCATTGCTTTTG +GTTATTGCACGTTTAATTCAAGGCTTAAGTGTAGGCGGTGAATATGGCGCAAGTGCAACG +TATTTAAGTGAAATGGCAGAAAAAGATCGTCGTGGTTTTTTCTCAAGCTTTCAATATGTC +ACTTTAATTGCAGGACAGCTTACAGCTTTATGCGTACTACTTATCTTGCAAATGATACTC +ACCGAAGAACAACTGCATGACTGGGGATGGCGTGTTCCGTTTTTCATCGGAGCCCTTTTA +GCTATCGTGGTGTTCCGTATTCGTCGTGGCTTATTGGAAACTCAATCCTTTAAAAACGCT +CAAGCAGAAACAGATCAGCCGAAATCAGGAATGTTTGCTTTATTTAAACATTACCCTAAA +GAAGCCTTCACTGTATTATTCCTAACTGCTGGTGGTACTTTAGCTTTTTATACTTACACC +ACCTATTTACAGAAATATTTAGTGAATACTTCTGGTTTTACCAAGCCTGAGGCTACTCAA +ATTACCACTTTAGCCTTATTCATCTTTATGTGCTTACAGCCATTGGCAGGTGCCTTATCA +GATCGAATTGGCCGTAAACCGCTTATGATCGCCTTCGGGGTTACAGGTGTTTTATTCACT +TATATTTTGTTTGATACACTTGCAAACACGCATAACTACTGGACTGCTTTCTGGCTATGT +TTAGGTGGACTGGTTATGGTAACCGGCTACACATCAATTAATGCTGTAGTGAAAGCTGAA +CTTTTCCCTGCACATATTCGAGCTTTAGGTGTCGCATTACCTTATGCAATTGCGAATACT +TTATTTGGTGGTACAGCCGAGTTTTTTGCATTAAGTTTTAAAGAAGCCGGGCACGAATCT +TGGTTCTTTATTTACGTCAGTATCATGATTTTCATTTCATTGCTGATTTATATCTTCATG +AAAGACACCAAACATCATTCAAAAATTAAAGAACATTAAAATCCTTCAATTAGAGCTAAT +ATCATAAGTGAATGTTAGCTCTAACTTAGTGAATAAGCTCCAACAACGTTTCTATTTTAT +GAGATACTTCATATAAGAAAATAAGGACGATACTGATGAATAAAAGTATTCTATGTTTGG +CATTAAGTAGTATGTTCATTTTAACTGCCTGTCAGACTACGCCTCGACAATATAATGGTT +CAACTGGCTACCAGATTGAAAACCAAACAAAAACTTCAGCGACTTTAGCTTACACTTTAG +CTGGTCGTAGTAACCAGCAACTTGATGAGCGTAAATTGCAACGCGCTTGTCAAAATGTGT +TAGGTGCTCAGAAAGTTTATAAATTATCAATTTTAAGCATCAATGAAATTCCTAATCCAG +CTAAAGATGAGCATTACGGAATTCAACTAGGTGAAACCCGCGCTTCTTTTGGCTTATCAA +ATACACCTAGCTTAAATAATGGTGAAGATTATGCGACACGCCAAGCACTTGAAGCACGTC +CAAGTACATTAAAAGTCGTTCGTTATACTTGTTCATAACATAAAAAAGACGCTGTTTTTA 
+CAGCGTCTTATAAAAACTTACATTTCTAAATTCTTTCGCTTCATCCAGATCTGGCCAAGT +TGTTGCACTTCTTTCTTCAATTTTTTCATATTTTGGATGACTAAAATTTTTAACACGGCT +ATCTGCTACCCATACTTCTGGTGCAAATTTTAAAAATTCGTCTAAGAAAAAGCGATTACA +TTGGTCATATAACACATCGGCAGCAAGTAAAACATCGACTTGTTCAGCCTTATATAAATC +ATCCAAATATTCGAGTTCTACATCATTAAGTAAAGCATTTTCACGACAAGCATTTAAACT +GACCTGATCAATATCACAGCAGATTACGCGTTTTGCTCCTGCCATTTTTGCAGCAATTGC +CACTACACCAGAACCTGCTCCGAAATCTAACACCACTTTATCTTTAACATGGTGCGGCTC +TGCAAGTATCCACTGAGCCATCGCCAAGCCTGATGCCCAACAGAAAATCCAATACGGCGT +GTCATTCCAGATACGGCGAATAACTTCGTCATCCAACCGATCAGTTGGAAAAACAGGTGG +AATCAACCATAAAGAAATTGGAGTTTCTGGCAACTGTTGTGCCATTAATTCACAATGAGG +AATGACTTCATGTAAGGCTTGAAGTAAATGTTCAGGGGCTTTGGTCAATTGAAAGGTGCA +GCTCATAAATAAGCTCAACAATTTATAATTACTATTTGATCATCTGCATCGCGAGCAAGA +CTAAAACAGTAGCTTAAAGCATGATATTGGCTAGTTACTAAAAATACGTTAGTTCAATTC +CACTGCTTTAATTAGTGCAACCGTTTTGGTCTTGCGTAGCATTCGCTACTCATCCCGAAA +GAAAAGTAACATACGAGCTCCTAACATTTTCCTTAAACGGTAGGATGCTGCCGTGAATAA +ATTAAATTAACCTAAAGCTTTTTCAGCAGCTTCTACAGTTTGACGAATCAAAGTTGTAAT +GGTCATTGGACCAACGCCACCCGGTACTGGTGTATAAGCAGAAGCAATTTCTTCAATACC +TTGTAATTGAATATCGCCTACACCACCGCCATCACGTGGATGGAAACCAGCGTCAACAAC +AACTGCACCTTGTTTAATCCAATCTTTTTGAATAAGTTCAGCTTTACCTACAGCACCAAC +AATAATGTCAGCTTGTTTCACAAGCTCAGGTAAGTTTTGTGTACGTGAATGACAAATCGT +TACTGTCGCATTTGCTTGCAACAACATCATTGCCATTGGTTTACCTAAAATTGCTGAACG +ACCAACTACAACCGCGTGTTTGCCAGCAATTTCGATATTGTTTTCTTTAAGAATCGTCAT +AATGCCAGCAGGAGTCGCTGAACCATATGCAGCTTCACCCATTGCCATACGGCCAAAACC +AAGGCAAGTTACGCCATCTACATCTTTTGCTAAAGAGATTGCATCAAAACATGCGCGCTC +ATCAATTTGTGCAGGAACTGGATGCTGTAAAAGAATACCGTGAACATCTGGATTGGCATT +AAGCTTTTCAATTTCAGCTAATAATTGTTCTGTTGTGGTTTCTTGTGGTAATTCGATTTT +TAATGAATCCATACCTACTCGGCGGCAGGCATTCCCCTTCATACGTACATAAGTTGCAGA +AGCACCATCATCCCCAACCAAAATAGTCGCTAAAATTGGGGTACGACCTGTTTTCGCTTT +TAAAGCTTCAACACGTACCAACAAATTTTCTTCAATTTGCTTTGCTAATGCACGACCGTC +TAAAACCAATGCCACAGCACATCTCCAAGGTAG +>NODE_12_length_39995_cov_63.3136_ID_23 +TAAATTCTTTTACTATGTTCCTTAAGATTAATAATTGAATTTATAGTTAGGTGAATTTAT +GAATTTTAATAATAATATATCCATTAAAAAATTAACATTACTTATTTTTGTAATTATGTA 
+TATTATATTTTTTCTAATAACAAGTTTAAATTTATATTCGTACTCAAAATACGAATTTAA +TAAAACAGAAAAACTTATAGAAAATTTCAATGTCAGTCTGAGTCAGCAAGTTGTAGAAAA +ATTAAATAATATTTCTGATGTATCAAAATATCCATTACTTATTCCTGAAGTGAGAAACTT +ATATTCTATACTTGCTGCTGATAAACCTTATGATATAAGTGAATATAATTATTTAAAGTA +CATTTGTGATATGATGCTAATCCAAAACACCTCAATAAATGGTGCTTACATATATGATTT +GAGCGGAAGAGGCGTATCAAGTACACGTAATAGTTCTAATGATAAACTTAAAAATCCAAA +GTCAGAAAAATGGTTTATAGATTCCTTAAATTCTAATGAATTTACATCAATTTTTCCTAA +TATAAATGCAAGCGATATATTTGAATTTACTTCGCAAGACTCTAAACAGTTAATAGCATT +GGCTAGAAAAATAATAGATATTAAAACAAAGAAAGTAACTGGAGTATTATTAATTACTAT +TCCAATTGATGAAATTCGCAATTTATTAGTAAAAGATCATCTTCCATTTAATAATCAGAT +AGTTTCTATCTATGATATTAATGGAAATTTAATTCTAACCACAGGCGAGGAATCCAATGT +ATTTATTCCTACTTATGACCAGTTACACAATGCAACCACTACTCCTAGCATACAGTATAT +GGATAGTAATATTGAATATATAGTTTCCTACAATACTATACCTTCAACAAGCTGGATAAT +TGTAAATTCAATTCCTAAGTCAAATGCATATCATATAGACAACTTATATATTTTTTCGTT +TATTATTAACATAACTTTCTTCTTAATTTTATTTATTGTGCTATATATATTTTTTATAAA +AAGAATATTTAATCCTTTAAAGTTTTTAATCAAGAATATGGAGAGCAATGTTGAAAATAA +TTTAAATTATAAAGTATCTTATACTAAAAATGACGAAATTGGAATATTAATGAAATCCTA +TAATGAAATGAAAAGCAGAATAAGCGATTTAATTAATATTAATTATAAGAGCCAAATTGA +ACAGAAAGAACTAGAACTTAAGCAACTTCAAAATCAAATTAATCCTCATTTTATATATAA +TACCTTAGAATCTATTCGCATGATGGCAGAAATAAATGACGATATAGAAACTTCAACCAT +GTCTGAATATTTTGGCTCTATTACAAGGTACAGTATGAATAGAAAAATAAACACCGTCTT +ATTAAAGGAAGAAATAAGCATTATTGAAAATTATATTTACCTTCAAAAGATTAGATTCGA +TCAGCTATTCACAATTGAAAATTTGATTACCTCCGAAATTCTAGACTGCGAAATAATTAA +AATGATAATTCAGCCATTAATTGAAAACTCTATCTATCATGGCTTAAGTGAATGTAGCGG +TGATGGGAAAATAGTAATTAAAGGTGAACATATAACTGAAAATCTAGTACTAACCATTTC +TGACAACGGTATAGGTATGGATCATGTAAAATTAAAAAAATTAAATGATTATATTAATGA +TAAGAATAATGATTTTAGCGGTACCGCCTTAAGGAATATAAACAAAAGATTAAAGTTGAA +TTATGGCAATGACTATGGATTAGAAATTCATAGTATTCTCGGAAAAGGAACAACTATGGT +GCTAACTATTCCATACATTGTAAAGTGAAACTAAGTCCATGGAGCCCCCTATGGACTTAG +CCTTATTTTTCTTATTTACTTATTCCCTATAAACATCTATATTCCTATTTCAGAGTTGAA +AATTAAAACCTTTTATATCAATTTTACTATTTACATAACTACTCCTTTATTTCCAAAATA +TCACTTTACCTCGTTTATTACAAACAATTGCGACATAAAATCTTCACGTTTTCTCGACCA 
+ATATTCACTAGCTTTTTCTGTATAATCTTTTGATAATACTATTCCTAAATTCATAAGTTC +ATCACCGTAGTAATTTCCTTCTCTACCTTCTATTGAATACAGTTTATCCGCGTCAAGTCC +CTGTAATTTTATACGATTATATTTTTCATTTGGCCTGCATAAAGCTTGATAGTAGCCCAC +AATGGCTTGTGACTTATCATCAGACACTACCATCCATGAAGTTTCATTGCTTTCAAAAGG +ATTTAGCATTCTATAAAAATCACCTTTTTGAATCAACGATTTATTTTTCTTAAAGAATTT +AACTTGTTCTTTGACTATTTCCTTTTCCTCATCACTCATAGTATTAACATTAAGCTCATA +CCCAAAAGTTCCAAAATATGCTACATTGGCTCTTGTGTTAAGCGGAGTAAGTCTTCCTAC +CTGATGATTTGGTACTGCTGATACATGACTTCCTATTGCGCTTAATGGATAAATCATTGA +AGTACCATATTGTATCTTTAATCTTTCTACAGCATCAGTATCATCGCTTGCCCAAGTCTG +TGGTGCATAGTATAGCATTCCAGGATCAAATCTAGCCCCGCCGCCTGCGCATGATTCAAT +TAAAAGCTCTGGGTATTTAGTTATTATCTTTTCAGCCAACTTATATACTCCAAGAATATA +TCTATGGAATACTTCACCTTGTCTATCTTCTTCCAGCGCTAATGAATACGGTTCTGTTAT +ATATCTGTTCATATCCCACTTAATATAAGAAATTTTAGAATTTCCCATTATGTTATCCAT +CAAACTAAATACATATTCAACAACTTCATCCCTTGAGAAATCTAGGATGAATTGGTTTCT +TCCATGAGACATGTTTCTGTTAGGTGTTTGAATAGCCCATTCTGGATGTTCTTTAAATAA +TTTAGTATCCTTACATACCATTTCTGGCTCAAACCACAATCCAAATTTAAGTCCTATCTC +TTCTACTTTTTCAGATAAACCAGCTATTCCATTTGGAAGTTTCTCTTTGTTTTCGAACCA +ATCTCCAAGAGAACTTGTATCATCATTTCTTTTTCCGAACCAACCATCATCAAGTACAAA +CAGCTCTATTCCCAGGTCCTTTGCAGCCCTTACTATAGAAAGTATCTTTTCTTCATCAAA +GTCAAAATATGTTGCTTCCCAATTGTTTACAAGAATAGGACGTTCTTTATCTCTCCATTT +ACCTCTTGCAAGTCTTGTTCTATAAAGTTTGTGGTAAGTTTGACTCATTGCATTTAAACC +ATCATTAGTGTAGACAATTACACATTCTGGCGTTTGAAATTCTTCTCTTGAATCAAGTTT +CCATTTGAACTCATAAGGATTAATACCCATCATAACTCTTGATACTCCATACGAATCAAC +TTCAACTTGTCCTAAGAAATTTCCACTGTATACTAAACTAAAGCCATACACTTCTCCTAC +ATATTCAGTTGTATTTGGACGTTTTAATATCACAAATGGATTATGAACATGACTACTTGC +ACCTCTTAAACTGTGAATAGATTGCACACCTTCTGTTAACTTTCTCTTTTTAAGATGACA +CTCTCTAGCCCAAGCACCTGATGTATGAATCATTTCAAAATCTGAATCTGCTAAATCAAC +GCTACAGCTCATAGCATTGTCTATATAAAATGCCTCTTCCCCTTCATTAGTTATTTTTGC +ATTTCTGCATATTGCATCAAGACCTTCGTATATTGTATAACTTAGTATTAGTTTACTATT +TAATATCTCATCAAATAATGCAATTTCTAATGTTTGAGCTTCTGAAGCTGCTTCAATATA +AGTCGATGGTAATCCATTTAACTTATTCTTTCCTTCAAATATTTTATATCCAACATATTT +ATAGTTACTAATTCTACTTCCATTTTTACTTATAATCGTATGAGCCGGATATCTGTAGTC 
+TGTAATTCCGTAGCTTGGATATTCCTGCTTTATATGTTCTAATGACGAAGTATGATTTCC +CTCAAACATATTGCATGAAGGTTTAAGTTCTCTTTCCATTATATGTTTAAATGACTCTCT +ATGCCTTAATCGTTTACCATAATAAACATTTTCCATCTGTTCTATGCCATCGATTACTCT +AATAATATAACTTATATTATTATTATAAAGATGAAACTCTCTAGTATTTTCATTAAATTT +AATTCCCATCTTATCTTTCTCCTTCTTAAGTATTTACAAAATAATACTTTAGTGTTTAGT +ACACTGTAGATAGTGCTAAAATTCACTTAAACAATCATTTGAAGTTGGTTTACTTTTTTA +AATATTCTGAAGAATATATATAGTAATATTCTTCAGAATATTTGTTTTAATTTTAAACTA +AATATTTTTTATATAAATAGTTTAATAGCAATTATTTATTAATTTATAATTACCACCTAA +ACTACAGGATAACTTTTCAAAACTTTAAGCTTTCCTTTTATTTCATAATTTCCAAGTGCT +GCAGGTATTAAATAGCTATCTCCCATTTTTATCTTTTCTAAATACCCATTTCCTTCTATT +ATTCCTTCTCCATCCACACAAGTTAAGATGTCAAATTTATCTTCATCACTGCTATCTTGC +AATGTTTCTTCTATAGTTAATTTTTCTATTCCAAAATATTCATTTTTGCATAGTAAACTC +TTTTTATAACCATCAAATGCTATTTCTTCTCCTTTAAGATTTTCACATTGAAGTTCAAAG +TTTGTCACGTCTAATGCTTTTTCTACATGTATTTCTCTTGGTCTACCGTAATCATATACT +CTATAGGTTACATCACTATTTTGTTGTATTTCAGCTATTATAACACCTTCACATATAGCA +TGAATAAGTCCACTATTTATTAAAAAGCAATCACCTTTTTTTACATCTATCTTATTTAAA +TATTTTTCAACTTCTCCTGATTTTATTGCCGCTTCAAATTCTTCCTTTGTACATCCTTTA +GTACCAACTATCAAACTAGCATCTGGTTTAGCATCTACAACATACCATGCTTCTGTCTTA +CCATAATCACCTTCATATTTAGCTGCATATTCATCTCCAGGATGAACTTGTACTGAAAGT +TTTTCTCTTGAATTAATTAATTTCACTAGTAAAGGTAACTTTTCTAGGCTTACCTTAGCA +CCAACGATGCTAGTTCCGTATTCTTTTATCAGTTCATCAAATCTGATTCCCTTGAATTCA +CCATTTGCTACAATTCCTGTTCCATTTGGATGACAAGCTATATCCCAGCTTTCTCCGATT +TTTCCCTCTGGAAGGTTATCTCTAAAAGTTTCTAGGTCTCTTCCTCCCCATATTTTTTCG +TAGTATAAATTTTCAAATCTTATTGGATACATACTTCCTCCTACTCCCTCACTCGATATA +TAAAAAACATATCATATATTATTAAAAACCTCACTTTAAATATATATTCTTTTCGGCAAC +TTGTCTATACTTTTGCAGTTAAAAGCATACTTTATTCATTTAAGATAAAATTTATTAACT +TATATGTACTCACATAAACTTATGACTCTTCTCTCGAAACTCCATTCAATTAATACAGCG +ACTCAGCACACTCTCTCTTAATTACATATTTAGTATTAAATTTCCACTAAGGCATTTATA +TAAATCACTTTCAATATCCAAAATTTACATACTTCCTTAGACGAAAAAGATGCTGCGATA +ATGTATAATATTACTGCAACACCTTTAATTTATTTATATATTAATTTATTTATATAACTG +TTATAGCTTTAAATTCAAAATTGTCATATCTGTGTCTAGAAAACGAGTACTCGAACGCTC +TTCCATTTTCTAAGTATACAACCTTTTCCACTTCTGCTATTGGATCTCCATCCATTAACA 
+ATAAATATTCTTTATCAAATTCATCTGCTTTACTTACCCTAACCGTCGAATGAGCACTTT +GAATCTTTAATCCTAGCTTTCTCTGTAAAAAATTATATATAGACCCATTAACATCTTCTT +TTTTTATTCCCTCTGCAATTGATAACGGCATATAAGTTTTTTCTATTACAATAGGTGCCT +CATCTACATACCTTACTCTATGTATGAAGTAGACAAAATCACCGTTATCTATCTTAAATG +TACTTGCAATTTTCTCATCGGCATTAATAATCTTAAAATCTAAAACTTTGCTTGTGACCT +CATGCCCTAAATTAGTATTAGTTAATCCCGAAAACTGTTTTTTTCGAATAATCTCTTGAA +TCTCTCCTTCGGTTATGTCCTTTATAAAACTTCCCGATCCCCTTCTCTTTACAACCAATC +CTTCTGCAACTAACAAATCTAGAGCTTTCTTCACTGTCATTCTACTAGCATTAAATCTTT +CGCATAGTTCTTTTTCAAACGGTAATTGTTCATTAGGCTTGTACTTGCCACTCAATATAT +ATTCTTTAATCTGGGATGCTATTTTCTGATACTTTATCATAACTATTCCTTTCATATCTA +ATTCTTCTTATTATGCAGACTTATAATTTAATTTTTTAAATATTTAAATATCTCACATGC +TCTTGACTTAAAGGGAATTATGTGGTGGTTTTCCATTGCATTTGTTATTATAACTTATTA +TTCCTTATACAACTTTCTTTATCAGTAATTATTGACCATTTCACTAAAAACCCCCACCAT +ATTCCATATATTTTTTATACAACCTGCACAAATATACTCTACCCTTAATATATAATATAT +GTCTTTCATGCACTCCTGTCTATACTTTTATTTAGTACTTGTTTATCTCACCAATAAATA +AAGAATAGGCTGATTAATACCAACCTATTCTTCGTCATTACTATTTTACTACTATATAAA +TAAAAATTATTTAAAATTATAATATATCCCCCTATAAAAAGACTTTTATCAAATAATCTA +TCTTATAGGGGATATTTTAATTGAGATTATAAGCTTTATTAATGATCTTGTTGAGCTGCT +AATTCATCAGCATAAGCCATAGCATCTGCTTTCTTAAAAAATGGGAAGTAAATTACAGTA +GAAACTATTAAGAATACAAATTGTGCTAAAGCGTAACTAGGTCCACCAAGAATAAATCCT +GAAATTACAGTTGGAACTGTCCATGGTACTAATATTGCCCCCATTGGTGTTAGTAAGCCT +GATGCAATCGCGAAGTAAAGTAATACTGCATTTATTACTGGACCAAGTATAAATGGTACA +CCCATGATTGGATTCATTACTACTGGAATACCAAATAATATTGGTTCATTAATATTAAAT +AAATTTGGAGTAAGCGCTAACTTACCTAATTGCTTAAATTGAGCCGATTTTGCAAGGAAT +AGTAGTAGTAGTGTAAGTCCCAATGTTTGACCACTACCTGTTAAATTAACAAAACTATCA +TAGAATTGTTGAGTCACTATATGAGCACCATGAGCAAGGTCTAAAGTCCCCGCTTTTTGT +AATTCTGCATTGAATGCAGTATTAGCAGTTAATAATGGGCCTACCATTCCTCCTACTGTA +ATACCTCCATGAATTCCAAAAAACCATAAGAATGGAACAGCAAATGCAATAAGCATAGCT +CCAGGTAATGAATCTGATGCTGCTTGTAATGGTGTTTGAACCACTTTATAAATAATCTCT +ACAAAAGAAGTTTGGAATCCAAATTTGCATAATGCATAAACAACAGCAGCTCCACTAAAT +ATAACAGTAGCCGGAATTAAAGCTGAGAATGCATTAACAACACCTTGTGGAACACCCTCT +GGCATTTTTATTGTAATGTTTCGCTTAATCATTGCGCAATAAACAAATGAAACAATCAAA 
+CTAACTACAATTGCAGTAATCATACCTGCTCCACCGGTCCAAGATGTAGGAATAGCACCG +CCCACTTGAACTGGAGTCGCGTCTGGTGTCGGCTGATAAGTAACAAAGTTATTAGTAACT +ATAACAAATACACTTAGAGCAATTACTGATGGTGAAAACGGCTCTACTCCCTCATTCTTG +CAGTATGAGTAAGTCATTCCTATAACTCCAACCAATGCCATAATGCCCATTGTTGATCCT +TGTACTTTGTATAATGGATCATTCCAACCTGCACCAAAAGTTGAAGTCATAAAATCTGTA +AATGCTTTTAATGGAAAAGCTGCAATTAATAAAAATACAGATCCAATAATATTCAATGGT +AATGTAAACATCATACCATCTTTTAACGCTTGGACACCTTTAAGATTAACGAATTTCATA +ATTACTGGAACTATTTTTTCGTTAAGTGTTTGATTAAATGACATAATTATTCCTCCTTTG +AATTAAAAATATAATATAAATAATTACAAATATAAATTTGCTATTATTGCCTTATTGAAT +TTTTATTACTTTAAATGTTTTTAAACTTAATATTATCTAAGTACTATAGCGTCTAGATTC +AATATATATGCTTAAAGTACTATTTCTTAATTAAACTTAATGCTAAATCTAATACTTTAG +CTCCATTCATCATTCCATAATCAACCATAGGTATCACTTCTACTGGAACTCCTTTTGGTT +CACAAAGACTTTTTGCTTTTCCTAAAGTATATCCAACTTGTGGTCCTAAAAGTGCAACAT +CCATGCTATCTAGATGTCTATCCATTTGTGATTCTGGGAATGCTATAATTTCTGCATCTA +TCCCCTTAGCCTTAGCTGCTTCCTTCATCTTATTAACTAAAACACTTGTTGACATCCCTG +ATGCGCAGAATAATTTAATTGTAATCATAATTGTTCCTCCTTAAAATCTTTCTATAAATA +AAATAATGTATTAAATACTTAAGTACGTTTACATCAACGCACTAGTAATATGTTGATAAG +CAAATAATATATTTATTTATTAAGTAAAGTATTTACAACTTTTCTTAATTCTATAATTTG +TTCTATTAAAGTTTTCTCAGATATAGCTGTCATTAAATGATCTTGTGCATGAACAAACAA +AACTGATAATTCTATTTTTTCACCTGCGGCTTCTTTCTGAAGCATTTCTGTTTGTGCATC +GTGTGCCTTAGCTAAAGCATCATTAGCCAAACCCATCTCTTTTTCAGCTTCTTCATACTT +ACCTTCATTTGCCATTCCTAGCGCCATATAAGCATGATTTTTACAATCTCCTGCATTAAT +TATAATATTCATTATTGCCATTTCTAGTTGATCCATTTTTACTCCTCCAATTTGCGTGTT +ATTTTTTATGCAATTATATAGAGCAATAAATATGCCAAACACTATTAATGATATAAGTTT +TTATAATTTTCATCCCCAAACTTATATATATTTATAAAACAAAATGAATGAATATTATAT +ATTATAAGTATATACATTTATTTTAAAAAGATTATATATTTTATTTTCCAAGCACACAAT +ATTTATCAATATATGTAGTGTTTAAGCAAAGCATACAGTGTTTATATAAATATTTATAGT +GTTTGAAAAAATAATATCTGGTGTTTCATTTTTTCAAACACTGAACCTTTTATTATAAAA +CAATATTCGATACTAAACCCTTTATCCATTATCATATATGCATCTATTACTGCTGACTTA +TTTTTCACTAGCCAACAATCTTCTGATATAGTCTCGTAAATCAATTTACAGAATCGAAAA +TTAACATATAAAAAGAGAAAGTCAAATGATTCCTTCATCTAAACTTTCTCTTAAAAACAT +GTTCTAAACTCGCTACTTTAATTTTTATATTTCATTATACCCAAGCACCACTTGCATCTA 
+CTGTATAACCGTCAATAACTGTATCACTAGCCATAAATCCATCACTATATAAATAGTACC +ATTTATCATCTACTTTCTGCCAACCTATTTGCATTTGGCCAAATGAAGCCAATAAGTACC +ACTTACCATTTACTAGCTGCCAGCCTGTTTGCATTGCACCAGTTTTAGCTAGCAAATACC +AATTCCCCTTTACTTGTTGCCAGCCCGTTTGCGCTGTACCAGTTTCATTAGCTAAATACC +AAGTACCATTTATTTGTTGCCAACCCGTTTTAGGTTGTCCATTGCTATCGAGCAATTTTA +CAGTCCCATCAGCACTTGTGTTTAAAATTGCTGTAGTTACACCTGTTGTAACTGAATTTT +CAGTTGTTGTGCTCGTTGTTGTAGTTGACCCACTGTTCGATGAACTACTATTTGATGATG +TACTTGAACTACTACTTGATGAATTTCTCGCAATAGAAGACGTAGTACTTTCAAATATCT +TACCATTATAAGTAACTAATAATTTAATATAATATCCTTTATCACTGCCAATTAACCTAT +AAGTTTTATCAGTTCCCACTAATGCACCGTTAGTGATACTATCATCTTTATTAAATAACC +TATACCAAGCATATGTTACACCTTGAGATGGAACAAACTCTGTAGAATCTGCCTTTAATA +ATTCAGCAGTTAGAGTTCCACCAACCCTTTCAGTTCCATTAATTGCAACTCTTGAAGGCA +TTACTACTTCAGCTGTTCCACCTGTTGATCCTCCTGCATTTGTATTTCCTCCTGTTTGTT +CTCCACCAGTATTATTGCTACCTGAATTTGAAGAACTTACCCCTTTAATATCATCAAAAT +ACATTACTGAGTTAACAGTATCTGTTCCAATTGTATTGCACCATATTCCCATCTTCTCTA +TATGAGCTTGGTCAAATGTTCCATTGTTTTTTCCTTTAAATTGGCTAAAAGGTATAGTTA +GAAGTTTAGCTTCAGTTGTTGCTGCAAAATCCGGCATCCAAACTTCAAAATCTTCACCGT +TTGATGTAAGTTGTATAACGAGTTTTTGACCTTTGCCATCTGGCTTAACCCAAATTTGTA +ATGCATCGCAACCAGACCAATCCGCATCTAATGATTGTGTAATACCAGTCCATCCGTTTG +GAGCAATCTTATAATTAAACGCAAGTCCATAATCGCCACTATTTTTATTATTTGTATCTA +ACTTTGGTGTAACACTGCAGCCTGATCCAACATTAGTAGACCAAACGCCTTGAAGCAATG +CATCTTCTCCTGAATATGATTCAAAATTATCTACCAATTTAGGATCTTTCTCTGCCTCTT +TTATATTAAATATAGCTTTTATTGTATCTAATTTATTATCATCTGAATATAAATCAATAG +ACCCAACAGCTTGTCCTATTTTATCGAGAATAGTTTGTGAAATTGCAGCTGAATATATAC +CATTTTCATCTTTTGTAGCGTTAAGAGTTTCAATAGCATCTCCTGCTTTATTTTTCAATA +CAAACTTAACTTGTCCATCATATTCTTTTACGCTTGCTGTTACTATAGTTGGTTCAAGAA +TACGGCTGCTTGAAACTGGTCCTGTTATGAATCCATAAGAATATGCTGACTCTTTAGTTA +TTGCAGAACTAGAGTAATCTTCCACTCCATCAGCAAATATTGATTTATCTTCATTATAGT +AATTTATAAAGTTATTAATCATTTCATGACCTTTTGTATCACTTACCATATATGGAGCAA +AGAAACCATCCGTTTCACCAAAATTCGCCCAAGTCATATAGTACGGCATATCCGAACTTG +ATATTATATCAGCTACATGCTTAAACCAATCTTTATCGGCATTTCCTGATAGAGCCATAA +AACCATTATTTTCATTGTTACGCAATCCTACTTCAGATACTGCTGAGAGTTTACCTTTAT 
+TCTTTGCTATATTTTGAACTAGGCTAATAGTGTCTTTGAAACTCGCCATCCATGGATCTG +TAGATGCATCTTCAGTAGGATTATCATCATAATAATCAAAAGCCAAGATATCAACAAATT +CATCACCTGGATAACGTGATAAGTAATCTTCCTCATTTTCAAACGGCCCATTTGGTGAAT +ATACATATAAGAAATTATGAACTTTATCAGTATCCCTTAAATATTCAACTGTATAACGAT +ACATATTCTTGTACGCTTCCTCGTCACAATATGCTTTACCCCACCAGAACCAGCTTCCAT +TATTCTCATGGAATGGCCTGAATATCACCGGAACTCCAGCATTCTCTAGTTTATGTGCAT +AAGTTGCAAGCATATCTAAATACCCAACATATACATCATTCAAATCTCCACCTGGCATAA +TCCTTGACACTATATTACCAGTTGTTGTTCCTGGAGTATATCCTGAATAATCATAATTTC +CGTTTTTATCTTTTCCTTTTTCTTTAACTAATTCAAAATTTGGCATATGAGCTGACAAAG +TTATTATACCGCCTTCATTTGACGCCTCTATGCTTAAATCTGCTGCTTTTGAAATCAAAT +CCTTTCCAGCAGCTTTCTCTTCATCAGTTAATGATAATTCGGATCCCGTTAATGACAGGG +CATCAATTCCAACAATCGCAGAAATAGAACTTGTTATATCCTTTGTGTCTGAATTAGTTG +GACCATTTCCAGCCTTATGATGTGTATCATTTTGATGTCCATATAAAACATAATCCGATT +TACCTACACCAGTTAAATATGAGTATAAATTTGCTGTTTTTGATGTTGCACTCGAGTCAA +CTAACTTTACTTTTGAAGGCATTAAATCGCTTACATCAATTATATTTTGTGTTGTAGTAG +TTGTTGTCTTTTCAACATATTTATCTGCTACTTGTTCTTGACTAAGTTTAATATTATCTA +TATATATATCACCATTATAGTTAGTGCTTGCTCCTATAATTCCAATAGTAATTGAACTGG +CATCTACATCCTTAGAAGTAAACTTAAGAGTTACCTGTACTTTCTTTAATCCAGTATTAC +CAACATCTACCGAATTATTTAAGTCAATTGAGCCATAGGTATTTAAGCTATCTGTAATGA +ATAATTTTGTTTGAAATCCTCCAGTAGTCATCTTACTTGGGTTATAAATAAAATCATATG +TTAATATATTATATCCTTTAAATGAAGTTGCTGCACCCAAATCTTTGTTAATTTTAAATT +CACTCCAGCCACTTGCAGAATCCTTAGAATAATCAAGGGAAAGCTTCAAAGCCCCACTGC +CAATCGTAGTGTTATCATAATTTACAACGTTATCAGAAGATCCATGATAATCATAGGCTC +CTCCATAAAACCATCCATCACTATCATTCTCAGAATTATCAAAATTCCATTTTAAATCAG +AAACACTCGGAAGTGGTGCCGTATCACCTGAAGCTTGGTTGTTTACAATTTTAATGTTAT +CTATAAATAGCTTACCATTGTAATTGCATTGATTTCCTACAATTTTAATATAGAAATTCG +ATAATCCCACAGTATCTTTGTCAGTTTTTATAGATACAGTTTTTCTTATATAACCATTAA +CAGCGCTTTCATTTTTGAAACCAGTTGAATCTACCGATCCCGAACCAATACCTGCATCCT +TCCAATTGGAGTCTTGCAATGTGCTTTCCAAGTGAATCTCGCCAGTAAAATTAGCATTTG +TTTGTGGAATTATAATATCAAATTGAATCTCAGAACCTGCTTTAATTTTTTCTGAGCTTT +TAGCAATAAAATCAAGACAAGTTTCAGAATTCCATGTACTTACTCCGTCATAATTTGCAT +TAAATCCTAACGCATTATTTCCATTTATATCTTCTATTGCTACACTTGATATATCTGCCT 
+TCCAATTAACTTGATCCGGCAACTTCCCATCTTCAAAACCATTATAATAAACAGCAGAAG +GGGTTGTACTAATTTCAGACAGCGCTGCGACTGTGTTAGTCTCATTAGTAGCTGCGGTAA +CTGCTGCACTACTTGTACTGCTATTTAAATTTGTTGCATCGTTACTCCCCCCCTCCATAG +TAACTGTTGTACTAGAGATGCCTGTACTATTTTCTAAAACTGTTGCATTATTATTTTCCG +CTATTCCTATTGCTGAAACCGGTAAGATAGTAGTTGAAGTTGAGATTACCATAACTGTCG +CTAATAGTTTTTTAATTTTTTTTGTTTTCATTAATTTTCCTCCTAATACCAAGTTATCGT +TTAATTAACATTGCAAACTTTTAAAATTCACTTTTCTGATTTAGAGAATTCTTATTAGTT +ATTTATTTATTTTTTAATTGTAAATCTTTGGTTAAACTTTATCAGTTAAAGCATAATTGA +TACTATTACTTATTTGATTAATTTGCCTTGTTTATATTTTACTTTAACGATTAATTAAAG +CATTTACATTATATAATAATTTAATAAGCTTTTCAATATATGGAAAAATGTTTTTATATA +TGAAAAAAAACTCTTTATTATACATAAAACTTATCTTGTTAAAAATTCATTACCAGCAAG +ACAAGCTTTTTCCATAACACATAATTGTTTAATAGTAATAAGAAAATATCTTTCTCTTAA +CTACTTAAAATGAATTCTTTCTAAAGAAATAACCATGAATAATCAAAAAATTATATTTCT +AACTATCCATGGTCCTCATTTATAATAATTTTTAATTGAAATATTTTAGAAAGCTATTTG +CATTACTCACCTTATTTAGATAATATTCCCCTTAAATATTGAGTCAACTCTTTCGCTGCT +TTCTGATGCGATAGAAATCCAGGATGTTGTCTTGATCCTACTGTTTCTTCTGTTGTATTT +GGAAGCTGGAATACAGAAACCTTTTTATCACCAGTCTCTTTAATATATGCATCCACAGCA +CGATAAATAGACGGAAGCATTGGAATTCCTAACATCCCATATACCCATACTATATTAGCC +TTTTTATTATATTTTCTAAGTTTAACTAGAAAGTTATAAGCCGCTTTTTCAAAAGCTTCT +AAATCTTCCTCATTAAATGTGCCATCCTCATTTAATCTTTGTTTATGAGTCTCTCCTGTA +ACTTCATCTCTCCACTCAGGTGAAGTAAAAGCTCCTTCATCATTTGTTCCAAGGTTAACT +ACTACAACATCTGGCTGCCAAGATTCAAAGTGGTTATCTTCGAAAGCTCCTAGAGATTCA +TTTTTATCACCAGTAAGAAGTCCGCACACCTTTTCATAATACTCTGGAATATTATAATGA +GGATTATTATCCCAACTTGTAAGTATCCCCCATCCACTTTGAGAAATAATTCTATGTTCT +GCATTTAAGGCTTCTGCAGTCATTGTGCTATAATTATCTATCGCACTAAACCACATAGAA +ATCCAATCTTCTTCTTTCTTAGCCCCTATTACTCCTTCTCCTGATGTAATACTATCTCCA +ATAAATTCAATCTTGTACGTTTTTTCTTCTATAGGTAGACATTCCCCATCAAATTTCACA +GCATGTATTTGCATATAACTTCCTGGGTCGCCATTCATAGCCTGAACATCTCTTATAATT +TTAACATTTTTTATTTCATGCTCTGTCATCCCTCTAAATATACAAACCCAGTACCTTCCG +GCAATTAACATTTGTCTGCTTACAGTAGCGGAATTAACAACTATACTAATCCAAGGCTCA +TACATATCATAGTCCACTTCTACTTCAACCCAAAGTTCAGAGCCTTTCGCATTTAGCTCA +ATACCACTTCCTGTCCAAAATACTGTTAGTGGTGAAAGACATCCAGTCGTTCTCCCATGA 
+ACTTTTAAATTTTCAATATCTGATAATGGGTATATTTTTAGTTTTTCATTTTCTCTCATT +ATTTTCCCTCCCACAAGTTTTCAATATTCAAACTATATTTTTATTTAATCCTCATTCTAA +TTTAAATTATATTTATTTTTTACTCACTCTCCCTGGCTACTAATCACTTTTTGTTTCTAG +TAGGCCCTCTTCATTACTTTTATATTACATCTGTAATTTCTTTAATTTATAGAACTCACC +TTTTTTCTCCATCAGTTCATCATAAGTCCCAAACTCTTCTAATCCACCATTTCCTACAAC +TGCAATCTTATCTGCATTTCTAATTGTAGATAGCCTATGTGCAACTACTAAGGTTGTTCT +ATCTTTCACTAAGCTTTCAATTGATTCTTGTATCTTTTTTTCCGAAACGCTATCTAAAGC +AGATGTTGCTTCATCAAGAACCAATATCTTCGGATTTCTCACAAAAGCTCTTGCAATTGA +TATTCTCTGACGTTGTCCACCAGAGAGATTACTTCCATGTTCAGTAATTATGGTATCTAA +TCCATTTGGGAGTGATTCTATTAACTCCTCCAAATTTGCCGCTTTAATTATTTTACTTAA +CTCTTCATCAGAAATGTTTTTTATTCCATAAAGGATATTTTCCCGAATAGTATCAGAAAA +TAATATTGCATTTTGTGGTACAACCGCAATCTTGGAACGATAGCTTTGAAGATTAATATT +TTTAATATCCTGATTATCAATCAGTACTTGTCCGTCTGTGGCTTGGAAAAATCCTATTAC +TAGATTTAGAATTGTCGATTTCCCGGCTCCTGAAGTGCCTACAAACGCTACTGTTTCTCC +TGGATTTATAGAGAAATTTAGATTTTTTAAAATTGGACCAGTACTATCTTTGTACTGAAA +TTCCACATCCTTAAATGTAATTTTCCCTTCTACATCCTTAATTTTCTTTTTCTTACGGTT +ATTTTCTACATCGTCAGAAAGTAATATATCTCCAATCGAATCCACAGATTCCAGTCCTTT +TGCAATAGTCGGCAATAGTGTAATAATACCCGAAATCTGTGCTACTATAGACCCAAAATA +AGTTTGATATAATACAACTTCACCAATTGTAATGTTTCCTTTTAATGCTATATAACCTGT +AAAACCAAGACAAATTATTTGGAACACTTGAAACGAAACCCAGCTTATGGATGAGAAATA +TGCCTGAATAAGATCTAACTTCAAACCCTTTTTAGCAACATTCACCAACTGACTATCTAT +TTTCTTTGTTTCTTGCTTTTCTAACGCATGAGCTCTGGTAACAGGAATTAATTCCACCAT +TTCCATTACACGAACTGAAGTCTCTTCCATCTCTTTCCTATAGTCTGTATTATACCTTTT +TATCTTTCCTCCAAAAGCAACTATAATAAGTACTGCTACAGGTATAGTTGCAAGAAAAAA +CACAAACACTGTTAAACTCTTAAAAATAACAACTCCTAAAGCTGCTACAATATTTAATAC +AATACCTAATATTGTTATAAAAACTTGAGATGATAGATTCTCAATCTGTTCAACATCACG +CATAATCTTTGACTGTAATCTTCCTGATTGCATTTGATTATGATATGCAATAGAAAGCTG +CTGAAGTCTTTTTACAAGAGTACTTCTGAGTTCTCTTTCTACACTTCTAATAGTTTTTGC +ATATAGAACTGTATGTACATAATTTGTTGGTATATTTTGCAGTACCATAACAAGAATTAT +TATAATATTAATTACAATAATATGAACTGCATTATCACCTTTATCGGTTGCTGCATTGAT +AATATTCGCGGATACAACTGGCAGAACCCATACAGGAGAACTTTTTAATAGGAAAAATAT +AATTGAAAGAAATAACTTTAAGTAATGTCCTTTATATATACCAACAAGAGTTTTTAAAGT 
+ACTATTTTCATTCTGTTTAAAAATTTTCAAAAGAGCTTCTTCGCCTGGTTCTAACTTGTC +ATTTGTTTTATCTTCTTCACCTATCACTTTTCCACCACCATATCATAATACTTCTTTATA +ACTTTTTCTGCTGGTTTTCCATATACATCATAACCTTTATAATTTTCAGCTCTATTTAGA +GAATATTGATTGCACGCCCAATCCCACATAGCAAAACCAGATACCCATTCACGTTTAAGC +GAAGATTCAAACATAGCCTTGTACCATTCTGCTTGTTCATTTAAATCTATATCACCTTCA +AGGCTCCAATCGTTTGGAACTTCTGAAGAACCTTTAGTGGACATACAACCAGCTTCAGCA +AAGAAAAATGGCTTATCGAAATTTTTGACTACTTTTTCGATTCGATCTAACTCTTTTTCC +CAATCCTTAAGTGGATAATAACCACTTGAAGATATAACATCAACACAATCCCACCATTTT +ACATTATGCTCTTGATATTTATCAGTATTATAAGATACAGCTCCATGATAAACATCTCTA +ATATCAGCAATTAGTTTACGCCATTCATTTTCTCTACGCTCCGACATAACCATTTCACAG +CCTGCAATAAACATTTCACAACCTGTTTTTTCTGCTATACGAGCATAATGAAGTTGGAAA +TCCGTGTAAGAAGCAAACCATTCACTCCATTTTGGCTCACAATGAACGTCTTCATCAAAG +AAATTTATATGAGCCCTCCAAGTTCCATTTTTACAATTTACAGTCGGCTTAATCGCTATT +CTAAGACCAATATGCTTTGCATAATTTATGATATTTTCTAACTCTGTATCTGAAATAGTA +GAATTAGAACTATAACAAATCTCTTCTGATTGAGGAGTATCCTGAAGACCATTTGGTACC +AATATTATAAAATTAATTCCAACCCTTTCTCTTAAATTATCTAAACTCTTATATGTTTCT +TTTTTTAAATAGCTCCCTTTCCTCGCAAAAGGCGCAAAAGTAAACCCTTTAATAAATTTC +ATAAATCCCATTCCTTTCTTTAAATTAAGCGCCATGAAGAAAATAACAGATCAAAATGTA +ATGTATATTTTCTCAAAACAAATCAACTCTTTTGCTGTCATTTAACAAAAGATTTATAAA +ACAAAAGTTCTAATTCTTGTACGAAGGTTTTATATACGCTAGGCAGAATAAGGCAAATCT +TGTTTGTAGCAGATTTTATATAAAAAATAACCGACGTAATATGATATTAAATAAACAAAA +ACATGCTGATCTATTATTTTTTTCATTGCGCCTCCGCTTACATATACTATATCAATTTCA +TTTTTCATCTGATATAATACAATTGTTTAAAAATAGAACAAATCTATTTTACGGAGGCTA +TTATGAATTATTTATTTGAAGAAATTAATGTTTTAACCTCACCCTATGAAGCCTTTTTAG +CTGGCACTAAACATGGTGGCTTCCCAATAAAGGCTCACTGGCATTATTATATGGAAATTA +TTTTTATCATTAAAGGATCTTCATTAATAAATTGTAATAATAAGGAATATGTCCTCGAGC +CTGGAGATTTAATTTTATTTCATCCACAAGCGGTGCACTCTATTTTTTCAGCTTCAAGCG +AACCTTTAAAGTATGGTGTACTTAAATTTGATATAAATAATCTTCATATAAATAGTAACT +ATACTCCAAAACTTGGCTCAATATTCAAATGCGCTATAGACGATCCTTATGCACCTATAT +TACTTCCATCAAAATTATTTAAAAATTCACCAGAACGTTTATTTAATAGCTGCATTGAAG +AAGTAGTAATTAAAGATTATGGTTATGACATTCGCTTTCAGTCTTTAGTTTCATCTCTAC +TGATAGAAATTCTTAGAATATGGAGAAAAACTGGATTTGATACGAATAACATCACTTTAA 
+TGCCCTATGATACTGATTCGTTACATACAATATTACAATATATAGATGAGCATTCACATG +AATCAATAAAAGTAGAAGACCTTGCTGAAAAATGCCATATGAGTTATTCGTATTTTGCAA +AAAAATTTCACGAATTGTATGGTCAATCCTGTAAGGACTATATTGAATTTATAAGATTGA +GCAAAGTTAAAGATTTATTATTATTTACTAATTTTGATTTAAATTATATTAGTCAAGAGA +CGGGATTTGCGGACTGCAGTCATTTAATTAGAACGTTCAAAAAAAAGACAGGTATAACAC +CAAAACAGTTTAGAATGCTGCACATTATAGATAAATAACATGTAACTTATAGTTATGCGT +ATAACTAGCAAAAAAAGAAAACCCCCTGTTACTAGGCTGCCAGAAAAATTAATTGTTAAT +TCATTCCAATAAAAGTTGTGTCAACCATGAATGCTCCCGGTGCAAGCTCGTGACAAGCGG +CGATAGAACAACTTTTATTGAAGAAATACATACAATTATTTTTTCAACACAACACTTCGT +AAAAGGGCACCTTCTCTTTCTAGTATAGTATATATATCAAAGCATAAGTATGATTTTTAA +CTAGCTTATCTACAGTTTCTATAAATTACTACTTAAGTCTAGTAAATGTTAGAACTTGTA +TATCTGTAACCCAAAATTCCTACAAGCTAGATTTCAGTTTAGCTAGATCTTCTAAATTTA +TAACATATTCATCGTTGTAGAATTTCATCAATTCTTCTTTGGTATTCCATTCTAATTCTT +TAACAAAGTTATTCCAAGTCATAAACCATAGCCAATGAGTTTCTGTTTCCTTAAGTATTT +TAGGATCTGGAATCGGTCCATTTTCACTTAGAGCTACTGGTTTTACACTATTTGGAATAG +CTTCTGTATTTAGAAATTCAATTGTTAGTGGGCCATGATTTTCAAGAGGAGCATAATAAT +CATTACTGTTAATATCTACTACATCATCTCCCGGATACCACTCTACATTGGGCGAGTTCC +ACACCCAAATTAAATTATTTAATTCATGATAACTTACATATCTATCATACATCAACCTAT +ATAGCTTTATGTAGGCTTGGGGACCTTTATCTCCCCACCAAAACCATCCTCCAGAAGCTT +CATGCAACGGTCTCCATAAAATCGGAATATTGTTGTTTTTAAATATCTTTAACTGTTCAG +CTATTGTATCAAGATCTCTTATCATATTAATATTTTCTTCAGTACCTTCAACTAGTGCTT +TTTCTAAATCAAAGTTAGTATTTTCAGTATAAAAGCTCTTCCCTCTTCCATTCATTGGAG +AGAACCAGTGCCAGCATAAAGTTATAATTGCATCTGTATTTTTTCCCCAAAATAGAGCTG +TTTCTATAGTCCCCTTATTATTAGCTAACTCATCTATACACTCAAAATTACTATCAGGTG +TTTCAGTTGCTAGTGAGTAACTTAGTAAATCAAAGCCGACTATAGCTGGAGTCTTTCCTG +TCATTCTCTTTATATATTCAAAATCTGTTCCACTTGCCTTATTGCAATGTTGACCCGATA +AAATTCCTTTACCATATATTTTAGAGAATATTTTCATTAATTCTATGCATTCTTTACTTG +CATCTTTATTACTTAATACAAAATTGGGGTTATCCAAAATACAATTAGTACCCTCCGTAG +CTATTATGCTATCAATATTCAAATATCCAATTTGCTTTGATATTTTAATCTCATTAACTC +CACTTTTTAATTTTAAGCTTCCGATTTCCTTGATAGCAAATTCACTAGCTATATCAAATA +CTACACTTCCATAGCATTCATCGTTAATAAAAATTCTGTGTACAGATTGTTTCTGAAATG +CTGATTCAGAGATTTCTTCAGGAACCGATAATTCGGCTTTTGAAGGTAGAGCATACTTAA 
+TTCCTAATCTGTAAAATCCATTATGTGATAAATTAATTTCAGCAGTTATATAATCATCAT +CATTTATTAATGCAATTAAGTATCCTGAACCGCTATATCCTTTAATTATATTATTTGTTT +TACCATTTATATGCTTAATCTCTGTAGAATCTATTATTATAAGGTTATTTATCATAGATT +TTCTCCCTAATTCATAAATAATTAGTCTTTTAAAATTTCAGCGCTACATCACTGCCTTAA +GTGTCCATGACCTAGATGAAAAATCCCCTATAACTCCTATAGTATCAATTCCAGAGTACA +TTAACTCATCGCCACCTGCCTCAATATCAGTACCTTCAATTATATAAGTATATTCTGGAT +TTAGTCCCTTAAATCTTATTTTTTTCGGAAATCTATTAGGTACTTCCAAGTCTTTAAAAT +AGAATGCAACCGCTTCAGTCTTATCTTCTGTAACATAAATCCAAGCAGTTTCATTCCCCT +CAAACGGGCTTAGAATTCTATAAAATTCTCCGTATTGAATAATATGACGTATTTCTTTAT +AGCGAGCTACTTGAGATTTAACAATTTCTTTTTCCTCGTCTGAAAACTTTGTTAAATCAA +GTTCGTACCCAAAATTTCCCGACATAGCCACATTACCTCTAGTTTCCAAGCTTGTTGTCC +TTCCAACTTGGTGATTAGGACATGCTGATATATGAGATCCCATTGAACTTGCTGGGTATA +CTAAACTTGTTCCATATTGAATTTTTAATCTTTCTATCGCATCTGTGTCATCACTAGTCC +AAGTTTGCGGCATATAATATAGCATTCCTGGATCAAATCTTCCACCTCCAGAGGCACAAC +TTTCAAATAGCACTTCTGAGAACTTACTTGTTATGGTCTCAAGAATATAATAAAGCCCTA +GTACATATCTATGAGATATTTCCATTTGCTTGTTACTAGAAAGTCTGGCTGACCCTAAAT +CAGATATCCCCCTATTCATATCCCACTTAACATAAGATATAGGCGCACTTTCTAATATAT +TAGAAAGCATTTTTATAACTTCATCACATACATCCTCTCTACTTAAATCAAGAACAAGTT +GAGATCTCTGAGCTCTTGATTGTTTTGGTTCTCTATTTGGTACATGAATACACCAATCCG +GATGCGCACGATATAAATCACTATCAGGAGATACCATTTCTGGTTCAAACCACAATCCAA +AACGGATCCCTTTATCAGTAATCTTATTTACAAGACTGTCTAACCCATCTGGTAATTTTC +TCTTATCTACTACCCAATCACCCAAAGAGCAGTTATCCGAGTCTCTCTTCCCAAACCATC +CATCATCTAATACAAAAAGTTCCATTCCAAATTCTTTCCCTGCCATAGCAATTTCTTCAA +TCTTCTCAGTAGTAAAATCAAAATAGGTAGCCTCCCAATTATTGATAAGTATTGGTCTTT +CTTCAAACTGATATTTACCACGACATAATCTTTCACGATATAGTTTATGATATGTTCTCG +ACATTTCACCTAATCCAGCATCAGAATATACCATAACAACTTCCGGCGTTTCAAAAATAT +CTCCCGCAGATAATTCCCAATTAAAATCAAATGGATTTATTCCCATCTGTACTCTTGTGC +AGCTAAATTGATCTACCTCTACTTCTGCTAAAAAGTTTCCACTATATACTAAGCTAAAAC +CATATACTTCACCATTATCTTCATTTGCATTCTTTGTTAAAAGAGCTAAGAAAGGATTCT +GCTGGTGACTGCTTATACCCTTCTTACTATCTATAGACTGCATCCCCGGTACCAATGCTC +TCTTATAAATATGCTTCTCACGACACCACGCACCTGATAATTGTAACAAATCAAAATTAG +AATGTTCTAAATCGATGCTAAAACTTAAAGCTTTTCGCAAATTTATCTTCTCTCCATTAG 
+ACTTGCTATTTACAAACTTAGCATATCTAGTTATAACATTATAACTATTGTAAATAGTAT +ACGATAAGATAACCGTAAGCCCCGCAATATCATCACTTAATTTTACTTCTAGTGTTTCAG +CTTCCTCTTTATTATTAACATAAGTTGCAGGTAAAGACTCCATTTTAGATTTACCAGGTA +TAATATCATGTTTCACGTATTTTAATTCGCTAGTACTAGCTCCATTCTCAAGAATTGCTT +CATAACTCGGCGTCCTAAAATCCCCTGAACCAAATGCCGAAAATTCTTGTGGTACTACAT +CTAAATTGGTTTCTTCAACACTAGATGAACCAATTAGTTCATAGCTATTAACTCTTTTCC +CATAATATCTATGAGCTAAATAGCCCTCATTGGTTATCTGCATCACATAACTAGTTTCGT +CATTGCATAAATGTAATGTTTTACTCTTTTCATTAAAACTAATAGGCATAAAATTTCTCC +TCAAATATACTCTAATTTCAAATAATTTTGTATAAAATCATTTACATATACTATACCAAC +TTCACTACTAAGACGATATAACACAACTTACTAAAAATAGAACAAATCTATTTTGAGAAT +TTACTAAAACTTTAAGTTAATTTTTACACTTTCAATAAGCCATATGTTATTTACGTATAA +AAATAAGGTATTACCTCCACTTCCGCAAGTAGCTGTAATACCTTATATTCTTTCTTTATG +AAACTAATATTCCTAAAATAGTTATTTCTTTAGCTATATCTTCTTTAACCACATCAATCT +CAATTTTATATCTCTTAACTTCATCATTCAATTCTAGTATTTCTGTTGCCGAATAATCCC +CCCAACCATCTTTAAAATAAGTATCTAAAAACGCCTTGCTCTTACCATTTACACTTATAT +TGATTTTCCCCGAGTTTATAGAAATGCTCTTTTTATATAGTAAGATAATGTTTCTCGCCT +CTAACTCAAAAACTAGTTTTCCATCTTTATTATCTTCTATAGTATATTTCCACCCATTTT +GAAACACTTGAAATCCTTCATCATAATTATCAAATCCTAACATTTCTTTTGGTTGAATAT +TAGCATTATTAATTATTATTCCATTTATATATCTATCTTCGAACACACAAGGAGTATTTA +AAACATTATCTGTTATTTTTATAATCTCTCCATCTTTATAATCATTTTCGTAGATATTAT +TAATAAAATTTATCAATAGTTTAGATATTATATAATGACCATCATCATTAGGATGAACTT +CATCAGTTAGTACATCAGCAAATTTCATTTTGTTTTTCACAACCTCACACTTTAGAGCAT +CTCTAAAGCTTATCATCGGTAAGTTATATCGCTTTCCTATTTCTATTTGTTGTTCTTGAA +CATTAAACCCTGTATCCATCGTCATAAATACTTCTACTACTGCCGGTTTATTTTCGCTAG +TTAATAATTTTCTAATTAAGCTTTCGTAAGCAACCTTACACGAATAATCTTCATGGTCAT +TAACTGCGGCATCAACAAAAACTATATCTGGATTTTGACTTATCACCTGCTTTTCTACTC +TGTGAACTCCTATTAAAGATCCAGTTGCTCCAACTCCTGCATTTATATACCTAACATTTA +CCCTATTGAATTTTTCTTTTAACCAAGCATAAGTTAATTCAACATAACATTTTTCCTGAA +CTGTTGAGTTACATCCCTGAGTTATAGATCCTCCTAAAAATGCTACTATTAAATCTTCTC +CCATCTCAGCTTTTCTGATTAACTTTAATATTCTACTTATATCCCCTTTATTAACTAATG +AACTTTTAAACATTTCTTCTGTTATTTTAAATCCCATGTGTCATCCTCCTAACAACTTTT +TATATAATTAGTTAATTTAATAGTATTTTAACTTAATCGTTTAAATAATAATTGAATAAT 
+TTTATTAATATAAGTATAATGATTTTTACAAACATAAACTTGCATTATTTCAGCAAATAA +TAACACTATTTTGATATTAAATTGAATTTATAATAAAGAGGTATTATTTATGAATAAAAA +TATTTTTAAAGAAACTATGTATTCTATTGATTCTATGTTCCCTTTATATAGTTCTGATAT +TAATATTGGGGATAAAACAAACCCGCTTAATTGCCACTGGCATGATGAAATTGAGTTTAT +TTTAATAACTTACGGAAAAGCCATATTTCAAATAGAAAATTCCTCCTACGAAGTAAACGA +AGGCGATATCATCATCATTGGATCTGGCGAACTTCATTCTGCTTACAGCCAAAGTTTTAG +TGATTCGTGTATTTGCAAATCATTGACTTTTAATTGTGATATGCTAAGCAGTAAAAGTAG +TGACTCGATTCAGATTAAATTTATTAATCCGTTAATAAATAATCAATTGAACTTGCCTCA +TCATTTAAAATGCATCAACGATAACGAAAGAATGATAAGATCATTTTTATTAGAATTAAT +GTCTACACTGAGTATTAGAGAAAGCAACTTTGAATTAACAGCTAAATCATATCTATATAT +GATTTTTTCAAAAATAATGCTTATGGTTACTCATAAGAATGCTAATGAGTTATTAAATGT +TAGTAATTCAAATAAGATAGATAACTTTAAACATGTTCTTAACTATATTCATATGAACCA +CAATAAATCCCTTACAATAAAAGAGCTATCTCTGCAACTTAATATGAGCGAAGGGCATTT +CTGTCGTATCTTTAAATCCCTAACTTTTAAAACTCCTATTGATTACATTAACTACTATAG +AACGACAAAAGCGCAGGAGTTTCTAATAAATAGCGATAAAAAAATCTTAGAAATCAGCAT +TGATGTTGGTTTCAATAATCTTAGCTACTTCATAAATATTTTCAAGAAGAACACTGGCTT +TACGCCATCTGAATTTAGAAAAAAAATACATCTTCGTGGAGCAGACATTGAAAAATGAGC +TACTATATATCTTATATAATATTTTTAAGACATATTGTAATTATTTACACAGTTGTGTTA +TTATTTATTATATAAGCTAAACGTTAATTCTTAATTTAATCAATCTTTTTTGCTAATACA +TACTTCCAATGCAAGCTACATCAAAATTTCATAATACTTAAATTTACGTATAATACTGTG +AATATATAGGGGGGACAAACAAATGATTAAAAAAATACTGAAACCAGAGAAAATTAAAGT +AAAAATCTTCAATTTTAAGCCAACAAATCTCAAATATATTAGCTTAAATTTTGTAAAAGA +AAAATTCAAAAATCTATCTATCCGCAAAAAGTTATTTTACTCTTTTATGTTAGTATCATT +TATCGGAATAATTTCCGGATTAATAGGTTTGACATTTATACAAAAAACAACCAGTGAATA +TAATTCTGCTTTAAAAAATTACGGTTTTTCTCAAGGTGACATAGGAAAACTTGGAATAGA +AATAGAAAAGTCTAATTCTTTAGTTAGAGATACTTTATTTTTAACTGATGCTAATGAACA +AAAGGATGCCAAAAATTCTCTTAATAAATCTTTAGATGAAATCGAAGAATTACTAAATAC +TGTTACAAAATCTGTTACTTCAAACGAAGAAAAGGAAATATTAAACAGAATAAAGATCAA +TCTAGCAGCATATAAACAAATCAGAACCACTGTTGTTGTAAAAGGTTTAGCAAACGATAA +AGAATCTGGGCTAAAAATCTTCAAATCTGACGGATCCATCTTGATGAATAAAATCAGCTC +TGATATTTCACTCTTATTGCAAACTAAAATAGATACATGTAATCTACTTTCTACCAAATT +AAATGTTTTAAAATTTGTTAGTATAATAATTGTTATTACAAGCATGATAATTTCTTTAAT 
+TCTTGGCATTTTCTTAGGCAAAAATATTATAAAATCGATCAGCAACCCTATTGATCATAT +GAAAAATGTGGCAAGCGAAATGGCTAATGGTAACCTTGAAGTTTCAATAGATATATCTTC +AAATGATGAAATAGGTGAATTAGCTTTATCTTTCTCTCAAATGATTAAAACTCTAAAAAA +ATATATCACTGAAATATCTACCGTTTTAGGCAGCATTTCTAATAGAAATTTTGATATCCA +TATTATAGAGGATTACAAAGGAAATTTTATTCAAATAAGGAGTTCACTGGACAATATAGT +AGCATCCCTAAGCAATGTATTCTTAGAGATTAAAGATGCAACTATGCAAGTCAATAGCGG +AGCTAGTCAAGTAGCTAGCACTTCGCAGATTATATCAGAAGGGGCAACCGAACAAGCTAA +TTCTATCGAAGAATTATCTGCCTCTATAGAAACGATTTACAACCAAGTTCAAAATACTGT +GACTACTGCGGATAATACTAATTTAATTACCATGAACTTAGTTAAAAGTATACAAAATAG +CAATTCCCAAATGAATCAAATGCTTTCTGCCATAAATGATATAGAAAGATCTTCTAAATA +CATCAGCAATATAATTAAGGCAATTAGCGATATAGCAACCAAGACGAATCTTCTTGCTTT +AAATGCTGCCATCGAAGCCGCACGAGCTGGTGAGGCCGGTAAAGGTTTTTCCGTAGTAGC +CGATGAAGTAAGAAAATTATCTTTCCAAAGTGCAGATGCAGCTAAACAAACATCATTACT +TATTACCGATTCTATCAAAGCTGTAAATAAAGGTAGAGATTTAGCCAATAGCACTGCTAA +AACCTTATTGGAAGTAGTTAATAGTGCGACTAATGTTACCGAATTAATATCAAATATTAC +ATCAGTCTCTAAGGCCCAGGCCAATTCAATCGATCAAATACATGATAGTATTTTAAAAAT +ATCTGATGTTGTTCAATCAAATTCAGCAATAGCTGAAGAAAGTGCAGCTTCTAGCGAAGA +ATTGACTGCCCAAGCTGAAACTTTAAATACAATGGTAGATAAATTTAAGCTTAAATCCTA +AATAAAAAATAAAACAGCCGATAGCTTTACTGAATGTAAAACTATCGGTTAAATAATATA +ATAAAACTTATATTCCTTTTTCACTTTTATTCATAAGAATACCGGTGCAAATGGGCTAAT +TTCTCTTATGAATTTTTCTCTAATTTTCTAACGGTATCTTTTATAACAATTTTTCCACTT +ATAACTCTTCTTGCCGGCTTATAATCGCTCTTATGCACTTTCTTTATTATAGCACCAACT +GCCGTTTCAGCCATTGTCTTCATATCTACTTCAAAAGTTGTGATCCCTAGGTCTGATAGT +TCTGTTATTAAATAGTTATCAAATCCAACTACAGAAACATCTTCCGGAACCCTATATCCT +TTATCCTGTAATTTCTTAATCAAAATATATGCTACTGAATCACAATTACATGCAAAAGCA +GTTGGCATTTTCTCTGGCAACTCAAAATTTATTTCTTTCTTGCCAACTTCTCTATCTGAA +ATATAACTATTTGAAGAATAATCAATACCATTTTCAAGTAAAGCCTTTGTAAATCCTAAA +AAACGATCTTGAATGCTGCTTGTAGCACGTGGATTTCCCACAAACGCAATATCCCTATGA +CCCATTTCAATTAAGTAATCAGTTAATAAATACATCCCATAATAATTATCCGAAGTTACA +GTATCATAGTTACTATTTTTATCGTATGTATCTAACATCACTACAGGAACTTCATTAAGA +TCATTTAGATATTCTATATAATCTTTTTTCAATTCTCCCATAACTATAATTCCATCTATT +TTACTATCTTGTATCATTCTTGGAATTACTTTATTATTCTCATCATCTTTGCTTATAATC 
+TCCATAATCCCATAATATGATTTTTTAGTTAATGCATTTAATACATTTTGATACAACTCC +CAATAAAAGGAATGATACATTTCAACAAATCCTTCTGGAATTAGCACCCCTACATTATAG +GTATGACCATCTCTTAGAGATCGCGCAATAGAATTTTGACGATATCCCATTTCAGCCGCA +ACTTTCTTAATTTTTTCCCTTAATTCATCACTAACTCCCTCTTTGTCAGATAGTGCTTTA +GAAACTGTAACTTTACTTACATTTAAAGCTTCAGCAATGTCAGCAAGTTTAATAGCTTTT +GCCATATTATTCATTCCCTTCTTGTTGTACGTATAACTAACTATCATAGATGCTGCTTGT +TTACTAACTATGATAACATACGGAATTATCAAAATATTTCAATGTTGTTGTTAAACTAAT +CCACTTAATAAATTTTATAACTTTACACATCTGCATTTAATATATTATAGAATGATGGGT +ATTATTTAATATTTTCATATGATAAAAAAGTTTTTAACTATAATTATTGTCTATAGTCAA +AATTTTATCCTATTCTGTTTAAAAAATTTTCTGCTCACATTCAATTCTTTATAAATAAAC +GGTTAGTTTTCATTGTTCTCTAATATTATACAATAAAAAGCTGTATTTCGCTATTAGTAT +ATACTCCAAAAGAGTTTTAATTTTAGCTAAAATACAGCTTAATAATTTTATTTTTTTATA +TTCTTACTTCACTTTTTAAAGTTAACGTTTAAGCGGTTTATAAAAAAAATTATAATTTCC +CCTCTTGCCCGTCTATCGTTCCAAGTTCATTATGTTCTTTAATAAACTTAACAGTTTCAT +CAATAGTAGTATACGCCAAACCTACATACGTATCCGCAGCACCATAGTAAATAGCAATTC +TTCCTGTTTCAGCATCAGTTAACGCCGCACATGGGAATACTACATTGGCTACAAATCCAC +GTTCTTCATACCATTCTTCTGGTGTTAACATTATACTTCCAGCCCTATATAACACTTTCG +ATGGACAATTCTTATCTAATATTGCTGCACTCATAGAGTAAACCAAACCGTTGCATGTTC +CAGTTACACCATGATAGAACATTAACCATCCTTCATCTGTTTCAATTGGTGCAGGTCCAC +ATCCAATTTTAATTGACTGCCACCATCCTGATCCACCTTTTTGCATTACTATTCTATGTT +CTCCCCAATATTTTAAATCAGGACTTTTGCTTAAGAATACATCCCCAAATGGAGTATGTC +CATTATCACTTGGTCTAGATAACATAACATATTTTCCATCAATTTTCTTAGGAAATAAAA +CTCCATTACGGTTAAATGGCAAGAATGGATTTTCTAGCCTTATAAATGTCTTAAAATCAG +TTGTTTTAGCTATTCCAATTGCGGCACCATCAAAATCCCCACACCAAATAATATAATACG +CGTCCTCGATTTTTAGTAATCTCGGGTCATATGCATAAAGTGGTTGATAATCACTTCCTT +CTTCATCTACAAATTTTATTTTTTCTTTTTCAAACTCCCAATTAATTGCGTCTTTACTAT +ACCCTAAATAAATATGTGGTCTTCCATTTATAGTTTCGCCACGGAACACTCCTATAAATT +CATCCCCGTAAGGCATAACTGCACTATTAAATATTCTAGCTACACCCTCTACAGGATTTC +TTTTAATAATTGGATTTTCAGTATGTCTCCATATTGGACCATTAAAGTTAGCTGGCTTCT +CTTGCCATGGAATATTAGGTAGATTTTCGCCTAAAATTTTACTCATATTAATTCACCTTT +CTTAAGAAATAGCCAATCTTATGTATTATTATATAATTCTAAAGAATACACAATTTAAAT +AGCATTTCTTTCTATATTTATTATTATTTATTAGCTTTGTTATAAATATAATTAACTTAT 
+TCTTTTACTGACCCCGATACTAATCCACTATAAATATGCTTTTGTAAAGTAACAAATACA +AGAAACGTTGGAATCATAATCAGAACAATACCCGCAGATATTATTTCCCAATGGGCACCA +AATGGTCCTTTAAACTTAAATAACGTTGTTGATAACGTTAATAAGTTTGGGCTAGGTGTA +TATAAAAATGGTGTGTAGAAATCATTATAAACACCAACACCTTTTGTGATCATAACTGTT +ACTATTGCTGGGGAAAGTAGCGGAAGAATAATTTTATAATAAATTTGAAAATAGTTTGCA +CCGTCAATAATCGCAGATTCATCTAACGAAACTGAAATATTCTCTAAAAATTGTAGGAAT +ATATATATAGTCATTATATCAGTACCGCAATATAGAATAATCGGAGCTAATCTAGTATTA +AATAAACCTAACGCATGTACAATCTGGAATGTTGCAACTTGTGTTGCTATCCCTGGGATC +ATTACAGCAACTAGGAACATACCATTTACAAATTTACTAAACTTTGTTTTAAATCTGCTA +AATATATATGCTGCCATAGATCCAGTAAGTATTGAACCTATTAATGAAAATACTAAAACA +AATAATGTATTTTTAAAACCATTTACCATATTACCTTTTTCAAAAACAGCTTTAAAATTA +TCTAGGTTAAGGAAAGACTTAGGTAATGCTAACGGGCTTGTAGTAGTATATTCAGCACCA +GTCTTAAACGCTGTAAAAAATACACTTAATATAGGAATTATAAATACTAGTACAAAAAAG +ATTAAGATGGCATATTTAATTACTTCATAAACAATCTCTTTTCCACTCATTTTTTTTATT +TTCATACTCTTACATTCCTTTCCTTTTCATAACTGCATTTTGTATACTAGTGATAATTAT +ACACATCACCAATAATACCATTGCCATTGCTGAAGCCAGTCCCACTCTATGATTTTGGAA +TGCATACTGTACTGTTTGAACTATGAAAGTCATACTTCCATTTCCACCATTGGTCATTAT +ATATGGTGTTTCAAACGCTGATAATGCTCCCGTAAGCGATAAAAATAATTGTAAACCTAG +AATTGTTGAAATACCTGGTATTATTATATATCTAAGTTGTTGGAATCTATTAGCTCCATC +TACTCTTGCCGCTTCTAATACATCTGATGAAATAGATGCCATTGCAGCCGAAAACATTAC +GATATTATACCCAATATATCTCCATACCGATACAGAAGCTAATGATATATTAACAATACT +CGGATCCTGTAACCAATATTTCACGAAACCACCTAAACCAACTGCCTTTAAAACAGTATC +TAAAGTACTATTAGGTTGGAAAAAGAATATAAAAACAAAACTAATTGCAACACTATTTAT +TAGCGATGGGAAGAAATATACACCTTTAAATAAATTTGAAAATTTACACCCAAATGAAAC +TAAGTATGCAATTGCAATACCTAATGCGATTTGAATAAATGATGCAACAAAATAATATAT +ACTTACAAATAACGGCTCAAAATATTTAGCATCACTCATTATAAGCTTATAATTTTTAAG +CCCTACAAATTCCTTAGTCTTAGAAATACCATCCCAGTCAGTAAAACTATATCCAGCCAT +TGCTATTGCCGGCAAATAAGTAAAGACCCCTAATAGCACTACTGGTACCATTAAAAACAG +GACTGAAATTATAATTTTTTGTTTGTTATAAGGCAATGAGGAGAAGCTAATTTTTTTCTT +CATATTGCTGGTGCTAGTTGCTGCATTTTTAGTATTTGCTGTGGCATTTTCAGCATTTGT +TGTCATAATAACACTCCTTTACTCTTAAATTAAAAAGGCGATATTTTATTTTATTTTATA +TCGCCTCTTTTCAAATTAATGACTACATTAAAATACTATTTACCAATTGAGTCTACTCCA 
+GTAGCCCAGCTCTTATTTAATGAAGCCATATATTCATCAAAAGTTTGTTTTCCATTTCCT +AAACCAATCTCGATTACTGTTTTAACCCATTTACTATCATTAAGGTTAAGTAATGATTCT +TTTTGAACTTTATCTAAATCAGTAGCCATTTGTGTAGTACCCATCTTTTGTTGAACTAAT +GTTGCTCCTGATCCCGCTAAGAAGTCTGGTAATTTTGCACCAACAAGAGAAGAAACCATA +TCCGAATCGTTTGGATATTTTTCTACGAAGTATTTTACGAATTCTTTTGCAATTTCTTTG +TTTTTACTATGAACATTTACACCCATCATATAATCTGGGGCAATTTGCATAACTTGTTTT +CCATCATGTGATATAGGTGATGCCATAAATTTAATGTCTTCAGGTGTTTTAGATTTAGCC +TTCATTTGACCAACTGCCCATGATCCTAAAGCCATTACACCTATTTTACCATCAGCCACC +ATTTGCTTTGAAGTCTCCCAATCTGTTGTCATCGGATCTTCTTCAACTAACTTATTAGCA +ACTGCATCATTTAATAACTTTAATGATGTATAAAGAGGTTGCCCTTGTGCAAATACATTT +TTGTCATAAATCATTTTATTTGGGTAGTCTACATCACCTGACGCGTTAACTAATTGCGCC +GCAGACCAGTTAGTTAAGGCCCAGTCATCTTTATAGTTTGTATACAACGGAACTGCAGAT +GTCTTTTCTTTAATGGCTTTTAAATCAGCTATAAATTCCTCTGGTGTTGTTGGAGTTTTT +GTAATTCCAGCTTCTTTAAAAACCTTTTCATTATAGAGAAAACCATTAGCGTTAGCACCA +GTTGCTAAACCATAAACAGTTCCATCTACATCTGCATTATCAAGGAAATCATATTTACCT +TTAAGTTCTTCTCTAGTTCCAATTGGTTCATAATAATCTTTATATTGATTTTTAGTTATG +TTAGCAGGCAGCATTAAAACATCACCATAATCTTCTGTTCCCATTCTAGTGTTCATTGTA +TTTTGATAATCGTTTAACGCTTCAAAATTAACTGTGGTTCCAGGATGTTTTGATTCAAAC +TCTTCTTTATACTTTTTAAAAGTTTCGTCCATGTCAGTTCTATGTGTTAATACTGTAATA +GTTCCACCTGATTTTCCATCACTTTTTTCTGATGTACTAGTACTACTGCTACCACAACCA +GTTAGCGATCCAACAGCTACCGCACCAGCTAATAATAATGCTAATTGTTTACGGATTTTC +ATTTTTCATACCCCTCTTCTTTTTTAAGTTAACGTTTAATCGTTTTTAGTATATATTCAA +ATTTAATAATTGTCAATATTGTTAACCATAATTATATTACTTTTTAATTAATTTAACTTA +AATTTTAATTATTTTTTTAATTTTTAATTAACTGATTTTTATTACTCTCTATCCTCACTA +TTCCTCTCCTTTTCACTTTTTAATATATATTTTTTTATTCCATTCCAAGCACAAAAACTG +CTCAAAAAACTGCAAAAAGAAAATGTGAACAAAAAAAGAACAGGTATAGTGAATGGACAA +AATAAAATACTTATTTCTATCAGAGATATGCATAGTATGAATGTTACGAAAGAATATTTC +ATATACACAACACTTAATAACATTGAATTTTTAACAATTATCCTTAATGGTAAATTTATA +CCTATACTCATTGGATATATGTATATCAATATAATACAGAAGAATACTCCTATTATGATA +ATAATTGAAGTTGCTATACTATAAATTAATCCTTCTGTGGCAATACAAAAAGCTAACAAC +CTTCCTAGAATCACCATAAATGCAATCACAAATAACCAAACAAGAAAGGATTGCTTCCAA +TTTTTCTTAAAAGCCTCCTTAAAATCATAAAATAAATCTGAAGGTTTATCCTGCACCATT 
+TTAACTGTTACTGATGTCATCGCTCCAATAGCGGGGCCAATAGATATAATAGGAATACAG +CAAATTATAAACAGTAAGTTCAATTTAAGCAGCGACGTAAATTCACGCGCAAATATATCC +CCAAATAAGGCCAACCCCTCTTTAGGTGGTGCATCTTTGGAAACACCTCGCCCCTCTTTA +TTGTAATCAAAGAAAAATAATCTCATAATGTACCTCCAAGTTAGGTTCCAACCTTAAAGC +ATTAAATTTATAATAAAGCTACTTTACCCGTTATAAGTTCTATACAGCCAAAGATTGTCT +ACCAATTATCCAAGGATGTTCAATATAGAAAAATAGTTGGTAGCACGAACAAAAAATACC +ACATTAAATTCATCAAGCTAATCTATACACTTAGATTTTATCAACCACTCTATTACTTTT +TCCAACTAGATTAATTTCAAATTGTTTTTCCTGTCTCTTTATCAAATAAATGCATCTTAT +ATTCATCAATTGCTATCTTTATCATATCTCCAGGTTCTACTATAGTATCTGCTGATGCGC +GAACAGTTAACTTTTTGTTATCGAAATTAACATAAAGATACACCTCCGCTCCCATTAATT +CATGAAGGGCTATTTGAGCATATAGTGTACTTTTTTCATGTTCTTCTAAGAAATCATTTT +CTATGCTGACATGTTCTGGCCTAATTCCAAGAACAACATTTTTTCCAATATACGCTTCAA +AAGTTTCCTCTCCACCTTTACTTTTAGGAATAGGAATACTATAGCTATTGAATTTTACAT +AGTATTGGTTATCTTTCTTTTCAATAGTTGAATCTATAAAATTCATTTGTGGCGAGCCGA +TAAATCCAGCAACAAATAGGTTAGCCGGCGAATCATACAACACCATTGGCTTATCTGCTT +GTTGTATGTATCCATCTTTCATAACTACAATTCTATCTCCCATTGTCATTGCCTCAATTT +GGTCATGAGTAACATATACAAAGGTAACGCCTAATCTCCTATGGAGTTTAGTAATCTCCC +CTCTCATTTCAGCACGAAGCTTTGCGTCTAAATTCGAAAGCGGTTCATCTAAAAGAAAAA +CTGCTGGTTCACGAACCATGGCTCGTCCTAAAGCTACACGCTGTCTTTGACCTCCTGAAA +GCTCCTTAGGCTTACGGTTAAGTAAATACTCAATATCTAATATTTTAGCTACTTCTCTAG +TTTTCTTATCTATTAATTTCTTATCTTCTTTTCTTAAATTTAGTCCAAATCCAATATTTT +TGTATACAGTCATATGAGGATATAATGCATAATTTTGGAATACCATTGAAATATCTCTAT +CCTTAGGTGAAACCTCATTAACTAAATTATCCCCAATATATAGTTCCCCTCCACTTATAT +CTTCTAATCCTGCAATCATTCTTAACGTAGTAGATTTACCACATCCTGATGGTCCAACAA +GAACAATGAATTCACCATCATTAATTTCTAAGTTTAGATTTTTCACAACGGTTGCATTAC +CAGGATAAGTTTTTTCTATATTTTTAAGTGAAATACCAGCCATATTACGCCTCCTTATTT +ACTTGCGTTGTTATTAAAACTATTTTTAGGTTAACGTTAATCTAAGAATAAATATAATAT +ACCATTGTTTTTTAATCCTTACAAGATTTAAAATATTATTTTTTGTAGTTTTTAGCCCTT +CAATAATTTTTAATCTTTATTGCTTACTATAGAATTTTCCGCTATCACTCTTAGTAAACG +TTCTCTTATAAATAGTTTATTTATTCTATCAATGCACGTCAATGTTATAGTAATAATAGT +TTATTTTTCGGTATTTTTGCATATTTCTATGTTAACTTTATAATCTAATTAACTTAATTT +TTACAGAACAAAATTATTTAATTTGTTAATAATATTTTTTTACTGTAAGATCTTTATAAG 
+ACCTAATGATTAATACATTTTAATTTAAAATTACCTTATCGTTATACTTTTTTTAACTTT +TTTCTATTGACTTTCATAGTGATACTAAATTATAATTTGTCTGTAAACGATATATCATAA +AAGGGGGAAAAGTTATGATTAAAAAATCTAAATTCAAACTAATGCTATCTTTGTTACTGT +TATCTGCATCGATGTTTACTTTCCAAACTCCAGCTAGTGCAGCCGAAGACTCTTCATGGA +TAAGAGGCACGAACGTACCTGCAATCTGGTATGATAGTCAATCCTATTCTTCTCTTAACA +AAATTAATAGCGAAGGATTTAATACTGTTCGTCTTGTTTGGAGCACTAGCGGATCAACTT +CTAGATTAAATGATTACCTTACAAAATGTGATAATTTAGGCCTTAAAGCAATAGTTGAAC +TTCATGATGCAACTGGTGGTACCACTACAGACTCGTTGAATACTTGTGTGAATTATTGGG +TTCGTAGTGATGTATTAACAGTAATGAAGAATCACCCTAAGGCTTGGTTAAATATCGCTA +ATGAATGGGGTCCAGCTAACAGTAGTGTTTGGAGAGATGGTTACAAAAGTTCTATAAGTA +AAATACGTACTGCTGGTTATACTGGTACTATTGTAATCGATGCAGGCGGATGGGGACAAG +ATAGCAGTGATATACTAAACTATGCTCTTGATGTGTACAATTCCAATACTAATAAGAATG +TTATATTCAGCATACACATGTATGGATCATGGAATAGCAATTCAGATATAGATTCATTCT +TAAGCAGTTGTAAAAGTAAAGGTATTCCAATTATAGTTGGAGAATTTGGTTATAACTATA +ATAGCGGTAATAATAATCTTGGTTGCAAAGTTGATGTTGCTCATTTATTAAGTTATTGCA +AACAAAATAAAATCGGATTTATCGCATGGTCTTGGGCTGGAAATGATTCTGCAAATTCAT +GGCTAGATATGACTAATTCTTGGGGAAGTTATACTTGGTGGGGACAATATGTAAAAGATA +ATATGTGGTAATATATTATAATCACATATGTTGCTTTGTCTCGGATATAAAGAGATATTA +TTCTATAGATATTCCATGAGAAAGGAAATCATATTTTGAGCCAATAGCAATTTTAAACTA +CAAAAAAGAATTAGTGAGCTAACTAAAAAAATACTTATTTTCTATGAACGTCTCTATGAT +ACAGATATCTTCCATTGAAATTTAAAATAATATTTAAAGAAAAATTACCGAAACGTTAGC +AATATAGTTTATCCATTTTGTGAAATTACCTTGCTTCCATGAAAAACAAATATATTTCTT +CATTCAAAAATTAATTGCTTAATTATATTATAAAAAATTTAAGTAAAACCTTATAATTTG +CAACTATATAAAATTCATAAACATATTTTACAAAAGGAAGTAAAAGTAAGTTAATCAAAT +CTTATTCTTTTCATTTCCCAAAATATACCTTTCTATTCAACAAAAAAATCAGAACTTTTT +CAACAAGTCCTGATTTTTTATTTATATAGGCTTAAATATCTTATAAAACTGTAACTATTT +CAAAGATCAACCGTTTTAGTCAATTTGTGCTTAGCATAAATACAATTACAATTTTTCTCT +TCTCTTTGTAGACTTTATAAATATCTCTTTTAAGGTTTCTTTGTCATCATATTTTATTGC +ATCTTTTATTTTATCCAATTCTACTTCAAAATTTTCAATGCTTTTTAATAGATTCTCTTT +ATTTCCAAGGAAAAGTTCGCTCCAAAGGTCTTCATTTATATTAGCAATTCTTGTTAAATC +ACGATAGCTATCTCCAATGAAACTTCCTGTTTCTCTTCCTTCAACATCGCTATTTACCAG +TGCTACAGCTAATGAGTGCGGAAGCTGACTTGTGTAGCCAATCATTTCATCATGATATTC 
+TGGAGTTATTCTTTTAACTCTTTTAAATCCAATTTTATAAATCAGATCTTCCACCATGTT +TAAATTTTCTTCTTTATTTCTAGAAACCGGTGTTAAAATATAATTTGCACCTTTAAAAAC +TTGGCTACTTGCAAAATCTATTCCTTTTTTCTCCCGTCCTGCCATTGGATGGCCAAAAAC +AAAATCTATATTCTCCGGTAATATATTTACAATATCTTCGATAAACATTTTTTTAATTCC +AGTAGCATCTGTTATTACTGCACCATCTTTAAAATTGCTTTTATTATCTATAATAAATTG +CTTAACTAATCTAGGATATAATGAAATAATTATAAAATCTGCACTCTTAATTACTTCTTC +TCCGTTTTTAAAACCTTCTCTTATTAATCCAAGTTTTTTAGCTTTTTCTAAAGATTCTTC +ATTAATATCTATTCCATAAACATCATTATACCCAGCTTCTTTTAAAGCCATAGTAAATGA +TCCCCCAATTACTCCAAGACCTACAACTACTATTTTCATATCTATAGCTCCCTTATATAG +AATTAGATTTGCTTATCTTCAATTTTTGCTACAGCTTTAACTTTACTCATTAGCACATCA +AATTGATCTGGAGTAAGTGATTGTTGTCCATCGCTTAGTGCATTTTCAGGATCGTTATGA +ACTTCTATCATAAGTCCATCAGCCCCAGCTATAATTGCTGCTTTTGCCATTGGTTCTACC +AAGTAAGCATAACCTCCTGCATGACTTGGATCAACAATTATAGGTAAATGTGATAATTTC +TTAATTACTGGAACTGCTTGCAAATCTAATGTATTTCTTGTAATAGTTTCGAATGTTCTT +ACTCCTCTTTCACAAAGAATTACATTTTCATTTCCACCTGCCATGATATATTCTGCTGAC +ATAAGCCATTCTTCTATAGTTGCAGATAAACCTCTCTTTAATAGTATTGGTTTATTTGTT +TTACCTATTTGCTTTAATAGATCGAAATTTTGCATATTTCTAGCACCAATTTGAATCATA +TCCACTTCTTCAACAAAAGTATCTACGTAATCAGTAGACATAAGTTCTGTTACTATAGGA +AGTCCTGTTTCCTGTTTTGCTGTTTTTAAAAGTTTTAATCCTTCAAGCTCTAAACCTTGA +AAGCTATAAGGTGAAGTTCTTGGTTTAAATGCCCCACCTCTTAAAAAGTTGGCTCCTGCT +GCTTTTACTCTTTTTGCGATTTCAACAATTTGTTCTTCACTTTCTACAGAACACGGTCCA +GCCATTATTCCAAGTCTCCCACCGCCAACTATTGATCCCTCAATATTAACTACTGTATCT +TCAGGCTTAAATATCCTGTTTGCTTTCTTAAAAGGTTCTTGAACCTTCATAACCCTATCT +ACACCTTTTAATACTTGTAATTTTTTAGGATCTAATATTGAAGTATCACCAACAGCTCCT +ACTATAAAATATGTGTCTCCTTTAGAAAGATGAGC +>NODE_16_length_39898_cov_63.337_ID_31 +CGCGCGCGCCGTTATTCAGTAGCTCTTCTGCCAGCGAAATCCCCATTTGTTCGGCATCTT +GCGGCGCACCGCGGCGTTCACCGCGAATAATCTGCGAACCGTCCGGCGCGCCGACCAGCG +CACGCAGCCAGATTTCGCCATCAATAAGCTCGGCGTAGCTACCAATTGGCACCTGACATC +CGCCTTCGAGACGGGTATTCATGGCGCGTTCTGCGGTAACGCGCAGTGCAGTTTCGTGGT +GATTCAGCGCGGCAAGCAGCTCGCGAGTGCGTGAATCATCAAGGCGGCATTCAATACCCA +CCGCACCTTGTCCTACCGCCGGAAGAGAAATCTCGGGTGGCAACGCGGCGCGAATACGTG +ACTCCAGACCTAAACGTTTTAGTCCGGCTACGGCAAGAATGATGGCATCGTATTCGCCGT 
+TATCCAGTTTGCTCAGGCGAGTGCCGACGTTGCCGCGCAGGGAGCGGATAATCAGATCCG +GACGGCGTTCAGCCAGTTGGCACTGGCGACGTAAACTGGACGTCCCGACGATACTGCCTG +CCGGTAACGCATCCAGACTGTCATAGTTATTGGACACAAAGGCATCGCGAGGATCTTCAC +GCTCACAAATAGTGACCAGTCCCAGACCTTGCGGGAATTCAACCGGCACATCTTTCATTG +AGTGTACGGCGATATCGGCGCGATTTTCGAGGAGCGCGACTTCCAGCTCTTTTACAAATA +AGCCTTTTCCGCCTACTTTCGCCAGCGGCGTATCAAGAATCACATCGCCGCGCGTCACCA +TCGGTACCAGTTCAACGACCAGGCCCGGATGGCTCGCCATCAACTTGTCTTTGACATAGT +GTGCCTGCCAGAGTGCAAGTGGGCTTTGGCGTGTGGCAATTCTTAAAACATTGTCTAACA +TGCTTGTTACCGTCATTATCATCCGTGGTCCATCCTAACATCCTTGCCAGAGTGATGTCA +GTGTTGTGGTGAAACGTAGACGCCTGCGCAAACCGTAAAATGAGGTCTGGCAGTGGATCC +TGACAGGCGTTTCACGCCGTTGTAATAAGGAATTTACAGAGAATAAACGGTGCTACACTT +GTATGTAGCGCATCTTTCTTTACGGTCAATCAGCAAGGTGTTAAATTGATCACGTTTTAG +ACCATTTTTTCGTCGTGAAACTAAAAAAACCAGGCGCGAAAAGTGGTAACGGTTACCTTT +GACATACGAAATATCCCGAATGCCGCGTGTTACCGTTGATGTTGGCGGAATCACAGTCAT +GACGGGTAGCAAATCAGGCGATACGTCTTGTACCTCTATATTGAGACTCTGAAACAGAGA +CTGGATGCCATAAATCAATTGCGTGTGGATCGCGCGCTTGCTGCTATGGGGCCTGCATTC +CAACAGGTCTACAGTCTACTGCCGACATTGTTGCACTATCACCATCCGCTAATGCCGGGT +TACCTTGATGGTAACGTTCCCAAAGGCATTTGCCTTTACACGCCTGATGAAACTCAACGC +CACTACCTGAACGAGCTTGAACTGTATCGTGGAATGTCAGTACAGGATCCGCCGAAAGGT +GAGCTTCCAATTACTGGTGTATACACCATGGGCAGCACCTCGTCCGTAGGGCAAAGTTGT +TCCTCTGACCTGGATATCTGGGTCTGTCATCAATCCTGGCTCGATAGCGAAGAGCGCCAA +TTGCTACAACGTAAATGTAGCCTGCTGGAAAACTGGGCCGCCTCGCTGGGTGTGGAAGTC +AGCTTCTTCCTGATTGATGAAAACCGCTTCCGTCATAATGAAAGCGGCAGCCTGGGGGGC +GAAGATTGTGGCTCCACCCAGCATATACTGCTGCTTGACGAATTTTATCGTACCGCCGTG +CGTCTCGCCGGTAAGCGTATTCTGTGGAATATGGTGCCGTGCGACGAAGAAGAGCATTAC +GACGACTATGTGATGACGCTTTACGCGCAGGGCGTGCTGACGCCAAATGAATGGCTGGAT +CTCGGTGGCTTAAGCTCGCTTTCTGCTGAAGAGTACTTTGGTGCCAGCCTTTGGCAGCTC +TACAAGAGTATCGATTCCCCATACAAAGCGGTACTGAAAACACTGCTGCTGGAAGCCTAT +TCCTGGGAATACCCGAACCCACGTCTGCTGGCGAAAGATATCAAACAGCGTTTGCACGAC +GGCGAGATTGTATCGTTTGGTCTCGATCCATACTGCATGATGCTGGAGCGTGTTACTGAA +TACCTGACGGCGATTGAAGATTTTACCCGTCTGGATTTAGTACGTCGCTGCTTCTATTTA +AAAGTGTGCGAAAAGCTCAGCCGTGAACGCGCCTGCGTAGGCTGGCGTCGCGCAGTGTTG 
+AGCCAGTTAGTGAGCGAGTGGGGTTGGGACGAAGCTCGTCTGGCAATGCTCGATAACCGC +GCTAACTGGAAGATTGATCAGGTGCGTGAGGCGCACAACGAGTTGCTCGACGCGATGATG +CAGAGCTACCGTAATCTGATCCGCTTTGCGCGTCGCAATAACCTTAGCGTCTCCGCCAGT +CCGCAGGATATCGGCGTGCTGACGCGTAAGCTGTATGCCGCGTTTGAAGCATTACCAGGT +AAAGTGACGCTGGTAAACCCGCAGATTTCACCCGATCTCTCGGAACCGAATCTGACCTTT +ATTTATGTGCCGCCGGGCCGGGCTAACCGTTCAGGTTGGTATCTGTATAACCGCGCGCCA +AATATTGAGTCGATCATCAGCCATCAGCCGCTGGAATATAACCGTTACCTGAATAAACTG +GTGGCGTGGGCATGGTTTAACGGCCTGCTGACCTCGCGCACCCGTTTGTATATTAAAGGT +AACGGCATTGTCGATTTGCCTAAGTTGCAGGAGATGGTCGCCGACGTGTCGCACCATTTC +CCGCTGCGCTTACCTGCACCGACACCGAAGGCGCTCTACAGCCCGTGTGAGATCCGCCAT +CTGGCGATTATCGTTAACCTGGAATATGACCCGACAGCGGCGTTCCGCAATCAGGTGGTG +CATTTCGATTTCCGTAAGCTGGATGTCTTCAGCTTTGGCGAGAATCAAAATTGCCTGGTA +GGTAGCGTTGACCTGCTGTACCGCAACTCGTGGAACGAAGTGCGTACGCTGCACTTCAAC +GGCGAGCAATCGATGATCGAAGCCCTGAAAACTATTCTCGGCAAAATGCATCAGGACGCC +GCACCGCCAGATAGCGTGGAAGTCTTCTGTTATAGCCAGCATCTGCGCGGCTTAATTCGT +ACTCGCGTGCAGCAACTGGTTTCTGAGTGTATTGAATTGCGTCTTTCCAGCACCCGCCAG +GAAACCGGGCGTTTCAAGGCGCTGCGCGTTTCTGGTCAAACCTGGGGGTTGTTCTTCGAA +CGCCTGAATGTATCGGTACAGAAACTGGAAAACGCCATCGAGTTTTATGGCGCGATTTCG +CATAACAAACTGCACGGCCTGTCAGTGCAGGTTGAAACCAATCACGTCAAATTACCGGCG +GTGGTGGACGGCTTTGCCAGCGAAGGGATCATCCAGTTCTTTTTCGAAGAAACGCAAGAC +GAGAATGGCTTTAATATCTACATTCTCGACGAAAGCAACCGGGTTGAGGTATATCACCAC +TGCGAAGGCAGCAAAGAGGAGCTGGTACGTGACGTCAGTCGCTTCTACTCGTCATCGCAT +GACCGCTTTACCTACGGCTCAAGCTTCATCAACTTCAACCTGCCGCAGTTCTATCAGATT +GTGAAGGTTGATGGTCGTGAACAGGTGATTCCGTTCCGCACAAAATCTATCGGTAACATG +CCGCCTGCCAATCAGGATCACGATACGCCGCTATTACAGCAATATTTTTCGTGATGAACG +TGCCGGAAAGCGAGGCTTATCCGGCATGCAATCTTAGCGGAAACTGACTGTTTCACCCGC +CTGCTGCGTCGCCGCCTGTTCCAGCAAATCCCAGAAGGTTTCGCCGCTGCGATCACAAAT +CCACTCATCGCCTTTCAGGTCAAAATGGTAGCCGCCCTGTTTGGTTGCCAGCCATACCTG +GTGCAGCGGCTCCTGGCGGTTGATAATGATTTTGCTGCCATTCTCAAAGGTAATGGTCAG +TACGCCGCCGTTGATTTCGCAGTCGATATCGCTGTCGCCATCCCAGTCGTCCAGGCGTTC +TTCAATGGTCAGCCAGAGTTGATCAGCCAGGCGATGAAATTCACTGTCGTTCATTGTTGT +ATCCTGTTTTTAAGTGATGGCGGCAGTATAGCGGCATGGGGTCAGGGCTTCAAAGTTTGC 
+ACCTCTGCGGCTGCGTTCCGGCACGATTCATCCGTCACCGGAATAATGATGTCTCTGTGT +AGCGAAAGATTTGTCTCTTCATTAGGGCGCAGTTACACCACGTCTTTCCCTGTTTCTGGT +AAACATTATGATCAGGTTTACCGAGCGAGCATCCTCACGCTGACGGAACTAAAAAAGACA +ACAAACAAACCACATTGCGATAGTGCATAAAGCCATCCTGGCGCGAGGTGCCGATCACGA +AACTACCAGCAAAACATAAATCCCCACGAGTAAGCGTTATACTCGCAGCATTTCCTCACT +TTTCAGACTTCATAAAGAGTCGCTAAACGCTTGCTTTTACGTCTTCTCCTGCGATGATAG +AAAGCAGAAAGCGATGAACTTTACAGGCAATCCATAATGAAAAACGTGTTTAAGGCACTC +ACTGTATTACTTACTCTCTTCAGCCTGACGGGCTGCGGTCTGAAAGGTCCGCTCTATTTC +CCGCCTGCAGATAAAAACGCACCGCCGCCGACCAAACCGGTAGAGACGCAAACGCAATCC +ACGGTGCCGGATAAAAACGATCGCGCCACTGGCGATGGTCCATCCCAGGTGAATTACTAA +AAGTCAGTTTCTGTACCCGCGTGATTGGAGTAAATGATGCAGTTCTCGAAAATGCATGGC +CTTGGCAACGATTTTATGGTCGTCGACGCGGTAACGCAGAATGTCTTTTTTTCACCGGAG +CTGATTCGTCGCCTGGCTGATCGGCACCTGGGGGTAGGGTTTGACCAACTGCTGGTGGTT +GAGCCGCCGTATGATCCTGAACTGGATTTTCACTATCGCATTTTCAATGCTGATGGCAGT +GAAGTGGCGCAGTGCGGCAACGGTGCGCGCTGCTTTGCCCGTTTTGTGCGTCTGAAAGGA +CTGACCAATAAGCGTGATATCCGCGTCAGCACCGCCAACGGGCGGATGGTTCTGACCGTC +ACCGATGATGATCTGGTCCGCGTAAATATGGGCGAACCCAACTTCGAACCTTCCGCCGTG +CCGTTTCGCGCTAACAAAGCGGAAAAGACCTATATTATGCGCGCCGCCGAGCAGACAATC +TTATGCGGCGTGGTGTCGATGGGAAATCCGCATTGCGTGATTCAGGTCGATGATGTCGAT +ACCGCGGCGGTAGAAACGCTTGGTCCTGTTCTGGAAAGCCACGAGCGTTTTCCGGAGCGC +GCCAATATCGGTTTTATGCAAGTGGTTAAGCGCGAGCATATTCGTTTACGCGTTTATGAG +CGTGGGGCAGGAGAAACCCAGGCCTGCGGCAGCGGCGCGTGTGCGGCGGTTGCAGTAGGG +ATTCAGCAAGGTTTGCTGGCCGAAGAAGTACGCGTGGAACTCCCCGGCGGTCGTCTTGAT +ATCGCCTGGAAAGGTCCGGGTCACCCGTTATATATGACTGGCCCGGCGGTACATGTCTAC +GACGGATTTATTCATCTATGAAGCAACCAGGGGAAGAACTGCAGGAAACACTCACGGAGC +TTGATGACCGGGCGGTTGTCGATTATCTGATTAAAAATCCTGAGTTTTTTATCCGTAATG +CGCGCGCAGTAGAAGCGATACGTGTGCCGCATCCGGTACGCGGCACCGTTTCGTTGGTCG +AGTGGCACATGGCCCGCGCACGTAATCATATTCATGTTCTGGAAGAGAACATGGCGCTGT +TGATGGAACAGGCTATCGCCAACGAAGGCCTGTTTTATCGCCTACTCTACCTGCAGCGCA +GTCTCACCGCCGCCAGCAGTCTCGACGATATGCTGATGCGCTTTCACCGCTGGGCGCGCG +ATCTCGGCCTGGCAGGTGCGAGTCTGCGCCTGTTTCCGGATCGCTGGCGCTTAGGTGCGC +CGTCGAACCACACTCATCTGGCATTAAGCCGTCAGTCTTTCGAACCGCTGCGTATTCAGC 
+GTTAGGGGCAGGAACAGCACTATCTTGGGCCGCTTAACGGACCAGAGCTGCTGGTGGTGC +TACCGGAAGCGAAAGCGGTGGGATCGGTGGCGATGTCGATGCTGGGAAGCGATGCTGATT +TGGGTGTCGTGCTGTTTACCAGTCGCGATGCCAGTCACTATCAACAAGGGCAAGGAACGC +AGTTACTTCATGAAATTGCGCTGATGTTGCCGGAGCTTCTGGAGCGTTGGATTGAACGCG +TATGACCGATTTACACACCGATGTAGAACGCTACCTACGTTATCTGAGCGTGGAGCGCCA +GCTTAGCCCGATAACCCTGCTTAACTACCAGCGTCAGCTTGAGGCGATCATCAATTTTGC +CAGCGAAAACGGCCTGCAAAGCTGGCAGCAATGTGATGTGACGATGGTGCGCAATTTTGC +TGTACGCAGTCGCCGTAAAGGGCTGGGAGCAGCAAGTCTGGCGTTACGGCTTTCTGCGCT +ACGTAGCTTTTTTGACTGGCTGGTCAGCCAGAACGAACTCAAAGCTAACCCGGCGAAAGG +TGTTTCGGCACCGAAAGCGCCGCGTCATCTGCCGAAAAACATCGACGTCGACGATATGAA +TCGGCTGCTGGATATTGATATCAATGATCCCCTCGCTGTACGCGACCGTGCAATGCTGGA +AGTGATGTACGGCGCGGGTCTGCGTCTTTCTGAGCTGGTGGGGCTGGATATTAAACACCT +CGACCTGGAGTCTGGTGAAGTGTGGGTTATGGGGAAAGGCAGCAAAGAGCGCCGCCTGCC +GATTGGTCGCAACGCTGTGGCGTGGATTGAGCACTGGCTTGATTTGCGCGACCTGTTTGG +TAGCGAAGACGACGCGCTTTTTCTGTCGAAACTGGGCAAGCGTATCTCCGCGCGTAATGT +GCAGAAACGCTTTGCCGAATGGGGCATAAAACAAGGGCTGAATAATCACGTTCATCCGCA +TAAATTACGTCACTCGTTCGCCACGCATATGCTGGAGTCGAGCGGCGATCTTCGTGGTGT +GCAGGAGCTGCTGGGTCATGCCAACCTCTCCACCACGCAAATCTATACTCATCTTGATTT +TCAACACCTTGCCTCGGTGTACGATGCGGCGCATCCACGCGCCAAACGGGGGAAATAATG +CGTTTTTACCGGCCTTTGGGGCGCATCTCGGCGCTCACCTTTGACCTGGATGATACCCTT +TACGATAACCGTCCGGTGATTTTGCGCACCGAGCGAGAGGCGCTTACCTTTGTGCAAAAT +TATCATCCGGCGCTGCGCAGCTTCCAGAATGAAGATCTGCAACGCCTGCGCCAGGCGGTA +CGGGAAGCGGAACCCGAGATTTATCACGACGTGACGCGCTGGCGTTTTCGTTCGATTGAA +CAAGCGATGCTCGACGCCGGGCTGAGTGCCGAAGAAGCCAGTGCAGGCGCACACGCAGCA +ATGATCAACTTTGCCAAATGGCGCAGCCGAATCGACGTCCCGCAGCAAACTCACGACACC +TTAAAACAGCTGGCGAAGAAATGGCCGCTGGTGGCGATCACCAACGGTAACGCCCAGCCG +GAGCTGTTTGGTTTGGGGGATTATTTTGAGTTTGTGCTGCGCGCTGGCCCGCACGGGCGC +TCAAAACCGTTCAGCGATATGTACTTTTTGGCTGCGGAAAAACTCAACGTGCCGATCGGC +GAGATCTTACATGTTGGGGACGATCTCACCACTGACGTGGGTGGGGCAATTCGCAGCGGA +ATGCAGGCTTGTTGGATCAGACCGGAAAATGGCGATCTGATGCAAACCTGGGACAGCCGT +TTACTGCCGCATCTGGAAATTTCCCGGTTGGCATCTCTGACCTCGCTGATATAATCAGCA +AATCTGTATATATACCCAGCTTTTTGGCGGAGGGCGTTGCGCTTCTCCGCCCAACCTATT 
+TTTACGCGGCGGTGCCAATGGACGTTTCTTACCTGCTCGACAGCCTTAATGACAAACAGC +GCGAAGCGGTGGCCGCGCCACGCAGCAACCTTCTGGTGCTGGCGGGCGCGGGCAGTGGTA +AGACGCGCGTACTGGTGCATCGTATCGCCTGGTTGATGAGCGTGGAAAACTGCTCGCCAT +ACTCGATTATGGCGGTGACGTTTACCAACAAAGCGGCGGCGGAGATGCGTCATCGTATCG +GGCAACTGATGGGCACGAGCCAGGGCGGTATGTGGGTCGGCACCTTCCACGGGCTGGCGC +ACCGTTTGCTGCGTGCGCACCATATGGACGCCAATCTGCCGCAGGATTTCCAGATCCTCG +ACAGTGAAGACCAGCTACGCCTGCTTAAGCGTCTGATCAAAGCCATGAACCTCGACGAGA +AGCAGTGGCCGCCGCGGCAGGCAATGTGGTACATCAACAGCCAGAAAGATGAAGGCCTGC +GTCCGCATCATATTCAAAGCTACGGTAATCCGGTGGAGCAGACCTGGCAGAAGGTGTATC +AGGCGTATCAGGAAGCGTGTGACCGCGCGGGCCTGGTGGACTTCGCCGAGCTGCTGCTGC +GCGCTCACGAGTTGTGGCTTAACAAGCCGCATATCCTGCAACACTACCGCGAACGTTTTA +CCAATATCCTGGTGGACGAATTCCAGGATACCAACAACATTCAGTACGCGTGGATCCGCC +TGCTGGCGGGCGACACCGGCAAAGTGATGATCGTCGGTGATGACGACCAGTCAATCTACG +GCTGGCGCGGGGCGCAGGTGGAGAATATTCAGCGTTTCCTTAATGATTTCCCCGGTGCCG +AAACTATTCGTCTGGAGCAAAACTACCGCTCTACCAGCAATATTCTGAGCGCCGCTAACG +CCCTGATTGAAAACAATAACGGGCGTCTGGGTAAAAAACTGTGGACCGATGGCGCGGACG +GTGAGCCTATTTCCCTCTATTGCGCTTTTAACGAACTCGATGAAGCGCGTTTTGTGGTTA +ACCGCATCAAAACCTGGCAGGACAACGGCGGAGCGCTTGCCGAGTGCGCCATTCTCTACC +GCAGCAACGCCCAGTCGCGGGTGCTCGAAGAGGCGTTATTGCAGGCCAGTATGCCGTACC +GTATTTACGGCGGGATGCGCTTCTTCGAACGCCAGGAAATCAAAGATGCGCTCTCGTATC +TGCGCCTGATTGCCAACCGCAACGACGACGCGGCCTTTGAGCGTGTGGTGAATACGCCAA +CGCGGGGTATTGGTGACCGGACGCTGGACGTGGTACGTCAGACATCGCGCGATCGCCAGT +TAACACTCTGGCAGGCATGTCGTGAGCTGTTGCAGGAAAAAGCCCTCGCCGGGCGAGCTG +CCAGCGCCTTGCAGCGATTTATGGAATTAATCGACGCCTTAGCGCAGGAAACTGCCGATA +TGCCGCTGCATGTACAGACTGACCGGGTAATTAAAGACTCCGGCCTGCGTACCATGTATG +AGCAGGAGAAGGGCGAAAAAGGTCAGACGCGTATCGAAAACTTAGAGGAACTGGTGACGG +CAACGCGCCAGTTCAGCTACAACGAAGAAGACGAAGATTTAATGCCGCTGCAGGCGTTCC +TCTCCCATGCGGCACTGGAAGCAGGTGAAGGGCAGGCGGATACCTGGCAGGATGCGGTGC +AGTTGATGACGCTACACTCGGCGAAAGGCCTGGAGTTCCCGCAGGTGTTTATCGTTGGTA +TGGAAGAGGGCATGTTCCCAAGCCAGATGTCGCTGGATGAAGGCGGGCGTCTGGAAGAAG +AACGCCGTCTGGCCTACGTTGGCGTAACCCGCGCGATGCAGAAACTGACGCTGACCTACG +CGGAAACCCGCCGTCTGTATGGTAAAGAGGTTTACCATCGCCCGTCGCGCTTTATCGGCG 
+AGCTGCCGGAAGAGTGTGTGGAAGAGGTGCGCCTGCGCGCCACGGTAAGCCGCCCGGTCA +GCCATCAGCGGATGGGTACGCCGATGGTCGAGAACGACAGCGGCTACAAGCTCGGCCAGC +GCGTACGCCACGCTAAGTTTGGTGAAGGCACCATTGTCAATATGGAAGGCAGCGGTGAGC +ATAGCCGTTTGCAGGTGGCATTTCAGGGCCAGGGTATTAAATGGCTGGTGGCGGCATACG +CCCGGCTGGAGTCGGTGTAACGTTGCCGGATGCGGTGCTGCGCACCTTATTTGGCCTAAA +AAATCATTCAGATTCAATAAATTGCAACGTCATGTAGGCCGGATAGGGCGTTTACGCCGC +ATCCGGCATCTGCGCCATCTTCAGTATCTGACACAAAACTATCGTTTTAACCTTTCCGCT +CGACGGAAATCATGGTGACAAAAGGATAGCGTTGCCACGGGATTGCCCCGCCTTTCATAT +ACATATGTGAAATCGTGCCATCAAGATAAAGCAGCTGCTCAACGTTCAGTTTCGCTTTGG +CATAACAGGCAAAATCATAAAAATTTGTTGCCTGCTGGCTCAACAAAAACACGGCGTTCC +CATGTTTATTAATCCCAACACCGTTACGAATTTTGCTTGAGGCGACGTTGGGATGAATAC +GCGGATTAATTACACCGTTTTCCATCAACATTGGCCCTGACTGCACCGCAAACTGAATCT +CTTTACTGGTTTTGAAGGCATCCAGACGAACGATGCCGACTTTATCTCCCGCGACATAAA +ACACGCCGCCAGGACGGATAAAGAAATTCCCTTCACCTGAAGCGAGATTTAACGCCACCT +TCTGCTGACCGTTTTCGATGTACAAACCGAGCGGCGCATAGCTTTCATCATAGATGCCGC +CGTTCATCGCCATCTGCACCTGACCCTGACTATTAATATCCGCCAGCAGAGCATGTAACG +TTCCCCACGCTTCGCCATTGGCTTTTTGCCAGTACATTTTCACCCGCTCTGTTTGAGGAT +TAACGGTATACGCCTGTACGGTCAGCGTCGGATCTGAGAGTGCGCAATCATCAGCGGCAA +CAGCAAACAAGGGAAGTAAGGTGAGGGCGAGAAAAATCCGTTTGAGATTCAAGGTGATCA +TTCCTTTACCAATGAGTAGCTGATGCGCCATTATAGGTCCTGGATGTGGGATTTTTTTAT +CCTGTTAGCGACCTTGACGAGTACCAAAAAGCGCGAAGTTCAACTATTGTTCTGTGGTGT +TCTGTTGCGTGTTGACGGCAAAATTTTGCTGGCGTAACATGCGCGCACGATCACTCTAAG +AGGACATTCGCCTTGGACACACCCAGTAGATACTGGCTCACTATCCTGTCATCCAGGATC +AACTCCTAAGGCTATCCCTTTTTGCTGATAGCCTTAGCGGTTGTCAGCGACCTCAATTTT +TCCCGTCGCGCTGAGTCAGGCTGTTTAATGGTCTGAAACCCAATTTGTTTCTGTGTGCCC +ACCGAACTGTCCGATATTTTAAGCATTGGGAGTCCCGGTCATGCTGAGCGCATTTCAACT +GGAAAATAACCGACTGACCCGGCTGGAAGTCGAAGAGTCACAACCCCTTGTAAATGCAGT +ATGGATTGATCTTGTCGAACCGGACGACGACGAGCGACTGCGCGTACAATCTGAACTTGG +CCAGAGCCTGGCAACCCGCCCGGAACTGGAAGACATCGAAGCATCGGCACGTTTCTTTGA +AGACGACGACGGCCTGCATATTCACTCCTTCTTCTTCTTTGAAGATGCGGAAGATCACGC +CGGTAACTCCACTGTGGCATTTACCATCCGTGATGGTCGTCTGTTTACTCTGCGTGAGCG +TGAACTGCCCGCTTTTCGTCTGTATCGTATGCGTGCCCGTAGCCAGTCGATGGTAGACGG 
+TAACGCCTACGAGTTGCTGCTGGATCTGTTCGAAACCAAAATCGAACAGTTGGCAGATGA +AATTGAAAATATCTATAGCGACCTGGAGCAGTTGAGCCGGGTGATTATGGAAGGGCATCA +GGGCGATGAGTACGACGAGGCGCTCTCCACTCTGGCGGAACTGGAAGATATCGGCTGGAA +AGTTCGCCTGTGTCTGATGGATACCCAGCGCGCGCTCAACTTCCTGGTGCGTAAAGCGCG +TTTACCGGGTGGGCAACTGGAGCAGGCGCGTGAAATCCTGCGAGATATCGAATCCCTGCT +GCCGCATAACGAATCCCTGTTCCAGAAGGTGAACTTCCTGATGCAGGCGGCAATGGGTTT +TATCAACATCGAGCAGAACCGCATCATCAAAATCTTCTCGGTGGTATCCGTGGTATTCCT +GCCGCCGACGCTCGTTGCTTCCAGCTATGGCATGAACTTTGAGTTTATGCCAGAACTGAA +GTGGAGCTTCGGCTACCCTGGCGCGATTATCTTTATGATCCTCGCGGGCCTGGCACCGTA +TCTGTACTTTAAGCGGAAGAACTGGTTGTAAAAAACGAGAGCGGTGGCTTAGTCTGGCTA +AGCCACCTGTTATTCAAAGGCTCCAGGTATTTAACCCTTTTACCTCTTTCTCATAGAACC +ATTTGTTCGTGTTAACAGCAACATAGGCTGCTACGGCAATTCCCAGAATGTTAACGCCAA +TTAGTGCACCAACAAATAGAGATAGAATACCAAGTAATAGAACTATAATTGCTTTTTTCC +ATAACCCCAGGACAAATAAATATATCCAGGAACAGAAGAAAGCAATGAAATTCATTTGAA +TAGTTAAGCGTTGTCTTACTTTTAATGCTTTAAATGCTGCTTTATATTCTGGTGTTGCCC +CCCAGAATCCAGGAAAACCATGTTGATCATAAAAATTAAATCGGTATTTCCATTTTTCAC +TTAATGAACCATCGTTCATATATTCCTTACTCATAAATACTCCATAACTATTGTTTTGAT +GAATCAGTAGGTGCAAGCATTAGCATACTGAAAGTGGAAAAATAACAAATCAAAAAAATC +ATCGAACCATTGCCTGAACAGGCAAAATCTTCGGCTATCATTGTGATGATAGAGATGATA +TATACTGCTAATGTACCAAAAACATAAGTTTTTATATAGATGAAACCACTATCACGGAGT +CGCTGGCAATTCATGTTGATGACGAGATAATGGAGTACGATGGTAGAGACTATAACAAGA +AAGCCTGCTTCTCCATCGTTAAAAAAGATAATAAGAAAGGCAAAAATGAAATTTATTAAA +ATAAATGAAAATATATAACGACGTCTGGAAATCTTACCGTTAGATGTTGGGATAAATATA +CGTAACATAAATTTTACATCCTTGTATGAGTCTCCGGTCAGCATGGCAATATGCCCACTC +TCATGCAAGAGTCGGCATATTTTTTCAGAATATATTTATTTTTTATTTGGACGTTCTACG +CTGCGTATAAATCGCATCCATCACAAAAATTGCCAGCGCCACCCAAATAAAGGCGAAAGT +CACCATCTTATCGGCACCCGGTTTTTCACCATAAAACGTCACAGCCAGCAGGAACATCAG +CGTCGGGCCAATGTACTGGAAAAAGCCTAACGTTGAGAGACGCAAGCGCGTGGCAGCGGC +GGTAAAACACAACAGCGGTACGGTAGTGACAATACCGGCGGCGATCAGCAGTAAATTCAG +CGACATCGGGTTTTGCCCCATATGGCTGGTTGAGCTGTCGGCAATAGCAAACAGGTAAAT +TGCCGCCACGGGCAGCAGCCACATGGTTTCGATTAACATGCCGGTTTGCGCTTCAACGGC +AATCTTCTTGCGTACCAGACCGTAGAAGGCAAAACTAAATGCCAGTCCCAGCGCGATAAT 
+AGGTAGCGAACCAAAAGTCCACAGCTGGACTAACACGCCACATATCGCCAGAATCACCGC +CAGCCATTGCATCCGGCGGAATCGCTCGCCGAGGAAAATCATCCCCAGCACAATGTTCAC +CAGCGGGTTAATAAAGTAACCAAGGCTCGCTTCCAGCATATGGTGATTGTTCACCGCCCA +GATAAACAGTAGCCAGTTGCCACCAATCAGCACGGCAGAGACTGCCAGCATAAAAATTTT +CTGTGGCGTCTGAATCAGCGTTTTTAAATAGGACCACTGGCGGCAAATGCTCATCAGCAC +CACCATAAAGAAAAACGACCAGATCACGCGATGCGTCAGGATTTCATCGGCGGGCACGTA +GTAAATCAACTTGAAGTACGCTGGCGCTATACCCCAAATAAAATAAGCGGCAAGAGCGAG +TAATACGCCCTGCCGCGTTTGTTTTGCATCCATCGGGAATACTCATTTTTAATTGGTAAC +AGCAGTTTACCTGCTTTTATGTCTTCAACCTACCATATAGGTGGCGGTGGCACTGGCAAT +ATAAAGCTGTTCTTCATTGTGTAATTCAACGCGGGCGACGGCGACTTTATTGCCTGCACG +CAACAGGCTACTAGTAGCAGTAAAACGCTCGCCCCTGCCTGGGCGCAGATAATCAACGCG +AAGATCAATGGTCCCCATCCGCGATAGCCGCTGGCGTAGTTCATCTTCACTGATGGTTTC +GTGGCGGGTTAAGGTACTTCCCACGCACACCAGACCGGCGGCGACATCCAGCGCCGACGC +AATGACCCCGCCGTGCAAAATGCTTTGCGCCCAGTTGCCCACCATCATTGGCTGATTTTT +AAAGGCCAGCTGTGCGAACTCTTTTTCGTAACGCTCCAGTTCCATCCCCAATGCGCGGTT +AAATGGCATGTGATAAACAAACATCTCACCCACTAATTTCAGGGCTTGTTCAGCGGTCAG +TACGGCAGACATATCATCCTTACACTTCATTGGTTAATGAAATGTTGATTTTATGCTTCT +TTGTTGTTGGTTTCTACTTTAGGAAGGGATAACTAACGGCTATGGAGTTAAGTATGTAAA +ATAGCCCGCAGAAAAATATTCACCTTATCAATAATTCGTTACGGAGAACACGACCGATGC +GGACTCTGCAGGGCTGGTTGTTGCCGGTGTTTATGTTGCCTATGGCAGTATATGCACAAG +AGGCAACGGTGAAAGAGGTGCATGACGCGCCAGCGGTGCGTGGCAGTATTATCGCCAATA +TGCTGCAGGAGCATGACAATCCGTTCACGCTCTATCCTTATGACACCAACTACCTCATTT +ACACCCAAACCAGCGATCTGAATAAAGAAGCGATTGCCAGTTACGACTGGGCGGAAAATG +CGCGTAAGGATGAAGTAAAGTTTCAGTTGAGCCTGGCATTTCCGCTGTGGCGTGGGATTT +TAGGCCCGAACTCGGTGTTGGGTGCGTCTTATACGCAAAAATCCTGGTGGCAACTGTCCA +ATAGCGAAGAGTCTTCACCGTTTCGTGAAACCAACTACGAACCGCAATTGTTCCTCGGTT +TTGCCACCGATTACCGTTTTGCAGGTTGGACGCTGCGCGATGTGGAGATGGGGTATAACC +ACGACTCTAACGGGCGTTCCGACCCGACCTCCCGCAGCTGGAACCGCCTTTATACTCGCC +TGATGGCAGAAAACGGTAACTGGCTGGTAGAAGTGAAGCCGTGGTATGTGGTGGGTAATA +CTGACGATAACCCGGATATCACCAAATATATGGGTTACTACCAGCTTAAAATCGGCTATC +ACCTCGGTGATGCGGTGCTCAGTGCGAAAGGACAGTACAACTGGAACACCGGCTACGGCG +GCGCGGAGTTAGGCTTAAGTTACCCGATCACCAAACATGTGCGCCTTTATACTCAGGTTT 
+ACAGCGGCTATGGCGAATCGCTCATCGACTATAACTTCAACCAGACCCGTGTCGGTGTGG +GGGTTATGCTAAACGATTTGTTTTGATGAACGGTTGAGTGGTTGGCAAATCTGGAATCCA +GCATCCAGGATTACCCTCTCAGAGACTAAAAGCATTGCAGTTTCTCGCGCAGGCGCTGAA +AATAGCGCCTGTTTTTATTTCAGGCAATCGGGGTGAATGTGGCGCAGGCGGAAGTGTTGA +ATCTGGAGTCCGGAGCTAAACAGGTTTTACAAGAAACCTTTGGCTACCAACAGTTTCGCC +CCGGCCAGGAAGAAATTATCGACACTGTGCTTTCCGGCCGCGATTGCCTCGTCGTCATGC +CCACTGGTGGCGGAAAATCCCTTTGCTATCAAATCCCTGCCTTATTGCTAAACGGCCTTA +CCGTGGTTGTTTCACCGCTGATTTCGTTGATGAAAGATCAGGTGGATCAACTGCAAGCCA +ACGGCGTGGCGGCGGCGTGCCTTAACTCGACGCAAACCCGCGAACAGCAACTTGAAGTGA +TGACAGGCTGCCGCACCGGGCAAATTCGTCTGCTTTATATCGCCCCGGAACGCCTGATGC +TGGATAACTTTCTTGAGCATCTGGCGCACTGGAATCCGGTGTTATTAGCCGTTGATGAAG +CGCACTGTATCTCCCAATGGGGCCACGATTTCCGCCCGGAATATGCCGCGCTCGGTCAGT +TGCGCCAGCGGTTCCCGACGCTGCCGTTTATGGCGCTGACCGCCACAGCCGACGACACCA +CGCGCCAGGATATCGTGCGCCTGCTGGGGCTGAACGATCCGCTGATTCAAATCAGCAGTT +TTGACCGTCCGAATATTCGCTACATGCTGATGGAGAAGTTCAAACCGCTCGATCAGTTGA +TGCGCTACGTGCAGGAACAGCGCGGTAAGTCAGGCATTATCTACTGCAACAGCCGCGCGA +AAGTAGAAGACACCGCTGCGCGCCTGCAAAGCAAGGGAATTAGCGCGGCGGCCTATCATG +CCGGGCTGGAAAATAATGTTCGCGCCGATGTGCAGGAAAAATTCCAGCGCGATGACCTGC +AAATTGTGGTGGCGACGGTGGCGTTCGGCATGGGCATCAATAAACCAAACGTTCGCTTCG +TGGTCCACTTTGATATTCCGCGCAATATCGAATCCTATTATCAGGAAACCGGACGCGCCG +GGCGTGATGGCCTGCCCGCGGAAGCGATGCTGTTTTACGATCCGGCTGATATGGCGTGGC +TGCGCCGTTGTCTGGAAGAGAAGCCGCAGGGGCAGTTGCAGGATATCGAGCGCCACAAAC +TCAATGCGATGGGCGCGTTTGCCGAAGCGCAAACTTGCCGTCGTCTGGTATTGCTGAACT +ATTTTGGCGAAGGGCGTCAGGAGCCGTGCGGGAACTGCGATATCTGCCTCGATCCGCCGA +AACAGTACGACGGTTCAACCGATGCTCAGATTGCCCTTTCCACCATTGGTCGTGTGAATC +AGCGGTTTGGGATGGGTTATGTGGTGGAAGTGATTCGTGGTGCTAATAACCAGCGTATCC +GCGACTATGGTCATGACAAACTGAAAGTCTATGGCATGGGCCGTGATAAAAGCCATGAAC +ATTGGGTGAGCGTGATCCGCCAGCTGATTCACCTCGGCCTGGTGACGCAAAATATTGCCC +AGCATTCTGCCCTACAACTGACAGAGGCCGCGCGCCCGGTGCTGCGCGGCGAATCCTCTT +TGCAACTTGCCGTGCCGCGTATCGTGGCGCTCAAACCGAAAGCGATGCAGAAATCGTTCG +GCGGCAACTATGATCGCAAACTGTTCGCCAAATTACGCAAACTGCGTAAATCGATAGCCG +ATGAAAGTAATGTCCCGCCGTACGTGGTGTTTAACGACGCAACCTTGATTGAGATGGCTG 
+AACAGATGCCGATCACCGCCAGCGAAATGCTCAGCGTTAACGGCGTTGGGATGCGCAAGC +TGGAACGCTTTGGCAAACCGTTTATGGCGCTGATTCGTGCGCATGTTGATGGCGATGACG +AAGAGTAGTCAGCAGCATAAAAAAGTGCCAGTATGAAGACTCCGTAAACGTTTCCCCCGC +GAGTCAAATGTATGTTGATGTTATTTCTCACCGTCGCCATGGTGCACATTGTGGCGCTTA +TGAGCCCCGGTCCCGATTTCTTTTTTGTCTCTCAGACCGCTGTCAGTCGTTCCCGTAAAG +AAGCGATGATGGGCGTGCTGGGCATTACCTGCGGCGTAATGGTTTGGGCTGGGATTGCGC +TGCTTGGCCTGCATTTGATTATCGAAAAAATGGCCTGGCTGCATACGCTGATTATGGTGG +GCGGTGGCCTGTATCTCTGCTGGATGGGTTACCAGATGCTACGTGGTGCACTGAAAAAAG +AGGCGGTTTCTGCACCTGCGCCACAGGTCGAGCTGGCGAAAAGTGGGCGCAGTTTCCTGA +AAGGTTTACTGACCAATCTCGCTAATCCGAAAGCGATTATCTACTTTGGCTCGGTGTTCT +CATTGTTTGTCGGTGATAACGTTGGCACTACCGCGCGCTGGGGCATTTTTGCGCTGATCA +TTGTCGAAACGCTGGCGTGGTTTACCGTCGTTGCCAGCCTGTTTGCCCTGCCGCAAATGC +GCCGTGGTTATCAACGTCTGGCGAAGTGGATTGATGGTTTTGCCGGGGCGTTATTTGCCG +GATTTGGCATTCATTTGATTATTTCGCGGTGATGCCAGACGCGTCTTCAGAGTAAGTCGG +ATAAGGCGTTTACGCCGCATCCGACATTATTTTTCACGCATGCCTCGCCGATGCTAACAG +CGCTCCCACCAGCATAAACAACGAGCCGAAAATCTTATTCAGCGCCTTCATCTGCTTTGG +TCCTTTAATCCATAGAGCAATCCGTTGAGCAAGGGTGGCGTAACCGATCATCACAATAAT +ATCGACCACAATAGTGGTGACGCCGAGCACGATATACTGCATCAGTTGCGGCTGTTGCGG +CATGATGAATTGCGGAAATAGCGCCGCCAGAAACACAATACTTTTGGGATTGGTGAGATT +CACAAAAACTGCGCGCTGGAACAAATGTCGACGCGATTGAGTAGAGGCCAGCGATTTAAG +GTCAATTGCACCAGCGGCGCGCCACTGCTGGATTCCCAGCCAAATCAAGTAAGCCGCGCC +TGCCCACTTCAACACTTCAAACGCAATCACTGAGCGGGAAAATAGCGTCCCCAACCCCAC +GCCAACCAGCACAATATGAATCGCCAGTCCGGTCTGAAGCCCAGCAATAGACGCCACCGC +GCCGCGATAACCGTGGTTGAGCGAGGTGGTCATAGTGTTGATTGCACCAGAGCCTGGCGA +CAGGCTTAAAATGATCGATGTCAGCAGGTAGGCAAACCACCATTCTAAGGTCATGATGAA +CTCCCGGTGTGTCTATTTTTGTGCCACAATACGCTACTGTCGCAGCGTTGTGTCAGGCAC +GCTAAAAAAAACGATTTTACGTGGTTTAAGAGGCAGATTACCCGATGTTTCAGCAGCAAA +AAGACTGGGAAACAAGAGAAAACGCGTTTGCTGCTTTTACCATGGGACCGCTGACTGATT +TCTGGCGTCAGCGTGATGAAGCAGAGTTTACTGGTGTGGATGACATTCCGGTGCGCTTTG +TCCGTTTTCGCGCACAGCACCATGACCGGGTGGTAGTCATCTGCCCGGGGCGTATTGAGA +GCTACGTAAAATATGCGGAACTGGCCTATGACCTGTTCCATTTGGGGTTTGATGTCTTAA +TCATCGACCATCGCGGGCAGGGACGTTCCGGTCGCCTGTTAGCCGATCCGCATCTCGGGC 
+ATGTTAATCGCTTTAATGATTATGTTGATGATCTGGCGGCATTCTGGCAGCAGGAGGTTC +AGCCCGGTCCGTGGCGTAAACGCTATATACTGGCACATTCGATGGGCGGTGCGATCTCCA +CATTATTTCTGCAACGCCATCCAGGTGTATGTGACGCCATTGCGCTAACTGCGCCAATGT +TTGGGATCGTGATTCGTATGCCGTCATTTATGGCACGGCAGATCCTCAACTGGGCCGAAG +CGCATCCACGTTTCCGTGATGGCTATGCAATAGGCACCGGGCGCTGGCGCGCGTTGCCGT +TTGCTATCAACGTACTGACCCACAGCAGACAGCGATATCGACGTAACTTACGCTTCTATG +CTGATGACCCAACGATTCGCGTCGGTGGGCCGACCTACCATTGGGTACGCGAAAGTATTC +TGGCTGGCGAACAGGTGTTAGCCGGTGCGGGTGATGACGCCACGCCAACGCTTCTCTTGC +AGGCTGAAGAGGAACGCGTGGTGGATAACCGCATGCATGACCGTTTTTGTGAACTCCGCA +CCGCCGCGGGCCATCCTGTCGAAGGAGGACGGCCGTTGGTAATTAAAGGTGCTTACCATG +AGATCCTTTTTGAAAAGGACGCAATGCGCTCAGTCGCGCTCCACGCCATCGTTGATTTTT +TCAACAGGCATAACTCACCCAGCGGAAACCGCTCTACAGAGGTTTAAATTTCTTATGTAC +CAGGTTGTTGCGTCTGATTTAGATGGCACGTTACTTTCTCCCGACCATACGTTATCCCCT +TACGCCAAAGAAACTCTGAAGCTGCTCACCGCGCGCGGCATCAACTTTGTGTTTGCGACC +GGTCGTCACCACGTTGATGTGGGGCAAATTCGCGATAATCTGGAGATTAAGTCTTACATG +ATTACCTCCAATGGTGCGCGCGTTCACGATCTGGATGGTAATCTGATTTTTGCTCATAAC +CTGGATCGCGACATTGCCAGCGATCTGTTTGGCGTAGTCAACGACAATCCGGACATCATT +ACTAACGTTTATCGCGACGACGAATGGTTTATGAATCGCCATCGCCCGGAAGAGATGCGC +TTTTTTAAAGAAGCGGTGTTCCAATATGCGCTGTATGAGCCTGGATTACTGGAGCCGGAA +GGCGTCAGCAAAGTGTTCTTCACCTGCGATTCCCATGAACAACTGCTGCCGCTGGAGCAG +GCGATTAACGCTCGTTGGGGCGATCGCGTCAACGTCAGTTTCTCTACCTTAACCTGTCTG +GAAGTGATGGCGGGCGGCGTTTCAAAAGGCCATGCGCTGGAAGCGGTGGCGAAGAAACTG +GGCTACAGCCTGAAGGATTGTATTGCGTTTGGTGACGGGATGAACGACGCCGAAATGCTG +TCGATGGCGGGGAAAGGCTGCATTATGGGCAGTGCGCACCAGCGTCTGAAAGACCTTCAT +CCCGAGCTGGAAGTGATTGGTACTAATGCCGACGACGCGGTGCCGCATTATCTGCGTAAA +CTCTATTTATCGTAATCGTTCTTTATTTGGTCAGTTGTCAACCTGATACTTCGCTACAAT +GGATACCCGTTAATCAAAGAGTTTTCCATTGTGGCGCTACTTATCATCACCACGATTCTG +TGGGCCTTCTCCTTTAGCTTTTATGGCGAGTACCTTGCGGGGCACGTCGATAGCTATTTT +GCGGTGCTGGTGCGCGTTGGCCTGGCGGCACTCGTTTTTCTGCCGTTTCTGCGTACCCGT +GGCAATAGCCTGAAAACGGTCGGCCTGTATATGCTGGTGGGCGCGATGCAGCTTGGCGTG +ATGTATATGCTGAGTTTCCGCGCTTATCTCTACCTGACGGTTTCCGAGCTGCTGCTGTTC +ACCGTGCTGACGCCGCTCTACATCACGCTGATTTATGACATCATGAGTAAGCGCCGTCTG 
+CGCTGGGGCTATGCCTTTAGCGCCTTGCTGGCGGTGATTGGTGCCGGGATTATTCGCTAT +GATCAGGTCACCGACCATTTCTGGACTGGCTTGCTGCTGGTGCAACTCTCCAATATCACT +TTTGCCATTGGCATGGTGGGTTACAAACGCCTGATGGAAACTCGCCCGATGCCACAGCAT +AACGCCTTTGCGTGGTTCTATCTTGGCGCGTTTCTGGTGGCAGTGATTGCATGGTTCTTG +CTGGGAAATGCGCAGAAAATGCCGCAAACCACGCTGCAATGGGGCATTCTGGTGTTTCTT +GGCGTGGTGGCTTCCGGGATTGGCTACTTTATGTGGAACTACGGCGCGACGCAGGTGGAC +GCCGGAACGCTGGGCATTATGAATAATATGCACGTTTCGGCAGGGCTGCTGGTAAACCTG +GCTATCTGGCACCAACAGCCGCACTGGCCAACGTTTATTACAGGCGCGCTGGTGATCCTG +GCCTCACTGTGGGTGCATCGTAAGTGGGTCGCTCCGCGCTCTTCACAAACGGCAGATGAT +CGCAGGCGTGATTGCGCGCTGAGCGAATAAACGCTTCCGTAACTGGCTGACGCTGCTCGC +CATCGCGCACGGCGGCGTACAGTCGGCTCCACAAGCCTTCGCCCAGGGTTTTGGTCACCA +CCAGACCCTGGCGCTCAAAACTCTCTACTACCCAATGCGGTAGCGCGGCAATACCCATCC +GCGCGGCAACCATCTGAATCAACAATAAGGTGTTATCGACGCTTTTCAGTGACGGGCTGA +CGCCTGCCGGCTGAAGAAAATGCCGCCAGACATCCAGTCGACTACGCTGCACCGGATAAA +TTAATAGCGTCTCGCTGGCGAGATCTTCCGGTGTAATTCGCGTTTTCGCCGCCAGTGGAT +GGTCAGGTGCTAACACCAGACGCACTTCATAGTCGAACATCGGCGAATAATGCAGGCCAC +TGCGCGGCAGAATATCGGACGTCATTACCAGATCCAGCTCTCCCTGTTGCAAGGCGGGCT +GCGGGTCAAATGTCACGCCCGATTTAAAATCCATCTCTACCTGCGGCCAGTTCTTATGGA +AATTTTCTAACGCGGGTGTCAGCCACTGAATACAGCTATGGCACTCAATGGCAATGCGCA +GACGCGTCTGCTGCGGTTCATTGCAGGCTTGCAGGGCCTGGCTAATTTGCGGCAGTACCT +GGTTTGCCAGTTGCAACAGGATTTCTCCCTGCGGTGTAAAGCGTAGCGGCTGGCTCTTAC +GCACAAATAGCCGGAAGCCAAGGCGTTGTTCCAGATCGCTAAACTGGTGAGACAGGGCGG +ATTGCGTCTGATGCAACGTCGCCGCAGCGGCTGCGAGCGAGCCGCAGTTCCGCAACGCTT +GTAGCGTTTTCAGGTGTTTTACTTCGATCATGAAAGTCCTTCACTTCGGCATGAATAATT +TGCGCTTGAGGAATATACAGTAACCGCCAATTATGGATGTGTAAACATCTGGACGGCTAA +AATCCTTCGTCTTTTAAATTTATGGTGCGTTGGCTGCGTTTCTCCACCCCGGTCACTTAC +TTCAGTAAGCTCCCGGGGATGAATAAACTTGCCGCCTTCCCTAAATTCAAAATCCATAGG +ATTTACATATAATTAGAGGAAGAAAAAATGACAATATTGAATCACACCCTCGGTTTCCCT +CGCGTTGGCCTGCGTCGCGAGCTGAAAAAAGCGCAAGAAAGTTATTGGGCGGGGAACTCC +ACGCGTGAAGAACTGCTGGCGGTAGGGCGTGAATTGCGTGCTCGTCACTGGGATCAACAA +AAGCAAGCGGGTATCGACCTGCTGCCGGTGGGCGATTTTGCCTGGTACGATCATGTACTG +ACCACCAGTCTGCTGCTGGGTAACGTTCCGGCGCGTCATCAGAACAAAGATGGTTCGGTA 
+GATATCGACACCCTGTTCCGTATTGGTCGTGGACGTGCGCCGACTGGCGAACCTGCGGCG +GCAGCGGAAATGACCAAATGGTTTAACACCAACTATCACTACATGGTGCCGGAGTTCGTT +AAAGGCCAACAGTTCAAACTGACCTGGACGCAGCTGCTGGACGAAGTGGACGAGGCGCTG +GCGCTGGGCCACAAGGTGAAACCTGTGCTGCTGGGGCCGGTTACCTGGCTGTGGCTGGGG +AAAGTGAAAGGTGAACAATTTGACCGCCTGAGCCTGCTGAACGACATTCTGCCGGTTTAT +CAGCAAGTGCTGGCAGAACTGGCGAAACGCGGCATCGAGTGGGTACAGATTGATGAACCC +GCGCTGGTACTGGAACTACCACAGGCGTGGCTGGACGCATACAAACCCGCTTACGACGCG +CTCCAGGGACAGGTGAAACTGCTGCTGACCACCTATTTTGAAGGCGTAACGCCAAATCTC +GACACGATTACTGCGCTGCCTGTTCAGGGTCTGCATGTTGACCTCGTACATGGTAAAGAT +GACGTTGCTGAACTGCACAAGCGCCTGCCTTCTGACTGGTTGCTGTCTGCGGGTCTGATC +AATGGTCGTAACGTCTGGCGCGCCGATCTTACCGAGAAATATGCGCAAATTAAGGACATT +GTCGGCAAACGTGATTTGTGGGTGGCATCTTCCTGCTCGTTGCTGCACAGCCCCATCGAC +CTGAGCGTGGAAACGCGTCTTGATGCAGAAGTGAAAAGCTGGTTTGCCTTCGCCCTACAA +AAATGCCATGAACTGGCACTGCTGCGCGATGCGCTGAACAGTGGTGACACGGCAGCTCTG +GCAGAGTGGAGCGCCCCGATTCAGGCACGTCGTCACTCTACCCGCGTACATAATCCGGCG +GTAGAAAAGCGTCTGGCGGCGATCACCGCCCAGGACAGCCAGCGTGCGAATGTCTATGAA +GTGCGTGCTGAAGCCCAGCGTGCGCGTTTTAAACTGCCAGCGTGGCCGACCACCACGATT +GGTTCCTTCCCGCAAACCACGGAAATTCGTACCCTGCGTCTGGATTTCAAAAAGGGCAAT +CTCGACGCCAACAACTACCGCACGGGCATTGCGGAACATATCAAGCAGGCCATTGTTGAG +CAGGAACGTTTGGGACTGGATGTGCTGGTACATGGCGAGGCCGAGCGTAATGACATGGTG +GAATACTTTGGCGAGCACCTCGACGGATTTGTCTTTACGCAAAACGGTTGGGTACAGAGC +TACGGTTCCCGCTGCGTGAAGCCACCGATTGTCATTGGTGACATTAGCCGCCCGGCACCG +ATTACCGTGGAGTGGGCGAAGTATGCGCAATCGCTGACCGACAAACCGGTGAAAGGGATG +CTGACGGGGCCGGTGACCATACTCTGCTGGTCGTTCCCGCGTGAAGATGTCAGCCGTGAA +ACCATCGCCAAACAGATTGCGCTGGCGCTGCGTGATGAAGTGGCCGATCTGGAAGCCGCT +GGAATTGGCATCATCCAGATTGACGAACCGGCGCTGCGCGAAGGTTTACCGCTGCGTCGT +AGCGACTGGGATGCGTATCTCCAGTGGGGCGTAGAGGCCTTCCGTATCAACGCCGCCGTG +GCGAAAGATGACACACAAATCCACACTCACATGTGTTATTGCGAGTTCAACGACATCATG +GATTCGATTGCGGCGCTGGACGCAGACGTCATCACCATCGAAACCTCGCGTTCCGACATG +GAGTTGCTGGAGTCGTTTGAAGAGTTTGATTATCCAAATGAAATCGGTCCTGGCGTCTAT +GACATTCACTCGCCAAACGTACCGAGCGTGGAATGGATTGAAGCCTTGCTGAAGAAAGCG +GCAAAACGCATTCCGGCAGAGCGCCTGTGGGTCAACCCGGACTGTGGCCTGAAAACGCGC 
+GGCTGGCCAGAAACCCGCGCGGCACTGGCGAACATGGTGCAGGCGGCGCAGAACTTGCGT +CGGGGGTAAAATCCAAACCGGGTGGTAATACCACCCGGTCTTTTCTCATTACAGCGACTT +CTTCCCACCATACTGCTTAAACCATTCCAGCATACGCTGCCAGCCATCTTCTGCAGATGC +GGCATGATAGCTCGGGCGATAATCAGCGTTGAATGCATGCCCGGCGTCCGGGTACACGAT +AATCTCTGCTTTCGCATTAGCAGCCCGCAGCGCCTGGCGCATGGTTTCAACGCTCTCCTG +CGGAATGCTGTTATCCTGACCACCATATAAGCCGAGAATCGGCGCGTTAAGATCGGTTGC +GATATCAACAGGTTGTTTCGGTGAATTCAGCGACTTGTCGCCCGTCAGTTTGCCGTACCA +CGCCACTGCGGCTTTTAGCTGTGGATTATGCGCGGCATACAGCCAGGTGATACGTCCACC +CCAGCAGAATCCGGTGATCATTAAACGATGAACATCGCCGCCGTTGCGGGAAGCCCAACT +GGCGACATGATCGAGATCGGCCAGCACCTGCGAGTCAGGCACTTTTGCTACCAGACCGCT +AAGCAACGTGGGGATATCGGCAAAATCATTCGGATCGCCTTCGCGGAAGTAAAGTTCAGG +TGCGATAGCCAGATACCCCTCCAGCGCCAGACGGCGACAAATGTCGCGGATATGTTCATG +CACGCCAAAAATTTCCTGCACTACAATGACCACTGGCAGTGGGCCATCGCTTTGCTTTGG +TCTGGCATGGTAAGCAGGCATGTTATCCCCTTGTGAAGGGATAGAGGTGAAGCCCGCCAC +AATCGCGTCGTCCGGGGTCTGAACGATGGTCGAAGCGAGAGGCGATGCAGCAGGTGCAAA +TCCAGATTGTTGTGTTGTTGCCATGGTATTCTCCGTACCCTTATAAAAATGTTGCGCAAT +GTTAACTATAGTCAGCATGCAACAAATCACATTGCCTGAATCGGCTCATCTTTTATGCAG +TCCTGCAGAATGAAGGGTGATTTATGTGATTTGCATCACTTTTGGTGGGTAAATTTATGC +AACGCATTTGCGTCATGGTGATGAGTATCACGAAAAAATGTTAAACCCTTCGGTAAAGTG +TCTTTTTGCTTCTTCTGACTAAACCGATTCACAGAGGAGTTGTATATGTCCAAGTCTGAT +GTTTTTCATCTCGGCCTCACTAAAAACGATTTACAAGGGGCTACGCTTGCCATCGTCCCT +GGCGACCCGGATCGTGTGGAAAAGATCGCCGCGCTGATGGATAAGCCGGTTAAGCTGGCA +TCTCACCGCGAATTCACTACCTGGCGTGCAGAGCTGGATGGTAAACCTGTTATCGTCTGC +TCTACCGGTATCGGCGGCCCGTCTACCTCTATTGCTGTTGAAGAGCTGGCACAGCTGGGC +ATTCGCACCTTCCTGCGTATCGGTACAACGGGCGCTATTCAGCCGCATATTAATGTGGGT +GATGTCCTGGTTACCACGGCGTCTGTCCGTCTGGATGGCGCGAGCCTGCACTTCGCACCG +CTGGAATTCCCGGCTGTCGCTGATTTCGAATGTACGACTGCGCTGGTTGAAGCTGCGAAA +TCCATTGGCGCGACAACTCACGTTGGCGTGACAGCTTCTTCTGATACCTTCTACCCAGGT +CAGGAACGTTACGATACTTACTCTGGTCGCGTAGTTCGTCACTTTAAAGGTTCTATGGAA +GAGTGGCAGGCGATGGGCGTAATGAACTATGAAATGGAATCTGCAACCCTGCTGACCATG +TGTGCAAGTCAGGGCCTGCGTGCCGGTATGGTAGCGGGTGTTATCGTTAACCGCACCCAG +CAAGAGATCCCGAATGCTGAGACGATGAAACAAACCGAAAGCCATGCGGTGAAAATCGTG 
+GTGGAAGCGGCGCGTCGTCTGCTGTAATTCTCTTCTCCTGTCTGAAGGCCGACGCGTTCG +GCCTTTTGTATTTTTGCGTAGCGCCTCGCAGGAAATGCCTTTCCAACTGGACGTTTGTAC +AGCACAATTCTATTTTGTGCGGGTAAGTTGTTGCGTCAGGAGGCGTTGTGGATTTCTCAA +TCATGGTTTACGCAGTTATTGCGTTGGTGGGTGTGGCAATTGGCTGGCTGTTTGCCAGTT +ATCAACATGCGCAGCAAAAAGCCGAGCAATTAGCTGAACGTGAAGAGATGGTCGCGGAGT +TAAGCGCGGCAAAACAACAAATTACCCAAAGCGAGCACTGGCGTGCAGAGTGCGAGTTAC +TCAATAACGAAGTGCGCAGCCTGCAAAGTATTAACACCTCTCTGGAGGCCGATCTGCGTG +AAGTAACCACGCGGATGGAAGCCGCACAGCAACATGCTGACGATAAAATTCGCCAGATGA +TTAACAGCGAGCAGCGCCTCAGTGAGCAGTTTGAAAACCTCGCCAACCGTATTTTTGAGC +ACAGCAATCGCCGGGTTGATGAGCAAAACCGTCAGAGTCTGAACAGCCTGTTGTCGCCGC +TACGTGAACAACTGGACGGTTTCCGCCGTCAGGTTCAGGACAGCTTCGGTAAAGAAGCAC +AAGAACGCCATACCCTGACCCACGAAATTCGCAATCTCCAGCAACTCAACGCGCAAATGG +CCCAGGAAGCGATCAACCTGACGCGCGCGCTGAAAGGCGACAATAAAACCCAGGGCAACT +GGGGCGAGGTAGTATTGACGCGGGTGCTGGAGGCTTCCGGTCTGCGTGAAGGGTATGAAT +ATGAAACCCAGGTCAGCATCGAAAATGACGCCCGCTCGCGGATGCAGCCGGATGTCATCG +TGCGCCTGCCGCAGGGAAAAGATGTGGTGATCGACGCCAAAATGACGCTGGTCGCCTATG +AACGCTATTTTAACGCCGAAGACGACTACACCCGCGAAAGCGCGCTACAGGAACATATCG +CGTCGGTGCGTAACCATATCCGTTTGCTGGGACGCAAAGATTATCAACAGCTGCCGGGGC +TGCGAACTCTGGATTACGTGCTGATGTTTATTCCCGTTGAACCCGCTTTTTTACTGGCGC +TTGACCGCCAGCCGGAGCTGATCACCGAAGCGTTGAAAAACAACATCATGCTGGTTAGCC +CGACTACGCTGCTGGTGGCGCTGCGCACTATCGCCAACCTGTGGCGTTATGAGCATCAAA +GCCGCAACGCCCAGCAAATCGCCGATCGTGCCAGCAAGCTGTACGACAAGATGCGTTTGT +TCATCGATGACATGTCCGCGATTGGTCAAAGTCTCGACAAAGCGCAGGATAATTATCGGC +AGGCAATGAAAAAACTCTCTTCAGGGCGCGGAAATGTGCTGGCGCAGGCAGAAGCGTTTC +GCGGTTTAGGAGTAGAAATTAAACGCGAGATTAATCCGGATTTGGCTGAACAGGCGGTGA +GCCAGGATGAAGAGTATCGACTTCGGTCGGTTCCGGAGCAGCCGAATGATGAAGCTTATC +AACGCGATGATGAATATAATCAGCAGTCGCGCTAGCCCATTGGGAGTAGTTAAGCCGGGT +AGAAATCTAGGGCATCGACGCCCAATCTGTTACACTTCTGGAACAATTTTTTGATGAGCA +GGCATTGAGATGGTGGATAAGTCACAAGAAACGACGCACTTTGGTTTTCAGACCGTCGCG +AAGGAACAAAAAGCGGATATGGTCGCCCACGTTTTCCATTCCGTGGCATCAAAATACGAT +GTCATGAATGATTTGATGTCATTTGGTATTCATCGTTTGTGGAAGCGATTCACGATTGAT +TGCAGCGGCGTACGCCGTGGGCAGACCGTGCTGGATCTGGCTGGTGGCACCGGCGACCTG 
+ACAGCGAAATTCTCCCGCCTGGTCGGAGAAACTGGCAAAGTGGTCCTTGCTGATATCAAT +GAATCCATGCTCAAAATGGGCCGCGAGAAGCTGCGTAATATCGGTGTGATTGGCAACGTT +GAGTATGTTCAGGCGAACGCTGAGGCGCTGCCGTTCCCGGATAACACCTTTGATTGCATC +ACCATTTCGTTTGGTCTGCGTAACGTCACCGACAAAGATAAAGCACTGCGTTCAATGTAT +CGCGTGCTGAAACCCGGCGGCCGCCTGCTGGTGCTTGAGTTCTCGAAGCCAATTATCGAG +CCGCTGAGCAAAGCCTATGATGCATACTCCTTCCATGTGCTGCCGCGTATTGGCTCACTG +GTCGCGAACGACGCCGACAGCTACCGTTATCTGGCAGAATCCATCCGTATGCATCCCGAT +CAGGATACCCTGAAAGCCATGATGCAGGATGCCGGATTCGAAAGTGTCGACTACTACAAT +CTGACGGCAGGGGTTGTGGCGCTGCATCGTGGTTATAAGTTCTGACAGGAGACCGGAAAT +GCCTTTTAAACCTTTAGTGACGGCAGGAATTGAAAGTCTGCTCAACACCTTCCTGTATCG +CTCACCCGCGCTGAAAACGGCCCGCTCGCGTCTGCTGGGTAAAGTATTGCGCGTGGAGGT +AAAAGGCTTTTCGACGTCATTGATTCTGGTGTTCAGCGAACGCCAGGTTGATGTACTGGG +CGAATGGGCAGGCGATGCTGACTGCACCGTTATCGCCTACGCCAGTGTGTTGCCGAAACT +TCGCGATCGCCAGCAGCTTACCGCACTGATTCGCAGTGGTGAGCTGGAAGTGCAGGGCGA +TATTCAGGTGGTGCAAAACTTCGTTGCGCTGGCAGATCTGGCAGAGTTCGACCCTGCGGA +ACTGCTGGCCCCTTATACCGGTGATATCGCCGCTGAAGGAATCAGCAAAGCCATGCGCGG +AGGCGCAAAGTTCCTGCATCACGGCATTAAGCGCCAGCAACGTTATGTGGCGGAAGCCAT +TACTGAAGAGTGGCGTATGGCACCCGGTCCGCTTGAAGTGGCCTGGTTTGCGGAAGAGAC +GGCTGCCGTCGAGCGTGCTGTTGATGCCCTGACCAAACGGCTGGAAAAACTGGAGGCTAA +ATGACGCCAGGTGAAGTACGGCGCCTATATTTCATCATTCGCACTTTTTTAAGCTACGGA +CTTGATGAACTGATCCCCAAAATGCGTATCACCCTGCCGCTACGGCTATGGCGATACTCA +TTATTCTGGATGCCAAATCGGCATAAAGACAAACTTTTAGGTGAGCGACTACGACTGGCC +CTGCAAGAACTGGGGCCGGTTTGGATCAAGTTCGGGCAAATGTTATCAACCCGCCGCGAT +CTTTTTCCACCGCATATTGCCGATCAGCTGGCGTTATTGCAGGACAAAGTTGCTCCGTTT +GATGGCAAGCTGGCGAAGCAGCAGATTGAAGCTGCAATGGGCGGCTTGCCGGTAGAAGCG +TGGTTTGACGATTTTGAAATCAAGCCGCTGGCTTCTGCTTCTATCGCCCAGGTTCATACC +GCGCGATTGAAATCGAATGGTAAAGAGGTGGTGATTAAAGTCATCCGCCCGGATATTTTG +CCGGTTATTAAAGCGGATCTGAAACTTATCTACCGTCTGGCTCGCTGGGTGCCGCGTTTG +CTGCCGGATGGTCGCCGTCTGCGCCCAACCGAAGTGGTGCGCGAGTACGAAAAGACATTG +ATTGATGAACTGAATTTGCTGCGGGAATCTGCCAACGCCATTCAGCTTCGGCGCAATTTT +GAAGACAGCCCGATGCTCTACATCCCGGAAGTTTACCCTGACTATTGTAGTGAAGGGATG +ATGGTGATGGAGCGCATTTACGGCATTCCGGTGTCTGATGTTGCGGCGCTGGAGAAAAAC 
+GGCACTAACATGAAATTGCTGGCGGAACGCGGCGTGCAGGTGTTCTTCACTCAGGTCTTT +CGCGACAGCTTTTTCCATGCCGATATGCACCCTGGCAACATCTTCGTAAGCTATGAACAC +CCGGAAAACCCGAAATATATCGGCATTGATTGCGGGATTGTTGGCTCGCTAAACAAAGAA +GATAAACGCTATCTGGCAGAAAACTTTATCGCCTTCTTTAATCGCGACTATCGCAAAGTG +GCAGAGCTACACGTCGATTCTGGCTGGGTGCCACCAGATACCAACGTTGAAGAGTTCGAA +TTTGCCATTCGTACGGTCTGTGAACCTATCTTTGAGAAACCGCTGGCCGAAATTTCGTTT +GGACATGTACTGTTAAATCTGTTTAATACGGCGCGTCGCTTCAATATGGAAGTGCAGCCG +CAACTGGTGTTACTCCAGAAAACCCTGCTCTACGTCGAAGGGGTAGGACGCCAGCTTTAT +CCGCAACTCGATTTATGGAAAACGGCGAAGCCTTTCCTGGAGTCGTGGATTAAAGATCAG +GTCGGTATTCCTGCGCTGGTGAGAGCATTTAAAGAAAAAGCGCCGTTCTGGGTCGAAAAA +ATGCCAGAACTGCCTGAATTGGTTTACGACAGTTTGCGCCAGGGCAAGTATTTACAGCAC +AGTGTTGATAAGATTGCCCGCGAGCTTCAGTCAAATCATGTACGTCAGGGACAATCGCGT +TATTTTCTCGGAATTGGCGCTACGTTAGTATTAAGTGGCACATTCTTGTTGGTCAGCCGA +CCTGAATGGGGGCTGATGCCCGGCTGGTTAATGGCAGGTGGTCTGATCGCCTGGTTTGTC +GGTTGGCGCAAAACACGCTGATTTTTTCATCGCTCAAGGCGGGCCGTGTAACGTATAATG +CGGCTTTGTTTAATCATCATCTACCACAGAGGAACATGTATGGGTGGTATCAGTATTTGG +CAGTTATTGATTATTGCCGTCATCGTTGTACTGCTTTTTGGCACCAAAAAGCTCGGCTCC +ATCGGTTCCGATCTTGGTGCGTCGATCAAAGGCTTTAAAAAAGCAATGAGCGATGATGAA +CCAAAGCAGGATAAAACCAGTCAGGATGCTGATTTTACTGCGAAAACTATCGCCGATAAG +CAGGCGGATACGAATCAGGAACAGGCTAAAACAGAAGACGCGAAGCGCCACGATAAAGAG +CAGGTGTAATCCGTGTTTGATATCGGTTTTAGCGAACTGCTATTGGTGTTCATCATCGGC +CTCGTCGTTCTGGGGCCGCAACGACTGCCTGTGGCGGTAAAAACGGTAGCGGGCTGGATT +CGCGCGTTGCGTTCACTGGCGACAACGGTGCAGAACGAACTGACCCAGGAGTTAAAACTC +CAGGAGTTTCAGGACAGTCTGAAAAAGGTTGAAAAGGCGAGCCTCACTAACCTGACGCCC +GAACTGAAAGCGTCGATGGATGAACTACGCCAGGCCGCGGAGTCGATGAAGCGTTCCTAC +GTTGCAAACGATCCTGAAAAGGCGAGCGATGAAGCGCACACCATCCATAACCCGGTGGTG +AAAGATAATGAAGCTGCGCATGAGGGCGTAACGCCTGCCGCTGCACAAACGCAGGCCAGT +TCGCCGGAACAGAAGCCAGAAACCACGCCAGAGCCGGTGGTAAAACCTGCTGCGGACGCT +GAACCGAAAACCGCTGCACCTTCCCCTTCGTCGAGTGATAAACCGTAAACATGTCTGTAG +AAGATACTCAACCGCTTATCACGCATCTGATTGAGCTGCGTAAGCGTCTGCTGAACTGCA +TTATCGCGGTGATCGTGATATTCCTGTGTCTGGTCTATTTCGCCAATGACATCTATCACC +TGGTATCCGCGCCATTGATCAAGCAGTTGCCGCAAGGTTCAACGATGATCGCCACCGACG 
+TGGCCTCGCCGTTCTTTACGCCGATCAAGCTGACCTTTATGGTGTCGCTGATTCTGTCAG +CGCCGGTGATTCTCTATCAGGTGTGGGCATTTATCGCCCCAGCGCTGTATAAGCATGAAC +GTCGCCTGGTGGTGCCGCTGCTGGTTTCCAGCTCTCTGCTGTTTTATATCGGCATGGCAT +TCGCCTACTTTGTGGTCTTTCCGCTGGCATTTGGCTTCCTTGCCAATACCGCGCCGGAAG +GGGTGCAGGTATCCACCGACATCGCCAGCTATTTAAGCTTCGTTATGGCGCTGTTTATGG +CGTTTGGTGTCTCCTTTGAAGTGCCGGTAGCAATTGTGCTGCTGTGCTGGATGGGGATTA +CCTCGCCAGAAGACTTACGCAAAAAACGCCCGTATGTGCTGGTTGGTGCATTCGTTGTCG +GGATGTTGCTGACGCCGCCGGATGTCTTCTCGCAAACGCTGTTGGCGATCCCGATGTACT +GTCTGTTTGAAATCGGTGTCTTCTTCTCACGCTTTTACGTTGGTAAAGGGCGAAATCGGG +AAGAGGAAAACGACGCTGAAGCAGAAAGCGAAAAAACTGAAGAATAAATTCAACCGCCCG +TCAGGGCGGTTGTCATATGGAGTACAGGATGTTTGATATCGGCGTTAATTTGACCAGTTC +GCAATTTGCGAAAGACCGTGATGATGTTGTAGCGTGCGCTTTTGACGCGGGAGTTAATGG +GCTACTCATCACCGGCACTAACCTGCGTGAAAGCCAGCAGGCGCAAAAGCTGGCGCGTCA +GTATTCGTCCTGTTGGTCAACGGCGGGCGTACATCCTCACGACAGCAGCCAGTGGCAAGC +TGCGACTGAAGAAGCGATTATTGAGCTGGCCGCGCAGCCAGAAGTGGTGGCGATTGGTGA +ATGTGGTCTCGACTTTAACCGCAACTTTTCGACGCCGGAAGAGCAGGAACGCGCTTTTGT +TGCCCAGCTACGCATTGCCGCAGATTTAAACATGCCGGTATTTATGCACTGTCGCGATGC +CCACGAGCGGTTTATGACATTGCTGGAGCCGTGGCTGGATAAACTGCCTGGTGCGGTTCT +TCATTGCTTTACCGGCACACGCGAAGAGATGCAGGCGTGCGTGGCGCATGGAATTTATAT +CGGCATTACCGGTTGGGTTTGCGATGAACGACGCGGACTGGAGCTGCGGGAACTTTTGCC +GTTGATTCCGGCGGAAAAATTACTGATCGAAACTGATGCGCCGTATCTGCTCCCTCGCGA +TCTCACGCCAAAGCCATCATCCCGGCGCAACGAGCCAGCCCATCTGCCCCATATTTTGCA +ACGTATTGCGCACTGGCGTGGAGAAGATGCCGCATGGCTGGCTGCCACCACGGATGCTAA +TGTCAAAACACTGTTTGGGATTGCGTTTTAGAGTTTGCGGAACTCGGTATTCTTCACACT +GTGCTTAATCTCTTTATTAATAAGATTAAGCAATAGCATGGAGCGAGCCTCACCATCGGG +TTCGGTGAAAATGGCCTGAAAGCCTTCGAACGCGCCTTCGGTAATAATCACCTTATCACC +CGGATAAGGGGTTGCCGGATCGACAATGTCTTTCGGTTTATATACCGATAGCTGATGAAT +AACCGCCGATGGGACTATCGCTGGCGACGCGCCAAAGCGCACGAAGTGGCTGACACCGCG +GGTCGCGTTGATAGTCGTGGTATGAATCACTTCTGGGTCAAATTCCACAAACAGGTAGTT +GGGGAACAATGGCTCACTGACTGCAGTACGTTTTCCACGCACGATTTTTTCCAGGGTGAT +CATCGGTGCCAGGCAATTCACAGCCTGTCTTTCGAGGTGTTCCTGGGCACGTTGAAGTTG +CCCGCGCTTGCAGTACAGTAAATACCAGGATTGCATAATGACTCTTATCCGTTTAATCGG 
+GGCGCAAGGATAGCAAAAGCTTTACGCTAAGTTAATTATATTCCCCGGTTTGCGTTATAC +CGTCAGAGTTCACGCTAATTTAACAAATTTACAGCATCGCAAAGATGAACGCCGTATAAT +GGGCGCAGATTAAGAGGCTACAATGGACGCCATGAAATATAACGATTTACGCGACTTCTT +GACGCTGCTTGAACAGCAGGGTGAGCTAAAACGTATCACGCTCCCGGTGGATCCGCATCT +GGAAATCACTGAAATTGCTGACCGCACTTTGCGTGCCGGTGGGCCTGCGCTGTTGTTCGA +AAACCCTAAAGGCTACTCAATGCCGGTGCTGTGCAACCTGTTCGGTACGCCAAAGCGCGT +GGCGATGGGCATGGGGCAGGAAGATGTTTCGGCGCTGCGTGAAGTTGGTAAATTATTGGC +GTTTCTGAAAGAGCCGGAGCCGCCAAAAGGTTTCCGCGACCTGTTTGATAAACTGCCGCA +GTTTAAGCAAGTATTGAACATGCCGACAAAGCGGCTGCGTGGTGCGCCCTGCCAACAAAA +AATCGTCTCTGGCGATGACGTCGATCTCAATCGCATTCCCATTATGACCTGCTGGCCGGA +AGATGCCGCGCCGCTGATTACCTGGGGGCTGACAGTGACGCGCGGCCCACATAAAGAGCG +GCAGAATCTGGGCATTTATCGCCAGCAGCTGATTGGTAAAAACAAACTGATTATGCGCTG +GCTGTCGCATCGCGGCGGCGCGCTGGATTATCAGGAGTGGTGTGCGGCGCATCCGGGCGA +ACGTTTCCCGGTTTCTGTGGCGCTGGGTGCCGATCCCGCCACGATTCTCGGTGCAGTCAC +TCCCGTTCCGGATACGCTTTCAGAGTATGCGTTTGCCGGATTGCTACGTGGCACCAAGAC +CGAAGTGGTGAAGTGTATCTCCAATGATCTTGAAGTGCCCGCCAGTGCGGAGATTGTGCT +GGAAGGGTATATCGAACAAGGCGAAACTGCGCCGGAAGGGCCGTATGGCGACCACACCGG +TTACTATAATGAAGTCGATAGTTTCCCGGTATTTACCGTGACGCATATTACCCAGCGTGA +AGATGCGATTTACCATTCCACCTATACCGGGCGTCCGCCAGATGAGCCCGCGGTGCTGGG +TGTCGCACTGAACGAAGTGTTTGTGCCGATTCTGCAAAAACAGTTCCCGGAAATTGTCGA +TTTTTACCTGCCGCCGGAAGGCTGCTCTTATCGCCTGGCGGTAGTGACAATCAAAAAACA +GTACGCCGGACACGCGAAGCGCGTCATGATGGGCGTCTGGTCGTTCTTACGCCAGTTTAT +GTACACTAAATTTGTGATCGTTTGCGATGATGACGTTAACGCACGCGACTGGAACGATGT +GATTTGGGCGATTACCACCCGTATGGACCCGGCGCGGGATACTGTTCTGGTAGAAAATAC +GCCTATTGATTATCTGGATTTTGCCTCGCCTGTCTCCGGGCTGGGTTCAAAAATGGGGCT +GGATGCCACGAATAAATGGCCGGGGGAAACCCAGCGTGAATGGGGACGTCCCATCAAAAA +AGATCCAGATGTTGTCGCGCATATTGACGCCATCTGGGATGAACTGGCTATTTTTAACAA +CGGTAAAAGCGCCTGATGCGCGTTTGTTTTGCCCTATTTATCGATCCGACAGAGAAAGCG +CATGACAACCTTAAGCTGTAAAGTGACCTCGGTAGAAGCTATCACGGATACCGTATATCG +TGTCCGCATCGTGCCAGACGCGGCCTTTTCTTTTCGTGCTGGTCAGTATTTGATGGTAGT +GATGGATGAGCGCGACAAACGTCCGTTCTCAATGGCTTCGACGCCGGATGAAAAAGGGTT +TATCGAGCTGCATATTGGCGCTTCTGAAATCAACCTTTACGCGAAAGCAGTCATGGACCG 
+CATCCTCAAAGATCATCAAATCGTGGTCGACATTCCCCACGGAGAAGCGTGGCTGCGCGA +TGATGAAGAGCGTCCGATGATTTTGATTGCGGGCGGCACCGGGTTCTCTTATGCCCGCTC +GATTTTGCTGACAGCGTTGGCGCGTAACCCAAACCGTGATATCACCATTTACTGGGGCGG +GCGTGAAGAGCAGCATCTGTATGATCTCTGCGAGCTTGAGGCGCTTTCGTTGAAGCATCC +TGGTCTGCAAGTGGTGCCGGTGGTTGAACAACCGGAAGCGGGCTGGCGTGGGCGTACTGG +CACCGTGTTAACGGCGGTATTGCAGGATCACGGTACGCTGGCAGAGCATGATATCTATAT +TGCCGGACGTTTTGAGATGGCGAAAATTGCCCGCGATCTGTTTTGCAGTGAGCGTAATGC +GCGGGAAGATCGCCTGTTTGGCGATGCGTTTGCATTTATCTGAGATATAAAAAAACCCGC +CCCTGACAGGCGGGAAGAACGGCAACTAAACTGTTATTCAGTGGCATTTAGATCTATGAC +GTATCTGGCAAACCATGCCCGATGCGACGCTGTCGCGTCTTATCGTGCCTACAAATAGTC +CGAACCGTAGGCCGGATAAGGCGTTTACGCCGCATCCGGCAATTGGTGCATGATGCCTGA +TGCGACGCTGGCGCGTCTTATCAGGCCTACATTGGTGCCGGATCGGTAGACCGGATAAGG +CGTTCACGCCGCATCCGGCAAGTGGTTAAACCCGCTCAAACACCGTCGCAATACCCTGAC +CCAGACCGATACACATCGTCGCCAGACCAAACTGAACGTCTTTGCGTTCCATCAGATTCA +GCAGCGTGGTGCTGATACGCGCACCGGAACAACCCAGCGGATGACCCAGCGCGATCGCGC +CACCGTTGAGGTTGATCTTCTCGTCAATCTGCTCAATTAGTCCCAGATCTTTAATACATG +GCAGGATCTGCGCGGCAAAGGCTTCGTTCATTTCAAACACGCCGATATCGCTGGCAGAAA +GCCCCGCTTTTTTCAGCGCCAGTTTCGAGGCCGGAACCGGGCCGTAACCCATAATCGATG +GGTCACAACCAACGACCGCCATCGAACGCACACGAGCGCGCGGCTTAAGACCTAATTCAT +GGGCGCGGCTTTCACTCATCACCAGCATGGCAGCTGCGCCATCGGAAAGTGCAGAAGATG +TGCCCGCCGTTACCATACCGTTTACTGGATCAAACGCCGGACGCAGCGTGGCGAGGGCTT +CCACGGTGGTTTCCGGGCGAATCACTTCGTCGTAATTAAACTGCTTCAGGACGCCGTCGG +CATCGTGACCACCGGTCGGGATGATTTCATTTTTAAATGCGGCCGACTGCGTGGCGGCCC +AGGCGCGGGCGTGTGACCGCGCGGCAAAGGCATCCTGCATTTCACGGCTGATACCGTGCA +TACGCGCCAGCATTTCTGCCGTTAAGCCCATCATGCCCGCCGCTTTGGCGACATTGCGGC +TCAGGCCGGGGTGAAAATCGACGCCGTGACTCATCGGCACATGGCCCATATGCTCCACGC +CGCCAACCAGACATGCCTGCGCATCGCCAGTCATGATCATTCGTGCTGCGTCATGCAGTG +CCTGCATGGATGAACCACACAAGCGATTAACGGTAACCGCCGGGACAGAGTGTGGTACTT +CTGCCAGCAGCGCCGCGTTACGGGCGATATTAAAACCCTGCTCCAGCGTCTGCTGCACAC +AACCCCAGTAAATATCGTCGAGGGCCGCCGCTTCCAGCGCCGGGTTACGCGCCAGCAGGC +TACGCATTAAATGAGCGGAGAGATCTTCTGCACGCACGTTACGAAAAGCACCGCCCTTCG +AACGGCCCATCGGGGTGCGAATTGCATCGACAATGACAACCTGTTCCATTGTGACTCCTT 
+AAGCCGTTTTCAGGTCGCCAACCGGACGGGCTGGCTCAACCGGAGGATAGTACGGTTCGT +TATGACGCGCTTTATTACGCAGACCTTCCGGCACTTCATACAGCGGGCCGAGGTGCTGAT +ATTGCTGTGCCATATCGAGGTATTTTGCGCTACCGAGGGTGTCCAGCCAGCGGAACGCGC +CGCCGTGGAACGGAGGGAAGCCCAGGCCGTAGACCAGCGCCATATCCGCTTCCGCCGGAG +TGGCGATAATGCCTTCCTCCAGACAGCGCACCACTTCGTTGACCATCGGGATCATCATGC +GGGCGATAATCTCTTCTTCGCTGAAATCGCGCTTCGGCTGGCTCACTTCTGCCAGCAGGT +CTTCAACGGCGGCGTCTTCTTCTTTCTTCGGCTTACCTTTGCTGTCTTCTTTATAACGCC +AGAAACCGAGGCCGTTCTTCTGACCAAAGCGGTTGGCATCAAACAGCGCGTCGATGGCAT +CGCGGTAATCTTTCTGCATCCGCTGCGGGAAGCCTGCTGCCATGACAGCCTGAGCGTGAT +GCGCGGTATCAATGCCCACAACGTCCAGCAGATATGCCGGGCCCATCGGCCAGCCAAACT +GTTTTTCCATCACTTTGTCGATCTTGCGGAAATCCGCGCCGTCGCGCAGCAGCTGGCTGA +AACCGGCGAAATACGGGAACAGCACGCGGTTAACAAAGAAGCCGGGGCAGTCGTTAACCA +CAATCGGCGTCTTGCCCATCTTGCTCGCCCAGGCGACAACTTTCGCGATGGTTTCGTCGG +AGCTTTTCTCGCCGCGAATAATTTCTACCAACGGCATTCGGTGGACCGGGTTAAAGAAGT +GCATCCCGCAGAAGTTTTCCGGGCGTTCCAGCGCGTTGGCCAGTTCGCTGATAGGAATGG +TTGAAGTGTTAGACGCCAGCACGGTATCCTGGCGTACTTTTTGTTCGGTTTCTGCCAGTA +CGGCTTTTTTCACTTTCGGGTTTTCAACAACCGCTTCTACCACAATATCCACGCGGTCAA +ATCCGGCGTAGTCGAGCGTTGGGTGGATTGTGGAGATCACGCCAGCCAGTTTCAGACC +>NODE_11_length_39995_cov_63.1687_ID_21 +CTGCCCAAACTGGTGCAGGCGGCGAACGGGGCAGCCCTCGCGCAGCTTCAGGTCAAGCTG +TCGAACAACGACTACACCCCCGCCCGCACCCTGCAAGACGCCCAGCTCGCGCTCGCCAAC +GCCCAGCGCACCCTGAACGACGCGACGCGGGCGAGCAGCACCGGGGTCAGCGACGCCTAC +CGCGCCGTGCAAAACGCCCAGCAGCAAGTGAACATCGCCCGGCAGCAGGCGACCAACGCG +CAGACCGCGCTCACCCAAGCGCAGGCCCGCCTCAAGGCCGGCACCGCCGCCGCCGTCGAA +GTTCAGCAGGCGCAGGTGCAGGCGCAACAGGCGCAACTCGGCGTGCAGCAGGCGCAAGAC +GGCCTGTGGCAGGCGCTCGCCGCGCTCGGGGCCGCGAGCGGGACGGACGTGACCGGGCTG +GTGAAATAGTGGGGACTGGTGAAATAGTGGGGGCTGGTGAAGTAGTGGGCGGCCCTCTTC +AGATGGCGCGGCGGGTGACCAGAGGAGCCGCGCCGCGCCTGGCCGGTGCCCTGCTTCTCA +CGGCGCTGCTTGCCGGGTGCTCGCCCAAGACCGAAACGAAAACCAACGACCTCGACGCCG +CGCTTCCCAAGACGACCACCCTCAGCGTGCAGACGGTGACGGCGAGGGCGGGCACGCTGA +CCGCCCAGCGCAGCGCGAGCGCGACCATTCAGGCCGAGCGCGACAGCCAGGTGGCGACCC +AGTCCTCCGGCGCCGTGCAGAGCATTCCCGTGAGTCAGGGCGAGGCGGTGGCGAAAGGCG 
+ACGTGCTCGTCAAACTCGACGACACCGCTCAACAGCAAGCGCTCGACAACGCCCGGCTGC +AACAGCGCCAGGCCCAGATCAGCCTCGACCAGACGCGGCAAAGTACGTCCCAGGGGACCG +GCGCCCTGCAGGCGAGCGTGACCTCGGCGCAGGCGGCGCTCGCGCAGGCCGAGCAAAACG +CCCAGAGCGCCGAGAAGCTCTACGGCCTCGGCGGCATCAGCCTCGCGGACGTGCAGGCGG +CCCGCTCGCAGCTCGCGCAGGCCCAGGCGCAACTCGCGCAGGCCCGCAATACCCTGGAGC +AAAACGGGCGCAGCGCCGGCAACTCGGTGCCGCTCGCGCAGGTGCAGCTCGACACGGCCC +GCACTGCCGTGCGCCAGGCCGAGCAAAACCTCAGCCGCACCGCCGTCCGTGCCCCCTTCG +CTGGTACGGTGGCCGACGTGCTGACCGAGGTCGGCGAGTTCGCCGGGCAGGGCACCCCGG +TCATCCGCCTCGTGGACCCCGGCAGCGTCCGCGCCCGCGTGGGCGTGCCCACCGCCGACG +CCGCCGCGCTCACCGAGGGCGTCAAGTTCAACCTCAGCTACGGCGGCAAAAGCTACGTGG +CGACGGTGGTAGACAGCTCGGGTATCGCCGGCAAAGACCGCCTGGTGCCGATTACCGCGA +CCATTGAGGGCGGCAATGCTCTGCCCGTCGGCGCCGCCGCCCGCGCCAGCTACCGCGCCA +CACTCGGCAGCGGCCTGCTGATTCCGGCGAGTGCCCTTCAGGTGGAAGGCGGTGAAAACG +CCGTCTACGTCGCCCGCAGTGGCAAGGCCGAGCGCGAAGTCGTGCAGGTCGTCGCCGAGA +GCGGCAACCGGGTGGTGGTGTCGGGCCTTCAGGACGGCGACGCGGTCATCAGCCCCCTGC +CCGCCGGGGTGCAGGACGGGGCGAAGGTGGTGGTGAAGTGAGGGCCATAGCGCACAGAGT +CGAGGGCCGGGGGGGAGAAAACCCCTCACCCCTTGCTGCGCAAGGCCCGCTGCTTCGCAG +CTTTGCAAGTCTCCCCTTGGGAGAGGGTCAACAGCGCAAAAACCTTAAGCCTTCTATGAC +CCTCGCCCCTTGCGGGAGAGGGCCTGCCGCAGGCAGGGGTGAGGGGGCCAGCCCGTCCTG +TCTTTCCCCACACGCCCCCCAGGTCACCCCATGAGCACCCACTTCGACGAAGCCGAATTC +AGGTCCGGCGGCACCCTGCCCGACGGCACACCCGAGCCGCAGATTCACCCGCTGGTGCGC +TTCAGCGTCAAGAACTACGTCTTTTCCATCGGCATCTTCGTGATGGTGGTCCTGCTGGGG +CTGGTGGCGACCTTCCGGCTGGGGGTCGAGCTGCTGCCCAACTTCGAGGTGCCGGTGCTG +GCGGTGAGCACGTCCTACCCCGGCGCCAATCCCGACCAGGTGGACCGCGAGGTCAGCCGC +CGGGTGGAAGACGCGGTGAGCACGCTCTCGGGCGTCACCGACATCAACACGACCTCGGTC +AGCAATCAGTCGGCGGTGGTTATCACCTTCAGCGACTCGACCAACATTGACTCGGCGGCC +AACTCGGTGTCGCAGGCGGTGGCGGCGATTCGCGGCACCCTGCCCGACGGCGCGGAGGCT +CCGGTGGTGCAGAAATTCGACCCCAACGCCCAGCCGATTCTGACGCTGGCGCTGCTCGGC +GGCGCGGCGCGGCCCAGCGAGGTGACGACCTTTGCCGAGGACACGCTGGTGCCCCGCCTG +CAACGGGTGGAGGGCGTGGCCGACGTGACCGTGACCGGCGGGCCCGAGCGGCAGGTGCAG +GTCTTGCTTGACCCGGCGCGGCTGCAAGGCTTTGACCTCGCGCCCGCACGAATCAGCGGG +GCCATCGGTTCCTCGGCGCTCGACCTGCCTGCCGGGACGCTCGACCGGGGCGGCTCGACC 
+ACCTCCTACAGCACCCGCAACACCCCGCGCAGCGCCGCCGACGTGGCGCGCATCGTGGTG +GACCCGTCCACCGGCCTGCGCGTGAGCGACGTGGCGACGGTGCGTGACGCCAGCGCGGCG +GCCAACAGCTACGCCCGCGTCAACGGGCAGCCCGCCGTGCTGCTCGGCGTCCGCAAGGCG +AGCGGCACCAACTCGGTGGCCGTGACCGACAACGTGCGCGCGGCGATGGAAGCCCAAAAG +CTCCCGCCGGGCTACCGCCTCACCCTCGCCAGCGACACCACGACCAGCACGCGGGCCACA +GTGAACGACACCTTCCGGGAATTCCTGATTGCGGTGGGCGCGGTAGGTCTGATTTGCCTG +CTGTTCCTGGGACGGCTCAACACCGTGTTCGCCGTCATTCTCGCCATTCCGATTTCCATC +AGCGCCGCGCCGCTGCTCTTCGGGACGCTCGGCTTTACCTTCAACATCATCACGCTGCTC +GCCATCATCGTCGCCATCGGCATCGTGGTGGACGACTCTATCGTGGTGGCGGAGAACGTG +CAGCGCTACCGCGACCTGGGCTACTCGCCGCTGCGGAGTGTGCTGCTCGGCGGCTCGGAA +GTGTTTTCCGCCGTCACCGCCGCGAGTTTCTCGCTGCTGGCGGTCCTCATTCCGCTGAGC +CTGATGCCGGGCATCCTGGGGCAGTTCTTCAAACAGTTCGGGCTCGGCATCGCCGCCGCC +ATCGTGCTGAGCTGGCTGGAAAGTCTGCTGTTCCTCACCGTCCGCATGGCGTACACCCGC +GAACCCGCCCGCATCACCTGGGCTGACCTGCCCGCTGTGCTGCGCCGGCTGCCGCTCACT +TTCCGCGAGTCGCTGAGCGGCGTGAAGACCTTCTGGGGCCTGCTCGGGCTGGCGCTGGCC +GGGGCCGCGACGTACTTTGGCCTGCACCGCGCCGGGCTGCCCGCTGCCGCCGCCGGGGTG +CTGTGCGTCCTCCTCGCGCCGGTCGTGCTGACTGTCGTCCGCTACCTCCTCACGGTGCTG +TATGCCCTGCTCGAAGCGCTGACCGAGACGCTCCACACCCTCACGCTGAGTGGCGTCAAC +CGCGCGGCCAAAGCCTACGCCCGCTCGCTGGCAGGCGCCCTGCGGCGGCCCGGCGTGGTG +ATGCTGGTCGCGGGCCTCTTTCTGCTCAGTGCGCCGCTCGCCCTGCGCGGGCTGGGCTTT +GCCTTCGTCCCGGCGAGCGACAGCGGCATTGCGACCATCAGCCTGACCCTGCCGGTGGGC +ACGCCGCTGGCCGTGACGGATGAGCTGACCCGGCAGGTGGAAGACAAGCTGCTCGCGCAC +CGTGAAGTCAAGCTGGTGCAGACCGCTTCGGGCAGCAGCGGGGTCCTCGGCGGTGCTAAC +GCCAACACCTCCGACCTCACGCTGACGCTGATTCCCAAAGCCGAGCGTCCCGGCATCGAC +GAGTTGCTGGAACGCTACCGCCGTGAACTTGCCCCGCTGGTCGCCCGCTACCCCGGCACC +GAATTGCTGGTGGCGGGGCAGGCGGTGGGGCCGGGCGACAGTTCGGACATCTCGCTCGCG +CTCACCGCCCCGAGTCAGTCGCTGCTCGAAGAACGCAACCGCGCGGTGGTCCGGCTGCTG +AGTGCCGACCCCAACATCCGCACGGTGAAAAGCAGCCTCTCGGCCACCCGGCAGGAGCGC +ACCTTCGTTCCCGACCCGCTGCGGCTGAGCGGCACGGGCCTGAGTGCGAGCGACGTGGCG +CAGGCGCTGCGAACCTACAACGACGGCACCGTGGCCGGGCAGGTCCGCGACGGCGACCGC +AGCGTGGACATCGTGGTGCGGCTCGACCCCGCGCTGGTGTCGGGCGAACAGAGTCTGCTC +TCGCAGACGCTCTACTCGCAGGCCCTCGGCGCCAACGTGCCGCTCTCCAGCCTGGGCCGC 
+TTCGAGGTCGCGCAGGCGCCCGCCACGCTGCGGCGCTTCAACAAGGCGTACACCGCCACG +CTCGACATCAACCTCGTGTCGGGCGGTCCCAACCCCTTCGCCTATCAGAAAGACGTGCAG +CAGCGCGTCGAAAAGGCCGGGCTGCTCGCGGGCGGCGTGACCCTCGGCAACGCCAACTCC +TTCGGCAGCGCGGGGCTGACCGGCGACCTGCTGTTCTACGGCCCGATTCTGATGGTGGCG +GCGGTGCTGCTCACTTATCTGGTGCTGGGCAGTCAGTTCAACTCGTTCCGTTACCCGATT +TATCTGTTGCTCCCGATTCCGCTCGCCATCGTGGGGGCGCTGTGGACGCTGCACCTGTTT +GGCGTCAATCTGGACGTGATTACGGTGCTGGGGATGGTGATTTTGCTCGGCCTCTCCACC +AAAAACTCCATCCTGTACCTCGAATTCGTGACCGAACGCGCCCGCGTCCTGCCGCTGCAC +GAGGCGCTGCTCGAAGCCGCCGAACTGCGGTTTCGCCCCATCCTGATGACCACGCTGACG +GTGCTGGTGATTTCCATTCCCCTCATCCTCGGCCACGGCGAGGGCGCCGAATTCCGCCGC +GGCCTGGGCATCGTGATTCTGGGGGGCGTGGTGACGAGCACCCTGCTGACCTTCTACGTG +GTGCCGAGCGTCTTCTGGCAGTTCGAGAAAAAGCGCATGGCCACGCCACAACCGAAATTG +GAGCCGGTGGCTGGAGACTGAACCCTCGCTGAAACCGTGAAGAAATGGCGCATGTGCGGC +CCTCTCCTACTAAGGAAGAGGGCCGCACGCTGTAGGGACGGGCTCATAAAGTTCTCTAAA +GCATTTGACAGAAAAAGACACCCTCACCCCTGCCTTCGGCAGGCCCTCTCCCATCAAGGT +AGAGGGTCAAAAATGCTCTAGAAAAGGCGAATCAGTGAACTTTATTGCGGCTGCAAGGGA +GCCAGTACAGCCCGGCGTTGGCCTCGTTCGGGTTGGTCGAGCCCCAGCACCAGCCCTTGG +CCCGGATGCGGCGCAGCGTGGCGTCACGTTTGTCGCACATGCTCTGGGTGCTTTTGGAAG +AGTGCGCCGACTGGCAGACCTTCTCCTGGGCGATGGCCTGATGAATCAGCGACCGGACGG +CGGGGTCGTGGGGCTGGTGGCTGGTGCCGATTGCGCCTGCACCAGCGGTTGAGGTGAGCA +GGGTCGCGGCGAGCAAGAGGGGACGCAGAGTGTTCATGGCTGCACTGTGCGGGGGCAAGG +TTGGAGGCGCGTGAAAAGTGACTTCAGGGGTTAGCGACCGCCGCTCATGTGCGGGAGGTC +TTCCCGTTCGGCCCTCAGCCCCCTCGTTACTGCTCACGGAGGGGCTTTTGCATTTGCCCC +CCGCCGTGTATATGCATAGAATATTCAGAATGCTGGGCAAGGAAATCGGAAAAGACCAAC +GACAAAAGCGCATTCAGGACATCATCCTGCGGGAGAGTGTGTCCACGCAGGCCGAACTGG +TCAAGCTGCTGGCGAAAGAAGGCGTGCAGGTCACGCAGGCCACCGTCAGCCGCGACATCA +ACGAGCTGCGGCTGGTGCGGGTGCCCATCGGCAAGGGGCGGCACCGCTATGCCCTGGCGC +AGTACGGCGGCGACAGCGACATCGAGGAGCAGCTCGCCCGCCTCTTTCAGAGCTTCGTGC +AGGACGTGGACCGGGGCGAGAACATCCTGGTCATCCGCACCGCCGACGGGCACGCCTCGG +GGGTCGCGCTGCTGCTCGACCGCTGGAAGCGCGACGACATCGTGGGTACGCTGGCGGGCG +AGGACACCATCATGGTTGTGGCCCGCTCCACCCATGACGGCGAGAGCCTGATGGAAGAAT +TCAACGCGCTGATGCTGGGGTAACAGCTCTCACAGCCCAGTCCAGAGCACAAAAAAGGCC 
+CCCTTCCACACCTGGGAAGGGGCTTTTTCCTGGGCTGTTTCGGGCCGCTTATTCTTCGCG +CAGCACGTAGCCGACGCCGCGCACCGTGTGAATCAGACGCCGCTCTCCGCCTTCTTCGAG +CTTGCGGCGCAGGTAGCCGATGTACACGTCCACCACGTTGCTGCCGCCGGTATACTCGGG +CCAGACCTTTTCCTCGATTTCAAAGCGCGAAAAGACCTTACCGGGGTTGCGGGCGAGCAG +TTCGAGCAGTTCGAATTCCTTCGCCGAAAGTTCGACCCGGCGTCCGCCCCGGAAAATCTC +GCGTCCATCGAGGTTCATCACCAGGTCGGCCACCCGCACTTCGCCCGTCACGGCGGGGTT +GACTCGCCGCAGATGGGCGCGGACGCGGGCCAGCAGTTCCTCGATGGAAAAGGGCTTGAT +GAGGTAGTCGTCGGCGCCCGAGTCCAGGCCCTCCACCTTGTCCTGAATGCCGTCCTTGGC +GGTCAGGATGATGATGGGGGTGTTGCTCGTCTTGCGGATGCGGCGGGCGACCTCAAGGCC +GTCGAGGACCGGCAGCATCAGGTCGAGAATCACCAGATCGGGGTTGACTTCGCGGAATTT +CGACAAGCCGGTCACGCCGTCAAAGGCCACCTCGGTGGCGTAGCCCTCGGCGGCCAGTTC +CAGCTCGATGAAGCGGGCAATATCTTTTTCGTCTTCGATGACGAGGACGAGCGGCTTGCG +TTCCATGCCCGCAGTGTAATGAGTGCTCTCATGAGAAGGCGGCGCCCCGTCCTTAAGTTC +TATTCATCACAAATGCTGGCGGGGGCGGGCAGCGCTGGGCCGTCACGCGGGTCACAGGTG +AGCGCCGTACACTGCCGGACGTGACCACGGGCCGTTCCACTTCTTCGCCTTCCGCCTCGC +CGGGCCAGGGCACGTCGGCGCAGGACCCGGCCCCCCGGCGCTGGCTGCTGCGGCCCTCGT +CGCTGCAATTGCGGCTGACGTTGCTGTACGCGGCGCTGCTGGCGCTGATGCTCGCCTCGG +TGTACGGTGCCGCGCTGGTGCTCATGCGCAGCAGCTTGATTACCGGGCTTGACGAGGGGA +TGCGCAACACTTACAGCCAGTTCAGCGAGCTGGTGGCGCAACTTGCCCTTGACTCGCCCA +CCACCGAGCAAGAGCGCGACAAGGAAGGCGTGCTGCCCCGCGCCCGCACGCTGTTTCCCA +ACGACGCGATTCAAATCGAGAAGCTGCCTTTCGTGGACCACGACCGGCTCCTGACCCGCC +TGAGCGACGCCCAAAAGCCCGCGCAGCAGCGTCAGGAATTGCAGGTGGTGCGCTCGCAGC +TCACGAAGTACCGCTACCCGGTCACGGTCAACCGTGCTTCTCCGCTCGAACTCAGCGACG +CCGAGCTGCTCGACCTTATCGAGTCGCCCACGGGCCGCATCTTCATCGCCCGGCAGATTC +GTGAGCCCTACAGCGACAAGACGGTGCCCTACCGCATCCTCGTGACATTGGCGGAGGTGC +AGTACGCGCCGCGTCCCCTCGCTTCGCTACGCGACGGCGACGTGACGGTGGATTTCATGC +CGCCGCCGGTGCTTTCGATTATTTACGTAGGGCGCAGCGTGGCGGGCATCGAGGACACGC +TGGGGCGGCTGCAACGGGTGTTCGCCATCGTGATGCTCTTCGGTGCTCTGCTCGCGGGCA +CGCTGGCTTACGTGCTGGCGGGCCGGGCGCTGCGGCCCCTGCAAGAAGTGCGCCAGGCTG +CCGAGCGCATCGGGGGACAGACGCTGACCGAGCGCGTGCCCGAGCCCCAGACCGGCGACG +AGGTGCAGGCCCTCGCCCGCTCGCTCAACGCCATGCTGGGGCGGCTGGAAGCGAGTTTCG +AGGCGCAGCGCCGCTTTACCAGTGACGCCAGCCACGAGCTGCGGACGCCCGTCACTGCCA 
+TCAGCGGGCACGCGAGTTACCTGCTGCGCCGCACCAATCCCGGCGGGCAGGAGCGCGAGA +GCCTCAACATCATCCGTTCAGAATCCGAGCGGCTGACCAACCTGATTACCAGCCTGCTGC +AACTCGCCCGCTCCGACAGCGGGGCCCTGACCCTGAACCCCGCGCCGATTTTCTCGCGCC +TCTTTCTGGACGACGTGAGCCGCGAACTCGCTCCGCTGGCGACCGGGGGCTCCGAGCTGC +GGGTCAGCGGTCCCGACATTCCTTTCGAGGGCGACCCCGACCGGCTGCGGCAGGTCATTA +TCAACTTGGTCGGCAACGCGCTCAAGGCCGGGGCCAAGACGGTCACGCTGGAGAGCAGCT +CGCAGGAAGAGGGCCGCGAGGTCCGCCTGAGCGTGCGCGACGACGGCCCCGGCATCCCGG +CCGAGCACCTCTCGCGTCTCTTTGACCGCTTCTACCGCGTGGAAGACAGCCGCAGCCGCG +ACCAGGGCGGCGCGGGCCTGGGCCTGAGCATCGCGCACAGCATCGTGGACGCCCACGGCG +GGCGCATCTGGCTGGAGAGCGAGGTCGGGCGCGGCACCGTGGCGCACGTCCAGCTCCTGG +TGGGCGACGTGCCGGTGCTGGACGAAGACGACGTGCCGTAGGGAGACGGGGGAGAGGCAA +CTTCCCAGCTTCTCCATTGGTACCTCTGCGCCCCCGGCCACCTTCCCCCGCCCGCGCCCG +CCCTAGACTCGGCGGGTGCGGCGTTTTCCTGTCCTGTTTCCTCTGCTTTTGCTGCTGGCG +CTGACGGCCTATCTGCTGCCCGCCCGGACGCCGCAGAACGTGGAGGCCCCGCCGCCCGCC +GCCACGACGACGCTGCCACAGGCGCTCCCGACCGCCGAGCGCGAGCTGTTCGCCTCGCTG +CGTCCGGCGGTGGTGCGGGTGGACAGCGTGAACACGGCGACGCGCACCGGGGGCCTGGGC +ACCGGCTTTTTCATCAGCGGGGAGGGGCAGGTCCTGACCGCCTACCACGTCGTCAAGGCG +GGGCAACTCTTCGCAGTGACGACCCTGGCGGGCAAGACCTACCCGGCGCGGGTCGCGGCT +TTCGACGAGGCGGCGGACGTGGCCCTCTTGCAAGTCGCGCGCGGCGGCCCCTTTCCCTAC +CTTGAACTGGCGAGCGGGACACCGCAGGTGGGCGAACGGGTGCTCGCCATCGGCAACAGC +GGCGGCGACTTTTTGCAGCCCCGGCGTGGCGAACTGCTGCGGTTGAACGCCGAGGCGGGC +CGCAGCGATTTCCCCGAGGGCACGCTGGAAATGAACGCGCCGCTCGCGCAGGGCGACAGT +GGCGGGCCGATTTTCAATGAGCGCGGCGAGGTGCTCGGCGTGGTGAGTTACATCCGGGTG +AGTGGCGACGGCGTGACGCGGGCGAGCTACGCGGTGCCGGTGCCGGGCGGCGGCGACCTC +ATTCGCGGCCTGCAAGCGGGGGAGAGGCGCGAGTCGCCGCTGACGGCCCTCGTCGGCCTC +GCCTTCGACCAGATGCACAGCGGCCTGACCGACCCGCCCGGCGCCGTGGTGCTGCGCGTC +ACCCCCGGCAGCGCCGCCGCCCGCGCGGGCCTGCGCGGCTGCGTGGCCGACCGCCAGGGC +CAGCTCACCGGCCTGGGCGACGTGATTCTCAGCATCGGCGGCGTGCGGACGCCCGACAGC +GGCACGGCGCTCGACCAGGTCAAGCGGCTCAAGGTGGGCGACCAGGTGGACGTCGAATAC +CTGCGCGGAGAGCAGCGGGCGCACGCCACCCTCACCCTGCGCGCGCAGCCGGTGGGCAAA +ACACCGCTGAATGCCGAGCCCTGCACCCGTCAGTAATTAATTCAGGCGGCAAAGGTAGAG +AATCCCACAAGCCTTTTCCTATCCACGTTTGGCGCTGACCAGGCAAAATACAGAGATGTC 
+GAGAGCTTTTGTCAAGGAAGACGGCGGCGAACGCTGGACCCCACCCGCAAAGGCCGCCGA +TTACCGGGTGGTCTTTGAGACGCTGGACGGCCCCGAGACGGTGTACGAGGCCGATGACCT +GCTCGGCGCCCTGCGCTGGGCGGCGGCCCGCCCCGGCGCCGGCTTCGAGGTGCGCGGACG +AGACGGGCGGCTGCTGGCCGTGAGCTGAGCAGAAATGAAGTAGGCGGGGAGCTGAACACA +CGGCTCCCCGCCTTTCTGTTCTCTGCTCTTGCCTTACCAGTCGCCCTCCACCGTGAAGCG +CACCGCCCCCGCCAGCGCTTGCCCCGGCTCCAGCACCCGCAGGTCCACGCCGCCGACGCC +CCGCGCCGCGAGATTGAAAGCGTCGGTGGCGTGGCTCGTCGGCTCCAGCGCGAGGCTGCC +GTCGGGCGCGGCGAAGACGACGAGGTGCGAAAACACGTTGTCCGCGGTCAGGGTGAGGGC +GCGCTCGCCCCAGTCCAGCCGGGCGACGCCGTCCCACGCGGTGTACGTAGCGTCGGGCGA +ACGCGTGCCCACCGCTGAAGGCTGGCGGAAGTCTTCCTCAGGGTTGACCGGGCGGGCTCC +CCCCGTCGGCAGTTGCCGCTCGTCGGTGTCGTAGACGAGCGCGGCGTCAAAGTGCAGCGT +CGGGTCCACCCCGTCCTGAATCCGCTGGAAATACGGGTGCAGGCCCATTCCAGCGGGCAT +CGGGCGGGCGTCGGCGTTCGTCAGGGTCACGCTGATATCGCAGTGCGGCCCATGCAGGTG +GTACTCGACCTCGGCGCTGAGGTGCCAGGGCCAGTTGAAGTCAGGAAATTCGCGGCTGTC +GAAGGTGCAGCGCAGGTGCGCGCCCGTGACCCGCGTGACCTGCCAGGGCCGGTTCCTCAC +GTCGCCGTGCTGGGTGAGGCCGTCTTTGGTGGTGACGCGCAGTTGAACGTCCTCGCTGCC +GAAGGTCAAGCGGGCGTCCCGAATCCGGTTGGAAAACGGCAGCAGCGTAAAGCAAGCGCA +CTGGCTGCTCGTTTCGACCTGTTCCAGGTCCACTGGACGCAGCACCGGGCGACCAGATGC +GGAACGCAGATTCAGGATGCTGGCGCCGAGGTCCGGCAAAATCTCAAGTTCGAGCGCCTC +GCTGCGGATGGTTTCGGTGCGCCGGGTCACTTGTTCCCCCCGCTCTTTCCCCGGCTGCGG +GTCATTTCATAGAGCAGGATGCCTGCCGCCACCGAGGCGTTGAGGCTCTGGACCTGTCCG +CGCACCGGGATGCTGACCAGCACGTCGCATTTCTCGCGCACCAATCGGCGCATCCCTTCG +CCCTCGGCCCCGATGACGAGCGCCGCCTTGCCGGAAAAGTCGGTGCGGGTCACGTCCTGC +GCCGCCTCGCCCGCCGCGCCGTAGACCCACACGCCGTCGGCCTTGAGCTGGTCGATGAGC +CGGGGCAGGTTCTTGGTCTGGGCGACGGGCAGGTACGCGGTGGCCCCGGCAGCGGTTTTG +GCGACCACCGGGGACAGGGGAGCCGAGCGGCGCTCCTCCACGACCACGCCGTGCGCGCCG +AGCACCTCCGCCGAGCGGATGATCGCGCCGAAGTTGCGCGGGTCGGTCACGCCGTCGAGC +AGCACCATCAGCAGCGGTGCGCCACTCGCCTCGGCGCGGTCGAGGATGTCGTCCACCGTC +GCCCATTGCAACTCCTCGACCTCGGCCACCACGCCCTGATGCTGTGTGGTTCCGACGAGC +TGGTCGAGTTCGATGCGCGGCAGCCAGCGCACCCGCACCCCGGCGTCGCGGGCGAGGTCC +TGCACCTGCCGGGCAAAGGCGTCCTCGACCCCGCGCGCCAGTACCACCTCGGCCACCCGT +CCGGCGGGCAGGGCTTCGAGCACCGGGTTTCTTCCGTAAAGCAACATGTTCAGGAGTCTA 
+AGCTGCCAGGGGGGGGAGCCGGCCGAGCCGTTACTCTGTGCTCGTGAACGGTCTTGCCCT +CAACCTGATTCTGGCGCTGGTCTGGGCACTGTTTCTGGGCGAGGTCAGCCTGCGGAGCCT +GAGCATCGGCTTCCTGCTGGGCTTCGCGGTCCTGACCCTCTTTCACCGGGCGCTGGGCAG +CCGCGCCTATATCCGGGCGGTGGGCGGGGCGCTGCGGCTGACGGCGTACTTCGTGGCCGA +ACTGGTGCGGGCGAACTTCCAGATGGCGCGGCTCGCCCTGCGGCCCCACCCGCAACTTCA +CCAGATGATCGTCGCCGTGCCGTTGCGGCTGCGCGGCGAGGGGGCGCTGACCCTGCTCGC +CGCGCTGACGGGGCTGCTGCCAGGCACCGTCACGCTGGGCTTCTCGCCGGACCAGGGCAC +CATGTATATCCACGCGGCGGGTCTGGAGCAGCGCAGCGCCGTGCGCCGCAGTGTGCAGGA +AATCGAAACGCGGCTGTTGCAGATACTGGGGGAAAGGCAGGGCTCCAGCTGAAGACCCCC +GCATGAGGAACGCGCGACGAGGTGCCCCGGAGCGGAGAGAAGCGGAAGAGAGTTTTCCAG +CCCTAGCGCCCCTGCACCCACGTCACCAGGTCGCCCGCGCTTAGCGGCGGGGTCAGCGCG +TAGCCCTGCGCGGCGTCGCAGCCCAGGTCACGCAGCATGTCGAGCTGGGCGTAGGTTTCG +ACCCCCACCGCCGTGACCGTCAGGCCCAGGCTGTGCGCGAGGCTGACCGTGCCCTGCACC +AGCTTGAGCGATTTCTGGTCGCCCGGCAGCCGGGTGGTCAGGGTGGGGTGCAGCTTGACG +CCGTAGAGCGGAAACTGGGTCAGCGCGGTCAGGCTGCTCGCGCCGTCGCCGAAGTCGTCC +ACGATCAGCCGGGCACCGCGCGCGCGCAGGGCGTCGAGCAGGCCCAGGGTTTCCTCGCTG +TGGTCGAGCAGGCTGCTCGCACTCACCTCAATGTCCAGCGCACTCGGGCGGGTAATCAGC +GGCAGCAGGCGCTCGATGGCCCCTTCGCGCCGCAGCTCTTCCAGGCTGAGGTTGACGCTG +GTGCGCCAGTTGCGTTGCCCGGTGGCGTCGCGCAGCTGCGTGCGGCCCTGCACCGCCTCG +CGCACCACCCATTCGCCGATGGAGGTGATGAGGTCGCTGCGCTGCGCGAGGGGCAGAAAG +CGGGCGGGCGAGACGTTGCCGAGTTCGGGGTGTGTCCAGCGCAGCAGCGCCTCGGCGCCC +AGCACCCGGCCGTCTTTGAGGCTGACCGCCGGCTGGTAGAGCAGCGAGAACTGCTCGTGT +TGCAGGCCGCGGTCCAGCACCGCCCGCAACTGGTCTTCGAGCTCGAAGGTCTGGGCTTCC +TCGGCGCGCATGTCCGGCTCGAACACCGTCAGTTGCCCGCGTCCCTGGCGCCGGGCGTGT +TGCAGCGCCACCTCGGCGTCGGCCAGGGCTTGCTCGGGGGCCGCGGCCAGCAGCGGCGCG +GCCCCCAGCGAAAAGGTCACCGGCACCAGCCGCTTGCCGGTCCGCAGCGGTTCGCGCAGC +GCCGCCTGGAGCTTCTGGCCCCCCACGGCGGCCCCGAGCTGCGGCAGGTACACGGCGAAG +GTGTCGTCGGCCAGCCGCGCCGCCTGCCCGCCTTCCAGCTCCGCGAGGTCGTTGAGCCGC +GCCGCCACCTGAATAAGCAGCAGGTCGCCCGCCGTGCGGCTCAGCGCCGCGTTGAGGCCC +CCGAAGCCGTCAATGTCGAGGCAGGCGACCATCCCGCCCGTTTGTGCTGCCGTGGCCTGA +CCCAGCGCCGCGCGCAGCCCGCTGCGGTTGAGCAGGCCGGTCAGCCCGTCGTGCGAGGCG +TCGTGGCGCAGTTTGGCCTGGGCGCGGCGCAGGGTGGTCACGTCGCGCAGCGAGAGCAGC 
+AGCTCGGGCGCGGCGCTCCGGCCCGGGGAAGCGGCCGAGGAACCGGTGGACTGACCGCTC +CCCGGCAGGGCCGAGAGGCTGATTTCCAGATGCCGGGTCACGCCGCCGGGCAACACGGCC +AGCACCTCGCCGTTGTGCGGCAGGGTCAGCGCCGAGAGGGGGGGCACCGTCAGCGGGTTG +CCCTGCGGGGTGTGCAGCTTGATGCCGAGTTGCTGGTGCAGGCGCGGCAGCGGCAGCCCG +GTCAGGGCGATGTTGTGGGGGTCGAAGCCCAGCAGCCGCGCCGCCGGCTCACTGATGAGC +TGCACCCGGCCCTGCGGGTCGAGCAGTATGGTGGCCGCGCCGCTCAGGTGCAGCAGCCGC +GCCGCGCCGTTGTCCTCGCTCACCGCGGCGGGTCGGACACCCTCGGCGAGCAGCAGCCCC +GGCACGGTGTCGCTGCGGCGCACCAGCAGCGGCAGTTGCCCGCCGCGCGCCAGTGCAATC +TCGCTCGGCACCGGGCTGGGCCGCCCCGCCTCTGCGCTCTGAATGAGGTCCTGAAGCAGC +CGCGCGCCCGCCGGAGCCCAGCAGGGCCACTCGTGCAGGGGCCGCTGGGGGTGAGCCTGG +GCTTCGGCGAGGTCTTCGGGTTCCCCCAGCAGTTGACGCATCCCCCGGCTCGCCCCCTGC +ACCGCGCCGCCCGGTCCCAGCAGCGCCAGCGGCAAGTCGGTGCCGAGCAGCGCGGCGAAC +TGGGTCGTCTGGGTGTGCTCGGTGCTCACGTCCTGCACGGTCCACAGCACGCCCGCCGCC +GCGCCGCCGAAGTAAGGCCGCGCCGACCCACGCAGCCAGCGCTCCCCGCTGTGCCCCGGC +AGGGACGGCACCCGCTCGTCGCCGAGTTGCACGGCCCGCCCAGCCGCCGCGCTGTCGAGC +TGCTGCGCGAGCGCCGCGTGGTCGGGAAAAACCTCGTGGACGGTGCGGCCCAGCACATCC +GCCCCGGTCAGCCCGAACAGTTCCAGAAAGGTGCGGCTGACCTGTCGGAACTCCAGCTCG +GCGCTCAGCCACGCCGTTGCGGCGGGAAGCTGGGTGATCAGCATGTCGGCCTCGCGGCTC +GCCCCCTCGGCACTCGCCGCCGAGAGCAGCAGCGTGAGCACCTGCACCGCGCCGTCGGGC +ACCGGCGCCCCGTCGGACCACAGCAGTCCCAGCAGCGCTCCGTCGCGCGTGATCCAGGCG +AGGTCGCCGTGTTCGAGCCAGTCGTCGGGCGGCACCAGTTGCGCTTCCTCGGTGGGCGCG +CCCTCGGCCTGAATGTTCAAAATCCGTTGCCCGATGGCGGCCAGCAGGGTGGCCTGCGGA +GCGTGCAGCCGGATCAGGTCACGCAGGGCAATGGGCAACGCGGCATGGGCATCGGTCGGG +GCAGCGGGCATGGGGTTCAACCTCGGAAAAAGCGGGAAGCGGGAGAAACGGGCGCGTCGG +GGCGCCTTGAAATGAACTGCTGTTCTGAGCTTGCCACGTCGCCTCTTACGGTCTTCATGC +AAGCCAAAGAAATAGGAGGAAACTTGCTGCTGGGCGCTTAGAGCATTGGATACAAAAAGA +CCCCCTCACCCCTTGCTTCGCAAGGCCCTCTCCCACCAGGGTAGAGGGTCAAAAATCCAA +AGCATTCTTATGTCGAACACTCTAGGGACACCCATCGATCAACAGCAAAAGGCCCACCGC +CGCGTGTGCAGGGTGGGCCTTCTGAGGAGAGGAGAATTCAGGCGCTCGGTTTGTCCGACT +TGTCCTCGGCGGCGCTCTCTTCTTCGGTGCTGAGCGAAGCCTCTTCGATGGAGATACTCG +CCATCAAGACCACCTTGTCACCGTCCTGCTCGACCTCGACCTTGGAGTTGCCCGAGGGGA +AGTACTTTTGGACGACTTCCAGCAGGTCGTTGCGCAGGGCCTCGACCTTGCCGGGCGCGA 
+TCTTGGCGCGGTCATAGGCGAGCACCAGTTCGAGGCGGTCCTTGAGCGTTTCCTTGGAGC +GGCGTCCCTTGAACCAACCGAACATCAGCTCCCACCCCCGAACAGGCGGCGAATGGCGGC +CCAGATGCCTTTTTCTTCCTCGGTCAGCTTGGGAAAGGGCACGTCCTGACCCTGAATGCG +CTGCGCGGTCGCCATGAAGGCGTCGCCCGCCTTGGTCTTGCCGAGCACCGCCGGCTCACC +GACGTTGGTCGAGACGATGATGCCCTCGTCCTCAGGCACGATTCCGATGGGCTTGACGCC +CAGGATGTCCACCATGTCGTCGATGGAGAGCATGTTGCCGCTCGCCACCATCTTGGGACG +CAGGCGGTTGACGACCAGCCGAATCTCGGTGATCTGCTGCGCTTCGAGCAGGCCGATGAT +GCGGTCGGCGTCGCGCACCGACGACACTTCGGGGTTCACGACCACCAGGGCGCCCTCGGC +GGGGGCGGCGGCGGTGCGGAAGCCCGACTCGATCCCGGCGGGCGAGTCGATCAGCACGCG +GTCGAAGCCCTCTTCTTCCAGCAGGCCCTTGACGACTTCCTTGAAGACCTCGGGGTCCAG +GGCGTCCTTGTCGCGGGTCTGCGAGGCGGGCAGCAGGTGCAGGTTCTCCACGCGCTTGTC +GCGGATCAGTGCCTGATTCATGCGGCACTTGCCTTCCAGCACGTCCACCAAATCGAACAC +CACGCGGGACTCTAAGCCCATCACCACGTCCAGGTTCCGCAAACCCACGTCCACGTCGAT +CACAACGACTTTTTCACCCAGTCGGGCCAGGGCCGCGCCGATGTTGGCGGTGGTCGTGGT +CTTGCCCACGCCCCCCTTGCCCGACGTGACGACAATCACTTTGGCATCCATGCTTGGCGC +AGCATAGCAAGGCAACGTGAAAATGTAAGCCTGTGGGGGTGGAGGCCGGGGGAGGGGTTG +CGGCAGGTGTGCCGGGTGGGGACAGCGGCCCCGAATGCAGGCGCCGGGAAGCCGGTAGAC +TGCCGGGCAGCATGACCCGACTTTCCGGAAACAATCAGAACCGCAGCGTCCTGATCATCG +GCACCCTGGTCGCCGCCGCGCTGATCGCCCTCGCCCTGTTCGCCGTGCGCGGCAAGAGCA +CGACCACGACTGGCGAGGCGCAGACCTTCACCTACGCCAACTTGCCCTACGCGGGGCAGG +CGAACGCGCCGGTCAACGTACTGGTCGTCGAGGACTTCAAGTGCCCCAACTGCAAGTCCT +TCGAGGAAACGGTGGCGCCCGAACTGCGGACCAAATACGTGGGGACCGGCAAGGTCAAGA +TGTACTCGCTGGTCTACCCCTTCCTCGCCGACCGTCTACCCGAAGACGACAGCAAGTACG +CCGCCCAGGCCGCCCGCTGCGTCTACGCTCAGGGCAAGAACGACGCTTTCAATACCTACA +AGGAAATCCTGTTCCGCGCCCAGGGCCCGGAAACCGAAGTCTGGGCCACCAAGTCGCGCC +TGAAGGAGCTGGCGACCAGCCTCGATATCGACCAGGCCAAGTTCGCCACTTGCCTCGACA +ACGACGAAACCGCCGCGCAGGTCGAGACCGACAAGCAAGAAGCCCTCAAGGCGGGCGTGG +GCGGCACCCCCACCGTCTTCGTCAACGGCAAGCTGGTGAATGTCCAGAGCGATTACGTCA +AGGACATCTCGGCGGCCATCGACGAAGCCCTCAAGCCGTGAGCAGCGGGGCATGAACCGC +GACACCCGGTTGTACCTGGCCTGGTTGGTGGCCCTCGCCGCCACCCTTGGCAGCCTGTAT +TTCAGTGAGATTCGGCACTTCAACCCCTGCCCGCTGTGCTGGGCGCAGCGCATCTTCATG +TACCCGCTGGCCGTCATTCTGGGCATCGCGGCGTTTGTGGGCGACCACGGCGTGCGGCGC 
+TACGTGCTGCCGCTGGCGGCGCTGGGGCTGGGCTTCGCCATCTTCCAGAACCTCGAAACC +TGGGGTTTCGTTCAGTCCATCAAGGCGTGCACCGTCAACGCTGCCGCTGCCTGCAACACC +CCCTGGCCGGTGTGGGGAACCAGCCAGGACACGCTGAACCGTGCGCTCACCATCCCGGTG +CTGAGCATGATCGCCTTTGCGCTGATTCTGGCGCTGCTGAGCTGGCCCCGGCAGCGCGTG +ACCGTGCCCGAAAGCGCGGCAGTCCAGGGCTAAAGTTGCAACTCGAGCGCCCGGCCCCCT +ATACTTCTCCGGCCCGGTTTGCCTGCCAGCGTGCAGCGCGAGCCTGAAAAGGAGTACGAA +CGTTTCTGCCCGTGAAGAGCGGGACCTCGGAGACGGTCCGCTGCCCGAAGGAGACCCCCC +ATGCAGACCCACATCAAGATCAACCGCGGCGAGCTGCTGCGCGGCATCGAGCAGGACCAC +ACCCGCCAGCTTCCCGACTTCCGCCCCGGCGACACCGTGCGCGTGGACACCAAGGTGCGC +GAAGGCAACCGCACCCGCAGCCAGGCCTTTGAAGGCGTCGTCATCGCCATCAACGGCTCG +GGCAGCCGCAAGAGCTTTACCGTGCGCAAGATTTCGTTCGGTGAAGGTGTGGAGCGCGTG +TTCCCCTTCGCTAGCCCCCTGGTCAATCAGGTGACCATCGTGGAACGCGGCAAGGTCCGC +CGCGCCAAGCTGTACTACCTGCGCGAACTGCGCGGCAAGGCCGCCCGCATCAAGAGCGAC +CGCAGCCGCGTGATGAAAGACGCCGCCCGCGCCCAGCAGGACAAGGCGAACGCCAGCGCC +AGCCAGGCCGCTGCGGCTCAGGCCGACGTGACCGTCATCAGCGCCGCTCCCGAAGTGGCC +CCCGAAACCCAGGGCGAATAAGTCCAGGGCTCCCAAGCCCGCCTGCCTCCGCGTGAGGTG +GGCGGGTTTCTTTTGGCTGTGGGGTGGGCGTTTACAGGTAAGCCAGTTCCCAGTCCTTGG +CCCCGTGCGCCAGCCCTGCTCGCAGCAGCGCGAGGCCACTCGGCGCCGGGTCGCGCCAGT +GGGGGAGGCCACTCAACTCAGCCTCGAACTCGGCCAGCGGGCGGCGGGCGACGGGCTGCA +CGGTGTCTGTTACGGTGCCTGCGCTGACCTCATAGACCGCGCTATACACGTTGCCTTTGC +GGGCGTCGAGGCTCACCGCCTGCGGGCCGTCGCCGCGCACCAGACTTTCCAGGGTGGACA +CGCCGCGCACTTCGGCGCCCCACACGCGGGCGAGCCCCAGCGCGTAACTCGCGCCGACGC +GCACGCCGGTATAGGAGCCGGGGCCGGTGCCGATAACGATGGTCTGGGCCTGAAACGGCA +GCCCCGCCGCCCCGAACAGGGCACGGGTCTCGTCGGCCAGCCGCTCGGCGCTCGCCCGCC +CGACCTCCTGCACCCGCTCCAGCTCGCCGCCGGGCCAGCTCACGGCCAGCGTGAGGAAGG +GGGTGGCGGTGTCCAGGGCGAGGGTCACAGAAAGAGGGGTGACAGGGGCGGCGGAGACGG +TCATCGGGCGTATTAAAGCAGAGGCCGGGGCAGGCGCCCGTGTGCCCTGCCCGCTCTCAT +CTCTTTGTCACCCCGGCAGGGACGTTTGACCGCCCCTTTTTCGCCCCCTGGAAGAAGGGC +AGGTGTTATCCTCTGCTCACTATGTCGAACATTGCCAAAGGGCTTGAAGGTGTTCTCTTT +ACCGAGAGCAAGCTGACGTTTATCAACGGCTCGGAGGGCATTCTGACCCACCTGGGCATT +CCGATTCAGGAATGGGCCGAAAAGAGCACCTTCGAGGAACTCAGCCTCGCGCTGCTCGAC +GCCAAACTGCCCACCGCCGAGGAACTCGCCAAGTTCGACGCCGAACTCAAGGCCAACCGC 
+GCCATCCCCGATCAGCTCGTGGGCATCATCCGCGACATGCCCAAGGGCGTACACCCCATG +CAGGCGCTGCGCACGGCGGTCAGCTACCTGGGCCTGCTCGACCCTCAGGCCGAGGACATC +ACCCCTGAGGCCCGGCGCGCCATCAGCACCCGCATGATCGCGCAGTTCTCGACCATCATC +GCGGCGATCAACCGCGCCCAAGAAGGCCAGGACATCGTGGCCCCCCGCGCCGACCTGACC +CACGCGGGCAACTTCCTGTACATGCTGACCGGCAACGAGCCCACCCCCGAGCAGGCCCGT +CTGTTCGACATCGCGCTCGTGCTGCACGCCGACCACGGCATGAACGCCTCGACCTTCACG +GCGATTGCGACCAGCTCGACCCTCAGCGACATGTACTCCTGCATGGTGAGCGCCATCGGC +GCGCTCAAAGGCCCGCTGCACGGCGGCGCCAACGAAGCCGTGATGACCATGCTCGACGAA +ATCGGCACCGTGGACAAGGCCGAGGCGTACATCACGGGGAAGCTCGACAACAAAGAGAAG +ATCATGGGCGTGGGCCACCGCGTCTACAAGTACTTCGACCCCCGCTCGCGCGTGCTGCGC +GACTACGCCGAGCACGTCGCCAACAAAGAAGGCAAGAGCAACTACTACCAGATCCTCGAA +GCCATCGAGAAGATCATCGTGGACCGCATGGGCGCCAAGGGCATCTACCCCAACGTGGAC +TTCTACTCCGGCACCGTGTACAGCGACCTGGGCATCAAGAAGGAATACTTCACCCCCATC +TTTGCCCTGGCCCGCATCAGCGGCTGGTGCGCCAGCGTCATCGAGTACAGCCAGGACAAC +CGCCTGCTGCGCCCCGACGCCGAGTACACCGGCGCCCGCGACCAGCACTACGTGGACATC +AAAGACCGCCAGTAAGCGGAGAAGGTAAGAAAGGGGGCCGGAGCATGGCGCTGCGGCCCT +TTTTCTTTGCTGCCTGTTTCGCTTTACAGCAGCCTGCCACCATCATGCGCTTCGGGCGTC +TGCACCGGCACCCCCCGCGCATTCAGGGCCTCGCGCAGCATGGGGATGTTTTTGAGGGCA +TGCCCGCCAGTCGTGTTCTGGAAAAACACGTACAGCTCGGAGAGGTCACCGGCGACTGCC +GCAATTTTCTCGGCCCATTCGTCCATCTCGGCGCGGTTGTAGAGGTAATCGTGGCGCTCG +CCCGCGCTCTGGCCTTCCCACCAGGTTTCGCGGTTGCGCCCGTGCAGGCGCAGGTAGCCC +ACGTCGCCGGTTACGTGGACCTGGGGTTCGGGCATCCCACCCACTGGCGGGTAATCCGGG +CTGACCCAGATGATGCCGAATTCGCCCATGCCCTCGCGGACTTCGGGTTTGTCCCAGCTC +GCGTGCCGCAGTTCCACCGCGAGTTCGTGCCCAGCAAAGCGCTCGGTGAGCAGCGCCAGA +TACTTGCGGTTGGCGGCGGTGCGGTGAAACGAGTAGGGAAACTGCGCCAGATACGGCCCC +ATCATGCCGGCCTCGCGCAGTGGCTCGGGGCTCTGGAGCATTCGGTCAAAGTCGGTGTCG +GTGGGAGCGCGTACGTGGGTAAAGACCTTGTTCAACTTGACGGCGAAGCGCACCCACCCG +CCCGATTTGCGGACCATGCCTTCAAAGGCCTTGAGGCCGGGAATCGCGTAGAACGATGAA +TTGAGCTCCACCGCGTCGAAGTGGTGGGCGTAGGTCTCCAGAAAGGCGTCTTTCTTGACG +CCCTCGTAAATCAGGCCCGGAGCCGTCCAGTCCTCGTCGGTGTAGCCGCCGCAGCCGATG +TAGACGCGCATGGGGGAGAGGGTAGCGGGTGGGCGTGGGAAACATATTTACAGTCCGCAG +AGAATCCCAGCAGAGACTTGGAACTCCAGTAGCTCTCAGGGCAAGCAGGAGCAGGCGCTG 
+CCTTGCATTGGCGTCCTGCGCCCTGAACGTCTGACCCCGTTCCGCCTACAATCTCCCCCG +TGACCCGCTCCGTCAGCCACCCGCCCTACGCCGACCCCTTCGCCCACCCGCCCGAGACTT +CGCCACAGGAACTGACCGCCCAGCTGACTCCCGGCGCCCGTTTTCAGGACGTGCGCTTTG +AGAACTACCGGCCCAACCCGGAGTTTCCCAGTCAGGCGCAGGCGCGCGACCAGCTTGCGG +AATTCGTGCGGCAGGCGGGGCAAAAGGCGAGCGGCGGGGGCTTCTGGCCCTTCAAGCGGA +GGGCCCCGGAAGGGCGCGGATTTTACCTCGACGGCGGCTTCGGCGTGGGCAAAACGCACC +TGCTGGCGAGCGCTTACCACGCGGCCAAAGGGCAGTCTGGGGGCAGCGGGGGCGACGTGG +CCTTCATGTCGTTTCAGGACCTGATGTACCTGATCGGGGCGCTGGGGATGCCGCAGGCCA +TCGAGACTTTCCGGAACTACAAGCTGCTGCTGATCGACGAATTTGAACTCGACGACCCCG +GCAACACCCACATGGCGAACACCTTTCTGGGCGGGCTGATGCCGAGCGGCGTGAGCGTGG +TGGCGACCTCCAACACCGAGCCGGGGGCGCTAGGCGCAGGGCGCTTCAATGCCGAGGATT +TTCAGCGGCAGATTCAGGGGATCGCCGACCGCTTCGGGCCGCACCGGATAGACGGCCCCG +ACTACCGCCAGCGCGGCACCGAGCCCGCCCAGCCGCTCAGCGAGGCCGAGTTCGCGGCGT +GGCTCGCGCGCCAGAACCCGCAGACGACGGCGCTGATGACGCACGCCGAACTCAACCGCG +CTCTGCTGAGCGTGCACCCGAGCCGCTTTGCCCGCGTGCTGGAGCAGGTGAGCGGCGTAG +CGGTGACGAACCTGGAACCGATGCCCGAGCAGGGAGAAGCGCTGCGCTTCGTGCATTTCG +TGGACAAGGTGTACGACCTCGGGCTGAGCGCCGCGTTTACCGGGGTGCCGCTCAATGCCC +TGTTTGACGACATCTACCGCAACGGCGGCTACGCCAAGAAGTACAGCCGGGCGCTGAGCC +GCCTCTCGGAAATGCTGCGGGAGGCGCGGGGGTAAGGGACTTTTCCCTGTCCTCCAGCTC +CCATGTTTCGCCGTGACCCCCTCATGGCCCAGTGGCGCGCCGTGGCCCGCTTGCTTGAAC +TGGACGACGCGCAGGCCCGCGCCCTGCTGGGGCTGGTGCGGCAAACCTTGCGGCGCGGCG +GGGTGCTCGGCGTAGCGCGGGGAGACGAGCGGCAGCTGCTGACCTTCGGCGGCGTACCGG +AAAATGGCGTCTTTGAGCTGGCGAGCGTGACCAAGCCCTTCACGGCGGCCCTCGCCTCGG +CGCTGGTGCGCGATGGGCGGCTGGACTGGAACGCGCCCCTCGCCGCGCTCGGCGGGCCGC +TGCGCCGCTTGCCCCGCGCGCTGACGCCTTATGCGCTGGCGACCCACACGGCGGGGCTGC +CTCCGCAACCGGCGCGGGCTGCGCTGACCACCTTCACCCGGTTTGCCGACCCCTACGGCG +GCATGTCACCGGCGGACGTGCTGGCGAGTGCCCGCCGCTGGGCCAATCCGGGGCAGGCGG +GGCGCTTCGGATACTCCAACCTCGGCGCGGGGGTGCTGGCGCTGGGGCTGGCGCACGCGG +CGGGGGAGGAGACGTCGGCAGCGGGCTATAAGCGGGCGCTGCGTCGCCTGGTCACCTTTC +CACTCGGACTGCCGGGCGTGGGTCTGACCCCGGCGCGGGATGTCGTGCCGCCTTACGGTC +TGCTCGGCGGGCAGGCGGTGACGGGCTTTGCCGAACTCGCGGGCGCGGGGGGCCTGTTCG +GGAGCGCGGCGGAGTTGCTGCACTTCGGCGAGGCGCACCTGAGCGGGGCAGCAGGGCAGC 
+ACTGGCGGCAGGCGCAGGCTTTTCCCGGCTTGCCGCCGCTCTACGCCGGGGCCGCGCCGG +GGTGGTTTCAATCGGGGAGCACCGTCTGGCACGACGGCATTGCCCGTGGCACCCGCACGG +CGCTCGGGTTCTCGCCGCGATCCGGCGCGGTGGTCACGCTGCTGGTGCGCGGAGCGGTGC +CGCTGGTGGGGGTGCGGGCGGGGGTACCGCTTTTGCTGCTCGGGTTGCTGGGCGGGACAG +ACCGTTAGAGCATTTGACAGAATGATCAGATCTGTCTTTGACCCTCTACCAAGGGGAGAA +TTGTCAAGCTGCGAAGTAGAGGGCCTTGCGGAGCAAGGGGTGAGGGGTCTTCACGTCCAG +TCCTGCGCGAACTGCTCCCAACTCGCCGTGTCGCGGGGGCACCCGCCCGTCTCCGTCCAT +TCCCAGTAGGGGGTGTAGAGGCTGCGGGCGGTGACCAGCTCATCGCGGAAAATCCGGGCG +CTGAGGCCGGTGTCCAGCAAACCGCTCGTTTCAAAGCGCTGAAGCACGCCGCTGCGGTCG +TAGGGCACCCGGCGCAGCTCGGTCTGCCAGCCCTGCGGCGTGGCGGTGAGCAGCAGGTAC +TGCGCGCGGGGGTCGCCGTCCGCCGGAGAACCGACGGCCCCGGTGTTCAGCACCCTCACG +CCGCCCACCAGCGCGTCGGCCTGGCGGTGGATGTGCGAGCCCACCAGCACGCCCGCGCCC +GACTCGTCGGCAATGCGGCGAACCAGCGCGGGGTCGGTGCGTTCGCTCAGGCCCCGGCGG +TAGTCGTCGGGGGTGCCGTGCGCGAGCAGCACGTCGGGCAGGCCGGGCACGCTCAGCCGC +GCGGTCAGCGGCCAGTCTCCCGGTACGTCGAGCAGCCCTGCCGCGTGCAGTTGCTCGGTG +CTCCAGGCCGTCGCGCCCCAGAAGGGGTCGTCGTACCAGTCCTGCGGCAATTTATCGCTG +CGCGTGTGCCACAGCCGCAGCAGGTCGTCGTGGTTACCGAGGACAAACGACGTGTCGTCG +CGGGCGAGCAGTTCTTCCATCACCTGCACCGAATCCGGCCCCCGGTTCACCACGTCGCCG +TTGACGATCAGCCGCTCGGCGCCCTGCTCCGCCGCGTCTTTCAGCACGGCCCGCAGGGCG +TCGGCGTTTCCGTGGATGTCGGCCAGCACAGCAATTCTCACGGGGGCATTGTAGGCGCCG +CTTCCGAAGAAAATACTGAAGAAAACCCCATACCCGTTCAGGCCCCGTTTGTGGTTACAG +TAGGGCCATGAAGCCACTGAAGATGCTGACTGTTGTTCTGCTGGGTGCTGGAACGTTCGC +TTCGGCGGCTTCGGTGACTTCGGCGGTCAAGACCGAGATCAACCAGCTCGCCCGCCAATA +CACGGGCCGCCCGTGCCTGAGCAATGGCGAATACGGCCTGGACACCGAGCCTTTTGGCCG +CAAGGTGTCGCCGAGCGAGCTGAGGACGGCGCGGGATATCCTGTACCGTCAGTTGCTGGA +AGGTTTCGACCAGAGTGCACGCAAAAACCACTTCACCTACGAGGTGCGCTACAACCAGAA +CCGCGTCTGGGTCTGGGCCAAAGCCCCCAACTCCTCCCGGCAATATTTCACCTTCGCGAC +CCTCAGCCCGACCGGCATTCGAATGGTGAGCTGCCTACAGTAAGCGGACAAGAAGAAACA +GGCCCCTTCCCGCTGTGGAGGGGGCCTTTTGCTGTTTAAGGTTGCCGAATCCAGTACCGC +CGAATGGGCTGGTCATGATACTGGGGCACCTCAAATTCCCCTTCCAGTTCGCCGCCGTTC +GCCTCAATGACGCCGCGTGAGCCGAAGTTGTCCACATCGCAGGTGACCAGCACCGGGTCT +ATCCCCAGTTCGCGCGCCCGCTCCAGCGCCAGCCGCAGAATCAGGGTGCCGTAGCCTTTG 
+CGCCGCGCGCTGGGGCGGATTTCGTAGCCGATGTGGCCGCCGAACTCACGCAGGCGGTCG +TTGAGGCTGTAACGGATAGAGACGCGTCCCAGGTACTCGCTCCCCTCCACCAGCCAGCGG +TATTCGGAGTGGACAAAGCCCTCCGGCAACTCGTTGCCCGGTTCGTAACGGGTCAGGTCG +CGCAGGACTTTGCCGAAGTCGGCGCTGATGTCGTCGAGCTTCCAGACCAGCGTCTCGCCC +AGGCCACTGCCGGTTTCCTGTGCCTCGCGCACGGCGGCGAGGAAGCTGTCTTTGTACTGT +TCGGAGGGGCGTACGAGGTGTGGCATGGGTCAGGGTAAAGGTTGGCGGTGGCCTGGGCAT +CGGCCACCCGGCCTACCCTCACCCACCAACCGGGTTGGTCAATCTGTCCCGAAATGGCGT +CCGCAGCCCCATAAAGCGGCGGGTGGGGGGCGTACAATCCGGGATGTGAAAGAAGCGGCC +TTCGACGTGCTTGACCTCGGCCTGCTGCCCTACCCGCAGGCGTGGGCGCGGCAGAAGCAG +GAACTGGCCCGCGTGGCGGTGGGCGGGCGGCCCACGCTGCTGCTGGTGGAGCACCCCGCC +GTGCTGACGCTGGGGCGCAAGGCGCAGGAGGGCGAGAACATCGTGGTCACGCGCGAGTAT +CTGGCTGCGCAGGGCATCGACGTGTTCGCGGTGGAGCGCGGCGGCGACGTGACCTACCAC +GGCCCCGGCCAGCTCGTCGCCTACGCCATTTTTCCAGTCGGGCGCCGGGTGCGCGACTTC +CTGCGGCTGCTGGAAAACGCAGTGGTGACGGCGCTGGGCACCCTCGGCCTGCCCGATGCC +CGGCCCAATCCCGGCTACGCGGGCGTGTACGTGGACCCGCGCGAGATCAACGGCAAGACC +TACGACCAGAAAATCTGCTCCATCGGCGTGGCCATCAAGCAGAATGTCGCCCTGCACGGC +ATCGGCCTGAACGTCTGCACCAATCTGGACCACTTCGACCTGATCGTGCCCTGCGGCCTG +ACGGATACCCAGATGACCAGCGTGCAGCGTGAGTACGACCTGCGCGGTCTGGGCAGCGTC +AGCATGGAGCAGGCGAAAAAGGCGCTGACCGACGCTTTCGCCTTGACCTTTGCCGACTAT +GATTGGAGCCTTCCGGGGGTGGCCGCCGGGCAGGAGGCGCTCTCTGTGGCCTCTCCCTAG +CCTCCTTGACCAGCCGTCACCCTCCAGCGACCCTTTCCAACCGAGGAAACCGAAATGACC +CAGCAAGACCCGTCCACCAAGGAACCTAAATTCATCAAAAACGGCATCTACCGCAAGGAC +AGCGTGCCGGTGCGCGAGAAGAAACCTGAGTGGCTGAAAGTCACCATCCCCACCGGGCAG +GTGTTTACCGAAGTCCGCAAAATCGTGAAGGAGCACCGCCTGCATACGGTGTGCGAGGAA +GCGATGTGCCCCAACATCGGCGAGTGCTGGTCGCGTGGGACGGCGACGTTCATGCTGATG +GGCCACATCTGCACCCGCGCCTGCCGCTTTTGCGCGGTGGACACTGGCAACCCGATGGGC +AAACTGGACCTCGACGAGCCGCGCAGCGTGGCCGACTCGGTGCGGCTGATGGACCTGAAG +TACGTGGTGCTGACCTCGGTGGACCGCGATGACCTGCCCGACGGCGGCGCCTACCACTTC +GCCAAGACGGTGAAGGCCATCAAAGAGGTCAATCCGCAGACCCGCGTGGAAGCGCTGACG +CCCGACTTCGGCGGCAACACGGCCTGCGTGGACCTCGTGCTGGACAGCGGCGTGGATACC +TACGCCCAGAACCTCGAAACCGTGCGGCGCCTGACCCACCCGGTGCGCGACATTCGCGCG +AGCTACGACCGCACCCTGAGCGTGCTGGCCCACGCCAAGCAGGCCCGCCCCGACGTGATT 
+ACCAAGACGAGCCTCATGCTGGGCCTGGGCGAAACCCGCGAGGAAATCCGCGAGGCGATG +GCTGATTGCCGCGCCGCCGGGGTGGACGTGCTCACCTTCGGGCAGTACCTGCGCCCCACC +ATGCACCATCTACCGGTGGAACGCTACATTTCGCCCGCCGAGTTCGACGAAATCCGCGAA +GAAGGCATGCAACTTGGTTTCCTGGAAGTCGTCTCCGGTCCGCTGGTGCGCTCGTCGTAC +AAGGCTGAGCAGATCGTGATGGACCGGCCCGGCAACCTGCCCGAGCACCTGAGCCACTTA +GACGGCGGCAGCGAATTGACCTTGATTTGAGATTGAGTGGGAAAGGGGCCGGGGCCATGC +GCTCTGGCCTTTATTCTTTGGGTCGGTACTTGGCTTCGAGGCTCCACCAAAAGTCGGACG +CGCCGCTGCCTACCGTCCGTTACCCTGACCTCAATGTCAAAATTCCGGCGAATACTGCTG +CCCCTGGCGCTGCTGGCTGCCCTGGCTCCAGCACAGGCGGCCCCACCTACGAGCTATCAG +GGGCCGGTGTACGGCGCTGGCGTGCCGAACGTCAAGGTGGTTCGCCCTCTCTGGACGTTG +ACGGTAGATAAGGCCGAGTACGGCGACAGGGCCGTGCTGCTCGCTGAAAACCGCGTGCTG +GTGAAGGTGGGCGGGGCGTTGCAAGCCCGCGACGTGGCGACGGGCCGCGTGCGGTGGACG +CTGCGGCAGCCGGGAAACCTGGGACTGGCCGACAAAAACGCTGTCTTTCTCACGTCCGGC +CCCACCCTGAGCGCTTACCGGCTCAGCGACGGGCGGCGCCTGTGGACCCGCGACCTGGGC +GGCGCGGTGCGCGATGTGGGCGAGTCGGGCGGCGTGCTGTACGCGACGACGGAACACGGC +GGCATAGCCCTGAGCGCCGCGACGGGTCAGACACGCTGGGCCTTTAAAGAGCACGAAATG +ACGGGCTTCCGGACGGTTCTGGGCGACACAGGGTCAGGGGGCGTGGTGTTCTGGGACGCT +TACCAGGGCGAGCCGCATTTTCCGGCCACCTACGCCTTTGACGCGGCGACGGGCAAACAG +CTCTACCGCCTCGGCGGCACGACGGGGCCGCTGGGCGTGCGCGGCAAGGCGGTCCTGATG +GCCGACACGAGCTTTATAGGAAGCGATGACAATGCCACCCTGACCTGGGTAAACCTGCGT +TCCGGCGTCACTGAACAGGTCTTGAAGCTGGCCGCCGATTTCCGCTGCCCTGGCAGAGGC +ACCCTGGAGCGCCGCACGTCCGAAACCTTCTCTGCCCCGCCGCACATTTATGTCAATGAC +CAGTGCGGCACGCGGCTGCGGCAGTTCTGGGCGAATGACCCGGCGCTGGCTGGAAAAACG +CCGTCCACGCCGCTGCCACCCGCCCGCACCTTCGCCGTGCCGGACGACGGTCGCTTTCGC +CTGGGGCCAGTGGGGGAACTGTTGGTGTTTGAAAGCCGCATGGGAGAAGTGCGCTTGATT +CCAACCACGGGCCGCGCGCCTATCAACTACAACGGGGTGGATATGCCGACCGGGACGGGC +CTTGTCCTGCCGGGAGCGGGGCCGGTGTCGCGGTTGGACGCCCTGGGGAGCGTGCTGTAT +GTCGGGCGAGTGAATGGTGAGTTTCTCGCCTACGACGCCGCGAAGAAAAAGCCCCTCTAC +GCGGCGCAGCTTCCCTGGCGCGGCTTCGGGCCGACCTTCCGCAGTGGCAAGTACGCGGTG +CTGACCACTCCCGGCGCACTGGCTGTGGTGCGCGAGCCCTGAAAACGCTGGTGGTCGGCG +GCACCGGCATGCTGCTGGGACTGGTGCGCGAGCTCCTCGCGGCGGACGACGAGGTGTGGA +CGCTGGCCCGCCACGCCCCGGCGCTTACGCACCCGCGCCTGCATCCCCTGCTGGCGGACT 
+ACCGCGACGCAGCGGCCTTGCGCGCGGCCCTGGCCTCCGCCACGCCGTTTGACCGGGCGG +TGGTCTGGATTCACTCTGCGGCGCCGGACGCGCCCTTCGTGGTGGCCGAAGCGGTGCGCG +GGCCGTTTTTTCACGTCCTGGGCAGCGCGGTGGCCGACCCGTCCAGACCCGACGACGGGC +GGCGGGCCCGTTTCGCCGCGCTGGGCACCGATGCGCGTGACGTAGTGCTGGGCTTCGTGC +GGGAAGGCGAACACTCACGCTGGCTGACGAACGCGGAAATCTCGGCGGGGGTCTGGGAGG +CTGTGCAGGGTAATGTTCAGCGGGCAGTGGTGGGAACGGTGACGCCGTGGGCCGCGCGGC +CCGGCTGACCGGCGTGTTCCTGCTAGCCTCGCCTCATGCTTGACCCCCGTTCTGTTCCTT +TTGCCGCTCAGATTCACCCCCAGGCCCGCCCGGCGCGGCGGCTGACCTGGGACTCGCGCG +AGGCGGGGCCGGACACGGCGTTCGTCGCGCTGCCCGGCGAGAAGATGCACGGCAACAGGT +TCGTCGAGCGGGCGCTCGCGGCGGGGGCGCCCTTCGTCCTGACCGACCTCGACGTGCCCC +GCGCCGTGCGGGTAGATGATGCGCGGGGGGCCCTGTTCGCCTGGGCGCGGAGTGAACGGG +CGAAAAACCCGCTGGTGGTGGGCATCACCGGCAGCGCGGGCAAGACGACCGCCAAGAGTT +ACGCGGCGGCGGCCCTGGACGCCCACTTCATGCCGGTGTTCAACACCATGCCCGCCATCG +CCTGTTTTCTGGTGCAGTACGGCGCGAGCCAAAAGCCGCTGGTGGTGGAGATGGGCATCG +ACCACATCGGCGAAATGGCCGAGCTGATGGACCTCGTGCGGCCCGATGTGGGCGTCGTGA +CGACCATAGGCCCGGCGCATCTGGAGCAGTTCGGCATGGTGGAGACCATCGCCCACGAAA +AGGGTCAGATTCTGACGGCGCGGCGGGCATTGGTCGGCTCGCAGGCCGCCGCCTTTTTCC +CCAAGGCCCAGTTTCCCCACGTGGACAGCTACGGCTTCGGCGACGTGACGTTCCGGGGCG +AGGGGCTGGAACTCTCGCCGCAGGCCGCTCGCTTCCGCTTCGGCGGCATGGGCGTGATTT +TGCCGCTCGCCTCGCGGGTGCAGGCCGAGGCCGCCGTGCTGGGGATGGTCCTCGCCCGCG +AAGCTGGCATCGCGCTGGCCGACGCCGCCGTGCGGATGAGCGCGGTGGAAGTGCCCGGCG +GGCGCTACCGACTGCATCCGGGCCGGTTTACCGTCATTGACGACGCCTACAACGCGTCCC +CGGTGGCGGTGCGCGCCGCGCTCGACGCCCTGCACGCGCTGAAGCCGGAGGGCGAGGTAG +GCCGCCGCATCAGCGTGCTGGGCCGGATGCTGGAACTCGGCCCCACTGAGCGTGAGCTGC +ACGCCGAAGTCGGCAGCTATGCCCGCGAGCAGGCCGACCTGACCTACGGGGTGGGCGAGT +TCGCGCAGGAGCTGGGCGAGCGCGCCTTCGCCACCGTGCCCGAGCTGCTCGCCGACCTGC +TGAATGAAGTCCGTGACGGCGACATCGTGCTCGTCAAGGCGAGCCGGGGCATTTCCCTGA +CCCCGGAGCAGCGCGCGGTGACGGGCGTGGGGCTGGACGTGGTGGTGGAGGCCCTGCTCG +ACCAGCGCGACCGCTGACCTGCCGCTCACCCGGACCCGGCACAATCGGACCATGCCCCTC +CCGCCCCGCTGGACCACACCGCGCCGCTTGCTTCGCCTTCCCGTGCTGGCCCTGCTGGGG +GCCTCGCTGCTCGCCTGTGCCCCCGCGCCGTCTGCCGGGCGGGGAAGTGAGGCGGGAAGC +ACTGCCTCCGCCACTGGCGGGGTCCGCGCGGCCTTCAGCGACGATGGGGTGGCGTGGGTC 
+AGTGGGGGCCGGGCGTGCGTGGCCCGCGTGCCGAGCTTTCAGCCGAGTTGCCCGCGCCTC +GCCCCCGCCTCCGATGTGGGCTGGCAGCAGAGCGGTGGGCAGGGAGCCGACGCCTGGGCC +GCGCTGCCGGGGCCGGGGCTGGTGGTCACGCTCGACCGGGCGCCGCGCAGCCTGAATGTG +GGCGCGGTGGTCCTGCTGAGTAGCACCCGCATCTACCGCGAGGACGGCAGCGCCCTGACC +TACAGCGGCGAAGCGGGTCGTGGCGTGGCCGGAGCGCCGCTGGCCGCCGTGACCGGGGGC +GACGGGCGCGATTACGTGGTCCTGGGCCGCGAACTGCGCCGGGTGGACGACGGGGCAGTG +CTTGACCGGGCGGCGCAGCCTTTCCTGTATGCCACCCCCAGCGGGGCCGCCAGTGCCGCC +CTGCCCACCGCCAGTGACGGCCTGAGCCTTTACCGGCTGACCGGCAGCACCCTGGAACGC +TTCAGTGCCGGGCAGGTGGTCGCTCGCGTGTCGCATGGCCCAGGGCAGGTGGGGCTGGTG +CGCGGCGAGGTGGTTACGGTGGACGCGGCGGGCCGGGTGCGGCGATTCACTCTGCAACTC +GAGCCGCTGAACTGAGCCTGGGACGCCGGGTTTCGGTGCCCTGTCCGTAGCTTGCGAGGG +GCCAACGAGCGGCTCACCGACGCGCTCCGGCAGGGCCTGGGGGTGTCAGAGTTCCGTAAG +CGGGGGGATGGTTCCATGGGCAGCGGTATGTCCGCCCTTGCCCTCACTCGCCGGCTTATT +CCCGCAGCAGCTTCTGCCCGTTTCCGGAGCGTGGCCCAGGCCCCCGCATCGTTGCCGGGG +GGGCCGGCCCGGGTAGATGAGTGAATGCTAAAATTCGCCGGACTCTGGAGGGAGCATGAC +GAGCCTCATTAACCGTTTTCGCAGTCGTTCCGCCGCCATCGGCGTGGAGATCGGCACCAG +CACCATCAAGGTGGTGGCCCTTAAGGCAGGGGCCCCGCCTTCCCTGCAACACGCGGTGAT +GGTCCCCACCCCCATCGGCAGCATGCGTGACGGTCTGGTCGTCGAACCCCAGGCAGTGGC +GAGCGAACTCAAAAGCCTGCTGGCCGAGCACCGCATCACCACCCGCCACGCCGTCACCGC +CGTGCCCAATCAGGTGGCCGTGACCCGCAACATCATGGTGCCGCGCATGGACCGCAAGGA +CCTGCAAAGCGCCATTCGCTGGGAAGCCGAGCGCTACATCCCTTACCCCATCGACGAGGT +CACGCTCGACTTCGACCTGCTCGACGACCCCGCCAACGTGCCCGAGGACGGGCAGATGGA +AGTGGTCATCGCGGCGGCGCCCACCGAGGCCGTGCACCGTCAGATCGAGGTGCTGCGCCT +CGCGGGCCTCGAACCCACCGTGGTGGACCTCAAGAGCTTCGCGGCGCTGCGGGCGCTACG +CGGCAACCTGCTCGGCGAGCACCTCACCAAGAGCACCCTAACCGGCACCAACTACACCGA +GGCCGGCGAGGTGGCGCTGGTGATGGAAATCGGCGCGAGCAGCAGCGTCATCAATCTGGT +GCGCGGCGACCGCATCCTGATGACCCGCAACATCAACGTGTCCGCCGACGATTTCACCAC +TGCGCTGCAAAAGGCCTTCGACCTCGACTTTGCCGCCGCCGAGGATGTCAAGCTCGGTTA +CGCCACCGCCACCACCCCCACCGAGGACGAGGAAGACCTGCTCAACTTCGACCTCAGCCG +CGAGCAGTACAGCCCGGCGCGCGTGTTCGAAGTGGTGCGCCCGGTGCTCGGCGACCTGAT +CACCGAAATCCGCCGCTCGCTGGAGTTCTACCGCGTGCAGAGCGGCGACGTGGTCATCGA +CCGGACCTTCCTCGCCGGGGGCGGCGCCAAGCTGCGCGGCCTGGCGGCGGCCATCGGGGA 
+CGCGCTGGGCTTCGGGGTCGAAGTCGCGTCGCCCTGGCTGACGGTGCAGACCGACCAGGC +GGGCGTGGACACCGGCTACCTCCAGACCAACGCCCCCGAATTCACCGTGCCGCTGGGACT +GGCGCTGCGGGGGGTGATGGGCCGTGGTTGAAATCAACCTCTTGCCCAAGGAGTATCGCC +GGCAATCGCAGCCCAGCGTGTGGAAATACGCGTCCTGGGCGGCGGCTGGCCTCACGGCGG +CGGTGCTCGGCGGCTGGTTCCTCTCGGTGTCGGGCGACACCAATCAGTTGCGCGAACGCT +CGGCGGCGCTGCAACAGCAGATCGACGCGGTGGCCCCCCAGAAGTCCCGGTTCAACGACC +TCACCGCCCAGCAGGGCGAACTCGAGCGCGTCACCCAGGTGGCGACGCAACTGCGCGACC +AGAAGACCTACTGGTCCAACGACCTCGCGTCCTTCGTCGAGCGCGTGCCGGGCAACGTGG +TGTTCAGCAACGTCAACATGTCCACCGTCGCGCCCGGTGCGGAGCCCAGTCTCGCCTACG +CAGGCAAGCCCGTAACCCGGCAGCTTGACCTCACCGGCAGCGCCCGCAGTCAGGAAGCGA +TTGTCGGCTTCCTCAACGCCTTTGAGACCGACAGCAATTTCGGCGTCGATTTCAAGGGGA +TGCAGCATGACGCTACCAACGGCATCTACACCTTTACCGCCAGCATCGGGGTTGTGGGCG +ATCAGCCGAGCGCCGCGCCGGGGGCTGACCCGACAGCGCAGGGCGCGCCTGCCACTGGAA +CGGCACCGGCTGCCCCAGCGGCTCCTGCGCCGACGGCCACCACCCCCGCTGCCGCGCCGG +CCCAGGGAGGTGCCCAGTGAAAGCCATCAAGCTCGCCCCCCACTACATCTTCGCGCTGGT +TCTGACCCTGTGCTTGGTGCTGGGCTACCTCTTTTACACCATGGCGATTCAGCCCCGGCA +ACTCGAAATCACTTCCCTCAACGACGAGATCACCAACAAGGAAACCACGCTCGCCGCCGA +TCAGGCCAAAGCGGCGCGGGTGCCGACCTTGACGGCTGAAGTGGCCCGCCTCGAAGTCGA +GCGCGAGAAGTTCCTGCGGGCGCTGCCGCCCACCGCCAACTTCGGTCAGGTGGTCGCCAA +CCTGCGCCAGACCGTGAGCGCCGCCGGGGGAGACCTCAAAACCCTCAATTTCGCTGGGAG +CGGCGCGGCGGGGGCCAACCTGCCGGCGGGCGTGCGCCCCATCGGCATGACCATGTCGGT +CAATGGCCGCTTTCCGCAACTGTTCCAGATTCTGCGCAGCCTGGAACTGCAAAACCGCTT +TACCACCGTGGACAACGTGAGCCTGCAAACCCAGGGCGACGCCGGCACGGGCGGGGGCGG +GACGCTGGGCAGCACCCTCGGCCTGACGGTCTATACCTTCGATGCGTCGGGCGCGACGAG +CACCCCTGACGCGGCGGCTCCGGCAGCGGGCACGCCGGCCCCCGCTGCACCGGCCGCTGG +AGGCACGCAACCATGAGCCGTTTTCCCACTGAAGACCAAACCCAGACCCAGACCATGACC +GAGGTCACCAACGTCTCGCAGGAGAAATCACCGCTCAACCTCAACTTCTCGCGCGAAGTC +AAGCTGGTGCTGGTGGTGCTCGGCATGGTCGCCCTGATCGGCGGCTGGTTCGTCCTGACC +GGCCAGCCCGACGACCAGACCGTCACCGCGCCGGTCCCCGTGACTACCCCAGCCCCGGAG +ACGGCCACCACACCTGAAACCGCGCCGGCAGGAAATGCGGCCCCGGACGGTCAGACGGCG +GGCACGGCCACAGAGACGACGACCACCCCAGGCACTACAGATGAGTCGTCCGGCAAGAAG +GCGACAGGAACAGCCGCCAAGTCGCAGAAAGGCACGCAGTCTGGCGACGGCTCCGACGAA 
+AGCCTGGTGGCGACGGTGCCGCCGTTGGAGGGTGAAGGCACGGCGACCCCACCGGTGCCC +GTGCCCGGCGGCATCAACCCGGACCGCCCGCTCAAGACCCTCAGCGGCGCGGACCCCTTC +GGCTCTCTGACGCCCAGTCCGGGAACCGGCACGGCTGGCGGGACGGCTCCAGCCACCCCG +GTCGAAGCCCAAACTCCGGAAACTCAAAGCCCGGAGACGGCGCCCGAAACGCAGGCACCC +ACCGCGCAGAGCACTACCAATAGCCCAGCCCCGGTGGTCCTCAGCAGCCCGGCAGGCAGC +AGTGTCAGCGGGCAGGGCGGCGTGCTCGATTACTCCTCGCGCCTCGATCAAGGGGCCCTG +CCGACCCCTGTGATCCCAGTATCTCCGACGGAAGTCCGCAGCCCGGTGACCAGCGCTCCG +GTCAGTGGCAGCTCAGTCAACGGCGACAGTGTCGGCAGCGGCATGGCGACAGCGCCCACG +GTTGTGGTGAGCACTCCGGCTCAGCCCACCACCACTCCCTCGGCTTCAAGCCCAGTGGCT +CGGAATACAGCGGGCCAGAGCGGAGCAGGACAGAACACGGGCGCCTCCCGCCCGGCCAGT +GGCACCCCGTCGCGGGCCAGTGCTCCCAACGTGGTCGTCGTGCGTCCTCCGGCTCGCGTG +TCCGGTACTTCGGCGGCGGGCGCGACGGGCAGCTCACGCACCGGCGCGGCGGGCACGCCC +GCAGCAACGCCGACGCGCAACCAGTCTGCGTCCGGCACCCGCCCCGCGTCCGGGCAGACG +CCGCCCCGAGTCATTGCGGTGGCCCCGGCGGGGGGCGCCCGAACTCCGGCAGCCCCCGGG +AACAGACCGGCGACCGCCGCACCGACCCGGCCCAGCCCTTCCAGCACCGCTCCCAGCAAG +ACCGTCGTGGTGCGGCCCCCGGTCCCGGTCAGCGGGCAGCAACCGCCCCGCGTCGTGACC +AGCCCCGCTGCGGGCGTCCCGGCGGGCAACGCAGCCCCAGCGCCGACCCCGCCTCCAGTC +GTGGCGGCGCGGGTACCCCAGGCGAACACCGTGCCTGAGCTGGTGGCCCAGGCGGGCACG +TCGGCGGCCCCGGCCACTTCCGGTGCGTCCGCCGGAACGGGCACGGCTGGTCAAGCGGGC +AATGACGCCGCTGGCACGGCGGCCCTACCTCCCCTCATCACGGCGGTGGGCAGCGAGGCG +GCTCCGGCCAGCACCGCTACGGCGGACGCAGGAAGCAGCACCCCGGCGGCGCCGGCAGCG +ACTGCCCTCGACCGCCTGCTCGCGCCGGGTCAGGTGGCCCTCAGCTCGGTGGTGCTCGGC +CCGACCAACACGGCGGTGTTCCGCACGCCGCAGGGGTTCGTTGTGGTGGAGCAGGGGCAG +ACCGTCCCCGGCCTCACCGGCGAAGACGGCACCCCGGTCGTCTTGCAGAAAGTAGAAACC +GGCGCCGTCACTGTGGCGCTTGGCACCAAGGTCAAATCTCTGGAACTGGAACAAAGGTGA +GCCATGAATAAACGACACGCCCTCCTCTTGACTGCCGTGCTGGGCATGGCCACCGCTTAT +GCTCAAACTGCACCGACCACCACGACCGTCAATACGTTGCAGACGGTTTACCGTGACCCT +AGTCTGACCTCTGCTCCAATTACAGCAAATGTAGGTAAATACGTCGGCCCCCTGTCTACT +TTCTTGGCAAGCATCGCCAAATCGGCGGGCTACGAAGTGGTTTTCAATTTCAATATTGAT +GCGCTGGCCCTCATCAACGGTGAAATTGTTTTTGGAAATAGTACCGCTTCAGTGACGACT +TCCTATGCAACTCCTTTAGGTAGGCCCCAGGAATTACCAGCCAAACCTGTAGTCCACAAT +TTCTCGAACGCTCCTTTCAACGAAGCTTGGCCTTTGCTTATGGATGTTTATGAGTTGGAT 
+TATCAACTAGTTAAAGTTGGTAGTGCAAATGTCATACGAATTGGTCAAAGGCCTAAGCAG +TTGGCTCTGCCCCTAAAATTTATTTCGGCAGAGAGTGCCTTAACTGCCATTGAGAAATTT +TTTGGTGAAGAAAAGTTTGAGACCGTGATAAGTTTGGATAGCAATAATAAACCTTTTCAA +ACTACTCGGCCAACAGGTAAATTTGGATTGCCTAATAGCATAAAAGTAATCCCCGATTCC +AGCAATAAGCGTTTGATTATTGGTTCCAATAGTGAGGATGGAATAAGAATACGTAGTTTC +GTAGAAACAATAGATGTACAATCCTCTGGTAAAGTAATATCTACTGATTCCATAAGCGAA +ATATATATTGTAAGAGGCCAGAAAGAAAGCGTATTACAGTTTTTGAGAGATAGTTTTCCG +GAATTAATAGTTACCGATTATGCATCTGGAGGCCTTGCTATTGAAGGCCCACGAACAAGT +GTTAACAGGGCTATAATCTTGCTTGGACAAGTAGACCGTGCACCTGAAATTCCTATTGTG +CAGCGCATCTACACCGTGCGCGGCCAGGCCGCCGACATCACGGCGCTGCTCGCTGCCCAG +TACCCTACCCTGCGCGTGACCCCGGTGGGGCAGACCGGCCAACTGGTGCTCAACGGCGCT +CAGGCCCAGCTCGACACGGCGCTCGCCCTGCTCGAACAGGTGGACCGCCCCGCCCCGGTG +GCCGAGTCCCGTACCGTTCAGCGCGTCTTCCAGCTCGTCAACGCCAGCGCTGAGGAAGTC +AAGGCGACCCTGGAAGGCACCCTGGCGCGTGACCTGACCGCCGACAGCAATAACGACGTG +CTGCCCAACGTGCCCGTCACCGCCACCGACGCCAACGGCAACACGACCGTGGTGAGCGTG +CCCAACGCGCTCGGCAAGACGGCGAACCAGGGCACGGCGAATGCTCAGGCTCAGACTGCC +CAGACTCCGGCCAACACCCAGCAGGCGACCCTGATCGCCGACAAGCGCACCAACAGCCTG +ATCGTGCGCGGCACGCCCGAACAGGTGGCGCAGGTGGCCGAGCTGGTGCCGCAGCTCGAT +CAGGTGGTGCCGCAGATCAACGTGCAGGTGCGGATTCAGGAGGTCAACGAGCGAGCGCTG +CAATCGCTGGGCCTCAACTGGCGGGCGACCTTCGGTGGCTTCAACGTGGCGGTCTCGGGC +GGCACCGGGCTGGCGGCCACCTTCAACCCCACCCAGAGCTTCCTCGGTTTCAACATTTTC +CCGACCCTCACGGCGCTCGAAACCCAGGGCCTGACCCGGCGCGTGTACGACGGCAACGTG +ACCATGCAAAGCGGCCAGCGCTCGCTGAGTGCCACGGGCGGCGCCCAGAACGCTTCGAGC +GGCGCGGCGGCCAGCGTCAAGAGTGGGGGCCGACTGGAAATCAACATCCCCTCGGCGGCG +GGCAACATCGTGCGGCAGATCGATTACGGTCTCAACCTCGATTTCTTCAGCCCGCAGGTG +GCGCCCGACGGCACCATCACCCTGCGTATTCGCGGTCAGGTCAACCAGCCGGCAACGGCG +ATCACCGCCGACAGCCTGCCCAACCTCATCGACTTCACCAACAGCGAAGCGCAAAGCACC +ATCACCTTCAAGAACGGCCAGACCATCCTGATGAGCGGCCTGCTCGGCAGCACCGAGACC +ACCAACCGCAGCGGCGTGCCCTTCCTGAGCAGCCTGCCCGGTGTGGGCGCGGCCTTCGGG +GAAAAACGCACCGAGAAGACCCAGTCGCAACTGCTCGTCATCATCACCGGCACCGTCGTC +AAATAACGCCTAGAGCAGTTCTCCGAATTACGCGTGCGTCGGAACAGCACCGCCACCCGC +TCCATTCTCTGCTTCGCAGCTTTGCAAGTCCGTTCTGCTCCGTGTTTGTACTCGCTCTGC 
+TCGCCAAAAAGCTGTGCCATCTTTTTGTCAAATGCTCTACGTCATACCGGCGCCTTTCTG +CGAAGGGGCGCCGCTTTTTTGACTCTCGCGGCGGAGCCTGTTTGCTCAAGCCGCCGCGCC +GGGGCCGCCAATCCCGCTACAGTCGCCCTATGAGGTACCTGACCGCCGGAGAATCGCACG +GGCCACAGCTCACGGCCATTATCGAGGGGCTGCCCGCGCAGTTGCCGCTGGGCAAGGCCG +ACATCGACCCCTGGCTGAGAAAACGGCAGGGCGGCTACGGACGCGGGCGACGCATGGTCA +TCGAAACCGACGAGGCCGAGCTTCTGAGCGGCGTCCGTGCGGGCCGCACCACCGGCGCCC +CCGTCACCCTCGCCATTCAGAACAAGGACCACCGCAACTGGACTGAAATCATGTCGCCTG +AGCCGGGAGGCGAGCCGCGCAAGAAGGCGCTGACCGACGCCCGCCCCGGCCACGCCGACC +TGACCGGCGGCATCAAGTACCGTCACAAGGACCTGCGCGACGTGCTCGAACGCGCCTCGG +CGCGCGAAACGGCGGCGCGGGTGGCGGTAGGCAGCATTGCCCTCAAGCTGCTTTCTGAAC +TCGGCATCGAGGGGGCCAACTATGTCTTCAACCTTGCTGGCATTGAAACGCGGCAGGCCT +TCTCCTGGGACGCACTCGACGCCATCGAGGACTCCGACCTGCGCACCCCCGATGCCGACG +CCGCCGCGCAGATGCGCGAGCGCATCGACCAGGCCAAGAAGGACGGCGACACCCTGGGCG +GCATCCTCGAAGTGCGCTTCCGGGGCCTACCGGTC +>NODE_9_length_39996_cov_63.0617_ID_17 +GCGGGATGCGCCTGGGCGTGCCGTTGCGCGAGCAGGAGTTGCGCGGGGCGCTGTGGCGCT +TGCTCGAGGACCCGGCCATGGCGGCGGCCTGTCGGCGTTTCATGGAATTGTCACAACCGC +ACAGTATCGCTTGCGGTAAAGCGGCCCAGGTGGTCGAACGTTGTCATAGGGAGGGGGATG +CGCGATGGCTGAAGGCTGCGTCCTGAACGGTGCTGGCATAACAGATAGGGTTGCCATGAT +TTTGCCGTATCGGCAAGGCTGCGCGCTTGACAGCGTCATACCCCGGGCCAATTCTGCTGT +GATGCATTTTATCGATCAGGGCTTACTGCAATGAGGAATGACGGAGGCTTTTTGCTGTGG +TGGGACGGTTTGCGTAGCGAGATGCAGCCGATCCACGACAGCCAGGGCGTGTTCGCCGTC +CTGGAAAAGGAAGTGCGGCGCCTGGGCTTCGATTACTACGCCTATGGCGTGCGCCATACG +ATTCCCTTCACCCGGCCGAAGACCGAGGTCCATGGCACCTATCCCAAGGCCTGGCTGGAG +CGATACCAGATGCAGAACTACGGGGCCGTGGATCCGGCGATCCTCAACGGCCTGCGCTCC +TCGGAAATGGTGGTCTGGAGCGACAGCCTGTTCGACCAGAGCCGGATGCTCTGGAACGAG +GCTCGCGATTGGGGCCTCTGTGTCGGCGCGACCCTGCCGATCCGCGCGCCGAACAATTTG +CTCAGCGTGCTTTCCGTGGCGCGCGACCAGCAGAACATCTCCAGCTTCGAGCGCGAGGAA +ATACGCCTGCGGCTGCGTTGCATGATCGAGTTGCTGACCCAGAAGCTGACCGACCTGGAG +CATCCGATGCTGATGTCCAACCCGGTCTGCCTGAGCCATCGCGAACGCGAGATCCTGCAA +TGGACCGCCGACGGCAAGAGCTCCGGGGAAATCGCCATCATCCTGAGCATTTCCGAGAGC +ACGGTGAACTTCCACCACAAGAACATCCAGAAGAAGTTCGACGCGCCGAACAAGACGCTG +GCTGCCGCCTACGCCGCGGCGCTGGGCCTCATCTGATGCTTAGGGCGCGCCGGCTGGCGC 
+GCCCTACCAGATCTGGCAGGTTGCCTGCCGTTCATCCTCCTTTAGTCTTCCCCCTCATGT +GTGTGCTGGTATGTCCTCCGACTGAGAGGGCCCAGGAGTATCAGGGTAGGGATGCCGCCT +TTTTTTCTCGGCCGGCACGACACGGGGACTTGGTCATGATCGAATTGCTCTCTGAATCGC +TGGAAGGGCTTTCCGCCGCCATGATCGCCGAGCTGGGACGCTACCGGCATCAGGTCTTCA +TCGAGAAGCTGGGCTGGGACGTGGTCTCCACCTCCAGGGTCCGCGACCAGGAGTTCGACC +AGTTCGACCATCCGCAAACCCGCTACATCGTCGCCATGGGCCGCCAGGGTATCTGCGGTT +GTGCCCGCCTGTTGCCGACGACCGACGCCTACCTGCTCAAGGAAGTCTTCGCCTACCTGT +GCAGCGAAACCCCGCCCAGCGATCCGTCGGTATGGGAGCTTTCGCGCTACGCCGCCAGCG +CGGCGGACGATCCGCAACTGGCGATGAAGATATTCTGGTCCAGCCTGCAATGCGCCTGGT +ACCTGGGCGCCAGTTCGGTGGTGGCGGTGACCACCACGGCCATGGAGCGCTATTTCGTTC +GCAACGGCGTGATCCTCCAGCGCCTCGGCCCGCCGCAGAAGGTCAAGGGCGAGACGCTGG +TCGCGATCAGCTTCCCGGCCTACCAGGAGCGCGGCCTGGAGATGCTGCTGCGCTACCACC +CGGAATGGCTGCAGGGCGTACCGCTGTCGATGGCGGTGTGAGGTCGTCAGCCATTTCGCG +CACTTTTTTCCGCTTCTCCTGCCGCATGCTCGGCCCGCGCCCCGGCGTCATCGGGCGTTC +CCCTGCATTCCGGGATTTGGCCGCGGCTGCCGACTTGCGTAGTCTCTCTGCGGTCCGCCA +TCCCGAGGAGTCGCCATGCCGAAGTCATTCCGCCATCTCGTCCAGGCCCTGGCCTGCCTT +GCGCTGCTGGCCAGCGCCAGCCTCCAGGCGCAGGAGAGCCGCCTCGACCGCATCCTCGAA +AGCGGCGTGCTGCGCGTCGCCACCACTGGCGACTACAAGCCCTTCAGCTACCGCACGGAA +GAGGGCGGTTACGCCGGTTTCGACGTGGACATGGCGCAGCGCCTGGCCGAGAGCCTGGGG +GCCAAGCTGGTAGTGGTGCCGACCAGTTGGCCGAACCTGATGCGCGATTTCGCCGACGAC +CGCTTCGACATCGCCATGAGCGGCATCTCGATCAACCTGGAGCGCCAGCGCCAGGCGTAT +TTCTCGATTCCCTACCTGCGCGACGGCAAGACGCCGATCACCCTCTGTAGCGAAGAAGCG +CGTTTCCAGACCCTGGAGCAGATCGACCAGCCGGGCGTGACGGCCATCGTCAACCCCGGC +GGCACCAACGAGAAGTTCGCCCGGGCGAACCTGAAGAAGGCCCGGATCCTGGTGCATCCG +GACAACGTGACGATCTTCCAGCAGATCGTCGACGGCAAGGCCGACCTGATGATGACCGAC +GCCATCGAGGCCCGCCTGCAGTCGCGTCTGCACCCGGAACTCTGCGCCGTGCATCCGCAG +CAACCCTTCGACTTCGCCGAGAAGGCCTACCTGCTGCCGCGCGACGAGGCCTTCAAGCGC +TACGTCGACCAGTGGCTGCACATCGCCGAGCAGAGCGGCTTGTTGCGCCAGCGCATGGAG +CACTGGCTCGAATACCGCTGGCCCACCGCGCACGGCAAGTAATACAGGGCCGGCGAGGGT +GGCCGCGGGCCCGCGCGGCCTTCCTTGGCGGCGGCAAAAACGTTATGGTCGGCGCCCCAT +CCTGGTGCCTGGTCCATGCGTTATCTACTGTTCGTCACCGTCCTCTGGGCGTTCTCCTTC +AACCTGATCGGCGAGTACCTCGCCGGCCAGGTCGACAGCTACTTCGCCGTGCTTACCCGG 
+GTCCTTCTCGCTGGCCTGGTGTTTCTCCCGCTGACCCGCTGGCGCGGCGTCGAACCGCGT +TTCGTCGGCGGGGTGATGCTGGTCGGCGCGCTGCAGTTCGGCATCACCTACGTCTGCCTG +TACCTGAGCTTCAACGTGCTGACGGTGCCCGAGGTGCTGCTGTTCACCGTGCTGACGCCG +GTCCACGTGGCCCTGTTCGACGACCTGCTCAACCGCCGCTTCAACTTCTGGGCCCTGGCC +GCCGCGCTGGTGGCGGTGCTGGGCGCGGCGATCATCCGCTACGACGGGATCACCGGCGAG +TTCCTCCAGGGCTTCCTGCTGCTGCAACTGGCCAACGCCACCTTCGCCGCCGGCCAGGTG +CTGTACAAGCGCCTGGTGCGCAAGTACCCGTCCGAGCTGCCGCAGCGCCAGCGCTTCGGT +TATTTCTTCGTTGGCGCGCTGCTGGTGGCGTTGCCTGCCTGGCTGCTGTTCGGCGATCCG +CAGCGCCTGCCGGCCGGCGAGCTGCAATGGGGCGTACTGGTATGGATGGGGCTGCTGGCC +ACCGCCCTCGGCCAGTTCTGGTGGAACAAGGGCGCCACCGAGGTGGACGCCGGTACCCTG +GCGGTGATGAACAACCTGCACGTGCCGGTCGGGTTGCTGCTCAACCTGCTGATCTGGAAC +CAGCACGCCGACCTGCCGCGCCTGGCCCTGGGCGGCGCGGTGATCGTCGCTTCGCTGTGG +GTCAACCGGCTCGGCCGGCGCGAGGTGCGTGCATGAGGATTTCCGGACAGGGCGTGCTGC +TGTCGCTGGCCGCCTCGGTGCTGTTCGTTACCCTGCCGGGCTACGTCCACCTGCTGGAGC +CGCTGGACAGCCTGCAGGTGGTGGCGCATCGGGTGGTCTGGTCGATCCCGATGGTATTCC +TGCTGGTCGTCGCCACCCGCCAGTGGCCGACCCTGCGCGCTGCCTGGCGCCGGCTGTTCG +CCGAGCCCTGGCTGCTGGCCTGCTTCCCGCTGACCGCGGCGATGATGCTGCTGCAATGGG +GCATCTTCATCTGGGCGCCGTTGGCCGGGAAAACCCTTGAACTGTCGCTGGGCTACTTCC +TCCTGCCGCTGGCGATGGTGCTGGTGGGGCGGGTGTTCTACGGCGAGCGCCTGACGCCGC +TGCAGGCTATCGCCGTGGCCTGCGCGCTGGCCGGGGTGCTCCACGAGTTCTGGCTGACCC +GCGCGTTCTCCTGGGTCTCCCTGGTCACCGCGCTGGGCTATCCCCCGTACTTCATGCTGC +GGCGCAGGATGGGCGTGGACGCGCTGTCCGGGTTCGTCTTCGAGATGCTCTTCCTGCTGC +CGCTGGCGTTGGCCGCGCTGTACTGGCTGGGCGACGAGAGCCAGGCCTTCCGCGAGGCGC +CGCGCCTGTGGCTGCTGCTGCCGATGCTGGGGCTGATCAGCGCGCTGGCCTTCGGCGCGA +TGATGGCTTCCAGCCGGCTGCTGCCGATGGGGCTGTTCGGGATTCTCAGCTACGTCGAAC +CGGTGCTGCTGTTCCTGGTGGCGGTACTGTTCCTCGGCGAGGCGTTCCGTCCCGAGCAGC +TATGGACCTACGCGCCGATCTGGCTGGCGGTGCTGTTGACCGGCTGGGACAGCGCGCGCC +TGCTGAGGAAACAGGCGCGGCGGGGCATCTGAGCGAGCCGCAGGCGCCGGAATCAGCGCT +TGCGGGTGCTTTCGACCTGGGCGAGGGTGGTCGGCAGGATGCGCTTGCCGGCCAGGTAGT +GGCGTTTCCAGTAGCTGCTGTTGAGGTCGGAGACGCGGACCTTCTTGCCGCGCCGCGGCG +CGTGCACGAAGCGGTCGTTGCCGACGTAGATGCCGACGTGGTCGACGCTGCGGCTGCGGA +TCCGGAAGAACACCAGGTCGCCCGGTTGCAGCTTGCCGCGGGAGACCTTGTTGTTGTCCA 
+TGTTGTAGATGGCGCGGGCGGTCCGCGGCAGGTCGACGTCATCGACGTCCTGGAACACAT +AGTTGACCAGGCCGCTGCAATCGAAGCCTTTCTTCGGCGTGGTGCCGCCCCAGCGGTAGG +GGGTGCCGATCATGCTGAAGGCGCGGTCGGTGACTTCGGAAGCGGCGGCCGCGACCACCG +AGCGGTTGGCGGACTGGCGGGTGCTGACGACCCGGCTGGGCGACAGGGTGGAATTCTTCG +ACGGCTTGAAGGTGTGGGTGACCTGGTTGGAAGCCAGGCTGGGCGTCGAGAGGGCGGCCA +GAAGGGCCGCGAAGCCTACAGACAGGCATGTCAAAAAGGGTGAACGCATTTCGGCATGTC +TTCGCGTGCGTGAATAACGGTCCAGGCTTCCGCTGGTTCGTGGTTACTCTCTGTTCATCG +GAGTGTCTTCTGGGTGACCGGAACGAGCCCTCCTGACTCTTCCGACCAGGCCGTCAAACG +AAACGGCCAACTGCTTCCGGGCCTGGATGCATTCAGGTTGTCCCGGCGATAACGGGCTTC +ACGAAGGAAAATCGGGGACTCATCGGCGTTCCCAGCGTATTCCCCTACGAAAGCGGCGGC +TATTGTGACCTATTTTGACGCCGGCCTTCTGACCATTCGTCGAATGATCGGTTCCGGATG +TGACGCGCCGGTTTCGCCGTGCAAGTGGCGATACTTGCGGGTCCGGCAATGACCTGTTGT +TCGCCGTGGCCTTGGAAAGCCTCTGCGCCGGGGGTATGTTCGTGTTCCCCGTACCCGAGT +GAAGCCCGTAGAAGCGAGTCCAGATATGACCGAAACAGCCAAGCGTCCCTTGTACGTTCC +CCATGCCGGCCCATCGCTGCTGGAGATGCCGCTGCTGAACAAAGGCAGCGCGTTCAGTAC +CCAGGAACGCATCGATTTCAACCTGCAGGGCCTGCTGCCGCACAACATCGAGACCATCGA +GGAGCAGACCGAGCGCGCCTACAGCCAGTACAACCTGTGCAACACCGATCTGGACCGCCA +CATCTTCCTGCGCTCGATCCAGGACAACAACGAGACCCTGTTCTTCCGCCTGCTCGAGGA +GCACCTGGAAGAAATGATGCCGATCATCTACACCCCCACGGTCGGCCAGGCCTGCCAGGA +GTTCTCGAAGATCTACCGGACCCACCGCGGCCTGTTCATCTCCTACCCGGACCGCGAGCG +GATCGACGACATCCTGCGCAGCGCCACCAAGAACAACGTGAAGATCGTGGTGGTCACCGA +CAGCGAGCGGATCCTCGGCCTGGGCGACCAGGGCATCGGCGGGATGGGCATCCCGATCGG +CAAGCTGTCCCTGTACACCGCCTGCGGCGGTATCAGCCCGGCCTACACCCTGCCGGTGGT +GCTGGACGTAGGCACCAACAACCCGGACCTGCTCAACGACCCGATGTACATGGGCTGGCG +CCACGAGCGGGTGAGCGGGGCGCAGTACGAGGAGTTCGTCGACCTGTTCATCCAGGCGAT +CAAGCGCCGCTGGCCCAACGTCCTGCTGCAATTCGAGGACTTCGCCCAGACCAATGCCAT +GCCGTTGCTGGAGCGCTACAAGGACGAGCTGTGCTGCTTCAACGACGACATCCAGGGCAC +CGCCGCGGTGGCCGTGGGCACCCTGCTGGCGGCTTGCAAGGCCAAGGGCGAGAAGCTCAG +CGAGCAGACCGTGACCTTCGTCGGCGCCGGCTCCGCCGGTTGCGGCATCGCCGAACAGAT +CATCGCCGCCATGCAACTGGAGGGCCTGGACGAGGCCCAGGCGCGTCGGCGCATCTTCAT +GGTCGACCGCTGGGGCCTGCTCACCGACGACATGAGCAACCTGCTCGACTTCCAGCACCG +CCTGGCGCAGAAACGCGCCGATCTCGGTGCCTGGGGCGGCCAGCAGGGCGACGACCTGGC 
+GTTGCTGGAAGTGATCCGCAATGCCCGGCCGACGGTGCTGATCGGCGTCTCCGGGCAGCG +CGGGCTGTTTTCCGAAGAGGTCATCCGTGAGCTGCACAGCCATTGCAAGCAGCCGCTGGT +GATGCCGCTGTCCAACCCGACCTCGCGGGTCGAGGCGACTCCGCAGGAAATCCTCAACTG +GACCGACGGCCAGGCGCTGGTCGCCACCGGTAGCCCGTTCCAACCGGTGCAGGTGGGCGA +CAAGCGGATTCCCATCGCCCAGTGCAACAACGCCTATATCTTCCCCGGCATCGGCCTGGG +GGTGATCGCCGCGCGGGCCAACCGGGTCACCGAAGGCATGCTGATGGCCGCGGCCAACGC +CCTGGCCAACTGCTCGCCGATCGTTACCCAGGGCGAGGGCGCCGTGTTGCCGGCGCTGGG +CGACATCCGCGAGGTCAGCAAGCGCATCGCCGTCGCCGTGGCCAAGCAGGCCCAGGCCGA +GGGCAAGGCCCTGCATACCAGCGACGAGGTACTGAACGACGCCATCGAGGCGAACTTCTG +GTTCCCGCGCTACCGTGCCTACCGCCGGACTTCGTTCTGAGGCTCAGCCGCAGGCTGCGC +ACGGGGGAGAGGCGTTAACCGCCGATATTCCGCGCGATCGGTGCGAGAACGCGAAAAGGC +CACGAGGTGGCCTTTTTCGTTGACGGTAGCGCTAGCGCTAGCGCACATGATCGGGGCGCG +TCCGGTGTTCGGCCTGGGTTTCCGGGTTTTCCGCCGCGCGCCGCGCCAGGGCCGGCAGGA +CATGATCGCGCAGCAGCGGCGCGAGATTGTCCGGCTGCGCTTGGCCGGGGACCAGCCAGG +CCAGTTCCTCCAGCTCCGCCTGCGCACAGACGGCGTGGGGCAAGCGGGCGACGTAGATAT +CGGCATCGACGCGGGTGTTCGCCTCGTTGGCCGCCGGGGCCTGGAAGCTGCCGAGGTGCT +CGAAGGTCGAGGCGCCCATCGGCAGGCGCAGTTCCTCCAGCAGTTCGCGCTGTAGCGCGG +CGAGGGGCGTTTCGCCCGGCTCGCGCTTGCCGCCCGGGAGCATGAAGGCCTGGGTGCCGC +GCTTGCGCACCAGCAACAGGTTGCCCTGGTCGTCGAACAGGCAGGCGGCGGAAATGCTCA +GCAGGTTGTCGGTCATTGCGGGTCGTCCTCGGCGATCACCTGGTAGCGCATCTGTACGAT +GCCGCTGTTGTAGCCATGTTGTTCCAGCAGTTGCAGGCGCCGCTCGCGGCCGCCGGCGAA +CAGCGGGATGCCGGCGCCGAGCAGCTGCGGAATGACGCTGACGATCACCTCGTCGAGCAG +CCCGGCCGCCAGGCAACTACCGGCGAGGCTGCCGCCGCCGGCCAGCCAGACCCGCCGGCA +GCCCTGTTCGCCGAGGCGTGCGAGGCCTTCCTGGGGCGTGCCATGGCGCAACTCGACGCC +TTCCACCGCGCTCTCCCGGGGATTGCGGGTGAGCACCTGGCAGGGCTTGCCCGGATACGG +CCAGTCGCCGAAGCCGCGCACGATATCGTAGGTGCCGCGGCCCATCAGCAGCCCGTCGAT +GCCCTGGTAGAAGCCGTTGTAGCCATGGTCGTTGCCGCCCTCGGCGAAACGGTCGAGCCA +GTCGACGCTACCGTCGGGACGGGCGATGAAGCCGTCCAGGCTGGTGGCGACGTAGTAGAT +CAGGTGCGGTTTCATGGACGGCGCTCCCCTCGCTTCAGACAGTGGCTTCGTCGCTGCGGC +TCGGTGCGGCCAACGCCTCGAACGAACGGGCCTCGGCGTCACGCCCTTCGAGCATGCCCT +GCAGTCCCTGGCGCAGGTCCTGGCCGCTGGGTTGCTGGTAGGCGCCGAGGCCGAACTCCG +GCATCACCGCCAGCAGGTAGTCGAACACGTCGCCCTGGATGCGCTCGTAGTCGACCCAGG 
+CGGTGGTGCGGGTGAAGCAGTATATCTCCAGCGGGATGCCCTGGGCGGTCGGCTGCAACT +GGCGGACCATGCAGGTCATGTGCGGATGGATGTCCGGATGGTTCTGCAGGTAGGCGAGGG +CGTAGGCGCGGAACGTACCGATGTTGGTCAGCCGGCGGCGGTTGGCGGCCAGCGGGGCGA +CCCCGCCGTTGGCGGCATTCCACTCCTGCAACTCGGTCTGCTTGCGGGCCATGTATTCGG +TGAGCAGGCGCACCTGGCTCAGGCGCCGCTCTTCCTTCTCGTCGAGGAAACGCACCTGGC +TGGAGTCGACGTAGATCGCCCGCTTGATCCGCCGTCCGCCGGACTGCTGCATGCCGCGCC +AGTTCTTGAACGACTCCGACATCAGGCGCCAGGTCGGCACCGAGACGATGGTCTTGTCGA +AGTTCTGCACCTTCACGGTGTACAGGGTGATGTCCACCACGTCGCCGTCGGCGCCGACCT +GCGGCATCTCGATCCAGTCGCCGACTCGCAGCATGTCGTTGCTGGTGAGCTGCACGCTGG +CGACGAAGGACATCAGGGTGTCCTTGTAGACCAGCAGGAGGACCGCCGACATCGCGCCGA +GGCCGGACAGCAGCAGGAGCGGCGAGCGGTCGATGATGATCGAGACGATGATGATCGCGC +CGAACACGAACACGGCGATCTTCGCCAGTTGCACGTAGCCCTTGATCGAGCGGGTGCGGG +CGTGCTCGGTACGCGCGTAGATGTCCAGCAGGGCATTCAGCAGCGCGGTGAGGGCGAGGA +TCAGGAACAGCACGGTGAAGGCCAGTGCCAGGTTGCCGAGGAAGTGCGCGGCCTTGTCCG +GCATCTCCGGGATCCATGGCAGGCCGAATTGCACCAGCAGCGAGGGCGTGGTCTGCGCCA +GGCGCTGGAACACCTTGTTGCGGATCAGGTCGCCGAGCCAGTGCAGCGCGGGCTGGCGGG +CGAGCAGGTTGGCGGCGTAGAGGACGATGTAGCGGGCCACCCGGCCGAGGATCAGGGCGC +CCACCAGCAGCAGGCCGAAGGCCAGGCCGGCGTGCAGGATAGGGTGTTGGTCGAGCGCCG +ACCAGTACCCGGAGAGCTGGTCGAGAAGGGATGTGGAGTCCATGGACAGGCGACTTCCTT +TGTTGCGGGCGGGGGGGACGGGCCGGCCCCGCGGTTGAACGGACATCCCGACGCGGGGGC +CGGGAACGCACAAAGGAACCAAGAGTGCCCCATCGGGCGCCGGATCGCATCCCCGACGTG +TCTACAAAGGTTTTCCGCCGACGCGCGGGCTCAGCCGGCAAGCTCCTCCAGGGTGCGCCC +GCGGGTCTCGATGCCGAAGGCCCAGACCACCAGGGCGGCGACGCCGAAACAGAGGGCGCC +GAGGGTGAACACCCCGCCCTGGCCGGTGAGCGGCAGGACCAGCCCGGTGACCAGCGGGCC +GAGCAGCGAGCCGATCCGCCCGACCGCCGAGGCGAAGCCCGAGCCGGTGGCGCGCGCGGA +GGTCGGATAGAGTTCCGGGGTATAGGTGTAGAGCACCGCCCACATGCCGAACAGGAAGAA +CTGCATGGCCAGGCCGAAGCCGATCAGCAGTGCCAGGCTGCCGCCGAACACCGCGGTCTG +GCCGTAGGCGTAGGCCATCGCGCCGCCGCCGAGGAGCATCAGCACGCAGCTCGGCTTGCG +GCCCCAGCTTTCCACCAGCCAGGCCGCGCAGAGGAAGCCGGGTATCCCGGCCAGCGAAAT +CAGCACCGTGTAGTACACCGACTGGGTGACGGCGAAGCCGGATTGCTGGAGCAGCGTGCT +GAGCCAGGAGGTCAGGCCGTAGAAACCGAGCAGGGCGAAGAACCACAGGCCCCAGACGGT +CAGGGTGCGGCGCCGGTAGGCTGGCGACCAGAGTTCGGCGAAGGCGCTGAAGAAGCCCGG 
+ACGGCTGCGTTCCCGCTGCGGCTGGCGCAGCGGCGGCGGCAGTTCGGACAGGCCCAGCGA +ACGCATCACCCGTGTTTCTATGTCGCGCAATACCCGGTCCGCCTGTTCGCGCCGGCCGGC +CTGCTCCAGCCAGCGCGGCGACTCGGGAATCAGGAAGCGGATGGCGAGGACGAACACCGC +CGGCAGCGCCAGCACCAGGAAAATGCTGCGCCAGCCGGTCAGCGGCAGCAGGAAGTAGGA +CAGGCAGCCGGCAGCGACGAAGCCCAGCGGCCAGAAGCCGTCCATCAGCGCAATGTACTT +GCCGCGGCGGCTGGCCGGGATCATCTCCGACAGCAGCGACTGGGCGATGGGAAACTCCAT +CCCCATGCCGATCCCCAGCAATACCCGGTAGAAGGTCAGGCTGTCGAGGTCGCCGGCGGT +GGAGCACAGGTAGCTGGCCAGGCCCCAGAGCACGATGCTGGCCTGGAATACCGGCTTGCG +CCCGAAGCGGTCGGCGAGCATGCCCGACAGGGCCGCGCCGATCACCATGCCGAAGAAGCT +CGAGCTGGCCAGCAGCCCGGCCTGCGCCGAGTCGAGCCCGAACTCGGCTTTGATCGAGCC +GAGCAGGAAGGTCATCATCGCCAGGTCCATCGAGTCGAAGAAGAACGCCAGCGCGATGAT +CACGAAGACCAGTCGGTGATACGGACTCAGCGGCAGCCGTTCGAGGCGTTCGGCGGCGGA +TGCGGGCGCGTCGGGCATGACCTGGTTCCTCTGGGCGGCGGGCGTCGAACGAGTCTGCGA +TCGCCGCCCGCGCCGTGCTTGCCTGGCGGCGACCCGGCGTCGAGCCGATTGCGACGCCGG +CGCTGGCCGTCGCGCCGCGCGAATAAAGCCTGCGCGCCGGCGCGGCAACCGTTAGGCTAT +GCACCTGTCCATTTTCACGTGCAATGCCGAGGTTTCCCTTGTTCAGCCAATTTCCCCTCC +ACGAACGCCTGCTGAAGGCACTGGAAAGCCTGTCCTTCAGCGAGCCGACGCCGGTCCAGG +CCGCGGCCATTCCCAAGGCGCTGGAGGGGCACGACCTGCGGGTCACGGCGCAGACCGGCA +GCGGCAAGACCGCGGCCTTCCTGCTGCCGCTGCTGCACCGCCTGCTGGCCGAGGACAAGC +CGCGCTCCCTGGCGCGCGCGCTGATCCTCCTGCCGACCCGCGAGCTGGCCCAGCAGACCC +TCAAGGAAGTCGAGCGCTTCGCCCAGTTCACCTTCATCAAGGCCTGCCTGATCACTGGCG +GCGAGGACTTCAAGGTGCAGGGCGCGCGCCTGCGCAAGAACCCGGAGATCATCATCGGCA +CCCCCGGCCGCCTGCTCGAGCAGCGCAACGCCGGCAACCTGCCGTTGCAGGACATCGAGG +TGCTGGTGCTGGACGAGGCCGACCGCATGCTCGACATGGGCTTCGCCGACGACGTGCTGG +CCCTGGCCAATGCCTGCCCGGCCGAGCGCCAGACCCTGCTGTTCTCCGCCACCCACAGCG +GCGCCGGGTTGAACAAGGTGATCGCCGAGGTGCTGCGCGAGCCGCAAGTGCTGCGCCTGA +ACCAGGTCGGCGAGCTGAACGAGAATGTCCGCCAGCAGGTCATCACCGCCGACGACGTGG +CGCACAAGGAACAGTTGCTGCAATGGCTGCTGAGCAACGAGACCTACACCAAGGCCATCG +TCTTCACCAATACCCGGGTCTCCGCCGATCGCCTGACCGGGCGGCTGATCGCCAACCAGC +ACAAGGTCTTCGTCCTGCATGGCGAGAAGGACCAGAAGGACCGCAAGCTGGCCATCGAGC +GCCTGAAGCAGGGCGCGGTGAAGATCCTCGTCGCCACCGACGTCGCCGCACGCGGCCTGG +ACGTCGAAGGCCTGGACCTGGTGATCAACTTCGACATGCCGCGCAGCGGCGACGAGTACG 
+TGCACCGCATCGGCCGGACCGGTCGCGCCGGCGCCGAGGGCCTGGCGATCTCGCTGATCT +GCCATGGCGACTGGAACCTGATGTCGAGCGTCGAACGCTACCTCAAGCAGAACTTCGAAC +GACGCAACATCAAGGAACTGAAGGCCGCCTACCAGGGGCCGAAGAAACTCAAGGCGTCGG +GCAAGGCCGCCGGCAGCAAGAAGAAAAAGCAGGACAGGAAGGGCGCCGCCGCCAAGCCCG +CGGCCAAGCGCAAGCCCGCCGCGCGGCCGAAGGCCGGGCCGTCGGCGGTGGTCAGCGCCG +ACGGCATGGCGCCGCTCAAGCGCAAGAAGCCGACGGCGGAGTGATCGCGCGCCGCCGACC +GCTTCGGAGGGCGGATAACCGTTTGCGGTTATCCGCCGCGCCGGCGTTGGATGGCGGATA +ACGCCGCTGGCGCTGTCCGCCCCGGGGGACTGCCTGGCCCCGTTCTCAGCTCCCCGCCGT +CGCGCCCGCCTCGTCGTTGGCCGGCGGCGTGGCACTCTTCACCAGCGGCGTATCGTAGCT +GCTCGCCTCGTAGCTCACGCAATGGCGGATCTGCGGCGGACCGTCGCCGGCATGCAGGGC +GGTGACGTTGTCGATGACGATCTTGTCGGCCAGGGTCATGCGGTCGAGCATGCGCAGGTG +CTGGATCCAGTTGTCCACCAGGAACAGTTCCTGCCAGACCTCCGGGTTGCTCACGTCGCG +GAACAGCGACCAGCGCTCCGCGCCGTTGCGCAGGCGCAGGCGGCGCAGCGGCTGGGCGGC +GCGAACGAAGTCGCGGGTGCGCTCGGCGGGAATCCGGTATTCGATGGAGACCAGCACCAT +GCCCCGCCGCGTGTTGAAGACGAAGCTCGGCTGCCCGGGCATGCTCGCCGGCGCGCGGGA +AATACTGGCGGCGTCCATTTCCGGCAGGCGCGAGTTGTACAGGAGGATCACCGAGGCCAG +CAGCAGGCAGCCGGCGGCCAGCAGCGCGCCGTGCACGGTCATGGTCTCGGCGAGATGGCC +CCAGAGGAACGAACCCAGCGCCAGGCCACCGTACAGCGCGGTCTGGTACAGCGCCAGGGC +GCGCGCCTTGATCCAGTCGGGGACGAGGATCTGCACCGCCGAGTTGTAGGTGGCGAGGGC +GCCGATCCAGCAGCCGCCACCGAGGATCAGCACCGGGAACAGGACCCAGAGGTTGTCCAC +CAGGCCGAGGGTCAGCAGGATCAGCGCCAGGGTGAAGCCGGCCAGGCTGATCAGCCGGCT +GCTGCCGATCCGCTGGCGCAGACGCGAGACCTGGGTGCTGCCGAGGATCGCGCCGAGGCC +GAGGGCGCCGAGCATGTAGCCGTAGATCGCCGCGTCGCCGTCCGGGTTGCGGTGCGCCAG +CAGCGGCAGCAGGGCCCAGACCGCGCTGGCGGAGAGGCCGAAGGCGAAGGAGCGCATCAT +CACCAGGCGGGTGACGGTTGAGTACTGGGTGAAGCGCAGCGCCGCGGTGACGCCTTCGAG +GATGCCTTCCGGCGGCAGGCTGCGCTTGGGCACGTCGCGCCGCCACTGCCAGATCGCCCA +GATCAGGGCCATGTAGCAGAAGCTGTTGAACAGGAACACCCAGGCCGGTCCCACCGCGCT +CAGCAGCAGGCCGCCGAGGGCCGGGCCGGCGGCGCGGGCGACGTTGTAGTTGACGCTGTT +GAGCAGCACCGCGTCGCTGACCATGCGCGCCGGCACCTGCTCGTTGACCGCCGCCTGCCA +GGCCGGGATGGTCACCGAGCCGCCGAGGGAGATCCAGAGGATGGAGATGATCAGCAGCAC +CGGGTCGAGGTAGCCGAGGAAGGCCAGCAGGGTGGCGAACATCGCTCCGGTCATCTCGAA +GCTGAGCCCCCAGAGCATGATCTTGCGCCGGTCGTGGTTGTCGGCGATGACCCCGGAGAG 
+GATCGACAGCAGCACCAGCGGCAGTGCCGCGGCGACCTGGATCATCGCCACCATCAGCGG +GCTGGCGTGGGCGTCGGTGACCACCCAGGCGGCGGCCACCGACTGGGCCCAGGTGCCGAG +GTTGGCGAACAGGTTGCAGATCCAGATGATGCGGAAGGCCTGGATCGAGAACGGCGCCCA +GGTGCCGGTGCGTTCTGGCTTGGCCGCTTGGCCTTCGGGTTTGAGGGGCAGGTCGTGCTT +GGGCGAAACGGACTGGAGCATCAGATGGGTGCCTTGTATCAGCGTGCCTGATGTCGGGAA +AGTCTAGCCGCCTGGCGCCTGGGCGCCATTGATGGCGGTCATGGGGCCGGCCCGGCGTGG +ATAGACATTGGTCCGATGGCGCTATCGGAGAGTTCCGATGTCCAGGCAGGACAGTGCCCT +GGGTGTCCCGCATTGGTAGGCTTGCCGGCACCGAGTCGAGACAAGAGCGAAAAGCCGATG +AAGTTGCTGTTGCGTTGCCTGCTGCTGGGGGCGTGGGTCGTCTCGCCGAGCCTGTGGGCC +TGGTCCAACCATACGGTGGGCAGCTACCTGGCATTGCGCGAGCTGGCGGCCATCCGCGAG +GCGCCGGAAGTCGAGGTGGAGCCGCTGGAGGCGTTCCTCGCCGCCGAGCGCGGCGGACTG +GCCGCCCTGCTGGACGAGCAGGAAGCCTACGCCCGAGCGCATATCGGCAACTACCCGGCG +CGTCCGGACGCCCTGCGCTTCGCTGCCGAGGGCGAGGCCGGCGACTTGCGTCAGGCGTTC +CTCGCGGCGCTGCGGGTCAACCCGGAGATCCGCCTGGCGCTGGCGCTCCAGCCGCTTCCC +GGGCAGGACCAGCCGCAGCGCCCGCACCTCAAGCCACAGGAGGTGCTGGTGTTCCAGAAC +CTCTCGCCGTGGACCGCCTGGCGTTTCATCCGCCTGGAGCCCGGCGAGCGAGTCGCGCCC +CTGGCAGTGCTGGCCACCGCCGCCGACGAACCGGACTACGGCCACGACATCAACCTGTTC +AGCGACAACCCCGGCGAAGCCGGCCAGCGCTACGGCTTCGGCACCCAGCCATTCGGCGAC +CCGCGCTTCGAGTTCAGCTCCCAGGCGCCGTTCCACATGGGCTTCTATCACGAGGCGGCG +GTGATCTACAGCGGCGCGCCGTTCCTCGCCCGGGCCTGGCCGGAATGGCGTGCCTACCAG +TACTTCGGCCTGTCCCGCTTCGCCTTCGCCAACGGTCATCCCTACTGGGGCTACCGGTTC +CTTGGCTGGGGCATGCACTACATCCAGGACATCACCCAGCCCTACCACTCCACGCCGCTG +CCCGGTGCCAGCCTGGCGGCGATGCTACAGATGGAGGGCAAGGCATTGCTCGGCTACCCG +GAGGAAAAGCAGGCGGCCATCGAGCGTGTCGCGAACCGCCATACCGCGGTGGAGAAATAC +CAGTTCGACTGGCTGCGCCAACTGCTCCGCGACGGCCGCCCGCAGCCGATGCTCGACGCC +TACGCCGACACCCGCCGCGACGGTGCCTATCCGGCCTATTCGCCGACCTACTTGCGCGAG +GTGGTGGCCGCCGAGTCGAACGCCCACGCCGCGGCCTTCGACGCCGCCATCGGCGAATGG +CTGGCCGCCCGCCCGGCCTCCGCCGCGCAGGACTTCAGCGAGAGCAACCAGCCACGCCCC +GAGGCCCACGACAATGCGGGGCTGAACGCGCAACTGATCGAGCTGATCGGGCATTTCGGC +GCGCATAGCCGGAACATCGTCAGGGCGGCGTTGGAGACGGAGGAGGGTGGGGCGAAGGAG +TGAGGGGGCGGCGGATAAGCGGCGGATAACCGCGAGCGGTTATTCGCCCTACGGAGCGGG +GGCACTGTCCGGCGTTTCCGGGGCGTGTAGGGCGAATAACGCCATGGGCGTTATCCGCCG 
+ATGTCGCGGATAAGCGGCGGATAACCGCGAGCGGTTATTCGCCCTACGGAGCGGGGACGC +TGTCCGGCGTTTCCGGGGTGTGCAGGGCGAATAACGCCATGGGCGATATCCGCCGATGTC +GCGGATGAGCGGCGGATGACCGCGAGCGGTTATTCGCCCTACGGAGCTGGGGCGCAGTCC +GGCTTTTTCGGGGTGTGTAGGGCGAATAACGCCATGGGCGTTATCCGCCGATGTCGCGGA +TGAGCGGCGGATGACCGCGAGCGGCTATTCGCCCTGCGGAGCAGGGGCGCTGTCCGGCGT +TTCCGGGGTGTGCAGGGCGGCGAACCCGCCCCGCCAGGCGGGTCGATGCCGCGACTCAGT +CGCGCAACAGCTCCGCAGGCACCTCGCTGCGCAACATCAACTGGCACTGCTGGCTCTCCG +GGTCGAACAGGATCACCGCCTCGCCCCGCCGCAAGGCATGCCGGGCGCGTTCTACACGCA +CGTCCAGCGGGGTCTCGTCGCCATTGTCGGTGCCTTCGCGGGTGACGAAGTCTTCCAGCA +GGTTGTTCAGGGTGTCGGCTTCGAGCAGGTCGTGGGGGATCAGCATTGGCGGTACTCCGG +GATGGCGGCGAATGCTAGCGCGACGCATCGCGCACTGTCATGCCTGTCGCGAAGGCTGGC +CGAGCAGGCGTTCGAGGGTGGCATAGAGTTCCGCCCGGTCCACCGGCTTGCCGAGGTAGG +CGTCCATGCCAGCCTCGATTCCGGCCCGGCGGTGTTCGTCGAGGATGTGCGCGGTCAGGG +CGACGATCGGTACGCGTGGCCAGCCCTGGGCACGCTCCTCCCGGCGGATCAGGCGGGTCG +CCTCGAAGCCGTCCATTTCCGGCATCTCGCCATCCATCAGGATCAACTGGATGCCGTTCG +GATCGCGCAGGTACTCGTCCAGCGCGAGCCGTCCGTTGCCGGCCAGGCGCACCGCGTAGC +CGCGCTTGGCGAGGAAGCCGCGCACCACCAGCTGGTTCACCGGGTTGTCCTCGGCGACCA +GGATGCATGGGGCGTCCAGGCGTTCGTCCGGGGCCGCCTCGCTGCTCCGCCCTGGCTGTC +GGCGCCGCTCCCGGTACAGTTCCAGCAGGGCTTCGCGCAGGGCCTTCACCGCCACCGGCT +GGGCCAGGGCCAGCAGGCGCAGGCCTTCGTGCGGCGGCAGGTCCTGGCAGTGCTGCGGCG +GGCATAGCAGGAGGATGCGCTGGCCTTGCTCCAGTTGCGCATAGAGGGTGTCCAGCCAGA +TCGACGGCGGCCCCGGCCAGGGCGCCGCCAGGACCAGCAGGGGCGGTGCGCTGAAGTCCT +CCAGGTAGGCGTTCAGCCGTCTCGGCTGCAGGCAGCGTTCGACCCGCAGCCCCCAACGCT +CCAGCAATGCCTGCAGGCAGTCCAGGGTCAGGTTGTCCTCGCTGGCCAGCAGGGCCGGGC +GGTGTTGCAGCAGTTGCGCCAGTTCATCCGCTTCGCCGGCGTCGAGGGCGGGGCTGAGCG +GCAGGTCGACGCTGAACCGGGTGCCCTTGCCCGGCTCGCTGCTCACCTCGATGCGCCCGC +CCATCATCTGCACCAGCTCCCGGCTGATCGCCAGGCCCAGGCCGCTGCCGCCGTAGCGGC +GGGTGGTGCTGGAGTCGGCCTGGGAGAAGGATTCGAACAGGGTCTTCTGCGCCTGGGCGG +ATATCCCGATGCCGCTGTCGCTGACGCTGTACAGCAGGCGCTCGTGTCCGCCCTCGTCGA +AGCGCCGCTGCACGCGCACGGCGACATGGCCTTCGGCGGTGAACTTGAGGGCATTGCTGA +GCAGGTTCATCAGTACCTGGCGCAACCGCGTCGGGTCGCCGTTGAGGCGGCGCGGCACGC +CCCTGTCCAGGCCGAGATGCAGGCGCAGGCGTTTTTCCACGGCCTGGGCGCTGAACAGCG 
+CCAGGGTGTCGGAGAGCAGTTCCTCCAGGTCGAAGTCGATGCGCTCCAGGTGCAGCTTGC +CGGACTCGATGCGAGCGTAGTCGAGGATATCGTTGATCACCGACATCAGCGCGCTGCCGG +AGCTGGCGATGGTCTCCACGTAGGCCGCCTGGCCGCGGTCCAGCGGGGTGTCGCGGAGCA +GTTGCAGCATGCCCAGCACGCCGTTCAGCGGGGTGCGAATCTCATGGCTCATCTTCGCCA +GGAAGCGGCTCTTCGCCTCGTTCTCCACCCGCGCCTGCTCGGCCGCCTGGCGCGAGCGGA +AGCCTTCCTCCTTGAGCGCATTGATGCGGTCGGCGAGGCCGATGGACAGGGTCACCAGTT +CCACGGTCATGCCGATCTTGACCACGCTGCTGCCGAACAGGCCGAACAGTTCGAAGCCCA +GCGAGGCGGCGGTGGTGACCAGGAACGACAGCAGCAGCGCGCCCCAGGCCAGGATGTAGT +AGAGGCCGTAGCGCAGGCCCTGGCGCCAGACGTGGACGCCGGCCAGCAGCAGGCTGAGGG +AGACCAGCATCACCGTCAGGCTGGCGAGGACGTTCCAGGCGCGCAGCCCCACCAGCGGCT +CGCTGGCCAACAGGACCACGCAGGCCAGCAGCAGGCCGCGGAGGAAGCGGTCGAGGCGCG +GGAAGTCGCGGCGGGTGTAGAGGTAGCCGCGGCTGAACTGGATCGACACCAGGCAGCTCA +GGTACATCAGCAGGTAGATGCCGGCCGACTCCAGCGCCACGTGTCCGGGCAGCAGCTTGA +ACAGCAGGCCGTCGAAGCTGGCGGAAAACAGGCCGAGGCTGAGGTTGTACAGCAGGTACC +AGGCATAGGTGGCCTCGCGTAGCGAGACGAAGAGGAACAGGTTGTAGCAGAACAACCCGA +ACAGCACCCCGTAGAAGGCCCCGTTGAAGCCCATCAGGGTTTCCTGGCTGGCCGCGCTGG +CCGCGTAGGTGCTGAAGTACAGCGGCACGTAGACGGTGCTGGTGCTCTGCACGCGCAGCA +GCAGGGTGCTCTCGCCGGGCGGCAGTCGCAGCGGGAACCAGAAGTTGCGCACTTGCACCG +GGCGCTGGGAGAAGGCGAACAGGTCGCCGCTCTCCTGCTGTTCGATGCGCCCATCGGCGG +TCAGCAGGTAGACCTTGAGGTCGTCCAGCAGCGGGTAGTTGATCTCCAGGAAACCGGCCA +GGTCGGTGCCGTTGCGGTTGTCCAGGCGCACCCTGAACCACCAGGCTGCGGCGTTCTTGC +CGAAGTTCGCGTGGTCGCCGCGCAGGGCGGCGAAGGCCTGCGCCGGCAAGGCGAGGATGT +CGCCCAGACGCGCCTTGCCGCCGGCGTCGCGGTAGTACTCGGCGTACTCGCCCAGGGACA +GGCGCAGGTCCTCTCCGTCCAGCGGCGCGGGCGGCTGCGCCCGGGCCGGCAGCGCGCAGC +AGAGCAACAGCAGGACCAGGCAGAGTCGAGACATGCAACTACTCCATGGACACGAGTCGG +CACGCATGATGCTACGGAGGAAACGAGACAGGCGAGGGAACCTGCCATCAGCCTAGCAGT +TGGCTGGAGAGGAGGGCGGCCTCCCCGGAGGCGGGGAGGCCGGCGCGGGCTCAGCCCTTG +TCGCTGTTGTTGCTGCTGTCGAGCACTTCGTCCACCGGCGGGACGTGGGTGCAGCTTTCC +ATGTGCACCGGATGCTCGAGCTGCTTGTTGAAGCGTTCCAGCGAGGTTTCCCGGGGCTTG +GCGTCGCTGGCGAAGACCGGCGGGCTGAGGATGTAGGCGGTGAGCAGCCGGCTCAGCGCG +GCGAGACTGTCGATGTGCGTGCGCTCGTAGCCGTGGGTGGCGTCGCAGCCGAACGCCAGC +AGCGCGGCGCGGGTGTCGTGGCCGGCGGCGATCGCCGACTGCGCGTCGCTGTGGTAGTAG 
+CGGAACAGGTCGCGGCGCAGGGCGATGTCGTGGCGCTCGCCGAGCCGCAGCAGGTGCCGC +GACAGATGGAAGTCGTAGGGCCCGCCGGAGTCCTGCATGGCCACGCTGACGCTGTGCTCG +TTGGAATTCTGGCCTTCGGCCACCGGGGCGATGTCGATGCCGACGAACTCGCTGACGTCC +CAGGGCAGGGCGCCGGCGGCGCCGGAGCCGACTTCCTCGGTGATGGTGAACAGCGGGTGG +CAGTCGATCGGCGGCTCCTGGCCGCTTTCCTTGACCGCCTTCAGCGCCGCCAGCAGCGCC +GCGACCCCGGCCTTGTCGTCCAGGTGGCGGGCGCTGATATGGCCGCTCTCGGTGAACTCC +GGCAGCGGATCGAAGGCCACGTAGTCACCCACCGAGATGCCCAGGCTCTCGCTGTCGGCG +CGACTGGCGGTATAGGCGTCGAGACGCAGCTCGATGTTGTCCCAACTGATCTTCAGGTTG +TCCACCTCGGTATTGAAGGCGTGGCCCGAAGCCAGCAGCGGCAGCACGCTGCCACGGAAC +ACGCCCTGTTCGGTGAACACCGTGACCCGGCTACCCTCGGCGAAGCGGCTCGACCAGCAG +CCTACCGGGGCCAGGCCGAGGCGGCCGTTGGGCTTGATCTCGCGGACCAGCGCGCCGATC +GTGTCGAGGTGCGCGGAAACCGCCCGGTCGGGGGTATTCAGGCGGCCCTGGAGGGTCGCC +CGGATGGTTCCCCGGCGAGTCATCTCGAAGGGCACGCCAAGTTCGTCGAGCTGCTCGGCG +ACATAGCGCACGATGGTATCGGTGAAGCCGGTGGGGCTGGGGATGGCGAGCATCTCCAGC +AGCACCCGTTGCAGGTAGTTGAGGTCCGGTTGTGGCAGAGGGGTCATGAAGGCACCTCGC +GGCTGAGGGGGAAGAGAAGATCGACGAAGCGTTCGGCGGTCGGCTGCGGCTCGTGGTTGG +CCAGGCCGGCGCGCTCGTTGGCTTCGATGATCACGTAGTCGGGCTGGTCGGCGGCCTCGA +CCAGGAAGTCCAGGCCCACCACCGGTATTTCCAGGGCCCGCGCGGCCTGGATTGCGGCCT +CGCGGAGGGCCGGATGGAGGGCGTCGGTGACGTCCTCGAGGATGCCGCCGGTGTGCAGGT +TGGCGGTGCGCCGCACGGCCAGGTGTTCGCCGCTGGGCAACACGTCGTCGTAGCCATAGC +CGGCGGCCCGCAGGGTGCGCTCGGTCTCGTGGTCGAGCGGGATGCGGCTTTCCCCGCCGG +TGGCGGCCTGGCGCCGGCGGCTCTGCGCCTCGATCAGCTCGCGGATGCTGTGCCGGCCGT +CGCCGATCACCGCGGCGGGACGACGGATGGCGGCGGCCACCACCTCGTAGCCAATCACCA +GCACGCGCAGGTCGTGGCCGGCGTGGTAGCTCTCCAGCAGCACGCGCGAATCGAAATGGC +GGGCGGCCTCGATGGCCTCCTCGACCTCTTCCGCGGTGCGCAGGTCCACCGCCACGCCCT +GGCCCTGCTCGCCGTCCACCGGCTTGACCACCAGCGAACCGTGCTCGGCGAGGAACGCGG +CGTTTTCCTCCGCGCTGCCGGCCAGGCGCTGCTGTGGCTGGCGCAGGCCGGCGTGCTGCA +GGGCGCGGTGGGTGAGGACCTTGTCCTGGCAGAGGGTCATGCTTACCGCCGTGGTCAGGT +CGGAGAGCGATTCACGGCAGCGGATCTGCCGGCCGCCCTGGGTCAGGGTGAACAGGCCGC +CCTCGGCGTCGTCGACGCGCACCTCGATGCCGCGCCGGTGGGCTTCGTCGACGATGATCC +GGGCGTAGGGGTTGAGGTCTTCCTCCGGGCCGGGGCCGAGGAACAGCGACTGGTTGATGC +CGTTCTTGCGCTTGATGGTGAAGGTCGCCAGTTCGCGGAAGCCGAGCTTGCGGTAGAGCG 
+CCTTGGCCTGCTGGTTGTTGTGCAGCACCGAGAGGTCGAGGTAAGCCAGTTCGCGGCTCA +TGAAGTGCTCGACCAGGTGCCGTACCAGCGCCTCGCCGACGCCCGGCCGGCTGCATTGCG +GGTCCACCGCCAGGCACCAGAGGCTGGAGCCGCCCTCCGGATCCTGGAACGCCTTGGCGT +GGTTGAGGCCCATCACCGTGCCTACCGCGACGCCAGTGTCGGCGTCCTCGGCCAGCCAGT +AGATCGGCCCGCCCTGGTGGCGCGGGCTGAGGCGCTCGGTCTGCACCGGAAGCATGCCGC +GCGCCAGGTACAGGCGGTTGATCGCCGCCCAGTCCTCCTCGGTATGCACCCGGCGGATGC +GGAAACCGCGTGGCGGACGGCGCGCCGGGCGGTAGTCGGTGAACCACAGGCGCAGGGTAT +CCGAGGGATCGAGGAAGAGCTGCTGCGGGGCGTGGGCCAGGACCTGCTGCGGCGCCGCCA +CGTAGAGCGCGATGTCGCGTTCGCCGGGGGCTTCCTGCAACAGGTCGCGGGCCAGTTCCT +CGGCGTCCGGGTAGGTCTGCCCGATCAGCAGGCGGCCCCAGCCGCAGTGGATGCTCAGCG +GTTTCTCGGTGGGCTCGCTGTGATCTTCGGCGAGACGCGCCTGGAGGCGTTCGTAGGTGG +GCGCCTGGATACGTTGCAGGCGCTGGTTGTGCGGGTGAGGCAGATGCAAGTGGCCTTTCA +TGATCGCTCCTGGTTCGGGCCCCTGCGTTGGGGGCCCATTTTCAAAGTCCCTGTTCGCTG +AGCCAGAGGTTGAGGGCGGCCAGCTGCCACAGCTTGGAGCCGCGCAGCGGTGTGAGGTCG +CCGTCCGGATCGCTCAGCAGGCGATCGAATATCGCCGGCTGGAACAGGCCGCGGTCCTGG +CTCGGGTCGAGCAGCAGTTCGCGTACCCATTCGCGGGTGCGGCCCTGCAAGTGCTTGAGG +CCGGGCACCGGGAAGTAGCCCTTGGGCCGGTCGATGACTTCGCTGGGAATCACCTTGCGC +GCGGCACCCTTGAGCACCTGCTTGCCGCCGTCGCCGAGCTTGAAGCGGGCGGGAATGCGC +GCCGACAGCTCGGCCAGGCGGTAGTCGAGGAACGGCACGCGGGCTTCCAGGCCCCAGGCC +ATGGTCATGTTGTCGACCCGCTTGACCGGGTCGTCGACCAGCATGATCGTGCTATCCAGG +CGCAGCGCCTTGTCCACCGCGGCCTCGGCGCCGGGGCTGGCGAAGTGGTCGCGGACGAAG +CGCCCGGCGACGTCCTCGACGCGGAAGCGCTCGCCGACGGTGGCCAGGTATTCCTCGTGG +TCGCGGTCGAAGAACGCCGCGCGGTACGCGGCGAAGGCGTCGTCGGCGCCATCCACCTTC +GGATACCAGTGGTAGCCGGCGAACAGCTCGTCGGCGCCCTGGCCGCTCTGCACCACCTTG +CAGTGCTTCGAGACTTCCCGCGCGAGCAGGTAGAAGGCGATGCAGTCGTGGCTGACCATC +GGCTCGCTCATGGCGCGGAAGGCCGCCGGCAACTGCTCGATCACTTCGTGCTCGCCGATG +CGCAGGCGATGGTGACGGGTGTGATAGCGCTCGGCGATCAGGTCGGAATACTGGAACTCG +TCGCCGCGCTCGCCGCCGGCATCCTCGAAGCCGATGGAGAAGGTCAGCAGGTTGTCCACC +CCGGCTTCGTGCAGCAGGCCCACCAGCAGGCTCGAATCGACCCCGCCGGAGAGCAGCACG +CCGACCTCGCGGGCGGCGCGCTGGCGGATCGCCACCGCTTCGCGGAGGCCGTCGAGGACG +CGCTCCTGCCAGTCGTCGAGCGTCAGCTCGCGCTCGTCCGGGCGTGGGCCGTAGTCCAGC +GTCCACCAGGTGCGCTGCTCGCAGCTGCCGTCGAGGTCGACGCTCATCCAGGTCGCCGGC 
+GGCAGCTTCTTCACGCCCTCCAGCAAGGTGTGCGGGGCGGGAACCACGGCGTGGAAGTTG +AGGTAGAAATTCAGCGCCTGCGGATCGAGGTCGCTGGCGATGTCGCCGCCCTTGAGCAGG +GCCGGCAGGCTCGAGGCGAAGCGTAGGCGCGAGCGGTCGAGGCTGTAGTAGAGCGGCTTG +ATGCCGAGTCGGTCACGGGCCAGGAACAGCCGCTGGCGATCGCGCTCCCAGACGGCGAAG +GCGAACATCCCATTGAGCCGGGGCAATAAATCGGCGCCCCAGGCATGGTACCCCTTCAAG +AGAACTTCGGTGTCGCCGCCGGAGAAGAACCGATAGCCAAGGCTTTCCAGTTCGCCACGC +AGTTCGGGGTAGTTGTAGATGGCGCCATTGAACACCATGGCCAGGCCGAGATCGGGATCG +ATCATCGGTTGGCCGGAAGCTTCCGCCAGGTCCATGATCTTCAGCCGTCGATGTCCGAGG +GCGATCGGACCCTGGGCGTGGAAGCCCCAGGCGTCGGGGCCGCGAGGGGCCAGGTGGTGG +GTGATGCGTTCGACCGCTGCGAGGTCGGCTGCCTGGTTGTCGAAACGAAACTCTCCTGAT +ATGCCGCACATATTCCTTACCGGTTCTCCGTTGGGGAAGTGGGTTGCGTACCCCTTCCGG +GTACATGACTTACCTAACAGAGGACCGTTACAGTTGGAAGGAGTTCTATACGAATTTTCC +GGGCCTATCAGGACCCTTAACGAAGAATTTATTATGGTTGAAAGTAATAAGACCGCTGCC +GATACCTATTCGCTCGCTATTTTCCGCGCCATCCGCCGTCTCCAGCAGGCCGCCGAAATC +CACTCCAAGCGTCTCAGCCGATACGGTGGCCTGACCCCGCTGCAATTGCTGATCCTGCAT +GTCCTGGCGGTGGAGGGAGAACTGACCGCGACGCAACTGGCCAAGCTGGTCAGCCTGTCC +CAGGCATCGCTCTCCGGCGTGCTCGACCGCCTCGAAGGACGCGGCCTGCTCTATCGTCGG +CGCGACGAACAGGACCGGCGCAAGTCCTGGCTGCACCTCGACCCGGCCGGCCACGAAGCC +CTGGCGGAGGCGCCGCCGCTATTGCCGGAGTACGTGATCGAGCGCTTCGCCGCGTTGCCC +GAGTGGGAGCGCCACGGCCTGCTGGCCGCGCTGCTGCGCGCGGCCGACCTGTTCGGCTTG +CCGGAAGAGGACGTCGAAGAAGAGTGAGGGCGCCAGGTTCGCTACGAAAGACAGTCGCGT +CGGCGGTCTTTACCTGGCGAACCTGCCGATGCCTGGTGGACAAGCGCGTTGTACGCCATG +GCGTGACCTTCGCGTAGGGCGCATAACGCCGCTTCCCCGGTGCCCCGGCCGCCGAGGAAA +GCTACCCGCCGTAGAGCTTCTGCGCCGCCAGGGAGAAGCTCACCGCCACCAGCATCAGCG +CGAACAGGCGTTGCAGGGTCGGGCCGCCGAGGCGCTGCGCCAGCAGGTTGCCGCCGAGCA +CGCCGACCGCGCCGCCGGCGGCGAGGCCGCCGAGCAGCGGCAGCGGCGGGTGGGCGCCGG +TCAGGTAGATCAGGAAGCCGCCGCCGGAGACCAGCGCGATCACCGCCATCGAGGTGGCGG +TGGCGGCCATCATCGACAGCGGGGTGAACCAGAGTAGCCCCGGCACCACCAGGAAGCCGC +CGCCCACGCCCATCAATCCCGACAGCAAGCCGACCGCCAGGCCGATGCCGAGCAGCGGCA +GGCTCCGTGCCTGGTCGCTGGCCTCGCGTTTCATCCCGGCGCCGCGCCACATGCGCCAGG +CCGACCAGAGCACCAGCAGGCAGAAGGCGACGATCAGCACGCCTTCCGGGACGAAGCGGC +CGAGCCATTGGCCGACGGCGTTGCTCGGCAGGCCGGCCAGCGCCAGGACCAGCACCGGCC 
+GCCAGGCCACCTGGCCCTGGCGCGCGCGCGGAATGGCGCCAATCAGCGCCGACAGGGCGA +CCGCGCCGAGGCTCACGCCGATGGCGTCGCGCAGCGGCAGGTGCAGGCTGAGCAGCAGGG +GCAGGGCGACCAGCGAGCCGCCGGCGCCAGTGAGGCCGAGCAGCAGGCCGAGAACGGCGC +CGATGCCCAGGGATTCGAAGAGCAGTACATCCATCGTCGATCAACTGTTTCCGCGGGTGA +GCCAGCCGGTGGTCCGGTATGGCCTGCGCGTGCGGCAGGCCCGAAGGGTAGCGCAAATCG +GTGCGCCCTGGGCTTCAGCCGCGCTGCCCGCGCACCAGGTCGCGGAGCAGGAAGCGGTTC +GGGTGGCAGGCCTCGGCCACCGCGCGCGGCAGCGGCAGCGGCTCGCCGCAGATCCACGCC +GCCAGCAGTTCGCCGGACAGCGGCGCACTGATCAGCCCGCGCGAGCCGTGGCCGCTGTTC +AGGTACAGGCCTGGCAGCCAGGGGCAGGCACGCTCCGGCGCCTGGCGGGCGTCGCGGGCG +AGCACCGCGTAGGCCTCGTCGAAGGCCGCGCGCTCGGCCAGCGGGCCTACCAGGGGCAGG +TAGTCGGGGCTGGTGCAGCGGAATGCCGCGCGGCCTTCCAGGCGCTCCAGCGGCAGGTCG +TCGGCGCCCAGGCGCTGCAACAGGTCGGGGGATATTTCCCGCAGCAGTTCGAGGTTGCCC +TGGTGTTCGGCGAGGGTCGGCGCCAGGTCTTCGCTCTTGAAGTCGAAGCTGGCGCCGAGG +GTGTGTTCGTCGCCACGCGGCGGTGCGACGTAGCCCTCGGCGCAGACCACGGTGCGCAGC +GCCCGGCTTTGCGGGGTGGCCGGCAGGCGGGTGACCTGCCCGCGAATGCGCTTCAGCGGC +AGCTCGGCGGCGGGCGGGAAGTCGCGGATGTCGGCGGCGGTGGCGAGGATCGCGAGTGGC +GCGCGGGCCAGGCATTCGTCGCCGGCGTAGGCGCACCAGTCGTCGCCCTCGCGGCGCAAG +CGTACTGCCCGGCCGCTCAGCAGGGTGATCCCGGGGGTGGCGGCCAGGGCCTGGCACAGC +GCGGGCGGATGGACCCAGCCGGCTTCGGGATAGAACAGGCCGCCGGCGGGCAGGGCGACG +CCGGCGAGGCGCTCGGCCTGCTCGCGCTCCAGGCCGTGCAGCAGGTCGGCGGGGAAGGCT +GCGGCCAGTTGCGCCTGGCGTTGCGCTTCCTTGGCGTCGAAGGCCAGTTGCAGTACGCCG +CAGGCATCCCAGTCGTGGCCGCGCCGCAGGCGTTCGAGCAGCCGGCGGGTATGGCCGAAG +CCGCTGAGCACCAGGCGCGACAGCGGCGTGCCGTGGGCCGAGAGTTTCAGGTAGAGCACG +CCTTGGGGGTTGCCCGAGGCTTCCCGGGCGAGGCCGGGATGGCGTTCGATCAGGGTCACC +TGCCAGCCGCGCGCGGCGAGGCTGGCGGCGCTGGCGCAGCCGGCCAGGCCGCCGCCGACC +ACCAGTGCCGCGCGGCGCCCGGCGTGGGGCGCGGGGCGTGCGTACCAGGGCTTGCCGGCG +TTCGCCGGCGGCCCCTGGTAGGTGCCGCTGAGCATCTCGCGCTTCTGTCCGTAGCCCGGT +ACCCGTTGCATGGCGAAACCGGCCTCGACCAGCCCGCGGCGAACGAAACCGGCGCTGGTG +AAGGTGCCCAGGGTCGCCTGCGGCGCCGACAGTCGCGCCAGTTCGGCGAACAGTACGGGC +GACCACATGTCCGGGTTCTTCGCCGGGGCGAAGCCATCGAGGAACCAGGCGTCGACCCGC +GCGTCGAGCTGCGGCAGGCACTCCAGGGCATCGCCCAGCAGCAGGGTCAGGCCGACCCTT +CCGCCCGCGAACGCCAGGCGCTGGAAGCCCGGGTGCACGGCAAGGTACTGGCCCAGCAGG 
+GCCTCGCTCCAGGGCGCCAGTTCCGGCCATAGCGCCAGGGCGCGGCGCAGGTCGGCGGCG +GCGAGGGGGAATTTCTCCACGCTGACGAACTCCAGGCGCGCGCCGGCCGGCGCGACCCGC +TCGAACAGCTGCCAGGCGCAGAGGAAGTTTAGTCCGGTGCCGAAGCCGGTCTCGCCGATA +CACAGCACCTCGCCGTCGCCCAGCGCGGCGAAGCGCTCGGCCAGGCGGTTGGTGGCGAGG +AACACATGGCGCGTCTCGTTGAGCCCGGAATGCCGGGAAAAATATACGTCGCCGAAGGCG +CGGGATAGCGGCTGGCCGTTTTCGTCCCAGTCGAGCTGGGCATGCTGGAAGTCGGACATG +GCAGAACCTGAAAAAGCGGATGGCGCATTTTACGGGGCTTTGCCGGCCAGCGGCGAGGCG +CTGCGCGAACATGCCGCGGCGCCGCGCAGACAGCCGCACGCCGATCCGCTACCTTGAAGG +ACTCAGGTTAAGGGAGACCTCCATGTTCGAATCCGCGGAAGTTGGCCACAGCATCGACAA +GGACACCTACGAGAAGGCCGTCATCGAGTTGCGCGAAGCGCTGCTCGAGGCGCAGTTCGA +GCTCAAGCAGCAGGCGCGCTTCCCGGTGATCATCCTGATCAACGGCATCGAGGGCGCCGG +CAAGGGGGAGACGGTCAAGCTCCTCAACGAGTGGATGGACCCGCGCCTGATCGAAGTGCA +GAGCTTCCTCCGTCCCTCCGACGAGGAACTGGAGCGGCCGCCGCAGTGGCGCTTCTGGCG +GCGCCTGCCGCCCAAGGGGCGGACCGGTATCTTCTTCGGCAACTGGTACAGCCAGATGCT +CTACGCGCGGGTCGAGGGGCATATCAAGGAGGCCAAGCTGGATCAGGCCATCGATGCCGC +CGAACGCTTCGAGCGCATGCTCTGCGACGAAGGCGCGCTGCTCTTCAAGTTCTGGTTCCA +CCTCTCCAAGAAACAGCTCAAGGAACGCCTCAAGGCGCTGGAAAAGGACCCGCAGCACAG +CTGGAAGCTCAGTCCGCTGGACTGGAAGCAGAGCGAGGTCTACGACCGCTTCGTGCATTA +CGGCGAGCGCGTGCTGCGCCGCACCAGCCGGGACTACGCGCCCTGGTACGTGGTGGAAGG +CGTGGACGAGCGCTACCGCGCCCTGACCGTCGGTCGCATCCTCCTCGAAGGGTTGCAGGC +CGCGCTGGCCACCAAGGAGCGCGCCAAGCGCCAGCCGCACGCCGCGCCGCTGGTGTCGAG +CCTGGACAACCGCGGCCTGCTGGACTCCCTGGACCTGGGCCAGTACCTGGACAAGGACGC +CTACAAGGAGCAGCTCGCCGCCGAGCAGGCCCGCCTGGCCGGGCTGATCCGCGACAAGCG +CTTCCGCCAGCATTCGCTGGTCGCGGTGTTCGAGGGCAACGACGCGGCCGGCAAGGGCGG +CGCCATCCGCCGTGTCACCGACGCGCTGGACCCGCGCCAGTACCATATCGTGCCGATCGC +CGCGCCGACCGAAGAGGAGCGTGCGCAGCCCTATCTCTGGCGCTTCTGGCGGCACATTCC +GGCGCGTCGCCAGTTCACCATCTTCGACCGTTCCTGGTACGGCCGCGTGCTGGTGGAGCG +CATCGAGGGCTTCTGCGCACCGGCCGACTGGCTACGCGCCTATGGCGAGATCAATGACTT +CGAGGAGCAGCTCAGCGAGTACGGGATCATCGTGGTGAAGTTCTGGCTGGCGATCGACAA +GCAGACCCAGATGGAGCGCTTCAAGGAACGCGAGAAAACCCCCTACAAGCGCTACAAGAT +CACCGAGGAAGACTGGCGCAACCGCGACAAGTGGGACCAGTACGTGGACGCGGTGGGCGA +TATGGTCGACCGTACCAGCACCGAGATCGCGCCCTGGACCCTGGTCGAAGCCAACGACAA 
+GCGCTTCGCCCGGGTCAAGGTGCTGCGCACCATCAACGACGCCATCGAGGCGGCGTACAA +GAAGGACAAGTGAGGCCTGCCGGAATGCGGCGCCGTCCTCTCCAGGGGATGGCGACGCAT +ACGCCGGATGAATGATCGGTGCCTCGCGCGATTGCCTCGCCTTATCCTCGCTCGATCCGG +CCGCTGCCCCGGCGGCCGACCGACAATAACAGCGAGGGTACCCCCATGCGTGAAGTGGTG +ATCGTCGACAGCGTCCGGACCGGCCTGGCCAAGTCCTTCCGCGGCAAGTTCAACCTGACC +CGGCCGGACGACATGGCCGCCCACTGCGTCGACGCGCTGCTGGCGCGCAACGACCTCGAC +CCGCTGCTGGTGGATGACTGCATCGTTGGCGCCGGCTCCAACGAAGGCGCCCAGGGCCAC +AACATCGGGCGCAACGTGGCGGTGCTCTCCGGCCTCGGCATCCAGGTGCCGGGGATGACC +CTCAACCGCTACTGTTCCTCCGGCCTGCAGGCGATCGCCATCGCCGCCAACCAGATCGCT +TCCGGTTGCAGCGAGGTGATCGTCGCCGGCGGCGTCGAGTCGATCACCCTGACCCTGAAG +AGCGTCAACACCGACCACCTGGTGAACCCGCTGCTGCAAAGGGAGGTGCCGGGCATCTAC +TACCCCATGGGGCAGACCGCCGAGATCGTCGCCCGTCGCTACGGCATCACCCGCGAGGCC +CAGGACGCCTACGCCCTGCAGAGCCAGCAGCGGATGGCGCGAGCCCAGGCGGACGGGCTG +TTCGCCGACGAGATCGTGCCGATGACCACCCGCTACGCGGTGGAGGACAAGGCCAGCGGC +GAGAAGCAGGTGCTCGACGGAGTGGTCGACCGCGACGACTGCAACCGCCCGGACACCACC +CTCGAAGGCCTGGCCTCGCTGAAGCCGGCGTTCGCCGAGGACGGTTCGGTCACCGCCGGC +AACGCCTCGCAACTCTCCGACGGCGCCTCGATGACCCTGCTGATGAGCCTGGAGAAAGCG +CTGGCGCTGGGCCTGGAGCCGAAGGCCTTCTTCCGTGGCTTCACCGTGGCCGGCTGCGAG +CCGGACGAAATGGGCATCGGTCCGGTGTTCTCGGTGCCGAAGCTGCTCAAGGCGAAGGGG +CTGAAGATCGCCGACGTCGATCTCTGGGAACTCAACGAAGCCTTCGCCTCGCAGTGCCTG +TACTGCCGCGACCGGCTGGAGATCGACAACGAGAAGTACAACGTCAATGGCGGCTCCATC +GCCATCGGCCACCCGTTCGGCATGACCGGCTCGCGCCAGGTCGGTCATCTGGTCCGCGAA +CTGCACCGGCGCAACCTGCGCTACGGCGTGGTGACCATGTGCGTCGGCGGCGGCATGGGC +GCCAGCGGGCTGTTCGAAGCCGTACGCTGAGCCCGTGCGCGACGGCGGGCCCCGCAGGTC +GGGGCTCGCCTCATGGGTTTTATTTACATGACGCTCCGTGCCATCCCCGGCGACTCGCGC +TAGGCTAGGCGCCTTTTTCCTTGCATAGGGATGTAGCGATGTCCACCGAGCCCAACTCGC +CGTTCGTCGACGACCCGCTGAGCGCCGTCGATGCACGGGTTCTCGGCAGCCTCGTCGAGA +AGCAGGCGACCACCCCGGAAACCTATCCGCTGACCCTCAATGCCCTGGTCCTCGCCTGCA +ACCAGAAGACCAGCCGCGATCCGGTGATGAACCTCACGCCCGGCCAGGTCGGCCAGAGCC +TGCGCCAGCTCGAGGGGCGCGGCCTGGTCAGGCTGGTGATGGGCAGCCGCGCCGACCGCT +GGGAGCACACCCTGGGCAAGGGCCTGGAATTGGTGGCGCCACAGGTGGCGCTGCTCGGCC +TGCTGTTCCTGCGTGGTCCGCAGACCCTCAACGAACTGCTCACCCGCAGCAACCGCCTGC 
+ACGATTTCGACGACGTCGAGCAGATCCGCCACCACCTGGAACGCCTCGCCGGCCGTGGCC +TGGCCGTGCACCTGGAGCGTCGCGCCGGGCAGCGCGAGGAGCGCTACATGCACCTGCTCG +GCAGCCAGGCCGACCTCGAGGCCGCCGTGGAGGCGATGGGCAGCGACCCGGAACGCGCCG +CGCCGGCCGCTCTGTCGGCCGATGCGGAAGCGCGGATCGCCGAGCTGGAGACGCGCCTGG +CGGCGCTGGAGGAGCGCCTGGCCAGGCTCGAAGGAGGGGCGTGATATCGCGGGAGGGCGC +AATCGGTGACATGCAGGTTCCGGCGGATAACCGCGAGCGGTTGTTCGCCCTACGGCCCGC +AGTGACTTTGTGGGCGGCGCTGCCTGTCGGGTAGGGCGGATAACGCCAGCGGCGTTATCC +GCCGATGCGCCGCCCCGCAAGGCGAACGCCAACCCAAAAAAGCCCGGCTATTGCCGGGCT +TTTTCATGCGCGCGCGTCGGTGGCCGATCAGGCCTCCGGCTGTACCGGCACGTGGATCAG +TTGCAGGCGCTCGCTGCTCCAGGCACGGGTGCTGTTGGTCAGCTCGATGTCGTTGTTCAG +CTTGCGCCCGTAAGACGGCACGATTTCCTTCAGGCGTGCCTGCCACTCGGGAGTAGCGAC +CTTGTCCTTGAAGGCCTTCTCCAGCACCGACAGCATGATCGGCGCGGCGGTCGAGGCGCC +CGGCGAGGCGCCGAGCAGGGCGGCGACGGAGCCGTCGGCGGCGGTCACCACTTCGGTGCC +GAATTGCAGTACGCCGCCTTTCTCGGCGTCCTTCTTGATGATCTGCACGCGCTGGCCGGC +CTGCACCAGCTTCCAGTCCTCGTCGCGGGCTTCCGGGAAGTACTCGCGCAGCGAGGCCAT +GCGGTCGTCCTGGCTGAGCATCAACTGGCCGATCAGGTACTGGCTGAGATCGAAGTTGTC +GATGCCGGCGTTGAACATCGGGCCGATGTTGCCGCTGGTCACCGAGCCCGGCAGGTCCCA +CAGCGAGCCGTTCTTCAGGAACTTGGTGGAGAAGGTGGCGAACGGACCGAACAGCAGAAC +CGGCTTGCCGTCGATCATGCGGGTGTCGAGGTGCGGCACCGACATCGGCGGCGAACCCAC +CGAAGCCTTGCCGTAGACCTTGGCCAGGTGGCGCTTGACCACGTCCGGGTTGGTGGTGGC +GAGGAACGAACCGCCCACCGGGAAGCCGGCGTAGCCTTCGGCCTCGGGGATGCCGGACAT +CTGCAGCAGCTTCAGCGCGCCGCCGCCGGCGCCGATGAAGACGAACCTGGCCTTGACGCT +GGTTTCCTTGTCGCCATTGGCCAGGTCGGCCATGGTCACGGTCCAGGTGTTGTCGTCGTT +GCGCTTGAGGTCGCGGACCTCATGTTGCAGGCGCAGCTTGAAGGTGTCCTTGGCCGACAG +CGAGCCCACCAGTTGGCGAGTGATCTCGCCGAAGTTGACGTCGGTGCCGATCGACATGCG +CGTGGCGGCGATCTTCTGCCCCGGCTCGCGGCCTTCCATCACCAGCGGCACCCACTGCTT +GATCTGCTCGGGGTCTTCCGAGTACTCCATGCCGCGGAACAGCGAGCTGTGCTGCAGGGC +GGCGTGGCGCTTCTTGAGGAAGGCGACGTTGTCGTCGCCCCAGACGAAGCTCATGTGCGG +GACGTTGTTGATGAACGACTTCGGATCGTTCAGCACCTTGCGGTCGACCTGGTAGGCCCA +GAACTGCTTGGAGATCTCGAAGTTCTCGTTGATCGCCACCGCCTTGCTGATATCCATCGA +GCCGTCCGCGGCCTCGCTGGTGTAGTTCAGTTCGCAGAACGCCGAGTGACCGGTGCCCGC +GTTGTTCCAGCCGTTGGAGCTTTCTTCGGCGACCTTGTCCAGGCGCTCGACCATCTCGAT 
+GGTCCAGCCCGGTTCCAGTTCGTTCAGATAGGTACCCAGGGTGGCGCTCATGATGCCGCC +ACCGATCAGGAGCACGTCGACCGGTTTTTCCGATTCGACGCTGTTTTTGGAGCAACCCAG +CACGCTGACGCACAAAAGCATCAATAAGATTTTTTTCATGGATTTACCGCTAGAAAACGT +GGAGAAGCGTGGATACGCACGACATGGCAGGTATTCACCGGGTGAATGATACTGGAACTC +ACCCCCGATGTGAGTGCGTTCACATGCGGTGACATTTGCCTCGGCGTTCCGCCGAGAATA +CCTGCCTTCCCTTTCTTGAACGTTTCAGCCACCGGGAAATAAATTCAGCGCCTGTATCGT +TTCAGCGGGAACCCGGCGCGTGGATTTCCGCCGATTGGCGCATCCTGCGGATATACGCGT +CGATTTCCCGCTCCGCGTCGAAACGGGTGAAATAGGGACCTTCCAGGGTCCCTTCGCGGG +TGGAGAAGAAGTACTGCCCATTCACCGAACTTATGCGATCGCTGCGAAAGCGGGTGGCCG +GGGTGGGATCGGTGGAACGTTTCCCCAACATGACGGCGACTCCTCGATAGCGGGCCAAGG +TGTTTTCAGTGTAGGCGGGGGTTGGCCGGGCGGCAGGCCGGCGGACGAAAGGAGACGGAT +TTCCAGGCGAAGGGTTGGGTATTCTATTTTTGGTTATAAGAAAATCTGAATTTATTCTTT +TTAACGTCAGCCGGCCTTGGGCATAGTGGGCTCCAACACGAACAAGGAGTGACCCCATGA +GCCTCAGACTCGGCGACATCGCCCCCGATTTCGAACAGGACTCCAGCGAAGGGCGCATCC +GCCTCCACGAGTGGCTGGGCGACAGCTGGGGCGTGCTGTTCTCCCACCCCGCCGACTTCA +CCCCGGTGTGCACCACCGAGCTGGGTTTCACCGCCAAGCTCAAGGACCAGTTCGCCCAGC +GCGGGGTCAAGGTCCTGGCGCTGTCGGTGGATCCGGTGGAGTCGCACCTGAAGTGGATCG +ACGATATCAACGAGACCCAGGACACCCGGGTCAATTTCCCGATCATCGCCGACGCCGACC +GCAAGGTTTCCGAACTCTACGACCTGATCCACCCGAACGCCAACGACACGCTGACCGTGC +GTTCGCTGTTCATCATCGACCCGAACAAGAAGGTGCGCCTGATCATCACCTACCCGGCGA +GCACCGGGCGCAATTTCAACGAGATCCTCCGGGTGATCGATTCGCTGCAACTGACCGACG +AGCACAAGGTCGCCACGCCAGCCAACTGGGAGGACGGCGACGAGGTGGTAATCGTGCCTT +CGCTGAAGGACGAGGAAGAGATCAAGCGGCGTTTCCCCAAAGGCTACCGCGCGGTGAAGC +CCTACCTGCGCCTGACGCCCCAGCCCAACCGCTGAGGCCGGATTCGTCCCGCCTGCATGG +GCAGGCACGGCAGGCGGGACCTTTTATTCGACGGTGCGTGCCGTCATGCAACGCTGTTTA +GCCACGAGCAGGGTTTTTGGCCGGTTTCGACCGGCCCTCTTTTTTATGTGCGCCAGCGCG +TCGCCGCGACGCTTTTTATCCGCCATCCATTTCCGAAACCGGCTCTGACTTGCCCGCCAG +ACGGGGAGAAGGCTCGCTCATCCACCTCCTTCCAGGAGCAGACGCATGATCGCCGCTCTC +CGTCGCTGCCGGGCCTGGCTGGCCCTGCTCGCCGTTCTTGCCGTGTTTCCCGCGGCAGAG +GCCGTCGCCCCCGCTGAAATCCGTCTCGACTATGCCTACTACGCCCCCACCAGCCTGGCG +CTCAGGAAGTTCGGCTGGCTGGAGAAAAGCCTGGACGGCAGCGGCACCCGGGTGCGCTGG +GTGTTCAGCCAGGGCAGCAACCGTTCCCTGGAGTACCTCAACGCCGGCAGCGTCGATTTC 
+GCCTCGACCGCCGGCCTCGCCGCGGTACTCGCGCGGGCCAACGGCAGCCCGGTACGTACC +GTATACGTCGCCAGCCGCCCGGAGTGGACCGCGCTGCTGGTGCGCAAGGATTCGCCGATC +CGCAGCCTCGCCGAGCTCAAGGGGCGCAAGGTGGCTGCCACCAAGGGCACCGATCCCTAC +CTGTTCCTGCTCCGCAGCCTGCACAGCGTGGGCCTGGACAAGAACGACCTGCGTATCGTC +CACCTGCAGCATCCCGACGGCCGGGTAGCCCTGGAGAAGGGCCAGGTCGACGCCTGGGCC +GGACTCGATCCACACATGGCGGCCAGCGAGTTGCAGGCCGGCTCGCGGCTGCTCTACCGC +AACCTCGGCTTCAACAGCTACGGCGTGCTCAATGTCCGCGAGGATTTCGCCGAACGCCAT +CCGCAACTGATCCGCCAGGTGCTGGCGGCCTACGAGCAGGCGCGCCACTGGGTGATCGGG +CATCCCGACGAGGCCGCGCAACTGCTTGCCGAGGAAGCCGGCCTGCCGCTGGAAGTGGCT +CGCCTGCAACTGTCGCGCACCGATTTCAGCCAGCCGCTGCCGGGCGCCGAACAGGTCGCC +GCGCTCAAGGCCGCGGCGCCGATCCTGGCGGACGAGCGCCTGGTGCGGCCGGGCGTGGAC +GTGCAGAAGGTGGTCGACGAACTGATCGCGCCGCAGTGGGCCGCCGAGGTCATCGGCGGC +GCCCCGTTGGCACGCACGGAGCCCTGAGCCATGGTCAGCCAGAGCCTGCGACTGGCCCGG +CGAGTCTCGCGCCGGGCGTGGCGACTGCGCGTGCCGGGCGCCTGGCGGGCCTGGGCCTTG +CCGCTGCTGGCACTATCGTCGTGGGAGGCGCTGGTGCGCCTGGGCTGGCTGCCGGCCTAC +CAGATGCCGGCGCCGAGCGGCATCCTGCTGACCCTGGTGGAACTGGCTCGCGGCGAGCTG +TGGGGGCATGTCGGCGCGAGCCTGGCGCGGGTCGCCGCCGGCTTCGCGATCGGCAGCGGC +CTGGCCCTGGTGGTCGGTACCTGGGTCGGTCTCAGTCGTCGCGCCGAAGCCTACCTGGAG +CCGAGCTTCCAGGCCCTGCGGGCGATCCCCAGCCTGGCCTGGGTGCCCCTGCTGCTGCTC +TGGTTCGGCATCGACGAGACGCCGAAGATCGTCCTCATCGCCCTCGGCGCCTTCTTCCCG +GTCTACCTCGCGCTGGTCGCCGGGGTACGCGGGGTCGATCGCAAGTGGGTGGAACTGGGG +CGGCTCTACCGTTTGTCGCGCTTCGCCCTGGTCCGCCGCATCCTCCTTCCGGCGGCGCTG +CCGAACCTGTTCACCGGCCTGCGCGGGGCGCTCAGCCTGAGCTGGATGTTCCTCGTCGCG +GCGGAGCTGATCGCCGCTACCCAGGGCCTCGGCTACCTGCTCAGCGACGGTCGGGAAACC +TCGCGTCCGGACTTGGTGATCGCCGCGATCCTGGTCCTGGCGGCCCTCGGCAAGCTCAGC +GATGGCCTGCTCCGCTCGCTGGAGCGCCGGGCCCTGCGCTGGCGCGACAGCTTCGACGGG +GAGGGTGGCGCATGAGCGGCCTGCTCGACCTGCTGGAGATTCGCAAGGCCTATGGCGATA +CGCGGGTGCTGGAGGGCGTGGCGCTGTCGCTGGCGCCCGGCGAGGTGGTCAGCCTGCTCG +GCCCCAGTGGTTGCGGCAAGAGCACCCTGCTGCGGATCGCCGCCGGACTGGACGATGACT +TCCAGGGCACTGTCGAACGCAACCCGATCCTCGGCTTCGGCCCGGACGGCGAGAACGGCC +GCAGCGGCGGAATCGGCGTGGTGTTCCAGGAGCCGCGCCTGTTGCCCTGGCTGACGGTGG +CGCAGAACGTCGGTTTCGCCGACGGCTGGCTGGAGGACGAGCACTGGGTCGAGCGCCTGC 
+TCGCCGATGTCGGACTGGCCGGCTGCGGAGGATTGCTGCCCAAGCAACTGTCCGGCGGCA +TGGCGCAGCGCGCGGCGATCGCCCGCGGTCTCTACGGACGGCCGCAGGTGCTGTTGCTCG +ACGAGCCGTTCAGCGCGGTCGACGCCTTCACCCGCATGCGCCTCCAGGACCTGCTGCAGG +ACGTGGTGCAGAACTACGAGATCAGCGTCCTGCTGGTCACCCACGATCTCGACGAGGCGT +TCTATCTCGCCGACCGCGTGCTGCTGATGGGCGGCCGTCCCGGACACATCCGCCGCGAGT +TCCACGTACCGCTGGCGCGTCCCCGCGATCGCCGGGCGGTGGAGCTGGCCTACTTGCGCG +GCGAGGCCCTGACCGAAATGCAGCGGGCGCACGTGCTCTGAGGCAAGACCCCGAACCGGG +TTTTTATATGCTTTATGGAATAAATAAATGAATAAAAAACATTTATTGGAATATGAGGCC +GCCTGTTAAGGTCTATGCAACTTGGTTAGCAAGCCAAACGAATATTCTGTTTTCCGGTTA +TAAGGAATCCCCCCATGCTCGTCGTTTCCATCGCCGGAAGTCCCAGCGTACGCTCCCGCT +CCGGGGTGTTGCTGGAGCGTGCCAGGGACTGGCTGAGCCGTCGCGGCGTCGAGGTGGCCA +GCCACCAGGTGCTGGATTTTCCCGCCGAAGACCTGCTGCGTGCCCGCCTCGACAGTCCGC +CGGTACTGGCGCTGGCCGAGCAGATCGGCCGCGCCGACGGCCTGCTGGTCGCCACCCCGG +TGTACAAGGCCTCGTTTTCCGGCGCGCTGAAGGTCCTGCTCGACCTGTTGCCCGAGCGCG +CCCTGGAGCACAAGGTGGTGCTGCCGTTCGCTACCGGTGGCAGCAGTGCGCACATGCTGG +CCGTGGACTATGCATTGAAGCCGGTCCTGGCCGCGCTCAAGGCCCAGGAGGTACTGCATG +GCGTATTCGCCGTGGACAAGCAGATCGCCTACGCCACCGAGAGCGACCCGGCGCGCCTGG +AACCGGTCCTCGAGCAGCGCCTGGAAAACGCCCTGGAAACCTTCCACCTGGCCCTCTCGC +GACGGCCGCAGCCGATCGACCCGCAGTTGCTCAACGAGCGCCTGGTGAACGCCCGCTGGA +GCATCTGATCCGTCCGCCGCCAGCGCACGCGCGCCGCGGCCCGATTGCACAACGCACACA +CGACGACCTCACTGTTCCGTCAACGGCGCGGCAGGTTCGGCACCCACAACGACAAAGGAG +AGCGCCATGCGCACCATCGCTTTGCGTCGTGGACTGGCGGCCCTGCTGGTGGCGGCCCTG +TCCTACGGCGTTCAGGCCGACGAGAAGTCGGCCAACACCCTGCGGATCGGCTACCAGAAA +TACGGCACCCTGGTCCTGCTCAAGGCCCGCGGCACCCTGGAGAAACGTCTCGCCGAAGAC +GGCGTGAAGGTGCAATGGACCGAGTTTCCCGGCGGCCCGCAACTGCTCGAGGGACTCAAT +GTCGGCAGCATCGACTTCGGCGTGACCGGGGAAACCCCGCCGGTCTTCGCCCAGGCCGCC +GGCGCCGACCTGCTCTACGTGGCCTACGAACCGCCGGCGCCGACCAGCGAGGCGATCCTC +CTGCCGAAGGACTCGCCGATTCGGTCGGTGGCCGAGCTGAAGGGCAGGAAAGTCGCCCTG +AACAAGGGCTCCAACGTGCATTACCTGTTGGTCCGCGCCCTCGAGCAGGCCGGCCTGAAG +TACAGCGACATCCAACCCGTCTACCTGCCGCCGGCCGACGCCCGCGCCGCCTTCGAGCGT +CACAGCGTCGACGCCTGGGTGATCTGGGACCCCTACCAGGCCGCCGCCGAGAAGCAGTTG +TCCGCGCGGGTCCTGGTGGATGGTCGCGAGCTGGTCGACAACCACCAGTTCTACCTCGCC 
+ACCCGGACTTACGCGCAACGGCATCCGCAGGTGCTCGACAAGCTGGTGGACGAGATCCGC +GAGGTCGGTGATTGGTCGCGGGCCAACCCGCAGCAGGTGACCGAGCAGGTCGCGCCGCTG +CTCGGGCTGCCCGCCGACATCACCCTTACCGCGGTGAAGCGCCAGGGTTACGGCGCGCAG +TTGATCACCCCGGCGGTGGTCGAGGCGCAGCAGAAGATCGCCGACACCTTCACCCAGCTG +AAGCTGATTCCCAAACCGCTGAGCATCAAGGACGTGATCTGGACGCCACCGGCCGGCAAG +GTCGCCAGCGCACCCTGAGCCCGTACCGCCCACATACCGACACGATCATCGCGCGGCTAC +GCCACGCCGGCGCGCTCACCCTGGAGTCAATTGCGATGAGCCTCGAGATTTTCTGGTTCC +TTCCCACCCACGGCGACGGCCACTACCTGGGCACCACCCAGGGCGCCCGTGCCGTCGACC +ACGGCTACCTGCAGCAGATCGCCCAGGCCGCCGACC +>NODE_8_length_39997_cov_62.8364_ID_15 +GGGACAAAAACGCCGTGTTTATCGCAGCTTGGTTAGCTGAATTCCAAAACTGCACGGACT +ATTGTCTTATGTCAAATGCTCTAATTACGCCAGAGCGAATTGCTCTTGCGCCGCGAGCAG +GCCGCTGAGCTGCTCGGCGGTGAGCGGGTAGGAGAGCGCGTAGCCCTGCCCGAGCGGGCA +GCCGAGTTGGCGGGCGGCCTCGACGACCACTCAACCGCTCTTATCTCGCATCCTGCCCCG +CAGGATGCACCGCACTACCCTTCGCCCTTCACGCCTCGTCTTGCATTCTGTTGATTTTGA +GCAGCATCACAAATGTAGACACCCCCGTGCCCATAGCACTGTGGAACCACCCCACCCCAT +GCCGAACTGGGTCGTGAAACACAGCAGCGCCAATGATACTCGGACCGCAGGGTCCCGGAA +AAGTCGGTCAGCGCGGGGGTTTTTTCGTTGCGGGAGTAGCTCAGCTGGTAGAGCACTACC +TTGCCAAGGTAGATGTCGCGAGTTCGAATCTCGTCTCCCGCTCCATCATCCCCCCTTCGC +CTCTTCCGAGGTGGAGGGGCTTTTTTGTTGGCTTGCCGCATTTCTATGAACCAGTGCGGC +ACAATAACAGCTGTGTTTGAATTTGAAATCAAGCAGCGTGATGGACGTGCCCGCACGGCG +ACGTTCCAGACGCCGCGCGGAGCGGTCACCACACCTATGTTTATGCCCGTCGGCACCCAG +GGCACCGTAAAGGGCATCAGCCCACAGGAACTGCTGGAAATCGGTTCGCAGATGATCCTG +GCCAATACCTATCACCTGATGCTGCGGCCAGGAGAGCAGCTGGTCAAGGCGCATGGTGGT +TTGCCGGGATTCACCGCCTACCCAGGCCCCTTTCTGACTGACTCAGGCGGTTTTCAGGTC +ATGAGCCTGGGGCACATGCGCAAAATCAGTGAGGAAGGGGTCGTGTTCAAGAACCACCTT +GATGGCAGTCGCGTCGAGCTGACGCCGGAGCGCAGCATTCAGGTGCAGGAGGCCCTGGGA +GCAGACGTCATCATGGCCTTTGACGAGTGCCCTCCCTATCCTGCCGAGCGGCCTTACATC +GAGGCGAGCCTGGACCGCACGGTGCGCTGGCTCGAGCGCTGCCACGCGGTGAAGACTAAA +GATGACCAGGCCCTCTTCGCCATCGTGCAGGGTGGCGTGCATGAGGATTTACGTCTCAAG +AGTTTAGAGGCCACCCTGCCTTTCGCCACGCCTGGGTTCGCGGTCGGCGGCCTGGCGGTC +GGGGAAAGCAAGGAAGAGATGTATCCGGCGGTGGCGTTTACAGCCGGCCGTCTTCCCGAA +AATAAGCCGCGTTATTTGATGGGTGTGGGCCACCCCGAAGACCTTGTGGCAGGAGTGGCA 
+CTGGGAATCGACATGTTCGACTGTGTGTATCCGACACGGACCGGGCGCTTCGGCTACGCG +CTGACGGACGACGGACGGCTCAACCTCAACTCCAGTGCGCCGCGCACCCAGCTCCAGCCC +ATCGACGCGGAATGTGACTGCTACGCCTGCCGTCATTACACCCGCGCTTACCTGGCGCAT +CTGCTGAGAGCGGAAGAAATGCTGGCTCCGCGCATGTTGTCGCTGCATAACCTGCGGTAT +CTGCACCGGTTGGTCGAGCGAATGCGAGTGGCGATCAATGGGCAGCAGTTCCATCCCTGG +GCAGCAGACTGGAGCGAGCGTTATTTTCACGGCAATGTCCCAGGCTGGTTCACCAGTGCG +TTTGAGCGCAGTACCCAGTCCGAAATTTAATAAGACGGACTCGCTGCGGCCTTTTCGGCG +TCACTTAAACGGTCAAAGTTGAAAATGAGAAGAAAATCTGATGGTCAACTGACGGAGAAG +GGGCGAAAACAGTTTATTTGACGGTATTTAGCTCGCTCTTAGGGTTGACCCGCCCATCTT +CATGCTTGTATCATCTGCACGATCTGCCCTCTTCTGTAGCCACAGGCTGCGAAGGGTGGG +TTGCGCGAAGAAGGCGGAGTAAGGCCCAACTTCTCCTGGGAGAGGAAACATGGCAACTCG +CCCCATCAACATCCTGCGTGCTGGAAACGCTGCTCGACGCGCACTGAGATTGGGGGTTCC +CTATGAAGAAAAGTCTGATCGCTCTTACCACGGCGCTGTCGTTCGGCCTCGCTGCCGCCC +AGACCGCCGCACCGGTGAGTGCACCCCAGGTTCCCGCCCTGACCGACGTTCCTGCCGGCC +ACTGGGCGAAGGACGCCATTGACCGTCTAGTGAGCCGCGGCGTCATCCTGGGCTACCCCG +ACGGCACGTTCCGTGGCACCCAGAACCTGACCCGCTACGAAGCCGCCATCATCATCGCCC +GTCTGCTCGACCAGATGCGCGACGGCGAAACCCCCGCTGGCATGACCGCCGAGGACATGA +CCGCGCTGCAAAACGCCATTCAGGAACTGGCCGCCGACCTGGCCGCCCTGGGCGTGCGCG +TCAGCGACCTCGAAGCGAACGCCGTCAGCAAGGACGACTTTGCCCGCCTGGAAGCCCGCA +TCGAAGAAGTGGCGGCTGCTGGCGGCGAGCAGGGCGCGACCGAAGCCCTCCAGGGCCAGA +TCGACGACCTGACCGCCCGTGTGGACGAGTACGACGCGCTGCGTGCCGATGTCGACGACA +ACGCCAGCAGCATCGCTGCCCTCAACGACCTGACCGTGCTGCTCAACCAGGACATCCTGG +ACCTGCAAGACCGCGTCAGCGCCGTGGAAGCCGCGCAGGCCGACTTCGTCCAGCGCAGCG +ACTTCGACGCCCTCGGTGGCCGCGTGACCACCGTCGAAACTCGCGTCGAAACCGTCAACA +ACTCGCTGACGGGCCGCATCGCTGCCCTGGAGCGCAACGCGTTTAGCGTCAAGCCCAGCC +TGACCATCGGCTACAGCGTGAGCCGCACCAGCCGCAACTTCGACGTTGACCGCCTGTTCC +CCCTGAACGCGGACGGCACCGTGGCCAACAACGCCTTCACCAGCGGCGGCATCGACACCG +ACACCGGTGCTCAGCGCCGTGACTTCGGTGACTTCGGCAACGCCTCTGACCCCGTGGTCG +CGGGTGCGGCGGGCCTGTACGGCTTCGCGGACGGCGTGAGCTACACGGTGTACTTCACCG +ACGGCTCCACGGCGACCTTCGACGGCCTGAACCCCGCCGATTACAAGGTCCCCACCGGGA +AGGTCATCGATACCACCAAGGGTCGCAACGGCTTCGGCTTCAACAACCTGGCCCGCTACA +AGGAAGGCAGCACCGACATCGGTATTAGCCTGGGCTTCGACACCAGCGGCCAGTTCAGCC 
+AGGTCACCAGCGGCACCGGTGGCAGCCTGTTCAGCACCGCGGGCCGTCTCCAGGTCAACC +AGATTGACCTGAACTTCGGTCTGGTCACCGGCCTGCCGAGCGACGCTTACGTCGACACCA +ACGGCAACGGCAAGAAGGACGACGGCGAGGCGACTGGCCGCGGCACGTACCTCGGCAGCG +GCGGCACGGCGGCCATCCTCCGCGATCCCGCTGGGAACGTCTACCGCCCCGTGTTCTTCC +GCTTCAAGAACGCCACCACCCAGTTCAGCGTGGGCAACAACCCCGTTATCGTGACCTTGG +GCCAGCAGCAGAAGTTCTACTTCAGCGACTACGTCTTCGACAACAACTACGATGGCCGTG +GCGACGGCTTCACCGTGACCGTGGACGGCAGCAACGTGCCCGTGATCGGCGCCTGGAAGC +CCCAGATCAAGGGCGTGTACGGCAGCCGCAGCGGTCTCGACGGCACCGCCGAAGCCGGCT +ACGGCGTGTACTACCGCGGCGTGCGTGCGCAGATCACCCCCGTCGGCACCCTGACGGCGG +GCATCCACTACGCGCAGGAAGGCCGCGACATGTTCGGCGCAGCTCAGAACACCACCAGCA +CGCCTTCCGACGTCACCACCTACGGCGCCGACCTGCACGGCAAGGCCTTCGGTGTGGAAC +TGCACAGCGAGTACGCCACCAGCCGCGTGCGCCCCAACACGGCCAATGCTGCCGTTCAGA +CCAGCAACGCGTTCTACGCCCGCGTGGCGACCCGCAAGGACAACCTGGCGTTCGACCTGA +ACACGCCCGCCGCCAAGTTCGGCAACGACACCTTCGGCGTGTCGCTGTACGACCTGAACT +ACCGCAAGATCGACGCGGGCTACAACAACGTGGCTGGCATCAGCGAGTATGGCTACGGCT +CGTACAGCCGCACCTCGGCCCAGAACATCGCCTACAACCCCGACACCGGCGTGACGGCTC +CCTTTGCCAACCTCGACCGTCAGGCCTACACCGACGCCAATAACGACGGCACGTCTGACC +GCAACGCTGACGGCACCGTCGTTGCAACCAACACCAAGATCGGTCAAATGGGCTTCGGCG +TGAAGGCAGCGGCCAACCTCGGTCCCGTGGCCATCGGCGGCTACTACGACACCAGCACCG +GCGCCAACGGCGACAATGCCAACCGCATGACCGAAGCGGGCGGCTCGGCCAAGGTGGCCT +ACAGCATCTTCTCGCTGCGTGGCACGTACAACACCCTGGACAGCAACCGTCCCCAGATCT +ACCGTGACGCCGCCGGCACCCAGATCATCGGCGACGCCAAGGTGCGCCGCTACGCCGTGC +AGGCGGACGTGACCCCCGGCCTGGGCCTGTTCGTGGGCGCTTACTACCGCGACGTGAACG +TCAACGGTGTGCGCTCGACCACCGACCGTGGTCTGCTGGGCCGCGGCTACCTGGCTTCCA +GCTTCGAGCCCGGCGTGGGAAACAATGCCTACCGCACCGGTCTGCGCTGCGCCGACAACA +ACTTCGGCACCGGTACCCGGGACATCGACGGTGTGGGCGGCGTGCTCAACCCGGCGGTCA +ACCTCGACCAGAGCCGCACCGCCACCTGCTTCACCTCCTACGGGGTCGAAGCGGGCCACG +CGGGCGACAACGCCAACGCCCTGGTCAAGGACCTGTTCTTCCGCGTGGGCTACTCGCGCG +TGTACGTGCCCACCACCGCCACCGCGACGACGGGCGATTTCAGCGGCTCGGTGACCTACG +GTGACGCCCGTTACGACCGCAAGGTCGGCGTGGCGAACGTGCGCCTCGCGGGCTCGTTCT +CGACCACCAACACCCAGCTCGACAGCCGTCCTGCCGGGACCCGTGGCGCGGTCGGCCTGA +TTGTGCGCACCGATCCCCTGGAAAACGTGCCCTTCCGTCCCCAGTTCAACGGTCAGGTGG 
+GCTACTACACCGCCGACAACCGTGTGGCCGCTGGGAACTACAACGCCAACGCGACCAAGT +ACGGCGCGGGCGTGGTCCTGAACGACTTCCTGCTGCCCCAGACCAAGATCGGCGTGCGCT +ATGACGGCTATATGGCTCAGAACCGTCAGTACACCCCCTTCGACGGCGACGGCACCCAGG +GCTACTTCAGCGACGCCAACAACAACCGCCGGACCAACCTGAACGGCGTGTACGTGGAAG +GCGCCTACCAGGACCTGATCTTCAGCTACGGCACCTACACCCTGAGCCAGAAGGATCTCA +ACGGCGTCGAGTACGGCAGCGGCATCAACAACGGCCAGCCCGCTCGCGGCCAGACCTTCA +AGATCAGCTACAAGGTCAACTTCTAAAGCCCCTAGTTCCCGGCATTGCCGGGCAACGGCC +CCCGCTTCGGCGGGGGTTTTTGTTGTTCTGTAGTTAGTTCCGGTGCGGCGCGGCCAAAGT +CGGTAGACTGACGCCATGCTCGCTGTTTTTGGACACCTCAACCCCGATACCGACGCCATT +TCGGCGGCGATGGTCTACGCCCGGCTGCTGACGCGGCAGGGCACCGAGGCCCAGGCTTAC +CGGCTGGGCGAACCGAACTTTGAGACGGCCTATGTGCTGCGTGAACTGGGCCTGGAAGCG +CCGCCACTCCTGACCGAACTGCCCGCCGGCAGCAAGGTGGCGCTGGTGGACCACAACGAA +AGCGCGCAGTCGCTGCCCGCCCTCGGCGAACTCGACGTGACCCGCGTGGTGGATCACCAC +AAGCTGGGCGACCTGACGACCATCAACCCGCCCTACCTGCGCTTCGAGCCGGTAGGCTGC +ACGGGCACCATCCTGCTGAAGCTGCACCGCGAGGCTGGCCTGAGCGTGGAACCCCAGGAC +GCCAAGCTGATGCTGAGCGCCATTCTGAGTGACACCCTGCACTTCCGCAGCCCGACCACC +ACCCAGGACGACCGCGACGCGGTGGCGTTCCTGGCCCCGGTGGCCGGGGTAAACGACGTG +GAGGCCTACGCACTGGCCATGTTCGCCGCCAAGAGCGATCTGGGCAACACGCCCGCCGAG +ACTCTGCTGCGGATGGACTACAAGGTCTTTCCCTTCGGGGACCCTGTGCAGCCGCAAAAC +TGGGGCATCGGCGTGATCGAGACGACCAACCCGGCCTACGTGTTCGGGCGGCAGCAGGAA +CTCCTCGCAGCGATGGATCAGGTCAAGGCCGAGGACACCCTCTCGGGCATGTTGCTGAGC +GTGGTGGACATCCTGAACGAAACCAACCGCACGCTGGTGCTGGGCGCCACCGAGGCCAAA +GTGCTGCGCGAAGCCTTCGGCGCCGAAGCTGAGGGACAGGTAGCCGATCTGGGCAACCGC +ATCAGCCGCAAGAAGCAGATCGTGCCGACGCTGGAAAAATACTTCGCGCCCGAAGCCTGA +GCGGGCTCTTTCAACAGGGGAGACGGGGAGCACCTCGTCTCCCTATTTACTTGCACAGTC +CAGAGCGGCGGTATGCTCCGCGCGTGACGCAGGACACGAATGTGGAATGGCTGTTCGCCC +GGCAGCGGTTCGGAGTGCACCCCGGCCTGGACCGGGTGCGCGAGCTGCTGGCCCGGCTCG +GCGACCCGCAGCGGCAGTTCGGGGCCGTGCTGGTCGGCGGAACCAACGGCAAGGGCAGCA +CCGCCGCCACGCTGGCCGCCATGCTGCGGGCGGACCGCAGACGCACCGGCCTCTTCACCA +GCCCGCATCTGACCCGGCTGAGCGAGCGTTTCGTGGTGGACGGGGAGGAAGTCTCGTCGG +CGCAGGTCACGGCAGCGCTGGCCCGCGTGCGCCCGGTGAGCGAAGCGGTGGAGGCGTCTT +TTTTTGAGGTGGTCACTGCGCTGGGGTGTGTGCTGTTCGCCGAGGCCGGAGTAGAAATCG 
+CCGTGATGGAAGTCGGGTTGGGAGGACGGCTGGACGCCACCAACGCGCTGGAGCCCCGGC +TGAGCGTGATTACCAATGTGGCACTCGACCACACCGCCATCCTGGGCGGCACCGTCGAGC +AGATCGCGGCGGAGAAGGCGGGCATTCTGCGGGCCGGGCGACCTGCCGTGACGGCTGCCG +CGCCCGCCGTGCTGCCCGTGCTGGAGCAGCGGGGCGCCGATCTCTGGGCGCTGGGTCGGG +ACTGGACGGCGCAGACCCGCAGCCTGGGCTGGCAAGGTACGGCGGTCGACGTGCAGTGGC +CCGGCGGTGCGGCGCAAGTGCAGACCCCGCTGCTGGGCGAACATGGGGGCCTCAACGCGG +CGCTCGCGGCAGTGGCGGCGGCGCGGCTGGGCGTCTTTAATGAAGCCATCCGGCAAGGAG +CCGCGCAGACCCGCTGGCCGGGGCGGTTGGAGGTTGTCCCCTGGCAAGGGCGGCGGGTAC +TGCTAGACGGGGCCCATAACCCGGCGGGAGCACAGGCACTCGCGCAGGCGCTGCGTCCAT +TGCTGGCGGCGAGTGGCCGTGCCCAGTTGCCCATCATCTTCGGGGCAGCGGAAGACAAGG +ACCTGGGCGGGGTCGCCGCAGAACTGCTGCCCCTCGCCTCACGGGTCATCCTGACGCGGG +CTGCGCTCAGTCCCCGGGCCGCCGACCCGGCGCAGCTCGCGGCTCTGTTTCCAGGCGTGC +CATTGGAGTTGACCCAGACGCCCGCCGAGGCGCTAGCAGTACTGCGGTCTGACGATGACC +TCGTCCTTATCTGCGGGAGCCTCTATCTGCTCGGCGAACTGCGTCCTTTGCTGCTGGGCG +AGCAGTCGGAAGGCCACGAACGCTGGCAATAGCCACAGCAGCGGCCAATTTCTGGTTTCG +AAACAGGAATAAGGCAAGTTGACTGCCCTCATTCTGTATTGACCGTAGCAAGGAGCGGTG +TTACACTCCGACCAAGGGTGCCTGATTCTGCCCACAAGCAAACGCCAGGCGGGTTCCACA +TCATCTGTCTCTCGGGCATCCCCGACGGGTGACGGGTGGTCCGCCGCAGCCCAGGAGGTG +AATGACATTGAAACTGCACGAACGACTTCGTGAATTGCGCAGCGAACGTGGGCTGCGGCT +CAAGGACGTGGCCGAGGTTGCCGACATCAGCGTGCCGTACCTGAGCGACCTGGAGCGCGG +GCGCACCAACCCCAGCCTCGAAACCCTCCAGACCCTCGCCGGGGCGTACAACATCACGGT +TCACGACCTGCTCGAAGGGGTCGAGTTCTACGGTGAATCCACCGACGGCGCCCTGCCCAA +GGGCCTCGCGGACCTGATCGCCGACCCCACGCTGGGCCCGCAGATCACGCCCGACTGGGT +GCGGACCCTCTCGCGCATCGAGTTGCGTGGCAAGCGCCCGCGCGACAAGCAGGACTGGTA +CGAGATCTACCTGCATCTCAAACGCATCCTGAACTGATTTCCGCACAACTCCGGCCCCGT +CGCCGGGGTTTTGTTCTTGGGAGATTCAAAAATGGCGCCCCCTCGCTGACCTTTCCGTCG +AGGGGGCGCCGTTCTGTGAGGAGGCGTGTCAGGTCAGCGGTAAGACCTGCCGGTGCTCGG +CTTCTCGCCGTTGCAGCACCAAGGTTCCCGCTCCCCAGGCGCCGCCGACGAGGGCGAGCG +CGAAGGCCAGCGGACTCCAGCTCAAGCTGCCCGCAAAGAGGCTGATGCCCAGGAAAGCGC +CGACCGTATCGGGCACTGGCAGGCGCAGGCGGCAGGCGAGGGCGCGGCCCAGGTCGTAGA +CGCTGACGCTCAGGCCAGTGGCGATCAGCAGCCCCGCCAGCGCAAGCACCAGCAGCGCCG +GGCCGAGCAGACCCGCAAAGGCCAGCGCTAGTGCCGGCAACAGCAGCGCCGCGAGCAGCA 
+GCACGCCGAGCGCCAGCGTCCGCATCGGCGCGTGCCGTTGCTGCCGGGCGAGATCTGGGG +CTGTGCCTGCCACGAAGAGCAGCAGCAGCAGGCCCGCCGTGAGCGTCACGAAAATCTGCG +GCCACGCGGCGGCGCCCAGCCAACCGAGCAGGGGCCGGAAGGCCGCCGCCGTTGCCAGCC +GCACCCCCTGCGGCGCCGGCAACGCCCCGACCTCAGCGGCCTCGCCGGGGGTGCGGCCCA +GCACCGCGCTCATTTCGCCGCTGACCTGGGCGCCGGGCGCCTGTTTGATGTCGCCCAGCA +GAGTGACCACCCGGCCATTGACAGCGGCGTCGGGCGCCAGCACCACGTCTCCGCCGATGG +CGACGATATTGCCCTCGACGGGTCCATGAACCTCCACCGGGTGGCCGAAGCTCAGGCCGG +GTTCGCCGTTGACCAGGCTGGCGAGCGGCGGCAGCGACAGCGCGCCGGCCAGCACGAAGG +CCCCGGCCCCGAAGCGCCGCACGGCGGGCGTGGGCATCCAGGTGACCAGCGCACTCGTGA +GCAGCAGCAGCAACAGGCCCAGGCCAGCGACGGGTGCCACGTGCTCCAGCACCGTCTGCA +TGACCAGGGCGCCCGCCGCCAGGTTGGGCCACGCCGCCGTGACCGTCAGCAGGGTGAGTC +CGGTCAGCAGCCCACCGACGAGCAGCAGCGGAGCGTGGCGGCTGTGGGAGGCAGAGACGG +ATCTGGAAGCTGGGACTTGGGCTGGTGGCGTCACTTCCAGTGAAGGTGGTGCGGAAACAA +CAGCCTCGGCCTGCTGCCCCTCCTGCGCGATGCGGCCCGCCAAAGATGCCGCCAGCGAGC +GGGGCGGCGGGGGCACCGGCGCCTGCTGCAACACATTTCGCGTGCGGGCGTCGCTGGCAA +TCTCGGCAGCTAAAGATGCGGCGACAGGACGTAGCATTGGAGGCGGCGTCTGCAACAGCG +CGGCGGCCCGCAGGTCGCTGAGGACGGCAGGTGCCACCGAAGCAACTGAAGGCAGAGGTG +CGGCCCCCAGTTGCCGGGCCAGCGCGATGTCCTGCGAGACACTGGCCGCGAGTGAACGTG +GGAGGGCTGGCGGAGCAAGAACACCGCGCAGGGTGGCACTGCTGGCAATTTCACTGCTCG +TTTCGGCAGCCAATTGGTGAGCGAGTTCAGGTGCCAGCGTCGGCGCGGTGAGCTGTTTCC +CCAGGCGAAGATCGGCCAGCACCGCGTCGGCCACACTGCGGGGAGCAGGCGGATTGGGGC +CGGTCAGCAGCGAGATGGCCCGGTGCAGTGAGCGCCGCGCCTCCTGCACCTGCGGGTCAA +GGCAGGCGGCGTCCAGGGCGGCCTGTTCGTCCGCGCTCAGGTCGACGTCCGCCTCACGGT +GCAGCAGCGCCCGCAGCTCCGGTGTCAGAACCTGCGTCCATTCTGGTGTGTTCCACCCCC +CCTCTTGCCGTGCGCCCATCGTCGGTCCCTCCTCGTCCCTTCCTTTACGTCAAATCCGCC +TGCAAAGTTCCCGGCGGTGGGAGAACGGTCGCCGGAAATTATCTAGACAAATTAGGGATT +AAGGCTGGGGCGGACGCTCGGCGGCCTGGTAGTCGCCGCTTTTGCCGCCGGTCTTGGACA +GCAGCCGCACGCCGCTGACCTCGATGGCCTTGCTCGTCGCCTTGAGCATGTCGTAGACGT +TGAGCGCGGCCACCGTGACGGCGGTGAGCGCTTCCATCTCGACGCCAGTGGGGGCAGTGG +TTTTGACGAGGGCGGTGATGTGGATGCCCGCGTCTTTCAGCGTCACGTCCACCTCGGCAC +TCGTGACCGGAATCGGGTGGCACAGAAAAATCAGGTCGGCGGTGCGCTTGCTGCCTGCGA +GCCCGGCGAGGCGCGCCACGCTCAGGGGGTCGCCTTTGGGCGTTTGCCCGGCGAGCAGCG 
+CGGCCCGCGACTCGGGCGGGAGCAGCACCCACGCTTCGGCGCGGGCGGTGCGGGTGGTGG +GCGCCTTGGCCGACACGTCTACCATGCGCGGCTGGCCGCCGACGAAATGGGTGAGCTGCG +GGGCGTCGCCTTGCGGGTCGCTCATTCGTCGGGCAGCTCCAGGTCTTTCAGGCGCGCGAA +GGGGTTTTGCTTGGCGTGGCTGCTCCCCTCGGGCGAACCGAGTTCGACGACCTGCTCCTC +GATGGGCACCTGGGCGCTGTGTTCGCAGGGGCCTTCGTTGAGGTCGTGGCCGCAGACCTG +ACACAGGCCCTTGCACGCGGGGTCGTGCAGCACGCTCAGCGGCGCGGCGAGCAGGGTGCT +TTCGGCCAGGTACGCGCTGAGGTCAAGGTCGGGATTGCCGAAGACGAGGACTTCCTCGCC +AGTTTCGGCTTCTTCGATGTAAGGCTCCTCGGCGGAGGGTTCGTAGCGCATCAGCGTGCC +GAGGTCGATGTCGAGCGGCACCTGCACCTCGCGCAGGCAGCGGGCACATTCCATGACCAG +CGTGGGCGAAAACTGGCCTTGCAGGTACATCTCGTCGCCGCCAAGCGAGTTGATGTCCAC +CTCAAAAGCGGCAGGCTCGGCAAAACGCAGGGTTTGCGGCTGCTTGCCCTGAAGGTACTG +GAGGTGGTCGAGTTCGCCCTCGGCGTGCGCGTCGTCGGCGGTTCGCAGCAGGGCTCCCAG +GTGAATGCGGGGTTGATCGTTCATGGGCGCCATGATAGAGGGCTGGCCTGCGGGCAACTG +CGGCGGCGTGGGGGTTGCGCCCTGGTGAGCGATACGGGGGAAGGGCACCGCTGCCGGTGT +TCCGTTCAGGGTTCATCTGTCCTGCCCGGCGCGGCGGCATACTGCGGGGCAGTGCTCGCC +CCCTCCCGTGACCAGGCCCCTGTGACTGACTTCCTTTCCTCTACCCCCCTGACCGCCTTC +CGGCGTGCGCTGGCCCGTGTGCTCGCGCTGCTGGTGGGTTTCGGTCCGCTGGCGGCGCAC +GCTCAGGCGGCGGGCACAGCGGCGGTGAAGCCCCTGGTCGCCACGGTCGCTACGCCGTCG +GCCTCGCGCCCAAACGCGGCGCTGCGCTGGACGCGGCTGGGGAACCCCAGGTCGCCCTCG +CTGCTGCAAGTGCCCGCCGATTGCCAGGTGCGCAGTTGCCCGCTGGTGGTGGTGTCGCAC +CCCCGCGCCCAGGACGCCGCCCGACTGCGCGACAGTGCCCAGGTCGGCAAAATCTCGCAG +GCGCTGCTGGCCGCCGATTTCGCGGTGCTGCTCAGCGGCGACGGCGGCGTAAACACCTGG +GGCAGTCCGGCAGCGCTGCGGGAAGTCGCGCAGGTTCACCGTGAGGCCACCAGCCGATTT +CGCTGGAACAGCCGGACCTACGCCCTGGGCCTGAGCATGGGCGGGCTCCTCTCGCTGCGC +AGCGCCCTGCCCGATTCGCCCTATACCGTCCGGGGCGTAGCTCTTATTGACGCCTGGGTC +AGCCTGCGTGGCGCGTGGGGCAGCGCCCTGTCGCGCCAGCGTGAAATCGGCAAGGCGTAT +GGCCTGACCGTGCCGCCCACCCCCACCCTCGACCCCCTGCCCCTCGCGCAGCGCCTGGCT +CCGGTGGCGCCGCTGCCGCTGTTTCTGGCCTACAGCCCCACCGACACGGTGGTGTCCTCG +CGCAAGAATGCCGAACTGCTGATTCCCAAGGCCGAACAGGGCGTGAGCGAGATCGTGCGC +CTCGACGGGCCCCACCTCGGCGGCAACCGCTTCTCACCGCAGATGATTGCCCGGCTGGTC +GGCTTTTATCAGGGCCTGGAGCAGCGCGCCATTGCCCGCCAGCACAAGGAATTTCCGCCT +GGTCTCCCGGCTCCGGCGGTGCAGCGGGCTCAGACGGAGACGTCGAAAAGCTAAAGGCGA 
+GGGGGCAGAGAAACCGACTTCTCTGTCCCCTCGCTTTTAATGGGTTTATACGGATTTCGT +CCAATTCCCCCACCTCCGGAACGGCACCGGAGGCGGGACCATTTCCCGAAATCCTTTTTA +CTGCTTCTCACTCCGTTCGGATGGAATCCCCAGAACCGGGGGATCCCATCGCAGCCGGTT +TACATCGCCGCCGGAATCTCGATGCCGATGAGGTCCAGGGTGTCCTCGAAGGCTTTGCGC +AGCCGCACGATCAGCGCCAGCCGCGCTTCCCGCAACCCTTCTTCCGATTGCAGGACGTTG +GTGGCGGGTTTGCCCTGCTTGGTTTTGGCGTTGTACCACGCGTTGAAGGATGTGGCGAGA +TCGAGCGCGTACTGCGCCACCACGTGCGGCGAGTGGATGCGGGCGGCCTGCGCGGCGACT +TCGGGCAGCTTGGCGATCTGCTTGGCGAGCACGAGGTCGATGTCGGGCAGCGCGTCCCAG +TCGGCCCCGGTGCCGTCGGTGGCATAACCGGCTTCCTCGGCCTTTTTCAGGATGTTGGCG +GCGCGCACGGCGGCGTACTGCACGTAGGGCGCGGTGTCCCCGTTCAGCGCCAGCGCCTGC +TCCCAGCGGAAGTCGATTTTGCGGGTCGGCTCGGCCTTGAGCATGGCGAAGCGGATGGCG +CCCAGGCCGATGCGCCGGGCGATTTCGGCGGCGTCCTCGCGCGCGGCCAGGTCGGGATTG +ATGCCCTGCAAGACGCTCAGCGCGCGCTTCTGGGCCTCGTCCATCGCGTCGTCGGCACTG +ACCGCGATGCCCTTGCGCCCGCTGATGGTCTGGCCTTCCAGCGTCACGAAGGCGTAGGAG +AGGTGGATGCTGCGCTCCTCCTTCTCCTGCTCGCCCGCCACACCCAGCGCCGAACGCACC +ACCGTCTGCGGGTGGTCCTGGCGCGAGTCGATCACGTTGATGACCTCCTGCGCGTGCCCG +AAGCGCCGCTCATCGTCGGGCTGGCCGTCCGGGGCACTCGTCCAGATGGTGTTGCCTTCG +GGGTCCTGCATGAAGGGCTTGAATTTCATCCCCTCGAACAGGCCGAATTTCCAGAACTGG +TAGCCGATGTCCTTGGCGGCGTACATGGCGGTGCCGCCCGAGCGCACGAGGACCACGTTC +GGTTCTTCCAGCCCCGGCATAAATTCCGACACGTCCATGATGAACGCCCCGGCGTACTTG +CCCTCGGTGGGGCGCGAGGTGTACCTGCTGCCTTCCAGAATGTTCATGGCCTGCGCCAGA +AACCCGCTGCCCACCACGTCGGATTCCCAGTTCAGCAGGTCGTAGCGCGCGCCGAGGCGA +AAGCAGGTCTGAAGCTGCGCCTTCACCGTCTGCTCGACCAGGGGCCGCAGCTCCCCGGCT +TCGAGCTTGTGCATGATCTCCATGATGCCGCTTTCCAGCTCGGGCTTCTGGGGGTCGGCG +TTGAGCTGCACGTAGCCTTCACCGAGCCACTGGTCGTACTTCTGCACGCCGTCCCAGACG +CGGCCGTAATGCTGGGTGGCGAACAGCGACTCGGCGGCCTGTCGCCCGGTGTCGTCGATG +TAGTTCTGTACCTCGACGGTGTGCCCCGCCGCCCGCAGAATCCGCGCCATGGAGTCGCCC +AGCACCACGTTACGCAGGTGCCCGACGTGCAGCTCTTTGTTGGGGTTGACCGAGGTGTGC +TCGATGACGACCTTGCCCTCGCGCTTGGGCAGCTCGAAGGGACGCTCGACCACGCCGCGC +ACGAAAGCGCCCGCGTCGAGGAAAAAGTTCAGGAACGGCCCCGTGGCCTCCACCCTGCGG +ATGCCGGCGGGCAGCACCACCGTCTGCGCGAGCTGCGCGGCGATCTGGGCGGGGTTGCCC +CCGGCCGCCTTCGCCATCTGGAAGGCGGCGGGCGTGCCGTAGTCGCCGGGTTTATTGGCG 
+GGCGTTTCCTGAATGGCGGCGTCGACGGGCATTCCCATCTGGTGCGCGGCCTGCTCGACG +GCTGCTTTGAGTTGGGCCTTCAAATCCATTCGGGCAGTTTACGGGGCGCGGGCGCTGACA +GGGTCACGGGGCAGGAAAAGAGGCCATGCGCCACTTCCCGCCCCTGTGCTGAAGGCCAAA +ACTGTCCGGAATCGGGTATTATACGGCTTTAATCCGATTCCCGAACATCCGGAAAGGCGC +CGGATGTCCGTCTATCTCCTTAAAACCGTATTTTTCCATGCGCTCCGCGCAAAATTGCGC +CTGGACATGTCCGGGACTCAATTTGAAACCGTATTACCGCTGTTTCAGCGCCGCGTCCCG +GTAAAAGCGCTCCAGCCCCAGCGCAATGGCCTGCGCCAGTCGCTCGCGGCCCGTGTCGCT +CATCAGGGTGCGGAGGTTGCTCTTGTCGGTCAGGAAGGCCGTCTCGATCAGGATGGAAAG +CTGCGTGGTGGGCCGGGTCAGCGCGAGGTTCTGGTAATGCACGCCGTCGTTGCCCACGTC +GGGCAGTTTTTCCACCAGGGCACCCTGCACCGCGTCGGCCAGGGCGCGGGCCTGTGGATT +GTAAAAGTACACGCCGCTGCCGCGCTTGGTCCGGGGGTCCACGCCGTCAGGCAGCGCGTT +GGCGTGGATGCTGACGAGCAGCTCGGCGTTCTTCGCCTCGGCGAGCAGTGGGCGGTTGTA +GATAGGCACGGTGGTGTCGGCCTCACGGGTCAGAATCACGTTGGCGCCCTTCTCGCGCAG +CAACTCGGCAAGGCGTAGCGTCAGCGGCAGCGTGAGGTTCTTCTCCGGCACCCGCAGCGG +TCCCGCTCCGCCGAACTCGTCGCCGCCGTGACCGGGGTCGAGGACGATGGTGCGTCCGGC +AAGCGGCTGGCGGGCGTTGATGGCCGGAGCATTCCGCACCCGCAGGGTCAGGGTGTTGCG +CTCGGCGGAGTCGTCGGCGCCGTAGGTCGCGTCGTAGCCCCAGGGCGCCCCGTTCAGGTC +CACGTGCAGGCGGACCACGCCGTCGGCGTCCTGCACCCAGCGCACGTCGCGCACCGCGCC +CGTCGGGACGTCGGAGACGATGTACTCCACGTCGGACACCGCGTGGAACAGCCGCAGGTC +GAGGCTGCCGCCCTGTCCGGCCTGCTGCTCGACGGTAAAGGGCACGCGCTCGGGGAGCAG +CACCTGCACTTCGCTGTGGGTGCCGCTGTTTTTCACATTGATGTTGGCGAACACGGCGCG +GGGAAGGGGCGTGCCTTCCGGGCGCAGCGTCAGGGAGCTCTTCGGCGCGTTGAGCGTCAG +GCCATCCGACACCTGCACGGTGTAGGTGCCCCCTTCCTCGCCCACCACGATGGTCCGGGC +GCCGGGGCGGGGGTAGACCACGAAGTTGCGGCCCGCGCCGTTGCGCCACACCTGCCCGGC +GGCTTGCAGGCCGCGCCCAGCGACGGTGGCGGTGACCTCGGCCACGCGGGGGCCGGTGCC +AGTCACGCTGAGTTTGCCGGTGCTCTCGGCGGTGGCCGTGGTGCCGTCCGCGCCGGTCAG +CGTGAAGGACACGGGCGCAGCGGCGAGTTGGGCGGGCAGCAGGAAGGTGCCCTCGTACTC +GCCGGGCGACGTTTCAGCCAGGGGAAAGGGGCCGAGGTCGCCGACCTTGTAGCTCGCCTT +CCCACCGGGCGTGCCGCTGAAGCCGACCGGCACGCCGCGCGTTTCCAGGTTCGCCGGTTG +CAGGTAGGCCACCCGGTTCTCGTCGGGCAGCACGCTGTCGGCCACGATTTGTGCCGCGCC +CCTCAGGACTGTCTGCGGGCGGCTGGTCACGCGCAGTTCTTTGGTGCTGGTCACGCCGCC +GAGGGTGGTTTCGAGCTTCAGCACATTCTCGCCGGGGGTCAGCGGCACCCACTCGATAAA 
+CAGGCCGTCGGCGCCCACGTCCACCGGCTGTCCGCCGAGGCTGAAGGTCGCGCCGGGCTT +GACGCTGCCTTCGAGCAGCACGTGGTCGAAACCCACGCTGTACTTGTCGGGCGGGTACGC +CACAAAAATCGGCTCGTCGCTGATAGGCGGCAGCAGTCGGGTCGCGCTGGCGGGCGCCGG +CGTAGGCGAGGGAGCTGCGGGCGCGGCGGTTTGCGCGGCAGCGAACGAGGCGAGCAGCAG +GGCGGGGAGCAGGGTACGTTTCAAGATAAAGACCTCCCTCGAATTGTGGTTCGGGGCGCA +GTGTACGTCAGTTGCCCGACTTGAGGCCAGCGCGTCTGAGTTCGTTTCTGAGTTCCGCCA +CCGAGTAGCGGCGCGAAGGCAGCCGCAGCAGCGTGAGGCCCCCGGAGCGGAAAATCACGT +CCTTGACCGAGTCGCGGTACTGCTGCTCGGCTTTCTGGTGCGACTTGCCGTCGAGCTCGA +TGGCCAGCCGGGGCCGGAAGTGGTCGGCGGCGTCCACGATGATGAAATCCACGTGCTTGT +CGCGCAGCCGCCCCAGCACCGCGCCGCGCTGGGCGGGGTCGTCGATGGTAAAGAGGTCGT +TGAGCCGCACATTGGGAAAGGCCCGGTAGCTCGTGCCGCGCAGCGCGTCTTCCAGAGCGC +GAAAAAAAGCGTTCTCGTCGGCGGAAAAGAAGTAGCGCCGCGCCGAGACGGGCAGCGAAG +TCGGCACCCGTTTTTCGCCGGCCAGAGGAGATGGAGGTGAGGAAGAAGCCGGCGCGGGCG +CGGCGTCCCGCAGGCCGAACAGTGAGGCGAGGCATCCCAGGGGCATGGGAGCAGTGTAGG +CAGGCATGGAATCGCGGTGGATTTGGGCCTCTGCATGAGACGCGGGTCAGCGGCGGCTGG +CCTATGCTATAGGGAGCCGCCAACTCGGTATACTCTCCCGAGTCTTTGTACTGAGTTCAA +TATTCCGCCTGCCCCGCTGCGCCGCGCCCCTGCGCTTCCCCCGGAGGTTCCCGAGTTGCT +CCCCCTTGCCCATCAAATCGCCTTTTTCATCTTCGCTCTGGTGACCGGCGCTGTCGGCGC +TTACGGCTTTTACCGGCTGTTTTTGCGGATTCGCCGGGGCGTCCCTGCCAGCGAGTGGCG +CTGGAACGACGCCCCGCAGCGCATCGGCTACGCGCTGGTCACCTCGCTGACCCAGGAACG +CACCTTCCGCAAGCGGCCCTGGATTAGCGTCCTGCACGCCTTTATCTTCTACGGCTTTAC +CTACTACTTGCTCGTCAACGTCGTGGACGGGCTGGAAGGCTACCTTCCCTTCTTGCACAT +CAGCAGTGCCACTCCGCTAGGCGCGCTCTACAACGTCCTGGCCGACATCCTGAGCCTCGG +CGTGCTGATCGGCGTGATCGGGCTGGTCATTCGCCGCCTGTGGGGCAAGAGCCGGCGCGA +CTTCCGCTTTACCGACAAGACGCTGCTGCATCCCCTCGTCAAAGACAACTACATCAAGCG +CGACTCGCTGATCGTCTCGGCCTTCATCACCTTCCACGTCGGGAGCCGCATTATCGGCAA +CGCCGCCAAGATGGTGCAGGAAGCCCGGCTGGACGCGGGGCACTACGACGCCTTCCAGCC +GTTCTCCTCGGCGGTGGGCAGCGCGCTGTTCGGCGGCCTGAGCGACTCGGCCTTGCAGGG +CTGGCGCCTCTTCGGCTTCTGGGGGGCGCTGGGCAGCATTCTGCTGTTCCTGAGCTACTT +CCCCTTCTCCAAGCACATCCACATCTTCATGGCGCCGGTCAACTACGCGCTCAAGCGCCC +CGTCAACAGCGGCACCCTGCCCCCCATGAAGGGCCTGGAAGAAGCGATGGAGGCCGAGGA +ACCCAAGCTGGGCGTGGAAAAACTGGAAGACCTGGAATGGCCCCGGTTGCTCGACGCCTA 
+CGCCTGTATTCAGTGCAACCGCTGTCAGGACGTGTGCCCGGCGAACGCCACCGGCAAGGC +CCTCTCGCCCGCCGCGCTGGAAATCAACAAGCGCATGGAACTGAACGTGATCGCGGCGCA +GCACAACCCCTTCGTGCTCAGGCCCGTGCCGTTCGAAGCCGGCGAAAGCACCGCCCGTCC +GCTGCTGGAATACGCCATCAACGAGGAATCGGTGTGGGCCTGCACCACCTGCGGCGCCTG +CATGCAGGTGTGCCCGGTGCAGGACGAGCAGATGCTCGACATCGTGGACATCCGCCGCAA +CCTGGTGATGGTGCAGGGCGAGTTCCCGCCGCAGCTCCAGACCGCCTTCCGCGGCATGGA +ACGCGCCAGCAACCCCTGGGGCATCTCGCGCGACAAGCGTATGGAGTGGGCCGAGGGCCT +GAAAGTCCCCACTATCGACGAAAACCCCGAACCCGACGTGATCTACTGGGTGGGCTGCGC +CGCGAGCTACGACCCCGGCGCTCAGAAGGTGGCCCGCTCCTTCGTGCAACTGCTCGACAA +GGCGGGCGTGAACTACGCCGTGCTGGGCAAGAAGGAAGCCTGCACCGGGGACAGCGCCCG +GCGCGCCGGGAACGAATTCTTGTACCAGCAACTCGCGCAGGAAAACGTGGAAACGCTCAA +TCAGGTGGCGCCCAAGCTGATCGTGGCGACCTGCCCGCACTGCATGAACGCCATCGGCAA +CGAGTACCGGCAGATCGGCGGCGACTACCGCACCATTCACCACACCGAATACCTCGAGCA +ACTCGTGGCGGCGGGCAAGCTGCCGACCGCGCAGCTGCACGACAACGTGGTCTACCACGA +CCCCTGCTACCTGGGCCGCCACAACGGCGTGTATGAGGCGCCCCGCCAGCTGATTTCGCA +GATGGCTGGGCAAATTCTGGACATCGAACGCCAGCGCGAGAACTCGTTCTGCTGCGGTGC +GGGCGGCGCTCAGTTTTGGAAGGAAGAGGAAGAAGGCCGCGAGCGCGTCTCCGACAACCG +CTTCCGCGAATTGCAGGCGCGGCTCGACACCGCCGCCGAGGCGAGCGCCGAATTTGAACG +CACCGGCAAGGTGCTGGCGGTGGGCTGCCCCTTCTGCAAATCCATGATGAACTCCACGCC +CGAGAAGCAAAAGCGCGACGACATCATCGTGAAAGACGTGGCCGAGCTGATGCTCGAAAG +CGTGCAGCGCGCCAGTGGCGAGTGGGTCGAACCTGCCGTGGCGCCCTCGCCCGAAGTCGA +AGTGCCCAACGCGGCGCTGCCGATGGAACGCACCGGCGACGCCCCCTCCGCCGACGCCCC +GCGCGACGACGTGGTGGGCACCACCAGCGCCGACGTGGAAAACGCCCAGCCCGGCAGCCC +GGTGGCGAACGCCGGCACCCAGCCCGAGGCTCAGGCGGCGGCCCCCAGCCCTGCACCCAG +CGGCGAAGGCACGCCTGTTCCGGCGGCCCGCAAGTCGTGGAAGCCCAAGGGCGGCGACGA +CGTGAGCGCGGCGGCTCCGGCGCCCGCCCCTGCCGCCTCCACTGCCGAGGGGGCCGCTCC +CGCCCGCAAGAGCTGGAAGCCCAAGGCGAGCGCGGATGATGTGGCCTCCACCGCGACGGT +TATCGAAGCCGCACCCGCAGCTCCGGCCCCCGAAGCTACCGGCACCCGCAAGGCCTGGAA +CCCCAAAGCGAAGGCCGACGACGTGGGCACCGCGCCCGCCGCGCAGCCTGCCGTTCCTGC +TCATGCCCCTGCCGCTGAAGGTGCCGCCCCCGCCCGCAAAGCCTGGAATCCGAAGGCGAA +GCCCGAGGACGCTGCCCCTGTGGCGCCCGCCCCCATGCGCGACGACGTGAACCCGGCGCC +GGCAGCTCAGGCCACGGCATCTGCCGCCCCCGCCGAAACGGGCGCCACGGGACGCAAGGC 
+CTGGAACCCCAAGGCGAAAACTCCGGCAGCGGCGGAAGGGTCGGCCCCGGTTCAGGAAGC +GGTGCAGGAAGTTGCTCCTCAGCCTGCCGTCGCTCCTGCCCCCGTGCAAGGCGATATTCA +GGATCCGCTGCCCCAGCCTCCGGTCCAGCAGGCCAGTGCCCCCGCCCAGAGCGGTGAGCG +CAAGAAGTGGAACCCCAAGGCGAAAGCCGAAGCGGCGCCCGCTCCTGCTGCCCCCGCCGC +GCCCATCACCGAACACGTCGTGCTCGACGAAGTCGGCGTCAACGGGCTGGAAGAAGGCCC +CGAGGCGACCTCCGCCCCCGACGCGGCCCCGGTCACGACCCAGCCCGCCACCCCCGAGCC +CTCTGCCTCCCAGACCGGCGAAGTCAACGCCGAGACGGGCCGCAAGAAGTGGCAGCCCAA +GAACAAGGGCTGACACCCACTCCTGAGTCGAGTCCACTGAGTGAAGTACGCCCCCGCCGG +GAAACTGGCGGGGGTTTTGCTTTGCCTGCCCTCTACAATCCGGGCGTGAACCTCCTGCTC +ATTCGGCACGCGCAGTCCACCAACAACCTGCTCTACGCCCAGACCGGAGGCAGCGAGGGC +CGCTCCGCCGACCCGCCCCTGACCGAACTCGGCCACGCGCAGGCACGGGCGCTCGCCGAG +TTCGCCCGCACCGACGAGACCCTGCGCGGCCTGACCCACCTGTATTGCAGCCTGACCACC +CGCGCGGTGCAGACGGCGGCGCCACTCGCGGCGGTGCTGGGGCTCGGCGTGCAGGGCCTC +ACGCACGCTCACGAGACCCAGGGCCTCTTCCTGCGCGACGAGGCCGGGGTGCCCCGGCCG +GTGCCGGGCCGCACCCACGCCGAGCTGCTGACTGAGAATCCCGCGCTGCTCTGGCCCGCC +GACCTCGCAGCGGAGGCGGCCTGGGAAGGCGGTTTCGAGCCCGAGGACCACGCGGCTTAT +CTGGCCCGCGCGGCCCGCGTGGTGGGTGAACTGCGCGCGGCGCACGGCCCTCAGGACACG +GTGGGGCTCGTCACCCACGGCCACTTCACCCAGTTTCTGCTGCGCGAGCTCATCAGCCAC +GGCACGGCGTTTTTCCGCGTCGCCAACACCTCGACCACCCTGCTCACCCTGCCCGGCCCG +CAGGACCCGCCCGAGTTCGGCCCGCTGGTGGGCTGGGTCAACCGGCACGACCACCTGACG +CCGGAGCAGGTCACCGTTTGACCGGAGCTGTAGTTTTGCTTACAGAGACGCACTTTTGCG +GCCCTTCCCGTTAGAAATACTAAGGACGGCTGCCATCCGGTCAGGGTTTCTGCGGGTCAT +GCTGAGGCACTGCCACTTCGGAACCCGAGGTGGACAAAGGAGCACCCCATGAAACTGAAG +ATGTCCGACCTGATGATTCTGCTGGGCTACGCCTCCATCGGCTATAGCGCCTACCGTTAC +TTCACTGCCAGCGACGACGACTCCAAGCGCGACGCCCTGTTCGTCGGTCAGTGGGCGCCC +ACCTTCTTCATCCTGGGTGTGGGGGCCGAAAACCGTGAGTACCGCAAGCAGAATACCCTG +GCCCTCGACGCCAACGCCTGAGCCCCGAACCTCTCCGCAGAGGCCGTCCATCCGGGCGGC +TTTTTGCTTTTGCGGCCCTCCGCCCCTTTTCGATGTCAAGGTGACTTATGAACCTGACTC +CCGCTCAACGCAGCAATTTCTTTCCCCTGACCGCCTGGGGGTATCCCCTGTGGCGCGCCC +GTTCGCTGACGTGGCTGGCCGGAGAACCGTTCGGGCTGGCGCGGGAAGCGCGGCTGTTTC +TCGGGCTGTGCCGCCCTCATGCTGGGGAACGCTGGCTGGACGTGGGCACCAGCACCGGGT +TTTACGCCGGGGTGCTGGCGCGGGCGGGCTGCCGGGTGGTGGCCGCTGACCTCAGCCCCG 
+CCATGCTGCACGCGGCGGCGCGGCGGGAACCCCGGCCCCAGATCGAGTGGGTGCAGACGA +ACGTGGAAGAGACCGGCTGGCGCACGGCGAGCTTCGACGGCGTGGTCGTTGGGGCGACGT +TGAACGAAACCGCCGACCCCGCGCGGCTGCTCGGCGAGTGCGCGCGGCTGCTGCGTCCCG +GTGGCAAGCTGTGGCTGATGTTCGTGCCGCGCACGGGCGGGCCGCTGCAAGGGCTCCTCG +CCCGGCTCGGTGGGCTGACCTTTCCTGACCTGAGCGCGGTGGAGACTGCCTTGCCCGGCG +GCGCCATCGTTCACGCCCGGCGGGCGGGGGCCGTGCAGTTCGCTCTGTTCGTCCGGGGCA +ACGGGTAGGCTGGCAACATGCCGCCCCTCCGCTCCCAGATTCGGCCCGGCCTGACCGTGG +ACATCGTGCAAAAGCAGGACCAGCCCACCGGCAAGCTGACCCGTGGCGTGGTCGCGGCGC +TGCTCACGCGCTCGCCCTCGCACCCGCACGGCATCAAGGTCCGGCTGACGAGCGGGCAGG +TGGGCCGGGTGCAGGCGGTGGTCGGCGGGGAATAAAGAAAGTCCGACTTTCTGGCGCGGG +GCCGCGAGGCTTACGTTAACGCCGGAGGTGCGCCATGCCCAAAGCCTGGAGCAACAAAGA +CGAACGTCAGTACGAGCACGTCAAGGACAGTGAGGTCAAGCGCGGCGAGTCGCCAGACCG +CGCCGAGGAAATCGCCGCCCGCACCGTCAACAAATCCCGCCGCGAAGAGGGCCGCACCCC +GAACAAGCGCACCCAGGGCACCGGCAACCCGGACGCCGCGCTGAGCGACCTGACCCGCGA +CGAGCTCTACAACCGCGCCAAGGAAAAAGGCATCGCGGGCCGCAGCCGCATGAGCAAGGC +GGAACTGGTCAGGGCGCTGAGCTGACCCCCGCTGCACCATAAAAAAGCCCCTCCCAGCCG +GAGGGGTTTTCTATTTGGCCGGTGGCTCAGCGCTTGGGCATGCTCAGGCCGAGGCCGGTG +AGAAAGCCGCCCGCGACGGCGAGCGCGGTCATGAGCATGACGTTGTTGACGGGGTTGGGG +CCCTCGTTGCTCTTGTTGGCGTAGGCGATGGCGTGCAGCTGGTTGATGTCGATGTAGTTC +TTCGCGAGATAGAACAGGCTGGCCACGATGACCACCAGGCCGATCAGCACCACCACCTTG +GAGAGAATCTGCATGGTTCAAATTGTAAGTCTCCGCCCGTTTGTGCCTCTTGGCCGTTCG +GAAAGATTGTGGCGGGGCAGGGTCATCAGGGTCAGTGGGTCAGCGTCCGTGCGGCGTCGC +GTCCAGCCAGGCGGTCAGCGCCTGAGCCACGCCCGCCGGGTCGGTGACGACCACCGGCAC +GCCGAGCGAGGCTGCCGCGCTCGCTGCCGCGCACTCGCCCTGTTCCCCCATTACGGTCCC +GGCGCCGGTCCAGGCGGGAAACGGGCCGCCTTCCTGGTAGGGCGCGCCGCTGAGGTCGTC +CACCACCCGCGCGGGCACCTCCACCGCTGGGCCACCTGCGAGGGCCCGCTCGGCCACCAC +CAGCACGCCCCGGTGTTCGCCTTCTTCCAGCGCGCCGAGCAGCGGGGTCAGTCCGTCGTG +GACGAGCAGCTCGAAGGTGCCCTTGCCGTCCACTGCCGCGAGGGCGGCGGGCAGCGCAGC +GGCGAGTTCGGGCGGCGCGGCGACCAGCCAGCGGTGGCCCCCGGCGCGGCCCTCGAAGCT +GGCGCGGGGCGAGCCGTACACGTCCGTCCAGGTGCGGGTGTGTTGCATGGCGTCATCATC +GGGCATGGGGGCCAGAGGGGCAAGCGGAGTCGCCCCGCTATGCTGCGCTCATGACGCTGC +CTGCTCCCCGCACCCACGGTTCCGTTCGTCTGTGGTCGCTCCCCACCGGCCCCCTTCAGG 
+AAAACGCGCTGCTCGTGGCGGGCTCGGACAACCAGGGCTTCTTGATTGACCCCGGCGACG +AAGCGGCGCGCATTCTGGACCTCGTGCGCGCGGCGGGGGTGGACGTGCAGGCGATTTTGC +TCACCCACGCGCACTTCGACCACATCGGCGCGGTGCAGCCGGTGCGCGAGGCGCTCGGCG +TGCCGGTTCACCTCCACGCGGACGACCTCGCCACCTATCACCTCGGCGCGGCGTCGGCGG +GGCGCTGGAACCTGCCCTTCGTGCAGCCCGCCGACCCCGACCAGCAGATTGCCCAGGACC +AGACCTTCACGGCAGGCGACCTCACCCTGACCGCCCGTGAGCTGCCGGGGCACGCGCCGG +GGCATGTGGTGTTCGTGGGCGACGGCTTCGTGATTGCGGGCGACACCCTCTTTGCCGGGG +GCATCGGGCGCACCGACCTGCCGGGCGGCAACCACCCGCAGCTCATCGCGGGCATCGAGC +GCGAACTGCTGAGCCTGCCGGACGACACCTACGTCTACCCCGGCCACGGCGGCTTCACGA +CGATTGGACGGGAGAAGCGGAGCAATCCGTTCCTGTAGAGCCTTTGACAAAAAGATGGCA +CAGCTTTTTGGCGAGCGGAGCGAGTGCAAAACACGGAGCAGAACGGACTTGCAAAGCTGC +GAAGCAGAGAATGGAGCGGGTGGCGGTGCTGTTCCGACGCACGCGTCATTCGGAGAACTG +CTCTAAGGCAGGAAAAAAGGCGCCAAGGTGAGTTGACCTTGGCGCCGTTTCTTTTTTCTC +TTCCCTCAGTCGAAATCCATCGCCCACCACGCCTCGCGCTGTTCGGCCATGCGGGCGCGG +GGGTCGCCGAGGGTGTCGAGCGACGCTTGCAGGCCCTTCCAGTCCTGTTCGCTGAGGGGG +TCGAGCGTGAGCGCGCGCTGGTGGTATTGCGCGGCCTCCTTGGGCTTGCCGGCCAGGCTC +GCGGCGCGGGCGGCGAGGCCGAGCAGGTTCATCTGGGTCTGTTCGAGCCGCGAGCGCACG +TCGTCCACCCAGGGGCTGTCGGCGCCGGGCAGGAACGTGCCGTACTGCCCGACGAGTTCC +TTGAGCTCCTCGTAGCCCAGACTGCCCGCTTCGGCCTGTCCGGCGAGCAGTTCGTAGCGT +TGCACGTCGTACTCGGGGTTCAGGCCAGCGGCGAGCGCGTACTTGCGGTTCTGGCTGGTC +ACGGCCTCGTTGTTCAGGCTCTTGCGCAGGCGGTGCAGCGTGGTGTGAAAGAGGCTGCTC +GCCCGCGACTCGTCTTTTTCGGGCCACAGCGCCTCGGCGGCTTCCCAGCTGGTGACTTCC +TTGTGTTCGAGCAGGTAGAAAAACAGTTCGAGCGCCTTGCGCGACACCCACGACACCTGC +TGCCCCTGCCACAGCACCTGCGCGGTGCCGAGCCCCTTGGCCTGGGTGCCGCTTTCGGCC +TGCGCGGTCAGGCCCACCCGGCGCAACCGGGCGTCCACAGCGGTGGTCAGGCCCTGCGGC +GTAAAGGGCTTGGGCAGGTAGTCGTCGGCGCCGAGGTTCATGCCCCGGCGCACATCGGTC +CGCTCGGCGTGGCTCGAGAGCAAGATGAACGGCATCGCCGAAAGGTTCTCGTGGTCGCGC +ACCTGTTCCAGAAATTCCAGGCCAGTCATGTACGGCATCACCACGTCGCTGATGATCAGG +TCAGGGGTAAACACTTTGAGCAGGTCCAGCGCTTCCACCGGATGCGAACAGCTCCGCACC +TCGTGCCCGGCGCGGCTGAGAATGACGCTGATGAGCTTGACAATGGCGGCGTCGTCGTCC +ACGACCAGAATGCGCGGCATATGAGAAGTCTAACAGTTCGTCTCCGCCGCCGGCCGCCCC +CACCCTGACGAATAAGGGCCCCCCTTACAATCTCGTGAGAAGCGGCCTGAAGTCGGTTTG 
+TGCGGGCGTGTGTCCTGTTGTCCTGTTTGCCCCGCTGCCGCTGTCTGCCCGCCCTCCAGA +TGCGCAGCGGTCAGGGCGCTATCCTTTTTCCCATGACGTATCAGGCGGTCATCGGGCTCG +AAGTGCACCTGCAACTGAACACCCGGTCCAAAATCTTCAGCGCCTGCCCCGCCGACTACC +ACGGCGCGGGGCCGAACGAATTCACCGACCCCTTGACCCTGGGCCTGCCGGGCACGCTGC +CCACCCTCAACCGCCGGGCGGTGGAACTCGCCATGATGTTCGGTCTGGGGCTGGGCTGCG +ACGTGTCGGGTTTTACGCAGTTTCACCGCAAGAACTACTTTTACCCCGACGCCCCCAAGA +ACTTTCAGCTCTCGCAGTACGATCGCCCGATTGCGCGTGACGGGTATCTGGACGTGCCGG +GAGAGGGCGGCCCCGAGCGCATCCGCATCAAGCGGGCGCACCTTGAAGACGACGCGGGCA +AGCTCGTGCACCCGACCTACGCGCCCTACTCGCTGCTCGACCTCAACCGGGCGGGCTCGG +CGCTGATTGAAATGGTCACGGAAGCCGACATCACCGGCCCCGAGCAGGCCCGCGCCTTTC +TGGAAAGCGTGCAGGCCATTGCGCAGTCGCTCGGCGTGTCCGACGCGACCCCGGAGGAAG +GCAAGATGCGCTGCGACGTGAATATTTCCATTCACAAGCCCGGCGAGCCTTGGGGCACCA +AGGTGGAAGTCAAGAACCTCAACTCCTTTCGCTCGGTGGCCCGCGCCATCGAGTACGAGG +CCGCGCGGCAGGCGAAGGTGCTGGACGCGGGCGGCATCATCACCCAGGACACGCTCGGCT +GGGACGAGGGCGGCCAGAAGACCTTCCTGATGCGGACCAAGGAAGGCGAGGCCGACTACC +GCTACTTCCCCGAGCCCGACCTGCCGCCGCTCGACATCACCCCCGAGTGGATTGCCGAGG +TGCGCGCCCGGATGCCCGAACTGCCCGCGCAAAAGCTGGAGCGTTACCGGGCAGCGGGCG +TACGCGAGAGCGACGCGCAGACCCTGAGCCTGAGCGTCAGCCTTTCGAAGTTCTACGACG +AGGCTCTCAAAAGCGGCTCCGACACCCAGAAACCGGATGCCCAGAAGCTCGCCAACTGGC +TGCTCACCGACGTGGCCGGGGCACTCGCCGCGCAGGAAAAGGGAGTTGAGGACAGCGACC +TCCAGCCCGCACACCTCGCTGCGCTGGTCGGCCTGATTGACGCCGGAACCATCAGTGGCA +AGATTGCCAAAGACCTGCTGCCCGACGTGCTGGCGGGCCACGACCCCGCGCAGCTGGTGC +AGGAGCGCGGGCTGAGCGTGGTGACTGACACCGGGGCGATTGACGCCGCCATTGACGCCG +CGATGGAGGCCGACCCCGCGACTGTAGAGAAGGTGCGCGGTGGCAACGCCAAGGCGATGA +ACGCGCTGTTCGGCCCAGTCATGAAGGCGATGGGCGGCAAGGCCAAACCCGAAGTCGTCC +GCGAGCGCCTGACCGCCAAGCTGGGCCTGTGACCGCGCCGGCAGGGGCTTCCGGCAACCG +CTGGCGCCTCCCGGCTCTCACCGCGCAGTGGCTCTACGTGGCCGCCAGCTTGGGGCTGAC +CGCGTATACGCTGCTGCCGGGGCTGAATGAAGGCACGGTCGTCACCGGCCTGCGCTCGCT +GCTCGCCGCGCTGGTGGGGGCGTGGTGGGCGGTCATTTTCGGGCGTTATCTGCTCGGCCA +GGGCACGCCGGACACCGACGGCACGCTCAGGGCGCTGCGGATTTTCTTTCCCTGGCTCAC +GGCGCTGCGGCTGTCTTTGTGGCTCATCGGGCTGCTGGCGCTGAGCGGCAACGTGACGGC +GGAGGTCAACCCGGTGGCGCTGACGGCGCTGCAAACCCTGTCGTTCGGGTACATCTTCGC 
+CAAGAACGCGGTGTACGGCACGCTCGCCCGCTACGCGACCGACCCGGCGAACGCGCTGGG +GCGCCGGCGCCTGGGCGAGTGGCTCAACGTGGCCGCGCCGCTCGCCCTTGCCATCGGGGT +CATCAACACCGTGCCGCTGGGCGGCGTGAGCGAGGGCTTCCGGACGCAGGACGTGGTGAT +GTATGGCGTCCACGCCGCGCTGGACCTGCTCGCGCTGGGGCTGAGCCTGCTCGCACTGCG +CGAGATGCAGACCCCGGTGCGGGAGCGGGAGGCCTGAGCTGGGCCGAGTGCGGCAGCCCC +CTTACTCTCCTGGCCCCTTGCTATCCTGAGCCTTACGCATGACAGACGCCTCTTTTCCGG +CCTTGCCGCCGCCGCTGGTGTCCCTGGGTGACCTCGCCTGGGACGTACTGGCCAAACCCG +ACACCCTGCTGCTGCCGGGCGGCGACACCACCGGGCGCCTGGAACTCTCCGGCGGGGGCA +GCGCCGCCAACCTGGCCGTGTGGGCCGCGCGGCTCGGCGCCCCGACCACCTTCGTCGGCA +AAATCGGCCAGGACCGCTTCGGCGAACTGGCGACCGCCGAACTGCGCGCCGAGGGGGTGC +GCGCCGAGGTGCTGGCAAGCGCCGCGCACCCCACCGGCGTCATCCTCGCCCTGATTGACC +GCCGGGGCCAGCGCGCCATGCTGACCGGGCAGGGCGCCGACTGGGAACTGCTGCCGGAGG +AGCTGCCACGCGACGTGCTCTCCAGTGCCGGACACCTGCACCTGACCGCCTGGAGCCTGT +TTCGTGACCCGCCCCGCGCCGCCGCGCTCGAAGCGGCCCGCATCGCCAAGGCGGGGGGCG +CCACCCTCAGCTTGGACCCCGGCAGCTTTCAGATGATTCAGCAACTTGGGCGCGAAGCCT +TCCTGAACATCGTGGACGCGCTGCCCTTCGACGTGATGTTTCCCAACGACGACGAGGCCC +GCGCCATGAGCGGCGAGCGCGACAACGAAGCGGCGCTCACCTGGCTGCGGGCCCGCTACC +CCCGCGCCCTGATTGCCCTGAAGATGGACGAGGACGGTGCCCTGATCGAAGGCCCGCAGA +CCGCCCGCGTGCAGGTGCCCGCCACCCGCGACCCGCTGGTGGACGCCACCGGGGCCGGGG +ACGCCTTCGGGGGCGCTTTCCTGTCGCAGTGGCTGCGGCACCACGACGCCGAGCGCGCCG +CCCGCGTCGCCGTGCAGGTGGGCGGCTGGGTGGTCTCGCGCTTCGGGGCTCGCCCGCCCG +CTGACCCTGACCTCACCCGGCGGCTGGCGAGCGTGGGCGCCGATCTGTTGACCCCTGACC +CCGCAGCTGCCCCTGACCCTTCCCAAGACTCTCCTGAGGTAAACGCATGACGACCCTGAA +TTCCCGCCGTGGCCTGCCGCTCTGGGCCAAACTGCTGCTCGGCTTGATCGTGCTGGGCCT +GCTGGCCGTGCTCGGCGCCTTTGCCTATTTCCGCAGCCTCTTCGGCCCGGCGGGCGGCGC +CCCCTACACGCTGGAAGTCACGCCCGGCGAGACCGTGCCGCAAATCGCCCGCGAACTCGA +AAACAAGAAGATCGTCAAAAACGCCCGCATCCTGCGCTACGCCATGCAGCAAAGCGGCGC +GGCGGCGCGGCTCAAGGAAGGCGCCTACGACCTCAACGGTCAGATGACGGTGGATGAGGT +CGTCAAGACGCTGGACGGCCCCGCCCGCGTTCCGGTGGTCAACGTGACCGTGCCCGAGGG +CCGGCGCATCAAGGACCTGCCCGAGATTTTCCAGAAGGCGGGCTTCGACGCGGCGGCCAT +CACGGCGGCGCTCAACGACGCGTCGCTCAGCCGCTACGCCCGGGGCAAGCAGAAGAACCT +CGAAGGCTTCGTGTTTCCCGCCACCTACGAGTTCCGCCCCAAGGACAGCGCCACTGACGT 
+GGTGAAGAAGATGGTGGAGCGCATGGAAACCGAGTTCACGCCGGGCAACGTCGCCAAGGC +GAAGGCGCTCGGCCTCGACGTGCGCGACTGGGTGACGCTGGCGAGCATGGTGCAGGCCGA +GGCCGCCAACAACGAAGAGATGCCGGTCATTGCCGGGGTCTTCCTCAACCGCCTGCGCGA +CGGCATTCCGCTCGGCAGCGACCCCACCGTGGCCTATGGGCTGGGCAAGGACCTGCCGGA +ACTCGACCGCTCGGCGGGGGACTTCAAGGTGGATACGCCTTATTCCACCTACACCCGCCA +GGGGTTGCCCGCTGGCCCCATCAACAACCCCGGCGAGGCCGCGCTCCTGAGCATCGTCAA +CCCGCAGCGCAAAATGGCGGATGGGCGCGACGCCCTGTACTTCCTGCACGCGGGCGGCAA +AATCTACGTGAACCACACGTATGCGGAGCACCTGCGCGACAACGACCGCTACCGCTGAGC +CGACAAAAGGGAGGGGGTGGAGTGCCCGGGACACCGTTCCCGCCGCGCTCCACCCCTTCG +CTGCCCTTCTCTTTCAGCCCGTTACTTCTGCGCTTTGAGCCACGTCGTCAGGTCGTTGAG +CGGCTGGGCGTCCATCCAGCCCAGAGTCGGCCTGCCGTCTTTGAGCGGCGAGAGCGAGTG +CCCCAGGCCCGGATAGACGATGAGCTTCACGCGCTTGTTGCCCGCTTTTTCGGCGGCGGC +CTGCGCCAATTTCGCGCCGGGGAGGATGGTCTGCGGATCGGCGGCGCCGTGCAGCATCAG +CACCGGGCCGCCGTAGCCGGGCAGGTTCTGCGTGACCGAGCCGTAGGCATCGGCATCCTT +GAAGTAGCGGGCGGCAGCGGGGCCGAAATCCGGCAGGTTGGGGTACGGGGCAAACGCCGC +CTCCAGCCGGTCCTTGATGGGGCCGTCAATCCGGACGCGGCCCCCGGCGTCGGCCTTCAT +CTCGGGAAGAAAGGTCCACGTCTTGTCCTTTTGCACCACGCCGAAGGCGGACGCTTGATC +GGCGAGGTCGAGCCCGAAGCGCTCGAAGCCGGCGGTCAGCTCGGCAATCGTCAGGAAGCC +GTCGTGGTCGGCGTCCATCAGCTCGTGAACCTGCGCGAGGGGCCGGTCGACGAGCTGGAA +ATGCAGCGTTTCGCGGAAGGTGTAGCCCACCGTGCCGATGAGCACCAGCCCGCGCACCAG +CTCCGGGTGCTCGCGGGCGATGCGGGCGGCGAGCATGGTGCCTTCCGAATGACCGAGCAA +AAAGACCTCACCGGGGTTGACCTCGGGCAAAGTCCGCACGGTCTGGAGCACGCCCAGCGC +GTCGGCGGAAAACTGCGAGACGGTGGCCTGTTCGGGGCGGGCCGCCGGGTCGATGCGGGG +GCCAGCGCCCAGCACGCCGCGCTTGTTGTAGCGCACCACCGCGAAGCCCGCCTCGTTGAG +CTGCCGGGCGAGTTGCAAAAAGACCTTGCTGCCGCCGGGCACGCCGCTCACCTGTTCGGG +CAGCGTTTCGTTGAGGTCGTTGGGGCCGCTCCCGTGCAGCAGAATTACGGTGCGGAACTT +CTGACCCGGCTTGGCCCCCGGCGGGAGGCTGAGTTCGCCGGTCAGCGCATAGCCGTCCGC +GAGCTTGAAGGTCAGCGGGCGAGTGTTTTGCAGAACGATCTGCGCGGGTGGGGCCGGAGG +TGGGGTGGTCATGCCCAGAGTGTCGGGCTAGGCTACTTGCCATGCCAGAGCAAAATGCAA +GTAGCGTACTCGTCCAGTCCGAGGGCGCGGCGCTGGCGCTGCTGGGCGGCATCGTGAGCG +GCGAACTGGGGCCTTTTCGCCAGGGGCCGCTGAGTCCCGCGCAGTGGGCCGCGCAGCGAG +GGGAACCGCTCTACCGCACGCTGCGCCGGGTGCGCCGCTGGCAGGCGCTCGGCGTGCTGG 
+ACGTGCAGGACACCAAAAAAAGGGCGGGGCGGCCCGTGCGGCTCTATGGCCTGACCTCCC +CCTCCTTCTACATTCCGCACGCCGTGTTGCCCGTTGACGAGGTGCTGGCGCGCATCGGTG +AGCCGATGGAACGCCAACTGCGCGCCGACATCGCTCGGGCCTTTGCCGACCTGCCCGACA +TCGGCGGCACCCAGATCGTGATTCACGGCGAGACCTACGGCGCGTTTCTCGCTCACTCGC +CCGGTCAGCCCTGGGGCGGCAAAGACGACCTGGTGCTGATTGACCGCTGGGCGAACCTGC +GCCTGACCCACGCCGACGCCCTCGCCTTTCAACACGAACTCGACGAGCTGTTTGCCCGCT +ACGAGCACCGGTCGGGGCCACAAAGCTACTCGTTGCATCTGCGGCTGGTGGCCTCGGCGG +GGGGGGGGGCGAGCCGTTAGAGAGGCACGAGCCGCGTGGAAGCAAACAAAATGCGCTGCG +GCTATTCCAGAAATGCCAGTAGCTCTTGATTCACCCGCAGCGTTTCGTCACGCATGACCC +AGTGGGTGGCGTGGGGAAACTTGACGGTTTTCAGGTTGGGCACCCACTTGCCGAGGTTGT +CGGCGAGCTCGGGCACCAGAGCCGAATCCTTTTCGCCCCACAGCAGCAGGGTGGGGGCGT +GAACCTGCCGGCCCTTGACGTTCCCGAAGCGGATCAGGGCGCGGTAATAGTTGATCATGG +CGGTGGCCGCGCCGGGCTGTGTCCAGGCGGTTTCGTAGTGGTGCATGTCCTCGGCGCTGT +AGCTGCCGGGCCGCGCTCCGCCCAGGCCCTTGCGTCCAAAGGGCACCAGCAGGCGCTCGG +GCAGCCAGGGGAGCTGGAAAAAGCCCACGTACCACGAGCGCTTTCTCTGCTCGGGACGGC +GCATTTCCCGTCCGAAGGCGCCGGGGTGCGGGGCGTTGAGAATCACCAGTTTGTCCACCA +CGGCGGGCCGCGAGATGGCGACCGCCCAGGCAATCACGCCGCCCCAGTCGTGCCCGACGA +TATGGGCGCGCTCGTGCCCGAGGTCGTGGATGAGCGCCGCCACGTCGGCCACCAGCGTAT +CGATGTGGTACGACTCCACGCCCTGCGGCTTTTCGCTGATGTTGTAGCCGCGTTGGTCGG +GCACCACCACCCGGAACCCCGCGCGGGCGAGCGGCCCGATCTGGCGGTCCCAGCCGCGCC +AGAACTCGGGAAAGCCGTGCAGCAAGATGACCGGGGGGCCCTGTTCGGGGCCGGCCTCCA +CGCAGTGCAGCCGCACCCCGTTCACGAGGCGTTCGGTGTGCCGAAGTTCGCTTGACATGG +TCCCAGTATCCTCCCCGGCGTGCGCGGCGCCGTGACCGCTGCCGGTGCCATTGTCTTTAG +TTCGGCCGCGCTAGAGCATTTGACATAAGAATGTTTCGCATTTTTGACCCTCTACCCTGG +TAGGACTCGAAGAGCTGCACCAAGGGGTCTTTTTCTGTCAAATGCTCTAGAATGTCCGGC +GCATTTCGCGTTCCCCCGGACCCTGACGGCCCGGCGCGCCGGACGCACGCTCTACCTTTT +TTTCGCGCAGGTTCGCACAGCACAGGAGACTTCTGAAACATGGCCGGTCATAGCAAGTGG +GCGCAGATCAAGCGCAAGAAGGGTGCCAACGACAAGAAACGCAGCGCGATGTATTCCAAG +CACATCCGCGCCATTCAGGCCGCCGTCCGCTCGGGCGGCAGCGGTGACCCGGCGGGCAAC +CTCAGCCTGAAAAACGCGATTGCGGCGGCCAAGACCGACACGGTGCCCGCCGACAACATC +GAAAACGCCATCAAGCGTGCGGTGGGCGCGGGCGAAGGCGCCGCCGAGTACAAGGAGCAG +ACCTACGAGGGCTACGGCCCCGGCGGCACCGCCATTTTCATCGAGACGCTGACCGACAAC 
+GTCAACCGCACCGTGGCCGACATCCGCGCCGTGTTCAACAAGCGCGGCGGCAGCATGGGC +AACTCTGGCTCGGTGGCGTGGCAGTTCGAGAAAAAGGGCATCATCCTGCTTCGCGATGCC +TCCGAGGCCGCGCAGGAAGTCGCCATCGAAAACGGCGCCGAGGACATTCAGGAATCCGAC +GAGGGCCTGGAAATCAGCACCGCGCCGAACGACCTCTACGCCGTGCAGGACGCCCTGAGC +GCCGCCGGCTACGCCGTGGAAAGCGGCCAGATCACCATGCTGCCGACCAACACGGTGGCG +GTGGCTGGCGACGACGCCCGCAAACTGCTGACGCTGGTGGAATACCTCGAAGAACTCGAC +GACGTGCAGAACGTGTACACCAACGCCGATCTGCCCGAGGACGAAGAGGACTGACCGAAA +CGAGAAAGAACAAAAAAGCGGAGGCGTAGGCTTCCGCTTTTTGCTTTGGGTGGGGATGCG +TTTTCCTGAGCCGTCGCTTCAATCACACGCCAAGGCCAGCTCCGCTTGCCGGGCGAGCTG +CGTCTCACGGGTCAGATACCAGCGGCTGAGGTCCGAGCCGAGTTCGCGGATGATCGCTTC +GGCCTGCGGGAGCGCGGCGCGGCGGCTCTGCATATTGCGCTGCACGACGGCGGTCAGGTC +GTCGAGGTTGTAGAGGTGCGCGCCGGGCACGCTGGCAATCTCGGGGTCAAGGATGCGCGG +CACGCTGATGTCAATCAGGAACATGGCGCGGCCCGGACGCCCGGCGAGCGCCTCGCGCAC +GTTCTGGGCGGTGACCACGTAGTGCGGCGCGGCGCTCGATGCGATGACCACGTCGACTTC +GGGGAGCGCGGCGGACAGTTCCTCGGCGGGGCAGGCGCGGCCCCCCAGTTTTTCGGCGAG +GGCGCGGGCCCGCTCGGCGGTGCGGTTGACCACCAGCACGTCCTGCACGCCTGCCGCCCG +CAGGTGGGTCAGAGTCAGCTCGGCGGTTTCGCCCGCCCCCAGAATCAGGGCGCGGCGCTG +CGAGAGCTCACCGAGCGCGGCCTGCGCGAGTTCCACGGCGGCGCTCGACACGCTGACCAC +CTTGTCGCTCAGGCCGGTGTGCGAGCGCACCCGCTTGCCCGCCGCCAGTGCGCCCTGCAC +CACTTTGTTCATCAGGGTGCCGCTCAGACCGCGCTCGCGGGCACTTTGCCAGGCACGTTT +GACCTGTCCCTGAATCTGGGTTTCGCCGATGACGAGGCTGTCGAGCCCCGCCGTCACGCG +GTAGAGGTGCCGCACGGCGGCCTCGCCCCGGTACACGTAGAGGTGATCGAGCAGCGCGTG +GCCCCAGGCGCCCTCGAAGGCGGCGAGCGGGTCGCCGTGCACCCCGGCGAGATAGACCTC +GGTGCGGTTGCAGGTGGCGAGCAGCAGCACCTCGTCGGCGTGGCGCGCGAGGTGGCCGAG +CAGCGCGCCTTCTTCCTCGGGCCGCACGGCGACGCGCTCGCGGACCTCGACTGGCGCGGT +CTGGTGATTGAGGCCGACCACCACAAAATCGAGCGGTGCGGGCTGCGGCACCGTACGCCG +GGCGAGCAGGTGCTGGGCAGTCGGACACGCGAGCGTCATGCCTGCGCCTCTGGCCGGGCC +GTCCGCAGACCGCGCGGCGTGAGGGAAGCAGAGAACAGGAGCATGTTGGGCAGCATAGTC +TTCCGGCGCGGGGCGCGCGTCCCGTACCGAACGGTAAGAAAGCTGTCTGGGTCGCCTTTT +TTCGTTTCCGGGATTCCTGGGATGCGGGCCCCTGACATGACGACACGTCCTTCTGACGCT +ACCCTGCAACCGACCTATGACTGCTCCTCCCGCCTTGCAAGACGTGTATGGCGTGCCCAT +TCCACCCGGCAGGCCGCTGGCCCAGGGCCGGCACGCCGACCTGCTGACCCTCGGCGAGGT 
+GCGCGGGCAGGTAGCGAAGGTCTACCTGGCGCCGCCGCCGCCCGCCCAGGCCCAGAAGGT +CGAGGAACTGGTCAAGGTGCCGCCCCACCCGGACGTGGCCCGGCGCCTGGTGTGGCCGCT +GGCCGCGCTCTACGGCCCCAACCGGCAACTCGCGGGGCTGCTCTTGCCCCTGCTGCCGCC +CCAGCAGTTTCAGGGGGTGCGGGCGCTGCTGCGGGCCGACACGCGCCGCCGCAGTTTTCC +GGAGGCCGACTGGCCCTTCGCCGTGCGGGTCGGGCGCGAACTGGCGCTCGCCTTTGCCGA +ACTGCACGCCGCCGGGCACCACATGGGCGACGTGCGGCCCGAGCACGCGCTGGTGTCCGA +CGCGGGCGAGGTGCGGCTGGTTGGGGCCGACGACTACGCGCTGCACCTCGCTGGGCGCGA +CTTTGCCGGGCCGGTGGCGAGCGCGGAATATCTGCCCCCCGAGCGTCAGCGCGTTGGGAG +CGCCACTGGCGCTGGGGCCGACGCCGAGAGCGACGCCTTCGGGCTGGCCGTCCTGCTGTT +CGAGCTGCTGCTGGGCCGCCACCCCTACGCCGGGATTCAGGCACGCGGGGCGGCGCCGGG +GCCGGGCGGGGCGATTGCCGCCGGGCTCTTTGTGGACGCGCCGCAGGCCGGGCCAGGGCG +GCGGACCGCGCCGGGCGAGTGGCCCTTCGCGGCGTTGCCCCCGGCGGTGCAGGCGCTGTT +CGTGCAGGCGTTCGCGGTGCCGGTGGTCCCCCGCCCATCTCCCGAAACCTGGGCAGCGGC +GCTCGGCGCTCTGGCCGCCGAACTGGTGCCCTGTGCCCGCCGCGCCGGTCACTGGCAGGT +GCCGGGGCTCCCTTGCCCGAGTTGCGCCGCCGAGCGCGAGGGGCCGGCGCCCAGCAGCGG +GGAAGACGTAACGGCGCGGGTGCAGCGGCTGTGGAACGACGTGCAGCGAGTGGTCGCGCC +GCCGCCGTCGCCCCCGGTGGCCCCGGTCATCGAGGCGCCGCCGCTGCCCCCGCTGCCGCT +GGCGCTGCCCGACAAACCACGGGGTTTAAACCACCGCCAGCAGTTGCAACTCCTGACCTG +GACGCTGCGGGTCGCCGTCCTCGCCGCGCTGATGCTGGGGGTCGGGCTGGTGCAGCGCTC +GGTGCTGGCGGGGCTGGTGGTGCCCGCACTGATCGTCTTTGCCCTCACGCTGGGGCGGCG +CTTTGCGGTGGACTGGGACGGCCTGATCGACCGTTACGAGCGCTGGGAACACGACCTCGT +GCGGGGGCTGTTGCCGCGCCGGGGGCCGCAGCAGCGCTACCGCCGGGCGGTGCGGCGGCG +CCGGGCCGAGGTGAAGACCGAGCTCGCCGCCCGCGTCGCCCAGCGCGAGGAACTGCGTGA +GCGCTACCACGTCGAGAACGCCGCCGCCCTGCACCTGCGCGAGCAGCAGGCGTTGGAGCA +GCGCCGGGCGCACCTGCTGAGCCTCAGCGCGGGCGGGAGCCTGGAGGCGCTACTGGGCCG +CTGGCGCGAACGCTCCCGGCAAGACTATTTGCGCCAGCAGCAGCTCAGCAGCAGCGGCGT +GCCGGGCGTCGGCCCCCGCGAACTCGGGCTGGCGGTGGCGCAGGGCATTCGCACCGCGTT +GGACGTGACCGCCGAGCGGGTCCGCGCTCTGCCTGCCCCGCTGGGCCGCGAGTTGCTGGC +GTGGCGGCGCGGCCTGGAAGACTTTTTTCAGTTCGACCCCGGCGCAGTGCCCCGCGCCGA +GCTCAACGCCGTACAGCAGCGCGGGCAGCAGCACCTCGGCGACGAAATCGAGACCTTCGA +GCGGGCCGTTCATGCCTACACCAATGCTCGCTGGGACCGCCACGAGGCCGAAATTTCCCG +GCAACTTGGCGTCGTCGAGCGCGAAATCGAGCAGTACAAAAAGGCGCTGAGCGAACTGCG 
+GGCCATCAAGGTCTGAGCCTGTGCTGTTTGGCACTTTTTCGCCTTCTTTGCACCCCGTCT +CAACATCGCGGGGGCGAGCACAAGGGCGCGAAATACTCTCTTGACCCCGTGTCGCGCCTG +ATTGGTCCGGCTCGCGGGGCACAAGGTGAACCATGACTGACAAGGCACAGACTTCTCCGG +CGCGCCTGCCCCGCATCATTCAGGGGGGCATGGGCGTGGCCGTTTCCGACTGGCGGCTGG +CGCAGGCGGTGTCGCGCACCGGCGAACTCGGCGTGGTGTCGGGCACCGGCATCGACAACG +TGCTGGTGCGGCGCCTTCAAGACGGCGACCCGCAGGGCCACGTCCGCCGGGCGCTCGCCG +AGTACCCCAACCCGGCCAAAGCGCAGGAATTCATCGCCAAGTATTTCCTCGAGGGAGGGC +GTGCCGAGGGGCAACCCTACGCCCGCGTGCCGCTGCCCAGCCACCGCAACCACCAGCTCG +CCTGGGAACTGTCTATCGCCGGGGCCTTTGTCGAGGTCTGGCTCGCCCGCGAGGGGCACA +GCAATCCGGTGGGGCTCAACCTGCTCACCAAGCTCGAACTGATGACCCTGCCCTCGCTCT +ACGGCGCCATGCTCGCCGGGGTGGACACGGTCATCATGGGCGCGGGCATTCCGCGTGAAG +TGCCCGCCGCGCTCGACAACTTCGCGGCGGGGCGGCCCGGCACCTTCAAACTCAGCGTGA +AGGGCGACCCACAGGGCGACACGCCGGGCGTGACCCTCGACCCCGCCGCTTACGGCTTTG +ACGGAGTGACCACTGGGCGCCCGAAGTTCTACCCCATCGTGACCTCGCACGTGCTCGCCG +GGGCGATGCTGCGCAAAAAGACCGGCGGCATCGAAGGCTTTATCGTCGAAGGCCCCACGG +CGGGCGGCCACAACGCGCCCCCGCGCGGCAACTACGCGCTCGACGACCTCGGCCAGCCGG +TCTACGGCGAGCGTGACGTGTGCGACCTCGGCGAAATGCGCAAATTCGGCGTCCCCTTCT +GGCTGGCGGGCGGCTACGGCAAGCGCGGCGGCCTGCAAGACGCGCTCGCGGAGGGCGCTG +CCGGCATTCAGGTCGGCACGCTGTTCGCCTACGCCCAGGAAGCGGGCATCCGCGACGAGT +TGCAGCGGCGGGTGCTCGGCGAGGTGCAGCAGGGCGACCTCAAGGTCTACACCGACCCGC +TCGCTTCGCCCACCGGCTTTCCCTTCAAGGTGGTGCAGCTCCCCGAAACACTTTCCAACC +CTGAGGTCTACGCCCGCCGGATGCGGATTTGCGACATCGGCTACCTGCGCGAAGCCTACT +GGGACATGGCCCCCGGCAAGGAAGGCAAAGTGGGCTGGCGCTGCGCCGCCGAGCCGGTGG +ACCAGTACGTCGCCAAGGGCGGCAAGGTCGAGGACACCGTGGGCCGCAAGTGCCTGTGCA +ACGCCCTGATGTCCGACGCGGGGCTGCCACAGATTCAGAAAAACGGCGACGTGGAAAAGT +CCCTGCTCACCAGCGGCGACGGCCTGACCGAACTCGGCAGCTGGAAGCCGGGGTATACGG +CGGCGGACGCTATCGAGTTTTTGCGCGGCTGAGGTGCGGCGAAGCAGGCTGGTCTCCTTC +CTGCTGCTCGCCGCTGCCGTAGTGGGAGCAACGCTCAAGGTGGTGAAGGATGACCGAAGC +GCCGTCCGCCTACCCCCTGCCGTGTACGTTCCGCCAAGCGGTCCCTTGATTGAGCCCAAG +GAAAATGCCGGACGTTGCCTTGCCACCGCCAAAATCACGCTGCCCAAGTCCTACAACGCC +TTTGCCAACCACTGGCGTTTTACGGAAGCTAAACAACTGACCGCCGAAACCTGGCAGATT +CGCGGCGAACTGAGCGAGTATTCGGCGACGGGCGTCAAGGTGTTTATTTTTCAGTGTGTG 
+GATAGCAATGAAGCGACGGGGGTAGAGCTTCACCCGAAGTAAAAATTGGGCAAGTTGGGG +GCCAGGGCTGCGGCGTTGGCCCCTTTTTCCTATTGTCCGATCTCCCCCAAAATCTCCCCA +ATCACCTCTTCCAATCCCAGCGGTCCGGTGTCGATCACGCGGGCGTCGGGCGCGGGGGCA +CTCTGGGCGGCGTCCTTCCTGTCCCTGGCGATGAGGG +>NODE_7_length_39998_cov_63.2738_ID_13 +CTACTTGCTGTAGCAGCAGGACAAAAGGTAATTGATCAAGTTACTAATCATTCACCTTTT +ATGGAAATGATCATTGTTTGGATTGTGGCGACGACATTTCAACAATTTTTACCATCCCTT +TCGACAATGGTTCAAGGAGTTCTAACTGATAAATTAACAGGCTTTATCAATATTAGCTTG +ATGAAAAAATCAGCAGACTTACAATCGATCAGTATTTTTGATGATAGTAAGTATTTTGAT +GATTTACAAATGCTCAGAGATGATGCAAGCTGGCGTCCGGTTAATTTAATTGTTTATGGA +GTATCAGTCTTACAGTCATTTCTAACGCTAGCTTTCATGCTAATATATTTGGCACGATAC +AATTGGTGGCTAGCCTTGCTTTTATTAGTAGTAATGGTACCGCAGAGTCTTTCTTACTAT +CGTATTCAGCAGCAGTCATTTGAAACAATGGTTGAAAGAAGCAAGAATGCGAGATACTTG +CACTACTATAGTGGATTGTTGCTTGATCGCAGGGATGCTAAAGAAGTTAGACTTTTCAAC +ATGTTTCCTAAGATCATCGAAAAGTATACAAGCTTATTTGAACAAACGAAAAAAGACGTT +AACCAAATTCGTAAAAAGCAACTTGCGACTAGTTCACTGTTTGTTGTATTGACTGTCGGA +GTGTTTGGCTATGGCTTTTATTGGTTTACTAATTCAGTAAGAACAGGTGCATTAGAAGTT +GGCGTATTATTGATGTTTGTTTCAGTAATTGGCTATATTTCTACTAGCATGGCTCGGGTA +GTAGAAGACAGCAGCTTGTTATACGATTCATTATTATGGGTTGAAAAATACTTTAAGTTT +CTTGAGTATCAAGATGATTTCAAAAACGGTGGTCAGAACTTTCCTGATGATTTTGATGAT +ATTAATATTAAGAATCTGTCTTTTACATATCCGTTTTCTGATACTGAAATTTTGCACAAT +GTTAGTTTCTCAGTTAAAAGTGGAGAAAAAGTTGCAATTGTTGGAGAAAATGGGTCAGGA +AAATCCACTCTGGTAAAGCTATTAATGCGTTTTTATGACCCGACTAATGGAAAAATTTCT +GTTGATAATTATGATTTAAAAGACTTTAATATCTTCGACTTACATAAAAATTTATCAGCT +ACTTTTCAGGATTTTTCTCGCTTTAAATTAACGCTTAAAGAAAACGTGATTACCGGATAT +TCATTCAATAAAGGTAGGGTAAATAATGTTCTTAAAGCAGCAGGATTGGGTGATTTGCTA +GCTAATGATCATCTTAATCTGAATACGATGCTGGCTAAAGATTTCGAAAATGGAACTGAT +TTGTCAGGTGGTCAATGGCAAAAAATAGCTTTAGCACGAGACTTATATGCTAATGGCAAG +ATTGAATTTTTAGATGAACCAACGGCAGCCTTAGATGCTAAGAGCGAATCGGAAATTTAT +CAACGCTTTTTGAAAGAAAATGATAAAAAGACAATTTTCTTTGTTACTCACCGTTTGTCA +GCAGTTAGATTTGCTGATAAGGTATTATTTCTTGACGGTGGAAAAGTTAGTGGATTTGAC +ACGCATACTAATTTATTGCAAACTAATCCAAAATATAAAGAAATGTACGACTTACAGAAA 
+AATGCATATCTGTAAAGTTAAATTTTGATAGATAATAAAATTCTTCATGCAAGCAGGCAT +GGAGAATTTTTTTGCCTTTTAACAGTTAAGATTTTTCAGTGATTTTTACGTTATAATAGA +ACATAGGTTTTTGGAAGAAAGGATATTTGGTAACGAATGGCAAAAAAAGATACGACACCA +ATGATGAAGCAATACTACGAAATTAAGGAACAATATCCCGATGCGTTTTTGTTTTATCGT +GTGGGCGATTTCTATGAATTATTTGAAGACGATGCAGTTAAGGGTGCACAAATTCTAGAG +TTGACCCTAACTCATCGTTCAAATAAAACTAAAAATCCAATTCCAATGGCAGGAGTTCCT +CACCTAGCTGTGGATACTTACGTTAATACTTTGGTGGAAAAGGGATATAAAGTTGCGCTT +TGTGAACAACTTGAAGATCCAAAAAAAGCTAAAGGAATGGTAAAGCGTGGAATTATTCAG +TTAATTACGCCAGGTACCATGATGCACGAACGTCCTGACCAAGCAAAAGACAGTAACTAC +TTAACGTCAGTTATTTCAACAAATTCTGGCTTTGGCCTAGCTTACAGCGACTTATCCACT +GGGGAAACTTTTTCCACACACTTAACTGACTTTGAAGGTGTCGCAAATGAATTATTATCC +CTTCAAACAAGAGAAGTTGTGTATAACGGTCATTTAACAGAAGCAAACAAGGACTTTTTA +AAGAAAGCTAATATTACTGTTTCTGAACCGGTTAAAGTAGAGGGCGAACATGCTGAGATT +TCCTATGTAACGCAAAATTTAACTGACGATGCAGAAATTAAGGCTACCAAGCAACTAGTA +GCTTATTTGCTGTCGACGCAAAAGCGTAGTTTGGCTCACTTACAAGTAGCACAAAGTTAT +GAGCCTACGCAATACTTGCAAATGTCGCATACTGTGCAAACGAACTTAGAGTTGATTAAA +TCAGCTAAGACTTCTAAGAAGATGGGATCTTTATTCTGGCTCTTAGATAAAACAAGCACG +GCAATGGGAGGCAGACTTCTTAAATCTTGGATTGAACGTCCGCTTTTATCAGTTACTGAA +ATTACTCGTCGTCAAGAAATGGTTCAAGCGCTTCTTGATGATTACTTCACCAGAGAAAAA +GTTATCGACAGCTTAAAGGGTGTGTATGACTTAGAAAGATTAACTGGTCGAATTGCTTTT +GGTTCAGTTAATGCTCGTGAAATGCTACAACTAGCGCATTCTTTAGGTGCAATTCCTGAT +ATTTTGAATTCCTTGCTTGAAACAAATAATCCACATCTGCAAAACTTTGCTAAGCAAATT +GATCCGTTAAAGGGAATTCATGATTTGATTGTTAACACTATTGTGGATAATCCACCACTT +TTAACGACGGAAGGTGGCTTGATTAGGGAAGGAGTTTCAGACCAATTAGACCGCTACCGT +GATGCCATGAACAACGGTAAAAAGTGGCTATCTGAAATGGAAAGTCACGAGCGTGAAGTA +ACTGGAATTAACAACTTAAAGGTTGGCTACAACAAGGTCTTTGGCTACTATATTGAAGTT +ACGAATTCCAATAAAGATAAGGTGCCAACTGATCGCTACACTAGAAAGCAAACATTAACT +AATGCAGAGCGTTATATCACGCCTGATTTAAAGGAACATGAATCCTTAATTCTAGAAGCG +GAAGCTAAGTCAACAGGCTTAGAGTATGACTTGTTTGTAAAATTAAGAGAAAATGTTAAA +AAGTATATTCCAGCTTTACAAAAATTAGCTAAACAAGTTGCCAGTTTAGACGTTTTGACG +AATTTTGCGACAGTTAGTGAACAAAATAATTATGTTCGTCCAGACTTTACGGTTGATAAG +CAAGAAATTAATGTAGTTAATGGTCGCCACCCCGTTGTTGAACAAGTGATGACGGCTGGT 
+AGTTATATTCCTAATGACGTGAAGATGGATCAAGATACCGATATTTTCTTAATTACTGGA +CCTAATATGTCTGGTAAGTCTACTTATATGCGTCAAATGGCCTTAATTGCAATCATGGCG +CAGATTGGCTGTTTCGTTCCAGCAGATAGTGCAACGTTGCCAATTTTTGATCAAATCTTT +ACTCGAATTGGTGCAGCAGATGATTTGATTTCTGGTCAGAGTACTTTTATGGTAGAAATG +AGCGAAGCTAATGACGCTTTGCAGCATGCGACAAAACGTTCTTTAGTTTTATTTGATGAA +ATTGGACGAGGTACTGCTACCTATGATGGGATGGCCTTAGCCGGAGCAATCGTGAAGTAT +CTTCATGATAAGGTCGGGGCAAAAACGCTCTTTGCGACTCACTACCACGAATTAACTGAC +TTAGACCAGACTTTAAAGCATTTAAAGAATATTCACGTTGGGGCAACCGAAGAAAATGGC +AAGTTGATTTTCTTACATAAGATCCTACCAGGACCAGCAGATCAATCTTATGGTATTCAC +GTTGCTCAATTGGCAGGCTTACCGCATAAGGTTTTAAGAGAAGCAACTACGATGCTTAAG +CGATTAGAGAAGCAAGGAGCAGGAGAGTTGCAGCCAGCTAGCGAGCAACTTGATCTCTTT +ACTGCTGAAGAAGCGAGTGTACCTGCAATTTCAGATGATGAAAAAGATGTCTTGGATGAT +ATTCAAAATGTATATCTTGCTGACAAAACTCCTTTGCAGGTTATGGAACTTGTAGCTCAA +TGGCAGCAAGAGTTGAAAGATAAGGATTAATAATCATGAGCAAAATTCATGAACTTTCAC +CTGAATTAACTAATCAAATTGCTGCTGGTGAAGTTATTGAACGGCCAGCTAGTGTTGTTA +AAGAATTATGTGAAAATTCCCTTGATGCCGGTAGCAGCCGCATTAGAATTAACTTTATTG +ATGCAGGCTTAAAGCAAATTACGGTGCAAGACAATGGGAGCGGAATTGCAAAAGACCAGC +TTAATTTAGCTTTTACTCGGCATGCAACAAGTAAGATTGCAACAGAGCGAGATTTATTTA +ATATTTCTACTTTAGGCTTTCGCGGAGAGGCTTTAGCGTCAATTGCGGCCGTTTCTCATG +TGGAAGTAATGACTAGTAGCGATAATTTAGGCGGAGTTCGGGCAATTTTTGTAGGTAGTG +AAAAAAAGCTCCAAGAAGATGCTGCTTCACCAAAGGGAACCAAGATTTCAGTTAGTGATT +TATTTTTTAATACGCCAGCTAGGCTGAAATATTTACGCTCTGAAAGAACTGAAACTCTAA +AGATTGTCGATATTGTTAACCGACTTGCTTTAGGACATCCTGATGTTTCTTTTACTCTTA +CTAATAATGGTAAGATCTTATTAAAGACCAACGGACGTAATGATTTAAGACAAGATATTG +CAAATATTTATGGCCGTCAACTTGCAGAAAAGATGGAGGTTTTGAAAGGAAGCAGTCCTG +ATTTTAAGATTACCGGCTTACTTTCTGATCCTAATACAACTCGTTCAAGTCGAAACTTTA +TTTCTTTATTATTGAATGGCCGTTATATAAGAAATTACCGTCTAACTCAGGCGATTATAG +CTGGATATGGCAGTAAATTAAGACCACGCCGTTATCCAATTGCTGTTGTAAACATTGAGC +TAGATCCACTTTTAGTGGATGTTAACGTCCATCCAACTAAACAAGAAGTGCGCTTGTCAA +AGGAGCAGGAATTAGAGCGCCTGTTAACTACAAGTATTTCAGAAGCCTTAGATCAGAATA +GTCAGATTGATTCAGGTTTGAATAATTTATTAACTCCTAAAAAGTCGACAAATATTGACC +AATTAAAATTTAACTTAAATAAAGATGTGGTTGATACTGTTCGTCCAGTCGAATTTACTC 
+CGCAAGTTGAAGCTGATAAAAGTGCTGAAGTTCATGAAACAGCGGCAAGTTTTGTTAGCT +TAGATAAGGTTCGTAATGACGATAAATATGTGATTACTGCAACTTGGGATGAAAATGTGG +ATAAGCAGGTGCAGCTAAGTCCATTTGATGAGGAAAAAGATCTGCAAGGAAAAGATGATA +GCATTATTTCTTCAGGGGATGAAATCTTAGCTAATAGCCTACCTGAATTAATTTATTTAG +GTCAGACTAAATCATATTTAGTTGCACGTCATGATGAGGACTTATACTTAATTGATCAAG +TAGCAGCTTATCGACGCTTAGCTTATGATCAAATTTTTCGTGATTTAAATAGTGAAAATA +TTTCTCAGCAAGGTTTACTTAGTCCTTTAATTTTAGATTTTAGTAATGTTGACTATTTGA +AGTTAAAAGAAAATCTTAATAATTTACAAGAATTAGGGATATTTTTAGAGGATTTTGGTC +AAAATAGTCTGATCTTAAGAACCTATCCAATTTGGCTCCAACCTGACGTAGAAAAAAATG +TCCGCATGATTTTAGATTTATATTTAAATCAAAATGAACAAGATTTTACCAAGCTTAAAG +CACAAATTGCTGGTGAAATAACGCTGCGTCAAAGTACTAGAAGAAGAAATCTAAATCCAG +TTGAAGCACAAGAATTATTGAAAAATTTGAGAAATAGTAGTGATCCTTATCAAGATTTTG +AGGGAAAAATTATTATTATTCAGTTAAGTGAAAATGATCTCAATAAGATGTTTAAAAAAG +ATGAGTAGGTAAAATGTTTGAATATCTCAAAGGAATAGTAGCAAAAATCGATCCAGCTTA +TGTAGTATTAGATGTAAATGGCATTGGGTATAAGATCCTTTGTCCCACTCCATATAGCTA +TCAAGAAAATCAGCCAGCAACTATCTATGTTGAACAAGTTGTTAGAGATACAGGGATTAC +CTTATATGGCTTTTTATCTTTAGAAGACAAAGAATTATTTTTAAAATTATTAAGCGTTTC +AGGAATTGGACCAAAGTCAGCTGTGGCGATTATGGCGGCTGAGGATACAGATTCGTTAGC +TAGCGCAATTCAGAATGGAGAGGTGAAATATCTAACTCGTTTTCCAGGGGTTGGAAAAAA +GACTGCTTCTCAGATTGTCTTAGACTTAAAGGGAAAGTTAGGCGACTATGTAAAGAAATC +AGCAGTTGCTACCGATCTTACACCGAGCCTGCAAGATGCTTTGCTTGCCTTGGTAGCACT +TGGTTATACCCAAAAAGAAGTTGACCGAATTACTCCAAAGTTAGCAAAATTGCCTGAAAA +TACTGCTGATGGTTATATTAAAGAGGCTTTAGCTTTACTACTTAAGAAGTAAAAGTAAAA +ATTAGGTAAGGATAAGGAAGTGAAAGATGTGAATGATGAAGAACGAATTATAGGCGCTGA +GAGTAATGAAGAGGATGAAACGATTGAATTGTCTTTGCGTCCGCAACTACTTGCTCAATA +TATTGGACAAGATAAAGTAAAGAGCGAGATGAAAATCTATATTAAAGCGGCTAAGCAGCG +AGACGAAGCTTTAGATCATGTTTTGTTATATGGCCCACCTGGTTTAGGTAAAACTACTTT +GGCATTTGTAATCGCTAATGAAATGGGCGTTCACTTAAAGAGCACCTCGGGTCCGGCAAT +TGAAAAGGCGGGCGATTTGGTGGCTCTGCTTTCGGAATTAAATCCAGGGGATGTCTTATT +TATTGATGAGATTCACCGTTTAGCAAAGCCAGTCGAAGAAGTCTTATATTCGGCAATGGA +AGATTTTTATATCGATATTGTGGTTGGAGAAGGACAAACAACTCATGCTGTACACGTGCC +ACTTCCTCCATTTACTTTGATTGGAGCAACTACCAGGGCTGGTCAATTATCTGCGCCGCT 
+AAGAGATCGTTTTGGAATTATTGAGCACATGCAGTACTATTCTATTGATGATCTAGAAAA +AATAATTCAAAGATCAAGCGTAGTCTTTAACACTAAAATTGATCCAGAAGCTGCTATTGA +ACTAGCTCGACGTTCTCGTGGAACGCCGCGTGTTGCTAACCGTTTACTTAAGCGGGTTCG +CGATTTTGCGGAAGTAAAGGGAGAAGAAGCTATTTCTTTAGCAACTACTAAGCACTCACT +TCATTTGTTAGAAGTTGACGATGAAGGTTTAGATCAAACTGACAGAAAACTTTTAAGAAT +GATGATTGAAAATTATGGTGGCGGTCCGGTCGGGATTAAGACTATTGCAGCTAATGTTGG +TGAAGATACAGATACAATTGAAGAAGTTTATGAACCATATTTACTTCAAAAAGGATTCAT +TACTCGAACTCCTAGAGGGAGAAGCGTTACGCAAAAAGCTTATTTACAATTAGGATATCC +ACCAAAAAAAGCAGAGTAAGTTTTTAGTTGGACCAAAAATACGGTATAATTATTATTTGA +AAGTTAATTAATAAAAATAATGAGGTGTAATACTTTGAATACATTTTTCTTAGCACAAAG +TGCTGCTGGTATGAATAACATGTTCATGATTATTGCAATGATTGCAATTTTTGTCTTCTT +CTACTTCTCCATGATCAAGCCGCAAAAGAAGCAACAACAAGAACGGATGAAGATGATGTC +TGAACTTAAGAAGGGCGATCAAGTAATTATGGTTGATGGCCTTCATGGTAAAGTTGATTC +AATCAATGACGCAGATAAGACTGTTGTGATTGATGCCGATGGAATTTTCTTAACATTTGA +AAGAATGGCTATTCGTCGCGTTCTTCCAACTGCTGCCGCTCCTGCTAAAGATGTTGAAGC +TAATGAAGCAAAAGAAGAAAAAGTTGAAACAGAAGAAAAGCCTACTGAAAGCAGTAAACC +GGAAGAAACAGCTTCTGATAAAACTGATGATACTAATTCTGAAGATAAATAAAAAAACGT +CCGCAAGGACGTTTTTATTTTTTCATTGTCATGGCTTAGTATTGTAAAATATGAATTGGT +AAAGTAAATTTTAATAAGTTTAATAGGTAGGTAGTTAGAATGGAAAATATTCCAGTTGTC +ATGATTATTTTTGGTGGCTCGGGTGATTTAGCTCATAGAAAGCTTTATCCAGCCTTATTC +AACTTATATCAAAAGGGATTAATTCATGATCATTTTGCCGTAATTGGTACAGCTAGAAGA +CCATGGAGTCATGAATACTTACAAGAACAAGTTGTTGAAGCAATTAAAGAAAGCAACAGC +AGCTTTGATGAGAAAGATGCAAAAGAATTTGCTTCGCATTTTTATTACCAATCACATGAT +GTAACTGATGTTGATCACTACATTGCTCTAAAAGAATTAGCTACAAAGCTTGATAAGAAA +TATCACGCAGAAGGCAACCGTATTTTCTACATGGCAATGGCACCAAGATTTTTTGGCACA +ATTGCAACCCATATTAACGATCAAAAGCTAGTTGGGAGTGGTTTTAACCGCTTGGTAATT +GAAAAGCCATTTGGTCATGATCTAGCTTCTGCTGAAAAATTAAATCAAGAAATTAGCGAA +AGTTTTGCTGAAGATTCGGTATATCGAATTGACCATTATCTCGGCAAAGAGATGGTCCAA +AATATTATGCCGCTTAGAATGACTAATCCAATTGTTAATAATATTTGGTGCAAGAAATAC +ATTGCTAATATGCAAGTTACCCTTGCTGAAAGCTTAGGCGTTGGTACTCGTGGCGGATAT +TATGAGACTTCTGGTGCTTTACGCGATATGGTTCAAAATCATATTTTTCAAATTATTACC +CTTTTAGCCATGCCAGAACCTAAAGCCTTAGATTCAGATCATATTCATGAAGCAAAGCAA 
+GAACTACTCGATAGTTTGGTCATTCCGACTCCAGAAATGGTAAAGCAGCATTTCTCACGT +GGTCAATATTTAGCAAGTGATGATGAAGTGGAGTATTTAAAGGCCGATCAAGTTGCTCCT +GATTCTAAAGTAGAAACTTTTGTTGCTGGCGAAGTTAACTTCAAGAAGGGTCCCGTAGCT +GGTGTTCCAATCTACTTTAGAACCGGTAAGAAGATGAAAGACAAAGTTTCTAGAATCGAT +ATTGTACTCCACCACATGAATAATTTATACGGCAATGCACATTCAAATAATATTTCAATT +ATTATTGATCCAAGAAGCGAGATTTTCTTTACTATTAACGGAAAGAAGATTACTACTGAG +GGCTTAAGAAGAGAAAATCTCAGCTATAAGTTCTCTAAGGAAGAAATGGCTCAAGTCCCA +GATGGCTACGAAAGATTACTTCACGATGTCTTTGTAGCTGATCGTACAAACTTTACGCAC +TGGTCTGAATTAAAGCAATACTGGAAGTTTGTTGATGCGGTTGAAGCTGCATGGCAAGAT +GAAAATAAAGATATTAAGCAACTTGAGCAATATCCAAGTGGTGAATTTGGTACTGAATCG +AGTAACCATATTTTTGAAAAAGATACTGAACACTGGATTTACCGTTAATGGATTTACTGC +CTAAAAATAATACTAAGCGTAAGATTATTCATCTGGATATGGATGCTTTTTATGCGTCAG +TAGAGATTCGTGACAATCCTGCTTTAAGAAATAAGGCTGTTTTAATTGGCGGCGATCCTA +AGAAAAATAGGGGACACGGGGTTGTGGCAACTGCTAATTATGTGGCTCGTCAATATGGTG +CTCATTCAGCTATGCCGACAGCTAAGGCAATTAGGTTAATTCCAGCAGAAAAACTGGTAA +TTATTCAACCACATTTTGAAAAGTATCGGGCTGTTTCAGCTGAAATTCATCGTTTAATGC +ATAAAATGACTGATCGAGTAGAATCGGTTGCCTTAGATGAAGCATACTTAGATGTAACTG +AGAATAAACTTCATCTAGCTGATCCTGTTCGGATTGCGACTATACTTCAAGAGCAAATTT +ATCAAAAGGTTGGATTAACAAGCTCATTCGGCGTTTCTTACAATAAGTTCTTAGCCAAAA +TGGGATCTGAATATGCTAAACCCTTTGGCAGAACCGTAATTCGGCCTGAAACTGCACTTG +ATTTCTTGGCAAAACAAAAAATTGAAAAGTTCCCTGGAATCGGTCCTAAGACGCAAGAAA +GATTAGCAGAGATGGGAGTCTACACTGGAGCTGACTTAATAAAAATTCCAACGGATGTGT +TGATTAAGAAATTTAATCGCATGGGCTATTTGATCGCCCAACATGCACATGGCATCGACT +TACGAGCGGTAGTTACTGATTCAGAAAGAAATCGAAAGTCGATTGGGATTGAGCGAACTT +TTAATCAGAGTTTGTTTGATGAAAATGAAGCTTTAACTAGACTGCGAGCTTATAGCGGTG +AACTTGAGAACCGGTTGAAAAAGCGACATTTTCTTGCTAATTGTGTTGTCTTAAAGATTC +GCGATGCTAATTTTAAGACAATTACTAAAAGAAGAAAGCTAAAACAAGGCACGAATGATA +AAATTGTTATTTACGACACTGGTCGTGTTTTGTTTGAAACAGAAAAAGGGATGCTCACAA +CTGGAGTACGTTTATTAGGTCTTACGGTAACCGATTTTGAAGAACATCCAGTTGAGAATT +TATCGTTAGATATTTTTGAAAACAAGTAATGATAAAATAAATTTAGTAAACAACAAGGAG +AAAAGATTAATGGCAACTTTTGATGAAATTTATGAAAAAATAAAAGAATACCCAACAATT +ATTTTGCACCGTCACACTAGTCCTGACCCAGATGCAATTGGTTCTCAAGCTGGTTTAGCA 
+AGATCATTGAGATTAGCTTTTCCAGATAAAAAGATTCTTTGTGCTGGTGAAAATGACGAA +GGTGACCTGATCTGGATTAATAAGATGGATGAAGTTACACCTGAAGATTATCAAGGTGCT +TTAGTAATTACTACTGATACTGCTAATACTCCACGTATTTCTAACAAGAATTACGATAAG +GGTGATTTTTTGATCAAGATTGACCACCACCCAGATGTAGATCCATATGCAGATATGAGC +TATGTAGATCCAGACGCACCAGCTGCTTCAGAAATTGTTTTTGACTTTTTGAAAGCAGAG +AACTTGCCAGTTACTAAGGAAGTTGCAGCAGCTTTATATGCAGGTATTGTTGGCGATACA +GGTAGATTTATGTATCCTGAAACTTCAGCTCACACTTTTGACGTTGCAGCTGAATTAACT +AAGACAGGAATTAACATTACTGATATTGCTAGAGAAATTGCGGATGTTACTTTTGATGAA +GCAAAATTGCAAGCTTTAGCAATGGATAAGATGGAAATTAACCCAGTTGGAGCAGCTTAC +ACAATTTTGATGCAAGACGATTTGAAGAAGATGGGCTTAACTGATGACCAAGCAAACGTT +GCTGTTTCAACTCCAGGTCGGATTAAGGACGTGTTAGCTTGGAATGTATTTGTTGAAAAG +CCAGATGGTACTTTCCGCGTTCATTATCGTTCAAAGGGTCCTGTAATTAACCATTTAGCT +GAAAAACATGATGGTGGTGGCCATGCATTAGCATCTGGTGCCAATGCCAAGGATATGGAT +GAAGTAAAACAAGTCTTTGACGAAGTAGTAGAAGTAACAAAGAAGTACAATGAAGAACAT +GGCACAAACAAATAGCATATTTCAAAGTGAAAAGATTAGACCAGAATTGCGAGCTGGTCT +AGAAAAAATTAATTTTAGCAAACCAACTAAGGTTCAAGCTGCAGTCATTCCGGCTCTTTT +AAGTAATAAAAGTGTTGTGGTTCAAGCAGCAACTGGTTCAGGTAAGACTCATGCCTACCT +GCTCCCACTTTTGAACATGATTGATGAAAATGCCCCAGTAACGCAAGCAGTAGTTACCGC +GCCTAGCCGAGAGTTAGCAAATCAATTGTATAAGGTAGCGCGTCAATTAAGGGATGCGAG +TGGTCTAAATATTTCCATTGAATATTTGGGCGGTGGTAATGATCGTAATCGTCAAATTGA +AAAAGCTGAAAATAGAGCTCCACAATTAATTATTGCTACACCAGGTCGTTTACATGACTT +TGCCTCAAAGAAAGTCATTAATTTAGATAACGTCAAAGCCTTCATTATTGATGAAGCAGA +CATGACGCTTGATATGGGCTTTTTAAGTCAAATGGATGAGATCATCTCTAAACTAGATAA +AAAGGTCGTTTTAGGCGCTTTTTCAGCTACTATCCCAGTTAAGCTAGAGAACTTTCTCAG +AAAGTATATGTCAAAGCCAGACTTTATTGTTATTGATAATCCAGCAATTATTGCTCCTAC +AATTCAAAATGACTTAATTGATGTTGGTTCAAGAGATAAAAAAGAAATTTTATATAAGCT +CTTAACGATGGGACAACCATATTTGGCCTTAGTTTTTGCTAATACTAAAAAGACTGTCGA +CGAATTAACTAATTATCTTGAAGAACAAGGGCTTAAGGTGGCTAAGATCCACGGCGGGAT +TACTGAACGTGAGCGCAAGCGTATTATTCGCCAAGTTAGAGAAGGGCAGTACCAATATGT +TGTAGCTAGTGATTTAGCCGCTAGAGGAATTGACGTACCAGGAGTTAGCTTAGTAGTTAA +CTATGAAATTCCTAAGGATCTTGAATTTGTCATTCACCGAATTGGGAGAACTGGTCGAAA +TGGGCTTGAAGGTCATGCAATTACCTTGATCTATGATGAAGAGATGCCTCAAATTGAAGA 
+CTTAGAAAAGCTAGGTATTCACTTTGACTTTAAAGAAATTAAAAATGGTGAATTAGTTGA +AAGAACACACTACCATCGTCGTGATAATCGCCAAGCTCGGAGTCATAAGCTTGATAATCG +CATGATTGGAATGGTTAAGAAGACTAAGAAAAAAGTAAAGCCTGGTTATAAGAAGAAAAT +TAAGCAAGCTATTCAAAAAGATCGCCAGCAAAAGCGTAAGATCGAAGAGCGTCATCAGAT +TAGAAAAGCTAAACGTAGACGTAAGCGTGAACGCGAACAAGCACGCGGTAATTTTGACAA +CTAGAAAATTTGTAGTAAAATAAATCACGTTTAGGACATATTACTGAAGTATTTTCTTCA +AAGAGAGGGCCAGGCAGGTGAGAAGGTCTAGAAATACTTAGTAATGCTACCTAATTTAAC +TAAAGAATTAAATATTTAAAGAGGTAACAAACACTGTTGCAATCAAGGTGGTACCGCGCT +GGGAGCGTCCTTGTTTTATGCAATAGTGTTTTTTTATTTGTAGGAGACTTTTTATGAAAC +AATTGACTAGTTCACAAGTACGTCAAATGTTCTTGGACTTTTTTAAAGAGCATGGCCACA +TGGTTATGCCAAGTGCATCATTAATTCCACAAGATGATCCAACCTTATTGTGGATTAACT +CTGGTGTTGCTACGATGAAGAAATATTTTGATGGTTCTGTTGTGCCTAAGAATCACCGAA +TTACTTCTTCTCAAAAATCAATTAGAACTAATGATATTGAGAATGTTGGTAAAACTGCAC +GTCACCAAACTTTCTTTGAAATGCTTGGTAACTTCTCAGTTGGGGACTACTTTAAGAAAG +AAGTTATCCCTTGGGCATGGGAATTTTTAACTAGTCCAAAATGGTTAGGCTTAGATCCAG +ATAAGCTTTATGTAACTGTTTACCCTAAGGATACAGAAGCATATCATATGTGGCATGATG +TTGTTGGCTTACCGGAAGATCACATTGTGAAGTTAGAAGACAACTTCTGGGATATTGGTG +AAGGTCCATGTGGTCCTGACTCAGAAATTTTCTATGATCGTGGTCAAGAAAATAACGATG +TTGCGGAAGACGATCCTGAAAACTTCCCAGGTGGGGAAAATGCTCGCTACCTTGAAATTT +GGAACATCGTCTTTTCACAATTCAATCACTTACCAAATGGTAAATATGTTGATCAACCAC +ATAAAAACATTGATACCGGTATGGGATTAGAGCGTGTTGTTTCAATTATTCAAGATGCAC +CAACTAACTTTGAAACTGACTTATTTATGCCAATTATTAAAGAAACTGAAAAGCTAAGTG +ATGGCAAGAAGTATGCAGCAAACAAGGAAGATGACGTAGCATTTAAGATTATTGCTGACC +ACGTTCGTGCTGTAAGTTTTGCGATTGCTGATGGGGCTCTTCCTTCAAACTCAGGTCGTG +GATATGTTTTACGTCGTTTAATTAGACGTGCTGACTTGAATGGTCAACGTTTGGGTATTA +AGGGCGCATTTTTGTACAAATTAGTACCTGTAGTTGGCGAAATTATGAAGAGTCACTACC +CAGAAGTTGTTGATCAACAAGCATTCATTCAAAAAGTAATTAAGAATGAAGAAGAAAGAT +TCCAAATAACGCTTTCATCTGGTTTGAACTTGCTTGATAATATTATTGCTGAAGCTAAGA +AGAGCGATGATAAGACTGTTTCTGGTAAAGATGCGTTTAAGTTATTTGATACTTATGGCT +TCCCATACGAATTAACTTTTGAAGCTGCTCAAGATGCAGGTCTTAAGGTTGATAAGAAGG +GCTTTGATGAAGAAATGAAAGCCCAAAAGGAACGCGCACGTAAGGCTCGTGGTAACTTAC +AATCAATGGGTTCACAAGATGTTACTTTGATGAATATCAAAGACAAGAGTGAATTTGAGT 
+ACGGTACTTTAGAAGAAAAGCATGCTAAGTTAATCGATATTGTTGTTAATGATAAGTTAG +TTGATAAAGCTGACGGTGAACACGCAACTTTAATTTTTGATAAGACTCCATTTTATGCAG +AACGTGGTGGACAAGTTGCCGATCACGGTGAAATTTTGAATCAAAATGGTGAATTGGTTG +CTCGTGTAACAGATGTACAACATGCACCAAACGATCAAAACCTACACTTTGTTGATATTA +TTTTGCCACTTGAAAAGGGACAAGAATATATCTTGAAAGTTGATCAAAAACGTCGTCGCG +GCTTAAAGCACAACCATACTGCTACTCACTTATTGCATGCTGCTTTACGTGAAGTTTTGG +GTACTCACACTCACCAAGCTGGATCTTTAGTTGAACCAGATTACTTACGTTTTGACTTTA +CTAGCTTAGAGCCAATGACTAAGAAGGAAATTGCTAACGTTGAAAAGATCGTTAACGAAA +AGATTTGGGAAGAAATTCCAGTTAAGACAACAGTTACTGATCCAGATACTGGTTTGAAGA +TGGGTGCTTTAGCCTTATTTGGTGAAAAATATGGTGACACAGTTCGTGTTGTTCAAATTG +ATGACTTCTCAACTGAATTCTGTGGTGGTACCCACTGTGAAAACACTGACCAAATCGGAA +TGCTTAAGATTGTTTCTGAATCTGCTGTTGGTGCTGGTACTCGTAGAATTATTGCTGTTA +CTGGTCCAGAAGCTTACAAATATGTAACAGACCGTGACGAAATTTTGAAAGAAGTTCAAG +ATGAAGTTAAGGCAACTAAGGCTGAAGATGTAACTAACAAGATTTCTTCTCTTGAAGAAG +ACTTACGTGCAAGTCAAAAAGAAGCTGAACAATTAAAGGCACAAATTAACAAGGCTAAAG +CCGGCGACTTGTTTAATGATGTTAAGCAAGTTAAAGGTTTAACTGTAATTGCTGCTCAAG +CTGATGTAGAAGGCATGAATGATTTACGTGAACTTGCTGATAACTGGAAGAGCAGTGATA +AATCTGATGTGTTAGTTTTAGCTGCTGAAGTTAATGGCAAAGCCAACATGGTTATTAGTT +TGAACGATAAGGCAATTAAAGCGGGTCTTAAAGCCGGCGACTTAATTAAGACTGCTGCTC +CAATCTTTGGTGGTGGCGGTGGCGGTCGTCCAAATATGGCACAAGCTGGTGGTAAGAACC +CAGCAGGCTTAAAGGACGCTATTGCTAAAGTGTTACAAGAAGTTGAAGAAAAACAAAATT +AATTGAGTTCGTTCGATATTTCAAGTAAAATTAAGAAGATAGGAGGAATTTTATTATGAG +TTCGCTAGATAAAACGATGCATTTTGACTTTAACCAAAATAAGGGTAAGAATGTATACGA +TACTCTACAAGACGTATACAATGCACTTGAGGAAAAGGGCTACAGCCCAATTAATCAAAT +TGTTGGTTACTTACTCTCAGGCGACCCTGCATATATTCCTCGGCATAATGATGCCCGTAA +TTTAATCTTGAAACATGAACGTGATGAAATTATTGAGGAATTGGTTAAGAGTTATTTAGG +TAAAAATAAATAATGCGATTACTTGGACTAGACGTTGGCTCTAAGACTGTGGGGGTTGCA +ATTAGCGATCCTTTGGGAATCACTGCTCAAGAGCTTGAAACAATCAAAATTGACGAGAGC +AAGTTTAGTTTTGGCATGCGCCAAATCAGAAAGCTTGTCCGTAAATACGACGTCGAAGGT +TTTGTTTTAGGGCTACCCAAAAACATGGATGGCAGTAGTGGACATTCTGTAGAAAGAAGC +AAGCAGTACGGTGAACGCTTGAAAGAGAAGTTTGACTTGCCTGTTCATTATATGGATGAG +CGGCTTACAACCGTACAGGCTGATCGTATTCTGGTTCAAGAAGCCGGCGTACATGATCGT 
+GTTGAAAGAAAAAAAGTTATTGACCAAATGGCTGCTGTTTTGATTTTACAAAGTTATCTA +GAAGCAACCAGAAAGGATAAGTAAAATGAGTGAAAAAATTAACGCTAACCAAGATAATGA +TCGTCAAATCACTTTAGTTGATGATCAAGGCAATGAAGAACTTTTTGAAATATTGTTCAC +TTTTACGTCTGAAGATTATGGTAAATCTTATGTTTTACTATATCCAGCAGCCGTTAGCGA +CGATGATGATGTCGAAGTACAAGCTTTTAGCTATGATGCAGATGAAGACGGTGATGTAAC +CAGTTCTGACTTACACGAAATCTCTGATGATGACGAGTGGAACATGGTACAAGGCGTTTT +AAATACATTTTTGTCAGACGATCGCTTAAGTGGCGAATAATCTTCAAAAAAGACTGGCGC +AGAAAGGCTAGTCTTTTTTGCTTAATAAGGATGAACAATGATTTCTATTCTCATTTTATT +AATTTTTATTTATTGTTGTTATGTAGGTTATCGACGTGGCATAGTGTATGAAGGCTTAGC +TGCAGGTGGATATGTAGTAGGGTTAATCTTGGCAACATTATTATATAAACCTTTTAGTAA +CTTCTTAAATCTTTGGGTGCCATATCCCTCAGCAAGTGATCGAAGTAGCTTTGCTTTCTT +TGATAAGACTACAGGATTAACATTAGATAAGTCATTTTATGCAGCAATAGCTTTTTCAAT +TGTTTTGGTGCTTGTTTGCTGTATATGGCGTTTGGTGATGTTAGGTTTTAATCAGTTAAG +ATATGTTACTGTAGATGCTAGATTAAATACATGGGGCAGCATTATTATTGCTTTTATTGT +GACCCAAATTAGTGTTTATTTGCTTTTATTTATCTTAGCTACGATTCCTAACAATAGTTT +GCAAAATATGCTGGGGCATTCTATTTTAGCAAGTGGTATCTTGCATTTCTCTCCAGGAAT +TTCGCAAATTTTCATAAAATTATTCATTACAACAATATAAAAGTGGGCCCAGCTACGGGG +CCCTTTTTGCTAGGTGGAAGATATGAACTCTAAAATTATTGAAAAATTAGAATATAATCG +CATAATTAGACAATTAAGTGACTTGGCAATTACTGCACCTGCAAAAGCTCAGGCTCTGAA +ATTAATGCCAAGTAGTGACTTTGATGAAGTAAAAAAATCAATTGATCAAACTAGAGTACT +TTCGAATATCTTGCGAGTTAAGGGGCCAATGCCGATAACTGACTTTAAAGATGTTCGACC +TAGTTTGAAGCGATTGAAAGTTAAGGCTAACTTAAATGGTGAAGAATTAGGTAATATATT +TCTAGTTCTTAGTTTGGCTAAAGATGTAGGACAATTTGCATCGGACCTTGAAGAGCGTGA +AATTGATACACGTCCTATTGAAAAGTACCTCAAAAATTTAGCAGTTCCAGAAGATTTGTT +TAAGAAGTTAAATCAAGCAATTGAATATGATGGAACAGTTAAAGATACAGCTTCTTCTAA +ATTGATGCAGCTTAGACATGATATTCAGAGTAATGAAACTGATATTAAGAACCACATGAA +TGACTATATTAGTGGCAAGCATACGCAATATTTGTCAGAAAATATTGTGACTATCAGAGA +TGGACGTTATGTTTTGCCAGTAAAACAAGAATATAAAAATAAATTTGGTGGAGTTGTTCA +TGATCAAAGTGCTAGTGGTCAAACTTTATTTGTTGAGCCACAAGCAGTCTTGGTACTTAA +TAACCGTCAACAAAATTTGCTGGCTCAAGAACGCCAAGAAATTCACCGTATTTTAATCGA +GTTATCCGAACTTGCTGGTGCTTATCAAAAAGAAATTAATAATAATGCATTAGCATTAAC +TCAATTAGACTTTTTAAGTGCTAAAAGCAAATTAGCTAAGAAGATGAAAGCGACAGAGCC 
+TGTCTTAAATCAAGATCATATCATTAAGTTGAGAAAGGCACGCCACCCCTTAATTGATCC +TAAAAAGGTAGTACCAAATAATATTGAATTAGGAACTACTTTTGATACTATGCTAATTAC +AGGACCAAACACTGGTGGTAAAACTATTACCTTAAAAACTTTAGGATTGCTACAATTAAT +GGCTCAAGCTGGTTTATTTATCACAGCTGAAGAAGGTAGTCAATTAACGGTCTTTAATGA +AATCTATGCTGATATTGGAGATGAACAATCCATTGAGCAATCTTTGAGTACATTTTCATC +GCATATGGATCAGATTATTAAAATTATGAATAATGTTACTGAAGATGATTTAGTTTTAAT +TGATGAACTTGGTGCTGGAACTGATCCAGAAGAAGGTGCAAGTTTAGCAATTGCAATCTT +AGATGATTTACGCCAAACACAAGCAAAGATTGCGATTACTACACACTATCCAGAATTAAA +GTTATATGGTTATAACCGCAAGCGAACTACTAATGCCTCAATGGAATTTGACTTAAAGAA +ATTGGCACCAACTTATCGCTTGAGAATTGGAATTCCTGGTCAAAGTAATGCTTTTGCGAT +TGCACATCAATTAGGAATGAATGAAGCTGTTGTAGATAAGGCTAGGGACCTAATGAATGA +TGAAGACAGCGACATTAATAAAATGATTGAGCGTTTAACAGAACAAACTAAGGCAGCTGA +ACAACTTCATGAAACTTTGAAGCAAAATGTTGATCAAAGTATCACCCTTAAACGTCAACT +GCAAAATGGGCTTGATTGGTATAACCAACAAGTACAAAAGCAACTCGAGAAGTCTCAGGA +GAAAGCTGATGAAATGCTTGCTAAGAAACGTAAGCAAGCTGAAAAGATTATCAATGATCT +TGAAGAGCAAAGAAGAGCTGGCGGACAAGTTAGAACCAATAAAGTAATTGAAGCTAAAGG +CGCTTTGAATAAACTTGAGCGTGAAAATCAAAACTTGGCTAATAATAAGGTGCTACAGCG +TGAAAAAAAGCGTCATGATGTAAGTGTAGGCGACAATGTTAAAGTCCTGTCATATGGTCA +ACAAGGTGTAATTACGAAGAAGTTAGGCGAGCATGAATTTGAAGTCCAAATTGGTATTTT +GAAGGTGAAGGTAACTGATCGTGATGTTGAAAAGATCGCTGCACAAGCCAGTCAAAAAAA +GCCAGAGAAATCTGTTCGCTCTAGTCGTGGTCTTCGTTCTAGCCGTGCAAGTAGTGAACT +TGATTTAAGAGGGCAGCGTTATGAAGAAGCTTTGACTAACTTAGATCGCTATCTTGATGC +TTCACTTTTGGCTGGGTTGAATACAGTAACCATCATTCATGGTATTGGTACAGGCGCAAT +TAGAAATGGTGTTCAACAATATTTAAAGCGTAATCGACATGTAAAGAGTTATAATTATGC +TCCAGCTAATCAAGGTGGAACAGGGGCTACAATAGTTAATTTGCAATAAGCAATATACTT +GCAAAAAGTAAGTAATTATACTAAACTATACTTATAATTTAAAGAATTTATTTTGTGAGG +TAATAACTATGGTTGATGAAATTACAGATGCAACTTTTGAAGATGAAACTAGCGAAGGTG +TTGTTTTAACTGATTTCTGGGCAACTTGGTGTGGTCCATGTAAAATGCAATCACCAGTAA +TTGACCAACTTTCAGAAGAAATGGACGATATAAAGTTCACTAAGATGGATGTTGACCAAA +ATCAAGAAACAGCTCGAAACTTAGGAATTATGGCAATTCCAACTTTACTGATTAAAAAAG +ATGGTAAAATTGTTGACCGCTTAACTGGTTATACTCCTAAGGAAAAGCTTGAACAAATCT +TAGATCAATACACTGATTAATTTATAGTCAAAAAGGGTAAACCGCGATTGCGATTTACCC 
+TTTTTGATATTAGTCTTTTTGTATTCATTAAGTTGATCAATCAGACTGAGCTGTAATTTG +GACGACATGCTTTTTAGTAATCGTAGCTGAGGCCTCAGTAGGAGTGCCATTTTGGCGTTC +TAAACATTCTGCAATAATTCCACTTTCCAATGAAAATTCATCACTTTGGCTATCTAGTCG +ATCAGTAACAACTTGTCCTGAAAGTTCGAAAACTGCAGTGTGTTTACGATCCTTGAGTAC +TTTTAAAGTTCCAAATTGAGCCATGTCAAAAAATTCGATCAAGTCATCGATGTCTGATAA +GTCATATTTGCGACTGACGTGTTTACCGGCCCAGTAAAGAATTTCTTTATCTTCGCTACC +TAAGATAGTAGGTAAGATAAAGTCACGGTATAATGAATTTAAAAAATAAAGATGTTCGTT +TGTGTGTTCCATGCTTTCATTTCCTTTTGTCTTCTATAACATTTTAACTTAAAATAGGTG +GAGGAGAAAAGAGATTTTAATCATCATTTGTTTAAAGGAGTTTTTTTAATGGATAATCGA +CCAATTGGAGTTTTAGATTCAGGCTTAGGTGGCTTAACTGTTTTAAAAAAAGTAATTGAG +AAGATGCCTAACGAATCAACTATTTTTATTGGTGACCAGGCTAATATGCCCTATGGAGAT +CGCTCAAAAGAAGAGATTATTTCTTTAACTCGCGACAGTGTAAACTTCTTATTAAGTAAA +GATGTAAAGATTATTATTTTTGGTTGCAATACTGCCACTGCGGTTGCAATGTCTACTATT +AAAAAAGAAATTCCTTTGCAAATAATTGGAGTAGTTCAATCTGGTGCCTTAGCTGCAGCT +AGAACCACAGAAACAAAGAATGTTGCGGTAATTGGTACTAAGGCAACTGTTAATAGTCAT +TCTTATTTAAAAGAAATTCAATATCGTGATCCAAAAATTCAAGTAAGCGAATTTGCACAA +CCTAAATTAGCGCCTCTTGCTGAGGAGGATCCTGCTGAAGAGATTAAGCAGGCTGTGGTC +AGCGAAAGCTTAGCGCCGTTGAAAAAGGCTGATTACGATACCCTTGTTTTAGGATGCACC +CATTATCCATTATTAAGAAAGGAAATTGTTGCAGTGGTTGGTCAAGATAGAAAGATTGTT +GACCCAGCTGATCAGGTGGCACAATATACCTATAATGTTTTACGACGTGATGGTTTATTT +GCGGCTGGTAATTCTGATACAAAACACGAATACTATACGACAGGTGAAGCTAAGAAGTTT +ACTGAAATAACACGTCAATGGATGAATGATGAAACAATTGTTGGTCATCATGTAGATGCT +GAGGATTAAAAATGGATACTTTATTATTTGCAACTAACAATAAAAACAAGGCTAAGGAAG +TTGAAGAAGCCTTAAAAAAGAATAACTTTCCGATTCATGTGATTACTAATCAAGATTTGA +CTGACCCACCACATGTGTTAGAAACTGGTACCACATTTTTAGCTAATGCCAAGCTCAAGG +CTCACCAGATGGCAGAATTTAGCAATCTACCGACTTTAGCTGATGATTCAGGATTATCGG +TCGATAAATTAAATGGTGCTCCTGGCGTTTATTCTGCGCGGTATGGCGGGGAAGCTCATA +ATGATGCTCTGAACAATGCTAAATTATTAGCAGAATTAGGCGGGGTGCCAAAAGAAGCGA +GAAAGGCTACTTTTCATACAACGATGGTTGTTTCTTGGCCAGGTAAATTTGAGGATGATC +TAGTAACTGAAGGCGAAATTCGTGGTGAAATTTTAACTTATCCACAAGGTGAAGGCAATT +TTGGTTATGATCCGCTTTTCTTTGTTTCTGATAAAGGAAAAACATTTGCTGAAATGACGG +TGGATGAAAAGAATGCAATTTCTCACCGAGGTCAAGCTTTGAGAAAATTACTTGCGGAAC 
+TACCGGCCTGGTGGAAAAAGATGGAAAACAAATAAATTTCGAAGAACTTATTCTGATTAG +GGGATAAGTTCTTTTTGTTTTTTCTGCTTTTAATTATAAGTATGAAAGCGCTATACTAAA +ATTGTGAGTTTACTTAATAAATGTAAGTTTAATTGAAATTGAGGTTATAGCGTATGAAAA +AAGTTTTAGCAATTAATTCAGGAAGCTCGTCCTTTAAATATAAATTATTTTCTTTTCCTG +ATGAAAAAGTTATCGCTTCAGGGATGGCTGATCGAGTTGGGATGGAAAATGCTGTCTTTA +AAATTAAGTTGAGTAATGGGCGAGAATATATCAAAAATATGCCGATTCATGATCAAGAAG +AGGCAGTGAAGCTTTTAATTGAGGATTTGAAAAAATTTCATGTAGTTGAAGATCTGAGAG +AAATTACTGGAATTGGTCACCGAATTGTAAATGGCGGTGAAATATTTAAAGAATCAGTTC +GTGTTGGTGACAAAGAGTTACAAGAAATATTTGATTTAGGTGAACTTGCGCCTTTGCACA +ACATTCCTGAGGCAAATGGAATTAAGGCCTTTATGAATATTGTTCCTAATGTTCCACAAG +TAGCAGTTTTTGATACTTCATATCATCAAACTTTAGATCCAATTCACTATCTATATTCAA +TTCCTTATGAATATTATGAAGATTATGGAATTAGAAAATATGGTGCGCATGGAATTTCTA +TTTCTTATGTTGCACCGCGTGCAGCCAAGATGCTTAAGAAAAATCCTAACTTAGTTAACT +TGATTGTTTGTCACTTAGGATCAGGTGCTTCAGTCACTGCGGTTAAACGCGGAAAGTCTT +ATGATACTTCAATGGGAATTAGTCCGTTAACTGGTGTAACAATGGGGACGAGAAGCGGAG +ATTTTGATCCTTCAGCACTTCAGCGTTTAATGCATAAGACCGGAATGAATATTGATGAAG +CAATTGATGTTTTGAATTATAAGTCAGGTTTATTAGGTATTTCAGGTGTATCTTCTGATA +TGCGTGACTTAATTGAAAGTAAAGATAAGCGTGCCAAGTTAGCACGTAAGATATTCATTA +ACCGTGTTGTTCGCTATGTTGGTGCTTATGCAGCAGAGTTAGGTAGAATTGATGCAGTTG +TCTTTACTGCAGGAGTAGGTGAACATGATCCTGGTATTAGAGCTGGTATTATGTCCTCAT +TAAGATATTTGGGGTTAGAACCAGATTTTAAGGCTAATCGAACAGATGGTGAAAAGTTTA +TTTCTAAACCTAAGTCTAGAGTAAAGGCAATTATTGTCCCAACTAATGAAGAATTAATGA +TTGCTCGCGAAGTAGTTCGCGTTGTGCAATAAAAAAGGATTAAGCAATAGATAAAAGTCT +GGTTGCTTAATCCTTTAAGATGCAGAGCTATTTTCTTGTTTTGTTGGCATGGGACTGAAT +TTAAGCCCGCGTTGATTGAATTCTTTGATATAAGCACTGAGATAGGCATTTTTAATCGTA +CTTTGGCTATTTGCAATTACTTGAAAATGAATTTGATAGGTAATACTTGAGCCGGTTTGA +TTAATGATTCCAATAATTTGAGGGCCCTTTTTGATTTGTTTTTTAAACTTATCTTTTAAA +GCGGCGTTTACCGTATCAACGGCATGATTAATTTCAGTAAGATTATTTTCTACTGAAAGA +TTTAAATTAATATCCATGCTCCAGCCAGAGTGTGAACCATGACTCAAATTTTCAACTACG +GTAATGTTGCGATTAGGAATATAGATTACAGCGCCATTAGTAGTTTTAAGGCGGGTATTG +CGCAAGGTAAAGGAGAGAACAGTTCCGGTGTAGTTGCCAATTTTGACAGTATCGCCGATA +TTGTATTCACCTTCGCTTAAGATGTTCATCCCAGTTATGACATCGCTAACTAAGCCTTGT 
+GCGCCCATCCCTAAAGCTAGAGATACTAATCCTACACTAGCTAATAGCGTTCCGACTGGA +ATTCCCATTAACGTTAAAATACTATATAAATAAAATAAGAGAATAGTATATTGAAAAAGA +GCAATTCCTAGTTGAGCTAAGGTTCTTTTTCTTCCAGTCATGCGTTCCTGAAATTTAGGA +TTTTTTAATAGGTATTTTGTTAATAAGCGCTTTCCTATATGCCAAATTAAAAAGAAAATA +ATTGTAGTAATAATAATTTGGATGCCACGGCTTAACAAAGTGTGCAGTACATTGTCCCAG +TTAATTAGGTAATCGATATTATTAGTTTTTTTAGTTGTAGAAGTTCCTTTTAGAAGAGTA +GACCAGTTCAACTCATGCCCTCCTTATCTAGTTTCTTTAAAAATAGTATATCAAAAACTT +GGGTACTATTTCATTGCTTGAGGGGAGGAATAATGCTAGACTTTTTTATGAGAGGAATAA +TTAATGGAGGATAGATTATGTCAATTTCATATGGTGCATTAGCTGGCTTAATTGCAGCAG +TTGCCTTTTTAATCTTGGTCTTATTTACTTTACCTTTGATTGTTAGAGTTACTAAGACAA +TGAAAAAAGTTGACTCAACTATGGACAGCGTTAATACGGCTGTTGATGATTTAACGAAAC +AAACTTCTGTATTAATGAAGCAAAGTGAAGATTTATTAGAAAAAAGTAATGCTCTTTTAG +CTGACGTAAATGGAAAAGTAACAGAATTAGAACCAGTTGTAAAGGCAGCAGCAGATTTAG +GTGAAAGCGTGTCAGATATCAATTCTTCATCAAGAAGAATGGTTGAACGCTTCTCTGGTA +TGGGTATTAGAGGAGCTGGAATTGGCATCTTTTCATCTCTAGTTAGTCGCATGTTTGCAA +GACGTAAACGTCGTCGCGGTGAAGACTAATATTTATTAAAGGAGAACAATTATGCGTAAA +CTTGGTGGATTTTTATTAGGTAGCGTTGTTGGTTTAGGTGTTGGCTTAATTGCTGGCTCA +TTATTGTTGCCAGAAGACGCAACTGACGATGTAAAGAAAAAACTTGCTGAAAATGAAAAA +TTGCAAGATTTGAAAGAAAAGTATGATAAAGGAACTGAAGCTATTAAGAATCAATTAGCT +TCTTTCCCTAAGTCAGTTGAAGATGATTCCGAATTAAGAGATTTTGATGATATCGTAATT +GATGACACTAACAAAGATCTTGGCGAAGATGAAAAATCTGACAAGGATGCTGTTAGCGAT +TTAAACAATGCTGAAGATACTGAAAATAACTAAAATTAAATATTTATAAAGAGAATCAAG +TTCTACTTGGTTCTTTTTTTGCAATAAAAAACCTGTTGATAAAATTTATCAACAGGTAAT +TTTAGGCCTAGTCTTTAACTGGAATATATTTTAATTCCTTGCTTGTCTTAGTGAAGGTTT +CAAAACCGTTTTTAGTGACTACACCACAATCTTCAATTCTAACTCCAGCTACGTTAGGGA +TATAAATACCAGGTTCAATTGAGAAGCACATGCCTTCCTCGAGAACTACGTCGTTTCCTT +CCATGATTTGTGGAAATTCGTGAACGTTCATTCCAATACCGTGGCCAAGACGATGAATGA +AGTATTCACCATAACCAGCCTTAGTGATAATATTTCTAGCAACAGCATCTAATTCGCTAG +CAGTAATGCCTGGTTTAGCTGCTTCAATAGCTGCTTGTTGCGCCTCACGGTCGACTTCAT +AAATTTCTTTTTCTTTAGCAGTTGGTTCGCCGTAAGCGACAGTTCTACTTGAGTCAGAAG +CATAGCCGTTGTGCATGGTTCCAAGATCGAATAAAACTAATTCGTTTGGTTCAATTTTGT +TCATTGTTGGACCAAGGTGAGGGTTAGCAGCATTTGTACCAGCTTGGACAATTGTCTCAA 
+AACTAGTGTGCATTACACCTTTTTGAAGCTTTAGTTGGTAATCAATTTGTCCAGCAACAT +AACGCTCAGTTACGCCGTTTCTTAGGGCATTAAAACCAATTTGAAAGGCAAAATCAGCTT +CTGCACCAGCAGCCTTTAATTGTTCCACCTCTTCTGGAGTTTTACGTAAACGTGCTTCAG +CTACGAATGGAGAAACATTATTGGTAAATGAAGAATCAGGAAAGGCAATGCGTAATTGTT +CTAAACGGTCAACTGAAAGATGCGATTTTTCGATTGCAATATTATTTGGTTTACCAACAC +GTTGATTTACTAAATCAGCAATTTTTGCCCATGGGTTTTCATGGTCAAGATAGCCATAAA +CATCGCCATCCCAAGCAGAGTTTTTAGCTTCTTCAACATTTAATTCAGGTGCAAAAATAA +ATGCAGGCTTGTCTTTAAAAGCAATTAAAGCAAAAATTCTCTCATGTGGATCCATGCTAT +AGCCGGTGAAATAATTAATACTGATTGGATCAGAAATATAAGCAACATCATTATTGGTTG +AAATTAACCAATTTTGTAACTTGTCTAAGTTCATAATTTTGGCTCCTTTTTTAATATATA +CTGGGAGTATACCATATAATATGGTAAAATAATGAGTTTTTAGTCGTAAACGGTTGCAAT +GTGTGAATATTCTTGATAAGTTATAAAAGAGTGTTTTTACAGGAGTGTGAGAAATTTATG +CAAAAGCAAGAAGTAACAATTTATGATGTTGCTCGTGAAGCAAAAGTTTCTATGGCTACT +GTTTCAAGAGTAGTAAATGGAAATAATAATGTACGTAAAGAAACACGTGAAAAAGTATTA +GCTGTCATTGATCGTTTGCATTATCAACCTAATGCAGTTGCTCAAGGATTAGCTTCAAAG +AGAACTACAACAGTTGGATTGATTGTCCCTGATTTGACAAATATGCACTTTGCTGAATTA +TCTAAGGGAATTGACGATATTGCTACAATGTATAAGTACAATATCTTACTTTCAAGTGTC +GGAAATACCTTGTTAAATGAAGATCAAGTTATCCAAAATTTACTTAATAAGCAAGTTGAT +GGTGTAATTTACATGTCAAACTTGATGAATGAAAAGGCACAAGAAATTTTCAATCGCACT +AACACACCAGTTGTTTTAGCTGGTACTGCTGATGCTAACCAAGAATTTTCTAGTGTAACT +ATTGACTATAAGGCTGTTGAAAAGGAAGCTTTAGGTCTTCTATTAAAAAATGGTAAGAAG +AACTTGGCCTTAGTCGTAGGTGACGGAAAAGCTTATGTTAACAAGGACAACCGTTTCGTT +GCCTATAAGGATTTCATGGAAGAACATAATTTAAAGAACGTTCACATCTATGAAGATGCC +AAGACTTATGAAGATGGCTATGAATTATTTAAGCAAATCAAAGCTGATAATGTAGATGGA +ATTATTGCTACTCGTGATGTAACAGCTGCAGGTATCGTTAATGCTGCTGCTGATGCTGGT +GTAAAGATCCCAGAAGATTTAGAAATTATCTCCGCTGCTTCAACTAATGTCGCTAAAATT +GTTCGTCCTCAATTAACAACTGTTCAACAACCGCTTTATGATATTGGTGCTGTTGCAATG +AGAATGTTAACTAAGTTGATGAATGATGAAGAATTAGACGATTCACATGTTATCTTGCCA +TATACTTTGAAGAAGTCTCGTTCTACTAAGTAGATCTAACTATAGAAAATCCCTTTAGGT +AATTTACCTAAAGGGATTTTTTAATATATAATTTTAATTTTTCTACGGAGCAGTCGGACC +ACTATCAGTGGCTGTATTGCCTTCACTAGTTTCGCTGCCAGTATTTGTATTAGCAGCTGA +ACCTTCATTAGAACTAATATTTGAATTTACATCATTTGCGGCATTGTTGCTCTCGGAATT 
+ACTATTTTCAGTTGAAGAAGATGATTCACTTGTGGAACTTGCACGACTCCGTCTGGTAGA +AGTAGATGAAGAAGAACCATTACCATTTCCGCCTTGACCAGTACTTCGAGCTTGCTGATC +ATTAGCTCCGACAGTCCGATTAGTCGAAGATGAGGAATTATTATTTGATTCACGATATAC +TTCGTGGTTTTTACCATAGTAATTTTTAGCATCTTCTGAAGTTGAGACATCAGCTACAGC +AAACAGACTGTTGATATTTTTCTTCTTAGAAATTGTCTTACCAGTGTAGATTAAGACTTT +ACCATAATCATTATTCTTGCCTTCTAAGTAGTCGTAGAAGAGGTTGTAGTCTTTAGTTGA +TCCACCGATACCAAATCTTGCAGTAGCAGCTGGAGCAGGGGATAAACTTAATGCGGTAGT +CTTTTTACCAGAAAGTTCAATATTATCGCCATCATAGCTTACAGTTCCTGGCAATGTTCC +AGTTCTTGCTAAGACTTTATTTTTATAAACAGAGCTTGGACGACTTGGTGCATCATTTAA +CTTAAAGATTGACGGATCTTCTTCGTAAAGTGCATTGGCGATATTTGCCCAAAGGTTTAA +GTTGGTTTCACTGGAATTAGAATCTAGGTTATATGAGTGACCATAGAAGTTGTCATACCC +CATCCAACTAGAAATTGTGATTCCAGGAGTAGACCCATTGAACCAAATATCACGATAATC +ATTACTGGTTCCAGTTTTACCGATTAAGTTCTTGTAGTTAAACTTCAAAGTACCAGTTAA +GCTTGAAGCAGTACCCTTAGTTACTACTTGGTGAAGCATCTTTTGCATGATGTAAGAGGT +GCCGGTAGAGAAGACTTTTTGTGGATTTTGTTTATGCTTATAAATTACTCTACCTGATGG +ATCTTGAATTTCTTCAATGTAGTAAGGATCAGCTCGCTTACCATTGTTATAGAAGTTAGA +GAAGGCACTAGCGTTATCAGCAACTGAAAAACCATAATCTGTACCACCGAGGGCCAAACC +TAGGTTTTCAAATTCGCTTTTACTCAAGTTTAAGCCTAGTTTTTTCATATCCTGACGTAA +ATTAATGTTCTTATCGTTGACTAATTTATTGTATAAGTTTACAGCAGGTAAGTTATAAGA +TTTACTTAGAGCTTCTTGAGCTAAGATAAACCGGTTTTCAACAGTGGAATTGTAGTCAGT +TGGAATATAGTTGCCAAATCTAGTAGGGAAGTCTGCTAATGCTGTTTGACTAGAAATAAA +CCTGTGTTCAATTGCTGGACCATAAACTAGATATGGCTTAATAGATGAACCAGGTGAACG +ATAAGTATCAAAGGCGTGATTAATTTGAGAGTTCTTAAAGTCAACTCCGCCGCTAAAAGC +TAATACCTTGCCAGTTGCATTATCAATTACAACACTACCGTTTTCCACATGCTCGGTAGT +ATTAACCCACTTGTTAGTAGATGAATCAAAATCACGAGAAGTCTTATCTTGACCATATTT +ATTTTGCTTAACTATGCGTTGCATAGTTTGATAAAGTGGCTGTCTAATCGTGGTCTTAAT +ATGATAGCCCTTTTGATGAAGCAGTTCAGTTGCACTAGTTAAGTATTGATTATAACGATT +AGTATCTTGTTTAACATCTGATACCTTTAAACCATCTTGCTCAATTAATTTTTCTGCTAA +AAGACTAGTACTCTTATTCATTACTAAATTATATAAGTAGCCACTTTGTTTCTTTTGTTT +AGATGCTTTTTCAGGAGCAAGGAAGTCAGCACGTAAATCATATTTTTTAGCTGCAAGATA +GTCTTTTTTACTGATATCGCCGTTTCGATACATCCGAAATAGAACAATATCTTTTCTACG +CATCGCTAAATCAAGGTCTTTTTTAATTTTTCCGTTTAAACGATATGGGGTATAAACAGA 
+TGGACTTTGAGGTAAACCAGCAATAAAGGCAGCTTGAGGAAGGTTAAGTTCAGAAATAGA +CTTACCAAATATCCCTTGAGCTGCAGTTTTAATACCTACGATGTTTTCTCCACGGTTGTT +TTTACCATAAGGAGCAGCATTTAGATATGAACGGAGAATATCTTCTTTTGAGAAATGCTT +TTCAATCTTATGAGCATAAAACATTTCGGTAACTTTTCTTCTCCAAGTGGTCTGACTAGT +TAAAAACTGCATTTTAACTAATTGCTGAGTTAAAGTCGATCCACCAGTTTGTACACCAAC +ACCAGTTAGCTCAGAAAGAACAGCTCTAACTAGGGATTTAGGTAAAACCCCTTTATGAAT +ATAGAAATTTTCATCTTCAGTAGCTGTCACAGCATTCTTAACTAAAGGAGTTAGTTCGCT +TTCACTTGCCTTTTTAGTGACGGTATCTGGACGAACTGTAGCAATTTTTTTATTGCCAGC +ATAGTAAAGTGTGGCTGAATTTTGTGCATGGTTTATTTGCTGATTTAATTCGCTAATTGT +TGGAATGGGCTGCTGGCGAACGATGCCAAGAGCATAACCACCACCAAAACCGATTAGGAG +AAACAAACCAAAAACTGCAATTAAGATCAAATAATGAAAAACACGACGTAAAGTTAAATA +AACGATGCCACTGTAAAATTTCCATTGGCTTTCGTCTGATTTGCTTTGAAGTGTTTTAAC +TTCTGGCCCCGCAGTTAGGAACTCTATGATTTTTTCTTTTAAATTTTTCACAGATCAGGA +TCCTTTGTTAGTTACTTTCCCCCTATTATAGCAAAAAAAGGACTAGCAATGACGCTGTCC +TTTTTTACGAATTTATTAATATTTTGATTTATTTAGTTAATTCATAGATTGCTTCGGCAT +AAATAGCAATTGAATTAATTAAATCGTCAACCTTCATGTATTCATTTGGTTGGTGCATAA +CAAGTGGAGCACCTTCTGGTTGAGCACCAAAGGCTACACCATGTTTGAATAAACGGCCAT +ATGTTCCGCCACCAATGATAACTTCGTGACCTTTCTTACCAGTTTGCTTTTCATAAACTG +AAAGCAGGGTCTTAACAATTGGATCGTCTCCAGGAACGTAATGAGGAGCTTCTGCTGAAC +CATCAATTCTGGCGTCTAAGATATCGCCGAATTTTTCGTTAATATTTTTAACCATTTCTT +CTGGCTCAATACCTTGTGGGTAGCGAACATTGTTTAATAGGTATGCGTTTTGACCTTCAA +AGTTAAACATACTTGGTGAACTAGTTAGGTCGCCCATTAAGTCGTCATGATGGAAAATAC +CTAATTTCTTACCGTTAAAGTCTTTGTGTTCAACAGTTGCAAGGAAGTGTAAGAAATTCT +TCGCTTGACCGTCAAAGTTAAGGCTATCAAGGAAGAGAGCAAGGTAAGTAGCAGCATTTC +TACCAGTTTCAGGAGCAGAAGCATGAGCACCATGACCGGTTAAAGTTAATGAAAGACGGC +CGCTAAGCATTTCTGCCTTTCCTTCAAGCTTATGCTCTTTCAGGAATAAGTTGAACTTTT +CTTTGATGCCATCGATATCGCCTTCAAGTTGAGCATGAGCTGTTTGAGGGATAACATTAG +TTGCAATTCCTGATTGGAATGTCCAAAGTTTAACATCGCCTTGGCTTGGGTCATCTTTAA +AGTCAAGTTTTAAGGTAACAATGCCTTGTTCACCATTGATAATTGGAAATTCTGCGTCAG +GAGAGAAAGCAACATCTGGAGCTGGTTGATGCTTTAAGTAATAGTCAATTCCAACCCAGT +TAGTTTCTTCATTAGTTCCAACTACGAAGTCAATTTTCTTTTTAGGTTTAAAGCCATGCT +CTTTTAAAATCAGCATGCCATAGTAAGCTGCTAAGGATGGGCCTTTATCATCAGCACTAC 
+CACGCCCATAAATTTTGCCATCTTTAATAGTCATCTTAAATGGATCAGTTTTCCAGCCTT +CGCCAGCAGGAACGACATCCATGTGGCCGATAATACCAACGCGTTTGTCGCCAGAACCCA +TGTTGATTCGACCAGCATAGTTATCAAAGTTTTCAGTATCAAAGCCGTCACGTTTAGCAA +ATGATAAAAACTTCTTCATAGCCTTCACTGGGCCAGGACCTACAGGATATTCTTTGCTAG +TATTGTTTAAATCTTCAGAAGAATCAATTGCAATCAATTCTTTTAAATCATTTAAAATAG +CATCTTTCTTTTCTTGTGCTAATTTTTTATAATTTAGTTCTTCCATAACGTACTCCTTTA +ATTTTTATCTCTGATTAAATATTACAGCATTTACATCTGAATAAGTCGACCAAAAGCTTT +TATTTGGGACTTTTTCAACTGGTAAATAAGGAAATAGGGCAACGTTTGTTTGACGAAAAG +CAGGATCGAGTTTTCGATAATTCTTATAGGCAAATGTGTTTTTATTAAATAAGCCATAAT +ATAAGACTTTAGACGACTTAACCCAGCCATCATCAGTAAGAAGCCATAAATCTTTAACTC +CGCCACTTACACCTAGTATTTTATAGTGTTTTCTTGGCATAAGTTGATTCTTTTTACCAG +ATTGCTTATCAGGTTTATTATAAGTGAAAATTGGATTTTGAGTAGGGTTAATAATACTTA +AATTCCAATATGGTAAAATCCATTCAGGTTGTAGGTCCAAACTAACAAGGTCAGCGGGAA +CAAATTCCTTATCGCCAATTTGGTAAAAAGTCTGTCCTGAAGCAATAACAGCTGAAGAAA +CATGAACTGGGTCTAAAAAATTAAGTTCTTTTGATGTTGTCACCTTTAGATATGGATTCT +CACTAATTGTAGTAGGTACACGGCCAAAGTAAATTCCGTCCTTGTTAACGACATATTTTT +TAGTGCCAGTTTTAGCTAATTTGTATTTATAGCCTCTTGCGGGGAAATTAGTGACAACTC +CATCAACATCATTGTTGATTAACCAATTCCATAACTTAGGCGACTCATTCATTTCGGCCC +AAACAAATACCTTTTGATGTAGCGCGTGAAGTTGACTAATTAGCTTGGGATTTTGGGTAA +CTAGATCAGATGAGATATTGATTCCGTTTACGTAACTTAGTACTTCAAAATTAATTCTTT +GCAAAGAACCAACAATAAAGATTCTTGGAACATCTGGTAACAGTTGACTCATAGTTTTTA +AACTTGGTGCAGAAAAGCTGTGAATCATTACTCGATCTTGCATATGGTATTTCTTGATAC +TGGCAGCAAGGAGTTCTTCCATATTTTTAGGACTATTATGCTTAGTTTTTTTTGTTTCAA +GCAAAAATTTAGTGTTAGGTTTATCTTTGTAGTAATCAAATAATTGATCTAAAGAAATAA +TTGATTCACCATTAGCTTGTTTTAAGGTGTTTAAATAGGAAAAATTATTTTGTGATACAA +TAACCGATGATCCAACAACTCTGCTTAGATCGCGGTCATGAGAAACAACTAAGACGTTAT +CTTTGGAAACATGCAGGTCGAGCTCTACATAGTCCGCGCCATCTGCAAAAGCTGAGTTAT +CACTTTGAATTGTTTCTTCTGGATATTTACTGGGATTACCGCGGTGACCAATGACTAGGA +ATCCCGATTCAAAAATAAAAATCAAACTAAAAAAAAGGGCTAGGAAATAAAGCCTGCTTG +TTTTCATGTGTACACCTCTATTCTTTTTAACAATAGCATGGATGTATACTTTTTTCTATA +ATTAATATAGTAGTTAATAATCATTTTTAAGAAAGTAGATGCTGCAATATGGCTTATGAG +CAACTAGATTTAATCGAAGAAGTTACTCGCAATGATGGATCTAAATATTATGAAATTTCA 
+AATATTGATCAAAATGGTATCGCTGAATTAGCCGTAGATCGTGGATTGATAAAGAAGGTG +CGGATACTTCAGCTAAATTTGGCTAGAACAAAAGCACTTCAATTATATGAAGAATATATT +AATAAAACGTACCAACTAGAGACTTTAACCAATGAGGATGATTGGAAAGATCCGCAGTGG +GTTGAGTGGGAAAAGCCTAAAGGAAAAGTTTTAGACGCCTTTAATACAGTATTAAAAGCC +AATCATATTGGTTAGAAGAGAGTAAAGTATGGAAAAATTACGTATTGTTCATACCAATGA +CCTACATTCTCACTTTGAACAATTTCCTAAAATAAAAAGATATTTACATCAAGCACAAAA +TGACAAAAGTGTTGATCAAACTTTTACCTTCGATGCTGGAGACTTTATGGATCGTTCTCA +TCCCTTATCTGATGCAACTGAGGGCCAAGCTAATATTAAGCTGATGAATGAATTTAACTA +TGATGCGATCACAATTGGTAATAATGAAGGAATCTCTAATTCACATGCAGTTTTAGAAAA +ATTATTTGATCATGCGAATTTTCCAGTTATTTTAGCTAACTTACGTGAAGAAGATGAAAG +TATGCCTAAATGGTGTACTTCATATAAGATTTTTGAAACTAAGAAGAAAACGAGAATTGC +AGTTGTTGGCTTAACTGCTGCATATCCTATGACCTATGGTCCTAACCACTGGCATGTGAA +GTTGATTTCTGATACTTTAGATAAGTTACTTCCTACAATTAACGGGCAATATGATATGTT +AATTATCGTTAGTCATATTGGCTTAAGAATGGATCGGTATATTGCACATCATTATCCAGA +AGTGGATTTGATTGTTGGTGGGCATAGTCATGATTATTTACCAAAGGGCGAGAAAGTAAA +TAAAACATGGATTACCCAAACTGGAAAATGGGGTCAGCATATCGGCGATATTGCGATTCA +ATTAAATGATGATCACCAGGTAGTGTCAATTGTACCAAATACTGTCAAAACCGCTGATTT +ACCAAGTTCGGAATCTGATCAAGCTAAAATCGATGCTTGGCGTAAGCAGGGTGAACAAAT +GCTGCAATCACGTCAAATTGCAAAGTTACCTGCTAAATTTAACGATGATAAAATGGCAGC +AATTCAAGTTTCTCTAGATGCAATTAGTGATTTTGCTGGAACTGATATTGCGGTTTTAAA +TAGTGGCTTATTTTTGACGCCATTTCATCAGGGAATTTTAACAGAAGCTGATTTACAAGC +TGCTTTACCTCACCCAATGCATGTAGTGCAAACTAAATTATTTGGAAGTGATGTTTGGCG +CATGGTAATGGAGATGGAAAAGAACCGTCATTTTCTAAGCAAGTTTGCCCTCAAAGGAAT +GAGTTTTAGAGGGAAAATTTTTGGCGATCTTGTTTATAAAGGCATTCAGGTTGATCATGC +AAGTAGAACAGTCTACATTAACGGAAAAGAGATTGATCCTGAAAAAGAGTATACAATTGC +CTTGCTTGATCATTATGTTTTAATTCCATTTTTCCCTACCATTTCAATTATGGGTAAAAA +CAAGTTTTTATTCCCTGATTATTTACGAACTGTAATTGGAAACTACTTAAAAGAGAAATA +TCCAATAGATAAATAGAAAGAAAAATAATGGAGTATAAAGAAGCAGATAAAGAACAACCG +CTTTATCATCCTTTGGCTCATGTAATCGTTAATGAAGATAAGATAATGATTAATGGGCGA +AAGTATGAAATTTTAGCTAATGTAAAAGATGCTCTAGATATTGAAATGCTAAAAGAGAAA +TATGATCCTTTCCTTGACCAGTACGATTATTTAGTAGGGGACATTTCAAGTGAGCATTTA +AGATTAAAAGGATTTTACGATGAAAAAGATCGCGTTGCAATTGATAAAAAAGCTAATGCG 
+ATTGTGGATTATTTAGAGGAATATTGTAATCCTGGCAGTGCATATTTTGTTTTACGACTA +GCTCAAGAAAATCAGATTAAAAAGGTTGCATTTGGCAGACAAAATAAGAGAAAAAATATG +TCTAATAATCGCTATCATTCTAAACGTCCCTACTTTAAAGAAAGACGTGTCCACAAAACA +AGAATTGGTGGCCATAAGACAGCTGTAAAATTTCAACGTGGAAAAGGAAGTCACAAGAAT +AAAGGCTTCGTAATTAAGAAAAGAAAGGGCTAATTGTGGAACATAAAGATTATAAATGTT +ACTTAATTGATTTGGATGGGACTATTTATCGCGGTAGCGATACGATCGAAAGTGGCGTTA +GATTCATTCATCGTTTGCAAGAGAAAAATATTCCCCACTTATTTTTAACTAATAATTCTA +CTCGTACGCCGCGAATGGTAGTTGATAAATTGCGCGGACATGGGGTTAACACAGATATCT +ATCATATTTATACGCCGGTTTTAGCTACGGAGTCTTACTTGCTTGCTCAAAATCCAGATA +CTGCTAAGATACCAGTTTATATCATTGGACAGACAGGATTAGTGCAAGGTCTGTTAAAAA +ATGAACGTTTCTACTACGATGATCGAAATCCTAAATATGTTGTAGTTGGAATGGATACGG +ATTTAACTTATCATAAAATTCGCGTTGCTACTCGCTCAATTAGAAATGGCGCAACTTTTA +TTGGAACAAATGCTGATAAAAATTTGCCTTCTGGGGATGAGCTATTGCCTGGTAACGGTG +CACTATGTACAATGCTAGAAGTAGCAACAGGTGTTAAGCCCATTTATATTGGTAAACCTT +CATCAATTATTGTGGCTAGTGCTTTGAAGATGCTAAATGCTCAAGGCAGGGATGCAATTT +TGGTTGGGGATAACTATGATACTGATATTATGGCAGGCATTAACTGTAATATTGACTCTT +TGTTAACTTTGACCGGTGTAACTACTAAGAAGCAATTGGCAGAAAGAGATAAGCAGCCAA +CTTACGTTGTAGAAAATCTGGATGAGTGGAAACTATGAAGAAGAAACCAATCGTCGCCGT +TGTTTACAATTGTTTGATGGCTCTTGCTACTGCTACTTTAGGTGCAGTAGTTTTAAGTTG +GCCATTATTAGCAGTTTTTGTAAAAATTCAAAAAACGAATTTGATCGTAAAGACAACCTT +AGCAAAACTATACACAACTCTTAGATTATTTATTGTCGCCATTTAATGATAGACTTCAGA +TGTCAAATTTCCCGACTTCTCCAAGTGCAGCGCGACATTTTTATGAGTGTAAGTTACTAT +TTGAATTAGCGATAATTGTTTTTGTAGTTGGATTAATTATTCTTATTTTCTTAAAAATGA +GAAAAAGAATGAATTATATTTATATTTCGAGAACAACAGCGCTAATTTTCATGATTTTGC +CTGTGATTATCTTACCGTTTGCTTTAATGAATTTTGATGAATTTTTTGTAGCATTTCATC +ATCTATTGTTCAACAATGGGGATTGGCTATTTAACCCTGCGACAGACCCAATTATTAATG +TATTAACCGAAGAGTTTTTTGCTGGTTGCTTTGCAACTGGCGGAATAATTTATGAGTTAT +ATTTTTCTTGTTTTATTTTGTCAAAAAAATAAAGGAGCTTTAACGAAGCTCCTTTTTTAA +TACTAATTTTTTCTTTAACCATATAATCAAGACTGGTAAAACTGAAATTAAAATGATTGC +AATCACGATCAATGAGAAATGTTCCTGGACGATTGGAATATTTCCAAAGAAGAATCCTAA +TGCGGAAAATAAGGTTACCCAGAGAAAACCACCAATGAAGTTATAAATGATGAAAGTTCG +ATAATGCATCTTACTTGCACCTGATACAAATGGAACGAATGTACGAATAAATGGAATAAA 
+TCTTCCGATTACAATTGTAATACCGCCGTGTCGATCGAAGAAGTTTTCAGCGGCTTTTCG +TTTATCTTGATTAATTAACTTGTTGAACCAACTGTGTTTAGTCCCTTCAGCTTGTGACCA +ATGTCCGATTTCATAGTTAATCGTATCACCTAAAACAGCAGCTGCTATGAAGATTAAATA +GCAAACCCAGAAGTGAAGATTATATTTGGGCGTTGCTGCCATAGCACAACATGCAAATAT +AAGTGAATCTCCAGGTAGAAATGGAAAGATAACCAGACCTGTTTCGATAAAAATAATTGC +AAATAAGATTAAGTAAGTCCAGTTTCCAAACATATTCACAATGGTGACTAAATGGTCATC +AATATGAAGAATAAAATCGATAAGTCCCATAAATAAAAATAGATTCCTTTCACAAATTAA +ATACTGGTAGAATGAAAAGTCATTGTTTTTTCTGGAAATAGGTCTTGCATAATGTTGCTA +ATTGCAATTTGTGCCTCGCCAAATCCTAAAGCAATCATTGGCACTCTTCCAGGATAAGTA +ATAGCATCGCCAATTGCATATACATGAGGAAGGTTGGTATGCATTTTTTGAGAAACAGCG +ATTAGATTATGGTCTAATTTGATCCCCCATTTACGTAATTGTCGATTATCGCTCTTGAAG +CCATATGCAACTAGGATTTCATCAACATTTTTAGTAACAAAATCTTGACTTGCACCAACT +TTATGCAAAACTAATTCCATTCTATTATTGTTTAACTGCAGATCTTTTGGAAGATAAGGA +GTTAAAAGTTCAACATTTTTTAAACTTTTAAGCTTATTGACGGAGCTTTCAAGACCGCGA +AATTCATTTCTCCGGTGAATTAAGAATACATCTGATGTGTTTGCTAATTCTAGTGCCCAG +TCAAGTGCAGAATCTCCACCTCCAAGAATGGCTACTTTTTTGTTAGCAAAGATTTCAGGA +TGCTGCATTGAGTAATGGATATGTTGGTTTACGTCTAAAGTAGTTGATAAAGGAAGAGTT +TTGGGCTTAAACGCGCCAAGTCCCGTAGCAATTAATAAACTTTTTACTTCATATTTTTTG +TCAACTAGGATATTTTCATTTTCTAAAAAAGAAATCTCTTTTACTTTATGAGAAAGAATG +AATTTAGTATTGATGTTTTGCTCAAGTAGCTGAGAAACTAGCTTTTTACCTTTAATAGAA +GGAAAAGCGGGAATATCCTTAATATTTTTTTGTGGATAAAGCATATTAATTTGACCGCCA +ATTTCGTCTAATGCATCAAAAGAAATTGTCTTTAGTCCATGTAAATTAGCAAAAGAGGCT +GCAAAAAGTCCAATAGGACCTGCACCAATAATAGCTAAATCATATTTTTCCATCTTTATA +AGAAATCCTCCATAAAAATTAAGCATAGAAGTCATGTCTTTAGCGTTAATTAAAAAGACA +GTTATAATTAATATTAGTAAGGGATGAGATAGTATGCAAGTTAAAATTGGCGATATAGTT +CATGGAAAAATTAGTGGGATACAACAATATGGGATTTTTGTAAGGCTAGATAGCAAAGTA +GAAGGCTTGATCCATATCTCTGAAATACATGGTGGATATGTTAAAGATATCGGAAGAGAG +TATCAAGTTGGTGAAACAATAAAGGTACAAGTAATTGATATAGATCCATATTCTAATCAG +ATTAGTTTATCTAGAAGGGCAGTCCTTCCTGAAGTAAAAGAAGCAAGAAAAAAGCGAGTC +CATTTTTGGACTTCAAAACGAGTTAAGAAAGGTTTTACTCCTTTAAAAGAAGTATTGAAT +ACTCAAATTAAAGAAGCAAAAAGTAGATATTCTAAATAAAGAGTAAATAAACGTGTTAAG +GAGTAGTGCTTAACACAGTTTTTTTATATCCGAATTTT 
+>NODE_6_length_39999_cov_63.2183_ID_11 +GGAGGTCACTGAGCTGGGGACGTTTTCCGTCAATACCCTCACGACTTTCCACACCACGGT +TCAGCTGGCTCAGTGCGATGATGGGGATGTTCAGTTCTTTGGCTAACCCCTTCAGTGAAC +GTGAAATGGTACTGACCTCTTCCTGACGGCTACCGAATGACATTCCGCTGGCATTCATTA +ACTGAAGGTAGTCAATGATAATGATTTTTACTCCATGCTCGCGTACAAGCCGTCGTGCTT +TGGTGCGTAATTCAAAGACGGACAAGGATGGTGTGTCATCCACATACATCGGAGCATCGT +ATAATTCTTTTATTTTATAATCCAATTGTCCCCATTCATACGGAGCCAACTGACCACTCT +TGATTTTCTCACCCGGAATTTCGCACACGTTGACAATCATACGGTTGACCAACTGTACGT +TGGACATTTCAAGAGAGAACAAAGCCACAGGGATTTTCGCATTTACCGCCATGTTCTTTG +CCATGGAAAGCACAAAAGCCGTTTTACCCATAGCGGGGCGGGCGGCAATAATCACCAGGT +CGGAATTTTGCCAGCCCGAGGTCATTTTGTCCAATGCATGGAAACCGCTTTCCAGACCAC +TCAGCCCGTCCGTTCGTGCCGCCGCTTTTTGAAGCATCTCATACGCTTCCTGAATGACAG +GGTTGATTTGCGTATAATCCTTCTTCATGTTCTGTTGCGAGATTTCGAACAGTTTTCCTT +CCGCTTCCTGCATCAGGTCGTCCACGTCTTGGGTTTCGTCGAAAGCCTTTGTCTGGATGT +TGCTGGTAAAGGTAATCAGTTCGCGTGCAAGAAACTTCTGCGCGATGATACGTGCGTGAT +ATTCTATGTGTGCGGAAGAGGCTACCTTGCCGCTTAGCTGTGTTATGTAAAAAGGACCGC +CTGCATCTTCCAGTTCACCGGTGCTGCGTAATTGTTCGGCCACAGTCAAGATATCGACTG +GTTGCTGGCGTAAAGCCAAAGAGGTAATAGCCGAATAAATAAGTTGGTGACGGCGTTCAT +AAAAAGATTCGGGACGAAGAATCTCACTTACTAATGAGTAAGCATCTTTTTCTATCATTA +AGGCTCCCAATACAGCTTCCTCCAGCTCAGGTGCCTGGGGCTGTAAATGTCCGTATTCGT +CAACAGGCTTTGCTTTCGAAGTTTTGGGAGTACGTGTCATTTTTCTCGTTTCTGCCATTG +TCGTCTGTTTAAAGTAGGGCAAAGATAGAACTTTTTGCTGAGTTGGACACTCGTCCATTC +CCAAAAATAATAAAAGAAATATGAACCTGAATCTACGGAGAAATGAACAATCTGGACAGT +AAATGGTTTTTTTATATGGTAAAATGAAAAGTTTGTTCTATATTTGGCGCTCTGTCCGGA +TAAAAGATGGCGTGCATCTTTGTAGAACGGGCAACAATCTAACGAAACACACTAAAAATA +ACTATTTAAAGAACAGAATATGATTACATTTCCTAATGCAAAGATTAATTTGGGACTTAA +TATTGTAGAAAAGCGCCCGGATGGATACCACAATCTGGAAACCATATTTTATCCGATAAA +CCTGCAAGACGCTCTGGAAGTGACGCGGCGGGAAAACAATGACAAAGAATACACTTTACA +CATAAGCGGATCTCCTTTGGAGGGTGAACCCGAAGATAATCTGGTAGTAAAAGCCTATAA +GTTATTAAAAAAAGACTATCCCGGACTGTTGCCGGTAGATATACACATGTACAAGCATAT +ACCGGCGGGTGCCGGACTAGGTGGCGGATCATCAGATGCCGCTTGTATGATAAAGCTTCT +GAATGATAAATTTTCATTGGGACTCAGTACGGAACGGATGGAGGAATATGCGGTAAAACT 
+AGGGGCAGACTGTGCTTTTTTCATTCGGAACAAACCTGTTTTCGCTACCGGAATAGGGAA +TCTGTTCGAGCCTGTGGAATTGTCATTAAAGGGATACCATATAATTTTGATTAAACCTGA +TATCTTTGTGTCCACCCGGGATGCGTTTGCCGAAATAAAGCCTGTGCGTCCGGCTGTTTC +TTTGAAAGAAATTGTAAGGCAACCTATGGAAACATGGAAAAACAGTATGAAGAATGATTT +TGAAGATAGTGTCTTCAAAAAGTTCCCTGAGATTGCTGCTATCAAAGATGAATTGTATGA +TTTGGGGGCAGTGTATGCCGCCATGAGTGGCTCGGGGTCTTCCGTCTATGGTATTTTCAA +AGCACCGATAGAGAATGTGGAAGATAAATTCTGTGGATGCTTTTGCAGACAGAGAGCGTT +GGAATAGTGTGGCAGATCAGTGGTTAGTGATTAGTGGACTGCGCTGTCATGCCGCAGGTA +TTAACCACTAACCACTATAGTCTATCTTTTACATAGTGCCTTGAAATCACAATAGGTACA +TTTTTCTATGATTTCTGTTTGGGTAAATGATTTTTCCGGATTAAATATCTCTTCTAATAA +TCCTTGAAGTCGTTCACGGTATTCTTTTTCATATTTACTGAAATCTTCCACAGCCTCTTT +GGGCTTTCGTGGCTCGCCCATTTGTATAACGGGAGAATAGGTTTCTGTAGCGGCCCGATG +AATATAGAGAAGGGCCGGAGCTATCTTCATCGTTGGTTGCTTGCGGCACATGATGGCGGC +ATACAGGAACGTCTGGAATACATAGTTGGAACGTTTCTTGTCCGGAATAAACAGAGATTC +CACATGGGGTGGGGTGTCTGCATCACCGCCTGTCTTGTAGTCCACAATACGGAGTGTGCC +ATCCTTGCTGTCCATACGGTCAATAATTCCCCCGATACGTGATTTTATGACTCCTTTGGG +TGTTTGTATGTCGATGGGTTCATCCACTTCCATTTCCGATGCAATAAAAGTGAAGGGAGC +GTAACGTAAATCATTTTGTAAAAGTTGTTTTAAATATCTTGCAATAACGGCAGAGTTGAT +AAGTTGTATGCCGTTATATTCCGGTTTCTCGTTTTGCGGAACATTAAAGAATAATTTCTT +GAATGCTGTATCCACATAATCTTGCAGCTTCACTTCATTGCGTAGCAACGTTTCCAATGC +TTCCTTATTAATGACTTTGCCATGAGTGGTCAGGTCTTTATAAATATGTTCGGCTGCATA +ATGGAAAATACTTCCGAATGTGGCAGAATCTATTTCCGCACTCACTTCATCGGGGGCGGA +GAGGCCGGCTACATACCTATAATAAAATTTTAAAGGACAATCCAGGTAATAATTCAAGGC +TGATGGAGAAAATTTTGCTTTGGGATTGGCACGGACATCAAACAGGCTTTGCATCCGGCG +CATGACATCCGGGGTCTTTTCTACAGTGATAGGCGAGGTGCCTTGGGGGGACTGTCCGGC +CTCCAGAAACTGGCGTGTGATGGGGTGAGGCCATTCAATAAGGAATTGCAGCATGAAACG +GCTCCATTCGCCGCGGTTCAGTCCGTCCGAACTGGTATTGTACATCAAGGTAATCCTTTC +TGCCCGTTGCAGCAGACGATAGAAATAGTATGCATATACGGCAATCTTATGCTCTATAGT +AGTCATTCCGAAAGCTTTGCGCAGGTTGTAAGGAATAAAGGAAGAGTCTCCTCCCGATTT +GGGAAGCTGCCCTTCGTTGACCGATAGTAATACCAAGTGGCGGAAATCCAGGTTACGTGT +TTCCAATACCCCCATGACCTGCATACCAATGGCCGGTTCGCCATGGAATGGAATATTGGT +CGCTGACAAGACCTTGACCAGCAATCGCCGGAATGTTTCGCTCTGTACGGTTAATTCATC 
+TTCTTCTATCAGTGTGCGGAACCGGTTGATGGTGGTATAGGCTTTGAAAAGCGACTCCCG +GTAAAGTTGGTTGAATGCATCTGTGTCCTCGGTACCGGAAGTGTTTGCCTGGTAAATGCC +TGCCACTTGCTGCAAAGTTTCGGACAGGCGTATGCACAGATTCAGGTTTCCGGAAAGCGG +AGTGAAAAGCCGGGTAAGGAATTCATCTTTACCCAATTCGCCGGGTAACGGATAGAAACG +GTTGTTCCGGGTCAGTTCTTTTTCCAACAATTCGGCTTGACCGGTGAGTTGCCGGGTATA +GGGGTGTTTTAACAGTGTAACAACCGATTGAAACGTGTAGCGGCCGCTTTTGAAATTAAA +TCCGTGGGTATGCAATTCCAGTAAGGCGATGAGGAAACTATATACAGGAGTTTGCGATAA +CGGAAATCCCATGGTGATATTGACATGTTTTACTTCTGCCGGCAATGAATGGAGGACCGG +CTGAAGCAATGCTTCGTTACACAATACGACTGCGGTTTCCTTTTCCGGTGTAGTCAGATT +GTTTCGTATCCATTGAGGCAGGTAACGGGCTTGTGCATTTTCAGTAGAAGATGCGATGTA +GTGTACCTCTTTAGGTTTCGAGAGATTTTTGAATAACTCACCGGACAAAGGAGAAGGAAA +GTCGCGTAAGTTCCGGCGGATAAACTCTCCGGCTTCATGGGTGACGGCTTGTCTGTTTTC +CTTCATATAAAATTCGTCGTAATCCCAATAAAAAACAGCTTTCCCGGCATCTTTCAATTG +GGTGAACAACGTATGTTCTACTTTGTTCAGTACATTAAATCCTACAAAAACATATTTCTC +GTATGGAAGTTTGTCCACATTCAGATGCTCTATGACGTGGCGGTACATCATTCCTTCGTA +GGCAATGTTTTGAGAAGCTAAGGATTCACGGAACCCTTTGTAGATGTTTCCCAATACGTC +CCATAAAGAGATAAACCGTTCTTTCAGCGCTGTTCTCCGTTCGATGGAGAAGTTCTGGAA +GAACTGCCGGATAGCTTCTTCCTGCTCATCGTCAATGAAGGTGTAGTCGTCCATTATGTT +GCGCAGATCCTGTAAATTGGAAAACAGTTTGTCGGTATCCACTTTATTTTTGTCGGCATC +GTCAAAATCACTGATCAGCATTTCTCCCCAAAAATAGAAATCATCCAGTGTTTCCGTACT +TTGAGTTTCCCGTCGGAATATCTTATAAAGTTCGCATACCAGTTTTACCGGATCACCCAC +TTCCCATGGAGAAAGGCTACGGAACAATTCGCTGATGCTGACGTATGCGGGCGACCAAAT +AGGTGAATCCGACTCTTGGGCGAGATACTCATTAAAGAAAAGTCCGGCACGTTTGTTAGG +AAAAACCACGGCTGTATGTGCCAGGTTTCCTTCCGTATGTTTATATAAATCGGCTGCTAC +CAGTTTCAAAAAGCTTTCCATTATTCTATTTCCTCCAGTTCGTTGTTAAATACATACCAT +AAATAGCCACGGATATGCTCATATCCCATATCCGATAATAAATCCATATATTCTTTCACC +TGCTTGTTGTAAGCTTTCCGCTTTTTACCGAATTTAAAGTCCACCACAATCACCTCTCCG +TCTTTCATCATCACGCGGTCCGGACGGCGGGTTTGCAGCACTCCTTTTTCCCGATAAATA +ATGGCACACTCATTGTATAGTTCCCAGCGGCCCGAATACCATTCTTTGACCAGCGGATGC +TTCAAAGCCCATTCGGTGAGTTTTCGTATTTGTTCTTCCTGTTGTGCCGACTCTATGATT +CCTTCGAAACGCAGGCGTTCTATGGCGGGAGGCACATCGTCCGTGGTACGGATTACGGAG +AACAGATTATGCAACAACTGTCCTTGCCGGATGTATTTGTCTTCCGCTTCCTCTTCTCCC 
+CGGATAAATTCGGCAGAACGGTTGGACTGCTTGAATTCGATGTTGGTTTCCAGCGTTTCC +AGATGGATAGGCAAACGTCGGGCTACAGTCAGCAGCTTGTTGCCGGATACTTGTTTTTCC +TCTTCGTGAGATGAGAGATAGAGCGTTCCCAGTTCGTATATTTCTTCTCCGGTGGCATAG +CTCTTTCCGGTCATGTCGGATAAGGCACTTTCCAACAATTCGGATACCGTTCCTTTCTGT +TCCGCCTTTCCATATATAATCAAGTTCTTTTTGGCACGGGTAAATGCCACATAAAGCAGG +TTCAGATTATCCACCCATAATTGCAAGCGTTCATTCAGGAACTCTTCCCGATAAATGGAT +TGTTGCATGGCTGTGGAATAGTTGATGGGAACAATGTCAAGATCGCTAAACGGAGCTTGC +CGGGGAGCACACCATACCAGATGATTGTAAGTCTCATTCTCCATTTTCCAGTCGCAGAAA +GGTAGCAGTACGGTATGGTATTCCAATCCTTTGGACTTGTGGATGGAGAGAATGCGAATT +CCTTCCACCTCGCCGGAAGGGATGGTCTTGCTGCCTAGTTTTTCTTCCCAGTAGGTGATA +AAAGCGGATAATTCGGAAGAATTGCTTTGCAGATATTCCGTCACGGCATCAAAGAAGGCG +CACAGATAGGCATCCTGTTGTTCGATGCAGGACATCTGGAACAGATTGAAAAGTTTTTCC +ATCAGCTCATACAGTGGCATGAGGCGAAGTTGCTCCGCTTCTTTTATGAAATCGAATGGC +AGATAATCATCTATCTCGTTCAGCAATAGAGTGTTGAGGTCTATTCCTTTATGTAATACT +TCATTCTGATAGGCAGTAGCCAGTTGTGCCTTGGCAATCCGGTTTTCGGGTTGTGACAAG +TAGCGCAGGCCATCCATTATCATACATACCGCCAGTGAGGCATCCAGTCGGAATGCCTCA +TCGGATACGATTTTGTAAGAGGTGTTCTTGTCGAAGTAATCGGCTATTAAAGGAATGCTT +CTGTTCTTGCGCACCAGGATGGCAATATCTTTCAGTTGCACGCCCTGCGCCACAAGCAGC +TCCACTTCTTCTCCTAGGTGGTGCAGGGTGTTTTCCATATAGGTCATGTCTTCCGTATCC +GATAGAAATTCCACTTTGACATACCCTTTTCCGGGATCTTTGTATGTTTCCTGACACACG +TCGTTGTAGGCTTCCTTCAGTTCCTTGCATTCCTTTTTTTGTTCTTCCTTGTAAATGTTG +TTCAACACCTCGCAGGCGGCTGTAAACACTTCGTTGTTGAAGTGAATGATGTTTGCGGCG +CTCCGGCGGTTGGTGGTCAGTGTCTTTACTTTGACGGGAAAGGCTTCGATGTTTGTTTTC +AATCCGTTCAGTATGCCCCAGTCACCGTTTCTCCAGCGGTAGATGGATTGTTTCACGTCT +CCTACAATCAAGCTGTCTGCTCCCTGAGACAAACCTTCGAGCAGCAACAGACGGAAATTG +TCCCATTGCATACGCGAAGTGTCCTGAAATTCATCGATCATTACGTTGCGTATGGTTGTT +CCTATCTTTTCGAATACAAACGAGGAATCTCCTTCTTTTACCAGGTTATGAAGCAACGCG +TTGGTATCCGACAGCAGGAAGCGGTTGTTTTCATGGTTCAATTCGCGTACCTCTTCGTCT +ATATTGGCCAGCAGACGGATATTGTTTACGTGGCGCAGGGAGAGTTGGCAGGAATTGGCC +AGCATATTGTTCCGGCTTCTGTATTTTTCCGATTCATTCAGCAGAGGAATCAGTTCTTTT +TCTGCCAGACCTTGGATAGCGTTGCGTATGGGGGAGGTTTTTTTCACCCATTCGTCGGGG +CAGTCCAGACATTTTTCCACTGTTACATTGCGGACACTGTCATCCAGTTTACCCGATTGT 
+AATTTATTAAAATAGCTGGCTATACCTCGGGAGCCGTTTTTCAAATCTTCTACTTTCACC +CCGTTTGTATCCAATATTCCGAAGAATTGGTCGGCAAATCCTTTCATTTGTTCCTGCACC +TCTTCCAAAATGGCTTGCAGCGTTTCGCGGTAATTCTTGATACAGTCTTTGTCCCGTAAT +TTTTCCCGTAGTCCGTTTCCTTTTTCTATATATCCTTCATCGAAGATATTTCGTCCGAAA +TTCTTTATCTCACCGGAAACATTCCACCTCTTGTCATCGGCTATTCTTTCTTCTATGTAT +TCCAACAGCCAATAGAGTACGGGAGATTGTCGGTCCAGCTTTTCTATCATGGAGTCCACG +GCATCACTCAGTACCTCCATATTGTTCAGTTCGATATTCAGATTGGCACCCAATTCCAGC +TCCCGTGCCAAATTGCGCATCACCGACTGGAAGAATGAGTCAATGGTTTCCACCCGGAAG +CGGCTGTAATCGTGTATCATGTAGTGCAGTGCCGTACCGGCGGCTGTCCTTATGTCTTCT +TGGGGCCTTTCCAGTTCTTCTGTGATTTTCTGTAAATAGGGATCAGAATCCTTATCCTTT +ATCCATATTCCGTATAGCTGGCTCAGAATCCGCTCTTTCATTTCGGTAGTGGCCTTGTTG +GTAAAAGTCACAGCCAGTATATTACGGTATGCCCGTGGGTTCTGTATCAGCAGTTTTATA +TATTCCACGGCCAGTGTGAATGTTTTTCCCGAACCGGCAGAGGCTTTATAAACTAATAAT +TCAGGAGAATGTTTCATTTTAAATAATTTGCTAATTTGCTAATGGCTGCGCTGTGTACCG +GATGGTAATTGGCTGACACATTAGTACATTTTTATTTTTAATTAAAAACCAGCACAAAAA +CAGTGGCCTGTCCCTTTTCCTTGTAAGGCAGCAGGGTCAGTGTGCCACCGCTCAGTCTCA +TGATTTGTTTGCTCAGGCTCAGTCCGATGCCGCTTCCTTCCTCTTTGGTCGTGAAGAAAG +GTACAAAAATTTGTCCGGCTGCATCGGGAGGAATGGCGGGACCGTTGTTGGCTATCTCTA +TACGGATACTTTCTTGCGGATCACAATAAGCTTTCAAGGTGATCTTTCCGTCGGGAGCAT +TACCTATGGCTTGAATCGCATTTTTCAGCAGATTCGTAACCACTTGGGCTATCAGGTTCT +CGTCGGCAAAGACTATCAGGTCTTCCCGGGCTTCGAGGATGGAAAGGGTGATGTTGGGGC +ATGGATGCTGGTGCTGTGCCAGTCGGATCATCCGTTCCAGGAAAGGACGGACATAGAACA +GCGATGGCTCAGGTGATGGCAGTCGTGTCAGTTTGCGATAAGACATGACAAAATTTATCA +ATCCTTTACCTGTAGAGTGAATCGTTTCCAGCCCTTGTTTCATTTCTTCATTTTCTGCTC +CGGGAAGAGCCAGCAGTGTTTCGCTGAGGGAAGTCACCGGGGTCAGGGAGTTCATAATTT +CGTGTGTCAGCACGCGGGTAAGGCGTATCCATGAATCTATTTCCCGTTCATCCAATTCCC +GATTGATATCACTCAAGGCAATAATGCGGAGTTCCTCGTCTTTGATACGGATGCCGGACA +CCCGTAAGGCAAGATGCACCGTTCCCCGTTCCGTATTGAACTGAATTTGCAGCTTGTCAC +CCGGAACTGCTTTTTCCAAAGCAGTCATCAGTTCATCCGAGATGCGGGAGAGTTGTTTGA +CATGAGTCAGAACATCCAGCCCCAACAGATTCATGGCCTCTTTGTTCTTTTGATAAACGG +AGCCTTTGCTGTTCAGTACCACTATGCCGGTTTCTACAAAGTCCATGATGAGCTCATAAT +ACTTCTCGCGTTGTGCCGTTTCCTGTTTTACATTGTACAAAATACGTGCGATACGGTTCA 
+GCATGACATTGACCATGGAAGAATCGCCAGGGGAGGCATGTTCGTAAAAACGAACAGCGG +GGTCGTCGTTTTCGATGGCGTCGAGCAGAAAGGCTATCTTCCGGGTATTAAAAGTGTACA +GGCGGTAAAACCAGCCTATGGAAACGAGGAATGAAGGTACGATTACGCATAGCCACAACC +AGTTGCGCTGCTGGATTAGTAAGGTGCAGGCAATGGCTAGAATCAGTACGGTAAATAGGC +GGAAGAAGAATTGTTTCACAGCTTAATTATTTATTATTTCGTTTGAATGTTCCTTTAGTT +CCTGTTTAAAAATAGATTATTGTTATTTGTTTTGTCATCCTGAACCATCGTGAAGAATGA +GATGAGATAATAGGTCATAATCCGAATTTTTTCATCTTATTATAAAGCGTCTGCCGGGTA +ATTCCCAACTGTGCCGCTACAGCCGAGAGGTTTCCGTTGCATTTCTCTATCGCTTTCTGA +ATCATTTGCAATTCCATCTCCTCCAGAGTTGAGACACTTGTTTCTGTCGCAGGAGCCGCT +ATTTTTCGGGGAAAATGAAAATGTTCCTCACTCAATATACCATCTTCATTGATAATGACC +GCTTTCTCTACGGCATGTTCCAGTTCGCGTATGTTTCCATACCACGGATATGTCCGTAAC +TTTTCTTGCGCACCGGTAGAAAGAAGGATATTTCCTTTGTCATATTGTTTGCAGAACCGG +TCTATAAAGCGTTCGGCCAAAGGGATAATATCTTCTTTGCGTTCCCGCAGAGGGGGAATC +TCGATGTGGATGGTATTGATGCGGTAAAGCAAATCTTCGCGGAATTTGCCTTTGGCCACC +ATCTCTTCCAAATTACAATTGGTGGCGCAAATCAAACGTATGTTGACAGGTATCGGTTCG +TTGCTTCCTACTCGCACCACGCTCCGGCTCTGAATGGCGGTGAGTAGTTTGGATTGCAGG +TGATAGGGCAGGTTGCCTATTTCGTCCAGAAACAAAGTCCCTTCATGGGCGGCTTCAAAT +TTACCTGCACGGTCGGTATGGGCATCGGTAAACGAGCCTTTTTTATGACCGAACAGTTCG +CTTTCGAACAGGGATTCGGTGATGGCACCCATGTCCACTGTTATCATATCCCGCCGGTAT +CGGTTGGACAAGGCGTGTATTTCCCGCGCCAGCATTTCTTTTCCTGTGCCGTTCTCTCCT +GTAATCAGAATATTGGCATCCGTTTGCGCCACTTTCTCTATCAGCGTCCGCAGTTGCTTC +ATGGCATTGCTTTCTCCCCAATACATAGAAGATACGGTTTTAGGGACTTCTGCCCTCTTC +TTGTTCTTTGAGGAATTGCGGCAGGCAGACAGCAAGGTTTCTATCAACCGGGTATTGTCC +CAGGGTTTGACGATGAAGTCTGTCGCCCCTTCCTTGATGCCGCGCACTGCCAAGTCTATG +TCAGCGTATGCTGTGAAAAGTACTACGGGCAGGGAGGGGTGTATTTTCTTTATCTCGTGT +AACCAGTACAGACCTTCATTCCCGGTATTCAGTCCGCTGCTGAAATTCATATCCAGCAGC +ACCACTTGCGCATTTTCTTCACGTAACGCAGCGGGCAGTGTGATGGGTGAGGGCAGTGTG +ACGATATGTTCGAAATGATTTTTCAGCAATAGCTTTACTGCCGACAGCACTCCTTTGTTA +TCATCTACCACAATGATGGTTCCTTGTTTGCTCATAAGTCGTTGTAGCTTCTTTTTTCTT +TAAAGATATCTTTAGTTGTACGTTTCATGCAAAGGTACTATCTCTTCACACACTCTCATT +TCCATTCCTTTATAGACCACTACTATTTTGTGTAGCTGTGTGGTGCCCACAGCATTCTTC +ACCCGGTCGGAATCAGCATAACGATTTGCCTGAGCGATCCCTTCCAGCCTCAGTTCTTCC 
+ACACGACTTTCAGAATCTTTGTATTTGGCATATTTCAACTCTATGATATAACTGTGCTTC +ATGTCCGGATAGATTTCCAGCAAAGGACACAGGAAAAGATCAGCGTATCCTTCTTGTGTA +TCCGCTTCAGAAATAGGGCGGTAGAAGCGGTTTTGCGCTGTCATGGCCAGTGTGAATCCA +TGTACGAAAAACTCTCCTTTCTGTTTGTCACGTTGTGAAGCGTAACGTTTCAGACAGTCG +GCTATGTAACCGAAGTAGGCCTGCCAGTTACCATCGTATGCCAAAGCCGAGGCCAGTTCG +TCTTTCTCATGACTGCTGAAACTAAGGTCGGCCTCATTGTATGTATTCAGCAAATAGGTG +TATAGTTGCTCTTCCACTACCTGATTGGGAATGGTGAGTTTGTTTTTTCCTTTATGCATT +CCGCTGATGGTAAGCATACCGAAATAATAGAGCAGACTTATGAAATTGTCCGGATCGACA +ATATTCGCAGCCGGAAAGCTATCCTTCAGTTCTCCGGTGATATATCCTTGACTTACCAGG +GTTTGTATGATGGATGCATCATGAGCAAACTCCTTGTCCTTGCGAATCAGCATCCGGAGT +TTTTCGTAGTCTATGCGTATGTTGCTTTCTATCATTTTCTGTGGCGCTTTGCCGCGCAGA +ATATAGTTTTTGACAAAATAGAGAACCATGTTGGAATTGTACAGGGTAGTTTCTCCGTAG +CATTCCGGTGCGAAGCAGTAGTTGTCATACCACGGCTTCATGATATCTATCAGTTGGCCG +ACGGTATGGTTGAAAGGACTGTTTGTGGAATAATACGTCAGCATCTCACGCACTTCCTTT +TCTGTGAATCCTGTCATTTCGTTGAACTCTGGTGAGAGGGAGTAATTGGTACCTATATTG +AATCCGCTGGTGAGATCATCCATGGTTACGGGGCTTACTCCGGTGATGAAGCAACGCTTG +ATGCAGGAATCTGTTCCGGCTTTCACTTTGTTGAAGAATGCGCGCAGGTAACCCTCTTTG +TGTGTCTCTTCAGTGTATCGGTGCAGACTTTCGGCATCGGAGAGAATGGCATTGGTAAAG +TGGTCATACTCATCGATAAAAAGATAAATTTTCTGCTCAGTCTTGTTGCACTCCGTAAAC +AGATATTCCAATTGTTCTACCGCTCCTGATTTTTCATCCAGCTTCTCTTTGATACCTTGT +GGGAGATAGTCGGCATAGATATCGCAGAAATAGTCGAACATGGTCTGGCAGTGTGCGTCC +AGTCCTTGACGGTAGTTGTGCAATTCCCCGCTGATACCGGAAAAATTGAGTTTCAGCACT +AGATAGCTGTTGCGGTCCGGCGTGGGGTGCTTTCCTATGTAGAGGTCTCCGAACAGGGCG +TCAAACTTATCGCGGGTGCGTACATCATAATAATGTTGCAGCATGCTCAAGGTCAGACTT +TTGCCAAATCGCCGGGGACGGATGAAGAAGAAAAACCGGTCCGACTGTTCTATCAGTGGG +ATGAAGGCGGTTTTGTCCACATAATAATAATTGTCGAAACGGATGTCTGCAAAGTTCATC +ATACCGTATGGCAATCGTTTCCTGTTTGAAGGTGCATATTCCATTAGTTATATTATTTTT +TTTATGAATAAACTCTGGTAAAAGAGATATGTATGGCATACAAAGATACTTTCTTTTCCC +TGCTTCTCCAAAAAACAAAGGTTCTTTCTTTTTGGTAACATGGAGGAAGCTGTCCGATAG +GTATCCGCCAATCTTTTCTTTCAGAGAAAACTTTTCAATTTCATGAATTGTTCTATTATA +AAATAGCATTTCAAATGAAAACATTTATATTAAGCATAATTGTAACCATGATTACATCGG +TAGCTGGTGCACAGCAAAAAAGTTTTTATGACTTTACGGTCAAGACTATTGACGGGAAGG 
+ACTTGCCACTTTCCACATTCAAGGGTAAGAAAGTATTGGTAGTCAATGTGGCATCTAAAT +GCGGTTTCACTCCCCAGTACGCCAAGTTGGAGGAGTTGTATGAGAAATATGGAAAAGATG +ATTTTGTCGTTATCGGATTTCCTGCCAATAATTTCCTGCATCAGGAACCCGGAACGAATG +AAGAAATCAAGGAGTTTTGTACGTTGAATTATGGAGTGACTTTTCCTATGATGGCTAAAA +TATCTGTAAAGGGAAAAGATATAGCTCCCCTCTATCAATGGCTTACGCATAAAAGTGAAA +ACGGGGTTTCGGATGCTAAGATAGGGTGGAACTTTCATAAGTTTCTGATTGATGAAAATG +GTAGATGGGTTGCCTCAATCGGCTCTACTACCAGTCCGCTTTCACAGGAAATTGTAGGAT +GGATTGAAGAATAAAATGACCAGAAGATAAATAATGCCCTATTGTTATTGCACCAGCTTT +CCGTTTTTCAGTTCCGACATCAGCAGGTCATAGATGTATGCCGGGCTTTCGATATACAGG +CCGGTGGAGAAGTCTTCCAGTTTGACGAATGTCTCCGAATTGTGGAATGTGGAGATGGCT +TCCTGCAAATCCATATCATTACGTTCCACCAAGTAACGGACTACATCGGCGGATATTCCT +TCAAAGAGGAATTTTGCGGTATTCGTGTTCATAGTCGTTTCAGCATTTTAATCGCTGCCT +CTGAGTGAAAGAAATATTGTGAAGTCACTTTCGAAAATGTCAGTTCGTGCAATAGTTGTT +CTTCGTTAATAAAATTCTCGGTGTATAAACGTAGCAAGCGCGCAATGGTATCATCGGCAA +CAGGGCCAATTACGATATCGTAATCGTGGCAAGGCTGAGTAGTGTTTATATCGCGGTTCG +ACATGACAAAACGTGCCCATTCCCAGTCTGGCTCCTTGAAAACCTTGACTTTCAATTCGC +TCGACTGCGCTTCTTCGAGGTCAAACTCAAACATAGTCAGAGTGGGGTTTCCGCTGAACA +TCCGCGCTGTTCTTGCGGCCATCCTTTGGGCCTGTTCCCTAATGTCGGTCAGGTAAAATC +CTCTGCCGAAATCCTTATAGGGGCGGCATCTGTTGAGGTCGATGCCGTCGATTGCCGTAT +TTGAACCGTGGTAAAGTATCATCGCAATGTTCCTCCATTGTTTTGGCAGACGATGGTCAG +GTCATCCACACAGTCGTTAAAAGAAAGCGTGTGTTCCACATCGTAAAATTCGGTGAGAAA +ATCAATGCCTTTGAAACGTCTGATATAATTGCTTGCCTCCCGGATTGTCAAATCGTGACG +GCGCGCAAATTCGACAATGACCGTCATCACATAATCGATTAGGTTCTTGTCCGCTTTCTT +TTTGGACTTGAGCGCCACTATCGGTTCGACACCCAACGCCTCCGCTATGCGGTTGACAGT +GCCAAAATTAACACACAGACCGTTTTCCACTTTGGAAACCATTGCTTTTTGGACGCCAAT +CATCACCCCGACTTCTTCTTGTGTCAATCCTTTGTTTTTTCGGCATTGGGCCAAAAGTTC +TCCTATTTTTTGCAAATTCGTATTCATACCACAAAAATAGATAAAGTTTCTTGTGTAGGA +AACTTTTTAACTACAAATATATTCTTTAGATAATTATTATCTTTCCAATTAAATGGCAGC +TCTGCTCTGACACGCAAGAACCCGTGTGCCGGATGATGATTGTTAGGAAAACAGGAGGGA +AATCAGTTCCTTTGTACATCATAGGTTATGGAACGGTGGTTCGAAAAGAGTTGTCGCTAT +TTTTTTGCTTATTTATTTGTGGGGATAAATATTTATCCCTATATTGTATTAGTAAAATTA +AAAATATCATGGAGAATGATTATTAAAACTGAAAGTTATGGATACAATAAAAGACATTTG 
+GTTTGATGCTAACCGCATTTATATGAAAACGGATGGTGGAGAAACTTTCAGCCGTCCCTT +AGAGGCATTCCCTTTGCTGAAGGATGCAAGTGATAGAGAGCGCCTTGATTTCAAGATAGG +AAAGTTTGGTGACGATGTTCGTTGGGAATCATTGGATGAGGATATTCATATATCCAGTTT +TTTCGATACAGCGGAGCCGGATTATGAAAATGAGATAGCAATGATATTTAAACGTTTCCC +TCAACTAAATGTGTCAGAGGTTGCACGTAGTATGGGAATAAACAAGAGTCTTCTTTCCAA +GTATATCTATGGTATTAAAAAGCCGAGTGATATAAGGAAGACGCAGATTAAAGAGGCTCT +CCATCTTTGGGGAAAAGAATTACTTGCCGTTTGATTAATCTCTTAATAATAGTTTAGGTG +TGATTCCATTTGGTTTCACGCCTTTTTTATGTTAATTTTTCCACAATCATAGTTTCAATA +GAAAGATAATTAAATATAAGGAAGATATGGCTAGAATAAACAAATACGGCTATGTTGCTG +CGTGACGTTTTTGCATCCGTCAGGGAGATGCGGTGAACAAGCCAAGCTCATACCAAGAAC +CGGGCAAGCATGTGATGCAGGCAGGGTAGGCGTGTACCGAGTACCAGGTAGCCTTGTTTG +CTGTATCTTATATCTTACCATCTGGGCGATGATGCGCCCGGAAGAATTTGCATCAAGGTA +AGTATACCGATAATCTGTTTCTTGTTTGTTTTTTGAGTAATCAAGAAACATCACTAAAAA +TAGCGGAATATATTTTCGTCCTATTTATTAAATTGCTATCTTTGCCACGTTTTCCACGCC +GGTACTTCCGCAAGGAAGTTAGGGAGGTGGGAAATATACATATATAGAAAAGACGTTTAC +TTGACGTTTTGTTCTTGGCTCTACAGAACTTCGCAAATTCTAAAATCCAGCCAAAACAAA +GCAACAGTAGGTGTCTGCGGGGTATGTTTATATCCTTGGTGCGCCTTTGCCGGTCAAAGG +TGCATTCAAGTGTCATTATACATATCGGCGTGGGCTTCTGCGTTGCTCGTTCAGACTGGA +TTGGGCAATGCGAAGGCCTTGTAGGGCGGGACGAGTAACAGGTACAGATTCCCGCGCTTT +TTTTATATATAGGTATTATAAATTCTGATTTAACACATAAGAACCGTCCGGTTCGGGAAT +CTGCGGAAGGTCATCGGTTGACCCGACAGGCTGCATCTACATCGTGCGGATTCCCATCCT +CCATTTGTTGAAATAGCTTTTCTTTGGTCAGCTTAATCACAAGAGGCCGATTCCTCTTGT +ATTTTACGAAACTTCTCGTAATTGATTGAATTGCAGATGTATATCTGTACTTCAAAGCTC +CATAACTATGCTTTCAAGTGAAAAAATGCAATGGTTTGCCATGCGTGCCACTTATCGCCG +GGGGATGCAGATCAAGGCTTTGTTGGATAAAGAAGGAATCAACAATTTCATTCCCATGCG +TTACGAAGTTCGTATAAGGAACGGGTGCAAGAGGCGTGAGCTGGTTCCCGTTATCAGTGA +TCTGATATTTGTTCATTCCGTCCAGTCCGAACTTCAGAAGGTGAAGTTCAAACTTCCGTA +TTTTCAATATATGATTGATATCCGCAACGGGCAGAAAATCATTGTGCCCGATGATCAGAT +GAGGCAGTTTATCGCTGTGGCTGGAACGTATGACGAGCATCTTATCTTCTTCAGTCCCGA +TGAGGTGAACCTGCGTAAAGGTACGAAAGTCCGTATTACGGGTGGTGACTTCGAGGGCTA +CGAGGGGGTGTTTGTAAAGGTGAAAGGTGCGAGAGACCGTCGTGTGGTTATCAGTCTGCA +AGGGGTGATAGCCATGGCCATGGCGACGCTTTCGCCGGACTTGATAGAAGTGATAGAAGA 
+GCCGAAGAAGAAATAACTTCATTTGCTACGTTAAACATTCTTATTCCTTGATTAATGGGG +AAGTAAACGATCTGATAATATAAATCTTATGTATAATAATGTATAGAACCACATTCATGC +CGTCAGGCATCCGGCTCCAGCATCTGATTCAGAATCAGTATAAGGAAATTTTAGCCAGGG +AAAAGAATAATGATAAATTCATTCATCTGTATGACATTGGTGCGTATTGGGTGGCTTTCG +AGTGTTCCGCCTGTAGGTTGAGCGGTTTGTTTTCTAAAAGTGAGCTGACTTTGTTTCGTG +TCCCGGATTGTGTGGAATATGTGGTGATGGTTTCTGTTCCTGCCGATGAGGCGGAGGGCT +GTTTGGGTGAGTACATTATTTTGCACGATGGAATTTACCGGAAAGTGTGGTCGGAGCATG +TTTTGCCGATGGGGGATTATCGTCATTGGCATGAGATGGCTGTGAGGTCTGTTTTGTTGT +AAAGAAAAGTATGCGGACTTCGTGGACCAAGTAAGTGGTGTTACTTTGCTTGGGAACATA +TCTTACTTTGCTTACGAAGATATCTTACTTTTTTCAATGGAATATGGTCATTCGTCATGC +TCTATAGGGAATACCCTGAAAAATCCTTCAAAAACTTCAGCCTGCTCGCAAACGCCCTAC +CCGTGGGCATTCCGGCTGAAGGATTGGGCTCAGGAATCCTTCAGCTGGGGGTTGGAATGT +TTCAGCCGAACTGATCTTCCTCCGATTCTTAGGGAATCGTTTTTCCTGAAGGATATTTTC +CGGCTGAAACATTTCACAATGCATATCCTTCAGCCGGAATGCCGACAGGCAGGGTGTTTG +CGGAGTGGCTGAAAGATCTGAAGGAAAAAAAGGCGTTTCGGCATGTAGATACCGTTATGG +AAAAAACAACAGGTCAAATTTTAAAAAGCTCTTGTTCTTTCAATGAAAACGTCCTGTTCT +TTTTTTTAAAGACCTTGTTCTTTTCGGAAAAGAACAGGATCTTTTAAGGGTTAGTAACGT +GTATGCCGGAAGGTTGGTAACGTGCCACCCCACACGTTACTAATGTGTGGGGTGTAAGAT +TAGTAATGTTGCAGGTGGAAGATTGCATATCTTGCTAGTGATACAATCATAAACGTATGA +AGTTCCAGTATATAAAAGATAAAGTTAGTGCTTATTTCACTAAGGGGAATGAGCGTAGTG +TTGCGGTGAAGAAAAATATCGCCGTTTCATTAGTGTTGAAATGTATAAGTATTTTAGTTT +CCTTGCAGGTCGTTCCATTGACCATCGGTTATGTTAATCCTACGAAGTATGGCATTTGGC +TGACGTTGAGTTCCATTATTGCCTGGCTGTCTTATTTTGATTTGGGGTTTGCCCACGGAT +TTCGTAATCGTTTTGCTGAGGCAAAAGCAAAAGGGGATATGAAGTTGGCAAAAGAATATG +TCAGTACTACATATGCAGTGCTCTTCTTGCTCTTTTCCGTAATTTTATTGATAACCTTGG +TGGTGAATAATTATTTGGATTGGAGTCGTATTTTAAATATTGATCCTGTTTATAAGGATG +AGCTGAGTCTGGTTTTCGGTTTGTTGGCATGCTTCTTTTGCCTGAACATTGTGGCAAGTG +TCTTTACTACCATGCTGACGGCAGATCAAAAGCCTGCTTTAGCTTCGTTGATTCAAACGG +GTGGACAGGTATTGGCATTTGTCTGTATTTATATATTGACAAAGACGACTTCAGGAAGTT +TGAGCGCGTTGGCTTTTGTCTTTTCGGGGGTACCTTGTATACTGGTAATCACTATTTCTG +TTATTATGTTTCATGATAAAAGATACAGTTTAGTAGCTCCTTCATTACTAAGTGTCCGTT +TAGTGTTGACACGGAATATTTTGGGACTGGGTGGACAATTTTTTGTCATTATGGTTTCTA 
+CATTGTTCATTTTTCAATTCGTCAATATCATTTTATCTAGAACACAAGGACCGGAGGTCG +TAACCCAATACAATATTGCCTATAGGTATTTTAATGTGGTAAATATGATATTTATCATTA +TCCTTAATATTTTTTGGTCTGCATTTACAGATGCATATATTAAAAAAGATAGTGGATGGA +TGAAGAGAATCATTAAAAAATTGGATACATTATGGCTTTTATGTATTCCTGTACTAATTT +TAATGATATTGTGTTCTGAAATTATTTTTCAATGGTGGATTGGTAATTCTGTATCTGTTC +CTTTTTCATTATCTGTTTGCATAGCTGTATATGTTTTTCTTCAAACAGGAGGTAATATTT +ATATGTACCTTATTAATGGAACAAGTAAAGTGCGTATACAATTGATTGTCTATTTATTAT +TTGCTTTAACAGCTATACCGTTGATGACTTTTTTTGCGAAACGTTTTGGAGTGGAAGGTG +TTTTGATAGTTCCTGCTGTAGTTTTTGGCTTACAAGCTTGTATTGGAAGAATACAGATCT +TGAAGATTGTAAATGGTACAGCTAAAGGTATTTGGCTGAAGTGATGAGAATAATAATGTT +TAAAATAAGATTTTCAATGAGACATTGTTTGATTATGACAGCTTATAAGGATGCAGAGAT +GATAAATTCAATTATAGATGAAACACCTGTAAGTTGGGGAGTCTATATACATATTGATGC +AAAATCATCACTTTTATCTTCCATGATAAACAATAGAGCAATTGTTATAAAAAAATATCG +TATATATTGGGGTGGAATAGAACATTTATATGCATTTATAGAGCTCATGAGTATGGCGTT +AAATAGTGGGGAAAATTATGATTATTATCATTTAATAACAGGTCAGGATTATTATGCTAT +ACCTCCGCTGCAATTTGATACTATATTAGGTGGTGATGGGATGAATTATCTTGACATATT +TCCACTGCCAAGACAAGGGTGGTGGGGAGATGGCTTGGATATTTTAAGGTATAGAACATT +TTCCTCAAGAACAGATATTCGTAAAGGTATATATCGAAAATTGGATTCATTATGGCGTAT +AACTCAGAAGATGTTAGGTTTGCAGCGATCACTTCCTTCATATTCTATATATGGGGGTAG +TGTGTATTGTTCGTTGACAAAAAACGCTGTAAACGAGGTCGTTAATGGAGAGACTTCAGA +AGACTTATTGCAAAGGCTTAAAAATACGACTTGTGGGGAGGAAGTGTATTTCCAAACGAT +TTTAATGAATTCAAACTTGAGAGATACGATTTTTAATAATCAGTTACGCTATATAGATTG +GAATGTGAAAAATGCACCAGGAGTATTGATTGACGAGGATTTTGATAAAATTGTTAAGGG +AAAAGCTCTCTTTTGTCGGAAACTCGATTCAACAGTATCCAAATCACTATTAATTAAACT +GAAAAAATATATGAGTGATTTTTCCGTTAGATAAAACGATAGGAAACTTAATAGGTATAA +GCTATTTTCAAGAAAGGTGTTGGTACAATTAGTAATCATTCTTATCTGTATTGGATGCAG +TATACAGATATTGAGAAATAATGGAGATAAACGGTTGGTTTATTTTTTATGGGGAATAAT +GTTTTGTAATTATGGCGTAGAAGTAATTCCTCATTTAAGTGTGTTGCGTGTTCTGACCAT +TGCTTTTTATATATCAGTGTTTATGAGAAAGAGTAACAGGCTTATTTTATTGAATATTCC +ATTAAAAAGACAATTCATTCTACTTTTTATTGCATATTTATGTACTGGGCTATTTGATGA +TAGAGGGTGTAGTGTAGGATTGTATAAAGCCATTTTGGAGTATATGGAAACTTTTGGATT +TATATTGTTAGGATATATAAGTTTTTATCATACAAACGATTTACATAAATTGCTGCATGC 
+AATATTACAGATAGCTTTATGTGTATCTATTTATTCGATAATTACATATTTGATAGGAGC +GGATATTGTAAATTCACAAATACCAGGAACAGATTTTAGTATGAATAGTGGAGCGAGAGT +CAGAATACCCTCTTTTTATTACAGTTCTCACATAGCAGGAGCTGCTATATCATCAATGTT +GATTATAATTATTTCTCCTGAAAATAACAAAAATATAAAACATAGATTATTACTTGCATT +TTTGTTATTTATAGCATTGTTATTTACGGGTTCCAGATCTTCCATTTTAGCTTTATTTGT +AGGTTTGATATTTGTATATTATTATTTTTATTCTTCAAAAGTGCAATCACACAAAGTATT +ATTACTTGTTCTAGGATTGGGTGTTGTTGGGGTGGTGGCATCTTTTATAAGCAGGATTCC +TTTTCTTTCTGATATGTTTGAGGAGGGAGGGGGAGATACTGGCGGAAGTAATATTTTTAT +GAGATTGCAACAATTAGTATATAGCTATGAACTTTTCTTACAATCTCCATATTTTGGAAA +TGGCTTTAAATATTTTTGGGAAGTTGTAAAAGTTGATGATAGCTTTTTAAGTAGTATGCT +TTTGGGAGCGGAAAGTTATGTATTTGTTTTATTGATAGAGAGAGGACTAATACAAATTGT +TACTATCGTTTATTTCTTCTTCTCATTAATAAAAATTTTCATGCAATATAAAGACAAAAT +CTTTATCATAACGTGCATGGGTTTAACATGGGCTTTCCTTTTTAATTCAATAGTTACCGG +AAATGGAGATAAATGGTCGTATATGATGATATATATAGGCTGTGGCTTATCGGTAATTAG +AAATTGCAGAAAGAATATATCATGTACTAAAAAACAATTATAAGATAATGTTTTCTATTA +TTATTCCTCTATATAACAAGGCTGATTATATTGCAGAAACACTGAAGTCAGTTCTGAATC +AAACATATTGTGATTATGAAGTAATCGTGGTGAATGATTCATCGACAGATAATAGTTTGG +AAGTGGCTTCTAGTTTTCAGGATGAACGAATTCATATTTATACAAAAGAAAATGAAGGTG +TATCAGCAGCACGTAACTATGGTATTATGCGTGCTAAATATGATTACATTGCTTTTCTGG +ATGCAGATGATATTTGGGAATCCGATTATTTGGAATGCCAAAAGAAGTTGATAGAAATCT +ATCCTAATGCTGGGATTTATTCTACTGCTTTCTACTCTTTGGAAAAGGGAAAACGGAAAC +TTCGTAATGTCTTGATTAATGAACACACACACTTCCTTGTTCATGACTATTTTAAAGAAT +CCGTAATGAATGGATTATCCATTTGCTGGACTAGTTCCTTGTGTTTAAAGAAAGAAATTA +TAGAGCGGATACCAATGTTTCGAGTAGGTATTAAGCGTGGTGAGGATTTGGACCTTTGGC +TTAGAATAGCGTTGAATTATGATGTAGCCTATTTGAACCTTCCGAAAGTCTTTTATAAAA +CAGGGTTATCTGATAGTCTTACATCAGATTATTCTAAAAATGGAGAATTTCCTTATTATG +AGTGGCTTAACTTTACATCGGAAAGTTCTTATTATAGAAAGTATGTGATTTTAGCTATGT +ATATACATGCAAAAAATGCATTTATACATCATGATTACGATACATGTTTGGCCGAGTTGT +GGAGTGTTAAAACTATTGCTTGGAGATTCAAATGGGCAAAGCGATTATTATTACTAGTAA +TTTCATGGATAAAAAGTAAATAATGAATATCATTTATTTTGTTCCCAATATTACCATTGC +AGGAGGTATATTCCGTATAGTGTCAGATAAGATGAATTATTTGGCGGAAAATATGGAAGG +ACGGCTTTTTTTGGCATATTATGGTAATGGACAGGAAAAACCTATATATCCTTTGCACCC 
+GAACATTCAACTGTTACCTATTGATATAGAGTGGAAGGTAGGCTTTGGAAAGAAAATTAA +TAGAGTATGGAAAAATATAAGTATTATACGTCACATATTTAAAAAATATAAGATAGATAT +TGCTGTCAATGCCAATGCTCCTTTATTGATATGGATATTACCATTTATTTGTCGGCGGAT +AAAGAAGATCCATGAGTTTCATTTTTCTTATAAAGGACAACAGATATTAGATGAGGAAAT +TTTTAAATCGCGAGGAAAGAAATTTTTAGTTCAATATTTAAGAAAGTGTTGTTTGACAAA +ATTCGATAAGGTAATTGCATTAACAGAGTCTGACAAGAAAATGTGGAATTTACCGAATAT +ATTTGTAATACCTAATTTTAGCAATATACAACTACATGAAAGAAATGGTAGAAAAAGCAA +AGTTGCTATATCAGCAGGTAGATTAGAATCTGTAAAAGGTTATAATCGATTGATTGCAGC +TTGGGTGATAGTTGCACAAAAGTGTCCGGACTGGCAATTGGAAATTTGGGGTGAAGGAAG +TCTGCGAGAAAGTCTTCAACGACAAATTGATGCTTTGCATCTTTCATCAGTTGTTCATCT +GAAAGGTGTTTCACCTAGTATAGGTGAAGTATATTCTCATTCTTCTTTTTTTGTCATGTC +ATCTTTGTATGAAGGTTTCCCGTTGGTATTGGTGGAGGCGATGAATTGTGGCTTGCCTTG +TGTTAGTTTTGACATTACTGGAGCAAATAGTATCATAGACAATGGTAAGAATGGATTTTT +AGTGCCGGATAATGATGTAAATGCCTTGGCGGAAGCTTGTATAAAATTGATAGGAGATAG +AACGTTGTTGGAAAATATGAGTAAACAAGCTTACATTTCTAGTGCTCGTTTTTCTAAATT +AAAGGTGATGCAGAAATGGTTGGATTTATTTCATGAGTTGTCGGAGAAATAAATGTTATA +GTTGAGATTAGATATAGCTTTTATTTTTGTAAAAATGAATTATTAAAGAATGAATTATGC +TTTTATAGACTCCATGGGGTCATTGACATACAATAATGGTATTAAAATTCAAGCCATAAT +GTGGAAGAATGGACTTGAAAAATTGGGGCACAACGTTAAATTGGTTAATTTGTGGGAGAA +TATTGACTTCTCAAGTTATGATGCTGTAATCATCTTTGCTATGGGGGCAAATATTTATAA +ACTGATAAAAGGTCTTTCTAGGATTAATGAAAATATCATAGTAGCTCCTATTATAGATCC +TAACAGAAGTGATAGGTTTTATAAATTCTTGTTTAAGTTTTATGGTAGTACAAGATTAGC +GTTATCCAATCATTACCATGATATGTGGTCTGTAAAGGAAAAGGTAAAGCTATGGCTTGT +TCGTTCAGAACAAGAAAGACATTATGTGAGTTATTGCCTTGATATACCGAACGATAAAAT +AGCTAAAGTACCATTGAATTATAGAATTCCTGAGATTGGGCAATTAGGTGAAAAAGAGGA +TTTCTGTTTACATGTGTCTAGATTAGATGCACCTAATAAAAATGTTCCAAGACTTATTGA +AGCGGCAAAAAAATATGGTTTTGATTTGAAATTGGCAGGTCATATATCTGGAGAAAAAGA +AGAGAAAAAAATCATTTCACTTATTGGTAGTACAAAGAATATAGAGTATTTGGGAGAAGT +GAATGAAGAGGAATTGGTATCACTTTATAAAAGAGCTAAAGTATTTGCTCTTCCTTCTTT +AAGAGAAGGAGTAGGTATGGCAGCACTTGAGGCGGCTGCTTATGGATGTGAGATAGTTCT +TACTTGTGTAGGTGCGCCTAAAGAATATTATGAGGGTCGTGCTTTGTTAGTTAATCCGCA +AAGTATAAATGAAATAGGTGCTGCCATAATAAAAGCTATTAATAAAGGGTATTCACAGCC 
+TGAGTTAAAATCTTATATAGAAAAGCGTTATAGTGAGAACGCTTGTTTGAAATTATTAAA +TGAAAATCTGTTGAAGTTGAATGAGTGTTCACTGATATGAAAACTGTTAAGTGTTTATAT +AACATTCTTATATCGTTTTAATATTGAATTTATAGTTATGGAAAATAAACTGGTTTCAAT +AATTATACCTGTCTATAATGTTGAAAAATATATTAAAAAATGCCTTGATACTGTTTTATG +TCAATCACATCATAATCTTGAAATAATTTTGGTGGATGATGGTTCTTCGGATGCTTCAGG +AATTATTTGTGATGAGTATGCCCAGAAAGATAAACGTGTAAAGGTGATTCATAAAGAGAA +TGAAGGTGTAAGTGTAGCACGAAATACAGGCATTGATATAGCTACAGGTGAATACATTTG +CTTTTCTGATGCTGACGATTATTTGCAATTGGATTATGTTGAATACCTTTTGAAGATCGC +AAATGAAAATGATGCTGATATAGCAGTAACAACTGATTGGTTTATTACCTTTTTAGGAAA +AGAATTTCCGGAGGATTTAAGAGATATACCACTAGTTCTGAGTCCTGAGCATGCAGCAGC +AGCCATTTTATATTATCATATTCCAATAGGATGTAACAGTAAAATCTTTAAGCGAAGTTT +GTTAAATAATAAGATTAGATTCTATCCTCAGCTATCCGTAGGTGAAGGCTTTAATTTTAA +TGTAAAAGCTTTTTTATCTGCTAATAAAGTTGCTGTAACCAAAAGGCGAATATATTATTA +TCGTCGTAATAATCCTGCGAGTTGTATGACAAAGTTTAAATTAGAAAAATGTAATACAGC +ATTGTATGCCATTCAAATGATTAAGGATAATCTTATGATTAAATCAAATAGTTTAAACAG +AGCATGTGATTTTGCGGATTGGCATACACATGGTGATATGTATAATTGGATGGTATTAGC +AAAAGTGAAAGATATGTATCCAGATATGTATATGAGATGCTTCAATAAAGTACGTTCTTA +TTCATTTAAAGCGATATTTGCTCCAATAAGTAAAAAAGAAAAATTTAGAGCGATACTTCA +GTTTATACATCCTAGATTGTTGGCATTTGTATTAGAGCTTAGAAGAGCTTATTATCAGCG +GTAACTATATATTTATCTGAAATAAAGCGATGAAAGTAGCAATTCTCTCCATGCAGCAAA +TTAAGAACTACGGTTCTTTTCTTCAAGCCTTTTCTTTAAAAAAGAATATTGAGTCATTAG +GTCATACCTGTGAATTTATAAATATTATTCCCGGAGAACAATTAGAAGGATATAAGACCA +GTTGTTTTTACAATATTAAATTACTTTTCCAGCGCTTATGGGGATGGGATTTTTACAAAC +GTTTCCAAACGATATTTGTTTTTCAGAATCGTTTTCGAAAAGAGTTCCTACCTTATTTGG +GAGTAAAAAAAGGAATAAATACAAAACATTATGATGTAGTGGTTATTGGAAGTGATGAAG +TATTTAATTGTACTCAAAAGACTTGGTTTGGTTTTTCGTCTCAATTATTTGGGAAAGGAC +TGAATGCAAGTCGGATCATTACGTATGCAGCATCCTTTGGTGCGACTACTATAGACAAAT +TACAACTTGTCGGCAAAAAGGAAATTGTCTCAAGTTTATTGCATGATTTAGATGCCATTT +CTGTGCGTGACGAGAATAGCATGAAAGTTATTGAAGAACTAACCGGTAGAACTCCTTGGC +TTCATGTTGATCCAGTGTTGATGTTTGATTATAATCAATTTATACCTGATAAATTCAATA +GGAATGAATATATAATTGTTTATACATATCCGGGAAGAATTACTGATAAAAAGGAAATAA +GTTCTATCAGAAATTTTGCTAAATCAAAAGAACTGAAGTTGATATCAATAGGACATTATT 
+TTTCATGGTGTGATGAAGTTGTTATTCCTACTCCGTTTGAAGTACTAGCTTATTTTAAAG +GAGCATCTTATATTATTACTGATACATTTCATGGTAGTGTATTCTCTATAAAATTTAATA +AAGAATTTTGTACTATAGTACGAGATATGAATAGTAATAAATTGGTGTCTTTATTAAAAC +AGTTTGGACTAGAGAATCGTATAGTTACCGATATGAATAAAATGCAAAAGATTTTGGAAA +CTCCCATTGATTATGCAGGTGTAAATAAAATAATAATGGAGGAAACAAAACGGTCTATTA +CTTATTTGACACAAAATATCAGATAATGAACTCTTTGTTAGTTACCTACATCTTGCAAAC +AGAGAAATGGACGTCTCCCGAACTTTTCGAAACGAAAAACAGAATTTACACCTTTCTCAA +CCCCGTATCTTACCTCATAGCTTTAGAAAACAAATCTTTATTTGAACAATTCGATGGTAT +TTTTGCCGATGGCTCTTTACTGGTTTCTGCCATAAGATTAGTATACGGAAAGCGAGTGAC +CAGACGTAGTTTTGATATGACCTCCCTTGCTCCCGAATTGTTGAATTACCTCATGGAAAG +CCGTAAAACGCTCTATATCGTAGCCTCGGGTCAAGAACAAGTGGAGTGTTCTGTGAGGAT +ATTTCAAGAGCAGTATCCGGAGTTACGCATTGCAGGGTTCAGGAACGGATATTTCTCTTC +GAATGCTGAAATGAATAAGGAAGCTTCTCATATAGTCGAATTAAATCCGGATTTTCTGAT +AGTAGGTATGGGGGCCTTGATGCAGGAAAAGTTTCTGCTTAAGGTAAAAAAGATGGGTTA +CCAAGGCATCGGCTTTACGTGTGGTGGCTTTGTTCACCAAACGGCAATGAATAGGATGCA +CTATTATCCGAACTGGGTTGATAAGATGAATTTGCGTTTCGTTTATAGAATGTATAAGGA +GAAACATACACGTACAAGATATCTGCAAGCTGCATTCTTGTTTCCGGTACGCTTTATCGG +GGAAAGAATTTTCGGATGAAAGAGCAGACTTTGCTTATTGCCATGTAAATAATGGAGAAT +GGCAACAACAGGACGAAGATAGCAGTAGGAATAATTTTTCCGAGTATTTTTAATACATCT +GTTTTCATGCTGCAAATATAAATTAAAATTCATAGAATAGTGAAATTAAGAACTCTAATA +TTTCTGGGGGGACTCTTTGGAGGAATGAATGTTATCAAAGGGCAGGCTTTTTATGGTACT +ACAGGACTACTTCATGCACCTACTGCCGTTATGCAGAAAGATAAAACCGTTATGCTGGGA +GGAAATATGCTTGATGTAAATATACTTTCCCGATATTGGGTGAGAAGTGAATATCATCCC +TATACCTATAACTATTATATAAATTGTACTTTGTTTCCTTGGCTGGAAGTAGCATATACT +TGTACGTTGGTAAAGGGGATACATGGTTCTTCTTATTGGCCTCAGCAAACTTGGGGCAGG +TTTACCAATCAGGACAGATCGTTTCATTTCCGGTTGAGGGCATGGAAAGAAGGCTGGTGG +AAGGCTTGGACGCCTCAGGTAGTGATTGGTGCGAATGATCCGGGAAGCCATTCTTCAAAT +GGAGGTGGAGACATAGATTGGGGGGGCGGTGGTAGCGGAAACCACAATTACTTGACACGT +TATTATCTGGCGGCAACGAAACATGTTGAATTTAGTGGAATAGGTACAGTGGGGGTTCAT +GTAGCTTGGGTTATCGGTAAAGCTATGAGTGATGTGCATTATAGCCGTCCTGCTGCAGGG +GTAAACTTTCATTTTGGCATGAAGGGCGAAGGTTTCTGGCAGAAAGCTCTGAATGGATTT +AATTTGATGGCGGAAGTGTGTCCCGGACATGCGGAAGATCTGCATACAGCAACTTATACT 
+GTGAATGTGGGTGGAACTTATTCAATATGGAAAGACCATATCAATCTTATTGCTGAACTG +AATGACGGTAAATTTTTTAGCGGAGGTATATTCTTTAAATTACATCTTAAATAACTGACT +TGATTTCTAGTGGATGAAGCATTATAAGAATAGCCGGAATCACTTGGAAAAAGTAGTTCC +GGCTATTCTTATATAACGATATTTGTATGTCCTGTTTTTATTATCCTACAATGTATTTAG +CATTCTTGCCGCTTATTTTACGAATGAATACCACAAGCAGCCACGATACGCCAAATGCCA +CGATTGCGGCAGCGGGAATTTGTAAAGGCACGGGAACACCGATATTGCGCATCAGGACTA +CACCCGGACCTGTGAAAAAATAATGTATCATATAGATGCCAAAACCACATACGGTCAAGT +TTGCCAGCATACGCTGGATCAGTTCTGAGCGTACTTTTACTTTCTTGGCTATGAGGAAGA +AGGGGATAGTCATCATAATCACATTCAAAGAATTATAGGTGAAGAACAGTTCCAACATTT +CATCAGTGCATTGTGGTAATGCGGTCATGTGGCGGAAACCGAAGAATGTGACCATGTAAC +CTGCCAGAAACATGGGGATACAGACTGACAGTGTCTTACTCCATGACCAGTCGTGGTTGC +GCAGATAGTGTCCCAATAGCAAGTACCCGTTGAATCCGGCAAAATAATAGAGCATATTGA +ACGAGTTCCACGCACAGCCTCCCCAAATGTATGGGGATACGAACTGATAATAATAAGGTA +ACAAGCCGGTTATTCCCCATGCGACAAGAAACCATAGTTTGGCTTTCTCCGAGGCCTTTT +CTACCCATGCGGAGAAAATAGGAAGGTATAGATATAATCCGATAAGCAGATAGATATACC +ACATATGTACGTCTAATAAAGAGAAATTGAGAGGCATTTCTGCTATATAACCCAATGATA +CGTTGAGTGACTGACGGGCGGCTTCTTCACCGCTATAAGGGAAAAAGTCAAGGATTATTT +CAGGATTAAGTCCCAACAATCCTGTAAACCAGGGAAATAAATTATAAATGACACTCCAGA +TAAGGAACGGCCAGAATACACGGCTGATTCTTTTTTTGTAGAAAGCGGATATTTCGCCAC +GTACGGGGAGCAGCAATGCTCCTGTAATCATGACAAACAGTGGGACACACGGACGGAGAA +AGGCTCCGTAGGCGGCTCCCCAGAATTTGATTTCATCAATGTTGGCTGGCGGTTCGCCGG +GGTAAAAGTTGAACGGGTCTGCACTATGGCAGCAAACTACGGTGAACATGGCTACCAAAC +GAACGACATCCAGCCATACCACATGTTTTCTTGTGGTGTTCATAGCAATAGGTTGATTCA +TAATTTAATTTTTGATTTGACAGGCACAAAGGTACGCTTTCTCTGAGGATTATTTCTTGC +TTTGGAACAAAAAATAAAAAGAACTATCCGCAATCATACTACAAAATGAGAATCCTCCCA +CTGCAAGAGTCACAATCATTTGTCTTTTGCAGGGGGAGGAATTCTATTTAGGGTTAGAGG +GTTATTTTAGTCCTCTGTTCTTCAACAAGGGATCAATACCGGGTTCTTTGCCACGGAAAT +TGACATACATGTCCATCGCATCGTCAATGCAGCCGGGAGTGAGGATATATTTGCGGAACT +TTCCTGCTACTTCTTGATTAAAGATATCGCCTGTTTCCTTGTATGCCTCGTATGCGTCAC +AATCCAGCACTTCTGCCCAAATATAACTATAGTAACCGGCAGTATATCCACCACCCATGG +TGTGGTTGAAATAAGTGGTGCGATAGCGTGACGGAATTTGTTTCAGCAATCCGCGCTTAC +CTAATGTTTCTGCTTCGAACTGCATAACGTCCATATCATCGGGAATTTCTTTTAATACAT 
+GATAATCCATATCCAGCAGGGAAGCAGCTAAATATTCAGCCGTAGCGAAGCCTTGTCCGT +ATTTGCCACTCTTGTCCATTTTTTCAACCAGTCCGGCAGGAATAACTTCACCTGTCTGAT +AATGTTTGGCATATACATTCAATACCTCCGGTTCAAAAGCCCAATGCTCATCTATCTGTG +ACGGCAGTTCTACAAAATCACGCGGAACACTAGCCACACCATAATAATGTACATCTTTGA +AAAGATTATGTAAAGCGTGTCCGAATTCGTGGAACAAGGTATTCGCTTCGTCTGCACTGA +GCAAAGCGGGTTGTCCGGCTGCCGGTTTGGTAAAGTTGCATACCACGGTTACTACAGGAG +CCACTTTCTTGCCGTCCTTGTAGGTCTGTGAACGATAGCTTCCGCACCATGCGCCACCTT +TCTTGCTGGCACGCGGAAAGAAGTCCATATAGAGTACACCTAGATGAGAACCGTCTTTGT +CCTTACATTCGAATGCCTGTGCATCGGGATGAGGGAGGGGAAGATTGTCCAGCTGGGTGA +AAGTGATACCGTACAGTTTGTTGGCAACATAGAACACACCGTCACGTACGTTTTCCAGTT +TCAGATAAGGACGTACCTGATTTTCATCCAAGTCGAATTTGGCTTTCTTGGCTCGGTCTG +CATAATAACGCCAGTCCCAACCTTCGGCTGTGAAGGTTTTTCCGTCTTTCTTTATTTCGG +CATTGATATCTGCCAGTTCTTCTTTGGCTTTGGACAGAGTAGGTGTCCAAATCTGATCTA +ATAATTTATATACGGCATCCGGTGTTTTTGCCATACGTTCTTCCAGTGCAAAGGAAGCAT +AGTTTTCATATCCCATCAGCTTGGCTTTTTCCAGACGGGCCTTGAGCAGTTTGCGCACCA +CTTCCTTGTTGTCGTATTCATTTCCGTTATTTCCTCTGTTAATGTATCCTTTGAATATCT +TTTCACGCAGGTCTCGGTTATCGGCATATTGCAGAAAAGGCATGACGCTGGGATTATGCA +GGGTGAAAATCCATTTGCCTTCCATGCCTGCTTCTTTAGCTGTTTCGGCAGCACTGGCAA +TCAGGTTTTGGGGTAATCCGGCTAAGTCTTCTTCTTTGTCCACGATCAATTGGAAAGCAT +TGGTTTCCTTTTGCATATTTTGGCCGAAGGTGAGCTGTAACATGGAAATCTCACTATTTA +ATTCCCGCAGTTTTTTTTGACTGTCGGCATCCAGGTTGGCACCTCCACGTACAAAATCTT +TATAGGTTTCTTCCAACAACTTCTTTTGCTCTTTGTCCAAATTGCCGGGATTTTCATAGA +CAGCTTTGATGCGTGCAAAAAGTTTTTCATTTAAGGCGATGTCATCGCTGTGTTTTGATT +GTAGTGGAGAGAGTTCACGGCTCAGTGCGTCCATTTCATCATTGGTATTGGCGCTTTTCA +GTCCGTAGAATACGGTGCTGACTTTACGAAGTAACGCTCCGCTTTGATCCAGTGCTGCAA +TGGTATTCTGGAAAGTGGCGGGTTCGGGGTTGTTCACTATAGCGTCAATCTCTTTCTGTT +GTTCTTCCATGCCCTGCAAAAAGGCGGGTTTGTAATGTTCCATCTTGATTTGGTCGAAAG +GAGGAACCTGGAAGGGAGTTGTGTACTCTGAAAGGAATGGATTTCCGGTAGGGGCTGCCT +GTTTCCCTTGCGAACCGCAACCGCTTAATATGGCAACCGCACAGGCTGCCAAAAGAATGT +GTTTCATAATAATTTGTTTTATATAGGTGATAATGCAAAGATAGTAAAATAAGGCAAATT +CCAATAAGTTTGAAGCGTAGAACTATTTGTTTGTTTTCATAGTCTGTCTAATTATCATAC +ATCATTGTCTAATAATTAGACAGACTGTTTCTTTTTGTTATTTTGCAATGTGTTGTTAAT 
+CAGTTATTTGTTTGCGTGGCACGGTTTTGGCATTTATATTATAGAGAACGAATTATAGAA +ACTTTAGTTTCTTATACACTTTATGGGAAGTAACGTTGTGAAACGGGACTTCTCTCTTTT +AAAATAATAATAATTAAAAACACGAAATATGAAACAGATTTATTATGTCATTCAGGCTTT +GATACATGGACGTGGAGCTAATATTATAAAAGTCGTTTCGTTAGGCTTGGGCCTGACTAT +GAGTATTCTGCTTTTCTCAAGAGTTGTGTATGAGCAGAGTTTTGATACTTGTTTTAAGGA +TCATGATAAATTATACCAATTGTGGAATATATGGACAGTAAATGGAGAGCCTTTTCCTCC +GAGTGAGTTCATTATCGGTGCAGCTGCCGGAGGCATACTGGATGCAATGCCGGAAATTGT +GGAATCGGCGGCAAGTACAGGCTCGTGGCCGGTTTCGGCACCTATTTATAACGGGAGTGT +CCGCTTTGATGATTTTAAAGTTGCTGCTGATTCATTGTTTTTTCAGACAATGGGTATTGA +GGTCTTGAGTGGTGATCCGGTACGTGAATTGCAGCAAAAAGATGTAATTTTCCTGAGTAA +AGATTTGGCGGATAAAATGTTTGGCGGTGAGAACCCGATTGGTAAAATAATTAGTTTCAA +TAAGGAAATTGAGTTGACAGTCAAAGGAACGTATGCTGCTTTACCTGAGAATTGTACTAT +GCGACCGAAGGCTGTCATTTCCTTGCCTTCTATCTGGAGCCGCCGGATAGGAAATTATTC +ATGGAACGGTGGGGACAGCTGGAAGGAATATATCCGGTTGAAGCAGGATATTGATTTGGA +TGAATTGAACAAGCGTATTGATATGGTAGTACAGCAACATATTCCCAGAAGTGATAAGTT +GGGAATCACTGTCATGGCAAAACCTGTTCGGGATACGTACCGTGGATATGATGAAGTGAA +AAGAATGCGTAATATTATGCTTATTTTAGGTATATCTATTTTGTTTATCACGACATTGAA +TTATGTGTTGATTTCTATTTCTTCATTGAGCCGGCGTGCCAAAAGTATTGGTGTTCATAA +ATGCAGCGGAGCGGGAACGGGTACTGTTTTTGGTATGTTTATGTGGGAAACCGGTATTAT +TATTTTGCTTTCATTGTTTTTGATGGTTTTTTTAATGTTTAATTTCCGGGAATTTGTGGA +AGATACTACAGCTGCTAAACTGGAGTCTCTTTTTGCGGTGGAACGTATTTGGGTGCCTTT +TGGAGTGACTGCTGTCTTATTCCTCATTGGTGGTGTGCTGCCGGGACGAATTTTTTCAAA +GATACCGGTCACACAGGTTTTCCGGCGTTATACAGAGGGTAAGAAAGGATGGAAACGTCC +GTTATTGTTTATCCAATTTGCAGGAGTGGCTTTTATATGTGGATTGATGTGTGTAGTGAT +GCTCCAGTATCATTATGTTATTAATAAAGATCCGGGTTATAATCCTGAGCGGGTAGTGAT +TGGCGTTAATAATGCACCGGATGCTAAAGCCCGGCTTGCTGCCCGTCACTTTTATGAGGG +GCTTCCATACGTGGAAGCATTGACTTCGGCAACTAGCTATCCGTCTAATGGATATAGCGG +GCAGATGATACCGGATGAGAAAGGAACGAGTCTTTTTTCCAGCCGTTATGATTTTACTCA +GGAGAATTATGTAGCTTTTATGGGAATGGTCATACAGCAAGGGCGTGTGCCTCGTGAATC +CGGTGAGGTGGCAGTGAATGAGGAATTTGTACGGAGAATGCATTGGGGAAAAGATGTTTT +AGGTAAGAGTATACAGACAGAAGAAGGCCGTGTCAAAATAGTAGGTGTGATTAAAGATTT +TAATATTGGCGGATTCTACTCAGAGCTGAAACCGTTTGTCCTGCATCATCATCCCAAAGA 
+TTTGGCTGATCTTGTTTATCTCCGTCTGAAGGAACCCTTTGGTGAGAATCTTCAGAAACT +GAAGCGTGATGCTGCCGAAGCTTTTCCTAATCAAACAGTCGGCTTTGAAAGCTTGGAACA +AAAAATGGCGGATAGTTATAATTCCGTTCGTGTATTTCGTAATGCGACTTTATTGGCGGC +AATTGCCATCTTGTTTATTACTTTGATGGGACTGATAGGCTATATTAATGATGAATTACA +GCGACGTTCGAAAGAAATTGCTATTCGTAAAGTGAATGGAGCTGAGTCATTTGCGATATT +GGAAATGTTGGTGCAAGATGTTCTTTGGATTTCTTTCCCGGCTGTAGTCGCAGGTACACT +AGGGGCATGGTATGTAGGTGGTTTATGGATGGAACAGTTTGCTGTGACTGTAGGTTCGCT +TGTTCCTTATTATGTATGTGTAGCTATCGTGGTCTTGATATTGATTGTCAGTTGTGTCAT +TTCCAAAACATGGCGCATTGCCAATGAGAATCCAGTAAAGAGTATTAAATCAGAATAAAG +ATCAATAGAACAAACAATTAAAATTTATTATTATGATACAGATTGAAAATTTAAGTAAGG +TATTTCGTACTTCAGAAGTAGAAACAATTGCGTTGAATCATGTAAACATAGAGGTGAAAG +AAGGAGAGTTTGTAGCTATTATGGGACCATCAGGTTGTGGAAAATCCACCTTATTGAATA +TCCTTGGTCTGCTGGACAATCCTACTGAAGGCAGTTACAAGTTGCTGGGCAACGAAGTAG +CCAATCTGAAAGAAAAAGAACGCACCCGCCTGCGTAAAGGTGTAATAGGTTTTGTATTTC +AAAGCTTTAATCTGATAGACGAACTGAATGTTTTTGAAAATGTGGAATTACCGTTGACTT +ATCTTGGAATCAAGGCCAGTGAACGTAAGGAGCGTGTTCTGGAAATTTTAAAGCGAATGA +ACTTGAGTCATCGTGCCAAGCATTTCCCGCAGCAGCTTTCCGGCGGTCAACAACAGCGTG +TGGCTATTGCCCGTGCTGTGGTTACTAACCCTAAATTGATCCTCGCCGATGAGCCTACCG +GTAATTTGGACTCAAAGAACGGTGCTGAAGTAATGAATCTATTGACCGAACTTAACAAGG +AGGGGACTAGTATTGTCATGGTAACTCACTCTCAGCATGATGCCGGATTTGCTCATCGTG +TCATTCATTTGTTTGACGGTAGTGTAGTGGCGAACATTAAAGAATAATAATGAAAGGGTG +GTTGTAATGAAACAAATCTATTATGCAATACAATCGACATTACACGGCCGTGGAAGCAAT +GTGACAAAAATCATATCGCTTTCATTAGGCTTGACTATTGGTGTTCTGCTTTTTTCACAA +ATAGCATTTGAACTCAATTATGAAAAATGTTACCCTGATGCCGACCGATTGGTGTTGGTA +CGTGGAGGAGGAGAGAATGTAAAGACTGGAGAGAAAGGAGAAGGATATGATGATAGCTTG +TTTGCTCCGATGGCAGAAGCATTCCGCAGTGATTTGTCACAATGGATTGAAAATGCTACA +GTCATTTTCAATTTTGAGACATTAAATGTATTCAAGGATGGTCATAAGCTGAAAGACGTA +AACTACGCTTATGTAGATACTTGCTATTTCCGTACTTTCGGCATCAAAGTATTGAAAGGG +AATCCTGAGGAATTGCAAAGGGCAGGGAGCATATTCGTTTCTGAAACTTTTGTTCGTGAT +GTCTTTGGTGGACAGGACCCTGTGGGACAAAAATTATCTTTGGATAAACAGCATGAACTG +ACAGTCCGTGGTGTTTATCAGGATACTCCGGAAAATACGGCCTATCATTTTGATTTTGTA +GCTCCCATTTATGCCGGAGGTGGTTATATAGGGGGTGGAACATGGGGGCGTAATGACATT 
+TATTATACTATTCTCCGTTTGCGCGATGGGGTGGACAGGGAAGAAATCAATCGTCAGATA +TACAAGGCTATGCAAAAATATTATCCTGATTCTGCTGATGATGAATGGAGAAATTTTTAT +GATGCACAGCCATTGCCCGAAATTCATCTGGATGACTCAAATACCCGCACTCGCCTTTAC +ATTTATGGTTTTTTGGGCTTTGCCATATTTTTTGTGGCAATCATGAACTATGTATTAGTA +GCTATTGCTACCATGAGCCGCAGGGCAAAAAGTATAGGTGTGCATAAGTGTAGTGGTGCC +AGTGCAATCAATATTTTCAGTATGTTCCTTTTTGAGACGGGGATTGTGGTATTGATGTCC +GTTATAGTGGCCCTCTTTATTATTTTCAATACCAAAGATCTGATAGAAGATTTACTTTCG +GTGCAGTTATCCTCACTCTTTACATTGGAAACTTTATGGGTTCCGATGCTCATTGTTTTT +GTACTGTTCATAGTAGCCGGAGTGCTGCCGGGAAGATTGTTTTCACGCATTCCTGTCACT +CAAGTTTTTCGTCGTTACACAGATGGGAAAAAGGGGTGGAAGCGTTCTCTTTTATTTGTA +CAATTTATGGGGGTATCTTTTGTGATGGGGATTTTGTTAGTATCTTTGATGCAGTATCAT +CATCTGATTAATAGTGATATGGGTATCCGTACTCCAGGATTGGTGGAGGCGGAAACATGG +ATGTCACCCGAAGAAGCAGAGAATATGGTTAACGATCTTCGTCGTCAACCCATGGTGGAA +AATGCCACCCGTTCTATGCATGGTGTTCTGGGAGAATATTGGACTCGTGGGCTGATTGAT +AATAGTGGCAAGCGAATAGAAACCTTGATGTATAATCCTTGTGATAAGAATTATGCTGAA +ACCATGGGGATTACTATTATTGAAGGGAAAGATATGCAGAATGAGGGAGATGTATTGGTC +AACGAAGAAGTAGTGCGGCTGATGAAATGGACAGATGGGGCAGTAGGCAAACGGTTGAAT +GATTTTGATAAAGCCGGAACCATTGTTGGTGTATTCCGTAATGTGCGCAATACGTCTTTT +CTTTATAAGCAGTTTCCCGTGGCTTTGGTTTATAGTCATAATACCAGTCACACTTTTGAT +GTACGTTTAAAACAGCCATACGATGAGAGTTTGAAAAAGCTGAACGAATACATGGAGCAG +GTGCATTCCACTAAGGCATTGGAATTTATTCCGATAGATACCATGCTGAAGGAAATCTAT +CGCAATGTATATCGTTTTCGTAATTCTGTATGGATTACTTCTACTTTTATTTTGCTTATT +GTTGTTATGGGGTTGATAGGTTACGTGAATGATGAAACACAACGTCGCAGTAAGGAAATA +GCTATCCGTAAGGTGAATGGTGCTGAAGCTTCGACTATTCTTCGGTTGCTGTCCCGTGAT +ATATTATATGTTGCTGTCCCATCGGTCTTGATAGGAATTGTAGTATCGTACTTTACAGGA +AAGGCTTGGTTGGATCAATTTGCCGAAACGATAGATATGAATGCGTTGTATTTTGTCGGA +ACAGCATTGGTGATTATCGCTCTGATAGTGGTGTGTGTCGTTGTTAGAGCTTGGCGTATT +GCTAATGAGAATCCGGTAAAGAGTATTAAAATTGAGTAACTAAATAGATAGCGATGAAGC +AATTATATTATGCAATACAAACGATATTACACGGTCGTGGAAGTAATGTGACGAAAGTCA +TATCGCTTTCATTGGGGCTGACTGTTGGCATCCTGTTGTTCTCGCAGATTGCATTTGAGT +TAAGTTATGAAAAATGTTATCCTGAAGCTGGAAATCTAGGAATAGTCCGCGCATATTACC +ATAATTTGGAAACAGGTGAGTCGATGGGGGACGATGGGGATATCTATGATTATTCCGTTT 
+TTGCTCCGATAGCGGCGGCTTTGGCTGAGAATATGCCAATAGAAGTGGAAAAAGCTACTT +GTATAAATTCTTACACAGCCAACGGTAATTATTATTATGAAAATCAATTGTTGTCGGATG +ACGAACAGTGTCTTATGGTGGATACTTGTTTTTTCCAAACCTTTGGCATTCCGGTTTTAA +AGGGAAATCCTAACGATTTGATTAACTCCAATACTTTGTTTGTGTCCGAGAGTTTTGCAC +GCCGTTTTTTGGGTGATGCAGATCCTATAGGTAAAGTGTTAATGTTGGAGCGGAAAACTG +AAATGATTGTCCGTGGTGTTTATCGTGATATTCCGGAGAATACCATGTTTAAGGCAGACT +TTGTAGTAAGTGTACATAAAGAAGGAGGTTATAAGGATGGAGCCGGATGGGGAGGAAATG +ATATTTTTTATGCAGTGTTCCGGACCGGAGAAGCTTCGGATATAGAGGTGGTAAACGATA +ACATCCAGCGGATGGTGGAGAAATACATGCCGACTGAACATAACGGTTGGAAACTGGAAC +TCAGTGTCATTCCATTGGCAACAAAGCATCAGACTTCTTCAGAAACAACAAAGCGTCTAG +TTATCTATGGCTTTTTGGGTTTCTCTATCTTCTTTGTCGCTATTATGAACTATATGCTTA +TTGTAATTGCTACTTTGGGACGTCGTGCCAAGAGTGTCGGTGTCCATAAGTGCAATGGTG +CCAATAATGGGAATATCTTCAGTATGTTTATGATGGAAACGGGCATTATTGTTCTGGTAT +CCATTCTGGTCTGTGCATTTATCATTTTCAATGCGCGGGATTTGATAGAAGATTTGCTGT +CGGTGCGACTTGCATCACTCTTTACATGGGGAACTTTGTGGGTACCGCTGCTGGTTGTTG +TACTTTTATTTTTGTTGGCAGGTGTATTGCCCGGACGTATTTTTTCACATATTCCTGTCA +CACAGGTATTCCGTCGTTATACTGATGGTAAAAAAGGATGGAAACGGTCACTGTTGTTTA +TACAGTTTACCGGTGTGTCTTTTGTCTTGGGATTATTACTGGTTACGTTAATGCAGTATA +GTCATTTGATGAACAGGGATATGGGGATCAATACTGCCGGACTGGTAGAAGCCGAAAGTT +GGCTGGACATTGAAATTGTTCCTCATATGCGCGATGAGATCCGGCGTCAGCCTATGGTAG +AGAGCGTGGCTACAGCCAGCCATAGTGTATTGGGCCAATATTGGACAAAAGGATTGATGG +GCAGTGACGGCAAGCGTATAGGTGTTCTTAATATGAACTATGTGGATTTTAATTATCCGG +ATGTGGTAGGCATTACGATTATAGAAGGAAAGCCGATGACCCATGCCAAAGATCTGCTGG +TGAATGAAGAGATAGTCCGGCTTAAGAAATGGACGGATGGAGCCGTAGGAAAACCTTTTG +ATGAGATTAAGGAAGGAACTATAGTAGGTGTGTTCCGTGATGTGCGCAATCAAAGTTTCT +ATCAGGCGCAGCAACCCATTGTGCTGATAGGTGGTAATGCGTTTAATCATACCTTCAATG +TACGGTTGAAAGAACCCTACGATGAAAATTTGAAGCGCCTGAATGAATTCATGGACAAAA +CTTTTCCACAAGTGGCTTTAACATTCCATACAGTGGATG +>NODE_13_length_39994_cov_62.9723_ID_25 +GCCGGGCCTGCCACCCGTCGAAAGTCCGGTTCTCCGGGAGAACGGCCTCAGGCCGGGCGC +GGCACCTTGGCCAGCGCCGGCGCCGGCGCCGGCATCAGCCGGCGCACCAGCCACTTGCGC +ACCGGCACCACGCCGCGCTCCTGGAACAGCACGCAGAACGCCACCAGCAGGACCAGGAAC 
+AGCGGGTAGTAGAGCAGCGACGATGGCAGCGCGGCCGCCGCCTCGGTGCAGGCGCGCCAG +TCGTCGAGACAACGCCCCGGCACGGCGATAAGGCGCTCGCTACGGGAGAACAGGGTGAAC +ATCGGCACATGCAGGGCGAACAGCGACAGCGAGGCGGCCCCCAGGCGCGGCGACCAGTGG +CGGACGAAAGCGCTCTTCGGGTCGGCGGGCAGCGCGCAAAGGCACACCAGCAGCAACTGG +GCCGGTAGCAGCAGGCCGTTGTGCAGCAGGAAGTACCAATGTTTGGCGCCCTGGGTGAAC +AGGTAGTCGGCCACCAGGAAGGCCGCCACCACCAGCGCCACCAGCGCGGCCAGCAGGCCG +CGTCCCGGCACCATGCCACGGTCGCGGTAGCCGCGGAACAACCCGTAGGCCAGGATGCCG +CCAAGGAATTCCGGCAGGCGCAGCAACGGATTGCGGTGCAGCAGGCCGGTCCAGGGGATC +CCGTAGCTCTCGCTGGCCACTACCAGCGCCGGCGGGATCAGGTAGACCAGCCAGACCAGC +GCCAGCACCTTCCACTTGTGCCGGCTGCGCATCAGCCGCGGGGCGACGAAGGGAAACGCC +AGGTAGAAGAACATCAGCGTCGACAGCGACCACAACGGCGCGTTGAAGGTCAGGTAATAG +GGATTCCAGGCCTGCAGCAGGAGGATCTGCAGGATGCTGTTGACGAACAGTTCGGTGTTG +CTCATCCAGTGCAGGAAGAGTTCCGGGTGGGTGCGGCCGAGCACTTCGTTGGTGTCGTAG +ATGACGAAGCGCGGGGTGGCCTTGTCCAGCTCGGGACCGATTCCCAGGTGGCTGAGGGCC +AGCAACACCAGGATCGACAGCAGCAGGGAGAACAGGTGCAACGGATAGAGATTGGACAGT +CGCTTGGTCCAGAAACTGCGGGCGCTTTCGCGCAACCGGCCGCTGCGTGCGTAGACATGG +GCCAGGAGGAAGCCGGAAAGCACGAAGAAGGTACTGGTGGCGAAGAACCCCAGGCTGGTC +AGTTCGTCGAGACCGGGAAACTTCTGCTCTTTCGGATAGGTATGCAGGGTGTGGTACACC +ACCACGTACAGGCCGAGCAGGAAGCGCAACCATTCCAGTCCGATGAAACGTTCCTTGGAG +TCGGGCGTTGCCACGCATCTCTCCTTTGTCATTGTAATGGGCGGTCCGGGGTAGACCCGC +AAGCTCCCCGGCAAGTTTCACCGGGCCGGCCAGCGGCTACGAACGGCCGCTCCCTAGCGC +CGGCGGTCGAGGAAATTCACCACTACCCGGTCAAGCCAACCCCACAGGCGCTGGTGCAGG +CGCATCCACAGGGGTCGCGCGCGCCACATCGCCAGGGTCACCTCGCGGCTGTCGGCGAAG +TCCGCCAGCAGGCTCGCCGCCGCCTCATTGGTGAAGTCCGGGTCGAGGGCCTCGAGATTG +GCGTCGAGGTTGAAGCGTAGATTCCAATGATCGAAGTTGCAGGAGCCGACGCTCACCCAA +TCGTCGACCATGACCATCTTCAGGTGCAGGAAGCGCGGCTGGTATTCGTGGATGCGCACG +CCGGCGCGCAGCAGGCGCGGGTAGTAGCGCTGCCCGGCATAGCGCACCGGGGCGTGGTCG +GTGAGCCGCCCGGCCAGCAGCAGGCGCACCTCCACGCCGCGCTGGGCGGCCTTGCGCAGG +GCCCGGCGAACCTTCCAGGTCGGCAGGAAGTACGGCGTCGCCAGCCAGATCCGTCGGCGC +GAGCCGTTCAGCGCGCGGACCAGCGATTGCAGGATGTCGCGGTGCTGGCGGGCGTCGGCG +TAGGCCACCCGGCCCAACCCACGGGCCGCCCCCGGCTGCGGCGGAAGGCGGGTAAGGGTC +ATGCCCTCGCGGGGCTTCCAGGCTTTCTCTTCGAGACAGGCCAGCCATTGCCGCTCGAAC 
+AGCGCCGCCCAGTCCGCCACCACCGGGCCGTCCATCTCGACCATCACCTCGCGCCAGGCG +CTGACGTCGCTGACCGGTTCCCAGAATTCGTCGGTGATCCCGGCGCCGCCGACATAACCG +AGGCGCCCGTCCACCAGCAGCAGCTTGCGATGGTCGCGGTAGAGATTGCCGCCAGTCAGC +TTCCAGCGCAGCGGGTTGTACAGCCGCAGTTCGCCGCCGGCCTCGCGCAGGCGCTGGATC +CAGGCGCTGCCCAGGCCGAGGCAGCCGAAGCCGTCGAACAGGCAGCGCACGGCGACGCCC +CGGCGGCGGGCATCCAGCAGGGCGACGAGAAACAGTTCGGCGCAATGCCCGTCTTCCACC +AGGTACAGCTCCAGCTCGATGCTCCGCTGCGCCGCCTCGATGGCCAGCAGCATGCGCGGG +AAGAACCGCACGCCGTCATTGAGCAGGGCGAAGCGGTTGTCGCTGCGCCAGGGAAAGACC +GGGCCGCTCACCGCGAAGTGAAGATCAGCACGGCGCCGACCGGCACCGAGAGCCCGATGC +TGCCGGCGCCGGCCAGCTTGCGCAGGGTCTCGATGCCGGGGGCGAGGTCGAAGTCGGCGG +CGTCCAGCACCAGCGGCGACAGGGTCACCACCTGGAAGCGATGCTCGTCCAGGCGGGTCA +CCAACAGGTCGCTGGCGTAGCTCTTGCGCTTGCCGTGCAGGCTGACGGTCACCGGCTGGC +GCATTTCCAGTTGCACGCCGTCGGCCAGCTCGACGATCGGCGCCAGGTCGAGCTGGGCGG +TGATCTCCGCCTCGGCGAAGCGCTTGGTCTCGAACAGATGCTCGCGGATGCGTTCGTCGC +GCAGCGGGATGTCGGTGCTCACCGAGTCCAGCTCGACGCGCAGGCGCGCCTGCCCCTTGT +CGTCGACCTTGCCGTGCATGACCAGGAAGCGGCCGACTTCCGAAAGGGTAGCGTTCTTGG +TGGAGATGAAACTGAGTCGCGACGACTCGTAGTCCAGGTACCAGGCGGCCTGGACCGGCA +AGGCCAGGAGGGCCAGGCCGAGGCAGATCAACTTGTGCATGCGGGCTCCCTTTGCGTCCG +GCGCCACCTTAAACCGGCCGCCTGGGAGCAGGCAAACTCCCGCGTCAGTCGAGCGCGTGC +TGCAACTGACTGACGGTATCGGCCAGGGTTCCCAGGTGCCGGCGGCGCACCCGTTCGACG +CAATCCTGGTCGCGCGGCCAGTGCAGCGCCACGCTGCCGAGCAGGCGACCCTGGGCGCGT +ACCGGCAAGGCCACCGCGCGGATCAGGAACGGCAGGCGCACCGGATACTCCCAGTGTCCC +TCGGTACGCTCGCCGTAGCCCTGGCCGTGGGCCTGCTCGCGGGTGCGCAACCAGAGGTCC +TCGTCTACCTGGTGCTGGCGCGCCAGGCGGCGCACCTCGCTGTCTGCCAGTTCGCCCAGG +CAGGCCTTGCCCATGGCCGAATGGAACAGGCTGGCGTGCTGGCCGACCACCAGCCGGTTG +GCCGGGTAGCGCTTGCGCAGGACGCCGGGCACCGAGCTTTCCGCCACCAGCAGGCGGTCG +CCGTCGAGAATCGACAAATCGGCGACCAGCCCGGTGCGCTCGCTCAACTCCACCAGCAGC +GGCGCGGAACGCTCCACCACCTGGCAACGGAAGCGCTCGGCGGCGTCACCGTAGAGGCGG +GTGGAACGCAGCCGGTAGCGCCGGTCGGTCAAGCCGCGGTACAGCCAGCCCTGCTCCAGC +AGGGTATGCAGCAGCCGCGACACCGTGGCCTTGGGCAGTTCGGTCAGGTAATGCAGTTCC +TCGAGCCCCAGCGCCTGGTGCTGGCCCAGCAGATCGAGGATCGCCAGCGCCCGCTCCACC +GAACGCACGGTTCCCGCCTCTTCTCTTTCCGGCATGACGACACCTCCACCGTCGACAGAC 
+CGTTATGGCAAAGCAAGATGCAGGCCCAGGTCATGTAGGAAGAATCTGCGAATACGCCAG +CATTGCTCCGCGGCCTAGTCGATGCCCCCTGGAGCCTCGAGCATCGCCGGCGGACGGCGG +CTGACCCAACGGGTCAGGCCCTCGTGGGCATGGGCCAGTTGCAGCACCGCCAGGTCGGCC +TGCGCCGGGCCGATTATCTGCAATCCCATCGGCAGGCCCGCCGCGCCGAAGCCGATCGGT +ACGCTGATCGCCGGCAAACCGGCCAGGGTCGGGCCGATCACCACCTCCATCCAGCGGTGA +TAGGTGTCCATCGGCCGCCCGGCGACCTGCCGCGGCCACGCCGTTTCTGCATCGAAAGGA +AACACCTGGGCGCTGGGCAGCAAGAGGAAATCGTAACGTTCGAACAGACGCGCCAGCGCC +CGATACCAGTCGCTGCGATCCAGCGAGGCGCGATAGACCTCGGTGGCGCCGAGGCCGAGC +CCGGACTCCACTTCCCACTGCGCCTCCGGCTTGAGCCGGACGCGCCGTGCGGGATCGGCG +TAAAGCTCGCCAAGCGAGCCCTGCACCAGCCACTGGCGATGGACCAGCCAGGTGCGCCAC +AGGCGCTCCAGGGGATAGTCCGGCAGGCACGCCTCGACCTCGCAGCCCAGCTCGGCGAAA +TCGCCCAGCGCGGCTTCGCAAAGCTCCAGCACGCCCTCCTCCATCGGCAGGTAGCCGGCG +TAGTCACCGAGCCAGCCGAGCCGGGCTCTGCGGAAATCGCGCCCGAGGTCGTCGGCGAAC +CTGCGCGGATCGTCGCGCAACGACAGCGGACAGCGCGGATCGTAGCCGGCCTGGGTGGCC +AGCAGCCGGGCCAGGTCCGCCACGCTGCGCCCCATCGGGCCTTCGGTGGCCAGTTGCTGG +ACGAACAGTTCCGCTTGCGGGCCGTGCGGCACCCGTCCCTGGGACGGGCGGAAGCCGTAG +ACGTTGTTGTAGGCGGCGGGGTTGCGCAGCGAACCCATCATGTCGCTGCCGTCGGCCACC +GGCAGCATGCGCAGCGCCAGCGCCACCGCCGCCCCACCGCTGCTGCCGCCGGCGATCCGC +GCCGGATCGTAGGCATTGCGGGTGGTGCCGAACAGCGGGTTGTAGGTCTGCGAGCCGAGC +CCGAACTCCGGTACGTTGGTCTTGCCGATGACGATCGCACCGCTGCTCTTCACCCGCTCG +ACGACGATGGCGTCGTGCTCCGGCACCTGCCCGGCGAACAGCGGCGAACCCAGGGTGGTA +GGAATGCCGGAGGTGGCGGCGAGGTCCTTGATCGCCTGCGGCATGCCGTGCATCCAGCCC +AGCCATTCGCCGCGCGCCAGTTGCCGGTCGCGCTCGTCGGCCTGGGCGAGCAGGCGGCTT +TCCGCCTGCAGCGAGACGATGGCGTTGACCCGCGGGTTGAACCGTTCGATCTGCGCCAGG +TAGGCCTGCATGACCTCGCGGCAGGACAGTTCGCGTCGGCGGATCGCCGCGGACAGCGGC +AGCGCGTCAAGCGCGACGATTTCCTCGTGGCCGTCGCTTCGATGGCGGGTGGCGCTCATT +CACAGACTCCTTGCCGGCTTGCCCAGGGCCGGTTCACGGGGGCGCGGGGCGGGCGTCGCG +CCCTCCCGCAGATAGAGGGTGGCGAGCAGCCCGAGGAAGGCCGCGCCGAGCACGTAGAAC +GCCGGCGCCACTGGCGAGCCGCCGACCTCGGTGAGCCAGGTGACGATGAACGGAGCGAAA +CCGCCGAACAGCATCACCGCCAGGTTATACGCCACCGCCAGGCCGGTGGAACGCACCCGC +ACCGGAAACTGCTCGGCCACCGCGGTCGGCGCCGGGCCGAAGAAGCCGCCGATCGCCGTG +CACAGCAACAGTTGCATCAGCAGCAGGCGCCCCAGGCTCGGCGCCGCGGCGACCCAGGCG 
+AACAGCGGATAGACCATGAGCATGAAGGCCAGGGTCGCGACCAGCAGCACCGGGCGCCGC +CCGACCCGGTCGGAGAGTCCCCCAGCCAACGGGATCACCAGGGTCATCAGCGCCACCGCA +GCCATCTGCACCATGAACACCTCGTCCAGCGGCAGGCCCAACTGCTTGTGGGCGAAGGTC +GGCATATTCACCAGCACCACGTAGAAGGACACCGTGCCGATCACCGTCAGGCCCATGCTC +ACCAGCACGCTGCGCCGGTATTCGCGCAGCACGCCGAGCAGCCCCTGGCGCTCGCCGGGC +TCACGGATGGCTTCGAGGAAGGCCTCGGTCTCGCCCATGTAGCGGCGGATCCACAATCCC +ACCGGACCGATCAGCAGACCGAACAGGAACGGCACCCGCCAGCCCCAGCTCTCCAGGGAC +TCGGCGTCCAGGCAATGGGTGACCAGCGCGCCCATCCCCGCACCGGCGAACACCGCCAGG +CACTGGCCGAACAATTGCCAGGAACCGTAGAGACCGCGCCGGTGCGGCGGCGCGCTTTCC +ACCAGGAAAGCCGTGGCGCTGGCGTACTCGCCGCCGGTGGCGAAACCCTGCAGCATTCGC +GCGATCACGATCAGCAACGGTGCGCCGACGCCGATCGCCGCGTAGGTCGGGGCGAAGGCG +ATCATCGCGATGGACAGCGTCATCAGCAGGATGATCAGTTGCATCGCCGCCTTGCGTCCC +CTGCGGTCGGCATACAGGCCGAGCAGGACCCCACCGACCGGACGCATGAAGAAACCGACA +CCGAAGGTGGCCAGGGCCATCAACAGCGAGGTGTACTCGTCGCCGGAAGGGAAGAACAGC +CTGGCGATCAGGCTGGAAAGGAAGCCGTAGACGATGAAGTCGTACCACTCCAGGGCGTTG +CCGATCACGGCAGCCGCCACCTGGCGGGTACGCGAGGTACCGCTCGGGCAAGCGTGCATG +GGAATCTCCTGGTAACCGGGGGAATGCCGGCGCGGCTCAGGCCGCGCCGTTATTGGCGGC +AGTCAGTCGAAGGCCGGACGCCGCGGGGGCGGCCAGCCAGGTCTCGGCGAGCGCGCCCCA +ATAGGCCGCGCCCCGGACCAGGATGTCGTCGTTGAAGTCATAGGCCGGGTTGTGCACCAT +CGGCCGTCCTCTGCCGTTGCCGATGAACAGGTAGCTGCCGGGGCAGCGCTGTAGCATCCA +GGCGAAGTCCTCGCTGCCCATCAGCTTCGGCGTGTCGCCGTCGACCTGCTCCGCGCCGGC +CAGCTCGACCCCGACCTGGCGGGCGAACTCGGTTTCCTCGACGCTGTTGACCAGCACCGG +ATAGGCCGGGTAGTGCTCGATGCTGGCCTGGCACCCATAGCTGGCGGCCTGCAACTCGAT +GATCTGGCGAACCCGCTGCAGCACCTGCTCGCGCACCTGGCCGTCGAGCGCCCGCAGGCT +CAGGCGCAGCACGGCGCGCTGGGGTATGACGTTGGCCGCCTCGCCGGCCTGCAATGCGCC +GACCGTGACCACCGCGGCCTTCTGCGGATCGACGTTGCGCGCCACCACGCTCTGCAGGGC +CATCACCGCGCTCGACGCGGCGAGCAACGGATCGACGCTCAGGTGCGGCATCGAACCGTG +TCCACCGACGCCCTCCAGGGTGACGCTCAGCAGGTCCTGGGAAGCCATCATCGGTCCCGC +GCGGAAGCCCAGGTGCCCGGCCTCCAGTCCCGGCATGTTGTGCATGCCGAACAGCGCATC +GCAGGGAAAGCGCTCCAGCAGCCCGTCGGCGAGCATCGCCTCGGCGCCGCCCTGGCCTTC +CTCGGCGGGCTGGAAGATCAGTACCAGGGTGCCGTCGAAACGCCGCGTCGCCGCCAGGTA +GCGTGCCGCGCCGAGCAGCATCGCGGTGTGTCCGTCGTGGCCGCAGGCGTGCATGCGCCC 
+GCCGTGGCAGCTGCTGTAGCCGAGCCCCGTGGCCTCGACGATCGGCAACGCATCCATGTC +CGCCCGCAGGCCGAGCCGCCGCGTGCCGTCGCCCTGGCGGAGGACGCCGACCACGCCGGT +GCGGCCGATCCCTTCGTGCACTTCGTAGCCCCAGCCGCGCAGGCATTCGGCGACCAGCGC +GGCGGTGCGTCGCTCCTCGAAGCCCAGTTCGGGATGGGCGTGGATGTCCTGGCGCAGGCT +ACGCAGGTCGTCGGCGACCTCCTCGAGCCAGGCCACGATGTGTCGATGTCGAGCCATGCC +GTCTCTCCTCTGGCCGGATTGCGCTCCGGTCTTGTTGGCGGGAGGCCCGGACGCAAGGCC +GGCCGGGCGTGGTCACAGCTAAGCGCGGCACGCGGCGAAGGACAACGGAAGGCGGTTCCA +CTGGGTGGAACCCAAGGCCCGCGCCGCCCTCCGCCGGTACGTCAGAACTGTTCCGGCGGC +ACTTCGCGGAAGTTCAGCACGGCGAACACCGGTACCTCGGGAGTGATCGAGGGAATATTG +GCCATCTCCTTCAAGGGCTTGAGCTTCTCTTCCAGGTCGAAGTCGAGCAGCTTCAGCAGC +ACCGGCTCCAGGGTCGCCACCTGGACCAGCCCTTCGCCCTGGCGGGTGACCAGCACGTCG +GCCTTCAGGCGGCGCTGCTGGCCGTGCAGGTCGAGGGTGAAGTCGAGCTTCTCGACCCGC +GACTGGCCGACCCGCAGGTCGTCGAAGCCGGCCATGTCCAGGCGGCTCTCCACGGTGGCC +TCGGGGAAGCGCGAGGTTTCGAAGAAGTTGTCCTTCATTCGCTCGTCGCGCAGGGCCAGG +CCGCTGTCCAGGGTGCCCAGCGGCACCACGATGCGCGCGACGCCCTGCTTGTCGATCTGG +CCGCTGAGACGATCGAAGCGCTGCACCTCGGCCATCTTCGCGCGTTTCACGGAAACGAAG +CTGATCCGCGAGTAATCCGGCTCCAGTTGCCATTCCGCCGCCGACAGCAGAGGGCTGAGG +AGCAACAACAGGACACACAATACAACAGGCATTCCACCTCCGGCTTCGGGCCGGCGCCGT +CCCTCGCCCCGCGCCCCGACATGCGCCGCCGGTCAGTCCGGCAGCCTGTCCGCCTCGGGC +ATGAGCCGGCATCCCTCGCGCGGGCCCAGCCAGGCGGCCGTCTGCGTACTGCCGAGGCCG +CGGGCGGTCACCCGGCGGCGGGCTTCGTCGTCGAAGAAGCCGCTGATCGGCACGTACTGT +TGGGTATAGGCCCGGCAATAGTCCGCGGAGTTCCCCATCACGTAGCGGCAGGAGCAGTAC +TCCTTGGCCGTATAGGCGCCGATGATGGCGGGAAAGGCCGCCAGGTGGATGCGGTTCTGC +CAGGCCCACGCGAGGGCGGCGAGCAGCCCCAGGACGAACAGGAAGAGCAAGGGATGGCGA +CGGATCATGGCCGCGCCTCCCCGGCGAACGCCGCCTGTGCCAGGCGCAGGAATTCGTTGT +GGCGGAAACGCCCGTCGCGGTCGTCGGCGTAGCGCACGATCACCAGCTTCTCCTCGGGCA +GCACGTAGAGCGCCTGGCCCCAGTGGCCGAGCGCGGCGAAGGCGGTCTCGGCGACGTCCG +GCCAGGGCGCCGGCGCGCCTTTCACCGCACGGTTCAACCACCACTGGCCGCCCGGCACCG +CCTCGCCCTCCTCTTCCGCGCTCGGCCGGTAGCGGGCGAAGGGTGTCAGGTTGAACTCGA +CCCATGCCAGCGGCAGCAGTTGCCGCTCGCGCCAGCGCCCGTGGCGCTGCATCAGCAGGC +CGACGCGCGCCAGGTCGCGGGCGCTCATGTAGACGTACGACGAGCCGACGAACGTGCCGG +CGGCGTCTCGCTCCCATACCGCCGAACGGATGCCCAGCGGCTCGAACAGCGCGCGCCAGG 
+GGTACTCGGCATAGTCCGCGCCGAGCATGCCGTGGAGGGCCGCCGCCAGCACGTTGCTGT +CGCCGCTGGAGTAGCGGAAACGCCGGCCCGGCGGCCGGGCCTGCGGGGTTTCGGCGACGA +AGCCGGCCATGTCGTCACGCCCACGGGTGTAGAGCATCGCCACCACGGAGGATTTCAATG +GCGCGAACTCGTAGTCCTCCTGCCAGTCCAGCCCGGAAGACCAGTTCAGCAGGTGGCGCA +ACGTCACCTCGGGATGACGGGCGAACGGCGGGTAGTAGCGCGCCACCGGGTCGTCGAGCT +GGAAGCGCCCTTCGCCCTCGGCGACGCCAAGCAGCGTGGCGAGCAGGCTCTTGCTCACCG +ACCAGGTAAGGTGCGGGGTCTCTGCACGGCTCGGGCCGGCGTAGCGCTCATAGACCAGAC +GACCGTCGCGGATCACCACTACCGCGTCGGTGCGGATGCCTTTACGACGAACATCGTCGC +GACGACCGAAGGCATAGCGCTCGAATGCCTCCAGCGCCGGCCCCGCGGGCTGTTGGGCAC +GGGGCCAGTCGGTGTCGGGCCAGTCTTCGGCCAGGGCAGGGAGGGCGAATACCAGGGCCA +GCGCGGCGAGGGCGCGGCGGGACAACGGCAAAGCGGGCATCGGCGGGCCCTCTCGACGGT +CGAGGGGCAACCCTAGCATGGCCTCGATGACGCCAGGAAGCGCGGACTCAGCCTGGCATG +CGGTAGTCGTCCGGGCCGAGCAGGTCGATGCCGCACTCGAGGTTCTCGATCCAGTAGAGG +AAGGCGCTGATCATTTCCTTGCCGACGAAGGGCCGCCCGTGTTGCGGCACGATCATCTCC +ACATCCATGTCCCGGACCATCCGCGCCCACAGGCGGCAGATCTTGTTCGCCGCCATGTAG +CGGCGATGGAATCCGTCCATGTACTGGATATGCGCGGCGAAATCGCTGACCGGCGCCGCG +TCGTCCACCAGCGAGGCGCCCATATCGCCCGAGAAAAGGATCTTGCTCACCGGGTCGTAG +AGCTGGAAGTTGCCCACCGAATGCAGGAAATGCGCGGGGATCGCCTTCAACGAGCAGTCG +CCGAGAGCGATCGCCTGGCCGCGGTCGGGCAACGGCATGACCCGGTCGTAGGTGTTGATG +CCATGGCTGACCGCCAGGTAGTTGGCCGTCAGGTGCGGCAGGAAGCGCGCCCACAGCTTG +GAGCAGATCACCCGGGCGCGGGTGTGCAGCAGCCACTTGTCGAGGGCGGCGATGATGTCC +GGGTCCTGGTGCGAAGCGAAGATGTAGTCCAGCTCCTGCAGCGGAAAGCGTTTCGACAAT +TCCAGCGAGAGCGGCGTGTAGGTCAGGTCGCCGCCGGGATCGAGCAGCAGGTACTGGCCG +TTGTCGACGATCAGGAACTGGTTGGACTGGACCCCTTCGCCGCTGACGAGGTCATCGAAA +CACAGGCACTGGTGGCTGCCGTTGTCGAACAGCACGATCGGTTCGCGTCGCATGGTCGGT +GAAATCTACTCTGTGCAGGCGCCGAGGAGCATTCCTCGCCGGATGCTGCGCAGGGTAAGC +GCCTGCGGCGCGGCGCCTCACTGACCTGGGTCAAGCCCGAGGCGGGTCAAGGCCTTCTCC +AGGCGAACTTTGCGGGCCCCGTTGGCAATCGCCAGCAAGCCGCGATCGCGTTGCCACAAT +GCCTCCCAGGTCGAGGCGTTGGCGGCGAAGGCGGCATCCAGTTGCCCCTCCAGCGCCGAG +AACTGTTCGAACAGCCCAGCCAGTACCGCCTGGGTGCCGACGCAGGCGACCGGATGCCGC +GCGACCTCGGCGCAGCCGCCGAGGATGCCGGCCAGGCGCAGCAGCAGCGGCGAGGGCCAG +CCGCATTCATGGCCGACCCCATAGAGCCAGCCGACCAGCGCCGCCAGCACATGGACATCC 
+TCCAGCAGGCGGAACGGCCTGACGTAGTCGTCCCAACCATCGCCGGGCAGGCGCTCGCAA +TGGGCACCGTCGAGCAACAGGCGGGAATGCGGCACATCCGGCATCAGCGGCAGCGCCGGC +AACGCCTCCAGGCGCGCCCCCGGGGCGCCGGCCAGGACCACCGTCATCGACAGTCGCGGC +GGCTGCCCCGGCGCCTCGTCGCGGGCGGCCACCAGCCACCAGGAGGCGCTGTCGCCAGCG +GTGACGAAATCCTTTTGTCCATGCAGGACCAGCCCCTCCAGGCGGGTCTGCAGGTCCGCC +GGGCGCACGCTGCGCTTCTCGGTCACGCACAGCGCGCCGAGACCTTCCGGCGCCGCCGGC +CAGAGCGCGCGCAGGGCCGCCTGGTAGCCGGCGAGGAAGGCCAGGCCCGGACTCGCGGCC +AATCGTCCGCCGAGCACCGCCCGCTCCAGGGTGCCAAGACCGTCGGCGCGCGCCAGGGCG +GCGGCGTGCCAGTCTTCCAGGGAGCCTTCGGTGGGCAGGCGGTCGAGCGGGCCGAGCAAG +CGTTGCCAGGACATGGCGGACTCCAGTGTGGAATGTGTCGGAACCGGCATCCTGCCGAGG +GTGCCAGCGTTGAACAACGGCGCGTTACATCCGGCGACAATGTCATACAAGCATCACGCC +CCTGTCACAGGGGTGACACCGCGCCTTCCTAGCCTGAGCTTGCACAACAAAGCCGAGCGG +CCGCAACCCGCGGCGCCGGCCCAAGGAGGCCCAGGCATGACCCAGACCGCCATTACCCGC +GAACCCGTCGCCGGACGCCGCCTGAAGGCCGAGCGCCTGAACGGCGCCCGCGCCCTGCGC +GAGGCCCAGGCCCTGCGCTACCGCGTATTCAGCGCCGAGTTCGACGCCAAGCTCGAAGGC +GCCGAAGACGGCCTCGACCGCGACGACTACGACCGTCACTGCGCGCACATCGGCGTCCGC +GACCTCGACAGCGGCGCCCTGGTGGCCACCACCCGGCTGCTCGACCATCGCGCCGCCGAA +CGCCTCGGGCGCTTCTACAGCGAAGAGGAGTTCCACCTCTCCGGCCTCGACGCCCTGCAC +GGCCCGGTCCTGGAGATCGGCCGCACCTGCGTCGCTCCCGAATACCGCAACGGCGCCACC +ATCGCGGTACTCTGGGGCGAACTCGCCGAGGTCCTCAACGAGGGCGGCTACCGCTACCTG +ATGGGCTGCGCCAGCATTCCCATGCGCGACGGCGGTATGCAGGCCAAGGCGGTGATGCAG +CGGCTGCGCGAACGCTACCTGTGCACCGACTACCTGCAGGCCGAGCCGAAGAACCCGCTG +CCGCCGCTGGACGTCCCGGAAAACCTCACCGCCGAGCTGCCGCCGCTGCTCAAGGCCTAC +ATGCGCCTGGGCGCCAAGATCTGCGGCGAGCCCTGCTGGGACCCGGATTTCCAGGTCGCC +GACGTGTTCATCCTGCTCAAGCGAGACGAACTCTGCCCGCGCTACGCCCGCCACTTCAAG +GCAGCGGTCTGATGGCGCGGCTGCGCCTGCTGCTGCGCAGCGCCCGCCTGCTCGGGCTGG +TCGCGCTCGGACTGGGCCTGGCCGCTTGGGTCAGCCTGCGCGAGCGGCTGCCCGGCGCCG +ACGTCACGCCGTTGCGCCAGCGCCTGACCCGCTGGTGGCTGGCGCGCCTCTGCGCGGCCC +TGCCCTTCGAGGTCAGGGTCAGCGGCGAAGCGCCCCGGCAACCCATGCTGTGGGTAGCCA +ACCATGTGTCCTGGACCGATATCCCGCTGCTCGGCGCCCTGGCGCCGCTGACCTTCCTGT +CCAAGGCCGAGGTGCGTGCCTGGCCGCTGGCCGGCTGGCTGGCGGAAAAGGCCGGCACCC +TGTTCATCCGCCGCGGCTCGGGCGACAGCCGGCTGATCAACCAGCGCTTGGCCGAACAGC 
+TGCACCGCGGACGCAACCTGCTGATCTTCCCGGAAGGCACCACCACCAACGGCGAAAGCC +TGCGCACCTTCCACGGTCGCCTGATGGCCAGCGCCCTGGAAGCCGGCGTGGCGGTGCAGC +CGGTGGCGATCAGCTACCGCCGCGACGGCGTGCCCGATGCGCAGGCGCCGTTCATCGGCG +ACGACGACTTGCTCAGCCACCTCGGCCGCCTGCTGCGCGGCGAGCGCGGCAGCGTGCATA +TCCAACTGCTGGAACCGATCCCCAGCCAGGGCCTGGACCGCGCCGAACTGGCCCGCCAGG +CCCAGCAGGCGGTGCGCCTGGCGCTGTTCGGCACTGCCGCCACTACGCAAACCCGGCGCG +CCGCCTAGTCGCGCTTGGGCGGCACCGCCAGCACGTCGCACGGCAGGTCGCCGAGGGCCG +CCTCGGCCACGCTGCCGATCAGCGCGCGGCGGAAGCCGGTCAGGCCCTGGGTGCCGAGCA +CCGTCAACTGCGGCGCCTGGCGGCGTACCTCGGTCATGATCACGTTGATCGGCAAGCCTT +CGCCGACGCGCAACTGGATGTCCTCGCGGCCCAGCCCCAGGCCGGCGACGTAGTCGCGCA +GCTCGACGTTGGCCTTGAGTTCCTCCTGGTCGACGTAATGCTCGACGCGGTCTTCCTTGA +TCCCCGAATAACGCATCATGCCTTTGGCGAACGGCTCGAAGGCATGCAGCGCGGTGAGGC +TGGCGGCGTCGAGGAAGCCGAGCTGGCCGGCCATCTGCACCGCGCGCGTCGAGGTCGGCG +AGAAGTCCAGCGCCAGCAGCACCCGGCGGTACTCCTCTTCCGGCGCACCGGCCGCGCGCA +GCACCGGGATCTTCGCATTGCGTACCACCCGCTCCAGGGTGGTGCCGATGAACAGGTCGC +GCAGCGGCGTGTGCCGGTGCGCGCCCATCACCAGCAGGTCGGCGCCGATCTCCTCGGCCA +CCGCGTTGATCACCACCACCGGATCGCCGACCCGCACGATCGCCTTGGACTCGACGCCGA +GGCGCTCCTTCAGGGCCTGTCCGTTCACTTCCAGCAGGTTGGCGATGGCCAGCCGTTGCG +GCGCGATCAGCACCGGCGGCTGGTCGTCGTCGAGCACGTTCAGCACGGTCAGTTCGGCAT +TCCTCGCCTTGGCCAGCGCCGCGGCGCGCATGACCGCCAGCTCGGAACGCGAGGACAGGT +CGGTCGCCACCAGGATTCGTTTCATCTTCCACCTCCATTGGCAGAGCGTTGAACGTCGGC +CCCATTGAACGATCGACGCGACCGCTGCCGGTTGATGTGCATCAAGCGGATGCTTCGGAC +TGAAACAATTGATAGGTGATTCCGCCGGCCGGGCCAATGGCCGCTGCGCCAGGCTCAGCG +CTGGCTCAGGGCGAAAGCCTGGAGTTGCGGGTAGAAGGCGCGGAAATCCGCGCTCAGGTC +ATCGTAGCGCTGGGCCAGTTCTTCCCAGGAACCGTCCAGCAGGTGCGGTCGCGACAGCCT +CCGCGACATGCCGCCCAGCACTTCGCGCAAGACCGCGAACTCGCGGTAGCTGCCCAGCCA +GTCCTGGGCCGCCATCCGCGGCGCGATGCGCGCCAGGCGCTCCGGCAGCGGCGAGGCGGT +GCGCAGAGTGCCGTAGACCCGCTCGACGAACTGCGGCAGCGGTTCGTCGGCGTAGTCGTT +CCAGTCGCGCGCCAGGCAGTGGTCGAAGAACACGTCCAGCAGGACGCCGGCGAAACGCCG +GCGCTCCAGCGGGAAGCGCCGCTTGGCCGCATGCACCAGTGGATGGCTGTCGGTGAAGGC +GTCGATGCGCCGGTGCAACTGGATGGCCCGCTCGATCTCGCCCGGCCATTGTCCCTGCAG +GCGGCCCTTGACGAAGTCGCCGTACAGGCTGCCGAGCAGTTGGGCCGGTTGCGGGCCGCC 
+GAGGTGGAGATGCGCGAGGTAGTTCATGCGGGCTAGCTTACCCCAGGGACGGAACCGTCG +TTTCGATCATCGTCATCAGCCCTGTCGATAGCGACTATCGTCGCAATCGATCCCCATATC +GTCCCACACCGATATATAGTTCGTCCCAACCCGATATGGACGGCCAACGGACCACCGCCA +TGCCACTGGACATCGACGAAATCATCAAGGCTCTCTCCCACCCGGTGCGACGCGACATGC +TGCGCTGGCTGAAGGAACCGGAGAAGTACTTCGTCGAGCAGGACCACCCGTTCGAGATCG +GCGTCTGCGCCGGCAAGTTCGACCAGCGTACCGGTCTCTCCCAGTCCACCGTGTCGGTAC +ACCTCGCCACCCTGCAACGCGCCGGTCTGGTGACCAGCCGCCGGGTCGGCCAGTGGAATT +TCTTCAAGCGCAACGAGGAGACCATCCAGGCCTTCCTCGACCAACTCGGCGACGAGCTGT +AAGCAGCACTCGCTTCCACCCCACCACCTCGCAGTCCAGGAGCTCGACCGATGCCGGCGT +CTCTTCTCGTCCTCGCTTTATCCGCCTTTGCCATCGGTACCACCGAATTCGTCATCATGG +GCCTGCTGCCGCAGGTCGCCGGCGACCTGCGGGTATCCATTCCCAGCGCCGGCTGGCTGA +TCAGCGGCTACGCCTTCGCCGTGGCCTTCGGCGCCCCGTTGATGGCCATGGCCACCGCCC +GCCTGGAACGCAAGAAAGCCCTGCTCGCGCTGATGGGCATCTTCATCGTCGGCAACCTGC +TCTGCGCGGTGGCCGCCAACTACGGCCTGCTGATGCTGGCGCGGATCGTCACCGCGCTCT +GCCATGGCGCCTTCTTCGGCATCGGCTCGGTGGTCGCCGCCAGCCTGGTGGCGCCGAACC +GCCGCGCTTCCGCCGTGGCCCTGATGTTCACCGGCCTGACCCTGGCCAACGTGCTCGGCG +TGCCGCTGGGTACCGCGCTCGGCCAGGAAGCCGGCTGGCGCGCGACCTTCTGGGTGGTGA +CCCTGATCGGCGTGGTCGCCTTCGTCGGCCTGGCCAGGGTGCTGCCAAACGACCGCGAGG +AAGAGAAGGTCGACCTGCGCCAGGAAATGTCCGCGCTGAAGAACCCGTCGCTGTGGCTGG +CCCTGGGCACCACCGTGCTGTTCGCCGCCTCGATGTTCGCCCTGTTCACCTACGTCGCGC +CGCTGCTCGGCGAAGTCACCGGGGTCAGCCCGCGCGGCGTGACCTGGACCCTGCTGCTGA +TCGGCGTCGGCCTGACCGTCGGCAACGTGATCGGCGGCCGCCTCGCCGACTGGCGCCTGG +GCACCACCATGGCCGCGGTGTTCGCCGCCATGGCGCTGGTCTCGGCGCTGTTCAGCTGGA +CCAGCCAGGCCCTGCTGCCGGCGGAAATCACCCTGTTCTTCTGGGCCGCCGCCGCCTTCG +CCGCGGTGCCCGCACTGCAGGTCAACGTGGTCCGCGTCGGGCATGCCGCGCCGAACCTGG +TGGCGACCCTGAACATCGGCGCCTTCAACGTCGGCAACGCGCTCGGCGCCTGGGTCGGCG +GCAGCGTCATCGACCACGGCCTGGGCCTGACCCGCGTGCCGCTGGCCGCCGCCGCGCTGG +CCGCGCTGGCGCTGCTGGCCACCCTGATCGCCTTTTCCGGAAACGGCCGCGCCCAGGCGC +AGCCGGTCCTCGATTGATCCCCTGTCCGTCTATTCCCTAGTGGAGGATGTTCCATGGCTA +CTCTGTTCGATCCCATCGTGCTCGGCGACCTCGAGCTGCCCAACCGCATCGTCATGGCGC +CGCTGACCCGCTGCCGCGCCGATGAAGGCCGCGTGCCCAACGCCCTGATGGCCGAGTACT +ACGCCCAGCGCGCCGATGCCGGCCTGATCCTCAGCGAGGCCACCGCGGTCACGCCGATGG 
+GCGTGGGCTACCCCGACACCCCCGGCATCTGGTCCGATGACCAGGTCCGTGGCTGGAGCA +ACGTGACCAAGGCGGTGCACGCCGCCGGCGGGCGCATCTTCCTGCAACTCTGGCACGTCG +GGCGGATTTCCGATCCGCTCTACCTGAATGGCGAGCTGCCGGTGGCGCCGAGCGCCATCG +CCGCCGAGGGGCACGTCAGCCTGGTGCGCCCGAAGCGTCCCTACGTCACCCCGCGCGCGC +TGGACACCGAGGAGATCGCCGACATCGTCGAGGCCTACCGCCAGGGCGCCGAGCGGGCCA +AGGCCGCCGGCTTCGACGGCGTGGAGATCCACGGCGCCAACGGCTACCTGCTCGACCAGT +TCCTCCAGGACAGCACCAACAAGCGCACCGACCGCTACGGCGGCTCCATCGAGAACCGCG +CGCGCCTGCTGCTGGAGGTCACCGACGCGGCGATCTCCGTATGGGGCGCCCAGCGCGTCG +GCGTACACCTGGCGCCGCGTGCCGACTCCCACGACATGGGCGATTCCAACCGCCTGGAAA +CCTTCAGCCATGTCGCCCGTGAGCTGGGCAAGCGCGGCATCGCCTTCATCTGCGCCCGCG +AAGCGCAGGCCGACGACAGCATCGGCGTAGCCCTGAAGAAAGCCTTCGGCGGACCCTACA +TCGCCAACGAGCAGTTCACCCTCGACAGCGCCAACGCCATCCTGGCCAAGGGCGACGCCG +ATGCGGTGGCCTTCGGCGTCCCCTTCATCGCCAACCCGGACCTGGTCGAACGCCTGCGCC +AGGGCGCCGAACTGAACCCGCCGCGCCCGGAAACCTTCTATACCGGCGGTACCGAAGGCT +ACCTGGACTATCCGACCCTGGCCTGATCGCGCCAGCCAGGAAGAGCCCGCCACGCTTGTC +GTGGCGGGCTCTGTGCTTTGCCCGTCTAGCGCAGGCGGAAATACGGCTCCGGCTCCGCGC +ACGGATCGCGCTGGGCGCAGCCCTTGCAACCGCCGCCGCAACCGCCCGAGCTTTGCGCAG +CCACCCGCTCGACCTTGCCGAGCGCGATCAGCCGCTCCAGGATCGCCTCCACCAGCGCCG +GCGGCGCGTCCACCTGGCGGCTGAGCTGTTTCAGCCCACGCGCCTGGCCGTCTTCCAGGG +CGGCGCGGAGCTGCATGAGTGTCGCCATCAGTGACATCCTCCCTGCCCGCTCGGGGCATC +GGCCATTCGGCCTGGCAGTTGCAGTACCTGCTCACGGCCGAACAGACGCAGGCAGGTCAT +CAGGATCAGGTTGAAGAGCAGCACCACGGCGATGGTCAGGACGCTACGCTCGGGGTGCGC +GGCGAAGGTGGCGACCTGGTAGCAGAGGGTCGCCAGCGAGTAGGCGACGTTCAGCCCCCA +GAGCACCGAGAAAGCCATCCAGCCCTTGCTGCTTTCCCGGGCGATCGCGCCCATGGCGGT +CACGCAGGGCACGTAGAGCAGGACGAAGACCAGATAGCTGTAGGCCGCGATGGGGCTGCC +GAACTTGCTCGCCATGGTGCCCATCGAGCCGGTCTCCATGTCGCCGTCGGCCATGCTCGC +CTCCACCGGGTTGGCCAGCACGCTGAGGCTGAAGGTATCGACCAGGCCGTCCCAGGTCTC +GACCAGGGCCTCGCGCAGTTGCCCGGGCAGGTCGTAGCCTTCGTAGTCGAAGGCCTCGCC +CTGGATCTGCTCGGCGGTGTAGAGGGTGTTCAGGGTGCCGACCACCACTTCCTTGGCCAT +CGCCCCGGTGACCAGGCCGACGGTGGCCTGCCAGTTGTCCGGCTGCACCCCGAGCGGCGC +CAGCAGCGGAGTGAGACGCTGGCTGACGCTGGCCAGCGCGGAATGGCCGATGTCGCCCTG +CACCGGCTTGCCGTCGAGGGTGATGCTGTTCAGGCCGCCGATCACCAGGCTGACCAGGAT 
+GATCACCTTGCCGGCCCGCACCACGAAGCCGCGCAGGCGCGACCAGGTCTGCAGCAGCAG +GCTCTTAAGGTGTGGCACATGGTACAGCGGCAGTTCCATGACGAACGGCGAGGCCTCGCC +GCGCATCAAGGTGTGCTTGAGCAACAGCCCGGTGAGGATCGCCACGACGATGCCGAGCAG +GTACAGGGAGAAGATCACCAGCGCGCCGCCCTGGCCGAAGAAGGCCCCGGCGAATACGGC +GAAGATCGCCAGGCGCGCGCCGCAGGACATGAACGGCGCCATCATGATGGTAATCAGCCG +CTCGCGCTGCGCGTCCAGGGTCCGCGCGCCCATGATCGACGGTACGTTGCAGCCGAAACC +GACGATCAGCGGCACGAACGACTTGCCCGGCAGGCCCAGCGCCTGCATCAGGCGGTCCAT +GACGAAGGCCGCGCGTGCCATGTAGCCGGAATCCTCGAGCAGCGAGAGGAACAGGTACAT +CAAGCCGATCTGCGGCACCAGCGGCAACACCGTGTTGACCCCGCCACCGATGCCCTGGGC +GAGGAACACCGTGAGCCAATCGGGCAGGCCGAAGCGGATACCCAGCCACTGGATGCCGTC +GATGAAGATCGCCGACGAGCCCTTGTCGAAGATCGGCTGTAGCGCCCCGCCGATGTTGAT +GGCGAAGAAGAACATCAGGTACATCACCAGCAGGAAGATCGGCAGACCCAGCCAGCGATT +CAGGACGACCCGGTCGAGCCATTGGGTCAGCCGGTGCGGCTGAGCCTGCTGGTGGTCGCA +GACCGCCGCACAGATCTCGCCGATCAGTCGGTAGCGCGCATCGACGATCGCCAGCTCGGG +CTCCTCGCCGCAGCCCTGGCGCGCCTGTTCCAGGGTGGCGGGCGGCAAGCCCAGCGCCGG +GCCGTTGAAGATGTCGCCTTCCAGCGCCTGTAACGCCAGCCAGCGCGGCTCGATGGCCGA +GGCCGCCGGAGCGCGCGTTTCCAGAAGGTAGCCGACCTGCGCCTGGATGGCCGGTGGGTA +ATCCACCGCCAGCGCCGCCTGCGGGAGCTGTAGGGAGTCGATGGCCGCCTTCAGTTCGTC +GATGCCATCGGCGCGGGTGGATACCAGCGGCACCACCGGGCAGCCGAGACGCCGCGCCAG +ACCGTCGATATCGATGCGGATGCGCTGGCTACGGGCGATGTCGAGCATGTTCAGCGCGAC +GATGCAGGGGATGCCCATCTCCCGCAGTTGCACCGTGAGGTACAGGTTGCGCTCCAGGTT +CGCCGCATCCACCACGTTGACCAGCACGTCCACTTCGCCGCTGGCGATGTAGCGGCAGGC +GATCTGCTCGTCCAGCGAGGCCTGGGCGGAAACGCTGGTCAACGAATAGGTGCCGGGCAG +GTCGACCAGGCGTACCGCGTGACGGACGGTATGGAACGCACCCTCCTTGCGCTCCACGGT +GACGCCGGCCCAGTTGCCGACCCGCTGGCGCGAGCCGGTGAGCTGGTTGAACAGGGTGGT +CTTGCCGGAGTTGGGATTGCCGATCAGGCCGAGGGTCAATGCGGTCATGGGGATTCCGGG +CGGGGTCAGTCGAGCGGGACGAGGGTCAGCAGGGCGAGGTCCTTGCGCCGCAGCGCCAGG +CTGGTCTGGCGGGTCTCGACCTGGATCGGGTCGCCCAGCGGGGCGATGCGCACCACGCGC +AGCGCCGCGCCGGGCAGAAGCCCCATGGAGAACAGGCGTTGGCGATAGCCGTTGCTGATG +GCGGGGGAATAACCGGTGATGCGGTAGGAACGGGACGGTTGCAATGCGCTCATGGCGGAT +TCCAGGCAGAGGAAGGGACCAATGGCCTCCCTGGCCGGTGACCGAGAATCTAAACGACTC +TCACTTAAGGCTCGCTTAAGGACGCAGTATCGGAGTGGAGGAGCAGCGGATTCGCTGATC 
+CAGAGCAATGGAAGCGGGGATTTTCGCCGGGCGCGGCGAACTGTCGCCGCCAGCGCCCGG +TGCGACGGGCGGCTGGCGGCGCAAGGGGTCAGCGGGTATTGAGCTGCTGCTGCAGGTTCT +GCACCTGGGCCTGGAGGGTGTTGATGTTGCGCATCACCTGGGTGCGGAAGGCGTCGAACT +CGGCGGTATTGCCGCCCTGGGCCGGGCGGTTGTCGAGCTGGCTGCGCAATACCAGCAGGT +CATCCTGGATACCCTTGATCGTCTGGCTCGGGTTGCCCTGCTTCTTCAGCGCCGCGACAT +CGCCGCCGAGGGCCTTGAGTTGCTCGTTGGTCTTGCCCAGTTCGACCTGCAGGGCTTTCA +GCTTCTCCTGTTCGGCGGCCAGTGTCTGCAGCTTGCCGTCCAGTTGCGCGACCAGTTGCG +CCGCGCTTTCCTGCTGGGCCTTGAGATCGCTGGCCAGTTGGTCGAGGCGTTTGCCCTGGC +CGCCGAACTGGTTGCTCACATCGGCCTGCTGGCGTTCCTGGCTGCCGAGTTTCTCCTGCA +GTTGCTTGACCTGCAGGCGCAGCGCCTCGCGTTCGCTGGTGACGCTGGACTCGCTGGCGA +CCACCTTGCCGCGAATGTCGTCGAGGCGCCCGGCGGCTTCTTCGCTGATCCTTACGAAGC +TTTCCTGGGTCGCCACCAACTGCTGTTCCATCAGGCTGATTTGCTGGTGGCTCCACCAAC +CCACCCCGGCGAGGGCGATCAGCATCGCCCCGAGCAGGGCCCAGAGCGGGCCGGTAGAGG +CCGGACGGCGCTCCTTGGTGTCGACGACCCGGCGCTGCTCCGGTTCGTAGACGTGCCGCG +GGTGGTGGCCGAATTCATCACGGTCGGCAGCATCGGTGGTCAGGCTGGGCACATCGTCGA +GTTCGTCGAAAGCATCGTTACGCATGGTGAAAACCTGTGAAGGGGAACGGCATCCTGCCG +GAGGCCTGGAGAAAGGGCCGAATGGCGCATTATATCGGTTCGGCCCCGCCCCTTCAGCGT +CTGCTCGAAGCAAATTGCCGGACCGGGGCGCTCCACCGCGAAGCCGTGCGCCCCAAGACG +GAGCGAACGCTGTCCGGCACAGACCGCCGCAGGCCGCCAGCGGTTCCCCGCCCCGCACGC +CGGCGCTGGTCCGCGGCCCGCATGGCACGTCGCTTGCTTGGGCATTGCCGGGCAGATTCG +AGAGCCGGAGGATGCATGGAGTTGAACAAATCCTTGCTGGATTGCATGCGCGCCGTGCGC +CGACGACTACGCGAGGAACAAGCGCTGGATATCCACTTCCAGCAACGCGATGCGATCGCC +GCGATGCAGGCGGCCTGCGCCCGCTCCGGCGATGCGACGACGCGGGAGCTGGGACAGCGC +CTCGGCAGGCTCAGCGGGGTCGCGCTGCCGCCGGCGGAACCTTCCCTGTTGCCCGCGCAG +GCGCCGAGCCGACAATACGCCGGCCCGCTGCGCGGCTGATCAGAGTCCCTGGGCGCGCCA +CCAGGCGCAGAACTCGTCGAGCGCGGTCCACAGGCTGACCCGCGGGTCGTAGTCGAGGTA +TTCGCGGGCGCGGTTGATGTCCAGGGTGAAGTTCTTCGCCATCACCGCCATGCCCAGGCG +GAACAGCACCGGCTCCGGGCGCCCCGGCAGGATCCGGCAGACGCCTTCGTTGAGCGCCGC +CAGGCCATAGCCGACGGCATACGGCAGGTGACCGCCGACCGGCGGCAGGTCGAGCTGGCG +CATCACGTAGTTGACCGCATCCCAGAACGGTACCGGCTGCCCGTTGCTGATGTTGTAGAC +CTTGCCCAGCGCCGGCTCGCCGGCCAGCAGGCAACTGAACAGCGCGTCGTTGAGATTGTG +CACGCTGGTGAAGTCGACCCGGTTCAGGCCGTTGCCGAGGATCCGCAGGCGTCCCTTGCG 
+GTGGGCCTGGATCATTCGCGGGAAGATGCTGGTGTCGCCGGCGCCGACCACGAAGCGCGG +ACGCAGCGCCAACACTTCCAGGCCCAGGTCGCGGGCGCTGAGCACCAGTTGCTCGGCCTG +GTACTTGGTCGCCCCGTAATGGTCGGAAAAACGCCGGGGTACGTACTCTTCGTTCAGGTC +CAGGTGGTCGCGCCCGTCGAAATAGATCGACGGCGACGACAGATGCACCAGGCGCCGCAC +CTTTTGCCGCATGCAGGCCTCGACGACGCTCTCGGCAAGCCCGACGTTGGCCGCCAGGAA +GCGTTCGCGCGGTCCCCAGACTCCGACCGCGCCGGCACAATGCACCACCGCCTCGACATC +CTCGCAGAGGCGCAGGACCAGCGCCGGGTCGGCGAGGTCGCCGGGAACGAACTCGGCGCC +CCGCGCCACCAGATGTTCCACCGCGTCCGCACGACGACCGCTGACCCGTACCGACAGGCC +CTGCTCCAGGGCGAAGCGAGCGAAACGCCCGCCGATGAAGCCCGTCGCCCCCGTTACCAG +AATCCGCATTGCGTTTCTCCCGTTGTTCTTCTTGTGCCGCGACTCTAGCAACCCGGAGCC +GCGCCACCACTGGCACAGCATGCCAGAACAACGACCTTGCGGGCCGCCGGCACGACTTGC +GTTGGGCGACAGAAGCGGCGCTGCGCGCTACAAGGTCTAACCTGAAAGCCACCTGCACGC +TGGGAGACGCACCATGGCCAGTCACTGGATGATCTACGGCGCCAACGGCTACACCGGCCG +GCTGGTCGCTGAACAGGCGCAGCGCGAGGGCCTGACGCCGCTCCTCGGCGGACGCAACCC +GGCCGCCCTGCACGCCCTCGGCAGTCAGCTCGGGCTGGAGTGCCGGGTCTTCGACCTGGG +GGATCCGCAGGCCTGCCGCGAAGCCCTGGACCAGGTGAAGGTGGTGGCCCATTGCGCCGG +TCCGTTCTCCGCCACCAGCACGCCGATGATCGCCGCCTGCCGCGCCGCCGGCACCCACTA +CGTGGATATCACTGGAGAGATCGCGGTGTTCGAGCAGGCCCACGCCGGCGACGCCGAGGC +CCGCGAGGCCGGCATCGTGGTCTGCCCGGGCGTGGGCTTCGATGTGATCCCCACCGATTG +CCTGGCCGCCTGCCTGAAGGAGGCGCTGCCGGACGCGCAACGGCTGGCGCTGGGCTTCGT +CACCGGCAGCGGGCTTTCGACGGGAACCGCGAAGACCAGCGTCGAAGGCCTCAAGTTCGG +CGGCAAGATCCGCGAGAATGGCCGCCTGCGCGACGTGCCGTTGGGCTACAAGCGCCGCGA +CATCGATTTCGGCCGCGGCCTGCGGCACGCCGTGACGATTCCCTGGGGCGACGTGGCCAC +CGCCTACTACAGCACCGGCATCCCGGACATCGAGGTCTACCTGCCGGCGCCGCCGCTGCT +GGCCCTGGGCATGCGCCTGATCGATCCGCTGCGTCCGCTGCTCGGTCGGCAACGGGTCCA +GGACTGGCTCAAGGGGCAGGTCGACAAACGCATCGCCGGCCCCGACCAGGCGGCCCGCGA +GCGCCTGCGCACCTGGGTCTGGGGCGAGGCGCGCAACGCCCGCGGCGAACGCCGCACGGC +GCGCCTGGAGACGGCCAACGTGTACGACCTGACCCTGCACGGCGTGCTCCTGGCAGTGCG +CCACCTGCTGGACTACCAGGGTCCCGGCGGCTATTTCACGCCATCGCGGTTGCTCGGCGC +GCGCTGCGTCGAATCGTTGCCCGGCTCCGGACGAATCACCGTCATCGGCTGATCAGCCTG +ACACCCGCACCAAGCCGTCGCCGGCGCGGGCGAGCAGGTGCTCGGTGAGCGACGCGAGCA +GTTCGCCGCCGTTGCGCCAGTAGTGCCAGTACAACGGTACGTCGATGACCTGGCCGGGCA 
+GCAGCTCGACCAGTTCACCGCGCGCCAGCTCGCCTTGCACCTGGCGCTCCGGCACCAGGC +CCCAGCCGAGACCGCCGGCGGTGAGGCGGACGAAACCTTCCGAGGACGGGCAGAGGTGGT +GGATGAAACCGCCCTCGACGCCGAGGTCCTTGAGAAAGCGGTGTTGCAGCAGGTCGTCCG +GGCCGAACACGATCGCCGGCACGCCGGCCAGGGCGGCCGCCTCGACCCCGCGGGGAAAGT +GCCGGGCAATGAAATCCGGGCTGGCCAGGCCGCGATAACGCATGGCTCCGAGCAGCAGCG +AACGCGCGCCGGCCACCGGCCGCGCGCTACCGCAGACGCAGCCCGCCACTTCGCCCGCGC +GCATGCGCTTGAGCCCGACCTCCTGGTCCTCCACCACAAGGTCCAGCAGCACCCGCCGCT +CGGCGCAGAAATCGCCCACCGCGGCGGCCCACCAGGTCGCCAGGCTATCGGCGTTGAGAG +CGATGCGCAGGCGTTCCGGGGCACCGCCCTCATCCAGGTTCGGCACCCAGCGCTGCAGGT +CGCCCTCCAGCAGCCGCACCTGCTGCACATGGTTGAGCAGGCGGCGCCCGAGATCGGTGG +GATGCGGCGGCGTCTCGCGCACCAGGACCGGCTGGCCGACCCGCGCCTCGAGCAGCTTGA +TCCGCTGCGATACCGCCGACTGCGACAGCCCCAGCGCCTGTGCGGCGCGCTCGAAACCGC +CCTGCTCCACCACTGCGGCCAGCGCGGCGAGCAACTTGTAGTCGAACAAAATCAGTTTTC +CTAATGAGAGATCAGCAGGATTCGTTTTCCTTATAAACCGCCCGCGGCCACACTCGCCAG +CACTTTTCCGATCCACCCGACGCGGACCGACCATGGCTGGCGAAACCTCGCTGCGCACCC +TGCTGCGCACGCTCACTCCCGAACTCAATCCCGGCGACTATGTGTTCTGTACCTGCGCCG +CTGGCGCGCTGCCGAACGGCGCCGAGCCGCTCGCCAGCTTCCGCGAGCGCGAGGGCCTGA +CCCTGGTGCTCGAACGCCGGCAGGCCGAACGCCTGGGCCTGCCCTACGAATACGTGGCGG +CCTGGATCACCCTGACGGTGCACTCCTCGTTGGCGGCGGTGGGCCTGACCGCCGCCTTCG +CCACGGCGCTGGCCGAAGCCGGGATCAGTTGCAACGTGATGGCCGGCTATTTCCACGACC +ACCTGTTCGTCGCCCGCGACGAGGGCCGGCGCGCCCTCGCCGTGCTCCAGCGGCTGGCGG +CGGAGGCACACTGACATGTGGCAGAGCTATCTCAACGGCATCCTGGTGGCTGCCGGCCTG +ATCATCGCCATCGGCGCGCAGAATGCCTTCGTCCTCGCGCAGAGCCTGCGCCGCGAGCAT +CACCTCTCGGTAGCCGCGCTCTGCGTGTTCTGCGACGCGGTGCTGGTCAGCCTCGGCGTG +TTCGGCCTGGCCAAGCTGCTGCTGGAAAACCCGACGCTGCTGGCCATCGCCCGCTGGGGC +GGGATCGCCTTCCTGACCTGGTACGGGCTCAAGGCGCTGCTTCGCGCGTTGCGCCCGGAC +GCCCTCGGCAACGCCGCGGAAACCGGGCCACGCTCGCGCAAGGCGGTGCTTCTGGCGGCA +CTGGCGGTCACCCTGCTCAACCCCCACGTCTATCTCGATACCGTACTTCTTATCGGTTCG +CTCGGCGCCCAGCAGGCCGCGCCGGGCGCCTATGCCCTCGGCGCGGCCAGCGCCTCGCTG +ATGTGGTTCTTCGCCCTCGCCCTCGGCGCGGCATGGCTGGCCCCCTGGCTGGCGCGCCCG +GCCACCTGGCGCCTGCTCGACCTGATGGTGGCGGCCATGATGCTGGGCATGGCCGCGCAA +CTGCTGTTCCGGGGATAACGCGGTTTCTGTCTTCCCGAGCGAACCCTCGCGCCGCCAGGC 
+AGTCCAGAGAGAATGTGGCGGTGTTCGCCGGCGAGCTTGCAGAAGCCCGTCGCAGCGCGC +CGCCGCCGGCCTTTGCCCCTACAGTTGCTGCGTGGATATGCCCCGACACGGGTGCTATGA +TCCGGAGTTCGCGGCACTGGGAGCCTAGGCTCCGAGTCGCACCCGGCGCCCTTTCCGGAG +CGCTCCGCCGAACGTCCCCCCCGGGGATCCAGGACGGGCTCCCGGACAGGCATCGGACCG +GCCCCGTGAACCGGTCGCGCGCTAGCCGCGCCAGTCCTGACCTGAGGAAGAATAGGAGAG +ACACCATGGCTTTCGAATTGCCGCCGCTGCCTTACGAAAAGAACGCCCTTGAGCCGCACA +TTTCCGCAGAAACCCTGGAGTACCACCACGACAAGCACCACAACACCTACGTGGTGAACC +TGAACAACCTGATCCCGGGCACCGAGTTCGAAGGCAAGAGCCTCGAAGAGATCGTCAAGA +GCTCCTCCGGCGGCATCTTCAACAACGCCGCCCAGGTGTGGAACCACACCTTCTACTGGA +ACTGCCTGAGCCCGAACGGCGGTGGCCAGCCCACCGGCGCCCTGGCCGACGCCATCAACG +CCGCCTTCGGCTCCTTCGACAAGTTCAAGGAAGAGTTCACCAAGACTTCCGTCGGCACCT +TCGGTTCCGGCTGGGGCTGGCTGGTGAAGAAGGCCGACGGCTCCCTGGCCCTGGCCAGCA +CCATCGGTGCCGGCAACCCGCTGACCAGCGGCGACACCCCGCTGCTGACCTGCGACGTCT +GGGAACACGCCTACTACATCGACTACCGCAACCTGCGTCCGAAGTACGTCGAGGCGTTCT +GGAACCTGGTCAACTGGGACTTCGTAGCGAAGAATTTCGCTGCCTGAGTCTGATCAGAAC +CACGAAAAAGCCCGGCATTCGCCGGGCTTTTTCGTTTTAATCTGCGGTCCAGGAGCCATC +GTCTACCCTCAGGAACTGGTAGACCTTCGCTCGTGATAGCCTCTTGAGGTAAACAAGCTC +TTCCTACCGGCTTGTCTTTCGCCCTGGCGACGAACAAGGAAGGCCCCTTGAAACTGGACT +CCCGACACAGCCTCTCGCTCAAGCTGCTGCGTGTCGTGCTACTGGCGGCGCTAGCCGTTG +GCGTAGTGCTCAGTTGCGCGCAGATCGTCTTCGACGCCTACAAGGCCAAGCAGGCGGTGA +GCAGCGACGCCCAACGCATCCTCGCGATGGTCCGCGACCCTTCCACCCAGGCGGTCTACA +GCCTCGACCGGGAAATGGCGATGCAGGTCCTCGAAGGCCTGTTCCAGCACGAGGCGGTGC +GCCAGGCGAGCATCGGCCATCCCGGCGAGCCAATGCTGGCGGAGAAATCCAGGCCGCTGC +TGGACCTGCCGACGCGCTGGCTGACCGACCCGATCCTCGGCCAGGAACGCACCTTCAGCA +TTCGACTGATCGGCCGCCCGCCCTATAGCGAATACTACGGCGACCTGAAGATCACCCTGG +ACACCGCGCCCTACGGCGAGAACTTCGTCACCACTTCGGAAATCATCTTCATCTCCGGCA +TTCTCCGCGCCCTGGCCATGGGCCTGGTGCTGTTCCTGGTCTACCACTGGATGCTGACCA +AGCCGCTGTCGAAGATCATCGAGCACCTGGTCAGCATCAATCCCGACCGCCCCAGCCAGC +ACCAGTTGCCGCTGCTCAAGGGGCACGAACGCAACGAGCTGGGGCTCTGGGTGACCACCG +CCAACCAGTTGCTCGCCTCGATCGAAAGCAACAGCCACCTGCGCCGCGAGGCCGAGGACA +ACCTGCTGCGCATTTCCCAGTACGACTTCCTCACCGGCCTGCCGAACCGCCAGTTGCTGC +AGCAGCAACTCGACCAGATCCTCGACGGCGCCGGCCGCCAGCAGCGCCGGGTGGCAGTGC 
+TGTGCCTGGGCCTCGACGATTTCAAGGGGATCAACGAGCAGTACACCTACCAGCTCGGCG +ACCAGCTGCTGATCGCCCTCGCCGACCGCCTGCGCGGGCACAGCGCGCGGCTCGGTTCGC +TGGCGCGCCTGGGCGGCGACCAGTTCGCCCTGGTCCAGGCCGACATCGAGCAACCCTACG +AGGCGGCCGAACTGGCGCAGAGCATCCTCGACGGCCTGGAAGCGCCGTTCGAGATCGACC +AGCACGAGGTGCGCCTGCGCGCCACCATCGGTATCACCCTGTTCCCCGAGGACGGCGAGA +CCACGGAGAAACTGCTGCAGAAGGCCGAGCAGACCATGACCCTGGCCAAGACCCGCTCGC +GCAACCGCTACCAGTTCTACATCGCCAGCGTGGACAGCGAGATGCGTCGCCGCCGGGAAC +TGGAAAAGGACCTGCGCGACGCCCTGCAGCGCCACGAGCTGCACCTCGTCTACCAGCCGC +AGGTGGACTACCGCGACCACCGCGTGGTCGGCGTCGAGGCGCTGCTGCGCTGGCAACATC +CGTTGCACGGCTTCGTCCCGCCGGACCTGTTCATCCCGCTGGCGGAACAGAACGGCAGCA +TCTTCAGCATCGGCGAGTGGGTGCTCGACCAGGCCTGCCGGCAGTTGCGCGAATGGCACG +ACCAGGGCTTCGACGACCTGCGCATGGCGGTCAATCTTTCCACCGTGCAGCTCCACCACA +ACGCCCTGCCACGGGTGGTCAGCAACCTGCTGCAGGTCTACCGCCTGCCGGCGCGCAGCC +TGGAGCTGGAAGTCACCGAGACCGGCCTGATGGAGGACATCTCCACTGCCGCCCAGCACC +TCCTCAGCCTGCGCCGCGCCGGCGCGCTGATCGCCATCGACGATTTCGGCACCGGCTATT +CCTCGCTGAGCTACCTGAAGAGCCTGCCGCTGGACAAGATCAAGATCGACAAGAGTTTCG +TCCAGGACCTGCTGCAGGACGAGGACGACGCGACCATCGTTCGCGCCATCATCCAGCTCG +GCAAGAGCCTGGGCATGCAGGTGATCGCCGAGGGCGTGGAAACCGCCGAGCAGGAGGCGT +ACATCATCGCCGAAGGTTGCAACGAAGGTCAGGGCTACCTGTACAGCAAGCCGCTGCCGG +CCAGGGAACTGACCCAGTACCTCAAGCAGGCGCGACGCCTGAGCCAGGCCACCAGCAGCG +AACGGCCCTGATCAGAACAGGCGGCTGAACAGCCAGCCCGCCAATAGCGCGCTGCCCAGG +CAGAGCAACGCGGCCCCCGCCGCCTGGCGGCGCAGTTGCACGGCGAGATCGTCCTCGCCC +TGGCTGGAGAGAATGAAGGGATGCGCCTCCCCCGGCTTGCCGAGCCGGTGGCGGGTCGGC +GCGGCGCTGCTTGCCCGATGCCGATCCTCGGCCTCCAGCCGTGCCGCCAGGCGCACCCGG +TTCCATTCGCGCTCGTCCAGCTCGCCGTTGCCGTCGCTGTCGAAGCGCGCCAGCAGGCCG +TGGAAATCGCCCTTCCATTCACGGATCACCGCACCCTGCGCCCGTTCGGCATCGAGTCCC +TGGCGACCGCCACCGCTGCTGCGGAATTCGCCCAGCGCATAGAGCGGCTCGCCGGCATGC +AGGCGCTCCTCGGTGTAGCGGTAGAGCCGCTCTCCGCCGACCAGGGTGCCGAAGCTCTCC +AGCAGTCGCAGGGTATCGATCGGTGCTTCGCGAAAAGCCTCCCAGCGCTGCAGGGTCAAG +GGTCGCACCTCGGCGCCGCGTGGGTCGACCAGGCAGGCATCGGTGGCATCGCGCAAGCCG +AATGGCGACTCGCTGGCACCTTTGTCGACGGTGCGCCAGGCCTTCTCGCGGCCACTGCGC +TTTTCCTCGACTTCGATGCGGTAGCGCCACCACAGGCACGGCTTGCCGGTCAAGGGAGCC 
+TGCAGCGGGCCTTCCGGGCCCTCCTCGAGCACCCCGTAGAGTTCGACGAAACCCTGCGCC +GCCGAACGGATCCGCGAGGTCGGTACGTCGCCGAGCAGGCGCGCCCGGGCAAGGCGGTGC +ACGAACTGCCAGCCGGCCCAGGCGCAGAGCCCCAGGCTGGCGGCGATGAACAGCCAGCGC +AGGTCGAAGTCCATCTCAGCCGAACAGGGCCTTGAGATCGACGTCGGCCTTCTCCGCCTC +GCTGAACTGCAACAGCTCGGCGGCCTTGAAGCCAGCGACACGAGCGATCAGCAGGTCGGG +AAATTGCTCGATGCGCACGTTGTTCAGGTTCACCGCCTCGTTGTACAGCTCGCGGCGATC +GGCGATCCCGTTCTCCAGGCCGCTGATGCGCTGGGAAAGGAACTGGAAACTTTCGTTGGC +CTTGAGCTGCGGGTAGTTCTCGGCCAGCGCGAAGAGCTGCCCCAGGCCGGCGCGCAGGCC +GCTCTCGGCCTTGCCCAGGGCATTCACGTCGGCGCGCTCGCGGGCGCTGGCGACGGCGTT +GCGGGCGGCGATGACCCGCTCCAGGGTGGCGCGCTCGTGCTGCATGTATTGCTTGCAGGC +CTCGACCAGCTTCGGCAACTCGTCATGGCGCTGCTTGAGGAGCACGTCGATGTTGGCCCA +GGCCTTGCCCACGGCGTGCTTCAGCCGTACCAGGCCGTTGTAGAGCACCACCGCGTAGCC +GGCGAGCAGCAGCAGGACAACCCAGAAAGCGATAGCGGTCAGACTCATGGACGGAGCTCC +GGACGAAGATACTGGCATTCTAGCTGCGCCCGACATGTCCAGGCACGACGGCGATCACGC +TGCGCCCTTTTCAAAAATGCAAATCTTTCGCATTATGTGGCGGTTTTTTAACGCCCTCGC +GGGTGTTTCCGACAAGCGCACGCCGCTTCGGCACGCCACCAATAAAGCAAAGGAATCCGC +CATGACTCGTATGCCCCTGGCCACCGCCAGTCTGCTGGCCCTCGCCATCTCCCTCGCCGG +CTGCGGCGACGACAAGAAAGCCGAAGCGCCCGCAACACCGGCGGCCAGCACGCAGCCCGC +CGCGCCCGCCGCCGCCCCGGCCGCCAAGGTCGACGAGGCCGCCGCCAAGGCGGTGATCAA +GAACTACGCGGACCTCGCCGAAGCCACCTTCGCCGATGCCCTGAGCACCGCCAAGGACCT +GCAGAAAGCCATCGACGCGTTCCTCGCCAAGCCTGACGCGGAGACCCTGAAAGCCGCCAA +GGAAGCCTGGTTCGCCGCTCGTACCCCCTACTCCCAGAGCGAAGCCTTCCGCTTCGGCAA +CGCGATCATCGACGACTGGGAAGGACAGGTTAACGCCTGGCCGCTGGACGAAGGCCTGAT +CGACTATGTCGCCAAGGACTACCAGCACGCCCTGGGCAACCCCGGCGCCACCGCCAACAT +CGTCGCCAACACCGAGATCCAGGTCGGCGAAGACAAGATCGACGTCAAGGAAATCACCGG +CGAGAAACTGGCCAGCCTGAACGAGCTGGGCGGTTCCGAAGCCAACGTCGCCACCGGCTA +CCACGCCATCGAGTTCCTCCTCTGGGGCCAGGACCTGAACGGCACCGGTCCCGGCGCCGG +CAATCGTCCGGCCACCGACTATGCCCAGGGCAAGGACTGCACCGGCGGCCATTGCGACCG +TCGCGCCGCCTACCTGAAGGCCGTCACCGACCTGCTGGTCAGCGACCTCGAATACATGGC +CGGACAGTGGAAAGCCGGCGTCGCCGACAACTACCGCGCCAAGCTGGAGGCCGAACCGGT +GGATACCGGCCTGCGCAAGATGTTCTTCGGCATGGGCAGCCTGTCCCTCGGCGAACTGGC +CGGCGAGCGCATGAAGGTCGCGCTGGAGGCCAACTCCACCGAAGACGAGCACGACTGCTT 
+CAGCGACGACACCCACCACACCCTGTTCTTCAACGGCAAGAGCATCCGCAACATCTACCT +CGGCGAGTACAAGCGCATCGACGGCAGCGTGGTCAAGGGCCCGAGCCTGGCCGACCTGGT +CGCCAAGGCCGACGCCGCCGCCAACGACACCCTGAAGGCCGACCTGGCCGACACCGAGGC +CAAGCTGCAGGCCATCGTCGACAGCGCCGAGAAGGATGGCGTGCACTTCGACCAGATGAT +CGCTCCGGACAACAAGGACGGCCAGCAGAAGATCCGCGACGCCATCGCCGCCCTGGTCAA +GCAGACCGGCGCCATCGAGCAGGCCGCGGGCAAGCTGGGTATCCAGGACCTGAAGCCGGA +CAACGCCGACCACGAGTTCTGATTCCTTCCTGCGCCAACAGCGTGCGGCCCATGGCCGCA +CGCTGCGTTTCAGGGCCTCGAAAGCGGACTTCGCGGGCCTTTACAAACGCAAATCCCTCT +TATTTTGATTAGCTCCAACTGGTAAGCTTGCGGGCTGTTTCTACGCAGTCCGGGATGTTC +GCCGATGTCGTTGCCGTCACCTTCCATGCCGCTCGCCTGTCTCCTGACGGCGCTCCTTCT +GGGCGGATGCGGCGCCGACGACGAACCGCTGCGCGCCGAGCCCGGCGAACACCTGTCCGG +GGGCGCGACCACGGTGCTGCAAAGCGACCGCAACGCCTTCTCCCTGCCCTCCGCCAACCT +CGCCCCGAGCCGGCGCCTGGACTTCAGCGTGGGCAACAGTTTCTTCCGCAACCCCTGGGT +CATCGCCCCTTCCTCCACCACCGCGCGCGACGGTCTGGGACCGCTGTTCAATACCAACGC +CTGCCAGAACTGCCATGTGAAGGACGGCCGCGGCCACCCGCCGGGAGCGGACGCGGTCAG +CGCGGTGTCGATGCTGGTACGCCTGTCGATCCCGGCCGGCCCTGCGGACGGCAAGACCCT +GTTGCACCAGGGGGTGATTCCCGAGCCGACCTACGGCGGCCAGTTGCAGGACGTCGGCAT +TCCCGGCGTCGCCCCGGAAGGCAAGGTGCGGGTGGACTACGAGCCGCTGAAGGTGAGGTT +CGAGGACGGCACCGAGGTCGAACTGCGCAAGCCGATCCTGCGTATCAGCCAGCTCGGCTA +TGGGCCGATGCATCCGCAGACGATGTTTTCCGCCCGCGTCGCCCCGCCGATGATCGGGCT +CGGCCTGCTCGAGGCGATTCCCGAAGAGGCGATCCTGGCCAATGCCGATCCGGACGACCG +CAACGGCGACGGTATCCGCGGGCGCGCCAACCAGGTCTGGGATGCAGCGCGGCAGCGCAC +CGCGCTCGGCCGCTTCGGCTGGAAGGCCGGCCAGCCGGATATCCCGCAGCAGAACGCGCA +CGCCTTCGCCAACGACATGGGCCTGACCAGCAGCCTGCTGCCCCACGACGACTGCAGCGC +CGCCCAGGTCGAGTGCCGCCGGGCGCCCGACGGCGGCGAGCCGGAAGTCAGCGACAACAT +CTTCGCCCAGGTGCTGTTCTACAGCCGCAACCTGGCGGTCCCGGCGCGGCGCAAGGTGGA +CGACCCGCAGGTGCTGGCCGGCAAGCGCCTGTTCGCCCAGGCCAACTGCGTGGCCTGCCA +CGTTCCCGCCTTCACTACCGGTTCCGACGCCAGCGAGCCGGAGCTGGCCAACCAGCGGAT +TCGTCCCTATTCCGACCTGTTGCTGCATGACATGGGCGACGGCTTGGCGGACAATCGCCC +GGAATTTCTCGCCAGCGGGCGGGACTGGCGAACCCCGCCGCTATGGGGCATCGGCCTGAC +CGAAACGGTCAACGGCCACACCCAGTTCCTCCACGACGGGCGCGCCCGCAATCTGCTGGA +GGCGATCCTCTGGCACGGCGGCGAAGCCGAAGCGGCCAAGCGGCACGTACTCGGCTTCGA 
+CGCCGACCAGCGCAGCGCCCTGCTGGCCTTCCTGAATTCACTCTGAGGAGCCCACATGTT +CCGCCCACGACTGCTCTTGACCAGCCTCGCCATCGCCCTCGGCGCCTGCTCGCCGCAAGA +CCCGCAGGCCGTCACCAGTGCCGCCCTGGCGCAACAGGTGATCCTGCCGACCTACAGCCG +CTGGGTGGAGGCCGACCAGGCCCTGGCCAGCAGCGCCCTGGCCTACTGCCAGGGCAAGGA +AGACCTGGCCAAGGCCCGCGACGCGTTCCACGCCGCGCAGAAGGCCTGGGCCGAGCTGCA +ACCGCTGCTGATCGGCCCGCTGGCCGAGGGCAACCGCGCCTGGCAGGTGCAGTTCTGGCC +GGACAAGAAGGACCTGGTCGGGCGCCAGGTCGAACAGTTGCTGAAGAACAACCCGCAGGT +CGACGCCGCGGCCCTGGCCAAGGCCAGCGTGGTGGTGCAGGGGCTCTCCGCCTACGAATA +CATCCTCTTCGACAGCAAGATCGACCTCGCCGACGCCGCCACCAAGGCCCGCTATTGCCC +GTTGCTGGAAGCCATCGGCACCCACCAGCAGCAACTGGCCCAGGACATCCTGGCGCGCTG +GAAGAACGACGGCGGCATGCTCACCCAGATGAGCAAGTTCCCCAACGACCGCTACGCCGA +CGCCCACGAGGCGATCGCCGAGCTGCTGCGGGTCCAGGTCACCGCCCTGGACATGCTGAA +GAAGAAACTCGGTACTCCGCTGGGCCGGCAGAGCAAGGGCATTCCCCAGCCCTACCAGGC +CGAGGCCTGGCGCAGCAACGCCTCGCTGGCCAGCCTGGACGCCAGCCTGAGCGGCGCCCA +GGCGCTCTGGGAAGGTATCGACGGCAAGGGCCTGAAGACCCTGCTGCCGGCCGAACAGAA +GGACCTGGCCGGCAAGATCGACGCCGCCTACGCCGACAGCCACGCGAAGCTGGCGGCGCT +GGAACAGAAACCGCTCTCCGAACTGCTCGCCAGCGAAGACGGGCGCAACCAGCTCAACGC +CCTCTATGACAGCCTGAACGTGGTCCACCGCCTGCACGAAGGCGATCTGGCCCGCGCCCT +CGGCGTGCAACTGGGCTTCAACGCCAACGATGGCGACTGAGGTCGGCACCATGCTGCGAC +GTCATGTGATCGGCCTGGGCAGCCTGCTGCTCGGCGCCTTGTCCTTCGGCGGCTGGAGCT +TCAGCCGCCTGGGTAGCCAACCGCTGGTGCTATCGGCGCGCGACGACGCCGATGGCCAGC +ATTACGCGGTGGGCTACCGCCTGGACGGCAAGTGCCAGTTCGCCACCCGCGTGGCCCAGC +GCTGCCACGACATCGTCCAGCACCCGAGCCTGCCGCTGGCGCTGTTCGTCGCCCGCCGGC +CAGGCACCGAAAGCTACCTGATCGACCTGAACGACGGTCGCCTGCTGCAGACCCTGGTCT +CGCAGAAAGACCGCCACTTCTACGGCCACGGCGTGTTCCACCAGAGCGGCGAGTGGCTCT +ACGCCACCGAGAACGACACTACCGATCCCGGTCGCGGCGTGCTCGGCGTCTACCGTTTCG +ACGGCGAGCGACTGCAGCACAGCGGCGAGATCTCCACCCACGGCCTCGGGCCGCACCAGG +TTTCCTGGATGCCCGACGGCGAGACCCTGGTAGTGGCCAATGGCGGCATTCGCACCGAGG +CGGAAAGCCGGGTCGAGATGAACCTCGACGCCATGGAGCCCAGCCTGGTGCTGATGCGCC +GCGACGGCAACCTGCTGTCCAAGGAAACCCTGGCGCAGCAGATGAACAGCGTCCGCCACC +TGGCGATCGGTCGCGACGGCACCATCGTCGCCGGCCAGCAGTTCATGGGCGATGCCCACG +AACATGCCGACCTGCTGGCGATCAAGCGCCCCGGCCGTCCCTTCGAAGCCTTCCCGGTGG 
+CCGAGGAGCAGCGCCTGGCGATGGCCCAGTACACCGCCAGCGTGGCGATCCACGACGACC +TGCGCCTGGTGGCCCTGACCGCCCCGCGCGGCAACCGTTTCTTCATCTGGGACCTGGACA +GCGGTGCCGTACGCCTCGACGCGCCGTTGCCGGACTGCGCCGGCGTGGGCGCGGTGAAGA +ACGGCTTCGTCGTCACGTCCGGACAGGGCCGTTGCCGCTTCTACGACTGCCAGGGCGAGC +GCATCGCCGCGCAACCGCTGGAGCTGCCTGCCGGCCTCTGGGACAACCACCTGCACCTGG +CCTGAGAGCACGGCATCAACCTCTCGCCATACCGGTCCGCGAAAGTCGGAAGCGACCTGC +AACACGCTTGAAATCTTTCGCCAATCCTGTGCTTTGACAAGCCCTGAAAGCTGCACGTAA +TGTGTCGGATTCGCCCCGAAACGGGGCGCTCTCGGCATACCAGGGACCAGGATTATGTTG +CTCCGCCGCATGTTGATCATGCTCGCCGCGGTGATCGCCGTGGTGGCGATTCTCGCCGGC +TACAAGGTCTACTCCATCCGTCAGCAGATCGCCCTTTTCAGCGCACCGAAACCGCCGATC +AGCGTGACCGCCAGCCTGGCCGAAAAGCGTCCCTGGCAGAGCCGCCTGCCAGCCATCGGC +AGCCTCAAGGCATTCCAGGGCGTGACCCTCACCGCCGAAGTCTCCGGCACGGTACGCGAC +GTACTGTTCCTTTCCGGCGACCAGGTGAAGCTGGACCAACCGCTGATCCAGTTGGAAAGC +GACGTCGAGGAAGCCACCCTGCGCACTGCCGAGGCCGATCTCGGCCTGGCCAGGGCCGAG +TACCAGCGCGGCCGCGAACTGATCGGCAGCAAGGCCATCTCGAAAAGCGAATTCGATCGT +CTCGCCGCGCAGTGGGCCAAGACCAGCGCCACCGTCGCCGAGCTGAAGGCGGCGCTGGCG +AAGAAGCGCGTGCTCGCGCCCTTCGCCGGGACCATCGGCATCCGCCAGGTGGACGTCGGC +GACTACGTCTCGCCCGGGACGCCGATCGCCACCTTGCAGGACCTTTCCACCCTGCTCCTG +GATTTCCACCTGCCCGAGCAGGACTTCCCCCTGCTCAGCCGCGGCCAGCTGGTGAAGGTC +CGGGTCGCCGCCTACCCCGGCCAGGTGTTCGACGCCGAGATCGCCGCCATCAACCCCAAG +GTCGACAACGAGACCCGCAACCTGCAGGTCCGCGCTGCCCTGGAGAACCCGGACGGCAAG +CTGCTGCCGGGCATGTTCGCCAACCTCGAGGTGATGTTGCCTGGCGAGGAACAACGCGTC +GTGGTGCCGGAGACGGCGATCACCTTCACCCTCTACGGCGACTCGATCTACGTCGTCGGG +CAGAAGAAGGACGAGCAGGGCCAGGTGTCGAAGGATGACAAGGGCCAGCCGCAACGGGTC +GTCGAGCGCCGCTTCGTCAGGATCGGCGAACGCCGCGAAGGCCTGGCGGTGGTGCTCGAA +GGCCTGGAGGGCGGCGAGCAGGTAGTGACTTCCGGGCAACTGAAGCTCGACAACGGCGCC +GCGGTGGCCATCGTCGCCGAGCGGGACCTCCAGCAAGAGCACTGAGTCGCGCGCCTTCCC +ACTCCGTGGCGGAAGGCTTGCCAAGGGACTGAAACATGGCTTTTACCGATCCGTTCATCC +GTCGTCCGGTCCTGGCGAGCGTGGTCAGCCTGCTGATCGTCCTGCTCGGCATGCAGGCCT +TCAGCAAGCTGGTGATCCGCGAGTATCCGCAAATGGAGAACGCGCTGATCACGGTGACCA +CGCTCTACGCCGGCGCCAACGCGGAAACCATCCAGGGCTACATCACCCAGCCGCTGCAGC +AGAGCCTGGCCAGCGCCGAAGGCATCGACTACATGACCTCGGTGAGCCGGCAGAACTATT 
+CGACCATCTCCATCTACGCGCGGATCGGCGCCAATACCGATCGCCTGGTCACCGAGCTGC +TGGCCAAGTCCAACGAAGTGAAGAGCCAGCTGCCGCCGGACGCCGAGGACCCGGTGCTGC +AGAAGGAGGCCGCGGACGCCTCGGCGCTGATGTACATCAGCTTCTACAGCGAGCAGATGA +ACAACCCGCAGATCACCGACTACCTGTCGCGGGTGATCCAGCCCAAGCTGGCGACCCTGC +CCGGTATCGCCGAGGCGGAGATCCTCGGCAACCAGGTGTTCGCCATGCGCCTGTGGCTGG +ACCCGGTGAAGATGGCCGCGTTCGGCGTCACCGCCGGCGAGATCAACCAGGCGGTGCAGC +AGTACAACTTCCTCGCCGCCGCCGGCGAGGTGAAGGGCCAGTTGGTGGTCACCAGCGTCA +ATGCTTCCACCGACCTCAAGTCGCCCCAGGCCTTCGCCGCCATCCCGGTGAAGACCGACG +GCGACCGCCGGGTGCTGATGGGTGATGTCGCACGGGTCGAGCTGGGCGCCGCCAGCTACG +ACGCGATCAGTTCGTTCAATGGGATTCCCTCGGTCTACATCGGCATCAAGGGCACGCCCA +GCGCCAACCCGCTGGACGTGATCAAGGAAGTGCGGGCGAAGATGCCCGAACTGGAAGAGC +AATTGCCGCCCAACCTCAAGGTGTCCATCGCCTACGACGCCACGCGCTTCATTCAGGCCT +CCATCGATGAAGTGGTGAAGACCCTCGGCGAGGCGGTGCTGATCGTCATCGTGGTGGTGT +TCCTGTTCCTCGGCGCGTTCCGTTCGGTACTGATCCCGGTGGTGACCATTCCGCTGTCGA +TGATCGGCGTATTGTTCTTCATGCAGGCCATGGGCTACTCGATCAACCTGCTGACCCTGC +TGGCGATGGTCCTGGCCATCGGGCTGGTGGTGGACGACGCGATCGTGGTGGTGGAAAACA +TCCACCGCCACATCGAGGAGGGCAAGCCGCCCTTCGAGGCCGCCCTGGAGGGCGCGCGGG +AGATCGCCGTACCGGTGGTCAGCATGACCATCACCCTCGCCGCGGTCTACGCGCCGATCG +GTTTCCTCACCGGCCTCACCGGCGCCCTGTTCAAGGAGTTCGCCTTCACCCTGGCCGGCG +CGGTGATCATTTCCGGGATCGTCGCCCTGACCCTGTCGCCGATGATGTGCTCGCGCCTGC +TGCGCCACGAGGAGAATCCCTCGGGCCTGGCGCATCGCCTCGACCTGATCTTCGAGGGCC +TGAAGCAACGCTACCAGCGCGCCCTCCACGGCACCCTGGACACCCGTCCGGTGGTCCTGG +TGTTCGCCGTGCTGGTACTGGCGCTGATCCCGGTACTGCTGATGTTCACCAAGAAGGAGC +TGGCGCCGGAAGAGGACCAGGGCATCGTGTTCCT +>NODE_10_length_39995_cov_63.156_ID_19 +TAGATGCTCAATGTGACATATACCACATCCACAATATACCATAATATGACAAACACCACA +AAAAGTTCCATCCACAGAAACGCATTGCTGCGGCGCTCGTTCCATATCTGTTTCAATAAA +TTCCTGTTCATGATCTAACGTTGGTTTAAAGCGTCCGTAATATTCATGCGCGATGCCCTC +CAGGCAGGGATGCACGCTGACAGCAAATTCAAGAGAAGGCAGAAAATGAATGCGGCCAAG +AATATCCAGGGACTGAACAACATATCTGCCGAAAGACTGGTTTCTCCTATTTGGGCACGG +TTCTCGCTATTGGAAAATAAGAAATCATTCAATAAAAAAGTACATGCATAACTGAAAAGC +ATTCCTACTGCTCCGGCTATGAGCGTCAATAATAAATTTTCATAAAACACTTGCCTCAAC +AAGACATTGGCAGTAGCACCAAATGCCTTCCTCACCCCTATTTCCGACATGCGCTTCCGC 
+ATACGGGACAAAGTCATGCTACTCAAATTTATAGCCGGTACCAGCAATAAAATAACAATC +ACCAACAGGTAGTGCAAATAAGCTTCCTTCGCCTGTAACTCTCTTCCCCAGTGACGAAAG +ATAAAAGAGAAACGATCATCGGGTTGTCCACGATAAAAAATTTCGGAATCCTGCAATCCG +TCATTGAATTTCTGACGCAGGCGTTCCGCCTCTTCTCTGATGGCAGGAAAATCTTTCTCC +GAATGTGCCAGAATGACCACACGAAAAGTTCCCATCGCATTGTCATACCAGCACTTCCTG +GCTATTTCTGTTGAATTGAAAGGTATCCAAATCTGCGCATAAGAAGAAACAGCCAGTTTG +GATACATCTTTCACAACACCCACTACCGTATAATCGGCCAAGTTGAGTTGGACCGTACGT +CCTACCGCTTCTACCGTACCAAACAAATTCCGGGCCACAGAAGCACTAAGTACAACCTTA +GGAAGTCCGCTTTCACTATCGGCAGCCGTAAACGGCTTTCCGGCAATAAAATCAAAAGAG +AACACTTTCCAAAAGGTATCGTCAGTTTCAAGGTTATCCACTGTCATCATTTTTCCAGCA +GGTACAGAAATACGCATCCTGCCTCCTGTAGACACTATAGTTACCGCCTCAGGAACAGTA +AGTGCCTTAAAACACTCTTTAGCCACTGTGAGTGACATACAGCCGTTTGATGAATTATCA +CTGGTATCTCCTTTCTGATGAATAGACATCGCTTTCATATAAAGAGAACGTGAACGGTTC +ACCTCGGGCACACAATCCGTAATGCGTACTTCCAACACCAGGACAATGACCATGATCATA +CAAATAGCCAATGCCGTTCCTATGATAGAAACCAAGCTGATGAGCCTATTCTCACGAAGT +TGGTAAAAGCCTGTTTAATATATAGTTTTATCATGTTTGTGCTATGCTCTAAAACCTTTT +AACAAAAAAGAGATTACTCATTATGCAATGCTTCCGCCGGCTGAACTTTCATAGCCTTAC +GTGCGGGAATCCAAATGCCTACTACAATCATCAATGCAATCAAAATAAAGGAAATAAGAA +CTGTTATTATGAAACGTCCACCTTCAATAGTAGTGCCATTCATCCAAGCATTCAATTCAC +TATTTGCCAGATTCCAATCAATAAAAATCGCAGGAATTGTGGCCACCGCCAATAATATCA +ATCCCTCTGCCAACTGACGGACAAAGATACTATGATCTGTTCCACCCAATGATTTCATCA +ATGCAATCTCACCACGTCGTTGCTGAGTACGGAACCAGAAAGTACCTAGCAATCCCAAGA +ATATATTCAACAACAAGAAACCCATTCCGAAAAGGTAACTGTTCCAGGCATTCGTATGTG +ACTGCTGAAAGTTACGACGGATATCGGTAAAAGAACGCACATCCGCGATAAACACATTGC +CCACGCGATACAGTTTCTCACTATCCGCTTTCAAACGGGCTATAAAATCGCGATCTTGAT +CCTCTTTTACGCGTACACAAAACTCCGTGCCCAAATCATACCATTCTTCCGGCATAGGAG +CCACCATGCAGTAACTGTTCCATGCCTCAAAATAATCCCCATAACGGACAACCTGCAAAG +AAGCCGCTAAATTATATGTATTCGTTGTATCCCCGAACAAATAAAATTGTTTGTCTACCA +ATGAAGTCAAATCACATCCATAGCGCCGTTTATAAAGATTATCTGAAGCTAAGAAATTCT +TCGGATGTTTCAGCATCTCAGCCAGTTGTTCAGGGGTCTCCCCACGGGTGCCACGATAAC +GGAACACACGGACAAAATCGGGAGACACCAATCTGCGTATAGTCCATCCCGGCGAACGTA +ATGTATCATACACCACTGACGCACTGCTGTTACTGCCATTATACGGGTAAGAATTTTGTC 
+CTAATCCGGCTGCCTCTATGTCCGGTCTATGCTGAAGACGACTTAGAAGTTCCTTGATAT +CCTGCCGCTTTTCCTCGTCTGTCTGATTGGGAATAAAGTCTGGACTTTTGTCCGTAAGAC +GTCCCATTTGCACCAAATAGCAATGAGAAATATCAAATCCCCGTGGTTCATTATAAATTG +AGGTCTGTACATACATATAGTCCACAATATACCATAAAACGACACTTACCAACAATAATT +CGGTGACAAGCCAAAGATTAGACCGCCATTCATTTTTTATCTGAGTAAATAGTTTCTTAT +TCATCTTGATTCTTTACTTTTAATTCGTTATCACCTTAGTGAATCCTTCCTCCCAGTGCA +TTGACAATGCTAGTCCTTGAAGCCCTCCAAGCAGGAATGCCGCTACTCAGCAAATTCAAA +ACAAAGCAAAACAGCAATGCATAAAGGAAAGTGGAAGGGTGCAATAAAATACTGGAATCA +ACTGTAGGGGCATTCAATGTCGCACTATAAGGCTGTGCAAACAGGATATCGGTACCTAAA +TAAGCCATAACAATACTGATAAACAAACCTACGGCACCAGCCAACAAAGTCACGACCAGG +TTTTCCATGATTATTTGTCCAACCATTTCCATGCGGGTGCTCCCAAAAGCACGGCGTACT +CCTATTTCCGCCACACGTTGGCGCAACCGGCTTTGCGTCATACTGCTCAGATTTATAGCA +GGTACTAACAACAGAATGATAAAAATGATAGCACGTTCACGACGAGCCGACTTCAAATCC +GGCTCCAAGTTAGCTGCAAATGAAATAGACTGAGTTTCCTGATCGTATGGCCGGTTACGG +TCTATCAACGTATACCCCTGATCGGCAAGAATAGAATTATACTCACTCATACGCCGCTTG +GCTTCAGCACGGATTTCTCCAAAATCATCATGGCTTCGAGCCAAAATAGTGACACTCATC +ATCCCCATGTGCTGGTCACTCCACGTATCATTAGGCAAATCCGTCGAAGTAAAGGGAATC +CATACTTGGCCATAAGAAGCATCGGCAAGAGTAGATACATCTTTCACTACCCCTACCACA +CGATAGGGAGCATGATTCAACAAAAACTCTTTGCCCGCAGATTCTGTGGAACCAAACAAA +GCCCGCGATATGCTCTCAGTAATCACAGCCACAGGAGTGCCTGCATTGAAAGTAGCTTCA +TCGTATGGTTTTCCATTCACAAAACGAAAATCGAACACTTTAAAAAAAGTATCATCCGTT +TCCCGCACATCCGCTCCAATAGCCGGCATAGCGGGTAATGAAACAGGAGTCGATACCGGC +ATACTGCAATAAATAGTGACAGCTTCAGGGGTTTTCAATGATTTATATATCTCACGCGCT +GTCCGCACACTCATCGGTCCGTTACTTGTTCCGTCCCCCCAATCCTTATGACTAATACTC +ATATAATGCACATGCAGAAAACGATCCCGGTTTGACTCGGGAGAAAAAGGAGCGACCTTC +ACTTGTTGTAGCATCACTACCAACATAATAAGGAAAATGCTAAGAGCAGTTCCTGCAATG +CTGATGACACTGATAAGCGGATGCTGGCGAAGTTGCGCCAAAGCTTGCTTAAAATACTGT +TTAATCATCTCTGTTATTTCTTAATAAATTCCTTTTCATAGTAATCAGTAGTTATTGCAC +CTGACGCCCATCAAAGAAACGAATGGTTCTGGAAGTCTGTTTGGCTTGTTCTTCATTATG +GGTTACCATCACAATGGTCCGTCCATCCTCTTTATTTAATTTATGCAACAATTCCATGAC +CTCTGCACCCATCTTTGAGTCCAGATTACCGGTAGGCTCATCCGCAAGAATAATTTCAGG +ATTACCCACAATAGCACGGGCTATCGCTACACGCTGGCACTGTCCTCCGGAAAGCTGTGT 
+GGGCATGTGCCGCATACGATGACTTAAACCGACACGATCCAACACTTCTTTTGCCAGACG +GGTACGCTCCTTGGCAGCCATCTTTCTATAGAGCAAAGGCAACTCCACATTGTCAATCAC +ATTCAGCGAATTAATCAGGTGGAACGACTGGAATACAAAGCCCAAGGTCTTGTTACGGAA +AGCAGCCAGTTCCTTGTCCTTCATGCTTTCTACAGACGTGCCGTTTATTTCAATCTTTCC +ACTGCTGGGAGCGTCCAGCAGCCCCATAATATTCAGCAAGGTAGACTTGCCGCAACCGGA +AGGACCCATAATGCTGACAAACTCCCCTTTAGCCACATCCAAATTTACATTTTCCAGCGC +TAATGTTTCAATCTCATTCGTACGGTAGATTTTATTGATACCGGTTAATTTAATCATTGC +CATATTTTTTTCTTATTTAAATTGTTAATCTTATACACTTTCTATTTTCATGAATCAAAA +GACATTCCAAAGCTATAATCATTTAATATTCAACGAATTTTAATTCTACATACTGTCCGA +TAACAGACGGTGTGACCGTTTTTACCGGACTAATACGACTGATGCTGTATGCCGGAAAAA +AATGTCAACAAGCACTATGAATAATATAAAAAGGTAAACGATCAAAAAAACGACATAGCC +TAAGAACAAATTAAATTATGTTCGGGAAATGAGTGAAAAGGATGAAAAAACACTTCATTT +CCCGTAACAATCAATGTATTAATTAAGAGAGAGTTTTATCTTGTTGTTTATGTCATAGCT +GCTTCTTATTTCAACTTCAGCTTATTCTTGTTTTTGTAACTGCTCATATCGCTGACAACC +ACCTTGTCACCCGGTTTCAACCCCGATATGACTTCCACATATTCAAAATTACTATCACCC +AATTGAACCTTACGCTTTACTATTTCATCCTTACTATCCTGAACAAATAATTCATACTCA +CCGCGACCGACATAGTAAGAAGCATTCGCCAATCTTAAAACCCCTTCTTTCACGGCATTC +ATTACATATACATCCGTCTTCAAGCCGGAACGCAATCGCTTGTTATTATCTTCATTCAAT +TGTACAATAAAGGAAATCACCCCATTCTTACTCAACGGTGTCACACTACTTACCGTTCCT +TCCAGTTTCTCATTGCCAATCTTGACTATGGCACGTCCACCTGCTGCCACACGGTCACCG +TATGTATCGGCTATTTCACCTTCCACTTTAAAGTGGCTCAAATCAGAAATGATAGCTACC +TGGCTGCCTTCCGCTACCTGCGCACCTACCTGATTATTTATATAGGTAAGAATCGCTTTA +CGCGGTGAACGAATTTGAGCGTCATCCAATGTACGTTTCATTTCAGCAAGACTCTTTGAG +AAAATATTGAATTCCAATTCCTTCACTTTCAAATCAGCCTCTTTTACTTTCGATTCATTC +GCGTATTGCTGACGGAGCTGTTCCAATTCCAATTTTCCGGTATTAAAGTTCAGTTCAGCC +TGACGAACCTTGTCCGTTGTTCCAGAGCCCAAACTATCCAAATAACGTTCGTTACGAAGT +TCCACCTCCATACGGTTCAATTTCATGGCGGAAATTTTCACTTGCATCGATAAATCACTC +AGATACGTATTATTATTCACTTTCAACTGCTCTAACTGATAACGTTTCATCTGTTCTTCA +TCCAGTAATTTCTTGTATTCAGTTTCCGTACTCTGCAAATCAAGTTTCAAAATCGGCGTA +CCTACATCCACACTGTCTCCTCCCTTACGGTAAACTTCCACGATACGAGTATTAATAGGT +GAGTTGATAATCTCTTCAAAAGCCGGAACAACCTTACCGGAAGCACTGACACTTACTTCA +ATAGTTCCATTGTCCACTTCTGAAAACACCAAATCTTTCCTATTGACGCTGCTTCGCATA 
+AACGAAATCAATACAGCTATACATACCACTGCCGCCACACCTATTGCACCATATTTAATG +AACTTCTTTTTGCGCTCCTTATCACGCACTTCTTTAGGAATTTCTCTGTCCATAACTTAT +TGTAATTTTATTATTATCATTTTTGTGCTAAAGAATAAACAACATCCGTGCCAAGGAAAC +AACTTATTGATAATCAATAACAAATAAAGCGTACAAGGTGTCCGATATCGGACACCCTGT +ACGCTTTTAAACATCAGTTACCGTACAATAATGATTAACGCAAACGCAATATATCACCTA +CTTTAGGAGAAGGGTCTTCCGGTTTCATCTTATTCATCTTATATAAATTTTTCAGACGAA +TACCATATTTTTGGGAAATGGAATACATAGATTCTCCCCCACGGACAACATGAACTATGT +GTTCCTTATCCGCACGACGATGCTTTTTATCCAAATAAATAATATCACCTGGCTTCAATA +CATATCCCTTATAAAGATCATTGTATTTCCTCAACTTGCGTTGACTGATATCAAATTCTT +TAGAAAGTTTTTTAAATGTATCCCCCGGACGTACCACAATATAGAGTAAATCATTAGCCA +AATAAGGCTGATGCGGGTTTGGGAATTCCTTCATCCACTTTATACCATCCTTGGTATCAT +ACTTATGTAATTCATATAATTCAATGATATCAATTAAACGATATGCATAGCGAGGGTCAG +TAGCATATCCCGCTTTCTTCAGCCCATGCGCCCATCCTTTATAATCGGTGATTTTTAATT +TAAAAAGAGATGCATAACGTGAACGTCCTTTTAAAAATTTAGAATGATCTTCATATGAAT +CACGAGGGTGCTTATAGGCACGAAAACATTCATTACGGGCATCATCATCATGCCGTACAG +TACGACCAGTCCAGTCACCACCACATTTTATACCGAAATGATTATTGGATTTACGAGCCA +GTGTACTTTTTCCCGCTCCTGATTCCAATAATCCTTGAGCCAGAGTGATGCTGGCAGGAA +TACGATACCGTTTCATCTCATCGATAGCGAGATCTTTATACTTATGAATATATTCCTCAT +ACTGCCGGTTACGGGTTTGCGCTTGCACTGTAAAATTACATAGACAACAAAACAACCCGA +TTAATATAAATTTCAACGTATGTTTCATCATATAAAACTGTTTCAAAATCGCGACTATAC +TTTTTCCGCTATATATAGTTGCAAACTTACCATTTTTATCCGGCTGTATGCAATCTTTTT +AAATTAATTACGAATTTCATTCGAATTCCAAAAGTCCAAAGAAATCCGGACGATGAAAAT +CCGGCTTCTCTATTTTAATAGGATTCCATGATAAGAAATGAGGCTTTTGCAATTCATCTC +CACATTTATAAAAATTAGCCCGTATGCTCATTCCATCCAGCCCGGTAATGGCATGCTTAA +AAAATACCTTATACGGAATCAGCAAAGCCACCTCCCAAGTACATTCTCCTATCTTTTCTT +CAAAAGTTTCTCTGCCCAAGCTAGCCCAACGCTTCACCTGATCCGTTATTTCAGAAGGAG +CCATTTCACGATTATTACGCTCCGAACCTGCTGCTAAAAGAATCGTAGCGATACAGTTAC +ATTCCAAATTATAATAAATGCCATCTCCCGCAGGAATAGAGAAAAATTCCACGCAGGAAT +CTGTCCAAACACTACCGTTATCTTCACCGTATTTGGCACGAACACTGTCTTCTACTACTT +TATAATGGACTAATATAGCATCATTTGTATAGGCTATACGAAAAGAGACTTGAGGTTGAT +AAGGATATTCTTTCCAGTTCACTATATTAATAGGATGATACGTAATGTTCTCATTATCAA +ACAAGGTCGGTATTTGTCGTGCATCTTGAACTTTACCACTTAATTTCTTTACTTTCATCG 
+TATTGTTTTTATTAGTTAAATAGCTACTATTAGCTTCACAAAAATATATACTTCCACTGA +GAAAACAAAATAGTGGCTACCATAAACAGCAGCCACTAAAAGAATAAAGCAATAAAGACA +ATGAAAGCACAATTTTTACATTTTCATCCATATTTATTTCTTCACATCGCCCTGTGGCAT +ATGTCTTCTCCCATCATGCTGCTTTTTCATTCCTTGTCTTTTTCTCATCTCTTTACGGAA +TTTATCGCCATTGTGTTTCTCCATATTATACATTTTCTGAATTTGTTTAGGAGAAAGAAA +TTTCCGGAACTCATTATAATACTTCTCACGAACATCCAGTATCTTTCTACTCTGCGCGAA +ACGTGCCTTGATAGCCTGCTCCACTTCAGCATCTGTAGGAAGCGGTTTGGGGGTCTGTTT +ATCTGCAGCCGTTCTATTTGCAATATTTCGACAGGCTCCCATATGACGAGTAGCCCGCAT +TTCTTCCATGTATTGTTTATACACCGGAATAAATTTAGCAGTCGTTGCGTCATCCAATGC +CAATCCTTTTATAATTTGATTGCACTGTATTTCCAGCATCTGCTCTTTATTAAACTGCCT +TCTTTCAGATTTAGCTCCTTTCTTTTCTTGTGCAAAAAGTGTTACCTGACTTCCCATAAC +AATGGCTGCCAACATCATTAAAAAAAATTTTGTTCTCATCATTTTTTTGTTTTTAGTGAT +TAATCAATTTATAAATATATCATTTTCAGACAATTCCACCCATCCTGCTAATTCCTCATC +AGACATAGATTCTATATAACGATCCAAAGTATCTGAATAAGAAGAGTCTAAATCCGTAGA +AACAATAAGTGAATGAGAAGGCAACTCCTCTGCCACAGGTTGGGATATCGGAAAGAAAAG +AACTCCCAACATGACAGCCGCAATAGCTAAAGCAGCCCCAATAATCAATTTTATTCTATG +CTGCCTATTAAACCTCTCAGCCTGTGTACGCTCCAATACTTGCTTCTGCATTTTTCCGAA +AAAGCCATCAGGGGTACGATAAGGCATCCGCTTCCCAATATTCTTAAAATCAAACTCCTT +TTCCATCCTGTCTTTAATTATTTATCATGTACTCTTTTATCTTCTCTTTTGCATAATGAT +AATTCACTTTCAACGTATCCACTTTCGTGTCCGTAATCCTGCTAATTTCCTCATATTCCA +GTTCATCATAATAACGAAGATTGAAAACTACCCGCTGCTTTTCAGGAAGCATCAAAATAG +CCTGCTGAAATTTCACCGCCATTTCGTTCTCATAATCCACATAATCTGACGCCATCAACT +TATTCATTAACTCTTCCTGAACTTCTTCCGCCGAAACAGCTTGTTCCTTACGCATATTCA +GAAAACGGAAACACTCATTGGTCGCTATCCGGTAAATCCATGTTCCCAATGAACTTTCCT +CCCTAAACTGAGGAAGATGGCGAAAGACGCGAATAAAGACTTCCTGCAGAATATCCTCTG +CATCCTCATGTGATACAACCAATCTCCGGATATGCCAATATATGGGCTGTTGAAAAAAAT +CTATCAACAGTTTAAATCCCCGTTCCGGGTTGGAAGCCCATACTTCCCTAATTTTATCTT +CGTTTATCATCTATTCGTCAAAAGTTTATAGGTTAGAACCACCCGAGAACAAGAAGTTAA +AAGTGAAAATGAAGTTTTTACATTAATTATAGATGAAGTTAAAATAATCAATCACATTTC +CTACAAGAGAAAAATACAACCAATCTTCCGGAATATGAATTTAAAGAATTCAAACTCAAT +CTTCCTATCAATAAATTTCAGTTCTTAGAAGAATTACCATATAAACAATAGGGAAATCCC +TAAAAATAAATAGGGGAAAAGATAGAAGAATATACCTTTTCCCAACTTATCTTTGTTACA 
+AACAAAAAAACAAAAAAATGAGAAAATTGAAACAAACATTATTAGCGTTAACCATTTGTA +CTTTAGTCATCAGTTGCAGTTTGACTACTAGTAGCACTATTATGAAAGTACAGAGAGGGA +TGTCCCAAGAAGAAGTCAGCCATTTACTTGGGAAACCCGATTTCCGTAGATTCGACAACG +GCTCGGAACAATGGGAATATACCAAGACCAATGTGTCCACAGCTGCAAACACGGTAATTA +TTATCGATTTTGTAGACGGAATGGTAACAAACATGGATTCTTTTGAGTCCAACATTACTC +CGCCTCCAGTTGCAGTGTGTCCGCCAAATGAAATCATTACGGTAGTTCCACCCAACCACC +CTGATCATTCTGGCCCGCACAGACCAAAACACAAAGCCATGAATCCACATGATTTTGAAA +ACCTCTACAAAAAAGTAAAAAACAAAGCATTTAAGGACGACCAAATGGAATTATTATCTG +TAGGAGTTGTAAACAACTACTTCACCTGTAAACAAACTGCCCGACTCATGTCCATATTTA +CATGGGACGATGAAAAAATGAAAGTATTAAGAATGGTTTCCAACCGCATTGTAGACCGTG +AAAATGGAAAAGAAATCATCAAGACACTGGATTCTTTATTTAAACAAGATGATGCACGTA +AAATATTGGGAATCACTAACCAATGGTAATTCAACCTAAACAACACACAAAAACATAAAT +ATATGAGAAAATTCAAACAACTATTATTGCTTGTTATCAGTATAACACTTGTCAGCTGTC +ATACCACAGGAAGTATCAAACAGAAAGCATGGAAAGTACAGCAGGGAATGACAACAGAAG +AAATAGGCCAACTTCTTGGAAAACCCGATTTCCGACGTTTCGATGGTTCTTTGGAACAAT +GGGAATATCAGAGCGGAGGAATTGCAACGTCTTGCAAATTTCTAATTATCGAATTTCGAA +ACGGAAAGGTAACAAGCATGGATTCCTACAATGAAATAGCTAAAGAGACTTCAGTAGGAG +ACCTATATTCCAGCAAAATATCCCTCCATACCGTTGGTTCAATAGACGATAACGAATTTG +AAAAGATATACAATGAAACAAAAAATTCCGTATTCAAAGATTCAACTCTGGAAAAAGCCA +TTATAAATAAAAAATTAAGTTGCGCACAATGCCTTAAACTTATGTCACTTTATACTTTTG +ATAATGACAAGTTAAAAATGTTACAAGTACTGAAAGACCATATAGCTGACACAACAAATT +ACGATAATATTGTCAACTCACTAGATTTCATTTCAAGTAAAAACAAGGCCAAAGAGATAT +TAGGAATACCCTAATACAGTAAAAGCAAACGTACCTGTTGCAATAAACCTAAAAAATATT +GCAACAGGCACATCTGCTTTACCTCAATCTTCTATTCTGACAGCTGTGCCGCTTGCGGTT +ACCATCAGCATACTACCACTGTCCCCCACAGTTTCATAATCCAAATCCACCCCAACTACA +GCATTAGCTCCCAATAAAGCAGCCTGATCCTGCATTTCTCGTAACGCAGTATCTTTAGCC +TGTCGAAGTACTTCTTCATAAGAACCGGAACGACCGCCTACAATGTCACGAATACTAGCA +AAAAAATCACGAAACAGATTAGCACCGATAATAGTTTCACCTGTTACAATGCCATAATAT +TTGGTTATACGTTTTCCTTCGATAGTAGGTGTTGTTGTTGCTAACATAATCTTTCTCTTT +TTTAATTATATATTCATTAGACGCACAAATAAAAGAAAAAGTTGCAACTTTCTCCTTTTT +TATCGGTAACTATATTATTCTTTTCACATTCAACTCTGACAAGCAATAAGAAAACAAATA +AATTTAAAACCAGATTTTAGAAAAGAAACATTGGGAGATTAAAAACAAAACATTACATTT 
+GCAAGAAAACAAAGAGGAACGTATCATGAAACAAATCAAACTGGAAATAAACATAGAGGC +TTGCCATTATGACGAACTAACAGAGAAGGACCGCAAACTGATAGACGCCGCGTGTGAAGC +AACGAAAAGAAGCTATGCTCCTTACTCACATTTTGCTGTTGGAGCCGCCGCACTGTTGGA +AAATGATATCGTCATTACCGGAACCAATCAAGAAAATGCAGCCTACCCATCTGGTCTTTG +TGCAGAACGTACCACTTTGTTTTATGCCAATTCTCAATATCCGGATCAAGCAGTAAAAAC +ACTCGCCATTGCTGCCCGCACAGAAAATGGTTTTCTTGACACACCTATTCCTCCTTGCGG +AGCCTGTCGGCAAGTATTATTAGAAACTGAAAAACGATATGGAAAACCTATGCGGATACT +GCTTTATAGTAAGACTGATATTTATATATTAGAAAACGTAAGCGGACTTTTACCTTTATC +GTTTGATGGAAATTATCTGAAATGATACCTACCATCCGTCTGCACAGCCAACAGTTAATA +AATCCGGTTTTCAATAATCCGAAAGATCTTGTATCATGGATGGGAGGCATTCAAGCGCAA +GATTACACCATGTCCAAATGGGCTATTGGTATCCGGCTAAAGGCAGGAAATCTGCAAACA +GTCAATGAGGCACTGGCAAAAGGAGATATATTGCGCATTCATGTCATGCGTCCTACTTGG +CATTATGTAGCTGCAGAAGATATACGCTGGATGCTGAAGCTTTCATCCCGACGCATCATT +ACTGCCAATGACTCTTTTGCCAAGTCCAGAGGACAAGACATTTCAGTTGACATTTACAAC +AAAGCAAACCGATTATTGGAAAAGGCACTAGCAGGACACAATCATCTGACCAAGCAAGAA +ATTGACAACGTATTTAAAGAAGGAGGGCTGGAAACCAATGAAAGATTATCCAACCGTTTC +CTGATTCACGCTGAAGCTGAAGGACTTATTTGCAGCGGAGCAGATAAAAATAATAAAATC +ACCTTCGCACTTTTGGATGAGCGTGTTCCTCCAATACAAGAATTACATAAAGAAGAGGCG +CTAGCTATCTTGGCACGCAAGTATTTTAGAAGCCACTCTCCTGCCAGTCTGAAAGACTTT +GTATGGTGGTCCGGACTTTCGGTAACGGAAGCTAGACAAGGTATAGCCGCCATTGAACAA +GAATTGCTTACTGACCGTTTCCTAGCACAAAAATTATATGTTCATCAGTCTTATAAAGAA +GAAAAAACAACCGACATATTGCATATCCTTCCTTCATACGATGAATACCTAATCAGCTAC +AAAGACCGTACTGATGTATTGAACAAAGAATACCAACACAAAGCATTCAATTCTTTCGGG +ATATTCCGTCCGGTTATTCTGTACAACGGGCAAATAGTTGGAAATTGGAATAAAGTCATA +CAAAAACAAACAACACATATAGAAATGAACTGGTTCAAGAAAAATACAAAAATCAGGTAC +TGTCCAAAATTTTGTGTAAATGGAAACAGGATTCAGTTGTAAGTTTGTTCTTATATCTGG +ATTCTGTTTTCAAATATAAGCATAAACTGGTTCATAATCAATCCCCAGTTTGAAATAGGC +ATTGTCCATTTCTTTTCAATCTCCATAAGTGAAAGATACACGGTCTTTTTTACGGCATCG +TCCGACGGGAATGAAAGCTTTGATTTGGTGTACTTTCTGATTTTTCCGTTCAGATTCTCA +ATAAGATTTGTGGTATAGATTATTTTCCTGATTTCCAATGGGAACTGGAAGAAAACAGTC +AGATCATCCCAGTTGTTTCTCCATGAAAGTATGGCGTATGGATACTTTCCTCCCCATTTC +TTTTCCAGATTGTCAAGTTCTGCGGCAGCAACTTCCTTATTAGGTGCATTGTAGATATTC 
+TTCATATCCGCCGTAAACTCTTTCTTATCCTTATAAACGACATATTTACAGGAGTTCCTG +ATCTGATGTACCACACAGATCTGAGTGGATGACTGGGGGAATACGGTACGGATGGTATCT +GTAAATCCATTCAGATTGTCAGTACAGGTAATCAGTATATCCTGCACTCCACGAGCCTTC +AAGTCGGTCAGGACACCCATCCAGAAAGAGGAACTTTCCGATTTGCCGACCCACATGCCA +AGGACTTCCTTCAGGCCGTTCTGTTTCAGACCGACACAAAGATAGACGGTCTTGTTTATA +ATCTTGCCGTTATCCCGTACCTTGAAGACGATACCATCCATCCAGACTATCAGATAGACC +GGATCCAGAGGACGGTTCTGCCATTCCTGCGCAGCCTGGCTTACCTTGTTTGTAATAATG +GAAATAGCTGATGTAGAGAGCTCTATTTCATAAATCTCACGCATCTCCTCCTCTATATCG +GAAACACTCATTCCTTTGGCATACAGGGAGATAACAAGCTTCTCTATAGAAAGTCCCCGG +CTTTCATGCTTGGGGACTGCTATCGGTTCAAACTGCCCGTTGCGGTCACGCGGAATGGAG +ATGACAGACTCTCCATGTCCGGTCTGAATTTTCTTCGGATAACTGCCATTCCGGGAGTTG +CCGGTGTTGTTCCCTGCCACGGAATTCTTCTCATACCCCAAATGGGCATCCATCTCACCT +TCAAGCATCTTTTCCAATACCTGCGCATGCAACTGATTCAGAAACCTGCTCACATCCGCT +TCTGTCTTGAACTGGCTAAGGAACTCCTTGCTTAATACCTCATCAGGCACTACTTGATTC +TTTTCTTTCATAATCTTTTTCATTTGGTAAATGTATAAAATAAAAAATACGGAACTCGAT +TTTGAATCCCGTATTTTCCATTTACACAAAATATTTTATAGTGCCAAAAATCAAAAAAGA +ATTACTTTCCCTAGCGGAGAGAAAATACCTTACTTTTTTCTCCGAACTGTAAGAAAATTG +TAAATGGATTATTTCTGCGTCATCACATTTTAAAAATAATCCATTACATTTACATTATTC +AGTATTGCAATATAAAACTCTCAGCCACTTTCAAGAAACGTTCATGACCTTTTTCATTCA +AATGTGCAGTATCTGTATTATTCTTGCTGTTCTGAAAATATATTTTCCTAAAATGATCAT +TACTTGCATAAATTCCTCCTTTTCGGGCGCTGTCAAAAATAGGAATACTATAATTTCCAC +AAACCTCAATCATAGCATCCACCACTTTCTCAGCATCACTTCCTGCAAAATTTTTACAGT +TCCATCGGGTAAAGAAAAAAATCTTAGCTGTGGGATACTTTTCAATAAGCCCTTCACATA +GCATTGCCAGCCTCTCTTTAAATACATCAATACCGCCAATTGAATCCAATTTAAAACCAT +CATTGTGCCCACCCACAACAATAACATAATCCAAATCATCCGGCATTTCCTTATATCTTA +CATACATCGCTTCCCCCCAACGCGGGCTACTATAAGCAATGCTGCTGCCGTTCTTGCCAT +AATTTAAATATTCCATACCATGTTTCTCGGCAAATTTATAATGCCAGGTATTCTTAACAG +GTTCCTTATGATTCTTCACATAACTATCTCCAATAATTCCCAAACGTTTTCCTTTCAATT +CATCAGTCCTGACGGTAACTCCTTTAATAATTTGTTCCAACTGCTTATATACCTGTTGTC +CCAAATACTCTGCCGAATGAGCCGTGAAATGAAGTCTGTCATTTAACAATTCCGCGCCTG +ACATATCTATCAGGTGCATGTTCGGATCTTCAGCAGCCAATTGCTTCATCGCATTCTCCA +CTTCTCTGCTGAAATATTTATTACTTCGGGCAACAGTACCAAAAATAAAAGGCAAACGAG 
+AATAATCCTTTCCGGTTTTTTCTGTCAGATGCATACGCACATAAGCCACCATTGTCTTCA +AGTTACGGTAATAATCTTTACTTTTAGCATAATCACTTTCCCCCTGATGCCAAAGAAATG +CATCTATCTGATAGCCATCCTTCAGGCGGGAAAGAGTCTTGTCTATACACATGTCTATCT +CTTGAATGAAAGAAAGCAACAAGGAGTTTCCTCCATCAGAAGTTGGCTTAGCCTGTGCCA +GCCATTCAGGCGCCGCAGACCAAAAACGTCCCTTGGAAGCATTATAATCAGGTGCTATAG +AAGTTCCGCCAACAGCCCATTTCACCACATAAAATTTCTCCTGCAACAACTGCTCCAACC +AATAATATGTCACTGCATCAAAAGCCCACATATTATTTTTTCCGCTACGTTTGGCACGAG +GCCAGAAAGGAATAAATTCTCCCTTTCCGTCATTCTGAGCTATCTGACAATAACGGTAGG +CACCTTCGGCATACGTCAGTGTATCCGTAGCCAATGCTTTAATATATGCCGGCAAATCTT +CATTAGGAGTTCGTCCATCGGTATTAGACTGTCCGGCAGTTATAATCACATGTACAGGAT +TGGAAGCCATCGTATTCAAGCCAACAGCTGCAAAAATCAAAATCAAAATTAAGTTTTTCA +TAAGTATCATTTCATGTTTTTAATGGACACAAAAATAATCCTAATTCTAATATAACGACA +TGGATTATCGTCCATTATCATAGATTTTTGAACTCATTATGGTTCATACAGAATGAGAAT +TGCGGCTAAACCATAAAAAAAGAGCCACAGGAACAATGGCTCCCAATGACTCTTTCTTTT +TCAGCTCATATTTATAATTTAATTATCTCCATACATACGCGCACGCATTTCTTTAATATG +ATCAGAAGTGATGTATTCATCGTATTCCATCATCTTGTCAATGATACCATTCGGGGTCAA +TTCAATAATACGGTTCGCCACCGTTTCAATAAATTCGTGGTCGTGGGAAGAGAACAGCAC +ATTTCCCTTATAAGTCTTCAGATTATTATTGAAAGCCTGAATTGATTCCAAGTCCAAATG +GTTGGTTGGAGTATCCAAAATCAGACAGTTCGCATTACGCAACTGCATACGGGCAATCAT +ACAACGCATCTTCTCACCTCCGGAAAGCACATTCACTTTCTTCAATACTTCCTCTCCGGA +AAACAACATACGCCCCAAGAAACCTTTCATATAGACTTCGTTACCTTCGCCAAACTGGCT +CAGCCAATCTACCAAATTCAAATCACTTTCAAAGAAATCCGTATTATCCAAAGGCAAATA +AGCCGTAGTAATAGTAACGCCCCACGCGAATTTACCAGCCTGCGGAGTACGATTTCCATT +GATTATTTCAAAAAAAGCAGTCATGGCACGCGGATCACGGCTCAAAAAGACAATCTTATC +TCCTTTTTCTACATTAAAATTCACATCATTGAACAATACCATACCGTCTTCAGTTTCCGC +ACGCAAGCCGGATACTTCCAAAATCTGATTGCCCGGTTCACGATCCGGAGTAAAGATGAT +ACCGGGATATTTACGGGACGACGGCTTAATTTCATCTACATTCAGTTTTTCCAACATCTT +CTTTCGGCTGGTTGTCTGCTTGCTCTTAGCCACATTGGCACTAAAACGACGGATAAATTC +TTCCAACTCTTTCTTCTTCTCTTCAGCTTTAGCCTTCTGGTTCTGCTGCTGGCGAAGTGC +CAACTGACTTGATTCATACCAGAAGCTATAGTTACCGGCAAACATATTCACTTTACCGAA +ATCAATATCTACAGTATGAGTACAAACAGAGTCCAAAAAGTGACGGTCATGACTTACCAC +CAACACCGTGTGTTCGAAATTAGAAAGATATTCCTCCAACCAAGTCACCGTATCCATATC 
+CAAATCATTGGTAGGTTCATCCAACAACAAGTTGTCAGGATTACCGTAAAGTGCCTGCGC +CAACATGACACGCACCTTTTCCTTACCACTCAGTTCCCCCATCAACATATAGTGCTTGTC +TTCCTTTATGCCCAATCCGCTCAACAAAGAAGCAGCATCACTCTCGGCATTCCAACCATC +CAGTTCGGCAAACTTCTCTTCCAATTCAGAAACTTTCAAGCCATCTTCATCTGTAAAATC +AGTTTTCGCATACAGTTCTTCACGCTGTTTCATAATGTTCCACAACACAGTATGTCCCAT +CATAACCGTATCCATGACTGTATATTCATCCCACTTAAAGTGATCCTGGCTCAATACAGA +CAGACGTTCGCCCGGTCCTAATACCACTGAGCCCTTAGTCGGTTCCAGCTCACCTGAAAT +TGCTTTCAAGAAAGTAGATTTTCCGGCGCCGTTCGCACCGATAACCCCGTAAATATTACC +ATTGGTGAACTTCATATTTACGTCATTATACAACACTCTTTTACCAAATTGAATGGCCAA +GTTCGAAACTGTAATCATCCTAATATACCTTATTTATATAATTATTCTATTTTGGAATTG +CAAAGGTAGGCATTTTAATTCAGAAAAGTATGCCAGTGTGGCAGAAATTCCGTACTTTTG +CAAAATTATTACTCTAAGAAAACAACTAGGCACGAAATTTGTTAGCAGTAAATTAACCTA +AAAGACCTGACTGTTATGAGTAAAAATAAGAAAAACAAAAAGTTTAATAAGAATATGAAC +CCAACAGAGAAGAACCAACCTCAAGACGAGGAAGTTTTGAAAAATCAGGAAGCGGCAGAA +GCTGCCATTGATGAAGAAACTCAGAAAGAAGCAACAGAAGAACTGAACGCTGAAGAGAAG +GTGAACAAGGAATTGGCGGAAGCTCAGAAAACAATAGAAGAGCAGCATGACAAATACCTG +CGTCTTTCAGCCGAATTTGACAATTACCGTAAACGCACCATGAAAGAGAAGGCCGAATTG +ATTAAGAATGGTGGAGAAAAGGCCATTACTGCCATTCTTCCTATTTTGGATGACTTGGAG +CGTGCAGTAAAGACTTCAGAAACTTCGGATGATGTAAAAGCAATGCGTGAAGGAATTGAG +TTGATCTACAACAAATTCCTGAAAGTATTGAACCAGGAAGGACTTCAGAAAATAGAGACT +GACGGTGAGAACTTTGATACCGATTATCACGAAGCAATTGCATTAGTTCCAGCTCCTTCC +GAAGAGAAGAAAGGAAAAATATTGGACTGTGTACAAACCGGTTATAAGCTGAATGACAAA +GTAATACGCCACGCCAAAGTAGTTGTGGCTCAATAAAACAGTATCATAAAACGACATGGC +AAAAAGAGACTATTACGAAGTTCTGGAGGTGGACAAAACAGCCACCCTTGATGTTATAAA +AAAAGCATACCGCAAAAAAGCAATACAATATCATCCGGACAAGAATCCGGGAGATAAAGA +AGCGGAAGAAAAATTCAAGGAAGCTGCCGAAGCTTATGATGTGTTGAGCAACCCGGATAA +ACGCGCCCGTTACGACCAATTCGGACATGCAGGAATGAGTGGAGCTGCCGGTGGCGGTTT +TGAAGGATTCGGACAAGGTATGTCCATGGATGATATTTTCTCTATGTTCGGTGACATCTT +CGGTGGACATGGAGGAGGTTTCGGAGGCTTTGGAGGTTTCGGTGGTGGCGGACGTTCCGC +ACAACGCAAGTTCCGTGGTTCAGACCTCCGTGTAAAAGTAAAACTGAACTTAAAAGAGAT +TTCTACGGGAGTAGAAAAGAAATTCAAACTGAAAAAGTACGTAACATGTGACCATTGCCA +TGGTTCAGGAGCCGAAGGAGAAGGAGGAACGGAAACGTGTCCCACTTGTCATGGAACAGG 
+AAGTATCACCCGCACCCAACAAAGCATTTTTGGAATGGTGCAATCACAAAGCGTATGTCC +ACAATGTAACGGAGAAGGGAAAATAATTAAGAACAAATGTAAAGCTTGTGCAGGAGAAGG +TATTGTATACGGAGAAGAAGTCGTGGAAGTGAAAATTCCTGCCGGTGTGGCTGAAGGAAT +GCAACTTTCTGTCAATGGGAAAGGAAATGCGGGCAAACATAACGGTGTTCCCGGTGATTT +GCTTGTTGTCATAGAAGAAGAATCCCATCCAGACCTAATACGCGATGAAAATGATTTGAT +TTACAATCTGTTACTAAGCGTTCCGACTGCTGCTTTAGGAGGCACTGTAGAAATACCAAC +TATTGATAGCAAAGTAAAAGTAAAAATTGAACCAGGCACCCAACCAGGTAAGGTCTTACG +CCTAAGAGGTAAAGGATTGCCCAATGTCAATAGTTATGGTTACAGTAACGGTACAGGTGA +TTTATTAGTCAATGTAAGCGTATATATTCCAGAAACGTTGAATAAAGATGAAAAACAAGC +ACTAGAGAAAATGCAAGAATCGGATAACTTCAAACCGAATACAAGCATTAAAGAAAAAAT +ATTCAAGAAGTTCAAAAACTTCTTCGATTAATTTTTATACTACAAAGTGTCACAGAAAGA +ATACCAGGTTTTATATCTGCCTGGCATTCTTTTGTGACACTTTGCGGTAAATAAAAACAT +TTGCCATGAATTACGAAGAAACATTGGATTATTTATATAATAGTGCCCCATTATTCCAAC +ATATAGGAAAAGATGCATACAAGGCAGGATTAGAAAACACTTATCTTTTAGACAAGTATT +TCAACCATCCCCATCGCCAATTCCGAACCATTCATATAGCCGGGACCAATGGAAAAGGCT +CTTGTTCACATACTTTAGCCGCCATTTTACAATCAGCCGGATACAAGACAGGACTTTACA +CTTCTCCACATCTGATAGATTTCCGTGAACGAATCCGCGTCAACGGAATTCCTGTATCCA +AAGAATATGTCATAGACTTTGTAGAAAAACACCGTGCCTTTTTTGAACCTCTGCATCCTT +CTTTCTTTGAACTGACTACCGCAATGGCTTTCCACTATTTTGCCCAAAGCCAAGTAGATG +TAGCCATTATAGAAGTCGGCCTGGGAGGACGAATAGATTGTACAAACATCATCCGTCCGG +ATCTATGTGTCATAACCAATATCAGTTTCGACCATATACAATTCTTAGGTAACACATTAG +CCAAAATAGCCACAGAGAAGGCCGGAATCATCAAAGAAAAGACCCCGGTAGTTATCGGTG +AAACCACACCCGAAACCAAACCTATATTTACAACCCGTGCTAAAGAAGTAAACGCTCCTA +TCTACTTTGCAGAAGAAGAACAGTTATTACACTCCTCCAGCATAAATGAAAAAGGTAAAC +GAATATATCAAACAACCGACTACCTCAATCTGGAAGGCGAACTGGAAGGGCTTTGCCAAC +TTAAAAACACCAATACACTTTTATCTGCCATCCGCCTATTAAAACAAGCAGGTTATCAAC +TTACCGAAAGCAATATACGCAAAGGATTCTCACAAGTATGTGAACTCACCGGCCTGATGG +GAAGATGGCAAAAATTAGAAAGTGAACCGACTTTAATATGCGATACAGGACATAATGTAG +GAGGTATTTCGTATATCATAGAACAATTAAAACATCAGAAATATGAACGATTACACATTG +TAATAGGTATGGTAAACGACAAGGATATCAGTGGAGTATTGTCCATGCTTCCTAAAAATG +CCACCTATTATTTTACCAAAGCCAGCGTAAAGCGTGCTTTATCCGAAAAAGAATTACAAA +GCTTAGCGATGCAATCCGGGCTGCACGGAGATACTTATCCTGATGTAGAAACAGCTGTAA 
+CAGCCGCTAAAGAAAAGGCTAACAAAAATGACTTTATTTTTGTTGGAGGAAGCAGCTTTA +TCGTTGCGGATTTATTAAAATTTCACGTTTAGTCCGTTCACTTTTATAGAAAAAGTTTGG +GAATACCGAAGAAAATCTTTTTATTTGCATTAGTTTTATACATAAAATAAAAAGATTTTC +AATAACATGATAAACACATCTACTTCCGATGAGAATTTATGCGGCCTGAAACGGGCCGAT +TTTCAGACAACAGTGAACGGGAAACAAACAGACCTCTTCATTCTGAAAAATGAAAACGGA +GCTGAAATAGCTGTCACCAACTATGGTGGAGCAGTATTAGCTATTATGGTTCCGGACAAA +AACGGTAAATATGCCAATGTCATTCAAGGACATGATAGTATAACCCATGTTATCAATAGC +CACGAACCTTTCCTCAGTACTCTTATCGGTCGTTACGGAAACCGTATAGCAGGAGGCAAA +TTCATTTTAGAAGGGAAAGAATATTCACTGACTATCAACAATGGTCCCAACTCGCTGCAT +GGCGGTCCTACCGGATTTCATACCCGCATTTGGGATGCAGAACAGGAAACTCCCCAAAGC +TTGAAATTGCATTATTTATCTGCTGATGGCGAAGAAGGATTTCCGGGAAATTTGGATATT +CACGTAACGTACACTTTAAGTAATCAAAATGAATTCATCATTACTTACCACGCTACAACA +GACAAAACAACATTGGTAAATCTTACTCACCACGGTTTTTTCAGCCTATCAGGCATTGCC +AATCCTACGGCAACCGTTGATAACAACATCGTAACCATTAATGCTGATTTTTATACTCCG +ATAGATAATGTATCCATCCCTACAGGCGAAATTGCCAAAGTAGAAGGTACTCCTATGGAT +TTTCGTACTCCTCAAAGAGTAGACAGTAGAATCAATGACCCATTCGAACAGCTAGAATTC +GGTGCCGGATATGACCACTGCTATGTATTAAACAAACGTGAAGCAGGCACACTGAGTTTT +GCAGCAAAATGTGTGGAGCCCGAAAGCGGTCGTAGCATGGAAGTATACACTACAGAGCCC +GGTGTACAAGTTTATACTTCAAACTGGCACAATGGTTTCGAAGGTGCTCATGGAGCAACT +TTCCCAGCAAGAAGTGCCATCTGTTTTGAAGCACAACATTTTCCTGATACGCCCAATAAA +GGCCATTTCCCTTCTTGTGTTCTACATCCGGGGGAAACTTACAATCAAGTAACCATCTAC +AAATTCGGTGTAGAAAAATAATTTTATGAAATAACCTAATTAATAAACTTATAATTTTTT +CAAAACCATGAATCAACAAAAACAGAACGGTAATATCATCGCTATCATTACAATGTTCTT +CCTTTTCGCGATGATTTCCTTCGTTACTAATCTTGCTGCGCCTTTCGGCACAATATGGAA +AAACCAATATGCAGGTGCCAATACCTTGGGTATGATGGGAAATATGATGAATTTCCTCGC +ATATCTGTTTATGGGGATTCCTTCAGGTAATATGCTTGTAAAAATCGGATATAAAAAAAC +AGCCCTCATTGCCATGGCAGTAGGTTTCATCGGTTTGTTTATCCAATATATTTCCAGTTT +GTTCGGTGCAGATATAGACGTATTCAATTTAGGAGAATATGCCATTAAAATGAATTTTAT +CATCTACCTGCTTGGTGCTTTCGTCTGTGGTTTTTGTGTATGTATGCTGAACACAGTAGT +TAACCCGATGCTAAATCTTTTAGGTGGTGGTGGTAACAAAGGTAACCAATTAATCCAAGC +CGGTGGTGCTCTAAACTCATTGTCAGGTACTTTGACTCCGCTTTTCGTAGGTGCCTTGAT +TGGTTCTGTTACCCCTCAAACAGCTATGTCAGATGTAGCTCCTCTGCTTTTCATCGCAAT 
+GGGTGTATTCGTATCAGCGTTTATCGCTCTTTCATTCATCGCCATTCCAGAGCCTCATCT +AAGAAAAGCAGGTCATGAAAAAGAGAAATTCTCTCATAGTCCTTGGAATTTCCGCCATAC +TGTATTAGGTGTAATCGGTATCTTTGTGTATGTAGGTATTGAAATCGGTATTCCAGGCAC +ATTAAATTTCTATCTTGCCGACCCAACGGAAAAAGGTGCGGGTCTGCTTGCCAACGGTGC +CGCTATCGGTGGTGCTATTGCTGCCATCTATTGGTTGCTCATGTTAGTGGGACGTTCTGC +AAGTAGTGTCATCAGCGGTAAAGTAGCTACACGCACACAATTGATTGTTGTTTCCGCAAC +AGCTATCTGTTTTATATTAATCGCAATCTTCACTCCAAAAGAAATCACTGTCTCTATGCC +GGGATATAGTGTAGAAAATGGATTCGAAATGGCCTCTGTACCTGTCAGTGCCCTATTTTT +GGTACTTTGTGGTTTATGTACGTCCATTATGTGGGGAGGTATCTTCAACCTCGCTGTAGA +AGGTTTAGGCAAATATACCGCACAAGCTTCAGGTATTTTTATGATGATGGTTGTCGGTGG +CGGTATTTTCCCATTATTGCAGCAATTCATTTCTGACGCTGTGGGATATATGGCCAGCTA +TTGGTTAATTATCGCTTTGCTTGCTTATCTGTTGTTCTATGGTTTGGTAGGATGCAAGAA +CGTAAACAAAGACATTCCTGTAGAATAATTATAAAAACATATAGTATTAACTTTAAACTA +TTAATATCATGGATATAGAATACGTAAGAAGTCGCTTCATCAAACATTTTGATGGCACAA +CAGGTTCAGTATATGCATCACCCGGACGTATTAACCTTATTGGAGAACATACCGACTATA +ACGGTGGTTTTGTCTTTCCCGGAGCAGTAGATAAAGGTATGATTGCTGAAATCAAACCTA +ATGGTACTGATAAGGTACGTGCTTATTCTATTGATCTAAAAGACTATGTAGAATTCGGCT +TGAACGAAGAAGATGCCCCTAAAGCCAGTTGGGCAAGATATATTTTCGGCGTTTGCCGTG +AAATGATCAAACGTGGTGTAGATGTGAAAGGTTTTGACACTGCTTTTGCGGGTGACGTAC +CTTTGGGTGCAGGTATGTCTTCATCTGCTGCATTGGAAAGTACGTATGCTTTCGCTATCA +ACGAACTTTTCGGTGACAACAAAATAGATAAATTTGAATTAGCCAAGGTAGGTCAGGCAA +CAGAACATAATTATTGTGGTGTAAATTGCGGTATTATGGATCAATTTGCATCTGTATTCG +GAAAAGAAGGTAGTTTGATCCGTTTGGACTGTCGTTCATTGGAATATCAATACTTCCCGT +TCAAACCGGAAGGTTACCGATTGGTATTGGTTGACTCGGTAGTCAAACACGAATTGGCTT +CTTCTGCTTATAATAAACGTCGCCAAAGCTGCGAAGCTGCTGTAGCCGCCATCCAGAAAA +AACATCCTCATGTAGAATTTTTACGTGATTGTACAATGGAAATGTTGCAGGAAGCAAAAG +CCGAGATCAGTGAAGAAGACTATATGCGTGCAGAATACGTCATTGAAGAAATCCAACGTG +TACTTGATGTTTGCGATGCTTTGGAAAGAGGTGATTACGAAACCGTAGGACAGAAGATGT +ATGAAACCCACTATGGCATGAGTAAATTGTATGAAGTAAGCTGTGAAGAACTTGACTTCT +TGAATGATGTTGCTTTCGACTGCGGTGTCACCGGTTCACGTGTCATGGGTGGAGGTTTCG +GTGGATGTACAATCAACCTAGTTAAAAATGAACTATACGAAACATTCATCACCACTGCCA +AAGAAAGATTCAAAGAGAAGTTCGGAAGAAGTCCCAAAGTTTACGATGTAGTCATCAGTG 
+ACGGTTCTCGCAAGCTGGTATAAAAAAGATCTCAAATTCGCTAACATATGAAAAAACCGC +TTTGGATTCAAAGCGGTTTTTTCTTCTTTATGCTATAAAAGTGTAGGAAATACACTTTTA +TGTTGCACAACATACTCTTTTCATATCCAATGTATTAGTAATAGTGTTACTTTTACACCC +AAAATCATATACTCTATTACTAATTAGTACTTTTATATCATGAAAAGCATGAAAAAAACC +TTTCTGGGAACCGGTATCGCACTGACCTTGTTAAGTGCTTGCGCTCCCAAACAATCTCAA +GAAACACTTACAAAGTCCGGATTAAACCCCACTAATTATGAAACAATAGTGGATGGCGTA +AAACCTGTTAAACTGTATACGCTAAAGAATGCAGCCGGAATGGAAGTATGCGTAACTAAT +TTTGGCGGACGTATTGTATCTATCATGGTCCCCGACAAAAATGGAAATCTAAAAGATGTA +GTGCTGGGTTTTGACAGCATTGCTGACTACCAAAATATACCAAGCGACTTCGGTGCTTCT +ATCGGACGCTATGCCAACCGCATTAATAAAGGAGTCATCGTAATGGATGGAGAAACCATT +CAATTACCCCAAAATAATTTTGGTCACTGCCTGCATGGAGGTCCTAAAGGATGGCAATAC +CAAGTTTATGAAGCAAACCAATTAAACGACAGTACTATGACACTGACTATGAAGTCACCG +GATGGAGATGCCAATTTTCCGGGAAATGTAACCGCTACTGTTACTTATGCACTGACAAGA +GACAATGCCATCGACATAAATTACGAAGCGACAACTGACAAAAAGACTGTTATCAATATG +ACCAACCACTCTTATTTCAACTTGAGTGGAAACCCTGCCAATCCGGCCACCGACCATATT +CTTTATGTAAATGCAGACAGCATCACTCCCGTTGACAGCACATTCATGACCACTGGAGAA +ATGATGGCAGTAACAGAAACTCCATTTGATTTCAATACTCCTAAAACCATTGCCCCCGAT +GTGACTAACTTTGAAAACGAACAAATAAAATTCGGTAACGGATTCGACCATAATTGGGTA +CTTAACACGAAAGGCGATATCAACCAGCTGGCTGCCAAACTGACCAGCCCTACTAGTGGT +ATCACTTTGGAAGTATATACTAATGAACCGGGAATACAAGTTTATACAGGCAACTTTTTG +GATGGAACCGTAAAAGGGAAAAAGGGAATTACTTATCCGCAACGCGCGTCTGTTTGTCTG +GAAACACAGCACTATCCCGACAGCCCCAACAAATCACAATGGCCTTCTGTAATTCTGGAA +CCGGGACAGACCTATCACAGCCAATGTATCTTTAAATTCGGTGTTGAAAAATAATTGTTA +AACTTTAAATATTTAATATCATGAATTGGAATTCACATGAATTCATCTGGCTGGACTGGA +CAATACTGGCAGTCGGCATTGTAGCTGTGATATGGGCGGTATGGCGCTCTGTACAAAAAG +ACAAACGCTCGCAACAAGGAGCAAGCAGTGAAGATTATCTATTTGGCAAAGGTGAGCCAT +GGTACATCATTGGTGCTGCTATCTTTGCAGCCAATATCGGTTCGGAACATCTGGTAGGTT +TGGCAGGAACCGGTGCCAAATCCGGAGTAGGTATGGCACACTGGGAAATGCAAGGTTGGA +TGATTCTTCTTCTAGGATGGCTTTTTGTTCCATTCTACCAACTATTAAACAATAAAATGG +GCAAAATCATTACCATGCCCGATTTCCTTAAATACCGTTATACCCCGCGTACCGGTTCAT +GGCTTTCTATCATCACACTGATAGCCTACATTCTTACTAAAGTGAGTGTAACCGCCTATA +CCGGAGGTATCTTTTTGGAGTTCTTACTTGGACTTCCTTTTTGGTATGGGGCAATCGGAC 
+TTATTGTCCTGACCGGCATCTTCACCGTACTGAGTGGAATGAAAGGAGTAATGACCCTCT +CAGCTATTCAAACCCCTATTTTAATCATCGGTTCTTTCTTGGTTCTTTTCCTGGGACTGT +CAGCCTTGGGAGACGGTAATATTGCTACAGGATGGACAGAAATGATGGATCATGCGCGTA +GCGCCATGAATGTAGGAGCAGACGGACACGCTTATGGTGCCAACCATATGTTTCACTGGA +CCGAAGCAGACCCGATGTATCAAGATTATCCTGGATTCTGGGTATTTATCGGAGCTTCCA +TCATCGGTTTCTGGTATTGGTGTACCGACCAGCATATCGTACAGCGTGTACTTGGACAGC +GTAAAGGCGAAGATAATGATGTGGTAATGAAGCGTGCCCGCAGAGGTACTATTGCAGCCG +GTTATTTTAAAATCCTGCCTGTCTTCATGTTTCTTATTCCGGGGATGGTAGCTGCCGCCT +TGGCTGCAAAAGGTGAATTTGATATGTCAAATACAGATGCGGCTTTTGCTGTAATGGTAA +AGGATGTTCTGCCTGCCGGTGTAAAAGGTATTGTAACTATCGGATTCATCTGCGCATTGG +TTGCTTCACTGGCTGCTTTCTTTAATTCATGCGCCACACTTTTTACAGAAGACTTTTACA +AGCCCATGTTTAAAAACAAAAGTGAAGCTACTTATGTTATGGTAGGACGTATCGCCACCG +TAGTTGTAGTTATCCTCGGCATGGCATGGATTCCTGTAATGATGAGCCTCGGCAGCCTTT +ACGACTACCTGCAAGGCATACAGTCCCTCCTTGCACCTGCAATGGTAGCCGTATTTGCAC +TTGGTATATTCTCCAAGAAAATCACTCCGAAAGCCGGTGAAACAGCCATGATTGTAGGTT +TCCTAATTGGTATGCTCCGTTTGCTGACTAATATCCTCACCAATACCGGAAAAGATGTGA +TGACCGGCTGGTTTTGGGAAAATACAACTTGGTTTTGGCAGACAAACTGGCTTATTTTTG +AAATATGGCTACTTGTATTCCTCTTGTTGTTAATGGTTGTTGTATCATGCTTCACTCCGG +CTCCAACCGCCAAACAAGTGGAAGCCATCACCTTTACCGGCAGTTACAAGGAACTTATCA +GAAAGAGTTGGAACAAATGGGATGTTATTACCTCTTTAGGTGTAGTTCTGCTTTGTGCAC +TATTCTATGCTTATTTCTGGTAAACCATAAATTAAAGGCTGTATGCTTTGAATTATCTTG +AAGAATACAGCCTGTTAATTTTAAACCGACGCTAAACCATGACCACCTATTATAACATAA +ACCCTAAATTCTATGTATCTGTAGACTGTATCATTTTCGGTTTTGACGAAGGAGAATTAA +AACTGCTTCTACTAAAACGAAATTTTGAACCGGCTATGGGAAAATGGTCTTTGATGGGAG +GATTCGTACAAGAAGACGAAAGTGTAGACGCTGCCGCCAAACGGGTATTGGCAGAACTGA +CCGGATTGGAAAATGTATATATGGAACAAGTAGGCACATTTGGCGACTTAGAGCGCGATC +CGGGAGAACGAGTCATATCAGTAGCCTATTATGCTTTAGTCAATGTAAATGAATATGACA +GGGAACTAGTGCAGCAGCACAATGCCCATTGGACAAAAATAGACGAACTGCCACAGCTCA +TTTTCGACCATCCGATCATGATTTCCAAAGCACGTGAACTGATGAAGCACAAAGCATCAT +ACAATCCAATTGGTTTTAACCTGCTGCCCGAACTGTTTACCCTGACACAGTTGCAAAATC +TATATGAGGCAATCTATGGCGAACCGATGGACAAGCGGAATTTCCGCAAGCGGGTAGCCG +AAATGGATTTCATTGAAAAAACAGATTTGATAGACAAATCAGGTTCCCGACGAGGTGCTT 
+ATCTTTATAAATTTAATGATAAGGCCTACAGAAAAGATCCGAAATTTAAACTTTAAAATA +ATGTGCCGATGCAATAATTTGCCAATTGACAAATTAGCAGATTGCCACATTACCCAATTA +TTCATTATGTTAGAAAAATTAAAAGAAAAAGTATTCCGTGCCAATCTGGATTTGGTGAAA +CACGGACTAGTTATTTTTACATGGGGAAATGTTTCTGCCATCGATCGTGAAACAGGTCTG +GTAGTAATCAAACCCAGTGGCGTATCATACGATGATATGAAAGCTGAAGACATGGTTGTA +GTAGATCTGGACGGCAACGTGGTAGAAGGTTCTCTTCGTCCGTCTTCAGATACTCCCACC +CATGTAGTTCTATATAAAGCATTCCCCGAAATAGGTGGAGTGGTACATACCCACTCCACC +TATGCTACCGCCTGGGCACAAGCCGGAATGGATATCCCCAATATCGGAACGACTCATGCC +GATTATTTCCATGACGCCATTCCTTGTACCGCAGATATGACTGAAGAGGAAGTAAAAGGT +GCCTACGAACAGGAAACAGGCAATGTGATTGTGAAACGTTTCAAGAACCTGAACCCTGTA +CATACTCCGGGAGTACTAGTAAAAAATCATGGTCCTTTTGCATGGGGAAAAGATGCCAAT +GATGCTGTACATAATGCAGTAGTCATGGAACAGGTAGCCAAAATGGCAAGTATCGCTTTT +GCCGCCAACCCTCATTTAACGATGAATTCTTTATTAATAGAAAAGCATTTCAGCCGCAAA +CACGGCCCCAATGCCTATTATGGACAGAAATAAGAAACAAAGAAATTAATAACCCTTTTA +AAATATAACATTATGGAAAAAGCATTTGATCAGTATGAAGTATGGTTTGTAACAGGAGCA +CAGCTTCTGTACGGAGGTGACGCAGTTATAGCAGTAGACGCTCACTCCAATGAAATGGTA +AACGGACTGAACGAATCAGGGAAACTTCCTGTAAAAGTAGTATATAAAGGAACAGCCAAC +TCTTCTAAAGAAGTGGAAGCAGTGTTCAAAGCAGCCAACAACGATGAAAAATGTATCGGT +GTCATCACTTGGATGCACACTTTCTCCCCAGCTAAAATGTGGATTCACGGTCTGCAACAG +TTAAAGAAACCACTGTTACATCTACACACTCAATTCAATAAGGAAATTCCTTGGGATACC +ATGGATATGGACTTCATGAATCTGAATCAATCCGCCCATGGTGACCGCGAATTCGGACAT +ATCTGTACCCGTATGCGCATCCGCCGCAAAGTAGTGGTAGGTTACTGGAAAGACGAAGAC +ACCCAGCACAAGATTGCCGTTTGGATGCGTGTTTGTGCAGGTTGGGCAGACTCTCAGGAT +ATGCTGATCATCCGTTTCGGCGATCAGATGAATAATGTAGCTGTAACTGACGGTGATAAA +GTGGAAGCAGAACAACGTATGGGGTATCACGTAGATTACTGTCCGGCAAGCGAACTGATG +GAATACCATAAAAACATCAAGGATACAGATGTAGAGGCACTTGTAGCAACTTACTTCAAC +GAATACGACCACGATGCTTCATTAGAAGATAAATCAACCGAAGCTTATCAGAAAGTATGG +AACGCTGCAAAAGCCGAATTAGCTCTTCGTGCCATCCTTAAAGCCAAAGGCGCCAAAGGA +TTCACTACTAATTTTGATGATTTGGGCCAAACAGACGGCAGCTACTTCGATCAGATTCCG +GGACTGGCTTCCCAGCGTCTGATGGCTGAAGGCTACGGATTCGGTGCGGAAGGTGACTGG +AAATCAGCAGCTCTCTACCGTACCGTATGGGTGATGAACCAAGGACTTTCCAAAGGTTGT +TCTTTCCTGGAAGATTACACATTGAACTTCGATGGTGCAAACAGTGCTATCCTGCAATCA 
+CATATGCTGGAAGTTTGCCCTCTTATCGCCGCTTCCAAACCACGTCTGGAAGTACACTTC +CTAGGCATAGGTATTCGCAAGAGCCAGACAGCCCGTCTTGTATTCACTTCAAAAGTAGGC +TCAGGTTGTACCGCCACTGTAGTAGACTTGGGTAACCGTTTCCGTCTGATCGTGAACGAC +GTAGAATGCATCGAGCCGAAACCGTTGCCCAAATTACCGGTTGCTTCCGCCCTGTGGATT +CCGATGCCTAATTTCGAAGTAGGTGCAGGCGCATGGATCCTGGCAGGTGGAACACATCAT +TCTTGCTTCTCTTATGATCTGACAGCAGAATATTGGGAAGACTATGCTGAGATTGCAGGT +ATCGAAATGATCCGTATCGACAAAGATACTACCATCAGCAACTTCAAGAAAGAGCTTCGC +ATGAACGAAGTCTACTATATGCTGAACAAAGCACTTTGCTAATTCATCCATGAAGAGGGG +ATGTGTCAAGGCTAATAAGATTCTTGGCACATCCCACCTTATTTTATCATCATTATTGTT +GTACCTTAAATTATAGGTTTTAATATGAAATCAGATGCAAAATCAACCATCCAAGCAGGA +AAAGCTATTCTAGGCATAGAATTCGGATCAACACGAATCAAAGCTGTTTTGATTGACCAG +GAAAACAAGCCCATTGCCCAGGGAAGCCACAGCTGGGAAAACCAACTGGTAGACGGACTG +TGGACTTACAGTGTGGAAGCTATCTGGCATGGCCTGCAAGACTGCTATGCAGATCTCCGT +TCCAACGTAAAAAAGCTATATGACACAGAAATAGAAACCTTGGCGGCAATCGGTGTCAGT +GCCATGATGCATGGTTATATGGCATTCAATAAAGAAGAAGAAATCCTTGTACCTTTCCGT +ACATGGAGAAATACCAATACCGGTCCGGCAGCCGCCGCTTTATCTGAATTATTCGTATAT +AACATTCCTCTGAGATGGAGCATTTCTCATTTATACCAAGCTATTTTAGACAATGAAGAA +CACGTAAGTAACATTGACTATCTGACCACCCTTGCAGGTTTCATTCATTGGCAAATTACA +GGTCAGAAAGTTCTGGGCATAGGTGATGCATCAGGAATGCTTCCCATAGACCCGGCTACC +AAGAATTATTCTGCCGAAATGATAGCCAAGTTCGACAAGTTGGTAGCTCCTAAAGGATAC +CCTTGGAAACTGACAGATATCCTGCCCAAAGTCTTACCCGCCGGCGAGAATGCAGGGTTC +CTTACCCCGGAAGGTGCCAAGAGGCTGGACGTGTCGGGCCACTTGAAAGCAGGTGTACCT +GTCTGCCCTCCGGAAGGAGATGCCGGAACCGGCATGGTAGCAACCAACGCTGTCAAGCAA +CGCACCGGGAATGTATCAGCAGGCACTTCCTCATTTTCCATGATTGTATTGGAAAAAGAG +TTGTCCAAGCCATACGAAATGATTGACATGGTTACCACTCCCGACGGAAGCCTTGTAGCT +ATGGTACATTGCAACAACTGCACCTCCGACCTCAATGCCTGGATCAATCTGTTCAAAGAA +TACCAAGAACTGCTGGGTATACCCGTAGATATGAATGAATTATATGGAAAACTTTACAAT +CATGCGCTCGCAGGCGATGCAGATTGCGGTGGTCTTATTTCGTATAATTATATTTCAGGT +GAACCCGTGACAGGACTTGCCGACGGAAGGCCATTGTTTGTACGTTCTGCAAATGACAAA +TTCAACCTCGCCAACTTTATGCGTACCCATTTATACGCTTCAGTCGGAGTTCTTAAGATA +GGTAATGACATTCTTTTCAATGAAGAGAAAATCAAAGTAGACAGAATCACAGGACACGGC +GGATTATTCAAAACGAAAGGTGTAGGCCAGAGAATACTTGCAGCAGCTATAAACTCCCCT 
+ATTTCCGTAATGGAAACTGCCGGTGAAGGTGGCGCATGGGGAATCGCCCTGCTAGGTTCT +TACCTTGTCAACAATGAAAAGAACCAATCCCTGGCAGATTTTCTGGAAGACAAAGTATTC +GCCGGTGATGCCGGTATTGAAATATCGCCAACAGCCGAAGATGTAGCCGGATTCAACACA +TATATCGAAAACTATAAGGCAGGACTGCCTGTTGAAGAAGCAGCGACCCGATTTAAGAAG +TGAGTCAAACGGGCTTAAAAAAGAGCCTCTTTACATGTTGCGGTTTACACATAAAGAGTT +ATCTTTGCAGATATTAAATAAAATAGCAAAGATAAACCGACTCACTAATAGCATTCAAAT +TATAATATTATTTTAATATGAACGAAAAGAAACTTATGAACCGTGCAGCGGACAACATCC +GTATTCTGGCTGCTTCGATGGTAGAGAAAGCTAATTCAGGACACCCGGGTGGAGCTATGG +GTGGCGCTGATTTTGTAAACGTGCTTTTCTCTGAGTTTTTAGTTTACGATCCTGAAAATC +CGCGTTGGGAAGGGCGTGACCGTTTCTTCCTTGATCCAGGCCACATGTCACCGATGCTGT +ATTCCACTTTGGCACTGACCGGCAAGTTCACCATGGAAGAACTGGCACAGTTCCGCCAAT +GGGGCAGTCCGACTCCGGGACACCCTGAAGTGGACATCATGCGCGGTATTGAAAATACTT +CCGGCCCGTTAGGACAAGGACACACTTTTGCAGTAGGTGCTGCTATCGCTGCCAAATTTC +TGAAAGCCCGTCTGGGTCATGTCATGGACCAGACCATCTATGCCTATATATCCGATGGGG +GTATTCAAGAAGAGATTTCTCAGGGTGCAGGCCGTCTGGCAGGTCATTTGGGGCTGGACA +ACTTGATCATGTTTTATGATTCAAACGACATCCAGCTGTCTACCGCTACCGATGCTGTTA +CCAGCGAAGATGTTGCCAAGAAATACGAAGCATGGCACTGGAAAGTAATTACCATCGACG +GTAATGATCCCGATGCAATCCGCACGGCTCTGACAGAAGCGAAAGCCGTAACCGGCCAGC +CCACATTAATTATCGGTAAGACCATTATGGGTAAAGGCGCACGCAAAGCGGATGATTCCA +GTTATGAACGTAACTGTGCCACCCACGGTGCTCCTTTGGGAGGTGATGCTTACATCAATA +CAATCAAGAACTTAGGCGGCGATCCAACCAATCCTTTCCAAATCTTCCCTGAAGTAAAAG +AGTTGTATGCCAAACGTGCAGAGGAACTGAAAAAAATTGTTGCAGAAAAATATGCGGCTA +AAGCTGAATGGACTAAAGCTAACCCCGAACTGGCTGCTAAATTGGAACTATGGTTCTCTG +GCAAAGCTCCGAAAGTAAATTGGAATGTTATCGAACAAAAAGCCGGAGATGCTACACGCA +GCGCTTCTGCCAAAGTTCTCGGTGTACTGGCTACAGAAGTAGAAAACATGATCGTTTCTT +CTGCCGACCTGTCAAATTCAGACAAGACCGACGGTTTCTTGAAGAAAACACATGCATTCA +CAAAAGACGACTTCACCGGTGCATTTCTGCAGGCAGGTGTTTCCGAATTAACCATGGCTT +GCTGCTGCCTGGGTATGGCACTTCACGGAGGTGTGATTGCTGCATGCGCTACCTTCTTCG +TATTCTCAGACTACATGAAACCCGCCATACGTATGGCTGCCTTGATGGAACTTCCCGTCA +AATTCATCTGGACACACGACGCATTCCGTGTAGGTGAAGACGGTCCTACTCACGAACCGG +TAGAACAAGAAGCACAAATCCGCCTGATGGAAAAACTGAAAAACCACAAAGGACACAATT +CTATGTTGGTACTCCGTCCGGCAGATGCGGAAGAAACCACTGTGGCATGGAAATTGGCCA 
+TGGAAAATACCAGCACCCCCACCGCACTGATCTTCTCCCGCCAGAATATCGCGAATCTGC +CTGCAGGAAACGACTATTCACAGGCTGCCAAAGGCGCTTATATTGTTGCAGGCTCCGATG +AAAATCCGGATGTAATCCTAGTCGCTTCAGGTTCCGAAGTTTCCACTTTGGAGGCAGGTG +CAGAACTTCTCCGCAAGGATGGCATAAAAATACGTATCGTATCCGCTCCTTCTGAGGGAT +TGTTCCGCAGCCAAAGCAAGGAATACCAAAACAGTATCATTCCCACAGGGGCTAAAGTAT +TCGGTCTGACTGCCGGTCTTCCTGTAAACCTTGAAGGTTTGGTAGGTGCCAACGGTAAGG +TATTCGGTCTGGAATCATTCGGATTCTCCGCTCCTTACAAAGTATTGGATGAAAAGCTGG +GCTTTACCGCACAGAACGTGTATAACCAAGTAAAAGAAATGCTGGCATGAAAAAAGTAGG +ACTGGCATCCGATCATGCAGGATTTGAACTGAAAGAGTTTGTAAAGACATGGTTAACTGA +AAAAGGCTATCCATGCAAAGATTTCGGTACATACAGTACAGAAAGCTGTGACTATGCAGA +TTATGCCCATCCGCTTGCACTGGCTATAGAAGCCGGAGAATGCGGACCCGGAGTGGCAAT +TTGCGGAAGCGGTGAAGGCATCAGCATGACATTGAACAAACATCAGGGTATCCGTGCGGC +ACTATGCTGGATGCCCGAGATAGCCCATCTGTCACGTCAGCACAATGACGCCAATGTTCT +GGTAATGCCGGGACGATTCATTGACCATGAAATGGCCGAAAAAATATTGGATGAATTCTT +TAACAGCGGCTTCGAAGGCGGACGTCACCAAAAGCGCATTGAAAAGATACCTGTAAAGTA +AAATTCACGCAATAATAATAAAAGTAAAAAAAGAGAGGCATTATAGCTTCTCTTTTTTTT +TATTCCTATCTTTGCTTAGATTAAAATATAAAATAAAAAATATTACTATGAAGAAGATTC +TTTTTTTAATGCTCCTATGTCTGCCATTCATCGCTATGGCACAGACAGATCCCAAATATC +TGGCGGGAGCGATAACTATGGATGACGGTAAAGTTTCTTTCAAAACAGAGATACAGGCAC +CGTCTTTAACGAAAGACCAGCTATATGGCACCATGCTGAAATGGGCCACAGAACGTTTCA +AGCCCGAAGGCAAGTTCAATGCACGTGTTCTTTACACCAATGAAGATGAAGGAACCATTG +CAGCAGGTGGCGAAGAATATCTGGTATTCTCCTCCTCTGCATTGTCATTGGACAGAACCA +GAATTTATTATCAGTTGTTTATCACCTGCGAAAACGGAAAATGTGATATCGAAATGACCC +GTATCCGTTATTGGTATGATGAAGCTCGTGACGGAGGAGAAAAATACAGTGCAGAGGAAT +GGATTGTAGACGATATGGCTTTAAACAAATCGAAGACCAAGCTGGCTCCCATCTGCGGCA +AATTCAGAAGAGAGACTATTGACTTGAAAGACACACTGTTCAAATCCATCCAAGATACAT +TGGGCAACAAGGTCTTGAACAACTCACAAATTGCAGTTGCTCCTGCTCCGGGCGTGACAG +CTACTCCGATATCAAATGCAACAACAATCGTTACCGCCACTCCGGTAACTCCCCCTGCCC +AACCGGCTGTCATCGGTGGTTCTGAGGGTAATACCGAAATAAAAGCAGCCAATAATGCAA +CTCCCAGCAAAGAACAAAGCATAGATGACCAAATCAAGGCATCCTCACGTATGACAATTA +CTGCCGGAAACGATGAACAATTCGAAATCGGTAAAGAATGCTGGGGAGGTTTCGGCCAGT +TATTCGGCAAAGAAGTAGCATTCTGCGTAATCGACCAAGCCAAATCAATGGGCAATATGC 
+TGATGGATCAGAGCGATAATTATAAAATTTCGTTCTACAAGCAAGGTAATAGCGAGCCGT +GGTTGATAGTAAACTGCAAGAAACTGATGAAACAGACCGTTACCGGTGAGGAAGCAAAAA +AGATGAACCCCAGTAATGACGGTCAGAAGGCATATAATATGTATGTAGGTGAAGTAATAA +AATAGTCTTTTCTCCCATATATTTATACCGTGACAGTTTATTCTGATAATACATAAAACT +TTTTTTAGGGGGAGATAACCTGACAACTCCCCCTAAAAAACATACAAACGATCTACCATA +TACTTCCCCAAAAGCACCATGACAAATACCATTCAAACATGTCTCCCCTTTAAGAATAAA +TAACACCGTTCCTCTATCAACCGTTTCACTTGAAACCATTTCTATAATAATCAAACCATT +AGAAGAAATCATTCTTCCATTTTTTCCCTATCTTTCTCTCCTTTTTTTAAATGTTTTTAG +CAAGAAAGCCGTTTTTTCTGCTCCAATCTTCCCTTTCATAACAAATAAAGAGTATCTTTG +TAAACAACAATGAGACAATCATGCCTTATGACGGAACAATTACAGGACATAAAGACACTT +ATCGAACAGGGTGATACGGAAAGAGCCATTCATGCGCTAACTAATTTCATCCGAAACGAT +GCGCACGTCAACGATGAACCCTATTACTTACTGGGTAACGCCTACCGCAAAATGGGAGAC +TGGCAACAAGCTTTGAACAATTATCTGGAAGCCATCGAGCGTAATCCGGAAAGTCCGGCC +GTTTCCGCCCGTGACATGATTATGAATATTCTGAATTTCTATAACAAAGACATGTATAAT +CAATAATAACTAACCCATGGCAAAGATTAAAGGTGCTATTGTAGTAGACACAGAGCGTTG +CAAAGGTTGCAATCTATGTGTAGTGGCATGTCCGCTGCATGTAATCGCGCTCAATGCCAA +ACAAGTGAATAAAAAAGGATACACATTCGCCCACCAGGTATTAGAAGATACCTGCAATGG +TTGCGCATCATGTGCCACAGTCTGTCCGGACGGATGCATCACTGTGTATAAAGTAAAACA +ACAATAAATGTAACAAATATGGCAGAAGAAGTCGTTTTAATGAAGGGAAATGAAGCCATA +GCCCACGCCGCTATCCGTATCGGAGTAGACGGTTACTTTGGCTATCCCATCACTCCCCAA +TCGGAAATATTGGAAACGCTGGCCGAAGAGAAACCTTGGGAAACTACCGGTATGGTGGTG +TTGCAGGCAGAAAGTGAAGTCGCAGCAATCAACATGGTATATGGCGGTGCAGCGAGCGGT +AAAATGGTAATGACCTCATCCTCCAGTCCCGGCGTCAGTTTAAAGCAGGAAGGCATCTCG +TACATTGCCGGTGCCGAACTCCCCTGCCTGATTGTCAATGTCATGCGCGGAGGTCCCGGA +TTAGGAACCATCCAACCTAGTCAGGCCGACTATTTCCAAACAGTGAAAGGCGGCGGTCAC +GGAGACTATCGCCTCATAGCCCTGGCTCCTGCATCAGTACAGGAAATGGCAGATTTCGTA +GGTATAGCTTTTGATCTGGCTTTCAAATACCGCAATCCCGCCATTATTCTGGCAGACGGC +GTTATCGGACAGATGATGGAAAAAGTAGTATTGCCCGAACAGCGCACACGCCTGACAGAT +GAAGAAGTCATAGCCCGTTGTCCGTGGGCCACTACCGGCAGAACCCACCACCGTACTCCC +AACATCATCACCTCATTGGAACTCGATCCTGCCGAGATGGAAAAACGTAATATCCATCTT +CAGAAAAAATATGCAGAAATAGAAGAAAACGAAGTACGTTTCGAAGAACTTCACTGCGAA +GATGCCGAATATTTGATCGTAGCTTTCGGTTCTTGTGCCCGCATTGCCCAAAAAGCAATG 
+GAAATGGCTCGGGAAGAAGGCATCAAAGTTGGTTTGCTCCGTCCTATTACCCTATGGCCG +TTCCCGTCGAAAGCTATTGCAGCACGCGCCGCACAGGTAAAAGGTATCCTCACTGTAGAA +TTGAACGCCGGACAAATGGTAGAAGATGTCCGTCTGGCCGTAGAATGCAAAGTGCCTGTA +GAACACTTCGGACGTCTGGGTGGTATTGTTCCCGATCCGGATGAAGTAATCACTGCACTG +AAAGAGAAACTAATAAAATAAGACCGTATGAATATCGATCTGATCAGAAACATATTGAAC +ATACTTTTCATGGCACTGGCACTAGCGGCTGTCATCACTTACTTCATGGCAAGTGACTTC +AAAGTATTCATCTATGTCTGTGCAGCAGCTATTTTTCTCAAGCTGATGGAATTCTTTATG +CGGTTCATGTTATAAGCAATAGGAGGAGAAACTATGACAAAAGAAGAAATCATAAAACCC +GAAAACCTGGTTTATAAGAAACCGACACTGATGAATGACAATCCGATGCATTACTGCCCC +GGATGCAGCCACGGTGTGGTTCACAAGTTAATAGCCGAAGTTATTGAAGAAATGGGCATG +GAAGATAAGGCAATCGGTGTGTCACCGGTAGGATGTGCCGTTTTTGCATACAACTATCTA +GACATCGATTGGCAAGAAGCCGCCCACGGACGCGCACCAGCCGTAGCTACCGCCATCAAG +CGTTTATGGCCCGGACGTCTGGTATTTACCTACCAGGGAGATGGTGACCTGGCATGTATC +GGCACTGCCGAAACCATCCATGCTTTAAATCGTGGCGAAAACATTACTATCATCTTTATC +AACAACGCCATTTATGGAATGACCGGTGGGCAAATGGCTCCTACCACGCTGGTTGGCATG +AAGACAGCCACCTGCCCCTATGGTCGTGATGTAGCCATTCATGGCTATCCGCTGAAAATG +ACAGAAATTGCCGCCACTTTGGAAGGCACCGCCTACGTTACCCGCCAGGCCGTACATACG +GTTCCTGCCATCCGCAAAGCAAAAAAGGCTATCCGCAAGGCATTCGAAAACTCAATGAAT +GGCAAAGGATCCAATTTGGTGGAAATTGTATCGAC +>NODE_5_length_39999_cov_63.277_ID_9 +CGATCAATTGGTAACAGATATTCGTGAAATGAACCAGAAACCAGTTAAAAAGCAATTGCG +TTTAGGCTTAACTACTTTATTTGCTGTACAATTTATGAAAGAAATTTCACGATTTTTAAC +AACCCATCCACATGTGAATTTAATATTACAGCAAGATGGCTCGCCAAAATTACAAACGAT +GTTAGCAAATAAAGAAATTGATATGGGACTAATTTCTTTCCCAAATACCTTACCTGAAAT +TATTCATATTGAACCTTTAGAAACGACTACCAAAGGCTACCATGTTTATGTCGTAGTACC +AGAATCAAATCCTCTCTCCCAATATGAAAAATTAACTTTTAAAGATTTGAAAGATCAACG +ATTTTCATCGTTAAGTGATAATTTTATGATTGGTCGCCTACTACTAGATCGGACTCGGAG +CTTCGGTTATGAGCCGAATATCATTTTGCATAACGATGATTTACAAGTACTTCTTTATAG +TTTACAAAAAAATAATTCGATTTGTTTGCTGCCGATTGAATATTATGAAGTGGGAAAAAG +TCAGGGACTGAAATGGATTCCTTTAAAAGATAAGTTTGACTATTTCCCCATCGGCATTGC +TTTGCGCCGCGATTTTAGTATGACAGAAGATGTTAGAGATTTTATCCAAATCATTAAAGA +AAATTAATGGATAATCTAGCCTTGATAAGCAAAGACGACTTTACTATCATAAAGTTTCAA +CACTTTTTCAAATAAGAATTTTGAGAATAAACCCGCGACAATTGGAACAACAAACCAAGA 
+AAAGAATAAGGGTAAGATCCCTAAGCCCGCATCCAATGAAGCAAGTGGTCCCACGAGTCC +AACCAAGCCAAATCCAGCCGATTGTGGGGTTCCTGAAATGTTGAATAGAACCACAGGAAT +TGCTGAAATAGTCGCTGTAAATAAACAAGGAATTAAAATAATTGGGTATTTAAATAAATT +AGGCATCATCATTTTCATGCCGCCCAAAGAGACAGCCAACGTCACTCCCGATTGGTTGAC +ATTCCAAGAATTAATCACAAGAACAACAGTTGTGGCTGCAATTCCCATTGCAGCAGCACC +TGCAGACAAGCCATTCAACTGAATCGCCAAACCGATTGCCACAGTAGTGATTGGTGAAAT +AATCAGTGCAGCAAATGAACAAGCAATTAAAATACTCATGATAATTGGCTGGAAATCAGT +AAAATTGTTAATCACTTTCCCAATGGCGACCGTAATTTGTGTCACATATGGATAAATCAA +CATGCCAAACAAACCTGCACCTACTCCAACAACGATCGGTAAAGCAATGATTTCCACTGA +ACCGAATTTTTTATCGATGACTAATAAAAGTAAGACTGCAACAGAAGCGGTAATCATAAT +ATTAATGATGTCACCAGTGCCTGCACCAATAAAGCCTTTCACTTCTGAATTAAATTTAAT +CACGCCAGAGCCAGCAAATGCTGCACCGCCAGCAATCATCATTTTCTGTGGCGTTAAGCC +AAATTGAAAAGCAATCAGTCCACCAATAATTAAAGGGGTGGCTAGTTGGAAAATTTGCGC +CGCATGAATAATCATTTCGATAATCTTGTATTCTGCAAAATATTTTAAAATGGCTCCTAA +TACAGCATTGGGAATTAAGGCGATAATGGTGCCAGAAGCTGTTCCAGCTAACACTTTGTT +AAAAAATATTCTTGGGGTTAATTGAGCATCCAGTTTATCAGTGGGCATTTTCAACACTCC +TATTGATTATTTTTGTTTTTTTAGAACAAGTTTGAGAATGATTGAACTTTCTATTAGATA +AAGAGCGGCCTCAAAGAAGAGCGCTTATTTCTTTGAGGCGCTTTTTATTTAACTAAAGCA +AGGAACGATTTTTAATATACATCCGTATCCGCATTATATTTCGCTAAATTATCTTTAACA +TCTCGCAAGAACTTACCTGCTTGTTGTCCATCTAAAATTCGATGATCAATAGAAAGACAT +AAATTAACCATATCTGCCACTTTAAAGCCTCCATCTGCGGTTGGCACAAGGCGTTTATTA +ATTGACTCTACTTGTAATATCGCAGCTTGTGGATGGTTAATAATTCCCATCGATTGGACG +GAACCTAGTGTCCCAGTATTATTAAGAGTAAAGGTCCCTCCTTGCATTTCCTTGCTTGCT +AGCGTTCCTTGTCGGACTTCTTGTGCCAAGCGATTGATTTCTTTAGCTAGCCCTGCAATC +GAATAGTTATCCGCTTGTTGAATCACTGGCACGTAAAGATGCTCGTCTGTCGTTACAGCT +ATGGATAAATTAACATCTTTATGATAAATAATTGAGCCATCATCCCAAGATGTATTGATT +TTTGGATTTTTCTTTAACGCTTGAATGACTGCTTTGGCAAAAAATGGGAAGAAGCTTAGA +GACAGGCCTTCTTGTTGTTTAAATTCATCTTTTAATGAATTTCTAAGTTGGACTAAGTTG +GTCACATCCGCTTCCACCATTAACCAAGCGTGAGGGATTTCGTTGACACTTTGGACCATT +TTTTTAGCAATCGCTTTACGTACAGGATCTGCAGAGACAATTTTATCTGGGCTTGTTTCA +GTTGGACTAGCTGTTGCGGCGCTTTCATTTTGCGAAGCAACTGGCTCCTCTGAAATTGAA +GGGCTAGTTCCTGGACTCACTGTTTTCTCAGGTGTTCTTCCCTGTGTTGGTGTAAAGTTT 
+GTAACATCTTTTCTAGTAATTCGACCATCACGACCTGTCCCTGTAACTTGTGTTAAATCA +ATCTTTTTTTCTTGGGCAATTTTAAGGACTGCTGGGGAATAACGACCATTATTTTTTTGA +TGGGACGTCGCAGTGCTTGTTGTTGCAACCGTTTCATGTTCTTGCGCTTGTTCCGCACTT +GCTTCTTTCACTGGTGCTAAAGTAGCTACTTCTGTTTTTTCGGTTGTTTCTTCTGTCTCT +AACGTCATCACCGCTGTTCCGATGGGCACATCTGTATCTAGAGAAATCAGAAATTCTTTT +ACTACACCATCAAAATCTGATGGGACTTCCGTGGTGACTTTATCAGAAACGACTTCCATT +AAGGGATCATAGCGTTTCACTGAATCTCCTGGTTTAACTAACCATTGGACAATGGCTGCC +TCTGTGACACTTTCGCCTAGATGAGGCATTTTGATTTCTTTTGTGGCCATTTATTTTTTC +CTCCTTGCTTCAATTAAAATTCTGCTAATTCTTTCATTGCTGCTAATACTTGTTCTTCAT +TGATTAAAAATTCTCTTTCTAAAGGTAATGCATAAGGCATGCTTGGACAATCAGGTCCTG +CAAGTCGTTGAATCGGTGCATCTAAATCAAAGAGAGCATCTTCTGAAATCATTGCCGCAA +TTTCACTCATCACGCTGCCTTCTTTATTGTCTTCTGTTACGAGTAAGACCTTCCCTGTTT +TCTTAGCTGCAGCTACTAATGTTTCGCGATCTAAAGGATATAATGAGCGAACATCGACGA +TTTCGGCATCAATTCCTTCAGCGACTAGTTTTTCCGCCGCAGCTAAAGCTAATTGCAAGG +TCATGCCGTAACTAATCACGGTTAAATCACTACCCGTTCTAACGACATTGGCTTTGTCAA +TTGGTACGATATAATCATCTGCAGGCACTTCATCTTTTAGTAAACGATATAAACGCTTAT +GTTCGTAAAAAATGACTGGATCATCGGAACGAATCGCTGCTTTAATCATCCCTTTTGCAT +CATAAGGATTAGAGGGGGTCACAACTCTTAATCCTGGTTGTCCACAAAAAACTTTTTCTG +TAGACTGAGAATGATACAGCCCACCACGAACACCGCCGCCATAAGGGGTCCGATAAACGA +TCGGCGCAGTCCAATCGCCTTTTGTTCTGTAGCGCATTGTCCGAGCCTCTGATAACAATT +GATTGGTTGCAGGCAAAATGTAATCCGCAAATTGAAATTCACCAATTGCGCGATAGCCCA +TTAAACCTAAGCCGACAGCTAAACCACCAATTAAACCTTCTGTTAACGGTGTATTAAAAC +AACGCTCGTCACCGTATTTAGCAGCCAAGCCCTTTGTTACACCGAACACGCCGCCTTTGT +CGCCGCCGACATCTTCTCCAAAAATAACTACTTTTTCATCACGAGCCATTTCTTCAGAAA +TTCCTAAGTTAATTGCTTCTAAATAAGTCATCTCAGCCATTATATTTCTTCTCCTCTTCT +CTTATTTTGCATACACTTCTTCTAAAATTGATGTCGGTACAGGATCTGGCATTGCTTCTG +CTTCATCCGTTGCTTGATTGATTTCTGCACGAATTTCTTCATCAATTTTGGCAATGTCTT +CGTCTGTTAAATAGCCCTCTTCTAATAATTGTTTTTCAAAAAGCTTCACTGCATCGTTCT +TTTTCATTTCTTCAATTTCTTCTTTTGAACGATAAACAGATTGATCATCGTCAGCGGAAT +GAGAAGTCAAGCGCGAAACCATTAATTCAATCAATTTTGGTCCTTTTTTCCCGCGAGCCG +CTTTTACTGCTTCTTTAAATGCTAGATAGACTTCAGTAAAATCACTACCATCAACGGTTA +CACCTTCAAAGCCATAAGCTTTCGCGCGATCGGCCATTCGTTTATTGGCATACTGTTCTT 
+CAATTGGGACAGAAATCGCATATTCATTATTTTCAACAACAAAAATGACTGGTAATTTTT +TTACGCCTGCAAAGTTCATAGCTTCTTGGACTTCTCCTTGATTGGCAGAGCCTTCCCCAG +TGGTGGTCAATGCAACAAAATCAGCTTTTTGAAGTTGCGCTGCATAACCAACACCTGTTG +CTAATGGCATTTGTGTACTTACTGTTGAAGAGAAGGAAACAATATTATGCTCTTTTGAAC +CATAATGATTCGGCATTTGACGACCATGGGAAGAAGGATCCGCTTCTTTTCCAAAAGAAC +CCATTAAAATATCTTTGGAGGTCATGCCCCAAACCAAGCACGCGGTCATATCACGATAAT +ACGGTAAAAAATAATCTTTTTGAGGATCAAAAGCCATCGCCATCGCTACTTGTGCAACTT +CTGCCCCTTGACCAGAAATATTGAAAGAGGTCTTACCAATCCTTGTTAATTGCCACAACC +GTTCGTCTAAACGACGTCCTCGTAGTACTTGACGATATGCCTGAATCAATTCTTCTTTTG +ATAAACCCGATTTTTTAAGCGTTTTCATTTTCATCAACCTTTCTCTATTTGTGAATCGCT +AAGCCATAAGTATCTAAGGCTGCTTCTTGCAATACTTCTGTCATTGTTGGATGCGCATGG +ATTGCTTCACCAATTTCAATGGGCGCGGCATCTAAATACATGGCAGTACTTGCTTCGGCA +ATTAAATCCGTGACATGTGGCCCAATCATAGAAACCCCTAATAAATCATCGGTCTTCTTG +TCACGAATTACTTCTATAAAGCCATCTGTTTCTCCATAAACAAGTGATTTACCATTACCA +TTAAAATTAAAGGTGCCAATCACTACTTCTTTATCTGCTGGTAAAGTTTCTCTAGTATAG +CCGACACTTGCTATTTCAGGATTTGTATAAACACCTCGAGGCACGTTTGTATAATTTAAA +GGTTCAACTGTCTCGCCTAAAAGATGCTGAACAGCTAACTCTCCTTCTTTCATAGCCACA +TGTGCTAGTTGGAGTGTATCGATACAATCACCGATCGCATAAATATGTCCTTCTGTGGTT +TGATAAAATTCATTTACTTCAATGCCTTTGTCAGTATATTTCACTGAGGTATTCTGTAAC +CCTAACTTATTGATATTGGGTTGACGCCCAATTGCTACCATGACTTTGTCAACAGCGAGG +CTTTCTTGTCCTGCAACTTCTATTTGAACCTTCTGCCCGGTAACTTTCGCTTCTTGGACT +TTGCTACCTAATAAAATGTTAATTCCTCGTTGCTCTAAACGTTTCTTTAATTCTTTAGAA +ATTGTTGCACTCTCATTTATAAGTAACCGATCCAAAAATTCAATGATAGTTACGTTGACA +CCTAAACTATTTAATAAAGAAGCCCACTCGACACCAATAACGCCACCACCAATAATTGCA +ATTGATTCTGGTAGCTCTTCTAATTCAAGCATCCCATCAGAAGATAAGATAAATTCTTCA +TCTAGTGGCAAATTAGGTAACGTTTTAGGACTAGAACCTGTCGCAATAATGACATTTTTA +GGAACGATAATTTCTTCTTCCCGTGTTGGATCATTAAACGTAACTGCCACTGCGCCAGAG +ACTGGTGAAAAAATGGAGGGACCTAAAATCGCACCTTCGCCTGCTAACATTTTGATTTTA +TTTTTTTTGCATAAGCCTTCAACACCTTTGTGTAACTGCTCAATAATTCCTTCTTTCCGT +TGTTGTATTTTAGAAAAATCAATGGACGCTGCCTCTGTTTCAATCCCAAAAGAAGCCGCT +TGTTTTAAGGTATCAAAAACCTCTGCGCTTCTTAATAAAGCTTTTGTAGGGATACAGCCT +TTGTGCAAACAAGTGCCACCTAATTTGTATTTTTCAACAATTGTGACGTTTAGCCCTTTT 
+TGTGCGGCTCGAATTGCTGCTACATAGCCTCCTGTTCCGCCACCTAAAATCAGTAAATCT +GTTTGTTCCGCCATTTTGATCACTCCTGCTCAATTTTTGCTTCACTATAATTTAAAGCTT +GTTCTTCCCCTGTTAATACTCGATTGACGCCTTCATATAAAGCAGCCATCTCCATTTCAC +CTGGGTAAACCTTAATTGGTGCAATCCAAGTGACTTTTTGACTAATTTCTTGAACAACGG +TTTGCGAATAGGCTGCGCCCCCTGTTAAAATAATTGCATCAATTGTGCCTTCAAGAACCA +CGGCCATCTCGCCAATACTTTTTGCGATTTGGTAACACATTCCTTTTAAATAGTAATTTG +CTGTTTGATCACCTGCAGCTATTTGTGCTTGGATATGCCGTAAATCTGTTTCACCTAGGT +ATGATTTAAGGCCGCTATTACCCGCAATTAGCTTTTTCACTTGGCTAATCGTTAACTCCT +GTTCAAGTATCCATTGCGCAAATTCAACGAGCGGTAAGGCACCACTGCGTTCTGGCGTAT +AAGGACCTTCACCATCAAGCCCATTTACCACATCCACCATGCGGCCTTTTTGATGCGCCC +CCAAGCTAATGCCCCCACCTAAATGAACAACGATAAAATTGCTTTGCTCATATGTTTTCC +CTAAATCCTCAGCGATTTTTCGAGCGACCGCTTTTTGATTTAAGGCGTGACCAACGCTAC +GCCGGTGAATGCCTTTTAATCCGGAAATCCTGGCTAATGGTTGTAGCTCATCTACAACAA +CAGGATCGACAATAAAGGCTGGCACGTGATATTTTTCTGCAAACTCATTAGCTAAAATCG +CCCCTAAATTTGAAGCATGGGTATTAAAGCGTTCTGTCCGCAAGTCTTCTAGCATTTGTT +GATCGACAAGATACGTACCGCCAGGAATAGGCTTAAGCAAACCACCACGCCCTACGACAG +CGGCTAATTGTGTGATGTTATGTGTTTCTAAAAATTCCGCGATCATTTGTTTTCGAAATG +GTGTTTGACTGACAACATTTTCAAAAGGTGCCAATTCTTGGACGCTATGTCTAAGCGTTT +CTTCCGCCAAGCAATCATGATTAGCAAAAAGCGCTAACTTCGTGGAGGTCGATCCAGGAT +TAATAACCAATACTGTTTCCATTGTTTTCTGCTCCTTTTTTTAAACTTGTCTCATGGCAA +ATCTTAATGAATGAAATTTGCTTTCTGTCGAATCACTTCTAGATGTTAAAACGACTGGGA +CTTTTGTGCCAACAATCGTTCCTCCCACCTTTGCGTGCCCAAACAAGGTTAATGATTTGT +ATAAACAATTTCCAACATCAATAGTTGGGACCACTAAAATATCTGCATCCCCCATGATAG +GACCGCTGTAACGTTTATGCGCAACCGCTTCTTCAGAGGTCGCTAAATCAAGCGAAAGTG +GCCCAAAAACCGTAGCCTCTTGTTGATCATTAAAATGTGCCGTGACTTCTTTTGCTAAAA +CAGACGAAGGCATTTTAGGATTGAAATTTTCCGCTGCGCTTAACAAAGCAATTTTCGGAT +GATGCAGTCCCAATTTTTGGGCGACTTCTTTAGCATTTTCAACAATTTCAATGAGGGTCG +CTTGAGTGGGGGCGATATTCATCGCACAATCGGTTAACAAGAAGGTTTTTCCCGCAGGCA +GCTCCACCATTGCTACATGGGAAAGAATCGGTTTATTTTTTAATTGATGCTCACTTTTCA +ACATTTCTTTTAGTAATGTGTGAGTCTGAATAATTCCTTTCAATAAAATTTGTGCTTGAC +CGGTTGCAACTAGACTGACAGCTTCCTGTGCTACCGCGGCCTCATCTGAGCAATGAACAT +ATTTCCAGAGATTTTCAGTATCAAGATTTTCATTTGTATCAAATACAATAAACTGCAACG 
+GTTGCTCGGCTTCTTTTAGCGCTTTTTTAACTAGCTGTAAAATCTCTGGTTGTGAACCTC +CTGCAATTGAAACAGTGATCATTTTCTCACCTCATTTTTCTTCTTTACACTTTAAATCTA +CCTCAGTATGTAAGCGCATACAAATTGTTAATTTTTATGTCCTTATTAATATTTTTTATA +GAGAAGGAAAAGCTTAGTAAATCAAGCTTTTCTTTCTCTAAACAATTGTTATCTAATTTA +CTTTTCATGAAAAAACTTCTTTTCTAAGAATGAAAGGAAAACTTATGCTTCGGCACAGCT +GTTTATTTATGTCAGATTCTAAAGCATGTTATACTTTAGATACTTATTTTTAGGAGGAAT +TAAATGGCACTACTACTATTCTTTTTGTTTATCGCCCTACTAGGATTTGGTATATTAAAA +ATTAATAACCGCAGTATCCTCGGCGGTATCACTCTGGCTTCCGGCACCTTATTGTCATTA +GTCACCTTACTATTTATCGGACTAGACAAAATTTATTTACATTTTAAGAATGGCGACCTA +ATTACCTTGGCCATTGCTTATCTATTAATTCCCGCTGTGTTTATCGGCATTTGCCTTTAC +TTTATTTTTAACTCACGTACGATGCAAACGAAAGAAGGCAAAAGTGTCACGGCTAAATTG +TCGGCGGGCTTAGGGTTGAACTTATTAATTGTTTTACCAGCCTTTCTATATTTATTATCA +GTTGGCACAGCGCAAATACCTTATGTGCTTTTTCTGTTCTTACTTTTTCTATTGTTAATG +GATCTTTTGCTGACCTTTCTATTTGCTGCTTACGTCTTGTATTCATGGATGTACCAAATG +ATTCCTTTAAAAAAAGCGGTTGATTATATCATTGTTTTAGGTTCGGGAATTCGTAGCGAG +GAAGTACCTCCACTTTTGAAGAGTCGGTTAGATAAAGGGATTGAATATTATGAAAAAAAT +CCCACCGCTAAATTTGTAGTCAGCGGTGGTCAAGGTCCTGACGAACCTGTGGCCGAAGCT +TTCGCAATGAAAAAATATTTGCTTTCACAAAATATTCCTGCAGAAGCAATTTTGATGGAG +GACCAATCCACAACCACTTATGAAAATATGTTGTTTTCTAAAGCAATAATTCAGGCAGAT +TGGCAAAAGATGCCGTCTGATTCTAAACAACCCTCTGTAATTTTTTCAACAAATAACTAT +CATGTCTTACGAGGAGCTATGTATGCCCATCGTGTCGGCTTAAAAGCTGAAGGTGTCGGG +GCGCCAACTGCCTTATATTTTTTACCAACTGCTCTAATCCGAGAATACACTGCCTTACTG +GTTCATGATAAGCGAATTGTGCTTTTTGTTTTTCTACTTGTCACTCTTCTTTTAGGAATC +AGTATCTTACCCATCTAAAAAAACAGCGACTCACATTTTTGTGAGTCGCTGTTTTTATTC +TTTTAATATAAAAAGGTTTAATGAACGTAATTGGCTATCAATGCAATCAGCCATAAATGA +AGCGGATAGAATCCGTAAAATAAATATTTGAAAAATGGTGCTTTGCTCCCTCTCTCACCA +TTGTAAAGCGATATAAATGGTAACACAGTGATAAACATAAAATCAGAATTATAAGCTAAC +ATTTCAATTGTCGTTGGCCAATCTCCCAACCATTGAAAACTTGTAACTAAGAAAAAGAGC +GCTAATGCACCATATAAACAGTTCCGTAAGACAAGTCTCTTTCTGGCAAGATACGTAATC +AACATAAAAGGTAACATCACAACGCCACCTTCAGCAAACATTGCTCCTATTCCTAAAATG +GCTAATATACTAATCAATAAAACAATTTTTAAACTGATTTTTGGCATCTCTAGCATCGCT +TTTGTCACAATTAACATACTGACACCAAGGGCTAAGGTGAAGAAAATATTGTTGTGAACA 
+GCGACAGCTGGATTATTCACTAAATGATTTAGAAGCGTATTTCCAACAAACATGATTGCT +GCCCAAATATACAAACGACCATTGTAACGATAAATATTCCGTGTATAGTTGAACCCTTCT +ACAGCCATGTAACCAAAAAACACACCGACACAACGAGTAATTACATGAAAAATCAGTGCC +CATTCTGGCGGAACAAAATAACTAATATGGTCAAGAACCATTAACCCCATCATCAATAAT +TTTAAGCGATTTGCGTTCATGAATCGTCTCCTTTATTTTTTCTACAGAAACTATCATATC +TTTTTATATAACAGAAAACAACGAACATTCTTGTTAGATTCAACCATTCTTTTTGTCACT +TTTGTTAAACTTGGTACCGCAAAGATAAATCTTCAAATAAATAATGCTGGTATTTAAATT +GTTCAAAATGAATTTTTCGAATTTCTTGCTTAATGGCTATTAAGCATTCATCGACCGTCA +TCAATGATTCTTCTTTCAGAAGCGCCTCTTCTATTAATAATTTGCATTGGCAGTCAATCA +ATAAAAGTTGACTATAGAATGTGTAAAATGTTTCTTTATTTAATTTTTCAATCAAATTTT +TAGCTTGATTCTGCAGCTTTTGATAACATTTGGTCAATTTTTGATAATCCATTAAATCAA +CTAACTGACCTTGTTTTAACATGTTACTAAAGTCTAAATCCCAAAGCATTTCTTTAGTCG +GCTGTTGCAATAAATAATACATAGAATGACTACTAATAATTTCAGTGGTCGGTTCTTCAA +TTGTTTCATAAAAAGTCACAAAATATTCTCGCTCAATCACAGTAATAATTTCTTCTTCTG +AAAGCTCACAATCTTCTAAATTTTGATAGAATATCAATAATTTAATCCGCTCATCTATTT +TATTCAGCGAAGCGACTAATTCAGGAAAAAGAACCGACTGACAATTCTCTACTAATTCTC +TTGCCTTATTTTGTTCATTTTTATAAAAGCTGTATAACTTTTTCCGCTGTTTTAACGAGT +TTAATAAGTTATCGGGTATATTCCAGTAATATTGTTTCTTCATAGCATTCCCACTCTTCT +TTTTTATGTACAATTTTACAAAATTGGTCTTAGCCAATTTCGTCTCTAGTTATTCTATCA +AAAAAGTGACGCTTTTTTACTGACAATAATTACTCGTTTTACGCCCAAAAAGTGAAAATC +TAGCTGTTAAGTAATTATTGGGGCTTGTTTCCCGCGGGCCGAAAAAACTTTTTCCTTTTA +AAAGTCCAGAAAAAGACCTACACTAGAGATAGACAAACGTTGGAGGGATTTAAATGGATT +TACACTTAACGAATAAATTAGCATTAATTACTGGTTCAACCAAAGGAATTGGCAAAGCAA +TTGCGATTGAAATGGCTCGCGAAGGGACCGATGTCATTATCAATGGGCGTAATGAAGCCG +AAGTAATCAAAGTTGTTGAAGAAATACAAACAATGTTTCCAGACACTCATCCTCAAGCAG +GAACTGCCGATATTTCCATTGAAAGTCAACGAACTACTTTGCTTGAAAAATTCCCTAAAG +TCGACATTTTAGTGAACAATATGGGGATTTTTGAACCAATGGAATACTGGGACATCGATG +ACGCCACTTGGGAAAAATTTTTTACTGTGAACGTGTTGTCAGGCAATGCATTAGCAAAAG +CTTATCTACCTAAAATGCTTGCACAAGATTTTGGTCGCATTATTTTCATCGCTAGCGAAG +AAGCGGTGATGCCTTCTGGCGAAATGCCCCAATATAGCATGACAAAAACGATGAATCTTT +CCTTAGCTAAAAGTTTATCCAACTTAACTGTCGGCACACATGTCACCGTTAACACGGTTA +TGCCTGGCTCAACCCTTACCGAAGGTGTAGAAAAAATGTTGGAAGATATGTACGCTGATT 
+CAGACATTCCCAAAGAGGATTGGGAAAAAGATTTCATGAAAAATCATCGTTCTCGTTCAC +AAATCCAACGGCTCATTCGTCCAGAAGAAATTGGTCGTTTTGTTACCTTTGTGGCCAGCC +CGGATTCTTCTTCCTTCTCAGGCGAAGCCTTAAGAATCGATGGCGGCTTAGTTCCAACGA +TCTTCTAAATTAAACAAAAAAGACAGCCAGCAAAAATTCACATTTTTTTGCTGGCTGTCT +TTTTAAGCATATTAATTGATTACTTACCAAAAGTAAGTTATATTTATCTCGAATTCGAAA +TAAAAAGAGGTGAAACAATGAACCAACAGCAAGAAGCTTTAAAAGCCTATATCGGTTTAT +TAAGAACCAGCCATCGACTAGAGCAACTTGCCAAGCAAGATGTTACTTGTTATGACTTAA +ACATTACAGAATTTTCAGTGTTAGAGCTGTTACTCCATAAAGGTCCTCAGACCATCCAAA +AAATCAAGGAGAAAATTTTAATCGCTAGCAGTAGCACCACTTATGTTATTGACCAATTAC +ATAAAAAAGGCTATGTAACGCGCACTCCCAGTGAAAAAGACCGACGCATTACTTACGTCG +AATTAACAGAAGCTGGAAAAACATTAATTAAAGAAATTTTCCCGACGCATGCAAAGCGAA +TTGCAGAAGCATTTGAACAACTCTCTTCCGAAGAATTAACACTTCTTCAAAAAACTTTAC +GAAAAATAACAAATGAAACGAAATGAGGAAATAATGATGAAAAAAGAAGATCAATTATTA +GGAATCCACCACGTTACAGCTATGACAAGTGATGCAGAAAAAAACTATCACTTCTTTACA +GATGTTTTAGGGATGCGTTTAGTCAAAAAAACAGTGAATCAAGATGATATCTATACTTAC +CATACCTATTTTGCTGATGATTTGGGTACACCAGGTACAACCATGACCTTTTTCGATTTT +CCCAATAACCCTAAAGGATTAAAAGGAACCAATACAATTTCAAGAACAGGGTTCCGGGTT +CCTTCAGATGCAGCTTTGACTTATTATGAAAATCGCTTCAATGAATTTGCTGTCAAACAC +ACAGGTATTTCTGAAGAATTCGGGAAAAAAGTCCTTCGCTTTTGGGATTTTGATGATCAA +GCGTATCAATTAATCTCTGATGAATTAAATCAGGGCGTTGCAGCGGGCACCCCTTGGAAA +AAAGGACCTGTTCCAACAGAATTTGCGATTTATGGATTAGGACCTGTCGAAATAGCTATT +TCCTATTTTCATGAATTCAAAGAAGTCTTTGAAGAAATTCTAGGCTTTCACCTAGTGGCA +CAAGAAGGCAATCGCTATTTACTAGAAGTTGGCCAAGGTGGCAATGGTGCCCAAGTCGTT +TTGGTAGACGATGATACTAGCTCACAAGCGCAACAAGGATATGGTGAAGTACATCACGTT +GCATTCCGCCTAGCGGATCGTAAATCACTTGGGACTTGGCAAGCGCTCTTTGATCATTTA +GGCTTACAAAACTCTGGCTATGTCGATCGTTATTACTTTGAATCATTGTATGTTCGCATT +GGACATATTTTAGTCGAATTAGCCACCGATGAACCAGGGTTTATGGGGGATGAACCTTAC +GAAACATTAGGAGAAAAGTTATCTCTTGCGCCATTTTTAGAAAACCGTCGTGAGTATATT +GAGAGTGTTATCAAGCCTTTCAATACAAAACGAGCCTAAGGAGGAAAAAAAACATGCATT +CAATTTTAAAAAAAGGACATCCTGAAGCACCTGTCTTTGTGCTACTTCACGGTACAGGTG +GTGATGAAACATCTCTCCTACCAATTGCCCAAGAACTAAATAAACAAGCTACTGTGCTAA +GTATTCGTGGTGATGTTTCAGAAAATGGAATGAATCGTTATTTTAAGCGCCTAGCGGAAG 
+GTCATTATGACTTAGAAGATCTAGAAAAACGCGGCGAGGCGCTTCATAAGTTTATTCAAC +AAGCCGCTAACGAGCATCAATTTTCATTGGATAAAATTATTTTTATTGGCTATTCAAATG +GGGCCAATATCGTTATTCAATTATTGCTTACTCATCCCGATAGTTACCATCAAGCTGTCC +TCTATCATCCCATGTTTCCTGTTGAATTGACCAATCAACCAGACTTGACCGACACTTCTG +TTTTATTATCTCTAGGAGAGCATGACCCGATTGTTCCACTTCCTGAAAGTATGCGTGTGA +TTCAATTATTTCAGAATCATGGAGCAACCGTACAAGAGGTTTGGACACAAAGTCATCAAT +TAACTTATCAAGAAATTAAGGAAACACAAACTTGGTTGGCACATCTGTCCTCTTAACAAA +AAAGTGGGTCCTAAGTCGAAATGACTTAGGACCCACTTTTTCTATTTTAAGATAACTCAC +TGTCTGAGCTTCTTTTTATTCTGCCATTTGAACAACTATTTTACCAACTGCATGGTGCGT +TTCACTGAGTGCATGTGCATCATAAATCCCTTGTCTAGAAAAAGGGAAAACTTCACCAAT +AATTGATTTCACTTTTCCAGCTGCCATCAAATCAGCAATTTTTTGTAATTGTTCTCCATT +TGGTTGAAGCCAAATACTTTCAGCAGAAACATTTTTTTCGGCTGCCAATTGTTTATCTTC +AATGCCCACAATTGAAACAAGACGACCTGTGTTTGGTTTTAAGACGGCAAAACTATTTTT +TTGAACCTCACCACCCATTGTATCAAAGACCAAATCAACATCAGCTAATACCTCTGCAAA +ATTCGTTGTATGATAATCAATCACTTCATCTGCACCAATTTTTTTCAGTAAGGCATGATT +TTTGGCGCTAGCGGTCGTGATGACATGTGCGCCCGCTTCTTTTGCTAGTTGAATCGCATA +AGTACCGACCCCACCTGCACCAGCATGAATTAAAACAGTTTCTCCTTCTTTAAGGTGACC +ATGATCAAACAATGCTTGCCAAGCGGTTAAACCAGCCAACGGAACGGCAGCCGCTTCTTC +AAAGCTAATTGTTTCAGGGATTTTTGCTAATAAATGATCATCCACAATCGTTACTTCTGC +ATAGGTACCAAAACGAGTAGTTTCAGGACGAGCAAAAACTTTATCGCCAACTTGCCAATC +CGTTACTTGACTCCCCACCTCTGTAATCACACCAGCGACATCCCAACCAAGAATAATCGG +AAAGGACCAATCAAACATCTGTTTTAAATATCCTTCACGCAATTTCCAATCAATCGGATT +AATTGATGTCACGTATTCTTTTACCAACACTTGATGTTCCGATAATTCTGGCAGTGTTAC +TTCTGCTTCTTCAAGTACCTCTTTACTTCCGTACTGATTAATCACAACAGCTTTCATAAA +CCAACTTCCTCTCATTTGTTTCATTTTTCACACTTTTAGTATACGCCTATTGCCTTCTTT +TTATATAAGAATATGCTTTAGTTAAATTGAGAGAAAGTAGCTGTTTTTAGTCTATTATTC +ATAACGTAAAGATTCAATTGGATCTAATTTTGCTGCTCTTCGAGCTGGTAAAGTTCCTGC +TAAAAAAGCAATAAACATAATAACTAAGATAATCGTTAGTGAAGATGGCAATGAAAATTG +AATTAATTTAAATCCTGTTAAAGCCTTTAAAAAAGAATCCGTTGCCAAACGATTAACTAA +ATTTCCAACCCCAACAGCTCCTAAAATGCCTAATATTGAGCCAAAAAAGCCAATTAAAGC +CGCCTCAACACTAAAAATTGTAAAGACTTTCCCATTGCTAAGTCCCATGGCTTTCATTAA +ACCAATTTCTCGCGTTCTTTCTTGGACAGACATATACAGCGTGTTAATAATGCCAAAGCT 
+TGCAGCTAACAAGGCAATTGCACCAAACATGGTTAAGACACCAGTGATTGCATTGATTAT +ATTACGAATCATGCCAATTTCATCTTCGACAGTAGTCGCTAAATAGCCAGCTTTGTCTAA +ATCTTTTTTGATGTCCTTAATCTGCTCTGGCGTGCTGTCTTTTTTAACTTCAGCAATAAT +CATCGCGTATTGATTTTTTAAATGTTCTGGAAGATCTGCTTGATTGATTGAGACAACTTT +ATCAATCAATGCTTTATTCATCAGCGAGAGCCCATTTTGAATGACACTGGCATTTCTAAC +GCCAACAATTTTTGCTTCAATAACTTGCTCTTGTCCTTTCAAGGAACTTGAGATACCTAA +TTGAACTGTTTCCCCCACGGCAGCTTTACTAGATGTATAGCCGAGCGCTTTTACATATTC +TGGTGACAAATTAATTTCGAAGTCCTGGCTCGTTTGCGAAACTTTACGGCCAGCCGCTAA +ATCAATGGTCATCTCATCTAGCGCAGAAGTTGCTGAAAATACATATTTATGCTTATCTGC +TCCTTTTATATAATCGATAGCAACGGATTTCATTGGTTCCACTGATGTAACATCCGAGAT +TTTTTTAATCTTCTCAATATCTTTTTCAGCAAGCATACTTTGTTGTTGAATCGTACTGGT +TTTTTTCTCTGGATTATATTTACTTGGTTCTGTTCCGTTACCAACATTCATCTCCATTTT +TGGCTGAATAAATAACTGATTTGCACCACCGACACTGCCGACTTGTTTGTCAATATAATC +ATTTACACCAATATTGACTCCTGTAGTTAAAGCTATGGTAAATGCTCCAATAAAGATTGC +AATAATGGTCAATACGGTTCGACCTTTATTACGCATTAAATTGGTACTAGCTGATTTTAA +AATATCTCTAAATTTCATTTACTCATCTCCTCCTACAATCAAGCCGTCTCGAACATGAAC +TTGCCGATCACAACGTGCTGCTAAGTCTGGATCATGTGTCACAATAATTAAGGTAATTCC +TTTATTTTTATTTAAATCAAATAACAATTCTTCAATTTTCTTCCCAGTGGCAGAATCTAA +ATTCCCTGTAGGTTCATCTGCAAAAATAATTTGCGGATTATTAACTAAAGCACGCGCTAT +ACATACCCGTTGTTTTTGGCCACCAGATAAATTATTCGCTTTATTTTGAACTTTATCTTC +CAAACCGACTGCTTTTAACGCATCTAAAGCCATCTTTTTCCTCTTGCTACCGGAAATCCC +ACCAATTTTTAATGGTAAAAGAACATTATTTAAAACTGTATCCTTGGCATTCATAAAGAA +TTGTTGAAAAACAAAACCAAATTCTTCATTTCTCGTTTTATTCAGCACTTTCTTTCTAAT +ACTAGTAACATTTTTACCATTTAAATAAATATCTCCTGAAGTTGGTTGATCTAACAATGC +CAAAATGTGCATAAATGTTGATTTCCCAGAGCCACTTTTACCAATTATGGCCACAGATTC +ACCTTTTTCAACTTTTAAATCAACACCTTTTAACGCATCAAATTTTGTTTCGTTCTTTCC +ATAACTTTTTTTGATATTTTTTGCTTCAATTACCGCCATGTTACCCCTCCATTTTTCATT +CTTTGTCGTTTACTTATTTACAAAGTTTTTTACTCTTTTCATACTCCTTTCTTCTCTTTC +TATTTTGACGGAAGCCCTTTCCTTTGTCATGGTAATTCCGATGTATCTCTCAATTAAAAC +AATAAGAACATGTTCCAAATTCACCTCATTGAACATGTTCTTATTAATTAGTTATGCATT +TACGCGTTTAATTTCACGATGTTTATTCAATAAATAGGCCAAACCAAGATTAATGCCAAC +AAAGCAACTGATTGTCAGAAGAAGTGGTAGGACAGTAAAACCAGTGGATTGATAACCAAC 
+CATTGCTTGCAAGACATGGAGCATTCTCAGGCGCATTGGTGCCCCCCCTAATTGTGCAAT +CGTGCCCAGTAAAATAGGAATCATTAATAACAGCAATAAAACAATGAGTTTTTTTACGCC +ATCTATCCGATCATTAAAGACACCTGCTAACAAGCCAAGTGAGCCGCTAAGCATTAGTAA +GATAAAAAAGAGCAACCACGACATGAAAAAGTTTCCTTGCGCATAGACATCAATTAATTT +CATTGATAATTGAAAATGAGAAATGAAATTGCCGCTAAATACTTTAATCAGTACAAGTAC +AGCCAAGGATCCAACCAAAGATAAAATAGCATTACTTACAAAATTCACCAAAAAAATTGT +CCAGCGGGAAAGCCCGTTTTGAATAAACAGTTTAAAATCTGTGTTCATTCCCAAAAATGA +TAGTATCCCCATAAAAACCAGACAAGGAATTACTGCATCAGAGCTGACAGTATTCACGTC +GTTAGAAAAGAGCAACCCAATAAGAGGAAAAAGAATGCCGAACAAAGCATAAAAGCCAAA +ATAAATTGCTAAGGAACGGACTTGATAAATAACACGATACCGCAAAGCTGTTTTAAATTT +CATTTTACTTTTCCTTCTTTCTATTGGTTAATTGCACAAAATATGTTTGTAAATTTAGCG +GCGCAATTTGAACCTCTGCTTTTTCAGTCGGCAAGTCGCCGTACACGTAAGCCGTAACTG +CCCCACCTAAAGTGTCCATGCCTAAGATTTCCAATGACTGAGTATAGCGCTCTACTTGTT +CTTTCGGCCCAGAAACAATGCGACCGTTTTTCAAAATTGTTTCAATAGATTCTGCGCGAA +TCAATTTTCCTTGATCAATGATGATAATATCTTCTAATAAATTCGCAATTTCTTCAATCA +AATGGGTGGAAATCACAAACGTACGGGGACGTTCTTGGTATGTCTCAATTAAATAAGTAT +AAAATAATTCCCGATGATTGGCGTCTAAACCTAAGACTGGCTCATCTAAAAAAATATACT +CACAGGGGACGGATAGCGCCACTATTAGTTTAGCAATACTCCGATAACCAGTTGATAATT +TTTTGAATGTCTTTTTACCATCTAAACCAAAATCGCTTAACATTTGTTCGGCTAAAGACC +AGTCAAAGGATCCATAAAAACCTTCAGTCGTTTTAAAAATATCTTTAATTTTTAACTGAG +GAGGAAATAAATTATCTTCACTCATTAAGTAAATATGATTCAAAGCAGTCTCATTATCGG +TCACTGTTTCTCCCGCCAATTGAACGGAACCTGAAGTGGCAAAACTTCGATTATTAATGA +TATTTAATAACGTACTTTTACCAGCCCCGTTTCTGCCTAAGAGCCCATAAATCGTTTCTT +TTTCAAAAGTAATTGAAATATTATCTAGTGCTTTTTTTTGATGATATTTTTTACTTACTG +ATTCCACCCGCAAACTCATGCGTCATACCCTCTTTCAATTAGTTGTTTTAATTGTTCAGC +TGTAATTCCTAATTTTTTAGCTTCTGCGACAACTTCTAAGACTTCTTTATTTAAAAATTC +TTCTTTCCGTGCACTTCTTACTCTCTCTTGAGCGCCTGGTAAAACAAACATGCCAATCCC +CCGTTTCTTTTCAATCAACTGTCGTTCTACTAATAAATTCATTCCTTTTAAAACGGTGGC +TGGATTGATTTGATAACTTTTTGATATTTCTGTGGTTGAGGGAATTTGTTCGCCTTCTAA +GTAAGCGCCATTAAAAATTCCTTCGGCAATCTGATCGGCTACTTGCTGAAATAAAGGCTT +TTCCCCCGAAAAATTAAATTCCATTTTTACACCTCGCTTAAATAGTTAACTACTTATGTA +ACTAACTATATATCCTAGAAATTCTTTTGTCAATAAAAAAACCATTGTTTCCAAAGAAAT 
+TTTTCTTTGGAAACAATGGTGATACTTTTGAGTTTATTGAATGATTTCGTTCTTATAGGG +TTCCATTGTATAACAATTCATCGGTTGTTTCGTTGACAATTTTTAAGCGATTTTGTTTTT +TTATCAAAATACTTTTATAGGTCGTTCCTGTAGCCTTTTCTTCCATCACAAGAATGCGTT +TATACTCATTATCAGTGACTTTCTTAATCGTTGTTTGATTTAAGGACGGAATTTTTTCTT +TTAGCGTTTTTTCTTCCGCCAGACCTTTTAAGATTTCTGATTCTGTAGTGGAACTTGTGG +GTTTATTAGAATCGCTTGATACACCTTCGTTATTAGACGCACTTTCAGATGACTTGCGAC +TTGATTCAGTTACTTTAGGCTGTGATGATTCACTGGTATTGGTTGAATTCATTTGAGTTT +GGGTTTGATTCCCACATGCACCAATTAGCAACCCAACGCCTAAGATAGAACTTGCTAAAA +TTGTTTTTCTAATCATTTGCATGACCTCCTACATCTATCATTACTTTAACCTTAACATTT +TTCAGCTAAGAGAACAATTTTTCAGATTAGAAAAGACCACTAGTCGTTATTTAAATAAAC +GAACTAGTGGTCTTACTTATTTAGGAATGATCTTCATATGAGTGAGTGGGTAATAAACAA +ATTGTGCTTTCCCTAAGATTTGATCTGCATGTATTGCACCAAAAGAACGGCTGTCTTTGG +ACATACGGCGATTATCACCAAGCACAAAATAGCTATCTTTAGGTAATTTTTCTTGCATTA +ACAATTCTTTTGAATCAAAATTCGTAGTATAAGGCATCGTTTCATGATCTTTTTTTCTGT +TTTTAGTTAAATACGGTTCAGCGATTGGCTGATTATTGACATATAATTGATCGTTTTCGT +AACGCACAGCTTCTCCTGGTAAACCAATCACACGTTTAATCAGAATCGATCCTGTGTCTG +TTTTAAAGACCACCACATCAAACCGTTTAATGGCGGAGAATTTTTCCATCACAATCATAT +CTCCTTGATTCAAGGTTTTTTGCATAGAATGGCCATCCACCCGAACAGGAATCAGGAAAA +ATCCTCTTAAAATAAAAACGGCTACGATTGCTGGTACTAAAATTTTCAGAAAGTACATTA +AATACCCAACATAATCACGTTTCTTCTTCAACTTCGTGCCACCTTTTTTCTAGTTTTTTT +GAATTTCTTTGACTGCTGTATCGTAGGCAGCGTCATTTTCAAGAATTAATTTACCTAAAG +TGGCTTCAATTTTTGTGGCTGTTTCATTGTCGATTTCTCCAGTTACAGGCAAGCCATTTT +TCTGTTGAAGATCGCTAACCGCTGCTTTTGTTTCTGCGGTGTAGTTTGCATTGTTTTCAT +CAATAGGATACGCTAATACAGCTAAAATAGCATTTAAGTTCTGAATATCTTCAGATTGAT +CTCCTTCTTTTAGCGTTTTATCGCGAGGAATCAATTTTAAATAGGCATACTCGGGGTAAT +CAGCTTTGATGGTTGGTTCAATTCCTTTTTCGTTGATCCATTCACCTTTTGGTGTTAACC +ATTTTAAAACAGTCAATTTTATTTCGGTTTGGTCATTTAAATCTTTCACCGTTTGGACCG +TTCCTTTGCCAAATGTTTTCGTTCCAATTAGTGGGACATTCGCAGATTCATGCAGTGCTG +CAGCAAAAATTTCGGATGCACTGGCACTATTGCCATCAATGATAACTGCAACAGGTTCTT +TTACTTTAAAGCCGCCGTCTAATTCTTTGGAAGCAACTTCTTTCATCGTACGTCCTTTTT +TGTCTTCAAATTGAACAATTGTTTCGCCATTTTTTAAAAACATACTGGCCATTCGCTCTG +CTTGATCTAGCAGTCCACCAGGATTTTGGCGTACATCAATTACAAAGGATTTAGCTCCTT 
+TTTTACGTAAATTCGTGATTGTTTCTTTCAATTCTTGATATGTTTTTTTACCAAAGGACG +TAATTTTAATCGAGCCAATTTGTGCATCTTTTTTGTCTAACTCACCTGTCACTGTTTTAA +CGGGAATTTTCCCACGTTTGATTGAGATATTTTTAGTTTCTCCCTCTCTTTGGATTGTCA +ATTCAACGGAAGTTCCTTTTTTACCTCGGACTTTTGAGACGACTTCTGCTAGTTTCATCC +CTTTTGTAGCTGTACCATCAACTTTTTCAATAATATCGCCTTCTTTGATTCCCGCTTTTT +CAGCTGGGGAATCTGCTACAGGGGCCTCTGCTACAACTGGTTCGCCATCTTTCATTGTCA +TAGTGGCACCAATTCCTTCAAAATTGCCAGATAAGCTTTCATTTAAATCATTGGCAGCAG +ATTCATTTAGATAAGTTGAGTATGGGTCTCCAATGGCTTCAGACATGCCTTTTAAGGCGC +CTTCAACTAATTCATTTTTATCTACTTCACCTACATAATTGGTACTAATTTCATTATATA +AATCCTGCACTTTGCTTAAATCAGCATTGGTAATTGCCCCTTCTTGACTCATCTTTTTAA +CACGATGATCAAAATAAATATAACTACTTCCTCCAGCTAAAAATGCGACACAGAGGAGCG +AAATAATATATTGATAAAAAGGAACAGTTCGTTTGTTCTTCATAGATTTCATCTCATTTC +TAACAAATTCGTCACTACTTTTGTTTACTATAACATGAATGAAAAAAGAGGAAAAGCTTT +GTTTGCTTTTCCTCTGGATTTATTTATTGTTTTCAAGGTACTTTTCCCATAACTCATCAA +AGATGTCCATGTTGCTTAAATAGTCAGCATTCATTTCTAAATAAGAAGAAAGTTCATGAT +AATCCTCGGTTTGTTTTGGAAACTGAATATCTTTGGCCGCTTCATTGGCAAAATCAGTTT +CAGGATCTTTAGCCGGACCTTTTAATGTCATTAAGTAATGGTAAAAGCTTCGTCTCATGT +AAATTAATCACTCCATTTTTCTTCGATAAATTCACTTCTACGTTGATGGGACAATCCATA +ACGCAAAGCGTCTTTTTTATAAAAGTCTTGATGGTATTCTTCAGCAGGATAAAATGGTGC +TGCTGGTTCGATGGTGGTAACAATTGGTTCTGTAAAACGACCACTATTAGCTAGACGTTC +TTTACTTTTCTCAGCAATTTCTTTTTGTTCTTGGGAACGATAAAAAATAACTGGCCGATA +ATTATCGCCGCGGTCTTGAAACTGACCGAAAGCATCAGTAGGATCTGTTTGTTGCCAATA +AATATCCACTAATTGTTCGTAAGAAATAATCGCTGGATCAAAAGTGATTTCTACTGCTTC +CGTATGGCCTGTTGTGTGTGTTAGAACCTGTTCATATGTTGGATTAGGCACGTGCCCACC +TGTATAACCCGAAACAACTGAGATAATCCCTGGTTGTGTATCAAAAGGCTGCACCATACA +CCAGAAACAGCCGCCTGCAAAAATTGCTTTTTCTTCCATCTCTAACACTCCTTTTTTATT +TCTCTTTTGGCAAGTATATATTCATCCGAATGTCATCGTCAACTAAATTAATCTTTTCTG +CTCGAACAAATAGACCATTTTGCATTCTAAACTGATCTAAACGTAATAAAACAGTTTGAT +CATCTGGATTGATTTCAACCCATTTAGGTAGTTTGTAAGAACGTTTAGCAAATTTCAATA +CTTCTTTAATTGGCAAGCCTAACGTTCCTATAGATAAACTTTTTGCTTTTAATTGTACAT +TACCATTCGCCATCACGTAAGGATCAAAATATAAGTAAAAAGGAATGTCATGCCCTAACA +CCTGAAATGTTCCATTTAATAAAGCATCATTTTCTAAATAAAATTTATATTTAATCTCCG 
+AGCCTTTTTGAAAGTCGGCTAAATAAAAGTCAATCAGTTTGTTCACTTGTTGTTTTTTTG +ACTGAATGGTGACGACTGGCTCGCCTTCTTTTTCAACAATTGCTGGTATTTTCTTTAAAT +CTGGTTCGCGAACTTGTGTTGCTCGAAATGTCACGAAAGCTACACTACCAATGACTAAAC +CCACTAGAACAAGAAAAGCGATTTTCCAGGGATTTCGTTTAAGATTGGTTGGTTGTGTCT +TTTTAATTGATTTACTTGTTTTTGGCTTTTCTTCATTCATTTTATTCACTACCTTCTGAT +TTGGTTATCCATTCTTTCTCTGTCTTGACCATTTCATCCCGGACAGCTCCGGCCATAATT +TGGTAGCCAAGATTATTCGGATGGAAGCGATCTTCTTCATATAACAAATTGTTTAAATCC +TCTTTGCTAGCACTACTTCCTGTAGTTTCTGAGTCACCACCAGTTACGCCAACTTCATCG +CCACGACCTTTATAGAGTAAATCATTGATAGGAATGAAATAGGCTCGTTTTTGCTCTTGA +ACCATTTCTTCGGTTGCTTGATTCCAGTTATCAACGATTTCTTGCATTTCTGTAATTTCG +GAAAAGTTTAAGTAGAAGGGATTGTAAATTCCTAAGACATAAATAGGGGCCTTCTCGTTG +TACTCTCGGATTTCTTCAAGTAGTCGTCTTACTCGGCGTTGATAGGCTTTTTGTGGACGA +TTGAACGAGCTGACTTTCAAGTCGAAAATATTACTACTAATTACTTTCATTAAGTCATTT +CCGCCAACCGTTAGCGTAATCACGTCCGCAGAGGCAAGGCCTTTTTGAATTTCAGGTTTT +TCTTTGATTCGTTTTAAAATTTGATCACTACGATCCCCATTTTTCCCAAAATTGTCTGTT +TGAACACCGTTCAAGTTGTAGTGTTCTTTTAAATCATCTGCCACAATAGGAACAAAACCT +CCACTATTCGTCAAATCGCCAATTCCTTCAGTTAAAGAATCACCGATTGCTGTATAATGA +ATGACTTCTTTTTGATTTTTTTGAGCAGTTGTGGCCACTTTCTCTTGCTTTAATAGTGGC +TTTGCTTTGGGGATGGCCACACTTAACAATGTAAAAACACCTAGCGCAATGAGGATAGGT +GTTAAGACGGTCAGCAAAATATGCTGTGTTTGTTTTTTCATCTTCGTCACTTCCTTTTAA +ATAAACCAACAAGAAATTTTCCTTTTCTTGTTAGAAAGAGGTTGCGTCTACTCATTTAAG +AGTGAGACCCAACCTCGTTATTTCTTAGTCTGTATAGTACATAATAGCAAAGGCATTTTT +TCCTGTATGTGTTGCAATGACTGGATTCGTATGTAATACGGGAATATCCATGTCTTTAAA +GATTGCTTGTAATCCTTCTTTGAATCCATTTGCTAGTTCTAGCCCATCCGCATGAGAAAT +GCCAATTTGTCGAACATTTGGAATCTTACTTAATTCTGATTTCAATTCGTCAAACCATTT +ATTAAACGTTTTAACGCCGCGGCCTTTCGCTACAGGAATCAATTCCGTATTTTCAAAGTC +CATGACAACTTTCATATTAAAAATGTTTGATAATAATCCTGTTGTACGGCTGATTCGTCC +ACCTTTAACCAAATTATCCAATGTTGAAATGCCGATGTATAATTTGGTATTTTGTTTGAC +GCGTTCAATTTCAGCTAAAATTTCTGGAACACCCGCTCCCGCTTGTGCCAATTTTGCTGC +TTGAATGACTTGGAAAGACAAACCTTGGTCTGTAAAATCGCTATCAATCACCGTTACTTT +ACTTGATGATAAATTGCTAGCTTGACGGGCCGCTTCAACCGTTCCACTTAAGCCTTTTGT +CATGTGAATCGAAATTACTTCACTGCCATCTTCACCTAAGCGATCATATAATTCTACAAA 
+TTCACCAATTGGTGGTTGACTCGTTTTGGGTAAGGCCTTCGCATTAGCCATCATGTCCAT +AAATTTTTCGCCTGGCAAATGATCATCATCTGGATAAACAACGCCATCAACCATAATTGA +TAAAGGCATCATATGGATATTTAATTCATCTCTAAGACTTTTTTCCATCGTACATGAAGA +ATCCGTTACGATTTTAACGTTTGTCATAAATTTATCACTCTTTCTTTAAAAGGAACCTGT +TTTCATGGTAGAATACCATTAAACGGTTAATTATCTAGTTATAGTATAACAGACACTGCC +GAATTTTTCATCAAATTAAAATAAAGGAAGTGAATTTATTGGAAAAAACGCATTTCTCAA +AAAAATACCTAATCGTGAACGAAGTCTTGAATGCAGTTACACATGGCATAGGCGCAGGTT +TAAGTATTGCTGGCTTAGTCATTTTACTTGTTAAAGGAGCTCGTTTAGGCTCACCGATTC +ACGTGGTATCTTATGCTATTTATGGCTCCATGTTGATTCTACTTTTTTTATCTTCAACGT +TATTTCATAGTTTAATTTTTACAAGAGCCAAAAAGGTCTTTCAGGTCTTCGATCATAGTT +CTATTTTCTTATTGATTGCTGGCAGCTATACGCCGTTTTGCTTAATTAGTATTGGCGGTT +GGTTAGGTTGGACCTTATTTAGTTTAGTCTGGTTAATCGCCATCGTCGGTATTGTCTATA +AATCCCTCACGTTGCATAAACAAGAAACAGTGAAAAACATTTCAACGATTATTTATATTG +TTTTAGGTTGGCTCTGTATCATCGCTGCTCGTCCGTTATATGAATCTCTTGGGTTTACAG +GAACAGCGTTATTGGTCGCAGGGGGTGTGTCTTACACATTAGGTGCAGCCTTCTATTCAT +TGAAAAATGTGCGGTTTATGCATGTGGTTTGGCATTTGTTCGTCATGCTTGCAGCGATCC +TCATGTATTTTTCTGTTCTCTTTTATACGTAATCACAGCACAATAAGAACACGATGCTCA +AAGCATCGTGTTCTTTTATTGCCCTAAATCTCTTTATTATAAAGGATTATTCTTCTTTTC +GAATTATTTTAACATAAATTAGTAGAAAAGACTTTACAAAACGAACATACATTCGTATAA +TAACAATACGAACACTCGTTCGAATCAAATGAAGAGGTGATACACATGGAAGCAATTCGT +CGTGTTGGATTTTTATTTTTTGTATTGGTTATAGGTATTTTTTTGGGAACATTAGGATTA +CGTCTTGCTTTTATGATCGTGACGCCGTTATTTATTCTTTGGTTTATGTCATGGGATGAG +AAGCGTTATACACGTACACGTAAACAACAGCAAGCAAGATACGTTTATCGTAAGTTCCCT +TAATAAATAATAAAACGGTGTGTTCCCCCCAAGCTACGCTCCGTTTTTATAAATATGTCA +AAGAAAGAGCGAAGGATTAAATCCTTCGCTCTTTCTAATTAAATTAGATTAGTATTCCAT +TAAGGTAACAATATCGTAGCCATCAATTTTATCGCGGCCATGTAAATCCATTAATTCAAT +TAGGAACGCACAACCAACCACAATACCACCTAATTGTTCGACTAATTCGATGGTTGCTTT +AATTGTACCACCAGTTGCTAACAAGTCATCACAAATCAAAACACGTTGGCCTGGTGTGAT +GGCATCTTTGTGCAACGTTAATGTATCTGAACCATATTCTAAGTCATAAGTCACTTCGAT +GGTTTCACGAGGAAGTTTTCCTTTTTTACGAACCGGAGCAAAGCCAACCCCTAGTTCATA +AGCCACTGGACAGCCGACAATAAAGCCACGAGCTTCCGGTCCTACAACCATATCAATTCT +TTTTTCTTTCGCGTAATCCACGATTTGTTTCGTTGCTTCACGATAGGCATCCCCATTAGC 
+CATTAATGGAGAAATGTCACGGAACACGATCCCTTTCTCAGGATAATCTGGAATGCTTGC +AATGTAATCTCTTAAATCCATTTTTATTTGTCCTCCTCATTCCACAGCCATTGTTGTATT +GTTTGACAATCACTGTAAAGTAGAAATTCTTCTGTTTTTATTTTTTTCAATCGTTGTTGG +TACACTTGACTTTCAGTCAATGGTCGATTGTCGGGTTTCTCAATACTGTTTAAAACACCG +CTTTCTATTGTAACAAATCCTAAGTCAAAAAACACCTGTATCATGAAAATTAATAATTTT +TCTTGGATATTTAAGTAATTTGCCACCTTCGATAGCTGGGAACGTAGATTTACTTCTTTT +TGCTGCAAAATAAATTTATACAATGTTGCAAATTGTTCACGTGAAGCCATACCGTTTAAA +TAAGCTTCTTCTGGTGAAATAAACATCATGTATATACGTTGTATCTCAGTCGCTTCAACA +ATTTCTTTAACCGTGATTGCTTCCACAGGACAGTCGACAAAAACCAGTTGTTCGATTTGA +TTTTGAGAGACAGCTTCCACCAATTCTTCTTGGTTTGCCCAAACAATAATATTAGCGGTT +GGATCTGAAATGAATTTTTGATTTTTTTCATCAAACAAAAGATAAGCTGTTGCTTCTGAA +GGAATTGGTTTCGTTTGATTATTTTTGCCACGAAAATCAAATAATTGTCTTCCCAATACA +GCAAAATCAGTCACCATTAATTGTGGTTTCTTACGACCATTCCATTCATTGATGGAAAGT +TGGCCTGCCACATCGGCGGTTCCTTGTGCTAGTTCATCCGCTTGAGGCCCCATTTGAAAA +GCAATTGCATCCAATTGTGCCCCTTCTTGATTCATTTGAAATTTCAGATGTGCATTATCT +GCACCAATTTGCCGAATTTGCGTAGGCGTGATTTCCTTAAATACAAAAGTTGGAACAGTA +TTATCGGTCCCAAAAGGCGCCAAAATACGCAATTGATCAATAAAAGTAGTCGTGGCCTGC +GAAACCGCTAAGCTCTCACTAATGAGCAACTCTTGTCCATTAGCCATATCAATTTGATTT +TTTTCAATAAAGTGTGCCAAATGTTCTTGAACAAAAGGAATATTTTCAACAGGTAAAGTC +ATTCCCGCTGCCATATGATGCCCACCAAAATGAGTGAATTGCTCACGCACTTCGTTCAAC +GCCTCATAGAGATTCAATGCGCTAATGCTACGTCCAGAGCCTTTAGCGGTCGTACCAGAT +TCATCTATTGCTAAAATAATGGTTGGTTTCCCCGTTTCTTGCATAATGCGGCCCGCGACA +ATCCCTAAGACGCCTTCATGCCAGCCTTGTTTCGCTAAAATATGGACCGGCGCATTCGGG +TCGATTAAATCCAGGGCTTCTTTGGCAATCGTCGTTACAATGTCTTTTCGTTCATTGTTT +TGTTGATCAATATATTTAGCGATTTCCAGCGCTTGCTCTTCATCAAATGTAGTCATTAAT +TCAACACCTGGTGCGGCTTCCCCTAATCGACCTAACGCATTTAAACGAGGTCCGATGGTA +AAGCCAATTGATTCTTCTGAAACAGCTTCCTTTTTCACACCCGCTTCTTGTAGTAAAACG +TCTAAACCAATTCGATCACCAGTTTGAATCATTTGAAGGCCCATTTTGACAAACGTTCTA +TTTTCATCGGTTAATGAAACCAAGTCAGCAATTGTCCCAATTGCTACCAAATCTAATAAC +TCAATTGGCAATTCACCAAGTAACGCAGTAGCGACTTTGAAAGCCACGCCAACACCAGCT +AAATCTCCAAAAGGATAGTCTCCTTGCGGATGTCTAGGGTGGACAATCGCATAGGCTTCT +GGTAATTGTTCAGGCAATTCATGATGGTCAGTAACGATTACATCTACCCCTTGAGCCATA 
+GCGTAGTTAATTGCTTCATGTCCAGCTACACCATTATCAACGGTTACAATTAATTGGACT +CCTTGTTCAATTTGTTCAGCAAATACGTCTTTATTAGGCCCATATCCATGAACAAAACGA +TTCGGTAGAAAGTACTGAACCATGCCACCGACTAATTCGATTGCTTCTTTCATAACCGTG +GTACTAGTGATGCCATCTGCATCATAATCCCCATAGACAAGGATTTGTTCACCCGCTTCG +ACGGCTTGCTGGATGCGGGCTACCGCTTTCTCCATATCATGCATTAAGAACGGATCATAA +ATATCTTCGATAGTCGGATGTAGAAACTTACGTAATGCTTCTTCTGTCCGGATATTGCGG +TGCCATAAAAGTTGGCCAATTAATGGATTAATTTGTTCTTTTTTTAATTGTTCTATAAAT +TCTACAGGTAATTCTGTCTTTGTCTGTAACTGCCATTGATAGTTTGATTTTTTCACGACG +TCACTCCTAACCAAGTAATTATAGCAAAATTTTTCCAAAAACTAAAGAAAGCATTTTTTT +AAATCTGAGAAAAAAACTGTGATATCTGTTAGATAAATAGCAGATATCACAGTTTTATTT +TAAAAGAAACTTAGTGATAAAGTCTCTTCCTTATTTTTGTTCCTCTTCATTCACACGAGG +TTTCACATAACGATCAATACGACTACCTGACACAGGCGTTGCGTCTACTTCTGTGTTTGA +TGTTTGTTCGTTGACCACTGGTTCTGCGACTTGTGGTGCTGCCGCTTGTTTGGCTTGCAA +ATCAGCCAATTGATTATCAAAAGAACGTTGAACTTTTTCCGTTTCTTCTTTGACTAATTT +ATCCATATCATTTTTATAAATATCCAACTCTTTTTGTACAGCTTTTAATTCCTTACGTTG +GTTCCAAATAGTGGTTGTGGAGGTTAGTAACCCAACTAGTGCGCCAATAATTGCAGAACC +TAAAATAATTAAGATTAACGGTCCACTAATTTTAGTAAAACCAAAGTTTACTGGAACAGC +TTGATTATTTAAAACAGCAAAAATAACAACAATTAAGACCAACACAAGGCCCAAAATAAC +ACGCCATTGATTTTTCATGACTTGATCGACTCCTTTATTTTTTATTTAAAATATTTCCCG +CTATGAAATCTCCTAAATGTGGAAACAATGTATAAAAGCGGGCAGCCGCTTCCATGACAA +ATGGACGATTAATCTCACGTCGAGAAGTCCCCATACTCCCAACCACTTCTTTCGCTAATT +TCGTAGGGTCCAAAACAATTTTATCCACCGCAGCTAGATATGTGCCTGTCGGGTCCGCTT +TATCAAAGAATTCTGTTTGGATTGGTCCTGGATTGACAGTCGTTACTGCTACACCCAACG +GTTTCAATTCTAAACGTAAAGCATTTGAAAAACCTAACACAGCAAATTTCGTTGCAGAAT +AAACGGTTGATTTAGCTGTAGCCATCTTCCCAGCCATTGAAGCAACGTTGATAATATGTC +CTTGCCCAGCTTCAATCATTTTAATAGCTACTTTTTGAGTAAACGTCATCATTCCTAGAA +CGTTGACATCAAACATTTGGCGTGCAACAGCCAAGTCAATTTCAACAAAATTTTCAAACA +AGCCAAAACCAGCATTGTTGACTAAAACATCTATAGGACCAACTTCTGCTGAAATAGCTT +CAACCACACGTTCAACACTTTCTGGATCGGCAATATCTAATTGATAAGAAAATGCTTCTC +GTCCGCTTAAAACAGCACACTGCTCGCGAACTTTACCAATCAAATTAATTCTGCGGGCGC +AGACAACGACGACCGCCCCTTGTTTAGCTGCTTCATAACAAATTTGTTCACCTAAACCAG +CTGAACCTCCAGTTACGACAACGACTTTATTTGTTAAATCCATCTAAGCTCCTCCTTCAT 
+TGGCAAATGGAATCTCAATGATATCCATATCTTTTACGATTTTTGTATTGGGGAAAATTT +CTTGGGCTTCTTCTTGAAGCTGATAGGCCTCTTTGGTTAAATAACGGGCACTAATATGCG +TTAAGATAAGTTGTTTTACTTGCGCTTCTTTTGCAACTTCAGCTGCTTGTTGACTGGTTG +AGTGGAAGTAGGCTTTTGCCATTTTTGCTTCGTGTTTATTAAACGTACTTTCATGGACTA +AAACATCCGCTCGTCGTGCTAAGGTTACGCTATTTTTGGTTTTGCGAGTGTCTCCTAAAA +TCGTCACGATTCGTCCAGGTTTACGCTCTCCCACAAACGCTTGTCCATTAATTTCCTGTC +CATCAAAAACAATCGTTTCACCACGTTTGAGTTTGCCATACAACGGCCCAGAAGGAATCC +CTAAAGCTTGAAGTTTCTCTACTTGTAATTCTCCTTCATGCGCTGCTTCCTCAATTCGAT +AGCCAAAGCTCGTGATGCCATGGTCCAAGATATTACAGCGAACAGTGAATTGTTTATCTT +TAAAAATAACGTCATTTTCTTTCGTTAATTCAATAAATTTCAGTGGATAGGACAACCGCG +ATTGCGAGACCCGTAAAGAAGTCTTGACAAAGTCAGCAATCCCAACTGGTCCATAGATTT +CTAAAGGTTCTGTCCCGCCCTGAAAAGAACGACTACTTAACAAACCAGGTAAACCAAAAA +TATGATCCCCATGTAAATGTGTAATAAAAATCTTTTCAATTTTTCTTGGGCGAATACTAC +TTTTTAAAATTTGCAATTGAGTCCCTTCACCACAATCGAAGAGCCAAACTGCATTTCGTT +CATCTAATAATTTCAACGCAATCCCTGTTACATTGCGATGTTTTGCTGGTACACCAGCGC +CTGTTCCTAAAAATTGTATTTCCATCTGCTACCATACTTTCTTTTAAACTTCCTTTTTAT +TTTAGAAGAAGTTTGCTCCTTTAGCAAATAATAATTTATCTATTTTCTCAAAGTTTTCTT +CTTAGTTTACCGCATTTTACGATAAAATAACAAAAAATTAACAAACTACCATAAAAAACC +GTGATTTCTTAGTATTTTAAAAGACTGTTTCCCTTTATTCAGAAAACAGTCTTTGCTCCT +TATTCTTCTTTCGATGTCGCTTCTTTGTGTTGTTCTTTTTTTGCTTGCCACGATTGTTTC +GCTTCATTTGCAACTTCGTTCGCATCGTTAATTTCTTGTTGGACTTCCTCTAAATCAAGC +CGCGTGATTTCGCCACCTAAGTCAAACATAACCAATTGGTTGAGCGGACGATTGTCTTCT +TCTTCTGCAATTAATAATAATCCCGTTTGTCCTTCATCAATCTTTTGAATCACATGTTCA +AAAACCGTTTGTGCTTCTTGAATTTCTTTGGCATCTTTGCTTGCACCATACATACTTCCA +GCAAACCAGCCAAAAAGAATGCCCAGAGGACCACCTAATATGCCTACTAGCATCCCAATC +ATGCTATCTTTTGATGTATGATTGTTGCCTGTAAAATCGATAAAATCATTGATTTTAAAT +TGATGCTGGCCATCATTAACATGCGTAACAACCGCCATTTGCTCACCTTTAAGCTGTCTT +TCTGCCTGCATCTTTTTTATTTCGGAAAATGCTTGGTACGATTTACTTTCAATATCAAAG +TTCATAATGATAATTCGTTTTGTCATACAAATCGCTCCTTTATTCTGGATACATCTATTT +TAGCAAGAAAGGCTTGTCTCTAGCAATGAAAACTTTTTGGTTATGTATAAAAAAAGAAGA +TAGCTAGGCTACCTCCTTGTTAATGCTTATTTACCATCAAGAATATTTAAACGGACCTTT +TTCGGACCATCGATACGTACACTATAAACAATTGTCCGAATTGCTTTTGCAACACGCCCT 
+TGTTTACCAATGATACGTCCAATATCTTCTTTGGCCACAGTCAAATTATACTCGAGGAAA +TCAGTTGATTCCTCAATTTCTAATGAGACTGCTTCAGGTTGACTGACTAACGGACGGACA +ATAGTTAAAATTAACTCTTTCACATCTGCCATATTCAATCACCTTATTTCTTAACGTTTT +TAGCTTCATGATGTTTTTTCATAACGCCTTCTTTTGAAAGGATGTTACGAACTGTATCAG +AAGGTTGCGCACCTTTAGATAACCAATCTAAAACTAAATCTTCTTTTAAAACTACTTCTG +CAGGGTCTTTCAAAGGGTTGTAAGTACCTACAGTTTCGATGAAACGTCCATCACGAGGAG +AACGAGAATCAGCTACTACAATACGGTAAAAAGGACTCTTTTTAGAACCCATACGTTTTA +AACGAATTTTAACTGCCATTATTAAATACACCTCCATTAACTTAATCACAAGAGTTAGTT +TAACAGATTAACAACAACCTGTAAAGAGTTTTTTCTTGACACCTTTACTTTTTTTCAATT +TTTTTAGCCAATTTGCAGGAAAACAAACGAAATAATGAAGAATTTTTACAAAATTTGATA +ATAGTTGTACTGGTTTCTCACTTGAAAGCCTTCTGTTTGATAGAGATGAAGCGCTGCTTG +ATTTTCTGTTTCAACTTCTAAAAAAATAGTCGCTGAAGCTGAATTTTCTAAAATCATTCG +GAGCGCAGACTGTAACACTTGACGGCCAAGACCGCGACCTCGTTGCGTTTTTGTCACAAC +AAAGCCATAAATCCCCCATTCGTTTTCAAAATGATCCAAACGTAGCGTGGCTAAAAGTTG +ATTGTTTTCTTTATAAATAAGTGTTCGTTGAAGGTCCTCCGGCAAAGGCGTACCTTCTAG +TAACCTAGCCAAGGAATGCAAATCAGCCACTTGTGGTCGGAGAAGTGTCAGCTTCGCCAA +TTCCTGTGAGTAAGTTTCTCTTTGAAAAACCAGATACTGTTCTGAAAAAGCTGGCACAAT +TCCTTGTCTATTAAAATAATTCAGGCCAACAGATTGCTTGTCGTCCATCACGAGAAAGAC +TTCTTTCATTCCCCACAAAGCTGCTTGCTTTTCGAGTTCTTGAACAAGTTCTTCAAAAAA +TGGCCCTACATTTGGCGCAACAATCGTCGCTTCCAAGGTTTCCCCATCAAAACAATACCC +TACGATATACGCTTGTATGGTTTGTTCCTTCCTGTATAAACCATAATAAGTATAAGCTGA +TTCCTCAGGAAATGTTTGTTCCAATTTATAAAAAGTTCCTTGTTTAATCATTTCCTTGTT +TTTTAATTCAGTCAATGTTTGTTTCTCCAACTGTGTTAACTGTCGGCGCCAATTTATTTT +TTTATTCAAGCAGTTGCTCCTTTACTCATCCTTTTACTAGCTATTATACGTGAAGTTACG +AAGTTCAACAATGTACAAGGATAAAACGTTTGCGCTTTTTTTGAAAAAGTACTATCATAA +AGTAAGTAAAAGAAAAATGGAGGGATTTACATGACAAAAAAAATTGGTATTTTTGTAGGG +AGTTTAAGAAAAGATTCATTTAACAAATTGGTAGCAAAAACAATGGCGGACTTATTTCCA +GCTGATTTCGAACCTGTTTTTATTAACATTGGTGATTTAGAATTATACAATCAAGATTTG +GATGATGAAGGAACCCCCACCGAAGCTTGGACAACATTCCGTGAAGAAGTCAAACAAGTA +GATGGTGTGATGTTTGTAACACCAGAATATAACCGTTCAGTACCAGCTGTTTTAAAAAAT +GCATTGGATGTCGGTTCACGTCCGTATGGCGAAAGCGTTTGGGATAAAAAACCAGGACTT +GTGGTGAGTGTTTCACCAGGTGCAATTAGCGGATTTGGTGCGAACCATCATTTAAGACAA 
+TCATTAGTCTTCTTAAATGTTCCAACTTTACAACAACCTGAAGCCTATATTGGCGGTATT +ACGAACTTAATTGGTGAAGATGGTAAAATTATTGATGGCACTGTTGGCTTCTTACAATCA +ATTGTAGATGCTTACGTGGATTTCTTTAATCGCTTAACTGCCTAATTGAATCAATCGAGA +AAAAGTGAGGCAACTTTTTAAAGGTTGTCTCACTTTTTTTATTTTTTTGCTTGACCCTCT +CGTTACGTAAAGGTTTATGATAGATTTTGTCAGGAGGAATGAATGATGGAATATACAATT +AAAAAAATGGCTTCACTATCTGGCGTCAGTGCACGGACGTTACGATATTATGATGAAATT +GGTCTTCTTCAACCAGCCAGAATTAATTCTTCTGGTTATCGAATTTACGGGCAAGCCGAG +GTGAATCGTTTGCAACAAATTCTTTTTTACCGTGAATTGGATCTGAAACTAGATGAAATT +AAAGAAATTTTGGAGCAACCTGACTTTAATGTTGAACAGGCTTTATACGAGCATCAACAA +AAACTATTGGAAAAGCGCAATGAAATTGATCGTCTTTTAGCCTCTGTTCAACAAACCTTA +CACCATTACAAAGGAGAGATAAACATGTCAGATCAGCAAAAATTCGAAGCATTCAAACAA +CAAAAAGTTCAAGAAAACGAAGAAAAATATGGTAAAGAGATTCGGGAAAAATATGGCAAC +GAAACGATTGAACAAGCCAACAAAAAATATTTAAACTTGACGGAAAAAGACATGCAAGCA +ATGCAAAACGTGGAAAAAGATTTATTTTCTAAGTTAGCCATGTATCAAAAATCACCCAAG +TTGACTAGCCAACTTGCGCAAGAAATTTTCCAATTACACAAAGATTGGTTAATGTACTCT +TGGTCAAGCTATTCGCCAGAAGCACATAAAGGGTTAGGGCTTATGTATGTTGGTGATGAA +CGATTTACGTCATATTATGAACAACACGGCGCTGGTTTTGCCGAATCGTTAAACGCAATT +ATCCAAAACTACGCTTAAACAGAAAGTGCGCCATCAGTCGAAGAGACTGATGGCGCACTT +TCTGTTTCTTACGATAATTAACTAAAGCCTGTTTATCTTTTTTTCTTTTTCTTCTTTTTG +TTTTTCTTCATCATACGATTCATAGCCATTTTACCTAGCTTGCCTTTAACGCCGCCACCT +AACATTTGATCCATACCAGGAATGTTCATATCCCCTTTGGACATTTGTTGCATCATTTTT +TTGGATTCTTTAAATTGTTTAATCATACGATTGACTTCAACCACACTATTTCCTGAACCA +GCTGCAATTCTGCGGCGGCGACTAGGATTTAATAGATCAGGATTTTCACGTTCTGCAGGG +GTCATTGATAAGACCATCGCCCGTTTCCGTGCCACATCTTTTGGATCGACTTTGACATTT +TCAATACCAGGCATGTTACTCATTCCAGGGATCATTTTTAATAAGTCCTCAATCGGTCCC +ATGCCCATAACTTGATCCAATTGCTCAATAAAATCGTTAAAGTCAAAACTGTTTTCTTTC +ATTTTTTGAGCAAGTTCTTCTGCTTTTTTCTCATCGTAATCTTGTTGCGCTTTTTCAATT +AGCGTCAACATGTCCCCCATACCTAGGATACGACTCGACATACGATCGGGATGGAAAATT +TCTAAATCGGTTAATTTTTCACCAGAACCGACAAATTTAATCGGAGCGCCCGTTACTGCC +CGAATTGACAGCGCAGCCCCCCCACGAGTATCGCCGTCCATTTTGGTAATAACAACCCCA +GTAATTCCAAGCTGTTCATTAAAACTATCTGCAACGTTGACAGCATCTTGCCCCGTCATC +GCATCAACAACTAACAGAATTTCATTGGGATTAGCCAACTCTTTAATTTGTTTCAATTCG 
+TCCATTAAAGCTTCGTCAATGTGTAAACGGCCGGCCGTATCAATTAAGACATAATCATTT +TTCTTTTCTTTTGCTAATGCTAACCCTTGACGAACAATTTCCACTGGATTAGCATCTGTT +CCCATATCAAAAACGGGAACTTCTAATTGTTGACCTAAAACCTTCAACTGATCAATCGCT +GCTGGACGATAAACGTCACCAGCGATTAAAAGCGGACGAGCGTTTTCAGTTTTCATTAAG +TGTTTTGCTAATTTACCAGTAAAAGTTGTTTTACCAGCCCCTTGTAACCCTGTCATCATA +ATCACTGTCGGGATTTTTGGAGATTTATTCAGTTCAACCGTTTCTGAACCTAACGTTTTC +GTTAATTCTTCATCAACAATTTTTACAATTTGTTGGGCTGGTGATAAGCTTTCTAATACT +TCGACTCCTACTGCCCGTTCTCTGACGCGTTTTGTGAAATCTTTGACCACTTGTAAATTA +ACGTCGGCTTCTAATAAAGCCAAACGGATTTCTCGCATCATTTCTTTTACGTCGGCTTCG +GAAACTTTTCCCTTACGACGGATTTTACTCATTGCCTGTTGTAGGCGGTTTGTTAAACTC +TCAAAAGCCATAGTTTCATTCCTCTATTTCTTGAATTTGTTCAATATATTTTTTGATTTC +ACTATCTTTGGGATAAGTTTCTTGAACATACTGCTGTAGTTGTTCAAGATATTCTTTCCG +TACAACATAGTTTGAATAAAGATGTAACTTTTTTTCGTATTCTTCTAAAATTTTTTCGGT +TCGTTTAATGTTATCATAAACCGCTTGGCGACTAACCTCATATTCTTCCGCAATTTCTCC +TAAAGAAAAGTCATCAGCATAGTAAAGCTCCATATAATTCATCTGCTTTTCCGTTAATAA +TGTCGAATAAAATTCAAATAACGCATTCATGCGGTTTGTTTTTTCCATTTCCATGAGCCG +TGCTCCCATCTTCAATCCCTTATAGGTTACCGATTTCATAGGGAATAGTCAATCTTTTTT +TACTAAGTTTTTACCTTTTAAAAAGAAATGTCCGCAAGTTGCCATGTTTCTTCAGAATAA +ACGGTCATTCCTTGTTGAATAAAATAAGCAGTTGCGACTCCAACTCCAGCTTTTTTAATG +CCAGAAAATTGTCCATCATAAATCAGCTGGCTGCCACAGCTGGGACTTTTTTCTTTCAAA +ATCAAGGTTGTAATTTGTTGAGTTTTTAATTTTTGATACGCGCGTTGTGCCCCTGCTTTA +AATGCCTCAGTCACATCTTCCCCGTTATCCGTATATACATAAGCAGTATTCGCCCAAACA +TCAAAGCCATCCCCGCCAATGATTTCTGCTGGCGGACGTGGAATAGGCAACTCTCCTAAA +ACTTCGGGACAAATCATTATTGCTTTTCCATTAGTTACAAGTTGTTTTAACGCTTCTTGT +TCTTGAGATTTTCCATCGTAACGGCAACAAATCCCACCTAAGCAACTACTAATACCAATC +ATTCGCTTCTCCTCATTTCATTTCTCTATTATACCATGAGTTGACTTTTCTCAAAAGTAG +CATTCTCGCTTACTTTAAAGTACGTTCTTCCTCTAAAAATATGTTAAACTAACAATAGTG +AAATATACACTTATTTTCAGAAATTTCTTGAAGGAGTTATGTAAATGAAAAAAGTTCTTG +TCGTCGATGATGAACCATCGATTTTAACTTTGTTAACCTTTAATTTGGAAAAAGAAGGCT +ATCAGGTGACTACTTCCGAAGATGGTAAAAACGGCTTCGAATTAGCTTTGTCTAATCAAT +ATGATTTTATAATTTTAGATGTAATGCTTCCTGGTATGGATGGCTTAGAAATTACCAAAG +CACTTCGTCGAGAAAAAATTGACACGCCTATTTTGATTTTAACTGCCAAAGATGAACAAG 
+TAGATAAAATCATCGGCTTAGAAATTGGGGCAGATGATTATTTAACAAAACCTTTTAGTC +CCAGAGAAGTCCTAGCACGCATGAAAGCTATTTTTCGTCGTTTAAAACCTACCACGACCG +AACCGCTTCAAGAGGACACACCTAAAGCTCCGCTTGTGATTGGCGAGATTCGTGTGGATG +AACAAAATTATGAAGTCTTTGTGCGCAATCAGCCCATTGAACTAACACCGAAAGAATTTG +AATTACTCGTTTACTTTATGAAACGCAAAGATCGGGTCATTAATCGGGAAACCTTGTTGG +AACGAATTTGGCAATATGACTTCGCTGGACAAAGTCGCATCGTCGATGTCCACATTAGTC +ATTTACGTGATAAAATCGAGCCTGATCCCAAACGGCCCGTTTACTTAGTAACTGTCCGTG +GCTTTGGCTACCGTTTTCAGGAGCCAAAACGATGAAAAAGAGACTGCGGATTGAATATTT +TTTAGTTGCGGCAGTCATGCTTTTATTATTTGTCGGAAGTATTGCAGCGACCAATTTTTT +CTTTCAAAAAGAAATGGTTGCTCAGCAAGAAACCTATTTAAGAAGAAAAAATACCCTTTT +AACAGACCAGTTGCCCCCTTCCGTTTTCGAAAAAGGGCAACTGACAAACCAACAACAACT +CCTTGTCACCCATGCGTTAGATGATGCAGAAGAACGGGTAACTTTGTTGCAAAAAAATGG +TACCGTCTTTTTTGACAGCAGCCAAAATGAACCGCTAGAGTCGCACAAAAAACGACCTGA +AATCGCTGCGGTTCTATCAGGAGCTACCTATGGTTCAGC +>NODE_4_length_39999_cov_63.2942_ID_7 +AAGGTTTAAGAAGGGGCAATAGAATTATGAAATCAGGAAAGTATGAAGTTAGAGCTAAGG +GCCATGGCGCAAGTTTCATGCCTATGGAAGTAACCCTCTCTGAAGATGAAATTGAAGATA +TTAAGGTAGATGCTAAGGGCGAAACTAAGGGTGTGGCAGATGAAGTCTTTAGACGCTTGC +CAGAAGAAATTGTTAAAAATCAAACTTTAAATGTTGATACCGTTAGTGGTGCCACTATTT +CTAGTCATGGTGTGATTGATGGAGTAGCCAGTGCAATTAGTAAAGCTGGTGGAGACCCTG +ATGAGTGGAAAAAACGTGCCAAGCCTGCAGAACAAAGAGAAAAAGACGAAACATATACTA +CTGACGTTGTAATTGTTGGTGCTGGTGGTGCAGGTCTAGCTGCAGCAGCAAGAAGCATTC +AACATAATAAAAAGGTTATTGTGCTTGAAAAATTCCCACAAATTGGTGGTAATACCAGTC +GTGCTGGTGGCCCAATGAATGCGGCTGAACCTGATTGGCAAAAACAATTTAAAGCTTTAG +CTGGTGAAAAGGAAACTCTTGAAGAACTGGCAGCAACTCCAATTGAAAAAATTGATCCAG +AATATCGAACAGATTTTAAGGAGTTACAAAAACAAATTAAAGAATATGTTGCTTCTGGAG +CTAATTACTTATTCGACTCAAAATTACTTCATGAAATTCAAACTTATTTAGGTGGTAAAC +GTACAGATTTAAAGGGTAATGAAATTCATGGTAATTATACCTTAGTTAAAGAATTAGTTG +ATAATGCTTTAGATTCAGTTCACTGGTTGGCAGATTTAGGAGTAGTTTTTGATCGCAGCC +AAGTAACAATGCCAGTTGGTGCCTTGTGGAGAAGAGGACACAAGCCGGTTGAGCCAATGG +GCTATGCCTTTATTCATGTTTTAGGTGATTGGGTAAAAGAACATGGTGGCACAATTTTAA +CTGATACTCGTGCTAAGCATTTAATTATTGAAAACGGAAAAGTTTGTGGTGTAATTGCCA 
+AAAAGCCTGATGGCAGTAAAATTACTATTCACGCCAAAGCAGTAATTTTAACTGCTGGAG +GATTTGGCGCAAATACGCCAATGGTTCAAAAATATAATACTTATTGGAAACATATTGATG +ATAATATTGCGACTACTAATTCTCCAGCTATTACTGGTGACGGGATTGGTCTTGGTAAAG +AAGCAGGTGCAGATTTAGTTGGAATGGGCTTTATTCAAATGATGCCTGTTTCTGATCCTA +AGACTGGTGAACTATTTACTGGTCTTCAAACTCCTCCGGAAAACTATATTATGGTCAATC +AAAAGGGAAAGCGTTTCGTAAATGAGTTTGCGGAACGTGACGTTTTAACTAAGGCAGCGA +TTGATAATGGTGGTTTATTCTACTTAATCGCTGATGACAAGATCAAAGATACTGCTTATA +ACACAACGCAAGAATCACTTGATGCTCAAGTTAAAGCAGGTACCTTATTTAGAGCAGATA +GCTTAGAAGAGTTAGCTGAACAAATCGGTATGGATCCTGATACTTTAGTCGATACAATCA +AGAAATATAATTCATATGTCGATGCAGGAAAAGATCCTGATTTTGAAAAGTCTGCCTTTA +ACTTAAAATGCGAGGTGGCACCATTTTATGCTACGCCAAGAAAGCCTGCTATTCATCATA +CAATGGGTGGTTTAAAGATTGATACTGGTGCACATGTTTTAGATAAAGATGGTAAACAAA +TTTCTGGTTTATATGCGGCTGGGGAAGTAGCTGGCGGAATACATGCTGGCAACCGTCTAG +GAGGGAATTCTCTTGCTGATATCTTTACTTTTGGTAGAATTGCCGCAAACAGTGCTATTG +ATGAATTAGAAAAATAGTATAGATAGATTAAAACGTTGTAAACACTAGTATTTGCAATGT +TTTTTTGCTAAAATACTAAAAATACCTAATTTTTTGGAGGACTTGAATTGAGAGAAGCGC +AGGAAAAAAAGTTAACTACAAAGCAATACTTAATTGTAGCATCGATGATTTTTGCACTGT +TTTTTGGTGCAGGAAACTTAATTTTCCCACTTCATTTAGGCCAGCTTGCTGGTAAAAACT +GGGGACCAGCAGCAATTGGTTTTTCAATTACAGGAGTAGTCTTACCCTTACTTTCACTTC +TTGCAGTTGCAATTACTCGCAGTAATGGTGTTTATCAAATCGGATTACCAGTAGGAAAGG +TCTTTGCTCTTTCTTTTATGACTTTGATGCAGCTAGCAATTGGTCCTTTATTTGCAGCAC +CAAGAAATGCCACTGTATCTTATACTGTGGGGATTGCACCTCTTTTGCCTAAGCAATTTC +ATGGAATCGGTTTGATTGTCTTTACAATTATCTTTTTTGCCATAGTTTATGTGATTGCCT +ATAACGAGAGTGATATTCTTTCTTCTTTAGGTAAAATTCTGAATCCGATCTTTTTAATTT +TACTTTTTGTTGTTTTTGTAATTGCGTTTGCTCGCCCACTTGGTAATCCTGAGATGGCCG +CACCTACTAAAGAATATATGAACGGCGCAATAGTTAAGGGCTTTTTAGAGGGCTATAATA +CTATGGATGCTTTGGCAGGCTTAGCCTTTGGGGTAACTGTGGTAACAGCAATTAAAGAGC +TAGTTAATAATGACAAAAAGAAAACTGCTAAAATGACTGCTAAGTCGGGGTTAATTGCAG +TTTTTGCAATTGGTATAATCTATACTTTATTAATCGTTATTGGTGCGATGTCATTAGGGC +ATTTTAAAATAGCGAGTGACGGCGGTATTTTATTTTCAGAGCTCGTTAAGTATTATGCTG +GTATTTTTGGACAGGCTTTGTTAGCTGTTTTAATCTTTTTAGCCTGTTTAACTACCGCAG +TTGGAGTCTTAGCAGCTTTTGCCTTAGATTTCAGTGCCCACTATTCAAAGATTAGCTATA 
+AAGGTTGGCTAACAATCGGTTGTTTAGGTTCTTTAGCTACAGCAAATTTAGGTTTAGAAA +AGATCATTCACTGGTCACTTCCTGTTTTAATGTTTTTGTATCCCTTGGCTATTGTGTTAA +TTATTTTATCTGTATGTTCACCACTATTTAAAGGTGACATGGTAGTATACAAAATTACGA +TGCTTTTAACTCTAGTTCCAGCGATTTTCGATCTAATTACCAACTTGCCTGCACCAATTT +CTGGAACGCAGTTTTATAAAGTAGTAAGTCAGATTCGCTTGCAATATCTACCTTTAGCTA +ATATTGGCTTATCTTGGGTTGTACCAACAATTTTAGGATTAGTTATCAGTGTCTTGATTC +ATTTTGGGCGTCGTAAAGCTAATAAAATATAGTAGTTCGGCTATGGTTCATTTGAATCAT +GGCTTTTTTGTTGGGATAATGAAATTAGGTGATAAAAAATGAGTTTAACCAATAAAGTGC +TCGAAAAAGATGTGCATGATTTATTTACTCGGGTAGCGCCGCATTATGATCAGATGAATA +ATTTAATTAGCTTAGGTACACAAAAAAGCTGGCGCAAGAGATTTTTAAAAGAATTAAAAG +TAGCACCGGGTGATTTTGCCCTAGATTTATGTTGCGGTACAGGCGATATTACGATTGCTT +TGGCAAAACAAGTTGGTCCGTCAGGAAATGTAATTGGGCTTGATTTTAACCAAGAAATGC +TTGATTTAGCTGAGCAGAAAATACGTCAGCAAAATTTACAAAAAGAAATTCAGTTAAAGC +AAGGGGATGCAATGCATTTGCCTTACCCAGATCAGAGTTTTGATATTGTAACGATTGGCT +TTGGCTTACGGAACGTTCCTGACGCAAATCAGGTCTTGAAAGAAATTTATCGAGTATTAA +AGCCAACTGGTAAAGTTGGAGTATTAGAAACATCGCAGCCAACTAATCCGCTTATTAAGC +TAGGGTGGAAGAGCTATTTTAGGCTGTTTCCTAGCTTTGCGAAATTACTAGGAGCCAATG +TATCTGATTATCAATATTTGTCTCATACAACCGCTGAATTTGTTTCTGCGCAGCATTTAA +AAGAAATGCTTGTAAAAAATGGATTTAAAAATGTGCAGGTAAATAAATTAAATCTAGGTG +CGGGTGCAATTCATGTCGGAATTAAGAAGTAAATGAGCTAAAATATAAAATAAAAACTAA +AAAAAGGAGAATTTTATGAAAACTGGTACTAAAATTATTACTTTAGACAACGGTTATCAC +TTGTGGACTAATACTCAAGGTGAAGGCGATATTCACTTATTAGCGCTTCATGGCGGACCT +GGTGGCAATCACGAGTATTGGGAAGATGCTGCAGAGCAACTGAAGAAACAAGGATTGAAT +GTACAAGTTACAATGTATGATCAGTTAGGTTCATTATATTCAGATCAACCAGATTATTCT +GATCCAGAAATCGCTAAAAAATATTTAACTTATGAATATTTCTTAGATGAAGTTGATGAA +GTTCGTGAAAAACTTGGTCTAGATAATTTTTACTTAATTGGCCAAAGCTGGGGCGGTCTT +TTAGTTCAAGAATACGCTGTTAAATATGGTAAGCACTTAAAGGGTGCAATTATTTCGTCA +ATGGTTGATGAAATTGATGAATATGTTGATTCAGTTAACAGAAGAAGACAAGAGGTTCTT +CCACAAACTGAAATTGACTTTATGCACGAATGTGAAAAGAATAATGATTACGATAATCAA +CGCTATCAAGATGATGTGCAAATTTTAAATATTAATTTTGTGGATAGAAAGCAACCATCA +AAGCTTTACCATTTAAAAGACTTAGGTGGCAGTGCAGTTTATAATGTCTTCCAAGGTGAT +AATGAATTTGTTATTACTGGTAAGCTTAAAGACTGGCATTTTAGAGATCAACTTAAGAAT 
+ATTAAGGTACCAACATTAATTACTTTTGGTGAAAATGAAACAATGCCAATTTCTACTGCT +ACAATTATGCAAAAAGAAATTCCAAATTCCCGTCTTGTTACTACGCCTAACGGTGGACAC +CATCATATGGTTGATAATCCTGACGTTTATTATAAGCACTTAGCTGACTTCATTCGTGAA +GTTGAAAATGGTAATTTTAAGGGTGAATAAAGATAAAAAGTCCATGCGATAGTTTCTATA +TTGAAGCTCTAGCATGGACTTTTTAAGCATATAAATCGCTTTCATGACATATATTCTATA +TAATTAATAACAAATATAGTTTTATTATTTTAAGGAGAAAAGCCATGAAGAGAAAAATAA +GTAAACTACTTGCAGCGACGGCTTTAATGGGGACACTAGGAATTTCAGTAGTGGCTTGCG +GCAATAATGATTCAACCAGTAAAACTAAACAGTCAAGTGTAAAGAAGACTGCTAAAGATA +AGGCAAGTAATACAAAAAATGCAACAAGAAAAAGTCAGATTAAACTTAGTCAAACTGAAG +CAATAAATAAGTTTGATAAAAAGTATTCTGATAAAAAATTAAAAGAAATAGATCTAAAAT +TAGATGGTAATAAATATCTTTATGAAATTACAGGATTTGATAAGGATAAAGAATATGAAA +TGACCATCAATGCCACAAGTGGAAAAGAAGTCAAATCTAGTTCCGAAAAATTAGACTTAG +ATGAACGTTTGCAAAAGGGACTTGATTTAGATAAAGTGATATCTCGTGATCAAGCAAGTG +AAATAGCAGAAAAAGAAGTTAAAAATAGTACTGCTAAAGAATGGACTCTAAAAATGGATC +AAGATAAAGCCATTTGGGATGTAACTGTTGAATCAGGTTCAAGCAAACATGAAGTAGAAA +TTGATGCTATTTCTAAGAAAGTTATTAAATCAGAAAAAGATGATTAAACAAAAAAATTCA +GAGTTTTTAAGCTCTGAATTTTTTTATTAGTATTTTTATAGAAATTAATTTAATAGGTGA +TTAGCTTGTCAGCGTTCATCGTTTCATCGTCTAACTTATAAGTCTTTAATTCAAAATCAT +TTTCAAAGTTGCAGTAATAAGTCTTAGTAGCAGCAGAGTAGCAAGCGGTGTAGACAGTGT +ATTCATCGCTACCCAGTTTGTTAACTACGCTACCCTTAATCATCGCAACAGACTTTAAGA +TGTTAAAGAACTTAGCCACGTTAGCTTTTTCACCCTTAACAGTTGGGTAATTAACATTTA +AGTAAGCTGCTTTAACAAAGCGATCTGCTGGAATGCTATCACCTGGTAAACCTAAGCTGC +CAGTGCCAACGCCCCATGGAGCAACTTTTTGACCGTTCCAGCTTTGAGCTGTAGCATCAT +GTGGATCTAAACCAGTATAGTTGCCAAGGTTAGTAAGATGCCAGTTAAAGTCTGGACTGT +TAGTTAAAACGCCAAGCTTATCATCGAAGACTTTCATTCCGTATTGTTTTGAAACTTCAA +CAATAATAGCTTCATCTTTGTCACTAATAATCCAGTGAAGAGGAGCAACTGCAAATGATG +AGTTAAGAGCTTCATTAACTAAGTTAACGTTCTTTAAAGCTTCTTTTACATCGCTAACTT +TAGTAAAGTTCTGGGTGACCCAGAGCATAATTTCGTAAGAAGCTAAATTGATTTTTCCAT +CAATTGGACCATCGCTAAATTTAGCAAAATGTGGGAAGTTTAGACCAGCAATTCCCAAAC +CATCTTCATTGAAACAATCAAAGTAAGAAGGATAGCCATCAACTACAATTCCCATACCGA +TAACGGCTTTTTTAGTAGTCGTATTATCTAAAAACTTATATGGAAGAGGATAGTTGCGTG +GTGTGATAATTACACCTTCACCATAATCTTGTCCAACATCTAAGTTACGTCCAAAGTAGA 
+GATTTCCTTGATCATCTGTAAATCTTAAACCAGTACACATTTTAAAATCCTCCTTAACTA +TTTAGCAGTAGTTTTAGCTTCTTGCTGAATGATGTATTTCTTTTGACTTCTCTTAATTTG +AATAACGGCAAATACTAAGATGATGAAACCAATTGCTACAAAGGCAATCATGAAGTACCA +AGCAACTGTAAAGCTGGCTTTTCCTTTAATAATTCCGAATAATGGAGCACCAATAGCATA +TCCAATTGCATAAGCTAAACCCACATATCCTAGCATTACACCTTGATCTCTTGAACCAAA +GAGATCCTTGGCCATAAATGCAGGACCAGACATGTAGCTAAATACAGCTAAACCACAGAA +AATAGCATAAGCAACACCTGCGATTTTACTTACGTGAGCACCATAAGGTTGAAAACTGAT +CAAAACCATCATTAAGATGGATAAAACATACATGCAGCCGGCATAAGTCATTGATTTGGC +AGTACCAAATTTATCAAATAAGAAGCCACCGGAAACATTTCCAATTAAACAACCAACACC +GTACATTGATCCAATGAGGCCAACATCTGTTAGTGAAAGCTTAGTATCAAGAAAGGCCGC +ATAATCTTCATTTAAGGAAGCTAGGCCTAAGCCAATAATTAGAAAACCAAGACTGAAAAT +CCAGAACCATTTCATTTGTAGAACTTGTTTACTAGTCCAGCCCTTAAATTCTTTCGCCTT +GGCCAATTCTGCTTCTTTTTTGCTTTCAGCTAATTCAGCATCAGAAACGACGATTTCATC +TTTCTTAGGAGTTCTAATAAAGCAAGCAATAACTATACCAATTACTAATAGAGCAACTGC +GAAGATGAAAAATGGTGCCATTGAGGTCAAGTGACCTGTCTTAGTGTTACCAGTCATAAA +GTGCTTTAAAATAGCTTGCGTTGCAGGTTGTAAGAAAATATTACCAATCGAACCACCGCA +GAAAGCAATTCCTAAAGCAGCTCCACGTCCTTTTGCAGGGAACCAGTGGTTAATAACCCA +AGGAACACCTTGGCCGGAATAAAAAGTTGAACCGATCATACAAATGATAGCAGCAATATA +GAATCCTGGCAGTTTTGTGCTAATTCCAAAGATTATATAGGCAATAGCAGAAAGGCCAAT +ACCGATTAAATACATAAGTCTGAAATTAACTTTCTCTAATGCTTTACCAATAAATGGCGA +AGCAACTGAAGCAAAAACTGCCCCAAACGTAAAGATTAATGTATACGATGCTAAGGTAAA +GTGAAAAGTATTAACTAACGGGTGGATAAACAAAGGTTGAATATTTTGAGCAATCCCATA +AGGAATGGCTTGGGTTAACATACAAAGGAAAACCATAAAATATTTATAGCCTTTGCTAAC +GACTTTATCTTTAGTAGCGACATCAGTAGACATGTAATTACCTTCTTTCTAAACTAATTA +CTTATCTAAAATAATATTTGGTTTGCTGACGACAATATTTTTTTGCATTTGCTTAATTTT +AATAGCTGCAAAAATTAATAATATAAAACCAATTGCTACAAAGAAAATAGTGCAGCACCA +AGCAGTTGTGAAACTGGTAGCGCCTTTTATGACGCCAAATAGTGGGGCGCCAATTGCAAA +ACCAACAGCATAGGCCAGGCTAATGTAACCTAAGTTAACACCTTGAGCTTTTGCACCAAA +TAAACTTTTCGACATAAATGCAGGACCAGACATATAGCTAAAGACAGATAGACCACTTGT +AAAGGCCCAACCCATACCAGCGTAGAAATTAATGCGATCGCCGTAAGGATGAAGGCTAAT +AAAGATCATCATTAGGATAGCTATAATTAACATTATTCCTGCATATGCCATTGATTTTGC +TGTACCGAACTTATCAAATAAATAACCCCCAGAAATGTTTCCGATGATGCCAGCAAGTCC 
+AAAGACTGAGCCAATTATTCCGACCTCAGTTAAGGATAATTTAGTATCAAGAAAGGCCGC +ATAGTCTTCATTTAATGAGGCTAGGCCTAAACCGATAATTAAAAATCCAATGCTGAAAAT +CCAGAACCATTTCATGTGTAGAACTTGTTTGCCACTCCAGCCTTGAAATTCATGGGCTTG +CTTTTGAGCAGCTTCGTGTCGGTTTTGTTCAACTTCTTGAGCAGAAGCTAAAATTTCATC +TTTCTTTGGTACCCTAATAAAAGCTGCAATAATCAATCCAACTACTAATAGAGCAACAGC +AAAAATAAAGAAAGGTGTCATCGAAGTTAAATGACCAGTCTTGGTATTACCAGTCATGAA +ATGCTTTAAAATTTCCTGGGTAATAGGTTGCAAGAAAATATCACCAATCGATCCACCACA +AAATGCTAGACCTAAAGCAACACCGCGTCCTTTAAATGGAAACCAGTGATTGATAATCCA +AGGGACACCCTGACCAGAATAAAAGGTTGAACCAACCATACAAATAATTCCAGCCAAATA +AAAGCTAGGTAGCTTTGTACTAATTCCAAAAATTACATATGCTCCAGCTGAAAGACAAAT +ACCTATCAGATATAAAATCTTAAAGTTTACTTTTTGCAAAGCCTTACCAACTAATGGTGA +AACTAAAGACCCTACAACAGCCCCAAAGGTAAAAATTAATGTATAAGAAGCTAATGTAAA +ATGAAAAGTATTAACCAGAGGGTGAACAAATAAAGGCTGAATTAATTGGGCGATTCCATA +TGGAACAGCTTGAGTTAACATGCAGAGAAATACCATAAAGTATTTGTAGCCTCGACTGAC +CACCTTTTGTCTATCGTTTGACATCGTATCTCTCTTCTTTCTGACAAAATACTATTAGCT +TGATAAGTTATATATTATTAAACACGTGAGCTTTGACTATAGCCACCTTTATTGTGGCTC +ACTAAATCGAGTAAATCAAAAAATAACGACCTATAATGTTATTTAATAGGCATAATAAAA +AGCCTATCTTTTTGAGATAGACTTTAGAAAATTATTTTTTTACATATCCTGCGATGTCAT +CATCGTCAAAATCATAGGTTAACTTCTTATCATATGCTTTTTCATATGCTTTTTTCTTTT +GTAGATAATCAATATCACGCATTCTTTTGGCATTTTGATCAACTGTTAGACGGTCAACTG +GATAAATTGTTGGTAAAACGTGAAGAATGTAGCGAGTTTTGTGGAATTCGCTGCTATTTT +TCTCCATTTCAGGGAGGTCCTGAATTTCTACAAAACAGGAAATAATTGGAACGTTGGCCT +TAGCTGCGTAGTAGTAGGCGCCCTTTTGAAGAGGGCGAGGTTTTCGATAGTTATACCACA +TTTCTTGCTCCGGATAGATCAAGACCCAGCCCTTTTTATCAAAGACTTTTTGCAAATGCT +TAGGAAATTCACGCCCCAAATATTGGTAGCTTTGAACAATTGGAATTGCATTAATGTTGT +TCATTAGAAAACCAAAGAAACCGGGTAATTTTAAATTAGAATCTTCAATCACAACATAAA +GTTTTTTGTGACATTTGTTGGCTAATTTCTTGATTGGAAGACTATCGGTTTGCTTGAAGT +GATTACTGGTAATAATTGCGCCATATTTAGGTCCTTGAGCCCGCTTCTTATTTTTGATTT +GCGTGTGAGCCATCAAAATTAAAGTGTCAATATTAATAATACCTTGAGCAATAATATTCT +TTAGTTTATGACTAAACTTCTTTTGTTGTTTCCAATAATCGTTAACGAGTTTGATTCTTT +CGCTTTTAGTCAGGATAGGATCGCCTAATTCGACTTTGGCATGCATTTTTCCCTTTTTAA +CGGCTGTTTTGATGTTTTCGATAACTTGCGCGCGATTTTCACCAATAATCATAATTTCAC 
+CCTTTCGCCTTTTCTTTTAATTTGAGCCCAGTTATTTGGATCAAGTGCCGTGCTCGCCTC +TTTTTCAAGCATATGATCTAATTTTGCCCGATTTTCAGCTCGCTCTTGATTAGTATAGTT +GTCTAACTCTGATTTTAATTCCTCATAAAAAGGTGTGCTAGAGGCATATTTCCAGAAATA +ATCATTATATTGGACGTCTTTAAAGTGCCAAGGTTTGAAAAATAAATTATAGTGAATTAG +CCCTGGTTCAGGAATGGGGGCGGTATTTTCATTAGGCATTGCGTCCCAACGTGGATCAAG +ATGTAAAATTCTTCCGTCACAAATCTCATTTAGGTAGTCTTGATCAGGAGCAATACAATC +AAAGTGATATTGCTCAAGTAAATGCATAAAGTGATCGATAAAGTGTTCATCCCTAAAAGC +TTTAGAATTTAAGACTAACATCCCAGAATTAATATATTTCTTTGGATCGAGAGCTAGAAC +TTCTTTAATATATTTCACCATTTTATCTACATATTGAATCGATGAATCTGTACAAGCGGC +GAATAGATTATTACCTAAGTCGTTATTATACAGTTTAGCAATGTCATCAACGACGACTGT +GTCACTATCAATGTAGATTGCTTTATCATATTGTGGAAATAAGTCAGGGATAAAGAGACG +ATAAAAAATAGACATTGTGAAAAAGTCTGCCCGGAGATAGTTTTCTTTACGATTTTGGAT +CGGTTTTACCAGTTTTTCATCAATATGGAAAAATTTAACGTGCACATGGTCATTAGCAAA +AGCTGCAAGTTGCTTTTGGTGTTTTTCAGACAGGTCTTGATTTAGAAAAGTAACTGTGTA +ATCTTTTTGCGGATTAGCATTTTTAACCAAGGAGTTTAGCGAAACTGCCGCATACTTGGC +AAAATCATCACTAATACTATAAAAAACAGGGATAGTCATTTTTAAGCCTCTTTATATTTT +TACAGATGTCGTTTTCTTAACTTTCGCCCAAGTTACATCTTCATCTTTGATTTCATCAAC +TTTTTTAATCATCCAATCAAGGTCGGCACGTGCTTTTTTACGGTCTTCATCGGTGAAGTT +AGCTAATTGTTCTTTTAGTTCTCCATAATATGGAGACTTTTTAGCAACATCCCAGAAGTA +TTTGACATATTGAACATCTGCGAAATGCCAAGGCTTAAAAAAGAGATTATAATGGACAAT +TTTAGGATTTTTAATTTCATCCATGTGTTCATTGGGCATTGCATCCCACTCAAGGGGCAA +ATGATAAATTTTGTCTTCACAAATTTCGTTCATATAAGCTTGATCAGGATCGATATTATC +GAAATGATACTTTTCAATTAATGAATAGAATTTATCAACAAATTTCTTATCTCTAAAGGC +TTTCATATTGAAGAGAATAACGCCGTTATTGATATATTTTTCTGGTGGAAAAATACCTTG +GCATTCCTTGATATAAACTTGAAGTGGCTTAATAAATCTGATTGACATGTCTGGTACACT +AGCAAACATGTTGTCGCCGATTTCGGTGTTGTAGAGTTCAGCAATGTCAGTACAAATAAT +AGTATCAGCGTCTAAATAAACAGCCTTATCATACTGAGGAAAAAGGTTAGGGATAAATAA +ACGGTAGAAAATAGACATCGTGAAAAATTGTGCTCGTAAGTAATTCTCTTCACTGTTGTG +AATCGGAGCTACCATTTCATCATCAATATGAAAAATATTAACATGGACATTTTTAGTAGA +AAGATCTTCTAAATCTTTTTTGTGCTTATCGCTAATGTTTTGAACTAAAAGAGTAATGGT +ATAGTCTTTGTTTTGATCCACATGGTCAATTAGTGACTGAATTGAAACGGCCGCGTAGGG +TGTGTAGTTGTCGCTAATTGTGTAAAAAACAGGTATAGTCATCAAAAAATAAACTCCTTA 
+TATATTAAATGTTTCTAGGCCCGCTTTAATTGTGACCTAAGCTCTACGTACTGCTTAAGT +AAATCATCATATTCATGTAATTTCAAGACGCTGTGCACTCGCTCTATATCCCATGGTTTA +ACAGTTAAAGTATGGAAGTATGGCTTAAAACGAAAGCTAGTCGTGAAATGTTGAATTTTG +GTATCTGGTCGTAAACGATATTGTTCATTGTAGCAACGTGGAGCAATTTTCTTCTCAGTA +GCAAGTTTATTTAGAGCAGACTGATCTGGAAGAAGCATTTTTTTATCACTCATTTTTTCT +CTTACTTTGGCAAATAATTGTGTTTTTTTAATTTCAGCCATGTTAAGAAGCAAGACGCCG +GAGTTTAGGTAATCAAAAGTCTTGTGGGTATGGATATTGTGAAAGAAGAACCGACCCCAG +AAGTCTAAAACACCGACTAGTTCAATATTAGCTAGATCTTGATTATAAAAGTCGCTAATA +TCTTTACGCACAATAATATCATCGTCTAAATATAAGATACGATCAGGAATCTGTGGTAGT +TGGTCCGCAAAGAGACGCAGCATTGCATATGGCGTAAAACGAGTTTCCATATTTGCGCGT +GGAGGTTCTTTTTTAAATAATTCAGTACAATCGATTAATTCAGCAGTATTGTTAGGATCC +TTTTCTTTAAGCAATTTTTTAATTAGATTAAAAGCTTGTTGGTTAAAAGCTGTGTATCCC +TCTGCGTGCATCGTCAAAATATAAAAGTGCAGGGGAGTTGATGTGTTTTTTAGTAAAGAC +AGCGTTGTAATTAAAATACCATCTTCGGCATGAGAATCGCCGCAAAACATAATATTCATT +TTTGTCCTCCAATTTTTCGCTGCTGGTAGCGATAATATTCACAGTTACTCAATTTTGAAC +GGTTCTTCAATTGAGTAAATATTTGATCATGTAGCTTTTTTTGTTGCAGTTTTTTAGGTA +GAGAAGTATCTGGAAAAAAGGGGCCATCAATATAAACAGTAATTCTAGGGTGCTTGCTAT +ATTTTCTTTTCTGGTAGGTCGTTGTCATAACAAAGCTTGGCTTAGTTGTTTGAACTGGAA +AGTTAAAACTAGTTGCTGGAAATGGACGAATATCAGTATAGTAGGGCCAAACATGAGCTT +CTGGATAGATAATGATATGGGCGTTTTCCTTAATTAGAGTATTAACTGCTTTAAGCAGAT +GAATTGCTTGCTTAATATTTTTGCCAACTGGTAATCCTCCATAAGGCAAGAGAATTTTGC +CAATTATTGGAATTCCCCAGTTTGCTTGATTAGCAATTGCATAATAATCTTTCCAGCCAA +AGAGGGTTAAAGGCATAAAGACGTCGTTAACCATTTGAGTGTGATTTGCATAGACAAAAT +AGCCCTGATGCTTATATTTAAATAACTTATCTCTCCCTATTACTTTGACATGCATTCCGC +CATAAGTAAAAAGATAAGCAAAGCCAGTAGCTAAAAAACGCATTAAATAGTTAAGAGGTG +TGCGTTTAATTATTTGATAATCGCTAGGTAAGGTAAAATCTTGTTGCTTACTTTTGACAA +TGTCGTCAGTTAAAGAATTATAGTAAATAACATTACGTTGAGATGGTTTAGTATTCATGA +TATTAATTATAAAAGAAGTTTAAAAACAAAAAAAGACATCGCTTCAGCGATGTCTTATAA +AATATGGGTGGTCAGGGGATCGAACCCTGGACCCACGGATTAAGAGTCCGTTGCTCTGCC +AGCTGAGCTAACCACCCAATTGGATAACCAACAAATAGTATTATGCCAGGAAAAGAACTA +AAGTGCAAGCTTTTTTTGCATAAGTTTAAAAAAAGTGTGGAAAACAGTATAATATAGTTA +TTAATATTGAGGAGGCATCATTTATGCCAAAAAAGATTTTAGTCGTTGACGATGAAAAAC 
+CAATTTCTGACATTATTAAATTTAACTTGACTAAGGAAGGCTTTGATGTCGACACTGCTT +ATGACGGCGAAGAAGCGGTTAAAAAAGTTGATGAATATGATCCAGACTTGATGATTCTAG +ATCTAATGTTGCCTAAAAAAGATGGCCTAGAAGTTGCACGGGAAGTACGTCAAACTCACG +ATATGCCAATTATTATGGTAACTGCGAAAGATACTGAAATTGATAAGGTATTAGGACTCG +AGATGGGTGCAGATGACTATGTTACTAAGCCTTTCTCTAATCGAGAACTAGTTGCTAGAG +TAAAGGCAAACCTACGTAGACGTGATTTAACCCAAAAAGCTACTGAAGATGATGAAGATA +AAAATATTACTATTAGCAATTTGGTAATTATGCCTGAAGCCTATATGGTCGAAAAAAATG +GCGAAAAAATTGAATTAACACATCGTGAATTTGAATTACTTCATTATTTAGCTCAGCATA +TGGGTCAAGTGATGACTAGGGAACACTTGCTGCAAACCGTTTGGGGCTATGATTACTTCG +GGGATGTTAGAACTGTTGACGTAACTGTTAGACGTTTACGTGAAAAAATCGAAGATAATC +CGAGTTCGCCAACAATTTTAGTTACACGGCGTGGAGTAGGATATTACGTTAAAAACCCAT +CAGATGAATAAGGATCAAAGTCACTAGTAATAGTGGCTTTTTTTGCAATTAGGACAAATG +AAGAAAATAAAAAGTGTATTAAATTCTATCAATTTTAAAATCGCAGTAATTTTTATGCTG +CTTTTATTAGCAACGATTGAAGTGGTTGGTGCGTCTTTTACAAGGCAACTCGAACAAAAC +TCAATTCAAAATTTTGAATCATCAATTCAAGTTCCAAATATTATTACTAACCAAATTTCT +AGCCAATTAAGTAGGGCTAATTCTAAAAAGGCTAATCAGCAGTTAAGCCAGATTATCTCA +AACTATAATTTGGGCGATATTAGCCAATTAATGGTAGTTGATAATAAGGGAGTAATTAGA +GCTGTTTCAAATGTGAATGATCAAAACCGTATTGGACAGCGAACTAGTAATGTAGATATC +AAGAGTGTACTCTCAAATGGAAAGCAAGTTTCAAAAGTTATTAATGACAATGGAAATTAT +ATGGTGCAAATTTCACCTTTGACTTCTGCTAATGGAACTAATACTCCAGTAGGAGCAATT +TATGTTCGCGCTAGCTTACAAGGTGTCTTTAATAACTTGAGACAGGTATCGATTTACTTC +CTGATTGCTTCGCTGATTGCAGCTGTTTTGGGAGCAATTGTAGCCTTGGTTATTTCCCGG +GCAATAACTAGACCTATTGAAGAGATGCGTAAACAAGCATTAAGAGTTGCAAATGGAGAC +TATTCTGGGCATGTTAGAGTTTATGCGCAAGATGAGTTAGGTCAACTAGCCGAGGCTTTC +AATACTTTATCAGTAAGAATTGAAAGAACTCAAGAAATTTCAGACAGTGAGCGAAGACGA +CTAGATAATGTCTTAACACACATGACAGATGGTGTTATTGCAACAGACAGACATGGAAAT +ATCACAATTATTAATGAAACAGCACTCGATTTTCTAGGAAAAACTGAAAAAGACGTTATT +GGAAAGCCAATAACTAATTTACTTGGATTAAAAGATGTTACTATTCAGGACTTATTAAGT +ACTCAGCAAGAATTAGTTGTTCGAGTTAATGATAATACGCGCGATGAAATGATTTTGCAT +GCTAACTTTTCTTTGATCCAGCGTGTAACGGGGTTTGTTTCCGGCTTAGTTTGCGTGCTT +CATGACATTACCCAACAACAAAAAAATGAACGTGAACAACAACAATTCGTTTCAAATGTT +TCACATGAGCTTAGGACGCCGTTAACAAGTTTAAGAGCATATGTTGAGGCATTGAATGAT 
+GGGGCATGGAAAGATCCAAATATTGCACCACAATTTCTGCATGTTATTCAAGATGAAACT +GAGCGAATGATTCGAATGATCAATGATTTGCTTAGCTTATCTAGAATGGATCGAGGAGTA +GCGAGGATGGATTTAGAATGGGTTAACTTAAATGACTTTGTTAACCACGTTTTAAATCGT +TTTGATATGATGCTCAAGTCTGATTCTGATAAGATGCACAAAAAGAAATATACGATTAAG +CGTGAATTTCCTCATCAAGCCTTATGGGTTGAAATTGATACTGACAAGATGATGCAAGTA +ATTGACAATATCATGAACAATGCTATCAAGTATTCACCTGACGGTGGCGTCATCACAGTT +CGCCTATTGCAGGCTCAAAAGCACGTTATCTTAAGTATTTCTGATCAAGGATTAGGAATT +CCAAGAAAAGATTTAAACAAAATCTTTGATAGATTTTATCGTGTTGATAAGGCGCGTTCT +CGTAAACAAGGTGGAACTGGTCTAGGACTGGCTATTTCTAAAGAAATAGTAGAAGCACAC +CATGGTCGAATTTGGGCAGATAGTGCTGAAGGAGCCGGCTCAACTTTCTACATTTCTTTG +CCATATGAAGCAATTAGCGAGGAAGGAGAAAACTGGGATGAGGTTTAAAGATAAGTTTTC +TAGAATTGCATTACGAGTTAGCTTAATTGCAATGGTTGCACTATCAATTATCTTGTCAGC +TATTATTTGGGGTTCTGATGCACGATTCTCAAGGATTGAAGAGACATCTAATCAAACTCA +AATTAAAGATTTAGGCCAACGTTCTTTAAGAGATATTTATTTACCTACTCAAACTTTTTA +TTTTAAAGATAAGCAAATGTATCAGGTTTACGATACTAAAAATAATCTTCCGCTAGAGTT +TTCAAAATTAACTCAATCAGTTAAGCCACTATTGCCAATCAGAGTTTGGTCTAGTCAAAG +TAAATATGAAAAGTTATTAAAAAATCCTGACTATGTTCAGTTAACATATCCGGATCAGAT +AACGATTTCCTTATTCTTAACTAATGTAAGAAAAACTGATAGTCGGGAATTTAATCGCTT +TTTTGTGCCAGCTCGATCCAGTAAGTATATTTATTTAGGCAATGACGAAAACTATACTAT +TTATCGAGTTCGTCTAAATGATGTATCGTTTGACAATTTGGTGGAACATATTCAAAGCGC +TAAAACACAGATGCCAGTTACGCTTCAAAAAGTTCATGATGATTATTTACTTTTCTATGA +AAAGGATTTAAGTTTACCAACATACAGCTATTTAACTAATGAAGAGTCAGATTCATACTT +TGTGTATCGCTTGTTAGGCTCTAATAATCCTACCCAGCATAGTTCTGGCGAAAGCATTAC +ATATTCTAATGGAGTATACGAGCGGTTGATTGCGGCAAAACATACTCATAATTATGAATA +TGTTGATTATCAACAGGATCAGGTTCCGAAGACTATTAGTCGAAAATTAAATGATAGTTT +ATATTTTGTTCGTAAAATTGGATTATCTGAGCCAGATTTAAGGTTCTTTGATGCTGATAA +TAATACAGTTATTTATCAAAATTATGTTGAAGAATATCCGATCTTTTTACCAGGGAAATA +TAAGATGCGGGCGCAAGTTAAGTTTGCTTCTAATGGAATGACAATTAACTTTAATAGCTT +AGACTTGCAGATTCCAATTCCAACTAATGGTGAAAAGAAGACGCTTATTCCGACTAATGA +AGCGATGGATGAATTATATCAAAAGGGATATCACCAAAAAGATATTGAGCGCATTGTTAT +TGGCTATACAGCTAAGTCTGATAATAGTAAAAATAAAAAATTAGTAGATCTAGAACCAGC +CTATTATGTAAAGATTAATAAGCAGTGGAAGACCCTAGATGAATGGCTAAATATTAATAA 
+TCAAATTACAAATTCTAGAAAGGAGGGGCTAGTAGATGGACTTTAAGAGAATTGAATGGA +TTTTTTTAGTCGTATTTATTGGAATTAATATCTTTTTGGGTATTGAGCTTTGGCAAACAC +CGACGCTTTTATCTGCTGGCTCTACTCCAATTCAAACTGATATTAAAAGCGAAACGAGTG +CAGATCAGATAACTATCCCTAAAGTTGATGATAAGCAAGATGATGGGTATTATTTAGCTA +CCAAAGTGGACAATTCTTGGACAAAAAAAGCAGCAGCACAGGTAAATCAACAAGTAGAAA +CTAATACAAGTGAAAATAGTATTTCAGTTAATTTGAACAAACCAATTACTCTGTCGAAAG +ATTCCAAAAAAGCCCTGAAAGAAATTATTCACTTTAAAAATAATAGTCAAAACGTGTATG +AGGGTAAAGATTATGTCTATCTTTCAGAATTATCCGAGGGAGATGATTATGTCTTTAATC +AGAAAACAAAGTATGGAGAAGTTTTTGCAGCAACTGCTCTATTGCACATTATTGTTAAAA +ATAATCAAATTGTATCGTATTCGCAACGCTACGTTAGCAATTTGAATCCAGTACGTGAAA +GACAAAATACAATTAGTTCAAAGGCTGCTGTAAATTCTTTATACACTTATAGTGAATTGC +CAAATAATTCTAAGATTATCTGGTTAAAGCAAGTTTATACTAAATTGATCACTGTTCGAG +GGAGCGAAATTTATATTCCTACATGGGTAGCTGCGATTGAAAACAATAATTCTCATACGA +TGACCCTTAAGCGCGTAAACGCATTTACAGGGACAATTATCCAAAATAATATTGCTGCTG +ATGATTCAAAGGAGTAGAGAAATTTGAAAGTATCAGTTTTATCAAGTGGTTCAACAGGTA +ATTCTACATTGATTGAGACTCCGCAGCATAAAATCTTAATGGATGCAGGTTTATCTGGAA +AGAAAACTAAGGAATTACTAGCACAGGTAGGCGTTGATATTAAAGATATTGATATGGTAT +TTATTAGCCATGATCATACTGACCATTCTGGTGGATTAGGCGTATTGATGAGAAGATATC +CTAAAATTTCAGCCTATGCGAATTCAGGTACTTGGAATTATTTAATTGAAAGCAATAAGA +TCGGTAAATTACCTGTTGAACAAATTAATACGTTTGAGTCTGGTATAACTAAGACTTTTG +GTGATTTGGACGTGACAAGTTTTGCGACGAGTCATGATGCGGCGCAGCCACAATATTATG +TCTTTACAAGTGGTGGGAAAAGATTTGCGTGTTTAACAGATACTGGTTATGTTTCAAGTA +CTGTTAAAGGTGAAATAAAGGATGCAGATGGCTATTTAATGGAATTTAACTATGACGATA +TGATGCTTAGAAATGGCCCTTATTCTTGGTCTTTGAAACATCGAATTATGTCTGATGTTG +GACATCTTTCAAATGATCAAGCAGCAGACACCTTACTTGATGTAGTTTCTCCCAAAACTA +AACATATTTTTTTAGCGCATCGCAGCCAGCATAATAATACTCAGTATTTAGCGCATGAAA +CGGCTGAAGACTTATTGGTAGCTGGGGATGCAAATTTACCAGCTGATGTTAAAATTATTG +ATACTAATCCAATGCAAGCTGGTTCTTTAACGAAAATATAAATTTTTAGCAAATAATTTG +ACTTGAATTCATAAAAAATTCAAAATTTGTGATTAGGCTAGAAATGAAGAGAAAAGGAGA +AATTTTTATGGCAGAAAATAATACTAAACACCCTACTAAGCATAATGCTTTAATTAAAAC +TGGTATTGTTGGAGTGGTTGCCGGACTTCTTGGGGGTGGAGTTGCTTATGCAGGTTTGTC +TCAAGTTAATGGGCAAAATGCACCACAAACAAGTGTTGTTCCAACGACAAAAGTTGAAAA 
+GTCTAGCAGTAAAAATAGTAGCCAGATGACTAATGCCTTTAATACGGTAAAGAAATCAGT +TGTTTCTGTTGTTAACTTAAAGAGACAAAGTTCATCTTCAAGTAGTGATCCGTTTGGTAT +TTTTGGCTCTGACAGTTCTAGTTCTTCAAAGAAAAATTCTAAGTCTGATTTGGAAACTTA +CAGCGAAGGTTCAGGTGTTATTTACATGAAGTCTAATGGCAAAGGCTACATTGTAACTAA +TAACCACGTTGTATCTGGAAGTGATGAGATTCAGGTCATTTTAAGCAATGGTAAAAAGGT +AACTGCAAAAAAAGTAGGAACTGATTCAGAAACAGATTTAGCTGTTTTAACCATTGATGG +AAAATATGTTACTCAAACTGCGCAATTTGGCTCATCTAAGAATTTAGAGCCAGGACAGCA +AGTTATTGCTGTTGGTTCACCTTTAGGTAGTGAATATGCTACAAGTGTAACTCAAGGTAT +TATTTCAGCTAAGAATAGAACTGTTGATGTAACTAATTCTGCTGGACAGGTTACTAACCA +GGCAACCGTTATCCAGACTGATGCTGCTATTAATCCGGGTAACTCAGGTGGTCCACTTGT +GAATATGTCTGGTCAAGTAATTGGGATTAACTCAATGAAATTATCCTCTTCTAGTGATGG +TACAGCTGTTGAGGGAATGGGATTTGCAATTCCAAGTGATGAAGTTGTTTCTATTATTAA +TCAATTAGTTAAGAATGGAAAAATCATTCGTCCAAAATTAGGTGTAAGAATTGTATCAGT +TGACGAATTAACAGAGTATGGCCGCAAGAAACTTGGTTTGCCTGATAGTGTTAAATCTGG +TGTCTATGTAGCTAGCGTTACTAAGAATGGTAGTGCAGATAAAGTAGGAATTAAGTCACA +TGATGTAATCACCAAGATTGATGGTAAAGATGTTGATAGTGTTGTTTCACTACATACTGC +GCTTTATACACATAAAGTTGGTGATACAGTAACACTTCAAGTCGTTAGAGACGGTAAGTC +ACAGAATATCAAAGTTACTTTAAGTTAAGTGAATATTATTAAATAGAGATAAAAAAGCTA +TGATTATGCGTTTTGTAAGCGGTGATCATAGCTTTTTCTTAATATTTAAAATGTGAAACG +TAATAAAGTTCGTCTTTTTGAATTGTCAAGAACTGAAAGTTATCCAGAAGATTGTGGGAA +AATGTGGAAAACTCTGTGGATTGTGCATAACTCGGTCAAAGTAAAAATAAATAGTGTGGA +AAACCTGTGAATTGTGTGTAAATAAAATTCTAAAATTAGCTGTTCTGAACGGCTGTTCAA +GTTGATTATGTAGGGAATAGACCCAAAAGTTTGGAAAATTAGTAGTATTTATAAAAATGT +GGAATAAATTTGCTAAAAAAGTTATCCACTTTTTTAGAAACCACAATTTATTGATAATTA +ATTAAGTCAACCACTAAATATAGTTTTCGATAATCCACAGCTAGGGGAAAAGTGATGTGG +ACGCTATGTCAATTAGGGGATAAAATTAAAAACTAGATGCGTGAAAGGTTAGATAGTTAC +AATGAATATCAAAATTGTTTGTGTAGGTAAACTAAAAGAAAAGTATTTTAAGGATGGTAT +TGCTGAATACGTAAAGAGAATGGGGCGTTTTGCTAAAGTAAAAATAATTCAAGTACCAGA +CGAAAAAGCCCCCGAAAAGCTAAGTCCTGCTGAAATGGAGCAGGTCAAAGAAATCGAGGG +AAAAAGAATTCTAGATAAGATAAAAGATAAAGAATATGTTTATGTAACGGCAATTAAGGG +TAAAGAAAGAACTAGCGAAGACTTTGCTAAAGAGCTAGCTGACTTAACTACTTATGGACA +TTCAGACATTACCTTTGTGATTGGAGGAAGTTTAGGAACAAGTAATGCAGTAAACAAACG 
+AGCTGATGATCTAATTAGTTTTGGTAAATTTACAATGCCGCACCAATTAATGCGGTTAGT +TTTAGTTGAACAAATTTACCGTGCCTTTATGATTAATAGCGGTAGTCCGTATCATAAATA +ATTATTTTATAAAAAATAAGATTGGTTGCAGTAGGATTAAAATACTACTAAAAATAAGCC +AGTCTTTTTTTGTTAGGGCAATTGGCACGATGACATTTCGTGGTTTATCTTCACGATACC +CATGTGAAATCATTCCTTCAGCAAGGCTGTTTGACCAAGAAATAGCTGCTAAAATCGCTT +TAAAATATAATGTTGGCGAATAAAAAGAGAGATGATAATGCCTCATGTCTCCAACTAAGC +GAATGCGATTAACTTCAGCATGGATTTTAGGAAGTACGTTGAATGCGGCTAAAGTCCCAT +AGGCGAATTTACTCGGTAGTTTAAAATTTTGCTCTAATGAGCGAGCGAGTGAAAGAATAC +TAGTGGTTTCAGTAAATGTAGCTCCTAAAAATACATAAGCGTAAATTCGTGTAAATAAAA +CGCTGGCATGGTAGAGACTATGACCTGGAGTAAAGTAATAGATCGTAATAAAGACTACTA +AAGCTGCAAATAATGGGATTAAAAAGAGCAAGAGTAGTTTTTTAGGGCTGAGATGCTTAA +ACGTGAGATAGCCAAGACTAGCTATAATAATTATTATATTTCCAACCAAATTAGGAATAA +GCGATATTTCAAGTGAAATAATTAAAATCAAAAATAGTTTCAATCCGGGATTCATGTTGT +TTACCTTTTTAATTAGTGTAGAAAAGTTTCTGATCTTTGATAGTTAAGTGATAGTCACAG +AGATGCTGAACTTGTGCAAGTTGGTGACTAATGACAAGCAAGGTGTGGTTAGAATTAAGA +AAGTACTTTTTAAGCAAATAAATGACCTCTGAGACGCTTTCTTGATCTAAACCACTAAAT +GGTTCATCTAGAAGTAAAACCTCGGGATAAGCCATTAACATGACAAGAATTTGCAATTTT +CTTTTTTGTCCTCCAGATAGAGAATAAACAACTTGTTCGCCTAAATTTTCTAAGTTGAGT +TCAGTCAGCAGGTGCTTGAGCTCAGTTTTATTCAAGGAAGGGTTTTGATTGTGTTTTAGG +CTAAAGTCTAATTCTTCCTTTACTGTAACATTTAAGAATTGGTCATCTGGATTTTGAAAA +AATTGTCCTACTTTTGCTAGATAGGGATGCCGGCGCCATTTCTTAATGTCTTTACTTTTA +AATATTAATTGTCCTTGATACGGAATAATTTTGGTTAGAGCTTTAAATAAAGATGTCTTA +CCGCTTCCGTTTTCGCCAGTTAATAAAGTGGCTCTTCCGGAGTAGATTTTTAGATCAGTT +GGAGAAATTAGTTTTCTTTCTGGAAAAGATAATGAAAATGATTCAAGTGAAAATACTGGA +TTTTGTTTTAAGTCAGGAAGAGAGAAAGTTAATGCTTGATTTTTTAGCGTGTTTTCAGGC +AAAGTAATAGTTTTAATATTTTTGTCTTTGATAGCCAAGACCGTGGAAACTAGCTTTTCA +TAGCCAGAGAAATTATGGTCGCTAATTAAAATTGTTTTTCCTTCTTTGTGAAGTGCAGCT +AAGCAAGTTAGTAGAAACTTCCGATTATGAGAGTCGCAATTAGCAAAAGGTTCATCTAAT +AAAAATAAGTCGCTTTTCATGGCAATCAGGATAGCTAAGGCAACGCGCTGCTTTTCACCG +CCTGAAAGCTGAAGAAAAGGTTGGTCTAATAAAGATGAAATATTAGTTCTAGCACTTGCG +TAATTAATACGCTTTAAAGCAGCAGTACGATCAACTAGTTCATTTTCTAAGGCAAAAATT +AGTTCTTCTCGAGGAGTCGCCATCGTAAATTGTCGGTTAGGGTCTTGAAATACCATGCCC 
+CAATTAGTGAAAGGCTGTTTTAAAGAACCAGCAGTAATTTTGCCATTGAAAGTTGGGTCA +AGGCCGGAGATGAGCTTTAACAGAGTTGATTTGCCACTACCGGTTTGACCAGTCAATAGA +ACAAAATCACCTTGATTTATGGTTAAATTTAAATCTTTAAAAATTGGCTGATTTTGATAT +TGAAAAGATAGCTGATTAATTTTAATTTGTTTGGACGACATGGGCTTTTACCAAGAGGTT +AATGATCAATTTAACTAAAACGCAGGTAAACAAAAGCATTGAAAGCCAACGAACTACGAA +GTAGAAAATCATGTTAAAAGTTGAAAAATAGCTATAGCCATTCTTAATGAAGTCATACAG +GTAGGTAACGATTGTTGTCGTTGTAGCAGATAAGAAAAGGGTGAACCAGTTGTAGCGACG +GTAGCTAGTAAGAGTAAAACCTAATTCGCTACCTAAGCCTTGAACAGCTCCAGAGATCAG +ATTTACGGCTCCCCATTGTTCTCCAGCGATAAGTTCAACGCAAGAACCTAAAAATTCTCC +TAGAAATGCAGCCCCAGGTAGTCGAACTAAGAATCCCGCTAAAGGACCTGCCATACACCA +GATTCCCATCATAATATCATTGGCTAACATTCCATAACCAACCGGTGTAAGGAGCGCAGT +TAAGACATTATAGAGGCCGTCACTAACTAAGTAAATAAAGCCAAAAATAATAGCAATTAA +AGCAATTAGGATAATATCTCGAACATGTAATTTTTTCATTTTTTCCTCCCGAAATAAAAA +AGCCACCTCAAAGAGGTGACTAAATAAAAGTCTAAAAATAAATCATTTTCCCTACGCTGA +TGTTAATCAAACAGGTTCAATGGGTATATTCTCAGCGATCTGGCACCCCAGTTATATTTC +TAGGTTTAGCTTACGCTTAATAAGCCGAAATGACAATTAAAAATAGCAAAAAGGTTATAA +TTAAAATAGAGTGAAAGCAAAGGGGACCGGTTGAGATGGCAACAGCGACATTAAAAAATT +ATTTATCAAGTTTAATGCAGAATACGCAAGGTACTGAAATAGAGATCCGTACTGAAAATC +TTCTTTTGCCATATAGTCTTTATTTAGAAACAGTGCATTATGATTTTACTGAATATCAAT +CTAGTACTAGTCAATTAATTTACTGCTTTGCGGGTAAGTGTGAGGTTGAAATTAATAATA +AAACTTTTTATTTGACGGCAGGAAATATTTTGCTAATTGAAAAAGACACTGACTATATAA +TTAAAGTTAGTGATAAAAACGCAATTATTGTTAAGTTTAAGCTAAAGAGTAATTTTTCAT +GGCAAAAACAAGTAGAACAGTTAGATGCCAATACACCCACCGAACAAAGATTGAGCACAT +TATTTTTAAAAAAGTTAAATCAAGATAGTGCTTTTTTATTTACAACTACCTCTGTAACGT +GGGGTAGTCAAAACTTGAAAGGTATAATTCAAGACTATCTTAATAATGTCGCTTTTAATG +GAACAATTGGCTTAGAGCTACTTAAAATTATAATCTTACGCAATTTGCGCGAACAAAACT +TTAAAGCAAATGAAGTAAAAGAAAGTACATTTAAAGATGAAGCGTTAGATCAATACATCG +ACCAACACTATAATGATATTAGTTTAGCGCAAGCTGCCAAATATTTTGGCTTTAATCGTA +ATTATTTTTCTACAATGGTAAAAGAAAAAACGGGAAAGAGCTTTGTTGAGCATGTCGATG +AAAGAAGGATGAAAGAAGCTAGAAGGTTATTGGCCAAGCCAAATGTTTCATTAAAAGAAA +TAATTGAAACTATTGGATATTCAAGCAAGTCGTTTTTCTATAAGAAATTTAAGCATTATT +ATGGCATGACTCCAGCAGAAATGAGAAAGAGACTATTTAGAGAGGCTCATATCAATTTGA 
+AATAATAGAGTACACAAAAAGGGATGCCTTGTGCATCCCTTTTATTACATGTTTTCCAAT +CTCTTAATTCTGTCTGCTGTAGGCGGATGAGTATCAAATAATGAGCTCAAGCCACGCTTA +TGAAACGGATTTTCGATGTATAAACCAGCGCTTGACGGATCTGGATCTTTCATTGGCTCG +CTATTTGAAATCTTTTCTAAAGCTGAAATTAACCCTTGAGGGTTTCTAGTAAGCTCAACT +GAACTAGCATCAGCTAAATATTCACGATTACGTGAGAGAGCCATCTGAGCTAGTGCAGCA +GCAAGAGGTCCTAAAATTAATGTGAAAACAATGGCCACTATTTTAAAAATTGTTTCTAGA +GAGCTAGAGTCATCGTCATCACGGTCAGAATTGCCGCCCCACCACCAAATACGGGACGCA +AAACTAGAGATAAATGAAATTACAGCAGCTAAAGCTACTCCAATTGTAGATACTAAAATG +TCATAATTTCGAATATGAGAAATTTCATGGCCAAGAACACCCTCTAATTCACTTCGATTA +AGTCGCTTTCTTAATCCAGTAGTAACAGCTACGAAACTGTGCTTAGGATCGCGCCCAGTT +GCAAAGGCATTAGGACTTTCATCGTTGATAATAAAGACTCTTGGCATTGGAACCTGTCCC +GCAAGTGCCATATCTTCTACAATATGCCACAATTCTGGATCATCTTCTTCATGAATCTCT +CGGCCATGATTCATGCTCATGACTAAATTTCCCGGATTTTGAAGAACAATAAAAAGATAA +ATTAAGCTACCGATTAGGGCAATTACAATTCCTGATAGCGGTTCACCGTTAATCAAATAT +CCTAACCCACCACCAACAAGAGCTAGAATAATAACGAATATAACAAGCAAAAATGCTGTT +TTACGCTTATTGCGTGCTATTTGTTGATAGAGCATTGATAGACCTAGAATTTAACTTTAG +GCACAGCTTTTTCTTCAGTTGGTGTTTGAAGGTAGTCCATATTCTTAAAGCCGTGAATCT +TAGCGACTAAGTTAGATGGGAAAGTTAATAGTTTCTCGTTAAACATAGCAACAGTTGAAT +TGTATAACTGACGTGAATATGCAATCTTATTTTCAGTGTTTGTAAGTTCTTCTTGAAGTG +ACATGAAGTTTTGATTTGCTTTTAAGTCCGGGTAATTTTCAGCTAGAGCAAAAATACTCC +TCAACGAATCAGTAATTTGGTTAGAAAGTTTAATAGCTTCTTCGTGATCGGAATTAGGAA +TATTTACTAATTGATTTCTTAGAGCAACTACTTTTTCTAAAGTGCCGCTTTCATGTTTGG +CATAACCCTTAACTGTTTCAACTAAGTTAGGAATTAGGTCGTTACGACGTTTTAATTGGA +CATCAATTTGACTCCACGCCTCGTCAGCATGAACTTTAGCTTTTTGCAAGCCATTGTAGA +TACCAATATAAACGGCCACCAGTAAAATTAAAATGATAATAATAATCCATGTTAAAGTCA +TAGAAATAAATCCTTTCTATTATCATAAAAATATGGTTAGAATGTTAGGAATATTGTACC +AAATAAAGAAAGCTTAGAATACCAAAAAAGCGCAAAATTGCGCTTTTTTTATGATATTTA +AATAACAAAATCTAATACTATCCAACCTAGAACTGTCAAAATAATTAAACCTGCAAATTC +TAGAACTGAAAATACCCAGAAAAACTTTCGTTTTGAAACAGTTCCGTTTTCAACAAAACT +CTTGAAAACAAGCATATTGGCCATTGAACCAACGATTGAACCGAATCCTCCGATATTTGA +GCCAAGGAATAAGGCTTCCGCAAAGTTAGTAAATTTACCAATTAAAATTGTTGAAGGAAC +GTTAGAAATGAATTGACTGGAAATAATAGAAGTCAAGAATACAGAATGTTCGGAGAACAT 
+TGTCTTAGAAATTAAAGTAACAATAAATGGAATTTGCTGGATATCGCTAATAAAAATGAA +AAAGCATGTGAAAGTTAGAAGTAGAGCATAATCCACATGTAACATAATAGAAGGATTAAT +AATAAGAGCAAGGATCACTGCAACAATTGCAGGAACATATGCAGGTACAATATTGAAAAC +GCCAAAGAAGAAGAAAATTGAAACAGCAATCGTTAAAAGCATTGGTCGAATGCTGATTCT +AATGTCTTCTAAAGGCACAGTAGGGATTGGCTTATCTTTAACAAAAAAGATAAACACTAA +AACAATAATTAAACTAACTAGTAATAATGGAATTGACCAACTAAAAAACTTAATTGGTGA +AACATTATAACGGTTAACAACAAAGATGTTATGAGGATTCCCCCATGGCGTAAAAGCAGC +GCCAATATTTGCGCCCATCCCAATTAAGGTGACAGGCAATATCTCAGGTAAGTGATGGCG +TCTAGCAATCGTTAAATATAACGGTATAAGAGTTAAAACAGTTATATCATTAGTTAAAAA +CATCGCAGAGATAATTGACAAAATTGTGAAGATTGCAGTTAATCTTCGAGTACTCCTAGC +GCTTGCAGTTAGTTTATAGGCCAAGACATCCAAAACATGTAAGTAAGAAAAAATTTGGAT +GATTGTAAGCATTGCTAGAATTGAGTAAAGTGTATGGAAGTTAATATCTTCAATACGCGG +TCTTGCGAAGAATAAGCTTATGATCGTTATGACAACCGTAATTTGTAAAATACGGTCTTT +AGCAATATTTTTGATGACGGTCATTAAGGTCCCCTCTTAAAATTAATTCAACACTACTAT +TTTGAACTCTTTACGGCAAGTTCGCAACGCTTTTTTCGCATTAGTCCTTCCTATTAGTAA +GATTTTGAGCATTTTATTAGGAAAAACGGTAATTAACCTGCTATATTTAAGTTAGTAAAA +GAGGACGGAGGAAAATAAATATGGTTGATTCCAAGAATCTTGAATTAAAAATTAAAGATA +GTGCTAAAGAAGTTTTAGAAAAGAAAATTAAGCCAGGACAAGTCGTTTTGCTGGCTTTAA +ATGATGGTTCAAATGGCTATTCTAAATTAGGTGGAACTTGTACAATCGGTGCTAATTTTC +AACTAGTGTTGCTAGATCATAAGGATCCAGAATTTTCAATTAAGGTTAACAACAACATGG +GTTTAGATATGTATACATCTGATAAAGAATTAGCCTTTTTAGACGATGGTTTAGTTCTTA +ATGCTCGTAATGCTACTCTATCCTTATCTTCTAATGAAGGAATCATCGATGGCGGAGTTA +CTATTTCTGAATTCAAAGGAGAAAAACTTTCTGCTGATGAGATGAAAAAATTAGGTGGAA +AGATCTGCTAGGTATTGAAGTGACGTGCTTCTTCGTGGTATGATAATAAAAGATTTTCGA +AAATTTTATTTGAGAGGAAGTATAGATCTATGCGTAAAGGTGAAAACTACAACACTGGTG +TTACACCAAACTTAAGACCTAAGAACAAGAAGAACTCTAAGGCTCGTGTTAAGAGAGCTG +CTGAAGTGGTTGCTTTCTTAAACAAAGCTGCTAAAGATGAAAACAAGTAATTAAATTTAC +TGCATAAGATAAAGAGAGACGCTGGTTAGCGTCTCTCTTTTTTTGCATGTAAAAGTTATA +TAAATTCTAATTTATTTAATAACATTTTGTTGTAAATGAATTTATATTATTATATAAAAA +GCCTTATAAAATATGAAAAATTGAACATAGGATAGAATTTTAGATGAATAACATTTCGAG +AAAAGGGCTAGAAGTAAAGGCAGTTTTTGCCCTTGTTTTACCCTCTCTTTTTTTAGGACT +AGCAAGTAACTTTTCCCGCAATATTTTTTTAGGAGTTATGCTGCTCTTAATTTTGGGAGC 
+AGTCGTCGGCTTGAGCATACCAACTATTGTTAGTACCTGGCTTATTTTGATTTTGACTGT +ACTAGGGATTAGCTCTCTAACTCTTGGATATGCGGCGATAGATTTTTATGGAAAGACCCT +GCTGCTTATTTCTTTTCCCTTAGAAACATATTTAACTAGTCAAATTAAAGAGTGCGTTTT +TAGATGGCAAATATTTAAGAAAAATCAGTCTAGTGCTTACCGTTATCTTAAGCACTACGA +TCAAAATGTAAAATTACAAACGACATATAATGCAGAGAAGTTATATAAAAAGATTAAGAG +AATTTTAACGGAGAAAAATTATTTACCATTGTGGTGTGATTTTACGATTATTGATTGGGA +ACATGATCAGCAGTTTGCGCAATTTAATTTAGAAGATCATGACAAAATTTTGCAGCAAAT +AGCAAGAGTACTTAAAACAAGTAGATTAGTTGATGAAAATTTATATTACCTGGGAAATGG +TCAGTTTCTAATTATCTCTAATACGATCGCACCAGGTACATTAATGGTTTTAAATGATGA +ACTAAAAGCTGAGTTAAAAAGGCTACAGTATGGGGAATATCAGCCTGCTTTTAAAATGGC +AACGCAGCACATTACAGAACAAGATTTTCTTGAGTATCCTAATTTTAATGCAATTCTTAA +ACGTCTAAAAAGAAAGCTTGAAACGGACTTAGTTGTTGAATATTTAAAGGGAGTAGAAGC +ATGAGTAATTGGTTCTTTTTCTTGGTAATTTTGGTTACTTTAACATTTCTAGTGGCATTT +TTTGCTATGTTTCGTAGCCTCTGGACTATTTATCATCAAACTGAGCATACAGTGCGTCAT +TTAGGAAGAAAGCATAATGTTAAATGATTTTTTAGCTGTTTCAACTTTAGTTTCAATTTG +GTTTTCGCTTTTAGCTTCGTTAGTTACTCTTTATGGTGCAACTCGTTTTTGGCTTAAGCA +TAGTAAAAAAGTCATTTCAATTACGCCACTAAAGCATTACCCCGCAATTACGATTGTTGT +TCCAGCACATAATGAAGAAGTGGTAATTGCAAATACCACTAAGGGAATTCTTAACTTGAA +TTATCCTGCATCAAAGGTCGAGCTACTCTTATATGCCGATAATTGCCAAGATAAAACTGC +AGCTGAAATGCGTAAGACGGTAGATTTACCTCAATATAAATACAGAAATGTACAAATAAT +TGAACGGCGTGGCTCAGGCGGTAAAGCAGGTGTCTTAAACGATGCCTTAAAGATAGCGCA +GGGAGAATATATTTGCGTTTATGATGCAGATGCATTGCCCGAGAAAAATGCTTTATATTT +TTTAGTGAAAAAAGTTTTAGAGAATCCCCAACGATATATGGCAACTTTTGGCAGAAATAA +AACACGGAATGCGAAGCAAAATTTTTTAACTAAATGCATTAACCAAGAAGTTATAGTGTC +TCAAAGACTTCAACATGTAGGTGTTTGGAATTTATTTAAAATTGGGCGAATTCCAGGAAC +CAATTTCATCATCAATACTGAATATGTAAAAAAGATTGGTGGTTGGCAAAGTGGCGCCTT +GACTGAAGATACCGAAATTTCTTTTCGCATTATGGAAGATGGCTATTTGATTGCTCTAGC +GTATAATTCAGAGGCTTTTGAACAGGAACCTGAGCATTTACGTGATTATTATTATCAACG +ATTACGTTGGGCTAAAGGAAATTATCAAGTAGTGATGAATAACTTTAAACACTTATTTGA +TACTAGTAACTGGCGTGTAAAGCTAGAGACTTTTTATTTAGCATGTACTTTCTTTTGGTT +TAATTTAGCTGTTGTTCTTTCAGACATTATCTTTTTTGTGGATTTGGGATGTATAATTAC +AAGATTTTTCAATCCAGAAATCCCAATTATCTTTGCAATGAATTCTAATATTCTCTTAAT 
+GCAGCTACTATTAATTAACTGGCTATTAATGATCTTGCTCTATGTAATTCAAATAAATCT +CGCTTTAGCTACTCAATATGGTCAAGCAACTAGTGATCAAATCTGGCTAGCTTTAGTGTC +ATATTTTACTTATTCGCAACTCTTCATTGCTATTTCTCTGCAAGCAGTCTGCTCAGTAAT +TGGAGATAAGGTGTTTAAACGAGATGGTACTAAGTGGGTGAAAACCAAAAGATTCGCAGA +TTAGGAATAAGAGATGAAAGTAAAGAATTTAATACTGATTTTAGGAACTATCATTGCTTA +TTTAGGAATCATCGCCTATGTGAGGATGAGCAATGATCGAACGCTAGAAGAGCGATGTTA +TCATTCGTGGCGTGAAGATTATATAAAAAATAAAAATCAAAATGAGCAATATGTGAATGC +AGCAGGAAGAAATAATCCTTCATTTGCCTTATCTGAAGCACAAGGTTATGGAATGTTACT +GGCTGCTAAAGCGGGAGAGAAACATTTAGGAAGTCGGATTGATTTTCAAAAACTAGATAA +TTATTACCTGGCTCATCGCTTAACAAATTCAAATTTGATGAGCTGGAAACAGAAAGATAG +AAAAAATATTTGGCGTGATAATCCTGTGAGTGCAAGTGATGGTGACATAATAATTGCGCA +AGCTTTGCTGCATGCTGATAGAGTTTGGCCAGGACATGGTTATAAGTCACAAGCTGTTAA +CTTAATTAATGATATTAAAAGGCTTGAAATCAATCAGAAAGCTAAAATGGTAACTGTCGG +AAACTGGGCTAATAAAGATTCACGCTTTTATAATGTACTGAGAACATCAGATGTAATGCC +AAAAGCGTTTGAAGAATTTTATCAAGTAACTGGGGATCAAGCTTGGCTAACAATTAAAAG +AAAAATGCTTAACTATTTGCAAAAGTTAAGCAAGAAGTACAATACTGGTTTAGTTCCTGA +TTTTGCCTGGATTTCTAATAATTATGTTAGGCCCGCACAAGCTAATGAAGTGGCGATTAA +TGATGATGGTCACTACAGTGCAAATGCTTGCCGAGTTCCAATGTTGTTAGCTGAAAGCAA +TGATAAAGAAGCAGCTCAAGTAGTAAAGAAGATGCTAAAATTTTTTAAGAAAAAAGGGAC +TTCAGCTGGTTTTACCTTAAAAGGTGAAAAATTACATCATTATCAATCAGCGAGCTTTAG +TGCGCCAATCTTTGTAGCGGCTAGTAAGTATCGTAATCAGGGTTATGATACTTTGATTGA +ACAAGAAAAATATATCTTTTCAAGACCACTTCCGAAAGATAACTATTACGATGCTACTTT +AACGGTTTTAGCTGCTTTAAATACAAATGAGTTGACGGGATTAGAAAAATAGAGAAAGAT +TAAGTTGTTGAATCTTTCTCTATTTTTGATTTGCAAGAGTAGAACGGTTTTGCTATGATA +GTAGAAATTTTAGACATCTTTATATTTCTGGAAGGGGTAAGTTTTTATGGGATTAAATGC +ATATATTCAAGGTTTTAATAGTTTAGAGTCAATCGATCGTGCACCAGGTTATTTTAAATA +TCAGCATCATTCTGTCGCTGATCATAGTTTTAGAACAGCTGAACTTGCTCAAATGATGGG +AGATATTGAAGAAGTTGTTGGTAAACAAAAAATAAATTGGAAAGCCCTCTATGAAAAGAG +TCTTAATCATGATTATACCGAGCGTTTTATTGGTGACATTAAGACTCCAGTTAAATATGC +AACTCCACAATTGCGGAAGATGATTGGAGACGTTGAAGAAACAATGACTGCCAAATTTAT +TAAGGACGAAATTCCAAAAGAATTTCAAAAAATTTATACTAAGCGTTTGTCTGAAGGGAA +AGATGATACTTTAGAAGGTAAAATCTTATCTATTTGTGATAAGTTAGATTTACTTTATGA 
+AGCTTATGGTGAAATTGAATTAGGAAATCCAAATCCAGTTTTCATGCAAATGTTTAAAGA +AAGCCTTGAAACGATTAAAAAATTTGACGACTTAACTTGCGTTCAATACTTTATTAAGAA +GATTTTGCCAGACTTATTTAAAGGTGACTTTGCTGGTAAAGATAAGATGCAAAGAATTGC +TTTTAGTATTTTACTAATGGGGGAAGAATAGTTTGAAATTTCAGTGCGCATCCTGTGGGT +TGCGACTTGACAGCCTTCATTTTAAGCAAGAAGGATTAATGAATCCCAAGTTAACAAGTA +TTTGTGATATTTGCCTAACACGCAATCTAAATGCAGAAGATTATGAAAATCCAGTAATTG +AGAATATTTCCCAGGTAATGCTCTATGGCTTTGTAGGAAAGAAAAATACGGGTAAGCTTA +ATCTAGAGTCTTTAAATCAATATCTTGTAGGAAGCGATCATAGACGCCAATATGAATCTT +TTATTCAGGATTATGATGGTAATGAAGTAGCTCTCCAGCAATTGTCACGATCTGAATTTA +ATCAGGCAATTATTAAGAGCGAAGATAAAGAAATAAATTATATTCCTAATAGCAATGTTG +ATTTTGCGGTTAATATGAAAAACATGGGAATTGAAGTTGACTTTTCTTTAAATGAAGTCG +ATTTTGTTGACCGTGAAAGAATTCGTCATAAATATAATTATCGCTGCCAATATTGCGGTA +GACGCGGGACGAGCGTAGATCACAAAGATCCAGTTTCTTTATCGCATGATAATTCTTTTG +ATAATCTGATTTTGTCATGTAGCGAGTGTAATCGAATTAAATCTAATATGCCTTATAATT +TGTTTACTAAGCTGAATAATCAACTTACTGTTGTAAATAGAAAATTGGTTAAGTACGAAG +ATACTCTTGCTAATTTAAAAGAAGAATTTCAGCAGGCAAAACGTGATTTGGCTGGACAAG +TTCATTTAAAAGGGGTAGTTAATGACCCTGAATTAAATGCGATGAGAAAGCAAAATAAAA +AGCTTCAGGATGCAATTGATAGTTTGCAGAGCGATTATGATGCTTTAAGGAAATTGCGTA +AGACATATTTTGAAACTGGTTGGAAACTAGCACAAGAAAAAGAAAATAGTGAGATTATAT +AGTAAAAAAATGCATTGGACATAAAAATCCAATGCATTTTTTGCTATCTTAAAGTTCATT +ACTTCTAAAGTTACGTAAGAATTTCTTAGTCTTCTCTTCTTGAGGATCATTAAAGATCTC +TTGTGGTGTACCTTGTTCAGTGATAACTCCATCACTCATAAAGATAACTTGATCAGAAAC +GTCACGAGCAAATCCCATTTCGTGAGTAACAATGACCATTGTTAAGCCAGTTTTAGCTAG +TAACTGCATAGTATTAAGTACTTCACCAACCATTTCAGGATCTAGCGCACTAGTTGGTTC +ATCGAAAAGTAAGATTTCAGGATCCATCGAAATTGCCCGCGCAATAGCTACACGCTGCTG +TTGTCCACCAGATAATTGTTGAGGCTTTGCAGTTAAGAAAGGTTCCATGCCAACTTTCTT +TAAGTTCTCAATTGCAACTTTTTTGGCTTCTTCTTTAGAGCGTCCAAGAACCATTTCTTG +ACCAATCATACAGTTTTGAAGCACGTTTTTATTATTAAATAAATTAAATTGCTGGAACAC +CATACCAACCTTTGATCGGAATTTATTGCGATCAAAATGAGAATCTAAAATATTGTTACC +ATGGAATAAGATCTTACCATTGCTTGGTTCTTCAAGAAGGTTAATACATCTAAGCATGGT +CGATTTACCACCACCAGATGGCCCAATAATGGTCATTACTTCACCTTTATTAATATCAAA +AGAAATATCTTTTAATACTGTGTGGTCGCCGTATTTCTTTTGTAAATGTTGTACTTGTAA 
+AATGTTTTCTTCGTTATTATTGTTCATGTTCAAATCCAGTCTCCTTTGGATCTTCTACTT +GTAATTGATTAGCCATTAAGTTGTAGTTCTTTGGACCTTCAAGCTTCTTTTCAATGAAAT +TAAAGATTCTAGTAATTGAGAATGTCAAAATTAAGTAGATCATTGAAATAATGAAGTAAG +TTTGGAAGAATTGGAAGGTTTGACTGGCAACAGTTGTACCAACAAAGAACAATTCTGATA +CAGAGATAATACTTAATACTGAAGTATCCTTAATGTTAACAATAAATTCGTTAGTAATTG +ATGGCAAACAGTTTCTAATTGCTTGTGGTAAAATGATATGCCACATTTGTTGTGAGTGAG +TCATACCGATAGCAGAAGCTGCTTCAAATTGACCTTTAGGGGTAGCGTTAATACCACCAC +GAATAACCTCGGCTAAGTAAGCACCAGTATTAATTGAAACAATTACTAGAGCAGCTACTG +TACGGTTTAAGTTAAGGTGCCATAATTGGGCAATTCCGTAGTAGATAACAGCTGCTTGAA +CCATCATAGGAGTTCCACGGAACACTTCAATATAAACTGCTAAAATCCAATCAACAATCT +TTAAGCCCCACTTCTTACCAGTAGACTTAGGAGTTGGGATAGTTCTAACAATACCCACAA +GTAAACCAATGAAGAAACCAACAATAGTACCAACTAAAGCTAAAAGAAGAGTCATTCCAA +CACCGCCAAGGATCATTGAACCATAGCGATGCATAATTGACATAAACCAGTTTTCTTTCT +TGCTGCCACCTGCTTGAGGCTGTTCCTTTACAGCATCAGCCATTAATTGGTCGCGCTTTT +TCTTAGAAATACCATTTAAAATGCTATTAACTTGGTTAAGTAATTCAGTGTTGCCTTTCT +TAACACCAATTGATGTAATTGAATCATCGTGGTCAACTTTGAATCCTTGCATCTTGTTTA +AGTTAACGGCAACGATGTTAGGGTTAACTGCCTTGTAACTTTGGTATTCAATATCTTCTG +CTACATAACCATCAATAGTACCTGATTGTAAGCTTTGACGCATTGCAGAAAAGCTTCTCA +TTGCTGGTTCACGCTTAGCGCCATGTAATTGCTTAATTAGATCATAGTGCAATGTACCTT +GTTGAGCAGTTAATTTTGCGCCTTTAAAATCATTTAAGCCTTTTGCTTGAGCAAATTTTC +CAGTCTTGCTTGTAATAACTACGAAAGTACTCTTTCTGTATGGTACAGAGAAGTTGATTG +CCTTTCTACGTTCAGCAGTAGGAGACATACCAGCAATAATTAAATCAATTTTGCCAGAAG +TTAAAGCTGGTAATAGACCATCCCATTCGGTCTTTTCAACGACAACTTTACGATGAAGCT +TTTTACCGATAATTTTTGCGATTTGTACATCATATCCATTTGCATATTGCTTAGATCCAT +CAATTGGAACTGCACCATTTGCATCGGTTGTTTGAGTCCAATTGTATGGTGGGTAATTAG +CTTCCATACCTATTTTTAGGGGTGCTTCTTTTTTAGCCGCTTCGGTTTGACTAGAATTAG +AGCTAAAACCGATAACTAAACTTAAAATTAGGGCTAATAAAGCACTAAACCATCTTATTT +TTGACTTCACTTTGAAATACCTCCAAAAATTATTTCAAATAAAGCCAATACAATAAAAGC +GTAAATAAAAAGCCTCGCTGTTTACCAAAAACAACGAGGTTTGAAAATCATCAGTAATTA +AACCAAATCATATAGCGCTCCCTGGGGACTACCAGGACAGTATGTAAGATATGCTCTCAC +ATCCCAACAAGCTTAATTCGCGAAAGTAAGCTTGTTTCGGCGATAATCCTAAAAACTACT +GTCAATGTATTCGCGTACTCAAGTAGATTTTGTGGTTATCGCAACCTCTATTGCATTGGG 
+GATTTATTTAACTGATGAAAAGATTAAACGATTTTTAATATTTTGTCAAATAAATCCGTG +ATTTTTTTTGACATCTTTTTTCTTAATATCTGAAACATAGGGATTACCAGCAACTATAAA +TCTTAGCTTTTTCTTGGCCCATTTAGGATCTGACTGATTGATCCCAACTCTTGGAAGAGC +AGTAATTTTTTCAATTTTTCGTTTATGATTAATATCAATATCAAAAGGAGAATCTTCTAA +ACGAGCTAAGTCCCATTTTCTACTAGTAATTCCAAAAGCCTGCATCATCTTTCCAGGGCC +ATTAGTTAAAAGTGGACCAGTTTTGCCGTTGCGATTTTTAATCATGGTATCAATTCCGGT +CAGTGGGTCAATTGCCCTAATTAAAACTCCTTGTGGTTCACCTTCTGCTTGGCACGCAAC +ATCAAAGAAGAAGTATTGTCTCTGAGAATAGATATACAAACTGCCACCAGGACGATATAA +GCCTTCATTTGCCTGACTGCGTCTGCCACCATAAGAATGGGCAGCCCGGTCTTTTACACC +AACATAGGCTTCTGCTTCAACTATTGTGCCTGATAAAAGTTCATTTCCATTGTTGAAAGA +TAAAGTTCTACCCAACAGATCTTTACTAATTTCACTAGTAGACCGATTAGTAAAAAATTC +TTCATAGTTCATATTACTACTTCTTTCTAAAAATGTTGTACACTCGTACTTGAAAGTAAA +TATAAATTGTAGGGGGAATCAAAAGATGAATGAAATAAAAGTTGTTAGAATTTATGACCA +TGAGCAACCCGCAGGTTATAGAATCTTAGTAGATCGCTTATGGCCTCGAGGAATGAGCAA +GGTAAAAGCTCATTTAGATGAATGGGATAAGGAAATTGGGCCAACTAATGAATTAAGAAA +ATGGTTTAATCATGAAGATGACAAGTTTCCTGAATTCAAGACTAAGTATATAGCGCAATT +GAAAGCTAATCCAGTTACAACCGAGTTTGTTAAAAATGTAAAAGAAAAGCTAGCTCAAGA +AGACGTTATCTTCTTATATGGTGCTAAAAATAAAAAGCACAACCAGGCAGTTGTGCTTAA +AGATTTTATTGATAGTCAATTAGGTTAAGGCATTTTTTTGCCATGCAAGTTTTGTCGAAC +GTAATGAAAAATAATTGTTAAAATAACTGTACCAAAAACGATGCCGGCAAAGATGGCATC +GGGCAGTTTGAAGTTTAGCGGTGGCAATGAAACAAGTAGCTTAACTGCAATGATTGTGAT +TAAAACATAGGCCATTGTTTGAAGTTCAGGAATAATTTCCATTAATTTGATAATAACTTC +AGCAACTCCTCGCATGCATAGAATACCAATCATTCCGCCAACTAAGACGACAACTGGATT +ATTTGACATGGCTAAGGCTGCTAAAACAGAGTCAATTGAAAAGACGATATCCATTGACTC +AATTGAAATAACAGTTCTCCAAAATAATGAAAGATGGTGCTTTCTTGTCTTGGAGCTCTT +TTTCTTATGGGCTTCTTTTTTTGCCGCTTCTTTCTTAGCAACTTGTGCAGGGTGGCGAAG +ATCATAAAAATACTTATAAACTAAGTAAAGTAAATATAAACCGCCTAACAGTTTGATTTC +CCAGAAATTAATTAGGTAAGTACCAATCCCAATTACAATAAAGCGAAATAGATAAGCCCC +CCAAAGTCCGTAAACTAACGATTTCCTTTGCTCATCTTTAGTTGGTAAAACTTGCGTTTG +TGCGGCTAAAACCACCGCATTATCAACTGATAGCAAACACTCCATTAAAATTAGAGTCAG +AATCATCATCCAATCTTGTCCGCTAGTTAGGACATGAGCCCAGTTATTAGCGTCAAAAAA +GGGTTGATAAAGTTTAATCAATTGCATTCAAATAGCCTTCTTCAAGTTTTATTAAGCGTT 
+GCTATTCTTTCTCTCCCATTCTATAACTTCCAACCTTATCTGACGCTAAAATTACGCGAT +GACTTGGAATAAAAATTAGGGATGGATTTAGGGCTACCGCATGTGCAACTGTTCGAACCG +AAGTTGAGCCATTAATAGCAGAAGCTAAATCATTGTAAGAAATGGTTGTTCCATAAGGAA +TATTTTCAATCATTTTAACAACTTGATTTTGAAATGGAGTACCAAAGCTTGAGTAGTCGA +TAGGAACGGTAAAGGAGCGACGCGTGACTGCAAAATATTCTTTTAATTGTTGAACATAAG +GAGCCAATTTTTTGGAATCTTGGACCAGAATGTGGTTAGGATAAAATCCTAAAATAGAAG +ATTCAATATTACTATTCTTGGAACCGACGAATGCTAATCCCAAGTTACTAATGACTAATT +CATAGGTCCATGGCTTAATTTCAACAAAATCGTAATAAAAAACTTTTGCTTGGGCAAGTG +GCATTTAATCCTAACCTTTCTTTTAATTAATATATTAACTCCATCTTATATTGTACAACA +AAAAATAACGAAACTGTCAAATTTTTTGTGTAAATAGAT +>NODE_3_length_40000_cov_63.0619_ID_5 +CATATCCCCAAACTAATTCTAAAAGCTTATCTCTTGAAAAAACTTGTTTTGGTTTTTGAG +CCATCGTGTAAAGTAAGTCAAATTCCTTTGGAGTTAAGCCTTCAACAATATTATCATCAA +AGCGCACTTCTCTTCGATCTTTTGAAATCTTTAGATGCTTAGTAATTACATCATACTTTA +CGCCATTAGAAGCATGCATATCTTTTTCTATACGACTTCTTCGATATAAAGCTTTAATGC +GGGCAATTAGTGCTAATGGACTGAATGGTTTAGTAACATATTCATCTGCACCAATGCCCA +AACCCAAAATTTGATCAGTTTCTGAGTCACGTGCAGTTAACATAATGATTGGTATTGAAA +GAGAAATAGCCCTTATTTCTTTAGCTACTTCCATTCCATCCTTTCTAGGTAAGTTAAGAT +CTAACGTTACAATGTCATAAGAATCTGGATGCGCTTTAAACATCTCAACAGCTTCTATAC +CATCCTTAGCTATATCTACTTCCCATTGTTCTTTACTGAAGAACATCTTCATCATTTCTG +CAACAGAATTGTCATCTTCAACCATTAATATCTTTAGCATTATTTATCCTTAAATCCTTT +TACTCTAGTATGTTTAATTTTATCTGGTTCAATATAATCATGTGGTAATTCATGCCGGGC +ACGTAAGAAAGCCTGCAATTTTTTCTCAGTTAAGATTATCGGAGCCAAAATAAACAAATA +AAAAGTAAGCAACCAGATCAAGAAATAGCGATCCCAGTGTAAGAAATGAATTCCTAATCC +CAATAGAAATTGAACTAAGCCAAAAATCATGAAATAATTTCGTGCAACTTTTTGCGCATA +CTTGTAACTTGTTTCATTAACTGAAGCCAAATACGACATATATCCATAAACTTGATTCGG +ATTAGGTGAAGGAGCGATCAGCCAAATGACTCCGATTACAAACATTATTACCCCGCAACC +AATATATATCATTTGACCAACTCCATTCACTTAAATTTATTCTTTACCAGGAGCTGAAAT +TACTAAACGTAAGTCACCGCTAAAACTAAAGTTCTTCATCTCATTAGGAATAATAAAATT +AGTACCCAATTTGATATTATATTCTTTACCGTCAGCAATAAACTTACCTTTACCTTTAAT +TACAGATACTAAAAGATAAGGATGATCATTTAGACTCCAATCAAAACTCCCATCAACATC +GATCTGCCATAAATAAAAATGCGGTGACATTGGTGGTTGAGCTAAAGTAGTAATCTTTGC 
+ATCTCCCAATGTTTCAGATGTAATTTTCAGCTTCGGATCTTTATGTGGTACTTGAATAGT +ATCAAGAGATTGTTTAATATGCAACTCTCTCTTTTTACCAGTTTTCTTATCTACTCGATC +CCAATCATACAATCTATAGGTTACATCACTTGATTGTTGTGTTTCAATAACCATAATTCC +TTTAGTTAATGCGTGAACCGTTCCAGAAGGTACATATAAAAAGTCTCCTTCTTTCACTGG +TACATAACGTAAAAGCTTCGACCACTTTCCATTTTTGATCCAATCTTCTAGTTCTTCTTT +ACTAGTTGCATTGTGCCCATAGACAAGTTTAGCACCAGGTTCGGCATGAAGCACATACCA +ACTTTCAGTTTTTCCAGAATCATTTTCATGAATTCTAGCGTATTCGTCATCTGGGTGTAC +TTGAACAGACAAATTATCATTTGCATCTAAGAATTTAACTAATAGTGGAAATTCCTTAGC +CTTGGGATTACCAAATAATTCTGGATGCTCTAAATATACTTCTCTTAACGTCTTTCCTTT +TAATGGACCGGCATTAACTGTCGAAGCGTCATCTTTGTATCCAGATATTACCCAAGCTTC +TCCAACTTTTCCATCTGGAATATCATATCCTAAAACTGTATTTAACTTACGTCCACCCCA +AATTTTGGGTCTGAAATATGGTGTTAAAAATAATGGTTCCATAAAAAAATCACCTCAATA +AAAGTATATATCTTTTTTAGATGATTTTGAAATCGATTTCAGCTATTTTTTTGAATATTT +TTTTATTCCACTAATTAAACGTTTAATTCCATCCTCTACCATAGAAATTGGTGAAGCTAG +ATTAAGCCGTAAAAATTTATTACCATTACCGCGATAAATACTACCAGGTGAAACAATTAG +ACCAGTTTCTTTTCTAATATAATCAGCTAATTCTTGACTATTTGACGAAACTTTACTTAC +ATCAAGCCACAACAAGTAAGTTGCTGGACCTGATACAATTTTTACATCTTCCAGATTATT +AGTTATTTCTTTTTCAGCATAATTAAAATTACGATTAATTATCACTAACAATTCATGTAA +CCAGCTAAAGCCTTCAGTATATGCAGCAATTGTTGCTGGAATAGCTAAAAGATTTGGCTC +AGCCAATTCATCGCTGTTAAGACCTCGATTGACGATATAACGTAAATTTTCATCTGGAAT +AATTACGGTTGCAGCATGAAGAGCAGCTACGTTAAAAGTCTTACTTGGAGAAACTAAACT +AATTATATTATCCCTCAAAGCTTCAGAAACTGAAAATGCAGGGGTGTATTTAACGTTATC +TCTGACCAAGTCTCCATGAATTTCATCTGATAGAAGAATTACGTGATATTTTTGACATAA +AGATGCAATTCTCTCTACTTCCGTAGGTGTCCAAACTATCTTTACCAAAATAATCACCAT +TAATTTCAGCATTTTTAATTTTGACGTCCTCACAAGTCCACATTGTTTCTTCTGCATTAG +AAAAGAAAACATGATCTAATGAAATATGCTTACACCGTCTAAATAACTTAGGAGCTTGTA +AATCTGAATTTTTAATTGAAATATTATTAGTATACCAAATACCGCTTCGCGACATTGTTT +CAAAGGTACTGTCAGCAACTTTAATATTATTCGAATACCATAAAGGATATTTATATTTAA +AAATAGTAGATTTTATTTCTAAACTTTGCGTTTCTTTTAATGGTGATTCTCCTTCACCAA +AAGTAATATTCTCTAAAATTGTATCCGAAAGGCCATATAGCGGTCTTTCTCCTTCAAAAT +ACTTTTCTTTAATTATTTGCATAGATTTCCTCTTTTATTAACTCTTTCAATATTTAGTCT +AAATTATGAAAAATAAATGTCTAATATTTGCTAACTATAACGAAATATATTTTTATGCTA 
+TAGATTAGTTTTGCACCAAGATTACTTTTCAAATTTATTATAGAAAAAATAAAATTTTGA +GAAAATTTACAGTTTTTCATACTAGAATAAATTAAATTTATAGCAAAAAAAGGATGAGTT +TCAAAGCTCATCCTTTTTAATCAATTTAGTTATTACGTTTACGCTTAGAAACACCAGCTA +AGCCGATAACGCCAAGCATTGAACTTACAGCACCTACTATAGCTGCAACAATACCATTTT +TAGCTCCAGCTTTTGGAAGTTCTGATTGAGTTGTCTTCTTAACTGAAGTTGTTGGAACTT +CTGAAGTTGGTTTAGTTGGATTATGCGGAGTATCAGGAGTATTTGGTGTTGTTGGTGTAT +TAGGTTTCTCTGGAGTAGTTGGAACATTTGGAATTGGAGTTACAGGTTGATGTGGCTTCA +AGTTATATCCACCGATTAAGTCCCAATCAACTGGTTTGCCCCATTCACCAGGAAGAGCAG +CACTATTTGGTGCATCTTTTTTAAGAACACCCATCATATCATCGTTTTCGCTAGTGTCAT +CCCAAATCCAGTCACCTTGGTTAGTCATTTTAGCTAAAACAGTAGTAGTATTATCTGGAT +TAATTTGTAACAAGAAACTTGGTGCCCAAGTTGCGTGCATACCCTTTCCAGCAACTTCAC +CACGGTTAGTGATATAAGAAGTAATCAATAATTGATCATCTCTTCCTTCTACTGGAACTG +CATAGTATGAGTAAGTTGCAGTACGCCAGTTTGCTGGAACAGAAGCAGTTAAAACTACTC +CAGATTCATTTAATGGAACGTATCCATGAGTTAGGTTATCAGAAACGTAACCAATCATTG +CAACGTTATCACCAACTGCTTTATTAGCAGCCATCCATGCATCGTCGTTACTTCCACGGT +TTAATCTAGTAGCAGCAAATAAGTAATACTTGTTGCCCAAACGAACTACATCAGGACGTT +CAATTTCATCACTTACCATTGGAGAACTAATAAGTGGTGTGTAGACCTTCTCAACACCAG +GGTTCTTAGTATCATTGTTTAACTTAATAATACCGATTGCAGCATTAGACCATTTTGCTC +TATCTTTAATATCAGAGTTAGACAAGATTTGGAAGAAATCACCTAAATTATCTTTGTTAG +TGCCACCGTAATTTAACCATTGATAAATTTGGTCAGCACCTTGATAATTCTCTGTACCAG +TACTTGCTTCAAAAACGATGTGGTCATTATCGACGTGTGCAATAGAAATCTTGTCTTGAT +CCTTTTCAAGATTTAAGTAGACAGTTGCACTTGCAATCTTTTGGTGGTTAGTGTTGTTAT +CACTTGTATCAACCTTAGTATAGTAAAGTTGGATTGAACCATCTTTATTTAAAGTTGCTG +AGCCAGACCATTGTTGAATAACTGGAGTACCTAAGCCAAAAATAGGACCAGCATTCTTCC +AATTATTAAAGTTATTGTCACCGTACTTGTTGTAAAGAAGATAAATATGATTGTCATTAG +TATTTGGAACTCCCATCATTCCAACTACTAATTGATAACCATTCCAGTTAGAAACATAAC +CAGTTTTGCGTCTTGAACTGGCCATGAATCCCAAATTTCTAAGTCTTCTACTTTTCCTGT +TTGGGCATCCATAGTTTTTGCTGCAGGCATGTTCTTAATTTTACTTGCATTAAAGAAAGG +AATAGCGTAACGAGCATCTTGTTCAATTAAAGTTTTAGCAATTTTCTTAAAGTCACTGTA +AGTTAATTGAGTACCGCTCTTTGCTTCTTTAGAAAAGTTAATCTTATTTAATTCATTAAT +TTGATCATTGGTTAATTTACTGGTATCTACATCAGATTCTTTAACAATATCTTTAATTTG +ACCCTTTAATTCAGCTTTGTTATCGTTTGATGCTGTTTTTGTTGAAGTTTCGTTAGCTTT 
+TTCTTCAGTATTTGTCTTAAATGAAGTAGCTGCTTTTTCATCTTGTTTTACTGCATCGTT +CTTTTTAACATCAGCAGAAGAATTAACAGGTGTCTTCTCAGTACCATCTACTGTTTCTTT +AGTAGTTGTTGTAGAGGTGGTAGCAGCTTTAACATTATTTACTGGTGAAGTATTAACAGC +ATCTGCTTTAACTTCTTTAGTTGCTGCTGAAGTCACAGTAGTAGCTTTTTCTGCTTTAGT +ATCAACACTCTTTTCTGAGCTTGAATTTACTTGATTATTATTTTTATTATTTGCTGAAGT +AGTAGTACTTACTGCTACAGTTTTGTTTTCATTAACGTTGTCTGCATTAGTAGTAGCAGC +ATTTACAGTGGAAGCACTTAATACAATTGCTGCTGTAGACAAGGTTCCCATTAATAAAGA +TTTTCCGCTTAAAGACATCTTCTTATGATTTTTATTTTCCAACATCTTTTCTCCTCCTAC +ATGCTAAGCGTTTAACATTTCATCTTACAGACTATCACCTTTTGTTATAAAAATAAATAT +TATTTTAAAAAAATTGTATAGATTTGGCAAAATTTACTGTTTTTGTATTCAATTGCCACT +TGTCAATCGTTTATCACATTAATACTAAAAAAGCCACACCAGATAATTACTCTAGTGTGG +CTTTTCCAACTATTTATGGAGATGAGGGGGTTAGAAATTTACCCTTGCAAGTCCTTTAAT +ATCAATGCTTTTAGCTATTTTAACTTTTACGTGAAGCCCAATATGAAGCCTTAAGATTGC +ATGTAATTAGCAAAGTTGGTGGCTGTTTCTTTGGCCTTATCCTTAGTTACATGAGTATAT +ATATTCATAGTTGTAGTAATATCTGAATGCCCTAGCCTGTTCTTAACATCTTTCATAGAT +ACACCAGCATCAAATAATAAGGAAGCGTGTGTATGCCTGAACCCGTGAACCTTAATCCGT +CTTAATTCATACTTATCACAAATTGCAATATTCCAACGACGTGGCTTAGATAATGCTGTA +TGTTCTCCCTTAGTATTAGCAAAAACTAAATTACTATCATCTAAAAAGTTTAGTCCAAGT +TTAAACATTTCTTTCTGTTGTTCTGTTCTCCACTCTTTCAAGTAATTCATTGTTTCGCTA +TCCATATCTAGCACCCTTACACTATTAGCCGTTTTTCCTGGTGTTATGATTAGTCGATTA +TTTTCACCCGATGATAAAGACTTATCTATATTAATGGTATTATTTTTAAAATCAATATCA +GCCCATGTAAGAGCTAGACACTCACCGCGTCGCATCCCTGAATAGGCAAGCAAGCGAAAG +AACATAAAATATTTAAAATTATATTCTTTGGCATCTTTCAAAAAGGCTTTTAATTCTTCC +TTACTATAGTAATCAGTAAACTTTTTATTATCCCTTTCAATTTTTGGTATTCTTACCTTT +TGCATTGGATTTCTTTTAACTAGCTCCATATCCATTCCATACTTAAGTAATTTTGAAGTA +TAGAATTTGAACCGTTTAAAAGTCTTAGGTGCTTGCTTAGACCATTCATTTACTACCTTT +TGACATTCAGCAACGGTTAATTTATCCACGTAGATATTACCTAAATCTTTTAAAATATGG +TTCTTGAATATCTTTTCAGTGGTCACGAAAGTACTTTCTTCTACTTCCATGCGATACTGT +TTAACCCACATATCATGCAAATCTTTAACCCGTAGCTTTTTTTGTTCAACTGGTTTATAC +TCACCTTTAAGAACTTTCAATTTATAATTCATAAAACACTCTAAGGCTTTATCTCGACTG +GTAAAGCCTTGTTTTTTTATCTGGATTGTATGCCCTGTTTTTTCATCCTTACCAGCATAA +ATTAGAAACTTATATCTTTTCTTACCACTTGGTGTGGTATATTCCTTAATTTCATCATTC 
+ATGTTAAATTTACCTTTCACTAATAAAAGGCGTACTGATTGGCTGATATTAAACATTTAA +CAAAACTGTGGAAGTTATAGGAATAATTCAAATGATTAGCTATTAGTAAAGATGATTTAA +ATGGTCTTAGAATTGCAATCTCAGCATTAACAAGGGAACTCATAAGTTACCATGTACATT +TACACTTTAAGGTGTATTTGTAACTACACGTTGATAGCCTGTACTTTTAGGTACTCCCTA +CCAGCATATGTAAAAGCATGGAGGGATGTCCGTAACTGTAGTTACACTCATAGATTATAC +ATATGCAGTTACAACTGCGGTTGTTAATTCCTAAAGGGATGCTAGCAAGCCAAATTATTA +TTTAACCACCACCCTGATTCAGGTTACTGGTTTGAGGGATGAACTAAGCGCTCAGCCCCT +GCACTACCGAACTAGTACCGCAGATTAGGGATGCTAGTTAAACACGTCCACAGGGAGTGT +TATATATATTTTTGATTTATTGCAATTTGTCCTATTTCATTTTTAGCATTAGTTAACGCA +TCAATTATTTTATTATATGCTTGCTCATCCATATCCTTACGAACTTTAAATGACTGCACT +TTTAAACCGTTTTTCTCTTCTGGTGGCTCTAAAAAAGCTCTATTAACATTACCTATTAAA +CGCGTTAGCTGAAACACTATTTCTTTTTGTACTCCCTGCGTAGTCGAATAAGAAGCACCA +TCTAAACTTTTAACTGCTTGGTCTATTTGATGCTGAACATCGCTAGAAGCGTCTAATCTA +TCGGTATCTATTAGCCTTTTGATTTCATCTTTTATTTGTTTAACTGAGTAGCCCGTGTTT +TTAGCCCACTCATCCCAACCGTTAATATCATTAGATAAACCCATCAAATAGGAAGTCGGA +ACTTGAAAGAACATTGCTAGCTTATCCCAAGTTTCTATTTTTGGCTCTCGCTTTCCACGT +TCATATTGGCTAATAGTTACATATGACATATTTACTTGTTCACTCATATCTCTTAATGAC +AAGCCCTTTGCTTGTCTTAATTTTTTAATTCTATTCATGCTTTCACTTCCATTCTTTCAC +TAATCTTAACATAATGTATTCGTTTTGAAAAAATATTTGCGTTTTCATTTTGAATACTTT +ATCATGTATTCATAAAGAATACAAAAAAGAAGGCGAAAATATGCAACTAACAGAAAACCA +AGTAACAGCGATTAAGCGAAAACGTGGCGAACTTGACATGTCTATTACAGTTTTAGCCAA +TACTACCAAGGTTAGTAAACGTACTTTAATAGATATTTTCAAACACAACCACAGAAATGT +GACTAAAAACACGTTTAAGAAGCTAAATGATTGGCTGATTGATGAATATGAAAGGAAGCA +ATAGCATGAAGCGAATTTATAGAGCATTCAAAGACTGGCTATATTTACCAGATAGCAATG +AAGCCGAATTATACAACTTGCTACTTTTCGGCGTTCTATTCTTAAGTGCAATTGTAGCAA +TCGTAAGTTTTACCAGAACACTTGCAATGTTTTTGGCATTGATTAGATAAGAATGGAGTT +TTAGAAATGATGAAGAAGATGAATATTGTTTATCTACCACTGGTAGAACAAGCTTATCAG +ATGATGAAAAAGCTTAATAAGAACGTATCTAAGCAAGAGATTTACAAGAAACTAATTGAA +ACTAATATGATTGACCAGCAAGGTAATCCTACCAAGTGGGCGCTTGATAATGGGCTAGTT +AGTGAATTTAACACAATTGAGGAAGCAAGAAATAAGCTTAATCAAATAAGCCCACAAAAA +ATAGAAGACCAAGTGGACGCAGATATAAACAACGTATTTAGTCGCGTGCCAGTTAGTGCC +TTTAGATGGAAGAATGAACACGATGGATATGCAATTGATAGTGCTGAACTTAAAAAGGCT 
+ATTCTATCAGCATTAAAAGATGGTAGCTTATCCCCTATCGGTCGTAAGCACTGGCTAGAA +GTATTAGCAGATATAAATTCACAGGAGAATTAAGAACAATGAAATATATTGAAGTTAAAA +CCTTACCTATTAATGAAGATGAAGTAATCACATTAGCAATGAATGCAAACAATCTTACCG +CTGGACTTGACTACCTTGAAAGCATGGCTTTAAACCTAGAGGGCAAAAACCAGCAAGAAA +TAGAACAACTATCAGGATTAATAGCAGGTATGAAAGAATTATCAGCAAAGAACGCTAGAA +TTTTAGCAAATATCAATCTTTAGGAGGTTACACAATGGATATTAAGCTAGACGCATCCCA +AGAACAGGATTTAAGCAATCACATTATTAATTTAGTGAAAGACAGCGTAAAACAAGCAGT +AAACAATGAAAGCAAGCCCTATTTAAATCGTAAAGAGATAGCTAAGTATTTTGGAGTTGC +TGACAGTACAATAAGTTACTGGGCTACTTTAGGGATGCCTGTTGCAGTTATTGATGGACG +CAAGTTATACGGTAAGCAAAGTATAACTAACTGGCTTAAGAGCAAAGAAAAAGCCGTGAA +AAGCTGAGACAATTCACGACATAAAAAATAAAAACACAATTAAATATTAACACATATAAG +TGTACTGGTTGGCTGATATTAAATATTTAATATTAAAAAAAGAAAAATGGAGGTTAAAAA +TGAACAGCAATTCAGACTGGGCTCTTAACTATGCTAAACAGGGCTTTTCTGTCGTCCCTA +CTAACCCAAGTAATGTAAAAATACCCGCGATAGAACACGCAGGAAAACCACCGCTAACTC +AGGAAGAAATAAAACAGCTATGGACTGAAGAACCTAATTATGGCATTGCTCTAAAAATGA +CTAATATTTTTAGCATTGACGTAGATACTCCACAGCATGCAGGAACAACTAAAATAGACG +GCTTTAAGTCGTTAAAAGAGTGCATCCCGTCAGAATGGTTACCAGATACGTTAAACGCAT +TTACCCCAAGCGGTGGGATGCACTTTTATTATATGAAAGTAAACGGCTTGCCTAATAAAT +CAGGTGCAGCAATCATCCCTGGCGTAGACGTTCAAGCATCCCCTAATAGTATCTCTGTAG +TACCACCAACTAAGCGCCAAGATGGAATTTATGAATGGAATCTAGTACCTGGGTCTAAAA +ATCCCGTTGCTATTCCACCTAGAGAATTAATTGATTTTATTCAAGAGAAAACAAACCAAA +ACGCATCAAAGCCTATTCTTTTTAAAACATTCAAAGCTAAAAATTACGCAGGCAAGTTAT +TAGACGCTTTATGCTTTCAACAAGTCAAGGGACAACGTAATAGCTATTTAACTTCATTAA +TTGGCAAAATGCTATTCTGTGGTGCTGAAGAAGAAAACTGCTACACCCTTGCAATGTTTG +CCAATAGTCAATTTCAAGAACCATTACCAGAAAAAGAAGTTACTTCCATTTTTAACAGCA +TATTAAGGAAGGAACTAGCTAATGAAAAATAAAAAAGTAAAAATGAATAAAGAAACAAAA +GAGCTAATTAATTCTCACAAGAAACAAAAAGAAAAAGCGCCTAAATTGCCAGCTTGGTGT +TATGTAGATGACAATGGCAATATAAAGGTCAATATTGGATCACTCGGCGAATTAATACAG +AAAGAACGCAATTACTTATTCATTATTAATAAGGATAAGGAAACACTTTACGAATATAAT +TATAAGCTTGGTTATTGGTTACCTATCTCTAAAGGATCAATTTCAAAAGCTATTCATGAT +AAATTAACGTCGGTTGGTAAATGGACGTCACAAAATCAAAGAAAAACCTATGAATTTATA +AATAGTGGTATTCCAAGAAAACAGTTCCAAGATACAATCGGACACTCGCCAGAAATGGCA 
+TTTAATTTTCTTAACGGCGTTTACAACTGGTCAACGGGCAAATTAGAACCCCATAATAAA +AAATATTACTTTGAGGGATGCACCAGCTACCCACTAGACATGGCAAACAATCAAACTTCT +GAAACTAATAAATATTTTAAGTTGCTTTTAGGCGAAAATGCTAAAACCATGATGGAATTT +ATTGGTTACTCATTTTACCCAAGTTATGAACCTATTCAATGCATCGTGATATTGAAGAAT +GAAGGTGGAGACGGTAAAACTTGGTTTACAAACCATGTTATAAATAAAATGCTTGGAATA +AATAATGTTTCTAATATTAGTTTAAATCAATTAGCGGACGTAAAAAATAACAAATTCAAA +CCAGCGGAACTATTTCACAAATATGTAAATGTTTCCAGCGAATTGAGTGAAAGCGAAAGC +TCATTATTACCTACTGACTCTTTAAAAAAGTTATCTGGTAATGACTATATAAATGTAGAT +AATAAAGGACAACGTGACACACGCTTTCAAAATTACGCAAAGTTATTAATTATAACTAAC +ACCCTTGTTCATTTTAGAGATGATAGTGATGGCTTTAGTCGTCGTGTTTATATTATGCCA +GCTCATAAAATACCAGACTTTGAAAACACTATAGATGTTAGGAAAATGGAGGCAGAACGT +GGAGCATTTGCGTATAAGTGCATCGAATTAGCTAAAGACGCAATGAAGCGCAAGCCTGTT +AAAGGGAAAAGATACCTTACTAAAACCAACAGTATTAGCCGACTAGTAAACAACTGGATG +CTAGACAATGACCCAGTGCAACAGTTTATTAATGATTGCGTTACCAAAGCACAAGGTAAA +AGAGAAAAAGCGCTAGATGTAATTCAAGCTTGGAAAAATTGGTGTAGTGATAACGATTAT +AAGCCGTTAGGCAAAACCAACTTTAAAAATAAAATGATTAAAAAGGGGTTTAATTATCAT +GAAGAGAGAAAACGCGGTTCAGATGGCAAGCAACTACCGAGAAAGTATTATTTTACTAAT +ATGACGCTTAACGAAAATGCTAACCCAGAGGGAAAAATAAACCCAGTTACTAATAACTAT +GACTTTAATAAATAGAAAAAAACTGAAAAATTGGATTTATGCCACTTTTTAAAATCGGGT +TGAGCCCTAGGGGCTGTAGTAATATGCCACTTATCCACTTTTTTTATTAATAAAAATTAA +AAATATAAATATATATAATACACATAATATATATTTATTACGTAAAAATATATTACACAG +AAACCCAATGGAAAGAAAAAAAGTGGCTTTTCAGCGCATCCCTTGTGGCTCTAAGCATGA +ATCCACTTTTAAAAACTGGATCAAACTGGAACATAACCACTTTTTGCAATTAATTTAAGG +AATTTACAGAATGAAACAAGATAGATTAAAATACGAGCTTAAAAAATACCTAAATAGTAA +ACTGACCTACCCACTACACACTGTGGAAGATATTAGCATAGAAAGAATATTAAAGGTTTA +TGGCAGCGAAAAGCCTAGCGGTTATGTAATTCAACTAATAATAACTGGGCAAGTAGGTAC +AATACGAAAGGATAAATAGAATGGTAGACAAAAATCTTGAACATACCGTGTTCTTTCACA +GCATGGACAGCTTGAACGAATGGCAAAGAAAATACTACAAAAAGATAATTATTACTGAAC +GCATGGAGACAGCTAATAACGATTACTTAATTACTTTCTTTTTAAATCGGAAACATTTAA +GCGTAAAGCCTAGCTATACCGACCAGCTTATGAATATTGCACTGGATGCGCAAACTATAG +ACTACTATCACAACGGAGTTAACATAGTAGATTTACTTTAACGCTGGTATTATAATATAG +ACATACATCCATATAAGAAGAGATCAGTAGCTTAATTGTCTGGCTGCTGGTCTTTTTTTG 
+TTCGCTCTCATCCCTACCAGATGGCTTATATTAGCTTGCTGGCTAGGCTAATTGCGGTTC +TCTTATGATTTATCATTTACACCTGTGGAGGTCTCACGCTGGCTTATATGCTTGTCTGCT +TGGTCTTCTGGCTGTCCTGTCTTTTCGTTTTTGCTTTCTCTTTCACTGGTTTCATGCCTT +GAGTTTCAGCAAGCTCAGCTAAAGAATTTAAATTAGCAACTAATTAATTAAGAAAGCTAG +CCGTCCATACAATGGCTCTCTCAGCATATCGAGGGATGCGTACGAGCGTAGTTACAACTA +CAGACATGCTAGCACTAATAGATTATTCATACCTGTCTGCTTTATGGTCACTCAATGTAA +CCGCCGAAATAACTCGTTATCAATCTAATTGATTTAGAAAATTAAACCGCAATCAAAAAT +AAGCAATGAATGAATTTAATTTCACTCACTGTTTTTTTATTTTGCACCCCGCCCTACCTT +TCAACGTTGCAGAGCCACACATTGCTGTCGCCTCGTGTGAAACATCAGTTGTTTCACAAT +TAATTGAGGGGGGTTATTTATAAACCTTATTACTACAGTATTTATATATGTATTCCTGCT +ATTTCTAGACGGAAAAAAAAGGCTATTCAAACCTTTCTGGAATGAATAACCTTGACTGAA +TTGATTCTTACTAGTTTTTATTATGCCGCTTCTTTATATTCGTGCAAGTCTAGAACATCT +TTTACACCCTGTTTTAACTGTTCCATCAGATAAACATCGGCAAACTGCAAGAGCGCCCTT +TTACGCAATACATTATATTGAGTGTGACCATATCCAAGGCAATTAGCAATGTCCCAGCTA +GCTAAATGCTTAATATATCTAAGCTCTAGAATGCTCCTGTATGGCTTAGTTGGTTCATTG +GTGCAATTGCCTATGGCTGGCTTTAAACACGCTAGACGGTCGTTATATGCAATTATTAAG +TTATATCTGCTCGCCACTGCATCGGGATTAATTGAACTATTTACCTTGATATTATCAAAA +TTCAAGCCGTGGACGTTGTACCGACCAGCGCACATTGTCTCATACCATTCTAAATCCTTA +GCAAAAAATTTCTCCACGTTTTTAACTGTTTCTGGTTCATTGATTTCTAGCATACCTAGC +AACTCCTATATTTATATATTATTGGAATATCTAACATAATTATATTTTACTATGTTTTAC +CAGCTGGCTTATTAACTGCAAAATAAAAGAGCTACCTTAAAAAGGCAACTCAAGCCGACT +AATTAAATATCTAAAATCAACCAATCAATACGCTATTACACTTTTAAGAATATGAAGCCT +AATATGAAGCCCGAAAATACTATTTTTTATTAATTTGATTTAATCATAGAAAACAAAAAG +CCTTTAATATCAACGTTTCTAATCCATTGACATTAAAGACTTTTAAATTTATGGAGATGA +GGGGAATTGAACCCCTGTCCAAACGTATTCCGTCGTTAACCTCTACGATCATAGTTATAT +TACTTAAACTTCGCTTTGATAAAACGCCATATAACAGGGCTAAACTATCAAAACTAACCT +AAGAATCTTATTTCTAACTATTAAGGTAAGTAGTTAAACGTAGCTCGTTAAATGGTAAGA +CCCGCAGACAGACACGAGCAATCCGTCATTGGATCACGCAGCCTGACTAAGCAGCTACTG +CGTAAGAATTTTCGTTATTTGCAGTTATAATTTAACTGTGACGTTTTAACGTAGACGTGA +CCTACGAATCGCAGTCAACGCGAATCTACGCCTGTCGAATCCCAAAACATCCCCAGATGT +TTCAATGATATCACAGTCACTGTTAAAATGCTATAACTTTGCTTTCACAGCTATTTATCT +AATTTAGCTAAACGAACAACGTCACGTGCAATCATTAACTCTTCATTTGTTGGAACGATC 
+ATCGCAGTAATCTTTGATTTCGGTGTAGTGATAATTCCTTCCTTGTTAGCTTTATTAGCT +TTTTCGTCATATTCAAGACCAAGCCAAGTAAGACCATCCATAATCTGTTTTCTTACACTT +GCATCATGTTCACCGATTCCTGCAGTAAAGACTAAAACATCTAAGCCACCCATTTCAGTC +ATATAAGCCCCAATATAGCGAACAATTCGGTTAATAAAAATGTCTTTTGTAAGTTTAGCC +TGTTTATCACCATTTTTAATTGCTTTTTCAATGTCTCTCATATCTGGGGAGATTCCTGAA +AGACCGAGTAAACCAGATTTAGTGTTTAACATCTTAATAATTTCATTAAAGCTGGTAATG +TTGCCTTTTTTCATAATAAATTGAAGCAAGGAAGGATCTACATCCCCACTTCTAGTACTC +ATTGTAATTCCTGCAACCGGACTGAATCCCATTGAAGTATCAAAAGACTTTCCATCTTTA +ATAGCAGTAACTGATGCACCACTACCTAGATGACAAAGAACCATCTTCAAATCTTCAACT +GGCTTCTTTAATAAGTCAGCTGTCCGACGTGATACATAGCGAGCAGAAGTACCATGTGCA +CCATATTTTCTAGCACGGAACTTTTCATAATACTTATATGGTACTGAATATAAGTATTGA +ACAGGATCTAATGATTGGTGAAAAGAAGTATCAAAAACAGCAACCTCAGGAACGTTAGGT +AAAACTTTCATAAAAGCATAGATACCGTCGGCTTCAGCTGGGTTATGCAATGGTGCATAG +TCACTCATATTATAAATCTTCCAAAGATTATCATCTGTAATGACTGTACTATCAGTAAAT +TCTTCACCACCAGCAACAACACGATGTCCTACCCCAGCAATATCTGCTAAAGAATCGATC +ACATTGTATTCTTTAAGCCAGCTAAGCAACTTTGGGACTGCTTCTTCTTGATTAGCAATA +TCACTTTGTTCATCATGTTGACTGCCATCTGCTAAAGTCATTGTGAAAACAGATCCTGGC +AAACCAACACGGTCAGCCATACCAGATGCAATTACTTTTTCATTATCTAGAGAAAATAAT +TTGTATTTAAATGATGAACTACCTGAGTTTACTGCTAAAACTTTTTTCATTTTATATACC +TCTCGTATTATATAAATTAATCTTCACTCTTATGATACCAGTCATTTAGTTTCATATTAA +ATGCTACTAAAGATTCTGGTTTCTTTAATGAATCTAATTTAGCCACCAGAACCTCACGCT +TAACAGCATGTTCTCCATGATTTTGAAAAACTAAAATAGATTTTTGTTGAATCTGACTTG +AAAACATATCATCGGGCAAATCGACAATTGCCTGAATATTAACTTTTTTAGCTAACCAAG +TCATGAATTCAGTAGATCCTTTACCAGTAAACAGTAATCTAGGTACTACTAAAAATGCAA +AACCATCACGTTTAAGGTTATTTACTATTTGTTCAATAAATAAAGTATGAGCAAAAGAAT +GTCCCTCTTTTGCGTGATTTTCGAAGCGCTCAGCATTATTATCCAATGGATAATAACCTA +CTGGCACATCACTTACCACTATATCCGCCTTTTCAATCATCCATGGATCTAAAGCATCTT +GACAGTATAAATCAATTTTTAAATCTTCAAGATGAGCACCAATATCTGCTAAATCCAATA +ATGCTTCTTCATTATCAATTCCAATTAATTTGTAATTGTTTTGTGAATGATTTTCCTGAA +TCAACTGTCTAATAACCGAATAAAGTAAATTTCCAGTTCCAATTGCGGGGTCAACCACAG +TTTTTTTACCTTTAGAAACGATTCTTTGCCAAATTAAAGCAATTATTGTAGCAATCACTG +AAGGTGTTGGCATCAAGTTATAGTCACTGGCATCTTGAGTAACTGCTTTTAAAGTTAATA 
+GAGTAAATATTTGTACTTTTAGAGCACGTGGCAAATTATCATAATCTAATTGACGATACT +CTTCAGTTAATTCAGCTACAGTTTCTTTATCTGGGGCACCCGATTCGACTTTGATCTTAC +CATTTTCTAGATTATCAAAGGTTTCCGTTAATGCCGATGAAAAAGAGACATTCAGAGCTT +TTTGTAAATGCTCAATAGCCTTCTGAAACTTTGGATATAATTCTTCTACTTTTTGCATTT +TTCTTCCTTCTTTAATATTCCTTCTAATATTTTATAGGTATCATCAAATACTAGCAAGCA +AACAATGTTACTTGGAAAAATAATTTATTAAAGAAAGATTACTTTCCATATTTTCAGTAA +AACTATGTTGATACATCATTACGAAAGCAATACAAATATTAATAAGCAGTAAACTACTAA +TTAAGGCACTTCCCTTAAGTCTTTTTACTATTATCTTTTTTCTCATCCTTTATTTTATCT +TCCTTCTTCTCTGGTAAATCTGTTTTGAAAAACATCTCACTTTGCCTACCATCTTTTTCA +GTTACTAAAATAGTAAAAGAATCTTTTGCAGTAGAACATTTTATTCTTTTAACTTGAAAA +AGCAGTGGCATGTGCCCTCCCTCATCTGTTCTCATTCTCAAAACATCATTTGAACTCATA +TCAATTACATATGTATCGTAAATTAAATCACCATTCTTTTTCTTCTTATCCGATAACTTT +CTAAGAACAATTTTAGTTGGATCTGAACCAGTTGTACTTACTTCAACATGTCCACTTTCT +TTTAAGAAATTATTTATTTGTACATAACTATACGTAATCTCATTAACTCCGGTTTTTTGG +CGATTAATTTTTCTAAGTGTACCTACAAGTCCAAATAATATTTGTGCACATAAAATAGTA +ACTGCGATTGCAAACACAGCTTCTAATAAGGTAAAGCCCTTTATCTTTTTCATTATTCTT +ACCTACGATAGAGATGATCATGCACTAATAACGTTTTTTTATCACTATGTCTAAAAATAT +ATTCCGCATATGCCTTATCTACTCGTTTTTCAATTTGTTTCTCAGTCTGCCTATTTTGTT +TAATTGTCATCCCTAAAAGCAATACTCCTAGACAAGCTATCGTAAAGCCTACACATGCTT +CCCACAAAATGAAGCCCTTTATTTTTCTATTCTTCAACCATCCTACCCCATGTCATCTGT +ATTTTTATCTTTTTCTCTTTACTTCCACTCTTAACAGTAATTGTTCTTGGCGATATCGTT +CCGCGATTGCTAATGTATAAATTATTAAAATTATAAACTCTAACATGTCGATTCAAATAT +ACTTTTGATACTTGATGTGGCTCCATTATCTGAATTGAAGAATTATTGTCAAAATATGAT +AAAAAATATGACTTCTTTTCTACTGTTGAACGGTGTAAATACTTATTTATAGTTGATCTA +ATTTGACGCGTAGTACTATCCAAAATTATTTGTTCCTTAATATTTTTCACGTAAAGCGTT +GGCATTAACACAAGTAAACAAACTATTGCTAAAGTAACTATCGTTTCAACTAAAGTAAAA +GCCTTTATTCTCTTAAAAATCATTTTTGTGGTAATTTTTCATATCTCTCTTTTTGTTTTT +GAGAAATATATTCTTTCTTTACCAGCTCTTCTAATGAAGTACCATCTCCATTATCTGCAG +CTAATTGTCTTTGAGTCTCAACCGTTGTTCTTAAGGCTTCATCTGTTTTACTAGTTGCTC +TCTCTTTTTGCTTACTTAGTCCTGGAACAATTAGCAAAATCAACATCGCAATAATGGCAA +TTACAATTACCATTTCAACTAAGGTAAATCCTTTTACCCGTTTATTTTTCATAATAAATT +GTTTTAACTTTTTCATTTTTCCCTCTACATTGTTTCCATTAAATGATACATTGGCATTAA 
+AATTTTCAAATACATCCCCAGAATACAAATTCCAATAAAAATAAAGCATAGTGGCTGCAA +ATTAACGATTAAACCATTTAATTTGAGATTTAGTTCATAAAAAAGTGTCTTGCTAAGTAA +TAGCGCTCTTTTCCCAATTTGCTCTCTTGTTGTTCCGGTTTCTAAAAGCATAACTAAGTT +ATTAGAAATAAAGGCTTCATTCTCAATAATTGTTTTAATCTCAGTTCCTTTTTCTAACTG +ATTCTTCACTTTTGTTCCTAAGACTTGCTGTAATGAGTTCTTTGGTTGCTGATCAGTAAG +TTGGCAAATTTTTTGTAATGAAAAGCCGTTGATTAATAAAACAGCTAAGTCAGACACGAT +AAGATAGTGAACATATAAATTTAATGTTGGTCCAATTACAGGTATCCTAGTTAATTTCTT +CAGTGCATAATAATCTTGCCTATTAAGTAGCCTGATTACCTTACTAATAAAAAATGCACC +TATAATAACTAACATCACAAGTCCACCCAGCATTACATTACTCGTCCAGTCATTATCTGA +CATTTCTGTCTTTAAAAAAGTTTGCATACAAATTAATAGCGTAACCATCATTCCAATTAA +AAGTGCTGGATAAGCTAATTCTCCTCGCAGCTTCTTTAACTGCTTATTTTTCAAGCGAAT +AAGTTTACTTAATTGGTCCAAACAAGACAATAAATTGCCATCAATAATTGCCATATTTAT +TTGTGCAGCTAAAGTACTAGAAAATCCTACTTCATGCAATACATCCCCAAGTTGCCTACC +TTCTTCAACCTGTTTACTTATATAAACTAATTGATTAGAATCACCACTCCATATTTTAGG +CAGCAATTTTAAACTAGCATTAAGCGAATAACCGTTAATCAGTGCTTGTCTTAAATAATC +AATAAAAATTAATTGAGCAGCGCTATTTAGCTTATCCTTCTTCATAAAGATGCTTCGTAT +CTTCACTAATCTTTCCTCTTTCAACCAACTGCTCTAAAGCACTTTGCCAGTTGCGCAAAT +CATGACGCTTATCATTAGACATAATAGCTTGATCAAGTATTTTTCCATATCCAATATCCA +TTAAACAGGCAACTTTATTTCGATCTTTTATCGGTAACAATCTTTGATAAGAAGCAGCCG +TCAAGCAATTGCACAATTCGTCTTTAGTGATTCCTAATCCTTCAAGCCGTGAAATTGTTT +GTAAAGCTGTTTTTGCATGAATAGTTGCTAAAACCAAATGTCCACTTAGAGCTGCGTTAA +TACTGATTTTGGCAGTTTCTTGGTCTCGAATTTCTCCAATAATTAAAATATCCGGTCTAT +GACGTAAAGCAGCCTTTAGTAAATCTGGGTAAGTAATTCCCGCAACAAGATTCACTTGTG +TTTGTAAAAAACTTGGATTCCAAACCTCGACTGGATCTTCAATAGTCATTACAACCTTTT +TTTCTCCAACTAATTGCGCTAACTCATACATGGTTGAGGTCTTTCCTGATCCTGTCGGAC +CGCTAGTCACTATCAGCCCACGTTGATTGGTGAGTCTTTTTAGCAAATCAAATTGTTCGG +GGAGAAAATAATTATTACTTTTCCCTTCATAAATTAATCGAACTACTAAAGACTCATCTC +CTTCAAATTCCCCTACACTTGAGAAGCGTAAAAAGACCTTTTCACCCCGAAATTCAGTCT +GATACGCCCCCACTTGTGGTCTACGACGCTCAGATATATCCATTTGCGCCTGAAACTTGA +AATAATTAAGTAATTCCTTTCCTACTTTCTGGCTTAATTCTTTTATTTTACTCAAACCTA +GGGCAGTTCGAATCTTTATTTCATATCCTCTAATAGCGGGAAAAATAAAGATATCACTAG +CATGGCTTTCAATTGCTTTTTCTAAAATAGCTTCTGATACTTCGTTCATAATTAATCCCT 
+CCTTTGCCATACACTTATTAGTACGCGAAAATAGGCCATATTTTTTATCATTTTAAAAAT +TAAGAAAAAAAAGACTGACACAAAGGTCAGCCATAAAAAATAGACCTAACAAAATTAGGT +CTACTTCTTGATAAATTATTATTCATCATCATCTGCAGCAGCAGTATAAACATTAGAAAC +ATCATCGTTATCTTCTAATGCATCAATTAAATGAGTAAATTGCTCTTTTTTATCTGCTGG +AACTGGAGTGGTGTTTTGCGGAATCATAGTTAATTCAGCGTTAGCTAACTTGTAGCCAGC +TTTTTCTAAAGCATCACGTACAGCAGTAAATTGCTTAGGATCAGTGTAGATTTCGAATGC +ATCATCACTAGTTTCTAAATCATCCCCACCAGCATCCATAACATCAAGTAAAACTTGGTC +TTCATCAGCATCAGTAGTGGAACGATCAATTACGATGTAACCCTTACGGTCAAACATGTA +AGCAACGGAACCAGTTGCACCGAGTGAACCACCATTACGAGTAAAGGCAACACGGACATC +AGAAGCAGTACGGTTCTTGTTATCAGTTAAAGCTTCAACTAAAACTGCAACACCACCTGG +TGCATAGCCTTCATAGGTAATTTCATCATAATGTTCATCTGAATTACCTTCAGCCTTCTT +AATAGCACGTTCAATGTTAGTCTTAGGCATGTTTGCTGCACGTGCTTTATCCATAACCAT +ACGTAAAGTAGGATTTCCTGAAGGGTCAGGACCACCACTCTTTGCAGCCATATAAATTTC +ACGAGATAATTTTTGGAAAACTTTACCTCTCTTAGCGTCTTGCGCATTCTTGCGGCCTTG +AATATTGTGCCATTTTGAATGTCCTGACATAAGAATCCTTCTTTCTTTTTATTTAATATA +GTTAACCTTGTCAATCATAACGCACTCAAGGTTTAAATTCAATCTATGATTAATTAGGAA +TTGCTTTTTTCTTGATGGTGTCTAATCCAGTAAACAAAAATACAGAGTAAAATAGCTAAA +AGCGCTCCAGCTGAATCAAGAGCAACATCGTGAAGACTTGGGGTTCTATCTCCTGTCAAA +TATTGATGAAATTCATCTAATCCAGCTAATCCAATAATTCCTAACCAGATAAACAAAGGT +CCACACCATTTTTTAAATAAACGATCTAATCCTAAACAAGCAAATAAACCAACTAAAAAA +TACGATGAAAAATGGGCAAATTTTCTCATGACAAATTGTGTCATTCCAGCTTCCCCATTG +TCTAAAAATGCATTATGCCAGCGACCAGCATAATAAATATTCCATTTTCCAACTATTCTT +TCAATAATCGGAAAATGTCGATGAATAAATCCTGGTGACATCTCTTGTTCGTGATAAGTC +ATCGAACTGGAAATAAATAAACCTAAAAGCACTAAGAGCATCAAACAGACAAAGATCATT +TCTCGCTTAGTTAATACTGTTTTTTTCATTTTTCAACTTTCCTACTTTCAAGTTCTTCAG +CAACCAAAACAATTATTTTTTTAGCAATTTCAACTTTAGTTGTTTCTGTAATATTTTCAG +TTGTTTTATCTTGCCTTAAAACCATGACTTTATCTTCATCACTACCAAATACTCCATGAC +TGACATCATTTGCAACAATCATGTCAGCTTTTTTCTCTTGCAATTTTTTCGAAGCATTTT +CTAATAAGTTATTTGTTTCAGCAGCAAAACCAACAACAACTTGATTCGCTTTTTTTATAC +TTCCCATTTTTTTCAAAATATCAGGAGTTTCTTTCAAATAAATTTTTAACTCATCCCCCT +GATCCTGCTTTTTGATTTTATGAGCAATATAATTTACCGGTTCATAATCTGCTACAGCTG +CTGCCATAATTAAAGCATCTGCACCTAAAAAAGCTGTCTTAACTGCTGAGAGCATCTGTT 
+CTGTAGTTTCTACCTTAATGTTTTTTAAACTAGGCGAATTAGGTAATGAGACAGCAATAT +GACCCGAAATTAAAATCACCTCGGCTCCAGCAACAAGAGCAGCTTTAGCTAAAGCTATTC +CCATCTTACCACTTGAACGGTTACCTAAAAAACGAACCGGATCAAGCGGAGATATTGTTC +CACCTGCAGTAATTACAATTTTTTTACCAGCTAATATTTGTTTAGCTTGAAAAGAATCAT +CTATCCAAGCCATAATGTTATCTGGCTCTGGCATCCTTCCCTTACCACTATATCCTTCAG +CTAAGCGCCCAGTAGCAGAATCCATTACCGCAATTCCATCTTGTTTTAATAAGGCTAGAT +TTCTTTGAAAGGCTGGATTACTCCACATATGACTATTCATAGCTGGAACTACATATTTAG +GTGAAGCTGTTGCTAGGAAAGTTGTACTTGCCGCATCATCAGCAATACCATTTGCAATTT +TAGCAATAAAATTAGCCGTTGCTGGAACTACAACAGCAATTTCTGTCCAATCGGCTAATT +CAATGTGCTGAATCTGATCAGCTCTTTCTTTTTCCCAGAGATCAGTCAACACTGGATACT +TAGTTAAAGCAGCTAATGTTTGCGTACCGATTAAATGGACAGCTTCTTTTGTCATAACTA +CTCTTACTTCATGTCCCTCTTTTTGAAAATTTCGTACAACGTTAATTGCTTTATAGGCGG +CAATACTACCTGTAATATAAACTGTAGCTTTCATTTTTCTCAACCTTAATTTCATAATTT +CTTTCTACTATAACACAAGCTAACTAAGACAAATTATTAAAAAATAAATTATCAAATTAA +AAATCATTAACGGATTCTGATATGATTTATATTGAATAAAGATATCGATCACATACCTTG +AAAGGATAAATATACAAAATGAAAAAATATTATCAAAAGATTTTTTCTCTCTTTATTGTA +ATGGGGGCTTTATTTCTAGTTCTTACTGGCTGCTCCAAAAAAGAAAATTCATCGTCTAAC +AAAATCTCGATTGTTACCAGCACCAATGTTTATGCTAATATTGCACAAAATGTTTTAGGT +AAGTACGGCAAAGCAACAGCCATTATTACTAGTAGTTCCACCGATCCCCACGATTTTGAA +CCAACTACTGCAGATGCAAAAAAAGTTCAAGATGCCAAAATTGTTGTAGCTAATGGTTTA +GGATATGATTCCTGGCTAGCAAAACTGGCTAAATCAAGTAATAAGTCTGCTGTTTTAGTT +GGTGAAGACTTAATGAATTTGAAAAGCGGTGATAATCCTCATATCTGGTTTGACTTAAAT +ATGCCAACAAAATATGTCAACTATTTGGTAAAGCGCTTATCTAAAATTGATAAAAAACAC +GCAAACTATTATAAAGAGAATGGAATTAAATATTTAGCAAAAATCAAAAAAGTCAAAAAA +ATAGCTGATTCAATTGATGGTACAAAACAAAAACCAGTATATGTTAGTGAACCAGTATTC +GACTATGCTTTGAAAGCCACCCACTTTAAGATTGGCGATAAAGATTTTGAAGAGGCAATC +GAAAATGAAACTGATCCGAGTGCCAAAATTGTTCATCAAATGAATCAAACTATTAATAAT +CGTGGTATCTCTTTCTTTGTTAAAAACTCCCAAGTAAGTAGTTCTACCGTTAACAATTTC +GTTAAAAGAGCCAAATCAAAGAAGATTCCAATCTTGCAAGTTCGTGAAACTATTCCAAAC +AATACAACTTATTTGAAATGGATGACAGAAAATTATCAAAATTTAGCAAATATCGCTAAA +AAGTTAAAATAATTACCAAAAAAGAGGCATGAGCCTCTTTTTTATTACCCTAAAATATCC +TTTAATTTGTTATTAAATTGCTTAGGAATTTCTGCCATAATTACGTGACCAGTTGGTGAC 
+ATAATAAAACTATCTACTTGATCATTTTTTTCTTCAAGCAACTTTTCATATCCTTCACGA +TAATACGGACTCTCCTTAGCAATAAAAAAAGTAATCGGAATTGTTGCGTTTTCTATAACT +TTTCGCCAATCTAAGGACATATGATTTTCTAACAAGTCCACATTATCTTTTCTATTAAAG +GGGTTTTCTTCTTTTGCTTTCATAAGTTTGGCATAAACACCATCATCTAGTCCTGCGAAA +GTTTCATGAACATGGGGTCTTTCCTGACATTTTTTATGATAATTTTCACTTGTATAGTCC +ATAAAGCCATACTTCCAATTCTTATCATTAAGCATCTTAGGTGATTGATCTACCACTAAT +GCACGCTTAACAACAGTTGGCCATCTTTTTATTAGACAGAAAATAATCGAAGCGCCCATT +GAATGTCCCATGAAAATAGCCTGTTTAATTTTCAGAAAAGAAATTAATTCTTCTAAATCT +TGTGTTAAACGAACAATATTATGTCCTCTTTCTGTGCGTTGGCTCTTCCCCATATTTCGG +TGGTCATAAGTAAGTACTTGATACCCTAAATTATTAAGAAATGGTACTTGAGCAGACCAT +ATTTCTTGATATGCACCAAAACCATTTACTAGGACAATCGTCTTCCCTTTCCCAGTTAAT +TCATAATTAATTTTTACATTATCACTTGTTTCAAACAGCATTTTTACTCCTTATTCTGTT +TAATTCTAATGAATAAACATCCAACTGCAGCAATAATCGCAATAATCAAAGCTTGACTTA +ATCCACTACTTAAGCTTACGTTGATCCCGTCATAAACGCTTTGCTGGAGCGAGCTTGTGG +CAATTCCAGCCGCGTATGATAAAACACCATATGAACCAATTGCAGCAATCATAAGCGGTA +AAATTGCAATCTTTTTACCAAATAAAACCATTACCATTGCCAAAATGTAGAGACCAATTA +GTAGGAAATAAATCAAATGATAAATCTCAGCTGCTTGTTTTACATCCGCAGAATTCTGTC +TTAATTGTTCATTTACTAATTGCAATATTAACTGATTAAAAACTTGTTCTTGTTGCGTAT +CATTATTTAGTTTTAGATCAGATGCAGCTAATTTACCATTGGCTAAATAATTAGTACTCA +ATTGATTTAGCCCTTGATATGATGTTTTTAATTCAATATTCTTGGGTAATTGTGATAAAA +GCGTATCTTCGATTCCCGATTCTTGCAAAAAACTGACTCCCATTTGCAAATCACTATTAT +TCGCTTGGTCAACTACCTTATTAATTGAGCTTTTCACAAGACCAGCAGTACTATCAACTT +TCAATTCAACTGGACTTGTTAAAGTCAAACTTAGTGAGATAAAGATTAATCCAATTACAA +CTAATCTCCAAATCCATTTTATTAATTTCATTTTTTCTCCTAACGAAAATTTTAATAGTC +TTTTATAATCGCTAAAATACTACATAAAGTTTTTTTTAGTGCTATATAATAAAAGTGTAA +CAAATATTAAAGGAGTTCAGCATGATTGATGAAAAACTAAAAGAATTTTTCAATAAATTT +CCTCATATTAAGGAAAATAAGCCTTTACAAAATACTCTGAGCAAGAAAATCAACAACCTA +ATAAAAAAAGAAATCCAACATGGTGCAAGTCAAGTTGAAGCGGAACAGAAAGCACTTTTA +AATTTAACAGATCTAGACACTCTATTATCACAACTAAAAAATTTAGAAGAAACTGACTAC +AGTAAATATAATGATTTCTTTGCTAAAATCTTTGATTCAAAATTAGTTAATGAATTAAAA +CTTAAATTAAATCAAATTGATGAAATTCACCTTAATTACCGCTTGGGTGATATTTTAGTA +TTACCCACTAATTCATCAAACTTAATTGTGCATGATTTTATGTCTAGAGATATTGAAAAT 
+CTTCATTCAACGGTCGAAAAAATTGGTAACGTCTTAAAAATTACTCAAGGTCCACGTAAG +CTGGTAGGAATTTTCAAAAATAAAACTTTATTATTTTTACCTAAAAATTTTACTGGCTTC +CTAACAATCAGAAGTCAAAGTGGTGATATCTATATTAATAAAGTTCCAAATTATTGTATG +CTGGATGTTACTGCAGTTTCTGGCAATTTATTATTGGCTCATTCTAAACTTAAACGTGTT +CAAGCAGACTTAAAGTCAGGCGACATTGCTGTTTCTAATACATCTGCAAATGTATTTCAC +ATTAATGCTCATTCTGGAATTCTAACTAGCGATCATGTTTGTGCAAAAGAAGAGATTTCT +TATCATACAAGTAGTGGAAATATAGACTTAGAAAGTATTTCTACTAAAAACTTTTTTATC +GATACTAAGAGTGGAAAAATTACTATTAATGACCTTAATAGCGAAAGAATGGATTTAGTT +ACTGATACAGGAAATATGAGCTTAAATAAGGTTAACGGTAGCGGTGATATTAAGTCTGGA +ATTGGGAAAATATCCTTATCAATTGCTGATAAATCTAATTTTAATTTTTCAATCACAAGT +AAAGTTGGAAGTATCCGCGTAAATGTACCAAAGAAAGAATTATTTAAATTCAAAATCAAT +ACTAAAAAAATCGGCCCTACTGATCTTCCACTTGACTCCATTATTTATGGTAATAATGAA +TATGATGAAATTGAAGGATATGTTGGTAATGAAAATTCAAGTAACTCACTAACGATCACA +AGTGAAATTGGAAAAGTATCAATTAAGAACGAAAATTAAACAACTTCAAGCGAAAATGCT +AGCAAACCTAAAATTTGCTAGCATTTTTTCTAAATTATTAAGCCTGCTTTATATCCTGCA +AAAACTAAAATTAAGCCGCCACCATAGGATAATAAAGCATAAAGAAGGAATGTTTTGTAA +TTATGATCCTGCCAATGAGAAAGCAACTCAACATTTAATGTGGAAAAGGTCGTATAACCT +CCCAGTACCCCGGTTCCAACTAAAGCATAAACAAATGGCGAAAAATTGCGCGAAAAAACT +AGCCCTAACAAAAATGCCCCAGTAAGATTTATTAATAAAGTAGCATAAGGAAAGTTGCTA +GACCAATGTTTTTTACCATAATTAGTAATTCCGTACCTAAGAATTGCTCCCCAAATTGCT +CCAAAACCAGCTGTTACAAGTGTCGTTATCATGCTCTTATTGCCAACTTTCTACCCAATA +TTTTTCCTGTAATCATTCCTAAATAAGCAAATAAAAATCCAATAAAGATTGAACTAAAGA +AATATATCAGCGCCTGATTATTCATTCCGCTTTCTAATTGCTTGAATGTATCCAAATGGA +AACTAGAAAAGGTTGTAAATGCTCCTACAAAACCTGTACTTAATCCTGTAACTAGCCAAT +CTCGTCCTTCCCTAAATTCAATAAAGAAATATGTTAGAAAGGCTAATAAAAAGCAACCAA +TAATATTAGCTACAAAAGTTCCAGCCTGTGACCAAATCACATTTAAATAAGCACGTAATG +CTCCTCCACAAAATGCAAATAAACCAACACTGATATAATTTTTCAACTTTTTATTCATTA +AAAATCAACCTTCTTATTGCATACGCAATTCCATCTTCATTATTTGTTCGAGTAACATAA +TTAGCTCGCTTCTTTATCTCTAATACCGCGTTTCCCATAGCTACTTTATAAAAATCTGGA +ACGTCAAACATGGAAATATCATTCTTTTGATCACCAAAAATCATTATTTCATCTTGTCTA +AGATTAAGTTTCTGCGCCAATTCTTTTAAAGCGTTTCCTTTTGAAGCTTGCTTAGAATTA +ATTTCGATTAATGTTTTATCAGAGGTAGAAACGTTATATTTTTCAATAACTTCTTTGGGC 
+AGGTGCCGATAAACTCTTTCCATTTCTGATGGAGCACCTGAAAACATGGCTTTTGTAAAT +TTTAAATCTTTAGCTATATCTTTAAATCCACAAACCTCAATCGACATTCTAGTCAGCCAG +CTTTCACGCGATAAATAATAATTAATAAAGTGATCTAATGTAATAAATTTATCACTCACT +TCAATATGAAAGTTAGCCTTCAATTGTTGAGCTATTTTTTCAAAAAAAAGGAAATCTTGA +TAACTTAATAATTCTTCAACTACTGCATTTCCTTGTGCATTCAAAACTAGAGCACCATTA +AAAGCCACAACATATTGATTAGGATCTTGCAGATCTAACTGATCTAAATACTCTTTAATT +CCTGGAAATGGTCGCCCCGAACAAGGAACAATTTTAATTCCTAATCGAGATGCTTTTTGT +AAAGCTTCCCTTGTTTTAACACTTATTTCTTGTTTATCATTCAATAAAGTTCCATCAAGG +TCAACGGCAATTAATTTTGTCTTCATCAATTTTCTCCTATGTAATCGTTCACTTTCCTTT +CTATTATACAAAAAAAGACGACACTACTCCCTTATTACACATAAGAAAGTAGGCGTCGTC +AGATCTTAATCGGTTAAATTGGTGAACGCCATCACCTATTATAATTTAATAAAAACAGTT +TACTTTGAGCACATCTCTTCTTCAACGACCTTAGCAACTTCTTCACAATACCCATCAGCT +AATTCTTGTGTTGGTGCTTCAGTCATAACACGGAGTAAGCTTTGAGTTCCAGATGGACGA +ACAAAAATCCGTCCTTCATCGCTCAACTCTTCCTCTACCTTTTTAATAGCTTCGGTAATT +CGCTTATGTTCTTTCCAGTCTTTCTTATCTTTGACAGGAACATTGATTAAGCGTTGTGGA +TATTCTTTAAAGTCGCTTAATAATTCACTTAATGACTTACCAGTATCCTTCATTACGTAT +AATAAATGCAATCCGGTAAGCATTCCATCACCAGTATTATGATAGTCACTGATAATAACG +TGACCAGATTGTTCACCGCCTAGATTATAACCATTAGCTCTCATTTCTTCTGAAACATAA +CGGTCACCAACTTGAGTTCTGACATTCTTAAGGCCGCGTCTTTCTAAAGCCTTAGTAAAG +CCTAAGTTACTCATTACAGTTGTAACAATCGTATCTTTCTTTAAACGGCCATGATCGGCA +AGATAGGAACCAATTACATACATAATATGGTCGCCATCAACTTCATTACCGTTTTCATCA +ACAGCGATACAACGATCAGCATCCCCATCGAATGCTAAGCCCAGTTGAGCACCTTGCTTT +ACAACTTCTTCTTGTAACTTTTTAGTATGAGTTGCACCTACATGATCATTAATATTTAAG +CCATTGGGATGAGTTGCGATTGTAGTAAAATCAACGCCCATATCAGCAAATAATCTTGAA +ATCAAAGCACTAGCTGCACCATTTGCACCATCTACAACAACTTTAATTCCACCTAATTCT +TCAGGTAAAGTATTTTCAATAAATTGCAAGTACTTTGAAGCACCTTCGTGGTAATTAGTT +ACAGTACCTAAGCCTTCAGCTGAAGGACGTGGAAGCTTATCTTCTGGTGCATCAATTAAT +TTTTCAATTTCTTCTTCTTTAGCATCAGATAGTTTCAAACCATCACTACCAAAGAACTTA +ATTCCATTATCTTCAACTGGATTATGAGATGCTGAAATTTGAACCCCAGCATCTGCACCT +TGAGCACGAACTAAGTAAGAAAGGCCCGGTGTTGTAATCACACCAACTTCTAAAACTTCA +ATTCCGACGGAAAGTAAACCAGAAATCAATGCATACTCTAACATTTGTCCAGAAATACGA +GTATCGCGTGAAACTAATACTTTCGCTCTTTCTCCATCTTTTTTATCTTTTGTAAGAACA 
+TATCCTCCATCACGACCTAATTTAAAAGCCATTTCTGGAGTTAAGCCAGCATTAGCAACC +CCCCGTACACCATCTGTTCCAAAATATTTAAGCATAATTTAACAACCTTTCCTTGTTTTA +TTCTTTTTTAGCAGTAGTATCAGTTACCTTAATATGAACTGGTATTACTGATGGTGAAGC +CTTAACTACGCCCTTCGGCAATTTAATTGTTACATCTTTTGTAACATCTCGATTGATACC +ACTTAAATCAACTTTTAAGGGAAGAGATGTTATCTTCTTTAGCGTATTCTCATCACCATA +TATCTCAACTGTCTCTACTTTTGCGGTTAATGAGTAAACATGAGTTGATGATTCATTTTT +GGACGTCACGTTTATTTTAACCTTCTTTTTCGACAAGTTAATTGGGATAGTAATGTGCGC +TGTTGCTGGATCAATAGCAACATTGAGTTGATGTCCCTCTTTATCAAGTGCTACCAGCAT +TTCTTCACGCTCAAATGTGCTGTCTATTCCCTTTGGTAAATTTGCGTGAGCTACAACACG +ATCAACTTGGTTAACCTCGCTCTTAGCGCCAGTAATGTTCACAACTTCTGGATCACTCTT +TGCTGTGCCAAGATTATACCCATGAGATACAGCACTCTTATTATACTCTATTTGCACAGG +AAGGGTACGAGATTTTCTCTTTTGAATGTTTACACGTACTTTACTTGGACTCACACTATA +AGTTAATTGAGTACTTAAACCATTTACCTTAATTGGCACCGTATGTTCTCCAGTTTTTAA +ATGGCTTAAATCAATATAAACACGGAAATTTTGAGTATTAATAGTAGAAGTAACCAGCGC +ATTTGATCCTTCTAAAGTAAGATTTATTTTTTCAGGATAACCAGTTACATAATATTTATC +CGTATTAACAGAAACTTGTAGAGGAGCTTTAATTACTTGCGTCTTAGTTGCAGTTTTCAA +AGTTTCTTCTCGACGTCCTTGAGTTACAAATCCCTGTTGATTAGACGAAACATAAATAGC +CAGTAAAATTGCCAATATCAATGCAACAATGCGATAGAACCAAGGTTTATCAAAAAACTT +TTTCATTTTTTATTTGACCCCCAATTCCAAACATGGTTTACAATTCGTTGATACCATTTC +GGCTTTTCTTCTTCTTCCTTTGGAACTAGTTGAGCATTTAAGTACTTCAAATACTCTTCT +CTAGTCAAATCAAGCATAAACTGGCTATTACGAGTAATTGTTACCCCACCAGTTTCTTCA +GATACAACAATTGTAATCGCATCAGTAACTTCTGAAATACCTACCGCAGCCCGGTGACGA +GTTCCAAGTTTTTTAGGAATCATACTATTATCTGAAAGTGGTAAATACGCAGCAGCAACT +GCAATCCGATTATTTCTAATAATTACTGCACCATCATGCAATGGAGTGTTAGGAATAAAG +ATGTTGATTAATAACTCGCCAGTAATATCTGCATCAATTGGAATACCTGTTTCAATATAA +TCCTCTAAACCAGTATTTTGCTGAATTGTAATTAAAGCACCGATTCTTCTTTTGGACATA +TATTGAATCGCCTTATCTAGTTCTCCAATCATTTTTTCAGCGGCCTGTTTTTCAGTCATA +GTTGTGCCACCAAAAATTGGCGATCTTCCTAAATGTTCAAGTCCACGTCTAATTTCTGGT +TGAAAAATAACGATTATTCCAATTACAGACCATGAAAGAATTTGATCAACGAAATACGTT +AGTGTATGTAAGTGCAATAAACCGGCTATAATTCTAACTATGATAATTAGTGATATTCCT +TTAACTAATTGAACAGCCTTGGTTCCTCTTACCAACATAATTAAGCGATAAATAATATAC +CAAACAATCAGAATATCAATAATATTCATTAAATTTTGCCAAGTAAATATTTGCTCGATG 
+TTAAATTTCACTACCCATCCCTCCTAAACCTTAATTATACCTAAACTACTGTCTTATCGA +CCATACATTTTAAATTCAAGAAATAGATCATTATATTCCTGAATTTTCTTAGGACCTAAA +TCCTGATAGACTTGTAAATTTTTCATTGCTTGTTTACTTGGATAAAATTGCTTATCGTTT +TTTATCTCTTTAGGTAATAATTTTTGAGCTTTGATATTTGGCGTTGCATAACCAATATAC +TCCGCATTTTGAGCAGCATTTTTTGGATCAAGCATGAAATTAATAAAAGCATATGCTCCT +GCTTTATTTTTGACAGTGCGAGGAATAACAAAATTATCAAACCACAAGTTTGAACCTTCT +GGTGGAACCACATAATGTAAATGCTTATTATCACTTAACATTGTCCGCGCTTCGCCAGAC +CATGTTACACCAACTGCTGCCTCATTCTGAATCATATACATTTTTAGCTCATCAGAAATA +ATCGCTTTTACATTGGGACCTAATCCATCTAATTTAGTTTTAGCTAATTTAAGATCTAAT +GAATTAGTAGTATTTAACGACTTTCCCATCGATGCTAATGAAAGTCCCATAATATCTCTA +GCAGAATCAACTAATAAAATATTATGACGATACTTTTTTGACCAAAGATCGTTCCAGTGC +TTTATTTGTCCCGGTTTTACAAACTTATCATTATATACAATTCCTAAAGTTCCCCAAAAA +TACGGGACTGAATAAGTATTCTTAGGATCAAATGAGTGATGCAAAAATTCACTCCCAATA +TTTTTATAATTTGGAATTTTTTTGGTATCAATTTTCTCAAGTAACTTAGCCTTTCGCATT +TTCGAAATCATGTAATCCGAAGGAACACAAATATCGTAAGCGGTGCCTCCTTGCTTTATC +TTCGTATACATAGCTTCATTAGAATCAAAAGTTTCATAAATTACATGATAGCCAGTTTGC +TTTTCGAATTTTTTTATCAATTTAGGATCAATATAATCACCCCAATTATAAATAATTAAA +TTCTTATTATTGGCGCTAACACCGCTATTATCTAACTGCTTAGCCCAAGCCTCTAATCCC +AAGCAAACTGCTAGAATAGCAAAAATTCCAATCAATAACTTTTTCATTGTGCACGCCCCT +TACCGATATGGGATTTATGATTAGTAATTACATAATAAATTAATACCAAAACCATTACGA +AGATAAACATTAAGGTACTTAAAGCATTAATTTCTAAATTAATACCCTGCCGTGCGCGTG +AATATATTTCTACAGATAATGTTGAAAAACCATTTCCAGTCACAAAAAAAGTAACTGCAA +AATCATCTAATGAATAAGTCAAAGCCATAAAAAAGCCAGCGAGAATTCCTGGTGTAATAG +CTGGAATCATTACTTTACTATAAACTTGCCAAGTAGAAGCACCTAAATCTCTAGCAGCAT +CGACCAAAGAGTAATCAAACTCCTTTAAACGCGGCAAAACCATTAAGACAACAATCGGAA +TCGAAAATGCAATATGACTTAATAATACTGAACCAAAGCCCAAACCAATTCCTAAAAAAG +TAAAAAAGATTAAAAAGCTGGCACCAATAATTACATCTGGTGAAACCATCAAAACGTTAT +TTAACGCTAATAATGTTTTTTGGCCTTTTTTATTTTTAGTCTGACTAATTGCAATTGCTC +CAAAAGTTCCAATTACTGTTGCAATCAAACTAGATAAGAGAGCTAATAAGATTGTTTCTA +AAAAAATGGCTAGTAGTCGATTATCGTTAAATAAGTCTTGATAATGACTCAAAGTAAATT +TTTCAAAATGATCCATATTATGACCACTCGAAAAAGAATAATATATTAAATAAAAAATAG +GTAAATACATCGAAACAAGGACAAAAGCAAAATAAATCCGAGACCATTTTATTTTTTTCA 
+TAGCTTGATCTCCTTTTTACGACGATGATCAGACGAAGTAAATAGCATAACAATTACCAT +CAATACTATTAATACAACTCCAATTGTCGATCCCATTGACCAATTCATCGTAGTCATAAA +ATATTCTTCAATTGCAGTACCAAGCGTAATTACTCTGTTTCCGCCAATCAATCTTGTCAA +CATGAATAAAGAAAGTGACGGAATAAATACTGCTTGAATTCCCGACTCAACTCCAGATTT +AGAAAGTGGCCATAAAACCTTAATAAACGTTTGCCACTTACTTGCTCCTAAATCATAAGC +AGCTTGAATTACAGCCGGATTAATATCACAAATTGCATTATAAATTGGCAAAATCATAAA +TGGAATTTCAATATAAGCTGCTACAAAAATAAAAGCAAAATCAGTAAATAAAATATTCGC +TGGAGCGATGCCAAAAAGCCTTAAAAAGTTATTTAGTAATCCATGTTTGCCGAAGATTCC +AATAAATGCATACGCTTTTAAAAGTAAATTAATCCAAGTAGGTAGAATAATCAAAAGCAG +CCAAAATTGCTGATTTTTCATTTGACTTAAAATATATGCAATTGGGTAGGAAATCAACAA +CGTAATTAAGGTAATTAAAAACGCATACCAAAAGGAATTTAGCGTCATTCTCAAAAAAGT +TCCATTGACAAAAAACTGCTGGAAATTTTTCAAGGTAAATCCATCATCACCTTTAAAGGC +ATTAATAGTAATTAAAATAATCGGAGCAATCACAAATAAACTAAGCCAAAGTACATAGGG +TACAAGAAAAAATAGCCTGCTCTTTTTCATATCTTACTCGTCTCCTTCATATGCCTCTAA +GCGTTTATCAAATTCTGCTTCGCTTTCACCAAAACGCATCACATGAATATCTTCAGGATC +AAAATATACTCCTACTTCTTTTCCAATATTAGTTGGATTAGTTGAGTGAATCAGCCATTC +ATTTTCATCACTATCAATGGCTTTAATTTCAAAATGATCACCCAAAAAGAGCTGACTTTC +AACCATAACTCTTAATTTTCCATGCTCAATATCAGTAATATCTAGATCTTCTGGTCTTAA +AACAACTTCTACTTTTTCTCCTGGTTTAATTCCAGCGTCAGCACATTCAAAACGATGATT +ACCAAATTCTACTTCATAATCTTTAATCATTCTTCCACTCAGAATATTTGAATCACCTAT +AAAGCGAGCTACGAAATCATTAACTGGCTCATCGTAAATATCAACTGGACTTCCGCTTTG +TTGGATTTTTCCTTCATTTAAAACAAAAATCTCATCACTCATAGCTAAAGCTTCTTCTTG +ATCATGAGTAACAAAAATAAATGTAATCCCCAGTTTCTTTTGAATTTCACGTAATTCAAA +TTGCATGTCTTTTCTTAAACGCTTATCTAAAGCCGATAAAGACTCGTCTAAAAGTAGAAC +TTTTGGCTGATTAACAATTGCTCTTGCAATTGCAACACGTTGCTGTTGTCCACCACTTAA +TTCAGAAATTTCGCGATTTGCAAAGCCATCTAACTGAACCATATGCAGAGCTTCTTTAAC +GGCCGATTTTATTTCTTGCTTATCCTTTTTCTTTATTTGCAAGCCAAAAGCTACATTTTC +AAAAACATTCATATGCGGAAATAAAGCATAATTCTGAAAAACCGTATTAATTTTTCTCTT +GGCTGCATCTAAGTTAGTAATATCTTTTCCATCAAAAAATACTTGACCACTTGTTGGTTC +ACTAAATCCAGCAATTATTCTTAAAATAGTGGTCTTTCCTGACCCTGATGGTCCTAGCAA +TGAATAGAATTTTCCTGATTCAATCGTCAAATTAATATCTTTTAGAGCCACAAAACCATC +ATCATACTCTTTACGTACATGCGTTAATTTAATAATATCGCTCAACTTTTTCACCATCTC 
+ATCAAAAAAGCTACAAGCCGTTACTTGTAGCTTTGTCGTAGTATCTAATCTTTTTCTTTT +CCTATAATTCTTACTTCAGTCTGCAAATCAACATCAAAATCTTTTTTGATTGTTTTTTGA +ATTAAATGAATTAAATCTAAATAATCAGTAGCCGTAGCACCACCAACATTAACAATAAAG +CCAGCATGCTTCTTTGAATCTTCCGCTCCACCAATTCTTTTTCCTTGAAGACCAGCTTTA +ATTATCATTGGGCCTACAAAATGACCTGTTGGACGTTTAAAGACACTTCCACAAGAAGGA +TATTCAAGTGGTTGCTTTGCTCTACGTAAGCCATTGAAGTATTCCATTTTAGCCTTAATT +GCCCACTTATCTCCTGGTTCAAGTCCAAAAGTAGCACTAATTACAATATCTCCAGTTTCC +TGAACTAGTGAATGACGATAGCCAAATTCCATTTCATCGTGAGTATAAGTTTTAAATTTG +CCTTCACGAGTTAAGACCCGAACAGATTTAATGACAAATTCCGTCTCGCCGCCATAAGCA +CCAGCATTCATGAAGACGGCTCCGCCGACGCTGCCCGGGATTCCTGCTGCAAACTCTAAG +CCACTTAAACTAGCTTCACAGGCTGCTTCAGACGTATCGATAATTCTCGCTCCAGCATCC +GCAGTAACTGTTGCTTCTTCTTGATTTGCAACAATTTTATCCATTTTAGTAAGAATTAAA +ACTAGACCCGCAATTCCTCCATCTCTAATAATTAAGTTAGAAGCATTTCCAATAACAGTT +AAAGGTAAGTTATTAGTTTTAACTGTTTCCACTAAGATTTTTAATTCTTCTAAATTTTTA +GGAAAAGCAAGATATTGCGCAGGCCCACCGGTCTTAGTAAACGTAAAGCGGCTCAATGGA +ATATTTTCTTGAATATCAATTCCCTGTTTTTTCAAATCCATCAGTTGCATTTTCAATCTC +TCTTTTCCTTAAATAATCGTCCCTTTAATCATACCGCAATAAATAGCTAGTTTTCTACCT +TAAGTGTTATACTAAAAAGAAAAATAATAATTTATTTAGAGGATAAAAATGAACTTTACT +GCAATGGATTTTGAAACGGCAAACAGTCATCCTGAAAGTGCATGTTCTCTTGCCTTAGTT +ATGGTTAGAAACAATGAAATTGTCGACCGTTTTTATACAGTTATTAATCCGCAAATGCCT +TTTGATAGTCGAAATATCAGAGTTCATGGTATTACTGCCGAAGACGTTAAAAATGCACCA +ACAATGGCTGAGGTTTGGCCTAAAATTAAAAAATTATATCAGCCGGGAATGTTAGTAGCT +GCTCATAATGCACGTTTTGACTGCCGGGTGATGGAAAAGTCACTTGCTCGCTATAATATT +CCAGCCCCTCACTACTTTGCAATTGATACTTTAGCAACTAGCAAAGCATTTGAACCCAAT +CTTCCTAATCACAAATTAGACACAGTTTCAGAAGCTTTAGATATCAACTTATGGCATCAC +CATAACGCTTTAAGTGACAGTGAAGCTTGCGCAGGAATTTTAATTGAAGAAAATAAACGT +GTTGGGGATGATCCAATCAAAAAAATGGTTAAGCAAATTTAAGCTTAACCATTTTTTATT +ATTCAAAAGATTTTTTTATATCTGATAACCAGACTTGGTTTCTCTCACCAACAGTTTTAA +ATTCAACTATTCGTTTGGTTGAATCCCACGAATCATCAACACGCTTAATCGTTAATTCAA +GATAATCATTTGGCAGATCATCAATAATAAATTGTGGCCATTCAATTACCACTAATCCAT +CTTCTGCTAGATATCCAGGCATATCAATACTTGATAAATCACCATCTTCTAGTCGATACA +TATCCATATGAAAAAGTGGCATTTTACCTTCACGATACTCTCTAACAATTGTAAATGTTG 
+GACTTTTTACAGGACGTCTAATACCTAATGACCGAGCAATTCCTTTAGTCAAAGTGGTCT +TTCCAGCACCTAAATCGCCAGATAAGAGGAGCAGATCATGCCCCTGGCTATTTTTTCCAA +TTGCCTGGCCTAATTTTTGCATTTGTTCATCAGAATTTATTTCTAATGATTCCATTTTAC +TTCTCCATATTTGCTTGAGCAGCAGTTAAAATAGCTAACAAGTATACATCTTCTGTTTTA +CATCCACGTGATAAGTCATTAATTGGAGCAGCTAATCCCTGCAATACAGGTCCAATTGCA +CTAAAGCCACCTAAACGCTCAGCAACTTTATATGAAATATTACCCGATTGAAGTTCTGGA +AAAATGAAAACATTAGCATGTCCAGCAACTGCAGAACCTGGAGCTTTTGCTTCTCCTACT +CGTGGAACAACAGCAGCATCAAACTGTAATTCACCATCGCAAACTAAATCTGGATACTTT +TGATGTACTAAATCTGTTGCATCTTGAACTTTAGTTACCATCGGTCCCTTTGCTGATCCC +TTAGTTGAAAAACTAAGCATTGCAATTTTAGGATCAAGCCCAACCATTTCAGCTGTTTTA +CTGGATTGGTAAGCAATTTCAGCTAATGTCTCACTATCAGGATCAATATTAATTGCACAA +TCCGCAAATATATATTTTTCGTCTTTGCGTTCCATAATCATAGCGCCAGAAACACGGCTC +ATTCCTTTTTTAGTTTTAATAATCTGCAAAGCTGGGCGAACCGTATTAGCTGTTGAATGA +GTAGCACCGGAAACCATTCCGTCAGCCTTTCCTTCATACACTAACATTGTGCCAAAATAT +GAAACATCTTGTAGAATTTCCTTTGCTTCTGCCAAAGTATTCTTTCCTTTTCTTAATTCA +ACAAAATCTTGGCACATTTTTTCAAAATCTGAATAGACAGCTGGATTAATTATCTCAATT +GAACCTAGATCAACATTTAATTTCTCAGCAGATTTTATTACTTCATCAGGTTTTCCTAAC +AAAAGTGGTTCAACAATTCCCTCTTCTGCGAGACGAACAGCAGCTTTAATTATTCGCTCA +TCATTTCCTTCAGGAAAAACAATCACTTTTTTTGATTCTTCTGCTTTTTTCTTAAGTGAG +TCAAATACTTTCATAACAAATTAAGCCTTTCTTTTTATCTCTAGCTAATATTTTACAATT +TCAATGCTTTTTCTCAAAACAAAAACTATGCCATATCTGATTGACTTACTTCCTGTGGCA +ATTGCCAATCAATTGGACTCTCACCAAATTTAGTCAAGGCATCATTACATCTAGAAAACG +GACGAGAGCCAAAGAAACCACGGTCTGCTGAAAAAGGACTAGGGTGTGCAGACTTGATAA +TTACGTTCTTTTCTTCATCAATTAAAGGAATTTTATTTTGGGCAAATCTTCCCCACAAAA +TAAATACAACTTTTCCGCGATCACTAAGAGCTTTAATAGCTGCATCAGTGACTATTTCCC +AGCCTTTTCCCTGATGACCATTAGCATTTCCATAAGGCACTGTTAATACTGCATTAAGCA +ATAAAACTCCTTGATCCGCCCACTTTTTCAAATAGCCATGATTTACTGGGATGGCACCAA +CATCATCATATAATTCTTTATAGATATTTTTTAACGATGGTGGCAAAGCAACCCCAGGAT +TAACGCTGAAACTCATCCCTGTGGCTTGGCCAGGGTTATGGTAAGGATCTTGACCTAAGA +TCACAACCTTCGTATCTGCAAAAGAAGTTAACTTAAAAGCAGTAAAAATGTGATACATAT +CTGGAAAAATCTGCTTTGTTCGATATTCACTTTTTAGAAAATCATGTAATTTTTGATACT +GTTCACTCTCAAAAGCGGGTGCCAAAATTTCATCCCAATCATTTCCAATTAATTTTTTCA 
+TATTTATTCTATCCTCTTGTATCTCTAAGAAATCCAAGTTCATGATATCATATAGATAAA +AGATCGGAGGACTTAAAATGATTAAACTTATTGTAAGCGATATGGATGGCACACTATTAA +ATAAACAAATGCAGATTTCTTCTGAAAATATTTCTGCTATTAAAGAAGCACAAGCTAAAG +GAATCGAATTTTTAGTAGCAACTGGACGAGCACCTTCTGAATCTCAAGGAATACTGGCTA +AAGCTGGACTTCACACTGGCTTCATTAACTTAAATGGCGCAATGGTTTTTAATACAGAAG +GTAAACTAATTGTAAACGAGCCAATTCCTAAAGAAGAGAG +>NODE_18_length_37282_cov_67.8523_ID_35 +ACAATCACAGTGCCTGTAGTCTTGCCTGGCTTAGTTACATCTGGAGTCTTCTCCCAAGTG +TACTTAGTGCCGCTTGGCATATCATCCTTGTTCTTAATGCCCTTTTCTGCTGGTGGTACT +ACACCTGGGGTAGTGTGAACATCTTGACCTTCTGGGGTGTACTTGTCAGCATCAGTTGGA +GTTGTAGTCTTAGTGTAAGCAACTACCACATTGGTATTTGGTGTGTTAGCATTTACATTC +TTATCAGCTAAAACAACTTTTGCATCTTTGCCATCAACGGTTGAAGTATAGTCAGTTACT +TGTGGCACATTGTATTCTGGCCAAGTGCCTTCAGTTGCTCCAGTTGACTTCCAAGTATTG +CCATCTTTTACGCCAACAGTCCATGGAGTATCAGCTTCTACATTTTCACCTTCACCGCCA +CGAACAAAGTGAACAGTTTGAACAATCGTTTCAGTTGTTCCATCAACCTTATTAACAGTA +ATCGTACGAACAACGTCTTTGTTCATGTCCTTATTATTTGGATCATATGGTGTTGGAGTT +ACTGTATTACTCTTTTGGTAAGTAATATGAACTGCTTCACCATCTATTCAATTGCGGAAG +CTGATTGAATTTCAGTTGCTTTAGTGCCATTTACGTATGAATCGTAACTAGAAATTTGTT +CTACTGCTGCAGTTGGATATTCTGCTTCGCCGCCTAAAACGAACTTGTTGTTTTCGATCT +TACCAACTTCCCAAGCACCAGTTCCAGTAACTTTTCCAGTTACTAAGTCTTCAACACCGT +TTCTACCAAAGTTAACAGTTTGAGTTGCAATTAATTGCTCTGCTTTGCCTGGCTTAGTTT +GGTAAATCTTACGGCTAACGCTAGTAAACATGTCCTTGTACTTGTACTTAGGATCAGTTG +GATTTACACCTGGAGTTGTTGGAGTAATGTCCTTTGTTTCGTGAACTAAGTTAATAACAA +CTGCTTCATTCTTAGCAGGAATAACTACATTTTCTGGTAATGTTTGACCTGCAGCTAGCT +TGTAACCTGCTGGAATGGTTAAGTTAGTCTTTACATTAACGCCTGGCTTACCACTTACAA +CAATTGGGTTACCAACATTCTTCTTGTCAACAGTGTCATAGAATTGATAACTTACAGTTA +CATCTGCAACTGGAGTTTCACTTTGTTGGTAAGTAATGTGAACTGCTTCACCATCTACTG +GCTTGCCGTTTTCAACTGCGGAAGCTGACTGAATTTCAGTTGCTTTAGTTCCATTTACAT +ATGAATCGTAACCAGAAATTTGTTCTACTGCTGCAGTTGGATATTCTGCTTCGCCGCCTA +AAACGAACTTGTTGTTTTCGATCTTACCAACTTCCCAAGCACCAGTTCCAGTAACTTTTC +CAGTTACTAAGTCTTCAACACCGTTTCTACCAAAGTTAACAGTTTGAGTTGCAATTAATT +GCTCTGCTTTGCCTGGTTTAGTTTGGTAAATCTTACGGCTAACACTTGTAAACATGTCCT 
+TGTACTTAGGATCAGTTGGATCTACGCCTGGAGTTGTTGGAGTGATGTCCTTTGTTCCGT +GAACTAAAGGAACAATGATTGTATTATCTGTTGTTTGATCAACTGGATAAACGAAACTTG +GATCAACTTTCCAATTGGTTGGAACTTCACTATCAATATCAGCCTTCAAAGTAGAAGCCT +TTAAGCCTTGATTCTTATTGAAGTTAAAGGTTAACTTGTCTTTGCCATTCTTAGTTTGCA +TTGGAGTATTAGTTTCAGCATCAATGTACTTAACAATTACAGTTGCATTGTAGTCTGAAT +CATCATGTTGCTGTTTACTTTGAGTTACTGGAACAACTAAAACTGCAGGTTGACTAGTAA +TATTGCTTTCAGCCGCTGGATAACTATAGCCATCTACAATACTGTAGTTAGCTGGTAAAT +TACTGTTAATAATATTCTTTAGTTGCTCAGCACTTAAACTTGAACCCTTGGCAATATTAC +CAATAGTGTAAGCTGCAACTTCTTTGCCATCATATTGGTAAGAAACTTGTTGGTTAAAGC +TAGTCTTTTGATCATCTGGCTTACCTGAATTTCCATCTTTTTCAACGTTGATATTAGCCT +TAACGTCAAGATAAGTACCATCGTTAAACATAATTCTTACAGTACCTGGAATATAGGCTA +AAACTTCTGGTGCATTAGAATTAGTATTTTGACCATTCCAACCTTCTCTACCAAGGCTAT +TAACGTTAATTAAAGTACGATATTGTTATTGAGTTAAATCTGAGATATTACTATTTTGCT +TAAAGTTCAATGTTGAACCTGTAGTAGCACCGTAGAAGTTGAAGTAATTATTTAGCCAAT +TGTTATATGCATTAACACCTAAACCTAATTTAGAAACAACATCAGCATCGTTAATTTTGA +AGTTATAACGCCACTTAGAATTTCCTGGTAAACTTTCGCCTGTAGTAGTCTGCTCTGCTG +GATTTACTTGACCGTTAGCTAATTGATACAAAGTATCAGCTGAACCATTAGCAAAATTAC +TTACATTAGTGTTTGGTGCAGAAACGCCTGGTAAATTAGTAATCCAAGAATAAGTAAATG +CACTAGCTGCAAATTTATCGTTCGCATCTTTTTCGTTGACATAGTTCTTACCATCTGTAC +TTAGCTTATAAGTAGTTATCTTCTTGTAATCACTTGCTGAATTACCAGTACCATTCCAGT +CACTGTAGAATTTAATAGTACTAATAGCACTATCATTAACCGATGGAGTGAAATCAACAC +TAGTCTTATGAACACTGGCTACTGGAGCAGTAAAACTAGTCATCGTTTGATCGCCATAAT +AAGTATTATGCTCATGATCCATACCAGTTACACTAGCTGGAACATTAACAATCTTGCTTG +AGCCATCTTTGAATGTAACAACAACTGGTACATTAAAGGAACCAAGTTGAGCATTTGAAT +CTACTGTTTCAGTTACTTTACCTGTTGTTTGGTCAACATCCACGGTAACACCAGTTGGAG +CTGTGAAAGTTCCAATCCTATAGTTAGAAACTTCATCGTTTGGCATGTCTGCTGGAATTG +TTGGGTTAACAGAACCAGTATTTTGTCCTGAATCTGGACGTTCAACATTCAAATGTGGAT +AAGTAATGTGATATTTTTCAGAATCTTTTTCTTCTGTAGTTACATTAATAGAACTTGCTG +GAATATTAATATCTAGATAAGTATTTTGTCCTTGAGCATCTTTATCATTGAAAGTAATTC +TAATTACTCCGCCTTGATCTTGAACAGGCTTAGTTTGCCATGCGGTGGACTGAATTTCAC +TATCTGGAATATTGTTTTGAACTAAAGCCTTAAATTGTGAGCTAGTTAAATCTGAACCAG +CCAAAATATCAATTGGCTCAGCTAAAGTCTTAGCACCAGCACCTTTAGCATCAATATCAA 
+ATGGGTTAGTCTTGTCAGTCTTTCCATCAATAGTGATATCAATTACGTTATTAGCAATAT +TCTTACCGGCAGCTGTAGCTATTTCTACAACAGTATTAGGTGTATTTTCCCAATCTGCAG +TGTAATTTGTAATAGTTGTTGGCGTAGTAGCAATTTGGTGATTTTGGTCAATACTGTAAG +ATTGAACAGATGTAATCATGCCACTGGCAGAAATAACTTGTGAATTGTCACTTGTTTCAT +GTGCGTTAGCATTAGCACGACTAATTGTTACTACAGTTGCATTATCATTATTCCAAATAA +CAGTTTGAGTACCTTTAGTTACAACAACTGGTACATTAGCATTTTGCTTGCTGCCATCTG +CGTATGTAATAGTAACAGGAATGTTGTAAGCGCCCGGTGTAGTGCTAGTTGTAGGAGTTA +AAGTAACTCTACCTGTTGAAGGATCAATTTTGCTCCAAGTGGGGGTTGCTTCAGTTCCTG +CAAAATGAGTACCGTCTGGAGTAGTAATTACTTCGCCAATCTTATTAGTAAAGGTTGGAT +CAGTTGTAGCTGTCTTTCCTTGTTCAACACTAACTGGCTTATATGAAGGAGTATATTGGT +TATTTGTTTGTAATTCAGAGCCAAATTTTACACCAGAGGCATTCCACCAACTAAAGTGAT +TAATAAAGCTATTCAAAGCATTGGAAGCTGCAGTTTCAGTTCCTTCAGGCACAACAGAAC +CATCAGAGAAACTGTTCGAACCCGTCTCAGAAGGATTTGCAGTTAAAATTACTTCGTTAA +CACCTTTAGCTGGAGTATCAGCATTAACAGAATCAATCATATATTCTGAGCCTGGTTGAT +TCCAAGCACCATATTGCTTGTGGGAACGAAAGGCATAGGCAAAGTCCCCACCTTGACTAG +TATTGTTTAAGTACTTGATGTTCCAAGTTACTGGATCACTAGCACCTGCTGGCACCATGG +TTAATGGAGTATTATAGTTGCTATCAGCTGAATTAATCACAATGTGAGTTTGTTGGTTAG +CGACATCACCAGCACCATCAGTCTTAATTAAGTAGCCTGGCTTGTTTGCTGCAGTACGTT +GTAAGTTAACTAACTTAGCATTAGTGAAGTTAATGTAGTTTTGACTACTTGAACCCCACA +TAGTTAAAATAGCAGGCCAGCCATTTCTATATGAGCCTTTAGTTAAAGTATAGGTATCTG +GTAAGTAACTTGAATATGAGGAATCTTCCAAATCTAAGCTACCACCATTCACATCAAGAT +GATAAGTTGCACCAAGTCCGTCCTTTGGTCCCATTGCAAGCAATGGTGAATCAGAATCAG +TCACTTTAGTTCTAATAATTGAAAGTAAAGCATTTTGACCAACTCTCACCGTGCTATCAG +CAGTCTTGCCATCATAATCTAAGCCAATAAGAGCAGCTGATCTCCAACCAGTGGTATTCA +TCTTGCTAGTTGTAACTTTTAATGAAGAATTCTCTTGTAAATCAAGATCGCTAGCATTCA +TAATTCCCATTGAGGAACCAGAGCCCAACTCCATCGTTACTTTAGCATTAGGCATTAAGC +GAACAACACCTGAGGTTGCATTATCAATAGTTTGATTTCCTGCACCGGCACCATTAGGAA +TAGTAATTCCAGCCATGTTAGAAGTATTAGCACTGTTGAAATCAACTGTAGTACCATTAC +TTACTACAACATTGCCTGAAACGTTAATATTGCTCAAGTTATTTGAAGTACTGTTATTAG +CAGCTAAAGTCGTTGATCCGTTAAGGAAGTTAACATTATTAGCTTGAATCAAGGCATTCC +CAGTTGTATTTGAACTAGTTAAATTGCCTTGCAAATTATTCTTACCCTCGAAGTTAACAT +TAGTACCTTTATTATTAAGGAGTTGACCTGAGTCTTGACTCGTAGTAACGCCGTTAAAGG 
+TAATTGCATCATTATTAGCAGCAGAATTCTTAAACCAGAATGGGCCATAACCACTAACAG +TTTGCAAACTTAGGTCTTTTAAGACTACGTTCCAGCTATGACTAGCAGTGCTTGCTCCTT +GATCATAGTGCGTATTGTCACCTAAGCTAATGTAGCGATTGCCCATGTCTAAACCATATT +GATTTTGACCATCAATGGTAACTTGACGCGCACTACCATAATTATTAATTGTTTGGTAAG +TATTATTACTCAAGTTAGTATTACTAAAGTCAATATTTTGATTTAATTTAATTGTATTTA +CGTTAGCATCTTGTAATGCATTGATGAATTGAGAATAATCACTAACTGATTGTTCTTGAT +TATTTGATGTGTTTTGAGCTTGAGTAATTGAGTTTGTCTGAACAGTAGACTTTACTTGCT +TAGTTGCAGCTGTTTTTGCTACAGCGTTAGTGTTAGTAGTTTGCTTTACAGACTCATTTT +TATTAGTTTGAACCTTATTATCAGAAACTACCTCTTTCTTTTGTACATTAGTCGTTTCTG +CAGTTCTATTAGTGTCTAAAGTAATGTTTTGATTATTAGTTTGATTTTTAACTAGGCCTG +GGGCTTCTTTGGAAGCTTTTGCAGTATTAGTAACATTTACTTGATTTGGATTTAAGGTAT +CAGCTTTTACGCTTTTACTACCTGCACCAAGACCGAAAATAGTAAAGCCAATTAAAACTG +AAGCAGCTCCCACTGAAAATTTTCTAATTGAGAAACGCTCTTTTTTATTTTCCATCTTCC +GAATTTTTTCTTGATAATTATTCTTAGATAACGTTATTGAATCTCCTCCGTCAATTCTAC +AGACCTTTCAGTGTTAACTATAGTTCTTCTTTTTATAATATTCAAGGATTTAATAACATA +TATTGTTTAATGCAGAACTATAAATTACGCTCTAAATATCAATAGCAAAAATTAACAGAA +AATCAATATGCTATTCAGTTATTTTATGCTTAAAATCCTTGTTATAGCATAGTGTTTATA +CGTATAAGCATTTTATTATTGAATAGACAATTCAAAATAAAATATTGTATTTTCATTTGC +AAAATATTAATTAATATTAGCCAGGGTATAAACGCCTGTTATTTTAAAACAAAAAAATAA +AAATAATTAAAAAAGGAGCCGTATACTACACGACTCCCTCTTTAAATCTCTTTATCTACT +AATACTATCAGCACGGCCTTTATAATCACCGTTTTCAGTTCCGATAATTAATTCTTCGCC +TTCTTTAATAAAATCTGGAACAGTTACAACTAAACCAGTATCCATCGTTGCAGGTTTACC +ACCGCCAGCAGCAGTTGCACCCTTGATTTCTGGTTGAGTTTCTTTAACAGTCATCTTTAC +AGTAGATGGTAAGTTAATTCCGATTAATTTACCTTCATCAGTAAACTTAAGATCAACATC +AATATTAGGCATCAAGAACTTAGCTTCTGCTGCAAGGTGTTCCTTAGGAATTAGATATTG +TTCATAAGTATCAGTATCCATAAAGATAAAGTTAGCGTCATCATCATAAAGGTATTGTGC +CTTCTTTAAAGCAACGTTAACTAACTCCACCTTCTCACTTGGACGCATTGTTTTGTGAAC +AACAGCTCCACTCATCACGTCACGTAAGTCCATTTGCATAACCGTGTTACCTTTACCAGG +CTTGTGGTGGTTACTCTTTAGTACTTCAATTAACTTACCATCTTGGCTAAAAATCATACC +CTTTTTTAATTCGATTGCTTGCATGTTTTCTCCCTAAAATTTTCTAATATACATATCGCT +GATCTCATGATTACCATACTTTTTAATAATCATATCAGCTCTCTTCTTAGTTGGTTCGAT +ATAGTCATGTAAATTCTTCAAGTTAACATCACGCCAGACTCTTTTGGCAAAATTATCTGC 
+TTGTTCAAGCGGGACGTTAGCCCACTTATAAAAGAAATTATCTGGATTATTGCGATTAAT +CTCGAGCATTAAATGGTAGCGATCAAGATACCATTTCTCTAAGTCTTCTTCGCTTGCATC +CAGATAAATTACATAATCAAAAAAGTCACTTGGGGGAGCTTGCCCTTCTGGAGGCACCTC +CAGCAGGTTAATTCCCTCAACTACCAAGATATCAGGAACGCTGACATACCCCAATTCATT +AGGCACCAGATCAGAAATTTCCTGCGAGTATAGTCTATATGGTACTCTTTCTTTACCAGC +TTTAACGCTTGCTAAAAAAGTATAAAAGGCATCCCAGTTAAATGACGATGGGAAGCCCTT +TTGATCCATTAAGTTTTTAGCTTTAAGTTCTGCATTCGACATTAAAAAACCATCAGCAGA +AACTTGGGCAATTGTTTTATCTGGTTCAAGGCGTTCGAACAAGCGAGTTATTTTCTTTGC +AAAAGTAGATTTCCCTACAGCAACAGACCCAGTTACTCCGATGATAAAAGGTATTTTTTG +CCATTTTTCGTTATAAAAGGTCTGCTTAGTGCGGTAGATTGCATTTTTTTGGCTCATATT +GTACTGAAGATTTTTCATCATGAAAATCTCCGCACTAAATTCATCGCTTGGAGGGATTAG +TGTCTGCCACTTTTCAGATGTCAGATGTGTAAATTGCTCTTGCATAAAAAAGCCGCCTTC +CTTCTGCCTTGTTCCTAGTATTTTTACATTGTTCAGCCTATCATATATATTGAGAGAAAG +AAAAGAAGATATCTATGAAAAAATTAATTCTTTTTGGTGATTCCCTACTTGCAGGCTATA +TTGATGGACGCGCAACTAATATTGTAACTCAAGGACTGCAAGAAAAATTACCTAAATTTA +CAATTATTAATAATTCTGTACCTGGGACAACTACTGAAGAGGCAATTGATTTTTATGAGT +TACGAATCAAACCTTTTAAGTATGATCTAGTTATTCTGGCCCTAGGCACTAACGACGCCA +ACATGCAGTTTGGTTTGAGCGCTGGACGATATGCGCATAACTTACAAGTATTGGTCGATT +TAATTGGGGCTGATAAAACATTATTAATGGGACCTTCCTATACTAATTGGAAAATTGCTC +AAGATCAAGCTTGGCCTAAAACTTTGCAATTTGAATTAGTTGCTCAAGAATGCCATGTTG +AAAATAAAATTCCATTCTTAAATTTTGCCAAAGTAATGCGGCAGACTGGTCACCCCAATA +AACTATTGCAAAAAGATGGTATTCACCTAAATAAAGCAGGAAATAAGTTATTGATTGAAA +GATTAGCAGACCTAGTTGAAGAAAAAGAATTGGTTACTGCATCGTAAAAAAGCATCCCAT +TGTTGGGATGCTCTTTTGTACTCCGGATATTTTATTCAGTTACTTCTTGCTTTCTTAATT +CATTTTGTTCAGCAACTCTAAAGAATGGTAAATAAATAAAAATACCGATAATGATTAAAA +TCACTTCAAAAATAATATTTCTCCAATCCATTGAACTCATATATGCCTGGAAGAAAAACG +GTGTAAATGAAGGATCTACAATATACCCCATTCCAATCCAGTGTAAACTTTCAGCAATAT +AAGTAAGAATAATATTAATAATTGGAGCAATCAAAAATGGAATACCTAAAATAGGATTAA +AACAAATTGGTGTACCAAAAATTACTGGTTCATTAATACTACAAATTCCAGGAATAATTG +ATAGCTTACCTAATGCACGATATTTTTCTACTTTGCTTCTCATCATTAGAATTACTAAAG +CAAGTGAATTACCAGATCCACCTAAAACAGCAATTCTAAACATCTGTAAATTCATTAAAT +GAGTAGGTGCTAATCCCTTTGCTACTAAAGCAGCATTAGCACCGGTAGCTGCAATACCAG 
+TTGCAAAGACAATTGGAAAAATTACATTACTTCCGTTAACACCAATTAACCATAATAGAT +TTCCTAAAGTTACGATCAAAATATAACCCCATAAGCTATTAGCCAAAAATGTAGCTGGAG +TCAAACACTTCATAATGAAGGTAGTAAAATCAGCATGCGTTAAGTTGACTAAAATTAAAT +TAGCACCATAGAAAATAAAGATATTTGCTAATAGCGGTACTAAGGAATTAATAAAGTTGG +CAACCATTGGGGGAATCGATGCTGGCAACTTTATCTTTAACTTACTCTTTTCAATCCCGC +GGTCAATTTCGACAGCTAATAATCCAATGATAATTGCAACAAATAGTCCATTTGTTCCTA +AATTATTGAGACTAATCTTATTATTCTTATCAACAGTAGTTGCAACTATTAAAAAGGTAA +CTAATGAGATCATCCCATCAGTAGCAGGATGAAGCCTACGATATGACTTACTTAATTCAT +ATGCAATACCAAAAGCACAAATCATACCAAAGATTCCCATAGTCATATTGTATGGGATAG +TTATTTGAGCATAATGTGCGACAGCCCAATATTTCCAACCAGCAAGCCATTGCATAAAAA +TATTAGCTGTTCGTGGATTATATCTATCAAGTACAATTGGTGGGTTAACAACAATTAAGA +AGAATGAACCAATTACCAAAAATGGCAATCCGAAAAGCATACCATTTGATATAGCTTGTA +GATGCTTTTCATTTCCAAGTTTTTGTCCAATAGGCGTTAAAACTTTATCCAATTTAGTAA +TCAGTGAATCTTTTTTAGCAGCAGTTTCAGCCATTAGAGATCACCTATTTTCCTTACTAA +AGATAAATACAACTTAAATTTGATATGGATTGAACTTGAAAGCATTAGGTATAACTTTAC +TGTCATCCCAATTTTCTCTAATTTTGTGGTATTCGTCCCAATAGTCATTGTCCACTTTTA +ATTGTGGAGTAACCTTAGAATCGACTTCTAGAAAATGCATACTTCCTCTTCCCATCTTTT +TGCCACGAGTGGTGTGCTTGTCTAATGCATAATCTGGAATTTCAGGAACATAACCTAGAG +CAAAGTTCTTAATAATGATATTCTTAAGTAAATCAGAAGACCGATCCTTAGTTGATTCAC +ATAAGTAACGAATTGCATGAACAAACATCATGGCACGATCCGGCTCATTGTAACTAAAGT +TTTGTCTCATTTGATTTAGACTATTAATCAAAATTGGAGCTTGTGGATTTCCCATTCCTA +TATCTTCAACTGAAATAGCTTGAAGTCTACGCCATAGTTTTTCTTCCATTTGCGGAGAAG +AAATATACATTTCATAAGCAAATTCACATGCTGCTCGTTCTTTGCCTTTTCTGATTGATT +TTTGCAATGCAGAGATAACTTCATCAGCTGCTAATCCATTTCTAGTCCTAGTTCTGGCCC +ATGGATCAGCTAAAAATTCCTTATCTCTATTTTGTGGTACTTCATTTTTCTTAATATCTT +TTTCTGACATCGTGAGTCCTTCTTTCAATATATAGGAGAAAACAATGAATATTCAAACTA +TTAGCGAAAAGTTTCAACTGAATACTGATGAACAAAAAATATTAATTTATATGAATCAAC +ACCGTAATGAAATTAAAAATATCAATATTCGTGAACTCGCTAAACGAACATTTACATCTC +CTAGCTTTATCGTTAAGACCTGTAAAAAGATGAAGCTTTCAGGATATTCAGAATTGGTCT +TTCTAATTGCAGATGCGCCAAATTTTCCTAATAATACTGAAAACGACTTAAAAGTAGAGT +CATACGTAAAGCCTTTTTCCAACTTGATGGATAAGCATAAAGACTCAATGATCATGATAT +TGGGCAGTGGATACTCTCAAAATATTGCTAATTACATGAGTGAATATCTTAATTTGAATG 
+GCTTTCGTTGTACATCCAATTCGCATCTTGAGATGCTTAGAAAGCATAAGAATACTTTAA +TAATTATCATCAGCAATTCTGGAGAAACAAAGCGATTAGCTGAACTTTGCATACAAGCTC +AGAAAAATAATCGTGATGTTATTTCCTTTACAGGAGATAAGAACTCAACAATTGCAAAAC +ACTCAACGCTTGCAATAAGCTCAGATACTTTTAACCCCACTTCTTTTGACAGCCATTACC +CTCAGCTGTTTTTTGGTCTAACACTCATCTATTTTGAATTATTAATGAGTAACTTTTTGT +CTAATTAGGTACCTTCTTAAGACAATTCTTATACTACTTTGGTTTTTAACAATTTTGAAT +ACTTTTCCTTGTTTTGAAAACATGTTTCCATTTTAAAAAACATTTTTATTCAGGCATCAA +AAAAGGCATCCCTAACGGGATGCTTTTTTTACACACACTACTCTACGTACAAGGCTTGCT +TACCTTGCTGATTATAGCTAGCCTCTATTTTCTTTAAACTTACTTTTTTATCTTTAACGC +CATAAGGATCATTTACGTAAACAATCCCCTTCTTTTTATTATATCCAGTAACCACACAAG +CATGAGAAGAAGGTGTCACATTAACCTTTCCCTGCTTAGTCTGCCATGTCCGCATATCAT +TTACCCGGTCATATTTTAAAGTCGTAATAATCATTACTGGGTGTCCTTTAGAAACTAACT +TCAACACCCTAACAAAATCATTGCCAGTATAATTACGAATTCTCTTAGTATACTTCTTAG +CAACATCATAGAGCGGCTCATTATAAACACACCACCCCGCGTTTTCGATACTCATATAAC +CAACAAAACCGACATGAGGATTTCCGCGATAATCATTGCCAATAAAAGAGGAAACATGCT +TAATGTTAGAAGACAATTCTAATTTACTTACATTAATATCATAATAATTTAGGAGCATTG +ACAGTGACGTAACTTCACAGCCATTTGGCAGGTCCGGTAATTGCCTTTCTAAGGGGACAT +TCAATTTTTGCTCAGGTCTTAAAGTAAGCCAGTCATATTCATTTTGAATTTTTTCAAAAT +TAAGGATAATCCCAACTACTCCAGCTGTAATCAGTATTAAAATTCCATATAAAATCTTTC +GTCGACGTCGATATCTCTTTGCTCTGCTTATTCTTTTCATGCTGAATATTCTATATCAGA +AATGTCAAAACGCAAATTAAGTTGGCAATGTAAGAATTAAATAGATTTCATTTCTTTTGT +GCTTTTAGCATAAGAATTAAATTAACTTTATTTATACTCGAAATATGGAACAAGAATTAA +GTTAATTTAATTTATATGCTAAAATTAAATTAAGAATTAAATTAGTTAAAATCTTGTATC +AACTATTATTAAAATTAGAGGTTAACAATGGTACCACGTGATTCTTACTTAAATATTTTA +GATAAACTACGTAATAAACAAATCATTAAAGTCCTAACTGGAGTACGTCGCTGCGGTAAA +TCTACTATTCTTCAGTTATATCAAGAGCGATTGCTCAAGTCTGGAGTAAATACTAATCAA +ATTCAAACGATAAATTTTGAAGATTTAGACTTAGTATCAATTAAGACCTACCTAGACCTA +TATAACTACATCAATGAACATCTCATTCCTAACAAAATGAATTACATCTTTATTGATGAA +ATCCAAAGCATTCCTAATTTTGAAAAAGCACTCGATAGTCTTTATATAAAAAATAATGTA +GATTTATATGTCACAGGATCTAATGCCTTTATGCTTTCTGGCGAATTAGCTACTCTTCTT +TCGGGACGCTACATAGAAATCCCTATCTATCCTTTTTCTTTTAAAGAATTTTTACAAACA +ACTGAACTTTCAAAAGAAGAAGCCTTTTCAACTTACTTAGAACGAGGTGGATTTCCATTT 
+GCTACTGAATTAAACGACAATAATACTTATCTTTCTTACATTCAAGGAATTATCAATACA +GTATTAATAAAAGATATCCTAACGAGAGTGAATCGCGGTAATGCAACTTTACTTGAAGCA +ATTGCTAGTTTCTTAACTGAAGCCAACGGCAGTCTCGTTACTCCTGCTAAAATTGCCAAT +ACTCTAACCTCTAATGGAATCAAGACTAGCAACGCCACAGTAATTTCATACCTCGAAAAA +TTAGTAAATTCCTATTTATTCTACCAATGCAATCGATATGATATTGCAGGCAAAAAATAT +CTACAAATTAATAGCAAGTATTACCCCGTTGACCCCGCCCTCAGACGTGCTCTTTTAGGT +CAGAAACGTCCAAACATGGGCAGCAGGCTTGAAAATATCGTGTTTATGGAATTAAAGAGA +CGCGGTTATGAAGTATATGTCGGTAGTCTTAAAAATAAAGAAATTGATTTCGTTGCAATC +AAAGACGGTGTTAAGCAATATTATCAAGTTAGTCTCACAGTCCAAGACGATAAGACTTAT +AATAGGGAAATTGCGCCCTTTCTTGAAATTACTGATAACTATCGGAAAATTCTTCTCACT +CAAGACCCCGGTAGCTACAACGACAATGGTATTGAACAAATAAACGTTATCGACTGGCTA +CTAAAAGAGCAATAAATTTAAATAAACCCACAAAAAATGGAATGCCAAATTTGGCACTCC +ATTTTTTTATTTATTCACATAAGTCTTCTCGTCTAAATCATCAAACAGTGGGCTCTTCTC +GCCTGTATAAATCAAGTCAAGTTTATACATTGCACGTGAAGTAATTGTGTAAAGCAATTG +CGTTTCATCAAGTTGGTGATAATTATCCCTTGAAACATTCCAAGCAATTACTGCATCAAA +TTCAAGGCCTTTAGCTAAATATGATGGCATAACTAAAGTTCCAGGCACTAAACGCTGGTT +AGCTGACCCAATTAAAGTTGCCTTAATTTTTCTTTCTTTCAAAACCTTAGCCACTTCTTT +TGCTTCAGCCAAAGTCTTAGTAATAATGGCAGTAGTTAACTTTTGCTCATCATTTTCTAC +TAAGATATCTTCTAAAGCATTATATTCTTTTTCAAGACTGTCACGCTTGTAAAAAGCAGG +CTTAGGACCACGTCTATCAAAGGCCTCAATCTTTTCACCTTGCCGCAGAATTTGCTTAGT +AAAATTAGTTAATTCCTTAGTTGAACGATAAGACTTGGTTAATTGAACAACATCAGTCTT +TTCAGGATCAAACAACTTGGAAATCTGACCTAAAAGAGTCTTACTTTCATCTTTAGTAAA +AATAGCCTGGTTCAAATCTCCGAGCATCGTAAATTTAGCTCTTGGGAAGTTGTATTTAAG +GTAAGCTAACTGGAATGGAGTGTAATCTTGAATTTCATCAATAAAGGCATAACGCATTTC +ATAGTCAGTTCTACGCCCTGTAACTAAATCGTACAAGTACAAATATGGTGAAATATCAGC +CATCTTGATCTTGCCGTTTCTGAACCTATCTTTAACATTTTCAATATGTGCTTCCCACTC +TTCTTGGCTAATCTCCCACTTGCTTAAATCAATCAACTTAGGAACAGCACGCAAGAAGCG +CAAGTATTGCGCACGAATATTCAAGAAACGGTTCTGATGAATCTTACGACTAACTTGCTT +TAATTGCTTAATTACAATTTTACGAGCTAAGAAGTGCTCTTCTTTAGCACTTGATTCAAA +TTCTTGGTCTGGACGATCATATAACTTATTCAATTGCTCTTGATCTAACGATTCAATCGT +CTTATCAACCCAAGCCTTCTTAGTTTCTGGCTCAATTCTTCTATTTAAGCTATTAATTAA +AGCTTCTTTGGTAGCTTCAATTCTGTTTCTTAAATGGTAATTTTCATTAAAACCATAGTA 
+AATTTCCTTGATCTTTTCTTTATCAAAAAATGGTTTCTTCTTATTCTTGAAATAAATATT +TTTAAAAATTAAACCATTCTTTTCAAGGTGCTTAGCATAGCGAGTTACCGCATTAAAGAA +TTGTAAGGAATCTTTTAAGTTAACGATCTTGTCATTTTGCTCATTATCTTCGAATTGTTC +AAATAAGTTTTCAACATTCATCCCTGGAACACGACGAGAAACAAATTGCCAGTAAGTCAT +CTGCACCATATTTTGTTCACCCATTTCAGGTAGAACATTTTTAACATAATCGTTAAACAA +TTGATTAGGGCTAAACATAATAACATCGCTAGAAGTTAAATTCCCGTGATAACGGTAAAG +TAGGTACGCCACACGTTGCATAATAGCTGAGGTCTTACCTGATCCTGCTGCCCCTTGAAC +AAATAAAAGATCTGCACTAGTATTTCTAATAATCTTATTTTGCTCACGCTGAATCGTTGT +TACGATCGACTTCATCTGTGTAGAAGATTTTTCGTTTAAGACATTTAAGAGCATTTGATC +CCCAATTGATTCATTGGTATCAAACATATTGGTAATCTTACCATCTTCAATTAAGAATTG +CCGTTTCTTAGTCATATCAACGGTTTGCACACCATCGGGCGCATTATAAGAAACTTTACC +TAACTTACCATCATAATAAATAGAAGATATCGGTGCACGCCAATCATAGATTAAAAAATG +ATCTTCTTTATCTGCAAAAGACCCTAAGCCAATATAAATCGTTTCAGGATCTTCATTAGG +TTCTTGGAAATCAACTCTGGCAAAGTACGGTCTCTTTTCTAGACGCTGCAAAGTTGATAA +TTGCTTAGCAGAATGTTGCCAGGCATTTTGACGTTCTGCTGACATTTGTTGCTGCTGATG +AATTGACAAGGCAGTATCCATTGAAGTTGAATAGCCATCATAATCTAATTTCACATCATC +AAAAAAGTGCGAATTAATATTTTGCGCTTCATTTTGCGCATGTTCAATTGAATGATCTAA +TTCGCTTTCTTTCTTTTTAATTAAGCCTAGAACTTTATCTAAATGTTTTTGTTCTTTTTG +TTGTTCAGTTTCTTTTACCAAAATTAAGTCACCTATTTCTAGTATTAGTAAAAACTTTGC +ATACTATTTTATCATATGTATACGCTTTCTAAAAGGATTTACAACTTTGAGAAAGTCTCA +ATTATTATTTAGTCTTTTCTTAATTAATTAGTTTTATAATTAAATAAAATTACCTAGCGA +GATGAAAAAGTGAAACGATTCTTTAAAAGTATTCTAGTTCTAGCTCTTTTATTTATTGGA +CTTAACTTTGCTTATCAAAAGACTGCGCCAGAAATTGAAAAAACTTTTGGAACTAGAAAC +CCACTCCCATATTTAACAGCCAAAGTACAACAATTTATTTCACCTGAAAAAATTCAAAAT +GATGATACAAATGCCGACAGTTCCAAAGGACATACTTTTGAAACTAACTCTGCATCTGTT +TATCTTGACCTCTCTGATCCCACCCTCAGGCAAGCCGCAATTGATGGAATCAATATTTGG +AACAATACAGGGGCCTTTAATTTCAAAATTACGAATAACAAAAATAATGCCAAAATCATC +ATTAAAGCAATGAATGATGGACAGACTAACGCGGCTGGTTTAACTGATACGCAATATAAT +TCCCTAACAGGGCACTTAATTAAAGCAACTGTCCGTTTAAATTCATATTACCTTTTAAAA +TCCAAGTTATGGCTACAACCACGGTCGAATCGTCAACACAGTTGAACACGAATTAGGCCA +TGCAATCGGGTTAGGCCATAAGAACGGCATTTCCGTCATGTATCCACAAGGATCTTTCTA +TACCATTCAGCCAAGCGATGTTGAAGATGTTAAAAAATTATATCAAGAACAATAAGAGCG 
+ATTCTATTTTTAAGAACCGCTCTTATTTTGTATTTTTCTTTTTTAATTGGTGTTGCTTTT +TAAAATAAGGAATCCAGAAAGTTATGACATAACCTACTGCAATACCTACAAAAGTCCATA +CCATAATATTTTTAACAAATAAACCGATAAACAAACCAATTAAGCCAAAAATCCCAATAA +TAAATGATTGATCTAGTTGCTTCATACTTATTTCTCCATTTCTATCTTAACTATAACAAA +ATTATCTTATTATAAGCATATTTTTAGACTTACTTTGCCTTGCATTTTATACTATATATC +ATAAATGTTTTTAGATATATTTGCTTATATGCAAACAAATATCAACACTATACAAATAAA +GAAATGTATTTTAATAAAAGAAAGGAATGGTATATCCTTTATGGATAATCAGAAACAACC +TCCCTCGTATTCTCAAACAACAAACGAGTTATTTTCAGATTTAGAAACAAGTTCTACTGG +CTTAAGTGAGCCAGAAGCTCAGCGGCGGTTAAAGAAATATGGACCTAACGCTCTAGCTGA +AAAGCCACCAAAATCAACCTTAATGATGCTCAAAGAGCAAATCATTGATCCGATGATTTT +GATCTTATTAGGAGCAGCTGCTTTTTCTGCAATTTTAAATGAATGGGTAGAAGCCAGCGT +AATTTTCTTTATTGTTGTCGTTAACTCAATTATTGGAATTATTCAGGAGAAAAAAGCTCA +GTCTTCTCTAGCGGCTCTCAAAACGATGAGCGCGCCAACTGCTACAGTTATGCGTGGCGG +TATTGAAAAGATCATCCCTGCTAGCGAATTAGTTATCGGGGATTTAGTTATTCTAGCTAG +TGGCGACATGGTTCCAGCTGATTTGCGTTTAATTCAATCTGCTAACTTAAAAATTGCAGA +AGCCTCTTTAACCGGTGAATCAATTGCTAGTGAAAAGAATGCGAAAGCTGTTCTTTCAGT +AGATTGTCCTCTAGGCGACAGAAAGAATATGGCTTATACTTCTTCAATCGTTACTTACGG +ACGAGGAAGCGGGATTGTAACTAAAACTGGAATGGACACCGAAATTGGCCAAATAGCCGG +TATGCTTGAAAATGATGATGCTGGTGATACTCCATTAAAGCGAAAACTTAATACAGTAGG +TAAAGTCTTAACAATCATTGGCTTAATTATCTGCGTTTTAATCTTTGCAATTGGTGCCTT +TTACGGCCGCCCACTTTTACCCCAATTTTTAGTAGCAATTTCTTTAGCAATTTCAATTAT +TCCAGAAGGTCTACCAGCAACTGCAACCATTATTATGGCGCTAGGCGTGCAACGAATGGC +AAAACAACATGCCTTAATTAAGAAGTTGCCTGCTGTTGAAACTCTAGGAAACGCGACGGT +TATTTGTTCTGATAAGACAGGAACATTAACGTTAAATAAAATGACTGTAACTCATTTAGC +TAATGGTGATGATTTCCTTAATAAAAAAGTTCTAAGTGTAGAAAAAGCAAGTAAGGATTC +TAATTCATATAAGCAATTAATCTATGCATCTAGTCTTTGTAACGATGCTAGTTTTAATTT +AGAAAATCCTAAAGAAGTAATTGGAGACCCGACTGAAGGTGCACTTCTTCCTCTAGCACA +AGATTTAGGTTACTCGGCTCTTAACTTAAGAAAAGAGTATCCAAGACTTAGCGAATATCC +TTTCGATTCAATTAGAAAGAGAATGACTACGGTTCATGAAATAAACAATGAATACGTCGC +TTACACTAAAGGTGCGCTCGATGAGTTACTGCCACTTTGTGACTACATCTACACAAATAA +TGGTATGCGTAAATTAACTAAGTCAGATAAAGATAATATTCTTACCTTATCGCACAAGAT +GTCTGATCAAGCATTAAGAGTCTTAGGCTTTGCCAGCAAAAATATGCTAAACCTGCCTCA 
+AGAAGGAGAAAATATTGAACAGCATTTAGTTTTCTTAGGTACAGTCGGTATGATTGATCC +TGCCAGAGATGAGGTTAAGGCATCAATTAAGATGGCTCGCGAAGCTGGAATTAAGACCAT +TATGATCACGGGTGACCACAAAAATACAGCAGTCGCTATTGCTAAAAATTTAGGTATTTA +TACTAATGGAAACACTGTTATTTCAGGTACTGAATTAAACGAAATGACAGATAATGAACT +AGATCAAGCAGTTAAATCCGCCACTGTTTTTGCCCGCGTTTCGCCTAATGATAAATTAAG +AATCATTCAAAGCCTAAAACGAAATAATGAAGTAGTAGCCATGACAGGAGATGGGGTCAA +TGACTCACCCACCCTAAAGGCTGCTGATATCGGTGTTGCTATGGGTATTGGTGGTACTGA +TGTTGCTAAAGATGTTTCAGATATGATTTTACTTAATGATAGCTTTACAACAATTACGGC +AGCAATTAGAGAAGGTCGAAAAGTATATCGCAATATTCAGAAAGTAATTCAATTTTTACT +AGTTGGTAATATTGCCGAAATTACTACTTTATTTGTAGCTACAATCTTCAACTGGGATGC +ACCGCTACTTGCTGTGCATATTCTCTGGGTTAACTTAGCTACAGCTAGTTTGCCTGCTTT +AGCTTTAGGAGTAGACCCAGCAAGTAAAAATATCATGAAACATAAGCCAGTAAAGACTGG +AACTTTGTTTGAAAAAGACCTTGTTTGGCGTGTTATCAGTCAAGGAATTTTCGTCGCCTT +AATGACCTTAATAGCTTACTGGATTGGAGAGTCGTTTGATAATCCAATTGCTGGTCAAAC +AATGGCATTTTGCGTTTTAGCCTTATCCCAAATGCTTCGCGCATTTAACCAACATTCTAA +TACTGATCCAATCTGGGTTAGAGGCAATAAAATAAATGTTTGGTTGATTATCTCCTTTAT +TGTTTCAGCGGTCCTAATGGGAATCATTCTTTTCACTCCTAACTTACAAACTCTCTTCCA +TTTGACTAGTCTTACTTCAAGACAATGGTTAGTAGTAATTATTCTTTCTCTCTTCTCTAT +CCTGCAAGTTGAGATAAGCAAGTGGATTAAGAAGCTAATTAAAGCTAGACAAAAAGAAAA +CAAGCTGCAAACAACCAGTGACTAACATAATATAATAATTTAATTATTATGAGCATATTT +TTCGACTTATGATGATTTAATTTATATAATATAAGTATAAATTTAACACTTACATCTGGC +ATAAAGACTTAAACAACCTTTATGCCTTCCACTTTTTGGTAATCAAAATAATCGGTATCG +TAGATACCAAAACAGCCAAATTGTAGTGACCCCCGAACTTCGGACACGGATTCATCCTTA +CACAAACTATTTTACGGTCTCAAAGAAAGAGCTTTCAATGTCTATATACAATATTTTATA +TACTGTTTTAGCATAATATATATTTATGGAATATTATTTTTTTAAGATTAATTGTTGACT +AAAAGTAAGTTATAGGATATTATAACTTGTGAAAAGTAAGAAAGGAGCTAATGATTATGT +TTAATGTACGTCAAGCAAATTCTTGTTGTTGTAATTGTCAAAAAATTTGGCAATTATTTG +GCGTACAGTCATTAGTTCACTTCAGGAATTTCTCTAGTTAGAAACTAGCTTTTCAAAGTT +ATAACTATGATTTTTCTCAAGAGCTGTTGGCACTAGGCCAATGGCTCTTTTTTGTTTGCT +TAGAATCGGGGTTGAATAAATTGCAAACTACTCTCATTGTTTTAGTAGCCGTCGCTATTC +TAGCTGGCATGGCTTACTTACATAAAAAGAATTGGGGATTTACCAAGCTAGTATTTCTCT +CATTAGTCGTTGGAATCGTCTTTGGAGTTTCTATTCAATTAATGTTTGGTGCCAAAAACG 
+AGATTGTAAAAAATAGTATTGATTGGATTTCCATTGTTGGAGATGGGTATGTCTCCTTAT +TACAAATGTTGGTTATCCCACTTCATTAGTTGGTGCCTTTACGCAATTAAAAATGACTAC +TAAAATCCGAAAAATTGCTACAAGTGTCTTAGCTATCTTGCTAGGAACTACTGCAGTTGC +TTCATTCCTGGGCTTTAGCAGTGTTGCAATTTTTAATTTAGGCGGAGCTGGCTTTGCTAA +GGGAATGACTGCTTCTTCAACTGCTCTTAGTGCAATTAAGGACCATCAAGAACAATTGAA +AGGCCTAACTTTACCGCAACAAATTACTTCGTTTTTCCCGCAAAATATTTTTGCCGACTT +TGCAGGAATGCGTTCAACTAGTACAATTGCAGTTGTAGTTTTCTCTATCTTTGTCGGAAT +TGCATTCTTACAAATTAAGAAAGAAAAAGCTGAAGTCGCTGCTACCTTTGCACGTGGTAT +TCAAGCATTACGTGCAATTATCATGCGGATAGTTAAAATTGTACTCGAACTCACTCCATA +CGGAATCTTTGCTCTAATTGCTAGAACTACTGCAACTAACAGTTTCGCAACAATGAGCAA +ATTATTGGTCTTCATCGTTGCCGCTTATGTTGCAATAATAGTTATGTTTATTGTTCACGC +TGTTTTACTTTTAATTAATGGAATTAATCCTATTACTTACTTTAAGAAGGCGTGGCCAGT +TTTAGTTTTTGACTTCACTTCCAGAACTAGTGGTGGTAGTTTGCCACTTAATGTTCGTAC +TCAACGTGAATCAATGGGCGTTAGCGACACAATTGCCGATTTCGCCGCTAGCTTTGGTTT +AACTATTGGTCAAAATGGATGTGCAGGTATTTACCCATCGATGGTTGCAGCAATTACTGC +TCCCCTTGTTGGAGTTAATATTTTCTCATGGCAATTTGTCTTAACCTTAGTTGTAATTGA +TGTTATTTCAAGTTTCGGCGTTGCCGGTGTTGGTGGCGGTGCAACATTTACTACTTTAAT +GGTACTTGGAGCACTTAACCTGCCAGTTACAGTACTTGGAGTTTTAATTGCTATTGACCC +AATCGTTGACATGGCAAGAACTGCTCTTAATGTTAATGATTCGATGGTCGCTGGTGTAAT +TACTGCTAAGCGTACTGGTGAATTAGATTGGAATATCTTTAATAATCAAAAAGATGATGT +AGATACTGAAATTGAATAATTTTTATAAATATTCTTGACATTATTATTTAGTCATTGTAG +TATTATGACAACAAAATTAATTAAAGCTTAGAAAAGATGAGTAAGCACTTAATGCATCTC +ACAGAGAGTCGGAACAGGTGTAAGCCGATGTTGCTAGAGTGAAGAAGATGGTCTTGGAGC +TAAATTAAAATGCGAGCTATTTAAAGTAAGCAATTTACGATTGACGTACGTTATCACGTC +TGAGTCTTCCATATTTTGGAGTACTTATTAAGGAACTGATTGTGAAATCGGTTTGAATAT +AGGGTGGTAACACGGTATTAAGCGTCCCTAGTGTAGTGTAATAGCTATACTAAGGACGCT +TTTTCTTTTGCCGATTTTTTTTTGTCAAAGGAGGGATCATTATGAAAAATAAGAATATTA +TTTTTAACGAACGGGTATTTAATCGAATGTGCTACTTAAATATGATCCTTCTTGTTAGTG +GATCCTCAAAAAACATTTAAGTGAAAAATTTAGTTAAAATTAAGAGGTAGATTACTATGA +CAGAATTAGCACATTTCGACATTGTTGGTAGTTTTTTAAGACCGGAAGAATTAAAACAGG +CTAGAGATAAATTTAATCACGGAGATATTTCGCAAGCTGAACTAATTCAAGTAGAAAACC +AAGATATTGAAAAGTTAATTCATACAGAAGAAAACTTAGGCTTAAAAGCTGTAACTGATG 
+GCGAATTTCGTCGCAGTTGGTGGCATCTTGAATTTTTATGGGGTTTAACTGGCGTTAAAA +AATATGATTATCACGAAAGCTACAAGTTCCATGGCGCTAAAACTAGAACAGATAATGCAG +AATTAGCTGGCAAAGTAGCTTATAATCCCGAGCATCCATTCTTTAAAGCGTTTGAGTTTG +TTAAAGAGCATACTAATGTGACTCCTAAACAAACTATCCCATCCCCTACACTGCTTTTTA +GAGATAACCGATCAGATAATTGGCCTAATTTTTATAATAATAAACGAGCTTATCTTGATG +ATCTGGCTAAAGCGTACCATGAAACAATCAAGCATTTTTACGACTTAGGTTGTTGCTATC +TTCAAATTGATGACACAACTTGGGCTTTTTTAATCAGTAAATTGAACGAATCCAAAAATG +ATCCCAAGGAACATGAAAAATATATTCAATTAGCGGAAGATTCAGTGTATGTAATTAATA +AGTCACTAGAAAATCTACCAGATGATCTGACTGTTGCTACACATATTTGTCGTGGTAATT +TTAAATCTACATTTCTTTTTTCAGGCGGATATGAACCAATTGCTAAGTATTTGGCGCAAT +TAAATTATAATCGCTTTTTCTTAGAATACGACAATGACCGTGCTGGAGATTTTGCTCCAA +TTAAAACAATCTGGAACAACCGGGATGACGTCACAATTGTTTTAGGATTAATTACATCAA +AAGATGGACAGTTAGAAAATCCTGCTGCAATTATTCAAAGAGTTAATGAAGCAGCTAAAT +TAGTTCCATTGTCTAACCTCGCGCTTAGTACGCAATGCGGCTTTGCCTCAACAGAAGAAG +GTAATATTCTCAGCGAAGCTGATCAATGGAAAAAGATAAAATTAGTCGTTGATACTGCTA +ACAAAATTTGGAAATAAAAGATATTTTTATAAGGAGGAAATTTATTATGGGTAAAGTTGA +AAGTTTTGAATTAGATCACACTAAAGTTAAGGCACCTTATGTTCGCCTAATTACTGTTGA +AGAAGGTAAAAAAGGTGACAAGATTAGTAATTTTGATCTTCGCTTAGTTCAGCCAAATGA +AAATGCCATCCCTACTGGAGGCTTACATACTATCGAGCACTTGTTAGCTGGATTATTGCG +TGACCGTATTGACGGCTACATTGATTGTTCACCATTTGGTTGCCGAACGGGCTTTCATCT +TTTGGTTTGGGGTACCCCATCAACTACAGATGTTGCTAAGTCTCTTAAGGAAGCACTTGA +AGAAATTCGCGATAATATTCAATGGGAGGATGTTCCTGGCACTACTATTGAATCTTGTGG +GAATTATCGCGACCATTCTCTATTTTCTGCTAAACAATGGTCAAGAGATATTTTAGAAAA +GGGTATTTCTGATGATCCATTTGAAAGAAACGTTGTAGAATAATTATTTTTGAAAAATAG +GACAAAAAAGATGCATATTACTGCTTCAAACAGTAAAAATGCATCTTTTTTCGTATTTAT +TTAATTTTTATCTTTTTCTTCTCTTCCTTTCTTTACCACCAATTAGTCCAATTATTGCTC +CGACAGAAGCTAATGCTAATCCTGCTAATGATAAATCTGTCTTTTGACTACCAGTTTGCG +GTAGTGTTGTTCTAGCACGATTAATTTGCTTAGCTGATGTGACATTTCTTGAAGTTCCAA +CTGCATAAGTTGTATGAGTTAAGCTTGTAGAAGTGTTTGAAGCTTCAACATTCTTGGACT +TGTTTAACTTATTAATTTGCGTAACAATCTTAGATGATTCTGTTTCATTCGATTGATTTG +GCTTGTTAGGTATTGTTTGTTCTTCCGGCTTTGTTGGTTCACTCGGTTGTGCTGGCTTAC +CTGGTGTTGTCGGTTCTTCCGGCTTCGTTAATTATTCTTTTTATATCTCTTCCACAAGAC 
+TATATTCCAAATTATCAACAAGCACACCCAAAAGATGGCTAAAATCATCGTATTCCGATA +ACCACTGCCTTTTTGAATCTTATTCCAAATTTCGCTAAAGTATGTTCCACTTAAAAATGC +ACTTAAAGTTATTATCGATAAAGTCAAAGACATCTTTTTAATTTTATCCTTAACCTTCAT +CAACTGAATCCAGTTATAGACAACTTCACAAATTACGATTAGTAAAACTAGTAAGTCATA +CCACCATTCACGTTGTTTAACAAATTCATCAATGGTAGTAATAAAATAGTAACCTGCAAG +GATTGCAAATATGTTTAAGATCAACAGCTGTCCCACAACTGTACTTTGTTCTTTTTCTTT +CATGATCTTCCGCACTTTCTACTTTTTCTCTTCATATAAATTATAAAACATTCGCTTTTT +TTCTTGCTTAACCTAAGGCTCTATTTTTTAAACTGCCGCCAATAATTAGCAAGGTATTCT +GTTGTTAAACTCACGGGATGGTTAATCAAATCTTGTGTTTGTCCTGCAGCAACGACTTTT +CCACCATTTTTTCCGCCACGTGGCCCTAAATCAAGAATATTATCTGCATTAACTATCATA +TTTAGATCGTGCGTAATCGTAATAATAGTTGCCCCTTGATCTAGCAATTTCTGCATTACT +TGAACTAAAACTTTCACATCAAGCGGATGTAATCCAATTGTAGGCTCATCAAATACAAAT +AAAGTATCGTCCTGTTTATGACTTAAATGTGTCACTAACTTCAAACGCTGTGCCTCTCCA +CCAGATAAAGTTGGTGTACTTTCACCTAAATGAAGATAATCTAGTCCAACTTCTTTTAGT +AAAAGTAAGTCACGCTCAATTTTAGGTTCTTTCTTAAAAACAGGAATTGCTTCATTAATA +TCTAAATTTAAAATATCAACAATTGAGTAGCCGTTCCACTTTACTTTTTGAACTTCTGGA +TTATAGCGATTACCCTCACAGGTCGGACAAATCTGCTGCATATCAGGTAAAAATTGAATA +TCTAAAGTTACAATTCCAGTTCCACCACAAGTTGGACAAGCTCCCTGTTTATTATTATAA +GAAAAGTAAGTTGGCGTATAATGCTTGTTTTTTGCAAGAGGCTGCCGAGCAAATAATTTA +CGTAAATTATCCATAATTGAAGTATATGTAGCAACAGTTGACCTGGTACTTTTACCAATT +GGTGAAGCATCAACGCTAACTACTTGATTAATTGGCGATTCAAAGTTCTTAACTTGTTTA +GGTAAATTTTCACCTTTTGCTTGTGCTTGAATTGCTGGAACTAAACTATCTAGAATTAAA +CTTGTCTTACCTGCTCCAGAAAATCCAGTCACAGCAGTTAATTGATTAACAGGAATATTC +GCATGAACATCTTGCAGATTAAAATAATGATCCACATTAAATGAAATTTTTACAGAGTTA +ACTTTTTCTGCTCGCTTTCTCGCCATTAATTCAGCAGTTCCATCAAGATAAGGTCTAATT +AAAGATTGCTTGTCTTTTTTTATTTGATCAACGCTACCTTGATCCAGAATTTCACCGCCT +TGTTCACCTGATCCTGGTCCAATCTCAATTATTTCATCAGCTGCTTTAATAATATCCACA +TTGTGATCCACAACTACTAGTGAATTTCCTTGTGCTACTAATTTATGCAAGACCTTAATT +AAGCCGTCAACATTAGCAGGATGTAGCCCAATTGAAGGTTCATCTAAGACATATAAGACG +CCAGTTGTTTCTGTTCTCAACGTGCGTGCTAACTGAATTCGCTGTAATTCACCTGTGGAT +AAGGTGTTACCATTTCGAGCCATTGTTAAATAATCTAAGCCTAAATCAAGTAGTGGTTGC +AAGTTCTCCACAAACTCTGTGAATAAGGCGCTGGCCATTTTATGCATCTCAGCTGGTAAA 
+CTCTTAAGCACTTGACCTTTCCAAGCAATCAAATCATCTAACGGCATTTCAGCTACTTCG +TTAATATTTAAACTACCTACTAATTGCTTCAATAATCCTGGCTTTAATCTTGAACCATGA +CACACAGGACAAGTCTGATATGAGAAAAATTCTGAAATCCGCTTTTGCGCACGTTCGCTC +TTACTAGTTTTGGCAGAACGTAAAACAGCTTGGTGAGCATTTTCATAAAGGGCATTAAAA +TCATGAAAAACTCTCCCCGTTCCTGATAAAAAGTCCATTTTATACTTCTTTTCAGGACCA +TTAAGAACAAAATCTTTTTCTTTATCAGTTAAATCCTTATAGGGAATGTCAATCCTAACC +CCCGCATGTTCTGCTACATTGGGCATAAAGTTTCTACCAGGTAGCGACCACGAAGCAACT +GCTCCCTCTTTAATAGAAAGACTAGGATCAGCAATAAACTTACTATCATCCAATTGACGT +ACTTTTCCTGTCCCGCCACATTCTTCACAAGCTCCATCAGAATTAAAGGCAAATTGCTCA +GCACTTGGCACATAAAACTTAACACCGCAAACTGGACAAGTAAGTTGACCCATTTCTTCA +CCTGACTTTGCCATTGCTTCTGCAATTTCTAGACTTGGCTTTAAACGGTGACCATTAGGA +CAAACAGGCGAGCCTAATCTAGAAAAGATCAACCGCAAAATATTAAAAGTTTCGCTCATT +GTTCCAACCGTTGCTCGTTCAGATGGAATAGTTGGACGCTGTCTTAAGGCTAAAGCTGAA +GGAATATGCTTGACGCTAGTAACATTTGCCTGATTTCCCTGTTTGATTCGTCTACGCATA +TAAGTAGACAAGGCATCTAAATATCTTCTGGAGCCTTCTTCATATAAGATACCCATGGCT +AAAGAAGATTTTCCTGAGCCTGACAAACCAGAGATTGCAACAAATTTATGAAGTGGGATA +TTAACATCAATATTTTTCAGATTATGAACACGTCCCCCACGCACTTCAATCTGAGTTGGT +AACTTTGCAGTCAAAGTCAGTTCCTCCTTACTTTTTATCAACTACTATCTTACAACAATT +ATTATAAAAAATAGCCATTCTATCGTTGCCTATACAAAAAGAAGACTCTGCTTTATAAAA +GTAGAGTCTTCTGAAGGAATATAACTATGAAAATTTAATTTAAGGAAATATTATTTAAGA +ATTGTGTAATTTGTGTTGATATATGAGGATTATGGAAAATTAAAATTTTAAGTTATTTTG +TAAAGCCAACATCGTACCATGAGTTTGCTGATGGCTTAAGTGACCAGCCAGTTACCTTGT +CGTTAACTGCAGTTACTGACCAGCTGTTTGTAGTAAGTACAACGTAGGCATTGTCATACA +TCCATTGTTGCCACTTATCAAATGCTTGTACACGGTACTTGTGGTTGAAGGCCTTAGTTG +AATCAATGTTAGTAAGAAGCTTAGATTGAGTTGGAGAAACAAAACGAGCCATGTTGTATG +GAGCACCTTCACTGTAAAGGTCCATTGGTGATGGTTCAGATGATAGGCTCCAACCACCTT +CAAAGACATCAATCTTAGGACTATCTGCTTGAACATCTTGTACCCAAGAGTTGAATTCCA +TTGGACGGCCACCAACAAATTTAGCATCTAAGCCAATCTTTTGCCATTGTTGGATGTAGT +TAGTCCAGATCTTTTCAGCGTTTGGTTGTGTATTACGAACAGCAACGTTAATAGTTAACT +TCTTACCATTTGGTTGACGACGGTATTTTTCACCCTTACGCTTCTTGTATCCAGCCTTGT +CTAACAATTCATTGGCTTTCTTCAAGTTGTATGGGTAGCCCTTGATACTCTTGTCGGAGA +AGTCTCCAAATTGTGCTGGAATTAAAGTATTAATACGGAAAGTCAAGCCATTGCTGTAGC 
+GCTTGTTAACAGCATCAATGTTCATTGCATAAGCCATCGCTTTACGTAATGAAACATTGT +TCATCTTAGCGTTCTTATCTTCAACGTTCTTACCAGTCTTCTTGTCAAACTTACCTACCT +TAAAGCCTAAGTAGTTGTAAGATAGAGGAATCTTACCAATGAAGTTAACGCCCTTAGTAT +CCTTAACATTGTTCCATTGAGAGTTGAGCACACCAGTAATATCAAACTTGTGACTCTTAA +TAGCTTGAGAAACAGAGTTAGTTCCAATTACTTCCATAGTAATCTTGTCTAAGTTTGGCT +TACCACGCCAGTAATGTTCGTTTGGTACGTAAGTAACTGATTGGCCACGAACAACCTTTT +GAACCTTGTAAGGGCCAAAGAATAGTGGTTGCTTCCGAACTTTATCATCAGATAAAAGTT +TCTCAAATGGAACATCTTTCAAGTAGTGGTATGGTTCTGCACTTTCAAGGAAGTAACCGT +TACCTGATTGAAGCATACCTGGCTTCATTTCTTTGAAGTGAAGTACAACTTTTCGACCAT +TTTCGCCATTTGGCATTTCAATACCAGAAATCTTGTCGGTCTTACCCTTATGATATTCTT +CTAAACCAACAAGGTTAGCTAAAGAATCTGTATATCTTGAAGTCTTGGTCTTAGGGTTAC +CAACAATTTCGTAGGCGTATTCAAGGTCCTTAGCAGTTACTTGCTTACCATCAGACCACT +TAACACCCTTTTTAACTTCAATGGTAATTGTCTTTGCATTCTTATCTAACTTAAAGGTAG +CTGCACCCTTATTGTTAATCTTATAGCTATCGTCAACACTAAAGAGTGATTCTAAACCTG +GACTTTGAATTTCTGTGTCAGTAGACGTATCAGATAATTCTGGTAAGAAAATACCTGTGA +AAGGTGAGTCACTTTCAATTGCATACTTTAATGTTCCACCCTTTTTAACTTGTTTCTTAG +GAACAGCTTCTTTAAAACTAACTTTCTTATCGCTAGCGTTATTATTGTTGTTATTCCCAC +AAGCAGTTAAAGTCAATGCCGCACCGGACAATACTGTAATTGCTCCTAACCACTTAACTT +TTTTCATTTTTTCTCCCCCTTTGTGAGATGACTTAAATTTCTTGATTTTTATTATATGAT +ATTAAAATGAAAAAAACAATACAAAATTAAAATATTTTTTGGCTAAATTTGTGTTTTATT +AAAAATCTAGTATTAATACACTTGTTTTCTTTAAGGTGTAATCTGATAGTTATAAAAATC +GCCAAAAGTTATAAAGAGTTATAAAAAAAGAAAGATGATACACATGAATAATAATCCTTT +CAATCCTAGTTTTGGCAAAATACCAAGTATCTTTCTAAAACGCGATAATTTATCTCAAAG +AATAATCGACGAATTAAATAGAGAAAATTCTCCATTTCAAACTTCACTCATTTATGGTCA +AAGAAGGTCTGGAAAAACCACCCTTATGTCTGAAATTTCTTCTAAACTTAAAGAAAATAA +AGACTGGATCATTATTGACTTAGTCTTTGATAATGATTTACTCATTTCTCTAACTAATCA +ACTTCAAGAGCATTTATTAAAATTAAAACTAATTAAAAATTTAGATATAAAAATGAACTT +TCTCGGCATTGATATTAATGCTTCCTTAGCTCAAAACATCGATGCCAATTTTCAACAAAT +TTTGCAAACTAGTCTTGAGAAATTAACAAGAAAAGGCAAAAATGTGCTAATTAATATTGA +TGAGGTTCATTTAACTCCTTTACTAAAGAAATTTGCCAATTGTTATCAGATCATGATTAG +AAAAAATCTTAAGGTTTCTTTATTAATGGCTGGTCTTCCAGAAAATGTTTCTGAAATTCA +AAATGACGATGTCTTAACTTTTCTTCTGCGATCTAATCGGATTGTCCTCAATCCATTGAA 
+CTTAGAAACTATTAAACTTAGCTACAAGCATATTTTTCAAAATGCTAACTTCAATATTGA +TGATAGAACTATTTTATATATGACCAAGCAAACACAAGGATTTGCATATGCCTTTCAACT +TTTGGGTTATCATATCTGGAGATATGCTACAGAACAGAATAAAAAAACTATTTCTCTTTC +TTTAGTAGACGAAATCTTAGACATTTATTTGAGTGATTTAAATCGCAATGTTTACTTTAA +GGTTTACAATGATCTTTCTTCTAAAGAAAAAGAGTTCGTGCAAGCAATGGTAAAAGTTGG +AAAACAAAAAGTCAAAAGCCAAGAAATCGGGAAAATAATGAATAAAGGTGCTAATTATTT +AGCTGTTTATCGGAGAAAATTAATAGATGATCAAGTAATCAAACCTGATGGTTATGGCTA +TGTCAGTTTCTTACTCCCGCATTTTGACAAGTTCATTGAACAAGAAATGATCTTAAATGA +ATTCTAAAATTTCAAGCAAAAAAAGCTGGTAATACCAACGCCATTTCAAGCGAAGATATT +GCCAGCTTTTCTTTATATTCTCTTTTCAGAAGTTTTTTTAATTTTTCTCTTAGGTGTGAA +ATTCAAAGCTAAAACTGCCCCAATTACTAAAAATGTTCCTAGCCAATCCATCCCTGACAT +CACCAAACCAAAGATTAAAACTGAACCAACTGTAGCTGATAAGGGCTCAAAAGCATCAAG +CAAACTAACCGTTGACGGCTTAACATAACGTAATGCATTCGCCATAATCTGAAAAGGCAC +TATTGTTCCCAGAATAATAATTGCTCCTACCCAGAGCCAAACTTTTGGTGTGTTTGGAAC +AGCGGGAAAGCTAGGATGAAAAATCACCAAACCAAGTCCAGCAAAAATCATCCCCCAACC +AGTCAAAACTAAACTCGATATTCTTTTAACAATTTTAACTGGAATCAACGTATAGCTTGC +TTCACCTAGAGCAGATAATAAGCCGAAAAATAATGCCACTGGAGTAATTGTTAAATGATT +TAAATTTCCATGAGTTGACAGCAAAAATACACCAATAAAAGCCATCAGTGCTGCTAAAAT +ATCTAACCTTCTTAAAACTTGCTGATGAGTTAAAGCAAGATAAGCAAGAACAAAAAACGG +TCCAATAAATTGTAAGATAGTCGCAATCGAAGCATTCGCCATTTCAATGACAATAAAGTA +AAAAATCTGTACTGGTAATAAGCCAAATACCCCATAAGCAATAATGTGCAACGTATTTTT +CTTGTCTTTTAAAACTGAAATTGGCTTTTGATGTAAAATTGTCGCAATGATTAATAAGAC +AATCCCAGAAATAATTAGTCTTACTTGTGTCAGCCAAATAGGTGTAATTTTAGAACTGAT +CTTAAATAAAGACTCCGCAAATAAACCTGATATTCCCCACATCACAGCTGCTAAAGCTGC +CAAAACTGTCCACAGTCTGCTTCTAGTAACTTGCTTATCCATCCAGCTTCTTCCTCACAA +ATAAAAATCAATCTAAATCATCATAGCATAGTTACTCTAAGCAATCATTGTAATTTCGAC +AAAAAAATACTGTTCAATTAGGTAAGCAACCTGAACAGTATTTTTTAATTCAAATTAATC +TAAATCTTTTCCATCAGATTCAATAACTTTCTTGTACCAATAGAATGAATCCTTGGGCAT +TCTCTTTAAAGTACCGTTGCCTTCATCATCTCTATCCACATAGATAAAGCCATAACGCTT +ACTCATTTGACCGGTACCTGCAGAAACTAAGTCAATACAGCCCCAAGTAGTGTAACCAAT +TAAATCTACACCATCATCAATTGCACGCTCCATTGCCTTAATATGCATTCTCAAGTAATC +AATCCGGTAATCATCATGAATCTTGCCATCATCGCTGATCTTATCAACTGCGCCAAGACC 
+ATTTTCAACAACCATCATTGGAATGTCGTAGCGATCATACATTACTTCAAGATAATATTG +AAGTCCATCTGGATCAGTTGCCCAGCCCCATTCGGAATACTTCAAGTATGGGTTCTTAAC +ACCAGCAGCAAAATTACCACCAACTTTATCTTTTACTTCATGGGTAGTAATAATGTTAGA +CATGTAGTAAGAGAAGGTATAAATGTCAACTTTTCCTTCAAGCAAGTCTTTTCGATCTTG +CTCGGTAATATTTAAATGAACATTATGCTCATTCCATAAACGCTTAGCATAAGTTGGGTA +TTTACCTTTTGCTTGCACATCACCGCAGGTTCCCACATATGACGGTTAGCCAAAATATCC +TTAGGATCTGGTGTTAATGGATAGTCTACAATACCACAAATCATATTACCAACTACATAG +TTAGGATCTAATGCATGAGCAATCTTAACTGCGCGAGCACTAGCAACAAATTGATAATGC +AACTTTTGGTAGGCATGTTGATATACCTTATCATCAGTTACATTATTGCCAAATGCACTC +AGCATTAAAAGCGTTGAGTTAATTTCATTAAATGTAAGCCAGTACTTAACCAGTCCTCTG +TATTCTTCAAACAAAGTAGTTGCATACTTAACATACATATCGATCATTTTGCGATCTCCC +CAGTCATGGTATTTTTCGCTGAGGTATAGGGGATCTTCATAGTGAGAAATAGTAACTAAA +GGTTCAATCCCGTACTTTTTACATTCTTCAAATACACGGTGATAAAAATCTAACCCAGCT +TGATTTGGCTTTTCTTCATCGCCTTTAGGGAAAATTCTAGTCCAGGCAATTGAAAGGCGG +AAGATCTTAAAGCCCATTTCAGCAAACATCTTAATGTCTTCTTTATAGTGGTGATAAAAA +TCAATTGCCACGTGGTTTGGGTAATATTCATTTGGATCAATTGCACCAACTGCGCCTTCT +GGCAAGCCCGCCCCAGGAATACCTGGAGTTTTTTCTAACTTACCATTCAACTTATAAGTC +AACATTCTTGGTGCATCTAAACTACCAGCAGTTGTAATATCAGTGACTGATAATCCTTTT +CCATCTTCATCATACGCACCTTCAATTTGATTTGCGGCAGTAGCTCCGCCCCATAAAAAG +TTTTTTGGAAATGACATTTATATTTCTCCTCTTTTTCAATACTAGTCATTTTTATTATAT +ATGAAATCGATTACATTATCTAAGCCATTTGAGGATTTGAACTAGCTGCTTCTGTTTCAC +TAGCTCCTTCTAAGTCTAAAATCTCTTGTCCTACTTTTACCTTGCCTTGACTTACAACAG +TGATCTTTTCATAATCTTTTGAGTTAGTAACTACGATTGGTGTCGTTACAACATAGCCCT +TAGCATGAATTGCATCGATATCAAACTTAACTAATTCATCACCTGCTTTAACTTCGTCGC +CTTTTGCAACTAAGGTTTCAAAACCATGTCCTTGTAAATTAACAGTATCCATTCCGATGT +GCATCAAGATTTCTGCACCATCGCTTGTCTTCATTCCGATAGCATGTCCTGTTGGGAAGG +TCATTACAACTTTACCATCTGCTGGTGCATGTAGAACTCATTCGCTAGGCTCAACAGCAA +CGCCTTCACCCATAGCACCGCTTGAGAAGACTTCATCTTTTACTTCTGATAGTGGCAATA +ATTCACCAGCAAGTGGACTAACTAGTTCAGTTGAAACATTTAAGTCGCTAGTACTTGCTA +AGCTTGGGGCAGCTTTCGCAATAGTATCTGCTTGGTTTGCAACTTCTTGAACAGTCTTTG +CTTGTTTTTCATCGTAAGCTTGATCAACACTCTTCTTGCCAAGAACCATTTGTAAAATAA +ATCCAAGAACGAATGAGACAGCCATTGCAATCATTAAACCGTAAACACTCATGTCAACAC 
+CAGTCTTTGGACCAATAGCAGCAGGGATTACGAAGACTCCCATACCACCCATCATGTACA +TCTTAGTACCGAAGAATCCAATCAAACCGCCACCAATACCAGCTGCAATACAACTTAAGA +CAAATGGCTTTTTACGTGGAAGAGTTACACCATAAATAGCTGGTTCAGTAACACCGAAAA +TTCCTGATAAGAATGCTGGAAGAGCAATACCTTTTAATTTTTGATCTTTAGTTTGTAATA +AGATTGCTAAAACAACACCAATTTGTGCAAATGAAGCACCTAATGATAACCCTAAGATTG +GGTCATAACCTAATGCAGCAATATTTGACATCATCACTGCAACAAATCCCCAGTGAACGC +CGAAGATAACGAATACTTGCCAGAAGCCACCAAGTAAAATGCCAGCTAATACTGGACTAA +AGTTGTAAACAGCTGAAGTAATGGCAGCAAGTGCATTACCAATCCAAGTAGCAAGTGGCC +CGATAATTAAGAAAGTTAAAGGAACTACAATCAGTAAAGTAACAAATGGAACTAAGAAAG +TTTTTACCACAGTTGGAATCCATTTCTTGCACCATTTTTCAACAACAGAAGCAAACCAAA +CCGCTAAAATAATTGGAATAACTGTTGAAGTATAGTTCATTGAAATAATTGGAATACCTA +AGAATGTAGCATGAACTTGTGAATGTAAGAACGTTCCGTTAAATAAATCAAACAAAACTT +TTTTGCTGCTATTCATTGCTACCATTGCTGGATAACACATTGCAGCACCAATTGTAATGG +CTGTAAAGCGGTCTACCTTAAACTTCTTAGCACTAGTAATTGCTAGAATAAGTGGCAAGA +AGTAAAAGAAACCATCCCCAATCGCATATAAAACTTCATAAGTACCAGAGGTTTTAGCAA +GCCAGCCAAATGAAGCACACATTGCAGTTAAGCCTTTAATCATACCTGCTGCAGCCATTG +GCCCCAAGATCGGAGTAAAGATACCTGAAATAAGATCGATAGCTTTATCCATTAAACTCA +TATTGCTGTCATCTAAATCATCATCAGCAACTTGGCCACCACCAGAAAAGCCACCTTCTT +TTAAGACTGCCTCGTATACGTCAGCAACTTCATTACCAATAACTACCTGATATTGTCCAC +CAGCTTTAACAACTGTAACAACACCATCAGTATCTTTTAAGGCATCATCATTAGCCTTTT +TTTCATCTTTTAATTTAAAACGCAAGCGTGTTGTACAGTGAACAACGCTGATTACGTTGT +CTTTTCCGCCAACGTCCTTAATAATCGTTTTTGCTAGAGCATCATAGTTCTTAGCCATAG +CTTTTTTCCTCCCAATAATAAAACCCAAGCTACTCAAAAACTAGAGAACGTAAATGTTCA +ATGTCTTTCAGTAACTTGGGTTCTTGCCTAATCGAATTAGTAACATACCTAAATATGAAA +ATATTTAATTTTCTTGGCGGTTTGTCACGCGCCAAATATGTAAATAAGTTGCGATTCTCT +CTGCCGTATCATAGGCATGATTGTACTTAATCTTCATAAAAGCTAATAGCGAATCATCAA +GTTCGCCACTTTTTTGATGCTTATTTCTTAAAAGTCTTACCAATAATACTCTTAAGTGAG +TAATGAAGCGACTATAATTAAACGACTCTGTATCTAAAGTCATTTGGTATTGATACTGAA +TAATATCAATAATCCCGCTAATCAACTCAGTGATCTCAACTGTTTCTTGAACCTGATCAT +TATCAGAAGCTGCATTAACGAAGTGATAAGTCATAAAAATACTTTCACTCGGTGGTAAGT +TGACTTTCATTTCTTTATTAATTAAAGCAATGACTTTTTCGCTAATCTTATACTCCTTAG +GAAAAAGATTCTTAACTTCCCAGCGTGTATTAGCAGCACTCATATCAATATGATCTTCAA 
+TTCTTGAAAGAGCAAAATCAATATGGTCAGCTAAAGCTAAGTATTGAAAATCATTGAACT +TCACTTTTAGAAGTGGCTCCACCATCTGGATCACCTTATTGGTTAAATCGATGGTTGAAG +CATTGATTTCTTTAACTTGATTGACTTCATCTTGATGAGAAGTAGCGGTAAACCGCCTTT +CAATTTTATTCTCATCAATCTTATCGCCTTTCTTTAAGCCAAAGCCCACGCCTTTGCCCA +GGACTACTTGTTCTTGGCCTTGATCGTCCTGAACCAAGGCTGCACTATTGTTAAATGTTT +TAAGAAATATCATCCGACGAATCACCTTTATATAGAAAAAAGCTACCACAAACTACAATA +TACACTTTAAGTATATCATCATTATTTGTAGTAGCTCAGACCTGATCGCATCAGTAATCC +GCTTATTTACAGTAATTATAATAGCGCTTTCACATATTTGTGTCAACAAGCATTTGCAGA +AAAATGCTTATTTTCATGTCAGAAAAAAAGGCTAGACCTTAATGTCTAGCCTCTTTTAGG +GATTTCAAACTAACTTTTATTATTTTTGACGCTTTTTTCTATCTACGCCTAATCCGAATA +AACTACCTACTGCGGCAATTGCTAAGCCTAAGATACCAGCTGTATTTTCTGACTTAGCAC +CAGTTTGTGGCAATGTATTCTTTTCAGCATGGTGATTTTCATTTACAGTAGCTGCTTTTG +CAGTATTGGTTTCAGAATTAGTTTCTGTAGTCTTACCACCATTGTTGTCACTTGTTCCAG +TAGCGTCTACATAGACCTTAACTGTTACATCTACGCTAGAGCCATCTGGATAAGTTACCG +TAACAACGCCTGTATGTTCACCAATTGTAGAAACATTTGGCACTTCTTTCCAAGTGTACT +TAGTACCTTCTGGCATATCGTTCCTGTTCTTAATACCTTCTGATGGGTTTGGAACAACAC +CTGGAGTAGTGTGAATTGGTTGTGGCTCTGGAGTTACATGGTTCGTACCGATAATTACTG +TTACGTTAACCTTATCTTTTGAACCATCTGGGTAAGTCACAATCACAGTGCCTGTAGTCT +TGCCTGGCTTAGTTACATCTGGAGTCTTCTCCCAAGTGTACTTAGTGCCGCTTGGCATAT +CATCCTTGTTCTTAATGCCCTTTTCTGCTGGTGGTACTACACCTGGGGTAGTGTGAACAT +CTTGACCTTCTGGGGTGTACGC +>NODE_2_length_40000_cov_63.1617_ID_3 +CCAGCGGCCAGGTCGATCGACAGCACGCACTGCCCGGTGTAGGGCAACAGGCGCTCGCGG +TCGTCCAGGCTGCCCGCGCAGGGCTTGACCACCATGACATCGTTGGCACCGGTTTCCAGC +AGATGGTCGATCACGCCGAGCAACTGCCCGCCCTGGTCGATCACCTTCAGGCCTTCCAGC +TGGTGCCAGTAGTACTCACCTTCCTCGAGAGAGGGCAACTCGCTACGCGGGATGCAGATC +TCGTAACCGGTGAAGGTGCGGGCCTCTTCGCGATCGTCGAGCCCCTTGAGCTTGGCGGCC +AGGACCTTGCCATGCAGGCGCCCCCTGACCAGCTCGGCCTGCCGAATCTCGCCGTCGCGC +CGGAGCGTCCAGCGGCGATAGTCCAGCAGGTTGTCCAACGGGTCGGTAAAGGAATACACC +TTCACCTCACCGCGGATGCCGTACACCGAAACGATCTTGCCGATCACGACCAGGTCGTCG +GCGGGTGTCGGCATTTCACTCATGACGGCTTAGGCGTTTGCCTTGGCAGCGTCCTTGAGC +AGCTGAGCAACGCGCTCAGACGGCTGTGCACCCTGGCCGAGCCAGTAGGTAGCACGCTCC +TGGTCGACGGACAGACGCACTTCGCCACCAGTCGCAACCGGGTTGAAGAAACCGATGCGC 
+TCGACGAAGCGACCATCGCGCGCATTGCGGCTGTTGGTCACGGTCAGGTGGTAGAAGGGG +CGCTTTTTGGAGCCGCCACGAGCAAGACGGATGGTTACCATGTGAACTTCGTTCCTGTAG +TCGGTGCTTGCAAAATGAATGCACGCTGGGCCCACGGCCCGAAAGGCCGCATATTCTAAG +GATTATCGGGGAATTTGCAAATCTCTTTTTCGGCCACCCATCGGCCGGCGCGGAGAGCGC +GGGAAGCACACGGTCCGCCCGCCGGCCGACGGCGCGAAACCGCCGGCGTGGCCAGCGGCC +GCGCGTCCGATCAAAGCTTGGGCATGCCCCCCGGGAACATACTGCCCATGCCTCGCATCA +TCTTGGCCATGCCGCCCTTGGCGGTGACCTTCTTCATCATCTTCTGCATCTGCTTGTGCT +GCTTGATCAGCCGGCCGACGTCCTGCACCTGGGTACCGGAGCCAAGGGCGATGCGGCGCT +TGCGCGAGCCGCTGATCATTTCCGGATCGCGCCGTTCGCCGGGCGTCATCGAGTTGATGA +TCGCCTCCATCTGCTTGAACTGTTTCTCCGCAGCACCCTGGGCATTGCCCATCTGCGCCA +GGTTGACCCCGCCGAGCATCGGCAGCTTGTCCATGAGCCCGCCGAGGCCGCCCATGTTCT +TCATCTGTTGCAACTGGTCGCGGAAGTCTTCCAGGTCGAAGCCCTTGCCCTTCTTGATCT +TCTTCGCCAGCTTCTCGGCCTTGTCGCGGTCGAGGTTCTGCTCGGCCTGTTCGATCAGGC +TGAGCACGTCGCCCATGCCGAGGATGCGCGAGGCCACGCGGTCGGGATGGAACGGATCGA +GCGCTTCGCTCTTTTCGCCCATGCCGAGGAACTTGATCGGCTTGCCGGTGATCGCCCGCA +CCGAGAGCGCGGCACCACCACGCGCGTCGCCGTCGACCTTGGTCAGGACCACGCCGGTCA +GCGGCAAGGCGTCATTGAAGGCCTTGGCGGTGTTGGCGGCGTCCTGGCCGGTCATGGCAT +CGACCACGAACAGGGTTTCCGCCGGCTTGATCGCCGCGTGCACCTGCTTGATCTCGTCCA +TCATGTCGGCATCGATGTGCAGGCGGCCTGCGGTATCGACGATCACCACGTCGATGAACT +TCAGTCTCGCCTCGCGGATCGCCGCCTCGGCGATGGCCACCGGCTTCTGGCTGACGTCGG +AAGGGAAGAAGGTTACGCCAACCTCGCCCGCCAGGGTTTCCAACTGCTTGATCGCGGCGG +GTCGGTAGACGTCGGCGGAAACCACCATCACCGACTTCTTCTTGCGTTCCTTAAGGAAGC +GCGCCAGCTTGCCCGCGGTGGTGGTCTTGCCCGCGCCCTGCAGGCCGGCCATCAGGATCA +CCGCCGGCGGCGCGACGCTCAGCGCCAGGTCCTCGTTGGCCGCCCCCATCAGCTCCTCGA +GCTCGGCGCGGACGATCTTCACGAACGCCTGTCCCGGGGTCAGGCTCTTCGAGACCTCGG +TACCGACGGCGCGCTCCTTGACCTTGTTGACGAAGTCCTTGACCACCGGCAGGGCCACGT +CGGCCTCGAGCAGGGCCATGCGCACTTCGCGCAGAGTGTCCTTGATGTTGTCCTCGGTCA +GCTTGGCCTTGCCGGTGACATGGCGAAGCGTCTGCGAGAGGCGGTCTGTAAGGTTTTCGA +ACATGCGCGATCCTTCCACGGGGGTTGCGGCAAGCGGCGGATTATAACCAAGAGCGCGTC +GCGGGACACGCGATGAAAGACGGAGTGTGTGTCCGGCGACGACCGGAGGCCAGCCTCAAG +TCCTTGATTTGCCGATGCGTTCCGTCTTTGCCGAAGCGACCGCCGGACCATGAGGTCGCG +GCGCCCGCGGCAGGCGGTCCGCCAACCCGGCCACAGGTACGAGAGAACTGTTCGCAACGG 
+CTTTCTCGCGACGCTCGATCTATGCCAAACTCCCATCCTTTCGGGCCCGCGATTACAAGG +ACTTATGCATCCCCTGCTGCCCAGCCTCATCGCTGCTGTTCTCTATCTCGGCACCGCCGC +CTACCAGGGTGCCTGTGTGTCCAAGCGCACCGCGCCGGGCAAACCCCTGCTGCTCCTGTT +CGGACTCCTGGCACTGGTCGCCCACGCCTTCAGCCTCTACCAGCAATTGCTGACCCCGGC +CGGCCTGGTGCTGGACTTCTTCAACGCCGCCAGCCTGATCGCCGCCGCGGTAATTCTCCT +GACCCTGCTGGCGACCTTGCGGATACCGGTGGAGAACCTGCTCCTCCTTCTATTCCCGCT +GGGTGCCCTCACCACCCTGCTGGCGGTACTCATCCCCCACGGCACCGTCGAGCCGATCAA +CGAACAGCCAGGCATTCTGGCCCACATCCTGCTCTCGATCCTGGCCTACGGCCTGCTCAC +CATCGCGGTGTTCCAGGCGCTCTTGCTGCTGCTCCAGGACTACCGCCTCAAGCACAAGCA +TCCGTCCGGGCTGATCCGCAACTTCCCGCCCCTGCAAACCATGGAAAGCCTGCTGTTCGG +CTTCCTCTGGGGCGGCTGGTCGCTGCTTTCGCTGTCGCTGCTGTCCGGATGGCTGTTCGT +CGACAACCTGTTCGCCCAGCACCTGGCGCACAAGACCATCCTGTCCTGCTTCGCCTGGGT +GGTCTTCGCCGTGCTCCTCTGGGGTCGCCACCAGCTCGGCTGGCGGGGCCACAAGGCGAT +CCGCTGGACCCTGGCCGGTTTCTGCCTGTTGATGTTGGCGTACTTCGGCAGCAAGCTGGT +GCGGGAATTCATCCTGCACATCTGATGGGCCTCCTTCATGGATGAGCTGCACCCCGGGTA +CCTGGTCGGCCTGCTGGTTCTCCTGGTCGCCTGCTCGGCGTTCTTCTCCTGCGTCGAGAC +CGCCCTGCTCAACCTCGACCGTTACCGCCTGCGCCTGCAGGCCAAGCAAGGCCTGCGCGG +GGCCCGGCGCAGCAGTTGGCTGCTGCTGCACGACGACCGCCTGCGCGGCACCCTGTTGTT +CGGCCGCACCCTGGTCAACGTCAGCGCCGCCGCCCTGGCCAGTTGGGCCGCGCTGCGCCA +CTGGGGCGTCATCGGCCTGGCCGTCGCCATCCCCGGCATGACCCTGCTCCTGCTGCTGTT +CGGCGCCCTGCTGCCGCGCCGCTACGGCGCCCTTCGCTCGGAACGCGTCGCCCTGCCGCT +CAGCCTGCCGCTCCTGATCCTGCAGCGCCTGTGCTGGCCCCTGCTGTGGCTGCTGACCCT +GCTGAGCAACGCGCTGCTGCGCCTGCTCGGCGTCGCCGCGGCGGAACAGGACGACAGCGG +ACGCAGCCGCGACGAAGAGGCCCTGCATCCGGCCGACAATCCCCAGGCCAGCGAGGTCAA +CCGGCATGACATGCTGCTCGGCCTGCTGGACCTGGAGAAGGTCACGGTCAACGACCTGAT +GATCCCGCGCAACGAAATCGAAGGCATCGACCTCGACGACGAACTCGAGGTCATCGTCGA +GCAGTTGCGCACCACCAGCCACACCCGCCTGCCGGTCTACCGCGACGACGTCAACCAGAT +CGAAGGGGTCGTGCACATGCGCCAGATCGCGCGCCTGCTGACCCAGGGCCGCCTGACCAA +GGAGAATCTCCGCCAGGCCTGCATGGAGCCGTATTTCGTGCCGGAAAGCACGCCGCTGTC +GACCCAGTTGGTGAACTTCCAGAAGGAGAAGCGCCGTATCGGCGTGGTGGTCGACGAATA +TGGCGAGGTGATCGGCATCGTCACGCTGGAGGACATCCTCGAAGAGATCGTCGGCGACTT +CAACGACCTCGACAGCCTCGACAACCCGGATATCCAGGCCCAGGAGGACGGCAGCTTCGT 
+CATCGACGGCAGCGCCAACCTGCGCGAGCTGAACAAGTCGCTTGGCTGGCAATTGCCCTG +CGACGGCCCGAAGACCCTCAACGGCCTGGTCACCGAAGCCCTGGAGCAGATCCCCGATTG +CGCGGTGTGCCTGCGCATCGGCCCCTACTGCCTGGAAATCCAGCAGTCGGCGGAAAACCG +TGTGAAAAGCGTGCGCGCCTGGCATCCCCGCGCGCTGACCCCGCTGGTCGAAAGCGACGC +CAGCGTCTGATCCGGGATTGCCGAACCGGCCCCGCGGCGCTCTATAATCGACCCCAGCTT +ATCCAGCCCTGCCGCCGGTTCCGCCGCTACCCGCAGCCCGAGCCCCGGCCAGTCACCCGC +CAGTCCCGCCTGCCCGCCCGGCCCCGATGCCGACGGTACCAGCCGGCGCACGGCGCGCAG +CCCCATCCTCCCTGCGCGACCGATCTCGTCCGCCCCACCCGGGCGCCGCGCCGCCGCATC +GGCGCACTGGCCATAACCATCCGGCCGCACGGAAGCACTCCGTCGTGCCCCGACTGCCAG +GGACCTTCCCACCATGACCGCCGCCACCCTCGCCGCCGACGCCGCTCCGGAACCGGCCAA +CTCGACCACCCGGGTCGCAGTCGCCAGCTTCATCGGCACCGCCATCGAGTTCTACGATTT +CTACGTCTATGCCACTGCCGCCGCCCTGGTGATCGGTCCGGTATTCTTCCCGCAGACATC +CGGCACCGCGCAGATGCTCAGCGCCTTCCTCACCTTCGGCATCGCCTTCCTCGCCCGTCC +GCTGGGCTCGGCGCTGTTCGGCCACTTCGGCGACCGCATCGGGCGCAAGTCGACCCTGGT +CGCCTCGCTGCTGCTGATGGGGGTGTCCACCACCCTGATCGGCGTCCTCCCCGGCTACGA +CAGCATCGGCTACTGGGCGCCGTTGCTGCTCTGCGTGCTGCGCTTCGGCCAGGGCCTCGG +CCTCGGCGGCGAGTGGGGCGGCGCCGCGCTGCTGGCCACGGAGAACGCGCCGGCCGGCAA +GCGCGCCTGGTTCGGCATGTTTCCCCAGCTTGGCCCGTCGATCGGCTTCCTCGCCGCCAA +CGGCCTGTTCCTGGCCCTGGCGATGCTGCTCAGCGAGGAGCAGTTCCGTGAGTGGGGCTG +GCGGATCCCGTTCCTGCTCAGCGCGGCGCTGGTTGTGGTCGGCCTCTACGTACGCCTGAA +GCTGGCGGAAACCCCGGTGTTCGCCAAGGCCATGGCCAAGCACGAGAGGGTCCGCCTGCC +GATCGCCGAGCTGTTCGCCCAGCACTGGCGGCCGACCCTGCTCGGCGCCCTGGCGATGGT +GGTGTGTTATGCGCTGTTCTATATCTCCACGGTGTTCTCGCTGAGCTACGGGGTGGCCAG +CCTCGGCTTCAGCCGCGAGGAGTTCCTTGGTCTGCTGTGCCTCGCGGTGCTCTTCATGGC +CGCCGCCACGCCGCTGTCGGCCTGGCTCAGCGACCGCTTCGGACGCAAGCCGGTACTGCT +GCTCGGCAGCCTGGCGGCGATCGCCTCCGGCTTCGCCATGGAGCCGCTGCTCAGCCAGGG +CTCGACGTTCAGCGTCGCCCTGTTCCTCTGCATCGAGCTGTTCCTGATGGGGGTCACCTT +CGCCCCGATGGGCGCGCTGCTGCCGGAAATATTCCCCACCCACGTGCGCTACACCGGCGC +GTCGGCGGCCTACAACCTGGGCGGCATTCTCGGCGCCTCGGTGGCGCCCTATATCGCCCA +GAAGCTGGTCGGCATCGGCGGCCTGGGCTGGGTCGGCGGCTACGTCTCCGCAGCGGCGCT +GCTCAGCCTGCTGGCGGTGCTGTGCCTGAAGGAAACCCGCGACAACGACCTCGGCGCAGT +GTCCTGAGTCTCGCTCAGGCTACCGCGACGCCGCCCTTCTCCCTCGCCGCCAACCAGTCG 
+GCGAGGGCCCCGGCCCAGACCTGGTAGCCCAGCTGCGACGGATGAAAGCCGTCGATGGCC +AGGTAGCCCGGAAGCAGGTCCAGCGAGATCGGGCAGTAGCCGGCGCCGGTCGCGCCAGCG +AGATCGCACAGCGCCTCGTCGAGCAGCCGCCCGCGCCACCCCAGCAGGGCCCGCAGCAAC +CCGGGCAAGGCGGAAAAATGCTGCAGCGGCGGCACCGCGGTGCAGGTCACCTGCGCGCCG +GCCGCCTGGAAGCCGTCGATCAGGGCCGCGCAGTCGGCGCGCCAGCGTTCGAGGGAGCGC +AGGCTGGTGGTGTCATTGACGCCGAAGACCAGGATCACCTGGTCGAAGAAACGTCCCGCC +ACCTGCGGTAGCAAGCGTTCGCGCGCCTCGCCGCTGGTGATGCCGTTCTCGCCCAGCGCC +TGCCAGGCCACCGGGCGCCGCAGACGCTCGCCCAGCGCCGCGGCCAGGCGGCCGGCCAGG +GCGTAGTCCAGGCAACTGGCGCCGACCCCGGCCACTGTCGACTCACCGAACAACAGCAAG +CGCAGCGGCTCTTCGGCACCGGACCCACCGACCAGCCCTTCGCGAGGTCCCGCCGCGGGC +GCCAGGCGCAAGGCTCGGCGGCGGGTGCGGATAGCCAGCGGCACCGCCAGCGGCAGCAAC +GGCAGAGCCGCCGCCCACCAGGCCAGCGTTGCCAGCCGGCGCATCTCAGAGTTCGACGCG +AACCGCCTGGGCGGCCCGGGTCGCCTTGGCGCGGGCGGCGTCGATAGACTCGTCGCGCGC +CAGGGCCACGCCCATCCGCCGCTGGCCATCCACTTCCGGCTTGCCGAACAGGCGTAGCGC +CGTATCCGCCTCGCTCAGCGCGGCACCGAGGTTGGCGAAAGCGACCTGCCGGGACTTGCC +TTCCACCAGGATCACCGCCGAGGCCGACGGGCCGAGCTGGCGGATCACCGGGATCGGCAG +GCCGAGGATCGCCCGCGCGTGCAGCGCGAACTCGGAAAGATCCTGGGAAATCAGGGTCAC +CAGGCCGGTATCGTGCGGGCGCGGCGACACCTCGCTGAACCACACCTGGTCGCCCTTGAC +GAACAGTTCGACGCCGAACAGCCCGCGTCCGCCGAGGGCCTCGGTCACTGCCCGGGCGAC +CCGCTCGGACTCGGCCAGGGCCTGCGCGCTCATCGCCTGCGGCTGCCAGGACTCGTGGTA +GTCGCCCTTGACCTGGCGGTGACCGATCGGCGCGCAGAAAGTGGTGCCGTCGACGTGGCG +CACGGTGAGCAGGGTGATTTCGTAGTCGAAATCGATGAAGCCCTCGACGATCACCCGGCC +CTTGCCGGCGCGCCCGCCTTCCTGGGCATAGTCCCAGGCCGCCTGCAGGTCGTCGGGGCC +TTTCAGGACGCTCTGGCCCTTGCCCGACGAACTCATGATCGGCTTCACCACGCAGGGGTA +GCCGACGCGCTCGACGCCACGGCGGTAGTCCTCGAAGGTATCGGCGAAGTGATAGGGCGA +GGTCGGCAAGCCGAGCTCCTCGGCAGCCAGCCGGCGGATACCCTCGCGGTTCATGGTCAG +TTGCGCGGCGCGGGCGGTGGGCACCACGGTGTAGCCTTCGGCCTCCAGCTCGACCAGGGT +CGCGGTGGCGATGGCCTCGATCTCCGGCACGATGTAGTGCGGCTTCTCCTGCTCGATCAC +CGCGCGCAGGGCGGCGCCGTCGAGCATGCTGATCACGTGGCTGCGATGTGCCACCTGCAT +GGCCGGCGCGTTGCCATAGCGATCGACGGCGATCACTTCGCAACCCAGGCGTTGCAGCTC +GATGGCGACTTCCTTGCCCAGCTCGCCGGAGCCACAGAGAAGGACGCGGGTCGCGCTCGG +CGACAGGGGGGTACCGATACGGGTCATGGAAGAACCTCGGAAATGAAAGAAAACGGAATC 
+AGCTTCAGGCGGAACCGCTGACCAGCCCTTGCGCCTTGGCGCGCTCGAAACAGCGACCGA +GCACCTCGCGGCGCTCGTCGTTGCTCATCAGGCCCCAGCGGGTGATCTCGGCCGCCGTGC +GCTGGCAGCCGATACAGATGTCCTGCTCGTCCAGCGCGCAGACATGCACGCAGGGCGAAG +CCACCGGTCGCTCGTTCACGCCGGGTCCTCGACGAGGTCGCGGGCGTAACGGCGGGCATT +GTGCACATAGTGGGCGGCGCTGGCCTCGAGCATTTTCTTCTGCGGCTCGCTCAGCTCGCG +CACTACCTTGCCCGGCGAGCCCATCACCAGCGAGCCGTCGGGAATCTCCTTGCCCTCGGG +AATCAGCGCGTTGGCGCCGATGATGCAGTACTTGCCGATCTTCGCCCCATTGAGGATCAC +CGCGTTGATCCCCACCAGGCTGTAGTCGCCCACGCTGCAACCATGCAGCATGGCGTTGTG +GCCGACGGTGACGCCCTTGCCGAGGGTCAGCGGATAGCCCATGTCGGTGTGCATCACCGA +GCCGTCCTGGACGTTGCTGTGCTCGCCGATATGGATCAGCTCGTTGTCGCCGCGCAGCAC +CGCGCCAAACCAGACGCTGGCGCCGGCATCCAGGCGAACCTTGCCGATCACCGCTGCGCT +GGGCGCGATCCAGCTGTCGGGATGGGTTTCGACGCGGGCATCGCCCAGGCGGTATTTCAT +CGCGATCTCCTCATGGCCGGGGTCTGCGCCCCGTCGGTCAACTCAGTTTGATGAACGAAG +CCGGCGGCGTGTGCAGGTCGATGGGGTCGTCGTACAGCAGGTTGACCAGCTCCACCACCA +TGATGGCCGTCAGGCCCCAGATCTTGAACTCGCCGAAGCGATAGCTGGGCACGTACCAGC +TGCGGCCGAAGTAGTCGATACGGTGGGTGACCTCGCGCGGGTCGTCGCGAAAGAAACTCA +GCGGCACGTTGAACACCGCGGCGATCTCGCCGTCGTTGGGCTGGTACTCGACGAAGTCGG +GAATGAAGGCGACATAGGGAGTGACCTCGATGCCATGCCGCGAGACCAGGGTGCTCAGCG +GCCCGACCACTTCCACCAGGCCCGGCGGCAAGGCGATCTCCTCCTCCGCCTCGCGCAGGG +CGGTGCGCACCAGGTCGGCATCTTCCGGGTCGCGCCGCCCGCCGGGGAAGGCGACCTCGC +CGCCATGGGTGGACAAACCGGCGGCGCGCAGGGTCAGCACCAGCTCCGGATCGTCGCTGC +GGGTGATGGGCACCAGCACGGCCGCTTGGGGGAAGCGCTGGTCGGTATCCAGTTGGCGGG +GGCGGTGCGCCTCGATGCGTTGGCGCAGCTCGTCGAGCGTGCAGTTCATACTGCCGGGGT +TTCCGAGGTCTTGTCGTTTCCCCGAGATCATGGCACGAAAGCCCCGGCCAGCCCACCCCT +CCCGCACGGGGCCTTGTCGCCTCCCGGCCAGCCGAGCAAGATAGGCCTTTCCCCGAAGAA +GAACCTGGCATGAAATTCTGCAGCCTGTGCGGCGCCACGGTGGTCCAGCGCATCCCGGAC +GGCGACAACCGCCTGCGCTACGTCTGCGACGCCTGCCACACCGTGCACTACCAGAACCCG +CGTATCGTCGCCGGCAGCCTGCCGGTGTGGGACGGCCAGGTCCTGCTCTGCCGTCGCGCC +ATTGCGCCGCGCCTGGGCTACTGGACGCTGCCGGCCGGCTTCATGGAGAACGGCGAGACC +CTCGCCCAGGCGGCGGCCCGCGAGACAGAGGAAGAAGCCAACGCGCGGATCGGCGACCTG +CAGCTCTATACCCTTTTCGACCTGCCGCACATCAGCCAGGTCTACCTGTTCTTCCGCGCC +GAGCTGCTCGACCTGGATTTCTCCGCCGGCGATGAAAGCCTGGAGGTGCGGCTTTTCGAC 
+GAAGCGGAGATTCCCTGGTCGGAGCTGGCTTTTCCGACCATCGGCCGTACCTTAGAATGC +TACTTCAGCGACCGCCGCGAAGGCTGCTTCCCGGTGCGCAACGAAGCCATTGCACCTATG +CTGGCTTCCTATAAGAAAGACTGAACGCCAAACACTCGAACACCGCCGCCGTTGCGGCTC +AAGGGAACATCGGGGATTTCACTTCCATGCGCTGGTTGCTTGCTTTGCTGTGCTTGACCT +TCGCCAGTCTGACCCAGGCCAATGCCGCGCCGAGCCTGGAAGGCAAGGTGGACAAGGTCC +TGGTGCTCAAGTCCGAGCGAAAACTGCTGCTCCTCAACAAAGGCAACGTCCTGAAAAGCT +ACCGTGTCTCGCTGGGCAAGCGCCCGACCGGCCCGAAGCTGGCCGAAGGCGACAACCGCA +CCCCGGAAGGCTTCTACTGGATCGACTGGCGCAAGACCAGCAACAACTACAACCTCTCCA +TGCACATTTCCTACCCCAACGCCCGCGACGTGGCCAAGGCCCGGGAAAAGGGCCTGCCGG +CCGGTGGCATGATCATGATCCACGGCACCCCGCTGGATGACGAATACCCGGAGTGGTACT +TCTCCACCCTGGACTGGACCAATGGCTGTATCGCCATGAACAACACCGACATGCGCGAAG +TCTGGAGCGTGGTGAAAGACGGCACGCTGATCGAGATCCGCCCCTGACAGAACCAGGCTG +ATACCACTCGTCAGCCGTCAGCCACGCCAGGAACGCCGCCGCGAGATTTTCCGGCGGCGT +TTTCTTTTATCCGTCAAACCCTTGCATCCCTTACTCAGGCCGAGTCAATCGACGGGACGG +CTGGCTGACGCGTGCGCGACGCACAACTGGCATTGACGTGGTATTCAAGTGGTATTAGTT +TTCCCAGCATTACCGGGCGACAACAACACAATCCGCATCGGAACCTCTCATGAAGACAGC +CCACGACCTGCTCCTGGCCCTGCGTCCCGACGAGGCGCAACCCACCCCCCTCTATCTGCA +GCTGGCGCGCAATCTCGAGAGCGCGATCCATGCCGGCCAGTGGAAAGCGGAGGAAGCGCT +GCCCTCCGAACGCAACCTCAGCGAAACCCTGAACATTTCCCGCGTCACCGCGCGCAAGGC +CCTCGAAGTACTCTTCGAGCAGGGCCTGATCCGCCGCAACCAGGGCTCCGGCACCTTCAT +TACGCCGCGTCTCGAACAACCGTTGTCGCGTCTCTCCAGCTTCAGCGAGATGCTCCGCCT +GAAAGGCTTCACCCCCGGCTCCACCTGGCTGGAGCGCGGCATCGCCCTGCCCACCCACGA +CGAACTGATCCGCCTCGGCCTCTCGCCGACCGAGAAGGTCACGCGCATGAAGCGCCTGCG +CAAGGCCGACGGCACAGTGATGGCGATCGAGAACAGCACCCTGCCGGCGCGCCTGCTGCC +GGACCCGACAGCGGTCGGCGATTCCCTCTACGAATACCTGGACGGCATCGGTCGCCCAGT +GGTCCGCGCCCTCCAGCACGTGCGCGCGATCAACGCGTCCGCGTCCGATGCCGCGCTGGT +CGGCATCGCGCCGGGCACCGCGATGCTGCTGATGACCCGGATCGGCTACCTCGAGGACAA +CACCCCGATCGAGCTGACCGACACCTACTGCCGCAACGACTACTACGACTTCGTCGCCGA +ACTGCGACGCTGACCGACACGGAGCCGCCATGCTCGAAGGCAACATTCTCACCCCGCAAG +GCTGGGTCCTCGGCCGCCTACACATCCAGGACGGGCGTATCCAGCGGATCGAAGGCGAGC +CCTGCGACCCGGCCGGCAACGCCGATCCGTACCTGCTGCCAGGCTTCATCGACCTGCATG +TGCACGGCGGCGGCGGCCGCGACATCATGGAAGGCGGCGACGCCTTCGCCACCATCGCCC 
+GCACCCACCGGCGCTTCGGCACGACCTCGCTGCTGGCCACCACCATGACCGCACCGCGCG +AGGAGATCGCCGACATCCTCAGCCAGCTCGGCGCTTACTGCCGCCGCTCGCTGGAGGGCG +GTTCACGGATACTCGGCGTGCACCTGGAAGGCCCCTACATCAACTCCGGCAAGCTCGGCG +CGCAGCCCAACTTCGCCCATGCCGCGGTGCTCGAGGAAGTCGAGGACTACCTGCGCCGCG +CGCCGATCCGGGTGATCACCATCGCCCCCGAGATCGCCGGACACCGGCCGCTGATCCGCG +CCCTCGCCGAGCGCGGCGTACGCCTGCAGATCGGCCATACCCTGGGCAGCTACGAGGACG +GCGTGGCCGCACTGGAAGCCGGCGCCAGCAGCTTCACCCACCTCTACAACGCGATGACCG +GCCTGCACCACCGCGAACCGGGTATCGTCGGCGCGGCCCTGGCCCACGCACGCTACGCCG +AGCTGATCCCCGACCTGCTGCATGTCCATCCCGGCGCCATCAGGGTGGCCCTGCGTTCGA +TCCCGTGCCTGTACTGCGTGACCGACTCCACCGCCGCCGCCGGCATGCCCGACGGCCAGT +ACAAGCTCGGCAGCCACACGGTGACCAAGTGCCTGGGCGGCGTGCGCCTGCCCGACGGCA +CCCTGGCCGGCAGCACCCTGACCATGGACCAGGCGCTGCGCAACCTGGTGAAGATCGGCC +TGCCCCTGGCCGAAGCCTCGCAACGCCTTTCGCAATTCCCCGCCGATTACCTCGGCCTCG +CCGAGCGCGGCCGCCTGGCGCCCGCAGCCTGGGCCGATGTGGTGCGCCTGGATCGCTCCC +TGGAACTCGACGCCGTGATGGTGGAAGGAGAACTCGAATGACTTCGCTGATGCTCGAGGA +AGCGCTCTCCGCCGCCGCGGTGGCGAGCCGTCAACTGGCCGTGCTCGACGCTTGCCTGCC +GGCGCTTGGCCAGCGCCTGCGGGAGGTCGATCCAAACCTGGCGCTGACCGTCGCGCGCGG +CAGCTCCGACCATGCCGCCAGCTACTTCGCCTACCTGGCCATGCAGCACGCCGGCTTGCC +GGTGGCCTCGCTGCCCATGTCGGTAGTGACCCTGAACCGCTCGCCGCTGCGTGTCGCCGG +GCAGGCCGTCTTCGCTTTCTCGCAGTCAGGACAGAGCCCCGATCTCGTGGACAGCCTGCG +CATCCTGCGCGAACGCGGCGCCCTGGGCATCGCCCTGGTGAATGCCGAAGATTCGCCGCT +GGAAGCCGCCAGCGAATTCGTCGTGCCGCTCTGCGCCGGCACCGAGCGCAGCGTCGCAGC +GACCAAGAGCTTCATCGCCACGCTCAGCGCCAGCGCCCGCCTGCTCGCCCACTGGCAGCG +CGACAATGCCCTGCTGGCCGCCGGCCAGGGCCTGGCGGCCGGGCTGGAGCAGGCCGCCAG +GCTCGACTGGTCGCCGGCCATCGAGGCCCTGCGCGACTGCCAGCGGCTGATGGTGATCGG +TCGCGGCGCCGGCTACGCCATCGCCCAGGAGGCCGCGCTGAAATTCAAGGAAACCTCGGC +GATCCAGGCCGAGGCCTTCAGCAGCGCCGAGGTCCGCCATGGACCGATGGCGCTGGTCGA +GGAGCGCTATCCGCTGCTGGTGTTCGCTCCCCGCGGCCCGGAGCAGGAAGGCCTGCTGGC +GCTCGCCGAGGATATGCGCCAACGCGGTGCGCAGGTATTGCTCGCTGCCCCGGACGACAT +CGCCGAACGCGACCTGCCGCTGCAGCGCGCCGCGCATCCGGCGCTCGATCCGATCCTGGC +GATCCAGAGCTTCTACGTCATGGCCGCCGGCCTGGCCGAAGCCCGCGGCATGGACCCGGA +CCAGCCACGGCACCTGAGCAAAGTCACCCGAACCCACTGAGCCTGAGCGCGCGCCCATGA 
+ACAACAAGAATCTCGCCCTCAAAGCGCCCCTGAGCGGCCCCGTGATGCCGCTCAACCGAG +TACCCGACCCGGTGTTTTCCAGCGGCACCCTGGGCGAAGGCATCGCCATCGACCCACTCA +ACGACTGCCTGCACGCGCCCTGCGCCGGGCTGGTCAGCCACCTGGCGCGAACCCGCCACG +CGCTCAGCCTGCGCGCCGACAACGGTGCCGAGCTGCTGCTGCACGTCGGCCTCGATACAG +TACAGCTGCAAGGCGAAGGCTTCGAGGCGCTGGTCGAGGAAGGTGCGCGGGTGATCGAGG +GCCAGCCGCTGCTGCGCTTCGACCTGGACCGCGTCGCCCGCGGCAGCCGCAGTCTGATCA +CGGTGATGATCCTGACCAACGGCGACGGCTTCCAGGTACGCCCGCTGACCACCAACCCGG +TGGAGGTCGGCGCGCCGCTCCTGCAACTGAGTCCGGAGAAGGCCGAGCAACGTCCGGCCA +ATCCCGCGCCTGGCGAAGGCTCCGCGCAGCGCCAGGTCCGCGGGCGGGCGCGGGTCGCCC +ATCATGGCGGTCTGCACGCACGCCCGGCAGCGCTGCTACGGAAGACCGCGCAGGGCTTTT +CCAGCCAGGCCGAACTGCACTTCGCCGGGCAGGTGGCCAGCGTCGACAGCCTGGTCGGCA +TCATGGGCCTGGGGGTCGCCGAACAGGACGAGGTAGAGGTGATCTGTCGCGGCGAAGACA +GCGAGGCGGCCCTCGACGCCCTGCTCGCGGCCCTCGCCAGCGCCACCGCGGGGGCTCCGA +AGGAGGCCCCGCGCGCCATCGCCTCGGGCGAGCCGGCGCGACCTGCGGCTGTCGCCGGTA +CCCTGGCCGGCGTCTGCGCTTCGCCCGGCCTGGCCAGCGGTCCCCTGGCGCGGCTCGGCG +CCATCAGCCTGCCGGCGGATGACGGCCGGCATCGTCCCGAGGAACAGCACCTCGCCCTGG +ACCAGGCCCTGCAACGGGTGCGCGACGACGTACAGGGGAGCCTGCAGCAGGCCAGGCTCG +GCGGCGACGAGAACGAAGCGGCGATCTTTTCCGCGCATCTCGCGCTGCTGGAAGACCCGG +GTCTGCTGGACGCCGCCGACATGCTGATCGACCAGGGCGTCGGCGCCGCCCACGCCTGGC +ACCGGGCGATTCAGGCCCAGTGCGAGATTCTCCAGGCGCTGGGCAACCTGCTGCTGGCCG +AGCGCGCCAACGACCTGCGCGACCTGGAAAAACGCGTGCTGCGGGTGCTGCTCGGCGACA +CCGCACCGTTGCGGGTGCCTGCTGGGGCCATCGTCGCCGCCCGGGAGATCACCCCCTCCG +ACCTCGCGCCGCTGGTGGATGCCGGCGCGGCTGGCCTGTGCATGGCCGAAGGCGGCGCCA +CCTCCCACGTGGCCATCCTCGCCCGTAGCAAGGGCCTGCCGTGCCTGGTGGCGCTCGGCG +CCGGGCTGCTGGAGCTGGAGGAAGGCCGGCAGGTAGTACTGGACGCCGGCCAGGGCCGGC +TGGAACTCAGCCCCGACGCTCGACGCCTGGAGCAGGTCGCCTTGCAAGTGGCGCAGCGCG +AGGAACAACACCGCCGCCAGCAGGCCGATGCGCAGCGCGAGGCGCTCACCCGCGACGGCC +GGCGCATCGAGATCGGCGCCAACGTCGCCTCGCCGCGCGAGGCCGCCGAAGCCTTCGCCA +ACGGCGCCGACGGGGTCGGCCTGCTACGCACCGAGTTTCTCTTCCTCGAGCGCCGCGCCG +CGCCCGACGAAGAGGAGCAGCGCAACGCCTACCAGGAGGTCCTGGACGCCATGGGCCAGC +GCAAGGTGATCATCCGCACCATCGATGTCGGCGGCGACAAGCACCTCGACTACCTGCCGC +TGCCGGTGGAAGAAAACCCAGCACTGGGCCTGCGCGGCATCCGCCTCGGCCAGGCGCGCC 
+CGGAGCTGCTCGACCAGCAGTTGCGCGCGCTGCTGAGGGTCGAACCACTGGAACGCTGCC +GGATACTGCTGCCGATGGTCAGCGAGGTCGACGAACTGCGCGCCATCCGTCGCCGCCTCG +GCGAGCTGGCCACGCAGTTGGGGATCGAGCGCCTGCCCGAACTCGGGGTGATGATCGAGG +TGCCCTCCGCCGCCCTGCTCGCCGATCAACTGGCCGAACACGCCGACTTCCTCTCCATCG +GCACCAACGACCTGTCGCAGTACGCCCTGGCCATGGACCGTTGCCACGCCGGCCTGGCCG +ACCGCATCGACGCCCTGCACCCGGCGCTGCTGCGGCTGATCGCCCAGACCTGCGCCGGAG +CAGCCCGCCACGGCCGCTGGGTCGGCGTCTGCGGTGCGCTGGCCTCGGACCCGCTGGCGA +CACCGGTACTGGTCGGCCTCGGGGTCGAGGAATTGTCCGTCGGGCCGAACCTGGTCGGCG +AGATCAAGACCCGGGTGCGCCAGCTCGACGCCGCCGAATGCCGCCGCCACGCCCAGGCGC +TACTGGACCTGGGCAGCGCCCGGGCGGTGCGCGACGCCTGCCTGCAACACTGGCCGCTGG +CCTGACAAGAACAATCGGAGACCACCGCCATGCCCCCGTTCCTGATCGAAAGCCTGCAAC +GCCTGGGCCGCGCCCTGATGCTGCCGATCGCGATCCTGCCGATCGCCGGCCTGCTGCTGC +GCCTGGGCGACGTCGACCTGCTCGACATTCCGCTGGTCCACGACGCCGGCAAGGCGATCT +TCGCCAACCTGGCGCTGATCTTCGCCATCGGCATCGCCGTCGGTTTCGCTCGCGACAACA +ATGGCACCGCCGGCCTCGCCGGCGCCATCGGCTACCTGGTGATGATCTCGGTGCTCAAGG +TGATCGATCCGGGCATCGACATGGGCGTGCTCTCCGGCATCATCAGCGGCCTGGTGGCTG +GCGCCCTGTATAACCGCTTCAAGGACGTGAAGCTGCCGGAATACCTGGCGTTCTTCGGCG +GCCGCCGCTTCGTGCCGATCGCCACCGGGATCAGCGCGGTCTGCCTGGGCCTGCTGTTCG +GGGTGATCTGGCCACCGCTGCAACAGGGCATCAACGGCCTCGGCCAACTCATGCTGGAAA +GCGGCAGCTTCGGCGCCTTCGCCTTCGGCGTGCTGAACCGCCTGCTGATCGTCACCGGGT +TGCACCACATCCTCAACAACCTGGTGTGGTTCGTCTTCGGTAGCTTCACCGACCCGGAAA +CCGGGCGCGTCGTCACCGGCGACCTGGCGCGCTATTTCGCCGGCGACCCGAAGGGCGGCC +AGTTCATGGCCGGGATGTTCCCGGTGATGATGTTCGGCCTGCCCGCCGCCTGCCTGGCGA +TGTACCGCAACGCCCGTGCGGAGCGGCGCAAGCTGATCGGCGGGCTGCTCCTGTCGATGG +CCTTGACCGCCTTCCTCACCGGCGTCACCGAGCCGGTGGAGTTCGCCTTCATGTTCCTCG +CGCCGCTGCTCTACCTGCTGCATGCGCTGCTCACCGGCCTGTCCATGGCCCTCACCGACC +TGCTCGACATCCGCCTCGGCTTCACCTTCTCCGGTGGCGCCATCGACCTCGCCCTGGGCT +GGGGCCGCTCCACCCACGGCTGGATGCTCTGGCCGCTGGGCCTGCTCTATGCCGGCATCT +ATTACCTGGTGTTCGACTTCTGCATCCGCCGCTTCAACCTGAAGACCCCGGGCCGCGAAG +ACGACGCGAGCAGCGAATCGGGCGACAACGCCGAAGCCGAGCGCGCCCCGGCATTCATCC +GCGCCTTGGGTGGCGCAGCCAACCTCGAAGTGGTGGACGCCTGCACCACGCGCCTGCGTC +TGCGCCTGGTCGACCGCGACAAGGCCTCGGACGCCCAACTCAAGGCCCTCGGTGCGATGG 
+CCGTGGTACGTCCGGGCAAGGCCGGCAGCCTGCAGGTGGTGGTCGGCCCACAGGCCGACA +GCATCGCCGACGAGATCCGCCGCGCCCTGCCCTTCGATACGCAACCGGGCGAAGCCGTAC +CGCCGCTGGGCAGTCCGCACACAGCCGAAGAGGTGGTGGCGATGCAGGCCACCGTCGACG +CCGCCGAAGCCCAGGCCTGGCTCGGCGCCCTCGGCGGCGCCGGCAACCTGCGCGAGGTAC +GGGACGTCGCACTGACCCGCCTGCGGGTCAGCGTGGCGGACGAACGCAAGCTGGCCACCG +AGCAACTGCGTCGTCTCGGCGGACAGGGCGTCAGTTCCCTCGCCGGCGGCATCTGCCATA +TCCTGGTGGGCCCCCGCGCCGCGGCCCTGAGCCAGGCCCTGCAACCGCTGCTGCGGCGCT +GACGGCAGGAGGGCTGGACCTCGGCCAATCGGCGGATAACCGCCAAGCGGTTATTCGCCC +CACGCCACAGCTCCCCCGTTCGGGTTCGATAATCGCGAAGCGGTTGTTCGCGCCACGCCA +CAGCCCCGGGTCGGGCTCTCCCCGTGAGGCGGATAACGAAAAGGGCGGCCACTGGCCGCC +CTTCTTGCTTGTCCGAGGAAAACTCAGAAGACCATGTCGGTCAGGCGCCAGACTTCGTAG +GCCGGGGTCTCATAGGGATGCGCCGCCTTCAGCGCCTTGACGCTGGCGTGGATGAGCTCG +TCGGCCACCACCAGCTCCACTTTCCACTCCGCCACGTGCTCGACCTGGCCGACCTGGCCC +AGGTAGGGTTGGCTACCGTCCAGGGGCCGGAACTGGCCCTGGCCAAGGGACTGCCAGCAG +CAACTGTCGTACGCCCCGATGCGCCCGCCACCCGCGGCGAACACCGCTTGCTTGACGACA +TCCAGATGGCTCTCAGGCACATAGAAACACAGCTTGTACATCGTTTTCTCCGCACGAATC +GAATGCTGCACGAGGCGGTCGGTGGTTGTGCGACTGCCGACGCCAAATAATGGCACTTTG +CCTTGACATGATGAATATTTGACGCTGCCTTTACGACAGATAAGCGCAACATTTGTTCGC +TCGTCACAACACCTTCGTGCGGGCGTTCACAGATTTACGTTCAGCCGCGGAAACGCCGCC +ACCTGGCCTTCAGCCCGGAGGCGGAAGGCGCATGGCTGCCGTCGCAATACGGCAGGTCGG +CGGAGCGCCCGCAGCGGCACAACAGCAACAGGCGCTCGCGGGTCGCGTGGAGCGTCAGGC +CGTTGCGGCAGTCGGCGGGACAATCGGGAAGCGAAGCCGAAGCCCCGCAGGTACAGAGCC +GCAAGGTGTCGCCGGGAGAGACCCGGCGAACCTCGGGAAGAAGGTCGGGCGACGAGTTGG +AGTCCGCATCGCCCGCCATGCCGGATCAGTCCACCCAGACCCGGGCGTTGCGGAACATGC +GCAGCCAGCCGCCGTCTTCCTGCCAGTCGTCCGGACGCCAGGAATTCTGCACGGCGCGGA +ACACCCGCTCGGGGTGCGGCATCATGATGGTGACCCGGCCGTCGCGGCTGCTCAGCCCGG +TGATGCCGCGCGGCGACCCGTTGGGGTTGGCCGGGTAGGCTTCGGTGACCTTGCCGTGGT +TGTCGACGAAACGCAGCGACACGCAGCCGGAAAGGTCGGCCTCGAGCAACGCTTCCTCCG +ACTCGAACTCCGCATGGCCTTCGCCATGGGCGATGGCGATCGGCAGGCGCGAACCGGCCA +TGCCCTGCAGGAAGATCGACGACGACTCCTGCACCTGGACCATCGCCACCCGCGCCTCGA +ACTGCTCGGAACGGTTGCGCACGAAGTGCGGCCAGAACTCGGTGCCGGGAATCAGCTCGT +GCAGGTTGGACATCATCTGGCAGCCGTTGCAGACGCCGAGGGCGAAGCTGTCCTTGCGCG 
+CGAAGAACGCCTGGAAGCCGTCGCGGGCGCGGGCGTTGAAGAGGATCGACTTGGCCCAGC +CCTCGCCGGCGCCGAGCACGTCGCCGTAGGAGAAGCCACCGCAGGCCACCAGGCCCTTGA +AGGCGTCCAGGTCGACCCGCCCGGCGAGGATGTCGCTCATGTGCACGTCGATCGCGGCGA +AGCCGGCGCGGTCGAACGCCGCGGCCATCTCCACCTGGCCGTTGACGCCCTGCTCGCGGA +GGATCGCCACTTTCGGCCGCACGCCCTTCTTGATGTAGGGCGCGGCGATGTCGTCGTTGA +CGTCGTAGCTGAGCTTGATCGACAGCCCGGGGTTGTCTTCGTCCAGCAGCGCGTCGAACT +CCTGCTCGGCGCAGTCGGCGTTGTCGCGCAGACGCTGGATCTGGTAGCTGGTCTCGCTCC +AGATGCGTTGCAGGATGCGGCGCTGGGCGCTGTAGACGGTCTCGCCGTTGTAGTTCAGGT +TGATCTCGTAGCCGTTGACCGGCTGGCCGATCACCGCCACGCAGTCGTCGAGACCGGCGG +CGCTGAACTGCGCGAGGACTTCCGGAGTGGCACCCTCGCGAACCTGGATCACCGCGCCCA +GTTCCTCGCTGAACAACACGGCCGCCAGCTCTTCGCGGCTATCGGCCAGGGCATCGAGGT +TCAGCTCGACGCCGCAGTGACCGGCGAAGGCCATCTCCAGCACGCTGGTGATCAGGCCGC +CGTCGGAACGGTCGTGGTAGGCGAGGATATGGCCGTCGGCATTGAGCCCCTGGATCACCG +CGAAGAAGGCTTTCAGGTCCTCGGCGTCGTCGACGTCCGGCACGGCGCGGCCGAGCTTGC +CGTGGACCTGGGCCAGGATCGAGCCGCCGAGACGGTTCTTGCCGCGGCCGAGATCGATCA +GGATCAGGTCGGTCTCGCCCTTGTCCAGGCGCAGTTGCGGGGTCAGGCTCTGGCGCACGT +CGGCGACCGGGGCGAAGCCGGTGACGATCAGCGAGACCGGAGAAGTGACGCTCTTGTCCT +CGCCATTGTCCTGCCAGCGGGTCTTCATGGACATCGAGTCCTTGCCCACCGGGATGGTGA +TGCCCAGTTCCGGGCACAGTTCCATGCCCACGGCCTTGACCGTGTCATACAGGCGCGCGT +CCTCGCCGGGGTGGCCGGCGGCGGCCATCCAGTTGGCGGAAAGCTTGATGTCGGACAGCT +TGCCGATACGCGCGGCAGCCAGGTTGGTGACCGTCTCGCCGATGGCCATGCGTCCGGAAG +CCGGGGCGTCCAGCAGCGCCAGCGGGGTACGTTCGCCCATCGCCATGGCCTCGCCGGTGT +AGACGTCGAAGCTGGTGGCGGTGACGGCGCAGTCGGCCACCGGCACCTGCCAGGGCCCGA +CCATCTGGTCGCGGGCCACCAGCCCGGTGATGGTGCGGTCGCCGATGGTGATCAGGAAGC +TCTTGCTGGCCACGGCGGGATGGCGCAGGACGCGCTCGACGCTTTCCTGCAGCTCCAGCC +CGGCGGCGTCGAAATCGTCGCCCAGCTCGGCCTCGCGGGTGACCGCGCGGTGCATGCGCG +GCGCCTTGCCGAGCAGGACTTCCAGCGGCATGTCCACCGGCTTGTTGTCGAAATGGCTGT +CGGCGACGGTCAGCTGGCGCTGCTCGATGGCCTCGCCGACCACCGCGAACGGGCAGCGCT +CGCGCTCGCAGATGGCCTTGAAGGTCTCGAAGTCGGCGGCGTCCACCGACAGCACGTAGC +GCTCCTGCGACTCGTTGCACCAGATTTCCAGCGGGCTCATGCCCGGCTCGTCGTTGGGCA +CCGCGCGCAGCTCGAAGCGACCGCCGCGGCCGCCGTCGTTGATCAGTTCCGGCAGGGCGT +TGGACAGGCCGCCGGCACCGACGTCATGGATGAAGCTGATCGGGTTGCGCTCGCCGAGCT 
+GCCAGCAGCGGTCGATCACTTCCTGGCAACGTCGCTCCATTTCCGGGTTGTCGCGCTGCA +CCGAGGCGAAGTCGAGGTCGGCGGAGCTGGCGCCGGTGGCCATCGAAGAGGCGGCGCCGC +CGCCCAGGCCGATCAGCATGGCCGGGCCGCCAAGGACGATGAGCTTGGCGCCGACGCTGA +TCTCGCCCTTCTGCACGTGTTCGTCGCGAATGTTGCCCATGCCGCCGGCGAGCATGATCG +GCTTGTGGTAGCCGCGCACTTCCTCGCCGTGAGGGGTAGCGATCTTCTGCTCGAAGGTAC +GGAAGTAGCCGGTCAGCGCCGGACGGCCGAACTCGTTGTTGAACGCGGCGCCGCCCAGCG +GGCCCTCGATCATGATGTCCAGCGCGGTGACGATGCGCTCGGGCTTGCCGTAGGGCACTT +CCCAGGGCTGTTCGAAACCGGGGATCTGCAGGTTGGACACGGTGAAGCCGGTCAGGCCGG +CCTTCGGCTTGGCGCCGCGGCCGGTGGCGCCCTCGTCGCGGATCTCGCCGCCGGAACCGG +TGGAGGCACCGGGGAACGGCGCGATGGCGGTCGGGTGGTTGTGGGTCTCCACCTTCATCA +GGATCTGCACCGGCTCGCGGCTGGCGGCGTATTCGCGGGTCTGCGGATCGGGGAAGAAAC +GCCCGGCGACATGGCCGACGATGACCGCGGCGTTGTCCTTGTATGCGGACAGCACGCCTT +CGCGGTTCATCTCGTAGGTGTTCTTGATCATGCCGAACAGGCTCTTGTCCTGGGCCTGCC +CGTCGATGTCCCAACTGGCATTGAAGATCTTGTGCCGGCAATGCTCGGAGTTGGCCTGGG +CGAACATCATCAGCTCGACGTCGTGCGGGTTGCGCCCCAGTTCGCCGAAGCTCTTCAGCA +GGTAGTCGATCTCGTCCTCGGCCAGGGCCAGGCCCAACTCGACGTTGGCCTTCTCCAGCG +CGGCGCGACCGCCGCCAAGCACGTCGACGGCGGTGAGCGGGCGGGGCTGCGCATGGCTGA +ACAGCTCGGCGGCGCCTTCCAGGCGGTCCAGCACCAGTTGGGTCATGCGGTCATGCAGGC +GGGCGGCGACCTGCTGGGCGTCGCTCTCGGACAGTTCGCCCTGCACATAGTAGGCAATGC +CGCGCTCCAGCCGGTCGATCTTCGCCAGGCCGCAGTTGCGGGCGATGTCGGAGGCCTTGC +TCGACCACGGCGAGATGGTGCCGAAGCGCGGCACCACCAGGAACAGCCGTCCGCTGGGCT +CCTGCACCGGCACGCTCGGGCCGTATTTCAGCAGCCGCGCCAGCACCTGCTCCTCGTCGG +CGGTGAGCGCGCCGGTGACGTCGGCGAAATGCGCGAACTCAGCGTACAGCCCGGTAACGG +CAGGTACGTGCTGGGTCAGTTGCTCGAGCAGTTTGCCGTGGCGGAAAGCGGAAAGGGCGG +GAGCGCCGCGCAGGATCAGCATTGTCGGAACAGCCTCTGGAGACGGGAAGGTGTGGGGGC +CGGCCGGGCCGGCCCGCAGAGGCCGTGCATTCTACCGTAAAGCACCGTCCGGCGGCACCC +GCGCCGATGGCTATCGCATAGCCCTCCATCGGATCCGCGGCGGCCCGCTTCGAAGCACTT +CGAGAACGGCGCCGCAGCGCTCTGCGACGGGGCAAGCGAAACGGCGGACGAACGGTATCC +CGGCGATAGCAGAGAAGGTCCCGGCTGTCGAGATATGGCGCCGTCTCGCCTTTGCGTATA +CTGCGCCGATGTTTGCCCTGACCGCGTACCGCTTACGTTGCGCCGCCTGGCTGTTGGCGA +CCGGCATCTTTCTGCTGCTTGCGGGCTGTAGCGAGGCGAAAGCCCCTACCGCCCTGGAGC +GCGTGCAGAAGGAGGGCGTACTGCGCGTGATCACCCGCAACAGCCCGGCCACCTACTTCC 
+AGGACCGCAACGGCGAAACCGGCTTCGAATACGAACTGGCCAAGCGCTTCGCCGAGCGCC +TCGGCGTCGAGCTGAAGATCGAGACCGCCGACAACCTCGACGACCTCTATGCCCAGCTTT +CCCGCGAGGGCGGCCCGGCGCTCGCCGCGGCCGGCCTGACCCCGGGACGCGAAGACGACG +CCAGCGTGCGCTACTCGCACACCTACCTCGACGTCACCCCGCAGATCATCTACCGCAACG +GCCAGCAGCGCCCGACCCGCCCGGAAGACCTGGTCGGCAAGCGCATCATGGTGCTCAAGG +GCAGCAGCCACGCGGAGCAGCTCGCCGAGCTGAAGAAGCAGTATCCCGAACTGAAGTACG +AAGAATCCGATGCTGTCGAAGTGGTCGACCTGTTGCGCATGGTCGACGTCGGCGACATCG +ACCTGACCCTGGTCGACTCCAACGAACTGGCGATGAACCAGGTGTACTTCCCCAACGTCC +GCGTCGCCTTCGACTTCGGCGAAGCCCGCGGGCTGGCCTGGGCCTTGCCGGGGGGCGACG +ACGACAGCCTGATGAACGAGGTCAACGCGTTCCTCGACCAGGCCAAGAAGGAAGGCCTGC +TGCAACGCCTGAAGGACCGTTACTACGGGCATGTCGACGTACTCGGCTACGTCGGCGCCT +ACACCTTCGCCCAGCACCTGCAGCAACGCCTGCCGCGCTACGAAAGCCACTTCAAGCAGA +GCGGCAAGCAGCTGGATACCGACTGGCGCCTGCTCGCCGCCATCGGCTACCAGGAATCGC +TGTGGCAGCCCGGCGCCACCTCCAAGACCGGCGTGCGCGGCCTGATGATGCTGACCAACC +GGACCGCCCAGGCGATGGGCGTGTCCAACCGGCTCGACCCGAAGCAGAGCATCCAGGGCG +GCAGCAAGTATTTCGTGCAGATCCGCAGCGAACTGCCCGAGAGCATCAAGGAACCGGACC +GCAGCTGGTTCGCCCTGGCCGCCTACAACATCGGCGGCGCGCACCTGGAAGACGCGCGCA +AGATGGCCGAGAAGGAAGGCCTCAACCCGAACAAGTGGCTGGACGTGAAGAAGATGCTGC +CGCGCCTGGCGCAGAAGCAGTGGTACGCCAAGACCCGCTACGGCTATGCGCGCGGCGGCG +AGACCGTGCACTTCGTACAGAACGTGCGGCGCTACTACGACATCCTCACCTGGGTGACCC +AGCCGCAGATGGAAGGCAGCCAGATCGCCGAGAGCGGGTTGCACCTGCCCGGCGTGAACA +AGACGCGCCCGGAAGAAGACAGCGGCGACGAGAAACTCTAGCCGCCCCCTCCCCTCAGGA +TTCTCCGTCGCTTGCCAGCCAGGCGGCGTGCTCGCGCACATCGGTCGGCCAGCCGGCGAT +CCGCTCGGCGAAGCCCTCGCGATCCCCGGCGAACAGCGCACGCGTCGCCTCCTCGAAGCC +CGGCAGGTCGCCGGCCATCGCATTCATGAAATGGTAGGCGCGCTCCTGCGCCTGCCGCGC +ACGCCCGGACTGGCTGCGGCGCGCCTCCTCGACCAGCTTGCGCAGCGCCACCGAGGCGCC +CCCGGGCTGCGCGGCGAGCCATTCCCAGTGGCGCGGCAACAAGGTCACCTCGCGCGCCAC +CACGCCCAGCTTCGGCCGCCCGCGCCCGCGCGGCGCGGCCTCCTCCGCCGGTTGCCGCTC +GCCTGCGACGACCAGGCGCGCCAGCTGGTCCTCGCTGGAACCACGGATATCGAGATCGAC +GGAGCGCCCAGTGGCATTGTCGAATACCAGCAGCGGTCCGGCTGCACCGCCGGCGACCGC +TTGCTTCAAGGCCAGCGCGACCTCCGGCAGCGCCGCCGTCAGCAGCCGGCGCGTGCCGTC +GAAGCAGGTGAAGGATTGGATGTATGGATCATTGGCCATGACAGGCTCCCATGGATGGAA 
+CGCCCGAATAATACCCGGATATAAATAGAGCGACAATATTATCCGGATAAAATAAATCCC +ATGAAAAAGGCCGCCCTATGGCGGCCTCTTCCGGCAGCGATGACCTACCAGCCGGCCGGC +AGCAGGTTCATGGCGCTGCAAATGGCGTAGGCGGCGCTGGCCAGGTAGATCGTCACGATC +ACCGCCTGGATCGCCGGATGGGCGATCCAGCCGCATTTCCAGGCCGGCTCGAAGTCGCCG +TTCCTACGCGCCGAGCGCAGCATCAGGATCGGCATGATCGAGAGAATCACCCCGCTGAAG +GCCCCGGCGAAGTACAACGCGTTGACGAAGCCGACCAGCCCGCTGTAGGCCAGGACGAAC +GGCGGCAGCGCGACGATCGCCAGCACCAGCAGGCGGGTCTTCGGCCGGTTCTCCGGACCG +AGCTTGTGGAACTTGTCGAAGATGTTGGTCAGGAAGCTACCGCCCAGGCCCCAGTAGGAC +GTCAGCATGGCGCACAGGGCGAAGGTGTTGGCGGTGAAGAAGGCCCATTCGCCGAGCGCC +TTGCCCCAGGCCAGGGTGGCGACTTCGGACTGGTTCTCCAGGCCGGTCAGGGCGATCACC +GAGAGCGGCACGATGCTGAGCAGGACGAAGGTGGTCAGCATCCCGGTGATCACCGCCTTC +GGCAGGCGCTCCGGGGCGTGGCTGAAACCACGGGCCATCTCCGGGACGATGTACTGGGCG +GAGAAGCAGAACACCGCGATGTTGAATACCGGCACCATGTAGATCCAGTCGCCGTCGAGC +AGACGGGCGAACTCGGTATTGTCGTTGAGCAGGGTGGCCGCCACCAGGATCAGGATCATG +CTGACCATGCCGATGCTGATGAACTTCTCGCCCTTGCCGATGGCGCTCAGCCCCAAGTAC +AGCACGCCGGCGGCCGGAATGAAGAACAGCACGCTGCCCAGGGCCGGGCTGATGCCGAAG +AAGGCGCTGAGGATCTTGCCGCTGCCGCTCATGTAGGCGATCAGCGCGCCGATGCTGTTG +ACCGCCACGGACAGGAAGATCGCCCAGGCGCCGAAGGAGCCGACATAGCGCTGGGCAAGG +CCGCTCAACTGGTTGTGGGTGCGCGTGCGCAGGGCGGTTTCCGAGACGTAGAGCATGGAA +ATGGTGGTGAACAGGCCGGCCACCGCCAGCCAGAGCAGCAATGGCATGAAGCCGGCCTTG +CGGCTGGCGTAGGCCATGGACAGCACGCCTGCGCCGATGTTGGTTCCGACGATCATCGCC +ACCGCCTCGAGGAACGTCAGGCGCTTGACCTCCAGTCCCGAGGAATCGACGGCTTCCGCC +CCGGCGTGATCCTGATACTGCTGGACCCGGGTATCACTCATGACAACAACTCCCTGCATT +TCGAATTGGCATTGTGCGGGAGGCGGCCAGGACCGCCGTACCGATGGCCTTCTTCGAGGA +CAATGCAGGGCCGGGAATCGCGGACGCCTCCGGACCGGCCGCGCTTTTCGGCTGCGCGGT +CCGGCGGGGGTATCCGCAGGCGGGCTCGCTCTCACCTGAAGCCATTGCCTGCGGGCCGCT +TGCTCAAGCGGCCGTGCAAAATATCACACAATTCGACGAAGGAAGCGCGCGGCGACTACG +ACTTTACGCCTATGCCGATCGTCCGAAATATCCGGATAAATAATTGATGCAGATGGCTTT +TGAAGGAGAGGAAGGGGCAAGGACGTGCTTTAACGTTTCAGCCGGAGCGTTCGAAAAGAG +AAACATCCGTCAGTCTTCGCGAGGCTTCTTCGCCGCTTCGCGCCGCGCGCGGAAGAACGC +CGAGAGCGCCTGGCTGCATTCTTCCGCCAGCACGCCACCCTCGACCATGACCCTATGGTT +GAGATGCTCCTGCTCGAAGAATCGCCCACGGCTTTCCACTGCGCCCGACTTGGGCTCGAC 
+CGTGCCGTACACCAGGCGCTGGATGCGTGCGTGCACCAGCAGTCCGGAACACATGCTGCA +GGGTTCCAGGGTCACATACAGGGTGCTGCCTGGCAGCCGGTAGTTGCCCGCCTCCGCCGC +TGCCATGCGGATCGCCAGCATTTCGGCGTGGGCACTGGGATCGTGGCTACTGATCGGCCG +GTTGAAGCCCCGCCCGATGATCTCGCCCTCGCGCACCAGGACCGCGCCCACCGGCACTTC +GCCGAGGGCCGCGGCCTTTTCCGCCTCTGCCAGGGCCTCGCGCATGAAATGCGGGTCCCG +GCTGCGATCGATGATCGGACGCACGCCTTTTACCGTCGGGTAGCCCTTGCCGTTGCTGAT +CAGCTTAGTCACCCGCACCAGGCCTCGCCGACCGCGATGGTCCCCATCAGGCCGGTCTCC +ATATGGTCGATGACGTGGCAGTGGAACATCCACAGGCCCGGATTGTCCGCCACCAGCGCC +ACGCGGGCAGTCTCGTTCTTGCCCAGCAGGTAGGTGTCGGTGAAGTACGGGATGATCTCC +CGGCGATCCGAATCCAGCACCTTGAAGGCCATCCCATGCAGGTGGATCGGGTGCTGGTAC +TGCGCCATGTTGCGCAGCTCGAAGATGTAGCTCTGGCCTTCCTTGAGCTTGGCCAGCGGC +GGCGCGTTGTGCTTGTGCTCCTCGCCGCCTTCCCAGGCCTTGCCGTTGATCTGCCAGAAC +GACGGATAGGGATTCTTGCCGCTGGTATCGGACATCGCCCCGACCCACTCGAAACGGAAA +CCGATCTTCTCGGCATTGGCCAGGTCCGGTTCGCTCACCGGATTCGCCGGCAACGGCTTC +GGCCAGTCGCCGGCAGGTGCCTCGGCGCTGGCCACGCTGCGGATCGTCGCCAGGCGTACC +GGCCCGTCGCGCAGGGACAGCTCGGTGCCCGCTTCCGGCACCTTCAGCGCCAGCTCCAGG +CGCATCCCGGGACCGATCCAGTACTGCCCCTCGAAGCCGCGTGGTTCCACCGGATGGCCA +TCGATGGCATAGATCCTCGCCTCGCCGTTGGGCAGGTTGAGACGGTAGGTGACGGTGTTG +TCGACGTTCAGCAGGCGCACCCGGACGATCTGCCCGGCCGGCAGGTCGATGGTCGGCACA +TGCTTGCCGTTGATCGTCGAATAGCGCCCGCGGGTGCCTTCGCGCGCGGCCTGGCGCGGC +ACGCTGAAGGGGGTGAAGGCGCCCTGCTCGTCCACGTGCCAGGTCTTCAGGCAGAGAACC +TTCTCGTGGCGGAACCCGGTGGGTTCGCGCTCCTCGATGATCAGCGGGCCGACCAACCCG +CGGCCGAGCTGCTCGCTGCTCATCAGGTGCGGGTGGTACCAGTAGCTGCCGGCGTCCTGG +GTCTTGAACTGGTAGATGAAACTCTCGCCCGGCTGCACCGGCGGCTGGGAAATGTACGGC +ACGCCGTCCATCTCGATCGGCAGGCGGATGCCGTGCCAGTGGATGGTGGTCGGTTCGTCC +AGCCTGTTGGTGAAGCGCACCCGCAGCCACTCGCCCTGCTTCGCCCGCAACTCCACGCCG +GGGCACTGGCCGCCATAGGCCAGCGCCGGACTGCTGAAGCCCGGGACGATCTCCAGGTCC +AGCGGCGCGGCGATCAGTTCGTAGTCGTACTCCTGCGCCACCTGCGGACGCGCCAGCCAG +AAGCGCGCGCCGCCGGCGCCGAGGCCGACCACCGCCAGCCCGGCGAGGCCGCCGAGCACT +TGTCTACGGGTAAATGTCATGGAGCCTTCACCTTCGCGGTACTGCGGCTGGGAAATGATA +GATTTTCTCATTTCCACGCCCACAAGGCGAGTTCTCGCTCGGAACAAATACACTCCCGCT +GACCTGCAGCAAGCAAAAGAGCTCCCCAGCGCCAGCGGCGGATCGCTGGTCGCGGACTAC 
+GCTCAGGCTACCTTCACGATGCGCTTGCCGAAGTTCTCGCCATTCAACAGGCCGACCAGA +GCCGCCGGCAGGCTTTCCAGCCCGTAGAGCACATCCTCGTATACCTTGATCTGCCCGGAG +TCGACCCATGCCTTGAGATCGGAGAGCGCCCGCTCTCTGCTCTCGAGAAAGTCGCCAAGA +AGGAATCCGCGCAACGTCAGGCGCTTGATGATGAGTAGCCCGGGGATCCCGGCAGGACCG +GCGGCGGGTCGGTCCAGGTCGTATTGCGAAATCGCGCCGCAACAGGCGACCCGTCCATGC +TCCGCCATGTTCGGCAGGCAGGCATCGAGGATTGCACCGCCAGTGTTGTCGAAATACGCG +TCGATCCCGTCCGGACAGGTCCGCCGCAAGTCATCGGCCAGCGTCCCCGTCTTGTAGTCG +AGTGCCGCGTCGAAACCCAGTTCACGAACCAGCCAGGCGCATTTGCTGGCGCCTCCCGCA +ATGCCGATGGTACGGCAGCCCCTGATCCTGGCGATCTGCCCGACGATAGAGCCGACGGCA +CCGGCGGCGGCAGACACTACCAGCGTTTCGCCCTGCACCGGCCTGGCGCATTCGAGAAGG +CCGAAATAGGCCGTCAGGCCTGCTACGCCGTAGACGCTGAGCAGATGCGTGAGGGGCTCC +AGGCGCGGCAGCCTGATGAGCCCGGCCGCCGGCACGACCGCGAAAGTCTGCCAGCCGGTC +TCGGCACAGACCAGGTCGCCGGGCTGGAACGACGCCGAGCACGATTCCACCACCTCGGCC +AGTCCCAACCCCGCCATCACCTCCCCGGCCACGAGGCCGGGGCGATAGGTCGCGCCTTGC +ATCCAGGCCCGGCTGGCGGCGTCCAGCGGTATGTACAGGACACGAAGCAACAGCTCACCG +TCGGCCGGAACCGGACAACGCACCTCGCGCCCCTGAAAATGCCGGACAGCGAGCGCAGTC +CGGGGAAGCTCGCTCAGAACGATCTGACGGTTGATCATCGCTTCTCTCCCGCCGCGAGAG +CGACACTGTCGCTGGCCTCACCGCCGAAAGCGGGAGGCATGTGAAACTGTCTGTCGTGCA +TGCTGTTCTCCGAGTCGATGGCAACCTGATGCAATCTCATTGTGAGAAAACATCTCCGGA +CTTAGAATGCAAAAAACTCCTATCTTCTATCGCGATCGTCCAGCATCAATGGATCCTCTC +TCCGAAGTCCTTTCCCTGGTCAACAGCCAGGACTCCTCCTTCGGCGCCCTGAAGACCGGC +GGCGATTGGGCACTCCGCTTTCCGGCACCGGAAGGTGTGAAGTTCAACGTGGTAGTCCGT +GGCGCTTGCCTGCTGGCCACCGACGGCATGGAAGAGCCGATCCGACTGGAAGCGGGCGAC +TGCTTTCTGGTGTCCTGCCGCAGTCCGCTGCTGGTCGGCAGCGATCTATCCCTTCCCGCC +GCCGACGCCACGCTGCTCTACCGAGATGCCCCTGACGGCGTCGCGCACTATGGGGAAAGC +GAGGATTGTTTCCTGATCGGCGGTCGCTTCGCCTTTGGCGAAGAAGCCAACCTGCTGTTC +GACGGGCTGCCTCCGGTGACCGTCGTGAAAAGCGATTCGGACCAGGCATCCGTACTGAGC +TGGGCGCTGCACCGACTGGCCCATGAGTTTTCCTGCCCGTCACCCGGTAGCGCGCTGATC +GCTCACCACCTGGGCCACATCATGCTGGCGCAGGTGCTGCGCCTCTATCTCGCCGGCAAG +GGCAGCGACACGCCCAGTTGGCTGCTGGCGTTGTCCGATCCGCGCATAGGCGCGGCGATC +CGGGCGATCCACGCCGAGCCGGCCAAAGTCTGGACGGTCGAGCGCCTGGCCGACGTAGCG +GGTACCTCGCGCTCCACGCTGGCCCTGCGTTTCAAGCAAACCGCGGGACTGGCGCCGCTG 
+GAATATGTATCGCACTGGCGTATGCAATTGGCGGCTCGCGCCCTGCGCGACAGCAAAGCG +ACGATTTCCTCGATCGCGCAAACCCTGGGCTACGGCTCGGACAGCGCCTTCAGCAACGCC +TTCAAACGCATCATGAAATGCTCGCCCAGAGACTATCGCAGCCGGCAGGCCAGCAGGGCA +TAGCGCTCGGCGAACCGCCCATGCGGCAGGTTCCCCGAGCGGTCGAGCGCATGGTCAAAC +GACCGACGGAGCTTGCCACGGTATCTTTCCCGCCCGAAGGATGATCCCGATGGCCAGGAC +CACGCACGCGCCGAAGGCATAACGCAGGCCGAACGCTTCGGAGAGAAAGCCGATCAGCGG +CGGGCCGATCAACGTGCCCAGGAATCCGATCGTCAGAACGGCTGCGATAGCCGAACTCGG +CGCCATGCTCGAGGAGCGGGCGGCGGCCCCCGCCACCAGCGGAATGACGGTACAGATGCC +GAGACCGACAAGGCAGAAGCCAATGATCGACGCCAGCAGGCTCGGATAGGCGATCGTCGT +GACCATGCCGATCAAGGCGAGCCCCCCGCTCCACTGCAAGGTGCTGCGCGTACCGAAACG +GTCGGCGACTCGGTTCAGCAGAAGCCGGCCGACAGTCATGGCGCCCATGAACACCATGAA +GCCGACTCCGACCAGTTGCCGCTCGGCCGAGACCACGTCCTGGAAATAGACGACGCTCCA +GTCATACATGATGCCTTCGCAGGCCATCGAGAAAAAAACGATCAGCCCGTAGTTGAAAAG +CCTCCTGTCCGGCATCGTGAAGGCCCGCCCGACCCGCTCCGCGCCAGGCTGGTCGTGGAG +AAAGCGCCAGCAGGCCAGGGCGGCCAGGAAGGCGATCACCGATATCACGCCGAAATGCGT +GGACAAGGCAAAGTCCTGCCCGATCATCAACGCCCCGAACCCGGCGCCGACGAATCCCGC +CAACCCCCATATACCGTGGAAAAACGGTAACTCGGTCTTTCCGATGAGCGCCTCCGACAG +GCTTGCCTGGGTATTGTTCGACACATTGACCGCGCTATAGGAGAACCCCGATACGAACAA +CAGAGCAGCAAGCTGGTAGACGCTAGAAGCCAAGGGAACCAACGACAGCAGAACGGCGTT +CGCGAGAATCGCCGTTACTCCAACGTTTCTGCTGCCGAGCTTCGCGATCATCCAGCCGGA +TACCGGCAACGAGAGCACCACCCCGGCCGACATGGCGAAAAGCGCACTGCCGAACACCCC +GTCGGAAAGCTGGAGTCCGTCTTTTATCGTGGCCATCCGGGAGCTGAGGCTCCCGAAACA +CAATCCGAGCATGAAGAACATGCCGGAGATGGCAATACCTTTCCCTACGCTGCTGGAGGT +CATTCGATCGATCCCGGTGCTTGTTTGATGGACAGAAAGCGCGGTATCCGCGACGTCTTC +CGTGTGCTCGACTAGGACGCGAATGCGTCGATCCCGGTGAGCGTCGCCGAAAGCCTCCAG +AGACGGCGGGCCTGCTCCGGATCGATCGCATGCTCGCGTACGCCGGCCAGGGTGCGCGAG +TCGTCCGGCGCGATGCCTGCGATGTCGCAATCCTCGCAGTAGAGCCCGCCCATCCCCGCC +AGCAGCGGAGAGGTCGCAGCCCACAGCTGGGTGGCAGCCCCCTGCTGCGGGGTCTTGAGC +GTGTCGGGGCTGGCCGGATTGCCGGCTTCGTCCATCCAGCCCAGGGCGATCATCTCCGCC +CGTGGAACATGGCGTTGCAACGGTGTGGCGATGCTGCCCGGATGCAATGCGAAGGCATGC +ACACCCCTCTCGCGACCCAGCGCATCCAACTGGATGGCGAACAGGGCGTTGGCAGTCTTC +GACTGCGCATAAGCCAGCCACTTGTCGTAGCCCCGTTCGAACTGCACGTCGTCCCAGCGG 
+ATCGCCGAGTAGCGGTGCCCGGCGGAGGACACGGCGACCACTCGCGCGTCATCCGCGAGC +GACGGCCACAAGAGGTTCACCAGGGCGTAGTGGCCAAGGTGATTGGTCGCGAACTGCGCT +TCCCAGCCCGGGCCTACGCGGGTCTCGGGACAGGCCATCACACCGGCGTTGTTGATGACG +ATATCGGCACGACGGCGCGAGGCGGAGAAGGCCTCGGCGAACCGACGCACGCTGGCCAAC +TCGGCCAGGTCGAGCGCCTGCACCTCGACTTCGTCGAGCCCGAGCAGCGCCTCCCTTGCC +ACATCCGGTCTGCGCGCCGTAACAATGACCCGCGCGCCCGCGTCGGCCAGCGCCTTCGTG +GTTTCCAGGCCAAGACCGGAGTAGCCGCCGGTCACGATGGCGGTCTTTCCGGAGAGGTCT +CGCCCCGCCATCACCTCGCTGGCCTGCGTTTTCGCTCCGAATCCCGAATTGATGGGGGTT +TGCCTGATAGCCATGAGAACTCTCCTCGCATGCGGAAGCGGCCAGGCGGATAGCGCTGGC +TCTTGGGACGAGGGTCATTATCGAAGTGGAAAACTGGATTCCGAATGCAGCGGAATCCAG +TTTCCTATCGAGATCGTCCGATCCCGTAAGCAGCTGTTTCATCACCGCTGCAAGAAAAGC +ACGGGACGGCGCAGAATTTGCTGTACGTTGGCGAACTCGCCAACAGCAGCTGAAGCAGGA +AAAATTGATATCAACCCCAGGGCCTACTAACTAGGCAGCTGTACTAAAACTGGGGGCAGG +TCACTCCACCGAACACTTCCGGGAAGGCATGGAACACAATGACGGAAAGCACCGCCACTG +CGCGCAAGCCATCAATATCAGAGCGATACTTTGGTGGATAACCGTTCGACATGAGCCTGA +CTTGTACTTGCTTGTTGAAGCCGCTCCCTCGCTTGCTCGGGAATAGGTAAAACCACAGAA +GCTTCCGCCGCCGAGCGAAGAAAATCGATGGCTAACATCACAAATTCGCCGGCCTGCCAA +GACGCATACATAGAAGACGGCGACCTGTCGGCCACCCTGCCCCATGTGCGTCTCTACGAC +CAGTAGTCGCTCCGCCCAGTTCGTGGGTAACTTGCTGGCGACGCGGAAATGGCTCAGCCC +TTGCTGAGCAGCCTGGCCACCAAACGGCGATACCCCGCCAGAAGCGAAAATCTGTCCTGG +TAGAAATCGAGGATCATGGTTTGCCCATGAATGTCGATGGTATGCGGACCGCGTGCGTCC +TGGAACGCCAGGTTTGCTACACGCTTCTCCGAATAATGCTCTGGATCGAGCACCTCGATT +TCGTTTACGGCCAGACTGCTTCCATAACCATAGGAGTTGTTCTGACCGAACCGGTAAAGC +TTTCCTCCCTCCTTGAAGATACGCCCGCCCATTCGTGCGGAGCCGGGATTCATCACCACA +GGGTTACAGGGATGGGAAGTGTAGGGTCCTTCCAGGCTCTCGCCGACATAAAGATAAAGG +TTGTCCGAACCTGATACCGCCTGGCCGCAGAACAGGTAGTACCTGCCCCCGTGTTCGAAA +AGCGTGCCATCCAGAATCCGCTCACCCTCCATTCCCTGGAATAGCTTCTTGCTCTCCAGA +GCGAAAGGAGGAGGCAGAAGACAGGGAGCGGAATGAGAGGCGACTTCCGGTATCAGATAT +TCGACACCAGACGCCTCGAACGAATAGGGATAGGAAAAATGATTCCCCTTCAAGATCACA +CGTGAAAAGTCAAGACTCTGGGCCTTCAATTCAATGATTTCACCCAGACCGTTGCTAGCA +TTCAAGGCTTCGAGACGTATCAGCTTACCGTCCGCACTAAAGAAAGGATCGGCGTAGAAA +GTATACCCCTTCTCCACCTTCGGAATCTTTCCTGCTGAGACGGACAATTCTTGCAACGAA 
+GGAATATCGTTGTAAGGAAGCGCAACGACATTCCATTTTTTTTCGAAAAACGCCCCGTAC +GATAGCCGGGCAAGCCCCCGGAAGATAGTCTTGCAGAAAAACTTGAAGACGGTGAAATTG +CTGGGCAGACGGTAGTTTTTGCCCAGCTTTTCTAATACCACCTGCTCTCCCCGAGAGTAA +TTGACAAGCGCCTTGCGAAGCAGCACGACCGAATTCAGATAAAAGTTTCTTGAGGTTTTC +TTGTAGGAGTGGTGATGAACCTTGGAGTACCCGCGCACCAAGACCTCGCCGGCGTCCAGT +TTGTTGGAGAGCTTCTGGACTATGATCCCGACGCTGTCTGCATTTTCATAGATCTCATAA +AAGCCCGCGGGCCGCCCCCTGTAGTACTCTGGATCGCCATGGTGGTAGGAAAGAATGTCC +AGCCTCTGCAATCCTCCATCGATGCGGAGCAGAGACATGCCAAACTTGATGACCAGCTTT +ATTCCCTTATCCAGAATTCGGGCGCAGACATCTTCCGGGATGCGCTGCCAGGCCCCTTCG +TAATCAGAATCGAAATGAATGACTTCTGAGCCACGAGAGTCTAACTGCACTCGACGAGTC +ATATCATTCTTCAGGGAAAGAATATTGAGAAAATAATACCCGCAATGCTTGATGACACTT +TTCTTGGTCGCGCTATTACGACAACTGAGAACCAATTGAATATCAAGATACTCAGAAGAG +TCTTCAATCGCCCGCTTTTGCCACTCACTCAATGAAAGATCATCAACAATCAACGCGGCC +TTTATTTTCATCCCGAAACTCGCCTCTGAAACAATGAAACCCCAAAAATGCTGCGAAATT +TCGACAGCCCCGAGAAACCGACACTTTTCACTGAAAAGCTGCCATGACAATTAGCAAAGC +ATCATTAAAGCGAAGCAGTATATCTCATGATCGACGGGCCGATTCAACCGTAAAACGGCC +GTTAAACAGTTGCTGGACCACCAGCCAACGCTATACAGACATCATTTGGCGGAGCCACAA +GGCTTTCGGCGGAGCCATAAAAGACAACGGCCTTACGGCCGTTGTCTTTCGGATGAATCA +CCAGGGAAGAAGTGGCTTCACTCCCACTCGATAGTCGCCGGCGGCTTGCTCGACACGTCG +TAGGTGACCCGGGAGATACCGGCAATCTCGTTGATGATGCGGTTCGAGACCTTCTCCAGC +AGTTCGTAGGGCAGGTGCGCCCAGCGCGCGGTCATGAAGTCGATGGTCTCCACCGCGCGC +AGGGCGACGACCCAGGCGTAGCGACGGCCGTCGCCGACCACGCCGACCGATTTCACTGGC +TGGAACACCACGAACGCCTGGCTGGTCTTGTGGTACCAGTCGAAGGCGCGCAGTTCCTCG +ATGAAGATGTGGTCGGCCTGGCGCAGCAGGTCGGCGTACTCCTTCTTCACCTCGCCGAGG +ATGCGCACGCCCAGGCCCGGGCCGGGGAACGGGTGGCGGTAGACCATGTCGTAGGGCAGG +CCCAGCTCCAGGCCGATCTTGCGCACTTCGTCCTTGAACAGTTCGCGCAGCGGCTCGACC +AGTTCGAACTGCATGTCCTCCGGCAGGCCGCCGACGTTGTGGTGCGACTTGATCACGTGG +GCCTTGCCGGTCTTGGCGCCGGCCGACTCGATCACGTCGGGGTAGATGGTGCCCTGGGCG +AGGAACTTCACGTCCTGCAGCTTGGTGGCTTCTTCGTCGAAGACTTCGATGAAGGTGCGG +CCGATGATCTTGCGCTTCTCTTCCGGGTCGGCGACGCCGGCCAGGCGGCCGAGGAACTTG +TCCTCGGCGTTGGCGCGGATCACCTTCACGCCCATGTTCTCGGCGAACATGGCCATCACC +TGGTCGCCTTCGTGCAGGCGCAGCAGGCCGTTGTCGACGAACACGCAGGTCAGTTGGTCG 
+CCGATGGCCTTGTGCAGCAGCGCGGCGACCACCGAGGAGTCCACGCCGCCGGAGAGGCCT +AGCAGGACCTTGGAGGAACCGACCTGGGCGCGCACGGTGGCGATGGCGTCGTCGACGATG +TTCGAGGGGGTCCACAGCGCGGCGCAACCGCAGATGTCGAGGACGAAGCGCGAGAGAATG +CGCAGGCCCTGCTTGGTGTGGGTGACTTCCGGGTGGAATTGCACGCCGTAGTAGGCGCGG +GCATCGTCGGCCATGGCGGCGATCGGGCAGCTCGGGGTGCTGGCCAGGATGTGGAAGCCG +GCCGGCATCTCGGTGACCTTGTCGCCGTGGCTCATCCACACGTCGAGGCCGAGCACGCCG +TCGTCGTCCACGTGGTCCTCGATGCCGTCCAGCAGGCGCGCCTTGCCGACCACGTCGACG +CGGGCGTAGCCGAACTCGCGCAGGTCGGAGCCCTGCACCTTGCCGCCCATCTGCTCGGCC +ATGGTCTGCATGCCGTAGCAGATGCCGAACAGCGGCACCTTCAGGTCGAACACCGCCTGC +GGCGCGCGCGGGCTGTCGGCTTCGTGTACCGACTCGGGGCCACCGGCGAGGATGATGCCG +CGCGGCGCGAAGGCGATGATCGCCTCGTTGCTCATGTCGAAGGGATGGATTTCGCAATAC +ACGCCGATCTCGCGCACGCGGCGGGCGATCAGTTGGGTGTACTGGGAGCCGAAATCGAGG +ATCAGGATACGGTGGGCGTGAATGTCTTGGGACATGGCCATCTCTCGCTACGGAATTCGA +AAACGACACGGGGCTGAATCGAACAGCCCCGTGCAGCACTCATTTCAATCAGCCTCAACC +AACCCGGTAGTTGGGGGCTTCCTTGGTGATCTGGACGTCGTGGACATGGGACTCGGCCAT +GCCCGCGCCGGTGATCCGGACGAACTGCGGCTGCGTGCGCATCTGCTGGATGTCGGCGCT +GCCGGTGTAGCCCATGGCGGCGCGCAGGCCGCCCATCAGCTGGTGGACGATGGCGGACAG +GGCGCCCTTGTACGGCACGCGACCCTCGATGCCTTCCGGCACCAGCTTCTCGGCGCCGGC +GGAGGCGTCCTGGAAGTAGCGGTCGGACGAGCCCTGGGAGCCGGACATGGCGCCCAGCGA +ACCCATGCCGCGGTAGGACTTGTAGGAACGGCCCTGGAACAGCTCGATCTCGCCCGGCGC +TTCCTCGGTACCGGCGAACATCGAGCCCATCATCACACAGTAGGCGCCGGCGACCATGGC +CTTGGCCAGGTCGCCGGAGAAGCGGATGCCGCCGTCGGCGATCAGCGGTACGCCGGTGCC +TTCGAGGGCGGCGGCAACGTTGGCGATGGCGGAGATCTGCGGCACGCCGACACCGGCGAC +GATGCGGGTGGTGCAGATCGAGCCCGGGCCGATGCCGACCTTGACCGCGTCGGCGCCAGC +CTCGGCGAGGGCCTTGGCGGCCTCGGCGGTGGCGATGTTGCCGCCGATCACCTGGACGTC +CGGGAAGGTCTGCTTGACCCAGCGCACACGCTCGATCACGCCCTTGGAGTGGCCGTGGGC +GGTGTCCACCACCACCACGTCGACCCCGGCGGCGACCAGCGCGGCAACGCGCTCGCCGGT +ATCGGCGCCGGTGCCGACGGCGGCGCCGACGCGCAGGCGGCCCTGCTCGTCCTTGGACGC +CAGCGGGTAGGTCTTGGCCTTCTCGATGTCACGGAAGGTCACCAGGCCACGCAGGTAGAA +GTTCTCGTCGACCACCAGCATCTTCTCGATGCGGTTCTCGTAGAGCTTGGCCTTCATCTC +TTCCAGCGGGGTGCCTTCGCGGGCGGTGACCAGCTTGTCCTTCGGGGTCATGATCGCGGC +GACGGTATCGCCGGCGTTCGGCTTCACCCGCAGGTCGCGACCGGTGACGATACCGACCAG 
+CTCGCCCTGCTCCACCACCGGGAAACCGGAGAAGCCGTACTCGCGGGCCATCTGCAGCAG +TTCGATGATCTTGGTCGAGGGGGTCACGGTGACCGGGTCGCGGACGATGGCCGTTTCGTG +CTTCTTGACCTTGCGGACTTCCGCGGCCTGCTGCTCGATGCCCATGTTCTTGTGGATGAT +GCCGATGCCGCCTTCCTGGGCCATGGCGATGGCCAGGCGGGCTTCGGTCACGGTATCCAT +CGCGGCGGATACCAGCGGGATGTTCAGTTCGATGCCGCGGGTCAGGCGAGTTTTCAAACT +CACGTCCTTGGGCAGGACTTCGGAATAACCGGGGATCAGAAGGACGTCGTCGAAAGTCAG +GGCTTCTTGACTGATTCGCAGCATGGCGGGCGCTCCCAGACGGGAAAAATGGAAGCGCGA +CATTATACCCAGCCACGGGCTTCCGCTCAACGCAGTTGATCGCCGAAAACGGCGAATGGG +CAAATATCTTGCAAGGCGCCCGCCAGGCCCCGTTCCGCCTGTCCTATCCCGCATTTGCAG +GATGGCAGCGCGGCGGCGGATGGCCGGATACTGCGTACGTCACCCAGCGACCGCCCAGGC +GGCGGGACGCCGATTCCAAGAACTATAAAAGTGGGACGGGGGAGAGAATGAAAGGGAGCA +CTGACCCGCATTCGTCATCCGCAGCCTTGCTCGCGGCGTTCGGCGCCGCCGTGCTGGACA +TCAACCGGCTGGCGCGCGACAAGACCCTGGAGCGTTTCCATCGCACCGCGCTGGAACGTC +TGCAGCAACTGGTGCCGTTCCAGCGTGCCTGGTGGGGCCGCGCGGCGCTGATCGACGGCG +TGCCGGTGGAGCACAGCGCACACCTGTTCAACCTCGAAGAGCACTATGTCGAGGACTGGA +AATCGATCAGCCACGACGACATCACCGTCGGACTGGTACA +>NODE_1_length_40000_cov_62.8079_ID_1 +TGTTATGGCTCAAGCTGTTAAAAGAATTTATAAGGATTCAAAGCTTGCTATAGGACCTGC +AATAAATAATGGTTTCTATTATGATTTTGATATTGAAAACTCATTATCCAATGAAGATTT +AGATAAAATTGAAGCAGAAATGAATAAAATAATAAATGAAAATCTTAGCTTTGAAAGAAT +CGATATTTCTAGAGACGAAGCAATAAAATTAATGGAGGAAAAGGGAGAAACATATAAAGT +AGAACTTATTAAGGATCTTCCAGAAGCTGAAAAGATATCTTTATATAAACAGGGCGATTA +TATAGATCTTTGCAGAGGTCCTCATATTCCATCAACAAAGTATATTAAAGCATTTAAACT +ATTAAGCGTTGCTGGAGCATATTGGAGAGGAAATGAAAAAAATAAGATGCTTCAAAGAGT +TTATGGAGTTGCATTTTCTAGTAAGAAAGAATTAGAATTACATTTGCATAATTTAGAAGA +AGCAAAGAAAAGAGATCATAGAAAATTAGGAAAAGAATTAAAGTTATTCACTTTTGCAGA +AGAAGGTCCAGGATTTCCATTTATGCTTCCTAAAGGAGTAATATTAAAAAATACCCTAAT +AGATTTTTGGAGAAAATTACATTACGAAGATGGTTATGTTGAAATTGAAACTCCAATAAT +GCTTAATAAGAAACTATGGGAGACTTCAGGGCATTGGTATCATTATAGAGAAAATATGTA +TACTTCAACAATTGATGAAGAAGAATTTGCATTAAAGCCAATGAATTGCCCAGGTGGAAT +GTTAGTTTATAAATCAGAATCACATTCATATAGAGATTTTCCAATGAGAGTTGGAGAACT +GGGACGAGTTCATAGACATGAGCTTTCTGGAGCACTCCATGGTCTTATGAGAGTAAGAGC 
+ATTCACACAAGACGATGCACATATATTTATGTTACCAGATCAAATAAAGTCAGAAATAAA +AGGTGTTATTAATTTAATTGACAAGGTATATTCAAAATTCAGATTTAAGTACAATTTAGA +ACTTTCTACAAGGCCAGAGGATTCAATGGGAAGCGATGAAGAATGGGAATTAGCAGAAAG +CTCATTGAAGGGTGCCTTAGATGAATTAAATCTTGAATATAAAATAAATGAGGGTGACGG +AGCTTTTTACGGACCTAAAATAGATTTTCATCTTGAAGATAGCATAGGCAGAACTTGGCA +ATGTGGAACAATTCAGTTAGATTTTCAATTGCCTCAAAGGTTTGAGTTAGAGTATGTAGG +TAGTGATGGAGAAAAGCATAGACCAATAGTAATCCATAGAGTAGTCTTTGGAAGTATAGA +AAGATTTATAGGAATATTAATAGAACATTTTGCTGGAAAATTCCCAACATGGCTTTCTCC +AGTTCAAGTAAAAATACTTCCTATATCAAATAAATTTAATAGCTATTCAGAAAAGATTAA +GGATAAATTAAGCTCAGAAGGTATAAGAGTTGAAATTGATCAGAAAGATGAAAAGATAGG +CTATAAAATAAGAGAAGCTAGAAATGAGAGGGTTCCTTACATTATTATTGTTGGAGAAAA +GGAAGAAGCAGAAAATAATATATCATTACGCAGCAGAAGTAATGGCGATGAGGGAACATT +AAATTTAGAAGATTTAATAGAAAGAATAAATAATGAAGTTAAAAATAAAGCTCTATAATT +AAGTAGAGATAATACTGAAATATTAAGATATTAAATTTAAGGAATAATTAAAGTAAATAT +CACTTTAATTATTCCTTTTAATATTGTATATGGACAACATCATCGAAATTTAAAATATTA +TTTTAAATTTCTGCAGTTTTTAATTTATTAAACAAGCAGTTATATATTATAATGTTATAT +ATCACTTGATGAAGGAGAAGGAAAATATGTTTTTATTTGGCGGGAATAAATTAAAGGTAA +GAAGCGAAGAAGATGAGGATGGAGTTAAAACTTTCGTGGAATACTATGGAATGAAGAAAA +GCGATGAATTTAAAGTAAAGGTTTTAAATGAAATGATTGGAGAAAACCTTTGTTTAATAG +TTTTAGACTCTAGAATGTTGTATTTTGGAAATCAAGTGGAACATGAGGTGCCAGTTGAAG +AAATAATAGAGCATTTAGGAATTTCAAGAATAGCATATAAGAAGTTTGAAATAAAGAAGG +TACCAGAAGTATCTATGTTTGGTATTAGCATAAAAAAGGGAACTAAAAAAACTGATAAAG +ATTATGTAATAGGATTTGTTGTAAATAAGTATAATTTTAAAAGAATTGAAGAATATGTAA +ATAGAATGAATCTATATTATTTCATAGATAACGCTGGACTTAGTGAAGAGGATTTATTAC +AGAAGTTGAGTGAAAATTATGAAGAAATTGATGAAATGAGTAAGGAATTTTACTGTGAGA +TATTTAATAATAATTACATAAGTCAGTTAGTTATTTCATCAGAAAATGAGAACGCTATGG +CAATAAAGGAAACGGTTGGCAGGTGCCATTCTGAGTTAAAATAAATAGTTTCACTCTAAG +AAGAATGTATGCAAATTTTATTATTTATTAATTTACTAGTGAATTCAACGTCTAATAGAA +TTAGCAAACATTCTTTTGCCTTGACAACATTATTAGATTATGGAATGATAATTAGATGTC +GGAAAACTACTAATTGATTAATTATTTTACAAATGAAAAATTAGTTGAGATTAAAAAATG +ATATAAATATTATCTAGATTAAATATGAGGGAGGTATAGGAATACTTTTTTGATAGAATT +ATTTCTATGCGTTTTTTTGAAGCTTAACAAATTGCAAGTTTAATCTATATAATAGTAGCA 
+TACTGAAAATTTTTTAGAAAAGGTTAATGAGTAGTATGAAGAGAATAAATGAAGCTTTTA +GTGATTATGAATCGGCTGGCAGCATAAATACTGCCGTTATACAATCCGTAGTCTTGCGTA +AGAAAACTAAAGTGCTGGAAATGGAAATTAGTTCAGATAATTATATTGAACTAGGGGAAA +TTGAAAGTTTTAATAGTTTCATAAAAGATAAATTCGGATTAAATGATTCTAAAATTATTG +TAAAGTATAGTGACGAGGTTGAAATAAGACCTATAGAAAAAGAATTGAAAAATATCGTAT +ATTCCTTATCTACAAAGTATCCTGCATTAAAGGCAGCGGTTAATAATAGTGACTATGAAA +TAGAAGGAAATACTATAAATTTTAATTTTAAAATTCCTGTAGCGGGTTTTCTAAAAACTA +TGGAATATGATAAGCAAATTAATAAAGCTATAAAACATATGTATGGGAAACATTATAATA +TCAATTTTATTGATCAAATAGATAGCGAAGAATTGGTGAAAATCGCAGAAGATAAACGTG +CTAATGAGATGAAAGTTATTAAAGAAATAAAAATTACTCAAAGCAATAATCCTCCTGAAG +TACCTAAAGTTGAAGAAGGTAAGGCAGAAGTAAAAGCTGAGGGCGATGGCAAAAAGGCAA +GTAATCCATTCTTGATTTTAGGAAGAAATGCTAATATTAAGGAAAACATAATTAAAATTA +ATGATATTACACCTGATGAAGGAAGAGTGGCTTTAGAAGGTGAAATATCAAATTTAGAAG +CAAAAGAATTAAGAAGTGGAAAAATGCTAATTTCCTTTGATTTATATGATGGATCAAATT +CGATGACTTGTAAGATTTTTGCAAAACCTAATGAGTATAATGAAGTGTTTTCTAGAATTA +AAAAGGCTAAGGGTCTTAGACTTGCTGGAAATGCAGGCTATAGTAATTTCTCTCATGAAG +TTGAACTTATTGCTAATACTGTTATTGAAACAAATGGTATAAGGAGATATAAGAGGCAGG +ATAATTCAGAGGTGAAGAGAGTAGAGCTTCACATGCATACTCAAATGAGCCAGATGGATG +CAATGTCTAGTGCTAGTGATTTAATTAAGAGAGCTATGAGCTGGGGAATGAAGTCTATTG +CTATAACTGATCATGGAGTAGTTCAGTCATTTCCAGAAGCGCACAAATTGCTTGGAAGAG +ATAATCCAGATATGAAAGTCATATATGGAGTTGAAGCTTATCTAGCTCCAGATAAAAAGC +CATCTGTAACAAATGTTAGGGAAGAAAGTATTGATACAGTATACTGTGTTCTTGATTTAG +AAACTACAGGTTTTTCTCCTCAGACAGAAAAGATTACTGAAATAGGAGTAATGAAAATTA +AGGATGGTAAGGTTATAGATAAGTTCAGTACTTTTGTAAACCCTCAAAAGTCAATTCCTA +TGAGAGTTGTAGAGGTTACAAAGATAACTGATGATATGGTAAAAAATGCAGAAACTATTG +ATAAAGTTTTCCCTAAATTGCTTGAATTTATTGAAGGAAGTGTTTTAGTTGCACATAATG +CTGACTTTGATATTGGATTCTTAAAGCATAATGCGAAAGTTTTAGGCTATGAGTTTGATT +TTACCTATATAGATACTTTAGGATTAGCGCAAGATGTATTCCCTGATTATAAATCTTATA +AGCTAGGAAGAATTGCTAAAAATCTTGGAATAAAGGTTGAAGTTGCCCATAGAGCTTTAG +ATGATGTTGATACAACTGTTAAGGTATTTAACATAATGATTGAAAAGCTAAAAGAAAGAG +GCGCGCAAACTTTATCAGATATAGATTTATATGCAGCCGATGAAGAAGCTAAGAAAGTGG +CATATAAAAAGGTTAAAACTCACCATGCAATAATACTAGCAAAAAATTACGTGGGATTAA 
+AGAATTTATATAAATTGGTATCATATTCTCATTTAGATTATTTTTATAAAAAGCCACGTA +TATTAAAGAGCATGTTTAAGAAATATTCTGAAGGTTTAATTATTGGAAGTGCTTGTAGTG +AAGGGGAATTATATCAAGCCATACTTCTTGGAAAACCAGAAGAACAAATTGAGGAAATTG +CTAATTTCTATGATTACTTAGAAATTCAGCCTTTAGGAAATAATGATTACTTAGTAAGAC +AGGAGCAAGTTCCAAGTAAAGAATATTTAAAAGAAATTAATAAAAAGATCGTAGAACTTG +CAGAAAGATTAGGTAAGCCTGTAGTGGCTACTGGAGATGTTCACTTCCTAGATCCTGAAG +ATGAAATATACAGACGTATATTAGAAGCAGGACAGGGATTTAAGGATGCAGATAATCAAG +CACCATTATATTTAAGAACTACTGAAGAAATGCTCGATGAATTCTCTTATTTAGGAAGAA +CAAAAGCTTATGAGGTTGTAGTTACAAATACTAATATAGTAGCAGATATGTGTGAGCAAA +TAAGCCCTATTTCTCCTGAAAAATGTCCACCTCACATAGAAGGCTGTGAGCAGACAATAA +AAGATATAGCTTATGAAAAAGCCCATGAACTTTATGGAGATCCACTTCCAGAAATAGTTC +AGGCGAGACTTGATAAAGAGCTAGATTCTATTATAAAAAATGGATTCTCAGTAATGTATA +TCATAGCTCAGAAGCTGGTATGGAAATCAAATGAAGATGGATACTTAGTAGGTTCCAGAG +GATCTGTTGGTTCATCCTTCGTTGCAAATATGACTGGTATAACAGAAGTAAATGCGCTTC +CACCTCATTACAGATGCCCTAAATGTAAGTATTCTGATTTTGAGGATTATGGCGTTCTAA +ATGGCTTTGACTTGCCAGATAAAGTATGTCCTGTTTGTGGAGAAAATCTGTATAAAGATG +GTATAGATATTCCGTTTGAAACATTCCTGGGCTTTAATGGAGATAAAGAGCCGGATATAG +ATTTAAACTTCTCAGGGGAATATCAGGCAAAGGCCCATAGATATACAGAAGTTATCTTTG +GAAAGGGAACAACATTTAAAGCAGGGACTATTGGTACTATAGCAGAAAAAACAGCGTTTG +GTTATGTTAAAAAATATTATGAAGAAAAAAATATTACAATAAACAAAGCTGAAACAATAA +GAATTTCAGTAGGGTGCACTGGTATAAAGAGAACTACAGGTCAGCATCCAGGAGGAATTA +TAGTTGTACCAAAGGGAAGAGAAATATTTGAATTCTGCCCTGTACAGCATCCTGCGGATG +ACCCTAACTCAGATATTATAACAACACATTTTGATTATCACTCTATTGACCAAAATCTAT +TGAAGCTTGATATACTAGGGCACGATGATCCGACGGTTATAAGAATGCTTCAAGATATAA +CAGGGGTTGACCCACATGAGATTCCTATGGATGACAAGGATACTATGTCCTTATTCTTCT +CAACAAAGGCTCTTGGGGTAACTCCAAATCAGATAAATTCAGAGGTTGGAACCTTTGGAA +TTCCTGAGTTTGGTACTAAGTTCGTAAGAGGAATGCTTGTGGATACAAAACCAAAGACTT +TTTCAGATTTATTATGTATATCAGGACTTTCACATGGTACAGATGTATGGCTTGGAAATG +CTAAGGACTTAATTGATAATGGAGTAATTACCAGCATAAGTGATGCGGTATGTACCAGAG +ATGATATTATGGTTTACTTAATTAGAAAAGGACTTCCACCTAATACCGCGTTTAAAATAA +TGGAAACTGTTCGTAAAGGTAAAGCCTTAAAAGAACCTAAATTTCCAGAATATGAAGCTA +TGATGAGAGAACATGATGTACCAGAATGGTATATAGAGTCTTGTAAAAAGATAAAATACA 
+TGTTCCCTAAAGCCCATGCAGCAGCTTATGTAATGATGGCATTTAGAATAGCATGGTTTA +AAGTTCATATACCTCAGGCTTATTATGCAACATACTTTACTATAAGAGCAAAAGCCTTTG +ATGCAGAATTTATGATCTTTGGAAAAGAAAAGGTTAAAGCAAAGATGCAGGAAATTCAGG +CTCTTGGAAATGAGGCTGGTCCTAAGGATAAGGATATGTATGATGACCTTGAAATAGTTT +TGGAAATGTATGAAAGAGGATTTAAATTCCTTCCAATTGATTTATATAAATCTAGTGCTA +CAAAATTCCAGTTAGAAGAAGAGGGAATAAGACCGCCATTAAACAGTATAGCAGGTATGG +GAAATGTAGCTGCAGAAGGTATAGCAAGTGCAGCAAAAGAAAAAGAATTTAATTCTGTAG +ATGATGTAAAGAAACGTTCAAAGATTGGAAATGCTGCCATAGAGCTACTTAGAAAATTTG +ATTGTTTAAATGGTCTTCCAGAAAGTGATCAAATGAGCTTCTTTGACGCAGTTTAGATTT +ATGACTATAGATAAAATAACTTTGAAACTAAACTTAAGTGGTAACTTACCGAAATCATAA +ATATTTTGTTCCGAAAAACTATGAAAATATTGCTGAAAATCCTAAGTTGCAGATTGCACC +ACTTCAGCATGCTCCCACTTTCAGTGTGACAAGCAAAAGTGGAACAACCTACAACTAAGA +ATTTTTAGCAGCTCATTTTCAAATGTTTTCTGCACAAATATATTTATGATTTCTAGTGAA +GATATTACCTCAAAGTTTTAGCAAGTTTGTAAATATGTTTATATTATAGATTTAAATATT +AATTTTGATATTATAGATAAACTAACTAAGATCTGAACTGGAGTATTCTCTCTTTATAGT +ATGGTCTAAATTATAAGTGGAATTTAGATTTTCTAGTGAAGTTGTACTTTTATTTTATTA +AGTATTTTAAAATTTGAACTTAAAAATTATATAAAAAATAACGTAAAAAACACAGCCTAG +TGATTAAATATATAGGCTGTGTTTTTTGTTATATATTTATTGTTAATTTAGTTATAGTAT +ATTTAAGTAAAATAGATTATTTTCCATTATCTGTTGAGCCAACTTGATTGTTGCCTTTAT +CACGATTATTCTTGCCTTTCATCTTATCAACATTGCTCTTATCATTCTTATTTCTATTGT +ATTGAGAAGATCCCATTTTATATCACCACCTTACTTTTTATTTATTATTCGCGGAAAGAG +TTTGTGATATTCATATTGAGATTAGTTTTTCGCTCTTTTCGTAAATATTGTGGGCAAATA +TAAAATTAGATTTTGAGCAAATAATCATTTTCTGTTTTCAAAATGCTCAATTCTAATATA +ATAATATTTGACATTTATAACTATAAGTATATAAACTTAAAACAAATAAGGTTATATACA +TTATATTTACAATTAGGAGGAATAGATTATGGCAAAACTATTAGATAATGAATATTCAAT +GGTATCATTTAAGGCTCGTAAAGGAGATAAGGAAGCCAGGAGAAACACAATTATAAATAT +TATATTAAATATTTTTGTATTAGAAGGTAAAGGTAGCAGTGGTACTAAGGAATTTATGTT +TACACTTACAAATAATCATCTATACATAGATGATATTGGGTACGATTTAACTGGTCAGGT +AGACATCTATCTCACAGAAAAATTTGATAGAAAAGATATAAAATCTTTTAAGGTTAGAAA +AGAAGGCAATAAAGAAATAATCTCTCTTGTAACCACTAATGGCAAAACTGTAACTTATGT +AAGGGACAATGAAAACGCTTCAGATTTAGCAACAGAAATGGCTAAACTAATAATTGAAAA +TGCAGGAAATTAAATAATATTCAATAGTCAAAGGGGGTGCTTAGATAACTCCCTTTTAAA 
+TATTGTTATATTTTTCACTATAATACATTAATAGGGTGTAAATCCTTAATAATTATTGAT +TTATTCATTGCGAGTACGTTTGTCAGTAAAGTTTTCTTCAACTCAACTTTATCAAGTGAT +TTATTTAGTTCAAGAAATCTATGCCAAAATAAATAATTATCCATATATTTAGTTGATACA +CCATTAAAGTGAAGATTAATCCACTTTCCAAGCCTTTTATGATATTCATTAACATGTTGC +ATATGATATAATTTTTCGATTACATATTGTTTTTTAGAAGCATTTACTTGCTTATGCTCT +AAACCCTTCATTTTTGCAAATGATAAATAATTCTTTGCCGAGTCACTTATTAATAAAGCA +TTAGGGGCAATATAATTGCCTATGACATTATCTATTTGTGTAGCAGTAATACGCCCCATA +CCAGCATTTCTACTAATTATGTGACCATTTCTATCCATAGCAACTAAAATACAAACCTGT +TCATGGCTTATGCCTCTAAATTTAGATTTACCACCTCTTTTGTTGGGTTTGCGAGTTTTA +CATTGGTTTTTCCCTTTAAATGGCTCTATGAAGAAGGTTTCATCTGACTCAATGATACCA +GACAATTGTTCTATATCCATTGAACGTAAAGAATTAAGCACTTTATGTCTCCAATAAAAG +GCTGTTGAAATACTTATATTAAGAGCCTTTGCGATTTTGGGTAGAGTTGTTCCCTCTGAT +ATAAATTGAAAATATTTAGCCCATTTTTGAGGATAATGTGTTCCTGCCATTGGACTACAA +GATACATCATTAAAGGTTTTGCCACAATCTTTACATTTATAACGTTGTCGGCTACGATAT +TTTCCATGACCAATAATATGTTCACTATTACAATGAGGACAAAACGTACCCTTTGAAAAA +CGTGTTTCTCTAATGGTTTCTATATAATCATTAACTCTATCAGAACTGTTCTTAACAAGA +GGATATATCAAGTTGAAAAACATATTTTGAACCACTGGTGATAACTTTTGAAATTCATCA +AATAAGATATTTATATCCATGAATGATATTCACTTCCAGAGCTGGTAAACTCAATAAAGT +CATCTGACAGCATATCATTTATTTTATCTTCTGACTTTCTAACTTCTGATTTTAATAACT +CATGTTCTAATTGTAATAGGTGATCTTTAATACTATTCGTGCATTCACCTCATCTTATGA +TATATATTTCAATATCAATCTTTCTATCAATAATGTTATTAATAATGAGTATTTAAATAA +TTTGCTATTTCTTCAATAAGTTCATTTCTATCTAAATCCAAAGCCATGATTGAGCAGCCA +TCCATATGATCGCAAGAGATATTGATTTTTAGTAATAGGAATTAAATTATGAAGAGGATT +AAGACAAGTTTAGAATACATTTTTGTAAGTTCATAAGTATTGAAAAAGTAAACTATGATT +TTTAGTTGTAATAATTTTAAAAAATAGGTTTACAATTTGAACTTAAAAATACTTAGAGTG +TATATAAAGTAAAAATTTTCATGTTAAATTTTATATTCCAGATGAATTTAGTTAAACTTA +CCCCCACAAGGGGCACAATGTAGCCACAAGGTGAAAATGTCTGAGGATATATGTTATTAG +CTGGTACTCCTCAGAAGTGATTGGAGTATTAGAATGGCTATATATCAGATGGATGTACAT +CCATGAGATTGATGCCCTCATGGAAAGATTAAAAAGGTTTAAAGTATTTCTTTTAAAGGA +GAGTGAGAAACATGTCAGATATTAAATGGATTAAGTTATCTACTAACATGCATGATGATG +AAAAGATGAAATTAGTAGATGCAATGCCAGAGAGATACTATTCATTATTTATGGATAAGA +TTACTTATACAAGCTGGCAAAACTAATGATAATGGACTTATATATCTAAATGAGAATATC 
+CCATATACTGATGAAATGTTGTCTACAATATTCTCAAGGCCTCTAGCTTCTATTAGACTT +GCACTTAAAGTATTATCAGATTTTCAAATGATTGAAATAGCTGAAAATAACGTAATTAGA +ATAGTGAATTGGGAGAAACACCAAAATGTTGAGGGAATGGACAGAGTTAGAGAGCAAAAT +AGAAAAAGAGTTCAAAACCATAGAGAAAAGAAAAAACAACTTGAAGCTGTAGTTAATGAA +AGTGAAGAAACTAGTGTTGAAATTGAAGAACATAGCTTTGCTGAAATTGAAAAATATAGT +ATTGAAGTTACAAATGATATTTGTGATACTTCAAAAAAATCTTGTAGCGTTACAGATGAT +AAAAATAACGTTACTGTAACGGTGCAGAATAAGAGAGAGAATAAGAACAAGATTAAGAAT +GAGAATAATAAAGATAGAGAGATAAAGAAAGAAGAGAATACAGGTTATGATGCAAATGAA +ACTCCTACTTCTAATGCAGCGTACGTAAGTGATTTGCACAACAGACAAGCTTTTAGATCT +CTGGTTAAGGAAACTTATATTCCTAATGCCATGGCAGTAAGTGATCCACATGAAACCACA +GATTTTAGTTCTCTGCTTAAATCAGAATCAGCAAAGTCTTCTATCTCTCAATCTAAAGAC +TCAATTGCTGAGCAGGATAATAATTTAGAGGACATAAACTCTAAATCCATTGAACTTGCT +AAGTATTGTGAACTAATAGCTGGAATACCTAATGTTCTTAATTTAGGCGCACTTAGGTTA +GCTATAGGAATGCATGGTCAAGAATATGTAAAGATGGCTATAGATATAGCATTAAAAGCC +AATAAACCTAATATGACCTACATTGATGGCATACTGAAAAATTGGAGAAGAGAAGAATAT +CCAGATGACAAGGAGGTAAACAAAAATGTCAATAGGAGCTATGGAAAGAATAGCAACTCA +GATAAAAACAAATTTGCAGGATTCAAACCAAAGGAAACACGACGAAGCCTTACAGATGAG +CAACGAAAGAAAGCTCTAGAAAACCTCATATAAGTGTGAGAAGTATTGAAATACTTCTTA +GAAGAAAAGACAAGAGTAATTCAAGGCAAGAAATCATCTCAATATATTGTGGTTATGATG +GCTTAGTAAATATATAAAATTTGAGATATACAAGAAATGATTTAATCATATAAAAGAAAA +TAATTTTGTATAGGGGAATGTAAAAATGAGAGCATCAGGAATAGTGAGAAAGCTTGATCC +ACTTGGAAGGATTGTAATACCAAAAGAAATAAGAAAGGTACTTGGAATTAATGATGGAGA +TTCTATGGAAATAATTAAAGTTGATAATGAGGTAGTTGTTAGGAAATATAGTAAAGGCTG +CATATTTTGTGGAAATGATAAAGGGGTTTTTAACTTTAAAGATATACTGATTTGTAACGA +GTGTAGGGAGGCATTAAAGGGAAATTAATTGAGAAAATTTAAGATGAGTTGATTAAGCTA +AGCCATTAAATTTAAATTTTGAATATGTAAAGTTGACTTAGTAATAAATAAAAAAGATGG +TCTTTGAAAATTTAATAATTCGATATTTACAGTTTATGCTATAATTGATTTATATTAGAA +AAAGGCATGTGTTAAATAAAATAAATCGGAGAAGTAAAATGAAGAGATTAAAATTAAAAA +AAGTAATATCTAGTTCATTATTAGTAATTTTAGTATTTGTATTAAATCCAATAGTTGTAA +GTGCTGAATGGAAACAAGATTCTAATGGATGGTGGAATGCAGAAGGAAATTCATGGTCAG +TAGGTTGGAAAGAGATTGATGGAAAGTGGTACTATTTTGGACAAGATGGATATATGAAAA +TGGGATGGATACAGTATGAAGATAAGTGGTATTACTTAGGTGATGATGGTGCTATGCTTA 
+AAAATACTACTATTAACGGTTACAAATTAGATTCTGACGGAGTGTGGATTCAATCATCAC +AAAATGATTCATCAAATGTTGAGTCGAAAGATAAGAATGTAAGTTATGAAACCGAAGTAA +GTAATAGAAAGCCAATTATCAAAGATGGAAACTATCTATATGAATCTGGTAATTTATCCA +AAGTTAATATTGATGGAACAAATAAAACATACTTTGAGCAGGCATATGGAAGCATTATTG +GCGTTTCTGATGGCTGGGTTTACTATGTTGATGGAAATACTGAACACCATGGCAGTAAAG +GTATATACAAAATAAAGGATAATGAGAAAGTTAAGTTAACAAGTGAGGATTATATTGTTG +ATGCTGTCTTTTATAAAGGATCAATATATTATGCATTAGGACCTGAAATGGTTGAAAATA +ATGTGGTTAAGGGTGGATTATATAAAGTTGATATTGATGGAAAAAATAAAGTTCAACTTT +CTGATCAGTTGGTTGAGAATATAAATATATATAAAGATTGCATATATTACAGCGGTACAA +GCTCTGTTACTACTGATAATACAGTTAGCAACGAGAATTCAGGTATATTTAAAATGAACA +CTGATGGCACAAGCAGAACAAAAATCAGTGATCATTATGCTTCATTCTTAAAATTAAGTG +GTAATAATATTTATTTTAGCAGTTTTGATTATAACTACAAATTATATAAGATTAGTATTG +ATGGGACAAATGAAAGAAAATTAAATGATGATATCAGCTGGGATATTCAAGTGGATGGAA +ATTGGGTATACTATAGTAATAATCCTTACGATGGGCATACTGATATGATTCTACCACTTG +ATAAGATGAGAGGAAATATTTATAAAATTACTATTGATGGAATGAATAGAACTAAGATTA +ATAGTGAGGCAAGTAGGTTATCTGAAGTATTAGATGGGTGGATATATTATAATAATTGGT +CCGATAATAAATCTTATAAAATGAAATTAGATGGTAGCAATAAATCAGTTGTGGAATATT +AGTATTATTATAATAATTCCGCTAAATAATTGATTATGTTTTATTATAGTGTTGAAATAC +AGCCTAAAATATATACATGGAATTGTTTATTATAATACTCAATATTCAAACAGTATAAAT +CACTAAAAATAAATAGTAAATTGTAAAATCGTATTATTCAAGAAATTGAATTTGCGATTT +TTTTTATTCTCAAAGAGATTATATTATTGGACAACTGTGTGGACATTGTGCCCCTTGAGG +GTATAACCTATGAAAATAGTTATTTTATTTTTGAAAAAATATATGCTGCAATTCATAGTG +AATAATTATGGATTAAATAATTGTAAAATTTTTAAAATTTACAATGAAGAATTATTTGAA +GAATTATTTTGGGAACATATATATAAAATCTATAAAAAGGGGATAAACAAAATGGAAGGG +TTACGAATTTTTAAAGATGAAAGATTTGGAGAAATAAGAGGGTTAAAAATTAATAATAAA +GATTATGCAGTTGGAATTGATATAGCAAAAGCTTTGGGATACAAAAACCCAAGAGATGCA +ATTTTAAGGCATTGCAAGGGTGTCGTGAAACACGACATAGGGGTAGTTACTGGAAAGAGG +AGAGATGGAACAGAGGTTATTCAAAATATAGAAATGAGTGTAATTCCTGAAGGGGATATT +TACAGGTTAGCAGCCAAATCAGAATTACCTGTGGCAGAGAAATTTGAAGCATGGATTTTT +GATGAAGTATTACCTAGCATACGAAAAACAGGGATGTATGCTACAGATGAATTATTAGAT +AATCCTGACTTACTTATTGCTGCAGCGACTAAGTTAAAAGAAGAGAGAAAAGCAAGGCTA +GAGGCTGAAAATAAAGTGAAATTATTAGAACCTAAAGGACAATTTTATGATGATGTCGCA 
+GGATCTAAAGATAGCATTGAAGTGGGACATGTTGCAAAGGTTCTTGCTATAAGAGGAATG +GGAAGAAATAATTTATTCTCACTCTTAAGAGAAAAGAAAGTTTTGGATAAAAACAATATT +CCTTATCAACAGTTTGTAGATTTAGGCTATTTTAGAGTATTGGAGCAGAAATACACAGTA +CCTAGTGGTGAAACAAAGATAAACATTAAAACAATGGTATTTCAAAAGGGGATAGAGTTT +ATAAGGAGAAAAATTGGAGAGTAATAAACCCCATTCTCCTTATTTTAGGATATGCAAATA +TAAGAGCTTGAATATTTACATATATTTAATATAAAGCACCATGAAATTTATTAATATAAA +TTATATTTTAAATTTTAAAAGCAGTAGTTAGGAAGCCTTTAACTACTGCTTTTTATATTA +AAAGATAAATATGAAAGGTGATAATAATTTTTTCTTATTCATGACAAAAAAATAATATGC +ATGGGAAAAAATGCAGAATATATTAATTAATATATTCTGGTTAAAAAATCTAAACTTAGT +ACTGAAAAATTTACATTTAATGTGTGGATTTGGACGTTTAGAGAAAATGCTATTTAATTA +ATATGAGTATTATGAGAATATGTAATTGTAAATATAAAGAATGGAAGTGAAGTTGATTTA +TGTCAAAAGTACTTTTTTTAAGTATACCTGCACATGGTCATATAAATCCTACATTAGGAT +TAATAAATGAATTAGTAAAACAAGGAGAAGAAATTACATATTTCTGCTCAGAAGATTTTA +AAGAAAAAATTGAAAAAACAGGTGCCAAATTTAAAAGTTATAGAGTGGAACTGAGCCTTT +TTAAAAGAAAACATAAGACTTCTAATGATATGGGGCCAGATAAACTTCTTGATTATATAA +ATGAAACGCTTAAATCAAGTGATAAAATTATAAAAGATATTTTAAATCAAATTGAAGGCG +AAAAATTCGATTATATTATGTATACAGCCATGTTTCCTTTTGGAAGTATCATTGCTCAAA +TACTGAAAATACCTTCAGTTTCTTCCTTTGCAGTGTTTGCAACTCCTAAAGAATTAAGGT +CGCAGCACAAAGAATTAATGAGTGAAAATTTAATAAAAAATCATCCAGTTATAGAGACTT +ATAAAAAAGTTTCAAGACAATTAAAAGCAGAATTTAATGTGGAAATGTCCCATAATATCT +TTGATTTATTTTTTAATAAGGGTGATATAAATATTGCATATACTTCTAAATATTTTGTTG +CACATCCAGAATATTATGATGATAGCTTTAAATTTATAGGGCCACCAATATATGATAGAA +AAGAAAATTTAGATTTTCCTTTTGAAAAATTAGAAGGAAAGAAAGTTGTTTATATTTCAC +TAGGTACAGTGTTTAATAATACAGACAGCAATCTCTATGATATATTCTTTAAAACATTTG +GTAATACCGATGAAGTTGTTGTAATGGCAGCATACAATGTAGACCTATCTGAATTTGATA +TACCTAATAACTTTATTGTAAGAAACTATGTTCCTCAATCAGAGGTTTTAAAGCATACTG +ATGTAGCAATAACTCATGCAGGAATGAACAGTACCAGTGACTTATTGTATAACAATGTGC +CTTTTGTAGCAATACCTATAGGTGCGGATCAACCATATATGGCAAAAAGAGCTGAAGAGT +TAGGTGCAACTATTTCTCTTGATAAAGATAATATTACTCCAGAGATTTTAAGGGAATCTG +TTGAGAAAGTTTTAACTGATCCAAGTTATCTTGAAAATATGAGAAAAATAAGTGATTCTT +TTAAAGAATCTGGTGGTTATAAAAAAGCAGTTGAAGAAATTTTTAAATTAAAACGAGATA +AGATAGATCAATAAGATTTGTTAATACTAATGCATGAAAAAATAAAAGTTGCAATAAAGG 
+TAGATTAGGTAAAAATACTTAGTCTGCCTTTTACTATGTATGTGGAAAAATCTTTGGAGC +AAAGGGTTTCCTTTTTAAGAAGAACTTGTAATACGGAATACTCAAGAAAGCTATGAACGT +TCTTATTATTCATGTTTACTATGCAAGCAGAAAGTAGATTGATTATTATCGCAAATTCCA +AGTTGTAGTTAAATTATTTCAAATATACCTAATAAAGAAAATATTTAAGAGATTTGTAAT +GTAGCAGCATGATAAGTGCTGTTACATTTATTTTTTGCAGAAGTAGATAATTATTTTGTA +CATGAAAAAATTGTATTATAATGATATATATGGATCTATAGATAATAATAAGATTTTATT +CTTATAACGTCTATAAAAGGAAGCGAGAGAGCTTCAAAAGATGTGTGTTAAATTAAAATG +ATAATTAATAAAATCAGTTGTATAAAAAAATGTGATTTGTGAGTAGATAAAATACTTTAT +TTTGTAAGCTAAAAAACAATTAAAATAAAAGTGGTGAAAATATGAAAATATCTATTGAGG +AAATTGGAAGAGAATTTGAAGAAGAAATTATTGTTAAATGTCATGAGGTTAATGATGATA +TTTTAAAATTAATAAGCAAATTAAAAACAGAAAAAGCTGTGGTTCTTGGTTATGATGGAT +GTAATATTCACAGGATTCGTATTTCAGATGTGTATTATTTTGAGGCAGTAGACAACAAAG +TATTTATCTACTGCAAAGACAAGGTGTTTGAGTCTAAGCAAAAGCTTTATGAATTGGAGG +AAATGTGTCAAGGAAAAAAGTTTTTCAGAGCATCCAAGTCGACTATTATAAACATGACAA +AAGTATTATCAGTACGACCATCTTTAAGTGGCAGGTTTGAATCAAAGCTGGATAATGGAG +AAAAGGTAGTTATCTCTAGACAGTTTGTACCTATATTAAAAGAAAGGTTAGGTTTATAGG +AGGAAATAGCATATGGAATTAATGGATTATATAAAAAAGTTAATAAGAAATTATTTTACT +ATATTTTCACTTATAGTAATATGTGTAACAGTATCAAGTCAGATTTTTTTACCAAATAAA +TCATTAGAATTAAAAGATATATATATTTATATGATATGCTCATTAATCATTGATTTGCTG +AGCCTAATATTATATTCCACAGAAAAATTATCAGAAAAGCAAATGTGGATAAGGAGAATT +ATTCACTTTATTATACTCAATGGAGTGCTATTAATTTTTACTAATGTGATTGGATTTATG +CATGATGCTTTAGATATAATTATATTAGAAATTCAGATAATAGTGATATATGCTATATTT +CAATTTTTAGTATGGATGTACGATAAGAAGGACGCTAATGAAATTAATGAAAAATTAAAT +ATAATGAGAGAAGAATTAGAGATTAAGAAGGAAGAAGAATAAAATGTAATTTTCAGCTTA +CCTCTTCTATTTTACAACTTACATGTCGTTAGTTACAGGATACTGCTTAAGCTATGTACA +TTTGGCTTAGTAAGTATTATTTTAGTTTTGTAGATGAAGTATGAAATTAAAGTATACTTT +TAAGATTAAGGCTGGTTGTAATTAAAAGAATAAATTGAAGAGGTGGAGGAAGAATGGGAA +TAATAGTAGTTTCTATTGCAGCAGTTATAGAAATTGTTTTTGGAATATACTGCTTTAAGA +CAAAATCAAATCAGAAGAAAATAAGAAGTTTAATTCACATTAGTGTATTTATAGCATTTG +TTGTATTTACATTTCTATCAGTAATACAATGGAGTTTCCGGTGGATGCTTTTGGGTGGGG +TATTGCTTATTTTTGCAATTACTTCGGTAATATCACTTATACTAAAAAGGGAAGATAAGA +AAAAATACAAAATTAGAAAAATGGTTGGTAAGTCTATCGCAATGTGCATAGTTGTAGTTT 
+TAGCTGTCAGTCCTGAATTAATCTTTCCACAGCATAAGATGCCTGAATTAACAGGAAAGT +ACAAAGAGATAAAAACGGCAACTTATACATTCAAGGACAATAATCGTATTGATATATATA +GCCCCAAAGGTGAAAATAGAGAAGTTACAGTAGAATTTTGGTATCCAGAAGATGATGAAG +TCACATCTCATTATCCTCTTGTAGTATTTTCCCATGGTGCATACGGAATGAAAACTAGTA +ACATGTCTACTTTTAGAGAGCTAGCTAGTAATGGATATGTGGTCTGCTCAATAGATCACC +CATATCAATGTTTATTTACCAGAGATGCTGATGGAAAGATAACAACAATAGATAAATCAT +TCATGGACGAGCTTAATGGTGCAAATAATGGAGCTTATGATGAAGAGACTGAATATAATC +TTGGAGTAAAATGGTTAAATGTACGCACCGAAGATATGAATTTTGTACTTGATACAATTA +TTAAGAACGTAAAGGAAGGTAATACAGAAAAAGTTTATAGTCTTATTGATGCTGGTAAAA +TTGGATTAATAGGTCATTCATTAGGAGCTTCAGCAAGTGCACAGCTTGGAAGAGAACGCA +GTGATGTAAGTGCTGTAGTTAATCTTGATGATGACTTAATAGGAGAATACACTGGTTTTA +ATAATGGCAAGTTGCAAATTAATCATAATATTTATCCTATACCCATTCTTAGCATTTACA +GTGATGATTTAAAGCGAATATATGAAAAAACTGATCCAAGCATTATACCTCAAAAATTAA +TTTCAGCGACTGCTTTAAAGTCCTTTGAAGTATATTTTGATGGAACAAATCACATGAGTT +TTACTGATCTTCCATTAGCATCTCCAATTCTTACAAAATTGTTATGCAATACAGCAAACA +GTAAGATTGGTATACAAAAAGCAGACAAGTACTATGTAATAGAGAAGATGAATAATTTAG +TATTGCAGTTCTTTAACTGTTATCTTAAGAATGAAGGAGATTTTCGACCAGAAGATAAAT +ATTAAATCGTAGTAGTAAAATCTAATTTTGAATAGCATGGCAATGAAAAAAGCGCTATCA +TGTGCTATTGATATGGTGAATATTATAAAAAAATATAAGTTGTAATTTAATGAGGTGGAG +AGATGAAGGTTTTAGGAATTAATGGTAGTTCAAGAAAAGATGGAAATACAGCATTGATTA +TGAAGATTGTTTTTGATGAATTAGAAAAAGAAGGAATTGAGACGAAAATAGTTCAATTCT +CTGGTAATATAATTGAGCCTTGTAAGGGGTGCTTTGCATGTAAAGGAAAGAATAATTGTG +TTTTTAAGAAAGACATATTTTATGAATGTTTTGAAAAAATGAAGTTAGCAGACGGAATTC +TTTTGAGTTCTCCAGTCTATTCAGCAGATGTAACAGCAAATATGAAAGCTTTTTTAGAAA +GGGCAGGGGTTGTTGTTGCAACAAATCCAGGTCTACTTAAACATAAGGTTGGTGCATCTA +TATCTGCTGTTCGCAGAGCTGGAGGTATGACAGCAGTAGATACAATGAATCATTTTTTAT +TAAACAAAGAAGTTGTTATAGCTGGTTCTACATATTGGAATATGGTTTATGGAAAAGATG +TAGGCGATATATTTAAAGATGAAGAAGGAATCAATAATATGCATAATTTGGGACAAAATA +TGGCGTGGCTGCTAAAGAAGATTAATTAATGCTATTAACAGGATCTAAAATGGTATTTCA +AAAGGGAATAAAGTTTATAAGAAGAAAAATTAATAATAAATAAAGTTGTCGTAAGCATGA +AAATAATTAATATATATTGCAAAAATAATCCTTCATCATAATTCCAAAAATATTACAAGA +ATTTTAGCTGTAATTGTGAAATGTGAATTGCAACATATATATACTATTTTCATGCTTATA 
+TTTATAAATTAAATGGGAATTCGTCATGAGAGGAATTATATTAGGTTTGAAAATTGCGGT +GTGTAAAGAGGAAAATCAATAGTGTGTGTAAAATATATATAAGTAAAGGAAAAAGTATCA +ATCATTTAGGGGAGACGTACTTATGACGAGAGAAAGTAATTTTGAATATGTAAAGGGGAC +AGATATAGAAGAATGCTACGAAAATTTAAAGAAGGCGGAATTTGTATATCAAAGTTTTCC +TGATGCAACAATTATGTGTTTAAGATCAATAGCGGAAACTGTTTTAATAAAAGTCAGGAA +TGAATTTTACAAAAGTGCAGATAATAGTGGAACTTATTATGGAATAATGGGTGAACTCAT +AACTGTTTCTGAAGTAAAGTTCTCAAAAGTGTATAGAGAATTTAATATTATAAGACTTAA +TGGGAATTATGTACTTCATCCTGAGGCTGTAATAGAAGAAGAAACAAAAAGGACGCCCAC +TGAACTTTTAGAGATTATGCATAACATTCTTATCTGGTATCTTCAAGATATAAAAAATAA +AAGTTCTATTGATAGTAGTAAAATCAAGTTTATAAATCCAAATAATTTTAATAATGAAAA +AAAAGAATTAACTGAGGTAAAAAAGAAGATTTCAGAAAAGAATAAGAAAATATCTGAATT +GAAATTAAAGATAAAGCAGCTTAAGGATTCAGAGAAGGATAAGGCTGATCAGTTATTTAA +GTTAAACAAAGAAATAGATAATATAAATGTGGAAAGACTTGAATTAAAAGAAAAAGATGA +GTTATTAGAAAAGAAGATTAAAGAGCATGATAAAGAAATAGAGACTATAAAAAAGAAATA +TAAAATTGATCTCCAAGAAAAAATTGAGAAATTTAAAAAGGAACATAGAGAATCTGAAAA +AGCTTCTAATGAGGCTGAATTAGCAGCAGTTAAGAATGAAATTTTAATAAAGAATAATGA +AATAGCAGAATTAAAAAGTAAGATAGATGATTTCAAAGAAGAATCTAAGGATAAATTAGA +ATTACAAAAGGAAATTGAAAAAATAAAAGCTGAAAAAGAAAAACTAGAATCAATGGATTC +TATTTTAACTAAAGGGATTAAAGAGTATGATATTGAGGTAAAAAGAATTAAGGAGCAGCA +CAAAAGGGACTTAGATGACAAAATTAATGAGTTAAAAAGTAAAGAAGATACATATTTAAA +AGAAAAAGAAGAATTAAATAATATTAGACAAAAAATACATTCAAAAGAAAATGAAATAAA +TGAACTAAAAGCAAAAGTAGATGAGTTAAGGCAACAGTCTAAAGAAATAAATTTACTTAA +AATAAGTATAGATGATTTGCAAAGTGAAAAAGGTTATTTAGAAAATCGTGATAGAATATT +AACTAATACGATTCTTGAAAAGAATAAAGAACTAGAAAATATAAAAGAAGCTTATAAAGC +TAATGTTGAAAAGATTGAAATATTACGTAAAGAGCGTAATGAATCAAATGCGTCTCTTAA +AAATAAGGAAGAAAAGCTTATTAAGGTTGAAAAAGAGAACTTTGAACTAAAAAAACAACT +TAAGGAAATAGAAGAAACAGCTAAAATTGAAGCTATAAAGAGAGATGAAGAGTTAAAGAG +AAAAGAAAAAGAGCTGCATGAAGGGATAGAAAAATTAAGACAAGCTTATAAAAATTCTTT +TGAATTAACCAGGGAGTATCAAGATGTTCTAGAAAAAAGTGAGTATTCTTATGATAAAGA +AGAAGAAAGATTATTAAATATTCAAAAGACAGATGTTAAAGAAAAATTAGTTGAAGAGGA +TAAGAGCTTTCATAATAATTTAAATGAATATAGTGAGGGCGTGAAAGAAACTAATGAAAG +AATAAGAACCTTCAAGAAGGTATTAAAAGAGAAGAGCATTAAGGAAGGTAAACACGTAGC 
+ATTTTATAGAGGCTTTTTAGGATTAGAAGCAGAGCAACTTAGAATTTTATATACAATGCT +TACTAAGACCAATATTTCCTCAATTTTAATTAGTAAGTCTAAGGAATTACTATCACAATC +TAATGAAGATAAGTTCATGGAATTTATACATAAAAAAGCACGAGAATTGAATCATTTATC +AGATGAGGAAGTAAGATTAAAAATTTATTATAGATTAATTAAATTAACTGATATTGAATA +TAAAAACATTTATGAAAGAAGAAGTTTTGTTGAAACTTTAGATGAAATAGTAAATGCAGG +ATATTCAATATTAGAAGGAAAAAAAGATTTCAAAGGTGGAGGCTCAAAGTTAGAAGCCAT +AGGTACATATTATTTAGAAAAAGTACTAGAGGATTTTAAAAATAAATATGATAGTGGAGA +TATAAAAGTACAGCAGGAATTAATTGATAACATTTATAATAATTTTCAAAGGTTAAGTGA +AAAAGCTAAAAAAGAAATTTATGACGAGTTACATTTGAAAAGTACTTCTGAAAGTACAGT +TAAAGCAGCCATAAGATCAGCCCCCTTTGTATTTTTATCTACTACTATAACTGTTGGGGG +ATTTTCAGCATTTAGTGCAGTTTCAAGTATAATTTTCGGAATTTCACATTTACTAGGAAC +AACTTTTTCATATGGGGTATATACTGGGGCTTCATCATTATTAAGCTTTTTCAGCGGGCC +TTTTATGGTAATACTTTTTATTGTAAATGGAGGATTTTTGCTTACTCAACATAAAAAACA +GCAATTAGAGTTAGTACCTTTATTTATAATGCAAACCATAATTACTAATGTAGCTATAGA +AAAAACAAAAATAAACTTTGATAACTATGATACTATGATTGAATTATGGAAAAGAGAAAA +AGGGAATTTTGACAAAATTACCATACAAAAAAGATTGCAGGAAAATATCTTAAATAATTA +TACAAAAGAAAGAGATGAACTTGCTGCTAAATCAGAACAAGTGTCTAGAGCTATTAATAC +ATTATGGGAAGAACATGCAAAATATAATTATAAGTTTAAAGATATTGTGTTAAGCTCTGA +TAAAAAAGTATATCTAAACTCATATGCGGAGTATAGCAGTACTATAAATAAACTGGAAAT +GGTTCAAAAAGAAATAGATGAAACTAAAAACAAGGTAGGAACCTTAAAAAGCATATTTAG +TTTAGATATGTGGAAAGGGCAATCTTCAAAGCTTGTAAATGAAATGAACATAATAAATAT +GGAAAAGTCATTAGTTGAAGAAGCAAAACAAAGCCAATACTTTAAGGAAGAAAGTGCTTT +ATTTTCAGAATTGCAGCAAAAGATAGATGTATTAAATGAAATTCAAAATAAAACTAAAGA +AAAGATAAAAGATAAAAGATAAAAATAACCTAATCAGCGTTTTGAAAAATAAAGTTAACT +TACTCGAAAATGATTTGGTTAAAATAAATTCAGAATATCCAGATATAAATAACGCTGAAT +TTAGGGCAATTGGATAGTCAGGATAAGGATAGCAAATTCATTTATTTGGGCTATAATAAA +TATAGATGTGGAGTAATACAGTCAGAATCTAATTACTTTTGTATGTTTCCAAGACAAGCT +CGTGATATGCAGAAATAGAACAACCTTGTAAAAATACACCTTAACTTTTAAGAAGTTTCA +TAAATTTAAGGACTGCAATAAGATAAAAAATATCAATCTTATTGCAGTCCTTATTTTTAA +GCTACTTCATATTTAGTAACTTGTCTTTGAGTAACCTGAGCACCGGATTTCTTAACTAGA +TCGCCAGCCTTAGTTTTAAAAACATTTTTTGAAATTATAGTGTCCATAAGTGCGTTAACT +TCATCTTTTGTAAGAGTAGTTTTAACACCAGAAACACTTAAAGTGCTTTTTTCACCAGCA 
+GAAGTTAAAAAAGTCATAGCTAAAGTATATTCCATTTAATTCACCACCTTTCATTAAGTT +TAGAGTTAAGGAAACTACTCCTACTCCTATAGTTTTAGGAGTACTCACAGAGTAAGAGAT +CATTATCAAATCATAGATGCCTACAGGGAAAGTTCGAAGAGCGAAATATAAAATGCTCAT +AGAGAAAGTTCTGCTTACCAAATGTAAAATTTGGAGCATCACTTTTCGGTTCTCACTTTT +GATATGCCTGCTTAACCGATTTAACTAAGTTCGAGAAGTCCAATCATAGATTTGGACTCT +CACTCTTATCTCACTTATCTGCTTAAGCATTTGCTAAACTAGAAGATTCATTAATAAAGT +AATCTCTAGTGTTAGCTTCTAATACACCTTTAATTGCATCTGCAATAGCATAGACATTTT +CAGGTGTAGCATCTGTTTTGATACCTGAGAAAGTTTTCTTGGTATAAATTGGGTCTCCAG +CCTTGTCTAGACCTTTTTGAACCTCAATACTAAGAGAAGCAGAGTCAATAGTTTTTGTTA +CAGCCATGTTTCATTACCTCCTTTACTTTGTTTTCATAAAGGATATATGGATGAATTTTA +TTTATTACATAAATAGATGAAAATTATTTGTCAATTTAGCACTATGCTGTTAGATGGATG +AGTTCTATTAGTGGCAAGAATCATTTATGCCGTATGAGCCATGAAGTATTGCAAAAAAAC +ACATGATTCTTAGTGAAGAATTAGAGAAGATAGAGGTTAATAATGCTAGTGATAAAGAAA +ATATTTAAGAGATTTATAATGTGGTAGCATAAACGAATATTGTCATATTTATTTCTTGTA +TAATTAGATAATTATTCCATGTATGAAAAAATTAGATTATAATGGGTTATATAGTATTAT +GGATAATGATAAGCCTGTATTCTTACAATGTTTATAATGAGAAGTTCGTGGCATGAATTT +AATGATATAAAGAGGTGGAATAATTATGCTATTTGGATTATTTAATAGTAGAAAAGATAA +CGATGAAATTCATAATATAAAGCAGTATGCAGTAGATATAGCGACTAATTTTAATGACGA +AGACAGGGAATATTTGAAGCCAATTTATACAATTAAGTATTATTCTAAATTAAATAATTC +AGTTGTTAAAGAAGCAATTTTTTCTAAAAATATAGAAAATGGTGATATTACTTTAGGAGA +AGCATTTTTAACGTTAGAGGAAATATTAAAACAGAGTATAGAGAGTAAGTTGTTAATTAA +TACAATGATTAATAAAAATTATGTGCTTACCAATGAGGAAGAAATTACAATTAATAAACT +TGAAGATATAATAAGATTTGGGTTGCTATAATCAATCCAAATTTAAAAAGTGTGAAGCTT +AATATAATAGATAATGTGTGTATATTCTAAAATGGATATGGAAAGAGCAATAATTTAACT +TATTGCTCAGATATTAATTATGCCAATACCACGAATTTGGGATTCAGGTGGACAAAAACA +ACTTGTTCTTTGAGAATTGAACAGTATTATATTTTGAAAATATGTTGTTAGGATGAATTA +AAATATGAATAAAGGAATGGAATAAGCTGTTATGCATATGAAGGTGAAATAGATGAATAG +TTGTAAATTGTTAGAGGTTAAAGCTATAGTTCAAAATCCATATTATAAGGCTCCAAGTTG +GATTGATGACAAGATCCAGAGGAGAGAATTTGTTGTTTTAGATGGGGATTCATCAGAAAA +CGATGTGGAATTATTTTTAATTGAATTATTAGGCTATAATAATATTAACATTGAGCAGGA +TACAGAATTAGTTATGAAAGAAGTTTTAGAGGAAGTTGAAATAGTAATAGCAGGAGGAAT +ATTATTTGTTGGAGAAAATAGAAATATATTTCCAAGCTGTTGTTGCGGTTTGGAAAGTTG 
+GAATGAAGTATTAGCTGCAGTTATTGATAAATCATCACCATGGTTAGGACACGATCCATA +TCCCTGCTTTGAGTATGTTGGAAACAACATTCGTATATGGTCTGATGATTTTAAGACAAA +ACAGTCAAGTGAGATTTACTTTGTTGAGTGTGATAGGGATATTTTAATCGAAAAACTAAA +ATTAGTTAGAAATGATTTAATTAAATTCTCTAAAGGGCCTTTATATAATCATATCAGTAA +ACATGCAAGAACATATACTGATTTGATGGTACAGCAATTTGAAAAATGGTTTTCTTTAAA +CCTTGTGTAATATAATCTGAATGACATGTAGCTAATTGATGAACTGATATAAATCATTAT +GGGATTTTAATATAAAATGATTTAATTTAATTTGTGGAGGAGGAGAATTTTAAGCTTGAC +TAAAAGAAATCATAAGTTTGAAATAGCGATATTAACATAGTAGTTATTGTAAATTATTTG +CTGAAGGTGATGAGTTGAAGAGTATTATTTTTGAAGGTTGGAAAATAGAAATTGATAAGG +AAAAAACTAAAAATTATTATTCACAGTTATATATAGATAGTTTACGTGAATGTTGTTCAG +ATTGTAGAAATTATTATTATGGAATAAGGCATTTGCCTAAAGTTTTTATAGATTTTTTAG +AGAGCTTATTTATTGTTCCAGAAACACCTATATGCGCTTATAAGCTGGATAAAGTTGCTG +ATAATATATTCCTATATGATGTAAATTATTTAATTAGGGGTAACATATTAGAAAAACCTG +ATAATGCTGAAACTGTTTCAGAAAATGGAAAGAAGGTATTTAACACAAGTACGTTAACTG +ATGAGGTAGAGTTTTATTTTACAAAAGAATTACATCCAAGTTTGCGAAATCTTTCTGGTT +ATGAATCGTATTTTGAACTTCAATTATTTTGTGAAATTCCTTGGGTTATAGATGAGCCGT +ATGAATAATTGCTGGTTGAAAATTATTGCATCGCACATCTATATAATAAAAAGTTATAAA +TATTTAAAATAGATGAAAAACTATTAAACAATATAATTGACACTTAACAATATAAGGTAT +ATAATTATACAAAAATAAAAGCTATGAAGAGAAGAGTAATTATGAGGGAATACTCAGAGA +GTTTCCATTTGGTGTGAGGAAATTATGAAATCATAATGAAACAAGTCTCGGAGCTGCATA +CGGATCTTATAAAGTAAAACTTTTCAGGTGGAATTTTATTTTCGCTAAATTTAGTTTAAC +TTATCTTAGAGAGTGCAACCTATAACTCACATTTGAAGAAGATATGATGTTACGGATGCC +TAGCTATAAGATAAAAATTAATTATAAGTGATGCTATGACGTAAGTTCACGTTACAGAAC +CAGAGTATGATTCAACCTATAATTTTAGGGAGATGGTGTACTTGAAGAGCCTAAAATGGT +GACATTTTAGGGAAAAGGGTGGCATCGCGAAATAGAAGTTTCGCCCCTGTAGATATATTT +TCTACAGGGGCTTTTTTGACTTTAAAATTAAGCATATAAAAAATTATAAGTTTAAAATAT +CATGGATATTTTATTTTTCGATTATCTCAAGTATGAAATTTAGAAACTTAAGGAGTGAAA +AGTTTATGATAAGTAAAAGAATAGAAAAAAACGAAAGACCAACATTTCCAAAGAAGGCTG +TAGTAACAGCTGGAATGCCTTATGGAAATAAGAATCTTCATTTTGGCCATGTTGGAGGAA +TGTTCATTCATGCAGATATATTTGCAAGATTTTTAAGAGACAGAATAGGGAAAGAAAATG +TTATTTTTGTATCTGGTACAGATTGTTATGGTTCACCTATTCTTGAAAGTTATAGAAAGC +TAAAGGAAAATGGATACGAAAATACTATGGAGGACTATGTTAAGGCAAATCATTTGAGTC 
+AAAAGAAAACCTTAGAAGATTACAATATCAGCTTAAATATTTTTGGAGCTTCAGCCATTG +GAAGGACAGGAGAATTTCATAATGAAGTTTCAGAAGAAATATTTAATACTTTATTTAAAA +ATGGTTATATAAAAAAGATGTCAGCACTGCAATTTTATGATGAAGATAAGAAAATCTTTC +TTAATGGAAGGCAAGTAATAGGAAAATGTCCAATAGCCGGATGTAACTCTGATAAAGCTT +ATGCAGAGGAATGTTCATTAGGGCATCAATATATGGCATCAGAATTAATTAATCCCATTA +GCACTTTATCAGGAAATAAACCAATATTAAAAAGTGTAGATAATTGGTATTTTACATTAG +ATGAATCCATGGATATAATGAAGGAACTTAATGAGTTTTTAAAGAAGAATACTAATAGAC +GAAAGTATGAAATTAACACTATAGATGAATTTTTAAAGAAGCCATTAATATATGTTCCAA +GAAAATATATAAAGGATGTAAATGAGCTAGAAGCTAAATTTCCAAGCCATGAAACTATAG +ATGAAGAGAAAAAATCTTCACTAGCGTTTATTTTCCAAACACTAGAGGATAGAGATAAAG +CAAAAGAAATATTGGATAATTTAAATATTAACTATACTAGTGGTAAGACATTGGTTCCTT +TTAGATTGTCAGGAAATATAGAGTGGGGAGTAAAAGTACCAGATAAAGAAGATTTAAAAA +ATTTAACATTCTGGGTGTGGCCAGAATCCTTATGGGCTCCAATTTCTTTTACAAAAGCAT +ATTTGGAGTCAATAGATAAGAGTCCTGAAGAGTGGAGTAATTGGTGGGATGCTGATGATT +CAATGGTATATCAATTTATAGGGGAAGATAATATATATTTTTATTCAATTGCTGAAATGG +CCATGTTTATTGGCTTAAAGGTGGCTAAGGGTGAAAATGTAGATGTTACTAAGCTAAATT +TGCCACACATTGTTTCTAATAAGCACATTTTATTTATGGATAAAAAAGCAAGCAGCAGTT +CAGATATTAAGCCACCAATGGCAGATGAATTATTAAAGTTTTATACGAAGGATCAATTAC +GCATGCACTTTATGAGTCTTGGATTATCTTCAAAAAGCGTTGGCTTTAAGCCACAAGTTT +ACATGAAGGAAGAAGAAAAAGTAGGAGCGGATCCAGTATTAAAAGAAGGAAACCTTTTAA +CCAATGTATTTAATAGACTCATTCGCTCATGTTTTTATACTCTACAAAGTCTTAATGAAG +ATATTCCTAAAGAAGAGGTAAGCAAAAAAATTAAAGAATTAACAGAAAAAGCAGTATTAG +AATATGAAAGACATATGTATAATCAAGATTTTCATAGAATAGCATATGTTCTCGACGATT +ATATAAGGGAAGTAAATAAACATTGGGCTAGTAATATTAAAAATGAGGAATTAAAAAGAA +ACGTAATAGCAGATTGTTTTTACGCTTGCAAAGTTATAGCAGTACTAATACACCCTATAG +CACCAGAAGGATGTGAAATGTTTAAAGATTATCTAAATATAGATGATGAACTATGGAATT +GGGATAAAGTATTTGAACCTATTACTTCTTATTTTGAAGATGCTGATAACCATAAGTTTA +AATTTCTTGAACCCAAAGTAGATTTCTTTAAGAAAATGGAATATCAATATTAATGGAAAA +ATGATATGGTTTTTACAATTTATGTTGTAATTAATCTAATTCAGATATTGTTTTATTAGA +GTGTGTTGGAACAATATCGGATTAGATTAATTATTTGTTAGGGTGGTTTCTAGTGAAATA +TTGTTGTACTGAGTCAAATAGAAAAGGCACATGTTATCATGAATTTCGAAAAGGAAAATT +TACTGGCTCTTTTTGGAATGAAGATTCGCTTTTAATACATGATGATAATTTGTATAGTTT 
+ACATCTAGCAGATATATTTCGTAGTGTTGTTCCTTGTATGAAAAAAAGATTATAATGGGT +TATATAGTCTTATAGATGACAATAGGATTGTATTCTTATAATGTCTATAATAGGAAGTTG +ATGTGAGTAGATAAAAGTACAAAGAATACTGATTAAGGTTTAGAATAATTAAGGGTGAAC +CGTACCACAAGTAACGGAGGTTTTTTTGAAGCATAATTATAATCATACTATATAAGCAAA +TATAAATATTTAAGAGTGTAGAGCTTCATACTAATTAAAGCCAATATGAAGCTCTATACT +CTATAATAAATAGACTAAATCAATAAGAAAATAAATATTTTAAAATATTCCACAAAAAAA +TTGTAACTGCAATGCAAGCTGAACATTGATAAATTGTTTGATAGAATAAATCATTACGAC +CTTTTGCTTCTGACCAGTTAACAAATTTCCGTATTAATATAATCACGATAGGTAGAAGCA +GTAATTGTAAAAGAAATCAACTGTAACAGAGATTATTTAAATCTTGAAAATGGAATGCTC +AAAGGAAACTGTTAAAGTACTAAGAAAAGCAATAGAAAAATAATCAAATAAGAAGATTAT +GAATTGAGAAATTGAAGATGTGAGAGATATGTATAAGTGACACAGCAGGATAATATATAA +AAATAAATGGATGAAATACCAGGAGAAAGAAAAGTACCTGGTATTGTTTGTAATTCTAAT +ATTCCTTGCAAACAACATCTTTAAAGAGCTTATGATTATTAGGAGCATCATGTTCACTGC +ATGCTTGCTGAAGTTCGGGGATATCAGTAGTTCCAGCTTCTAAAGCAGTTTTGTAGTAAT +TACATTTATAATTTATTAACTCCAAAGATTTCTGAAGAATTTCAATTCGTTTTTCTGCTT +CTGCCTTTCTTTCTAAGAACATTTTATATCGTTTTTCAATACTAGAATCGCCTTCCACGT +ACCCTTCAAAAAATGTCTTGATATCTTTAATCTGCATTCCTGTATTTTTCAAACATTCAA +TCATATTGAGCCAGCTTATATCGTTGTCATTGAATCTTCGGATACCACGTTCTGTTCTTT +CTAATAAAGGAAGAAGTCCTTCTTTATCATAATAGCGCAGTGTTGGTACTGAAAGGTTCA +TGATTTTTGCCACTTCACCTATAGTATATTCCATTTGTAGGCTCCTTTCTAATTTTAGGT +ATTGACCTAAAGTTAACTTTAGGGTGTATTATATGAATAAGCTTAATCTAAATTTATTGA +AAAGTCAAGAAATATGGATTAAGAAAATATGAAACATAGCATAACAAATAAAATTTAGAG +CCGATTTAAGGAGGGAATAAAACGACCAGTTCAAATGATTAGTTATGGAAATTATAAAAA +GTTTAACAGAGAGGAAAGAGTTGTTATGATGAAACATGTAACATTTAAAAATGCTAATAT +AGAAATGAAGGGGGATTTATACACTCCAGAAAACTTTGATGAAAGCAAGAAATATCCAGC +AATTGTGGCTGTTCATCCAGGTGGAGCAGTAAAGGAACAAGTATCAGGTCTTTATTCGAA +GAGAATGGCAGAAATGGGATACATTGCACTAGCGTTTGACGCATCACATCAGGGAGAAAG +CGGTGGAGAACCTCGCTATCTTGAAAATCCTACAGAAAGAGTAGAAGATATTCGCAGTGC +TATAGACTATCTTAGTACACTTTCTTATGTTGATATGGAAAAGGTAGGAGTACTTGGTAT +CTGTGCAGGTGGTGGATACTCCATTAATGCTGCTTTGACAGAAAAAAGAATTAAGGCAGT +TGCTACAATAAGTGCATTTGATATTGGTGCTGGTTTTAGAGGAGATGGTATGCCAGGTGG +CTTAGAAGCAACATTAGAAACATTAAATAAGGTTGCAGCTATGCGTAGCGCACAGACCAA 
+CGGAGAAGATTCTTTATATATCACATATGTTCCTAATACAGAAGAGGAAGCAAAAGCTAA +TCCAATGGTGCTTATGAAAGAAGCATATGATTATTATCGTACACCTCGTGCACAACACCC +AAATTCAACAAATAAATTACTATTTACTAGTCTTGATAAGATTATAGCATTTTCAGCATT +CGGATTTGTGCCAACATTATTAACACAGCCTATTTTAGCAATAGTAGGAAGTGAAGCTAA +TACAAAAGGATTTAGCGAAAATGCAATTGAATTGGCAAATAGTCCAAAAGAATTATTTGA +AGTAAAAGGTGCGACACATATCGCAATGTATGATGTACCTGAGTATGTTAATCAAGCAGT +TAATAAACTTGGAGAGTTTTTTGGGGAAAATCTATAATTAAGGACAGCTTTAAAAAGGAA +GTACTTATAATATTTAATACATTGGCTATTATAAAGTTATCAGATGATAGAAAGTGTCAG +TTCTGAAATATTTCTAAATTTAAAAGATTATGTGATTGGACAACAAGCCTTAATTACCGG +TGAAACAGTGATTAAGGCTTTTTCTAGTGTAAAAAATATGGAAGATTACTATGCTGAGAA +AAAATGCTTTAAGACACTAAAATATCGGAAATATCAGCAATACATTAGAAGGCAGTATTG +TTTAAAATTAATGATTAGTTTGCATTTCTTTTATTATTTCTAAAAACTCAATCACATCCT +TGGATGGATTTAAGGGATAGAACAAGCCATAAGGTATAGTGAAATCCAAATCGCTAGGGA +TGGTCACTAATGAGGGATGAATATCTGACCAGGCATCTAATGTTAAGAGGATAAGGTTAT +TTTGCCCGCAGCGATTGAATACATCAATATCATAAAGACGAGGAACACTTTTAATATGAA +TTTCAGGATGGTTCTTTATGAAAAATTCACTGATTTCATCAATGGATGCACTAGTTCCAC +CTTTTATTACAACCAACTTTTCTCCATACAAATCTGTTACAGAAAGTTTTTTCTTAGAAG +CCAGCCGATGTTTGCGAGATACAGCAAAGCAAAAATGGTAAGTTCCTAGCTTAAGTTTAT +GGGAATAATCATTCCAATTTTCTGTGAGGAATGGTCCTACTATGAAATCAAAATGTTTGC +CAATGTTACGATAAGTGTTTGGTAATGTATATGCATCATCCTCAAAAGGGACAATTTTTA +TCTCGAACTGGGGATGTTTATTACTGATTTCATTCCAAAGATCCAGTAAAACCTTACAGG +GATTAAGTATAGAGGTCCCAACCCGAATAATATGCTTATCTACATGCGTTGTATTACGTG +TACGCAGTAGTGCTTCTTGAAAATACTGCAAAATAAATTCTGCATCCTTATTGAAGGATT +TTCCTGCATCGGTCAGTTCAACACCATGGTTGGTGCGAATGAAAAGAGGTACACCCACTT +CCTTCTCAAGGGAATTAATCTGTTTCATCACAGCCGTTGGGGAAACAAACAGTTTTTCAG +CAGCCTTGGAAAAGCTACCAGACTCTGCTACCTGAATAAAAGCATTCAATTGTTCATTCA +TTTAAAGAATCTCCTTTTTGCATAAACTTTTAGTTAATGCCATGCTAACCTATTTGAACT +TCCAAATCAAGCGATTATACATTAATATTAGAACTATACTAAAGAAACACAAGTTAAAAA +AAGGAGATGCTTACTATGAAAACAGCATTAGTTACTGGTTCCAACAAAGGAATAGGTTTT +GAAATTTCTAAAAAATTGTTAAAAGAAGGATATCACGTTTTAGTTGGTGCTCGAGATGAA +AGCCGTGGCAAGAATGCAATAACAAAACTCGCTAAGTATGGTCCTGTTGATTTAATAAAA +ATAGATTATTCAGATAGTGAAAGTATAAAAGAGGCAGTACAGCGAGTTTCAAATGAATAT 
+AAAAAGCTTAATTTATTGGTTAATAATGCAGGCATACCTGGACCACTTATTAAACGCCCT +AGCTGGGAATTTACAAAAGAAGAGCTTTTAGAAACTTATACAGTAGATTTTCTTGGTCCA +TTTGAGCTGTCGAAAGGATTATTACCCATTCTTATAGAGAACCATGGCAAAATAGTCAAT +GTTTCAATTCCAATTGAACCAATGCCATTTCCTAATATGAACCCTTTTGCATATCTAACT +GGAAAAGCACCACTTAATATAATGACAAAGTCTTGGGGAATTAGCATTGATGAGATTGCT +CTTCCGGTACAGATATTTGCAGTGATGCCCGGTGCCGTTTCAACTGATTTGAATAATCAT +ACAACAGGAAAGGGTGTAAAAATGCCTGAAGAAGCTGCAGAATGGATTGCAGGTTTAGCT +TTGGATGAAAAAAATCACAATGGTCAAGTTATCAATTTTGAAGGTAAGCTAGCTGACTAT +AAAAATTTAGTATAGTTTATTTTGTCAATAAAAAAGACACTGGCAAATGGACTTGTTATT +TGATTGTGCCTAAATATAATAAGAGAGTAAGGAAAATGGAGTATCACTTATTAATGGATA +CGGTGAATCATAATTCCTTCTCTTATTTTCTCTTCATTAATATTTGTATAGTTCAAGCGT +AAGGTATTTTTATGACCGCCATTTGGGAAAAATGCTTCGCCTGAAATAAAAGCTATACCA +TTTTTTTAATGCACATTTTAATAATTCAAAGTGGTACTCCGTCTAGACATCAGATAAATA +AATGAATTAGAAGTTTAAAATCGGGAACTGATAATGCACAATATATTAGGAGAGATGCCA +TAACAATATTGAATAACTTTCTATGATGGCTAAGAATTTTTTGAAATAATGCACCAAAAA +GAGTCCAGCAGGACATTGATATAAATCCTACGAATGCAAGGAATATAGCAAATAAGTATA +ATATTATGCTCGATTTATAATAGGGGATTATAAAAATTGAAGTTGCCGTTATACCATAAA +GTATTGTTTTGGGATTCATGAATTGCATAAGAATTCCAGATTTAAATGTGTAGATGCTGT +AATCATTAATTGCATTTGGGTTAGAATTACTTTTGAGTATCTTGATAGCTAAATATATTA +TGTATGCTGAACTAAATACCTCCATAGACAACTTTATGTTAGGAAGAAGTTTAAATAAAA +ATAAGTTAAGATAACCACACAATAACATGATCACAGCCATACCAGTTGTAGCACCTAGAA +CAAATTTGAATGTTTTTTTATATCCAAAGTTAGTACCATTTGTCATAGACATAATATTAT +TTGGACCAGGTGTAAAAGTTGAAACAAGAACATAAGATAAAAGTGCAGATAAATTGAACA +TAGCTGACCTCCTTAATGATTTAATTTGAAAAGATAGTAGTAATATGATATCATGCTACT +AACAATTAGTGAAATCAATAGTTTAAATTGTAACAATCAAAATAAATGATTGATTTGGAG +GCATATACAAATGGAATTTCGTCAATTAAATGCATTTATTACCGTAGCTAAGCTGAGTAA +TTTTACAAAAGCAGCATTTGAACTAGGATATTCACAATCTGCTATCACTGCTCAGATACA +ACAGTTGGAAAAAGAATTAGGTGTTAATTTGTTTGAAAGATTAGGGAAAAATATATCTTT +GACTTCAGAAGGAGAACAGTTTCTTGTTTATGCTAAACAAATAATTAAGCTTTGTGATGA +AGCAAAAAGTAATTTAAGTACATCAGATGTAGTAAAGGGAACTTTGACAATAGGGGCTAA +TGAATCACTTTGCGCTGTTAGGCTTCCTCCTCTATTAAAAGAATTTCATGACCGCTATCC +GGAAATTGAAATCCTTTTAAAAATGGAAGGTAATAATAAATGTAAAACATTGATAAGAGA 
+AAATCAGATTGATGTTGCCTTTATTATTGGTCAGAAAATAAATGATTCTGATTTAATCAC +AGAATTGGAATTTCCTGAACCTTTAGTTTTATTAGCAATTCCAGGACATCCACTTGCTTT +CAAGAAACATGTTTATCCTGAAGATATTGCTGACTATAACATAATTGTTGCAGAAAAAGG +CTGCGGATACCGAAATCTTTTTGAACGAAGTCTCAATGATGCTGGGGTAACTCCGAAATC +AATAATGGAAATGGGAAGTATCCAATCAATCAAACAATTGACAATGAGCGGGCTTGGAAT +AACCCTGCTTCCCAAGATTGCTGCTCAGGAGGAGTTGAAACGAAAGCAATTAAGAGAACT +TCAGTGGTGGCAAGATTCCTTTTATTTGACTACGCAAATGGTATATCATAGAGATAAGTG +GGGTTCAAGGGCATTAAGAGCTTTTATAAATTTGAGTAAAGAAATGATGAATAGTTAATA +TAATATTTTGAATTCAAATAACAAAGGTATGTATTTTGTGTTCTTAGTGTTTTCTTATTT +AATTAAACATTAACATTTTTATATGACGTAACAGTTAAAAATGGAATGATAAGAAGCATC +TAAATAAGAATTCTTTATTTTTCAACGAAAGAACTTTTTATATAGATGATAAAGGAAAAA +GAGTTATCACTTGCTATTGATACGGTGAACCATGTCATAAGTGAGTGATAAATTTATTAA +GGCCAAATAATGAAAATGAATTAGAATAAAGGAACTTACCATATAAGAAAGTAATATGGT +AAGTTTTATTTAATTTATAGACAATATAGAGACAAACAGAATAGAAAATTCATTGCAAAG +AGCCTATAAAAAATGAAAAGAACACTATTTGATATAATGTTCTTTTCATCTTTTTAAAGT +ACCCATACCTGTAGAAACAAACCCAATAATCTCAGAAATTAGAACTTCTTGAGGGAGTGG +TCTATTCTGAACTTCCCATTTGTAAGTTACATTATAAATGGAAGTGCTCATCATTGTTGC +AACTAATTGGGCTCGTATTTGTTCATTTTCAGCTAATTCATGGTCTTTTATTAGCATCTC +ATAGATTATGGAATATAGCTCACAAGATATTTTATCTCCTATTAAAGGTAAAATAGAGGC +AAAACCGCACTTGCAGATGCTTTTTACCATTTCAAAGTACTCACAAACGCAAAGAATGAG +GCTACGTATAGTATTCTCATTTAGGCGCATCTCGCCTAGTAGTTTATGGGAAAAGATCTC +AGAGAATTTCTCTACTATGACGGTGTCCAGTAGTGCATATTTATCTGTAAAATGTGCATA +GAAGGTTGCACGGTTTATGGTGGCTGCGGTAGTAATATCCTTAACTGTAATGGAGTCAAA +GCCTTTTTTAGTAACTAGTGAAATAAAAGCGTTGATAATAAGATCATGGGTTCTTTTTAC +TCTAGGATTATTTATATTTACTGACATATCATTACCTCATTTAAAAGAATTTTAGTTAAA +CTCATATTGCAGATTTAGTTTTATGTAAGCAACACTTTATCAATTTTGTTGGTTGAAAAA +TGTATAAGCTAATTTTATAATCATTATAAGCAACACGAGTTGGCTTTGCAATATGTGTAG +CTATAATAATTGTTTGAAATAAATATTTATTTCTAGGGGGTTAAATATGTCAACTATAAA +TCCAAACTGTAAATGCATAAATAAAAGCTGTAAAAATCACGGGAATTGTAAAGCATGTCG +AGAGTTTCATAAATCACAATCATATCTTAATGAAACATATTGTCAGGCTGGTGCAGTTAA +ATTCATAACTAAAGGATTAATATCTAAGTTAAAGTTACATTCTTAGCACGCAACCTAGCA +ATTCAAATGCGGACTTACTATAAAATAAAAAGGGTGGGAGTGCTGAAATATGAATTTAGA 
+TAATCCGATTTTCAAGAAGCCAATTACAGAAGTTATTAAAGCAAGAACATCTATGAGGTC +ATATAATGGGGTGCCTTTGGACAAAACAATAGGCGATAGCATAATGGATGTCATAAATCA +GGTTAAAGCACCTTTTTGCACAAATATACGAGTTAAGATGATAAACTCTAAAGATTCAGA +TTTAAAGCTTGGAACCTATGGTATTATAAAAGGAACTTCTACCTTTATAGTCTCATCAAT +TTCAAAATCAGATAATGCATTAGTTGATTTAGGATATTTATTGGAAAGAGCAGTGCTGTA +TGCAACTGATTTAAATTTGGGTACATGTTGGCTAGGCGGTACCTTTAATAAAGGTCAATT +TGCAGAGGTTATGGAGCTTAAAGATAATGAAATCCTTCCAATAGTTATACCTGTAGGGTA +TCCTAGTGAAGCAAGAAGAGGTATGGATACTTTCGTTAGGTTTATGGCAGGTTCTAAAAA +TAGAAAAGCCTGGTCTAAACTATTCTTTGATGTAGATTTCAATACACCTTTAAAGGAGCT +GGAAAGCTTAGAGTATTTTATTCCCCTTCAAATGGTAAGGCTTGCGCCTTCAGCAGCAAA +CAAGCAACCTTGGAGAATAATTAAAAGTTCTAATTCATACGATTTCTATTTAGAACGAAG +TAAAGACTCAAAAAGTGATAGTTATAATGATATGCACAAGATAGATATTGGCATAGCTAT +GTGTCATTTTGAGTTAACTTCAGCAGAATTAGGGTTAGAGGGACATAGGGAGAAACTTAA +GCTTTCATCTCAAGATAATAAAAAATATATTATTTCTTGGGTTAGAAAATAGGGAAGCGA +ATTTGCTATAAATGAAATATATGTTGAATATGTAAGAGATATTGATTACTATAGCAAGTT +ATTTTCACATTTTTCTTCATGAACTTTTTTTCTTTCTTCGTTAATAATTTGCGCCGCGTT +AGGAGATGTATACTTCACTATTTTGGTATGAATAAGGTCTAAATTTTCATTGAGATCATT +TTAAAGGAGTTTTCTTTTTGACACAGAAACTAATTCCGCTTATTGCTGACAAAGGTGGGA +TTGTGAACACTTCAACAGGATTAACTCGTTTTACATCTCCAGGTTCCAGACTTTATGCTG +CTGCGAAAGGTTCAGTAGAAGTCTTCACTAGATATTTAGCAAAAGAATTAGGAACTAGAG +GTATAAGAGCGAATACGATAGCGCCTGGAGCTAGTAAAGGTGCCCCTTATTTTAGAAATC +GCTTCATTGATTTAGTTACTTATAAATTAGAGGATGAAGTAAATGTTACAGAAGGAGTAA +ATTCAGGATTAAGGAAAGATGCTGGCAAGCAATTTCTTGGGCAAGCTATAGTAGGATTGA +TAGAATGGTGGTTTATAAATGGAATGCCTTTTTCTCCTGATGTCTTGGTGGAACATTTAG +GAGAAGTGATAGAGAGAAATTTGTAATCATTAATTTAGACTGATTCAAATCCATTTAATT +ATAGATAGGAAATATTAAAGGAGGTTTAGAATAATTAATAGTGAACCATATCACAAATAA +AGGATAATGAGTAAGTTTGGGTAAAATTAGGGATAAGAAATAAATTTAATCCTAGTAAAG +CTTATTGTTGCATTACTAGGATATTTTAATACGTTCAATATTATAAAAATAGGTGCCAAA +ATTTTGACTCTTACGTTTAAATTGAAAAATTGTAATTGCTAGACTAAAGTATATTAAGGT +TAAAAAAATATAATTAGTGAGGTGAAATCAAATGTATATTATACTGATTGCAGGAATGCC +AGCAGTTGGTAAGACAACATTTGCTAACAAGGCATAGAGGTCACAGAATATCCTGAATCA +ATAAATCACAATGCTATTCCTGATCAAATAAAATGGGTTTCAGTGCAAATCACAAGCACT 
+GAAACCCATTTTAATTTGAAGCTTAACGTATATATTAAAATATCATTACTAATATTTTAG +GTAACAGCATGGAAACACCTACTGCATAAAGTAAATCAAGCCACATTACAGAACCAACCG +CGCCAATATAAATAAGACCAATGACAGGCGCTACTAATATTTTTATTAAAAATGATACTT +CTTTATTCTTTAAAATAGCATTTACAATACATTTTGCATCACCTGTACTTGGGAAGGCAT +GCATTAAAATAGAAAATCCAATCCAACAAATTAATAGATTAATTAAGTTTGAATATTGTC +CAAATTCTACTATTTCAATTGATACTGGTAATGTAATTAGAGCTCCTATGATAGTATTTA +TTAAAAATGGACCTACTGAAATAAAAAATACTTTTAATGGATTATCACTAGGTTCGTGCA +AAACATATCCACAAGGATTTGATAATTGGAAATATTTAACTTCATAGACTGGCACTCTCA +TTAGTCTGCAAAATATCTGATGTGCTAGTTCATGAACAATTACTCCAGGAAAGGTTACAA +TAGAAATTAAAAAACCAGGTATTATCATTAATTTTGCCCTCCATTACTTAGGTAATAATC +ATTTATATTTATTTTTCCATTAAAATAATCTAATGTAGTATTATCAAATTTGTTTCCTGT +AGAAAGATATTCATCTTTGATTTTATTAACTTCTTCAGTT diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/sim1_galaxy.fasta.stats --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/sim1_galaxy.fasta.stats Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,19 @@ +NODE_2_length_40000_cov_63.1617_ID_3 0.659475 63.1617 0.00130009750731305 0.00138760407030527 0.00163762282171163 0.00116258719403955 0.00220016501237593 0.00250018751406355 0.00296272220416531 0.00195014626096957 0.00261269595219641 0.00411280846063455 0.00402530189764232 0.00156261719628972 0.000825061879640973 0.00127509563217241 0.00152511438357877 0.000750056254219066 0.00188764157311798 0.00152511438357877 0.00240018001350101 0.00178763407255544 0.00430032252418931 0.00366277470810311 0.00590044253318999 0.00387529064679851 0.00348776158211866 0.0050628797159787 0.00410030752306423 0.00237517813836038 0.000700052503937795 0.00160012000900068 0.00266269970247769 0.00156261719628972 0.00272520439032927 0.00207515563667275 0.00182513688526639 0.00222516688751656 0.00538790409280696 0.00536290221766633 0.00685051378853414 0.00360027002025152 0.00376278220866565 0.00747556066705003 0.00377528314623597 0.00387529064679851 0.0011250843813286 0.00200015001125084 0.00145010875815686 0.00195014626096957 0.000537540315523664 0.00123759281946146 
0.00100007500562542 0.000775058129359702 0.0037252793959547 0.00357526814511088 0.00613796034702603 0.00222516688751656 0.00272520439032927 0.00390029252193915 0.00410030752306423 0.00178763407255544 0.000312523439257944 0.00172512938470385 0.00117508813160987 0.00116258719403955 0.000762557191789384 0.00396279720979073 0.00390029252193915 0.00117508813160987 0.00151261344600845 0.00666299972497937 0.00388779158436883 0.00145010875815686 0.00231267345050879 0.0101132584943871 0.00932569942745706 0.00266269970247769 0.000762557191789384 0.00550041253093982 0.00472535440158012 0.00152511438357877 0.00213766032452434 0.00461284596344726 0.00952571442858214 0.00410030752306423 0.00356276720754057 0.00317523814286071 0.00700052503937795 0.00377528314623597 0.00802560192014401 0.0121259094432082 0.0114758606895517 0.00410030752306423 0.000975073130484786 0.00456284221316599 0.00932569942745706 0.00402530189764232 0.00573793034477586 0.00880066004950371 0.00772557941845638 0.00613796034702603 0.00585043878290872 0.0174763107233042 0.0122009150686301 0.00685051378853414 0.00503787784083806 0.0177888341625622 0.00700052503937795 0.00590044253318999 0.00187514063554767 0.00630047253544016 0.00388779158436883 0.00296272220416531 0.000162512188414131 0.00238767907593069 0.000150011250843813 0.00100007500562542 0.00186263969797735 0.00362527189539215 0.00772557941845638 0.00182513688526639 0.00345025876940771 0.00903817786333975 0.00952571442858214 0.00240018001350101 0.000262519688976673 0.00651298847413556 0.00390029252193915 0.00163762282171163 0.00305022876715754 0.00391279345950946 0.00651298847413556 0.00172512938470385 0.00295022126659499 0.00625046878515889 0.00630047253544016 0.00200015001125084 0.00311273345500913 0.00576293221991649 0.00456284221316599 0.00160012000900068 0.00153761532114909 0.00735055129134685 0.00550041253093982 0.00127509563217241 0.00370027752081406 0.00465034877615821 0.00903817786333975 0.00390029252193915 0.00866314973623022 
0.00813811035827687 0.0177888341625622 0.00747556066705003 0.00891316848763657 0.0162762207165537 0.0121259094432082 0.0050628797159787 0.00121259094432082 0.00576293221991649 0.0101132584943871 0.00411280846063455 0.00406280471035328 0.00391279345950946 0.00362527189539215 0.00357526814511088 0.00645048378628397 0.0127759581968648 0.0174763107233042 0.00536290221766633 0.00253769032677451 0.00813811035827687 0.00317523814286071 0.00366277470810311 0.00232517438807911 0.00625046878515889 0.00666299972497937 0.00250018751406355 0.000500037502812711 0.00215016126209466 0.00238767907593069 0.00123759281946146 0.00271270345275896 0.00391279345950946 0.00880066004950371 0.00207515563667275 0.00272520439032927 0.00465034877615821 0.00461284596344726 0.00152511438357877 0.000350026251968898 0.00391279345950946 0.00396279720979073 0.00138760407030527 0.000375028127109533 0.000350026251968898 0.000262519688976673 0.000312523439257944 0.000950071255344151 0.00232517438807911 0.00187514063554767 0.0011250843813286 0.000812560942070655 0.00121259094432082 0.000975073130484786 0.000700052503937795 0.000425031877390804 0.00153761532114909 0.000762557191789384 0.000825061879640973 0.00207515563667275 0.00272520439032927 0.00345025876940771 0.00272520439032927 0.00383778783408756 0.00253769032677451 0.00503787784083806 0.00376278220866565 0.00797559816986274 0.00891316848763657 0.00802560192014401 0.00348776158211866 0.000812560942070655 0.00311273345500913 0.00231267345050879 0.00261269595219641 0.002675200640048 0.00271270345275896 0.00186263969797735 0.0037252793959547 0.00360027002025152 0.00645048378628397 0.00585043878290872 0.00538790409280696 0.00383778783408756 0.00866314973623022 0.00356276720754057 0.00430032252418931 0.000950071255344151 0.00295022126659499 0.00151261344600845 0.00220016501237593 0.000100007500562542 0.000500037502812711 0.000162512188414131 0.000537540315523664 0.002675200640048 0.00406280471035328 0.00573793034477586 0.00272520439032927 
0.00207515563667275 0.00370027752081406 0.00213766032452434 0.00188764157311798 0.000375028127109533 0.00305022876715754 0.000762557191789384 0.00130009750731305 sim1_galaxy +NODE_3_length_40000_cov_63.0619_ID_5 0.337525 63.0619 0.0182888716653749 0.00616296222216666 0.00946320974073056 0.0132134910118259 0.00581293597019776 0.00292521939145436 0.00257519313948546 0.00643798284871365 0.00778808410630797 0.005625421906643 0.00353776533239993 0.00642548191114334 0.0113633522514189 0.00582543690776808 0.00646298472385429 0.0148761157086782 0.00511288346625997 0.0016876265719929 0.00256269220191514 0.00378778408380629 0.00481286096457234 0.00111258344375828 0.00132509938245368 0.00245018376378228 0.0019001425106883 0.0014751106332975 0.0011250843813286 0.00247518563892292 0.0049253694027052 0.00208765657424307 0.00352526439482961 0.00642548191114334 0.00722554191564367 0.00227517063779783 0.00243768282621197 0.0054129059679476 0.00563792284421332 0.00275020626546991 0.00193764532339925 0.00475035627672075 0.00317523814286071 0.00150011250843813 0.00127509563217241 0.00245018376378228 0.00568792659449459 0.0020376528239618 0.00280021001575118 0.00643798284871365 0.010150761307098 0.00351276345725929 0.00341275595669675 0.00897567317548816 0.00691301847638573 0.00310023251743881 0.00230017251293847 0.0054129059679476 0.00621296597244793 0.00361277095782184 0.00395029627222042 0.00378778408380629 0.012625946946021 0.00570042753206491 0.00698802410180764 0.0132134910118259 0.00712553441508113 0.00390029252193915 0.00407530564792359 0.00698802410180764 0.00181263594769608 0.00237517813836038 0.00135010125759432 0.00280021001575118 0.00300022501687627 0.00351276345725929 0.00171262844713353 0.00352526439482961 0.00440033002475186 0.00416281221091582 0.00255019126434483 0.00646298472385429 0.0045003375253144 0.00206265469910243 0.00327524564342326 0.00395029627222042 0.00176263219741481 0.000837562817211291 0.000537540315523664 0.00127509563217241 0.00103757781833638 
0.000950071255344151 0.000500037502812711 0.0011250843813286 0.00226266970022752 0.000887566567492562 0.00171262844713353 0.00353776533239993 0.00186263969797735 0.000850063754781609 0.000775058129359702 0.00230017251293847 0.001337600320024 0.000775058129359702 0.000575043128234618 0.00193764532339925 0.000962572192914469 0.00078755906693002 0.000537540315523664 0.00132509938245368 0.00176263219741481 0.00128759656974273 0.00135010125759432 0.00257519313948546 0.00701302597694827 0.00261269595219641 0.00257519313948546 0.00341275595669675 0.00223766782508688 0.00161262094657099 0.000775058129359702 0.00243768282621197 0.00326274470585294 0.00265019876490737 0.00327524564342326 0.00256269220191514 0.00552541440608046 0.00431282346175963 0.00407530564792359 0.00946320974073056 0.00820061504612846 0.00280021001575118 0.00431282346175963 0.00570042753206491 0.00198764907368053 0.00137510313273496 0.00128759656974273 0.0020376528239618 0.00228767157536815 0.00178763407255544 0.000887566567492562 0.00208765657424307 0.00528789659224442 0.00245018376378228 0.00416281221091582 0.00582543690776808 0.00553791534365077 0.00177513313498512 0.00265019876490737 0.00361277095782184 0.0028127109533215 0.000875065629922244 0.00078755906693002 0.00150011250843813 0.00127509563217241 0.000925069380203515 0.000950071255344151 0.0014751106332975 0.0041503112733455 0.00178763407255544 0.00351276345725929 0.005625421906643 0.00420031502362677 0.00121259094432082 0.00161262094657099 0.00310023251743881 0.00195014626096957 0.000500037502812711 0.000775058129359702 0.00275020626546991 0.00158761907143036 0.000875065629922244 0.000837562817211291 0.00111258344375828 0.0030252268920169 0.00137510313273496 0.00237517813836038 0.00292521939145436 0.00613796034702603 0.00175013125984449 0.00261269595219641 0.00351276345725929 0.00235017626321974 0.00121259094432082 0.000850063754781609 0.00227517063779783 0.0028127109533215 0.00177513313498512 0.00206265469910243 0.0016876265719929 
0.00488786658999425 0.00280021001575118 0.00390029252193915 0.00616296222216666 0.0135135135135135 0.00488786658999425 0.00552541440608046 0.012625946946021 0.00353776533239993 0.0030252268920169 0.00176263219741481 0.00568792659449459 0.00427532064904868 0.0041503112733455 0.00226266970022752 0.0049253694027052 0.00500037502812711 0.00528789659224442 0.00440033002475186 0.0113633522514189 0.00693802035152636 0.0028127109533215 0.00326274470585294 0.00621296597244793 0.00440033002475186 0.00158761907143036 0.000962572192914469 0.00317523814286071 0.00157511813386004 0.00127509563217241 0.00103757781833638 0.0019001425106883 0.00427532064904868 0.00228767157536815 0.00300022501687627 0.00778808410630797 0.00772557941845638 0.00235017626321974 0.00223766782508688 0.00691301847638573 0.00465034877615821 0.00195014626096957 0.001337600320024 0.00563792284421332 0.00440033002475186 0.0028127109533215 0.00176263219741481 0.00481286096457234 0.00353776533239993 0.00198764907368053 0.00181263594769608 0.00581293597019776 0.0132509938245368 0.00613796034702603 0.00701302597694827 0.010150761307098 0.00772557941845638 0.00420031502362677 0.00186263969797735 0.00722554191564367 0.00693802035152636 0.00553791534365077 0.0045003375253144 0.00511288346625997 0.0135135135135135 0.00820061504612846 0.00712553441508113 0.0182888716653749 sim1_galaxy +NODE_10_length_39995_cov_63.156_ID_19 0.413826728341043 63.156 0.0113147629525905 0.00603870774154831 0.00798909781956391 0.00895179035807162 0.00562612522504501 0.00370074014802961 0.0030131026205241 0.00402580516103221 0.00691388277655531 0.00430086017203441 0.00370074014802961 0.00506351270254051 0.00813912782556511 0.00512602520504101 0.00655131026205241 0.00730146029205841 0.00578865773154631 0.0025505101020204 0.00408831766353271 0.00437587517503501 0.00412582516503301 0.0017378475695139 0.00381326265253051 0.00323814762952591 0.0024254850970194 0.0018253650730146 0.00327565513102621 0.0023504700940188 0.0024629925985197 
0.0020254050810162 0.00372574514902981 0.00506351270254051 0.00738897779555911 0.0025255051010202 0.0022379475895179 0.00410082016403281 0.00428835767153431 0.00342568513702741 0.0020254050810162 0.002875575115023 0.00427585517103421 0.0023629725945189 0.0012627525505101 0.00323814762952591 0.00383826765353071 0.0022379475895179 0.0031756351270254 0.00402580516103221 0.00748899779955991 0.00542608521704341 0.00420084016803361 0.00690138027605521 0.00577615523104621 0.00431336267253451 0.0028005601120224 0.00410082016403281 0.00667633526705341 0.00445089017803561 0.00553860772154431 0.00437587517503501 0.00595119023804761 0.00613872774554911 0.00607621524304861 0.00895179035807162 0.00673884776955391 0.00380076015203041 0.00367573514702941 0.00607621524304861 0.00303810762152431 0.00316313262652531 0.002000400080016 0.0031756351270254 0.00425085017003401 0.00378825765153031 0.00338817763552711 0.00372574514902981 0.00517603520704141 0.00506351270254051 0.00425085017003401 0.00655131026205241 0.00537607521504301 0.002875575115023 0.0027005401080216 0.00553860772154431 0.002750550110022 0.0013002600520104 0.0019378875775155 0.0012627525505101 0.0028880776155231 0.0025380076015203 0.00412582516503301 0.00327565513102621 0.0022254450890178 0.0018253650730146 0.00338817763552711 0.00370074014802961 0.00337567513502701 0.0013377675535107 0.000612622524504901 0.0028005601120224 0.002375475095019 0.0019003800760152 0.000925185037007402 0.0020254050810162 0.00438837767553511 0.0026880376075215 0.0019378875775155 0.00381326265253051 0.0029255851170234 0.0019378875775155 0.002000400080016 0.0030131026205241 0.0022254450890178 0.0030256051210242 0.000675135027005401 0.00420084016803361 0.0020754150830166 0.0030881176235247 0.000612622524504901 0.0022379475895179 0.00405081016203241 0.00431336267253451 0.0027005401080216 0.00408831766353271 0.0025755151030206 0.00573864772954591 0.00367573514702941 0.00798909781956391 0.00908931786357271 0.00391328265653131 0.00573864772954591 
0.00613872774554911 0.00358821764352871 0.0016878375675135 0.0019378875775155 0.0022379475895179 0.0024754950990198 0.0016878375675135 0.0018253650730146 0.0020254050810162 0.00497599519903981 0.0018253650730146 0.00506351270254051 0.00512602520504101 0.00438837767553511 0.0028130626125225 0.00431336267253451 0.00445089017803561 0.00455091018203641 0.0015753150630126 0.0026880376075215 0.0023629725945189 0.0014627925585117 0.0014002800560112 0.0025380076015203 0.0018253650730146 0.0028380676135227 0.0016878375675135 0.00378825765153031 0.00430086017203441 0.00658881776355271 0.002375475095019 0.0030881176235247 0.00431336267253451 0.00467593518703741 0.0011752350470094 0.0019003800760152 0.00342568513702741 0.0026380276055211 0.0015753150630126 0.0013002600520104 0.0017378475695139 0.00436337267453491 0.0016878375675135 0.00316313262652531 0.00370074014802961 0.00450090018003601 0.0027255451090218 0.0030256051210242 0.00542608521704341 0.00322564512902581 0.002375475095019 0.0013377675535107 0.0025255051010202 0.0031381276255251 0.0028130626125225 0.002875575115023 0.0025505101020204 0.0026130226045209 0.00391328265653131 0.00380076015203041 0.00603870774154831 0.00715143028605721 0.0026130226045209 0.0025755151030206 0.00595119023804761 0.00455091018203641 0.00436337267453491 0.0029255851170234 0.00383826765353071 0.0026130226045209 0.0028380676135227 0.0022254450890178 0.0024629925985197 0.00572614522904581 0.00497599519903981 0.00517603520704141 0.00813912782556511 0.00473844768953791 0.0031381276255251 0.00405081016203241 0.00667633526705341 0.00506351270254051 0.0026380276055211 0.00438837767553511 0.00427585517103421 0.0013502700540108 0.0014627925585117 0.0028880776155231 0.0024254850970194 0.0026130226045209 0.0024754950990198 0.00425085017003401 0.00691388277655531 0.00752650530106021 0.00322564512902581 0.0020754150830166 0.00577615523104621 0.00462592518503701 0.00467593518703741 0.002375475095019 0.00428835767153431 0.00506351270254051 
0.00455091018203641 0.002750550110022 0.00412582516503301 0.00455091018203641 0.00358821764352871 0.00303810762152431 0.00562612522504501 0.00407581516303261 0.00450090018003601 0.0022254450890178 0.00748899779955991 0.00752650530106021 0.00658881776355271 0.00337567513502701 0.00738897779555911 0.00473844768953791 0.00438837767553511 0.00537607521504301 0.00578865773154631 0.00715143028605721 0.00908931786357271 0.00673884776955391 0.0113147629525905 sim1_galaxy +NODE_12_length_39995_cov_63.3136_ID_23 0.308388548568571 63.3136 0.0162407481496299 0.00576365273054611 0.00773904780956191 0.0158906781356271 0.00588867773554711 0.0027005401080216 0.001625325065013 0.00673884776955391 0.00741398279655931 0.00376325265053011 0.00338817763552711 0.00655131026205241 0.0148529705941188 0.00575115023004601 0.00716393278655731 0.0143528705741148 0.00570114022804561 0.0017753550710142 0.0027130426085217 0.00423834766953391 0.00390078015603121 0.001000200040008 0.000875175035007001 0.0025880176035207 0.0012627525505101 0.000662632526505301 0.000825165033006601 0.001000200040008 0.00551360272054411 0.0022004400880176 0.0029630926185237 0.00655131026205241 0.00803910782156431 0.0016128225645129 0.00307561512302461 0.00621374274854971 0.00425085017003401 0.0017878575715143 0.0011877375475095 0.00420084016803361 0.00358821764352871 0.00125025005001 0.0012627525505101 0.0025880176035207 0.00631376275255051 0.0016503300660132 0.0025255051010202 0.00673884776955391 0.0142153430686137 0.00517603520704141 0.00683886777355471 0.0150280056011202 0.00631376275255051 0.00346319263852771 0.0016878375675135 0.00621374274854971 0.00641378275655131 0.00362572514502901 0.00458841768353671 0.00423834766953391 0.0136027205441088 0.00648879775955191 0.00613872774554911 0.0158906781356271 0.00687637527505501 0.00333816763352671 0.00327565513102621 0.00613872774554911 0.0018128625725145 0.0021504300860172 0.000475095019003801 0.0025255051010202 0.00303810762152431 0.0024504900980196 0.001625325065013 
0.0029630926185237 0.00551360272054411 0.00401330266053211 0.0021754350870174 0.00716393278655731 0.00408831766353271 0.0016003200640128 0.0019628925785157 0.00458841768353671 0.0018503700740148 0.0010252050410082 0.000412582516503301 0.0012627525505101 0.000812662532506501 0.000737647529505901 0.000600120024004801 0.000825165033006601 0.0022879575915183 0.0013877775555111 0.001625325065013 0.00338817763552711 0.001625325065013 0.000312562512502501 0.000562612522504501 0.0016878375675135 0.000987697539507902 0.000587617523504701 0.0002250450090018 0.0011877375475095 0.0010127025405081 0.000675135027005401 0.000412582516503301 0.000875175035007001 0.0010502100420084 0.000600120024004801 0.000475095019003801 0.001625325065013 0.00531356271254251 0.0027755551110222 0.0022754550910182 0.00683886777355471 0.0019128825765153 0.0023504700940188 0.000562612522504501 0.00307561512302461 0.0029755951190238 0.0024254850970194 0.0019628925785157 0.0027130426085217 0.00557611522304461 0.00452590518103621 0.00327565513102621 0.00773904780956191 0.00796409281856371 0.0026630326065213 0.00452590518103621 0.00648879775955191 0.0014627925585117 0.000812662532506501 0.000600120024004801 0.0016503300660132 0.0028630726145229 0.0014502900580116 0.0013877775555111 0.0022004400880176 0.00616373274654931 0.001750350070014 0.00401330266053211 0.00575115023004601 0.00407581516303261 0.0013377675535107 0.0024254850970194 0.00362572514502901 0.0017378475695139 0.000662632526505301 0.000675135027005401 0.00125025005001 0.0010377075415083 0.000550110022004401 0.000737647529505901 0.000662632526505301 0.00377575515103021 0.0014502900580116 0.0024504900980196 0.00376325265053011 0.00446339267853571 0.000937687537507501 0.0023504700940188 0.00346319263852771 0.0015503100620124 0.000400080016003201 0.000587617523504701 0.0017878575715143 0.0018628725745149 0.000662632526505301 0.0010252050410082 0.001000200040008 0.0027005401080216 0.000812662532506501 0.0021504300860172 0.0027005401080216 
0.00527605521104221 0.0021004200840168 0.0027755551110222 0.00517603520704141 0.0016628325665133 0.000937687537507501 0.000312562512502501 0.0016128225645129 0.002250450090018 0.0013377675535107 0.0016003200640128 0.0017753550710142 0.00518853770754151 0.0026630326065213 0.00333816763352671 0.00576365273054611 0.0145529105821164 0.00518853770754151 0.00557611522304461 0.0136027205441088 0.00526355271054211 0.0027005401080216 0.0010502100420084 0.00631376275255051 0.00562612522504501 0.00377575515103021 0.0022879575915183 0.00551360272054411 0.0147279455891178 0.00616373274654931 0.00551360272054411 0.0148529705941188 0.00576365273054611 0.002250450090018 0.0029755951190238 0.00641378275655131 0.00475095019003801 0.0018628725745149 0.0010127025405081 0.00358821764352871 0.0010752150430086 0.0010377075415083 0.000812662532506501 0.0012627525505101 0.00562612522504501 0.0028630726145229 0.00303810762152431 0.00741398279655931 0.00751400280056011 0.0016628325665133 0.0019128825765153 0.00631376275255051 0.00467593518703741 0.0015503100620124 0.000987697539507902 0.00425085017003401 0.00475095019003801 0.0017378475695139 0.0018503700740148 0.00390078015603121 0.00526355271054211 0.0014627925585117 0.0018128625725145 0.00588867773554711 0.0141028205641128 0.00527605521104221 0.00531356271254251 0.0142153430686137 0.00751400280056011 0.00446339267853571 0.001625325065013 0.00803910782156431 0.00576365273054611 0.00407581516303261 0.00408831766353271 0.00570114022804561 0.0145529105821164 0.00796409281856371 0.00687637527505501 0.0162407481496299 sim1_galaxy +NODE_18_length_37282_cov_67.8523_ID_35 0.353280403411834 67.8523 0.0138683977574506 0.00527106413798653 0.00855709648863972 0.0111188604844551 0.00649159043965772 0.00274953727299552 0.0027093001421712 0.00789989001850908 0.00871804501193702 0.00515035274551356 0.00391641406690094 0.00748410633332439 0.00957643713618928 0.00551248692293248 0.00694761125566673 0.0136269749725046 0.00629040478553609 0.00198503178733335 
0.00272271251911264 0.0038091150513694 0.00482845569891896 0.00122052630167118 0.00140829957885136 0.00295072292711714 0.00207891842592344 0.00154242334826578 0.00105957777837388 0.00182408326403605 0.00563319831540546 0.00229351645698651 0.00403712545937391 0.00748410633332439 0.00761823010273881 0.0025483516188739 0.00221304219533786 0.00519058987633788 0.00592827060811717 0.0026154135035811 0.00179725851015317 0.00450655865232436 0.00278977440381984 0.00229351645698651 0.00100592827060812 0.00295072292711714 0.00602215724670726 0.00283001153464417 0.00269588776522975 0.00789989001850908 0.00846320985004963 0.00352745513559913 0.00287024866546849 0.00726950830226133 0.00689396174790096 0.00278977440381984 0.00226669170310362 0.00519058987633788 0.00549907454599104 0.00335309423536039 0.00411759972102256 0.0038091150513694 0.0110652109766893 0.00561978593846401 0.00785965288768476 0.0111188604844551 0.00714879690978835 0.00505646610692347 0.00523082700716221 0.00785965288768476 0.00193138227956758 0.00262882588052255 0.00107299015531533 0.00269588776522975 0.00323238284288742 0.00344698087395048 0.002065506048982 0.00403712545937391 0.0037420531666622 0.00405053783631535 0.00203868129509912 0.00694761125566673 0.00508329086080635 0.00195820703345047 0.00340674374312616 0.00411759972102256 0.00195820703345047 0.000844979747310818 0.000496257946833338 0.00100592827060812 0.000938866385900909 0.00112663966308109 0.000509670323774779 0.00105957777837388 0.0022264545722793 0.000831567370369377 0.002065506048982 0.00391641406690094 0.00195820703345047 0.00100592827060812 0.000657206470130637 0.00226669170310362 0.00131441294026127 0.00134123769414416 0.000509670323774779 0.00179725851015317 0.000912041632018026 0.000818154993427935 0.000496257946833338 0.00140829957885136 0.00168995949462164 0.001032753024491 0.00107299015531533 0.0027093001421712 0.00694761125566673 0.00316532095818021 0.00289707341935138 0.00287024866546849 0.00232034121086939 0.00178384613321173 
0.000657206470130637 0.00221304219533786 0.00351404275865769 0.00313849620429733 0.00340674374312616 0.00272271251911264 0.00701467314037394 0.00446632152150004 0.00523082700716221 0.00855709648863972 0.00708173502508115 0.00350063038171625 0.00446632152150004 0.00561978593846401 0.00221304219533786 0.00134123769414416 0.001032753024491 0.00283001153464417 0.00193138227956758 0.00191796990262614 0.000831567370369377 0.00229351645698651 0.00454679578314869 0.00303119718876579 0.00405053783631535 0.00551248692293248 0.00610263150835591 0.00194479465650903 0.00313849620429733 0.00335309423536039 0.00350063038171625 0.000751093108720728 0.000818154993427935 0.00229351645698651 0.00152901097132434 0.000777917862603611 0.00112663966308109 0.00154242334826578 0.00433219775208562 0.00191796990262614 0.00344698087395048 0.00515035274551356 0.00362134177418922 0.00127417580943695 0.00178384613321173 0.00278977440381984 0.00262882588052255 0.000777917862603611 0.00134123769414416 0.0026154135035811 0.00148877384050001 0.000751093108720728 0.000844979747310818 0.00122052630167118 0.00368840365889643 0.00134123769414416 0.00262882588052255 0.00274953727299552 0.00650500281659916 0.00236057834169371 0.00316532095818021 0.00352745513559913 0.00258858874969822 0.00127417580943695 0.00100592827060812 0.0025483516188739 0.00244105260334236 0.00194479465650903 0.00195820703345047 0.00198503178733335 0.00602215724670726 0.00350063038171625 0.00505646610692347 0.00527106413798653 0.0107164891762118 0.00602215724670726 0.00701467314037394 0.0110652109766893 0.00415783685184688 0.00368840365889643 0.00168995949462164 0.00602215724670726 0.00368840365889643 0.00433219775208562 0.0022264545722793 0.00563319831540546 0.00426513586737842 0.00454679578314869 0.0037420531666622 0.00957643713618928 0.00781941575686043 0.00244105260334236 0.00351404275865769 0.00549907454599104 0.00427854824431986 0.00148877384050001 0.000912041632018026 0.00278977440381984 0.00134123769414416 
0.00152901097132434 0.000938866385900909 0.00207891842592344 0.00368840365889643 0.00193138227956758 0.00323238284288742 0.00871804501193702 0.00747069395638295 0.00258858874969822 0.00232034121086939 0.00689396174790096 0.00466750717562166 0.00262882588052255 0.00131441294026127 0.00592827060811717 0.00427854824431986 0.00350063038171625 0.00195820703345047 0.00482845569891896 0.00415783685184688 0.00221304219533786 0.00193138227956758 0.00649159043965772 0.0129027066176668 0.00650500281659916 0.00694761125566673 0.00846320985004963 0.00747069395638295 0.00362134177418922 0.00195820703345047 0.00761823010273881 0.00781941575686043 0.00610263150835591 0.00508329086080635 0.00629040478553609 0.0107164891762118 0.00708173502508115 0.00714879690978835 0.0138683977574506 sim1_galaxy +NODE_5_length_39999_cov_63.277_ID_9 0.373284332108303 63.277 0.0193769376937694 0.0069006900690069 0.0091009100910091 0.0130513051305131 0.00741324132413241 0.00387538753875388 0.00431293129312931 0.00398789878987899 0.00758825882588259 0.00542554255425543 0.00397539753975398 0.00536303630363036 0.00763826382638264 0.0069006900690069 0.00657565756575658 0.0135763576357636 0.00716321632163216 0.00212521252125213 0.00342534253425343 0.00365036503650365 0.00482548254825483 0.00111261126112611 0.00188768876887689 0.00273777377737774 0.00353785378537854 0.00166266626662666 0.00183768376837684 0.00252525252525253 0.00301280128012801 0.00145014501450145 0.0028002800280028 0.00536303630363036 0.00815081508150815 0.00196269626962696 0.00216271627162716 0.00382538253825383 0.00442544254425443 0.00292529252925293 0.00286278627862786 0.00357535753575358 0.00367536753675368 0.00211271127112711 0.00136263626362636 0.00273777377737774 0.00343784378437844 0.00265026502650265 0.00255025502550255 0.00398789878987899 0.00771327132713271 0.00295029502950295 0.00253775377537754 0.00457545754575458 0.00681318131813181 0.00308780878087809 0.00355035503550355 0.00382538253825383 0.00596309630963096 
0.00311281128112811 0.00431293129312931 0.00365036503650365 0.0078007800780078 0.00547554755475548 0.00836333633363336 0.0130513051305131 0.00836333633363336 0.00472547254725473 0.00428792879287929 0.00836333633363336 0.00251275127512751 0.00265026502650265 0.00162516251625163 0.00255025502550255 0.00326282628262826 0.00343784378437844 0.00207520752075208 0.0028002800280028 0.00323782378237824 0.00422542254225423 0.003000300030003 0.00657565756575658 0.00562556255625563 0.00232523252325233 0.00251275127512751 0.00431293129312931 0.00228772877287729 0.00123762376237624 0.000975097509750975 0.00136263626362636 0.00196269626962696 0.002000200020002 0.000425042504250425 0.00183768376837684 0.00221272127212721 0.00162516251625163 0.00207520752075208 0.00397539753975398 0.00325032503250325 0.0021002100210021 0.00122512251225123 0.00355035503550355 0.00206270627062706 0.00202520252025203 0.0009000900090009 0.00286278627862786 0.0017001700170017 0.00166266626662666 0.000975097509750975 0.00188768876887689 0.00205020502050205 0.00157515751575158 0.00162516251625163 0.00431293129312931 0.00497549754975498 0.00193769376937694 0.00202520252025203 0.00253775377537754 0.00198769876987699 0.00163766376637664 0.00122512251225123 0.00216271627162716 0.00283778377837784 0.0028002800280028 0.00251275127512751 0.00342534253425343 0.00345034503450345 0.00551305130513051 0.00428792879287929 0.0091009100910091 0.00895089508950895 0.00365036503650365 0.00551305130513051 0.00547554755475548 0.00315031503150315 0.00173767376737674 0.00157515751575158 0.00265026502650265 0.00225022502250225 0.00168766876687669 0.00162516251625163 0.00145014501450145 0.0038003800380038 0.00235023502350235 0.00422542254225423 0.0069006900690069 0.00526302630263026 0.00171267126712671 0.0028002800280028 0.00311281128112811 0.00411291129112911 0.00133763376337634 0.00166266626662666 0.00211271127112711 0.00231273127312731 0.00187518751875188 0.002000200020002 0.00166266626662666 0.00325032503250325 
0.00168766876687669 0.00343784378437844 0.00542554255425543 0.00431293129312931 0.00206270627062706 0.00163766376637664 0.00308780878087809 0.00287528752875288 0.0014001400140014 0.00202520252025203 0.00292529252925293 0.00217521752175218 0.00133763376337634 0.00123762376237624 0.00111261126112611 0.0023002300230023 0.00173767376737674 0.00265026502650265 0.00387538753875388 0.00456295629562956 0.00162516251625163 0.00193769376937694 0.00295029502950295 0.00298779877987799 0.00206270627062706 0.0021002100210021 0.00196269626962696 0.00317531753175318 0.00171267126712671 0.00232523252325233 0.00212521252125213 0.00431293129312931 0.00365036503650365 0.00472547254725473 0.0069006900690069 0.0117386738673867 0.00431293129312931 0.00345034503450345 0.0078007800780078 0.00328782878287829 0.0023002300230023 0.00205020502050205 0.00343784378437844 0.003000300030003 0.00325032503250325 0.00221272127212721 0.00301280128012801 0.0031003100310031 0.0038003800380038 0.00323782378237824 0.00763826382638264 0.00768826882688269 0.00317531753175318 0.00283778377837784 0.00596309630963096 0.00355035503550355 0.00217521752175218 0.0017001700170017 0.00367536753675368 0.0023002300230023 0.00231273127312731 0.00196269626962696 0.00353785378537854 0.003000300030003 0.00225022502250225 0.00326282628262826 0.00758825882588259 0.00787578757875788 0.00298779877987799 0.00198769876987699 0.00681318131813181 0.00352535253525353 0.00287528752875288 0.00206270627062706 0.00442544254425443 0.00355035503550355 0.00411291129112911 0.00228772877287729 0.00482548254825483 0.00328782878287829 0.00315031503150315 0.00251275127512751 0.00741324132413241 0.0100510051005101 0.00456295629562956 0.00497549754975498 0.00771327132713271 0.00787578757875788 0.00431293129312931 0.00325032503250325 0.00815081508150815 0.00768826882688269 0.00526302630263026 0.00562556255625563 0.00716321632163216 0.0117386738673867 0.00895089508950895 0.00836333633363336 0.0193769376937694 sim1_galaxy 
+NODE_16_length_39898_cov_63.337_ID_31 0.527043962103364 63.337 0.00670510088983582 0.00579019927309187 0.0046371725780173 0.00506329113924051 0.00409825792705853 0.00402306053390149 0.00553954129590174 0.00340894848978569 0.00357187617495927 0.0044491790951247 0.00274470485023186 0.00253164556962025 0.00349667878180223 0.00477503446547186 0.00393533024188495 0.00308309311943853 0.00305802732171951 0.00215565860383507 0.00327108660233112 0.00285750093996741 0.00518862012783557 0.00265697455821531 0.00506329113924051 0.00295776413084346 0.00334628399548816 0.0072690813385136 0.00359694197267828 0.00298282992856248 0.00131595438024815 0.00216819150269457 0.00419852111793458 0.00253164556962025 0.00355934327609976 0.00256924426619877 0.0022308559969921 0.00330868529890964 0.00487529765634791 0.00392279734302544 0.00637924551948866 0.00305802732171951 0.00225592179471112 0.004035593432761 0.00165434264945482 0.00295776413084346 0.00215565860383507 0.00185486903120692 0.00279483644566988 0.00340894848978569 0.00402306053390149 0.00279483644566988 0.00196766512094247 0.00391026444416594 0.00575260057651335 0.00402306053390149 0.00563980448677779 0.00330868529890964 0.00431131720767013 0.00483769895976939 0.00427371851109162 0.00285750093996741 0.00288256673768643 0.00327108660233112 0.00407319212933952 0.00506329113924051 0.00536408071186866 0.00451184358942223 0.00186740193006642 0.00407319212933952 0.00254417846847976 0.00541421230730668 0.00388519864644692 0.00279483644566988 0.0046371725780173 0.00840957513472866 0.00572753477879433 0.00419852111793458 0.0030329615240005 0.00597819275598446 0.00333375109662865 0.00393533024188495 0.0032334879057526 0.00449931069056273 0.00775786439403434 0.00427371851109162 0.00332121819776914 0.00151648076200025 0.00345908008522371 0.00165434264945482 0.00337134979320717 0.00691816017044742 0.00541421230730668 0.00359694197267828 0.00088983581902494 0.00154154655971926 0.00572753477879433 0.00274470485023186 0.00409825792705853 
0.00396039603960396 0.00264444165935581 0.00563980448677779 0.00627898232861261 0.009086351673142 0.0082717132472741 0.00637924551948866 0.00360947487153779 0.00716881814763755 0.00345908008522371 0.00506329113924051 0.00330868529890964 0.00446171199398421 0.00388519864644692 0.00553954129590174 0.00130342148138865 0.00234365208672766 7.51973931570372e-05 0.00196766512094247 0.00229352049128964 0.00189246772778544 0.00264444165935581 0.0022308559969921 0.00491289635292643 0.00703095626018298 0.00775786439403434 0.00327108660233112 0.00194259932322346 0.0048502318586289 0.00186740193006642 0.0046371725780173 0.00550194259932322 0.00379746835443038 0.0048502318586289 0.00327108660233112 0.00265697455821531 0.00287003383882692 0.00446171199398421 0.00185486903120692 0.00214312570497556 0.00320842210803359 0.00154154655971926 0.00216819150269457 0.00416092242135606 0.00381000125328989 0.00597819275598446 0.00477503446547186 0.00553954129590174 0.00407319212933952 0.00703095626018298 0.00483769895976939 0.00734427873167064 0.0036345406692568 0.00716881814763755 0.004035593432761 0.00599072565484397 0.00982579270585286 0.00691816017044742 0.0072690813385136 0.00216819150269457 0.00320842210803359 0.00840957513472866 0.0044491790951247 0.00434891590424865 0.00165434264945482 0.00189246772778544 0.00402306053390149 0.006065923048001 0.00310815891715754 0.009086351673142 0.00392279734302544 0.00214312570497556 0.0036345406692568 0.00151648076200025 0.00265697455821531 0.00357187617495927 0.00287003383882692 0.00541421230730668 0.00402306053390149 0.00348414588294272 0.00245644817646322 0.00234365208672766 0.00279483644566988 0.00365960646697581 0.00165434264945482 0.00396039603960396 0.00256924426619877 0.00391026444416594 0.00407319212933952 0.00449931069056273 0.00215565860383507 0.00297029702970297 0.00379746835443038 0.00451184358942223 0.00579019927309187 0.00462463967915779 0.00297029702970297 0.00194259932322346 0.00288256673768643 0.00204286251409951 
0.00357187617495927 0.00330868529890964 0.00215565860383507 0.00131595438024815 0.00216819150269457 0.00088983581902494 0.00131595438024815 0.00200526381752099 0.00416092242135606 0.0030329615240005 0.00349667878180223 0.00398546183732297 0.00391026444416594 0.00491289635292643 0.00431131720767013 0.00391026444416594 0.00214312570497556 0.00360947487153779 0.00225592179471112 0.0036345406692568 0.00599072565484397 0.00337134979320717 0.00334628399548816 0.00131595438024815 0.00214312570497556 0.0046371725780173 0.00357187617495927 0.00541421230730668 0.00365960646697581 0.00229352049128964 0.00575260057651335 0.00426118561223211 0.006065923048001 0.00627898232861261 0.00487529765634791 0.00391026444416594 0.00734427873167064 0.00332121819776914 0.00518862012783557 0.00204286251409951 0.00265697455821531 0.00254417846847976 0.00409825792705853 0.00360947487153779 0.00348414588294272 0.00130342148138865 0.00402306053390149 0.00541421230730668 0.00434891590424865 0.00409825792705853 0.00355934327609976 0.00398546183732297 0.00553954129590174 0.0032334879057526 0.00305802732171951 0.00462463967915779 0.00550194259932322 0.00536408071186866 0.00670510088983582 sim1_galaxy +NODE_1_length_40000_cov_62.8079_ID_1 0.29915 62.8079 0.0179888491636873 0.00510038252868965 0.00966322474185564 0.0169262694702103 0.00516288721654124 0.00243768282621197 0.00105007875590669 0.0059754481586119 0.00876315723679276 0.004287821586619 0.00446283471260345 0.00636297722329175 0.015301147586069 0.00591294347076031 0.00743805785433908 0.0147011025826937 0.00507538065354902 0.00151261344600845 0.00240018001350101 0.00475035627672075 0.00285021376603245 0.00100007500562542 0.000487536565242393 0.00310023251743881 0.00108758156861765 0.000537540315523664 0.000412530939820487 0.000925069380203515 0.00441283096232217 0.00235017626321974 0.0024626847013526 0.00636297722329175 0.00946320974073056 0.00157511813386004 0.00365027377053279 0.00646298472385429 0.00466284971372853 0.00173763032227417 
0.000812560942070655 0.00402530189764232 0.004287821586619 0.00157511813386004 0.00163762282171163 0.00310023251743881 0.0054879115933695 0.00141260594544591 0.00271270345275896 0.0059754481586119 0.014751106332975 0.00488786658999425 0.00603795284646348 0.014751106332975 0.0064004800360027 0.00318773908043103 0.000912568442633197 0.00646298472385429 0.00727554566592494 0.00363777283296247 0.00417531314848614 0.00475035627672075 0.0137760332024902 0.0066129959746981 0.00603795284646348 0.0169262694702103 0.00681301097582319 0.00273770532789959 0.00288771657874341 0.00603795284646348 0.0021126584493837 0.0016876265719929 0.000425031877390804 0.00271270345275896 0.00333775033127485 0.00238767907593069 0.00177513313498512 0.0024626847013526 0.00600045003375253 0.00370027752081406 0.00270020251518864 0.00743805785433908 0.00313773533014976 0.00160012000900068 0.00173763032227417 0.00417531314848614 0.00138760407030527 0.000700052503937795 0.000125009375703178 0.00163762282171163 0.000400030002250169 0.000387529064679851 0.000200015001125084 0.000412530939820487 0.00250018751406355 0.00186263969797735 0.00177513313498512 0.00446283471260345 0.00113758531889892 0.000300022501687627 0.000462534690101758 0.000912568442633197 0.000737555316648749 0.000400030002250169 0.000150011250843813 0.000812560942070655 0.000575043128234618 0.000212515938695402 0.000125009375703178 0.000487536565242393 0.00107508063104733 0.000412530939820487 0.000425031877390804 0.00105007875590669 0.00570042753206491 0.00201265094882116 0.00195014626096957 0.00603795284646348 0.00287521564117309 0.00205015376153212 0.000462534690101758 0.00365027377053279 0.00330024751856389 0.00252518938920419 0.00173763032227417 0.00240018001350101 0.00607545565917444 0.00525039377953346 0.00288771657874341 0.00966322474185564 0.00965072380428532 0.00266269970247769 0.00525039377953346 0.0066129959746981 0.00171262844713353 0.000850063754781609 0.000412530939820487 0.00141260594544591 0.0032377428307123 
0.00158761907143036 0.00186263969797735 0.00235017626321974 0.00607545565917444 0.00127509563217241 0.00370027752081406 0.00591294347076031 0.0038002850213766 0.00140010500787559 0.00252518938920419 0.00363777283296247 0.00167512563442258 0.000612545940945571 0.000212515938695402 0.00157511813386004 0.000750056254219066 0.000425031877390804 0.000387529064679851 0.000537540315523664 0.00297522314173563 0.00158761907143036 0.00238767907593069 0.004287821586619 0.00498787409055679 0.000912568442633197 0.00205015376153212 0.00318773908043103 0.00166262469685226 0.000275020626546991 0.000400030002250169 0.00173763032227417 0.00153761532114909 0.000612545940945571 0.000700052503937795 0.00100007500562542 0.0024626847013526 0.000850063754781609 0.0016876265719929 0.00243768282621197 0.00498787409055679 0.0019001425106883 0.00201265094882116 0.00488786658999425 0.00160012000900068 0.000912568442633197 0.000300022501687627 0.00157511813386004 0.00242518188864165 0.00140010500787559 0.00160012000900068 0.00151261344600845 0.00411280846063455 0.00266269970247769 0.00273770532789959 0.00510038252868965 0.015226141960647 0.00411280846063455 0.00607545565917444 0.0137760332024902 0.00476285721429107 0.0024626847013526 0.00107508063104733 0.0054879115933695 0.00581293597019776 0.00297522314173563 0.00250018751406355 0.00441283096232217 0.0130509788234118 0.00607545565917444 0.00600045003375253 0.015301147586069 0.00646298472385429 0.00242518188864165 0.00330024751856389 0.00727554566592494 0.00473785533915044 0.00153761532114909 0.000575043128234618 0.004287821586619 0.000575043128234618 0.000750056254219066 0.000400030002250169 0.00108758156861765 0.00581293597019776 0.0032377428307123 0.00333775033127485 0.00876315723679276 0.00858814411080831 0.00160012000900068 0.00287521564117309 0.0064004800360027 0.00430032252418931 0.00166262469685226 0.000737555316648749 0.00466284971372853 0.00473785533915044 0.00167512563442258 0.00138760407030527 0.00285021376603245 
0.00476285721429107 0.00171262844713353 0.0021126584493837 0.00516288721654124 0.0137510313273496 0.00498787409055679 0.00570042753206491 0.014751106332975 0.00858814411080831 0.00498787409055679 0.00113758531889892 0.00946320974073056 0.00646298472385429 0.0038002850213766 0.00313773533014976 0.00507538065354902 0.015226141960647 0.00965072380428532 0.00681301097582319 0.0179888491636873 sim1_galaxy +NODE_17_length_39819_cov_63.5061_ID_33 0.340967879655441 63.5061 0.0169153104279687 0.00655515370705244 0.00856439622262407 0.0141149286718907 0.00649236487844083 0.00251155314446454 0.00355384769941732 0.00541239702632108 0.0081374321880651 0.00430731364275668 0.00307665260196906 0.00615330520393812 0.0118419730761503 0.00550030138637734 0.0071955997588909 0.0124321880650995 0.00651748040988547 0.00210970464135021 0.00301386377335744 0.00568866787221218 0.00329013461924854 0.0011050833835644 0.00161995177817963 0.00234830219007434 0.00318967249346996 0.0016073940124573 0.00146925858951175 0.00258689973879847 0.00370454088808519 0.00214737793851718 0.00276270845891099 0.00615330520393812 0.00842626079967852 0.00167018284106892 0.00281293952180028 0.00498543299176211 0.00442033353425758 0.00150693188667872 0.00275015069318867 0.00371709865380751 0.00330269238497087 0.00128089210367691 0.00148181635523408 0.00234830219007434 0.00498543299176211 0.00150693188667872 0.00286317058468957 0.00541239702632108 0.0106866586296966 0.00538728149487643 0.00377988748241913 0.0107243319268636 0.00532449266626482 0.00307665260196906 0.00303897930480209 0.00498543299176211 0.0067560779586096 0.00375477195097448 0.00377988748241913 0.00568866787221218 0.0105108499095841 0.0058142455294354 0.0065300381756078 0.0141149286718907 0.0071955997588909 0.00352873216797267 0.00367942535664055 0.0065300381756078 0.00226039783001808 0.00236085995579666 0.00124321880650995 0.00286317058468957 0.00257434197307615 0.00291340164757886 0.00172041390395821 0.00276270845891099 0.00529937713482017 
0.00467148884870404 0.00281293952180028 0.0071955997588909 0.00394313843680932 0.00169529837251356 0.00179576049829214 0.00377988748241913 0.00158227848101266 0.000853928069117942 0.000602772754671489 0.00148181635523408 0.00130600763512156 0.00135623869801085 0.00070323488045007 0.00146925858951175 0.00197156921840466 0.00164506730962427 0.00172041390395821 0.00307665260196906 0.00316455696202532 0.00101717902350814 0.00118042997789833 0.00303897930480209 0.00172041390395821 0.00119298774362066 0.00065300381756078 0.00275015069318867 0.00165762507534659 0.000954390194896524 0.000602772754671489 0.00161995177817963 0.00305153707052441 0.00100462125778581 0.00124321880650995 0.00355384769941732 0.0047593932087603 0.00292595941330119 0.00173297166968053 0.00377988748241913 0.0024362065501306 0.00221016676712879 0.00118042997789833 0.00281293952180028 0.00239853325296363 0.00276270845891099 0.00179576049829214 0.00301386377335744 0.0036543098251959 0.00577657223226844 0.00367942535664055 0.00856439622262407 0.00948111312035363 0.00347850110508338 0.00577657223226844 0.0058142455294354 0.00210970464135021 0.000828812537673297 0.00100462125778581 0.00150693188667872 0.00278782399035564 0.00205947357846092 0.00164506730962427 0.00214737793851718 0.00439521800281294 0.00185854932690376 0.00467148884870404 0.00550030138637734 0.00467148884870404 0.00205947357846092 0.00276270845891099 0.00375477195097448 0.00210970464135021 0.000791139240506329 0.000954390194896524 0.00128089210367691 0.00214737793851718 0.00120554550934298 0.00135623869801085 0.0016073940124573 0.00312688366485835 0.00205947357846092 0.00291340164757886 0.00430731364275668 0.00453335342575849 0.00114275668073136 0.00221016676712879 0.00307665260196906 0.00178320273256982 0.00065300381756078 0.00119298774362066 0.00150693188667872 0.0017706449668475 0.000791139240506329 0.000853928069117942 0.0011050833835644 0.00266224633313241 0.000828812537673297 0.00236085995579666 0.00251155314446454 
0.00573889893510147 0.00311432589913603 0.00292595941330119 0.00538728149487643 0.00161995177817963 0.00114275668073136 0.00101717902350814 0.00167018284106892 0.00286317058468957 0.00205947357846092 0.00169529837251356 0.00210970464135021 0.00440777576853526 0.00347850110508338 0.00352873216797267 0.00655515370705244 0.0125577657223227 0.00440777576853526 0.0036543098251959 0.0105108499095841 0.00646724934699618 0.00266224633313241 0.00305153707052441 0.00498543299176211 0.00439521800281294 0.00312688366485835 0.00197156921840466 0.00370454088808519 0.00904159132007233 0.00439521800281294 0.00529937713482017 0.0118419730761503 0.00580168776371308 0.00286317058468957 0.00239853325296363 0.0067560779586096 0.00423196704842274 0.0017706449668475 0.00165762507534659 0.00330269238497087 0.00175808720112518 0.00214737793851718 0.00130600763512156 0.00318967249346996 0.00439521800281294 0.00278782399035564 0.00257434197307615 0.0081374321880651 0.00843881856540084 0.00161995177817963 0.0024362065501306 0.00532449266626482 0.00532449266626482 0.00178320273256982 0.00172041390395821 0.00442033353425758 0.00423196704842274 0.00210970464135021 0.00158227848101266 0.00329013461924854 0.00646724934699618 0.00210970464135021 0.00226039783001808 0.00649236487844083 0.00994575045207957 0.00573889893510147 0.0047593932087603 0.0106866586296966 0.00843881856540084 0.00453335342575849 0.00316455696202532 0.00842626079967852 0.00580168776371308 0.00467148884870404 0.00394313843680932 0.00651748040988547 0.0125577657223227 0.00948111312035363 0.0071955997588909 0.0169153104279687 sim1_galaxy +NODE_9_length_39996_cov_63.0617_ID_17 0.675617561756176 63.0617 0.00113769909734203 0.00116270347310779 0.000825144400270047 0.000937664091215963 0.00248793538869302 0.00271297477058485 0.00327557322531443 0.0020128522491436 0.00163778661265721 0.00367564323756657 0.00348811041932338 0.0014002450428825 0.000825144400270047 0.000887655339684445 0.00112519690945916 0.000525091891080939 
0.00162528442477434 0.00133773410346811 0.00300052509189108 0.00162528442477434 0.00428825044382767 0.00428825044382767 0.0059760458080164 0.00431325481959343 0.0034756082314405 0.0054259495411697 0.00358812792238642 0.00210036756432376 0.000537594078963819 0.0014627559822969 0.00263796164328758 0.0014002450428825 0.00215037631585527 0.00163778661265721 0.00162528442477434 0.00177531067936889 0.00617608081414248 0.00578851298977321 0.00797639586927712 0.0029255119645938 0.00398819793463856 0.00830145275423199 0.00431325481959343 0.00431325481959343 0.00102517940639612 0.00188783037031481 0.00111269472157628 0.0020128522491436 0.000637611582026855 0.000887655339684445 0.000737629085089891 0.000675118145675493 0.00302552946765684 0.00338809291626035 0.00510089265621484 0.00177531067936889 0.00193783912184632 0.0036006301102693 0.00322556447378291 0.00162528442477434 8.75153151801565e-05 0.00133773410346811 0.00100017503063036 0.000937664091215963 0.000437576575900783 0.00403820668617008 0.00322556447378291 0.00100017503063036 0.00120021003675643 0.00740129522666467 0.00372565198909809 0.00111269472157628 0.00228790038256695 0.0116520391068437 0.010689370639862 0.00263796164328758 0.000650113769909734 0.0049383642137374 0.00367564323756657 0.00112519690945916 0.00181281724301753 0.00478833795914285 0.011314480034006 0.00322556447378291 0.00367564323756657 0.00325056884954867 0.00692621208711524 0.00431325481959343 0.00741379741454755 0.0138649263621134 0.011827069737204 0.00358812792238642 0.00131272972770235 0.0054259495411697 0.010689370639862 0.00348811041932338 0.00535093641387243 0.00845147900882654 0.00806391118445728 0.00510089265621484 0.00556347360788138 0.01919085840022 0.013077288525492 0.00797639586927712 0.00478833795914285 0.0190033255819768 0.00692621208711524 0.0059760458080164 0.00196284349761208 0.00562598454729578 0.00372565198909809 0.00327557322531443 8.75153151801565e-05 0.00302552946765684 0.000200035006126072 0.000737629085089891 
0.00202535443702648 0.00377566074062961 0.00806391118445728 0.00162528442477434 0.00346310604355762 0.00948916060310554 0.011314480034006 0.00300052509189108 0.00026254594554047 0.00588853049283625 0.00322556447378291 0.000825144400270047 0.00217538069162103 0.00477583577125997 0.00588853049283625 0.00133773410346811 0.00281299227364789 0.00625109394143975 0.00562598454729578 0.00188783037031481 0.00247543320081014 0.00612607206261096 0.0054259495411697 0.0014627559822969 0.00118770784887355 0.00627609831720551 0.0049383642137374 0.000887655339684445 0.00340059510414323 0.00487585327432301 0.00948916060310554 0.0036006301102693 0.00881404245743005 0.00808891556022304 0.0190033255819768 0.00830145275423199 0.00855149651188958 0.0179781461755807 0.0138649263621134 0.0054259495411697 0.00141274723076538 0.00612607206261096 0.0116520391068437 0.00367564323756657 0.00432575700747631 0.00418823294076463 0.00377566074062961 0.00338809291626035 0.00585102392918761 0.0133773410346811 0.01919085840022 0.00578851298977321 0.00253794414022454 0.00808891556022304 0.00325056884954867 0.00428825044382767 0.00251293976445878 0.00625109394143975 0.00740129522666467 0.00271297477058485 0.000325056884954867 0.00235041132198135 0.00302552946765684 0.000887655339684445 0.00230040257044983 0.00418823294076463 0.00845147900882654 0.00163778661265721 0.0024379266371615 0.00487585327432301 0.00478833795914285 0.00133773410346811 0.00051258970319806 0.00477583577125997 0.00403820668617008 0.00116270347310779 0.000312554697071988 0.00051258970319806 0.00026254594554047 8.75153151801565e-05 0.00108769034581052 0.00251293976445878 0.00196284349761208 0.00102517940639612 0.000787637836621409 0.00141274723076538 0.00131272972770235 0.000537594078963819 0.000275048133423349 0.00118770784887355 0.000650113769909734 0.000825144400270047 0.00186282599454905 0.0024379266371615 0.00346310604355762 0.00193783912184632 0.00436326357112495 0.00253794414022454 0.00478833795914285 0.00398819793463856 
0.00752631710549346 0.00855149651188958 0.00741379741454755 0.0034756082314405 0.000787637836621409 0.00247543320081014 0.00228790038256695 0.00163778661265721 0.00235041132198135 0.00230040257044983 0.00202535443702648 0.00302552946765684 0.00377566074062961 0.00585102392918761 0.00556347360788138 0.00617608081414248 0.00436326357112495 0.00881404245743005 0.00367564323756657 0.00428825044382767 0.00108769034581052 0.00281299227364789 0.00120021003675643 0.00248793538869302 0.000125021878828795 0.000325056884954867 8.75153151801565e-05 0.000637611582026855 0.00235041132198135 0.00432575700747631 0.00535093641387243 0.00215037631585527 0.00186282599454905 0.00340059510414323 0.00181281724301753 0.00162528442477434 0.000312554697071988 0.00217538069162103 0.000437576575900783 0.00113769909734203 sim1_galaxy +NODE_7_length_39998_cov_63.2738_ID_13 0.358842942147107 63.2738 0.014989373671709 0.00548818602325291 0.00921365170646331 0.0118264783097887 0.00580072509063633 0.00281285160645081 0.00268783597949744 0.00723840480060008 0.00908863607950994 0.00566320790098762 0.00397549693711714 0.00730091261407676 0.00880110013751719 0.0056882110263783 0.00567570946368296 0.0140767595949494 0.0055881985248156 0.00163770471308914 0.00213776722090261 0.00336292036504563 0.00483810476309539 0.00107513439179897 0.00171271408926116 0.00271283910488811 0.00252531566445806 0.00155019377422178 0.00127515939492437 0.00240030003750469 0.00465058132266533 0.00205025628203525 0.003487935991999 0.00730091261407676 0.00782597824728091 0.00231278909863733 0.00222527815976997 0.00546318289786223 0.00590073759219902 0.00242530316289536 0.00201275159394924 0.00562570321290161 0.00310038754844356 0.00201275159394924 0.00133766720840105 0.00271283910488811 0.00507563445430679 0.00281285160645081 0.00236279534941868 0.00723840480060008 0.00837604700587573 0.00292536567070884 0.00255031878984873 0.00727590948868609 0.00720090011251406 0.00313789223652957 0.0030128766095762 0.00546318289786223 
0.00621327665958245 0.00373796724590574 0.00401300162520315 0.00336292036504563 0.0100512564070509 0.00532566570821353 0.00703837979747468 0.0118264783097887 0.00766345793224153 0.00467558444805601 0.00497562195274409 0.00703837979747468 0.0018627328416052 0.00260032504063008 0.00148768596074509 0.00236279534941868 0.00286285785723215 0.00387548443555444 0.00206275784473059 0.003487935991999 0.00378797349668709 0.00441305163145393 0.00345043130391299 0.00567570946368296 0.00462557819727466 0.00215026878359795 0.00308788598574822 0.00401300162520315 0.00171271408926116 0.000550068758594824 0.000375046880860108 0.00133766720840105 0.000987623452931616 0.00123765470683835 0.000725090636329541 0.00127515939492437 0.00212526565820728 0.00100012501562695 0.00206275784473059 0.00397549693711714 0.00212526565820728 0.00146268283535442 0.00100012501562695 0.0030128766095762 0.00142517814726841 0.00153769221152644 0.000900112514064258 0.00201275159394924 0.00107513439179897 0.00106263282910364 0.000375046880860108 0.00171271408926116 0.00180022502812852 0.00177522190273784 0.00148768596074509 0.00268783597949744 0.00665083135391924 0.00302537817227153 0.00262532816602075 0.00255031878984873 0.00247530941367671 0.00158769846230779 0.00100012501562695 0.00222527815976997 0.00401300162520315 0.00305038129766221 0.00308788598574822 0.00213776722090261 0.00632579072384048 0.00551318914864358 0.00497562195274409 0.00921365170646331 0.00770096262032754 0.00326290786348294 0.00551318914864358 0.00532566570821353 0.00192524065508189 0.00177522190273784 0.00177522190273784 0.00281285160645081 0.00216277034629329 0.00207525940742593 0.00100012501562695 0.00205025628203525 0.00468808601075134 0.00402550318789849 0.00441305163145393 0.0056882110263783 0.00601325165645706 0.00178772346543318 0.00305038129766221 0.00373796724590574 0.00322540317539692 0.000950118764845606 0.00106263282910364 0.00201275159394924 0.00211276409551194 0.00097512189023628 0.00123765470683835 0.00155019377422178 
0.00435054381797725 0.00207525940742593 0.00387548443555444 0.00566320790098762 0.00352544068008501 0.00142517814726841 0.00158769846230779 0.00313789223652957 0.00231278909863733 0.00097512189023628 0.00153769221152644 0.00242530316289536 0.00140017502187773 0.000950118764845606 0.000550068758594824 0.00107513439179897 0.0031503937992249 0.00177522190273784 0.00260032504063008 0.00281285160645081 0.00548818602325291 0.00172521565195649 0.00302537817227153 0.00292536567070884 0.00308788598574822 0.00142517814726841 0.00146268283535442 0.00231278909863733 0.00273784223027878 0.00178772346543318 0.00215026878359795 0.00163770471308914 0.0051131391423928 0.00326290786348294 0.00467558444805601 0.00548818602325291 0.0111513939242405 0.0051131391423928 0.00632579072384048 0.0100512564070509 0.00313789223652957 0.0031503937992249 0.00180022502812852 0.00507563445430679 0.00371296412051506 0.00435054381797725 0.00212526565820728 0.00465058132266533 0.00385048131016377 0.00468808601075134 0.00378797349668709 0.00880110013751719 0.008126015751969 0.00273784223027878 0.00401300162520315 0.00621327665958245 0.00410051256407051 0.00140017502187773 0.00107513439179897 0.00310038754844356 0.00197524690586323 0.00211276409551194 0.000987623452931616 0.00252531566445806 0.00371296412051506 0.00216277034629329 0.00286285785723215 0.00908863607950994 0.00832604075509439 0.00308788598574822 0.00247530941367671 0.00720090011251406 0.00495061882735342 0.00231278909863733 0.00142517814726841 0.00590073759219902 0.00410051256407051 0.00322540317539692 0.00171271408926116 0.00483810476309539 0.00313789223652957 0.00192524065508189 0.0018627328416052 0.00580072509063633 0.0121265158144768 0.00548818602325291 0.00665083135391924 0.00837604700587573 0.00832604075509439 0.00352544068008501 0.00212526565820728 0.00782597824728091 0.008126015751969 0.00601325165645706 0.00462557819727466 0.0055881985248156 0.0111513939242405 0.00770096262032754 0.00766345793224153 0.014989373671709 sim1_galaxy 
+NODE_6_length_39999_cov_63.2183_ID_11 0.407060176504413 63.2183 0.0118386838683868 0.00617561756175618 0.0083008300830083 0.00905090509050905 0.00598809880988099 0.00301280128012801 0.00331283128312831 0.0039003900390039 0.00655065506550655 0.0042004200420042 0.00432543254325433 0.00427542754275428 0.00912591259125913 0.00516301630163016 0.00633813381338134 0.0074007400740074 0.00597559755975598 0.003000300030003 0.00383788378837884 0.00443794379437944 0.0037003700370037 0.00206270627062706 0.00278777877787779 0.00312531253125313 0.00276277627762776 0.00178767876787679 0.00305030503050305 0.00235023502350235 0.00287528752875288 0.00222522252225223 0.00328782878287829 0.00427542754275428 0.00686318631863186 0.00207520752075208 0.00251275127512751 0.0044004400440044 0.00397539753975398 0.00305030503050305 0.00215021502150215 0.00295029502950295 0.00445044504450445 0.0023002300230023 0.00168766876687669 0.00312531253125313 0.00322532253225323 0.0024002400240024 0.00313781378137814 0.0039003900390039 0.00861336133613361 0.0054005400540054 0.005000500050005 0.00902590259025903 0.00587558755875588 0.00448794879487949 0.00268776877687769 0.0044004400440044 0.00645064506450645 0.00443794379437944 0.00492549254925493 0.00443794379437944 0.00666316631663166 0.00618811881188119 0.00612561256125613 0.00905090509050905 0.00728822882288229 0.00331283128312831 0.0034003400340034 0.00612561256125613 0.00323782378237824 0.00331283128312831 0.00197519751975198 0.00313781378137814 0.00366286628662866 0.00378787878787879 0.003000300030003 0.00328782878287829 0.00543804380438044 0.00476297629762976 0.00372537253725373 0.00633813381338134 0.00441294129412941 0.00333783378337834 0.00246274627462746 0.00492549254925493 0.0026002600260026 0.00183768376837684 0.00211271127112711 0.00168766876687669 0.0026002600260026 0.00247524752475248 0.00435043504350435 0.00305030503050305 0.00205020502050205 0.00218771877187719 0.003000300030003 0.00432543254325433 0.00328782878287829 
0.00153765376537654 0.00095009500950095 0.00268776877687769 0.00236273627362736 0.00185018501850185 0.00085008500850085 0.00215021502150215 0.00462546254625463 0.00295029502950295 0.00211271127112711 0.00278777877787779 0.00277527752775278 0.00188768876887689 0.00197519751975198 0.00331283128312831 0.00176267626762676 0.00241274127412741 0.000725072507250725 0.005000500050005 0.0027002700270027 0.00305030503050305 0.00095009500950095 0.00251275127512751 0.00386288628862886 0.00357535753575358 0.00246274627462746 0.00383788378837884 0.00257525752575258 0.00507550755075508 0.0034003400340034 0.0083008300830083 0.00897589758975898 0.0042004200420042 0.00507550755075508 0.00618811881188119 0.00327532753275328 0.00158765876587659 0.00188768876887689 0.0024002400240024 0.00281278127812781 0.00198769876987699 0.00218771877187719 0.00222522252225223 0.00615061506150615 0.00137513751375138 0.00476297629762976 0.00516301630163016 0.00472547254725473 0.00211271127112711 0.00357535753575358 0.00443794379437944 0.00352535253525353 0.00133763376337634 0.00295029502950295 0.0023002300230023 0.00152515251525153 0.00142514251425143 0.00247524752475248 0.00178767876787679 0.00215021502150215 0.00198769876987699 0.00378787878787879 0.0042004200420042 0.00742574257425743 0.00243774377437744 0.00305030503050305 0.00448794879487949 0.00403790379037904 0.00117511751175118 0.00185018501850185 0.00305030503050305 0.003000300030003 0.00133763376337634 0.00183768376837684 0.00206270627062706 0.00376287628762876 0.00158765876587659 0.00331283128312831 0.00301280128012801 0.0042004200420042 0.0025002500250025 0.00241274127412741 0.0054005400540054 0.0031003100310031 0.00243774377437744 0.00153765376537654 0.00207520752075208 0.00321282128212821 0.00211271127112711 0.00333783378337834 0.003000300030003 0.00252525252525253 0.0042004200420042 0.00331283128312831 0.00617561756175618 0.00726322632263226 0.00252525252525253 0.00257525752575258 0.00666316631663166 0.00475047504750475 
0.00376287628762876 0.00277527752775278 0.00322532253225323 0.00282528252825283 0.00215021502150215 0.00205020502050205 0.00287528752875288 0.00732573257325733 0.00615061506150615 0.00543804380438044 0.00912591259125913 0.00501300130013001 0.00321282128212821 0.00386288628862886 0.00645064506450645 0.00531303130313031 0.003000300030003 0.00462546254625463 0.00445044504450445 0.00157515751575158 0.00152515251525153 0.0026002600260026 0.00276277627762776 0.00282528252825283 0.00281278127812781 0.00366286628662866 0.00655065506550655 0.00686318631863186 0.0031003100310031 0.0027002700270027 0.00587558755875588 0.00447544754475448 0.00403790379037904 0.00236273627362736 0.00397539753975398 0.00531303130313031 0.00352535253525353 0.0026002600260026 0.0037003700370037 0.00475047504750475 0.00327532753275328 0.00323782378237824 0.00598809880988099 0.00445044504450445 0.0042004200420042 0.00176267626762676 0.00861336133613361 0.00686318631863186 0.00742574257425743 0.00328782878287829 0.00686318631863186 0.00501300130013001 0.00472547254725473 0.00441294129412941 0.00597559755975598 0.00726322632263226 0.00897589758975898 0.00728822882288229 0.0118386838683868 sim1_galaxy +NODE_8_length_39997_cov_62.8364_ID_15 0.674375578168363 62.8364 0.00273791068660299 0.0020878131719758 0.00231284692703906 0.00142521378206731 0.00210031504725709 0.00261289193379007 0.0032004800720108 0.00230034505175776 0.00226283942591389 0.00321298194729209 0.00421313196979547 0.00157523628544282 0.000637595639345902 0.00165024753713057 0.00131269690453568 0.000850127519127869 0.00168775316297445 0.00220033004950743 0.00211281692253838 0.00141271190678602 0.00365054758213732 0.00462569385407811 0.00598839825973896 0.00428814322148322 0.00358803820573086 0.00682602390358554 0.00476321448217233 0.00355053257988698 0.000562584387658149 0.00202530379556934 0.00262539380907136 0.00157523628544282 0.00233785067760164 0.00216282442366355 0.00175026253938091 0.00162524378656799 0.00503825573836075 
0.00507576136420463 0.00870130519577937 0.00307546131919788 0.00297544631694754 0.00611341701255188 0.00441316197429614 0.00428814322148322 0.000987648147222083 0.00172525878881832 0.00177526628994349 0.00230034505175776 0.000487573135970396 0.000675101265189778 0.000575086262939441 0.00022503375506326 0.00215032254838226 0.00155023253488023 0.00267540131019653 0.00162524378656799 0.00180027004050608 0.00253788068210232 0.00233785067760164 0.00141271190678602 0.000275041256188428 0.00153773065959894 0.00121268190228534 0.00142521378206731 0.00210031504725709 0.00375056258438766 0.00347552132819923 0.00121268190228534 0.00163774566184928 0.00812621893283993 0.00667600140021003 0.00177526628994349 0.00286292943941591 0.0104765714857229 0.00787618142721408 0.00262539380907136 0.000462569385407811 0.0038380757113567 0.00247537130569585 0.00131269690453568 0.00235035255288293 0.00635095264289643 0.00757613642046307 0.00233785067760164 0.00516327449117368 0.00707606140921138 0.00998899834975246 0.00441316197429614 0.0070385557833675 0.0147772165824874 0.00955143271490724 0.00476321448217233 0.000875131269690454 0.00482572385857879 0.00787618142721408 0.00421313196979547 0.00482572385857879 0.00713857078561784 0.00773866079911987 0.00267540131019653 0.00635095264289643 0.0178526779016853 0.0128269240386058 0.00870130519577937 0.00350052507876181 0.0166524978746812 0.00998899834975246 0.00598839825973896 0.00211281692253838 0.00673851077661649 0.00667600140021003 0.0032004800720108 0.000262539380907136 0.00251287693153973 0.000250037505625844 0.000575086262939441 0.00298794819222883 0.00316297444616692 0.00773866079911987 0.00175026253938091 0.00457568635295294 0.00957643646546982 0.00757613642046307 0.00211281692253838 0.000450067510126519 0.00502575386307946 0.00347552132819923 0.00231284692703906 0.00302545381807271 0.00406310946641996 0.00502575386307946 0.00153773065959894 0.00248787318097715 0.00630094514177127 0.00673851077661649 0.00172525878881832 
0.00185027754163124 0.00693854078111717 0.00482572385857879 0.00202530379556934 0.000637595639345902 0.00187528129219383 0.0038380757113567 0.00165024753713057 0.00396309446416963 0.0067260089013352 0.00957643646546982 0.00253788068210232 0.00653848077211582 0.0122393359003851 0.0166524978746812 0.00611341701255188 0.00722608391258689 0.016902535380307 0.0147772165824874 0.00682602390358554 0.00126268940341051 0.00693854078111717 0.0104765714857229 0.00321298194729209 0.00397559633945092 0.00375056258438766 0.00316297444616692 0.00155023253488023 0.00766364954743212 0.010951642746412 0.0178526779016853 0.00507576136420463 0.00270040506075911 0.0122393359003851 0.00707606140921138 0.00462569385407811 0.00151272690903636 0.00630094514177127 0.00812621893283993 0.00261289193379007 0.000762614392158824 0.00185027754163124 0.00251287693153973 0.000675101265189778 0.00420063009451418 0.00375056258438766 0.00713857078561784 0.00216282442366355 0.00293794069110367 0.0067260089013352 0.00635095264289643 0.00220033004950743 0.000312546882032305 0.00406310946641996 0.00375056258438766 0.0020878131719758 0.000700105015752363 0.000312546882032305 0.000450067510126519 0.000275041256188428 0.00118767815172276 0.00151272690903636 0.00211281692253838 0.000987648147222083 0.000900135020253038 0.00126268940341051 0.000875131269690454 0.000562584387658149 0.00022503375506326 0.000637595639345902 0.000462569385407811 0.000637595639345902 0.00253788068210232 0.00293794069110367 0.00457568635295294 0.00180027004050608 0.00326298944841726 0.00270040506075911 0.00350052507876181 0.00297544631694754 0.00452567885182777 0.00722608391258689 0.0070385557833675 0.00358803820573086 0.000900135020253038 0.00185027754163124 0.00286292943941591 0.00226283942591389 0.00251287693153973 0.00420063009451418 0.00298794819222883 0.00215032254838226 0.00375056258438766 0.00766364954743212 0.00635095264289643 0.00503825573836075 0.00326298944841726 0.00653848077211582 0.00516327449117368 
0.00365054758213732 0.00118767815172276 0.00248787318097715 0.00163774566184928 0.00210031504725709 0.00022503375506326 0.000762614392158824 0.000262539380907136 0.000487573135970396 0.00251287693153973 0.00397559633945092 0.00482572385857879 0.00233785067760164 0.00253788068210232 0.00396309446416963 0.00235035255288293 0.00168775316297445 0.000700105015752363 0.00302545381807271 0.00210031504725709 0.00273791068660299 sim1_galaxy +NODE_19_length_33054_cov_76.4496_ID_37 0.484419434864162 76.4496 0.00981815981362137 0.00585458836343832 0.00515869413936038 0.0065656107228223 0.00576381955160207 0.00412998093854951 0.00471997821548516 0.00397869958548909 0.00406946839732535 0.00426613415630389 0.00304075519651448 0.00264742367855738 0.00500741278629996 0.0044476717799764 0.00500741278629996 0.00617227920486521 0.00435690296814015 0.002511270460803 0.00393331517957097 0.00337357417324741 0.0048410032979335 0.00242050164896675 0.00412998093854951 0.0029046019787601 0.00320716468488094 0.00414510907385556 0.00337357417324741 0.00305588333182052 0.00149768539529818 0.00235998910774258 0.00505279719221809 0.00264742367855738 0.00450818432120057 0.00225409216060028 0.00248101419019092 0.00313152400835073 0.0054158724395631 0.00391818704426492 0.00437203110344619 0.00284408943753593 0.00272306435508759 0.00335844603794136 0.00217845148407007 0.0029046019787601 0.00228434843121237 0.00205742640162174 0.00323742095549303 0.00397869958548909 0.00494690024507579 0.00304075519651448 0.00226922029590633 0.00420562161507973 0.00621766361078333 0.0040543402620193 0.00487125956854558 0.00313152400835073 0.00515869413936038 0.00549151311609331 0.00431151856222202 0.00337357417324741 0.0045535687271187 0.00441741550936432 0.00509818159813621 0.0065656107228223 0.00588484463405041 0.00494690024507579 0.00257178300202717 0.00509818159813621 0.00299537079059635 0.00450818432120057 0.00320716468488094 0.00323742095549303 0.00479561889201537 0.00714047986445191 0.00500741278629996 
0.00505279719221809 0.00322229282018698 0.00620253547547729 0.00390305890895888 0.00500741278629996 0.00270793621978155 0.00396357145018305 0.00648997004629209 0.00431151856222202 0.00266255181386342 0.00223896402529424 0.00311639587304469 0.00217845148407007 0.00335844603794136 0.00464433753895495 0.00450818432120057 0.00337357417324741 0.00102871320081087 0.00208768267223382 0.00500741278629996 0.00304075519651448 0.00411485280324347 0.00296511451998427 0.00220870775468216 0.00487125956854558 0.00515869413936038 0.0057033070103779 0.00399382772079513 0.00437203110344619 0.003585368067532 0.00505279719221809 0.00311639587304469 0.00412998093854951 0.00226922029590633 0.003585368067532 0.00320716468488094 0.00471997821548516 0.00170947928958277 0.0015430698012163 0.00021179389428459 0.00226922029590633 0.00287434570814801 0.0023297328371305 0.00220870775468216 0.00248101419019092 0.00564279446915373 0.00593022903996853 0.00648997004629209 0.00393331517957097 0.00220870775468216 0.00408459653263139 0.00257178300202717 0.00515869413936038 0.00683791715833106 0.00391818704426492 0.00408459653263139 0.00441741550936432 0.00296511451998427 0.00231460470182445 0.003585368067532 0.00205742640162174 0.002511270460803 0.00293485824937218 0.00208768267223382 0.00235998910774258 0.00426613415630389 0.00335844603794136 0.00620253547547729 0.0044476717799764 0.00655048258751626 0.0038728026383468 0.00593022903996853 0.00549151311609331 0.00636894496384376 0.00291973011406614 0.00505279719221809 0.00335844603794136 0.005022540921606 0.0054158724395631 0.00464433753895495 0.00414510907385556 0.00220870775468216 0.00293485824937218 0.00714047986445191 0.00426613415630389 0.00435690296814015 0.00178511996611298 0.0023297328371305 0.0040543402620193 0.00579407582221415 0.00229947656651841 0.0057033070103779 0.00391818704426492 0.0026171674079453 0.00291973011406614 0.00223896402529424 0.00242050164896675 0.00334331790263532 0.00231460470182445 0.00450818432120057 0.00412998093854951 
0.00369126501467429 0.00205742640162174 0.0015430698012163 0.00304075519651448 0.00391818704426492 0.00178511996611298 0.00296511451998427 0.00225409216060028 0.00360049620283804 0.0038728026383468 0.00396357145018305 0.002511270460803 0.0038728026383468 0.00391818704426492 0.00494690024507579 0.00585458836343832 0.00485613143323954 0.0038728026383468 0.00220870775468216 0.0045535687271187 0.00243562978427279 0.00334331790263532 0.00226922029590633 0.00228434843121237 0.000998456930198784 0.00220870775468216 0.00102871320081087 0.00149768539529818 0.00196665758978548 0.00426613415630389 0.00322229282018698 0.00500741278629996 0.00488638770385162 0.00360049620283804 0.00564279446915373 0.00515869413936038 0.00360049620283804 0.0026171674079453 0.003585368067532 0.00272306435508759 0.00257178300202717 0.005022540921606 0.00335844603794136 0.00320716468488094 0.000998456930198784 0.002511270460803 0.00479561889201537 0.00406946839732535 0.0062781761520075 0.00391818704426492 0.00287434570814801 0.00621766361078333 0.00547638498078727 0.00579407582221415 0.00515869413936038 0.0054158724395631 0.00360049620283804 0.00636894496384376 0.00266255181386342 0.0048410032979335 0.00243562978427279 0.00296511451998427 0.00299537079059635 0.00576381955160207 0.00514356600405434 0.00369126501467429 0.00170947928958277 0.00494690024507579 0.0062781761520075 0.00435690296814015 0.00411485280324347 0.00450818432120057 0.00488638770385162 0.00655048258751626 0.00270793621978155 0.00435690296814015 0.00485613143323954 0.00683791715833106 0.00588484463405041 0.00981815981362137 sim1_galaxy +NODE_15_length_39993_cov_62.9495_ID_29 0.365889030580352 62.9495 0.0153038259564891 0.00615153788447112 0.00822705676419105 0.0118154538634659 0.00642660665166292 0.00271317829457364 0.00390097524381095 0.00487621905476369 0.00740185046261565 0.00455113778444611 0.00370092523130783 0.00556389097274319 0.0107901975493873 0.00570142535633908 0.00670167541885471 0.00967741935483871 0.00663915978994749 
0.00252563140785196 0.00298824706176544 0.00528882220555139 0.00396349087271818 0.000975243810952738 0.00230057514378595 0.00265066266566642 0.00353838459614904 0.00180045011252813 0.00197549387346837 0.00347586896724181 0.00342585646411603 0.00227556889222306 0.00303825956489122 0.00556389097274319 0.00760190047511878 0.00186296574143536 0.00278819704926232 0.00417604401100275 0.00447611902975744 0.00152538134533633 0.00278819704926232 0.00382595648912228 0.00368842210552638 0.00156289072268067 0.00152538134533633 0.00265066266566642 0.00501375343835959 0.00160040010002501 0.00281320330082521 0.00487621905476369 0.00953988497124281 0.00530132533133283 0.00378844711177794 0.00972743185796449 0.0055388847211803 0.0035008752188047 0.00388847211802951 0.00417604401100275 0.00675168792198049 0.0039134783695924 0.00406351587896974 0.00528882220555139 0.00895223805951488 0.00575143785946487 0.00635158789697424 0.0118154538634659 0.00688922230557639 0.0040510127531883 0.00380095023755939 0.00635158789697424 0.00255063765941485 0.0028382095523881 0.00182545636409102 0.00281320330082521 0.00245061265316329 0.00306326581645411 0.00177544386096524 0.00303825956489122 0.00552638159539885 0.00463865966491623 0.00315078769692423 0.00670167541885471 0.00410102525631408 0.00198799699924981 0.00238809702425606 0.00406351587896974 0.00177544386096524 0.000937734433608402 0.000887721930482621 0.00152538134533633 0.0017629407351838 0.0017629407351838 0.00102525631407852 0.00197549387346837 0.00216304076019005 0.00178794698674669 0.00177544386096524 0.00370092523130783 0.00358839709927482 0.00128782195548887 0.00152538134533633 0.00388847211802951 0.00232558139534884 0.00161290322580645 0.000750187546886722 0.00278819704926232 0.00220055013753438 0.00113778444611153 0.000887721930482621 0.00230057514378595 0.00340085021255314 0.00166291572893223 0.00182545636409102 0.00390097524381095 0.00371342835708927 0.00303825956489122 0.00157539384846212 0.00378844711177794 0.00271317829457364 
0.00210052513128282 0.00152538134533633 0.00278819704926232 0.00253813453363341 0.00241310327581895 0.00238809702425606 0.00298824706176544 0.00378844711177794 0.0054013503375844 0.00380095023755939 0.00822705676419105 0.00905226306576644 0.00356339084771193 0.0054013503375844 0.00575143785946487 0.00235058764691173 0.00147536884221055 0.00166291572893223 0.00160040010002501 0.00280070017504376 0.00226306576644161 0.00178794698674669 0.00227556889222306 0.00458864716179045 0.00217554388597149 0.00463865966491623 0.00570142535633908 0.00522630657664416 0.00212553138284571 0.00241310327581895 0.0039134783695924 0.00232558139534884 0.00101275318829707 0.00113778444611153 0.00156289072268067 0.00266316579144786 0.00125031257814454 0.0017629407351838 0.00180045011252813 0.00273818454613653 0.00226306576644161 0.00306326581645411 0.00455113778444611 0.00518879719929983 0.00177544386096524 0.00210052513128282 0.0035008752188047 0.00192548137034259 0.000975243810952738 0.00161290322580645 0.00152538134533633 0.00220055013753438 0.00101275318829707 0.000937734433608402 0.000975243810952738 0.00286321580395099 0.00147536884221055 0.0028382095523881 0.00271317829457364 0.0059264816204051 0.00312578144536134 0.00303825956489122 0.00530132533133283 0.00216304076019005 0.00177544386096524 0.00128782195548887 0.00186296574143536 0.00338834708677169 0.00212553138284571 0.00198799699924981 0.00252563140785196 0.00415103775943986 0.00356339084771193 0.0040510127531883 0.00615153788447112 0.0102400600150038 0.00415103775943986 0.00378844711177794 0.00895223805951488 0.00611402850712678 0.00286321580395099 0.00340085021255314 0.00501375343835959 0.00378844711177794 0.00273818454613653 0.00216304076019005 0.00342585646411603 0.00745186296574144 0.00458864716179045 0.00552638159539885 0.0107901975493873 0.0051262815703926 0.00338834708677169 0.00253813453363341 0.00675168792198049 0.00447611902975744 0.00220055013753438 0.00220055013753438 0.00368842210552638 0.00232558139534884 
0.00266316579144786 0.0017629407351838 0.00353838459614904 0.00378844711177794 0.00280070017504376 0.00245061265316329 0.00740185046261565 0.00738934733683421 0.00216304076019005 0.00271317829457364 0.0055388847211803 0.00495123780945236 0.00192548137034259 0.00232558139534884 0.00447611902975744 0.00447611902975744 0.00232558139534884 0.00177544386096524 0.00396349087271818 0.00611402850712678 0.00235058764691173 0.00255063765941485 0.00642660665166292 0.00795198799699925 0.0059264816204051 0.00371342835708927 0.00953988497124281 0.00738934733683421 0.00518879719929983 0.00358839709927482 0.00760190047511878 0.0051262815703926 0.00522630657664416 0.00410102525631408 0.00663915978994749 0.0102400600150038 0.00905226306576644 0.00688922230557639 0.0153038259564891 sim1_galaxy +NODE_4_length_39999_cov_63.2942_ID_7 0.347858696467412 63.2942 0.0173017301730173 0.00603810381038104 0.00975097509750975 0.0131138113811381 0.00545054505450545 0.00313781378137814 0.00246274627462746 0.00698819881988199 0.00841334133413341 0.00561306130613061 0.00341284128412841 0.00635063506350635 0.0104010401040104 0.00573807380738074 0.00601310131013101 0.0148764876487649 0.00528802880288029 0.00162516251625163 0.0022002200220022 0.00382538253825383 0.00472547254725473 0.00131263126312631 0.00135013501350135 0.00265026502650265 0.00213771377137714 0.00163766376637664 0.00115011501150115 0.00165016501650165 0.00505050505050505 0.00216271627162716 0.00353785378537854 0.00635063506350635 0.00726322632263226 0.00251275127512751 0.00277527752775278 0.00517551755175518 0.00541304130413041 0.00263776377637764 0.00212521252125213 0.00535053505350535 0.00288778877887789 0.00197519751975198 0.0015001500150015 0.00265026502650265 0.00508800880088009 0.00263776377637764 0.00238773877387739 0.00698819881988199 0.00938843884388439 0.0032003200320032 0.00368786878687869 0.0072007200720072 0.00678817881788179 0.0029002900290029 0.00271277127712771 0.00517551755175518 0.00581308130813081 0.0035003500350035 
0.00377537753775378 0.00382538253825383 0.0113761376137614 0.00532553255325533 0.00721322132213221 0.0131138113811381 0.00782578257825783 0.00402540254025403 0.00446294629462946 0.00721322132213221 0.00198769876987699 0.00238773877387739 0.00122512251225123 0.00238773877387739 0.00281278127812781 0.00325032503250325 0.00195019501950195 0.00353785378537854 0.004000400040004 0.00395039503950395 0.00295029502950295 0.00601310131013101 0.00501300130013001 0.00206270627062706 0.00291279127912791 0.00377537753775378 0.00197519751975198 0.000825082508250825 0.000537553755375538 0.0015001500150015 0.000987598759875988 0.000987598759875988 0.0007000700070007 0.00115011501150115 0.00251275127512751 0.00113761376137614 0.00195019501950195 0.00341284128412841 0.002000200020002 0.00111261126112611 0.00101260126012601 0.00271277127712771 0.002000200020002 0.00126262626262626 0.000525052505250525 0.00212521252125213 0.000987598759875988 0.00095009500950095 0.000537553755375538 0.00135013501350135 0.00155015501550155 0.00133763376337634 0.00122512251225123 0.00246274627462746 0.00718821882188219 0.00263776377637764 0.00305030503050305 0.00368786878687869 0.00215021502150215 0.00152515251525153 0.00101260126012601 0.00277527752775278 0.00338783878387839 0.00305030503050305 0.00291279127912791 0.0022002200220022 0.00558805880588059 0.00397539753975398 0.00446294629462946 0.00975097509750975 0.00812581258125813 0.00275027502750275 0.00397539753975398 0.00532553255325533 0.00221272127212721 0.00143764376437644 0.00133763376337634 0.00263776377637764 0.00208770877087709 0.00207520752075208 0.00113761376137614 0.00216271627162716 0.00477547754775478 0.0031003100310031 0.00395039503950395 0.00573807380738074 0.00585058505850585 0.00173767376737674 0.00305030503050305 0.0035003500350035 0.00338783878387839 0.00111261126112611 0.00095009500950095 0.00197519751975198 0.00196269626962696 0.00132513251325133 0.000987598759875988 0.00163766376637664 0.00458795879587959 0.00207520752075208 
0.00325032503250325 0.00561306130613061 0.00348784878487849 0.00122512251225123 0.00152515251525153 0.0029002900290029 0.00247524752475248 0.00105010501050105 0.00126262626262626 0.00263776377637764 0.00158765876587659 0.00111261126112611 0.000825082508250825 0.00131263126312631 0.00307530753075308 0.00143764376437644 0.00238773877387739 0.00313781378137814 0.00543804380438044 0.00172517251725173 0.00263776377637764 0.0032003200320032 0.00277527752775278 0.00122512251225123 0.00111261126112611 0.00251275127512751 0.00256275627562756 0.00173767376737674 0.00206270627062706 0.00162516251625163 0.00522552255225523 0.00275027502750275 0.00402540254025403 0.00603810381038104 0.012951295129513 0.00522552255225523 0.00558805880588059 0.0113761376137614 0.00328782878287829 0.00307530753075308 0.00155015501550155 0.00508800880088009 0.00441294129412941 0.00458795879587959 0.00251275127512751 0.00505050505050505 0.0043004300430043 0.00477547754775478 0.004000400040004 0.0104010401040104 0.00737573757375738 0.00256275627562756 0.00338783878387839 0.00581308130813081 0.00367536753675368 0.00158765876587659 0.000987598759875988 0.00288778877887789 0.00175017501750175 0.00196269626962696 0.000987598759875988 0.00213771377137714 0.00441294129412941 0.00208770877087709 0.00281278127812781 0.00841334133413341 0.00742574257425743 0.00277527752775278 0.00215021502150215 0.00678817881788179 0.00425042504250425 0.00247524752475248 0.002000200020002 0.00541304130413041 0.00367536753675368 0.00338783878387839 0.00197519751975198 0.00472547254725473 0.00328782878287829 0.00221272127212721 0.00198769876987699 0.00545054505450545 0.0131263126312631 0.00543804380438044 0.00718821882188219 0.00938843884388439 0.00742574257425743 0.00348784878487849 0.002000200020002 0.00726322632263226 0.00737573757375738 0.00585058505850585 0.00501300130013001 0.00528802880288029 0.012951295129513 0.00812581258125813 0.00782578257825783 0.0173017301730173 sim1_galaxy +NODE_14_length_39993_cov_63.1132_ID_27 
0.387992898757283 63.1132 0.0158414603650913 0.00722680670167542 0.00805201300325081 0.0112153038259565 0.00611402850712678 0.00487621905476369 0.00341335333833458 0.00555138784696174 0.00595148787196799 0.00630157539384846 0.00342585646411603 0.00556389097274319 0.00875218804701175 0.00445111277819455 0.00632658164541135 0.00957739434858715 0.00555138784696174 0.00263815953988497 0.00335083770942736 0.00446361590397599 0.00545136284071018 0.00221305326331583 0.00292573143285821 0.00327581895473868 0.00231307826956739 0.00213803450862716 0.00222555638909727 0.00310077519379845 0.00280070017504376 0.00290072518129532 0.00341335333833458 0.00556389097274319 0.00563890972743186 0.00231307826956739 0.00236309077269317 0.00423855963990998 0.00548887221805451 0.00295073768442111 0.00278819704926232 0.00537634408602151 0.00192548137034259 0.00255063765941485 0.00157539384846212 0.00327581895473868 0.00373843460865216 0.00248812203050763 0.00290072518129532 0.00555138784696174 0.00846461615403851 0.00390097524381095 0.00296324081020255 0.00657664416104026 0.00558889722430608 0.00216304076019005 0.00246311577894474 0.00423855963990998 0.0057889472368092 0.00413853463365841 0.00398849712428107 0.00446361590397599 0.00718929732433108 0.00445111277819455 0.00626406601650413 0.0112153038259565 0.00788947236809202 0.00470117529382346 0.00447611902975744 0.00626406601650413 0.00307576894223556 0.00323830957739435 0.00221305326331583 0.00290072518129532 0.00333833458364591 0.00441360340085021 0.00257564391097774 0.00341335333833458 0.00433858464616154 0.00406351587896974 0.00365091272818205 0.00632658164541135 0.00522630657664416 0.00251312828207052 0.0028382095523881 0.00398849712428107 0.00252563140785196 0.00101275318829707 0.00100025006251563 0.00157539384846212 0.00183795948987247 0.00207551887971993 0.00120030007501875 0.00222555638909727 0.00222555638909727 0.00110027506876719 0.00257564391097774 0.00342585646411603 0.00278819704926232 0.00195048762190548 
0.00157539384846212 0.00246311577894474 0.00230057514378595 0.00197549387346837 0.00165041260315079 0.00278819704926232 0.00140035008752188 0.0020130032508127 0.00100025006251563 0.00292573143285821 0.00260065016254063 0.00155038759689922 0.00221305326331583 0.00341335333833458 0.00446361590397599 0.00246311577894474 0.00155038759689922 0.00296324081020255 0.00358839709927482 0.00122530632658165 0.00157539384846212 0.00236309077269317 0.00366341585396349 0.00388847211802951 0.0028382095523881 0.00335083770942736 0.00435108777194299 0.00436359089772443 0.00447611902975744 0.00805201300325081 0.00607651912978245 0.00352588147036759 0.00436359089772443 0.00445111277819455 0.00253813453363341 0.00233808452113028 0.00155038759689922 0.00248812203050763 0.0018754688672168 0.00287571892973243 0.00110027506876719 0.00290072518129532 0.00356339084771193 0.00237559389847462 0.00406351587896974 0.00445111277819455 0.00558889722430608 0.002975743935984 0.00388847211802951 0.00413853463365841 0.00380095023755939 0.00151287821955489 0.0020130032508127 0.00255063765941485 0.00217554388597149 0.00232558139534884 0.00207551887971993 0.00213803450862716 0.00301325331332833 0.00287571892973243 0.00441360340085021 0.00630157539384846 0.0028382095523881 0.00126281570392598 0.00122530632658165 0.00216304076019005 0.00355088772193048 0.00140035008752188 0.00197549387346837 0.00295073768442111 0.00137534383595899 0.00151287821955489 0.00101275318829707 0.00221305326331583 0.00341335333833458 0.00233808452113028 0.00323830957739435 0.00487621905476369 0.00511377844461115 0.00255063765941485 0.00246311577894474 0.00390097524381095 0.00338834708677169 0.00126281570392598 0.00195048762190548 0.00231307826956739 0.00330082520630158 0.002975743935984 0.00251312828207052 0.00263815953988497 0.00450112528132033 0.00352588147036759 0.00470117529382346 0.00722680670167542 0.0125281320330083 0.00450112528132033 0.00435108777194299 0.00718929732433108 0.00427606901725431 0.00341335333833458 
0.00260065016254063 0.00373843460865216 0.00338834708677169 0.00301325331332833 0.00222555638909727 0.00280070017504376 0.00525131282820705 0.00356339084771193 0.00433858464616154 0.00875218804701175 0.00696424106026507 0.00330082520630158 0.00366341585396349 0.0057889472368092 0.00278819704926232 0.00137534383595899 0.00140035008752188 0.00192548137034259 0.00245061265316329 0.00217554388597149 0.00183795948987247 0.00231307826956739 0.00338834708677169 0.0018754688672168 0.00333833458364591 0.00595148787196799 0.00715178794698675 0.00338834708677169 0.00358839709927482 0.00558889722430608 0.00525131282820705 0.00355088772193048 0.00230057514378595 0.00548887221805451 0.00278819704926232 0.00380095023755939 0.00252563140785196 0.00545136284071018 0.00427606901725431 0.00253813453363341 0.00307576894223556 0.00611402850712678 0.010527631907977 0.00511377844461115 0.00446361590397599 0.00846461615403851 0.00715178794698675 0.0028382095523881 0.00278819704926232 0.00563890972743186 0.00696424106026507 0.00558889722430608 0.00522630657664416 0.00555138784696174 0.0125281320330083 0.00607651912978245 0.00788947236809202 0.0158414603650913 sim1_galaxy +NODE_11_length_39995_cov_63.1687_ID_21 0.674084260532567 63.1687 0.002125425085017 0.0019503900780156 0.0025755151030206 0.0014752950590118 0.001875375075015 0.0023379675935187 0.0026630326065213 0.002250450090018 0.0022254450890178 0.00338817763552711 0.00351320264052811 0.0019003800760152 0.000750150030006001 0.0018378675735147 0.001375275055011 0.0011002200440088 0.001125225045009 0.0020129025805161 0.0022129425885177 0.0013877775555111 0.00450090018003601 0.00490098019603921 0.00675135027005401 0.00411332266453291 0.00332566513302661 0.00617623524704941 0.00511352270454091 0.00320064012802561 0.000475095019003801 0.0019753950790158 0.0025880176035207 0.0019003800760152 0.0022754550910182 0.0024004800960192 0.0020879175835167 0.00125025005001 0.00570114022804561 0.00511352270454091 0.00836417283456691 
0.00335067013402681 0.0027130426085217 0.00588867773554711 0.00465093018603721 0.00411332266453291 0.0010127025405081 0.0018378675735147 0.0018378675735147 0.002250450090018 0.000537607521504301 0.000637627525505101 0.000587617523504701 0.000400080016003201 0.0024254850970194 0.0014002800560112 0.0025755151030206 0.00125025005001 0.0016878375675135 0.0027380476095219 0.0023129625925185 0.0013877775555111 0.000462592518503701 0.0018378675735147 0.0012877575515103 0.0014752950590118 0.0021504300860172 0.00352570514102821 0.00323814762952591 0.0012877575515103 0.0013502700540108 0.00900180036007201 0.00651380276055211 0.0018378675735147 0.0028505701140228 0.011127225445089 0.00758901780356071 0.0025880176035207 0.000487597519503901 0.00391328265653131 0.0023504700940188 0.001375275055011 0.0022879575915183 0.00633876775355071 0.00782656531306261 0.0023129625925185 0.00512602520504101 0.00733896779355871 0.0100645129025805 0.00465093018603721 0.00666383276655331 0.0148279655931186 0.00962692538507702 0.00511352270454091 0.0010127025405081 0.00525105021004201 0.00758901780356071 0.00351320264052811 0.00446339267853571 0.00723894778955791 0.00798909781956391 0.0025755151030206 0.00650130026005201 0.0170909181836367 0.0131776355271054 0.00836417283456691 0.00325065013002601 0.0161657331466293 0.0100645129025805 0.00675135027005401 0.0017003400680136 0.00693888777755551 0.00651380276055211 0.0026630326065213 0.0002250450090018 0.0021379275855171 0.0002750550110022 0.000587617523504701 0.00325065013002601 0.00311312262452491 0.00798909781956391 0.0020879175835167 0.00456341268253651 0.00956441288257651 0.00782656531306261 0.0022129425885177 0.000637627525505101 0.00457591518303661 0.00323814762952591 0.0025755151030206 0.00321314262852571 0.00323814762952591 0.00457591518303661 0.0018378675735147 0.0023379675935187 0.00702640528105621 0.00693888777755551 0.0018378675735147 0.0020879175835167 0.00712642528505701 0.00525105021004201 0.0019753950790158 0.000600120024004801 
0.0013002600520104 0.00391328265653131 0.0018378675735147 0.00378825765153031 0.00673884776955391 0.00956441288257651 0.0027380476095219 0.00582616523304661 0.0122774554910982 0.0161657331466293 0.00588867773554711 0.00755151030206041 0.0165783156631326 0.0148279655931186 0.00617623524704941 0.000887677535507101 0.00712642528505701 0.011127225445089 0.00338817763552711 0.00347569513902781 0.00395079015803161 0.00311312262452491 0.0014002800560112 0.00730146029205841 0.0106521304260852 0.0170909181836367 0.00511352270454091 0.0026630326065213 0.0122774554910982 0.00733896779355871 0.00490098019603921 0.0019003800760152 0.00702640528105621 0.00900180036007201 0.0023379675935187 0.0010877175435087 0.0019253850770154 0.0021379275855171 0.000637627525505101 0.00453840768153631 0.00395079015803161 0.00723894778955791 0.0024004800960192 0.00361322264452891 0.00673884776955391 0.00633876775355071 0.0020129025805161 0.000412582516503301 0.00323814762952591 0.00352570514102821 0.0019503900780156 0.000637627525505101 0.000412582516503301 0.000637627525505101 0.000462592518503701 0.0011752350470094 0.0019003800760152 0.0017003400680136 0.0010127025405081 0.000850170034006801 0.000887677535507101 0.0010127025405081 0.000475095019003801 0.0003250650130026 0.000600120024004801 0.000487597519503901 0.000750150030006001 0.003000600120024 0.00361322264452891 0.00456341268253651 0.0016878375675135 0.00331316263252651 0.0026630326065213 0.00325065013002601 0.0027130426085217 0.00472594518903781 0.00755151030206041 0.00666383276655331 0.00332566513302661 0.000850170034006801 0.0020879175835167 0.0028505701140228 0.0022254450890178 0.0026505301060212 0.00453840768153631 0.00325065013002601 0.0024254850970194 0.00332566513302661 0.00730146029205841 0.00650130026005201 0.00570114022804561 0.00331316263252651 0.00582616523304661 0.00512602520504101 0.00450090018003601 0.0011752350470094 0.0023379675935187 0.0013502700540108 0.001875375075015 0.0003000600120024 0.0010877175435087 
0.0002250450090018 0.000537607521504301 0.0026505301060212 0.00347569513902781 0.00446339267853571 0.0022754550910182 0.003000600120024 0.00378825765153031 0.0022879575915183 0.001125225045009 0.000637627525505101 0.00321314262852571 0.0021504300860172 0.002125425085017 sim1_galaxy +NODE_13_length_39994_cov_62.9723_ID_25 0.680177026553983 62.9723 0.000462604085919332 0.00111275036883299 0.00120027006076367 0.000650146282913656 0.00233802605586257 0.00235052886899552 0.00332574829336601 0.00213798104573529 0.00207546698007052 0.00327573704083419 0.00376334675301943 0.00103773349003526 0.000550123777850016 0.00118776724763072 0.00126278412642845 0.000475106899052287 0.00133780100522618 0.00133780100522618 0.00262559075792053 0.001662874146683 0.00470105773799105 0.00390087769748193 0.00650146282913656 0.00441349303593308 0.00372583831362056 0.00510114775824561 0.00416343677327399 0.00207546698007052 0.000775174414243205 0.00133780100522618 0.00277562451551599 0.00103773349003526 0.00201295291440574 0.0017753994648796 0.00178790227801255 0.00170038258608187 0.00575129404115926 0.00563876872296267 0.007764246955565 0.00297566952564327 0.00367582706108875 0.00816433697581956 0.00445100147533195 0.00441349303593308 0.00078767722737616 0.00178790227801255 0.00121277287389663 0.00213798104573529 0.000237553449526143 0.000750168787977295 0.00071266034857843 0.000400090020254557 0.00295066389937736 0.00311320047010577 0.00543872371283539 0.00170038258608187 0.00172538821234778 0.00372583831362056 0.00331324548023305 0.001662874146683 0.000225050636393188 0.00150033757595459 0.00110024755570003 0.000650146282913656 0.000462604085919332 0.00425095646520467 0.00316321172263759 0.00110024755570003 0.0011377559950989 0.00795178915255933 0.00371333550048761 0.00121277287389663 0.00210047260633643 0.0114775824560526 0.0112400290065265 0.00277562451551599 0.000512615338451152 0.0050761421319797 0.00357580455602511 0.00126278412642845 0.00205046135380461 0.00523867870270811 
0.011915180915706 0.00331324548023305 0.00387587207121602 0.00250056262659098 0.00665149658673201 0.00445100147533195 0.00772673851616614 0.0134530269310595 0.0113275486984572 0.00416343677327399 0.00078767722737616 0.00490110274811833 0.0112400290065265 0.00376334675301943 0.00547623215223425 0.00870195794053662 0.00773924132929909 0.00543872371283539 0.00592633342502063 0.0197919531894676 0.0131029481633368 0.007764246955565 0.00416343677327399 0.0193543547298142 0.00665149658673201 0.00650146282913656 0.00183791353054437 0.00618889250081268 0.00371333550048761 0.00332574829336601 6.25140656647746e-05 0.00278812732864895 0.000325073141456828 0.00071266034857843 0.00147533194968868 0.00338826235903078 0.00773924132929909 0.00178790227801255 0.00362581580855693 0.00942712110224801 0.011915180915706 0.00262559075792053 0.000250056262659098 0.00553874621789903 0.00316321172263759 0.00120027006076367 0.00235052886899552 0.00456352679352854 0.00553874621789903 0.00150033757595459 0.00247555700032507 0.00676402190492861 0.00618889250081268 0.00178790227801255 0.0021629866720012 0.0059888474906854 0.00490110274811833 0.00133780100522618 0.000812682853642069 0.00612637843514791 0.0050761421319797 0.00118776724763072 0.00395088895001375 0.00493861118751719 0.00942712110224801 0.00372583831362056 0.00973969143057188 0.00847690730414343 0.0193543547298142 0.00816433697581956 0.00887699732439799 0.0191543097196869 0.0134530269310595 0.00510114775824561 0.001387812257758 0.0059888474906854 0.0114775824560526 0.00327573704083419 0.00410092270760921 0.00403840864194444 0.00338826235903078 0.00311320047010577 0.00651396564226951 0.0137781000725163 0.0197919531894676 0.00563876872296267 0.00260058513165462 0.00847690730414343 0.00250056262659098 0.00390087769748193 0.00245055137405916 0.00676402190492861 0.00795178915255933 0.00235052886899552 0.000375084393988647 0.00217548948513416 0.00278812732864895 0.000750168787977295 0.00270060763671826 0.00403840864194444 
0.00870195794053662 0.0017753994648796 0.00250056262659098 0.00493861118751719 0.00523867870270811 0.00133780100522618 0.000225050636393188 0.00456352679352854 0.00425095646520467 0.00111275036883299 0.000150033757595459 0.000225050636393188 0.000250056262659098 0.000225050636393188 0.00101272786376935 0.00245055137405916 0.00183791353054437 0.00078767722737616 0.000937710984971619 0.001387812257758 0.00078767722737616 0.000775174414243205 0.000225050636393188 0.000812682853642069 0.000512615338451152 0.000550123777850016 0.00163786852041709 0.00250056262659098 0.00362581580855693 0.00172538821234778 0.00420094521267285 0.00260058513165462 0.00416343677327399 0.00367582706108875 0.00702658098072066 0.00887699732439799 0.00772673851616614 0.00372583831362056 0.000937710984971619 0.0021629866720012 0.00210047260633643 0.00207546698007052 0.00236303168212848 0.00270060763671826 0.00147533194968868 0.00295066389937736 0.00385086644495011 0.00651396564226951 0.00592633342502063 0.00575129404115926 0.00420094521267285 0.00973969143057188 0.00387587207121602 0.00470105773799105 0.00101272786376935 0.00247555700032507 0.0011377559950989 0.00233802605586257 0.000175039383861369 0.000375084393988647 6.25140656647746e-05 0.000237553449526143 0.00236303168212848 0.00410092270760921 0.00547623215223425 0.00201295291440574 0.00163786852041709 0.00395088895001375 0.00205046135380461 0.00133780100522618 0.000150033757595459 0.00235052886899552 0.000462604085919332 0.000462604085919332 sim1_galaxy diff -r 000000000000 -r 68a3648c7d91 fosm_cluster/test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fosm_cluster/test Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,20 @@ +"x" +"NODE_2_length_40000_cov_63.1617_ID_3" 9 +"NODE_3_length_40000_cov_63.0619_ID_5" 6 +"NODE_10_length_39995_cov_63.156_ID_19" 1 +"NODE_12_length_39995_cov_63.3136_ID_23" 7 +"NODE_18_length_37282_cov_67.8523_ID_35" 8 +"NODE_5_length_39999_cov_63.277_ID_9" 5 +"NODE_16_length_39898_cov_63.337_ID_31" 4 
+"NODE_1_length_40000_cov_62.8079_ID_1" 7 +"NODE_17_length_39819_cov_63.5061_ID_33" 6 +"NODE_9_length_39996_cov_63.0617_ID_17" 9 +"NODE_7_length_39998_cov_63.2738_ID_13" 8 +"NODE_6_length_39999_cov_63.2183_ID_11" 1 +"NODE_8_length_39997_cov_62.8364_ID_15" 3 +"NODE_19_length_33054_cov_76.4496_ID_37" 2 +"NODE_15_length_39993_cov_62.9495_ID_29" 5 +"NODE_4_length_39999_cov_63.2942_ID_7" 8 +"NODE_14_length_39993_cov_63.1132_ID_27" 5 +"NODE_11_length_39995_cov_63.1687_ID_21" 3 +"NODE_13_length_39994_cov_62.9723_ID_25" 9 diff -r 000000000000 -r 68a3648c7d91 mytrimmer/aaa.fplot --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/aaa.fplot Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,9 @@ +#-- forward hits sorted by %sim +0 0 0 +0 0 0 + + +1 1 100 +2100 2100 100 + + diff -r 000000000000 -r 68a3648c7d91 mytrimmer/aaa.gp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/aaa.gp Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,21 @@ +set terminal png tiny size 800,800 +set output "aaa.png" +set size 1,1 +set grid +unset key +set border 15 +set tics scale 0 +set xlabel "Assembly" +set ylabel "Assembly_22" +set format "%.0f" +set mouse format "%.0f" +set mouse mouseformat "[%.0f, %.0f]" +set mouse clipboardformat "[%.0f, %.0f]" +set xrange [1:2100] +set yrange [1:2100] +set style line 1 lt 1 lw 3 pt 6 ps 1 +set style line 2 lt 3 lw 3 pt 6 ps 1 +set style line 3 lt 2 lw 3 pt 6 ps 1 +plot \ + "aaa.fplot" title "FWD" w lp ls 1, \ + "aaa.rplot" title "REV" w lp ls 2 diff -r 000000000000 -r 68a3648c7d91 mytrimmer/aaa.png Binary file mytrimmer/aaa.png has changed diff -r 000000000000 -r 68a3648c7d91 mytrimmer/aaa.rplot --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/aaa.rplot Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,5 @@ +#-- reverse hits sorted by %sim +0 0 0 +0 0 0 + + diff -r 000000000000 -r 68a3648c7d91 mytrimmer/f1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/f1 Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,31 @@ +>Assembly 
+AGAATTCGTCTTGCTCTATTCACCCTTACTTTTCTTCTTGCCCGTTCTCTTTCTTAGTATGAATCCAGTA +TGCCTGCCTGTAATTGTTGCGCCCTACCTCTTTTGGCTGGCGGCTATTGCCGCCTCGTGTTTCACGGCCT +CAGTTAGTACCGTTGTGACCGCCACCGGCTTGGCCCTCTCACTTCTACTCTTGGCAGCAGTGGCCAGCTC +ATATGCCGCTGCACAAAGGAAACTGCTGACACCGGTGACAGTGCTTACTGCGGTTGTCACTTGTGAGTAC +ACACGCACCATTTACAATGCATGATGTTCGTGAGATTGATCTGTCTCTAACAGTTCACTTCCTCTGCTTT +TCTCCTCAGTCTTTGCAATTTGCCTAACATGGAGGATTGAGGACCCACCTTTTAATTCTCTTCTGTTTGC +ATTGCTGGCCGCAGCTGGCGGACTACAAGGCATTTACGGTTAGTGTGCCTCTGTTATGAAATGCAGGTTT +GACTTCATATGTATGCCTTGGCATGACGTCAACTTTACTTTTATTTCAGTTCTGGTGATGCTTGTGCTCC +TGATACTAGCGTACAGAAGGAGATGGCGCCGTTTGACTGTTTGTGGCGGCATCATGTTTTTGGCATGTGT +ACTTGTCCTCATCGTCGACGCTGTTTTGCAGCTGAGTCCCCTCCTTGGAGCTGTAACTGTGGTTTCCATG +ACGCTGCTGCTACTGGCTTTCGTCCTCTGGCTCTCTTCGCCAGGGGGCCTAGGTACTCTTGGTGCAGCCC +TTTTAACATTGGCAGCAGGTAAGCCACACGTGTGACATTGCTTGCCTTTTTGCCACATGTTTTCTGGACA +CAGGACTAACCATGCCATCTCTGATTATAGCTCTGGCACTGCTAGCGTCACTGATTTTGGGCACACTTAA +CTTGACTACAATGTTCCTTCTCATGCTCCTATGGACACTTGGTAAGTTTTCCCTTCCTTTAACTCATTAC +TTGTTCTTTTGTAATCGCAGCTCTAACTTGGCATCTCTTTTACAGTGGTTCTCCTGATTTGCTCTTCGTG +CTCTTCATGTCCACTGAGCAAGATCCTTCTGGCACGACTGTTCCTATATGCTCTCGCACTCTTGTTGCTA +GCCTCCGCGCTAATCGCTGGTGGCAGTATTTTGCAAACAAACTTCAAGAGTTTAAGCAGCACTGAATTTA +TACCCAGTGAGTATCTATTTGTTACTCCTGTTTAGTTGAAGAAAACAAGCTATTGGATTGTAACACACAT +TTTACGCTTTGTTCCTTAGATTTGTTCTGCATGTTATTACTGATTGTCGCTGGCATACTCTTCATTCTTG +CTATCCTGACCGAATGGGGCAGTGGAAATAGAACATACGGTCCAGTTTTTATGTGCCTCGGTGGCCTGCT +CACCATGGTAGCCGGCGCTGTGTGGCTGACGGTGATGTCTAACACGCTTTTGTCTGCCTGGATTCTTACA +GCAGGATTCCTGATTTTCCTCATTGGTAAGTGTGACACCAACAGGTGTTGCCTTGTTATGTCACCGTTCT +GACACATGACTTACATGGGTTTGGCTTTTGTAGGCTTTGCCCTCTTTGGGGTCATTAGATGCTGCCGCTA +CTGCTGCTACTACTGCCTTACACTGGAAAGTGAGGAGCGCCCACCGACCCCATATCGCAACACTGTATAA +AGGTAAGTATTATTAAATTTTAGAGACACTATCACGTGTAACTTGACGTGCAAGGATGGAAGAGAGGGGC +AGGGAAACGCAAATGCCGGTTGCCCGGTATGGGGGCCCGTTTATTATGGTAAGGCTCTTCGGGCAAGATG +GAGAGGCAAACATACAGGAGGAAAGGCTATATGAGCTACTCTCTGACCCACGCTCCGCGCTCGGCCTAGA 
+CCCGGGGCCCCTGATTGCTGAGAACCTGCTGCTAGTGGCGCTGCGTGGCACCAACAACGATCCCAGGCCT +CAGCGTCAGGAGAGGGCCAGAGAACTGGCCCTCGTTGGCATTCTACTAGGAAACGGCGAGCAGGGTGAAC +ACTTGGGCACGGAGAGTGCCCTGGAGGCCTCAGGCAACAACTATGTGTATGCCTACGGACCAGACTGGAT diff -r 000000000000 -r 68a3648c7d91 mytrimmer/f2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/f2 Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,31 @@ +>Assembly_22 +AGAATTCGTCTTGCTCTATTCACCCTTACTTTTCTTCTTGCCCGTTCTCTTTCTTAGTATGAATCCAGTA +TGCCTGCCTGTAATTGTTGCGCCCTACCTCTTTTGGCTGGCGGCTATTGCCGCCTCGTGTTTCACGGCCT +CAGTTAGTACCGTTGTGACCGCCACCGGCTTGGCCCTCTCACTTCTACTCTTGGCAGCAGTGGCCAGCTC +ATATGCCGCTGCACAAAGGAAACTGCTGACACCGGTGACAGTGCTTACTGCGGTTGTCACTTGTGAGTAC +ACACGCACCATTTACAATGCATGATGTTCGTGAGATTGATCTGTCTCTAACAGTTCACTTCCTCTGCTTT +TCTCCTCAGTCTTTGCAATTTGCCTAACATGGAGGATTGAGGACCCACCTTTTAATTCTCTTCTGTTTGC +ATTGCTGGCCGCAGCTGGCGGACTACAAGGCATTTACGGTTAGTGTGCCTCTGTTATGAAATGCAGGTTT +GACTTCATATGTATGCCTTGGCATGACGTCAACTTTACTTTTATTTCAGTTCTGGTGATGCTTGTGCTCC +TGATACTAGCGTACAGAAGGAGATGGCGCCGTTTGACTGTTTGTGGCGGCATCATGTTTTTGGCATGTGT +ACTTGTCCTCATCGTCGACGCTGTTTTGCAGCTGAGTCCCCTCCTTGGAGCTGTAACTGTGGTTTCCATG +ACGCTGCTGCTACTGGCTTTCGTCCTCTGGCTCTCTTCGCCAGGGGGCCTAGGTACTCTTGGTGCAGCCC +TTTTAACATTGGCAGCAGGTAAGCCACACGTGTGACATTGCTTGCCTTTTTGCCACATGTTTTCTGGACA +CAGGACTAACCATGCCATCTCTGATTATAGCTCTGGCACTGCTAGCGTCACTGATTTTGGGCACACTTAA +CTTGACTACAATGTTCCTTCTCATGCTCCTATGGACACTTGGTAAGTTTTCCCTTCCTTTAACTCATTAC +TTGTTCTTTTGTAATCGCAGCTCTAACTTGGCATCTCTTTTACAGTGGTTCTCCTGATTTGCTCTTCGTG +CTCTTCATGTCCACTGAGCAAGATCCTTCTGGCACGACTGTTCCTATATGCTCTCGCACTCTTGTTGCTA +GCCTCCGCGCTAATCGCTGGTGGCAGTATTTTGCAAACAAACTTCAAGAGTTTAAGCAGCACTGAATTTA +TACCCAGTGAGTATCTATTTGTTACTCCTGTTTAGTTGAAGAAAACAAGCTATTGGATTGTAACACACAT +TTTACGCTTTGTTCCTTAGATTTGTTCTGCATGTTATTACTGATTGTCGCTGGCATACTCTTCATTCTTG +CTATCCTGACCGAATGGGGCAGTGGAAATAGAACATACGGTCCAGTTTTTATGTGCCTCGGTGGCCTGCT +CACCATGGTAGCCGGCGCTGTGTGGCTGACGGTGATGTCTAACACGCTTTTGTCTGCCTGGATTCTTACA +GCAGGATTCCTGATTTTCCTCATTGGTAAGTGTGACACCAACAGGTGTTGCCTTGTTATGTCACCGTTCT 
+GACACATGACTTACATGGGTTTGGCTTTTGTAGGCTTTGCCCTCTTTGGGGTCATTAGATGCTGCCGCTA +CTGCTGCTACTACTGCCTTACACTGGAAAGTGAGGAGCGCCCACCGACCCCATATCGCAACACTGTATAA +AGGTAAGTATTATTAAATTTTAGAGACACTATCACGTGTAACTTGACGTGCAAGGATGGAAGAGAGGGGC +AGGGAAACGCAAATGCCGGTTGCCCGGTATGGGGGCCCGTTTATTATGGTAAGGCTCTTCGGGCAAGATG +GAGAGGCAAACATACAGGAGGAAAGGCTATATGAGCTACTCTCTGACCCACGCTCCGCGCTCGGCCTAGA +CCCGGGGCCCCTGATTGCTGAGAACCTGCTGCTAGTGGCGCTGCGTGGCACCAACAACGATCCCAGGCCT +CAGCGTCAGGAGAGGGCCAGAGAACTGGCCCTCGTTGGCATTCTACTAGGAAACGGCGAGCAGGGTGAAC +ACTTGGGCACGGAGAGTGCCCTGGAGGCCTCAGGCAACAACTATGTGTATGCCTACGGACCAGACTGGAT diff -r 000000000000 -r 68a3648c7d91 mytrimmer/out.fplot --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/out.fplot Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,9 @@ +#-- forward hits sorted by %sim +0 0 0 +0 0 0 + + +1 1 100 +2100 2100 100 + + diff -r 000000000000 -r 68a3648c7d91 mytrimmer/out.gp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/out.gp Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,21 @@ +set terminal png tiny size 800,800 +set output "out.png" +set size 1,1 +set grid +unset key +set border 15 +set tics scale 0 +set xlabel "Assembly" +set ylabel "Assembly_22" +set format "%.0f" +set mouse format "%.0f" +set mouse mouseformat "[%.0f, %.0f]" +set mouse clipboardformat "[%.0f, %.0f]" +set xrange [1:2100] +set yrange [1:2100] +set style line 1 lt 1 lw 3 pt 6 ps 1 +set style line 2 lt 3 lw 3 pt 6 ps 1 +set style line 3 lt 2 lw 3 pt 6 ps 1 +plot \ + "out.fplot" title "FWD" w lp ls 1, \ + "out.rplot" title "REV" w lp ls 2 diff -r 000000000000 -r 68a3648c7d91 mytrimmer/out.png Binary file mytrimmer/out.png has changed diff -r 000000000000 -r 68a3648c7d91 mytrimmer/out.rplot --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/out.rplot Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,5 @@ +#-- reverse hits sorted by %sim +0 0 0 +0 0 0 + + diff -r 000000000000 -r 68a3648c7d91 mytrimmer/test.delta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/test.delta Thu Dec 
22 04:45:31 2016 -0500 @@ -0,0 +1,5 @@ +/home/inmare/galaxy/tools/mytrimmer/f1 /home/inmare/galaxy/tools/mytrimmer/f2 +NUCMER +>Assembly Assembly_22 2100 2100 +1 2100 1 2100 0 0 0 +0 diff -r 000000000000 -r 68a3648c7d91 mytrimmer/trim.seqs.C.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/trim.seqs.C.cpp Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +int main (int argc, char *argv[]); +int eval_quality(string & qstring,int lencutoff,int errors); + +int main (int argc, char *argv[]) +{ + if (argc==9) + { + unsigned long inseq=0; + unsigned long outseq=0; + unsigned long pfile=0; + ifstream infile; + ifstream infileP; + string file=argv[1]; + string filep=argv[2]; + ofstream outfile; + ofstream outfilep; + ofstream outfileunm; + string outname=(argv[6]); + string outnamep=(argv[7]); + string outunm=(argv[8]); + outfile.open(outname.c_str()); + outfilep.open(outnamep.c_str()); + outfileunm.open(outunm.c_str()); + int cutoff=atoi(argv[3]); + int errors=atoi(argv[4]); + int discard=atoi(argv[5]); + infile.open(file.c_str()); + infileP.open(filep.c_str()); + if (!infile) + { + cerr << "Couldn't open "<< infile << "\n"; + exit(1); + } + if (!infileP) + { + cerr << "Couldn't open "<< outfile << "\n"; + exit(1); + } + map Min; + map Max; + if (infile.is_open() && infileP.is_open()){ + string header; + string seq; + string seqp; + string qscore; + string qscorep; + while (!infile.eof() && !infileP.eof()) + { + getline(infile,header); + if (header!="") + { + //read headers + sequences + getline(infile,seq); // + getline(infileP,seqp); + getline(infileP,seqp);// + // + //cout <<"A:" << seq << "\n" << "B:" << seqp << "\n"; + inseq+=seq.length(); + inseq+=seqp.length(); + + + //read Qscores + getline(infile,qscore); + getline(infile,qscore);// + getline(infileP,qscorep); + getline(infileP,qscorep);// + if (discard >0 && discard<=seq.length()) + { + 
seq=seq.substr(discard-1,seq.length()-discard); + seqp=seqp.substr(discard-1,seqp.length()-discard); + qscore=qscore.substr(discard-1,qscore.length()-discard); + qscorep=qscorep.substr(discard-1,qscorep.length()-discard); + } + if (qscore.length()!=seq.length()) + { + cerr << "Invalid fastq\n" << seq << "\n" << qscore << "\n"; + exit(1); + } + + if (qscorep.length()!=seqp.length()) + { + cerr << "Invalid fastq\n" << seqp << "\n" << qscorep << "\n"; + exit(1); + } + + + // + //cout << qscore << "\n" << qscorep << "\n"; + + //eval Qscores + int p=eval_quality(qscore,cutoff,errors); + int pp=eval_quality(qscorep,cutoff,errors); + + string Qheader=header; + if (*(Qheader.end()-2)=='/') // togli gli slash + { + Qheader.replace(Qheader.end()-2,Qheader.end(),""); + } + string Oheader=Qheader; + Oheader[0]='+'; + if (p>0) + { + seq=seq.substr(0,p); + qscore=qscore.substr(0,p); + if (pp>0) + { + seqp=seqp.substr(0,pp); + qscorep=qscorep.substr(0,pp); + outseq+=seqp.length(); + outseq+=seq.length(); + outfile << Qheader <<"/1" << "\n" << seq << "\n" << Oheader <<"/1" << "\n" << qscore << "\n"; + outfilep << Qheader <<"/2" << "\n" << seqp << "\n" << Oheader<<"/2" << "\n" << qscorep << "\n"; + }else{ + outseq+=seq.length(); + outfileunm << Qheader <<"/1" << "\n" << seq << "\n" << Oheader<<"/1" << "\n" << qscore << "\n"; + } + }else if(p==0 && pp>0){ + seqp=seqp.substr(0,pp); + qscorep=qscorep.substr(0,pp); + outseq+=seqp.length(); + outfileunm << Qheader <<"/2" << "\n" << seqp << "\n" << Oheader <<"/2" << "\n" << qscorep << "\n"; + } + } + } + + }else{ + cerr << "could not open files\n"; + } + //cerr << "Input "<< inseq << " bases.\nOutput " << outseq << " bases.\n"; + }else{ + + cout << "input: \n"; + } +} + +int eval_quality(string & qstring,int lencutoff,int errors) +{ + int Nminori10=0; + int Nminori20=0; + int Nmaggiori25=0; + int l10=0; + int l20=0; + int p=0; + double total_perr=0; + string::iterator pos; + for (pos=qstring.begin();pos!=qstring.end();pos++) + { + int 
punteggio=static_cast (*pos)-33; + if (punteggio>=1 && punteggio <=41) + { + double exp=(double)punteggio/-10; + total_perr+=pow(10,exp); + if (p>0) + { + if (punteggio<=10) //count qscores <=10 + { + l10++; + Nminori20++; + Nminori10++; + }else if (punteggio<=20){ // count Qscores <=20 + l20++; + Nminori10=0; + Nminori20++; + }else if (punteggio>20){ + Nminori20=0; + Nminori10=0; + if (punteggio>=25) + { + Nmaggiori25++; + } + } + } + if (Nminori10>=10) // 3 or more consecutives very low quality bases + { + p-=Nminori10; + break; + }else if (Nminori20>=15){ // 5 or more consecutives low quality bases + p-=Nminori20; + break; + } + if (total_perr>=(double)errors) // sum of per base error probability when 5e-2 5 wrong base calls in 100 + { + //cout << p << " " << total_perr << " " << errors << "\n"; + break; + } + p++; + }else{ + cerr << "Invalid Qscore" << *pos << "=" << punteggio << "\n"; + exit(1); + } + } + double prop_gr_25=(double)Nmaggiori25/(double)(p); + if (prop_gr_25>=0.35 && p>=lencutoff && l20 <= p*0.2 && l10 <= p*0.1) // if 50% of Qscores are >= 25,size is >= cutoff a + { + return p; + }else{ + return 0; + } +} diff -r 000000000000 -r 68a3648c7d91 mytrimmer/trimPE Binary file mytrimmer/trimPE has changed diff -r 000000000000 -r 68a3648c7d91 mytrimmer/trimPE.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytrimmer/trimPE.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,27 @@ + + trimming tool for Illumina PE data + /home/inmare/galaxy/tools/mytrimmer/trimPE $f1 $f2 $l $ne $disc $o1 $o2 $o3 + "approved by the boss" + + + + + + + + + + + + + + + Our custom script for quality trimming implements strict quality filters based on the provided base call quality scores. Reads are iteratively trimmed from the 3' end until all of the following conditions were satisfied: +1. the median quality score (Qscore) of upstream bases is ≥15 +2. less than 10 bases with Qscore ≤10 and less than 15 bases with Qscore ≤ 20 are present in the upstream sequence +3. 
the cumulative error probability in the upstream region is below a user defined cutoff +4. the length of the trimmed read exceeds a user defined cutoff + +The program is designed to work with paired end sequencing files only, the output consists in 3 distinct files containing the pairs where both mate passed the filters (output to 2 separate files) and a third file containing all the singleton reads, for which the corresponding mate did not pass the quality filters. + + diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/Active_site/as_search.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/Active_site/as_search.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,399 @@ +package Bio::Pfam::Active_site::as_search; + +use strict; +use warnings; + +use Bio::SeqFeature::Generic; +use Bio::SimpleAlign; +use Bio::Pfam::Scan::Seq; + +=head2 find_as + + Title : find_as + Usage : find_as($as_aln, $as_res, $seq_id, $seq_se, $seq_region, $family, $hmm_file) + Function: finds active sites in a query sequence which + has a match to a Pfam active site family + + Returns : An array reference of active site postions + Args : Alignment object of active site sequences, hash of arrays containing seq ids => active site positions, + start-end sequence in the format "3-50", sequence region, family, file containing all Pfam models + +=cut + +sub find_as { + my ($as_aln, $as_res, $seq_id, $seq_se, $seq_region, $family, $hmm_file) = @_; + + + system("hmmfetch $hmm_file $family > /tmp/hmm.$$") and die "FATAL: Problem running [hmmfetch $hmm_file $family > /tmp/hmm.$$]\n"; + + $seq_id = "Query_".$seq_id; + + my $fasta; + foreach my $seq ($as_aln->each_seq) { + my $s = $seq->seq; + $s =~ s/[\-\.]//g; #Remove gaps + + $fasta .= ">" . $seq->id . "/" . $seq->start . "-" . $seq->end . 
"\n$s\n"; + } + $fasta .= ">$seq_id/$seq_se\n$seq_region"; + open(SEQ, ">/tmp/seqs.$$") or die "Couldn't open file seqs.$$ $!\n"; + print SEQ $fasta; + close SEQ; + + + open(OUT, "hmmalign --outformat Pfam /tmp/hmm.$$ /tmp/seqs.$$ |") or die "Couldn't open fh to hmmalign $!\n"; + + my $aln = new Bio::SimpleAlign; + my ($name, $start, $end, $seq); + while() { + if( /^(\S+)\/(\d+)-(\d+)\s+(\S+)\s*/ ) { + $name = $1; + $start = $2; + $end = $3; + $seq = $4; + + $aln->add_seq(Bio::Pfam::Scan::Seq->new('-seq'=>$seq, '-id'=>$name, '-start'=>$start, '-end'=>$end, '-type'=>'aligned')); + } + } + close OUT; + + + unlink "/tmp/seqs.$$"; + unlink "/tmp/hmm.$$"; + + #Locate exp as in fam + _exp_as($aln, $as_res); + + #Store as patterns + my $pattern_aln = new Bio::SimpleAlign; + _pattern_info($aln, $pattern_aln); + #find pred as + my $array_ref = _add_pred_as($aln, $pattern_aln, $seq_id); + return $array_ref; +} + +=head2 _exp_as + + Title : _exp_as + Usage : _exp_as($aln, $hash_of_arrays) + Function : Adds experimental active site data to alignment object + Returns : Nothing, populates the alignment object with active site residue info + Args : alignment object + +=cut + +sub _exp_as { + + my ($aln, $as_res) = @_; + + + foreach my $seq ($aln->each_seq) { + + foreach my $pos ( @{$as_res->{$seq->id}}) { + + if($pos >= $seq->start and $pos <= $seq->end) { #Feature is in the alignment + + #store column position for seq + my $col = $aln->column_from_residue_number($seq->id, $pos); + + #add feature to seq + my $aa .= uc substr($seq->seq(), $col-1, 1); + + my $feat = new Bio::SeqFeature::Generic ( -display_name => 'experimental', + -primary => $aa, + -start => $col); + + + + $seq->add_SeqFeature($feat); + } + + } + } +} + + +=head2 _pattern_info + + Title : _pattern_info + Usage : _pattern_info($aln_object, $aln_object) + Function : Takes an alignment and extracts active site patterns into a second alignment + Returns : Nothing, populates a second alignment object with active site 
seqences + Args : alignment object, empty alignment object + +=cut + + +sub _pattern_info { + my ($aln, $pattern_aln) = @_; + my (%pat_col_seq); + + foreach my $seq ( $aln->each_seq() ) { + + next unless($seq->all_SeqFeatures()); + my ($pat, $col); + foreach my $feat ( sort {$a->start <=> $b->start } $seq->all_SeqFeatures() ) { + $pat .= $feat->primary_tag(); #HEK + $col .= $feat->start() . " "; #33 44 55 + } + + unless(exists($pat_col_seq{"$pat:$col"})) { + $pattern_aln->add_seq($seq); + $pat_col_seq{"$pat:$col"}=1; + } + + } +} + + + +=head2 _add_pred_as + + Title : _add_pred_as + Usage : _add_pred_as($aln_object, $aln_object) + Function : Predicts active sites based on known active site data + Returns : array of active site pos + Args : alignment, alignment of known active sites + +=cut + + + + +sub _add_pred_as { + my ($aln, $pattern_aln, $query_seq_id) = @_; + my $num_seq=0; + my ($query_seq, @as_res); + + #locate query seq + foreach my $seq ( $aln->each_seq() ) { + if($seq->id eq $query_seq_id) { + $query_seq = $seq; + last; + } + } + die "FATAL: Can't locate query sequence [$query_seq_id] in active site alignement\n" unless($query_seq); + + + my $aligns_with = new Bio::SimpleAlign; + foreach my $seq1 ( $pattern_aln->each_seq() ) { + + + #See if all active site residues from seq1 exist in query seq + my $mismatch; + foreach my $feat ( sort {$a->start <=> $b->start } $seq1->all_SeqFeatures() ) { + + my $aa1 = $feat->primary_tag(); + my $col = $feat->start(); + + my $aa2 = uc substr($query_seq->seq, $col-1, 1); + unless($aa1 eq $aa2) { + $mismatch = 1; + last; + + } + + } + + #Store seq1 if all active site residues are present in seq1 + unless($mismatch) { + $aligns_with->add_seq($seq1); + } + } + + + + $num_seq = $aligns_with->num_sequences(); + return unless($num_seq); + my (%seq_to_remove, %seq_to_rem); #two hashes used to collect seq that need removing + + + #if query seq matches more than one pattern remove subpatterns and any patterns that overlap + + 
#first remove sub pat + if($num_seq>1) { + foreach my $sequence1 ($aligns_with->each_seq() ) { + foreach my $sequence2 ($aligns_with->each_seq() ) { + + next if($sequence1 eq $sequence2); + + my (%hash1, %hash2, $num_1, $num_2, %smaller, %larger); + #collect column positions + foreach my $feat1 ($sequence1->all_SeqFeatures() ) { + $hash1{$feat1->start} =1; + $num_1++; + } + foreach my $feat2 ($sequence2->all_SeqFeatures() ) { + $hash2{$feat2->start} =1; + $num_2++; + } + + + #see if one is a subpattern of the other + my $diff=0; + unless($num_1 eq $num_2) { + + my $remove_seq; + + if($num_1 > $num_2) { + %smaller = %hash2; + %larger = %hash1; + $remove_seq = $sequence2; + + } + else { + %smaller = %hash1; + %larger = %hash2; + $remove_seq = $sequence1; + } + + + foreach my $key (keys %smaller) { + $diff = 1 unless(exists($larger{$key})); #diff is true if it is not a subpattern + } + + + $seq_to_rem{$remove_seq}= $remove_seq unless($diff) ; + next unless($diff); + } + } + + } + } + + #Now remove any patterns which need removing + foreach my $remove (keys %seq_to_rem) { + $aligns_with->remove_seq($seq_to_rem{$remove}); + } + + + unless($num_seq >=1) { + die "FATAL: All sequences that align with active site sequences have been removed - this should never happen\n"; + } + + + + $num_seq = $aligns_with->num_sequences(); + #and then any patterns that overlap + if($num_seq>1) { + + foreach my $sequence1 ($aligns_with->each_seq() ) { + + foreach my $sequence2 ($aligns_with->each_seq() ) { + next if($sequence1 eq $sequence2); + + my ($seq1_st, $seq1_en, $seq2_st, $seq2_en); + + my (%hash1, %hash2, $num_1, $num_2, %smaller, %larger); + + #see if patterns overlap - find pattern start ends and collect column positions + foreach my $feat1 ($sequence1->all_SeqFeatures() ) { + + $seq1_st = $feat1->start() if(!$seq1_st or $feat1->start() < $seq1_st); + $seq1_en = $feat1->start() if(!$seq1_en or $feat1->start() > $seq1_en); + } + + foreach my $feat2 ($sequence2->all_SeqFeatures() ) 
{ + + $seq2_st = $feat2->start() if(!$seq2_st or $feat2->start() < $seq2_st); + $seq2_en = $feat2->start() if(!$seq2_en or $feat2->start() > $seq2_en); + } + + #then see if patterns overlap - remove sequence with pattern of least identity + if(($seq1_st >= $seq2_st and $seq1_st <= $seq2_en) or ($seq2_st >= $seq1_st and $seq2_st <= $seq1_en)) { + my $remove = _identity($query_seq, $sequence1, $sequence2); + $seq_to_remove{$remove}= $remove; + } + } + + } + } + + #Now remove any patterns which need removing + foreach my $remove (keys %seq_to_remove) { + $aligns_with->remove_seq($seq_to_remove{$remove}); + $num_seq = $aligns_with->num_sequences(); + last if($num_seq eq "1"); #just in case the % identities are identical + } + + + $num_seq = $aligns_with->num_sequences(); + unless($num_seq >=1) { + die "FATAL: All sequences that align with active site sequences have been removed - this should never happen\n"; + } + + + + #Add features to seq + foreach my $sequence ($aligns_with->each_seq() ) { + foreach my $feat ($sequence->all_SeqFeatures() ) { + + my $actual_pos = $query_seq->location_from_column($feat->start); + $actual_pos = $actual_pos->start(); + + + push(@as_res, $actual_pos); + + + + } + } + return \@as_res + +} + + +=head2 _identity + + Title : _identity + Usage : _identity($sequence1 , $sequence2, $sequence3) + Function : Identifies seq with lowest % identity to sequence1 + Returns : The sequence which has the lowest % id to sequence 1 + Args : sequence1, sequence2, sequence3. 
+ +=cut + + +sub _identity { + my $seq1 = shift; + my @aligns_with = @_; + my $lower_identity=100; + my $lower_identity_seq; + foreach my $s (@aligns_with) { + my $tmp_aln = new Bio::SimpleAlign; + $tmp_aln->add_seq($s); + $tmp_aln->add_seq($seq1); + + my $identity = $tmp_aln->percentage_identity(); + if($identity < $lower_identity) { + $lower_identity = $identity; + $lower_identity_seq = $s; + } + + } + return $lower_identity_seq; +} + +=head1 COPYRIGHT + +Copyright (c) 2007: Genome Research Ltd. + +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk), + Jaina Mistry (jm14@sanger.ac.uk) + +This is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . + +=cut + +1; diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/HMM/HMM.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/HMM/HMM.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,266 @@ +# HMM.pm +# +# Author: finnr +# Maintainer: $Id: HMM.pm,v 1.1 2009-10-08 12:27:28 jt6 Exp $ +# Version: $Revision: 1.1 $ +# Created: Nov 24, 2008 +# Last Modified: $Date: 2009-10-08 12:27:28 $ +=head1 NAME + +Template - a short description of the class + +=cut + +package Bio::Pfam::HMM::HMM; + +=head1 DESCRIPTION + +A more detailed description of what this class does and how it does it. + +$Id: HMM.pm,v 1.1 2009-10-08 12:27:28 jt6 Exp $ + +=head1 COPYRIGHT + +File: HMM.pm + +Copyright (c) 2007: Genome Research Ltd. 
+ +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) + + This is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + or see the on-line version at http://www.gnu.org/copyleft/gpl.txt + +=cut + +use strict; +use warnings; + +use Moose; +use Moose::Util::TypeConstraints; +use Carp; + +#------------------------------------------------------------------------------- + +=head1 METHODS + +=cut + + +subtype 'hmmVersion' + => as Str + => where { $_ =~ m/^HMMER3\/f\s+\[3\.\d+[ab](\d+)?\s+|\s+\[.*\]/ } + => message { "|$_| does not look like as HMMER3 version" }; + +subtype 'hmmName' + => as Str + => where { $_ =~ m/\S{1,15}/ } + => message { "|$_| does not look like Pfam name or SEED" }; + +subtype 'hmmAcc' + => as Str + => where { $_ =~ m/PF\d{5}/ } + => message { "|$_| does not look like Pfam accession" }; + + +subtype 'hmmAlpha' + => as Str + => where { $_ eq 'amino' or $_ eq 'nucleic' } + => message { "|$_| does not look like a HMMER3 alphabet" }; + + +subtype 'hmmMsvStats' + => as HashRef + => where { defined ($_->{mu}) and defined ($_->{lambda}) and ($_->{lambda} <= 0.8) and ($_->{lambda} >= 0.5) } + => message { "Mu |$_->{mu}| and lambda |$_->{lambda}| must be defined and lambda must be between 0.5 and 0.8" }; + + +subtype 'hmmViterbiStats' + => as HashRef + => where { defined ($_->{mu}) and defined ($_->{lambda}) and 
($_->{lambda} <= 0.8) and ($_->{lambda} >= 0.5) } + => message { "Mu |$_->{mu}| and lambda |$_->{lambda}| must be defined and lambda must be between 0.5 and 0.8" }; + +subtype 'hmmForwardStats' + => as HashRef + => where { defined ($_->{tau}) and defined ($_->{lambda}) and ($_->{lambda} <= 0.8) and ($_->{lambda} >= 0.5) } + => message { "Tau |$_->{tau}| and lambda |$_->{lambda}| must be defined and lambda must be between 0.5 and 0.8" }; + +has 'version' => ( + isa => 'hmmVersion', + is => 'rw', + required => 1 +); + +has 'name' => ( + isa => 'hmmName', + is => 'rw', + required => 1 +); + +has 'accession' => ( + isa => 'hmmAcc', + is => 'rw' +); + +has 'description' => ( + isa => 'Str', + is => 'rw' +); + +has 'length' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +has 'alpha' => ( + isa => 'hmmAlpha', + is => 'rw', + required => 1, +); + +has 'rf' => ( + isa => 'Bool', + is => 'rw', + required => 1 +); + +has 'mm' => ( + isa => 'Bool', + is => 'rw', +); + +has 'cons' => ( + isa => 'Bool', + is => 'rw', +); + +has 'cs' => ( + isa => 'Bool', + is => 'rw', + required => 1 +); + +has 'map' => ( + isa => 'Bool', + is => 'rw', + required => 1 +); + +has 'date' => ( + isa => 'Str', + is => 'rw', + required => 1 +); + +has 'buildLine' => ( + isa => 'HashRef[Str]', + is => 'rw', + required => 1, + default => sub { {} }, +); + +has 'searchMethod' => ( + isa => 'Str', + is => 'rw', +); + +has 'nSeq' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +has 'msvStats' => ( + isa => 'hmmMsvStats', + is => 'rw', + required => 1 +); + +has 'viterbiStats' => ( + isa => 'hmmViterbiStats', + is => 'rw', + required => 1 +); + +has 'forwardStats' => ( + isa => 'hmmForwardStats', + is => 'rw', + required => 1 +); + + +has 'effn' => ( + isa => 'Num', + is => 'rw', + required => 1 +); + +has 'cksum' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +has 'seqGA' => ( + isa => 'Num', + is => 'rw', +); + +has 'domGA' => ( + isa => 'Num', + is => 'rw', +); + +has 
'seqTC' => ( + isa => 'Num', + is => 'rw', +); + +has 'domTC' => ( + isa => 'Num', + is => 'rw', +); + +has 'seqNC' => ( + isa => 'Num', + is => 'rw', +); + +has 'domNC' => ( + isa => 'Num', + is => 'rw', +); + +has 'emissionLines' => ( + isa => 'ArrayRef[ArrayRef]', + is => 'rw', + default => sub { [] }, +); + +has 'mapPos'=> ( + isa => 'ArrayRef[Int]', + is => 'rw', + default => sub{ [] } +); + +has 'compLines' => ( + isa => 'ArrayRef[Str]', + is => 'rw', + default => sub { [] }, +); + +__PACKAGE__->meta->make_immutable; +1; + diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/HMM/HMMIO.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/HMM/HMMIO.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,329 @@ +# HMM.pm +# +# Author: finnr +# Maintainer: $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $ +# Version: $Revision: 1.3 $ +# Created: Nov 24, 2008 +# Last Modified: $Date: 2010-01-12 17:00:26 $ +=head1 NAME + +Template - a short description of the class + +=cut + +package Bio::Pfam::HMM::HMMIO; + +=head1 DESCRIPTION + +A more detailed description of what this class does and how it does it. + +$Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $ + +=head1 COPYRIGHT + +File: HMM.pm + +Copyright (c) 2007: Genome Research Ltd. + +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) + + This is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + or see the on-line version at http://www.gnu.org/copyleft/gpl.txt + +=cut + +use strict; +use warnings; + +use Moose; +use Moose::Util::TypeConstraints; +use Carp; +use Bio::Pfam::HMM::HMM; + +#------------------------------------------------------------------------------- + +=head1 METHODS + +=cut +sub readHMM { + my ($this, $hmm) = @_; + + unless($hmm){ + confess("No HMM passed in!"); + } + chomp($hmm); + + my @input; + if(ref($hmm) eq 'GLOB'){ + @input = <$hmm>; + }elsif($hmm !~ /\n/ and -e $hmm and -s $hmm){ + #Assume that we have a filename and try and open it; + open(HMM, $hmm) || confess("Could not open $hmm:[$!]"); + @input = ; + }else{ + @input = split(/\n/, $hmm); + } + + + + #Parse the header section! + #HMMER3/f [3.1b1 | May 2013] + #NAME SEED + #ACC PF000001.1 + #DESC A description + #LENG 55 + #ALPH amino + #RF no + #MM no + #CONS yes + #CS no + #MAP yes + #DATE Fri Nov 21 09:58:16 2008 + #COM [1] /Users/finnr/Work/Software/hmmer-3.0.20081101/bin/hmmbuild -o hmmbuild.log HMM SEED + #NSEQ 279 + #EFFN 4.966292 + #STATS LOCAL MSV -11.4716 0.69948 + #STATS LOCAL VITERBI -12.3713 0.69948 + #STATS LOCAL FORWARD -5.5807 0.69948 + + #To add GA, TC, NC, CKSUM, DESC + my($objHash); + my $i =0; + foreach ( @input ){ + if(my ($version) = $_ =~ /(HMMER3.*)/){ + $objHash->{version} = $version; + }elsif(my ($acc) = $_ =~ /^ACC\s+(PF\d+\.\d+)$/){ + $objHash->{accession} = $acc; + }elsif(/NAME\s+(\S+)/){ + $objHash->{name} = $1 ; + }elsif(/DESC\s+(.*)/){ + $objHash->{description} = $1 ; + }elsif(my ($length) = $_ =~ /^LENG\s+(\d+)/){ + $objHash->{length} = $length; + }elsif( my ($alpha) = $_ =~ /^ALPH\s+(\S+)/){ + $objHash->{alpha} = $alpha; + }elsif( my ($rf) = $_ =~ /^RF\s+(no|yes)/){ + $objHash->{rf} = ($rf eq "no") ? 
0 : 1; + }elsif( my ($mm) = $_ =~ /^MM\s+(no|yes)/){ + $objHash->{mm} = ($mm eq "no") ? 0 : 1; + }elsif( my ($cons) = $_ =~ /^CONS\s+(no|yes)/){ + $objHash->{cons} = ($cons eq "no") ? 0 : 1; + }elsif(my ($cs) = $_ =~ /^CS\s+(no|yes)/ ){ + $objHash->{cs} = ($cs eq "no") ? 0 : 1; + }elsif(my ($map) = $_ =~ /^MAP\s+(no|yes)/){ + $objHash->{map} = ($map eq "no") ? 0 : 1; + }elsif(my ($date) = $_ =~ /^DATE\s+(.*)/){ + $objHash->{date} = $date; + }elsif(my ($sm) = $_ =~ /^SM\s+(.*)/){ + $objHash->{searchMethod} = $sm; + + }elsif(my ($options, $hmmName, $alignName) = $input[$i] =~ /^BM.*hmmbuild(.*)? (\S+) (\S+)$/){ + $objHash->{buildLine} = { cmd => 'hmmbuild', + options => $options, + name => $hmmName, + align => $alignName } ; + }elsif(my($noSeqs) = $_ =~ /^NSEQ\s+(\d+)/){ + $objHash->{nSeq} = $noSeqs; + }elsif( my($effn) = $_ =~ /^EFFN\s+(\d+\.\d+)/){ + #EFFN 4.966292 + $objHash->{effn} = $effn ; + }elsif( my ( $cksum ) = $_ =~ /^CKSUM (\d+)/){ + $objHash->{cksum} = $cksum ; + }elsif(/GA\s+(\S+)\s+(\S+)\;/){ + $objHash->{seqGA} = $1; + $objHash->{domGA} = $2; + }elsif(/TC\s+(\S+)\s+(\S+)\;/){ + $objHash->{seqTC} = $1; + $objHash->{domTC} = $2; + }elsif(/NC\s+(\S+)\s+(\S+)\;/){ + $objHash->{seqNC} = $1; + $objHash->{domNC} = $2; + }elsif( my ($msv_mu, $msv_lambda ) = $_ =~ /^STATS LOCAL MSV\s+(\S+)\s+(0\.\d+)/){ + $objHash->{msvStats} = { mu => $msv_mu, lambda => $msv_lambda}; + }elsif( my ($viterbi_mu, $viterbi_lambda ) = $_ =~ /^STATS LOCAL VITERBI\s+(\S+)\s+(0\.\d+)/){ + $objHash->{viterbiStats} = { mu => $viterbi_mu, lambda => $viterbi_lambda }; + }elsif( my ($forward_tau, $forward_lambda ) = $_ =~ /^STATS LOCAL FORWARD\s+(\S+)\s+(0\.\d+)/){ + $objHash->{forwardStats} = {tau => $forward_tau, lambda => $forward_lambda}; + }elsif( $_ =~ /^HMM\s+A/){ + last; + }else{ + confess("Got a bad HMM header line:$input[$i]\n"); + } + $i++; + } + + my $hmmObj = Bio::Pfam::HMM::HMM->new($objHash); + + + #The next two lines are stand lines + #HMM A C D E F G H I K L M N P Q R S T 
V W Y + # m->m m->i m->d i->m i->i d->m d->d + $i++; + + + #Add the comp line + for( my $line = 0; $line <=2; $line++){ + $i++; + my @l = split(/\s+/, $input[$i]); + my @c; + if($line == 0 ){ + @c = @l[2..21]; + }elsif( $line == 1){ + @c = @l[1..20]; + }elsif($line == 2){ + @c = @l[1..7]; + } + $hmmObj->compLines->[$line] = \@c; + } + + + + for(my $pos = 0; $pos < $hmmObj->length; $pos++){ + #There are three lines per position - match emission line, insert emission line, state transition line + for( my $line = 0; $line <=2; $line++){ + $i++; + my @l = split(/\s+/, $input[$i]); + my @e; + if($line == 0 ){ + @e = @l[2..21]; + if($hmmObj->map){ + $hmmObj->mapPos->[$pos] = $l[22]; + } + }elsif( $line == 1){ + @e = @l[1..20]; + }elsif($line == 2){ + @e = @l[1..7]; + } + $hmmObj->emissionLines->[$pos]->[$line] = \@e; + } + } + + if($input[$i++] =~ /^\/\/$/){ + confess("Expected file terminator //, but got $input[$i]\n"); + } + + #No veryifiy that we have COMP line and the the number of emissionlines is equivalent to length + unless(scalar( @{ $hmmObj->emissionLines } ) == $hmmObj->length){ + confess("Number of emssionLines does not match the length of the model, got ".scalar( @{ $hmmObj->emissionLines} ). 
+ " expected ".$hmmObj->length); + } + + unless($hmmObj->compLines){ + confess("No compLine set on HMM"); + } + + if($hmmObj->map){ + unless(scalar(@{$hmmObj->mapPos}) == $hmmObj->length ){ + confess("HMM object had map set, but the number of map positions does not match the length of the HMM"); + }; + } + return $hmmObj; +} + + + +sub writeHMM { + my ($this, $hmm, $hmmObj) = @_; + + unless($hmm){ + confess("No HMM out file passed in!"); + } + + unless(ref($hmm) eq 'GLOB'){ + my $hmmFile = $hmm; + $hmm = undef; + #Assume that we have a filename and try and open it; + open($hmm, ">$hmmFile") || confess("Could not open $hmmFile:[$!]"); + } + + print $hmm $hmmObj->version."\n"; + printf $hmm ("%-5s %s\n", "NAME", $hmmObj->name); + printf $hmm ("%-5s %s\n", "ACC", $hmmObj->accession) if($hmmObj->accession); + printf $hmm ("%-5s %s\n", "DESC", $hmmObj->description) if($hmmObj->description); + printf $hmm ("%-5s %d\n", "LENG", $hmmObj->length); + printf $hmm ("%-5s %s\n", "ALPH", $hmmObj->alpha); + printf $hmm ("%-5s %s\n", "RF", ($hmmObj->rf ? "yes" : "no")); + printf $hmm ("%-5s %s\n", "MM", ($hmmObj->mm ? "yes" : "no")); + printf $hmm ("%-5s %s\n", "CONS", ($hmmObj->cons ? "yes" : "no")); + printf $hmm ("%-5s %s\n", "CS", ($hmmObj->cs ? "yes" : "no")); + printf $hmm ("%-5s %s\n", "MAP", ($hmmObj->map ? 
"yes" : "no")); + printf $hmm ("%-5s %s\n", "DATE", $hmmObj->date); + printf $hmm ("%-5s %d\n", "NSEQ", $hmmObj->nSeq); + printf $hmm ("%-5s %f\n", "EFFN", $hmmObj->effn); + printf $hmm ("%-5s %d\n", "CKSUM", $hmmObj->cksum); + printf $hmm ("%-5s %.2f %.2f;\n", "GA", $hmmObj->seqGA, $hmmObj->domGA) if(defined($hmmObj->seqGA)); + printf $hmm ("%-5s %.2f %.2f;\n", "TC", $hmmObj->seqTC, $hmmObj->domTC) if(defined($hmmObj->seqTC)); + printf $hmm ("%-5s %.2f %.2f;\n", "NC", $hmmObj->seqNC, $hmmObj->domNC) if(defined($hmmObj->seqNC)); + + printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "MSV", $hmmObj->msvStats->{mu}, $hmmObj->msvStats->{lambda}); + printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "VITERBI", $hmmObj->viterbiStats->{mu}, $hmmObj->viterbiStats->{lambda}); + printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "FORWARD", $hmmObj->forwardStats->{tau}, $hmmObj->forwardStats->{lambda}); + + print $hmm <m m->i m->d i->m i->i d->m d->d +EOF + + printf $hmm ("%7s ", "COMPO"); + foreach my $s (@{$hmmObj->compLines->[0]}){ + printf $hmm (" %7s", $s); + } + print $hmm "\n"; + + print $hmm (" " x 8); + foreach my $s (@{$hmmObj->compLines->[1]}){ + printf $hmm (" %7s", $s); + } + print $hmm "\n"; + + print $hmm (" " x 8); + foreach my $s (@{$hmmObj->compLines->[2]}){ + printf $hmm (" %7s", $s); + } + print $hmm "\n"; + + + my $pos = 1; + foreach my $el (@{ $hmmObj->emissionLines }){ + printf $hmm ("%7s ", $pos); + foreach my $s (@{$el->[0]}){ + printf $hmm (" %7s", $s); + } + if($hmmObj->map){ + printf $hmm ("%7s - -\n", $hmmObj->mapPos->[$pos - 1]); + }else{ + print $hmm "\n"; + } + print $hmm (" " x 8); + foreach my $s (@{$el->[1]}){ + printf $hmm (" %7s", $s); + } + print $hmm "\n"; + print $hmm (" " x 8); + foreach my $s (@{$el->[2]}){ + printf $hmm (" %7s", $s); + } + print $hmm "\n"; + $pos++; + } + + + print $hmm "//\n"; +} + + + +1; + diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/HMM/HMMMatch.pm --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/HMM/HMMMatch.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,62 @@ + +package Bio::Pfam::HMM::HMMMatch; + +use strict; +use warnings; + +use Moose; +use Moose::Util::TypeConstraints; + + +subtype 'evalue' + => as Str + => where { $_ =~ m/^(\d+(\.\d+){0,1}e[+|-]\d+|\d+\.\d+|\d+)$/ } + => message { "$_ does not look like an evalue" }; + +has 'evalue' => ( + isa => 'evalue', + is => 'rw', + required => 1 +); + +has 'bits' => ( + isa => 'Str', + is => 'rw', + required => 1 +); + +has 'name' => ( + isa => 'Str', + is => 'rw', + required => 1 +); + +has bias => ( + isa => 'Num', + is => 'rw' +); + + __PACKAGE__->meta->make_immutable; + +=head1 COPYRIGHT + +Copyright (c) 2007: Genome Research Ltd. + +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) + +This is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . 
+ +=cut + +1; diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/HMM/HMMResults.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/HMM/HMMResults.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,582 @@ +# Bio::Pfam::HMM::HMMResults.pm +# +# Author: finnr +# Maintainer: $Id: HMMResults.pm,v 1.3 2009-12-15 14:38:08 jt6 Exp $ +# Version: $Revision: 1.3 $ +# Created: Nov 19, 2008 +# Last Modified: $Date: 2009-12-15 14:38:08 $ + +=head1 NAME + +Bio::Pfam::HMM::HMMResults - A object to represents the results from hmmsearch + +=cut + +package Bio::Pfam::HMM::HMMResults; + +=head1 DESCRIPTION + +A more detailed description of what this class does and how it does it. + +$Id: HMMResults.pm,v 1.3 2009-12-15 14:38:08 jt6 Exp $ + +=head1 COPYRIGHT + +File: Bio::Pfam::HMM::HMMResults.pm + +Copyright (c) 2007: Genome Research Ltd. + +Authors: Rob Finn (rdf@sanger.ac.uk) + + This is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ or see the on-line version at http://www.gnu.org/copyleft/gpl.txt + +=cut + +use strict; +use warnings; + +use Moose; +use Moose::Util::TypeConstraints; +use Bio::Pfam::HMM::HMMSequence; +use Bio::Pfam::HMM::HMMUnit; + +# +#------------------------------------------------------------------------------- +# Attributes + +has 'hmmerVersion' => ( + isa => 'Str', + is => 'rw', +); + +has 'hmmName' => ( + isa => 'Str', + is => 'rw' +); + +has 'seqDB' => ( + isa => 'Str', + is => 'rw' +); + +has hmmLength => ( + isa => 'Int', + is => 'rw' +); + +has 'thisFile' => ( + isa => 'Str', + is => 'rw' +); + +has seedName => ( + isa => 'Str', + is => 'rw' +); + +has 'seqs' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'units' => ( + isa => 'ArrayRef', + is => 'rw', + default => sub { [] }, +); + +has 'domThr' => ( + isa => 'Num', + is => 'rw', + default => '25.0' +); + +has 'seqThr' => ( + isa => 'Num', + is => 'rw', + default => '25.0' +); + +has 'evalueThr' => ( + isa => 'Num', + is => 'rw' +); + +has 'domTC' => ( + isa => 'Num', + is => 'rw' +); + +has 'seqTC' => ( + isa => 'Num', + is => 'rw' +); + +has 'domNC' => ( + isa => 'Num', + is => 'rw' +); + +has 'seqNC' => ( + isa => 'Num', + is => 'rw' +); + +has 'randSeedNum' => ( + isa => 'Int', + is => 'rw' +); + +has 'description' => ( + isa => 'Str', + is => 'rw' +); + +has 'seqName' => ( + isa => 'Str', + is => 'rw' +); + +has 'seqLength' => ( + isa => 'Int', + is => 'rw' +); + + +has 'eof' => ( + isa => 'Int', + is => 'rw', + default => 0 +); + +has 'program' => ( + isa => 'Str', + is => 'rw' +); + +=head1 METHODS + +=head2 addHMMSeq + + Title : addHMMSeq + Usage : $hmmRes->addHMMSeq( $hmmSeqObj ) + Function : Adds a Bio::Pfam::HMM::HMMSequence object to the results object + Args : A Bio::Pfam::HMM::HMMSequence object + Returns : nothing + +=cut + +sub addHMMSeq { + my( $self, $hmmSeq ) = @_; + + unless($hmmSeq->isa('Bio::Pfam::HMM::HMMSequence')){ + die 'Trying to add a non 
Bio::Pfam::HMM::HMMSequence object'; + } + + if($self->seqs){ + if($self->seqs->{$hmmSeq->name}){ + die "Trying to add the same sequence twice"; + } + } + + $self->seqs->{$hmmSeq->name} = $hmmSeq; +} + + +=head2 eachHMMSeq + + Title : eachHMMSeq + Usage : my @seqs = $hmmRes->eachHMMSeq + Function : Returns an array reference containing the references to all of the + : Bio::Pfam::HMM::HMMSequence objects stored in the HMMResults object. + Args : None + Returns : Array reference + +=cut + +sub eachHMMSeq { + my ($self ) = @_; + my @seqs; + my $seqRefs = $self->seqs; + foreach my $n (keys %{ $seqRefs }){ + push(@seqs, $seqRefs->{$n}); + } + return(\@seqs); +} + +#------------------------------------------------------------------------------- + +=head2 addHMMUnit + + Title : addHMMUnit + Usage : $hmmRes + Function : Adds HMM units (the actual region hit) to the HMMSequence in the object + : and for convenience to the the results sets. All we store are duplicates + : of the references. + Args : A Bio::Pfam::HMM:HMMUnit + Returns : Nothing + +=cut + +sub addHMMUnit { + my ($self, $hmmUnit) = @_; + + unless($hmmUnit->isa('Bio::Pfam::HMM::HMMUnit')){ + die "Trying to add an non-Bio::Pfam::HMM::HMMUnit\n"; + } + + if($self->seqs){ + if($self->seqs->{$hmmUnit->name}){ + $self->seqs->{$hmmUnit->name}->addHMMUnit($hmmUnit); + }else{ + warn "Could not add hmmUnit as the sequence has not been added\n"; + } + } + + #More conveinence we store the point to the hmmunit in an array + push(@{$self->units},$hmmUnit); +} + + +#------------------------------------------------------------------------------- + +=head2 domainBitsCutoffFromEvalue + + Title : domainBitsCutoffFromEvalue + Usage : $hmmRes->domainBitsCutoffFromEvalue(0.01) + Function : From the supplied evalue, it scans through all of the evalues in the results + : and calulates the bits score. + Args : An evalue. + Returns : A bits score. 
If no evalue is specified, returns nothing + +=cut +

# Estimate the bit-score cut-off that corresponds to an E-value threshold.
#
# All domain hits (units) and sequence hits are pooled and walked in order
# of decreasing bit score until the first hit whose E-value exceeds $eval.
# The cut-off is placed between that hit ($dom) and the one before it ($prev):
#   * gap between the two scores below 1 bit -> $prev's bit score
#   * $dom below 25 bits and $prev above 25  -> 25
#   * otherwise -> $dom's score plus half the gap (half-gap formatted "%.1f")
#
# Args    : $eval - E-value threshold
# Returns : a bit score;
#           nothing (after a warning) when $eval is undefined;
#           undef (after a warning) when no hit satisfies the threshold or
#           no hit exceeds it.
sub domainBitsCutoffFromEvalue {
    my ($self, $eval) = @_;

    unless ( defined $eval ) {
        warn "No evalue specified\n";
        return;
    }

    my ($prev, $dom);
    foreach my $hit ( sort { $b->bits <=> $a->bits }
                      @{ $self->units }, @{ $self->eachHMMSeq } ) {
        if ( $hit->evalue > $eval ) {
            $dom = $hit;    # first hit that fails the threshold
            last;
        }
        $prev = $hit;       # last hit seen that still passes
    }

    if ( !defined $prev || !defined $dom ) {
        # Carp is not imported by this module, so a bare carp() call would
        # die with "Undefined subroutine"; load it and call it qualified.
        require Carp;
        Carp::carp("Evalue is either above or below the list...");
        return undef;
    }

    my $sep = $prev->bits - $dom->bits;

    if ( $sep < 1 ) {
        return $prev->bits();
    }
    if ( $dom->bits < 25 && $prev->bits > 25 ) {
        return 25;
    }

    return $dom->bits + sprintf( "%.1f", $sep / 2 );
}
+ + +#------------------------------------------------------------------------------- + +=head2 lowestTrue + + Title : + Usage : + Function : + Args : + Returns : + +=cut + +sub lowestTrue { + my $self = shift; + + unless($self->domTC && $self->seqTC) { + unless($self->domThr and $self->seqThr){ + die "Could not define TC as I am missing a threshold\n"; + } + #Set it wildly high! + my ($lowSeq, $lowDom); + $lowSeq = $lowDom = 999999.99; + + foreach my $seqId (keys %{$self->seqs} ){ + if($self->seqs->{$seqId}->bits >= $self->seqThr){ + #Is this the lowest sequence thresh + if($self->seqs->{$seqId}->bits < $lowSeq){ + $lowSeq = $self->seqs->{$seqId}->bits; + } + #For each of the regions found on the sequence, look to see if the match is great + #than the domain threshold.
If it is, is it lower than we we have seen previously + foreach my $unit (@{ $self->seqs->{$seqId}->hmmUnits } ){ + if( $unit->bits() >= $self->domThr && $unit->bits() < $lowDom ) { + $lowDom = $unit->bits; + } + } + } + + } + $self->domTC($lowDom); + $self->seqTC($lowSeq); + } + return($self->seqTC, $self->domTC); +} + +#------------------------------------------------------------------------------- + +=head2 highestNoise + + Title : + Usage : + Function : + Args : + Returns : + +=cut + +sub highestNoise { + my $self = shift; + + #See if it is already set + unless($self->domNC && $self->seqNC) { + unless($self->domThr and $self->seqThr){ + die "Could not define TC as I am missing a threshold\n"; + } + + #Set it wildly low + my ($highSeq, $highDom); + $highSeq = $highDom = -999999.99; + + foreach my $seqId (keys %{$self->seqs} ){ + + if($self->seqs->{$seqId}->bits < $self->seqThr){ + #Is this the highest sequence thres below the cut-off + if($self->seqs->{$seqId}->bits > $highSeq){ + $highSeq = $self->seqs->{$seqId}->bits; + } + } + + #For each of the regions found on the sequence, look to see if the match is great + #than the domain threshold. 
If it is, is it lower than we we have seen previously + foreach my $unit (@{ $self->seqs->{$seqId}->hmmUnits } ){ + if( $unit->bits < $self->domThr && $unit->bits > $highDom ) { + $highDom = $unit->bits; + } + } + } + $self->domNC($highDom); + $self->seqNC($highSeq); + } + + return($self->seqNC, $self->domNC); +} + + +sub applyEdits { + my ($self, $edits, $removeBadEd) = @_; + + my @validEd; #If removeBadEd flag is on, collect all the valid ED lines in this array and return at end of sub + foreach my $e (@$edits){ + #{ seq => $1, oldFrom => $2, oldTo => $3, newFrom => $5, newTo => $6 } + if($self->seqs->{$e->{seq}}){ + my $matched = 0; + foreach my $u (@{ $self->seqs->{ $e->{seq} }->hmmUnits }){ + if($u->envFrom == $e->{oldFrom} and $u->envTo == $e->{oldTo}) { + $matched = 1; #HMM unit found + + if(defined $e->{newFrom} and $e->{newTo}){ + + #Check co-ordinates of new start and end positions are in range + if( $e->{newFrom} < $u->{envFrom} or $e->{newTo} > $u->{envTo} or $e->{newFrom} > $e->{newTo}) { + if($removeBadEd) { + print "Removing ED line due to out of range co-ordinates: " . $e->{seq}."/".$e->{newFrom}."-".$e->{newTo}. "\n"; + } + else { + warn $e->{seq}."/".$e->{newFrom}."-".$e->{newTo}." contains out of range co-ordinates - bad ED line\n"; + } + last; + } + + #Modify the start end positions + $u->envFrom($e->{newFrom}); + $u->envTo($e->{newTo}); + + #Check that the ali-positions are still okay + if($u->seqFrom < $e->{newFrom}){ + $u->seqFrom($e->{newFrom}); + } + if($u->seqTo > $e->{newTo}){ + $u->seqTo($e->{newTo}); + } + }else{ + #Set the score so low it will never get in the align + $u->bits(-999999.99); + } + + push(@validEd, $e) if($removeBadEd); + last; + } + } + unless($matched){ #HMM unit not found - bad ED + if($removeBadEd) { + print "Removing ED line for invalid hmm unit: " . $e->{seq}."/".$e->{oldFrom}."-".$e->{oldTo}. "\n"; + } + else { + warn $e->{seq}."/".$e->{oldFrom}."-".$e->{oldTo}." 
does not appear in the list of hmm units - bad ED line\n"; + } + } + }else{ #Sequence not found - bad ED + if($removeBadEd) { + print "Removing ED line for invalid hmm unit: " . $e->{seq}."/".$e->{oldFrom}."-".$e->{oldTo}. "\n"; + } + else { + warn $e->{seq}." does not appear in the list of hmm units - bad ED line\n"; + } + } + } + return(\@validEd) if($removeBadEd); + +} + +sub remove_overlaps_by_clan { + + my ($self, $clanmap, $nested) = @_; + + my $new = Bio::Pfam::HMM::HMMResults->new; + $new->seqName($self->seqName); + + foreach my $unit ( sort { $a->evalue <=> $b->evalue } @{ $self->units } ) { + + #check if it overlaps before adding + my $o; + + foreach my $u ( @{ $new->units } ) { + + if( exists($clanmap->{$unit->name}) and exists($clanmap->{$u->name}) and ($clanmap->{$unit->name} eq $clanmap->{$u->name}) ) { + if( overlap( $unit, $u ) ) { + if(exists($$nested{$unit->name}{$u->name})) { + next; + } + else { + $o=1; + last; + } + } + + } + } + unless($o) { + if(! $new->seqs->{$unit->name}) { + + $new->addHMMSeq( Bio::Pfam::HMM::HMMSequence->new( { name => $self->seqs->{$unit->name}->name, + desc => $self->seqs->{$unit->name}->desc, + bits => $self->seqs->{$unit->name}->bits, + evalue => $self->seqs->{$unit->name}->evalue, + numberHits => $self->seqs->{$unit->name}->numberHits}) ); + + } + $new->addHMMUnit($unit); + } + + } + return $new; +} + + + +sub overlap { + # does unit1 overlap with unit2? 
+ my $unit1 = shift; + my $unit2 = shift; + my( $u1, $u2 ) = sort { $a->seqFrom <=> $b->seqFrom } ( $unit1, $unit2 ); + + + if( $u2->seqFrom <= $u1->seqTo ) { + return 1; + } + + return 0; +} + + +sub results { + my ( $self, $pfamScanData, $e_value ) = @_; + + my @results = (); + foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $self->units } ) { + + my $pfamB = $unit->name =~ /^Pfam-B/; + + #Filter results based on thresholds + if ( $unit->name =~ /^Pfam-B/ ) { + next unless ( $self->seqs->{$unit->name}->evalue <= 0.001 and $unit->evalue <= 0.001 ); + $pfamB = 1; + } + else { + if ( $e_value ) { + next unless ( $self->seqs->{$unit->name}->evalue <= $e_value and $unit->evalue <= $e_value ) ; + } + else { + next unless $unit->sig; + } + } + + push @results, { + seq => { from => $unit->seqFrom, + to => $unit->seqTo, + name => $self->seqName }, + env => { from => $unit->envFrom, + to => $unit->envTo }, + + hmm => { from => $unit->hmmFrom, + to => $unit->hmmTo }, + + model_length => $pfamScanData->{_model_len}->{ $unit->name }, + bits => $unit->bits, + evalue => $unit->evalue, + acc => $pfamScanData->{_accmap}->{ $unit->name }, + name => $unit->name, + desc => $pfamScanData->{_desc}->{ $unit->name }, + type => $pfamB ? undef : $pfamScanData->{_type}->{ $unit->name }, + clan => $pfamB ? undef : + $pfamScanData->{_clanmap}->{ $unit->name } || 'No_clan', + + act_site => $pfamB ? undef : $unit->{act_site}, + sig => $pfamB ? 
"NA" : $unit->sig, + align => [ sprintf( '#HMM %s', $unit->hmmalign->{hmm} ), + sprintf( '#MATCH %s', $unit->hmmalign->{match} ), + sprintf( '#PP %s', $unit->hmmalign->{pp} ), + sprintf( '#SEQ %s', $unit->hmmalign->{seq} ) ] + }; + } + + return \@results; +} + +1; diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/HMM/HMMResultsIO.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/HMM/HMMResultsIO.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,1186 @@ +# HMMResultsIO.pm +# +# Author: rdf +# Maintainer: $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $ +# Version: $Revision: 1.2 $ +# Created: Nov 16, 2008 +# Last Modified: $Date: 2009-12-01 15:42:20 $ + +=head1 NAME + +Template - a short description of the class + +=cut + +package Bio::Pfam::HMM::HMMResultsIO; + +=head1 DESCRIPTION + +A more detailed description of what this class does and how it does it. + +$Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $ + +=head1 COPYRIGHT + +File: HMMResultsIO.pm + +Copyright (c) 2007: Genome Research Ltd. + +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) + + This is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ or see the on-line version at http://www.gnu.org/copyleft/gpl.txt + +=cut + +use strict; +use warnings; +use Moose; +use Carp; + +#All the things we need to objectfy the search results +use Bio::Pfam::HMM::HMMResults; +use Bio::Pfam::HMM::HMMSequence; +use Bio::Pfam::HMM::HMMUnit; + +#------------------------------------------------------------------------------- + +=head1 ATTRIBUTES + + + +=cut + +has 'align' => ( + isa => 'Int', + is => 'rw', + default => 0 +); + +has 'outfile' => ( + isa => 'Str', + is => 'rw', + default => 'OUTPUT' +); + +has 'pfamout' => ( + isa => 'Str', + is => 'rw', + default => 'PFAMOUT' +); + +has 'scores' => ( + isa => 'Str', + is => 'rw', + default => 'scores' +); + +#------------------------------------------------------------------------------- + +=head1 METHODS + +=head2 parseHMMER3 + + Title : parseHMMER + Usage : $hmmResIO->parseHMMSearch( filename ) + Function : Parse the output from a HMMER3 search results + Args : Filename containing the search + Returns : A Bio::Pfam::HMM::HMMResults object + +=cut + +sub parseHMMER3 { + my ( $self, $filename ) = @_; + my $fh; + + if(ref($filename) eq 'GLOB'){ + $fh = $filename; + }else{ + open( $fh, $filename ) or confess "Could not open $filename:[$!]\n"; + } + +# open( $fh, $filename ) or confess "Could not open $filename:[$!]\n"; + my $hmmRes = Bio::Pfam::HMM::HMMResults->new; + $self->_readHeader( $fh, $hmmRes ); + $self->_readSeqHits( $fh, $hmmRes ); + $self->_readUnitHits( $fh, $hmmRes ); + $self->_readFooter($fh, $hmmRes); + return ($hmmRes); +} + + + +sub parseMultiHMMER3 { + my ( $self, $filename ) = @_; + my $fh; + + if(ref($filename) eq 'GLOB'){ + $fh = $filename; + }elsif( ref($filename) and $filename->isa('IO::File') ) { + $fh = $filename; + }else{ + open( $fh, $filename ) or confess "Could not open $filename:[$!]\n"; + } + + my @hmmResAll; + my $program; + while(!eof($fh)){ + my $hmmRes = Bio::Pfam::HMM::HMMResults->new; + my $eof = $self->_readHeader( $fh, $hmmRes ); + last 
if($eof); + push(@hmmResAll, $hmmRes); + if($hmmRes->program) { + $program = $hmmRes->program; + } + else { + $hmmRes->program($program); + } + $self->_readSeqHits( $fh, $hmmRes ); + $self->_readUnitHits( $fh, $hmmRes ); + $self->_readFooter($fh, $hmmRes); + } + return (\@hmmResAll); +} + +sub parseSplitHMMER3 { + my($self, $files ) = @_; + + my $hmmRes = Bio::Pfam::HMM::HMMResults->new; + + foreach my $filename (@{$files}){ + my ($fh); + open( $fh, $filename ) or confess "Could not open $filename:[$!]\n"; + $self->_readHeader( $fh, $hmmRes ); + $self->_readSeqHits( $fh, $hmmRes ); + $self->_readUnitHits( $fh, $hmmRes ); + $self->_readFooter($fh, $hmmRes); + } + + return ( $hmmRes ); + +} + + +#------------------------------------------------------------------------------- + +=head2 convertHMMSearch + + Title : convertHMMSearch + Usage : $hmmResIO->convertHMMSearch('SEARCHFILE') + Function : This wraps up a couple of methods to convert the more complex hmmsearch + : results in to nice clean format that we Pfam-ers are used to. + Args : The filename of the hmmsearch output file + Returns : Nothing + +=cut + +sub convertHMMSearch { + my ( $self, $filename ) = @_; + + unless ($filename) { + confess "No filename passed in to convertHMMSearch\n"; + } + unless ( -s $filename ) { + confess "$filename does not exists\n"; + } + + #Now parse in the raw HMM output and write out the results as a PFAMOUT + my $hmmRes = $self->parseHMMER3($filename); + $self->writePFAMOUT($hmmRes); + return $hmmRes; +} + +#------------------------------------------------------------------------------- + +=head2 writePFAMOUT + + Title : writePFAMOUT + Usage : $hmmResIO->writePFAMOUT( $hmmRes ) + Function : Writes a Bio::Pfam::HMM:HMMResults object in to a PFAMOUT file. 
+ Args : A Bio::Pfam::HMM:HMMResults + Returns : Nothing + +=cut + +sub writePFAMOUT { + my ( $self, $hmmRes ) = @_; + + unless ($hmmRes) { + confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n"; + } + unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) { + confess("Variable passed in is not a Bio::Pfam::HMM::Results object"); + } + + my $fh; + open( $fh, ">" . $self->pfamout ) + or confess "Could not open " . $self->pfamout . ":[$!]\n"; + + print $fh <bits <=> $a->bits } ( @{ $hmmRes->eachHMMSeq } ) ) + { + $_ = $seq->desc; + my ($desc) = /^(.{1,42})/; + $desc = uc($desc); + printf $fh ( + "%-15s %-42s %8.1f %9s %3d %5.1f %5.1f\n", + $seq->name, + $desc, + $seq->bits, + $seq->evalue, + scalar( @{ $seq->hmmUnits } ), + defined( $seq->exp ) ? $seq->exp : "-", + defined( $seq->bias ) ? $seq->bias : "-" + ); + } + + print $fh <bits <=> $a->bits } @{ $hmmRes->units } ) { + + + printf $fh ( + "%-15s %6d %6d %6d %6d %6s %6s %6.1f %9s %6d %6.1f\n", + $dom->name, + $dom->envFrom, + $dom->envTo, + $dom->seqFrom, + $dom->seqTo, + $dom->hmmFrom, + $dom->hmmTo, + $dom->bits, + $dom->evalue, + $dom->domain, + defined( $dom->bias ) ? $dom->bias : "-", + + ); + } +} + +#------------------------------------------------------------------------------- + +=head2 parsePFAMOUT + + Title : parsePFAMOUT + Usage : $self->parsePFAMOUT($filename) + Function : Reads in a PFAMOUT file. This file contains the minimal amount of information + : require to constrcut a pfam ALIGN file. + Args : A filename. 
Normally this is filename + Returns : A Bio::Pfam::HMM::HMMResults object + +=cut +

# Read a PFAMOUT file back into a Bio::Pfam::HMM::HMMResults object.
#
# The file has two sections separated by a line starting "# Domain scores":
#   1. sequence lines: name, description, bits, evalue, number of hits,
#      one skipped column, bias
#   2. domain lines:   name, env from/to, seq from/to, hmm from/to, bits,
#      evalue, one skipped column, bias
# Comment lines (leading '#') and blank lines are ignored; any other
# unparsable line produces a warning and is skipped.
#
# Args    : a filename, or an already opened GLOB file handle
# Returns : a Bio::Pfam::HMM::HMMResults object
# Dies    : (confess) if no argument is given or the file cannot be opened
# Note    : the handle is closed before returning, including a handle that
#           was passed in by the caller.
sub parsePFAMOUT {
    my $self     = shift;
    my $filename = shift;

    unless ($filename) {
        confess('No filename or filehandle passed to parsePFAMOUT');
    }

    my $fh;
    if ( ref($filename) eq 'GLOB' ) {
        $fh = $filename;
    }
    else {
        # 3-arg open: the name is always treated as a plain file to read,
        # never as a mode prefix or a pipe.
        open( $fh, '<', $filename )
            or confess "Could not open $filename:[$!]\n";
    }
    my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

    # Section 1: per-sequence scores.
    SEQ_LINE:
    while ( my $line = <$fh> ) {
        last SEQ_LINE if $line =~ /^# Domain scores/;

        if ( $line =~ /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s+\S+\s+(\S+)\s*$/ ) {
            $hmmRes->addHMMSeq(
                Bio::Pfam::HMM::HMMSequence->new(
                    {
                        name       => $1,
                        desc       => $2,
                        bits       => $3,
                        evalue     => $4,
                        numberHits => $5,
                        bias       => $6
                    }
                )
            );
        }
        elsif ( $line =~ /^#|^\s+$/ ) {
            next SEQ_LINE;
        }
        else {
            warn "Did not parse|$line|\n";
        }
    }

    # Section 2: per-domain scores.
    DOM_LINE:
    while ( my $line = <$fh> ) {
        if ( $line =~
            /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)/ )
        {
            $hmmRes->addHMMUnit(
                Bio::Pfam::HMM::HMMUnit->new(
                    {
                        name    => $1,
                        envFrom => $2,
                        envTo   => $3,
                        seqFrom => $4,
                        seqTo   => $5,
                        hmmFrom => $6,
                        hmmTo   => $7,
                        bits    => $8,
                        evalue  => $9,
                        bias    => $10
                    }
                )
            );
        }
        elsif ( $line =~ /^#|^\s+$/ ) {
            # Also covers empty lines ("\n" matches ^\s+$), so no separate
            # /^$/ test is needed.
            next DOM_LINE;
        }
        else {
            warn "Did not parse: |$line|";
        }
    }
    close($fh);
    return ($hmmRes);
}
+ +#------------------------------------------------------------------------------- + +=head2 _readHeader + + Title : _readHeader + Usage : Private method.
$self->_readHeader(\*FH, $hmmResults) + Function : Reads the header section from a HMMER3 hmmsearch + Args : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object + Returns : Nothing + +=cut +

# Parse the header part of the output first.
#
# Reads lines from $hs up to and including the "Scores for complete ..."
# line, copying the recognised header fields into $hmmRes (hmmName, seqDB,
# thisFile, seedName/hmmLength for "[M=..]" queries, seqName/seqLength for
# "[L=..]" queries, evalueThr, randSeedNum, description, program).
#
# Args    : $hs     - file handle positioned at the start of a result block
#           $hmmRes - Bio::Pfam::HMM::HMMResults object to fill in
# Returns : 1 if the "[ok]" end-of-output marker was read instead of a
#           header (parseMultiHMMER3 uses this to stop), otherwise 0.
# Dies    : on any line that is not a recognised header line.
sub _readHeader {
    my ( $self, $hs, $hmmRes ) = @_;

    # The order of the tests matters: the specific "# ..." patterns must be
    # tried before the generic comment/blank-line pattern.
    while ( my $line = <$hs> ) {
        if ( $line =~ /^Scores for complete/ ) {
            last;
        }
        elsif ( $line =~ /^# query HMM file:\s+(\S+)/ ) {
            $hmmRes->hmmName($1);
        }
        elsif ( $line =~ /^# target sequence database:\s+(\S+)/ ) {
            $hmmRes->seqDB($1);
        }
        elsif ( $line =~ /^output directed to file:\s+(\S+)/ ) {
            $hmmRes->thisFile($1);
        }
        elsif ( $line =~ /^Query:\s+(\S+)\s+\[M\=(\d+)\]/ ) {
            $hmmRes->seedName($1);
            $hmmRes->hmmLength($2);
        }
        elsif ( $line =~ /^Query:\s+(\S+)\s+\[L\=(\d+)\]/ ) {
            $hmmRes->seqName($1);
            $hmmRes->seqLength($2);
        }
        elsif ( $line =~
            /^sequence E-value threshold: <= (\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)/ )
        {
            # Capture the whole number; a bare (\d+) stored "0" for 0.001.
            $hmmRes->evalueThr($1);
        }
        elsif ( $line =~ /^# Random generator seed: (\d+)/ ) {
            $hmmRes->randSeedNum($1);
        }
        elsif ( $line =~ /^Description:\s+(.*)/ ) {
            $hmmRes->description($1);
        }
        elsif ( $line =~ /^# (phmmer|hmmsearch|hmmscan|jackhmmer)/ ) {
            $hmmRes->program($1);
        }
        elsif ( $line =~ /(^#)|(^$)/ ) {
            next;
        }
        elsif ( $line =~ /^Accession/ ) {
            next;
        }
        elsif ( $line =~ /^\[ok\]/ ) {
            return (1);
        }
        else {
            die "Failed to parse hmmsearch results |$line| in header section\n";
        }
    }

    # Header consumed (or input exhausted) without meeting "[ok]".
    return 0;
}
+ +#------------------------------------------------------------------------------- + +=head2 _readSeqHits + + Title : _readSeqHits + Usage : Private method.
$self->_readSeqHits(\*FH, $hmmResults) + Function : Reads the sequence hits from a HMMER3 hmmsearch + Args : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object + Returns : Nothing + +=cut +

# Read the per-sequence hit table and add one HMMSequence per row to $hmmRes.
#
# Rows look like this:
#    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
#    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
#      4e-83  285.8  10.0    5.3e-83  285.5   7.0    1.1  1  Q14SN3.1 Q14SN3_9HEPC Polyprotein (Fragment).
#
# Reading stops at the "Domain annotation for each sequence|model" line
# (HMMER3b3) or the "Domain and alignment annotation ..." line (HMMER3b2).
#
# Args    : $hs     - file handle positioned just after the header
#           $hmmRes - Bio::Pfam::HMM::HMMResults object to fill in
# Returns : nothing
# Dies    : if a data row has fewer than 10 whitespace-separated fields.
sub _readSeqHits {
    my ( $self, $hs, $hmmRes ) = @_;

    LINE:
    while ( my $line = <$hs> ) {

        # (?:sequence|model) is a real alternation; the former
        # [sequence|model] was a character class matching a single letter.
        last LINE
            if $line =~ /^Domain annotation for each (?:sequence|model)/;
        last LINE
            if $line =~ /^Domain and alignment annotation for each (?:sequence|model)/;

        next LINE if $line =~ /^\s+(E-value|---)/;    # column titles / rulers
        next LINE if $line =~ /^$/;
        next LINE
            if $line =~ /No hits detected that satisfy reporting thresholds/;

        # Anything else is taken to be a sequence match. Rows start with
        # whitespace, so split() yields an empty field at index 0 and the
        # real columns start at index 1.
        my @sMatch = split( /\s+/, $line );
        unless ( scalar(@sMatch) >= 10 ) {
            die "Expected at least 10 pieces of data: $line;\n";
        }

        # The description is free text: everything after the sequence name.
        my $desc;
        if ( scalar(@sMatch) >= 11 ) {
            $desc = join( " ", @sMatch[ 10 .. $#sMatch ] );
        }

        $hmmRes->addHMMSeq(
            Bio::Pfam::HMM::HMMSequence->new(
                {
                    evalue     => $sMatch[1],
                    bits       => $sMatch[2],
                    bias       => $sMatch[3],
                    exp        => $sMatch[7],
                    numberHits => $sMatch[8],
                    name       => $sMatch[9],
                    desc       => defined($desc) ? $desc : "-",
                }
            )
        );
    }

    return;
}
+ +#------------------------------------------------------------------------------ + +=head2 _readUnitHits + + Title : _readUnitHits + Usage : Private method.
$self->_readUnitHits(\*FH, $hmmResults) + Function : Reads the unit (domain) hits from a HMMER3 hmmsearch + Args : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object + Returns : Nothing + +=cut + +no warnings 'recursion'; + +sub _readUnitHits { + my ( $self, $hs, $hmmRes ) = @_; + + if($hmmRes->eof){ + return; + } + +#Parse the domain hits section +#>> P37935.1 MAAY4_SCHCO Mating-type protein A-alpha Y4. +# # bit score bias E-value ind Evalue hmm from hmm to ali from ali to env from env to ali-acc +# --- --------- ------- ---------- ---------- -------- -------- -------- -------- -------- -------- ------- +# 1 244.0 0.5 9.5e-76 1.7e-70 1 146 [. 1 145 [. 1 146 [. 0.99 +# +# Alignments for each domain: +# == domain 1 score: 244.0 bits; conditional E-value: 9.5e-76 +# SEED 1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146 +# m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp +# P37935.1 1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145 +# 899***************************.******************************************************************************************************************8 PP + + while (<$hs>) { + if (/^Internal/) { + last; + } + elsif (/\>\>\s+(\S+)/) { + my $seqId = $1; + $self->_readUnitData( $seqId, $hs, $hmmRes ); + if($hmmRes->eof){ + return; + } + } + } +} + +sub _readUnitData { + my ( $self, $id, $hs, $hmmRes ) = @_; + + if($hmmRes->eof){ + return; + } + my $hmmName = $hmmRes->seedName(); + + my $seqName = $hmmRes->seqName; + +# bit score bias E-value ind Evalue hmm from hmm to ali from ali to env from env to ali-acc +# --- --------- ------- ---------- ---------- -------- -------- -------- -------- -------- -------- ------- +# 1 
244.0 0.5 9.5e-76 1.7e-70 1 146 [. 1 145 [. 1 146 [. 0.99 +# +# Alignments for each domain: + + my @units; + my $align = 1; + my $recurse = 0; + my $eof = 0; + my ($nextSeqId); + while (<$hs>) { + if (/^[(\/\/|Internal)]/ ) { + $align = 0; + $recurse = 0; + $eof = 1; + last; + } + elsif (/^\>\>\s+(\S+)/) { + $nextSeqId = $1; + $align = 0; + $recurse = 1; + last; + } + elsif (/^\s+Alignments for each domain:/) { + $align = 1; + $recurse = 0; + last; + } + elsif (/^\s+(#\s+score|---)/){ + + #Two human readable lines + next; + } + elsif (/^$/) { + + #blank line + next; + } + elsif (/^\s+\d+\s+/) { + my @dMatch = split( /\s+/, $_ ); + unless ( scalar(@dMatch) == 17 ) { + die "Expected 16 elements of data: $_\n"; + } + + push( + @units, + Bio::Pfam::HMM::HMMUnit->new( + { + name => $id, + domain => $dMatch[1], + bits => $dMatch[3], + bias => $dMatch[4], + domEvalue => $dMatch[5], + evalue => $dMatch[6], + hmmFrom => $dMatch[7], + hmmTo => $dMatch[8], + seqFrom => $dMatch[10], + seqTo => $dMatch[11], + envFrom => $dMatch[13], + envTo => $dMatch[14], + aliAcc => $dMatch[16] + } + ) + ); + + next; + } + elsif(/^\s+\[No individual domains/) { + $align=0; + next; + } + else { + confess("Did not parse line: $_"); + } + } + +# == domain 1 score: 244.0 bits; conditional E-value: 9.5e-76 +# SEED 1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146 +# m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp +# P37935.1 1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145 +# 899***************************.******************************************************************************************************************8 PP +# +# OR.... 
+# +# == domain 1 score: 27.6 bits; conditional E-value: 7.4e-10 +# PF00018 17 LsfkkGdvitvleksee.eWwkaelkdg.keGlvPsnYvep 55 +# L++++Gd+++++++++e++Ww++++++++++G++P+n+v+p +# P15498.4 617 LRLNPGDIVELTKAEAEqNWWEGRNTSTnEIGWFPCNRVKP 657 +# 7899**********9999*******************9987 PP + + + if ($align) { + my ($pattern1, $pattern2); + + if($hmmName and $hmmRes->program eq 'hmmsearch'){ + $pattern1 = qr/^\s+$hmmName\s+\d+\s+(\S+)\s+\d+/; + $id =~ s/(\W)/\\$1/g; # escape any non-word character + # $id =~ s/\|/\\|/g; #Escape '|', '[' and ']' characters + # $id =~ s/\[/\\[/g; + # $id =~ s/\]/\\]/g; + $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/; + }elsif($seqName and $hmmRes->program eq 'hmmscan'){ + my $tmpSeqName = $seqName; + $tmpSeqName =~ s/(\W)/\\$1/g; # escape any non-word character + # $tmpSeqName =~ s/\|/\\|/g; #Escape '|', '[' and ']' characters + # $tmpSeqName =~ s/\[/\\[/g; + # $tmpSeqName =~ s/\]/\\]/g; + $pattern1 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/; + $pattern2 = qr/^\s+$tmpSeqName\s+\d+\s+(\S+)\s+\d+/; + }elsif($seqName and ($hmmRes->program eq 'phmmer' or $hmmRes->program eq 'jackhmmer') ){ + $seqName =~ s/(\W)/\\$1/g; # escape any non-word character + # $seqName =~ s/\|/\|/g; #Escape '|', '[' and ']' characters + # $seqName =~ s/\[/\\[/g; + # $seqName =~ s/\]/\\]/g; + $pattern1 = qr/^\s+$seqName\s+\d+\s+(\S+)\s+\d+/; + $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/; + } + + + $recurse = 0; + my $matchNo; + my $hmmlen = 0; + while (<$hs>) { + if (/$pattern1/) { + $units[ $matchNo - 1 ]->hmmalign->{hmm} .= $1; + $hmmlen = length($1); + } + elsif (/$pattern2/) { + $units[ $matchNo - 1 ]->hmmalign->{seq} .= $1; + } + elsif (/^\s+([x\.]+)\s+RF$/) { + my $rf = $1; + $units[ $matchNo - 1 ]->hmmalign->{rf} .= $rf; + } + elsif (/^\s+([0-9\*\.]+)\s+PP$/) { + my $pp = $1; + $units[ $matchNo - 1 ]->hmmalign->{pp} .= $pp; + }elsif (/^\s+(\S+)\s+CS$/) { + my $cs = $1; + $units[ $matchNo - 1 ]->hmmalign->{cs} .= $cs; + }elsif (/^\s+==\s+domain\s+(\d+)/) { + $matchNo = $1; + } + 
elsif (/^\s+(.*)\s+$/) { + # $1 is *not* the match - this fails if there are prepended + # or appended spaces + # $units[ $matchNo - 1 ]->hmmalign->{match} .= $1; + # Let's get a right substring based on the HMM length + chomp; + my $m1 = substr($_,-$hmmlen); + $units[ $matchNo - 1 ]->hmmalign->{match} .= $m1; + }elsif (/^$/) { + next; + } + elsif (/^[(\/\/|Internal)]/) { + $align = 0; + $recurse = 0; + $eof = 1; + last; + } + elsif (/^\>\>\s+(\S+)/) { + $nextSeqId = $1; + $recurse = 1; + last; + } + + else { + confess("Did not parse |$_| in units"); + } + } + } + + foreach my $u (@units) { + $hmmRes->addHMMUnit($u); + } + + $hmmRes->eof($eof); + + if ($recurse and $nextSeqId) { + $self->_readUnitData( $nextSeqId, $hs, $hmmRes ); + } + return; +} +use warnings 'recursion'; + +#------------------------------------------------------------------------------- + +=head2 parseHMMER2 + + Title : parseHMMER2 + Usage : $self->parseHMMER2(\*FH ) + Function : This is a minimal parser for reading in the output of HMMER2 hmmsearch + Args : The file handle to hmmsearch output + Returns : A Bio::Pfam::HMM::HMMResults object + +=cut + +sub parseHMMER2 { + my $self = shift; + my $file = shift; + + my $hmmRes = Bio::Pfam::HMM::HMMResults->new; + + my %seqh; + my $count = 0; + + while (<$file>) { + /^Scores for complete sequences/ && last; + } + + while (<$file>) { + /^Parsed for domains/ && last; + if ( my ( $id, $de, $sc, $ev, $hits ) = + /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ ) + { + $hmmRes->addHMMSeq( + Bio::Pfam::HMM::HMMSequence->new( + { + bits => $sc, + evalue => $ev, + name => $id, + desc => $de, + numberHits => $hits + } + ) + ); + + $seqh{$id} = $sc; + } + } + + while (<$file>) { + /^Histogram of all scores/ && last; + if ( my ( $id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev ) = + /^(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/ ) + { + $hmmRes->addHMMUnit( + Bio::Pfam::HMM::HMMUnit->new( + { + name => $id, + seqFrom => $sqfrom, + seqTo => 
=head2 parseHMMER1

  Title    : parseHMMER1
  Usage    : $self->parseHMMER1(\*FH )
  Function : Minimal parser for the output of HMMER1 hmmsearch. Each line of
           : the form "BITS (bits) f: FROM t: TO Target: ID DESC" becomes one
           : HMMUnit; the first hit seen for a sequence also creates its
           : HMMSequence. E-values and model coordinates are not read from the
           : input and are filled with the placeholder value 1.
  Args     : The file handle to hmmsearch output
  Returns  : A Bio::Pfam::HMM::HMMResults object

=cut

sub parseHMMER1 {
  my ( $self, $file ) = @_;

  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

LINE: while (<$file>) {

    # Lines that are not hit lines are ignored.
    my ( $bits, $s, $e, $id, $de ) =
/^(-?\d+\.?\d*)\s+\(bits\)\s+f:\s+(\d+)\s+t:\s+(\d+)\s+Target:\s+(\S+)\s+(.*)/
      or next LINE;

    # A target named "id/start-end" is a sub-sequence: shift the hit back
    # into the coordinates of the parent sequence and key it on the parent id.
    if ( $id =~ /(\S+)\/(\d+)-(\d+)/ ) {
      my ( $parent, $offset ) = ( $1, $2 );
      $id = $parent;
      $s  = $offset + $s - 1;
      $e  = $offset + $e - 1;
    }

    # First hit on this sequence: register the sequence itself.
    unless ( $hmmRes->seqs->{$id} ) {
      my %seq_fields = (
        bits       => $bits,
        evalue     => 1,
        name       => $id,
        desc       => $de,
        numberHits => 1
      );
      $hmmRes->addHMMSeq( Bio::Pfam::HMM::HMMSequence->new( \%seq_fields ) );
    }

    my %unit_fields = (
      name    => $id,
      seqFrom => $s,
      seqTo   => $e,
      hmmFrom => "1",
      hmmTo   => "1",
      bits    => $bits,
      evalue  => "1"
    );
    $hmmRes->addHMMUnit( Bio::Pfam::HMM::HMMUnit->new( \%unit_fields ) );

    # The sequence score is the best score of any of its hits.
    $hmmRes->seqs->{$id}->bits($bits)
      if $bits > $hmmRes->seqs->{$id}->bits;
  }

  return $hmmRes;
}
=head2 writeScoresFile

  Title    : writeScoresFile
  Usage    : $hmmResIO->writeScoresFile( $hmmRes )
  Function : Writes a scores file for a Bio::Pfam::HMM::HMMResults object to
           : the path returned by $self->scores. One line is written per
           : domain that scores at or above the domain threshold on a sequence
           : that scores at or above the sequence threshold, best score first.
           : As a side effect the observed cut-offs are stored on $hmmRes:
           : domTC/seqTC (lowest scores above threshold) and domNC/seqNC
           : (highest scores below threshold). If either threshold is unset,
           : both default to 25.0 bits with a warning.
  Args     : Bio::Pfam::HMM::HMMResults
  Returns  : Nothing
  Throws   : confess() if the argument is missing or of the wrong class, or if
           : the scores file cannot be opened or closed.

=cut

sub writeScoresFile {
  my ( $self, $hmmRes ) = @_;

  unless ($hmmRes) {
    confess "A Bio::Pfam::HMM::HMMResults object was not passed in\n";
  }
  unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
    confess("Variable passed in is not a Bio::Pfam::HMM::HMMResults object");
  }

  # Three-argument open: the path is never interpreted as a mode string and
  # leading/trailing whitespace in the file name is preserved.
  my $scoresFile = $self->scores;
  open( my $fh, '>', $scoresFile )
    or confess "Could not open " . $scoresFile . ":[$!]\n";

  my ( $lowSeq, $lowDom, $highSeq, $highDom );
  $lowSeq  = $lowDom  = 999999.99;
  $highSeq = $highDom = -999999.99;
  unless ( defined $hmmRes->domThr and defined $hmmRes->seqThr ) {
    warn "No threshold set, setting to 25.0 bits\n";
    $hmmRes->domThr("25.0");
    $hmmRes->seqThr("25.0");
  }

  my @sigUnits;

  foreach my $seqId ( keys %{ $hmmRes->seqs } ) {
    my $seq = $hmmRes->seqs->{$seqId};

    if ( $seq->bits >= $hmmRes->seqThr ) {

      # Sequence passes: track the lowest passing sequence score.
      $lowSeq = $seq->bits if $seq->bits < $lowSeq;

      # Domains on a passing sequence are either significant (collected and
      # used for the lowest passing domain score) or candidates for the
      # highest failing domain score.
      foreach my $unit ( @{ $seq->hmmUnits } ) {
        if ( $unit->bits >= $hmmRes->domThr ) {
          push( @sigUnits, $unit );
          $lowDom = $unit->bits if $unit->bits < $lowDom;
        }
        elsif ( $unit->bits > $highDom ) {
          $highDom = $unit->bits;
        }
      }
    }
    else {

      # Sequence fails: track the highest failing sequence score.
      $highSeq = $seq->bits if $seq->bits > $highSeq;

      # Only domains that also fail the domain threshold count towards the
      # highest failing domain score.
      foreach my $unit ( @{ $seq->hmmUnits } ) {
        if ( $unit->bits < $hmmRes->domThr && $unit->bits > $highDom ) {
          $highDom = $unit->bits;
        }
      }
    }
  }

  $hmmRes->domTC($lowDom);
  $hmmRes->seqTC($lowSeq);
  $hmmRes->domNC($highDom);
  $hmmRes->seqNC($highSeq);

  # Print the significant domains to the scores file, best score first.
  foreach my $u ( sort { $b->bits <=> $a->bits } @sigUnits ) {
    printf {$fh} "%.1f %s/%s-%s %s-%s %s\n",
      $u->bits, $u->name, $u->envFrom, $u->envTo,
      $u->seqFrom, $u->seqTo, $u->evalue;
  }

  # Buffered write errors only surface at close time.
  close($fh)
    or confess "Could not close " . $scoresFile . ":[$!]\n";

  return;
}
=head2 write_ascii_out

  Title    : write_ascii_out
  Usage    : $hmmResIO->write_ascii_out( $hmmRes, $fh, $scanData,
           :                             $e_seq, $e_dom, $b_seq, $b_dom )
  Function : Prints one whitespace-separated line per domain hit of $hmmRes to
           : $fh, ordered by start position on the sequence.
           :
           : Filtering: with an E-value cut-off ($e_seq and/or $e_dom) hits
           : must be at or below it; otherwise with a bit-score cut-off
           : ($b_seq and/or $b_dom) hits must be at or above it; with neither,
           : only hits flagged significant ($unit->sig) are printed. Hits
           : whose name matches "Pfam-B" always use a fixed 0.001 E-value
           : cut-off instead.
           :
           : $scanData is a hash reference supplying the lookup tables read
           : here: _max_seqname (width of the name column), _accmap, _type,
           : _model_len, _clanmap and, optionally, _translate/_orf (adds
           : strand and nucleotide coordinates) and _align (adds the #HMM,
           : #MATCH, #PP, #SEQ and #CS alignment lines).
  Args     : HMMResults object, output filehandle, scan data hash reference,
           : optional sequence/domain E-value and bit-score cut-offs
  Returns  : Nothing

=cut

sub write_ascii_out {

  my ( $self, $HMMResults, $fh, $scanData, $e_seq, $e_dom, $b_seq, $b_dom ) =
    @_;

  # Fall back to a 20-column name field when no usable width was recorded.
  # (The previous test, "unless (X or X < 1)", could never be false for a
  # missing or zero width, so the default was never applied.)
  $scanData->{_max_seqname} = 20
    unless ( $scanData->{_max_seqname} and $scanData->{_max_seqname} >= 1 );

  my $nameField = "%-" . $scanData->{_max_seqname} . "s";

  # Decide which kind of threshold applies; a missing sequence-level value
  # inherits the domain-level one, a missing domain-level value is made
  # permissive.
  my $ga;

  if ( $e_seq or $e_dom ) {
    $e_seq = $e_dom unless ($e_seq);
    $e_dom = "10"   unless ($e_dom);
  }
  elsif ( $b_seq or $b_dom ) {
    $b_seq = $b_dom unless ($b_seq);
    $b_dom = "0"    unless ($b_dom);
  }
  else {
    $ga = 1;
  }

  foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom }
    @{ $HMMResults->units } )
  {

    if ( $unit->name =~ /Pfam\-B/ ) {

      next
        unless ( $HMMResults->seqs->{ $unit->name }->evalue <= "0.001"
        and $unit->evalue <= "0.001" );

      printf $fh $nameField
        . " %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3s %-8s\n",
        $HMMResults->seqName,
        $unit->seqFrom,
        $unit->seqTo,
        $unit->envFrom,
        $unit->envTo,
        $scanData->{_accmap}->{ $unit->name },
        $unit->name,
        "Pfam-B",
        $unit->hmmFrom,
        $unit->hmmTo,
        $scanData->{_model_len}->{ $unit->name },
        $unit->bits,
        $unit->evalue,
        "NA",
        "NA";

    }
    else {

      # Filter results based on thresholds
      if ($ga) {
        next unless ( $unit->sig );
      }
      if ($e_seq) {
        next
          unless ( $HMMResults->seqs->{ $unit->name }->evalue <= $e_seq
          and $unit->evalue <= $e_dom );
      }
      if ($b_seq) {
        next
          unless ( $HMMResults->seqs->{ $unit->name }->bits >= $b_seq
          and $unit->bits >= $b_dom );
      }

      my $clan = $scanData->{_clanmap}->{ $unit->name } || "No_clan";

      # No newline yet: optional columns may still follow on this line.
      printf $fh $nameField
        . " %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3d %-8s ",
        $HMMResults->seqName,
        $unit->seqFrom,
        $unit->seqTo,
        $unit->envFrom,
        $unit->envTo,
        $scanData->{_accmap}->{ $unit->name },
        $unit->name,
        $scanData->{_type}->{ $unit->name },
        $unit->hmmFrom,
        $unit->hmmTo,
        $scanData->{_model_len}->{ $unit->name },
        $unit->bits,
        $unit->evalue,
        $unit->sig,
        $clan;

      if ( $unit->{'act_site'} ) {
        local $" = ",";
        print $fh "predicted_active_site[@{$unit->{'act_site'}}]";
      }

      if ( $scanData->{_translate} ) {

        # Map the protein envelope back onto the nucleotide sequence using
        # the ORF start recorded for this translated sequence, if any.
        my $strand = '?';
        my $start  = '-';
        my $end    = '-';
        if ( exists( $scanData->{_orf}->{ $HMMResults->seqName } ) ) {
          my $orf = $scanData->{_orf}->{ $HMMResults->seqName };
          $strand = $orf->{strand};
          if ( $strand eq '+' ) {
            $start = $orf->{start} + ( $unit->envFrom * 3 ) - 3;
            $end   = $orf->{start} + ( $unit->envTo * 3 ) - 3;
          }
          elsif ( $strand eq '-' ) {
            $start = $orf->{start} - ( $unit->envFrom * 3 ) + 3;
            $end   = $orf->{start} - ( $unit->envTo * 3 ) + 3;
          }
        }
        print $fh "$strand $start $end";
      }

      print $fh "\n";
    }

    if ( $scanData->{_align} ) {
      printf $fh "%-10s %s\n", "#HMM",   $unit->hmmalign->{hmm};
      printf $fh "%-10s %s\n", "#MATCH", $unit->hmmalign->{match};
      printf $fh "%-10s %s\n", "#PP",    $unit->hmmalign->{pp};
      printf $fh "%-10s %s\n", "#SEQ",   $unit->hmmalign->{seq};
      printf $fh "%-10s %s\n", "#CS",    $unit->hmmalign->{cs}
        if ( $unit->hmmalign->{cs} );
    }

  }

}
=head2 addHMMUnit

  Title    : addHMMUnit
  Usage    : $hmmseq->addHMMUnit($hmmUnit)
  Function : Adds a hmmUnit to a sequence. It checks that the variable passed
           : in is a Bio::Pfam::HMM::HMMUnit object; anything else (including
           : undef, an unblessed reference or a plain string) is rejected with
           : a warning and not added.
  Args     : A Bio::Pfam::HMM::HMMUnit object
  Returns  : Nothing

=cut

sub addHMMUnit {
  my ( $self, $hmmUnit ) = @_;

  # Only call ->isa on real objects: on undef or an unblessed reference the
  # method call itself would die before the warning could be issued, and on a
  # plain string it would be treated as a class name.
  require Scalar::Util;
  if ( Scalar::Util::blessed($hmmUnit)
    and $hmmUnit->isa("Bio::Pfam::HMM::HMMUnit") )
  {
    push( @{ $self->hmmUnits }, $hmmUnit );
  }
  else {
    my $shown = defined $hmmUnit ? $hmmUnit : 'undef';
    warn "$shown is not a Bio::Pfam::HMM::HMMUnit, not added\n";
  }
  return;
}
+ +=cut + +1; diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/HMM/HMMUnit.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/HMM/HMMUnit.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,138 @@ + +package Bio::Pfam::HMM::HMMUnit; + +use strict; +use warnings; + +use Moose; +use Moose::Util::TypeConstraints; + +extends 'Bio::Pfam::HMM::HMMMatch'; + +subtype 'Domain' + => as 'Int' + => where { $_ > 0 }; + +#coerce 'Domain' +# => from 'Str' +# => via { +# my $d; +# if(/(\d+)\/\d+/){ +# $d = $1; +# } +# return $d; +# }; +# + +#subtype 'proteinCoos' +# => as 'Int' +# => where { $_ > 0 && $_ < 100000 } +# => message { 'Protein coordinates are expected to be positive and less the 100,000'}; + + +has 'seqEvalue' => ( + isa => 'Num', + is => 'rw', +); + +has 'domain' => ( + isa => 'Domain', + is => 'rw' +); + +has 'seqFrom' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +has 'seqTo' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +#has 'indEvalue' => ( +# isa => 'evalue', +# is => 'rw', +# required => 1, +#); + +has 'domEvalue' => ( + isa => 'evalue', + is => 'rw', +); + +has 'hmmalign' => ( + isa => 'HashRef', + is => 'rw', + default => sub { {} }, +); + +has 'hmmFrom' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +has 'hmmTo' => ( + isa => 'Int', + is => 'rw', + required => 1 +); + +has 'envFrom' => ( + isa => 'Int', + is => 'rw' +); + +has 'envTo' => ( + isa => 'Int', + is => 'rw' +); + +has 'coreFrom' => ( + isa => 'Str', + is => 'rw' +); + +has 'coreTo' => ( + isa => 'Str', + is => 'rw' +); + +has 'aliAcc' => ( + isa => 'Num', + is => 'rw' +); + +has 'sig' => ( + isa => 'Int', + is => 'rw' +); + + +__PACKAGE__->meta->make_immutable; + +=head1 COPYRIGHT + +Copyright (c) 2007: Genome Research Ltd. 
+ +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) + +This is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . + +=cut + +1; diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/Scan/PfamScan.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/Scan/PfamScan.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,957 @@ + +=head1 NAME + +Bio::Pfam::Scan::PfamScan + +=cut + +package Bio::Pfam::Scan::PfamScan; + +=head1 SYNOPSIS + + my $ps = Bio::Pfam::Scan::PfamScan->new( + -cut_off => $hmmscan_cut_off, + -dir => $dir, + -clan_overlap => $clan_overlap, + -fasta => $fasta, + -align => $align, + -as => $as + ); + + $ps->search; + $ps->write_results; + +=head1 DESCRIPTION + +$Id: PfamScan.pm,v 1.4 2010-01-12 09:41:42 jm14 Exp $ + +=cut + +use strict; +use warnings; + +use Bio::Pfam::HMM::HMMResultsIO; +use Bio::Pfam::Active_site::as_search; +use Bio::SimpleAlign; +use Bio::Pfam::Scan::Seq; + +use Carp; +use IPC::Run qw( start finish ); + +#------------------------------------------------------------------------------- +#- constructor ----------------------------------------------------------------- +#------------------------------------------------------------------------------- + +=head1 METHODS + +=head2 new + +The only constructor for the object. 
=head2 new

The only constructor for the object. Returns a new, blessed object whose
C<_HMMSCAN> slot holds the name of the search binary. Any arguments given are
handed unchanged to C<_process_args>, which validates them and populates the
object; with no arguments the parameters can be supplied later, when calling
C<search>.

=cut

sub new {
  my ( $class, @args ) = @_;

  # The binary is referred to by name only, so it is expected to be on the
  # path rather than at a hard-coded location.
  my $self = bless { _HMMSCAN => 'hmmscan' }, $class;

  # handle arguments, if we were given any here
  if (@args) {
    $self->_process_args(@args);
  }

  return $self;
}
"\n"; + my @params; + if ( $self->{_cpu} ) { + @params = ( + 'hmmscan', '--notextw', '--cpu', $self->{_cpu}, @hmmscan_cut_off, + $self->{_dir} . '/' . $hmmlib, + $self->{_fasta} + ); + } + else { + @params = ( + 'hmmscan', '--notextw', @hmmscan_cut_off, $self->{_dir} . '/' . $hmmlib, + $self->{_fasta} + ); + + } + + print STDERR "PfamScan::search: hmmscan command: |@params|\n" + if $ENV{DEBUG}; + print STDERR 'PfamScan::search: sequence: |' . $self->{_sequence} . "|\n" + if $ENV{DEBUG}; + + my $run = start \@params, 'pipe', \*OUT, '2>pipe', \*ERR + or croak qq(FATAL: error running hmmscan; IPC::Run returned '$?'); + + # print IN $self->{_sequence}; ; + close IN; + + $self->{_hmmresultIO} = Bio::Pfam::HMM::HMMResultsIO->new; + $self->{_all_results} = $self->{_hmmresultIO}->parseMultiHMMER3( \*OUT ); + close OUT; + + my $err; + while () { + $err .= $_; + } + close ERR; + + finish $run + or croak qq|FATAL: error running hmmscan ($err); ipc returned '$?'|; + + unless ( $hmmlib =~ /Pfam\-B/ ) { + + if ( $self->{_clan_overlap} ) { + push( @{ $self->{_header} }, "# resolve clan overlaps: off\n" ); + } + else { + push( @{ $self->{_header} }, "# resolve clan overlaps: on\n" ); + $self->_resolve_clan_overlap; + } + + if ( $self->{_as} ) { + push( @{ $self->{_header} }, "# predict active sites: on\n" ); + $self->_pred_act_sites; + } + else { + push( @{ $self->{_header} }, "# predict active sites: off\n" ); + } + + if ( $self->{_translate} ) { + push @{ $self->{_header} }, "# translate DNA sequence: " . $self->{_translate} . 
"\n"; + } + } + + # Determine which hits are significant + foreach my $result ( @{ $self->{_all_results} } ) { + foreach + my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $result->units } ) + { + + unless ($pfamB) { + + $unit->sig(0); + if ( $result->seqs->{ $unit->name }->bits >= + $self->{_seqGA}->{ $unit->name } ) + { + if ( $unit->bits >= $self->{_domGA}->{ $unit->name } ) { + $unit->sig(1); + } + } + } + } + } + + if ($firstResult) { + $AllResults{ $self->{_all_results} } = $self->{_all_results}; + } + else { + $firstResult = $self->{_all_results}; + } + + } # end of "foreach $hmmlib" + + # If more than one search, merge results into one object + if ( keys %AllResults ) { + + foreach my $AllResult ( keys %AllResults ) { + + foreach my $seq_id ( keys %{ $self->{_seq_hash} } ) { + + my $flag; + + #If seq exists in both, add all units from $AllResult to $firstResult + foreach my $result ( @{$firstResult} ) { + + if ( $result->seqName eq $seq_id ) { + $flag = 1; + + foreach my $result2 ( @{ $AllResults{$AllResult} } ) { + + if ( $result2->seqName eq $seq_id ) { + foreach my $hmmname ( keys %{ $result2->seqs } ) { + $result->addHMMSeq( $result2->seqs->{$hmmname} ); + } + foreach my $unit ( @{ $result2->units } ) { + $result->addHMMUnit($unit); + } + } + } + } + } + + #If seq doesn't exist in $firstResult, need to add both sequence and units to $firstResult + unless ($flag) { + foreach my $result2 ( @{ $AllResults{$AllResult} } ) { + if ( $result2->seqName eq $seq_id ) { + push @{$firstResult}, $result2; + } + } + } + } + } + $self->{_all_results} = $firstResult; + + } # end of "if keys %AllResults" + + push @{ $self->{_header} }, "# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =\n#\n"; + + if ( $self->{_as} ) { + push @{ $self->{_header} }, "# "; + } + else { + push @{ $self->{_header} }, "# "; + } + + if ( $self->{_translate} ) { + push @{ $self->{_header} }, " "; + } + push @{ $self->{_header} }, "\n"; +} + 
=head2 write_results

Writes the results of the search. The first argument selects the destination:
an open filehandle (a GLOB reference), a filename, or nothing at all, in which
case the output goes to STDOUT. A fatal error is generated if a file of the
given name already exists or cannot be written. The remaining, optional
arguments are the sequence/domain E-value and bit-score cut-offs, which are
passed through to the C<write_ascii_out> method of the results parser.

The header lines collected on the object are written first, followed by the
hits for each result. The destination handle is closed afterwards, whichever
kind it was.

=cut

sub write_results {
  my ( $self, $out, $e_seq, $e_dom, $b_seq, $b_dom ) = @_;

  my $fh;
  my $opened_here = 0;

  if ( ref $out eq 'GLOB' ) {

    # we were handed a filehandle
    $fh = $out;
  }
  elsif ( $out and not ref $out ) {

    # we were handed a filename
    croak qq(FATAL: output file "$out" already exists) if -f $out;

    # Lexical handle and three-argument open: no shared global FH, and the
    # filename can never be interpreted as part of the open mode.
    open( $fh, '>', $out )
      or croak qq(FATAL: Can\'t write to your output file "$out": $!);
    $opened_here = 1;
  }
  else {

    # neither filehandle nor filename, default to STDOUT
    $fh = \*STDOUT;
  }

  if ( $self->{_header} ) {
    my $header = join '', @{ $self->{_header} };
    print $fh "$header\n";
  }

  foreach my $result ( @{ $self->{_all_results} } ) {
    $self->{_hmmresultIO}
      ->write_ascii_out( $result, $fh, $self, $e_seq, $e_dom, $b_seq, $b_dom );
  }

  if ($opened_here) {

    # Buffered write errors on our own file only surface at close time.
    close $fh
      or croak qq(FATAL: Can\'t write to your output file "$out": $!);
  }
  else {
    close $fh;
  }
}

#-------------------------------------------------------------------------------

=head2 results

Returns the search results as a reference to a flat array, built by calling
C<results> on every stored result object with this object and the optional
E-value argument. Warns and returns nothing if no search has been run yet.

=cut

sub results {
  my ( $self, $e_value ) = @_;

  unless ( defined $self->{_all_results} ) {
    carp "WARNING: call search() before trying to retrieve results";
    return;
  }

  my @search_results = ();

  foreach my $hmm_result ( @{ $self->{_all_results} } ) {
    push @search_results, @{ $hmm_result->results( $self, $e_value ) };
  }

  return \@search_results;
}
=head2 _process_args

Handles the input arguments, given either as a hash reference or as a flat
list of key/value pairs. Validates them, stores them on the object and loads
everything the search needs:

=over

=item *

exactly one of C<-fasta> (an existing, non-empty file) or C<-sequence> must be
given;

=item *

E-value cut-offs (C<-e_seq>, C<-e_dom>) and bit-score cut-offs (C<-b_seq>,
C<-b_dom>) are mutually exclusive, and a domain-level value requires the
matching sequence-level value; with no cut-off at all C<--cut_ga> is used;

=item *

C<-dir> must be a directory containing, for every library named in C<-hmmlib>
(default C<Pfam-A.hmm>), the HMM file itself, its four binary index files and
its C<.dat> file.

=back

The Pfam data files, the sequences, the active-site data (with C<-as>) and the
translated fasta file (with C<-translate>) are then read by the corresponding
private methods. Every validation failure is fatal.

=cut

sub _process_args {
  my ( $self, @args ) = @_;

  # accept both a hash and a hash ref
  my $args = ( ref $args[0] eq 'HASH' ) ? shift @args : {@args};

  # make sure we get a sequence
  if ( $args->{-fasta} and $args->{-sequence} ) {
    croak qq(FATAL: "-fasta" and "-sequence" are mutually exclusive);
  }
  elsif ( $args->{-fasta} ) {
    croak qq(FATAL: fasta file "$args->{-fasta}" doesn\'t exist)
      unless -s $args->{-fasta};
  }
  elsif ( $args->{-sequence} ) {
    croak qq(FATAL: no sequence given)
      unless length( $args->{-sequence} );
  }
  else {
    croak qq(FATAL: must specify either "-fasta" or "-sequence");
  }

  # check the cut off
  if ( ( $args->{-e_seq} and ( $args->{-b_seq} || $args->{-b_dom} ) )
    or ( $args->{-b_seq} and ( $args->{-e_seq} || $args->{-e_dom} ) )
    or ( $args->{-b_dom} and $args->{-e_dom} ) )
  {
    croak qq(FATAL: can\'t use e value and bit score threshold together);
  }

  # Command-line cut-off options for hmmscan, rebuilt from scratch each time.
  $self->{_hmmscan_cutoff} = [];
  if ( $args->{-e_seq} ) {
    croak qq(FATAL: the E-value sequence cut-off "$args->{-e_seq}" must be a positive non-zero number)
      unless $args->{-e_seq} > 0;

    push @{ $self->{_hmmscan_cutoff} }, '-E', $args->{-e_seq};
  }

  if ( $args->{-e_dom} ) {
    croak q(FATAL: if you supply "-e_dom" you must also supply "-e_seq")
      unless $args->{-e_seq};

    croak qq(FATAL: the E-value domain cut-off "$args->{-e_dom}" must be positive non-zero number)
      unless $args->{-e_dom} > 0;

    push @{ $self->{_hmmscan_cutoff} }, '--domE', $args->{-e_dom};
  }

  if ( $args->{-b_seq} ) {
    push @{ $self->{_hmmscan_cutoff} }, '-T', $args->{-b_seq};
  }

  if ( $args->{-b_dom} ) {
    croak q(FATAL: if you supply "-b_dom" you must also supply "-b_seq")
      unless $args->{-b_seq};

    push @{ $self->{_hmmscan_cutoff} }, '--domT', $args->{-b_dom};
  }

  # No explicit cut-off: use the gathering thresholds stored in the models.
  unless ( @{ $self->{_hmmscan_cutoff} } ) {
    push @{ $self->{_hmmscan_cutoff} }, '--cut_ga';
  }

  # make sure we have a valid directory for the HMM data files
  croak qq(FATAL: directory "$args->{-dir}" does not exist)
    unless -d $args->{-dir};

  # populate the object
  $self->{_cut_off}      = $args->{-cut_off};
  $self->{_dir}          = $args->{-dir};
  $self->{_clan_overlap} = $args->{-clan_overlap};
  $self->{_fasta}        = $args->{-fasta};
  $self->{_align}        = $args->{-align};
  $self->{_as}           = $args->{-as};
  $self->{_sequence}     = $args->{-sequence};
  $self->{_cpu}          = $args->{-cpu};
  $self->{_translate}    = $args->{-translate};

  $self->{_hmmlib} = [];
  if ( $args->{-hmmlib} ) {
    if ( ref $args->{-hmmlib} eq 'ARRAY' ) {
      push @{ $self->{_hmmlib} }, @{ $args->{-hmmlib} };
    }
    else {
      push @{ $self->{_hmmlib} }, $args->{-hmmlib};
    }
  }
  else {
    push @{ $self->{_hmmlib} }, "Pfam-A.hmm";
  }

  # Now check that the library exists in the data dir! Every file must be
  # present and non-empty. (The HMM file itself used to escape this check
  # because its path was built with a comma instead of a concatenation.)
  foreach my $hmmlib ( @{ $self->{_hmmlib} } ) {
    my $base    = $self->{_dir} . "/$hmmlib";
    my @missing = grep { !-s $_ } $base,
      map { "$base.$_" } qw(h3f h3i h3m h3p dat);

    croak qq(FATAL: can't find $hmmlib and/or $hmmlib binaries in "$args->{-dir}")
      if @missing;
  }

  # read the necessary data; one call covers every library in "_hmmlib"
  $self->_read_pfam_data;

  $self->{_max_seqname} = 0;

  # if there's nothing in "_sequence" try to load a fasta file
  $self->_read_fasta
    unless $self->{_sequence};

  # check again for a sequence. If we don't have one at this point, bail with
  # an error
  croak qq(FATAL: no sequence given)
    unless $self->{_sequence};

  # read fasta file, store maximum sequence name and store sequences for active
  # sites prediction
  $self->_parse_sequence
    unless $self->{_max_seqname};

  if ( $self->{_as} ) {
    $self->_parse_act_site_data
      unless $self->{_read_read_act_site_data};
  }

  if ( $self->{_translate} ) {
    $self->_translate_fasta;
  }

  # see if a version number was specified
  $self->{_version} = $args->{version};

}
=head2 _resolve_clan_overlap

Resolves overlaps between clans. Every stored result is replaced by whatever
its C<remove_overlaps_by_clan> method returns when given the clan map and the
table of nested families held on this object.

=cut

sub _resolve_clan_overlap {
  my $self = shift;

  # "scalar" keeps the per-result call in scalar context, one value each.
  my @no_clan_overlap =
    map {
      scalar $_->remove_overlaps_by_clan( $self->{_clanmap},
        $self->{_nested} )
    } @{ $self->{_all_results} };

  $self->{_all_results} = \@no_clan_overlap;
}

#-------------------------------------------------------------------------------

=head2 _pred_act_sites

Predicts active sites. Takes no arguments. For every unit (domain hit) whose
family has an active-site alignment loaded, the matched region of the query
sequence is cut out and passed, together with that alignment and its residue
table, to C<Bio::Pfam::Active_site::as_search::find_as>; the value returned is
stored in the "act_site" field of the unit. Units of families without
active-site data are left untouched.

=cut

sub _pred_act_sites {
  my $self = shift;

  my $hmm_file = $self->{_dir} . '/Pfam-A.hmm';

  foreach my $result ( @{ $self->{_all_results} } ) {
    foreach my $unit ( @{ $result->units } ) {

      # Only families with a stored active-site alignment can be predicted.
      if ( $self->{_act_site_data}->{ $unit->name }->{'alignment'} ) {

        # Sequence coordinates are 1-based and inclusive.
        my $region_length = $unit->seqTo - $unit->seqFrom + 1;
        my $seq_region    = substr( $self->{_seq_hash}->{ $result->seqName },
          $unit->seqFrom - 1, $region_length );

        my $seq_se = $unit->seqFrom . '-' . $unit->seqTo;

        $unit->{act_site} = Bio::Pfam::Active_site::as_search::find_as(
          $self->{_act_site_data}->{ $unit->name }->{'alignment'},
          $self->{_act_site_data}->{ $unit->name }->{'residues'},
          $result->seqName,
          $seq_se,
          $seq_region,
          $unit->name,
          $hmm_file
        );
      }
    }
  }
}
=head2 _read_pfam_data

Reads the ".dat" file of every HMM library listed in C<_hmmlib> (for example
"Pfam-A.hmm.dat" in the data directory) and rebuilds the per-family lookup
tables on the object, keyed on the family identifier from the most recent
"#=GF ID" line:

  _accmap     accession                 (#=GF AC)
  _desc       description               (#=GF DE)
  _seqGA      sequence gathering score  (#=GF GA, first value)
  _domGA      domain gathering score    (#=GF GA, second value)
  _type       family type               (#=GF TP)
  _model_len  model length              (#=GF ML)
  _nested     nested families, recorded in both directions (#=GF NE)
  _clanmap    clan                      (#=GF CL)

All tables are emptied before reading. A flag named "_read_" followed by the
library name is set for each library read. It is a fatal error if a data file
cannot be opened.

=cut

sub _read_pfam_data {
  my $self = shift;

  #print STDERR "reading " . $self->{_hmmlib} . ".dat\n" if($ENV{DEBUG});
  $self->{_accmap}    = {};
  $self->{_nested}    = {};
  $self->{_clanmap}   = {};
  $self->{_desc}      = {};
  $self->{_seqGA}     = {};
  $self->{_domGA}     = {};
  $self->{_type}      = {};
  $self->{_model_len} = {};

  foreach my $hmmlib ( @{ $self->{_hmmlib} } ) {
    my $scandat = $self->{_dir} . '/' . $hmmlib . '.dat';

    # Lexical handle, three-argument open.
    open( my $scandat_fh, '<', $scandat )
      or croak qq(FATAL: Couldn't open "$scandat" data file: $!);

    # Identifier of the family whose annotation lines are being read.
    my $id;
    while (<$scandat_fh>) {
      if (m/^\#=GF ID\s+(\S+)/) {
        $id = $1;
      }
      elsif (m/^\#=GF\s+AC\s+(\S+)/) {
        $self->{_accmap}->{$id} = $1;
      }
      elsif (m/^\#=GF\s+DE\s+(.+)/) {
        $self->{_desc}->{$id} = $1;
      }
      elsif (m/^\#=GF\s+GA\s+(\S+)\;\s+(\S+)\;/) {
        $self->{_seqGA}->{$id} = $1;
        $self->{_domGA}->{$id} = $2;
      }
      elsif (m/^\#=GF\s+TP\s+(\S+)/) {
        $self->{_type}->{$id} = $1;
      }
      elsif (m/^\#=GF\s+ML\s+(\d+)/) {
        $self->{_model_len}->{$id} = $1;
      }
      elsif (/^\#=GF\s+NE\s+(\S+)/) {

        # nesting is symmetric: record it from both families' point of view
        $self->{_nested}->{$id}->{$1} = 1;
        $self->{_nested}->{$1}->{$id} = 1;
      }
      elsif (/^\#=GF\s+CL\s+(\S+)/) {
        $self->{_clanmap}->{$id} = $1;
      }
    }

    close $scandat_fh;

    # set a flag to show that we've read the data files already
    $self->{ '_read_' . $hmmlib } = 1;
  }

}
'/active_site.dat'; + + $self->{_act_site_data} = {}; + + open( AS, $as_dat ) + or croak qq(FATAL: Couldn\'t open "$as_dat" data file: $!); + + my ( $fam_id, $aln ); + + while () { + if (/^ID\s+(\S+)/) { + $fam_id = $1; + $aln = new Bio::SimpleAlign; + } + elsif (/^AL\s+(\S+)\/(\d+)\-(\d+)\s+(\S+)/) { + my ( $seq_id, $st, $en, $seq ) = ( $1, $2, $3, $4 ); + + $aln->add_seq( + Bio::Pfam::Scan::Seq->new( + '-seq' => $seq, + '-id' => $seq_id, + '-start' => $st, + '-end' => $en, + '-type' => 'aligned' + ) + ); + } + elsif (/^RE\s+(\S+)\s+(\d+)/) { + my ( $seq_id, $res ) = ( $1, $2 ); + push( + @{ $self->{_act_site_data}->{$fam_id}->{'residues'}->{$seq_id} }, + $res + ); + + } + elsif (/^\/\//) { + + $self->{_act_site_data}->{$fam_id}->{'alignment'} = $aln; + + $fam_id = ""; + $aln = ""; + + } + else { + warn "Ignoring line:\n[$_]"; + } + } + close AS; + $self->{_read_read_act_site_data} = 1; +} + +#------------------------------------------------------------------------------- + +=head2 _parse_sequence + +This method is used to parse the sequence and hash it on sequence +identifier. It also stores the length of the longest sequence id + +=cut + +sub _parse_sequence { + my $self = shift; + + my $seq_hash = {}; + my $seq_id; + foreach ( @{ $self->{_sequence_rows} } ) { + + next if m/^\s*$/; #Ignore blank lines + + if (m/^>(\S+)/) { + $seq_id = $1; + + if ( exists( $seq_hash->{$seq_id} ) ) { + croak "FATAL: Sequence identifiers must be unique. Your fasta file contains two sequences with the same id ($seq_id)"; + } + + #Store the max length of seq name, use this later when printing in ascii + $self->{_max_seqname} = length($seq_id) + if ( !$self->{_max_seqname} + or length($seq_id) > $self->{_max_seqname} ); + } + else { + croak "FATAL: Unrecognised format of fasta file. 
Each sequence must have a header line in the format '>identifier '" + unless defined $seq_id; + chomp; + $seq_hash->{$seq_id} .= $_; + } + } + + $self->{_seq_hash} = $seq_hash; +} + +#------------------------------------------------------------------------------- + +=head2 _translate_fasta + +Uses the HMMER v2.3.2 progam "translate" to perform a six-frame translation of +the input sequence. Checks the parameter "-translate". + +Accepted arguments are "all" and "orf", where "all" means (from the "translate" +help text) "translate in full, with stops; no individual ORFs" and "orf" means +"report only ORFs greater than minlen" where minlen is set to the default of +20. + +=cut + +sub _translate_fasta { + my ($self) = @_; + my $translatedFasta = $self->{_fasta} . ".translated"; + + my @params = ( 'translate', '-q', ); + if ( $self->{_translate} eq 'all' ) { + push( @params, '-a' ); + } + elsif ( $self->{_translate} eq 'orf' ) { + push( @params, '-l', '20' ); + } + else { + croak qq(Unexpected parameter '$self->{_translate}'); + } + push( @params, '-o', $translatedFasta, $self->{_fasta} ); + + print STDERR "PfamScan::translate_fasta: translate command: |@params|\n" + if $ENV{DEBUG}; + + my $run = start \@params, 'pipe', \*OUT, '2>pipe', \*ERR + or croak qq(FATAL: error running translate; IPC::Run returned '$?'); + + close IN; + close OUT; + + my $err; + while () { + $err .= $_; + } + close ERR; + + finish $run + or croak qq|FATAL: error running translate ($err); ipc returned '$?'|; + open( F, "<", $translatedFasta ) + or croak qw(Could not open $translatedFasta '$!'); + if ( $self->{_translate} eq 'orf' ) { + while () { + if (/^>\s?(\S+).*nt (\d+)\.+(\d+)/) { + $self->{_orf}->{$1}->{start} = $2; + $self->{_orf}->{$1}->{end} = $3; + $self->{_orf}->{$1}->{strand} = ( $2 < $3 ) ? 
'+' : '-'; + } + } + } + else { + my $currentSeq; + my $currentFrame; + my $currentLen = 0; + my $maxEnd = 0; + while () { + chomp; + if (/^>\s?(\S+\:)(\d+)/) { + if ( $currentLen > 0 ) { + my $seqName = $currentSeq . $currentFrame; + if ( $currentFrame < 3 ) { + my $start = 1 + $currentFrame; + my $end = $start + $currentLen - 1; + $self->{_orf}->{$seqName}->{strand} = '+'; + $self->{_orf}->{$seqName}->{start} = $start; + $self->{_orf}->{$seqName}->{end} = $end; + $maxEnd = $end if ( $end > $maxEnd ); + } + else { + my $start = $maxEnd - ( $currentFrame - 3 ); + my $end = $start - $currentLen + 1; + $self->{_orf}->{$seqName}->{strand} = '-'; + $self->{_orf}->{$seqName}->{start} = $start; + $self->{_orf}->{$seqName}->{end} = $end; + } + } + $currentLen = 0; + $currentSeq = $1; + $currentFrame = $2; + } + else { + $currentLen += length($_) * 3; + } + } + my $seqName = $currentSeq . $currentFrame; + if ( $currentFrame < 3 ) { + my $start = 1 + $currentFrame; + my $end = $start + $currentLen - 1; + $self->{_orf}->{$seqName}->{strand} = '+'; + $self->{_orf}->{$seqName}->{start} = $start; + $self->{_orf}->{$seqName}->{end} = $end; + $maxEnd = $end if ( $end > $maxEnd ); + } + else { + my $start = $maxEnd - ( $currentFrame - 3 ); + my $end = $start - $currentLen + 1; + $self->{_orf}->{$seqName}->{strand} = '-'; + $self->{_orf}->{$seqName}->{start} = $start; + $self->{_orf}->{$seqName}->{end} = $end; + } + } + $self->{_fasta} = $translatedFasta; +} +#------------------------------------------------------------------------------- + +=head1 COPYRIGHT + +Copyright (c) 2009: Genome Research Ltd. + +Authors: Jaina Mistry (jm14@sanger.ac.uk), John Tate (jt6@sanger.ac.uk), Rob Finn (finnr@janelia.hhmi.org) + +This is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +or see the on-line version at http://www.gnu.org/copyleft/gpl.txt + +=cut + + 1; + diff -r 000000000000 -r 68a3648c7d91 pfamScan/Bio/Pfam/Scan/Seq.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/Bio/Pfam/Scan/Seq.pm Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,57 @@ +package Bio::Pfam::Scan::Seq; + +use strict; +use warnings; + +use Bio::LocatableSeq; +use Bio::Seq::RichSeq; + +use base qw(Bio::LocatableSeq Bio::Seq::RichSeq); + +sub new { + my($class, %params ) = @_; + my( $id, $start, $end, $seq) = + ( + ($params{'-ID'} || $params{'-id'}), + ($params{'-START'} || $params{'-start'}), + ($params{'-END'} || $params{'-end'}), + ($params{'-SEQ'} || $params{'-seq'}), + ); + + my $self = $class->SUPER::new( %params ); # this is Bio::Pfam::Root + # so we have to set Bio::LocatableSeq fields ourself + + + + + $self->id( $id ); + $self->start( $start ); + $self->end( $end ); + $self->seq( $seq ); + + + return $self; # success - we hope! +} + +=head1 COPYRIGHT + +Copyright (c) 2007: Genome Research Ltd. + +Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) + +This is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . + +=cut + +1 diff -r 000000000000 -r 68a3648c7d91 pfamScan/htt.fas --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/htt.fas Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,42 @@ +>gi|296434520|sp|P42858.2|HD_HUMAN RecName: Full=Huntingtin; AltName: Full=Huntington disease protein; Short=HD protein +MATLEKLMKAFESLKSFQQQQQQQQQQQQQQQQQQQQQPPPPPPPPPPPQLPQPPPQAQPLLPQPQPPPP +PPPPPPGPAVAEEPLHRPKKELSATKKDRVNHCLTICENIVAQSVRNSPEFQKLLGIAMELFLLCSDDAE +SDVRMVADECLNKVIKALMDSNLPRLQLELYKEIKKNGAPRSLRAALWRFAELAHLVRPQKCRPYLVNLL +PCLTRTSKRPEESVQETLAAAVPKIMASFGNFANDNEIKVLLKAFIANLKSSSPTIRRTAAGSAVSICQH +SRRTQYFYSWLLNVLLGLLVPVEDEHSTLLILGVLLTLRYLVPLLQQQVKDTSLKGSFGVTRKEMEVSPS +AEQLVQVYELTLHHTQHQDHNVVTGALELLQQLFRTPPPELLQTLTAVGGIGQLTAAKEESGGRSRSGSI +VELIAGGGSSCSPVLSRKQKGKVLLGEEEALEDDSESRSDVSSSALTASVKDEISGELAASSGVSTPGSA +GHDIITEQPRSQHTLQADSVDLASCDLTSSATDGDEEDILSHSSSQVSAVPSDPAMDLNDGTQASSPISD +SSQTTTEGPDSAVTPSDSSEIVLDGTDNQYLGLQIGQPQDEDEEATGILPDEASEAFRNSSMALQQAHLL +KNMSHCRQPSDSSVDKFVLRDEATEPGDQENKPCRIKGDIGQSTDDDSAPLVHCVRLLSASFLLTGGKNV +LVPDRDVRVSVKALALSCVGAAVALHPESFFSKLYKVPLDTTEYPEEQYVSDILNYIDHGDPQVRGATAI +LCGTLICSILSRSRFHVGDWMGTIRTLTGNTFSLADCIPLLRKTLKDESSVTCKLACTAVRNCVMSLCSS +SYSELGLQLIIDVLTLRNSSYWLVRTELLETLAEIDFRLVSFLEAKAENLHRGAHHYTGLLKLQERVLNN +VVIHLLGDEDPRVRHVAAASLIRLVPKLFYKCDQGQADPVVAVARDQSSVYLKLLMHETQPPSHFSVSTI +TRIYRGYNLLPSITDVTMENNLSRVIAAVSHELITSTTRALTFGCCEALCLLSTAFPVCIWSLGWHCGVP +PLSASDESRKSCTVGMATMILTLLSSAWFPLDLSAHQDALILAGNLLAASAPKSLRSSWASEEEANPAAT +KQEEVWPALGDRALVPMVEQLFSHLLKVINICAHVLDDVAPGPAIKAALPSLTNPPSLSPIRRKGKEKEP +GEQASVPLSPKKGSEASAASRQSDTSGPVTTSKSSSLGSFYHLPSYLKLHDVLKATHANYKVTLDLQNST +EKFGGFLRSALDVLSQILELATLQDIGKCVEEILGYLKSCFSREPMMATVCVQQLLKTLFGTNLASQFDG +LSSNPSKSQGRAQRLGSSSVRPGLYHYCFMAPYTHFTQALADASLRNMVQAEQENDTSGWFDVLQKVSTQ +LKTNLTSVTKNRADKNAIHNHIRLFEPLVIKALKQYTTTTCVQLQKQVLDLLAQLVQLRVNYCLLDSDQV 
+FIGFVLKQFEYIEVGQFRESEAIIPNIFFFLVLLSYERYHSKQIIGIPKIIQLCDGIMASGRKAVTHAIP +ALQPIVHDLFVLRGTNKADAGKELETQKEVVVSMLLRLIQYHQVLEMFILVLQQCHKENEDKWKRLSRQI +ADIILPMLAKQQMHIDSHEALGVLNTLFEILAPSSLRPVDMLLRSMFVTPNTMASVSTVQLWISGILAIL +RVLISQSTEDIVLSRIQELSFSPYLISCTVINRLRDGDSTSTLEEHSEGKQIKNLPEETFSRFLLQLVGI +LLEDIVTKQLKVEMSEQQHTFYCQELGTLLMCLIHIFKSGMFRRITAAATRLFRSDGCGGSFYTLDSLNL +RARSMITTHPALVLLWCQILLLVNHTDYRWWAEVQQTPKRHSLSSTKLLSPQMSGEEEDSDLAAKLGMCN +REIVRRGALILFCDYVCQNLHDSEHLTWLIVNHIQDLISLSHEPPVQDFISAVHRNSAASGLFIQAIQSR +CENLSTPTMLKKTLQCLEGIHLSQSGAVLTLYVDRLLCTPFRVLARMVDILACRRVEMLLAANLQSSMAQ +LPMEELNRIQEYLQSSGLAQRHQRLYSLLDRFRLSTMQDSLSPSPPVSSHPLDGDGHVSLETVSPDKDWY +VHLVKSQCWTRSDSALLEGAELVNRIPAEDMNAFMMNSEFNLSLLAPCLSLGMSEISGGQKSALFEAARE +VTLARVSGTVQQLPAVHHVFQPELPAEPAAYWSKLNDLFGDAALYQSLPTLARALAQYLVVVSKLPSHLH +LPPEKEKDIVKFVVATLEALSWHLIHEQIPLSLDLQAGLDCCCLALQLPGLWSVVSSTEFVTHACSLIYC +VHFILEAVAVQPGEQLLSPERRTNTPKAISEEEEEVDPNTQNPKYITAACEMVAEMVESLQSVLALGHKR +NSGVPAFLTPLLRNIIISLARLPLVNSYTRVPPLVWKLGWSPKPGGDFGTAFPEIPVEFLQEKEVFKEFI +YRINTLGWTSRTQFEETWATLLGVLVTQPLVMEQEESPPEEDTERTQINVLAVQAITSLVLSAMTVPVAG +NPAVSCLEQQPRNKPLKALDTRFGRKLSIIRGIVEQEIQAMVSKRENIATHHLYQAWDPVPSLSPATTGA +LISHEKLLLQINPERELGSMSYKLGQVSIHSVWLGNSITPLREEEWDEEEEEEADAPAPSSPPTSPVNSR +KHRAGVDIHSCSQFLLELYSRWILPSSSARRTPAILISEVVRSLLVVSDLFTERNQFELMYVTLTELRRV +HPSEDEILAQYLVPATCKAAAVLGMDKAVAEPVSRLLESTLRSSHLPSRVGALHGVLYVLECDLLDDTAK +QLIPVISDYLLSNLKGIAHCVNIHSQQHVLVMCATAFYLIENYPLDVGPEFSASIIQMCGVML diff -r 000000000000 -r 68a3648c7d91 pfamScan/pfamScan.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/pfamScan.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,32 @@ + + Annotate PFAM domains. + /home/inmare/galaxy/tools/pfamScan/pfam_scan.pl -fasta $input -cpu 2 -dir /home/inmare/galaxy/tools/pfamScan/hmm > $output + + + + + + + + + + + + + + + + + Predicting active site residue annotations in the Pfam database. + (2007) BMC bioinformatics 8 :298 + PMID: 17688688 + + The EMBL-EBI bioinformatics web and programmatic tools framework. 
+ (2015 July 01) Nucleic acids research 43 (W1) :W580-4 + PMID: 25845596 + + + + The pfam_scan.pl script is used to annotate PFAM domains on the input file sequences. Only PFAM A class domains are predicted. See http://www.ebi.ac.uk/Tools/pfa/pfamscan/help/ for instructions on how to obtain pfamSCAN and for a brief description of the output format. + + diff -r 000000000000 -r 68a3648c7d91 pfamScan/pfam_scan.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfamScan/pfam_scan.pl Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,338 @@ +#!/usr/bin/env perl + +# $Id: pfam_scan.pl 9045 2015-05-26 09:09:52Z rdf $ + +use strict; +use warnings; + +BEGIN {push @INC,"/home/inmare/galaxy/tools/pfamScan"} +use Bio::Pfam::Scan::PfamScan; +use Getopt::Long; + +my $VERSION = "1.5"; + +#------------------------------------------------------------------------------- + +# get the user options +my ( $outfile, $e_seq, $e_dom, $b_seq, $b_dom, $dir, + $clan_overlap, $fasta, $align, $help, $as, $pfamB, + $json, $only_pfamB, $cpu, $translate ); +GetOptions( 'help' => \$help, + 'outfile=s' => \$outfile, + 'e_seq=f' => \$e_seq, + 'e_dom=f' => \$e_dom, + 'b_seq=f' => \$b_seq, + 'b_dom=f' => \$b_dom, + 'dir=s' => \$dir, + 'clan_overlap' => \$clan_overlap, + 'fasta=s' => \$fasta, + 'align' => \$align, + 'h' => \$help, + 'as' => \$as, + 'pfamB' => \$pfamB, + 'only_pfamB' => \$only_pfamB, + 'json:s' => \$json, + 'cpu=i' => \$cpu, + 'translate:s' => \$translate +); + +help() if $help; +help() unless ( $dir and $fasta ); # required options + +my $pfamA; +if ( $only_pfamB or $pfamB ) { + die qq(FATAL: As of release 28.0, Pfam no longer produces Pfam-B. 
The -pfamB and -only_pfamB options are now obsolete.\n); + $pfamB=1; +} +else { + $pfamA=1; +} + +my @hmmlib; +push @hmmlib, 'Pfam-A.hmm' if $pfamA; +push @hmmlib, 'Pfam-B.hmm' if $pfamB; + +#------------------------------------------------------------------------------- + +# check the input parameters + +die qq(FATAL: must specify both "-dir" and "-fasta") + unless ( defined $dir and defined $fasta ); + +die qq(FATAL: can't find directory "$dir") + unless -d $dir; + +die qq(FATAL: can't find file "$fasta") + unless -s $fasta; + +foreach my $hmmlib ( @hmmlib ) { + die qq(FATAL: can't find "$hmmlib" and/or "$hmmlib" binaries and/or "$hmmlib.dat" file in "$dir") + unless ( -s "$dir/$hmmlib" and + -s "$dir/$hmmlib.h3f" and + -s "$dir/$hmmlib.h3i" and + -s "$dir/$hmmlib.h3m" and + -s "$dir/$hmmlib.h3p" and + -s "$dir/$hmmlib.dat" ); +} + +die qq(FATAL: can't use E-value or bit score threshold with Pfam-B searches; Pfam-B searches use a default cut_off of 0.001) + if ( ( $e_seq or $e_dom or $b_seq or $b_dom ) and not $pfamA ); + +die qq(FATAL: can't use E-value and bit score threshold together) + if ( ( $e_seq and ( $b_seq or $b_dom ) ) or + ( $b_seq and ( $e_seq or $e_dom ) ) or + ( $b_dom and $e_dom ) ); + +die qq(FATAL: output file "$outfile" already exists) + if ( $outfile and -s $outfile ); + +if ( $as ) { + die qq(FATAL: "-as" option only works on Pfam-A families) + unless $pfamA; + + die qq(FATAL: can't find "active_site.dat" in "$dir") + unless -s "$dir/active_site.dat"; +} + +if ( defined $translate ) { + if ( $translate eq "" ) { + # no argument to "-translate" was given, so make "orf" the default + $translate = 'orf'; + } + else { + # there was an argument to "-translate", so make sure it's valid + unless ( $translate eq "all" or $translate eq "orf" ) { + die qq(FATAL: "-translate" option accepts only "all" and "orf"); + } + } +} + +#------------------------------------------------------------------------------- + +# build the object +my $ps = 
Bio::Pfam::Scan::PfamScan->new( + -e_seq => $e_seq, + -e_dom => $e_dom, + -b_seq => $b_seq, + -b_dom => $b_dom, + -dir => $dir, + -clan_overlap => $clan_overlap, + -fasta => $fasta, + -align => $align, + -as => $as, + -hmmlib => \@hmmlib, + -version => $VERSION, + -cpu => $cpu, + -translate => $translate +); + +# run the search +$ps->search; + +# print the results +if ( defined $json ) { + + my $json_object; + eval { + require JSON; + $json_object = new JSON; + }; + if ( $@ ) { + die qq(FATAL: can't load JSON module; can't write JSON-format output); + } + + if ( $json eq 'pretty' ) { + $json_object->pretty( 1 ) ; + } + print $json_object->encode( $ps->results ); + +} +else { + $ps->write_results( $outfile, $e_seq, $e_dom, $b_seq, $b_dom ); +} + +exit; + +#------------------------------------------------------------------------------- + +sub help { + print STDERR < -dir + +Additonal options: + + -h : show this help + -outfile : output file, otherwise send to STDOUT + -clan_overlap : show overlapping hits within clan member families (applies to Pfam-A families only) + -align : show the HMM-sequence alignment for each match + -e_seq : specify hmmscan evalue sequence cutoff for Pfam-A searches (default Pfam defined) + -e_dom : specify hmmscan evalue domain cutoff for Pfam-A searches (default Pfam defined) + -b_seq : specify hmmscan bit score sequence cutoff for Pfam-A searches (default Pfam defined) + -b_dom : specify hmmscan bit score domain cutoff for Pfam-A searches (default Pfam defined) + -as : predict active site residues for Pfam-A matches + -json [pretty] : write results in JSON format. If the optional value "pretty" is given, + the JSON output will be formatted using the "pretty" option in the JSON + module + -cpu : number of parallel CPU workers to use for multithreads (default all) + -translate [mode] : treat sequence as DNA and perform six-frame translation before searching. 
If the + optional value "mode" is given it must be either "all", to translate everything + and produce no individual ORFs, or "orf", to report only ORFs with length greater + than 20. If "-translate" is used without a "mode" value, the default is to + report ORFs (default no translation) + + For more help, check the perldoc: + + shell\% perldoc pfam_scan.pl + +EOF + exit; + +} + +#------------------------------------------------------------------------------- + +=head1 NAME + +pfam_scan.pl -- Search protein sequences against the Pfam HMM library + +=head1 SYNOPSIS + +pfam_scan.pl [options] -fasta -dir + +=head1 OPTIONS + +=over + +=item B<-dir> I + +Directory containing Pfam data files [required] + +=item B<-fasta> I + +Filename of input file containing sequence(s) [required] + +=item B<-outfile> I + +Write output to C [default: STDOUT] + +=item B<-e_seq> + +Sequence E-value cut-off [default: use Pfam GA cutoff] + +=item B<-e_dom> + +Domain E-value cut-off [default: use Pfam GA cutoff] + +=item B<-b_seq> + +Sequence bits score cut-off [default: use Pfam GA cutoff] + +=item B<-b_dom> + +Domain bits score cut-off [default: use Pfam GA cutoff] + +=item B<-clan_overlap> + +Allow sequences in different clans to overlap [default: false] + +=item B<-align> + +Show alignment snippets in results [default: false] + +=item B<-as> + +Search for active sites on Pfam-A matches [default: false] + +=item B<-json> [I] + +Write the results in JSON format [default: false] + +=item B<-cpu> + +Number of parallel CPU workers to use for multithreads [default: all] + +=item B<-translate> [I] + +Treat the input sequence as DNA and perform a six-frame translation before +searching, using the "translate" program from the HMMER v2.3.2 package. If the +optional value I is given, it must be either "all" or "orf": "all" means +translate in full, with stops, and produce no individual ORFs; "orf" means +translate and report only ORFs of length greater than 20. 
If B is +used but I is omitted, the default is to translate using the "orf" +method [default: off (no translation)] + +=item B<-h> + +Display help message + +=back + +The input must be a FASTA-format file. The C<-fasta> and C<-dir> options are +mandatory. You cannot specify both an E-value and bits score threshold. + +=head1 OVERVIEW + +C is a script for searching one or more protein sequences against the +library of HMMs from Pfam. It requires a local copy of the Pfam data files, which +can be obtained from the Pfam FTP area: + + ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/ + +You must also have the HMMER3 binaries installed and their locations given by your +C environment variable. You can download the HMMER3 package at: + + ftp://selab.janelia.org/pub/software/hmmer3/ + +=head1 OUTPUT + +The output format is: + +Example output (-as option): + + O65039.1 38 93 38 93 PF08246 Inhibitor_I29 Domain 1 58 58 45.9 2.8e-12 1 No_clan + O65039.1 126 342 126 342 PF00112 Peptidase_C1 Domain 1 216 216 296.0 1.1e-88 1 CL0125 predicted_active_site[150,285,307] + +Most of these values are derived from the output of I (see HMMER3 +documentation for details). The significance value is 1 if the bit score for a +hit is greater than or equal to the curated gathering threshold for the +matching family, 0 otherwise. + +=head1 REFERENCES + +Active site residues are predicted using the method described in the publication: + +Mistry J., Bateman A., Finn R.D. "Predicting active site residue annotations in +the Pfam database." BMC Bioinformatics. 2007;8:298. PMID:17688688. + +=head1 AUTHORS + +Jaina Mistry (jaina@ebi.ac.uk), Rob Finn (rdf@ebi.ac.uk) + +=cut + +=head1 COPYRIGHT + +Copyright (c) 2009: Genome Research Ltd. 
+ +Authors: Jaina Mistry (jaina@ebi.ac.uk), rdf (rdf@ebi.ac.uk) + +This is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +or see the on-line version at http://www.gnu.org/copyleft/gpl.txt + +=cut + diff -r 000000000000 -r 68a3648c7d91 pfam_annot/annota.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/annota.pl Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,221 @@ +#!/usr/bin/perl -w + +use strict; +my $d_file="/home/inmare/galaxy/tools/pfam_annot/pfamA.txt"; +open(IN,$d_file); +my %decode=(); +my %clan_decode; +my $id=""; +my %c=(); + + +my $prot_file=shift; +my $pfam_file=shift; +my $prefix=shift; + +while() +{ + if ($_=~/^\d/) + { + my @vl=(split(/\t+/)); + $decode{$vl[1]}="$vl[3]
";#$vl[8] $vl[9]"; + my $cc=0; + my %repeated=(); + foreach my $v (@vl) + { + $v=~s/\[\d+\]/ /g; + last if $v=~/hmmbuild/; + last if $cc>10; + next if $v=~/anon/; + next if $v=~/Bates/; + next if $v=~/Cogis/; + next if $v=~/Coggis/; + next if $v=~/Bateman/; + next if $v=~/Sonnhammer/; + next if $v=~/Finn/; + next if $v=~/Studholme/; + next if $v=~/Kerrison/; + next if $repeated{$v}; + next if $v eq $vl[3]; + next unless length($v)>=20 && $cc<=9; + $decode{$vl[1]}.="$v "; + $repeated{$v}++; + $cc++; + } + #print "$vl[1] $decode{$vl[1]}\n"; + } +} +close(IN); + +my $clan_file="/home/inmare/galaxy/tools/pfam_annot/clans.txt"; +open(IN,$clan_file); +while() +{ + my @vl=(split(/\t/)); + #$clan_decode{$vl[1]}="$vl[3]"; + my $cc=0; + foreach my $v (@vl) + { + $cc++; + $v=~s/\[\d+\]/ /g; + $clan_decode{$vl[1]}.="$v " if length($v) >=30 && $cc<=10; + } + +} +my %plasm=(); +open(IN,"$prot_file"); +while() +{ + if ($_=~/^>(.*)/) + { + $id=$1; + $id=(split(/\s+/,$id))[0]; + if ($id=~/#/) + { + my $pid=(split(/\#/,$id))[0]; + $plasm{$pid}++; + } + }else{ + chomp; + $c{$id}.=$_; + } +} +close(IN); + +open(OUT,">$prefix"); +print OUT "\n\n"; +print OUT "\n"; +print OUT "\n"; +#print OUT "\n"; +#print OUT "\n"; +print OUT "\n"; +my $color="\"#czb9dz\""; +my %printed; +open(IN,$pfam_file); +print OUT "Proteins with PFAM domains:\n

\n"; +my @dd=keys %plasm; +if ($#dd>0) +{ + print OUT "

Show results "; + for (my $i=0;$i<=$#dd;$i++) + { + if ($i==$#dd) + { + print OUT "$dd[$i].

\n"; + }else{ + print OUT "$dd[$i],\n"; + } + } +}else{ + print OUT "
\n\n"; #div per ogni plasmide| +} +my $ntokens=0; +my $prev_plasmid=""; +my $curr_plasmid=""; +my $np=0; +while() +{ + next if $_=~/^\#/; + my ($name,$domain,$clan)=(split(/\s+/))[0,5,-1]; + next unless $name; + if ($name=~/#/) + { + $curr_plasmid=(split(/\#/,$name))[0]; + if ($curr_plasmid ne $prev_plasmid) + { + if ($np>0) + { + print OUT "
\n"; + print OUT "
\n"; + } + print OUT "
\n"; + print OUT "\n"; + $np++; + } + $prev_plasmid=$curr_plasmid; + } + + $domain=~s/\.\d+//; + unless ($printed{$name}) + { + my $seq=$c{$name}; + + $seq=~s/\*//g; + $seq=form($seq,90); + print OUT "\n\n"; + print OUT "\n\n\n\n"; + $ntokens=2; + } + my $hd=uc $domain; + #www.canoro.altervista.org + if ($decode{$domain}) + { + my $ddes=$decode{$domain}; + $ddes=~s/\s+/ /g; + if ($ntokens % 2==0) + { + print OUT "\n\n\n\n"; + $ntokens++; + } + if ($clan_decode{$clan}) + { + my $clanD=$clan_decode{$clan}; + $clanD=~s/\s+/ /g; + next if $decode{$domain} eq $clan_decode{$clan}; + my $ddes=$decode{$domain}; + if ($ntokens % 2==0) + { + print OUT "\n\n\n\n"; + $ntokens++; + } + $printed{$name}=1; +} + +#if ($curr_plasmid ne "") +#{ + print OUT "
\n"; + print OUT "
\n"; + print OUT "
$name
\n
\n"; + print OUT "
\n"; + print OUT "
 \n$seq\n 
\n"; + print OUT "
\n"; + }else{ + print OUT "\n"; + } + + print OUT "

\n"; + print OUT " $domain\n

$ddes

\n\n"; + print OUT "

\n
\n"; + }else{ + print OUT "\n"; + } + + print OUT "

\n"; + print OUT " $clan\n

$clanD

\n\n"; + print OUT "

\n
\n"; + print OUT "
\n"; + print OUT "\n"; +#} +#print OUT "

Proteins without PFAM domains:\n
\n"; +#foreach my $seq (keys %c) +#{ +# next if $printed{$seq}; +# print OUT "<>$seq\n\n

\n$c{$seq}
\n"; +# print OUT "
\n"; +#} +#print OUT "\n\n\n\n"; +#close(OUT); + +sub form +{ + my $string=$_[0]; + my $len=$_[1]; + my $outS=""; + for (my $i=0;$i<=length($string);$i+=$len) + { + $outS.=substr($string,$i,$len)."\n"; + } + #print "A:$outS"; + #$outS=~s/\s+//g; + return $outS; +} diff -r 000000000000 -r 68a3648c7d91 pfam_annot/clans.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/clans.txt Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,515 @@ +1 CL0001 EGF \N EGF superfamily Finn RD, Bateman A anon Members of this clan all belong to the EGF superfamily. This particular superfamily is characterised as having least 6 cysteines residues.\ \ \ \ These cysteine form disulphide bonds, in the order 1-3, 2-4, 5-6, which are essential for the stability of the EGF fold. These disulphide bonds are stacked in a ladder-like arrangement. The Laminin EGF family is distinguished by having an an additional disulphide bond. The function of the domains within this family remains unclear, but they are though to largely perform a structural role. More often than not, there domains are arranged a tandem repeats in extracellular proteins. 2008-09-03 15:50:29 2004-03-17 16:02:08 26 325 6259 696 88541 1 +3 CL0003 SAM \N Sterile Alpha Motif (SAM) domain Finn RD anon SAM domains are found in a diverse set of proteins, which include scaffolding proteins, transcription regulators, translational regulators tyrosine kinases and serine/threonine kinases [1-3]. SAM domains are found in all eukaryotes and some bacteria [3] . Structures of SAM domains reveal a common five helical structure. The SAM domain is involved in a variety of functions. The most widespread function is in domain-domain interactions. The SAM domain performs domain-domain interactions using multifarious arrangements of the SAM domain. More recently, the SAM domain within the Smaug protein has been demonstrated to bind to the Nanos 3' UTR translation control element (Rfam:RF00161) [3]. 
This clan currently only represents the diverse SAM domain family and does not contain the more divergent SAM/Pointed family (Pfam:PF02198). 2008-09-03 15:50:29 2004-03-17 16:21:50 20 126 742 467 11010 1 +4 CL0004 Concanavalin \N Concanavalin-like lectin/glucanase superfamily Bateman A anon This superfamily includes a diverse range of carbohydrate binding domains and glycosyl hydrolase enzymes that share a common structure. 2008-09-03 15:50:29 2004-03-17 16:44:11 19 1631 2750 3131 34755 1 +5 CL0005 Kazal \N Kazal like domain Finn RD anon Kazal domains are found in both serine protease inhibitors and extracellular regions of agrins. The structure of the Kazal domain is a small alpha/beta fold. Typically the Kazal domain consists of 2 short-helices and a 3-stranded anti-parallel sheet. The fold is contains several disulphide bonds. 2008-09-03 15:50:29 2004-03-17 17:00:11 26 106 337 450 6552 1 +6 CL0006 C1 \N Protein kinase C, C1 domain Finn RD anon The members of this clan are all variations of the protein kinase C1 domain that is characterised by a rich cysteine and histidine content. The C1 domain is the N-terminal region of conservation found in protein kinase C domains. This domain is involved in binding many ligands, which include diacylglycerol, phorbol esters and zinc [1]. 2008-09-03 15:50:29 2004-03-17 17:47:56 19 30 728 396 10495 1 +7 CL0007 KH \N K-Homology (KH) domain Superfamily Finn RD anon The KH domain is thought to be the second most prevalent RNA binding motif in proteins. The motif is characterised by a conserved GXXXGXXG in the middle of the domain. Structures of KH reveal that the KH domain is arranged as either a beta-alpha-alpha-beta-beta (mini-KH domain) or beta-alpha-alpha-beta-beta-alpha (maxi-KH domain). The secondary elements are separated by at least four loop segments. The second loop is located between beta-1 and al The KH domain can be found either as single or multiple copies. The KH domain usually binds RNA as a multimer. 
2008-09-03 15:50:29 2004-03-17 17:58:30 17 312 491 5344 38636 1 +9 CL0009 ENTH_VHS \N ENTH/ANTH/VHS superfamily Bateman A, McMahon H anon This clan includes the related ENTH and ANTH domains as well as the VHS domain. The ENTH domain is approximately 150 residues in length and is a solenoid of alpha-helices. The various ENTH domains have various lipid specificities but the key feature that distinguishes it functionally from ANTH domains is its ability to bend membranes. It does this by folding an additional N-terminal helix on lipid binding. The ANTH domain is approximately 300 residues in length and is a PtdIns(4,5)P2 binding domain. It has no membrane bending properties. The VHS (Vps-27, Hrs and STAM) domain is a 140 residue long domain present in the very NH2-terminus of at least 60 proteins. Based on their functional characteristics and on recent data on the involvement of VHS in cargo recognition in trans-Golgi, VHS domains are considered to have a general membrane targeting/cargo recognition role in vesicular trafficking [5]. 2008-09-03 15:50:29 2004-03-18 10:53:33 19 75 85 345 4028 1 +10 CL0010 SH3 \N Src homology-3 domain Finn RD anon Src homology-3 (SH3) domains are comprised of about 60 amino acids, performing either an assembly or regulatory role.\ For example, SH3 domains in the Grb2 adaptor protein are essential for protein-protein interactions and\ \ signal transduction in the p21 Ras-dependent growth factor signaling pathway. Alternatively, SH3 performs a regulatory role in the Src family of tyrosine kinases. SH3 domains bind a variety of peptide ligands, many of which contain a PxxP motif. This PxxP motif is flanked by different specificity elements [1]. Structures of SH3 domains, both free and ligand complexed, have provided insights into the mechanism of ligand recognition. The SH3 fold consists of two anti-parallel beta sheets that lie at right angles to each other. Within the fold, there are two variable loops, referred to as RT and n-Src loops. 
When SH3 binds to its ligand, the proline rich ligand adopts a PPII helix conformation, with the PPII helix structure recognised by a pair of grooves on the surface of the SH3 domain that bind turns of the helix. The SH3 grooves are formed by a series of nearly parallel, well-conserved aromatic residues [1]. 2008-09-03 15:50:29 2004-03-18 11:12:55 20 564 2030 4044 40209 1 +11 CL0011 Ig \N Immunoglobulin superfamily Bateman A, Finn RD anon Members of the immunoglobulin superfamily are found in hundreds of proteins of different functions. Examples include antibodies, the giant muscle kinase titin and receptor tyrosine kinases. Immunoglobulin-like domains may be involved in protein-protein and protein-ligand interactions. The superfamily can be divided into discrete structural sets, by the presence or absence of beta-strands in the structure and the length of the domains [1]. Proteins containing domains of the C1 and V-sets are mostly molecules of the vertebrate immune system. Proteins of the C2-set are mainly lymphocyte antigens, this differs from the composition of the C2-set as originally proposed [1]. The I-set is intermediate in structure between the C1 and V-sets and is found widely in cell surface proteins as well as intracellular muscle proteins. 2008-09-03 15:50:29 2004-03-18 16:23:40 25 3370 4292 3474 136715 1 +12 CL0012 Histone \N Histone superfamily Bateman A anon Members of this clan all possess a histone fold. Generally proteins in this clan are DNA binding. 2008-09-03 15:50:29 2004-04-19 14:28:04 17 742 154 9904 23976 1 +13 CL0013 Beta-lactamase \N Serine beta-lactamase-like superfamily Finn RD, Bateman A anon This superfamily contains proteins that have a beta-lactamase fold. This includes beta-lactamases as well as Dala-Dala carboxypeptidases and glutaminases. 
2008-09-03 15:50:29 2004-04-19 15:42:28 17 961 303 5474 58501 1 +14 CL0014 Glutaminase_I \N Class-I Glutamine amidotransferase superfamily Bateman A anon Most members of this clan are glutaminase enzymes. This superfamily is shown to be related in [1]. The clan also contains the DJ-1/PfpI family that includes the peptidase PfpI that has a catalytic Cys-His-Glu triad that differs from the class I GAT Cys-His-Glu triad. 2008-09-03 15:50:29 2004-04-28 09:27:01 21 334 418 7216 67224 1 +15 CL0015 MFS \N Major Facilitator Superfamily Bateman A anon The major facilitator superfamily (MFS) is one of the two largest families of membrane transporters found on Earth [1]. It is present ubiquitously in bacteria, archaea, and eukarya and includes members that can function by solute uniport, solute/cation symport, solute/cation antiport and/or solute/solute antiport with inwardly and/or outwardly directed polarity [1]. All permeases of the MFS possess either 12 or 14 transmembrane helices [1]. 2008-09-03 15:50:29 2004-04-30 16:48:27 19 22 846 6356 249360 1 +16 CL0016 PKinase \N Protein kinase superfamily Studholme DJ anon This superfamily includes the Serine/Threonine- and Tyrosine- protein kinases as well as related kinases that act on non-protein substrates. 2008-09-03 15:50:29 2004-06-11 14:28:37 21 3279 6514 9586 173964 1 +18 CL0018 bZIP \N bZIP-like leucine zipper Studholme DJ anon This family of eukaryotic transcription factors contain a basic region adjacent to a leucine zipper. 2008-09-03 15:50:29 2004-06-16 18:30:26 14 321 111 611 8901 1 +20 CL0020 TPR Tetratrico peptide repeat superfamily Studholme DJ anon Tetratricopeptide-like repeats are found in a numerous and diverse proteins involved in such functions as cell cycle regulation, transcriptional control, mitochondrial and peroxisomal protein transport, neurogenesis and protein folding. 
2008-09-03 15:50:29 2004-06-21 18:12:39 24 947 20914 6771 404043 1 +21 CL0021 OB \N OB fold Studholme DJ, Bateman A anon The OB (oligonucleotide/oligosaccharide binding) was defined by Murzin [1]. The common part of the OB-fold, has a five-stranded beta-sheet coiled to form a closed beta-barrel. This barrel is capped by an alpha-helix located between the third and fourth strands [1]. 2008-09-03 15:50:29 2004-06-22 18:31:10 17 1592 988 7656 210543 1 +22 CL0022 LRR Leucine Rich Repeat Studholme DJ anon Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains. This Pfam entry contains Leucine Rich Repeats not recognised by the Pfam:PF00560 model. 2008-09-03 15:50:29 2004-06-23 16:13:12 31 250 9511 3145 175606 1 +23 CL0023 P-loop_NTPase AAA; P-loop containing nucleoside triphosphate hydrolase superfamily Studholme DJ anon AAA family proteins often perform chaperone-like functions that assist in the assembly, operation, or disassembly of protein complexes [2]. 2008-09-03 15:50:29 2004-06-23 17:05:20 33 5523 12211 50680 1511292 1 +25 CL0025 His_Kinase_A His Kinase A (phospho-acceptor) domain Studholme DJ anon This is the dimerisation and phospho-acceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. It is usually found adjacent to a C-terminal ATPase domain (Pfam:PF02518). This domain is found in a wide range of Bacteria and also several Archaea. It comprises one of the fundamental units of the two-component signal transduction system [2-7]. 2008-09-03 15:50:29 2004-06-29 14:19:46 13 497 5781 7648 242300 1 +26 CL0026 CU_oxidase \N Multicopper oxidase-like domain Studholme DJ, Finn RD anon Many of the proteins in this family contain multiple similar copies of this plastocyanin-like domain. 
2008-09-03 15:50:29 2004-06-29 16:37:59 19 1015 245 19953 63536 1 +27 CL0027 RdRP \N RNA dependent RNA polymerase Bateman A anon This clan represents the replicative RNA dependent RNA polymerase. from a variety of RNA viruses [1]. 2008-09-03 15:50:29 2004-08-26 14:33:23 14 852 1801 12549 220781 1 +28 CL0028 AB_hydrolase Alpha/Beta hydrolase fold Bateman A anon This catalytic domain is found in a very wide range of enzymes. 2008-09-03 15:50:29 2004-08-29 17:32:06 21 1989 2396 7428 180167 1 +29 CL0029 Cupin Cupin fold Bateman A anon This clan represents the conserved barrel domain of the 'cupin' superfamily [1] ('cupa' is the Latin term for a small barrel). The cupin fold is found in a wide variety of enzymes, but notably contains the non-enzymatic seed storage proteins also. 2008-09-03 15:50:29 2004-09-06 15:03:53 19 945 1162 6529 112082 1 +30 CL0030 Ion_channel \N Ion channel (VIC) superfamily Bateman A anon This superfamily contains a diverse range of ion channels that share a pair of transmembrane helices in common. This clan is classified as the VIC (Voltage-gated Ion Channel) superfamily in TCDB. 2008-09-03 15:50:29 2004-09-08 16:21:26 15 718 809 5250 44250 1 +31 CL0031 Phosphatase \N Phosphatase superfamily Bateman A anon This family includes tyrosine and dual specificity phosphatase enzymes. 2008-09-03 15:50:29 2004-10-26 13:53:48 12 480 558 2966 20482 1 +32 CL0032 Dim_A_B_barrel \N Dimeric alpha/beta barrel superfamily Bateman A anon This superfamily of proteins possess a Ferredoxin-like fold. Pairs of these assemble into a beta barrel. The function of this barrel is quite varied and includes Muconolactone isomerase as well as monooxygenases. 2008-09-03 15:50:29 2004-10-26 16:31:17 14 510 186 4601 39476 1 +33 CL0033 POZ \N POZ domain superfamily Bateman A anon The POZ domain is found in a variety of transcription factors. POZ domains are also found in the tetramerisation domain of voltage gated K+ channels. 
In general these domains mediate homo-oligomerisation. 2008-09-03 15:50:29 2004-10-27 13:52:11 13 223 1167 1178 26677 1 +34 CL0034 Amidohydrolase \N Amidohydrolase superfamily Bateman A anon This family includes a large family of metal dependent amidohydrolase enzymes [1]. 2008-09-03 15:50:29 2004-10-27 17:19:50 14 704 479 5687 79783 1 +35 CL0035 Peptidase_MH \N Peptidase clan MH/MC/MF Bateman A anon This clan contains peptidases belonging to MEROPS clan MH, MC and MF. We also include Nicastrin that is part of the gamma secretase complex and not known to be a peptidase. 2008-09-03 15:50:29 2004-10-28 13:48:22 15 680 539 5405 63766 1 +36 CL0036 TIM_barrel Common phosphate binding-site TIM barrel superfamily Bateman A anon This large superfamily of TIM barrel enzymes all contain a common phosphate binding site. The phosphate is found in a variety of cofactors and ligands such as FMN [1,2]. 2008-09-03 15:50:29 2004-10-28 15:12:01 23 3964 973 10099 253453 1 +37 CL0037 Lysozyme \N Lysozyme-like superfamily Bateman A anon Barley chitinase, bacterial chitosanase, and lysozymes from phage and animals all hydrolyse related polysaccharides. The proteins little amino-acid similarity, but have a structurally invariant core consisting of two helices and a three-stranded beta-sheet which form the substrate-binding and catalytic cleft [1]. 2008-09-03 15:50:29 2004-10-28 15:30:21 13 1502 527 5612 33680 1 +39 CL0039 HUP PP-loop; PP-ATPase; HUP - HIGH-signature proteins, UspA, and PP-ATPase. Bateman A, Anantharaman V anon The HUP class contains the HIGH-signature proteins, UspA superfamily and the PP-ATPase superfamily [1]. The HIGH superfamily has the HIGH Nucleotidyl transferases and the class I tRNA synthetases both of which have the HIGH and the KMSKS motif [1],[2]. 
The PP-loop ATPase named after the ATP PyroPhosphatase domain, was initially identified as a conserved amino acid sequence motif in four distinct groups of enzymes that catalyse the hydrolysis of the alpha-beta phosphate bond of ATP, namely GMP synthetases, argininosuccinate synthetases, asparagine synthetases, and ATP sulfurylases [3]. The USPA superfamily contains USPA, ETFP and Photolyases [1] 2008-09-03 15:50:29 2004-10-29 14:36:02 11 1108 838 6582 177746 1 +40 CL0040 tRNA_synt_II \N Class II aminoacyl-tRNA and Biotin synthetases Finn RD anon Aminoacyl-tRNA synthetases are key components of the protein translation machinery that catalyse two basic reactions. First, the activation of amino acids via the formation of aminoacyl adenylates and second, linking the activated amino acid to the cognate tRNAs. The aminoacyl-tRNA synthetases generate AMP as the second end product of this reaction, which differentiates them from the majority of ATP-dependent enzymes that produce ADP. In addition, there is a specific aminoacyl-tRNA synthetases for each of the 20 amino acids and there are two structurally distinct classes of aminoacyl-tRNA synthetases, each\ \ encompassing 10 different specificities. The two classes have alternative modes of aminoacylation: class I aminoacylate the 2'OH of the cognate tRNA; class II aminoacylate 3'OH (with the exception of PheRS). Each class contain a conserved core domain that is involved in ATP binding and hydrolysis and combines with additional domains that determine the specificity of interactions with\ the cognate amino acid and tRNA. The class II core domain consist of a mixed-beta sheet, similar to that found in the biotin synthetases, hence why this family has also been included in this clan. The core domain contains three modestly conserved motifs that are responsible for ATP binding. 
The class II aminoacyl-tRNA synthetases can contain additional nested domains, found inserted in the loops of the core domain [1] (and reference therein). 2008-09-03 15:50:29 2004-11-08 11:24:57 16 514 340 5927 72316 1 +41 CL0041 Death \N Death Domain Superfamily Finn RD anon The death domain superfamily is composed of three families: the death domain (DD); the death effector domain (DED) and the caspase recruitment domain (CARD). All of the members perform a pivotal role in signalling events that regulate apoptosis. Protein-protein interactions are mediated by self-self associations, in which CARD-CARD, DD-DD and DED-DED contacts are formed exclusively The three families possess remarkably similar structures, each comprising an antiparallel six helical bundle in the Greek Key topology. Structurally, the DD and CARD families are the most dissimilar. The former is comprised of two perpendicular three-helix bundles, whereas the latter CARD domain contains six helices that are almost parallel with each other. Interestingly, the interactions in CARD or DD containing heterodimers are quite different [1]. 2008-09-03 15:50:29 2004-11-11 10:28:31 12 135 742 249 6544 1 +42 CL0042 Flavoprotein \N Flavoprotein Finn RD anon Members of this clan are FMN or FAD-binding redox proteins. Flavoproteins act in various electron-transport systems as functional analogues of ferredoxin. They are characterised by an open twisted alpha/beta structure consisting of five parallel beta-sheets connected by alpha-helices which surround the sheet. 2008-09-03 15:50:29 2004-11-12 15:13:12 11 497 221 5019 35666 1 +43 CL0043 Chelatase \N Chelatase Superfamily Finn RD anon Metallated tetrapyrroles are used as prosthetic groups in proteins involved in biologically important processes such as photosynthesis, oxygen transport, drug metabolism and nitric oxide synthesis. In living organisms, metallation is catalysed by a group of enzymes called chelatases. 
This clan contains ferrochelatase (heme) and cobalt chelatase [1]. 2008-09-03 15:50:29 2004-11-12 16:49:49 11 77 45 4178 7625 1 +44 CL0044 Ferritin Ferritin-like Superfamily Finn RD anon The members of this clan all share a distinctive four helical bundle. The four helices are arranged antiparallel with a left-handed twist. This helical bundle is distinguished from others by the long connection between the second and third helices. Some of the members contain a Fe or Mn dimer at the centre of the helical bundle. The ferritin fold was first described by Murzin AG and Chothia C, Cur Opin Struc Biol 1992, 2:895-903. 2008-09-03 15:50:29 2004-11-12 17:32:27 13 2058 121 6054 34310 1 +45 CL0045 Rubredoxin \N Rubredoxin-like Finn RD anon The Rubredoxin clan is comprised of three families:Rubredoxin, COX5B and desulforedoxin.Rubredoxin domains are small domains (5-6 kDa) and bind one iron atom tetrahedrally bound by four cysteine residues.Similar, desulforedoxin domains are small (4 kDa), but usually form homodimers. Each monomer binds one iron atom, but in a distorted tetrahedral arrangement. COX5B domains are membrane-anchored rubredoxin-like domains. The domain in the Rubredoxin clan are usually comprised of 2 alpha helixes and 2-3 beta strands. 2008-09-03 15:50:29 2004-11-15 11:37:59 11 172 52 2672 4680 1 +46 CL0046 Thiolase \N Thiolase-like Superfamily Finn RD anon Thiolases are ubiquitous and form a large superfamily. Thiolases can function either degradatively, in the beta-oxidation pathway of fatty acids, or biosynthetically. Biosynthetic thiolases catalyse the formation of acetoacetyl-CoA from two molecules of acetyl-CoA . This is one of the fundamental categories of carbon skeletal assembly patterns in biological systems and is the first step in a wide range of biosynthetic pathways [1]. Thiolase are usually dimeric or tetrameric enzymes. Within each monomer there are two similar domains related by pseudo dyad. 
The N-terminal of these two domains contains a large insertion of about 100 amino acids. 2008-09-03 15:50:29 2004-11-15 12:47:24 15 575 1810 7516 102610 1 +47 CL0047 CuAO_N2_N3 \N Copper amine oxidase, domains 1 and 2 Finn RD anon Copper amine oxidase (CuAO) are comprised of three of four domains. In the case of the four domain CuAO, the N-terminal domain (termed N1, and is not present in the three domain CuAO) and the C-terminal catalytic domain sandwich two repeated domains (termed N2 and N3). The function of these two homologous domains is uncertain. N2 and N3 both have a cystatin-like fold [1]. 2008-09-03 15:50:29 2004-11-15 13:04:24 11 186 31 504 1887 1 +48 CL0048 LolA_LolB \N Lipoprotein localisation factors LolA/B Finn RD anon Gram-negative bacteria lipoproteins are anchored to the periplasmic surface of the inner or outer membrane depending on the sorting signal, which is the residue at position 2 of the polypeptide. Five Lol proteins are involved in the sorting and membrane localisation of lipoprotein. An ATP-binding cassette (ABC) transporter, LolCDE, releases outer membrane-specific lipoproteins from the inner membrane, causing the formation of a complex between the released lipoproteins and the periplasmic molecular chaperone LolA. When this complex interacts with outer membrane receptor LolB, the lipoproteins are transferred from LolA to LolB and then localised to the outer membrane. The structures of LolA and LolB are remarkably similar to each other. Both have a hydrophobic cavity consisting of an unclosed beta-barrel and an alpha-helical lid [1,2]. 2008-09-03 15:50:29 2004-11-15 13:13:26 11 10 5 2158 3976 1 +49 CL0049 Tudor \N Tudor domain 'Royal family' Finn RD, Bateman A anon This clan covers the Tudor domain 'royal family' [1]. This includes chromo, MBT, PWWP and tudor domains. The chromo domain is a comprised of approximately 50 amino acid residues. There are usually one to three Chromo domains found in a single protein. 
In some chromo domain containing proteins, a second related chromo domain has been found and is referred to as the Chromo-shadow domain. The structure of the Chromo and Chromo-shadow domains reveal an OB-fold, a fold found in a variety of prokaryotic and eukaryotic nucleic acid binding proteins.\ More specifically,the chromo-domain structure reveals a three beta strands that are packed against an alpha helix. Interestingly, a similar structure is found in the archaeal chromatin proteins (7kDa DNA-binding domain). These are sequence neutral DNA binding proteins.\ The DNA binding in these archaeal proteins is mediated through the triple stranded beta sheet. These archaeal domains are though to represent an ancestral chromo domain. Homologs of the chromo domain have been found in fission yeast, ciliated protozoa and all animal species, but appear to be absent in eubacteria, budding yeast and plants [2]. The precise function of the chromo domain is unclear, but the chromo domain is thought to act as a targeting module for chromosomal proteins, although the chromosomal contexts and functional contexts being targeted vary. In all cases studies, the chromo domains are found in proteins that are involved in transcription regulation, positive and negative [2]. 2008-09-03 15:50:29 2004-11-16 16:02:09 14 325 682 504 17304 1 +50 CL0050 HotDog \N HotDog superfamily Bateman A anon The HotDog fold was first observed in the structure of Escherichia coli beta-hydroxydecanoyl thiol ester dehydratase (FabA), where Leesong et al. noticed that each subunit of this dimeric enzyme contained a mixed alpha + beta 'hot dog' fold. They described the seven-stranded antiparallel beta-sheet as the 'bun', which wraps around a five-turn alpha-helical 'sausage', This superfamily contains a diverse range of enzymes. 
Membership includes numerous prokaryotic, archaeal and eukaryotic proteins involved in several related, but distinct, catalytic activities, from metabolic roles such as thioester hydrolysis in fatty acid metabolism, to degradation of phenylacetic acid and the environmental pollutant 4-chlorobenzoate. The superfamily also includes FapR, a non-catalytic bacterial homologue that is involved in transcriptional regulation of fatty acid biosynthesis [1]. 2008-09-03 15:50:29 2004-11-19 13:30:09 11 673 1411 5349 52323 1 +51 CL0051 NTF2 NTF2-like superfamily Bateman A anon This superfamily contains a variety of enzymes such as Scytalone dehydratase, Delta-5-3-ketosteroid isomerase, Limonene-1,2-epoxide hydrolase among others. The family also includes presumed non-enzymatic homologues such as NTF2. 2008-09-03 15:50:29 2004-11-19 15:35:00 13 603 332 4350 23892 1 +52 CL0052 NTN \N NTN hydrolase superfamily Bateman A anon In the N-terminal nucleophile aminohydrolases (Ntn hydrolases) the N-terminal residue provides two catalytic groups, nucleophile and proton donor. These enzymes use the side chain of the amino-terminal residue, incorporated in a beta-sheet, as the nucleophile in the catalytic attack at the carbonyl carbon. The nucleophile is cysteine in GAT, serine in penicillin acylase, and threonine in the proteasome. All the enzymes share an unusual fold in which the nucleophile and other catalytic groups occupy equivalent sites. This fold provides both the capacity for nucleophilic attack and the possibility of autocatalytic processing [1]. 2008-09-03 15:50:29 2004-11-19 17:25:18 17 2263 255 5468 47927 1 +53 CL0053 4H_Cytokine 4-helical cytokine superfamily Bateman A anon Cytokines are regulatory peptides that can be produced by various cells for communicating and orchestrating the large multicellular system. Cytokines are key mediators of hematopoiesis, immunity, allergy, inflammation, tissue remodeling, angiogenesis, and embryonic development [2]. 
This superfamily includes both the long and short chain helical cytokines. 2008-09-03 15:50:29 2004-11-21 12:08:45 14 272 33 865 5160 1 +54 CL0054 Knottin_1 \N Scorpion toxin-like knottin superfamily Bateman A anon This clan includes a number of toxin families that share the knottin structure. These families come from scorpions, plants and arthropods. 2008-09-03 15:50:29 2004-11-22 17:39:28 12 164 12 369 2025 1 +55 CL0055 Viral_ssRNA_CP \N Positive stranded ssRNA viruses coat protein Finn RD anon The clan contains a set of viral coat protein families and peptidase A6. The only known peptidase activity is an autolytic cleavage releasing a 44-residue C-terminal fragment. The reaction is very slow and only occurs within the assembled virion. There is debate whether this is actually a true peptidase. The virion with these coat or capsid\ proteins are icosahedral viruses containing sixty triangular coat protein units, each unit consisting of three proteins. The coat protein consists of two subdomains, an eight-stranded beta-barrel on the surface and a three-helix bundle on the inner face. 2008-09-03 15:50:29 2004-11-24 13:16:32 12 790 144 9475 53300 1 +56 CL0056 C_Lectin \N C-type lectin-like superfamily Bateman A anon This clan contains domains that have a C-type lectin fold. Many of these are known or expected to mediate interactions with sugars. 2008-09-03 15:50:29 2004-11-24 13:34:58 11 581 825 748 15749 1 +57 CL0057 Met_repress MetJ/Arc repressor superfamily Bateman A anon This superfamily contains the MetJ and Arc repressors that feature a ribbon-helix-helix DNA-binding motif with the beta-ribbon located in and recognising the major groove of operator DNA [1]. 2008-09-03 15:50:29 2004-11-24 15:10:49 13 185 88 3932 18127 1 +58 CL0058 Glyco_hydro_tim Tim barrel glycosyl hydrolase superfamily Bateman A anon This large superfamily contains a range of glycosyl hydrolase enzymes that possess a TIM barrel fold. This CLAN merges clans GH-A, GH-D, GH-H and GH-K from CAZy. 
2008-09-03 15:50:29 2004-11-25 11:37:58 15 2459 2481 7032 114169 1 +59 CL0059 6_Hairpin Six-hairpin glycosidase superfamily Bateman A anon This Clan includes CAZy clans GH-L, GH-M and GH-G. The members of this clan share a common structure composed of 6 helical hairpins. Most members of this superfamily are glycosyl hydrolase enzymes. 2008-09-03 15:50:29 2004-11-26 15:47:32 14 530 867 4822 41246 1 +60 CL0060 DNA_clamp \N DNA clamp superfamily Bateman A anon Sliding DNA clamps are ring-shaped proteins that allow DNA polymerase to achieve high processivity during chromosome replication by tethering the polymerase catalytic subunit to DNA. All of the structures share a 12-fold symmetry around the ring consisting of a simple structural repeat, though there is structural divergence in some of the repeats. Bacterial beta-clamps contain six repeats per subunit with two subunits per ring while the eukaryotic and bacteriophage clamps contain four repeats per subunit with three subunits per ring. Pairs of these repeats form a domain, which has been termed the 'processivity fold'; thus the ring of the sliding clamp contains six domains and therefore is often described as having 6-fold symmetry. A structural representative of a fourth family of processivity fold proteins, namely the herpes simplex virus UL42 protein, is also available. UL42 does not form a ring-shaped clamp, however, but rather functions as a monomer and interacts with DNA quite differently than do sliding clamps; it has been suggested that UL42 resembles a primitive ancestor of sliding clamps [2]. 2008-09-03 15:50:29 2004-11-26 17:13:41 11 241 64 5243 18116 1 +61 CL0061 PLP_aminotran \N PLP dependent aminotransferase superfamily Bateman A anon This superfamily contains a variety of PLP-dependent enzymes. 
2008-09-03 15:50:29 2004-11-29 11:27:35 12 1560 574 7712 152775 1 +62 CL0062 APC APC superfamily Bateman A anon This large superfamily contains a variety of transporters including amino acid permeases that according to TCDB belong to the APC (Amino acid-Polyamine-organoCation) superfamily. 2008-09-03 15:50:29 2004-11-29 18:13:15 12 36 417 5587 126928 1 +63 CL0063 NADP_Rossmann FAD/NAD(P)-binding Rossmann fold Superfamily Finn RD anon A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site [1]. In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD. 2008-09-03 15:50:29 2004-11-30 13:45:28 24 8681 8330 18998 984446 1 +64 CL0064 CPA_AT \N CPA/AT transporter superfamily Bateman A anon This Clan contains transporter proteins that belong to the CPA superfamily and AT superfamily according to TCDB [1]. 2008-09-03 15:50:29 2004-12-02 10:12:55 11 10 163 5252 48799 1 +65 CL0065 Cyclin \N Cyclin-like superfamily Bateman A anon This Clan contains cyclins, Transcription factor IIB (TFIIB), and the Retinoblastoma tumour suppressor proteins. These were predicted to be related by sequence [1]. 
2008-09-03 15:50:29 2004-12-02 13:15:02 14 236 144 804 14080 1 +66 CL0066 Trefoil \N Beta-trefoil superfamily Bateman A anon This family corresponds to a large set of related beta-trefoil proteins [1]. The beta-trefoil is formed by six two-stranded hairpins [2]. Three of these form a barrel structure and the other three are in a triangular array that caps the barrel. The arrangement of the secondary structures gives the molecules a pseudo 3-fold axis. 2008-09-03 15:50:29 2004-12-06 15:34:20 13 659 720 1705 12929 1 +67 CL0067 SIS \N SIS domain fold Bateman A anon This catalytic domain catalyses isomerisation reactions of a variety of sugars [1]. 2008-09-03 15:50:29 2004-12-06 16:11:44 12 275 84 6149 42474 1 +68 CL0068 RIIa \N RIIa-like fold Bateman A anon This clan includes both the RIIa dimerisation motif as well as the Dpy-30-like motif [1]. 2008-09-03 15:50:29 2004-12-07 15:58:47 11 37 67 303 1405 1 +69 CL0069 GFP \N GFP-like superfamily Bateman A anon This superfamily has an unusual fold of an 11 stranded beta barrel enclosing an alpha-helix. This superfamily includes green fluorescent protein as well as a domain from nidogen. 2008-09-03 15:50:29 2004-12-08 17:25:21 11 623 171 194 602 1 +70 CL0070 ACT \N ACT-like domain Bateman A anon These domains are involved in binding to amino-acids and causing allosteric regulation of linked enzyme domains [1]. The relationship between these two families was first noticed in [2]. 2008-09-03 15:50:29 2004-12-08 17:26:00 12 241 221 4906 47783 1 +71 CL0071 His_phosphatase PGM; Histidine phosphatase superfamily Finn RD, Rigden DJ anon The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. 
Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue.\ Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches [1]. 2008-09-03 15:50:29 2004-12-09 14:54:21 11 292 168 4773 26470 1 +72 CL0072 Ubiquitin \N Ubiquitin superfamily Bateman A anon This family includes proteins that share the ubiquitin fold. It currently unites four SCOP superfamilies. 2008-09-03 15:50:29 2004-12-09 18:04:44 19 1053 1522 6169 107825 1 +73 CL0073 P53-like \N Beta-sandwich DNA-binding domain Bateman A anon This clan contains a variety of DNA-binding domains that contain an immunoglobulin-like fold. It includes the DNA-binding domains of NF-kappaB, NFAT, p53, STAT-1, the T-domain and the Runt domain [1]. 2008-09-03 15:50:29 2004-12-10 09:44:45 12 340 136 553 5385 1 +74 CL0074 Matrix \N Retroviral matrix superfamily Bateman A anon This clan brings together matrix proteins from a variety of retroviruses. 2008-09-03 15:50:29 2004-12-10 10:07:15 12 41 88 280 38433 1 +75 CL0075 Defensin \N Defensin/myotoxin-like superfamily Bateman A anon This clan includes diverse defensins as well as myotoxins. 2008-09-03 15:50:29 2004-12-10 13:40:46 12 146 13 137 1375 1 +76 CL0076 FAD_Lum_binding \N Riboflavin synthase/Ferredoxin reductase FAD binding domain Finn RD anon Riboflavin nucleotide coenzymes and flavin adenine dinucleotide (FAD) are essential cofactors for a large number of flavoproteins involved in a diverse set of redox reactions. There are thought to be four different FAD-binding folds [1].The FAD-binding fold of this clan is a cylindrical beta-fold. 
More specifically, the domain forms a flattened six-stranded antiparallel beta-barrel organised into two orthogonal sheets (1-2-5 and 4-3-6) separated by one alpha-helix. The cylinder is open between strands strand 4 and 5. This opening of the cylinder makes space for the isoalloxazine and ribityl moieties of the FAD, to which hydrogen bonds are formed from the open edges of the strands. The other end of the cylinder is covered by the only helix of the domain, which is essential for the binding of the pyrophosphate groups of the FAD [1].The structural differences in the FAD-binding domain are manifested mainly as loops of different length and extra extending structural elements, which may be important for interactions with their redox partners [1]. The structural core of all clan members is highly conserved. 2008-09-03 15:50:29 2004-12-10 14:19:14 11 202 346 5130 31559 1 +77 CL0077 FAD_PCMH \N PCMH-like FAD binding Finn RD anon The FAD-binding domains contained in this family fall within the PCMH (p-cresol methyl-hydroxylase) family of FAD binding proteins as defined in [1]. In this family, the structure of the FAD binding domain is comprised of two subdomains. Both of these subdomains have an alpha-beta fold. The first subdomain is comprised of three parallel beta strands, surrounded by alpha helices. The second subdomain contains five antiparallel beta strands, also surrounded by alpha helices. The junction between these two subdomains forms the FAD bind pocket, where the ligand is bound by hydrogen and van der Waals bonds [1]. 2008-09-03 15:50:29 2004-12-10 16:27:25 11 247 229 5425 25538 1 +78 CL0078 DNA_ligase \N DNA/RNA ligase superfamily Bateman A anon This superfamily contains both ATP-dependent and NAD dependent DNA ligase enzymes. The family also includes mRNA capping enzymes. The members of this clan were shown to be related by sequence in [1]. 
2008-09-03 15:50:29 2004-12-10 18:04:20 12 72 170 5269 10191 1 +79 CL0079 Cystine-knot \N Cystine-knot cytokine superfamily Bateman A anon The cytokine families in this clan have the cystine-knot fold. In this 6 cysteines form three disulphide bridges that are interlinked. 2008-09-03 15:50:29 2004-12-14 17:03:09 12 253 73 2754 10562 1 +80 CL0080 Mss4-like \N Mss4-like superfamily Bateman A anon This clan contains TCTP, Mss4 and SelR families [1]. 2008-09-03 15:50:29 2004-12-14 17:24:43 11 51 31 4242 6196 1 +81 CL0081 MBD-like \N MBD-like DNA-binding domain Bateman A anon This clan contains proteins with a distinctive three stranded DNA-binding domain [1]. 2008-09-03 15:50:29 2004-12-14 17:44:54 12 24 149 2355 10578 1 +82 CL0082 MIF \N Tautomerase/MIF superfamily Bateman A anon This clan groups 5-(carboxymethyl)-2-hydroxymuconate isomerase (CHMI) and 4-oxalocrotonate tautomerase (4-OT) with macrophage inhibitory factor (MIF). Interestingly they all share an amino-terminal proline. Members of this clan for homotrimers [1]. 2008-09-03 15:50:29 2004-12-15 11:44:36 11 347 29 3218 5290 1 +83 CL0083 Omega_toxin Omega toxin-like Finn RD anon This clan contains a set of related small protein toxins and what appears to be the functionally distinct Albumin I domain. All members of this clan have a knottin-like fold. Additional information about this clan may be found from [1]. 2008-09-03 15:50:29 2004-12-16 13:53:46 14 83 23 290 1853 1 +84 CL0084 ADP-ribosyl \N ADP-ribosylation Superfamily Finn RD anon The members of this clan all represent ADP-ribosylating catalytic domains. The structurally conserved regions are located at the NAD binding region [1]. According to SCOP, the ADP-ribosylation domain is thought to have an "unusual fold". 2008-09-03 15:50:29 2004-12-16 14:31:35 12 208 320 842 3271 1 +85 CL0085 FAD_DHS \N DHS-like NAD/FAD-binding domain Finn RD anon The members of this family adopt a Rossmann fold, similar to CLAN:CL0063. 
However, the members of this family are distinguished in that the FAD/NAD cofactor is bound in the opposite direction. In this arrangement, the adenosine moiety is found bound at the second half of the fold.\ In addition, the conserved GxGxxG motif found in classical NADP binding Rossmann folds is absent. Finally, another distinguishing characteristic is the formation of an internal hydrogen bond in the FAD molecule [1]. 2008-09-03 15:50:29 2004-12-16 15:32:59 13 399 174 5116 30822 1 +86 CL0086 FAD_oxidored \N FAD-linked oxidoreductase Finn RD anon The members of this clan adopt a TIM barrel fold, which is reminiscent of flavin mononucleotide binding proteins, rather than one similar to other flavin adenine dinucleotide binding domains. However, the way the FAD cofactor binds in quite different compared to the binding of FMN in the TIM-barrel structures [1]. 2008-09-03 15:50:29 2004-12-16 16:43:38 11 60 30 4179 7837 1 +87 CL0087 Acyl-CoA_dh \N Acyl-CoA dehydrogenase, C-terminal domain-like Finn RD anon The Acyl-CoA dehydrogenase FAD binding domain forms an mostly alpha helical domain, comprised of four helices\ arranged in up-and-down bundle. In Acyl-CoA oxidase II this domain appears to have been duplicated. 2008-09-03 15:50:29 2004-12-17 10:15:37 12 254 148 3538 31652 1 +88 CL0088 Alk_phosphatase \N Alkaline phosphatase-like Finn RD anon The members of this clan all share a common structure of their catalytic domains, which contain conserved metal binding residues [1]. 2008-09-03 15:50:29 2004-12-17 11:25:45 15 217 321 5028 38421 1 +89 CL0089 GlnB-like \N GlnB-like superfamily Finn RD anon The members of this clan are characterised by the fact the domains, each comprised of four beta-strand and two alpha helices, tend to form tetrameric structures [1]. 2008-09-03 15:50:29 2004-12-17 12:10:20 12 264 47 4170 11982 1 +90 CL0090 Globin \N Globin-like Finn RD anon The globin fold is an evolutionary conserved six helical fold that is found in bacteria and eukaryotes. 
2008-09-03 15:50:29 2004-12-17 12:17:06 11 2247 121 4808 13282 1 +91 CL0091 NAD_Ferredoxin \N Ferredoxin / Ferric reductase-like NAD binding Finn RD anon The Ferredoxin / Ferric reductase-like NAD binding domain is adopts a Rossmann like fold. However, these families have been excluded from the classical NAD(P) binding Rossmann clan (CLAN:CL0063), due to a divergence of the GxGxxG motif. In this clan, the motif phosphate binding motif is G-T/S-G-A/I-P. The changes in the motif are a reflection of the different way that the NAD(P)H is bound by this fold and the classical Rossmann fold [1,2]. 2008-09-03 15:50:29 2004-12-17 14:54:00 11 192 335 4749 22088 1 +92 CL0092 ADF \N Actin depolymerizing Factor Finn RD anon For motile cells such as Amoeba to move, there must be the rapid recycling of their actin cytoskeleton to enable a dynamic change in their shape. Gelsolin (PFAM:PF00626) and Cofilin (PFAM:PF00241) are two key domain families in this process. Both of these domain are structural and functional similar [1,2]. In particular, the beta sheet found at the core of the domain is structurally well conserved, with the helices that surround this sheet less conserved[2]. 2008-09-03 15:50:29 2004-12-17 17:36:22 11 154 175 529 7818 1 +93 CL0093 Peptidase_CD \N Peptidase clan CD Finn RD anon The members of this clan are all endopeptidase that have the catalytic dyad histidine followed by cysteine. The catalytic histidine is preceded by a block of hydrophobic residues and a glycine, where as the cysteine is preceded by a block of hydrophobic residues and a glutamine and an alanine. The members with a know structure adopt an alpha/beta fold [1]. 2008-09-03 15:50:29 2004-12-21 13:25:00 13 474 1163 1706 9782 1 +94 CL0094 Peptidase_ME \N LuxS/MPP-like metallohydrolase Finn RD anon All members of this clan are characterised by a HXXEH motif, which is is involved in zinc binding. Furthermore all members adopt an alpha and beta fold. 
More specifically, there us a four to six stranded antiparallel beta sheet surrounded by five helices. However, LuxS (PFAM:PF02664) is not a peptidase, although its hydrolytic mechanism of catalysis appears to be conserved [1]. 2008-09-03 15:50:29 2004-12-21 13:57:38 11 224 73 4739 30361 1 +95 CL0095 Peptidase_ML \N Peptidase Clan ML Finn RD anon This clan contains HybD-like domains. HybD is a nickel binding endopeptidase. Structural and sequences analyses have highlighted the presence of two highly conserved motifs that are shared with germination proteases and HybD [1]. Members of this clan adopt an alpha/beta fold, comprised of a central beta sheet, surrounded by alpha helices. 2008-09-03 15:50:29 2004-12-21 15:54:20 12 12 11 1706 3673 1 +96 CL0096 Pept_Inhib_IE \N Peptidase Inhibitor Clan IE Finn RD anon The members of this clan are all cystine rich domains, which form a knottin scaffold. This clan should also contain alpha-amylase but currently this family is a singleton and can not be put into Pfam. Also see [1]. 2008-09-03 15:50:29 2004-12-22 12:16:32 11 34 2 22 52 1 +97 CL0097 TypeIII_Chap \N Type III secretory system chaperone Finn RD anon The translocation of pathogenic proteins into a host cell is mediated by the type III secretory system. A component of this system is a chaperone, which binds to the protein which is going to be secreted in the bacterial cytosol and is involved in translocation of the secreted protein, although the chaperone is not translocated itself. An individual chaperone associates with one or two specific proteins [1]. There are a large number of type III secretory system chaperones, which are small acidic proteins and exhibit significant sequence divergence. This clan groups type III secretory system chaperones. Members with a known structure form small compact globular domains with an alpha-beta(3)- alpha-beta(2)-alpha like organisation [1]. 
2008-09-03 15:50:29 2004-12-22 12:58:31 11 45 14 1697 3343 1 +98 CL0098 SPOUT AB_Knot; SPOUT Methyltransferase Superfamily Finn RD anon A distinct class of methylases that includes the SpoU and TrmD superfamilies and two superfamilies of predicted methylases defined by the YbeA and MJ0421 proteins in bacteria and archaea, respectively [1] (PFAM:PF00588 PFAM:PF01746). SPOUT is structurally distinct compared to more classical methyltransferases [1]. More specifically, the members of this clan form alpha/beta knots. Knots are extremely rare in protein structures as they pose a\ folding problem. The mechanism that allow a domain to be folded as a knot are unclear, but are discussed in [2] and reference therein. All members with known structure form homodimers. 2008-09-03 15:50:29 2004-12-22 15:38:51 13 112 85 5028 32567 1 +99 CL0099 ALDH-like \N ALDH-like superfamily Finn RD anon The aldehyde dehydrogenases (ALDHs) are a superfamily of multimeric enzymes which catalyse the oxidation of a broad range of aldehydes into their corresponding carboxylic acids with the reduction of their cofactor, NAD(P) into NAD(P)H. The way that the NAD is bound is distinct from other NAD(P)-dependent oxidoreductases. The domain represented by this clan consists of two similar subdomains. 2008-09-03 15:50:29 2004-12-22 15:45:47 12 614 141 5455 48240 1 +100 CL0100 C1q_TNF \N C1q and TNF superfamily Finn RD anon The members of the C1q and TNF superfamily are involved in a diverse set of functions, which include: defense, inflammation, apoptosis, autoimmunity differentiation, organogenesis, hibernation and insulin-resistant\ obesity [1]. Both C1q and TNF domains form a compact jelly-roll beta- sandwich. The core of these structures are conserved between the two families and corresponds to the detectable sequence similarity. Proteins containing both of these domains, form trimers before they are active. 
However, the surfaces of the domains are quite different and this difference is thought to give rise to the function difference between the clan members[1]. 2008-09-03 15:50:29 2004-12-22 15:46:56 12 294 48 443 3534 1 +101 CL0101 PELOTA RNA_ribose_bind; Pelota - RNA ribose binding superfamily Finn RD anon The members of this clan are all involved in binding to ribose sugar of RNA[1]. Indeed, the key RNA binding residues are conserved across the different families [1]. Members of this clan form mixed alpha-helical and beta-sheet structures [1][2]. 2012-10-06 18:35:16 2005-01-04 15:00:27 11 212 56 5002 13986 1 +103 CL0103 Gal_mutarotase \N Galactose Mutarotase-like superfamily Bateman A anon This clan is composed of a beta-sandwich that was first observed in domain 5 of beta-galactosidase, then as the central domain of copper amine oxidase, the C-terminal domain of chondroitinase, the C-terminal domain of hyaluronate lyase, the N-terminal domain of maltose phosphorylase and in Galactose Mutarotase [1]. All these enzymes act on a sugar substrate. 2008-09-03 15:50:29 2005-01-28 16:18:34 11 539 313 4717 24927 1 +104 CL0104 Glyoxalase \N VOC superfamily Bateman A anon This clan contains the VOC metalloenzyme superfamily [1]. The known types of reactions that are catalysed include isomerizations (glyoxalase I), epimerizations (methylmalonyl-CoA epimerase), oxidative cleavage of C-C bonds (extradiol dioxygenase), and nucleophilic substitutions (fosfomycin resistance proteins) [1]. 2008-09-03 15:50:29 2005-01-28 18:11:34 12 363 157 4720 43346 1 +105 CL0105 Hybrid \N Barrel sandwich hybrid superfamily Bateman A anon This superfamily contains proteins with a hybrid motif [1]. This motif is embedded in structurally diverse proteins. 2008-09-03 15:50:29 2005-01-31 16:15:18 12 394 669 6221 139176 1 +106 CL0106 6PGD_C \N 6-phosphogluconate dehydrogenase C-terminal-like superfamily Bateman A anon This helical domain is found associated with Rossmann domains. 
2008-09-03 15:50:29 2005-01-31 18:34:00 12 232 123 5955 39015 1 +107 CL0107 KOW \N KOW domain Bateman A anon This superfamily includes proteins involved in translation that have a KOW like SH3-fold. 2008-09-03 15:50:29 2005-03-18 14:03:54 11 584 79 6487 23716 1 +108 CL0108 Actin_ATPase \N Actin-like ATPase Superfamily Finn RD anon The actin-like ATPase domain forms an alpha/beta canonical fold. The domain can be subdivided into 1A, 1B, 2A and 2B subdomains. Subdomains 1A and 1B share the same RNAseH-like fold (a five-stranded beta-sheet decorated by a number of alpha-helices). Domains 1A and 2A are conserved in all members of this superfamily, whereas domain 1B and 2B have a variable structure and are even missing from some homologues [1]. Within the actin-like ATPase domain the ATP-binding site is highly conserved. The phosphate part of the ATP is bound in a cleft between subdomains 1A and 2A, whereas the adenosine moiety is bound to residues from domains 2A and 2B[1]. 2008-09-03 15:50:29 2005-03-22 09:34:28 15 1022 508 11461 141414 1 +109 CL0109 CDA Cytidine deaminase-like (CDA) superfamily Finn RD, Coin L, Iyer LM, Zhang D, Aravind L anon This clan contains both free nucleotide and nucleic acid deaminases that act on adenosine, cytosine, guanine and cytidine, and are collectively known as the deaminase superfamily. The conserved fold consists of a three-layered alpha/beta/alpha structure with 3 helices and 4 strands in the 2134 order [1,2].This superfamily is further divided into two major divisions based on the presence of a helix (helix-4) that renders the terminal strands (strands 4 and 5) either parallel to each other in its presence, or anti-parallel in its absence [2]. Structurally, the deaminase-like fold is present in four other superfamilies including the JAB-like metalloproteins, the C-terminal AICAR transformylase-catalyzing domains of PurH, Tm1506 and the formate dehydrogenase accessory subunit FdhD. 
The active site of the deaminases is composed of three residues that coordinate a zinc ion between conserved helices 2 and 3. The residues are typically found as [HCD]xE and CxxC motifs at the beginning of helices 2 and 3. The zinc ion activates a water molecule, which forms a tetrahderal intermediate with the carbon atom that is linked to the amine group. This is followed by deamination of the base. 2008-09-03 15:50:29 2005-03-22 09:57:40 11 208 1010 5319 27014 1 +110 CL0110 GT-A Glycosyl transferase clan GT-A Bateman A anon This is the GT-A clan that contains diverse glycosyltransferases that possess a Rossmann like fold [1]. 2008-09-03 15:50:29 2005-03-22 10:54:55 11 713 1496 7680 127750 1 +111 CL0111 GT-C Glycosyl transferase GT-C superfamily Bateman A anon This is the GT-C clan that contains diverse glycosyltransferases that possess 8-13 predicted transmembrane segments [1]. 2008-09-03 15:50:29 2005-03-22 13:52:31 10 19 303 4345 19864 1 +112 CL0112 Yip1 Yip1/YIF1-like Finn RD, Mistry J anon Yip1 and YIF1 are members of an integral membrane complex which bind to Ras-like GTPases and are required for membrane fusion of ER derived vesicles with the Golgi [1]. 2008-09-03 15:50:29 2005-03-22 15:32:52 12 4 63 2706 5788 1 +113 CL0113 GT-B \N Glycosyl transferase clan GT-B Bateman A anon This is the GT-B clan that contains diverse glycosyltransferases that possess a Rossmann like fold [1]. 2008-09-03 15:50:29 2005-03-22 17:19:24 12 539 1601 8102 141567 1 +114 CL0114 HMG-box \N HMG-box like superfamily Bateman A anon This clan includes the DNA-binding HMG-box proteins as well as the YABBY-like transcription factors. 2008-09-03 15:50:29 2005-03-23 13:44:43 11 62 241 1493 10953 1 +115 CL0115 Steroid_dh \N Steroid oxidoreductase superfamily Bateman A anon This clan includes several enzymes, including steroid dehydrogenases and isoprenylcysteine carboxyl methyltransferase enzymes. These protein contain a varying number of transmembrane regions. 
2008-09-03 15:50:29 2005-03-23 14:15:40 11 1 60 2542 7438 1 +116 CL0116 Calycin \N Calycin superfamily Bateman A anon The calycin structural superfamily [1-3] includes the lipocalins, the fatty acid-binding proteins (FABPs). 2008-09-03 15:50:29 2005-03-23 14:57:38 12 662 102 3233 10920 1 +117 CL0117 uPAR_Ly6_toxin \N uPAR/Ly6/CD59/snake toxin-receptor superfamily Bateman A anon This superfamily contains snake toxins as well as extracellular cysteine rich domains. 2008-09-03 15:50:29 2005-03-30 14:52:26 10 228 28 268 2910 1 +118 CL0118 Ribokinase \N Ribokinase-like superfamily Bateman A anon All of these enzymes are phosphotransferases that have an alcohol group as an acceptor (EC:2.7.1.-). However, 4-amino-5-hydroxymethyl-2-methylpyrimidine phosphate kinase (HMPP kinase) catalyses two phosphorylation reactions: one to a hydroxymethyl group of hydroxymethyl pyrimidine (HMP) and the second to the phosphomethyl group of HMPP [1]. The common structural feature for the enzymes in this superfamily is a central eight-stranded sheet that is flanked by eight structurally conserved helices, five on one side and three on the other [1]. The active site is located in a shallow groove along one edge of the sheet, with the phosphate acceptor hydroxyl group and -phosphate of ATP close together in the middle of the groove, and substrate and ATP binding at the ends [1]. 2008-09-03 15:50:29 2005-04-01 13:48:37 11 400 155 5041 42135 1 +121 CL0121 Cystatin Cystatin-like superfamily Bateman A anon This superfamily includes cystatins and cathelicidins [1]. The cystatin superfamily comprises cysteine protease inhibitors that play key regulatory roles in protein degradation processes. The progenitor of this superfamily was most probably intracellular and lacked a signal peptide and disulfide bridges, much like the extant Giardia cystatin. A primordial gene duplication produced two ancestral eukaryotic lineages, cystatins and stefins. 
Stefins - included in Pfam:PF00031 - remain encoded by a single or a small number of genes throughout the eukaryotes, whereas the cystatins have undergone a more complex and dynamic evolution through numerous gene and domain duplications [2]. 2008-09-03 15:50:29 2005-04-05 16:56:37 11 86 26 392 2322 1 +122 CL0122 UTRA Chor_lyase; Chorismate lyase/UTRA superfamily Bateman A anon This clan includes chorismate lyase as well as the UTRA domain 2008-09-03 15:50:29 2005-04-05 17:05:44 10 71 33 3574 14169 1 +123 CL0123 HTH Helix-turn-helix clan Bateman A anon This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. 2008-09-03 15:50:29 2005-04-05 17:52:07 17 2812 5699 10949 1020775 1 +124 CL0124 Peptidase_PA Peptidase clan PA Bateman A anon This clan contains a diverse set of peptidases with the trypsin fold. 2008-09-03 15:50:29 2005-04-06 15:44:18 14 2540 1159 7202 60527 1 +125 CL0125 Peptidase_CA Peptidase clan CA Bateman A anon This clan includes peptidases with the papain-like fold. 2008-09-03 15:50:29 2005-04-06 17:40:48 14 814 1796 6922 76571 1 +126 CL0126 Peptidase_MA Peptidase clan MA Bateman A anon Clan MA is one of two zinc-dependent metallopeptidases that contain the HEXXH motif. The two histidines are zinc ligands. The structures of this clan show the active site is between its two sub-domains. 2008-09-03 15:50:29 2005-04-07 08:52:20 17 981 1676 6188 88418 1 +127 CL0127 ClpP_crotonase ClpP/Crotonase superfamily Bateman A anon This family includes several peptidases of peptidase clan SK as well as crotonase like proteins. 2008-09-03 15:50:29 2005-04-07 12:12:33 11 1093 331 8610 70137 1 +128 CL0128 vWA-like von Willebrand factor type A Finn RD anon To add. 2008-09-03 15:50:29 2005-04-07 17:59:38 11 223 1340 4970 35661 1 +129 CL0129 Peptidase_AA \N Peptidase clan AA Bateman A anon This clan contains aspartic peptidases, including the pepsins and retropepsins. 
These enzymes contains a catalytic dyad composed of two aspartates. In the retropepsins one is provided by each copy of a homodimeric protein, whereas in the pepsin-like peptidases these aspartates come from a single protein composed of two duplicated domains. 2008-09-03 15:50:29 2005-04-08 09:36:39 13 1842 629 2629 138509 1 +130 CL0130 Peptidase_AD \N Peptidase clan AD Bateman A anon Members of this clan are peptidases that are integral membrane proteins. The catalytic aspartate is in the conserved GXGD motif. 2008-09-03 15:50:29 2005-04-08 11:13:31 10 5 33 3841 6449 1 +131 CL0131 DoxD-like DoxD-like Mistry J anon The families in this clan are all membrane proteins. The DoxD family is found on enzymes involved in elemental sulphur oxidation [1]. The other families in this clan are poorly characterised. 2008-09-03 15:50:29 2005-04-08 11:57:52 10 0 56 3235 10608 1 +132 CL0132 AbrB \N AbrB/MraZ DNA-binding domain Bateman A anon This superfamily includes the DNA-binding domain of AbrB as well as the presumed DNA-binding protein MraZ (per. comm. A Andreeva and A Murzin). 2008-09-03 15:50:29 2005-04-08 13:53:50 12 50 24 3586 11690 1 +133 CL0133 AT14A-like \N AT14A-like Mistry J anon This clan contains plant proteins. DUF677 family members are AT14A-like proteins that have sequence similarity to fungal, insect and human integrins [1]. The other members of this clan are poorly characterised. 2008-09-03 15:50:29 2005-04-08 14:00:57 10 0 14 26 879 1 +135 CL0135 Arrestin_N-like \N Arrestin_N-like Mistry J anon The families in this clan are involved in vacuolar protein trafficking, G protein signal termination and sporulation. The Arrestin N terminal domain has an Ig-like beta sandwich fold which binds to receptors and impairs their capacity to active G proteins [1]. Arrestins have also been implicated in the endocytosis of receptors and cross talk with other signalling pathways [2]. 
2008-09-03 15:50:29 2005-04-12 14:27:53 10 34 59 768 5476 1 +136 CL0136 Plasmid_toxin \N Plasmid toxin-antitoxin system Mistry J anon The families in this clan are plasmid encoded toxins involved in plasmid maintenance. The plasmid encodes both a toxin and an antitoxin. Upon loss of the plasmid the antitoxin is inactivated more rapidly than the toxin. This allows the toxin to interact with its target thus killing the cell or impeding growth. 2008-09-03 15:50:29 2005-04-14 09:55:24 11 91 44 3558 18077 1 +137 CL0137 HAD HAD superfamily Bateman A anon This clan represents the haloacid dehalogenase (HAD) superfamily that includes a diverse range of enzymes that use an asp carboxylate as a nucleophile [1]. 2008-09-03 15:50:29 2005-04-15 16:57:28 14 780 821 6664 120193 1 +139 CL0139 GADPH_aa-bio_dh \N Amino acid biosynthesis and glycosomal dehydrogenase Mistry J anon This clan contains the C terminal domains of dehydrogenase enzymes involved in the biosynthesis of arginine, aspartate and aspartate derived amino acids. It also contains the C terminal domain of GAPDH, a dehydrogenase involved in glycolysis and gluconeogenesis. 2008-09-03 15:50:29 2005-04-18 13:42:09 10 525 39 9289 24271 1 +140 CL0140 Viral_NABP \N Viral nucleic acid binding Mistry J anon This clan contains viral nucleic acid binding protein families. Two of the families in this clan are known to contain zinc finger motifs [1][2]. 2008-09-03 15:50:29 2005-04-18 14:24:28 10 0 5 71 578 1 +141 CL0141 MtN3-like MtN3-like, vesicle-trafficking cargo-receptors Mistry J anon The clan forms a large and diverse family of proteins with seven transmembrane helices, common topology and, most likely, similar function. Their coding genes exist in all eukaryota and in several prokaryota. 
Some are responsible for metabolic diseases (cystinosis, congenital disorder of glycosylation), others are candidate genes for genetic disorders (cleft lip and palate, certain forms of cancer) or solute uptake and efflux (SWEETs) and many have not yet been assigned a function. Comparison with the properties of well-annotated clan members suggests that the proteins could be involved in protein trafficking and serve as cargo receptors in vesicle trafficking [3]. 2008-09-03 15:50:29 2005-04-19 09:24:39 11 0 74 1425 7738 1 +142 CL0142 Membrane_trans Membrane and transport protein Mistry J anon This clan contains membrane proteins involved in the transport of molecules including amino acids sugars and signalling molecules. It also includes integral membrane cell cycle proteins and some putative ammonia monooxygenases. 2008-09-03 15:50:29 2005-04-20 13:47:04 11 10 78 4928 72098 1 +143 CL0143 B_Fructosidase Beta fructosidase superfamily Mistry J anon This beta fructosidase superfamily [4] is composed of glycosyl hydrolase families. The members of this clan adopt a five-bladed beta-propeller fold [2-3]. The beta-fructosidase superfamily is also known as furanosidase superfamily [4]. 2008-09-03 15:50:29 2005-04-25 11:24:48 15 191 281 2992 11034 1 +144 CL0144 Periplas_BP Periplas_BP-like; Periplasmic binding protein like Mistry J anon This clan includes proteins involved in chemotaxis, membrane transport of sugars and allocrites, and the LacI family transcriptional regulators. It also includes some antigenic basic membrane lipoproteins. 2008-09-03 15:50:29 2005-04-25 15:04:24 12 462 379 4786 70533 1 +145 CL0145 Golgi-transport \N Golgi-transport Mistry J anon This clan contains families that are involved in intracellular transport and signalling.\ \ Arfaptins are proteins which interact with small GTPases involved in vesicular budding at the Golgi complex. They form an elongated dimer of three helix coiled coils and are structurally very similar to the BAR domain [1][2]. 
The Sec34 family is involved in tethering vesicles to the Golgi [3]. 2008-09-03 15:50:29 2005-04-25 16:18:47 14 49 165 766 6140 1 +146 CL0146 Herpes_glyco \N Herpes glycoprotein Mistry J anon This clan contains herpes envelope glycoproteins [1][2]. 2008-09-03 15:50:29 2005-04-25 16:48:44 11 0 3 78 288 1 +147 CL0147 Traffic \N Trafficking protein Mistry J anon The members of this clan are involved in protein trafficking. The Sec20 family are integral membrane proteins involved in ER to Golgi transport [1] and V-SNARES are involved in membrane fusion [2]. 2008-09-03 15:50:29 2005-04-26 10:46:36 10 7 29 336 2129 1 +148 CL0148 Viral_Gag \N Viral Gag protein Mistry J anon This clan contains Gag proteins which are involved in viral assembly and replication [1][2]. 2008-09-03 15:50:29 2005-04-27 12:13:23 10 185 102 374 45347 1 +149 CL0149 CoA-acyltrans \N CoA-dependent acyltransferase superfamily Finn RD anon All characterised families in this clan are involved in CoA-dependent acyltransferase. All families have a characteristic HXXXD motif. 2008-09-03 15:50:29 2005-04-27 14:03:23 11 193 1986 4892 38354 1 +151 CL0151 PK_TIM \N Pyruvate kinase-like TIM barrel superfamily Bateman A anon This superfamily consists of a number of TIM barrel domains found in enzymes such as pyruvate kinase, malate synthase and citrate lyase. 2008-09-03 15:50:29 2005-05-03 14:40:00 11 507 138 6403 40855 1 +153 CL0153 dUTPase \N dUTPase like superfamily Bateman A anon This clan contains dUTPase and many viral proteins that appear to be related. dUTPases are important in virus replication. 2008-09-03 15:50:29 2005-05-04 09:16:52 10 268 42 4817 8241 1 +154 CL0154 C2 C2 superfamily Bateman A anon This superfamily includes C2 domains and C2-like domains. 2008-09-03 15:50:29 2005-05-04 15:46:49 10 222 791 571 27805 1 +155 CL0155 CBM_14_19 \N Carbohydrate binding domain 14/19 clan Bateman A anon This clan includes two different carbohydrate binding modules. 
2008-09-03 15:50:29 2005-05-04 15:50:34 10 1 158 335 6959 1 +156 CL0156 Nucleocapsid \N Mononegaviral nucleocapsid superfamily Bateman A anon This clan contains paramyxoviral and ebola type virus nucleocapsid proteins. 2008-09-03 15:50:29 2005-05-04 16:11:33 10 4 2 470 5978 1 +157 CL0157 Kleisin \N Kleisin superfamily Bateman A anon The kleisin superfamily includes ScpA, Scc1, Rec8, and Barren [1]. Scc1 interacts with SMC proteins through N- and C-terminal domains to form a ring-like structure [1]. 2008-09-03 15:50:29 2005-05-04 16:26:02 10 4 11 3187 3510 1 +158 CL0158 GH_CE \N Glycoside hydrolase/deacetylase superfamily Bateman A anon This superfamily contains diverse enzymes that act on carbohydrates including both hydrolases and deacetylases. 2008-09-03 15:50:29 2005-05-04 16:44:41 11 125 249 4786 20178 1 +159 CL0159 E-set Ig-like fold superfamily (E-set) Finn RD, Bateman A anon This clan includes a diverse range of domains that have an Ig-like fold and appear to be distantly related to each other. The clan includes: PKD domains, cadherins and several families of bacterial Ig-like domains as well as viral tail fibre proteins. it also includes several Fibronectin type III domain-containing families. 2008-12-15 16:59:57 2005-05-09 16:19:14 15 1072 9593 6497 213896 1 +160 CL0160 Methionine_synt Cobalamin-independent synthase Finn RD anon The N-terminal and C-terminal cobalamin-independent synthase domains are structurally similar, adopting a TIM beta/alpha barrel. However, the two domain perform functionally different roles. The N-terminal domain and C-terminal domains both define a catalytic cleft in the enzyme. The N-terminal domain is thought to bind the substrate, in particular, the negatively charged polyglutamate chain. The N-terminal domain is also thought to stabilise a loop from the C-terminal domain. The C-terminal domain contains the active site residues[1]. 
2008-09-03 15:50:29 2005-05-09 16:58:23 10 56 28 4455 12453 1 +161 CL0161 GAF \N GAF domain-like Finn RD anon A clan of related transcriptional regulator domains. 2008-09-03 15:50:29 2005-05-09 18:32:14 11 220 3047 6850 47618 1 +162 CL0162 FBA \N F-box associated Finn RD anon Clan containing related F-box associated families. 2008-09-03 15:50:29 2005-05-09 18:41:19 10 0 47 50 1557 1 +163 CL0163 Calcineurin Calcineurin-like phosphoesterase superfamily Bateman A anon This clan contains the calcineurin-like phosphoesterases. This clan also includes the apparently inactive homologues from the small DNA polymerase subunits [1]. 2008-09-03 15:50:29 2005-05-10 16:59:39 10 274 576 5495 51255 1 +164 CL0164 CUB \N CUB clan Bateman A anon This clan contains the CUB domain [1,2]. 2008-09-03 15:50:29 2005-05-10 17:23:26 12 31 892 190 13225 1 +165 CL0165 Cache \N Cache-like domain Finn RD anon The Cache domain an extracellular domain that is thought to have a role in small-molecule recognition in a wide range of proteins, including the animal Ca(2+)-channel subunits and a class of prokaryotic chemotaxis receptors [1]. 2008-09-03 15:50:29 2005-05-10 17:26:33 10 40 407 2883 12801 1 +166 CL0166 PRD \N PRD domain superfamily Bateman A anon The PRD domain (for PTS Regulation Domain), is the phosphorylatable regulatory domain found in bacterial transcriptional antiterminator of the BglG family as well as in activators such as MtlR and LevR. The PRD domain is phosphorylated on a conserved histidine residue. PRD-containing proteins are involved in the regulation of catabolic operons in Gram+ and Gram- bacteria and are often characterised by a short N-terminal effector domain that binds to either RNA (CAT-RBD for antiterminators (Pfam:PF03123, see also comments for this family)) or DNA (for activators), and a duplicated PRD module which is phosphorylated on conserved histidines by the sugar phosphotransferase system (PTS) in response to the availability of carbon source. 
The phosphorylations are thought to modify the stability of the dimeric proteins and thereby the RNA- or DNA-binding activity of the effector domain. 2008-09-03 15:50:29 2005-05-11 14:46:29 11 9 110 2060 16551 1 +167 CL0167 Zn_Beta_Ribbon Zinc beta-ribbon Finn RD anon A clan of zinc-binding ribbon domains. 2008-09-03 15:50:29 2005-05-11 16:19:40 14 628 1382 6834 87461 1 +168 CL0168 PAN \N PAN-like Finn RD anon PAN domains have significant functional versatility fulfilling diverse biological functions by mediating protein-protein or protein-carbohydrate interactions [1]. These domains contain a hair-pin loop like structure, similar to knottins, but the pattern of disulphide bonds differs. 2008-09-03 15:50:29 2005-05-11 16:56:50 14 59 399 404 5383 1 +169 CL0169 Rep \N Rep-like domain Bateman A anon This clan includes replication proteins for viruses and plasmids. This domain is known to bind DNA. The members of this clan have three motifs. The central HXH is conserved in most families in the clan. 2008-09-03 15:50:29 2005-05-13 18:23:32 10 25 62 3938 11895 1 +170 CL0170 Peptidase_MD \N Peptidase MD Finn RD anon This clan is comprised of carboxypeptidases and the N-terminal domain from Sonic hedgehog proteins. The structure of the latter is similar to the peptidases, but the N-terminal domain of hedgehog has been demonstrate not to be involved in peptidase activity, but is more likely involved in signal transduction [1]. 2008-09-03 15:50:29 2005-05-19 09:30:19 10 53 142 3407 9216 1 +171 CL0171 Phospoesterase \N inositol polyphosphate 1 phosphatase like superfamily Bateman A anon Members of this clan show metal-dependent / lithium sensitive phosphomonoesterase activity. The clan includes inositol polyphosphate 1 phosphatase and fructose 1,6-bisphosphatase [1]. 2008-09-03 15:50:29 2005-05-19 17:12:45 10 289 56 4585 15156 1 +172 CL0172 Thioredoxin Thioredoxin-like; Thioredoxin-like Mistry J anon This clan contains families related to the thioredoxin family. 
Thioredoxins are small enzymes that are involved in redox reactions via the reversible oxidation of an active centre disulfide bond. The thioredoxin fold consists of a 3 layer alpha/beta/alpha sandwich and a central beta sheet. 2008-09-03 15:50:29 2005-05-20 15:54:37 16 2327 1079 7198 142836 1 +173 CL0173 STIR \N STIR superfamily Fenech M anon Both members of this clan are thought to be involved in TOLL/IL1R-like pathways, by mediating protein-protein interactions between pathway components. The N-termini of SEFIR and TIR domains are similar, but the domains are more divergent towards the C-terminus [1]. 2008-09-03 15:50:29 2005-08-11 10:18:06 10 27 1046 1412 7537 1 +174 CL0174 TetR_C \N TetR protein, C-terminal domain-like Fenech M anon This clan features families of transcriptional regulators for multidrug efflux pumps, which belong to the TetR superfamily. They are induced by the presence of a variety of factors, such as antibiotics or organic solvents. The C-terminal region featured in these families is thought to contain the inducer-binding site; the divergent sequences in this region allow for the binding of a variety of different inducers [1-4]. 2008-09-03 15:50:29 2005-08-11 10:47:22 9 244 37 2464 10523 1 +175 CL0175 TRASH TRASH superfamily Fenech M anon TRASH-like domains contain well-conserved cysteine residues that are thought to be involved in metal coordination. These domains are thus expected to be involved in metal trafficking and heavy-metal resistance. It has been suggested that the members adopt a 'treble-clef' fold, with 3/4 beta strands preceding a C-terminal alpha helix [1]. 2008-09-03 15:50:29 2005-08-15 13:59:13 10 149 389 2803 12513 1 +176 CL0176 Chemosens_recp \N Chemosensory 7tm receptor superfamily Finn R, Fenech M anon The members of this clan are families of various gustatory and odorant receptors. They are described as being seven-transmembrane receptors, and in fact all show characteristic regions of hydrophobicity on the alignment. 
2008-09-03 15:50:29 2005-08-15 14:20:45 9 0 50 129 5871 1 +177 CL0177 PBP \N Periplasmic binding protein clan Bateman A anon Periplasmic binding proteins (PBPs) consist of two large lobes that close around the bound ligand. This architecture is reiterated in transcriptional regulators, such as the lac repressors. In the process of evolution, genes encoding the PBPs have fused with genes for integral membrane proteins. Thus, diverse mammalian receptors contain extracellular ligand binding domains that are homologous to the PBPs; these include glutamate/glycine-gated ion channels such as the NMDA receptor, G protein-coupled receptors, including metabotropic glutamate, GABA-B, calcium sensing, and pheromone receptors, and atrial natriuretic peptide-guanylate cyclase receptors [2]. 2008-09-03 15:50:29 2005-08-22 13:15:50 15 1664 837 5964 242652 1 +178 CL0178 PUA \N PUA/ASCH superfamily Bateman A anon This clan consists of the RNA binding PUA domain and ASCH domain. It also contains uncharacterised protein families. 2008-09-03 15:50:29 2005-08-22 16:11:13 15 169 216 5007 19669 1 +179 CL0179 ATP-grasp \N ATP-grasp superfamily Bateman A anon The ATP-grasp domain is found in a wide variety of carboxylate-amine/thiol ligases [1]. It is composed of two subdomains, with ATP being bound in the cleft between the two. 2008-09-03 15:50:29 2005-08-23 14:07:58 13 397 518 7468 61819 1 +181 CL0181 ABC-2 \N ABC-2-transporter-like clan Fenech M anon These families are similar to the ABC-2 transporter subfamily, as described in [1] (Pfam:PF01061). Members of this family are involved in drug transport and resistance. CcmB protein family (Pfam:PF03379) members are also transporters; they are required for haem export into the periplasm [2]. 
2008-09-03 15:50:29 2005-08-23 16:06:40 9 3 239 5481 49701 1 +182 CL0182 IT \N IT (Ion Transporter) superfamily Bateman A anon This superfamily of secondary carriers specific for cationic and anionic compounds, has been termed the ion transporter (IT) superfamily [1]. 2008-09-03 15:50:29 2005-08-23 18:40:04 12 0 126 4842 49966 1 +183 CL0183 PAS_Fold PAS; PAS domain clan Bateman A anon This clan contains PAS domains that are found in a wide variety of bacterial signaling proteins. 2008-09-03 15:50:29 2005-08-24 15:20:49 13 311 6949 5998 88093 1 +184 CL0184 DMT Drug/Metabolite transporter superfamily Bateman A anon This clan contains a variety of transporters which have 4, 5, 9 or 10 membrane spanning helices. Many of the 10 membrane spanning transporters appear to be a duplication of the 5 spanning unit [1]. Many of these families contain a characteristic glycine rich motif close to the C-terminus. 2008-09-03 15:50:29 2005-08-25 11:36:38 10 12 341 5620 114318 1 +186 CL0186 Beta_propeller Beta propeller clan Bateman A anon This large clan contains proteins that contain beta propellers. These are composed of between 6 and 8 repeats. The individual repeats are composed of a four stranded sheet. The clan includes families such as WD40 Pfam:PF00400 where the individual repeats are modeled. The clan also includes families where the entire propeller is modeled such as Pfam:PF02239 usually because the individual repeats are not discernible. These proteins carry out a very wide diversity of functions including catalysis. 2008-09-03 15:50:29 2005-08-26 09:32:54 13 1163 8240 11212 356413 1 +187 CL0187 LysM \N LysM-like domain Fenech M anon The LysM domain (Pfam:PF01476) is thought to be a general peptidoglycan-binding module. Although originally described in bacterial proteins, it has been also found in some eukaryotic sequences. 
It takes up a beta-alpha-alpha-beta conformation, with the beta strands forming an antiparallel beta sheet and the two alpha helices packing on one side of this sheet [1]. 2008-09-03 15:50:29 2005-08-26 09:44:43 10 8 614 4599 29820 1 +188 CL0188 CH \N Calponin homology domain Fenech M anon The calponin homology (CH) domain is found in a variety of contexts, ranging from proteins involved in signalling pathways to cytoskeletal proteins. They seem to have diverse cellular functions, which are thought to include actin binding, involvement in the MAP kinase signalling pathway, and regulation of GEF activity in Rho family GTPase pathways. Structurally, they are organised into three layers, with two parallel alpha helices in the core being sandwiched between another two helices, one on each side [1]. 2008-09-03 15:50:29 2005-08-30 18:32:49 9 107 636 512 11687 1 +189 CL0189 Endonuclease \N Endonuclease V-like superfamily Wuster A anon This clan contains DNA repair proteins. In E. coli endonuclease V initiates DNA repair of deaminated DNA bases and has similarity to motifs required for the catalytic activity of the UvrC endonuclease [1]. 2008-09-03 15:50:29 2005-08-31 14:14:38 9 20 27 4568 5628 1 +190 CL0190 HSP20 HSP20-like chaperone superfamily Fenech M anon The small heat shock proteins (sHSPs) prevent protein aggregation during heat shock and oppose regulated cell death. A conserved arginine residue in the HSP20/alpha-crystallin domain (Pfam:PF00011) has in fact been implicated in the development of cataracts and myopathies [1]. The CS family (Pfam:PF04969) includes proteins that are known to bind HSP90 [2], as well as p23 (Swiss:Q15185), which is an HSP90 co-chaperone [3]. 2008-09-03 15:50:29 2005-09-01 14:25:02 11 201 170 4085 13158 1 +191 CL0191 POTRA \N POTRA domain superfamily Fenech M anon The polypeptide-transport-associated (POTRA) domain is predicted to be organised into three beta-strands and two alpha helices, the latter being found between strands 2 and 3. 
It is usually found associated with a beta-barrel outer membrane domain. It is thought to have a chaperone-like function; the proteins it is found in are involved in processes as diverse as bacterial septation and protein transport across membranes [1]. 2008-09-03 15:50:29 2005-09-01 14:31:22 10 20 53 4354 20069 1 +192 CL0192 GPCR_A Family A G protein-coupled receptor-like superfamily Fenech M anon This clan contains various seven-transmembrane receptors and related proteins. A major member is Pfam:PF00001, members of which have been considered to be typical members of the rhodopsin superfamily. Many members of this clan are Caenorhabditis proteins, suggesting great expansion of the relevant families in these nematode worms. 2008-09-03 15:50:29 2005-09-02 12:58:56 12 304 924 9484 92105 1 +193 CL0193 MBB Outer membrane beta-barrel protein superfamily Bateman A anon This clan gathers together a large set of beta barrel membrane proteins.Although these proteins have different numbers of beta strands in the barrel they have significant sequence similarity between families. 2008-09-03 15:50:29 2005-09-05 10:13:31 13 356 515 4665 118601 1 +194 CL0194 DNA_pol_B-like \N DNA polymerase B like Mistry J anon DNA polymerases replicate DNA by adding nucleotide triphosphate (dNTP) residues to the 5'-end of a growing chain of DNA. They use a complementary DNA chain as a template.` 2008-09-03 15:50:29 2005-09-05 13:42:41 9 181 97 2623 7651 1 +195 CL0195 DBL \N Duff-binding like superfamily Bateman A anon This clan includes DBL (Duffy-binding like) domains from a variety of plasmodium surface proteins. 2008-09-03 15:50:29 2005-09-05 15:25:46 9 4 55 9 832 1 +196 CL0196 DSRM \N DSRM-like clan Bateman A anon This clan contains RNA-binding domains. 
2008-09-03 15:50:29 2005-09-08 18:03:50 11 267 170 5456 15069 1 +197 CL0197 GME \N GME superfamily Bateman A, Shirai H anon This superfamily contains a number of related enzymes such as AstB, peptidyl-arginine deiminase, arginine deiminase and amidinotransferase [1,2]. 2008-09-03 15:50:29 2005-09-15 15:15:35 9 117 27 3437 6472 1 +198 CL0198 HHH Helix-hairpin-helix superfamily Bateman A anon This superfamily includes Helix-hairpin-helix DNA-binding domains. 2008-09-03 15:50:29 2005-09-16 17:04:15 15 655 587 6368 81507 1 +199 CL0199 DPBB \N Double Psi beta barrel glucanase Bateman A anon The DPBB fold is often an enzymatic domain. The members of this family are quite diverse, and if catalytic this family may contain several different functions [1,2]. This clan represents the barwin like barrels. 2008-09-03 15:50:29 2005-09-16 17:23:52 11 29 104 3343 9071 1 +200 CL0200 Prefoldin \N Prefoldin GriffithsJones S, Finn RD, Mistry J anon The Prefoldin domain forms a coiled-coil structure that is involved in substrate-binding in the the chaperone co-factor prefoldin (PFD). Each PFD is assembled from two alpha and four beta subunits. Each alpha subunit contains two, and each beta subunit one, central beta-hairpin that is flanked N- and C-terminally by coiled-coil helices. The N-terminal regions, the prefoldin domain, are found facing into the central cavity of the chaperone. Here exposed hydrophobic patches form an interaction with the substrate (an unfolded protein) [1]. 2008-09-03 15:50:29 2005-09-19 13:51:59 9 9 52 526 2970 1 +201 CL0201 Peptidase_SH \N Peptidase clan SH Bateman A anon This clan includes the serine peptidase assemblin from herpes virus as well as other viral peptidase families predicted to be related [1]. 2008-09-03 15:50:29 2005-09-20 12:38:50 9 47 16 1555 2100 1 +202 CL0202 GBD Galactose-binding domain-like superfamily Bateman A anon This large superfamily contains beta sandwich domains with a jelly roll topology. 
Many of these families are involved in carbohydrate recognition. Despite sharing little sequence similarity they do share a weak sequence motif, with a conserved bulge in the C-terminal beta sheet. The probable role of this bulge is in bending of the beta sheet that contains the bulge. This enables the curvature of the sheet forming the sugar binding site [1]. 2008-09-03 15:50:29 2005-09-20 16:50:13 10 807 2917 4159 38880 1 +203 CL0203 CBD \N Carbohydrate binding domain superfamily Bateman A anon This superfamily includes several carbohydrate binding domains. These domains have a beta sandwich structure. 2008-09-03 15:50:29 2005-09-21 11:02:43 11 97 423 950 3904 1 +204 CL0204 Adhesin \N Bacterial adhesin superfamily Bateman A anon This superfamily includes a variety of bacterial adhesins that have a jelly-roll beta-barrel fold [1]. These domains are involved in sugar recognition. 2008-09-03 15:50:29 2005-09-21 11:17:50 10 156 136 1489 19890 1 +205 CL0205 Di-copper \N Di-copper centre-containing domain Bateman A anon This superfamily includes tyrosinases and hemocyanins that share a di-copper centre [1]. 2008-09-03 15:50:29 2005-09-23 12:59:27 10 106 90 1801 4482 1 +206 CL0206 TRB \N Transcriptional repressor beta-barrel domain Bateman A anon This beta-barrel domain is found at the C-terminus of a variety of transcriptional repressor proteins. 2008-09-03 15:50:29 2005-09-23 14:02:56 10 132 32 4239 8501 1 +207 CL0207 Rhomboid-like \N Integral membrane protein / protease Mistry J anon This clan contains proteins from both bacteria and eukaryotes. The Rhomboid protein is an intramembrane serine protease which is involved in epidermal growth factor (EGF)-dependent signalling pathways [1]. The DER1 family is involved in degradation of misfolded ER proteins [2]. 
2008-09-03 15:50:29 2005-12-01 11:27:30 9 16 91 4052 9025 1 +208 CL0208 UBC \N Ubiquitin conjugating enzyme like superfamily Bateman A anon This superfamily includes a diverse set of proteins that bind to ubiquitin [1]. 2008-09-03 15:50:29 2005-12-02 15:22:57 10 294 238 870 12063 1 +209 CL0209 Bet_V_1_like \N Bet V 1 like Mistry J anon The Bet_V_I family is composed of sequences related to the major Birch (Betula verrucose) pollen antigen Betv1. This allergen is known to cause hayfever, dermatitis, asthma and occasionally anaphylactic shock. The other families in this clan share the same structure as Betv1 which is composed of antiparallel beta sheets and alpha helices. There is a cavity between the beta sheet and a long C terminal helix. The cavity appears to play roles in the binding of lipid molecules [1][2][3] which seems a common feature of the families in this clan. 2008-11-07 17:26:25 2005-12-02 17:50:32 10 443 245 4418 23609 1 +210 CL0210 HNOX-like \N Heme NO and oxygen binding like Mistry J anon This clan contains families that bind small molecules and are predominantly involved in signalling. Members include the heme NO binding domain. This domain is related to soluble guanylate cyclases and is mainly alpha helical in structure.\ Other members of this clan include V4R, which is predicted to be a small molecule binding domain, and a domain often found adjacent to this that is found on activators of aromatic catabolism, and on signalling molecules. 2008-09-03 15:50:29 2005-12-06 09:55:10 10 54 47 793 1692 1 +212 CL0212 SNARE \N SNARE-like superfamily Bateman A anon This clan includes part of the SNARE like superfamily. 2008-09-03 15:50:29 2005-12-08 17:07:43 8 44 96 372 5553 1 +213 CL0213 ShK-like \N Sea anemone toxin k like Mistry J anon Members of this clan include the Crisp domain which is involved in ryanodine receptor Ca2+ signalling, and the ShK domain which is named after the ShK channel inhibitor toxin. 
Both domains are cysteine rich and contain multiple disulphide bonds [1][2][3]. 2008-09-03 15:50:29 2006-01-03 16:32:02 8 27 121 194 2516 1 +214 CL0214 UBA \N UBA superfamily Bateman A anon This superfamily includes domains related to the UBA domain. These domains are often involved in ubiquitin binding. 2008-09-03 15:50:29 2006-01-05 15:54:42 12 148 350 5009 18121 1 +217 CL0217 Rotavirus_VP7 \N Rotavirus VP7 protein Bateman A anon This clan consists of several Rotavirus major outer capsid protein VP7 sequences. The rotavirus capsid is composed of three concentric protein layers. Proteins VP4 and VP7 comprise the outer layer. VP4 forms spikes and is the viral attachment protein. VP7 is a glycoprotein and the major constituent of the outer protein layer [1]. 2008-09-03 15:50:29 2006-01-24 14:08:46 8 28 2 702 4849 1 +218 CL0218 ox_reductase_C \N Oxidoreductase C terminal like Mistry J anon This clan contains the C terminal region of oxidoreductase proteins and putative oxidoreductase proteins. Families in this clan form an alpha/beta structure and are usually found adjacent to an N terminal Rossman fold. 2008-09-03 15:50:29 2006-01-26 11:16:09 11 193 37 3633 12363 1 +219 CL0219 RNase_H \N Ribonuclease H-like superfamily Bateman A anon This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H. 2008-09-03 15:50:29 2006-01-31 17:56:29 13 1007 2352 7614 190937 1 +220 CL0220 EF_hand \N EF-hand like superfamily Bateman A anon The EF hand is a calcium binding domain found in a wide variety of proteins [1]. 2008-09-03 15:50:29 2006-02-01 09:23:33 11 1170 2215 3113 56360 1 +221 CL0221 RRM \N RRM-like clan Bateman A anon This clan contains families that are related to the RNA recognition motif domains. However, not all these families are RNA binding. 
2008-09-03 15:50:29 2006-03-05 12:30:44 10 736 1162 5818 73704 1 +222 CL0222 MviN_MATE \N MviN, MATE-like superfamily Bateman A anon This superfamily consists of a variety of integral membrane protein families. The MATE family are known to be transporters. Other proteins have been implicated in virulence and polysaccharide biosynthesis. 2008-09-03 15:50:29 2006-03-05 13:21:19 7 4 109 5014 53746 1 +223 CL0223 MACRO \N MACRO domain superfamily Bateman A anon This superfamily includes the Macro domain as well as the amino terminal domain from peptidase M17 proteins. 2008-09-03 15:50:29 2006-03-06 10:58:15 7 147 130 4353 8980 1 +224 CL0224 DHQS \N Dehydroquinate synthase-like superfamily Bateman A anon This superfamily includes Dehydroquinate synthase and Iron containing alcohol dehydrogenase which have a similar active site organisation [1]. 2008-09-03 15:50:29 2006-03-06 13:57:58 7 116 47 4875 18804 1 +225 CL0225 FtsL \N FtsL-like superfamily Bateman A anon This clan includes two proteins that are known to interact, FtsL and DivIC which are part of a trimeric complex with DivIB [2]. DivIC and FtsL are bacterial proteins essential for cell division. 2008-09-03 15:50:29 2006-03-06 15:46:31 7 0 9 3920 6384 1 +226 CL0226 M6PR \N Mannose 6-phosphate receptor Bateman A anon This clan includes cation dependent and independent mannose 6-phosphate receptors. 2008-09-03 15:50:29 2006-03-06 16:32:14 8 48 95 319 2892 1 +227 CL0227 Enolase_N \N Enolase N-terminal domain-like superfamily Bateman A anon This domain is found at the N-terminus of the catalytic Tim barrel-like domain in enolase and other enzymes. 2008-09-03 15:50:29 2006-03-06 16:37:34 8 887 42 5636 14408 1 +228 CL0228 Acyltransferase Acyltransferase clan Bateman A anon This clan includes several families of related acyltransferases. 
2008-09-03 15:50:29 2006-03-06 16:43:34 7 2 169 4885 24321 1 +229 CL0229 RING \N Ring-finger/U-box superfamily Bateman A anon This clan includes the Ring zinc finger domains as well as the U-box domain that appears to have lost the zinc coordinating cysteine residues [1]. 2008-09-03 15:50:29 2006-03-06 16:54:46 10 159 1983 4407 49901 1 +230 CL0230 HO \N Heme oxygenase-like superfamily Bateman A anon This clan includes the Heme oxygenase family as well as the TENA/THI-4/PQQC family that are less well characterised [2]. 2008-09-03 15:50:29 2006-03-06 16:57:26 7 195 41 2847 5166 1 +231 CL0231 MazG \N all-alpha NTP pyrophosphohydrolase superfamily Bateman A anon This superfamily includes MazG, HisE and dimeric dUTPases (Not yet in Pfam) [1]. 2008-09-03 15:50:29 2006-03-06 17:03:44 8 112 46 4768 13122 1 +232 CL0232 NifU \N NifU C-terminal domain-like superfamily Bateman A anon This clan includes the C-terminal domain of NifU as well as a large family of uncharacterised domains. 2008-09-03 15:50:29 2006-03-06 17:10:22 7 28 49 4450 9837 1 +233 CL0233 SufE_NifU \N SufE/NifU superfamily Bateman A anon This clan includes iron sulfur cluster assembly proteins. 2008-09-03 15:50:29 2006-03-06 17:18:24 7 30 26 4741 7304 1 +234 CL0234 CTPT \N CTP transferase-like superfamily Bateman A anon This clan includes the integral membrane CTP transferase family as well as a large family of uncharacterised proteins that may also function as nucleotidyltransferases. 2008-09-03 15:50:29 2006-03-06 17:30:58 7 0 20 4821 6518 1 +235 CL0235 PspA \N PspA/ESCRT-III Bateman A anon This clan includes PspA like proteins that are transcriptional activators as well as Snf7, a protein involved in cellular trafficking. 2008-09-03 15:50:29 2006-03-07 09:10:43 8 24 36 2194 5578 1 +236 CL0236 PDDEXK PD-(D/E)XK nuclease superfamily Bateman A anon This clan includes a large number of nuclease families related to holliday junction resolvases [1,2]. 
2012-10-03 14:09:06 2006-03-07 10:09:50 16 584 895 6041 71985 1 +237 CL0237 HD_PDEase \N HD/PDEase superfamily Bateman A anon This clan includes a range of phosphohydrolase enzymes with a common helical fold. 2008-09-03 15:50:29 2006-03-07 17:08:35 7 446 682 5155 45938 1 +238 CL0238 PP2C \N PP2C-like superfamily Bateman A anon This clan includes the PP2C family of phosphatases as well as the SpoIIE family. This suggests SpoIIE proteins may also be phosphatases. 2008-09-03 15:50:29 2006-03-08 17:11:21 7 81 723 3947 17476 1 +239 CL0239 Insulin \N Insulin-like superfamily Bateman A anon This superfamily includes the insulin like hormones. 2008-09-03 15:50:29 2006-03-09 13:10:30 7 895 8 341 1563 1 +240 CL0240 PFK \N PFK-like superfamily Bateman A anon This clan includes two SCOP superfamilies. Strong similarities between NAD kinases, DAG kinase, sphingosine kinase and PFK have previously been shown[1]. 2008-09-03 15:50:29 2006-03-09 16:35:37 7 146 173 5088 20115 1 +241 CL0241 ABC_membrane \N ABC transporter membrane domain clan Bateman A anon This clan includes families that are the membrane components of ABC transporter complexes. In general these regions are composed of six transmembrane helices [1]. 2008-09-03 15:50:29 2006-03-09 17:38:52 7 22 211 5258 54224 1 +242 CL0242 DNA_primase_lrg \N DNA primase large subunit like Mistry J anon This clan contains the large subunit of archaeal and eukaryotic DNA primase, an enzyme which synthesises the oligoribonucleotide primers essential to DNA replication. The large subunit of DNA primase forms interactions with the small subunit and the structure implicates that it is not directly involved in catalysis, but plays a roles in correctly positioning the primase/DNA complex, and in the transfer of RNA to DNA polymerase [1]. The clan also contains the Lef-2 family, which is required for the expression of late genes. There is some evidence to suggest that LEF2 binds to both DNA and the DNA primase small subunit LEF-1 [3]. 
2008-09-03 15:50:29 2006-04-21 14:57:17 8 11 11 506 609 1 +243 CL0243 AEP Archaeo-eukaryotic primase Mistry J anon This clan includes the small subunit of 2 and eukaryotic DNA primase, and primase-helicase proteins from bacteriophages and plasmids. All known cellular life forms use primases to synthesis a short RNA primer which is extended during DNA replication by a polymerase. Bacterial DNA primase adopts a different fold to archaeal and eukaryotic primases and belongs to a different superfamily. 2008-09-03 15:50:29 2006-04-24 16:40:55 9 21 52 1612 2766 1 +244 CL0244 PGBD \N PGBD superfamily Bateman A anon This clan consists of small putative peptidoglycan binding domains composed of three alpha helices. 2008-09-03 15:50:29 2006-05-16 17:51:06 8 23 345 3348 11009 1 +245 CL0245 EDD \N EDD superfamily Bateman A anon The EDD superfamily was identified as an evolutionarily conserved domain (EDD) common to three different folds: mannose transporter EIIA domain (EIIA-man), dihydroxyacetone kinase (Dak), and DegV [1]. Both Dak and EIIA-man perform similar phosphotransfer reactions, suggesting a phosphotransferase activity for the DegV-like family of proteins, whose function other than lipid binding revealed in the crystal structure remains unknown [1]. 2008-09-03 15:50:29 2006-05-17 09:35:18 8 92 62 3559 18450 1 +246 CL0246 ISOCOT_Fold NagB-like; Isomerase,CoA transferase & Translation initiation factor Superfamily Bateman A, Anantharaman V anon This superfamily contains a variety of enzymes and non-enzymatic ligand binding domains. 2008-09-03 15:50:29 2006-05-17 13:20:27 7 279 161 5233 53370 1 +247 CL0247 2H \N 2H phosphoesterase superfamily Bateman A anon This clan includes a number of phosphoesterases that contain an internal duplication. 2008-09-03 15:50:29 2006-05-17 15:53:15 8 24 78 2901 4878 1 +248 CL0248 ParBc \N ParB-like superfamily Bateman A anon This superfamily includes nucleases related to ParB as well as uncharacterised proteins. 
2008-09-03 15:50:29 2006-06-02 16:12:35 7 28 97 4788 13574 1 +249 CL0249 Phage_tail_L \N Phage minor tail protein L clan Bateman A anon This clan includes the phage minor tail protein L as well as a group of uncharacterised proteins that are also presumably phage components. 2008-09-03 15:50:29 2006-06-02 18:35:28 7 0 7 706 1564 1 +250 CL0250 GAD \N GAD domain superfamily Bateman A anon This domain is found as an insert within aspartyl-tRNA synthetase as well as GatB proteins. 2008-09-03 15:50:29 2006-07-27 15:00:10 6 16 17 4522 4847 1 +251 CL0251 MORN \N MORN repeat Mistry J anon The MORN (Membrane Occupation and Recognition Nexus) repeat is found in multiple copies in several proteins including junctophilins (See Takeshima et al. Mol. Cell 2000;6:11-22).\ A MORN-repeat protein has been identified in the parasite Toxoplasma gondiis as dynamic component of cell division apparatus [1].\ It has been hypothesised to function as a linker protein between certain membrane regions and the parasite's cytoskeleton [1]. 2008-09-03 15:50:29 2006-07-31 12:59:47 7 27 334 1412 30871 1 +252 CL0252 NfeD-like \N NfeD like Mistry J anon This clan includes the NfeD family which contains several proteins described as nodulation efficiency protein D (NfeD). The nfe genes (nfeA, nfeB, and nfeD) are involved in the nodulation efficiency and competitiveness of the Sinorhizobium meliloti strain GR4 on alfalfa roots [1]. The specific function the NfeD family is unknown although it is unlikely that NfeD is specifically involved in nodulation as the family contains several different archaeal and bacterial species most of which are not symbionts. 2008-09-03 15:50:29 2006-07-31 13:35:37 6 3 8 2902 4042 1 +254 CL0254 THDP-binding Thiamin diphosphate-binding superfamily Mistry J, Bateman A anon This clan includes pyruvate dehydrogenases, branched chain alpha-keto acid decarboxylases, phosphoketolases and the pyrimidine binding region of transketolases. 
2008-09-03 15:50:29 2006-07-31 17:23:48 8 598 271 5650 93998 1 +255 CL0255 ATP_synthase ATP synthase F0 subunit Mistry J anon This clan contains subunits of the F0 complex of ATP-synthase. The F0 complex is the non-catalytic unit of ATPase and is involved in proton translocation across membranes. 2008-09-03 15:50:29 2006-08-01 13:45:46 8 29 78 10710 23627 1 +256 CL0256 Enolase_TIM \N Enolase like TIM barrel Mistry J anon This clan contains enzymes which adopt a TIM barrel fold. 2008-09-03 15:50:29 2006-08-01 15:00:35 6 1004 67 6025 21543 1 +257 CL0257 Acetyltrans Acetyltrans-like; N-acetyltransferase like Mistry J anon This clan contains families related to N-acetyltransferases. N-acetyltransferases catalyse the transfer of acetyl groups from acetyl-CoA to arylamines. 2008-09-03 15:50:29 2006-08-02 14:16:08 8 664 967 6524 128815 1 +258 CL0258 DALR \N DALR superfamily Bateman A anon Members of this family are anticodon binding domains from various tRNA synthetases. 2008-09-03 15:50:29 2006-08-17 18:00:51 6 13 28 4898 11747 1 +259 CL0259 OstA \N OstA superfamily Bateman A anon This superfamily includes the OstA family as well as a large family of uncharacterised proteins. 2008-09-03 15:50:29 2006-08-22 10:05:45 6 11 32 2407 6093 1 +260 CL0260 NTP_transf \N Nucleotidyltransferase superfamily Bateman A anon This clan contains a diverse set of nucleotidyltransferase enzymes. 2008-09-03 15:50:29 2006-08-22 16:46:46 7 338 577 5508 42917 1 +261 CL0261 NUDIX NUDIX superfamily Bateman A anon This superfamily contains the NUDIX family and one related family. 2008-09-03 15:50:29 2006-08-24 18:58:30 6 360 321 5236 48986 1 +262 CL0262 Trigger_C \N Trigger factor/SurA domain Bateman A anon This helical domain is found in two families of chaperones. It is found at the N terminus of the SurA proteins and at the C-terminus of the trigger factors where presumably it shares a common but as yet unknown function. 
2008-09-03 15:50:29 2006-08-30 10:54:44 6 20 54 4464 9322 1 +263 CL0263 His-Me_finger His-Me finger endonuclease superfamily Bateman A anon This superfamily defined originally by SCOP contains a diverse range of endonucleases. Later Grishin identified the MH1 domain as belonging to the superfamily [1]. 2008-09-03 15:50:29 2006-09-06 17:41:01 7 141 462 5328 22334 1 +264 CL0264 SGNH_hydrolase \N SGNH hydrolase superfamily Bateman A anon This superfamily contains a diversity of hydrolytic enzyme activities. 2008-09-03 15:50:29 2006-09-07 09:23:58 6 122 427 4610 21200 1 +265 CL0265 HIT \N HIT superfamily Bateman A anon The HIT superfamily are a superfamily of nucleotide hydrolases and transferases, which act on the alpha-phosphate of ribonucleotides [1]. 2008-09-03 15:50:29 2006-11-09 16:05:44 6 147 99 4930 15333 1 +266 CL0266 PH \N PH domain-like superfamily Bateman A anon Members of this clan share a PH-like fold. Many families in this clan bind to short peptide motifs in proteins and are involved in signalling. 2008-09-03 15:50:29 2006-11-10 10:42:30 8 419 1526 2586 41992 1 +267 CL0267 S11_L18p \N Ribosomal protein S11/L18p superfamily Bateman A anon This superfamily includes two ribosomal proteins S11 and L18p as well as a domain from eukaryotic peptide chain release factor. This superfamily is likely to share an RNA-binding function. 2008-09-03 15:50:29 2006-11-10 14:46:27 7 451 37 5925 12981 1 +268 CL0268 Pec_lyase-like Pec_lyase; Pectate_lyase; Pectate lyase-like beta helix Bateman A anon This superfamily all contain a right handed beta helix similar to that first found in pectate lyase [1]. 2008-09-03 15:50:29 2006-11-10 15:10:57 6 217 1681 4399 45644 1 +269 CL0269 Maf \N Maf/Ham1 superfamily Bateman A anon This superfamily includes the Maf-like proteins and ITPases related to YjjX [1]. 
2008-09-03 15:50:29 2006-11-10 17:10:08 7 21 29 3802 6399 1 +270 CL0270 Iso_DH \N Isocitrate/Isopropylmalate dehydrogenase-like superfamily Bateman A anon This superfamily of enzymes form dimers and have an active site between the two halves. 2008-09-03 15:50:29 2006-11-10 17:50:17 6 323 67 6496 27220 1 +271 CL0271 F-box \N F-box-like domain Bateman A anon This clan includes classical F-boxes and the PRANC domain found in pox ankyrin proteins. 2008-09-03 15:50:29 2006-11-22 13:28:07 6 29 805 512 18595 1 +272 CL0272 RGS \N RGS-like superfamily Bateman A anon This clan includes RGS domains that possess an alpha helical fold. 2008-09-03 15:50:29 2006-11-27 17:23:03 6 92 152 303 4401 1 +273 CL0273 CYTH \N CYTH-like phosphatase superfamily Bateman A anon CyaB like adenylyl cyclase and the mammalian thiamine triphosphatases define a novel superfamily of catalytic domains called the CYTH domain that is present in all three superkingdoms of life. The catalytic core of these enzymes contain a novel alpha beta scaffold with 6 conserved acidic residues and 4 basic residues [1]. 2008-09-03 15:50:29 2006-12-05 13:50:23 6 48 23 3410 3949 1 +274 CL0274 WRKY-GCM1 \N WRKY-GCM1 superfamily Bateman A anon WRKY and GCM1 are metal chelating DNA-binding domains (DBD) which share a four stranded fold [1]. We present evidence that they share a stabilising core, which suggests a possible origin from a BED finger-like intermediate that was in turn ultimately derived from a C2H2 Zn-finger domain [1]. 2008-09-03 15:50:29 2006-12-05 13:57:43 7 5 215 443 6439 1 +275 CL0275 HAS-barrel \N HAS-barrel superfamily Bateman A anon The HAS barrel is named after HerA-ATP Synthase. In ATP synthases, this domain is implicated in the assembly of the catalytic toroid and docking of accessory subunits, such as the subunit of the ATP synthase complex. 
Similar roles in docking of the functional partner, the NurA nuclease, and assembly of the HerA toroid complex appear likely for the HAS-barrel of the HerA family [1]. 2008-09-03 15:50:29 2006-12-06 11:13:24 8 401 41 11124 22310 1 +276 CL0276 Nucleot_cyclase \N Nucleotide cyclase superfamily Bateman A anon This superfamily includes adenylyl cyclase and the GGDEF domain [1]. 2008-09-03 15:50:29 2006-12-06 14:41:38 7 119 2538 3679 52683 1 +277 CL0277 FAD-oxidase_C \N FAD-linked oxidase C-terminal domain superfamily Bateman A anon This clan consists of a duplicated subdomain in a variety of FAD-liked oxidase/dehydrogenase enzymes. 2008-09-03 15:50:29 2006-12-07 10:48:02 6 129 128 3530 14475 1 +278 CL0278 AIG2 \N AIG2/ChaC-like superfamily Bateman A anon The structure consists of a five-stranded beta-barrel surrounded by two alpha-helices and a small beta-sheet.\ Conservation of residues in a hydrophilic cavity able to bind small ligands in some members suggests that this may also serve as an active site. 2008-09-03 15:50:29 2006-12-12 14:39:36 7 20 48 2219 4183 1 +279 CL0279 GatB_YqeY \N YqeY-like superfamily Bateman A anon This superfamily includes a domain from GatB as well as one from YqeY. Although being structurally distinct they share a common sequence relationship. 2008-09-03 15:50:29 2007-01-24 12:42:11 6 38 17 4060 6337 1 +280 CL0280 PIN \N PIN domain superfamily Bateman A anon This superfamily contains a variety of nuclease enzymes, including PIN domains and the FLAP exonucleases. 2008-09-03 15:50:29 2007-01-25 17:50:12 6 117 241 5231 29721 1 +281 CL0281 CCT \N CCT like-motif Bateman A anon This clan includes the CCT motif as well as a related motif that is similar to the first half of the CCT motif. 2008-09-03 15:50:29 2007-01-29 13:51:28 6 21 28 152 1759 1 +282 CL0282 Serum_albumin \N Serum albumin superfamily Bateman A anon This superfamily includes serum albumin and related families. 
2008-09-03 15:50:29 2007-01-30 15:32:33 6 113 8 71 738 1 +283 CL0283 LigB \N LigB-like superfamily Bateman A anon This clan includes the LigB subunit of the aromatic ring opening dioxygenase LigAB [1]. The clan also includes the Memo-like proteins. 2008-09-03 15:50:29 2007-02-12 17:08:37 6 13 17 2285 4015 1 +284 CL0284 Allatostatin \N Allatostatin superfamily Mistry J anon Allatostatins are pleiotropic neuropeptides. In some insects they are known to inhibit the synthesis of juvenile hormone, an important regulator of development and reproduction. The full role of allatostatins in hormone production is still unclear [1]. 2008-09-03 15:50:29 2007-05-04 14:22:41 5 0 16 41 455 1 +285 CL0285 YycI_YycH \N YycI/YycH superfamily Bateman A anon Both, YycH and YycI are always found in a pair on the chromosome, downstream of the essential histidine kinase YycG. Additionally, both proteins share a function in regulating the YycG kinase with which they appear to form a ternary complex. Structural studies show that these two protein families share two related domains. 2008-09-03 15:50:29 2007-05-14 11:16:06 6 13 4 723 1328 1 +286 CL0286 GCS \N gamma-glutamylcysteine synthetase/glutamine synthetase clan Bateman A, Pei J anon This clan represents a superfamily of carboxylate-amine/ammonia ligases [1] that includes Gamma-Glutamylcysteine synthetase (gamma-GCS) and glutamine synthetase (GS). Gamma-Glutamylcysteine synthetase (gamma-GCS) catalyses the first step in the de novo biosynthesis of glutathione. 2008-09-03 15:50:29 2007-08-10 14:04:49 5 394 88 9114 22193 1 +287 CL0287 Transthyretin \N Transthyretin superfamily Bateman A anon This clan unifies several SCOP superfamilies that all share a 7 stranded beta sandwich fold. 2008-09-03 15:50:29 2007-08-14 16:23:44 6 929 1227 4080 40864 1 +288 CL0288 DAP_epimerase \N DAP epimerase superfamily Bateman A anon This superfamily includes DAP epimerase and proline racemase as well as the PrpF protein. 
It has been suggested that this fold may have evolved from the HotDog fold [1]. 2008-09-03 15:50:29 2007-08-17 11:58:37 5 61 43 3955 13393 1 +289 CL0289 FBD \N Folate binding domain Bateman A anon This folate binding domain is found in the GCV T protein as well as the sarcosine oxidase gamma subunit [1]. 2008-09-03 15:50:29 2007-08-17 13:17:41 5 63 59 4863 12811 1 +290 CL0290 EPT_RTPC \N EPT/RTPC-like superfamily Bateman A anon This superfamily includes Enolpyruvate transferase (EPT) and RNA 3'-terminal phosphate cyclase (RTPC). 2008-09-03 15:50:29 2007-08-17 13:33:07 5 195 54 4992 12126 1 +291 CL0291 KNTase_C Nucleotidyltransferase substrate binding domain Bateman A anon This alpha helical domain is found associated with a variety of nucleotidyltransferase domains. 2008-09-03 15:50:29 2007-08-20 15:49:55 6 35 91 3566 11834 1 +292 CL0292 LysE LysE transporter superfamily Bateman A anon This clan includes a diverse range of transporter families [1]. 2008-09-03 15:50:29 2007-10-04 12:03:53 5 0 68 4993 52424 1 +293 CL0293 CDC \N Cholesterol-dependent cytolysin superfamily Bateman A anon This superfamily includes the MACPF domain as well as the Cholesterol-dependent cytolysins [1]. 2008-09-03 15:50:29 2007-10-19 12:42:17 5 21 68 720 2339 1 +294 CL0294 Sec10 \N Sec10-like superfamily Bateman A anon This superfamily includes large proteins that are parts of the conserved oligomeric Golgi complex and exocyst complex. 2008-09-03 15:50:29 2007-11-20 17:02:32 5 0 34 298 1198 1 +295 CL0295 Vps51 Vps51 domain superfamily Bateman A anon This clan includes an N-terminal domain from several vesicle transport proteins that are related to Vps51. 2008-09-03 15:50:29 2007-11-20 17:11:52 6 11 168 380 6736 1 +296 CL0296 GroES \N GroES-like superfamily Bateman A anon This superfamily includes the GroES protein as well as the N-terminal GroES-like domain from Alcohol dehydrogenase. 
2008-09-03 15:50:29 2008-02-08 16:40:31 4 569 527 6115 49027 1 +297 CL0297 PhoU \N PhoU-like superfamily Bateman A anon This superfamily includes PhoU and its relatives that contain a three helical bundle domain structure. 2008-09-03 15:50:29 2008-02-08 17:40:34 4 21 32 4083 13356 1 +298 CL0298 tRNA_bind_arm \N tRNA-binding arm superfamily Bateman A anon This domain is found in Phe and Ser tRNA synthetases at the N-terminus, and at the C-terminus of Val tRNA synthetase. The domain is composed of two helices. 2008-09-03 15:50:29 2008-02-12 16:57:18 4 43 35 4877 13679 1 +299 CL0299 Peptidase_SF \N Peptidase clan SF Mistry J, Rawlings N anon This clan includes the peptidase S24 and S26 families. These families adopt a mainly beta fold. Members of the family S24 have an additional C-terminal domain containing a bundle of three helices presumably important for binding DNA. 2008-09-03 15:50:29 2008-02-25 12:55:51 4 47 75 5096 20383 1 +300 CL0300 TAT \N Twin-Arginine Translocation Motif Bateman A anon This motif is found in a wide range of secreted proteins. It is named after the conserved pair of arginines that is followed by a hydrophobic stretch. 2008-09-03 15:50:29 2008-02-29 15:32:23 4 26 172 1437 2441 1 +301 CL0301 PA14 \N PA14 superfamily Bateman A anon This clan includes the PA14 domain and related families. 2008-09-03 15:50:29 2008-03-03 17:31:48 5 32 336 731 2126 1 +302 CL0302 Arginase \N Arginase/deacetylase superfamily Bateman A anon This superfamily includes arginase enzymes as well as histone deacetylases and related enzymes [1]. 2008-09-03 15:50:29 2008-04-30 14:26:28 5 410 86 3456 11530 1 +303 CL0303 H2TH \N Helix-two-turns-helix superfamily Bateman A anon This domain is thought to play a role in binding nucleic acids. It is DNA binding in nucleases and RNA-binding in ribosomal S13. 
2008-09-03 15:50:29 2008-05-28 16:24:30 4 289 53 5430 13968 1 +304 CL0304 CheY \N CheY-like superfamily Bateman A anon This clan includes the CheY-like response regulators from bacteria [1-2]. 2008-09-03 15:50:29 2008-06-04 14:50:30 4 475 3478 5545 154594 1 +305 CL0305 PTH2 \N Peptidyl-tRNA hydrolase II superfamily Bateman A anon This clan includes Peptidyl-tRNA hydrolase II as well as a large family of uncharacterised proteins called DUF2000. A structure for DUF2000 shows a similar structure to PTH2. It is not clear if the DUF2000 family are also Peptidyl-tRNA hydrolases. Both families contain a conserved positively charged residue close to the amino terminus that may be part of the active site. 2008-09-03 15:50:29 2008-06-05 14:00:28 4 24 15 963 1328 1 +306 CL0306 HeH \N LEM/SAP HeH motif Bateman A anon This superfamily includes protein domains with the helix-extended loop-helix (HeH) structure. 2008-09-03 15:50:29 2008-09-03 10:49:37 3 74 240 4102 7665 1 +307 CL0307 FUSC Fusaric acid resistance protein-like superfamily Bateman A anon Members of this clan are likely to be integral membrane bound transporters. 2008-09-05 13:12:16 2008-09-05 14:12:16 3 0 73 3485 13824 1 +308 CL0308 DMSO_reductase \N Dimethyl sulfoxide reductase type II family Coggill P anon This clan includes members that are type II dimethyl sulfoxide reductase families, all of which are also membrane anchor proteins belonging to the iron-sulfur molybdoenzyme (CISM) family [1]. 2008-09-23 15:56:29 2008-09-23 16:56:29 3 8 25 1456 4376 1 +310 CL0310 DinB DinB-like superfamily Bateman A anon This superfamily are thought to be metalloenzymes. They possess a four helical bundle core structure with a beta hairpin. Members of the superfamily have a predicted active site composed of three histidines that chelate Nickel or Zinc. In some cases these histidines are replaced with Aspartate or Glutamate. Mostly they form a dimeric structure. 
The dinB gene is one of the DNA-damage-induced genes and the corresponding protein, DinB, is the founding member of the clan. The protein contains a four-helix up-down-down-up bundle that has previously been described in the literature in three disparate proteins: the enzyme MDMPI (mycothiol-dependent maleylpyruvate isomerase), YfiT and TTHA0303, a member of a small DUF (domain of unknown function). Most (but not all) clan members seem to have the ability to coordinate a metal ion using a conserved histidine-triad motif. The proteins that share the fold exhibit four different quaternary structures: monomeric and three different dimeric forms [1]. 2008-10-08 12:01:30 2008-10-08 13:01:30 3 36 73 2472 9715 1 +311 CL0311 SCP2 \N SCP-2 sterol transfer superfamily Bateman A anon This superfamily includes the SCP2 family as well as a domain from the mycothiol dependent maleylpyruvate isomerase. 2008-10-13 14:19:21 2008-10-13 15:19:21 3 29 62 2115 4976 1 +312 CL0312 HemS_ChuX \N Heme iron utilization protein-like superfamily Bateman A anon This superfamily includes HemS and ChuX like protein families. 2008-10-22 16:49:13 2008-10-22 17:49:13 3 13 11 760 1473 1 +314 CL0314 PP-binding \N ACP-like superfamily Bateman A anon \N 2008-11-19 12:30:53 2008-11-19 12:30:53 3 127 3226 5406 42314 1 +315 CL0315 Gx_transp Gx transporter superfamily Bateman A anon This superfamily includes a wide range of transporters that contain many conserved glycine residues in the presumed transmembrane regions. 2008-11-21 16:40:49 2008-11-21 16:40:49 3 4 132 4338 23854 1 +316 CL0316 Acyl_transf_3 \N Membrane acyl transferase superfamily Bateman A anon This superfamily includes a wide variety of integral membrane acyltransferase enzymes that often acylate sugars. 2008-11-24 13:14:42 2008-11-24 13:14:42 3 0 72 4090 17608 1 +317 CL0317 Multiheme_cytos Multiheme cytochrome superfamily Coggill P anon This family includes cytochromes that contain multiple CxxCH motifs. 
2008-12-10 17:38:20 2008-12-10 17:38:20 3 247 280 1655 11292 1 +318 CL0318 Cytochrome-c Cytochrome c superfamily Coggill P anon This family includes proteins where a covalently-bound haem completes the core. The core is three helices in an open folded leaf formation. The members are monodomain cytochromes. 2008-12-12 14:30:33 2008-12-12 14:30:33 3 552 457 3452 28903 1 +319 CL0319 SHS2 Rob_SOUL; SHS2 domain Bateman A, Anantharaman V anon SHS2 is a novel domain with a simple fold containing a core of 3 strands, forming a curved sheet, and a single helix in a strand-helix-strand-strand (SHS2) configuration [1]. SHS2 is found in the bacterial cell division ATPase FtsA, the archaeo-eukaryotic RNA polymerase subunit Rpb7p, the GyrI superfamily, and the uncharacterized MTH1598/Tm1083-like proteins [1]. The fold exists as single copy versions in FtsA (where it is inserted into the RNAseH fold), Rbp7p and Dodecin [1]. It is found as a diad in the GyrI superfamily. In MTH1598/Tm1083-like proteins two copies of SHS2 are found with one inserted into another [1]. The single-copy versions in FtsA and Rbp7 mediate protein–protein interactions, while the one in Dodecin is a small molecule binding domain. The GyrI also binds small molecule, while the MTH1598 is predicted to be enzymatic [1]. 2008-12-12 19:39:43 2008-12-12 19:39:43 3 250 83 4602 13173 1 +320 CL0320 PepSY \N PepSY domain-like superfamily Bateman A anon This family includes the PepSY domain as well as a family of uncharacterised proteins. 2008-12-15 13:39:08 2008-12-15 13:39:08 3 53 124 3218 10624 1 +321 CL0321 PLAT \N PLAT domain like superfamily Bateman A anon This domain has an 8-stranded sandwich structure. 
2008-12-15 14:40:00 2008-12-15 14:40:00 3 91 192 284 3462 1 +322 CL0322 RND_permease \N RND permease superfamily Bateman A anon Different members of the RND superfamily have been shown to transport hydrophobic drugs, fatty acids, bile salts, organic solvents, heavy metals, autoinducers and lipooligosaccharides in bacteria [1]. 2008-12-18 12:52:06 2008-12-18 12:52:06 3 91 154 4816 43065 1 +323 CL0323 Patatin \N Patatin/FabD/lysophospholipase-like superfamily Bateman A anon This superfamily of enzymes contains a Ser/Asp catalytic dyad. Members of this superfamily are all serine acylhydrolase enzymes. 2008-12-18 16:43:32 2008-12-18 16:43:32 3 75 1413 5254 25679 1 +324 CL0324 Homing_endonuc \N Homing endonuclease-like superfamily Bateman A anon This superfamily includes a variety of LAGLIDADG-like homing endonuclease like families. 2009-01-08 17:06:53 2009-01-08 17:06:53 3 109 309 2676 6450 1 +325 CL0325 Form_Glyc_dh \N Formate/glycerate dehydrogenase catalytic domain-like superfamily Bateman A anon This superfamily includes the catalytic domain of a variety of dehydrogenase enzymes. The domain has a flavodoxin-like fold and contains an inserted Rossman fold NAD-binding domain. 2009-01-09 10:02:56 2009-01-09 10:02:56 3 370 93 4952 24576 1 +326 CL0326 Reo_sigma \N Virus attachment protein superfamily Bateman A anon This superfamily includes virus attachment proteins that share a common beta sandwich domain. 2009-01-09 10:44:19 2009-01-09 10:44:19 3 291 20 282 678 1 +327 CL0327 Pilus \N Pilus subunit Mistry J anon This clan contains bacterial pilus subunits and proteins involved in secretion. Pili proteins enable the transfer of plasmid between bacteria. The families in this clan adopt an alpha helical structure which is packed against a beta sheet [2-3].
2009-01-12 11:15:14 2009-01-12 11:15:14 3 115 552 3218 21160 1 +328 CL0328 2heme_cytochrom Transmembrane di-heme cytochrome superfamily Bateman A anon This superfamily includes a variety of different heme binding cytochromes. 2009-05-08 19:48:23 \N 3 111 270 42880 138619 1 +329 CL0329 S5 \N Ribosomal protein S5 domain 2-like superfamily Bateman A anon This superfamily contains a wide range of families that possess a structure similar to the second domain of ribosomal S5 protein. 2009-05-08 19:52:37 \N 3 764 344 6126 79017 1 +330 CL0330 AVL9 Late secretory pathway transport machinery Bateman A anon Members of this clan are involved in vesicle formation/trafficking. 2009-05-08 19:52:58 \N 3 2 129 304 2924 1 +331 CL0331 EpsM General secretion pathway protein M Coggill P anon These families are involved in the general secretory pathways of bacteria and are normally membrane-bound. 2009-05-08 19:53:19 \N 3 6 33 1882 7823 1 +332 CL0332 AcetylDC-like Acetyl-decarboxylase like superfamily Coggill P anon These families are double psi-beta barrel structures. 2009-05-08 19:53:39 \N 3 175 114 3736 19949 1 +333 CL0333 gCrystallin gCrystallin-like; Gamma-Crystallin-like superfamily Coggill P anon This superfamily includes a number of mammalian crystallins as well as ancestral beta gamma-crystallin precursor structures. 2009-05-08 19:54:54 \N 3 107 65 320 3580 1 +334 CL0334 THBO-biosyn Tetrahydrobiopterin biosynthesis-like enzyme superfamily Coggill P anon The families in this clan bind purine or pterin in topologically similar sites between subunits. 2009-05-08 19:57:58 \N 3 423 52 4568 17543 1 +335 CL0335 FumRed-TM Fumarate reductase respiratory complex transmembrane subunits Coggill P anon This superfamily constitutes two distinct families: in one family the common fold is contained in a single-chain subunit, in the other it is formed by two chains.
2009-05-08 19:58:20 \N 3 154 20 3092 7355 1 +336 CL0336 FMN-binding FMN-binding split barrel superfamily Coggill P anon This includes those related to the ferredoxin reductase-like FAD-binding domain and those that are Pyridoxine 5'-phosphate oxidase (PNP)-like. 2009-05-08 19:58:45 \N 3 224 139 4634 22171 1 +337 CL0337 RF \N Release factor superfamily Coggill P anon These families are peptide chain release factors. 2009-05-08 19:59:09 \N 3 19 24 4802 12227 1 +339 CL0339 PFL-like PFL-like glycyl radical enzyme superfamily Coggill P anon The N- and C-terminal halves of the structure have similar topologies but in some cases only one is represented by the members here, viz; the C-terminal domain of the R1 subunit of ribonucleotide reductase, and the N-terminal of PFL. The full-length structure is modelled by NRDD. 2009-05-08 19:59:52 \N 3 156 130 5379 23959 1 +340 CL0340 PTase-anion_tr Phosphotransferase/anion transport protein superfamily Coggill P anon The families here are the cytoplasmic regions of anion transporter proteins. 2009-05-08 20:00:18 \N 3 25 126 3482 19584 1 +341 CL0341 LDH_C \N LDH C-terminal domain-like superfamily Bateman A anon This superfamily includes the C-terminal domain of lactate/malate dehydrogenase as well as the C-terminal domain of the glycosyl hydrolase 4 family. 2009-05-08 20:00:39 \N 3 454 19 7644 15125 1 +342 CL0342 TolB_N TolB, N-terminal domain Bateman A anon Members of this superfamily appear to behave like the N-terminal fold of the TolB transport-portal complex protein, which is beta-stranded. 2009-05-08 20:00:59 \N 3 23 67 2109 6978 1 +343 CL0343 MHC MHC antigen-recognition domain Coggill P anon This superfamily includes all the Class I-related antigen-recognition domain families. 2009-05-08 20:01:29 \N 3 1002 27 843 46029 1 +344 CL0344 4Fe-4S 4Fe-4S ferredoxins Bateman A anon Superfamily includes proteins containing domains which bind to iron-sulfur clusters. 
Members include bacterial ferredoxins, various dehydrogenases and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 2009-05-08 20:01:52 \N 3 366 2266 6690 129668 1 +345 CL0345 Aerolisin_ETX Aerolysin/ETX pore-forming domain superfamily Coggill P anon This superfamily includes pore-forming venoms and toxins from bacteria, plants, insects and fish. 2009-05-08 20:02:15 \N 3 26 17 136 416 1 +346 CL0346 Ribo_L29 \N Ribosomal protein L29, L29p, superfamily Coggill P anon Superfamily includes Ribosomal protein L29 family and its corresponding mitochondrial ribosomal family, L47. 2009-05-08 20:02:37 \N 3 242 12 4897 5412 1 +347 CL0347 Tetraspannin Tetraspannin-like Mistry J, Finn RD anon This clan includes the tetraspanin family which contains four transmembrane regions. The CD20 family also has four transmembrane regions, but its members are not considered true tetraspanins as they lack nearly all of the key functional tetraspanin residues [1]. 2009-05-08 20:04:53 \N 3 8 50 746 6471 1 +348 CL0348 Phage_tail Phage virion morphogenesis superfamily Finn RD, Coggill P anon Families involved in joining the tail to the head of the phage as well as those completing the head are included herein. 2009-05-08 20:05:12 \N 3 0 8 1841 2993 1 +349 CL0349 DprA MoCo carrier protein-like superfamily Bateman A anon Known family members of this superfamily are required for natural chromosomal and plasmid transformation. DprA is a new member of the recombination-mediator protein family, dedicated to natural bacterial transformation [1]. Superfamily includes lysine_decarboxylases. 2009-05-08 20:05:29 \N 3 59 53 4587 10964 1 +350 CL0350 PRC-barrel \N PRC-barrel like superfamily Finn RD, Coggill P anon The PRC-barrel is an all beta barrel domain found in the photosynthetic reaction centre subunit H of the purple bacteria [1]. 
2009-05-08 20:05:48 \N 3 122 34 4098 7238 1 +351 CL0351 CHCH Coiled-coil helix coiled-coil helix superfamily Bateman A anon The conserved [coiled coil 1]-[helix 1]-[coiled coil 2]-[helix 2] domain (CHCH domain) superfamily members include NADH-ubiquinone oxidoreductases, some cytochrome oxidases and yeast mitochondrial ribosomal proteins. Within each helix of the CHCH domain there are two cysteines present in a C-X9-C motif. 2009-05-08 20:06:07 \N 3 58 39 353 2801 1 +352 CL0352 EsxAB WXG100-A/WXG100-B dimer Finn RD anon The WXG100 protein secretion system (Wss) is responsible for the secretion of WXG100 proteins (PF06013), such as ESAT-6 (6 kDa early secreted antigenic target) and CFP-10 (10 kDa culture filtrate protein) in Mycobacterium tuberculosis or EsxA (ESAT-6-like extracellularly secreted protein A) and EsxB in Staphylococcus aureus. These two proteins, generally encoded in the same gene cluster, form a 1:1 heterodimeric complex. These proteins are virulence factors involved in host-pathogen interaction [1], as demonstrated in Mycobacterium tuberculosis, Staphylococcus aureus or Bacillus anthracis. The Wss is encoded in many other Gram-positive (monoderm) bacteria. This superfamily contains a number of DUFs which are closely related and may or may not represent the same family of proteins. 2009-05-08 20:06:25 \N 3 34 135 901 12295 1 +353 CL0353 TIMP-like \N TIMP-like superfamily Bateman A anon This superfamily consists of the C-terminal domains of netrins, complement proteins C3, C4, C5, secreted frizzled-related proteins, and type I procollagen C-proteinase enhancer proteins, as well as the homologous N-terminal domains of tissue inhibitors of metalloproteinases (TIMPs). 2009-05-08 20:06:43 \N 3 69 60 208 1675 1 +354 CL0354 bBprotInhib \N beta-Barrel protease inhibitors Coggill P anon Superfamily consists of both metalloprotease- inhibitors and staphostatins.
2009-05-08 20:07:02 \N 2 12 6 424 573 1 +355 CL0355 CheC-like CheC-like superfamily Bateman A, Tuff TJ anon The chemotactic response regulator superfamily are CheY-P phosphatases. Their structure is two intertwined alpha-beta-(X)-beta(2) motifs. This superfamily comprises two classes of proteins each shown to interact with the chemotaxis response regulator CheY: the FliM switch proteins and the CheC-type phosphatases [1]. FliM is a component of the flagellar switch found across the bacteria and is responsible for binding CheY-P and changing the rotational direction of the flagella. The N-terminal domain is CheC-like and the C-terminal shares the SpoA domain with FliN and FliY. The CheC family is broadly broken down into three phosphatase subfamilies: CheC, CheX, and FliY. All three have an active site consensus sequence of D/S-X(3)-E-X(2)-N-X(22)-P. 2009-05-08 20:07:19 \N 3 20 27 2027 5059 1 +356 CL0356 AMP_N-like Creatinase/prolidase N-terminal domain superfamily Coggill P anon Bacterial amino-peptidases and creatinases, where the fold is a ribonuclease H-like motif, are grouped in this superfamily. 2009-05-08 20:07:37 \N 3 91 34 4808 10134 1 +357 CL0357 SMAD-FHA SMAD/FHA domain superfamily Bateman A anon Superfamily members carry a few short helices inserted in loops within the 11 strands in 2 sheets (greek-key) of the parent fold. 2009-05-08 20:07:54 \N 3 135 399 2102 12462 1 +359 CL0359 Intron-mat_II \N Type II intron maturase-like superfamily Bateman A anon Superfamily includes a variety of transcription factors that bind intron RNA during reverse transcription and splicing. 2009-05-08 20:08:30 \N 3 14 51 26168 39344 1 +360 CL0360 MTH1187-YkoF MTH1187/YkoF-like superfamily Coggill P anon Putative cell-wall biogenesis proteins and HMP-binding proteins, all with the same Ferredoxin fold, are included in this superfamily. 
2009-05-08 20:08:47 \N 3 35 7 1666 2146 1 +361 CL0361 C2H2-zf Classical C2H2 and C2HC zinc fingers Coggill P anon Superfamily of classical and closely related C2H2 or beta-beta-alpha zinc finger DNA-binding domains. 2009-05-08 20:09:05 \N 3 241 8637 2337 328673 1 +362 CL0362 RAMPS-Cas5-like CRISPR-associated (Cas) Repair Associated Mysterious Proteins Bateman A, Coggill P anon This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats [1]. It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation [2]. Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers [3]. 2009-05-08 20:09:23 \N 3 24 24 1529 4925 1 +363 CL0363 H-int Hedgehog/intein (Hint) superfamily Bateman A anon This superfamily includes Hedgehog C-terminal (Hog) autoprocessing domain and Intein (protein splicing domain) families. 2009-05-08 20:09:41 \N 3 29 477 1142 3298 1 +364 CL0364 Leu-IlvD \N LeuD/IlvD-like Bateman A anon Superfamily includes LeuD-like, IlvD/EDD C-terminal domain-like, and AF0055-like families. 
2009-05-08 20:09:58 \N 3 36 36 4411 10971 1 +365 CL0365 MurF-HprK_N MurF and HprK N-domain-like superfamily Bateman A anon This includes both the MurE/MurF-ligases N-terminal domain and HPr kinase/phosphatase HprK N-terminal domain superfamilies. 2009-05-08 20:10:16 \N 3 14 40 3206 4809 1 +366 CL0366 JAB Mov34-like; JAB-like superfamily Bateman A, Iyer LM, Zhang D, Aravind L anon This superfamily includes a number of proteasome regulatory subunits, eukaryotic initiation factor 3 (eIF3) subunits, regulators of transcription factors and ubiquitination-assisting protein families. In eukaryotes and in prokaryotic cognates of the ubiquitin-based modification pathway, they function as ubiquitin isopeptidases/ deubiquitinases. JAB domains are also found in diverse metabolic pathways in prokaryotes such as siderophore and cysteine biosynthesis. Other distinct versions of the JAB domain, such as RadC are predicted to function as nucleases. Structurally, the JAB domain is related to the nucleotide deaminase and binds a Zinc ion in a similar structural location. 2009-05-08 20:10:33 \N 3 35 121 4285 10806 1 +367 CL0367 CI-2 CI-2 family of serine protease inhibitors Bateman A anon This superfamily includes a range of universally found subtilases, that are serine proteases. 2009-05-08 20:10:50 \N 3 54 6 411 761 1 +368 CL0368 PhosC-NucP1 \N Phospholipase C/P1 nuclease superfamily Coggill P anon This superfamily includes the Phospholipase C and P1-nuclease families. 2009-05-08 20:11:07 \N 3 26 29 2543 5327 1 +369 CL0369 GHD Glycosyl hydrolase domain superfamily Bateman A anon This domain is C-terminal to the catalytic beta/alpha barrel domain. The superfamily includes the C-terminal domain of a number of sugar-lytic families. 2009-05-08 20:11:24 \N 3 345 276 4003 18678 1 +370 CL0370 Uteroglobin Uteroglobin-like superfamily Coggill P anon Members of this superfamily are disulfide-linked dimers of two identical chains, with 4 helices in each. 
They constitute important new cat, rat and rabbit allergens that are contributing to asthma world-wide. 2009-05-08 20:11:41 \N 3 12 2 47 309 1 +371 CL0371 Inovirus-Coat \N Inovirus (filamentous phage) major coat protein Coggill P anon Superfamily contains a number of filamentous phage coat-protein families. 2009-05-08 20:11:58 \N 3 29 3 67 82 1 +372 CL0372 Hy-ly_N \N Hyaluronate lyase-like catalytic, N-terminal domain Bateman A anon This contains virus envelope protein, Chondroitin AC lyase and hyaluronate lyase families. 2009-05-08 20:12:16 \N 3 44 53 774 1055 1 +373 CL0373 Phage-coat Phage coat superfamily Coggill P anon A number of different phage coat-proteins are collected together in this superfamily. 2009-05-08 20:12:33 \N 3 76 25 2757 4895 1 +374 CL0374 PEP-carboxyk \N PEP carboxykinase-like superfamily Bateman A anon This includes the PEP carboxykinase C-terminal domain and HPr kinase HprK C-terminal domain families. 2009-05-08 20:12:50 \N 3 100 16 5373 7630 1 +375 CL0375 Transporter Transporter superfamily Bateman A anon The members of this superfamily are probably all transporter protein domains. 2009-05-08 20:13:08 \N 3 0 42 409 5210 1 +376 CL0376 Oxa1 Cytochrome oxidase biogenesis family Bateman A anon The cytochrome oxidase biogenesis families are membrane transporters akin to the E coli protein YidC. For those proteins whose N-termini must reside in the intermembrane space, export is mediated by the Oxa1p export machinery, machinery that depends upon the membrane potential. Oxa1p homologues are found in all living organisms. TCDB:2.A.9.
2009-05-08 20:13:27 \N 3 0 30 4817 6863 1 +377 CL0377 FAH \N Fumarylacetoacetate hydrolase, C-terminal domain, superfamily Bateman A anon Superfamily contains fumarylacetoacetate hydrolase and related enzymes. 2009-05-08 20:13:45 \N 3 104 39 3513 10941 1 +378 CL0378 ANL Ac-CoA-synth; ANL superfamily Bateman A anon This superfamily consists of enzymes including luciferase, long chain fatty acid Co-A ligase, acetyl-CoA synthetase and various other closely-related synthetases as well as a plant auxin-responsive promoter family. The name ANL derives from three of the subfamilies - Acyl-CoA synthetases, the NRPS adenylation domains, and the Luciferase enzymes [1]. Members of this superfamily catalyse the initial adenylation of a carboxylate to form an acyl-AMP intermediate, followed by a second partial reaction, most commonly the formation of a thioester [1]. 2009-05-08 20:14:02 \N 3 149 2269 6257 68785 1 +379 CL0379 PgaPase \N Pyroglutamate aminopeptidase superfamily Bateman A anon This is a collection of pyrrolidone carboxyl peptidase or pyroglutamate aminopeptidase families from bacteria and archaea. 2009-05-08 20:14:20 \N 3 60 16 1654 2070 1 +380 CL0380 IDO-like Indolic compounds 2,3-dioxygenase-like superfamily Bateman A anon Superfamily contains bacterial tryptophan 2,3-dioxygenase and indoleamine 2,3-dioxygenase-like families. 2009-05-08 20:14:37 \N 3 60 18 837 1644 1 +381 CL0381 Metallo-HOrase \N Metallo-hydrolase/oxidoreductase superfamily Bateman A anon This superfamily of enzymes including beta-lactamases, thiolesterases, members of the glyoxalase II family that catalyse the hydrolysis of S-D-lactoyl-glutathione to form glutathione and D-lactic acid all bind two ions of zinc. An additional family of competence proteins essential for natural transformation do not appear to bind zinc, and might be a transporter involved in DNA uptake.
2009-05-08 20:14:54 \N 3 403 305 5380 50665 1 +382 CL0382 DNA-mend \N DNA breaking-rejoining enzyme superfamily Bateman A anon This is a superfamily of DNA recombinases, topoisomerases and integrases. 2009-05-08 20:15:13 \N 3 116 209 6017 46990 1 +383 CL0383 PheT-TilS Phenylalanine- and lysidine-tRNA synthetase domain superfamily Coggill P anon Families here are thought to contain a putative tRNA-binding structural motif. The families are the C-terminal domains of tRNA-Ile-lysidine and the phenylalanine-tRNA synthetases. 2009-05-08 20:15:30 \N 3 31 54 4735 8465 1 +384 CL0384 PLC PLC-like phosphodiesterases Coggill P anon Superfamily consists of Glycerophosphoryl diester phosphodiesterase and phosphatidylinositol-specific phospholipase C families. 2009-05-08 20:15:48 \N 3 108 279 4173 15000 1 +385 CL0385 Hydrophilin \N Hydrophilin-like superfamily Coggill P anon This superfamily includes plant and bacterial hydrophilin families. 2009-05-08 20:16:05 \N 3 0 17 752 2456 1 +386 CL0386 Ant-toxin_C \N Superantigen toxins, C-terminal domain superfamily Coggill P anon Superfamily contains bacterial super-antigen toxins and the MAP family. 2009-05-08 20:16:23 \N 3 216 9 194 4767 1 +387 CL0387 DHFred \N Dihydrofolate reductase-like Coggill P anon Superfamily contains the dihydrofolate reductases and the RibD C-terminal domain-like including HTP reductase families. 2009-05-08 20:16:40 \N 3 474 41 4883 12238 1 +388 CL0388 FadR-C \N Fatty acid responsive transcription factor FadR, C-terminal domain Coggill P anon Superfamily includes C-terminal domain ligand-binding GntR families and families of fatty acid responsive transcription factors. This C-terminal domain, an antiparallel array of six alpha helices, forms a barrel-like structure, while a seventh alpha helix forms a 'lid' at the end closest to the N-terminal domain - a separate, DNA-binding winged-helix, domain. 
2009-05-08 20:16:57 \N 3 24 47 3168 21611 1 +389 CL0389 TRAF \N TRAF domain-like superfamily Coggill P anon Superfamily has a circularly permuted immunoglobulin-fold topology with an extra beta-strand. Families include the Math and the SIAH, or Seven in absentia, members. 2009-05-08 20:17:16 \N 3 106 201 1094 6315 1 +390 CL0390 zf-FYVE-PHD FYVE/PHD zinc finger superfamily Coggill P anon Superfamily contains a number of zinc-fingers, of the FYVE/PHD type, which are found in several groups of proteins including myelin-associated oligodendrocytic basic proteins (MOBP) Rabphilins, melanophilins, exophilins and myosin-VIIA and Rab-interacting protein families. 2009-05-08 20:17:33 \N 3 184 1096 1231 20485 1 +391 CL0391 CAP_C-like Adenylate cyclase associated (CAP) C terminal like Mistry J anon Families in this clan adopt a beta super helix structure [1-2]. The clan includes the C terminal domain of adenylate cyclase which binds actin [1]. 2009-05-08 20:17:50 \N 3 14 29 324 1064 1 +392 CL0392 Chaperone-J \N Chaperone J-domain superfamily Coggill P anon The J-domain is found in a number of stress-response proteins. It is found at the N-terminal of Hsc20, DnaJ-chaperone in E. coli, and viral large T-antigen proteins; it is also in Hsc40, mammalian auxilin and in both animal and plant DnaJ proteins. It is also found in degenerate form in Pam16 proteins. 2009-05-08 20:18:07 \N 3 72 636 5592 28318 1 +393 CL0393 FucI-AraA_C \N FucI/AraA C-terminal domain-like [50443] Coggill P anon The enzymes in this superfamily function as a hexamer, which is the largest structurally known ketol isomerase, that has no sequence or structural similarity to other ketol isomerases. 2009-05-08 20:18:26 \N 3 24 7 1558 2470 1 +394 CL0394 DsrEFH-like \N DsrEFH-like superfamily Bateman A anon This is a superfamily of small proteins from phototrophic sulfur bacteria that are involved in oxidisation of intracellular sulfur.
2009-05-08 20:18:45 \N 3 54 21 2423 6201 1 +395 CL0395 Tubby_C Tubby C-terminal domain-like Bateman A anon This superfamily contains the scramblase protein family, the Tub family and the DUF567, a family of plant and bacterial proteins of hitherto unknown function. All members are membrane-tethered transcription factors. 2009-05-08 20:19:04 \N 3 9 44 886 2617 1 +396 CL0396 Marvel-like MARVEL domain containing superfamily Bateman A anon The MAL and related proteins for vesicle trafficking and membrane link (MARVEL) domain is a module with a four transmembrane-helix architecture that has been identified in proteins of the myelin and lymphocyte (MAL), physins, gyrins and occludin families. 2009-05-08 20:19:22 \N 3 0 28 346 3075 1 +397 CL0397 TusA-like SirA-like; TusA-like superfamily Bateman A anon Member families include sulfurtransferase TusA. 2009-05-08 20:19:39 \N 3 8 61 3075 6124 1 +398 CL0398 RMMBL_DRMBL \N RNA/DNA-metabolising metallo-beta-lactamase motif Mistry J anon This clan contains the fifth motif found in RNA and DNA metabolising metallo-beta-lactamases. The fifth motif appears to be specific to function [1]. 2009-05-08 20:19:56 \N 3 52 66 3672 6801 1 +399 CL0399 Asp-glut_race Aspartate/glutamate racemase superfamily Bateman A anon Superfamily contains aspartate racemase, glutamate racemase, hydantoin racemase and arylmalonate decarboxylase families from fungi, plants, bacteria and archaeal species. 2009-05-08 20:20:14 \N 3 108 17 4346 8931 1 +400 CL0400 GG-leader \N Double-Glycine leader-peptide cleavage motif Coggill P anon This is a collection of short bacterial families that carry a distinctive GG-cleavage motif. Conservation C-terminal to the GG-motif is not apparent. However, the families are all interconnected with critical virulence attributes of one kind or another. 2009-05-08 20:20:32 \N 3 5 10 475 1960 1 +401 CL0401 AsmA-like AsmA-like OmpF regulator protein superfamily Bateman A anon Families in this collection are AsmA-like. 
Mutations in the AsmA gene restore the assembly of OmpF, a trimeric outer membrane porin from E coli and related bacteria necessary for the cytotoxic action of group-A colicins. 2009-05-08 20:20:49 \N 3 0 60 2432 11866 1 +402 CL0402 Cdc48_2-like Cdc48 domain 2-like Coggill P anon Superfamily contains C-terminal domains of N-ethylmaleimide sensitive fusion proteins, VCP-like ATPases, membrane fusion ATPase p97 domain 2, peroxisome biogenesis factor 1 (PEX-1), domain 2, and ubiquitin fusion degradation protein UFD1 families. 2009-05-08 20:21:06 \N 3 52 46 547 1899 1 +403 CL0403 ADC-like \N Acetoacetate decarboxylase-like Finn RD anon Superfamily contains the acetoacetate decarboxylase enzyme family EC:4.1.1.4, and a family of uncharacterized proteins from bacteria. 2009-05-08 20:21:23 \N 3 20 14 682 968 1 +404 CL0404 BPD_transp_1 BPD_transp_1-like; BPD transporter like Mistry J, Finn RD anon This clan contains families that are involved in transport of molecules across membranes. It includes the bacterial binding protein-dependent transport system inner membrane component, Pfam:PF00528, which is ATP dependent system involved in transport of a range of substrates [1-2]. 2009-05-08 20:21:41 \N 3 36 162 5100 199771 1 +405 CL0405 DNA_b-psBarrel DNA-bdg_psBarrel; DNA-binding pseudo-barrel domain Finn RD anon Superfamily consists of type II restriction endonuclease effector (N-term) domain and plant B3 DNA binding domain families. 2009-05-08 20:21:58 \N 3 5 51 174 2348 1 +406 CL0406 YjbJ-CsbD-like YjbJ-CsbD-like superfamily Finn RD anon CsbD is a bacterial general stress response protein. It's expression is mediated by sigma-B, an alternative sigma factor [1]. The role of CsbD in stress response is unclear. YjbJ is a hypothetical protein with a similar structure. 
2009-05-08 20:22:16 \N 3 2 5 2526 5640 1 +407 CL0407 TBP-like TATA-binding protein like Mistry J, Finn RD anon TBP is a transcription factor whose DNA binding fold is composed of a curved antiparallel beta-sheet [1]. This fold is also found in the N terminal region of DNA repair glycosylases. The N terminal domain of DNA glycosylase has only a single copy of the fold, whereas TBP contains a duplication of this fold [2-3]. 2009-05-08 20:22:35 \N 3 138 32 2007 3848 1 +408 CL0408 PUP \N Purine and uridine phosphorylase superfamily Bateman A anon superfamily contains a number of purine nucleoside phosphorylase, uridine nucleoside phosphorylase, and various nucleosidase families of proteins. 2009-05-08 20:22:53 \N 3 825 229 4830 16130 1 +409 CL0409 GAP \N GTPase activation domain superfamily Finn RD anon Superfamily contains BCR-homology GTPase activation domain (BH-domain) and p120GAP domain-like, including the GAP related domain of neurofibromin, families. 2009-05-08 20:23:11 \N 3 42 410 365 9451 1 +410 CL0410 LEF-8-like LEF-8 like region of RNA polymerase Rpb2 Mistry J, Finn RD anon Late expression factor 8 (LEF-8) is one of the primary components of RNA polymerase produced by polyhedrosis viruses. LEF-8 shows homology to domain 6 of the second largest subunit of prokaryotic DNA-directed RNA polymerase[1]. 2009-05-08 20:23:28 \N 3 138 141 12554 17032 1 +411 CL0411 Vir Antigenic variants from Plasmodium cell-surface Finn RD anon Several families of paralogous proteins are included in this superfamily, largely from Plasmodium species. The genome expresses great numbers of them, and they vary subtly from each other. 2009-05-08 20:23:45 \N 3 0 7 9 1914 1 +412 CL0412 Frag1-like \N Frag1 like Mistry J, Finn RD anon This clan contains the Frag1/DRAD-isomer specific 2-hydroxyacid dehydrogenase, catalytic domain. 
This family represents the largest portion of the catalytic domain of 2-hydroxyacid dehydrogenases as the NAD binding domain is inserted within the structural domain.. +PF00198 2-oxoacid dehydrogenases acyltransferase (catalytic domain)
These proteins contain one to three copies of a lipoyl binding domain followed by the catalytic domain.. +PF04029 2-phosphosulpholactate phosphatase
Thought to catalyse 2-phosphosulpholactate = sulpholactate + phosphate. Probable magnesium cofactor. Involved in the second step of coenzyme M biosynthesis. Inhibited by vanadate in Methanococcus jannaschii. Also known as the ComB family .. +PF03171 2OG-Fe(II) oxygenase superfamily
This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily . This family includes the C-terminal of prolyl 4-hydroxylase alpha subunit. The holoenzyme has the activity EC:1.14.11.2 catalysing the reaction: Procollagen L-proline + 2-oxoglutarate + O2 <=> procollagen trans- 4-hydroxy-L-proline + succinate + CO2. The full enzyme consists of an alpha2 beta2 complex with the alpha subunit contributing most of the parts of the active site . The family also includes lysyl hydrolases, isopenicillin synthases and AlkB.. +PF01073 3-beta hydroxysteroid dehydrogenase/isomerase family
Pfam-B_504 (release 3.0). The enzyme 3 beta-hydroxysteroid dehydrogenase/5-ene-4-ene isomerase (3 beta-HSD) catalyses the oxidation and isomerisation of 5-ene-3 beta-hydroxypregnene and 5-ene-hydroxyandrostene steroid precursors into the corresponding 4-ene-ketosteroids necessary for the formation of all classes of steroid hormones.. +PF04419 4F5 protein family
Members of this family are short proteins that are rich in aspartate, glutamate, lysine and arginine. Although the function of these proteins is unknown, they are found to be ubiquitously expressed .. +PF03061 Thioesterase superfamily
Pfam-B_2758 (release 6.4). This family contains a wide variety of enzymes, principally thioesterases. This family includes 4HBT (EC 3.1.2.23) which catalyses the final step in the biosynthesis of 4-hydroxybenzoate from 4-chlorobenzoate in the soil dwelling microbe Pseudomonas CBS-3. This family includes various cytosolic long-chain acyl-CoA thioester hydrolases. Long-chain acyl-CoA hydrolases hydrolyse palmitoyl-CoA to CoA and palmitate, they also catalyse the hydrolysis of other long chain fatty acyl-CoA thioesters.. +PF02872 5_nucleotidaseC;
5'-nucleotidase, C-terminal domain. Pfam-B_1318 (release 3.0). +PF00003 7 transmembrane sweet-taste receptor of 3 GCPR
This is a domain of seven transmembrane regions that forms the C-terminus of some subclass 3 G-coupled-protein receptors. It is often associated with a downstream cysteine-rich linker domain, NCD3G Pfam:PF07562, which is the human sweet-taste receptor, and the N-terminal domain, ANF_receptor Pfam:PF01094. The seven TM regions assemble in such a way as to produce a docking pocket into which such molecules as cyclamate and lactisole have been found to bind and consequently confer the taste of sweetness .. +PF01661 DUF27;A1pp;
Pfam-B_434 (release 4.1). This domain is an ADP-ribose binding module. It is found in a number of otherwise unrelated proteins. It is found at the C-terminus of the macro-H2A histone protein Swiss:Q02874. This domain is found in the non-structural proteins of several types of ssRNA viruses such as NSP3 from alphaviruses Swiss:P03317. This domain is also found on its own in a family of proteins from bacteria Swiss:P75918, archaebacteria Swiss:O59182 and eukaryotes Swiss:Q17432.. +PF02177 A4_EXTRA;
Amyloid A4 N-terminal heparin-binding. Alignment kindly provided by SMART. This N-terminal domain of APP, amyloid precursor protein, is the heparin-binding domain of the protein. This region is also responsible for stimulation of neurite outgrowth. The structure reveals both a highly charged basic surface that may interact with glycosaminoglycans in the brain and an abutting hydrophobic surface that is proposed to play an important functional role such as in dimerisation or ligand-binding. Structural similarities with cysteine-rich growth factors, taken together with its known growth-promoting properties, suggest the APP N-terminal domain could function as a growth factor in vivo .. +PF00962 Adenosine/AMP deaminase
+PF01490 Transmembrane amino acid transporter protein
Pfam-B_419 (release 4.0). This transmembrane region is found in many amino acid transporters including UNC-47 and MTR. UNC-47 encodes a vesicular amino butyric acid (GABA) transporter, (VGAT). UNC-47 is predicted to have 10 transmembrane domains Swiss:P34579 . MTR is a N system amino acid transporter system protein involved in methyltryptophan resistance Swiss:P38680. Other members of this family include proline transporters and amino acid permeases.. +PF00004 ATPase family associated with various cellular activities (AAA)
AAA family proteins often perform chaperone-like functions that assist in the assembly, operation, or disassembly of protein complexes .. +PF00696 aakinase;
Amino acid kinase family. Pfam-B_100 (release 2.1). This family includes kinases that phosphorylate a variety of amino acid substrates, as well as uridylate kinase and carbamate kinase. This family includes: Aspartokinase EC:2.7.2.4, Swiss:P00561. Acetylglutamate kinase EC:2.7.2.8, Swiss:Q07905. Glutamate 5-kinase EC:2.7.2.11, Swiss:P07005. Uridylate kinase EC:2.7.4.-, Swiss:P29464. Carbamate kinase EC:2.7.2.2, Swiss:O96432.. +PF03109 ABC1 family
Pfam-B_339 (release 6.5). This family includes ABC1 from yeast and AarF from E. coli . These proteins have a nuclear or mitochondrial subcellular location in eukaryotes. The exact molecular functions of these proteins are not clear, however yeast ABC1 suppresses a cytochrome b mRNA translation defect and is essential for the electron transfer in the bc 1 complex and E. coli AarF is required for ubiquinone production . It has been suggested that members of the ABC1 family are novel chaperonins . These proteins are unrelated to the ABC transporter proteins.. +PF01842 ACT domain
This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine . Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585. +PF04083 abhydro_lipase;
Partial alpha/beta-hydrolase lipase region. Pfam-B_267 (release 7.3);. This family corresponds to a N-terminal part of an alpha/beta hydrolase domain.. +PF00583 Acetyltransf;
Acetyltransferase (GNAT) family. MRC-LMB Genome group. This family contains proteins with N-acetyltransferase functions such as Elp3-related proteins.. +PF01648 4'-phosphopantetheinyl transferase superfamily
Pfam-B_1679 (release 4.1) & Pfam-B_3672 (Release 7.5). Members of this family transfer the 4'-phosphopantetheine (4'-PP) moiety from coenzyme A (CoA) to the invariant serine of Pfam:PF00550. This post-translational modification renders holo-ACP capable of acyl group activation via thioesterification of the cysteamine thiol of 4'-PP . This superfamily consists of two subtypes: The ACPS type such as Swiss:P24224 and the Sfp type such as Swiss:P39135. The structure of the Sfp type is known , which shows the active site accommodates a magnesium ion. The most highly conserved regions of the alignment are involved in binding the magnesium ion.. +PF01064 Activin types I and II receptor domain
Pfam-B_338 (release 3.0). This Pfam entry consists of both TGF-beta receptor types. This is an alignment of the hydrophilic cysteine-rich ligand-binding domains. Both receptor types (type I and II) possess a 9 amino acid cysteine box, with the consensus CCX{4-5}CN. The type I receptors also possess 7 extracellular residues preceding the cysteine box.. +PF00441 Acyl-CoA_dh;
Acyl-CoA dehydrogenase, C-terminal domain. C-terminal domain of Acyl-CoA dehydrogenase is an all-alpha, four helical up-and-down bundle.. +PF01757 DUF33;
Acyltransferase family. Pfam-B_708 (release 4.2). This family includes a range of acyltransferase enzymes. This domain is found in many as yet uncharacterised C. elegans proteins and it is approximately 300 amino acids long.. +PF00928 Adaptor complexes medium subunit family
Pfam-B_1007 (release 3.0). This family also contains members which are coatomer subunits.. +PF00107 adh_zinc;
Zinc-binding dehydrogenase. +PF02682 DUF213;
Allophanate hydrolase subunit 1. This family is the first subunit of allophanate hydrolase.. +PF03915 Actin interacting protein 3
Wood V, Griffiths-Jones SR. Pfam-B_38461 (release 7.2). +PF00842 Ala_racemase;
Alanine racemase, C-terminal domain. Pfam-B_1496 (release 2.1). +PF01168 UPF0001;
Alanine racemase, N-terminal domain. +PF01315 Aldehyde oxidase and xanthine dehydrogenase, a/b hammerhead domain
+PF02738 Molybdopterin-binding domain of aldehyde dehydrogenase
+PF00248 aldo_ket_red;
Aldo/keto reductase family. This family includes a number of K+ ion channel beta chain regulatory domains - these are reported to have oxidoreductase activity .. +PF01263 Aldose 1-epimerase
+PF03155 ALG6, ALG8 glycosyltransferase family
Pfam-B_3941 (release 6.5). N-linked (asparagine-linked) glycosylation of proteins is mediated by a highly conserved pathway in eukaryotes, in which a lipid (dolichol phosphate)-linked oligosaccharide is assembled at the endoplasmic reticulum membrane prior to the transfer of the oligosaccharide moiety to the target asparagine residues. This oligosaccharide is composed of Glc(3)Man(9)GlcNAc(2). The addition of the three glucose residues is the final series of steps in the synthesis of the oligosaccharide precursor. Alg6 transfers the first glucose residue, and Alg8 transfers the second one . In the human alg6 gene, a C->T transition, which causes Ala333 to be replaced with Val, has been identified as the cause of a congenital disorder of glycosylation, designated as type Ic OMIM:603147 .. +PF00245 alk_phosphatase;
Alkaline phosphatase. +PF02806 alpha-amylase_C;
Alpha amylase, C-terminal all-beta domain. Alpha amylase is classified as family 13 of the glycosyl hydrolases. The structure is an 8 stranded alpha/beta barrel containing the active site, interrupted by a ~70 a.a. calcium-binding domain protruding between beta strand 3 and alpha helix 3, and a carboxyl-terminal Greek key beta-barrel domain.. +PF05111 Ameloblastin precursor (Amelin)
Pfam-B_6419 (release 7.7). This family consists of several mammalian Ameloblastin precursor (Amelin) proteins. Matrix proteins of tooth enamel consist mainly of amelogenin but also of non-amelogenin proteins, which, although their volumetric percentage is low, have an important role in enamel mineralisation. One of the non-amelogenin proteins is ameloblastin, also known as amelin and sheathlin. Ameloblastin (AMBN) is one of the enamel sheath proteins which is thought to have a role in determining the prismatic structure of growing enamel crystals .. +PF01510 N-acetylmuramoyl-L-alanine amidase
Pfam-B_735 (release 4.0). This family includes zinc amidases that have N-acetylmuramoyl-L-alanine amidase activity EC:3.5.1.28. This enzyme domain cleaves the amide bond between N-acetylmuramoyl and L-amino acids in bacterial cell walls (preferentially: D-lactyl-L-Ala). The structure is known for the bacteriophage T7 structure and shows that two of the conserved histidines are zinc binding.. +PF01520 N-acetylmuramoyl-L-alanine amidase
Pfam-B_888 (release 4.0). This enzyme domain cleaves the amide bond between N-acetylmuramoyl and L-amino acids in bacterial cell walls.. +PF01593 Flavin containing amine oxidoreductase
Pfam-B_606 (release 4.1)b. This family consists of various amine oxidases, including maize polyamine oxidase (PAO) and various flavin containing monoamine oxidases (MAO). The aligned region includes the flavin binding site of these enzymes. The family also contains phytoene dehydrogenases and related enzymes. In vertebrates MAO plays an important role regulating the intracellular levels of amines via their oxidation; these include various neurotransmitters, neurotoxins and trace amines . In lower eukaryotes such as aspergillus and in bacteria the main role of amine oxidases is to provide a source of ammonium . PAOs in plants, bacteria and protozoa oxidise spermidine and spermine to an aminobutyral, diaminopropane and hydrogen peroxide and are involved in the catabolism of polyamines . Other members of this family include tryptophan 2-monooxygenase, putrescine oxidase, corticosteroid binding proteins and antibacterial glycoproteins.. +PF00501 AMP-binding enzyme
+PF05195 Aminopeptidase P, N-terminal domain
This domain is structurally very similar to the creatinase N-terminal domain (Pfam:PF01321). However, little or no sequence similarity exists between the two families.. +PF03098 Animal haem peroxidase
+PF01821 Anaphylotoxin-like domain
C3a, C4a and C5a anaphylatoxins are protein fragments generated enzymatically in serum during activation of complement molecules C3, C4, and C5. They induce smooth muscle contraction. These fragments are homologous to a three-fold repeat in fibulins.. +PF01094 Receptor family ligand binding region
This family includes extracellular ligand binding domains of a wide range of receptors. This family also includes the bacterial amino acid binding proteins of known structure.. +PF00023 ank;
Swissprot_feature_table. Ankyrins are multifunctional adaptors that link specific proteins to the membrane-associated, spectrin- actin cytoskeleton. This repeat-domain is a 'membrane-binding' domain of up to 24 repeated units, and it mediates most of the protein's binding activities. Repeats 13-24 are especially active, with known sites of interaction for the Na/K ATPase, Cl/HCO(3) anion exchanger, voltage-gated sodium channel, clathrin heavy chain and L1 family cell adhesion molecules. The ANK repeats are found to form a contiguous spiral stack such that ion transporters like the anion exchanger associate in a large central cavity formed by the ANK repeat spiral, while clathrin and cell adhesion molecules associate with specific regions outside this cavity .. +PF00191 annexin;
This family of annexins also includes giardin that has been shown to function as an annexin .. +PF03861 ANTAR domain
ANTAR (AmiR and NasR transcription antitermination regulators) is an RNA-binding domain found in bacterial transcription antitermination regulatory proteins. The majority of the domain consists of a coiled-coil. . +PF04729 Anti-silence;
ASF1 like histone chaperone. Pfam-B_3167 (release 7.5). This family includes the yeast and human ASF1 protein. These proteins have histone chaperone activity . ASF1 participates in both the replication-dependent and replication-independent pathways. The three-dimensional structure has been determined as a compact immunoglobulin-like beta sandwich fold topped by three helical linkers .. +PF02822 Antistasin family
Members of this family are inhibitors of trypsin family proteases. This domain is highly disulphide bonded. The domain is also found in some large extracellular proteins in multiple copies.. +PF00847 AP2-domain;
Pfam-B_409 (release 3.0). This 60 amino acid residue domain can bind to DNA and is found in transcription factor proteins.. +PF02424 ApbE family
Pfam-B_1963 (release 5.4). This prokaryotic family of lipoproteins is related to ApbE from Salmonella typhimurium. ApbE is involved in thiamine synthesis . More specifically, it may be involved in the conversion of aminoimidazole ribotide (AIR) to 4-amino-5-hydroxymethyl-2-methyl pyrimidine (HMP).. +PF04049 Anaphase promoting complex subunit 8 / Cdc23
Pfam-B_13808 (release 7.3);. The anaphase-promoting complex is composed of eight protein subunits, including BimE (APC1), CDC27 (APC3), CDC16 (APC6), and CDC23 (APC8).. +PF04106 Autophagy protein Apg5
Pfam-B_12134 (release 7.3);. Apg5 is directly required for the import of aminopeptidase I via the cytoplasm-to-vacuole targeting pathway .. +PF04602 arab_transf;
Mycobacterial cell wall arabinan synthesis protein. Pfam-B_4670 (release 7.5). Arabinosyltransferase is involved in arabinogalactan (AG) biosynthesis pathway in mycobacteria. AG is a component of the macromolecular assembly of the mycolyl-AG-peptidoglycan complex of the cell wall. This enzyme has important clinical applications as it is believed to be the target of the antimycobacterial drug Ethambutol .. +PF03079 ARD/ARD' family
Pfam-B_2276 (release 6.4). The two acireductone dioxygenase enzymes (ARD and ARD', previously known as E-2 and E-2') from Klebsiella pneumoniae share the same amino acid sequence Swiss:Q9ZFE7, but bind different metal ions: ARD binds Ni2+, ARD' binds Fe2+. ARD and ARD' can be experimentally interconverted by removal of the bound metal ion and reconstitution with the appropriate metal ion. The two enzymes share the same substrate, 1,2-dihydroxy-3-keto-5-(methylthio)pentene, but yield different products. ARD' yields the alpha-keto precursor of methionine (and formate), thus forming part of the ubiquitous methionine salvage pathway that converts 5'-methylthioadenosine (MTA) to methionine. This pathway is responsible for the tight control of the concentration of MTA, which is a powerful inhibitor of polyamine biosynthesis and transmethylation reactions [1,2]. ARD yields methylthiopropanoate, carbon monoxide and formate, and thus prevents the conversion of MTA to methionine. The role of the ARD catalysed reaction is unclear: methylthiopropanoate is cytotoxic, and carbon monoxide can activate guanylyl cyclase, leading to increased intracellular cGMP levels [1,2]. This family also contains other members, whose functions are not well characterised.. +PF01412 Putative GTPase activating protein for Arf
Ponting CP, Schultz J, Bork P. Putative zinc fingers with GTPase activating proteins (GAPs) towards the small GTPase, Arf. The GAP of ARD1 stimulates GTPase hydrolysis for ARD1 but not ARFs.. +PF01388 ARID/BRIGHT DNA binding domain
This domain is known as ARID for AT-Rich Interaction Domain , and also known as the BRIGHT domain .. +PF04683 ARM_1;
Proteasome complex subunit Rpn13 ubiquitin receptor. Pfam-B_4497 (release 7.5). This family was thought originally to be involved in cell-adhesion [1,2], but the members are now known to be proteasome subunit Rpn13, a novel ubiquitin receptor. The 26S proteasome is a huge macromolecular protein-degradation machine consisting of a proteolytically active 20S core, in the form of four disc-like proteins, and one or two 19S regulatory particles. The regulatory particle(s) sit on the top and or bottom of the core, de-ubiquitinate the substrate peptides, unfold them and guide them into the narrow channel through the centre of the core. Rpn13 and its homologues dock onto the regulatory particle through the N-terminal region which binds Rpn2. The C-terminal part of the domain binds de-ubiquitinating enzyme Uch37/UCHL5 and enhances its isopeptidase activity. Rpn13 binds ubiquitin via a conserved amino-terminal region called the pleckstrin-like receptor for ubiquitin, termed Pru, domain . The domain forms two contiguous anti-parallel beta-sheets with a configuration similar to the pleckstrin-homology domain (PHD) fold . Rpn13's ability to bind ubiquitin and the proteasome subunit Rpn2/S1 simultaneously supports evidence of its role as a ubiquitin receptor. Finally, when complexed to di-ubiquitin, via the Pru, and Uch37 via the C-terminal part, it frees up the distal ubiquitin for de-ubiquitination by the Uch37 .. +PF00514 Armadillo_seg;
Armadillo/beta-catenin-like repeat. Approx. 40 amino acid repeat. Tandem repeats form super-helix of helices that is proposed to mediate interaction of beta-catenin with its ligands. CAUTION: This family does not contain all known armadillo repeats.. +PF00339 arrestin;
Arrestin (or S-antigen), N-terminal domain. Ig-like beta-sandwich fold. Scop reports duplication with C-terminal domain.. +PF02752 arrestin_C;
Arrestin (or S-antigen), C-terminal domain. Ig-like beta-sandwich fold. Scop reports duplication with N-terminal domain.. +PF04959 Arsenite-resistance protein 2
Pfam-B_5269 (release 7.6). Arsenite is a carcinogenic compound which can act as a co-mutagen by inhibiting DNA repair. Arsenite-resistance protein 2 is thought to play a role in arsenite resistance .. +PF01037 ASNC_trans_reg;
Pfam-B_773 (release 3.0). The AsnC family is a family of similar bacterial transcription regulatory proteins.. +PF05118 Aspartyl/Asparaginyl beta-hydroxylase
Pfam-B_2775 (release 7.7). Iron (II)/2-oxoglutarate (2-OG)-dependent oxygenases catalyse oxidative reactions in a range of metabolic processes. Proline 3-hydroxylase hydroxylates proline at position 3, the first of a 2-OG oxygenase catalysing oxidation of a free alpha-amino acid. The structure of proline 3-hydroxylase contains the conserved motifs present in other 2-OG oxygenases including a jelly roll strand core and residues binding iron and 2-oxoglutarate, consistent with divergent evolution within the extended family. This family represent the arginine, asparagine and proline hydroxylases. The aspartyl/asparaginyl beta-hydroxylase (EC:1.14.11.16) specifically hydroxylates one aspartic or asparagine residue in certain epidermal growth factor-like domains of a number of proteins .. +PF01177 Asp/Glu/Hydantoin racemase
This family contains aspartate racemase, maleate isomerases EC:5.2.1.1 , glutamate racemase, hydantoin racemase and arylmalonate decarboxylase EC:4.1.1.76 .. +PF01400 Astacin (Peptidase family M12A)
The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. Members of this family contain two conserved disulphide bridges, these are joined 1-4 and 2-3. Members of this family have an amino terminal propeptide which is cleaved to give the active protease domain. All other linked domains are found to the carboxyl terminus of this domain. This family includes: Astacin Swiss:P07584, a digestive enzyme from Crayfish. Meprin, Swiss:Q16819, a multiple domain membrane component that is constructed from a homologous alpha and beta chain. Proteins involved in morphogenesis such as Swiss:P13497, and Tolloid from drosophila Swiss:P25723.. +PF02178 AT hook motif
Alignment kindly provided by SMART. At hooks are DNA binding motifs with a preference for A/T rich regions.. +PF03029 ATP-bind;
Conserved hypothetical ATP binding protein. Pfam-B_1301 (release 6.4) & Pfam-B_2154 (Release 8.0). Members of this family are found in a range of archaea and eukaryotes and have hypothesised ATP binding activity.. +PF00306 ATP synthase alpha/beta chain, C terminal domain
Pfam-B_15 (release 1.0). +PF02874 ATP synthase alpha/beta family, beta-barrel domain
This family includes the ATP synthase alpha and beta subunits the ATP synthase associated with flagella.. +PF04718 ATPsynth_g;
Mitochondrial ATP synthase g subunit. Pfam-B_5977 (release 7.5). The Fo sector of the ATP synthase is a membrane bound complex which mediates proton transport. It is composed of nine different polypeptide subunits (a, b, c, d, e, f, g F6, A6L). The function of subunit g is currently unknown. The conserved region covers all but the very N-terminus of the member sequences. No prokaryotic members have been identified thus far .. +PF03768 Attacin, N-terminal region
Pfam-B_2791 (release 7.0). This family includes attacin and sarcotoxin, but not diptericin (which shares similarity to the C-terminal region of attacin). All members of this family are insect antibacterial proteins which are induced by the fat body and subsequently secreted into the hemolymph where they act synergistically to kill the invading microorganism .. +PF03797 Autotransporter beta-domain
Secretion of protein products occurs by a number of different pathways in bacteria. One of these pathways known as the type V pathway was first described for the IgA1 protease . The protein component that mediates secretion through the outer membrane is contained within the secreted protein itself, hence the proteins secreted in this way are called autotransporters. This family corresponds to the presumed integral membrane beta-barrel domain that transports the protein. This domain is found at the C terminus of the proteins it occurs in. The N terminus contains the variable passenger domain that is translocated across the membrane. Once the passenger domain is exported it is cleaved auto-catalytically in some proteins, in others a different protease is used and in some cases no cleavage occurs .. +PF03547 Auxin_eff;
Membrane transport protein. TIGRFAMs, Griffiths-Jones SR. TIGRFAMs & Pfam-B_5261 (Release 7.5). This family includes auxin efflux carrier proteins and other transporter proteins from all domains of life.. +PF02310 B12 binding domain
Pfam-B_359 (release 5.2). This domain binds to B12 (adenosylcobamide)[1-3], it is found in several enzymes, such as glutamate mutase Swiss:Q05488, methionine synthase Swiss:Q99707 and methylmalonyl-CoA mutase Swiss:P22033. It contains a conserved DxHxxGx(41)SxVx(26)GG motif, which is important for B12 binding .. +PF02607 B12 binding domain
This B12 binding domain is found in methionine synthase EC:2.1.1.13 Swiss:Q99707, and other shorter proteins that bind to B12. This domain is always found to the N-terminus of Pfam:PF02310. The structure of this domain is known , it is a 4 helix bundle. Many of the conserved residues in this domain are involved in B12 binding, such as those in the MXXVG motif.. +PF02362 B3 DNA binding domain
Pfam-B_582 (release 5.2). This is a family of plant transcription factors with various roles in development, the aligned region corresponds the B3 DNA binding domain as described in this domain is found in VP1/AB13 transcription factors . Some proteins also have a second AP2 DNA binding domain Pfam:PF00847 such as RAV1 Swiss:Q9ZWM9 . DNA binding activity was demonstrated by .. +PF01313 Bacterial export proteins, family 3
Pfam-B_898 (release 3.0). This family includes the following members; FliQ, MopD, HrcS, Hrp, YopS and SpaQ All of these members export proteins, that do not possess signal peptides, through the membrane. Although the proteins that these exporters move may be different, the exporters are thought to function in similar ways .. +PF02673 Bacitracin resistance protein BacA
Bacitracin resistance protein (BacA) is a putative undecaprenol kinase. BacA confers resistance to bacitracin, probably by phosphorylation of undecaprenol .. +PF01011 Bacterial_PQQ;
Pfam-B_1319 (release 3.0). The family represent a single repeat of a beta propeller. This propeller has been found in several enzymes which utilise pyrrolo-quinoline quinone as a prosthetic group.. +PF03704 BAD;
Bacterial transcriptional activator domain. Found in the DNRI/REDD/AFSR family of regulators. This region of AFSR (Swiss:P25941) along with the C terminal region is capable of independently directing actinorhodin production. This family contains TPR repeats.. +PF01426 BAH domain
This domain has been called BAH (Bromo adjacent homology) domain and has also been called ELM1 and BAM (Bromo adjacent motif) domain. The function of this domain is unknown but may be involved in protein-protein interaction .. +PF01145 SPFH domain / Band 7 family
This family has been called SPFH , Band 7 or PHB domain. Recent phylogenetic analysis has shown this domain to be a slipin or Stomatin-like integral membrane domain conserved from protozoa to mammals.. +PF03594 Benzoate membrane transport protein
TIGRFAMs, Griffiths-Jones SR. +PF02944 BESS motif
The BESS motif is named after the proteins in which it is found (BEAF , Suvar(3)7 and Stonewall ). The motif is 40 amino acid residues long and is composed of two predicted alpha helices. Based on the protein in which it is found and the presence of conserved positively charged residues it is predicted to be a DNA binding domain. This domain appears to be specific to drosophila.. +PF02369 Bacterial Ig-like domain (group 1)
This family consists of bacterial domains with an Ig-like fold. Members of this family are found in bacterial surface proteins such as intimins and invasins involved in pathogenicity. . +PF02785 Biotin carboxylase C-terminal domain
Biotin carboxylase is a component of the acetyl-CoA carboxylase multi-component enzyme which catalyses the first committed step in fatty acid synthesis in animals, plants and bacteria. Most of the active site residues reported in reference are in this C-terminal domain.. +PF02012 BNR/Asp-box repeat
Members of this family contain multiple BNR (bacterial neuraminidase repeat) repeats or Asp-boxes. The repeats are short, however the repeats are never found closer than 40 residues together suggesting that the repeat is structurally longer. These repeats are found in many glycosyl hydrolases as well as other extracellular proteins of unknown function.. +PF00528 BPD_transp;
Binding-protein-dependent transport system inner membrane component. LMB bacterial genome group and Prosite. The alignments cover the most conserved region of the proteins, which is thought to be located in a cytoplasmic loop between two transmembrane domains. The members of this family have a variable number of transmembrane helices.. +PF02237 Biotin protein ligase C terminal domain
The function of this structural domain is unknown. It is found to the C terminus of the biotin protein ligase catalytic domain Pfam:PF01317. . +PF03099 BPL_LipA_LipB;
Biotin/lipoate A/B protein ligase family. This family includes biotin protein ligase, lipoate-protein ligase A and B. Biotin is covalently attached at the active site of certain enzymes that transfer carbon dioxide from bicarbonate to organic acids to form cellular metabolites. Biotin protein ligase (BPL) is the enzyme responsible for attaching biotin to a specific lysine at the active site of biotin enzymes. Each organism probably has only one BPL. Biotin attachment is a two step reaction that results in the formation of an amide linkage between the carboxyl group of biotin and the epsilon-amino group of the modified lysine . Lipoate-protein ligase A (LPLA) catalyses the formation of an amide linkage between lipoic acid and a specific lysine residue in lipoate dependent enzymes . The unusual biosynthesis pathway of lipoic acid is mechanistically intertwined with attachment of the cofactor .. +PF02485 Core-2/I-Branching enzyme
Pfam-B_842 (release 5.4). This is a family of two different beta-1,6-N-acetylglucosaminyltransferase enzymes, I-branching enzyme (eg Swiss:Q06430) and core-2 branching enzyme (eg Swiss:Q02742). I-branching enzyme is responsible for the production of the blood group I-antigen during embryonic development . Core-2 branching enzyme forms crucial side-chain branches in O-glycans .. +PF00533 BRCA1 C Terminus (BRCT) domain
+PF04089 BRICHOS domain
The BRICHOS domain is about 100 amino acids long. It is found in a variety of proteins implicated in dementia, respiratory distress and cancer. Its exact function is unknown; roles that have been proposed for it include (a) in targeting of the protein to the secretory pathway, (b) intramolecular chaperone-like function, and (c) assisting the specialised intracellular protease processing system . This C-terminal domain is embedded in the endoplasmic reticulum lumen, and binds to the N-terminal, transmembrane, SP_C, Pfam:PF08999, provided that it is in non-helical conformation. Thus the Brichos domain of proSP-C is a chaperone that induces alpha-helix formation of an aggregation-prone TM region .. +PF04427 Brix domain
+PF03097 BRO1-like domain
This domain is found in a number of proteins including Rhophilin Swiss:Q61085 and BRO1 Swiss:P48582. It is known to have a role in endosomal targeting. ESCRT-III subunit Snf7 binds to a conserved hydrophobic patch in the BRO1 domain that is required for protein complex formation and for the protein-sorting function of BRO1 .. +PF00439 bromodomain;
Bromodomains are 110 amino acid long domains, that are found in many chromatin associated proteins. Bromodomains can interact specifically with acetylated lysine .. +PF03909 BSD domain
This domain contains a distinctive -FW- motif. It is found in a family of eukaryotic transcription factors as well as a set of proteins of unknown function.. +PF03092 BT1 family
Pfam-B_1804 (release 6.4). Members of this family are transmembrane proteins. Several are Leishmania putative proteins that are thought to be pteridine transporters. One such protein Swiss:Q25272, previously termed (and is still annotated as) ORFG, was shown to encode a biopterin transport protein using null mutants , thus being subsequently renamed BT1. The significant similarity of ORFG/BT1 to Trypanosoma brucei ESAG10 (a putative transmembrane protein and another member of this family) was previously noted . This family also contains five putative Arabidopsis thaliana proteins of unknown function. In addition, it also contains two predicted prokaryotic proteins (from the cyanobacteria Synechocystis and Synechococcus).. +PF00651 BTB/POZ domain
The BTB (for BR-C, ttk and bab) or POZ (for Pox virus and Zinc finger) domain is present near the N-terminus of a fraction of zinc finger (Pfam:PF00096) proteins and in proteins that contain the Pfam:PF01344 motif such as Kelch and a family of pox virus proteins. The BTB/POZ domain mediates homomeric dimerisation and in some instances heteromeric dimerisation . The structure of the dimerised PLZF BTB/POZ domain has been solved and consists of a tightly intertwined homodimer. The central scaffolding of the protein is made up of a cluster of alpha-helices flanked by short beta-sheets at both the top and bottom of the molecule . POZ domains from several zinc finger proteins have been shown to mediate transcriptional repression and to interact with components of histone deacetylase co-repressor complexes including N-CoR and SMRT [4,5,6]. The POZ or BTB domain is also known as BR-C/Ttk or ZiN.. +PF03437 BtpA family
Pfam-B_4453 (release 6.6). The BtpA protein is tightly associated with the thylakoid membranes, where it stabilises the reaction centre proteins of photosystem I.. +PF03131 bZIP Maf transcription factor
Mifsud W, Eberhardt R. Pfam-B_482 (release 6.5). Maf transcription factors contain a conserved basic region leucine zipper (bZIP) domain, which mediates their dimerisation and DNA binding property . Thus, this family is probably related to Pfam:PF00170. This family also includes the DNA_binding domain of Skn-1 (Swiss:P34707); this domain lacks the leucine zipper found in other bZip domains, and binds DNA as a monomer [2,3].. +PF00168 C2 domain
Swissprot_feature_table. +PF02743 Cache;
+PF04857 CAF1 family ribonuclease
Pfam-B_1567 (release 7.5). +PF03135 CagE, TrbE, VirB family, component of type IV transporter system
Pfam-B_843 (release 6.5). This family includes the Helicobacter pylori protein CagE Swiss:Q48252, which together with other proteins from the cag pathogenicity island (PAI), encodes a type IV transporter secretion system. The precise role of CagE is not known, but studies in animal models have shown that it is essential for pathogenesis in Helicobacter pylori induced gastritis and peptic ulceration . Indeed, the expression of the cag PAI has been shown to be essential for stimulating human gastric epithelial cell apoptosis in vitro . Similar type IV transport systems are also found in other bacteria. This family includes the TrbE Swiss:P54910 and VirB Swiss:P05353 proteins from the respective trb and Vir conjugal transfer systems in Agrobacterium tumefaciens. Homologues of VirB proteins from other species are also members of this family, e.g. VirB from Brucella suis Swiss:Q9RPY1.. +PF02515 CAIB-BAIF;
CoA-transferase family III. Pfam-B_887 (release 5.4). CoA-transferases are found in organisms from all lines of descent. Most of these enzymes belong to two well-known enzyme families, but recent work on unusual biochemical pathways of anaerobic bacteria has revealed the existence of a third family of CoA-transferases. The members of this enzyme family differ in sequence and reaction mechanism from CoA-transferases of the other families. Currently known enzymes of the new family are a formyl-CoA: oxalate CoA-transferase, a succinyl-CoA: (R)-benzylsuccinate CoA-transferase, an (E)-cinnamoyl-CoA: (R)-phenyllactate CoA-transferase, and a butyrobetainyl-CoA: (R)-carnitine CoA-transferase. In addition, a large number of proteins of unknown or differently annotated function from Bacteria, Archaea and Eukarya apparently belong to this enzyme family. Properties and reaction mechanisms of the CoA-transferases of family III are described and compared to those of the previously known CoA-transferases.. +PF02888 Calmodulin binding domain
Psi-blast P70604/413-489. Small-conductance Ca2+-activated K+ channels (SK channels) are independent of voltage and gated solely by intracellular Ca2+. These membrane channels are heteromeric complexes that comprise pore-forming alpha-subunits and the Ca2+-binding protein calmodulin (CaM) . CaM binds to the SK channel through the CaM-binding domain (CaMBD), which is located in an intracellular region of the alpha-subunit immediately carboxy-terminal to the pore. Channel opening is triggered when Ca2+ binds the EF hands in the N-lobe of CaM. The structure of this domain complexed with CaM is known . This domain forms an elongated dimer with a CaM molecule bound at each end; each CaM wraps around three alpha-helices, two from one CaMBD subunit and one from the other.. +PF01302 CAP-Gly domain
Cytoskeleton-associated proteins (CAPs) are involved in the organisation of microtubules and transportation of vesicles and organelles along the cytoskeletal network. A conserved motif, CAP-Gly, has been identified in a number of CAPs, including CLIP-170 and dynactins. The crystal structure of Caenorhabditis elegans F53F4.3 protein Swiss:Q20728 CAP-Gly domain was recently solved . The domain contains three beta-strands. The most conserved sequence, GKNDG, is located in two consecutive sharp turns on the surface, forming the entrance to a groove .. +PF01039 Carboxyl transferase domain
Pfam-B_299 (release 3.0). All of the members in this family are biotin dependent carboxylases. The carboxyl transferase domain carries out the following reaction; transcarboxylation from biotin to an acceptor molecule. There are two recognised types of carboxyl transferase. One of them uses acyl-CoA and the other uses 2-oxoacid as the acceptor molecule of carbon dioxide. All of the members in this family utilise acyl-CoA as the acceptor molecule.. +PF00755 Choline/Carnitine o-acyltransferase
Pfam-B_438 (release 2.1). +PF03378 CAS/CSE protein, C-terminus
Pfam-B_3786 (release 6.6). Mammalian cellular apoptosis susceptibility (CAS) proteins are homologous to the yeast chromosome-segregation protein, CSE1 . This family aligns the C-terminal halves (approximately). CAS is involved in both cellular apoptosis and proliferation [2,3]. Apoptosis is inhibited in CAS-depleted cells, while the expression of CAS correlates to the degree of cellular proliferation. Like CSE1, it is essential for the mitotic checkpoint in the cell cycle (CAS depletion blocks the cell in the G2 phase), and has been shown to be associated with the microtubule network and the mitotic spindle , as is the protein MEK, which is thought to regulate the intracellular localisation (predominantly nuclear vs. predominantly cytosolic) of CAS. In the nucleus, CAS acts as a nuclear transport factor in the importin pathway . The importin pathway mediates the nuclear transport of several proteins that are necessary for mitosis and further progression. CAS is therefore thought to affect the cell cycle through its effect on the nuclear transport of these proteins . Since apoptosis also requires the nuclear import of several proteins (such as P53 and transcription factors), it has been suggested that CAS also enables apoptosis by facilitating the nuclear import of at least a subset of these essential proteins .. +PF00690 Na_K_ATPase_N;
Cation transporter/ATPase, N-terminus. Pfam-B_138 (release 2.1). Members of this family are involved in Na+/K+, H+/K+, Ca++ and Mg++ transport.. +PF01545 Cation efflux family
Pfam-B_232 (release 4.0). Members of this family are integral membrane proteins, that are found to increase tolerance to divalent metal ions such as cadmium, zinc, and cobalt. These proteins are thought to be efflux pumps that remove these ions from cells.. +PF04586 Caudo_protease;
Caudovirus prohead protease. Pfam-B_4836 (release 7.5). Family of Caudovirus prohead proteases also found in a number of bacteria possibly as the result of horizontal transfer. . +PF01607 Chitin_bind_2;
Chitin binding Peritrophin-A domain. This domain is called the Peritrophin-A domain and is found in chitin binding proteins particularly peritrophic matrix proteins of insects and animal chitinases. Copies of the domain are also found in some baculoviruses. Relevant references that describe proteins with this domain include [1-3]. It is an extracellular domain that contains six conserved cysteines that probably form three disulphide bridges. Chitin binding has been demonstrated for a protein containing only two of these domains .. +PF00942 Cellulose_bind; CBD_3;
Cellulose binding domain. Pfam-B_1126 (release 3.0). +PF02018 CBD_6; CBM_4;
Carbohydrate binding domain. This family includes diverse carbohydrate binding domains.. +PF03422 Carbohydrate binding module (family 6)
Pfam-B_1231 (release 6.6). +PF00571 CBS domain
+PF02754 DUF224;
Cysteine-rich domain. The key element of this family is the CX31-38CCX33-34CXXC sequence motif normally found at the C-terminus in archaeal and bacterial Hdr-like proteins . There may be one or two copies, and the motif is probably an iron-sulfur binding cluster. In some instances one of the cysteines is replaced by an aspartate, and aspartate can in principle also function as a ligand of an iron-sulfur cluster . The family includes a subunit from heterodisulphide reductase and a subunit from glycolate oxidase Swiss:P52074 and glycerol-3-phosphate dehydrogenase.. +PF03379 CcmB protein
Pfam-B_3059 (release 6.6). CcmB is the product of one of a cluster of Ccm genes that are necessary for cytochrome c biosynthesis in eubacteria. Expression of these proteins is induced when the organisms are grown under anaerobic conditions with nitrate or nitrite as the final electron acceptor. CcmB is required for the export of haem to the periplasm.. +PF04103 CD20-like family
Pfam-B_1979 (rel 7.3), Pfam-B_10092 (rel 9.0). This family includes the CD20 protein and the beta subunit of the high affinity receptor for IgE Fc. The high affinity receptor for IgE is a tetrameric structure consisting of a single IgE-binding alpha subunit, a single beta subunit, and two disulfide-linked gamma subunits. The alpha subunit of Fc epsilon RI and most Fc receptors are homologous members of the Ig superfamily. By contrast, the beta and gamma subunits from Fc epsilon RI are not homologous to the Ig superfamily. Both molecules have four putative transmembrane segments and a probable topology where both amino- and carboxy termini protrude into the cytoplasm . This family also includes LR8 like proteins from humans, mice and rats. The function of the human LR8 protein is unknown although it is known to be strongly expressed in the lung fibroblasts . This family also includes sarcospan, a transmembrane component of dystrophin-associated glycoprotein. Loss of the sarcoglycan complex and sarcospan alone is sufficient to cause muscular dystrophy. The role of the sarcoglycan complex and sarcospan is thought to be to strengthen the dystrophin axis connecting the basement membrane with the cytoskeleton .. +PF05179 Cdc73;
RNA pol II accessory factor, Cdc73 family. Pfam-B_6394 (release 7.7). +PF01066 CDP-alcohol phosphatidyltransferase
Pfam-B_651 (release 3.0). All of these members have the ability to catalyse the displacement of CMP from a CDP-alcohol by a second alcohol with formation of a phosphodiester bond and concomitant breaking of a phosphoride anhydride bond.. +PF00150 cellulase;
Cellulase (glycosyl hydrolase family 5). +PF04218 CENP-B N-terminal DNA-binding domain
Centromere Protein B (CENP-B) is a DNA-binding protein localised to the centromere. Within the N-terminal 125 residues, there is a DNA-binding region, which binds to a corresponding 17bp CENP-B box sequence. CENP-B dimers either bind two separate DNA molecules or alternatively, they may bind two CENP-B boxes on one DNA molecule, with the intervening stretch of DNA forming a loop structure. The CENP-B DNA-binding domain consists of two repeating domains, RP1 and RP2. This family corresponds to RP1, which has been shown to consist of four helices in a helix-turn-helix structure .. +PF04734 Neutral/alkaline non-lysosomal ceramidase
Pfam-B_3385 (release 7.5). This family represents a group of neutral/alkaline ceramidases found in both bacteria and eukaryotes [1,2,3].. +PF03859 CG-1 domain
Pfam-B_18451 (Release 7.1). CG-1 domains are highly conserved domains of about 130 amino-acid residues containing a predicted bipartite NLS and named after a partial cDNA clone isolated from parsley encoding a sequence-specific DNA-binding protein . CG-1 domains are associated with CAMTA proteins (for CAlModulin -binding Transcription Activator) that are transcription factors containing a calmodulin -binding domain and ankyrins (ANK) motifs .. +PF00307 actinin-binding;
Calponin homology (CH) domain. The CH domain is found in both cytoskeletal proteins and signal transduction proteins . The CH domain is involved in actin binding in some members of the family. However in calponins there is evidence that the CH domain is not involved in its actin binding activity . Most member proteins have from two to four copies of the CH domain, however some proteins such as calponin and Swiss:P15498 have only a single copy.. +PF04420 CHD5-like protein
Members of this family are probably coiled-coil proteins that are similar to the CHD5 (Congenital heart disease 5) protein. In Saccharomyces cerevisiae this protein localises to the ER and is thought to play a homeostatic role .. +PF03067 Chitin binding domain
Pfam-B_2364 (release 6.4). This domain is found associated with a wide variety of cellulose binding domains. This domain however is a chitin binding domain. This domain is found in isolation in baculoviral spheroidins and spindolins, proteins of unknown function.. +PF00379 insect_cuticle;
Insect cuticle protein. Many insect cuticular proteins include a 35-36 amino acid motif known as the R&R consensus. The extensive conservation of this region led to the suggestion that it functions to bind chitin. Provocatively, it has no sequence similarity to the well-known cysteine-containing chitin-binding domain found in chitinases and some peritrophic membrane proteins. Chitin binding has been shown experimentally for this region . Thus arthropods have two distinct classes of chitin binding proteins, those with the chitin-binding domain found in lectins, chitinases and peritrophic membranes (cysCBD) and those with the cuticular protein chitin-binding domain (non-cysCBD) .. +PF04968 CHORD
Pfam-B_1217 (release 7.0). CHORD represents a Zn binding domain. Silencing of the C. elegans CHORD-containing gene results in semisterility and embryo lethality, suggesting an essential function of the wild-type gene in nematode development . . +PF02017 CIDE-N domain
This domain is found in CAD nuclease Swiss:O76075 , ICAD Swiss:O00273 the inhibitor of CAD nuclease. The two proteins interact through this domain.. +PF02487 CLN3 protein
Pfam-B_1060 (release 5.4). This is a family of proteins from the CLN3 gene. A missense mutation of glutamic acid (E) to lysine (K) at position 295 in the human protein (Swiss:Q13286) has been implicated in Juvenile neuronal ceroid lipofuscinosis (Batten disease) .. +PF02861 Clp amino terminal domain
Pfam-B_102 (release 6.0). This short domain is found in one or two copies at the amino terminus of ClpA and ClpB proteins from bacteria and eukaryotes. The function of these domains is uncertain but they may form a protein binding site .. +PF02353 Mycolic acid cyclopropane synthetase
Pfam-B_862 (release 5.2). This family consists of Cyclopropane-fatty-acyl-phospholipid synthase or CFA synthase EC:2.1.1.79; this enzyme catalyses the reaction: S-adenosyl-L-methionine + phospholipid olefinic fatty acid <=> S-adenosyl-L-homocysteine + phospholipid cyclopropane fatty acid.. +PF00780 CNH domain
Alignment kindly provided by SMART. Domain found in NIK1-like kinase, mouse citron and yeast ROM1, ROM2. Unpublished observations.. +PF00027 Cyclic nucleotide-binding domain
+PF02629 DUF184;
This domain has a Rossmann fold and is found in a number of proteins including succinyl CoA synthetases, malate and ATP-citrate ligases.. +PF01144 Coenzyme A transferase
+PF02514 cobN-Mg_chel;
CobN/Magnesium Chelatase. Pfam-B_647 (release 5.4). This family contains a domain common to the cobN protein and to magnesium protoporphyrin chelatase. CobN is implicated in the conversion of hydrogenobyrinic acid a,c-diamide to cobyrinic acid . Magnesium protoporphyrin chelatase is involved in chlorophyll biosynthesis .. +PF02492 CobW/HypB/UreG, nucleotide-binding domain
Pfam-B_428 (release 4.0) & Pfam-B_1247 (release 5.4). This domain is found in HypB, a hydrogenase expression / formation protein, and UreG a urease accessory protein. Both these proteins contain a P-loop nucleotide binding motif [2,3]. HypB has GTPase activity and is a guanine nucleotide binding protein . It is not known whether UreG binds GTP or some other nucleotide. Both enzymes are involved in nickel binding. HypB can store nickel and is required for nickel dependent hydrogenase expression . UreG is required for functional incorporation of the urease nickel metallocenter. GTP hydrolysis may be required by these proteins for nickel incorporation into other nickel proteins . This family of domains also contains P47K (Swiss:P31521), a Pseudomonas chlororaphis protein needed for nitrile hydratase expression, and the cobW gene product (Swiss:P29937), which may be involved in cobalamin biosynthesis in Pseudomonas denitrificans .. +PF00135 Carboxylesterase family
+PF01484 Nematode cuticle collagen N-terminal domain
Pfam-B_200 (release 4.0). The function of this domain is unknown. It is found in the N-terminal region of nematode cuticle collagens, see Pfam:PF01391. Cuticle is a tough elastic structure secreted by hypodermal cells and is primarily composed of collagen proteins .. +PF01391 Collagen triple helix repeat (20 copies)
Members of this family belong to the collagen superfamily . Collagens are generally extracellular structural proteins involved in formation of connective tissue structure. The alignment contains 20 copies of the G-X-Y repeat that forms a triple helix. The first position of the repeat is glycine, the second and third positions can be any residue but are frequently proline and hydroxyproline. Collagens are post translationally modified by proline hydroxylase to form the hydroxyproline residues. Defective hydroxylation is the cause of scurvy. Some members of the collagen superfamily are not involved in connective tissue structure but share the same triple helical structure.. +PF03772 Competence protein
Members of this family are integral membrane proteins with 6 predicted transmembrane helices. Some members of this family have been shown to be essential for bacterial competence in uptake of extracellular DNA [1,4]. These proteins may transport DNA across the cell membrane. These proteins contain a highly conserved motif in the amino terminal transmembrane region that has two histidines that may form a metal binding site.. +PF05071 Complex1_17_2kD;
NADH ubiquinone oxidoreductase subunit NDUFA12. This family contains the 17.2 kD subunit of complex I (NDUFA12) and its homologues. The family also contains a second related eukaryotic protein of unknown function, e.g. Swiss:Q9BV02.. +PF00329 complex1_30Kd;
Respiratory-chain NADH dehydrogenase, 30 Kd subunit. +PF02950 Conotoxin
Pfam-B_529 (release 6.4). Conotoxins are small snail toxins that block ion channels.. +PF05019 Coenzyme Q (ubiquinone) biosynthesis protein Coq4
Pfam-B_14948 (release 7.6). Coq4p was shown to peripherally associate with the matrix face of the mitochondrial inner membrane. The putative mitochondrial- targeting sequence present at the amino-terminus of the polypeptide efficiently imported it to mitochondria. The function of Coq4p is unknown, although its presence is required to maintain a steady-state level of Coq7p, another component of the Q biosynthetic pathway . The overall structure of Coq4 is alpha helical and shows resemblance to haemoglobin/myoglobin (information from TOPSAN).. +PF03471 Transporter associated domain
This small domain is found in a family of proteins with the Pfam:PF01595 domain and two CBS domains with this domain found at the C-terminus of the proteins, the domain is also found at the C terminus of some Na+/H+ antiporters. This domain is also found in CorC that is involved in Magnesium and cobalt efflux. The function of this domain is uncertain but might be involved in modulating transport of ion substrates.. +PF02389 Cornifin (SPRR) family
Pfam-B_1215 (release 5.2). SPRR genes (formerly SPR) encode a novel class of polypeptides (small proline rich proteins) that are strongly induced during differentiation of human epidermal keratinocytes in vitro and in vivo. The most characteristic feature of the SPRR gene family resides in the structure of the central segments of the encoded polypeptides that are built up from tandemly repeated units of either eight (SPRR1 and SPRR3) or nine (SPRR2) amino acids with the general consensus XKXPEPXX where X is any amino acid . . +PF02628 Cytochrome oxidase assembly protein
This is a family of integral membrane proteins. CtaA is required for cytochrome aa3 oxidase assembly in Bacillus subtilis . COX15 is required for cytochrome c oxidase assembly in yeast (Swiss:P40086).. +PF02936 Cytochrome c oxidase subunit IV
Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit IV. The Dictyostelium member of this family is called COX VI Swiss:P26310. The yeast protein Swiss:P53077 appears to be the yeast COX IV subunit.. +PF03626 Prokaryotic Cytochrome C oxidase subunit IV
Pfam-B_3217 (release 7.0). Cytochrome c oxidase (COX) is a multi-subunit enzyme complex that catalyses the final step of electron transfer through the respiratory chain on the mitochondrial inner membrane. This family is composed of cytochrome c oxidase subunit 4 from prokaryotes.. +PF02937 Cytochrome c oxidase subunit VIc
Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIc.. +PF02935 Cytochrome c oxidase subunit VIIc
Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIIc. The yeast member of this family is called COX VIII Swiss:P04039.. +PF04516 CP2 transcription factor
Pfam-B_2156 (release 7.5). This family represents a conserved region in the CP2 transcription factor family.. +PF00118 cpn60_TCP1;
TCP-1/cpn60 chaperonin family. This family includes members from the HSP60 chaperone family and the TCP-1 (T-complex protein) family.. +PF02787 Carbamoyl-phosphate synthetase large chain, oligomerisation domain
Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. . +PF04969 CS domain
Pfam-B_1217 (release 7.0). The CS and CHORD (Pfam:PF04968) are fused into a single polypeptide chain in metazoans but are found in separate proteins in plants; this is thought to be indicative of an interaction between CS and CHORD . It has been suggested that the CS domain is a binding module for HSP90, implying that CS domain-containing proteins are involved in recruiting heat shock proteins to multiprotein assemblies . Two CS domains are found at the C-terminus of Ubiquitin carboxyl-terminal hydrolase 19 (USP19) (Swiss:O94966), these domains may play a role in the interaction of USP19 with cellular inhibitor of apoptosis 2 .. +PF00988 Carbamoyl-phosphate synthase small chain, CPSase domain
Pfam-B_345 (release 3.0). The carbamoyl-phosphate synthase domain is in the amino terminus of protein. Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines . The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00289. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117.. +PF03178 CPSF A subunit region
Pfam-B_1224 (release 6.5). This family includes a region that lies towards the C-terminus of the cleavage and polyadenylation specificity factor (CPSF) A (160 kDa) subunit. CPSF is involved in mRNA polyadenylation and binds the AAUAAA conserved sequence in pre-mRNA. CPSF has also been found to be necessary for splicing of single-intron pre-mRNAs . The function of the aligned region is unknown but may be involved in RNA/DNA binding.. +PF00313 'Cold-shock' DNA-binding domain
+PF04442 Cytochrome c oxidase assembly protein CtaG/Cox11
Cytochrome c oxidase assembly protein is essential for the assembly of functional cytochrome oxidase protein. In eukaryotes it is an integral protein of the mitochondrial inner membrane. Cox11 is essential for the insertion of Cu(I) ions to form the CuB site. This is essential for the stability of other structures in subunit I, for example haems a and a3, and the magnesium/manganese centre. Cox11 is probably only required in sub-stoichiometric amounts relative to the structural units . The C terminal region of the protein is known to form a dimer. Each monomer coordinates one Cu(I) ion via three conserved cysteine residues (111, 208 and 210) in Saccharomyces cerevisiae (Swiss:P19516). Met 224 is also thought to play a role in copper transfer or stabilising the copper site .. +PF01148 Cytidylyltrans;
Cytidylyltransferase family. Pfam-B_921 (release 3.0). The members of this family are integral membrane protein cytidylyltransferases. The family includes phosphatidate cytidylyltransferase EC:2.7.7.41 as well as Sec59 from yeast. Sec59 is a dolichol kinase EC:2.7.1.108.. +PF04145 Ctr copper transporter family
Pfam-B_3006 (release 7.3). The redox active metal copper is an essential cofactor in critical biological processes such as respiration, iron transport, oxidative stress protection, hormone production, and pigmentation. A widely conserved family of high-affinity copper transport proteins (Ctr proteins) mediates copper uptake at the plasma membrane. A series of clustered methionine residues in the hydrophilic extracellular domain, and an MXXXM motif in the second transmembrane domain, are important for copper uptake. These methionine probably coordinate copper during the process of metal transport.. +PF00394 Multicopper oxidase
Many of the proteins in this family contain multiple similar copies of this plastocyanin-like domain.. +PF03712 Copper type II ascorbate-dependent monooxygenase, C-terminal domain
The N and C-terminal domains of members of this family adopt the same PNGase F-like fold.. +PF01082 Copper type II ascorbate-dependent monooxygenase, N-terminal domain
The N and C-terminal domains of members of this family adopt the same PNGase F-like fold.. +PF02845 CUE domain
Alignment kindly provided by SMART. CUE domains have been shown to bind ubiquitin [3-4]. It has been suggested that CUE domains are related to Pfam:PF00627 and this has been confirmed by the structure of the domain . CUE domains also occur in two protein of the IL-1 signal transduction pathway, tollip and TAB2 .. +PF00190 Seedstore_11s;Cupin;
This family represents the conserved barrel domain of the 'cupin' superfamily ('cupa' is the Latin term for a small barrel). This family contains 11S and 7S plant seed storage proteins, and germins. Plant seed storage proteins provide the major nitrogen source for the developing plant.. +PF04889 Cwf15/Cwc15 cell cycle control protein
Pfam-B_6589 (release 7.6). This family represents Cwf15/Cwc15 (from Schizosaccharomyces pombe and Saccharomyces cerevisiae respectively) and their homologues. The function of these proteins is unknown, but they form part of the spliceosome and are thus thought to be involved in mRNA splicing . . +PF04677 CwfJ_N_1;
Protein similar to CwfJ C-terminus 1. This region is found in the N terminus of Schizosaccharomyces pombe protein CwfJ (Swiss:Q09909). CwfJ is part of the Cdc5p complex involved in mRNA splicing .. +PF04676 CwfJ_N_2;
Protein similar to CwfJ C-terminus 2. This region is found in the N terminus of Schizosaccharomyces pombe protein CwfJ (Swiss:Q09909). CwfJ is part of the Cdc5p complex involved in mRNA splicing .. +PF01705 CX module
This domain has no known function. It is found in several C. elegans proteins. The domain contains 6 conserved cysteines that probably form three disulphide bridges.. +PF04673 cyclase_polyket;
Polyketide synthesis cyclase. Pfam-B_5596 (release 7.5). This family represents a number of cyclases involved in polyketide synthesis in a number of actinobacterial species.. +PF00134 cyclin;
Cyclin, N-terminal domain. Cyclins regulate cyclin dependent kinases (CDKs). Swiss:P22674 is a Uracil-DNA glycosylase that is related to other cyclins . Cyclins contain two domains of similar all-alpha fold, of which this family corresponds with the N-terminal domain.. +PF02276 Photosynthetic reaction centre cytochrome C subunit
Pfam-B_5109 (release 5.2). Photosynthesis in purple bacteria is dependent on light-induced electron transfer in the reaction centre (RC), coupled to the uptake of protons from the cytoplasm. The RC contains a cytochrome molecule which re-reduces the oxidised electron donor.. +PF05038 cytochr_b558a;
Cytochrome b558 alpha-subunit. Pfam-B_5327 (release 7.7). Cytochrome b-245 light chain (p22-phox) is one of the key electron transfer elements of the NADPH oxidase in phagocytes .. +PF01820 Dala_Dala_ligas;
D-ala D-ala ligase N-terminus. This family represents the N-terminal region of the D-alanine--D-alanine ligase enzyme EC:6.3.2.4 which is thought to be involved in substrate binding . D-Alanine is one of the central molecules of the cross-linking step of peptidoglycan assembly. There are three enzymes involved in the D-alanine branch of peptidoglycan biosynthesis: the pyridoxal phosphate-dependent D-alanine racemase (Alr), the ATP-dependent D-alanine:D-alanine ligase (Ddl), and the ATP-dependent D-alanine:D-alanine-adding enzyme (MurF) .. +PF01113 DapB;
Dihydrodipicolinate reductase, N-terminus. Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The N-terminal domain of DapB binds the dinucleotide NADPH.. +PF01682 DB module
This domain has no known function. It is found in several C. elegans proteins. The domain contains 12 conserved cysteines that probably form six disulphide bridges. This domain is found associated with ig Pfam:PF00047 and fn3 Pfam:PF00041 domains, as well as in some lipases Pfam:PF00657.. +PF05011 Lariat debranching enzyme, C-terminal domain
Pfam-B_9676 (release 7.6). This presumed domain is found at the C-terminus of lariat debranching enzyme. This domain is always found in association with Pfam:PF00149.. +PF03107 DC1;
Pfam-B_16 (release 6.5). +PF00383 dCMP_cyt_deam;
Cytidine and deoxycytidylate deaminase zinc-binding region. +PF05026 Dcp2, box A domain
Pfam-B_10622 (release 7.6). This domain is always found to the amino terminal side of Pfam:PF00293. This domain is specific to mRNA decapping protein 2 and this region has been termed Box A . Removal of the cap structure is catalysed by the Dcp1-Dcp2 complex .. +PF03607 Doublecortin
+PF03455 dDENN domain
This region is always found associated with Pfam:PF02141. It is predicted to form a globular domain . This domain is predicted to be completely alpha helical. Although not statistically supported it has been suggested that this domain may be similar to members of the Rho/Rac/Cdc42 GEF family .. +PF02791 DDT domain
The DDT domain is named after (DNA binding homeobox and Different Transcription factors) and is approximately 60 residues in length . Along with the WHIM motifs, it comprises an entirely alpha helical module found in diverse eukaryotic chromatin proteins . Based on the structure of Ioc3, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins . The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes . In particular, the DDT domain, in combination with the WHIM1 and WHIM2 motifs form the SLIDE domain binding pocket .. +PF00270 DEAD/DEAH box helicase
Members of this family include the DEAD and DEAH box helicases. Helicases are involved in unwinding nucleic acids. The DEAD box helicases are involved in various aspects of RNA metabolism, including nuclear transcription, pre mRNA splicing, ribosome biogenesis, nucleocytoplasmic transport, translation, RNA decay and organellar gene expression.. +PF00531 death;
+PF04626 Dec-1 protein, C terminal region
The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing . Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). Alternative splicing generates different carboxyl terminal ends in different protein isoforms, so this region is the most C terminal region that is present in the main isoforms.. +PF02141 DENN (AEX-3) domain
DENN (after differentially expressed in neoplastic vs normal cells) is a domain which occurs in several proteins involved in Rab-mediated processes or regulation of MAPK signalling pathways .. +PF00610 Domain found in Dishevelled, Egl-10, and Pleckstrin (DEP)
Ponting C, Schultz J, Bork P, Martemyanov K, Thorner J. The DEP domain is responsible for mediating intracellular protein targeting and regulation of protein stability in the cell [2-3]. The DEP domain is present in a number of signaling molecules, including Regulator of G protein Signaling (RGS) proteins, and has been implicated in membrane targeting [4-5]. New findings in yeast, however, demonstrate a major role for a DEP domain in mediating the interaction of an RGS protein to the C-terminal tail of a GPCR, thus placing RGS in close proximity with its substrate G protein alpha subunit [6-7].. +PF02272 DHHA1 domain
This domain is often found adjacent to the DHH domain Pfam:PF01368 and is called DHHA1 for DHH associated domain. This domain is diagnostic of DHH subfamily 1 members . This domain is also found in alanyl tRNA synthetase e.g. Swiss:P00957, suggesting that this domain may have an RNA binding function. The domain is about 60 residues long and contains a conserved GG motif.. +PF04922 DIE2/ALG10 family
Pfam-B_9570 (release 7.6). The ALG10 protein from Saccharomyces cerevisiae encodes the alpha-1,2 glucosyltransferase of the endoplasmic reticulum. This protein has been characterised in rat as potassium channel regulator 1 .. +PF01843 DIL domain
The DIL domain has no known function.. +PF03018 disease_resp;
Dirigent-like protein. Pfam-B_835 (release 6.4). This family contains a number of proteins which are induced during disease response in plants. Members of this family are involved in lignification.. +PF02377 Dishevelled specific domain
Pfam-B_1381 (release 5.2). This domain is specific to the signaling protein dishevelled. The domain is found adjacent to the PDZ domain Pfam:PF00595, often in conjunction with DEP (Pfam:PF00610) and DIX (Pfam:PF00778).. +PF02916 DNA polymerase processivity factor
+PF01965 ThiJ;
The family includes the protease PfpI Swiss:Q51732 . This domain is also found in transcriptional regulators such as Swiss:Q9RJG8. This N-terminal region of the full-length AdpA proteins is necessary for dimerisation of the molecule.. +PF00751 DM-domain;
DM DNA binding domain. The DM domain is named after dsx and mab-3 . dsx contains a single amino-terminal DM domain, whereas mab-3 contains two amino-terminal domains. The DM domain has a pattern of conserved zinc chelating residues C2H2C4 . The dsx DM domain has been shown to dimerise and bind palindromic DNA .. +PF01068 DNA_ligase;
ATP dependent DNA ligase domain. Pfam-B_788 (release 3.0). This domain belongs to a more diverse superfamily, including Pfam:PF01331 and Pfam:PF01653 .. +PF04679 ATP dependent DNA ligase C terminal region
This region is found in many but not all ATP-dependent DNA ligase enzymes (EC:6.5.1.1). It is thought to constitute part of the catalytic core of ATP dependent DNA ligase .. +PF04675 DNA ligase N terminus
This region is found in many but not all ATP-dependent DNA ligase enzymes (EC:6.5.1.1). It is thought to be involved in DNA binding and in catalysis. In human DNA ligase I (Swiss:P18858), and in Saccharomyces cerevisiae (Swiss:P04819), this region was necessary for catalysis, and separated from the amino terminus by targeting elements. In vaccinia virus (Swiss:P16272) this region was not essential for catalysis, but deletion decreases the affinity for nicked DNA and decreased the rate of strand joining at a step subsequent to enzyme-adenylate formation .. +PF00875 DNA photolyase
Pfam-B_777 (release 3.0). This domain binds a light harvesting cofactor.. +PF00136 DNA polymerase family B
This region of DNA polymerase B appears to consist of more than one structural domain, possibly including elongation, DNA-binding and dNTP binding activities.. +PF03104 DNA_pol_B_exo;
DNA polymerase family B, exonuclease domain. This domain has 3' to 5' exonuclease activity and adopts a ribonuclease H type fold.. +PF04081 DNA polymerase delta, subunit 4
Pfam-B_25322 (release 7.3);. +PF00772 DnaB-like helicase N terminal domain
Pfam-B_1000 (release 2.1). The hexameric helicase DnaB unwinds the DNA duplex at the Escherichia coli chromosome replication fork. Although the mechanism by which DnaB both couples ATP hydrolysis to translocation along DNA and denatures the duplex is unknown, a change in the quaternary structure of the protein involving dimerisation of the N-terminal domain has been observed and may occur during the enzymatic cycle. This N-terminal domain is required both for interaction with other proteins in the primosome and for DnaB helicase activity .. +PF03796 DnaB-like helicase C terminal domain
Pfam-B_1000 (release 2.1). The hexameric helicase DnaB unwinds the DNA duplex at the Escherichia coli chromosome replication fork. Although the mechanism by which DnaB both couples ATP hydrolysis to translocation along DNA and denatures the duplex is unknown, a change in the quaternary structure of the protein involving dimerisation of the N-terminal domain has been observed and may occur during the enzymatic cycle. This C-terminal domain contains an ATP-binding site and is therefore probably the site of ATP hydrolysis.. +PF00226 DnaJ domain
DnaJ domains (J-domains) are associated with hsp70 heat-shock system and it is thought that this domain mediates the interaction. DnaJ-domain is therefore part of a chaperone (protein folding) system. The T-antigens, although not in Prosite are confirmed as DnaJ containing domains from literature .. +PF03351 DOMON domain
Aravind L, Coggill P. The DOMON (named after dopamine beta-monooxygenase N-terminal) domain is 110-125 residues long. It is predicted to form an all beta fold with up to 11 strands and is secreted to the extracellular compartment. The beta-strand folding produces a hydrophobic pocket which appears to bind soluble haem. This is consistent with the predominant architectures where the protein is associated with cytochromes or enzymatic domains whose activity involves redox or electron transfer reactions potentially as a direct participant in the electron transfer process. The DOMON domain superfamily, of which this is just one member, shows (1) multiple hydrophobic residues that contribute to the hydrophobic core of the strands of the beta-sandwich, and small residues found at the boundaries of strands and loops, (2) a strongly conserved charged residue (usually arginine/lysine) at the end of strand 9, which possibly stabilises the loop between 9 and 10, and (3) a polar residue (usually histidine, lysine or arginine), that interacts or coordinates with ligands . The suggested superfamily includes both haem- and sugar-binding members: the haem-binding families being the ethyl-Benzoate dehydrogenase family EB_dh, Pfam:PF09459, the cellobiose dehydrogenase family CBDH and this family, and the sugar-binding families being the xylanases, CBM_4_9, Pfam:PF02018. The common feature of the superfamily is the 11-beta-strand structure, although the first and eleventh strands are not well conserved either within families or between families.. +PF04124 Dor1-like family
Pfam-B_12640 (release 7.3);. Dor1 is involved in vesicle targeting to the yeast Golgi apparatus and complexes with a number of other trafficking proteins, which include Sec34 and Sec35 .. +PF04173 TQO small subunit DoxD
Swiss:P97207 is a subunit of the terminal quinol oxidase present in the plasma membrane of Acidianus ambivalens, with calculated molecular mass of 20.4 kDa . Thiosulphate:quinone oxidoreductase (TQO) is one of the early steps in elemental sulphur oxidation. A novel TQO enzyme was purified from the thermo-acidophilic archaeon Acidianus ambivalens and shown to consist of a large subunit (DoxD) and a smaller subunit (DoxA). The DoxD- and DoxA-like two subunits are fused together in a single polypeptide in Swiss:Q8AAF0.. +PF00930 DPPIV_N_term;
Dipeptidyl peptidase IV (DPP IV) N-terminal region. Pfam-B_1017 (release 3.0). This family is an alignment of the region to the N-terminal side of the active site. The Prosite motif does not correspond to this Pfam entry.. +PF05186 Dpy-30 motif
Pfam-B_13490 (release 7.7). +PF01414 Delta serrate ligand
Ponting CP, Schultz J, Bork P. +PF01666 DX module
This domain has no known function. It is found in several C. elegans proteins. The domain contains 6 conserved cysteines that probably form three disulphide bridges.. +PF00782 Dual specificity phosphatase, catalytic domain
Alignment kindly provided by SMART. Ser/Thr and Tyr protein phosphatases. The enzyme's tertiary fold is highly similar to that of tyrosine-specific phosphatases, except for a "recognition" region .. +PF00035 Double-stranded RNA binding motif
Sequences gathered for seed by HMM_iterative_training Putative motif shared by proteins that bind to dsRNA. At least some DSRM proteins seem to bind to specific RNA targets. Exemplified by Staufen, which is involved in localisation of at least five different mRNAs in the early Drosophila embryo. Also by interferon-induced protein kinase in humans, which is part of the cellular response to dsRNA.. +PF01951 DUF101;
Archease protein family (MTH1598/TM1083). This archease family of proteins , has two SHS2 domains , with one inserted into another. It is predicted to be an enzyme . It is predicted to act as a chaperone in DNA/RNA metabolism . . +PF02575 DUF149;
YbaB/EbfC DNA-binding family. This is a family of DNA-binding proteins. Members of this family form homodimers which bind DNA via a tweezer-like structure [1-3]. The conformation of the DNA is changed when bound to these proteins . In bacteria, these proteins may play a role in DNA replication-recovery following DNA damage .. +PF02580 DUF154;
D-Tyr-tRNA(Tyr) deacylase. This family comprises of several D-Tyr-tRNA(Tyr) deacylase proteins. Cell growth inhibition by several d-amino acids can be explained by an in vivo production of d-aminoacyl-tRNA molecules. Escherichia coli and yeast cells express an enzyme, d-Tyr-tRNA(Tyr) deacylase, capable of recycling such d-aminoacyl-tRNA molecules into free tRNA and d-amino acid. Accordingly, upon inactivation of the genes of the above deacylases, the toxicity of d-amino acids increases. Orthologues of the deacylase are found in many cells .. +PF02583 DUF156;
Metal-sensitive transcriptional repressor. This is a family of metal-sensitive repressors, involved in resistance to metal ions. Members of this family bind copper, nickel or cobalt ions via conserved cysteine and histidine residues. In the absence of metal ions, these proteins bind to promoter regions and repress transcription. When bound to metal ions they are unable to bind DNA, leading to transcriptional derepression [1-5].. +PF02588 Uncharacterized BCR, YitT family COG1284
This is probably a bacterial ABC transporter permease (personal obs:Yeats C).. +PF02639 Uncharacterized BCR, YaiI/YqxD family COG1671
+PF02641 Uncharacterized ACR, COG1993
+PF02958 DUF227;
Pfam-B_2081 (release 6.4). This family includes ecdysteroid 22-kinase, an enzyme responsible for the phosphorylation of ecdysteroids (insect growth and moulting hormones) at C-22, to form physiologically inactive ecdysteroid 22-phosphates .. +PF02995 Protein of unknown function (DUF229)
Pfam-B_1566 (release 6.4). Members of this family are uncharacterised. They are 500-1200 amino acids in length and share a long region of conservation that probably corresponds to several domains. The Go annotation for the protein indicates that it is involved in nematode larval development and has a positive regulation on growth rate.. +PF01697 DUF23;
Glycosyltransferase family 92. Pfam-B_1694 (release 4.1). Members of this family act as galactosyltransferases, belonging to glycosyltransferase family 92 [1,2]. The aligned region contains several conserved cysteine residues and several charged residues that may be catalytic residues. This is supported by the inclusion of this family in the GT-A glycosyl transferase superfamily.. +PF02996 DUF232;
Pfam-B_1664 (release 6.4). This family comprises of several prefoldin subunits. The biogenesis of the cytoskeletal proteins actin and tubulin involves interaction of nascent chains of each of the two proteins with the oligomeric protein prefoldin (PFD) and their subsequent transfer to the cytosolic chaperonin CCT (chaperonin containing TCP-1). Electron microscopy shows that eukaryotic PFD, which has a similar structure to its archaeal counterpart, interacts with unfolded actin along the tips of its projecting arms. In its PFD-bound state, actin seems to acquire a conformation similar to that adopted when it is bound to CCT .. +PF03080 DUF239; Glucoamylase;
Domain of unknown function (DUF239). Pfam-B_913 (release 6.4). This is a family of plant and bacterial proteins, a small number of which are putative carboxy-terminal peptidases (see for example Swiss:Q9XIN9).. +PF03087 Arabidopsis protein of unknown function
Pfam-B_1563 (release 6.5). This family represents a number of Arabidopsis proteins. Their functions are unknown.. +PF03103 Domain of unknown function (DUF243)
Pfam-B_1157 (release 6.5). This family of uncharacterised proteins is only found in fly proteins. It is found associated with YLP motifs Pfam:PF02757 in some proteins.. +PF03140 Plant protein of unknown function
Pfam-B_1292 (release 6.5). The function of the plant proteins constituting this family is unknown.. +PF03141 DUF248;
Putative S-adenosyl-L-methionine-dependent methyltransferase. Mifsud W, Moxon SJ, Eberhardt R. Pfam-B_1462 (release 6.5). This family is a putative S-adenosyl-L-methionine (SAM)-dependent methyltransferase [1,2].. +PF03164 DUF254;
Trafficking protein Mon1. Members of this family have been called SAND proteins although these proteins do not contain a SAND domain. In Saccharomyces cerevisiae a protein complex of Mon1 and Ccz1 functions with the small GTPase Ypt7 to mediate vesicle trafficking to the vacuole . The Mon1/Ccz1 complex is conserved in eukaryotic evolution and members of this family (previously known as DUF254) are distant homologues to domains of known structure that assemble into cargo vesicle adapter (AP) complexes . describes orthologues in Fugu rubripes.. +PF03194 DUF259;
Pfam-B_2902 (release 6.5). This family contains the N terminal region of several LUC7 protein homologues and only contains eukaryotic proteins. LUC7 has been shown to be a U1 snRNA associated protein with a role in splice site recognition . The family also contains human and mouse LUC7 like (LUC7L) proteins and human cisplatin resistance-associated overexpressed protein (CROP) .. +PF01657 DUF26;
Salt stress response/antifungal. Pfam-B_980 (release 4.1). This domain is often found in association with the kinase domains Pfam:PF00069 or Pfam:PF07714. In many proteins it is duplicated. It contains six conserved cysteines which are involved in disulphide bridges . It has a role in salt stress response and has antifungal activity .. +PF03195 Protein of unknown function DUF260
Pfam-B_2998 (release 6.5). +PF03268 Caenorhabditis protein of unknown function, DUF267
Pfam-B_4201 (release 6.5). +PF03269 Caenorhabditis protein of unknown function, DUF268
Pfam-B_4252 (release 6.5). +PF03312 Protein of unknown function (DUF272)
Mifsud W, Pollington J. Pfam-B_3609 (release 6.5). This family of proteins is restricted to C.elegans and has no known function. The protein contains a ubiquitin fold. The GO annotation for the protein indicates that it has a function in nematode larval development.. +PF03409 DUF274; Glycoprotein_Ce;
Transmembrane glycoprotein. Pfam-B_4416 (release 6.6). This family of proteins has some GO annotations for positive regulation of growth rate and nematode larval development. This is probably a family of membrane glycoproteins .. +PF03357 DUF279; SNF7; ESCRT-III;
Mifsud W, Moxon SJ, Mistry J, Wood V. Pfam-B_1641 (release 6.6). This family of proteins are involved in protein sorting and transport from the endosome to the vacuole/lysosome in eukaryotic cells. Vacuoles/lysosomes play an important role in the degradation of both lipids and cellular proteins. In order to perform this degradative function, vacuoles/lysosomes contain numerous hydrolases which have been transported in the form of inactive precursors via the biosynthetic pathway and are proteolytically activated upon delivery to the vacuole/lysosome. The delivery of transmembrane proteins, such as activated cell surface receptors to the lumen of the vacuole/lysosome, either for degradation/downregulation, or in the case of hydrolases, for proper localisation, requires the formation of multivesicular bodies (MVBs). These late endosomal structures are formed by invaginating and budding of the limiting membrane into the lumen of the compartment. During this process, a subset of the endosomal membrane proteins is sorted into the forming vesicles. Mature MVBs fuse with the vacuole/lysosome, thereby releasing cargo containing vesicles into its hydrolytic lumen for degradation. Endosomal proteins that are not sorted into the intralumenal MVB vesicles are either recycled back to the plasma membrane or Golgi complex, or remain in the limiting membrane of the MVB and are thereby transported to the limiting membrane of the vacuole/lysosome as a consequence of fusion. Therefore, the MVB sorting pathway plays a critical role in the decision between recycling and degradation of membrane proteins . A few archaeal sequences are also present within this family.. +PF03380 Caenorhabditis protein of unknown function, DUF282
Pfam-B_2840 (release 6.6). +PF03368 DUF283; dsRNA_bind;
Dicer dimerisation domain. +PF03382 Mycoplasma protein of unknown function, DUF285
Pfam-B_2864 (release 6.6). This region appears distantly related to leucine rich repeats.. +PF01060 Worm_family_2;
Transthyretin-like family. This family called family 2 in , has weak similarity to transthyretin (formerly called pre-albumin) which transports thyroid hormones. The specific function of this protein is unknown.. +PF03478 Protein of unknown function (DUF295)
Pfam-B_790 (release 7.0). This family of proteins are found in plants. The function of the proteins is unknown.. +PF03556 DUF298;
Pfam-B_3021 (release 7.0). This domain binds to cullins and to Rbx-1, components of an E3 ubiquitin ligase complex for neddylation [1-3]. Neddylation is the process by which the C-terminal glycine of the ubiquitin-like protein Nedd8 is covalently linked to lysine residues in a protein through an isopeptide bond. The structure of this domain is composed entirely of alpha helices [1,2].. +PF03619 DUF300;
Organic solute transporter Ostalpha. Pfam-B_3382 (release 7.0). This family is a transmembrane organic solute transport protein. In vertebrates these proteins form a complex with Ostbeta, and function as bile transporters . In plants they may transport brassinosteroid-like compounds and act as regulators of cell death .. +PF03703 DUF304;
Domain found in uncharacterised family of membrane proteins. 1-3 copies found in each protein, with each copy flanked by transmembrane helices. Members of this family have a PH domain like structure .. +PF03713 Domain of unknown function (DUF305)
Domain found in small family of bacterial secreted proteins with no known function. Also found in Paramecium bursaria chlorella virus 1. This domain is short and found in one or two copies. The domain has a conserved HH motif that may be functionally important. This domain belongs to the ferritin superfamily. It contains two sequence similar repeats each of which is composed of two alpha helices.. +PF03754 Domain of unknown function (DUF313)
Pfam-B_2540 (release 7.0). Family of proteins from Arabidopsis thaliana with uncharacterised function.. +PF03761 Domain of unknown function (DUF316)
Pfam-B_2972 (release 7.0). This family of proteins with unknown function are from Caenorhabditis elegans. The protein has GO references indicating the protein is a positive regulator of growth rate and is also involved in nematode larval development.. +PF03777 Small secreted domain (DUF320)
Small domain found in a family of secreted streptomyces proteins. It occurs singly or as a pair. Many of the domains have two cysteines that may form a disulphide bridge.. +PF03860 Domain of Unknown Function (DUF326)
This family is a small cysteine-rich repeat. The cysteines mostly follow a C-X(2)-C-X(3)-C-X(2)-C-X(3) pattern, though they often appear at other positions in the repeat as well.. +PF03935 DUF338;
Beta-glucan synthesis-associated protein (SKN1). This family consists of the beta-glucan synthesis-associated proteins KRE6 and SKN1. Beta1,6-Glucan is a key component of the yeast cell wall, interconnecting cell wall proteins, beta1,3-glucan, and chitin. It has been postulated that the synthesis of beta1,6-glucan begins in the endoplasmic reticulum with the formation of protein-bound primer structures and that these primer structures are extended in the Golgi complex by two putative glucosyltransferases that are functionally redundant, Kre6 and Skn1. This is followed by maturation steps at the cell surface and by coupling to other cell wall macromolecules . . +PF03990 Domain of unknown function (DUF348)
This domain normally occurs as tandem repeats; however it is found as a single copy in the S. cerevisiae DNA-binding nuclear protein YCR593 (Swiss:P25357). This protein is involved in sporulation part of the SET3C complex, which is required to repress early/middle sporulation genes during meiosis ( ). The bacterial proteins are likely to be involved in a cell wall function as they are found in conjunction with the Pfam:PF07501 domain, which is involved in various cell surface processes.. +PF03993 Domain of Unknown Function (DUF349)
This domain is found singly or as up to five tandem repeats in a small set of bacterial proteins. There are two or three alpha-helices, and possibly a beta-strand.. +PF04008 DUF355;
Adenosine specific kinase. The structure of a member of this family from the hyperthermophilic archaeon Pyrobaculum aerophilum contains a modified histidine residue which is interpreted as stable phosphorylation . In vitro binding studies confirmed that adenosine and AMP but not ADP or ATP bind to the protein .. +PF04013 DUF358;
Putative SAM-dependent RNA methyltransferase. This family is likely to be an S-adenosyl-L-methionine (SAM)-dependent RNA methyltransferase .. +PF04020 Membrane protein of unknown function
These proteins are predicted transmembrane proteins with probably four transmembrane spans. The function of these bacterial proteins is unknown. The sequences do not appear to contain any conserved polar residues that could form an active site.. +PF04037 Domain of unknown function (DUF382)
Pfam-B_10232 (release 7.3);. This domain is specific to the human splicing factor 3b subunit 2 and its orthologues. Splicing factor 3b subunit 2 or SAP145 is a suppressor of U2 snRNA mutations. Pre-mRNA splicing is catalysed by a large ribonucleoprotein complex called the spliceosome. Spliceosomes are multi-component enzymes that catalyse pre-mRNA splicing and form step-wise by the ordered interaction of UsnRNPs and non-snRNP proteins with short conserved regions of the pre-mRNA at the 5' and 3' splice sites and branch site .. +PF04075 Domain of unknown function (DUF385)
TIGRFAMs (release 2.0);. Family of Mycobacterium tuberculosis proteins.. +PF04087 Domain of unknown function (DUF389)
TIGRFAMs (release 2.0);. Family of hypothetical bacterial proteins with an undetermined function.. +PF04155 DUF398;
Pfam-B_3296 (release 7.3). This family consists of the ground-like domain and is specific to C.elegans. It has been proposed that the ground-like domain containing proteins may bind and modulate the activity of Patched-like membrane molecules, reminiscent of the modulating activities of neuropeptides . . +PF04241 Protein of unknown function (DUF423)
This family of proteins with unknown function is a possible integral membrane protein from Caenorhabditis elegans. This family of proteins has GO references indicating the protein is involved in nematode larval development and is a positive regulator of growth rate.. +PF04255 Protein of unknown function (DUF433)
+PF01862 DUF44;
Pyruvoyl-dependent arginine decarboxylase (PvlArgDC). Methanococcus jannaschii contains homologues of most genes required for spermidine polyamine biosynthesis. Yet genomes from neither this organism nor any other euryarchaeon have orthologues of the pyridoxal 5'-phosphate-dependent ornithine or arginine decarboxylase genes, required to produce putrescine. Instead, these organisms have a new class of arginine decarboxylase (PvlArgDC) formed by the self-cleavage of a proenzyme into a 5-kDa subunit and a 12-kDa subunit that contains a reactive pyruvoyl group. Although this extremely thermostable enzyme has no significant sequence similarity to previously characterised proteins, conserved active site residues are similar to those of the pyruvoyl-dependent histidine decarboxylase enzyme, and its subunits form a similar (alpha-beta)(3) complex. Homologues of PvlArgDC are found in several bacterial genomes, including those of Chlamydia spp., which have no agmatine ureohydrolase enzyme to convert agmatine (decarboxylated arginine) into putrescine. In these intracellular pathogens, PvlArgDC may function analogously to pyruvoyl-dependent histidine decarboxylase; the cells are proposed to import arginine and export agmatine, increasing the pH and affecting the host cell's metabolism. Phylogenetic analysis of PvlArgDC proteins suggests that this gene has been recruited from the euryarchaeal polyamine biosynthetic pathway to function as a degradative enzyme in bacteria .. +PF04332 Protein of unknown function (DUF475)
Predicted to be an integral membrane protein with multiple membrane spans.. +PF04720 Protein of unknown function (DUF506)
Pfam-B_4111 (release 7.5). Family of uncharacterised plant proteins.. +PF04784 Protein of unknown function, DUF547
Pfam-B_3926 (release 7.5). Family of uncharacterised proteins from C. elegans and A. thaliana. . +PF04484 Family of unknown function (DUF566)
Pfam-B_3992 (release 7.5). Family of related proteins that is plant specific.. +PF04526 Protein of unknown function (DUF568)
Pfam-B_4977 (release 7.5). Family of uncharacterised plant proteins.. +PF04502 Family of unknown function (DUF572)
Pfam-B_3967 (release 7.5). Family of eukaryotic proteins with undetermined function.. +PF04504 Protein of unknown function, DUF573
Pfam-B_2087 (release 7.5). +PF04535 Domain of unknown function (DUF588)
Pfam-B_1439 (release 7.5). This family of plant proteins contains a domain that may have a catalytic activity. It has a conserved arginine and aspartate that could form an active site. These proteins are predicted to contain 3 or 4 transmembrane helices.. +PF01883 Domain of unknown function DUF59
This family has an alpha/beta topology, with 13 conserved hydrophobic residues at its core and a putative active site containing a highly conserved cysteine . Members of this family are involved in a range of physiological functions. The family includes PaaJ (PhaH) Swiss:O84984 from Pseudomonas putida. PaaJ forms a complex with PaaG (PhaF) Swiss:O84982, PaaI (PhaG) Swiss:O84983 and PaaK (PhaI) Swiss:O84985, which hydroxylates phenylacetic acid to 2-hydroxyphenylacetic acid . It also includes PaaD Swiss:P76080 from Escherichia coli, a member of a multicomponent oxygenase involved in phenylacetyl-CoA hydroxylation . It is found near the N-terminus of the chloroplast scaffold protein HCF101 Swiss:Q8LD16, involved in the assembly of [4Fe-4S] clusters and their transfer to apoproteins .. +PF04547 DUF590;
Calcium-activated chloride channel. Pfam-B_2735 (release 7.5). The family carries eight putative transmembrane domains, and, although it has no similarity to other known channel proteins, it is clearly a calcium-activated ionic channel. It is expressed in various secretory epithelia, the retina and sensory neurons, and mediates receptor-activated chloride currents in diverse physiological processes .. +PF04576 DUF593;
Mifsud W, Eberhardt R. Pfam-B_2802 (release 7.5). This domain binds to zein proteins, Pfam:PF01559 . Zein proteins are seed storage proteins.. +PF04642 Protein of unknown function, DUF601
Pfam-B_5475 (release 7.5). This family represents a conserved region found in several uncharacterised plant proteins.. +PF04641 DUF602;
Mifsud W, Pollington J. Pfam-B_5482 (release 7.5). It is vital for effective cell-replication that replication is not stalled at any point by, for instance, damaged bases. Replication termination factor 2 (Rtf2) stabilizes the replication fork stalled at the site-specific replication barrier RTS1 by preventing replication restart until completion of DNA synthesis by a converging replication fork initiated at a flanking origin. The RTS1 element terminates replication forks that are moving in the cen2-distal direction while allowing forks moving in the cen2-proximal direction to pass through the region. Rtf2 contains a C2HC2 motif related to the C3HC4 RING-finger motif, and would appear to fold up, creating a RING finger-like structure but forming only one functional Zn2+ ion-binding site . This domain is also found at the N-terminus of peptidyl-prolyl cis-trans isomerase 4, a divergent cyclophilin family .. +PF04652 Vta1 like
Mifsud W, Mistry J, Wood V. Pfam-B_5537 (release 7.5). Vta1 (VPS20-associated protein 1) is a positive regulator of Vps4. Vps4 is an ATPase that is required in the multivesicular body (MVB) sorting pathway to dissociate the endosomal sorting complex required for transport (ESCRT). Vta1 promotes correct assembly of Vps4 and stimulates its ATPase activity through its conserved Vta1/SBP1/LIP5 region .. +PF04678 Protein of unknown function, DUF607
Pfam-B_5620 (release 7.5). This family represents a conserved region found in several uncharacterised eukaryotic proteins.. +PF04685 Protein of unknown function, DUF608
Pfam-B_5657 (release 7.5). This family represents a conserved region with a pankaryotic distribution in a number of uncharacterised proteins.. +PF04727 DUF609;
Pfam-B_3095 (release 7.5). This family represents a conserved domain which is found in a number of eukaryotic proteins including CED-12, ELMO I and ELMO II. ELMO1 is a component of signalling pathways that regulate phagocytosis and cell migration and is the mammalian orthologue of the C. elegans gene, ced-12. CED-12 is required for the engulfment of dying cells and cell migration. In mammalian cells, ELMO1 interacts with Dock180 as part of the CrkII/Dock180/Rac pathway responsible for phagocytosis and cell migration. ELMO1 is ubiquitously expressed, although its expression is highest in the spleen, an organ rich in immune cells . ELMO1 has a PH domain and a polyproline sequence motif at its C terminus which are not present in this alignment.. +PF04747 Protein of unknown function, DUF612
Pfam-B_3614 (release 7.5). This family includes several uncharacterised proteins from Caenorhabditis elegans.. +PF04749 DUF614;
Pfam-B_3635 (release 7.5). This family includes Swiss:Q9NZF1, the Placenta-specific gene 8 protein.. +PF04759 Protein of unknown function, DUF617
Pfam-B_3842 (release 7.5). This family represents a conserved region in a number of uncharacterised plant proteins.. +PF04818 DUF618;
RNA polymerase II-binding domain.. Mifsud W, Eberhardt R. Pfam-B_3687 (release 7.6). This domain binds to the phosphorylated C-terminal domain (CTD) of RNA polymerase II [1,2].. +PF04789 Protein of unknown function (DUF621)
Pfam-B_6219 (release 7.5). Family of uncharacterised proteins. Some (such as Swiss:O01625) are annotated as having possible G-protein-coupled receptor-like activity.. +PF04802 DUF625; SMK-1_Ce;
Component of IIS longevity pathway SMK-1. Pfam-B_6319 (release 7.5). SMK-1 is a component of the IIS longevity pathway which regulates aging in C.elegans. Specifically, SMK-1 influences DAF-16-dependent regulation of the aging process by regulating the transcriptional specificity of DAF-16 activity . SMK-1 plays a role in longevity by modulating the transcriptional specificity of DAF-16 .. +PF04783 Protein of unknown function (DUF630)
Pfam-B_2481 (release 7.6). This region is sometimes found at the N-terminus of putative plant bZIP proteins. Its function is not known. Structural modelling suggests this domain may bind nucleic acids .. +PF01683 EB module
This domain has no known function. It is found in several C. elegans proteins. The domain contains 8 conserved cysteines that probably form four disulphide bridges. This domain is found associated with kunitz domains Pfam:PF00014.. +PF04782 Protein of unknown function (DUF632)
Pfam-B_2481 (release 7.6). This plant protein may be a leucine zipper, but there is no experimental evidence for this.. +PF04826 DUF634;
Pfam-B_2700 (release 7.6). This domain contains armadillo-like repeats . Proteins containing this domain interact with numerous other proteins, through these interactions they are involved in a wide variety of processes including carcinogenesis , control of cellular ageing and survival , regulation of circadian rhythm and lysosomal sorting of G protein-coupled receptors .. +PF04859 Plant protein of unknown function (DUF641)
Pfam-B_6069 (release 7.6). Plant protein of unknown function.. +PF04884 Vitamin B6 photo-protection and homoeostasis
Mifsud W, Eberhardt R. Pfam-B_6523 (release 7.6). In plants, this domain plays a role in auxin-transport, plant growth and development [1,2] and appears to be expressed by all cells in the plant as well as in plastids. The family has been shown to play a role in vitamin B6 photo-protection and homoeostasis in plants .. +PF04900 DUF652;
Mifsud W, Wood V, Mistry J. Pfam-B_6634 (release 7.6). Fcf1 is a nucleolar protein involved in pre-rRNA processing . Depletion of yeast Fcf1 and Fcf2 leads to a decrease in synthesis of the 18S rRNA and results in a deficit in 40S ribosomal subunits .. +PF04949 DUF662;
Transcriptional activator. Pfam-B_6952 (release 7.6). This family of proteins may act as a transcriptional activator. It plays a role in stress response in plants .. +PF04950 Protein of unknown function (DUF663)
Pfam-B_5147 (release 7.6). This family contains several uncharacterised eukaryotic proteins.. +PF05057 Putative serine esterase (DUF676)
Pfam-B_5941 (release 7.7). This family of proteins are probably serine esterase type enzymes with an alpha/beta hydrolase fold.. +PF05097 Protein of unknown function (DUF688)
Pfam-B_6331 (release 7.7). This family contains several uncharacterised proteins found in Arabidopsis thaliana.. +PF05108 Protein of unknown function (DUF690)
Pfam-B_6322 (release 7.7). This family contains several uncharacterised bacterial membrane proteins.. +PF05127 DUF699;
Pfam-B_884 (release 7.7). This domain contains a P-loop (Walker A) motif, suggesting that it has ATPase activity, and a Walker B motif. In tRNA(Met) cytidine acetyltransferase (TmcA) it may function as an RNA helicase motor (driven by ATP hydrolysis) which delivers the wobble base to the active centre of the GCN5-related N-acetyltransferase (GNAT) domain . It is found in the bacterial exodeoxyribonuclease V alpha chain (RecD), which has 5'-3' helicase activity. It is structurally similar to the motor domain 1A in other SF1 helicases .. +PF05129 DUF701;
Transcription elongation factor Elf1 like. Pfam-B_8884 (release 7.7). This family of short proteins contains a putative zinc binding domain with four conserved cysteines. Swiss:P36053 has been identified as a transcription elongation factor in Saccharomyces cerevisiae .. +PF05197 DUF714;
Pfam-B_9855 (release 7.7). TRIC (trimeric intracellular cation) channels are differentially expressed in intracellular stores in animal cell types. TRIC subtypes contain three proposed transmembrane segments, and form homo-trimers with a bullet-like structure. Electrophysiological measurements with purified TRIC preparations identify a monovalent cation-selective channel .. +PF01031 dynamin_2;
Dynamin central region. Pfam-B_220 (release 3.0). This region lies between the GTPase domain, see Pfam:PF00350, and the pleckstrin homology (PH) domain, see Pfam:PF00169.. +PF03028 Dynein heavy chain and region D6 of dynein motor
Pfam-B_928 (release 6.4). This family represents the C-terminal region of dynein heavy chain. The chain also contains ATPase activity and microtubule binding ability and acts as a motor for the movement of organelles and vesicles along microtubules. Dynein is also involved in cilia and flagella movement. The dynein subunit consists of at least two heavy chains and a number of intermediate and light chains . The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This C-terminal domain carries the D6 region of the dynein motor where the P-loop has been lost in evolution but the general structure of a potential ATP binding site appears to be retained .. +PF04261 Dyp_perox_fam;
Dyp-type peroxidase family . TIGRFAMs (release 2.0);. This family of dye-decolourising peroxidases lack a typical heme-binding region.. +PF02221 ML domain
ML domain - MD-2-related lipid recognition domain. This family consists of proteins from plants, animals and fungi, including dust mite allergen Der P 2 (Swiss:P49278). It has been implicated in lipid recognition, particularly in the recognition of pathogen related products. A mutation in Npc2 (Swiss:Q15668) causes a rare form of Niemann-Pick type C2 disease. This domain has a similar topology to immunoglobulin domains.. +PF03271 EB1-like C-terminal motif
Pfam-B_1529 (release 6.5). This motif is found at the C-terminus of proteins that are related to the EB1 protein. The EB1 proteins contain an N-terminal CH domain Pfam:PF00307. The human EB1 protein was originally discovered as a protein interacting with the C-terminus of the APC protein. This interaction is often disrupted in colon cancer, due to deletions affecting the APC C-terminus. Several EB1 orthologues are also included in this family. The interaction between EB1 and APC has been shown to have a potent synergistic effect on microtubule polymerisation. Neither of EB1 or APC alone has this effect. It is thought that EB1 targets APC to the + ends of microtubules, where APC promotes microtubule polymerisation. This process is regulated by APC phosphorylation by Cdc2, which disrupts APC-EB1 binding. Human EB1 protein can functionally substitute for the yeast EB1 homologue Mal3. In addition, Mal3 can substitute for human EB1 in promoting microtubule polymerisation with APC.. +PF05009 EBNA-3;
Epstein-Barr virus nuclear antigen 3 (EBNA-3). Pfam-B_4674 (release 7.6). This family contains EBNA-3A, -3B, and -3C which are latent infection nuclear proteins important for Epstein-Barr virus (EBV)-induced B-cell immortalisation and the immune response to EBV infection .. +PF00679 Elongation factor G C-terminus
Pfam-B_40 (release 2.1). This domain includes the carboxyl terminal regions of Elongation factor G, elongation factor 2 and some tetracycline resistance proteins and adopt a ferredoxin-like fold.. +PF03764 Elongation factor G, domain IV
Pfam-B_40 (release 2.1). This domain is found in elongation factor G, elongation factor 2 and some tetracycline resistance proteins and adopts a ribosomal protein S5 domain 2-like fold.. +PF00036 efhand; EF_hand_1;
The EF-hands can be divided into two classes: signaling proteins and buffering/transport proteins. The first group is the largest and includes the most well-known members of the family such as calmodulin, troponin C and S100B. These proteins typically undergo a calcium-dependent conformational change which opens a target binding site. The latter group is represented by calbindin D9k and do not undergo calcium dependent conformational changes.. +PF04189 eIF3_gamma;
Pfam-B_8933 (release 7.3);. eIF-3 is a multi-subunit complex that stimulates translation initiation in vitro at several different steps. This family corresponds to the gamma subunit of eIF3 [1,2]. The Yeast protein Gcd10p has also been shown to be part of a complex with the methyltransferase Gcd14p that is involved in modifying tRNA .. +PF03610 PTS system fructose IIA component
TIGRFAMs, Griffiths-Jones SR. +PF01448 ELM2 domain
The ELM2 (Egl-27 and MTA1 homology 2) domain is a small domain of unknown function. It is found in the MTA1 protein that is part of the NuRD complex . The domain is usually found to the N terminus of a myb-like DNA binding domain Pfam:PF00249. ELM2 is also found associated with an ARID DNA binding domain Pfam:PF01388 in Swiss:O82364. This suggests that ELM2 may also be involved in DNA binding, or perhaps is a protein-protein interaction domain.. +PF02990 Endomembrane protein 70
Pfam-B_1312 (release 6.4). +PF01223 Endonuclease;
DNA/RNA non-specific endonuclease. +PF00812 Ephrin
Pfam-B_1390 (release 2.1). +PF05139 Erythromycin esterase
This family includes erythromycin esterase enzymes [1,2] that confer resistance to the erythromycin antibiotic.. +PF04800 ETC_CI_21;
ETC complex I subunit conserved region. Pfam-B_6275 (release 7.5). Family of pankaryotic NADH-ubiquinone oxidoreductase subunits (EC:1.6.5.3) (EC:1.6.99.3) from complex I of the electron transport chain initially identified in Neurospora crassa as a 21 kDa protein .. +PF04716 ETC_CI_29_9;
ETC complex I subunit conserved region. Pfam-B_4159 (release 7.5). Family of eukaryotic NADH-ubiquinone oxidoreductase subunits (EC:1.6.5.3) (EC:1.6.99.3) from complex I of the electron transport chain initially identified in Neurospora crassa as a 29.9 kDa protein. The conserved region is found at the N-terminus of the member proteins .. +PF04621 PEA3 subfamily ETS-domain transcription factor N terminal domain
The N terminus of the PEA3 transcription factors is implicated in transactivation and in inhibition of DNA binding . Transactivation is potentiated by activation of the Ras/MAP kinase and protein kinase A signalling cascades. The N terminal region contains conserved MAP kinase phosphorylation sites .. +PF04777 Erv1 / Alr family
Pfam-B_5005 (release 7.6). Biogenesis of Fe/S clusters involves a number of essential mitochondrial proteins. Erv1p of Saccharomyces cerevisiae mitochondria is required for the maturation of Fe/S proteins in the cytosol. The ALR (augmenter of liver regeneration) represents a mammalian orthologue of yeast Erv1p. Both Erv1p and full-length ALR are located in the mitochondrial intermembrane space and are thought to operate downstream of the mitochondrial ABC transporter . . +PF03372 Endonuclease/Exonuclease/phosphatase family
This large family of proteins includes magnesium dependent endonucleases and a large number of phosphatases involved in intracellular signalling . This family includes: AP endonuclease proteins EC:4.2.99.18 e.g Swiss:P27695, DNase I proteins EC:3.1.21.1 e.g. Swiss:P24855, Synaptojanin an inositol-1,4,5-trisphosphate phosphatase EC:3.1.3.56 Swiss:O43426, Sphingomyelinase EC:3.1.4.12 Swiss:P11889 and Nocturnin Swiss:O35710.. +PF02609 Exonuclease VII small subunit
This family consists of exonuclease VII, small subunit EC:3.1.11.6. This enzyme catalyses exonucleolytic cleavage in either 5'->3' or 3'->5' direction to yield 5'-phosphomononucleotides. This exonuclease VII enzyme is composed of one large subunit and 4 small ones .. +PF00929 Exonuclease; Exonuc_X-T;
Pfam-B_1153 (release 3.0). This family includes a variety of exonuclease proteins, such as ribonuclease T and the epsilon subunit of DNA polymerase III.; . +PF03016 Exostosin family
Pfam-B_2031 (release 6.4). The EXT family is a family of tumour suppressor genes. Mutations of EXT1 Swiss:Q16394 on 8q24.1, EXT2 Swiss:Q93063 on 11p11-13, and EXT3 on 19p have been associated with the autosomal dominant disorder known as hereditary multiple exostoses (HME). This is the most common known skeletal dysplasia. The chromosomal locations of other EXT genes suggest association with other forms of neoplasia. EXT1 and EXT2 have both been shown to encode a heparan sulphate polymerase with both D-glucuronyl (GlcA) and N-acetyl-D-glucosaminoglycan (GlcNAC) transferase activities . The nature of the defect in heparan sulphate biosynthesis in HME is unclear.. +PF03124 EXS family
Pfam-B_605 (release 6.5). We have named this region the EXS family after (ERD1, XPR1, and SYG1). This family includes C-terminus portions from the SYG1 G-protein associated signal transduction protein from Saccharomyces cerevisiae, and sequences that are thought to be murine leukaemia virus (MLV) receptors (XPR1). N-terminus portions from these proteins are aligned in the SPX Pfam:PF03105 family. The previously noted similarity between SYG1 and MLV receptors over their whole sequences is thus borne out in Pfam:PF03105 and this family. While the N-termini aligned in Pfam:PF03105 are thought to be involved in signal transduction, the role of the C-terminus sequences aligned in this family is not known. This region of similarity contains several predicted transmembrane helices. This family also includes the ERD1 (ERD: ER retention defective) yeast proteins Swiss:P16151. ERD1 proteins are involved in the localisation of endogenous endoplasmic reticulum (ER) proteins. erd1 null mutants secrete such proteins even though they possess the C-terminal HDEL ER lumen localisation label sequence. In addition, null mutants also exhibit defects in the Golgi-dependent processing of several glycoproteins, which led to the suggestion that the sorting of luminal ER proteins actually occurs in the Golgi, with subsequent return of these proteins to the ER via `salvage' vesicles .. +PF04554 Extensin-like region
Pfam-B_1707 (release 7.5). +PF00646 F-box domain
This domain is approximately 50 amino acids long, and is usually found in the N-terminal half of a variety of proteins. Two motifs that are commonly found associated with the F-box domain are the leucine rich repeats (LRRs; Pfam:PF00560 and Pfam:PF07723) and the WD repeat (Pfam:PF00400). The F-box domain has a role in mediating protein-protein interactions in a variety of contexts, such as polyubiquitination, transcription elongation, centromere binding and translational repression .. +PF00754 F5/8 type C domain
Pfam-B_478 (release 2.1). This domain is also known as the discoidin (DS) domain family .. +PF00487 Fatty acid desaturase
+PF02913 FAD linked oxidases, C-terminal domain
This domain has a ferredoxin-like fold.. +PF01565 FAD binding domain
Pfam-B_352 (release 4.0). This family consists of various enzymes that use FAD as a co-factor, most of the enzymes are similar to oxygen oxidoreductase. One of the enzymes Vanillyl-alcohol oxidase (VAO) has a solved structure, the alignment includes the FAD binding site, called the PP-loop, between residues 99-110 . The FAD molecule is covalently bound in the known structure, however the residue that links to the FAD is not in the alignment. VAO catalyses the oxidation of a wide variety of substrates, ranging from aromatic amines to 4-alkylphenols. Other members of this family include D-lactate dehydrogenase, this enzyme catalyses the conversion of D-lactate to pyruvate using FAD as a co-factor; mitomycin radical oxidase, this enzyme oxidises the reduced form of mitomycins and is involved in mitomycin resistance. This family includes MurB an UDP-N-acetylenolpyruvoylglucosamine reductase enzyme EC:1.1.1.158. This enzyme is involved in the biosynthesis of peptidoglycan .. +PF00970 Cyt_reductase;
Oxidoreductase FAD-binding domain. Pfam-B_143 (release 3.0). +PF03441 FAD binding domain of DNA photolyase
+PF03101 FAR1 DNA-binding domain
Pfam-B_2535 (release 6.4). This domain contains a WRKY like fold and is therefore most likely a zinc binding DNA-binding domain.. +PF04300 F-box associated region
Members of this family are associated with F-box domains, hence the name FBA. This domain is probably involved in binding other proteins that will be targeted for ubiquitination. Swiss:Q9UK22 is involved in binding to N-glycosylated proteins.. +PF00611 Fes/CIP4, and EFC/F-BAR homology domain
Ponting C, Schultz J, Bork P. Alignment extended from . Highly alpha-helical. The cytosolic endocytic adaptor proteins in fungi carry this domain at the N-terminus; several of these have been referred to as muniscin proteins . These N-terminal BAR, N-BAR, and EFC/F-BAR domains are found in proteins that regulate membrane trafficking events by inducing membrane tubulation. The domain dimerises into a curved structure that binds to liposomes and either senses or induces the curvature of the membrane bilayer to cause biophysical changes to the shape of the bilayer; it also thereby recruits other trafficking factors, such as the GTPase dynamin. Most EFC/F-BAR domain-family members localise to actin-rich structures .. +PF00111 fer2;
2Fe-2S iron-sulfur cluster binding domain. +PF01799 fer2_2;
[2Fe-2S] binding domain. +PF04324 fer2_BFD;
BFD-like [2Fe-2S] binding domain. The two Fe ions are each coordinated by two conserved cysteine residues. This domain occurs alone in small proteins such as Bacterioferritin-associated ferredoxin (BFD, Swiss:P13655). The function of BFD is not known, but it may be a general redox and/or regulatory component involved in the iron storage or mobilisation functions of bacterioferritin in bacteria . This domain is also found in nitrate reductase proteins in association with Nitrite and sulphite reductase 4Fe-4S domain (Pfam:PF01077), Nitrite/Sulfite reductase ferredoxin-like half domain (Pfam:PF03460) and Pyridine nucleotide-disulphide oxidoreductase (Pfam:PF00070). It is also found in NifU nitrogen fixation proteins, in association with NifU-like N terminal domain (Pfam:PF01592) and NifU-like domain (Pfam:PF01106).. +PF00037 fer4;
4Fe-4S binding domain. Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich.. +PF00210 ferritin;
Ferritin-like domain. This family contains ferritins and other ferritin-like proteins such as members of the DPS family and bacterioferritins.. +PF01839 FG-GAP repeat
This family contains the extracellular repeat that is found in up to seven copies in alpha integrins. This repeat has been predicted to fold into a beta propeller structure . The repeat is called the FG-GAP repeat after two conserved motifs in the repeat . The FG-GAP repeats are found in the N terminus of integrin alpha chains, a region that has been shown to be important for ligand binding . A putative Ca2+ binding motif is found in some of the repeats.. +PF02181 Formin Homology 2 Domain
Alignment kindly provided by SMART. +PF02661 Fic/DOC family
This family consists of the Fic (filamentation induced by cAMP) protein and doc (death on curing). The Fic protein is involved in cell division and is suggested to be involved in the synthesis of PAB or folate, indicating that the Fic protein and cAMP are involved in a regulatory mechanism of cell division via folate metabolism . This family contains a central conserved motif HPFXXGNG in most members. The exact molecular function of these proteins is uncertain. P1 lysogens of Escherichia coli carry the prophage as a stable low copy number plasmid. The frequency with which viable cells cured of prophage are produced is about 10(-5) per cell per generation . A significant part of this remarkable stability can be attributed to a plasmid-encoded mechanism that causes death of cells that have lost P1 . In other words, the lysogenic cells appear to be addicted to the presence of the prophage. The plasmid withdrawal response depends on a gene named doc (death on curing) that is represented by this family . Doc induces a reversible growth arrest of E. coli cells by targeting the protein synthesis machinery. Doc hosts the C-terminal domain of its antitoxin partner Phd (prevents host death) through fold complementation, a domain that is intrinsically disordered in solution but that folds into an alpha-helix on binding to Doc . This domain forms complexes with Phd antitoxins containing Pfam:PF02604.. +PF00254 FKBP;
FKBP-type peptidyl-prolyl cis-trans isomerase. +PF01003 Flavivirus capsid protein C
Pfam-B_156 (release 3.0) . Flaviviruses are small enveloped viruses with virions comprised of 3 proteins called C, M and E. Multiple copies of the C protein form the nucleocapsid, which contains the ssRNA molecule.. +PF02832 Flavivirus glycoprotein, immunoglobulin-like domain
Pfam-B_146 (release 3.0). +PF00869 Flavivirus glycoprotein, central and dimerisation domains
Pfam-B_146 (release 3.0). +PF00949 Flavi_helicase;
Peptidase S7, Flavivirus NS3 serine protease . Pfam-B_199 (release 3.0). The viral genome is a positive strand RNA that encodes a single polyprotein precursor. Processing of the polyprotein precursor into mature proteins is carried out by the host signal peptidase and by NS3 serine protease, which requires NS2B (Pfam:PF01002) as a cofactor . . +PF01004 Flavivirus envelope glycoprotein M
Pfam-B_156 (release 3.0) . Flaviviruses are small enveloped viruses with virions comprised of 3 proteins called C, M and E. The envelope glycoprotein M is made as a precursor, called prM. The precursor portion of the protein is the signal peptide for the proteins entry into the membrane. prM is cleaved to form M in a late-stage cleavage event. Associated with this cleavage is a change in the infectivity and fusion activity of the virus.. +PF00948 Flavivirus non-structural Protein NS1
Pfam-B_157 (release 3.0). The NS1 protein is well conserved amongst the flaviviruses. It contains 12 cysteines, and undergoes glycosylation in a similar manner to other NS proteins. Mutational analysis has strongly implied a role for NS1 in the early stages of RNA replication.. +PF01005 Flavivirus non-structural protein NS2A
Pfam-B_156 (release 3.0). NS2A is a hydrophobic protein about 25 kDa in size. NS2A is cleaved from NS1 by a membrane bound host protease . NS2A has been found to associate with the dsRNA within the vesicle packages. It has also been found that NS2A associates with the known replicase components and so NS2A has been postulated to be part of this replicase complex . . +PF01002 Flavivirus non-structural protein NS2B
Pfam-B_156 (release 3.0). Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. All, but two, are cleaved by the NS2B-NS3 protease complex.. +PF01613 Flavin reductase like domain
Pfam-B_710 (release 4.1). This is a flavin reductase family consisting of enzymes known to be flavin reductases as well as various oxidoreductase and monooxygenase components. VlmR is a flavin reductase that functions in a two-component enzyme system to provide isobutylamine N-hydroxylase with reduced flavin and may be involved in the synthesis of valanimycin . SnaC is a flavin reductase that provides reduced flavin for the oxidation of pristinamycin IIB to pristinamycin IIA as catalysed by SnaA, SnaB heterodimer . This flavin reductase region characterised by enzymes of the family is present in the C-terminus of potential FMN proteins from Synechocystis sp. suggesting it is a flavin reductase domain .. +PF00258 flavodoxin;
+PF04500 FLYWCH zinc finger domain
Mutations in the mod(mdg4) gene have effects on variegation (PEV), the properties of insulator sequences, correct path-finding of growing nerve cells, meiotic pairing of chromosomes, and apoptosis. The occurrence of FLYWCH motifs in mod(mdg4) gene product and other proteins is discussed in .. +PF03358 NADPH-dependent FMN reductase
Pfam-B_2010 (release 6.6). +PF02434 Fringe-like
Pfam-B_1900 (release 5.4). The drosophila protein fringe (FNG) is a glucosaminyltransferase that controls the response of the Notch receptor to specific ligands . FNG is localised to the Golgi apparatus (not secreted as previously thought). Modification of Notch occurs through glycosylation by FNG. The xenopus homologue, lunatic fringe, has been implicated in a variety of functions.. +PF01534 Frizzled/Smoothened family membrane region
Pfam-B_949 (release 4.0). This family contains the membrane spanning region of frizzled and smoothened receptors. This membrane region is predicted to contain seven transmembrane alpha helices. Proteins related to Drosophila frizzled (Swiss:P18537) are receptors for Wnt (mediating the beta-catenin signalling pathway) , but also the planar cell polarity (PCP) pathway and the Wnt/calcium pathway. The predominantly alpha-helical Cys-rich ligand-binding region (CRD) of Frizzled is both necessary and sufficient for Wnt binding . The smoothened receptor mediates hedgehog signalling .. +PF01827 DUF38;
Pfam-B_67 (release 4.2). This presumed domain is likely to be a protein-protein interaction module . It is found in many proteins from C. elegans. The domain is found associated with the F-box Pfam:PF00646. This domain is named FTH after FOG-2 homology domain .. +PF01913 Formylmethanofuran-tetrahydromethanopterin formyltransferase
This enzyme EC:2.3.1.101 is involved in archaebacteria in the formation of methane from carbon dioxide. N-terminal distal lobe of alpha+beta ferredoxin-like fold. SCOP reports fold duplication with C-terminal proximal lobe.. +PF02741 FTR, proximal lobe
The FTR (Formylmethanofuran--tetrahydromethanopterin formyltransferase) enzyme EC:2.3.1.101 is involved in archaebacteria in the formation of methane from carbon dioxide. C-terminal proximal lobe of alpha+beta ferredoxin-like fold. SCOP reports fold duplication with N-terminal distal lobe.. +PF01728 FtsJ-like methyltransferase
Pfam-B_1791 (release 4.1). This family consists of FtsJ from various bacterial and archaeal sources FtsJ is a methyltransferase, but actually has no effect on cell division. FtsJ's substrate is the 23S rRNA. The 1.5 A crystal structure of FtsJ in complex with its cofactor S-adenosylmethionine revealed that FtsJ has a methyltransferase fold. This family also includes the N terminus of flaviviral NS5 protein. It has been hypothesised that the N-terminal domain of NS5 is a methyltransferase involved in viral RNA capping .. +PF02687 DUF214;
FtsX-like permease family. This is a family of predicted permeases and hypothetical transmembrane proteins. Swiss:P57382 has been shown to transport lipids targeted to the outer membrane across the inner membrane. Both Swiss:P57382 and Swiss:O54500 have been shown to require ATP. This region contains three transmembrane helices.. +PF04082 Fungal specific transcription factor domain
Pfam-B_306 (release 7.3);. +PF01363 FYVE zinc finger
Pfam-B_655 (release 3.0). The FYVE zinc finger is named after four proteins that it has been found in: Fab1, YOTB/ZK632.12, Vac1, and EEA1. The FYVE finger has been shown to bind two Zn++ ions . The FYVE finger has eight potential zinc coordinating cysteine positions. Many members of this family also include two histidines in a motif R+HHC+XCG, where + represents a charged residue and X any residue. We have included members which do not conserve these histidine residues but are clearly related.. +PF01392 Fz domain
Also known as the CRD (cysteine rich domain), the C6 box in MuSK receptor. This domain of unknown function has been independently identified by several groups [1,2,3,4]. The domain contains 10 conserved cysteines.. +PF01585 G7;
Pfam-B_585 (release 4.1). This domain is found in a number of RNA binding proteins, and is also found in proteins that contain RNA binding domains. This suggests that this domain may have an RNA binding function. This domain has seven highly conserved glycines.. +PF01019 Gamma-glutamyltranspeptidase
Pfam-B_878 (release 3.0). +PF04114 Gaa1-like, GPI transamidase component
Pfam-B_12685 (release 7.3);. GPI (glycosyl phosphatidyl inositol) transamidase is a multi-protein complex. Gpi16, Gpi8 and Gaa1 form a sub-complex of the GPI transamidase. GPI transamidase adds glycosylphosphatidylinositols (GPIs) to newly synthesised proteins. . +PF01590 GAF domain
This domain is present in cGMP-specific phosphodiesterases, adenylyl and guanylyl cyclases, phytochromes, FhlA and NifA. Adenylyl and guanylyl cyclases catalyse ATP and GTP to the second messengers cAMP and cGMP, respectively, these products up-regulating catalytic activity by binding to the regulatory GAF domain(s). The opposite hydrolysis reaction is catalysed by phosphodiesterase. cGMP-dependent 3',5'-cyclic phosphodiesterase catalyses the conversion of guanosine 3',5'-cyclic phosphate to guanosine 5'-phosphate. Here too, cGMP regulates catalytic activity by GAF-domain binding. Phytochromes are regulatory photoreceptors in plants and bacteria which exist in two thermally-stable states that are reversibly inter-convertible by light: the Pr state absorbs maximally in the red region of the spectrum, while the Pfr state absorbs maximally in the far-red region. This domain is also found in FhlA (formate hydrogen lyase transcriptional activator) and NifA, a transcriptional activator which is required for activation of most Nif operons which are directly involved in nitrogen fixation. NifA interacts with sigma-54.. +PF01140 gag_MA;
Matrix protein (MA), p15. Pfam-B_229 (release 3.0). The matrix protein, p15, is encoded by the gag gene. MA is involved in pathogenicity .. +PF01141 gag_p12;
Gag polyprotein, inner coat protein p12. Pfam-B_821 (release 3.0). The retroviral p12 is a virion structural protein. p12 is proline rich. The function carried out by p12 in assembly and replication is unknown. p12 is associated with pathogenicity of the virus .. +PF02140 Galactose binding lectin domain
+PF03127 GAT domain
The GAT domain is responsible for binding of GGA proteins to several members of the ARF family including ARF1 and ARF3. The GAT domain stabilises membrane bound ARF1 in its GTP bound state, by interfering with GAP proteins .. +PF00320 GATA zinc finger
This domain uses four cysteine residues to coordinate a zinc ion. This domain binds to DNA. Two GATA zinc fingers are found in the GATA transcription factors. However there are several proteins which only contain a single copy of the domain.. +PF00117 Glutamine amidotransferase class-I
+PF02934 PET112_N;
GatB/GatE catalytic domain. This domain is found in the GatB and GatE proteins .. +PF02637 DUF186;
This domain is found in GatB. It is about 140 amino acid residues long. This domain is found at the C terminus of GatB Swiss:O30509 which transamidates Glu-tRNA to Gln-tRNA.. +PF03615 GCM motif protein
+PF03074 Glutamate-cysteine ligase
Pfam-B_541 (release 6.4). This family represents the catalytic subunit of glutamate-cysteine ligase (E.C. 6.3.2.2), also known as gamma-glutamylcysteine synthetase (GCS). This enzyme catalyses the rate limiting step in the biosynthesis of glutathione. The eukaryotic enzyme is a dimer of a heavy chain and a light chain with all the catalytic activity exhibited by the heavy chain (this family).. +PF03009 Glycerophosphoryl diester phosphodiesterase family
Pfam-B_4008 (release 6.4). E. coli has two sequence related isozymes of glycerophosphoryl diester phosphodiesterase (GDPD) - periplasmic and cytosolic. This family also includes agrocinopine synthase, the similarity to GDPD has been noted . This family appears to have weak but not significant matches to mammalian phospholipase C Pfam:PF00388, which suggests that this family may adopt a TIM barrel fold.. +PF00626 Gelsolin repeat
+PF01408 Oxidoreductase family, NAD-binding Rossmann fold
Pfam-B_342 (release 3.0). This family of enzymes utilise NADP or NAD. This family is called the GFO/IDH/MOCA family in swiss-prot.. +PF02894 Oxidoreductase family, C-terminal alpha/beta domain
Pfam-B_342 (release 3.0). This family of enzymes utilise NADP or NAD. This family is called the GFO/IDH/MOCA family in swiss-prot.. +PF00990 DUF9;
Pfam-B_112 (release 3.0). This domain is found linked to a wide range of non-homologous domains in a variety of bacteria. It has been shown to be homologous to the adenylyl cyclase catalytic domain and has diguanylate cyclase activity . This observation correlates with the functional information available on two GGDEF-containing proteins, namely diguanylate cyclase and phosphodiesterase A of Acetobacter xylinum, both of which regulate the turnover of cyclic diguanosine monophosphate.. +PF03321 GH3 auxin-responsive promoter
Pfam-B_3652 (release 6.5). +PF00288 GHMP_kinases;
GHMP kinases N terminal domain. This family includes homoserine kinases, galactokinases and mevalonate kinases.. +PF00594 gla;
Vitamin K-dependent carboxylation/gamma-carboxyglutamic (GLA) domain. This domain is responsible for the high-affinity binding of calcium ions. This domain contains post-translational modifications of many glutamate residues by Vitamin K-dependent carboxylation to form gamma-carboxyglutamate (Gla).. +PF00208 E_L_F_V_dh; GLFV_dehydrog;
Glutamate/Leucine/Phenylalanine/Valine dehydrogenase. +PF00042 globin;
Structure_superposition. +PF04898 Glutamate synthase central domain
Pfam-B_455 (release 7.6). The central domain of glutamate synthase connects the amino terminal amidotransferase domain with the FMN-binding domain and has an alpha / beta overall topology . This domain appears to be a rudimentary form of the FMN-binding TIM barrel according to SCOP.. +PF04960 Glutaminase
This family of enzymes deaminates glutamine to glutamate EC:3.5.1.2.. +PF04488 Glycosyltransferase sugar-binding region containing DXD motif
The DXD motif is a short conserved motif found in many families of glycosyltransferases, which add a range of different sugars to other sugars, phosphates and proteins. DXD-containing glycosyltransferases all use nucleoside diphosphate sugars as donors and require divalent cations, usually manganese. The DXD motif is expected to play a carbohydrate binding role in sugar-nucleoside diphosphate and manganese dependent glycosyltransferases .. +PF00722 glycosyl_hydro9;
Glycosyl hydrolases family 16. Pfam-B_759 (release 2.1). +PF00704 glycosyl_hydro8;
Glycosyl hydrolases family 18. Pfam-B_574 (release 2.1). +PF02055 O-Glycosyl hydrolase family 30
+PF01055 Glycosyl_hydr15;
Glycosyl hydrolases family 31 . Pfam-B_369 (release 3.0). Glycosyl hydrolases are key enzymes of carbohydrate metabolism. Family 31 comprises enzymes that are, or are similar to, alpha-galactosidases.. +PF01532 Glycosyl hydrolase family 47
Pfam-B_958 (release 4.0). Members of this family are alpha-mannosidases that catalyse the hydrolysis of the terminal 1,2-linked alpha-D-mannose residues in the oligo-mannose oligosaccharide Man(9)(GlcNAc)(2).. +PF03200 Mannosyl oligosaccharide glucosidase
Pfam-B_2589 (release 6.5). This is a family of eukaryotic enzymes belonging to glycosyl hydrolase family 63. They catalyse the specific cleavage of the non-reducing terminal glucose residue from Glc(3)Man(9)GlcNAc(2). Mannosyl oligosaccharide glucosidase EC:3.2.1.106 is the first enzyme in the N-linked oligosaccharide processing pathway.. +PF03648 Glyco_hydro_67;
Glycosyl hydrolase family 67 N-terminus. Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the N-terminal region of alpha-glucuronidase. The N-terminal domain forms a two-layer sandwich, each layer being formed by a beta sheet of five strands. A further two helices form part of the interface with the central, catalytic, module (Pfam:PF07488) . . +PF03663 Glycosyl hydrolase family 76
Family of alpha-1,6-mannanases.. +PF03644 Glycosyl hydrolase family 85
Family of endo-beta-N-acetylglucosaminidases. These enzymes work on a broad spectrum of substrates.. +PF04101 Glycosyltransferase family 28 C-terminal domain
Pfam-B_1105 (release 6.4). The glycosyltransferase family 28 includes monogalactosyldiacylglycerol synthase (Swiss:P93115, EC 2.4.1.46) and UDP-N-acetylglucosamine transferase (Swiss:P74657, EC 2.4.1.-). Structural analysis suggests the C-terminal domain contains the UDP-GlcNAc binding site.. +PF00852 Fucosyl_transf;
Glycosyltransferase family 10 (fucosyltransferase). Pfam-B_1677 (release 2.1). This family of Fucosyltransferases are the enzymes transferring fucose from GDP-Fucose to GlcNAc in an alpha1,3 linkage . This family is known as glycosyltransferase family 10 .. +PF00982 TrehaloseP_syn;
Glycosyltransferase family 20. Pfam-B_1035 (release 3.0). Members of this family belong to glycosyl transferase family 20 . OtsA (Trehalose-6-phosphate synthase) is homologous to regions in the subunits of yeast trehalose-6-phosphate synthase/phosphate complex, .. +PF01755 LPS_glycoyl_T;
Glycosyltransferase family 25 (LPS biosynthesis protein). Pfam-B_1857 (release 4.1). Members of this family belong to Glycosyltransferase family 25 This is a family of glycosyltransferases involved in lipopolysaccharide (LPS) biosynthesis. These enzymes catalyse the transfer of various sugars onto the growing LPS chain during its biosynthesis.. +PF00777 Sialyltransf;
Glycosyltransferase family 29 (sialyltransferase). Pfam-B_1020 (release 2.1). Members of this family belong to glycosyltransferase family 29 .. +PF03360 Glyco_tranf_43;
Glycosyltransferase family 43. Pfam-B_1447 (release 6.6). +PF01501 Glycosyl transferase family 8
Pfam-B_730 (release 4.0) & Pfam-B_5903 (Release 7.5). This family includes enzymes that transfer sugar residues to donor molecules. Members of this family are involved in lipopolysaccharide biosynthesis and glycogen synthesis. This family includes Lipopolysaccharide galactosyltransferase Swiss:P27128, lipopolysaccharide glucosyltransferase 1 Swiss:P27129, and glycogenin glucosyltransferase Swiss:P46976.. +PF00535 glycosyl_transf_2;
Glycosyl transferase family 2. MRC-LMB Genome group. Diverse family, transferring sugar from UDP-glucose, UDP-N-acetyl-galactosamine, GDP-mannose or CDP-abequose, to a range of substrates including cellulose, dolichol phosphate and teichoic acids.. +PF00953 Glycosyl transferase family 4
Pfam-B_534 (release 3.0). +PF00903 Glyoxalase/Bleomycin resistance protein/Dioxygenase superfamily
Pfam-B_1207 (release 3.0) & Pfam-B_5495 (Release 8.0). +PF04464 glyphos_transf;
CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase . Wall-associated teichoic acids are a heterogeneous class of phosphate-rich polymers that are covalently linked to the cell wall peptidoglycan of gram-positive bacteria. They consist of a main chain of phosphodiester-linked polyols and/or sugar moieties attached to peptidoglycan via a linkage unit. CDP-glycerol:poly(glycerophosphate) glycerophosphotransferase is responsible for the polymerisation of the main chain of the teichoic acid by sequential transfer of glycerol-phosphate units from CDP-glycerol to the linkage unit lipid .. +PF00958 GMP synthase C terminal domain
Pfam-B_1137 (release 3.0). GMP synthetase is a glutamine amidotransferase from the de novo purine biosynthetic pathway. This family is the C-terminal domain specific to the GMP synthases Swiss:P49915 EC:6.3.5.2. In prokaryotes this domain mediates dimerisation. Eukaryotic GMP synthases are monomers. This domain in eukaryotes includes several large insertions that may form globular domains.. +PF01825 Latrophilin/CL-1-like GPS domain
Domain present in latrophilin/CL-1, sea urchin REJ and polycystin.. +PF02893 GRAM domain
Alignment kindly provided by SMART. The GRAM domain is found in glucosyltransferases, myotubularins and other putative membrane-associated proteins.. +PF03514 GRAS domain family
Pfam-B_393 (release 7.0). Proteins in the GRAS (GAI, RGA, SCR) family are known as major players in gibberellin (GA) signaling, which regulates various aspects of plant growth and development . Mutation of the SCARECROW (SCR) gene results in a radial pattern defect, loss of a ground tissue layer, in the root. The PAT1 protein is involved in phytochrome A signal transduction . A sequence, structure and evolutionary analysis showed that the GRAS family emerged in bacteria and belongs to the Rossmann-fold, AdoMET (SAM)-dependent methyltransferase superfamily . All bacterial, and a subset of plant GRAS proteins, are predicted to be active and function as small-molecule methylases. Several plant GRAS proteins lack one or more AdoMet (SAM)-binding residues while preserving their substrate-binding residues. Although GRAS proteins are implicated to function as transcriptional factors, the above analysis suggests that they instead might either modify or bind small molecules .. +PF01465 GRIP domain
The GRIP (golgin-97, RanBP2alpha,Imh1p and p230/golgin-245) domain is found in many large coiled-coil proteins. It has been shown to be sufficient for targeting to the Golgi. The GRIP domain contains a completely conserved tyrosine residue. At least some of these domains have been shown to bind to GTPase Arl1, see structures in [4,5].. +PF02946 GTF2I-like repeat
Pfam-B_101 (release 6.4). This region of sequence similarity is found up to six times in a variety of proteins including GTF2I. It has been suggested that this may be a DNA binding domain [1,2].. +PF01018 GTP1/OBG
Pfam-B_875 (release 3.0). The N-terminal domain of Swiss:P20964 has the OBG fold, which is formed by three glycine-rich regions inserted into a small 8-stranded beta-sandwich these regions form six left-handed collagen-like helices packed and H-bonded together.. +PF00009 Elongation factor Tu GTP binding domain
This domain contains a P-loop motif, also found in several other families such as Pfam:PF00071, Pfam:PF00025 and Pfam:PF00063. Elongation factor Tu consists of three structural domains, this plus two C-terminal beta barrel domains.. +PF03144 Elongation factor Tu domain 2
Elongation factor Tu consists of three structural domains, this is the second domain. This domain adopts a beta barrel structure. This second domain is involved in binding to charged tRNA . This domain is also found in other proteins such as elongation factor G and translation initiation factor IF-2. This domain is structurally related to Pfam:PF03143, and in fact has weak sequence matches to this domain.. +PF03143 Elongation factor Tu C-terminal domain
Elongation factor Tu consists of three structural domains, this is the third domain. This domain adopts a beta barrel structure. This third domain is involved in binding to both charged tRNA and binding to EF-Ts Pfam:PF00889 .. +PF01493 DUF14;
Pfam-B_428 (release 4.0). This domain is found in glutamate synthase, tungsten formylmethanofuran dehydrogenase subunit c (FwdC) and molybdenum formylmethanofuran dehydrogenase subunit c (FmdC). A repeated G-XX-G-XXX-G motif is seen in the alignment.. +PF02756 GYR motif
The GYR motif is found in several drosophila proteins. Its function is unknown, however the presence of completely conserved tyrosine residues may suggest it could be a substrate for tyrosine kinases.. +PF03457 Helicase associated domain
This short domain is found in multiple copies in bacterial helicase proteins. The domain is predicted to contain 3 alpha helices. The function of this domain may be to bind nucleic acid.. +PF04408 Helicase associated domain (HA2)
This presumed domain is about 90 amino acid residues in length. It is found in a diverse set of RNA helicases. Its function is unknown, however it seems likely to be involved in nucleic acid binding.. +PF04774 Hyaluronan / mRNA binding family
Pfam-B_2044 (release 7.6). This family includes the HABP4 family of hyaluronan-binding proteins, and the PAI-1 mRNA-binding protein, PAI-RBP1. HABP4 has been observed to bind hyaluronan (a glucosaminoglycan), but it is not known whether this is its primary role in vivo. It has also been observed to bind RNA, but with a lower affinity than that for hyaluronan . PAI-1 mRNA-binding protein specifically binds the mRNA of type-1 plasminogen activator inhibitor (PAI-1), and is thought to be involved in regulation of mRNA stability . However, in both cases, the sequence motifs predicted to be important for ligand binding are not conserved throughout the family, so it is not known whether members of this family share a common function.. +PF02183 Homeobox associated leucine zipper
Alignment kindly provided by SMART & Pfam-B_1492 (Release 7.5). +PF00672 DUF5;
Pfam-B_113 (release 2.1). +PF04849 HAP1 N-terminal conserved region
Pfam-B_4571 (release 7.6). This family represents an N-terminal conserved region found in several huntingtin-associated protein 1 (HAP1) homologues. HAP1 binds to huntingtin in a polyglutamine repeat-length-dependent manner. However, its possible role in the pathogenesis of Huntington's disease is unclear [1-3]. This family also includes a similar N-terminal conserved region from hypothetical protein products of ALS2CR3 genes found in the human juvenile amyotrophic lateral sclerosis critical region 2q33-2q34 .. +PF02184 HAT (Half-A-TPR) repeat
Alignment kindly provided by SMART. The HAT (Half A TPR) repeat is found in several RNA processing proteins .. +PF02518 Histidine kinase-, DNA gyrase B-, and HSP90-like ATPase
SMART, Griffiths-Jones SR. This family represents the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90.. +PF00955 Anion_Exchanger;
HCO3- transporter family. Pfam-B_1004 (release 3.0). This family contains Band 3 anion exchange proteins that exchange CL-/HCO3- such as Swiss:P48751. This family also includes cotransporters of Na+/HCO3- such as Swiss:O15153.. +PF01966 HD domain
HD domains are metal dependent phosphohydrolases.. +PF00271 helicase_C;
Helicase conserved C-terminal domain. The Prosite family is restricted to DEAD/H helicases, whereas this domain family is found in a wide variety of helicases and helicase related proteins. It may be that this is not an autonomously folding unit, but an integral part of the helicase.. +PF02602 Uroporphyrinogen-III synthase HemD
This family consists of uroporphyrinogen-III synthase HemD EC:4.2.1.75 also known as Hydroxymethylbilane hydro-lyase (cyclizing) from eukaryotes, bacteria and archaea. This enzyme catalyses the reaction: Hydroxymethylbilane <=> uroporphyrinogen-III + H(2)O. Some members of this family are multi-functional proteins possessing other enzyme activities related to porphyrin biosynthesis, such as Swiss:Q59294 with Pfam:PF00590, however the aligned region corresponds with the uroporphyrinogen-III synthase EC:4.2.1.75 activity only. Uroporphyrinogen-III synthase is the fourth enzyme in the heme pathway . Mutant forms of the Uroporphyrinogen-III synthase gene cause congenital erythropoietic porphyria in humans, a recessive inborn error of metabolism also known as Gunther disease .. +PF00173 heme_1;
Cytochrome b5-like Heme/Steroid binding domain. This family includes heme binding domains from a diverse range of proteins. This family also includes proteins that bind to steroids. The family includes progesterone receptors such as Swiss:O00264 [1,2]. Many members of this subfamily are membrane anchored by an N-terminal transmembrane alpha helix. This family also includes a domain in some chitin synthases. There is no known ligand for this domain in the chitin synthases.. +PF01814 Hemerythrin HHE cation binding domain
Iteration of the HHE family ( ) found it to be related to Hemerythrin. It also demonstrated that what has been described as a single domain ( ) in fact consists of two cation binding domains. Members of this family occur all across nature and are involved in a variety of processes. For instance, in Nereis diversicolor Swiss:P80255 binds Cadmium so as to protect the organism from toxicity ( ). However Hemerythrin is classically described as Oxygen-binding through two attached Fe2+ ions. And the bacterial Swiss:Q7WX96 is a regulator of response to NO, which suggests yet another set-up for its metal ligands ( ). In Staphylococcus aureus P72360 has been noted to be important when the organism switches to living in environments with low oxygen concentrations ( ); perhaps this protein acts as an oxygen store or scavenger.. +PF00353 hemolysinCabind;
Hemolysin-type calcium-binding repeat (2 copies). +PF00132 hexapep;
Bacterial transferase hexapeptide (six repeats). +PF03129 Anticodon binding domain
This domain is found in histidyl, glycyl, threonyl and prolyl tRNA synthetases; it is probably the anticodon binding domain .. +PF03578 HGWP repeat
Pfam-B_220 (release 7.0). This short (30 amino acids) repeat is found in a number of plant proteins. It contains a conserved HGWP motif, hence its name. The function of these proteins is unknown.. +PF00633 Helix-hairpin-helix motif
The helix-hairpin-helix DNA-binding motif is found to be duplicated in the central domain of RuvA . The HhH domain of DisA, a bacterial checkpoint control protein, is a DNA-binding domain .. +PF01079 Hint module
Pfam-B_766 (release 3.0). This is an alignment of the Hint module in the Hedgehog proteins. It does not include any Inteins which also possess the Hint module. . +PF01634 ATP phosphoribosyltransferase
Pfam-B_1142 (release 4.1). +PF00512 signal;
His Kinase A (phospho-acceptor) domain. Dimerisation and phospho-acceptor domain of histidine kinases.. +PF00850 Histone deacetylase domain
Pfam-B_343 (release 3.0). Histones can be reversibly acetylated on several lysine residues. Regulation of transcription is caused in part by this mechanism. Histone deacetylases catalyse the removal of the acetyl group. Histone deacetylases are related to other proteins .. +PF00010 Helix-loop-helix DNA-binding domain
+PF00403 Heavy-metal-associated domain
+PF00368 HMG-CoA_red1;
Hydroxymethylglutaryl-coenzyme A reductase. The HMG-CoA reductases catalyse the conversion of HMG-CoA to mevalonate, which is the rate-limiting step in the synthesis of isoprenoids like cholesterol. Probably because of the critical role of this enzyme in cholesterol homeostasis, mammalian HMG-CoA reductase is heavily regulated at the transcriptional, translational, and post-translational levels .. +PF02301 MAD2;
The HORMA (for Hop1p, Rev7p and MAD2) domain has been suggested to recognise chromatin states that result from DNA adducts, double stranded breaks or non-attachment to the spindle and acts as an adaptor that recruits other proteins. MAD2 is a spindle checkpoint protein which prevents progression of the cell cycle upon detection of a defect in mitotic spindle integrity.. +PF00104 hormone_rec;
Ligand-binding domain of nuclear hormone receptor. This all helical domain is involved in binding the hormone in these receptors.. +PF03241 4-hydroxyphenylacetate 3-hydroxylase C terminal
Pfam-B_3148 (release 6.5). HpaB Swiss:Q57160 encodes part of the 4-hydroxyphenylacetate 3-hydroxylase from Escherichia coli . HpaB is part of a heterodimeric enzyme that also requires HpaC. The enzyme is NADH-dependent and uses FAD as the redox chromophore. This family also includes PvcC Swiss:O30372, which may play a role in one of the proposed hydroxylation steps of pyoverdine chromophore biosynthesis . . +PF02185 Hr1 repeat
Alignment kindly provided by SMART. +PF00570 HRDC domain
The HRDC (Helicase and RNase D C-terminal) domain has a putative role in nucleic acid binding. Mutations in the HRDC domain cause human disease. It is interesting to note that the RecQ helicase in Deinococcus radiodurans has three tandem HRDC domains .. +PF03878 Hrf1;
YIF1 (Yip1 interacting factor) is an integral membrane protein that is required for membrane fusion of ER derived vesicles . It also plays a role in the biogenesis of ER derived COPII transport vesicles .. +PF02793 Hormone receptor domain
This extracellular domain contains four conserved cysteines that probably form disulphide bridges. The domain is found in a variety of hormone receptors. It may be a ligand binding domain. . +PF01381 Helix-turn-helix
This large family of DNA binding helix-turn helix proteins includes Cro Swiss:P03036 and CI Swiss:P03034. Within the protein Swiss:Q5F9C2, the full protein fold incorporates a helix-turn-helix motif, but the function of this member is unlikely to be that of a DNA-binding regulator, the function of most other members, so is not necessarily characteristic of the whole family .. +PF01402 Ribbon-helix-helix protein, copG family
The structure of this protein repressor, which is the shortest reported to date and the first isolated from a plasmid, has a homodimeric ribbon-helix-helix arrangement . The helix-turn-helix-like structure is involved in dimerisation and not DNA binding as might have been expected .. +PF02954 Bacterial regulatory protein, Fis family
+PF04005 Hus1-like protein
Pfam-B_12502 (release 7.3). Hus1, Rad1, and Rad9 are three evolutionarily conserved proteins required for checkpoint control in fission yeast. These proteins are known to form a stable complex in vivo . Hus1-Rad1-Rad9 complex may form a PCNA-like ring structure, and could function as a sliding clamp during checkpoint control. . +PF03810 IBN_NT;
Importin-beta N-terminal domain. +PF01485 IBR domain
The IBR (In Between Ring fingers) domain is often found to occur between pairs of ring fingers (Pfam:PF00097). This domain has also been called the C6HC domain and DRIL (for double RING finger linked) domain . Proteins that contain two Ring fingers and an IBR domain (these proteins are also termed RBR family proteins) are thought to exist in all eukaryotic organisms. RBR family members play roles in protein quality control and can indirectly regulate transcription . Evidence suggests that RBR proteins are often parts of cullin-containing ubiquitin ligase complexes. The ubiquitin ligase Parkin is an RBR family protein whose mutations are involved in forms of familial Parkinson's disease .. +PF01614 Bacterial transcriptional regulator
Pfam-B_755 (release 4.1). +PF04760 Translation initiation factor IF-2, N-terminal region
This conserved feature at the N-terminus of bacterial translation initiation factor IF2 has recently had its structure solved. It shows structural similarity to the tRNA anticodon Stem Contact Fold domains of the methionyl-tRNA and glutaminyl-tRNA synthetases, and a similar fold is also found in the B5 domain of the phenylalanine-tRNA synthetase.. +PF05198 Translation initiation factor IF-3, N-terminal domain
Pfam-B_629 (release 2.1). +PF05004 Interferon-related developmental regulator (IFRD)
Pfam-B_4730 (release 7.6). Interferon-related developmental regulator (IFRD1) is the human homologue of the rat early response protein PC4 and its murine homologue TIS7 . The exact function of IFRD1 is unknown but it has been shown that PC4 is necessary to muscle differentiation and that it might have a role in signal transduction. This family also contains IFRD2 and its murine equivalent SKMc15 which are highly expressed soon after gastrulation and in the hepatic primordium, suggesting an involvement in early hematopoiesis .. +PF00817 impB/mucB/samB family
Pfam-B_1349 (release 2.1). These proteins are involved in UV protection (Swiss).. +PF04836 Interferon-related protein conserved region
Pfam-B_4453 (release 7.6). Family of proteins thought to be involved in regulating gene activity in the proliferative and/or differentiative pathways induced by NGF .. +PF04762 IKI3 family
Members of this family are components of the elongator multi-subunit component of a novel RNA polymerase II holoenzyme for transcriptional elongation . This region contains WD40 like repeats.. +PF00478 IMP dehydrogenase / GMP reductase domain
This family is involved in biosynthesis of guanosine nucleotide. Members of this family contain a TIM barrel structure. In the inosine monophosphate dehydrogenases 2 CBS domains Pfam:PF00571 are inserted in the TIM barrel . This family is a member of the common phosphate binding site TIM barrel family.. +PF03941 Inner centromere protein, ARK binding region
Wood V, Griffiths-Jones SR. Pfam-B_67765 (release 7.2). This region of the inner centromere protein has been found to be necessary and sufficient for binding to aurora-related kinase. This interaction has been implicated in the coordination of chromosome segregation with cell division in yeast.. +PF04179 Initiator tRNA phosphoribosyl transferase
Pfam-B_16986 (release 7.3);. This enzyme (EC:2.4.2.-) modifies exclusively the initiator tRNA in position 64 using 5'-phosphoribosyl-1'-pyrophosphate as the modification donor. As the initiator tRNA participates both in the initiation and elongation of translation, the 2'-O-ribosyl phosphate modification discriminates the initiator tRNAs from the elongator tRNAs .. +PF02022 Integrase Zinc binding domain
Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. This domain is the amino-terminal domain zinc binding domain. The central domain is the catalytic domain Pfam:PF00665. The carboxyl terminal domain is a DNA binding domain Pfam:PF00552.. +PF00520 ion_trans;
Ion transport protein. Pfam-B_33 (release 1.0). This family contains Sodium, Potassium, Calcium ion channels. This family is 6 transmembrane helices in which the last two helices flank a loop which determines ion selectivity. In some sub-families (e.g. Na channels) the domain is repeated four times, whereas in others (e.g. K channels) the protein forms as a tetramer in the membrane. A bacterial structure of the protein is known for the last two helices but is not the Pfam family due to it lacking the first four helices. +PF03770 Inositol polyphosphate kinase
Pfam-B_1382 (release 7.0). ArgRIII has been demonstrated to be an inositol polyphosphate kinase . . +PF00612 IQ calmodulin-binding motif
Ponting C, Schultz J, Bork P. Calmodulin-binding motif.. +PF01007 Inward rectifier potassium channel
Pfam-B_18 (release 3.0). +PF02174 PTB domain (IRS-1 type)
+PF02922 isoamylase_N; Isoamylase_N;
Carbohydrate-binding module 48 (Isoamylase N-terminal domain). This domain is found in a range of enzymes that act on branched substrates - isoamylase, pullulanase and branching enzyme. This family also contains the beta subunit of 5' AMP activated kinase.. +PF00857 Isochorismatase family
Pfam-B_566 (release 3.0). This family are hydrolase enzymes.. +PF02373 jmjC;
JmjC domain, hydroxylase. The JmjC domain belongs to the Cupin superfamily . JmjC-domain proteins may be protein hydroxylases that catalyse a novel histone modification . This is confirmed to be a hydroxylase: the human JmjC protein named Tyw5p unexpectedly acts in the biosynthesis of a hypermodified nucleoside, hydroxy-wybutosine, in tRNA-Phe by catalysing hydroxylation .. +PF02099 Josephin
+PF02214 K_tetra;
Pfam-B_27 (Release 5.2). In voltage-gated K+ channels this domain is responsible for subfamily-specific assembly of alpha-subunits into functional tetrameric channels . In KCTD1 (Swiss:Q719H9) this domain functions as a transcriptional repressor . It also mediates homomultimerisation of KCTD1 and interaction of KCTD1 with the transcription factor AP-2-alpha [2-3].. +PF02705 K+_trans;
K+ potassium transporter. Pfam-B_677 (release 5.5). This is a family of K+ potassium transporters that are conserved across phyla, having both bacterial (KUP) Swiss:P30016 , yeast (HAK) Swiss:P50505 , and plant (AtKT) Swiss:O22397 sequences as members.. +PF01920 KE2;
This family includes prefoldin subunits that are not detected by Pfam:PF02996.. +PF01344 Kelch motif
The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase for which a structure has been solved . The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415.. +PF00013 KH-domain; KH;
KH motifs bind RNA in vitro. Autoantibodies to Nova, a KH domain protein, cause paraneoplastic opsoclonus ataxia.. +PF00109 Beta-ketoacyl synthase, N-terminal domain
The structure of beta-ketoacyl synthase is similar to that of the thiolase family (Pfam:PF00108) and also chalcone synthase. The active site of beta-ketoacyl synthase is located between the N and C-terminal domains. The N-terminal domain contains most of the structures involved in dimer formation and also the active site cysteine .. +PF02801 ketoacyl-synt_C;
Beta-ketoacyl synthase, C-terminal domain. The structure of beta-ketoacyl synthase is similar to that of the thiolase family (Pfam:PF00108) and also chalcone synthase. The active site of beta-ketoacyl synthase is located between the N and C-terminal domains.. +PF00225 kinesin;
Kinesin motor domain. +PF00467 L24;Ribosomal_L24;
This family has been extended to coincide with ref . The KOW (Kyprides, Ouzounis, Woese) motif is found in a variety of ribosomal proteins and NusG.. +PF01352 KRAB box
The KRAB domain (or Kruppel-associated box) is present in about a third of zinc finger proteins containing C2H2 fingers. The KRAB domain is found to be involved in protein-protein interactions [2,3]. The KRAB domain is generally encoded by two exons. The regions coded by the two exons are known as KRAB-A and KRAB-B. The A box plays an important role in repression by binding to corepressors, while the B box is thought to enhance this repression brought about by the A box. KRAB-containing proteins are thought to have critical functions in cell proliferation and differentiation, apoptosis and neoplastic transformation .. +PF05178 Krr1;
Pfam-B_8372 (release 7.7). The yeast member of this family (Kri1p) is found to be required for 40S ribosome biogenesis in the nucleolus .. +PF02735 ku;
Ku70/Ku80 beta-barrel domain. The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the central DNA-binding beta-barrel domain. This domain is found in both the Ku70 Swiss:P12956 and Ku80 Swiss:P13010 proteins that form a DNA binding heterodimer .. +PF03730 Ku70/Ku80 C-terminal arm
The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the C terminal arm. This alpha helical region embraces the beta-barrel domain Pfam:PF02735 of the opposite subunit .. +PF03731 Ku70/Ku80 N-terminal alpha/beta domain
The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the amino terminal alpha/beta domain. This domain only makes a small contribution to the dimer interface. The domain comprises a six stranded beta sheet of the Rossman fold .. +PF00014 Kunitz/Bovine pancreatic trypsin inhibitor domain
Indicative of a protease inhibitor, usually a serine protease inhibitor. Structure is a disulfide rich alpha+beta fold. BPTI (bovine pancreatic trypsin inhibitor) is an extensively studied model structure. Certain family members are similar to the tick anticoagulant peptide (TAP, Swiss:P17726). This is a highly selective inhibitor of factor Xa in the blood coagulation pathways . TAP molecules are highly dipolar , and are arranged to form a twisted two- stranded antiparallel beta-sheet followed by an alpha helix .. +PF03521 Kv21channel;
Kv2 voltage-gated K+ channel. +PF02828 L27 domain
Alignment kindly provided by SMART. The L27 domain is found in receptor targeting proteins Lin-2 and Lin-7.. +PF02448 L71 family
Pfam-B_1976 (release 5.4). This family of insect proteins are each about 100 amino acids long and have 6 conserved cysteine residues. They all have a predicted signal peptide and are probably excreted. The function of the proteins is unknown .. +PF00753 lactamase_B;
Metallo-beta-lactamase superfamily. +PF02652 L-lactate permease
L-lactate permease is an integral membrane protein probably involved in L-lactate transport .. +PF03798 LAG1;
Pfam-B_1398 (release 7.0). +PF03161 LAGLIDADG DNA endonuclease family
Pfam-B_3225 (release 6.5). This is a family of site-specific DNA endonucleases encoded by DNA mobile elements. Similar to Pfam:PF00961, the members of this family are also LAGLIDADG endonucleases.. +PF04916 Laminin_A;
Pfam-B_5721 (release 7.6). Phospholipase B (PLB) catalyses the hydrolytic cleavage of both acylester bonds of glycerophospholipids. This family of PLB enzymes has been identified in mammals, flies and nematodes but not in yeast . In Drosophila this protein was named LAMA for laminin ancestor since it is expressed in the neuronal and glial precursors that surround the lamina .. +PF04031 Las1-like
Pfam-B_10636 (release 7.3);. Las1 is an essential nuclear protein involved in cell morphogenesis and cell surface growth .. +PF00057 ldl_recept_a;
Low-density lipoprotein receptor domain class A. Swissprot_feature_table. +PF02987 LEA;
Late embryogenesis abundant protein. Pfam-B_106 (release 6.4). Different types of LEA proteins are expressed at different stages of late embryogenesis in higher plant seed embryos and under conditions of dehydration stress. The function of these proteins is unknown.. +PF04004 Leo1-like protein
Pfam-B_11226 (release 7.3). Members of this family are part of the Paf1/RNA polymerase II complex [1,2]. The Paf1 complex probably functions during the elongation phase of transcription . The Leo1 subunit of the yeast Paf1-complex binds RNA and contributes to complex recruitment. The subunit acts by co-ordinating co-transcriptional chromatin modifications and helping recruitment of mRNA 3prime-end processing factors .. +PF00060 lig_chan;
Ligand-gated ion channel. This family includes the four transmembrane regions of the ionotropic glutamate receptors and NMDA receptors.. +PF02900 Catalytic LigB subunit of aromatic ring-opening dioxygenase
+PF03893 Lipase 3 N-terminal region
N terminal region to Pfam:PF01764, found on a subset of Lipase 3 containing proteins. . +PF01764 Lipase (class 3)
Pfam-B_893 (release 4.2). +PF04571 lipin_N;
lipin, N-terminal conserved region. Pfam-B_4929 (release 7.5). Mutations in the lipin gene lead to fatty liver dystrophy in mice. The protein has been shown to be phosphorylated by the TOR Ser/Thr protein kinases in response to insulin stimulation. The conserved region is found at the N-terminus of the member proteins [1,2].. +PF03180 NLPA lipoprotein
Pfam-B_1418 (release 6.5). This family of bacterial lipoproteins contains several antigenic members, that may be involved in bacterial virulence. Their precise function is unknown. However they are probably distantly related to Pfam:PF00497 which are solute binding proteins.. +PF02190 ATP-dependent protease La (LON) domain
Alignment kindly provided by SMART. This domain has been shown to be part of the PUA superfamily .. +PF00560 LRR;
CAUTION: This Pfam may not find all Leucine Rich Repeats in a protein. Leucine Rich Repeats are short sequence motifs present in a number of proteins with diverse functions and cellular locations. These repeats are usually involved in protein-protein interactions. Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains.. +PF01463 Leucine rich repeat C-terminal domain
Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the C-terminus of tandem leucine rich repeats.. +PF04180 Low temperature viability protein
Pfam-B_15065 (release 7.3);. The low-temperature viability protein LTV1 is involved in ribosome biogenesis 40S subunit production .. +PF02123 Viral RNA-directed RNA-polymerase
IPR001795 & Pfam-B_6212 (release 8.0) & Pfam-B_9867 (release 8.0). This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus.. +PF01476 PG_binding_2;
The LysM (lysin motif) domain is about 40 residues long. It is found in a variety of enzymes involved in bacterial cell wall degradation . This domain may have a general peptidoglycan binding function. The structure of this domain is known .. +PF03466 LysR substrate binding domain
The structure of this domain is known and is similar to the periplasmic binding proteins .. +PF03816 Cell envelope-related transcriptional attenuator domain
TIGRFAMs, Griffiths-Jones SR. +PF02847 MA3 domain
Alignment kindly provided by SMART. Domain in DAP-5, eIF4G, MA-3 and other proteins. Highly alpha-helical. May contain repeats and/or regions similar to MIF4G domains .. +PF01454 MAGE family
Prodom_3141 (release 99.1). The MAGE (melanoma antigen-encoding gene) family are expressed in a wide variety of tumours but not in normal cells, with the exception of the male germ cells, placenta, and, possibly, cells of the developing embryo. The cellular function of this family is unknown. This family also contains the yeast protein, Nse3. The Nse3 protein is part of the Smc5-6 complex [2-3]. Nse3 has been demonstrated to be important for meiosis .. +PF00390 Malic enzyme, N-terminal domain
+PF03949 malic_N;
Malic enzyme, NAD binding domain. +PF00629 MAM domain
An extracellular domain found in many receptors.. +PF03999 Microtubule associated protein (MAP65/ASE1 family)
Pfam-B_12512 (release 7.3). +PF00917 MATH domain
Pfam-B_1602 (release 3.0). This motif has been called the Meprin And TRAF-Homology (MATH) domain. This domain is hugely expanded in the nematode C. elegans .. +PF01429 Methyl-CpG binding domain
The Methyl-CpG binding domain (MBD) binds to DNA that contains one or more symmetrically methylated CpGs . DNA methylation in animals is associated with alterations in chromatin structure and silencing of gene expression. MBD has negligible non-specific affinity for DNA. In vitro foot-printing with MeCP2 showed the MBD can protect a 12 nucleotide region surrounding a methyl CpG pair . MBDs are found in several Methyl-CpG binding proteins and also DNA demethylase .. +PF03062 MBOAT, membrane-bound O-acyltransferase family
Pfam-B_2359 (release 6.4). The MBOAT (membrane bound O-acyl transferase) family of membrane proteins contains a variety of acyltransferase enzymes. A conserved histidine has been suggested to be the active site residue .. +PF02820 mbt;
Pfam-B_526 (Release 6.2). The function of this repeat is unknown, but is found in a number of nuclear proteins such as drosophila sex comb on midleg protein Swiss:Q9VHA0. The repeat is found in up to four copies as in Swiss:Q9UHJ3. The repeat contains a completely conserved glutamate at its amino terminus that may be important for function.. +PF02470 mce;
Pfam-B_475 (release 5.4). This family of proteins contains the mce (mammalian cell entry) proteins from Mycobacterium tuberculosis. The archetype (Rv0169), was isolated as being necessary for colonisation of, and survival within, the macrophage . This family contains proteins of unknown function from other bacteria.. +PF05053 Menin
Pfam-B_5848 (release 7.7). MEN1, the gene responsible for multiple endocrine neoplasia type 1, is a tumour suppressor gene that encodes a protein called Menin which may be an atypical GTPase stimulated by nm23 .. +PF00149 STphosphatase;
Calcineurin-like phosphoesterase. This family includes a diverse range of phosphoesterases , including protein phosphoserine phosphatases, nucleotidases, sphingomyelin phosphodiesterases and 2'-3' cAMP phosphodiesterases as well as nucleases such as bacterial SbcD Swiss:P13457 or yeast MRE11 Swiss:P32829. The most conserved regions in this superfamily centre around the metal chelating residues.. +PF01420 Type I restriction modification DNA specificity domain
This domain is also known as the target recognition domain (TRD). Restriction-modification (R-M) systems protect a bacterial cell against invasion of foreign DNA by endonucleolytic cleavage of DNA that lacks a site specific modification. The host genome is protected from cleavage by methylation of specific nucleotides in the target sites. In type I systems, both restriction and modification activities are present in one heteromeric enzyme complex composed of one DNA specificity subunit (this family), two modification (M) subunits and two restriction (R) subunits .. +PF01795 DUF36; UPF0117;
MraW methylase family. Pfam-B_1376 (release 4.2). Members of this family are probably SAM dependent methyltransferases based on Swiss:P18595 . This family appears to be related to Pfam:PF01596.. +PF05060 N-acetylglucosaminyltransferase II (MGAT2)
Pfam-B_6001 (release 7.7). UDP-N-acetyl-D-glucosamine:alpha-6-D-mannoside beta-1,2-N- acetylglucosaminyltransferase II (EC 2.4.1.143) (GnT II/MGAT2) is a Golgi resident enzyme that catalyses an essential step in the biosynthetic pathway leading from high mannose to complex N-linked oligosaccharides . Mutations in the MGAT2 gene lead to congenital disorder of glycosylation (CDG IIa). CDG IIa patients have an increased bleeding tendency, unrelated to coagulation factors .. +PF02142 Methylglyoxal_synth;
Pfam-B_220 (Release 4.4). This domain composes the whole protein of methylglyoxal synthetase and the domain is also found in Carbamoyl phosphate synthetase (CPS) where it forms a regulatory domain that binds to the allosteric effector ornithine. This family also includes inosicase. The known structures in this family show a common phosphate binding site .. +PF01769 Divalent cation transporter
+PF00993 Class II histocompatibility antigen, alpha domain
Pfam-B_1288 (release 3.0). +PF00969 Class II histocompatibility antigen, beta domain
Pfam-B_331 (release 3.0). +PF02816 MHCK_EF2_kinase;
This family is a novel family of eukaryotic protein kinase catalytic domains, which have no detectable similarity to conventional kinases. The family contains myosin heavy chain kinases [1,2] and Elongation Factor-2 kinase and a bifunctional ion channel . This family is known as the alpha-kinase family . The structure of the kinase domain revealed unexpected similarity to eukaryotic protein kinases in the catalytic core as well as to metabolic enzymes with ATP-grasp domains.. +PF02854 MIF4G domain
Alignment kindly provided by SMART. MIF4G is named after Middle domain of eukaryotic initiation factor 4G (eIF4G). Also occurs in NMD2p and CBP80. The domain is rich in alpha-helices and may contain multiple alpha-helical repeats. In eIF4G, this domain binds eIF4A, eIF3, RNA and DNA .. +PF02815 MIR domain
Ponting CP (EMBL archive). The MIR (protein mannosyltransferase, IP3R and RyR) domain is a domain that may have a ligand transferase function .. +PF04212 MIT (microtubule interacting and transport) domain
The MIT domain forms an asymmetric three-helix bundle and binds ESCRT-III (endosomal sorting complexes required for transport) substrates .. +PF00153 mito_carr;
Mitochondrial carrier protein. +PF03637 Mob1/phocein family
Pfam-B_1830 (release 7.0). Mob1 is an essential Saccharomyces cerevisiae protein, identified from a two-hybrid screen, that binds Mps1p, a protein kinase essential for spindle pole body duplication and mitotic checkpoint regulation. Mob1 contains no known structural motifs; however MOB1 is a member of a conserved gene family and shares sequence similarity with a nonessential yeast gene, MOB2. Mob1 is a phosphoprotein in vivo and a substrate for the Mps1p kinase in vitro. Conditional alleles of MOB1 cause a late nuclear division arrest at restrictive temperature . This family also includes phocein Swiss:Q9QYW3, a rat protein that by yeast two hybrid interacts with striatin .. +PF00994 Probable molybdopterin binding domain
Pfam-B_1258 (release 3.0). This domain is found in a variety of proteins involved in biosynthesis of molybdopterin cofactor. The domain is presumed to bind molybdopterin. The structure of this domain is known, and it forms an alpha/beta structure. In the known structure of Gephyrin this domain mediates trimerisation .. +PF03454 MoeA C-terminal region (domain IV)
This domain is found in proteins involved in biosynthesis of molybdopterin cofactor however the exact molecular function of this domain is uncertain. The structure of this domain is known and forms an incomplete beta barrel.. +PF03453 MoeA N-terminal region (domain I and II)
This family contains two structural domains. One of these contains the conserved DGXA motif. This region is found in proteins involved in biosynthesis of molybdopterin cofactor however the exact molecular function of this region is uncertain.. +PF02493 MORN repeat
The MORN (Membrane Occupation and Recognition Nexus) repeat is found in multiple copies in several proteins including junctophilins (See Takeshima et al. Mol. Cell 2000;6:11-22).\. A MORN-repeat protein has been identified as a dynamic component of the cell division apparatus in the parasite Toxoplasma gondii . It has been hypothesised to function as a linker protein between certain membrane regions and the parasite's cytoskeleton .. +PF03476 MOSC N-terminal beta barrel domain
Aravind L, Anantharaman V. This domain is found to the N-terminus of Pfam:PF03473. The function of this domain is unknown, however it is predicted to adopt a beta barrel fold.. +PF04643 motilin_assoc;
Motilin/ghrelin-associated peptide. Pfam-B_5485 (release 7.5). This family represents a peptide sequence that lies C-terminal to motilin/ghrelin on the respective precursor peptide. Its function is unknown.. +PF04644 motilin_ghrelin;
Pfam-B_5485 (release 7.5). Motilin is a gastrointestinal regulatory polypeptide produced by motilin cells in the duodenal epithelium. It is released into the general circulation at about 100-min intervals during the inter-digestive state and is the most important factor in controlling the inter-digestive migrating contractions. Motilin also stimulates endogenous release of the endocrine pancreas . This family also includes ghrelin, a growth hormone secretagogue synthesised by endocrine cells in the stomach. Ghrelin stimulates growth hormone secretagogue receptors in the pituitary. These receptors are distinct from the growth hormone-releasing hormone receptors, and thus provide a means of controlling pituitary growth hormone release by the gastrointestinal system .. +PF01398 Mov34; JAMM;
JAB1/Mov34/MPN/PAD-1 ubiquitin protease. Pfam-B_738 (release 3.0). Members of this family are found in proteasome regulatory subunits, eukaryotic initiation factor 3 (eIF3) subunits and regulators of transcription factors. This family is also known as the MPN domain and PAD-1-like domain , JABP1 domain or JAMM domain . These are metalloenzymes that function as the ubiquitin isopeptidase/ deubiquitinase in the ubiquitin-based signaling and protein turnover pathways in eukaryotes . Versions of the domain in prokaryotic cognates of the ubiquitin-modification pathway are predicted to have a similar role . . +PF01853 MOZ/SAS family
Pfam-B_3994 (Release 4.3). This region of these proteins has been suggested to be homologous to acetyltransferases .. +PF04117 Mpv17 / PMP22 family
Pfam-B_8493 (release 7.3);. The 22-kDa peroxisomal membrane protein (PMP22) is a major component of peroxisomal membranes. PMP22 seems to be involved in pore forming activity and may contribute to the unspecific permeability of the organelle membrane. PMP22 is synthesised on free cytosolic ribosomes and then directed to the peroxisome membrane by specific targeting information . Mpv17 is a closely related peroxisomal protein. In mouse, the Mpv17 protein is involved in the development of early-onset glomerulosclerosis . More recently a homolog of Mpv17 in S. cerevisiae has been found to be an integral membrane protein of the inner mitochondrial membrane where it has been proposed to have a role in ethanol metabolism and tolerance during heat-shock . Defects in MPV17 are associated with mitochondrial DNA depletion syndrome (MDDS) and Navajo neurohepatopathy (NNH) . MDDS is a clinically heterogeneous group of disorders characterised by a reduction in mitochondrial DNA (mtDNA) copy number. Primary mtDNA depletion is inherited as an autosomal recessive trait and may affect single organs, typically muscle or liver, or multiple tissues. Individuals with the hepatocerebral form of mitochondrial DNA depletion syndrome have early progressive liver failure and neurologic abnormalities, hypoglycemia, and increased lactate in body fluids. NNH is an autosomal recessive disease that is prevalent among Navajo children in the South Western states of America. The major clinical features are hepatopathy, peripheral neuropathy, corneal anesthesia and scarring, acral mutilation, cerebral leukoencephalopathy, failure to thrive, and recurrent metabolic acidosis with intercurrent infections. Infantile, childhood, and classic forms of NNH have been described. Mitochondrial DNA depletion was detected in the livers of patients, suggesting a primary defect in mtDNA maintenance .. +PF03587 Mra1; Nep1;
EMG1/NEP1 methyltransferase. Pfam-B_3290 (release 7.0). Members of this family are essential for 40S ribosomal biogenesis. The structure of EMG1 has revealed that it is a novel member of the superfamily of alpha/beta knot fold methyltransferases .. +PF03022 Major royal jelly protein
Pfam-B_1099 (release 6.4). Royal jelly is the food of queen bee larvae, and is responsible for the high reproductive ability of the queen. Major royal jelly proteins make up around 90% of larval jelly proteins. This family also includes the sequence-related yellow protein of Drosophila, which controls pigmentation of the adult cuticle and larval mouth parts.. +PF04707 MSF1;
Pfam-B_5792 (release 7.5). This family includes a conserved region found in the PRELI protein and yeast YLR168C gene MSF1 product. The function of this protein is unknown, though it is thought to be involved in intra-mitochondrial protein sorting. This region is also found in a number of other eukaryotic proteins.. +PF00635 MSP_domain;
MSP (Major sperm protein) domain. Major sperm proteins are involved in sperm motility. These proteins oligomerise to form filaments. This family contains many other proteins.. +PF03820 Tricarboxylate carrier
TIGRFAMs, Griffiths-Jones SR. +PF00249 myb_DNA-binding;
Myb-like DNA-binding domain. This family contains the DNA binding domains from Myb proteins, as well as the SANT domain family .. +PF02736 Myosin N-terminal SH3-like domain
Pfam-B_110 (Release 5.5). This domain has an SH3-like fold. It is found at the N-terminus of many but not all myosins. The function of this domain is unknown.. +PF00784 MyTH4 domain
Alignment kindly provided by SMART. Domain in myosin and kinesin tails, present twice in myosin-VIIa, and also present in 3 other myosins.. +PF03485 N-Arg;
Arginyl tRNA synthetase N terminal domain. This domain is found at the amino terminus of Arginyl tRNA synthetase, also called additional domain 1 (Add-1). It is about 140 residues long and it has been suggested that this domain will be involved in tRNA recognition .. +PF01699 Na_Ca_Ex;
Sodium/calcium exchanger protein. Pfam-B_1680 (release 4.1). This is a family of sodium/calcium exchanger integral membrane proteins. This family covers the integral membrane regions of the proteins. Sodium/calcium exchangers regulate intracellular Ca2+ concentrations in many cells; cardiac myocytes, epithelial cells, neurons retinal rod photoreceptors and smooth muscle cells . Ca2+ is moved into or out of the cytosol depending on Na+ concentration . In humans and rats there are 3 isoforms; NCX1 NCX2 and NCX3 see Swiss:Q01728, Swiss:P48768 and Swiss:P70549 respectively.. +PF00999 Sodium/hydrogen exchanger family
Pfam-B_312 (release 3.0). Na/H antiporters are key transporters in maintaining the pH of actively metabolising cells. The molecular mechanisms of antiport are unclear. These antiporters contain 10-12 transmembrane regions (M) at the amino-terminus and a large cytoplasmic region at the carboxyl terminus.\. The transmembrane regions M3-M12 share identity with other members of the family. The M6 and M7 regions are highly conserved. Thus, this is thought to be the region that is involved in the transport of sodium and hydrogen ions. The cytoplasmic region has little similarity throughout the family.. +PF02690 Na+/Pi-cotransporter
Pfam-B_509 (release 5.5). This is a family of mainly mammalian type II renal Na+/Pi-cotransporters with other related sequences from lower eukaryotes and bacteria some of which are also Na+/Pi-cotransporters. In the kidney the type II renal Na+/Pi-cotransporters protein allows re-absorption of filtered Pi in the proximal tubule .. +PF02445 Quinolinate synthetase A protein
Pfam-B_1915 (release 5.4). Quinolinate synthetase catalyses the second step of the de novo biosynthetic pathway of pyridine nucleotide formation. In particular, quinolinate synthetase is involved in the condensation of dihydroxyacetone phosphate and iminoaspartate to form quinolinic acid . This synthesis requires two enzymes, a FAD-containing "B protein" and an "A protein".. +PF03822 NAF domain
+PF05089 Alpha-N-acetylglucosaminidase (NAGLU) tim-barrel domain
Pfam-B_6295 (release 7.7). Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate . Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations . The structure shows that the enzyme is composed of three domains. This central domain has a tim barrel fold .. +PF02365 No apical meristem (NAM) protein
Pfam-B_530 (release 5.2). This is a family of no apical meristem (NAM) proteins; these are plant development proteins. Mutations in NAM result in the failure to develop a shoot apical meristem in petunia embryos . NAM is indicated as having a role in determining positions of meristems and primordia . One member of this family NAP (NAC-like, activated by AP3/PI) is encoded by the target genes of the AP3/PI transcriptional activators and functions in the transition between growth by cell division and cell expansion in stamens and petals .. +PF04095 Nicotinate phosphoribosyltransferase (NAPRTase) family
Pfam-B_5038 (release 7.3) & Pfam-B_5422 (Release 7.5). Nicotinate phosphoribosyltransferase (EC:2.4.2.11) is the rate limiting enzyme that catalyses the first reaction in the NAD salvage synthesis. This family also includes Pre-B cell enhancing factor that is a cytokine Swiss:P43490. This family is related to Quinolinate phosphoribosyltransferase Pfam:PF01729.. +PF04970 NC;
Lecithin retinol acyltransferase. Pfam-B_3758 (release 7.0). The full-length members of this family, eg Swiss:P53816, are representatives of a novel class II tumour-suppressor family, designated as H-REV107-like. This domain is the catalytic N-terminal proline-rich region of the protein. The downstream region is a putative C-terminal transmembrane domain which is found to be crucial for cellular localisation, but not necessary for the enzyme activity . H-REV107-like proteins are homologous to lecithin retinol acyltransferase (LRAT), an enzyme that catalyses the transfer of the sn-1 acyl group of phosphatidylcholine to all-trans-retinol and forming a retinyl ester .. +PF04904 NAB conserved region 1 (NCD1)
Pfam-B_6188 (release 7.6). Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors . This region consists of the N-terminal NAB conserved region 1, which interacts with the EGR1 inhibitory domain (R1) . It may also mediate multimerisation.. +PF04905 NAB conserved region 2 (NCD2)
Pfam-B_6188 (release 7.6). Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors . This family consists of NAB conserved region 2, near the C-terminus of the protein. It is necessary for transcriptional repression by the Nab proteins . It is also required for transcription activation by Nab proteins at Nab-activated promoters .. +PF03096 Ndr family
Pfam-B_2481 (release 6.4). +PF03102 NeuB family
Pfam-B_2572 (release 6.4). NeuB is the prokaryotic N-acetylneuraminic acid (Neu5Ac) synthase. It catalyses the direct formation of Neu5Ac (the most common sialic acid) by condensation of phosphoenolpyruvate (PEP) and N-acetylmannosamine (ManNAc). This reaction has only been observed in prokaryotes; eukaryotes synthesise the 9-phosphate form, Neu5Ac-9-P, and utilise ManNAc-6-P instead of ManNAc. Such eukaryotic enzymes are not present in this family . This family also contains SpsE spore coat polysaccharide biosynthesis proteins.. +PF02931 Neurotransmitter-gated ion-channel ligand binding domain
This family is the extracellular ligand binding domain of these ion channels . This domain forms a pentameric arrangement in the known structure.. +PF01436 NHL repeat
The NHL (NCL-1, HT2A and LIN-41) repeat is found in multiple tandem copies. It is about 40 residues long and resembles the WD repeat Pfam:PF00400. The repeats have a catalytic activity in Swiss:P10731; proteolysis has shown that the Peptidyl-alpha-hydroxyglycine alpha-amidating lyase (PAL) activity is localised to the repeats . Swiss:Q13049 interacts with the activation domain of Tat. This interaction is mediated by the NHL repeats .. +PF03031 NLI interacting factor-like phosphatase
Pfam-B_1405 (release 6.4). This family contains a number of NLI interacting factor isoforms (eg. Swiss:Q9PTJ8) and also an N-terminal region of RNA polymerase II CTD phosphatase (Swiss:Q9Y5BO) and FCP1 serine phosphatase (Swiss:Q9PT70). This region has been identified as the minimal phosphatase domain .. +PF04923 Ninjurin
Pfam-B_5824 (release 7.6). Ninjurin (nerve injury-induced protein) is involved in nerve regeneration and in the formation and function in some tissues .. +PF02613 Nitrate reductase delta subunit
This family is the delta subunit of the nitrate reductase enzyme. The delta subunit is not part of the nitrate reductase enzyme but is most likely needed for assembly of the multi-subunit enzyme complex . In the absence of the delta subunit the core alpha beta enzyme complex is unstable . The delta subunit is essential for enzyme activity in vivo and in vitro . The nitrate reductase enzyme, EC:1.7.99.4 catalyses the conversion of nitrite to nitrate via the reduction of an acceptor. The nitrate reductase enzyme is composed of three subunits . Nitrate is the most widely used alternative electron acceptor after oxygen . This family also now contains the family TorD, a family of cytoplasmic chaperone proteins; like many prokaryotic molybdoenzymes, the TMAO reductase (TorA) of Escherichia coli requires the insertion of a bis(molybdopterin guanine dinucleotide) molybdenum (bis(MGD)Mo) cofactor in its catalytic site to be active and translocated to the periplasm. The TorD chaperone increases apoTorA activation up to four-fold, allowing maturation of most of the apoprotein. Therefore TorD is involved in the first step of TorA maturation to make it competent to receive the cofactor .. +PF00877 NlpC/P60 family
Pfam-B_292 (release 3.0) & Pfam-B_9022 (Release 8.0). The function of this domain is unknown. It is found in several lipoproteins.. +PF04981 NMD3 family
The NMD3 protein is involved in nonsense mediated mRNA decay. This amino terminal region contains four conserved CXXC motifs that could be metal binding. NMD3 is also involved in export of the 60S ribosomal subunit, which is mediated by the adapter protein Nmd3p in a Crm1p-dependent pathway .. +PF01234 NNMT/PNMT/TEMT family
+PF04147 Nop14-like family
Pfam-B_8521 (release 7.3);. Emg1 and Nop14 are novel proteins whose interaction is required for the maturation of the 18S rRNA and for 40S ribosome production .. +PF04153 NOT;
NOT2 / NOT3 / NOT5 family. Pfam-B_2131 (release 7.3). NOT1, NOT2, NOT3, NOT4 and NOT5 form a nuclear complex that negatively regulates the basal and activated transcription of many genes. This family includes NOT2, NOT3 and NOT5.. +PF04065 Not1 N-terminal domain, CCR4-Not complex component
Pfam-B_8081 (release 7.3);. +PF03060 NPD;
Nitronate monooxygenase. Pfam-B_2634 (release 6.4). Nitronate monooxygenase (NMO), formerly referred to as 2-nitropropane dioxygenase (NPD) (EC:1.13.11.32), is an FMN-dependent enzyme that uses molecular oxygen to oxidize (anionic) alkyl nitronates and, in the case of the enzyme from Neurospora crassa, (neutral) nitroalkanes to the corresponding carbonyl compounds and nitrite. Previously classified as 2-nitropropane dioxygenase [1,2,3], but it is now recognized that this was the result of the slow ionization of nitroalkanes to their nitronate (anionic) forms . The enzymes from the fungus Neurospora crassa and the yeast Williopsis saturnus var. mrakii (formerly classified as Hansenula mrakii) contain non-covalently bound FMN as the cofactor. Active towards linear alkyl nitronates of lengths between 2 and 6 carbon atoms and, with lower activity, towards propyl-2-nitronate. The enzyme from N. crassa can also utilize neutral nitroalkanes, but with lower activity. One atom of oxygen is incorporated into the carbonyl group of the aldehyde product. The reaction appears to involve the formation of an enzyme-bound nitronate radical and an a-peroxynitroethane species, which then decomposes, either in the active site of the enzyme or after release, to acetaldehyde and nitrite.. +PF05021 NPL4 family
Pfam-B_13681 (release 7.6). The HRD4 gene was identical to NPL4, a gene previously implicated in nuclear transport. Using a diverse set of substrates and direct ubiquitination assays, analysis revealed that HRD4/NPL4 is required for a poorly characterised step in ER-associated degradation after ubiquitination of target proteins but before their recognition by the 26S proteasome . Npl4p physically associates with Cdc48p via Ufd1p to form a Cdc48p-Ufd1p-Npl4p complex. The Cdc48-Ufd1-Npl4 complex functions in the recognition of several polyubiquitin-tagged proteins and facilitates their presentation to the 26S proteasome for processive degradation or even more specific processing.. +PF01909 DUF76;
Nucleotidyltransferase domain. Members of this family belong to a large family of nucleotidyltransferases . This family includes kanamycin nucleotidyltransferase (KNTase) which is a plasmid-coded enzyme responsible for some types of bacterial resistance to aminoglycosides. KNTase in-activates antibiotics by catalysing the addition of a nucleotidyl group onto the drug.. +PF01759 UNC-6/NTR/C345C module
Sequence similarity between netrin UNC-6 and C345C complement protein family members, and hence the existence of the UNC-6 module, was first reported in . Subsequently, many additional members of the family were identified on the basis of sequence similarity between the C-terminal domains of netrins, complement proteins C3, C4, C5, secreted frizzled-related proteins, and type I pro-collagen C-proteinase enhancer proteins (PCOLCEs), which are homologous with the N-terminal domains of tissue inhibitors of metalloproteinases (TIMPs). The TIMPs are classified as a separate family in Pfam (Pfam:PF00965) . This expanded domain family has been named as the NTR module .. +PF04142 Nucleotide-sugar transporter
Pfam-B_2311 (release 7.3). This family of membrane proteins transport nucleotide sugars from the cytoplasm into golgi vesicles. Swiss:P78382 transports CMP-sialic acid, Swiss:P78381 transports UDP-galactose and Swiss:Q9Y2D2 transports UDP-GlcNAc.. +PF04096 Nucleoporin autopeptidase
Pfam-B_5132 (release 7.3);. +PF01733 Nucleoside transporter
Pfam-B_2135 (release 4.1). This is a family of nucleoside transporters. In mammalian cells nucleoside transporters transport nucleoside across the plasma membrane and are essential for nucleotide synthesis via the salvage pathways for cells that lack their own de novo synthesis pathways . Also in this family is mouse and human nucleolar protein HNP36 Swiss:Q14542 a protein of unknown function; although it has been hypothesised to be a plasma membrane nucleoside transporter .. +PF04880 NUDE protein, C-terminal conserved region
Pfam-B_6501 (release 7.6). This family represents the C-terminal conserved region of the NUDE proteins. NUDE proteins are involved in nuclear migration .. +PF00293 mutT;
+PF03826 OAR domain
+PF03137 OATP_C;
Organic Anion Transporter Polypeptide (OATP) family. Pfam-B_626 (release 6.5). This family consists of several eukaryotic Organic-Anion-Transporting Polypeptides (OATPs). Several have been identified mostly in human and rat. Different OATPs vary in tissue distribution and substrate specificity. Since the numbering of different OATPs in particular species was based originally on the order of discovery, similarly numbered OATPs in humans and rats did not necessarily correspond in function, tissue distribution and substrate specificity (in spite of the name, some OATPs also transport organic cations and neutral molecules). Thus, Tamai et al. initiated the current scheme of using digits for rat OATPs and letters for human ones. Prostaglandin transporter (PGT) proteins (e.g. Swiss:Q92959) are also considered to be OATP family members. In addition, the methotrexate transporter OATK (Swiss:P70502) is closely related to OATPs. This family also includes several predicted proteins from Caenorhabditis elegans and Drosophila melanogaster. This similarity was not previously noted. Note: Members of this family are described (in the Swiss-Prot database) as belonging to the SLC21 family of transporters.. +PF05005 Janus/Ocnus family (Ocnus)
Pfam-B_4799 (release 7.6). This family is comprised of the Ocnus, Janus-A and Janus-B proteins. These proteins have been found to be testes specific in Drosophila melanogaster . . +PF00215 Orotidine 5'-phosphate decarboxylase / HUMPS family
This family includes Orotidine 5'-phosphate decarboxylase enzymes EC:4.1.1.23 that are involved in the final step of pyrimidine biosynthesis. The family also includes enzymes such as hexulose-6-phosphate synthase. This family appears to be distantly related to Pfam:PF00834.. +PF04084 Origin recognition complex subunit 2
Pfam-B_7065 (release 7.3);. All DNA replication initiation is driven by a single conserved eukaryotic initiator complex termed the origin recognition complex (ORC). The ORC is a six protein complex. The function of ORC is reviewed in . . +PF03392 Insect pheromone-binding family, A10/OS-D
Pfam-B_3032 (release 6.6). +PF04756 OST3 / OST6 family
The proteins in this family are part of a complex of eight ER proteins that transfers core oligosaccharide from dolichol carrier to Asn-X-Ser/Thr motifs . This family includes both OST3 and OST6, each of which contains four predicted transmembrane helices. Disruption of OST3 and OST6 leads to a defect in the assembly of the complex. Hence, the function of these genes seems to be essential for recruiting a fully active complex necessary for efficient N-glycosylation .. +PF01010 oxidored_q1_C;
NADH-Ubiquinone oxidoreductase (complex I) subunit C-terminus. Pfam-B_41 (release 3.0). This sub-family represents a carboxyl terminal extension of Pfam:PF00361. It includes subunit 5 from chloroplasts, and bacterial subunit L. This sub-family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane.. +PF01483 P;
Proprotein convertase P-domain. A unique feature of the eukaryotic subtilisin-like proprotein convertases is the presence of an additional highly conserved sequence of approximately 150 residues (P domain) located immediately downstream of the catalytic domain.. +PF04062 ARP2/3 complex ARPC3 (21 kDa) subunit
Pfam-B_6413 (release 7.3);. The seven component ARP2/3 actin-organising complex is involved in actin assembly and function.. +PF02331 Apoptosis preventing protein
Pfam-B_13247 (release 5.2). This viral protein functions to block the host apoptotic response caused by infection by the virus. The apoptosis preventing protein (or early 35kD protein, P35) acts by blocking caspase protease activity.. +PF02225 PA domain
Pfam-B_259 (release 5.2). The PA (Protease associated) domain is found as an insert domain in diverse proteases. The PA domain is also found in a plant vacuolar sorting receptor Swiss:O22925 and members of the RZF family Swiss:O43567. It has been suggested that this domain forms a lid-like structure that covers the active site in active proteases, and is involved in protein recognition in vacuolar sorting receptors .. +PF00291 S_T_dehydratase;
Pyridoxal-phosphate dependent enzyme. Members of this family are all pyridoxal-phosphate dependent enzymes. This family includes: serine dehydratase EC:4.2.1.13 P20132, threonine dehydratase EC:4.2.1.16 Swiss:P04968, tryptophan synthase beta chain EC:4.2.1.20 Swiss:P00932, threonine synthase EC:4.2.99.2 Swiss:P04990, cysteine synthase EC:4.2.99.8 P11096, cystathionine beta-synthase EC:4.2.1.22 Swiss:P35520, 1-aminocyclopropane-1-carboxylate deaminase EC:4.1.99.4 Swiss:P76316.. +PF00024 apple; Apple;PAN;
The PAN domain contains a conserved core of three disulphide bridges. In some members of the family there is an additional fourth disulphide bridge that links the N and C termini of the domain. The domain is found in diverse proteins, in some they mediate protein-protein interactions, in others they mediate protein-carbohydrate interactions.. +PF01569 PAP2 superfamily
Pfam-B_486 (release 4.0). This family includes the enzyme type 2 phosphatidic acid phosphatase (PAP2), Glucose-6-phosphatase EC:3.1.3.9, Phosphatidylglycerophosphatase B EC:3.1.3.27 and bacterial acid phosphatase EC:3.1.3.2. The family also includes a variety of haloperoxidases [1,2] that function by oxidising halides in the presence of hydrogen peroxide to form the corresponding hypohalous acids.. +PF03828 Cid1 family poly A polymerase
Griffiths-Jones SR, Wood V, Mistry J. This domain is found in poly(A) polymerases and has been shown to have polynucleotide adenylyltransferase activity . Proteins in this family have been located to both the nucleus and the cytoplasm.. +PF04928 Poly(A) polymerase central domain
Pfam-B_1341 (release 7.6). The central domain of Poly(A) polymerase shares structural similarity with the allosteric activity domain of ribonucleotide reductase R1, which comprises a four-helix bundle and a three-stranded mixed beta- sheet. Even though the two enzymes bind ATP, the ATP-recognition motifs are different.. +PF05028 PARG;
Poly (ADP-ribose) glycohydrolase (PARG). Pfam-B_5996 (release 7.6). Poly(ADP-ribose) glycohydrolase (PARG), is a ubiquitously expressed exo- and endoglycohydrolase which mediates oxidative and excitotoxic neuronal death .. +PF01734 Patatin-like phospholipase
Pfam-B_2206 (release 4.1). This family consists of various patatin glycoproteins from plants. The patatin protein accounts for up to 40% of the total soluble protein in potato tubers . Patatin is a storage protein but it also has the enzymatic activity of lipid acyl hydrolase, catalysing the cleavage of fatty acids from membrane lipids . Members of this family have been found also in vertebrates.. +PF02460 Patched family
Pfam-B_2400 (release 5.4). The transmembrane protein Patched Swiss:P18502 is a receptor for the morphogene Sonic Hedgehog. This protein associates with the smoothened protein to transduce hedgehog signals.. +PF02170 ZAP;
This domain is named PAZ after the proteins Piwi Argonaut and Zwille. This domain is found in two families of proteins that are involved in post-transcriptional gene silencing. These are the Piwi family and the Dicer family, that includes the Carpel factory protein. The function of the domains is unknown but has been suggested to mediate complex formation between proteins of the Piwi and Dicer families by hetero-dimerisation. The three-dimensional structure of this domain has been solved [2-4]. The PAZ domain is composed of two subdomains. One subdomain is similar to the OB fold, albeit with a different topology. The OB-fold is well known as a single-stranded nucleic acid binding fold. The second subdomain is composed of a beta-hairpin followed by an alpha-helix. The PAZ domain shows low-affinity nucleic acid binding and appears to interact with the 3' ends of single-stranded regions of RNA in the cleft between the two subdomains. PAZ can bind the characteristic two-base 3' overhangs of siRNAs, indicating that although PAZ may not be a primary nucleic acid binding site in Dicer or RISC, it may contribute to the specific and productive incorporation of siRNAs and miRNAs into the RNAi pathway.. +PF00564 OPR;
Alignment kindly provided by SMART. +PF00786 P21-Rho-binding domain
Alignment kindly provided by SMART. Small domains that bind Cdc42p- and/or Rho-like small GTPases. Also known as the Cdc42/Rac interactive binding (CRIB).. +PF01161 Phosphatidylethanolamine-binding protein
Prosite & Pfam-B_5394 (Release 7.5). +PF01399 PCI domain
This domain has also been called the PINT motif (Proteasome, Int-6, Nip-1 and TRIP-15) .. +PF03462 PCRF domain
This domain is found in peptide chain release factors.. +PF02153 Prephenate dehydrogenase
PSI-BLAST P20692/1-290. Members of this family are prephenate dehydrogenases EC:1.3.1.12 involved in tyrosine biosynthesis.. +PF04166 Pyridoxal phosphate biosynthetic protein PdxA
TIGRFAMs (release 2.0);. In Escherichia coli the coenzyme pyridoxal 5'-phosphate is synthesised de novo by a pathway that is thought to involve the condensation of 4-(phosphohydroxy)-L-threonine and 1-deoxy-D-xylulose, catalysed by the enzymes PdxA and PdxJ, to form either pyridoxine (vitamin B6) or pyridoxine 5'-phosphate .. +PF00934 PE family
Pfam-B_253 (release 3.0). This family is named after a PE motif near to the amino terminus of the domain. The PE family of proteins all contain an amino-terminal region of about 110 amino acids. The carboxyl termini of this family are variable and fall into several classes. The largest class of PE proteins is the highly repetitive PGRS class which has a high glycine content. The function of these proteins is uncertain but it has been suggested that they may be related to antigenic variation of Mycobacterium tuberculosis .. +PF01095 Pectinesterase
+PF04710 Pellino
Pfam-B_5882 (release 7.5). Pellino is involved in Toll-like signalling pathways, and associates with the kinase domain of the Pelle Ser/Thr kinase [1,2,3].. +PF02452 PemK-like protein
Pfam-B_2134 (release 5.4). PemK is a growth inhibitor in E. coli known to bind to the promoter region of the Pem operon, auto-regulating synthesis. This Pfam family consists of the PemK protein in addition to ChpA, ChpB and other PemK-like proteins. . +PF01804 Penicillin amidase
Pfam-B_1410 (release 4.2). Penicillin amidase or penicillin acylase EC:3.5.1.11 catalyses the hydrolysis of benzylpenicillin to phenylacetic acid and 6-aminopenicillanic acid (6-APA), a key intermediate in the synthesis of penicillins . Also in the family are cephalosporin acylase Swiss:P07662 and Swiss:P29958 aculeacin A acylase which are involved in the synthesis of related peptide antibiotics.. +PF01469 Pentapeptide repeats (8 copies)
These repeats are found in many mycobacterial proteins. These repeats are most common in the Pfam:PF00823 family of proteins, where they are found in the MPTR subfamily of PPE proteins. The function of these repeats is unknown. The repeat can be approximately described as XNXGX, where X can be any amino acid. These repeats are similar to Pfam:PF00805 , however it is not clear if these two families are structurally related.. +PF00391 PEP-utilising enzyme, mobile domain
This domain is a "swivelling" beta/beta/alpha domain which is thought to be mobile in all proteins known to contain it.. +PF05131 Pep3/Vps18/deep orange family
Pfam-B_6057 (release 7.7). This region is found in a number of proteins identified as involved in golgi function and vacuolar sorting. The molecular function of this region is unknown. The members of this family contain a C-terminal ring finger domain.. +PF03051 Pept_C1-like;
Peptidase C1-like family. Pfam-B_2136 (release 6.4). This family is closely related to the Peptidase_C1 family Pfam:PF00112, containing several prokaryotic and eukaryotic aminopeptidases and bleomycin hydrolases.. +PF00112 Cys-protease;
Papain family cysteine protease. +PF01650 Peptidase C13 family
Pfam-B_1302 (release 4.1). Members of this family are asparaginyl peptidases . The blood fluke parasite Schistosoma mansoni has at least five Clan CA cysteine peptidases in its digestive tract including cathepsins B (2 isoforms), C, F and L. All have been recombinantly expressed as active enzymes, albeit in various stages of activation . In addition, a Clan CD peptidase, termed asparaginyl endopeptidase or 'legumain' has been identified. This has formerly been characterised as a 'haemoglobinase', but this term is probably incorrect . Two cDNAs have been described for Schistosoma mansoni legumain; one encodes an active enzyme whereas the active site cysteine residue encoded by the second cDNA is substituted by an asparagine residue. Both forms have been recombinantly expressed .. +PF00863 Peptidase family C4
Pfam-B_232 (release 3.0). This peptidase is present in the nuclear inclusion protein of potyviruses.. +PF02902 Ulp1_C;
Ulp1 protease family, C-terminal catalytic domain. This domain contains the catalytic triad Cys-His-Asn.. +PF03416 Peptidase family C54
+PF01433 Peptidase family M1
Members of this family are aminopeptidases. The members differ widely in specificity, hydrolysing acidic, basic or neutral N-terminal residues. This family includes leukotriene-A4 hydrolase Swiss:P09960, this enzyme also has an aminopeptidase activity .. +PF01431 Peptidase family M13
Mammalian enzymes are typically type-II membrane anchored enzymes which are known, or believed to activate or inactivate oligopeptide (pro)-hormones such as opioid peptides. The family also contains a bacterial member believed to be involved with milk protein cleavage.. +PF05193 Peptidase M16 inactive domain
Peptidase M16 consists of two structurally related domains. One is the active peptidase, whereas the other is inactive. The two domains hold the substrate like a clamp .. +PF02789 Cytosol aminopeptidase family, N-terminal domain
Pfam-B_990 (release 3.0). +PF02127 Aminopeptidase I zinc metalloprotease (M18)
+PF01546 CO_pept_M20;
Peptidase family M20/M25/M40. Pfam-B_253 (release 4.0). This family includes a range of zinc metallopeptidases belonging to several families in the peptidase classification . Family M20 are Glutamate carboxypeptidases. Peptidase family M25 contains X-His dipeptidases.. +PF00814 Glycoprotease;
Glycoprotease family. Pfam-B_1670 (release 2.1) & Pfam-B_4550 (Release 7.5). The Peptidase M22 proteins are part of the HSP70-actin superfamily ( ). The region represented here is an insert into the fold and is not found in the rest of the family (beyond the Peptidase M22 family). Included in this family are the Rhizobial NodU proteins and the HypF regulator. This region also contains the histidine dyad believed to coordinate the metal ion and hence provide catalytic activity. Interestingly the histidines are not well conserved, and there is a lack of experimental evidence to support peptidase activity as a general property of this family. There also appear to be instances of this domain outside of the HSP70-actin superfamily (e.g. Swiss:Q9ZM49).. +PF04389 Peptidase family M28
+PF01551 Peptidase_M37;
Peptidase family M23. Pfam-B_291 (release 4.0). Members of this family are zinc metallopeptidases with a range of specificities. The peptidase family M23 is included in this family, these are Gly-Gly endopeptidases. Peptidase family M23 are also endopeptidases. This family also includes some bacterial lipoproteins such as Swiss:P33648 for which no proteolytic activity has been demonstrated. This family also includes leukocyte cell-derived chemotaxin 2 (LECT2) proteins. LECT2 is a liver-specific protein which is thought to be linked to hepatocyte growth although the exact function of this protein is unknown.. +PF01434 Peptidase family M41
+PF02163 Peptidase family M50
+PF02897 Prolyl oligopeptidase, N-terminal beta-propeller domain
This unusual 7-stranded beta-propeller domain protects the catalytic triad of prolyl oligopeptidase (see Pfam:PF00326), excluding larger peptides and proteins from proteolysis in the cytosol.. +PF04080 Per1-like
Pfam-B_12918 (release 7.3);. PER1 is required for GPI-phospholipase A2 activity and is involved in lipid remodelling of GPI-anchored proteins .. +PF04695 Peroxisomal membrane anchor protein (Pex14p) conserved region
Pfam-B_4121 (release 7.5). Family of peroxisomal membrane anchor proteins which bind the PTS1 (peroxisomal targeting signal) receptor and are required for the import of PTS1-containing proteins into peroxisomes. Loss of functional Pex14p results in defects in both the PTS1 and PTS2-dependent import pathways. Deletion analysis of this conserved region implicates it in selective peroxisome degradation. In the majority of members this region is situated at the N-terminus of the protein [1,2].. +PF00294 pfkB;
pfkB family carbohydrate kinase. This family includes a variety of carbohydrate and pyrimidine kinases.. +PF00169 PH domain
PH stands for pleckstrin homology.. +PF05065 Phage capsid family
Pfam-B_3186 (release 7.7) & Pfam-B_9481 (release 10.0). Family of bacteriophage hypothetical proteins and capsid proteins. . +PF02899 Phage_integr_N;
Phage integrase, N-terminal SAM-like domain. +PF04860 Phage portal protein
Pfam-B_6050 (release 7.6). Bacteriophage portal proteins form a dodecamer that is located at a five-fold vertex of the viral capsid. The portal complex forms a channel through which the viral DNA is packaged into the capsid, and exits during infection. The portal protein is thought to rotate during DNA packaging . Portal proteins from different phage show little sequence homology, so this family does not represent all portal proteins.. +PF05119 Phage_sml_term;
Phage terminase, small subunit. TIGRFAMs (release 2.0);. +PF03354 Phage_terminase;
Pfam-B_3931 (release 6.5). The majority of the members of this family are bacteriophage proteins, several of which are thought to be terminase large subunit proteins. There are also a number of bacterial proteins of unknown function.. +PF00628 PHD-finger
PHD folds into an interleaved type of Zn-finger chelating 2 Zn ions in a similar manner to that of the RING and FYVE domains . Several PHD fingers have been identified as binding modules of methylated histone H3 .. +PF01384 Phosphate transporter family
Pfam-B_923 (release 3.0). This family includes PHO-4 from Neurospora crassa which is a Na(+)-phosphate symporter . This family also contains the leukaemia virus receptor Swiss:Q08344.. +PF01663 Type I phosphodiesterase / nucleotide pyrophosphatase
Pfam-B_994 (release 4.1) & Pfam-B_6150 (Release 8.0). This family consists of phosphodiesterases, including human plasma-cell membrane glycoprotein PC-1 / alkaline phosphodiesterase i / nucleotide pyrophosphatase (nppase). These enzymes catalyse the cleavage of phosphodiester and phosphosulfate bonds in NAD, deoxynucleotides and nucleotide sugars . Also in this family is ATX an autotaxin, tumour cell motility-stimulating protein which exhibits type I phosphodiesterase activity . The alignment encompasses the active site [3,4]. Also present within this family is 60-kDa Ca2+-ATPase from F. odoratum .. +PF02811 PHP_C;
The PHP (Polymerase and Histidinol Phosphatase) domain is a putative phosphoesterase domain.. +PF02972 phycoerythr_ab;
Phycoerythrin, alpha/beta chain. This family represents the non-globular alpha and beta chain components of phycoerythrin. The structure is a long beta-hairpin and a single alpha-helix.. +PF05023 Phytochelatin synthase
Pfam-B_9299 (release 7.6). Phytochelatin synthase is the enzyme responsible for the synthesis of heavy-metal-binding peptides (phytochelatins) from glutathione and related thiols . The crystal structure of a member of this family shows it to possess a papain fold . The enzyme catalyses the deglycination of a GSH donor molecule . The enzyme contains a catalytic triad of cysteine, histidine and aspartate residues.. +PF02567 Phenazine biosynthesis-like protein
+PF00454 Phosphatidylinositol 3- and 4-kinase
Prosite & Pfam-B_6771 (Release 7.6). Some members of this family probably do not have lipid kinase activity and are protein kinases, e.g. Swiss:P42345 .. +PF00792 Phosphoinositide 3-kinase C2
SMART, Griffiths-Jones SR. Alignment kindly provided by SMART. Phosphoinositide 3-kinase region postulated to contain a C2 domain. Outlier of Pfam:PF00168 family. . +PF00640 Phosphotyrosine interaction domain (PTB/PID)
+PF04987 Phosphatidylinositolglycan class N (PIG-N)
Pfam-B_5307 (release 7.6). Phosphatidylinositolglycan class N (PIG-N) is a mammalian homologue of the yeast protein MCD4P and is expressed in the endoplasmic reticulum . PIG-N is essential for glycosylphosphatidylinositol anchor synthesis. Glycosylphosphatidylinositol (GPI)-anchored proteins are cell surface-localised proteins that serve many important cellular functions .. +PF01850 PIN domain
+PF04696 pinin_SDK_memA;
pinin/SDK/memA/ protein conserved region. Pfam-B_4141 (release 7.5). Members of this family have very varied localisations within the eukaryotic cell. pinin is known to localise at the desmosomes and is implicated in anchoring intermediate filaments to the desmosomal plaque . SDK2/3 is a dynamically localised nuclear protein thought to be involved in modulation of alternative pre-mRNA splicing . memA is a tumour marker preferentially expressed in human melanoma cell lines. A common feature of the members of this family is that they may all participate in regulating protein-protein interactions .. +PF01504 Phosphatidylinositol-4-phosphate 5-Kinase
Pfam-B_571 (release 4.0). This family contains a region from the common kinase core found in the type I phosphatidylinositol-4-phosphate 5-kinase (PIP5K) family as described in . The family consists of various type I, II and III PIP5K enzymes. PIP5K catalyses the formation of phosphoinositol-4,5-bisphosphate via the phosphorylation of phosphatidylinositol-4-phosphate, a precursor in the phosphoinositide signaling pathway.. +PF02171 Piwi domain
This domain is found in the protein Piwi and its relatives. The function of this domain is the dsRNA guided hydrolysis of ssRNA. Determination of the crystal structure of Argonaute reveals that PIWI is an RNase H domain, and identifies Argonaute as Slicer, the enzyme that cleaves mRNA in the RNAi RISC complex . In addition, Mg+2 dependence and production of 3'-OH and 5' phosphate products are shared characteristics of RNaseH and RISC. The PIWI domain core has a tertiary structure belonging to the RNase H family of enzymes. RNase H fold proteins all have a five-stranded mixed beta-sheet surrounded by helices. By analogy to RNase H enzymes which cleave single-stranded RNA guided by the DNA strand in an RNA/DNA hybrid, the PIWI domain can be inferred to cleave single-stranded RNA, for example mRNA, guided by double stranded siRNA. . +PF00801 PKD domain
This domain was first identified in the Polycystic kidney disease protein PKD1. This domain has been predicted to contain an Ig-like fold .. +PF01477 PLAT/LH2 domain
This domain is found in a variety of membrane or lipid associated proteins. It is called the PLAT (Polycystin-1, Lipoxygenase, Alpha-Toxin) domain or LH2 (Lipoxygenase homology) domain. The known structure of pancreatic lipase shows this domain binds to procolipase Pfam:PF01114, which mediates membrane association. So it appears possible that this domain mediates membrane attachment via other protein binding partners.\. The structure of this domain is known for many members of the family and is composed of a beta sandwich.. +PF00614 Phospholipase D Active site motif
Ponting C, Schultz J, Bork P. Alignment kindly provided by SMART. Phosphatidylcholine-hydrolysing phospholipase D (PLD) isoforms are activated by ADP-ribosylation factors (ARFs). PLD produces phosphatidic acid from phosphatidylcholine, which may be essential for the formation of certain types of transport vesicles or may be constitutive vesicular transport to signal transduction pathways. PC-hydrolysing PLD is a homologue of cardiolipin synthase, phosphatidylserine synthase, bacterial PLDs, and viral proteins. Each of these appears to possess a domain duplication which is apparent by the presence of two motifs containing well-conserved histidine, lysine, and/or asparagine residues which may contribute to the active site. aspartic acid. An E. coli endonuclease (nuc) and similar proteins appear to be PLD homologues but possess only one of these motifs. The profile contained here represents only the putative active site regions, since an accurate multiple alignment of the repeat units has not been achieved.. +PF01690 Potato leaf roll virus readthrough protein
Pfam-B_1335 (release 4.1). This family consists mainly of the potato leaf roll virus readthrough protein. This is generated via a readthrough of open reading frame 3 a coat protein allowing transcription of open reading frame 5 to give an extended coat protein with a large c-terminal addition or read through domain . The readthrough protein is thought to play a role in the circulative aphid transmission of potato leaf roll virus . Also in the family is open reading frame 6 from beet western yellows virus and potato leaf roll virus both luteovirus and an unknown protein from cucurbit aphid-borne yellows virus a closterovirus.. +PF03126 Plus-3 domain
This domain is about 90 residues in length and is often found associated with the Pfam:PF02213 domain. The function of this domain is uncertain. It is possible that this domain is involved in DNA binding as it has three conserved positively charged residues, hence this domain has been named the plus-3 domain. It is found in yeast Rtf1 which may be a transcription elongation factor .. +PF04043 Plant invertase/pectin methylesterase inhibitor
This domain inhibits pectin methylesterases (PMEs) and invertases through formation of a non-covalent 1:1 complex . It has been implicated in the regulation of fruit development, carbohydrate metabolism and cell wall extension (see ). It may also be involved in inhibiting microbial pathogen PMEs. It has been observed that it is often expressed as a large inactive preprotein . It is also found at the N-termini of PMEs predicted from DNA sequences (personal obs:C Yeats), suggesting that both PMEs and their inhibitor are expressed as a single polyprotein and subsequently processed. It has two disulphide bridges and is mainly alpha-helical .. +PF04721 PNGase;
Domain of unknown function (DUF750) . Pfam-B_4045 (release 7.5). This family of proteins with unknown function shows similarity to PNG-1, an enzyme responsible for de-N-glycosylation of misfolded glycoproteins in the cytosol . However, unlike PNG-1, this protein does not contain a catalytic triad in its transglutaminase domain .. +PF03726 Polyribonucleotide nucleotidyltransferase, RNA binding domain
This family contains the RNA binding domain of Polyribonucleotide nucleotidyltransferase (PNPase). PNPase is involved in mRNA degradation in a 3'-5' direction.. +PF01357 Pollen_allergen;
This family contains allergens lol PI, PII and PIII from Lolium perenne.. +PF01522 Polysac_deacet;Polysacc_deacet;
Polysaccharide deacetylase. Pfam-B_502 (release 4.0). This domain is found in polysaccharide deacetylase. This family of polysaccharide deacetylases includes NodB (nodulation protein B from Rhizobium) which is a chitooligosaccharide deacetylase . It also includes chitin deacetylase from yeast , and endoxylanases which hydrolyses glucosidic bonds in xylan .. +PF04831 Popeye protein conserved region
Pfam-B_3905 (release 7.6). The function of Popeye proteins is not well understood. They are predominantly expressed in cardiac and skeletal muscle. This family represents a conserved region which includes three potential transmembrane domains .. +PF01558 Pyruvate ferredoxin/flavodoxin oxidoreductase
Pfam-B_350 (release 4.0). This family includes a region of the large protein pyruvate-flavodoxin oxidoreductase and the whole pyruvate ferredoxin oxidoreductase gamma subunit protein. It is not known whether the gamma subunit has a catalytic or regulatory role. Pyruvate oxidoreductase (POR) catalyses the final step in the fermentation of carbohydrates in anaerobic microorganisms . This involves the oxidative decarboxylation of pyruvate with the participation of thiamine followed by the transfer of an acetyl moiety to coenzyme A for the synthesis of acetyl-CoA . The family also includes pyruvate flavodoxin oxidoreductase as encoded by the nifJ gene in cyanobacterium which is required for growth on molecular nitrogen when iron is limited . . +PF01855 Pyruvate flavodoxin/ferredoxin oxidoreductase, thiamine diP-bdg
Pfam-B_323 (release 4.2). This family includes the N terminal structural domain of the pyruvate ferredoxin oxidoreductase. This domain binds thiamine diphosphate, and along with domains II and IV, is involved in inter subunit contacts . The family also includes pyruvate flavodoxin oxidoreductase as encoded by the nifJ gene in cyanobacterium which is required for growth on molecular nitrogen when iron is limited .. +PF04151 Bacterial pre-peptidase C-terminal domain
+PF01577 Poty_P1;
Potyvirus P1 protease. Pfam-B_364 (release 4.1). The Potyviridae family consists of positive-strand RNA viruses with a genome encoding a polyprotein. Members include zucchini yellow mosaic virus, and turnip mosaic viruses which cause considerable losses of crops worldwide. This family consists of a C terminus region from various plant potyvirus P1 proteins (found at the N terminus of the polyprotein). The C terminus of P1 is a serine-type protease responsible for autocatalytic cleavage between P1 and the helper component protease Pfam:PF00851 [1,2]. The entire P1 protein may be involved in virus-host interactions .. +PF03291 mRNA capping enzyme
Pfam-B_4078 (release 6.5) & Pfam-B_3482 (Release 7.5). This family of enzymes are related to Pfam:PF03919.. +PF00481 Protein phosphatase 2C
Protein phosphatase 2C is a Mn++ or Mg++ dependent protein serine/threonine phosphatase.. +PF00823 PPE family
Pfam-B_297 (release 3.0). This family is named after a PPE motif near to the amino terminus of the domain. The PPE family of proteins all contain an amino-terminal region of about 180 amino acids. The carboxyl termini of this family are variable, and on the basis of this region the proteins fall into at least three groups. The MPTR subgroup has tandem copies of a motif NXGXGNXG. The second subgroup contains a conserved motif at about position 350. The third group are only related in the amino terminal region. The function of these proteins is uncertain but it has been suggested that they may be related to antigenic variation of Mycobacterium tuberculosis .. +PF01535 DUF17;
Pfam-B_874 (release 4.0). This repeat has no known function. It is about 35 amino acids long and found in up to 18 copies in some proteins. This family appears to be greatly expanded in plants. This repeat occurs in PET309 Swiss:P32522 that may be involved in RNA stabilisation . This domain occurs in crp1 that is involved in RNA processing . This repeat is associated with a predicted plant protein Swiss:O49549 that has a domain organisation similar to the human BRCA1 protein. The repeat has been called PPR .. +PF04193 PQ loop repeat
TIGRFAMs (release 2.0);. Members of this family are all membrane bound proteins possessing a pair of repeats each spanning two transmembrane helices connected by a loop . The PQ motif found on loop 2 is critical for the localisation of cystinosin to lysosomes . However, the PQ motif appears not to be a general lysosome-targeting motif. It is thought likely to possess a more general function. Most probably this involves a glutamine residue .. +PF05033 Pre-SET motif
This protein motif is a zinc binding motif . It contains 9 conserved cysteines that coordinate three zinc ions. It is thought that this region plays a structural role in stabilising SET domains.. +PF00156 Phosphoribosyl transferase domain
This family includes a range of diverse phosphoribosyl transferase enzymes. This family includes: Adenine phosphoribosyl-transferase EC:2.4.2.7, Swiss:P07672. Hypoxanthine-guanine-xanthine phosphoribosyl-transferase Swiss:P51900. Hypoxanthine phosphoribosyl-transferase EC:2.4.2.8 Swiss:P36766. Ribose-phosphate pyrophosphokinase i EC:2.7.6.1 Swiss:P09329. Amidophosphoribosyltransferase EC:2.4.2.14 Swiss:P00496. Orotate phosphoribosyl-transferase EC:2.4.2.10 Swiss:P11172. Uracil phosphoribosyl-transferase EC:2.4.2.9 Swiss:P25532. Xanthine-guanine phosphoribosyl-transferase EC:2.4.2.22 Swiss:P00501. In Arabidopsis, at the very N-terminus of this domain is the P-Loop NTPase domain .. +PF00377 prion;
Prion/Doppel alpha-helical domain. The prion protein is thought to be the infectious agent that causes transmissible spongiform encephalopathies, such as scrapie and BSE. It is thought that the prion protein can exist in two different forms: one is the normal cellular protein, and the other is the infectious form which can change the normal prion protein into the infectious form. It has been found that the prion alpha-helical domain is also found in the Doppel protein.. +PF00227 proteasome;
The proteasome is a multisubunit structure that degrades proteins. Protein degradation is an essential component of regulation because proteins can become misfolded, damaged, or unnecessary. Proteasomes and their homologues vary greatly in complexity: from HslV (heat shock locus v), which is encoded by 1 gene in bacteria, to the eukaryotic 20S proteasome, which is encoded by more than 14 genes . Recently evidence of two novel groups of bacterial proteasomes was proposed. The first is Anbu, which is sparsely distributed among cyanobacteria and proteobacteria . The second is called beta-proteobacteria proteasome homologue (BPH) .. +PF03371 PRP38 family
Members of this family are related to the pre mRNA splicing factor PRP38 from yeast . Therefore all the members of this family could be involved in splicing. This conserved region could be involved in RNA binding. The putative domain is about 180 amino acids in length. PRP38 is a unique component of the U4/U6.U5 tri-small nuclear ribonucleoprotein (snRNP) particle and is necessary for an essential step late in spliceosome maturation . . +PF01789 PsbP
Pfam-B_1303 (release 4.2). This family consists of the 23 kDa subunit of oxygen evolving system of photosystem II or PsbP from various plants (where it is encoded by the nuclear genome) and Cyanobacteria. The 23 KDa PsbP protein is required for PSII to be fully operational in vivo, it increases the affinity of the water oxidation site for Cl- and provides the conditions required for high affinity binding of Ca2+ .. +PF01416 PseudoU_synt;
tRNA pseudouridine synthase. Howe K, Griffiths-Jones SR. Involved in the formation of pseudouridine at the anticodon stem and loop of transfer-RNAs. Pseudouridine is an isomer of uridine (5-(beta-D-ribofuranosyl) uracil), and is the most abundant modified nucleoside found in all cellular RNAs. The TruA-like proteins also exhibit a conserved sequence with a strictly conserved aspartic acid, likely involved in catalysis.. +PF01437 Plexin_repeat;
+PF04046 PSP
Pfam-B_PSP (release 7.3);. Proline rich domain found in numerous spliceosome associated proteins.. +PF04468 PSP1 C-terminal conserved region
This region is present in both eukaryotes and eubacteria. The yeast PSP1 protein is involved in suppressing mutations in the DNA polymerase alpha subunit in yeast .. +PF04024 PspC domain
This family includes Phage shock protein C (PspC) that is thought to be a transcriptional regulator. The presumed domain is 60 amino acid residues in length.. +PF04886 PT repeat
Pfam-B_517 (release 7.6). This short repeat is composed of the tetrapeptide XPTX. This repeat is found in a variety of proteins, however it is not clear if these repeats are homologous to each other. The alignment represents nine copies of this repeat.. +PF01329 Pterin 4 alpha carbinolamine dehydratase
Pterin 4 alpha carbinolamine dehydratase is also known as DCoH (dimerisation cofactor of hepatocyte nuclear factor 1-alpha).. +PF03095 Phosphotyrosyl phosphate activator (PTPA) protein
Pfam-B_2456 (release 6.4). Phosphotyrosyl phosphatase activator (PTPA) proteins stimulate the phosphotyrosyl phosphatase (PTPase) activity of the dimeric form of protein phosphatase 2A (PP2A). PTPase activity in PP2A (in vitro) is relatively low when compared to the better recognised phosphoserine/ threonine protein phosphorylase activity. The specific biological role of PTPA is unknown. Basal expression of PTPA depends on the activity of a ubiquitous transcription factor, Yin Yang 1 (YY1). The tumour suppressor protein p53 can inhibit PTPA expression through an unknown mechanism that negatively controls YY1 .. +PF02302 PTS system, Lactose/Cellobiose specific IIB subunit
Pfam-B_9339 (release 5.2). The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The lactose/cellobiose-specific family are one of four structurally and functionally distinct group IIB PTS system cytoplasmic enzymes. The fold of IIB cellobiose shows similar structure to mammalian tyrosine phosphatases. This family also contains the fructose specific IIB subunit.. +PF01472 PUA domain
The PUA domain, named after Pseudouridine synthase and Archaeosine transglycosylase, was detected in archaeal and eukaryotic pseudouridine synthases, archaeal archaeosine synthases, a family of predicted ATPases that may be involved in RNA modification, a family of predicted archaeal and bacterial rRNA methylases. Additionally, the PUA domain was detected in a family of eukaryotic proteins that also contain a domain homologous to the translation initiation factor eIF1/SUI1; these proteins may comprise a novel type of translation factors. Unexpectedly, the PUA domain was detected also in bacterial and yeast glutamate kinases; this is compatible with the demonstrated role of these enzymes in the regulation of the expression of other genes . It is predicted that the PUA domain is an RNA binding domain.. +PF01480 PWI domain
+PF00787 PX domain
Alignment kindly provided by SMART & iterated. PX domains bind to phosphoinositides.. +PF02194 PXA domain
Alignment kindly provided by SMART. This domain is associated with PX domains Pfam:PF00787.. +PF00070 pyr_redox;
Pyridine nucleotide-disulphide oxidoreductase. This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain.. +PF02852 pyr_redox_dim;
Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain. This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases.. +PF01729 QRPTase;
Quinolinate phosphoribosyl transferase, C-terminal domain. Pfam-B_2063 (release 4.1). Quinolinate phosphoribosyl transferase (QPRTase) or nicotinate-nucleotide pyrophosphorylase EC:2.4.2.19 is involved in the de novo synthesis of NAD in both prokaryotes and eukaryotes. It catalyses the reaction of quinolinic acid with 5-phosphoribosyl-1-pyrophosphate (PRPP) in the presence of Mg2+ to give rise to nicotinic acid mononucleotide (NaMN), pyrophosphate and carbon dioxide [1,2]. The QA substrate is bound between the C-terminal domain of one subunit, and the N-terminal domain of the other. The C-terminal domain has a 7 beta-stranded TIM barrel-like fold.. +PF02749 Quinolinate phosphoribosyl transferase, N-terminal domain
Pfam-B_2063 (release 4.1). Quinolinate phosphoribosyl transferase (QPRTase) or nicotinate-nucleotide pyrophosphorylase EC:2.4.2.19 is involved in the de novo synthesis of NAD in both prokaryotes and eukaryotes. It catalyses the reaction of quinolinic acid with 5-phosphoribosyl-1-pyrophosphate (PRPP) in the presence of Mg2+ to give rise to nicotinic acid mononucleotide (NaMN), pyrophosphate and carbon dioxide [1,2]. The QA substrate is bound between the C-terminal domain of one subunit, and the N-terminal domain of the other. The N-terminal domain has an alpha/beta hammerhead fold.. +PF01424 R3H domain
The name of the R3H domain comes from the characteristic spacing of the most conserved arginine and histidine residues. The function of the domain is predicted to be binding ssDNA.. +PF03834 Binding domain of DNA repair protein Ercc1 (rad10/Swi10)
TIGRFAMs, Griffiths-Jones SR, Coggill PC. Ercc1 and XPF (xeroderma pigmentosum group F-complementing protein) are two structure-specific endonucleases of a class of seven containing an ERCC4 domain. Together they form an obligate complex that functions primarily in nucleotide excision repair (NER), a versatile pathway able to detect and remove a variety of DNA lesions induced by UV light and environmental carcinogens, and secondarily in DNA interstrand cross-link repair and telomere maintenance. This domain in fact binds simultaneously to both XPF and single-stranded DNA; this ternary complex explains the important role of Ercc1 in targeting its catalytic XPF partner to the NER pre-incision complex .. +PF04423 Rad50 zinc hook motif
The Mre11 complex (Mre11 Rad50 Nbs1) is central to chromosomal maintenance and functions in homologous recombination, telomere maintenance and sister chromatid association. The Rad50 coiled-coil region contains a dimer interface at the apex of the coiled coils in which pairs of conserved Cys-X-X-Cys motifs form interlocking hooks that bind one Zn ion. This alignment includes the zinc hook motif and a short stretch of coiled-coil on either side.. +PF04055 Radical SAM superfamily
Radical SAM proteins catalyse diverse reactions, including unusual methylations, isomerisation, sulphur insertion, ring formation, anaerobic oxidation and protein radical formation.. +PF02145 Rap/ran-GAP
+PF04078 Cell differentiation family, Rcd1-like
Pfam-B_5278 (release 7.3);. Two of the members in this family have been characterised as being involved in regulation of Ste11 regulated sex genes [1,2]. Mammalian Rcd1 is a novel transcriptional cofactor that mediates retinoic acid-induced cell differentiation .. +PF05177 RCSD region
Proteins containing this region include C.elegans UNC-89. This region is found repeated in UNC-89 and shows conservation in prolines, lysines and glutamic acids. Proteins with RCSD are involved in muscle M-line assembly, but the function of this region RCSD is not clear.. +PF01030 Receptor L domain
Pfam-B_244 (release 3.0). The L domains from these receptors make up the bilobal ligand binding site. Each L domain consists of a single-stranded right hand beta-helix . This Pfam entry is missing the first 50 amino acid residues of the domain.. +PF02010 REJ domain
The REJ (Receptor for Egg Jelly) domain is found in PKD1 Swiss:P98161, and the sperm receptor for egg jelly Swiss:Q26627. The function of this domain is unknown. The domain is 600 amino acids long so is probably composed of multiple structural domains. There are six completely conserved cysteine residues that may form disulphide bridges. This region contains tandem PKD-like domains.. +PF03432 Relaxase/Mobilisation nuclease domain
Pfam-B_4002 (release 6.6). Relaxases/mobilisation proteins are required for the horizontal transfer of genetic information contained on plasmids that occurs during bacterial conjugation. The relaxase, in conjunction with several auxiliary proteins, forms the relaxation complex or relaxosome. Relaxases nick duplex DNA in a specific manner by catalysing trans-esterification . . +PF03090 Replicase family
Pfam-B_2424 (release 6.4). This is a family of bacterial plasmid DNA replication initiator proteins. Pfam: PF01051 is a similar family. These RepA proteins exist as monomers and dimers in equilibrium: monomers bind directly to repeated DNA sequences and thus activate replication; dimers repress repA transcription by binding an inversely repeated DNA operator. Dimer dissociation can occur spontaneously or be mediated by Hsp70 chaperones.. +PF03248 Rer1 family
Pfam-B_3358 (release 6.5). RER1 family proteins are involved in the retrieval of some endoplasmic reticulum membrane proteins from the early golgi compartment. The C terminus of yeast Rer1p interacts with a coatomer complex .. +PF02453 Reticulon
Pfam-B_2196 (release 5.4). Reticulon, also known as neuroendocrine-specific protein (NSP), is a protein of unknown function which associates with the endoplasmic reticulum. This family represents the C-terminal domain of the three reticulon isoforms and their homologues.. +PF04527 Drosophila Retinin like protein
Pfam-B_4914 (release 7.5). Family of Drosophila proteins related to the C-terminal region of the Drosophila Retinin protein. Conserved region is found towards the C-terminus of the member proteins.. +PF03732 Retrotransposon gag protein
Pfam-B_3194 (release 7.0). Gag or Capsid-like proteins from LTR retrotransposons. There is a central motif QGXXEXXXXXFXXLXXH that is common to Retroviridae gag-proteins, but is poorly conserved .. +PF01694 Rhomboid family
Pfam-B_1399 (release 4.1). This family contains integral membrane proteins that are related to Drosophila rhomboid protein Swiss:P20350. Members of this family are found in bacteria and eukaryotes. Rhomboid promotes the cleavage of the membrane-anchored TGF-alpha-like growth factor Spitz, allowing it to activate the Drosophila EGF receptor. Analysis has shown that Rhomboid-1 is an intramembrane serine protease (EC:3.4.21.105). Parasite-encoded rhomboid enzymes are also important for invasion of host cells by Toxoplasma and the malaria parasite .. +PF05104 Ribosome receptor lysine/proline rich region
Pfam-B_3249 (release 7.7). This highly conserved region is found towards the C-terminus of the transmembrane domain . The function is unclear.. +PF00636 Ribonuclease III domain
+PF04597 Ribophorin I
Ribophorin I is an essential subunit of oligosaccharyltransferase (OST), which is also known as Dolichyl-diphosphooligosaccharide--protein glycosyltransferase, (EC:2.4.1.119). OST catalyses the transfer of an oligosaccharide from dolichol pyrophosphate to selected asparagine residues of nascent polypeptides as they are translocated into the lumen of the rough endoplasmic reticulum. Ribophorin I and OST48 are thought to be responsible for OST catalytic activity . Both yeast and mammalian proteins are glycosylated but the sites are not conserved. Glycosylation may contribute towards general solubility but is unlikely to be involved in a specific biochemical function. Most family members are predicted to have a transmembrane helix at the C terminus of this region.. +PF01775 Ribosomal L18ae/LX protein domain
This family includes eukaryotic L18ae as well as archaebacterial specific LX. Ribosomal protein L18ae forms part of the 60S ribosomal subunit.. +PF00828 Ribosomal protein L18e/L15
Pfam-B_1295 (release 2.1). This family includes eukaryotic L18 as well as prokaryotic L15.. +PF01907 Ribosomal protein L37e
This family includes ribosomal protein L37 from eukaryotes and archaebacteria. The family contains many conserved cysteines and histidines suggesting that this protein may bind to zinc.. +PF00347 L6;
Ribosomal protein L6. +PF02482 Ribosomal_S30;
Sigma 54 modulation protein / S30EA ribosomal protein. Pfam-B_869 (release 5.4). This Pfam family contains the sigma-54 modulation protein family and the S30AE family of ribosomal proteins which includes the light- repressed protein (lrtA) (Swiss:P47908) .. +PF01201 Ribosomal protein S8e
+PF00652 Ricin-type beta-trefoil lectin domain
+PF00355 Rieske [2Fe-2S] domain
Prosite & Pfam-B_31 (release 4.1). The rieske domain has a [2Fe-2S] centre. Two conserved cysteines coordinate one Fe ion, while the other Fe ion is coordinated by two conserved histidines. In hyperthermophilic archaea there is a SKTPCX(2-3)C motif at the C-terminus. The cysteines in this motif form a disulphide bridge, which stabilises the protein .. +PF00866 Ring hydroxylating beta subunit
Pfam-B_771 (release 3.0). This subunit has a similar structure to NTF-2 and scytalone dehydratase.. +PF04068 Possible Fer4-like domain in RNase L inhibitor, RLI
Possible metal-binding domain in endoribonuclease RNase L inhibitor. Found at the N-terminal end of RNase L inhibitor proteins, adjacent to the 4Fe-4S binding domain, fer4, Pfam:PF00037. Also often found adjacent to the DUF367 domain Pfam:PF04034 in uncharacterised proteins. The RNase L system plays a major role in the anti-viral and anti-proliferative activities of interferons , and could possibly play a more general role in the regulation of RNA stability in mammalian cells. Inhibitory activity requires concentration-dependent association of RLI with RNase L .. +PF04437 RINT-1 / TIP-1 family
This family includes RINT-1, a Rad50 interacting protein which participates in radiation induced checkpoint control , as well as the TIP-1 protein from yeast that seems to be involved in a complex with Sec20p that is required for golgi transport .. +PF01163 RIO1 family
This is a family of atypical serine kinases which are found in archaea, bacteria and eukaryotes. Activity of Rio1 is vital in Saccharomyces cerevisiae for the processing of ribosomal RNA, as well as for proper cell cycle progression and chromosome maintenance. The structure of RIO1 has been determined .. +PF01000 RNA polymerase Rpb3/RpoA insert domain
Pfam-B_172 (release 3.0). Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, and RpoD subunits from archaea. +PF04997 RNA polymerase Rpb1, domain 1
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 1, represents the clamp domain, which is a mobile domain involved in positioning the DNA, maintenance of the transcription bubble and positioning of the nascent RNA strand [1,2]. . +PF00623 RNA polymerase Rpb1, domain 2
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 2, contains the active site. The invariant motif -NADFDGD- binds the active site magnesium ion [1,2].. +PF04983 RNA polymerase Rpb1, domain 3
Pfam-B_288 (release 4.2). RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 3, represents the pore domain. The 3' end of RNA is positioned close to this domain. The pore delimited by this domain is thought to act as a channel through which nucleotides enter the active site and/or where the 3' end of the RNA may be extruded during back-tracking [1,2].. +PF05000 RNA polymerase Rpb1, domain 4
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 4, represents the funnel domain. The funnel contains the binding site for some elongation factors [1,2].. +PF04998 RNA polymerase Rpb1, domain 5
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 5, represents the discontinuous cleft domain that is required to form the central cleft or channel where the DNA is bound [1,2].. +PF04992 RNA polymerase Rpb1, domain 6
Pfam-B_288 (release 4.2). RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 6, represents a mobile module of the RNA polymerase. Domain 6 forms part of the shelf module [1,2]. This family appears to be specific to the largest subunit of RNA polymerase II.. +PF05001 RNA polymerase Rpb1 C-terminal repeat
The repetitive C-terminal domain (CTD) of Rpb1 (RNA polymerase Pol II) plays a critical role in the regulation of gene expression. The activity of the CTD is dependent on its state of phosphorylation .. +PF04565 RNA polymerase Rpb2, domain 3
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). Domain 3 is also known as the fork domain and is proximal to catalytic site .. +PF04567 RNA polymerase Rpb2, domain 5
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). Domain 5 is also known as the external 2 domain .. +PF00562 RNA polymerase Rpb2, domain 6
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain represents the hybrid binding domain and the wall domain . The hybrid binding domain binds the nascent RNA strand / template DNA strand in the Pol II transcription elongation complex. This domain contains the important structural motifs, switch 3 and the flap loop and binds an active site metal ion . This domain is also involved in binding to Rpb1 and Rpb3 . Many of the bacterial members contain large insertions within this domain, a region known as dispensable region 2 (DRII).. +PF04560 RNA polymerase Rpb2, domain 7
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). Rpb2 is the second largest subunit of the RNA polymerase. This domain is comprised of the structural domains anchor and clamp . The clamp region (C-terminal) contains a zinc-binding motif . The clamp region is named due to its interaction with the clamp domain found in Rpb1. The domain also contains a region termed "switch 4". The switches within the polymerase are thought to signal different stages of transcription .. +PF03874 RNA polymerase Rpb4
This family includes the Rpb4 protein. This family also includes C17 (aka CGRP-RCP), which is an essential subunit of RNA polymerase III. C17 forms a subcomplex with C25 which is likely to be the counterpart of subcomplex Rpb4/7 in Pol II .. +PF01351 Ribonuclease HII
+PF01138 3' exoribonuclease family, domain 1
This family includes 3'-5' exoribonucleases. Ribonuclease PH contains a single copy of this domain, and removes nucleotide residues following the -CCA terminus of tRNA. Polyribonucleotide nucleotidyltransferase (PNPase) contains two tandem copies of the domain. PNPase is involved in mRNA degradation in a 3'-5' direction. The exosome is a 3'-5' exoribonuclease complex that is required for 3' processing of the 5.8S rRNA. Three of its five protein components, Swiss:P46948 Swiss:Q12277 and Swiss:P25359 contain a copy of this domain . Swiss:Q10205, a hypothetical protein from S. pombe appears to belong to an uncharacterised subfamily. This subfamily is found in both eukaryotes and archaebacteria.. +PF03725 3' exoribonuclease family, domain 2
This family includes 3'-5' exoribonucleases. Ribonuclease PH contains a single copy of this domain, and removes nucleotide residues following the -CCA terminus of tRNA. Polyribonucleotide nucleotidyltransferase (PNPase) contains two tandem copies of the domain. PNPase is involved in mRNA degradation in a 3'-5' direction. The exosome is a 3'-5' exoribonuclease complex that is required for 3' processing of the 5.8S rRNA. Three of its five protein components, Swiss:P46948 Swiss:Q12277 and Swiss:P25359 contain a copy of this domain . Swiss:Q10205, a hypothetical protein from S. pombe appears to belong to an uncharacterised subfamily. This subfamily is found in both eukaryotes and archaebacteria.. +PF02755 RPEL repeat
The RPEL repeat is named after four conserved amino acids it contains. The function of the RPEL repeat is unknown however it might be a DNA binding repeat based on the observation that Swiss:Q9VZY2 contains a Pfam:PF02037 domain that is also implicated in DNA binding.. +PF04059 rrm_2;
RNA recognition motif 2. Pfam-B_4981 (release 7.3);. +PF01137 RCT;
RNA 3'-terminal phosphate cyclase. RNA cyclases are a family of RNA-modifying enzymes that are conserved in all cellular organisms. They catalyse the ATP-dependent conversion of the 3'-phosphate to the 2',3'-cyclic phosphodiester at the end of RNA, in a reaction involving formation of the covalent AMP-cyclase intermediate . The structure of RTC demonstrates that RTCs comprise two domains. The larger domain contains an insert domain of approximately 100 amino acids . . +PF05189 RNA 3'-terminal phosphate cyclase (RTC), insert domain
RNA cyclases are a family of RNA-modifying enzymes that are conserved in all cellular organisms. They catalyse the ATP-dependent conversion of the 3'-phosphate to the 2',3'-cyclic phosphodiester at the end of RNA, in a reaction involving formation of the covalent AMP-cyclase intermediate . The structure of RTC demonstrates that RTCs comprise two domains. The larger domain contains an insert domain of approximately 100 amino acids . . +PF00301 rubredoxin;
+PF02759 RUN domain
This domain is present in several proteins that are linked to the functions of GTPases in the Rap and Rab families. They could hence play important roles in multiple Ras-like GTPase signalling pathways. The domain comprises six conserved regions, which in some proteins have considerable insertions between them. The domain core is thought to take up a predominantly alpha fold, with basic amino acids in regions A and D possibly playing a functional role in interactions with Ras GTPases .. +PF00853 Runt domain
+PF00665 Integrase core domain
Pfam-B_10 (release 2.1). Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site .. +PF00077 rvp;
Retroviral aspartyl protease. Single domain aspartyl proteases from retroviruses, retrotransposons, and badnaviruses (plant dsDNA viruses). These proteases are generally part of a larger polyprotein; usually pol, more rarely gag. Retroviral proteases appear to be homologous to a single domain of the two-domain eukaryotic aspartyl proteases such as pepsins, cathepsins, and renins (Pfam:PF00026).. +PF00078 rvt; RVT;
Reverse transcriptase (RNA-dependent DNA polymerase). Published_alignment and HMM_iterative_training. A reverse transcriptase gene is usually indicative of a mobile element such as a retrotransposon or retrovirus. Reverse transcriptases occur in a variety of mobile elements, including retrotransposons, retroviruses, group II introns, bacterial msDNAs, hepadnaviruses, and caulimoviruses.. +PF03501 Plectin/S10 domain
Pfam-B_2138 (release 7.0). This presumed domain is found at the N-terminus of some isoforms of the cytoskeletal muscle protein plectin as well as the ribosomal S10 protein. This domain may be involved in RNA binding.. +PF01479 S4 domain
The S4 domain is a small domain consisting of 60-65 amino acid residues that was detected in the bacterial ribosomal protein S4, eukaryotic ribosomal S9, two families of pseudouridine synthases, a novel family of predicted RNA methylases, a yeast protein containing a pseudouridine synthetase and a deaminase domain, bacterial tyrosyl-tRNA synthetases, and a number of uncharacterized, small proteins that may be involved in translation regulation . The S4 domain probably mediates binding to RNA.. +PF04382 SAB domain
This presumed domain is found in proteins containing FERM domains Pfam:PF00373. This domain is found to bind to both spectrin and actin, hence the name SAB (Spectrin and Actin Binding) domain.. +PF03399 SAC3/GANP/Nin1/mts3/eIF-3 p25 family
Pfam-B_2845 (release 6.6) & Pfam-B_4388 (release 7.5). This large family includes diverse proteins involved in large complexes. The alignment contains one highly conserved negatively charged residue and one highly conserved positively charged residue that are probably important for the function of these proteins. The family includes the yeast nuclear export factor Sac3 Swiss:P46674, and mammalian GANP/MCM3-associated proteins, which facilitate the nuclear localisation of MCM3, a protein that associates with chromatin in the G1 phase of the cell-cycle. The 26S protease (or 26S proteasome) is responsible for degrading ubiquitin conjugates. It consists of 19S regulatory complexes associated with the ends of 20S proteasomes. The 19S regulatory complex is composed of about 20 different polypeptides and confers ATP-dependence and substrate specificity to the 26S enzyme. The conserved region occurs at the C-terminal of the Nin1-like regulatory subunit [4,5,6]. This family includes several eukaryotic translation initiation factor 3 subunit 11 (eIF-3 p25) proteins. Eukaryotic initiation factor 3 (eIF3) is a multisubunit complex that is required for binding of mRNA to 40 S ribosomal subunits, stabilisation of ternary complex binding to 40 S subunits, and dissociation of 40 and 60 S subunits .. +PF03435 Saccharopine dehydrogenase
Pfam-B_4166 (release 6.6) & Pfam-B_6325 (Release 7.5). This family is comprised of three structural domains that cannot be separated in the linear sequence. In some organisms this enzyme is found as a bifunctional polypeptide with lysine ketoglutarate reductase. The saccharopine dehydrogenase can also function as a saccharopine reductase.. +PF00536 SAM_1;
SAM domain (Sterile alpha motif). It has been suggested that SAM is an evolutionarily conserved protein binding domain that is involved in the regulation of numerous developmental processes in diverse eukaryotes. The SAM domain can potentially function as a protein interaction module through its ability to homo- and heterooligomerise with other SAM domains.. +PF02198 Sterile alpha motif (SAM)/Pointed domain
Alignment kindly provided by SMART. +PF01342 SAND domain
The DNA binding activity of two proteins has been mapped to the SAND domain. The conserved KDWK motif is necessary for DNA binding, and it appears to be important for dimerisation . This region is also found in the putative transcription factor RegA from the multicellular green alga Volvox carteri. This region of RegA is known as the VARL domain .. +PF02037 SAP domain
The SAP (after SAF-A/B, Acinus and PIAS) motif is a putative DNA/RNA binding domain found in diverse nuclear and cytoplasmic proteins.. +PF05184 Saposin-like type B, region 1
+PF03489 Surfactant_B;
Saposin-like type B, region 2. +PF04499 SIT4 phosphatase-associated protein
Pfam-B_2011 (release 7.5). This family includes a conserved region from a group of yeast proteins that associate with the SIT4 phosphatase. This association is required for SIT4's role in G1 cyclin transcription and for bud formation. This family also includes homologous regions from other eukaryotes.. +PF04000 Sas10/Utp3/C1D family
Pfam-B_6555 (release 7.3). This family contains Utp3 and LCP5 which are components of the U3 ribonucleoprotein complex . It also includes the human C1D protein and Saccharomyces cerevisiae YHR081W (rrp47), an exosome-associated protein required for the 3' processing of stable RNAs , and Sas10 which has been identified as a regulator of chromatin silencing . This family also includes the human protein Neuroguidin an initiation factor 4E (eIF4E) binding protein .. +PF01547 SBP_bacterial_1;
Bacterial extracellular solute-binding protein. Pfam-B_269 (release 4.0). This family also includes the bacterial extracellular solute-binding protein family POTD/POTF.. +PF00497 Bacterial extracellular solute-binding proteins, family 3
+PF04144 SCAMP family
Pfam-B_1298 (release 7.3). In vertebrates, secretory carrier membrane proteins (SCAMPs) 1-3 constitute a family of putative membrane-trafficking proteins composed of cytoplasmic N-terminal sequences with NPF repeats, four central transmembrane regions (TMRs), and a cytoplasmic tail. SCAMPs probably function in endocytosis by recruiting EH-domain proteins to the N-terminal NPF repeats but may have additional functions mediated by their other sequences .. +PF02023 SCAN domain
Pfam-B_1614 (Release 5.0). The SCAN domain (named after SRE-ZBP, CTfin51, AW-1 and Number 18 cDNA) is found in several Pfam:PF00096 proteins. The domain has been shown to be able to mediate homo- and hetero-oligomerisation .. +PF02404 Stem cell factor
Pfam-B_2598 (release 5.4). Stem cell factor (SCF) is a homodimer involved in hematopoiesis. SCF binds to and activates the SCF receptor (SCFR), a receptor tyrosine kinase. The crystal structure of human SCF has been resolved and a potential receptor-binding site identified . . +PF00188 SCP;
Cysteine-rich secretory protein family. This is a large family of cysteine-rich secretory proteins, antigen 5, and pathogenesis-related 1 proteins (CAP) that are found in a wide range of organisms, including prokaryotes and non-vertebrate eukaryotes , The nine subfamilies of the mammalian CAP 'super'family include: the human glioma pathogenesis-related 1 (GLIPR1), Golgi associated pathogenesis related-1 (GAPR1) proteins, peptidase inhibitor 15 (PI15), peptidase inhibitor 16 (PI16), cysteine-rich secretory proteins (CRISPs), CRISP LCCL domain containing 1 (CRISPLD1), CRISP LCCL domain containing 2 (CRISPLD2), mannose receptor like and the R3H domain containing like proteins. Members are most often secreted and have an extracellular endocrine or paracrine function and are involved in processes including the regulation of extracellular matrix and branching morphogenesis, potentially as either proteases or protease inhibitors; in ion channel regulation in fertility; as tumour suppressor or pro-oncogenic genes in tissues including the prostate; and in cell-cell adhesion during fertilisation. The overall protein structural conservation within the CAP 'super'family results in fundamentally similar functions for the CAP domain in all members, yet the diversity outside of this core region dramatically alters the target specificity and, thus, the biological consequences . The Ca++-chelating function would fit with the various signalling processes (e.g. the CRISP proteins) that members of this family are involved in, and also the sequence and structural evidence of a conserved pocket containing two histidines and a glutamate. It also may explain how Swiss:Q91055 blocks the Ca++ transporting ryanodine receptors.. +PF03803 Scramblase
Pfam-B_3893 (release 7.0). Scramblase is palmitoylated and contains a potential protein kinase C phosphorylation site. Scramblase exhibits Ca2+-activated phospholipid scrambling activity in vitro. There are also possible SH3 and WW binding motifs. Scramblase is involved in the redistribution of phospholipids after cell activation or injury .. +PF01390 SEA domain
Domain found in Sea urchin sperm protein, Enterokinase, Agrin (SEA). Proposed function of regulating or binding carbohydrate side chains. Recently a proteolytic activity has been shown for a SEA domain .. +PF04091 Exocyst complex subunit Sec15-like
Pfam-B_7871 (release 7.3);. +PF04815 Sec23/Sec24 helical domain
Pfam-B_3055 (release 7.3). COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is composed of five alpha helices.. +PF04811 Sec23/Sec24 trunk domain
COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is known as the trunk domain and has an alpha/beta vWA fold and forms the dimer interface.. +PF04136 Sec34-like family
Pfam-B_16464 (release 7.3);. Sec34 and Sec35 form a sub-complex, in a seven protein complex that includes Dor1 (Pfam:PF04124). This complex is thought to be important for tethering vesicles to the Golgi .. +PF02889 Sec63 Brl domain
This domain (also known as the Brl domain) is required for assembly of functional endoplasmic reticulum translocons .. +PF04048 Sec8 exocyst complex component specific domain
Pfam-B_9576 (release 7.3);. +PF00856 SET domain
SET domains are protein lysine methyltransferase enzymes. SET domains appear to be protein-protein interaction domains. It has been demonstrated that SET domains mediate interactions with a family of proteins that display similarity with dual-specificity phosphatases (dsPTPases) . A subset of SET domains have been called PR domains. These domains are divergent in sequence from other SET domains, but also appear to mediate protein-protein interaction . The SET domain consists of two regions known as SET-N and SET-C. SET-C forms an unusual and conserved knot-like structure of probably functional importance. Additionally to SET-N and SET-C, an insert region (SET-I) and flanking regions of high structural variability form part of the overall structure .. +PF03749 Sugar fermentation stimulation protein
This family contains sugar fermentation stimulation proteins, which are probably regulatory factors involved in maltose metabolism. SfsA has been shown to bind DNA and it contains a helix-turn-helix motif that probably binds DNA at its C-terminus.. +PF05002 SGS domain
This domain was thought to be unique to the SGT1-like proteins , but is also found in calcyclin binding proteins.. +PF03983 SLA1 homology domain 1, SHD1
Pfam-B_ (release 7.2). NPFXD peptides specifically interact with the SHD1 domain. NPFXD is a clathrin-facilitated endocytic targeting signal. NPFXD was originally discovered in the cytoplasmic domain of the furin-like protease Kex2p . Sla1 is thought to function as an endocytic adaptor . . +PF04925 SHQ1 protein
Pfam-B_11411 (release 7.6). S. cerevisiae SHQ1 protein is required for SnoRNAs of the box H/ACA Quantitative accumulation (unpublished).. +PF01549 DUF18;ShTK;
Pfam-B_662 (release 4.0). This domain is found in several C. elegans proteins. The domain is 30 amino acids long and rich in cysteine residues. There are 6 conserved cysteine positions in the domain that form three disulphide bridges. The domain is found in the potassium channel inhibitor ShK in sea anemone .. +PF04542 sigma70_r2;
Region 2 of sigma-70 is the most conserved region of the entire protein. All members of this class of sigma-factor contain region 2. The high conservation is due to region 2 containing both the -10 promoter recognition helix and the primary core RNA polymerase binding determinant. The core binding helix, interacts with the clamp domain of the largest polymerase subunit, beta prime [1,2]. The aromatic residues of the recognition helix, found at the C-terminus of this domain are thought to mediate strand separation, thereby allowing transcription initiation [1,2]. . +PF04545 sigma70_r4;
Region 4 of sigma-70 like sigma-factors are involved in binding to the -35 promoter element via a helix-turn-helix motif . Due to the way Pfam works, the threshold has been set artificially high to prevent overlaps with other helix-turn-helix families. Therefore there are many false negatives.. +PF03145 Seven in absentia protein family
Pfam-B_1854 (release 6.5). The seven in absentia (sina) gene was first identified in Drosophila. The Drosophila Sina protein is essential for the determination of the R7 pathway in photoreceptor cell development: the loss of functional Sina results in the transformation of the R7 precursor cell to a non- neuronal cell type. The Sina protein contains an N-terminal RING finger domain Pfam:PF00097. Through this domain, Sina binds E2 ubiquitin-conjugating enzymes (UbcD1) Sina also interacts with Tramtrack (TTK88) via PHYL. Tramtrack is a transcriptional repressor that blocks photoreceptor determination, while PHYL down-regulates the activity of TTK88. In turn, the activity of PHYL requires the activation of the Sevenless receptor tyrosine kinase, a process essential for R7 determination. It is thought that thus Sina targets TTK88 for degradation, therefore promoting the R7 pathway. Murine and human homologues of Sina have also been identified. The human homologue Siah-1 also binds E2 enzymes (UbcH5) and through a series of physical interactions, targets beta-catenin for ubiquitin degradation. Siah-1 expression is enhanced by p53, itself promoted by DNA damage. Thus this pathway links DNA damage to beta-catenin degradation [2,3]. Sina proteins, therefore, physically interact with a variety of proteins. The N-terminal RING finger domain that binds ubiquitin conjugating enzymes is described in Pfam:PF00097, and does not form part of the alignment for this family. The remainder C-terminal part is involved in interactions with other proteins, and is included in this alignment. In addition to the Drosophila protein and mammalian homologues, whose similarity was noted previously, this family also includes putative homologues from Caenorhabditis elegans, Arabidopsis thaliana.. +PF04938 Survival motor neuron (SMN) interacting protein 1 (SIP1)
Pfam-B_5071 (release 7.6). Survival motor neuron (SMN) interacting protein 1 (SIP1) interacts with SMN protein and plays a crucial role in the biogenesis of spliceosomes. There is evidence that the protein is linked to spinal muscular atrophy (SMA) and amyotrophic lateral sclerosis(ALS) in humans .. +PF03530 Calcium-activated SK potassium channel
+PF02437 SKI/SNO/DAC family
Pfam-B_2013 (release 5.4). This family contains a presumed domain that is about 100 amino acids long. All members of this family contain a conserved CLPQ motif. The c-ski proto-oncogene has been shown to influence proliferation, morphological transformation and myogenic differentiation . Sno, a Ski proto-oncogene homologue, is expressed in two isoforms and plays a role in the response to proliferation stimuli. Dachshund also contains this domain. It is involved in various aspects of development [2,3].. +PF01466 Skp1 family, dimerisation domain
+PF03931 Skp1 family, tetramerisation domain
+PF00395 S-layer homology domain
+PF01423 Sm;
The LSM domain contains Sm proteins as well as other related LSM (Like Sm) proteins. The U1, U2, U4/U6, and U5 small nuclear ribonucleoprotein particles (snRNPs) involved in pre-mRNA splicing contain seven Sm proteins (B/B', D1, D2, D3, E, F and G) in common, which assemble around the Sm site present in four of the major spliceosomal small nuclear RNAs. The U6 snRNP binds to the LSM (Like Sm) proteins . Sm proteins are also found in archaebacteria, which do not have any splicing apparatus suggesting a more general role for Sm proteins. All Sm proteins contain a common sequence motif in two segments, Sm1 and Sm2, separated by a short variable linker. This family also includes the bacterial Hfq (host factor Q) proteins. Hfq are also RNA-binding proteins, that form hexameric rings.. +PF02463 RecF/RecN/SMC N terminal domain
This domain is found at the N terminus of SMC proteins. The SMC (structural maintenance of chromosomes) superfamily proteins have ATP-binding domains at the N- and C-termini, and two extended coiled-coil domains separated by a hinge in the middle. The eukaryotic SMC proteins form two kinds of heterodimers: the SMC1/SMC3 and the SMC2/SMC4 types. These heterodimers constitute an essential part of higher order complexes, which are involved in chromatin and DNA dynamics. This family also includes the RecF and RecN proteins that are involved in DNA metabolism and recombination.. +PF04158 Sof1-like domain
Pfam-B_9404 (release 7.3);. Sof1 is essential for cell growth and is a component of the nucleolar rRNA processing machinery . . +PF01033 Somatomedin B domain
+PF03700 Sorting nexin, N-terminal domain
Pfam-B_29150 (release 7.0) . These proteins bind to the cytoplasmic domain of plasma membrane receptors and are involved in endocytic protein trafficking. The N-terminal domain appears to be specific to sorting nexins 1 and 2.. +PF04130 Spc97 / Spc98 family
Pfam-B_3531 (release 7.3). The spindle pole body (SPB) functions as the microtubule-organising centre in yeast. Members of this family are spindle pole body (SPB) components such as Spc97 and Spc98 that form a complex with gamma-tubulin. This family of proteins includes the grip motif 1 and grip motif 2 . Members of this family all form components of the gamma-tubulin complex, GCP .. +PF04435 DUF545;
Domain of unknown function (DUF545) . Pfam-B_429 (release 7.5). Family of uncharacterised C. elegans proteins. The region represented by this family is found to be repeated up to four times in some proteins.. +PF04014 SpoVT_AbrB;
Antidote-toxin recognition MazE. MazE is the antidote to the toxin MazF of E. coli. MazE-MazF in E. coli is a regulated prokaryotic chromosomal addiction module. MazE antidote is degraded by the ClpPA protease of the bacterial proteasome. MazE-MazF is thought to play a role in programmed cell death when cells suffer nutrient deprivation , and MazE-MazF modules have also been implicated in the bacteriostatic effects of other addiction modules . MazF toxin functions as an mRNA interferase, cleaving mRNAs at ACA sequences to inhibit protein synthesis leading to cell growth arrest .. +PF00622 SPRY domain
Alignment kindly provided by SMART. SPRY Domain is named from SPla and the RYanodine Receptor. Domain of unknown function. Distant homologues are domains in butyrophilin/marenostrin/pyrin homologues.. +PF03105 SPX domain
Pfam-B_502 (release 6.5). We have named this region the SPX domain after (SYG1, Pho81 and XPR1). This 180 residue length domain is found at the amino terminus of a variety of proteins. In the yeast protein SYG1, the N-terminus directly binds to the G- protein beta subunit and inhibits transduction of the mating pheromone signal . This finding suggests that all the members of this family are involved in G-protein associated signal transduction. The N-termini of several proteins involved in the regulation of phosphate transport, including the putative phosphate level sensors PHO81 Swiss:P17442 from Saccharomyces cerevisiae and NUC-2 Swiss:Q01317 from Neurospora crassa, are also members of this family [see 4,5]. The SPX domain of S. cerevisiae low-affinity phosphate transporters Pho87 and Pho90 auto-regulates uptake and prevents efflux. This SPX dependent inhibition is mediated by the physical interaction with Spl2 NUC-2 contains several ankyrin repeats Pfam:PF00023. Several members of this family are annotated as XPR1 proteins: the xenotropic and polytropic retrovirus receptor confers susceptibility to infection with murine leukaemia viruses (MLV) . The similarity between SYG1, phosphate regulators and XPR1 sequences has been previously noted, as has the additional similarity to several predicted proteins, of unknown function, from Drosophila melanogaster, Arabidopsis thaliana, Caenorhabditis elegans, Schizosaccharomyces pombe, and Saccharomyces cerevisiae [1,2]. In addition, given the similarities between XPR1 and SYG1 and phosphate regulatory proteins, it has been proposed that XPR1 might be involved in G-protein associated signal transduction and may itself function as a phosphate sensor .. +PF03125 C. elegans Sre G protein-coupled chemoreceptor
Pfam-B_352 (release 6.5). Caenorhabditis elegans Sre proteins are candidate chemosensory receptors. There are four main recognised groups of such receptors: Odr-10, Sra, Sro, and Srg. Sre (this family), Sra Pfam:PF02117 and Srb Pfam:PF02175 comprise the Sra group. All of the above receptors are thought to be G protein-coupled seven transmembrane domain proteins [1,2]. The existence of several different chemosensory receptors underlies the fact that in spite of having only 20-30 chemosensory neurones, C. elegans detects hundreds of different chemicals, with the ability to discern individual chemicals among combinations .. +PF04086 Signal recognition particle, alpha subunit, N-terminal
Pfam-B_7342 (release 7.3);. SRP is a complex of six distinct polypeptides and a 7S RNA that is essential for transferring nascent polypeptide chains that are destined for export from the cell to the translocation apparatus of the endoplasmic reticulum (ER) membrane . SRP binds hydrophobic signal sequences as they emerge from the ribosome, and arrests translation.. +PF00448 SRP54-type protein, GTPase domain
This family includes relatives of the G-domain of the SRP54 family of proteins.. +PF02881 SRP54-type protein, helical bundle domain
+PF02978 Signal peptide binding domain
+PF00436 Single-strand binding protein family
This family includes single stranded binding proteins and also the primosomal replication protein N (PriB). PriB forms a complex with PriA, PriC and ssDNA.. +PF04503 Single-stranded DNA binding protein, SSDP
Pfam-B_2031 (release 7.5). This is a family of eukaryotic single-stranded DNA binding proteins with specificity to a pyrimidine-rich element found in the promoter region of the alpha2(I) collagen gene.. +PF05030 SSXT protein (N-terminal region)
Pfam-B_4900 (release 7.6). The SSXT or SS18 protein is involved in synovial sarcoma in humans. A SYT-SSX fusion gene resulting from the chromosomal translocation t(X;18) (p11;q11) is characteristic of synovial sarcomas. This translocation fuses the SSXT (SYT) gene from chromosome 18 to either of two homologous genes at Xp11, SSX1 or SSX2 .. +PF01852 START domain
Alignment kindly provided by SMART. +PF01740 SpoIIAA;
The STAS (after Sulphate Transporter and AntiSigma factor antagonist) domain is found in the C terminal region of Sulphate transporters and bacterial antisigma factor antagonists. It has been suggested that this domain may have a general NTP binding function .. +PF03015 Male sterility protein
Pfam-B_1115 (release 6.4). This family represents the C-terminal region of the male sterility protein in a number of arabidopsis and drosophila. A sequence-related jojoba acyl CoA reductase is also included.. +PF02544 3-oxo-5-alpha-steroid 4-dehydrogenase
Pfam-B_1713 (release 5.4). This family consists of 3-oxo-5-alpha-steroid 4-dehydrogenases, EC:1.3.99.5. Also known as Steroid 5-alpha-reductase, the reaction catalysed by this enzyme is: 3-oxo-5-alpha-steroid + acceptor <=> 3-oxo-delta(4)-steroid + reduced acceptor. The Steroid 5-alpha-reductase enzyme is responsible for the formation of dihydrotestosterone, this hormone promotes the differentiation of male external genitalia and the prostate during fetal development . In humans mutations in this enzyme can cause a form of male pseudohermaphroditism in which the external genitalia and prostate fail to develop normally . A related enzyme also found in plants is Swiss:Q38944 (DET2) a steroid reductase from Arabidopsis. Mutations in this enzyme cause defects in light-regulated development .. +PF02910 succ_DH_flav_C;
Fumarate reductase flavoprotein C-term. This family contains fumarate reductases, succinate dehydrogenases and L-aspartate oxidases.. +PF00884 Sulfatase
Pfam-B_784 (release 3.0) & Pfam-B_7393 (Release 8.0). +PF00685 Sulfotransfer;
Sulfotransferase domain. Pfam-B_87 (release 2.1) & Pfam-B_1885 (Release 7.5). +PF03567 Sulfotransfer2;
Sulfotransferase family. Pfam-B_3050(7.0),Pfam-B_5394(7.7),Pfam-B_7836(10.0),Pfam-B_5040(7.5). This family includes a variety of sulfotransferase enzymes. Chondroitin 6-sulfotransferase catalyses the transfer of sulfate to position 6 of the N-acetylgalactosamine residue of chondroitin. This family also includes Heparan sulfate 2-O-sulfotransferase (HS2ST) and Heparan sulfate 6-sulfotransferase (HS6ST). Heparan sulfate (HS) is a co-receptor for a number of growth factors, morphogens, and adhesion proteins. HS biosynthetic modifications may determine the strength and outcome of HS-ligand interactions. Mice that lack HS2ST undergo developmental failure only after midgestation,the most dramatic effect being the complete failure of kidney development . Heparan sulphate 6- O -sulfotransferase (HS6ST) catalyses the transfer of sulphate from adenosine 3'-phosphate, 5'-phosphosulphate to the 6th position of the N -sulphoglucosamine residue in heparan sulphate .. +PF04935 Surfeit locus protein 6
Pfam-B_5497 (release 7.6). The surfeit locus protein SURF-6 is shown to be a component of the nucleolar matrix and has a strong binding capacity for nucleic acids .. +PF01805 Surp module
This domain is also known as the SWAP domain. SWAP stands for Suppressor-of-White-APricot. It has been suggested that these domains may be RNA binding .. +PF02201 SWIB/MDM2 domain
This family includes the SWIB domain and the MDM2 domain . The p53-associated protein (MDM2) is an inhibitor of the p53 tumour suppressor gene binding the transactivation domain and down regulating the ability of p53 to activate transcription. This family contains the p53 binding domain of MDM2 .. +PF04434 SWIM zinc finger
This domain is found in bacterial, archaeal and eukaryotic proteins. It is predicted to be organised into two N-terminal beta-strands and a C-terminal alpha helix, thus possibly adopting a fold similar to that of the C2H2 zinc finger (Pfam:PF00096). SWIM is thought to be a versatile domain that can interact with DNA or proteins in different contexts .. +PF04433 SWIRM domain
This SWIRM domain is a small alpha-helical domain of about 85 amino acid residues found in chromosomal proteins. It contains a helix-turn helix motif and binds to DNA .. +PF00804 Syntaxin
Pfam-B_1158 (release 2.1). Syntaxins are the prototype family of SNARE proteins. They usually consist of three main regions - a C-terminal transmembrane region, a central SNARE domain which is characteristic of and conserved in all syntaxins (Pfam:PF05739), and an N-terminal domain that is featured in this entry. This domain varies between syntaxin isoforms; in syntaxin 1A (Swiss:O35526) it is found as three alpha-helices with a left-handed twist. It may fold back on the SNARE domain to allow the molecule to adopt a 'closed' configuration that prevents formation of the core fusion complex - it thus has an auto-inhibitory role. The function of syntaxins is determined by their localisation. They are involved in neuronal exocytosis, ER-Golgi transport and Golgi-endosome transport, for example. They also interact with other proteins as well as those involved in SNARE complexes. These include vesicle coat proteins, Rab GTPases, and tethering factors .. +PF00907 T-box
Pfam-B_363 (release 3.0). The T-box encodes a 180 amino acid domain that binds to DNA. Genes encoding T-box proteins are found in a wide range of animals, but not in other kingdoms such as plants. Family members are all thought to bind to the DNA consensus sequence TCACACCT. They are found exclusively in the nucleus, and perform DNA-binding and transcriptional activation/repression roles. They are generally required for development of the specific tissues they are expressed in, and mutations in T-box genes are implicated in human conditions such as DiGeorge syndrome and X-linked cleft palate, which feature malformations .. +PF04719 hTAFII28-like protein conserved region
Pfam-B_4085 (release 7.5). The general transcription factor, TFIID, consists of the TATA-binding protein (TBP) associated with a series of TBP-associated factors (TAFs) that together participate in the assembly of the transcription preinitiation complex. The conserved region is found at the C-terminal of most member proteins. The crystal structure of hTAFII28 with hTAFII18 shows that this region is involved in the binding of these two subunits. The conserved region contains four alpha helices and three loops arranged as in histone H3 [1,2].. +PF04177 TAP42-like family
Pfam-B_5735 (release 7.3);. The TOR signalling pathway activates a cell-growth program in response to nutrients . TIP41 (Pfam:PF04176) interacts with TAP42 and negatively regulates the TOR signaling pathway .. +PF03943 TAP C-terminal domain
The vertebrate Tap protein is a member of the NXF family of shuttling transport receptors for nuclear export of mRNA. Tap has a modular structure, and its most C-terminal domain is important for binding to FG repeat-containing nuclear pore proteins (FG-nucleoporins) and is sufficient to mediate nuclear shuttling . The structure of the C-terminal domain is composed of four helices . The structure is related to the UBA domain.. +PF03134 TB2/DP1, HVA22 family
Pfam-B_837 (release 6.5). This family includes members from a wide variety of eukaryotes. It includes the TB2/DP1 (deleted in polyposis) protein (e.g. Swiss:Q00765), which in humans is deleted in severe forms of familial adenomatous polyposis, an autosomal dominant oncological inherited disease. The family also includes the plant protein of known similarity to TB2/DP1, the HVA22 abscisic acid-induced protein (e.g. Swiss:Q07764), which is thought to be a regulatory protein.. +PF00352 Transcription factor TFIID (or TATA-binding protein, TBP)
+PF03148 Tektin family
Pfam-B_3069 (release 6.5). Tektins are cytoskeletal proteins. They have been demonstrated in such cellular sites as centrioles, basal bodies, and along ciliary and flagellar doublet microtubules. Tektins form unique protofilaments, organised as longitudinal polymers of tektin heterodimers with axial periodicity matching tubulin. Tektin polypeptides consist of several alpha-helical regions that are predicted to form coiled coils. Indeed, tektins share considerable structural similarities with intermediate filament proteins. Possible functional roles for tektins are: stabilisation of tubulin protofilaments; attachment of A and B-tubules in ciliary/flagellar microtubule doublets and C-tubules in centrioles; binding of axonemal components .. +PF01397 Terpene synthase, N-terminal domain
Pfam-B_728 (release 3.0). It has been suggested that this gene family be designated tps (for terpene synthase) . It has been split into six subgroups on the basis of phylogeny, called tpsa-tpsf. tpsa includes vetispiridiene synthase Swiss:Q39979, 5-epi- aristolochene synthase, Swiss:Q40577 and (+)-delta-cadinene synthase Swiss:P93665. tpsb includes (-)-limonene synthase, Swiss:Q40322. tpsc includes kaurene synthase A, Swiss:O04408. tpsd includes taxadiene synthase, Swiss:Q41594, pinene synthase, Swiss:O24475 and myrcene synthase, Swiss:O24474. tpse includes kaurene synthase B. tpsf includes linalool synthase.. +PF00440 tetR;
Bacterial regulatory proteins, tetR family. +PF03850 Transcription factor Tfb4
This family appears to be distantly related to the VWA domain.. +PF02269 TFIID-18;
Transcription initiation factor IID, 18kD subunit. Pfam-B_3681 (release 5.2). This family includes the Spt3 yeast transcription factors and the 18kD subunit from human transcription initiation factor IID (TFIID-18). Determination of the crystal structure reveals an atypical histone fold . +PF03847 TFIID_A;
Transcription initiation factor TFIID subunit A. +PF04494 TFIID_WDA;
WD40 associated region in TFIID subunit. Pfam-B_9152 (release 7.5). This region, possibly a domain is found in subunits of transcription factor TFIID. The function of this region is unknown.. +PF04253 Transferrin receptor-like dimerisation domain
This domain is involved in dimerisation of the transferrin receptor as shown in its crystal structure.. +PF02824 TGS domain
The TGS domain is named after ThrRS, GTPase, and SpoT . Interestingly, TGS domain was detected also at the amino terminus of the uridine kinase from the spirochaete Treponema pallidum (but not any other organism, including the related spirochaete Borrelia burgdorferi). TGS is a small domain that consists of ~50 amino acid residues and is predicted to possess a predominantly beta-sheet structure. There is no direct information on the functions of the TGS domain, but its presence in two types of regulatory proteins (the GTPases and guanosine polyphosphate phosphohydrolases/synthetases) suggests a ligand (most likely nucleotide)-binding, regulatory role . . +PF00763 Tetrahydrofolate dehydrogenase/cyclohydrolase, catalytic domain
Pfam-B_882 (release 2.1). +PF02882 Tetrahydrofolate dehydrogenase/cyclohydrolase, NAD(P)-binding domain
Pfam-B_882 (release 2.1). +PF00899 ThiF_family;
Pfam-B_59 (release 3.0). This family contains a repeated domain in ubiquitin activating enzyme E1 and members of the bacterial ThiF/MoeB/HesA family.. +PF02597 DUF170;
ThiS (thiaminS) is a 66 aa protein involved in sulphur transfer Swiss:O32583. ThiS is coded in the thiCEFSGH operon in E. coli. This family of proteins has two conserved Glycines at the COOH terminus. Thiocarboxylate is formed at the last G in the activation process. Sulphur is transferred from ThiI to ThiS in a reaction catalysed by IscS . MoaD, Swiss:P30748 a protein involved in sulphur transfer in molybdopterin synthesis, is about the same length and shows limited sequence similarity to ThiS. Both have the conserved GG at the COOH end.. +PF01833 IPT/TIG domain
This family consists of a domain that has an immunoglobulin like fold. These domains are found in cell surface receptors such as Met and Ron as well as in intracellular transcription factors where it is involved in DNA binding. CAUTION: This family does not currently recognise a significant number of members.. +PF04280 Tim44-like domain
TIGRFAMs (release 2.0);. Tim44 is an essential component of the machinery that mediates the translocation of nuclear-encoded proteins across the mitochondrial inner membrane . Tim44 is thought to bind phospholipids of the mitochondrial inner membrane both by electrostatic interactions and by penetrating the polar head group region . This family includes the C-terminal region of Tim44 that has been shown to form a stable proteolytic fragment in yeast. This region is also found in a set of smaller bacterial proteins. The molecular function of the bacterial members of this family is unknown but transport seems likely. The crystal structure of the C terminal of Tim44 has revealed a large hydrophobic pocket which might play an important role in interacting with the acyl chains of lipid molecules in the mitochondrial membrane .. +PF04176 TIP41-like family
Pfam-B_12821 (release 7.3);. The TOR signalling pathway activates a cell-growth program in response to nutrients . TIP41 interacts with TAP42 and negatively regulates the TOR signaling pathway .. +PF01582 TIR domain
Pfam-B_571 (release 4.1). The Toll/interleukin-1 receptor (TIR) homology domain is an intracellular signalling domain found in MyD88, interleukin 1 receptor and the Toll receptor. It contains three highly-conserved regions, and mediates protein-protein interactions between the Toll-like receptors (TLRs) and signal-transduction components. TIR-like motifs are also found in plant proteins thought to be involved in resistance to disease. When activated, TIR domains recruit cytoplasmic adaptor proteins MyD88 (Swiss:Q99836) and TOLLIP (Toll interacting protein, Swiss:Q9H0E2). In turn, these associate with various kinases to set off signalling cascades .. +PF03920 TLE_N-terminal;
Groucho/TLE N-terminal Q-rich domain. The N-terminal domain of the Groucho/TLE co-repressor proteins is involved in oligomerisation.. +PF05154 TM2 domain
This family is composed of a pair of transmembrane alpha helices connected by a short linker. The function of this domain is unknown, however it occurs in a wide range of protein contexts.. +PF03348 TMS_TDE;
Serine incorporator (Serinc). Pfam-B_3473 (release 6.5). This is a family of eukaryotic membrane proteins which incorporate serine into membranes and facilitate the synthesis of the serine-derived lipids phosphatidylserine and sphingolipid . Members of this family contain 11 transmembrane domains and form intracellular complexes with key enzymes involved in serine and sphingolipid biosynthesis .. +PF03459 TOBE domain
The TOBE domain (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulfate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain.. +PF04265 Thiamin pyrophosphokinase, vitamin B1 binding domain
TIGRFAMs (release 2.0);. Family of thiamin pyrophosphokinase (EC:2.7.6.2). Thiamin pyrophosphokinase (TPK) catalyses the transfer of a pyrophosphate group from ATP to vitamin B1 (thiamin) to form the coenzyme thiamin pyrophosphate (TPP). Thus, TPK is important for the formation of a coenzyme required for central metabolic functions. The structure of thiamin pyrophosphokinase suggests that the enzyme may operate by a mechanism of pyrophosphoryl transfer similar to those described for pyrophosphokinases functioning in nucleotide biosynthesis .. +PF04263 Thiamin pyrophosphokinase, catalytic domain
TIGRFAMs (release 2.0);. Family of thiamin pyrophosphokinase (EC:2.7.6.2). Thiamin pyrophosphokinase (TPK) catalyses the transfer of a pyrophosphate group from ATP to vitamin B1 (thiamin) to form the coenzyme thiamin pyrophosphate (TPP). Thus, TPK is important for the formation of a coenzyme required for central metabolic functions. The structure of thiamin pyrophosphokinase suggests that the enzyme may operate by a mechanism of pyrophosphoryl transfer similar to those described for pyrophosphokinases functioning in nucleotide biosynthesis .. +PF00515 TPR;
Tetratricopeptide repeat. Alignment kindly provided by SMART. +PF01938 DUF90;
+PF00486 trans_reg_C;
Transcriptional regulatory protein, C terminal. Pfam-B_94 (release 1.0). +PF02458 Transferase family
Pfam-B_1540 (release 5.4). This family includes a number of transferase enzymes. These include anthranilate N-hydroxycinnamoyl/benzoyltransferase that catalyses the first committed reaction of phytoalexin biosynthesis . Deacetylvindoline 4-O-acetyltransferase EC:2.3.1.107, which catalyses the last step in vindoline biosynthesis, is also a member of this family . The motif HXXXD is probably part of the active site. The family also includes trichothecene 3-O-acetyltransferase.. +PF01336 Aspartyl_tRNA_N; tRNA_anti;
OB-fold nucleic acid binding domain. This family contains OB-fold domains that bind to nucleic acids . The family includes the anti-codon binding domain of lysyl, aspartyl, and asparaginyl -tRNA synthetases (See Pfam:PF00152). Aminoacyl-tRNA synthetases catalyse the addition of an amino acid to the appropriate tRNA molecule EC:6.1.1.-. This family also includes part of RecG helicase involved in DNA repair. Replication factor A is a heterotrimeric complex, that contains a subunit in this family [2,3]. This domain is also found at the C-terminus of bacterial DNA polymerase III alpha chain.. +PF01841 Transglutaminase-like superfamily
This family includes animal transglutaminases and other bacterial proteins of unknown function. Sequence conservation in this superfamily primarily involves three motifs that centre around conserved cysteine, histidine, and aspartate residues that form the catalytic triad in the structurally characterised transglutaminase, the human blood clotting factor XIIIa' . On the basis of the experimentally demonstrated activity of the Methanobacterium phage pseudomurein endoisopeptidase , it is proposed that many, if not all, microbial homologues of the transglutaminases are proteases and that the eukaryotic transglutaminases have evolved from an ancestral protease. . +PF00927 1005; Transglutamin_C;
Transglutaminase family, C-terminal ig like domain. Pfam-B_1005 (release 3.0). +PF02779 transketolaseD2; transket_pyr;
Transketolase, pyrimidine binding domain. This family includes transketolase enzymes, pyruvate dehydrogenases, and branched chain alpha-keto acid decarboxylases.. +PF02780 transketolaseD3; transketolase_C;
Transketolase, C-terminal domain. The C-terminal domain of transketolase has been proposed as a regulatory molecule binding site .. +PF00335 transmembrane4;
+PF00905 Penicillin binding protein transpeptidase domain
The active site serine (residue 337 in Swiss:P14677) is conserved in all members of this family.. +PF01609 Transposase_11;
Transposase DDE domain. Pfam-B_1013 (release 4.1). Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction . This family contains transposases for IS4 Swiss:P03835 , IS421 Swiss:P11901 , IS5377 Swiss:Q45620, IS427 , IS402 , IS1355 Swiss:O69604, IS5, which was originally isolated in bacteriophage lambda .. +PF02371 Transposase_19;
Transposase IS116/IS110/IS902 family. Pfam-B_280 (release 5.2). Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases for IS116, IS110 and IS902. This region is often found with Pfam:PF01548. The exact function of this region is uncertain. This family contains a HHH motif suggesting a DNA-binding function.. +PF01526 Transposase_7;
Tn3 transposase DDE domain. Pfam-B_885 (release 4.0). This family includes transposases of Tn3, Tn21, Tn1721, Tn2501, Tn3926 transposons from E-coli. The specific binding of the Tn3 transposase to DNA has been demonstrated. Sequence analysis has suggested that the invariant triad of Asp689, Asp765, Glu895 (numbering as in Tn3) may correspond to the D-D-35-E motif previously implicated in the catalysis of numerous transposases .. +PF01548 Transposase_9;
Pfam-B_646 (release 4.0). Transposase proteins are necessary for efficient DNA transposition. This family includes an amino-terminal region of the pilin gene inverting protein (PIVML) and members of the IS111A/IS1328/IS1533 family of transposases.. +PF00579 tRNA synthetases class I (W and Y)
MRC-LMB Genome group. +PF01409 tRNA synthetases class II core domain (F)
Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only phenylalanyl-tRNA synthetases. This is the core catalytic domain.. +PF01588 Putative tRNA binding domain
Pfam-B_482 (release 4.1). This domain is found in prokaryotic methionyl-tRNA synthetases, prokaryotic phenylalanyl tRNA synthetases, the yeast GU4 nucleic-binding protein (G4p1 or p42, ARC1) , human tyrosyl-tRNA synthetase , and endothelial-monocyte activating polypeptide II. G4p1 binds specifically to tRNA and forms a complex with methionyl-tRNA synthetases . In human tyrosyl-tRNA synthetase this domain may direct tRNA to the active site of the enzyme . This domain may perform a common function in tRNA aminoacylation .. +PF03250 Tropomodulin
Pfam-B_3359 (release 6.5). Tropomodulin is a novel tropomyosin regulatory protein that binds to the end of erythrocyte tropomyosin and blocks head-to-tail association of tropomyosin along actin filaments . Limited proteolysis shows this protein is composed of two domains . The amino terminal domain contains the tropomyosin binding function .. +PF00992 Troponin
Pfam-B_62 (release 3.0). Troponin (Tn) contains three subunits, Ca2+ binding (TnC), inhibitory (TnI), and tropomyosin binding (TnT). This Pfam family contains members of the TnT subunit. Troponin is a complex of three proteins, Ca2+ binding (TnC), inhibitory (TnI), and tropomyosin binding (TnT). The troponin complex regulates Ca++ induced muscle contraction. This family includes troponin T and troponin I. Troponin I binds to actin and troponin T binds to tropomyosin.. +PF00234 tryp_alpha_amyl;
Protease inhibitor/seed storage/LTP family. This family is composed of trypsin-alpha amylase inhibitors, seed storage proteins and lipid transfer proteins from plants.. +PF00089 trypsin;
+PF02210 TSPN; TSP_N;
Pfam-B_4211 (release 12.0) . This family includes the Thrombospondin N-terminal-like domain, a Laminin G subfamily.. +PF03133 Tubulin-tyrosine ligase family
Pfam-B_682 (release 6.5). Tubulins and microtubules are subjected to several post-translational modifications of which the reversible detyrosination/tyrosination of the carboxy-terminal end of most alpha-tubulins has been extensively analysed. This modification cycle involves a specific carboxypeptidase and the activity of the tubulin-tyrosine ligase (TTL) . The true physiological function of TTL has so far not been established. Tubulin-tyrosine ligase (TTL) catalyses the ATP-dependent post-translational addition of a tyrosine to the carboxy terminal end of detyrosinated alpha-tubulin. In normally cycling cells, the tyrosinated form of tubulin predominates. However, in breast cancer cells, the detyrosinated form frequently predominates, with a correlation to tumour aggressiveness . On the other hand, 3-nitrotyrosine has been shown to be incorporated, by TTL, into the carboxy terminal end of detyrosinated alpha-tubulin. This reaction is not reversible by the carboxypeptidase enzyme. Cells cultured in 3-nitrotyrosine rich medium showed evidence of altered microtubule structure and function, including altered cell morphology, epithelial barrier dysfunction, and apoptosis . Bacterial homologs of TTL are predicted to form peptide tags. Some of these are fused to a 2-oxoglutarate Fe(II)-dependent dioxygenase domain .. +PF01167 Tub family
+PF03953 tubulin_C;
Tubulin C-terminal domain. This family includes the tubulin alpha, beta and gamma chains. Members of this family are involved in polymer formation. Tubulins are GTPases. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. Tubulin is the major component of microtubules. (The FtsZ GTPases have been split into their own family).. +PF00567 Tudor domain
Alignment kindly provided by SMART. +PF04906 Tweety
Pfam-B_5713 (release 7.6). The tweety (tty) gene has not been characterised at the protein level. However, it is thought to form a membrane protein with five potential membrane-spanning regions. A number of potential functions have been suggested in .. +PF04564 U-box domain
Pfam-B_2801 (release 7.5). This domain is related to the Ring finger Pfam:PF00097 but lacks the zinc binding residues .. +PF00627 UBA/TS-N domain
This small domain is composed of three alpha helices. This family includes the previously defined UBA and TS-N domains. The UBA-domain (ubiquitin associated domain) is a novel sequence motif found in several proteins having connections to ubiquitin and the ubiquitination pathway. The structure of the UBA domain consists of a compact three helix bundle . This domain is found at the N terminus of EF-TS hence the name TS-N. The structure of EF-TS is known and this domain is implicated in its interaction with EF-TU . The domain has been found in non EF-TS proteins such as alpha-NAC Swiss:P70670 and MJ0280 Swiss:Q57728 .. +PF01040 CytC_assmbly_fac; COX10_ctaB_cyoE;
UbiA prenyltransferase family. Pfam-B_1357 (release 3.0). +PF00240 Ubiquitin family
This family contains a number of ubiquitin-like proteins: SUMO (smt3 homologue) (see Swiss:Q02724), Nedd8 (see Swiss:P29595), Elongin B (see Swiss:Q15370), Rub1 (see Swiss:Q9SHE7), and Parkin (see Swiss:O60260). A number of them are thought to carry a distinctive five-residue motif termed the proteasome-interacting motif (PIM), which may have a biologically significant role in protein delivery to proteasomes and recruitment of proteasomes to transcription sites .. +PF00789 UBX domain
SMART, Mistry J, Wood V. Alignment kindly provided by SMART. This domain is present in ubiquitin-regulatory proteins and is a general Cdc48-interacting module .. +PF00443 UCH-2;
Ubiquitin carboxyl-terminal hydrolase. +PF03456 uDENN domain
This region is always found associated with Pfam:PF02141. It is predicted to form an all beta domain .. +PF03167 Uracil DNA glycosylase superfamily
+PF02809 Ubiquitin interaction motif
This motif is called the ubiquitin interaction motif. One of the proteins containing this motif is a receptor for poly-ubiquitination chains for the proteasome . This motif has a pattern of conservation characteristic of an alpha helix.. +PF01027 UPF0005;
Inhibitor of apoptosis-promoting Bax1. Pfam-B_1376 (release 3.0) & Pfam-B_5704 (release 7.5). Programmed cell-death involves a set of Bcl-2 family proteins, some of which inhibit apoptosis (Bcl-2 and Bcl-XL) and some of which promote it (Bax and Bak). Human Bax inhibitor, BI-1, is an evolutionarily conserved integral membrane protein containing multiple membrane-spanning segments predominantly localised to intracellular membranes. It has 6-7 membrane-spanning domains. The C termini of the mammalian BI-1 proteins are comprised of basic amino acids resembling some nuclear targeting sequences, but otherwise the predicted proteins lack motifs that suggest a function. As plant BI-1 appears to localise predominantly to the ER, we hypothesized that plant BI-1 could also regulate cell death triggered by ER stress . BI-1 appears to exert its effect through an interaction with calmodulin . The budding yeast member of this family has been found unexpectedly to encode a BH3 domain-containing protein (Ybh3p) that regulates the mitochondrial pathway of apoptosis in a phylogenetically conserved manner .. +PF03684 Uncharacterised protein family (UPF0179)
The function of this family is unknown, however the proteins contain two cysteine clusters that may be iron sulphur redox centres.. +PF03699 Uncharacterised protein family (UPF0182)
This family contains uncharacterised integral membrane proteins.. +PF03676 Uncharacterised protein family (UPF0183)
This family of proteins includes Lin-10 from C. elegans. . +PF03671 UPF0185;
Ubiquitin fold modifier 1 protein. This is a family of short ubiquitin-like proteins, that is like neither type-1 or type-2. It is a ubiquitin-fold modifier 1 (Ufm1) that is synthesised in a precursor form of 85 amino-acid residues. In humans the enzyme for Ufm1 is Uba5 and the conjugating enzyme is Ufc1. Prior to activation by Uba5 the extra two amino acids at the C-terminal region of the human pro-Ufm1 protein are removed to expose Gly whose residue is necessary for conjugation to target molecule(s). The mature Ufm1 is conjugated to yet unidentified endogenous proteins, . While Ubiquitin and many Ubls possess the conserved C-terminal di-glycine that is adenylated by each specific E1 or E1-like enzyme, respectively, in an ATP-dependent manner, Ufm1(1-83) possesses a single glycine at its C-terminus, which is followed by a Ser-Cys dipeptide in the precursor form of Ufm1. The C-terminally processed Ufm1(1-83) is specifically activated by Uba5, an E1-like enzyme, and then transferred to its cognate Ufc1, an E2-like enzyme .. +PF00179 Ubiquitin-conjugating enzyme
Proteins destined for proteasome-mediated degradation may be ubiquitinated. Ubiquitination follows conjugation of ubiquitin to a conserved cysteine residue of UBC homologues. TSG101 is one of several UBC homologues that lacks this active site cysteine [4, 5].. +PF02814 UreE;
UreE urease accessory protein, N-terminal domain. Pfam-B_6279 (release 6.1). UreE is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid.. +PF04192 Utp21 specific WD40 associated putative domain
Pfam-B_16350 (release 7.3);. Utp21 is a subunit of U3 snoRNP, which is essential for synthesis of 18S rRNA.. +PF02151 UvrB/uvrC motif
+PF05008 Vesicle transport v-SNARE protein N-terminus
Pfam-B_5492 (release 7.6). V-SNARE proteins are required for protein traffic between eukaryotic organelles. The v-SNAREs on transport vesicles interact with t-SNAREs on target membranes in order to facilitate this . This domain is the N-terminal half of the V-Snare proteins.. +PF00790 VHS domain
Alignment kindly provided by SMART. Domain present in VPS-27, Hrs and STAM.. +PF00654 voltage_CLC;
Voltage gated chloride channel. wublastp P37020/1-588. This family of ion channels contains 10 or 12 transmembrane helices. Each protein forms a single pore. It has been shown that some members of this family form homodimers. In terms of primary structure, they are unrelated to known cation channels or other types of anion channels. Three ClC subfamilies are found in animals. ClC-1 (Swiss:P35523) is involved in setting and restoring the resting membrane potential of skeletal muscle, while other channels play important parts in solute concentration mechanisms in the kidney . These proteins contain two Pfam:PF00571 domains.. +PF04840 Vps16, C-terminal region
Pfam-B_6003 (release 7.6). This protein forms part of the Class C vacuolar protein sorting (Vps) complex. Vps16 is essential for vacuolar protein sorting, which is essential for viability in plants, but not yeast . The Class C Vps complex is required for SNARE-mediated membrane fusion at the lysosome-like yeast vacuole. It is thought to play essential roles in membrane docking and fusion at the Golgi-to-endosome and endosome-to-vacuole stages of transport . The role of VPS16 in this complex is not known.. +PF04841 Vps16, N-terminal region
Pfam-B_6003 (release 7.6). This protein forms part of the Class C vacuolar protein sorting (Vps) complex. Vps16 is essential for vacuolar protein sorting, which is essential for viability in plants, but not yeast . The Class C Vps complex is required for SNARE-mediated membrane fusion at the lysosome-like yeast vacuole. It is thought to play essential roles in membrane docking and fusion at the Golgi-to-endosome and endosome-to-vacuole stages of transport . The role of VPS16 in this complex is not known.. +PF03635 Vacuolar protein sorting-associated protein 35
Pfam-B_3569 (release 7.0). Vacuolar protein sorting-associated protein (Vps) 35 is one of around 50 proteins involved in protein trafficking. In particular, Vps35 assembles into a retromer complex with at least four other proteins Vps5, Vps17, Vps26 and Vps29. Vps35 contains a central region of weaker sequence similarity, thought to indicate the presence of at least three domains .. +PF04129 Vps52 / Sac2 family
Pfam-B_10164 (release 7.3);. Vps52 complexes with Vps53 and Vps54 to form a multi-subunit complex involved in regulating membrane trafficking events .. +PF04100 Vps53-like, N-terminal
Pfam-B_5601 (release 7.3);. Vps53 complexes with Vps52 and Vps54 to form a multi-subunit complex involved in regulating membrane trafficking events .. +PF02204 Vacuolar sorting protein 9 (VPS9) domain
Alignment kindly provided by SMART. This domain acts as a GDP-GTP exchange factor (GEF). It activates Rab GTPases by stimulating the release of GDP and allowing GTP to bind .. +PF03302 Giardia variant-specific surface protein
Pfam-B_4536 (release 6.5). +PF00092 vwa;
von Willebrand factor type A domain. +PF00094 vwd;
von Willebrand factor type D domain. Swiss:P17554 contains a vwd domain. Its function is unrelated but the similarity is very strong by several methods.. +PF00095 wap;
WAP-type (Whey Acidic Protein) 'four-disulfide core'. Swissprot_feature_table. +PF00400 G-beta;
WD domain, G-beta repeat. Pfam-B_2 (release 1.0). +PF00568 WH1 domain
Alignment kindly provided by SMART. WASp Homology domain 1 (WH1) domain. WASP is the protein that is defective in Wiskott-Aldrich syndrome (WAS). The majority of point mutations occur within the amino- terminal WH1 domain. The metabotropic glutamate receptors mGluR1alpha and mGluR5 bind a protein called homer, which is a WH1 domain homologue . A subset of WH1 domains has been termed a "EVH1" domain and appear to bind a polyproline motif.. +PF02205 WH2 motif
Alignment kindly provided by SMART. The WH2 motif (for Wiskott Aldrich syndrome homology region 2) has been shown in WASP Swiss:P42768 and Scar1 (mammalian homologue) to be the region that interacts with actin.. +PF02467 Transcription factor WhiB
Pfam-B_2249 (release 5.4). WhiB is a putative transcription factor in Actinobacteria, required for differentiation and sporulation.. +PF02019 WIF domain
The WIF domain is found in the RYK tyrosine kinase receptors Swiss:P34925 and WIF the Wnt-inhibitory- factor. The domain is extracellular and contains two conserved cysteines that may form a disulphide bridge. This domain is Wnt binding in WIF, and it has been suggested that RYK may also bind to Wnt . The WIF domain is a member of the immunoglobulin superfamily, and it comprises nine beta-strands and two alpha-helices, with two of the beta-strands (6 and 9) interrupted by four and six residues of irregular secondary structure, respectively. Considering that the activity of Wnts depends on the presence of a palmitoylated cysteine residue in their amino-terminal polypeptide segment, Wnt proteins are lipid-modified and can act as stem cell growth factors, it is likely that the WIF domain recognises and binds to Wnts that have been activated by palmitoylation and that the recognition of palmitoylated Wnts by WIF-1 is effected by its WIF domain rather than by its EGF domains. A strong binding affinity for palmitoylated cysteine residues would further explain the remarkably high affinity of human WIF-1 not only for mammalian Wnts, but also for Wnts from Xenopus and Drosophila .. +PF03106 WRKY DNA -binding domain
Pfam-B_85 (release 6.5). +PF02206 Domain of unknown function
Alignment kindly provided by SMART. +PF00397 WW_rsp5_WWP;
The WW domain is a protein module with two highly conserved tryptophans that binds proline-rich peptide motifs in vitro.. +PF02825 WWE domain
The WWE domain is named after three of its conserved residues and is predicted to mediate specific protein-protein interactions in ubiquitin and ADP ribose conjugation systems .. +PF02706 wzz;
Chain length determinant protein. Pfam-B_1977 (release 5.5). This family includes proteins involved in lipopolysaccharide (lps) biosynthesis. This family comprises the whole length of chain length determinant protein (or wzz protein) that confers a modal distribution of chain length on the O-antigen component of lps . This region is also found as part of bacterial tyrosine kinases such as Swiss:P38134.. +PF03254 Xyloglucan fucosyltransferase
Pfam-B_3419 (release 6.5). Plant cell walls are crucial for development, signal transduction, and disease resistance in plants. Cell walls are made of cellulose, hemicelluloses, and pectins. Xyloglucan (XG), the principal load-bearing hemicellulose of dicotyledonous plants, has a terminal fucosyl residue. This fucosyltransferase adds this residue .. +PF05181 XPA;
XPA protein C-terminus. +PF01286 XPA;
XPA protein N-terminal. +PF00102 Protein-tyrosine phosphatase
Swissprot_feature_table. +PF04893 DUF649;
Pfam-B_5598 (release 7.6). The Yip1 integral membrane domain contains four transmembrane alpha helices. The domain is characterised by the motifs DLYGP and GY. The Yip1 protein is a golgi protein involved in vesicular transport that interacts with GTPases .. +PF03226 Yippee;
Yippee zinc-binding/DNA-binding /Mis18, centromere assembly. Pfam-B_2930 (release 6.5). This family includes both Yippee-type proteins and Mis18 kinetochore proteins. Yippee are putative zinc-binding/DNA-binding proteins. Mis18 are proteins involved in the priming of centromeres for recruiting CENP-A. Mis18-alpha and beta form part of a small complex with Mis18-binding protein. Mis18-alpha is found to interact with DNA de-methylases through a Leu-rich region located at its carboxyl terminus .. +PF02757 YLP motif
The YLP motif is found in several drosophila proteins. Its function is unknown, however the presence of completely conserved tyrosine residues and its presence in Swiss:Q15303 may suggest it could be a substrate for tyrosine kinases.. +PF04146 YT521-B;
Pfam-B_1386 (release 7.3). A protein of the YTH family has been shown to selectively remove transcripts of meiosis-specific genes expressed in mitotic cells . It has been speculated that in higher eukaryotes YTH-family members may be involved in similar mechanisms to suppress gene regulation during gametogenesis or general silencing. The rat protein Swiss:Q9QY02 YT521-B is a tyrosine-phosphorylated nuclear protein, that interacts with the nuclear transcriptosomal component scaffold attachment factor B, and the 68-kDa Src substrate associated during mitosis, Sam68. In vivo splicing assays demonstrated that YT521-B modulates alternative splice site selection in a concentration-dependent manner . The YTH domain has been identified as part of the PUA superfamily .. +PF00643 B-box zinc finger
+PF02892 BED zinc finger
+PF01530 Zinc finger, C2HC type
This is a DNA binding zinc finger domain.. +PF00097 Zinc finger, C3HC4 type (RING finger)
Swissprot_feature_table. The C3HC4 type zinc-finger (RING finger) is a cysteine-rich domain of 40 to 60 residues that coordinates two zinc ions, and has the consensus sequence: C-X2-C-X(9-39)-C-X(1-3)-H-X(2-3)-C-X2-C-X(4-48)-C-X2-C where X is any amino acid . Many proteins containing a RING finger play a key role in the ubiquitination pathway .. +PF00642 Zinc finger C-x8-C-x5-C-x3-H type (and similar)
+PF00098 Zinc knuckle
Overington and HMM_iterative_training. The zinc knuckle is a zinc binding motif composed of the following: CX2CX4HX4C, where X can be any amino acid. The motifs are mostly from retroviral gag proteins (nucleocapsid). Prototype structure is from HIV. Also contains members involved in eukaryotic gene regulation, such as C. elegans GLH-1. Structure is an 18-residue zinc finger.. +PF02008 CXXC zinc finger domain
This domain contains eight conserved cysteine residues that bind to two zinc ions. The CXXC domain is found in a variety of chromatin-associated proteins. This domain binds to nonmethyl-CpG dinucleotides. The domain is characterised by two repeats , and shows a peculiar internal duplication in which the second unit is inserted into the first one . Each of these units is characterised by four conserved cysteines, displaying a CXXCXXCX(n)C motif that chelate a Zn+2 ion. The DNA binding interface has been identified by NMR . In eukaryotes, the CXXC domain is found in stramenopiles, plants and metazoans. Plants possess a mono-CXXC domain that is present in distinct chromatin proteins . Structural comparisons show that the mono-CXXC is homologous to the structural-zinc binding domain of medium chain dehydrogenases .. +PF01529 DHHC palmitoyltransferase
Pfam-B_945 (release 4.0). This family includes the well known DHHC zinc binding domain as well as three of the four conserved transmembrane regions found in this family of palmitoyltransferase enzymes.. +PF04438 HIT zinc finger
This presumed zinc finger contains up to 6 cysteine residues that could coordinate zinc. The domain is named after the HIT protein Swiss:P46973. This domain is also found in the Thyroid receptor interacting protein 3 (TRIP-3) Swiss:Q15649 that specifically interact with the ligand binding domain of the thyroid receptor.. +PF02891 MIZ/SP-RING zinc finger
This domain has SUMO (small ubiquitin-like modifier) ligase activity and is involved in DNA repair and chromosome organisation .. +PF01753 MYND finger
+PF05020 NPL4 family, putative zinc binding region
Pfam-B_13681 (release 7.6). +PF04810 Sec23/Sec24 zinc finger
COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is found to be a zinc binding domain.. +PF02148 Zn-finger in ubiquitin-hydrolases and other protein
+PF04704 Zfx / Zfy transcription activation region
Zfx and Zfy are transcription factors implicated in mammalian sex determination. This region is found N terminal to multiple copies of a C2H2 Zinc finger (Pfam:PF00096). This region has been shown to activate transcription when fused to a GAL4 DNA binding domain .. +PF02535 ZIP Zinc transporter
Pfam-B_1189 (release 5.4) & Pfam-B_1903 (Release 7.5). The ZIP family consists of zinc transport proteins and many putative metal transporters. The main contribution to this family is from the Arabidopsis thaliana ZIP protein family; these proteins are responsible for zinc uptake in the plant . Also found within this family are C. elegans proteins of unknown function which are annotated as being similar to human growth arrest inducible gene product, although this protein is not found within this family.. +PF00246 Zn_carbOpept;
Zinc carboxypeptidase. Prosite & Pfam-B_4832 (Release 7.5). +PF00100 zona_pellucida;
Zona pellucida-like domain. Swissprot_feature_table. +PF01262 AlaDh_PNT;
Alanine dehydrogenase/PNT, C-terminal domain. Pfam-B_4166 (release 6.6). This family now also contains the lysine 2-oxoglutarate reductases. . +PF05222 Alanine dehydrogenase/PNT, N-terminal domain
This family now also contains the lysine 2-oxoglutarate reductases. . +PF05218 Protein of unknown function (DUF713)
Moxon SJ, Pollington J. Pfam-B_6651 (release 7.7). This family contains several proteins of unknown function from C.elegans. The GO annotation suggests that this protein is involved in nematode development and has a positive regulation on growth rate.. +PF05210 Sprouty protein (Spry)
Pfam-B_6527 (release 7.7). This family consists of eukaryotic Sprouty protein homologues. Sprouty proteins have been revealed as inhibitors of the Ras/mitogen-activated protein kinase (MAPK) cascade, a pathway crucial for developmental processes initiated by activation of various receptor tyrosine kinases . The sprouty gene has been found to be expressed in the brain, cochlea, nasal organs, teeth, salivary gland, lungs, digestive tract, kidneys and limb buds in mice .. +PF05216 UNC-50 family
Pfam-B_6607 (release 7.7). Gmh1p (Swiss:P36125) from S. cerevisiae is located in the Golgi membrane and interacts with ARF exchange factors .. +PF00702 haloacid dehalogenase-like hydrolase
Pfam-B_566 (release 2.1). This family is structurally different from the alpha/beta hydrolase family (Pfam:PF00561). This family includes L-2-haloacid dehalogenase, epoxide hydrolases and phosphatases. The structure of the family consists of two domains. One is an inserted four helix bundle, which is the least well conserved region of the alignment, between residues 16 and 96 of Swiss:P24069. The rest of the fold is composed of the core alpha/beta domain . Those members with the characteristic DxD triad at the N-terminus are probably phosphatidylglycerolphosphate (PGP) phosphatases involved in cardiolipin biosynthesis in the mitochondria .. +PF04227 Indigoidine synthase A like protein
Indigoidine is a blue pigment synthesised by Erwinia chrysanthemi implicated in pathogenicity and protection from oxidative stress. IdgA is involved in indigoidine biosynthesis, but its specific function is unknown . The recommended name for this protein is now pseudouridine-5'-phosphate glycosidase.. +PF04613 UDP-3-O-[3-hydroxymyristoyl] glucosamine N-acyltransferase, LpxD
UDP-3-O-[3-hydroxymyristoyl] glucosamine N-acyltransferase (EC 2.3.1.-) catalyses an early step in lipid A biosynthesis: UDP-3-O-(3-hydroxytetradecanoyl)glucosamine + (R)-3-hydroxytetradecanoyl- [acyl carrier protein] -> UDP-2,3-bis(3-hydroxytetradecanoyl)glucosamine + [acyl carrier protein] . Members of this family also contain a hexapeptide repeat (Pfam:PF00132). This family constitutes the non-repeating region of LPXD proteins.. +PF00244 14-3-3 protein
+PF02826 2-Hacid_DH_C;
D-isomer specific 2-hydroxyacid dehydrogenase, NAD binding domain. This domain is inserted into the catalytic domain, the large dehydrogenase and D-lactate dehydrogenase families in SCOP. N-terminal portion of which is represented by family Pfam:PF00389.. +PF02834 2_5_ligase; 2_5_RNA_ligase;
LigT like Phosphoesterase. Members of this family are bacterial and archaeal RNA ligases that are able to ligate tRNA half molecules containing 2',3'-cyclic phosphate and 5' hydroxyl termini to products containing the 2',5' phosphodiester linkage. Each member of this family contains an internal duplication, each of which contains an HXTX motif that defines the family. The structure of a related protein is known . They belong to the 2H phosphoesterase superfamily . They share a common active site, characterised by two conserved histidines, with vertebrate myelin-associated 2',3' phosphodiesterases, plant Arabidopsis thaliana CPDases and several bacterial and viral proteins.. +PF03475 3-alpha domain
Aravind L, Anantharaman V. This small triple helical domain has been predicted to assume a topology similar to helix-turn-helix domains. These domains are found at the C-terminus of proteins related to Swiss:P32157.. +PF01612 3_5_exonuclease; 3_5_exonuc;
Pfam-B_659 (release 4.1). This domain is responsible for the 3'-5' exonuclease proofreading activity of E. coli DNA polymerase I (polI) and other enzymes, it catalyses the hydrolysis of unpaired or mismatched nucleotides. This domain consists of the amino-terminal half of the Klenow fragment in E. coli polI it is also found in the Werner syndrome helicase (WRN), focus forming activity 1 protein (FFA-1) and ribonuclease D (RNase D). Werner syndrome is a human genetic disorder causing premature aging; the WRN protein has helicase activity in the 3'-5' direction [4,5]. The FFA-1 protein is required for formation of a replication foci and also has helicase activity; it is a homologue of the WRN protein . RNase D is a 3'-5' exonuclease involved in tRNA processing. Also found in this family is the autoantigen PM/Scl thought to be involved in polymyositis-scleroderma overlap syndrome.. +PF00803 3A/RNA2 movement protein family
Pfam-B_1054 (release 2.1) & Pfam-B_6332 (release 7.5). This family includes movement proteins from various viruses. The 3A protein is found in bromoviruses and Cucumoviruses. The genome of these viruses contain 3 RNA segments. The third segment (RNA 3) contains two proteins, the coat protein and the 3A protein. The function of the 3A protein is uncertain but has been shown to be involved in cell-to- cell movement of the virus . The family also includes movement proteins from Dianthoviruses.. +PF02829 3H domain
This domain is predicted to be a small molecule binding domain, based on its occurrence with other domains . The domain is named after its three conserved histidine residues.. +PF00725 3-hydroxyacyl-CoA dehydrogenase, C-terminal domain
Pfam-B_743 (release 2.1). This family also includes lambda crystallin. Some proteins include two copies of this domain.. +PF02737 3-hydroxyacyl-CoA dehydrogenase, NAD binding domain
Pfam-B_743 (release 2.1). This family also includes lambda crystallin.. +PF02446 4A_glucanotrans; 4a_glucanotrans;
4-alpha-glucanotransferase. Pfam-B_1924 (release 5.4). These enzymes EC:2.4.1.25 transfer a segment of a (1,4)-alpha-D-glucan to a new 4-position in an acceptor, which may be glucose or (1,4)-alpha-D-glucan .. +PF01812 5-formyltetrahydrofolate cyclo-ligase family
Pfam-B_1555 (release 4.2). 5-formyltetrahydrofolate cyclo-ligase or methenyl-THF synthetase EC:6.3.3.2 catalyses the interchange of 5-formyltetrahydrofolate (5-FTHF) to 5-10-methenyltetrahydrofolate, this requires ATP and Mg2+ . 5-FTHF is used in chemotherapy where it is clinically known as Leucovorin .. +PF02739 5'-3' exonuclease, N-terminal resolvase-like domain
Pfam-B_716 (release 3.0). +PF01367 5_3_exonuclease;
5'-3' exonuclease, C-terminal SAM fold. Pfam-B_716 (release 3.0). +PF03491 Serotonin (5-HT) neurotransmitter transporter, N-terminus
+PF02096 60Kd inner membrane protein
+PF00428 60s_ribosomal;
60s Acidic ribosomal protein. Pfam-B_151 (release 1.0). This family includes archaebacterial L12, eukaryotic P0, P1 and P2.. +PF01591 6-phosphofructo-2-kinase
Pfam-B_717 (release 4.1). This enzyme occurs as a bifunctional enzyme with fructose-2,6-bisphosphatase. The bifunctional enzyme catalyses both the synthesis and degradation of fructose-2,6-bisphosphate, a potent regulator of glycolysis . This enzyme contains a P-loop motif.. +PF00393 6-phosphogluconate dehydrogenase, C-terminal domain
This family represents the C-terminal all-alpha domain of 6-phosphogluconate dehydrogenase. The domain contains two structural repeats of 5 helices each.. +PF02495 7kD viral coat protein
Pfam-B_2886 (release 5.4). This family consists of a 7kD coat protein from carlavirus and potexvirus .. +PF02294 7kD DNA-binding domain
Pfam-B_8148 (release 5.2). This family contains members of the hyper-thermophilic archaebacterium 7kD DNA-binding/endoribonuclease P2 family. There are five 7kD DNA-binding proteins, 7a-7e, found as monomers in the cell. Protein 7e shows the tightest DNA-binding ability.. +PF00001 7 transmembrane receptor (rhodopsin family)
This family contains, amongst other G-protein-coupled receptors (GPCRs), members of the opsin family, which have been considered to be typical members of the rhodopsin superfamily. They share several motifs, mainly the seven transmembrane helices, GPCRs of the rhodopsin superfamily. All opsins bind a chromophore, such as 11-cis-retinal. The function of most opsins other than the photoisomerases is split into two steps: light absorption and G-protein activation. Photoisomerases, on the other hand, are not coupled to G-proteins - they are thought to generate and supply the chromophore that is used by visual opsins .. +PF00002 7 transmembrane receptor (Secretin family)
This family is known as Family B, the secretin-receptor family or family 2 of the G-protein-coupled receptors (GPCRs). They have been described in many animal species, but not in plants, fungi or prokaryotes. Three distinct sub-families are recognised. Subfamily B1 contains classical hormone receptors, such as receptors for secretin and glucagon, that are all involved in cAMP-mediated signalling pathways. Subfamily B2 contains receptors with long extracellular N-termini, such as the leukocyte cell-surface antigen CD97 (Swiss:P48960); calcium-independent receptors for latrotoxin (such as Swiss:O94910), and brain-specific angiogenesis inhibitors (such as Swiss:O14514) amongst others. Subfamily B3 includes Methuselah and other Drosophila proteins (e.g. Swiss:P83119). Other than the typical seven-transmembrane region, characteristic structural features include an amino-terminal extracellular domain involved in ligand binding, and an intracellular loop (IC3) required for specific G-protein coupling .. +PF02949 7tm Odorant receptor
Pfam-B_436 (release 6.4). This family is composed of 7 transmembrane receptors, that are probably drosophila odorant receptors.. +PF00207 Alpha-2-macroglobulin family
This family includes the C-terminal region of the alpha-2-macroglobulin family.. +PF01835 MG2 domain
This is the MG2 (macroglobulin) domain of alpha-2-macroglobulin .. +PF01356 Alpha amylase inhibitor
+PF02137 Adenosine-deaminase (editase) domain
Adenosine deaminases acting on RNA (ADARs) can deaminate adenosine to form inosine. In long double-stranded RNA, this process is non-specific; it occurs site-specifically in RNA transcripts. The former is important in defence against viruses, whereas the latter may affect splicing or untranslated regions. They are primarily nuclear proteins, but a longer isoform of ADAR1 is found predominantly in the cytoplasm. ADARs are derived from the Tad1-like tRNA deaminases that are present across eukaryotes. These in turn belong to the nucleotide/nucleic acid deaminase superfamily and are characterized by a distinct insert between the two conserved cysteines that are involved in binding zinc .. +PF00324 aa_permeases;
+PF03306 Alpha-acetolactate decarboxylase
Pfam-B_3661 (release 6.5). +PF04611 Mating type protein A alpha Y mating type dependent binding region
This region is important for the mating type dependent binding of Y protein to the A alpha Z protein of another mating type in Schizophyllum commune .. +PF03417 Peptidase_C45;
Acyl-coenzyme A:6-aminopenicillanic acid acyl-transferase. +PF02496 ABA/WDS induced protein
Pfam-B_2496 (release 5.4). This is a family of plant proteins induced by water deficit stress (WDS) , or abscisic acid (ABA) stress and ripening . . +PF00950 ABC 3 transport family
Pfam-B_1591 (release 2.1). +PF01061 ABC-2 type transporter
Pfam-B_865 (release 3.0) & Pfam-B_31 (release 15.0). +PF00664 ABC transporter transmembrane region
Pfam-B_2 (release 2.1). This family represents a unit of six transmembrane helices. Many members of the ABC transporter family (Pfam:PF00005) have two such regions.. +PF00005 ABC transporter
ABC transporters form a large family of proteins responsible for translocation of a variety of compounds across biological membranes. ABC transporters are the largest family of proteins in many completely sequenced bacteria. ABC transporters are composed of two copies of this domain and two copies of a transmembrane domain Pfam:PF00664. These four domains may belong to a single polypeptide as in Swiss:P13569, or belong in different polypeptide chains.. +PF00561 abhydrolase;
alpha/beta hydrolase fold. MRC-LMB Genome group. This catalytic domain is found in a very wide range of enzymes.. +PF03806 AbgT putative transporter family
TIGRFAMs, Griffiths-Jones SR. +PF02230 abhydrolase_2;
Phospholipase/Carboxylesterase. Pfam-B_1382 (release 5.2). This family consists of both phospholipases and carboxylesterases with broad substrate specificity, and is structurally related to alpha/beta hydrolases Pfam:PF00561 .. +PF02517 CAAX protease self-immunity
Pfam-B_1073 (release 5.4). Members of this family are probably proteases (after a isoprenyl group is attached to the Cys residue in the C-terminal CAAX motif of a protein to attach it to the membrane, the AAX tripeptide being removed by one of the CAAX prenyl proteases). The family contains the Swiss:Q03530 CAAX prenyl protease. The proteins contain a highly conserved Glu-Glu motif at the amino end of the alignment. The alignment also contains two histidine residues that may be involved in zinc binding . While they are involved in membrane anchoring of proteins in eukaryotes, little is known about their function in prokaryotes. In some known bacteriocin loci, Abi genes have been found downstream of bacteriocin structural genes where they are probably involved in self-immunity. Investigation of the bacteriocin-like loci in the Gram positive bacteria locus from Lactobacillus sakei 23K confirmed that the bacteriocin-like genes (sak23Kalphabeta) exhibited antimicrobial activity when expressed in a heterologous host and that the associated Abi gene (sak23Ki) conferred immunity against the cognate bacteriocin. Interestingly, the immunity genes from three similar systems conferred a high degree of cross-immunity against each other's bacteriocins, suggesting the recognition of a common receptor. Site-directed mutagenesis demonstrated that the conserved motifs constituting the putative proteolytic active site of the Abi proteins are essential for the immunity function of Sak23Ki - thus a new concept in self-immunity .. +PF03992 Antibiotic biosynthesis monooxygenase
This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. Its occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue.. +PF00887 Acyl CoA binding protein
Pfam-B_864 (release 3.0). +PF03255 Acetyl co-enzyme A carboxylase carboxyltransferase alpha subunit
Pfam-B_1935 (release 6.5). Acetyl co-enzyme A carboxylase carboxyltransferase is composed of an alpha and beta subunit.. +PF00871 Acetokinase family
Pfam-B_1595 (release 2.1). This family includes acetate kinase, butyrate kinase and 2-methylpropanoate kinase.. +PF02550 Acetyl-CoA_hydro;
Acetyl-CoA hydrolase/transferase N-terminal domain. This family contains several enzymes which take part in pathways involving acetyl-CoA. Acetyl-CoA hydrolase EC:3.1.2.1 (Swiss:P32316) catalyses the formation of acetate from acetyl-CoA, CoA transferase (CAT1) EC:2.8.3.- (Swiss:P38946) produces succinyl-CoA, and acetate-CoA transferase EC:2.8.3.8 (Swiss:Q59323) utilises acyl-CoA and acetate to form acetyl-CoA.. +PF00797 Acetyltransf2;
Pfam-B_575 (release 2.1). Arylamine N-acetyltransferase (NAT) is a cytosolic enzyme of approximately 30kDa. It facilitates the transfer of an acetyl group from Acetyl Coenzyme A on to a wide range of arylamine, N-hydroxyarylamines and hydrazines. Acetylation of these compounds generally results in inactivation. NAT is found in many species from Mycobacteria (M. tuberculosis, M. smegmatis etc) to man. It was the first enzyme to be observed to have polymorphic activity amongst human individuals. NAT is responsible for the inactivation of Isoniazid (a drug used to treat Tuberculosis) in humans. The NAT protein has also been shown to be involved in the breakdown of folic acid.. +PF00328 acid_phosphat; Acid_phosphat_A;
Histidine phosphatase superfamily (branch 2). The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue. Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches. The smaller branch 2 contains predominantly eukaryotic proteins. The catalytic functions in members include phytase, glucose-1-phosphatase and multiple inositol polyphosphate phosphatase. The in vivo roles of the mammalian acid phosphatases in branch 2 are not fully understood, although activity against lysophosphatidic acid and tyrosine-phosphorylated proteins has been demonstrated.. +PF03767 acid_phosphat_B;
HAD superfamily, subfamily IIIB (Acid phosphatase). Pfam-B_2784 (release 7.0). This family of proteins includes acid phosphatases and a number of vegetative storage proteins. . +PF00330 aconitase;
Aconitase family (aconitate hydratase). +PF00694 Aconitase C-terminal domain
Pfam-B_224 (release 2.1). Members of this family usually also match to Pfam:PF00330. This domain undergoes conformational change in the enzyme mechanism .. +PF01756 Acyl-CoA oxidase
Pfam-B_598 (release 4.2). This is a family of Acyl-CoA oxidases EC:1.3.3.6. Acyl-coA oxidase converts acyl-CoA into trans-2- enoyl-CoA .. +PF00873 AcrB/AcrD/AcrF family
Pfam-B_578 (release 3.0). Members of this family are integral membrane proteins. Some are involved in drug resistance. AcrB cooperates with a membrane fusion protein, AcrA, and an outer membrane channel TolC. The structure shows the AcrB forms a homotrimer .. +PF05058 ActA Protein
Pfam-B_5981 (release 7.7). The ActA family is found in Listeria and is associated with motility. ActA protein acts as a scaffold to assemble and activate host cell actin cytoskeletal factors at the bacterial surface, resulting in directional actin polymerisation and propulsion of the bacterium through the cytoplasm of the host cell [1,2]. . +PF00976 Corticotropin ACTH domain
Pfam-B_1057 (release 3.0). +PF00022 actin;
+PF01643 Acyl-ACP thioesterase
Pfam-B_928 (release 4.1). This family consists of various acyl-acyl carrier protein (ACP) thioesterases (TE) these terminate fatty acyl group extension via hydrolysing an acyl group on a fatty acid .. +PF02770 Acyl-CoA dehydrogenase, middle domain
Central domain of Acyl-CoA dehydrogenase has a beta-barrel fold.. +PF02771 Acyl-CoA dehydrogenase, N-terminal domain
The N-terminal domain of Acyl-CoA dehydrogenase is an all-alpha domain.. +PF02551 Acyl-CoA thioesterase
This family represents the thioesterase II domain. Two copies of this domain are found in a number of acyl-CoA thioesterases.. +PF00698 Acyl_transf;
Acyl transferase domain. Pfam-B_250 (release 2.1). +PF02273 Acyl transferase
Pfam-B_5787 (release 5.2). This bacterial family of Acyl transferases (or myristoyl-acp-specific thioesterases) catalyse the first step in the bioluminescent fatty acid reductase system.. +PF00708 Acylphosphatase
Pfam-B_686 (release 2.1). +PF01553 Acyltransferase
Pfam-B_128 (release 4.0) & Pfam-B_5069 (Release 7.5). This family contains acyltransferases involved in phospholipid biosynthesis and other proteins of unknown function . This family also includes tafazzin Swiss:Q16635, the Barth syndrome gene .. +PF02805 Metal binding domain of Ada
The Escherichia coli Ada protein repairs O6-methylguanine residues and methyl phosphotriesters in DNA by direct transfer of the methyl group to a cysteine residue. This domain contains four conserved cysteines that form a zinc binding site [1,2]. One of these cysteines is a methyl group acceptor. The methylated domain can then specifically bind to the ada box on a DNA duplex .. +PF01602 Adaptin N terminal region
Pfam-B_491 (release 4.0). This family consists of the N terminal region of various alpha, beta and gamma subunits of the AP-1, AP-2 and AP-3 adaptor protein complexes. The adaptor protein (AP) complexes are involved in the formation of clathrin-coated pits and vesicles . The N-terminal region of the various adaptor proteins (APs) is constant by comparison to the C-terminal which is variable within members of the AP-2 family ; and it has been proposed that this constant region interacts with another uniform component of the coated vesicles .. +PF03352 Methyladenine glycosylase
Pfam-B_3953 (release 6.5). The DNA-3-methyladenine glycosylase I is constitutively expressed and is specific for the alkylated 3-methyladenine DNA. . +PF02438 adeno_100;
Pfam-B_1583 (release 5.4). The late 100kD protein is a non-structural viral protein involved in the transport of hexon from the cytoplasm to the nucleus.. +PF03052 Adenoviral protein L1 52/55-kDa
Pfam-B_2151 (release 6.4). The adenoviral protein L1 52/55-kDa is expressed in both the early and late stages of infection which suggests that it could play multiple roles in the viral life cycle. The L1 52/55 kDa protein interacts with the viral IVa2 protein and is required for DNA packaging . L1 53/55-kDa is required to mediate stable association between the viral DNA and empty capsid .. +PF02703 Early E1A protein
Pfam-B_1193 (release 5.5). This is a family of adenovirus early E1A proteins. The E1A protein is 32 kDa; it can however be cleaved to yield the 28 kDa protein. The E1A protein is responsible for the transcriptional activation of the early genes within the viral genome at the start of the infection process as well as some cellular genes .. +PF01691 Adenovirus E1B 19K protein / small t-antigen
Pfam-B_1569 (release 4.1). This family consists of adenovirus E1B 19K protein or small t-antigen. The E1B 19K protein inhibits E1A induced apoptosis and hence prolongs the viability of the host cell . It can also inhibit apoptosis mediated by tumour necrosis factor alpha and Fas antigen . E1B 19K blocks apoptosis by interacting with and inhibiting the p53-inducible and death- promoting Bax protein . The E1B region of adenovirus encodes two proteins E1B 19K the small t-antigen as found in this family and E1B 55K the large t-antigen which is not found in this family; both of these proteins inhibit E1A induced apoptosis .. +PF01696 Adenovirus EB1 55K protein / large t-antigen
Pfam-B_1728 (release 4.1). This family consists of adenovirus E1B 55K protein or large t-antigen. E1B 55K binds p53 the tumour suppressor protein converting it from a transcriptional activator which responds to damaged DNA in to an unregulated repressor of genes with a p53 binding site . This protects the virus against p53 induced host antiviral responses and prevents apoptosis as induced by the adenovirus E1A protein . The E1B region of adenovirus encodes two proteins E1B 55K the large t-antigen as found in this family and E1B 19K Pfam:PF01691 the small t-antigen which is not found in this family; both of these proteins inhibit E1A induced apoptosis. This family shows distant similarities to the pectate lyase superfamily.. +PF04623 Adenovirus E1B protein N-terminus
This family constitutes the amino termini of E1B 55 kDa (Pfam:PF01696). E1B 55K binds p53 the tumour suppressor protein converting it from a transcriptional activator which responds to damaged DNA in to an unregulated repressor of genes with a p53 binding site . This protects the virus against p53 induced host antiviral responses and prevents apoptosis as induced by the adenovirus E1A protein . The role of the N terminus in the function of E1B is not known.. +PF04834 Early E3 14.5 kDa protein
Pfam-B_4148 (release 7.6). The E3B 14.5 kDa was first identified in Human adenovirus type 5. It is an integral membrane protein oriented with its C terminus in the cytoplasm. It functions to down-regulate the epidermal growth factor receptor and prevent tumour necrosis factor cytolysis. It achieves this through the interaction with E3 10.4 kDa protein [1,2].. +PF03307 Adenovirus 15.3kD protein in E3 region
Pfam-B_3512 (release 6.5). +PF02440 Adenovirus E3 region protein CR1
Pfam-B_1854 (release 5.4). +PF02439 Adenovirus E3 region protein CR2
Pfam-B_1854 (release 5.4). Early region 3 (E3) of human adenoviruses (Ads) codes for proteins that appear to control viral interactions with the host . This region called CR2 (conserved region 2) is found in Adenovirus type 19 (a subgroup D virus) 49 Kd protein in the E3 region. CR2 is also found in the 20.1 Kd protein of subgroup B adenoviruses. The function of this 50 amino acid region is unknown.. +PF03376 Adenovirus E3B protein
Pfam-B_3736 (release 6.6). +PF04528 Adenovirus early E4 34 kDa protein conserved region
Pfam-B_4904 (release 7.5). Conserved region found in the Adenovirus E4 34 kDa protein.. +PF00541 adeno_fiber;
Adenoviral fibre protein (knob domain). Specific attachment of adenovirus is achieved through interactions between host-cell receptors and the adenovirus fibre protein and is mediated by the globular carboxy-terminal domain of the adenovirus fibre protein, termed the carboxy-terminal knob domain.. +PF00608 adeno_fiber2;
Adenoviral fibre protein (repeat/shaft region). There is no separation between signal and noise. Specific attachment of adenovirus is achieved through interactions between host-cell receptors and the adenovirus fibre protein and is mediated by the globular carboxy-terminal domain of the adenovirus fibre protein, rather than the 'shaft' region represented by this family. The alignment of this family contains two copies of a fifteen residue repeat found in the 'shaft' region of adenoviral fibre proteins.. +PF04881 Adenovirus GP19K
Pfam-B_6142 (release 7.6). This 19 kDa glycoprotein binds the major histocompatibility (MHC) class I antigens in the endoplasmic reticulum (ER). The ER retention signal at the C-terminus of GP19K causes retention of the complex in the ER, preventing lysis of the cell by cytotoxic T lymphocytes .. +PF01065 Hexon, adenovirus major coat protein, N-terminal domain
Pfam-B_885 (release 3.0). Hexon is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. The penton complex, formed by the peripentonal hexons and base hexon (holding in place a fibre), lie at each of the 12 vertices . The N and C-terminal domains adopt the same PNGase F-like fold although they are significantly different in length.. +PF03678 Hexon, adenovirus major coat protein, C-terminal domain
Pfam-B_885 (release 3.0). Hexon is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. The penton complex, formed by the peripentonal hexons and base hexon (holding in place a fibre), lie at each of the 12 vertices . The N and C-terminal domains adopt the same PNGase F-like fold although they are significantly different in length.. +PF02456 Adenovirus IVa2 protein
Pfam-B_1982 (release 5.4). IVa2 protein can interact with the adenoviral packaging signal and that this interaction involves DNA sequences that have previously been demonstrated to be required for packaging . During the course of lytic infection, the adenovirus major late promoter (MLP) is induced to high levels after replication of viral DNA has started. IVa2 is a transcriptional activator of the major late promoter . . +PF01686 Adenovirus penton base protein
Pfam-B_1180 (release 4.1). This family consists of various adenovirus penton base proteins, from both the Mastadenoviradae having mammalian hosts and the Aviadenoviradae having avian hosts. The penton base is a major structural protein forming part of the penton which consists of a base and a fibre, the pentons hold a morphologically prominent position at the vertex capsomer in the adenovirus particle . In mammalian adenovirus there is only one tail on each base where as in avian adenovirus there are two .. +PF03955 Adenovirus hexon-associated protein (IX)
Hexon (PF01065) is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX.. +PF03910 Adenovirus minor core protein PV
+PF01310 Adenovirus hexon associated protein, protein VIII
Pfam-B_1405 (release 3.0). See Pfam:PF01065. This family represents Hexon.. +PF02459 Adenoviral DNA terminal protein
Pfam-B_1602 (release 5.4). This protein is covalently attached to the termini of replicating DNA in vivo .. +PF03228 Adenoviral core protein VII
Pfam-B_3049 (release 6.5). The function of this protein is unknown. It has a conserved amino terminus of 50 residues followed by a positively charged tail, suggesting it may interact with nucleic acid. The major core protein of the adenovirus, protein VII, was found to be associated with viral DNA throughout infection. The precursor to protein VII were shown to be in vivo and in vitro acceptors of ADP-ribose. The ADP-ribosylated core proteins were assembled into mature virus particles. ADP-ribosylation of adenovirus core proteins may have a role in virus decapsidation.. +PF04439 Streptomycin adenylyltransferase
Also known as Aminoglycoside 6- adenylyltransferase (EC:2.7.7.-), this protein confers resistance to aminoglycoside antibiotics.. +PF01928 Adenylate_cyc_2;
These sequences are functionally identified as members of the adenylate cyclase family, which catalyses the conversion of ATP to 3',5'-cyclic AMP and pyrophosphate. Six distinct non-homologous classes of AC have been identified. The structure of three classes of adenylyl cyclases have been solved .. +PF01295 Adenylate_cycla;
Adenylate cyclase, class-I. +PF00709 Adenylosuccinate synthetase
Pfam-B_690 (release 2.1). +PF00106 short chain dehydrogenase
This family contains a wide variety of dehydrogenases.. +PF04619 Dr-family adhesin
This family of adhesins bind to the Dr blood group antigen component of decay-accelerating factor. This mediates adherence of uropathogenic Escherichia coli to the urinary tract. This family contains both fimbriated and afimbriated adherence structures . This protein also confers the phenotype of mannose-resistant hemagglutination, which can be inhibited by chloramphenicol. The N terminal portion of the protein is thought to be responsible for chloramphenicol sensitivity .. +PF03257 Mycoplasma adhesin P1
Pfam-B_4117 (release 6.5). This family corresponds to a short 100 residue region found in adhesins from Mycoplasmas.. +PF00406 adenylatekinase;
+PF05191 Adenylate kinase, active site lid
Comparisons of adenylate kinases have revealed a particular divergence in the active site lid. In some organisms, particularly the Gram-positive bacteria, residues in the lid domain have been mutated to cysteines and these cysteine residues are responsible for the binding of a zinc ion. The bound zinc ion in the lid domain, is clearly structurally homologous to Zinc-finger domains. However, it is unclear whether the adenylate kinase lid is a novel zinc-finger DNA/RNA binding domain, or that the lid bound zinc serves a purely structural function .. +PF05221 S-adenosyl-L-homocysteine hydrolase
Pfam-B_157 (release 2.1). +PF00670 AdoHcyase;
S-adenosyl-L-homocysteine hydrolase, NAD binding domain. Pfam-B_157 (release 2.1). +PF03747 ADP-ribosyl_GH;
ADP-ribosylglycohydrolase. This family includes enzymes that ADP-ribosylations, for example ADP-ribosylarginine hydrolase EC:3.2.2.19 cleaves ADP-ribose-L-arginine . The family also includes dinitrogenase reductase activating glycohydrolase . Most surprisingly the family also includes jellyfish crystallins , these proteins appear to have lost the presumed active site residues.. +PF04587 ADP-specific Phosphofructokinase/Glucokinase conserved region
Pfam-B_4731 (release 7.5). In archaea a novel type of glycolytic pathway exists that is deviant from the classical Embden-Meyerhof pathway. This pathway utilises two novel proteins: an ADP-dependent Glucokinase and an ADP-dependent Phosphofructokinase. This conserved region is present at the C-terminal of both these proteins. Interestingly this family contains sequences from higher eukaryotes. [1,2,3]. . +PF01117 Aerolysin toxin
This family represents the pore forming lobe of aerolysin.. +PF05110 AF-4 proto-oncoprotein
Pfam-B_6407 (release 7.7). This family consists of AF4 (Proto-oncogene AF4) and FMR2 (Fragile X E mental retardation syndrome) nuclear proteins. These proteins have been linked to human diseases such as acute lymphoblastic leukaemia and mental retardation . The family also contains a Drosophila AF4 protein homologue Lilliputian which contains an AT-hook domain. Lilliputian represents a novel pair-rule gene that acts in cytoskeleton regulation, segmentation and morphogenesis in Drosophila .. +PF03969 AFG1-like ATPase
This family of proteins contains a P-loop motif and are predicted to be ATPases.. +PF01314 AFOR;
Aldehyde ferredoxin oxidoreductase, domains 2 & 3. Aldehyde ferredoxin oxidoreductase (AOR) catalyses the reversible oxidation of aldehydes to their corresponding carboxylic acids with their accompanying reduction of the redox protein ferredoxin. This family is composed of two structural domains that bind the tungsten cofactor via DXXGL(C/D) motifs. In addition to maintaining specific binding interactions with the cofactor, another role for domains 2 and 3 may be to regulate substrate access to AOR .. +PF02730 Aldehyde ferredoxin oxidoreductase, N-terminal domain
Aldehyde ferredoxin oxidoreductase (AOR) catalyses the reversible oxidation of aldehydes to their corresponding carboxylic acids with their accompanying reduction of the redox protein ferredoxin. This domain interacts with the tungsten cofactor .. +PF02420 Insect antifreeze protein repeat
This family of extracellular proteins is involved in stopping the formation of ice crystals at low temperatures. The proteins are composed of a 12 residue repeat that forms a structural repeat. The structure of the repeats is a beta helix . Each repeat contains two cys residues that form a disulphide bridge.. +PF03756 AfsA_repeat;
A-factor biosynthesis hotdog domain. The AfsA family are key enzymes in A-factor biosynthesis, which is essential for streptomycin production and resistance. This domain is distantly related to the thioester dehydratase FabZ family and therefore has a HotDog domain .. +PF04671 Erythrocyte membrane-associated giant protein antigen 332
Pfam-B_4587 (release 7.5). To date many different Plasmodium antigens recognised by the hyperimmune system human sera have been cloned, sequenced and characterised. The majority contain tandemly repeated amino acid sequences which make up a considerable portion of the protein sequence. It has been suggested that these repeat-containing antigens may provide an immunological 'smokescreen' to the parasite in order to evade the human immune system. This repeat is found exclusively in the Plasmodium falciparum Ag332 protein and occupies most of its length .. +PF01453 Agglutinin; MMBL2; MMBL; Agglutinin;
D-mannose binding lectin. Prodom_2511 (release 99.1). These proteins include mannose-specific lectins from plants as well as bacteriocins from bacteria .. +PF05039 agouti;
Pfam-B_5381 (release 7.7). The agouti protein regulates pigmentation in the mouse hair follicle producing a black hair with a subapical yellow band. A highly homologous protein agouti signal protein (ASIP) is present in humans and is expressed at highest levels in adipose tissue where it may play a role in energy homeostasis and possibly human pigmentation .. +PF04647 Accessory gene regulator B
Pfam-B_5200 (release 7.5). The agr locus consists of two transcripts: RNAII and RNAIII. RNAII encodes four genes (agrA, B, C, and D) whose gene products assemble a quorum sensing system. AgrB and AgrD are essential for the production of the autoinducing peptide which functions as a signal for quorum sensing. AgrB is a transmembrane protein .. +PF04730 Agrobacterium VirD5 protein
Pfam-B_3261 (release 7.5). The virD operon in Agrobacterium encodes a site-specific endonuclease, and a number of other poorly characterised products. This family represents the VirD5 protein.. +PF00578 AhpC/TSA family
MRC-LMB Genome group. This family contains proteins related to alkyl hydroperoxide reductase (AhpC) and thiol specific antioxidant (TSA). . +PF02626 DUF183;
Allophanate hydrolase subunit 2. This domain forms the second subunit of allophanate hydrolase. In yeast urea amidolyase (Swiss:P32528) this domain is found between Pfam:PF00289 and Pfam:PF00364.. +PF01808 AICARFT/IMPCHase bienzyme
Pfam-B_1613 (release 4.2). This is a family of bifunctional enzymes catalysing the last two steps in de novo purine biosynthesis. The bifunctional enzyme is found in both prokaryotes and eukaryotes. The second last step is catalysed by 5-aminoimidazole-4-carboxamide ribonucleotide formyltransferase EC:2.1.2.3 (AICARFT), this enzyme catalyses the formylation of AICAR with 10-formyl-tetrahydrofolate to yield FAICAR and tetrahydrofolate . This is catalysed by a pair of C-terminal deaminase fold domains in the protein , where the active site is formed by the dimeric interface of two monomeric units . The last step is catalysed by the N-terminal IMP (Inosine monophosphate) cyclohydrolase domain EC:3.5.4.10 (IMPCHase), cyclizing FAICAR (5-formylaminoimidazole-4-carboxamide ribonucleotide) to IMP . . +PF04548 AIG1 family
Pfam-B_1846 (release 7.5). Arabidopsis protein AIG1 appears to be involved in plant resistance to bacteria.. +PF00731 AIR carboxylase
Pfam-B_462 (release 2.1). Members of this family catalyse the decarboxylation of 1-(5-phosphoribosyl)-5-amino-4-imidazole-carboxylate (AIR). This family catalyse the sixth step of de novo purine biosynthesis. Some members of this family contain two copies of this domain.. +PF00586 AIR synthase related protein, N-terminal domain
MRC-LMB Genome Group. This family includes Hydrogen expression/formation protein HypE Swiss:P24193, AIR synthases Swiss:P08178 EC:6.3.3.1, FGAM synthase Swiss:P35852 EC:6.3.5.3 and selenide, water dikinase Swiss:P16456 EC:2.7.9.3. The N-terminal domain of AIR synthase forms the dimer interface of the protein, and is suggested as a putative ATP binding domain .. +PF02769 AIR synthase related protein, C-terminal domain
MRC-LMB Genome Group. This family includes Hydrogen expression/formation protein HypE Swiss:P24193, AIR synthases Swiss:P08178 EC:6.3.3.1, FGAM synthase Swiss:P35852 EC:6.3.5.3 and selenide, water dikinase Swiss:P16456 EC:2.7.9.3. The function of the C-terminal domain of AIR synthase is unclear, but the cleft formed between N and C domains is postulated as a sulphate binding site .. +PF04988 A-kinase anchoring protein 95 (AKAP95)
Pfam-B_5366 (release 7.6). A-kinase (or PKA)-anchoring protein AKAP95 is implicated in mitotic chromosome condensation by acting as a targeting molecule for the condensin complex. The protein contains two zinc fingers which are thought to mediate the binding of AKAP95 to DNA .. +PF02983 AL_protease;
Alpha-lytic protease prodomain. +PF00490 Delta-aminolevulinic acid dehydratase
+PF00171 aldedh;
Aldehyde dehydrogenase family. This family of dehydrogenases act on aldehyde substrates. Members use NADP as a cofactor. The family includes the following members: The prototypical members are the aldehyde dehydrogenases Swiss:P00352 EC:1.2.1.3. Succinate-semialdehyde dehydrogenase Swiss:P25526 EC:1.2.1.16. Lactaldehyde dehydrogenase Swiss:P25553 EC:1.2.1.22. Benzaldehyde dehydrogenase Swiss:P43503 EC:1.2.1.28. Methylmalonate-semialdehyde dehydrogenase Swiss:Q02252 EC:1.2.1.27. Glyceraldehyde-3-phosphate dehydrogenase Swiss:P81406 EC:1.2.1.9. Delta-1-pyrroline-5-carboxylate dehydrogenase Swiss:P30038 EC: 1.5.1.12. Acetaldehyde dehydrogenase Swiss:P17547 EC:1.2.1.10. Glutamate-5-semialdehyde dehydrogenase Swiss:P07004 EC:1.2.1.41. This family also includes omega crystallin Swiss:P30842 an eye lens protein from squid and octopus that has little aldehyde dehydrogenase activity.. +PF01081 KDPG and KHG aldolase
This family includes the following members: 4-hydroxy-2-oxoglutarate aldolase (KHG-aldolase) Phospho-2-dehydro-3-deoxygluconate aldolase (KDPG-aldolase). +PF00596 Aldolase_class_II;
Class II Aldolase and Adducin N-terminal domain. MRC-LMB Genome Group. This family includes class II aldolases and adducins which have not been ascribed any enzymatic function.. +PF03752 DUF312;
Short repeats of unknown function. This set of repeats is found in a small family of secreted proteins of no known function, though they are possibly involved in signal transduction. ALF stands for Alanine-rich (AL) - conserved Phenylalanine (F).. +PF05208 ALG3 protein
Pfam-B_3416 (release 7.7). The formation of N-glycosidic linkages of glycoproteins involves the ordered assembly of the common Glc3Man9GlcNAc2 core-oligosaccharide on the lipid carrier dolichyl pyrophosphate. Whereas early mannosylation steps occur on the cytoplasmic side of the endoplasmic reticulum with GDP-Man as donor, the final reactions from Man5GlcNAc2-PP-Dol to Man9GlcNAc2-PP-Dol on the lumenal side use Dol-P-Man . ALG3 gene encodes the Dol-P-Man:Man5GlcNAc2-PP-Dol mannosyltransferase.. +PF03561 Allantoicase repeat
This family is found in pairs in Allantoicases, forming the majority of the protein. These proteins allow the use of purines as secondary nitrogen sources in nitrogen-limiting conditions through the reaction: allantoate + H(2)0 = (-)-ureidoglycolate + urea.. +PF04864 Allinase
Pfam-B_4527 (release 7.6). Allicin is a thiosulphinate that gives rise to dithiines, allyl sulphides and ajoenes, the three groups of active compounds in Allium species. Allicin is synthesised from sulfoxide cysteine derivatives by alliinase (EC:4.4.1.4), whose C-S lyase activity cleaves C(beta)-S(gamma) bonds. It is thought that this enzyme forms part of a primitive plant defence system.. +PF04030 D-arabinono-1,4-lactone oxidase
Pfam-B_33547 (release 7.3);. This domain is specific to D-arabinono-1,4-lactone oxidase EC:1.1.3.- , which is involved in the final step of the D-erythroascorbic acid biosynthesis pathway .. +PF00128 alpha-amylase;
Alpha amylase, catalytic domain. Alpha amylase is classified as family 13 of the glycosyl hydrolases. The structure is an 8 stranded alpha/beta barrel containing the active site, interrupted by a ~70 a.a. calcium-binding domain protruding between beta strand 3 and alpha helix 3, and a carboxyl-terminal Greek key beta-barrel domain.. +PF02903 alpha-amylase_N;
Alpha amylase, N-terminal ig-like domain. +PF02296 Alpha adaptin AP2, C-terminal domain
Pfam-B_8859 (release 5.2). Alpha adaptin is a hetero tetramer which regulates clathrin-bud formation. The carboxyl-terminal appendage of the alpha subunit regulates translocation of endocytic accessory proteins to the bud site.. +PF02883 Adaptin C-terminal domain
Pfam-B_8859 (release 5.2). Alpha adaptin is a heterotetramer which regulates clathrin-bud formation. The carboxyl-terminal appendage of the alpha subunit regulates translocation of endocytic accessory proteins to the bud site. This ig-fold domain is found in alpha, beta and gamma adaptins.. +PF00944 Alpha_core;
Alphavirus core protein . Pfam-B_266 (release 3.0). Also known as coat protein C and capsid protein C. This makes the literature very confusing. Alphaviruses consist of a nucleoprotein core, a lipid membrane which envelopes the core, and glycoprotein spikes protruding from the lipid membrane.. +PF01589 Alphavirus E1 glycoprotein
Pfam-B_587 (release 4.1). E1 forms a heterodimer with E2 Pfam:PF00943. The virus spikes are made up of 80 trimers of these heterodimers (sindbis virus) .. +PF00943 Alphavirus E2 glycoprotein
Pfam-B_308 (release 3.0). E2 forms a heterodimer with E1. The virus spikes are made up of 80 trimers of these heterodimers (sindbis virus) .. +PF01563 Alphavirus E3 glycoprotein
Pfam-B_285 (release 4.0). This protein is found in some alphaviruses as a virion associated spike protein .. +PF03229 Alphavirus glycoprotein J
Pfam-B_3350 (release 6.5). +PF01120 Alpha-L-fucosidase
+PF02232 Alpha trans-inducing protein (Alpha-TIF)
Pfam-B_1799 (release 5.2). Alpha-TIF, a virion protein (VP16), is involved in transcriptional activation of viral immediate early (IE) promoters (alpha genes). Specificity of Swiss:P23990 for IE genes is conferred by the 400 residue N-terminal, the 80 residue C-terminal is responsible for transcriptional activation .. +PF02430 Apical membrane antigen 1
Pfam-B_2016 (release 5.4). Apical membrane antigen 1 (AMA-1) is a Plasmodium asexual blood-stage antigen. It has been suggested that positive selection operates on the AMA-1 gene in regions coding for antigenic sites .. +PF03913 Amb V Allergen
+PF02948 Amelogenin
Pfam-B_402 (release 6.4). Amelogenins play a role in biomineralisation. They seem to regulate the formation of crystallites during the secretory stage of tooth enamel development, and are thought to play a major role in the structural organisation and mineralisation of developing enamel. They are found in the extracellular matrix. Mutations in X-chromosomal amelogenin can cause Amelogenesis imperfecta .. +PF04709 Anti-Mullerian hormone, N terminal region
Anti-Mullerian hormone, AMH is a signalling molecule involved in male and female sexual differentiation . Defects in synthesis or action of AMH cause persistent Mullerian duct syndrome (PMDS), a rare form of male pseudohermaphroditism . This family represents the N terminal part of the protein, which is not thought to be essential for activity . AMH contains a TGF-beta domain (Pfam:PF00019), at the C terminus.. +PF01425 Amidase
Pfam-B_191 (release 2.1). +PF01832 Amidase_4;
Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase. Pfam-B_888 (release 4.0). This family includes Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase EC:3.2.1.96, as well as the flagellar protein J Swiss:P75942 that has been shown to hydrolyse peptidoglycan .. +PF02274 Amidinotransferase
Pfam-B_5784 (release 5.2) and Pfam-B_1850 (release 5.5). This family contains glycine (EC:2.1.4.1) and inosamine (EC:2.1.4.2) amidinotransferases, enzymes involved in creatine and streptomycin biosynthesis respectively. This family also includes arginine deiminases, EC:3.5.3.6. These enzymes catalyse the reaction: arginine + H2O <=> citrulline + NH3. Also found in this family is the Streptococcus anti tumour glycoprotein (Swiss:P16962).. +PF01979 Adenine_deam;
Amidohydrolase family. This family of enzymes is a large metal dependent hydrolase superfamily . The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. The adenine deaminase reaction is important for adenine utilisation as a purine and also as a nitrogen source . This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25. These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit . Dihydroorotases (EC:3.5.2.3) are also included [4-5]. . +PF04909 Amidohydrolase
Pfam-B_4687 (release 7.6). These proteins are amidohydrolases that are related to Pfam:PF01979 .. +PF00155 aminotran_1; aminotran_1_2;
Aminotransferase class I and II. +PF00202 aminotran_3;
Aminotransferase class-III. +PF01063 AA_transferase_C4;aminotran_4;
Aminotransferase class IV. Pfam-B_607 (release 3.0). The D-amino acid transferases (D-AAT) are required by bacteria to catalyse the synthesis of D-glutamic acid and D-alanine, which are essential constituents of bacterial cell wall and are the building block for other D-amino acids. Despite the difference in the structure of the substrates, D-AATs and L-ATTs have strong similarity.. +PF00266 aminotran_5;
Aminotransferase class-V. This domain is found in amino transferases, and other enzymes including cysteine desulphurase EC:4.4.1.-.. +PF02293 AmiS/UreI family transporter
This family includes UreI and proton gated urea channel as well as putative amide transporters .. +PF02461 Ammonia monooxygenase
Pfam-B_2301 (release 5.4). Ammonia monooxygenase plays a key role in the nitrogen cycle and degrades a wide range of hydrocarbons and halogenated hydrocarbons.. +PF00909 Ammonium Transporter Family
Pfam-B_596 (release 3.0). +PF05145 Putative ammonia monooxygenase
This family are annotated by COGS as putative ammonia monooxygenase enzymes.. +PF04896 Ammonia monooxygenase/methane monooxygenase, subunit C
Pfam-B_6611 (release 7.6). Ammonia monooxygenase plays a key role in the nitrogen cycle and degrades a wide range of hydrocarbons and halogenated hydrocarbons. This family represents the AmoC subunit. It also includes the particulate methane monooxygenase subunit PmoC from methanotrophic bacteria [1,2].. +PF03782 AMOP domain
This domain may have a role in cell adhesion. It is called the AMOP domain after Adhesion associated domain in MUC4 and Other Proteins. This domain is extracellular and contains a number of cysteines that probably form disulphide bridges .. +PF04739 5'-AMP-activated protein kinase beta subunit, interation domain
This region is found in the beta subunit of the 5'-AMP-activated protein kinase complex, and its yeast homologues Sip1, Sip2 and Gal83, which are found in the SNF1 kinase complex . This region is sufficient for interaction of this subunit with the kinase complex, but is not solely responsible for the interaction, and the interaction partner is not known . The isoamylase N-terminal domain (Pfam:PF02922) is sometimes found in proteins belonging to this family.. +PF02166 Androgen receptor
+PF03139 Vanadium/alternative nitrogenase delta subunit
Pfam-B_1227 (release 6.5). The nitrogenase complex EC:1.18.6.1 catalyses the conversion of molecular nitrogen to ammonia (nitrogen fixation) as follows: 8 reduced ferredoxin + 8 H(+) + N(2) + 16 ATP <=> 8 oxidised ferredoxin + 2 NH(3) + 16 ADP + 16 phosphate. The complex is hexameric, consisting of 2 alpha, 2 beta, and 2 delta subunits. This family represents the delta subunit of a group of nitrogenases that do not utilise molybdenum (Mo) as a cofactor, but instead use either vanadium (V nitrogenases), or iron (alternative nitrogenases). V nitrogenases are encoded by vnf operons, and alternative nitrogenases by anf operons. The delta subunits are VnfG and AnfG, respectively.. +PF00212 Atrial natriuretic peptide
+PF03452 Anp1
Pfam-B_4441 (release 6.6). The members of this family (Anp1, Van1 and Mnn9) are membrane proteins required for proper Golgi function. These proteins co-localise within the cis Golgi, and that they are physically associated in two distinct complexes .. +PF03374 Phage antirepressor protein KilAC domain
Pfam-B_3485 (release 6.6). This domain was called the KilAC domain by Iyer and colleagues .. +PF04715 Anthranilate synthase component I, N terminal region
Anthranilate synthase (EC:4.1.3.27) catalyses the first step in the biosynthesis of tryptophan. Component I catalyses the formation of anthranilate using ammonia and chorismate. The catalytic site lies in the adjacent region, described in the chorismate binding enzyme family (Pfam:PF00425). This region is involved in feedback inhibition by tryptophan . This family also contains a region of Para-aminobenzoate synthase component I (EC 4.1.3.-).. +PF03497 Anthrax toxin LF subunit
+PF02522 Aminoglycoside 3-N-acetyltransferase
Pfam-B_1432 (release 5.4). This family consists of bacterial aminoglycoside 3-N-acetyltransferases EC:2.3.1.81, these catalyse the reaction: Acetyl-CoA + a 2-deoxystreptamine antibiotic <=> CoA + N3'-acetyl-2-deoxystreptamine antibiotic. The enzyme can use a range of antibiotics with 2-deoxystreptamine rings as acceptor for its acetyltransferase activity, this inactivates and confers resistance to gentamicin, kanamycin, tobramycin, neomycin and apramycin amongst others.. +PF03230 Antirestriction protein
Pfam-B_3190 (release 6.5). This family includes various protein that are involved in antirestriction. The ArdB protein Swiss:Q47057 efficiently inhibits restriction by members of the three known families of type I systems of E. coli .. +PF03589 Antitermination protein
+PF01786 Alternative oxidase
Pfam-B_1154 (release 4.2). The alternative oxidase is used as a second terminal oxidase in the mitochondria, electrons are transferred directly from reduced ubiquinol to oxygen forming water . This is not coupled to ATP synthesis and is not inhibited by cyanide, this pathway is a single step process . In rice the transcript levels of the alternative oxidase are increased by low temperature .. +PF01261 AP_endonulease2;
Xylose isomerase-like TIM barrel. Prosite & Pfam-B_3980 (Release 7.5). This TIM alpha/beta barrel structure is found in xylose isomerase (Swiss:P19148) and in endonuclease IV (Swiss:P12638, EC:3.1.21.2). This domain is also found in the N termini of bacterial myo-inositol catabolism proteins. These are involved in the myo-inositol catabolism pathway, and is required for growth on myo-inositol in Rhizobium leguminosarum bv. viciae .. +PF01636 Phosphotransferase enzyme family
Pfam-B_840 (release 4.1). This family consists of bacterial antibiotic resistance proteins, which confer resistance to various aminoglycosides they include: aminoglycoside 3'-phosphotransferase or kanamycin kinase / neomycin-kanamycin phosphotransferase and streptomycin 3''-kinase or streptomycin 3''-phosphotransferase. The aminoglycoside phosphotransferases inactivate aminoglycoside antibiotics via phosphorylation . This family also includes homoserine kinase. This family is related to fructosamine kinase Pfam:PF03881.. +PF02558 Ketopantoate reductase PanE/ApbA
This is a family of 2-dehydropantoate 2-reductases also known as ketopantoate reductases, EC:1.1.1.169. The reaction catalysed by this enzyme is: (R)-pantoate + NADP(+) <=> 2-dehydropantoate + NADPH. ApbA catalyses the NADPH reduction of ketopantoic acid to pantoic acid in the alternative pyrimidine biosynthetic (APB) pathway . ApbA and PanE are allelic . ApbA, the ketopantoate reductase enzyme is required for the synthesis of thiamine via the APB biosynthetic pathway .. +PF03256 Anaphase-promoting complex, subunit 10 (APC10)
Pfam-B_4273 (release 6.5). +PF04110 Ubiquitin-like autophagy protein Apg12
Pfam-B_9471 (release 7.3);. In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells . The Apg12 system is one of the ubiquitin-like protein conjugation systems conserved in eukaryotes. It was first discovered in yeast during systematic analyses of the apg mutants defective in autophagy. Covalent attachment of Apg12-Apg5 is essential for autophagy [2,3,4].. +PF04108 Autophagy protein Apg17
Pfam-B_71163 (release 7.3);. Apg17 is required for activating Apg1 protein kinases .. +PF04111 Autophagy protein Apg6
Pfam-B_11747 (release 7.3);. In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells . Apg6/Vps30p has two distinct functions in the autophagic process, either associated with the membrane or in a retrieval step of the carboxypeptidase Y sorting pathway .. +PF04109 Autophagy protein Apg9
Pfam-B_12479 (release 7.3);. In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells . Apg9 plays a direct role in the formation of the cytoplasm to vacuole targeting and autophagic vesicles, possibly serving as a marker for a specialised compartment essential for these vesicle-mediated alternative targeting pathways .. +PF04655 Aminoglycoside/hydroxyurea antibiotic resistance kinase
Pfam-B_4369 (release 7.5). The aminoglycoside phosphotransferases achieve inactivation of their antibiotic substrates by phosphorylation utilising ATP. Likewise hydroxyurea is inactivated by phosphorylation of the hydroxy group in the hydroxylamine moiety [1,2,3].. +PF00807 Apidaecin
Pfam-B_1489 (release 2.1). These antibacterial peptides are found in bees. These heat-stable, non-helical peptides are active against a wide range of plant-associated bacteria and some human pathogens . The Pfam alignment includes the propeptide and apidaecin sequence.. +PF04711 Apolipoprotein A-II (ApoA-II)
Apolipoprotein A-II (ApoA-II) is the second major apolipoprotein of high density lipoprotein in human plasma. Mature ApoA-II is present as a dimer of two 77-amino acid chains joined by a disulphide bridge . ApoA-II regulates many steps in HDL metabolism, and its role in coronary heart disease is unclear . In bovine serum, the ApoA-II homologue is present in almost free form. Bovine ApoA-II shows antimicrobial activity against Escherichia coli and yeasts in phosphate buffered saline (PBS) .. +PF04691 Apolipoprotein C-I (ApoC-1)
Apolipoprotein C-I (ApoC-1) is a water-soluble protein component of plasma lipoprotein. It solubilises lipids and regulates lipid metabolism. ApoC-1 transfers among HDL (high density lipoprotein), VLDL (very low-density lipoprotein) and chylomicrons. ApoC-1 activates lecithin:cholesterol acyltransferase (LCAT), inhibits cholesteryl ester transfer protein, can inhibit hepatic lipase and phospholipase 2 and can stimulate cell growth. ApoC-1 delays the clearance of beta-VLDL by inhibiting its uptake via the LDL receptor-related pathway . ApoC-1 has been implicated in hypertriglyceridemia , and Alzheimer's disease .. +PF01333 Apocytochrome_F;
Apocytochrome F, C-terminal. Pfam-B_1294 (release 3.0). This is a sub-family of cytochrome C. See Pfam:PF00034.. +PF01442 Apolipoprotein A1/A4/E domain
Prodom_1521 (release 99.1). These proteins contain several 22 residue repeats which form a pair of alpha helices. This family includes: Swiss:P02647 Apolipoprotein A-I. Swiss:P06727 Apolipoprotein A-IV. Swiss:P02649 Apolipoprotein E.. +PF01583 Adenylylsulphate kinase
Pfam-B_578 (release 4.1). Enzyme that catalyses the phosphorylation of adenylylsulphate to 3'-phosphoadenylylsulfate. This domain contains an ATP binding P-loop motif.. +PF03440 Aerolysin/Pertussis toxin (APT) domain
This family represents the N-terminal domain of aerolysin and pertussis toxin and has a type-C lectin like fold.. +PF02610 L-arabinose isomerase
This is a family of L-arabinose isomerases, AraA, EC:5.3.1.4. These enzymes catalyse the reaction: L-arabinose <=> L-ribulose. This reaction is the first step in the pathway of L-arabinose utilisation as a carbon source after entering the cell L-arabinose is converted into L-ribulose by the L-arabinose isomerases enzyme .. +PF02311 AraC-like ligand binding domain
Pfam-B_12588 (release 5.2). This family represents the arabinose-binding and dimerisation domain of the bacterial gene regulatory protein AraC. The domain is found in conjunction with the helix-turn-helix (HTH) DNA-binding motif Pfam:PF00165. This domain is distantly related to the Cupin domain Pfam:PF00190.. +PF03869 Arc-like DNA binding domain
Arc repressors act by the cooperative binding of two Arc repressor dimers to a 21-base-pair operator site. Each Arc dimer uses an antiparallel beta-sheet to recognise bases in the major groove .. +PF04659 Archaeal flagella protein
Pfam-B_4437 (release 7.5). Family of archaeal flaD and flaE proteins. Conserved region found at N-terminus of flaE but towards the C-terminus of flaD . . +PF01917 Archaebacterial flagellin
Enright A & COG3354 & COG3353. Members of this family are the proteins that form the flagella in archaebacteria.. +PF01637 Archaeal_ATPase;
Pfam-B_1507 (release 4.1). This family contains a conserved P-loop motif that is involved in binding ATP. This family is almost exclusively found in archaebacteria and particularly in Methanococcus jannaschii that encodes sixteen members of this family.. +PF00798 Arenavirus glycoprotein
Pfam-B_1047 (release 2.1). +PF00843 Arenavirus nucleocapsid protein
Pfam-B_1333 (release 2.1). +PF00025 arf;
ADP-ribosylation factor family. Pfam combines a number of different Prosite families together. +PF01316 Arginine repressor, DNA binding domain
+PF02863 Arginine repressor, C-terminal domain
+PF00491 arginase;
+PF00764 Arginosuccinate synthase
Pfam-B_888 (release 2.1). This family contains a PP-loop motif .. +PF01960 ArgJ family
Members of the ArgJ family catalyse the first EC:2.3.1.1 and fifth steps EC:2.3.1.35 in arginine biosynthesis. . +PF03308 ArgK protein
Pfam-B_3540 (release 6.5). The ArgK protein acts as an ATPase enzyme and as a kinase, and phosphorylates periplasmic binding proteins involved in the LAO (lysine, arginine, ornithine)/AO transport systems.. +PF02374 Anion-transporting ATPase
Pfam-B_1201 (release 5.2). This Pfam family represents a conserved domain, which is sometimes repeated, in an anion-transporting ATPase. The ATPase is involved in the removal of arsenite, antimonite, and arsenate from the cell. . +PF02040 Arsenical pump membrane protein
+PF03960 ArsC family
This family is related to glutaredoxins Pfam:PF00462.. +PF01129 NAD:arginine ADP-ribosyltransferase
+PF02497 Art_glycop;
Arterivirus glycoprotein. Pfam-B_787 (release 5.4). This is a family of structural glycoproteins from arterivirus that corresponds to open reading frame 4 (ORF4) of the virus.. +PF01481 Arte_nucleocap;
Arterivirus nucleocapsid protein. Prodom_2418 (release 99.1). +PF01606 Arterivirus envelope protein
Pfam-B_664 (release 4.1). This family consists of viral envelope proteins from the arterivirus genus; this includes porcine reproductive and respiratory virus (PRRSV) envelope protein GP3 and lactate dehydrogenase elevating virus (LDV) structural glycoprotein. Arteriviruses consists of positive ssRNA and do not have a DNA stage.. +PF00951 Arteri_glycop;
Arterivirus GL envelope glycoprotein. Pfam-B_425 (release 3.0). Arteriviruses encode 4 envelope proteins, Gl, Gs, M and N. Gl envelope protein, is encoded in ORF5, and is 30- 45 kDa in size. Gl is heterogenously glycosylated with N-acetyllactosamine in a cell-type-specific manner. The Gl glycoprotein expresses the neutralisation determinants.. +PF01097 Defensin; Arthro_defensin;
+PF04161 Arv1-like family
Pfam-B_9351 (release 7.3);. Arv1 is a transmembrane protein with potential zinc-binding motifs. ARV1 is a novel mediator of eukaryotic sterol homeostasis .. +PF01731 Arylesterase
Pfam-B_2101 (release 4.1). This family consists of arylesterases (Also known as serum paraoxonase) EC:3.1.1.2. These enzymes hydrolyse organophosphorus esters such as paraoxon and are found in the liver and blood. They confer resistance to organophosphate toxicity . Human arylesterase (PON1) Swiss:P27169 is associated with HDL and may protect against LDL oxidation .. +PF00858 Amiloride-sensitive sodium channel
Pfam-B_415 (release 3.0). +PF01671 African swine fever virus multigene family 360 protein
Pfam-B_1174 (release 4.1). The multigene family 360 proteins are found within the African swine fever virus (ASF) genome, which consists of dsDNA and has similar structural features to the poxviruses . The biological function of this family is not known, although Swiss:Q65137 is a major structural protein .. +PF05170 AsmA family
The AsmA gene, whose product is involved in the assembly of outer membrane proteins in Escherichia coli . AsmA mutations were isolated as extragenic suppressors of an OmpF assembly mutant . AsmA may have a role in LPS biogenesis .. +PF00733 Asparagine synthase
Pfam-B_443 (release 2.1). This family is always found associated with Pfam:PF00310. Members of this family catalyse the conversion of aspartate to asparagine.. +PF03590 Aspartate-ammonia ligase
TIGRFAMs, Griffiths-Jones SR. +PF00026 asp;
Eukaryotic aspartyl protease. Aspartyl (acid) proteases include pepsins, cathepsins, and renins. Two-domain structure, probably arising from ancestral duplication. This family does not include the retroviral nor retrotransposon proteases (Pfam:PF00077), which are much smaller and appear to be homologous to a single domain of the eukaryotic asp proteases.. +PF02261 Aspartate decarboxylase
Pfam-B_3879 (release 5.2). Decarboxylation of aspartate is the major route of beta-alanine production in bacteria, and is catalysed by the enzyme aspartate decarboxylase EC:4.1.1.11 which requires a pyruvoyl group for its activity. It is synthesised initially as a proenzyme which is then proteolytically cleaved to an alpha (C-terminal) and beta (N-terminal) subunit and a pyruvoyl group. This family contains both chains of aspartate decarboxylase.. +PF00710 Asparaginase
Pfam-B_652 (release 2.1). +PF01112 Asparaginase
+PF04958 Arginine N-succinyltransferase beta subunit
Arginine N-succinyltransferase EC:2.3.1.109 catalyses the transfer of succinyl-CoA to arginine to produce succinylarginine. This is the first step in arginine catabolism by the arginine succinyltransferase pathway.. +PF04996 Succinylarginine dihydrolase
This enzyme transforms N(2)-succinylglutamate into succinate and glutamate. This is the fifth and last step in arginine catabolism by the arginine succinyltransferase pathway.. +PF04952 Aste_AspA;
Succinylglutamate desuccinylase / Aspartoacylase family. COG2988 & Pfam-B_15640 (release 10.0). +PF03115 Astrovirus capsid protein precursor
Pfam-B_2957 (release 6.5). This product is encoded by astrovirus ORF2, one of the three astrovirus ORFs (1a, 1b, 2). The 87kD precursor protein undergoes an intracellular cleavage to form a 79kD protein. Subsequently, extracellular trypsin cleavage yields the three proteins forming the infectious virion .. +PF04377 Arginine-tRNA-protein transferase, C terminus
This family represents the C terminal region of the enzyme arginine-tRNA-protein transferase (EC 2.3.2.8), which catalyses the post-translational conjugation of arginine to the N terminus of a protein. In eukaryotes, this functions as part of the N-end rule pathway of protein degradation by conjugating a destabilising amino acid to the amino terminal aspartate or glutamate of a protein, targeting the protein for ubiquitin-dependent proteolysis. N terminal cysteine is sometimes modified .. +PF04376 Arginine-tRNA-protein transferase, N terminus
This family represents the N terminal region of the enzyme arginine-tRNA-protein transferase (EC 2.3.2.8), which catalyses the post-translational conjugation of arginine to the N terminus of a protein. In eukaryotes, this functions as part of the N-end rule pathway of protein degradation by conjugating a de-stabilising amino acid to the amino terminal aspartate or glutamate of a protein, targeting the protein for ubiquitin-dependent proteolysis. N terminal cysteine is sometimes modified . In S cerevisiae, Cys20, 23, 94 and/or 95 are thought to be important for activity . Of these, only Cys 94 appears to be completely conserved in this family.. +PF03078 ATHILA ORF-1 family
Pfam-B_2240 (release 6.4). ATHILA is a group of Arabidopsis thaliana retrotransposons belonging to the Ty3/gypsy family of the long terminal repeat (LTR) class of eukaryotic retrotransposons[2,3]. The central region of ATHILA retrotransposons contains two or three open reading frames (ORFs). This family represents the ORF1 product. The function of ORF1 is unknown.. +PF03477 ATP cone domain
+PF02222 ATP-grasp domain
Pfam-B_157 (release 5.2). This family does not contain all known ATP-grasp domain members. This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity.. +PF00217 ATP:guanido phosphotransferase, C-terminal catalytic domain
The substrate binding site is located in the cleft between N and C-terminal domains, but most of the catalytic residues are found in the larger C-terminal domain.. +PF02807 ATP:guanido phosphotransferase, N-terminal domain
The N-terminal domain has an all-alpha fold.. +PF01747 ATP-sulfurylase
Pfam-B_494 (release 4.2). This domain is the catalytic domain of ATP-sulfurylase or sulfate adenylyltransferase EC:2.7.7.4 some of which are part of a bifunctional polypeptide chain associated with adenosyl phosphosulphate (APS) kinase Pfam:PF01583. Both enzymes are required for PAPS (phosphoadenosine-phosphosulfate) synthesis from inorganic sulphate . ATP sulfurylase catalyses the synthesis of adenosine-phosphosulfate APS from ATP and inorganic sulphate .. +PF00231 ATP synthase
+PF05176 ATP10 protein
ATP 10 is essential for the assembly of a functional mitochondrial ATPase complex .. +PF00895 ATP synthase protein 8
Pfam-B_446 (release 3.0). +PF00119 ATP synthase A chain
+PF00006 ATP synthase alpha/beta family, nucleotide-binding domain
This family includes the ATP synthase alpha and beta subunits, the ATP synthase associated with flagella and the termination factor Rho.. +PF00430 ATP synthase B/B' CF(0)
Pfam-B_137 (release 1.0). Part of the CF(0) (base unit) of the ATP synthase. The base unit is thought to translocate protons through membrane (inner membrane in mitochondria, thylakoid membrane in plants, cytoplasmic membrane in bacteria). The B subunits are thought to interact with the stalk of the CF(1) subunits. This domain should not be confused with the ab CF(1) proteins (in the head of the ATP synthase) which are found in Pfam:PF00006. +PF00137 ATP synthase subunit C
+PF01813 ATP synthase subunit D
Pfam-B_1304 (release 4.2). This is a family of subunit D from various ATP synthases including V-type H+ transporting and Na+ dependent. Subunit D is suggested to be an integral part of the catalytic sector of the V-ATPase .. +PF00401 ATP synthase, Delta/Epsilon chain, long alpha-helix domain
Pfam-B_114 (release 1.0). Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. This subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (Pfam:PF00213).. +PF02823 ATP synthase, Delta/Epsilon chain, beta-sandwich domain
Pfam-B_114 (release 1.0). Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. The subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (Pfam:PF00213).. +PF04627 ATP-synt_E;
Mitochondrial ATP synthase epsilon chain. This family constitutes the mitochondrial ATP synthase epsilon subunit. This is not to be confused with the bacterial epsilon subunit, which is homologous to the mitochondrial delta subunit (Pfam:PF00401 and Pfam:PF02823) The epsilon subunit is located in the extrinsic membrane section F1, which is the catalytic site of ATP synthesis. The epsilon subunit was not well ordered in the crystal structure of bovine F1 , but it is known to be located in the stalk region of F1 . E subunit is thought to be involved in the regulation of ATP synthase, since a null mutation increased oligomycin sensitivity and decreased inhibition by inhibitor protein IF1 .. +PF01990 ATP synthase (F/14-kDa) subunit
This family includes 14-kDa subunit from vATPases , which is in the peripheral catalytic part of the complex . The family also includes archaebacterial ATP synthase subunit F .. +PF04911 ATP synthase j chain
+PF02038 ATP1G1/PLM/MAT8 family
+PF03899 ATP synthase I chain
+PF03154 Atrophin-1 family
Pfam-B_3427 (release 6.5). Atrophin-1 is the protein product of the dentatorubral-pallidoluysian atrophy (DRPLA) gene. DRPLA OMIM:125370 is a progressive neurodegenerative disorder. It is caused by the expansion of a CAG repeat in the DRPLA gene on chromosome 12p. This results in an extended polyglutamine region in atrophin-1, that is thought to confer toxicity to the protein, possibly through altering its interactions with other proteins [1,2]. The expansion of a CAG repeat is also the underlying defect in six other neurodegenerative disorders, including Huntington's disease. One interaction of expanded polyglutamine repeats that is thought to be pathogenic is that with the short glutamine repeat in the transcriptional coactivator CREB binding protein, CBP. This interaction draws CBP away from its usual nuclear location to the expanded polyglutamine repeat protein aggregates that are characteristic of the polyglutamine neurodegenerative disorders. This interferes with CBP-mediated transcription and causes cytotoxicity .. +PF03769 Attacin, C-terminal region
Pfam-B_2791 (release 7.0). This family includes attacin, sarcotoxin and diptericin. All members of this family are insect antibacterial proteins which are induced by the fat body and subsequently secreted into the hemolymph where they act synergistically to kill the invading microorganism .. +PF03472 Autoinducer binding domain
This domain is found in a large family of transcriptional regulators. This domain specifically binds to autoinducer molecules.. +PF00765 Autoinducer synthetase
Pfam-B_881 (release 2.1). +PF03987 Autophagy_C;
Autophagocytosis associated protein, active-site domain . Pfam-B_10019 (release 7.3). Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the vacuole. The cysteine residue within the HPC motif is the putative active-site residue for recognition of the Apg5 subunit of the autophagosome complex .. +PF03986 Autophagocytosis associated protein (Atg3), N-terminal domain
Pfam-B_10019 (release 7.3). +PF02309 AUX/IAA family
Pfam-B_801 (release 5.2). Transcription of the AUX/IAA family of genes is rapidly induced by the plant hormone auxin. Some members of this family are longer and contain an N terminal DNA binding domain, such as Swiss:O64965. The function of this region is uncertain.. +PF02041 Auxin binding protein
+PF02519 Auxin responsive protein
Pfam-B_1263 (release 5.4). This family consists of the protein products of the ARG7 auxin responsive genes family none of which have any identified functional role. . +PF03708 Avian retrovirus envelope protein, gp85
Pfam-B_3651 (release 7.0). Family of avian-specific viral glycoproteins that form a receptor-binding gp85 polypeptide that is linked through disulfide to a membrane-spanning gp37 spike. Gp85 confers a high degree of subgroup specificity for interaction with distinct cell receptors .. +PF01382 Avidin family
+PF03377 Xanthomonas avirulence protein, Avr/PthA
Pfam-B_3936 (release 6.6). +PF03591 AzlC protein
TIGRFAMs, Griffiths-Jones SR. +PF02216 B domain
Pfam-B_1782 (release 5.2). This family contains the B domain of Staphylococcal protein A, which specifically binds to the Fc portion of immunoglobulin G.. +PF04182 B-block binding subunit of TFIIIC
Pfam-B_68239 (release 7.3);. Yeast transcription factor IIIC (TFIIIC) is a multi-subunit protein complex that interacts with two control elements of class III promoters called the A and B blocks. This family represents the subunit within TFIIIC involved in B-block binding .. +PF02246 Protein L b1 domain
Pfam-B_3438 (release 5.2). Protein L is a bacterial protein with immunoglobulin (Ig) light chain-binding properties. It contains a number of homologous b1 repeats towards the N-terminus. These repeats have been found to be responsible for the interaction of protein L with Ig light chains .. +PF03483 S3_4;
Pfam-B_1005 (release 7.0). This domain is found in tRNA synthetase beta subunits as well as in some non tRNA synthetase proteins.. +PF03484 tRNA synthetase B5 domain
Pfam-B_1005 (release 7.0). This domain is found in phenylalanine-tRNA synthetase beta subunits.. +PF01603 Protein phosphatase 2A regulatory B subunit (B56 family)
Pfam-B_984 (release 4.1). Protein phosphatase 2A (PP2A) is a major intracellular protein phosphatase that regulates multiple aspects of cell growth and metabolism. The ability of this widely distributed heterotrimeric enzyme to act on a diverse array of substrates is largely controlled by the nature of its regulatory B subunit. There are multiple families of B subunits (See also Pfam:PF01240), this family is called the B56 family .. +PF02043 Bacteriochlorophyll C binding protein
+PF00216 Bacterial DNA-binding protein
+PF00308 bac_dnaA;
Bacterial dnaA protein. +PF01311 Bacterial export proteins, family 1
Pfam-B_1442 (release 3.0). This family includes the following members; FliR, MopE, SsaT, YopT, Hrp, HrcT and SpaR All of these members export proteins, that do not possess signal peptides, through the membrane. Although the proteins that these exporters move may be different, the exporters are thought to function in similar ways .. +PF01312 FlhB HrpN YscU SpaS Family
Pfam-B_1200 (release 3.0). This family includes the following members: FlhB, HrpN, YscU, SpaS, HrcU SsaU and YopU. All of these proteins export peptides using the type III secretion system. The peptides exported are quite diverse.. +PF05088 Bacterial NAD-glutamate dehydrogenase
Pfam-B_6291 (release 7.7). This family consists of several bacterial proteins which are closely related to NAD-glutamate dehydrogenase found in Streptomyces clavuligerus. Glutamate dehydrogenases (GDHs) are a broadly distributed group of enzymes that catalyse the reversible oxidative deamination of glutamate to ketoglutarate and ammonia .. +PF01152 Globin;
Bacterial-like globin. This family of heme binding proteins are found mainly in bacteria. However they can also be found in some protozoa and plants as well.. +PF00296 bac_luciferase;
Luciferase-like monooxygenase. +PF01036 Bacteriorhodopsin-like protein
Pfam-B_1412 (release 3.0). The bacterial opsins are retinal-binding proteins that provide light- dependent ion transport and sensory functions to a family of halophilic bacteria [2,3]. They are integral membrane proteins believed to contain seven transmembrane (TM) domains, the last of which contains the attachment point for retinal (a conserved lysine). This family also includes distantly related proteins that do not contain the retinal binding lysine and so cannot function as opsins. Some fungal examples are: Swiss:O74870, Swiss:P25619, Swiss:P38079, Swiss:Q12117.. +PF01103 Bac_suface_Ag;
Pfam-B_1201 (release 3.0). This entry includes the following surface antigens; D15 antigen from H.influenzae, OMA87 from P.multocida, OMP85 from N.meningitidis and N.gonorrhoeae. The family also includes a number of eukaryotic proteins that are members of the UPF0140 family. There also appears to be a relationship to Pfam:PF03865 (personal obs: C Yeats). In eukaryotes, it appears that these proteins are not surface antigens; S. cerevisiae YNL026W (SAM50, Swiss:P53969) is an essential component of the Sorting and Assembly Machinery (SAM) of the mitochondrial outer membrane. The protein was localised to the mitochondria .. +PF01338 Bacillus thuringiensis toxin
+PF01654 Bacterial Cytochrome Ubiquinol Oxidase
Pfam-B_1148 (release 4.1). This family are the alternative oxidases found in many bacteria which oxidise ubiquinol and reduce oxygen as part of the electron transport chain. This family is the subunit I of the oxidase. E. coli has two copies of the oxidase, bo and bd', both of which are represented here. In some nitrogen fixing bacteria, e.g. Klebsiella pneumoniae, this oxidase is responsible for removing oxygen in microaerobic conditions, making the oxidase required for nitrogen fixation. This subunit binds a single b-haem, through ligands at His186 and Met393 (using SW:P11026 numbering). In addition His19 is a ligand for the haem b found in subunit II. +PF00936 Bact_microcomp; Bac_microcomp;
Pfam-B_1071 (release 3.0). Bacterial microcompartments are primitive organelles composed entirely of protein subunits. The prototypical bacterial microcompartment is the carboxysome, a protein shell for sequestering carbon fixation reactions. These proteins form hexameric structures .. +PF02397 Bact_transf;
Bacterial sugar transferase. Pfam-B_1538 (release 5.4). This Pfam family represents a conserved region from a number of different bacterial sugar transferases, involved in diverse biosynthesis pathways.. +PF01721 Class II bacteriocin
Pfam-B_1954 (release 4.1). The bacteriocins are small peptides that inhibit the growth of various bacteria. Bacteriocins of lactic acid bacteria may inhibit their target cells by permeabilising the cell membrane .. +PF04798 Baculovirus 19 kDa protein conserved region
Pfam-B_6291 (release 7.5). Family of Baculovirus proteins of approximate mass 19 kDa. . +PF04631 Baculovirus hypothetical protein
Pfam-B_5343 (release 7.5). This family includes several hypothetical baculoviral proteins, with predicted molecular weights of approximately 44 kD.. +PF04786 ssDNA binding protein
Pfam-B_6251 (release 7.5). Family of Baculovirus ssDNA binding proteins.. +PF04639 Baculoviral E56 protein, specific to ODV envelope
Pfam-B_5446 (release 7.5). This family represents the E56 protein, which localises to the occlusion derived virus (ODV) envelope, but not to the budded virus (BV) envelope .. +PF04850 Baculovirus E66 occlusion-derived virus envelope protein
Pfam-B_4624 (release 7.6). +PF03258 Baculovirus FP protein
Pfam-B_4275 (release 6.5). The FP protein is missing in baculovirus (Few Polyhedra) mutants .. +PF04700 Structural glycoprotein p40/gp41 conserved region
Pfam-B_4278 (release 7.5). Family of viral structural glycoproteins .. +PF03273 Baculovirus gp64 envelope glycoprotein family
Pfam-B_4223 (release 6.5). This family includes the gp64 glycoprotein from baculovirus as well as other viruses e.g. Swiss:P28970.. +PF04735 Baculovirus DNA helicase
Pfam-B_3393 (release 7.5). +PF04838 Baculoviridae late expression factor 5
Pfam-B_5141 (release 7.6). +PF05073 Baculovirus P24 capsid protein
Pfam-B_6005 (release 7.7). Baculovirus P24 is associated with nucleocapsids of budded and polyhedra-derived virions [1,2].. +PF04766 Nucleopolyhedrovirus p26 protein
Pfam-B_6066 (release 7.5). Family of Baculovirus p26 proteins. . +PF05214 Baculo_P33;
Pfam-B_6583 (release 7.7). This family consists of a series of Baculovirus P33 protein homologues of unknown function.. +PF02961 Barrier to autointegration factor
The BAF protein has a SAM-domain-like bundle of orthogonally packed alpha-hairpins - one classic and one pseudo helix-hairpin-helix motif. The protein is involved in the prevention of retroviral DNA integration.. +PF05112 Baculo_P47;
Baculovirus P47 protein. Pfam-B_6441 (release 7.7). This family consists of several Baculovirus P47 proteins which is one of the primary components of Baculovirus encoded RNA polymerase, which initiates transcription from late and very late promoters .. +PF04878 Baculo_P48;
Baculovirus P48 protein. Pfam-B_6510 (release 7.6). +PF04583 Baculoviridae p74 conserved region
Pfam-B_4744 (release 7.5). Baculoviruses are distinct from other virus families in that there are two viral phenotypes: budded virus (BV) and occlusion-derived virus (ODV). BVs disseminate viral infection throughout the tissues of the host and ODVs transmit baculovirus between insect hosts. GFP tagging experiments implicate p74 as an ODV envelope protein [1,2].. +PF04513 Baculovirus polyhedron envelope protein, PEP, C terminus
Polyhedra are large crystalline occlusion bodies containing nucleopolyhedrovirus virions, and surrounded by an electron-dense structure called the polyhedron envelope or polyhedron calyx. The polyhedron envelope (associated) protein PEP is thought to be an integral part of the polyhedron envelope. PEP is concentrated at the surface of polyhedra, and is thought to be important for the proper formation of the periphery of polyhedra. It is thought that PEP may stabilise polyhedra and protect them from fusion or aggregation .. +PF04512 Baculovirus polyhedron envelope protein, PEP, N terminus
Polyhedra are large crystalline occlusion bodies containing nucleopolyhedrovirus virions, and surrounded by an electron-dense structure called the polyhedron envelope or polyhedron calyx. The polyhedron envelope (associated) protein PEP is thought to be an integral part of the polyhedron envelope. PEP is concentrated at the surface of polyhedra, and is thought to be important for the proper formation of the periphery of polyhedra. It is thought that PEP may stabilise polyhedra and protect them from fusion or aggregation .. +PF04501 Baculovirus major capsid protein VP39
This family constitutes the 39 kDa major capsid protein of the Baculoviridae .. +PF04913 Baculovirus Y142 protein
Pfam-B_6688 (release 7.6). +PF04684 BAF1 / ABF1 chromatin reorganising factor
ABF1 is a sequence-specific DNA binding protein involved in transcription activation, gene silencing and initiation of DNA replication. ABF1 is known to remodel chromatin, and it is proposed that it mediates its effects on transcription and gene expression by modifying local chromatin architecture . These functions require a conserved stretch of 20 amino acids in the C-terminal region of ABF1 (amino acids 639 to 662 S. cerevisiae (Swiss:P14164)) . The N-terminal two thirds of the protein are necessary for DNA binding, and the N-terminus (amino acids 9 to 91 in S. cerevisiae) is thought to contain a novel zinc-finger motif which may stabilise the protein structure .. +PF02179 BAG domain
Alignment kindly provided by SMART. Domain present in Hsp70 regulators.. +PF02923 Restriction endonuclease BamHI
+PF00373 Band_41;
This domain is the central structural domain of the FERM domain.. +PF03114 BAR domain
BAR domains are dimerisation, lipid binding and curvature sensing modules found in many different protein families. A BAR domain with an additional N-terminal amphipathic helix (an N-BAR) can drive membrane curvature. These N-BAR domains are found in amphiphysin, endophilin, BRAP and Nadrin. BAR domains are also frequently found alongside domains that determine lipid specificity, like Pfam:PF00169 and Pfam:PF00787 domains in beta centaurins and sorting nexins respectively.. +PF01337 Barstar (barnase inhibitor)
+PF00967 Barwin family
+PF04865 Baseplate J-like protein
Pfam-B_4777 (release 7.6). The P2 bacteriophage J protein lies at the edge of the baseplate. This family also includes a number of bacterial homologues, which are thought to have been horizontally transferred.. +PF01586 Myogenic Basic domain
Pfam-B_427 (release 4.1). This basic domain is found in the MyoD family of muscle specific proteins that control muscle development. The bHLH region of the MyoD family includes the basic domain and the Helix-loop-helix (HLH) motif. The bHLH region mediates specific DNA binding . With 12 residues of the basic domain involved in DNA binding . The basic domain forms an extended alpha helix in the structure.. +PF02028 BCCT family transporter
+PF02327 Bacteriochlorophyll A protein
Pfam-B_38317 (release 5.2). Bacteriochlorophyll A protein is involved in the energy transfer system of green photosynthetic bacteria. The protein forms a homotrimer, with each monomer unit containing seven molecules of bacteriochlorophyll A.. +PF00452 Apoptosis regulator proteins, Bcl-2 family
+PF04538 Brain expressed X-linked like family
Pfam-B_3086 (release 7.5). This is a family of transcription elongation factors which includes those referred to as Bex proteins as well as those named TCEAL7. Bex1 was shown to be a novel link between neurotrophin signalling, the cell cycle, and neuronal differentiation, suggesting it might function by coordinating internal cellular states with the ability of cells to respond to external signals . TCEAL7 has been shown negatively to regulate the NF-kappaB pathway, hence being important in ovarian cancer as it is one of the genes frequently downregulated in this cancer. A closely related protein, TFIIS/TCEA, found in Pfam:PF07500 is involved in transcription elongation and transcript fidelity. TFIIS/TCEA promotes 3' endoribonuclease activity of RNA polymerase II (pol II) and allows pol II to bypass transcript pause or 'arrest' during elongation process. It is thus possible that BEX is also acting in this way .. +PF04714 BCL7, N-terminal conserver region
Pfam-B_5900 (release 7.5). Members of the BCL family have significant sequence similarity at their N-terminus, represented in this family. The function of BCL7 proteins is unknown. They may be involved in early development. In addition, BCL7B is commonly hemizygously deleted in patients with Williams syndrome .. +PF01869 BadF/BadG/BcrA/BcrD ATPase family
Enright A & Pfam-B_5854 (Release 7.5). This family includes the BadF Swiss:O07462 and BadG Swiss:O07463 proteins that are two subunits of Benzoyl-CoA reductase, that may be involved in ATP hydrolysis. The family also includes an activase subunit from the enzyme 2-hydroxyglutaryl-CoA dehydratase Swiss:P11568. The protein Swiss:O66634 contains two copies of this region suggesting that the family may structurally dimerise. This family appears to be related to Pfam:PF00370.. +PF03170 Bacterial cellulose synthase subunit
Pfam-B_3954 (release 6.5). This family includes bacterial proteins involved in cellulose synthesis. Cellulose synthesis has been identified in several bacteria. In Agrobacterium tumefaciens, for instance, cellulose has a pathogenic role: it allows the bacteria to bind tightly to their host plant cells. While several enzymatic steps are involved in cellulose synthesis, potentially the only step unique to this pathway is that catalysed by cellulose synthase. This enzyme is a multi subunit complex. This family encodes a subunit that is thought to bind the positive effector cyclic di-GMP. This subunit is found in several different bacterial cellulose synthase enzymes. The first recognised sequence for this subunit is BcsB. In the AcsII cellulose synthase, this subunit and the subunit corresponding to BcsA are found in the same protein. Indeed, this alignment only includes the C-terminal half of the AcsAII synthase (Swiss:Q59167), which corresponds to BcsB.. +PF02138 Beige/BEACH domain
+PF00407 Bet_v_I;
Pathogenesis-related protein Bet v I family. This family is named after Bet v 1, the major birch pollen allergen. This protein belongs to family 10 of plant pathogenesis-related proteins (PR-10), cytoplasmic proteins of 15-17 kd that are wide-spread among dicotyledonous plants . In recent years, a number of diverse plant proteins with low sequence similarity to Bet v 1 was identified. A classification by sequence similarity yielded several subfamilies related to PR-10 : - Pathogenesis-related proteins PR-10: These proteins were identified as major tree pollen allergens in birch and related species (hazel, alder), as plant food allergens expressed in high levels in fruits, vegetables and seeds (apple, celery, hazelnut), and as pathogenesis-related proteins whose expression is induced by pathogen infection, wounding, or abiotic stress. Hyp-1 (Swiss:Q8H1L1), an enzyme involved in the synthesis of the bioactive naphthodianthrone hypericin in St. John's wort (Hypericum perforatum) also belongs to this family. Most of these proteins were found in dicotyledonous plants. In addition, related sequences were identified in monocots and conifers. - Cytokinin-specific binding proteins: These legume proteins bind cytokinin plant hormones . - (S)-Norcoclaurine synthases are enzymes catalysing the condensation of dopamine and 4-hydroxyphenylacetaldehyde to (S)-norcoclaurine, the first committed step in the biosynthesis of benzylisoquinoline alkaloids such as morphine . -Major latex proteins and ripening-related proteins are proteins of unknown biological function that were first discovered in the latex of opium poppy (Papaver somniferum) and later found to be upregulated during ripening of fruits such as strawberry and cucumber . The occurrence of Bet v 1-related proteins is confined to seed plants with the exception of a cytokinin-binding protein from the moss Physcomitrella patens (Swiss:Q9AXI3).. +PF03494 Beta-amyloid peptide (beta-APP)
+PF00144 beta-lactamase;
Prosite and Pfam-B_106 (Release 7.5). This family appears to be distantly related to Pfam:PF00905 and PF00768 D-alanyl-D-alanine carboxypeptidase.. +PF01212 Beta-eliminating lyase
+PF02929 Beta galactosidase small chain
Pfam-B_592 (Release 6.3). This domain comprises the small chain of dimeric beta-galactosidases EC:3.2.1.23. This domain is also found in single chain beta-galactosidase.. +PF02180 Bcl-2 homology region 4
Alignment kindly provided by SMART. +PF02368 Bacterial Ig-like domain (group 2)
This family consists of bacterial domains with an Ig-like fold. Members of this family are found in bacterial and phage surface proteins such as intimins.. +PF04775 Acyl-CoA thioester hydrolase/BAAT N-terminal region
Pfam-B_2191 (release 7.6). This family consists of the amino termini of acyl-CoA thioester hydrolase and bile acid-CoA:amino acid N-acetyltransferase (BAAT) . This region is not thought to contain the active site of either enzyme. Thioesterase isoforms have been identified in peroxisomes, cytoplasm and mitochondria, where they are thought to have distinct functions in lipid metabolism . For example, in peroxisomes, the hydrolase acts on bile-CoA esters .. +PF03496 Binary_toxA;
ADP-ribosyltransferase exoenzyme. This is a family of bacterial and viral bi-glutamic acid ADP-ribosyltransferases, where, in Swiss:Q93Q17, E403 is the catalytic residue and E401 contributes to the transfer of ADP-ribose to the target protein. In clostridial species it is actin that is being ADP-ribosylated; this result is lethal and dermonecrotic in infected mammals.. +PF03495 Clostridial binary toxin B/anthrax toxin PA
The N-terminal region of this family contains a calcium-binding motif that may be an EF-hand.. +PF02084 Bindin
+PF00351 biopterin_H;
Biopterin-dependent aromatic amino acid hydroxylase. This family includes phenylalanine-4-hydroxylase, the phenylketonuria disease protein.. +PF00364 biotin_req_enzy; biotin_lipoyl;
Biotin-requiring enzyme. This family covers two Prosite entries, the conserved lysine residue binds biotin in one group and lipoic acid in the other. Note that the HMM does not currently recognise the Glycine cleavage system H proteins.. +PF03744 6-carboxyhexanoate--CoA ligase
This family contains the enzyme 6-carboxyhexanoate--CoA ligase EC:6.2.1.14. This enzyme is involved in the first step of biotin synthesis, where it converts pimelate into pimeloyl-CoA . The enzyme requires magnesium as a cofactor and forms a homodimer .. +PF02632 BioY family
A number of bacterial genes are involved in bioconversion of pimelate into dethiobiotin . BioY is a component of the BioMNY transport system involved in biotin uptake in prokaryotes .. +PF00653 Inhibitor of Apoptosis domain
BIR stands for 'Baculovirus Inhibitor of apoptosis protein Repeat'. It is found repeated in inhibitor of apoptosis proteins (IAPs), and in fact it is also known as IAP repeat. These domains characteristically have a number of invariant residues, including 3 conserved cysteines and one conserved histidine that coordinate a zinc ion. They are usually made up of 4-5 alpha helices and a three-stranded beta-sheet. BIR is also found in other proteins known as BIR-domain-containing proteins (BIRPs), such as Survivin (Swiss:O15392) .. +PF04197 Birnavirus RNA dependent RNA polymerase (VP1)
Pfam-B_2204 (release 7.3). Birnaviruses are dsRNA viruses. This family corresponds to the RNA dependent RNA polymerase. This protein is also known as VP1. All of the birnavirus VP1 proteins contain conserved RdRp motifs that reside in the catalytic "palm" domain of all classes of polymerases. However, the birnavirus RdRps lack the highly conserved Gly-Asp-Asp (GDD) sequence, a component of the proposed catalytic site of this enzyme family that exists in the conserved motif VI of the palm domain of other RdRps .. +PF01766 Birnavirus VP2 protein
Pfam-B_946 (release 4.2). VP2 is the major structural protein of birnaviruses . The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) .. +PF01767 Birnavirus VP3 protein
Pfam-B_946 (release 4.2). VP3 is a minor structural component of the virus. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) .. +PF01768 Birnavirus VP4 protein
Pfam-B_946 (release 4.2). VP4 is a viral protease . The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) .. +PF03042 Birnavirus VP5 protein
Pfam-B_1772 (release 6.4). +PF03493 Calcium-activated BK potassium channel alpha subunit
+PF04940 Sensors of blue-light using FAD
The BLUF domain has been shown to bind FAD in the AppA protein (Swiss:Q53119). AppA is involved in the repression of photosynthesis genes in response to blue-light.. +PF02608 Basic membrane protein
This is a family of basic membrane lipoproteins from Borrelia and various putative lipoproteins from other bacteria. All of these proteins are outer membrane proteins and are thus antigenic in nature when possessed by the pathogenic members of the family. One protein Swiss:032436 is a transcriptional activator .. +PF01722 BolA-like protein
Pfam-B_1996 (release 4.1). This family consists of the morphoprotein BolA from E. coli and its various homologues. In E. coli over expression of this protein causes round morphology and may be involved in switching the cell between elongation and septation systems during cell division . The expression of BolA is growth rate regulated and is induced during the transition into the stationary phase . BolA is also induced by stress during early stages of growth and may have a general role in stress response. It has also been suggested that BolA can induce the transcription of penicillin binding proteins 6 and 5 [2,1].. +PF02044 Bombesin-like peptide
+PF02414 Borrelia ORF-A
Pfam-B_1805 (release 5.4). This protein is encoded by an open reading frame in plasmid borne DNA repeats of Borrelia species. This protein is known as ORF-A . The function of this putative protein is unknown.. +PF03183 Borrelia repeat protein
Pfam-B_2029 (release 6.5). +PF00228 Bowman-Birk serine protease inhibitor family
+PF02653 Branched-chain amino acid transport system / permease component
COG0559 & Pfam-B_654 (Release 7.5). This is a large family mainly comprising high-affinity branched-chain amino acid transporter proteins such as E. coli LivH Swiss:P08340 and LivM Swiss:P22729 both of which are from the LIV-I transport system . Also found within this family are proteins from the galactose transport system permease and a ribose transport system .. +PF00634 BRCA2_repeat;
The alignment covers only the most conserved region of the repeat.. +PF02498 BRO;
BRO family, N-terminal domain. Pfam-B_1235 (release 5.4). This family includes the N-terminus of baculovirus BRO and ALI motif proteins. The function of BRO proteins is unknown. It has been suggested that BRO-A and BRO-C are DNA binding proteins that influence host DNA replication and/or transcription . This Pfam domain does not include the characteristic invariant alanine, leucine, isoleucine motif of the ALI proteins .. +PF03032 Brevenin/esculentin/gaegurin/rugosin family
Pfam-B_1232 (release 6.4). This family contains a number of defence peptides secreted from the skin of amphibians, including the opiate-like dermorphins and deltorphins, and the antimicrobial dermoseptins and temporins. The alignment for this family includes the signal peptide.. +PF01318 Bromo_CP;
Bromovirus coat protein. +PF01573 Bromovirus movement protein
Pfam-B_508 (release 4.1). +PF04450 Peptidase of plants and bacteria
Pfam-B_5066 (release 7.5). These basic secretory proteins (BSPs) are believed to be part of the plant's defence mechanism against pathogens . . +PF00779 BTK motif
Alignment kindly provided by SMART. Zinc-binding motif containing conserved cysteines and a histidine. Always found C-terminal to PH domains. The crystal structure shows this motif packs against the PH domain. The PH+Btk module pair has been called the Tec homology (TH) region .. +PF04514 Bluetongue virus non-structural protein NS2
This family includes NS2 proteins from other members of the Orbivirus genus. NS2 is a non-specific single-stranded RNA-binding protein that forms large homomultimers and accumulates in viral inclusion bodies of infected cells. Three RNA binding regions have been identified in Bluetongue virus serotype 17 (Swiss:P33473) at residues 2-11, 153-166 and 274-286 . NS2 multimers also possess nucleotidyl phosphatase activity . The precise function of NS2 is not known, but it may be involved in the transport and condensation of viral mRNAs .. +PF04426 Bul1 C terminus
This family contains the C terminus of Saccharomyces cerevisiae Bul1. Bul1 binds the ubiquitin ligase Rsp5, via an N terminal PPSY motif (157-160 in Swiss:P48524) . The complex containing Bul1 and Rsp5 is involved in intracellular trafficking of the general amino acid permease Gap1 , degradation of Rog1 in cooperation with Bul2 and GSK-3 , and mitochondrial inheritance . Bul1 may contain HEAT repeats.. +PF04425 Bul1 N terminus
This family contains the N terminus of Saccharomyces cerevisiae Bul1. Bul1 binds the ubiquitin ligase Rsp5, via an N terminal PPSY motif (157-160 in Swiss:P48524) . The complex containing Bul1 and Rsp5 is involved in intracellular trafficking of the general amino acid permease Gap1 , degradation of Rog1 in cooperation with Bul2 and GSK-3 , and mitochondrial inheritance . Bul1 may contain HEAT repeats.. +PF03557 Bunyavirus glycoprotein G1
Pfam-B_653 (release 7.0). Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase. This family contains the G1 glycoprotein which is the viral attachment protein .. +PF03563 Bunyavirus glycoprotein G2
Pfam-B_1048 (release 7.0). Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase. This family contains the G2 glycoprotein which interacts with the Pfam:PF03557 G1 glycoprotein .. +PF01104 Bunyavirus non-structural protein NS-s
Pfam-B_880 (release 3.0). The NS-s protein is encoded by the S RNA. This segment also encodes for the N protein. These two proteins are encoded by overlapping reading frames.. +PF03231 Bunyavirus non-structural protein NS-S
Pfam-B_3018 (release 6.5). This family represents the Bunyavirus NS-S family. Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase.. +PF00952 Bunyavirus nucleocapsid (N) protein
Pfam-B_587 (release 3.0). The bunyaviruses are enveloped viruses with a genome consisting of 3 ssRNA segments (called L, M and S). The nucleocapsid protein is encoded on the small (S) genomic RNA. The N protein is the major component of the nucleocapsids. This protein is thought to interact with the L protein, virus RNA and/or other N proteins.. +PF04196 Bunyavirus RNA dependent RNA polymerase
Pfam-B_2559 (release 7.3). The bunyaviruses are enveloped viruses with a genome consisting of 3 ssRNA segments (called L, M and S). The nucleocapsid protein is encoded on the small (S) genomic RNA. The L segment codes for an RNA polymerase. This family contains the RNA dependent RNA polymerase on the L segment.. +PF03181 BURP domain
Pfam-B_1432 (release 6.5). The BURP domain is found at the C-terminus of several different plant proteins. It was named after the proteins in which it was first identified: the BNM2 clone-derived protein from Brassica napus Swiss:O65009; USPs and USP-like proteins Swiss:P21746 Swiss:P21747 Swiss:Q06765 Swiss:O24482; RD22 from Arabidopsis thaliana Swiss:Q08298; and PG1beta from Lycopersicon esculentum Swiss:Q40161. This domain is around 230 amino acid residues long. It possesses the following conserved features: two phenylalanine residues at its N-terminus; two cysteine residues; and four repeated cysteine-histidine motifs, arranged as: CH-X(10)-CH-X(25-27)-CH-X(25-26)-CH, where X can be any amino acid . The function of this domain is unknown.. +PF03309 Bvg_acc_factor;
Type III pantothenate kinase. Pfam-B_3452 (release 6.5). Type III pantothenate kinase catalyses the phosphorylation of pantothenate (Pan), the first step in the universal pathway of CoA biosynthesis.. +PF04681 Blastomyces yeast-phase-specific protein
Pfam-B_5640 (release 7.5). The molecular function of this protein is not known. Its expression is specific to the high temperature, unicellular yeast morphology (as opposed to the lower temperature, multicellular mycelium form) .. +PF00170 bZIP;
bZIP transcription factor. The Pfam entry includes the basic region and the leucine zipper region.. +PF00386 C1q domain
C1q is a subunit of the C1 enzyme complex that activates the serum complement system.. +PF01413 C-terminal tandem repeated domain in type 4 procollagen
Ponting CP, Schultz J, Bork P. Duplicated domain in C-terminus of type 4 collagens. Mutations in alpha-5 collagen IV are associated with X-linked Alport syndrome.. +PF03595 C4dic_mal_tran;
Voltage-dependent anion channel. This family of transporters has ten alpha helical transmembrane segments . The structure of a bacterial homologue of SLAC1 shows it to have a trimeric arrangement. The pore is composed of five helices with a conserved Phe residue involved in gating. One homologue, Mae1 from the yeast Schizosaccharomyces pombe, functions as a malate uptake transporter; another, Ssu1 from Saccharomyces cerevisiae and other fungi including Aspergillus fumigatus, is characterised as a sulfite efflux pump; and TehA from Escherichia coli is identified as a tellurite resistance protein by virtue of its association in the tehA/tehB operon. In plants, this family is found in the stomatal guard cells functioning as an anion-transporting pore . Many homologues are incorrectly annotated as tellurite resistance or dicarboxylate transporter (TDT) proteins.. +PF01681 C6 domain
This domain of unknown function is found in the C. elegans protein Swiss:Q19522. It is presumed to be an extracellular domain. The C6 domain contains six conserved cysteine residues in most copies of the domain. However some copies of the domain are missing cysteine residues 1 and 3 suggesting that these form a disulphide bridge.. +PF03596 Cadmium resistance transporter
TIGRFAMs, Griffiths-Jones SR. +PF00028 cadherin;
Swissprot_feature_table. +PF01049 Cadherin_C_term;
Cadherin cytoplasmic region. Pfam-B_257 (release 3.0). Cadherins are vital in cell-cell adhesion during tissue differentiation. Cadherins are linked to the cytoskeleton by catenins. Catenins bind to the cytoplasmic tail of the cadherin. Cadherins cluster to form foci of homophilic binding units. A key determinant of the strength of the binding that is mediated by cadherins is the juxtamembrane region of the cadherin. This region induces clustering and also binds to the protein p120ctn .. +PF03507 CagA exotoxin
Pfam-B_918 (release 7.0). +PF03524 cagX;
Conjugal transfer protein. PRINTS & Pfam-B_5812 (Release 7.5). This family includes type IV secretion system CagX conjugation protein. Other members of this family are involved in conjugal transfer to plant cells of T-DNA.. +PF03185 Calcium-activated potassium channel, beta subunit
Pfam-B_2176 (release 6.5). +PF00214 Calcitonin / CGRP / IAPP family
+PF04847 Calcipressin
Pfam-B_4547 (release 7.6). Calcipressin is also known as calcineurin-binding protein, since it inhibits calcineurin-mediated transcriptional modulation by binding to calcineurin's catalytic domain .. +PF02029 Caldesmon
+PF05042 Caleosin related protein
Pfam-B_5163 (release 7.7). This family contains plant proteins related to caleosin. Caleosins contain calcium-binding domains and have an oleosin-like association with lipid bodies. Caleosins are present at relatively low levels and are mainly bound to microsomal membrane fractions at the early stages of seed development. As the seeds mature, overall levels of caleosins increased dramatically and they were associated almost exclusively with storage lipid bodies . This family is probably related to EF hands Pfam:PF00036.. +PF00915 Calicivirus coat protein
Pfam-B_202 (release 3.0). +PF01067 Calpain large subunit, domain III
Pfam-B_852 (release 3.0). The function of the domain III and I are currently unknown. Domain II is a cysteine protease and domain IV is a calcium binding domain. Calpains are believed to participate in intracellular signaling pathways mediated by calcium ions.. +PF00748 Calpain inhibitor
Pfam-B_543 (release 2.1). This region is found multiple times in calpain inhibitor proteins.. +PF00402 calponin;
Calponin family repeat. +PF00262 calreticulin;
+PF01216 Calsequestrin
+PF03160 Calx-beta domain
+PF01213 CAP;
Adenylate cyclase associated (CAP) N terminal. +PF04451 Capsid_Iridovir;
Large eukaryotic DNA virus major capsid protein. DOMO:DM04206; Iyer L. This family includes the major capsid protein of iridoviruses, chlorella virus and Spodoptera ascovirus, which are all dsDNA viruses with no RNA stage. This is the most abundant structural protein and can account for up to 45% of virion protein . In Chlorella virus PBCV-1 the major capsid protein is a glycoprotein . The four families of large eukaryotic DNA viruses, Poxviridae, Asfarviridae, Iridoviridae, and Phycodnaviridae, are referred to collectively as nucleocytoplasmic large DNA viruses or NCLDV. The virions of different NCLDV have dramatically different structures. The major capsid proteins of iridoviruses and phycodnaviruses, both of which have icosahedral capsids surrounding an inner lipid membrane, showed a high level of sequence conservation. A more limited, but statistically significant sequence similarity was observed between these proteins and the major capsid protein (p72) of ASFV, which also has an icosahedral capsid. It was surprising, however, to find that all of these proteins shared a conserved domain with the poxvirus protein D13L, which is an integral virion component thought to form a scaffold for the formation of viral crescents and immature virion .. +PF05159 Capsule polysaccharide biosynthesis protein
This family includes export proteins involved in capsule polysaccharide biosynthesis, such as KpsS Swiss:P42218 and LipB Swiss:P57038.. +PF00194 carb_anhydrase;
Eukaryotic-type carbonic anhydrase. +PF02977 Carboxypeptidase A inhibitor
+PF00619 Caspase recruitment domain
Ponting C, Schultz J, Bork P. Motif contained in proteins involved in apoptotic signaling. Predicted to possess a DEATH (Pfam:PF00531) domain-like fold .. +PF01623 Carlavirus putative nucleic acid binding protein
Pfam-B_808 (release 4.1). This family of carlavirus nucleic acid binding proteins includes a motif for a potential C-4 type zinc finger this has four highly conserved cysteine residues and is a conserved feature of the carlaviruses 3' terminal ORF . These proteins may function as viral transcriptional regulators. The carlavirus family includes garlic latent virus and potato virus S and M, these viruses are positive strand, ssRNA with no DNA stage.. +PF00997 casein_kappa;
Pfam-B_1298 (release 3.0). Kappa-casein is a mammalian milk protein involved in a number of important physiological processes. In the gut, the ingested protein is split into an insoluble peptide (para kappa-casein) and a soluble hydrophilic glycopeptide (caseinomacropeptide). Caseinomacropeptide is responsible for increased efficiency of digestion, prevention of neonate hypersensitivity to ingested proteins, and inhibition of gastric pathogens.. +PF00363 caseins;
+PF00302 Chloramphenicol acetyltransferase
+PF03123 CAT RNA binding domain
P39805/1-60 Psi-blast. This RNA binding domain is found at the amino terminus of transcriptional antitermination proteins such as BglG, SacY and LicT. These proteins control the expression of sugar metabolising operons in Gram+ and Gram- bacteria. This domain has been called the CAT (Co-AntiTerminator) domain. It binds as a dimer to short Ribonucleotidic Anti-Terminator (RAT) hairpin, each monomer interacting symmetrically with both strands of the RAT hairpin . In the full-length protein, CAT is followed by two phosphorylatable PTS regulation domains (Pfam:PF00874) that modulate the RNA binding activity of CAT. Upon activation, the dimeric proteins bind to RAT targets in the nascent mRNA, thereby preventing abortive dissociation of the RNA polymerase from the DNA template .. +PF00199 catalase;
+PF00666 Cathelicidin
Pfam-B_276 (release 2.1). A novel protein family, showing a conserved proregion and a variable carboxyl-terminal antimicrobial domain. This region shows similarity to cystatins.. +PF04731 Caudal like protein activation region
This family consists of the amino termini of proteins belonging to the caudal-related homeobox protein family. This region is thought to mediate transcription activation. The level of activation caused by mouse Cdx2 (Swiss:P43241) is affected by phosphorylation at serine 60 via the mitogen-activated protein kinase pathway . Caudal family proteins are involved in the transcriptional regulation of multiple genes expressed in the intestinal epithelium, and are important in differentiation and maintenance of the intestinal epithelial lining. Caudal proteins always have a homeobox DNA binding domain (Pfam:PF00046).. +PF00689 Na_K_ATPase_C;
Cation transporting ATPase, C-terminus. Pfam-B_137 (release 2.1). Members of this family are involved in Na+/K+, H+/K+, Ca++ and Mg++ transport. This family represents 5 transmembrane helices.. +PF03310 Caulimovirus DNA-binding protein
Pfam-B_3746 (release 6.5). +PF03233 Caulimo_AT;
Aphid transmission protein. Pfam-B_3118 (release 6.5). This protein is found in various caulimoviruses. It codes for an 18 kDa protein (PII), which is dispensable for infection but which is required for aphid transmission of the virus . This protein interacts with the PIII protein .. +PF01693 Caulimo_VI;
Caulimovirus viroplasmin. Pfam-B_1373 (release 4.1). This family consists of various caulimovirus viroplasmin proteins. The viroplasmin protein is encoded by gene VI and is the main component of viral inclusion bodies or viroplasms . Inclusions are the site of viral assembly, DNA synthesis and accumulation . Two domains exist within gene VI corresponding approximately to the 5' third and middle third of gene VI, these influence systemic infection in a light-dependent manner .. +PF04771 Chicken anaemia virus VP-3 protein
Pfam-B_2147 (release 7.6). This protein is found in the nucleus of infected cells and may act as a transcriptional regulator. It induces apoptosis, and is also known as apoptin [SwissProt annotation for Swiss:P54094].. +PF01146 Caveolin
All three known Caveolin forms have the FEDVIAEP caveolin 'signature motif' within their hydrophilic N-terminal domain. Caveolin 2 (Cav-2) is co-localised and co-expressed with Cav-1/VIP21, forms heterodimers with it and needs Cav-1 for proper membrane localisation. Cav-3 has greater protein sequence similarity to Cav-1 than to Cav-2. Cellular processes caveolins are involved in include vesicular transport, cholesterol homeostasis, signal transduction, and tumour suppression .. +PF02275 Linear amide C-N hydrolases, choloylglycine hydrolase family
Pfam-B_5806 (release 5.2). This family includes several hydrolases which cleave carbon-nitrogen bonds, other than peptide bonds, in linear amides. These include choloylglycine hydrolase (conjugated bile acid hydrolase, CBAH) EC:3.5.1.24, penicillin acylase EC:3.5.1.11 and acid ceramidase EC:3.5.1.23. This domain forms the alpha-subunit for members from vertebral species, see family NAAA-beta, Pfam:PF15508.. +PF03914 CBF/Mak21 family
Wood V, Griffiths-Jones SR. Pfam-B_3822 (release 7.2). +PF02312 Core binding factor beta subunit
Pfam-B_12381 (release 5.2). Core binding factor (CBF) is a heterodimeric transcription factor essential for genetic regulation of hematopoiesis and osteogenesis. The beta subunit enhances DNA-binding ability of the alpha subunit in vitro, and has been shown to have a structure related to the OB fold .. +PF02045 CCAAT-binding transcription factor (CBF-B/NF-YA) subunit B
+PF00808 Archael_histone; Arch_histone;
Histone-like transcription factor (CBF/NF-Y) and archaeal histone. Pfam-B_1351 (Rel 2.1) & Pfam-B_3673 (Rel 7.5) & Pfam-B_2078 (Rel 8.0). This family includes archaebacterial histones and histone like transcription factors from eukaryotes.. +PF01656 CBIA;
CobQ/CobB/MinD/ParA nucleotide binding domain. Pfam-B_782 (release 4.1). This family consists of various cobyrinic acid a,c-diamide synthases. These include CbiA Swiss:P29946 and CbiP Swiss:Q05597 from S.typhimurium , and CobQ Swiss:Q52686 from R. capsulatus . These amidases catalyse amidations to various side chains of hydrogenobyrinic acid or cobyrinic acid a,c-diamide in the biosynthesis of cobalamin (vitamin B12) from uroporphyrinogen III. Vitamin B12 is an important cofactor and an essential nutrient for many plants and animals and is primarily produced by bacteria . The family also contains dethiobiotin synthetases as well as the plasmid partitioning proteins of the MinD/ParA family .. +PF02570 Precorrin-8X methylmutase
This is a family of Precorrin-8X methylmutases also known as Precorrin isomerase, CbiC/CobH, EC:5.4.1.2. This enzyme catalyses the reaction: Precorrin-8X <=> hydrogenobyrinate. This enzyme is part of the Cobalamin (vitamin B12) biosynthetic pathway and catalyses a methyl rearrangement [1,2].. +PF01888 CbiD
CbiD is essential for cobalamin biosynthesis in both S. typhimurium and B. megaterium, no functional role has been ascribed to the protein. The CbiD protein has a putative S-AdoMet binding site. It is possible that CbiD might have the same role as CobF in undertaking the C-1 methylation and deacylation reactions required during the ring contraction process .. +PF01890 CbiG;
Cobalamin synthesis G C-terminus. Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process . Within the cobalamin synthesis pathway CbiG catalyses both the opening of the lactone ring and the extrusion of the two-carbon fragment of cobalt-precorrin-5A from C-20 and its associated methyl group (deacylation) to give cobalt-precorrin-5B . This family is the C-terminal region, and the mid- and N-terminal parts are conserved independently in other families.. +PF02571 Precorrin-6x reductase CbiJ/CobK
This family consists of Precorrin-6x reductase EC:1.3.1.54. This enzyme catalyses the reaction: precorrin-6Y + NADP(+) <=> precorrin-6X + NADPH. CbiJ and CobK both catalyse the reduction of the macrocycle in the cobalamin biosynthesis pathway [1,2].. +PF01891 Cobalt uptake substrate-specific transmembrane region
This family of proteins forms part of the cobalt-transport complex in prokaryotes, CbiMNQO. CbiMNQO and NikMNQO are the most widespread groups of microbial transporters for cobalt and nickel ions and are unusual uptake systems as they consist of eg two transmembrane components (CbiM and CbiQ), a small membrane-bound component (CbiN) and an ATP-binding protein (CbiO) but no extracytoplasmic solute-binding protein. Similar components constitute the nickel transporters with some variability in the small membrane-bound component, either NikN or NikL, which are not similar to CbiN at the sequence level. CbiM is the substrate-specific component of the complex and is a seven-transmembrane protein . The CbiMNQO and NikMNQO systems form part of the coenzyme B12 biosynthesis pathway . The NikM protein is Pfam:PF10670.. +PF02553 Cobalt transport protein component CbiN
CbiN is part of the active cobalt transport system involved in uptake of cobalt into the cell involved with cobalamin biosynthesis (vitamin B12). It has been suggested that CbiN may function as the periplasmic binding protein component of the active cobalt transport system .. +PF02361 Cobalt transport protein
Pfam-B_673 (release 5.2). This family consists of various cobalt transport proteins, most of which are found in Cobalamin (Vitamin B12) biosynthesis operons. In Salmonella the cbiN cbiQ (product CbiQ in this family) and cbiO are likely to form an active cobalt transport system .. +PF01903 CbiX
The function of CbiX is uncertain, however it is found in cobalamin biosynthesis operons and so may have a related function. Some CbiX proteins contain a striking histidine-rich region at their C-terminus, which suggests that it might be involved in metal chelation .. +PF02262 CBL proto-oncogene N-terminal domain 1
Pfam-B_3949 (release 5.2). Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. Cbl_N is comprised of 3 structural domains of which this is the first - a four helix bundle.. +PF02761 CBL proto-oncogene N-terminus, EF hand-like domain
Pfam-B_3949 (release 5.2). Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. The so called N-terminal domain is actually 3 structural domains, of which this is the central EF hand domain.. +PF02762 CBL proto-oncogene N-terminus, SH2-like domain
Pfam-B_3949 (release 5.2). Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. The so called N-terminal domain is actually 3 structural domains, of which this is the C-terminal SH2 domain.. +PF00734 CBD_fungal; CBD_1;
Fungal cellulose binding domain. Pfam-B_444 (release 2.1). +PF02013 CBD_5;
Cellulose or protein binding domain. PSI-BLAST P10476/668-713. This domain is found in two distinct sets of proteins with different functions. Those found in aerobic bacteria bind cellulose (or other carbohydrates); but in anaerobic fungi they are protein binding domains, referred to as dockerin domains or docking domains. They are believed to be responsible for the assembly of a multiprotein cellulase/hemicellulase complex, similar to the cellulosome found in certain anaerobic bacteria.. +PF03425 Carbohydrate binding domain (family 11)
+PF03426 Carbohydrate binding domain (family 15)
+PF03424 CBM_28;
Carbohydrate binding domain (family 17/28). +PF03427 Carbohydrate binding domain (family 19)
+PF00553 CBD_1; CBD_2;
Cellulose binding domain. Two tryptophan residues are involved in cellulose binding. Cellulose binding domain found in bacteria.. +PF00686 CBD_2; CBD_4;
Starch binding domain. Pfam-B_111 (release 2.1). +PF03370 PRS;
Putative phosphatase regulatory subunit. Pfam-B_2433 (release 6.6). This family consists of several eukaryotic proteins that are thought to be involved in the regulation of glycogen metabolism. For instance, the mouse PTG protein Swiss:O08541 has been shown to interact with glycogen synthase, phosphorylase kinase, phosphorylase a: these three enzymes have key roles in the regulation of glycogen metabolism. PTG also binds the catalytic subunit of protein phosphatase 1 (PP1C) and localises it to glycogen. Subsets of similar interactions have been observed with several other members of this family, such as the yeast PIG1, PIG2, GAC1 and GIP2 proteins. While the precise function of these proteins is not known, they may serve a scaffold function, bringing together the key enzymes in glycogen metabolism. This family is a carbohydrate binding domain.. +PF03423 Carbohydrate binding domain (family 25)
+PF02839 CBD_7; CBM_5;
Carbohydrate binding domain. This short domain is found in many different glycosyl hydrolase enzymes and is presumed to have a carbohydrate binding function. The domain has six aromatic groups that may be important for binding.. +PF04942 CC domain
Pfam-B_4563 (release 7.5). This short domain contains four conserved cysteines that probably form two disulphide bonds. The domain is named after the characteristic CC motif.. +PF01845 CcdB protein
+PF04995 Heme exporter protein D (CcmD)
The CcmD protein is part of a C-type cytochrome biogenesis operon . The exact function of this protein is uncertain. It has been proposed that CcmC, CcmD and CcmE interact directly with each other, establishing a cytoplasm to periplasm haem delivery pathway for cytochrome c maturation . This protein is found fused to CcmE in Swiss:P52224. These proteins contain a predicted transmembrane helix.. +PF03100 CcmE
Pfam-B_2583 (release 6.4). CcmE is the product of one of a cluster of Ccm genes that are necessary for cytochrome c biosynthesis in eubacteria. Expression of these proteins is induced when the organisms are grown under anaerobic conditions with nitrate or nitrite as the final electron acceptor.. +PF03918 Cytochrome C biogenesis protein
Members of this family include NrfF, CcmH, CycL, Ccl2.. +PF03597 Cytochrome oxidase maturation protein cbb3-type
TIGRFAMs, Griffiths-Jones SR. +PF03150 Di-haem cytochrome c peroxidase
Pfam-B_3135 (release 6.5). This is a family of distinct cytochrome c peroxidases (CCPs) that contain two haem groups. Similar to other cytochrome c peroxidases, they reduce hydrogen peroxide to water using c-type haem as an oxidisable substrate. However, since they possess two, instead of one, haem prosthetic groups, bacterial CCPs reduce hydrogen peroxide without the need to generate semi-stable free radicals. The two haem groups have significantly different redox potentials. The high potential (+320 mV) haem feeds electrons from electron shuttle proteins to the low potential (-330 mV) haem, where peroxide is reduced (indeed, the low potential site is known as the peroxidatic site) . The CCP protein itself is structured into two domains, each containing one c-type haem group, with a calcium-binding site at the domain interface. This family also includes MauG proteins, whose similarity to di-haem CCP was previously recognised .. +PF04505 CD225;
Interferon-induced transmembrane protein. Pfam-B_2070 (release 7.5). This family includes the human leukocyte antigen CD225, which is an interferon inducible transmembrane protein, and is associated with interferon induced cell growth suppression .. +PF01130 CD36 family
Pfam-B_1229 (release 3.0). The CD36 family is thought to be a novel class of scavenger receptors. There is also evidence suggesting a possible role in signal transduction. CD36 is involved in cell adhesion.. +PF04549 CD47 transmembrane region
Pfam-B_2739 (release 7.5). This family represents the transmembrane region of CD47 leukocyte antigen [1-2].. +PF03234 Cdc37;CDC37;
Cdc37 N terminal kinase binding. Pfam-B_3345 (release 6.5). Cdc37 is a molecular chaperone required for the activity of numerous eukaryotic protein kinases. This domain corresponds to the N terminal domain which binds predominantly to protein kinases and is found N terminal to the Hsp (heat shock protein) 90-binding domain Pfam:PF08565. Expression of a construct consisting of only the N-terminal domain of Schizosaccharomyces pombe Cdc37 results in cellular viability. This indicates that interactions with the cochaperone Hsp90 may not be essential for Cdc37 function .. +PF02724 CDC45-like protein
Pfam-B_1919 (release 5.5). CDC45 is an essential gene required for initiation of DNA replication in S. cerevisiae (Swiss:Q08032), forming a complex with MCM5/CDC46. Homologues of CDC45 have been identified in human , mouse and smut fungus (Swiss:Q99107) among others. . +PF02933 cdc48_2;
Cell division protein 48 (CDC48), domain 2. Pfam-B_799 (release 5.2). This domain has a double psi-beta barrel fold and includes VCP-like ATPase and N-ethylmaleimide sensitive fusion protein N-terminal domains. Both the VAT and NSF N-terminal functional domains consist of two structural domains of which this is at the C-terminus. The VAT-N domain found in AAA ATPases Pfam:PF00004 is a 185-residue substrate recognition domain .. +PF02359 VAT-Nn; cdc48_N;
Cell division protein 48 (CDC48), N-terminal domain. Pfam-B_799 (release 5.2). This domain has a double psi-beta barrel fold and includes VCP-like ATPase and N-ethylmaleimide sensitive fusion protein N-terminal domains. Both the VAT and NSF N-terminal functional domains consist of two structural domains of which this is at the N-terminus. The VAT-N domain found in AAA ATPases Pfam:PF00004 is a 185-residue substrate recognition domain .. +PF03381 DUF284;
LEM3 (ligand-effect modulator 3) family / CDC50 family. Pfam-B_2846 (release 6.6). Members of this family have been predicted to contain transmembrane helices. The family member LEM3 (Swiss:P42838) is a ligand-effect modulator, mutation of which increases glucocorticoid receptor activity in response to dexamethasone and also confers increased activity on other intracellular receptors including the progesterone, oestrogen and mineralocorticoid receptors. LEM3 is thought to affect a downstream step in the glucocorticoid receptor pathway. Factors that modulate ligand responsiveness are likely to contribute to the context-specific actions of the glucocorticoid receptor in mammalian cells . The products of genes YNR048w (Swiss:P53740), YNL323w (Swiss:P42838) and YCR094w (Swiss:P25656) (CDC50) show redundancy of function and are involved in regulation of transcription via CDC39 . CDC39 (also known as NOT1) is normally a negative regulator of transcription either by affecting the general RNA polymerase II machinery or by altering chromatin structure . One function of CDC39 is to block activation of the mating response pathway in the absence of pheromone, and mutation causes arrest in G1 by activation of the pathway . It may be that the cold-sensitive arrest in G1 noticed in CDC50 mutants may be due to inactivation of CDC39. The effects of LEM3 on glucocorticoid receptor activity may also be due to effects on transcription via CDC39.. +PF02611 CDP-diacylglycerol pyrophosphatase
This is a family of CDP-diacylglycerol pyrophosphatases, EC:3.6.1.26. This enzyme catalyses the reaction CDP-diacylglycerol + H2O <=> CMP + phosphatidate.. +PF03598 CO dehydrogenase/acetyl-CoA synthase complex beta subunit
TIGRFAMs, Griffiths-Jones SR. +PF03599 CO dehydrogenase/acetyl-CoA synthase delta subunit
TIGRFAMs, Griffiths-Jones SR. +PF02234 Cyclin-dependent kinase inhibitor
Pfam-B_1698 (release 5.2) & Pfam-B_5787 (Release 8.0). Cell cycle progression is negatively controlled by cyclin-dependent kinases inhibitors (CDIs). CDIs are involved in cell cycle arrest at the G1 phase.. +PF03261 Cyclin-dependent kinase 5 activator protein
Pfam-B_4160 (release 6.5). +PF05174 Cysteine-rich D. radiodurans N terminus
This domain is found individually and at the N terminus of a few multi-domain proteins. . +PF03498 Cytolethal distending toxin A/C family
+PF00272 cecropin;
+PF02927 celD_N;
N-terminal ig-like domain of cellulase. +PF03500 Cellulose synthase subunit D
+PF03552 Cellulose synthase
Pfam-B_1346 (release 7.0). Cellulose, an aggregate of unbranched polymers of beta-1,4-linked glucose residues, is the major component of wood and thus paper, and is synthesised by plants, most algae, some bacteria and fungi, and even some animals. The genes that synthesise cellulose in higher plants differ greatly from the well-characterised genes found in Acetobacter and Agrobacterium sp. More correctly designated as 'cellulose synthase catalytic subunits', plant cellulose synthase (CesA) proteins are integral membrane proteins, approximately 1,000 amino acids in length. There are a number of highly conserved residues, including several motifs shown to be necessary for processive glycosyltransferase activity .. +PF03040 CemA family
Pfam-B_1775 (release 6.4). Members of this family are probable integral membrane proteins. Their molecular function is unknown. CemA proteins are found in the inner envelope membrane of chloroplasts but not in the thylakoid membrane . A cyanobacterial member of this family has been implicated in CO2 transport, but is probably not a CO2 transporter itself . They are predicted to be haem-binding however this has not been proven experimentally .. +PF03879 Cgr1 family
Members of this family are coiled-coil proteins that are involved in pre-rRNA processing .. +PF04752 ChaC-like protein
Pfam-B_3722 (release 7.5). The ChaC protein is thought to be associated with the putative ChaA Ca2+/H+ cation transport protein in Escherichia coli. Its function is not known. This family also includes homologues regions from several other bacterial and eukaryotic proteins.. +PF00195 Chal_stil_synt;
Chalcone and stilbene synthases, N-terminal domain. The C-terminal domain of Chalcone synthase is reported to be structurally similar to domains in thiolase and beta-ketoacyl synthase. The differences in activity are accounted for by differences in this N-terminal domain.. +PF02797 Chal_stil_syntC;
Chalcone and stilbene synthases, C-terminal domain. This domain of chalcone synthase is reported to be structurally similar to domains in thiolase and beta-ketoacyl synthase. The differences in activity are accounted for by differences in the N-terminal domain.. +PF02431 Chalcone-flavanone isomerase
Pfam-B_2073 (release 5.4). +PF03502 Nucleoside-specific channel-forming protein, Tsx
+PF03924 CHASE domain
This domain is found in the extracellular portion of receptor-like proteins - such as serine/threonine kinases and adenylyl cyclases [1,2]. Predicted to be a ligand binding domain .. +PF03173 Putative carbohydrate binding domain
Pfam-B_8666 (release 6.5). This domain represents the N terminal domain in chitobiases and beta-hexosaminidases EC:3.2.1.52. It is composed of a beta sandwich structure that is similar in structure to the cellulose binding domain of cellulase from Cellulomonas fimi . This suggests that this may be a carbohydrate binding domain.. +PF03174 Chitobiase/beta-hexosaminidase C-terminal domain
Pfam-B_8666 (release 6.5). This short domain represents the C terminal domain in chitobiases and beta-hexosaminidases EC:3.2.1.52. It is composed of a beta sandwich structure . The function of this domain is unknown.. +PF01339 CheB methylesterase
+PF04509 CheC-like family
The restoration of pre-stimulus levels of the chemotactic response regulator, CheY-P, is important for allowing bacteria to respond to new environmental stimuli. The members of this family, CheC, CheX, CheA and FliY are CheY-P phosphatase [1,2]. CheC appears to be primarily involved in restoring normal CheY-P levels, whereas FliY seems to act on CheY-P constitutively. CheD enhances the activity of CheC 5-fold, which is normally relatively low [1,2]. In some cases, the region represented by this entry is present as multiple copies.. +PF03975 CheD chemotactic sensory transduction
This chemotaxis protein stimulates methylation of MCP proteins . The chemotaxis machinery of Bacillus subtilis is similar to that of the well characterised system of Escherichia coli. However, B. subtilis contains several chemotaxis genes not found in the E. coli genome, such as CheC and CheD, indicating that the B. subtilis chemotactic system is more complex. CheD plays an important role in chemotactic sensory transduction for many organisms. CheD deamidates other B. subtilis chemoreceptors including McpB and McpC. Deamidation by CheD is required for B. subtilis chemoreceptors to effectively transduce signals to the CheA kinase . The structure of a complex between the signal-terminating phosphatase, CheC, and the receptor-modifying deamidase, CheD, reveals how CheC mimics receptor substrates to inhibit CheD and how CheD stimulates CheC phosphatase activity. CheD resembles other cysteine deamidases from bacterial pathogens that inactivate host Rho-GTPases. Phospho-CheY, the intracellular signal and CheC target, stabilises the CheC-CheD complex and reduces availability of CheD . A model is proposed whereby CheC acts as a CheY-P-induced regulator of CheD; CheY-P would cause CheC to sequester CheD from the chemoreceptors, inducing adaptation of the chemotaxis system .. +PF01739 CheR methyltransferase, SAM binding domain
Pfam-B_694 (release 4.2). CheR proteins are part of the chemotaxis signaling mechanism in bacteria. CheR methylates the chemotaxis receptor at specific glutamate residues. CheR is an S-adenosylmethionine- dependent methyltransferase - the C-terminal domain (this one) binds SAM.. +PF03705 CheR methyltransferase, all-alpha domain
Pfam-B_694 (release 4.2). CheR proteins are part of the chemotaxis signaling mechanism in bacteria. CheR methylates the chemotaxis receptor at specific glutamate residues. CheR is an S-adenosylmethionine- dependent methyltransferase.. +PF01584 CheW-like domain
Pfam-B_579 (release 4.1). CheW proteins are part of the chemotaxis signaling mechanism in bacteria. CheW interacts with the methyl accepting chemotaxis proteins (MCPs) and relays signals to CheY, which affects flagellar rotation. This family includes CheW and other related proteins that are involved in chemotaxis. The CheW-like regulatory domain in CheA binds to CheW, suggesting that these domains can interact with each other.. +PF01111 Cyclin-dependent kinase regulatory subunit
+PF04344 Chemotaxis phosphatase, CheZ
This family represents the bacterial chemotaxis phosphatase, CheZ. This protein forms a dimer characterised by a long four-helix bundle, composed of two helices from each monomer. CheZ dephosphorylates CheY in a reaction that is essential to maintain a continuous chemotactic response to environmental changes. It is thought that CheZ's conserved residue Gln 147 orientates a water molecule for nucleophilic attack at the CheY active site.. +PF00187 chitin_binding;
Chitin recognition protein. +PF01644 Chitin_synth;
Pfam-B_892 (release 4.1). This region is found commonly in chitin synthases classes I, II and III. Chitin, a linear homopolymer of GlcNAc residues, is an important component of the cell wall of fungi and is synthesised on the cytoplasmic surface of the cell membrane by membrane bound chitin synthases .. +PF03142 Chitin synthase
Pfam-B_1787 (release 6.5). Members of this family are fungal chitin synthase EC:2.4.1.16 enzymes. They catalyse chitin synthesis as follows: UDP-N-acetyl-D-glucosamine + {(1,4)-(N-acetyl-beta-D-glucosaminyl)}(N) <=> UDP + {(1,4)-(N-acetyl-beta-D-glucosaminyl)}(N+1).. +PF03503 Chlamydia cysteine-rich outer membrane protein 3
PRINTS, Griffiths-Jones SR. +PF03504 Chlamydia cysteine-rich outer membrane protein 6
PRINTS, Griffiths-Jones SR. +PF01308 Chlamydia_OMP;
Chlamydia major outer membrane protein. Pfam-B_1429 (release 3.0). The major outer membrane protein of Chlamydia contains four symmetrically spaced variable domains (VDs I to IV). This protein is believed to be an integral part of pathogenesis, possibly adhesion. Along with the lipopolysaccharide, the major outer membrane protein (MOMP) makes up the surface of the elementary body cell. The MOMP is the protein used to determine the different serotypes.. +PF00504 chloroa_b-bind;
Chlorophyll A-B binding protein. Pfam-B_54 (release 1.0) & Pfam-B_5772 (Release 7.5). +PF02962 5-carboxymethyl-2-hydroxymuconate isomerase
+PF04428 Choline kinase N terminus
Found N terminal to choline/ethanolamine kinase regions (Pfam:PF01633) in some plant and fungal choline kinase enzymes (EC:2.7.1.32). This region is only found in some members of the choline kinase family, and is therefore unlikely to contribute to catalysis.. +PF01633 Choline/ethanolamine kinase
Pfam-B_1165 (release 4.1). Choline kinase catalyses the committed step in the synthesis of phosphatidylcholine by the CDP-choline pathway . This alignment covers the protein kinase portion of the protein. The divergence of this family makes it very difficult to create a model that specifically predicts choline/ethanolamine kinases only. However if [add Pfam ID here for Choline_kinase_C] is also present then it is definitely a member of this family. . +PF04345 Chorismate lyase
Chorismate lyase catalyses the first step in ubiquinone synthesis, i.e. the removal of pyruvate from chorismate, to yield 4-hydroxybenzoate.. +PF01723 Chorion;
Pfam-B_1914 (release 4.1). This family consists of the chorion superfamily proteins classes A, B, CA, CB and high-cysteine HCB from silk, gypsy and polyphemus moths. The chorion proteins make up the moth's egg shell, a complex extracellular structure .. +PF03964 Chorion family 2
The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary . . +PF00425 chorismate_bind;
chorismate binding enzyme. Pfam-B_164 (release 1.0). This family includes the catalytic regions of the chorismate binding enzymes anthranilate synthase, isochorismate synthase, aminodeoxychorismate synthase and para-aminobenzoate synthase.. +PF01817 Chorismate_mut;
Chorismate mutase type II. Chorismate mutase EC:5.4.99.5 catalyses the conversion of chorismate to prephenate in the pathway of tyrosine and phenylalanine biosynthesis. This enzyme is negatively regulated by tyrosine, tryptophan and phenylalanine [2,3].. +PF01264 Chorismate_synth;
+PF02417 Chromate transporter
Pfam-B_1872 (release 5.4). Members of this family probably act as chromate transporters [1,2]. Members of this family are found in both bacteria and archaebacteria. The proteins are composed of one or two copies of this region. The alignment contains two conserved motifs, FGG and PGP.. +PF00385 chromo;
Chromo (CHRromatin Organisation MOdifier) domain. +PF01393 Chromo shadow domain
This domain is distantly related to Pfam:PF00385. This domain is always found in association with a chromo domain.. +PF00878 CIMR_repeat;
Cation-independent mannose-6-phosphate receptor repeat. Pfam-B_764 (release 3.0). The cation-independent mannose-6-phosphate receptor contains 15 copies of a repeat.. +PF02464 Competence-damaged protein
Pfam-B_2197 (release 5.4). CinA is the first gene in the competence-inducible (cin) operon, and is thought to be specifically required at some stage in the process of transformation . This Pfam family consists of putative competence-damaged proteins from the cin operon.. +PF04162 Circo_coat;
Gyrovirus capsid protein (VP1). Pfam-B_1772 (release 7.3). Gyroviruses are small circular single stranded viruses. This family includes the VP1 protein from the chicken anaemia virus which is the viral capsid protein.. +PF02443 Circo_ORF2;
Circovirus capsid protein. Pfam-B_1890 (release 5.4). Circoviruses are small circular single stranded viruses. This family is the capsid protein from viruses such as porcine circovirus and beak and feather disease virus Swiss:Q9YUC8. These proteins are about 220 amino acids long.. +PF04487 CITED
Pfam-B_3987 (release 7.5). CITED, CBP/p300-interacting transactivator with ED-rich tail, are characterised by a conserved 32-amino acid sequence at the C-terminus. CITED proteins do not bind DNA directly and are thought to function as transcriptional co-activators . . +PF04223 Citrate lyase, alpha subunit (CitF)
In citrate-utilising prokaryotes, citrate lyase EC:4.1.3.6 cleaves intracellular citrate into acetate and oxaloacetate, and is organised as a functional complex consisting of alpha, beta, and gamma subunits. The gamma subunit serves as an acyl carrier protein (ACP), and has a 2'-(5''-phosphoribosyl)-3'-dephospho-CoA prosthetic group. The citrate lyase is active only if this prosthetic group is acetylated; this acetylation is catalysed by an acetate:SH-citrate lyase ligase. The alpha subunit substitutes citryl for the acetyl group to form citryl-S-ACP. The beta subunit completes the reaction by cleaving the citryl to yield oxaloacetate and (regenerated) acetyl-S-ACP. This family represents the alpha subunit EC:2.8.3.10.. +PF01874 ATP:dephospho-CoA triphosphoribosyl transferase
The citG gene is found in a gene cluster with citrate lyase subunits . The function of the CitG protein was elucidated as ATP:dephospho-CoA triphosphoribosyl transferase [2-3].. +PF03600 Citrate transporter
+PF00285 citrate_synt;
+PF03802 Apo-citrate lyase phosphoribosyl-dephospho-CoA transferase
+PF01214 Casein kinase II regulatory subunit
+PF03805 Cytoadherence-linked asexual protein
Clag (cytoadherence linked asexual gene) is a malaria surface protein which has been shown to be involved in the binding of Plasmodium falciparum infected erythrocytes to host endothelial cells, a process termed cytoadherence. The cytoadherence phenomenon is associated with the sequestration of infected erythrocytes in the blood vessels of the brain, cerebral malaria. Clag is a multi-gene family in Plasmodium falciparum with at least 9 members identified to date. Orthologous proteins in the rodent malaria species Plasmodium chabaudi (Lawson D Unpubl. obs.) suggest that the gene family is found in other malaria species and may play a more generic role in cytoadherence.. +PF01217 Clathrin_adapt_s;
Clathrin adaptor complex small chain. +PF00637 Clathrin_repeat;
Region in Clathrin and VPS. Each region is about 140 amino acids long. The regions are composed of multiple alpha helical repeats. They occur in the arm region of the Clathrin heavy chain.. +PF01086 Clathrin light chain
+PF01394 Clathrin propeller repeat
Clathrin is the scaffold protein of the basket-like coat that surrounds coated vesicles. The soluble assembly unit, a triskelion, contains three heavy chains and three light chains in an extended three-legged structure. Each leg contains one heavy and one light chain. The N-terminus of the heavy chain is known as the globular domain, and is composed of seven repeats which form a beta propeller .. +PF03505 Clostridium enterotoxin
+PF03515 Colicin-like bacteriocin tRNase domain
The C-terminal region of colicin-like bacteriocins is either a pore-forming or an endonuclease-like domain. Cloacin and Pyocins have similar structures and activities to the colicins from E coli and the klebicins from Klebsiella spp. Colicins E5 and D cleave the anticodon loops of distinct tRNAs of Escherichia coli both in vivo and in vitro . The full-length molecule has an N-terminal translocation domain and a middle, double alpha-helical region which is receptor-binding .. +PF03513 Cloacin immunity protein
+PF01785 Closterovirus coat protein
Pfam-B_1309 (release 4.2) & Pfam-B_6985 (release 8.0). This family consists of coat proteins from closteroviruses, members of the Closteroviridae. The viral coat protein encapsulates and protects the viral genome. Both the large cp1 and smaller cp2 coat protein originate from the same primary transcript . Members of the Closteroviridae include Sugar beet yellow virus and Grapevine leafroll-associated virus; closteroviruses have a positive strand ssRNA genome with no DNA stage during replication. . +PF00574 Clp protease
The Clp protease has an active site catalytic triad. In E. coli Clp protease, ser-111, his-136 and asp-185 form the catalytic triad. Swiss:P48254 has lost all of these active site residues and is therefore inactive. Swiss:P42379 contains two large insertions, Swiss:P42380 contains one large insertion.. +PF01093 Clusterin
+PF03026 Influenza C virus M1 protein
Pfam-B_1290 (release 6.4). This family represents the matrix 1 protein of influenza C virus. The protein is the product of a spliced mRNA. Small quantities of the unspliced mRNA are found in the cell additionally encoding the M2 protein (see Pfam:PF03021).. +PF03021 Influenza C virus M2 protein
Pfam-B_1092 (release 6.4). Influenza C virus M1 protein is encoded by a spliced mRNA. The unspliced mRNA is also found in small quantities and can encode the protein represented by this family.. +PF02543 Carbamoyltransferase
Pfam-B_1740 (release 5.4). This family consists of NodU from Rhizobium and CmcH from Nocardia lactamdurans. NodU a Rhizobium nodulation protein involved in the synthesis of nodulation factors has 6-O-carbamoyltransferase-like activity . CmcH is involved in cephamycin (antibiotic) biosynthesis and has 3-hydroxymethylcephem carbamoyltransferase activity , EC:2.1.3.7 catalysing the reaction: Carbamoyl phosphate + 3-hydroxymethylceph-3-EM-4-carboxylate <=> phosphate + 3-carbamoyloxymethylcephem.. +PF04989 Cephalosporin hydroxylase
Members of this family are about 220 amino acids long. The CmcI protein Swiss:O85726 is presumed to represent the cephalosporin-7--hydroxylase . However this has not been experimentally verified. . +PF02627 Carboxymuconolactone decarboxylase family
Carboxymuconolactone decarboxylase (CMD) EC:4.1.1.44 is involved in protocatechuate catabolism. In some bacteria a gene fusion event leads to expression of CMD with a hydrolase involved in the same pathway . In these bifunctional proteins (e.g. Swiss:O67982) CMD represents the C-terminal domain, Pfam:PF00561 represents the N-terminal domain. . +PF00795 Nitrilase;
Carbon-nitrogen hydrolase. Pfam-B_1042 (release 2.1) & Pfam-B_5155 (Release 7.5). This family contains hydrolases that break carbon-nitrogen bonds . The family includes: Nitrilase EC:3.5.5.1 Swiss:Q42965, Aliphatic amidase EC:3.5.1.4 Swiss:Q01360, Biotidinase EC:3.5.1.12 Swiss:P43251, Beta-ureidopropionase EC:3.5.1.6 Swiss:Q03248. Nitrilase-related proteins generally have a conserved E-K-C catalytic triad, and are multimeric alpha-beta-beta-alpha sandwich proteins .. +PF01110 Ciliary neurotrophic factor
+PF03450 CO dehydrogenase flavoprotein C-terminal domain
+PF00473 Corticotropin-releasing factor family
+PF02552 CO dehydrogenase beta subunit/acetyl-CoA synthase epsilon subunit
This family consists of Carbon monoxide dehydrogenase I/II beta subunit EC:1.2.99.2 and acetyl-CoA synthase epsilon subunit. Carbon monoxide beta subunit catalyses the reaction: CO + H2O + acceptor <=> CO2 + reduced acceptor.. +PF01121 UPF0038;
Dephospho-CoA kinase. This family catalyses the phosphorylation of the 3'-hydroxyl group of dephosphocoenzyme A to form Coenzyme A EC:2.7.1.24. This enzyme uses ATP in its reaction.. +PF02035 Coagulin
+PF04733 Coatomer epsilon subunit
Pfam-B_3343 (release 7.5). This family represents the epsilon subunit of the coatomer complex, which is involved in the regulation of intracellular protein trafficking between the endoplasmic reticulum and the Golgi complex .. +PF04053 Coatomer WD associated region
Pfam-B_1269 (release 7.3);. This region is composed of WD40 repeats.. +PF02572 ATP:corrinoid adenosyltransferase BtuR/CobO/CobP
This family consists of the BtuR, CobO, CobP proteins all of which are Cob(I)alamin adenosyltransferase, EC:2.5.1.17, involved in cobalamin (vitamin B12) biosynthesis. These enzymes catalyse the adenosylation reaction: ATP + cob(I)alamin + H2O <=> phosphate + diphosphate + adenosylcobalamin.. +PF01122 Eukaryotic cobalamin-binding protein
+PF03186 CobD/Cbib protein
Pfam-B_2468 (release 6.5). This family includes CobD proteins from a number of bacteria, in Salmonella this protein is called Cbib. Salmonella CobD is a different protein . This protein is involved in cobalamin biosynthesis and is probably an enzyme responsible for the conversion of adenosylcobyric acid to adenosylcobinamide or adenosylcobinamide phosphate .. +PF02654 Cobalamin-5-phosphate synthase
This is a family of cobalamin-5-phosphate synthases, CobS, from bacteria. The CobS enzyme catalyses the synthesis of AdoCbl-5'-p from AdoCbi-GDP and alpha-ribazole-5'-P . This enzyme is involved in the cobalamin (vitamin B12) biosynthesis pathway, in particular the nucleotide loop assembly stage, in conjunction with CobC, CobU and CobT .. +PF02283 COBU;
Cobinamide kinase / cobinamide phosphate guanyltransferase. Pfam-B_7022 (release 5.2). This family is composed of a group of bifunctional cobalamin biosynthesis enzymes which display cobinamide kinase and cobinamide phosphate guanyltransferase activity. The crystal structure of the enzyme reveals the molecule to be a trimer with a propeller-like shape .. +PF00241 cofilin_ADF;
Cofilin/tropomyosin-type actin-binding protein. Severs actin filaments and binds to actin monomers.. +PF00963 Cohesin domain
Cohesin domains interact with a complementary domain, termed the dockerin domain. The cohesin-dockerin interaction is the crucial interaction for complex formation in the cellulosome .. +PF01410 Fibrillar collagen C-terminal domain
Ponting CP, Schultz J, Bork P. Pfam-B_464 (release 3.0). Found at C-termini of fibrillar collagens: Ephydatia muelleri procollagen EMF1 alpha, vertebrate collagens alpha(1)III, alpha(1)II, alpha(2)V etc.. +PF01024 Colicin pore forming domain
+PF03857 Colicin immunity protein
Colicin immunity proteins are plasmid-encoded proteins necessary for protecting the cell against colicins. Colicins are toxins released by bacteria during times of stress .. +PF01320 Colicin immunity protein / pyocin immunity protein
+PF02674 Colicin V production protein
Colicin V production protein is required in E. Coli for colicin V production from plasmid pColV-K30 . This protein is coded for in the purF operon. . +PF01114 Colipase, N-terminal domain
SCOP reports duplication of common fold with Colipase C-terminal domain.. +PF00325 crp;
Bacterial regulatory proteins, crp family. +PF02740 Colipase, C-terminal domain
SCOP reports duplication of common fold with Colipase N-terminal domain.. +PF03047 COMC family
Pfam-B_2107 (release 6.4). This family consists exclusively of streptococcal competence stimulating peptide precursors, which are generally up to 50 amino acid residues long. In all the members of this family, the leader sequence is cleaved after two conserved glycine residues; thus the leader sequence is of the double- glycine type . Competence stimulating peptides (CSP) are small (less than 25 amino acid residues) cationic peptides. The N-terminal amino acid residue is negatively charged, either glutamate or aspartate. The C-terminal end is positively charged. The third residue is also positively charged: a highly conserved arginine . A few COMC proteins and their precursors (not included in this family) do not fully follow the above description. In particular: the leader sequence in the CSP precursor from Streptococcus sanguis NCTC 7863 Swiss:O33758 is not of the double-glycine type; the CSP from Streptococcus gordonii NCTC 3165 Swiss:O33645 does not have a negatively charged N-terminus residue and has a lysine instead of arginine at the third position. Functionally, CSP act as pheromones, stimulating competence for genetic transformation in streptococci. In streptococci, the (CSP mediated) competence response requires exponential cell growth at a critical density, a relatively simple requirement when compared to the stationary-phase requirement of Haemophilus, or the late-logarithmic- phase of Bacillus . All bacteria induced to competence by a particular CSP are said to belong to the same pherotype, because each CSP is recognised by a specific receptor (the signalling domain of a histidine kinase ComD). Pherotypes are not necessarily species-specific. In addition, an organism may change pherotype. There are two possible mechanisms for pherotype switching: horizontal gene transfer, and accumulation of point mutations. The biological significance of pherotypes and pherotype switching is not definitively determined. 
Pherotype switching occurs frequently enough in naturally competent streptococci to suggest that it may be an important contributor to genetic exchange between different bacterial species . The family Antibacterial16, streptolysins from group A streptococci, has been merged into this family. . +PF02247 Large coat protein
Pfam-B_2294 (release 5.2). This family contains the large coat protein (LCP) of the comoviridae viral family.. +PF02248 Small coat protein
Pfam-B_2294 (release 5.2). This family contains the small coat protein (SCP) of the comoviridae viral family.. +PF01257 complex1_24kD; Complex1_24kDa;
Thioredoxin-like [2Fe-2S] ferredoxin. +PF00346 complex1_49Kd;
Respiratory-chain NADH dehydrogenase, 49 Kd subunit. +PF01512 Respiratory-chain NADH dehydrogenase 51 Kd subunit
Pfam-B_780 (release 4.0). +PF00668 DUF4;
Pfam-B_130 (release 2.1). This domain is found in many multi-domain enzymes which synthesise peptide antibiotics. This domain catalyses a condensation reaction to form peptide bonds in non- ribosomal peptide biosynthesis. It is usually found to the carboxy side of a phosphopantetheine binding domain (Pfam:PF00550). It has been shown that mutations in the HHXXXDG motif abolish activity suggesting this is part of the active site .. +PF00029 connexin;
+PF03508 Gap junction alpha-1 protein (Cx43)
+PF03509 Gap junction alpha-8 protein (Cx50)
+PF03601 Conserved hypothetical protein 698
TIGRFAMs, Griffiths-Jones SR. +PF03602 Conserved hypothetical protein 95
TIGRFAMs, Griffiths-Jones SR. +PF04234 CopC domain
CopC is a bacterial blue copper protein that binds 1 atom of copper per protein molecule. Along with CopA, CopC mediates copper resistance by sequestration of copper in the periplasm .. +PF00127 copper-bind;
Copper binding proteins, plastocyanin/azurin family. +PF00649 Copper fist DNA binding domain
+PF01218 Coproporphyrinogen III oxidase
+PF03232 Ubiquinone biosynthesis protein COQ7
Pfam-B_3545 (release 6.5). Members of this family contain two repeats of about 90 amino acids, that contains two conserved motifs. One of these DXEXXH may be part of an enzyme active site.. +PF04803 Cor1/Xlr/Xmr conserved region
Pfam-B_6320 (release 7.5). Cor1 is a component of the chromosome core in the meiotic prophase chromosomes . Xlr is a lymphoid cell specific protein . Xmr is abundantly transcribed in testis in a tissue-specific and developmentally regulated manner. The protein is located in the nuclei of spermatocytes, early in the prophase of the first meiotic division, and later becomes concentrated in the XY nuclear subregion where it is in particular associated with the axes of sex chromosomes .. +PF01544 CorA-like Mg2+ transporter protein
Pfam-B_944 (release 4.0) & Pfam-B_3206 (release 7.5). The CorA transport system is the primary Mg2+ influx system of Salmonella typhimurium and Escherichia coli. CorA is virtually ubiquitous in the Bacteria and Archaea. There are also eukaryotic relatives of this protein. The family includes the MRS2 protein Swiss:Q01926 from yeast that is thought to be an RNA splicing protein . However its membership of this family suggests that its effect on splicing is due to altered magnesium levels in the cell.. +PF03311 Cornichon protein
Pfam-B_3813 (release 6.5). +PF04694 Coronavirus ORF3 protein
Pfam-B_5763 (release 7.5). +PF03262 Coronavirus 6B/7B protein
Pfam-B_4476 (release 6.5). +PF02398 Coronavirus protein 7
Pfam-B_1574 (release 5.4). This is a family of proteins from coronavirus which may function in viral assembly. . +PF03187 Corona nucleocapsid I protein
Pfam-B_2926 (release 6.5). +PF01635 Coronavirus M matrix/glycoprotein
Pfam-B_845 (release 4.1). This family consists of various coronavirus matrix proteins which are transmembrane glycoproteins. The M protein or E1 glycoprotein is implicated in virus assembly . The E1 viral membrane protein is required for formation of the viral envelope and is transported via the Golgi complex .. +PF04753 Coronavirus non-structural protein NS2
Pfam-B_3747 (release 7.5). +PF05213 Coronavirus NS2A protein
Pfam-B_6568 (release 7.7). This family contains a number of corona virus non-structural proteins of unknown function. The family also includes a polymerase protein fragment from Berne virus and does not seem to be related to the Pfam:PF04753 Coronavirus NS2 family. This family is part of the 2H phosphoesterase superfamily .. +PF03053 ORF3b coronavirus protein
Pfam-B_2130 (release 6.4). Members of this family are non-structural proteins, approximately 250 amino acid residues long. They are found in transmissible gastroenteritis coronavirus (TGEV) and porcine respiratory coronavirus (PRCV) isolates. These proteins are found on the same mRNA as another product, designated ORF3a. While ORF3a/b has been implicated in TGEV and PRCV pathogenesis, its precise role remains unclear (see [2,3]).. +PF03905 Coronavirus_NS4;
Coronavirus non-structural protein NS4. +PF00937 Coronavirus nucleocapsid protein
Pfam-B_267 (release 3.0). +PF01600 Coronavirus S1 glycoprotein
The coronavirus spike glycoprotein forms the characteristic 'corona' after which the group is named. The Spike glycoprotein is translated as a large polypeptide that is subsequently cleaved to S1 and S2 Pfam:PF01601 .. +PF01601 Coronavirus S2 glycoprotein
The coronavirus spike glycoprotein forms the characteristic 'corona' after which the group is named. The Spike glycoprotein is translated as a large polypeptide that is subsequently cleaved to S1 Pfam:PF01600 and S2 .. +PF00115 Cytochrome C and Quinol oxidase polypeptide I
Pfam-B_23 (release 1.0) and Prosite. +PF05051 Cytochrome C oxidase copper chaperone (COX17)
Moxon SJ, Mistry J, Wood V. Pfam-B_5838 (release 7.7). Cox17 is essential for the assembly of functional cytochrome c oxidase (CCO) and for delivery of copper ions to the mitochondrion for insertion into the enzyme in yeast . The structure of Cox17 shows the protein to have an unstructured N-terminal region followed by two helices and several unstructured C-terminal residues. The Cu(I) binding site has been modelled as two-coordinate with ligation by conserved residues Cys23 and Cys26.. +PF00431 CUB domain
Pfam-B_136 (release 1.0). +PF00116 Cytochrome C oxidase subunit II, periplasmic domain
+PF02790 Cytochrome C oxidase subunit II, transmembrane domain
The N-terminal domain of cytochrome C oxidase contains two transmembrane alpha-helices.. +PF00510 Cytochrome c oxidase subunit III
Pfam-B_78 (release 1.0). +PF02284 Cytochrome c oxidase subunit Va
Pfam-B_7466 (release 5.2). Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit Va.. +PF01215 Cytochrome c oxidase subunit Vb
+PF02046 Cytochrome c oxidase subunit VIa
+PF02297 Cytochrome oxidase c subunit VIb
Pfam-B_9188 (release 5.2). Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of the potentially heme-binding subunit IVb of the oxidase.. +PF02238 Cytochrome c oxidase subunit VIIa
Pfam-B_3023 (release 5.2). Cytochrome c oxidase, a 13 sub-unit complex, is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of the heart and liver isoforms of cytochrome c oxidase subunit VIIa.. +PF02285 Cytochrome oxidase c subunit VIII
Pfam-B_6423 (release 5.2). Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIII.. +PF02672 CP12 domain
+PF01383 CpcD/allophycocyanin linker domain
Pfam-B_887 (release 3.0). +PF00166 cpn10;
Chaperonin 10 Kd subunit. This family contains GroES and Gp31-like chaperonins. Gp31 is a functional co-chaperonin that is required for the folding and assembly of Gp23, a major capsid protein, during phage morphogenesis . . +PF05205 Cps15;
COMPASS (Complex proteins associated with Set1p) component shg1. The Shg1 subunit is one of the eight subunits of the COMPASS complex, complex associated with SET1, conserved in yeasts and in other eukaryotes up to humans. It is associated with the region of the Set1 protein that is N-terminal to the C-terminus, ie Set1-560-900. The function of Shg1 seems to be to slightly inhibit histone 3 lysine 4 (H3K4) di- and tri-methylation, and it is a pioneer protein. The COMPASS complex functions to methylate the fourth lysine of Histone 3 and for silencing of genes close to the telomeres of chromosomes .. +PF00289 CPSase;
Carbamoyl-phosphate synthase L chain, N-terminal domain. Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines . The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00988. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117.. +PF02786 CPSase;
Carbamoyl-phosphate synthase L chain, ATP binding domain. Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines . The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00988. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. The ATP binding domain (this one) has an ATP-grasp fold.. +PF00650 CRAL/TRIO domain
+PF03765 CRAL/TRIO, N-terminal domain
This all-alpha domain is found to the N-terminus of Pfam:PF00650.. +PF02537 CrcB-like protein
CRCB is a putative integral membrane protein possibly involved in chromosome condensation. Over expression in E. coli also leads to camphor resistance . . +PF01321 Creatinase/Prolidase N-terminal domain
This family includes the N-terminal non-catalytic domains from creatinase and prolidase. The exact function of this domain is uncertain.. +PF00030 crystall;
Beta/Gamma crystallin. Swissprot_feature_table. The alignment comprises two Greek key motifs since the similarity between them is very low.. +PF02633 Creatinine amidohydrolase
Creatinine amidohydrolase (EC:3.5.2.10), or creatininase, catalyses the hydrolysis of creatinine to creatine . . +PF03858 Crustacean neurohormone H
These proteins are referred to as precursor-related peptides as they are typically co-transcribed and translated with the CHH neurohormone (Pfam:PF01147). However, in some species this neuropeptide is synthesised as a separate protein. Furthermore, neurohormone H can undergo proteolysis to give rise to 5 different neuropeptides . . +PF01147 Crustacean CHH/MIH/GIH neurohormone family
+PF00525 crystallin;
Alpha crystallin A chain, N terminal. Pfam-B_97 (release 1.0). +PF03783 Curli production assembly/transport component CsgG
CsgG is an outer membrane-located lipoprotein that is highly resistant to protease digestion. During curli assembly, an adhesive surface fibre, CsgG is required to maintain the stability of CsgA and CsgB .. +PF02599 Global regulator protein family
This is a family of global regulator proteins. This protein is an RNA-binding protein and a global regulator of carbohydrate metabolism genes facilitating mRNA decay . In E. coli CsrA binds the CsrB RNA molecule to form the Csr regulatory system which has a strong negative regulatory effect on glycogen biosynthesis, gluconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis . In other bacteria such as Erwinia carotovora RmsA has been shown to regulate the production of virulence determinants, such as extracellular enzymes . RmsA binds to RmsB regulatory RNA.. +PF02554 Carbon starvation protein CstA
This family consists of Carbon starvation protein CstA, a predicted membrane protein. It has been suggested that CstA is involved in peptide utilisation .. +PF00859 CTF/NF-I family transcription modulation region
Pfam-B_362 (release 3.0). +PF01467 Cytidylyltransf;
Cytidylyltransferase. This family includes: Cholinephosphate cytidylyltransferase Swiss:P49585. Glycerol-3-phosphate cytidylyltransferase Swiss:P27623.. +PF02348 Cytidylyl_trans;
Cytidylyltransferase. Pfam-B_886 (release 5.2). This family consists of two main Cytidylyltransferase activities: 1) 3-deoxy-manno-octulosonate cytidylyltransferase, , EC:2.7.7.38 catalysing the reaction:- CTP + 3-deoxy-D-manno-octulosonate <=> diphosphate + CMP-3-deoxy-D-manno-octulosonate, 2) acylneuraminate cytidylyltransferase EC:2.7.7.43, [1,2], catalysing the reaction:- CTP + N-acylneuraminate <=> diphosphate + CMP-N-acylneuraminate. NeuAc cytydilyltransferase of Mannheimia haemolytica has been characterised describing kinetics and regulation by substrate charge, energetic charge and amino-sugar demand .. +PF04808 Citrus tristeza virus (CTV) P23 protein
Pfam-B_2595 (release 7.6). This family consists of protein P23 from the citrus tristeza virus, which is a member of the Closteroviridae. CTV viruses produce more positive than negative RNA strands, and P23 controls this asymmetrical RNA accumulation. Amino acids 42-180 are essential for function and are thought to contain RNA-binding and zinc finger domains .. +PF01179 Copper amine oxidase, enzyme domain
Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). This family corresponds to the catalytic domain of the enzyme. . +PF02727 Copper amine oxidase, N2 domain
This domain is the first or second structural domain in copper amine oxidases, it is known as the N2 domain. Its function is uncertain. The catalytic domain can be found in Pfam:PF01179. Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ).. +PF02728 Copper amine oxidase, N3 domain
This domain is the second or third structural domain in copper amine oxidases, it is known as the N3 domain. Its function is uncertain. The catalytic domain can be found in Pfam:PF01179. Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ).. +PF02298 Plastocyanin-like domain
Pfam-B_398 (release 5.2). This family represents a domain found in flowering plants related to the copper binding protein plastocyanin. Some members of this family (eg Swiss:P93328) may not bind copper due to the lack of key residues.. +PF03263 Cucumovirus protein 2B
Pfam-B_4373 (release 6.5). This protein may be a viral movement protein.. +PF00760 Cucumovirus coat protein
Pfam-B_867 (release 2.1). +PF02376 CUT domain
Pfam-B_770 (release 5.2). The CUT domain is a DNA-binding motif which can bind independently or in cooperation with the homeodomain, often found downstream of the CUT domain. Multiple copies of the CUT domain can exist in one protein (eg Swiss:P10180).. +PF00888 Cullin family
Pfam-B_1149 (release 3.0). +PF03091 CutA1 divalent ion tolerance protein
Pfam-B_2307 (release 6.4). Several gene loci with a possible involvement in cellular tolerance to copper have been identified . One such locus in eubacteria and archaebacteria, cutA, is thought to be involved in cellular tolerance to a wide variety of divalent cations other than copper. The cutA locus consists of two operons, of one and two genes. The CutA1 protein is a cytoplasmic protein, encoded by the single-gene operon and has been linked to divalent cation tolerance. It has no recognised structural motifs . This family also contains putative proteins from eukaryotes (human and Drosophila).. +PF03932 CutC family
Copper transport in Escherichia coli is mediated by the products of at least six genes, cutA, cutB, cutC, cutD, cutE, and cutF. A mutation in one or more of these genes results in an increased copper sensitivity. Members of this family are between 200 and 300 amino acids in length and are found in both eukaryotes and bacteria.. +PF01083 Cutinase
+PF01473 Putative cell wall binding repeat
These repeats are characterised by conserved aromatic residues and glycines, and are found in multiple tandem copies in a number of proteins. The CW repeat is 20 amino acid residues long. The exact domain boundaries may not be correct. It has been suggested that these repeats in Swiss:P15057 might be responsible for the specific recognition of choline-containing cell walls . Similar but longer repeats are found in the glucosyltransferases and glucan-binding proteins of oral streptococci and shown to be involved in glucan binding as well as in the related dextransucrases of Leuconostoc mesenteroides. Repeats also occur in toxins of Clostridium difficile and other clostridia, though the ligands are not always known.. +PF04122 Putative cell wall binding repeat 2
This repeat is found in multiple tandem copies in proteins including amidase enhancers and adhesins .. +PF03638 CXC;
Tesmin/TSO1-like CXC domain, cysteine-rich domain. Pfam-B_1144 (release 7.0). This family includes proteins that have two copies of a cysteine rich motif as follows: C-X-C-X4-C-X3-YC-X-C-X6-C-X3-C-X-C-X2-C. The family includes Tesmin Swiss:Q9Y4I5 and TSO1 Swiss:Q9LE32 . This family is called a CXC domain in .. +PF03128 CXCXC repeat
Pfam-B_252 (release 6.5). This repeat contains the conserved pattern CXCXC where X can be any amino acid. The repeat is found in up to five copies in Vascular endothelial growth factor C . In the salivary glands of the dipteran Chironomus tentans, a specific messenger ribonucleoprotein (mRNP) particle, the Balbiani ring (BR) granule, can be visualised during its assembly on the gene and during its nucleocytoplasmic transport. This repeat is found in over 70 copies in the balbiani ring protein 3 Swiss:Q03376. It is also found in some silk proteins .. +PF02560 Cyanate lyase C-terminal domain
Cyanate lyase (also known as cyanase) EC:4.2.1.104 is responsible for the hydrolysis of cyanate, allowing organisms that possess the enzyme to overcome the toxicity of environmental cyanate. This enzyme is composed of two domains, an N-terminal helix-turn-helix and this structurally unique C-terminal domain .. +PF04199 Putative cyclase
Pfam-B_1440 (release 7.3). Proteins in this family are thought to be cyclase enzymes. They are found in proteins involved in antibiotic synthesis. However they are also found in organisms that do not make antibiotics pointing to a wider role for these proteins. The proteins contain a conserved motif HXGTHXDXPXH that is likely to form part of the active site.. +PF02984 cyclin_C;
Cyclin, C-terminal domain. Cyclins regulate cyclin dependent kinases (CDKs). Swiss:P22674 is a Uracil-DNA glycosylase that is related to other cyclins . Cyclins contain two domains of similar all-alpha fold, of which this family corresponds with the C-terminal domain.. +PF03784 Cyclotide family
This family contains a set of cyclic peptides with a variety of activities. The structure consists of a distorted triple-stranded beta-sheet and a cysteine-knot arrangement of the disulfide bonds . Cyclotides can be separated into two subfamilies, namely bracelet and moebius. The bracelet cyclotide subfamily tends to contain a larger number of positively charged residues and has a bracelet-like circularisation of the backbone . The moebius cyclotide subfamily contains a backbone twist due to a cis-Pro peptide bond and may conceptually be regarded as a molecular Moebius strip .. +PF00548 Cys-protease-3C;
3C cysteine protease (picornain 3C). Picornaviral proteins are expressed as a single polyprotein which is cleaved by the viral 3C cysteine protease.. +PF00007 Cystine-knot domain
Published_alignment enriched with PDOC00234 members.. The family comprises glycoprotein hormones and the C-terminal domain of various extracellular proteins. It is believed to be involved in disulfide-linked dimerisation.. +PF01053 Cys/Met metabolism PLP-dependent enzyme
Pfam-B_366 (release 3.0). This family includes enzymes involved in cysteine and methionine metabolism. The following are members: Cystathionine gamma-lyase, Cystathionine gamma-synthase, Cystathionine beta-lyase, Methionine gamma-lyase, OAH/OAS sulfhydrylase, O-succinylhomoserine sulfhydrylase. All of these members participate in slightly different reactions. All these enzymes use PLP (pyridoxal-5'-phosphate) as a cofactor.. +PF00839 cys_rich_FGFR;
Cysteine rich repeat. Pfam-B_297 (release 3.0). This cysteine rich repeat contains four cysteines. It is found in multiple copies in a protein that binds to fibroblast growth factors . The repeat is also found in MG160 and E-selectin ligand (ESL-1).. +PF00031 cystatin;
Very diverse family. Attempts to define separate sub-families failed. Typically, either the N-terminal or C-terminal end is very divergent. But splitting into two domains would make very short families. All members except Swiss:Q03196 and Swiss:Q10993 are found. Pfam:PF00666 are related to this family but have not been included.. +PF01578 CytC_asm;
Cytochrome C assembly protein. Pfam-B_114 (release 4.1) Pfam-B_8014 (Release 8.0). This family consists of various proteins involved in cytochrome c assembly from mitochondria and bacteria; CycK from Rhizobium , CcmC from E. coli and Paracoccus denitrificans [2,1] and orf240 from wheat mitochondria . The members of this family are probably integral membrane proteins with six predicted transmembrane helices. It has been proposed that members of this family comprise a membrane component of an ABC (ATP binding cassette) transporter complex. It is also proposed that this transporter is necessary for transport of some component needed for cytochrome c assembly. One member CycK contains a putative heme-binding motif , orf240 also contains a putative heme-binding motif and is a proposed ABC transporter with c-type heme as its proposed substrate . However it seems unlikely that all members of this family transport heme nor c-type apocytochromes because CcmC in the putative CcmABC transporter transports neither .. +PF02224 Cytidylate kinase
Pfam-B_1582 (release 5.2). Cytidylate kinase EC:2.7.4.14 catalyses the phosphorylation of cytidine 5'-monophosphate (dCMP) to cytidine 5'-diphosphate (dCDP) in the presence of ATP or GTP.. +PF01265 Cytochrome c/c1 heme lyase
+PF02322 Cytochrome oxidase subunit II
Pfam-B_997 (release 5.2). This family consists of cytochrome bd type terminal oxidases that catalyse Quinol dependent, Na+ independent oxygen uptake . Members of this family are integral membrane proteins and contain a protohaem IX centre B558. One member of the family Swiss:O05192 is implicated in having an important role in micro-aerobic nitrogen fixation in the enteric bacterium Klebsiella pneumoniae .. +PF00283 cytochr_b559;
Cytochrome b559, alpha (gene psbE) and beta (gene psbF)subunits. +PF00284 cytochr_b559a;
Lumenal portion of Cytochrome b559, alpha (gene psbE) subunit. This family is the lumenal portion of cytochrome b559 alpha chain; matches to this family should be accompanied by a match to the Pfam:PF00283 family also. The Prosite pattern matches the transmembrane region of the cytochrome b559 alpha and beta subunits.. +PF02335 cytochr_c552;
Pfam-B_19175 (release 5.2). Cytochrome c552 (cytochrome c nitrite reductase) is a crucial enzyme in the nitrogen cycle catalysing the reduction of nitrite to ammonia. The crystal structure of cytochrome c552 reveals it to be a dimer, with 10 close-packed type c haem groups.. +PF03188 Cytochrome_B561;
Eukaryotic cytochrome b561. Pfam-B_2927 (release 6.5) & Pfam-B_7165 (Release 8.0). Cytochrome b561 is a secretory vesicle-specific electron transport protein. It is an integral membrane protein, that binds two heme groups non-covalently. This is a eukaryotic family. Members of the 'prokaryotic cytochrome b561' family can be found in Pfam: PF01292.. +PF00032 cytochrome_b_C;
Cytochrome b(C-terminal)/b6/petD. +PF00033 cytochrome_b_N;
Cytochrome b(N-terminal)/b6/petB. +PF00034 cytochrome_c;
The Pfam entry does not include all Prosite members. The cytochrome 556 and cytochrome c' families are not included. All these are now in a new clan together. The C-terminus of DUF989, Pfam:PF06181, has now been merged into this family.. +PF02167 Cytochrome_C1;
Cytochrome C1 family. +PF01322 Cytochrome_C_2;
+PF02085 Cytochrome_CIII;
Class III cytochrome C family. +PF03264 Cytochrome_NNT;
NapC/NirT cytochrome c family, N-terminal region. Pfam-B_1404 (release 6.5). Within the NapC/NirT family of cytochrome c proteins, some members, such as NapC Swiss:P33932 and NirT Swiss:P24038, bind four haem groups, while others, such as TorC Swiss:P33226, bind five haems. This family aligns the common N-terminal region that contains four haem-binding C-X(2)-CH motifs.. +PF01801 Cytomegalo_gL;
Cytomegalovirus glycoprotein L . Pfam-B_1420 (release 4.2). Glycoprotein L from cytomegalovirus serves as a chaperone for the correct folding and surface expression of glycoprotein H (gH) . Glycoprotein L is a member of the heterotrimeric gCIII complex of glycoproteins which also includes gH and gO and has an essential role in viral fusion .. +PF02239 D1_heme;
Cytochrome D1 heme domain. Pfam-B_3322 (release 5.2). Cytochrome cd1 (nitrite reductase) catalyses the conversion of nitrite to nitric oxide in the nitrogen cycle. This family represents the d1 heme binding domain of cytochrome cd1, in which His/Tyr side chains ligate the d1 heme iron of the active site in the oxidised state .. +PF02109 DAD family
Members of this family are thought to be integral membrane proteins. Some members of this family have been shown to cause apoptosis if mutated , these proteins are known as DAD for defender against death. The family also includes the epsilon subunit of the oligosaccharyltransferase that is involved in N-linked glycosylation .. +PF00130 DAG_PE-bind; C1;
Phorbol esters/diacylglycerol binding domain (C1 domain). This domain is also known as the Protein kinase C conserved region 1 (C1) domain.. +PF03982 Diacylglycerol acyltransferase
Pfam-B_11378 (release 7.2). The terminal step of triacylglycerol (TAG) formation is catalysed by the enzyme diacylglycerol acyltransferase (DAGAT) [1,2].. +PF01219 Prokaryotic diacylglycerol kinase
+PF00609 DAGKa;
Diacylglycerol kinase accessory domain. Ponting C, Schultz J, Bork P. Diacylglycerol (DAG) is a second messenger that acts as a protein kinase C activator. This domain is assumed to be an accessory domain: its function is unknown.. +PF00781 DAGKc;
Diacylglycerol kinase catalytic domain. Alignment kindly provided by SMART. Diacylglycerol (DAG) is a second messenger that acts as a protein kinase C activator. The catalytic domain is assumed from the finding of bacterial homologues. YegS is the Escherichia coli protein in this family whose crystal structure reveals an active site in the inter-domain cleft formed by four conserved sequence motifs, revealing a novel metal-binding site. The residues of this site are conserved across the family .. +PF00793 DAHP_synthetase;
DAHP synthetase I family. Pfam-B_1032 (release 2.1). Members of this family catalyse the first step in aromatic amino acid biosynthesis from chorismate. E-coli has three related synthetases, which are inhibited by different aromatic amino acids. This family also includes KDSA which has very similar catalytic activity but is involved in the first step of liposaccharide biosynthesis. The enzyme is also part of the shikimate pathway, EC:2.5.1.54.. +PF01474 Class-II DAHP synthetase family
Prodom_1974 (release 99.1). Members of this family are aldolase enzymes that catalyse the first step of the shikimate pathway.. +PF02733 Dak1 domain
This is the kinase domain of the dihydroxyacetone kinase family EC:2.7.1.29. . +PF02734 DAK2 domain
This domain is the predicted phosphatase domain of the dihydroxyacetone kinase family.. +PF03045 DAN domain
Pfam-B_1968 (release 6.4). This domain contains 9 conserved cysteines and is extracellular. Therefore the cysteines may form disulphide bridges. This family of proteins has been termed the DAN family after the first member to be reported. This family includes DAN, Cerberus and Gremlin. The gremlin protein is an antagonist of bone morphogenetic protein signaling. It is postulated that all members of this family antagonise different TGF beta Pfam:PF00019 ligands . Recent work shows that the DAN protein is not an efficient antagonist of BMP-2/4 class signals, we found that DAN was able to interact with GDF-5 in a frog embryo assay, suggesting that DAN may regulate signaling by the GDF-5/6/7 class of BMPs in vivo .. +PF01266 FAD dependent oxidoreductase
This family includes various FAD dependent oxidoreductases: Glycerol-3-phosphate dehydrogenase EC:1.1.99.5, Sarcosine oxidase beta subunit EC:1.5.3.1, D-alanine oxidase EC:1.4.99.1, D-aspartate oxidase EC:1.4.3.1.. +PF01678 Diaminopimelate epimerase
Pfam-B_2089 (release 4.1). Diaminopimelate epimerase contains two domains of the same alpha/beta fold, both contained in this family.. +PF05173 Dihydrodipicolinate reductase, C-terminus
Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The C-terminal domain of DapB has been proposed to be the substrate- binding domain.. +PF03344 Daxx Family
Pfam-B_3933 (release 6.5). The Daxx protein (also known as the Fas-binding protein) is thought to play a role in apoptosis, but the precise role played by Daxx remains to be determined. Daxx forms a complex with Axin. . +PF02277 Phosphoribosyltransferase
Pfam-B_5739 (release 5.2). This family of proteins represent the nicotinate-nucleotide- dimethylbenzimidazole phosphoribosyltransferase (NN:DBI PRT) enzymes involved in dimethylbenzimidazole synthesis. This function is essential to de novo cobalamin (vitamin B12) production in bacteria. Nicotinate mononucleotide (NaMN):5,6-dimethylbenzimidazole (DMB) phosphoribosyltransferase (CobT) from Salmonella enterica plays a central role in the synthesis of alpha-ribazole-5'-phosphate, an intermediate for the lower ligand of cobalamin .. +PF03880 YxiN_DEAD;
DbpA RNA binding domain . This RNA binding domain is found at the C-terminus of a number of DEAD helicase proteins . It is sufficient to confer specificity for hairpin 92 of 23S rRNA, which is part of the ribosomal A-site. However, several members of this family lack specificity for 23S rRNA. These proteins can generally be distinguished by a basic region that extends beyond this domain [Karl Kossen, unpublished data].. +PF04290 Tripartite ATP-independent periplasmic transporters, DctQ component
The function of the members of this family is unknown, but DctQ homologues are invariably found in the tripartite ATP-independent periplasmic transporters . . +PF03605 Dcu;
Anaerobic c4-dicarboxylate membrane transporter. TIGRFAMs, Griffiths-Jones SR. +PF03606 C4-dicarboxylate anaerobic carrier
TIGRFAMs, Griffiths-Jones SR. +PF03184 CENP-B; DDE;
DDE superfamily endonuclease. Pfam-B_2254 (release 6.5). This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. Interestingly this family also includes the CENP-B protein. This domain in that protein appears to have lost the metal binding residues and is unlikely to have endonuclease activity. Centromere Protein B (CENP-B) is a DNA-binding protein localised to the centromere.. +PF02862 DDHD domain
The DDHD domain is 180 residues long and contains four conserved residues that may form a metal binding site. The domain is named after these four residues. This pattern of conservation of metal binding residues is often seen in phosphoesterase domains. This domain is found in retinal degeneration B proteins, as well as a family of probable phospholipases. It has been shown that this domain is found in a longer C terminal region that binds to PYK2 tyrosine kinase. These proteins have been called N-terminal domain-interacting receptor (Nir1, Nir2 and Nir3) . This suggests that this region is involved in functionally important interactions in other members of this family.. +PF03345 Oligosaccharyltransferase 48 kDa subunit beta
Pfam-B_3520 (release 6.5). Members of this family are involved in asparagine-linked protein glycosylation. In particular, dolichyl-diphosphooligosaccharide-protein glycosyltransferase (DDOST), also known as oligosaccharyltransferase EC:2.4.1.119, transfers the high-mannose sugar GlcNAc(2)-Man(9)-Glc(3) from a dolichol-linked donor to an asparagine acceptor in a consensus Asn-X-Ser/Thr motif. In most eukaryotes, the DDOST complex is composed of three subunits, which in humans are described as a 48kD subunit, ribophorin I, and ribophorin II. However, the yeast DDOST appears to consist of six subunits (alpha, beta, gamma, delta, epsilon, zeta). The yeast beta subunit is a 45kD polypeptide, previously discovered as the Wbp1 protein, with known sequence similarity to the human 48kD subunit and the other orthologues. This family includes the 48kD-like subunits from several eukaryotes; it also includes the yeast DDOST beta subunit Wbp1.. +PF04625 DEC-1 protein, N-terminal region
The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing . Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa).. +PF04624 DEC-1_REPEAT;
The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing . Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). This repeat is usually found in 12 copies in the central region of the protein. Its function is unknown. Length polymorphisms of Dec-1 have been observed in wild-type strains, and are caused by changes in the numbers of the first five repeats .. +PF02352 Decorin binding protein
Pfam-B_800 (release 5.2). This family consists of decorin binding proteins from Borrelia. The decorin binding protein of Borrelia burgdorferi the lyme disease spirochetes adheres to the proteoglycan decorin found on collagen fibres .. +PF01335 Death effector domain
+PF00711 Beta defensin
Pfam-B_675 (release 2.1). The beta defensins are antimicrobial peptides implicated in the resistance of epithelial surfaces to microbial colonisation .. +PF00879 Defensin propeptide
Pfam-B_517 (release 3.0). +PF00323 defensins;
+PF01041 DegT_DnrJ_EryC1_fam;
DegT/DnrJ/EryC1/StrS aminotransferase family. Pfam-B_239 (release 3.0). The members of this family are probably all pyridoxal-phosphate-dependent aminotransferase enzymes with a variety of molecular functions. The family includes StsA Swiss:P72454, StsC Swiss:P77952 and StsS . The aminotransferase activity was demonstrated for purified StsC protein as the L-glutamine:scyllo-inosose aminotransferase EC:2.6.1.50, which catalyses the first amino transfer in the biosynthesis of the streptidine subunit of streptomycin .. +PF02286 Dehydratase large subunit
Pfam-B_7927 (release 5.2). This family contains the large subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances.. +PF02288 Dehydratase medium subunit
Pfam-B_7081 (release 5.2). This family contains the medium subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances.. +PF02287 Dehydratase small subunit
Pfam-B_6588 (release 5.2). This family contains the small subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances.. +PF00257 dehydrin;
Prosite & Pfam-B_3306 (Release 7.5). +PF02336 denso_VP4;
Pfam-B_19701 (release 5.2). Four different translation initiation sites of the densovirus capsid protein mRNA give rise to four viral proteins, VP1 to VP4. This family represents VP4.. +PF01791 DeoC/LacD family aldolase
This family includes diverse aldolase enzymes. This family includes the enzyme deoxyribose-phosphate aldolase EC:4.1.2.4, which is involved in nucleotide metabolism. The family also includes a group of related bacterial proteins of unknown function, see examples Swiss:Q57843 and Swiss:P76143. The family also includes tagatose 1,6-diphosphate aldolase (EC:4.1.2.40), which is part of the tagatose-6-phosphate pathway of galactose-6-phosphate degradation .. +PF00455 deoR; DeoR;
DeoR C terminal sensor domain. The sensor domains of the DeoR are catalytically inactive versions of the ISOCOT fold, but retain the substrate binding site . DeorC senses diverse sugar derivatives such as deoxyribose nucleoside (DeoR), tagatose phosphate (LacR), galactosamine (AgaR), myo-inositol (Bacillus IolR) and L-ascorbate (UlaR) , , .. +PF04511 Der1-like family
Pfam-B_1901 (release 7.5). +PF01880 Desulfoferrodoxin
Desulfoferrodoxins contains two types of iron: an Fe-S4 site very similar to that found in desulforedoxin from Desulfovibrio gigas and an octahedral coordinated high-spin ferrous site most probably with nitrogen/oxygen-containing ligands. Due to this rather unusual combination of active centres, this novel protein is named desulfoferrodoxin .. +PF04598 DFNA5;
Pfam-B_5153 (release 7.5). The precise function of this protein is unknown. A deletion/insertion mutation is associated with an autosomal dominant non-syndromic hearing impairment form . In addition, this protein has also been found to contribute to acquired etoposide resistance in melanoma cells . This family also includes the gasdermin protein . +PF04127 dfp;
DNA / pantothenate metabolism flavoprotein. Pfam-B_6559 (release 7.3);. The DNA/pantothenate metabolism flavoprotein (EC:4.1.1.36) affects synthesis of DNA, and pantothenate metabolism.. +PF05035 2-keto-3-deoxy-galactonokinase
2-keto-3-deoxy-galactonokinase EC:2.7.1.58 catalyses the second step in D-galactonate degradation.. +PF00926 3,4-dihydroxy-2-butanone 4-phosphate synthase
Pfam-B_1148 (release 3.0). +PF00701 Dihydrodipicolinate synthetase family
Pfam-B_557 (release 2.1). This family has a TIM barrel structure.. +PF01368 DHH family
Pfam-B_1245 (release 3.0). It is predicted that this family of proteins all perform a phosphoesterase function. It includes the single stranded DNA exonuclease RecJ.. +PF02833 DHHA2 domain
This domain is often found adjacent to the DHH domain Pfam:PF01368 and is called DHHA2 for DHH associated domain. This domain is diagnostic of DHH subfamily 2 members . The domain is about 120 residues long and contains a conserved DXK motif at its amino terminus.. +PF01180 DHOdehase;
Dihydroorotate dehydrogenase. +PF01761 3-dehydroquinate synthase
Pfam-B_1327 (release 4.2). The 3-dehydroquinate synthase EC:4.6.1.3 domain is present in isolation in various bacterial 3-dehydroquinate synthases and also present as a domain in the pentafunctional AROM polypeptide Swiss:P07547 . 3-dehydroquinate (DHQ) synthase catalyses the formation of dehydroquinate (DHQ) and orthophosphate from 3-deoxy-D-arabino heptulosonic 7 phosphate . This reaction is part of the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. . +PF01487 Type I 3-dehydroquinase
Pfam-B_2492 (release 4.0). Type I 3-dehydroquinase, (3-dehydroquinate dehydratase or DHQase.) Catalyses the cis-dehydration of 3-dehydroquinate via a covalent imine intermediate giving dehydroshikimate. Dehydroquinase functions in the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. Type II 3-dehydroquinase catalyses the trans-dehydration of 3-dehydroshikimate see Pfam:PF01220.. +PF01220 Dehydroquinase class II
+PF04706 dickkopf_N;
Dickkopf N-terminal cysteine-rich region. Pfam-B_5838 (release 7.5). Dickkopf proteins are a class of Wnt antagonists. They possess two conserved cysteine-rich regions. This family represents the N-terminal one . The C-terminal region has been found to share significant sequence similarity to the colipase fold, Pfam:PF01114, Pfam:PF02740 .. +PF05086 Dict_REP;
Dictyostelium (Slime Mold) REP protein. Pfam-B_6278 (release 7.7). This family consists of REP proteins from Dictyostelium (Slime molds). REP protein is likely involved in transcription regulation and control of DNA replication, specifically amplification of plasmid at low copy numbers. The formation of homomultimers may be required for their regulatory activity .. +PF04562 Dict_spore_N;
Dictyostelium spore coat protein, N terminus. The Dictyostelium spore coat is a polarised extracellular matrix composed of glycoproteins and cellulose. Four of the major coat glycoproteins exist as a multi-protein complex within the prespore vesicles before secretion. Of these, SP96 and SP70 are members of this family. The presence of SP96 and SP70 in the complex is necessary for the cellulose binding activity of the complex, which is in turn necessary for normal spore coat assembly . The function of this region of these proteins is not known.. +PF00186 DiHfolate_red;
Dihydrofolate reductase. +PF02966 Mitosis protein DIM1
+PF05163 DinB family
DNA damage-inducible (din) genes in Bacillus subtilis are coordinately regulated and together compose a global regulatory network that has been termed the SOS-like or SOB regulon. This family includes DinB from B. subtilis .. +PF00775 Dioxygenase;
Pfam-B_1018 (release 2.1). +PF04444 Catechol dioxygenase N terminus
This family consists of the N termini of catechol, chlorocatechol or hydroxyquinol 1,2-dioxygenase proteins. This region is always found adjacent to the dioxygenase domain (Pfam:PF00775).. +PF01866 Putative diphthamide synthesis protein
Swiss:Q16439 is a candidate tumour suppressor gene . DPH2 from yeast Swiss:P32461 , which confers resistance to diphtheria toxin has been found to be involved in diphthamide synthesis. Diphtheria toxin inhibits eukaryotic protein synthesis by ADP-ribosylating diphthamide, a posttranslationally modified histidine residue present in EF2. The exact function of the members of this family is unknown.. +PF02763 Diphtheria toxin, C domain
N-terminal catalytic (C) domain - blocks protein synthesis by transfer of ADP-ribose from NAD to a diphthamide residue of EF-2.. +PF01324 Diphtheria_tox;
Diphtheria toxin, R domain. C-terminal receptor binding (R) domain - binds to cell surface receptor, permitting the toxin to enter the cell by receptor mediated endocytosis.. +PF02764 Diphtheria toxin, T domain
Central domain of diphtheria toxin is the translocation (T) domain. pH induced conformational change in this domain triggers insertion into the endosomal membrane and facilitates the transfer of the catalytic domain into the cytoplasm.. +PF00200 disintegrin;
+PF05141 Pyoverdine/dityrosine biosynthesis protein
DIT1 is involved in synthesising dityrosine . Dityrosine is a sporulation-specific component of the yeast ascospore wall that is essential for the resistance of the spores to adverse environmental conditions. Pyoverdine biosynthesis protein PvcA is involved in the biosynthesis of pyoverdine, a cyclized isocyano derivative of tyrosine [2,3]. It has a modified Rossmann fold .. +PF04977 Septum formation initiator
DivIC from B. subtilis is necessary for both vegetative and sporulation septum formation . These proteins are mainly composed of an amino terminal coiled-coil.. +PF05103 DivIVA protein
The Bacillus subtilis divIVA1 mutation causes misplacement of the septum during cell division, resulting in the formation of small, circular, anucleate mini-cells . Inactivation of divIVA produces a mini-cell phenotype, whereas overproduction of DivIVA results in a filamentation phenotype . These proteins appear to contain coiled-coils.. +PF00778 DAX;
Alignment kindly provided by SMART. The DIX domain is present in Dishevelled and axin . This domain is involved in homo- and hetero-oligomerisation. It is involved in the homo- oligomerisation of mouse axin Swiss:O35625 . The axin DIX domain also interacts with the dishevelled DIX domain . The DIX domain has also been called the DAX domain.. +PF01738 Dienelactone hydrolase family
Pfam-B_757 (release 4.2). +PF04914 DltD C-terminal region
Pfam-B_6216 (release 7.6). DltD is an integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis . This family consists of the C-terminal region of DltD.. +PF04918 DltD_central;
Pfam-B_6216 (release 7.6). DltD is an integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis . This family consists of the central region of DltD.. +PF04915 DltD N-terminal region
Pfam-B_6216 (release 7.6). DltD is an integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis . This family consists of the N-terminal region of DltD.. +PF03474 DMRTA motif
This region is found to the C-terminus of the Pfam:PF00751 . DM-domain proteins with this motif are known as DMRTA proteins. The function of this region is unknown.. +PF00885 6,7-dimethyl-8-ribityllumazine synthase
Pfam-B_1503 (release 3.0). This family includes the beta chain of 6,7-dimethyl-8- ribityllumazine synthase EC:2.5.1.9, an enzyme involved in riboflavin biosynthesis. The family also includes a subfamily of distant archaebacterial proteins that may also have the same function for example Swiss:O28856.. +PF04976 DMSO reductase anchor subunit (DmsC)
The terminal electron transfer enzyme Me2SO reductase of Escherichia coli is a heterotrimeric enzyme composed of a membrane extrinsic catalytic dimer (DmsAB) and a membrane intrinsic polytopic anchor subunit (DmsC) .. +PF03989 DNA gyrase C-terminal domain, beta-propeller
This repeat is found as 6 tandem copies at the C-termini of GyrA and ParC DNA gyrases. It is predicted to form 4 beta strands and to probably form a beta-propeller structure . This region has been shown to bind DNA non-specifically and may stabilise the DNA-topoisomerase complex .. +PF00204 DNA_topoisoII;
This family represents the second domain of DNA gyrase B which has a ribosomal S5 domain 2-like fold. This family is structurally related to PF01119.. +PF00986 DNA gyrase B subunit, carboxyl terminus
Pfam-B_332 (release 3.0). The amino terminus of eukaryotic and prokaryotic DNA topoisomerase II are similar, but they have a different carboxyl terminus. The amino-terminal portion of the DNA gyrase B protein is thought to catalyse the ATP-dependent super-coiling of DNA. See Pfam:PF00204. The carboxyl-terminal end supports the complexation with the DNA gyrase A protein and the ATP-independent relaxation. This family also contains Topoisomerase IV. This is a bacterial enzyme that is closely related to DNA gyrase, .. +PF03603 DNA polymerase III psi subunit
TIGRFAMs, Griffiths-Jones SR. +PF01653 DNA_ligase_N;
NAD-dependent DNA ligase adenylation domain. Pfam-B_1334 (release 4.1). DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor . This domain is the catalytic adenylation domain. The NAD+ group is covalently attached to this domain at the lysine in the KXDG motif of this domain. This enzyme- adenylate intermediate is an important feature of the proposed catalytic mechanism .. +PF03120 NAD-dependent DNA ligase OB-fold domain
Pfam-B_1334 (release 4.1). DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor . This family is a small domain found after the adenylation domain Pfam:PF01653 in NAD dependent ligases . OB-fold domains generally are involved in nucleic acid binding. . +PF03119 NAD-dependent DNA ligase C4 zinc finger domain
Pfam-B_1334 (release 4.1). DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor . This family is a small zinc binding motif that is presumably DNA binding . It is found only in NAD dependent DNA ligases .. +PF00145 C-5 cytosine-specific DNA methylase
+PF01119 DNA mismatch repair protein, C-terminal domain
This family represents the C-terminal domain of the mutL/hexB/PMS1 family. This domain has a ribosomal S5 domain 2-like fold.. +PF02499 Probable DNA packing protein, C-terminus
Pfam-B_1283 (release 5.4). This family includes proteins that are probably involved in DNA packing in herpesvirus. This domain is found at the C-terminus of the protein.. +PF02500 Probable DNA packing protein, N-terminus
Pfam-B_1179 (release 5.4). This family includes proteins that are probably involved in DNA packing in herpesvirus. This domain is normally found at the N-terminus of the protein.. +PF00712 DNA polymerase III beta subunit, N-terminal domain
Pfam-B_631 (release 2.1). A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold.. +PF02767 DNA polymerase III beta subunit, central domain
Pfam-B_631 (release 2.1). A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold.. +PF02768 DNA polymerase III beta subunit, C-terminal domain
Pfam-B_631 (release 2.1). A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold.. +PF04364 DNA polymerase III chi subunit, HolC
The DNA polymerase III holoenzyme (EC:2.7.7.7) is the polymerase responsible for the replication of the Escherichia coli chromosome. The holoenzyme is composed of the DNA polymerase III core, the sliding clamp, and the DnaX clamp loading complex. The DnaX complex contains either the tau or gamma product of gene dnax, complexed to delta.delta' and to chi psi. Chi forms a 1:1 heterodimer with psi. The chi psi complex functions by increasing the affinity of tau and gamma for delta.delta' allowing a functional clamp-loading complex to form at physiological subunit concentrations. Psi is responsible for the interaction with DnaX (gamma/tau), but psi is insoluble unless it is in a complex with chi .. +PF00476 DNA polymerase family A
+PF03175 DNA polymerase type B, organellar and viral
Pfam-B_236 (release 6.5). Like Pfam:PF00136, members of this family are also DNA polymerase type B proteins. Those included here are found in plant and fungal mitochondria, and in viruses.. +PF04042 DNA polymerase alpha/epsilon subunit B
Pfam-B_12632 (release 7.3) and Pfam-B_5821 (release 7.3). This family contains a number of DNA polymerase subunits. The B subunit of the DNA polymerase alpha plays an essential role at the initial stage of DNA replication in S. cerevisiae and is phosphorylated in a cell cycle-dependent manner. DNA polymerase epsilon is essential for cell viability and chromosomal DNA replication in budding yeast. In addition, DNA polymerase epsilon may be involved in DNA repair and cell-cycle checkpoint control. The enzyme consists of at least four subunits in mammalian cells as well as in yeast. The largest subunit of DNA polymerase epsilon is responsible for polymerase activity. In mouse, the DNA polymerase epsilon subunit B is the second largest subunit of the DNA polymerase. A part of the N-terminal was found to be responsible for the interaction with SAP18. Experimental evidence suggests that this subunit may recruit histone deacetylase to the replication fork to modify the chromatin structure .. +PF04931 DNA_pol_V;
Pfam-B_10566 (release 7.6). This family includes the fifth essential DNA polymerase in yeast EC:2.7.7.7. Pol5p is localised exclusively to the nucleolus and binds near or at the enhancer region of rRNA-encoding DNA repeating units.. +PF00336 DNA polymerase (viral) C-terminal domain
Pfam-B_107 (release 1.0). +PF00242 DNA polymerase (viral) N-terminal domain
Pfam-B_107 (release 1.0). +PF04104 Eukaryotic and archaeal DNA primase, large subunit
DNA primase is the polymerase that synthesises small RNA primers for the Okazaki fragments made during discontinuous DNA replication. DNA primase is a heterodimer of two subunits, the small subunit Pri1 (48 kDa in yeast), and the large subunit Pri2 (58 kDa in the yeast S. cerevisiae) . The large subunit of DNA primase forms interactions with the small subunit and the structure implicates that it is not directly involved in catalysis, but plays roles in correctly positioning the primase/DNA complex, and in the transfer of RNA to DNA polymerase .. +PF01896 Eukaryotic and archaeal DNA primase small subunit
DNA primase synthesises the RNA primers for the Okazaki fragments in lagging strand DNA synthesis. DNA primase is a heterodimer of large and small subunits. This family also includes baculovirus late expression factor 1 or LEF-1 proteins. Baculovirus LEF-1 is a DNA primase enzyme . Bacterial DNA primase adopts a different fold to archaeal and eukaryotic primases.. +PF03604 DNA directed RNA polymerase, 7 kDa subunit
+PF00521 DNA gyrase/topoisomerase IV, subunit A
Pfam-B_55 (release 1.0). +PF01556 DnaJ_C;
DnaJ C terminal domain. Pfam-B_342 (release 4.0). This family consists of the C terminal region from the DnaJ protein. It is always found associated with Pfam:PF00226 and Pfam:PF00684. DnaJ is a chaperone associated with the Hsp70 heat-shock system involved in protein folding and renaturation after stress. The two C-terminal domains CTDI and this, CTDII, are necessary for maintaining the J-domains in their specific relative positions .. +PF00684 DnaJ central domain
Pfam-B_89 (release 2.1). The central cysteine-rich (CR) domain of DnaJ proteins contains four repeats of the motif CXXCXGXG where X is any amino acid. The isolated cysteine rich domain folds in zinc dependent fashion. Each set of two repeats binds one unit of zinc. Although this domain has been implicated in substrate binding, no evidence of specific interaction between the isolated DNAJ cysteine rich domain and various hydrophobic peptides has been found .. +PF03265 Deoxyribonuclease II
Pfam-B_4508 (release 6.5). +PF01712 Deoxynucleoside kinase
Pfam-B_1744 (release 4.1). This family consists of various deoxynucleoside kinases cytidine EC:2.7.1.74, guanosine EC:2.7.1.113, adenosine EC:2.7.1.76 and thymidine kinase EC:2.7.1.21 (which also phosphorylates deoxyuridine and deoxycytosine.) These enzymes catalyse the production of deoxynucleotide 5'-monophosphate from a deoxynucleoside. Using ATP and yielding ADP in the process.. +PF00404 celCC;
Dockerin type I repeat. The dockerin repeat is the binding partner of the cohesin domain Pfam:PF00963. The cohesin-dockerin interaction is the crucial interaction for complex formation in the cellulosome . The dockerin repeats, each bearing homology to the EF-hand calcium-binding loop bind calcium .. +PF04118 Dopey, N-terminal
Pfam-B_17466 (release 7.3);. DopA is the founding member of the Dopey family and is required for correct cell morphology and spatiotemporal organisation of multicellular structures in the filamentous fungus Aspergillus nidulans. DopA homologues are found in mammals. S. cerevisiae DOP1 is essential for viability and, affects cellular morphogenesis .. +PF04556 DpmII;
DpnII restriction endonuclease. Members of this family are type II restriction enzymes (EC:3.1.21.4). They recognise the double-stranded unmethylated sequence GATC and cleave before G-1 . http://rebase.neb.com/rebase/enz/DpnII.html. +PF04244 DUF426;
Deoxyribodipyrimidine photo-lyase-related protein. This family appears to be related to Pfam:PF00875.. +PF05219 DREV methyltransferase
Pfam-B_6662 (release 7.7). +PF02635 DsrE/DsrF-like family
DsrE is a small soluble protein involved in intracellular sulfur reduction . This family also includes DsrF.. +PF01916 Deoxyhypusine synthase
Eukaryotic initiation factor 5A (eIF-5A) contains an unusual amino acid, hypusine [N epsilon-(4-aminobutyl-2-hydroxy)lysine]. The first step in the post-translational formation of hypusine is catalysed by the enzyme deoxyhypusine synthase (DS) EC:1.1.1.249. The modified version of eIF-5A, and DS, are required for eukaryotic cell proliferation .. +PF01323 DSBA-like thioredoxin domain
This family contains a diverse set of proteins with a thioredoxin-like structure Pfam:PF00085. This family also includes 2-hydroxychromene-2-carboxylate (HCCA) isomerase enzymes, which catalyse one step in prokaryotic polyaromatic hydrocarbon (PAH) catabolic pathways [2,3,4]. This family also contains members with functions other than HCCA isomerisation, such as Kappa family GSTs (e.g. Swiss:P24473), whose similarity to HCCA isomerases was not previously recognised. The sequence Swiss:O07298 has been annotated as a dioxygenase but is almost certainly an HCCA isomerase enzyme. Similarly, the sequence Swiss:Q9ZI67 has been annotated as a dehydrogenase, but is most probably also an HCCA isomerase enzyme. In addition, the Rhizobium leguminosarum Swiss:Q52782 protein has been annotated as a putative glycerol-3-phosphate transfer protein, but is also most likely to be an HCCA isomerase enzyme (see ).. +PF02600 Disulfide bond formation protein DsbB
This family consists of disulfide bond formation protein DsbB from bacteria. The DsbB protein oxidises the periplasmic protein DsbA which in turn oxidises cysteines in other periplasmic proteins in order to make disulfide bonds . DsbB acts as a redox potential transducer across the cytoplasmic membrane and is an integral membrane protein . DsbB possesses six cysteines, four of which are necessary for its proper function in vivo .. +PF02683 Cytochrome C biogenesis protein transmembrane region
This family consists of the transmembrane (i.e. non-catalytic) region of Cytochrome C biogenesis proteins also known as disulphide interchange proteins. These proteins possess a protein disulphide isomerase like domain that is not found within the aligned region of this family.. +PF01984 DUF122;
Double-stranded DNA-binding domain. This domain is believed to bind double-stranded DNA of 20 bases length.. +PF04077 DsrH like protein
DsrH is involved in oxidation of intracellular sulphur in the phototrophic sulphur bacterium Chromatium vinosum D .. +PF05160 DSS1/SEM1 family
Pfam-B_22209 (release 7.7). This family contains the breast cancer tumour suppressor BRCA2-interacting protein DSS1 and its homologue SEM1, both of which are short acidic proteins. DSS1 has been shown to be a conserved component of the Rae1 mediated mRNA export pathway in Schizosaccharomyces pombe .. +PF00908 dTDP-4-dehydrorhamnose 3,5-epimerase
Pfam-B_540 (release 3.0). This family catalyse the isomerisation of dTDP-4-dehydro-6-deoxy -D-glucose with dTDP-4-dehydro-6-deoxy-L-mannose. The EC number of this enzyme is 5.1.3.13.. +PF03942 DTW domain
This presumed domain is found in bacterial and eukaryotic proteins. Its function is unknown. The domain contains multiple conserved motifs including a DTXW motif that this domain has been named after.. +PF01950 DUF100;
Fructose-1,6-bisphosphatase. This is a family of bacterial and archaeal fructose-1,6-bisphosphatases (FBPases). FBPase catalyses the hydrolysis of D-fructose-1,6-bisphosphate (FBP) to D-fructose-6-phosphate (F6P) and orthophosphate and is an essential regulatory enzyme in the glyconeogenic pathway.. +PF01954 Protein of unknown function DUF104
This family includes short archaebacterial proteins of unknown function. Archaeoglobus fulgidus has twelve copies of this protein, with several being clustered together in the genome.. +PF01955 DUF105;
Adenosylcobinamide amidohydrolase. This prokaryotic protein family includes CbiZ which converts adenosylcobinamide (AdoCbi) to adenosylcobyric acid (AdoCby), an intermediate of the de novo coenzyme B12 biosynthetic route .. +PF01956 DUF106; Methyltrn_RNA_3;
Integral membrane protein DUF106. This archaebacterial protein family has no known function. Members are predicted to be integral membrane proteins.. +PF01957 DUF107;
NfeD-like C-terminal, partner-binding. NfeD-like proteins are widely distributed throughout prokaryotes and are frequently associated with genes encoding stomatin-like proteins (slipins). There appear to be three major groups: an ancestral group with only an N-terminal serine protease domain and this C-terminal beta sheet-rich domain which is structurally very similar to the OB-fold domain, associated with its neighbouring slipin cluster; a second major group with an additional middle, membrane-spanning domain, associated in some species with eoslipin and in others with yqfA; a final 'artificial' group which unites truncated forms lacking the protease region and associated with their ancestral gene partner, either yqfA or eoslipin. This NfeD, C-terminal, domain appears to be the major one for relating to the associated protein. NfeD homologues are clearly reliant on their conserved gene neighbour which is assumed to be necessary for function, either through direct physical interaction or by functioning in the same pathway, possibly involved with lipid-rafts .. +PF01958 Domain of unknown function DUF108
This family has no known function. It is found to compose the complete protein in archaebacteria and a single domain in a large C. elegans protein Swiss:Q19527.. +PF01959 DUF109;
3-dehydroquinate synthase (EC 4.6.1.3). 3-Dehydroquinate synthase is an enzyme in the common pathway of aromatic amino acid biosynthesis that catalyses the conversion of 3-deoxy-D-arabino-heptulosonic acid 7-phosphate (DAHP) into 3-dehydroquinic acid . This synthesis of aromatic amino acids is an essential metabolic function for most prokaryotic as well as lower eukaryotic cells, including plants. The pathway is absent in humans; therefore, DHQS represents a potential target for the development of novel and selective antimicrobial agents. Owing to the threat posed by the spread of pathogenic bacteria resistant to many currently used antimicrobial drugs, there is clearly a need to develop new anti-infective drugs acting at novel targets. A further potential use for DHQS inhibitors is as herbicides .. +PF01345 Domain of unknown function DUF11
Pfam-B_1553 (release 3.0). A domain of unknown function found in multiple copies in several archaebacterial proteins.. +PF01969 Protein of unknown function DUF111
This prokaryotic family has no known function.. +PF01970 DUF112;
Tripartite tricarboxylate transporter TctA family. This family, formerly known as DUF112, is a family of bacterial and archaeal tripartite tricarboxylate transporters of the extracytoplasmic solute binding receptor-dependent transporter group of families, distinct from the ABC and TRAP-T families . TctA is part of the tripartite TctABC system which, as characterised in S. typhimurium , is a secondary carrier that depends for activity on the extracytoplasmic tricarboxylate-binding receptor TctC as well as two integral membrane proteins, TctA and TctB. Complete three-component systems are found only in bacteria. TctA is a large transmembrane protein with up to 12 predicted membrane spanning regions in bacteria and up to 11 such in archaea, with the N-terminal within the cytoplasm. TctA is thought to be a permease, and in most other bacteria functions without TctB and TctC molecules .. +PF01972 DUF114;
Serine dehydrogenase proteinase. This family of archaebacterial proteins, formerly known as DUF114, has been found to be a serine dehydrogenase proteinase distantly related to ClpP proteinases that belong to the serine proteinase superfamily. The family has a catalytic triad of Ser, Asp, His residues, which shows an altered residue ordering compared with the ClpP proteinases but similar to that of the carboxypeptidase clan .. +PF01973 DUF115;
Protein of unknown function DUF115. This family of archaebacterial proteins has no known function.. +PF01976 Protein of unknown function DUF116
This archaebacterial protein has no known function. The protein contains seven conserved cysteines and may also be an integral membrane protein.. +PF01978 DUF118;
Sugar-specific transcriptional regulator TrmB. One member of this family, TrmB, has been shown to be a sugar-specific transcriptional regulator of the trehalose/maltose ABC transporter in Thermococcus litoralis.. +PF01982 DUF120;
Domain of unknown function DUF120. This domain is a CTP-dependent riboflavin kinase (RFK), found in archaea, that catalyses the phosphorylation of riboflavin to form flavin mononucleotide in riboflavin biosynthesis EC:2.7.1.26. Its structure resembles a RIFT barrel, structurally similar to but topologically distinct from bacterial and eukaryotic examples. The N-terminal is a winged helix-turn-helix DNA-binding domain, and the C-terminal half is most similar in sequence to a group of cradle-loop barrels. Swiss:O28174 has this domain attached to Pfam:PF00325.. +PF01983 DUF121;
Guanylyl transferase CofC like. Coenzyme F420 is a hydride carrier cofactor that functions during methanogenesis. This family of proteins represents CofC, a nucleotidyl transferase that is involved in coenzyme F420 biosynthesis. CofC has been shown to catalyse the formation of lactyl-2-diphospho-5'-guanosine from 2-phospho-L-lactate and GTP .. +PF01986 Domain of unknown function DUF123
+PF01987 DUF124;
Mitochondrial biogenesis AIM24. In eukaryotes, this domain is involved in mitochondrial biogenesis . Its function in prokaryotes is unknown.. +PF01988 DUF125;
This family includes the vacuolar Fe2+/Mn2+ uptake transporter Swiss:P47818, Ccc1 and the vacuolar iron transporter VIT1 Swiss:Q9ZUA5.. +PF01989 Protein of unknown function DUF126
This archaebacterial protein family has no known function.. +PF01994 DUF127;
tRNA ribose 2'-O-methyltransferase, aTrm56. This family is an aTrm56 that catalyses the 2'-O-methylation of the cytidine residue in archaeal tRNA, using S-adenosyl-L-methionine. Biochemical assays showed that aTrm56 forms a dimer and prefers the L-shaped tRNA to the lambda form as its substrate . aTrm56 consists of the SPOUT domain, which contains the characteristic deep trefoil knot for AdoMet binding, and a unique C-terminal beta-hairpin .. +PF01995 Domain of unknown function DUF128
This archaebacterial protein family has no known function. The domain is found duplicated in Swiss:O27611. Many of these are attached to an N-terminal winged helix domain suggesting these are transcriptional regulators and that this domain has a ligand binding function.. +PF01996 DUF129;
F420-0:Gamma-glutamyl ligase. F420-0:Gamma-glutamyl ligase (EC:6.3.2.-) is an enzyme involved in F420 biosynthesis pathway. It catalyses the GTP-dependent successive addition of multiple gamma-linked L-glutamates to the L-lactyl phosphodiester of 7,8-didemethyl-8-hydroxy-5-deazariboflavin (F420-0). This reaction produces polyglutamated F420 derivatives. GTP + F420-0 + n L-glutamate -> GDP + phosphate + F420-n. +PF02343 DUF130; R03H10.4;
TRA-1 regulated protein R03H10.4. Pfam-B_814 (release 5.2). This family of proteins represents the protein product of the gene R03H10.4 which is located near a sequence that matches the TRA-1 binding consensus. TRA-1 is a transcription factor which controls sexual differentiation in C.elegans. R03H10.4 shows male-enriched reporter gene expression and acts as a direct target of TRA-1 regulation .. +PF01998 Protein of unknown function DUF131
This archaebacterial protein family has no known function. The proteins are predicted to contain two transmembrane helices.. +PF02001 Protein of unknown function DUF134
This family of archaeal proteins has no known function.. +PF02006 Protein of unknown function DUF137
This family of archaeal proteins has no known function.. +PF02363 DUF139;
Cysteine rich repeat. Pfam-B_602 (release 5.2). This Cysteine repeat C-X3-C-X3-C is repeated in sequences of this family, 34 times in Swiss:O17970. The function of these repeats is unknown as is the function of the proteins in which they occur. Most of the sequences in this family are from C. elegans.. +PF02405 DUF140;
Pfam-B_1126 (release 5.2). This domain functions as a permease. In Swiss:Q7DD59 it is involved in L-glutamate import into the cell . In Swiss:Q8L4R0 it is involved in lipid transfer within the cell .. +PF02408 DUF141;
Pfam-B_1716 (release 5.4). This is a family of hypothetical C. elegans proteins. The aligned region has no known function nor do any of the proteins which possess it. However, this domain is related to the CUB domain.. +PF02410 DUF143;
Oligomerisation domain. Pfam-B_1798 (release 5.4). In yeasts, this domain is required for the oligomerisation of ATP synthase subunit 9 into a ring structure .. +PF02413 DUF144;
Caudovirales tail fibre assembly protein. Pfam-B_1800 (release 5.4). This family contains bacterial and phage tail fibre assembly proteins . E.coli contains several members of this family although the function of these proteins is uncertain.. +PF02415 DUF145; Chlamydia_PMP;
Chlamydia polymorphic membrane protein (Chlamydia_PMP) repeat. This family contains several Chlamydia polymorphic membrane proteins. Chlamydia pneumoniae is an obligate intracellular bacterium and a common human pathogen causing infection of the upper and lower respiratory tract. Common for the Pmps are the tetrapeptide GGA(I/V/L) motif repeated several times in the N-terminal part. The C-terminal half is characterised by conserved tryptophans and a carboxy-terminal phenylalanine. A signal peptide leader sequence is predicted in 20 C. pneumoniae Pmps, which indicates an outer membrane localisation. Pmp10 and Pmp11 contain a signal peptidase II cleavage site suggesting lipid modification. The C. pneumoniae pmp genes represent 17.5% of the chlamydia-specific coding capacity and they are all transcribed during chlamydial growth but the function of Pmps remains unknown . This family shows some similarity to Pfam:PF05594 and hence is likely to also form a beta-helical structure (personal obs:C Yeats).. +PF02457 DUF147;
DisA bacterial checkpoint controller nucleotide-binding. Pfam-B_1846 (release 5.4). The DisA protein is a bacterial checkpoint protein that dimerises into an octameric complex. The protein consists of three distinct domains. This domain is the first and is a globular, nucleotide-binding region; the next 146-289 residues constitute the DisA-linker family, Pfam:PF10635, that consists of an elongated bundle of three alpha helices (alpha-6, alpha-10, and alpha-11), one side of which carries an additional three helices (alpha7-9), which thus forms a spine like-linker between domains 1 and 3. The C-terminal residues, of domain 3, are represented by family HHH, Pfam:PF00633, the specific DNA-binding domain. The octameric complex thus has structurally linked nucleotide-binding and DNA-binding HhH domains and the nucleotide-binding domains are bound to a cyclic di-adenosine phosphate such that DisA is a specific di-adenylate cyclase. The di-adenylate cyclase activity is strongly suppressed by binding to branched DNA, but not to duplex or single-stranded DNA, suggesting a role for DisA as a monitor of the presence of stalled replication forks or recombination intermediates via DNA structure-modulated c-di-AMP synthesis .. +PF02520 Domain of unknown function DUF148
Pfam-B_1103 (release 5.4). This domain has no known function nor do any of the proteins that possess it. In one member of this family Swiss:Q23614 the aligned region is repeated twice.. +PF02576 Uncharacterised BCR, YhbC family COG0779
+PF02577 DUF151;
Bifunctional nuclease. This family is a bifunctional nuclease, with both DNase and RNase activity . It forms a wedge-shaped dimer, with each monomer being triangular in shape. A large groove at the thick end of the wedge contains a possible active site .. +PF02578 DUF152; Cu_oxidase_4;
Multi-copper polyphenol oxidoreductase laccase. Laccases are multi-copper oxidoreductases able to oxidise a wide variety of phenolic and non-phenolic compounds and are widely distributed among both prokaryotes and eukaryotes. There are two main active catalytic sites with conserved histidines that are capable of binding four copper atoms .. +PF02579 DUF153;
Dinitrogenase iron-molybdenum cofactor. This family contains several NIF (B, Y and X) proteins which are iron-molybdenum cofactors (FeMo-co) in the dinitrogenase enzyme which catalyses the reduction of dinitrogen to ammonium. Dinitrogenase is a hetero-tetrameric (alpha(2)beta(2)) enzyme which contains the iron-molybdenum cofactor (FeMo-co) at its active site .. +PF02582 Uncharacterised ACR, YagE family COG1723
+PF02585 DUF158;
GlcNAc-PI de-N-acetylase. Members of this family are related to PIG-L an N-acetylglucosaminylphosphatidylinositol de-N-acetylase (EC:3.5.1.89) that catalyses the second step in GPI biosynthesis .. +PF02586 Uncharacterised ACR, COG2135
+PF01519 Protein of unknown function DUF16
Pfam-B_764 (release 4.0). The function of this protein is unknown. It appears to only occur in Mycoplasma pneumoniae. The crystal structure revealed that this domain is composed of two separated homotrimeric coiled-coils .. +PF02589 Uncharacterised ACR, YkgG family COG1556
+PF02590 DUF163;
Predicted SPOUT methyltransferase. This family of proteins are predicted to be SPOUT methyltransferases .. +PF02591 Putative zinc ribbon domain
Structural modelling suggests this domain may bind nucleic acids .. +PF02592 Uncharacterized ACR, YhhQ family COG1738
+PF02593 DUF166;
Thymidylate synthase. This family catalyses the synthesis of thymidine monophosphate (dTMP) from deoxyuridine monophosphate (dUMP). The physiological co-substrate has not yet been identified .. +PF02594 Uncharacterised ACR, YggU family COG1872
+PF02596 Uncharacterised ArCR, COG2043
+PF02598 DUF171;
Putative RNA methyltransferase. This family has a TIM barrel-like fold with a deep C-terminal trefoil knot. The arrangement of its hydrophilic and hydrophobic surfaces are opposite to that of the classic TIM barrel proteins. It is likely to bind RNA , and may function as a methyltransferase [2,3].. +PF02604 DUF172; PhdYeFM;
Antitoxin Phd_YefM, type II toxin-antitoxin system. Members of this family act as antitoxins in type II toxin-antitoxin systems . When bound to their toxin partners, they can bind DNA via the N-terminus and repress the expression of operons containing genes encoding the toxin and the antitoxin . This domain complexes with Txe toxins containing Pfam:PF06769, Fic/DOC toxins containing Pfam:PF02661 and YafO toxins containing Pfam:PF13957.. +PF02616 DUF173;
ScpA and ScpB participate in chromosomal partition during cell division. It may act via the formation of a condensin-like complex containing smc that pull DNA away from mid-cell into both cell halves. These proteins are part of the Kleisin superfamily.. +PF02617 DUF174;
ATP-dependent Clp protease adaptor protein ClpS. In the bacterial cytosol, ATP-dependent protein degradation is performed by several different chaperone-protease pairs, including ClpAP. ClpS directly influences the ClpAP machine by binding to the N-terminal domain of the chaperone ClpA. The degradation of ClpAP substrates, both SsrA-tagged proteins and ClpA itself, is specifically inhibited by ClpS. ClpS modifies ClpA substrate specificity, potentially redirecting degradation by ClpAP toward aggregated proteins .. +PF02618 DUF175; ADC_lyase;
This family of proteins is found in bacteria. Proteins in this family are typically between 332 and 389 amino acids in length. This family was previously incorrectly annotated and named as aminodeoxychorismate lyase. The structure of Swiss:P28306 was solved by X-ray crystallography.. +PF02620 Uncharacterized ACR, COG1399
+PF02621 DUF178;
Menaquinone biosynthesis. This family includes two enzymes which are involved in menaquinone biosynthesis. One which catalyses the conversion of cyclic de-hypoxanthine futalosine to 1,4-dihydroxy-6-naphthoate, and one which may be involved in the conversion of chorismate to futalosine . These enzymes comprise two domains with alpha/beta structures, a large domain and a small domain. A pocket between the two domains may form the active site, a conserved histidine located within this pocket could be the catalytic base .. +PF02622 Uncharacterized ACR, COG1678
+PF02623 DUF180;
The protein BSU35380 from Bacillus subtilis (renamed FliW) was characterised as being a flagellar assembly factor. Experimental characterisation was also carried out in Treponema pallidum (TP0658). In Campylobacter jejuni, Cj1075 has been shown to be involved in motility and flagellin biosynthesis. The two paralogues in Helicobacter pylori (HP1154 and HP1377) were found to be able to bind to flagellin. FliW proteins are involved in flagellar assembly . FliW is part of a three-part feedback loop: in Bacillus subtilis FliW inhibits CsrA (an RNA-binding protein) which inhibits FliC translation; hence FliW is required for FliC (flagellin) production .. +PF02624 DUF181;
+PF02636 DUF185;
Putative S-adenosyl-L-methionine-dependent methyltransferase. This family is a putative S-adenosyl-L-methionine (SAM)-dependent methyltransferase [1,2]. In eukaryotes it plays a role in mitochondrial complex I activity .. +PF02638 Glycosyl hydrolase like GH101
+PF01579 Domain of unknown function (DUF19)
Pfam-B_402 (release 4.1). This presumed domain has no known function. It is found in one or two copies in several Caenorhabditis elegans proteins. It is roughly 130 amino acids long. The domain contains 12 conserved cysteines which suggests that the domain is an extracellular domain and that these cysteines form six intradomain disulphide bridges. The GO annotation for this protein indicates that it has a function in nematode larval development and has a positive regulation of growth rate.. +PF02643 Uncharacterized ACR, COG1430
Two structures have been solved for members of this large (>500 members) family of bacterial proteins present mostly in environmental bacteria and metagenomes (distant homologues are also present in several Plasmodium species). TOPSAN analysis for pdb:3pjy shows that there is much similarity with the other solved structure, pdb:3m7a, solved for UniProt:Q2GA55 (Saro_0823), a homologue of Thermotoga maritima TM1668, UniProt:Q9X1Z6. The homologue in Caulobacter crescentus (CC1388), UniProt:Q9A8G6, is associated with CspD, a cold shock protein (CC1387), UniProt:Q9A8G7. However, the genomic context of UniProt:Q2GA55 is most conserved with a putative xylose isomerase, suggesting a possible role in extracellular sugar processing. Saro_0821, UniProt:Q2GA57, is annotated as an AMP-dependent synthetase and ligase. PDB:3m7a structure corresponds to the C-terminal (27-165) fragment of the YP_496102 (Saro_0823) protein and it is structurally unique, as the best hits from Dali have a Z-score of 3.8 (1nt0, 2j1t, 3kq4) and it is thus a likely candidate for a new fold. Interestingly, many of the top Dali hits are involved in sugar metabolism. There are no obvious active site-like cavities on the protein surface of 3m7a (http://www.topsan.org/Proteins/JCSG/).. +PF02645 DUF194;
Uncharacterised protein, DegV family COG1307. The structure of this protein revealed a bound fatty-acid molecule in a pocket between the two protein domains. The structure indicates that this family has the molecular function of fatty-acid binding and may play a role in the cellular functions of fatty acid transport or metabolism .. +PF02646 DUF195;
This family contains several bacterial RmuC DNA recombination proteins. The function of the RMUC protein is unknown but it is suspected that it is either a structural protein that protects DNA against nuclease action, or is itself involved in DNA cleavage at the regions of DNA secondary structures . +PF02649 DUF198;
Type I GTP cyclohydrolase folE2. This is a family of prokaryotic proteins with type I GTP cyclohydrolase activity. GTP cyclohydrolase I is the first enzyme of the de novo tetrahydrofolate biosynthetic pathway present in bacteria, fungi, and plants, and encoded in Escherichia coli by the folE gene; it is also the first enzyme of the biopterin (BH4) pathway in Homo sapiens . The invariate, highly conserved glutamate residue at position 216 in Swiss:Q5F9K6 is likely to be the substrate ligand and the metal ligand is likely to be the cysteine at position 147. The enzyme is Zinc 2+ dependent .. +PF02650 DUF199;
WhiA C-terminal HTH domain. This domain is found at the C-terminus of the sporulation regulator WhiA. It is predicted to form a DNA-binding helix-turn-helix structure . The WhiA protein also contains two N-terminal domains that are distant homologues of LAGLIDADG homing endonucleases .. +PF02655 DUF201;
+PF02656 Domain of unknown function (DUF202)
This family consists of hypothetical proteins some of which are putative membrane proteins. No functional information or experimental verification of function is known. This domain is around 100 amino acids long.. +PF02659 Domain of unknown function DUF
This family consists of hypothetical transmembrane proteins none of which have any known function; the aligned region is 180 amino acids long.. +PF02660 DUF205;
Glycerol-3-phosphate acyltransferase. This family of enzymes catalyses the transfer of an acyl group from acyl-ACP to glycerol-3-phosphate to form lysophosphatidic acid ].. +PF02675 DUF206; AdoMetDC;
S-adenosylmethionine decarboxylase . This family contains several S-adenosylmethionine decarboxylase proteins from bacterial and archaebacterial species. S-adenosylmethionine decarboxylase (AdoMetDC), a key enzyme in the biosynthesis of spermidine and spermine, is first synthesised as a proenzyme, which is cleaved post translationally to form alpha and beta subunits. The alpha subunit contains a covalently bound pyruvoyl group derived from serine that is essential for activity [1,2].. +PF02676 DUF207;
Methyltransferase TYW3. The methyltransferase TYW3 (tRNA-yW-synthesising protein 3) has been identified in yeast to be involved in wybutosine (yW) biosynthesis . yW is a complexly modified guanosine residue that contains a tricyclic base and is found at the 3' position adjacent to the anticodon of phenylalanine tRNA. TYW3 is an N-4 methylase that methylates yW-86 to yield yW-72 in an Ado-Met-dependent manner .. +PF02677 Uncharacterized BCR, COG1636
+PF02678 DUF209;
This family consists of Pirin proteins from both eukaryotes and prokaryotes. The function of Pirin is unknown but the gene coding for this protein is known to be expressed in all tissues in the human body although it is expressed most strongly in the liver and heart. Pirin is known to be a nuclear protein, exclusively localised within the nucleoplasma and predominantly concentrated within dot-like subnuclear structures . A tomato homologue of human Pirin has been found to be induced during programmed cell death . Human Pirin interacts with Bcl-3 and NFI and hence is probably involved in the regulation of DNA transcription and replication. It appears to be an Fe(II)-containing member of the Cupin superfamily.. +PF01595 Domain of unknown function DUF21
Pfam-B_618 (release 4.1). This transmembrane region has no known function. Many of the sequences in this family are annotated as hemolysins, however this is due to a similarity to Swiss:Q54318 that does not contain this domain. This domain is found in the N-terminus of the proteins adjacent to two intracellular CBS domains Pfam:PF00571.. +PF02679 DUF210;
(2R)-phospho-3-sulfolactate synthase (ComA). In methanobacteria (2R)-phospho-3-sulfolactate synthase (ComA) catalyses the first step of the biosynthesis of coenzyme M from phosphoenolpyruvate (P-enolpyruvate). This novel enzyme catalyses the stereospecific Michael addition of sulfite to P-enolpyruvate, forming L-2-phospho-3-sulfolactate (PSL). It is suggested that the ComA-catalysed reaction is analogous to those reactions catalysed by beta-elimination enzymes that proceed through an enolate intermediate .. +PF02680 Uncharacterized ArCR, COG1888
+PF02681 Divergent PAP2 family
This family is related to the Pfam:PF01569 family (personal obs: C Yeats).. +PF02697 Uncharacterized ACR, COG1753
Structural modelling suggests this domain may bind nucleic acids .. +PF02698 DUF218 domain
+PF01629 Domain of unknown function DUF22
Pfam-B_1137 (release 4.1). This domain is found in 1 to 3 copies in archaebacterial proteins. The function of the domain is unknown. This family appears to be expanded in Archaeoglobus fulgidus.. +PF02713 Domain of unknown function DUF220
Pfam-B_1412 (release 5.5). This family consists of a region in several Arabidopsis thaliana hypothetical proteins none of which have any known function. The aligned region contains two cysteine residues.. +PF02714 Domain of unknown function DUF221
Pfam-B_1596 (release 5.5). This family consists of hypothetical transmembrane proteins none of which have any known function; the aligned region is at most 538 residues long.. +PF02720 Domain of unknown function (DUF222)
Pfam-B_1711 (release 5.5). This family is often found associated to the N-terminus of the HNH endonuclease domain Pfam:PF01844. The function of this domain is uncertain. This family has been called the 13E12 repeat family .. +PF02721 Domain of unknown function DUF223
Pfam-B_1714 (release 5.5). +PF02890 Borrelia family of unknown function DUF226
Pfam-B_1255 (release 6.0). This family of proteins are found in Borrelia. The proteins are about 190 amino acids long and have no known function.. +PF02989 Lyme disease proteins of unknown function
Pfam-B_1298 (release 6.4). +PF03003 Poxvirus proteins of unknown function
Pfam-B_1300 (release 6.4). +PF03008 Archaea bacterial proteins of unknown function
Pfam-B_1430 (release 6.4). +PF03057 Protein of unknown function
Pfam-B_488 (release 6.4). This family represents the C-terminal region of a number of C. elegans proteins of unknown function.. +PF03072 MG032/MG096/MG288 family 1
Pfam-B_2298 (release 6.4). This family consists entirely of mycoplasmal proteins. Their function is unknown. Another related family, Pfam:PF03086, also consists entirely of mycoplasmal proteins of the MG032/MG096/MG288 family. Some proteins, such as Swiss:P75072, are included in both families, but of course differ in the aligned residues.. +PF01638 DUF24;
HxlR-like helix-turn-helix. Pfam-B_1509 (release 4.1). HxlR, a member of this family, is a DNA-binding protein that acts as a positive regulator of the formaldehyde-inducible hxlAB operon in Bacillus subtilis.. +PF03086 MG032/MG096/MG288 family 2
Pfam-B_2385 (release 6.4). This family consists entirely of mycoplasmal proteins. Their function is unknown. Another related family, Pfam:PF03072, also consists entirely of mycoplasmal proteins of the MG032/MG096/MG288 family. Some proteins, such as Swiss:P75072, are included in both families, but of course differ in the aligned residues.. +PF03112 Uncharacterized protein family (ORF7) DUF
Pfam-B_2667 (release 6.5). Several members of this family are Borrelia burgdorferi plasmid proteins of uncharacterized function.. +PF03136 DUF245; Proteosome_20S;
Pfam-B_3042 (release 6.5). Pupylation is a novel protein modification system found in some bacteria . This family of proteins are the enzyme that can conjugate proteins of the Pup family to lysine residues in target proteins marking them for degradation. The archetypal protein in this family is PafA (proteasome accessory factor) from Mycobacterium tuberculosis . It has been suggested that these proteins are related to gamma-glutamyl-cysteine synthetases .. +PF03158 Multigene family 530 protein
Pfam-B_2304 (release 6.5). Members of this family are multigene family 530 proteins from African swine fever viruses. These proteins may be involved in promoting survival of infected macrophages .. +PF03151 DUF250;
Triose-phosphate Transporter family. Pfam-B_3234 (release 6.5). This family includes transporters with a specificity for triose phosphate .. +PF03159 DUF251;
XRN 5'-3' exonuclease N-terminus. Pfam-B_2349 (release 6.5). This family aligns residues towards the N-terminus of several proteins with multiple functions. The members of this family all appear to possess 5'-3' exonuclease activity EC:3.1.11.-. Thus, the aligned region may be necessary for 5' to 3' exonuclease function. The family also contains several Xrn1 and Xrn2 proteins. The 5'-3' exoribonucleases Xrn1p and Xrn2p/Rat1p function in the degradation and processing of several classes of RNA in Saccharomyces cerevisiae. Xrn1p is the main enzyme catalysing cytoplasmic mRNA degradation in multiple decay pathways, whereas Xrn2p/Rat1p functions in the processing of rRNAs and small nucleolar RNAs (snoRNAs) in the nucleus .. +PF03162 DUF252;
Tyrosine phosphatase family. Pfam-B_3756 (release 6.5). This family is closely related to the Pfam:PF00102 and Pfam:PF00782 families.. +PF03190 DUF255;
Protein of unknown function, DUF255. Pfam-B_2331 (release 6.5). +PF03192 Pyrococcus protein of unknown function, DUF257
Pfam-B_2788 (release 6.5). +PF03193 Protein of unknown function, DUF258
Pfam-B_2832 (release 6.5). +PF03196 Protein of unknown function, DUF261
Pfam-B_2687 (release 6.5). +PF03235 Protein of unknown function DUF262
Pfam-B_3462 (release 6.5). +PF03237 DUF264;
Terminase-like family. Pfam-B_3575 (release 6.5). This family represents a group of terminase proteins.. +PF03266 DUF265;
Pfam-B_4081 (release 6.5). This domain is found across all species from bacteria to human, and the function was determined first in a hyperthermophilic bacterium to be an NTPase . The structure of one member-sequence represents a variation of the RecA fold, and implies that the function might be that of a DNA/RNA modifying enzyme . The sequence carries both a Walker A and Walker B motif which together are characteristic of ATPases or GTPases. The protein exhibits an increased expression profile in human liver cholangiocarcinoma when compared to normal tissue .. +PF03270 Protein of unknown function, DUF269
Pfam-B_4172 (release 6.5). Members of this family may be involved in nitrogen fixation, since they are found within nitrogen fixation operons.. +PF03189 DUF270;
Pfam-B_2323 (release 6.5). +PF03407 DUF271;
Nucleotide-diphospho-sugar transferase. Pfam-B_4460 (release 6.6). Proteins in this family have been predicted to be nucleotide-diphospho-sugar transferases.. +PF03314 Protein of unknown function, DUF273
Pfam-B_3636 (release 6.5). +PF03434 DUF276
Pfam-B_4450 (release 6.6). This family is specific to Borrelia burgdorferi. The protein is encoded on extra-chromosomal DNA. This domain has no known function.. +PF03353 DUF278; Lin-8_Ec; Lin-8_Ce;
Ras-mediated vulval-induction antagonist. Pfam-B_3924 (release 6.5). LIN-8 is a nuclear protein, present at the sites of transcriptional repressor complexes, which interacts with LIN-35 Rb. LIN-35 Rb is a product of the class B synMuv gene lin-35 which silences genes required for vulval specification through chromatin modification and remodelling . The biological role of the interaction has not yet been determined however predictions have been made. The interaction shows that class A synMuv genes control vulval induction through the transcriptional regulation of gene expression. LIN-8 normally functions as part of a protein complex however when the complex is absent, other family members can partially replace LIN-8 activity .. +PF01709 DUF28;
Transcriptional regulator. Pfam-B_1741 (release 4.1). This is a family of transcriptional regulators. In mammals, it activates the transcription of mitochondrially-encoded COX1 . In bacteria, it negatively regulates the quorum-sensing response regulator by binding to its promoter region .. +PF03436 Domain of unknown function (DUF281)
Pfam-B_4313 (release 6.6). This family of worm domain has no known function. The boundaries of the presumed domain are rather uncertain.. +PF03383 DUF286;
Caenorhabditis serpentine receptor-like protein, class xa. Pfam-B_2888 (release 6.6). This family contains various Caenorhabditis proteins, some of which are annotated as being serpentine receptors, mainly of the xa class.. +PF03384 Drosophila protein of unknown function, DUF287
Pfam-B_2926 (release 6.6). +PF03385 Protein of unknown function, DUF288
Pfam-B_3134 (release 6.6). +PF01062 Worm_family_8;DUF289;
Bestrophin, RFP-TM, chloride channel. Bestrophin is a 68-kDa basolateral plasma membrane protein expressed in retinal pigment epithelial cells (RPE). It is encoded by the VMD2 gene, which is mutated in Best macular dystrophy, a disease characterised by a depressed light peak in the electrooculogram . VMD2 encodes a 585-amino acid protein with an approximate mass of 68 kDa which has been designated bestrophin. Bestrophin shares homology with the Caenorhabditis elegans RFP gene family, named for the presence of a conserved arginine (R), phenylalanine (F), proline (P), amino acid sequence motif. Bestrophin is a plasma membrane protein, localised to the basolateral surface of RPE cells consistent with a role for bestrophin in the generation or regulation of the EOG light peak. Bestrophin and other RFP family members represent a new class of chloride channels, indicating a direct role for bestrophin in generating the light peak . The VMD2 gene underlying Best disease was shown to represent the first human member of the RFP-TM protein family. More than 97% of the disease-causing mutations are located in the N-terminal RFP-TM domain implying important functional properties . The bestrophins are four-pass transmembrane chloride-channel proteins , and the RFP-TM or bestrophin domain extends from the N-terminus through approximately 350 amino acids and contains all of the TM domains as well as nearly all reported disease causing mutations . Interestingly, the RFP motif is not conserved evolutionarily back beyond Metazoa, neither is it in plant members.. +PF01724 Domain of unknown function DUF29
Pfam-B_2003 (release 4.1). This family consists of various hypothetical proteins from cyanobacteria, none of which are functionally described. The aligned region is approximately 120-140 amino acids long corresponding to almost the entire length of the proteins in the family. Swiss:Q2RPE2, PDB:3fcn, is a small protein that has a novel all-alpha fold. The N-terminal helical hairpin is likely to function as a dimerisation module. This protein is a member of PFam family PF01724. The function of this protein is unknown. One protein sequence contains a fusion of this protein and a DnaB domain, suggesting a possible role in DNA helicase activity (hypothetical). Dali hits have low Z and high rmsd, suggesting probably only topological similarities (not functional relevance) (details derived from TOPSAN). The family has several highly conserved sequence motifs, including YD/ExD, DxxNVxEEIE, and CPY/F/W, as well as conserved tryptophans.. +PF03442 DUF291;
Carbohydrate binding domain X2. This domain binds to cellulose and to bacterial cell walls. It is found in glycosyl hydrolases and in scaffolding proteins of cellulosomes (multiprotein glycosyl hydrolase complexes). In the cellulosome it may aid cellulose degradation by anchoring the cellulosome to the bacterial cell wall and by binding it to its substrate . This domain has an Ig-like fold .. +PF03398 DUF292;
Regulator of Vps4 activity in the MVB pathway. Pfam-B_3833 (release 6.6). ESCRT-I, -II, and -III are endosomal sorting complexes required for transporting proteins and carry out cargo sorting and vesicle formation in the multivesicular bodies, MVBs, pathway. These complexes are transiently recruited from the cytoplasm to the endosomal membrane where they bind transmembrane proteins previously marked for degradation by mono-ubiquitination. Assembly of ESCRT-III, a complex composed of at least four subunits (Vps2, Vps24, Vps20, Snf7), is intimately linked with MVB vesicle formation, its disassembly being an essential step in the MVB vesicle formation, a reaction that is carried out by Vps4, an AAA-type ATPase. The family Ist1 is a regulator of Vps4 activity; by interacting with Did2 and Vps4, Ist1 appears to regulate the recruitment and oligomerisation of Vps4. Together Ist1, Did2, and Vta1 form a network of interconnected regulatory proteins that modulate Vps4 activity, thereby regulating the flow of cargo through the MVB pathway .. +PF03444 DUF293;
Winged helix-turn-helix transcription repressor, HrcA DNA-binding. This domain is always found with a pair of CBS domains Pfam:PF00571.. +PF03445 Putative nucleotidyltransferase DUF294
This domain is found associated with Pfam:PF00571. This region is uncharacterised, however it seems to be similar to Pfam:PF01909, conserving the DXD motif.\. +PF03479 Domain of unknown function (DUF296)
Pfam-B_796 (release 7.0) & Dlakic M. This putative domain is found in proteins that contain AT-hook motifs Pfam:PF02178, which strongly suggests a DNA-binding function for the proteins as a whole. There are three highly conserved histidine residues, eg at 117, 119 and 133 in Swiss:Q46QL5, which should be a structurally conserved metal-binding unit, based on structural comparison with known metal-binding structures. The proteins should work as trimers.. +PF03537 DUF297; Glyco_hydro_114; GHL7;
Glycoside-hydrolase family GH114. This family is recognised as a glycosyl-hydrolase family, number 114. It is endo-alpha-1,4-polygalactosaminidase, a rare enzyme. It is proposed to be TIM-barrel, the most common structure amongst the catalytic domains of glycosyl-hydrolases .. +PF03618 DUF299;
Kinase/pyrophosphorylase. Pfam-B_3403 (release 7.0). This family of regulatory proteins has ADP-dependent kinase and inorganic phosphate-dependent pyrophosphorylase activity [1-3].. +PF03625 Domain of unknown function DUF302
Domain is found in an undescribed set of proteins. Normally occurs uniquely within a sequence, but is found as a tandem repeat (Swiss:Q9X8B8). Shows interesting phylogenetic distribution with majority of examples in bacteria and archaea, but also in D.melanogaster (e.g. Swiss:Q9VA18).. +PF03629 Domain of unknown function (DUF303)
Pfam-B_3622 (release 7.0). Distribution of this domain seems limited to prokaryotes and viruses. . +PF03733 Domain of unknown function (DUF307)
Domain occurs as one or more copies in a small family of putative membrane proteins.. +PF03729 RUF1;
Short repeat of unknown function (DUF308). Family of short repeats that occurs in a limited number of membrane proteins. It may divide further in short repeats of around 7-10 residues of the pattern G-#-X(2)-#(2)-X (#=hydrophobic).. +PF03745 Domain of unknown function (DUF309)
This domain is found in eubacterial and archaebacterial proteins of unknown function. The proteins contain a motif HXXXEXX(W/Y) where X can be any amino acid. This motif is likely to be functionally important and may be involved in metal binding.. +PF01732 Putative peptidase (DUF31)
Pfam-B_2152 (release 4.1). This domain has no known function. It is found in various hypothetical proteins and putative lipoproteins from mycoplasmas. It appears to be related to the superfamily of trypsin peptidases and so may have a peptidase function.. +PF03750 Protein of unknown function (DUF310)
This family contains a number of archaeal proteins that are completely uncharacterised. The proteins are between 130 and 160 amino acids long. Their C-terminus contains several conserved residues.. +PF03759 DUF315;
PRONE (Plant-specific Rop nucleotide exchanger). Pfam-B_3610 (release 7.0). This is a functional guanine exchange factor (GEF) of plant Rho GTPase .. +PF03773 Predicted permease
This family of integral membrane proteins are predicted to be permeases of unknown specificity.. +PF03778 Protein of unknown function (DUF321)
Pfam-B_876 (release 7.0). This family may be related to the FARP (FMRFamide) family, Pfam:PF01581. Currently this repeat was only detectable in Arabidopsis thaliana. . +PF03780 DUF322;
The alkaline shock protein Asp23 was identified as an alkaline shock protein that was expressed in a sigmaB-dependent manner in Staphylococcus aureus.. +PF03781 DUF323;
Sulfatase-modifying factor enzyme 1. This domain is found in eukaryotic proteins required for post-translational sulfatase modification (SUMF1). These proteins are associated with the rare disorder multiple sulfatase deficiency (MSD) . The protein product of the SUMF1 gene is FGE, formylglycine (FGly),-generating enzyme, which is a sulfatase. Sulfatases are enzymes essential for degradation and remodelling of sulfate esters, and formylglycine (FGly), the key catalytic in the active site, is unique to sulfatases . FGE is localised to the endoplasmic reticulum (ER) and interacts with and modifies the unfolded form of newly synthesised sulfatases. FGE is a single-domain monomer with a surprising paucity of secondary structure that adopts a unique fold which is stabilised by two Ca2+ ions. The effect of all mutations found in MSD patients is explained by the FGE structure, providing a molecular basis for MSD. A redox-active disulfide bond is present in the active site of FGE. An oxidised cysteine residue, possibly cysteine sulfenic acid, has been detected that may allow formulation of a structure-based mechanism for FGly formation from cysteine residues in all sulfatases . In Mycobacteria and Treponema denticola this enzyme functions as an iron(II)-dependent oxidoreductase [5,6].. +PF03787 DUF324;
COG1332, COG1567, COG1367. The molecular function of these proteins is not yet known. However, they have been identified and called the RAMP (Repair Associated Mysterious Proteins) superfamily. The members of this family have no known function they are around 300 amino acids in length and have several conserved motifs.. +PF03804 Viral domain of unknown function
+PF03885 Protein of unknown function (DUF327)
The proteins in this family are around 140-170 residues in length. The proteins contain many conserved residues, with the most conserved motifs found in the central and C-terminal region. The function of these proteins is unknown.. +PF03883 Protein of unknown function (DUF328)
Members of this family are functionally uncharacterised. They are about 250 amino acids in length.. +PF03884 Domain of unknown function (DUF329)
The function of this short domain is unknown it contains four conserved cysteines and may therefore be involved in zinc binding.. +PF03886 Protein of unknown function (DUF330)
The proteins in this family are uncharacterised. The proteins are 170-190 amino residues in length.. +PF03889 Domain of unknown function
Members of this family are uncharacterised proteins from a number of bacterial species. The proteins range in size from 50-70 residues.. +PF03891 Domain of unknown function (DUF333)
This small domain of about 70 residues is found in a number of bacterial proteins. It is found at the N-terminus of the Swiss:O28332 protein. The proteins containing this domain are uncharacterised.. +PF03904 Domain of unknown function (DUF334)
Staphylococcus aureus plasmid proteins with no characterised function.. +PF03928 Domain of unknown function (DUF336)
This family contains uncharacterised sequences, including several GlcG proteins. The alignment contains many conserved motifs that are suggestive of cofactor binding and enzymatic activity.. +PF03929 DUF337;
PepSY-associated TM helix. +PF03937 DUF339; TPR_div1;
Flavinator of succinate dehydrogenase. This family includes the highly conserved mitochondrial and bacterial proteins Sdh5/SDHAF2/SdhE. Both yeast and human Sdh5/SDHAF2 interact with the catalytic subunit of the succinate dehydrogenase (SDH) complex, a component of both the electron transport chain and the tricarboxylic acid cycle.\. Sdh5 is required for SDH-dependent respiration and for Sdh1 flavination (incorporation of the flavin adenine dinucleotide cofactor). Mutational inactivation of Sdh5 confers tumor susceptibility in humans . Bacterial homologues of Sdh5, termed SdhE, are functionally conserved being required for the flavinylation of SdhA and succinate dehydrogenase activity. Like Sdh5, SdhE interacts with SdhA. Furthermore, SdhE was characterised as a FAD co-factor chaperone that directly binds FAD to facilitate the flavinylation of SdhA. Phylogenetic analysis demonstrates that SdhE/Sdh5 proteins evolved only once in an ancestral alpha-proteobacteria prior to the evolution of the mitochondria and now remain in subsequent descendants including eukaryotic mitochondria and the alpha, beta and gamma proteobacteria . This family was previously annotated in Pfam as being a divergent TPR repeat but structural evidence has indicated this is not true.. +PF01784 DUF34;
NIF3 (NGG1p interacting factor 3). Pfam-B_1006 (release 4.2). This family contains several NIF3 (NGG1p interacting factor 3) protein homologues. NIF3 interacts with the yeast transcriptional coactivator NGG1p which is part of the ADA complex, the exact function of this interaction is unknown [1,2].. +PF03956 Membrane protein of unknown function (DUF340)
Members of this family contain a conserved core of four predicted transmembrane segments. Some members have an additional pair of N-terminal transmembrane helices. The functions of the proteins in this family are unknown.. +PF03959 DUF341;
Serine hydrolase (FSH1). This is a family of serine hydrolases .. +PF03961 Protein of unknown function (DUF342)
This family of bacterial proteins has no known function. The proteins are in the region of 500-600 amino acid residues in length.. +PF03966 DUF343;
Trm112p-like protein. The function of this family is uncertain. The bacterial members are about 60-70 amino acids in length and the eukaryotic examples are about 120 amino acids in length. The C terminus contains the strongest conservation. Trm112p is required for tRNA methylation in S. cerevisiae and is found in complexes with 2 tRNA methylases (TRM9 and TRM11) also with putative methyltransferase YDR140W . The zinc-finger protein Ynr046w is plurifunctional and a component of the eRF1 methyltransferase in yeast . The crystal structure of Ynr046w has been determined to 1.7 A resolution. It comprises a zinc-binding domain built from both the N- and C-terminal sequences and an inserted domain, absent from bacterial and archaeal orthologs of the protein, composed of three alpha-helices .. +PF03976 DUF344;
Polyphosphate kinase 2 (PPK2). Inorganic polyphosphate (polyP) plays a role in metabolism and regulation and has been proposed to serve as an energy source in a pre-ATP world. In prokaryotes, the synthesis and utilisation of polyP are catalysed by PPK1, PPK2 and polyphosphatases. Proteins with a single PPK2 domain catalyse polyP-dependent phosphorylation of ADP to ATP, whereas proteins containing 2 fused PPK2 domains phosphorylate AMP to ADP.\. The structure of PPK2 from Pseudomonas aeruginosa has revealed a 3-layer alpha/beta/alpha sandwich fold with an alpha-helical lid similar to the structures of microbial thymidylate kinases .. +PF03978 DUF345;
Borrelia burgdorferi REV protein. Pfam-B_26177 (release 7.2). This family consists of several REV proteins from Borrelia burgdorferi (Lyme disease spirochete). The function of REV is unknown although it is known that the gene is induced during the ingestion of host blood suggesting a role in the metabolic activation of borreliae to adapt to physiological stimuli .. +PF03984 Repeat of unknown function (DUF346)
This repeat was found as seven tandem copies in one protein. It is predicted to be composed of beta-strands. Thus it is likely that it forms a beta-propeller structure. It is found in association with BNR repeats, which also form a beta-propeller.. +PF03988 Repeat of Unknown Function (DUF347)
This repeat is found as four tandem repeats in a family of bacterial membrane proteins. Each repeat contains two transmembrane regions and a conserved tryptophan.. +PF01796 DUF35 OB-fold domain
Pfam-B_1390 (release 4.2). This domain has no known function and is found in conserved hypothetical archaeal and bacterial proteins. The domain is approximately 70 amino acids long. The domain is duplicated in Swiss:O53566. The structure of a DUF35 representative reveals two long N-terminal helices followed by a rubredoxin-like zinc ribbon domain and a C-terminal OB fold domain represented in this entry. OB-folds are frequently found to bind nucleic acids suggesting this domain might bind to DNA or RNA.. +PF03994 Domain of Unknown Function (DUF350)
This domain occurs in a small set of bacterial proteins. It has two transmembrane regions, and often occurs as tandem repeats. There are no conserved catalytic residues.. +PF03995 DUF351;
Peptidase inhibitor family I36. This domain is currently only found in a small set of S. coelicolor secreted proteins. There are four conserved cysteines that probably form two disulphide bonds. Proteins 2SCK31.15C (Swiss:Q9ADK5) and SCO3675 (Swiss:Q9X8V7) also have probable beta-propellers at their C-termini. This family includes Swiss:P01077 a known peptidase inhibitor of known structure. This protein has a crystallin like fold Pfam:PF00030 and is distantly related by sequence. It is not known whether other members of this family are peptidase inhibitors.. +PF04001 DUF352;
Transcription factor Vhr1. Vhr1 is a transcription factor which regulates the biotin-dependent expression of transporters VHT1 and BIO5 .. +PF04007 Protein of unknown function (DUF354)
Members of this family are around 350 amino acids in length. They are found in archaebacteria and have no known function.. +PF04009 Protein of unknown function (DUF356)
Members of this family are around 120 amino acids in length and are found in some archaebacteria. The function of this family is unknown. However it contains a conserved motif IHPPAH that may be involved in its function.. +PF04010 Protein of unknown function (DUF357)
Members of this family are short (less than 100 amino acid) proteins found in archaebacteria. The function of these proteins is unknown.. +PF04019 Protein of unknown function (DUF359)
This family of archaebacterial proteins are about 170 amino acids in length. They have no known function. The most conserved portion of the protein contains the sequence GEEDL that may be important for its function.. +PF04021 DUF361; Type_III_signal;
Class III signal peptide. This family of archaeal proteins contains an amino terminal motif QXSXEXXXL that has been suggested to be part of a class III signal sequence. With the Q being the +1 residue of the signal peptidase cleavage site . Two members of this family are cleaved by a type IV pilin-like signal peptidase.. +PF04015 Domain of unknown function (DUF362)
Domain that is sometimes present in iron-sulphur proteins.. +PF04016 Domain of unknown function (DUF364)
This domain of unknown function has a PLP-dependent transferase-like fold. Its genomic context suggests that it may have a role in anaerobic vitamin B12 biosynthesis. This domain is often found at the C-terminus of proteins containing DUF4213, Pfam:PF13938.. +PF04033 Domain of unknown function (DUF365)
Archaeal domain of unknown function.. +PF04017 Domain of unknown function (DUF366)
Archaeal domain of unknown function.. +PF04034 Domain of unknown function (DUF367)
+PF04018 Domain of unknown function (DUF368)
Predicted transmembrane domain of unknown function. Family members have between 6 and 9 predicted transmembrane segments.. +PF04126 DUF369;
This domain has a cyclophilin-like fold, consisting of an eight-stranded beta-barrel with an alpha helix located between the beta-2 and beta-3 strands and a 310 helix located between the beta-7 and beta-8 strands. The catalytic site found in human cyclophilin is not conserved in this domain, suggesting a different function for this domain [1,2].. +PF01809 DUF37;
Pfam-B_1485 (release 4.2). This domain has haemolytic activity . It is found in short (73-103 amino acid) proteins and contains three conserved cysteine residues.. +PF04025 Domain of unknown function (DUF370)
Bacterial domain of unknown function.. +PF04027 Domain of unknown function (DUF371)
Archaeal domain of unknown function.. +PF04036 Domain of unknown function (DUF372)
Domain of unknown function.. +PF04123 Domain of unknown function (DUF373)
Archaeal domain of unknown function. Predicted to be an integral membrane protein with six transmembrane regions.. +PF04028 Domain of unknown function (DUF374)
Bacterial domain of unknown function.. +PF04041 Domain of unknown function (DUF377)
This family contains many hypothetical proteins, some of which are predicted to be glycosyl hydrolases. This family was noted to belong to the Beta fructosidase superfamily in .. +PF04070 Domain of unknown function (DUF378)
Predicted transmembrane domain of unknown function. The majority of the family have two predicted transmembrane regions.. +PF04038 Domain of unknown function (DUF381)
Archaeal domain of unknown function. Strongly conserved YPLM motif.. +PF04063 Domain of unknown function (DUF383)
+PF04064 Domain of unknown function (DUF384)
+PF04074 Domain of unknown function (DUF386)
TIGRFAMs (release 2.0);. This family consists of conserved hypothetical proteins, typically about 150 amino acids in length, with no known function.. +PF04079 Putative transcriptional regulators (Ypuh-like)
TIGRFAMs (release 2.0);. This family of conserved bacterial proteins are thought to possibly be helix-turn-helix type transcriptional regulators.. +PF04076 DUF388;
Bacterial OB fold (BOF) protein. TIGRFAMs (release 2.0);. Proteins in this family form an OB-fold. Analysis of the predicted binding site of BOF family proteins implies that they lack nucleic acid-binding properties . They contain a predicted N-terminal signal peptide which indicates that they localise in the periplasm where they may function to bind proteins, small molecules, or other typical OB-fold ligands . As hypothesised for the distantly related OB-fold containing bacterial enterotoxins, the loss of nucleotide-binding function and the rapid evolution of the BOF ligand-binding site may be associated with the presence of BOF proteins in mobile genetic elements and their potential role in bacterial pathogenicity .. +PF01837 Domain of unknown function DUF39
Pfam-B_7373 (release 4.0). This presumed domain is about 360 residues long. The function of this domain is unknown. It is found in some proteins that have two C-terminal CBS Pfam:PF00571 domains. There are also proteins that contain two inserted Fe4S domains near the C-terminal end of the domain. The protein Swiss:O26943 has been misannotated as an inosine monophosphate dehydrogenase based on the similarity to the CBS domains.. +PF04094 Protein of unknown function (DUF390)
Pfam-B_1698 (release 7.3). This is a family of long proteins currently only found in the rice genome. They have no known function. However they may be some kind of transposable element.. +PF04134 Protein of unknown function, DUF393
Members of this family have two highly conserved cysteine residues near their N-terminus. The function of these proteins is unknown.. +PF04143 DUF395;
Pfam-B_2577 (release 7.3). This is an integral membrane protein. It is predicted to have a function in the transport of sulphur-containing molecules . It contains several conserved glycines and an invariant cysteine that is probably an important functional residue.. +PF04148 DUF396;
Transmembrane adaptor Erv26. Pfam-B_22900 (release 7.3);. Erv26 is an integral membrane protein that is packed into COPII vesicles and cycles between the ER and Golgi compartments. It directs pro-alkaline phosphatase into endoplasmic reticulum-derived COPII transport vesicles .. +PF04149 Domain of unknown function (DUF397)
Pfam-B_3066 (release 7.3). The function of this family is unknown. It has been suggested that some members of this family are regulators of transcription. In particular, it is thought that this may be a regulator of antibiotic production in Streptomyces coelicolor . . +PF04187 Protein of unknown function, DUF399
No function is known for any member of this family.. +PF04165 Protein of unknown function (DUF401)
TIGRFAMs (release 2.0);. Members of this family are predicted to have 10 transmembrane regions.. +PF04167 Protein of unknown function (DUF402)
Family member FomD is a predicted protein from a fosfomycin biosynthesis gene cluster in Streptomyces wedmorensis . Its function is unknown.. +PF04168 DUF403;
A predicted alpha-helical domain with a conserved ER motif.. An uncharacterized alpha helical domain containing a highly conserved ER motif and typically found as a tandem duplication. Contextual analysis suggests that it functions in a distinct peptide synthesis/modification system comprising of a transglutaminase, a peptidase of the NTN-hydrolase superfamily, an active and inactive circularly permuted ATP-grasp domains and a transglutaminase fused N-terminal to a circularly permuted COOH-NH2 ligase domain .. +PF04175 Protein of unknown function (DUF406)
TIGRFAMs (release 2.0);. Members of this family appear to be found only in gamma proteobacteria. The function of this protein family is undetermined. Solution of the structures of the two members of this family investigated bear some resemblance to that of the single domain enzyme pterin-4a-carbinolamine dehydratase, PCD. Although the residues of PCDs involved in binding of metabolite are not conserved in the two structures under study, they do correspond to a surface-region structurally aligned with residues that are highly conserved, eg Glu 89, suggesting that this region is also involved in binding of a ligand, thereby possibly constituting a catalytic site of a yet uncharacterised enzyme specific for gamma proteobacteria.. +PF04174 DUF407;
A circularly permuted ATPgrasp . An ATP-grasp family that is present both as catalytically active and inactive versions. Contextual analysis suggests that it functions in a distinct peptide synthesis/modification system that additionally contains a transglutaminase, an NTN-hydrolase, the Alpha-E domain, and a transglutaminase fused N-terminal to a circularly permuted COOH-NH2 ligase. The inactive forms are often fused N-terminal to the Alpha-E domain .. +PF04181 DUF408;
Pfam-B_22202 (release 7.3);. This family includes the human RPAP2 (RNAP II associated polypeptide) protein and the yeast Rtr1 protein . It has been suggested that this family of proteins are regulators of core RNA polymerase II function .. +PF04188 DUF409;
Mannosyltransferase (PIG-V)). Pfam-B_9248 (release 7.3);. This is a family of eukaryotic ER membrane proteins that are involved in the synthesis of glycosylphosphatidylinositol (GPI), a glycolipid that anchors many proteins to the eukaryotic cell surface. Proteins in this family are involved in transferring the second mannose in the biosynthetic pathway of GPI .. +PF04190 Protein of unknown function (DUF410)
Pfam-B_12495 (release 7.3);. This family of proteins is from Caenorhabditis elegans and has no known function. The protein has some GO references indicating that the protein has a positive regulation of growth rate and is involved in nematode larval development.. +PF04214 Protein of unknown function, DUF
The function of the members of this bacterial protein family is unknown. Some members may be involved in conferring cation resistance.. +PF04217 Protein of unknown function, DUF412
This family consists of bacterial uncharacterised proteins.. +PF04219 Protein of unknown function, DUF
+PF04220 DUF414;
Der GTPase activator (YihI). YihI activates the GTPase activity of Der, a 50S ribosomal subunit stability factor . The stimulation is specific to Der as YihI does not stimulate the GTPase activity of Era or ObgE. The interaction of YihI with Der requires only the C-terminal 78 amino acids of YihI . A yihI deletion mutant is viable and shows a shorter lag period, but the same post-lag growth rate as a wild-type strain. yihI is expressed during the lag period. Overexpression of yihI inhibits cell growth and biogenesis of the 50S ribosomal subunit . YihI is an unusual, highly hydrophilic protein with an uneven distribution of charged residues, resulting in an N-terminal region with high pI and a C-terminal region with low pI .. +PF04222 Protein of unknown function (DUF416)
This is a bacterial protein family of unknown function. Proteins in this family adopt an alpha helical structure. Genome context analysis has suggested a high probability of a functional association with histidine kinases, which implicates proteins in this family to play a role in signalling (information from TOPSAN 2Q9R).. +PF04224 Protein of unknown function, DUF417
This family of uncharacterised proteins appears to be restricted to proteobacteria.. +PF04235 Protein of unknown function (DUF418)
Probable integral membrane protein.. +PF04237 DUF419;
YjbR has a CyaY-like fold . +PF04238 Protein of unknown function (DUF420)
Predicted membrane protein with four transmembrane helices.. +PF04239 Protein of unknown function (DUF421)
+PF04240 Protein of unknown function (DUF422)
Predicted to be an integral membrane protein.. +PF04242 Protein of unknown function (DUF424)
This is a family of uncharacterised proteins.. +PF04248 Domain of unknown function (DUF427)
+PF04250 Protein of unknown function (DUF429)
+PF01861 Protein of unknown function DUF43
This family includes archaebacterial proteins of unknown function. All the members are 350-400 amino acids long.. +PF04254 Protein of unknown function (DUF432)
Archaeal protein of unknown function.. +PF04256 Protein of unknown function (DUF434)
+PF04258 DUF435;
Signal peptide peptidase. The members of this family are membrane proteins. In some proteins this region is found associated with Pfam:PF02225. This family corresponds with Merops subfamily A22B, the type example of which is signal peptide peptidase. There is a sequence-similarity relationship with Pfam:PF01080.. +PF04260 Protein of unknown function (DUF436)
TIGRFAMs (release 2.0);. Family of bacterial proteins with undetermined function. . +PF04266 DUF437;
The ASCH domain adopts a beta-barrel fold similar to the Pfam:PF01472 domain . It is thought to function as an RNA-binding domain during coactivation, RNA-processing and possibly during prokaryotic translation regulation .. +PF04282 Family of unknown function (DUF438)
+PF04283 DUF439;
Chemotaxis signal transduction system protein F from archaea. This is a family of proteins that are archaea-specific components of the bacterial-like chemotaxis signal transduction system of archaea. In H. salinarum, the CheF proteins interact with the chemotaxis proteins CheY, CheD and CheC2 as well as the flagella-accessory proteins FlaCE and FlaD, and are essential for any tactic response. CheF probably functions at the interface between the bacterial-like chemotaxis signal transduction system and the archaeal flagellar apparatus.. +PF04269 Protein of unknown function, DUF440
This family consists of uncharacterised bacterial proteins.. +PF04284 Protein of unknown function (DUF441)
Predicted to be an integral membrane protein.. +PF04273 Putative phosphatase (DUF442)
TIGRFAMs (release 2.0);. Although this domain is uncharacterised it seems likely that it performs a phosphatase function.. +PF04276 Protein of unknown function (DUF443)
TIGRFAMs (release 2.0);. Family of uncharacterised proteins.. +PF04285 Protein of unknown function (DUF444)
Bacterial protein of unknown function. One family member (Swiss:Q97LI1) is predicted to contain a von Willebrand factor (vWF) type A domain (Smart:VWA).. +PF04286 Protein of unknown function (DUF445)
Predicted to be a membrane protein.. +PF04287 tRNA pseudouridine synthase C
This family is suggested to be the catalytic domain of tRNA pseudouridine synthase C by association. The structure has been solved for one member, as PDB:2HGK, which by inference is designated in this way.. +PF04289 Protein of unknown function (DUF447)
Archaeal protein of unknown function.. +PF04296 Protein of unknown function (DUF448)
+PF01863 Protein of unknown function DUF45
This protein has no known function. Members are found in some archaebacteria, as well as Helicobacter pylori. The proteins are 190-240 amino acids long, with the C terminus being the most conserved region, containing three conserved histidines. This motif is similar to that found in Zinc proteases, suggesting that this family may also be proteases.. +PF04313 DUF450;
Type I restriction enzyme R protein N terminus (HSDR_N). This family consists of a number of N terminal regions found in type I restriction enzyme R (HSDR) proteins. Restriction and modification (R/M) systems are found in a wide variety of prokaryotes and are thought to protect the host bacterium from the uptake of foreign DNA . Type I restriction and modification systems are encoded by three genes: hsdR, hsdM, and hsdS. The three polypeptides, HsdR, HsdM, and HsdS, often assemble to give an enzyme (R2M2S1) that modifies hemimethylated DNA and restricts unmethylated DNA .. +PF04301 Protein of unknown function (DUF452)
+PF04303 DUF453;
PrpF is a protein found in the 2-methylcitrate pathway. It is structurally similar to DAP epimerase and proline racemase. This protein is likely to act to isomerise trans-aconitate to cis-aconitate .. +PF04304 Protein of unknown function (DUF454)
Predicted membrane protein.. +PF04305 Protein of unknown function (DUF455)
+PF04306 Protein of unknown function (DUF456)
This family is a putative membrane protein that contains glycine zipper motifs .. +PF04307 Predicted membrane-bound metal-dependent hydrolase (DUF457)
Family of predicted membrane-bound metal-dependent hydrolases, based on Swiss:Q97LP7. May act as phospholipases.. +PF04308 Protein of unknown function (DUF458)
Family of uncharacterised eubacterial proteins.. +PF04311 Protein of unknown function (DUF459)
Putative periplasmic protein.. +PF01864 Putative integral membrane protein DUF46
This archaebacterial protein has no known function. It contains several predicted transmembrane regions, suggesting it is an integral membrane protein.. +PF04312 Protein of unknown function (DUF460)
Archaeal protein of unknown function.. +PF04314 Protein of unknown function (DUF461)
Putative membrane or periplasmic protein.. +PF04315 Protein of unknown function, DUF462
This family consists of bacterial proteins of uncharacterised function.. +PF04317 YcjX-like family, DUF463
These proteins possess a P-loop motif.. +PF04327 Protein of unknown function (DUF464)
+PF04325 Protein of unknown function (DUF465)
Family members are found in small bacterial proteins, and also in the heavy chains of eukaryotic myosin and kinesin, C terminal of the motor domain (Myosin Pfam:PF00063, Kinesin Pfam:PF00225). Members of this family may form coiled coil structures.. +PF04328 Protein of unknown function (DUF466)
Small bacterial protein of unknown function. Structural modelling suggests this domain may bind nucleic acids .. +PF04326 DUF467; AAA_div;
Divergent AAA domain. This family is related to the Pfam:PF00004 family, and presumably has the same function (ATP-binding).. +PF04318 Protein of unknown function (DUF468)
These conserved ORFs are probably not translated into protein [Personal communication, Val Wood].. +PF04320 Protein with unknown function (DUF469)
Family of bacterial proteins with no known function.. +PF01865 DUF47;
Protein of unknown function DUF47. This family includes prokaryotic proteins of unknown function, as well as a protein annotated as the pit accessory protein from Sinorhizobium meliloti Swiss:O30498. However, the function of this protein is also unknown (Pit stands for Phosphate transport). It is probably distantly related to Pfam:PF01895 (personal obs:Yeats C).. +PF04322 Protein of unknown function (DUF473)
Family of uncharacterised Archaeal proteins.. +PF04536 DUF477; Repair_PSII; Phosphatase;
TLP18.3, Psb32 and MOLO-1 founding proteins of phosphatase. COG1512 & Pfam-B_18715 (release 10.0). This family has a Rossmann-like fold. It has phosphatase activity .. +PF04334 Protein of unknown function (DUF478)
This family contains uncharacterised protein encoded on Trypanosoma kinetoplast minicircles.. +PF04336 Protein of unknown function, DUF479
This family includes several bacterial proteins of uncharacterised function.. +PF01867 DUF48;
CRISPR associated protein Cas1. Clustered regularly interspaced short palindromic repeats (CRISPRs) are a family of DNA direct repeats found in many prokaryotic genomes. This family of proteins corresponds to Cas1, a CRISPR-associated protein. Cas1 may be involved in linking DNA segments to CRISPR .. +PF04337 Protein of unknown function, DUF480
This family consists of several proteins of uncharacterised function.. +PF04338 Protein of unknown function, DUF481
This family includes several proteins of uncharacterised function.. +PF04339 Protein of unknown function, DUF482
This family contains several proteins of uncharacterised function.. +PF04467 Protein of unknown function (DUF483)
Family of uncharacterised prokaryotic proteins.. +PF04340 Protein of unknown function, DUF484
This family consists of several proteins of uncharacterised function.. +PF04341 Protein of unknown function, DUF485
This family includes several putative integral membrane proteins.. +PF04342 Protein of unknown function, DUF486
This family contains several proteins of uncharacterised function.. +PF04343 Protein of unknown function, DUF488
This family includes several proteins of uncharacterised function.. +PF04356 Protein of unknown function (DUF489)
Protein of unknown function, cotranscribed with purB in Escherichia coli, but with function unrelated to purine biosynthesis .. +PF04357 Family of unknown function (DUF490)
+PF04361 Protein of unknown function (DUF494)
Members of this family of uncharacterised proteins are often named Smg.. +PF04362 DUF495;
Bacterial Fe(2+) trafficking. This is a family of bacterial Fe(2+) trafficking proteins.. +PF04363 Protein of unknown function (DUF496)
+PF04365 Protein of unknown function (DUF497)
+PF04430 Protein of unknown function (DUF498/DUF598)
This is a large family of uncharacterised proteins found in all domains of life. The structure shows a novel fold with three beta sheets. A dimeric form is found in the crystal structure. It was suggested that the cleft in between the two monomers might bind nucleic acid .. +PF00674 DUP family
Pfam-B_99 (release 2.1). This family consists of several yeast proteins of unknown functions. Swiss-prot annotates these as belonging to the DUP family. Several members of this family contain an internal duplication of this region.. +PF04465 Protein of unknown function (DUF499)
Family of uncharacterised hypothetical prokaryotic proteins.. +PF04366 Family of unknown function (DUF500)
Proteins in this family often also contain an SH3 domain (Pfam:PF00018), or a FYVE zinc finger (Pfam:PF01363).. +PF04417 Protein of unknown function (DUF501)
Family of uncharacterised bacterial proteins.. +PF04367 Protein of unknown function (DUF502)
Predicted to be an integral membrane protein.. +PF04456 Protein of unknown function (DUF503)
Family of hypothetical bacterial proteins.. +PF04457 Protein of unknown function (DUF504)
Family of uncharacterised proteins.. +PF04458 Protein of unknown function (DUF505)
Family of uncharacterised prokaryotic proteins.. +PF04368 Protein of unknown function (DUF507)
Bacterial protein of unknown function.. +PF04370 Domain of unknown function (DUF508)
This is a family of uncharacterised proteins from C. elegans.. +PF01871 DUF51;
This family consists of several AMMECR1 as well as several uncharacterised proteins. The contiguous gene deletion syndrome AMME is characterised by Alport syndrome, midface hypoplasia, mental retardation and elliptocytosis and is caused by a deletion in Xq22.3, comprising several genes including COL4A5, FACL4 and AMMECR1 . This family contains sequences from several eukaryotic species as well as archaebacteria and it has been suggested that the AMMECR1 protein may have a basic cellular function, potentially in either the transcription, replication, repair or translation machinery .. +PF04459 Protein of unknown function (DUF512)
Family of uncharacterised prokaryotic proteins.. +PF04375 DUF513;
This family consists of several bacterial HemX proteins. The hemX gene is not essential for haem synthesis in B. subtilis. HemX is a polytopic membrane protein which by an unknown mechanism down-regulates the level of HemA .. +PF04415 Protein of unknown function (DUF515)
Family of hypothetical Archaeal proteins.. +PF04414 DUF516;
D-aminoacyl-tRNA deacylase. Several aminoacyl-tRNA synthetases have the ability to transfer the D-isomer of their amino acid onto their cognate tRNA. D-aminoacyl-tRNA deacylases hydrolyse the ester bond between the polynucleotide and the D-amino acid, thereby preventing the accumulation of such mis-acylated and metabolically inactive tRNA molecules.. +PF04378 DUF519;
Ribosomal RNA small subunit methyltransferase D, RsmJ. RsmJ is the tenth to be found of the ten methyltransferases required for full methylation of 16S ribosomal RNA (rRNA). It specifically methylates m(2)G1516. A strain of E.coli lacking RsmJ due to deletion of the rsmJ(yhiQ) gene is missing the methyl group at G1516 and exhibits a cold-sensitive phenotype.. +PF04461 Protein of unknown function (DUF520)
Family of uncharacterised proteins.. +PF04412 Protein of unknown function (DUF521)
Family of hypothetical proteins.. +PF04463 Protein of unknown function (DUF523)
Family of uncharacterised bacterial proteins.. +PF04411 Protein of unknown function (DUF524)
This domain has been identified as a member of the PD-(D/E)XK nuclease superfamily through transitive meta profile searches . The domain has two additional beta-strands inserted to the core fold after the first core alpha-helix.\. It has been speculated that it could function as a methylation-dependent restriction .. +PF04379 Protein of unknown function (DUF525)
Members of this family include the bacterial protein ApaG and the C termini of some F-box proteins (Pfam:PF00646). F-box proteins contain a carboxyl-terminal domain that interacts with protein substrates , so this family may be involved in protein-protein interaction. The function of ApaG proteins is unknown, but mutations in the Salmonella typhimurium ApaG homologue corD gives a phenotype of low-level cobalt resistance and decreased magnesium efflux by effects on the CorA magnesium transport system .. +PF04380 DUF526;
Membrane fusogenic activity. BMFP consists of two structural domains, a coiled-coil C-terminal domain via which the protein self-associates as a trimer, and an N-terminal domain disordered at neutral pH but adopting an amphipathic alpha-helical structure in the presence of phospholipid vesicles, high ionic strength, acidic pH or SDS. BMFP interacts with phospholipid vesicles though the predicted amphipathic alpha-helix induced in the N-terminal half of the protein and promotes aggregation and fusion of vesicles in vitro.. +PF04384 DUF528;
Iron-sulphur cluster assembly. This family of proteins is likely to be involved in the assembly of iron-sulphur clusters. It may function as an adaptor protein. In Escherichia coli Swiss:P0C0L9 forms part of the isc operon, which encodes genes involved in iron-sulphur cluster assembly. Its structure is entirely alpha helical, and it contains a modified wing-helix structure, usually found in DNA-binding proteins. It binds to Fe2+ and Fe3+ ions and to the cysteine desulfurase IscS, the same surface of the protein is involved in both binding to iron and to IscS [1,2].. +PF04385 DUF529;
Domain of unknown function, DUF529. This family represents a repeated region found in several Theileria parva proteins. The repeat is normally about 70 residues long and contains a conserved aromatic residue in the middle. . +PF04409 Protein of unknown function (DUF530)
Family of hypothetical archaeal proteins.. +PF04407 Protein of unknown function (DUF531)
Family of hypothetical archaeal proteins.. +PF04391 Protein of unknown function (DUF533)
Some family members may be secreted or integral membrane proteins.. +PF04392 DUF534;
ABC transporter substrate binding protein. This family contains many hypothetical proteins and some ABC transporter substrate binding proteins.. +PF04393 Protein of unknown function (DUF535)
Family member Shigella flexneri VirK (Swiss:Q99QA5) is a virulence protein required for the expression, or correct membrane localisation of IcsA (VirG) on the bacterial cell surface , . This family also includes Pasteurella haemolytica lapB (Swiss:P32181), which is thought to be membrane-associated.. +PF04394 Protein of unknown function, DUF536
Pfam-B_2107 (release 7.3). This family aligns the C-terminal region from several bacterial proteins of unknown function that may be involved in a theta-type replication mechanism.. +PF04398 Protein of unknown function, DUF538
Pfam-B_2637 (release 7.3). This family consists of several plant proteins of unknown function.. +PF04400 Protein of unknown function (DUF539)
Putative periplasmic protein.. +PF01877 DUF54;
PH1010 Swiss:O58738 is composed of five alpha-helices (1-5) and eight beta-strands (1-8) with the following topology: beta-1, alpha-1, beta-2, beta-3, alpha-2, alpha-3, beta-4, beta-5, alpha-4, beta-6, alpha-5, beta-7, beta-8. The first six beta-strands (1-6) form a slightly twisted antiparallel beta-sheet and face five alpha-helices on one side. The last two beta-strands form an antiparallel beta-sheet in the C-terminus. PH1010 forms a characteristic homodimer structure in the crystal.\. Dimerisation of the molecule is crucial for function. The structure resembles that of some ribosomal proteins such as the 50S ribosomal protein L5 . Although the structure resembles that of the RRM-type RNA-binding domain of the ribosomal L5 protein, the residues involved in RNA-binding in the L5 protein are not conserved in this family . Despite this, these proteins bind to double-stranded RNA in a non-sequence specific manner .. +PF04402 DUF541;
Protein of unknown function (DUF541). Members of this family have so far been found in bacteria and mouse SwissProt or TrEMBL entries. However possible family members have also been identified in translated rat (Genbank:AW144450) and human (Genbank:AI478629) ESTs. A mouse family member has been named SIMPL (signalling molecule that associates with mouse pelle-like kinase). SIMPL appears to facilitate and/or regulate complex formation between IRAK/mPLK (IL-1 receptor-associated kinase) and IKK (inhibitor of kappa-B kinase) containing complexes, and thus regulate NF-kappa-B activity . Separate experiments demonstrate that a mouse family member (named LaXp180) binds the Listeria monocytogenes surface protein ActA, which is a virulence factor that induces actin polymerisation. It may also bind stathmin, a protein involved in signal transduction and in the regulation of microtubule dynamics . In bacteria its function is unknown, but it is thought to be located in the periplasm or outer membrane.. +PF04418 Domain of unknown function (DUF543)
This family of short eukaryotic proteins has no known function. Most of the members of this family are only 80 amino acid residues long. However the Arabidopsis homologue is over 300 residues long. The presumed domain contains a conserved amino terminal cysteine and a conserved motif GXGXGXG in the carboxy terminal half that may be functionally important.. +PF04424 Protein of unknown function (DUF544)
Eukaryotic protein of unknown function.. +PF04440 DUF546;
Dysbindin (Dystrobrevin binding protein 1). Pfam-B_3919 (release 7.5). Dysbindin is an evolutionary conserved 40-kDa coiled-coil-containing protein that binds to alpha- and beta-dystrobrevin in muscle and brain. Dystrophin and alpha-dystrobrevin are co-immunoprecipitated with dysbindin, indicating that dysbindin is DPC-associated in muscle. Dysbindin co-localises with alpha-dystrobrevin at the sarcolemma and is up-regulated in dystrophin-deficient muscle. In the brain, dysbindin is found primarily in axon bundles and especially in certain axon terminals, notably mossy fibre synaptic terminals in the cerebellum and hippocampus. Dysbindin may have implications for the molecular pathology of Duchenne muscular dystrophy and may provide an alternative route for anchoring dystrobrevin and the DPC to the muscle membrane . Genetic variation in the human dysbindin gene is also thought to be associated with Schizophrenia .. +PF04445 DUF548;
Putative SAM-dependent methyltransferase. This is a family of putative SAM-dependent methyltransferases.. +PF04446 DUF549;
tRNAHis guanylyltransferase. The Thg1 protein from Saccharomyces cerevisiae is responsible for adding a GMP residue to the 5' end of tRNA His . The catalytic domain Thg1 contains a RRM (ferredoxin) fold palm domain, just like the viral RNA-dependent RNA polymerases, reverse transcriptases, family A and B DNA polymerases, adenylyl cyclases, diguanylate cyclases (GGDEF domain) and the predicted polymerase of the CRISPR system . Thg1 possesses an active site with three acidic residues that chelate Mg++ cations . Thg1 catalyzes polymerization similar to the 5'-3' polymerases .. +PF01878 DUF55;
This domain was formerly known as DUF55. Crystal structures have shown that this domain is part of the PUA superfamily. This domain has been named EVE and is thought to be RNA-binding .. +PF04447 Protein of unknown function (DUF550)
This family is found in a range of Proteobacteria and a few P-22 dsDNA virus particles. The function is currently not known.. +PF04448 Protein of unknown function (DUF551)
This family represents the carboxy terminus of a protein of unknown function, found in dsDNA viruses with no RNA stage, including bacteriophages lambda and P22, and also in some Escherichia coli prophages.. +PF04472 Protein of unknown function (DUF552)
Family of uncharacterised proteins.. +PF04473 Transglutaminase-like domain
This family of uncharacterised archaeal proteins are related to Transglutaminase-like domains. This family has previously been called DUF553 and UPF0252.. +PF04474 Protein of unknown function (DUF554)
Family of uncharacterised prokaryotic proteins. Multiple predicted transmembrane regions suggest that the region is membrane associated. . +PF04475 Protein of unknown function (DUF555)
Family of uncharacterised, hypothetical archaeal proteins. . +PF04476 Protein of unknown function (DUF556)
Family of uncharacterised, hypothetical prokaryotic proteins.. +PF04452 DUF558;
RNA methyltransferase. RNA methyltransferases modify nucleotides during ribosomal RNA maturation in a site-specific manner. The Escherichia coli member is specific for U1498 methylation .. +PF04480 Protein of unknown function (DUF559)
+PF04575 Protein of unknown function (DUF560)
Pfam-B_4010 (release 7.5). Family of hypothetical bacterial proteins. . +PF04481 Protein of unknown function (DUF561)
Protein of unknown function found in a cyanobacterium, and the chloroplasts of algae.. +PF04763 Protein of unknown function (DUF562)
Pfam-B_6057 (release 7.5). Family of uncharacterised proteins.. +PF04577 Protein of unknown function (DUF563)
Pfam-B_4026 (release 7.5). Family of uncharacterised proteins.. +PF04483 Protein of unknown function (DUF565)
Predicted transmembrane protein found in plants, chloroplasts and cyanobacteria. This family is also known as YCF20.. +PF04525 DUF567;
Pfam-B_4998 (release 7.5). The structure of this family has been solved. It comprises a 12-stranded beta barrel with a central C-terminal alpha helix. This helix is thought to be a transmembrane helix. It is structurally similar to the C-terminal domain of the Tubby protein . In plants it plays a role in defense against pathogens .. +PF04601 Protein of unknown function (DUF569)
Pfam-B_4902 (release 7.5). Family of hypothetical proteins. Some family members contain two copies of the region.. +PF01881 DUF57;
CRISPR associated protein Cas6. This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats . It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation . Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers .. +PF04489 Protein of unknown function (DUF570)
Protein of unknown function, found in herpesvirus and cytomegalovirus.. +PF04672 DUF574;
S-adenosyl methyltransferase. Pfam-B_4601 (release 7.5). This family contains a SAM (S-adenosyl methyltransferase) domain, with a central beta sheet with 3 alpha-helices on both sides. Crystal packing analysis of the structure PDB:3giw from Swiss:Q82L35 suggests that a monomer is the solution state oligomeric form. An unidentified ligand (UNL, cyan) was found at the putative active site surrounded by the residues His57, His170, Phe171, Tyr216 and Met22 . The UNL is likely to be a phenylalanine or phenylalanine-like molecule. (details derived from TOPSAN).. +PF04746 Protein of unknown function (DUF575)
Pfam-B_6048 (release 7.5). Family of uncharacterised proteins. Contains several chlamydial members. . +PF04507 Protein of unknown function, DUF576
Pfam-B_2120 (release 7.5). This family contains several uncharacterised staphylococcal proteins.. +PF04510 Family of unknown function (DUF577)
Pfam-B_3938 (release 7.5). Family of Arabidopsis thaliana proteins. Many of these members contain a repeated region.. +PF04669 DUF579;
Polysaccharide biosynthesis. Pfam-B_4574 (release 7.5). This family of proteins plays a role in xylan biosynthesis in plant cell walls. Its precise role in xylan biosynthesis is unknown [1,2]. Its function in other organisms is unknown.. +PF01882 Protein of unknown function DUF58
This family of prokaryotic proteins have no known function. Swiss:P71138 a protein of unknown function in the family has been misannotated as alpha-dextrin 6-glucanohydrolase.. +PF04515 DUF580;
Plasma-membrane choline transporter. Mifsud W, Pollington J. Pfam-B_2258 (release 7.5). This family represents a high-affinity plasma-membrane choline transporter in C.elegans which is thought to be rate-limiting for ACh synthesis in cholinergic nerve terminals .. +PF04570 Protein of unknown function (DUF581)
Pfam-B_4765 (release 7.5). Family of uncharacterised proteins.. +PF04518 DUF582;
Effector from type III secretion system. Mifsud W, Eberhardt R. Pfam-B_2447 (release 7.5). This is a family of effector proteins which are secreted by the type III secretion system [1,2]. The precise function of this family is unknown.. +PF04519 DUF583;
Polymer-forming cytoskeletal. Pfam-B_2455 (release 7.5). This is a family of bactofilins, a functionally diverse class of cytoskeletal, polymer-forming, proteins that is widely conserved among bacteria. In the example species C. crescentus, two bactofilins assemble into a membrane-associated laminar structure that shows cell-cycle-dependent polar localisation and acts as a platform for the recruitment of a cell wall biosynthetic enzyme involved in polar morphogenesis. Bactofilins display distinct subcellular distributions and dynamics in different bacterial species, suggesting that they are versatile structural elements that have adopted a range of different cellular functions.. +PF04520 DUF584;
Senescence regulator. Pfam-B_2571 (release 7.5). This protein regulates the expression of proteins associated with leaf senescence in plants [1,2].. +PF04522 Protein of unknown function (DUF585)
This region represents the N terminus of bromovirus 2a protein, and is always found N terminal to a predicted RNA-dependent RNA polymerase region (Pfam:PF00978).. +PF04532 Protein of unknown function (DUF587)
This family consists of the N termini of some human herpesvirus U58 proteins, and some cytomegalovirus UL87 proteins. This region is always found N terminal to the Pfam family UL87 (Pfam:PF03043), which has no known function.. +PF04569 Protein of unknown function
Pfam-B_2799 (release 7.5). This family represents a conserved region in a number of uncharacterised plant proteins.. +PF04574 Protein of unknown function (DUF592)
This region is found in some SIR2 family proteins (Pfam:PF02146).. +PF04578 Protein of unknown function, DUF594
Pfam-B_2859 (release 7.5). +PF04591 Protein of unknown function, DUF596
Pfam-B_5061 (release 7.5). This family contains several uncharacterised proteins.. +PF04640 DUF597;
PLATZ transcription factor. Mifsud W, Riaño-Pachón D, Mistry J. Pfam-B_5458 (release 7.5). Plant AT-rich sequence and zinc-binding proteins (PLATZ) are zinc dependant DNA binding proteins. They bind to AT rich sequences and function in transcriptional repression .. +PF04654 Protein of unknown function, DUF599
Pfam-B_5550 (release 7.5). This family includes several uncharacterised proteins.. +PF00892 DUF6;
EamA-like transporter family. Pfam-B_177 (release 3.0). This family includes many hypothetical membrane proteins of unknown function.\. Many of the proteins contain two copies of the aligned region. The family used to be known as DUF6.. +PF04634 Protein of unknown function, DUF600
Pfam-B_5411 (release 7.5). This conserved region is found in several uncharacterised proteins from Gram positive bacteria.. +PF04645 Protein of unknown function, DUF603
Pfam-B_5498 (release 7.5). This family includes several uncharacterised proteins from Borrelia species.. +PF04646 Protein of unknown function, DUF604
Pfam-B_5503 (release 7.5). This family includes a conserved region found in several uncharacterised plant proteins.. +PF04657 Protein of unknown function, DUF606
Pfam-B_5554 (release 7.5). This family includes several uncharacterised bacterial proteins.. +PF01886 Protein of unknown function DUF61
Protein found in Archaebacteria. These proteins have no known function.. +PF04748 DUF610; div_psaccdeacet;
Divergent polysaccharide deacetylase. Pfam-B_5949 (release 7.5). This family is divergently related to Pfam:PF01522 (personal obs:Yeats C).. +PF04764 Protein of unknown function (DUF613)
Pfam-B_6084 (release 7.5). Family of chloroplast proteins of unknown function. Some members have two copies of the conserved region.. +PF04751 Protein of unknown function (DUF615)
This family of bacterial proteins has no known function.. +PF04765 Protein of unknown function (DUF616)
Pfam-B_6152 (release 7.5). Family of uncharacterised proteins.. +PF04768 Protein of unknown function (DUF619)
This region of unknown function is found at the C-terminus of Neurospora crassa acetylglutamate synthase (amino-acid acetyltransferase, EC: 2.3.1.1) (Swiss:Q12643). It is also found C-terminal to the amino acid kinase region (Pfam:PF00696) in some fungal acetylglutamate kinase enzymes.. +PF01887 DUF62;
S-adenosyl-l-methionine hydroxide adenosyltransferase. This is a family of proteins, previously known as DUF62, found in archaebacteria and bacteria. The structure of proteins in this family is similar to that of a bacterial fluorinating enzyme . S-adenosyl-l-methionine hydroxide adenosyltransferases utilises a rigorously conserved amino acid side chain triad (Asp-Arg-His) which may have a role in activating water to hydroxide ion . This family used to be known as DUF62.. +PF04788 Protein of unknown function (DUF620)
Pfam-B_6213 (release 7.5). Family of uncharacterised proteins.. +PF04822 DUF622;
Mifsud W, Eberhardt R. Pfam-B_3835 (release 7.6). This domain is named takusan, which is a Japanese word meaning 'many'. Members of this family regulate synaptic activity .. +PF04844 DUF623;
Transcriptional repressor, ovate. Mifsud W, Eberhardt R. Pfam-B_4487 (release 7.6). This is a family of transcriptional repressors. In plants, these proteins are important regulators of growth and development [1,2].. +PF04854 Protein of unknown function, DUF624
Pfam-B_4640 (release 7.6). This family includes several uncharacterised bacterial proteins.. +PF04776 Protein of unknown function (DUF626)
Pfam-B_2357 (release 7.6). Protein of unknown function, currently only identified in Brassicaceae.. +PF04781 Protein of unknown function (DUF627)
Pfam-B_2475 (release 7.6). This family represents the N-terminal region of several plant proteins of unknown function.. +PF04780 Protein of unknown function (DUF629)
Pfam-B_2475 (release 7.6). This family represents a region of several plant proteins of unknown function. A C2H2 zinc finger is predicted in this region in some family members, but the spacing between the cysteine residues is not conserved throughout the family.. +PF01889 Membrane protein of unknown function DUF63
Proteins found in Archaebacteria of unknown function. These proteins are probably transmembrane proteins.. +PF04816 Family of unknown function (DUF633)
Pfam-B_5077 (release 7.6). This family of proteins is uncharacterised and has no known function.. +PF04827 DUF635;
Plant transposon protein. Pfam-B_2859 (release 7.6). This family contains plant transposases which are putative members of the PIF / Ping-Pong family .. +PF04828 DUF636;
Glutathione-dependent formaldehyde-activating enzyme. Pfam-B_2779 (release 7.6). +PF04830 Possible hemagglutinin (DUF637)
Pfam-B_2732 (release 7.6). This family represents a conserved region found in a bacterial protein which may be a hemagglutinin or hemolysin.. +PF04829 DUF638;
Pre-toxin domain with VENN motif. Pfam-B_2732 (release 7.6). This family represents a conserved region found in many bacterial polymorphic toxins which is located before the C-terminal toxin modules .. +PF04842 Plant protein of unknown function (DUF639)
Pfam-B_6010 (release 7.6). Plant protein of unknown function.. +PF04852 Protein of unknown function (DUF640)
Pfam-B_6053 (release 7.6). This family represents a conserved region found in plant proteins including Resistance protein-like protein (Swiss:O49468).. +PF04862 Protein of unknown function (DUF642)
Pfam-B_4723 (release 7.6). This family represents a duplicated conserved region found in a number of uncharacterised plant proteins, potentially in the stem. There is a conserved CGP sequence motif.. +PF04867 Protein of unknown function (DUF643)
Pfam-B_6086 (release 7.6). Protein of unknown function found in Borrelia burgdorferi, the Lyme disease spirochete.. +PF04870 DUF644;
Mifsud W, Eberhardt R. Pfam-B_4889 (release 7.6). This family of proteins plays a role in the moulting cycle of nematodes, which involves the synthesis of a new collagen-rich cuticle underneath the existing cuticle and the subsequent removal of the old cuticle .. +PF04875 Protein of unknown function, DUF645
Mifsud W, Eberhardt R, Haft D. Pfam-B_4997 (release 7.6). This family includes several uncharacterised proteins from Vibrio cholerae. There is some doubt regarding the existence of these proteins, they are encoded by open reading frames contained within a repeated region in the Vibrio superintegron.. +PF04883 DUF646;
Bacteriophage HK97-gp10, putative tail-component. Pfam-B_6160 (release 7.6). This family of proteins is found in the caudovirales. It may be a tail component.. +PF04890 Family of unknown function (DUF648)
Pfam-B_5530 (release 7.6). Family of hypothetical Chlamydia proteins. This family may well comprise of two domains, as some members only match the N-terminus. . +PF04894 Archaeal protein of unknown function (DUF650)
Pfam-B_6199 (release 7.6). This family represents the amino terminal region of an archaeal protein of unknown function.. +PF04895 Archaeal protein of unknown function (DUF651)
Pfam-B_6199 (release 7.6). This family represents the carboxy terminal region of an archaeal protein of unknown function.. +PF04910 DUF654;
Transcriptional repressor TCF25. Mifsud W, Eberhardt R. Pfam-B_6652 (release 7.6). Members of this family are transcriptional repressors. They may act by increasing histone deacetylase activity at promoter regions .. +PF04919 Protein of unknown function (DUF655)
Pfam-B_6697 (release 7.6). This family includes several uncharacterised archaeal proteins. This protein appears to contain two HHH motifs.. +PF04920 Family of unknown function (DUF656)
Pfam-B_5777 (release 7.6). A family of hypothetical proteins from Beet necrotic yellow vein virus.. +PF04936 Protein of unknown function (DUF658)
Pfam-B_5062 (release 7.6). Protein of unknown function found in Lactococcus lactis bacteriophages.. +PF04937 Protein of unknown function (DUF 659)
Pfam-B_5061 (release 7.6). Transposase-like protein with no known function.. +PF04939 DUF660;
Ribosome biogenesis regulatory protein (RRS1). Pfam-B_6906 (release 7.6). This family consists of several eukaryotic ribosome biogenesis regulatory (RRS1) proteins. RRS1 is a nuclear protein that is essential for the maturation of 25 S rRNA and the 60 S ribosomal subunit assembly in Saccharomyces cerevisiae .. +PF04978 Protein of unknown function (DUF664)
Pfam-B_5281 (release 7.6). This family is commonly found in Streptomyces coelicolor and is of unknown function. These proteins contain several conserved histidines at their N-terminus that may form a metal binding site.. +PF05006 Protein of unknown function (DUF666)
Pfam-B_5319 (release 7.6). This family contains several uncharacterised viral proteins.. +PF05018 Protein of unknown function (DUF667)
This family of proteins are highly conserved in eukaryotes. Some proteins in the family are annotated as transcription factors. However, there is currently no support for this in the literature.. +PF05003 Protein of unknown function (DUF668)
Pfam-B_4700 (release 7.6). Uncharacterised plant protein.. +PF05037 Protein of unknown function (DUF669)
Pfam-B_5014 (release 7.7). Members of this family are found in various phage proteins.. +PF05050 DUF672;
Methyltransferase FkbM domain. Pfam-B_5811 (release 7.7). This family has members from bacteria to human, and appears to be a methyltransferase.. +PF05054 Protein of unknown function (DUF673)
Pfam-B_5918 (release 7.7). Family of uncharacterised viral proteins.. +PF05056 Protein of unknown function (DUF674)
Pfam-B_5937 (release 7.7). This family is found in Arabidopsis thaliana and contains several uncharacterised proteins.. +PF05055 Protein of unknown function (DUF677)
Pfam-B_5920 (release 7.7). This family consists of AT14A like proteins from Arabidopsis thaliana. At14a has a small domain that has sequence similarities to integrins from fungi, insects and humans. Transcripts of At14a are found in all Arabidopsis tissues and localises partly to the plasma membrane .. +PF05077 Protein of unknown function (DUF678)
Pfam-B_6127 (release 7.7). This family contains several poxvirus proteins of unknown function.. +PF05078 Protein of unknown function (DUF679)
Pfam-B_6129 (release 7.7). This family contains several uncharacterised plant proteins.. +PF05079 Protein of unknown function (DUF680)
Pfam-B_6131 (release 7.7). This family contains several uncharacterised proteins which seem to be found exclusively in Rhizobium loti.. +PF05080 Protein of unknown function (DUF681)
Pfam-B_6137 (release 7.7). This family contains several uncharacterised beak and feather disease virus proteins.. +PF05081 Protein of unknown function (DUF682)
Pfam-B_6152 (release 7.7). This family consists of several uncharacterised baculovirus proteins.. +PF05082 DUF683;
Moxon SJ, Eberhardt R. Pfam-B_6161 (release 7.7). This family contains several uncharacterised bacterial proteins. These proteins are found in nitrogen fixation operons so are likely to play some role in this process. They consist of two alpha helices which are joined by a four residue linker. The helices form an antiparallel bundle and cross towards their termini. They are likely to form a rod-like dimer . They have structural similarity to the regulatory protein Rop, Pfam:PF01815.. +PF05075 Protein of unknown function (DUF684)
Moxon SJ, Pollington J. Pfam-B_6081 (release 7.7). This family contains several uncharacterised proteins from Caenorhabditis elegans. The GO annotation suggests that the protein is involved in nematode larval development and has a positive regulation on growth rate.. +PF05085 Protein of unknown function (DUF685)
Pfam-B_6261 (release 7.7). This family consists of several uncharacterised proteins from Borrelia burgdorferi (Lyme disease spirochete). There is some evidence to suggest that the proteins may be outer surface proteins.. +PF05092 DUF686;
Pfam-B_6313 (release 7.7). This is a family of dsDNA Baculovirus proteins. It is required for the infectivity of the OBs or occlusion bodies. It is a structural protein of the ODV envelope required only in the first steps of per os larva infection, as viruses being produced in cells expressing the gene for this protein but not containing it in their genomes are able to produce successful infections. Baculoviruses are large DNA viruses that infect arthropods, mainly members of the order Lepidoptera. In their life cycle, they produce two kinds of particles, a budded, non-occluded virus (BV), which buds out of the infected cell and is responsible for the cell-to-cell transmission of the virus, and an occluded form, the occlusion body (OB), which is responsible for protecting the virus between encounters with larvae. A variable number of virions are included in the para-crystalline structure of the OB, mainly constituted by the virus-encoded polyhedrin protein; these virions are called occlusion body-derived virions or ODVs. . +PF05095 Protein of unknown function (DUF687)
Pfam-B_6321 (release 7.7). This family contains several uncharacterised Chlamydia proteins.. +PF05093 DUF689;
Cytokine-induced anti-apoptosis inhibitor 1, Fe-S biogenesis. Pfam-B_6320 (release 7.7), Wood V. Anamorsin, subsequently named CIAPIN1 for cytokine-induced anti-apoptosis inhibitor 1, in humans is the homologue of yeast Dre2, a conserved soluble eukaryotic Fe-S cluster protein, that functions in cytosolic Fe-S protein biogenesis. It is found in both the cytoplasm and in the mitochondrial intermembrane space (IMS) . CIAPIN1 is found to be up-regulated in hepatocellular cancer, is considered to be a downstream effector of the receptor tyrosine kinase-Ras signalling pathway, and is essential in mouse definitive haematopoiesis . Dre2 has been found to interact with the yeast reductase Tah18, forming a tight cytosolic complex implicated in the response to high levels of oxidative stress .. +PF05148 DUF691; Methyltransf_hyp;
Hypothetical methyltransferase. Pfam-B_6432 (release 7.7). This family consists of several uncharacterised eukaryotic proteins which are related to methyltransferases Pfam:PF01209.. +PF05114 Protein of unknown function (DUF692)
Pfam-B_6476 (release 7.7). This family consists of several uncharacterised bacterial proteins.. +PF05113 Protein of unknown function (DUF693)
Pfam-B_6473 (release 7.7). This family consists of several uncharacterised proteins from Borrelia burgdorferi (Lyme disease spirochete).. +PF05107 Family of unknown function (DUF694)
TIGRFAMs (release 2.0);. Family of hypothetical bacterial proteins.. +PF05117 Family of unknown function (DUF695)
TIGRFAMs (release 2.0);. Family of uncharacterised bacterial proteins.. +PF05128 Domain of unknown function (DUF697)
TIGRFAMs (release 2.0);. Family of bacterial hypothetical proteins that is sometimes associated with GTPase domains.. +PF01901 Protein of unknown function DUF70
Archaebacterial proteins of unknown function. Members of this family may be transmembrane proteins. . +PF05142 Domain of unknown function (DUF702)
Members of this family are found in various putative zinc finger proteins.. +PF05152 Protein of unknown function (DUF705)
Pfam-B_6448 (release 7.7). This family contains several uncharacterised Baculovirus proteins. . +PF05153 Family of unknown function (DUF706)
Pfam-B_2804 (release 7.7). Family of uncharacterised eukaryotic function. Some members have a described putative function, but a common theme is not evident.. +PF05212 Protein of unknown function (DUF707)
Pfam-B_6598 (release 7.7). This family consists of several uncharacterised proteins from Arabidopsis thaliana.. +PF05166 DUF709;
This family of proteins formerly called DUF709 includes the E. coli gene ycgL. Homologues of YcgL are found in gammaproteobacteria. The structure of this protein shows a novel alpha/beta/alpha sandwich structure .. +PF01902 DUF71; ATP_bind4;
This family of proteins probably binds ATP. This domain is about 200 amino acids long with a strongly conserved motif SGGKD at the N terminus. In some members of this family e.g. Swiss:Q12429, this domain is associated with Pfam:PF01042. . +PF05164 DUF710;
Cell division protein ZapA. ZapA is a cell division protein which interacts with FtsZ. FtsZ is part of a mid-cell cytokinetic structure termed the Z-ring that recruits a hierarchy of fission related proteins early in the bacterial cell cycle. The interaction of FtsZ with ZapA drives its polymerisation and promotes FtsZ filament bundling thereby contributing to the spatio-temporal tuning of the Z-ring .. +PF05167 Uncharacterised ACR (DUF711)
The proteins in this family are functionally uncharacterised. The proteins are around 450 amino acids long. It is likely that this family represents a group of glycerol-3-phosphate dehydrogenases.. +PF05168 DUF712;
+PF05206 DUF715;
Methyltransferase TRM13. Pfam-B_10143 (release 7.7). This is a family of eukaryotic proteins which are responsible for 2'-O-methylation of tRNA at position 4 . TRM13 shows no sequence similarity to other known methyltransferases.. +PF01904 Protein of unknown function DUF72
The function of this family is unknown.. +PF01905 DUF73;
CRISPR-associated negative auto-regulator DevR/Csa2. This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats . It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation . Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers . This family used to be known as DUF73. DevR appears to be negative auto-regulator within the system .. +PF01906 DUF74;
Putative heavy-metal-binding. From comparative structural analysis, this family is likely to be a heavy-metal binding domain. The domain oligomerises as a pentamer. The domain is about 100 amino acids long and is found in prokaryotes.. +PF01910 Domain of unknown function DUF77
Enright A, Ouzounis C, Cerutti L. Domain of unknown function. The crystal structure of two of these members shows that this domain has a ferredoxin like fold and is likely to exist as at least homodimers. Sulphate ions are located at the dimer interfaces, which are thought to confer additional stability. Although the function of this domain remains to be identified, its structure suggests a role in protein-protein interactions possibly regulated by the binding of small-molecule ligands .. +PF01918 DUF78;
Alba is a novel chromosomal protein that coats archaeal DNA without compacting it.. +PF01923 DUF80;
Cobalamin adenosyltransferase. This family contains the gene products of PduO and EutT which are both cobalamin adenosyltransferases. PduO is a protein with ATP:cob(I)alamin adenosyltransferase activity. The main role of this protein is the conversion of inactive cobalamins to AdoCbl for 1,2-propanediol degradation . The EutT enzyme appears to be an adenosyl transferase, converting CNB12 to AdoB12 .. +PF01925 DUF81;
Sulfite exporter TauE/SafE. Enright A & Pfam-B_3578 (Release 7.5). This is a family of integral membrane proteins where the alignment appears to contain two duplicated modules of three transmembrane helices. The proteins are involved in the transport of anions across the cytoplasmic membrane during taurine metabolism as an exporter of sulfoacetate . This family used to be known as DUF81.. +PF01927 DUF82;
RNAse domain of the PIN fold with an inserted Zinc Ribbon at the C terminus .. +PF01930 DUF83;
Domain of unknown function DUF83. This domain has no known function. The domain contains three conserved cysteines at its C terminus.. +PF01931 DUF84;
Protein of unknown function DUF84. The function of this prokaryotic protein family is unknown.. +PF01934 Protein of unknown function DUF86
The function of members of this family is unknown.. +PF01935 Domain of unknown function DUF87
The function of this prokaryotic domain is unknown. It contains several conserved aspartates and histidines that could be metal ligands.. +PF01936 DUF88;
These domains are found in the eukaryotic proteins typified by the Nedd4-binding protein 1 and the bacterial YacP-like proteins (Nedd4-BP1, YacP nucleases; NYN domains). The NYN domain shares a common protein fold with two other previously characterized groups of nucleases, namely the PIN (PilT N-terminal) and FLAP/5' --> 3' exonuclease superfamilies. These proteins share a common set of 4 acidic conserved residues that are predicted to constitute their active site. Based on the conservation of the acidic residues and structural elements Aravind and colleagues suggest that PIN and NYN domains are likely to bind only a single metal ion, unlike the FLAP/5' --> 3' exonuclease superfamily, which binds two metal ions. Based on conserved gene neighborhoods Aravind and colleagues infer that the bacterial members are likely to be components of the processome/degradosome that process tRNAs or ribosomal RNAs.. +PF01937 Protein of unknown function DUF89
This family has no known function.. +PF01939 Protein of unknown function DUF91
The function of this prokaryotic protein is unknown.. +PF01940 Integral membrane protein DUF92
Members of this family have several predicted transmembrane helices. The function of these prokaryotic proteins is unknown.. +PF01941 DUF93;
S-adenosylmethionine synthetase (AdoMet synthetase). This family consists of several archaebacterial S-adenosylmethionine synthetase C(AdoMet synthetase or MAT) (EC 2.5.1.6). S-Adenosylmethionine (AdoMet) occupies a central role in the metabolism of all cells. The biological roles of AdoMet include acting as the primary methyl group donor, as a precursor to the polyamines, and as a progenitor of a 5'-deoxyadenosyl radical. S-Adenosylmethionine synthetase catalyses the only known route of AdoMet biosynthesis. The synthetic process occurs in a unique reaction in which the complete triphosphate chain is displaced from ATP and a sulfonium ion formed. MATs from various organisms contain ~400-amino acid polypeptide chains . . +PF01944 Integral membrane protein DUF95
Members of this family have several predicted transmembrane regions. The function of this family is unknown.. +PF01947 Protein of unknown function (DUF98)
This is a family of uncharacterised proteins.. +PF01949 Protein of unknown function DUF99
The function of this archaebacterial protein family is unknown.. +PF01207 UPF0034;
Dihydrouridine synthase (Dus). Members of this family catalyse the reduction of the 5,6-double bond of a uridine residue on tRNA. Dihydrouridine modification of tRNA is widely observed in prokaryotes and eukaryotes, and also in some archaea. Most dihydrouridines are found in the D loop of t-RNAs. The role of dihydrouridine in tRNA is currently unknown, but may increase conformational flexibility of the tRNA. It is likely that different family members have different substrate specificities, which may overlap. Dus 1 (Swiss:Q9HGN6) from Saccharomyces cerevisiae acts on pre-tRNA-Phe, while Dus 2 (Swiss:P53720) acts on pre-tRNA-Tyr and pre-tRNA-Leu. Dus 1 is active as a single subunit, requiring NADPH or NADH, and is stimulated by the presence of FAD . Some family members may be targeted to the mitochondria and even have a role in mitochondria .. +PF00692 dUTPase
Pfam-B_127 (release 2.1). dUTPase hydrolyses dUTP to dUMP and pyrophosphate.. +PF02670 1-deoxy-D-xylulose 5-phosphate reductoisomerase
This is a family of 1-deoxy-D-xylulose 5-phosphate reductoisomerases. This enzyme catalyses the formation of 2-C-methyl-D-erythritol 4-phosphate from 1-deoxy-D-xylulose-5-phosphate in the presence of NADPH . This reaction is part of the terpenoid biosynthesis pathway.. +PF00350 dynamin;
+PF04912 Dynamitin
Pfam-B_5757 (release 7.6). Dynamitin is a subunit of the microtubule-dependent motor complex and is implicated in cell adhesion by binding to macrophage-enriched myristoylated alanine-rich C kinase substrate (MacMARCKS) .. +PF01221 Dynein light chain type 1
+PF00519 E1;
Papillomavirus helicase. Pfam-B_48 (release 1.0). This protein is a DNA helicase that is required for initiation of viral DNA replication. This protein forms a complex with the E2 protein Pfam:PF00508.. +PF00122 E1-E2 ATPase
+PF00676 E1_dehydrog;
Dehydrogenase E1 component. Pfam-B_117 (release 2.1). This family uses thiamine pyrophosphate as a cofactor. This family includes pyruvate dehydrogenase, 2-oxoglutarate dehydrogenase and 2-oxoisovalerate dehydrogenase.. +PF00524 E1_N;
E1 Protein, N terminal domain. Pfam-B_98 (release 1.0). +PF00511 E2_C;
E2 (early) protein, C terminal. Pfam-B_87 (release 1.0). +PF00508 E2_N;
E2 (early) protein, N terminal. Pfam-B_76 (release 1.0). +PF02319 E2F/DP family winged-helix DNA-binding domain
Pfam-B_8420 (release 5.2). This family contains the transcription factor E2F and its dimerisation partners TDP1 and TDP2, which stimulate E2F-dependent transcription. E2F binds to DNA as a homodimer or as a heterodimer in association with TDP1/2, the heterodimer having increased binding efficiency. The crystal structure of an E2F4-DP2-DNA complex shows that the DNA-binding domains of the E2F and DP proteins both have a fold related to the winged-helix DNA-binding motif. Recognition of the central c/gGCGCg/c sequence of the consensus DNA-binding site is symmetric, and amino acids that contact these bases are conserved among all known E2F and DP proteins.. +PF02817 e3_binding;
This family represents a small domain of the E2 subunit of 2-oxo-acid dehydrogenases responsible for the binding of the E3 subunit.. +PF00518 Early Protein (E6)
Pfam-B_57 (release 1.0). +PF00527 E7 protein, Early protein
Pfam-B_95 (release 1.0). +PF00563 DUF2;
Alignment kindly provided by SMART. This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function . The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site .. +PF04157 EAP30/Vps36 family
Pfam-B_8830 (release 7.3);. This family includes EAP30 as well as the Vps36 protein. Vps36 is involved in Golgi to endosome trafficking. EAP30 is a subunit of the ELL complex. The ELL is an 80-kDa RNA polymerase II transcription factor. ELL interacts with three other proteins to form the complex known as ELL complex. The ELL complex is capable of increasing the catalytic rate of transcription elongation, but is unable to repress initiation of transcription by RNA polymerase II as is the case of ELL. EAP30 is thought to lead to the derepression of ELL's transcriptional inhibitory activity .. +PF01309 EAV_env_prot;
Equine arteritis virus small envelope glycoprotein . Pfam-B_656 (release 3.0). Equine arteritis virus small envelope glycoprotein (Gs) is a class I transmembrane protein which adopts a number of different conformations. . +PF02905 EBNA1;
Epstein Barr virus nuclear antigen-1, DNA-binding domain. This domain has a ferredoxin-like fold.. +PF00378 Enoyl-CoA hydratase/isomerase family
This family contains a diverse set of enzymes including: Enoyl-CoA hydratase (Swiss:Q13011). Naphthoate synthase (Swiss:P27290). Carnitine racemase (Swiss:P31551). 3-hydroxybutyryl-CoA dehydratase (Swiss:P52046). Dodecanoyl-CoA delta-isomerase (Swiss:P42126).. +PF04736 Eclosion hormone
Eclosion hormone is an insect neuropeptide that triggers the performance of ecdysis behaviour, which causes shedding of the old cuticle at the end of a molt , .. +PF02963 Restriction endonuclease EcoRI
+PF03974 Ecotin
Pfam-B_54504 (release 7.2). Ecotin is a broad range serine protease inhibitor, which forms homodimers. The C-terminal region contains the dimerisation motif . Interestingly, the binding sites show a fluidity of protein contacts derived from ecotin's innate flexibility in fitting itself to proteases while [4,5].. +PF00736 EF1BD;
EF-1 guanine nucleotide exchange domain. Pfam-B_488 (release 2.1). This family is the guanine nucleotide exchange domain of EF-1 beta and EF-1 delta chains.. +PF00647 EF1G_domain;
Elongation factor 1 gamma, conserved domain. +PF00889 Elongation factor TS
Pfam-B_1408 (release 3.0). +PF01132 Elongation factor P (EF-P) OB domain
+PF04863 Alliinase EGF-like domain
Pfam-B_4527 (release 7.6). Allicin is a thiosulphinate that gives rise to dithiines, allyl sulphides and ajoenes, the three groups of active compounds in Allium species. Allicin is synthesised from sulfoxide cysteine derivatives by alliinase (EC:4.4.1.4), whose C-S lyase activity cleaves C(beta)-S(gamma) bonds. It is thought that this enzyme forms part of a primitive plant defence system. This family represents the N-terminal EGF-like domain .. +PF01303 Egg lysin (Sperm-lysin)
Pfam-B_1464 (release 3.0). Egg lysin creates a hole in the envelope of the egg thereby allowing the sperm to pass through the envelope and fuse with the egg.. +PF00971 EIAV coat protein, gp90
Pfam-B_210 (release 3.0). Equine infectious anaemia (EIAV). EIAV belongs to the family Retroviridae. EIAV gp90 is hypervariable in the carboxyl-end region and more stable in the amino-end region. This variability is a pathogenicity factor that allows the evasion of the host's immune response.. +PF01176 Translation initiation factor 1A / IF-1
This family includes both the eukaryotic translation factor eIF-1A and the bacterial translation initiation factor IF-1.. +PF05091 Eukaryotic translation initiation factor 3 subunit 7 (eIF-3)
Pfam-B_6311 (release 7.7). This family is made up of eukaryotic translation initiation factor 3 subunit 7 (eIF-3 zeta/eIF3 p66/eIF3d). Eukaryotic initiation factor 3 is a multi-subunit complex that is required for binding of mRNA to 40 S ribosomal subunits, stabilisation of ternary complex binding to 40 S subunits, and dissociation of 40 and 60 S subunits. These functions and the complex nature of eIF3 suggest multiple interactions with many components of the translational machinery . The gene coding for the protein has been implicated in cancer in mammals .. +PF01287 Eukaryotic elongation factor 5A hypusine, DNA-binding OB fold
eIF5A, previously thought to be an initiation factor, has been shown to be required for peptide chain elongation in yeast .. +PF01873 eIF5_eIF2B;
Domain found in IF2B/IF5. This family includes the N terminus of eIF-5 Swiss:P55010, and the C terminus of eIF-2 beta Swiss:P20042. This region corresponds to the whole of the archaebacterial eIF-2 beta homologue. The region contains a putative zinc binding C4 finger.. +PF01912 eIF6;
This family includes eukaryotic translation initiation factor 6 as well as presumed archaebacterial homologues.. +PF03608 PTS system enzyme II sorbitol-specific factor
TIGRFAMs, Griffiths-Jones SR. +PF03609 PTS system sorbose-specific iic component
TIGRFAMs, Griffiths-Jones SR. +PF03612 EIIBC-GUT;
Sorbitol phosphotransferase enzyme II N-terminus. TIGRFAMs, Griffiths-Jones SR, Yeats C. +PF03611 PTS system sugar-specific permease component
This family includes bacterial transmembrane proteins with a putative sugar-specific permease function, including and analogous to the IIC component of the PTS system. It has been suggested that this permease may form part of an L-ascorbate utilisation pathway, with proposed specificity for 3-keto-L-gulonate (formed by hydrolysis of L-ascorbate) . This family includes the IIC component of the galactitol specific GAT family PTS system.. +PF03613 PTS system mannose/fructose/sorbose family IID component
TIGRFAMs, Griffiths-Jones SR. +PF04873 Ethylene insensitive 3
Pfam-B_4883 (release 7.6). Ethylene insensitive 3 (EIN3) proteins are a family of plant DNA-binding proteins that regulate transcription in response to the gaseous plant hormone ethylene, and are essential for ethylene-mediated responses including the triple response, cell growth inhibition, and accelerated senescence. . +PF03317 ELF protein
Pfam-B_3282 (release 6.5). This is a family of hypothetical proteins from cereal crops.. +PF02323 Egg-laying hormone precursor
Pfam-B_953 (release 5.2). This family consists of egg-laying hormone (ELH) precursor and atrial gland peptides from little and California sea hare. The family also includes ovulation prohormone precursor from great pond snail. This family thus represents a conserved gastropoda ovulation and egg production prohormone. Note that many of the proteins present are further cleaved to give individual peptides . Neuropeptidergic bag cells of the marine mollusk Aplysia californica synthesise an egg-laying hormone (ELH) precursor protein which is cleaved to generate several bioactive peptides including ELH, bag cell peptides (BCP) and acidic peptide (AP) .. +PF00964 Elicitin
Elicitins form a novel class of plant necrotic proteins which are secreted by Phytophthora and Pythium fungi, parasites of many economically important crops. These proteins induce leaf necrosis in infected plants and elicit an incompatible hypersensitive-like reaction, leading to the development of a systemic acquired resistance against a range of fungal and bacterial plant pathogens .. +PF03789 ELK domain
Pfam-B_3136 (release 7.0). This domain is required for the nuclear localisation of these proteins . All of these proteins are members of the Tale/Knox homeodomain family, a subfamily within homeobox Pfam:PF00046. . +PF01151 GNS1_SUR4;
Members of this family are involved in long chain fatty acid elongation systems that produce the 26-carbon precursors for ceramide and sphingolipid synthesis . Predicted to be integral membrane proteins, in eukaryotes they are probably located on the endoplasmic reticulum. Yeast ELO3 (Swiss:P40319) affects plasma membrane H+-ATPase activity, and may act on a glucose-signaling pathway that controls the expression of several genes that are transcriptionally regulated by glucose such as PMA1 .. +PF02488 Merozoite Antigen
Pfam-B_924 (release 5.4). This family represents the immunodominant surface antigen of Theileria parasites including equi merozoite antigen-1 (EMA-1) and equi merozoite antigen-2 (EMA-2) . The protein shows variation at a putative glycosylation site, a potential mechanism for host immune response evasion .. +PF01105 emp24/gp25L/p24 family/GOLD
Pfam-B_803 (release 3.0). Members of this family are implicated in bringing cargo forward from the ER and binding to coat proteins by their cytoplasmic domains. This domain corresponds closely to the beta-strand rich GOLD domain described in .\. The GOLD domain is always found combined with lipid- or membrane-association domains .. +PF04493 Endonuc_V;
Endonuclease V is specific for single-stranded DNA or for duplex DNA that contains uracil or that is damaged by a variety of agents .. +PF02945 endonuclease_7;
Recombination endonuclease VII. +PF04231 Endonuclease_I;
Bacterial periplasmic or secreted endonuclease I (EC:3.1.21.1) E. coli endonuclease I (EndoI) is a sequence independent endonuclease located in the periplasm. It is inhibited by different RNA species. It is thought to normally generate double strand breaks in DNA, except in the presence of high salt concentrations and RNA, when it generates single strand breaks in DNA. Its biological role is unknown . Other family members are known to be extracellular . This family also includes a non-specific, Mg2+ activated ribonuclease precursor (Swiss:Q03091) .. +PF04667 endosulfine;
cAMP-regulated phosphoprotein/endosulfine conserved region. Pfam-B_4454 (release 7.5). Conserved region found in both cAMP-regulated phosphoprotein 19 (ARPP-19) and Alpha/Beta endosulfine. No function has yet been assigned to ARPP-19. Endosulfine is the endogenous ligand for the ATP-dependent potassium (K ATP) channels which occupy a key position in the control of insulin release from the pancreatic beta cell by coupling cell polarity to metabolism. In both cases the region occupies the majority of the protein [1,2]. . +PF00322 endothelin;
+PF00555 endotoxin;
This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding.. +PF03944 endotoxin_C;
This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding.. +PF03945 endotoxin_N;
delta endotoxin, N-terminal domain. This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding.. +PF03272 Viral enhancin protein
Pfam-B_4236 (release 6.5). +PF03386 Early nodulin 93 ENOD93 protein
Pfam-B_2931 (release 6.6). +PF00113 enolase;
Enolase, C-terminal TIM barrel domain. +PF03952 enolase_N;
Enolase, N-terminal domain. +PF03735 ENT domain
This presumed domain is named after Emsy N Terminus (ENT). Emsy is a protein that is amplified in breast cancer and interacts with BRCA2. The N terminus of this protein is found to be similar to other vertebrate and plant proteins of unknown function. This domain has a completely conserved histidine residue that may be functionally important.. +PF01375 Enterotoxin_A;
Heat-labile enterotoxin alpha chain. +PF01376 Enterotoxin_B;
Heat-labile enterotoxin beta chain. +PF02048 Enterotoxin_HS;
Heat-stable enterotoxin ST. This family consists of the heat stable enterotoxin ST from Escherichia coli. ST is a small peptide of 18 or 19 amino acid residues produced by enterotoxigenic E. coli and is one of the causes of acute diarrhoea in infants and travellers in developing countries. ST triggers a biological response by binding to a membrane-associated guanylyl cyclase C which is located on intestinal epithelial cell membranes .. +PF01417 ENTH domain
The ENTH (Epsin N-terminal homology) domain is found in proteins involved in endocytosis and cytoskeletal machinery. The function of the ENTH domain is unknown.. +PF00429 ENV_polyprotein;
ENV polyprotein (coat polyprotein). Pfam-B_145 (release 1.0). +PF00811 Ependymin
Pfam-B_1391 (release 2.1). +PF01404 EPH_lbd;
Ephrin receptor ligand binding domain. The Eph receptors, which bind to ephrins Pfam:PF00812 are a large family of receptor tyrosine kinases. This family represents the amino terminal domain which binds the ephrin ligand .. +PF01370 NAD dependent epimerase/dehydratase family
Pfam-B_93 (release 3.0). This family of proteins utilise NAD as a cofactor. The proteins in this family use nucleotide-sugar substrates for a variety of chemical reactions.. +PF02350 UDP-N-acetylglucosamine 2-epimerase
Pfam-B_888 (release 5.2) & Pfam-B_4862 (Release 7.5). This family consists of UDP-N-acetylglucosamine 2-epimerases EC:5.1.3.14 this enzyme catalyses the production of UDP-ManNAc from UDP-GlcNAc. Note that some of the enzymes in this family are bifunctional such as Swiss:O35826 and Swiss:Q9Z0P6 in this instance Pfam matches only the N-terminal half of the protein suggesting that the additional C-terminal part (when compared to mono-functional members of this family) is responsible for the UDP-N-acetylmannosamine kinase activity of these enzymes. This hypothesis is further supported by the assumption that the C-terminal part of Swiss:O35826 is the kinase domain .. +PF00758 Erythropoietin/thrombopoietin
Pfam-B_990 (release 2.1). +PF00275 EPSP_syntase;
EPSP synthase (3-phosphoshikimate 1-carboxyvinyltransferase). +PF03736 EPTP domain
Mutations in the LGI/Epitempin gene can result in a special form of epilepsy, autosomal dominant lateral temporal epilepsy. The Epitempin protein contains a large repeat in its C terminal section. The architecture and structural features of this repeat make it a likely member of the 7-bladed beta-propeller fold .. +PF01133 Enhancer of rudimentary
Enhancer of rudimentary is a protein of unknown function that is highly conserved in plants and animals. This protein is found to be an enhancer of the rudimentary gene Swiss:P05990.. +PF00810 ER lumen protein retaining receptor
Pfam-B_1387 (release 2.1). +PF02732 ERCC4 domain
This domain is a family of nucleases. The family includes EME1 which is an essential component of a Holliday junction resolvase [2-3]. EME1 interacts with MUS81 to form a DNA structure-specific endonuclease.. +PF04404 ERF superfamily
The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to ERF .. +PF03463 eRF1 domain 1
The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known . The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site . This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification.. +PF03464 eRF1 domain 2
The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known . The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site . This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification.. +PF03465 eRF1 domain 3
The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known . The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site . This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification.. +PF03734 ErfK_YbiS_YhnG;
L,D-transpeptidase catalytic domain. This family of proteins are found in a range of bacteria. It has been shown that this domain can act as an L,D-transpeptidase that gives rise to an alternative pathway for peptidoglycan cross-linking . This gives bacteria resistance to beta-lactam antibiotics that inhibit PBPs which usually carry out the cross-linking reaction. The conserved region contains a conserved histidine and cysteine, with the cysteine thought to be an active site residue. Several members of this family contain peptidoglycan binding domains. The molecular structure of YkuD protein shows this domain has a novel tertiary fold consisting of a beta-sandwich with two mixed sheets, one containing five strands and the other, six strands. The two beta-sheets form a cradle capped by an alpha-helix. This family was formerly called the ErfK/YbiS/YcfS/YnhG family, but is now named after the first protein of known structure.. +PF03694 UPF0143;
This is a family of integral membrane proteins, which may contain four transmembrane helices. Members of this family are thought to be involved in sterol C-4 demethylation. In S. cerevisiae they may tether Erg26p (sterol dehydrogenase/decarboxylase) and Erg27p (3-ketoreductase) to the endoplasmic reticulum or may facilitate interaction between these proteins . The family contains a conserved arginine and histidine that may be functionally important.. +PF04622 ERG2 and Sigma1 receptor like protein
This family consists of the fungal C-8 sterol isomerase and mammalian sigma1 receptor. C-8 sterol isomerase (delta-8--delta-7 sterol isomerase), catalyses a reaction in ergosterol biosynthesis, which results in unsaturation at C-7 in the B ring of sterols . Sigma 1 receptor is a low molecular mass mammalian protein located in the endoplasmic reticulum , which interacts with endogenous steroid hormones, such as progesterone and testosterone . It also binds the sigma ligands, which are a set of chemically unrelated drugs including haloperidol, pentazocine, and ditolylguanidine . Sigma1 effectors are not well understood, but sigma1 agonists have been observed to affect NMDA receptor function, the alpha-adrenergic system and opioid analgesia.. +PF01222 Ergosterol biosynthesis ERG4/ERG24 family
+PF00769 Ezrin/radixin/moesin family
Pfam-B_851 (release 2.1). This family of proteins contain a band 4.1 domain (Pfam:PF00373), at their amino terminus. This family represents the rest of these proteins.. +PF04137 Endoplasmic Reticulum Oxidoreductin 1 (ERO1)
Pfam-B_4729 (release 7.3);. Members of this family are required for the formation of disulphide bonds in the ER [1,2].. +PF03238 ESAG protein
Pfam-B_3037 (release 6.5). Expression-site-associated gene (ESAG) proteins are thought to be involved in VSG activation. This family includes ESAG 117A Swiss:P04477 as well as ESAG IM Swiss: Q26705.. +PF03433 ESPA;
EspA-like secreted protein . Pfam-B_4100 (release 6.6). EspA is the prototypical member of this family. EspA, together with EspB, EspD and Tir are exported by a type III secretion system. These proteins are essential for attaching and effacing lesion formation. EspA is a structural protein and a major component of a large, transiently expressed, filamentous surface organelle which forms a direct link between the bacterium and the host cell [1,2].. +PF04806 EspF protein repeat
Pfam-B_3518 (release 7.6). The enteropathogenic Escherichia coli EspF secreted protein induces host cell apoptosis. Its proline-rich structure suggests that it may act by binding to SH3 domains or EVH1 domains of host cell signalling proteins . . +PF00756 Putative esterase
Pfam-B_476 (release 2.1) & Pfam-B_4968 (Release 7.5). This family contains Esterase D Swiss:P10768. However it is not clear if all members of the family have the same function. This family is related to the Pfam:PF00135 family.. +PF01684 ET module
This domain has no known function. It is found in several C. elegans proteins. The domain contains 8-10 conserved cysteines that probably form 4-5 disulphide bridges. By inspection of the conservation of cysteines it looks like cysteines 1,2,3,4,9 and 10 are always present and that sometimes the pair 5 and 8 or the pair 6 and 7 are missing. This suggests that cysteines 5/8 and 6/7 make disulphide bridges. . +PF00766 Electron transfer flavoprotein FAD-binding domain
Pfam-B_853 (release 2.1) & Pfam-B_1321 (release 3.0). This domain is found at the C-terminus of the electron transfer flavoprotein alpha chain and binds FAD . The fold consists of a five-stranded parallel beta sheet as the core of the domain, flanked by alternating helices. A small part of this domain is donated by the beta chain .. +PF01012 ETF_beta;
Electron transfer flavoprotein domain. Pfam-B_1321 (release 3.0). This family includes the homologous domain shared between the alpha and beta subunits of the electron transfer flavoprotein .. +PF05187 ETFD;
Electron transfer flavoprotein-ubiquinone oxidoreductase. Pfam-B_2305 (release 7.7). Electron-transfer flavoprotein-ubiquinone oxidoreductase (ETF-QO) in the inner mitochondrial membrane accepts electrons from electron-transfer flavoprotein which is located in the mitochondrial matrix and reduces ubiquinone in the mitochondrial membrane. The two redox centres in the protein, FAD and a [4Fe4S] cluster, are present in a 64-kDa monomer . . +PF00178 Ets-domain
+PF03318 Clostridium epsilon toxin ETX/Bacillus mosquitocidal toxin MTX2
Pfam-B_3569 (release 6.5). This family appears to be distantly related to Pfam:PF01117.. +PF01459 Euk_porin;
Prodom_3211 (release 99.1) & Pfam-B__3211 (release 7.5). +PF04346 Ethanolamine utilisation protein, EutH
EutH is a bacterial membrane protein whose molecular function is unknown. It has been suggested that it may act as an ethanolamine transporter, responsible for carrying ethanolamine from the periplasm to the cytoplasm .. +PF03319 Ethanolamine utilisation protein EutN/carboxysome
Pfam-B_3053 (release 6.5). The crystal structure of EutN contains a central five-stranded beta-barrel, with an alpha-helix at the open end of this barrel (PDB:2HD3). The structure also contains three additional beta-strands, which help the formation of a tight hexamer, with a hole in the center. This suggests that EutN forms a pore, with an opening of 26 Angstrom in diameter on one face and 14 Angstrom on the other face . EutN is involved in the cobalamin-dependent degradation of ethanolamine .. +PF02472 Biopolymer transport protein ExbD/TolR
Pfam-B_2343 (release 5.4). This group of proteins are membrane bound transport proteins essential for ferric ion uptake in bacteria . The Pfam family consists of ExbD, and TolR which are involved in TonB-dependent transport of various receptor bound substrates including colicins .. +PF01541 Exci_endo_N;
GIY-YIG catalytic domain. Pfam-B_489 (release 4.0). This domain called GIY-YIG is found in the amino terminal region of excinuclease abc subunit c (uvrC), bacteriophage T4 endonucleases segA, segB, segC, segD and segE; it is also found in putative endonucleases encoded by group I introns of fungi and phage. The structure of I-TevI a GIY-YIG endonuclease, reveals a novel alpha/beta-fold with a central three-stranded antiparallel beta-sheet flanked by three helices . The most conserved and putative catalytic residues are located on a shallow, concave surface and include a metal coordination site.. +PF03081 Exo70 exocyst complex subunit
Pfam-B_2462 (release 6.4). The Exo70 protein forms one subunit of the exocyst complex. First discovered in S. cerevisiae , Exo70 and other exocyst proteins have been observed in several other eukaryotes, including humans. In S. cerevisiae, the exocyst complex is involved in the late stages of exocytosis, and is localised at the tip of the bud, the major site of exocytosis in yeast . Exo70 interacts with the Rho3 GTPase . This interaction mediates one of the three known functions of Rho3 in cell polarity: vesicle docking and fusion with the plasma membrane (the other two functions are regulation of actin polarity and transport of exocytic vesicles from the mother cell to the bud) . In humans, the functions of Exo70 and the exocyst complex are less well characterised: Exo70 is expressed in several tissues and is thought to also be involved in exocytosis .. +PF04257 Exodeoxyribonuclease V, gamma subunit
TIGRFAMs (release 2.0);. The Exodeoxyribonuclease V enzyme is a multi-subunit enzyme comprised of the proteins RecB, RecC (this family) and RecD. This enzyme plays an important role in homologous genetic recombination, repair of double strand DNA breaks, resistance to UV irradiation and chemical DNA-damage. The enzyme (EC:3.1.11.5) catalyses ssDNA or dsDNA-dependent ATP hydrolysis, hydrolysis of ssDNA or dsDNA and unwinding of dsDNA . This family consists of two AAA domains.. +PF02601 Exonuclease_VII;
Exonuclease VII, large subunit. This family consists of exonuclease VII, large subunit EC:3.1.11.6. This enzyme catalyses exonucleolytic cleavage in either 5'->3' or 3'->5' direction to yield 5'-phosphomononucleotides. This exonuclease VII enzyme is composed of one large subunit and 4 small ones .. +PF02095 Extensin;
Extensin-like protein repeat. +PF01267 F-actin capping protein alpha subunit
+PF00469 Negative factor, (F-Protein) or Nef
Pfam-B_128 (release 1.0). Nef protein accelerates virulent progression of AIDS by its interaction with cellular proteins involved in signal transduction and host cell activation. Nef has been shown to bind specifically to a subset of the Src kinase family.. +PF03807 NADP oxidoreductase coenzyme F420-dependent
TIGRFAMs, Griffiths-Jones SR. +PF01115 F-actin capping protein, beta subunit
+PF01116 Fructose-bisphosphate aldolase class-II
+PF03405 Fatty acid desaturase
+PF04116 Fatty_acid_hyrd;
Fatty acid hydroxylase superfamily. Pfam-B_7847 (release 7.3) & DOMO:DM04600 & Pfam-B_905 (release 4.1);. This superfamily includes fatty acid and carotene hydroxylases and sterol desaturases. Beta-carotene hydroxylase is involved in zeaxanthin synthesis by hydroxylating beta-carotene, but the enzyme may be involved in other pathways . This family includes C-5 sterol desaturase and C-4 sterol methyl oxidase. Members of this family are involved in cholesterol biosynthesis and biosynthesis of a plant cuticular wax. These enzymes contain two copies of a HXHH motif. Members of this family are integral membrane proteins.. +PF02504 Fatty acid synthesis protein
Pfam-B_1671 (release 5.4). The plsX gene is part of the bacterial fab gene cluster which encodes several key fatty acid biosynthetic enzymes . The exact function of the plsX protein in fatty acid synthesis is unknown.. +PF01557 Fumarylacetoacetate (FAA) hydrolase family
Pfam-B_641 (release 4.0) & Pfam-B_1228 (release 4.1). This family consists of fumarylacetoacetate (FAA) hydrolase, or fumarylacetoacetate hydrolase (FAH) and it also includes HHDD isomerase/OPET decarboxylase from E. coli strain W. FAA is the last enzyme in the tyrosine catabolic pathway, it hydrolyses fumarylacetoacetate into fumarate and acetoacetate which then join the citric acid cycle . Mutations in FAA cause type I tyrosinemia in humans this is an inherited disorder mainly affecting the liver leading to liver cirrhosis, hepatocellular carcinoma, renal tubular damages and neurologic crises amongst other symptoms . The enzymatic defect causes the toxic accumulation of phenylalanine/tyrosine catabolites . The E. coli W enzyme HHDD isomerase/OPET decarboxylase contains two copies of this domain and functions in fourth and fifth steps of the homoprotocatechuate pathway; here it decarboxylates OPET to HHDD and isomerises this to OHED. The final products of this pathway are pyruvic acid and succinic semialdehyde. This family also includes various hydratases and 4-oxalocrotonate decarboxylases which are involved in the bacterial meta-cleavage pathways for degradation of aromatic compounds. 2-hydroxypentadienoic acid hydratase encoded by mhpD in E. coli Swiss:P77608 is involved in the phenylpropionic acid pathway of E. coli and catalyses the conversion of 2-hydroxy pentadienoate to 4-hydroxy-2-keto-pentanoate and uses a Mn2+ co-factor . OHED hydratase encoded by hpcG in E. coli Swiss:P42270 is involved in the homoprotocatechuic acid (HPC) catabolism . XylI in P. putida Swiss:P49155 is a 4-Oxalocrotonate decarboxylase .. +PF00667 FAD_binding;
Pfam-B_180 (release 2.1). This domain is found in sulfite reductase, NADPH cytochrome P450 reductase, Nitric oxide synthase and methionine synthase reductase.. +PF00890 FAD binding domain
Pfam-B_255 (release 3.0). This family includes members that bind FAD. This family includes the flavoprotein subunits from succinate and fumarate dehydrogenase, aspartate oxidase and the alpha subunit of adenylylsulphate reductase.. +PF01494 FAD binding domain
Pfam-B_549 (release 4.0). This domain is involved in FAD binding in a number of enzymes.. +PF00941 dehydrog_molyb;
FAD binding domain in molybdopterin dehydrogenase. Pfam-B_1112 (release 3.0). +PF01687 FAD_Synth;
Pfam-B_1221 (release 4.1). This family represents the C-terminal region of the bifunctional riboflavin biosynthesis protein known as RibC in Bacillus subtilis. The RibC protein from Bacillus subtilis has both flavokinase and flavin adenine dinucleotide synthetase (FAD-synthetase) activities. RibC plays an essential role in the flavin metabolism . This domain is thought to have kinase activity .. +PF04703 FaeA-like protein
Pfam-B_5784 (release 7.5). This family represents a number of fimbrial protein transcription regulators found in Gram-negative bacteria. These proteins are thought to facilitate binding of the leucine-rich regulatory protein to regulatory elements, possibly by inhibiting deoxyadenosine methylation of these elements by deoxyadenosine methylase [1,2].. +PF02106 Fanconi;
Fanconi anaemia group C protein. +PF03511 Fanconia;
Fanconi anaemia group A protein. +PF01149 Formamidopyrimidine-DNA glycosylase N-terminal domain
Formamidopyrimidine-DNA glycosylase (Fpg) is a DNA repair enzyme that excises oxidised purines from damaged DNA. This family is the N-terminal domain, which contains eight beta-strands, forming a beta-sandwich with two alpha-helices parallel to its edges .. +PF04750 FAR-17a/AIG1-like protein
Pfam-B_3664 (release 7.5). This family includes the hamster androgen-induced FAR-17a protein (Swiss:Q60534) , and its human homologue, the AIG1 protein (Swiss:Q9NVV5) . The function of these proteins is unknown. This family also includes homologous regions from a number of other metazoan proteins.. +PF01581 FMRFamide related peptide family
Pfam-B_666 (release 4.1). The neuroactive peptide Phe-Met-Arg-Phe-NH2 (FMRF-amide) has a variety of effects on both mammalian and invertebrate tissues .. +PF02469 Fasciclin domain
Pfam-B_562 (release 5.4). This extracellular domain is found repeated four times in grasshopper fasciclin I as well as in proteins from mammals, sea urchins, plants, yeast and bacteria .. +PF02259 FAT domain
(Keith and Schreiber, Science 270:50). The FAT domain is named after FRAP, ATM and TRRAP.. +PF02260 FATC domain
(Keith and Schreiber, Science 270:50). The FATC domain is named after FRAP, ATM, TRRAP C-terminal . The solution structure of the FATC domain suggests it plays a role in redox-dependent structural and cellular stability .. +PF00316 Fructose-1-6-bisphosphatase
+PF03320 Bacterial fructose-1,6-bisphosphatase, glpX-encoded
Pfam-B_3515 (release 6.5). +PF02634 FdhD/NarQ family
A pan-bacterial lineage of proteins. Nitrate assimilation protein, NarQ, and FdhD (Swiss:P32177) are required for formate dehydrogenase activity. Structurally, they possess a deaminase fold with a characteristic binding pocket, suggesting that they might bind a nucleotide or related molecule allosterically to regulate the formate dehydrogenase catalytic subunit .. +PF04216 Protein involved in formate dehydrogenase formation
The function of these proteins is unknown. They may possibly be involved in the formation of formate dehydrogenase.. +PF03147 Ferredoxin-fold anticodon binding domain
This is the anticodon binding domain found in some phenylalanyl tRNA synthetases. The domain has a ferredoxin fold [1,2].. +PF00465 Iron-containing alcohol dehydrogenase
+PF02742 Iron dependent repressor, metal binding and dimerisation domain
This family includes the Diphtheria toxin repressor.. +PF01325 Iron dependent repressor, N-terminal DNA binding domain
This family includes the Diphtheria toxin repressor. DNA binding is through a helix-turn-helix motif.. +PF02906 Iron only hydrogenase large subunit, C-terminal domain
+PF02256 Iron hydrogenase small subunit
Pfam-B_3750 (release 5.2). This family represents the small subunit of the Fe-only hydrogenases EC:1.18.99.1. The subunit is comprised of alternating random coil and alpha helical structures that encompasses the large subunit in a novel protein fold .. +PF01032 FecCD_family;
FecCD transport family. Pfam-B_377 (release 3.0). This is a sub-family of bacterial binding protein-dependent transport systems family. This Pfam entry contains the inner components of this multicomponent transport system.. +PF04773 FecR protein
Pfam-B_3234 (release 7.5). FecR is involved in regulation of iron dicitrate transport. In the absence of citrate FecR inactivates FecI. FecR is probably a sensor that recognises iron dicitrate in the periplasm.. +PF02388 FemAB family
Pfam-B_1214 (release 5.2). The femAB operon codes for two nearly identical approximately 50-kDa proteins involved in the formation of the Staphylococcal pentaglycine interpeptide bridge in peptidoglycan . These proteins are also considered as a factor influencing the level of methicillin resistance .. +PF04023 FeoA domain
This family includes FeoA a small protein, probably involved in Fe2+ transport . This presumed short domain is also found at the C-terminus of a variety of metal dependent transcriptional regulators. This suggests that this domain may be metal-binding. In most cases this is likely to be either iron or manganese.. +PF02421 FeoB;
Ferrous iron transport protein B. Escherichia coli has an iron(II) transport system (feo) which may make an important contribution to the iron supply of the cell under anaerobic conditions . FeoB has been identified as part of this transport system. FeoB is a large 700-800 amino acid integral membrane protein. The N terminus contains a P-loop motif suggesting that iron transport may be ATP dependent .. +PF00142 fer4_NifH;
4Fe-4S iron sulfur cluster binding proteins, NifH/frxC family. +PF01794 Ferric reductase like transmembrane component
Pfam-B_728 (release 4.2). This family includes a common region in the transmembrane proteins mammalian cytochrome B-245 heavy chain (gp91-phox), ferric reductase transmembrane component in yeast and respiratory burst oxidase from mouse-ear cress. This may be a family of flavocytochromes capable of moving electrons across the plasma membrane . The Frp1 protein Swiss:Q04800 from S. pombe is a ferric reductase component and is required for cell surface ferric reductase activity, mutants in frp1 are deficient in ferric iron uptake . Cytochrome B-245 heavy chain Swiss:P04839 is a FAD-dependent dehydrogenase it is also has electron transferase activity which reduces molecular oxygen to superoxide anion, a precursor in the production of microbicidal oxidants . Mutations in the sequence of cytochrome B-245 heavy chain (gp91-phox) lead to the X-linked chronic granulomatous disease. The bacteriocidal ability of phagocytic cells is reduced and is characterised by the absence of a functional plasma membrane associated NADPH oxidase . The chronic granulomatous disease gene codes for the beta chain of cytochrome B-245 and cytochrome B-245 is missing from patients with the disease .. +PF00762 Ferrochelatase
Pfam-B_879 (release 2.1). +PF04060 Putative Fe-S cluster
This family includes a domain with four conserved cysteines that probably form an Fe-S redox cluster.. +PF02941 FeThRed; FeThRed_beta;
Ferredoxin thioredoxin reductase variable alpha chain. +PF02943 FeThRed_alpha;
Ferredoxin thioredoxin reductase catalytic beta chain. +PF01846 FF domain
This domain has been predicted to be involved in protein-protein interaction . This domain was recently shown to bind the hyperphosphorylated C-terminal repeat domain of RNA polymerase II, confirming its role in protein-protein interactions .. +PF05013 N-formylglutamate amidohydrolase
Formylglutamate amidohydrolase (FGase) catalyses the terminal reaction in the five-step pathway for histidine utilisation in Pseudomonas putida. By this action, N-formyl-L-glutamate (FG) is hydrolysed to produce L-glutamate plus formate .. +PF00167 Fibroblast growth factor
Fibroblast growth factors are a family of proteins involved in growth and differentiation in a wide range of contexts. They are found in a wide range of organisms, from nematodes to humans . Most share an internal core region of high similarity, conserved residues in which are involved in binding with their receptors. On binding, they cause dimerisation of their tyrosine kinase receptors leading to intracellular signalling. There are currently four known tyrosine kinase receptors for fibroblast growth factors. These receptors can each bind several different members of this family. Members of this family have a beta trefoil structure. Most have N-terminal signal peptides and are secreted. A few lack signal sequences but are secreted anyway; still others also lack the signal peptide but are found on the cell surface and within the extracellular matrix. A third group remain intracellular . They have central roles in development, regulating cell proliferation, migration and differentiation. On the other hand, they are important in tissue repair following injury in adult organisms .. +PF00370 FGGY;
FGGY family of carbohydrate kinases, N-terminal domain. This domain adopts a ribonuclease H-like fold and is structurally related to the C-terminal domain.. +PF02782 FGGY family of carbohydrate kinases, C-terminal domain
This domain adopts a ribonuclease H-like fold and is structurally related to the N-terminal domain.. +PF00498 FHA domain
The FHA (Forkhead-associated) domain is a phosphopeptide binding motif .. +PF00771 FHIPEP family
Pfam-B_983 (release 2.1). +PF01269 Fibrillarin
+PF00147 fibrinogen_C;
Fibrinogen beta and gamma chains, C-terminal globular domain. +PF03516 Filaggrin
+PF00038 filament;
Intermediate filament protein. +PF04732 filament_head;
Intermediate filament head (DNA binding) region. This family represents the N-terminal head region of intermediate filaments. Intermediate filament heads bind DNA . Vimentin heads are able to alter nuclear architecture and chromatin distribution, and the liberation of heads by HIV-1 protease may play an important role in HIV-1 associated cytopathogenesis and carcinogenesis . Phosphorylation of the head region can affect filament stability . The head has been shown to interact with the rod domain of the same protein .. +PF00630 Filamin/ABP280 repeat
+PF01611 Filovirus glycoprotein
Pfam-B_1023 (release 4.1). This family includes an extracellular region from the envelope glycoprotein of Ebola and Marburg viruses. This region is also produced as a separate transcript that gives rise to a non-structural, secreted glycoprotein, which is produced in large amounts and has an unknown function . Processing of this protein may be involved in viral pathogenicity .. +PF02097 Filoviridae VP35
+PF00419 Fimbrial protein
Pfam-B_196 (release 1.0) & Jackhmmer:B2PIN3. +PF04449 CS1 type fimbrial major subunit
Fimbriae, also known as pili, form filaments radiating from the surface of the bacterium to a length of 0.5-1.5 micrometres. They enable the cell to colonise host epithelia. This family constitutes the major subunits of CS1 like pili, including CS2 and CFA1 from Escherichia coli, and also the Cable type II pilin major subunit from Burkholderia cepacia . The major subunit of CS1 pili is called CooA. Periplasmic CooA is mostly complexed with the assembly protein CooB. In addition, a small pool of CooA multimers, and CooA-CooD complexes exists, but the functional significance is unknown . A member of this family has also been identified in Salmonella typhi and Salmonella enterica .. +PF02432 Fibrimal;
Fimbrial, major and minor subunit. Pfam-B_2036 (release 5.4). Fimbriae (also known as pili) are polar filaments found on the bacterial surface, allowing colonisation of the host. This family consists of the minor and major fimbrial subunits.. +PF05182 Fip1 motif
Pfam-B_4652 (release 7.7). This short motif is about 40 amino acids in length. It is found in the Fip1 protein, a component of a yeast pre-mRNA polyadenylation factor that directly interacts with poly(A) polymerase . This region of Fip1 is needed for the interaction with the Th1 subunit of the complex and for specific polyadenylation of the cleaved mRNA precursor .. +PF02433 Cytochrome C oxidase, mono-heme subunit/FixO
Pfam-B_2045 (release 5.4). The bacterial oxidase complex, fixNOPQ or cytochrome cbb3, is thought to be required for respiration in endosymbiosis. FixO is a membrane bound mono-heme constituent of the fixNOPQ complex.. +PF01346 Domain amino terminal to FKBP-type peptidyl-prolyl isomerase
Pfam-B_402 (release 3.0). This family is only found at the amino terminus of Pfam:PF00254. This domain is of unknown function.. +PF04620 Flagellar filament outer layer protein Flaa
Periplasmic flagella are the organelles of spirochete mobility, and are structurally different from the flagella of other motile bacteria. They reside inside the cell within the periplasmic space, and confer mobility in viscous gel-like media such as connective tissue . The flagella are composed of an outer sheath of FlaA proteins and a core filament of FlaB proteins. Each species usually has several FlaA protein species .. +PF03646 FlaG protein
Pfam-B_2985 (release 7.0). Although important for flagella the exact function of this protein is unknown.. +PF03614 Repressor of phase-1 flagellin
+PF05149 Paraflagellar rod protein
Pfam-B_6464 (release 7.7). This family consists of several eukaryotic paraflagellar rod component proteins. The eukaryotic flagellum represents one of the most complex macromolecular structures found in any organism and contains more than 250 proteins . In addition to its locomotive role, the flagellum is probably involved in nutrient uptake since receptors for host low-density lipoproteins are localised on the flagellar membrane as well as on the flagellar pocket membrane .. +PF00700 Bacterial flagellin C-terminal helical region
Pfam-B_41 (release 2.1). Flagellins polymerise to form bacterial flagella. There is some similarity between this family and Pfam:PF00669, particularly the motif NRFXSXIXXL. It has been suggested that these two regions associate and this is shown to be correct as structurally this family forms an extended helix that interacts with Pfam:PF00669.. +PF00669 Bacterial flagellin N-terminal helical region
Pfam-B_37 (release 2.1). Flagellins polymerise to form bacterial flagella. This family includes flagellins and hook associated protein 3. Structurally this family forms an extended helix that interacts with Pfam:PF00700.. +PF01350 Flavivirus non-structural protein NS4A
Pfam-B_211 (release 3.0). Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. The NS4A protein is small and poorly conserved among the Flaviviruses. NS4A contains multiple hydrophobic potential membrane spanning regions . NS4A has only been found in cells infected by Kunjin virus .. +PF01349 Flavivirus non-structural protein NS4B
Pfam-B_211 (release 3.0). Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. The NS4B protein is small and poorly conserved among the Flaviviruses. NS4B contains multiple hydrophobic potential membrane spanning regions . NS4B may form membrane components of the viral replication complex and could be involved in membrane localisation of NS3 and Pfam:PF00972 .. +PF00972 Flavivirus RNA-directed RNA polymerase
Pfam-B_200 (release 3.0). Flaviviruses produce a polyprotein from the ssRNA genome. This protein is also known as NS5. This RNA-directed RNA polymerase possesses a number of short regions and motifs homologous to other RNA-directed RNA polymerases .. +PF01570 Flavivirus polyprotein propeptide
Pfam-B_304 (release 4.1). The flaviviruses are small enveloped animal viruses containing a single positive strand genomic RNA . The genome encodes one large ORF, a polyprotein which undergoes proteolytic processing into mature viral peptide chains. This family consists of a propeptide region of approximately 90 amino acids in length.. +PF02525 NADHdh_2;
Flavodoxin-like fold. Pfam-B_1456 (release 5.4). This family consists of a domain with a flavodoxin-like fold. The family includes bacterial and eukaryotic NAD(P)H dehydrogenase (quinone) EC:1.6.99.2. These enzymes catalyse the NAD(P)H-dependent two-electron reductions of quinones and protect cells against damage by free radicals and reactive oxygen species .\. This enzyme uses a FAD co-factor. The equation for this reaction is:- NAD(P)H + acceptor <=> NAD(P)(+) + reduced acceptor. This enzyme is also involved in the bioactivation of prodrugs used in chemotherapy . The family also includes acyl carrier protein phosphodiesterase EC:3.1.4.14. This enzyme converts holo-ACP to apo-ACP by hydrolytic cleavage of the phosphopantetheine residue from ACP . This family is related to Pfam:PF03358 and Pfam:PF00258.. +PF02441 Flavoprotein
Pfam-B_1622 (release 5.4). This family contains diverse flavoprotein enzymes. This family includes epidermin biosynthesis protein, EpiD Swiss:P30197, which has been shown to be a flavoprotein that binds FMN . This enzyme catalyses the removal of two reducing equivalents from the cysteine residue of the C-terminal meso-lanthionine of epidermin to form a --C==C-- double bond. This family also includes the B chain of dipicolinate synthase a small polar molecule that accumulates to high concentrations in bacterial endospores, and is thought to play a role in spore heat resistance, or the maintenance of heat resistance . dipicolinate synthase catalyses the formation of dipicolinic acid from dihydroxydipicolinic acid. This family also includes phenyl-acrylic acid decarboxylase Swiss:P33751 (EC:4.1.1.-) .. +PF00460 flg_bb_rod;
Flagella basal body rod protein. +PF02120 Flagellar hook-length control protein FliK
This is the C terminal domain of FliK. FliK controls the length of the flagellar hook by directly measuring the hook length as a molecular ruler . This family also includes YscP of the Yersinia type III secretion system, and equivalent proteins in other pathogenic bacterial type III secretion systems.. +PF03963 Flagellar hook capping protein - N-terminal region
FlgD is known to be absolutely required for hook assembly, yet it has not been detected in the mature flagellum . It appears to act as a hook-capping protein to enable assembly of hook protein subunits . FlgD regulates the assembly of the hook cap structure to prevent leakage of hook monomers into the medium and hook monomer polymerisation and also plays a role in determination of the correct hook length, with the help of the FliK protein . This family represents the N-terminal conserved region of FlgD. A recent crystal structure showed that this region was likely to be flexible and was cleaved off during crystallisation .. +PF02107 Flagellar L-ring protein
+PF02119 Flagellar P-ring protein
+PF04316 Anti-sigma-28 factor, FlgM
FlgM binds and inhibits the activity of the transcription factor sigma 28. Inhibition of sigma 28 prevents the expression of genes from flagellar transcriptional class 3, which include genes for the filament and chemotaxis. Correctly assembled basal body-hook structures export FlgM, relieving inhibition of sigma 28 and allowing expression of class 3 genes. NMR studies show that free FlgM is mostly unfolded, which may facilitate its export. The C terminal half of FlgM adopts a tertiary structure when it binds to sigma 28. All mutations in FlgM that prevent sigma 28 inhibition affect the C-terminal domain and is the region thought to constitute the binding domain. A minimal binding domain has been identified between Glu 64 and Arg 88 in Salmonella typhimurium (Swiss:P26477). The N-terminal portion remains unstructured and may be necessary for recognition by the export machinery .. +PF05130 FlgN protein
This family includes the FlgN protein and export chaperone involved in flagellar synthesis .. +PF02465 Flagellar hook-associated protein 2 N-terminus
The flagellar hook-associated protein 2 (HAP2 or FliD) forms the distal end of the flagella, and plays a role in mucin specific adhesion of the bacteria . This alignment covers the N-terminal region of this family of proteins.. +PF02049 Flagellar hook-basal body complex protein FliE
+PF01706 FliG-C;
FliG C-terminal domain. FliG is a component of the flagellar rotor, present in about 25 copies per flagellum. This domain functions specifically in motor rotation.. +PF02108 Flagellar assembly protein FliH
+PF02050 Flagellar FliJ protein
+PF03748 Flagellar basal body-associated protein FliL
This FliL protein controls the rotational direction of the flagella during chemotaxis . FliL is a cytoplasmic membrane protein associated with the basal body .. +PF02154 Flagellar motor switch protein FliM
+PF04347 Flagellar biosynthesis protein, FliO
FliO is an essential component of the flagellum-specific protein export apparatus . It is an integral membrane protein. Its precise molecular function is unknown.. +PF00813 FliP family
Pfam-B_1679 (release 2.1). +PF02561 Flagellar protein FliS
FliS is coded for by the FliD operon and is transcribed in conjunction with FliD and FliT, however this protein has no known function.. +PF01698 Floricaula / Leafy protein
Pfam-B_1633 (release 4.1). This family consists of various plant development proteins which are homologues of floricaula (FLO) and Leafy (LFY) proteins which are floral meristem identity proteins. Mutations in the sequences of these proteins affect flower and leaf development.. +PF00624 Flocculin repeat
Pfam-B_51 (release 2.1). This short repeat is rich in serine and threonine residues.. +PF05202 Recombinase Flp protein
+PF04964 Flp/Fap pilin component
+PF03930 Flp;
Recombinase Flp protein N-terminus. +PF02662 Methyl-viologen-reducing hydrogenase, delta subunit
This family consists of methyl-viologen-reducing hydrogenase, delta subunit / heterodisulphide reductase. No specific functions have been assigned to this subunit. The aligned region corresponds to almost the entire delta chain sequence and contains 4 conserved cysteine residues. However, in two Archaeoglobus sequences this region corresponds to only the C-terminus of these proteins Swiss:O29030 and Swiss:029595.. +PF02947 flt3_lig;
The flt3 ligand is a short chain cytokine with a 4 helical bundle fold.. +PF04772 Influenza B matrix protein 2 (BM2)
Pfam-B_2165 (release 7.6). M2 is synthesised in the late phase of infection and incorporated into the virion. It may be phosphorylated in vivo. The function of BM2 is unknown .. +PF02942 Influenza B non-structural protein (NS1)
Pfam-B_198 (Release 6.4). A specific region of the influenza B virus NS1 protein, which includes part of its effector domain, blocks the covalent linkage of ISG15 Swiss:Q64339 to its target proteins both in vitro and in infected cells. Of the several hundred proteins induced by interferon (IFN) alpha/beta, the ubiquitin-like ISG15 protein is one of the most predominant. Influenza A virus employs a different strategy: its NS1 protein does not bind the ISG15 protein, but little or no ISG15 protein is produced during infection .. +PF03506 Influenza C non-structural protein (NS1)
Pfam-B_980 (release 7.0). The influenza C virus genome consists of seven single-stranded RNA segments. The shortest RNA segment encodes a 286 amino acid non-structural protein NS1 . This protein contains 6 conserved cysteines that may be functionally important, perhaps binding to a metal ion.. +PF03555 Influenza C non-structural protein (NS2)
Pfam-B_346 (release 7.0). The influenza C virus genome consists of seven single-stranded RNA segments. The shortest RNA segment encodes a 286 amino acid non-structural protein NS1 Pfam:PF03506 as well as the NS2 protein. The NS2 protein is only about 60 amino acids in length and of unknown function.. +PF00598 Influenza Matrix protein (M1)
This protein forms a continuous shell on the inner side of the lipid bilayer, but its function is unclear.. +PF00599 Influenza Matrix protein (M2)
This protein spans the viral membrane with an extracellular amino-terminus external and a cytoplasmic carboxy-terminus.. +PF00506 flu_virus_nuc;
Influenza virus nucleoprotein. Pfam-B_10 (release 1.0). +PF00600 Influenza non-structural protein (NS1)
NS1 is a homodimeric RNA-binding protein that is required for viral replication. NS1 binds polyA tails of mRNA keeping them in the nucleus. NS1 inhibits pre-mRNA splicing by tightly binding to a specific stem-bulge of U6 snRNA.. +PF00601 Influenza non-structural protein (NS2)
NS2 may play a role in promoting normal replication of the genomic RNAs by preventing the replication of short-length RNA species . . +PF00603 Influenza RNA-dependent RNA polymerase subunit PA
+PF00602 Influenza RNA-dependent RNA polymerase subunit PB1
Two GTP binding sites exist in this protein .. +PF00604 Influenza RNA-dependent RNA polymerase subunit PB2
PB2 can bind 5' end cap structure of RNA .. +PF03069 Acetamidase/Formamidase family
Pfam-B_2541 (release 6.4). This family includes amidohydrolases of formamide EC:3.5.1.49 and acetamide. Swiss:Q50228 forms a homotrimer suggesting all the members of this family also do.. +PF01070 FMN-dependent dehydrogenase
Pfam-B_829 (release 3.0). +PF00743 Flavin-binding monooxygenase-like
Pfam-B_437 (release 2.1). +PF00039 Fibronectin type I domain
Swissprot_feature_table. +PF00040 Fibronectin type II domain
+PF00041 Fibronectin type III domain
Swissprot_feature_table. +PF02986 Fibronectin binding repeat
Griffiths-Jones SR, Schwarz-Linek U. Pfam-B_2661 (release 6.4). The ability of bacteria to bind fibronectin is thought to enable the colonisation of wound tissue and blood clots. The fibronectin binding repeat is found in bacterial fibronectin binding proteins and serum opacity factor. Bacterial fibronectin binding proteins are surface proteins that covalently link to the bacterial cell wall, mediate adherence of the bacteria to host cells and trigger the fibronectin/integrin-mediated uptake of bacteria by host cells . Each fibronectin binding repeat is an array of short motifs that bind to fibronectin type I domains . Fibronectin binding repeats are natively unfolded in the absence of fibronectin and are thought to adopt a well-defined conformation (tandem beta-zipper) upon binding .. +PF03274 Foamy virus BEL 1/2 protein
Pfam-B_4337 (release 6.5). +PF03408 Foamy virus envelope protein
Pfam-B_4411 (release 6.6). Expression of the envelope (Env) glycoprotein is essential for viral particle egress. This feature is unique to the Spumavirinae, a subclass of the Retroviridae.. +PF03623 Focal adhesion targeting region
Focal adhesion kinase (FAK) is a tyrosine kinase found in focal adhesions, intracellular signaling complexes that are formed following engagement of the extracellular matrix by integrins. The C-terminal 'focal adhesion targeting' (FAT) region is necessary and sufficient for localising FAK to focal adhesions. The crystal structure of FAT shows it forms a four-helix bundle that resembles those found in two other proteins involved in cell adhesion, alpha-catenin and vinculin . The binding of FAT to the focal adhesion protein, paxillin, requires the integrity of the helical bundle, whereas binding to another focal adhesion protein, talin, does not.. +PF02980 Restriction endonuclease FokI, catalytic domain
+PF02981 Restriction endonuclease FokI, recognition domain
+PF01770 Reduced folate carrier
Pfam-B_1123 (release 4.2). The reduced folate carrier (a transmembrane glycoprotein) transports reduced folate into mammalian cells via the carrier mediated mechanism (as opposed to the receptor mediated mechanism) it also transports cytotoxic folate analogues used in chemotherapy , such as methotrexate (MTX). Mammalian cells have an absolute requirement for exogenous folates which are needed for growth, and biosynthesis of macromolecules .. +PF03024 Folate receptor family
Pfam-B_1966 (release 6.4). This family includes the folate receptor which binds to folate and reduced folic acid derivatives and mediates delivery of 5-methyltetrahydrofolate to the interior of cells. These proteins are attached to the membrane by a GPI-anchor. The proteins contain 16 conserved cysteines that form eight disulphide bridges.. +PF02152 Dihydroneopterin aldolase
This enzyme EC:4.1.2.25 catalyses the conversion of 7,8-dihydroneopterin to 6-hydroxymethyl-7,8-dihydropterin in the biosynthetic pathway of tetrahydrofolate.. +PF00250 FKH;
+PF01226 Formate/nitrite transporter
+PF02971 formiminotr;
Formiminotransferase domain. +PF02911 formyl_trans_C;
Formyl transferase, C-terminal domain. +PF00551 formyl_transf;
This family includes the following members. Glycinamide ribonucleotide transformylase catalyses the third step in de novo purine biosynthesis, the transfer of a formyl group to 5'-phosphoribosylglycinamide. Formyltetrahydrofolate deformylase produces formate from formyl- tetrahydrofolate. Methionyl-tRNA formyltransferase transfers a formyl group onto the amino terminus of the acyl moiety of the methionyl aminoacyl-tRNA. Inclusion of the following members is supported by PSI-blast. HOXX_BRAJA (P31907) contains a related domain of unknown function. PRTH_PORGI (P46071) contains a related domain of unknown function. Y09P_MYCTU (Q50721) contains a related domain of unknown function.. +PF01491 Frataxin-like domain
This family contains proteins that have a domain related to the globular C-terminus of Frataxin the protein that is mutated in Friedreich's ataxia. This domain is found in a family of bacterial proteins.\. The function of this domain is currently unknown. It has been suggested that this family is involved in iron transport.. +PF03197 Bacteriophage FRD2 protein
Pfam-B_2816 (release 6.5). +PF04422 Coenzyme F420 hydrogenase/dehydrogenase, beta subunit N-term
Coenzyme F420 hydrogenase (EC:1.12.99.1) reduces the low-potential two-electron acceptor coenzyme F420. This family contains the N termini of F420 hydrogenase and dehydrogenase beta subunits , . The N terminus of Methanobacterium formicicum formate dehydrogenase beta chain (EC:1.2.1.2, Swiss:P06130) is also a member of this family . This region is often found in association with the 4Fe-4S binding domain, fer4 (Pfam:PF00037).. +PF03881 Fructosamine kinase
This family includes eukaryotic fructosamine-3-kinase enzymes . The family also includes bacterial members that have not been characterised but probably have a similar or identical function.. +PF04961 Formiminotransferase-cyclodeaminase
Members of this family are thought to be Formiminotransferase- cyclodeaminase enzymes EC:4.3.1.4. This domain is found in the C-terminus of the bifunctional animal members of the family.. +PF01268 Formate--tetrahydrofolate ligase
+PF03239 Iron permease FTR1 family
Pfam-B_3227 (release 6.5). +PF02491 FtsA;
SHS2 domain inserted in FTSA. FtsA is essential for bacterial cell division, and co-localises to the septal ring with FtsZ. The SHS2 domain is inserted in to the RNAseH fold of FtsA , and is involved in protein-protein interaction .. +PF01580 FtsK/SpoIIIE family
Pfam-B_458 (release 4.1). FtsK has extensive sequence similarity to wide variety of proteins from prokaryotes and plasmids , termed the FtsK/SpoIIIE family. This domain contains a putative ATP binding P-loop motif. It is found in the FtsK cell division protein from E. coli Swiss:P46889 and the stage III sporulation protein E SpoIIIE Swiss:P21458 which has roles in regulation of prespore specific gene expression in B. subtilis. A mutation in FtsK causes a temperature sensitive block in cell division and it is involved in peptidoglycan synthesis or modification . The SpoIIIE protein is implicated in intercellular chromosomal DNA transfer .. +PF04999 Cell division protein FtsL
In Escherichia coli, nine gene products are known to be essential for assembly of the division septum. One of these, FtsL, is a bitopic membrane protein whose precise function is not understood. It has been proposed that FtsL interacts with the DivIC protein Pfam:PF04977 , however this interaction may be indirect .. +PF03799 Cell division protein FtsQ
Pfam-B_1605 (release 7.0). FtsQ is one of several cell division proteins. FtsQ interacts with other Fts proteins, reviewed in . The precise function of FtsQ is unknown.. +PF01098 Cell cycle protein
This entry includes the following members; FtsW, RodA, SpoVE. +PF03867 Fushi tarazu (FTZ), N-terminal region
This region contains the important motif (LXXLL) necessary for the interaction of FTZ with the nuclear receptor FTZ-F1. FTZ is thought to represent a category of LXXLL motif-dependent co-activators for nuclear receptors.. +PF02952 fucose_iso_C;
L-fucose isomerase, C-terminal domain. Pfam-B_9303 (Release 8.0). +PF02300 Fumarate reductase subunit C
Pfam-B_11568 (release 5.2). Fumarate reductase is a membrane-bound flavoenzyme consisting of four subunits, A-B. A and B comprise the membrane-extrinsic catalytic domain and C and D link the catalytic centres to the electron-transport chain. This family consists of the 15kD hydrophobic subunit C.. +PF02313 Fumarate reductase subunit D
Pfam-B_12414 (release 5.2). Fumarate reductase is a membrane-bound flavoenzyme consisting of four subunits, A-B. A and B comprise the membrane-extrinsic catalytic domain and C and D link the catalytic centres to the electron-transport chain. This family consists of the 13kD hydrophobic subunit D.. +PF03630 Fumble
Pfam-B_3299 (release 7.0). Fumble is required for cell division in Drosophila. Mutants lacking fumble exhibit abnormalities in bipolar spindle organisation, chromosome segregation, and contractile ring formation. Analyses have demonstrated that fumble encodes three protein isoforms, all of which contain a domain with high similarity to the pantothenate kinases of A. nidulans and mouse . A role of fumble in membrane synthesis has been proposed .. +PF04930 FUN14 family
Pfam-B_8237 (release 7.5). This family of short proteins are found in eukaryotes and some archaea. Although the function of these proteins is not known they may contain transmembrane helices.. +PF01475 Ferric uptake regulator family
Prodom_2003 (release 99.1). This family includes metal ion uptake regulator proteins, that bind to the operator DNA and controls transcription of metal ion-responsive genes. This family is also known as the FUR family.. +PF00757 Furin-like cysteine rich region
+PF04632 Fusaric acid resistance protein family
Pfam-B_5345 (release 7.5). This family includes a conserved region found in two proteins associated with fusaric acid resistance, Swiss:P24128 from Burkholderia cepacia and Swiss:Q48403 from Klebsiella oxytoca. These proteins are likely to be membrane transporter proteins.. +PF00523 fusion_gly;
Fusion glycoprotein F0. Pfam-B_102 (release 1.0). +PF01621 Cell fusion glycoprotein K
Pfam-B_1083 (release 4.1). This protein is probably an integral membrane bound glycoprotein that is involved in viral fusion with the host cell . . +PF02663 FwdE;
FmdE, Molybdenum formylmethanofuran dehydrogenase operon . This entry represents the FmdE protein that is encoded by the molybdenum formylmethanofuran dehydrogenase operon. FmdE does not co-purify with the molybdenum isozyme that is formed by FmdC and FmdB . The domain is typically found as a single copy, but is repeated in some sequences two to three times. It is also commonplace for this domain to co-occur with a zinc-beta ribbon domain, suggesting that it may bind nucleic acid and be involved in transcription regulation.. +PF04186 FxsA cytoplasmic membrane protein
This is a bacterial family of cytoplasmic membrane proteins. It includes two transmembrane regions. The molecular function of FxsA is unknown, but in Escherichia coli its over-expression has been shown to alleviate the exclusion of phage T7 in those cells with an F plasmid.. +PF04799 fzo_mitofusin;
fzo-like conserved region. Pfam-B_6217 (release 7.5). Family of putative transmembrane GTPase. The fzo protein is a mediator of mitochondrial fusion . This conserved region is also found in the human mitofusin protein .. +PF01125 G10 protein
+PF00503 G-protein alpha subunit
G proteins couple receptors of extracellular signals to intracellular signaling pathways. The G protein alpha subunit binds guanyl nucleotide and is a weak GTPase. A set of residues that are unique to G-alpha as compared to its ancestor the Arf-like family form a ring of residues centered on the nucleotide binding site . A Ggamma is found fused to an inactive Galpha in the Dictyostelium protein gbqA .. +PF00631 G_protein_gamma;
G-protein gamma like domains (GGL) are found in the gamma subunit of the heterotrimeric G protein complex and in regulators of G protein signaling (RGS) proteins . It is also found fused to an inactive Galpha in the Dictyostelium protein gbqA . G-gamma likely shares a common origin with the helical N-terminal unit of G-beta . All organisms that possess a G-beta possess a G-gamma .. +PF04466 G2P;
Phage terminase large subunit. Initiation of packaging of double-stranded viral DNA involves the specific interaction of the prohead with viral DNA in a process mediated by a phage-encoded terminase protein. The terminase enzymes are usually hetero-oligomers composed of a small and a large subunit. This region is found on the large subunit and possesses an endonuclease and ATPase activity that require Mg2+ and a neutral or slightly basic reaction. This region is also found in bacterial sequences [1,2].. +PF04309 Glycerol-3-phosphate responsive antiterminator
Intracellular glycerol is usually converted to glycerol-3-phosphate in an ATP-requiring phosphorylation reaction catalysed by glycerol kinase (GlpK) glycerol-3-phosphate activates the antiterminator GlpP .. +PF00479 G6PD;
Glucose-6-phosphate dehydrogenase, NAD binding domain. +PF02781 Glucose-6-phosphate dehydrogenase, C-terminal domain
+PF01468 GA module
Pfam-B_895 (release 4.0). The GA (protein G-related Albumin-binding) module is composed of three alpha helices . This module is found in a range of bacterial cell surface proteins. The GA module from Swiss:Q51911 shows a strong affinity for albumin.. +PF02938 GAD domain
This domain is found in some members of the GatB and aspartyl tRNA synthetases.. +PF02337 Retroviral GAG p10 protein
Pfam-B_959 (release 5.2). This family consists of various retroviral GAG (core) polyproteins and encompasses the p10 region producing the p10 protein upon proteolytic cleavage of GAG by retroviral protease. The p10 or matrix protein (MA) is associated with the virus envelope glycoproteins in most mammalian retroviruses and may be involved in virus particle assembly, transport and budding . Some of the GAG polyproteins have alternate cleavage sites leading to the production of alternative and longer cleavage products (e.g. p19 Swiss:P21411) the alignment of this family only covers the approximately N-terminal (GAG) 100 amino acid region of homology to p10.. +PF00540 gag_p17;
gag gene protein p17 (matrix protein). The matrix protein forms an icosahedral shell associated with the inner membrane of the mature immunodeficiency virus.. +PF02228 gag_p19;
Major core protein p19. Pfam-B_1307 (release 5.2). p19 is a component of the inner protein layer of the viral nucleocapsid.. +PF00607 gag_p24;
gag gene protein p24 (core nucleocapsid protein). p24 forms inner protein layer of the nucleocapsid. ELISA tests for p24 is the most commonly used method to demonstrate virus replication both in vivo and in vitro.. +PF02093 Gag P30 core shell protein
According to Swiss-Prot annotation this protein is the viral core shell protein. P30 is essential for viral assembly .. +PF03276 Spumavirus gag protein
Pfam-B_1878 (release 6.5). +PF00337 Galactoside-binding lectin
This family contains galactoside binding lectins.\. The family also includes enzymes such as human eosinophil lysophospholipase (Swiss:Q05315, EC:3.1.1.5).. +PF03902 Gal4-like dimerisation domain
+PF01762 Galactosyltransferase
Pfam-B_885 (release 4.2). This family includes the galactosyltransferases UDP-galactose:2-acetamido-2-deoxy-D-glucose3beta-galactosyltransferase Swiss:O43825 and UDP-Gal:beta-GlcNAc beta 1,3-galactosyltransferase Swiss:O54904 . Specific galactosyltransferases transfer galactose to GlcNAc terminal chains in the synthesis of the lacto-series oligosaccharides types 1 and 2 .. +PF02709 Galactosyl_T_2; Glyco_transf_2C;
N-terminal domain of galactosyltransferase. Pfam-B_834 (release 5.5). This is the N-terminal domain of a family of galactosyltransferases from a wide range of Metazoa with three related galactosyltransferases activities, all three of which are possessed by one sequence in some cases. EC:2.4.1.90, N-acetyllactosamine synthase; EC:2.4.1.38, Beta-N-acetylglucosaminyl-glycopeptide beta-1,4- galactosyltransferase; and EC:2.4.1.22 Lactose synthase. Note that N-acetyllactosamine synthase is a component of Lactose synthase along with alpha-lactalbumin, in the absence of alpha-lactalbumin EC:2.4.1.90 is the catalysed reaction.. +PF01296 Galanin
+PF02052 Gallidermin
+PF02744 GalP_UDP_trans_C;
Galactose-1-phosphate uridyl transferase, C-terminal domain. SCOP reports fold duplication with N-terminal domain. Both involved in Zn and Fe binding.. +PF01087 Galactose-1-phosphate uridyl transferase, N-terminal domain
SCOP reports fold duplication with C-terminal domain. Both involved in Zn and Fe binding.. +PF00304 Gamma-thionin family
+PF04410 Gar1/Naf1 RNA binding region
Gar1 is a small nucleolar RNP that is required for pre-mRNA processing and pseudouridylation . It is co-immunoprecipitated with the H/ACA families of snoRNAs. This family represents the conserved central region of Gar1. This region is necessary and sufficient for normal cell growth, and specifically binds two snoRNAs snR10 and snR30. This region is also necessary for nucleolar targeting, and it is thought that the protein is co-transported to the nucleolus as part of a nucleoprotein complex . In humans, Gar1 is also a component of telomerase in vivo . Naf1 is an essential protein that plays a role in ribosome biogenesis, modification of spliceosomal small nuclear RNAs and telomere synthesis, and is homologous to Gar1 .. +PF01071 GARS;
Phosphoribosylglycinamide synthetase, ATP-grasp (A) domain. Pfam-B_916 (release 3.0). Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the ATP-grasp domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF02786).. +PF02843 Phosphoribosylglycinamide synthetase, C domain
Pfam-B_916 (release 3.0). Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the C-terminal domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF02787).. +PF02844 Phosphoribosylglycinamide synthetase, N domain
Pfam-B_916 (release 3.0). Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the N-terminal domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF00289).. +PF03198 GAS1;
Glucanosyltransferase. Pfam-B_2209 (release 6.5). This is a family of glycosylphosphatidylinositol-anchored beta(1-3)glucanosyltransferases. The active site residues in the Aspergillus fumigatus example Swiss:B0XT72 are the two glutamate residues at 160 and 261 .. +PF02187 Growth-Arrest-Specific Protein 2 Domain
Alignment kindly provided by SMART. +PF00741 Gas vesicle protein
Pfam-B_545 (release 2.1). +PF01304 Gas vesicles protein GVPc repeated domain
+PF02704 Gibberellin regulated protein
Pfam-B_1221 (release 5.5). This is the GASA gibberellin regulated cysteine rich protein family. The expression of these proteins is up-regulated by the plant hormone gibberellin; most of these proteins have some role in plant development. There are 12 cysteine residues conserved within the alignment giving the potential for these proteins to possess 6 disulphide bonds.. +PF00918 Gastrin/cholecystokinin family
Pfam-B_1542 (release 3.0). +PF00310 Glutamine amidotransferases class-II
Prosite & Pfam-B_5381 (Release 7.5) & Pfam-B_455 (release 7.6). +PF04572 Alpha 1,4-glycosyltransferase conserved region
Pfam-B_4980 (release 7.5). The glycosphingolipids (GSL) form part of eukaryotic cell membranes. They consist of a hydrophilic carbohydrate moiety linked to a hydrophobic ceramide tail embedded within the lipid bilayer of the membrane. Lactosylceramide, Gal1,4Glc1Cer (LacCer), is the common synthetic precursor to the majority of GSL found in vertebrates. Alpha 1.4-glycosyltransferases utilise UDP donors and transfer the sugar to a beta-linked acceptor. This region appears to be confined to higher eukaryotes. No function has been yet assigned to this region . . +PF02263 Guanylate-binding protein, N-terminal domain
Pfam-B_4308 (release 5.2) & Pfam-B_9065 (release 8.0). Transcription of the anti-viral guanylate-binding protein (GBP) is induced by interferon-gamma during macrophage induction. This family contains GBP1 and GPB2, both GTPases capable of binding GTP, GDP and GMP.. +PF02841 Guanylate-binding protein, C-terminal domain
Pfam-B_4308 (release 5.2). Transcription of the anti-viral guanylate-binding protein (GBP) is induced by interferon-gamma during macrophage induction. This family contains GBP1 and GPB2, both GTPases capable of binding GTP, GDP and GMP.. +PF02425 Paralytic/GBP/PSP peptide
Pfam-B_2690 (release 5.4). This family includes insect peptides that are short (23 amino acids) and contain 1 disulphide bridge. The family includes growth-blocking peptide (GBP) of Pseudaletia separata and the paralytic peptides from Manduca sexta, Heliothis virescens, and Spodoptera exigua as well as plasmatocyte-spreading peptide (PSP1) . These peptides function to halt metamorphosis from larvae to pupae.. +PF02526 Glycophorin-binding protein
Pfam-B_1047 (release 5.4). This family contains glycophorin binding proteins from P. falciparum the malarial parasite . Glycophorin is a cell surface protein of erythrocytes. The Glycophorin binding protein contains a tandem 38 residue repeat. In Swiss:P02895 the repeat occurs 11 times.. +PF04551 GcpE protein
Pfam-B_1482 (release 7.5). In a variety of organisms, including plants and several eubacteria, isoprenoids are synthesised by the mevalonate-independent 2-C-methyl-D-erythritol 4-phosphate (MEP) pathway. Although different enzymes of this pathway have been described, the terminal biosynthetic steps of the MEP pathway have not been fully elucidated. GcpE gene of Escherichia coli is involved in this pathway .. +PF02155 Glucocorticoid receptor
+PF04107 Glutamate-cysteine ligase family 2(GCS2)
Also known as gamma-glutamylcysteine synthetase and gamma-ECS (EC:6.3.2.2). This enzyme catalyses the first and rate limiting step in de novo glutathione biosynthesis. Members of this family are found in archaea, bacteria and plants. May and Leaver discuss the possible evolutionary origins of glutamate-cysteine ligase enzymes in different organisms and suggest that it evolved independently in different eukaryotes, from an ancestral bacterial enzyme. They also state that Arabidopsis thaliana gamma-glutamylcysteine synthetase is structurally unrelated to mammalian, yeast and Escherichia coli homologues. In plants, there are separate cytosolic and chloroplast forms of the enzyme.. +PF01597 Glycine cleavage H-protein
Pfam-B_988 (release 4.1). This is a family of glycine cleavage H-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. A lipoyl group is attached to a completely conserved lysine residue. The H protein shuttles the methylamine group of glycine from the P protein to the T protein.. +PF01571 Aminomethyltransferase folate-binding domain
Pfam-B_933 (release 4.0). This is a family of glycine cleavage T-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. The T-protein is an aminomethyl transferase. . +PF04295 D-galactarate dehydratase / Altronate hydrolase, C terminus
Family members include the C termini of D-galactarate dehydratase (EC:4.2.1.42) which is thought to catalyse the reaction D-galactarate = 5-keto-4-deoxy-D-glucarate + H2O, and altronate hydrolase (altronic acid hydratase, EC:4.2.1.7), which catalyses D-altronate = 2-keto-2-deoxygluconate + H2O . As purified, both enzymes are catalytically inactive in the absence of added Fe2+, Mn2+, and beta-mercaptoethanol. Synergistic activation of altronate hydrolase activity is seen in the presence of both iron and manganese ions, suggesting that the enzyme may have two ion binding sites. Mn2+ appears to be part of the enzyme active centre, but the function of the single bound Fe2+ ion is unknown. The hydratase has no Fe-S core .. +PF01150 GDA1_CD39_NTPase;
GDA1/CD39 (nucleoside phosphatase) family. +PF02347 Glycine cleavage system P-protein
Pfam-B_840 (release 5.2). This family consists of Glycine cleavage system P-proteins EC:1.4.4.2 from bacterial, mammalian and plant sources. The P protein is part of the glycine decarboxylase multienzyme complex EC:2.1.2.10 (GDC) also annotated as glycine cleavage system or glycine synthase. GDC consists of four proteins P, H, L and T . The reaction catalysed by this protein is:- Glycine + lipoylprotein <=> S-aminomethyldihydrolipoylprotein + CO2. +PF00996 GDP dissociation inhibitor
Pfam-B_1220 (release 3.0). +PF02351 GDNF/GAS1 domain
Pfam-B_889 (release 5.2). This cysteine rich domain is found in multiple copies in GDNF and GAS1 proteins. GDNF and neurturin (NTN) receptors are potent survival factors for sympathetic, sensory and central nervous system neurons . GDNF and neurturin promote neuronal survival by signaling through similar multicomponent receptors that consist of a common receptor tyrosine kinase and a member of a GPI-linked family of receptors that determines ligand specificity .. +PF02212 Dynamin GTPase effector domain
Alignment kindly provided by SMART. +PF04807 Geminivirus AC4/5 conserved region
Pfam-B_3520 (release 7.6). +PF00799 Geminivirus_AL1;
Geminivirus Rep catalytic domain. Pfam-B_286 (release 2.1). The AL1 proteins encodes the replication initiator protein (Rep) of geminiviruses, which is a replicon-specific initiator enzyme and is an essential component of the replisome . For geminivirus Rep protein, this N-terminal region is crucial for origin recognition and DNA cleavage and nucleotidyl transfer .. +PF01440 Geminivirus AL2 protein
Prodom_1117 (release 99.1). Geminiviruses are small, ssDNA-containing plant viruses. Geminiviruses contain three ORFs (designated AL1, AL2, and AL3) that overlap and are specified by multiple polycistronic mRNAs. The AL2 gene product transactivates expression of TGMV coat protein gene , and BR1 movement protein.. +PF01407 Geminivirus AL3 protein
Pfam-B_1874 (release 3.0). Geminiviruses are small, ssDNA-containing plant viruses. Geminiviruses contain three ORFs (designated AL1, AL2, and AL3) that overlap and are specified by multiple polycistronic mRNAs. The AL3 protein comprises approximately 0.05% of the cellular proteins and is present in the soluble and organelle fractions . AL3 may form oligomers . Immunoprecipitation of AL3 in a baculovirus expression system extracts expressing both AL1 Pfam:PF00799 and AL3 showed that the two proteins also complex with each other . The AL3 protein is involved in viral replication.. +PF00845 Geminivirus BL1 movement protein
Pfam-B_1535 (release 2.1). Geminiviruses encode two movement proteins that are essential for systemic infection of their host but dispensable for replication and encapsidation.. +PF01492 Geminivirus C4 protein
Pfam-B_453 (release 4.0). This family consists of the N terminal region of geminivirus C4 or AC4 proteins. In Tomato yellow leaf curl geminivirus (TYLCV) the C4 protein is necessary for efficient spreading of the virus in tomato plants .. +PF00844 Geminivirus coat protein/nuclear export factor BR1 family
Pfam-B_1430 (release 2.1). It has been shown that the 104 N-terminal amino acids of the maize streak virus coat protein bind DNA non-specifically . This family also includes various geminivirus movement proteins that are nuclear export factors or shuttles. One member BR1 facilitates the export of both ds and ss DNA from the nucleus .. +PF01708 Geminivirus putative movement protein
Pfam-B_1771 (release 4.1). This family consists of putative movement proteins from Maize streak and wheat dwarf virus.. +PF01524 Geminivirus V1 protein
Pfam-B_893 (release 4.0). Disruption of the V1 gene in Tomato yellow leaf curl virus (TYLCV) stopped its ability to systemically infect tomato plants, suggesting that the V1 gene product is required for successful infection of the host .. +PF02053 Gene 66 (IR5) protein
+PF03323 Bacillus/Clostridium GerA spore germination protein
Pfam-B_3821 (release 6.5). +PF00196 Bacterial regulatory proteins, luxR family
+PF01353 Green fluorescent protein
+PF05165 GGDN family
I have named this protein family of unknown function GGDN after the most conserved motif. The proteins are 200-270 amino acids in length.. +PF01134 Glucose inhibited division protein A
Prosite & Pfam-B_4007 (Release 8.0). +PF02527 rRNA small subunit methyltransferase G
Pfam-B_1265 (release 5.4). This is a family of bacterial glucose inhibited division proteins; these are probably involved in the regulation of cell division . GidB has been shown to be a methyltransferase G specific to the rRNA small subunit [2, 3]. Previously identified as a glucose-inhibited division protein B that appears to be present and in a single copy in all complete eubacterial genomes so far sequenced. GidB specifically methylates the N7 position of a guanosine in 16S rRNA .. +PF03227 Gamma interferon inducible lysosomal thiol reductase (GILT)
Pfam-B_1477 (release 6.5). This family includes the two characterised human gamma-interferon-inducible lysosomal thiol reductase (GILT) sequences: Swiss:P13284 and Swiss:Q9UL08 . It also contains several other eukaryotic putative proteins with similarity to GILT . The aligned region contains three conserved cysteine residues. In addition, the two GILT sequences possess a C-X(2)-C motif that is shared by some of the other sequences in the family. This motif is thought to be associated with disulphide bond reduction.. +PF03359 Guanylate-kinase-associated protein (GKAP) protein
Pfam-B_1892 (release 6.6). +PF03275 UDP-galactopyranose mutase
Pfam-B_4203 (release 6.5). +PF02812 E_L_F_V_dh; GLFV_dehydrog_N;
Glu/Leu/Phe/Val dehydrogenase, dimerisation domain. +PF00120 gln-synt;
Glutamine synthetase, catalytic domain. +PF03951 gln-synt_N;
Glutamine synthetase, beta-Grasp domain. +PF03710 Glutamate-ammonia ligase adenylyltransferase
Conserved repeated domain found in GlnE proteins. These proteins adenylate and deadenylate glutamine synthases: ATP + {L-Glutamate:ammonia ligase (ADP-forming)} = Diphosphate + Adenylyl-{L-Glutamate:Ammonia ligase (ADP-forming)}. The family is related to the Pfam:PF01909 domain.. +PF03616 Sodium/glutamate symporter
TIGRFAMs, Griffiths-Jones SR. +PF01744 GLTT repeat (6 copies)
Pfam-B_681 (release 4.2). This short repeat of unknown function is found in multiple copies in several C. elegans proteins. The repeat is five residues long and consists of XGLTT where X can be any amino acid.. +PF02686 Glu-tRNAGln amidotransferase C subunit
This is a family of Glu-tRNAGln amidotransferase C subunits. The Glu-tRNAGln amidotransferase enzyme itself is an important translational fidelity mechanism replacing incorrectly charged Glu-tRNAGln with the correct Gln-tRNAGln via transamidation of the misacylated Glu-tRNAGln . This activity supplements the lack of glutaminyl-tRNA synthetase activity in gram-positive eubacteria, cyanobacteria, Archaea, and organelles .. +PF05096 Glutamine cyclotransferase
This family of enzymes EC:2.3.2.5 catalyse the cyclization of free L-glutamine and N-terminal glutaminyl residues in proteins to pyroglutamate (5-oxoproline) and pyroglutamyl residues respectively . This family includes plant and bacterial enzymes and seems unrelated to the mammalian enzymes.. +PF04262 glu_cys_ligase;
Glutamate-cysteine ligase . TIGRFAMs (release 2.0);. Family of bacterial glutamate-cysteine ligases (EC:6.3.2.2) that carry out the first step of the glutathione biosynthesis pathway.. +PF01645 Conserved region in glutamate synthase
Pfam-B_719 (release 4.1). This family represents a region of the glutamate synthase protein. This region is expressed as a separate subunit in the glutamate synthase alpha subunit from archaebacteria, or part of a large multidomain enzyme in other organisms. The aligned region of these proteins contains a putative FMN binding site and Fe-S cluster.. +PF02364 1,3-beta-glucan synthase component
Pfam-B_686 (release 5.2). This family consists of various 1,3-beta-glucan synthase components including Gls1, Gls2 and Gls3 from yeast. 1,3-beta-glucan synthase EC:2.4.1.34 also known as callose synthase catalyses the formation of a beta-1,3-glucan polymer that is a major component of the fungal cell wall . The reaction catalysed is:- UDP-glucose + {(1,3)-beta-D-glucosyl}(N) <=> UDP + {(1,3)-beta-D-glucosyl}(N+1).. +PF02685 Glucokinase
This is a family of glucokinases or glucose kinases EC:2.7.1.2. These enzymes phosphorylate glucose using ATP as a donor to give glucose-6-phosphate and ADP.. +PF01182 Glucosamine-6-phosphate isomerases/6-phosphogluconolactonase
+PF00462 glutaredoxin;
Prosite & Pfam-B_3081 (Release 8.0). +PF04399 Glutaredoxin 2, C terminal domain
Glutaredoxins are a multifunctional family of glutathione-dependent disulphide oxidoreductases. Unlike other glutaredoxins, glutaredoxin 2 (Grx2) cannot reduce ribonucleotide reductase. Grx2 has significantly higher catalytic activity in the reduction of mixed disulphides with glutathione (GSH) compared with other glutaredoxins. The active site residues (Cys9-Pro10-Tyr11-Cys12, in Escherichia coli Grx2, Swiss:P39811), which are found at the interface between the N- and C-terminal domains are identical to other glutaredoxins, but there is no other similarity between glutaredoxin 2 and other glutaredoxins. Grx2 is structurally similar to glutathione-S-transferases (GST), but there is no obvious sequence similarity. The inter-domain contacts are mainly hydrophobic, suggesting that the two domains are unlikely to be stable on their own. Both domains are needed for correct folding and activity of Grx2. It is thought that the primary function of Grx2 is to catalyse reversible glutathionylation of proteins with GSH in cellular redox regulation including the response to oxidative stress.. +PF03157 High molecular weight glutenin subunit
Pfam-B_2180 (release 6.5). Members of this family include high molecular weight subunits of glutenin. This group of gluten proteins is thought to be largely responsible for the elastic properties of gluten, and hence, doughs. Indeed, glutenin high molecular weight subunits are classified as elastomeric proteins, because the glutenin network can withstand significant deformations without breaking, and return to the original conformation when the stress is removed. Elastomeric proteins differ considerably in amino acid sequence, but they are all polymers whose subunits consist of elastomeric domains, composed of repeated motifs, and non-elastic domains that mediate cross-linking between the subunits. The elastomeric domain motifs are all rich in glycine residues in addition to other hydrophobic residues. High molecular weight glutenin subunits have an extensive central elastomeric domain, flanked by two terminal non-elastic domains that form disulphide cross-links. The central elastomeric domain is characterised by the following three repeated motifs: PGQGQQ, GYYPTS[P/L]QQ, GQQ. It possesses overlapping beta-turns within and between the repeated motifs, and assumes a regular helical secondary structure with a diameter of approx. 1.9 nm and a pitch of approx. 1.5 nm [see 1, fig.2].. +PF00745 GlutR;
Glutamyl-tRNAGlu reductase, dimerisation domain. Pfam-B_544 (release 2.1). +PF05201 Glutamyl-tRNAGlu reductase, N-terminal domain
+PF02595 DUF168;
Glycerate kinase family. This is a family of Glycerate kinases.. +PF01228 Glycine radical
+PF00232 glycosyl_hydro1;
Glycosyl hydrolase family 1. +PF00331 glycosyl_hydro3;
Glycosyl hydrolase family 10. +PF00457 glycosyl_hydro6;
Glycosyl hydrolases family 11. +PF01109 Granulocyte-macrophage colony-stimulating factor
+PF01670 Glycosyl hydrolase family 12
Pfam-B_1736 (release 4.1). +PF01373 Glycosyl_hydr22;
Glycosyl hydrolase family 14. This family are beta amylases.. +PF00723 glycosyl_hydr10;
Glycosyl hydrolases family 15. Pfam-B_771 (release 2.1). In higher organisms this family is represented by phosphorylase kinase subunits.. +PF00332 glycosyl_hydro4;
Glycosyl hydrolases family 17. +PF00182 chitinase_1;
+PF00703 glycosyl_hydro7;
Glycosyl hydrolases family 2. Pfam-B_572 (release 2.1). This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities.. +PF00728 glycosyl_hydr11;
Glycosyl hydrolase family 20, catalytic domain. Pfam-B_877 (release 2.1). This domain has a TIM barrel fold.. +PF02838 glycosyl_hydr11;
Glycosyl hydrolase family 20, domain 2. Pfam-B_877 (release 2.1). This domain has a zincin-like fold.. +PF01183 Glycosyl_hydr18;
Glycosyl hydrolases family 25. +PF02156 Glycosyl hydrolase family 26
+PF00295 PG;
Glycosyl hydrolases family 28. Glycosyl hydrolase family 28 includes polygalacturonase EC:3.2.1.15 as well as rhamnogalacturonase A (RGase A), EC:3.2.1.-. These enzymes are important in cell wall metabolism.. +PF02836 glycosyl_hydro7;
Glycosyl hydrolases family 2, TIM barrel domain. Pfam-B_572 (release 2.1). This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities.. +PF02837 glycosyl_hydro7;
Glycosyl hydrolases family 2, sugar binding domain. Pfam-B_572 (release 2.1). This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities and has a jelly-roll fold.. +PF00933 glycosyl_hydr14;
Glycosyl hydrolase family 3 N terminal domain. Pfam-B_1151 (release 3.0). +PF00251 glycosyl_hydro2; Glyco_hydro_32;
Glycosyl hydrolases family 32 N-terminal domain. This domain corresponds to the N-terminal domain of glycosyl hydrolase family 32 which forms a five bladed beta propeller structure .. +PF01301 Glycosyl_hydr17;
Glycosyl hydrolases family 35. +PF01074 Glycosyl_hydr16;
Glycosyl hydrolases family 38 N-terminal domain. Pfam-B_731 (release 3.0). Glycosyl hydrolases are key enzymes of carbohydrate metabolism.. +PF01229 Glycosyl_hydr19;
Glycosyl hydrolases family 39. +PF01915 glycosyl_hydr14;
Glycosyl hydrolase family 3 C-terminal domain. Pfam-B_1151 (release 3.0). This domain is involved in catalysis and may be involved in binding beta-glucan . This domain is found associated with Pfam:PF00933.. +PF02056 Family 4 glycosyl hydrolase
+PF02449 Beta-galactosidase
Pfam-B_2131 (release 5.4). This group of beta-galactosidase enzymes belong to the glycosyl hydrolase 42 family. The enzyme catalyses the hydrolysis of terminal, non-reducing terminal beta-D-galactosidase residues.. +PF04616 Glycosyl hydrolases family 43
Pfam-B_5336 (release 7.5). The glycosyl hydrolase family 43 contains members that are arabinanase. Arabinanases hydrolyse the alpha-1,5-linked L-arabinofuranoside backbone of plant cell wall arabinans. The structure of arabinanase Arb43A from Cellvibrio japonicus reveals a five-bladed beta-propeller fold. A long V-shaped groove, partially enclosed at one end, forms a single extended substrate-binding surface across the face of the propeller .. +PF02015 Glycosyl hydrolase family 45
+PF01374 Glycosyl_hydr23;
Glycosyl hydrolase family 46. This family are chitosanase enzymes.. +PF02011 Glycosyl hydrolase family 48
Members of this family are endoglucanase EC:3.2.1.4 and exoglucanase EC:3.2.1.91 enzymes that cleave cellulose or related substrate.. +PF03718 Glycosyl hydrolase family 49
Family of dextranase (EC 3.2.1.11) and isopullulanase (EC 3.2.1.57). Dextranase hydrolyses alpha-1,6-glycosidic bonds in dextran polymers.. +PF03512 Glycosyl hydrolase family 52
+PF01630 Hyaluronidase
Pfam-B_1150 (release 4.1). +PF03065 Glycosyl hydrolase family 57
Pfam-B_2506 (release 6.4). This family includes alpha-amylase (EC:3.2.1.1), 4--glucanotransferase (EC:2.4.1.-) and amylopullulanase enzymes.. +PF02057 Glycosyl hydrolase family 59
+PF01341 Glycosyl_hydr21;
Glycosyl hydrolases family 6. +PF03443 Glycosyl hydrolase family 61
Although weak endoglucanase activity has been demonstrated in several members of this family [1-3], they lack the clustered conserved catalytic acidic amino acids present in most glycoside hydrolases. Many members of this family lack measurable cellulase activity on their own, but enhance the activity of other cellulolytic enzymes. They are therefore unlikely to be true glycoside hydrolases .. +PF03664 Glycosyl hydrolase family 62
Family of alpha-L-arabinofuranosidase (EC 3.2.1.55). This enzyme hydrolyses aryl alpha-L-arabinofuranosides and cleaves arabinosyl side chains from arabinoxylan and arabinan. . +PF03633 Glyco_hydro_65c;
Glycosyl hydrolase family 65, C-terminal domain . Pfam-B_3470 (release 7.0). This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. The C-terminal domain forms a two layered jelly roll motif. This domain is situated at the base of the catalytic domain, however its function remains unknown .. +PF03632 Glycosyl hydrolase family 65 central catalytic domain
Pfam-B_3470 (release 7.0). This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase. Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. The central domain is the catalytic domain, which binds a phosphate ion that is proximal to the highly conserved Glu. The arrangement of the phosphate and the glutamate is thought to cause nucleophilic attack on the anomeric carbon atom . The catalytic domain also forms the majority of the dimerisation interface.. +PF03636 Glyco_hydro_65n;
Glycosyl hydrolase family 65, N-terminal domain . Pfam-B_3470 (release 7.0). This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. This domain is believed to be essential for catalytic activity although its precise function remains unknown.. +PF02435 Levansucrase/Invertase
Pfam-B_2011 (release 5.4). This Pfam family consists of the glycosyl hydrolase 68 family, including several bacterial levansucrase enzymes, and invertase from zymomonas.. +PF00840 glycosyl_hydr13;
Glycosyl hydrolase family 7. Pfam-B_1478 (release 2.1). +PF02324 Gluco_S_transf;
Glycosyl hydrolase family 70. Pfam-B_965 (release 5.2). Members of this family belong to glycosyl hydrolase family 70. Glucosyltransferases or sucrose 6-glycosyl transferases (GTF-S) catalyse the transfer of D-glucopyranosyl units from sucrose onto acceptor molecules , EC:2.4.1.5. This family roughly corresponds to the N-terminal catalytic domain of the enzyme. Members of this family also contain the Putative cell wall binding domain Pfam:PF01473, which corresponds with the C-terminal glucan-binding domain.. +PF03659 Glycosyl hydrolase family 71
Family of alpha-1,3-glucanases. . +PF03662 Glycosyl hydrolase family 79, N-terminal domain
Family of endo-beta-N-glucuronidase, or heparanase. Heparan sulfate proteoglycans (HSPGs) play a key role in the self- assembly, insolubility and barrier properties of basement membranes and extracellular matrices. Hence, cleavage of heparan sulfate (HS) affects the integrity and functional state of tissues and thereby fundamental normal and pathological phenomena involving cell migration and response to changes in the extracellular micro-environment. Heparanase degrades HS at specific intra-chain sites. The enzyme is synthesised as a latent approximately 65 kDa protein that is processed at the N-terminus into a highly active approximately 50 kDa form. Experimental evidence suggests that heparanase may facilitate both tumour cell invasion and neovascularization, both critical steps in cancer progression. The enzyme is also involved in cell migration associated with inflammation and autoimmunity .. +PF01270 Glycosyl_hydr20;
Glycosyl hydrolases family 8. +PF03639 Glycosyl hydrolase family 81
Family of eukaryotic beta-1,3-glucanases. Within the Aspergillus fumigatus protein Swiss:Q9UVV0 two perfectly conserved Glu residues (E550 or E554) have been proposed as putative nucleophiles of the active site of the Engl1 endoglucanase, while the proton donor would be D475. The endo-beta-1,3-glucanase activity is essential for efficient spore release .. +PF00759 glycosyl_hydr12;
Glycosyl hydrolase family 9. Pfam-B_843 (release 2.1). +PF03808 Glycosyl transferase WecB/TagA/CpsF family
TIGRFAMs, Griffiths-Jones SR. +PF01531 Glycosyl transferase family 11
Pfam-B_935 (release 4.0). This family contains several fucosyl transferase enzymes.. +PF01793 Glycolipid 2-alpha-mannosyltransferase
Pfam-B_1324 (release 4.2). This is a family of alpha-1,2 mannosyl-transferases involved in N-linked and O-linked glycosylation of proteins. Some of the enzymes in this family have been shown to be involved in O- and N-linked glycan modifications in the Golgi .. +PF03076 Equine arteritis virus GP3
Pfam-B_687 (release 6.4). This protein is encoded by ORF3 of equine arteritis virus. The function is unknown.. +PF04724 Glyco_tranf_17;
Glycosyltransferase family 17. Pfam-B_5914 (release 7.5). This family represents beta-1,4-mannosyl-glycoprotein beta-1,4-N-acetylglucosaminyltransferase (EC:2.4.1.144). This enzyme transfers the bisecting GlcNAc to the core mannose of complex N-glycans. The addition of this residue is regulated during development and has functional consequences for receptor signalling, cell adhesion, and tumour progression [1,2].. +PF03033 Glycosyltransferase family 28 N-terminal domain
Pfam-B_1105 (release 6.4) & Pfam-B_2764 (release 7.5). The glycosyltransferase family 28 includes monogalactosyldiacylglycerol synthase (Swiss:P93115, EC 2.4.1.46) and UDP-N-acetylglucosamine transferase (Swiss:P74657, EC 2.4.1.-). This N-terminal domain contains the acceptor binding site and likely membrane association site. This family also contains a large number of proteins that probably have quite distinct activities.. +PF04666 GnT_IV_N; Glyco_transf_55;
N-Acetylglucosaminyltransferase-IV (GnT-IV) conserved region. Pfam-B_4541 (release 7.5). The complex-type of oligosaccharides are synthesised through elongation by glycosyltransferases after trimming of the precursor oligosaccharides transferred to proteins in the endoplasmic reticulum. N-Acetylglucosaminyltransferases (GnTs) take part in the formation of branches in the biosynthesis of complex-type sugar chains. In vertebrates, six GnTs, designated as GnT-I to -VI, which catalyse the transfer of GlcNAc to the core mannose residues of Asn-linked sugar chains, have been identified. GnT-IV (EC:2.4.1.145) catalyses the transfer of GlcNAc from UDP-GlcNAc to the GlcNAc1-2Man1-3 arm of core oligosaccharide [Gn2(22)core oligosaccharide] and forms GlcNAc1-4(GlcNAc1-2)Man1-3 structure on the core oligosaccharide (Gn3(2,4,2)core oligosaccharide). In some members the conserved region occupies all but the very for N-terminal, where there is a signal sequence on all members. For other members the conserved region does not occupy the entire protein but is still to the N-terminus of the protein .. +PF03414 Glycosyltransferase family 6
Pfam-B_4383 (release 6.6). +PF01075 Heptosyltranf;
Glycosyltransferase family 9 (heptosyltransferase). Pfam-B_839 (release 3.0). Members of this family belong to glycosyltransferase family 9 . Lipopolysaccharide is a major component of the outer leaflet of the outer membrane in Gram-negative bacteria. It is composed of three domains; lipid A, Core oligosaccharide and the O-antigen. All of these enzymes transfer heptose to the lipopolysaccharide core.. +PF00274 glycolytic_enzy;
Fructose-bisphosphate aldolase class-I. +PF01102 Glycophorin A
+PF00606 Herpesvirus Glycoprotein B
This family of proteins contains a transmembrane region.. +PF00802 Pneumovirus attachment glycoprotein G
Pfam-B_1049 (release 2.1). This family includes attachment proteins from respiratory syncytial virus. Glycoprotein G has not been shown to have any neuraminidase or hemagglutinin activity (Swiss-Prot). The amino terminus is thought to be cytoplasmic, and the carboxyl terminus extracellular. The extracellular region contains four completely conserved cysteine residues.. +PF02885 glycosyl_transf_3;
Glycosyl transferase family, helical bundle domain. MRC-LMB Genome group. This family includes anthranilate phosphoribosyltransferase (TrpD), thymidine phosphorylase. All these proteins can transfer a phosphorylated ribose substrate.. +PF00534 glycosyl_transf_1;
Glycosyl transferases group 1. MRC-LMB Genome group. Mutations in this domain of Swiss:P37287 lead to disease (Paroxysmal Nocturnal haemoglobinuria). Members of this family transfer activated sugars to a variety of substrates, including glycogen, Fructose-6-phosphate and lipopolysaccharides. Members of this family transfer UDP, ADP, GDP or CMP linked sugars. The eukaryotic glycogen synthases may be distant members of this family.. +PF00591 glycosyl_transf_3;
Glycosyl transferase family, a/b domain. MRC-LMB Genome group. This family includes anthranilate phosphoribosyltransferase (TrpD), thymidine phosphorylase. All these proteins can transfer a phosphorylated ribose substrate.. +PF04413 3-Deoxy-D-manno-octulosonic-acid transferase (kdotransferase)
Members of this family transfer activated sugars to a variety of substrates, including glycogen, fructose-6-phosphate and lipopolysaccharides. Members of the family transfer UDP, ADP, GDP or CMP linked sugars. The Glycos_transf_N region is flanked at the N-terminus by a signal peptide and at the C-terminus by Glycos_transf_1 (Pfam:PF00534). The eukaryotic glycogen synthases may be distant members of this bacterial family .. +PF01153 Glypican
+PF05199 GMC oxidoreductase
Pfam-B_891 (release 2.1). This domain is found associated with Pfam:PF00732.. +PF00732 GMC_oxred;
Pfam-B_891 (release 2.1). This family of proteins bind FAD as a cofactor.. +PF00446 Gonadotropin-releasing hormone
+PF03071 GNT-I family
Pfam-B_2207 (release 6.4). Alpha-1,3-mannosyl-glycoprotein beta-1,2-N-acetylglucosaminyltransferase (GNT-I, GLCNAC-T I) EC:2.4.1.101 transfers N-acetyl-D-glucosamine from UDP to high-mannose glycoprotein N-oligosaccharide. This is an essential step in the synthesis of complex or hybrid-type N-linked oligosaccharides. The enzyme is an integral membrane protein localised to the Golgi apparatus, and is probably distributed in all tissues. The catalytic domain is located at the C-terminus .. +PF02447 GntP family permease
Pfam-B_1928 (release 5.4). This is a family of integral membrane permeases that are involved in gluconate uptake. E. coli contains several members of this family including GntU Swiss:P46858 a low affinity transporter and GntT Swiss:P39835 a high affinity transporter .. +PF00392 gntR;
Bacterial regulatory proteins, gntR family. Prosite & Pfam-B_6405 (Release 8.0). This family of regulatory proteins consists of the N-terminal HTH region of GntR-like bacterial transcription factors. At the C-terminus there is usually an effector-binding/oligomerisation domain. The GntR-like proteins include the following sub-families: MocR, YtrR, FadR, AraR, HutC and PlmA, DevA, DasR [1-2] . Many of these proteins have been shown experimentally to be autoregulatory, enabling the prediction of operator sites and the discovery of cis/trans relationships . The DasR regulator has been shown to be a global regulator of primary metabolism and development in Streptomyces coelicolor .. +PF02188 GoLoco motif
Alignment kindly provided by SMART. +PF04178 Got1/Sft2-like family
Pfam-B_7371 (release 7.3) & Pfam-B_8991 (release 14.0). Traffic through the yeast Golgi complex depends on a member of the syntaxin family of SNARE proteins, Sed5, present in early Golgi cisternae. Got1 is thought to facilitate Sed5-dependent fusion events . This is a family of sequences derived from eukaryotic proteins. They are similar to a region of a SNARE-like protein required for traffic through the Golgi complex, SFT2 protein (Swiss:P38166) . This is a conserved protein with four putative transmembrane helices, thought to be involved in vesicular transport in later Golgi compartments .. +PF00516 Envelope glycoprotein GP120
Pfam-B_44 (release 1.0). The entry of HIV requires interaction of viral GP120 with Swiss:P01730 and a chemokine receptor on the cell surface.. +PF03010 GP4
Pfam-B_1094 (release 6.4). GP4 is a minor membrane-associated glycoprotein. This family contains envelope protein GP4 from equine arteritis virus.. +PF00517 Retroviral envelope protein
Pfam-B_44 (release 1.0). This family includes envelope protein from a variety of retroviruses. It includes the GP41 subunit of the envelope protein complex from human and simian immunodeficiency viruses (HIV and SIV) which mediate membrane fusion during viral entry. The family also includes bovine immunodeficiency virus, feline immunodeficiency virus and Equine infectious anaemia (EIAV). The family also includes the Gp36 protein from mouse mammary tumour virus (MMTV) and human endogenous retroviruses (HERVs).. +PF02925 Bacteriophage scaffolding protein D
+PF00044 gpdh;
Glyceraldehyde 3-phosphate dehydrogenase, NAD binding domain. Eddy SR, Griffiths-Jones SR. GAPDH is a tetrameric NAD-binding enzyme involved in glycolysis and gluconeogenesis. N-terminal domain is a Rossmann NAD(P) binding fold.. +PF02800 gpdh_C;
Glyceraldehyde 3-phosphate dehydrogenase, C-terminal domain. Eddy SR, Griffiths-Jones SR. GAPDH is a tetrameric NAD-binding enzyme involved in glycolysis and gluconeogenesis. C-terminal domain is a mixed alpha/antiparallel beta fold.. +PF05024 N-acetylglucosaminyl transferase component (Gpi1)
Pfam-B_4796 (release 7.6). Glycosylphosphatidylinositol (GPI) represents an important anchoring molecule for cell surface proteins.The first step in its synthesis is the transfer of N-acetylglucosamine (GlcNAc) from UDP-N-acetylglucosamine to phosphatidylinositol (PI). This chemically simple step is genetically complex because three or four genes are required in both yeast (GPI1, GPI2 and GPI3) and mammals (GPI1, PIG A, PIG H and PIG C), respectively .. +PF04113 Gpi16 subunit, GPI transamidase component
Pfam-B_7012 (release 7.3);. GPI (glycosyl phosphatidyl inositol) transamidase is a multi-protein complex. Gpi16, Gpi8 and Gaa1 form a sub-complex of the GPI transamidase. GPI transamidase adds glycosylphosphatidylinositols (GPIs) to newly synthesised proteins. Gpi16 is an essential N-glycosylated transmembrane glycoprotein. Gpi16 is largely found on the lumenal side of the ER. It has a single C-terminal transmembrane domain and a small C-terminal, cytosolic extension with an ER retrieval motif .. +PF02831 gpW
gpW is a 68 residue protein known to be present in phage particles. Extracts of phage-infected cells lacking gpW contain DNA-filled heads, and active tails, but no infectious virions. gpW is required for the addition of gpFII to the head, which is, in turn, required for the attachment of tails. Since gpFII and tails are known to be attached at the connector, gpW is also likely to assemble at this site. The addition of gpW to filled heads increases the DNase resistance of the packaged DNA, suggesting that gpW either forms a plug at the connector to prevent ejection of the DNA, or binds directly to the DNA. The large number of positively charged residues in gpW (its calculated pI is 10.8) is consistent with a role in DNA interaction .. +PF04965 Gene 25-like lysozyme
This family includes the phage protein Gene 25 from T4 which is a structural component of the outer wedge of the baseplate that has acidic lysozyme activity . The family also includes relatives from bacteria that are also presumably lysozymes.. +PF05084 Granule antigen protein (GRA6)
Pfam-B_6204 (release 7.7). This family contains the granule antigen protein GRA6 which is found in the parasitic protozoa Toxoplasma gondii and Neospora caninum. GRA6 protein plays an important role in the antigenicity and pathogenicity in these organisms .. +PF00267 Gram-ve_porins;
+PF00746 Gram positive anchor
Pfam-B_457 (release 2.1). +PF01271 Granin (chromogranin or secretogranin)
+PF00396 granulin;
+PF04495 GRASP55/65 PDZ-like domain
Pfam-B_3985 (release 7.5). GRASP55 (Golgi re-assembly stacking protein of 55 kDa) and GRASP65 (a 65 kDa) protein are highly homologous. GRASP55 is a component of the Golgi stacking machinery. GRASP65, an N-ethylmaleimide- sensitive membrane protein required for the stacking of Golgi cisternae in a cell-free system . This region appears to be related to the PDZ domain.. +PF04723 Glycine reductase complex selenoprotein A
Found in clostridia, this protein contains one active site selenocysteine and catalyses the reductive deamination of glycine, which is coupled to the esterification of orthophosphate resulting in the formation of ATP . A member of this family may also exist in Treponema denticola .. +PF01272 Transcription elongation factor, GreA/GreB, C-term
This domain has an FKBP-like fold.. +PF03449 Transcription elongation factor, N-terminal
This domain adopts a long alpha-hairpin structure.. +PF01184 GPR1/FUN34/yaaH family
The Ady2 protein in (Swiss:P25613) is required for acetate in Saccharomyces cerevisiae, and is probably an acetate transporter. A homologue in Yarrowia lipolytica (GPR1) has a role in acetic acid sensitivity.. +PF01025 GrpE
Pfam-B_817 (release 3.0). +PF02955 Prokaryotic glutathione synthetase, ATP-grasp domain
+PF02951 GTS_N;
Prokaryotic glutathione synthetase, N-terminal domain. +PF03917 Eukaryotic glutathione synthase, ATP binding domain
Pfam-B_2922 (release 6.5). +PF03199 Eukaryotic glutathione synthase
Pfam-B_2922 (release 6.5). +PF00255 Glutathione peroxidase
+PF03738 Glutathionylspermidine synthase preATP-grasp
This region contains the Glutathionylspermidine synthase enzymatic activity EC:6.3.1.8. This is the C-terminal region in bienzymes such as Swiss:P43675. Glutathionylspermidine (GSP) synthetases of Trypanosomatidae and Escherichia coli couple hydrolysis of ATP (to ADP and Pi) with formation of an amide bond between spermidine and the glycine carboxylate of glutathione (gamma-Glu-Cys-Gly). In the pathogenic trypanosomatids, this reaction is the penultimate step in the biosynthesis of the antioxidant metabolite, trypanothione (N1,N8-bis-(glutathionyl)spermidine), and is a target for drug design . This region, the pre-ATP grasp region, probably carries the substrate-binding site .. +PF00437 GSPII_E;
Type II/IV secretion system protein. Prosite & Pfam-B_2215 (Release 8.0). This family contains both type II and type IV Swiss:P54907 pathway secretion proteins from bacteria. Swiss:P07169 VirB11 ATPase is a subunit of the Agrobacterium tumefaciens transfer DNA (T-DNA) transfer system, a type IV secretion pathway required for delivery of T-DNA and effector proteins to plant cells during infection . . +PF05157 GSPII_E_N;
Type II secretion system (T2SS), protein E, N-terminal domain. This domain is found at the N-terminus of members of the Type II secretion system protein E. Proteins in this subfamily are typically involved in Type 4 pilus biogenesis (eg Swiss:Q9X4G8), though some are involved in other processes; for instance aggregation in Myxococcus xanthus (Swiss:Q9RF11) . The structure of this domain is now known [2,3].. +PF00482 GSPII_F;
Type II secretion system (T2SS), protein F. The original family covered both the regions found by the current model. The splitting of the family has allowed the related FlaJ_arch (archaeal FlaJ family) to be merged with it. Proteins with this domain form a platform for the machinery of the Type II secretion system, as well as the Type 4 pili and the archaeal flagella . This domain seems to show some similarity to PF00664 but this may just be due to similarities in the TM helices (personal obs: C Yeats).. +PF00263 Bac_GSPproteins; GSPII_III; Secretin; Secretin_C;
Bacterial type II and III secretion system protein. +PF01203 T2SP_N; GSPII_N;
Type II secretion system (T2SS), protein N. Members of the T2SN family are involved in the Type II protein secretion system. The precise function of these proteins is unknown.. +PF03958 NolW-like; GSPII_III_N;
Bacterial type II/III secretion system short domain. This is a short, often repeated, domain found in bacterial type II/III secretory system proteins. All previous NolW-like domains fall into this family.. +PF02501 GSPII_IJ;
Type II secretion system (T2SS), protein I. Pfam-B_2607 (release 5.4). The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for the transport of proteins across the outer membrane first exported to the periplasm by the Sec or Tat translocon in Gram-negative (diderm) bacteria. As members of the T2SJ family, members of the T2SI family are pseudopilins containing prepilin signal sequences .. +PF03934 GspK;
Type II secretion system (T2SS), protein K. Members of this family are involved in the Type II protein secretion system. The T2SK family includes proteins such as ExeK, PulK, OutX and XcpX.. +PF05134 GspL;
Type II secretion system (T2SS), protein L. Pfam-B_6494 (release 7.7). This family consists of Type II secretion system protein L sequences from several Gram-negative (diderm) bacteria. The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for extracellular secretion of a number of different proteins, including proteases and toxins. This pathway supports secretion of proteins across the cell envelope in two distinct steps, in which the second step, involving translocation through the outer membrane, is assisted by at least 13 different gene products. T2SL is predicted to contain a large cytoplasmic domain represented by this family and has been shown to interact with the autophosphorylating cytoplasmic membrane protein T2SE. It is thought that the tri-molecular complex of T2SL, T2SE (Pfam:PF00437) and T2SM (Pfam:PF04612) might be involved in regulating the opening and closing of the secretion pore and/or transducing energy to the site of outer membrane translocation .. +PF04612 GspM;
Type II secretion system (T2SS), protein M. Pfam-B_5302 (release 7.5). This family of membrane proteins consists of Type II secretion system protein M sequences from several Gram-negative (diderm) bacteria. The precise function of these proteins is unknown, though in Vibrio cholerae, the T2SM (EpsM) protein interacts with the T2SL (EpsL) protein, and also forms homodimers .. +PF00043 gluts; GST;
Glutathione S-transferase, C-terminal domain. Eddy SR, Griffiths-Jones SR. GST conjugates reduced glutathione to a variety of targets including S-crystallin from squid, the eukaryotic elongation factor 1-gamma, the HSP26 family of stress-related proteins and auxin-regulated proteins in plants. Stringent starvation proteins in E. coli are also included in the alignment but are not known to have GST activity. The glutathione molecule binds in a cleft between N and C-terminal domains. The catalytically important residues are proposed to reside in the N-terminal domain . In plants, GSTs are encoded by a large gene family (48 GST genes in Arabidopsis) and can be divided into the phi, tau, theta, zeta, and lambda classes .. +PF02798 gluts;
Glutathione S-transferase, N-terminal domain. Eddy SR, Griffiths-Jones SR. Function: conjugation of reduced glutathione to a variety of targets. Also included in the alignment, but are not GSTs: * S-crystallins from squid. Similarity to GST previously noted. * Eukaryotic elongation factors 1-gamma. Not known to have GST activity; similarity not previously recognised. * HSP26 family of stress-related proteins. including auxin-regulated proteins in plants and stringent starvation proteins in E. coli. Not known to have GST activity. Similarity not previously recognised. The glutathione molecule binds in a cleft between N and C-terminal domains - the catalytically important residues are proposed to reside in the N-terminal domain .. +PF00735 GTP_CDC;
Pfam-B_440 (release 2.1). Members of this family include CDC3, CDC10, CDC11 and CDC12/Septin. Members of this family bind GTP. As regards the septins, these are polypeptides of 30-65kDa with three characteristic GTPase motifs (G-1, G-3 and G-4) that are similar to those of the Ras family. The G-4 motif is strictly conserved with a unique septin consensus of AKAD. Most septins are thought to have at least one coiled-coil region, which in some cases is necessary for intermolecular interactions that allow septins to polymerise to form rod-shaped complexes. In turn, these are arranged into tandem arrays to form filaments. They are multifunctional proteins, with roles in cytokinesis, sporulation, germ cell development, exocytosis and apoptosis .. +PF00925 GTP cyclohydrolase II
Pfam-B_1147 (release 3.0). GTP cyclohydrolase II catalyses the first committed step in the biosynthesis of riboflavin.. +PF01227 GTP_cyclohydro_I;
GTP cyclohydrolase I. This family includes GTP cyclohydrolase enzymes and a family of related bacterial proteins including Swiss:Q46920.. +PF04670 Gtr1/RagA G protein conserved region
Pfam-B_4577 (release 7.5). GTR1 was first identified in S. cerevisiae as a suppressor of a mutation in RCC1. Biochemical analysis revealed that Gtr1 is in fact a G protein of the Ras family. The RagA/B proteins are the human homologues of Gtr1. Included in this family is the human Rag C, a novel protein that has been shown to interact with RagA/B [1,2,3,4].. +PF04138 GtrA-like protein
Members of this family are predicted to be integral membrane proteins with three or four transmembrane spans. They are involved in the synthesis of cell surface polysaccharides. The GtrA family are a subset of this family. GtrA is predicted to be an integral membrane protein with 4 transmembrane spans. It is involved in O antigen modification by Shigella flexneri bacteriophage X (SfX), but does not determine the specificity of glucosylation. Its function remains unknown, but it may play a role in translocation of undecaprenyl phosphate linked glucose (UndP-Glc) across the cytoplasmic membrane . Another member of this family is a DTDP-glucose-4-keto-6-deoxy-D-glucose reductase, which catalyses the conversion of dTDP-4-keto-6-deoxy-D-glucose to dTDP-D-fucose, which is involved in the biosynthesis of the serotype-specific polysaccharide antigen of Actinobacillus actinomycetemcomitans Y4 (serotype b) . This family also includes the teichoic acid glycosylation protein, GtcA, which is a serotype-specific protein in some Listeria innocua and monocytogenes strains. Its exact function is not known, but it is essential for decoration of cell wall teichoic acids with glucose and galactose .. +PF00211 guanylate_cyc;
Adenylate and Guanylate cyclase catalytic domain. +PF00625 Guanylate kinase
+PF02058 Guanylin precursor
+PF05120 Gas vesicle protein G
These proteins are involved in the formation of gas vesicles ( ).. +PF05121 Gas vesicle protein K
These proteins are involved in the formation of gas vesicles ( ).. +PF02213 GYF domain
Alignment kindly provided by SMART. The GYF domain is named because of the presence of Gly-Tyr-Phe residues. The GYF domain is a proline-binding domain in CD2-binding protein Swiss:O95400.. +PF02895 Signal transducing histidine kinase, homodimeric domain
This helical bundle domain is the homodimer interface of the signal transducing histidine kinase family.. +PF03030 Inorganic H+ pyrophosphatase
Pfam-B_1050 (release 6.4). The H+ pyrophosphatase is a transmembrane proton pump involved in establishing the H+ electrochemical potential difference between the vacuole lumen and the cell cytosol. Vacuolar-type H(+)-translocating inorganic pyrophosphatases have long been considered to be restricted to plants and to a few species of photo-trophic bacteria. However, in recent investigations, these pyrophosphatases have been found in organisms as disparate as thermophilic Archaea and parasitic protists .. +PF01725 Ham1 family
Pfam-B_2030 (release 4.1). This family consists of the HAM1 protein Swiss:P47119 and hypothetical archaeal bacterial and C. elegans proteins. HAM1 controls 6-N-hydroxylaminopurine (HAP) sensitivity and mutagenesis in S. cerevisiae Swiss:P47119 . The HAM1 protein protects the cell from HAP, either on the level of deoxynucleoside triphosphate or the DNA level by a yet unidentified set of reactions .. +PF04388 Hamartin protein
This family includes the hamartin protein which is thought to function as a tumour suppressor. The hamartin protein interacts with the tuberin protein Pfam:PF03542. Tuberous sclerosis complex (TSC) is an autosomal dominant disorder and is characterised by the presence of hamartomas in many organs, such as brain, skin, heart, lung, and kidney. It is caused by mutation of either the TSC1 or the TSC2 tumour suppressor gene. TSC1 encodes a protein, hamartin, containing two coiled-coil regions, which have been shown to mediate binding to tuberin. The TSC2 gene codes for tuberin Pfam:PF03542. These two proteins function within the same pathway(s) regulating cell cycle, cell growth, adhesion, and vesicular trafficking .. +PF01567 Hantavirus glycoprotein G1
Pfam-B_399 (release 4.0). The medium (M) genome segment of hantaviruses (family Bunyaviridae) encodes the two virion glycoproteins. G1 and G2, as a precursor protein in the complementary sense RNA. . +PF01561 Hantavirus glycoprotein G2
Pfam-B_401 (release 4.0). The medium (M) genome segment of hantaviruses (family Bunyaviridae) encodes the two virion glycoproteins. G1 and G2, as a precursor protein in the complementary sense RNA. . +PF00846 Hantavirus nucleocapsid protein
Pfam-B_310 (release 3.0). +PF03866 Hydrophobic abundant protein (HAP)
Expression of HAP is thought to be developmentally regulated and possibly involved in spherule cell wall formation . . +PF01543 Hepatitis C virus capsid protein
+PF01542 Hepatitis C virus core protein
The viral core protein forms the internal viral coat that encapsidates the genomic RNA and is enveloped in a host cell-derived lipid membrane. The core protein has been shown, by yeast two-hybrid assay to interact with cellular DEAD box helicases . The N terminus of the core protein is involved in transcriptional repression .. +PF01539 Hepatitis C virus envelope glycoprotein E1
+PF01560 Hepatitis C virus non-structural protein E2/NS1
The hypervariable region of the E2/NS1 region of hepatitis C virus varies greatly between viral isolates. E2 is thought to encode a structurally unconstrained envelope protein .. +PF01538 Hepatitis C virus non-structural protein NS2
The viral genome is translated into a single polyprotein of about 3000 amino acids. Generation of the mature non-structural proteins relies on the activity of viral proteases. Cleavage at the NS2/NS3 junction is accomplished by a metal-dependent autoprotease encoded within NS2 and the N-terminus of NS3 [1,2].. +PF02907 HCV_NS3;
Hepatitis C virus NS3 protease. Griffiths-Jones SR, Knutson S. Hepatitis C virus NS3 protein is a serine protease which has a trypsin-like fold. The non-structural (NS) protein NS3 is one of the NS proteins involved in replication of the HCV genome. NS2-3 proteinase, a zinc-dependent enzyme, performs a single proteolytic cut to release the N-terminus of NS3. The action of NS3 proteinase (NS3P), which resides in the N-terminal one-third of the NS3 protein, then yields all remaining non-structural proteins. The C-terminal two-thirds of the NS3 protein contain a helicase. The functional relationship between the proteinase and helicase domains is unknown. NS3 has a structural zinc-binding site and requires cofactor NS4A.. +PF01006 Hepatitis C virus non-structural protein NS4a
Pfam-B_315 (release 3.0). NS4a forms an integral part of the NS3 serine protease, as it is required in a number of cases as a cofactor of cleavage [1,3]. It has also been reported that NS4a interacts with NS4b and NS3 to form a multi-subunit replicase complex .. +PF01001 Hepatitis C virus non-structural protein NS4b
Pfam-B_315 (release 3.0). No precise function has been assigned to NS4b. However, it is known that NS4b interacts with NS4a and NS3 to form a large replicase complex to direct the viral RNA replication .. +PF01506 Hepatitis C virus non-structural 5a protein membrane anchor
The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. The N-terminal region of the NS5a protein has been used in the construction of the alignment for this family. The C-terminal region has not been included because it is too heterogeneous.. +PF04618 HD-ZIP protein N terminus
This family consists of the N termini of plant homeobox-leucine zipper proteins. Its function is unknown.. +PF02329 Histidine carboxylase PI chain
Pfam-B_19599 (release 5.2). Histidine carboxylase catalyses the formation of histamine from histidine. Cleavage of the proenzyme PI chain yields two subunits, alpha and beta, which arrange as a hexamer (alpha beta)6. . +PF02924 Bacteriophage lambda head decoration protein D
+PF01517 Hepatitis delta virus delta antigen
Pfam-B_808 (release 4.0). The hepatitis delta virus (HDV) encodes a single protein, the hepatitis delta antigen (HDAg). The central region of this protein has been shown to bind RNA . Several interactions are also mediated by a coiled-coil region at the N terminus of the protein .. +PF02985 HEAT repeat
The HEAT repeat family is related to armadillo/beta-catenin-like repeats (see Pfam:PF00514).. +PF03130 PBS lyase HEAT-like repeat
Pfam-B_172 (release 6.5). This family contains a short bi-helical repeat that is related to Pfam:PF02985. Cyanobacteria and red algae harvest light energy using macromolecular complexes known as phycobilisomes (PBS), peripherally attached to the photosynthetic membrane. The major components of PBS are the phycobiliproteins. These heterodimeric proteins are covalently attached to phycobilins: open-chain tetrapyrrole chromophores, which function as the photosynthetic light-harvesting pigments. Phycobiliproteins differ in sequence and in the nature and number of attached phycobilins to each of their subunits. This family includes the lyase enzymes that specifically attach particular phycobilins to apophycobiliprotein subunits. The most comprehensively studied of these is the CpcE/F lyase Swiss:P31967 Swiss:P31968, which attaches phycocyanobilin (PCB) to the alpha subunit of apophycocyanin . Similarly, MpeU/V attaches phycoerythrobilin to phycoerythrin II, while CpeY/Z is thought to be involved in phycoerythrobilin (PEB) attachment to phycoerythrin (PE) I (PEs I and II differ in sequence and in the number of attached molecules of PEB: PE I has five, PE II has six) . All the reactions of the above lyases involve an apoprotein cysteine SH addition to a terminal delta 3,3'-double bond. Such a reaction is not possible in the case of phycoviolobilin (PVB), the phycobilin of alpha-phycoerythrocyanin (alpha-PEC). It is thought that in this case, PCB, not PVB, is first added to apo-alpha-PEC, and is then isomerised to PVB. The addition reaction has been shown to occur in the presence of either of the components of alpha-PEC-PVB lyase PecE or PecF (or both). The isomerisation reaction occurs only when both PecE and PecF components are present, i.e. the PecE/F phycobiliprotein lyase is also a phycobilin isomerase . Another member of this family is the NblB protein Swiss:Q9Z3G5, whose similarity to the phycobiliprotein lyases was previously noted . 
This constitutively expressed protein is not known to have any lyase activity. It is thought to be involved in the coordination of PBS degradation with environmental nutrient limitation. It has been suggested that the similarity of NblB to the phycobiliprotein lyases is due to the ability to bind tetrapyrrole phycobilins via the common repeated motif .. +PF00632 HECT-domain (ubiquitin-transferase)
The name HECT comes from Homologous to the E6-AP Carboxyl Terminus.. +PF03451 HELP motif
The founding member of the EMAP protein family is the 75 kDa Echinoderm Microtubule-Associated Protein, so-named for its abundance in sea urchin, sand dollar and starfish eggs. The Hydrophobic EMAP-Like Protein (HELP) motif was identified initially in the human EMAP-Like Protein 2 (EML2) and subsequently in the entire EMAP Protein family. The HELP motif is approximately 60-70 amino acids in length and is conserved amongst metazoans. Although the HELP motif is hydrophobic, there is no evidence that EMAP-Like Proteins are membrane-associated. All members of the EMAP-Like Protein family, identified to-date, are constructed with an amino terminal HELP motif followed by a WD domain . In C. elegans, EMAP-Like Protein-1 (ELP-1) is required for touch sensation indicating that ELP-1 may play a role in mechanosensation . The localization of ELP-1 to microtubules and adhesion sites implies that ELP-1 may transmit forces between the body surface and the touch receptor neurons.. +PF03996 Hemagglutinin esterase
Pfam-B_505 (release 5.5). +PF02710 Hemagglutinin domain of haemagglutinin-esterase-fusion glycoprotein
Pfam-B_505 (release 5.5). +PF00509 Haemagglutinin
Pfam-B_26 (release 1.0). Hemagglutinin from influenza virus causes membrane fusion of the viral membrane with the host membrane. Fusion occurs after the host cell internalises the virus by endocytosis. The drop of pH causes release of a hydrophobic fusion peptide and a large conformational change leading to membrane fusion.. +PF01126 Heme_oxygnease;
+PF00372 hemocyanin;
Hemocyanin, copper containing domain. This family includes arthropod hemocyanins and insect larval storage proteins.. +PF03723 hemocyanin_C;
Hemocyanin, ig-like domain. This family includes arthropod hemocyanins and insect larval storage proteins.. +PF03722 hemocyanin_N;
Hemocyanin, all-alpha domain. This family includes arthropod hemocyanins and insect larval storage proteins.. +PF00045 hemopexin;
Hemopexin is a heme-binding protein that transports heme to the liver. Hemopexin-like repeats occur in vitronectin and some matrix metallopeptidases family (matrixins). The HX repeats of some matrixins bind tissue inhibitor of metallopeptidases (TIMPs).. +PF05171 Haemin-degrading HemS.ChuX domain
The Yersinia enterocolitica O:8 periplasmic binding-protein- dependent transport system consisted of four proteins: the periplasmic haemin-binding protein HemT, the haemin permease protein HemU, the ATP-binding hydrophilic protein HemV and the haemin-degrading protein HemS (this family). The structure for HemS has been solved and consists of a tandem repeat of this domain.. +PF00906 Hepatitis core antigen
Pfam-B_8 (release 3.0). The core antigen of hepatitis viruses possesses a carboxyl terminus rich in arginine. On this basis it was predicted that the core antigen would bind DNA . There is some experimental evidence to support this .. +PF01771 Herpesvirus alkaline exonuclease
Pfam-B_822 (release 4.2). This family includes various alkaline exonucleases from members of the herpesviridae. Alkaline exonuclease appears to have an important role in the replication of herpes simplex virus .. +PF04793 BRRF1-like protein
Pfam-B_6247 (release 7.5). Family of herpesvirus proteins including Epstein-barr virus protein BBRF1. . +PF05109 Herpes virus major outer envelope glycoprotein (BLLF1)
Pfam-B_6348 (release 7.7). This family consists of the BLLF1 viral late glycoprotein, also termed gp350/220. It is the most abundantly expressed glycoprotein in the viral envelope of the Herpesviruses and is the major antigen responsible for stimulating the production of neutralising antibodies in vivo .. +PF04633 Herpesvirus BMRF2 protein
Pfam-B_5353 (release 7.5). +PF04682 Herpesvirus BTRF1 protein conserved region
Pfam-B_4518 (release 7.5). Herpesvirus protein.. +PF04929 Herpes DNA replication accessory factor
Pfam-B_5837 (release 7.6). Replicative DNA polymerases are capable of polymerising tens of thousands of nucleotides without dissociating from their DNA templates. The high processivity of these polymerases is dependent upon accessory proteins that bind to the catalytic subunit of the polymerase or to the substrate. The Epstein-Barr virus (EBV) BMRF1 protein is an essential component of the viral DNA polymerase and is absolutely required for lytic virus replication . BMRF1 is also a transactivator . This family is predicted to have a UL42 like structure .. +PF01673 Herpesvirus putative major envelope glycoprotein
Pfam-B_1084 (release 4.1). This family consists of probable major envelope glycoproteins from members of the herpesviridae including herpes simplex virus, human cytomegalovirus and varicella-zoster virus. Members of the herpesviridae have a dsDNA genome and do not have an RNA stage during their replication. . +PF02480 Alphaherpesvirus glycoprotein E
Pfam-B_849 (release 5.4). Glycoprotein E (gE) of Alphaherpesvirus forms a complex with glycoprotein I (gI) (Pfam:PF01688), functioning as an immunoglobulin G (IgG) Fc binding protein. gE is involved in virus spread but is not essential for propagation .. +PF01688 Alphaherpesvirus glycoprotein I
Pfam-B_1222 (release 4.1). This family consists of glycoprotein I from various members of the alphaherpesvirinae; these include herpesvirus, varicella-zoster virus and pseudorabies virus. Glycoprotein I (gI) is important during natural infection, mutants lacking gI produce smaller lesions at the site of infection and show reduced neuronal spread . gI forms a heterodimeric complex with gE; this complex displays Fc receptor activity (binds to the Fc region of immunoglobulin) . Glycoproteins are also important in the production of virus-neutralising antibodies and cell mediated immunity . The alphaherpesvirinae have a dsDNA genome and have no RNA stage during viral replication.. +PF01528 Herpesvirus glycoprotein M
Pfam-B_929 (release 4.0). The herpesvirus glycoprotein M (gM) is an integral membrane protein predicted to contain 8 transmembrane segments . Glycoprotein M is not essential for viral replication .. +PF01537 Herpesvirus glycoprotein D/GG/GX domain
Pfam-B_603 (release 4.0). This domain is found in several Herpes viruses glycoproteins. This family includes glycoprotein-D (gD or gIV) which is common to herpes simplex virus types 1 and 2, as well as equine herpes, bovine herpes and Marek's disease virus. Glycoprotein-D has been found on the viral envelope and the plasma membrane of infected cells, and gD immunisation can produce an immune response to bovine herpes virus (BHV-1). This response is stronger than that of the other major glycoproteins gB (gI) and gC (gIII) in BHV-1. Glycoprotein G (gG) is one of the seven external glycoproteins of HSV1 and HSV2. This family also contains the glycoprotein GX, (gX), initially identified in Pseudorabies virus.. +PF02489 Herpesvirus glycoprotein H
Pfam-B_1142 (release 5.4). Herpesvirus glycoprotein H (gH) is a virion associated envelope glycoprotein . Complex formation between gH and gL has been demonstrated in both virions and infected cells .. +PF02689 Helicase
Pfam-B_607 (release 5.5). This family consists of Helicases from the Herpes viruses. Helicases are responsible for the unwinding of DNA and are essential for replication and completion of the viral life cycle.. +PF03324 Herpesvirus DNA helicase/primase complex associated protein
Pfam-B_3676 (release 6.5) & Pfam-B_4951 (release 14.0). This family includes HSV UL8, EHV-1 54, VZV 52 AND HCMV 102.. +PF03585 Herpesvirus ICP4-like protein C-terminal region
Pfam-B_1422 (release 7.0). The immediate-early protein ICP4 (infected-cell polypeptide 4) is required for efficient transcription of early and late viral genes and is thus essential for productive infection. ICP4 is a large phosphoprotein that binds DNA in a sequence specific manner as a homodimer. ICP4 represses transcription from LAT, ICP4 and ORF-P that have a high-affinity ICP4 binding site that spans the transcription initiation site. ICP4 proteins have two highly conserved regions, this family contains the C-terminal region that probably acts as an enhancer for the N-terminal region .. +PF03584 Herpesvirus ICP4-like protein N-terminal region
Pfam-B_1422 (release 7.0). The immediate-early protein ICP4 (infected-cell polypeptide 4) is required for efficient transcription of early and late viral genes and is thus essential for productive infection. ICP4 is a large phosphoprotein that binds DNA in a sequence specific manner as a homodimer. ICP4 represses transcription from LAT, ICP4 and ORF-P that have a high-affinity ICP4 binding site that spans the transcription initiation site. ICP4 proteins have two highly conserved regions, this family contains the N-terminal region that contains sites for DNA binding and homodimerisation .. +PF03361 Herpes virus intermediate/early protein 2/3
Pfam-B_2178 (release 6.6). These viral sequences are similar to UL117 protein of human and chimpanzee cytomegalovirus, and to intermediate/early proteins 2 and 3 of certain herpes viruses. UL117 is thought to be a glycoprotein that is expressed at early and late times after infection . This region is close to the C-terminus of the protein and may be a transmembrane region .. +PF03363 Herpesvirus leader protein
Pfam-B_1664 (release 6.6). +PF03122 Herpes virus major capsid protein
Pfam-B_600 (release 6.5). This family represents the major capsid protein (MCP) of herpes viruses. The capsid shell consists of 150 MCP hexamers and 12 MCP pentamers. One pentamer is found at each of the 12 apices of the icosahedral shell, and the hexamers form the edges and 20 faces .. +PF04797 Herpesvirus dUTPase protein
Pfam-B_6280 (release 7.5). +PF02399 Origin of replication binding protein
Pfam-B_1518 (release 5.4). This Pfam family represents the herpesvirus origin of replication binding protein, probably involved in DNA replication.. +PF03325 Herpesvirus polymerase accessory protein
Pfam-B_3097 (release 6.5). The same proteins are also known as polymerase processivity factors.. +PF04846 Herpesvirus pp38 phosphoprotein
Pfam-B_4545 (release 7.6). This protein represents a conserved region found in most herpesvirus pp38 phosphoproteins.. +PF04637 Herpesvirus phosphoprotein 85 (HHV6-7 U14/HCMV UL25)
Pfam-B_5418 (release 7.5). This family includes UL25 proteins from HCMV, as well as U14 proteins from HHV 6 and HHV7. These 85 kD phosphoproteins appear to act as structural antigens, but their precise function is otherwise unknown.. +PF03326 Herpesvirus transcription activation factor (transactivator)
Pfam-B_3658 (release 6.5). This family includes EBV BRLF1 and similar ORF 50 proteins from other herpesviruses.. +PF04843 Herpesvirus tegument protein, N-terminal conserved region
Pfam-B_3992 (release 7.6). +PF04523 Herpes virus tegument protein U30
This family is named after the human herpesvirus protein, but has been characterised in cytomegalovirus as UL47. Cytomegalovirus UL47 is a component of the tegument, which is a protein layer surrounding the viral capsid. UL47 co-precipitates with UL48 and UL69 tegument proteins, and the major capsid protein UL86. A UL47-containing complex is thought to be involved in the release of viral DNA from the disassembling virus particle .. +PF04541 Herpesvirus virion protein U34
The virion proteins in this family include membrane phosphoprotein-like proteins such as UL34, Epstein-Barr and R50, from dsDNA viruses, no RNA stage, Herpesvirales. The family Herpes_BFRF1, Pfam:PF05900, has been merged in.. +PF04533 Herpes virus U44 protein
This is a family of proteins from dsDNA beta-herpesvirinae and gamma-herpesvirinae viruses. The function is not known, and the proteins are named variously as U44, BSRF1, UL71, and M71. The family BSRF1 has been merged into this.. +PF04529 Herpesvirus U59 protein
The proteins in this family have no known function. Cytomegalovirus UL88 is also a member of this family.. +PF03580 Herpesvirus UL14-like protein
Pfam-B_2982 (release 7.0). This is a family of Herpesvirus proteins including UL14. UL14 protein is a minor component of the virion tegument and is expressed late in infection. UL14 protein can influence the intracellular localisation patterns of a number of proteins belonging to the capsid or the DNA encapsidation machinery .. +PF04559 Herpesvirus UL17 protein
UL17 protein is required for DNA cleavage and packaging in herpes viruses. It has been shown to associate with immature B-type capsids , and is required for the localisation of capsids and capsid proteins to the intranuclear sites where viral DNA is cleaved and packaged . In the virion, UL17 is a component of the tegument, which is a protein layer surrounding the viral capsid .. +PF04544 Herpesvirus egress protein UL20
UL20 is predicted to be a transmembrane protein with multiple membrane spans. It is involved in the trans-cellular transport of enveloped virions, and is therefore important for viral egress. However, UL20 operates in different cellular compartments and different stages of egress in pseudorabies virus and herpes simplex virus. This is thought to be due to differences in egress pathways between these two viruses .. +PF01646 Herpes virus protein UL24
Pfam-B_946 (release 4.1). This family consists of various herpes virus proteins; the gene 20 product, U49 protein, UL24 protein and BXRF1. The UL24 gene (product of the 24th ORF) is not essential for virus replication, mutants with lesions in UL24 show a reduced ability to replicate in tissue culture and have reduced thymidine kinase activity as the UL24 gene overlaps with thymidine kinase .. +PF02760 HIN-200/IF120x domain
This domain has no known function. It is found in one or two copies per protein, and is found associated with the PAAD/DAPIN domain Pfam:PF02758.. +PF03369 Herpesvirus UL3 protein
Pfam-B_2492 (release 6.6). +PF02718 Herpesvirus UL31-like protein
Pfam-B_1786 (release 5.5). This is a family of Herpesvirus proteins including UL31 (Swiss:P10215), UL53 (Swiss:P16794), and the product of ORF 69 in some strains (e.g. Swiss:O36420). The proteins in this family have no known function.. +PF03581 Herpesvirus UL33-like protein
Pfam-B_1115 (release 7.0). This is a family of Herpesvirus proteins including UL33 Swiss:P10217 ,UL51 Swiss:P16792. The proteins in this family are involved in packaging viral DNA.. +PF03586 Herpesvirus UL36 tegument protein
Pfam-B_3425 (release 7.0). The UL36 open reading frame (ORF) encodes the largest herpes simplex virus type 1 (HSV-1) protein, a 270-kDa polypeptide designated VP1/2, which is also a component of the virion tegument. A null mutation in the UL36 gene of herpes simplex virus type 1 results in accumulation of unenveloped DNA-filled capsids in the cytoplasm of infected cells . This family only covers a small central part of this large protein.. +PF03277 Herpesvirus UL4 family
Pfam-B_4461 (release 6.5). +PF05072 Herpesvirus UL43 protein
Pfam-B_5928 (release 7.7). UL43 genes are expressed with true-late (gamma2) kinetics and have been identified as a virion tegument component .. +PF03387 Herpesvirus UL46 protein
Pfam-B_2545 (release 6.6). +PF03362 Herpesvirus UL47 protein
Pfam-B_2182 (release 6.6). +PF04823 Herpes_UL49;
Herpesvirus UL49 tegument protein. Pfam-B_3850 (release 7.6). +PF04540 Herpesvirus UL51 protein
UL51 protein is a virion protein. In pseudorabies virus, UL51 (Swiss:Q85227) was identified as a component of the capsid . In herpes simplex virus type 1 there is evidence for post-translational modification of UL51 .. +PF04537 Herpesvirus UL55 protein
In infected cells, UL55 is associated with the nuclear matrix, and found adjacent to compartments containing the capsid protein ICP35. UL55 was not detected in assembled virions. It is thought that UL55 may play a role in virion assembly or maturation .. +PF04534 Herpesvirus UL56 protein
In herpes simplex virus type 2, UL56 is thought to be a tail-anchored type II membrane protein involved in vesicular trafficking. The C terminal hydrophobic region is required for association with the cytoplasmic membrane, and the N terminal proline-rich region is important for the translocation of UL56 to the Golgi apparatus and cytoplasmic vesicles .. +PF01763 Herpesvirus UL6 like
Pfam-B_878 (release 4.2). This family consists of various proteins from the herpesviridae that are similar to herpes simplex virus type I UL6 virion protein. UL6 is essential for cleavage and packaging of the viral genome .. +PF01677 Herpesvirus UL7 like
Pfam-B_1086 (release 4.1). This family consists of various functionally undefined proteins from the herpesviridae and UL7 from bovine herpes virus [1,2]. UL7 is not essential for virus replication in cell culture, and is found localised in the cytoplasm of infected cells accumulated around the nucleus but could not be detected in purified virions . Members of the herpesviridae have a dsDNA genome and do not have an RNA stage during their replication.. +PF03554 gpUL73;
UL73 viral envelope glycoprotein . Pfam-B_3001 (release 7.0). This family groups together the viral proteins BLRF1, U46, 53, and UL73. The UL73-like envelope glycoproteins, which associates in a high molecular mass complex with its counterpart, gM, induce neutralising antibody responses in the host. These glycoprotein are highly polymorphic, particularly in the N-terminal region .. +PF01802 Herpesvirus VP23 like capsid protein
Pfam-B_1435 (release 4.2). This family consist of various capsid proteins from members of the herpesviridae. The capsid protein VP23 in herpes simplex virus forms a triplex together with VP19C these fit between and link together adjacent capsomers as formed by VP5 and VP26 . VP3 along with the scaffolding proteins helps to form normal capsids by defining the curvature of the shell and size of the particle .. +PF03327 Herpesvirus capsid shell protein VP19C
Pfam-B_3451 (release 6.5). +PF01521 HesB-like;HesB;
Iron-sulphur cluster biosynthesis. Pfam-B_518 (release 4.0). This family is involved in iron-sulphur cluster biosynthesis . Its members include proteins that are involved in nitrogen fixation such as the HesB and HesB-like proteins .. +PF02444 HEV_ORF2;
Hepatitis E virus ORF-2 (Putative capsid protein). Pfam-B_1896 (release 5.4). The Hepatitis E virus (HEV) genome is a single-stranded, positive-sense RNA molecule of approximately 7.5 kb . Three open reading frames (ORF) were identified within the HEV genome: ORF1 encodes non-structural proteins, ORF2 encodes the putative structural protein(s) , and ORF3 encodes a protein of unknown function. ORF2 contains a consensus signal peptide sequence at its amino terminus and a capsid-like region with a high content of basic amino acids similar to that seen with other virus capsid proteins .. +PF02455 Hexon-associated protein (IIIa)
Pfam-B_2076 (release 5.4). The major capsid protein of the adenovirus strain is also known as a hexon. This is a family of hexon-associated proteins (protein IIIa).. +PF00349 hexokinase;
Hexokinase (EC:2.7.1.1) contains two structurally similar domains represented by this family and Pfam:PF03727. Some members of the family have two copies of each of these domains.. +PF03727 hexokinase2;
Hexokinase (EC:2.7.1.1) contains two structurally similar domains represented by this family and Pfam:PF00349. Some members of the family have two copies of each of these domains.. +PF03559 NDP-hexose 2,3-dehydratase
Pfam-B_1070 (release 7.0). This family includes a range of proteins from antibiotic production pathways. The family includes gra-ORF27 Swiss:Q9ZA32 product that probably functions at an early step, most likely as a dTDP-4-keto-6- deoxyglucose-2,3-dehydratase . Its homologues include dnmT from the daunorubicin biosynthetic gene cluster in S. peucetius , a similar gene from the daunomycin biosynthetic cluster in Streptomyces sp. strain C5 Swiss:Q53880 , eryBVI from the erythromycin cluster in S. erythraea and snoH from the nogalamycin cluster in S. nogalater. The proteins in this family are composed of two copies of a 200 amino acid long unit that may be a structural domain.. +PF04209 homogentisate 1,2-dioxygenase
TIGRFAMs (release 2.0);. Homogentisate dioxygenase cleaves the aromatic ring during the metabolic degradation of Phe and Tyr. Homogentisate dioxygenase deficiency causes alkaptonuria. The structure of homogentisate dioxygenase shows that the enzyme forms a hexamer arrangement comprised of a dimer of trimers. The active site iron ion is coordinated near the interface between the trimers .. +PF01085 Hedgehog amino-terminal signalling domain
Pfam-B_1424 (release 3.0). For the carboxyl Hint module, see Pfam:PF01079. Hedgehog is a family of secreted signal molecules required for embryonic cell differentiation.. +PF00730 Endonuclease_3;
HhH-GPD superfamily base excision DNA repair protein. Pfam-B_854 (release 2.1). This family contains a diverse range of structurally related DNA repair proteins. The superfamily is called the HhH-GPD family after its hallmark Helix-hairpin-helix and Gly/Pro rich loop followed by a conserved aspartate . This includes endonuclease III, EC:4.2.99.18 and MutY an A/G-specific adenine glycosylase, both have a C terminal 4Fe-4S cluster. The family also includes 8-oxoguanine DNA glycosylases such as Swiss:P53397. The methyl-CPG binding protein MBD4 Swiss:Q9Z2D7 also contains a related domain that is a thymine DNA glycosylase. The family also includes DNA-3-methyladenine glycosylase II EC:3.2.2.21 and other members of the AlkA family.. +PF03753 Human herpesvirus 6 immediate early protein
Pfam-B_1006 (release 7.0). The proteins in this family are poorly characterised, but an investigation has indicated that the immediate early protein is required for the down-regulation of MHC class I expression in dendritic cells. Human herpesvirus 6 immediate early protein is also referred to as U90.. +PF03486 HI0933-like protein
+PF04588 Hypoxia induced protein conserved region
Pfam-B_4868 (release 7.5). This family is found in proteins thought to be involved in the response to hypoxia. Family members mostly come from diverse eukaryotic organisms however eubacterial members have been identified. This region is found at the N-terminus of the member proteins which are predicted to be transmembrane .. +PF01355 High potential iron-sulfur protein
+PF00713 Hirudin
Pfam-B_707 (release 2.1). +PF02098 Tick histamine binding protein
+PF00977 Histidine biosynthesis protein
Pfam-B_1089 (release 3.0). Proteins involved in steps 4 and 6 of the histidine biosynthesis pathway are contained in this family. Histidine is formed by several complex and distinct biochemical reactions catalysed by eight enzymes. The enzymes in this Pfam entry are called His6 and His7 in eukaryotes and HisA and HisF in prokaryotes. The structure of HisA is known to be a TIM barrel fold. In some archaeal HisA proteins the TIM barrel is composed of two tandem repeats of a half barrel e.g. Swiss:P05325 . This family belong to the common phosphate binding site TIM barrel family .. +PF00815 Histidinol dehydrogenase
Pfam-B_1358 (release 2.1). +PF00125 histone;
Core histone H2A/H2B/H3/H4. +PF01230 HIT domain
Prosite & Pfam-B_8474 (Release 8.0). +PF00816 H-NS histone family
Pfam-B_1651 (release 2.1). +PF01870 DUF50;
Archaeal holliday junction resolvase (hjc). This family of archaebacterial proteins are holliday junction resolvases (hjc gene) . The Holliday junction is an essential intermediate of homologous recombination. This protein is the archaeal equivalent of RuvC but is not sequence similar.. +PF02110 Hydroxyethylthiazole kinase family
+PF03865 HlyB;
Haemolysin secretion/activation protein ShlB/FhaC/HecB. This family represents a group of sequences that are related to ShlB from Serratia marcescens. ShlB is an outer membrane protein pore involved in the Type Vb or Two-partner secretion system where it functions to secrete and activate the haemolysin ShlA. The activation of ShlA occurs during secretion when ShlB imposes a conformational change in the inactive haemolysin to form the active protein . . +PF02794 RTX toxin acyltransferase family
Pfam-B_1230 (Pfam 6.0). Members of this family are enzymes EC:2.3.1.-. involved in fatty acylation of the protoxins (HlyA) at lysine residues, thereby converting them to the active toxin. Acyl-acyl carrier protein (ACP) is the essential acyl donor. This family show a number of conserved residues that are possible candidates for participation in acyl transfer. Site-directed mutagenesis of the single conserved histidine residue in Swiss:P06736 resulted in complete inactivation of the enzyme .. +PF00529 HlyD family secretion protein
MRC-LMB Genome group. +PF03201 H2-forming N5,N10-methylene-tetrahydromethanopterin dehydrogenase
Pfam-B_2929 (release 6.5). +PF01101 HMG14 and HMG17
+PF00505 HMG_box; MaoC_dehydrat_N;
HMG (high mobility group) box. Pfam-B_8 (release 1.0). +PF01154 HMG_CoA_synt;
Hydroxymethylglutaryl-coenzyme A synthase N terminal. +PF00682 HMGL-like
Pfam-B_71 (release 2.1). This family contains a diverse set of enzymes. These include various aldolases and a region of pyruvate carboxylase. . +PF00423 Haemagglutinin-neuraminidase
Pfam-B_171 (release 1.0). +PF04814 Hepatocyte nuclear factor 1 (HNF-1), N terminus
Pfam-B_2624 (release 7.6). This family consists of the N terminus of homeobox-containing transcription factor HNF-1. This region contains a dimerisation sequence and an acidic region that may be involved in transcription activation. Mutations and the common Ala/Val 98 polymorphism in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) .. +PF04813 Hepatocyte nuclear factor 1 (HNF-1), alpha isoform C terminus
Pfam-B_2624 (release 7.6). This family consists of an alternative C terminus of homeobox-containing transcription factor HNF-1, found in the HNF-1A isoform. Different isoforms of HNF-1 are generated by the differential use of polyadenylation sites and by alternative splicing. The C-terminal region of HNF-1 is responsible for the activation of transcription, and HNF-1A, which has this C-terminal extension, transactivates less well than the B and C isoforms . Mutations and polymorphisms in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) .. +PF04812 Hepatocyte nuclear factor 1 (HNF-1), beta isoform C terminus
Pfam-B_2624 (release 7.6). This family consists of a region found within the alpha isoform and at the C terminus of the beta isoform of the homeobox-containing transcription factor of HNF-1. Different isoforms of HNF-1 are generated by the differential use of polyadenylation sites and by alternative splicing. The C-terminal region of HNF-1 is responsible for the activation of transcription . Mutations and polymorphisms in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) .. +PF01844 HNH endonuclease
+PF01848 Hok/gef family
+PF05102 holin_BlyA;
TIGRFAMs (release 2.0);. BlyA, a small holin found in Borrelia circular plasmids that is encoded by a prophage. BlyA contains two largely hydrophobic helices and a highly charged C-terminus and is membrane associated .. +PF05204 Homing endonuclease
Homing endonucleases are encoded by mobile DNA elements that are found inserted within host genes in all domains of life.. +PF05203 Hom_end-associated Hint
Homing endonucleases are encoded by mobile DNA elements that are found inserted within host genes in all domains of life. The crystal structure of the homing nuclease PI-Sce revealed two domains: an endonucleolytic centre resembling the C-terminal domain of Drosophila melanogaster Hedgehog protein, and a second domain containing the protein-splicing active site. This Domain corresponds to the latter protein-splicing domain.. +PF00046 homeobox;
+PF00742 Homoserine dehydrogenase
Pfam-B_459 (release 2.1). +PF00103 hormone;
Somatotropin hormone family. +PF00123 hormone2;
This family contains glucagon, GIP, secretin and VIP.. +PF00159 hormone3;
Pancreatic hormone peptide. +PF00220 hormone4;
Neurohypophysial hormones, N-terminal Domain. C-terminal is in hormone5. +PF00184 hormone5;
Neurohypophysial hormones, C-terminal Domain. N-terminal Domain is in hormone4. +PF00236 hormone6;
Glycoprotein hormone. +PF04617 Hox9 activation region
This family constitutes the N termini of the paralogous homeobox proteins HoxA9, HoxB9, HoxC9 and HoxD9. The N terminal region is found to act as a transcription activation region. Btg1 and Btg2 - the B-cell translocation gene products - may function as cofactors for Hoxb9-mediated transcription. The Btg proteins modulate Hoxb9 transcriptional activity by recruiting a multiprotein Ccr4-like complex .. +PF01856 Helicobacter outer membrane protein
Pfam-B_395 (release 4.2). This family seems confined to Helicobacter. It is predicted to be an outer membrane protein based on its pattern of alternating hydrophobic amino acids similar to porins .. +PF02521 Putative outer membrane protein
Pfam-B_1230 (release 5.4). This family consists of putative outer membrane proteins from Helicobacter pylori (campylobacter pylori). . +PF03328 HpcH/HpaI aldolase/citrate lyase family
Pfam-B_3076 (release 6.5) & Pfam-B_2811 (release 14.0). This family includes 2,4-dihydroxyhept-2-ene-1,7-dioic acid aldolase and 4-hydroxy-2-oxovalerate aldolase.. +PF04982 HPP family
These proteins are integral membrane proteins with four transmembrane spanning helices. The most conserved region of the alignment is a motif HPP. The function of these proteins is uncertain but they may be transporters.. +PF01288 7,8-dihydro-6-hydroxymethylpterin-pyrophosphokinase (HPPK)
+PF02603 Hpr_kinase;
HPr Serine kinase N terminus. This family represents the N-terminal region of Hpr Serine/threonine kinase PtsK. This kinase is the sensor in a multicomponent phospho-relay system in control of carbon catabolic repression in bacteria . This kinase is unusual in that it recognises the tertiary structure of its target and is a member of a novel family unrelated to any previously described protein phosphorylating enzymes . X-ray analysis of the full-length crystalline enzyme from Staphylococcus xylosus at a resolution of 1.95 A shows the enzyme to consist of two clearly separated domains that are assembled in a hexameric structure resembling a three-bladed propeller. The blades are formed by two N-terminal domains each, and the compact central hub assembles the C-terminal kinase domains .. +PF01627 Hpt domain
Pfam-B_971 (release 4.1). The histidine-containing phosphotransfer (HPt) domain is a novel protein module with an active histidine residue that mediates phosphotransfer reactions in the two-component signaling systems. A multistep phosphorelay involving the HPt domain has been suggested for these signaling pathways. The crystal structure of the HPt domain of the anaerobic sensor kinase ArcB has been determined . The domain consists of six alpha helices containing a four-helix bundle-folding. The pattern of sequence similarity of the HPt domains of ArcB and components in other signaling systems can be interpreted in light of the three-dimensional structure and supports the conclusion that the HPt domains have a common structural motif both in prokaryotes and eukaryotes. In S. cerevisiae ypd1p this domain has been shown to contain a binding surface for Ssk1p (response regulator receiver domain containing protein Pfam:PF00072) .. +PF01628 HrcA protein C terminal domain
Pfam-B_1133 (release 4.1). HrcA is found to negatively regulate the transcription of heat shock genes [1,2]. HrcA contains an amino terminal helix-turn-helix domain, however this corresponds to the carboxy terminal domain.. +PF04877 HrpZ;
Pfam-B_6141 (release 7.6). HrpZ from the plant pathogen Pseudomonas syringae binds to lipid bilayers and forms a cation-conducting pore in vivo. This pore-forming activity may allow nutrient release or delivery of virulence factors during bacterial colonisation of host plants . The family of hairpinN proteins, Harpin, has been merged into this family. HrpN is a virulence determinant which elicits lesion formation in Arabidopsis and tobacco and triggers systemic resistance in Arabidopsis .. +PF02218 Repeat in HS1/Cortactin
Pfam-B_5631 (Release 5.2). The function of this repeat is unknown. Seven copies are found in cortactin Swiss:Q14247 and four copies are found in HS1 Swiss:P14317. The repeats are always found amino terminal to an SH3 domain Pfam:PF00018.. +PF00447 HSF-type DNA-binding
+PF00011 Hsp20/alpha crystallin family
+PF01430 Hsp33 protein
Hsp33 is a molecular chaperone, distinguished from all other known chaperones by its mode of functional regulation. Its activity is redox regulated. Hsp33 is a cytoplasmically localised protein with highly reactive cysteines that respond quickly to changes in the redox environment. Oxidising conditions like H2O2 cause disulfide bonds to form in Hsp33, a process that leads to the activation of its chaperone function .. +PF00012 Hsp70 protein
Hsp70 chaperones help to fold many proteins. Hsp70 assisted folding involves repeated cycles of substrate binding and release. Hsp70 activity is ATP dependent. Hsp70 proteins are made up of two regions: the amino terminus is the ATPase domain and the carboxyl terminus is the substrate binding region.. +PF00183 Hsp90 protein
+PF04119 Heat shock protein 9/12
Pfam-B_14318 (release 7.3);. These heat shock proteins (Hsp9 and Hsp12) are strongly expressed, an increase of 100 fold, upon entry into stationary phase in yeast [1,2].. +PF04213 Htaa;
This domain is found in HtaA, a secreted protein implicated in iron acquisition and transport .. +PF00126 Bacterial regulatory helix-turn-helix protein, lysR family
+PF04967 HTH DNA binding domain
+PF01022 HTH_ArsR_family;
Bacterial regulatory protein, arsR family. Pfam-B_139 (release 3.0). Members of this family contains a DNA binding 'helix-turn-helix' motif. This family includes other proteins which are not included in the Prosite definition.. +PF01418 Helix-turn-helix domain, rpiR family
Pfam-B_3373 (release 2.1). This domain contains a helix-turn-helix motif . The best characterised member of this family is Swiss:P39266. RpiR is a regulator of the expression of rpiB gene.. +PF02796 Helix-turn-helix domain of resolvase
+PF00165 HTH_2;
Bacterial regulatory helix-turn-helix proteins, AraC family. In the absence of arabinose, the N-terminal arm of AraC binds to the DNA binding domain (Pfam:PF00165) and helps to hold the two DNA binding domains in a relative orientation that favours DNA looping. In the presence of arabinose, the arms bind over the arabinose on the dimerisation domain, thus freeing the DNA-binding domains. The freed DNA-binding domains are then able to assume a conformation suitable for binding to the adjacent DNA sites that are utilised when AraC activates transcription, and hence AraC ceases looping the DNA when arabinose is added [1-2].. +PF04204 Homoserine O-succinyltransferase
TIGRFAMs (release 2.0);. +PF04955 HupE / UreJ protein
This family of proteins are hydrogenase / urease accessory proteins. The alignment contains many conserved histidines that are likely to be involved in nickel binding. The members usually have five membrane-spanning regions.. +PF01455 HupF/HypC family
Prodom_3112 (release 99.1). +PF04809 HupH hydrogenase expression protein, C-terminal conserved region
Pfam-B_3701 (release 7.6). This family represents a C-terminal conserved region found in these bacterial proteins necessary for hydrogenase synthesis. Their precise function is unknown .. +PF01750 Hydrogenase maturation protease
Pfam-B_548 (release 4.2). The family consists of hydrogenase maturation proteases. In E. coli HypI the hydrogenase maturation protease is involved in processing of HypE the large subunit of hydrogenases 3, by cleavage of its C-terminal .. +PF01968 Hydantoinase;
Hydantoinase/oxoprolinase. This family includes the enzymes hydantoinase and oxoprolinase EC:3.5.2.9. Both reactions involve the hydrolysis of 5-membered rings via hydrolysis of their internal imide bonds .. +PF02538 Hydantoinase B/oxoprolinase
This family includes N-methylhydantoinase B which converts hydantoin to N-carbamyl-amino acids, and 5-oxoprolinase (Swiss:P97608) EC:3.5.2.9 which catalyses the formation of L-glutamate from 5-oxo-L-proline. These enzymes are part of the oxoprolinase family and are related to Pfam:PF01968.. +PF01185 Fungal hydrophobin
+PF01155 Hydrogenase expression/synthesis hypA family
Four conserved cysteines lie either side of the least conserved region.. +PF01924 Hydrogenase formation hypA family
HypD is involved in hydrogenase formation. It contains many possible metal binding residues, which may bind to nickel. Transposon Tn5 insertions into hypD resulted in R. leguminosarum mutants that lacked any hydrogenase activity in symbiosis with peas .. +PF02494 HYR domain
This domain is known as the HYR (Hyalin Repeat) domain, after the protein hyalin that is composed exclusively of this repeat. This domain probably corresponds to a new superfamily in the immunoglobulin fold. The function of this domain is uncertain it may be involved in cell adhesion .. +PF01608 I/LWEQ domain
I/LWEQ domains bind to actin. It has been shown that the I/LWEQ domains from mouse talin Swiss:P26039 and yeast Sla2p Swiss:P33338 interact with F-actin . I/LWEQ domains can be placed into four major groups based on sequence similarity: (1) Metazoan talin; (2) Dictyostelium TalA/TalB Swiss:P54633 and SLA110; (3) metazoan Hip1p Swiss:O00291; and (4) yeast Sla2p Swiss:P33338. The domain has four conserved blocks, the name of the domain is derived from the initial conserved amino acid of each of the four blocks .. +PF04568 Mitochondrial ATPase inhibitor, IATP
ATP synthase inhibitor prevents the enzyme from switching to ATP hydrolysis during collapse of the electrochemical gradient, for example during oxygen deprivation. ATP synthase inhibitor forms a one to one complex with the F1 ATPase, possibly by binding at the alpha-beta interface. It is thought to inhibit ATP synthesis by preventing the release of ATP . The minimum inhibitory region for bovine inhibitor (Swiss:P01096) is from residues 39 to 72 . The inhibitor has two oligomeric states, dimer (the active state) and tetramer. At low pH, the inhibitor forms a dimer via antiparallel coiled coil interactions between the C terminal regions of two monomers. At high pH, the inhibitor forms tetramers and higher oligomers by coiled coil interactions involving the N terminus and inhibitory region, thus preventing the inhibitory activity .. +PF01749 Importin beta binding domain
Pfam-B_544 (release 4.2). This family consists of the importin alpha (karyopherin alpha), importin beta (karyopherin beta) binding domain. The domain mediates formation of the importin alpha beta complex; required for classical NLS import of proteins into the nucleus, through the nuclear pore complex and across the nuclear envelope. Also in the alignment is the NLS of importin alpha which overlaps with the IBB domain .. +PF03617 IBV 3A protein
Pfam-B_3183 (release 7.0). The gene product of gene 3 from Avian infectious bronchitis virus. Currently, the function of this protein remains unknown.. +PF03622 IBV 3B protein
Pfam-B_3190 (release 7.0). Product of ORF 3B from Avian infectious bronchitis virus (IBV). Currently, the function of this protein remains unknown .. +PF03620 IBV 3C protein
Pfam-B_3232 (release 7.0). Product of ORF 3C from Avian infectious bronchitis virus (IBV). Currently, the function of this protein remains unknown.. +PF04629 Islet cell autoantigen ICA69, C-terminal domain
Pfam-B_5314 (release 7.5). This family includes a 69 kD protein which has been identified as an islet cell autoantigen in type I diabetes mellitus . Its precise function is unknown.. +PF03921 ICAM_N-terminal;
Intercellular adhesion molecule (ICAM), N-terminal domain. ICAMs normally function to promote intercellular adhesion and signalling. However, the N-terminal domain of the receptor binds to the rhinovirus 'canyon' surrounding the icosahedral 5-fold axes, during the viral attachment process . This family is part of the Ig superfamily and is therefore related to the family ig (Pfam:PF00047). . +PF00818 Ice nucleation protein repeat
Pfam-B_2 (release 3.0). +PF00656 ICE_p20;
+PF00463 Isocitrate lyase family
+PF03517 ICln_channel;
Regulator of volume decrease after cellular swelling. Griffiths-Jones SR, Coggill P. ICln is a ubiquitously expressed multi-functional protein that plays a critical role in regulating volume decrease in cells after cellular swelling. In plants, ICln induces Cl- currents [1,4,5], thus regulating Cl- homoeostasis in eukaryotes [2,3]. Structurally, the fold resembles a pleckstrin homology fold, one of whose roles is to recruit and tether their host protein to the cell membrane; and although the surface charges of the ICln fold are not equivalent to those of the PH domain, ICln can be phosphorylated in vitro and the PH-nature of the domain may be the part involving it in the transposition from cytosol to cell membrane during cytotonic swelling .. +PF04140 Isoprenylcysteine carboxyl methyltransferase (ICMT) family
Pfam-B_15304 (release 7.3) & Pfam-B_5114 (Release 8.0). The isoprenylcysteine o-methyltransferase (EC:2.1.1.100) family carry out carboxyl methylation of cleaved eukaryotic proteins that terminate in a CaaX motif. In Saccharomyces cerevisiae this methylation is carried out by Ste14p, an integral endoplasmic reticulum membrane protein. Ste14p is the founding member of the isoprenylcysteine carboxyl methyltransferase (ICMT) family, whose members share significant sequence homology .. +PF03971 Monomeric isocitrate dehydrogenase
NADP(+)-dependent isocitrate dehydrogenase (ICD) is an important enzyme of the intermediary metabolism, as it controls the carbon flux within the citric acid cycle and supplies the cell with 2-oxoglutarate EC:1.1.1.42 and NADPH for biosynthetic purposes .. +PF01231 Indoleamine 2,3-dioxygenase
+PF02479 IE68;
Herpesvirus immediate early protein. Pfam-B_2276 (release 5.4). This regulatory protein is expressed from an immediate early gene in the cell cycle of herpesvirus. The protein is known by various names including IE-68, US1, ICP22 and IR4.. +PF01008 Initiation factor 2 subunit family
Pfam-B_1302 (release 3.0). This family includes initiation factor 2B alpha, beta and delta subunits from eukaryotes, initiation factor 2B subunits 1 and 2 from archaebacteria and some proteins of unknown function from prokaryotes. Initiation factor 2 binds to Met-tRNA, GTP and the small ribosomal subunit. Members of this family have also been characterised as 5-methylthioribose- 1-phosphate isomerases, an enzyme of the methionine salvage pathway. The crystal structure of Ypr118w, a non-essential, low-copy number gene product from Saccharomyces cerevisiae, reveals a dimeric protein with two domains and a putative active site cleft .. +PF00707 IF3;
Translation initiation factor IF-3, C-terminal domain. Pfam-B_629 (release 2.1). +PF01652 Eukaryotic initiation factor 4E
Pfam-B_1315 (release 4.1). +PF00932 IF_C_term; IF_tail;
The lamin-tail domain (LTD), which has an immunoglobulin (Ig) fold, is found in Nuclear Lamins, Chlo1887 from Chloroflexus, and several bacterial proteins where it occurs with membrane associated hydrolases of the metallo-beta-lactamase,synaptojanin, and calcineurin-like phosphoesterase superfamilies .. +PF00714 Interferon gamma
Pfam-B_615 (release 2.1). +PF00047 Immunoglobulin domain
Members of the immunoglobulin superfamily are found in hundreds of proteins of different functions. Examples include antibodies, the giant muscle kinase titin and receptor tyrosine kinases. Immunoglobulin-like domains may be involved in protein-protein and protein-ligand interactions. The Pfam alignments do not include the first and last strand of the immunoglobulin-like domain.. +PF02395 IGA1;
Immunoglobulin A1 protease. Pfam-B_540 (release 5.2). This family consists of immunoglobulin A1 protease proteins. The immunoglobulin A1 protease cleaves immunoglobulin IgA and is found in pathogenic bacteria such as Neisseria gonorrhoeae . Not all of the members of this family are IgA proteases Swiss:O32555 from E. coli O157:H7 cleaves human coagulation factor V and Swiss:O88093 is a hemoglobin protease from E. coli EB1 .. +PF00219 Insulin-like growth factor binding protein
+PF01378 B domain
This domain is found as a tandem repeat in Streptococcal cell surface proteins, such as the IgG binding protein G.. +PF00475 Imidazoleglycerol-phosphate dehydratase
+PF00218 Indole-3-glycerol phosphate synthase
+PF05049 Interferon-inducible GTPase (IIGP)
Pfam-B_5519 (release 7.7). Interferon-inducible GTPase (IIGP) is thought to play a role in intracellular defence. IIGP is predominantly associated with the Golgi apparatus and also localises to the endoplasmic reticulum and exerts a distinct role in IFN-induced intracellular membrane trafficking or processing .. +PF00340 interleukin-1;
This family includes interleukin-1 and interleukin-18.. +PF00726 Interleukin 10
Pfam-B_885 (release 2.1). +PF03039 Interleukin-12 alpha subunit
Pfam-B_2071 (release 6.4). Interleukin 12 (IL-12) is a disulphide-bonded heterodimer consisting of a 35kDa alpha subunit (e.g. Swiss:P29459) and a 40kDa beta subunit (e.g. Swiss:P29460). It is involved in the stimulation and maintenance of Th1 cellular immune responses, including the normal host defence against various intracellular pathogens, such as Leishmania, Toxoplasma, measles virus and HIV. IL-12 also has an important role in pathological Th1 responses, such as in inflammatory bowel disease and multiple sclerosis. Suppression of IL-12 activity in such diseases may have therapeutic benefit. On the other hand, administration of recombinant IL-12 may have therapeutic benefit in conditions associated with pathological Th2 responses [1,2].. +PF02372 Interleukin 15
Pfam-B_2545 (release 5.4). Interleukin-15 (IL-15) is a cytokine that possesses a variety of biological functions, including stimulation and maintenance of cellular immune responses .. +PF02394 Interleukin-1 propeptide
Pfam-B_1500 (release 5.2). The Interleukin-1 cytokines are translated as precursor proteins. The N terminal approx. 115 amino acids form a propeptide that is cleaved off to release the active interleukin-1.. +PF00715 Interleukin 2
Pfam-B_709 (release 2.1). +PF02059 Interleukin-3
+PF00727 Interleukin 4
Pfam-B_833 (release 2.1). +PF02025 Interleukin 5
+PF00489 IL-6;
Interleukin-6/G-CSF/MGF family. +PF01415 Interleukin 7/9 family
Ponting CP, Schultz J, Bork P. IL-7 is a cytokine that acts as a growth factor for early lymphoid cells of both B- and T-cell lineages. IL-9 is a multi-functional cytokine that, although originally described as a T-cell growth factor, its function in T-cell response remains unclear.. +PF00048 il8;
Small cytokines (intecrine/chemokine), interleukin-8 like. Includes a number of secreted growth factors and interferons involved in mitogenic, chemotactic, and inflammatory activity. Structure contains two highly conserved disulfide bonds.. +PF01787 Ilarvirus coat protein
Pfam-B_1131 (release 4.2). This family consists of various coat proteins from the ilarviruses part of the Bromoviridae, members include apple mosaic virus and prune dwarf virus. The ilarvirus coat protein is required to initiate replication of the viral genome in host plants . Members of the Bromoviridae have a positive strand ssRNA genome with no DNA stage in their replication.. +PF01450 Acetohydroxy acid isomeroreductase, catalytic domain
Prodom_2380 (release 99.1). Acetohydroxy acid isomeroreductase catalyses the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine.. +PF00920 Dehydratase family
Pfam-B_1309 (release 3.0). +PF05046 Mitochondrial large subunit ribosomal protein (Img2)
Pfam-B_17929 (release 7.6). This family of proteins have been identified as part of the mitochondrial large ribosomal subunit in yeast .. +PF04156 IncA protein
Pfam-B_2718 (release 7.3). Chlamydia trachomatis is an obligate intracellular bacterium that develops within a parasitophorous vacuole termed an inclusion. The inclusion is non-fusogenic with lysosomes but intercepts lipids from a host cell exocytic pathway. Initiation of chlamydial development is concurrent with modification of the inclusion membrane by a set of C. trachomatis-encoded proteins collectively designated Incs. One of these Incs, IncA, is functionally associated with the homotypic fusion of inclusions . This family probably includes members of the wider Inc family rather than just IncA.. +PF02387 IncFII RepA protein family
Pfam-B_1209 (release 5.2). This protein is plasmid encoded and found to be essential for plasmid replication .. +PF02974 Protease inhibitor Inh
The Inh inhibitor is secreted into the periplasm where its presumed physiological function is to protect periplasmic proteins against the action of secreted proteases . A range of proteases including A, B and C from E. chrysanthemi, alkaline protease from Pseudomonas aeruginosa and the 50 kDa protease from Serratia marcescens are inhibited.. +PF00876 Ogre;
Pfam-B_779 (release 3.0). This family includes the drosophila proteins Ogre and shaking-B, and the C. elegans proteins Unc-7 and Unc-9. Members of this family are integral membrane proteins which are involved in the formation of gap junctions . This family has been named the Innexins .. +PF01658 Myo-inositol-1-phosphate synthase
Pfam-B_959 (release 4.1). This is a family of myo-inositol-1-phosphate synthases. Inositol-1-phosphate synthase catalyses the conversion of glucose-6- phosphate to inositol-1-phosphate, which is then dephosphorylated to inositol . Inositol phosphates play an important role in signal transduction.. +PF00459 inositol_P;
Inositol monophosphatase family. +PF03488 Ins_beta_nem;
Nematode insulin-related peptide beta type. +PF03811 Ins_element1; HTH_Tnp_IS1;
InsA N-terminal domain. This appears to be a short zinc binding domain found in IS1 InsA family protein. It is found at the N-terminus of the protein and may be a DNA-binding domain.. +PF00049 ins;
Insulin/IGF/Relaxin family. Superfamily includes insulins; relaxins; insulin-like growth factor; and bombyxin. All are secreted regulatory hormones. Disulfide rich, all-alpha fold. Alignment includes B chain, linker (which is processed out of the final product), and A chain.. +PF00552 integrase; Integrase;
Integrase DNA binding domain. Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain. The central domain is the catalytic domain Pfam:PF00665. This domain is the carboxyl terminal domain that is a non-specific DNA binding domain .. +PF02920 integrase_DNA;
DNA binding domain of tn916 integrase. +PF00357 integrin_A;
Integrin alpha cytoplasmic region. This family contains the short intracellular region of integrin alpha chains.. +PF00362 integrin_B;
Integrin, beta chain. Integrins have been found in animals and their homologues have also been found in cyanobacteria, probably due to horizontal gene transfer . The sequence repeats have been trimmed due to an overlap with EGF.. +PF00143 interferon;
Interferon alpha/beta domain. +PF03487 Interleukin_13;
+PF01348 Type II intron maturase
Pfam-B_105 (release 3.0). Group II introns use intron-encoded reverse transcriptase, maturase and DNA endonuclease activities for site-specific insertion into DNA . Although this type of intron is self splicing in vitro they require a maturase protein for splicing in vivo. It has been shown that a specific region of the aI2 intron is needed for the maturase function . This region was found to be conserved in group II introns and called domain X .. +PF03519 Invasion protein B family
+PF04741 InvH outer membrane lipoprotein
Pfam-B_3503 (release 7.5). This family represents the Salmonella outer membrane lipoprotein InvH. The molecular function of this protein is unknown, but it is required for the localisation to outer membrane of InvG, which is involved in a type III secretion apparatus mediating host cell invasion [1,2].. +PF00904 Involucrin repeat
Pfam-B_1158 (release 3.0). +PF02121 Phosphatidylinositol transfer protein
Along with the structurally unrelated Sec14p family (found in Pfam:PF00650), this family can bind/exchange one molecule of phosphatidylinositol (PI) or phosphatidylcholine (PC) and thus aids their transfer between different membrane compartments. There are three sub-families - all share an N-terminal PITP-like domain, whose sequence is highly conserved. It is described as consisting of three regions. The N-terminal region is thought to bind the lipid and contains two helices and an eight-stranded, mostly antiparallel beta-sheet. An intervening loop region, which is thought to play a role in protein-protein interactions, separates this from the C-terminal region, which exhibits the greatest sequence variation and may be involved in membrane binding. PITP alpha (Swiss:Q00169) has a 16-fold greater affinity for PI than PC. Together with PITP beta (Swiss:P48739), it is expressed ubiquitously in all tissues .. +PF03278 IpaB/EvcA family
Pfam-B_4003 (release 6.5). This family includes IpaB, which is an invasion plasmid antigen from Shigella , as well as EvcA from E. coli Swiss:Q9ZNF1. Members of this family seem to be involved in pathogenicity of some enterobacteria. However the exact function of this component is not clear.. +PF04979 Protein phosphatase inhibitor 2 (IPP-2)
Pfam-B_5306 (release 7.6). Protein phosphatase inhibitor 2 (IPP-2) is a phosphoprotein conserved among all eukaryotes, and it appears in both the nucleus and cytoplasm of tissue culture cells .. +PF01715 IPP transferase
Pfam-B_1875 (release 4.1). This is a family of IPP transferases EC:2.5.1.8 also known as tRNA delta(2)-isopentenylpyrophosphate transferase. These enzymes modify both cytoplasmic and mitochondrial tRNAs at A(37) to give isopentenyl A(37) .. +PF01745 Isopentenyl transferase
Pfam-B_2229 (release 4.1). Isopentenyl transferase / dimethylallyl transferase synthesises isopentenyladenosine 5'-monophosphate, a cytokinin that induces shoot formation on host plants infected with the Ti plasmid .. +PF00605 Interferon regulatory factor transcription factor
This family of transcription factors are important in the regulation of interferons in response to infection by virus and in the regulation of interferon-inducible genes. Three of the five conserved tryptophan residues bind to DNA.. +PF04120 iron_permease;
Low affinity iron permease . Pfam-B_71435 (release 7.3);. +PF02060 Slow voltage-gated potassium channel
+PF00180 isodh;
Isocitrate/isopropylmalate dehydrogenase. +PF04279 Intracellular septation protein A
TIGRFAMs (release 2.0);. +PF01128 UPF0007;
2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase. Members of this family are enzymes which catalyse the formation of 4-diphosphocytidyl-2-C-methyl-D-erythritol from cytidine triphosphate and 2-C-methyl-D-erythritol 4-phosphate (MEP) .. +PF01695 IstB;
IstB-like ATP binding protein. Pfam-B_982 (release 4.1). This protein contains an ATP/GTP binding P-loop motif. It is found associated with IS21 family insertion sequences . The function of this protein is unknown, but it may perform a transposase function .. +PF02189 Immunoreceptor tyrosine-based activation motif
Alignment kindly provided by SMART. +PF01156 Inosine-uridine preferring nucleoside hydrolase
+PF04183 IucA / IucC family
Pfam-B_1982 (release 7.3). IucA and IucC catalyse discrete steps in biosynthesis of the siderophore aerobactin from N epsilon-acetyl-N epsilon-hydroxylysine and citrate . This family represents the N-terminal region. The C-terminal region appears to be related to iron transporter proteins.. +PF01419 Jacalin-like lectin domain
Proteins containing this domain are lectins. It is found in 1 to 6 copies in these proteins. The domain is also found in the animal prostatic spermine-binding protein (Swiss:P15501).. +PF02375 jmjN;
+PF03957 JNK;
Jun-like transcription factor. +PF01486 K-box region
Pfam-B_25 (release 4.0). The K-box region is commonly found associated with SRF-type transcription factors see Pfam:PF00319. The K-box is a possible coiled-coil structure . Possible role in multimer formation .. +PF02960 K1 glycoprotein
Pfam-B_345 (release 6.4). +PF02149 Kinase associated domain 1
+PF02524 KID_repeat;
Pfam-B_1382 (release 5.4). This family contains the KID repeat as found in Borrelia spirochete RepA / Rep+ proteins. The function of these proteins is unknown. RepA and related Borrelia proteins have been suggested to play an important genus-wide role in the biology of the Borrelia .. +PF00050 kazal;
Kazal-type serine protease inhibitor domain. Usually indicative of serine protease inhibitors. However, kazal-like domains are also seen in the extracellular part of agrins, which are not known to be protease inhibitors. Kazal domains often occur in tandem arrays. Small alpha+beta fold containing three disulphides. Alignment also includes a single domain from transporters in the OATP/PGT family Swiss:P46721.. +PF03522 K-Cl Co-transporter type 1 (KCC1)
+PF03520 KCNQ1_channel;
KCNQ voltage-gated potassium channel. This family matches to the C-terminal tail of KCNQ type potassium channels.. +PF03812 2-keto-3-deoxygluconate permease
TIGRFAMs, Griffiths-Jones SR. +PF03814 Potassium-transporting ATPase A subunit
TIGRFAMs, Griffiths-Jones SR. +PF02669 K+-transporting ATPase, c chain
This family consists of K+-transporting ATPase, c chain, KdpC. KdpC forms strong interactions with the KdpA subunit, serving to assemble and stabilise the Kdp complex . It has been suggested that KdpC could be one of the connecting links between the energy providing subunit KdpB and the K+-transporting subunit KdpA . The K+ transport system actively transports K+ ions via ATP hydrolysis.. +PF02702 Osmosensitive K+ channel His kinase sensor domain
This is a family of KdpD sensor kinase proteins that regulate the kdpFABC operon responsible for potassium transport . The aligned region corresponds to the N-terminal cytoplasmic part of the protein which may be the sensor domain responsible for sensing turgor pressure .. +PF04962 KduI/IolB family
COG3717 & Pfam-B_11840 (release 10.0). This family includes the 5-keto 4-deoxyuronate isomerase enzyme EC:5.3.1.17 that is involved in pectin degradation. This family also includes bacterial Myo-inositol catabolism (IolB) proteins. The Bacillus subtilis inositol operon (iolABCDEFGHIJ) is involved in myo-inositol catabolism. Glucose repression of the iol operon induced by inositol is exerted through catabolite repression mediated by CcpA and the iol induction system mediated by IolR . The exact function of IolB is unknown. Members of this family possess a Cupin like structure.. +PF02422 Keratin
Pfam-B_1920 (release 5.4). This family represents avian keratin proteins , found in feathers, scale and claw.. +PF01500 Keratin, high sulfur B2 protein
Pfam-B_706 (release 4.0). High sulfur proteins are cysteine-rich proteins synthesised during the differentiation of hair matrix cells, and form hair fibres in association with hair keratin intermediate filaments . This family has been divided up into four regions, with the second region containing 8 copies of a short repeat . This family is also known as B2 or KAP1.. +PF04579 Keratin, high-sulphur matrix protein
Pfam-B_4676 (release 7.5). Family of Keratin, high-sulfur matrix proteins. The keratin products of mammalian epidermal derivatives such as wool and hair consist of microfibrils embedded in a rigid matrix of other proteins. The matrix proteins include the high-sulphur and high-tyrosine keratins, having molecular weights of 6-20 kDa, whereas microfibrils contain the larger, low-sulphur keratins (40-56 kDa) .. +PF03882 KicB killing factor
The kicA and kicB genes are found upstream of mukB. It has been suggested that the kicB gene encodes a killing factor and the kicA gene codes for a protein that suppresses the killing function of the kicB gene product . It was also demonstrated that KicA and KicB can function as a post-segregational killing system, when the genes are transferred from the E. coli chromosome onto a plasmid .. +PF04383 KilA-N domain
The amino-terminal module of the D6R/N1R proteins defines a novel, conserved DNA-binding domain (the KilA-N domain) that is found in a wide range of proteins of large bacterial and eukaryotic DNA viruses. The KilA-N domain family also includes the previously defined APSES domain. The KilA-N and APSES domains may also share a common fold with the nucleic acid-binding modules of the LAGLIDADG nucleases and the amino-terminal domains of the tRNA endonuclease .. +PF02172 KIX domain
Pfam-B_4149 (Release 4.2). CBP and P300 bind to the CREB via a domain known as KIX . The KIX domain of CBP also binds to transactivation domains of other nuclear factors including Myb and Jun.. +PF03037 Kinetoplastid membrane protein 11
Pfam-B_1062 (release 6.4). Kinetoplastid membrane protein 11 is a major cell surface glycoprotein of the parasite Leishmania donovani.. +PF03790 KNOX1 domain
Pfam-B_533 (release 7.0). The MEINOX region is comprised of two domains, KNOX1 and KNOX2. KNOX1 plays a role in suppressing target gene expression. KNOX2, essential for function, is thought to be necessary for homo-dimerisation . . +PF03791 KNOX2 domain
Pfam-B_533 (release 7.0). The MEINOX region is comprised of two domains, KNOX1 and KNOX2. KNOX1 plays a role in suppressing target gene expression. KNOX2, essential for function, is thought to be necessary for homo-dimerisation . . +PF00051 kringle;
Swissprot_feature_table. Kringle domains have been found in plasminogen, hepatocyte growth factors, prothrombin, and apolipoprotein A. Structure is disulfide-rich, nearly all-beta.. +PF00197 Trypsin and protease inhibitor
+PF02442 L1L_F9_C19;
Lipid membrane protein of large eukaryotic DNA viruses. Pfam-B_1868 (release 5.4), Iyer L. The four families of large eukaryotic DNA viruses, Poxviridae, Asfarviridae, Iridoviridae, and Phycodnaviridae, referred to collectively as nucleocytoplasmic large DNA viruses or NCLDV, have all been shown to have a lipid membrane, in spite of the major differences in virion structure. The paralogous genes L1R and F9L encode membrane proteins that have a conserved domain architecture, with a single, C-terminal transmembrane helix, and an N-terminal, multiple-disulfide-bonded domain. The conservation of the myristoylated, disulfide-bonded protein L1R/F9L in most of the NCLDV correlates with the conservation of the thiol-disulfide oxidoreductase E10R which, in vaccinia virus, is required for the formation of disulfide bonds in L1R and F9L .. +PF05047 Mitochondrial ribosomal protein L51 / S25 / CI-B8 domain
Pfam-B_9461 (release 7.6). The proteins in this family are located in the mitochondrion. The family includes ribosomal protein L51, and S25. This family also includes mitochondrial NADH-ubiquinone oxidoreductase B8 subunit (CI-B8) EC:1.6.5.3. It is not known whether all members of this family form part of the NADH-ubiquinone oxidoreductase and whether they are also all ribosomal proteins.. +PF04604 Type-A lantibiotic
Pfam-B_4608 (release 7.5). Lantibiotics are antibiotic peptides distinguished by the presence of the rare thioether amino acids lanthionine and/or methyl-lanthionine. They are produced by Gram-positive bacteria as gene-encoded precursor peptides and undergo post-translational modification to generate the mature peptide. Based on their structural and functional features lantibiotics are currently divided into two major groups: the flexible amphiphilic type-A and the rather rigid and globular type-B. Type-A lantibiotics act primarily by pore formation in the bacterial membrane by a mechanism involving the interaction with specific docking molecules such as the membrane precursor lipid II .. +PF02502 Ribose/Galactose Isomerase
Pfam-B_1105 (release 5.4). This family of proteins contains the sugar isomerase enzymes ribose 5-phosphate isomerase B (rpiB), galactose isomerase subunit A (LacA) and galactose isomerase subunit B (LacB). . +PF00356 lacI;
Bacterial regulatory proteins, lacI family. +PF02450 LACT;
Lecithin:cholesterol acyltransferase. Pfam-B_2099 (release 5.4). Lecithin:cholesterol acyltransferase (LCAT) is involved in extracellular metabolism of plasma lipoproteins, including cholesterol.. +PF04369 Lactococcin-like family
Family of bacteriocins from lactic acid bacteria.. +PF01306 LacY proton/sugar symporter
This family is closely related to the sugar transporter family.. +PF00961 Intron_maturase;
LAGLIDADG endonuclease. +PF02264 LamB porin
Pfam-B_4810 (release 5.2). Maltoporin (LamB protein) forms a trimeric structure which facilitates the diffusion of maltodextrins across the outer membrane of Gram-negative bacteria. The membrane channel is formed by an antiparallel beta-barrel .. +PF03746 LamB/YcsF family
This family includes LamB. The lam locus of Aspergillus nidulans consists of two divergently transcribed genes, lamA and lamB, involved in the utilisation of lactams such as 2-pyrrolidinone. Both genes are under the control of the positive regulatory gene amdR and are subject to carbon and nitrogen metabolite repression . The exact molecular function of the proteins in this family is unknown.. +PF02061 Lambda Phage CIII
The CIII protein from bacteriophage lambda is an inhibitor of the FtsH peptidase .. +PF00052 laminin_B;
Laminin B (Domain IV). Swissprot_feature_table. +PF00053 Laminin EGF-like (Domains III and V)
Swissprot_feature_table. This family is like Pfam:PF00008 but has 8 conserved cysteines instead of six.. +PF00054 laminin_G; Laminin_G;
Swissprot_feature_table. +PF00055 laminin_Nterm;
Laminin N-terminal (Domain VI). Swissprot_feature_table. +PF01299 Lysosome-associated membrane glycoprotein (Lamp)
+PF05147 Lanthionine synthetase C-like protein
Pfam-B_6095 (release 7.7). Lanthionines are thioether bridges that are putatively generated by dehydration of Ser and Thr residues followed by addition of cysteine residues within the peptide. This family contains the lanthionine synthetase C-like proteins 1 and 2 which are related to the bacterial lanthionine synthetase components C (LanC). LANCL1 (P40 seven-transmembrane-domain protein) and LANCL2 (testes-specific adriamycin sensitivity protein) are thought to be peptide-modifying enzyme components in eukaryotic cells. Both proteins are produced in large quantities in the brain and testes and may have a role in the immune surveillance of these organs . Lanthionines are found in lantibiotics, which are peptide-derived, post-translationally modified antimicrobials produced by several bacterial strains . This region contains seven internal repeats.. +PF04738 Lantibiotic dehydratase, C terminus
Lantibiotics are ribosomally synthesised antimicrobial agents derived from ribosomally synthesised peptides . They are produced by bacteria of the Firmicutes phylum, and include mutacin, subtilin, and nisin. Lantibiotic peptides contain thioether bridges termed lanthionines that are thought to be generated by dehydration of serine and threonine residues followed by addition of cysteine residues . This family constitutes the C-terminus of the enzyme proposed to catalyse the dehydration step , .. +PF04737 Lantibiotic dehydratase, N terminus
Lantibiotics are ribosomally synthesised antimicrobial agents derived from ribosomally synthesised peptides . They are produced by bacteria of the Firmicutes phylum, and include mutacin, subtilin, and nisin. Lantibiotic peptides contain thioether bridges termed lanthionines that are thought to be generated by dehydration of serine and threonine residues followed by addition of cysteine residues . This family constitutes the N-terminus of the enzyme proposed to catalyse the dehydration step , .. +PF00500 late_protein_L1;
Pfam-B_69 (release 1.0). +PF00513 late_protein_L2;
Pfam-B_39 (release 1.0). +PF02354 Latrophilin Cytoplasmic C-terminal region
Pfam-B_874 (release 5.2). This family consists of the cytoplasmic C-terminal region in latrophilin. Latrophilin is a synaptic Ca2+ independent alpha- latrotoxin (LTX) receptor and is a novel member of the secretin family of G-protein coupled receptors that are involved in secretion . Latrophilin mRNA is present only in neuronal tissue . Latrophilin interacts with G-alpha O .. +PF01273 Lipid_binding_gp;
LBP / BPI / CETP family, N-terminal domain. The N and C terminal domains of the LBP/BPI/CETP family are structurally similar.. +PF02886 Lipid_binding_gp;
LBP / BPI / CETP family, C-terminal domain. The N and C terminal domains of the LBP/BPI/CETP family are structurally similar.. +PF03815 LCCL domain
TIGRFAMs, Griffiths-Jones SR. +PF04072 Leucine carboxyl methyltransferase
Pfam-B_5898 (release 7.3);. Family of leucine carboxyl methyltransferases EC:2.1.1.- . This family may need dividing, as the full alignment contains a significantly shorter mouse sequence.. +PF04792 V antigen (LcrV) protein
Pfam-B_6155 (release 7.5). Yersinia pestis, the aetiologic agent of plague, secretes a set of environmentally regulated, plasmid pCD1-encoded virulence proteins termed Yops and V antigen (LcrV) by a type III secretion mechanism. LcrV is a multifunctional protein that has been shown to act at the level of secretion control by binding the Ysc inner-gate protein LcrG and to modulate the host immune response by altering cytokine production. LcrV is also necessary for full induction of low-calcium response (LCR) stimulon virulence gene transcription. Family members are not confined to Yersinia pestis [1,2]. . +PF00056 ldh;
lactate/malate dehydrogenase, NAD binding domain. L-lactate dehydrogenases are metabolic enzymes which catalyse the conversion of L-lactate to pyruvate, the last step in anaerobic glycolysis. L-2-hydroxyisocaproate dehydrogenases are also members of the family. Malate dehydrogenases catalyse the interconversion of malate to oxaloacetate. The enzyme participates in the citric acid cycle. L-lactate dehydrogenase is also found as a lens crystallin in bird and crocodile eyes. N-terminus (this family) is a Rossmann NAD-binding fold. C-terminus is an unusual alpha+beta fold.. +PF02615 ldh_2;
Malate/L-lactate dehydrogenase. This family consists of bacterial and archaeal Malate/L-lactate dehydrogenase. L-lactate dehydrogenase, EC:1.1.1.27, catalyses the reaction (S)-lactate + NAD(+) <=> pyruvate + NADH. Malate dehydrogenase, EC:1.1.1.37 and EC:1.1.1.82, catalyses the reactions: (S)-malate + NAD(+) <=> oxaloacetate + NADH, and (S)-malate + NADP(+) <=> oxaloacetate + NADPH respectively.. +PF02866 ldh_C;
lactate/malate dehydrogenase, alpha/beta C-terminal domain. L-lactate dehydrogenases are metabolic enzymes which catalyse the conversion of L-lactate to pyruvate, the last step in anaerobic glycolysis. L-2-hydroxyisocaproate dehydrogenases are also members of the family. Malate dehydrogenases catalyse the interconversion of malate to oxaloacetate. The enzyme participates in the citric acid cycle. L-lactate dehydrogenase is also found as a lens crystallin in bird and crocodile eyes.. +PF00058 ldl_recept_b;
Low-density lipoprotein receptor repeat class B. This domain is also known as the YWTD motif after the most conserved region of the repeat. The YWTD repeat is found in multiple tandem repeats and has been predicted to form a beta-propeller structure .. +PF03760 LEA-group1;
Late embryogenesis abundant (LEA) group 1 . Pfam-B_1549 (release 7.0). Family members are conserved along the entire coding region, especially within the hydrophobic internal 20 amino acid motif, which may be repeated.. +PF03168 Late embryogenesis abundant protein
Mifsud W, Griffiths-Jones SR. Pfam-B_3080 (release 6.5). Different types of LEA proteins are expressed at different stages of late embryogenesis in higher plant seed embryos and under conditions of dehydration stress. The function of these proteins is unknown. This family represents a group of LEA proteins that appear to be distinct from those in Pfam:PF02987. The family DUF1511, Pfam:PF07427, has now been merged into this family.. +PF03242 Late embryogenesis abundant protein
Pfam-B_3170 (release 6.5). Members of this family are similar to late embryogenesis abundant proteins. Members of the family have been isolated in a number of different screens. However, the molecular function of these proteins remains obscure.. +PF00059 lectin_c;
Lectin C-type domain. Swissprot_feature_table. This family includes both long and short form C-type. +PF03041 lef-2;
Pfam-B_1773 (release 6.4). The lef-2 gene (for late expression factor 2) from baculovirus is required for expression of late genes. This gene has been shown to be specifically required for expression from the vp39 and polh promoters . LEF-1 is a DNA primase and there is some evidence to suggest that LEF-2 may bind to both DNA and LEF-1 .. +PF03388 Legume-like lectin family
Pfam-B_2789 (release 6.6). Lectins are structurally diverse proteins that bind to specific carbohydrates. This family includes the VIP36 Swiss:P49256 and ERGIC-53 Swiss:P49257 lectins. These two proteins were the first recognised members of a family of animal lectins similar (19-24%) to the leguminous plant lectins . The alignment for this family aligns residues lying towards the N-terminus, where the similarity of VIP36 and ERGIC-53 is greatest. However, while Fiedler and Simons identified these proteins as a new family of animal lectins, our alignment also includes yeast sequences. ERGIC-53 is a 53kD protein, localised to the intermediate region between the endoplasmic reticulum and the Golgi apparatus (ER-Golgi-Intermediate Compartment, ERGIC). It was identified as a calcium-dependent, mannose-specific lectin . Its dysfunction has been associated with combined factors V and VIII deficiency OMIM:227300 OMIM:601567, suggesting an important and substrate-specific role for ERGIC-53 in the glycoprotein- secreting pathway [2,3]. . +PF00139 lectin_legB;
Legume lectin domain. +PF03954 lectin_N;
Hepatic lectin, N-terminal domain. +PF05098 Late expression factor 4 (LEF-4)
Pfam-B_6330 (release 7.7). Late expression factor 4 (LEF-4) is one of the Baculovirus late expression factor proteins. LEF-4 carries out all the enzymatic functions related to mRNA capping .. +PF04941 Late expression factor 8 (LEF-8)
Pfam-B_5130 (release 7.6). Late expression factor 8 (LEF-8) is one of the primary components of RNA polymerase produced by polyhedrosis viruses. LEF-8 shows homology to the second largest subunit of prokaryotic DNA-directed RNA polymerase .. +PF05094 Late expression factor 9 (LEF-9)
Pfam-B_6326 (release 7.7). Late expression factor 9 (LEF-9) is one of the primary components of RNA polymerase produced by baculoviruses. LEF-9 is homologous to the largest beta-subunit of prokaryotic DNA-directed RNA polymerase .. +PF05150 Legionella pneumophila major outer membrane protein precursor
Pfam-B_6492 (release 7.7). This family consists of major outer membrane protein precursors from Legionella pneumophila.. +PF03020 LEM domain
The LEM domain is 50 residues long and is composed of two parallel alpha helices. This domain is found in inner nuclear membrane proteins. It is called the LEM domain after LAP2 Swiss:Q62733, Emerin Swiss:P50402 and Man1.. +PF04011 LemA family
The members of this family are related to the LemA protein Swiss:P71452 . LemA contains an amino terminal predicted transmembrane helix. It has been predicted that the small amino terminus is extracellular . The exact molecular function of this protein is uncertain.. +PF02998 Lentiviral Tat protein
Pfam-B_1519 (release 6.4). This family contains retroviral transactivating (Tat) proteins [1,2], from a variety of Lentiviruses.. +PF02024 Leptin
+PF03588 Leucyl/phenylalanyl-tRNA protein transferase
TIGRFAMs, Griffiths-Jones SR. +PF01819 Levivirus coat protein
The Levivirus coat protein forms the bacteriophage coat that encapsidates the viral RNA. 180 copies of this protein form the virion shell. The MS2 bacteriophage coat protein controls two distinct processes: sequence-specific RNA encapsidation and repression of replicase translation-by binding to an RNA stem-loop structure of 19 nucleotides containing the initiation codon of the replicase gene. The binding of a coat protein dimer to this hairpin shuts off synthesis of the viral replicase, switching the viral replication cycle to virion assembly rather than continued replication .. +PF01726 LexA DNA binding domain
Pfam-B_1975 (release 4.1). This is the DNA binding domain of the LexA SOS regulon repressor which prevents expression of DNA repair proteins. The aligned region contains a variant form of the helix-turn-helix DNA binding motif . This domain is found associated with Pfam:PF00717 the auto-proteolytic domain of LexA EC:3.4.21.88.. +PF01790 Prolipoprotein diacylglyceryl transferase
+PF00556 Antenna complex alpha/beta subunit
+PF04991 LicD family
Pfam-B_5278 (release 7.6). The LICD family of proteins show high sequence similarity and are involved in phosphorylcholine metabolism. There is evidence to show that LicD2 mutants have a reduced ability to take up choline, have decreased ability to adhere to host cells and are less virulent . These proteins are part of the nucleotidyltransferase superfamily .. +PF01291 LIF / OSM family
+PF00549 ligase-CoA;
This family includes the CoA ligases Succinyl-CoA synthetase alpha and beta chains, malate CoA ligase and ATP-citrate lyase. Some members of the family utilise ATP others use GTP.. +PF00412 LIM domain
This family represents two copies of the LIM structural domain.. +PF01803 LIM-domain binding protein
Pfam-B_1352 (release 4.2). The LIM-domain binding protein, binds to the LIM domain Pfam:PF00412 of LIM homeodomain proteins which are transcriptional regulators of development. Nuclear LIM interactor (NLI) / LIM domain-binding protein 1 (LDB1) Swiss:P70662 is located in the nuclei of neuronal cells during development, it is co-expressed with Isl1 in early motor neuron differentiation and has a suggested role in the Isl1 dependent development of motor neurons . It is suggested that these proteins act synergistically to enhance transcriptional efficiency by acting as co-factors for LIM homeodomain and Otx class transcription factors both of which have essential roles in development . The Drosophila protein Chip Swiss:O18353 is required for segmentation and activity of a remote wing margin enhancer . Chip is a ubiquitous chromosomal factor required for normal expression of diverse genes at many stages of development . It is suggested that Chip cooperates with different LIM domain proteins and other factors to structurally support remote enhancer-promoter interactions .. +PF00538 linker_histone;
linker histone H1 and H5 family. Linker histone H1 is an essential component of chromatin structure. H1 links nucleosomes into higher order structures. Histone H1 is replaced by histone H5 in some cell types.. +PF04454 Encapsulating protein for peroxidase
The Linocin_M18 is found in eubacteria and archaea [1,2]. These proteins, referred to as encapsulins, form nanocompartments within the bacterium which contain ferritin-like proteins or peroxidases, enzymes involved in oxidative-stress response. These enzymes are targeted to the interior of encapsulins via unique C-terminal extensions .. +PF03583 Secretory lipase
Pfam-B_3085 (release 7.0). These lipases are expressed and secreted during the infection cycle of these pathogens. In particular, C. albicans has a large number of different lipases, possibly reflecting broad lipolytic activity, which may contribute to the persistence and virulence of C. albicans in human tissue .. +PF03279 Bacterial lipid A biosynthesis acyltransferase
Pfam-B_1803 (release 6.5). +PF00151 lipase;
+PF01674 Lipase (class 2)
Pfam-B_968 (release 4.1). This family consists of hypothetical C. elegans proteins and lipases. Lipases or triacylglycerol acylhydrolases hydrolyse ester bonds in triacylglycerol giving diacylglycerol, monoacylglycerol, glycerol and free fatty acids . Swiss:P37957 is an extracellular lipase from B. subtilis 168 .. +PF03280 Proteobacterial lipase chaperone protein
Pfam-B_4313 (release 6.5). +PF00657 GDSL-like Lipase/Acylhydrolase
Prosite & Pfam-B_543 (Release 7.5). +PF00061 lipocalin;
Lipocalin / cytosolic fatty-acid binding protein family. Prosite and HMM_iterative_training. Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The family also encompasses the enzyme prostaglandin D synthase (EC:5.3.99.2). Alignment subsumes both the lipocalin and fatty acid binding protein signatures from PROSITE. This is supported on structural and functional grounds. The structure is an eight-stranded beta barrel.. +PF00820 Borrelia lipoprotein
Pfam-B_1321 (release 2.1). This family of lipoproteins is found in Borrelia spirochetes. The function of these proteins is uncertain.. +PF03202 Putative mycoplasma lipoprotein, C-terminal region
Pfam-B_2205 (release 6.5). +PF03260 Lepidopteran low molecular weight (30 kD) lipoprotein
Pfam-B_4108 (release 6.5). +PF03330 Lipoprotein_13;
Rare lipoprotein A (RlpA)-like double-psi beta-barrel. Pfam-B_3255 (release 6.5). Rare lipoprotein A (RlpA) contains a conserved region that has the double-psi beta-barrel (DPBB) fold [3,4]. The function of RlpA is not well understood, but it has been shown to act as a prc mutant suppressor in Escherichia coli . The DPBB fold is often an enzymatic domain. The members of this family are quite diverse, and if catalytic this family may contain several different functions. Another example of this domain is found in the N terminus of pollen allergen.. +PF03640 Secreted repeat of unknown function
This family occurs as tandem repeats in a set of lipoproteins. The alignment contains a Y-X4-D motif.. +PF04791 LMBR1-like membrane protein
Pfam-B_6189 (release 7.5). Members of this family are integral membrane proteins that are around 500 residues in length. LMBR1 is not involved in preaxial polydactyly, as originally thought . Vertebrate members of this family may play a role in limb development . A member of this family has been shown to be a lipocalin membrane receptor . +PF03923 Uncharacterized lipoprotein
The function of this presumed lipoprotein is unknown. The family includes E. coli YajG Swiss:P36671.. +PF04200 Lipoprotein associated domain
Pfam-B_3382 (release 7.3). This presumed domain is about 100 amino acids in length. It is found in lipoprotein of unknown function and is greatly expanded in Mycoplasma pulmonis. The domain is found in up to five copies in some proteins. This family also includes the Mycoplasma arthritidis MAA2 variable surface protein. MAA2 is implicated in cytoadherence and virulence and has been shown to exhibit both size and phase variability .. +PF00921 Borrelia lipoprotein
Pfam-B_1509 (release 3.0). This family of lipoproteins is found in Borrelia spirochetes. The function of these proteins is uncertain.. +PF00938 Lipoprotein;
Pfam-B_1076 (release 3.0). This family of lipoproteins is Mycoplasma specific.. +PF01298 Transferrin binding protein-like solute binding protein
Pfam-B_893 (release 3.0). This family of proteins are distantly related to other families of solute binding proteins.. +PF01441 Lipoprotein
Prodom_1149 (release 99.1). Members of this family are lipoproteins that are probably involved in evasion of the host immune system by pathogens.. +PF01540 Adhesin lipoprotein
Pfam-B_615 (release 4.0). This family consists of the p50 and variable adherence-associated antigen (Vaa) adhesins from Mycoplasma hominis. M. hominis is a mycoplasma associated with human urogenital diseases, pneumonia, and septic arthritis . An adhesin is a cell surface molecule that mediates adhesion to other cells or to the surrounding surface or substrate. The Vaa antigen is a 50-kDa surface lipoprotein that has four tandem repetitive DNA sequences encoding a periodic peptide structure, and is highly immunogenic in the human host . p50 is also a 50-kDa lipoprotein, having three repeats A,B and C, that may be a tetramer of 191-kDa in its native environment .. +PF02030 Hypothetical lipoprotein (MG045 family)
This family includes hypothetical lipoproteins, the amino terminal part of this protein is related to Pfam:PF01547, a family of solute binding proteins. This suggests this family also has a solute binding function.. +PF03305 Mycoplasma MG185/MG260 protein
Pfam-B_4433 (release 6.5). Most of the aligned regions in this family are found towards the middle of the member proteins.. +PF00305 lipoxygenase;
+PF04778 LMP repeated region
Pfam-B_2380 (release 7.6). This family consists of a repeated sequence element found in the LMP group of surface-located membrane proteins of Mycoplasma hominis. The number of repeats in the protein affects the tendency of cells to spontaneously aggregate. Agglutination may be an important factor in colonisation. Non-agglutinating microorganisms might easily be distributed whereas aggregation might provide a better chance to avoid an antibody response since some of the epitopes may be buried .. +PF01451 Low molecular weight phosphotyrosine protein phosphatase
Prodom_2132 (release 99.1). +PF03548 Outer membrane lipoprotein carrier protein LolA
TIGRFAMs, Griffiths-Jones SR. +PF03550 Outer membrane lipoprotein LolB
TIGRFAMs, Griffiths-Jones SR. +PF04728 Lipoprotein leucine-zipper
This leucine-zipper is found in the enterobacterial outer membrane lipoprotein LPP. It is likely that this domain oligomerises and is involved in protein-protein interactions. As such it is a bundle of alpha-helical coiled-coils, which are known to play key roles in mediating specific protein-protein interactions for molecular recognition and the assembly of multi-protein complexes.. +PF02169 LPP20 lipoprotein
This family contains the LPP20 lipoprotein, which is a non-essential class of lipoprotein .. +PF04348 LppC putative lipoprotein
This family includes several bacterial outer membrane antigens, whose molecular function is unknown.. +PF02684 Lipid-A-disaccharide synthetase
This is a family of lipid-A-disaccharide synthetases, EC:2.4.2.128. These enzymes catalyse the reaction: UDP-2,3-bis(3-hydroxytetradecanoyl) glucosamine + 2,3-bis(3-hydroxytetradecanoyl)-beta-D-glucosaminyl 1-phosphate <=> UDP + 2,3-bis(3-hydroxytetradecanoyl)-D-glucosaminyl-1,6 -beta-D-2,3-bis(3-hydroxytetradecanoyl)-beta-D-glucosaminyl 1-phosphate. These enzymes catalyse the first disaccharide step in the synthesis of lipid-A-disaccharide.. +PF03331 UDP-3-O-acyl N-acetylglycosamine deacetylase
Pfam-B_3666 (release 6.5). The enzymes in this family catalyse the second step in the biosynthetic pathway for lipid A.. +PF02606 Tetraacyldisaccharide-1-P 4'-kinase
This family consists of tetraacyldisaccharide-1-P 4'-kinase also known as Lipid-A 4'-kinase or Lipid A biosynthesis protein LpxK, EC:2.7.1.130. This enzyme catalyses the reaction: ATP + 2,3-bis(3-hydroxytetradecanoyl)-D -glucosaminyl-(beta-D-1,6)-2,3-bis(3-hydroxytetradecanoyl)-D-glucosam inyl beta-phosphate <=> ADP + 2,3,2',3'-tetrakis(3-hydroxytetradecanoyl)-D- glucosaminyl-1,6-beta-D-glucosamine 1,4'-bisphosphate. This enzyme is involved in the synthesis of lipid A portion of the bacterial lipopolysaccharide layer (LPS) . The family contains a P-loop motif at the N terminus.. +PF03788 LrgA family
This family is uncharacterised. It contains the protein LrgA that has been hypothesised to export murein hydrolases .. +PF04172 LrgB-like family
TIGRFAMs (release 2.0);. The two products of the lrgAB operon are potential membrane proteins, and LrgA and LrgB are both thought to control murein hydrolase activity and penicillin tolerance .. +PF01462 Leucine rich repeat N-terminal domain
Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the N-terminus of tandem leucine rich repeats.. +PF01816 Leucine rich repeat variant
The function of this repeat is unknown. It has an unusual structure of two helices. One is an alpha helix, the other is the much rarer 3-10 helix.. +PF05083 LST-1 protein
Pfam-B_6166 (release 7.7). B144/LST1 is a gene encoded in the human major histocompatibility complex that produces multiple forms of alternatively spliced mRNA and encodes peptides fewer than 100 amino acids in length. B144/LST1 is strongly expressed in dendritic cells. Transfection of B144/LST1 into a variety of cells induces morphologic changes including the production of long, thin filopodia .. +PF00677 Lumazine binding domain
Pfam-B_291 (release 2.1). This domain binds to derivatives of lumazine in some proteins. Some proteins have lost the residues involved in binding lumazine.. +PF00894 Luteovirus coat protein
Pfam-B_123 (release 3.0). +PF02122 Luteo_ORF2;
This family contains polyprotein processing endopeptidases from RNA viruses.. +PF04662 Luteovirus P0 protein
Pfam-B_4444 (release 7.5) & Pfam-B_3579 (release 10.0). This family of proteins may be involved in suppression of PTGS a plant defence mechanism .. +PF01659 Luteovirus putative VPg genome linked protein
Pfam-B_970 (release 4.1). This family consists of several putative genome linked proteins. The genomic RNA of luteoviruses are linked to virally encoded genome proteins (VPg). Open reading frame 4 is thought to encode the VPg in Soybean dwarf luteovirus . Luteoviruses have isometric capsids that contain a positive strand ssRNA genome, they have no DNA stage during their replication.. +PF04443 Acyl-protein synthetase, LuxE
LuxE is an acyl-protein synthetase found in bioluminescent bacteria. LuxE catalyses the formation of an acyl-protein thioester from a fatty acid and a protein. This is the second step in the bioluminescent fatty acid reduction system, which converts tetradecanoic acid to the aldehyde substrate of the luciferase-catalysed bioluminescence reaction. A conserved cysteine found at position 364 in Photobacterium phosphoreum LuxE (Swiss:Q52100) is thought to be acylated during the transfer of the acyl group from the synthetase subunit to the reductase. The carboxyl terminal of the synthetase is thought to act as a flexible arm to transfer acyl groups between the sites of activation and reduction . This family also includes Vibrio cholerae RBFN protein (Swiss:Q06961), which is involved in the biosynthesis of the O-antigen component 3-deoxy-L-glycero-tetronic acid.. +PF02664 S-Ribosylhomocysteinase (LuxS)
This family consists of the LuxS protein involved in autoinducer AI2 synthesis and its hypothetical relatives. S-ribosylhomocysteinase (LuxS) catalyses the cleavage of the thioether bond in S-ribosylhomocysteine (SRH) to produce homocysteine and 4,5-dihydroxy-2,3-pentanedione (DPD), the precursor of type II bacterial quorum sensing molecule. . +PF00206 lyase_1;
+PF02278 Polysaccharide lyase family 8, super-sandwich domain
Pfam-B_4840 (release 5.2). This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen.. +PF02884 Polysaccharide lyase family 8, C-terminal beta-sandwich domain
Pfam-B_4840 (release 5.2). This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen.. +PF00062 lys;
C-type lysozyme/alpha-lactalbumin family. Overington and HMM_iterative_training. Alpha-lactalbumin is the regulatory subunit of lactose synthase, changing the substrate specificity of galactosyltransferase from N-acetylglucosamine to glucose. C-type lysozymes are secreted bacteriolytic enzymes that cleave the peptidoglycan of bacterial cell walls. Structure is a multi-domain, mixed alpha and beta fold, containing four conserved disulfide bonds.. +PF01810 LysE type translocator
Pfam-B_1537 (release 4.2) & Pfam-B_7916 (Release 8.0). This family consists of various hypothetical proteins and an l-lysine exporter LysE Swiss:P94633 from Corynebacterium glutamicum which is proposed to be the first of a novel family of translocators . LysE exports l-lysine from the cell into the surrounding medium and is predicted to span the membrane six times . The physiological function of the exporter is to excrete excess l-Lysine as a result of natural flux imbalances or peptide hydrolysis; and also after artificial deregulation of l-Lysine biosynthesis as used by the biotechnology industry for the production of l-lysine .. +PF03641 Possible lysine decarboxylase
Pfam-B_741 (release 7.0). The members of this family share a highly conserved motif PGGXGTXXE that is probably functionally important. This family includes proteins annotated as lysine decarboxylases, although the evidence for this is not clear.. +PF02402 Lysis protein
Pfam-B_1555 (release 5.4). +PF04971 Lysis protein S
Pfam-B_7633 (release 7.0). The lysis S protein is a cytotoxic protein forming holes in membranes causing cell lysis. The action of Lysis S is independent of the proportion of acidic phospholipids in the membrane .. +PF01186 Lysyl oxidase
+PF02401 LytB protein
Pfam-B_1515 (release 5.4). The mevalonate-independent 2-C-methyl-D-erythritol 4-phosphate (MEP) pathway for isoprenoid biosynthesis is essential in many eubacteria, plants, and the malaria parasite. The LytB gene is involved in the trunk line of the MEP pathway.. +PF04397 LytTr DNA-binding domain
This domain is found in a variety of bacterial transcriptional regulators. The domain binds to a specific DNA sequence pattern (see ). . +PF02370 M protein repeat
Pfam-B_208 (release 5.2). This short repeat is found in multiple copies in bacterial M proteins. The M proteins bind to IgA and are closely associated with virulence. The M protein has been postulated to be a major group A Streptococcal (GAS) virulence factor because of its contribution to the bacterial resistance to opsonophagocytosis .. +PF03855 M-factor
The M-factor is a pheromone produced upon nitrogen starvation. The production of M-factor is increased by the pheromone signal. The protein undergoes post-translational modification, to remove the C-terminal signal peptide, the carboxy-terminal cysteine residue is carboxy-methylated and S-alkylated, with a farnesyl residue .. +PF05034 MAAL;
Methylaspartate ammonia-lyase N-terminus. Methylaspartate ammonia-lyase EC:4.3.1.2 catalyses the second step of fermentation of glutamate. It is a homodimer. This family represents the N-terminal region of Methylaspartate ammonia-lyase. This domain is structurally related to Pfam:PF03952 . This domain is associated with the catalytic domain Pfam:PF07476.. +PF03281 Mab-21 protein
Pfam-B_4530 (release 6.5). This family contains Mab-21 and Mab-21 like proteins. In C. elegans these proteins are required for several aspects of embryonic development [2-3].. +PF01823 MAC/Perforin domain
The membrane-attack complex (MAC) of the complement system forms transmembrane channels. These channels disrupt the phospholipid bilayer of target cells, leading to cell lysis and death. A number of proteins participate in the assembly of the MAC. Freshly activated C5b binds to C6 to form a C5b-6 complex, then to C7 forming the C5b-7 complex. The C5b-7 complex binds to C8, which is composed of three chains (alpha, beta, and gamma), thus forming the C5b-8 complex. C5b-8 subsequently binds to C9 and acts as a catalyst in the polymerisation of C9. Active MAC has a subunit composition of C5b-C6-C7-C8-C9{n}. Perforin is a protein found in cytolytic T-cell and killer cells. In the presence of calcium, perforin polymerises into transmembrane tubules and is capable of lysing, non-specifically, a variety of target cells. There are a number of regions of similarity in the sequences of complement components C6, C7, C8-alpha, C8-beta, C9 and perforin. The X-ray crystal structure of a MACPF domain reveals that it shares a common fold with bacterial cholesterol dependent cytolysins (Pfam:PF01289) such as perfringolysin O. Three key pieces of evidence suggests that MACPF domains and CDCs are homologous: Functional similarity (pore formation), conservation of three glycine residues at a hinge in both families and conservation of a complex core fold . . +PF03523 Macrophage scavenger receptor
+PF03817 Malonate transporter MadL subunit
TIGRFAMs, Griffiths-Jones SR. +PF03818 Malonate/sodium symporter MadM subunit
TIGRFAMs, Griffiths-Jones SR. +PF02545 Maf-like protein
Maf is a putative inhibitor of septum formation in eukaryotes, bacteria, and archaea.. +PF02792 Mago nashi protein
This family was originally identified in Drosophila and called mago nashi, it is a strict maternal effect, grandchildless-like, gene . The human homologue has been shown to interact with an RNA binding protein Swiss:Q9Y5S9 . An RNAi knockout of the C. elegans homologue causes masculinization of the germ line (Mog phenotype) hermaphrodites, suggesting it is involved in hermaphrodite germ-line sex determination . Mago nashi has been found to be part of the exon-exon junction complex that binds 20 nucleotides upstream of exon-exon junctions .. +PF03082 Male accessory gland secretory protein
Pfam-B_256 (release 6.4). The accessory gland of male insects is a genital tissue that secretes many components of the ejaculatory fluid, some of which affect the female's receptivity to courtship and her rate of oviposition. This protein is expressed exclusively in the male accessory glands of adult Drosophila melanogaster. The proteins are transferred to the female fly during copulation and are rapidly altered in the female genital tract .. +PF04112 Mak10 subunit, NatC N(alpha)-terminal acetyltransferase
Pfam-B_9176 (release 7.3);. NatC N(alpha)-terminal acetyltransferases contains Mak10p, Mak31p and Mak3p subunits. All three subunits are associated with each other to form the active complex . . +PF04874 Mak16 protein C-terminal region
Pfam-B_4960 (release 7.6). The precise function of this eukaryotic protein family is unknown. The yeast orthologues have been implicated in cell cycle progression and biogenesis of 60S ribosomal subunits. The Schistosoma mansoni Mak16 has been shown to target protein transport to the nucleolus .. +PF01274 Malate synthase
+PF02330 Mitochondrial glycoprotein
Pfam-B_17905 (release 5.2). This mitochondrial matrix protein family contains members of the MAM33 family which bind to the globular 'heads' of C1Q. It is thought to be involved in mitochondrial oxidative phosphorylation and in nucleus-mitochondrion interactions .. +PF02157 Mannose-6-phosphate receptor
This family includes both Cation-dependent and cation independent mannose-6-phosphate receptors.. +PF01232 Mannitol_dh_N;
Mannitol dehydrogenase Rossmann domain. +PF01050 Mannose-6-phosphate isomerase
Pfam-B_899 (release 3.0). All of the members of this Pfam entry belong to family 2 of the mannose-6-phosphate isomerases. The type II phosphomannose isomerases are bifunctional enzymes. This Pfam entry covers the isomerase domain. The guanosine diphospho-D-mannose pyrophosphorylase domain is in another Pfam entry, see Pfam:PF00483.. +PF05007 Mannosyltransferase (PIG-M)
Pfam-B_5638 (release 7.6). PIG-M has a DXD motif. The DXD motif is found in many glycosyltransferases that utilise nucleotide sugars. It is thought that the motif is involved in the binding of a manganese ion that is required for association of the enzymes with nucleotide sugar substrates .. +PF01575 MaoC_like;
Pfam-B_297 (release 4.0). The maoC gene is part of an operon with maoA which is involved in the synthesis of monoamine oxidase . The MaoC protein is found to share similarity with a wide variety of enzymes; estradiol 17 beta-dehydrogenase 4, peroxisomal hydratase-dehydrogenase-epimerase, fatty acid synthase beta subunit. Several bacterial proteins that are composed solely of this domain have (R)-specific enoyl-CoA hydratase activity . This domain is also present in the NodN nodulation protein N.. +PF03642 MAP domain
Pfam-B_1396 (release 7.0). This presumed 110 amino acid residue domain is found in multiple copies in MAP (MHC class II analogue protein) Swiss:Q9Z4J2 . The protein has been found in a wide range of extracellular matrix proteins .. +PF02991 MAP1_LC3;
Autophagy protein Atg8 ubiquitin like. Pfam-B_1384 (release 6.4). Light chain 3 is proposed to function primarily as a subunit of microtubule associated proteins 1A and 1B and that its expression may regulate microtubule binding activity . Autophagy is generally known as a process involved in the degradation of bulk cytoplasmic components that are non-specifically sequestered into an autophagosome, where they are sequestered into double-membrane vesicles and delivered to the degradative organelle, the lysosome/vacuole, for breakdown and eventual recycling of the resulting macromolecules. The yeast proteins are involved in the autophagosome, and Atg8 binds Atg19, via its N-terminus and the C-terminus of Atg19.. +PF00414 Neuraxin and MAP1B repeat
+PF01124 FLAP;
This family has been called MAPEG (Membrane Associated Proteins in Eicosanoid and Glutathione metabolism). It includes proteins such as Prostaglandin E synthase. This enzyme catalyses the synthesis of PGE2 from PGH2 (produced by cyclooxygenase from arachidonic acid). Because of structural similarities in the active sites of FLAP, LTC4 synthase and PGE synthase, substrates for each enzyme can compete with one another and modulate synthetic activity.. +PF01914 UPF0056;
MarC family integral membrane protein. Integral membrane protein family that includes the antibiotic resistance protein MarC. These proteins may be transporters.. +PF02063 MARCKS family
+PF02124 Marek's disease glycoprotein A
+PF01047 MarR family
Pfam-B_269 (release 3.0). The Mar proteins are involved in the multiple antibiotic resistance, a non-specific resistance system. The expression of the mar operon is controlled by a repressor, MarR. A large number of compounds induce transcription of the mar operon. This is thought to be due to the compound binding to MarR, and the resulting complex stops MarR binding to the DNA. With the MarR repression lost, transcription of the operon proceeds . The structure of MarR is known and shows MarR as a dimer with each subunit containing a winged-helix DNA binding motif.. +PF02064 MAS20 protein import receptor
+PF04769 Mating-type protein MAT alpha 1
This family includes Saccharomyces cerevisiae mating type protein alpha 1 (Swiss:P01365). Mat alpha 1 is a transcription activator which activates mating-type alpha-specific genes.\. MAT alpha 1 and MCM 1 bind cooperatively to PQ elements upstream of alpha-specific genes .\. Alpha 1 interacts in vivo with STE12, linking expression of alpha-specific genes to the alpha-pheromone (Pfam:PF04648) response pathway .. +PF01554 UPF0013;
Pfam-B_163 (release 4.0). +PF01824 MatK/TrnK amino terminal region
Pfam-B_30 (release 4.2). The function of this region is unknown.. +PF00661 Viral matrix protein
Pfam-B_128 (release 2.1). Found in Morbillivirus and paramyxovirus, pneumovirus.. +PF03819 MazG nucleotide pyrophosphohydrolase domain
+PF04837 MbeB-like, N-term conserved region
Pfam-B_3854 (release 7.6). This family represents an N-terminal conserved region of MbeB/MobB proteins. These proteins are essential for specific plasmid transfer.. +PF04899 MbeD/MobD like
Pfam-B_5673 (release 7.6). The MbeD and MobD proteins are plasmid encoded, and are involved in the plasmids mobilisation and transfer in the presence of conjugative plasmids .. +PF03621 MbtH-like protein
Yeats C, Eberhardt R. This domain is found in the MbtH protein Swiss:O05821 as well as at the N terminus of the antibiotic synthesis protein NIKP1. MbtH and its homologues were first noted in gene clusters involved in non-ribosomal peptides and other secondary metabolites by Quadri et al . This domain is about 70 amino acids long and contains 3 fully conserved tryptophan residues . The structure of the PA2412 protein shows it adopts a beta-beta-beta-alpha-alpha topology with the short C-terminal helix forming the tip of an overall arrowhead shape . MbtH proteins have been shown to be required for the synthesis of antibiotics, siderophores and glycopeptidolipids [3-6].. +PF02289 Cyclohydrolase (MCH)
Pfam-B_6511 (release 5.2). Methenyl tetrahydromethanopterin cyclohydrolase EC:3.5.4.27 is involved in methanogenesis in bacteria and archaea, producing methane from carbon monoxide or carbon dioxide.. +PF00493 MCM2/3/5 family
+PF00015 Methyl-accepting chemotaxis protein (MCP) signalling domain
Blast MCP1_ECOLI/361-421. This domain is thought to transduce the signal to CheA since it is highly conserved in very diverse MCPs.. +PF02993 Minor capsid protein VI
Pfam-B_1634 (release 6.4). This minor capsid protein may act as a link between the external capsid and the internal DNA-protein core.\. The C-terminal 11 residues may function as a protease cofactor leading to enzyme activation .. +PF02249 Methyl-coenzyme M reductase alpha subunit, C-terminal domain
Pfam-B_2706 (release 5.2). Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (this family), 2 beta (Pfam:PF02241), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites . The C-terminal domain is comprised of an all-alpha multi-helical bundle.. +PF02745 Methyl-coenzyme M reductase alpha subunit, N-terminal domain
Pfam-B_2706 (release 5.2). Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (this family), 2 beta (Pfam:PF02241), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites . The N-terminal domain has a ferredoxin-like fold.. +PF02241 Methyl-coenzyme M reductase beta subunit, C-terminal domain
Pfam-B_2692 (release 5.2). Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (this family), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites . The C-terminal domain of MCR beta has an all-alpha fold with buried central helix.. +PF02783 Methyl-coenzyme M reductase beta subunit, N-terminal domain
Pfam-B_2692 (release 5.2). Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (this family), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites . The N-terminal domain has an alpha/beta ferredoxin-like fold.. +PF02505 Methyl-coenzyme M reductase operon protein D
Pfam-B_2115 (release 5.4). Methyl coenzyme M reductase (MCR) catalyses the final step in methanogenesis. MCR is composed of three subunits, alpha (Pfam:PF02249), beta (Pfam:PF02241) and gamma (Pfam:PF02240) . Genes encoding the beta (mcrB) and gamma (mcrG) subunits are separated by two open reading frames coding for two proteins C and D . The function of proteins C and D (this family) is unknown.. +PF02240 Methyl-coenzyme M reductase gamma subunit
Pfam-B_2713 (release 5.2). Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (Pfam:PF02241), and 2 gamma (this family) subunits with two identical nickel porphinoid active sites .. +PF02315 Methanol dehydrogenase beta subunit
Pfam-B_12628 (release 5.2). Methanol dehydrogenase (MDH) is a bacterial periplasmic quinoprotein that oxidises methanol to formaldehyde. MDH is a tetramer of two alpha and two beta subunits. This family contains the small beta subunit.. +PF04349 Periplasmic glucan biosynthesis protein, MdoG
This family represents MdoG, a protein that is necessary for the synthesis of periplasmic glucans. The function of MdoG remains unknown. It has been suggested that it may catalyse the addition of branches to a linear glucan backbone.. +PF02975 Me-amine-deh_L;
Methylamine dehydrogenase, L chain. +PF04934 MED6;
MED6 mediator sub complex component. Pfam-B_4045 (release 7.6). Component of RNA polymerase II holoenzyme and mediator sub complex.. +PF03525 Meiotic recombination protein rec114
+PF03243 Alkylmercury lyase
Pfam-B_3505 (release 6.5). Alkylmercury lyase (EC:4.99.1.2) cleaves the carbon-mercury bond of organomercurials such as phenylmercuric acetate.. +PF02065 Melibiase
Glycoside hydrolase families GH27, GH31 and GH36 form the glycoside hydrolase clan GH-D. Glycoside hydrolase family 36 can be split into 11 families, GH36A to GH36K . This family includes enzymes from GH36A-B and GH36D-K and from GH27.. +PF01372 Melittin
+PF02964 Methane monooxygenase, hydrolase gamma chain
+PF03203 MerC mercury resistance protein
Pfam-B_2720 (release 6.5). +PF05052 MerE protein
Pfam-B_5840 (release 7.7). The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2]. . +PF00376 merR;
MerR family regulatory protein. Prosite & Pfam-B_3021 (Release 7.5). +PF02411 MerT mercuric transport protein
Pfam-B_1796 (release 5.4). MerT is a mercuric transport integral membrane protein and is responsible for transport of the Hg2+ ion from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE).. +PF02475 Met-10+ like-protein
Pfam-B_2239 (release 5.4). The methionine-10 mutant allele of N. crassa codes for a protein of unknown function, Swiss:O27901. However, homologous proteins have been found in yeast (Swiss:P38793) suggesting this protein may be involved in methionine biosynthesis, transport and/or utilisation .. +PF02965 Vitamin B12 dependent methionine synthase, activation domain
Griffiths-Jones SR, Eberhardt R. +PF03724 DUF306;
Small domain family found in proteins of unknown function. Some are secreted (e.g. Swiss:O25998) and implicated in motility in bacteria. Also occurs in Leishmania spp. as an essential gene. Over-expression in L.amazonensis increases virulence (Swiss:O43987; ). A pair of cysteine residues show correlated conservation, suggesting that they form a disulphide bond.. +PF01676 Metalloenzyme superfamily
Pfam-B_1926 (release 4.1). This family includes phosphopentomutase Swiss:P07651 and 2,3-bisphosphoglycerate-independent phosphoglycerate mutase, Swiss:P37689. This family is also related to Pfam:PF00245 . The alignment contains the most conserved residues that are probably involved in metal binding and catalysis.. +PF02066 Metallothionein family 11
+PF01439 Metallothionein
Prodom_1611 (release 99.1). Members of this family are metallothioneins. These proteins are cysteine rich proteins that bind to heavy metals. Members of this family appear to be closest to Class II metallothioneins, seed Pfam:PF00131.. +PF02067 Metallothionein family 5
+PF02068 Plant PEC family metallothionein
+PF02069 Prokaryotic metallothionein
+PF00131 metalthio;
+PF01717 Methionine_synt;
Cobalamin-independent synthase, Catalytic domain. Pfam-B_1909 (release 4.1). This is a family of vitamin-B12 independent methionine synthases or 5-methyltetrahydropteroyltriglutamate--homocysteine methyltransferases, EC:2.1.1.14 from bacteria and plants. Plants are the only higher eukaryotes that have the required enzymes for methionine synthesis . This enzyme catalyses the last step in the production of methionine by transferring a methyl group from 5-methyltetrahydrofolate to homocysteine . The aligned region makes up the carboxy region of the approximately 750 amino acid protein except in some hypothetical archaeal proteins present in the family, where this region corresponds to the entire length. This domain contains the catalytic residues of the enzyme .. +PF01035 Methlytrans; Methyltrans; Methyltransf_1;
6-O-methylguanine DNA methyltransferase, DNA binding domain. Pfam-B_1191 (release 3.0). This domain is a 3 helical bundle.. +PF02870 Methlytrans; Methyltrans;
6-O-methylguanine DNA methyltransferase, ribonuclease-like domain. Pfam-B_1191 (release 3.0). +PF00891 Methyltransf;
Pfam-B_152 (release 3.0). This family includes a range of O-methyltransferases. These enzymes utilise S-adenosyl methionine.. +PF01596 O-methyltransferase
Pfam-B_749 (release 4.1). Members of this family are O-methyltransferases. The family includes catechol o-methyltransferase Swiss:P21964, caffeoyl-CoA O-methyltransferase Swiss:Q43095 and a family of bacterial O-methyltransferases that may be involved in antibiotic production .. +PF02390 Putative methyltransferase
Pfam-B_1023 (release 5.2). This is a family of putative methyltransferases. The aligned region contains the GXGXG S-AdoMet binding site suggesting a putative methyltransferase activity.. +PF03737 Demethylmenaquinone methyltransferase
Members of this family are demethylmenaquinone methyltransferases that convert dimethylmenaquinone (DMK) to menaquinone (MK) in the final step of menaquinone biosynthesis. This region is also found at the C-terminus of the DlpA protein Swiss:Q48806.. +PF03492 Methytransf_6;
SAM dependent carboxyl methyltransferase. Pfam-B_1148 (release 7.0). This family of plant methyltransferases contains enzymes that act on a variety of substrates including salicylic acid, jasmonic acid and 7-Methylxanthine. Caffeine is synthesised through sequential three-step methylation of xanthine derivatives at positions 7-N, 3-N, and 1-N. The protein 7-methylxanthine methyltransferase (designated as CaMXMT) catalyses the second step to produce theobromine .. +PF02086 D12 class N6 adenine-specific DNA methyltransferase
+PF01340 Met Apo-repressor, MetJ
+PF04648 Yeast mating factor alpha hormone
The hormone is excreted into the culture medium by haploid cells of the alpha mating type and acts on cells of the opposite mating type (type A). It inhibits DNA synthesis in type A cells synchronising them with type alpha, and so mediates the conjugation process.. +PF04202 Foot protein 3
Pfam-B_1860 (release 7.3). Mytilus foot protein-3 (Mfp-3) is a highly polymorphic protein family located in the byssal adhesive plaques of blue mussels.. +PF01078 Magnesium chelatase, subunit ChlI
Pfam-B_616 (release 3.0). Magnesium-chelatase is a three-component enzyme that catalyses the insertion of Mg2+ into protoporphyrin IX. This is the first unique step in the synthesis of (bacterio)chlorophyll. Due to this, it is thought that Mg-chelatase has an important role in channelling intermediates into the (bacterio)chlorophyll branch in response to conditions suitable for photosynthetic growth. ChlI and BchD have molecular weight between 38-42 kDa.. +PF05043 Mga helix-turn-helix domain
Pfam-B_5126 (release 7.7). M regulator protein trans-acting positive regulator (Mga) is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions . This domain is found in the centre of the Mga proteins. This family also contains a number of bacterial RofA transcriptional regulators that seem to be largely restricted to streptococci. These proteins have been shown to regulate the expression of important bacterial adhesins . This is presumably a DNA-binding domain.. +PF05220 MgpC protein precursor
Pfam-B_6685 (release 7.7). This family contains several Mycoplasma MgpC like-proteins.. +PF02308 MgtC family
The MgtC protein is found in an operon with the Mg2+ transporter protein MgtB. The function of MgtC and its homologues is not known.. +PF03448 MgtE intracellular N domain
This domain is found at the N-terminus of eubacterial magnesium transporters of the MgtE family Pfam:PF01769. This domain is an intracellular domain that has an alpha-helical structure. The crystal structure of the MgtE transporter shows two of 5 magnesium ions are in the interface between the N domain and the CBS domains. In the absence of magnesium there is a large shift between the N and CBS domains.. +PF03165 MH1 domain
Pfam-B_519 (release 3.0). The MH1 (MAD homology 1) domain is found at the amino terminus of MAD related proteins such as Smads. This domain is separated from the MH2 domain by a non-conserved linker region. The crystal structure of the MH1 domain shows that a highly conserved 11 residue beta hairpin is used to bind the DNA consensus sequence GNCN in the major groove, shown to be vital for the transcriptional activation of target genes. Not all examples of MH1 can bind to DNA however. Smad2 cannot bind DNA and has a large insertion within the hairpin that presumably abolishes DNA binding. A basic helix (H2) in MH1 with the nuclear localisation signal KKLKK has been shown to be essential for Smad3 nuclear import. Smads also use the MH1 domain to interact with transcription factors such as Jun, TFE3, Sp1, and Runx [1,3].. +PF03166 MH2 domain
Pfam-B_519 (release 3.0). This is the MH2 (MAD homology 2) domain found at the carboxy terminus of MAD related proteins such as Smads. This domain is separated from the MH1 domain by a non-conserved linker region. The MH2 domain mediates interaction with a wide variety of proteins and provides specificity and selectivity to Smad function and also is critical for mediating interactions in Smad oligomers. Unlike MH1, MH2 does not bind DNA. The well-studied MH2 domain of Smad4 is composed of five alpha helices and three loops enclosing a beta sandwich. Smads are involved in the propagation of TGF-beta signals by direct association with the TGF-beta receptor kinase which phosphorylates the last two Ser of a conserved 'SSXS' motif located at the C-terminus of MH2 [1-3].. +PF00129 Class I Histocompatibility antigen, domains alpha 1 and 2
+PF03707 SPNTR;
Bacterial signalling protein N terminal repeat. Found as an N terminal triplet tandem repeat in bacterial signalling proteins. Family includes CoxC (Swiss:Q9KX27) and CoxH (Swiss:Q9KX23) from P.carboxydovorans. Each repeat contains two transmembrane helices. Domain is also described as the MHYT domain .. +PF02426 Muconolactone delta-isomerase
Pfam-B_2784 (release 5.4). This small enzyme forms a homodecameric complex, that catalyses the third step in the catabolism of catechol to succinate- and acetyl-coa in the beta-ketoadipate pathway EC:5.3.3.4. The protein has a ferredoxin-like fold according to SCOP.. +PF03526 Colicin E1 (microcin) immunity protein
+PF04687 Microvirus H protein (pilot protein)
A single molecule of H protein is found on each of the 12 spikes on the microvirus shell. H is involved in the ejection of the phage DNA, and at least one copy is injected into the host's periplasmic space along with the ssDNA viral genome . Part of H is thought to lie outside the shell, where it recognises lipopolysaccharide from virus-sensitive strains . Part of H may lie within the capsid, since mutations in H can influence the DNA ejection mechanism by affecting the DNA-protein interactions . H may span the capsid through the hydrophilic channels formed by G proteins .. +PF04726 Microvirus J protein
This small protein is involved in DNA packaging, interacting with DNA via its hydrophobic carboxyl terminus. In bacteriophage phi-X174, J is present in 60 copies, and forms an S-shaped polypeptide chain without any secondary structure. It is thought to interact with DNA through simple charge interactions .. +PF04517 Microvirus lysis protein (E), C terminus
E protein causes host cell lysis by inhibiting MraY, a peptidoglycan biosynthesis enzyme. This leads to cell wall failure at septation . The N terminal transmembrane region matches the signal peptide model and must be omitted from the family.. +PF04478 Mid2 like cell wall stress sensor
This family represents a region near the C terminus of Mid2, which contains a transmembrane region. The remainder of the protein sequence is serine-rich and of low complexity, and is therefore impossible to align accurately. Mid2 is thought to act as a mechanosensor of cell wall stress. The C-terminal cytoplasmic region of Mid2 is known to interact with Rom2, a guanine nucleotide exchange factor (GEF) for Rho1, which is part of the cell wall integrity signalling pathway [1].. +PF01187 Macrophage migration inhibitory factor (MIF)
+PF03775 MinC;
Septum formation inhibitor MinC, C-terminal domain. In Escherichia coli Swiss:P06138 assembles into a Z ring at midcell while assembly at polar sites is prevented by the min system. MinC Swiss:P18196 a component of this system, is an inhibitor of FtsZ assembly that is positioned within the cell by interaction with MinDE. MinC is an oligomer, probably a dimer . The C terminal half of MinC is the most conserved and interacts with MinD. The N terminal half is thought to interact with FtsZ.. +PF05209 Septum formation inhibitor MinC, N-terminal domain
In Escherichia coli Swiss:P06138 assembles into a Z ring at midcell while assembly at polar sites is prevented by the min system. MinC Swiss:P18196 a component of this system, is an inhibitor of FtsZ assembly that is positioned within the cell by interaction with MinDE. MinC is an oligomer, probably a dimer . The C terminal half of MinC is the most conserved and interacts with MinD. The N terminal half is thought to interact with FtsZ.. +PF03776 Septum formation topological specificity factor MinE
The E. coli minicell locus was shown to code for three gene products (MinC, MinD, and MinE) whose coordinate action is required for proper placement of the division septum. The minE gene codes for a topological specificity factor that, in wild-type cells, prevents the division inhibitor from acting at internal division sites while permitting it to block septation at polar sites .. +PF00230 Major intrinsic protein
MIP (Major Intrinsic Protein) family proteins exhibit essentially two distinct types of channel properties: (1) specific water transport by the aquaporins, and (2) small neutral solutes transport, such as glycerol by the glycerol facilitators .. +PF03094 Mlo family
Pfam-B_2483 (release 6.4). A family of plant integral membrane proteins, first discovered in barley. Mutants lacking wild-type Mlo proteins show broad spectrum resistance to the powdery mildew fungus, and dysregulated cell death control, with spontaneous cell death in response to developmental or abiotic stimuli. Thus wild-type Mlo proteins are thought to be inhibitors of cell death whose deficiency lowers the threshold required to trigger the cascade of events that result in plant cell death. Mlo proteins are localised in the plasma membrane and possess seven transmembrane regions; thus the Mlo family is the only major higher plant family to possess 7 transmembrane domains. It has been suggested that Mlo proteins function as G-protein coupled receptors in plants ; however the molecular and biological functions of Mlo proteins remain to be fully determined.. +PF03304 Lipoprotein_12;
Mlp lipoprotein family. Pfam-B_1663 (release 6.5). The Mlp (for Multicopy Lipoprotein) family of lipoproteins is found in Borrelia species . This family were previously known as 2.9 lipoprotein genes . These surface expressed genes may represent new candidate vaccinogens for Lyme disease . Members of this family generally are downstream of four ORFs called A,B,C and D that are involved in hemolytic activity. . +PF03562 MltA specific insert domain
Pfam-B_1127 (release 7.0). This beta barrel domain is found inserted in the MltA a murein degrading transglycosylase enzyme . This domain may be involved in peptidoglycan binding.. +PF01642 Methylmalonyl-CoA mutase
Pfam-B_1611 (release 4.1). The enzyme methylmalonyl-CoA mutase is a member of a class of enzymes that uses coenzyme B12 (adenosylcobalamin) as a cofactor. The enzyme induces the formation of an adenosyl radical from the cofactor. This radical then initiates a free-radical rearrangement of its substrate, succinyl-CoA, to methylmalonyl-CoA .. +PF03972 MmgE/PrpD family
This family includes 2-methylcitrate dehydratase EC:4.2.1.79 (PrpD) that is required for propionate catabolism. It catalyses the third step of the 2-methylcitric acid cycle.. +PF02406 MmoB/DmpM family
Pfam-B_1148 (release 5.2). This family consists of monooxygenase components such as MmoB methane monooxygenase (EC:1.14.13.25) regulatory protein B. When MmoB is present at low concentration it converts methane monooxygenase from an oxidase to a hydroxylase and stabilises intermediates required for the activation of dioxygen . Also found in this family is DmpM or Phenol hydroxylase (EC:1.14.13.7) protein component P2, this protein lacks redox co-factors and is required for optimal turnover of Phenol hydroxylase .. +PF03176 MMPL family
Pfam-B_357 (release 6.5). Members of this family are putative integral membrane proteins from bacteria. Several of the members are mycobacterial proteins. Many of the proteins contain two copies of this aligned region. The function of these proteins is not known, although it has been suggested that they may be involved in lipid transport .. +PF01926 50S ribosome-binding GTPase
The full-length GTPase protein is required for the complete activity of the protein of interacting with the 50S ribosome and binding of both adenine and guanine nucleotides, with a preference for guanine nucleotide.. +PF01054 Mouse mammary tumour virus superantigen
Pfam-B_518 (release 3.0). The mouse mammary tumour virus (MMTV) is a milk-transmitted type B retrovirus. The superantigen (SAg) is encoded by the long terminal repeat. The SAgs are also called PR73.. +PF05067 Manganese containing catalase
Catalases are important antioxidant metalloenzymes that catalyse disproportionation of hydrogen peroxide, forming dioxygen and water. Two families of catalases are known, one having a heme cofactor, and this family that is a structurally distinct family containing non-heme manganese .. +PF03962 Mnd1 family
This family of proteins includes MND1 from S. cerevisiae. The mnd1 protein forms a complex with hop2 to promote homologous chromosome pairing and meiotic double-strand break repair .. +PF04039 Domain related to MnhB subunit of Na+/H+ antiporter
Possible subunit of Na+/H+ antiporter , . Predicted integral membrane protein, usually four transmembrane regions in this domain. Often found in bacterial NADH dehydrogenase subunit.. +PF01899 DUF68;
Na+/H+ ion antiporter subunit. Subunit of a Na+/H+ Prokaryotic antiporter complex ( , ).. +PF03404 Mo-co oxidoreductase dimerisation domain
This domain is found in molybdopterin cofactor (Mo-co) oxidoreductases. It is involved in dimer formation, and has an Ig-fold structure .. +PF01967 MoaC family
Members of this family are involved in molybdenum cofactor biosynthesis. However their molecular function is not known.. +PF02391 MoeA; MoeE;
Pfam-B_1056 (release 5.2). This family contains the MoaE protein that is involved in biosynthesis of molybdopterin . Molybdopterin, the universal component of the pterin molybdenum cofactors, contains a dithiolene group serving to bind Mo. Addition of the dithiolene sulfurs to a molybdopterin precursor requires the activity of the converting factor. Converting factor contains the MoaE and MoaD proteins.. +PF01076 Plasmid recombination enzyme
Pfam-B_717 (release 3.0). With some plasmids, recombination can occur in a site specific manner that is independent of RecA. In such cases, the recombination event requires another protein called Pre. Pre is a plasmid recombination enzyme. This protein is also known as Mob (conjugative mobilisation).. +PF03389 MobA/MobL family
Pfam-B_3424 (release 6.6). This family includes the MobA protein from the E. coli plasmid RSF1010, and the MobL protein from the Thiobacillus ferrooxidans plasmid PTF1. These sequences are mobilisation proteins, which are essential for specific plasmid transfer.. +PF03205 Molybdopterin guanine dinucleotide synthesis protein B
Pfam-B_2446 (release 6.5). This protein contains a P-loop.. +PF04698 MOBP; MOBP_C-Myrip;
Rab effector MyRIP/melanophilin C-terminus. Pfam-B_4174 (release 7.5). This domain is found at the C-terminus of the Rab effector proteins MyRIP and melanophilin.. +PF05161 MOFRL family
MOFRL(multi-organism fragment with rich Leucine) family exists in bacteria and eukaryotes. The function of this domain is not clear, although it exists in some putative enzymes such as reductases and kinases.. +PF04603 Ran-interacting Mog1 protein
Pfam-B_4771 (release 7.5). Segregation of nuclear and cytoplasmic processes facilitates regulation of many eukaryotic cellular functions such as gene expression and cell cycle progression. Trafficking through the nuclear pore requires a number of highly conserved soluble factors that escort macromolecular substrates into and out of the nucleus. The Mog1 protein has been shown to interact with RanGTP which stimulates guanine nucleotide release, suggesting Mog1 regulates the nuclear transport functions of Ran. The human homologue of Mog1 is thought to be alternatively spliced [1,2,3,4]. . +PF04879 Molybdopterin oxidoreductase Fe4S4 domain
This domain is found in formate dehydrogenase H for which the structure is known. The first domain (residues 1 to 60, 448 to 476, and 499 to 540), comprising two small antiparallel sheets and four helices, coordinates the Fe4S4 cluster just below the protein surface .. +PF00384 molybdopterin;
Molybdopterin oxidoreductase. Prosite & Pfam-B_2803 (Release 7.5). +PF01568 Molydopterin dinucleotide binding domain
Pfam-B_129 (release 4.0). This domain is found in various molybdopterin - containing oxidoreductases and tungsten formylmethanofuran dehydrogenase subunit d (FwdD) and molybdenum formylmethanofuran dehydrogenase subunit (FmdD); where the domain constitutes almost the entire subunit. The formylmethanofuran dehydrogenase catalyses the first step in methane formation from CO2 in methanogenic archaea and has a molybdopterin dinucleotide cofactor . This domain corresponds to the C-terminal domain IV in dimethyl sulfoxide (DMSO)reductase which interacts with the 2-amino pyrimidone ring of both molybdopterin guanine dinucleotide molecules .. +PF04744 Monooxygenase subunit B protein
Pfam-B_6020 (release 7.5). Family of membrane associated monooxygenases (EC 1.13.12.-) which utilise O(2) to oxidise their substrate. Family members include both ammonia and methane monooxygenases involved in the oxidation of their respective substrates. These enzymes are multi-subunit complexes. This family represents the B subunit of the enzyme; the A subunit is thought to contain the active site. [1,2]. . +PF03473 MOSC domain
Aravind L, Anantharaman V. The MOSC (MOCO sulfurase C-terminal) domain is a superfamily of beta-strand-rich domains identified in the molybdenum cofactor sulfurase and several other proteins from both prokaryotes and eukaryotes. These MOSC domains contain an absolutely conserved cysteine and occur either as stand-alone forms such as Swiss:P32157, or fused to other domains such as NifS-like catalytic domain in Molybdenum cofactor sulfurase. The MOSC domain is predicted to be a sulfur-carrier domain that receives sulfur abstracted by the pyridoxal phosphate-dependent NifS-like enzymes, on its conserved cysteine, and delivers it for the formation of diverse sulfur-metal clusters.. +PF02722 Major Outer Sheath Protein C-terminal region
Pfam-B_653 (release 5.5). This is a family of spirochete major outer sheath protein C-terminal regions. These proteins are present on the bacterial cell surface. In T. denticola the major outer sheath protein (Msp) binds immobilised laminin and fibronectin supporting the hypothesis that Msp mediates the extracellular matrix binding activity of T. denticola .. +PF02707 MOSP;
Major Outer Sheath Protein N-terminal region. Pfam-B_653 (release 5.5). This is a family of spirochete major outer sheath protein N-terminal regions. These proteins are present on the bacterial cell surface. In T. denticola the major outer sheath protein (Msp) binds immobilised laminin and fibronectin supporting the hypothesis that Msp mediates the extracellular matrix binding activity of T. denticola .. +PF01618 MotA/TolQ/ExbB proton channel family
Pfam-B_1099 (release 4.1). This family groups together integral membrane proteins that appear to be involved in translocation of proteins across a membrane. These proteins are probably proton channels. MotA is an essential component of the flagellar motor that uses a proton gradient to generate rotational motion in the flagellum . ExbB is part of the TonB-dependent transduction complex. The TonB complex uses the proton gradient across the inner bacterial membrane to transport large molecules across the outer bacterial membrane.. +PF04006 Mpp10 protein
Pfam-B_12513 (release 7.3). This family includes proteins related to Mpp10 (M phase phosphoprotein 10). The U3 small nucleolar ribonucleoprotein (snoRNP) is required for three cleavage events that generate the mature 18S rRNA from the pre-rRNA. In Saccharomyces cerevisiae, depletion of Mpp10, a U3 snoRNP-specific protein, halts 18S rRNA production and impairs cleavage at the three U3 snoRNP-dependent sites .. +PF05172 MPPN;
Nup53/35/40-type RNA recognition motif. Members of this family belong to the nuclear pore complex, NPC, the only gateway between the nucleus and the cytoplasm. The NPC consists of several subcomplexes each one of which is made up of multiple copies of several individual Nup, Nic or Sec protein subunits. In yeast, this Nup or nucleoporin subunit is numbered Nup53, Nup40 in Schizo. pombe and in vertebrates as Nup35. This subunit forms part of the inner ring within the membrane and interacts directly with Nup-Ndc1, considered to be an anchor for the NPC in the pore membrane . This region of the Nup is the RNA-recognition region .. +PF01188 Mandelate racemase / muconate lactonizing enzyme, C-terminal domain
C-terminal domain is TIM barrel fold, dehydratase-like domain. Manganese is associated with this domain.. +PF02746 Mandelate racemase / muconate lactonizing enzyme, N-terminal domain
SCOP reports fold similarity with enolase N-terminal domain.. +PF04152 Mer11_DNA_bind;
Mre11 DNA-binding presumed domain . Pfam-B_3909 (release 7.3);. The Mre11 complex is a multi-subunit nuclease that is composed of Mre11, Rad50 and Nbs1/Xrs2, and is involved in checkpoint signalling and DNA replication . Mre11 has an intrinsic DNA-binding activity that is stimulated by Rad50 on its own or in combination with Nbs1 .. +PF04085 rod shape-determining protein MreC
TIGRFAMs (release 2.0);. MreC (murein formation C) is involved in the rod shape determination in E. coli, and more generally in cell shape determination of bacteria whether or not they are rod-shaped.. +PF04093 rod shape-determining protein MreD
MreD (murein formation D) is involved in the rod shape determination in E. coli, and more generally in cell shape determination of bacteria whether or not they are rod-shaped.. +PF03919 mRNA capping enzyme, C-terminal domain
+PF01331 mRNA capping enzyme, catalytic domain
This family represents the ATP binding catalytic domain of the mRNA capping enzyme.. +PF02940 mRNA capping enzyme, beta chain
The beta chain of mRNA capping enzyme has triphosphatase activity. The function of the capping enzyme also depends on the guanylyltransferase activity conferred by the alpha chain (see Pfam:PF01331). +PF02349 Major surface glycoprotein
Pfam-B_864 (release 5.2). This is a novel repeat in Pneumocystis carinii Major surface glycoprotein (MSG) some members of the alignment have up to nine repeats of this family, the repeats containing several conserved cysteines. The MSG of P. carinii is an important protein in host-pathogen interactions . Surface glycoprotein A Swiss:O59920 from Pneumocystis carinii is a main target for the host immune system, this protein is implicated in the attachment of Pneumocystis carinii to the host alveolar epithelial cells, alveolar macrophages, host surfactant and possibly accounts in part for the hypoxia seen in Pneumocystis carinii pneumonia (PCP) .. +PF04066 Multiple resistance and pH regulation protein F (MrpF / PhaF)
Members of the PhaF / MrpF family are predicted to be integral membrane proteins with three transmembrane regions, involved in regulation of pH. PhaF is part of a potassium efflux system involved in pH regulation. It is also involved in symbiosis in Rhizobium meliloti . MrpF is part of a Na+/H+ antiporter complex, also involved in pH homeostasis. MrpF is thought to be an efflux system for Na+ and cholate . The Mrp system in Bacilli may also have primary energisation capacities .. +PF04471 Restriction endonuclease
Prokaryotic family found in type II restriction enzymes containing the hallmark (D/E)-(D/E)XK active site. Presence of catalytic residues implicates this region in the enzymatic cleavage of DNA [1,2].. +PF00924 UPF0003;
Mechanosensitive ion channel. Pfam-B_1136 (release 3.0). Two members of this protein family: Swiss:Q57634 and Swiss:Q58543 of M. jannaschii have been functionally characterised. Both proteins form mechanosensitive (MS) ion channels upon reconstitution into liposomes and functional examination by the patch-clamp technique. Therefore this family are likely to also be MS channel proteins.. +PF00985 Merozoite Surface Antigen 2 (MSA-2) family
Pfam-B_1052 (release 3.0). +PF01741 Large-conductance mechanosensitive channel, MscL
+PF01716 Manganese-stabilising protein / photosystem II polypeptide
Pfam-B_1814 (release 4.1). This family consists of the 33 KDa photosystem II polypeptide from the oxygen evolving complex (OEC) of plants and cyanobacteria. The protein is also known as the manganese-stabilising protein as it is associated with the manganese complex of the OEC and may provide the ligands for the complex .. +PF03429 Major surface protein 1B
Pfam-B_4414 (release 6.6). The major surface protein (MSP1) of the cattle pathogen Anaplasma is a heterodimer comprised of MSP1a and MSP1b. This family is the MSP1b chain. These MSP1 proteins are putative adhesins for bovine erythrocytes. . +PF04421 Mss4 protein
+PF03940 Male specific sperm protein
This family of drosophila proteins are typified by the repetitive motif C-G-P.. +PF05063 MT-A70
Pfam-B_3025 (release 7.7). MT-A70 is the S-adenosylmethionine-binding subunit of human mRNA:m6A methyl-transferase (MTase), an enzyme that sequence-specifically methylates adenines in pre-mRNAs. . +PF01993 methylene-5,6,7,8-tetrahydromethanopterin dehydrogenase
This enzyme family is involved in formation of methane from carbon dioxide EC:1.5.99.9. The enzyme requires coenzyme F420 .. +PF02536 mTERF
Pfam-B_1422 (release 5.4). This family contains one sequence of known function Human mitochondrial transcription termination factor (mTERF) the rest of the family consists of hypothetical proteins none of which have any functional information. mTERF is a multizipper protein possessing three putative leucine zippers one of which is bipartite. The protein binds DNA as a monomer . The leucine zippers are not implicated in a dimerisation role as in other leucine zippers .. +PF02219 Methylenetetrahydrofolate reductase
Pfam-B_2407 (release 5.2). This family includes the 5,10-methylenetetrahydrofolate reductase EC:1.7.99.5 from bacteria and methylenetetrahydrofolate reductase EC: 1.5.1.20 from eukaryotes. The structure for this domain is known to be a TIM barrel.. +PF05068 Mannitol repressor
The mannitol operon of Escherichia coli, encoding the mannitol-specific enzyme II of the phosphotransferase system (MtlA) and mannitol phosphate dehydrogenase (MtlD) contains an additional downstream open reading frame which encodes the mannitol repressor (MtlR).. +PF03083 Sugar efflux transporter for intercellular exchange
Pfam-B_623 (release 6.4). This family includes proteins such as drosophila saliva , MtN3 involved in root nodule development and a protein involved in activation and expression of recombination activation genes (RAGs) . Although the molecular function of these proteins is unknown, they are almost certainly transmembrane proteins. This family contains a region of two transmembrane helices that is found in two copies in most members of the family. This family also contains specific sugar efflux transporters that are essential for the maintenance of animal blood glucose levels, plant nectar production, and plant seed and pollen development. In many organisms it mediates glucose transport; in Arabidopsis it is necessary for pollen viability; and two of the rice homologues are specifically exploited by bacterial pathogens for virulence by means of direct binding of a bacterial effector to the SWEET promoter .. +PF03821 Golgi 4-transmembrane spanning transporter
TIGRFAMs, Griffiths-Jones SR. +PF04208 Tetrahydromethanopterin S-methyltransferase, subunit A
TIGRFAMs (release 2.0);. The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump .. +PF04211 Tetrahydromethanopterin S-methyltransferase, subunit C
TIGRFAMs (release 2.0);. The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump .. +PF04207 Tetrahydromethanopterin S-methyltransferase, subunit D
TIGRFAMs (release 2.0);. The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump .. +PF04206 Tetrahydromethanopterin S-methyltransferase, subunit E
TIGRFAMs (release 2.0);. The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump . . +PF04210 Tetrahydromethanopterin S-methyltransferase, subunit G
TIGRFAMs (release 2.0);. The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump .. +PF02007 Tetrahydromethanopterin S-methyltransferase MtrH subunit
The enzyme tetrahydromethanopterin S-methyltransferase EC:2.1.1.86 is composed of eight subunits . The enzyme is a membrane- associated enzyme complex which catalyses an energy-conserving, sodium-ion-translocating step in methanogenesis from hydrogen and carbon dioxide . . +PF05175 Methyltransferase small domain
This domain is found in ribosomal RNA small subunit methyltransferase C (eg Swiss:P44453) as well as other methyltransferases (eg Swiss:Q53742).. +PF02416 mttA/Hcf106 family
Pfam-B_1826 (release 5.4). Members of this protein family are involved in a sec independent translocation mechanism. This pathway has been called the DeltapH pathway in chloroplasts . Members of this family in E.coli are involved in export of redox proteins with a "twin arginine" leader motif .. +PF02316 Mu_DNA_bind;
Mu DNA-binding domain. Pfam-B_12856 (release 5.2). This family consists of MuA-transposase and repressor protein CI. These proteins contain homologous DNA-binding domains at their N-termini which compete for the same DNA site within the Mu bacteriophage genome.. +PF02914 Mu_transposase;
Bacteriophage Mu transposase. +PF03888 MucB_ResB;
Members of this family are regulators of the anti-sigma E protein RseD.. +PF03108 MuDR;
MuDR family transposase. Pfam-B_271 (release 6.5). This region is found in plant proteins that are presumed to be the transposases for Mutator transposable elements [1,2]. These transposons contain two ORFs. The molecular function of this region is unknown.. +PF04310 MukB N-terminal
This family represents the N-terminal region of MukB, one of a group of bacterial proteins essential for the movement of nucleoids from mid-cell towards the cell quarters (i.e. chromosome partitioning). The structure of the N-terminal domain consists of an antiparallel six-stranded beta sheet surrounded by one helix on one side and by five helices on the other side . It contains an exposed Walker A loop in an unexpected helix-loop-helix motif (in other proteins, Walker A motifs generally adopt a P loop conformation as part of a strand-loop-helix motif embedded in a conserved topology of alternating helices and (parallel) beta strands) .. +PF04288 MukE-like family
Bacterial protein involved in chromosome partitioning, MukE. +PF00893 DUF7; SMR;
Small Multidrug Resistance protein. Pfam-B_1082 (release 3.0). This family is the Small Multidrug Resistance (SMR) family. Several members have been shown to export a range of toxins, including ethidium bromide and quaternary ammonium compounds , through coupling with proton influx .. +PF01225 FPGS;
Mur ligase family, catalytic domain. This family contains a number of related ligase enzymes which have EC numbers 6.3.2.*. This family includes: MurC (Swiss:P17952), MurD (Swiss:P14900), MurE (Swiss:P22188), MurF (Swiss:P11880), Mpl (Swiss:P37773) and FolC (Swiss:P08192). MurC, MurD, MurE and MurF catalyse consecutive steps in the synthesis of peptidoglycan. Peptidoglycan consists of a sheet of two sugar derivatives, with one of these N-acetylmuramic acid attaching to a small pentapeptide. The pentapeptide is made of L-alanine, D-glutamic acid, Meso-diaminopimelic acid and D-alanyl alanine. The peptide moiety is synthesised by successively adding these amino acids to UDP-N-acetylmuramic acid. MurC transfers the L-alanine, MurD transfers the D-glutamate, MurE transfers the diaminopimelic acid, and MurF transfers the D-alanyl alanine. This family also includes Folylpolyglutamate synthase that transfers glutamate to folylpolyglutamate.. +PF02875 FPGS;
Mur ligase family, glutamate ligase domain. This family contains a number of related ligase enzymes which have EC numbers 6.3.2.*. This family includes: MurC (Swiss:P17952), MurD (Swiss:P14900), MurE (Swiss:P22188), MurF (Swiss:P11880), Mpl (Swiss:P37773) and FolC (Swiss:P08192). MurC, MurD, MurE and MurF catalyse consecutive steps in the synthesis of peptidoglycan. Peptidoglycan consists of a sheet of two sugar derivatives, with one of these N-acetylmuramic acid attaching to a small pentapeptide. The pentapeptide is made of L-alanine, D-glutamic acid, Meso-diaminopimelic acid and D-alanyl alanine. The peptide moiety is synthesised by successively adding these amino acids to UDP-N-acetylmuramic acid. MurC transfers the L-alanine, MurD transfers the D-glutamate, MurE transfers the diaminopimelic acid, and MurF transfers the D-alanyl alanine. This family also includes Folylpolyglutamate synthase that transfers glutamate to folylpolyglutamate.. +PF02873 UDP-N-acetylenolpyruvoylglucosamine reductase, C-terminal domain
Pfam-B_1092 (release 5.2). Members of this family are UDP-N-acetylenolpyruvoylglucosamine reductase enzymes EC:1.1.1.158. This enzyme is involved in the biosynthesis of peptidoglycan.. +PF02976 DNA mismatch repair enzyme MutH
+PF01624 MutS;MutS_N;
Pfam-B_800 (release 4.1). This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF05188, Pfam:PF05192 and Pfam:PF05190. The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein . The aligned region corresponds with globular domain I, which is involved in DNA binding, in Thermus aquaticus MutS as characterised in .. +PF05188 MutS domain II
Pfam-B_800 (release 4.1). This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF01624, Pfam:PF05192 and Pfam:PF05190. The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein . This domain corresponds to domain II in Thermus aquaticus MutS as characterised in , and resembles RNAse-H-like domains (see Pfam:PF00075).. +PF05192 MutS domain III
This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF05188, Pfam:PF01624 and Pfam:PF05190. The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein . The aligned region corresponds with domain III, which is central to the structure of Thermus aquaticus MutS as characterised in . . +PF05190 MutS family domain IV
This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF01624, Pfam:PF05188, Pfam:PF05192 and Pfam:PF00488. The mutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein . The aligned region corresponds in part with globular domain IV, which is involved in DNA binding, in Thermus aquaticus MutS as characterised in .. +PF00488 mutS;MutS_C;
This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF01624, Pfam:PF05188, Pfam:PF05192 and Pfam:PF05190. The mutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein . The aligned region corresponds with domain V of Thermus aquaticus MutS as characterised in , which contains a Walker A motif, and is structurally similar to the ATPase domain of ABC transporters.. +PF03023 MviN-like protein
Pfam-B_1348 (release 6.4). Deletion of the mviN virulence gene in Salmonella enterica serovar Typhimurium greatly reduces virulence in a mouse model of typhoid-like disease . Open reading frames encoding homologues of MviN have since been identified in a variety of bacteria , including pathogens and non-pathogens and plant-symbionts. In the nitrogen-fixing symbiont Rhizobium tropici, mviN is required for motility. The MviN protein is predicted to be membrane-associated.. +PF02344 Myc leucine zipper domain
Pfam-B_829 (release 5.2). This family consists of the leucine zipper dimerisation domain found in both cellular c-Myc proto-oncogenes and viral v-Myc oncogenes. Dimerisation via the leucine zipper motif with other basic helix-loop-helix-leucine zipper (b/HLH/lz) proteins such as Max Swiss:P25912 is required for efficient DNA binding. The Myc-Max dimer is a transactivating complex activating expression of growth related genes promoting cell proliferation. The dimerisation is facilitated via interdigitating leucine residues every 7th position of the alpha helix. Like charge repulsion of adjacent residues in this region perturbs the formation of homodimers with heterodimers being promoted by opposing charge attractions.. +PF01056 Myc_N_term;
Myc amino-terminal region. Pfam-B_387 (release 3.0). The myc family belongs to the basic helix-loop-helix leucine zipper class of transcription factors, see Pfam:PF00010. Myc forms a heterodimer with Max, and this complex regulates cell growth through direct activation of genes involved in cell replication . Mutations in the C-terminal 20 residues of this domain cause unique changes in the induction of apoptosis, transformation, and G2 arrest .. +PF01669 Myelin basic protein
Pfam-B_1868 (release 4.1). +PF01275 Myelin proteolipid protein (PLP or lipophilin)
+PF00063 myosin_head;
Myosin head (motor domain). Blastp MYSA_HUMAN/1-840. +PF01576 Myosin_tail;
Pfam-B_356 (release 4.1). The myosin molecule is a multi-subunit complex made up of two heavy chains and four light chains; it is a fundamental contractile protein found in all eukaryote cell types . This family consists of the coiled-coil myosin heavy chain tail region. The coiled-coil is composed of the tail from two molecules of myosin. These can then assemble into the macromolecular thick filament . The coiled-coil region provides the structural backbone of the thick filament . . +PF00819 Myotoxin
Pfam-B_1337 (release 2.1). +PF02384 N-6 DNA Methylase
Pfam-B_508 (release 5.2). Restriction-modification (R-M) systems protect a bacterial cell against invasion of foreign DNA by endonucleolytic cleavage of DNA that lacks a site specific modification. The R-M system is a complex containing three polypeptides: M (this family), S (Pfam:PF01420), and R . This family consists of N-6 adenine-specific DNA methylase EC:2.1.1.72 from Type I and Type IC restriction systems. These methylases have the same sequence specificity as their corresponding restriction enzymes.. +PF01555 DNA methylase
Pfam-B_164 (release 4.0). Members of this family are DNA methylases. The family contains both N-4 cytosine-specific DNA methylases and N-6 Adenine-specific DNA methylases.. +PF04245 37-kD nucleoid-associated bacterial protein
+PF01235 Sodium:alanine symporter family
+PF03390 Na_citrate;
2-hydroxycarboxylate transporter family. Pfam-B_3683 (release 6.6). The 2-hydroxycarboxylate transporter family is a family of secondary transporters found exclusively in the bacterial kingdom. They function in the metabolism of the di- and tricarboxylates malate and citrate, mostly in fermentative pathways involving decarboxylation of malate or oxaloacetate .. +PF03553 Na+/H+ antiporter family
Pfam-B_620 (release 7.0). This family includes integral membrane proteins, some of which are NA+/H+ antiporters .. +PF00287 Sodium / potassium ATPase beta chain
+PF00939 Sodium:sulfate symporter transmembrane region
Pfam-B_1100 (release 3.0). There are also some members in this family that do not match the Prosite motif, and belong to the subfamily SODIT1.. +PF04902 Conserved region in Nab1
Pfam-B_6188 (release 7.6). Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors . This C-terminal region is found only in the Nab1 subfamily.. +PF01849 NAC domain
+PF00175 oxidored_fad; NAD_binding;
Oxidoreductase NAD-binding domain . Xanthine dehydrogenases, that also bind FAD/NAD, have essentially no similarity.. +PF03446 NAD binding domain of 6-phosphogluconate dehydrogenase
The NAD binding domain of 6-phosphogluconate dehydrogenase adopts a Rossmann fold.. +PF03447 Homoserine dehydrogenase, NAD binding domain
Pfam-B_459 (release 2.1). This domain adopts a Rossmann NAD binding fold. The C-terminal domain of homoserine dehydrogenase contributes a single helix to this structural domain, which is not included in the Pfam model.. +PF01210 NAD_Gly3P_dh;
NAD-dependent glycerol-3-phosphate dehydrogenase N-terminus. NAD-dependent glycerol-3-phosphate dehydrogenase (GPDH) catalyses the interconversion of dihydroxyacetone phosphate and L-glycerol-3-phosphate. This family represents the N-terminal NAD-binding domain .. +PF01513 DUF15;
Pfam-B_797 (release 4.0). Members of this family include ATP-NAD kinases EC:2.7.1.23, which catalyses the phosphorylation of NAD to NADP utilising ATP and other nucleoside triphosphates as well as inorganic polyphosphate as a source of phosphorus. Also includes NADH kinases EC:2.7.1.86.. +PF02540 NAD synthase
NAD synthase (EC:6.3.5.1) is involved in the de novo synthesis of NAD and is induced by stress factors such as heat shock and glucose limitation.. +PF00146 NADH dehydrogenase
+PF02477 Nairo_nucleocap;
Nucleocapsid N protein. Pfam-B_2892 (release 5.4). The nucleoprotein of the ssRNA negative-strand Nairovirus is an internal part of the virus particle. . +PF04131 nanE;
Putative N-acetylmannosamine-6-phosphate epimerase. This family represents a putative ManNAc-6-P-to-GlcNAc-6P epimerase in the N-acetylmannosamine (ManNAc) utilisation pathway found mainly in pathogenic bacteria.. +PF04660 Nanovirus coat protein
Pfam-B_4486 (release 7.5). Family of conserved Nanoviral coat proteins .. +PF00956 NAP_family;
Nucleosome assembly protein (NAP). Pfam-B_1009 (release 3.0). NAP proteins are involved in moving histones into the nucleus, nucleosome assembly and chromatin fluidity. They affect the transcription of many genes.. +PF03892 Nitrate reductase cytochrome c-type subunit (NapB)
The napB gene encodes a dihaem cytochrome c, the small subunit of a heterodimeric periplasmic nitrate reductase .. +PF03927 NapD protein
Uncharacterized protein involved in formation of periplasmic nitrate reductase.. +PF03059 Nicotianamine synthase protein
Pfam-B_2173 (release 6.4). Nicotianamine synthase EC:2.5.1.43 catalyses the trimerisation of S-adenosylmethionine to yield one molecule of nicotianamine. Nicotianamine has an important role in plant iron uptake mechanisms. Plants adopt two strategies (termed I and II) of iron acquisition. Strategy I is adopted by all higher plants except graminaceous plants, which adopt strategy II [1,2]. In strategy I plants, the role of nicotianamine is not fully determined: possible roles include the formation of more stable complexes with ferrous than with ferric ion, which might serve as a sensor of the physiological status of iron within a plant, or which might be involved in the transport of iron . In strategy II (graminaceous) plants, nicotianamine is the key intermediate (and nicotianamine synthase the key enzyme) in the synthesis of the mugineic family (the only known family in plants) of phytosiderophores. Phytosiderophores are iron chelators whose secretion by the roots is greatly increased in instances of iron deficiency . The 3D structures of five example NAS from Methanothermobacter thermautotrophicus reveal the monomer to consist of a five-helical bundle N-terminal domain on top of a classic Rossmann fold C-terminal domain. The N-terminal domain is unique to the NAS family, whereas the C-terminal domain is homologous to the class I family of SAM-dependent methyltransferases. An active site is created at the interface of the two domains, at the rim of a large cavity that corresponds to the nucleotide binding site such as is found in other proteins adopting a Rossmann fold .. +PF04159 NB glycoprotein
Pfam-B_1501 (release 7.3). The NB glycoprotein is found in Influenza type B virus. Its function is unknown.. +PF00931 NB-ARC domain
+PF04485 nblA;
Phycobilisome degradation protein nblA . In the cyanobacterium Synechococcus PCC 7942 (Swiss:P35087) , nblA triggers degradation of light-harvesting phycobiliproteins in response to deprivation of nutrients including nitrogen, phosphorus and sulphur. The mechanism of nblA function is not known, but it has been hypothesised that nblA may act by disrupting phycobilisome structure, activating a protease or tagging phycobiliproteins for proteolysis. Members of this family have also been identified in the chloroplasts of some red algae.. +PF03801 HEC/Ndc80p family
Members of this family are components of the mitotic spindle. It has been shown that Ndc80/HEC from yeast is part of a complex called the Ndc80p complex . This complex is thought to bind to the microtubules of the spindle.. +PF00334 Nucleoside diphosphate kinase
+PF05031 Iron Transport-associated domain
This domain is involved in the transport of iron, possibly as a siderophore.. +PF00880 Nebulin_repeat;
Pfam-B_1603 (release 3.0). +PF04299 DUF449; Neg_reg;
Putative FMN-binding domain. In Bacillus subtilis, family member Swiss:P21341 (PAI 2/ORF-2) was found to be essential for growth . The SUPERFAMILY database finds that this domain is related to FMN-binding domains, suggesting this protein is also FMN-binding.. +PF00960 Neocarzinostatin family
+PF03391 Nepovirus coat protein, central domain
Pfam-B_3589 (release 6.6). The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure .. +PF03688 Nepovirus coat protein, C-terminal domain
Pfam-B_3589 (release 6.6). The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure .. +PF03689 Nepovirus coat protein, N-terminal domain
Pfam-B_3589 (release 6.6). The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure .. +PF00064 neur;
Overington and HMM_iterative_training. Neuraminidases cleave sialic acid residues from glycoproteins. Belong to the sialidase family - but this alignment does not generalise to the other sialidases. Structure is a 6-sheet beta propeller.. +PF02932 Neurotransmitter-gated ion-channel transmembrane region
This family includes the four transmembrane helices that form the ion channel.. +PF02158 Neuregulin family
+PF03823 Neurokinin B
TIGRFAMs, Griffiths-Jones SR. +PF00243 Nerve growth factor family
+PF02979 Nitrile hydratase, alpha chain
+PF02211 Nitrile hydratase beta subunit
Pfam-B_5347 (release 5.2). Nitrile hydratases EC:4.2.1.84 are unusual metalloenzymes that catalyse the hydration of nitriles to their corresponding amides. They are used as biocatalysts in acrylamide production, one of the few commercial scale bioprocesses, as well as in environmental remediation for the removal of nitriles from waste streams. Nitrile hydratases are composed of two subunits, alpha and beta, and they contain one iron atom per alpha beta unit . . +PF01292 Prokaryotic cytochrome b561
This family includes cytochrome b561 and related proteins, in addition to the nickel-dependent hydrogenases b-type cytochrome subunit. Cytochrome b561 is a secretory vesicle-specific electron transport protein. It is an integral membrane protein, that binds two heme groups non-covalently. This is a prokaryotic family. Members of the 'eukaryotic cytochrome b561' family can be found in Pfam: PF03188.. +PF04097 NIC;
Pfam-B_5541 (release 7.3);. Nup93/Nic96 is a component of the nuclear pore complex. It is required for the correct assembly of the nuclear pore complex . In Saccharomyces cerevisiae, Nic96 has been shown to be involved in the distribution and cellular concentration of the GTPase Gsp1 . The structure of Nic96 has revealed a mostly alpha helical structure .. +PF03824 High-affinity nickel-transport protein
High affinity nickel transporters involved in the incorporation of nickel into H2-uptake hydrogenase and urease enzymes. Essential for the expression of catalytically active hydrogenase and urease. Ion uptake is dependent on proton motive force. HoxN in Alcaligenes eutrophus is thought to be an integral membrane protein with seven transmembrane helices . The family also includes a cobalt transporter.. +PF00374 Nickel-dependent hydrogenase
+PF04891 NifQ
Pfam-B_6173 (release 7.6). NifQ is involved in early stages of the biosynthesis of the iron-molybdenum cofactor (FeMo-co) , which is an integral part of the active site of dinitrogenase . The conserved C-terminal cysteine residues may be involved in metal binding .. +PF01106 NifU; NifU-like;
Pfam-B_1206 (release 3.0). This is an alignment of the carboxy-terminal domain. This is the only common region between the NifU protein from nitrogen-fixing bacteria and rhodobacterial species. The biochemical function of NifU is unknown .. +PF01592 NifU-like N terminal domain
Pfam-B_772 (release 4.1). This domain is found in NifU in combination with Pfam:PF01106. This domain is found in isolation in several bacterial species such as Swiss:O53156. The nif genes are responsible for nitrogen fixation. However this domain is found in bacteria that do not fix nitrogen, so it may have a broader significance in the cell than nitrogen fixation. These proteins appear to be scaffold proteins for iron-sulfur clusters .. +PF03206 Nitrogen fixation protein NifW
Pfam-B_2891 (release 6.5). Nitrogenase is a complex metalloenzyme composed of two proteins designated the Fe-protein and the MoFe-protein. Apart from these two proteins, a number of accessory proteins are essential for the maturation and assembly of nitrogenase. Even though experimental evidence suggests that these accessory proteins are required for nitrogenase activity, the exact roles played by many of these proteins in the functions of nitrogenase are unclear. Using yeast two-hybrid screening it has been shown that NifW can interact with itself as well as NifZ .. +PF04319 NifZ domain
Pfam-B_6057 (release 7.3). This short protein is found in the nif (nitrogen fixation) operon. Its function is unknown but is probably involved in nitrogen fixation or regulating some component of this process. This 75 residue region is presumed to be a domain. It is found in isolation in some members and in the amino terminal half of the longer NifZ proteins.. +PF01077 Nitrite and sulphite reductase 4Fe-4S domain
Pfam-B_1092 (release 3.0). Sulphite and nitrite reductases are vital in the biosynthetic assimilation of sulphur and nitrogen, respectively. They are also both important for the dissimilation of oxidised anions for energy transduction.. +PF03460 Nitrite/Sulfite reductase ferredoxin-like half domain
Sulfite and Nitrite reductases are key to both biosynthetic assimilation of sulfur and nitrogen and dissimilation of oxidised anions for energy transduction . Two copies of this repeat are found in Nitrite and Sulfite reductases and form a single structural domain.. +PF02665 Nitrate reductase gamma subunit
This family is the gamma subunit of the nitrate reductase enzyme, the gamma subunit is a b-type cytochrome that receives electrons from the quinone pool. It then transfers these via the iron-sulfur clusters of the beta subunit to the molybdenum cofactor found in the alpha subunit . The nitrate reductase enzyme, EC:1.7.99.4 catalyses the conversion of nitrite to nitrate via the reduction of an acceptor. The nitrate reductase enzyme is composed of three subunits . Nitrate is the most widely used alternative electron acceptor after oxygen .. +PF02087 Nitrophorin
+PF00881 Nitroreductase family
Pfam-B_481 (release 3.0). The nitroreductase family comprises a group of FMN- or FAD-dependent and NAD(P)H-dependent enzymes able to metabolize nitrosubstituted compounds.. +PF05211 Neuraminyllactose-binding hemagglutinin precursor (NLBH)
Pfam-B_6567 (release 7.7). This family is comprised of several flagellar sheath adhesin proteins also called neuraminyllactose-binding hemagglutinin precursor (NLBH) or N-acetylneuraminyllactose-binding fibrillar hemagglutinin receptor-binding subunits. NLBH is found exclusively in Helicobacter which are gut colonising bacteria and bind to sialic acid rich macromolecules present on the gastric epithelium .. +PF04170 NlpE N-terminal domain
This family represents a bacterial outer membrane lipoprotein that is necessary for signalling by the Cpx pathway . This pathway responds to cell envelope disturbances and increases the expression of periplasmic protein folding and degradation factors. While the molecular function of the NlpE protein is unknown, it may be involved in detecting bacterial adhesion to abiotic surfaces. In Escherichia coli and Salmonella typhi, NlpE is also known to confer copper tolerance in copper-sensitive strains of Escherichia coli, and may be involved in copper efflux and delivery of copper to copper-dependent enzymes .. +PF04973 Nicotinamide mononucleotide transporter
Members of this family are integral membrane proteins that are involved in transport of nicotinamide mononucleotide [1,2].. +PF01233 Myristoyl-CoA:protein N-myristoyltransferase, N-terminal domain
The N and C-terminal domains of NMT are structurally similar, each adopting an acyl-CoA N-acyltransferase-like fold.. +PF02799 Myristoyl-CoA:protein N-myristoyltransferase, C-terminal domain
The N and C-terminal domains of NMT are structurally similar, each adopting an acyl-CoA N-acyltransferase-like fold.. +PF02070 Neuromedin U
+PF03980 Nnf1
NNF1 is an essential yeast gene that is necessary for chromosome segregation. It is associated with the spindle poles and forms part of a kinetochore subcomplex called MIND .. +PF02898 Nitric oxide synthase, oxygenase domain
+PF02474 Nodulation protein A (NodA)
Pfam-B_2183 (release 5.4). Rhizobia nodulation (nod) genes control the biosynthesis of Nod factors required for infection and nodulation of their legume hosts. Nodulation protein A (NodA) is a N-acetyltransferase involved in production of Nod factors that stimulate mitosis in various plant protoplasts.. +PF01798 Putative snoRNA binding domain
Pfam-B_1362 (release 4.2). This family consists of various Pre RNA processing ribonucleoproteins. The function of the aligned region is unknown however it may be a common RNA or snoRNA or Nop1p binding domain. Nop5p (Nop58p) Swiss:Q12499 from yeast is the protein component of a ribonucleoprotein protein required for pre-18s rRNA processing and is suggested to function with Nop1p in a snoRNA complex . Nop56p Swiss:O00567 and Nop5p interact with Nop1p and are required for ribosome biogenesis . Prp31p Swiss:p49704 is required for pre-mRNA splicing in S. cerevisiae .. +PF02451 Nodulin
Pfam-B_2163 (release 5.4). Nodulin is a plant protein of unknown function. It is induced during nodulation in legume roots after rhizobium infection.. +PF01189 Nol1_Nop2_Sun;
NOL1/NOP2/sun family. +PF04135 Nucleolar RNA-binding protein, Nop10p family
Nop10p is a nucleolar protein that is specifically associated with H/ACA snoRNAs. It is essential for normal 18S rRNA production and rRNA pseudouridylation by the ribonucleoprotein particles containing H/ACA snoRNAs (H/ACA snoRNPs). Nop10p is probably necessary for the stability of these RNPs .. +PF05048 Periplasmic copper-binding protein (NosD)
Pfam-B_5499 (release 7.7). NosD is a periplasmic protein which is thought to insert copper into the exported reductase apoenzyme (NosZ) . This region forms a parallel beta helix domain.. +PF04054 CCR4-Not complex component, Not1
Pfam-B_13503 (release 7.3);. The Ccr4-Not complex is a global regulator of transcription that affects genes positively and negatively and is thought to regulate transcription factor TFIID . . +PF00066 notch;
Swissprot_feature_table. The LNR (Lin-12/Notch repeat) domain is found in three tandem copies in Notch related proteins. The structure of the domain has been determined by NMR and was shown to contain three disulphide bonds and coordinate a calcium ion. Three repeats are also found in the PAPP-A peptidase .. +PF03000 NPH3 family
Pfam-B_1584 (release 6.4). Phototropism of Arabidopsis thaliana seedlings in response to a blue light source is initiated by nonphototropic hypocotyl 1 (NPH1), a light-activated serine-threonine protein kinase. Mutations in NPH3 disrupt early signaling occurring downstream of the NPH1 photoreceptor. The NPH3 gene encodes a NPH1-interacting protein. NPH3 is a member of a large protein family, apparently specific to higher plants, and may function as an adapter or scaffold protein to bring together the enzymatic components of a NPH1-activated phosphorelay .. +PF03116 NQR2, RnfD, RnfE family
Pfam-B_2882 (release 6.5). This family of bacterial proteins includes a sodium-translocating NADH-ubiquinone oxidoreductase (i.e. a respiration linked sodium pump). In Vibrio cholerae, it negatively regulates the expression of virulence factors through inhibiting (by an unknown mechanism) the transcription of the transcriptional activator ToxT . The family also includes proteins involved in nitrogen fixation, RnfD and RnfE. The similarity of these proteins to NADH-ubiquinone oxidoreductases was previously noted .. +PF01566 Natural resistance-associated macrophage protein
Pfam-B_624 (release 4.0). The natural resistance-associated macrophage protein (NRAMP) family consists of Nramp1, Nramp2, and yeast proteins Smf1 and Smf2. The NRAMP family is a novel family of functionally related proteins defined by a conserved hydrophobic core of ten transmembrane domains . This family of membrane proteins are divalent cation transporters. Nramp1 is an integral membrane protein expressed exclusively in cells of the immune system and is recruited to the membrane of a phagosome upon phagocytosis . By controlling divalent cation concentrations Nramp1 may regulate the interphagosomal replication of bacteria . Mutations in Nramp1 may genetically predispose an individual to susceptibility to diseases including leprosy and tuberculosis; conversely, this might however provide protection from rheumatoid arthritis . Nramp2 is a multiple divalent cation transporter for Fe2+, Mn2+ and Zn2+ amongst others; it is expressed at high levels in the intestine, and is the major transferrin-independent iron uptake system in mammals . The yeast proteins Smf1 and Smf2 may also transport divalent cations .. +PF03813 Nrap protein
Members of this family are nucleolar RNA-associated proteins (Nrap) which are highly conserved from yeast (Saccharomyces cerevisiae) to human. In the mouse, Nrap is ubiquitously expressed and is specifically localised in the nucleolus . Nrap is a large nucleolar protein (of more than 1000 amino acids). Nrap appears to be associated with ribosome biogenesis by interacting with pre-rRNA primary transcript .. +PF03916 Polysulphide reductase, NrfD
NrfD is an integral transmembrane protein with loops in both the periplasm and the cytoplasm. NrfD is thought to participate in the transfer of electrons, from the quinone pool into the terminal components of the Nrf pathway . . +PF02723 Non-structural protein NS3/Small envelope protein E
Pfam-B_1913 (release 5.5) & Pfam-B_7381 (release 8.0). This is a family of small non-structural proteins, well conserved among Coronavirus strains. This protein is also found in murine hepatitis virus as small envelope protein E (e.g. Swiss:O72008).. +PF02071 Aromatic-di-Alanine (AdAR) repeat
This repeat is found in NSF attachment proteins. Its structure is similar to that found in TPR repeats Pfam:PF00515.. +PF05064 Nsp1-like C-terminal region
Pfam-B_3555 (release 7.7). This family probably forms a coiled-coil . This important region of Nsp1 is involved in binding Nup82 .. +PF03146 Agrin NtA domain
Agrin is a multidomain heparan sulphate proteoglycan, that is a key organiser for the induction of postsynaptic specialisations at the neuromuscular junction. Binding of agrin to basement membranes requires the amino terminal (NtA) domain . This region mediates high affinity interaction with the coiled-coil domain of laminins. The binding of agrin to laminins via the NtA domain is subject to tissue-specific regulation. The NtA domain-containing form of agrin is expressed in non-neuronal cells or in neurons that project to non-neuronal cell such as motor neurons. The structure of this domain is an OB-fold .. +PF02136 Nuclear transport factor 2 (NTF2) domain
This family includes the NTF2-like Delta-5-3-ketosteroid isomerase proteins.. +PF00483 ADP_glu_Plase;
Nucleotidyl transferase. This family includes a wide range of enzymes which transfer nucleotides onto phosphosugars.. +PF05014 Nucleoside 2-deoxyribosyltransferase
Nucleoside 2-deoxyribosyltransferase EC:2.4.2.6 catalyses the cleavage of the glycosidic bonds of 2`-deoxyribonucleosides .. +PF03825 Nucleoside H+ symporter
TIGRFAMs, Griffiths-Jones SR. +PF02321 Outer membrane efflux protein
The OEP family (Outer membrane efflux protein) form trimeric channels that allow export of a variety of substrates in Gram negative bacteria. Each member of this family is composed of two repeats. The trimeric channel is composed of a 12 stranded all beta sheet barrel that spans the outer membrane, and a long all helical barrel that spans the periplasm.. +PF02265 Nuclease;
Pfam-B_2480 (release 5.2). This family contains both S1 and P1 nucleases (EC:3.1.30.1) which cleave RNA and single stranded DNA with no base specificity.. +PF03066 Nucleoplasmin
Pfam-B_2930 (release 6.4). Nucleoplasmins are also known as chromatin decondensation proteins. They bind to core histones and transfer DNA to them in a reaction that requires ATP. This is thought to play a role in the assembly of regular nucleosomal arrays.. +PF03177 Nucleoporin;
Non-repetitive/WGA-negative nucleoporin C-terminal. Pfam-B_1212 (release 6.5). This is the C-terminal half of a family of nucleoporin proteins. Nucleoporins are the main components of the nuclear pore complex in eukaryotic cells, and mediate bidirectional nucleocytoplasmic transport, especially of mRNA and proteins. Two nucleoporin classes are known: one is characterised by the FG repeat Pfam:PF03093; the other is represented by this family, and lacks any repeats. RNA undergoing nuclear export first encounters the basket of the nuclear pore and many nucleoporins are accessible on the basket side of the pore .. +PF01773 Nucleoside_tra2;
Na+ dependent nucleoside transporter N-terminus. This family consists of nucleoside transport proteins. Swiss:Q62773 is a purine-specific Na+-nucleoside cotransporter localised to the bile canalicular membrane . Swiss:Q62674 is a Na+-dependent nucleoside transporter selective for pyrimidine nucleosides and adenosine; it also transports the anti-viral nucleoside analogues AZT and ddC . This alignment covers the N terminus of this family. +PF03800 Nuf2 family
Members of this family are components of the mitotic spindle. It has been shown that Nuf2 from yeast is part of a complex called the Ndc80p complex . This complex is thought to bind to the microtubules of the spindle. An arabidopsis protein has been included in this family that has previously not been identified as a member of this family, Swiss:Q9C953. The match is not strong, but in common with other members of this family contains coiled-coil to the C terminus of this region.. +PF04121 Nuclear pore protein 84 / 107
Pfam-B_13117 (release 7.3);. Nup84p forms a complex with five proteins, including Nup120p, Nup85p, Sec13p, and a Sec13p homologue. This Nup84p complex in conjunction with Sec13-type proteins is required for correct nuclear pore biogenesis .. +PF01029 NusB family
The NusB protein is involved in the regulation of rRNA biosynthesis by transcriptional antitermination. . +PF02357 Transcription termination factor nusG
Pfam-B_697 (release 5.2). +PF04277 Oxaloacetate decarboxylase, gamma chain
TIGRFAMs (release 2.0);. +PF03977 OadB_MmdB;
Na+-transporting oxaloacetate decarboxylase beta subunit. Members of this family are integral membrane proteins. The decarboxylation reactions they catalyse are coupled to the vectorial transport of Na+ across the cytoplasmic membrane, thereby creating a sodium ion motive force that is used for ATP synthesis .. +PF04225 Opacity-associated protein A LysM-like domain
This family includes the Haemophilus influenzae opacity-associated protein. This protein is required for efficient nasopharyngeal mucosal colonisation, and its expression is associated with a distinctive transparent colony phenotype. OapA is thought to be a secreted protein, and its expression exhibits high-frequency phase variation [1,2]. This is a LysM-like domain.. +PF03373 Octapeptide repeat
This octapeptide repeat is found in several bacterial proteins. The function of this repeat is unknown.. +PF02317 NAD/NADP octopine/nopaline dehydrogenase, alpha-helical domain
Pfam-B_9653 (release 5.2). This group of enzymes act on the CH-NH substrate bond using NAD(+) or NADP(+) as an acceptor. The Pfam family consists mainly of octopine and nopaline dehydrogenases from Ti plasmids.. +PF02101 Ocular albinism type 1 protein
+PF02100 Ornithine decarboxylase antizyme
IPR002993 & Pfam-B_34796 (release 7.7). This family consists of ornithine decarboxylase antizyme proteins. The polyamine biosynthetic enzyme ornithine decarboxylase (ODC) is degraded by the 26 S proteasome via a ubiquitin-independent pathway. Its degradation is greatly accelerated by association with the polyamine-induced regulatory protein antizyme 1 (AZ1) .. +PF02423 ODC_Mu_crystall;
Ornithine cyclodeaminase/mu-crystallin family. Pfam-B_1960 (release 5.4). This family contains the bacterial Ornithine cyclodeaminase enzyme EC:4.3.1.12, which catalyses the deamination of ornithine to proline . This family also contains mu-Crystallin the major component of the eye lens in several Australian marsupials, mRNA for this protein has also been found in human retina .. +PF02159 Oestrogen receptor
+PF04664 Opioid growth factor receptor (OGFr) conserved region
Pfam-B_4529 (release 7.5). Opioid peptides act as growth factors in neural and non-neural cells and tissues, in addition to serving in neurotransmission/neuromodulation in the nervous system. The Opioid growth factor receptor is an integral membrane protein associated with the nucleus. The conserved region is situated at the N-terminus of the member proteins with a series of imperfect repeats lying immediately to its C-terminus .. +PF04680 OGFr_repeat;
Opioid growth factor receptor repeat. Pfam-B_4529 (release 7.5). Proline-rich repeat found only in a human opioid growth factor receptor . . +PF04606 Ogr/Delta-like zinc finger
Pfam-B_5059 (release 7.5). This is a viral family of phage zinc-binding transcriptional activators, which also contains cryptic members in some bacterial genomes . The P4 phage delta protein contains two such domains attached covalently, while the P2 phage Ogr proteins possess one domain but function as dimers. All the members of this family have the following consensus sequence: C-X(2)-C-X(3)-A-(X)2-R-X(15)-C-X(4)-C-X(3)-F . This family also includes zinc fingers in recombinase proteins.. +PF01276 Orn/Lys/Arg decarboxylase, major domain
+PF03711 Orn/Lys/Arg decarboxylase, C-terminal domain
+PF03709 Orn/Lys/Arg decarboxylase, N-terminal domain
This domain has a flavodoxin-like fold, and is termed the "wing" domain because of its position in the overall 3D structure.. +PF01277 Oleosin
+PF02191 Olfactomedin-like domain
Alignment kindly provided by SMART. +PF00691 OmpA family
Pfam-B_166 (release 2.1). The Pfam entry also includes MotB and related proteins which are not included in the Prosite family.. +PF01389 OmpA-like transmembrane domain
The structure of OmpA transmembrane domain shows that it consists of an eight stranded beta barrel . This family includes some other distantly related outer membrane proteins with low scores.. +PF03938 Outer membrane protein (OmpH-like)
This family includes outer membrane proteins such as OmpH among others. Skp (OmpH) has been characterised as a molecular chaperone that interacts with unfolded proteins as they emerge in the periplasm from the Sec translocation machinery .. +PF01278 Omptin family
The omptin family is a family of serine proteases.. +PF03922 OmpW family
This family includes outer membrane protein W (OmpW) proteins from a variety of bacterial species. This protein may form the receptor for S4 colicins in E. coli .. +PF03532 OMS28 porin
+PF02462 Opacity family porin protein
Pfam-B_2356 (release 5.4). Pathogenic Neisseria spp. possess a repertoire of phase-variable Opacity proteins that mediate various pathogen--host cell interactions . These proteins are integral membrane proteins related to other porins.. +PF01160 Vertebrate endogenous opioids neuropeptide
+PF04966 Carbohydrate-selective porin, OprB family
+PF03573 Peptidase_S43;
outer membrane porin, OprD family. This family includes outer membrane proteins related to OprD. OprD has been described as a serine type peptidase . However the proposed catalytic residues are not conserved suggesting that many of these proteins are not peptidases.. +PF03169 OPT oligopeptide transporter protein
Pfam-B_3048 (release 6.5). The OPT family of oligopeptide transporters is distinct from the ABC Pfam:PF00005 and PTR Pfam:PF00854 transporter families. OPT transporters were first recognised in fungi (Candida albicans and Schizosaccharomyces pombe), but this alignment also includes orthologues from Arabidopsis thaliana. OPT transporters are thought to have 12-14 transmembrane domains and contain the following motif: SPYxEVRxxVxxxDDP .. +PF04069 Substrate binding domain of ABC-type glycine betaine transport system
Part of a high affinity multicomponent binding-protein-dependent transport system involved in bacterial osmoregulation. This domain is often fused to the permease component of the transporter complex. Family members are often integral membrane proteins or predicted to be attached to the membrane by a lipid anchor. Glycine betaine is involved in protection from high osmolarity environments for example in Bacillus subtilis . The family member OpuBC is closely related, and involved in choline transport. Choline is necessary for the biosynthesis of glycine betaine . L-carnitine is important for osmoregulation in Listeria monocytogenes. Family also contains proteins binding l-proline (ProX), histidine (HisX) and taurine (TauA).. +PF01718 Orbivirus non-structural protein NS1, or hydrophobic tubular protein
Pfam-B_1752 (release 4.1). This family consists of orbivirus non-structural protein NS1, or hydrophobic tubular protein. NS1 has no specific function in virus replication, it is however thought to play a role in transport of mature virus particles from virus inclusion bodies to the cell membrane . Orbivirus are part of the larger reoviridae which have a dsRNA genome of at least 10 segments encoding at least 10 viral proteins ; orbivirus found in this family include bluetongue virus, and African horsesickness virus.. +PF01616 Orbivirus NS3
Pfam-B_1029 (release 4.1). The function of this Orbivirus non structural protein is uncertain. However it may play a role on release of the virus from infected cells .. +PF00898 Orbivirus outer capsid protein VP2
Pfam-B_1525 (release 2.1). VP2 acts as an anchor for VP1 and VP3. VP2 contains a non-specific DNA and RNA binding domain in the N-terminus .. +PF01700 Orbivirus VP3 (T2) protein
Pfam-B_1622 (release 4.1). The orbivirus VP3 protein is part of the virus core and makes a 'subcore' shell made up of 120 copies of the 100K protein . VP3 particles can also bind RNA and are fundamental in the early stages of viral core formation . Also found in the family is structural core protein VP2 from broadhaven virus which is similar to VP3 in bluetongue virus . Orbivirus are part of the larger reoviridae which have a dsRNA genome of 10-12 linear segments ; orbivirus found in this family include bluetongue virus and epizootic hemorrhagic disease virus. . +PF05059 Orbivirus VP4 core protein
Pfam-B_5992 (release 7.7). Orbiviruses are double stranded RNA retroviruses of which the bluetongue virus is a member. The core of bluetongue virus (BTV) is a multienzyme complex composed of two major proteins (VP7 and VP3) and three minor proteins (VP1, VP4 and VP6) in addition to the viral genome. VP4 has been shown to perform all RNA capping activities and has both methyltransferase type 1 and type 2 activities associated with it .. +PF00901 Orbivirus outer capsid protein VP5
Pfam-B_1525 (release 2.1). cryoelectron microscopy indicates that VP5 is a trimer implying that there are 360 copies of VP5 per virion . . +PF01516 Orbivirus helicase VP6
Pfam-B_765 (release 4.0). The VP6 protein a minor protein in the core of the virion is probably the viral helicase .. +PF00897 Orbivirus inner capsid protein VP7
Pfam-B_1523 (release 2.1). In BTV, 260 trimers of VP7 are found in the core. The major proteins of the core are VP7 and VP3. VP7 forms an outer layer around VP3 .. +PF02072 Prepro-orexin
+PF03827 Orexin receptor type 2
+PF02999 Mlp; Orf-D;
Borrelia orf-D family. Pfam-B_1511 (release 6.4). Borrelia burgdorferi supercoiled plasmids encode multicopy tandem open reading frames called Orf-A, Orf-B, Orf-C and Orf-D. This family corresponds to Orf-D. The putative product of this gene has no known function.. +PF04160 Orf-X;
Pfam-B_3014 (release 7.3). +PF04061 ORMDL family
Pfam-B_4871 (release 7.3);. Evidence suggests that ORMDLs are involved in protein folding in the ER. Orm proteins have been identified as negative regulators of sphingolipid synthesis that form a conserved complex with serine palmitoyltransferase, the first and rate-limiting enzyme in sphingolipid production. This novel and conserved protein complex has been termed the SPOTS complex (serine palmitoyltransferase, Orm1/2, Tsc3, and Sac1).. +PF02784 Pyridoxal-dependent decarboxylase, pyridoxal binding domain
These pyridoxal-dependent decarboxylases act on ornithine, lysine, arginine and related substrates. This domain has a TIM barrel fold.. +PF00278 Pyridoxal-dependent decarboxylase, C-terminal sheet domain
These pyridoxal-dependent decarboxylases act on ornithine, lysine, arginine and related substrates. . +PF02088 Ornatin
+PF02250 35kD major secreted virus protein
Pfam-B_3549 (release 5.2). This family of orthopoxvirus secreted proteins (also known as T1 and A41) interact with members of both the CC and CXC superfamilies of chemokines. It has been suggested that these secreted proteins modulate leukocyte influx into virus-infected tissues .. +PF00213 ATP synthase delta (OSCP) subunit
The ATP D subunit from E. coli is the same as the OSCP subunit which is this family. The ATP D subunit from metazoa are found in family Pfam:PF00401.. +PF02566 OsmC-like protein
Pfam-B_2694 (release 7.0). Osmotically inducible protein C (OsmC) (Swiss:P23929) is a stress-induced protein found in E. coli. This family also contains an organic hydroperoxide detoxification protein (Swiss:O68390) that has a novel pattern of oxidative stress regulation .. +PF03207 Borrelia outer surface protein D (OspD)
Pfam-B_2915 (release 6.5). +PF02471 OspEF;
Borrelia outer surface protein E. Pfam-B_962 (release 5.4). This is a family of outer surface proteins (Osp) from the Borrelia spirochete . The family includes OspE, and OspEF-related proteins (Erp) . These proteins are coded for on different circular plasmids in the Borrelia genome.. +PF03968 OstA-like protein
This family of proteins are mostly uncharacterised. However the family does include E. coli OstA Swiss:P31554 that has been characterised as an organic solvent tolerance protein .. +PF04453 Organic solvent tolerance protein
Family involved in organic solvent tolerance in bacteria. The region contains several highly conserved, potentially catalytic, residues .. +PF00865 Osteopontin
Pfam-B_1593 (release 2.1). +PF00185 Aspartate/ornithine carbamoyltransferase, Asp/Orn binding domain
+PF02729 Aspartate/ornithine carbamoyltransferase, carbamoyl-P binding domain
+PF02338 OTU-like cysteine protease
This family is comprised of a group of predicted cysteine proteases, homologous to the Ovarian Tumour (OTU) gene in Drosophila. Members include proteins from eukaryotes, viruses and pathogenic bacterium. The conserved cysteine and histidine, and possibly the aspartate, represent the catalytic residues in this putative group of proteases.. +PF00724 oxidored_FMN;
NADH:flavin oxidoreductase / NADH oxidase family. Pfam-B_642 (release 2.1). +PF00174 oxidored_molyb;
Oxidoreductase molybdopterin binding domain. This domain is found in a variety of oxidoreductases. This domain binds to a molybdopterin cofactor. Xanthine dehydrogenases, that also bind molybdopterin, have essentially no similarity.. +PF00148 oxidored_nitro;
Nitrogenase component 1 type Oxidoreductase. +PF00361 oxidored_q1;
NADH-Ubiquinone/plastoquinone (complex I), various chains. Pfam-B_4 (release 1.0). This family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane.. +PF00662 oxidored_q1_N;
NADH-Ubiquinone oxidoreductase (complex I), chain 5 N-terminus. Pfam-B_22 (release 2.1). This sub-family represents an amino terminal extension of Pfam:PF00361. Only NADH-Ubiquinone chain 5 and eubacterial chain L are in this family. This sub-family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane.. +PF00420 oxidored_q2;
NADH-ubiquinone/plastoquinone oxidoreductase chain 4L. Pfam-B_193 (release 1.0). +PF00499 oxidored_q3;
NADH-ubiquinone/plastoquinone oxidoreductase chain 6. Pfam-B_61 (release 1.0). +PF00507 oxidored_q4;
NADH-ubiquinone/plastoquinone oxidoreductase, chain 3. Pfam-B_68 (release 1.0). +PF01059 oxidored_q5_N;
NADH-ubiquinone oxidoreductase chain 4, amino terminus. Pfam-B_381 (release 3.0). +PF01058 oxidored_q6;
NADH ubiquinone oxidoreductase, 20 Kd subunit. Pfam-B_1345 (release 3.0). +PF01237 Oxysterol-binding protein
+PF00543 Nitrogen regulatory protein P-II
P-II modulates the activity of glutamine synthetase.. +PF04275 Phosphomevalonate kinase
TIGRFAMs (release 2.0);. Phosphomevalonate kinase (EC:2.7.4.2) catalyses the phosphorylation of 5-phosphomevalonate into 5-diphosphomevalonate, an essential step in isoprenoid biosynthesis via the mevalonate pathway . This family represents the animal type of the enzyme. The other is the ERG8 type, found in plants and fungi, and some bacteria (see Pfam:PF00288).. +PF04699 p16_Arc;
ARP2/3 complex 16 kDa subunit (p16-Arc). Pfam-B_4180 (release 7.5). The Arp2/3 protein complex has been implicated in the control of actin polymerisation. The human complex consists of seven subunits which include the actin related proteins Arp2 and Arp3, and five others referred to as p41-Arc, p34-Arc, p21-Arc, p20-Arc, and p16-Arc. The precise function of p16-Arc is currently unknown. Its structure consists of a single domain containing a bundle of seven alpha helices [1,2].. +PF00864 ATP P2X receptor
Pfam-B_1590 (release 2.1). +PF04045 p34-Arc;
Arp2/3 complex, 34 kD subunit p34-Arc. Pfam-B_9846 (release 7.3);. Arp2/3 protein complex has been implicated in the control of actin polymerisation in cells. The human complex consists of seven subunits which include the actin related Arp2 and Arp3, and five others referred to as p41-Arc, p34-Arc, p21-Arc, p20-Arc, and p16-Arc . This family represents the p34-Arc subunit.. +PF00067 Cytochrome P450
Overington and HMM_iterative_training. Cytochrome P450s are haem-thiolate proteins involved in the oxidative degradation of various compounds. They are particularly well known for their role in the degradation of environmental toxins and mutagens. They can be divided into 4 classes, according to the method by which electrons from NAD(P)H are delivered to the catalytic site. Sequence conservation is relatively low within the family - there are only 3 absolutely conserved residues - but their general topography and structural fold are highly conserved. The conserved core is composed of a coil termed the 'meander', a four-helix bundle, helices J and K, and two sets of beta-sheets. These constitute the haem-binding loop (with an absolutely conserved cysteine that serves as the 5th ligand for the haem iron), the proton-transfer groove and the absolutely conserved EXXR motif in helix K. While prokaryotic P450s are soluble proteins, most eukaryotic P450s are associated with microsomal membranes. their general enzymatic function is to catalyse regiospecific and stereospecific oxidation of non-activated hydrocarbons at physiological temperatures .. +PF00870 P53 DNA-binding domain
Pfam-B_782 (release 3.0). This family contains one anomalous member, viz: Zea mays (Q6JAD8). This sequence is identical to human P53 and would appear to be a human contaminant within the Zea mays sampling effort.. +PF04636 PA26 p53-induced protein (sestrin)
Pfam-B_5416 (release 7.5). PA26 is a p53-inducible protein. Its function is unknown. It has similarity to Pfam:PF04636 in its N-terminus.. +PF02251 Proteasome activator pa28 alpha subunit
Pfam-B_2837 (release 5.2). PA28 activator complex (also known as 11s regulator of 20S proteasome) is a ring shaped hexameric structure of alternating alpha and beta subunits. This family represents the alpha subunit. The activator complex binds to the 20S proteasome and stimulates peptidase activity in an ATP-independent manner.. +PF02252 Proteasome activator pa28 beta subunit
Pfam-B_2809 (release 5.2). PA28 activator complex (also known as 11s regulator of 20S proteasome) is a ring shaped hexameric structure of alternating alpha and beta subunits. This family represents the beta subunit. The activator complex binds to the 20S proteasome and stimulates peptidase activity in an ATP-independent manner.. +PF05138 Phenylacetic acid catabolic protein
This family includes proteins such as PaaA and PaaC that are part of a catabolic pathway of phenylacetic acid . These proteins may form part of a dioxygenase complex.. +PF02758 PAAD_DAPIN;
PAAD/DAPIN/Pyrin domain. This domain is predicted to contain 6 alpha helices and to have the same fold as the Pfam:PF00531 domain. This similarity may mean that this is a protein-protein interaction domain.. +PF00658 Poly-adenylate binding protein, unique domain
The region featured in this family is found towards the C-terminus of poly(A)-binding proteins (PABPs). These are eukaryotic proteins that, through their binding of the 3' poly(A) tail on mRNA, have very important roles in the pathways of gene expression. They seem to provide a scaffold on which other proteins can bind and mediate processes such as export, translation and turnover of the transcripts. Moreover, they may act as antagonists to the binding of factors that allow mRNA degradation, regulating mRNA longevity. PABPs are also involved in nuclear transport. PABPs interact with poly(A) tails via RNA-recognition motifs (Pfam:PF00076) . Note that the PABP C-terminal region is also found in members of the hyperplastic discs protein (HYD) family of ubiquitin ligases that contain HECT domains - these are also included in this family.. +PF03068 Protein-arginine deiminase (PAD)
Pfam-B_2195 (release 6.4). Members of this family are found in mammals. In the presence of calcium ions, PAD enzymes EC:3.5.3.15 catalyse the post-translational modification reaction responsible for the formation of citrulline residues: Protein L-arginine + H2O <=> Protein L-citrulline + NH3. Several types are recognised (and included in the family) on the basis of molecular mass, substrate specificity, and tissue localisation. The expression of type I PAD is known to be under the control of oestrogen .. +PF04371 Porphyromonas-type peptidyl-arginine deiminase
Peptidyl-arginine deiminase (PAD) enzymes catalyse the deimination of the guanidino group from carboxy-terminal arginine residues of various peptides to produce ammonia. PAD from Porphyromonas gingivalis (PPAD) appears to be evolutionarily unrelated to mammalian PAD (Pfam:PF03068), which is a metalloenzyme. PPAD is thought to belong to the same superfamily as aminotransferase and arginine deiminase, and to form an alpha/beta propeller structure. This family has previously been named PPADH (Porphyromonas peptidyl-arginine deiminase homologues) . The predicted catalytic residues in PPAD (Swiss:Q9RQJ2) are Asp130, Asp187, His236, Asp238 and Cys351 . These are absolutely conserved with the exception of Asp187 which is absent in two family members. PPAD is also able to catalyse the deimination of free L-arginine, but has primarily peptidyl-arginine specificity. It may have a FMN cofactor .. +PF03551 Transcriptional regulator PadR-like family
Pfam-B_1014 (release 7.0). Members of this family are transcriptional regulators that appear to be related to the Pfam:PF01047 family. This family includes PadR Swiss:Q9EXE6 a protein that is involved in negative regulation of phenolic acid metabolism.. +PF03283 Pectinacetylesterase
Pfam-B_1589 (release 6.5). +PF03403 Platelet-activating factor acetylhydrolase, isoform II
Pfam-B_3469 (release 6.6). Platelet-activating factor acetylhydrolase (PAF-AH) is a subfamily of phospholipases A2, responsible for inactivation of platelet-activating factor through cleavage of an acetyl group. Three known PAF-AHs are the brain heterotrimeric PAF-AH Ib, whose catalytic beta and gamma subunits are aligned in Pfam:PF02266, the extracellular, plasma PAF-AH (pPAF-AH), and the intracellular PAF-AH isoform II (PAF-AH II). This family aligns pPAF-AH and PAF-AH II, whose similarity was previously noted.. +PF03985 Paf1
Pfam-B_ (release 7.3). Members of this family are components of the RNA polymerase II associated Paf1 complex. The Paf1 complex functions during the elongation phase of transcription in conjunction with Spt4-Spt5 and Spt16-Pob3i [1,2]. . +PF02671 Paired amphipathic helix repeat
Pfam-B_281 (release 5.4). This family contains the paired amphipathic helix repeat. The family contains the yeast SIN3 gene Swiss:P22579 (also known as SDI1) that is a negative regulator of the yeast HO gene . This repeat may be distantly related to the helix-loop-helix motif, which mediate protein-protein interactions.. +PF00221 PAL;
Aromatic amino acid lyase. This family includes proteins with phenylalanine ammonia-lyase, EC:4.3.1.24, histidine ammonia-lyase, EC:4.3.1.3, and tyrosine aminomutase, EC:5.4.3.6, activities [1-3].. +PF02089 Palmitoyl protein thioesterase
+PF02569 Pantoate-beta-alanine ligase
Pantoate-beta-alanine ligase, also known as pantothenate synthase, (EC:6.3.2.1) catalyses the formation of pantothenate from pantoate and alanine .. +PF02548 Ketopantoate hydroxymethyltransferase
Ketopantoate hydroxymethyltransferase (EC:2.1.2.11) is the first enzyme in the pantothenate biosynthesis pathway.. +PF02711 E4 protein
Pfam-B_1589 (release 5.5). This is a family of Papillomavirus proteins, E4, coded for by ORF4. A splice variant, E1--E4, exists but neither the function of E4 nor E1--E4 is known .. +PF04755 PAP_fibrillin
Pfam-B_3698 (release 7.5). This family identifies a conserved region found in a number of plastid lipid-associated proteins (PAPs), and in a number of putative fibrillin proteins.. +PF04926 Poly(A) polymerase predicted RNA binding domain
Pfam-B_1341 (release 7.6). Based on its similarity structurally to the RNA recognition motif this domain is thought to be RNA binding .. +PF04795 PAPA-1-like conserved region
Pfam-B_6501 (release 7.5). Family of proteins with a conserved region found in PAPA-1, a PAP-1 binding protein. . +PF03333 Adhesin biosynthesis transcription regulatory protein
Pfam-B_3068 (release 6.5). This family includes PapB, DaaA, FanA, FanB, and AfaA.. +PF03628 PapG chaperone-binding domain
Pfam-B_3074 (release 7.0). PapG, the adhesin of the P-pili, is situated at the tip and is only a minor component of the whole pilus structure. A two-domain structure has been postulated for PapG; a carbohydrate binding N-terminus and chaperone binding C-terminus (this domain). The chaperone-binding domain is highly conserved, and is essential for the correct assembly of the pili structure when aided by the chaperone molecule PapD [1,2]. . +PF03627 PapG carbohydrate binding domain
Pfam-B_3074 (release 7.0). PapG, the adhesin of the P-pili, is situated at the tip and is only a minor component of the whole pilus structure. A two-domain structure has been postulated for PapG; a carbohydrate binding N-terminus (this domain) and chaperone binding C-terminus. The carbohydrate-binding domain interacts with the receptor glycan [1,2].. +PF03025 Papillomavirus E5
Pfam-B_1916 (release 6.4). The E5 protein from papillomaviruses is about 80 amino acids long. The proteins contain three regions that are predicted to be transmembrane alpha helices. The function of this protein is unknown.. +PF02380 T-antigen specific domain
Pfam-B_1131 (release 5.2). This domain represents a conserved region in papovavirus small and middle T-antigens. It is found as the N-terminal domain in the small T-antigen, and is centrally located in the middle T-antigen.. +PF01507 Phosphoadenosine phosphosulfate reductase family
Pfam-B_590 (release 4.0). This domain is found in phosphoadenosine phosphosulfate (PAPS) reductase enzymes or PAPS sulfotransferase. PAPS reductase is part of the adenine nucleotide alpha hydrolases superfamily also including N type ATP PPases and ATP sulphurylases . The enzyme uses thioredoxin as an electron donor for the reduction of PAPS to phospho-adenosine-phosphate (PAP) [1,2]. It is also found in NodP nodulation protein P from Rhizobium which has ATP sulfurylase activity (sulfate adenylate transferase) . . +PF03285 Paralemmin
Pfam-B_4064 (release 6.5). +PF01508 Paramecium surface antigen domain
This domain is a cysteine rich extracellular repeat found in surface antigens of Paramecium. The domain contains 8 cysteine residues. . +PF03210 Paramyx_P_V;
Paramyxovirus P/V phosphoprotein C-terminal. Pfam-B_2037 (release 6.5). Paramyxoviridae P genes are able to generate more than one product, using alternative reading frames and RNA editing. The P gene encodes the structural phosphoprotein P. In addition, it encodes several non-structural proteins present in the infected cell but not in the virus particle. This family includes phosphoprotein P and the non-structural phosphoprotein V from different paramyxoviruses. Phosphoprotein P is essential for the activity of the RNA polymerase complex which it forms with another subunit, L Pfam:PF00946. Although all the catalytic activities of the polymerase are associated with the L subunit, its function requires specific interactions with phosphoprotein P . The P and V phosphoproteins are amino co-terminal, but diverge at their C-termini. This difference is generated by an RNA-editing mechanism in which one or two non-templated G residues are inserted into P-gene-derived mRNA. In measles virus and Sendai virus, one G residue is inserted and the edited transcript encodes the V protein. In mumps, simian virus type 5 and Newcastle disease virus, two G residues are inserted, and the edited transcript codes for the P protein . Being phosphoproteins, both P and V are rich in serine and threonine residues over their whole lengths. In addition, the V proteins are rich in cysteine residues at the C-termini . This C-terminal region of the P phosphoprotein is likely to be the nucleocapsid-binding domain, and is found to be intrinsically disordered and thus liable to induced folding .. +PF00946 Paramyx_RNA_pol;
Mononegavirales RNA dependent RNA polymerase . Pfam-B_586 (release 3.0). Members of the Mononegavirales including the Paramyxoviridae, like other non-segmented negative strand RNA viruses, have an RNA-dependent RNA polymerase composed of two subunits, a large protein L and a phosphoprotein P.\. This is a protein family of the L protein. The L protein confers the RNA polymerase activity on the complex. The P protein acts as a transcription factor .. +PF01692 Paramyxovirus non-structural protein c
Pfam-B_1202 (release 4.1). This family consists of the C proteins (C', C, Y1, Y2) found in Paramyxovirinae; human parainfluenza, and sendai virus. The C proteins affect viral RNA synthesis having both a positive and negative effect during the course of infection . Paramyxoviruses have a negative strand ssRNA genome of 15.3kb from which six mRNAs are transcribed, five of these are monocistronic.\. The P/C mRNA is polycistronic and has two overlapping open reading frames P and C, C encodes the nested C proteins C', C, Y1 and Y2 .. +PF00973 Paramyx_ncap;
Paramyxovirus nucleocapsid protein. Pfam-B_158 (release 3.0). The nucleocapsid protein is referred to as NP. NP is the major structural component of the nucleocapsid. The protein is approx. 58 kDa. 2600 NP molecules go to tightly encapsidate the RNA. NP interacts with several other viral encoded proteins, all of which are involved in controlling replication. {NP-NP, NP-P, NP-(PL), and NP-V}[1,2,3].. +PF02725 Non-structural protein C
Pfam-B_1636 (release 5.5). This family consists of the polymerase accessory protein C from members of the paramyxoviridae.. +PF01806 Paramyxovirinae P phosphoprotein C-terminal region
Pfam-B_1628 (release 4.1), Karlin D. The subfamily Paramyxovirinae of the family Paramyxoviridae now contains as main genera the Rubulaviruses, avulaviruses, respiroviruses, Henipavirus-es and morbilliviruses. Protein P is the best characterised, structurally of the replicative complex of N, P and L proteins and consists of two functionally distinct moieties, an N-terminal PNT, and a C-terminal PCT . The P protein is an essential part of the viral RNA polymerase complex formed from the P and L proteins . P protein plays a crucial role in the enzyme by positioning L onto the N/RNA template through an interaction with the C-terminal domain of N. Without P, L is not functional.The C-terminal part of P (PCT) is only functional as an oligomer and forms with L the polymerase complex. PNT is poorly conserved and unstructured in solution while PCT contains the oligomerisation domain (PMD) that folds as a homotetrameric coiled coil (40) containing the L binding region and a C-terminal partially folded domain, PX (residues 474 to 568), identified as the nucleocapsid binding site. Interestingly, PX is also expressed as an independent polypeptide in infected cells. PX has a C-subdomain (residues 516 to 568) that consists of three {alpha}-helices arranged in an antiparallel triple-helical bundle linked to an unfolded flexible N-subdomain (residues 474 to 515).. +PF01279 Parathyroid hormone family
+PF02195 ParB-like nuclease domain
Alignment kindly provided by SMART. +PF00644 Poly(ADP-ribose) polymerase catalytic domain
Poly(ADP-ribose) polymerase catalyses the covalent attachment of ADP-ribose units from NAD+ to itself and to a limited number of other DNA binding proteins, which decreases their affinity for DNA. Poly(ADP-ribose) polymerase is a regulatory component induced by DNA damage. The carboxyl-terminal region is the most highly conserved region of the protein. Experiments have shown that a carboxyl 40 kDa fragment is still catalytically active .. +PF02877 Poly(ADP-ribose) polymerase, regulatory domain
Poly(ADP-ribose) polymerase catalyses the covalent attachment of ADP-ribose units from NAD+ to itself and to a limited number of other DNA binding proteins, which decreases their affinity for DNA. Poly(ADP-ribose) polymerase is a regulatory component induced by DNA damage. The carboxyl-terminal region is the most highly conserved region of the protein. Experiments have shown that a carboxyl 40 kDa fragment is still catalytically active .. +PF01358 Poly A polymerase regulatory subunit
+PF00740 Parvovirus coat protein VP2
Pfam-B_436 (release 2.1) & Pfam-B_445 (release 3.0). This protein, together with VP1 forms a capsomer. Both of these proteins are formed from the same transcript using alternative splicing.\. As a result, VP1 and VP2 differ only in the N-terminal region of VP1.\. VP2 is involved in packaging the viral DNA.. +PF01057 Parvovirus non-structural protein NS1
Pfam-B_400 (release 3.0). This family also contains the NS2 protein. Parvoviruses encode two non-structural proteins, NS1 and NS2. The mRNA for NS2 contains the coding sequence for the first 87 amino acids of NS1, then by an alternative splicing mechanism mRNA from a different reading frame, encoding the last 78 amino acids, makes up the full length of the NS2 mRNA . NS1, is the major non-structural protein. It is essential for DNA replication. It is an 83-kDa nuclear phosphoprotein. It has DNA helicase and ATPase activity . . +PF00989 PAS fold
Sequences from SMART alignment. The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs . The PAS fold appears in archaea, eubacteria and eukarya.. +PF03793 PASTA domain
This domain is found at the C termini of several Penicillin-binding proteins and bacterial serine/threonine kinases . It binds the beta-lactam stem, which implicates it in sensing D-alanyl-D-alanine - the PBP transpeptidase substrate. It is a small globular fold consisting of 3 beta-sheets and an alpha-helix. The name PASTA is derived from PBP and Serine/Threonine kinase Associated domain.. +PF00292 'Paired box' domain
+PF03535 Paxillin family
+PF03717 Penicillin-binding Protein dimerisation domain
This domain is found at the N terminus of Class B High Molecular Weight Penicillin-Binding Proteins. Its function has not been precisely defined, but is strongly implicated in PBP polymerisation. The domain forms a largely disordered 'sugar tongs' structure.. +PF01395 PBP/GOBP family
Pfam-B_1765 (release 3.0). The olfactory receptors of terrestrial animals exist in an aqueous environment, yet detect odorants that are primarily hydrophobic. The aqueous solubility of hydrophobic odorants is thought to be greatly enhanced via odorant binding proteins which exist in the extracellular fluid surrounding the odorant receptors . This family is composed of pheromone binding proteins (PBP), which are male-specific and associate with pheromone-sensitive neurons and general-odorant binding proteins (GOBP).. +PF00427 Phycobilisome Linker polypeptide
Pfam-B_159 (release 1.0). +PF03792 PBX;
Pfam-B_3021 (release 7.0). The PBC domain is a member of the TALE (three-amino-acid loop extension) superclass of homeodomain proteins . . +PF02229 Transcriptional Coactivator p15 (PC4)
Pfam-B_6534 (release 5.2). p15 has a bipartite structure composed of an amino-terminal regulatory domain and a carboxy-terminal cryptic DNA-binding domain . The DNA-binding activity of the carboxy-terminal is disguised by the amino-terminal p15 domain. Activity is controlled by protein kinases that target the regulatory domain.. +PF01851 Proteasome/cyclosome repeat
+PF01135 Protein-L-isoaspartate(D-aspartate) O-methyltransferase (PCMT)
+PF00705 PCNA;
Proliferating cell nuclear antigen, N-terminal domain. Pfam-B_598 (release 2.1). N-terminal and C-terminal domains of PCNA are topologically identical. Three PCNA molecules are tightly associated to form a closed ring encircling duplex DNA.. +PF02747 Proliferating cell nuclear antigen, C-terminal domain
Pfam-B_598 (release 2.1). N-terminal and C-terminal domains of PCNA are topologically identical. Three PCNA molecules are tightly associated to form a closed ring encircling duplex DNA.. +PF02429 Peridinin-chlorophyll A binding protein
Pfam-B_2945 (release 5.4). Peridinin-chlorophyll-protein, a water-soluble light-harvesting complex that has a blue-green absorbing carotenoid as its main pigment, is present in most photosynthetic dinoflagellates. These proteins are composed of two similar repeated domains. These domains constitute a scaffold with pseudo-twofold symmetry surrounding a hydrophobic cavity filled by two lipid, eight peridinin, and two chlorophyll a molecules .. +PF01884 PcrB family
This family contains proteins that are related to PcrB Swiss:Q53726. The function of these proteins is unknown.. +PF04194 Programmed cell death protein 2, C-terminal putative domain
Pfam-B_19053 (release 7.3);. +PF04868 Retinal cGMP phosphodiesterase, gamma subunit
Pfam-B_4858 (release 7.6). Retinal rod and cone cGMP phosphodiesterases function as the effector enzymes in the vertebrate visual transduction cascade. This family represents the inhibitory gamma subunit , which is also expressed outside retinal tissues and has been shown to interact with the G-protein-coupled receptor kinase 2 signalling system to regulate the epidermal growth factor- and thrombin-dependent stimulation of p42/p44 mitogen-activated protein kinase in human embryonic kidney 293 cells .. +PF00233 PDEase;
3'5'-cyclic nucleotide phosphodiesterase. +PF02112 cAMP phosphodiesterases class-II
+PF00341 PDGF/VEGF domain
+PF04692 Platelet-derived growth factor, N terminal region
This family consists of the amino terminal regions of platelet-derived growth factor (PDGF, Pfam:PF00341) A and B chains.. +PF00800 Prephenate dehydratase
Pfam-B_1095 (release 2.1). This protein is involved in Phenylalanine biosynthesis. This protein catalyses the decarboxylation of prephenate to phenylpyruvate.. +PF03740 Pyridoxal phosphate biosynthesis protein PdxJ
Members of this family belong to the PdxJ family that catalyses the condensation of 1-deoxy-d-xylulose-5-phosphate (DXP) and 1-amino-3-oxo-4-(phosphohydroxy)propan-2-one to form pyridoxine 5'-phosphate (PNP). This reaction is involved in de novo synthesis of pyridoxine (vitamin B6) and pyridoxal phosphate .. +PF00595 PDZ domain (Also known as DHR or GLGF)
PDZ domains are found in diverse signaling proteins.. +PF00544 pec_lyase;
This enzyme forms a right handed beta helix structure. Pectate lyase is an enzyme involved in the maceration and soft rotting of plant tissue. . +PF05041 Pecanex protein (C-terminus)
Pfam-B_5192 (release 7.7). This family consists of C terminal region of the pecanex protein homologues. The pecanex protein is a maternal-effect neurogenic gene found in Drosophila .. +PF03211 Pectate lyase
Pfam-B_2273 (release 6.5). +PF04191 Phospholipid methyltransferase
Pfam-B_14367 (release 7.3);. The S. cerevisiae phospholipid methyltransferase (EC:2.1.1.16) has a broad substrate specificity of unsaturated phospholipids .. +PF03965 Pencillinase_R;
Penicillinase repressor. DOMO_DM03102 & Pfam-B_5099 (release 14.0). The penicillinase repressor negatively regulates expression of the penicillinase gene. The N-terminal region of this protein is involved in operator recognition, while the C-terminal is responsible for dimerisation of the protein . . +PF00805 Pentapeptide repeats (8 copies)
These repeats are found in many cyanobacterial proteins. The repeats were first identified in hglK . The function of these repeats is unknown. The structure of this repeat has been predicted to be a beta-helix . The repeat can be approximately described as A(D/N)LXX, where X can be any amino acid.. +PF00354 pentaxin;
Pentaxins are also known as pentraxins.. +PF02896 PEP-utilising enzyme, TIM barrel domain
+PF01327 Polypeptide deformylase
+PF01562 Reprolysin family propeptide
Pfam-B_117 (release 4.0). This region is the propeptide for members of peptidase family M12B. The propeptide contains a sequence motif similar to the "cysteine switch" of the matrixins. This motif is found at the C terminus of the alignment but is not well aligned.. +PF03413 Pep_M4_propep;
Peptidase propeptide and YPEB domain. This region is likely to have a protease inhibitory function (personal obs:C Yeats). This model is likely to miss some members of this family as the separation from signal to noise is not clear. The name is derived from Peptidase & Bacillus subtilis YPEB. . +PF00311 Phosphoenolpyruvate carboxylase
+PF00821 Phosphoenolpyruvate carboxykinase
Pfam-B_1309 (release 2.1). Catalyses the formation of phosphoenolpyruvate by decarboxylation of oxaloacetate.. +PF01293 Phosphoenolpyruvate carboxykinase
+PF01195 Peptidyl-tRNA hydrolase
+PF03564 Peptidase_A16; Peptidase_A16_N;
Protein of unknown function (DUF1759). This is a family of proteins of unknown function. Most of the members are gag-polyproteins.. +PF03566 Peptidase family A21
+PF02160 Cauliflower mosaic virus peptidase (A3)
+PF01828 Peptidase A4 family
+PF01829 Peptidase A6 family
+PF01252 SPASE_II;
Signal peptidase (SPase) II. +PF01640 Peptidase C10 family
Pfam-B_1522 (release 4.1). This family represents just the active peptide part of these proteins. Residues 1-120 are not part of the model as they form the pro-peptide, which before cleavage blocks the active site from the substrate. The catalytic residues of histidine and cysteine are brought close together at the active site by the folding of the active peptide.. +PF03415 Clostripain family
+PF01088 UCH;
Ubiquitin carboxyl-terminal hydrolase, family 1. +PF01470 Pyroglutamyl peptidase
+PF01831 Peptidase C16 family
+PF00648 Cys_protease_2;
Calpain family cysteine protease. +PF01478 Peptidase_C20;
Type IV leader peptidase family. Peptidase A24, or the prepilin peptidase as it is also known, processes the N-terminus of the prepilins . The processing is essential for the correct formation of the pseudopili of type IV bacterial protein secretion. The enzyme is found across eubacteria and archaea .. +PF03510 Endoptase_C24;
2C endopeptidase (C24) cysteine protease family. +PF01364 Peptidase family C25
Pfam-B_516 (release 3.0). +PF03785 Peptidase family C25, C terminal ig-like domain
Pfam-B_516 (release 3.0). +PF03412 Peptidase C39 family
Lantibiotic and non-lantibiotic bacteriocins are synthesised as precursor peptides containing N-terminal extensions (leader peptides) which are cleaved off during maturation. Most non-lantibiotics and also some lantibiotics have leader peptides of the so-called double-glycine type. These leader peptides share consensus sequences and also a common processing site with two conserved glycine residues in positions -1 and -2. The double- glycine-type leader peptides are unrelated to the N-terminal signal sequences which direct proteins across the cytoplasmic membrane via the sec pathway. Their processing sites are also different from typical signal peptidase cleavage sites, suggesting that a different processing enzyme is involved. Peptide bacteriocins are exported across the cytoplasmic membrane by a dedicated ATP-binding cassette (ABC) transporter. The ABC transporter is the maturation protease and its proteolytic domain resides in the N-terminal part of the protein . This peptidase domain is found in a wide range of ABC transporters, however the presumed catalytic cysteine and histidine are not conserved in all members of this family.. +PF00770 Adenovirus endoprotease
Pfam-B_900 (release 2.1). This family of adenovirus thiol endoproteases specifically cleave Gly-Ala peptides in viral precursor peptides.. +PF03568 Peptidase family C50
+PF03421 Peptidase_C55;
YopJ Serine/Threonine acetyltransferase. The Yersinia effector YopJ inhibits the innate immune response by blocking MAP kinase and NFkappaB signaling pathways. YopJ is a serine/threonine acetyltransferase which regulates signalling pathways by blocking phosphorylation . Specifically, YopJ has been shown to block phosphorylation of active site residues . It has also been shown that YopJ acetyltransferase is activated by eukaryotic host cell inositol hexakisphosphate . This family was previously incorrectly annotated in Pfam as being a peptidase family.. +PF03290 Pox_I7L_G1L;
Vaccinia virus I7 processing peptidase. Pfam-B_4082 (release 6.5). +PF00851 Helper component proteinase
Pfam-B_326 (release 3.0). This protein is found in genome polyproteins of potyviruses.. +PF01830 Peptidase C7 family
+PF03569 Peptidase family C8
+PF01707 Peptidase family C9
+PF00413 matrixin;
The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis.. +PF00675 Insulinase;
Insulinase (Peptidase family M16). Pfam-B_88 (release 2.1). +PF00883 Cytosol aminopeptidase family, catalytic domain
Pfam-B_990 (release 3.0). The two associated zinc ions and the active site are entirely enclosed within the C-terminal catalytic domain in leucine aminopeptidase.. +PF01401 Angiotensin-converting enzyme
Members of this family are dipeptidyl carboxydipeptidases (cleave carboxyl dipeptides) and most notably convert angiotensin I to angiotensin II. Many members of this family contain a tandem duplication of the 600 amino acid peptidase domain, both of these are catalytically active. Most members are secreted membrane bound ectoenzymes.. +PF00557 pep_M24;
Metallopeptidase family M24. This family contains metallopeptidases. It also contains non-peptidase homologues such as the N terminal domain of Spt16 which is a histone H3-H4 binding module .. +PF01742 Clostridial neurotoxin zinc protease
Pfam-B_407 (release 4.2). These toxins are zinc proteases that block neurotransmitter release by proteolytic cleavage of synaptic proteins such as synaptobrevins, syntaxin and SNAP-25.. +PF02073 Thermophilic metalloprotease (M29)
+PF01432 Peptidase family M3
This is the Thimet oligopeptidase family, large family of mammalian and bacterial oligopeptidases that cleave medium sized peptides. The group also contains mitochondrial intermediate peptidase which is encoded by nuclear DNA but functions within the mitochondria to remove the leader sequence.. +PF02074 Carboxypeptidase Taq (M32) metallopeptidase
+PF02102 Deuterolysin metalloprotease (M35) family
+PF02128 Fungalysin metallopeptidase (M36)
+PF01447 Thermolysin metallopeptidase, catalytic domain
+PF03410 Protein_G1;
Pfam-B_4417 (release 6.6). Protein G1, named after the vaccinia virus protein, is a glycoprotein expressed by many Poxviridae.. +PF01427 Peptidase_M45;
D-ala-D-ala dipeptidase. +PF01435 Peptidase family M48
+PF03571 Peptidase family M49
+PF02868 Thermolysin metallopeptidase, alpha-helical domain
+PF04951 D-aminopeptidase
Bacillus subtilis DppA is a binuclear zinc-dependent, D-specific aminopeptidase. The structure reveals that DppA is a new example of a 'self-compartmentalising protease', a family of proteolytic complexes. Proteasomes are the most extensively studied representatives of this family. The DppA enzyme is composed of identical 30 kDa subunits organised in a decamer with 52 point-group symmetry. A 20 A wide channel runs through the complex, giving access to a central chamber holding the active sites. The structure shows DppA to be a prototype of a new family of metalloaminopeptidases characterised by the SXDXEG key sequence . The only known substrates are D-ala-D-ala and D-ala-gly-gly.. +PF02031 Streptomyces extracellular neutral proteinase (M7) family
+PF01457 Leishmanolysin
Prodom_3085 (release 99.1). +PF01752 Collagenase
This family of enzymes break down collagens.. +PF00768 D-alanyl-D-alanine carboxypeptidase
Pfam-B_864 (release 2.1). +PF02113 D-Ala-D-Ala carboxypeptidase 3 (S13) family
+PF02129 X-Pro dipeptidyl-peptidase (S15 family)
IPR000383 & Pfam-B_2704 (Release 7.5). +PF00716 Assemblin (Peptidase family S21)
Pfam-B_729 (release 2.1). +PF00717 Peptidase S24-like
Pfam-B_616 (release 2.1). +PF03572 Peptidase_S41;
Peptidase family S41. +PF03574 Peptidase family S48
+PF03575 Peptidase family S51
+PF03576 Peptidase_T4;
Peptidase family S58 . +PF00082 subtilase;
Subtilases are a family of serine proteases. They appear to have independently and convergently evolved an Asp/Ser/His catalytic triad, like that found in the trypsin serine proteases (see Pfam:PF00089). Structure is an alpha/beta fold containing a 7-stranded parallel beta sheet, order 2314567.. +PF00326 Prolyl_oligopep;
Prolyl oligopeptidase family. +PF03418 Peptidase_U3; Peptidase_M63;
Germination protease. +PF01136 Peptidase family U32
+PF03577 Peptidase_U34;
Peptidase family C69. +PF03419 Sporulation factor SpoIIGA
+PF03411 Peptidase_U6;
Penicillin-insensitive murein endopeptidase. +PF01343 Peptidase_U7;
Peptidase family S49. Pfam-B_707 (release 2.1). +PF03420 Prohead core protein protease
+PF03036 perilipin;
Pfam-B_1154 (release 6.4). The perilipin family includes lipid droplet-associated protein (perilipin) and adipose differentiation-related protein (adipophilin).. +PF01497 Periplasmic binding protein
Pfam-B_461 (release 4.0). This family includes bacterial periplasmic binding proteins. Several of which are involved in iron transport.. +PF00532 periplasmic_binding_like;
Periplasmic binding proteins and sugar binding domain of LacI family. MRC-LMB Genome group. This family includes the periplasmic binding proteins, and the LacI family transcriptional regulators. The periplasmic binding proteins are the primary receptors for chemotaxis and transport of many sugar based solutes. The LacI family of proteins consist of transcriptional regulators related to the lac repressor. In this case, generally the sugar binding domain binds a sugar which changes the DNA binding activity of the repressor domain (Pfam:PF00356).. +PF00141 Peroxidase
Prosite; PfamB-105, Release 14.0;. +PF01328 Peroxidase, family 2
The peroxidases in this family do not have similarity to other peroxidases.. +PF04088 Peroxin 13, N-terminal region
Pfam-B_8055 (release 7.3);. Both termini of the Peroxin-13 are oriented to the cytosol. Peroxin-13 is required for peroxisomal association of peroxin-14 .. +PF04882 Peroxin-3
Pfam-B_6513 (release 7.6). Peroxin-3 is a peroxisomal protein. It is thought to be involved in membrane vesicle assembly prior to the translocation of matrix proteins .. +PF03212 Pertactin
Pfam-B_2005 (release 6.5). +PF02917 Pertussis toxin, subunit 1
+PF02918 Pertussis toxin, subunit 2 and 3, C-terminal domain
+PF02529 Cytochrome B6-F complex subunit 5
Pfam-B_1348 (release 5.4). This family consists of cytochrome B6-F complex subunit 5 (PetG). The cytochrome bf complex found in green plants, eukaryotic algae and cyanobacteria, connects photosystem I to photosystem II in the electron transport chain, functioning as a plastoquinol:plastocyanin/cytochrome c6 oxidoreductase . PetG or subunit 5 is associated with the bf complex and the absence of PetG affects either the assembly or stability of the cytochrome bf complex in Chlamydomonas reinhardtii .. +PF05115 Cytochrome B6-F complex subunit VI (PetL)
Pfam-B_6510 (release 7.7). This family consists of several Cytochrome B6-F complex subunit VI (PetL) proteins found in several plant species. PetL is one of the small subunits which make up The cytochrome b(6)f complex. PetL is strictly required neither for the accumulation nor for the function of cytochrome b6f; in its absence, however, the complex becomes unstable in vivo in aging cells and labile in vitro. It has been suggested that the N-terminus of the protein is likely to lie in the thylakoid lumen .. +PF03742 PetN
Pfam-B_3260 (release 7.0). PetN is a small hydrophobic protein, crucial for cytochrome b6-f complex assembly and/or stability. . +PF04614 Pex19 protein family
+PF04757 Pex2 / Pex12 amino terminal region
This region is found at the N terminal of a number of known and predicted peroxins including Pex2, Pex10 and Pex12. This conserved region is usually associated with a C terminal ring finger (Pfam:PF00097) domain.. +PF03011 PFEMP DBL domain
Pfam-B_822 (release 6.4). PfEMP1 (Plasmodium falciparum erythrocyte membrane protein) has been identified as the rosetting ligand of the malaria parasite P. falciparum [1,2]. Rosetting is the adhesion of infected erythrocytes with uninfected erythrocytes in the vasculature of the infected organ, and is associated with severe malaria. PfEMP1 interacts with Complement Receptor One on uninfected erythrocytes to form rosettes . The extreme variation within these proteins and the grouping of var genes implies that var gene recombination preferentially occurs within var gene groups. These groups reflect a functional diversification that has evolved to cope with the varying conditions of transmission and host immune response met by the parasite . A recombination hotspot was uncovered between Duffy-binding-like (DBL) subdomains . Solution of the crystal structure of the N-terminal and first DBL region of PfEMP1 from the VarO variant of the PfEMP1 protein is found to be directly implicated in rosetting as the heparin-binding site .. +PF00365 Phosphofructokinase
+PF02901 Pyruvate formate lyase
+PF01471 Putative peptidoglycan binding domain
Pfam-B_2277 (release 4.0). This domain is composed of three alpha helices . This domain is found at the N or C terminus of a variety of enzymes involved in bacterial cell wall degradation . This domain may have a general peptidoglycan binding function. This family is found N-terminal to the catalytic domain of matrixins . The domain is found to bind peptidoglycan experimentally .. +PF00300 PGAM;
Histidine phosphatase superfamily (branch 1). The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue. Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches. The larger branch 1 contains a wide variety of catalytic functions, the best known being fructose 2,6-bisphosphatase (found in a bifunctional protein with 2-phosphofructokinase) and cofactor-dependent phosphoglycerate mutase. The latter is an unusual example of a mutase activity in the superfamily: the vast majority of members appear to be phosphatases. The bacterial regulatory protein phosphatase SixA is also in branch 1 and has a minimal, and possible ancestral-like structure, lacking the large domain insertions that contribute to binding of small molecules in branch 1 members.. +PF00342 Phosphoglucose isomerase
Phosphoglucose isomerase catalyses the interconversion of glucose-6-phosphate and fructose-6-phosphate.. +PF00162 Phosphoglycerate kinase
+PF00408 PGM_PMM;
Phosphoglucomutase/phosphomannomutase, C-terminal domain. +PF02878 Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain I
+PF02879 Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain II
+PF02880 Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain III
+PF04608 Phosphatidylglycerophosphatase A
Pfam-B_5195 (release 7.5). This family represents a family of bacterial phosphatidylglycerophosphatases (EC:3.1.3.27), known as PgpA. It appears that bacteria possess several phosphatidylglycerophosphatases, and thus, PgpA is not essential in Escherichia coli .. +PF03334 Na+/H+ antiporter subunit
Pfam-B_3611 (release 6.5). This family includes PhaG from Rhizobium meliloti Swiss:Q9ZNG0, MnhG from Staphylococcus aureus Swiss:Q9ZNG0, YufB from Bacillus subtilis Swiss:O05227.. +PF02304 Scaffold protein B
Pfam-B_9648 (release 5.2). This is a family of proteins from single-stranded DNA bacteriophages. Scaffold proteins B and D are required for procapsid formation. Sixty copies of the internal scaffold protein B are found in the procapsid.. +PF04717 phage_base_V;
Phage-related baseplate assembly protein. Pfam-B_5996 (release 7.5). Family of phage baseplate assembly proteins responsible for forming the small spike at the end of the tail . Also found in bacteria, probably the result of horizontal transmission.. +PF03864 Phage major capsid protein E
Major capsid protein E is involved with the stabilisation of the condensed form of the DNA molecule in phage heads .. +PF05125 Phage major capsid protein, P2 family
TIGRFAMs (release 2.0);. +PF05144 Phage replication protein CRI
TIGRFAMs (release 2.0);. The phage replication protein CRI, is also known as Gene II, is essential for DNA replication. . +PF02303 Helix-destabilising protein
Pfam-B_9239 (release 5.2). This family contains the bacteriophage helix-destabilising protein, or single-stranded DNA binding protein, required for DNA synthesis.. +PF02305 Capsid protein (F protein)
Pfam-B_10357 (release 5.2). This is a family of proteins from single-stranded DNA bacteriophages. Protein F is the major capsid component, sixty copies of which are found in the virion.. +PF03335 Phage tail fibre repeat
Pfam-B_3576 (release 6.5). +PF03406 Phage tail fibre repeat
Pfam-B_854 (release 6.6). This repeat is found in the tail fibres of phage. For example protein K Swiss:Q37842 . The repeats are about 40 residues long.. +PF02306 Major spike protein (G protein)
Pfam-B_8833 (release 5.2). This is a family of proteins from single-stranded DNA bacteriophages. Five G proteins, each a tight beta barrel, form twelve surface spikes. . +PF04688 Phage lysis protein, holin
This family constitutes holin proteins from the dsDNA Siphoviridae group bacteriophages. Most bacteriophages require an endolysin and a holin for host lysis. During late gene expression, holins accumulate and oligomerise in the host cell membrane. They then suddenly trigger to permeabilise the membrane, which causes lysis by allowing endolysin to attack the peptidoglycan. There are thought to be at least 35 different families of holin genes .. +PF04531 Bacteriophage holin
Pfam-B_2644 (release 7.5). This family of holins is found in several staphylococcal and streptococcal bacteriophages. Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the buildup of a holin oligomer which causes the lysis .. +PF04550 Phage holin family 2
Pfam-B_61235 (release 7.0). Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the buildup of a holin oligomer which causes the lysis .. +PF05106 Phage holin family (Lysis protein S)
TIGRFAMs (release 2.0);. This family represents one of a large number of mutually dissimilar families of phage holins. Holins act against the host cell membrane to allow lytic enzymes of the phage to reach the bacterial cell wall. This family includes the product of the S gene of phage lambda.. +PF05105 Holin;
TIGRFAMs (release 2.0);. Phage holins and lytic enzymes are both necessary for bacterial lysis and virus dissemination. This family also includes TcdE/UtxA involved in toxin secretion in Clostridium difficile .. +PF00589 Phage integrase family
MRC-LMB Genome group. Members of this family cleave DNA substrates by a series of staggered cuts, during which the protein becomes covalently linked to the DNA through a catalytic tyrosine residue at the carboxy end of the alignment. The catalytic site residues in CRE recombinase (Swiss:P06956) are Arg-173, His-289, Arg-292 and Tyr-324.. +PF03245 Bacteriophage Rz lysis protein
Pfam-B_3219 (release 6.5). This protein is involved in host lysis. This family is not considered to be a peptidase according to the MEROPs database. This family Rz and the Rz1 protein (Pfam:PF06085) represent a unique example of two genes located in different reading frames in the same nucleotide sequence, which encode different proteins that are both required in the same physiological pathway .. +PF00959 Phage lysozyme
This family includes lambda phage lysozyme and E. coli endolysin.. +PF03863 Phage maturation protein
+PF04233 Phage Mu protein F like protein
Members of this family are found in double-stranded DNA bacteriophages, and in some bacteria. A member of this family is required for viral head morphogenesis in bacteriophage SPP1 (Swiss:Q38577). This family is possibly a minor head protein. This family may be related to the family TT_ORF1 (Pfam:PF02956).. +PF05136 Phage portal protein, lambda family
TIGRFAMs (release 2.1);. This protein forms a hole, or portal, that enables DNA passage during packaging and ejection. It also forms the junction between the phage capsid and the tail proteins. . +PF05133 Phage portal protein, SPP1 Gp6-like
TIGRFAMs (release 2.0);. This protein forms a hole, or portal, that enables DNA passage during packaging and ejection. It also forms the junction between the phage head (capsid) and the tail proteins. During SPP1 morphogenesis, Gp6 participates in the procapsid assembly reaction [1,2]. This family also includes the old Pfam family Phage_min_cap (PF05126).. +PF05135 Phage_QLRG;
Phage gp6-like head-tail connector protein. TIGRFAMs (release 2.1);. This family of proteins contain head-tail connector proteins related to gp6 from bacteriophage HK97 . A structure of this protein shows similarity to gp15 a well characterised connector component of bacteriophage SPP1 .. +PF04492 Bacteriophage replication protein O
Replication protein O is necessary for the initiation of bacteriophage DNA replication. Protein O interacts with the lambda replication origin, and also with replication protein P to form an oligomer . It is speculated that the N-terminal half interacts with the replication origin while the C terminal half mediates protein-protein interaction (annotation of Swiss: P14815).. +PF04984 Phage tail sheath protein
This family includes a variety of phage tail sheath proteins.. +PF04630 Phage major tail protein
Pfam-B_5341 (release 7.5). +PF05100 Phage minor tail protein L
TIGRFAMs (release 2.0);. +PF04761 Lactococcus bacteriophage putative transcription regulator
Pfam-B_3898 (release 7.5). This family represents a number of putative transcription repressor proteins found in several Lactococcus bacteriophages. Horizontal transfer may account for the presence of similar proteins in Lactococcus .. +PF04985 Phage tail tube protein FII
The major structural components of the contractile tail of bacteriophage P2 are proteins FI and FII, which are believed to be the tail sheath and tube proteins, respectively.. +PF05155 Phage X family
TIGRFAMs (release 2.1);. This family is the product of Gene X. The function of this protein is unknown.. +PF02912 tRNA-synt_2_N;
Aminoacyl tRNA synthetase class II, N-terminal domain. +PF02332 Methane/Phenol/Toluene Hydroxylase
Pfam-B_15166 (release 5.2) & Pfam-B_3223 (Release 7.5). Bacterial phenol hydroxylase is a multicomponent enzyme that catabolises phenol and some of its methylated derivatives. This Pfam family contains both the P1 and P3 polypeptides of phenol hydroxylase and the alpha and beta chain of methane hydroxylase protein A.. +PF04663 Phenol hydroxylase conserved region
Pfam-B_4509 (release 7.5). Under aerobic conditions, phenol is usually hydroxylated to catechol and degraded via the meta or ortho pathways. Two types of phenol hydroxylase are known: one is a multi-component enzyme the other is a single-component monooxygenase. This region is found in both types of enzymes [1,2].. +PF04674 Phosphate-induced protein 1 conserved region
Pfam-B_4596 (release 7.5). Family of conserved plant proteins. Conserved region identified in a phosphate-induced protein of unknown function .. +PF03831 PhnA protein
TIGRFAMs, Griffiths-Jones SR. +PF02562 PhoH-like protein
PhoH is a cytoplasmic protein and predicted ATPase that is induced by phosphate starvation.. +PF02114 Phosducin
+PF00068 phoslip;
Overington and HMM_iterative_training. Phospholipase A2 releases fatty acids from the second carbon group of glycerol. Perhaps the best known members are secreted snake venoms, but also found in secreted pancreatic and membrane-associated forms. Structure is all-alpha, with two core disulfide-linked helices and a calcium-binding loop. This alignment represents the major family of PLA2s. A second minor family, defined by the honeybee venom PLA2 PDB:1POC and related sequences from Gila monsters (Heloderma), is not recognised. This minor family conserves the core helix pair but is substantially different elsewhere. The PROSITE pattern PA2_HIS, specific to the first core helix, recognises both families.. +PF04185 Phosphoesterase family
Pfam-B_1803 (release 7.3). This family includes both bacterial phospholipase C enzymes EC:3.1.4.3, but also eukaryotic acid phosphatases EC:3.1.3.2.. +PF04272 Phospholamban
TIGRFAMs (release 2.0);. The regulation of calcium levels across the membrane of the sarcoplasmic reticulum involves the interplay of many membrane proteins. Phospholamban is a 52 residue integral membrane protein that is involved in reversibly inhibiting the Ca(2+) pump and regulating the flow of Ca ions across the sarcoplasmic reticulum membrane during muscle contraction and relaxation . Phospholamban is thought to form a pentamer in the membrane .. +PF00922 Vesiculovirus phosphoprotein
Pfam-B_1160 (release 3.0). +PF00343 phosphorylase;
Carbohydrate phosphorylase. The members of this family catalyse the formation of glucose 1-phosphate from one of the following polyglucoses; glycogen, starch, glucan or maltodextrin.. +PF00124 photoRC;
Photosynthetic reaction centre protein. +PF01895 DUF65;
This family contains phosphate regulatory proteins including PhoU. PhoU proteins are known to play a role in the regulation of phosphate uptake. The PhoU domain is composed of a three helix bundle . The PhoU protein contains two copies of this domain. The domain binds to an iron cluster via its conserved E/DXXXD motif.. +PF00502 Phycobilisome protein
Pfam-B_10 (release 1.0). +PF02333 Phytase
Pfam-B_14843 (release 5.2). Phytase is a secreted enzyme which hydrolyses phytate to release inorganic phosphate. This family appears to represent a novel enzyme that shows phytase activity and has been shown to have a six- bladed propeller folding architecture .. +PF04833 phytochel_synth; Phytochel_synth;
Pfam-B_4078 (release 7.6). Family of plant proteins are designated COBRA-like (COBL) proteins. The 12 Arabidopsis members of the family are all GPI-linked . Some members of this family are annotated as phytochelatin synthase, but these annotations are incorrect .. +PF00360 phytochrome; Phytochrome;
Phytochromes are red/far-red photochromic biliprotein photoreceptors which regulate plant development. They are widely represented in both photosynthetic and non-photosynthetic bacteria and are known in a variety of fungi. Although sequence similarities are low, this domain is structurally related to Pfam:PF01590 , which is generally located immediately N-terminal to this domain. Compared with Pfam:PF01590, this domain carries an additional tongue-like hairpin loop between the fifth beta-sheet and the sixth alpha-helix which functions to seal the chromophore pocket and stabilise the photoactivated far-red-absorbing state (Pfr) . The tongue carries a conserved PRxSF motif, from which an arginine finger points into the chromophore pocket close to ring D forming a salt bridge with a conserved aspartate residue .. +PF03284 Phenazine biosynthesis protein A/B
Pfam-B_4020 (release 6.5). +PF00388 Phosphatidylinositol-specific phospholipase C, X domain
This associates with Pfam:PF00387 to form a single structural unit.. +PF00387 Phosphatidylinositol-specific phospholipase C, Y domain
This associates with Pfam:PF00388 to form a single structural unit.. +PF02192 PI3-kinase family, p85-binding domain
Alignment kindly provided by SMART. +PF00794 PI3-kinase family, ras-binding domain
Alignment kindly provided by SMART. Certain members of the PI3K family possess Ras-binding domains in their N-termini. These regions show some similarity (although not highly significant similarity) to Ras-binding Pfam:PF00788 domains (unpublished observation).. +PF00613 Phosphoinositide 3-kinase family, accessory domain (PIK domain)
Ponting C, Schultz J, Bork P. PIK domain is conserved in all PI3 and PI4-kinases. Its role is unclear but it has been suggested to be involved in substrate presentation.. +PF02226 Picornavirus coat protein (VP4)
Pfam-B_345 (release 5.2). VP1, VP2, VP3 and VP4 form the basic unit that forms the icosahedral coat of picornaviruses. Five symmetry-related N termini of coat protein VP4 form a ten-stranded, antiparallel beta barrel around the base of the icosahedral fivefold axis .. +PF00947 Picornavirus core protein 2A
Pfam-B_138 (release 3.0). This protein is a protease, involved in cleavage of the polyprotein.. +PF01552 Picornavirus 2B protein
Pfam-B_214 (release 4.0). Poliovirus infection leads to drastic alterations in membrane permeability late during infection. Proteins 2B and 2BC enhance membrane permeability [1,2].. +PF00345 pili_assembly; Pili_assembly_N;
Pili and flagellar-assembly chaperone, PapD N-terminal domain. C2 domain-like beta-sandwich fold. This domain is the n-terminal part of the PapD chaperone protein for pilus and flagellar assembly.. +PF02753 pili_assembly_C; Pili_assembly_C;
Pili assembly chaperone PapD, C-terminal domain. Ig-like beta-sandwich fold. This domain is the C-terminal part of the pilus and flagellar-assembly chaperone protein PapD.. +PF00114 pilin;
Pilin (bacterial filament). Proteins with only the short N-terminal methylation site are not separated from the noise.\. The Prosite pattern detects those better.. +PF05137 Fimbrial assembly protein (PilN)
+PF04350 Pilus assembly protein, PilO
PilO proteins are involved in the assembly of pilin. However, the precise function of this family of proteins is not known.. +PF04351 Pilus assembly protein, PilP
The PilP family are periplasmic proteins involved in the biogenesis of type IV pili .. +PF04697 pinin_SDK_N;
pinin/SDK conserved region. Pfam-B_4141 (release 7.5). SDK2/3 is localised in nuclear speckles where as pinin is known to localise at the desmosomes where it is thought to be involved in anchoring intermediate filaments to the desmosomal plaque [1,2]. The role of SDK2/3 in the nucleus is thought to be concerned with modulation of alternative pre-mRNA splicing . pinin has also been implicated as a tumour suppressor. The conserved region is found at the N-terminus of the member proteins .. +PF00224 Pyruvate kinase, barrel domain
This domain is actually a small beta-barrel domain nested within a larger TIM barrel. The active site is found in a cleft between the two domains.. +PF02887 Pyruvate kinase, alpha/beta domain
As well as being found in pyruvate kinase this family is found as an isolated domain in some bacterial proteins.. +PF02827 cAMP-dependent protein kinase inhibitor
Members of this family are extremely potent competitive inhibitors of cAMP-dependent protein kinase activity. These proteins interact with the catalytic subunit of the enzyme after the cAMP-induced dissociation of its regulatory chains.. +PF02173 pKID domain
Pfam-B_1547 (Release 4.2). CBP and P300 bind to the pKID (phosphorylated kinase-inducible-domain) domain of CREB .. +PF03832 PkinA_anch;
This short motif is named after three conserved residues found in a WXSXK motif in protein kinase A anchoring proteins.. +PF00069 pkinase;
Protein kinase domain. +PF00433 pkinase_C;
Protein kinase C terminal domain. Pfam-B_135 (release 1.0). +PF02253 Phospholipase A1
Pfam-B_3500 (release 5.2). Phospholipase A1 is a bacterial outer membrane bound acyl hydrolase with a broad substrate specificity EC:3.1.1.32. It has been proposed that Ser164 is the active site for Swiss:P00631 .. +PF01735 Lysophospholipase catalytic domain
Pfam-B_2127 (release 4.1). This family consists of Lysophospholipase / phospholipase B EC:3.1.1.5 and cytosolic phospholipase A2 EC:3.1.4 which also has a C2 domain Pfam:PF00168. Phospholipase B enzymes catalyse the release of fatty acids from lysophospholipids and are capable in vitro of hydrolysing all phospholipids extractable from yeast cells . Cytosolic phospholipase A2 associates with natural membranes in response to physiological increases in Ca2+ and selectively hydrolyses arachidonyl phospholipids , the aligned region corresponds to the carboxy-terminal Ca2+-independent catalytic domain of the protein as discussed in .. +PF02988 Phospholipase A2 inhibitor
Pfam-B_1254 (release 6.4). +PF00321 plant_thionins;
+PF01307 Plant viral movement protein
Pfam-B_881 (release 3.0). This family includes several known plant viral movement proteins (e.g. Swiss:Q85292) from a number of different ssRNA plant virus families including potexviruses, hordeiviruses and carlaviruses. . +PF04819 Plant_viral_rep;
Family of unknown function (DUF716) . Pfam-B_5106 (release 7.6). This family is equally distributed in both metazoa and plants. Annotation associated with Swiss:Q9SLW7 suggest that it may be involved in response to viral attack in plants. However, no clear function has been assigned to this family.. +PF05015 Plasmid maintenance system killer protein
Several plasmids with proteic killer gene systems have been reported. All of them encode a stable toxin and an unstable antidote. Upon loss of the plasmid, the less stable inhibitor is inactivated more rapidly than the toxin, allowing the toxin to be activated. The activation of those systems result in cell filamentation and cessation of viable cell production. It has been verified that both the stable killer and the unstable inhibitor of the systems are short polypeptides. This family corresponds to the toxin.. +PF01672 Putative plasmid partition protein
Pfam-B_1163 (release 4.1). This family consists of conserved hypothetical proteins from Borrelia burgdorferi the lyme disease spirochaete, some of which are putative plasmid partition proteins .. +PF05016 Plasmid stabilisation system protein
Members of this family are involved in plasmid stabilisation. The exact molecular function of this protein is not known. This family also encompasses RelE/ParE described in .. +PF00681 Plectin_repeat;
Pfam-B_68 (release 2.1). This family includes repeats from plectin, desmoplakin, envoplakin and bullous pemphigoid antigen.. +PF01523 Putative modulator of DNA gyrase
Pfam-B_845 (release 4.0). tldD and pmbA were found to suppress mutations in letD and inhibitor of DNA gyrase. Therefore it has been hypothesised that the TldD and PmbA proteins modulate the activity of DNA gyrase . It has also been suggested that PmbA may be involved in secretion .. +PF03332 Eukaryotic phosphomannomutase
Pfam-B_3713 (release 6.5). This enzyme EC:5.4.2.8 is involved in the synthesis of the GDP-mannose and dolichol-phosphate-mannose required for a number of critical mannosyl transfer reactions.. +PF03901 PMP;
Alg9-like mannosyltransferase family. DOMO:DM04662 & Pfam-B_7750 (Release 8.0). Members of this family are mannosyltransferase enzymes [1-2]. At least some members are localised in endoplasmic reticulum and involved in GPI anchor biosynthesis [3-4].. +PF00822 PMP22;
PMP-22/EMP/MP20/Claudin family. Pfam-B_1393 (release 2.1). +PF01625 Peptide methionine sulfoxide reductase
Pfam-B_1111 (release 4.1). This enzyme repairs damaged proteins. Methionine sulfoxide in proteins is reduced to methionine.. +PF02366 Dolichyl-phosphate-mannose-protein mannosyltransferase
Pfam-B_556 (release 5.2). This is a family of Dolichyl-phosphate-mannose-protein mannosyltransferase proteins EC:2.4.1.109. These proteins are responsible for O-linked glycosylation of proteins, they catalyse the reaction:- Dolichyl phosphate D-mannose + protein <=> dolichyl phosphate + O-D-mannosyl-protein.\. +PF03393 Pneumovirus matrix protein
Pfam-B_3641 (release 6.6). +PF03246 Pneumovirus nucleocapsid protein
Pfam-B_3020 (release 6.5). +PF03438 Pneumovirus NS1 protein
Pfam-B_3221 (release 6.6). This non-structural protein is one of two found in pneumoviruses. The protein is about 140 amino acids in length. The NS1 protein appears to be important for efficient replication but not essential . The NS1 protein has been shown by yeast two-hybrid to interact with the viral P protein . This protein is also known as the 1C protein. It has also been shown that NS1 can potently inhibit transcription and RNA replication .. +PF02478 Pneumovirus phosphoprotein
Pfam-B_2290 (release 5.4). This family represents the phosphoprotein of Paramyxoviridae, a putative RNA polymerase alpha subunit that may function in template binding.. +PF01048 Phosphorylase superfamily
Pfam-B_1190 (release 3.0). Members of this family include: purine nucleoside phosphorylase (PNP) Uridine phosphorylase (UdRPase) 5'-methylthioadenosine phosphorylase (MTA phosphorylase). +PF02233 NAD(P) transhydrogenase beta subunit
Pfam-B_2220 (release 5.2). This family corresponds to the beta subunit of NADP transhydrogenase in prokaryotes, and either the protein N- or C terminal in eukaryotes. The domain is often found in conjunction with Pfam:PF01262. Pyridine nucleotide transhydrogenase catalyses the reduction of NADP+ to NADPH. A complete loss of activity occurs upon mutation of Gly314 in E. coli .. +PF03833 DNA polymerase II large subunit DP2
TIGRFAMs, Griffiths-Jones SR. +PF01620 Ribonuclease (pollen allergen)
Pfam-B_1050 (release 4.1). This family contains grass pollen proteins of group V. Swiss:Q40963 has been shown to possess ribonuclease activity .. +PF01190 Pollen proteins Ole e I like
+PF00659 POLO box duplicated region
+PF02563 Polysaccharide biosynthesis/export protein
COGs & Pfam-B_1505 (Release 7.5). This is a family of periplasmic proteins involved in polysaccharide biosynthesis and/or export.. +PF01743 Poly A polymerase head domain
Pfam-B_814 (release 4.2). This family includes nucleic acid independent RNA polymerases, such as Poly(A) polymerase, which adds the poly (A) tail to mRNA EC:2.7.7.19. This family also includes the tRNA nucleotidyltransferase that adds the CCA to the 3' of the tRNA EC:2.7.7.25. This family is part of the nucleotidyltransferase superfamily.. +PF01518 Sigma NS protein
Pfam-B_803 (release 4.0). This viral protein has a poly(C)-dependent poly(G) polymerase activity . . +PF00738 Polyhedrin
Pfam-B_423 (release 2.1). These proteins are found in occlusion bodies in various viruses. The polyhedrin protein protects the virus.. +PF03364 Polyketide cyclase / dehydrase and lipid transport
Mifsud W, Mistry J, Wood V. Pfam-B_1457 (release 6.6). This family contains polyketide cyclases/dehydrases which are enzymes involved in polyketide synthesis. The family also includes proteins which are involved in the binding/transport of lipids.. +PF01736 Polyomavirus agnoprotein
Pfam-B_1917 (release 4.1). This family consists of the DNA binding protein or agnoprotein from various polyomaviruses. This protein is highly basic and can bind single stranded and double stranded DNA . Mutations in the agnoprotein produce smaller viral plaques, hence its function is not essential for growth in tissue culture cells but something has slowed in the normal replication cycle . There is also evidence suggesting that the agnogene and agnoprotein act as regulators of structural protein synthesis .. +PF00718 Polyomavirus coat protein
Pfam-B_748 (release 2.1). +PF00761 Polyomavirus coat protein
Pfam-B_871 (release 2.1). +PF00348 Polyprenyl synthetase
+PF01943 Polysaccharide biosynthesis protein
Members of this family are integral membrane proteins . Many members of the family are implicated in production of polysaccharide. The family includes RfbX part of the O antigen biosynthesis operon . The family includes SpoVB from Bacillus subtilis Swiss:Q00758, which is involved in spore cortex biosynthesis .. +PF02719 Polysaccharide biosynthesis protein
Pfam-B_1536 (release 5.5). This is a family of diverse bacterial polysaccharide biosynthesis proteins including the CapD protein (Swiss:P39853) , WalL protein (Swiss:O86159) mannosyl-transferase (Swiss:O05349) and several putative epimerases (e.g. WbiI Swiss:O69130).. +PF02530 Porin subfamily
Pfam-B_1122 (release 5.4). This family consists of porins from the alpha subdivision of Proteobacteria the members of this family are related to Pfam:PF00267. The porins form large aqueous channels in the cell membrane allowing the selective entry of hydrophilic compounds this so called 'molecular sieve' is found in the cell walls of gram negative bacteria. . +PF01379 Porphobilinogen deaminase, dipyromethane cofactor binding domain
+PF03900 Porphobilinogen deaminase, C-terminal domain
+PF00280 Potato inhibitor I family
+PF00767 Potyvirus coat protein
Pfam-B_868 (release 2.1). +PF00157 pou;
Pou domain - N-terminal to homeobox domain. +PF05061 Poxvirus A11 Protein
Pfam-B_5994 (release 7.7). Family of conserved Chordopoxvirinae A11 family proteins. Conserved region spans entire protein in the majority of family members.. +PF04651 Poxvirus A12 protein
Pfam-B_5523 (release 7.5). +PF04848 Poxvirus A22 protein
Pfam-B_4558 (release 7.6). +PF04584 Poxvirus A28 family
Pfam-B_4756 (release 7.5). Family of conserved Poxvirus A28 family proteins. Conserved region spans entire protein in the majority of family members.. +PF04665 Poxvirus A32 protein
Pfam-B_5586 (release 7.5). The A32 protein is thought to be involved in viral DNA packaging.. +PF04948 Poxvirus A51 protein
Pfam-B_6937 (release 7.6). +PF04924 Poxvirus A6 protein
Pfam-B_5792 (release 7.6). +PF04745 VITF-3 subunit protein
Pfam-B_6036 (release 7.5). Family of Chordopoxvirus proteins composing one of the two subunits that make up VITF-3, a virally encoded complex necessary for intermediate stage transcription .. +PF04835 A9 protein conserved region
Pfam-B_4431 (release 7.6). Family of Chordopoxvirus A9 proteins. . +PF04508 Viral A-type inclusion protein repeat
The repeat is found in the A-type inclusion protein of the Poxvirus family . . +PF03286 Pox virus Ag35 surface protein
Pfam-B_4295 (release 6.5). +PF03336 Poxvirus C4/C10 protein
Pfam-B_3519 (release 6.5). +PF03287 Poxvirus C7/F8A protein
Pfam-B_4089 (release 6.5). +PF04701 Pox virus D2 protein
Pfam-B_5832 (release 7.5). +PF00874 BglG_antitermin;
Pfam-B_772 (release 3.0). The PRD domain (for PTS Regulation Domain), is the phosphorylatable regulatory domain found in bacterial transcriptional antiterminator such as BglG, SacY and LicT, as well as in activators such as MtlR and LevR. The PRD is phosphorylated on one or two conserved histidine residues. PRD-containing proteins are involved in the regulation of catabolic operons in Gram+ and Gram- bacteria and are often characterised by a short N-terminal effector domain that binds to either RNA (CAT-RBD for antiterminators Pfam:PF03123) or DNA (for activators), and a duplicated PRD module which is phosphorylated by the sugar phosphotransferase system (PTS) in response to the availability of carbon source. The phosphorylations modify the conformation and stability of the dimeric proteins and thereby the RNA- or DNA-binding activity of the effector domain. The structure of the LicT PRD domains has been solved in both the active (pdb:1h99, ) and inactive state (pdb:1tlv ), revealing massive structural rearrangements upon activation.. +PF04580 Chordopoxvirinae D3 protein
Pfam-B_4684 (release 7.5). Chordopoxvirinae D3 protein conserved region. Region occupies entire length of D3 protein.. +PF03288 Poxvirus D5 protein-like
Pfam-B_4009 (release 6.5). This family includes D5 from Poxviruses which is necessary for viral DNA replication, and is a nucleic acid independent nucleoside triphosphatase. Members of this family are also found outside of poxviruses. This domain is a DNA-binding winged HTH domain.. +PF04805 E10-like protein conserved region
Pfam-B_6357 (release 7.5). Family of poxvirus proteins.. +PF04497 Pox_E2;
Pfam-B_3979 (release 7.5). This family of proteins is restricted to Poxviridae. It contains a number of differently named uncharacterised proteins.. +PF04656 Pox virus E6 protein
Pfam-B_4392 (release 7.5). Family of pox virus E6 proteins.. +PF03394 Poxvirus E8 protein
Pfam-B_3759 (release 6.6). +PF04943 Poxvirus F11 protein
Pfam-B_6911 (release 7.6). The protein F11 is an early virus protein.. +PF03337 Poxvirus F12L protein
Pfam-B_3082 (release 6.5). +PF04596 Poxvirus protein F15
Pfam-B_5182 (release 7.5). +PF04708 Poxvirus F16 protein
Pfam-B_5863 (release 7.5). +PF04767 DNA-binding 11 kDa phosphoprotein
Pfam-B_6128 (release 7.5). Family of poxvirus proteins required for virus morphogenesis. Protein function necessary for proteolytic processing of the major viral structural proteins, P4a and P4b . . +PF04599 Poxvirus G5 protein
Pfam-B_5216 (release 7.5). This protein has been predicted to be related to the FEN-1 endonuclease .. +PF04787 Late protein H7
Pfam-B_6266 (release 7.5). Family of poxvirus late H7 proteins.. +PF03289 Poxvirus protein I1
Pfam-B_4306 (release 6.5). +PF04661 Poxvirus I3 ssDNA-binding protein
Pfam-B_5571 (release 7.5). +PF04713 Poxvirus protein I5
Pfam-B_5901 (release 7.5). +PF04595 Poxvirus I6-like family
Pfam-B_5073 (release 7.5) & Pfam-B_6224 (release 8.0). This family includes I6 proteins as well as the related F5L proteins.. +PF03338 Poxvirus J1 protein
Pfam-B_3556 (release 6.5). +PF03339 Poxvirus L3/FP4 protein
Pfam-B_3380 (release 6.5). +PF00485 Phosphoribulokinase / Uridine kinase family
In Arabidopsis the region carries two binding domains, a phosphoribosylpyrophosphate-binding domain and, at the very C-terminus, a uracil-binding domain.. +PF04872 Poxvirus L5 protein family
Pfam-B_6088 (release 7.6). This family includes variola (smallpox) and vaccinia virus L5 proteins. However, not all proteins in this family are called L5. L5 is thought to contain a metal-binding region .. +PF03356 Viral late protein H2
Pfam-B_3929 (release 6.5). All Members of this family show similarity to the vaccinia virus late protein H2. This protein is often referred to by its gene name of H2R. Members from this family all belong to the viral taxon Poxviridae.. +PF04887 Poxvirus M2 protein
Pfam-B_6168 (release 7.6). This family includes M2 protein from variola virus. The function of this protein is not known.. +PF03341 Poxvirus mRNA capping enzyme, small subunit
Pfam-B_3728 (release 6.5). The small subunit of the poxvirus mRNA capping enzyme has been found to have a structure which suggests that it started life as an RNA cap 2-prime O-methyltransferase. It has subsequently evolved to a catalytically inactive form that has been retained in order to help stabilise the large subunit, D1, and to enhance its methyltransferase activity through an allosteric mechanism .. +PF03213 Poxvirus P35 protein
Pfam-B_2785 (release 6.5). +PF03395 Poxvirus P4A protein
Pfam-B_2985 (release 6.6). +PF03292 Poxvirus P4B major core protein
Pfam-B_4215 (release 6.5). +PF03296 Poxvirus poly(A) polymerase nucleotidyltransferase domain
Pfam-B_4019 (release 6.5). +PF03294 RNA polymerase-associated transcription specificity factor, Rap94
Pfam-B_4535 (release 6.5). +PF03340 Poxvirus rifampicin resistance protein
Pfam-B_3377 (release 6.5). +PF03293 Poxvirus DNA-directed RNA polymerase, 18 kD subunit
Pfam-B_4188 (release 6.5). +PF03396 Poxvirus DNA-directed RNA polymerase, 35 kD subunit
Pfam-B_3921 (release 6.6). +PF04490 Poxvirus T4 protein, C terminus
This family of poxvirus proteins are thought to be retained in the endoplasmic reticulum. M-T4 of myxoma virus (Swiss:O55698) is thought to protect infected lymphocytes from apoptosis and modulate the inflammatory response to virus infection .. +PF04491 Poxvirus T4 protein, N terminus
This family of poxvirus proteins are thought to be secreted or retained in the endoplasmic reticulum if the protein also contains an additional C terminal region (Pfam:PF04490). M-T4 of myxoma virus (Swiss:O55698) is thought to protect infected lymphocytes from apoptosis and modulate the inflammatory response to virus infection .. +PF03295 Poxvirus trans-activator protein A1 C-terminal
Pfam-B_4259 (release 6.5). +PF03355 Viral Trans-Activator Protein
Pfam-B_3956 (release 6.5). These proteins function as a trans-activator of viral late genes.. +PF04441 Poxvirus early transcription factor (VETF), large subunit
Pfam-B_3920 (release 7.5). The poxvirus early transcription factor (VETF), in addition to the viral RNA polymerase, is required for efficient transcription of early genes in vitro. VETF is a heterodimeric protein that binds specifically to early gene promoters. The heterodimer is comprised of an 82 kDa (this family) subunit and a 70 kDa subunit. . +PF04947 Poxvirus Late Transcription Factor VLTF3 like
Members of this family are approximately 26 KDa, and are involved in trans-activation of late transcription .. +PF04498 Poxvirus nucleic acid binding protein VP8/L4R
The 25 kDa product of Vaccinia virus gene L4R is also known as VP8. VP8 is found in the cores of Vaccinia virions and is essential for the formation of transcriptionally competent viral particles. It binds both single stranded and double stranded DNA and RNA with similar affinities. Binding is thought to involve cooperative interactions between protein subunits. The protein is proteolytically cleaved during viral assembly at an Ala-Gly-Ala site. Possible roles for VP8 include packaging and maintaining the DNA genome in a transcribable configuration; binding ssDNA during transcription initiation; and cooperation with I8R protein to unwind early promoter regions. VP8 may also function in either transcription elongation or release of mRNA molecules from viral particles .. +PF04395 Poxvirus B22R protein
Pfam-B_3510 (release 7.3). This is a highly conserved C-rich, central region of poxvirus proteins from e.g. Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. There are three pairs of conserved cysteine residues.. +PF00550 pp-binding;
Phosphopantetheine attachment site. A 4'-phosphopantetheine prosthetic group is attached through a serine. This prosthetic group acts as a 'swinging arm' for the attachment of activated fatty acid and amino-acid groups. This domain forms a four helix bundle. This family includes members not included in Prosite. The inclusion of these members is supported by sequence analysis and functional evidence. The related domain of Swiss:P19828 has the attachment serine replaced by an alanine.. +PF02503 Polyphosphate kinase middle domain
Pfam-B_2701 (release 5.4). Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules.. +PF03012 Phosphoprotein
Pfam-B_1336 (release 6.4). This family includes the M1 phosphoprotein non-structural RNA polymerase alpha subunit, which is thought to be a component of the active polymerase, and may be involved in template binding.. +PF02818 PPAK motif
These motifs are found in the PEVK region of titin.. +PF01326 PPDK_N_term;
Pyruvate phosphate dikinase, PEP/pyruvate binding domain. This enzyme catalyses the reversible conversion of ATP to AMP, pyrophosphate and phosphoenolpyruvate (PEP).. +PF01239 Protein prenyltransferase alpha subunit repeat
Both farnesyltransferase (FT) and geranylgeranyltransferase 1 (GGT1) recognise a CaaX motif on their substrates where 'a' stands for preferably aliphatic residues, whereas GGT2 recognises a completely different motif. Important substrates for FT include, amongst others, many members of the Ras superfamily. GGT1 substrates include some of the other small GTPases and GGT2 substrates include the Rab family .. +PF02541 Ppx/GppA phosphatase family
This family consists of the N-terminal region of exopolyphosphatase (Ppx) EC:3.6.1.11 and guanosine pentaphosphate phospho-hydrolase (GppA) EC:3.6.1.40.. +PF04403 Paraquat-inducible protein A
Paraquat is a superoxide radical-generating agent. The promoter for the pqiA gene is also inducible by other known superoxide generators . This is predicted to be a family of integral membrane proteins, possibly located in the inner membrane. This family is related to NADH dehydrogenase subunit 2 (Pfam:PF00361).. +PF01502 Phosphoribosyl-AMP cyclohydrolase
Pfam-B_782 (release 4.0). This enzyme catalyses the third step in the histidine biosynthetic pathway. It requires Zn ions for activity.. +PF01503 Phosphoribosyl-ATP pyrophosphohydrolase
Pfam-B_784 (release 4.0). This enzyme catalyses the second step in the histidine biosynthetic pathway.. +PF03208 PRA1 family protein
Pfam-B_2976 (release 6.5) & Pfam-B_8147 (Release 8.0). This family includes the PRA1 (Prenylated rab acceptor) protein which is a Rab guanine dissociation inhibitor (GDI) displacement factor . This family also includes the glutamate transporter EAAC1 interacting protein GTRAP3-18 .. +PF00697 N-(5'phosphoribosyl)anthranilate (PRA) isomerase
Pfam-B_247 (release 2.1). +PF03967 Photosynthetic reaction centre, H-chain N-terminal region
The family corresponds to the N-terminal cytoplasmic domain.. +PF00432 prenyltrans;
Prenyltransferase and squalene oxidase repeat. Pfam-B_130 (release 1.0). +PF01080 Presenilin
Pfam-B_789 (release 3.0). Mutations in presenilin-1 are a major cause of early onset Alzheimer's disease . It has been found that presenilin-1 (Swiss:P49768) binds to beta-catenin in-vivo . This family also contains SPE proteins from C.elegans.. +PF03991 Prion_octopep;
Copper binding octapeptide repeat. This repeat is found at the amino terminus of prion proteins. It has been shown to bind to copper.. +PF03063 Prismane/CO dehydrogenase family
Pfam-B_2956 (release 6.4). This family includes both hybrid-cluster proteins and the beta chain of carbon monoxide dehydrogenase. The hybrid-cluster proteins contain two Fe/S centres - a [4Fe-4S] cubane cluster, and a hybrid [4Fe-2S-2O] cluster. The physiological role of this protein is as yet unknown, although a role in nitrate/nitrite respiration has been suggested . The prismane protein from Escherichia coli was shown to contain hydroxylamine reductase activity (NH2OH + 2e + 2 H+ -> NH3 + H2O). This activity is rather low. Hydroxylamine reductase activity was also found in CO-dehydrogenase in which the active site Ni was replaced by Fe . The CO dehydrogenase contains a Ni-3Fe-2S-3O centre.. +PF00484 Carbonic anhydrase
Prosite & Pfam-B_9319 (Release 8.0). This family includes carbonic anhydrases as well as a family of non-functional homologues related to YbcF.. +PF01619 Proline dehydrogenase
Pfam-B_1092 (release 4.1). +PF00160 pro_isomerase;
Cyclophilin type peptidyl-prolyl cis-trans isomerase/CLD. The peptidyl-prolyl cis-trans isomerases, also known as cyclophilins, share this domain of about 109 amino acids. Cyclophilins have been found in all organisms studied so far and catalyse peptidyl-prolyl isomerisation during which the peptide bond preceding proline (the peptidyl-prolyl bond) is stabilised in the cis conformation. Mammalian cyclophilin A (CypA) is a major cellular target for the immunosuppressive drug cyclosporin A (CsA). Other roles for cyclophilins may include chaperone and cell signalling function .. +PF00235 profilin;
+PF02161 Progesterone receptor
+PF02244 Carboxypeptidase activation peptide
Pfam-B_2335 (release 5.2). Carboxypeptidases are found in abundance in pancreatic secretions. The pro-segment moiety (activation peptide) accounts for up to a quarter of the total length of the peptidase, and is responsible for modulation of folding and activity of the pro-enzyme.. +PF04352 ProQ/FINO family
COG3109 & Pfam-B_7673 (release 7.7). This family includes ProQ, which is required for full activation of the osmoprotectant transporter, ProP, in Escherichia coli. This family includes several bacterial fertility inhibition (FINO) proteins. The conjugative transfer of F-like plasmids is repressed by FinO, an RNA binding protein. FinO interacts with the F-plasmid encoded traJ mRNA and its antisense RNA, FinP, stabilising FinP against endonucleolytic degradation and facilitating sense-antisense RNA recognition .. +PF02428 Potato type II proteinase inhibitor family
Pfam-B_2913 (release 5.4). Members of this family are proteinase inhibitors that contain eight cysteines that form four disulphide bridges. The structure of the proteinase-inhibitor complex is known .. +PF00260 protamine_P1;
+PF00841 protamine_P2;
Pfam-B_1350 (release 2.1). This protein also known as protamine P2 can substitute for histones in the chromatin of sperm (Swiss). The alignment contains both the sequence of the mature P2 protein and its propeptide.. +PF03247 Prothymosin/parathymosin family
Pfam-B_3463 (release 6.5). Prothymosin alpha and parathymosin are two ubiquitous small acidic nuclear proteins that are thought to be involved in cell cycle progression, proliferation, and cell differentiation . . +PF05044 Prox1;
Homeo-prospero domain. Pfam-B_5293 (release 7.7). Prospero is a large drosophila transcription factor protein that is expressed in all neural lineages of drosophila embryos. It is needed for correct expression of several neural proteins and in determining the cell fates of neural stem cells. Homologues of prospero are found in a wide range of animals including humans with the highest level of similarity being found in the C-terminal 160 amino acids. This region was identified as containing an atypical homeobox domain followed by a prospero domain. However, the structure shows that these two regions form a single stable structural domain as defined here . This homeo-prospero domain binds to DNA.. +PF02840 Prp18 domain
The splicing factor Prp18 is required for the second step of pre-mRNA splicing. The structure of a large fragment of the Saccharomyces cerevisiae Prp18 is known . This fragment is fully active in yeast splicing in vitro and includes the sequences of Prp18 that have been evolutionarily conserved. The core structure consists of five alpha-helices that adopt a novel fold. The most highly conserved region of Prp18, a nearly invariant stretch of 19 aa, forms part of a loop between two alpha-helices and may interact with the U5 small nuclear ribonucleoprotein particles .. +PF02340 PRRSV putative envelope protein
Pfam-B_939 (release 5.2). This family consists of a conserved probable envelope protein or ORF2 in porcine reproductive and respiratory syndrome virus (PRRSV) also in the family is a minor structural protein from lactate dehydrogenase-elevating virus.. +PF01366 Herpesvirus processing and transport protein
Pfam-B_1171 (release 3.0). The members of this family are associated with capsid intermediates during packaging of the virus.. +PF02666 Phosphatidylserine decarboxylase
This is a family of phosphatidylserine decarboxylases, EC:4.1.1.65. These enzymes catalyse the reaction: Phosphatidyl-L-serine <=> phosphatidylethanolamine + CO2. Phosphatidylserine decarboxylase plays a central role in the biosynthesis of aminophospholipids by converting phosphatidylserine to phosphatidylethanolamine .. +PF04230 Polysaccharide pyruvyl transferase
Pyruvyl-transferases involved in peptidoglycan-associated polymer biosynthesis. CsaB in Bacillus anthracis is necessary for the non-covalent anchoring of proteins containing an SLH (S-layer homology) domain to peptidoglycan-associated pyruvylated polysaccharides. WcaK and AmsJ are involved in the biosynthesis of colanic acid in Escherichia coli and of amylovoran in Erwinia amylovora .. +PF00223 psaA_psaB;
Photosystem I psaA/psaB protein. +PF02531 PsaD
Pfam-B_1336 (release 5.4). This family consists of PsaD from plants and cyanobacteria. PsaD is an extrinsic polypeptide of photosystem I (PSI) and is required for native assembly of PSI reaction clusters and is implicated in the electrostatic binding of ferredoxin within the reaction centre . PsaD forms a dimer in solution which is bound by PsaE however PsaD is monomeric in its native complexed PSI environment .. +PF02605 Photosystem I reaction centre subunit XI
Pfam-B_1741 (release 5.4). This family consists of the photosystem I reaction centre subunit XI, PsaL, from plants and bacteria. PsaL is one of the smaller subunits in photosystem I with only two transmembrane alpha helices and interacts closely with PsaI .. +PF00737 PSBH;
Photosystem II 10 kDa phosphoprotein. Pfam-B_465 (release 2.1). This protein is phosphorylated in a light dependent reaction.. +PF02532 Photosystem II reaction centre I protein (PSII 4.8 kDa protein)
Pfam-B_1731 (release 5.4). This family consists of various Photosystem II (PSII) reaction centre I proteins or PSII 4.8 kDa proteins, PsbI, from the chloroplast genome of many plants and Cyanobacteria. PsbI is a small, integral membrane component of PSII the role of which is not clear . Synechocystis mutants lacking PsbI have 20-30% loss of PSII activity however the PSII complex is not destabilised .. +PF01788 PsbJ
Pfam-B_1227 (release 4.2). This family consists of the photosystem II reaction centre protein PsbJ from plants and Cyanobacteria. In Synechocystis sp. PCC 6803 PsbJ regulates the number of photosystem II centres in thylakoid membranes, it is a predicted 4kDa protein with one membrane spanning domain .. +PF02533 Photosystem II 4 kDa reaction centre component
Pfam-B_1331 (release 5.4). This family consists of various photosystem II 4 kDa reaction centre components (PsbK) from plant and Cyanobacteria. The photosystem II reaction centre is responsible for catalysing the core photosynthesis reaction the light-induced splitting of water and the consequential release of dioxygen. In C. reinhardtii the psbK product is required for the stable assembly and/or stability of the photosystem II complex .. +PF02419 PsbL protein
Pfam-B_1884 (release 5.4). This family consists of the photosystem II reaction centre protein PsbL from plants and Cyanobacteria. The function of this small protein is unknown. Interestingly the mRNA for this protein requires a post-transcriptional modification of an ACG triplet to form an AUG initiator codon [1,2].. +PF05151 Photosystem II reaction centre M protein (PsbM)
Pfam-B_6558 (release 7.7). This family consists of several Photosystem II reaction centre M proteins (PsbM) from plants and cyanobacteria. During the photosynthetic light reactions in the thylakoid membranes of cyanobacteria, algae, and plants, photosystem II (PSII), a multi-subunit membrane protein complex, catalyses oxidation of water to molecular oxygen and reduction of plastoquinone .. +PF02468 psbN;
Photosystem II reaction centre N protein (psbN). Pfam-B_2222 (release 5.4). This is a family of small proteins encoded on the chloroplast genome. psbN is involved in photosystem II during photosynthesis, but its exact role is unknown.. +PF04725 Photosystem II 10 kDa polypeptide PsbR
This protein is associated with the oxygen-evolving complex of photosystem II. Its function in photosynthesis is not known. The C-terminal hydrophobic region functions as a thylakoid transfer signal but is not removed .. +PF01405 PSBT;
Photosystem II reaction centre T protein. Pfam-B_1880 (release 3.0). The exact function of this protein is unknown. It probably consists of a single transmembrane spanning helix. The Swiss:P37256 protein, appears to be (i) a novel photosystem II subunit and (ii) required for maintaining optimal photosystem II activity under adverse growth conditions .. +PF03912 PsbW;
Psb28 is a 13 kDa soluble protein that is directly assembled in dimeric PSII supercomplexes. The negatively charged N-terminal region is essential for this process . This protein was formerly known as PsbW, but PsbW is now reserved for Pfam:PF07123.. +PF00849 YABO;
RNA pseudouridylate synthase. Pfam-B_421 (release 3.0). Members of this family are involved in modifying bases in RNA molecules. They carry out the conversion of uracil bases to pseudouridine. This family includes RluD Swiss:P33643, a pseudouridylate synthase that converts specific uracils to pseudouridine in 23S rRNA. RluA from E. coli converts bases in both rRNA and tRNA .. +PF00796 Photosystem I reaction centre subunit VIII
Pfam-B_528 (release 2.1). +PF02427 Photosystem I reaction centre subunit IV / PsaE
Pfam-B_1594 (release 5.4). PsaE is a 69 amino acid polypeptide from photosystem I present on the stromal side of the thylakoid membrane . The structure is comprised of a well-defined five-stranded beta-sheet similar to SH3 domains .. +PF02507 Photosystem I reaction centre subunit III
Pfam-B_2122 (release 5.4). Photosystem I (PSI) is an integral membrane protein complex that uses light energy to mediate electron transfer from plastocyanin to ferredoxin. Subunit III (or PSI-F) is one of at least 14 different subunits that compose the PSI complex.. +PF03244 Photosystem I reaction centre subunit VI
Pfam-B_3007 (release 6.5). Photosystem I (PSI) is an integral membrane protein complex that uses light energy to mediate electron transfer from plastocyanin to ferredoxin.. +PF01701 Photosystem I reaction centre subunit IX / PsaJ
Pfam-B_1599 (release 4.1). This family consists of the photosystem I reaction centre subunit IX or PsaJ from various organisms including Synechocystis sp. (strain pcc 6803), Pinus thunbergii (green pine) and Zea mays (maize). PsaJ Swiss:P19443 is a small 4.4kDa, chloroplastal encoded, hydrophobic subunit of the photosystem I reaction complex its function is not yet fully understood . PsaJ can be cross-linked to PsaF Swiss:P12356 and has a single predicted transmembrane domain it has a proposed role in maintaining PsaF in the correct orientation to allow for fast electron transfer from soluble donor proteins to P700+ .. +PF01241 Photosystem I psaG / psaK
+PF00421 Photosystem II protein
Pfam-B_182 (release 1.0). +PF04012 PspA/IM30 family
This family includes PspA a protein that suppresses sigma54-dependent transcription. The PspA protein, a negative regulator of the Escherichia coli phage shock psp operon, is produced when virulence factors are exported through secretins in many Gram-negative pathogenic bacteria and its homologue in plants, VIPP1, plays a critical role in thylakoid biogenesis, essential for photosynthesis. Activation of transcription by the enhancer-dependent bacterial sigma(54) containing RNA polymerase occurs through ATP hydrolysis-driven protein conformational changes enabled by activator proteins that belong to the large AAA(+) mechanochemical protein family. It has been shown that PspA directly and specifically acts upon and binds to the AAA(+) domain of the PspF transcription activator .. +PF04839 Plastid and cyanobacterial ribosomal protein (PSRP-3 / Ycf65)
Pfam-B_2979 (release 7.6). This small acidic protein is found in 30S ribosomal subunit of cyanobacteria and plant plastids. In plants it has been named plastid-specific ribosomal protein 3 (PSRP-3), and in cyanobacteria it is named Ycf65. Plastid-specific ribosomal proteins may mediate the effects of nuclear factors on plastid translation. The acidic PSRPs are thought to contribute to protein-protein interactions in the 30S subunit, and are not thought to bind RNA .. +PF03034 Phosphatidyl serine synthase
Pfam-B_1414 (release 6.4). Phosphatidyl serine synthase is also known as serine exchange enzyme. This family represents eukaryotic PSS I and II which are membrane bound proteins which catalyses the replacement of the head group of a phospholipid (phosphatidylcholine or phosphatidylethanolamine) by L-serine.. +PF01515 Phosphate acetyl/butaryl transferase
Pfam-B_799 (release 4.0). This family contains both phosphate acetyltransferase and phosphate butaryltransferase. These enzymes catalyse the transfer of an acetyl or butaryl group to orthophosphate.. +PF02126 Phosphotriesterase family
+PF00809 DHPS;
Pterin binding enzyme. Pfam-B_1411 (release 2.1) and Pfam-B_3423 (release 6.6). This family includes a variety of pterin binding enzymes that all adopt a TIM barrel fold. The family includes dihydropteroate synthase EC:2.5.1.15 as well as a group of methyltransferase enzymes including methyltetrahydrofolate, corrinoid iron-sulfur protein methyltransferase (MeTr) Swiss:Q46389 that catalyses a key step in the Wood-Ljungdahl pathway of carbon dioxide fixation. It transfers the N5-methyl group from methyltetrahydrofolate (CH3-H4folate) to a cob(I)amide centre in another protein, the corrinoid iron-sulfur protein. MeTr is a member of a family of proteins that includes methionine synthase and methanogenic enzymes that activate the methyl group of methyltetra-hydromethano(or -sarcino)pterin .. +PF01091 PTN_MK;
PTN/MK heparin-binding protein family, C-terminal domain. +PF05196 PTN/MK heparin-binding protein family, N-terminal domain
+PF04387 Protein tyrosine phosphatase-like protein, PTPLA
Pfam-B_1525 (release 7.3). This family includes the mammalian protein tyrosine phosphatase-like protein, PTPLA. A significant variation of PTPLA from other protein tyrosine phosphatases is the presence of proline instead of catalytic arginine at the active site. It is thought that PTPLA proteins have a role in the development, differentiation, and maintenance of a number of tissue types .. +PF01242 6-pyruvoyl tetrahydropterin synthase
6-Pyruvoyl tetrahydrobiopterin synthase catalyses the conversion of dihydroneopterin triphosphate to 6-pyruvoyl tetrahydropterin, the second of three enzymatic steps in the synthesis of tetrahydrobiopterin from GTP. The functional enzyme is a hexamer of identical subunits .. +PF00854 POT family
Pfam-B_571 (release 3.0). The POT (proton-dependent oligopeptide transport) family all appear to be proton dependent transporters .. +PF00381 PTS HPr component phosphorylation site
+PF01885 DUF60;
RNA 2'-phosphotransferase, Tpt1 / KptA family. Tpt1 catalyses the last step of tRNA splicing in yeast. It transfers the splice junction 2'-phosphate from ligated tRNA to NAD, to produce ADP-ribose 1"-2"-cyclic phosphate. This is presumed to be followed by a transesterification step to release the RNA. The first step of this reaction is similar to that catalysed by some bacterial toxins. E. coli KptA and mouse Tpt1 are likely to use the same reaction mechanism .. +PF00358 phosphoenolpyruvate-dependent sugar phosphotransferase system, EIIA 1
+PF00359 Phosphoenolpyruvate-dependent sugar phosphotransferase system, EIIA 2
+PF00367 phosphotransferase system, EIIB
+PF02378 Phosphotransferase system, EIIC
Pfam-B_639 (release 5.2). The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The sugar-specific permease of the PTS consists of three domains (IIA, IIB and IIC). The IIC domain catalyses the transfer of a phosphoryl group from IIB to the sugar substrate.. +PF02255 PTS system, Lactose/Cellobiose specific IIA subunit
Pfam-B_3710 (release 5.2). The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The lactose/cellobiose-specific family are one of four structurally and functionally distinct group IIA PTS system enzymes. This family of proteins normally function as a homotrimer, stabilised by a centrally located metal ion . Separation into subunits is thought to occur after phosphorylation. . +PF03714 Bacterial pullanase-associated domain
Domain is found in pullanase - carbohydrate de-branching - proteins. It is found both to the N or the C termini of the alpha-amylase active site region. This domain contains several conserved aromatic residues that are suggestive of a carbohydrate binding function.. +PF03829 PTS system glucitol/sorbitol-specific IIA component
TIGRFAMs, Griffiths-Jones SR. +PF03830 PTS system sorbose subfamily IIB component
TIGRFAMs, Griffiths-Jones SR. +PF03209 PUCC protein
Pfam-B_2839 (release 6.5). This protein is required for high-level transcription of the PUC operon.. +PF00806 Pumilio-family RNA binding repeat
Puf repeats (aka PUM-HD, Pumilio homology domain) are necessary and sufficient for sequence specific RNA binding in fly Pumilio and worm FBF-1 and FBF-2. Both proteins function as translational repressors in early embryonic development by binding sequences in the 3' UTR of target mRNAs (e.g. the nanos response element (NRE) in fly Hunchback mRNA, or the point mutation element (PME) in worm fem-3 mRNA). Other proteins that contain Puf domains are also plausible RNA binding proteins. Swiss:P47135, for instance, appears to also contain a single RRM domain by HMM analysis. Puf domains usually occur as a tandem repeat of 8 domains. The Pfam model does not necessarily recognise all 8 repeats in all sequences; some sequences appear to have 5 or 6 repeats on initial analysis, but further analysis suggests the presence of additional divergent repeats. Structures of PUF repeat proteins show they consist of a two helix structure [3,4].. +PF02245 Methylpurine-DNA glycosylase (MPG)
Pfam-B_3352 (release 5.2). Methylpurine-DNA glycosylase is a base excision-repair protein. It is responsible for the hydrolysis of the deoxyribose N-glycosidic bond, excising 3-methyladenine and 3-methylguanine from damaged DNA.. +PF04845 PurA ssDNA and RNA-binding protein
Pfam-B_4535 (release 7.6). This family represents most of the length of the protein.. +PF02700 UPF0062; PurC;
Phosphoribosylformylglycinamidine (FGAM) synthase. This family forms a component of the de novo purine biosynthesis pathway. . +PF00855 PWWP domain
The PWWP domain is named after a conserved Pro-Trp-Trp-Pro motif . The domain binds to Histone-4 methylated at lysine-20, H4K20me, suggesting that it is a methyl-lysine recognition motif. Removal of two conserved aromatic residues in a hydrophobic cavity created by this domain within the full-length protein, Pdp1, abolishes the interaction of the protein with H4K20me3. In fission yeast, Set9 is the sole enzyme that catalyses all three states of H4K20me, and Set9-mediated H4K20me is required for efficient recruitment of checkpoint protein Crb2 to sites of DNA damage. The methylation of H4K20 is involved in a diverse array of cellular processes, such as organising higher-order chromatin, maintaining genome stability, and regulating cell-cycle progression .. +PF02436 Conserved carboxylase domain
Pfam-B_628 (release 5.2). This domain represents a conserved region in pyruvate carboxylase (PYC), oxaloacetate decarboxylase alpha chain (OADA), and transcarboxylase 5s subunit. The domain is found adjacent to the HMGL-like domain (Pfam:PF00682) and often close to the biotin_lipoyl domain (Pfam:PF00364) of biotin requiring enzymes. . +PF03013 Pyrimidine dimer DNA glycosylase
Pfam-B_1388 (release 6.4). Pyrimidine dimer DNA glycosylases excise pyrimidine dimers by hydrolysis of the glycosylic bond of the 5' pyrimidine, followed by the intra-pyrimidine phosphodiester bond. Pyrimidine dimers are the major UV-lesions of DNA.. +PF01948 Aspartate carbamoyltransferase regulatory chain, allosteric domain
The regulatory chain is involved in allosteric regulation of aspartate carbamoyltransferase. The N-terminal domain has ferredoxin-like fold, and provides the regulatory chain dimerisation interface.. +PF02748 Aspartate carbamoyltransferase regulatory chain, metal binding domain
The regulatory chain is involved in allosteric regulation of aspartate carbamoyltransferase. The C-terminal metal binding domain has a rubredoxin-like fold and provides the interface with the catalytic chain.. +PF01243 Pyridoxamine 5'-phosphate oxidase
+PF00282 pyridoxal_deC;
Pyridoxal-dependent decarboxylase conserved domain. +PF00719 Inorganic pyrophosphatase
Pfam-B_613 (release 2.1). +PF02547 Queuosine biosynthesis protein
Queuosine (Q) biosynthesis protein, or S-adenosylmethionine:tRNA -ribosyltransferase-isomerase, is required for the synthesis of the queuosine precursor (oQ). It catalyses the transfer and isomerisation of the ribose moiety from AdoMet to the 7-aminomethyl group of 7-deazaguanine (preQ1-tRNA) to form epoxyqueuosine (oQ-tRNA). Q is a hypermodified nucleoside usually found at the first position of the anticodon of asparagine, aspartate, histidine, and tyrosine tRNAs [1,2]. In Streptococcus gordonii , QueA has been shown to play a role in the regulation of arginine deiminase genes .. +PF00788 Ras association (RalGDS/AF-6) domain
Alignment kindly provided by SMART. RasGTP effectors (in cases of AF6, canoe and RalGDS); putative RasGTP effectors in other cases. Recent evidence (not yet in MEDLINE) shows that some RA domains do NOT bind RasGTP. Predicted structure similar to that determined , and that of the RasGTP-binding domain of Raf kinase.. +PF03528 Rabaptin
+PF02144 Repair protein Rad1/Rec1/Rad17
+PF03215 Rad17 cell cycle checkpoint protein
Pfam-B_2764 (release 6.5). +PF04824 Conserved region of Rad21 / Rec8 like protein
Pfam-B_2686 (release 7.6). This family represents a conserved region found in eukaryotic cohesins of the Rad21, Rec8 and Scc1 families. Members of this family mediate sister chromatid cohesion during mitosis and meiosis, as part of the cohesin complex . Cohesion is necessary for homologous recombination (including double-strand break repair) and correct chromatid segregation. These proteins may also be involved in chromosome condensation. Dissociation at the metaphase to anaphase transition causes loss of cohesion and chromatid segregation .. +PF04825 N terminus of Rad21 / Rec8 like protein
Pfam-B_2686 (release 7.6). This family represents a conserved N-terminal region found in eukaryotic cohesins of the Rad21, Rec8 and Scc1 families. Members of this family mediate sister chromatid cohesion during mitosis and meiosis, as part of the cohesin complex . Cohesion is necessary for homologous recombination (including double-strand break repair) and correct chromatid segregation. These proteins may also be involved in chromosome condensation. Dissociation at the metaphase to anaphase transition causes loss of cohesion and chromatid segregation .. +PF03835 Rad4 transglutaminase-like domain
+PF04098 Rad52/22 family double-strand break repair protein
The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to Rad52 . These proteins contain two helix-hairpin-helix motifs .. +PF04139 Rad9
Pfam-B_28077 (release 7.3);. Rad9 is required for transient cell-cycle arrests and transcriptional induction of DNA repair in response to DNA damage. It contains a Bcl-2 homology domain 3 (BH3) .. +PF04002 RadC; DUF2466;
RadC-like JAB domain. A family of proteins present widely across the bacteria. This family was named initially with reference to the E. coli radC102 mutation which suggested that RadC was involved in repair of DNA lesions . However the relevant mutation has subsequently been shown to be in recG, where radC is in fact an allele of recG . In addition, a personal communication from Claverys, J-P, et al, indicates a total failure of all attempts to characterise a radiation-related function for RadC in Streptococcus pneumoniae, suggesting that it is not involved in repair of DNA lesions, in recombination during transformation, in gene conversion, nor in mismatch repair. Computational analysis, however, provides a possible function. The RadC-like family belong to the JAB superfamily of metalloproteins . The domain shows fusions to an N-terminal Helix-hairpin-Helix (HhH) domain in most instances. Other domain combinations include fusions to the anti-restriction module ArdC, the DinG/RAD3-like superfamily II helicases and the DNAG-like primase. In some bacteria, closely related DinG/Rad3- like superfamily II helicases are fused to a 3'-5' exonuclease in the same position as the RadC-like JAB domain. These conserved domain associations lead to the hypothesis that the RadC-like JAB domains might function as a nuclease .. +PF04712 Radial spokehead-like protein
Pfam-B_5891 (release 7.5). This family includes the radial spoke head proteins RSP4 and RSP6 from Chlamydomonas reinhardtii, and several eukaryotic homologues, including mammalian RSHL1, the protein product of a familial ciliary dyskinesia candidate gene .. +PF03089 Recombination activating protein 2
Pfam-B_4702 (release 6.5). V-D-J recombination is the combinatorial process by which the huge range of immunoglobulin and T cell binding specificity is generated from a limited amount of genetic material. This process is synergistically activated by RAG1 and RAG2 in developing lymphocytes. Defects in RAG2 in humans are a cause of severe combined immunodeficiency B cell negative and Omenn syndrome.. +PF04901 Receptor activity modifying family
Pfam-B_5615 (release 7.6). The calcitonin-receptor-like receptor can function as either a calcitonin-gene-related peptide or an adrenomedullin receptor. The receptor's function is modified by receptor-activity-modifying protein or RAMP. RAMPs are single-transmembrane-domain proteins . . +PF00638 RanBP1 domain
+PF03085 Rhoptry-associated protein 1 (RAP-1)
Pfam-B_1750 (release 6.4). Members of this family are found in Babesia species. Though not in this Pfam family, rhoptry-associated proteins are also found in Plasmodium falciparum. Indeed, animal infection with Babesia may produce a pattern similar to human malaria . Rhoptry organelles form part of the apical complex in apicomplexan parasites. Rhoptry-associated proteins are antigenic, and generate partially protective immune responses in infected mammals. Thus RAPs are among the targeted vaccine antigens for babesial (and malarial) parasites. However, RAP-1 proteins are encoded by a multigene family; thus RAP-1 proteins are polymorphic, with B and T cell epitopes that are conserved among strains, but not across species [1,2,5]. Antibodies to Babesia RAP-1 may also be helpful in the serological detection of Babesia infections .. +PF00071 ras;
Includes sub-families Ras, Rab, Rac, Ral, Ran, Rap Ypt1 and more. Shares P-loop motif with GTP_EFTU, arf and myosin_head. See Pfam:PF00009 Pfam:PF00025, Pfam:PF00063. As regards Rab GTPases, these are important regulators of vesicle formation, motility and fusion. They share a fold in common with all Ras GTPases: this is a six-stranded beta-sheet surrounded by five alpha-helices .. +PF00616 GTPase-activator protein for Ras-like GTPase
Ponting C, Schultz J, Bork P. All alpha-helical domain that accelerates the GTPase activity of Ras, thereby "switching" it into an "off" position.. +PF03836 RasGAP C-terminus
+PF00617 RasGEF domain
Ponting C, Schultz J, Bork P. Guanine nucleotide exchange factor for Ras-like small GTPases.. +PF00618 RasGEFN;
RasGEF N-terminal motif. Ponting C, Schultz J, Bork P. A subset of guanine nucleotide exchange factors for Ras-like small GTPases appear to possess this motif/domain N-terminal to the RasGef (Cdc25-like) domain.. +PF01858 Retinoblastoma-associated protein A domain
This domain has the cyclin fold as predicted .. +PF01857 Retinoblastoma-associated protein B domain
The crystal structure of the Rb pocket bound to a nine-residue E7 peptide containing the LxCxE motif, shared by other Rb-binding viral and cellular proteins, shows that the LxCxE peptide binds a highly conserved groove on the B domain . The B domain has a cyclin fold. . +PF02196 Raf-like Ras-binding domain
Alignment kindly provided by SMART. +PF02033 Ribosome-binding factor A
+PF05025 RbsD / FucU transport protein family
Pfam-B_4828 (release 7.6). The Escherichia coli high-affinity ribose-transport system consists of six proteins encoded by the rbs operon (rbsD, rbsA, rbsC, rbsB, rbsK and rbsR). Of the six components, RbsD is the only one whose function is unknown although it is thought that it somehow plays a critical role in PtsG-mediated ribose transport . This family also includes FucU a protein from the fucose biosynthesis operon that is presumably also involved in fucose transport by similarity to RbsD.. +PF02341 RbcX protein
Pfam-B_948 (release 5.2). The RBCX protein has been identified as having a possible chaperone-like function . The rbcX gene is juxtaposed to and cotranscribed with rbcL and rbcS encoding RuBisCO in Anabaena sp. CA . RbcX has been shown to possess a chaperone-like function assisting correct folding of RuBisCO in E. coli expression studies and is needed for RuBisCO to reach its maximal activity .. +PF00415 Regulator of chromosome condensation (RCC1) repeat
+PF04381 Putative exonuclease, RdgC
Members of the RdgC family may have exonuclease activity. RdgC is required for efficient pilin variation in Neisseria gonorrhoeae, suggesting that it may be involved in recombination reactions . In Escherichia coli, RdgC is required for growth in recombination-deficient exonuclease-depleted strains. Under these conditions, RdgC may act as an exonuclease to remove collapsed replication forks, in the absence of the normal repair mechanisms .. +PF05183 RNA dependent RNA polymerase
Pfam-B_2226 (release 7.7). This family of proteins are eukaryotic RNA dependent RNA polymerases. These proteins are involved in post transcriptional gene silencing where they are thought to amplify dsRNA templates.. +PF00154 recA;
recA bacterial DNA recombination protein. RecA is a DNA-dependent ATPase and functions in DNA repair systems. RecA protein catalyses an ATP-dependent DNA strand-exchange reaction that is the central step in the repair of dsDNA breaks by homologous recombination .. +PF02565 RecO;
Recombination protein O C terminal. Recombination protein O (RecO) is involved in DNA repair and Pfam:PF00470 pathway recombination.. +PF02132 RecR protein
+PF03837 RecT family
The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to RecT .. +PF03838 Recombination protein U
TIGRFAMs, Griffiths-Jones SR. +PF02631 RecX family
RecX is a putative bacterial regulatory protein . The gene encoding RecX is found downstream of recA, and is thought to interact with the RecA protein. . +PF02014 Reeler domain
+PF04221 DUF415;
RelE and RelB form a toxin-antitoxin system. RelE represses translation, probably through binding ribosomes ( , ). RelB stably binds RelE, presumably deactivating it.. +PF03763 Remorin, C-terminal region
Pfam-B_1798 (release 7.0). Remorins are plant-specific plasma membrane-associated proteins. In tobacco remorin co-purifies with lipid rafts. Most remorins have a variable, proline-rich C-half and a more conserved N-half that is predicted to form coiled coils. Consistent with this, circular dichroism studies have demonstrated that much of the protein is alpha-helical. Remorins exist in plasma membrane preparations as oligomeric structures and form filaments in vitro. The proteins can bind polyanions including the extracellular matrix component oligogalacturonic acid (OGA). In vitro, remorin in plasma membrane preparations is phosphorylated (principally on threonine residues) in the presence of OGA and thus co-purifies with a protein kinases(s). The biological functions of remorins are unknown but roles as components of the membrane/cytoskeleton are possible.. +PF03766 Remorin, N-terminal region
Pfam-B_1798 (release 7.0). Remorins are plant-specific plasma membrane-associated proteins. In tobacco remorin co-purifies with lipid rafts. Most remorins have a variable, proline-rich C-half and a more conserved N-half that is predicted to form coiled coils. Consistent with this, circular dichroism studies have demonstrated that much of the protein is alpha-helical. Remorins exist in plasma membrane preparations as oligomeric structures and form filaments in vitro. The proteins can bind polyanions including the extracellular matrix component oligogalacturonic acid (OGA). In vitro, remorin in plasma membrane preparations is phosphorylated (principally on threonine residues) in the presence of OGA and thus co-purifies with a protein kinases(s). The biological functions of remorins are unknown but roles as components of the membrane/cytoskeleton are possible. . +PF01244 Renal_dipeptase;
Membrane dipeptidase (Peptidase family M19). +PF01664 Reovirus viral attachment protein sigma 1
Pfam-B_1003 (release 4.1). This family consists of the reovirus sigma 1 hemagglutinin, cell attachment protein. This glycoprotein is a minor capsid protein and also determines the serotype-specific humoral immune response. Sigma 1 consists of a fibrous tail and a globular head. The head has important roles in the cell attachment function of sigma 1 and determinant of the type-specific humoral immune response . Reovirus is part of the orthoreovirus group of retroviruses with a dsRNA genome. Also present in this family is bacteriophage SF6 Lysozyme Swiss:P21270.. +PF04582 Reovirus sigma C capsid protein
Pfam-B_2922 (release 7.5). +PF00979 Reovirus outer capsid protein, Sigma 3
Pfam-B_1049 (release 3.0). Sigma 3 is the major outer capsid protein of reovirus . Sigma 3 is encoded by genome segment 4. Sigma 3 binds to double stranded RNA and associates with polypeptide u1 and its cleavage product u1C to form the outer shell of the virion. The Sigma 3 protein possesses a zinc-finger motif and an RNA-binding domain in the N and C termini respectively. This protein is also thought to play a role in pathogenesis. . +PF01446 Rep;
Prodom_1565 (release 99.1). Replication proteins (rep) are involved in plasmid replication. The Rep protein binds to the plasmid DNA and nicks it at the double strand origin (dso) of replication. The 3'-hydroxyl end created is extended by the host DNA replicase, and the 5' end is displaced during synthesis. At the end of one replication round, Rep introduces a second single stranded break at the dso and ligates the ssDNA extremities generating one double-stranded plasmid and one circular ssDNA form. Complementary strand synthesis of the circular ssDNA is usually initiated at the single-stranded origin by the host RNA polymerase .. +PF04057 Rep-A_protein_1;
Replication factor-A protein 1, N-terminal domain. Pfam-B_6000 (release 7.3);. +PF01719 Plasmid replication protein
Pfam-B_1901 (release 4.1). This family consists of various bacterial plasmid replication (Rep) proteins. These proteins are essential for replication of plasmids, the Rep proteins are topoisomerases that nick the positive strand at the plus origin of replication and also at the single-strand conversion sequence .. +PF01051 RepB_protein;
Initiator Replication protein. Pfam-B_313 (release 3.0). This protein is an initiator of plasmid replication. RepB possesses nicking-closing (topoisomerase I) like activity. It is also able to perform a strand transfer reaction on ssDNA that contains its target. This family also includes RepA which is an E.coli protein involved in plasmid replication. The RepA protein binds to DNA repeats that flank the repA gene [3,4].. +PF02486 Replication initiation factor
Pfam-B_2164 (release 5.4) & COG2946. Plasmid replication is initiated by the replication initiation factor (REP). This family represents a probable topoisomerase that makes a sequence-specific single-stranded nick in the plasmid DNA at the origin of replication. Human proteins also belong to this family, including myelin transcription factor 2 (Swiss:O15150) and cerebrin-50 (Swiss:Q16301) .. +PF04796 Plasmid encoded RepA protein
Pfam-B_6223 (release 7.5). Family of plasmid encoded proteins involved in plasmid replication. The role of RepA in the replication process is not clearly understood .. +PF01421 Reprolysin (M12B) family zinc metalloprotease
The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. Members of this family are also known as adamalysins. Most members of this family are snake venom endopeptidases, but there are also some mammalian proteins such as Swiss:P78325, and fertilin Swiss:Q28472. Fertilin and closely related proteins appear to not have some active site residues and may not be active enzymes.. +PF05140 ResB-like family
Pfam-B_1866 (release 7.7). This family includes both ResB and cytochrome c biogenesis proteins [1,2]. Mutations in ResB indicate that they are essential for growth . ResB is predicted to be a transmembrane protein .. +PF04851 Type III restriction enzyme, res subunit
Pfam-B_4631 (release 7.6). +PF00239 recombinase; resolvase;
Resolvase, N terminal domain. Prosite & Pfam-B_3830 (Release 7.5). The N-terminal domain of the resolvase family (this family) contains the active site and the dimer interface. The extended arm at the C-terminus of this domain connects to the C-terminal helix-turn-helix domain of resolvase - see Pfam:PF02796.. +PF00072 response_reg;
Response regulator receiver domain. This domain receives the signal from the sensor partner in bacterial two-component systems. It is usually found N-terminal to a DNA binding effector domain.. +PF02813 Retroviral M domain
Retroviruses contain a small protein, MA (matrix), which forms a protein lining immediately beneath the phospholipid membrane of the mature virus particle. MA is located in the N-terminal region of the Gag precursor polyprotein. The N-terminal segment of MA proteins directs the Gag protein to the plasma membrane where budding takes place, and has been called the M domain. This domain forms an alpha helical bundle structure.. +PF00424 REV protein (anti-repression trans-activator protein)
Pfam-B_169 (release 1.0). +PF00472 RF-1 domain
This domain is found in peptide chain release factors such as RF-1 (Swiss:P07011) and RF-2 (Swiss:P07012), and a number of smaller proteins of unknown function such as Swiss:P40711. This domain contains the peptidyl-tRNA hydrolase activity. The domain contains a highly conserved motif GGQ, where the glutamine is thought to coordinate the water that mediates the hydrolysis.. +PF04506 Rft protein
+PF04589 RFX1 transcription activation region
The RFX family is a family of winged-helix DNA binding proteins. RFX1 is a regulatory factor essential for expression of MHC class II genes. This region is found N-terminal to the RFX DNA binding region (Pfam:PF02257) in some mammalian RFX proteins, and is thought to activate transcription when associated with DNA. Deletion analysis has identified the region 233-351 in human RFX1 (Swiss:P22670) as being required for maximal activation .. +PF02257 RFX DNA-binding domain
Pfam-B_3682 (release 5.2). RFX is a regulatory factor which binds to the X box of MHC class II genes and is essential for their expression. The DNA-binding domain of RFX is the central domain of the protein and binds ssDNA as either a monomer or homodimer . It recognizes X-boxes (DNA of the sequence 5'-GTNRCC(0-3N)RGYAAC-3', where N is any nucleotide, R is a purine and Y is a pyrimidine) using a highly conserved 76-residue DNA-binding domain (DBD) .. +PF03214 Reversibly glycosylated polypeptide
Pfam-B_2662 (release 6.5). +PF05045 Rhamnan synthesis protein F
Pfam-B_5448 (release 7.7). This family consists of a group of proteins which are related to the Streptococcus rhamnose-glucose polysaccharide assembly protein (RgpF). Rhamnan backbones are found in several O polysaccharides of phytopathogenic bacteria and are regarded as pathogenic factors .. +PF00615 Regulator of G protein signaling domain
Ponting C, Schultz J, Bork P. RGS family members are GTPase-activating proteins for heterotrimeric G-protein alpha-subunits.. +PF00974 Rhabd_glycop;
Rhabdovirus spike glycoprotein. Pfam-B_167 (release 3.0). Frequently abbreviated to G protein. The glycoprotein spike is made up of a trimer of G proteins. Channel formed by glycoprotein spike is thought to function in a similar manner to Influenza virus M2 protein channel, thus allowing a signal to pass across the viral membrane to signal for viral uncoating.. +PF03342 Rhabdovirus M1 matrix protein (M1 polymerase-associated protein)
Pfam-B_3629 (release 6.5). +PF04785 Rhabdovirus matrix protein M2
Pfam-B_2486 (release 7.6). M protein is involved in condensing and targeting the ribonucleoprotein (RNP) coil to the plasma membrane. M interacts specifically with the transmembrane spike protein (G) and is important for the incorporation of G protein into budding virions .. +PF03397 Rhabdovirus matrix protein
Pfam-B_3980 (release 6.6). +PF00945 Rhabd_nucleocap;
Rhabdovirus nucleocapsid protein. Pfam-B_477 (release 3.0). The Nucleocapsid (N) Protein is said to have a "tight" structure. The carboxyl end of the N-terminal domain possesses an RNA binding domain. Sequence alignments show 2 regions of reasonable conservation, approx. 64-103 and 201-329 . A whole functional protein is required for encapsidation to take place .. +PF03216 Rhabdovirus nucleoprotein
Pfam-B_2146 (release 6.5). +PF02484 Rhabd_NV;
Rhabdovirus Non-virion protein. Pfam-B_2189 (release 5.4). Infectious hematopoietic necrosis virus (IHNV) is a member of the family Rhabdoviridae. The non-virion protein (NV) is coded for by one of the six genes of the IHNV genome , but is absent in vesiculovirus -like rhabdovirus .. +PF00554 Rel homology domain (RHD)
Proteins containing the Rel homology domain (RHD) are eukaryotic transcription factors. The RHD is composed of two structural domains. This is the N-terminal domain that is similar to that found in P53. The C-terminal domain has an immunoglobulin-like fold (See Pfam:PF01833) that binds to DNA.. +PF02115 RHO protein GDP dissociation inhibitor
+PF00581 Rhodanese-like domain
MRC-LMB Genome group. Rhodanese has an internal duplication. This Pfam represents a single copy of this duplicated domain. The domain is found as a single copy in other proteins, including phosphatases and ubiquitin C-terminal hydrolases.. +PF00620 RhoGAP domain
Ponting C, Schultz J, Bork P. GTPase activator proteins towards Rho/Rac/Cdc42-like small GTPases.. +PF00621 RhoGEF domain
Alignment kindly provided by SMART. Guanine nucleotide exchange factor for Rho/Rac/Cdc42-like GTPases Also called Dbl-homologous (DH) domain. It appears that Pfam:PF00169 domains invariably occur C-terminal to RhoGEF/DH domains.. +PF03527 RHS protein
+PF00073 rhv;
picornavirus capsid protein. Overington and HMM_iterative_training. CAUTION: This alignment is very weak. It can not be generated by clustalw. If a representative set is used for a seed, many so-called members are not recognised. The family should probably be split up into sub-families. Capsid proteins of picornaviruses. Picornaviruses are non-enveloped plus-strand ssRNA animal viruses with icosahedral capsids. They include rhinovirus (common cold) and poliovirus. Common structure is an 8-stranded beta sandwich. Variations (one or two extra strands) occur.. +PF02267 ADP-ribosyl cyclase
Pfam-B_3719 (release 5.2). ADP-ribosyl cyclase EC:3.2.2.5 (also known as cyclic ADP-ribose hydrolase or CD38) synthesises cyclic-ADP ribose, a second messenger for glucose-induced insulin secretion.. +PF01872 RibD C-terminal domain
Enright A & Pfam-B_6425 (Release 8.0). The function of this domain is not known, but it is thought to be involved in riboflavin biosynthesis. This domain is found in the C terminus of RibD/RibG Swiss:P25539, in combination with Pfam:PF00383, as well as in isolation in some archaebacterial proteins Swiss:P95872. This family appears to be related to Pfam:PF00186.. +PF05062 RICH domain
Pfam-B_277 (release 7.7). This presumed domain is about 85 residues in length and very rich in charged residues, hence the name RICH (Rich In CHarged residues). It is found in secreted proteins such as PspC Swiss:Q9KK19, SpsA Swiss:O33742 and IgA FC receptor Swiss:P27951 from Streptococcus agalactiae. This domain could be involved in bacterial adherence or cell wall binding.. +PF01042 DUF10;UPF0076; ribonuc_L-PSP;
Endoribonuclease L-PSP. Pfam-B_797 (release 3.0). Endoribonuclease active on single-stranded mRNA. Inhibits protein synthesis by cleavage of mRNA . Previously thought to inhibit protein synthesis initiation . This protein may also be involved in the regulation of purine biosynthesis . YjgF (renamed RidA) family members are enamine/imine deaminases. They hydrolyze reactive intermediates released by PLP-dependent enzymes, including threonine dehydratase . YjgF also prevents inhibition of transaminase B (IlvE) in Salmonella .. +PF00317 ribonucleo_red; ribonuc_red_lg;
Ribonucleotide reductase, all-alpha domain. +PF02867 ribonuc_red_lgC;
Ribonucleotide reductase, barrel domain. +PF00268 ribonuc_red; ribonuc_red_sm;
Ribonucleotide reductase, small chain. +PF00545 ribonuclease;
This enzyme hydrolyses RNA and oligoribonucleotides.. +PF03631 Ribonuclease_BN;
Virulence factor BrkB. Pfam-B_4424 (release 7.0). This family acts as a virulence factor. In Bordetella pertussis, Swiss:Q45339 is essential for resistance to complement-dependent killing by serum . This family was originally predicted to be ribonuclease BN , but this prediction has since been shown to be incorrect .. +PF00825 Ribonuclease P
Pfam-B_1558 (release 2.1). +PF00445 ribonuclease_T2;
Ribonuclease T2 family. +PF00687 L1;
Ribosomal protein L1p/L10e family. Pfam-B_115 (release 2.1). This family includes prokaryotic L1 and eukaryotic L10.. +PF00466 L10;
Ribosomal protein L10. +PF00298 L11;
Ribosomal protein L11, RNA binding domain. +PF03946 L11;
Ribosomal protein L11, N-terminal domain. The N-terminal domain of Ribosomal protein L11 adopts an alpha/beta fold and is followed by the RNA binding C-terminal domain.. +PF00542 L12;
Ribosomal protein L7/L12 C-terminal domain. +PF00572 L13;
Ribosomal protein L13. +PF01294 Ribosomal protein L13e
+PF00238 L14;
Ribosomal protein L14p/L23e. +PF01929 Ribosomal protein L14
This family includes the eukaryotic ribosomal protein L14.. +PF00827 Ribosomal L15
Pfam-B_1567 (release 2.1). +PF00252 L16;
Ribosomal protein L16p/L10e. +PF01196 Ribosomal protein L17
+PF00861 Ribosomal L18p/L5e family
Pfam-B_495 (release 3.0) & Pfam-B_741 (release 4.1). This family includes ribosomal proteins from the large subunit. This family includes L18 from bacteria and L5 from eukaryotes. It has been shown that the amino terminal 93 amino acids of Swiss:P09895 are necessary and sufficient to bind 5S rRNA in vitro . Suggesting that the entire family has a function in rRNA binding.. +PF01245 Ribosomal protein L19
+PF01280 Ribosomal protein L19e
+PF00181 L2;
Ribosomal Proteins L2, RNA binding domain. +PF00453 L20;
Ribosomal protein L20. +PF01157 L21e;
Ribosomal protein L21e. +PF00829 Ribosomal prokaryotic L21 protein
Pfam-B_1297 (release 2.1). +PF00237 L22;
Ribosomal protein L22p/L17e. This family includes L22 from prokaryotes and chloroplasts and L17 from eukaryotes.. +PF01776 Ribosomal L22e protein family
+PF00276 L23;
Ribosomal protein L23. +PF03939 Ribosomal protein L23, N-terminal domain
The N-terminal domain appears to be specific to the eukaryotic ribosomal proteins L25, L23, and L23a.. +PF01246 Ribosomal protein L24e
+PF01386 Ribosomal L25p family
Ribosomal protein L25 is an RNA binding protein, that binds 5S rRNA. This family includes Ctc from B. subtilis Swiss:P14194, which is induced by stress.. +PF01016 Ribosomal L27 protein
Pfam-B_1340 (release 3.0). +PF01777 Ribosomal L27e protein family
The N-terminal region of the eukaryotic ribosomal L27 has the KOW motif. C-terminal region is represented by this family. . +PF00830 Ribosomal L28 family
Pfam-B_1561 (release 2.1). The ribosomal 28 family includes L28 proteins from bacteria and chloroplasts. The L24 protein from yeast Swiss:P36525 also contains a region of similarity to prokaryotic L28 proteins. L24 from yeast is also found in the large ribosomal subunit. +PF01778 Ribosomal L28e protein family
+PF00831 Ribosomal L29 protein
Pfam-B_1296 (release 2.1). +PF01779 Ribosomal L29e protein family
+PF03947 L2;
Ribosomal Proteins L2, C-terminal domain. +PF00297 L3;
Ribosomal protein L3. +PF00327 L30;
Ribosomal protein L30p/L7e. This family includes prokaryotic L30 and eukaryotic L7.. +PF01197 Ribosomal protein L31
+PF01198 Ribosomal protein L31e
+PF01655 Ribosomal protein L32
Pfam-B_1346 (release 4.1). This family includes ribosomal protein L32 from eukaryotes and archaebacteria.. +PF01783 Ribosomal L32p protein family
+PF00471 L33;
Ribosomal protein L33. +PF00468 L34;
Ribosomal protein L34. +PF01199 Ribosomal protein L34e
+PF01247 Ribosomal protein L35Ae
+PF01632 Ribosomal protein L35
Pfam-B_1156 (release 4.1). +PF00444 L36;
Ribosomal protein L36. +PF01158 L36e;
Ribosomal protein L36e. +PF01780 Ribosomal L37ae protein family
This ribosomal protein is found in archaebacteria and eukaryotes. It contains four conserved cysteine residues that may bind to zinc.. +PF01781 Ribosomal L38e protein family
+PF00832 Ribosomal L39 protein
Pfam-B_1293 (release 2.1). +PF00573 L1e; Ribosomal_L1e;
Ribosomal protein L4/L1 family. This family includes Ribosomal L4/L1 from eukaryotes and archaebacteria and L4 from eubacteria. L4 from yeast has been shown to bind rRNA .. +PF01020 Ribosomal L40e family
Pfam-B_884 (release 3.0). Bovine L40 has been identified as a secondary RNA binding protein . L40 is fused to a ubiquitin protein .. +PF05162 Ribosomal protein L41
+PF00935 L44;
Ribosomal protein L44. Pfam-B_1065 (release 3.0). +PF00281 L5;
Ribosomal protein L5. +PF00673 L5_C;
ribosomal L5P family C-terminus. Pfam-B_69 (release 2.1). This region is found associated with Pfam:PF00281.. +PF01159 L6e;
Ribosomal protein L6e . +PF03868 Ribosomal protein L6, N-terminal domain
+PF01248 Ribosomal protein L7Ae/L30e/S12e/Gadd45 family
This family includes: Ribosomal L7A from metazoa, Ribosomal L8-A and L8-B from fungi, 30S ribosomal protein HS6 from archaebacteria, 40S ribosomal protein S12 from eukaryotes, Ribosomal protein L30 from eukaryotes and archaebacteria. Gadd45 and MyD118 .. +PF03948 Ribosomal protein L9, C-terminal domain
+PF01281 Ribosomal_L9;
Ribosomal protein L9, N-terminal domain. +PF00338 S10;
Ribosomal protein S10p/S20e. This family includes small ribosomal subunit S10 from prokaryotes and S20 from eukaryotes.. +PF00411 S11;
Ribosomal protein S11. +PF00164 S12; Ribosomal_S12;
Ribosomal protein S12/S23. This protein is known as S12 in bacteria and archaea and S23 in eukaryotes.. +PF00416 S13;
Ribosomal protein S13/S18. This family includes ribosomal protein S13 from prokaryotes and S18 from eukaryotes.. +PF00253 S14;
Ribosomal protein S14p/S29e. This family includes both ribosomal S14 from prokaryotes and S29 from eukaryotes.. +PF00312 S15;
Ribosomal protein S15. +PF00886 Ribosomal protein S16
Pfam-B_1025 (release 3.0). +PF00366 S17;
Ribosomal protein S17. +PF00833 Ribosomal_S17;
Pfam-B_1566 (release 2.1). +PF01084 S18;
Ribosomal protein S18. Pfam-B_712 (release 3.0). +PF00203 S19;
Ribosomal protein S19. +PF01090 S19e;
Ribosomal protein S19e. +PF00318 S2;
Ribosomal protein S2. +PF01649 Ribosomal protein S20
Pfam-B_1685 (release 4.1). Bacterial ribosomal protein S20 interacts with 16S rRNA .. +PF01165 S21;
Ribosomal protein S21. +PF01249 Ribosomal protein S21e
+PF01282 Ribosomal protein S24e
+PF03297 S25 ribosomal protein
Pfam-B_4038 (release 6.5). +PF01283 Ribosomal protein S26e
+PF01599 Ribosomal protein S27a
Pfam-B_638 (release 4.1). This family of ribosomal proteins consists mainly of the 40S ribosomal protein S27a which is synthesised as a C-terminal extension of ubiquitin (CEP). The S27a domain comprises the C-terminal half of the protein. The synthesis of ribosomal proteins as extensions of ubiquitin promotes their incorporation into nascent ribosomes by a transient metabolic stabilisation and is required for efficient ribosome biogenesis . The ribosomal extension protein S27a contains a basic region that is proposed to form a zinc finger; its fusion gene is proposed as a mechanism to maintain a fixed ratio between ubiquitin necessary for degrading proteins and ribosomes a source of proteins .. +PF01667 Ribosomal protein S27
Pfam-B_1929 (release 4.1). +PF01200 Ribosomal protein S28e
+PF04758 Ribosomal protein S30
+PF00189 S3_C;
Ribosomal protein S3, C-terminal domain. This family contains a central domain Pfam:PF00013, hence the amino and carboxyl terminal domains are stored separately. This is a minimal carboxyl-terminal domain. Some are much longer.. +PF01015 Ribosomal S3Ae family
Pfam-B_1334 (release 3.0). +PF00163 S4;
Ribosomal protein S4/S9 N-terminal domain. This family includes small ribosomal subunit S9 from prokaryotes and S16 from metazoans. This domain is predicted to bind to ribosomal RNA . This domain is composed of four helices in the known structure. However the domain is discontinuous in sequence and the alignment for this family contains only the first three helices.. +PF00900 Ribosomal family S4e
Pfam-B_1205 (release 3.0). +PF00333 S5;
Ribosomal protein S5, N-terminal domain. +PF03719 Ribosomal protein S5, C-terminal domain
+PF01250 Ribosomal protein S6
+PF01092 S6e;
Ribosomal protein S6e. +PF00177 S7;
Ribosomal protein S7p/S5e. This family contains ribosomal protein S7 from prokaryotes and S5 from eukaryotes.. +PF01251 Ribosomal protein S7e
+PF00410 S8;
Ribosomal protein S8. +PF00380 S9;
Ribosomal protein S9/S16. This family includes small ribosomal subunit S9 from prokaryotes and S16 from eukaryotes.. +PF00834 Ribulose-phosphate 3 epimerase family
Pfam-B_1291 (release 2.1). This enzyme catalyses the conversion of D-ribulose 5-phosphate into D-xylulose 5-phosphate.. +PF02009 Rifin/stevor family
Several multicopy gene families have been described in Plasmodium falciparum, including the stevor family of subtelomeric open reading frames and the rif interspersed repetitive elements. Both families contain three predicted transmembrane segments. It has been proposed that stevor and rif are members of a larger superfamily that code for variant surface antigens .. +PF02197 Regulatory subunit of type II PKA R-subunit
Alignment kindly provided by SMART. +PF01782 RimM N-terminal domain
The RimM protein is essential for efficient processing of 16S rRNA . The RimM protein was shown to have affinity for free ribosomal 30S subunits but not for 30S subunits in the 70S ribosomes . This N-terminal domain is found associated with a PRC-barrel domain . . +PF00848 Ring hydroxylating alpha subunit (catalytic domain)
Pfam-B_407 (release 3.0). This family is the catalytic domain of aromatic-ring- hydroxylating dioxygenase systems. The active site contains a non-heme ferrous ion coordinated by three ligands.. +PF00161 Ribosome inactivating protein
+PF04957 Ribosome modulation factor
This protein associates with 70s ribosomes and converts them to a dimeric form (100S ribosomes) which appear during the transition from the exponential growth phase to the stationary phase of Escherichia coli cells.. +PF04321 RmlD substrate binding domain
L-rhamnose is a saccharide required for the virulence of some bacteria. Its precursor, dTDP-L-rhamnose, is synthesised by four different enzymes, the final one of which is RmlD. The RmlD substrate binding domain is responsible for binding a sugar nucleotide [1,2].. +PF03035 Calicivirus putative RNA polymerase/capsid protein
Pfam-B_1282 (release 6.4). +PF00680 RNA dependent RNA polymerase
Pfam-B_32 (release 2.1). +PF00978 RNA_dep_RNApol2;
RNA dependent RNA polymerase. Pfam-B_13 (release 3.0). This family may represent an RNA dependent RNA polymerase. The family also contains the following proteins: 2A protein from bromoviruses putative RNA dependent RNA polymerase from tobamoviruses Non structural polyprotein from togaviruses. +PF00910 RNA helicase
Pfam-B_11 (release 3.0). This family includes RNA helicases thought to be involved in duplex unwinding during viral RNA replication. Members of this family are found in a variety of single stranded RNA viruses.. +PF00940 DNA-dependent RNA polymerase
Pfam-B_1108 (release 3.0). This is a family of single chain RNA polymerases.. +PF03118 Bacterial RNA polymerase, alpha chain C terminal domain
Pfam-B_172 (release 3.0). The alpha subunit of RNA polymerase consists of two independently folded domains, referred to as amino-terminal and carboxyl terminal domains. The amino terminal domain is involved in the interaction with the other subunits of the RNA polymerase. The carboxyl-terminal domain interacts with the DNA and activators. The amino acid sequence of the alpha subunit is conserved in prokaryotic and chloroplast RNA polymerases. There are three regions of particularly strong conservation, two in the amino-terminal and one in the carboxyl- terminal .. +PF05066 RNA_pol_delta;
HB1, ASXL, restriction endonuclease HTH domain. A winged helix-turn-helix domain present in the plant HB1, vertebrate ASXL, the H. pylori restriction endonuclease HpyAIII(HgrA), the RNA polymerase delta subunit(RpoE) of Gram positive bacteria and several restriction endonucleases . The domain is distinguished by the presence of a conserved one-turn helix between helix-3 and the preceding conserved turn. Its diverse architectures in eukaryotic species with extensive gene body methylation is suggestive of a chromatin function. The genetic interaction of the HARE-HTH containing ASXL with the methyl cytosine hydroxylating Tet2 protein is suggestive of a role for the domain in discriminating sequences with DNA modifications such as hmC . Bacterial versions include fusions to diverse restriction endonucleases, and a DNA glycosylase where it may play a similar role in detecting modified DNA. Certain bacterial version of the HARE-HTH domain show fusions to the helix-hairpin-helix domain of the RNA polymerase alpha subunit and the HTH domains found in regions 3 and 4 of the sigma factors . These versions are predicted to function as a novel inhibitor of the binding of RNA polymerase to transcription start sites, similar to the Bacillus delta protein [2,3].. +PF04090 RNA polymerase I specific initiation factor
Pfam-B_43469 (release 7.3);. +PF01193 RNA polymerase Rpb3/Rpb11 dimerisation domain
Pfam-B_172 (release 3.0). The two eukaryotic subunits Rpb3 and Rpb11 dimerise to form a platform onto which the other subunits of the RNA polymerase assemble (D/L in archaea). The prokaryotic equivalent of the Rpb3/Rpb11 platform is the alpha-alpha dimer. The dimerisation domain of the alpha subunit/Rpb3 is interrupted by an insert domain (Pfam:PF01000). Some of the alpha subunits also contain iron-sulphur binding domains (Pfam:PF00037). Rpb11 is found as a continuous domain. Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, Rpb11 subunits from eukaryotes, RpoD subunits from archaeal spp, and RpoL subunits from archaeal spp.. +PF02150 RNA polymerases M/15 Kd subunit
+PF01194 RNA polymerases N / 8 kDa subunit
+PF04990 RNA polymerase Rpb1, domain 7
Pfam-B_288 (release 4.2). RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain, domain 7, represents a mobile module of the RNA polymerase. Domain 7 forms a substantial interaction with the lobe domain of Rpb2 (Pfam:PF04561) [1,2].. +PF04563 RNA polymerase beta subunit
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). This domain forms one of the two distinctive lobes of the Rpb2 structure. This domain is also known as the protrusion domain . The other lobe (Pfam:PF04561) is nested within this domain.. +PF04561 RNA polymerase Rpb2, domain 2
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). Rpb2 is the second largest subunit of the RNA polymerase. This domain forms one of the two distinctive lobes of the Rpb2 structure. This domain is also known as the lobe domain . DNA has been demonstrated to bind to the concave surface of the lobe domain, and plays a role in maintaining the transcription bubble . Many of the bacterial members contain large insertions within this domain, a region known as dispensable region 1 (DRI).. +PF04566 RNA polymerase Rpb2, domain 4
RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial and chloroplast polymerases). Domain 4 is also known as the external 2 domain .. +PF01191 RNA_pol_H;
RNA polymerase Rpb5, C-terminal domain. The assembly domain of Rpb5 . The archaeal equivalent to this domain is subunit H. Subunit H lacks the N-terminal domain.. +PF03871 RNA polymerase Rpb5, N-terminal domain
Rpb5 has a bipartite structure which includes a eukaryote-specific N-terminal domain and a C-terminal domain resembling the archaeal RNAP subunit H [1,2]. The N-terminal domain is involved in DNA binding and is part of the jaw module in the RNA pol II structure . This module is important for positioning the downstream DNA.. +PF01192 RNA polymerase Rpb6
Rpb6 is an essential subunit in the eukaryotic polymerases Pol I, II and III. This family also contains the bacterial equivalent to Rpb6, the omega subunit. Rpb6 and omega are structurally conserved and both function in polymerase assembly .. +PF03876 RNA_pol_Rpb7_N;
SHS2 domain found in N terminus of Rpb7p/Rpc25p/MJ0397. Rpb7 binds to Rpb4 to form a heterodimer. This complex is thought to interact with the nascent RNA strand during RNA polymerase II elongation . This family includes the homologs from RNA polymerase I and III. In RNA polymerase I, Rpa43 is at least one of the subunits contacted by the transcription factor TIF-IA . The N terminus of Rpb7p/Rpc25p/MJ0397 has a SHS2 domain that is involved in protein-protein interaction .. +PF03870 RNA polymerase Rpb8
Rpb8 is a subunit common to the three yeast RNA polymerases, pol I, II and III. Rpb8 interacts with the largest subunit Rpb1, and with Rpb3 and Rpb11, two smaller subunits.. +PF05158 RNA polymerase Rpc34 subunit
Subunit specific to RNA Pol III, the tRNA specific polymerase. The C34 subunit of yeast RNA Pol III is part of a subcomplex of three subunits which have no counterpart in the other two nuclear RNA polymerases. This subunit interacts with TFIIIB70 and therefore participates in Pol III recruitment . . +PF05132 RNA polymerase III RPC4
Pfam-B_18856 (release 7.7). Specific subunit for Pol III, the tRNA specific polymerase.. +PF03431 RNA replicase, beta-chain
Pfam-B_4422 (release 6.6). This family is of Leviviridae RNA replicases. The replicase is also known as RNA dependent RNA polymerase. . +PF01876 DUF53;
This protein is part of the RNase P complex that is involved in tRNA maturation .. +PF01900 DUF69;
tRNA processing enzyme ribonuclease P (RNase P) consists of an RNA molecule associated with at least eight protein subunits, hPop1, Rpp14, Rpp20, Rpp25, Rpp29, Rpp30, Rpp38, and Rpp40 . This protein is known as Pop5 in eukaryotes.. +PF00074 rnaseA;
Pancreatic ribonuclease. Overington and HMM_iterative_training. Ribonucleases. Members include pancreatic RNAase A and angiogenins. Structure is an alpha+beta fold -- long curved beta sheet and three helices.. +PF00075 rnaseH; RnaseH;
Swissprot; SCOP and HMM_iterative_training. RNase H digests the RNA strand of an RNA/DNA hybrid. Important enzyme in retroviral replication cycle, and often found as a domain associated with reverse transcriptases. Structure is a mixed alpha+beta fold with three a/b/a layers.. +PF00773 RNB domain
Pfam-B_1009 (release 2.1). This domain is the catalytic domain of ribonuclease II .. +PF02508 Rnf-Nqr subunit, membrane protein
Pfam-B_1638 (release 5.4). This is a family of integral membrane proteins including Rhodobacter-specific nitrogen fixation (rnf) proteins RnfA and RnfE and Na+-translocating NADH:ubiquinone oxidoreductase (Na+-NQR) subunits NqrD and NqrE. . +PF03259 Roadblock/LC7 domain
This family includes proteins that are about 100 amino acids long and have been shown to be related . Members of this family of proteins are associated with both flagellar outer arm dynein and Drosophila and rat brain cytoplasmic dynein. It is proposed that roadblock/LC7 family members may modulate specific dynein functions . This family also includes Swiss:Q9Y2Q5 Golgi-associated MP1 adapter protein and MglB from Myxococcus xanthus Swiss:Q50883, a protein involved in gliding motility . However the family also includes members from non-motile bacteria such as Streptomyces coelicolor, suggesting that the protein may play a structural or regulatory role.. +PF00480 ROK family
+PF02027 Glyco_hydro_41;
RolB/RolC glucosidase family. This family of proteins includes RolB and RolC. RolC releases cytokinins from glucoside conjugates . Whereas RolB hydrolyses indole glucosides .. +PF01815 Rop protein
+PF00980 Rotavirus major capsid protein VP6
Pfam-B_1047 (release 3.0). Rotaviruses consist of three concentric protein shells. The intermediate (middle) protein layer consists of 260 trimers of VP6. VP6 is the most abundant protein in the virion. VP6 is also involved in virion assembly, and possesses the ability to interact with VP2, VP4 and VP7 [1,2].. +PF01525 Rotavirus NS26
Pfam-B_762 (release 4.0). Gene 11 product is a non-structural phosphoprotein designated as NS26 .. +PF02509 Rotavirus non-structural protein 35
Pfam-B_1677 (release 5.4). Rotavirus non-structural protein 35 (NS35) is a basic protein which possesses RNA-binding activity and is essential for genome replication .. +PF00981 Rotavirus RNA-binding Protein 53 (NS53)
Pfam-B_1048 (release 3.0). This protein is also known as NSP1. NS53 is encoded by gene 5. It is made in low levels in the infected cells and is a component of early replication. The protein is known to accumulate on the cytoskeleton of the infected cell. NS53 is an RNA binding protein that contains a characteristic cysteine rich region .. +PF04866 Rotavirus non-structural protein 6
Pfam-B_4831 (release 7.6). +PF01665 Rotavirus non-structural protein NSP3
Pfam-B_1010 (release 4.1). This family consists of rotaviral non-structural RNA binding protein 34 (NS34 or NSP3). The NSP3 protein has been shown to bind viral RNA. The NSP3 protein consists of 3 conserved functional domains; a basic region which binds ssRNA, a region containing heptapeptide repeats mediating oligomerisation and a leucine zipper motif . NSP3 may play a central role in replication and assembly of genomic RNA structures . Rotaviruses have a dsRNA genome and are a major cause of acute gastroenteritis in the young of many species . The rotavirus non-structural protein NSP3 is a sequence-specific RNA binding protein that binds the nonpolyadenylated 3' end of the rotavirus mRNAs. NSP3 also interacts with the translation initiation factor eIF4GI and competes with the poly(A) binding protein .. +PF01452 Rotavirus non structural protein
Prodom_2202 (release 99.1). This protein has been called NSP4, NSP5, NS28, and NCVP5. The final steps in the assembly of rotavirus occur in the lumen of the endoplasmic reticulum (ER). Targeting of the immature inner capsid particle (ICP) to this compartment is mediated by the cytoplasmic tail of NSP4, located in the ER membrane.. +PF05087 Rotavirus VP2 protein
Pfam-B_6280 (release 7.7). Rotavirus particles consist of three concentric proteinaceous capsid layers. The innermost capsid (core) is made of VP2. The genomic RNA and the two minor proteins VP1 and VP3 are encapsidated within this layer . The N-terminus of rotavirus VP2 is necessary for the encapsidation of VP1 and VP3 .. +PF00639 PPIC-type PPIASE domain
Rotamases increase the rate of protein folding by catalysing the interconversion of cis-proline and trans-proline.. +PF03428 Replication protein C N-terminal domain
Pfam-B_4463 (release 6.6). Replication protein C is involved in the early stages of viral DNA replication.. +PF03055 Retinal pigment epithelial membrane protein
Pfam-B_947 (release 6.4). This family represents a retinal pigment epithelial membrane receptor which is abundantly expressed in retinal pigment epithelium, and binds plasma retinal binding protein. The family also includes the sequence related neoxanthin cleavage enzyme in plants and lignostilbene-alpha,beta-dioxygenase in bacteria.. +PF02318 RPH3A_effector; RPH3A_effect_N;
FYVE-type zinc finger. This FYVE-type zinc finger is found at the N-terminus of effector proteins including rabphilin-3A and regulating synaptic membrane exocytosis protein 2 .. +PF04390 DUF532; RplB;
Lipopolysaccharide-assembly. LptE (formerly known as RplB) is involved in lipopolysaccharide-assembly on the outer membrane of Gram-negative organisms. The lipopolysaccharide component of the outer bacterial membrane is transported from its source of origin to the outer membrane by a set of proteins constituting a transport machinery that is made up of LptA, LptB, LptC, LptD, LptE. LptD appears to be anchored in the outer membrane, and LptE forms a complex with it. This part of the machinery complex is involved in the assembly of lipopolysaccharide in the outer leaflet of the outer membrane .. +PF04032 DUF363;
RNAse P Rpr2/Rpp21/SNM1 subunit domain. This family contains a ribonuclease P subunit of humans and yeast. Other members of the family include the probable archaeal homologues. This family includes SNM1 . It is a subunit of RNase MRP (mitochondrial RNA processing), a ribonucleoprotein endoribonuclease that has roles in both mitochondrial DNA replication and nuclear 5.8S rRNA processing. SNM1 is an RNA binding protein that binds the MRP RNA specifically . This subunit possibly binds the precursor tRNA .. +PF01765 Ribosome recycling factor
Pfam-B_949 (release 4.2). The ribosome recycling factor (RRF / ribosome release factor) dissociates the ribosome from the mRNA after termination of translation, and is essential for bacterial growth . Thus ribosomes are "recycled" and ready for another round of protein synthesis.. +PF00076 rrm;
RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain). Published_alignment . The RRM motif is probably diagnostic of an RNA binding protein. RRMs are found in a variety of RNA binding proteins, including various hnRNP proteins, proteins implicated in regulation of alternative splicing, and protein components of snRNPs. The motif also appears in a few single stranded DNA binding proteins. The RRM structure consists of four strands and two helices arranged in an alpha/beta sandwich, with a third helix present during RNA binding in some cases The C-terminal beta strand (4th strand) and final helix are hard to align and have been omitted in the SEED alignment The LA proteins (Swiss:P05455) have an N terminal rrm which is included in the seed. There is a second region towards the C terminus that has some features characteristic of a rrm but does not appear to have the important structural core of a rrm. The LA proteins (Swiss:P05455) are one of the main autoantigens in Systemic lupus erythematosus (SLE), an autoimmune disease.. +PF00398 Ribosomal RNA adenine dimethylase
+PF04353 Regulator of RNA polymerase sigma(70) subunit, Rsd/AlgQ
This family includes bacterial transcriptional regulators that are thought to act through an interaction with the conserved region 4 of the sigma(70) subunit of RNA polymerase. The Pseudomonas aeruginosa homologue, AlgQ, positively regulates virulence gene expression and is associated with the mucoid phenotype observed in Pseudomonas aeruginosa isolates from cystic fibrosis patients.. +PF03873 Anti sigma-E protein RseA, C-terminal domain
Sigma-E is important for the induction of proteins involved in heat shock response. RseA binds sigma-E via its N-terminal domain, sequestering sigma-E and preventing transcription from heat-shock promoters . The C-terminal domain is located in the periplasm, and may interact with other protein that signal periplasmic stress.. +PF03872 Anti sigma-E protein RseA, N-terminal domain
Sigma-E is important for the induction of proteins involved in heat shock response. RseA binds sigma-E via its N-terminal domain, sequestering sigma-E and preventing transcription from heat-shock promoters . The C-terminal domain is located in the periplasm, and may interact with other protein that signal periplasmic stress.. +PF04246 Positive regulator of sigma(E), RseC/MucC
This bacterial family of integral membrane proteins represents a positive regulator of the sigma(E) transcription factor, namely RseC/MucC. The sigma(E) transcription factor is up-regulated by cell envelope protein misfolding, and regulates the expression of genes that are collectively termed ECF (devoted to Extra-Cellular Functions) . In Pseudomonas aeruginosa, de-repression of sigma(E) is associated with the alginate-overproducing phenotype characteristic of chronic respiratory tract colonisation in cystic fibrosis patients. The mechanism by which RseC/MucC positively regulates the sigma(E) transcription factor is unknown. RseC is also thought to have a role in thiamine biosynthesis in Salmonella typhimurium . In addition, this family also includes an N-terminal part of RnfF, a Rhodobacter capsulatus protein, of unknown function, that is essential for nitrogen fixation. This protein also contains an ApbE domain Pfam:PF02424, which is itself involved in thiamine biosynthesis. . +PF03113 Respiratory synctial virus non-structural protein NS2
Pfam-B_2717 (release 6.5). The molecular structure and function of the NS2 protein is not known. However, mutants lacking the NS2 grow at slower rates when compared to the wild-type. Nevertheless, NS2 is not essential for viral replication .. +PF04479 RTA1 like protein
This family is comprised of fungal proteins with multiple transmembrane regions. RTA1 (Swiss:P53047) is involved in resistance to 7-aminocholesterol , while RTM1 (Swiss:P40113) confers resistance to an unknown toxic chemical in molasses . These proteins may bind to the toxic substance, and thus prevent toxicity. They are not thought to be involved in the efflux of xenobiotics .. +PF02334 Replication terminator protein
Pfam-B_12997 (release 5.2). The bacterial replication terminator protein (RTP) plays a role in the termination of DNA replication by impeding replication fork movement. Two RTP dimers bind to the two inverted repeat regions at the termination site.. +PF02382 RTX N-terminal domain
Pfam-B_833 (release 5.2). The RTX family of bacterial toxins are a group of cytolysins and cytotoxins. This Pfam family represents the N-terminal domain which is found in association with a glycine-rich repeat domain and hemolysinCabind Pfam:PF00353.. +PF00016 Ribulose bisphosphate carboxylase large chain, catalytic domain
The C-terminal domain of RuBisCO large chain is the catalytic domain adopting a TIM barrel fold.. +PF02788 Ribulose bisphosphate carboxylase large chain, N-terminal domain
The N-terminal domain of RuBisCO large chain adopts a ferredoxin-like fold.. +PF00101 Ribulose bisphosphate carboxylase, small chain
+PF02915 Rubrerythrin
This domain has a ferritin-like fold.. +PF01330 RuvA;
RuvA N terminal domain. The N terminal domain of RuvA has an OB-fold structure. This domain forms the RuvA tetramer contacts .. +PF02075 Crossover junction endodeoxyribonuclease RuvC
+PF02042 RWP-RK domain
Pfam-B_9740 (Release 5.1). This domain is named RWP-RK after a conserved motif at the C terminus of the presumed domain. The domain is found in algal minus dominance proteins as well as plant proteins involved in nitrogen-controlled development .. +PF01365 RIH domain
Ponting CP (EMBL alignments). The RIH (RyR and IP3R Homology) domain is an extracellular domain from two types of calcium channels. This region is found in the ryanodine receptor Swiss:P21817 and the inositol-1,4,5- trisphosphate receptor Swiss:Q14571. This domain may form a binding site for IP3 .. +PF02026 RyR domain
This domain is called RyR for Ryanodine receptor . The domain is found in four copies in the ryanodine receptor. The function of this domain is unknown.. +PF00575 S1 RNA binding domain
The S1 domain occurs in a wide range of RNA associated proteins. It is structurally similar to cold shock protein which binds nucleic acids. The S1 domain has an OB-fold structure.. +PF00438 S-AdoMet_synt;
S-adenosylmethionine synthetase, N-terminal domain. The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold.. +PF02772 S-AdoMet_syntD2;
S-adenosylmethionine synthetase, central domain. The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold.. +PF02773 S-AdoMet_syntD3;
S-adenosylmethionine synthetase, C-terminal domain. The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold.. +PF02574 Homocysteine S-methyltransferase
This is a family of related homocysteine S-methyltransferases enzymes: 5-methyltetrahydrofolate--homocysteine S-methyltransferases also known EC:2.1.1.13, ; Betaine--homocysteine S-methyltransferase (vitamin B12 dependent), EC:2.1.1.5, ; and Homocysteine S-methyltransferase, EC:2.1.1.10, .. +PF04689 DNA binding protein S1FA
S1FA is a DNA-binding protein found in plants that specifically recognises the negative promoter element S1F .. +PF05116 Sucrose-6F-phosphate phosphohydrolase
Pfam-B_6442 (release 7.7). This family consists of Sucrose-6F-phosphate phosphohydrolase proteins found in plants and cyanobacteria. Sucrose-6(F)-phosphate phosphohydrolase catalyses the final step in the pathway of sucrose biosynthesis .. +PF01023 S_100_domain;
S-100/ICaBP type calcium binding domain. Pfam-B_242 (release 3.0). The S-100 domain is a subfamily of the EF-hand calcium binding proteins.. +PF05124 S-layer like family, C-terminal region
TIGRFAMs (release 2.0);. +PF05123 S-layer like family, N-terminal region
TIGRFAMs (release 2.0);. +PF00954 S-locus glycoprotein family
Pfam-B_357 (release 3.0). In Brassicaceae, self-incompatible plants have a self/non-self recognition system. This is sporophytically controlled by multiple alleles at a single locus (S). S-locus glycoproteins, as well as S-receptor kinases, are in linkage with the S-alleles .. +PF00526 S_mold_repeat;
Dictyostelium (slime mold) repeat. Pfam-B_96 (release 1.0). +PF00277 SAA_proteins;
Serum amyloid A protein. +PF04455 LOR/SDH bifunctional enzyme conserved region
Lysine-oxoglutarate reductase/Saccharopine dehydrogenase (LOR/SDH) is a bifunctional enzyme. This conserved region is commonly found immediately N-terminal to Saccharop_dh (Pfam:PF03435) in eukaryotes [1,2].. +PF04092 SRS domain
Pfam-B_1675 (release 7.3). Toxoplasma gondii is a persistent protozoan parasite capable of infecting almost any warm-blooded vertebrate. The surface of Toxoplasma is coated with a family of developmentally regulated glycosylphosphatidylinositol (GPI)-linked proteins (SRSs), of which SAG1 is the prototypic member. SRS proteins mediate attachment to host cells and interface with the host immune response to regulate the virulence of the parasite. SAG1 is composed of two disulphide linked SRS domains. These have 6 cysteines that form 1-6,2-5 and 3-4 pairings. The structure of the immunodominant SAG1 antigen reveals a homodimeric configuration . The SRS domain is found in a single copy in the SAG2 proteins. This family of surface antigens are found in other apicomplexans.. +PF01259 SAICAR synthetase
Pfam-B_1426 (release 3.0). Also known as Phosphoribosylaminoimidazole-succinocarboxamide synthase.. +PF03534 Sal_SpvB;
Salmonella virulence plasmid 65kDa B protein. +PF03538 Sal_vir_VRP1;
Salmonella virulence plasmid 28.1kDa A protein. +PF01758 Sodium Bile acid symporter family
Pfam-B_697 (release 4.2). This family consists of Na+/bile acid co-transporters. These transmembrane proteins function in the liver in the uptake of bile acids from portal blood plasma a process mediated by the co-transport of Na+ . Also in the family is ARC3 from S. cerevisiae Swiss:Q06598 this is a putative transmembrane protein involved in resistance to arsenic compounds .. +PF03536 Sal_vir_VRP3;
Salmonella virulence-associated 28kDa protein. +PF01536 Adenosylmethionine decarboxylase
Pfam-B_600 (release 4.0). This is a family of S-adenosylmethionine decarboxylase (SAMDC) proenzymes. In the biosynthesis of polyamines SAMDC produces decarboxylated S-adenosylmethionine, which serves as the aminopropyl moiety necessary for spermidine and spermine biosynthesis from putrescine . The Pfam alignment contains both the alpha and beta chains that are cleaved to form the active enzyme.. +PF02199 SAPA;
Saposin A-type domain. Alignment kindly provided by SMART. +PF03058 Sar8.2 family
Pfam-B_2148 (release 6.4). Members of this family are found in Solanaceae plants, a taxonomic group (family) that includes pepper and tobacco plant species. Synthesis of these proteins is induced by tobacco mosaic virus (TMV) and salicylic acid ; indeed they are thought to be involved in the development of systemic acquired resistance (SAR) after an initial hypersensitive response to microbial infection [1,2]. SAR is characterised by long-lasting resistance to infection by a wide range of pathogens, extending to plant tissues distant from the initial infection site .. +PF04790 sarcoglycan;
Sarcoglycan complex subunit protein. Pfam-B_6135 (release 7.5). The dystrophin glycoprotein complex (DGC) is a membrane-spanning complex that links the interior cytoskeleton to the extracellular matrix in muscle. The sarcoglycan complex is a subcomplex within the DGC and is composed of several muscle-specific, transmembrane proteins (alpha-, beta-, gamma-, delta- and zeta-sarcoglycan). The sarcoglycans are asparagine-linked glycosylated proteins with single transmembrane domains. This family contains beta, gamma and delta members [1,2].. +PF03343 SART-1 family
Mifsud W, Mistry J, Wood V. Pfam-B_3690 (release 6.5). SART-1 is a protein involved in cell cycle arrest and pre-mRNA splicing . It has been shown to be a component of U4/U6 x U5 tri-snRNP complex in human, Schizosaccharomyces pombe and Saccharomyces cerevisiae . SART-1 is a known tumour antigen in a range of cancers recognised by T cells .. +PF00269 Small, acid-soluble spore proteins, alpha/beta type
+PF04259 Small, acid-soluble spore protein, gamma-type
TIGRFAMs (release 2.0);. The SASP family is a family of small, glutamine and asparagine-rich peptides that store amino acids in the spores of Bacillus subtilis and related bacteria. . +PF03898 Satellite_CP;
Satellite tobacco necrosis virus coat protein. +PF03110 SBP domain
Pfam-B_737 (release 6.5). SBP domains (for SQUAMOSA-pROMOTER BINDING PROTEIN) are found in plant proteins. It is a sequence specific DNA-binding domain . Members of the family probably function as transcription factors involved in the control of early flower development . The domain contains 10 conserved cysteine and histidine residues that probably are zinc ligands.. +PF00496 Bacterial extracellular solute-binding proteins, family 5 Middle
The borders of this family are based on the PDBSum definitions of the domain edges for Swiss:P06202.. +PF03480 Bacterial extracellular solute-binding protein, family 7
Pfam-B_808 (release 7.0). This family of proteins is involved in binding extracellular solutes for transport across the bacterial cytoplasmic membrane. This family includes Swiss:P37735, a C4-dicarboxylate-binding protein and the sialic acid-binding protein SiaP. The structure of the SiaP receptor has revealed an overall topology similar to ATP binding cassette ESR (extracytoplasmic solute receptors) proteins . Upon binding of sialic acid, SiaP undergoes domain closure about a hinge region and kinking of an alpha-helix hinge component .. +PF01297 Lipoprotein_4; SBP_bac_9;
Periplasmic solute binding protein family. Pfam-B_1416 (release 3.0). This family includes periplasmic solute binding proteins such as TroA that interacts with an ATP-binding cassette transport system in Treponema pallidum.. +PF04405 Domain of Unknown function (DUF542)
This domain is always found in conjunction with the HHE domain (Pfam:PF03794) at the N-terminus.. +PF02667 Short chain fatty acid transporter
This family consists of two sequences annotated as short chain fatty acid transporters, however, there are no references giving details of experimental characterisation of this function.. +PF04486 SchA/CurD like domain
Members of this family have only been identified in species of the Streptomyces genus. Two family members are known to be part of gene clusters involved in the synthesis of polyketide-based spore pigments, homologous to clusters involved in the synthesis of polyketide antibiotics. The function of this protein is unknown, but it has been speculated to contain a NAD(P) binding site . Many of these proteins contain two copies of this presumed domain.. +PF02630 SCO1/SenC
This family is involved in biogenesis of respiratory and photosynthetic systems. SCO1 (Swiss:P23833) is required for a post-translational step in the accumulation of subunits COXI and COXII of cytochrome c oxidase . SenC (Swiss:Q52720) is required for optimal cytochrome c oxidase activity and maximal induction of genes encoding the light-harvesting and reaction centre complexes of R. capsulatus . . +PF02036 SCP-2 sterol transfer family
Pfam-B_1050 (Release 5.1). This domain is involved in binding sterols. It is found in the SCP2 protein Swiss:P22307, as well as the C terminus of Swiss:P51659 the enzyme estradiol 17 beta-dehydrogenase EC:1.1.1.62. The UNC-24 protein Swiss:Q17372 contains an SPFH domain Pfam:PF01145 . . +PF00375 Sodium:dicarboxylate symporter family
+PF02982 Scytalone_DH;
Scytalone dehydratase. Scytalone dehydratases are structurally related to the NTF2 family (see Pfam:PF02136).. +PF03313 Serine dehydratase alpha chain
L-serine dehydratase (EC:4.2.1.13) is found as a heterodimer of alpha and beta chain or as a fusion of the two chains in a single protein. This enzyme catalyses the deamination of serine to form pyruvate. This enzyme is part of the gluconeogenesis pathway.. +PF03315 Serine dehydratase beta chain
L-serine dehydratase (EC:4.2.1.13) is found as a heterodimer of alpha and beta chain or as a fusion of the two chains in a single protein. This enzyme catalyses the deamination of serine to form pyruvate. This enzyme is part of the gluconeogenesis pathway.. +PF01127 Succinate dehydrogenase/Fumarate reductase transmembrane subunit
Prosite & Structural domain. This family includes a transmembrane protein from both the Succinate dehydrogenase and Fumarate reductase complexes.. +PF02810 SEC-C motif
The SEC-C motif found in the C-terminus of the SecA protein, in the middle of some SWI2 ATPases and also solo in several proteins. The motif is predicted to chelate zinc with the CXC and C[HC] pairs that constitute the most conserved feature of the motif. It is predicted to be a potential nucleic acid binding domain.. +PF00995 Sec1 family
Pfam-B_530 (release 3.0). +PF03908 Sec20
Wood V, Griffiths-Jones SR. Sec20 is a membrane glycoprotein associated with secretory pathway.. +PF03911 Sec61beta family
This family consists of homologues of Sec61beta - a component of the Sec61/SecYEG protein secretory system. The domain is found in eukaryotes and archaea and is possibly homologous to the bacterial SecG. It consists of a single putative transmembrane helix, preceded by a short stretch containing various charged residues; this arrangement may help determine orientation in the cell membrane .. +PF03839 Translocation protein Sec62
TIGRFAMs, Griffiths-Jones SR. +PF01369 Sec7 domain
Pfam-B_1629 (release 3.0). The Sec7 domain is a guanine-nucleotide-exchange-factor (GEF) for the Pfam:PF00025 family .. +PF01043 SecA_protein; SecA;
SecA preprotein cross-linking domain. Pfam-B_507 (release 3.0). The SecA ATPase is involved in the insertion and retraction of preproteins through the plasma membrane. This domain has been found to cross-link to preproteins, thought to indicate a role in preprotein binding. The pre-protein cross-linking domain is comprised of two sub domains that are inserted within the ATPase domain . . +PF02556 Preprotein translocase subunit SecB
This family consists of preprotein translocase subunit SecB. SecB is required for the normal export of envelope proteins out of the cell cytoplasm .. +PF02355 Protein export membrane protein
Pfam-B_844 (release 5.2). This family consists of various prokaryotic SecD and SecF protein export membrane proteins. The SecD and SecF proteins are part of the multimeric protein export complex comprising SecA, D, E, F, G, Y, and YajC . SecD and SecF are required to maintain a proton motive force .. +PF00584 SecE/Sec61-gamma subunits of protein translocation complex
SecE is part of the SecYEG complex in bacteria which translocates proteins from the cytoplasm. In eukaryotes the complex, made from Sec61-gamma and Sec61-alpha translocates protein from the cytoplasm to the ER. Archaea have a similar complex.. +PF03840 Preprotein translocase SecG subunit
TIGRFAMs, Griffiths-Jones SR. +PF04856 Securin sister-chromatid separation inhibitor
Pfam-B_4643 (release 7.6). Securin is also known as pituitary tumour-transforming gene product. Over-expression of securin is associated with a number of tumours, and it has been proposed that this may be due to erroneous chromatid separation leading to chromosome gain or loss .. +PF00344 secY;
+PF04628 Sedlin, N-terminal conserved region
Pfam-B_5308 (release 7.5). Mutations in this protein are associated with the X-linked spondyloepiphyseal dysplasia tarda syndrome (OMIM:313400) . This family represents an N-terminal conserved region.. +PF00477 seed_protein;
Small hydrophilic plant seed protein. +PF03841 L-seryl-tRNA selenium transferase
TIGRFAMs, Griffiths-Jones SR. +PF04593 Selenoprotein P, C terminal region
SelP is the only known eukaryotic selenoprotein that contains multiple selenocysteine (Sec) residues, and accounts for more than 50% of the selenium content of rat and human plasma . It is thought to be glycosylated . SelP may have antioxidant properties. It can attach to epithelial cells, and may protect vascular endothelial cells against peroxynitrite toxicity . The high selenium content of SelP suggests that it may be involved in selenium intercellular transport or storage . The promoter structure of bovine SelP suggests that it may be involved in countering heavy metal intoxication, and may also have a developmental function . The N terminal region always contains one Sec residue, and this is separated from the C terminal region (9-16 sec residues) by a histidine-rich sequence . The large number of Sec residues in the C-terminal portion of SelP suggests that it may be involved in selenium transport or storage. However, it is also possible that this region has a redox function .. +PF04592 Selenoprotein P, N terminal region
SelP is the only known eukaryotic selenoprotein that contains multiple selenocysteine (Sec) residues, and accounts for more than 50% of the selenium content of rat and human plasma . It is thought to be glycosylated . SelP may have antioxidant properties. It can attach to epithelial cells, and may protect vascular endothelial cells against peroxynitrite toxicity . The high selenium content of SelP suggests that it may be involved in selenium intercellular transport or storage . The promoter structure of bovine SelP suggests that it may be involved in countering heavy metal intoxication, and may also have a developmental function . The N-terminal region of SelP can exist independently of the C terminal region. Zebrafish selenoprotein Pb (Swiss:Q98SV0) lacks the C terminal Sec-rich region, and a protein encoded by the rat SelP gene and lacking this region has also been reported . N-terminal region contains a conserved SecxxCys motif, which is similar to the CysxxCys found in thioredoxins. It is speculated that the N terminal region may adopt a thioredoxin fold and catalyse redox reactions . The N-terminal region also contains a His-rich region, which is thought to mediate heparin binding. Binding to heparan proteoglycans could account for the membrane binding properties of SelP . The function of the bacterial members of this family is uncharacterised.. +PF01641 DUF25;
Pfam-B_1539 (release 4.1). Methionine sulfoxide reduction is an important process, by which cells regulate biological processes and cope with oxidative stress. MsrA, a protein involved in the reduction of methionine sulfoxides in proteins, has been known for four decades and has been extensively characterised with respect to structure and function. However, recent studies revealed that MsrA is only specific for methionine-S-sulfoxides. Because oxidised methionines occur in a mixture of R and S isomers in vivo, it was unclear how stereo-specific MsrA could be responsible for the reduction of all protein methionine sulfoxides. It appears that a second methionine sulfoxide reductase, SelR , evolved that is specific for methionine-R-sulfoxides, the activity that is different but complementary to that of MsrA. Thus, these proteins, working together, could reduce both stereoisomers of methionine sulfoxide. This domain is found both in SelR proteins and fused with the peptide methionine sulfoxide reductase enzymatic domain Pfam:PF01625. The domain has two conserved cysteine and histidines. The domain binds both selenium and zinc . The final cysteine is found to be replaced by the rare amino acid selenocysteine in some members of the family . This family has methionine-R-sulfoxide reductase activity .. +PF01403 Sema domain
The Sema domain occurs in semaphorins, which are a large family of secreted and transmembrane proteins, some of which function as repellent signals during axon guidance. Sema domains also occur in Swiss:P08581 the hepatocyte growth factor receptor and Swiss:P51805. +PF01118 Semialdehyde dehydrogenase, NAD binding domain
Pfam-B_1079 (release 3.0). This Pfam entry contains the following members: N-acetyl-glutamine semialdehyde dehydrogenase (AgrC) Aspartate-semialdehyde dehydrogenase. +PF02774 Semialdehyde dehydrogenase, dimerisation domain
Pfam-B_1079 (release 3.0). This Pfam entry contains the following members: N-acetyl-glutamine semialdehyde dehydrogenase (AgrC) Aspartate-semialdehyde dehydrogenase. +PF03925 SeqA protein
The binding of SeqA protein to hemimethylated GATC sequences is important in the negative modulation of chromosomal initiation at oriC, and in the formation of SeqA foci necessary for Escherichia coli chromosome segregation . SeqA tetramers are able to aggregate or multimerise in a reversible, concentration-dependent manner . Apart from its function in the control of DNA replication, SeqA may also be a specific transcription factor .. +PF04360 Serglycin
Serglycin is the most prevalent proteoglycan produced in haemopoietic cells. Serglycin is a proteinase resistant secretory granule proteoglycan .. +PF00450 serine_carbpept;
Serine carboxypeptidase. +PF00079 serpin;
Serpin (serine protease inhibitor). Overington and HMM_iterative_training. Structure is a multi-domain fold containing a bundle of helices and a beta sandwich.. +PF02403 Seryl-tRNA synthetase N-terminal domain
Pfam-B_518 (release 5.4). This domain is found associated with the Pfam tRNA synthetase class II domain (Pfam:PF00587) and represents the N-terminal domain of seryl-tRNA synthetase.. +PF01445 Viral small hydrophobic protein
Prodom_1504 (release 99.1). The SH (small hydrophobic) protein is a membrane protein of uncertain function .. +PF00017 SH2 domain
Swissprot_feature_table. +PF00018 SH3;
SH3 (Src homology 3) domains are often indicative of a protein involved in signal transduction related to cytoskeletal organisation. First described in the Src cytoplasmic tyrosine kinase Swiss:P12931. The structure is a partly opened beta barrel.. +PF04908 SH3-binding, glutamic acid-rich protein
Pfam-B_6650 (release 7.6). +PF03579 Small hydrophobic protein
Pfam-B_1121 (release 7.0). The small hydrophobic integral membrane protein, SH (previously designated 1A) is found to have a variety of glycosylated forms [1,2]. This protein is a component of the mature virion . . +PF01488 Shikimate / quinate 5-dehydrogenase
Pfam-B_336 (release 4.0). This family contains both shikimate and quinate dehydrogenases. Shikimate 5-dehydrogenase catalyses the conversion of shikimate to 5-dehydroshikimate. This reaction is part of the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. Quinate 5-dehydrogenase catalyses the conversion of quinate to 5-dehydroquinate. This reaction is part of the quinate pathway where quinic acid is exploited as a source of carbon in prokaryotes and microbial eukaryotes. Both the shikimate and quinate pathways share two common pathway metabolites 3-dehydroquinate and dehydroshikimate.. +PF00464 Serine hydroxymethyltransferase
+PF04917 Bacterial shufflon protein, N-terminal constant region
Pfam-B_6667 (release 7.6). This family represents the high-similarity N-terminal 'constant region' shared by shufflon proteins.. +PF02973 sialidase_N;
Sialidase, N-terminal domain. +PF03482 sic;
Pfam-B_5 (Release 7.0). Serotype M1 group A Streptococcus strains cause epidemic waves of human infections. This family includes the sic protein an extracellular protein (streptococcal inhibitor of complement) that inhibits human complement .. +PF00158 sigma54;
Sigma-54 interaction domain. +PF00309 sigma54_AID;
Sigma-54 factor, Activator interacting domain (AID) . The sigma-54 holoenzyme is an enhancer dependent form of the RNA polymerase. The AID is necessary for activator interaction . In addition, the AID also inhibits transcription initiation in the sigma-54 holoenzyme prior to interaction with the activator .. +PF04963 sigma54_CBD;
Sigma-54 factor, core binding domain. This domain makes a direct interaction with the core RNA polymerase, to form an enhancer dependent holoenzyme . The centre of this domain contains a very weak similarity to a helix-turn-helix motif which may represent the other DNA binding domain.. +PF04552 sigma54_DBD;
Sigma-54, DNA binding domain. This DNA binding domain is based on peptide fragmentation data. This domain is proximal to DNA in the promoter/holoenzyme complex. Furthermore this region contains a putative helix-turn-helix motif. At the C-terminus, there is a highly conserved region known as the RpoN box and is the signature of the sigma-54 proteins . . +PF04546 sigma70_ner;
Sigma-70, non-essential region. The domain is found in the primary vegetative sigma factor. The function of this domain is unclear and can be removed without loss of function.. +PF03979 sigma70_r1_1;
Sigma-70 factor, region 1.1. Region 1.1 modulates DNA binding by region 2 and 4 when sigma is unbound by the core RNA polymerase [1,2]. Region 1.1 is also involved in promoter binding . +PF00140 sigma70_r1_2;
Sigma-70 factor, region 1.2. +PF04539 sigma70_r3;
Region 3 forms a discrete compact three helical domain within the sigma-factor. Region is not normally involved in the recognition of promoter DNA, but as some specific bacterial promoters containing an extended -10 promoter element, residues within region 3 play an important role. Region 3 primarily is involved in binding the core RNA polymerase in the holoenzyme .. +PF03084 Reoviral Sigma1/Sigma2 family
Pfam-B_1759 (release 6.4). Reoviruses are double-stranded RNA viruses. They lack a membrane envelope and their capsid is organised in two concentric icosahedral layers: an inner core and an outer capsid layer. The sigma1 protein is found in the outer capsid, and the sigma2 protein is found in the core. There are four other kinds of protein (besides sigma2) in the core, termed lambda 1-3, mu2. Interactions between sigma2 and lambda 1 and lambda 3 are thought to initiate core formation, followed by mu2 and lambda2 . Sigma1 is a trimeric protein, and is positioned at the 12 vertices of the icosahedral outer capsid layer. Its N-terminal fibrous tail, arranged as a triple coiled coil, anchors it in the virion, and a C-terminal globular head interacts with the cellular receptor . These two parts form by separate trimerisation events. The N-terminal fibrous tail forms on the polysome, without the involvement of ATP or chaperones. The post- translational assembly of the C-terminal globular head involves the chaperone activity of Hsp90, which is associated with phosphorylation of Hsp90 during the process . Sigma1 protein acts as a cell attachment protein, and determines viral virulence, pathways of spread, and tropism. Junctional adhesion molecule has been identified as a receptor for sigma1 . In type 3 reoviruses, a small region, predicted to form a beta sheet, in the N-terminal tail was found to bind target cell surface sialic acid (i.e. sialic acid acts as a co-receptor) and promote apoptosis . The sigma1 protein also binds to the lambda2 core protein .. +PF02454 Sigma 1s protein
Pfam-B_2133 (release 5.4). The reoviral gene S1 encodes for haemagglutinin (sigma 1 protein), an outer capsid protein and a major factor in determining virus-host cell interactions. Sigma 1s is one of two translation products of the S1 gene.. +PF03842 Silicon transporter
TIGRFAMs, Griffiths-Jones SR. +PF04801 Sin-like protein conserved region
Pfam-B_6302 (release 7.5). Family of higher eukaryotic proteins. SIN was identified as a protein that interacts specifically with SXL (sex lethal) in a yeast two-hybrid assay. The interaction is mediated by one of the SXL RNA binding domains .. +PF04954 Siderophore-interacting protein
+PF02146 Sir2 family
This region is characteristic of Silent information regulator 2 (Sir2) proteins, or sirtuins. These are protein deacetylases that depend on nicotinamide adenine dinucleotide (NAD). They are found in many subcellular locations, including the nucleus, cytoplasm and mitochondria. Eukaryotic forms play an important role in the regulation of transcriptional repression. Moreover, they are involved in microtubule organisation and DNA damage repair processes .. +PF04247 Invasion gene expression up-regulator, SirB
SirB up-regulates Salmonella typhimurium invasion gene transcription. It is, however, not essential for the expression of these genes. Its function is unknown .. +PF01380 SIS domain
SIS (Sugar ISomerase) domains are found in many phosphosugar isomerases and phosphosugar binding proteins. SIS domains are also found in proteins that regulate the expression of genes involved in synthesis of phosphosugars. Presumably the SIS domains bind to the end-product of the pathway.. +PF05185 Skb1;
PRMT5 arginine-N-methyltransferase. Pfam-B_4050 (release 7.7). The human homologue of yeast Skb1 (Shk1 kinase-binding protein 1) is PRMT5, an arginine-N-methyltransferase . These proteins appear to be key mitotic regulators. They play a role in Jak signalling in higher eukaryotes.. +PF01202 Shikimate kinase
+PF02731 SKIP/SNW domain
This domain is found in chromatin proteins.. +PF03217 Bacterial surface layer protein
Pfam-B_2530 (release 6.5). +PF03843 Outer membrane lipoprotein Slp family
TIGRFAMs, Griffiths-Jones SR. +PF01464 Transglycosylase SLT domain
Prodom_3175 (release 99.1). This family is distantly related to Pfam:PF00062. Members are found in phages, type II, type III and type IV secretion systems (reviewed in ).. +PF02258 Shiga-like toxin beta subunit
Pfam-B_3684 (release 5.2). This family represents the B subunit of shiga-like toxin (SLT or verotoxin) produced by some strains of E.coli associated with hemorrhagic colitis and hemolytic uremic syndrome. SLT's are composed of one enzymatic A subunit and five cell binding B subunits.. +PF04102 SlyX
The SlyX protein has no known function. It is short (less than 80 amino acids) and is found close to the slyD gene. The SlyX protein has a conserved PPH(Y/W) motif at its C-terminus. The protein may be a coiled-coil structure.. +PF02481 SMF;
DNA recombination-mediator protein A. Pfam-B_2252 (release 5.4). The SMF family, of DNA processing chain A, dprA, are a group of bacterial proteins. In H. pylori, dprA is required for natural chromosomal and plasmid transformation . It has now been shown that DprA is found to bind cooperatively to single-stranded DNA (ssDNA) and to interact with RecA. In the process, DprA-RecA-ssDNA filaments are produced and these filaments catalyse the homology-dependent formation of joint molecules. While the E.coli SSB protein limits access of RecA to ssDNA, DprA alleviates this barrier. It is proposed that DprA is a new member of the recombination-mediator protein family, dedicated to natural bacterial transformation .. +PF03467 Smg-4/UPF3 family
This family contains proteins that are involved in nonsense mediated mRNA decay. A process that is triggered by premature stop codons in mRNA. The family includes Smg-4 and UPF3.. +PF04927 Seed maturation protein
Pfam-B_6221 (release 7.6). Plant seed maturation protein.. +PF04355 SmpA / OmlA family
Lipoprotein Bacterial outer membrane lipoprotein, possibly involved in maintaining the structural integrity of the cell envelope . Lipid attachment site is a conserved N terminal cysteine residue. Sometimes found adjacent to the OmpA domain (Pfam:PF00691).. +PF01668 SmpB protein
Pfam-B_1766 (release 4.1). +PF01713 Smr domain
This family includes the Smr (Small MutS Related) proteins, and the C-terminal region of the MutS2 protein. It has been suggested that this domain interacts with the MutS1 Swiss:P23909 protein in the case of Smr proteins and with the N-terminal MutS related region of MutS2 Swiss:P94545 . This domain exhibits nicking endonuclease activity that might have a role in mismatch repair or genetic recombination. It shows no significant double strand cleavage or exonuclease activity . The full-length Swiss:Q86UW6 also has the polynucleotide kinase activity.. +PF00835 SNAP-25 family
Pfam-B_1606 (release 2.1). SNAP-25 (synaptosome-associated protein 25 kDa) proteins are components of SNARE complexes. Members of this family contain a cluster of cysteine residues that can be palmitoylated for membrane attachment .. +PF00565 Staphylococcal nuclease homologue
Alignment kindly provided by SMART. Present in all three domains of cellular life. Four copies in the transcriptional coactivator p100: these, however, appear to lack the active site residues of Staphylococcal nuclease. Positions 14 (Asp-21), 34 (Arg-35), 39 (Asp-40), 42 (Glu-43) and 110 (Arg-87) [SNase numbering in parentheses] are thought to be involved in substrate-binding and catalysis.. +PF00209 Sodium:neurotransmitter symporter family
+PF00176 SNF2 family N-terminal domain
This domain is found in proteins involved in a variety of processes including transcription regulation (e.g., SNF2, STH1, brahma, MOT1), DNA repair (e.g., ERCC6, RAD16, RAD5), DNA recombination (e.g., RAD54), and chromatin unwinding (e.g., ISWI) as well as a variety of other proteins with little functional information (e.g., lodestar, ETL1).. +PF04855 SNF5 / SMARCB1 / INI1
Pfam-B_6054 (release 7.6). SNF5 is a component of the yeast SWI/SNF complex, which is an ATP-dependent nucleosome-remodelling complex that regulates the transcription of a subset of yeast genes. SNF5 is a key component of all SWI/SNF-class complexes characterised so far . This family consists of the conserved region of SNF5, including a direct repeat motif. SNF5 is essential for the assembly promoter targeting and chromatin remodelling activity of the SWI-SNF complex . SNF5 is also known as SMARCB1, for SWI/SNF-related, matrix-associated, actin-dependent regulator of chromatin, subfamily b, member 1, and also INI1 for integrase interactor 1. Loss-of function mutations in SNF5 are thought to contribute to oncogenesis in malignant rhabdoid tumours (MRTs) .. +PF01174 UPF0030;
SNO glutamine amidotransferase family. This family and its amidotransferase domain was first described in . It is predicted that members of this family are involved in the pyridoxine biosynthetic pathway, based on the proximity and co-regulation of the corresponding genes and physical interaction between the members of Pfam:PF01174 and Pfam:PF01680 .. +PF00080 sodcu;
Copper/zinc superoxide dismutase (SODC). Overington and HMM_iterative_training. superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the copper/zinc-binding family is one. Defects in the human SOD1 gene cause familial amyotrophic lateral sclerosis (Lou Gehrig's disease). Structure is an eight-stranded beta sandwich, similar to the immunoglobulin fold.. +PF00081 sodfe;
Iron/manganese superoxide dismutases, alpha-hairpin domain. Eddy SR, Griffiths-Jones SR. Overington and HMM_iterative_training. superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the Mn/Fe-binding family is one. In humans, there is a cytoplasmic Cu/Zn SOD, and a mitochondrial Mn/Fe SOD. N-terminal domain is a long alpha antiparallel hairpin. A small fragment of YTRE_LEPBI matches well - sequencing error?. +PF02777 sodfe_C;
Iron/manganese superoxide dismutases, C-terminal domain. Eddy SR, Griffiths-Jones SR. Overington and HMM_iterative_training. superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the Mn/Fe-binding family is one. In humans, there is a cytoplasmic Cu/Zn SOD, and a mitochondrial Mn/Fe SOD. C-terminal domain is a mixed alpha/beta fold.. +PF03002 Somatostatin/Cortistatin family
Pfam-B_1891 (release 6.4). Members of this family are hormones. Somatostatin inhibits the release of somatotropin. Cortistatin is a peptide that is related to the Somatostatins that is found to depress neuronal electrical activity but, unlike somatostatin, induces low-frequency waves in the cerebral cortex and antagonises the effects of acetylcholine on hippocampal and cortical measures of excitability .. +PF01680 UPF0019;
Pfam-B_2034 (release 4.1). Members of this family are enzymes involved in a new pathway of pyridoxine/pyridoxal 5-phosphate biosynthesis . This family was formerly known as UPF0019.. +PF02208 Sorbin homologous domain
Alignment kindly provided by SMART. +PF04203 Sortase family
TIGRFAMs (release 2.0);. The founder member of this family is S.aureus sortase, a transpeptidase that attaches surface proteins by the threonine of an LPXTG motif to the cell wall . . +PF04832 SOUL heme-binding protein
Pfam-B_3872 (release 7.6). This family represents a group of putative heme-binding proteins . Our family includes archaeal and bacterial homologues.. +PF04267 Sarcosine oxidase, delta subunit family
TIGRFAMs (release 2.0);. Sarcosine oxidase is a hetero-tetrameric enzyme that contains both covalently bound FMN and non-covalently bound FAD and NAD(+). This enzyme catalyses the oxidative demethylation of sarcosine to yield glycine, H2O2, and 5,10-CH2-tetrahydrofolate (H4folate) in a reaction requiring H4folate and O2 [1,2].. +PF04268 Sarcosine oxidase, gamma subunit family
TIGRFAMs (release 2.0);. Sarcosine oxidase is a hetero-tetrameric enzyme that contains both covalently bound FMN and non-covalently bound FAD and NAD(+). This enzyme catalyses the oxidative demethylation of sarcosine to yield glycine, H2O2, and 5,10-CH2-tetrahydrofolate (H4folate) in a reaction requiring H4folate and O2 [1,2].. +PF03172 Sp100 domain
Pfam-B_3126 (release 6.5). The function of this domain is unknown. It is about 105 amino acid residues in length and is predicted to be predominantly alpha helical. This domain is usually found at the amino terminus of protein that contain a SAND domain Pfam:PF01342.. +PF03014 Structural protein 2
Pfam-B_1375 (release 6.4). This family represents structural protein 2 of the hepatitis E virus. The high basic amino acid content of this protein has led to the suggestion of a role in viral genomic RNA encapsidation.. +PF02090 Salmonella surface presentation of antigen gene type M protein
+PF02510 Surface presentation of antigens protein
Pfam-B_1678 (release 5.4). Surface presentation of antigens protein (SPAN), also known as invasion protein invJ, is a Salmonella secretory pathway protein involved in presentation of determinants required for mammalian host cell invasion. . +PF04573 Signal peptidase subunit
Pfam-B_4675 (release 7.5). Translocation of polypeptide chains across the endoplasmic reticulum membrane is triggered by signal sequences. During translocation of the nascent chain through the membrane, the signal sequence of most secretory and membrane proteins is cleaved off. Cleavage occurs by the signal peptidase complex (SPC) which consists of four subunits in yeast and five in mammals. This family is common to yeast and mammals [1,2].. +PF05122 Mobile element transfer protein
These proteins are involved in transferring a group of integrating conjugative DNA elements, such as pSAM2 from Streptomyces ambofaciens ( ). Their precise role is not known. . +PF03771 DUF317;
Domain of unknown function (DUF317). This is a sequence family found in a set of bacterial proteins with no known function. This domain is currently only found in Streptomyces bacteria. Most proteins contain two copies of this domain.. +PF00435 spectrin;
Pfam-B_1 (release 1.0). Spectrin repeat-domains are found in several proteins involved in cytoskeletal structure. These include spectrin, alpha-actinin and dystrophin. The sequence repeat used in this family is taken from the structural repeat in reference . The spectrin domain- repeat forms a three helix bundle. The second helix is interrupted by proline in some sequences. The repeats are defined by a characteristic tryptophan (W) residue at position 17 in helix A and a leucine (L) at 2 residues from the carboxyl end of helix C. Although the domain occurs in multiple repeats along sequences, the domains are actually stable on their own - ie they act, biophysically, like domains rather than repeats that only function when aggregated.. +PF01564 Spermine/spermidine synthase
Pfam-B_798 (release 4.0). Spermine and spermidine are polyamines. This family includes spermidine synthase that catalyses the fifth (last) step in the biosynthesis of spermidine from arginine, and spermine synthase.. +PF02819 spidertoxin;
This family of spider neurotoxins are thought to be calcium ion channel inhibitors.. +PF02513 Spin/Ssty Family
Spindlin (Spin) is a novel maternal transcript present in the unfertilised egg and early embryo . The Y-linked spermiogenesis -specific transcript (Ssty) is also expressed during gametogenesis and forms part of this Pfam family. Members of this family contain three copies of this 50 residue repeat. The repeat is predicted to contain four beta strands.. +PF05215 Spiralin
Pfam-B_6625 (release 7.7). This family consists of Spiralin proteins found in spiroplasma bacteria. Spiroplasmas are helically shaped pathogenic bacteria related to the mycoplasmas. The surface of spiroplasma bacteria is crowded with the membrane-anchored lipoprotein spiralin whose structure and function are unknown although its cellular function is thought to be a structural and mechanical one rather than a catalytic one .. +PF03533 SPO11 homologue
+PF05032 Spo12 family
Pfam-B_51047 (release 7.6). This family of proteins includes Spo12 from S. cerevisiae Swiss:P17123. The Spo12 protein plays a regulatory role in two of the most fundamental processes of biology, mitosis and meiosis, and yet its biochemical function remains elusive . Spo12 is a nuclear protein . Spo12 is a component of the FEAR (Cdc fourteen early anaphase release) regulatory network, that promotes Cdc14 release from the nucleolus during early anaphase . The FEAR network is comprised of the polo kinase Cdc5, the separase Esp1, the kinetochore-associated protein Slk19, and Spo12 .. +PF03907 Spo7-like protein
S. cerevisiae Spo7 Swiss:P18410 has an unknown function, but has a role in formation of a spherical nucleus and meiotic division .. +PF01052 SPOA_protein;
Surface presentation of antigens (SPOA). Pfam-B_408 (release 3.0). This family includes the C-terminal region of flagellar motor switch proteins FliN and FliM. It is associated with family FliM, Pfam:PF02154.. +PF05036 Sporulation related domain
This 70 residue domain is composed of two 35 residue repeats found in proteins involved in sporulation and cell division such as FtsN, DedD, and CwlM. This domain is involved in binding peptidoglycan . Two tandem repeats fold into a pseudo-2-fold symmetric single-domain structure containing numerous contacts between the repeats . FtsN is an essential cell division protein with a simple bitopic topology, a short N-terminal cytoplasmic segment fused to a large carboxy periplasmic domain through a single transmembrane domain. These repeats lay at the periplasmic C-terminus. FtsN localises to the septum ring complex.. +PF03845 Spore germination protein
TIGRFAMs, Griffiths-Jones SR. +PF00588 SpoU rRNA Methylase family
MRC-LMB Genome group. This family of proteins probably use S-AdoMet. . +PF03862 spoVA;
Members of this family are all transcribed from the spoVA operon. These proteins are poorly characterised, but are thought to be involved in dipicolinic acid transport into the developing forespore during sporulation .. +PF04026 SpoVG
Stage V sporulation protein G. Essential for sporulation and specific to stage V sporulation in Bacillus megaterium and subtilis . In B. subtilis, expression decreases after 30-60 minutes of cold shock .. +PF04293 SpoVR like protein
Family member Swiss:P37875 is Bacillus subtilis stage V sporulation protein R, which is involved in spore cortex formation . Little is known about cortex biosynthesis, except that it depends on several sigma E controlled genes, including spoVR .. +PF04232 Stage V sporulation protein S (SpoVS)
In Bacillus subtilis this protein interferes with sporulation at an early stage and this inhibitory effect is overcome by SpoIIB and SpoVG. SpoVS seems to play a positive role in allowing progression beyond stage V of sporulation. Null mutations in the spoVS gene block sporulation at stage V, impairing the development of heat resistance and coat assembly .. +PF03539 Spumavirus aspartic protease (A9)
+PF03779 SPW repeat
A short repeat found in a small family of membrane-bound proteins. This repeat contains a conserved SPW motif in the first of two transmembrane helices.. +PF00494 Squalene/phytoene synthase
+PF00299 squash;
Squash family serine protease inhibitor. +PF02117 Sra; 7TM_GCPR_Sra;
Serpentine type 7TM GPCR chemoreceptor Sra. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Sra is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF02175 Srb;
Serpentine type 7TM GPCR chemoreceptor Srb. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srb is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF00530 Scavenger receptor cysteine-rich domain
These domains are disulphide rich extracellular domains. These domains are found in several extracellular receptors and may be involved in protein-protein interactions.. +PF00319 transcript_fact;
SRF-type transcription factor (DNA-binding and dimerisation domain). Prosite & Pfam-B_6396 (Release 8.0). +PF02118 Srg family chemoreceptor
+PF02290 Signal recognition particle 14kD protein
Pfam-B_7955 (release 5.2). The signal recognition particle (SRP) is a multimeric protein involved in targeting secretory proteins to the rough endoplasmic reticulum membrane. SRP14 and SRP9 form a complex essential for SRP RNA binding.. +PF01922 SRP19 protein
The signal recognition particle (SRP) binds to the signal peptide of proteins as they are being translated. The binding of the SRP halts translation and the complex is then transported to the endoplasmic reticulum's cytoplasmic surface. The SRP then aids translocation of the protein through the ER membrane. The SRP is a ribonucleoprotein that is composed of a small RNA and several proteins. One of these proteins is the SRP19 protein (Sec65 in yeast [2,3]).. +PF00660 Seripauperin and TIP1 family
+PF05022 SRP40, C-terminal domain
Pfam-B_9034 (release 7.6). This presumed domain is found at the C-terminus of the S. cerevisiae SRP40 protein Swiss:P32583 and its homologues. SRP40/nopp40 is a chaperone involved in nucleocytoplasmic transport. SRP40 is also a suppressor of mutant AC40 subunit of RNA polymerase I and III.. +PF04888 Secretion system effector C (SseC) like family
Pfam-B_5525 (release 7.6). SseC is a secreted protein that forms a complex together with SecB and SecD on the surface of Salmonella. All these proteins are secreted by the type III secretion system . Many mucosal pathogens use type III secretion systems for the injection of effector proteins into target cells. SecB, SseC and SecD are inserted into the target cell membrane. where they form a small pore or translocon [1,2]. In addition to SseC, this family includes the bacterial secreted proteins PopB, PepB, YopB and EspD which are thought to be directly involved in pore formation, and type III secretion system translocon.. +PF00474 Sodium:solute symporter family
This family includes Swiss:P33413 which is not in the Prosite entry. Membership of this family is supported by a significant blast score.. +PF04686 Streptomyces sporulation and cell division protein, SsgA
Pfam-B_5645 (release 7.5). The precise function of SsgA is unknown. It has been found to be essential for spore formation, and to stimulate cell division .. +PF00720 Subtilisin inhibitor-like
Pfam-B_679 (release 2.1). +PF04056 Ssl1-like
Pfam-B_13499 (release 7.3);. Ssl1-like proteins are 40kDa subunits of the Transcription factor II H complex.. +PF04386 Stringent starvation protein B
Escherichia coli stringent starvation protein B (SspB), is thought to enhance the specificity of degradation of tmRNA-tagged proteins by the ClpXP protease. The tmRNA tag, also known as ssrA, is an 11-aa peptide added to the C terminus of proteins stalled during translation, targets proteins for degradation by ClpXP and ClpAP. SspB a cytoplasmic protein that specifically binds to residues 1-4 and 7 of the tag. Binding of SspB enhances degradation of tagged proteins by ClpX, and masks sequence elements important for ClpA interactions, inhibiting degradation by ClpA . However, more recent work has cast doubt on the importance of SspB in wild-type cells . SspB is encoded in an operon whose synthesis is stimulated by carbon, amino acid, and phosphate starvation. SspB may play a special role during nutrient stress, for example by ensuring rapid degradation of the products of stalled translation, without causing a global increase in degradation of all ClpXP substrates .. +PF03531 Structure-specific recognition protein (SSRP1)
Griffiths-Jones SR, Mistry J. SSRP1 has been implicated in transcriptional initiation and elongation and in DNA replication and repair . This domain belongs to the Pleckstrin homology fold superfamily.. +PF04722 Ssu72-like protein
Pfam-B_5993 (release 7.5). The highly conserved and essential protein Ssu72 has intrinsic phosphatase activity and plays an essential role in the transcription cycle. Ssu72 was originally identified in a yeast genetic screen as enhancer of a defect caused by a mutation in the transcription initiation factor TFIIB . It binds to TFIIB and is also involved in mRNA elongation. Ssu72 is further involved in both poly(A) dependent and independent termination. It is a subunit of the yeast cleavage and polyadenylation factor (CPF), which is part of the machinery for mRNA 3'-end formation. Ssu72 is also essential for transcription termination of snRNAs .. +PF04184 ST7 protein
Pfam-B_2088 (release 7.3). The ST7 (for suppression of tumorigenicity 7) protein is thought to be a tumour suppressor gene. The molecular function of this protein is uncertain.. +PF03298 Stanniocalcin family
Pfam-B_4401 (release 6.5). +PF02200 STE like transcription factor
Alignment kindly provided by SMART. +PF02876 Staphylococcal/Streptococcal toxin, beta-grasp domain
+PF01123 Staphylococcal/Streptococcal toxin, OB-fold domain
+PF04022 Staphylocoagulase repeat
+PF02821 Staphylokinase/Streptokinase family
+PF01017 STAT;
STAT protein, all-alpha domain. Pfam-B_856 (release 3.0). STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. STAT proteins also include an SH2 domain Pfam:PF00017.. +PF02864 STAT protein, DNA binding domain
Pfam-B_856 (release 3.0). STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. This family represents the DNA binding domain of STAT, which has an ig-like fold. STAT proteins also include an SH2 domain Pfam:PF00017.. +PF02865 STAT_prot;
STAT protein, protein interaction domain. Pfam-B_856 (release 3.0). STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. STAT proteins also include an SH2 domain Pfam:PF00017.. +PF03875 Statherin
Statherin functions biologically to inhibit the nucleation and growth of calcium phosphate minerals. The N-terminus of statherin is highly charged, the glutamic acids of which have been shown to be important in the recognition of hydroxyapatite . . +PF00836 Stathmin family
Pfam-B_1551 (release 2.1). The Stathmin family of proteins play an important role in the regulation of the microtubule cytoskeleton. They regulate microtubule dynamics by promoting depolymerization of microtubules and/or preventing polymerisation of tubulin heterodimers .. +PF02116 Fungal pheromone mating factor STE2 GPCR
+PF02076 Pheromone A receptor
+PF04885 Stigma-specific protein, Stig1
Pfam-B_6528 (release 7.6). This family represents the Stig1 cysteine rich plant protein. The STIG1 gene is developmentally regulated and expressed specifically in the stigmatic secretory zone .. +PF05217 STOP protein
Pfam-B_6629 (release 7.7). Neurons contain abundant subsets of highly stable microtubules that resist de-polymerising conditions such as exposure to the cold. Stable microtubules are thought to be essential for neuronal development, maintenance, and function. STOP is a major factor responsible for the intriguing stability properties of neuronal microtubules and is important for synaptic plasticity. Additionally knowledge of STOPs function and properties may help in the treatment of neuroleptics in illnesses such as schizophrenia, currently thought to result from synaptic defects .. +PF03088 Strictosidine synthase
Pfam-B_1533 (release 6.5). Strictosidine synthase (E.C. 4.3.3.2) is a key enzyme in alkaloid biosynthesis. It catalyses the condensation of tryptamine with secologanin to form strictosidine.. +PF04270 strep_his_triad;
Streptococcal histidine triad protein . TIGRFAMs (release 2.0);. All members of this family are proteins from Streptococcal species. The proteins are characterised by having a HxxHxH motif that usually occurs multiple times throughout the protein.. +PF02516 Oligosaccharyl transferase STT3 subunit
Pfam-B_1095 (release 5.4). This family consists of the oligosaccharyl transferase STT3 subunit and related proteins. The STT3 subunit is part of the oligosaccharyl transferase (OTase) complex of proteins and is required for its activity . In eukaryotes, OTase transfers a lipid-linked core-oligosaccharide to selected asparagine residues in the ER . In the archaea STT3 occurs alone, rather than in an OTase complex, and is required for N-glycosylation of asparagines [3-4].. +PF03481 Putative GTP-binding controlling metal-binding
Structural investigation of this domain suggests that it might be a GTP-binding region that regulates metal binding and involves hydrolysis of ATP to AMP. It is found to the C-terminus of Pfam:PF01300.. +PF01300 Telomere recombination
This domain has been shown to bind preferentially to dsRNA . The domain is found in SUA5 Swiss:P32579 as well as HypF and YrdC Swiss:P45748. It has also been shown to be required for telomere recombination in yeast.. +PF00862 Sucrose synthase
Pfam-B_484 (release 3.0). Sucrose synthases catalyse the synthesis of sucrose from UDP-glucose and fructose. This family includes the bulk of the sucrose synthase protein. However the carboxyl terminal region of the sucrose synthases belongs to the glycosyl transferase family Pfam:PF00534.. +PF02657 UPF0050;
Fe-S metabolism associated domain. This family consists of the SufE-related proteins. These have been implicated in Fe-S metabolism and export.. +PF05076 Suppressor of fused protein (SUFU)
Pfam-B_6089 (release 7.7). SUFU, encoding the human orthologue of Drosophila suppressor of fused, appears to have a conserved role in the repression of Hedgehog signaling. SUFU exerts its repressor role by physically interacting with GLI proteins in both the cytoplasm and the nucleus . SUFU has been found to be a tumour-suppressor gene that predisposes individuals to medulloblastoma by modulating the SHH signaling pathway . Genomic contextual analysis of bacterial SUFU versions revealed that they are immunity proteins against diverse nuclease toxins in polymorphic toxin systems . . +PF04198 Putative sugar-binding domain
Pfam-B_1085 (release 7.3). This probable domain is found in bacterial transcriptional regulators such as DeoR and SorC. These proteins have an amino-terminal helix-turn-helix Pfam:PF00325 that binds to DNA. This domain is probably the ligand regulator binding region. SorC is regulated by sorbose and other members of this family are likely to be regulated by other sugar substrates.. +PF00083 sugar_tr;
Sugar (and other) transporter. Prosite hmmls-iteration. +PF01253 Translation initiation factor SUI1
+PF03846 Cell division inhibitor SulA
TIGRFAMs, Griffiths-Jones SR. +PF00916 Sulfate transporter family
Pfam-B_223 (release 3.0). Mutations in Swiss:P50443 lead to several human diseases.. +PF03856 Beta-glucosidase (SUN family)
Members of this family include Nca3, Sun4 and Sim1. This is a family of yeast proteins, involved in a diverse set of functions (DNA replication, aging, mitochondrial biogenesis and cell septation) . BGLA from Candida wickerhamii has been characterised as a Beta-glucosidase EC:3.2.1.21.. +PF03439 Supt5;
Early transcription elongation factor of RNA pol II, NGN section. Spt5p and prokaryotic NusG are shown to contain a novel 'NGN' domain. The combined NGN and KOW motif regions of Spt5 form the binding domain with Spt4 . Spt5 complexes with Spt4 as a 1:1 heterodimer and this Spt5-Spt4 complex regulates early transcription elongation by RNA polymerase II and has an imputed role in pre-mRNA processing via its physical association with mRNA capping enzymes. The Schizosaccharomyces pombe core Spt5-Spt4 complex is a heterodimer bearing a trypsin-resistant Spt4-binding domain within the Spt5 subunit .. +PF01975 Survival protein SurE
E. coli cells with the surE gene disrupted are found to survive poorly in stationary phase . It is suggested that SurE may be involved in stress response. Yeast also contains a member of the family Swiss:P38254. Swiss:P30887 can complement a mutation in acid phosphatase, suggesting that members of this family could be phosphatases.. +PF02104 SURF1 family
+PF02077 SURF4 family
+PF01617 Surface antigen
Pfam-B_1042 (release 4.1). This family includes a number of bacterial surface antigens expressed on the surface of pathogens.. +PF00084 sushi;
Sushi domain (SCR repeat). Swissprot_feature_table. +PF04099 Sybindin-like family
Pfam-B_3240 (release 7.3);. Sybindin is a physiological syndecan-2 ligand on dendritic spines, the small protrusions on the surface of dendrites that receive the vast majority of excitatory synapses .. +PF02383 SacI homology domain
Pfam-B_1090 (release 5.2). This Pfam family represents a protein domain which shows homology to the yeast protein SacI Swiss:P32368. The SacI homology domain is most notably found at the amino terminal of the inositol 5'-phosphatase synaptojanin.. +PF02078 Synapsin; Synapsin_N;
Synapsin, N-terminal domain. +PF02750 Synapsin, ATP binding domain
Ca dependent ATP binding in this ATP grasp fold. Function unknown.. +PF00957 synaptobrevin;
Pfam-B_303 (release 3.0). +PF01284 Synaptophysin;
Membrane-associating domain. MARVEL domain-containing proteins are often found in lipid-associating proteins - such as Occludin and MAL family proteins . It may be part of the machinery of membrane apposition events, such as transport vesicle biogenesis.. +PF01034 Syndecan domain
Pfam-B_1182 (release 3.0). Syndecans are transmembrane heparin sulfate proteoglycans which are implicated in the binding of extracellular matrix components and growth factors.. +PF01387 Synuclein
There are three types of synucleins in humans, these are called alpha, beta and gamma. Alpha synuclein has been found mutated in families with autosomal dominant Parkinson's disease. A peptide of alpha synuclein has also been found in amyloid plaques in Alzheimer's patients. . +PF00837 Iodothyronine deiodinase
Pfam-B_1631 (release 2.1). Iodothyronine deiodinase converts thyroxine (T4) to 3,5,3'-triiodothyronine (T3).. +PF03903 T4_tail_gp36;
+PF03906 Tail_fibre_T7; T7_tail_fibre;
Phage T7 tail fibre protein. The bacteriophage T7 tail complex consists of a conical tail-tube surrounded by six kinked tail-fibres, which are oligomers of the viral protein gp17.. +PF02217 Origin of replication binding protein
Pfam-B_827 (release 5.2). This domain of large T antigen binds to the SV40 origin of DNA replication . . +PF05010 Transforming acidic coiled-coil-containing protein (TACC)
Pfam-B_4807 (release 7.6). This family contains the proteins TACC 1, 2 and 3 the genes for which are found concentrated in the centrosomes of eukaryotes and may play a conserved role in organising centrosomal microtubules. The human TACC proteins have been linked to cancer and TACC2 has been identified as a possible tumour suppressor (AZU-1) . The functional homologue (Alp7) in Schizosaccharomyces pombe has been shown to be required for organisation of bipolar spindles .. +PF02202 Tachykinin family
Alignment kindly provided by SMART. +PF04972 TAD;
This domain is found in a family of osmotic shock protection proteins (e.g. Swiss:P27291). It is also found in some Secretins and a group of potential haemolysins. Its likely function is attachment to phospholipid membranes ( ).. +PF02969 TATA box binding protein associated factor (TAF)
TAF proteins adopt a histone-like fold.. +PF04658 TAFII55 protein conserved region
Pfam-B_4395 (release 7.5). The general transcription factor, TFIID, consists of the TATA-binding protein (TBP) associated with a series of TBP-associated factors (TAFs) that together participate in the assembly of the transcription preinitiation complex. TAFII55 binds to TAFII250 and inhibits its acetyltransferase activity. The exact role of TAFII55 is currently unknown. The conserved region is situated towards the N-terminus of the protein . . +PF05069 tail_comp_S;
Phage virion morphogenesis family . TIGRFAMs (release 2.0);. Protein S of phage P2 is thought to be involved in tail completion and stable head joining.. +PF02203 Tar ligand binding domain homologue
Alignment kindly provided by SMART. +PF00539 Transactivating regulatory protein (Tat)
The retroviral Tat protein binds to the Tar RNA . This activates transcriptional initiation and elongation from the LTR promoter. Binding is mediated by an arginine rich region.. +PF01026 UPF0006;
Pfam-B_1370 (release 3.0). This family of proteins are related to a large superfamily of metalloenzymes . TatD, a member of this family has been shown experimentally to be a DNase enzyme.. +PF03430 Trans-activating transcriptional regulator
Pfam-B_4420 (release 6.6). This family of trans-activating transcriptional regulator (TATR), also known as intermediate early protein 1, are common to the Nucleopolyhedroviruses. . +PF02668 Taurine catabolism dioxygenase TauD, TfdA family
This family consists of taurine catabolism dioxygenases of the TauD, TfdA family. TauD from E. coli Swiss:P37610 is an alpha-ketoglutarate-dependent taurine dioxygenase . This enzyme catalyses the oxygenolytic release of sulfite from taurine . TfdA from Burkholderia sp. Swiss:Q45423 is a 2,4-dichlorophenoxyacetic acid/alpha-ketoglutarate dioxygenase . TfdA from Alcaligenes eutrophus JMP134 Swiss:P10088 is a 2,4-dichlorophenoxyacetate monooxygenase . Also included are gamma-Butyrobetaine hydroxylase enzymes EC:1.14.11.1 .. +PF01361 Tautomerase enzyme
This family includes the enzyme 4-oxalocrotonate tautomerase Swiss:Q01468 that catalyses the ketonisation of 2-hydroxymuconate to 2-oxo-3-hexenedioate.. +PF02959 HTLV_tat;
Pfam-B_1456 (release 6.4). Human T-cell leukaemia virus type I (HTLV-I) is the etiological agent for adult T-cell leukaemia (ATL), as well as for tropical spastic paraparesis (TSP) and HTLV-I associated myelopathy (HAM). A biological understanding of the involvement of HTLV-I in ATL has focused significantly on the workings of the virally-encoded 40 kDa phospho-oncoprotein, Tax. Tax is a transcriptional activator. Its ability to modulate the expression and function of many cellular genes has been reasoned to be a major contributory mechanism explaining HTLV-I-mediated transformation of cells. In activating cellular gene expression, Tax impinges upon several cellular signal-transduction pathways, including those for CREB/ATF and NF-kappaB .. +PF00683 TGF-bp;
Pfam-B_82 (release 2.1). This domain is also known as the 8 cysteine domain. This family includes the hybrid domains . This cysteine rich repeat is found in TGF binding protein and fibrillin.. +PF00566 TBC;
Rab-GTPase-TBC domain. Alignment kindly provided by SMART. Identification of a TBC domain in GYP6_YEAST and GYP7_YEAST, which are GTPase activator proteins of yeast Ypt6 and Ypt7, implies that these domains are GTPase activator proteins of Rab-like small GTPases.. +PF02970 Tubulin binding cofactor A
+PF03558 TBSV core protein P21/P22
Pfam-B_3028 (release 7.0). This protein is required for cell-to-cell movement in plants. Furthermore, the membrane-associated protein is dispensable for both replication and transcription .. +PF01840 TCL1/MTCP1 family
& Pfam-B_7391 (Release 8.0). Two related oncogenes, TCL-1 Swiss:P56279 and MTCP-1 Swiss:P56278, are overexpressed in T cell prolymphocytic leukaemias as a result of chromosomal rearrangements that involve the translocation of one T cell receptor gene to either chromosome 14q32 or Xq28 . This family contains two repeated motifs that form a single globular domain .. +PF03634 TCP family transcription factor
Pfam-B_1979 (release 7.0). This is a family of TCP plant transcription factors. TCP proteins were named after the first characterised members (TB1, CYC and PCFs) and they are involved in multiple developmental control pathways . This region contains a DNA binding basic-Helix-Loop-Helix (bHLH) structure .. +PF03645 Tctex-1 family
Pfam-B_2986 (release 7.0). Tctex-1 is a dynein light chain. It has been shown that Tctex-1 can bind to the cytoplasmic tail of rhodopsin. C-terminal rhodopsin mutations responsible for retinitis pigmentosa inhibit this interaction. . +PF00838 Translationally controlled tumour protein
Pfam-B_1548 (release 2.1). +PF03347 Vibrio thermostable direct hemolysin
Pfam-B_3633 (release 6.5). +PF01285 TEA/ATTS domain family
+PF03848 Tellurite resistance protein TehB
TIGRFAMs, Griffiths-Jones SR. +PF02765 Telo_bind;
Telomeric single stranded DNA binding POT1/CDC13. This domain binds single stranded telomeric DNA and adopts an OB fold . It includes the proteins POT1 and CDC13 which have been shown to regulate telomere length, replication and capping [2-4]. POT1 is one component of the shelterin complex that protects telomere-ends from attack by DNA-repair mechanisms [5,6].. +PF03070 TENA/THI-4/PQQC family
Pfam-B_2039 (release 6.4) & Pfam-B_7791 (release 7.7). Members of this family are found in all the three major phyla of life: archaebacteria, eubacteria, and eukaryotes. In Bacillus subtilis, TENA is one of a number of proteins that enhance the expression of extracellular enzymes, such as alkaline protease, neutral protease and levansucrase . The THI-4 protein, which is involved in thiamine biosynthesis, is also a member of this family. The C-terminal part of these proteins consistently show significant sequence similarity to TENA proteins. This similarity was first noted with the Neurospora crassa THI-4 . This family includes bacterial coenzyme PQQ synthesis protein C or PQQC proteins. Pyrroloquinoline quinone (PQQ) is the prosthetic group of several bacterial enzymes,including methanol dehydrogenase of methylotrophs and the glucose dehydrogenase of a number of bacteria . PQQC has been found to be required in the synthesis of PQQ but its function is unclear. The exact molecular function of members of this family is uncertain.. +PF04876 Tenuivirus major non-capsid protein
Pfam-B_6119 (release 7.6). This protein of unknown function accumulates in large amounts in tenuivirus infected cells. It is found in all forms of the inclusion bodies that are formed after infection .. +PF03300 Tenuivirus non-structural protein NS4
Pfam-B_4315 (release 6.5). +PF05099 Tellurite resistance protein TerB
This family contains the TerB tellurite resistance proteins from a number of bacteria.. +PF03741 Integral membrane protein TerC family
This family contains a number of integral membrane proteins that also contains the TerC protein. TerC has been implicated in resistance to tellurium. This protein may be involved in efflux of tellurium ions. The tellurite-resistant Escherichia coli strain KL53 was found during testing of the group of clinical isolates for antibiotics and heavy metal ion resistance . Determinant of the tellurite resistance of the strain was located on a large conjugative plasmid. Analyses showed, the genes terB, terC, terD and terE are essential for conservation of the resistance. The members of the family contain a number of conserved aspartates that could be involved in binding to metal ions.. +PF02342 TerD domain
The TerD domain is found in TerD family proteins that include the paralogous TerD, TerA, TerE, TerF and TerZ proteins. It is found in a stress response operon with TerB and TerC. TerD has a maximum of two calcium binding sites [2] depending on the conservation of aspartates [2]. It has various fusions to nuclease domains, RNA binding domains, ubiquitin related domains, and metal binding domains. The ter gene products lie at the center of membrane-linked metal recognition complexes with regulatory ramifications encompassing phosphorylation-dependent signal transduction, RNA-dependent regulation, biosynthesis of nucleoside-like metabolites and DNA processing linked to novel pathways . . +PF03592 Terminase_small;
Terminase small subunit . Pfam-B_3755 (release 7.0). Packaging of double-stranded viral DNA concatemers requires interaction of the prohead with virus DNA. This process is mediated by a phage-encoded DNA recognition and terminase protein. The terminase enzymes described so far, which are hetero-oligomers composed of a small and a large subunit, do not have a significant level of sequence homology. The small terminase subunit is thought to form a nucleoprotein structure that helps to position the terminase large subunit at the packaging initiation site .. +PF03936 Terpene synthase family, metal binding domain
Pfam-B_728 (release 3.0). It has been suggested that this gene family be designated tps (for terpene synthase) . It has been split into six subgroups on the basis of phylogeny, called tpsa-tpsf. tpsa includes vetispiridiene synthase Swiss:Q39979, 5-epi- aristolochene synthase, Swiss:Q40577 and (+)-delta-cadinene synthase Swiss:P93665. tpsb includes (-)-limonene synthase, Swiss:Q40322. tpsc includes kaurene synthase A, Swiss:O04408. tpsd includes taxadiene synthase, Swiss:Q41594, pinene synthase, Swiss:O24475 and myrcene synthase, Swiss:O24474. tpse includes kaurene synthase B. tpsf includes linalool synthase.. +PF02909 tetR_C;
Tetracyclin repressor, C-terminal all-alpha domain. +PF03299 Transcription factor AP-2
Pfam-B_1736 (release 6.5). +PF02559 CarD; TF_CarD; CarD_TRCF; CarD-CdnL_RID;
CarD-like/TRCF domain. CarD is a Myxococcus xanthus protein required for the activation of light- and starvation-inducible genes . This family includes the presumed N-terminal domain, CdnL. CarD interacts with the zinc-binding protein CarG to form a complex that regulates multiple processes in Myxococcus xanthus . This family also includes a domain to the N-terminal side of the DEAD helicase of TRCF (transcription-repair-coupling factor) proteins. TRCF displaces RNA polymerase stalled at a lesion, binds to the damage recognition protein UvrA, and increases the template strand repair rate during transcription . This domain is involved in binding to the stalled RNA polymerase . The family includes members otherwise referred to as CdnL, for CarD N-terminal like, which differ functionally from CarD. The TRCF domain mentioned above is the RNA polymerase-interacting domain or RID .. +PF03529 Otx1 transcription factor
+PF03849 Transcription factor Tfb2
TIGRFAMs, Griffiths-Jones SR. +PF03153 Transcription factor IIA, alpha/beta subunit
Pfam-B_3542 (release 6.5). Transcription initiation factor IIA (TFIIA) is a heterotrimer, the three subunits being known as alpha, beta, and gamma, in order of molecular weight. The N and C-terminal domains of the gamma subunit are represented in Pfam:PF02268 and Pfam:PF02751, respectively. This family represents the precursor that yields both the alpha and beta subunits. The TFIIA heterotrimer is an essential general transcription initiation factor for the expression of genes transcribed by RNA polymerase II. Together with TFIID, TFIIA binds to the promoter region; this is the first step in the formation of a pre-initiation complex (PIC). Binding of the rest of the transcription machinery follows this step . After initiation, the PIC does not completely dissociate from the promoter. Some components, including TFIIA, remain attached and re-initiate a subsequent round of transcription.. +PF02268 TFIIA_gamma;
Transcription initiation factor IIA, gamma subunit, helical domain. Pfam-B_4941 (release 5.2). Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIA (TFIIA) is a multimeric protein which facilitates the binding of TFIID to the TATA box. The N-terminal domain of the gamma subunit is a 4 helix bundle.. +PF02751 Transcription initiation factor IIA, gamma subunit
Pfam-B_4941 (release 5.2). Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIA (TFIIA) is a multimeric protein which facilitates the binding of TFIID to the TATA box. The C-terminal domain of the gamma subunit is a 12 stranded beta-barrel.. +PF02291 TFIID-31;
Transcription initiation factor IID, 31kD subunit. Pfam-B_6729 (release 5.2). This family represents the N-terminus of the 31kD subunit (42kD in drosophila) of transcription initiation factor IID (TAFII31). TAFII31 binds to p53, and is an essential requirement for p53 mediated transcription activation.. +PF03540 TFIID_30kD;
Transcription initiation factor TFIID 23-30kDa subunit. +PF02002 TFIIE;
The general transcription factor TFIIE has an essential role in eukaryotic transcription initiation together with RNA polymerase II and other general factors. Human TFIIE consists of two subunits TFIIE-alpha Swiss:P29083 and TFIIE-beta Swiss:P29084 and joins the pre-initiation complex after RNA polymerase II and TFIIF . This family consists of the conserved amino terminal region of eukaryotic TFIIE-alpha and proteins from archaebacteria that are presumed to be TFIIE-alpha subunits also Swiss:O29501 .. +PF02186 TFIIE beta subunit core domain
General transcription factor TFIIE consists of two subunits, TFIIE alpha Pfam:PF02002 and TFIIE beta. TFIIE beta has been found to bind to the region where the promoter starts to open to be single-stranded upon transcription initiation by RNA polymerase II. The structure of the DNA binding core region has been solved and has a winged helix fold.. +PF02270 Transcription initiation factor IIF, beta subunit
Pfam-B_4519 (release 5.2). Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIF (TFIIF) is a tetramer of two beta subunits associate with two alpha subunits which interacts directly with RNA polymerase II. The beta subunit of TFIIF is required for recruitment of RNA polymerase II onto the promoter.. +PF01096 TFIIS;
Transcription factor S-II (TFIIS). +PF04994 TfoX C-terminal domain
TfoX may play a key role in the development of genetic competence by regulating the expression of late competence-specific genes . This family corresponds to the C-terminal presumed domain of TfoX. The domain is found associated with Pfam:PF00383 in Swiss:Q9JZR1. It is also found as an isolated domain in some proteins suggesting this is an autonomous domain.. +PF04993 TfoX N-terminal domain
TfoX may play a key role in the development of genetic competence by regulating the expression of late competence-specific genes . This family corresponds to the N-terminal presumed domain of TfoX. The domain is found as an isolated domain in some proteins suggesting this is an autonomous domain.. +PF00019 TGF-beta;
Transforming growth factor beta like domain. +PF00688 TGF-beta propeptide
Pfam-B_110 (release 2.1). This propeptide is known as latency associated peptide (LAP) in TGF-beta. LAP is a homodimer which is disulfide linked to TGF-beta binding protein.. +PF01702 Queuine tRNA-ribosyltransferase
Pfam-B_1643 (release 4.1). This is a family of queuine tRNA-ribosyltransferases EC:2.4.2.29, also known as tRNA-guanine transglycosylase and guanine insertion enzyme. Queuine tRNA-ribosyltransferase modifies tRNAs for asparagine, aspartic acid, histidine and tyrosine with queuine. It catalyses the exchange of guanine-34 at the wobble position with 7-aminomethyl-7-deazaguanine, and the addition of a cyclopentenediol moiety to 7-aminomethyl-7-deazaguanine-34 tRNA; giving a hypermodified base queuine in the wobble position [1,2]. The aligned region contains a zinc binding motif C-x-C-x2-C-x29-H, and important tRNA and 7-aminomethyl-7deazaguanine binding residues .. +PF04858 TH1 protein
Pfam-B_6070 (release 7.6). TH1 is a highly conserved but uncharacterised metazoan protein. No homologue has been identified in Caenorhabditis elegans . TH1 binds specifically to A-Raf kinase .. +PF00314 thaumatin;
+PF01946 Thi4 family
This family includes Swiss:P32318 a putative thiamine biosynthetic enzyme.. +PF01964 ThiC family
ThiC is found within the thiamine biosynthesis operon. ThiC is involved in pyrimidine biosynthesis . The precise catalytic function of ThiC is still not known. ThiC participates in the formation of 4-Amino-5-hydroxymethyl-2-methylpyrimidine from AIR, an intermediate in the de novo pyrimidine biosynthesis.. +PF02568 Thiamine biosynthesis protein (ThiI)
ThiI is required for thiazole synthesis, required for thiamine biosynthesis .. +PF00975 Thioesterase domain
Pfam-B_180 (release 3.0). Peptide synthetases are involved in the non-ribosomal synthesis of peptide antibiotics. Next to the operons encoding these enzymes, in almost all cases, are genes that encode proteins that have similarity to the type II fatty acid thioesterases of vertebrates. There are also modules within the peptide synthetases that also share this similarity. With respect to antibiotic production, thioesterases are required for the addition of the last amino acid to the peptide antibiotic, thereby forming a cyclic antibiotic. Thioesterases (non-integrated) have molecular masses of 25-29 kDa.. +PF01289 Thiol-activated cytolysin
+PF00108 thiolase;
Thiolase, N-terminal domain. Thiolase is reported to be structurally related to beta-ketoacyl synthase (Pfam:PF00109), and also chalcone synthase.. +PF02803 thiolase_C;
Thiolase, C-terminal domain. Thiolase is reported to be structurally related to beta-ketoacyl synthase (Pfam:PF00109), and also chalcone synthase.. +PF00085 thiored;
Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. Some members with only the active site are not separated from the noise.. +PF00585 Thr_dehydratase_C;
C-terminal regulatory domain of Threonine dehydratase. Threonine dehydratases Pfam:PF00291 all contain a carboxy terminal region. This region may have a regulatory role. Some members contain two copies of this region. This family is homologous to the Pfam:PF01842 domain.. +PF04163 Tht1-like nuclear fusion protein
Pfam-B_64620 (release 7.3);. +PF02926 THUMP domain
The THUMP domain is named after thiouridine synthases, methylases and PSUSs . The THUMP domain consists of about 110 amino acid residues. The structure of ThiI reveals that the THUMP has a fold unlike that of previously characterised RNA-binding domains . It is predicted that this domain is an RNA-binding domain. The THUMP domain probably functions by delivering a variety of RNA modification enzymes to their targets .. +PF02511 Thymidylate synthase complementing protein
Pfam-B_1648 (release 5.4). Thymidylate synthase complementing protein (Thy1) complements the thymidine growth requirement of the organisms in which it is found, but shows no homology to thymidylate synthase.. +PF00303 thymidylat_synt;
Thymidylate synthase. Swiss:P28176 is not included as a member of this family. Although annotated as such, there is no significant sequence similarity to other members.. +PF02223 Thymidylate kinase
Pfam-B_484 (release 5.2). +PF01290 Thymosin beta-4 family
+PF00086 thyroglobulin_1;
Thyroglobulin type-1 repeat. Swissprot_feature_table. Thyroglobulin type 1 repeats are thought to be involved in the control of proteolytic degradation . The domain usually contains six conserved cysteines. These form three disulphide bridges. Cysteines 1 pairs with 2, 3 with 4 and 5 with 6.. +PF04278 Tic22-like family
TIGRFAMs (release 2.0);. The preprotein translocation at the inner envelope membrane of chloroplasts so far involves five proteins: Tic110, Tic55, Tic40, Tic22 (this family) and Tic20. The molecular function of these proteins has not yet been established . . +PF01826 Trypsin Inhibitor like cysteine rich domain
This family contains trypsin inhibitors as well as a domain found in many extracellular proteins. The domain typically contains ten cysteine residues that form five disulphide bonds. The cysteine residues that form the disulphide bonds are 1-7, 2-6, 3-5, 4-10 and 8-9.. +PF00121 Triosephosphate isomerase
+PF02466 Tim17/Tim22/Tim23/Pmp24 family
Pfam-B_2241 (release 5.4) & Pfam-B_7792 (Release 8.0). The pre-protein translocase of the mitochondrial outer membrane (Tom) allows the import of pre-proteins from the cytoplasm. Tom forms a complex with a number of proteins, including Tim17. Tim17 and Tim23 are thought to form the translocation channel of the inner membrane. This family includes Tim17, Tim22 and Tim23. This family also includes Pmp24 a peroxisomal protein. The involvement of this domain in the targeting of PMP24 remains to be proved. PMP24 was known as Pmp27 in .. +PF04821 Timeless protein
Pfam-B_3454 (release 7.6). The timeless gene in Drosophila melanogaster and its homologues in a number of other insects and mammals (including human) are involved in circadian rhythm control . This family includes related proteins from a number of fungal species.. +PF05029 Timeless protein C terminal region
Pfam-B_5695 (release 7.6). The timeless (tim) gene is essential for circadian function in Drosophila. Putative homologues of Drosophila tim have been identified in both mice and humans (mTim and hTIM, respectively). Mammalian TIM is not the true orthologue of Drosophila TIM, but is the likely orthologue of a fly gene, timeout (also called tim-2) . mTim has been shown to be essential for embryonic development, but does not have substantiated circadian function . Some family members contain a SANT domain in this region.. +PF00965 Tissue inhibitor of metalloproteinase
Pfam-B_1239 (release 3.0). Members of this family are common in extracellular regions of vertebrate species. +PF03549 Tir_receptor;
Translocated intimin receptor (Tir) intimin-binding domain. Griffiths-Jones SR, Moxon SJ. Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation . This family represents the Tir intimin-binding domain (Tir IBD) which is needed to bind intimin and support the predicted topology for Tir, with both N- and C-terminal regions in the mammalian cell cytosol . . +PF04553 Tis11B_C;
Tis11B like protein, N terminus. Members of this family always contain a tandem repeat of CCCH zinc fingers Pfam:PF00642. Tis11B, Tis11D and their homologues are thought to be regulatory proteins involved in the response to growth factors. The function of the N terminus is unknown.. +PF01108 Tissue factor
This family is found in metazoa, and is very similar to the fibronectin type III domain. The family is found in cytokine receptors, interleukin and interferon receptors and coagulation factor III proteins. It occurs multiple times, as does fn3, family Pfam:PF00041.. +PF00265 Thymidine kinase
+PF00693 TK_herpes;
Thymidine kinase from herpesvirus. Pfam-B_186 (release 2.1). +PF03219 TLC ATP/ADP transporter
Pfam-B_2261 (release 6.5). +PF05017 TMP repeat
Pfam-B_1012 (release 7.6). This short repeat consists of the motif WXXh where X can be any residue and h is a hydrophobic residue. The repeat is named TMP after its occurrence in the tape measure protein (TMP). Tape measure protein is a component of phage tail and probably forms a beta-helix. Truncated forms of TMP lead to shortened tail fibres . This repeat is also found in non-phage proteins where it may play a structural role.. +PF02581 Thiamine monophosphate synthase/TENI
Thiamine monophosphate synthase (TMP) (EC:2.5.1.3) catalyses the substitution of the pyrophosphate of 2-methyl-4-amino-5- hydroxymethylpyrimidine pyrophosphate by 4-methyl-5- (beta-hydroxyethyl)thiazole phosphate to yield thiamine phosphate . This Pfam family also includes the regulatory protein TENI (Swiss:P25053).. +PF00721 Virus coat protein (TMV like)
Pfam-B_746 (release 2.1). This family contains coat proteins from tobamoviruses, hordeiviruses, Tobraviruses, Furoviruses and Potyviruses.. +PF00229 TNF(Tumour Necrosis Factor) family
+PF00020 TNFR/NGFR cysteine-rich region
Swissprot_feature_table. +PF01107 Tobamo_MP;
Viral movement protein (MP). Pfam-B_815 (release 3.0) & Pfam-B_1906 (release 4.1). This family includes a variety of movement proteins (MP)s. The MP is necessary for the initial cell-to-cell movement during the early stages of a viral infection. This movement is active, and it is known that the MP interacts with the plasmodesmata and possesses the ability to bind to RNA to achieve its role . This family also includes virus movement proteins from the caulimovirus family. It has been suggested in cauliflower mosaic virus that these proteins mediate viral movement by modifying plasmodesmata and forming tubules in the channel that can accommodate the virus particles and references therein. The family contains a conserved DXR motif that is probably functionally important.. +PF04052 TolB amino-terminal domain
TolB is an essential periplasmic component of the tol-dependent translocation system. The function of this amino terminal domain is uncertain.. +PF03349 Outer membrane protein transport protein (OMPP1/FadL/TodX)
Pfam-B_3708 (release 6.5). This family includes TodX from Pseudomonas putida F1 Swiss:Q51971 and TbuX from Ralstonia pickettii PKO1 Swiss:Q9RBW8. These are membrane proteins of uncertain function that are involved in toluene catabolism. Related proteins involved in the degradation of similar aromatic hydrocarbons are also in this family, such as CymD Swiss:O33458. This family also includes FadL involved in translocation of long-chain fatty acids across the outer membrane. It is also a receptor for the bacteriophage T2.. +PF04281 Mitochondrial import receptor subunit Tom22
TIGRFAMs (release 2.0);. The mitochondrial protein translocase family, which is responsible for movement of nuclear encoded pre-proteins into mitochondria, is very complex with at least 19 components. These proteins include several chaperone proteins, four proteins of the outer membrane translocase (Tom) import receptor, five proteins of the Tom channel complex, five proteins of the inner membrane translocase (Tim) and three "motor" proteins. This family represents the Tom22 proteins . The N terminal region of Tom22 has been shown to have chaperone-like activity, and the C terminal region faces the intermembrane face .. +PF03220 Tombusvirus P19 core protein
Pfam-B_2714 (release 6.5). +PF03544 TonB;
Gram-negative bacterial TonB protein C-terminal. The TonB_C domain is the well-characterised C-terminal region of the TonB receptor molecule. This protein is bound to an inner membrane-bound protein ExbB via a globular domain and has a flexible middle region that is likely to help in positioning the C-terminal domain into the iron-transporter barrel in the outer membrane . TonB_C interacts with the N-terminal TonB box of the outer membrane transporter that binds the Fe3+-siderophore complex. The barrel of the transporter, consisting of 22 beta-sheets and an inside plug, binds the iron complex in the barrel entrance .. +PF00593 TonB_boxC;
TonB dependent receptor. This model now only covers the conserved part of the barrel structure.. +PF01131 DNA topoisomerase
Pfam-B_505 (release 3.0). This subfamily of topoisomerase is divided on the basis that these enzymes preferentially relax negatively supercoiled DNA, form a 5' phospho-tyrosine linkage in the enzyme-DNA covalent intermediate and have high affinity for single stranded DNA.. +PF02919 Topoisomer_I_N;
Eukaryotic DNA topoisomerase I, DNA binding fragment. Pfam-B_1377 (release 3.0). Topoisomerase I promotes the relaxation of DNA superhelical tension by introducing a transient single-stranded break in duplex DNA and are vital for the processes of replication, transcription, and recombination . This family may be more than one structural domain.. +PF01028 Topoisomerase_I;
Eukaryotic DNA topoisomerase I, catalytic core. Pfam-B_1377 (release 3.0). Topoisomerase I promotes the relaxation of DNA superhelical tension by introducing a transient single-stranded break in duplex DNA and are vital for the processes of replication, transcription, and recombination .. +PF01751 Primase;
Pfam-B_500 (release 4.2). This is a conserved region from DNA primase. This corresponds to the Toprim domain common to DnaG primases, topoisomerases, OLD family nucleases and RecR proteins . Both DnaG motifs IV and V are present in the alignment, the DxD (V) motif may be involved in Mg2+ binding and mutations to the conserved glutamate (IV) completely abolish DnaG type primase activity . DNA primase EC:2.7.7.6 is a nucleotidyltransferase; it synthesises the oligoribonucleotide primers required for DNA replication on the lagging strand of the replication fork; it can also prime the leading strand and has been implicated in cell division . This family also includes the atypical archaeal A subunit from type II DNA topoisomerases . Type II DNA topoisomerases catalyse the relaxation of DNA supercoiling by causing transient double strand breaks.. +PF01533 Tospovirus nucleocapsid protein
Pfam-B_950 (release 4.0). The tospovirus genome consists of three linear ssRNA segments, denoted L, M and S complexed with the nucleocapsid protein. The S RNA encodes the nucleocapsid protein and another non-structural protein .. +PF00087 toxin; toxin_1;
A family of venomous neurotoxins and cytotoxins. Structure is small, disulfide-rich, nearly all beta sheet.. +PF00451 toxin_2;
Scorpion short toxin, BmKK2. Members of this family, which are found in various scorpion toxins, confer potassium channel blocking activity .. +PF00537 toxin_3;
Scorpion toxin-like domain . Pfam-B_8170 (release 8.0). This family contains both neurotoxins and plant defensins. The mustard trypsin inhibitor, MTI-2, is a plant defensin. It is a potent inhibitor of trypsin with no activity towards chymotrypsin. MTI-2 is toxic for Lepidopteran insects, but has low activity against aphids . Brazzein is a plant defensin-like protein. It is a pH-stable, heat-stable and intensely sweet protein . The scorpion toxin (a neurotoxin) binds to sodium channels and inhibits the activation mechanisms of the channels, thereby blocking neuronal transmission. Scorpion toxins bind to sodium channels and inhibit the activation mechanisms of the channels, thereby blocking neuronal transmission. +PF00706 toxin_4;
Pfam-B_589 (release 2.1). +PF02079 Nuclear transition protein 1
+PF01254 Nuclear transition protein 2
+PF04406 Type IIB DNA topoisomerase
Type II DNA topoisomerases are ubiquitous enzymes that catalyse the ATP-dependent transport of one DNA duplex through a second DNA segment via a transient double-strand break. Type II DNA topoisomerases are now subdivided into two sub-families, type IIA and IIB DNA topoisomerases. TP6A_N is present in type IIB topoisomerase and is thought to be involved in DNA binding owing to its sequence similarity to E. coli catabolite activator protein (CAP) . . +PF00590 Tetrapyrrole (Corrin/Porphyrin) Methylases
MRC-LMB Genome group. This family uses S-AdoMet in the methylation of diverse substrates. This family includes a related group of bacterial proteins of unknown function, including Swiss:P45528. This family includes the methylase Diphthine synthase.. +PF04201 Tumour protein D52 family
Pfam-B_2632 (release 7.3). The hD52 gene was originally identified through its elevated expression level in human breast carcinoma. Cloning of D52 homologues from other species has indicated that D52 may play roles in calcium-mediated signal transduction and cell proliferation. Two human homologues of hD52, hD53 and hD54, have also been identified, demonstrating the existence of a novel gene/protein family . These proteins have an amino terminal coiled-coil that allows members to form homo- and heterodimers with each other .. +PF00205 TPP_enzymes;
Thiamine pyrophosphate enzyme, central domain. The central domain of TPP enzymes contains a 2-fold Rossman fold.. +PF02775 TPP_enzymes_C;
Thiamine pyrophosphate enzyme, C-terminal TPP binding domain. +PF02776 TPP_enzymes_N;
Thiamine pyrophosphate enzyme, N-terminal TPP binding domain. +PF01963 TraB family
pAD1 is a hemolysin/bacteriocin plasmid originally identified in Enterococcus faecalis DS16. It encodes a mating response to a peptide sex pheromone, cAD1, secreted by recipient bacteria. Once the plasmid pAD1 is acquired, production of the pheromone ceases--a trait related in part to a determinant designated traB. However a related protein is found in C. elegans Swiss:Q94217, suggesting that members of the TraB family have some more general function. This family also includes the bacterial GumN protein. The family has a conserved GXXH motif close to the N-terminus, a conserved glutamate and a conserved arginine that may be catalytic. The family also includes a second conserved GXXH motif near the C-terminus.. +PF02534 TRAG; TraG;
Type IV secretory system Conjugative DNA transfer. Pfam-B_1146 (release 5.4). These proteins contain a P-loop and walker-B site for nucleotide binding. TraG is essential for DNA transfer in bacterial conjugation. These proteins are thought to mediate interactions between the DNA-processing (Dtr) and the mating pair formation (Mpf) systems . The C-terminus of this domain interacts with the relaxosome component TraM via the latter's tetramerisation domain. TraD is a hexameric ring ATPase that forms the cytoplasmic face of the conjugative pore . The family contains a number of different DNA transfer proteins .. +PF00923 Transaldolase
Pfam-B_787 (release 3.0). +PF00382 transcript_fac2;
Transcription factor TFIIB repeat. +PF00405 transferrin;
+PF00868 Transglutaminas; Transglutamin_N;
Transglutaminase family. Pfam-B_783 (release 3.0). +PF00912 Transglycosyl;
Pfam-B_558 (release 3.0). The penicillin-binding proteins are bifunctional proteins consisting of transglycosylase and transpeptidase in the N- and C-terminus respectively . The transglycosylase domain catalyses the polymerisation of murein glycan chains ( ).. +PF00456 transketolase;
Transketolase, thiamine diphosphate binding domain. This family includes transketolase enzymes EC:2.2.1.1. and also partially matches to 2-oxoisovalerate dehydrogenase beta subunit Swiss:P37941 EC:1.2.4.4. Both these enzymes utilise thiamine pyrophosphate as a cofactor, suggesting there may be common aspects in their mechanism of catalysis.. +PF01818 Bacteriophage translational regulator
The translational regulator protein regA is encoded by the T4 bacteriophage and binds to a region of messenger RNA (mRNA) that includes the initiator codon. RegA is unusual in that it represses the translation of about 35 early T4 mRNAs but does not affect nearly 200 other mRNAs .. +PF01997 DUF130;
Members of this family include Translin Swiss:Q15631 that interacts with DNA and forms a ring around the DNA. This family also includes Swiss:Q99598, that was found to interact with translin in a yeast two-hybrid screen .. +PF02133 Permease for cytosine/purines, uracil, thiamine, allantoin
+PF04236 Tc5 transposase C-terminal domain
Pfam-B_2955 (release 6.5). +PF00872 Transpo_mutator;
Transposase, Mutator family. Pfam-B_376 (release 3.0). +PF00273 transport_prot;
Serum albumin family. +PF01359 Transposase (partial DDE domain)
Pfam-B_394 (release 3.0). This family includes the mariner transposase .. +PF01610 Transposase_12;
Pfam-B_1015 (release 4.1). Transposase proteins are necessary for efficient DNA transposition. Contains transposases for IS204 (Swiss:Q50911) , IS1001 (Swiss:Q06126) , IS1096 (Swiss:Q50440) and IS1165 Swiss:Q48788 .. +PF01710 Transposase_14;
Pfam-B_1769 (release 4.1). Transposase proteins are necessary for efficient DNA transposition. This family includes insertion sequences from Synechocystis PCC 6803 three of which are characterised as homologous to bacterial IS5- and IS4- and to several members of the IS630-Tc1-mariner superfamily .. +PF01797 Transposase_17;
Transposase IS200 like. Pfam-B_1347 (release 4.2). Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases for IS200 from E. coli.. +PF01385 Transposase_2;
Probable transposase. Pfam-B_1210 (release 3.0) & Pfam-B_4602 (Release 7.5). This family includes IS891 , IS1136 and IS1341 . DUF1225, Pfam:PF06774, has now been merged into this family.. +PF02992 Transposase family tnp2
Pfam-B_1531 (release 6.4). +PF02994 L1 transposable element
Pfam-B_2299 (release 6.4). +PF03017 TNP1/EN/SPM transposase
Pfam-B_1491 (release 6.4). +PF03004 Plant transposase (Ptta/En/Spm family)
Pfam-B_1902 (release 6.4). Transposase proteins are necessary for efficient DNA transposition. This family includes various plant transposases from the Ptta and En/Spm families.. +PF03050 Transposase_25;
Transposase IS66 family . Pfam-B_2526 (release 6.4). Transposase proteins are necessary for efficient DNA transposition. This family includes IS66 from Agrobacterium tumefaciens .. +PF03400 Transposase_27;
Pfam-B_2448 (release 6.6). Transposase proteins are necessary for efficient DNA transposition. This family represents bacterial IS1 transposases.. +PF04195 Putative gypsy type transposon
Pfam-B_1755 (release 7.3). This family of plant genes are thought to be related to gypsy type transposons.. +PF04693 Transposase_29;
Archaeal putative transposase ISC1217. Pfam-B_5730 (release 7.5). +PF04740 LXG domain of WXG superfamily
Zhang D, Mifsud W, Aravind L. Pfam-B_3568 (release 7.5). This domain is present in the N-terminal region of a group of polymorphic toxin proteins in bacteria. It is predicted to use Type VII secretion pathway to mediate export of bacterial toxins .. +PF04754 Putative transposase, YhgA-like
Pfam-B_3820 (release 7.5). This family of putative transposases includes the YhgA sequence from Escherichia coli (Swiss:P31667) and several prokaryotic homologues.. +PF04986 Transposase_32;
Putative transposase. Pfam-B_5271 (release 7.6). Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases IS1294 and IS801. This is a rolling-circle transposase.. +PF01498 Transposase_5;
Pfam-B_462 (release 4.0). Transposase proteins are necessary for efficient DNA transposition. This family includes the amino-terminal region of Tc1, Tc1A, Tc1B and Tc2B transposases of C.elegans. The region encompasses the specific DNA binding and second DNA recognition domains as well as an amino-terminal region of the catalytic domain of Tc3 as described in . Tc3 is a member of the Tc1/mariner family of transposable elements.. +PF01527 Transposase_8;
Pfam-B_527 (release 4.0). Transposase proteins are necessary for efficient DNA transposition. This family consists of various E. coli insertion elements and other bacterial transposases some of which are members of the IS3 family.. +PF03221 Transposase_Tc5;
Tc5 transposase DNA-binding domain. Pfam-B_2955 (release 6.5). +PF02281 Transposase_Tn5;
Transposase Tn5 dimerisation domain. Pfam-B_5683 (release 5.2). Transposons are mobile DNA sequences capable of replication and insertion into the chromosome. Typically transposons code for the transposase enzyme, which catalyses insertion, found between terminal inverted repeats. Tn5 has a unique method of self- regulation in which a truncated version of the transposase enzyme acts as an inhibitor . The catalytic domain of the Tn5 transposon is found in Pfam:PF01609. This domain mediates dimerisation in the known structure.. +PF00576 HIUase/Transthyretin family
This family includes transthyretin that is a thyroid hormone-binding protein that transports thyroxine from the bloodstream to the brain. However, most of the sequences listed in this family do not bind thyroid hormones. They are actually enzymes of the purine catabolism that catalyse the conversion of 5-hydroxyisourate (HIU) to OHCU [2,3]. HIU hydrolysis is the original function of the family and is conserved from bacteria to mammals; transthyretins arose by gene duplications in the vertebrate lineage . HIUases are distinguished in the alignment from the conserved C-terminal YRGS sequence.. +PF03896 TRAP-alpha;
Translocon-associated protein (TRAP), alpha subunit. The alpha-subunit of the TRAP complex (TRAP alpha) is a single-spanning membrane protein of the endoplasmic reticulum (ER) which is found in proximity of nascent polypeptide chains translocating across the membrane .. +PF04051 TRAPP_Bet3;
Transport protein particle (TRAPP) component. Pfam-B_9946 (release 7.3) & Pfam-B_6495 (release 8.0). TRAPP plays a key role in the targeting and/or fusion of ER-to-Golgi transport vesicles with their acceptor compartment. TRAPP is a large multimeric protein that contains at least 10 subunits. This family contains many TRAPP family proteins. The Bet3 subunit is one of the better characterised TRAPP proteins and has a dimeric structure with hydrophobic channels. The channel entrances are located on a putative membrane-interacting surface that is distinctively flat, wide and decorated with positively charged residues. Bet3 is proposed to localise TRAPP to the Golgi .. +PF04956 TrbC/VIRB2 family
Pfam-B_5261 (release 7.6) & Pfam-B_14627 (release 10.0). Conjugal transfer protein, TrbC has been identified as a subunit of the pilus precursor in bacteria. The protein undergoes three processing steps before gaining its mature cyclic structure . This family also contains several VIRB2 type IV secretion proteins. The virB2 gene encodes a putative type IV secretion system and is known to be a pathogenicity factor in Bartonella species .. +PF03743 Bacterial conjugation TrbI-like protein
Pfam-B_776 (release 7.0). Although not essential for conjugation, the TrbI protein greatly increases the conjugational efficiency .. +PF04610 TrbL/VirB6 plasmid conjugal transfer protein
Pfam-B_5275 (release 7.5) & COG3704. +PF03461 TRCF domain
+PF03546 treacle;
Treacher Collins syndrome protein Treacle. +PF00088 trefoil;
Trefoil (P-type) domain. Swissprot_feature_table. +PF01204 Trehalase
Trehalase (EC:3.2.1.28) is known to recycle trehalose to glucose. Trehalose is a physiological hallmark of heat-shock response in yeast and protects proteins and membranes against a variety of stresses. This family is found in conjunction with Pfam:PF07492 in fungi.. +PF02358 Trehalose-phosphatase
Pfam-B_762 (release 5.2). This family consists of trehalose-phosphatases EC:3.1.3.12; these enzymes catalyse the de-phosphorylation of trehalose-6-phosphate to trehalose and orthophosphate. The aligned region is present in trehalose-phosphatases and comprises the entire length of the protein; it is also found in the C-terminus of trehalose-6-phosphate synthase EC:2.4.1.15 adjacent to the trehalose-6-phosphate synthase domain - Pfam:PF00982. It would appear that the two equivalent genes in the E. coli otsBA operon, otsA the trehalose-6-phosphate synthase and otsB trehalose-phosphatase (this family), have undergone gene fusion in most eukaryotes e.g. Swiss:P31688 and Swiss:P93653. Trehalose is a common disaccharide of bacteria, fungi and invertebrates that appears to play a major role in desiccation tolerance .. +PF03973 Triabin
Pfam-B_20829 (release 7.1). Triabin is a serine-protease inhibitor.. +PF02080 TrkA;TrkA-C;
This domain is often found next to the Pfam:PF02254 domain. The exact function of this domain is unknown. It has been suggested that it may bind an unidentified ligand . The domain is predicted to adopt an all beta structure .. +PF02254 KTN;TrkA-N;
Pfam-B_289 (Release 5.3). This domain is found in a wide variety of proteins. These proteins include potassium channels Swiss:P31069, phosphoesterases Swiss:Q59027, and various other transporters. This domain binds to NAD.. +PF02386 Cation transport protein
Pfam-B_529 (release 5.2). This family consists of various cation transport proteins (Trk) and V-type sodium ATP synthase subunit J or translocating ATPase J EC:3.6.1.34. These proteins are involved in active sodium up-take utilising ATP in the process. TrkH a member of the family Swiss:P76769 from E. coli is a hydrophobic membrane protein and determines the specificity and kinetics of cation transport by the TrK system in E. coli .. +PF02005 N2,N2-dimethylguanosine tRNA methyltransferase
This enzyme EC:2.1.1.32 uses S-AdoMet to methylate tRNA. The TRM1 gene of Saccharomyces cerevisiae is necessary for the N2,N2-dimethylguanosine modification of both mitochondrial and cytoplasmic tRNAs . The enzyme is found in both eukaryotes and archaebacteria . +PF00133 tRNA synthetases class I (I, L, M and V)
Other tRNA synthetase sub-families are too dissimilar to be included.. +PF00749 tRNA synthetases class I (E and Q), catalytic domain
Pfam-B_350 (release 2.1). Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln).. +PF03950 tRNA synthetases class I (E and Q), anti-codon binding domain
Pfam-B_350 (release 2.1). Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln).. +PF00750 tRNA synthetases class I (R)
Pfam-B_1276 (release 2.1). Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only arginyl tRNA synthetase.. +PF01406 tRNA synthetases class I (C) catalytic domain
This family includes only cysteinyl tRNA synthetases.. +PF01921 tRNA synthetases class I (K)
This family includes only lysyl tRNA synthetases from prokaryotes.. +PF00152 tRNA synthetases class II (D, K and N)
+PF02081 Tryptophan RNA-binding attenuator protein
+PF00587 tRNA synthetase class II core domain (G, H, P, S and T)
MRC-LMB Genome group. Other tRNA synthetase sub-families are too dissimilar to be included. This domain is the core catalytic domain of tRNA synthetases and includes glycyl, histidyl, prolyl, seryl and threonyl tRNA synthetases.. +PF01411 tRNA synthetases class II (A)
Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only alanyl-tRNA synthetases.. +PF01974 tRNA intron endonuclease, catalytic C-terminal domain
Members of this family cleave pre tRNA at the 5' and 3' splice sites to release the intron EC:3.1.27.9.. +PF02778 tRNA intron endonuclease, N-terminal domain
Members of this family cleave pre tRNA at the 5' and 3' splice sites to release the intron EC:3.1.27.9.. +PF01746 tRNA (Guanine-1)-methyltransferase
Pfam-B_2049 (release 4.1). This is a family of tRNA (Guanine-1)-methyltransferases EC:2.1.1.31. In E.coli K12 this enzyme catalyses the conversion of a guanosine residue to N1-methylguanine in position 37, next to the anticodon, in tRNA .. +PF03054 tRNA methyl transferase
Pfam-B_823 (release 6.4). This family represents tRNA(5-methylaminomethyl-2-thiouridine)-methyltransferase which is involved in the biosynthesis of the modified nucleoside 5-methylaminomethyl-2-thiouridine present in the wobble position of some tRNAs .. +PF04558 Glutaminyl-tRNA synthetase, non-specific RNA binding region part 1
This is a region found N terminal to the catalytic domain of glutaminyl-tRNA synthetase (EC 6.1.1.18) in eukaryotes but not in Escherichia coli. This region is thought to bind RNA in a non-specific manner, enhancing interactions between the tRNA and enzyme, but is not essential for enzyme function .. +PF04557 Glutaminyl-tRNA synthetase, non-specific RNA binding region part 2
This is a region found N terminal to the catalytic domain of glutaminyl-tRNA synthetase (EC 6.1.1.18) in eukaryotes but not in Escherichia coli. This region is thought to bind RNA in a non-specific manner, enhancing interactions between the tRNA and enzyme, but is not essential for enzyme function .. +PF02091 tRNA_synt_A;tRNA_synt_2e;
Glycyl-tRNA synthetase alpha subunit. +PF02092 tRNA_synt_B;
Glycyl-tRNA synthetase beta subunit. +PF00261 Tropomyosin
+PF03301 Tryptophan 2,3-dioxygenase
Pfam-B_4263 (release 6.5). +PF04820 Tryptophan halogenase
Pfam-B_2531 (release 7.6). Tryptophan halogenase catalyses the chlorination of tryptophan to form 7-chlorotryptophan. This is the first step in the biosynthesis of pyrrolnitrin, an antibiotic with broad-spectrum anti-fungal activity. Tryptophan halogenase is NADH-dependent .. +PF01371 Trp repressor protein
This protein binds to tryptophan and represses transcription of the Trp operon.. +PF00290 trp_syntA;
Tryptophan synthase alpha chain. +PF03222 Tryptophan/tyrosine permease family
Pfam-B_2873 (release 6.5). +PF01509 TruB family pseudouridylate synthase (N terminal domain)
Pfam-B_792 (release 4.0). Members of this family are involved in modifying bases in RNA molecules. They carry out the conversion of uracil bases to pseudouridine. This family includes TruB, a pseudouridylate synthase that specifically converts uracil 55 to pseudouridine in most tRNAs. This family also includes Cbf5p that modifies rRNA .. +PF01456 Tryp_mucin;
Mucin-like glycoprotein. Prodom_3102 (release 99.1) & Pfam-B_3837 (Release 8.0). This family of trypanosomal proteins resembles vertebrate mucins. The protein consists of three regions. The N and C termini are conserved between all members of the family, whereas the central region is not well conserved and contains a large number of threonine residues which can be glycosylated . Indirect evidence suggested that these genes might encode the core protein of parasite mucins, glycoproteins that were proposed to be involved in the interaction with, and invasion of, mammalian host cells. This family contains an N-terminal signal peptide.. +PF00913 Trypanosome variant surface glycoprotein (A-type)
Pfam-B_1351 (release 3.0) & Pfam-B_2618 (release 8.0). The trypanosome parasite expresses these proteins to evade the immune response. This family includes a variety of surface proteins such as Trypanosoma brucei VSGs such as expression site associated gene (ESAG) 6 and 7 .. +PF03249 Type specific antigen
Pfam-B_3060 (release 6.5). There are several antigenic variants in Rickettsia tsutsugamushi, and a type-specific antigen (TSA) of 56-kilodaltons located on the rickettsial surface is responsible for the variation [1,2]. TSA proteins are probably integral membrane proteins.. +PF01166 TSC-22/dip/bun family
+PF04668 Twisted gastrulation (Tsg) protein conserved region
Pfam-B_4556 (release 7.5). Tsg was identified in Drosophila as being required to specify the dorsal-most structures in the embryo, for example amnioserosa. Biochemical experiments have revealed three key properties of Tsg: it can synergistically inhibit Dpp/BMP action in both Drosophila and vertebrates by forming a tripartite complete between itself, SOG/chordin and a BMP ligand; Tsg seems to enhance the Tld/BMP-1-mediated cleavage rate of SOG/chordin and may change the preference of site utilisation; Tsg can promote the dissociation of chordin cysteine-rich-containing fragments from the ligand to inhibit BMP signalling [1,2]. . +PF04705 Thiostrepton-resistance methylase, N terminus
This region is found in some members of the SpoU-type rRNA methylase family (Pfam:PF00588).. +PF00090 tsp_1;
Thrombospondin type 1 domain. +PF02412 tsp_3;
Thrombospondin type 3 repeat. SwissProt & Pfam-B_2972 (Release 8.0). The thrombospondin repeat is a short aspartate rich repeat which binds to calcium ions. The repeat was initially identified in thrombospondin proteins that contained 7 of these repeats . The repeat lacks defined secondary structure .. +PF03073 TspO/MBR family
Pfam-B_1882 (release 6.4). Tryptophan-rich sensory protein (TspO) is an integral membrane protein that acts as a negative regulator of the expression of specific photosynthesis genes in response to oxygen/light . It is involved in the efflux of porphyrin intermediates from the cell. This reduces the activity of coproporphyrinogen III oxidase, which is thought to lead to the accumulation of a putative repressor molecule that inhibits the expression of specific photosynthesis genes. Several conserved aromatic residues are necessary for TspO function: they are thought to be involved in binding porphyrin intermediates . In , the rat mitochondrial peripheral benzodiazepine receptor (MBR) was shown to not only retain its structure within a bacterial outer membrane, but also to be able to functionally substitute for TspO in TspO- mutants, and to act in a similar manner to TspO in its in situ location: the outer mitochondrial membrane. The biological significance of MBR remains unclear, however. It is thought to be involved in a variety of cellular functions, including cholesterol transport in steroidogenic tissues.. +PF02956 TT viral orf 1
Pfam-B_1612 (release 6.4). TT virus (TTV), isolated initially from a Japanese patient with hepatitis of unknown aetiology, has since been found to infect both healthy and diseased individuals and numerous prevalence studies have raised questions about its role in unexplained hepatitis. ORF1 is a large 750 residue protein. The N-terminal half of this protein corresponds to the capsid protein.. +PF02957 TT viral ORF2
Pfam-B_1489 (release 6.4) & Pfam-B_4693 (release 7.6). TT virus (TTV), isolated initially from a Japanese patient with hepatitis of unknown aetiology, has since been found to infect both healthy and diseased individuals, and numerous prevalence studies have raised questions about its role in unexplained hepatitis. ORF2 is a 150 residue protein. This family also includes the VP2 protein from the chicken anaemia virus which is a gyrovirus. Gyroviruses are small circular single stranded viruses. The proteins contain a set of conserved cysteine and histidine residues suggesting a zinc binding domain.. +PF03542 Tuberin
Tuberous sclerosis complex (TSC) is an autosomal dominant disorder and is characterised by the presence of hamartomas in many organs, such as brain, skin, heart, lung, and kidney. It is caused by mutation of either the TSC1 or the TSC2 tumour suppressor gene. The TSC2 gene codes for tuberin and interacts with hamartin Pfam:PF04388 , containing two coiled-coil regions, which have been shown to mediate binding to tuberin. These two proteins function within the same pathway(s) regulating cell cycle, cell growth, adhesion, and vesicular trafficking .. +PF00091 tubulin;
Tubulin/FtsZ family, GTPase domain. This family includes the tubulin alpha, beta and gamma chains, as well as the bacterial FtsZ family of proteins. Members of this family are involved in polymer formation. FtsZ is the polymer-forming protein of bacterial cell division. It is part of a ring in the middle of the dividing cell that is required for constriction of cell membrane and cell envelope to yield two daughter cells. FtsZ and tubulin are GTPases. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. Tubulin is the major component of microtubules.. +PF00418 tubulin-binding;
Tau and MAP protein, tubulin-binding repeat. This family includes the vertebrate proteins MAP2, MAP4 and Tau, as well as other animal homologs. MAP4 is present in many tissues but is usually absent from neurons; MAP2 and Tau are mainly neuronal. Members of this family have the ability to bind to and stabilise microtubules. As a result, they are involved in neuronal migration, supporting dendrite elongation, and regulating microtubules during mitotic metaphase. Note that Tau (Swiss:P10636) is involved in neurofibrillary tangle formation in Alzheimer's disease and some other dementias. This family features a C-terminal microtubule binding repeat that contains a conserved KXGS motif .. +PF01021 TYA transposon protein
Pfam-B_90 (release 3.0). Ty are yeast transposons. A 5.7kb transcript codes for p3 a fusion protein of TYA and TYB. The TYA protein is analogous to the gag protein of retroviruses. TYA is cleaved to form a 46kd protein which can form mature virion-like particles .. +PF03251 Tymovirus 45/70Kd protein
Pfam-B_3418 (release 6.5). Tymoviruses are single stranded RNA viruses. This family includes a protein of unknown function that has been named based on its molecular weight. Tymoviruses such as the ononis yellow mosaic tymovirus encode only three proteins. Of these, two are overlapping: this protein overlaps a larger ORF that is thought to be the polymerase .. +PF00983 Tymovirus coat protein
Pfam-B_1429 (release 2.1). +PF00264 tyrosinase;
Common central domain of tyrosinase. This family also contains polyphenol oxidases and some hemocyanins. Binds two copper ions via two sets of three histidines. This family is related to Pfam:PF00372.. +PF03064 HSV U79 / HCMV P34
Pfam-B_2985 (release 6.4). This family represents herpes virus protein U79 and cytomegalovirus early phosphoprotein P34 (UL112).. +PF02134 UBACT_repeat;
Repeat in ubiquitin-activating (UBA) protein. +PF01977 DUF117; UPF0096;
3-octaprenyl-4-hydroxybenzoate carboxy-lyase. This family has been characterised as 3-octaprenyl-4-hydroxybenzoate carboxy-lyase enzymes . This enzyme catalyses the third reaction in ubiquinone biosynthesis. For optimal activity the carboxy-lyase was shown to require Mn2+ .. +PF01209 ubiE/COQ5 methyltransferase family
+PF03981 Ubiquinol-cytochrome C chaperone
Pfam-B_5272 (release 7.2). +PF02271 Ubiquinol-cytochrome C reductase complex 14kD subunit
Pfam-B_4192 (release 5.2). The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex . This Pfam family represents the 14kD (or VI) subunit of the complex which is not directly involved in electron transfer, but has a role in assembly of the complex .. +PF02320 Ubiquinol-cytochrome C reductase hinge protein
Pfam-B_11849 (release 5.2). The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex . This Pfam family represents the 'hinge' protein of the complex which is thought to mediate formation of the cytochrome c1 and cytochrome c complex.. +PF02921 Ubiquinol cytochrome reductase transmembrane region
Each subunit of the cytochrome bc1 complex provides a single helix (this family) to make up the transmembrane region of the complex.. +PF02939 UcrQ family
The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex . This family represents the 9.5 kDa subunit of the complex. . +PF00984 UDP-glucose/GDP-mannose dehydrogenase family, central domain
Pfam-B_1105 (release 3.0). The UDP-glucose/GDP-mannose dehydrogenases are a small group of enzymes which possess the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate .. +PF03720 UDP-glucose/GDP-mannose dehydrogenase family, UDP binding domain
Pfam-B_1105 (release 3.0). The UDP-glucose/GDP-mannose dehydrogenases are a small group of enzymes which possess the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate .. +PF03721 UDP-glucose/GDP-mannose dehydrogenase family, NAD binding domain
Pfam-B_1105 (release 3.0). The UDP-glucose/GDP-mannose dehydrogenases are a small group of enzymes which possess the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate .. +PF01704 UTP--glucose-1-phosphate uridylyltransferase
Pfam-B_1634 (release 4.1). This family consists of UTP--glucose-1-phosphate uridylyltransferases, EC:2.7.7.9. Also known as UDP-glucose pyrophosphorylase (UDPGP) and Glucose-1-phosphate uridylyltransferase. UTP--glucose-1-phosphate uridylyltransferase catalyses the interconversion of MgUTP + glucose-1-phosphate and UDP-glucose + MgPPi . UDP-glucose is an important intermediate in mammalian carbohydrate interconversion involved in various metabolic roles depending on tissue type . In Dictyostelium (slime mold) mutants in this enzyme abort the development cycle . Also within the family is UDP-N-acetylglucosamine Swiss:Q16222 or AGX1 and two hypothetical proteins from Borrelia burgdorferi the lyme disease spirochaete Swiss:O51893 and Swiss:O51036.. +PF00201 UDP-glucoronosyl and UDP-glucosyl transferase
+PF03152 Ubiquitin fusion degradation protein UFD1
Pfam-B_3272 (release 6.5). Post-translational ubiquitin-protein conjugates are recognised for degradation by the ubiquitin fusion degradation (UFD) pathway. Several proteins involved in this pathway have been identified . This family includes UFD1, a 40kD protein that is essential for vegetative cell viability . The human UFD1 gene is expressed at high levels during embryogenesis, especially in the eyes and in the inner ear primordia and is thought to be important in the determination of ectoderm-derived structures, including neural crest cells. In addition, this gene is deleted in the CATCH-22 (cardiac defects, abnormal facies, thymic hypoplasia, cleft palate and hypocalcaemia with deletions on chromosome 22) syndrome. This clinical syndrome is associated with a variety of developmental defects, all characterised by microdeletions on 22q11.2. Two such developmental defects are the DiGeorge syndrome OMIM:188400, and the velo-cardio- facial syndrome OMIM:145410. Several of the abnormalities associated with these conditions are thought to be due to defective neural crest cell differentiation .. +PF02512 UK_protein;
Virulence determinant. Pfam-B_2106 (release 5.4). The UK protein is an African swine fever virus (ASFV) protein that is highly conserved amongst strains, and is an important viral virulence determinant for domestic pigs .. +PF03044 UL16_UL94;
Herpesvirus UL16/UL94 family. Pfam-B_4392 (release 6.4). This family groups together HSV-1 UL16 Swiss:P10200, HSV-6 ORF11R Swiss:P24442, EHV-1 46 Swiss:P28970, HCMV UL94 Swiss:P16800, EBV BGLF2 Swiss:P03221 and VZV 44 Swiss:P09293. UL16 protein may play a role in capsid maturation including DNA packaging/cleavage . In immunofluorescence studies , UL16 was localised to the nucleus of infected cells in areas containing high concentrations of HSV capsid proteins. These nuclear compartments have been described previously as viral assemblons and are distinct from compartments containing replicating DNA. Localisation within assemblons argues for a role of UL16 encoded protein in capsid assembly or maturation .. +PF03252 UL21;
Pfam-B_3264 (release 6.5). The UL21 protein appears to be a dispensable component in herpesviruses .. +PF01499 UL25;
Herpesvirus UL25 family. Pfam-B_700 (release 4.0). The herpesvirus UL25 gene product is a virion component involved in virus penetration and capsid assembly. The product of the UL25 gene is required for packaging but not cleavage of replicated viral DNA . This family includes a number of herpesvirus proteins: EHV-1 36, EBV BVRF1 Swiss:P03233, HCMV UL77 Swiss:P16726, ILTV ORF2 Swiss:P23987, and VZV gene 34 Swiss:P09287.. +PF04496 UL35;
Herpesvirus UL35 family . Pfam-B_3981 (release 7.5). UL35 represents a true late gene which encodes a 12-kDa capsid protein .. +PF03970 UL37;
Herpesvirus UL37 tegument protein. UL37 interacts with UL36, which is thought to be an important early step in tegumentation during virion morphogenesis in the cytoplasm .. +PF02282 UL42;
DNA polymerase processivity factor (UL42). Pfam-B_5119 (release 5.2). The DNA polymerase processivity factor (UL42) of herpes simplex virus forms a heterodimer with UL30 to create the viral DNA polymerase complex. UL42 functions to increase the processivity of polymerisation and makes little contribution to the catalytic activity of the polymerase.. +PF03117 UL49;
Pfam-B_2110 (release 6.5). Members of this family, found in several herpesviruses, include EBV BFRF2 Swiss:P14347 and other UL49 proteins (e.g. HCMVA UL49 Swiss:P16786, HSV6 U33 Swiss:P52441). There are eight conserved cysteine residues in this alignment, all lying towards the C-terminus. Their function is unknown.. +PF03121 UL52_UL70;
Herpesviridae UL52/UL70 DNA primase. Pfam-B_203 (release 6.5). Herpes simplex virus type 1 DNA replication in host cells is known to be mediated by seven viral-encoded proteins, three of which form a heterotrimeric DNA helicase-primase complex. This complex consists of UL5, UL8, and UL52 subunits. Heterodimers consisting of UL5 and UL52 have been shown to retain both helicase and primase activities. Nevertheless, UL8 is still essential for replication: though it lacks any DNA binding or catalytic activities, it is involved in the transport of UL5-UL52 and it also interacts with other replication proteins. The molecular mechanisms of the UL5-UL52 catalytic activities are not known. While UL5 is associated with DNA helicase activity and UL52 with DNA primase activity, the helicase activity requires the interaction of UL5 and UL52 [see 2,3]. It is not known if the primase activity can be maintained by UL52 alone. The region encompassed by residues 610-636 of HSV1 UL52 Swiss:P10236 is thought to contain a divalent metal cation binding motif. Indeed, this region contains several aspartate and glutamate residues that might be involved in divalent cation binding. The biological significance of UL52-UL8 interaction is not known. Yeast two-hybrid analysis together with immunoprecipitation experiments have shown that the HSV1 UL52 region between residues 366-914 is essential for this interaction, while the first 349 N-terminal residues are dispensable . This family also includes protein UL70 from cytomegalovirus (CMV, a subgroup of the Herpesviridae) strains (e.g. Swiss:P17149), which, by analogy with UL52, is thought to have DNA primase activity. Indeed, CMV strains also possess a DNA helicase-primase complex, the other subunits being protein UL105 (with known similarity to HSV1 UL5) and protein UL102.. +PF03049 UL79;
Pfam-B_2433 (release 6.4). Members of this family are functionally uncharacterised proteins from herpesviruses. This family groups together HSV-6 U52 Swiss:P52469, HVS-1 18 Swiss:Q01003 and HCMV UL79 Swiss:P16752.. +PF03043 UL87;
Herpesvirus UL87 family. Pfam-B_1736 (release 6.4). Members of this family are functionally uncharacterised. This family groups together EBV BcRF1 Swiss:P25215, HSV-6 U58 Swiss:P24437, HVS-1 24 Swiss:Q01007 and HCMV UL87 Swiss:P16730. The proteins range from 575 to 950 amino acids in length.. +PF03048 2111; UL92;
Pfam-B_2111 (release 6.4). +PF03038 UL95;
Pfam-B_2060 (release 6.4). Members of this family, found in several herpesviruses, include EBV BGLF3 Swiss:P03220 and other UL95 proteins (e.g. HCMV UL95 Swiss:P16801, HVS-1 34 Swiss:Q01023, HSV6 U67 Swiss:P24444). Their function is unknown.. +PF04817 Umbravirus long distance movement (LDM) family
Pfam-B_5103 (release 7.6). The long distance movement protein of Umbraviruses mediates the movement of viral RNA through the phloem of infected plants .. +PF00021 u-PAR/Ly-6 domain
This extracellular disulphide bond rich domain is related to Pfam:PF00087.. +PF00919 Uncharacterized protein family UPF0004
Pfam-B_1257 (release 3.0). This family is the N terminal half of the Prosite family. The C-terminal half has been shown to be related to MiaB proteins [1,2]. This domain is nearly always found in conjunction with Pfam:PF04055 and Pfam:PF01938 although its function is uncertain.. +PF03649 Uncharacterised protein family (UPF0014)
+PF01169 Uncharacterized protein family UPF0016
This family contains integral membrane proteins of unknown function. Most members of the family contain two copies of a region that contains an EXGD motif. Each of these regions contains three predicted transmembrane regions.. +PF01170 Putative RNA methylase family UPF0020
This domain is probably a methylase. It is associated with the THUMP domain that also occurs with RNA modification domains .. +PF01171 UPF0021;ATP_bind3;
This family of proteins belongs to the PP-loop superfamily .. +PF01172 UPF0023;
Shwachman-Bodian-Diamond syndrome (SBDS) protein . This family is highly conserved in species ranging from archaea to vertebrates and plants. The family contains several Shwachman-Bodian-Diamond syndrome (SBDS) proteins from both mouse and humans. Shwachman-Diamond syndrome is an autosomal recessive disorder with clinical features that include pancreatic exocrine insufficiency, haematological dysfunction and skeletal abnormalities. It is characterised by bone marrow failure and leukemia predisposition. Members of this family play a role in RNA metabolism . In yeast these proteins have been shown to be critical for the release and recycling of the nucleolar shuttling factor Tif6 from pre-60S ribosomes, a key step in 60S maturation and translational activation of ribosomes . This data links defective late 60S subunit maturation to an inherited bone marrow failure syndrome associated with leukemia predisposition .. +PF01142 tRNA pseudouridine synthase D (TruD)
TruD is responsible for synthesis of pseudouridine from uracil-13 in transfer RNAs . The structure of TruD reveals an overall V-shaped molecule which contains an RNA-binding cleft .. +PF01139 UPF0027;
tRNA-splicing ligase RtcB. This family of RNA ligases (EC:6.5.1.3) join 2',3'-cyclic phosphate and 5'-OH ends. They catalyse the splicing of tRNA and may also participate in tRNA repair and recovery from stress-induced RNA damage [1-3].. +PF01205 Uncharacterized protein family UPF0029
+PF01256 UPF0031; carb_kinase;
This family is related to Pfam:PF02110 and Pfam:PF00294 implying that it also is a carbohydrate kinase. (personal obs Yeats C).. +PF00902 UPF0032;
Sec-independent protein translocase protein (TatC). Pfam-B_1212 (release 3.0). The bacterial Tat system has a remarkable ability to transport folded proteins even enzyme complexes across the cytoplasmic membrane. It is structurally and mechanistically similar to the Delta pH-driven thylakoidal protein import pathway. A functional Tat system or Delta pH-dependent pathway requires three integral membrane proteins: TatA/Tha4, TatB/Hcf106 and TatC/cpTatC. The TatC protein is essential for the function of both pathways. It might be involved in twin-arginine signal peptide recognition, protein translocation and proton translocation. Sequence analysis predicts that TatC contains six transmembrane helices (TMHs), and experimental data confirmed that N- and C-termini of TatC or cpTatC are exposed to the cytoplasmic or stromal face of the membrane. The cytoplasmic N-terminus and the first cytoplasmic loop region of the Escherichia coli TatC protein are essential for protein export. At least two TatC molecules co-exist within each Tat translocon .. +PF01206 UPF0033; SirA;
Sulfurtransferase TusA. This family includes the TusA sulfurtransferases .. +PF02381 UPF0040;
This small 70 amino acid domain is found duplicated in a family of bacterial proteins. These proteins may be DNA-binding transcription factors (Pers. comm. A Andreeva & A Murzin).. +PF03650 UPF0041;
Uncharacterised protein family (UPF0041). +PF03668 UPF0042; ATP_bind2;
P-loop ATPase protein family. This family contains an ATP-binding site and could be an ATPase (personal obs:C Yeats).. +PF01985 UPF0044;
CRS1 / YhbY (CRM) domain. Escherichia coli YhbY is associated with pre-50S ribosomal subunits, which implies a function in ribosome assembly. GFP fused to a single-domain CRM protein from maize localises to the nucleolus, suggesting that an analogous activity may have been retained in plants . A CRM domain containing protein in plant chloroplasts has been shown to function in group I and II intron splicing .\. In vitro experiments with an isolated maize CRM domain have shown it to have RNA binding activity. These and other results suggest that the CRM domain evolved in the context of ribosome function prior to the divergence of Archaea and Bacteria, that this function has been maintained in extant prokaryotes, and that the domain was recruited to serve as an RNA binding module during the evolution of plant genomes . YhbY has a fold similar to that of the C-terminal domain of translation initiation factor 3 (IF3C), which binds to 16S rRNA in the 30S ribosome .. +PF01894 Uncharacterised protein family UPF0047
This family has no known function. The alignment contains a conserved aspartate and histidine that may be functionally important.. +PF01458 Uncharacterized protein family (UPF0051)
Prodom_3219 (release 99.1). +PF01933 Uncharacterised protein family UPF0052
+PF02130 Uncharacterized protein family UPF0054
+PF01679 UPF0057;
Proteolipid membrane potential modulator. Pfam-B_2192 (release 4.1). Pmp3 is an evolutionarily conserved proteolipid in the plasma membrane which, in S. pombe, is transcriptionally regulated by the Spc1 stress MAPK (mitogen-activated protein kinases) pathway. It functions to modulate the membrane potential, particularly to resist high cellular cation concentration. In eukaryotic organisms, stress-activated mitogen-activated protein kinases play crucial roles in transmitting environmental signals that will regulate gene expression for allowing the cell to adapt to cellular stress. Pmp3-like proteins are highly conserved in bacteria, yeast, nematode and plants.. +PF01893 Uncharacterised protein family UPF0058
This archaebacterial protein has no known function.. +PF02694 Uncharacterised BCR, YnfA/UPF0060 family
+PF02696 Uncharacterized ACR, YdiU/UPF0061 family
+PF03401 UPF0065; Bug;
Tripartite tricarboxylate transporter family receptor. Pfam-B_3343 (release 6.6). These probable extra-cytoplasmic solute receptors are strongly overrepresented in several beta-proteobacteria . This family, formerly known as Bug - Bordetella uptake gene (bug) product - is a family of bacterial tripartite tricarboxylate receptors of the extracytoplasmic solute binding receptor-dependent transporter group of families, distinct from the ABC and TRAP-T families . The TctABC system has been characterised in S. typhimurium , and TctC is the extracytoplasmic tricarboxylate-binding receptor which binds the transporters TctA and TctB, two integral membrane proteins. Complete three-component systems are found only in bacteria .. +PF01980 Uncharacterised protein family UPF0066
+PF03006 UPF0073;
Haemolysin-III related. Pfam-B_1581 (release 6.4). Members of this family are integral membrane proteins. This family includes a protein with hemolytic activity from Bacillus cereus . It has been proposed that YOL002c encodes a Saccharomyces cerevisiae protein that plays a key role in metabolic pathways that regulate lipid and phosphate metabolism . In eukaryotes, members are seven-transmembrane pass molecules found to encode functional receptors with a broad range of apparent ligand specificities, including progestin and adipoQ receptors, and hence have been named PAQR proteins . The mammalian members include progesterone binding proteins . Unlike the case with GPCR receptor proteins, the evolutionary ancestry of the members of this family can be traced back to the Archaea.. +PF02082 UPF0074;
Transcriptional regulator. This family is related to Pfam:PF01022 and other transcription regulation families (personal obs: Yeats C).. +PF03702 Uncharacterised protein family (UPF0075)
The proteins in this family are about 370 amino acids long and have no known function.. +PF02367 Uncharacterised P-loop hydrolase UPF0079
This uncharacterised family contains a P-loop.. +PF03652 Uncharacterised protein family (UPF0081)
+PF01868 DUF49;
Domain of unknown function UPF0086. This family consists of several archaeal and eukaryotic proteins. The archaeal proteins are found to be expressed within ribosomal operons and several of the sequences are described as ribonuclease P protein subunit p29 proteins.. +PF03007 UPF0089;
Wax ester synthase-like Acyl-CoA acyltransferase domain. Pfam-B_1896 (release 6.4). This domain is found in wax ester synthase genes such as Swiss:Q8GGG1. In these proteins this domain catalyses the CoA dependent acyltransferase reaction with fatty alcohols to form wax esters .. +PF03653 Uncharacterised protein family (UPF0093)
+PF02016 UPF0094; Peptidase_U61;
Muramoyl-tetrapeptide carboxypeptidase hydrolyses a peptide bond between a di-basic amino acid and the C-terminal D-alanine in the tetrapeptide moiety in peptidoglycan. This cleaves the bond between an L- and a D-amino acid. The function of this activity is in murein recycling. This family also includes the microcin c7 self-immunity protein Swiss:Q47511. This family corresponds to Merops family S66.. +PF01981 DUF119;UPF0099; Pep-tRNA_hydrol;
Peptidyl-tRNA hydrolase PTH2. Peptidyl-tRNA hydrolases are enzymes that release tRNAs from peptidyl-tRNA during translation.. +PF02021 Uncharacterised protein family UPF0102
The function of this family is unknown.. +PF01875 DUF52; UPF0103;
This family contains members from all branches of life. The molecular function of this protein is unknown, but Memo (mediator of ErbB2-driven cell motility) a human protein is included in this family . It has been suggested that Memo controls cell migration by relaying extracellular chemotactic signals to the microtubule cytoskeleton .. +PF03706 Uncharacterised protein family (UPF0104)
This family of proteins are integral membrane proteins. These proteins are uncharacterised but contain a conserved PG motif. Some members of this family are annotated as dolichol-P-glucose synthetase and contain a Pfam:PF00535 domain.. +PF03656 UPF0108;
The Pam16 protein (Swiss:P42949) is the fifth essential subunit of the pre-sequence translocase-associated protein import motor (PAM) . In Saccharomyces cerevisiae, Pam16 is required for preprotein translocation into the matrix, but not for protein insertion into the inner membrane . Pam16 has a degenerate J domain. J-domain proteins play important regulatory roles as co-chaperones, recruiting Hsp70 partners and accelerating the ATP-hydrolysis step of the chaperone cycle . Pam16's J-like domain strongly interacts with Pam18's J domain, leading to a productive interaction of Pam18 with mtHsp70 at the mitochondria import channel . Pam18 stimulates the ATPase activity of mtHsp70.. +PF03657 Uncharacterised protein family (UPF0113)
+PF03350 Uncharacterized protein family, UPF0114
Pfam-B_3587 (release 6.5) & Pfam-B_10597 (release 10.0). +PF01594 DUF20;
Domain of unknown function DUF20. Pfam-B_495 (release 4.1). This transmembrane region is found in putative permeases and predicted transmembrane proteins; it has no known function. It is not clear what source suggested that these proteins may be permeases and this information should be treated with caution.. +PF03715 UPF0120;
At least one member, Noc2p from yeast, is required for a late step in 60S subunit export from the nucleus . It has also been shown to co-precipitate with Nug1p, a nuclear GTPase also required for ribosome nucleus export . This family was formerly known as UPF0120.. +PF03661 Uncharacterised protein family (UPF0121)
Uncharacterised integral membrane protein family.. +PF04297 Putative helix-turn-helix protein, YlxM / p13 like
Members of this family are predicted to contain a helix-turn-helix motif, for example residues 37-55 in Mycoplasma mycoides p13 (Swiss:O05290). Genes encoding family members are often part of operons that encode components of the SRP pathway, and this protein may regulate the expression of an operon related to the SRP pathway .. +PF03660 UPF0123;
This family of proteins belongs to the superfamily of PHD-finger proteins. At least one example, from mouse, may act as a chromatin-associated protein . The S. pombe ini1 gene is essential, required for splicing . It is localised in the nucleus, but not detected in the nucleolus and can be complemented by human ini1 .. +PF03658 UPF0125;
RnfH family Ubiquitin. A member of the RnfH family of the ubiquitin superfamily. Members of this family strongly co-occur in two distinct gene neighborhood contexts. In one it is associated with a START domain protein, a membrane protein SmpA and the transfer mRNA binding protein SmpB. This association suggests a possible role in the SmpB-tmRNA-based tagging and degradation system of bacteria, which is interesting given that other members of the ubiquitin system are analogously involved in protein-tagging and degradation across eukaryotes and various prokaryotes. The second context in which the RnfH genes are present is in a membrane associated complex involved in transporting electrons for various reductive reactions such as nitrogen fixation .. +PF03458 UPF0126 domain
Domain always found as pair in bacterial membrane proteins of unknown function. This domain contains three transmembrane helices. The conserved glycines are suggestive of an ion channel (C. Yeats unpublished obs.).. +PF03673 Uncharacterised protein family (UPF0128)
The members of this family are about 240 amino acids in length. The proteins are as yet uncharacterised.. +PF03647 UPF0136; TMEM14;
Transmembrane proteins 14C. Pfam-B_2984 (release 7.0). This family of short membrane proteins are as yet uncharacterised.. +PF03677 Uncharacterised protein family (UPF0137)
This family includes GP6-D a virulence plasmid encoded protein.. +PF03669 Uncharacterised protein family (UPF0139)
+PF03686 Uncharacterised protein family (UPF0146)
The function of this family of proteins is unknown.. +PF03685 Uncharacterised protein family (UPF0147)
This family of small proteins have no known function.. +PF03695 Uncharacterised protein family (UPF0149)
The protein in this family are about 190 amino acids long. The function of these proteins is unknown.. +PF03681 Uncharacterised protein family (UPF0150)
This family of small proteins is uncharacterised. In Swiss:Q9A3L8 this domain is found next to a DNA binding helix-turn-helix domain Pfam:PF01402, which suggests that this is some kind of ligand binding domain. The structure of this domain suggests that these domains oligomerise and due to structural similarities may bind to RNA. The monomer adopts an alpha-beta-beta-beta-alpha fold and forms a homotetramer. Based on the properties and functions of structural homologues of the HB8 monomer, the protein is speculated to be involved in RNA metabolism, including RNA binding and cleavage .. +PF03692 UPF0153; FliB;
Putative zinc- or iron-chelating domain. This family of proteins contains 8 conserved cysteines. It has in the past been annotated as being one of the complex of proteins of the flagellar Fli complex. However this was due to a mis-annotation of the original Salmonella LT2 Genbank entry of 'fliB'. With all its conserved cysteines it is possibly a domain that chelates iron or zinc ions.. +PF03672 Uncharacterised protein family (UPF0154)
This family contains a set of short bacterial proteins of unknown function.. +PF03693 Uncharacterised protein family (UPF0156)
This family of proteins are about 80 amino acids in length and their function is unknown. The proteins contain a conserved GRY motif. This family appears to be related to ribbon-helix-helix DNA-binding proteins.. +PF04229 UPF0157;
This family has been suggested to belong to the nucleotidyltransferase superfamily . It occurs at the C-terminus of dephospho-CoA kinase (CoaE) in a number of cases, where it plays a role in the proper folding of the enzyme .. +PF03682 Uncharacterised protein family (UPF0158)
+PF03690 Uncharacterised protein family (UPF0160)
This family of proteins contains a large number of metal binding residues. The patterns are suggestive of a phosphoesterase function. The conserved DHH motif may mean this family is related to Pfam:PF01368.. +PF03687 Uncharacterised protein family (UPF0164)
This family of uncharacterised proteins are only found in Treponema pallidum. They contain a putative signal peptide so may be secreted proteins.. +PF03691 Uncharacterised protein family (UPF0167)
The proteins in this family are about 200 amino acids long and each contain 3 CXXC motifs.. +PF03666 UPF0171;
Nitrogen Permease regulator of amino acid transport activity 3. This family, also known in yeasts as Rmd11, complexes with NPR2, Pfam:PF06218. This complex heterodimer is responsible for inactivating TORC1, an evolutionarily conserved protein complex that controls cell size via nutritional input signals, specifically, in response to amino acid starvation.. +PF03665 Uncharacterised protein family (UPF0172)
In Chlamydomonas reinhardtii the protein TLA1 (truncated light-harvesting chlorophyll antenna size) apparently regulates genes that define the chlorophyll-a antenna size in the photosynthetic apparatus . This family was formerly known as UPF0172.. +PF02476 US2 family
Pfam-B_2256 (release 5.4). This is a family of unique short (US) region proteins from the herpesvirus strain. The US2 family have no known function. . +PF03683 Uncharacterised protein family (UPF0175)
This family contains small proteins of unknown function.. +PF03698 Uncharacterised protein family (UPF0180)
The members of this family are small uncharacterised proteins.. +PF03701 Uncharacterised protein family (UPF0181)
This family contains small proteins of about 50 amino acids of unknown function.\. The family includes YoaH Swiss:P76260.. +PF03670 Uncharacterised protein family (UPF0184)
+PF04050 Up-frameshift suppressor 2
Pfam-B_14721 (release 7.3);. Transcripts harbouring premature signals for translation termination are recognised and rapidly degraded by eukaryotic cells through a pathway known as nonsense-mediated mRNA decay. In Saccharomyces cerevisiae, three trans-acting factors (Upf1 to Upf3) are required for nonsense-mediated mRNA decay .. +PF01255 UPF0015;UPP_synthetase;
Putative undecaprenyl diphosphate synthase. Previously known as uncharacterized protein family UPF0015, a single member of this family Swiss:O82827 has been identified as an undecaprenyl diphosphate synthase .. +PF00449 urease;
Urease alpha-subunit, N-terminal domain. The N-terminal domain is a composite domain and plays a major trimer stabilising role by contacting the catalytic domain of the symmetry related alpha-subunit.. +PF00699 Urease beta subunit
Pfam-B_405 (release 2.1). This subunit is known as alpha in Helicobacter.. +PF00547 urease_gamma;
Urease, gamma subunit. Urease is a nickel-binding enzyme that catalyses the hydrolysis of urea to carbon dioxide and ammonia.. +PF01774 UreD urease accessory protein
Pfam-B_1109 (release 4.2). UreD is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid . UreD is involved in activation of the urease enzyme via the UreD-UreF-UreG-urease complex and is required for urease nickel metallocenter assembly . See also UreF Pfam:PF01730, UreG Pfam:PF01495. . +PF05194 UreE urease accessory protein, C-terminal domain
Pfam-B_6279 (release 6.1). UreE is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid. The C-terminal region of members of this family contains a His rich Nickel binding site.. +PF01730 UreF
Pfam-B_2037 (release 4.1). This family consists of the Urease accessory protein UreF. The urease enzyme (urea amidohydrolase) hydrolyses urea into ammonia and carbamic acid . UreF is proposed to modulate the activation process of urease by eliminating the binding of nickel ions to noncarbamylated protein .. +PF04115 Ureidoglycolate hydrolase
Pfam-B_9183 (release 7.3);. Ureidoglycolate hydrolase (EC:3.5.3.19) carries out the third step in the degradation of allantoin.. +PF01014 Uricase
Pfam-B_1333 (release 3.0). +PF01208 Uroporphyrinogen decarboxylase (URO-D)
+PF01175 Urocanase
+PF02083 Urotensin II
+PF02393 US22 like
Pfam-B_1016 (release 5.2). US22 proteins have been found across many animal DNA viruses and some vertebrates . The name sake of this family US22 Swiss:P09722 is an early nuclear protein that is secreted from cells . The US22 family may have a role in virus replication and pathogenesis . Domain analysis showed that US22 proteins\. usually contain two copies of conserved modules which is homologous to several other families like SMI1 and SYD (commonly called SUKH superfamily) . Bacterial operon analysis revealed that all bacterial SUKH members function as immunity proteins against various toxins. Thus US22 family is predicted to counter diverse anti-viral responses by interacting with specific host proteins . . +PF00577 Outer membrane usher protein
MRC-LMB Genome group and Prosite. In Gram-negative bacteria the biogenesis of fimbriae (or pili) requires a two- component assembly and transport system which is composed of a periplasmic chaperone and an outer membrane protein which has been termed a molecular 'usher' [1-3]. The usher protein is rather large (from 86 to 100 Kd) and seems to be mainly composed of membrane-spanning beta-sheets, a structure reminiscent of porins. Although the degree of sequence similarity of these proteins is not very high they share a number of characteristics. One of these is the presence of two pairs of cysteines, the first one located in the N-terminal part and the second at the C-terminal extremity that are probably involved in disulphide bonds. The best conserved region is located in the central part of these proteins [4-5].. +PF04871 Uso1 / p115 like vesicle tethering protein, C terminal region
Pfam-B_6073 (release 7.6). Also known as General vesicular transport factor, Transcytosis associated protein (TAP) and Vesicle docking protein, this myosin-shaped molecule consists of an N-terminal globular head region, a coiled-coil tail which mediates dimerisation, and a short C-terminal acidic region . p115 tethers COP1 vesicles to the Golgi by binding the coiled coil proteins giantin (on the vesicles) and GM130 (on the Golgi), via its C-terminal acidic region. It is required for intercisternal transport in the golgi stack. This family consists of the acidic C-terminus, which binds to the golgins giantin and GM130. p115 is thought to juxtapose two membranes by binding giantin with one acidic region, and GM130 with another .. +PF04869 Uso1 / p115 like vesicle tethering protein, head region
Pfam-B_6073 (release 7.6). Also known as General vesicular transport factor, Transcytosis associated protein (TAP) and Vesicle docking protein, this myosin-shaped molecule consists of an N-terminal globular head region, a coiled-coil tail which mediates dimerisation, and a short C-terminal acidic region . p115 tethers COP1 vesicles to the Golgi by binding the coiled coil proteins giantin (on the vesicles) and GM130 (on the Golgi), via its C-terminal acidic region. It is required for intercisternal transport in the golgi stack.\. This family consists of part of the head region. The head region is highly conserved, but its function is unknown. It does not seem to be essential for vesicle tethering . The N-terminal part of the head region, not within this family, contains context-detected Armadillo/beta-catenin-like repeats (Pfam:PF00514).. +PF00582 Universal stress protein family
MRC-LMB Genome group. The universal stress protein UspA Swiss:P28242 is a small cytoplasmic bacterial protein whose expression is enhanced when the cell is exposed to stress agents. UspA enhances the rate of cell survival during prolonged exposure to such conditions, and may provide a general "stress endurance" activity. The crystal structure of Haemophilus influenzae UspA reveals an alpha/beta fold similar to that of the Methanococcus jannaschii MJ0577 protein, which binds ATP , though UspA lacks ATP-binding activity.. +PF03253 Urea transporter
Pfam-B_3193 (release 6.5). Members of this family transport urea across membranes. The family includes a bacterial homologue Swiss:Q9S408. . +PF01099 Uterglobin;
Uteroglobin is a homodimer of two identical 70 amino acid polypeptides linked by two disulphide bridges. The precise role of uteroglobin has still to be elucidated .. +PF03998 Utp11 protein
Pfam-B_6404 (release 7.3). This protein is found to be part of a large ribonucleoprotein complex containing the U3 snoRNA . Depletion of the Utp proteins impedes production of the 18S rRNA, indicating that they are part of the active pre-rRNA processing complex. This large RNP complex has been termed the small subunit (SSU) processome .. +PF04003 Dip2/Utp12 Family
Pfam-B_10105 (release 7.3). This domain is found at the C-terminus of proteins containing WD40 repeats. These proteins are part of the U3 ribonucleoprotein; the yeast protein is called Utp12 or DIP2 Swiss:Q12220 .. +PF04615 Utp14 protein
Pfam-B_5404 (release 7.4). This protein is found to be part of a large ribonucleoprotein complex containing the U3 snoRNA . Depletion of the Utp proteins impedes production of the 18S rRNA, indicating that they are part of the active pre-rRNA processing complex. This large RNP complex has been termed the small subunit (SSU) processome .. +PF03851 UV-endonuclease UvdE
TIGRFAMs, Griffiths-Jones SR. +PF00580 UvrD/REP helicase N-terminal domain
MRC-LMB Genome group.. The Rep family helicases are composed of four structural domains. The Rep family function as dimers. REP helicases catalyse ATP dependent unwinding of double stranded DNA to single stranded DNA. Swiss:P23478, Swiss:P08394 have large insertions near to the carboxy-terminus relative to other members of the family.. +PF02614 Glucuronate isomerase
This is a family of Glucuronate isomerases also known as D-glucuronate isomerase, uronic isomerase, uronate isomerase, or uronic acid isomerase, EC:5.3.1.12. This enzyme catalyses the reactions: D-glucuronate <=> D-fructuronate and D-galacturonate <=> D-tagaturonate. It is not however clear where the experimental evidence for this functional assignment came from and thus this family has no literature reference.. +PF03786 D-mannonate dehydratase (UxuA)
UxuA (this family) and UxuB are required for hexuronate degradation.. +PF03223 V-ATPase subunit C
Pfam-B_2945 (release 6.5). +PF03179 Vacuolar (H+)-ATPase G subunit
Pfam-B_1274 (release 6.5). This family represents the eukaryotic vacuolar (H+)-ATPase (V-ATPase) G subunit. V-ATPases generate an acidic environment in several intracellular compartments. Correspondingly, they are found as membrane-attached proteins in several organelles. They are also found in the plasma membranes of some specialised cells. V-ATPases consist of peripheral (V1) and membrane integral (V0) heteromultimeric complexes. The G subunit is part of the V1 subunit, but is also thought to be strongly attached to the V0 complex. It may be involved in the coupling of ATP degradation to H+ translocation.. +PF03224 V-ATPase_H;
Pfam-B_2481 (release 6.5). The yeast Saccharomyces cerevisiae vacuolar H+-ATPase (V-ATPase) is a multisubunit complex responsible for acidifying organelles. It functions as an ATP dependent proton pump that transports protons across a lipid bilayer. This domain corresponds to the N terminal domain of the H subunit of V-ATPase. The N-terminal domain is required for the activation of the complex whereas the C-terminal domain is required for coupling ATP hydrolysis to proton translocation .. +PF01639 Viral family 110
Pfam-B_1518 (release 4.1). This family of viral proteins is known as the 110 family . The function of members of this family is unknown. The family contains a central cysteine rich region with eight conserved cysteines. Some members of the family contains two copies of the cysteine rich region Swiss:P18560. . +PF03402 Vomeronasal organ pheromone receptor family, V1R
Pfam-B_3057 (release 6.6). This family represents one of two known vomeronasal organ receptor families, the V1R family (after ).. +PF02830 V4R domain
The V4R (vinyl 4 reductase) domain is a predicted small molecular binding domain, that may bind to hydrocarbons .. +PF01496 V_ATPase_sub_a;
V-type ATPase 116kDa subunit family . Pfam-B_446 (release 4.0). This family consists of the 116kDa V-type ATPase (vacuolar (H+)-ATPases) subunits, as well as V-type ATP synthase subunit i. The V-type ATPases family are proton pumps that acidify intracellular compartments in eukaryotic cells for example yeast central vacuoles, clathrin-coated and synaptic vesicles. They have important roles in membrane trafficking processes . The 116kDa subunit (subunit a) in the V-type ATPase is part of the V0 functional domain responsible for proton transport. The a subunit is a transmembrane glycoprotein with multiple putative transmembrane helices it has a hydrophilic amino terminal and a hydrophobic carboxy terminal [1,2]. It has roles in proton transport and assembly of the V-type ATPase complex [1,2]. This subunit is encoded by two homologous gene in yeast VPH1 and STV1 .. +PF02346 Chordopoxvirus fusion protein
Pfam-B_822 (release 5.2). This is a family of viral fusion proteins from the chordopoxviruses. Swiss:P26312 a 14-kDa Vaccinia Virus protein has been demonstrated to function as a viral fusion protein mediating cell fusion at endosomal (low) pH .. +PF02691 Vacuolating cyotoxin
Pfam-B_436 (release 5.5). This family consists of Vacuolating cytotoxin proteins from Proteobacteria. These proteins are an important virulence determinant in H. pylori and induce cytoplasmic vacuolation in a variety of mammalian cell lines .. +PF03077 VACA;
Putative vacuolating cytotoxin. Pfam-B_2866 (release 6.4). This family contains a number of Helicobacter outer membrane proteins with multiple copies of this small conserved region.. +PF04333 VacJ like lipoprotein
VacJ is required for the intercellular spreading of Shigella flexneri. It is attached to the outer membrane by a lipid anchor .. +PF04294 VanW like protein
Family members include vancomycin resistance protein W (VanW). Genes encoding members of this family have been found in vancomycin resistance gene clusters vanB and vanG . The function of VanW is unknown.. +PF02557 D-alanyl-D-alanine carboxypeptidase
+PF04892 VanZ like family
Pfam-B_5529 (release 7.6). This family contains several examples of the VanZ protein, but also contains examples of phosphotransbutyrylases .. +PF03490 Variant-surface-glycoprotein phospholipase C
+PF01992 ATP synthase (C/AC39) subunit
This family includes the AC39 subunit from vacuolar ATP synthase Swiss:P32366 , and the C subunit from archaebacterial ATP synthase . The family also includes subunit C from the Sodium transporting ATP synthase from Enterococcus hirae Swiss:P43456 .. +PF01991 ATP synthase (E/31 kDa) subunit
This family includes the vacuolar ATP synthase E subunit , as well as the archaebacterial ATP synthase E subunit .. +PF01505 Major Vault Protein repeat
The vault is a ubiquitous and highly conserved ribonucleoprotein particle of approximately 13 MDa of unknown function . This family corresponds to a repeat found in the amino terminal half of the major vault protein.. +PF01847 von Hippel-Lindau disease tumour suppressor protein
VHL forms a ternary complex with the elonginB Swiss:O44226 and elonginC Swiss:O13292 proteins. This complex binds Cul2, which then is involved in regulation of vascular endothelial growth factor Swiss:P15692 mRNA.. +PF02209 Villin headpiece domain
Alignment kindly provided by SMART. +PF04702 Vicilin N terminal region
This region is found in plant seed storage proteins, N-terminal to the Cupin domain (Pfam:PF00190). In Macadamia integrifolia (Swiss:Q9SPL4), this region is processed into peptides of approximately 50 amino acids containing a C-X-X-X-C-(10-12)X-C-X-X-X-C motif. These peptides exhibit antimicrobial activity in vitro .. +PF00559 Retroviral Vif (Viral infectivity) protein
Human immunodeficiency virus type 1 (HIV-1) Vif is required for productive infection of T lymphocytes and macrophages. Virions produced in the absence of Vif have abnormal core morphology and those produced in primary T cells carry immature core proteins and low levels of mature capsid.. +PF01044 Vinculin family
Pfam-B_1420 (release 3.0). +PF02236 Vir_DNA_binding;
Viral DNA-binding protein, all alpha domain. Pfam-B_1651 (release 5.2). This family represents a domain of the viral DNA- binding protein, a multi functional protein involved in DNA replication and transcription control.. +PF03728 Vir_DNA_Zn_bind;
Viral DNA-binding protein, zinc binding domain. Pfam-B_1651 (release 5.2). This family represents the zinc binding domain of the viral DNA- binding protein, a multi functional protein involved in DNA replication and transcription control. Two copies of this domain are found at the C-terminus of many members of the family.. +PF00426 VP4;
Outer Capsid protein VP4 (Hemagglutinin). Pfam-B_161 (release 1.0). +PF03225 Vir_Hsp90;
Viral heat shock protein Hsp90 homologue . Pfam-B_2880 (release 6.5). +PF04530 Viral Beta C/D like family
Pfam-B_4973 (release 7.5). Family of ssRNA positive-strand viral proteins. Conserved region found in the Beta C and Beta D transcripts.. +PF00729 Viral coat protein (S domain)
Pfam-B_870 (release 2.1). +PF00747 viral_DNA_bp;
ssDNA binding protein. Pfam-B_490 (release 2.1). This protein is found in herpesviruses and is needed for replication.. +PF01443 Viral (Superfamily 1) RNA helicase
Prodom_1256 (release 99.1). Helicase activity for this family has been demonstrated and NTPase activity . This helicase has multiple roles at different stages of viral RNA replication, as dissected by mutational analysis .. +PF04521 ssRNA positive strand viral 18kD cysteine rich protein
Pfam-B_2612 (release 7.5). +PF00998 HCV_RdRP; Viral_RdRP;
Viral RNA dependent RNA polymerase. Pfam-B_315 (release 3.0). This family includes viral RNA dependent RNA polymerase enzymes from hepatitis C virus and various plant viruses.. +PF02407 Putative viral replication protein
Pfam-B_1223 (release 5.2). This is a family of viral ORFs from various plant and animal ssDNA circoviruses. Published evidence to support the annotated function "viral replication associated protein" has not been found.. +PF05101 Type IV secretory pathway, VirB3-like protein
This family includes the Type IV secretory pathway VirB3 protein, that is found associated with bacterial inner and outer membranes . The family also includes the conjugal transfer protein TrbD family that contains a nucleotide binding motif and may provide energy for the export of DNA or the export of other Trb proteins .. +PF04335 VirB8 protein
Pfam-B_1984 (release 7.3). VirB8 is a bacterial virulence protein with cytoplasmic, transmembrane, and periplasmic regions. It is thought that it is a primary constituent of a DNA transporter. The periplasmic region interacts with VirB9, VirB10, and itself . This family also includes the conjugal transfer protein family TrbF, a family of proteins known to be involved in conjugal transfer. The TrbF protein is thought to compose part of the pilus required for transfer . This domain has a similar fold to the NTF2 protein.. +PF00286 virus_P-coat; Virus_P-coat;
Family includes coat proteins from Potexviruses and carlaviruses.. +PF01347 Lipoprotein amino terminal region
Pfam-B_1280 (release 3.0). This family contains regions from: Vitellogenin, Microsomal triglyceride transfer protein and apolipoprotein B-100. These proteins are all involved in lipid transport . This family contains the LV1n chain from lipovitellin, that contains two structural domains.. +PF05090 Vitamin K-dependent gamma-carboxylase
Pfam-B_6307 (release 7.7). Using reduced vitamin K, oxygen, and carbon dioxide, gamma-glutamyl carboxylase post-translationally modifies certain glutamates by adding carbon dioxide to the gamma position of those amino acids. In vertebrates, the modification of glutamate residues of target proteins is facilitated by an interaction between a propeptide present on target proteins and the gamma-glutamyl carboxylase .. +PF04649 Mycoplasma hyorhinis VlpA repeat
This repeat is found in the extracellular (C-terminal) region of the variant surface antigen A (VlpA) of Mycoplasma hyorhinis. Mutations that change the number of repeats in the protein are involved in antigenic variation and immune evasion of this swine pathogen .. +PF01660 Viral methyltransferase
This RNA methyltransferase domain is found in a wide range of ssRNA viruses, including Hordei-, Tobra-, Tobamo-, Bromo-, Clostero- and Caliciviruses. This methyltransferase is involved in mRNA capping. Capping of mRNA enhances its stability. This usually occurs in the nucleus. Therefore, many viruses that replicate in the cytoplasm encode their own . This is a specific guanine-7-methyltransferase domain involved in viral mRNA cap0 synthesis. Specificity for guanine 7 position is shown by NMR in and in vivo role in cap synthesis . Based on secondary structure prediction, the basic fold is believed to be similar to the common AdoMet-dependent methyltransferase fold . A curious feature of this methyltransferase domain is that it together with flanking sequences seems to have guanylyltransferase activity coupled to the methyltransferase activity . The domain is found throughout the so-called Alphavirus superfamily, (including alphaviruses and several other groups). It forms the defining, unique feature of this superfamily .. +PF00695 Major surface antigen from hepadnavirus
Pfam-B_168 (release 2.1). +PF03762 Vitelline membrane outer layer protein I (VOMI)
Pfam-B_3481 (release 7.0). VOMI binds tightly to ovomucin fibrils of the egg yolk membrane. The structure that consists of three beta-sheets forming Greek key motifs, which are related by an internal pseudo three-fold symmetry. Furthermore, the structure of VOMI has strong similarity to the structure of the delta-endotoxin, as well as a carbohydrate-binding site in the top region of the common fold . . +PF00434 Glycoprotein VP7
Pfam-B_116 (release 1.0). +PF00522 VPR/VPX protein
Pfam-B_100 (release 1.0). +PF03643 Vacuolar protein sorting-associated protein 26
Pfam-B_4396 (release 7.0). Vacuolar protein sorting-associated protein (Vps) 26 is one of around 50 proteins involved in protein trafficking. In particular, Vps26 assembles into a retromer complex with at least four other proteins Vps5, Vps17, Vps29 and Vps35 . This family also contains Down syndrome critical region 3/A.. +PF03997 VPS28 protein
Pfam-B_6317 (release 7.3). +PF04133 Vacuolar protein sorting 55
Pfam-B_25168 (release 7.3);. Vps55 is involved in the secretion of the Golgi form of the soluble vacuolar carboxypeptidase Y, but not the trafficking of the membrane-bound vacuolar alkaline phosphatase. Both Vps55 and obesity receptor gene-related protein are important for functioning membrane trafficking to the vacuole/lysosome of eukaryotic cells .. +PF00558 Vpu protein
The Vpu protein contains an N-terminal transmembrane spanning region and a C-terminal cytoplasmic region. The HIV-1 Vpu protein stimulates virus production by enhancing the release of viral particles from infected cells. The VPU protein binds specifically to CD4.. +PF03852 DNA mismatch endonuclease Vsr
TIGRFAMs, Griffiths-Jones SR. +PF00093 vwc;
von Willebrand factor type C domain. The high cutoff was used to prevent overlap with Pfam:PF00094.. +PF02020 IF5_eIF4_eIF2;
eIF4-gamma/eIF5/eIF2-epsilon. This domain of unknown function is found at the C-terminus of several translation initiation factors .. +PF03716 WCCH_motif;
Pfam-B_3194 (release 7.0). The WCCH motif is found in retrotransposons and Gemini viruses. A specific function has not been associated to this motif .. +PF00458 WHEP-TRS domain
+PF00110 wnt family
Wnt genes have been identified in vertebrates and invertebrates but not in plants, unicellular eukaryotes or prokaryotes. In humans, 19 WNT proteins are known. Because of their insolubility little is known about Wnt protein structure, but all have 23 or 24 Cys residues whose spacing is highly conserved. Signal transduction by Wnt proteins (including the Wnt/beta-catenin, the Wnt/Ca++, and the Wnt/polarity pathway) is mediated by receptors of the Frizzled and LDL-receptor-related protein (LRP) families .. +PF01822 WSC domain
This domain may be involved in carbohydrate binding.. +PF02165 Wilm's tumour protein
+PF03303 WTF protein
Pfam-B_4183 (release 6.5). This is a family of hypothetical Schizosaccharomyces pombe proteins. Their function is unknown.. +PF04932 O-Antigen ligase
Pfam-B_5033 (release 7.6). This group of bacterial proteins is involved in the synthesis of O-antigen, a lipopolysaccharide found in the outer membrane in gram-negative bacteria. This family includes O-antigen ligases such as E. coli RfaL .. +PF00739 Trans-activation protein X
Pfam-B_458 (release 2.1). This protein is found in hepadnaviruses where it is indispensable for replication.. +PF00860 xan_ur_permease;
Pfam-B_1593 (release 2.1). This family includes permeases for diverse substrates such as xanthine Swiss:P42086, uracil Swiss:P39766 and vitamin C Swiss:Q9UGH3. However many members of this family are functionally uncharacterised and may transport other substrates. Members of this family have ten predicted transmembrane helices.. +PF04921 XAP5, circadian clock regulator
Pfam-B_4702 (release 7.6). This protein is found in a wide range of eukaryotes. It is a nuclear protein and is suggested to be DNA binding [1,2]. In plants, this family is essential for correct circadian clock functioning by acting as a light-quality regulator coordinating the activities of blue and red light signalling pathways during plant growth - inhibiting growth in red light but promoting growth in blue light .. +PF02625 DUF182;
XdhC and CoxI family. This domain is often found in association with an NAD-binding region, related to TrkA-N (Pfam:PF02254; personal obs:C. Yeats). XdhC is believed to be involved in the attachment of molybdenum to Xanthine Dehydrogenase ( ).. +PF03894 D-xylulose 5-phosphate/D-fructose 6-phosphate phosphoketolase
Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyceraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 . This family is distantly related to transketolases e.g. Pfam:PF02779.. +PF03469 XH domain
The XH (rice gene X Homology) domain is found in a family of plant proteins including gene X Swiss:Q9SBW2. The molecular function of these proteins is unknown. However these proteins usually contain an XS domain that is also found in the PTGS protein SGS3. This domain contains a conserved glutamate residue that may be functionally important.. +PF03468 XS domain
The XS (rice gene X and SGS3) domain is found in a family of plant proteins including gene X Swiss:Q9SBW2 and SGS3 Swiss:Q9LDX1. SGS3 is thought to be involved in post-transcriptional gene silencing (PTGS). This domain contains a conserved aspartate residue that may be functionally important. The XS domain has recently been predicted to possess an RRM-like RNA-binding domain by fold recognition.. +PF04555 Restriction endonuclease XhoI
This family consists of type II restriction enzymes (EC:3.1.21.4) that recognise the double-stranded sequence CTCGAG and cleave after C-1.. +PF00193 Extracellular link domain
Swissprot_feature_table. +PF00867 XPG I-region
Pfam-B_776 (release 3.0). +PF00752 XPG N-terminal domain
Pfam-B_491 (release 2.1). +PF01834 XRCC1 N terminal domain
+PF02162 Rhodopsin_C;
XYPPX repeat (two copies). This repeat is found in a wide variety of proteins and generally consists of the motif XYPPX where X can be any amino acid. The family includes annexin VII Swiss:P24639, the carboxy tail of certain rhodopsins Swiss:Q17094. This family also includes plaque matrix proteins, however this motif is embedded in a ten residue repeat in Swiss:Q25460. The molecular function of this repeat is unknown. It is also not clear if all the members of this family share a common evolutionary ancestor due to its short length and biased amino acid composition.. +PF04690 YABBY protein
Pfam-B_5698 (release 7.5). YABBY proteins are a group of plant-specific transcription factors involved in the specification of abaxial polarity in lateral organs [1,2].. +PF03895 YadA; YadA_C;
YadA-like C-terminal region. This region represents the C-terminal 120 amino acids of a family of surface-exposed bacterial proteins. YadA, an adhesin from Yersinia, was the first member of this family to be characterised. UspA2 from Moraxella was second. The Eib immunoglobulin-binding proteins from E. coli were third, followed by the DsrA proteins of Haemophilus ducreyi and others. These proteins are homologous at their C-terminal and have predicted signal sequences, but they diverge elsewhere. The C-terminal 9 amino acids, consisting of alternating hydrophobic amino acids ending in F or W, comprise a targeting motif for the outer membrane of the Gram negative cell envelope. This region is important for oligomerisation .. +PF02699 DUF219;
Preprotein translocase subunit. +PF04073 YbaK;
Aminoacyl-tRNA editing domain. This domain is found either on its own or in association with the tRNA synthetase class II core domain (Pfam:PF00587). It is involved in the tRNA editing of mis-charged tRNAs including Cys-tRNA(Pro), Cys-tRNA(Cys), Ala-tRNA(Pro)[2-5]. The structure of this domain shows a novel fold .. +PF02392 Ycf4
Pfam-B_1026 (release 5.2). This family consists of hypothetical Ycf4 proteins from various chloroplast genomes. It has been suggested that Ycf4 is involved in the assembly and/or stability of the photosystem I complex in chloroplasts .. +PF01737 YCF9;
Pfam-B_2211 (release 4.1). This family consists of the hypothetical protein product of the YCF9 gene from chloroplasts and cyanobacteria. These proteins have no known function.. +PF03795 YCII-related domain
+PF02182 G9a; YDG_SRA;
Iyer LM, Aravind L, SMART. Alignment kindly provided by SMART. The domain goes by several names including SAD , SRA and YDG . It adopts a beta barrel, modified PUA-like, fold that is widely present in eukaryotic chromatin proteins and in bacteria . Versions of this domain are known to bind hemi-methylated CpG dinucleotides and also other 5mC containing dinucleotides. The domain binds DNA by flipping out the methylated cytosine base from the DNA double helix . The conserved tyrosine and aspartate residues and a glycine rich patch are critical for recognition of the flipped out base . Mammalian UHRF1 that contains this domain plays an important role in maintenance of methylation at CpG dinucleotides by recruiting DNMT1 to hemimethylated sites associated with replication forks . The SAD/SRA domain has been combined with other domains involved in the ubiquitin pathway on multiple occasions and such proteins link recognition of DNA methylation to chromatin-protein ubiquitination . The domain is also found in species that lack DNA methylation, such as certain apicomplexans, suggestive of other DNA-binding modes or functions . A highly derived and distinct version of the domain is also found in fungi where it is fused to AlkB-type 2OGFeDO domains . In bacteria, the domain is usually fused or associated with restriction endonucleases, many of which target methylated or hemi-methylated DNA .. +PF04794 YdjC-like protein
Pfam-B_5925 (release 7.5). Family of YdjC-like proteins. This region is possibly involved in the cleavage of cellobiose-phosphate .. +PF00399 yeast_PIR;
Yeast PIR protein repeat. +PF03366 YEATS family
Pfam-B_2273 (release 6.6). We have named this family the YEATS family, after `YNK7', `ENL', `AF-9', and `TFIIF small subunit'. This family also contains the GAS41 protein. All these proteins are thought to have a transcription stimulatory activity. +PF03543 YerHae_surfAg; SurfAg;
Yersinia/Haemophilus virulence surface antigen. +PF03545 Yers_vir_YopE;
Yersinia virulence determinant (YopE). +PF03887 YfbU domain
This presumed domain is about 160 residues long. It is found in archaebacteria and eubacteria. In Swiss:Q9EUM2 it is associated with a helix-turn-helix domain. This suggests that this may be a ligand binding domain.. +PF02542 YgbB family
The ygbB protein is a putative enzyme of deoxy-xylulose pathway (terpenoid biosynthesis) .. +PF02325 YGGT family
Pfam-B_983 (release 5.2). This family consists of a repeat found in conserved hypothetical integral membrane proteins. The function of this region and the proteins which possess it is unknown.. +PF04945 YHS domain
This short presumed domain is about 50 amino acid residues long. It often contains two cysteines that may be functionally important. This domain is found in copper transporting ATPases, some phenol hydroxylases and in a set of uncharacterised membrane proteins including Swiss:Q9CNI0. This domain is named after three of the most conserved amino acids it contains. The domain may be metal binding, possibly copper ions. This domain is duplicated in some copper transporting ATPases.. +PF03755 YicC_N-term;
YicC-like family, N-terminal region . Pfam-B_3743 (release 7.0). Family of bacterial proteins. Although poorly characterised, the members of this protein family have been demonstrated to play a role in stationary phase survival . These proteins are not essential during stationary phase .. +PF03853 YjeF-related protein N-terminus
TIGRFAMs, Griffiths-Jones SR. YjeF-N domain is a novel version of the Rossmann fold with a set of catalytic residues and structural features that are different from the conventional dehydrogenases . YjeF-N domain is fused to Ribokinases in bacteria (YjeF), where they may be phosphatases, and to divergent Sm and the FDF domain in eukaryotes (Dcp3p and FLJ21128) , where they may be involved in decapping and catalyze hydrolytic RNA-processing reactions .. +PF03739 Predicted permease YjgP/YjgQ family
Members of this family are predicted integral membrane proteins of unknown function. They are about 350 amino acids long and contain about 6 transmembrane regions. They are predicted to be permeases although there is no verification of this.. +PF02326 Plant ATP synthase F0
Pfam-B_984 (release 5.2). This family corresponds to subunit 8 (YMF19) of the F0 complex of plant and algae mitochondrial F-ATPases (EC:3.6.1.34).. +PF01514 Secretory protein of YscJ/FliF family
Pfam-B_736 (release 4.0). This family includes proteins that are related to the YscJ lipoprotein, and the amino terminus of FliF, the flagellar M-ring protein. The members of the YscJ family are thought to be involved in secretion of several proteins. The FliF protein ring is thought to be part of the export apparatus for flagellar proteins, based on the similarity to YscJ proteins .. +PF04650 YSIRK type signal peptide
Pfam-B_3441 (release 7.5). Many surface proteins found in Streptococcus, Staphylococcus, and related lineages share apparently homologous signal sequences. A motif resembling [YF]SIRKxxxGxxS[VIA] appears at the start of the transmembrane domain. The GxxS motif appears perfectly conserved, suggesting a specific function and not just homology. There is a strong correlation between proteins carrying this region at the N-terminus and those carrying the Gram-positive anchor domain with the LPXTG sortase processing site at the C-terminus.. +PF02295 Adenosine deaminase z-alpha domain
Pfam-B_11136 (release 5.2). This family consists of the N-terminus and thus the z-alpha domain of double-stranded RNA-specific adenosine deaminase (ADAR), an RNA- editing enzyme. The z-alpha domain is a Z-DNA binding domain, and binding of this region to B-DNA has been shown to be disfavoured by steric hindrance . . +PF01559 Zein seed storage protein
Pfam-B_181 (release 4.0). Zeins are seed storage proteins. They are unusually rich in glutamine, proline, alanine, and leucine residues and their sequences show a series of tandem repeats .. +PF01754 A20-like zinc finger
The A20 Zn-finger of bovine/human Rabex5/rabGEF1 is a Ubiquitin Binding Domain [5-6]. The zinc finger mediates self-association in A20. These fingers also mediate IL-1-induced NF-kappa B activation.. +PF01428 AN1-like Zinc finger
Zinc finger at the C-terminus of An1 Swiss:Q91889, a ubiquitin-like protein in Xenopus laevis. The following pattern describes the zinc finger. C-X2-C-X(9-12)-C-X(1-2)-C-X4-C-X2-H-X5-H-X-C Where X can be any amino acid, and numbers in brackets indicate the number of residues.. +PF00096 Zinc finger, C2H2 type
The C2H2 zinc finger is the classical zinc finger domain. The two conserved cysteines and histidines co-ordinate a zinc ion. The following pattern describes the zinc finger. #-X-C-X(1-5)-C-X3-#-X5-#-X2-H-X(3-6)-[H/C] Where X can be any amino acid, and numbers in brackets indicate the number of residues. The positions marked # are those that are important for the stable fold of the zinc finger. The final position can be either his or cys. The C2H2 zinc finger is composed of two short beta strands followed by an alpha helix. The amino terminal part of the helix binds the major groove in DNA binding zinc fingers. The accepted consensus binding sequence for Sp1 is usually defined by the asymmetric hexanucleotide core GGGCGG but this sequence does not include, among others, the GAG (=CTC) repeat that constitutes a high-affinity site for Sp1 binding to the wt1 promoter .. +PF00105 Zinc finger, C4 type (two domains)
In nearly all cases, this is the DNA binding domain of a nuclear hormone receptor. The alignment contains two Zinc finger domains that are too dissimilar to be aligned with each other.. +PF01396 Topoisomerase DNA binding C4 zinc finger
Pfam-B_1854 (release 3.0). +PF02928 C5HC2 zinc finger
Predicted zinc finger with eight potential zinc ligand binding residues. This domain is found in Jumonji . This domain may have a DNA binding function.. +PF01807 CHC2 zinc finger
Pfam-B_755 (release 4.2). This domain is principally involved in DNA binding in DNA primases.. +PF05207 CSL zinc finger
Pfam-B_12353 (release 7.7). This is a zinc binding motif which contains four cysteine residues which chelate zinc . This domain is often found associated with a Pfam:PF00226 domain. This domain is named after the conserved motif of the final cysteine.. +PF05180 DNL zinc finger
Pfam-B_9925 (release 7.7). The domain is named after a short C-terminal motif of D(N/H)L. This domain is a novel zinc-finger protein essential for protein import into mitochondria .. +PF02701 Dof domain, zinc finger
Pfam-B_1250 (release 5.5). The Dof domain is a zinc finger DNA-binding domain, that shows resemblance to the Cys2 zinc finger . . +PF04770 ZF-HD protein dimerisation region
Pfam-B_2002 (release 7.6). Members of this family are plant transcription factors, and have been named ZF-HD for zinc finger homeodomain proteins, on the basis of similarity to proteins of known structure . This region is thought to be involved in the formation of homo and heterodimers, and may form a zinc finger .. +PF01422 NF-X1 type zinc finger
This domain is presumed to be a zinc binding domain. The following pattern describes the zinc finger. C-X(1-6)-H-X-C-X3-C(H/C)-X(3-4)-(H/C)-X(1-10)-C Where X can be any amino acid, and numbers in brackets indicate the number of residues. Two positions can be either his or cys. This family includes Swiss:P40798, Swiss:Q12986 and Swiss:P53971. The zinc fingers in Swiss:Q12986 bind to DNA .. +PF00645 Poly(ADP-ribose) polymerase and DNA-Ligase Zn-finger region
Poly(ADP-ribose) polymerase is an important regulatory component of the cellular response to DNA damage. The amino-terminal region of Poly(ADP-ribose) polymerase consists of two PARP-type zinc fingers. This region acts as a DNA nick sensor.. +PF00641 Zn-finger in Ran binding protein and others
+PF02135 TAZ zinc finger
The TAZ2 domain of CBP binds to other transcription factors such as the p53 tumour suppressor protein, E1A oncoprotein, MyoD, and GATA-1. The zinc coordinating motif that is necessary for binding to target DNA sequences consists of HCCC.. +PF02953 Tim10/DDP family zinc finger
Pfam-B_1207 (release 6.4). Putative zinc binding domain with four conserved cysteine residues. This domain is found in the human disease protein Swiss:O60220. Members of this family such as Tim9 and Tim10 are involved in mitochondrial protein import. Members of this family seem to be localised to the mitochondrial intermembrane space .. +PF02176 TRAF-type zinc finger
+PF02207 zf-UBR1;
Putative zinc finger in N-recognin (UBR box). Alignment kindly provided by SMART. This region is found in E3 ubiquitin ligases that recognise N-recognins .. +PF03470 XS zinc finger domain
This domain is a putative nucleic acid binding zinc finger found in proteins that also contain an XS domain.. +PF01258 zf_dskA_traR;
Prokaryotic dksA/traR C4-type zinc finger. +PF04071 DUF379; zf_like;
Cysteine-rich small domain. Probable metal-binding domain.. +PF04354 ZipA, C-terminal FtsZ-binding domain
This family represents the ZipA C-terminal domain. ZipA is involved in septum formation in bacterial cell division. Its C-terminal domain binds FtsZ, a major component of the bacterial septal ring. The structure of this domain is an alpha-beta fold with three alpha helices and a beta sheet of six antiparallel beta strands. The major loops protruding from the beta sheet surface are thought to form a binding site for FtsZ .. +PF00172 Fungal Zn(2)-Cys(6) binuclear cluster domain
+PF00882 Zinc dependent phospholipase C
Pfam-B_1401 (release 3.0). +PF04228 Putative neutral zinc metallopeptidase
Members of this family have a predicted zinc binding motif characteristic of neutral zinc metallopeptidases (Prosite:PDOC00129).. +PF04298 Putative neutral zinc metallopeptidase
Zinc metallopeptidase zinc binding regions have been predicted in some family members by a pattern match (Prosite:PS00142).. +PF03854 ZnF_P11;
+PF03367 ZPR1;
ZPR1 zinc-finger domain. Pfam-B_1372 (release 6.6). The zinc-finger protein ZPR1 is ubiquitous among eukaryotes. It is indeed known to be an essential protein in yeast. In quiescent cells, ZPR1 is localised to the cytoplasm. But in proliferating cells treated with EGF or with other mitogens, ZPR1 accumulates in the nucleolus. ZPR1 interacts with the cytoplasmic domain of the inactive EGF receptor (EGFR) and is thought to inhibit the basal protein tyrosine kinase activity of EGFR. This interaction is disrupted when cells are treated with EGF, though by themselves, inactive EGFRs are not sufficient to sequester ZPR1 to the cytoplasm [1,2,3]. Upon stimulation by EGF, ZPR1 directly binds the eukaryotic translation elongation factor-1alpha (eEF-1alpha) to form ZPR1/eEF-1alpha complexes . These move into the nucleus, localising particularly at the nucleolus. Indeed, the interaction between ZPR1 and eEF-1alpha has been shown to be essential for normal cellular proliferation , and ZPR1 is thought to be involved in pre-ribosomal RNA expression . The ZPR1 domain consists of an elongation initiation factor 2-like zinc finger and a double-stranded beta helix with a helical hairpin insertion. ZPR1 binds preferentially to GDP-bound eEF1A but does not directly influence the kinetics of nucleotide exchange or GTP hydrolysis . The alignment for this family shows a domain of which there are two copies in ZPR1 proteins. This family also includes several hypothetical archaeal proteins (from both Crenarchaeota and Euryarchaeota), which only contain one copy of the aligned region. This similarity between ZPR1 and archaeal proteins was not previously noted.. +PF00791 ZU5 domain
Alignment kindly provided by SMART. Domain present in ZO-1 and Unc5-like netrin receptors. Domain of unknown function.. +PF00569 Zinc finger, ZZ type
Alignment kindly provided by SMART. Zinc finger present in dystrophin, CBP/p300. ZZ in dystrophin binds calmodulin. Putative zinc finger; binding not yet shown. Four to six cysteine residues in its sequence are responsible for coordinating zinc ions, to reinforce the structure .. +PF04358 DsrC like protein
Family member Swiss:P45573 has been observed to co-purify with Desulfovibrio vulgaris dissimilatory sulfite reductase , and many members of this family are annotated as the third (gamma) subunit of dissimilatory sulphite reductase. However, this protein appears to be only loosely associated to the sulfite reductase, which suggests that DsrC may not be an integral part of the dissimilatory sulphite reductase. Members of this family are found in organisms such as E. coli and H. influenzae which do not contain dissimilatory sulphite reductases but can synthesise assimilatory sirohaem sulphite and nitrite reductases. It is speculated that DsrC may be involved in the assembly, folding or stabilisation of sirohaem proteins . The strictly conserved cysteine in the C terminus suggests that DsrC may have a catalytic function in the metabolism of sulphur compounds .. +PF04252 DUF431;
Predicted SAM-dependent RNA methyltransferase. This family of proteins are predicted to be alpha/beta-knot SAM-dependent RNA methyltransferases .. +PF04359 Protein of unknown function (DUF493)
+PF04205 FMN-binding domain
This conserved region includes the FMN-binding site of the NqrC protein as well as the NosR and NirI regulatory proteins.. +PF04432 Coenzyme F420 hydrogenase/dehydrogenase, beta subunit C terminus
Coenzyme F420 hydrogenase (EC:1.12.99.1) reduces the low-potential two-electron acceptor coenzyme F420. This family contains the C termini of F420 hydrogenase and dehydrogenase beta subunits , . The N terminus of Methanobacterium formicicum formate dehydrogenase beta chain (EC:1.2.1.2, Swiss:P06130) is also a member of this family . This region is often found in association with the 4Fe-4S binding domain, fer4 (Pfam:PF00037).. +PF04609 Methyl-coenzyme M reductase operon protein C
Methyl coenzyme M reductase (MCR) catalyses the final step in methanogenesis. MCR is composed of three subunits, alpha (Pfam:PF02249), beta (Pfam:PF02241) and gamma (Pfam:PF02240) . Genes encoding the beta (mcrB) and gamma (mcrG) subunits are separated by two open reading frames coding for two proteins C and D . The function of proteins C and D (this family) is unknown. This family now also includes family MtrC_related.. +PF04607 Region found in RelA / SpoT proteins
This region of unknown function is found in RelA and SpoT of Escherichia coli, and their homologues in plants and in other eubacteria. RelA is a guanosine 3',5'-bis-pyrophosphate (ppGpp) synthetase (EC:2.7.6.5) while SpoT is thought to be a bifunctional enzyme catalysing both ppGpp synthesis and degradation (ppGpp 3'-pyrophosphohydrolase, (EC:3.1.7.2)) . This region is often found in association with HD (Pfam:PF01966), a metal-dependent phosphohydrolase, TGS (Pfam:PF02824) which is a possible nucleotide-binding region, and the ACT regulatory domain (Pfam:PF01842).. +PF04226 Transglycosylase associated protein
Bacterial protein, predicted to be an integral membrane protein. Some family members have been annotated as transglycosylase associated proteins, but no experimental evidence is provided. This family was annotated based on the information in Swiss:P76011.. +PF04264 YceI-like domain
E. coli YceI is a base-induced periplasmic protein . The recent structure of a member of this family shows that it binds to polyisoprenoid . The structure consists of an extended, eight-stranded, antiparallel beta-barrel that resembles the lipocalin fold.. +PF04431 pec_lyase_N;
Pectate lyase, N terminus. This region is found N terminal to the pectate lyase domain (Pfam:PF00544) in some plant pectate lyase enzymes.. +PF05223 NTF2-like N-terminal transpeptidase domain
The structure of this domain from MecA is known Swiss:Q53707 and is found to be similar to that found in NTF2 Pfam:PF02136. This domain seems unlikely to have an enzymatic function, and its role remains unknown.. +PF05224 NDT80 / PhoG like DNA-binding family
This family includes the DNA-binding region of NDT80 as well as PhoG and its homologues. The family contains Swiss:Q05534 or VIB-1. VIB-1 is thought to be a regulator of conidiation in Neurospora crassa and shares a region of similarity to PHOG, a possible phosphate nonrepressible acid phosphatase in Aspergillus nidulans. It has been found that vib-1 is not the structural gene for nonrepressible acid phosphatase, but rather may regulate nonrepressible acid phosphatase activity .. +PF05225 helix-turn-helix, Psq domain
This DNA-binding motif is found in four copies in the pipsqueak protein of Drosophila melanogaster . In pipsqueak this domain binds to GAGA sequence .. +PF05226 CHASE2 domain
CHASE2 is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in bacteria. Specifically, CHASE2 domains are found in histidine kinases, adenylate cyclases, serine/threonine kinases and predicted diguanylate cyclases/phosphodiesterases. Environmental factors that are recognised by CHASE2 domains are not known at this time .. +PF05227 CHASE3 domain
CHASE3 is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in bacteria. Specifically, CHASE3 domains are found in histidine kinases, adenylate cyclases, methyl-accepting chemotaxis proteins and predicted diguanylate cyclases/phosphodiesterases. Environmental factors that are recognised by CHASE3 domains are not known at this time .. +PF05228 CHASE4 domain
CHASE4. This is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in prokaryotes. Specifically, CHASE4 domains are found in histidine kinases in Archaea and in predicted diguanylate cyclases/phosphodiesterases in Bacteria. Environmental factors that are recognized by CHASE4 domains are not known at this time .. +PF05229 Spore Coat Protein U domain
This domain is found in a bacterial family of spore coat proteins , as well as a family of secreted pili proteins involved in motility and biofilm formation ( ). This family is distantly related to fimbrial proteins.. +PF05230 MASE2 domain
Predicted integral membrane sensory domain found in histidine kinases, diguanylate cyclases and other bacterial signaling proteins.. +PF05231 MASE1
Predicted integral membrane sensory domain found in histidine kinases, diguanylate cyclases and other bacterial signaling proteins. This entry also includes members of the 8 transmembrane UhpB type (8TMR-UT) domain family .. +PF05232 Bacterial Transmembrane Pair family
This family represents a conserved pair of transmembrane helices. It appears to be found as two tandem repeats in a family of hypothetical proteins.. +PF05233 PHB accumulation regulatory domain
The proteins this domain is found in are typically involved in regulating polymer accumulation in bacteria, particularly poly-beta-hydroxybutyrate (PHB) . The N-terminal region is likely to be the DNA-binding domain (Pfam:PF07879) while this domain probably binds PHB (personal obs:C Yeats).. +PF05234 UAF_rrn5;
UAF complex subunit Rrn10. The protein Rrn10 has been identified as a component of the Upstream Activating Factor (UAF), an RNA polymerase I (pol I) specific transcription stimulatory factor . +PF05235 CHAD domain
The CHAD domain is an alpha-helical domain functionally associated with the Pfam:PF01928 domains. It has conserved histidines that may chelate metals .. +PF05236 Transcription initiation factor TFIID component TAF4 family
This region of similarity is found in Transcription initiation factor TFIID component TAF4 .. +PF05237 MoeZ/MoeB domain
This putative domain is found in the MoeZ protein and the MoeB protein. The domain has two CXXC motifs that are only partly conserved.. +PF05238 CHL4;
Kinetochore protein CHL4 like. CHL4 is a protein involved in chromosome segregation . It is a component of the central kinetochore which mediates the attachment of the centromere to the mitotic spindle . CENP-N is one of the components that assembles onto the CENP-A-nucleosome-associated (NAC) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC .. +PF05239 PRC-barrel domain
The PRC-barrel is an all beta barrel domain found in photosystem reaction centre subunit H of the purple bacteria and RNA metabolism proteins of the RimM group. PRC-barrels are approximately 80 residues long, and found widely represented in bacteria, archaea and plants. This domain is also present at the carboxyl terminus of the pan-bacterial protein RimM, which is involved in ribosomal maturation and processing of 16S rRNA. A family of small proteins conserved in all known euryarchaea are composed entirely of a single stand-alone copy of the domain .. +PF05240 APOBEC-like C-terminal domain
This domain is found at the C-termini of the Apolipoprotein B mRNA editing enzyme.. +PF05241 Emopamil binding protein
Pfam-B_7320 (release 7.7). Emopamil binding protein (EBP) is a gene that encodes a non-glycosylated type I integral membrane protein of endoplasmic reticulum and shows high level expression in epithelial tissues. The EBP protein has emopamil binding domains, including the sterol acceptor site and the catalytic centre, which show Delta8-Delta7 sterol isomerase activity. Human sterol isomerase, a homologue of mouse EBP, is suggested not only to play a role in cholesterol biosynthesis, but also to affect lipoprotein internalisation. In humans, mutations of EBP are known to cause the genetic disorder of X-linked dominant chondrodysplasia punctata (CDPX2). This syndrome of humans is lethal in most males, and affected females display asymmetric hyperkeratotic skin and skeletal abnormalities .. +PF05242 Glycosylation-dependent cell adhesion molecule 1 (GlyCAM-1)
Pfam-B_7429 (release 7.7). This family consists of the lactophorin precursors proteose peptone component 3 (PP3) and glycosylation-dependent cell adhesion molecule 1 (GlyCAM-1). GlyCAM-1 functions as a ligand for L-selectin, a saccharide-binding protein on the surface of circulating leukocytes, and mediates the trafficking of blood-borne lymphocytes into secondary lymph nodes. In this context, sulphatation of the carbohydrates of GlyCAM-1 has been shown to be a critical structural requirement to be recognised by L-selectin. GlyCAM-1 is also expressed in pregnant and lactating mammary glands of mouse and in an unknown site in the lung, in the bovine uterus and rat cochlea .. +PF05244 Brucella outer membrane protein 2
Pfam-B_7448 (release 7.7). This family consists of several outer membrane proteins (2a and 2b) from brucella bacteria. Brucellae are Gram-negative, facultative intracellular bacteria that can infect many species of animals and man .. +PF05246 Protein of unknown function (DUF735)
Pfam-B_7611 (release 7.7). This family consists of several uncharacterised Borrelia burgdorferi (Lyme disease spirochete) proteins of unknown function.. +PF05247 Flagellar transcriptional activator (FlhD)
Pfam-B_7623 (release 7.7). This family consists of several bacterial flagellar transcriptional activator (FlhD) proteins. FlhD combines with FlhC to form a regulatory complex in E. coli, this complex has been shown to be a global regulator involved in many cellular processes as well as a flagellar transcriptional activator .. +PF05248 Adenovirus E3A
Pfam-B_7497 (release 7.7). +PF05250 Uncharacterised protein family (UPF0193)
This family of proteins is functionally uncharacterised.. +PF05251 Uncharacterised protein family (UPF0197)
This family of proteins is functionally uncharacterised.. +PF05253 UPF0224;
U11-48K-like CHHC zinc finger. This zinc binding domain has four conserved zinc chelating residues in a CHHC pattern. This domain is predicted to have an RNA-binding function .. +PF05254 Uncharacterised protein family (UPF0203)
This family of proteins is functionally uncharacterised.. +PF05255 Uncharacterised protein family (UPF0220)
This family of proteins is functionally uncharacterised.. +PF05256 Uncharacterised protein family (UPF0223)
This family of proteins is functionally uncharacterised.. +PF05257 AXE;
Pfam-B_2845 (release 7.7). This domain corresponds to an amidase function. Many of these proteins are involved in cell wall metabolism of bacteria. This domain is found at the N-terminus of Swiss:P43675, where it functions as a glutathionylspermidine amidase EC:3.5.1.78 . This domain is found to be the catalytic domain of PlyCA .. +PF05258 Protein of unknown function (DUF721)
Pfam-B_7527 (release 7.7). This family contains several actinomycete proteins of unknown function.. +PF05259 Herpesvirus glycoprotein L
Pfam-B_7535 (release 7.7). This family consists of several herpesvirus glycoprotein L or UL1 proteins. Glycoprotein L is known to form a complex with glycoprotein H but the function of this complex is poorly understood .. +PF05261 TraM protein, DNA-binding
Pfam-B_7584 (release 7.7). The TraM protein is an essential part of the DNA transfer machinery of the conjugative resistance plasmid R1 (IncFII). On the basis of mutational analyses, it was shown that the essential transfer protein TraM has at least two functions. First, a functional TraM protein was found to be required for normal levels of transfer gene expression. Second, experimental evidence was obtained that TraM stimulates efficient site-specific single-stranded DNA cleavage at the oriT, in vivo. Furthermore, a specific interaction of the cytoplasmic TraM protein with the membrane protein TraD was demonstrated, suggesting that the TraM protein creates a physical link between the relaxosomal nucleoprotein complex and the membrane-bound DNA transfer apparatus .. +PF05262 Borrelia P83/100 protein
Pfam-B_6712 (release 7.7). This family consists of several Borrelia P83/P100 antigen proteins.. +PF05263 Protein of unknown function (DUF722)
Pfam-B_6789 (release 7.7). This family contains several bacteriophage proteins of unknown function.. +PF05264 Choristoneura fumiferana antifreeze protein (CfAFP)
Pfam-B_6800 (release 7.7). This family consists of several antifreeze proteins from the insect Choristoneura fumiferana (Spruce budworm). Antifreeze proteins (AFPs) and antifreeze glycoproteins (AFGPs) are present in many organisms that must survive sub-zero temperatures. These proteins bind to seed ice crystals and inhibit their growth through an adsorption-inhibition mechanism .. +PF05265 Protein of unknown function (DUF723)
Pfam-B_6852 (release 7.7). This family contains several uncharacterised proteins from Neisseria meningitidis. These proteins may have a role in DNA-binding.. +PF05266 Protein of unknown function (DUF724)
Pfam-B_6894 (release 7.7). This family contains several uncharacterised proteins found in Arabidopsis thaliana and other plants. This region is often found associated with Agenet domains and may contain coiled-coil.. +PF05267 Protein of unknown function (DUF725)
Pfam-B_6905 (release 7.7). This family contains several Drosophila proteins of unknown function.. +PF05268 Phage tail fibre adhesin Gp38
Pfam-B_7415 (release 7.7). This family contains several Gp38 proteins from T-even-like phages. Gp38, together with a second phage protein, gp57, catalyses the organisation of gp37 but is absent from the phage particle. Gp37 is responsible for receptor recognition .. +PF05269 Bacteriophage CII protein
Pfam-B_7453 (release 7.7). This family consists of several phage CII regulatory proteins. CII plays a key role in the lysis-lysogeny decision in bacteriophage lambda and related phages .. +PF05270 Alpha-L-arabinofuranosidase B (ABFB)
Pfam-B_7464 (release 7.7). This family consists of several fungal alpha-L-arabinofuranosidase B proteins. L-Arabinose is a constituent of plant-cell-wall poly-saccharides. It is found in a polymeric form in L-arabinan, in which the backbone is formed by 1,5-a- linked l-arabinose residues that can be branched via 1,2-a- and 1,3-a-linked l-arabinofuranose side chains. AbfB hydrolyses 1,5-a, 1,3-a and 1,2-a linkages in both oligosaccharides and polysaccharides, which contain terminal non-reducing l-arabinofuranoses in side chains .. +PF05271 Tobravirus 2B protein
Pfam-B_7517 (release 7.7). This family consists of several tobravirus 2B proteins. It is known that the 2B protein is required for transmission by both Paratrichodorus pachydermus and P. anemones nematodes .. +PF05272 Virulence-associated protein E
Pfam-B_6573 (release 7.7). This family contains several bacterial virulence-associated protein E like proteins. These proteins contain a P-loop motif.. +PF05273 Poxvirus RNA polymerase 22 kDa subunit
Pfam-B_6584 (release 7.7). This family consists of several poxvirus DNA-dependent RNA polymerase 22 kDa subunits.. +PF05274 Occlusion-derived virus envelope protein E25
Pfam-B_6633 (release 7.7). This family consists of several nucleopolyhedrovirus occlusion-derived virus envelope E25 proteins.. +PF05275 Copper resistance protein B precursor (CopB)
Pfam-B_6721 (release 7.7). This family consists of several bacterial copper resistance proteins. Copper is essential and serves as cofactor for more than 30 enzymes yet a surplus of copper is toxic and leads to radical formation and oxidation of biomolecules. Therefore, copper homeostasis is a key requisite for every organism. CopB serves to extrude copper when it approaches toxic levels .. +PF05276 SH3 domain-binding protein 5 (SH3BP5)
Pfam-B_6742 (release 7.7). This family consists of several eukaryotic SH3 domain-binding protein 5 or c-Jun N-terminal kinase (JNK)-interacting proteins (SH3BP5 or Sab). Sab binds to and serves as a substrate for JNK in vitro, and has been found to interact with the Src homology 3 (SH3) domain of Bruton's tyrosine kinase (Btk). Inspection of the sequence of Sab reveals the presence of two putative mitogen-activated protein kinase interaction motifs (KIMs) similar to that found in the JNK docking domain of the c-Jun transcription factor, and four potential serine-proline JNK phosphorylation sites in the C-terminal half of the molecule .. +PF05277 Protein of unknown function (DUF726)
Pfam-B_6757 (release 7.7). This family consists of several uncharacterised eukaryotic proteins.. +PF05278 Arabidopsis phospholipase-like protein (PEARLI 4)
Pfam-B_6763 (release 7.7). This family contains several phospholipase-like proteins from Arabidopsis thaliana which are homologous to PEARLI 4.. +PF05279 Aspartyl beta-hydroxylase N-terminal region
Pfam-B_6767 (release 7.7). This family includes the N-terminal regions of the junctin, junctate and aspartyl beta-hydroxylase proteins. Junctate is an integral ER/SR membrane calcium binding protein, which comes from an alternatively spliced form of the same gene that generates aspartyl beta-hydroxylase and junctin . Aspartyl beta-hydroxylase catalyses the post-translational hydroxylation of aspartic acid or asparagine residues contained within epidermal growth factor (EGF) domains of proteins .. +PF05280 Flagellar transcriptional activator (FlhC)
Pfam-B_6773 (release 7.7). This family consists of several bacterial flagellar transcriptional activator (FlhC) proteins. FlhC combines with FlhD to form a regulatory complex in E. coli, this complex has been shown to be a global regulator involved in many cellular processes as well as a flagellar transcriptional activator .. +PF05281 Neuroendocrine protein 7B2 precursor (Secretogranin V)
Pfam-B_6776 (release 7.7). The neuroendocrine protein 7B2 has a critical role in the proteolytic conversion and activation of proPC2, the enzyme responsible for the proteolytic conversion of many peptide hormone precursors. The 7B2 protein acts as an intracellular binding protein for proPC2, facilitates its maturation, and is required for its enzymatic activity. Processing of many important peptide precursors does not occur in 7B2 nulls. 7B2 null mice exhibit a unique form of Cushing's disease with many atypical symptoms, such as hypoglycemia .. +PF05282 AAR2 protein
Pfam-B_6782 (release 7.7). This family consists of several eukaryotic AAR2-like proteins. The yeast protein AAR2 is involved in splicing pre-mRNA of the a1 cistron and other genes that are important for cell growth .. +PF05283 Multi-glycosylated core protein 24 (MGC-24)
Pfam-B_6825 (release 7.7). This family consists of several MGC-24 (or Cd164 antigen) proteins from eukaryotic organisms. MGC-24/CD164 is a sialomucin expressed in many normal and cancerous tissues. In humans, soluble and transmembrane forms of MGC-24 are produced by alternative splicing .. +PF05284 Protein of unknown function (DUF736)
Pfam-B_7619 (release 7.7). This family consists of several uncharacterised bacterial proteins of unknown function.. +PF05285 SDA1
Pfam-B_6906 (release 7.7). This family consists of several SDA1 protein homologues. SDA1 is a Saccharomyces cerevisiae protein which is involved in the control of the actin cytoskeleton. The protein is essential for cell viability and is localised in the nucleus .. +PF05287 PMG protein
Pfam-B_7710 (release 7.7). This family consists of several mouse anagen-specific protein mKAP13 (PMG1 and PMG2). PMG1 and 2 contain characteristic repeats reminiscent of the keratin-associated proteins (KAPs). Both genes are expressed in growing hair follicles in skin as well as in sebaceous and eccrine sweat glands. Interestingly, expression is also detected in the mammary epithelium where it is limited to the onset of the pubertal growth phase and is independent of ovarian hormones. Their broad, developmentally controlled expression pattern, together with their unique amino acid composition, demonstrate that pmg-1 and pmg-2 constitute a novel KAP gene family participating in the differentiation of all epithelial cells forming the epidermal appendages .. +PF05288 Poxvirus A3L Protein
Pfam-B_7718 (release 7.7). This family consists of several poxvirus A3L or A2_5L proteins.. +PF05289 Borrelia hemolysin accessory protein
Pfam-B_7729 (release 7.7). This family consists of several borrelia hemolysin accessory proteins (BLYB). BLYB was thought to be an accessory protein, which was proposed to comprise a hemolysis system but it is now thought that BlyA and BlyB function instead as a prophage-encoded holin or holin-like system .. +PF05290 Baculovirus immediate-early protein (IE-0)
Pfam-B_7745 (release 7.7). The Autographa californica multinucleocapsid nuclear polyhedrosis virus (AcMNPV) ie-1 gene product (IE-1) is thought to play a central role in stimulating early viral transcription. IE-1 has been demonstrated to activate several early viral gene promoters and to negatively regulate the promoters of two other AcMNPV regulatory genes, ie-0 and ie-2. It is thought that IE-1 negatively regulates the expression of certain genes by binding directly, or as part of a complex, to promoter regions containing a specific IE-1-binding motif (5'-ACBYGTAA-3') near their mRNA start sites .. +PF05291 Bystin
Pfam-B_7767 (release 7.7). Trophinin and tastin form a cell adhesion molecule complex that potentially mediates an initial attachment of the blastocyst to uterine epithelial cells at the time of implantation. Trophinin and tastin bind to an intermediary cytoplasmic protein called bystin. Bystin may be involved in implantation and trophoblast invasion because bystin is found with trophinin and tastin in the cells at human implantation sites and also in the intermediate trophoblasts at invasion front in the placenta from early pregnancy . This family also includes the yeast protein ENP1. ENP1 is an essential protein in Saccharomyces cerevisiae and is localised in the nucleus . It is thought that ENP1 plays a direct role in the early steps of rRNA processing as enp1 defective yeast cannot synthesise 20S pre-rRNA and hence 18S rRNA, which leads to reduced formation of 40S ribosomal subunits .. +PF05292 Malonyl-CoA decarboxylase (MCD)
Pfam-B_7770 (release 7.7). This family consists of several eukaryotic malonyl-CoA decarboxylase (MLYCD) proteins. Malonyl-CoA, in addition to being an intermediate in the de novo synthesis of fatty acids, is an inhibitor of carnitine palmitoyltransferase I, the enzyme that regulates the transfer of long-chain fatty acyl-CoA into mitochondria, where they are oxidised. After exercise, malonyl-CoA decarboxylase participates with acetyl-CoA carboxylase in regulating the concentration of malonyl-CoA in liver and adipose tissue, as well as in muscle. Malonyl-CoA decarboxylase is regulated by AMP-activated protein kinase (AMPK) .. +PF05293 African swine fever virus (ASFV) L11L protein
Pfam-B_7869 (release 7.7). L11L is an integral membrane protein of the African swine fever virus (ASFV) which is expressed late in the virus replication cycle. The protein is thought to be non-essential for growth in vitro and for virus virulence in domestic swine .. +PF05294 toxin_5;
Scorpion short toxin. Pfam-B_7892 (release 7.7). This family contains various secreted scorpion short toxins and seems to be unrelated to Pfam:PF00451.. +PF05295 Luciferase;
Luciferase/LBP N-terminal domain. Pfam-B_7906 (release 7.7). This family consists of a presumed N-terminal domain that is conserved between dinoflagellate luciferase and luciferin binding proteins. Luciferase is involved in catalysing the light emitting reaction in bioluminescence and luciferin binding protein (LBP) is known to bind to luciferin (the substrate for luciferase) to stop it reacting with the enzyme and therefore switching off the bioluminescence function. The expression of these two proteins is controlled by a circadian clock at the translational level, with synthesis and degradation occurring on a daily basis . However, this domain is not the catalytic part of the protein. It has been suggested that this region may mediate an interaction between LBP and Luciferase or their association with the vacuolar membrane .. +PF05296 Mammalian taste receptor protein (TAS2R)
Pfam-B_1498 (release 7.7). This family consists of several forms of mammalian taste receptor proteins (TAS2Rs). TAS2Rs are G protein-coupled receptors expressed in subsets of taste receptor cells of the tongue and palate epithelia and are organised in the genome in clusters. The proteins are genetically linked to loci that influence bitter perception in mice and humans .. +PF05297 Herpesvirus latent membrane protein 1 (LMP1)
Pfam-B_5174 (release 7.7). This family consists of several latent membrane protein 1 or LMP1s mostly from Epstein-Barr virus. LMP1 of EBV is a 62-65 kDa plasma membrane protein possessing six membrane spanning regions, a short cytoplasmic N-terminus and a long cytoplasmic carboxy tail of 200 amino acids. EBV latent membrane protein 1 (LMP1) is essential for EBV-mediated transformation and has been associated with several cases of malignancies. EBV-like viruses in Cynomolgus monkeys (Macaca fascicularis) have been associated with high lymphoma rates in immunosuppressed monkeys . +PF05298 Bombinin
Pfam-B_5347 (release 7.7). This family consists of Bombinin and Maximin proteins from Bombina maxima (Chinese red belly toad). Two groups of antimicrobial peptides have been isolated from skin secretions of Bombina maxima. Peptides in the first group, named maximins 1, 2, 3, 4 and 5, are structurally related to bombinin-like peptides (BLPs). Unlike BLPs, sequence variations in maximins occurred all through the molecules. In addition to the potent antimicrobial activity, cytotoxicity against tumour cells and spermicidal action of maximins, maximin 3 possessed a significant anti-HIV activity. Maximins 1 and 3 have been found to be toxic to mice. Peptides in the second group, termed maximins H1, H2, H3 and H4, are homologous with bombinin H peptides .. +PF05299 M61 glycyl aminopeptidase
Glycyl aminopeptidase is an unusual peptidase in that it has a preference for substrates with an N-terminal glycine or alanine. These proteins are found in Bacteria and in Archaea.. +PF05300 Protein of unknown function (DUF737)
Pfam-B_6933 (release 7.7). This family consists of several uncharacterised mammalian proteins of unknown function.. +PF05301 DUF738;
Touch receptor neuron protein Mec-17. Moxon SJ, Pollington JE. Pfam-B_6943 (release 7.7). Mec-17 is the protein product of one of the 18 genes required for the development and function of the touch receptor neuron for gentle touch. Mec-17 is specifically required for maintaining the differentiation of the touch receptor . This family is conserved to higher eukaryotes.. +PF05302 Protein of unknown function (DUF720)
Pfam-B_6980 (release 7.7). This family consists of several uncharacterised Chlamydia proteins of unknown function.. +PF05303 Protein of unknown function (DUF727)
Pfam-B_7004 (release 7.7). This family consists of several uncharacterised eukaryotic proteins of unknown function.. +PF05304 Protein of unknown function (DUF728)
Pfam-B_7223 (release 7.7). This family consists of several uncharacterised tobravirus proteins of unknown function.. +PF05305 Protein of unknown function (DUF732)
Pfam-B_7356 (release 7.7). This family consists of several uncharacterised Mycobacterium tuberculosis and leprae proteins of unknown function.. +PF05306 Protein of unknown function (DUF733)
Pfam-B_7392 (release 7.7). This family consists of several uncharacterised Drosophila melanogaster proteins of unknown function.. +PF05307 Bundlin
Pfam-B_6974 (release 7.7). This family consists of several bundlin proteins from E. coli. Bundlin is a type IV pilin protein that is the only known structural component of enteropathogenic Escherichia coli bundle-forming pili (BFP). BFP play a role in virulence, antigenicity, autoaggregation, and localised adherence to epithelial cells .. +PF05308 DUF729;
Mitochondrial fission regulator. Moxon SJ, Eberhardt R. Pfam-B_6919 (release 7.7). In eukaryotes, this family of proteins induces mitochondrial fission [1,2].. +PF05309 TraE protein
Pfam-B_7677 (release 7.7). This family consists of several bacterial sex pilus assembly and synthesis proteins (TraE). Conjugal transfer of plasmids from donor to recipient cells is a complex process in which a cell-to-cell contact plays a key role. Many genes encoded by self-transmissible plasmids are required for various processes of conjugation, including pilus formation, stabilisation of mating pairs, conjugative DNA metabolism, surface exclusion and regulation of transfer gene expression . The exact function of the TraE protein is unknown.. +PF05310 Tenuivirus_NS3;
Tenuivirus movement protein. Pfam-B_7740 (release 7.7). This family of ssRNA negative-strand crop plant tenuivirus proteins appears to combine PV2 , NS2 , NS3, and PV3 proteins. Plant viruses encode specific proteins known as movement proteins (MPs) to control their spread through plasmodesmata (PD) in walls between cells as well as from leaf to leaf via vascular-dependent transport. During this movement process, the virally encoded MPs interact with viral genomes for transport from the viral replication sites to the PDs in the walls of infected cells along the cytoskeleton and/or endoplasmic reticulum (ER) network. The virus is then thought to move through the PDs in the form of MP-associated ribonucleoprotein complexes or as virions . The NS3 protein appears to function as an RNA silencing suppressor .. +PF05311 Baculovirus 33KDa late protein (PP31)
Pfam-B_7777 (release 7.7). Autographa californica nuclear polyhedrosis virus (AcMNPV) pp31 is a nuclear phosphoprotein that accumulates in the virogenic stroma, which is the viral replication centre in the infected-cell nucleus, binds to DNA, and serves as a late expression factor .. +PF05313 Poxvirus P21 membrane protein
Pfam-B_7803 (release 7.7). The P21 membrane protein of vaccinia virus, encoded by the A17L (or A18L) gene, has been reported to localise on the inner of the two membranes of the intracellular mature virus (IMV). It has also been shown that P21 acts as a membrane anchor for the externally located fusion protein P14 (A27L gene) . . +PF05314 Baculovirus occlusion-derived virus envelope protein EC27
Pfam-B_7811 (release 7.7). This family consists of several baculovirus occlusion-derived virus envelope proteins (EC27 or E27). The ODV-E27 protein has distinct functional characteristics compared to cellular and viral cyclins. Depending on the cdk protein, and perhaps other viral or cellular proteins yet to be described, the kinase-EC27 complex may have either cyclin B- or D-like activity .. +PF05315 ICEA Protein
Pfam-B_2792 (release 7.7). This family consists of several ICEA proteins from Helicobacter pylori. Helicobacter pylori infection causes gastritis and peptic ulcer disease, and is classified as a definite carcinogen of gastric cancer. ICEA1 is speculated to be associated with peptic ulcer disease .. +PF05316 Yeast_VAR1;
Mitochondrial ribosomal protein (VAR1). Pfam-B_7802 (release 7.7). This family consists of the yeast mitochondrial ribosomal proteins VAR1. Mitochondria possess their own ribosomes responsible for the synthesis of a small number of proteins encoded by the mitochondrial genome. In yeast the two ribosomal RNAs and a single ribosomal protein, VAR1, are products of mitochondrial genes, and the remaining approximately 80 ribosomal proteins are encoded in the nucleus . VAR1 along with 15S rRNA are necessary for the formation of mature 37S subunits .. +PF05317 Thermopsin
Pfam-B_7819 (release 7.7). This family consists of several thermopsin proteins from archaebacteria. Thermopsin is a thermostable acid protease which is capable of hydrolysing the following bonds: Leu-Val, Leu-Tyr, Phe-Phe, Phe-Tyr, and Tyr-Thr. The specificity of thermopsin is therefore similar to that of pepsin, that is, it prefers large hydrophobic residues at both sides of the scissile bond .. +PF05318 Tombusvirus movement protein
Pfam-B_4393 (release 7.7). This family consists of several Tombusvirus movement proteins. These proteins allow the virus to move from cell-to-cell and allow host-specific systemic spread . . +PF05320 Poxvirus DNA-directed RNA polymerase 19 kDa subunit
Pfam-B_6945 (release 7.7). This family contains several DNA-directed RNA polymerase 19 kDa polypeptides. The Poxvirus DNA-directed RNA polymerase (EC: 2.7.7.6) catalyses DNA-template-directed extension of the 3'-end of an RNA strand by one nucleotide at a time.. +PF05321 Haemolysin expression modulating protein
Pfam-B_7025 (release 7.7). This family consists of haemolysin expression modulating protein (HHA) homologues. YmoA and Hha are highly similar bacterial proteins downregulating gene expression in Yersinia enterocolitica and Escherichia coli, respectively.. +PF05322 NINE;
Pfam-B_7029 (release 7.7). This family consists of NINE proteins from several bacteriophages and from E. coli.. +PF05323 Poxvirus A21 Protein
Pfam-B_7034 (release 7.7). This family consists of several poxvirus A21 proteins.. +PF05324 Sperm antigen HE2
Pfam-B_7044 (release 7.7). This family consists of several variants of the human and chimpanzee sperm antigen proteins (HE2 and EP2 respectively). The EP2 gene codes for a family of androgen-dependent, epididymis-specific secretory proteins. The EP2 gene uses alternative promoters and differential splicing to produce a family of variant messages. The translated putative protein variants differ significantly from each other. Some of these putative proteins have similarity to beta-defensins, a family of antimicrobial peptides .. +PF05325 Protein of unknown function (DUF730)
Pfam-B_7197 (release 7.7). This family consists of several uncharacterised Arabidopsis thaliana proteins of unknown function.. +PF05326 Seminal vesicle autoantigen (SVA)
Pfam-B_7065 (release 7.7). This family consists of seminal vesicle autoantigen and prolactin-inducible (PIP) proteins. Seminal vesicle autoantigen (SVA) is specifically present in the seminal plasma of mice. This 19-kDa secretory glycoprotein suppresses the motility of spermatozoa by interacting with phospholipid. PIP, has several known functions. In saliva, this protein plays a role in host defence by binding to microorganisms such as Streptococcus. PIP is an aspartyl proteinase and it acts as a factor capable of suppressing T-cell apoptosis through its interaction with CD4 .. +PF05327 RNA polymerase I specific transcription initiation factor RRN3
Pfam-B_7041 (release 7.7). This family consists of several eukaryotic proteins which are homologous to the yeast RRN3 protein. RRN3 is one of the RRN genes specifically required for the transcription of rDNA by RNA polymerase I (Pol I) in Saccharomyces cerevisiae .. +PF05328 CybS
Pfam-B_7102 (release 7.7). This family consists of several eukaryotic succinate dehydrogenase [ubiquinone] cytochrome B small subunit, mitochondrial precursor (CybS) proteins. SDHD encodes the small subunit (cybS) of cytochrome b in succinate-ubiquinone oxidoreductase (mitochondrial complex II). Mitochondrial complex II is involved in the Krebs cycle and in the aerobic electron transport chain. It contains four proteins. The catalytic core consists of a flavoprotein and an iron-sulfur protein; these proteins are anchored to the mitochondrial inner membrane by the large subunit of cytochrome b (cybL) and cybS, which together comprise the heme-protein cytochrome b. Mutations in the SDHD gene can lead to hereditary paraganglioma, characterised by the development of benign, vascularised tumours in the head and neck .. +PF05331 Protein of unknown function (DUF742)
Pfam-B_3675 (release 7.7). This family consists of several uncharacterised Streptomyces proteins as well as one from Mycobacterium tuberculosis. The function of these proteins is unknown.. +PF05332 Protein of unknown function (DUF743)
Pfam-B_4046 (release 7.7). This family consists of several uncharacterised Calicivirus proteins of unknown function.. +PF05334 Protein of unknown function (DUF719)
Pfam-B_7667 (release 7.7). This family consists of several eukaryotic proteins of unknown function.. +PF05335 Protein of unknown function (DUF745)
Pfam-B_5169 (release 7.7). This family consists of several uncharacterised Drosophila melanogaster proteins of unknown function.. +PF05336 Domain of unknown function (DUF718)
Pfam-B_7227 (release 7.7). This family consists of several uncharacterised bacterial proteins of unknown function.. +PF05337 Macrophage colony stimulating factor-1 (CSF-1)
Pfam-B_7649 (release 7.7). Colony stimulating factor 1 (CSF-1) is a homodimeric polypeptide growth factor whose primary function is to regulate the survival, proliferation, differentiation, and function of cells of the mononuclear phagocytic lineage. This lineage includes mononuclear phagocytic precursors, blood monocytes, tissue macrophages, osteoclasts, and microglia of the brain, all of which possess cell surface receptors for CSF-1. The protein has also been linked with male fertility and mutations in the Csf-1 gene have been found to cause osteopetrosis and failure of tooth eruption .. +PF05338 Protein of unknown function (DUF717)
Pfam-B_7144 (release 7.7). This family consists of several herpesvirus proteins of unknown function.. +PF05339 Protein of unknown function (DUF739)
Pfam-B_7696 (release 7.7). This family contains several bacteriophage proteins. Some of the proteins in this family have been labeled putative cro repressor proteins.. +PF05340 Protein of unknown function (DUF740)
Pfam-B_7873 (release 7.7). This family consists of several uncharacterised plant proteins of unknown function.. +PF05341 Protein of unknown function (DUF708)
Pfam-B_7259 (release 7.7). This family consists of several uncharacterised nucleopolyhedrovirus proteins of unknown function.. +PF05342 Peptidase_M26;
M26 IgA1-specific Metallo-endopeptidase N-terminal region. These peptidases, which cleave mammalian IgA, are found in Gram-positive bacteria. Often found associated with Pfam:PF00746, they may be attached to the cell wall.. +PF05343 M42 glutamyl aminopeptidase
These peptidases are found in Archaea and Bacteria. The example in Lactococcus lactis, PepA, aids growth on milk . Pyrococcus horikoshii contains a thermostable de-blocking aminopeptidase member of this family used commercially for N-terminal protein sequencing .. +PF05344 Domain of Unknown Function (DUF746)
Yeats C, Eberhardt R. This is a short conserved region found in some transposons. Structural modelling suggests this domain may bind nucleic acids .. +PF05345 Putative Ig domain
This alignment represents the conserved core region of ~90 residue repeat found in several haemagglutinins and other cell surface proteins. Sequence similarities to (Pfam:PF02494) and (Pfam:PF00801) suggest an Ig-like fold (personal obs:C. Yeats). So this family may be similar in function to the (Pfam:PF02639) and (Pfam:PF02638) domains. This domain is also found in the WisP family of proteins of Tropheryma whipplei ( ).. +PF05346 Eukaryotic membrane protein family
Pfam-B_13582 (release 7.8). This family is a family of eukaryotic membrane proteins. It was previously annotated as including a putative receptor for human cytomegalovirus gH but this has since been disputed . Analysis of the mouse Tapt1 protein (transmembrane anterior posterior transformation 1) has shown it to be involved in patterning of the vertebrate axial skeleton.. +PF05347 Complex 1 protein (LYR family)
Pfam-B_15215 (release 7.8). Proteins in this family include an accessory subunit of the higher eukaryotic NADH dehydrogenase complex. In Saccharomyces cerevisiae, the Isd11 protein (Swiss:Q6Q560) has been shown to play a role in Fe/S cluster biogenesis in mitochondria . We have named this family LYR after a highly conserved tripeptide motif close to the N-terminus of these proteins.. +PF05348 Proteasome maturation factor UMP1
Pfam-B_18845 (release 7.8). UMP1 is a short-lived chaperone present in the precursor form of the 20S proteasome and absent in the mature complex. UMP1 is required for the correct assembly and enzymatic activation of the proteasome. UMP1 seems to be degraded by the proteasome upon its formation. +PF05349 GATA-type transcription activator, N-terminal
+PF05350 Glycogen synthase kinase-3 binding
Pfam-B_18811 (release 7.8). Glycogen synthase kinase-3 (GSK-3) sequentially phosphorylates four serine residues on glycogen synthase (GS), in the sequence SxxxSxxxSxxx-SxxxS(p), by recognising and phosphorylating the first serine in the sequence motif SxxxS(P) (where S(p) represents a phosphoserine). Interaction of GSK-3 with a peptide derived from GSK-3 binding protein (this family) prevents GSK-3 interaction with Axin. This interaction thereby inhibits the Axin-dependent phosphorylation of beta-catenin by GSK-3 . . +PF05351 GMP-PDE, delta subunit
Pfam-B_13828 (release 7.8). GMP-PDE delta subunit was originally identified as a fourth subunit of rod-specific cGMP phosphodiesterase (PDE)(EC:3.1.4.35). The precise function of PDE delta subunit in the rod specific GMP-PDE complex is unclear. In addition, PDE delta subunit is not confined to photoreceptor cells but is widely distributed in different tissues. PDE delta subunit is thought to be a specific soluble transport factor for certain prenylated proteins and Arl2-GTP a regulator of PDE-mediated transport .. +PF05352 Phage Connector (GP10)
Pfam-B_13828 (release 7.8). The head-tail connector of bacteriophage 29 is composed of 12 36 kDa subunits with 12 fold symmetry. It is the central component of a rotary motor that packages the genomic dsDNA into pre-formed proheads. This motor consists of the head-tail connector, surrounded by a 29-encoded, 174-base, RNA and a viral ATPase protein [1,2].. +PF05353 Delta Atracotoxin
Pfam-B_30981 (release 7.8). Delta atracotoxin produces potentially fatal neurotoxic symptoms in primates by slowing the inactivation of voltage-gated sodium channels . The structure of atracotoxin comprises a core beta region containing a triple-stranded beta-sheet, a thumb-like extension protruding from the beta region and a C-terminal helix. The beta region contains a cystine knot motif, a feature seen in other neurotoxic polypeptides .. +PF05354 Phage Head-Tail Attachment
Pfam-B_59968 (release 7.8). The phage head-tail attachment protein is required for the joining of phage heads and tails at the last step of morphogenesis .. +PF05355 Apolipoprotein C-II
Pfam-B_6456 (release 7.8). Apolipoprotein C-II (ApoC-II) is the major activator of lipoprotein lipase, a key enzyme in the regulation of triglyceride levels in human serum .. +PF05356 Phage Coat protein B
Pfam-B_51500 (release 7.8). The major coat protein in the capsid of filamentous bacteriophage forms a helical assembly of about 7000 identical protomers, with each protomer comprised of 46 amino acids, after the cleavage of the signal peptide. Each protomer forms a slightly curved helix, and these helices combine to form a tubular structure that encapsulates the viral DNA .. +PF05357 Phage Coat Protein A
Pfam-B_7225 (release 7.8). Infection of Escherichia coli by filamentous bacteriophages is mediated by the minor phage coat protein A and involves two distinct cellular receptors, the F' pilus and the periplasmic protein TolA. These two receptors are contacted in a sequential manner, such that binding of TolA by the extreme N-terminal domain is conditional on a primary interaction of the second coat protein A domain with the F' pilus .. +PF05358 DicB protein
DicB is part of the dic operon, which resides on cryptic prophage Kim. Under normal conditions, expression of dicB is actively repressed. When expression is induced, however, cell division rapidly ceases, and this division block is dependent on MinC with which it interacts .. +PF05359 Domain of Unknown Function (DUF748)
+PF05360 yiaA/B two helix domain
This domain consists of two transmembrane helices and a conserved linking section.. +PF05361 PKC-activated protein phosphatase-1 inhibitor
Pfam-B_69711 (release 7.8). Contractility of vascular smooth muscle depends on phosphorylation of myosin light chains, and is modulated by hormonal control of myosin phosphatase activity. Signaling pathways activate kinases such as PKC or Rho-dependent kinases that phosphorylate the myosin phosphatase inhibitor protein called CPI-17. Phosphorylation of CPI-17 at Thr-38 enhances its inhibitory potency 1000-fold, creating a molecular switch for regulating contraction .. +PF05362 Lon protease (S16) C-terminal proteolytic domain
The Lon serine proteases must hydrolyse ATP to degrade protein substrates. In Escherichia coli, these proteases are involved in turnover of intracellular proteins, including abnormal proteins following heat-shock. The active site for protease activity resides in a C-terminal domain. The Lon proteases are classified as family S16 in Merops.. +PF05363 Herpesvirus US12 family
Pfam-B_62991 (release 7.8). US12 is a key factor in the evasion of cellular immune response against HSV-infected cells. Specific inhibition of the transporter associated with antigen processing (TAP) by US12 prevents peptide transport into the endoplasmic reticulum and subsequent loading of major histocompatibility complex (MHC) class I molecules . US12 is comprised of three helices and is associated with cellular membranes .. +PF05364 SecIII_SopE;
Salmonella type III secretion SopE effector N-terminus. Pfam-B_18665 (release 7.8). Salmonella typhimurium employs a type III secretion system to inject bacterial toxins into the host cell cytosol. These toxins transiently activate Rho family GTP-binding protein-dependent signaling cascades to induce cytoskeletal rearrangements. SopE, one of these toxins, can activate Cdc42 in a Dbl-like fashion via its C-terminal GEP domain Pfam:PF07487 . This family represents the N-terminal region of SopE. The function of this domain is unknown.. +PF05365 Ubiquinol-cytochrome C reductase, UQCRX/QCR9 like
Pfam-B_18986 (release 7.8). The UQCRX/QCR9 protein is the 9/10 subunit of complex III, encoding a protein of about 7-kDa. Deletion of QCR9 results in the inability of cells to grow on non-fermentable carbon sources in yeast .. +PF05366 Sarcolipin
Pfam-B_33603 (release 7.8). Sarcolipin is a 31 amino acid integral membrane protein that regulates Ca-ATPase activity in skeletal muscle .. +PF05367 Phage endonuclease I
Pfam-B_22152 (release 7.8). The bacteriophage endonuclease I is a nuclease that is selective for the structure of the four-way Holliday DNA junction .. +PF05368 NmrA-like family
Pfam-B_25329 (release 7.8). NmrA is a negative transcriptional regulator involved in the post-translational modification of the transcription factor AreA. NmrA is part of a system controlling nitrogen metabolite repression in fungi . This family only contains a few sequences as iteration results in significant matches to other Rossmann fold families.. +PF05369 Monomethylamine methyltransferase MtmB
Pfam-B_58618 (release 7.8). Monomethylamine methyltransferase of the archaebacterium Methanosarcina barkeri contains a novel amino acid, pyrrolysine, encoded by the termination codon UAG [1,2]. The structure reveals a homohexamer comprised of individual subunits with a TIM barrel fold .. +PF05370 Domain of unknown function (DUF749)
Pfam-B_54547 (release 7.8). Archaeal domain of unknown function. This domain has been solved as part of a structural genomics project and comprises segregated helical and anti-parallel beta sheet regions.. +PF05371 Phage major coat protein, Gp8
Pfam-B_31655 (release 7.8). Class I phage major coat protein Gp8 or B. The coat protein is largely alpha-helix with a slight curve . . +PF05372 Delta lysin family
Pfam-B_45919 (release 7.8). Delta-lysin is a 26 amino acid, hemolytic peptide toxin secreted by Staphylococcus aureus. It is thought that delta-toxin forms an amphipathic helix upon binding to lipid bilayers . The precise mode of action of delta-lysin is unclear.. +PF05373 L-proline 3-hydroxylase, C-terminal
Pfam-B_32425 (release 7.8). Iron (II)/2-oxoglutarate (2-OG)-dependent oxygenases catalyse oxidative reactions in a range of metabolic processes. Proline 3-hydroxylase hydroxylates proline at position 3, the first of a 2-OG oxygenase catalysing oxidation of a free alpha-amino acid. The structure contains conserved motifs present in other 2-OG oxygenases including a jelly roll strand core and residues binding iron and 2-oxoglutarate, consistent with divergent evolution within the extended family. The structure differs significantly from many other 2-OG oxygenases in possessing a discrete C-terminal helical domain.. +PF05374 Mu-Conotoxin
Pfam-B_34209 (release 7.8). Mu-conotoxins are peptide inhibitors of voltage-sensitive sodium channels .. +PF05375 Pacifastin inhibitor (LCMII)
Pfam-B_35181 (release 7.8). Structures of members of this family show that they are comprised of a triple-stranded antiparallel beta-sheet connected by three disulfide bridges, which defines this as a novel family of serine protease inhibitors [1,2]. . +PF05377 Flagella accessory protein C (FlaC)
Although archaeal flagella appear superficially similar to those of bacteria, they are quite distinct . In several archaea, the flagellin genes are followed immediately by the flagellar accessory genes flaCDEFGHIJ. The gene products may have a role in translocation, secretion, or assembly of the flagellum. FlaC is a protein whose exact role is unknown but it has been shown to be membrane-associated (by immuno-blotting fractionated cells) .. +PF05378 Hydantoinase/oxoprolinase N-terminal region
This family is found at the N-terminus of the Pfam:PF01968 family.. +PF05379 Carlavirus endopeptidase
A peptidase involved in auto-proteolysis of a polyprotein from the plant pathogen blueberry scorch carlavirus (BBScV) . Corresponds to Merops family C23. . +PF05380 Pao retrotransposon peptidase
Corresponds to Merops family A17. These proteins are homologous to aspartic proteinases encoded by retroposons and retroviruses.. +PF05381 Tymovirus endopeptidase
Corresponds to Merops family C21. The best-studied plant alpha-like virus proteolytic enzyme is the proteinase of turnip yellow mosaic virus (TYMV). The TYMV replicase protein undergoes auto-cleavage to yield two products. The auto-peptidase activity has been mapped to the central part of this polyprotein.. +PF05382 Bacteriophage peptidoglycan hydrolase
Pfam-B_6845 (Pfam7.8). At least one of the members of this family, the Pal protein from the pneumococcal bacteriophage Dp-1 Swiss:O03979 has been shown to be a N-acetylmuramoyl-L-alanine amidase . According to the known modular structure of this and other peptidoglycan hydrolases from the pneumococcal system, the active site should reside at the N-terminal domain whereas the C-terminal domain binds to the choline residues of the cell wall teichoic acids [2,3]. This family appears to be related to Pfam:PF00877.. +PF05383 La domain
This presumed domain is found at the N-terminus of La RNA-binding proteins as well as other proteins . The function of this region is uncertain.. +PF05384 Sensor protein DegS
This is a small family of Bacillus DegS proteins. The DegS-DegU two-component regulatory system of Bacillus subtilis controls various processes that characterise the transition from the exponential to the stationary growth phase, including the induction of extracellular degradative enzymes, expression of late competence genes and down-regulation of the sigma D regulon . The family also contains one sequence Swiss:Q8R9D3 from Thermoanaerobacter tengcongensis which is described as a sensory transduction histidine kinase.. +PF05385 M_adenovirusE4;
Mastadenovirus early E4 13 kDa protein. This family consists of human and simian mastadenovirus early E4 13 kDa proteins. Human adenovirus type 9 (Ad9) is unique in eliciting exclusively estrogen-dependent mammary tumours in rats and in not requiring viral E1 region transforming genes for tumorigenicity. E4 codes for an oncoprotein essential for tumourigenesis by Ad9 .. +PF05386 TEP1 N-terminal domain
This short sequence region is found in four copies at the N-terminus of the TEP1 telomerase component. The functional significance of the region is uncertain. However the conservation of two histidines and a cysteine suggests it is a potential zinc binding domain.. +PF05387 Chorion family 3
This family consists of several Drosophila chorion proteins S36 and S38. The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary .. +PF05388 Carboxypeptidase Y pro-peptide
This family is found at the N terminus of several carboxypeptidase Y proteins and contains a signal peptide and pro-peptide regions [1,2].. +PF05389 Negative regulator of genetic competence (MecA)
This family contains several bacterial MecA proteins. The development of competence in Bacillus subtilis is regulated by growth conditions and several regulatory genes. In complex media competence development is poor, and there is little or no expression of late competence genes. Mec mutations permit competence development and late competence gene expression in complex media, bypassing the requirements for many of the competence regulatory genes. The mecA gene product acts negatively in the development of competence. Null mutations in mecA allow expression of a late competence gene comG, under conditions where it is not normally expressed, including in complex media and in cells mutant for several competence regulatory genes. Overexpression of MecA inhibits comG transcription [1,2,3].. +PF05390 Yeast cell wall synthesis protein KRE9/KNH1
This family contains several KRE9 and KNH1 proteins which are involved in encoding cell surface O glycoproteins, which are required for beta-1,6-glucan synthesis in yeast .. +PF05391 Lsm interaction motif
This short motif is found at the C-terminus of Prp24 proteins and probably interacts with the Lsm proteins to promote U4/U6 formation .. +PF05392 Cytochrome C oxidase chain VIIB
+PF05393 Human adenovirus early E3A glycoprotein
This family consists of several early glycoproteins from human adenoviruses.. +PF05394 Pseu_avirulence;
This family consists of several avirulence proteins from Pseudomonas syringae and Xanthomonas campestris.. +PF05395 Protein phosphatase inhibitor 1/DARPP-32
This family consists of several mammalian protein phosphatase inhibitor 1 (IPP-1) and dopamine- and cAMP-regulated neuronal phosphoprotein (DARPP-32) proteins. Protein phosphatase inhibitor-1 is involved in signal transduction and is an endogenous inhibitor of protein phosphatase-1 . It has been demonstrated that DARPP-32, if phosphorylated, can inhibit protein-phosphatase-1 . DARPP-32 has a key role in many neurotransmitter pathways throughout the brain and has been shown to be involved in controlling receptors, ion channels and other physiological factors including the brain's response to drugs of abuse, such as cocaine, opiates and nicotine. DARPP-32 is reciprocally regulated by the two neurotransmitters that are most often implicated in schizophrenia - dopamine and glutamate. Dopamine activates DARPP-32 through the D1 receptor pathway and disables DARPP-32 through the D2 receptor. Glutamate, acting through the N-methyl-d-aspartate receptor, renders DARPP-32 inactive . A mutant form of DARPP-32 has been linked with gastric cancers .. +PF05396 Phage T7 capsid assembly protein
+PF05397 GAL11;
Mediator complex subunit 15. GAL11 or MED15 is one of the up to 32 or so subunits of the Mediator complex which is found from fungi to humans . The Mediator complex interacts with RNA polymerase II and other general transcription factors to form the RNA polymerase II holoenzyme , thereby affecting transcription through targeting of activators and repressors . This family is found in fungi and the small metazoan starlet anemone.. +PF05398 PufQ cytochrome subunit
This family consists of bacterial PufQ proteins. PufQ is required for bacteriochlorophyll biosynthesis serving a regulatory function in the formation of photosynthetic complexes .. +PF05399 Ecotropic viral integration site 2A protein (EVI2A)
This family contains several mammalian ecotropic viral integration site 2A (EVI2A) proteins. The function of this protein is unknown although it is thought to be a membrane protein and may function as an oncogene in retrovirus induced myeloid tumours [1,2].. +PF05400 Flagellar protein FliT
This family contains several bacterial flagellar FliT proteins. The flagellar proteins FlgN and FliT have been proposed to act as substrate specific export chaperones, facilitating incorporation of the enterobacterial hook-associated axial proteins (HAPs) FlgK/FlgL and FliD into the growing flagellum. In Salmonella typhimurium flgN and fliT mutants, the export of target HAPs is reduced, concomitant with loss of unincorporated flagellin into the surrounding medium . . +PF05401 Nodulation protein S (NodS)
This family consists of nodulation S (NodS) proteins. The products of the rhizobial nodulation genes are involved in the biosynthesis of lipochitin oligosaccharides (LCOs), which are host-specific signal molecules required for nodule formation. NodS is an S-adenosyl-L-methionine (SAM)-dependent methyltransferase involved in N methylation of LCOs. NodS uses N-deacetylated chitooligosaccharides, the products of the NodBC proteins, as its methyl acceptors .. +PF05402 Coenzyme PQQ synthesis protein D (PqqD)
This family contains several bacterial coenzyme PQQ synthesis protein D (PqqD) sequences. This protein is required for coenzyme pyrrolo-quinoline-quinone (PQQ) biosynthesis [1,2].. +PF05403 Plasmodium histidine-rich protein (HRPII/III)
This family consists of several histidine-rich protein II and III sequences from Plasmodium falciparum [1,2].. +PF05404 Translocon-associated protein, delta subunit precursor (TRAP-delta)
Pfam-B_7178 (release 7.7). This family consists of several eukaryotic translocon-associated protein, delta subunit precursors (TRAP-delta or SSR-delta). The exact function of this protein is unknown [1,2].. +PF05405 Mitochondrial ATP synthase B chain precursor (ATP-synt_B)
Pfam-B_7506 (release 7.7). The Fo sector of the ATP synthase is a membrane bound complex which mediates proton transport. It is composed of nine different polypeptide subunits (a, b, c, d, e, f, g F6, A6L) .. +PF05406 WGR domain
This domain is found in a variety of polyA polymerases as well as the E. coli molybdate metabolism regulator Swiss:P33345 and other proteins of unknown function. I have called this domain WGR after the most conserved central motif of the domain. The domain is found in isolation in proteins such as Swiss:Q9JN21 and is between 70 and 80 residues in length. I propose that this may be a nucleic acid binding domain.. +PF05407 Rubella virus endopeptidase
Corresponds to Merops family C27. Required for processing of the rubella virus replication protein.. +PF05408 Foot-and-mouth virus L-proteinase
Corresponds to Merops family C28. Protein fold of the peptidase unit for members of this family resembles that of papain. The leader proteinase of foot and mouth disease virus (FMDV) cleaves itself from the growing polyprotein and also cleaves the host translation initiation factor 4GI (eIF4G), thus inhibiting 5'-cap dependent translation.. +PF05409 Coronavirus endopeptidase C30
Corresponds to Merops family C30. These peptidases are involved in viral polyprotein processing in replication.. +PF05410 Porcine arterivirus-type cysteine proteinase alpha
Corresponds to Merops family C31. These peptidases are involved in viral polyprotein processing in replication.. +PF05411 Equine arteritis virus putative proteinase
These proteins are characterised by a region that has been proposed to have peptidase activity involved in viral polyprotein processing in replication.. +PF05412 Equine arterivirus Nsp2-type cysteine proteinase
Corresponds to Merops family C33. These peptidases are involved in viral polyprotein processing in replication.. +PF05413 Putative closterovirus papain-like endopeptidase
Corresponds to Merops family C34. Putative closterovirus papain-like endopeptidase from the apple chlorotic leaf spot closterovirus.. +PF05414 Peptidase_C35;
Viral domain of unknown function (DUF1717). This domain is found in viral proteins of unknown function.. +PF05415 Beet necrotic yellow vein furovirus-type papain-like endopeptidase
Corresponds to Merops family C36. This protease is involved in processing the viral polyprotein.. +PF05416 Southampton virus-type processing peptidase
Corresponds to Merops family C37. Norwalk-like viruses (NLVs), including the Southampton virus, cause acute non-bacterial gastroenteritis in humans. The NLV genome encodes three open reading frames (ORFs). ORF1 encodes a polyprotein, which is processed by the viral protease into six proteins.. +PF05417 Hepatitis E cysteine protease
Corresponds to MEROPs family C41. This papain-like protease cleaves the viral polyprotein encoded by ORF1 of the hepatitis E virus (HEV).. +PF05418 Apovitellenin I (Apo-VLDL-II)
This family consists of several avian apovitellenin I sequences. As part of the avian reproductive effort, large quantities of triglyceride-rich very-low-density lipoprotein (VLDL) particles are transported by receptor-mediated endocytosis into the female germ cells. Although the oocytes are surrounded by a layer of granulosa cells harbouring high levels of active lipoprotein lipase, non-lipolysed VLDL is transported into the yolk. This is because VLDL particles from laying chickens are protected from lipolysis by apolipoprotein (apo)-VLDL-II, a potent dimeric lipoprotein lipase inhibitor . Apo-VLDL-II is produced in the liver and secreted into the blood stream when induced by estrogen production in female birds.. +PF05419 GUN4-like
In Arabidopsis, GUN4 is required for the functioning of the plastid mediated repression of nuclear transcription that is involved in controlling the levels of magnesium-protoporphyrin IX. GUN4 binds the product and substrate of Mg-chelatase, an enzyme that produces Mg-Proto, and activates Mg-chelatase. GUN4 is thought to participate in plastid-to-nucleus signaling by regulating magnesium-protoporphyrin IX synthesis or trafficking.. +PF05420 BCSC_N;
Cellulose synthase operon protein C C-terminus (BCSC_C). Pfam-B_10335 (release 8.0). This family contains the C-terminal regions of several bacterial cellulose synthase operon C (BCSC) proteins. BCSC is involved in cellulose synthesis although the exact function of this protein is unknown .. +PF05421 Protein of unknown function (DUF751)
Pfam-B_10849 (release 8.0). This family contains several plant, cyanobacterial and algal proteins of unknown function. The family is exclusively found in phototrophic organisms and may therefore play a role in photosynthesis (personal obs:Moxon SJ).. +PF05422 Stress-activated map kinase interacting protein 1 (SIN1)
Pfam-B_10677 (release 8.0). This family consists of several stress-activated map kinase interacting protein 1 (MAPKAP1 OR SIN1) sequences. The fission yeast Sty1/Spc1 mitogen-activated protein (MAP) kinase is a member of the eukaryotic stress-activated MAP kinase (SAPK) family. Sin1 interacts with Sty1/Spc1. Cells lacking Sin1 display many, but not all, of the phenotypes of cells lacking the Sty1/Spc1 MAP kinase including sterility, multiple stress sensitivity and a cell-cycle delay. Sin1 is phosphorylated after stress but this is not Sty1/Spc1-dependent . . +PF05423 Mycobacterium membrane protein
Pfam-B_10885 (release 8.0). This family contains several membrane proteins from Mycobacterium species.. +PF05424 Duffy binding domain
Pfam-B_11112 (release 8.0). This domain is found in Plasmodium Duffy binding proteins. Plasmodium vivax and Plasmodium knowlesi merozoites invade human erythrocytes that express Duffy blood group surface determinants. The Duffy receptor family is localised in micronemes, an organelle found in all organisms of the phylum Apicomplexa . This family is closely associated on PfEMP1 proteins with PFEMP, Pfam:PF03011.. +PF05425 Copper resistance protein D
Pfam-B_19002 (release 8.0). Copper sequestering activity displayed by some bacteria is determined by copper-binding protein products of the copper resistance operon (cop). CopD, together with CopC, performs copper uptake into the cytoplasm .. +PF05426 Alginate lyase
Moxon SJ, Mistry J, Murzin A. Pfam-B_11800 (release 8.0). This family contains several bacterial alginate lyase proteins. Alginate is a family of 1-4-linked copolymers of beta-D-mannuronic acid (M) and alpha-L-guluronic acid (G). It is produced by brown algae and by some bacteria belonging to the genera Azotobacter and Pseudomonas. Alginate lyases catalyse the depolymerisation of alginates by beta-elimination, generating a molecule containing 4-deoxy-L-erythro-hex-4-enepyranosyluronate at the nonreducing end . This family adopts an all alpha fold .. +PF05427 Acidic fibroblast growth factor binding (FIBP)
Pfam-B_19083 (release 8.0). Acidic fibroblast growth factor (aFGF) intracellular binding protein (FIBP) is a protein found mainly in the nucleus that is thought to be involved in the intracellular function of aFGF .. +PF05428 Corticotropin-releasing factor binding protein (CRF-BP)
Pfam-B_11928 (release 8.0). This family consists of several eukaryotic corticotropin-releasing factor binding proteins (CRF-BP or CRH-BP). Corticotropin-releasing hormone (CRH) plays multiple roles in vertebrate species. In mammals, it is the major hypothalamic releasing factor for pituitary adrenocorticotropin secretion, and is a neurotransmitter or neuromodulator at other sites in the central nervous system. In non-mammalian vertebrates, CRH not only acts as a neurotransmitter and hypophysiotropin, it also acts as a potent thyrotropin-releasing factor, allowing CRH to regulate both the adrenal and thyroid axes, especially in development. CRH-BP is thought to play an inhibitory role in which it binds CRH and other CRH-like ligands and prevents the activation of CRH receptors. There is however evidence that CRH-BP may also exhibit diverse extra and intracellular roles in a cell specific fashion and at specific times in development .. +PF05430 DUF752;
S-adenosyl-L-methionine-dependent methyltransferase. Moxon SJ, Eberhardt R. Pfam-B_12088 (release 8.0). This family is a S-adenosyl-L-methionine (SAM)-dependent methyltransferase. It is often found in association with Pfam:PF01266, where it is responsible for catalysing the transfer of a methyl group from S-adenosyl-L-methionine to 5-aminomethyl-2-thiouridine to form 5-methylaminomethyl-2-thiouridine [1,2].. +PF05431 Toxin_P42;
Insecticidal Crystal Toxin, P42 . Pfam-B_19338 (release 8.0). Family of Bacillus insecticidal crystal toxins. Strains of Bacillus that have this insecticidal activity use a binary toxin comprised of two proteins, P51 and P42 (this family). Members of this family are highly conserved between strains of different serotypes and phage groups .. +PF05432 Bone sialoprotein II (BSP-II)
Pfam-B_12103 (release 8.0). Bone sialoprotein (BSP) is a major structural protein of the bone matrix that is specifically expressed by fully-differentiated osteoblasts . The expression of bone sialoprotein (BSP) is normally restricted to mineralised connective tissues of bones and teeth where it has been associated with mineral crystal formation. However, it has been found that ectopic expression of BSP occurs in various lesions, including oral and extraoral carcinomas, in which it has been associated with the formation of microcrystalline deposits and the metastasis of cancer cells to bone . . +PF05433 Glycine zipper 2TM domain
Pfam-B_13382 (Rel 8.0) & Pfam-B_6 (Rel 24.0). This family includes a putative two transmembrane alpha-helical region that contains glycine zipper motifs . This family includes several Rickettsia genus specific 17 kDa surface antigen proteins .. +PF05434 TMEM9;
Pfam-B_12447 (release 8.0). This family contains several eukaryotic transmembrane proteins which are homologous to human transmembrane protein 9 Swiss:Q9P0T7. The TMEM9 gene encodes a 183 amino-acid protein that contains an N-terminal signal peptide, a single transmembrane region, three potential N-glycosylation sites and three conserved cys-rich domains in the N-terminus, but no known functional domains. The protein is highly conserved between species from Caenorhabditis elegans to man and belongs to a novel family of transmembrane proteins. The exact function of TMEM9 is unknown although it has been found to be widely expressed and localised to the late endosomes and lysosomes . Members of this family contain Pfam:PF03128 repeats in their N-terminal region.. +PF05435 Phi-29 DNA terminal protein GP3
Pfam-B_14111 (release 8.0). This family consists of DNA terminal protein GP3 sequences from Phi-29 like bacteriophages. DNA terminal protein GP3 is linked to the 5' ends of both strands of the genome through a phosphodiester bond between the beta-hydroxyl group of a serine residue and the 5'-phosphate of the terminal deoxyadenylate. This protein is essential for DNA replication and is involved in the priming of DNA elongation .. +PF05436 Mating factor alpha precursor N-terminus
Pfam-B_12643 (release 8.0). This family contains the N-terminal regions of the Saccharomyces mating factor alpha precursor protein. All proteins in this family contain one or more copies Pfam:PF04648 further toward their C terminus.. +PF05437 Branched-chain amino acid transport protein (AzlD)
Pfam-B_14345 (release 8.0). This family consists of a number of bacterial and archaeal branched-chain amino acid transport proteins. AzlD is known to be involved in conferring resistance to 4-azaleucine although its exact role is uncertain .. +PF05438 Thyrotropin-releasing hormone (TRH)
Pfam-B_14384 (release 8.0). This family consists of several thyrotropin-releasing hormone (TRH) proteins. Thyrotropin-Releasing Hormone (TRH; pyroGlu-His-Pro-NH2), originally isolated as a hypothalamic neuropeptide hormone, most likely acts also as a neuromodulator and/or neurotransmitter in the central nervous system (CNS). This interpretation is supported by the identification of a peptidase localised on the surface of neuronal cells which has been termed TRH-degrading ectoenzyme (TRH-DE) since it selectively inactivates TRH . TRH has been used clinically for the treatment of spinocerebellar degeneration and disturbance of consciousness in humans .. +PF05439 Jumping translocation breakpoint protein (JTB)
Pfam-B_14502 (release 8.0). This family contains several jumping translocation breakpoint proteins or JTBs. Jumping translocation (JT) is an unbalanced translocation that comprises amplified chromosomal segments jumping to various telomeres. JTB, located at 1q21, has been found to fuse with the telomeric repeats of acceptor telomeres in a case of JT. hJTB (human JTB) encodes a trans-membrane protein that is highly conserved among divergent eukaryotic species. JT results in a hJTB truncation, which potentially produces an hJTB product devoid of the trans-membrane domain. hJTB is located in a gene-rich region at 1q21, called EDC (Epidermal Differentiation Complex) . JTB has also been implicated in prostatic carcinomas .. +PF05440 Tetrahydromethanopterin S-methyltransferase subunit B
Pfam-B_15021 (release 8.0). The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump . . +PF05443 ROS/MUCR transcriptional regulator protein
Pfam-B_1733 (release 8.0). This family consists of several ROS/MUCR transcriptional regulator proteins. The ros chromosomal gene is present in octopine and nopaline strains of Agrobacterium tumefaciens as well as in Rhizobium meliloti. This gene encodes a 15.5-kDa protein that specifically represses the virC and virD operons in the virulence region of the Ti plasmid and is necessary for succinoglycan production . Sinorhizobium meliloti can produce two types of acidic exopolysaccharides, succinoglycan and galactoglucan, that are interchangeable for infection of alfalfa nodules. MucR from Sinorhizobium meliloti acts as a transcriptional repressor that blocks the expression of the exp genes responsible for galactoglucan production therefore allowing the exclusive production of succinoglycan .. +PF05444 Protein of unknown function (DUF753)
Pfam-B_1957 (release 8.0). This family contains sequences which are repeated in several uncharacterised proteins from Drosophila melanogaster.. +PF05445 Poxvirus serine/threonine protein kinase
Pfam-B_1974 (release 8.0). +PF05448 Acetyl xylan esterase (AXE1)
Pfam-B_4814 (release 8.0). This family consists of several bacterial acetyl xylan esterase proteins. Acetyl xylan esterases are enzymes that hydrolyse the ester linkages of the acetyl groups in position 2 and/or 3 of the xylose moieties of natural acetylated xylan from hardwood. These enzymes are one of the accessory enzymes which are part of the xylanolytic system, together with xylanases, beta-xylosidases, alpha-arabinofuranosidases and methylglucuronidases; these are all required for the complete hydrolysis of xylan .. +PF05449 Protein of unknown function (DUF754)
This domain appears to be found in a group of prophage proteins.. +PF05450 Nicastrin
Pfam-B_15452 (release 8.0). Nicastrin and presenilin are two major components of the gamma-secretase complex, which executes the intramembrane proteolysis of type I integral membrane proteins such as the amyloid precursor protein (APP) and Notch. Nicastrin is synthesised in fibroblasts and neurons as an endoglycosidase-H-sensitive glycosylated precursor protein (immature nicastrin) and is then modified by complex glycosylation in the Golgi apparatus and by sialylation in the trans-Golgi network (mature nicastrin) . A region featured in this family has a fold similar to human transferrin receptor (TfR, Swiss:P02786) and a bacterial aminopeptidase (Swiss:P80561). It is implicated in the pathogenesis of Alzheimer's disease .. +PF05451 Phytoreovirus nonstructural protein Pns10/11
Pfam-B_15695 (release 8.0). This family consists of Phytoreovirus nonstructural proteins Pns10 and Pns11. Genome segment S11 of rice gall dwarf virus (RGDV), a member of Phytoreovirus encodes a putative protein of 40 kDa that exhibits approximately 37% homology at the amino acid level to the nonstructural proteins Pns10 of rice dwarf and wound tumour viruses, which are other members of Phytoreovirus .. +PF05452 Clavanin
Pfam-B_15887 (release 8.0). This family consists of clavanin proteins from the haemocytes of the invertebrate Styela clava, a solitary tunicate. The family is made up of four alpha-helical antimicrobial peptides, clavanins A, B, C and D. The tunicate peptides resemble magainins in size, primary sequence and antibacterial activity. Synthetic clavanin A displays comparable antimicrobial activity to magainins and cecropins. The presence of alpha-helical antimicrobial peptides in the haemocytes of a urochordate suggests that such peptides are primeval effectors of innate immunity in the vertebrate lineage .. +PF05453 toxin_6;
BmTXKS1/BmP02 toxin family. Pfam-B_16009 (release 8.0). This family consists of toxin-like peptides that are isolated from the venom of Buthus martensii Karsch scorpion. The precursor consists of 60 amino acid residues, with a putative signal peptide of 28 residues and an extra residue, and a mature peptide of 31 residues with an amidated C-terminal. The peptides share close homology with other scorpion K+ channel toxins and should present a common three-dimensional fold - the Cysteine -Stabilised alphabeta (CSalphabeta) motif . This family acts by blocking small conductance calcium activated potassium ion channels in their victim [1,2].. +PF05454 Dystroglycan (Dystrophin-associated glycoprotein 1)
Pfam-B_15784 (release 8.0). Dystroglycan is one of the dystrophin-associated glycoproteins, which is encoded by a 5.5 kb transcript in human. The protein product is cleaved into two non-covalently associated subunits, [alpha] (N-terminal) and [beta] (C-terminal). In skeletal muscle the dystroglycan complex works as a transmembrane linkage between the extracellular matrix and the cytoskeleton. [alpha]-dystroglycan is extracellular and binds to merosin ([alpha]-2 laminin) in the basement membrane, while [beta]-dystroglycan is a transmembrane protein and binds to dystrophin, which is a large rod-like cytoskeletal protein, absent in Duchenne muscular dystrophy patients. Dystrophin binds to intracellular actin cables. In this way, the dystroglycan complex, which links the extracellular matrix to the intracellular actin cables, is thought to provide structural integrity in muscle tissues. The dystroglycan complex is also known to serve as an agrin receptor in muscle, where it may regulate agrin-induced acetylcholine receptor clustering at the neuromuscular junction. There is also evidence which suggests the function of dystroglycan as a part of the signal transduction pathway because it is shown that Grb2, a mediator of the Ras-related signal pathway, can interact with the cytoplasmic domain of dystroglycan. In general, aberrant expression of dystrophin-associated protein complex underlies the pathogenesis of Duchenne muscular dystrophy, Becker muscular dystrophy and severe childhood autosomal recessive muscular dystrophy. Interestingly, no genetic disease has been described for either [alpha]- or [beta]-dystroglycan. Dystroglycan is widely distributed in non-muscle tissues as well as in muscle tissues. During epithelial morphogenesis of kidney, the dystroglycan complex is shown to act as a receptor for the basement membrane. Dystroglycan expression in mouse brain and neural retina has also been reported. 
However, the physiological role of dystroglycan in non-muscle tissues has remained unclear . . +PF05455 GvpH
Pfam-B_16017 (release 8.0). This family consists of archaeal GvpH proteins which are thought to be involved in gas vesicle synthesis .. +PF05456 EIF4EBP;
Eukaryotic translation initiation factor 4E binding protein (EIF4EBP). Pfam-B_5573 (release 8.0). This family consists of several eukaryotic translation initiation factor 4E binding proteins (EIF4EBP1, 2 and 3). Translation initiation in eukaryotes is mediated by the cap structure (m7GpppN, where N is any nucleotide) present at the 5' end of all cellular mRNAs, except organellar. The cap is recognised by eukaryotic initiation factor 4F (eIF4F), which consists of three polypeptides, including eIF4E, the cap-binding protein subunit. The interaction of the cap with eIF4E facilitates the binding of the ribosome to the mRNA. eIF4E activity is regulated in part by translational repressors, 4E-BP1, 4E-BP2 and 4E-BP3 which bind to it and prevent its assembly into eIF4F .. +PF05458 Cd27 binding protein (Siva)
Pfam-B_5606 (release 8.0). Siva binds to the CD27 cytoplasmic tail. It has a DD homology region, a box-B-like ring finger, and a zinc finger-like domain. Overexpression of Siva in various cell lines induces apoptosis, suggesting an important role for Siva in the CD27-transduced apoptotic pathway . Siva-1 binds to and inhibits BCL-X(L)-mediated protection against UV radiation-induced apoptosis. Indeed, the unique amphipathic helical region (SAH) present in Siva-1 is required for its binding to BCL-X(L) and sensitising cells to UV radiation. Natural complexes of Siva-1/BCL-X(L) are detected in HUT78 and murine thymocyte, suggesting a potential role for Siva-1 in regulating T cell homeostasis . This family contains both Siva-1 and the shorter Siva-2 lacking the sequence coded by exon 2. It has been suggested that Siva-2 could regulate the function of Siva-1 .. +PF05459 Herpesvirus transcriptional regulator family
Pfam-B_15401 (release 8.0). This family includes UL69 and IE63 that are transcriptional regulator proteins.. +PF05460 ORC6; ORC6_1;
Origin recognition complex subunit 6 (ORC6). Pfam-B_16189 (release 8.0). This family consists of several eukaryotic origin recognition complex subunit 6 (ORC6) proteins. Despite differences in their structure and sequences among eukaryotic replicators, ORC is a conserved feature of replication initiation in all eukaryotes. ORC-related genes have been identified in organisms ranging from S. pombe to plants to humans. All DNA replication initiation is driven by a single conserved eukaryotic initiator complex termed the origin recognition complex (ORC). The ORC is a six protein complex. The function of ORC is reviewed in .. +PF05461 Apolipoprotein L
Pfam-B_16464 (release 8.0). Apo L belongs to the high density lipoprotein family that plays a central role in cholesterol transport. The cholesterol content of membranes is important in cellular processes such as modulating gene transcription and signal transduction both in the adult brain and during neurodevelopment. There are six apo L genes located in close proximity to each other on chromosome 22q12 in humans. 22q12 is a confirmed high-susceptibility locus for schizophrenia and close to the region associated with velocardiofacial syndrome that includes symptoms of schizophrenia .. +PF05462 Dict_CAR;
Slime mold cyclic AMP receptor. Pfam-B_16614 (release 8.0). This family consists of cyclic AMP receptor (CAR) proteins from slime molds. CAR proteins are responsible for controlling development in Dictyostelium discoideum [1,2,3,4].. +PF05463 Sclerostin (SOST)
Pfam-B_16740 (release 8.0). This family contains several mammalian sclerostin (SOST) proteins. SOST is thought to suppress bone formation. Mutations of the SOST gene lead to sclerosteosis, a progressive sclerosing bone dysplasia with an autosomal recessive mode of inheritance. Radiologically, it is characterised by a generalised hyperostosis and sclerosis leading to a markedly thickened and sclerotic skull, with mandible, ribs, clavicles and all long bones also being affected. Due to narrowing of the foramina of the cranial nerves, facial nerve palsy, hearing loss and atrophy of the optic nerves can occur. Sclerosteosis is clinically and radiologically very similar to van Buchem disease, mainly differentiated by hand malformations and a large stature in sclerosteosis patients .. +PF05464 Phi-29-like late genes activator (early protein GP4)
Pfam-B_16889 (release 8.0). This family consists of phi-29-like late genes activator (or early protein GP4). This protein is thought to be a positive regulator of late transcription and may function as a sigma like component of the host RNA polymerase .. +PF05465 Halobacterial gas vesicle protein C (GVPC) repeat
Pfam-B_17013 (release 8.0). This family consists of Halobacterium gas vesicle protein C sequences which are thought to confer stability to the gas vesicle membranes [1,2].. +PF05466 Brain acid soluble protein 1 (BASP1 protein)
Pfam-B_16137 (release 8.0). This family consists of several brain acid soluble protein 1 (BASP1) or neuronal axonal membrane protein NAP-22. The BASP1 is a neuron enriched Ca(2+)-dependent calmodulin-binding protein of unknown function [1,2].. +PF05467 Herpesvirus glycoprotein U47
Pfam-B_17115 (release 8.0). +PF05470 eIF3c_N;
Eukaryotic translation initiation factor 3 subunit 8 N-terminus. Pfam-B_17447 (release 8.0). The largest of the mammalian translation initiation factors, eIF3, consists of at least eight subunits ranging in mass from 35 to 170 kDa. eIF3 binds to the 40 S ribosome in an early step of translation initiation and promotes the binding of methionyl-tRNAi and mRNA .. +PF05472 DNA replication terminus site-binding protein (Ter protein)
Pfam-B_17662 (release 8.0). This family contains several bacterial Ter proteins. The Ter protein specifically binds to DNA replication terminus sites on the host and plasmid genome and then blocks progress of the DNA replication fork .. +PF05473 UL45 protein
Pfam-B_17674 (release 8.0) & Pfam-B_16138 (release 10.0). This family consists of several UL45 proteins. The herpes simplex virus UL45 gene encodes an 18 kDa virion envelope protein whose function remains unknown. It has been suggested that the 18 kDa UL45 gene product is required for efficient growth in the central nervous system at low doses and may play an important role under the conditions of a naturally acquired infection . This family also contains several Varicellovirus UL45 or gene 15 proteins. The Equine herpesvirus 1 UL45 protein represents a type II membrane glycoprotein which has been found to be non-essential for EHV-1 growth in vitro but deletion reduces the virus's replication efficiency .. +PF05474 Semenogelin
Pfam-B_18147 (release 8.0). This family consists of several mammalian semenogelin (I and II) proteins. Freshly ejaculated human semen has the appearance of a loose gel in which the predominant structural protein components are the seminal vesicle secreted semenogelins (Sg) .. +PF05475 Chlamydia_vir;
Chlamydia virulence protein PGP3-D. Pfam-B_18238 (release 8.0). This family consists of Chlamydia virulence proteins which are thought to be required for growth within mammalian cells .. +PF05476 PET122
Pfam-B_18328 (release 8.0). The nuclear PET122 gene of S. cerevisiae encodes a mitochondrial-localised protein that activates initiation of translation of the mitochondrial mRNA from the COX3 gene, which encodes subunit III of cytochrome c oxidase . . +PF05477 Surfeit locus protein 2 (SURF2)
Pfam-B_18437 (release 8.0) . Surfeit locus protein 2 is part of a group of at least six sequence unrelated genes (Surf-1 to Surf-6). The six Surfeit genes have been classified as housekeeping genes, being expressed in all tissue types tested and not containing a TATA box in their promoter region. The exact function of SURF2 is unknown .. +PF05478 Prominin
Pfam-B_18226 (release 8.0). The prominins are an emerging family of proteins that among the multispan membrane proteins display a novel topology. Mouse prominin and human prominin (mouse)-like 1 (PROML1) are predicted to contain five membrane spanning domains, with an N-terminal domain exposed to the extracellular space followed by four, alternating small cytoplasmic and large extracellular, loops and a cytoplasmic C-terminal domain . The exact function of prominin is unknown although in humans defects in PROM1, the gene coding for prominin, cause retinal degeneration .. +PF05479 Photosystem I reaction centre subunit N (PSAN or PSI-N)
Pfam-B_18582 (release 8.0). This family contains several Photosystem I reaction centre subunit N (PSI-N) proteins. The protein has no known function although it is localised in the thylakoid lumen . PSI-N is a small extrinsic subunit at the lumen side and is very likely involved in the docking of plastocyanin .. +PF05480 Staphylococcus haemolytic protein
Pfam-B_18620 (release 8.0). This family consists of several different short Staphylococcal proteins, it contains SLUSH A, B and C proteins as well as haemolysin and gonococcal growth inhibitor. Some strains of the coagulase-negative Staphylococcus lugdunensis produce a synergistic hemolytic activity (SLUSH), phenotypically similar to the delta-hemolysin of S. aureus . Gonococcal growth inhibitor from Staphylococcus act on the cytoplasmic membrane of the gonococcal cell causing cytoplasmic leakage and, eventually, death .. +PF05481 Mycobacterium 19 kDa lipoprotein antigen
Pfam-B_19467 (release 8.0). Most of the antigens of Mycobacterium leprae and M. tuberculosis that have been identified are members of stress protein families, which are highly conserved throughout many diverse species. Of the M. leprae and M. tuberculosis antigens identified by monoclonal antibodies, all except the 18-kDa M. leprae antigen and the 19-kDa M. tuberculosis antigen are strongly cross-reactive between these two species and are coded within very similar genes [1,2].. +PF05482 Serendipity locus alpha protein (SRY-A)
Pfam-B_19519 (release 8.0). The Drosophila serendipity alpha (sry alpha) gene is specifically transcribed at the blastoderm stage, from nuclear cycle 11 to the onset of gastrulation, in all somatic nuclei . SRY-A is required for the cellularisation of the embryo and is involved in the localisation of the actin filaments just prior to and during plasma membrane invagination .. +PF05483 Synaptonemal complex protein 1 (SCP-1)
Pfam-B_19633 (release 8.0). Synaptonemal complex protein 1 (SCP-1) is the major component of the transverse filaments of the synaptonemal complex. Synaptonemal complexes are structures that are formed between homologous chromosomes during meiotic prophase .. +PF05484 LRV protein FeS4 cluster
This Iron sulphur cluster is found at the N-terminus of some proteins containing Pfam:PF01816 repeats.. +PF05485 THAP domain
The THAP domain is a putative DNA-binding domain (DBD) and probably also binds a zinc ion. It features the conserved C2CH architecture (consensus sequence: Cys - 2-4 residues - Cys - 35-50 residues - Cys - 2 residues - His). Other universal features include the location of the domain at the N-termini of proteins, its size of about 90 residues, a C-terminal AVPTIF box and several other conserved residues. Orthologues of the human THAP domain have been identified in other vertebrates and probably worms and flies, but not in other eukaryotes or any prokaryotes .. +PF05486 SRP9;
Signal recognition particle 9 kDa protein (SRP9). Pfam-B_7787 (release 8.0). This family consists of several eukaryotic SRP9 proteins. SRP9 together with the Alu-homologous region of 7SL RNA and SRP14 comprise the "Alu domain" of SRP, which mediates pausing of synthesis of ribosome associated nascent polypeptides that have been engaged by the targeting domain of SRP . This family also contains the homologous fungal SRP21 .. +PF05488 PAAR motif
This motif is found usually in pairs in a family of bacterial membrane proteins. It is also found as a triplet of tandem repeats comprising the entire length in another family of hypothetical proteins.. +PF05489 Tail_X;
Phage Tail Protein X. This domain is found in a family of phage tail proteins. Visual analysis suggests that it is related to Pfam:PF01476 (personal obs: C Yeats). The functional annotation of family members further confirms this hypothesis.. +PF05491 Holliday junction DNA helicase ruvB C-terminus
Pfam-B_844 (release 8.0). The RuvB protein makes up part of the RuvABC revolvasome which catalyses the resolution of Holliday junctions that arise during genetic recombination and DNA repair. Branch migration is catalysed by the RuvB protein that is targeted to the Holliday junction by the structure specific RuvA protein . This family consists of the C-terminal region of the RuvB protein which is thought to be a helicase DNA-binding domain. . +PF05493 ATP synthase subunit H
Pfam-B_3341 (release 8.0). ATP synthase subunit H is an extremely hydrophobic protein of approximately 9 kDa . This subunit may be required for assembly of vacuolar ATPase . . +PF05494 Toluene tolerance, Ttg2
Pfam-B_3575 (release 8.0). Toluene tolerance is mediated by increased cell membrane rigidity resulting from changes in fatty acid and phospholipid compositions, exclusion of toluene from the cell membrane, and removal of intracellular toluene by degradation . Many proteins are involved in these processes. This family is a transporter which shows similarity to ABC transporters .. +PF05495 CHY zinc finger
Pfam-B_5537 (release 7.8). This family of domains are likely to bind to zinc ions. They contain many conserved cysteine and histidine residues. We have named this domain after the N-terminal motif CXHY. This domain can be found in isolation in some proteins, but is also often associated with Pfam:PF00097. One of the proteins in this family (Swiss:P36078) is a mitochondrial intermembrane space protein called Hot13. This protein is involved in the assembly of small TIM complexes .. +PF05496 Holliday junction DNA helicase ruvB N-terminus
Pfam-B_844 (release 8.0). The RuvB protein makes up part of the RuvABC revolvasome which catalyses the resolution of Holliday junctions that arise during genetic recombination and DNA repair. Branch migration is catalysed by the RuvB protein that is targeted to the Holliday junction by the structure specific RuvA protein . This family contains the N-terminal region of the protein. . +PF05497 Destabilase
Pfam-B_4147 (release 8.0). Destabilase is an endo-epsilon(gamma-Glu)-Lys isopeptidase, which cleaves isopeptide bonds formed by transglutaminase (Factor XIIIa) between glutamine gamma-carboxamide and the epsilon-amino group of lysine .. +PF05498 Rapid ALkalinization Factor (RALF)
Pfam-B_4453 (release 8.0). RALF, a 5-kDa ubiquitous polypeptide in plants, arrests root growth and development .. +PF05499 DNA methyltransferase 1-associated protein 1 (DMAP1)
Pfam-B_38340 (release 8.0). DNA methylation can contribute to transcriptional silencing through several transcriptionally repressive complexes, which include methyl-CpG binding domain proteins (MBDs) and histone deacetylases (HDACs). The chief enzyme that maintains mammalian DNA methylation, DNMT1, can also establish a repressive transcription complex. The non-catalytic amino terminus of DNMT1 binds to HDAC2 and DMAP1 (for DNMT1 associated protein), and can mediate transcriptional repression. DMAP1 has intrinsic transcription repressive activity, and binds to the transcriptional co-repressor TSG101. DMAP1 is targeted to replication foci through interaction with the far N terminus of DNMT1 throughout S phase, whereas HDAC2 joins DNMT1 and DMAP1 only during late S phase, providing a platform for how histones may become deacetylated in heterochromatin following replication .. +PF05501 Domain of unknown function (DUF755)
Pfam-B_4891 (release 8.0). This family is predominated by ORFs from Circoviridae. The function of this family remains to be determined.. +PF05502 Dynactin p62 family
Pfam-B_4912 (release 8.0). Dynactin is a multi-subunit complex and a required cofactor for most, or all, of the cellular processes powered by the microtubule-based motor cytoplasmic dynein. p62 binds directly to the Arp1 subunit of dynactin [1,2].. +PF05503 Poxvirus G7-like
Pfam-B_4957 (release 8.0). +PF05504 Spore germination B3/ GerAC like, C-terminal
Pfam-B_5052 (release 8.0). The GerAC protein of the Bacillus subtilis spore is required for the germination response to L-alanine. Members of this family are thought to be located in the inner spore membrane. Although the function of this family is unclear, they are likely to encode the components of the germination apparatus that respond directly to this germinant, mediating the spore's response .. +PF05505 Ebola nucleoprotein
Pfam-B_8475 (release 8.0). This family consists of Ebola and Marburg virus nucleoproteins. These proteins are responsible for encapsidation of genomic RNA. It has been found that nucleoprotein DNA vaccines can offer protection from the virus .. +PF05506 Domain of unknown function (DUF756)
This domain is found, normally as a tandem repeat, at the C-terminus of bacterial phospholipase C proteins.. +PF05507 Microfibril-associated glycoprotein (MAGP)
Pfam-B_8462 (release 8.0). This family consists of several mammalian microfibril-associated glycoprotein (MAGP) 1 and 2 proteins. MAGP1 and 2 are components of elastic fibres. MAGP-1 has been proposed to bind a C-terminal region of tropoelastin, the soluble precursor of elastin. MAGP-2 was found to interact with fibrillin-1 and -2, as well as fibulin-1, another component of elastic fibres; this suggests that MAGP-2 may be important in the assembly of microfibrils . . +PF05508 RanGTP-binding protein
Pfam-B_37054 (release 8.0). The small Ras-like GTPase Ran plays an essential role in the transport of macromolecules in and out of the nucleus and has been implicated in spindle and nuclear envelope formation during mitosis in higher eukaryotes. The S. cerevisiae ORF YGL164c encoding a novel RanGTP-binding protein, termed Yrb30p was identified. The protein competes with yeast RanBP1 (Yrb1p) for binding to the GTP-bound form of yeast Ran (Gsp1p) and is, like Yrb1p, able to form trimeric complexes with RanGTP and some of the karyopherins .. +PF05509 TraY domain
Pfam-B_8963 (release 8.0). This family consists of several enterobacterial TraY proteins. TraY is involved in bacterial conjugation where it is required for efficient nick formation in the F plasmid . These proteins have a ribbon-helix-helix fold and are likely to be DNA-binding proteins.. +PF05510 sarcoglycan_2;
Sarcoglycan alpha/epsilon. Pfam-B_9181 (release 8.0). Sarcoglycans are a subcomplex of transmembrane proteins which are part of the dystrophin-glycoprotein complex. They are expressed in the skeletal, cardiac and smooth muscle. Although numerous studies have been conducted on the sarcoglycan subcomplex in skeletal and cardiac muscle, the manner of the distribution and localisation of these proteins along the nonjunctional sarcolemma is not clear . This family contains alpha and epsilon members.. +PF05511 Mitochondrial ATP synthase coupling factor 6
Pfam-B_9347 (release 8.0). Coupling factor 6 (F6) is a component of mitochondrial ATP synthase which is required for the interactions of the catalytic and proton-translocating segments . . +PF05512 AWPM-19-like family
Pfam-B_6960 (release 8.0). Members of this family are 19 kDa membrane proteins. The levels of the plant protein AWPM-19 increase dramatically when there is an increased level of abscisic acid. The increased presence of this protein leads to greater tolerance of freezing .. +PF05513 TraA
Pfam-B_9521 (release 8.0). Conjugative transfer of a bacteriocin plasmid, pPD1, of Enterococcus faecalis is induced in response to a peptide sex pheromone, cPD1, secreted from plasmid-free recipient cells. cPD1 is taken up by a pPD1 donor cell and binds to an intracellular receptor, TraA. Once a recipient cell acquires pPD1, it starts to produce an inhibitor of cPD1, termed iPD1, which functions as a TraA antagonist and blocks self-induction in donor cells. TraA transduces the signal of cPD1 to the mating response .. +PF05514 HR-like lesion-inducing
Pfam-B_6954 (release 8.0). Family of plant proteins that are associated with the hypersensitive response (HR) pathway of defence against plant pathogens .. +PF05515 Viral nucleic acid binding
Pfam-B_6916 (release 8.0). This family is common to ssRNA positive-strand viruses and its members are commonly described as nucleic acid binding proteins (NABP).. +PF05517 p25-alpha
Pfam-B_6873 (release 8.0). This family encodes a 25 kDa protein that is phosphorylated by a Ser/Thr-Pro kinase . It has been described as a brain specific protein, but it is found in Tetrahymena thermophila.. +PF05518 Totivirus coat protein
Pfam-B_10221 (release 8.0). +PF05520 Citrus tristeza virus P18 protein
Pfam-B_10309 (release 8.0). +PF05521 Phage head-tail joining protein
Pfam-B_7008 (release 8.0). +PF05522 Metallothionein;
Pfam-B_1360 (release 8.0). This family consists of metallothioneins from several worm and sea urchin species. Metallothioneins are low molecular weight, cysteine rich proteins known to be involved in heavy metal detoxification and homeostasis .. +PF05523 WxcM_C;
WxcM-like, C-terminal . Pfam-B_6950 (release 8.0). This family includes FdtA (Swiss:Q6T1W8) from Aneurinibacillus thermoaerophilus, which has been characterised as a dtdp-6-deoxy-3,4-keto-hexulose isomerase . It also includes WxcM (Swiss:Q93S92) from Xanthomonas campestris (pv. campestris) .. +PF05524 PEP-utilising enzyme, N-terminal
Pfam-B_69291 (release 8.0). +PF05525 Branched-chain amino acid transport protein
Pfam-B_1869 (release 8.0). This family consists of several bacterial branched-chain amino acid transport proteins which are responsible for the transport of leucine, isoleucine and valine via proton motive force .. +PF05526 Rhodococcus equi virulence-associated protein
Pfam-B_7324 (release 8.0). This family consists of several virulence-associated proteins from Rhodococcus equi. Rhodococcus equi is an important pulmonary pathogen of foals and is increasingly isolated from pneumonic infections and other infections in human immunodeficiency virus (HIV)-infected patients. Isolates from foals possess a large virulence plasmid, varying in size from 80 to 90 kb. Isolates lacking the plasmid are avirulent to foals. Little is known about the function of the plasmid apart from its encoding virulence-associated surface proteins .. +PF05527 Domain of unknown function (DUF758)
Pfam-B_6320 (release 8.0). Family of eukaryotic proteins with unknown function, which are induced by tumour necrosis factor.. +PF05528 Coronavirus gene 5 protein
Pfam-B_7342 (release 8.0). Infectious bronchitis virus (IBV), a member of Coronaviridae family, has a single-stranded positive-sense RNA genome, which is 27 kb in length. Gene 5 contains two (5a and 5b) open reading frames. The function of the 5a and 5b proteins is unknown .. +PF05529 B-cell receptor-associated protein 31-like
Pfam-B_6449 (release 8.0). Bap31 is a polytopic integral protein of the endoplasmic reticulum membrane and a substrate of caspase-8. Bap31 is cleaved within its cytosolic domain, generating pro-apoptotic p20 Bap31 .. +PF05531 Nucleopolyhedrovirus P10 protein
Pfam-B_7343 (release 8.0) & Pfam-B_6199 (release 10.0). This family consists of several nucleopolyhedrovirus P10 proteins which are thought to be involved in the morphogenesis of the polyhedra .. +PF05532 CsbD-like
Pfam-B_6755 (release 8.0). CsbD is a bacterial general stress response protein. Its expression is mediated by sigma-B, an alternative sigma factor . The role of CsbD in stress response is unclear.. +PF05533 Beet yellows virus-type papain-like endopeptidase C42
Members of the Closteroviridae and Potyviridae families of plant positive-strand RNA viruses encode one or two papain-like leader proteinases, belonging to Merops peptidase family C42.. +PF05534 HicB family
Pfam-B_6090 (release 8.0). This family consists of several bacterial HicB related proteins. The function of HicB is unknown although it is thought to be involved in pilus formation. It has been speculated that HicB performs a function antagonistic to that of pili and yet is necessary for invasion of certain niches . . +PF05535 Chromadorea ALT protein
Pfam-B_7314 (release 8.0). This family consists of several ALT protein homologues found in nematodes. Lymphatic filariasis is a major tropical disease caused by the mosquito borne nematodes Brugia and Wuchereria. About 120 million people are infected and at risk of lymphatic pathology such as acute lymphangitis and elephantiasis. Expression of alt-1 and alt-2 is initiated midway through development in the mosquito, peaking in the infective larva and declining sharply following entry into the host. ALT-1 and the closely related ALT-2 have been found to be strong candidates for a future vaccine against human filariasis .. +PF05536 Neurochondrin
Pfam-B_7411 (release 8.0). This family contains several eukaryotic neurochondrin proteins. Neurochondrin induces hydroxyapatite resorptive activity in bone marrow cells resistant to bafilomycin A1, an inhibitor of macrophage- and osteoclast-mediated resorption. Expression of the gene is localised to chondrocyte, osteoblast, and osteocyte in the bone and to the hippocampus and Purkinje cell layer of cerebellum in the brain .. +PF05537 Borrelia burgdorferi protein of unknown function (DUF759)
Pfam-B_7415 (release 8.0). This family consists of several uncharacterised proteins from the Lyme disease spirochete Borrelia burgdorferi.. +PF05538 Campylobacter major outer membrane protein
Pfam-B_7418 (release 8.0). This family consists of Campylobacter major outer membrane proteins. The major outer membrane protein (MOMP), a putative porin and a multifunction surface protein of Campylobacter jejuni, may play an important role in the adaptation of the organism to various host environments . . +PF05539 Pneumovirinae attachment membrane glycoprotein G
Pfam-B_7428 (release 8.0). +PF05540 Serpulina hyodysenteriae variable surface protein
Pfam-B_7432 (release 8.0). This family consists of several variable surface proteins from Serpulina hyodysenteriae.. +PF05541 Entomopoxvirus spheroidin protein
Pfam-B_7488 (release 8.0). Entomopoxviruses (EPVs) are large (300-400 nm) oval-shaped viruses replicating in the cytoplasm of their insect host cells. At the end of their replicative cycle EPVs virions are occluded in a highly expressed protein called spheroidin. This protein forms large (5-20 mm long) oval-shaped occlusion bodies (OBs) called spherules. The infectious cycle of EPVs begins with the ingestion by the insect host of the spherules, their dissolution by the alkaline reducing conditions of the midgut fluid and the release of virions in the midgut lumen. The infective particles first replicate in midgut epithelial cells, then pass the gut barrier to colonise the internal tissues, mainly the fat body cells. Whilst spheroidin has been demonstrated to be non-essential for viral replication, it plays an essential role in the natural biological cycle of the virus in protecting virions from adverse environmental conditions (e.g. UV degradation) and thus improving transmission efficacy. In this respect, spheroidins are functionally similar to polyhedrins of baculoviruses or cypoviruses .. +PF05542 Protein of unknown function (DUF760)
Pfam-B_7508 (release 8.0). This family contains several uncharacterised plant proteins.. +PF05543 Staphopain peptidase C47
Staphopains are one of four major families of proteinases secreted by the Gram-positive Staphylococcus aureus. These staphylococcal cysteine proteases are secreted as preproenzymes that are proteolytically cleaved to generate the mature enzyme.. +PF05544 Proline racemase
Pfam-B_7562 (release 8.0). This family consists of proline racemase (EC 5.1.1.4) proteins which catalyse the interconversion of L- and D-proline in bacteria . This family also contains several similar eukaryotic proteins including Swiss:Q9NCP4 a sequence with B-cell mitogenic properties which has been characterised as a co-factor-independent proline racemase .. +PF05545 Cbb3-type cytochrome oxidase component FixQ
Pfam-B_7570 (release 8.0). This family consists of several Cbb3-type cytochrome oxidase components (FixQ/CcoQ). FixQ is found in nitrogen fixing bacteria. Since nitrogen fixation is an energy-consuming process, effective symbioses depend on operation of a respiratory chain with a high affinity for O2, closely coupled to ATP production. This requirement is fulfilled by a special three-subunit terminal oxidase (cytochrome terminal oxidase cbb3), which was first identified in Bradyrhizobium japonicum as the product of the fixNOQP operon .. +PF05546 She9 / Mdm33 family
Pfam-B_35269 (Release 8.0). Members of this family are mitochondrial inner membrane proteins with a role in inner mitochondrial membrane organisation and biogenesis .. +PF05547 Immune inhibitor A peptidase M6
The insect pathogenic Gram-positive Bacillus thuringiensis secretes immune inhibitor A, a metallopeptidase, which specifically cleaves host antibacterial proteins. A homologue of immune inhibitor A, PrtV, has been identified in the Gram-negative human pathogen Vibrio cholerae .. +PF05548 Gametolysin peptidase M11
In the unicellular biflagellated alga, Chlamydomonas reinhardtii, gametolysin, a zinc-containing metallo-protease, is responsible for the degradation of the cell wall. Homologues of gametolysin have also been reported in the simple multicellular organism, Volvox.. +PF05549 Allexivirus 40kDa protein
Pfam-B_7591 (release 8.0). +PF05550 Pestivirus Npro endopeptidase C53
Unique to pestiviruses, the N-terminal protein encoded by the bovine viral diarrhoea virus genome is a cysteine protease (Npro) responsible for a self-cleavage that releases the N terminus of the core protein. This unique protease is dispensable for viral replication, and its coding region can be replaced by a ubiquitin gene directly fused in frame to the core. . +PF05551 Naegl_SSU_RRNA; DUF1519;
Zinc-binding loop region of homing endonuclease. Pfam-B_7681 (release 8.0). This domain is the short zinc-binding loops region of a number of much longer chain homing endonucleases. Such loops are probably stabilised by the zinc and may be viewed as small but separate domains. The common structural feature of these domains is that at least three zinc ligands lie very close to each other in the sequence and are not incorporated into regular secondary structural elements. The biological roles played by these small zinc-binding domains are presently unknown .. +PF05552 tm_helix;
This alignment represents a conserved transmembrane helix as well as some flanking sequence. It is often found in association with Pfam:PF00924.. +PF05553 Cotton fibre expressed protein
Pfam-B_7657 (release 8.0). This family consists of several plant proteins of unknown function. Three of the sequences (from Gossypium hirsutum) in this family are described as cotton fibre expressed proteins . The remaining sequences, found in Arabidopsis thaliana, are uncharacterised.. +PF05554 Viral hemorrhagic septicemia virus non-virion protein
Pfam-B_7684 (release 8.0). This family consists of several viral hemorrhagic septicemia virus non-virion (Nv) proteins. The NV protein is a nonstructural protein absent from mature virions although it is present in infected cells. The function of this protein is unknown .. +PF05555 Coxiella burnetii protein of unknown function (DUF762)
Pfam-B_7710 (release 8.0). This family consists of several uncharacterised proteins from the bacterium Coxiella burnetii. Coxiella burnetii is the causative agent of the Q fever disease.. +PF05556 Calcineurin-binding protein (Calsarcin)
Pfam-B_7783 (release 8.0). This family consists of several mammalian calcineurin-binding proteins. The calcium- and calmodulin-dependent protein phosphatase calcineurin has been implicated in the transduction of signals that control the hypertrophy of cardiac muscle and slow fibre gene expression in skeletal muscle. Calsarcin-1 and calsarcin-2 are expressed in developing cardiac and skeletal muscle during embryogenesis, but calsarcin-1 is expressed specifically in adult cardiac and slow-twitch skeletal muscle, whereas calsarcin-2 is restricted to fast skeletal muscle. Calsarcins represent a novel family of sarcomeric proteins that link calcineurin with the contractile apparatus, thereby potentially coupling muscle activity to calcineurin activation . Calsarcin-3, is expressed specifically in skeletal muscle and is enriched in fast-twitch muscle fibres. Like calsarcin-1 and calsarcin-2, calsarcin-3 interacts with calcineurin, and the Z-disc proteins alpha-actinin, gamma-filamin, and telethonin .. +PF05557 Mitotic checkpoint protein
Pfam-B_7761 (release 8.0). This family consists of several eukaryotic mitotic checkpoint (Mitotic arrest deficient or MAD) proteins. The mitotic spindle checkpoint monitors proper attachment of the bipolar spindle to the kinetochores of aligned sister chromatids and causes a cell cycle arrest in prometaphase when failures occur. Multiple components of the mitotic spindle checkpoint have been identified in yeast and higher eukaryotes. In S.cerevisiae, the existence of a Mad1-dependent complex containing Mad2, Mad3, Bub3 and Cdc20 has been demonstrated . . +PF05558 DREPP plasma membrane polypeptide
Pfam-B_7798 (release 8.0). This family contains several plant plasma membrane proteins termed DREPPs as they are developmentally regulated plasma membrane polypeptides .. +PF05559 Protein of unknown function (DUF763)
Pfam-B_7805 (release 8.0). This family consists of several uncharacterised bacterial and archaeal proteins of unknown function.. +PF05560 Bacillus thuringiensis P21 molecular chaperone protein
Pfam-B_7820 (release 8.0). This family contains several Bacillus thuringiensis P21 proteins. These proteins are thought to be molecular chaperones and have mosquitocidal properties [1,2].. +PF05561 Borrelia burgdorferi protein of unknown function (DUF764)
Pfam-B_7823 (release 8.0). This family consists of proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete).. +PF05562 Cold acclimation protein WCOR413
Pfam-B_7803 (release 8.0). This family consists of several WCOR413-like plant cold acclimation proteins.. +PF05563 Sal_SpvD;
Salmonella plasmid virulence protein SpvD. Pfam-B_7864 (release 8.0). This family consists of several SpvD plasmid virulence proteins from different Salmonella species.. +PF05564 Dormancy/auxin associated protein
Pfam-B_7941 (release 8.0). This family contains several plant dormancy-associated and auxin-repressed proteins, the functions of which are poorly understood .. +PF05565 Siphovirus Gp157
Pfam-B_7948 (release 8.0). This family contains both viral and bacterial proteins which are related to the Gp157 protein of the Streptococcus thermophilus SFi bacteriophages. It is thought that bacteria possessing the gene coding for this protein have an increased resistance to the bacteriophage .. +PF05566 Orthopoxvirus interleukin 18 binding protein
Pfam-B_7955 (release 8.0). Interleukin-18 (IL-18) is a proinflammatory cytokine that plays a key role in the activation of natural killer and T helper 1 cell responses principally by inducing interferon-gamma (IFN-gamma). Several poxvirus genes encode proteins with sequence similarity to IL-18BPs. It has been shown that vaccinia, ectromelia and cowpox viruses secrete from infected cells a soluble IL-18BP (vIL-18BP) that may modulate the host antiviral response. The expression of vIL-18BPs by distinct poxvirus genera that cause local or general viral dissemination, or persistent or acute infections in the host, emphasises the importance of IL-18 in response to viral infections .. +PF05567 Neisseria PilC beta-propeller domain
Pfam-B_7966 (release 8.0). This family consists of several PilC protein sequences from Neisseria gonorrhoeae and N. meningitidis. PilC is a phase-variable protein associated with pilus-mediated adherence of pathogenic Neisseria to target cells. This domain has been shown to adopt a beta-propeller structure .. +PF05568 African swine fever virus J13L protein
Pfam-B_7998 (release 8.0). This family consists of several African swine fever virus J13L proteins.. +PF05569 BlaR1 peptidase M56
Production of beta-Lactamase and penicillin-binding protein 2a (which mediate staphylococcal resistance to beta-lactam antibiotics) is regulated by a signal-transducing integral membrane protein and a transcriptional repressor. The signal transducer is a fusion protein with penicillin-binding and zinc metalloprotease domains. The signal for protein expression is transmitted by site-specific proteolytic cleavage of both the transducer, which auto-activates, and the repressor, which is inactivated, unblocking gene transcription. Homologues to this peptidase domain, which corresponds to Merops family M56, are also found in a number of other bacterial genome sequences.. +PF05570 Circovirus protein of unknown function (DUF765)
Pfam-B_8063 (release 8.0). This family consists of several short (27-30aa) porcine and bovine circovirus ORF6 proteins of unknown function.. +PF05571 Protein of unknown function (DUF766)
Pfam-B_8021 (release 8.0). This family consists of several eukaryotic proteins of unknown function.. +PF05572 Peptidase_M46;
Pregnancy-associated plasma protein-A. Pregnancy-associated plasma protein A (PAPP-A) is a metallo-protease belonging to Merops family M43. It cleaves insulin-like growth factor (IGF) binding protein-4 (IGFBP-4), causing a dramatic reduction in its affinity for IGF-I and -II. Through this mechanism, PAPP-A is a regulator of IGF bioactivity in several systems, including the human ovary and the cardiovascular system.. +PF05573 NosL
Pfam-B_8116 (release 8.0). NosL is one of the accessory proteins of the nos (nitrous oxide reductase) gene cluster. NosL is a monomeric protein of 18,540 MW that specifically and stoichiometrically binds Cu(I). The copper ion in NosL is ligated by a Cys residue, and one Met and one His are thought to serve as the other ligands. It is possible that NosL is a copper chaperone involved in metallo-centre assembly .. +PF05575 Vibrio cholerae RfbT protein
Pfam-B_8029 (release 8.0). This family consists of several RfbT proteins from Vibrio cholerae. It has been found that genetic alteration of the rfbT gene is responsible for serotype conversion of Vibrio cholerae O1 and determines the difference between the Ogawa and Inaba serotypes, in that the presence of rfbT is sufficient for Inaba-to-Ogawa serotype conversion .. +PF05576 PS-10 peptidase S37
These serine proteases have been found in Streptomyces species.. +PF05577 Serine carboxypeptidase S28
These serine proteases include several eukaryotic enzymes such as lysosomal Pro-X carboxypeptidase, dipeptidyl-peptidase II, and thymus-specific serine peptidase.. +PF05578 Pestivirus NS3 polyprotein peptidase S31
These serine peptidases are involved in processing of the flavivirus polyprotein.. +PF05579 Equine arteritis virus serine endopeptidase S32
Serine peptidases involved in processing nidovirus polyprotein.. +PF05580 SpoIVB peptidase S55
The protein SpoIVB plays a key role in signalling in the final sigma-K checkpoint of Bacillus subtilis. . +PF05581 Peptidase_S38;
Vibrio chemotaxis protein N terminus. This domain is found at the N terminus of several methyl-accepting chemotaxis proteins from Vibrio species.. +PF05582 YabG peptidase U57
YabG is a protease involved in the proteolysis and maturation of SpoIVA and YrbA proteins, conserved with the cortex and/or coat assembly by Bacillus subtilis.. +PF05584 Sulfolobus plasmid regulatory protein
Pfam-B_8140 (release 8.0). This family consists of several plasmid regulatory proteins from the extreme thermophilic and acidophilic archaea Sulfolobus.. +PF05585 Peptidase_A16;
Putative peptidase (DUF1758). This is a family of nematode proteins of unknown function. However, it seems likely that these proteins act as aspartic peptidases.. +PF05586 Anthrax receptor C-terminus region
This region is found in the putatively cytoplasmic C-terminus of the anthrax receptor.. +PF05587 Anthrax receptor extracellular domain
This region is found in the putatively extracellular N-terminal half of the anthrax receptor. It is probably part of the Ig superfamily and most closely related to Pfam:PF01833 (personal obs: C Yeats).. +PF05588 botulinum_HA-17;
Clostridium botulinum HA-17 protein. Pfam-B_8286 (release 8.0). This family consists of several Clostridium botulinum hemagglutinin (HA) subcomponents. Clostridium botulinum type D strain 4947 produces two different sizes of progenitor toxins (M and L) as intact forms without proteolytic processing. The M toxin is composed of neurotoxin (NT) and nontoxic-nonhemagglutinin (NTNHA), whereas the L toxin is composed of the M toxin and hemagglutinin (HA) subcomponents (HA-70, HA-17, and HA-33) .. +PF05589 Protein of unknown function (DUF768)
Pfam-B_8463 (release 8.0). This family consists of several uncharacterised hypothetical proteins from Rhizobium loti.. +PF05590 Xylella fastidiosa protein of unknown function (DUF769)
Pfam-B_8396 (release 8.0). This family consists of several uncharacterised hypothetical proteins of unknown function from Xylella fastidiosa, the organism that causes Pierce's disease in plants.. +PF05591 Protein of unknown function (DUF770)
Pfam-B_8473 (release 8.0). This family consists of several proteins of unknown function from various bacterial species.. +PF05592 bac_rhamnosid;
Bacterial alpha-L-rhamnosidase. Pfam-B_8527 (release 8.0). This family consists of bacterial rhamnosidase A and B enzymes. L-Rhamnose is abundant in biomass as a common constituent of glycolipids and glycosides, such as plant pigments, pectic polysaccharides, gums or biosurfactants. Some rhamnosides are important bioactive compounds. For example, terpenyl glycosides, the glycosidic precursor of aromatic terpenoids, act as important flavouring substances in grapes. Other rhamnosides act as cytotoxic rhamnosylated terpenoids, as signal substances in plants or play a role in the antigenicity of pathogenic bacteria .. +PF05593 RHS Repeat
RHS proteins contain extended repeat regions. These repeats often appear to be involved in ligand binding (e.g. ). Note that this model may not find all the repeats in a protein and that it covers two RHS repeats.. +PF05594 haemagg_repeats;
Haemagluttinin repeat. This highly divergent repeat occurs in a number of proteins implicated in cell aggregation . The Pfam alignment probably contains three such repeats (personal obs: C Yeats). These are likely to have a beta-helical structure.. +PF05595 Domain of unknown function (DUF771)
Pfam-B_7023 (release 8.0). Family of uncharacterised ORFs found in Bacteriophage and Lactococcus lactis. . +PF05596 Taeniidae antigen
Pfam-B_8569 (release 8.0). This family consists of several antigen proteins from Taenia and Echinococcus (tapeworm) species.. +PF05597 Poly(hydroxyalcanoate) granule associated protein (phasin)
Pfam-B_8339 (release 8.0). Polyhydroxyalkanoates (PHAs) are storage polyesters synthesised by various bacteria as intracellular carbon and energy reserve material. PHAs are accumulated as water-insoluble inclusions within the cells. This family consists of the phasins PhaF and PhaI which act as a transcriptional regulator of PHA biosynthesis genes. PhaF has been proposed to repress expression of the phaC1 gene and the phaIF operon .. +PF05598 Transposase domain (DUF772)
Pfam-B_8195 (release 8.0). This presumed domain is found at the N-terminus of many proteins found in transposons.. +PF05599 Deltaretrovirus Tax protein
Pfam-B_8606 (release 8.0). This family consists of Rex/Tax proteins from human and simian T-cell leukaemia viruses. The exact function of these proteins is unknown. Tax is the viral transactivator; it is a nuclear phosphoprotein that interacts with CREB, coactivator CBP/p300 and PCAF to form a multiprotein complex, which activates viral LTR and stimulates virus expression. Tax is also involved in deregulated expression of numerous cellular genes leading to T-cell leukaemia. Rex is a nucleolar post-transcriptional regulator that facilitates export to the cytoplasm of unspliced or incompletely spliced viral RNA [personal communication, Dr. S Nicot].. +PF05600 Protein of unknown function (DUF773)
Pfam-B_8595 (release 8.0). This family contains several eukaryotic sequences which are thought to be CDK5 activator-binding proteins, however, the function of this family is unknown.. +PF05602 Cleft lip and palate transmembrane protein 1 (CLPTM1)
Pfam-B_8636 (release 8.0). This family consists of several eukaryotic cleft lip and palate transmembrane protein 1 sequences. Cleft lip with or without cleft palate is a common birth defect that is genetically complex. The nonsyndromic forms have been studied genetically using linkage and candidate-gene association studies with only partial success in defining the loci responsible for orofacial clefting. CLPTM1 encodes a transmembrane protein and has strong homology to two Caenorhabditis elegans genes, suggesting that CLPTM1 may belong to a new gene family . This family also contains the human cisplatin resistance related protein CRR9p which is associated with CDDP-induced apoptosis .. +PF05603 Protein of unknown function (DUF775)
Pfam-B_8676 (release 8.0). This family consists of several eukaryotic proteins of unknown function.. +PF05604 Protein of unknown function (DUF776)
Pfam-B_8747 (release 8.0). This family consists of several highly related mouse and human proteins of unknown function.. +PF05605 Di19;
Drought induced 19 protein (Di19), zinc-binding. Pfam-B_8581 (release 8.0). This family consists of several drought induced 19 (Di19) like proteins. Di19 has been found to be strongly expressed in both the roots and leaves of Arabidopsis thaliana during progressive drought . This domain is a zinc-binding domain.. +PF05606 Borrelia burgdorferi protein of unknown function (DUF777)
Pfam-B_8755 (release 8.0). This family consists of several hypothetical proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete).. +PF05608 Protein of unknown function (DUF778)
Pfam-B_8777 (release 8.0). This family consists of several eukaryotic proteins of unknown function.. +PF05609 Lamina-associated polypeptide 1C (LAP1C)
Pfam-B_8782 (release 8.0). This family contains rat LAP1C proteins and several uncharacterised highly related sequences from both mice and humans. LAP1s (lamina-associated polypeptide 1s) are type 2 integral membrane proteins with a single membrane-spanning region of the inner nuclear membrane . LAP1s bind to both A- and B-type lamins and have a putative role in the membrane attachment and assembly of the nuclear lamina .. +PF05610 Protein of unknown function (DUF779)
Pfam-B_8830 (release 8.0). This family consists of several bacterial proteins of unknown function.. +PF05611 Caenorhabditis elegans protein of unknown function (DUF780)
Pfam-B_8886 (release 8.0). This family consists of several short C. elegans proteins of unknown function.. +PF05612 Mouse protein of unknown function (DUF781)
Pfam-B_8891 (release 8.0). This family consists of uncharacterised mouse proteins of unknown function.. +PF05613 Human herpesvirus U15 protein
Pfam-B_8900 (release 8.0). +PF05614 Circovirus protein of unknown function (DUF782)
Pfam-B_8909 (release 8.0). This family consists of porcine and bovine circovirus proteins of unknown function.. +PF05615 DUF783;
Tho complex subunit 7. Pfam-B_8919 (release 8.0). The Tho complex is involved in transcription elongation and mRNA export from the nucleus.. +PF05616 Neisseria meningitidis TspB protein
Pfam-B_8925 (release 8.0). This family consists of several Neisseria meningitidis TspB virulence factor proteins.. +PF05617 DUF784;
Pfam-B_8935 (release 8.0). Both DUF784 and DUF1278 members are found to be expressed in the plant embryo sac and are regulated by the Myb98 transcription factor. Computational analysis has revealed that they are homologous to the plant prolamin superfamily (Protease inhibitor-seed storage-LTP family, Pfam:PF00234) . In contrast to the typical prolamin members that have eight conserved Cys residues forming four pairs of disulfide bonds, both DUF784 and DUF1278 domains only contain six conserved Cys residues that may form three pairs of disulfide bonds. These two domains may have potential functions in lipid transfer or protection during plant embryo sac development and reproduction . This family has been merged with the DUF1278 family.. +PF05618 DUF785;
Putative ATP-dependant zinc protease. Pfam-B_8936 (release 8.0). Proteins in this family are annotated as being ATP-dependent zinc proteases.. +PF05619 Borrelia burgdorferi protein of unknown function (DUF787)
Pfam-B_9013 (release 8.0). This family consists of several hypothetical proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). . +PF05620 Protein of unknown function (DUF788)
Pfam-B_9014 (release 8.0). This family consists of several eukaryotic proteins of unknown function.. +PF05621 Bacterial TniB protein
Pfam-B_9028 (release 8.0). This family consists of several bacterial TniB NTP-binding proteins. TniB is a probable ATP-binding protein which is involved in Tn5053 mercury resistance transposition .. +PF05622 HOOK protein
Pfam-B_8981 (release 8.0). This family consists of several HOOK1, 2 and 3 proteins from different eukaryotic organisms. The different members of the human gene family are HOOK1, HOOK2 and HOOK3. Different domains have been identified in the three human HOOK proteins, and it was demonstrated that the highly conserved NH2-domain mediates attachment to microtubules, whereas the central coiled-coil motif mediates homodimerisation and the more divergent C-terminal domains are involved in binding to specific organelles (organelle-binding domains). It has been demonstrated that endogenous HOOK3 binds to Golgi membranes , whereas both HOOK1 and HOOK2 are localised to discrete but unidentified cellular structures. In mice the Hook1 gene is predominantly expressed in the testis. Hook1 function is necessary for the correct positioning of microtubular structures within the haploid germ cell. Disruption of Hook1 function in mice causes abnormal sperm head shape and fragile attachment of the flagellum to the sperm head .. +PF05623 Protein of unknown function (DUF789)
Pfam-B_9113 (release 8.0). This family consists of several plant proteins of unknown function.. +PF05624 LISCH7;
Lipolysis stimulated receptor (LSR). Pfam-B_9152 (release 8.0). The lipolysis-stimulated receptor (LSR) is a lipoprotein receptor primarily expressed in the liver and activated by free fatty acids . It is thought to be involved in the clearance of triglyceride-rich lipoproteins, and has been shown in mice to be critical for liver and embryonic development .. +PF05625 PAXNEB protein
Moxon SJ, Mistry J, Wood V. Pfam-B_9269 (release 8.0). PAXNEB or PAX6 neighbour is found in several eukaryotic organisms. PAXNEB is an RNA polymerase II Elongator protein subunit . It is part of the HAP subcomplex of Elongator, which is a six-subunit component of the RNA polymerase II holoenzyme. The HAP subcomplex is required for Elongator structural integrity and histone acetyltransferase activity . This protein family has a P-loop motif. However its sequence has degraded in many members of the family.. +PF05626 Protein of unknown function (DUF790)
Pfam-B_9309 (release 8.0). This family consists of several hypothetical archaeal proteins of unknown function.. +PF05627 NOI;
Cleavage site for pathogenic type III effector avirulence factor Avr. Pfam-B_9342 (release 8.0). This domain is conserved in small families of otherwise unrelated proteins in both mono-cots and di-cots, suggesting that it has a conserved, plant-specific function. It is found both in the plant RIN4 (resistance R membrane-bound host-target protein) where it appears to contribute to the binding of the protein to both RCS (AvrRpt2 auto-cleavage site) and AvrB, the virulence factor from the infecting bacterium . The cleavage site for the AvrRpt2 avirulence protein would appear to be the sequence motifs VPQFGDW and LPKFGEW, both of which are highly conserved within the domain .. +PF05628 Borrelia membrane protein P13
Pfam-B_8766 (release 8.0). This family consists of P13 proteins from Borrelia species. P13 is a 13kDa integral membrane protein which is post-translationally processed at both ends and modified by an unknown mechanism .. +PF05629 Nanovirus component 8 (C8) protein
Pfam-B_9354 (release 8.0). This family consists of a group of 17.4 kDa nanovirus proteins which are highly related to the faba bean necrotic yellows virus component 8 protein whose function is unknown .. +PF05630 Necrosis inducing protein (NPP1)
Pfam-B_9369 (release 8.0). This family consists of several NPP1 like necrosis inducing proteins from oomycetes, fungi and bacteria. Infiltration of NPP1 into leaves of Arabidopsis thaliana plants results in transcript accumulation of pathogenesis-related (PR) genes, production of ROS and ethylene, callose apposition, and HR-like cell death . . +PF05631 Protein of unknown function (DUF791)
Pfam-B_9328 (release 8.0). This family consists of several eukaryotic proteins of unknown function.. +PF05632 Borrelia burgdorferi protein of unknown function (DUF792)
Pfam-B_9387 (release 8.0). This family consists of several hypothetical proteins from the Lyme disease spirochete Borrelia burgdorferi.. +PF05633 Protein of unknown function (DUF793)
Pfam-B_9395 (release 8.0). This family consists of several plant proteins of unknown function.. +PF05634 DUF794;
Moxon SJ, Eberhardt R, Barkan A. Pfam-B_9606 (release 8.0). This domain contains conserved cysteine and histidine residues . It resembles zinc fingers, and binds to zinc . This domain functions as an RNA-binding domain .. +PF05635 Ribosomal_S23p;
23S rRNA-intervening sequence protein. Moxon SJ, Eberhardt R. Pfam-B_9648 (release 8.0). This family consists of bacterial proteins encoded within an intervening sequence present within some 23S rRNA genes [1-3]. It folds into an anti-parallel four-helix bundle and forms homopentamers .. +PF05636 DUF795;
HIGH Nucleotidyl Transferase. Moxon SJ, Anantharaman V. Pfam-B_9692 (release 8.0). This family consists of HIGH Nucleotidyl Transferases. +PF05637 galactosyl transferase GMA12/MNN10 family
Pfam-B_6000 (release 8.0). This family contains a number of glycosyltransferase enzymes that contain a DXD motif. This family includes a number of C. elegans homologues where the DXD is replaced by DXH. Some members of this family are included in glycosyltransferase family 34.. +PF05638 Protein of unknown function (DUF796)
Pfam-B_9698 (release 8.0). This family consists of several bacterial proteins of unknown function.. +PF05639 DUF797;
Pfam-B_9797 (release 8.0). This family consists of several short bacterial proteins formerly known as DUF797. It was recently shown that Mycobacterium tuberculosis contains a small protein, Pup (Rv2111c), that is covalently conjugated to the e-NH2 groups of lysines on several target proteins (pupylation) such as the malonyl CoA acyl carrier protein (FabD) . Pupylation of FabD was shown to result in its recruitment to the mycobacterial proteasome and subsequent degradation analogous to eukaryotic ubiquitin-conjugated proteins. Searches recovered Pup orthologs in all major actinobacteria lineages including the basal bifidobacteria and also sporadically in certain other bacterial lineages. The Pup proteins were all between 50-90 residues in length and a multiple alignment shows that they all contain a conserved motif with a G [EQ] signature at the C-terminus. Thus, all of them are suitable for conjugation via the terminal glutamate or the deamidated glutamine (as shown in the case of the Mycobacterium Pup ). The conserved globular core of Pup is predicted to form a bihelical unit with the extreme C-terminal 6-7 residues forming a tail in the extended conformation. Thus, Pup is structurally unrelated to the ubiquitin fold and has convergently evolved the function of protein modifier.. +PF05640 DUF798;
Na,K-Atpase Interacting protein. Pfam-B_9801 (release 8.0). NKAIN (Na,K-Atpase INteracting) proteins are a family of evolutionary conserved transmembrane proteins that localise to neurons, that are critical for neuronal function, and that interact with the beta subunits, beta1 in vertebrates and beta in Drosophila, of Na,K-ATPase. NKAINs have highly conserved trans-membrane domains but otherwise no other characterised domains. NKAINs may function as subunits of pore or channel structures in neurons or they may affect the function of other membrane proteins. They are likely to function within the membrane bilayer .. +PF05641 Agenet domain
Pfam-B_2551 (release 8.0). This domain is related to the TUDOR domain Pfam:PF00567 . The function of the agenet domain is unknown. This family currently only matches one of the two Agenet domains in the FMR proteins .. +PF05642 Sporozoite P67 surface antigen
Pfam-B_8657 (release 8.0). This family consists of several Theileria P67 surface antigens. A stage specific surface antigen of Theileria parva, p67, is the basis for the development of an anti-sporozoite vaccine for the control of East Coast fever (ECF) in cattle. The antigen has been shown to contain five distinct linear peptide sequences recognised by sporozoite-neutralising murine monoclonal antibodies .. +PF05643 Putative bacterial lipoprotein (DUF799)
Pfam-B_9829 (release 8.0). This family consists of several bacterial proteins of unknown function. Some of the family members are described as putative lipoproteins.. +PF05644 DUF800;
Mitochondrial and peroxisomal fission factor Mff. Moxon SJ, Eberhardt R. Pfam-B_9868 (release 8.0). This protein has a role in mitochondrial and peroxisomal fission .. +PF05645 RNA polymerase III subunit RPC82
Pfam-B_9884 (release 8.0). This family consists of several DNA-directed RNA polymerase III polypeptides which are related to the Saccharomyces cerevisiae RPC82 protein. RNA polymerase C (III) promotes the transcription of tRNA and 5S RNA genes. In Saccharomyces cerevisiae, the enzyme is composed of 15 subunits, ranging from 160 to about 10 kDa .. +PF05647 DUF801;
Tandem-repeating region of mucin, epiglycanin-like. Pfam-B_1480 (release 8.0) Pfam-B_13922 (release 26.0). The unusual mucin, epiglycanin, is membrane-bound at the C-terminus but has a long region of this tandem-repeat at the N-terminus . It was the first mucin identified to be associated with the malignant behaviour of carcinoma cells . Mouse Muc21/epiglycanin is thought to be a highly glycosylated molecule, which makes it likely that its function is dependent on its glycoforms. Cells expressing Muc21 are significantly less adherent to each other and to extracellular matrix components than control cells, and this loss of adhesion is mediated by the TR portion of Muc21 . This family also now contains the repeat that was the C. elegans protein of unknown function (DUF801).. +PF05648 Peroxisomal biogenesis factor 11 (PEX11)
Pfam-B_2629 (release 8.0). This family consists of several peroxisomal biogenesis factor 11 (PEX11) proteins from several eukaryotic species. The PEX11 peroxisomal membrane proteins promote peroxisome division in multiple eukaryotes.. +PF05649 Peptidase family M13
M13 peptidases are well-studied proteases found in a wide range of organisms including mammals and bacteria. In mammals they participate in processes such as cardiovascular development, blood-pressure regulation, nervous control of respiration, and regulation of the function of neuropeptides in the central nervous system. In bacteria they may be used for digestion of milk. . +PF05650 Domain of unknown function (DUF802)
This region is found as two or more repeats in a small number of hypothetical proteins.. +PF05651 Putative sugar diacid recognition
This region is found in several proteins characterised as carbohydrate diacid regulators (e.g. Swiss:P36047). An HTH DNA-binding motif is found at the C-terminus of these proteins suggesting that this region includes the sugar recognition region.. +PF05652 Scavenger mRNA decapping enzyme (DcpS) N-terminal
Pfam-B_9894 (release 8.0). This family consists of several scavenger mRNA decapping enzymes (DcpS) and is the N-terminal domain of these proteins. DcpS is a scavenger pyrophosphatase that hydrolyses the residual cap structure following 3' to 5' decay of an mRNA. The association of DcpS with 3' to 5' exonuclease exosome components suggests that these two activities are linked and there is a coupled exonucleolytic decay-dependent decapping pathway.. +PF05653 DUF803;
Magnesium transporter NIPA. Moxon SJ, Eberhardt R. Pfam-B_9876 (release 8.0). NIPA (nonimprinted in Prader-Willi/Angelman syndrome) is a family of integral membrane proteins which function as magnesium transporters [1,2].. +PF05655 Pseudomon_AvrD;
Pseudomonas avirulence D protein (AvrD). Pfam-B_9946 (release 8.0). This family consists of several avirulence D (AvrD) proteins primarily found in Pseudomonas syringae [1,2].. +PF05656 Protein of unknown function (DUF805)
Pfam-B_2800 (release 8.0). This family consists of several bacterial proteins of unknown function.. +PF05657 Protein of unknown function (DUF806)
Pfam-B_7291 (release 8.0). This family consists of several Siphovirus and Lactococcus proteins of unknown function. The viral sequences are thought to be tail component proteins.. +PF05658 Hep_Hag;
Head domain of trimeric autotransporter adhesin. This seven residue repeat makes up the majority sequence of a family of bacterial haemagglutinins and invasins. The representative alignment contains four repeats.. +PF05659 Arabidopsis broad-spectrum mildew resistance protein RPW8
Pfam-B_7373 (release 8.0). This family consists of several broad-spectrum mildew resistance proteins from Arabidopsis thaliana. Plant disease resistance (R) genes control the recognition of specific pathogens and activate subsequent defence responses. The Arabidopsis thaliana locus Resistance To Powdery Mildew 8 (RPW8) contains two naturally polymorphic, dominant R genes, RPW8.1 and RPW8.2, which individually control resistance to a broad range of powdery mildew pathogens. They induce localised, salicylic acid-dependent defences similar to those induced by R genes that control specific resistance. Apparently, broad-spectrum resistance mediated by RPW8 uses the same mechanisms as specific resistance [1,2].. +PF05660 Coxiella burnetii protein of unknown function (DUF807)
Pfam-B_7114 (release 8.0). This family consists of several proteins of unknown function from Coxiella burnetii (the causative agent of a zoonotic disease called Q fever).. +PF05661 Protein of unknown function (DUF808)
Pfam-B_7112 (release 8.0). This family consists of several bacterial proteins of unknown function.. +PF05662 HIM;
Coiled stalk of trimeric autotransporter adhesin. This short motif is found in invasins and haemagglutinins, normally associated with (Pfam:PF05658).. +PF05663 Protein of unknown function (DUF809)
Pfam-B_7264 (release 8.0). This family consists of several proteins of unknown function from Raphanus sativus (Radish) and Brassica napus (Rape).. +PF05664 Protein of unknown function (DUF810)
Pfam-B_5709 (release 8.0). This family consists of several plant proteins of unknown function.. +PF05666 Fels-1 Prophage Protein-like
+PF05667 Protein of unknown function (DUF812)
Pfam-B_7417 (release 8.0). This family consists of several eukaryotic proteins of unknown function.. +PF05669 SOH1;
Pfam-B_7443 (release 8.0). The family consists of Saccharomyces cerevisiae SOH1 homologues. SOH1 is responsible for the repression of temperature sensitive growth of the HPR1 mutant and has been found to be a component of the RNA polymerase II transcription complex. SOH1 not only interacts with factors involved in DNA repair, but transcription as well. Thus, the SOH1 protein may serve to couple these two processes . . +PF05670 Domain of unknown function (DUF814)
Pfam-B_738 (Release 8.0). This domain occurs in proteins that have been annotated as Fibronectin/fibrinogen binding protein by similarity. This annotation comes from Swiss:O34693 where the N-terminal region is involved in this activity . Hence the activity of this C-terminal domain is unknown. This domain contains a conserved motif D/E-X-W/Y-X-H that may be functionally important.. +PF05671 GETHR pentapeptide repeat (5 copies)
Pfam-B_8059 (release 8.0). +PF05672 E-MAP-115;
MAP7 (E-MAP-115) family. Pfam-B_8157 (release 8.0). The organisation of microtubules varies with the cell type and is presumably controlled by tissue-specific microtubule-associated proteins (MAPs). The 115-kDa epithelial MAP (E-MAP-115/MAP7) has been identified as a microtubule-stabilising protein predominantly expressed in cell lines of epithelial origin . The binding of this microtubule associated protein is nucleotide independent .. +PF05673 Protein of unknown function (DUF815)
Pfam-B_6403 (release 8.0). This family consists of several bacterial proteins of unknown function.. +PF05674 Baculovirus protein of unknown function (DUF816)
Pfam-B_7178 (release 8.0). This family includes proteins that are about 200 amino acids in length. The proteins are all from baculoviruses. This family includes ORF107 from Orgyia pseudotsugata multicapsid polyhedrosis virus (OpMNPV) and a variety of other numbered ORF proteins, such as ORF52 Swiss:Q91F03, ORF140 Swiss:Q9YMI8. The function of these proteins is unknown.. +PF05675 Protein of unknown function (DUF817)
Pfam-B_7331 (release 8.0). This family consists of several bacterial proteins of unknown function.. +PF05676 NDUFB7;
NADH-ubiquinone oxidoreductase B18 subunit (NDUFB7). Pfam-B_7077 (release 8.0). This family consists of several NADH-ubiquinone oxidoreductase B18 subunit proteins from different eukaryotic organisms. Oxidative phosphorylation is the well-characterised process in which ATP, the principal carrier of chemical energy of individual cells, is produced due to a mitochondrial proton gradient formed by the transfer of electrons from NADH and FADH2 to molecular oxygen. The oxidative phosphorylation (OXPHOS) system is located in the mitochondrial inner membrane and consists of five multi-subunit enzyme complexes and two small electron carriers: coenzyme Q10 and cytochrome C. At least 70 structural proteins involved in the formation of the whole OXPHOS system are encoded by nuclear genes, whereas 13 structural proteins are encoded by the mitochondrial genome. Deficiency of NADH ubiquinone oxidoreductase, the first enzyme complex of the mitochondrial respiratory chain, is one of the most frequent causes of human mitochondrial encephalomyopathies .. +PF05677 Chlamydia CHLPS protein (DUF818)
Pfam-B_7510 (release 8.0). This family consists of several Chlamydia CHLPS proteins, the function of which is unknown.. +PF05678 VQ motif
Pfam-B_7960 (release 8.0). +PF05679 Chondroitin N-acetylgalactosaminyltransferase
Pfam-B_8249 (release 8.0). +PF05680 ATP synthase E chain
Pfam-B_6116 (release 8.0). This family consists of several ATP synthase E chain sequences which are components of the CF(0) subunit .. +PF05681 Fumarate hydratase (Fumerase)
Pfam-B_2085 (release 8.0). This family consists of several bacterial fumarate hydratase proteins FumA and FumB. Fumarase, or fumarate hydratase (EC 4.2.1.2), is a component of the citric acid cycle. In facultative anaerobes such as Escherichia coli, fumarase also engages in the reductive pathway from oxaloacetate to succinate during anaerobic growth. Three fumarases, FumA, FumB, and FumC, have been reported in E. coli. fumA and fumB genes are homologous and encode products of identical sizes which form thermolabile dimers of Mr 120,000. FumA and FumB are class I enzymes and are members of the iron-dependent hydrolases, which include aconitase and malate hydratase. The active FumA contains a 4Fe-4S centre, and it can be inactivated upon oxidation to give a 3Fe-4S centre .. +PF05683 Fumarase C-terminus
Pfam-B_2085 (release 8.0). This family consists of the C terminal region of several bacterial fumarate hydratase proteins (FumA and FumB). Fumarase, or fumarate hydratase (EC 4.2.1.2), is a component of the citric acid cycle. In facultative anaerobes such as Escherichia coli, fumarase also engages in the reductive pathway from oxaloacetate to succinate during anaerobic growth .. +PF05684 Protein of unknown function (DUF819)
Pfam-B_9034 (release 8.0). This family contains proteins of unknown function from archaeal, bacterial and plant species.. +PF05685 DUF820;
Putative restriction endonuclease. Pfam-B_7809 (release 8.0) & Pfam-B_8730 (release 14.0). This family consists of hypothetical proteins that are greatly expanded in cyanobacteria. The proteins are found sporadically in other bacteria. A small number of member proteins also contain Pfam:PF02861 domains that are involved in protein interactions. Solutions of several structures for members of this family show that it is likely to be acting as an endonuclease.. +PF05686 DUF821;
Glycosyl transferase family 90. Pfam-B_6682 (Release 8.0) & Pfam-B_7101 (Release 8.0). This family of glycosyl transferases are specifically (mannosyl) glucuronoxylomannan/galactoxylomannan -beta 1,2-xylosyltransferases, EC:2.4.2.-.. +PF05687 DUF822; Peptidase_M15_2;
Plant protein of unknown function (DUF822). Pfam-B_7149 (release 8.0). This family consists of the N terminal regions of several plant proteins of unknown function.. +PF05688 Salmonella repeat of unknown function (DUF824)
Pfam-B_2973 (release 8.0). This family consists of several repeated sequences of around 45 residues.. +PF05689 Salmonella repeat of unknown function (DUF823)
Pfam-B_2973 (release 8.0). This family consists of a series of repeated sequences (of around 180 residues) which are found in Salmonella typhimurium and Salmonella typhi. Sequences from this family are almost always found with Pfam:PF05688.. +PF05690 Thiazole biosynthesis protein ThiG
Pfam-B_1138 (release 8.0). This family consists of several bacterial thiazole biosynthesis protein G sequences. ThiG , together with ThiF and ThiH, is proposed to be involved in the synthesis of 4-methyl-5-(b-hydroxyethyl)thiazole (THZ) which is an intermediate in the thiazole production pathway .. +PF05691 Raffinose synthase or seed imbibition protein Sip1
Moxon SJ, Eberhardt R. Pfam-B_3204 (release 8.0). This family consists of several raffinose synthase proteins, also known as seed imbibition (Sip1) proteins. Raffinose (O-alpha- D-galactopyranosyl- (1-->6)- O-alpha- D-glucopyranosyl-(1<-->2)- O-beta- D-fructofuranoside) is a widespread oligosaccharide in plant seeds and other tissues. Raffinose synthase (EC:2.4.1.82) is the key enzyme that channels sucrose into the raffinose oligosaccharide pathway . Raffinose family oligosaccharides (RFOs) are ubiquitous in plant seeds and are thought to play critical roles in the acquisition of tolerance to desiccation and seed longevity. Raffinose synthases are alkaline alpha-galactosidases and are solely responsible for RFO breakdown in germinating maize seeds, whereas acidic galactosidases appear to have other functions . Glycoside hydrolase family 36 can be split into 11 families, GH36A to GH36K . This family includes enzymes from GH36C.. +PF05692 Mycoplasma haemagglutinin
Pfam-B_3547 (release 8.0). This family consists of several haemagglutinin sequences from Mycoplasma synoviae and Mycoplasma gallisepticum. The major plasma membrane proteins, pMGAs, of Mycoplasma gallisepticum are cell adhesin (hemagglutinin) molecules. It has been shown that the genetic determinants that code for the haemagglutinins are organised into a large family of genes and that only one of these genes is predominately expressed in any given strain [1,2,3].. +PF05693 Glycogen synthase
Pfam-B_2874 (release 8.0). This family consists of the eukaryotic glycogen synthase proteins GYS1, GYS2 and GYS3 [1,2]. Glycogen synthase (GS) is the enzyme responsible for the synthesis of alpha-1,4-linked glucose chains in glycogen. It is the rate limiting enzyme in the synthesis of the polysaccharide, and its activity is highly regulated through phosphorylation at multiple sites and also by allosteric effectors, mainly glucose 6-phosphate (G6P) .. +PF05694 56kDa selenium binding protein (SBP56)
Pfam-B_2816 (release 8.0). This family consists of several eukaryotic selenium binding proteins as well as three sequences from archaea. The exact function of this protein is unknown although it is thought that SBP56 participates in late stages of intra-Golgi protein transport . The Lotus japonicus homologue of SBP56, LjSBP is thought to have more than one physiological role and can be implicated in controlling the oxidation/reduction status of target proteins, in vesicular Golgi transport .. +PF05695 Plant protein of unknown function (DUF825)
Pfam-B_8370 (release 8.0). This family consists of several plant proteins greater than 1000 residues in length. The function of this family is unknown.. +PF05696 Protein of unknown function (DUF826)
Pfam-B_7303 (release 8.0). This family consists of several enterobacterial and siphoviral sequences of unknown function.. +PF05697 Trigger;
Bacterial trigger factor protein (TF). Pfam-B_8447 (release 8.0). In the E. coli cytosol, a fraction of the newly synthesised proteins requires the activity of molecular chaperones for folding to the native state. The major chaperones implicated in this folding process are the ribosome-associated Trigger Factor (TF), and the DnaK and GroEL chaperones with their respective co-chaperones. Trigger Factor is an ATP-independent chaperone and displays chaperone and peptidyl-prolyl-cis-trans-isomerase (PPIase) activities in vitro. It is composed of at least three domains, an N-terminal domain which mediates association with the large ribosomal subunit, a central substrate binding and PPIase domain with homology to FKBP proteins, and a C-terminal domain of unknown function. The positioning of TF at the peptide exit channel, together with its ability to interact with nascent chains as short as 57 residues renders TF a prime candidate for being the first chaperone that binds to the nascent polypeptide chains . This family represents the N-terminal region of the protein.. +PF05698 Bacterial trigger factor protein (TF) C-terminus
Pfam-B_8447 (release 8.0). In the E. coli cytosol, a fraction of the newly synthesised proteins requires the activity of molecular chaperones for folding to the native state. The major chaperones implicated in this folding process are the ribosome-associated Trigger Factor (TF), and the DnaK and GroEL chaperones with their respective co-chaperones. Trigger Factor is an ATP-independent chaperone and displays chaperone and peptidyl-prolyl-cis-trans-isomerase (PPIase) activities in vitro. It is composed of at least three domains, an N-terminal domain which mediates association with the large ribosomal subunit, a central substrate binding and PPIase domain with homology to FKBP proteins, and a C-terminal domain of unknown function. The positioning of TF at the peptide exit channel, together with its ability to interact with nascent chains as short as 57 residues renders TF a prime candidate for being the first chaperone that binds to the nascent polypeptide chains . This family represents the C-terminal region of the protein.. +PF05699 hATC;
hAT family C-terminal dimerisation region. This dimerisation region is found at the C terminus of the transposases of elements belonging to the Activator superfamily (hAT element superfamily). The isolated dimerisation region forms extremely stable dimers in vitro .. +PF05700 Breast carcinoma amplified sequence 2 (BCAS2)
Pfam-B_7922 (release 8.0). This family consists of several eukaryotic sequences of unknown function. The mammalian members of this family are annotated as breast carcinoma amplified sequence 2 (BCAS2) proteins . BCAS2 is a putative spliceosome associated protein .. +PF05701 DUF827;
Weak chloroplast movement under blue light. Pfam-B_6516 (release 8.0). WEMBL consists of several plant proteins required for the chloroplast avoidance response under high intensity blue light. This avoidance response consists in the relocation of chloroplasts on the anticlinal side of exposed cells. Acts in association with PMI2 to maintain the velocity of chloroplast photo-relocation movement via the regulation of cp-actin filaments . Thus several member-sequences are described as "myosin heavy chain-like".. +PF05702 Herpesvirus UL49.5 envelope/tegument protein
Pfam-B_7354 (release 8.0). UL49.5 protein consists of 98 amino acids with a calculated molecular mass of 10,155 Da. It contains putative signal peptide and transmembrane domains but lacks a consensus sequence for N glycosylation. UL49.5 protein is an O-glycosylated structural component of the viral envelope .. +PF05703 DUF828;
Moxon SJ, Eberhardt R. Pfam-B_7298 (release 8.0). This domain is frequently found at the N-terminus of proteins containing Pfam:PF08458 at the C-terminus. It is a component of the auto-regulatory loop which enables auxin canalisation by recruitment of the PIN1 auxin efflux protein to the cell membrane .. +PF05704 Capsular polysaccharide synthesis protein
Pfam-B_7575 (release 8.0). This family consists of several capsular polysaccharide proteins. Capsular polysaccharide (CPS) is a major virulence factor in Streptococcus pneumoniae . . +PF05705 Eukaryotic protein of unknown function (DUF829)
Pfam-B_7638 (release 8.0). This family consists of several uncharacterised eukaryotic proteins.. +PF05706 Cyclin-dependent kinase inhibitor 3 (CDKN3)
Pfam-B_5217 (release 8.0). This family consists of cyclin-dependent kinase inhibitor 3 or kinase associated phosphatase proteins from several mammalian species. The cyclin-dependent kinase (Cdk)-associated protein phosphatase (KAP) is a human dual specificity protein phosphatase that dephosphorylates Cdk2 on threonine 160 in a cyclin-dependent manner [1,2].. +PF05707 Zonular occludens toxin (Zot)
Pfam-B_3320 (release 8.0). This family consists of bacterial and viral proteins which are very similar to the Zonular occludens toxin (Zot). Zot is elaborated by bacteriophages present in toxigenic strains of Vibrio cholerae. Zot is a single polypeptide chain of 44.8 kDa, with the ability to reversibly alter intestinal epithelial tight junctions, allowing the passage of macromolecules through mucosal barriers [1,2]. +PF05708 Orthopoxvirus protein of unknown function (DUF830)
Pfam-B_5425 (release 8.0). This family consists of several Orthopoxvirus proteins of unknown function.. +PF05709 Phage tail protein
Pfam-B_5084 (release 8.0) & Pfam-B_10063 (release 10.0). This family consists of several Siphovirus and other phage tail component proteins as well as some bacterial proteins of unknown function.. +PF05710 Coiled coil
This region is found in a group of Dictyostelium discoideum proteins. It is likely to form a coiled-coil. Some of the proteins are regulated by cyclic AMP and are expressed late in development ( ).. +PF05711 Macrocin-O-methyltransferase (TylF)
Pfam-B_5055 (release 8.0). This family consists of bacterial macrocin O-methyltransferase (TylF) proteins. TylF is responsible for the methylation of macrocin to produce tylosin. Tylosin is a macrolide antibiotic used in veterinary medicine to treat infections caused by Gram-positive bacteria and as an animal growth promoter in the swine industry. It is produced by several Streptomyces species. As with other macrolides, the antibiotic activity of tylosin is due to the inhibition of protein biosynthesis by a mechanism that involves the binding of tylosin to the ribosome, preventing the formation of the mRNA-aminoacyl-tRNA-ribosome complex . The structure of one representative sequence from this family, NovP, shows it to be an S-adenosyl-l-methionine-dependent O-methyltransferase that catalyses the penultimate step in the biosynthesis of the aminocoumarin antibiotic novobiocin. Specifically, it methylates at 4-OH of the noviose moiety, and the resultant methoxy group is important for the potency of the mature antibiotic. It is likely that the key structural features of NovP are common to the rest of the family and include: a helical 'lid' region that gates access to the co-substrate binding pocket and an active centre that contains a 3-Asp putative metal binding site. A further conserved Asp probably acts as the general base that initiates the reaction by de-protonating the 4-OH group of the noviose unit .. +PF05712 MRG
Moxon SJ, Mistry J, Wood V. Pfam-B_5530 (release 8.0). This family consists of three different eukaryotic proteins (mortality factor 4 (MORF4/MRG15), male-specific lethal 3(MSL-3) and ESA1-associated factor 3(EAF3)). It is thought that the MRG family is involved in transcriptional regulation via histone acetylation . It contains 2 chromo domains and a leucine zipper motif .. +PF05713 Bacterial mobilisation protein (MobC)
Pfam-B_2832 (release 8.0). This family consists of several bacterial MobC-like, mobilisation proteins. MobC proteins belong to the group of relaxases. Together with MobA and MobB they bind to a single cis-active site of a mobilising plasmid, the origin of transfer (oriT) region . The absence of MobC has several different effects on oriT DNA. Site- and strand-specific nicking by MobA protein is severely reduced, accounting for the lower frequency of mobilisation. The localised DNA strand separation required for this nicking is less affected, but becomes more sensitive to the level of active DNA gyrase in the cell. In addition, strand separation is not efficiently extended through the region containing the nick site. These effects suggest a model in which MobC acts as a molecular wedge for the relaxosome-induced melting of oriT DNA. The effect of MobC on strand separation may be partially complemented by the helical distortion induced by supercoiling. However, MobC extends the melted region through the nick site, thus providing the single-stranded substrate required for cleavage by MobA . . +PF05714 Borrelia_lipo;
Borrelia burgdorferi virulent strain associated lipoprotein. Pfam-B_7866 (release 8.0). This family consists of several virulent strain associated lipoproteins from the Lyme disease spirochete Borrelia burgdorferi.. +PF05715 Zf_piccolo;
This (predicted) Zinc finger is found in the bassoon and piccolo proteins (e.g. Swiss:Q9JKS6). There are eight conserved cysteines, suggesting that it coordinates two zinc ligands.. +PF05716 A-kinase anchor protein 110 kDa (AKAP 110)
Pfam-B_5702 (release 8.0). This family consists of several mammalian protein kinase A anchoring protein 3 (PRKA3) or A-kinase anchor protein 110 kDa (AKAP 110) sequences. Agents that increase intracellular cAMP are potent stimulators of sperm motility. Anchoring inhibitor peptides, designed to disrupt the interaction of the cAMP-dependent protein kinase A (PKA) with A kinase-anchoring proteins (AKAPs), are potent inhibitors of sperm motility. PKA anchoring is a key biochemical mechanism controlling motility. AKAP110 shares compartments with both RI and RII isoforms of PKA and may function as a regulator of both motility- and head-associated functions such as capacitation and the acrosome reaction .. +PF05717 Transposase_34;
IS66 Orf2 like protein. Pfam-B_5707 (release 8.0). This protein is found in insertion sequences related to IS66. The function of these proteins is uncertain, but they are probably essential for transposition .. +PF05718 Poxvirus intermediate transcription factor
Pfam-B_5843 (release 8.0). This family consists of several highly related Poxvirus sequences which are thought to be intermediate transcription factors.. +PF05719 Golgi phosphoprotein 3 (GPP34)
Pfam-B_7957 (release 8.0). This family consists of several eukaryotic GPP34 like proteins. GPP34 localises to the Golgi complex and is conserved from yeast to humans. The cytosolically exposed location of GPP34 predicts a role for a novel coat protein in Golgi trafficking .. +PF05720 Cell-cell adhesion domain
This family is based on a group of Dictyostelium discoideum proteins that are essential in early development ( ). Swiss:P16642 and Swiss:P16643 are located on the cell surface and mediate cell-cell adhesion.. +PF05721 Phytanoyl-CoA dioxygenase (PhyH)
Pfam-B_5670 (release 8.0). This family is made up of several eukaryotic phytanoyl-CoA dioxygenase (PhyH) proteins, ectoine hydroxylases and a number of bacterial deoxygenases. PhyH is a peroxisomal enzyme catalysing the first step of phytanic acid alpha-oxidation. PhyH deficiency causes Refsum's disease (RD) which is an inherited neurological syndrome biochemically characterised by the accumulation of phytanic acid in plasma and tissues .. +PF05722 Ustilago B locus mating-type protein
Pfam-B_5804 (release 8.0). This family consists of several Ustilago mating-type proteins. The b locus of the phytopathogenic fungus Ustilago maydis encodes a multiallelic recognition function that controls the ability of the fungus to form a dikaryon and complete the sexual stage of the life cycle. The b locus has at least 25 alleles and any combination of two different alleles, brought together by mating between haploid cells, allows the fungus to cause disease and undergo sexual development within the plant . . +PF05724 Thiopurine S-methyltransferase (TPMT)
Pfam-B_5821 (release 8.0). This family consists of thiopurine S-methyltransferase proteins from both eukaryotes and prokaryotes. Thiopurine S-methyltransferase (TPMT) is a cytosolic enzyme that catalyses S-methylation of aromatic and heterocyclic sulfhydryl compounds, including anticancer and immunosuppressive thiopurines . . +PF05725 FNIP Repeat
This repeat is approximately 22 residues long and is only found in Dictyostelium discoideum. It appears to be related to Pfam:PF00560 (personal obs:C Yeats). The alignment consists of two tandem repeats. It is termed the FNIP repeat after the pattern of conserved residues.. +PF05726 Pirin C-terminal cupin domain
This region is found in the C-terminal half of the Pirin protein.. +PF05727 Uncharacterised protein family (UPF0228)
+PF05728 Uncharacterised protein family (UPF0227)
Despite being classed as uncharacterised proteins, the members of this family are almost certainly enzymes that are distantly related to the Pfam:PF00561.. +PF05729 NACHT domain
This NTPase domain is found in apoptosis proteins as well as those involved in MHC transcription activation . This family is closely related to Pfam:PF00931.. +PF05730 CFEM domain
This fungal specific cysteine rich domain is found in some proteins with proposed roles in fungal pathogenesis .. +PF05731 TROVE domain
This presumed domain is found in TEP1 and Ro60 proteins, that are RNA-binding components of Telomerase, Ro and Vault RNPs. This domain has been named TROVE, (after Telomerase, Ro and Vault). This domain is probably RNA-binding. . +PF05732 Firmicute plasmid replication protein (RepL)
Pfam-B_5929 (release 8.0). This family consists of Firmicute RepL proteins which are involved in plasmid replication.. +PF05733 Tenuivirus_N;
Tenuivirus/Phlebovirus nucleocapsid protein. Pfam-B_5998 (release 8.0) & Pfam-B_19756 (release 10.0). This family consists of several Tenuivirus and Phlebovirus nucleocapsid proteins [1,2]. These are ssRNA viruses.. +PF05734 Herpesvirus protein of unknown function (DUF832)
Pfam-B_7683 (release 8.0). This family consists of several herpesvirus proteins of unknown function.. +PF05735 TSPC;
Thrombospondin C-terminal region. Pfam-B_1875 (release 8.0). This region is found at the C-terminus of thrombospondin and related proteins.. +PF05736 OprF_membrane;
OprF membrane domain. Pfam-B_4079 (release 8.0). This domain represents the presumed membrane spanning region of the OprF proteins. This region is involved in channel formation and is thought to form an 8-stranded beta-barrel .. +PF05737 Collagen binding domain
Pfam-B_5000 (release 8.0). The domain fold is a jelly-roll, composed of two antiparallel beta-sheets and two short alpha-helices . A groove on beta-sheet I exhibited the best surface complementarity to the collagen. This site partially overlaps with the peptide sequence previously shown to be critical for collagen binding. Recombinant proteins containing single amino acid mutations designed to disrupt the surface of the putative binding site exhibited significantly lower affinities for collagen.. +PF05738 Cna protein B-type domain
Pfam-B_366 (release 8.0). This domain is found in Staphylococcus aureus collagen-binding surface protein. However, this region does not mediate collagen binding, the Pfam:PF05737 region carries out that function. The structure of the repetitive B-region has been solved and forms a beta sandwich structure. It is thought that this region forms a stalk in Staphylococcus aureus collagen-binding protein that presents the ligand binding domain away from the bacterial cell surface.. +PF05739 SNARE domain
Pfam-B_6285 (release 8.0). Most if not all vesicular membrane fusion events in eukaryotic cells are believed to be mediated by a conserved fusion machinery, the SNARE [soluble N-ethylmaleimide-sensitive factor (NSF) attachment protein (SNAP) receptors] machinery. The SNARE domain is thought to act as a protein-protein interaction module in the assembly of a SNARE protein complex .. +PF05741 Nanos;
Nanos RNA binding domain. Pfam-B_5908 (release 8.0). This family consists of several conserved novel zinc finger domains found in the eukaryotic proteins Nanos and Xcat-2. In Drosophila melanogaster, Nanos functions as a localised determinant of posterior pattern. Nanos RNA is localised to the posterior pole of the maturing egg cell and encodes a protein that emanates from this localised source. Nanos acts as a translational repressor and thereby establishes a gradient of the morphogen Hunchback . Xcat-2 is found in the vegetal cortical region and is inherited by the vegetal blastomeres during development, and is degraded very early in development. The localised and maternally restricted expression of Xcat-2 RNA suggests a role for its protein in setting up regional differences in gene expression that occur early in development . . +PF05742 DUF833;
Moxon SJ, Eberhardt R. Pfam-B_6481 (release 8.0). In eukaryotes this family is predicted to play a role in protein secretion and Golgi organisation . In plants this family includes Swiss:A9X6Y0, which is involved in water permeability in the cuticles of fruit . Swiss:P54797 has been found to be expressed during early embryogenesis in mice . This protein contains a conserved NRDE motif.. +PF05743 Tsg101;
Pfam-B_6022 (release 8.0). This family includes the eukaryotic tumour susceptibility gene 101 protein (TSG101). Altered transcripts of this gene have been detected in sporadic breast cancers and many other human malignancies. However, the involvement of this gene in neoplastic transformation and tumorigenesis is still elusive. TSG101 is required for normal cell function of embryonic and adult tissues, but this gene is not a tumour suppressor for sporadic forms of breast cancer . This family is related to the ubiquitin conjugating enzymes.. +PF05744 Benyvirus P25/P26 protein
Pfam-B_6153 (release 8.0). This family consists of P25 and P26 proteins from the beet necrotic yellow vein viruses.. +PF05745 Chlamydia 15 kDa cysteine-rich outer membrane protein (CRPA)
Pfam-B_6389 (release 8.0). This family consists of several Chlamydia 15 kDa cysteine-rich outer membrane proteins which are associated with differentiation of reticulate bodies (RBs) into elementary bodies (EBs) .. +PF05746 tRNA-synt_1d_C;
DALR anticodon binding domain. Pfam-B_196 (release 8.0). This all alpha helical domain is the anticodon binding domain in Arginyl and glycyl tRNA synthetase. This domain is known as the DALR domain after characteristic conserved amino acids .. +PF05748 Rubella membrane glycoprotein E1
Pfam-B_6726 (release 8.0). Rubella virus (RV), the sole member of the genus Rubivirus within the family Togaviridae, is a small enveloped, positive strand RNA virus. The nucleocapsid consists of 40S genomic RNA and a single species of capsid protein which is enveloped within a host-derived lipid bilayer containing two viral glycoproteins, E1 (58 kDa) and E2 (42-46 kDa). In virus infected cells, RV matures by budding either at the plasma membrane, or at the internal membranes depending on the cell type and enters adjacent uninfected cells by a membrane fusion process in the endosome, directed by E1-E2 heterodimers. The heterodimer formation is crucial for E1 transport out of the endoplasmic reticulum to the Golgi and plasma membrane. In RV E1, a cysteine at position 82 is crucial for the E1-E2 heterodimer formation and cell surface expression of the two proteins. The E1 has been shown to be a type 1 membrane protein, rich in cysteine residues with extensive intramolecular disulfide bonds .. +PF05749 Rubella membrane glycoprotein E2
Pfam-B_6726 (release 8.0). Rubella virus (RV), the sole member of the genus Rubivirus within the family Togaviridae, is a small enveloped, positive strand RNA virus. The nucleocapsid consists of 40S genomic RNA and a single species of capsid protein which is enveloped within a host-derived lipid bilayer containing two viral glycoproteins, E1 (58 kDa) and E2 (42-46 kDa). In virus infected cells, RV matures by budding either at the plasma membrane, or at the internal membranes depending on the cell type and enters adjacent uninfected cells by a membrane fusion process in the endosome, directed by E1-E2 heterodimers. The heterodimer formation is crucial for E1 transport out of the endoplasmic reticulum to the Golgi and plasma membrane. In RV E1, a cysteine at position 82 is crucial for the E1-E2 heterodimer formation and cell surface expression of the two proteins .. +PF05750 Rubella capsid protein
Pfam-B_6726 (release 8.0). Rubella virus is an enveloped positive-strand RNA virus of the family Togaviridae. Virions are composed of three structural proteins: a capsid and two membrane-spanning glycoproteins, E2 and E1. During virus assembly, the capsid interacts with genomic RNA to form nucleocapsids. It has been discovered that capsid phosphorylation serves to negatively regulate binding of viral genomic RNA. This may delay the initiation of nucleocapsid assembly until sufficient amounts of virus glycoproteins accumulate at the budding site and/or prevent non-specific binding to cellular RNA when levels of genomic RNA are low. It follows that at a late stage in replication, the capsid may undergo dephosphorylation before nucleocapsid assembly occurs .. +PF05751 FixH
Pfam-B_6803 (release 8.0). This family consists of several Rhizobium FixH like proteins. It has been suggested that the four proteins FixG, FixH, FixI, and FixS may participate in a membrane-bound complex coupling the FixI cation pump with a redox process catalysed by FixG .. +PF05752 Calicivirus_MSP;
Calicivirus minor structural protein. Pfam-B_6811 (release 8.0). This family consists of minor structural proteins largely from human calicivirus isolates. Human calicivirus causes gastroenteritis . The function of this family is unknown.. +PF05753 Translocon-associated protein beta (TRAPB)
Pfam-B_6857 (release 8.0). This family consists of several eukaryotic translocon-associated protein beta (TRAPB) or signal sequence receptor beta subunit (SSR-beta) proteins. The normal translocation of nascent polypeptides into the lumen of the endoplasmic reticulum (ER) is thought to be aided in part by a translocon-associated protein (TRAP) complex consisting of 4 protein subunits. The association of mature proteins with the ER and Golgi, or other intracellular locales, such as lysosomes, depends on the initial targeting of the nascent polypeptide to the ER membrane. A similar scenario must also exist for proteins destined for secretion .. +PF05754 Domain of unknown function (DUF834)
Pfam-B_9258 (release 8.0). This short presumed domain is found in a large number of hypothetical plant proteins. The domain is quite rich in conserved glycine residues. It occurs in some putative transposons but currently has no known function.. +PF05755 Rubber elongation factor protein (REF)
Pfam-B_6903 (release 8.0). This family consists of the highly related rubber elongation factor (REF), small rubber particle protein (SRPP) and stress-related protein (SRP) sequences. REF and SRPP are released from the rubber particle membrane into the cytosol during osmotic lysis of the sedimentable organelles (lutoids). The exact function of this family is unknown . . +PF05756 S-antigen protein
Pfam-B_7194 (release 8.0). S-antigens are heat stable proteins that are found in the blood of individuals infected with malaria .. +PF05757 Oxygen evolving enhancer protein 3 (PsbQ)
Pfam-B_6905 (release 8.0). This family consists of the plant specific oxygen evolving enhancer protein 3 (PsbQ). Photosystem II (PSII) is a pigment-protein complex, which consists of at least 25 different protein subunits, at present denoted PsbA-Z according to the genes that encode them. PsbQ plays an important role in the lumenal oxygen-evolving activity of PSII from higher plants and green algae . . +PF05758 Ycf1
Pfam-B_6040 (release 8.0). The chloroplast genomes of most higher plants contain two giant open reading frames designated ycf1 and ycf2. Although the function of Ycf1 is unknown, it is known to be an essential gene .. +PF05760 Immediate early response protein (IER)
Pfam-B_6450 (release 8.0). This family consists of several eukaryotic immediate early response (IER) 2 and 5 proteins. The role of IER5 is unclear although it plays an important role in mediating the cellular response to mitogenic signals [1,2]. Again, little is known about the function of IER2 although it is thought to play a role in mediating the cellular responses to a variety of extracellular signals [3,4].. +PF05761 5_nucleotidase;
5' nucleotidase family. Pfam-B_2948 (release 8.0). This family of eukaryotic proteins includes 5' nucleotidase enzymes, such as purine 5'-nucleotidase EC:3.1.3.5.. +PF05762 vwa_CoxE;
VWA domain containing CoxE-like protein. Pfam-B_2956 (release 8.0). This family is annotated by SMART as containing a VWA (von Willebrand factor type A) domain. The exact function of this family is unknown. It is found as part of a CO oxidising (Cox) system operon in several bacteria .. +PF05763 Protein of unknown function (DUF835)
Pfam-B_3039 (release 8.0). The members of this archaebacterial protein family are around 250-300 amino acid residues in length. The function of these proteins is not known.. +PF05764 YL1 nuclear protein
Pfam-B_3088 (release 8.0). The proteins in this family are designated YL1 . These proteins have been shown to be DNA-binding and may be a transcription factor .. +PF05766 Bacteriophage Lambda NinG protein
Pfam-B_7000 (release 8.0). NinG or Rap is involved in recombination. Rap (recombination adept with plasmid) increases lambda-by-plasmid recombination catalysed by Escherichia coli's RecBCD pathway .. +PF05767 Poxvirus virion envelope protein A14
Pfam-B_7009 (release 8.0). This family consists of several Poxvirus virion envelope protein A14 like sequences. A14 is a component of the virion membrane and has been found to be an H1 phosphatase substrate in vivo and in vitro. A14 is hyperphosphorylated on serine residues in the absence of H1 expression .. +PF05768 Glutaredoxin-like domain (DUF836)
Pfam-B_7010 (release 8.0) & Pfam-B_2829 (release 14.0). These proteins are related to the Pfam:PF00462 family.. +PF05769 Protein of unknown function (DUF837)
Pfam-B_7035 (release 8.0). This family consists of several eukaryotic proteins of unknown function. One of the family members (Swiss:O02197) is a circulating cathodic antigen (CCA) found in Schistosoma mansoni (Blood fluke) .. +PF05770 Inositol 1, 3, 4-trisphosphate 5/6-kinase
Pfam-B_7042 (release 8.0). This family consists of several inositol 1, 3, 4-trisphosphate 5/6-kinase proteins. Inositol 1,3,4-trisphosphate is at a branch point in inositol phosphate metabolism. It is dephosphorylated by specific phosphatases to either inositol 3,4-bisphosphate or inositol 1,3-bisphosphate. Alternatively, it is phosphorylated to inositol 1,3,4,6-tetrakisphosphate or inositol 1,3,4,5-tetrakisphosphate by inositol trisphosphate 5/6-kinase .. +PF05771 Poxvirus A31 protein
Pfam-B_7044 (release 8.0). +PF05772 NinB protein
Pfam-B_4884 (release 8.0). The ninR region of phage lambda contains two recombination genes, orf (ninB) and rap (ninG), that have roles when the RecF and RecBCD recombination pathways of E. coli, respectively, operate on phage lambda.. +PF05773 RWD domain
Pfam-B_3991 (release 8.0). This domain was identified in WD40 repeat proteins and Ring finger domain proteins . The function of this domain is unknown. GCN2 is the alpha-subunit of the only translation initiation factor (eIF2 alpha) kinase that appears in all eukaryotes. Its function requires an interaction with GCN1 via the domain at its N-terminus, which is termed the RWD domain after three major RWD-containing proteins: RING finger-containing proteins, WD-repeat-containing proteins, and yeast DEAD (DEXD)-like helicases. The structure forms an alpha + beta sandwich fold consisting of two layers: a four-stranded antiparallel beta-sheet, and three side-by-side alpha-helices .. +PF05774 Herpesvirus helicase-primase complex component
Pfam-B_7045 (release 8.0). This family consists of several helicase-primase complex components from the Gammaherpesviruses.. +PF05775 Enterobacteria AfaD invasin protein
Pfam-B_7107 (release 8.0). This family consists of several AfaD and related proteins from Escherichia coli and Salmonella bacteria. The afa gene clusters encode an afimbrial adhesive sheath produced by Escherichia coli. The adhesive sheath is composed of two proteins, AfaD and AfaE, which are independently exposed at the bacterial cell surface. AfaE is required for bacterial adhesion to HeLa cells and AfaD for the uptake of adherent bacteria into these cells . . +PF05776 Papillomavirus E5A protein
Pfam-B_7244 (release 8.0). Human papillomaviruses (HPVs) are epitheliotropic viruses, and their life cycle is intimately linked to the stratification and differentiation state of the host epithelial tissues. The kinetics of E5a protein expression during the complete viral life cycle has been studied and the highest level was found to be coincidental with the onset of virion morphogenesis . . +PF05777 Drosophila accessory gland-specific peptide 26Ab (Acp26Ab)
Pfam-B_7275 (release 8.0). This family consists of accessory gland-specific 26Ab peptides or male accessory gland secretory protein 355B from different Drosophila species. Drosophila males, like males of most other insects, transfer a group of specific proteins (Acp26Ab and Acp26Aa in Drosophila) to the females during mating. These proteins are produced primarily in the accessory gland and are likely to influence the female's reproduction . . +PF05778 Apolipoprotein CIII (Apo-CIII)
Pfam-B_7283 (release 8.0). This family consists of several mammalian apolipoprotein CIII (Apo-CIII) sequences. Apolipoprotein C-III is a 79-residue glycoprotein. It is synthesised in the intestine and liver as part of the very low density lipoprotein (VLDL) and the high density lipoprotein (HDL) particles. Owing to its positive correlation with plasma triglyceride (Tg) levels, Apo-CIII is suggested to play a role in Tg metabolism and is therefore of interest regarding atherosclerosis. However, unlike other apolipoproteins such as Apo-AI, Apo E or CII for which many naturally occurring mutations are known, the structure-function relationships of apo C-III remains a subject of debate. One possibility is that apo C-III inhibits lipoprotein lipase (LPL) activity, as shown by in vitro experiments. Another suggestion, is that elevated levels of Apo-CIII displace other apolipoproteins at the lipoprotein surface, modifying their clearance from plasma .. +PF05781 MRVI1 protein
Pfam-B_7407 (release 8.0). This family consists of mammalian MRVI1 proteins which are related to the lymphoid-restricted membrane protein (JAW1) and the IP3 receptor associated cGMP kinase substrates A and B (IRAGA and IRAGB). The function of MRVI1 is unknown although mutations in the Mrvi1 gene induces myeloid leukaemia by altering the expression of a gene important for myeloid cell growth and/or differentiation so it has been speculated that Mrvi1 is a tumour suppressor gene . IRAG is very similar in sequence to MRVI1 and is an essential NO/cGKI-dependent regulator of IP3-induced calcium release. Activation of cGKI decreases IP3-stimulated elevations in intracellular calcium, induces smooth muscle relaxation and contributes to the antiproliferative and pro-apoptotic effects of NO/cGMP . Jaw1 is a member of a class of proteins with COOH-terminal hydrophobic membrane anchors and is structurally similar to proteins involved in vesicle targeting and fusion. This suggests that the function and/or the structure of the ER in lymphocytes may be modified by lymphoid-restricted resident ER proteins .. +PF05782 Extracellular matrix protein 1 (ECM1)
Pfam-B_7421 (release 8.0). This family consists of several eukaryotic extracellular matrix protein 1 (ECM1) sequences. ECM1 has been shown to regulate endochondral bone formation, stimulate the proliferation of endothelial cells and induce angiogenesis [1,2]. Mutations in the ECM1 gene can cause lipoid proteinosis, a disorder which causes generalised thickening of skin, mucosae and certain viscera. Classical features include beaded eyelid papules and laryngeal infiltration leading to hoarseness .. +PF05783 Dynein light intermediate chain (DLIC)
Pfam-B_7447 (release 8.0). This family consists of several eukaryotic dynein light intermediate chain proteins. The light intermediate chains (LICs) of cytoplasmic dynein consist of multiple isoforms, which undergo post-translational modification to produce a large number of species. DLIC1 is known to be involved in assembly, organisation, and function of centrosomes and mitotic spindles when bound to pericentrin [1,2]. DLIC2 is a subunit of cytoplasmic dynein 2 that may play a role in maintaining Golgi organisation by binding cytoplasmic dynein 2 to its Golgi-associated cargo . . +PF05784 Betaherpesvirus UL82/83 protein N terminus
Pfam-B_7466 (release 8.0). This family represents the N terminal region of the Betaherpesvirus UL82 and UL83 proteins. As viruses are reliant upon their host cell to serve as proper environments for their replication, many have evolved mechanisms to alter intracellular conditions to suit their own needs. Human cytomegalovirus induces quiescent cells to enter the cell cycle and then arrests them in late G(1), before they enter the S phase, a cell cycle compartment that is presumably favourable for viral replication. The protein product of the human cytomegalovirus UL82 gene, pp71, can accelerate the movement of cells through the G(1) phase of the cell cycle. This activity would help infected cells reach the late G(1) arrest point sooner and thus may stimulate the infectious cycle. pp71 also induces DNA synthesis in quiescent cells, but a pp71 mutant protein that is unable to induce quiescent cells to enter the cell cycle still retains the ability to accelerate the G(1) phase. Thus, the mechanism through which pp71 accelerates G(1) cell cycle progression appears to be distinct from the one that it employs to induce quiescent cells to exit G(0) and subsequently enter the S phase . . +PF05785 Rho-activating domain of cytotoxic necrotizing factor
Pfam-B_7489 (release 8.0). This family consists of several bacterial cytotoxic necrotizing factor proteins as well as related dermonecrotic toxin (DNT) from Bordetella species. Cytotoxic necrotizing factor 1 (CNF1) causes necrosis of rabbit skin and re-organisation of the actin cytoskeleton in cultured cells . Bordetella dermonecrotic toxin (DNT) stimulates the assembly of actin stress fibres and focal adhesions by deamidating or polyaminating Gln63 of the small GTPase Rho. DNT is an A-B toxin which is composed of an N-terminal receptor-binding (B) domain and a C-terminal enzymatically active (A) domain .. +PF05786 Barren;
Condensin complex subunit 2. Pfam-B_7477 (release 8.0). This family consists of several Barren protein homologues from several eukaryotic organisms. In Drosophila Barren (barr) is required for sister-chromatid segregation in mitosis. barr encodes a novel protein that is present in proliferating cells and has homologues in yeast and human. Mitotic defects in barr embryos become apparent during cycle 16, resulting in a loss of PNS and CNS neurons. Centromeres move apart at the metaphase-anaphase transition and Cyclin B is degraded, but sister chromatids remain connected, resulting in chromatin bridging. Barren protein localises to chromatin throughout mitosis. Colocalisation and biochemical experiments indicate that Barren associates with Topoisomerase II throughout mitosis and alters the activity of Topoisomerase II. It has been suggested that this association is required for proper chromosomal segregation by facilitating the decatenation of chromatids at anaphase . This family forms one of the three non-structural maintenance of chromosomes (SMC) subunits of the mitotic condensation complex along with Cnd1 and Cnd3 .. +PF05787 Bacterial protein of unknown function (DUF839)
Pfam-B_7480 (release 8.0). This family consists of several bacterial proteins of unknown function that contain predicted beta-propeller repeats.. +PF05788 Orbivirus_VP1;
Orbivirus RNA-dependent RNA polymerase (VP1). Pfam-B_7493 (release 8.0). This family consists of the RNA-dependent RNA polymerase protein VP1 from the Orbiviruses. VP1 may have both enzymatic and structural roles in the virus life cycle .. +PF05789 Baculovirus VP1054 protein
Pfam-B_7511 (release 8.0). This family consists of several VP1054 proteins from the Baculoviruses. VP1054 is a virus structural protein required for nucleocapsid assembly . . +PF05790 CD2;
Immunoglobulin C2-set domain. +PF05791 Bacillus haemolytic enterotoxin (HBL)
Pfam-B_7539 (release 8.0). This family consists of several Bacillus haemolytic enterotoxins (HblC, HblD, HblA, NheA, and NheB) which can cause food poisoning in humans .. +PF05792 Candida agglutinin-like (ALS)
Pfam-B_7578 (release 8.0). This family consists of several agglutinin-like proteins from different Candida species. ALS genes of Candida albicans encode a family of cell-surface glycoproteins with a three-domain structure. Each Als protein has a relatively conserved N-terminal domain, a central domain consisting of a tandemly repeated motif of variable number, and a serine-threonine-rich C-terminal domain that is relatively variable across the family. The ALS family exhibits several types of variability that indicate the importance of considering strain and allelic differences when studying ALS genes and their encoded proteins . Fungal adhesins, which include sexual agglutinins, virulence factors, and flocculins, are surface proteins that mediate cell-cell and cell-environment interactions. It is possible that both the serine/threonine-rich domain and the cysteine residues in the C-terminal and DIPSY Pfam:PF11763 participate in anchoring the terminal domains inside the wall, so that only the inner part of Map4p, including the repeat region, is sticking out as a fold-back loop then able to act in adhesing .. +PF05793 TFIIF-alpha;
Transcription initiation factor IIF, alpha subunit (TFIIF-alpha). Pfam-B_7586 (release 8.0). Transcription initiation factor IIF, alpha subunit (TFIIF-alpha) or RNA polymerase II-associating protein 74 (RAP74) is the large subunit of transcription factor IIF (TFIIF), which is essential for accurate initiation and stimulates elongation by RNA polymerase II .. +PF05794 T-complex protein 11
Pfam-B_7604 (release 8.0). This family consists of several eukaryotic T-complex protein 11 (Tcp11) related sequences. Tcp11 is only expressed in fertile adult mammalian testes and is thought to be important in sperm function and fertility [1,2,3]. The family also contains the yeast Sok1 protein which is known to suppress cyclic AMP-dependent protein kinase mutants .. +PF05795 Plasmodium vivax Vir protein
Pfam-B_7631 (release 8.0). This family consists of several Vir proteins specific to Plasmodium vivax. The vir genes are present at about 600-1,000 copies per haploid genome and encode proteins that are immunovariant in natural infections, indicating that they may have a functional role in establishing chronic infection through antigenic variation . . +PF05796 Chordopoxvirus protein G2
Pfam-B_7672 (release 8.0). This family consists of several Chordopoxvirus isatin-beta-thiosemicarbazone dependent protein (protein G2) sequences. Inactivation of the gene coding for this protein renders the virus dependent upon isatin-beta-thiosemicarbazone (IBT) for growth .. +PF05797 Yeast_TAF;
Yeast trans-acting factor (REP1/REP2). Pfam-B_7680 (release 8.0). This family consists of the yeast trans-acting factor B and C (REP1 and 2) proteins. The yeast plasmid stability system consists of two plasmid-coded proteins, Rep1 and Rep2, and a cis-acting locus, STB. The Rep proteins show both self- and cross-interactions in vivo and in vitro, and bind to the STB DNA with assistance from host factor(s). Within the yeast nucleus, the Rep1 and Rep2 proteins tightly associate with STB-containing plasmids into well organised plasmid foci that form a cohesive unit in partitioning. It is generally accepted that the protein-protein and DNA-protein interactions engendered by the Rep-STB system are central to plasmid partitioning. Point mutations in Rep1 that knock out interaction with Rep2 or with STB simultaneously block the ability of these Rep1 variants to support plasmid stability .. +PF05798 Bacteriophage FRD3 protein
Pfam-B_7781 (release 8.0). This family consists of bacteriophage FRD3 proteins.. +PF05800 Gas vesicle synthesis protein GvpO
Pfam-B_8221 (release 8.0). This family consists of archaeal GvpO proteins which are required for gas vesicle synthesis . The family also contains two related sequences from Streptomyces coelicolor. . +PF05801 Lagovirus protein of unknown function (DUF840)
Pfam-B_8265 (release 8.0). This family consists of several Lagovirus sequences of unknown function, largely from rabbit hemorrhagic disease virus.. +PF05802 Enterobacterial EspB protein
Pfam-B_8424 (release 8.0). EspB is a type-III-secreted pore-forming protein of enteropathogenic Escherichia coli (EPEC) which is essential for EPEC pathogenesis . EspB is also found in Citrobacter rodentium .. +PF05803 Chordopoxvirus L2 protein
Pfam-B_8620 (release 8.0). This family consists of several Chordopoxvirus L2 proteins.. +PF05804 Kinesin-associated protein (KAP)
Pfam-B_8674 (release 8.0). This family consists of several eukaryotic kinesin-associated (KAP) proteins. Kinesins are intracellular multimeric transport motor proteins that move cellular cargo on microtubule tracks. It has been shown that the sea urchin KRP85/95 holoenzyme associates with a KAP115 non-motor protein, forming a heterotrimeric complex in vitro, called the Kinesin-II . . +PF05805 L6 membrane protein
Pfam-B_7771 (release 8.0). This family consists of several eukaryotic L6 membrane proteins. L6, IL-TMP, and TM4SF5 are cell surface proteins predicted to have four transmembrane domains. Previous sequence analysis led to their assignment as members of the tetraspanin superfamily; it has now been found that they are not significantly related to genuine tetraspanins, but instead constitute their own L6 family . Several members of this family have been implicated in human cancer [2,3].. +PF05806 Noggin
Pfam-B_7925 (release 8.0). This family consists of the eukaryotic Noggin proteins. Noggin is a glycoprotein that binds bone morphogenetic proteins (BMPs) selectively and, when added to osteoblasts, it opposes the effects of BMPs. It has been found that noggin arrests the differentiation of stromal cells, preventing cellular maturation .. +PF05808 Podoplanin
Pfam-B_8548 (release 8.0). This family consists of several mammalian podoplanin like proteins which are thought to control specifically the unique shape of podocytes .. +PF05810 NinF protein
Pfam-B_8528 (release 8.0). This family consists of several bacteriophage NinF proteins as well as related sequences from E. coli.. +PF05811 Eukaryotic protein of unknown function (DUF842)
Pfam-B_7096 (release 8.0). This family consists of a number of conserved eukaryotic proteins of unknown function. The sequences carry three sets of CxxxC motifs, which might suggest a type of zinc-finger formation.. +PF05812 Herpesvirus BLRF2 protein
Pfam-B_7251 (release 8.0). This family consists of several Herpesvirus BLRF2 proteins.. +PF05813 Orthopoxvirus F7 protein
Pfam-B_7318 (release 8.0). +PF05814 Baculovirus protein of unknown function (DUF843)
Pfam-B_7353 (release 8.0). This family consists of several Baculovirus proteins of around 85 residues long with no known function.. +PF05815 Baculovirus protein of unknown function (DUF844)
Pfam-B_7453 (release 8.0). This family consists of several Baculovirus sequences of between 350 and 380 residues long. The family has no known function.. +PF05816 Toxic anion resistance protein (TelA)
Pfam-B_7534 (release 8.0). This family consists of several prokaryotic TelA like proteins. TelA and KlA are associated with tellurite resistance and plasmid fertility inhibition .. +PF05817 Oligosaccharyltransferase subunit Ribophorin II
Pfam-B_7633 (release 8.0). This family contains eukaryotic Ribophorin II (RPN2) proteins. The mammalian oligosaccharyltransferase (OST) is a protein complex that effects the cotranslational N-glycosylation of newly synthesised polypeptides, and is composed of the following proteins: ribophorins I and II (RI and RII), OST48, and Dadl, N33/IAP, OST4, STT3. The family also includes the SWP1 protein from yeast. In yeast the oligosaccharyltransferase complex is composed of 7 or 8 subunits, SWP1 being one of them .. +PF05818 Enterobacterial TraT complement resistance protein
Pfam-B_7686 (release 8.0). The traT gene is one of the F factor transfer genes and encodes an outer membrane protein which is involved in interactions between an Escherichia coli and its surroundings [1,2].. +PF05819 NolX protein
Pfam-B_7801 (release 8.0). This family consists of Rhizobium NolX and Xanthomonas HrpF proteins. The interaction between the plant pathogen Xanthomonas campestris pv. vesicatoria and its host plants is controlled by hrp genes (hypersensitive reaction and pathogenicity), which encode a type III protein secretion system. Among type III-secreted proteins are avirulence proteins, effectors involved in the induction of plant defence reactions. HrpF is dispensable for protein secretion but required for AvrBs3 recognition in planta, is thought to function as a translocator of effector proteins into the host cell . NolX, a soybean cultivar specificity protein, is secreted by a type III secretion system (TTSS) and shows homology to HrpF of the plant pathogen Xanthomonas campestris pv. vesicatoria. It is not known whether NolX functions at the bacterium-plant interface or acts inside the host cell. NolX is expressed in planta only during the early stages of nodule development .. +PF05820 Baculovirus protein of unknown function (DUF845)
Pfam-B_7739 (release 8.0). This family consists of several highly related Baculovirus proteins of unknown function.. +PF05821 NDUFB8;
NADH-ubiquinone oxidoreductase ASHI subunit (CI-ASHI or NDUFB8). Pfam-B_7830 (release 8.0). This family consists of several eukaryotic NADH-ubiquinone oxidoreductase ASHI subunit (CI-ASHI) proteins. NADH:ubiquinone oxidoreductase (complex I) is an extremely complicated multiprotein complex located in the inner mitochondrial membrane. Its main function is the transport of electrons from NADH to ubiquinone, which is accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space. Human complex I appears to consist of 41 subunits .. +PF05822 Pyrimidine 5'-nucleotidase (UMPH-1)
Pfam-B_7840 (release 8.0). This family consists of several eukaryotic pyrimidine 5'-nucleotidase proteins. P5'N-1, also known as uridine monophosphate hydrolase-1 (UMPH-1), is a member of a large functional group of enzymes, characterised by the ability to dephosphorylate nucleic acids. P5'N-1 catalyses the dephosphorylation of pyrimidine nucleoside monophosphates to the corresponding nucleosides. Deficiencies in this protein's function can lead to several different disorders in humans .. +PF05823 Nematode fatty acid retinoid binding protein (Gp-FAR-1)
Pfam-B_7852 (release 8.0). Parasitic nematodes produce at least two structurally novel classes of small helix-rich retinol- and fatty-acid-binding proteins that have no counterparts in their plant or animal hosts and thus represent potential targets for new nematicides. Gp-FAR-1 is a member of the nematode-specific fatty-acid- and retinol-binding (FAR) family of proteins but localises to the surface of the organism, placing it in a strategic position for interaction with the host. Gp-FAR-1 functions as a broad-spectrum retinol- and fatty-acid-binding protein, and it is thought that it is involved in the evasion of primary host plant defence systems .. +PF05824 Pro-melanin-concentrating hormone (Pro-MCH)
Pfam-B_7863 (release 8.0). This family consists of several mammalian pro-melanin-concentrating hormone (Pro-MCH) 1 and 2 proteins. Melanin-concentrating hormone (MCH) is a 19 amino acid cyclic peptide that was first isolated from the pituitary of teleost fish. It is produced from pro-MCH that encodes, in addition to MCH, NEI, and a putative peptide, NGE. In lower vertebrates, MCH acts to regulate skin colour by antagonising the melanin-dispersing actions of alpha-melanocyte stimulating hormone (alpha-MSH). In mammals, MCH serves as a neuropeptide and is found in many regions of the brain and especially the hypothalamus. It affects many types of behaviours such as appetite, sexual receptivity, aggression, and anxiety. MCH also stimulates the release of luteinising hormone .. +PF05825 Beta-microseminoprotein (PSP-94)
Pfam-B_7865 (release 8.0). This family consists of the mammalian specific protein beta-microseminoprotein. Prostatic secretory protein of 94 amino acids (PSP94), also called beta-microseminoprotein, is a small, nonglycosylated protein, rich in cysteine residues. It was first isolated as a major protein from human seminal plasma . The exact function of this protein is unknown.. +PF05826 Phospholip_A2;
Pfam-B_7918 (release 8.0). This family consists of several phospholipase A2 like proteins mostly from insects .. +PF05827 Vacuolar ATP synthase subunit S1 (ATP6S1)
Pfam-B_8145 (release 8.0). This family consists of eukaryotic vacuolar ATP synthase subunit S1 proteins . It also contains BIG1 ER integral membrane proteins which are involved in cell wall organisation and biogenesis .. +PF05829 Adenovirus_PX;
Adenovirus late L2 mu core protein (Protein X). Pfam-B_8179 (release 8.0). This family consists of several Adenovirus late L2 mu core protein or Protein X sequences.. +PF05830 Nodulation protein Z (NodZ)
Pfam-B_8202 (release 8.0). The nodulation genes of Rhizobia are regulated by the nodD gene product in response to host-produced flavonoids and appear to encode enzymes involved in the production of a lipo-chitose signal molecule required for infection and nodule formation. NodZ is required for the addition of a 2-O-methylfucose residue to the terminal reducing N-acetylglucosamine of the nodulation signal. This substitution is essential for the biological activity of this molecule. Mutations in nodZ result in defective nodulation. nodZ represents a unique nodulation gene that is not under the control of NodD and yet is essential for the synthesis of an active nodulation signal . . +PF05831 GAGE protein
Pfam-B_8207 (release 8.0). This family consists of several GAGE and XAGE proteins which are found exclusively in humans. The function of this family is unknown although they have been implicated in human cancers .. +PF05832 Eukaryotic protein of unknown function (DUF846)
Pfam-B_8404 (release 8.0). This family consists of several proteins of unknown function from a variety of eukaryotic organisms.. +PF05833 Fibronectin-binding protein A N-terminus (FbpA)
Pfam-B_8577 (release 8.0). This family consists of the N-terminal region of the prokaryotic fibronectin-binding protein. Fibronectin binding is considered to be an important virulence factor in streptococcal infections. Fibronectin is a dimeric glycoprotein that is present in a soluble form in plasma and extracellular fluids; it is also present in a fibrillar form on cell surfaces. Both the soluble and cellular forms of fibronectin may be incorporated into the extracellular tissue matrix. While fibronectin has critical roles in eukaryotic cellular processes, such as adhesion, migration and differentiation, it is also a substrate for the attachment of bacteria. The binding of pathogenic Streptococcus pyogenes and Staphylococcus aureus to epithelial cells via fibronectin facilitates their internalisation and systemic spread within the host . . +PF05834 Lycopene cyclase protein
Pfam-B_8336 (release 8.0). This family consists of lycopene beta and epsilon cyclase proteins. Carotenoids with cyclic end groups are essential components of the photosynthetic membranes in all plants, algae, and cyanobacteria. These lipid-soluble compounds protect against photo-oxidation, harvest light for photosynthesis, and dissipate excess light energy absorbed by the antenna pigments. The cyclisation of lycopene (psi, psi-carotene) is a key branch point in the pathway of carotenoid biosynthesis. Two types of cyclic end groups are found in higher plant carotenoids: the beta and epsilon rings. Carotenoids with two beta rings are ubiquitous, and those with one beta and one epsilon ring are common; however, carotenoids with two epsilon rings are rare [1,2]. . +PF05835 Synaphin protein
Pfam-B_8588 (release 8.0). This family consists of several eukaryotic synaphin 1 and 2 proteins. Synaphin/complexin is a cytosolic protein that preferentially binds to syntaxin within the SNARE complex. Synaphin promotes SNAREs to form precomplexes that oligomerise into higher order structures. A peptide from the central, syntaxin binding domain of synaphin competitively inhibits these two proteins from interacting and prevents SNARE complexes from oligomerising. It is thought that oligomerisation of SNARE complexes into a higher order structure creates a SNARE scaffold for efficient, regulated fusion of synaptic vesicles . Synaphin promotes neuronal exocytosis by promoting interaction between the complementary syntaxin and synaptobrevin transmembrane regions that reside in opposing membranes prior to fusion .. +PF05836 Chorion protein S16
Pfam-B_8659 (release 8.0). This family consists of several examples of the fruit fly specific chorion protein S16. The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary . . +PF05837 Centromere protein H (CENP-H)
Pfam-B_8705 (release 8.0). This family consists of several eukaryotic centromere protein H (CENP-H) sequences. Macromolecular centromere-kinetochore complex plays a critical role in sister chromatid separation, but its complete protein composition as well as its precise dynamic function during mitosis has not yet been clearly determined. CENP-H contains a coiled-coil structure and a nuclear localisation signal. CENP-H is specifically and constitutively localised in kinetochores throughout the cell cycle. CENP-H may play a role in kinetochore organisation and function throughout the cell cycle . This is the C-terminus of the region, which is conserved from fungi to humans.. +PF05838 DUF847;
Glycosyl hydrolase 108. Pfam-B_8737 (release 8.0). This family acts as a lysozyme (N-acetylmuramidase), EC:3.2.1.17. It contains a conserved EGGY motif near the N-terminus, the glutamic acid within this motif is essential for catalytic activity . In bacteria, it may activate the secretion of large proteins via the breaking and rearrangement of the peptidoglycan layer during secretion [2,3]. It is frequently found at the N-terminus of proteins containing a C-terminal Pfam:PF09374 domain.. +PF05839 Apc13p protein
The anaphase-promoting complex (APC) is a conserved multi-subunit ubiquitin ligase required for the degradation of key cell cycle regulators. Members of this family are components of the anaphase-promoting complex homologous to Apc13p .. +PF05840 Bacteriophage replication gene A protein (GPA)
Pfam-B_8738 (release 8.0). This family consists of a group of bacteriophage replication gene A protein (GPA) like sequences from both viruses and bacteria. The members of this family are likely to be endonucleases [1,2,3].. +PF05841 Apc15p protein
The anaphase-promoting complex (APC) is a conserved multi-subunit ubiquitin ligase required for the degradation of key cell cycle regulators. Members of this family are components of the anaphase-promoting complex homologous to Apc15p Swiss:O94688 .. +PF05842 Euplotes octocarinatus mating pheromone protein
Pfam-B_8825 (release 8.0). This family consists of several mating pheromone proteins from Euplotes octocarinatus. Cells of the ten mating types of the ciliate Euplotes octocarinatus communicate by pheromones before they enter conjugation. The pheromones induce homotypic pairing when applied to mating types that do not secrete the same pheromone(s). Heterotypic pairs (i.e., those between cells of different mating types) are formed only when both mating types in a mixture secrete a pheromone that the other does not. The genetics of mating types is based on four codominant mating type alleles, each allele determining production of a different pheromone. The pheromones not only induce pair formation but also attract cells .. +PF05843 Suppressor of forked protein (Suf)
Pfam-B_8911 (release 8.0). This family consists of several eukaryotic suppressor of forked (Suf) like proteins. The Drosophila melanogaster Suppressor of forked [Su(f)] protein shares homology with the yeast RNA14 protein and the 77-kDa subunit of human cleavage stimulation factor, which are proteins involved in mRNA 3' end formation. This suggests a role for Su(f) in mRNA 3' end formation in Drosophila. The su(f) gene produces three transcripts; two of them are polyadenylated at the end of the transcription unit, and one is a truncated transcript, polyadenylated in intron 4. It is thought that su(f) plays a role in the regulation of poly(A) site utilisation and an important role of the GU-rich sequence for this regulation to occur .. +PF05844 YopD protein
Pfam-B_8937 (release 8.0). This family consists of several bacterial YopD like proteins. Virulent Yersinia species harbour a common plasmid that encodes essential virulence determinants (Yersinia outer proteins [Yops]), which are regulated by the extracellular stimuli Ca2+ and temperature. YopD is thought to be a possible transmembrane protein and contains an amphipathic alpha-helix in its carboxy terminus . . +PF05845 Bacterial phosphonate metabolism protein (PhnH)
Pfam-B_9057 (release 8.0). This family consists of several bacterial PhnH sequences which are known to be involved in phosphonate metabolism [1,2].. +PF05846 Chordopoxvirus A15 protein
Pfam-B_9149 (release 8.0). This family consists of several Chordopoxvirus A15 like sequences.. +PF05847 Nucleopolyhedrovirus late expression factor 3 (LEF-3)
Moxon SJ, Mistry J, Carstens EB. Pfam-B_9292 (release 8.0). This family consists of LEF-3 Nucleopolyhedrovirus late expression factor 3 (LEF-3) sequences which are known to be ssDNA-binding proteins . Alkaline nuclease (AN) and LEF-3 may participate in homologous recombination of the baculovirus genome in a manner similar to that of exonuclease (Redalpha) and DNA-binding protein (Redbeta) of the Red-mediated homologous recombination system of bacteriophage lambda. LEF-3 is essential for transporting the putative baculovirus helicase protein P143 into the nucleus where they function together during viral DNA replication . LEF-3 and other proteins have been shown to bind to closely linked sites on viral chromatin in vivo, suggesting that they may form part of the baculovirus replisome .. +PF05848 Firmicute transcriptional repressor of class III stress genes (CtsR)
Pfam-B_9312 (release 8.0). This family consists of several Firmicute transcriptional repressor of class III stress genes (CtsR) proteins. CtsR of L. monocytogenes negatively regulates the clpC, clpP and clpE genes belonging to the CtsR regulon .. +PF05849 Fibroin light chain (L-fibroin)
Pfam-B_9321 (release 8.0). This family consists of several moth fibroin light chain (L-fibroin) proteins. Fibroin of the silkworm, Bombyx mori, is secreted into the lumen of posterior silk gland (PSG) from the surrounding PSG cells as a molecular complex consisting of a heavy (H)-chain of approximately 350 kDa, a light (L)-chain of 25 kDa and a P25 of about 27 kDa. The H- and L-chains are disulfide-linked but P25 is associated with the H-L complex by non-covalent force .. +PF05851 Lentivirus virion infectivity factor (VIF)
Pfam-B_9439 (release 8.0). This family consists of several feline specific Lentivirus virion infectivity factor (VIF) proteins. VIF is essential for productive FIV infection of host target cells in vitro .. +PF05852 Gammaherpesvirus protein of unknown function (DUF848)
Pfam-B_9475 (release 8.0). This family consists of several uncharacterised proteins from the Gammaherpesvirinae.. +PF05853 Prokaryotic protein of unknown function (DUF849)
Pfam-B_9059 (release 8.0). This family consists of several hypothetical prokaryotic proteins with no known function.. +PF05854 Non-histone chromosomal protein MC1
Pfam-B_9146 (release 8.0). This family consists of archaeal chromosomal protein MC1 sequences which protect DNA against thermal denaturation . . +PF05856 ARP2/3 complex 20 kDa subunit (ARPC4)
Pfam-B_9272 (release 8.0). This family consists of several eukaryotic ARP2/3 complex 20 kDa subunit (P20-ARC) proteins. The Arp2/3 protein complex has been implicated in the control of actin polymerisation in cells. The human complex consists of seven subunits which include the actin related proteins Arp2 and Arp3. It has been suggested that the complex promotes actin assembly in lamellipodia and may participate in lamellipodial protrusion .. +PF05857 TraX protein
Pfam-B_9375 (release 8.0). This family consists of several bacterial TraX proteins. TraX is responsible for the amino-terminal acetylation of F-pilin subunits [1,2].. +PF05858 Bovine immunodeficiency virus surface protein (SU)
Pfam-B_9413 (release 8.0). The bovine lentivirus also known as the bovine immunodeficiency-like virus (BIV) has conserved and hypervariable regions in the surface envelope gene . This family corresponds to the SU surface protein.. +PF05859 Mis12 protein
Kinetochores are the chromosomal sites for spindle interaction and play a vital role in chromosome segregation. Fission yeast kinetochore protein Mis12, is required for correct spindle morphogenesis, determining metaphase spindle length . Thirty-five to sixty percent extension of metaphase spindle length takes place in Mis12 mutants . It has been shown that Mis12 genetically interacts with Mal2, another inner centromere core complex protein in S. pombe .. +PF05860 haemagglutination activity domain
This domain is suggested to be a carbohydrate-dependent haemagglutination activity site ( ). It is found in a range of haemagglutinins and haemolysins.. +PF05861 Bacterial phosphonate metabolism protein (PhnI)
Pfam-B_9004 (release 8.0). This family consists of several Proteobacterial phosphonate metabolism protein (PhnI) sequences. Bacteria that use phosphonates as a phosphorus source must be able to break the stable carbon-phosphorus bond. In Escherichia coli phosphonates are broken down by a C-P lyase that has a broad substrate specificity. The genes for phosphonate uptake and degradation in E. coli are organised in an operon of 14 genes, named phnC to phnP. Three gene products (PhnC, PhnD and PhnE) comprise a binding protein-dependent phosphonate transporter, which also transports phosphate, phosphite, and certain phosphate esters such as phosphoserine; two gene products (PhnF and PhnO) may have a role in gene regulation; and nine gene products (PhnG, PhnH, PhnI, PhnJ, PhnK, PhnL, PhnM, PhnN, and PhnP) probably comprise a membrane-associated C-P lyase enzyme complex .. +PF05862 Helicobacter pylori IceA2 protein
Pfam-B_9436 (release 8.0). This family consists of several Helicobacter pylori specific IceA2 proteins. The function of this family is unknown.. +PF05864 Chordopoxvirus DNA-directed RNA polymerase 7 kDa polypeptide (RPO7)
Pfam-B_9596 (release 8.0). This family consists of several Chordopoxvirus DNA-directed RNA polymerase 7 kDa polypeptide sequences. DNA-dependent RNA polymerase catalyses the transcription of DNA into RNA.. +PF05865 Cypovirus polyhedrin protein
Pfam-B_9652 (release 8.0). This family consists of several Cypovirus polyhedrin proteins. Polyhedrin is known to form a crystalline matrix (polyhedra) in infected insect cells .. +PF05866 Endodeoxyribonuclease RusA
Pfam-B_8996 (release 8.0). This family consists of several bacterial and phage Holliday junction resolvase (RusA) like proteins. The RusA protein of Escherichia coli is an endonuclease that can resolve Holliday intermediates and correct the defects in genetic recombination and DNA repair associated with inactivation of RuvAB or RuvC [1,2].. +PF05867 Protein of unknown function (DUF851)
Pfam-B_9669 (release 8.0), Jackhmmer:Q9N4S5. +PF05868 Rotavirus major outer capsid protein VP7
Pfam-B_9690 (release 8.0). This family consists of several Rotavirus major outer capsid protein VP7 sequences. The rotavirus capsid is composed of three concentric protein layers. Proteins VP4 and VP7 comprise the outer layer. VP4 forms spikes and is the viral attachment protein. VP7 is a glycoprotein and the major constituent of the outer protein layer .. +PF05869 DNA N-6-adenine-methyltransferase (Dam)
Pfam-B_9691 (release 8.0). This family consists of several bacterial and phage DNA N-6-adenine-methyltransferase (Dam) like sequences .. +PF05870 Phenolic acid decarboxylase (PAD)
Pfam-B_9737 (release 8.0). This family consists of several bacterial phenolic acid decarboxylase proteins. Phenolic acids, also called substituted cinnamic acids, are important lignin-related aromatic acids and natural constituents of plant cell walls. These acids (particularly ferulic, p-coumaric, and caffeic acids) bind the complex lignin polymer to the hemicellulose and cellulose in plants. The Phenolic acid decarboxylase (PAD) gene (pad) is transcriptionally regulated by p-coumaric, ferulic, or caffeic acid; these three acids are the three substrates of PAD .. +PF05871 DUF852;
ESCRT-II complex subunit. Moxon SJ, Wood V, Mistry J. Pfam-B_9765 (release 8.0). This family of conserved eukaryotic proteins are subunits of the endosome associated complex ESCRT-II which recruits transport machinery for protein sorting at the multivesicular body (MVB) . This protein complex transiently associates with the endosomal membrane and thereby initiates the formation of ESCRT-III, a membrane-associated protein complex that functions immediately downstream of ESCRT-II during sorting of MVB cargo. ESCRT-II in turn functions downstream of ESCRT-I, a protein complex that binds to ubiquitinated endosomal cargo .. +PF05872 Bacterial protein of unknown function (DUF853)
Pfam-B_9798 (release 8.0). This family consists of several bacterial proteins of unknown function. Swiss:Q8YFZ2 is thought to be an ATPase.. +PF05873 ATP synthase D chain, mitochondrial (ATP5H)
Pfam-B_9814 (release 8.0). This family consists of several ATP synthase D chain, mitochondrial (ATP5H) proteins. Subunit d has no extensive hydrophobic sequences, and is not apparently related to any subunit described in the simpler ATP synthases in bacteria and chloroplasts [1,2]. . +PF05874 Pheromone biosynthesis activating neuropeptide (PBAN)
Pfam-B_9874 (release 8.0). This family consists of several moth pheromone biosynthesis activating neuropeptide (PBAN) sequences. Female moths produce and release species specific sex pheromones to attract males for mating. Pheromone biosynthesis is hormonally regulated by the Pheromone Biosynthesis Activating Neuropeptide (PBAN) which is biosynthesised in the subesophageal ganglion (SOG) . . +PF05875 aPHC;
Pfam-B_9877 (release 8.0). This family consists of several ceramidases. Ceramidases are enzymes involved in regulating cellular levels of ceramides, sphingoid bases, and their phosphates, EC:3.5.1.23.. +PF05876 Phage terminase large subunit (GpA)
Pfam-B_9892 (release 8.0). This family consists of several phage terminase large subunit proteins as well as related sequences from several bacterial species. The DNA packaging enzyme of bacteriophage lambda, terminase, is a heteromultimer composed of a small subunit, gpNu1, and a large subunit, gpA, products of the Nu1 and A genes, respectively. Terminase is involved in the site-specific binding and cutting of the DNA in the initial stages of packaging. It is now known that gpA is actively involved in late stages of packaging, including DNA translocation, and that this enzyme contains separate functional domains for its early and late packaging activities . . +PF05878 Phytoreovirus nonstructural protein Pns9/Pns10
Pfam-B_9947 (release 8.0). This family consists of the Phytoreovirus nonstructural proteins Pns9 and Pns10. The function of this family is unknown.. +PF05879 Root hair defective 3 GTP-binding protein (RHD3)
Pfam-B_9973 (release 8.0). This family consists of several eukaryotic root hair defective 3 like GTP-binding proteins. It has been speculated that the RHD3 protein is a member of a novel class of GTP-binding proteins that is widespread in eukaryotes and required for regulated cell enlargement . The family also contains the homologous yeast synthetic enhancement of YOP1 (SEY1) protein which is involved in membrane trafficking .. +PF05880 Fijivirus 64 kDa capsid protein
Pfam-B_9976 (release 8.0). This family consists of several Fijivirus 64 kDa capsid proteins.. +PF05881 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP or CNPase)
Moxon SJ, Mazumder R. Pfam-B_9997 (release 8.0). This family consists of the eukaryotic protein 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP). 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP) is one of the earliest myelin-related proteins expressed in differentiating oligodendrocytes and Schwann cells. CNP is abundant in the central nervous system and in oligodendrocytes. This protein is also found in mammalian photoreceptor cells, testis and lymphocytes. Although the biological function of CNP is unknown, it is thought to play a significant role in the formation of the myelin sheath, where it comprises 4% of total protein. CNP selectively cleaves 2',3'-cyclic nucleotides to produce 2'-nucleotides in vitro. Although physiologically relevant substrates with 2',3'-cyclic termini are still unknown, numerous cyclic phosphate containing RNAs occur transiently within eukaryotic cells. Other known protein families capable of hydrolysing 2',3'-cyclic nucleotides include tRNA ligases and plant cyclic phosphodiesterases. The catalytic domains from all these proteins contain two tetra-peptide motifs H-X-T/S-X, where X is usually a hydrophobic residue. Mutation of either histidine in CNP abolishes enzymatic activity . CNPases belong to the 2H phosphoesterase superfamily. They share a common active site, characterised by two conserved histidines, with the bacterial tRNA-ligating enzyme LigT, vertebrate myelin-associated 2',3' phosphodiesterases, plant Arabidopsis thaliana CPDases and several bacteria and virus proteins .. +PF05883 DUF855;
Baculovirus U-box/Ring-like domain. Pfam-B_9633 (release 8.0). This family consists of several Baculovirus proteins of around 130 residues in length. The function of this family is unknown, but it appears to be related to the U-box and ring finger domain by profile-profile comparison.. +PF05884 DUF856;
Interactor of ZYG-11. Pfam-B_9445 (release 8.0). This family of proteins represents the protein product of the gene W03D8.9 which has been identified as an interactor of ZYG-11. ZYG-11 is the substrate-recognition subunit for a CUL-2 based complex that regulates cell division and embryonic development .. +PF05886 Orthopoxvirus F8 protein
Pfam-B_9539 (release 8.0). This family consists of several Orthopoxvirus F8 proteins. The function of this family is unknown.. +PF05887 Trypano_PARP;
Procyclic acidic repetitive protein (PARP). Pfam-B_9554 (release 8.0). This family consists of several Trypanosoma brucei procyclic acidic repetitive protein (PARP) like sequences. The procyclic acidic repetitive protein (parp) genes of Trypanosoma brucei encode a small family of abundant surface proteins whose expression is restricted to the procyclic form of the parasite. They are found at two unlinked loci, parpA and parpB; transcription of both loci is developmentally regulated . . +PF05889 Soluble liver antigen/liver pancreas antigen (SLA/LP autoantigen)
Pfam-B_9614 (release 8.0). This family consists of several eukaryotic and archaeal proteins which are related to the human soluble liver antigen/liver pancreas antigen (SLA/LP autoantigen). Autoantibodies are a hallmark of autoimmune hepatitis, but most are not disease specific. Autoantibodies to soluble liver antigen (SLA) and to liver and pancreas antigen (LP) have been described as disease specific, occurring in about 30% of all patients with autoimmune hepatitis . The function of SLA/LP is unknown, however, it has been suggested that the protein may function as a serine hydroxymethyltransferase and may be an important enzyme in the thus far poorly understood selenocysteine pathway . The archaeal sequences Swiss:Q8TXK0 and Swiss:Q8TYR3 are annotated as being pyridoxal phosphate-dependent enzymes.. +PF05890 Eukaryotic rRNA processing protein EBP2
Moxon SJ, Mistry J, Wood V. Pfam-B_9615 (release 8.0). This family consists of several Eukaryotic rRNA processing protein EBP2 sequences. Ebp2p is required for the maturation of 25S rRNA and 60S subunit assembly. Ebp2p may be one of the target proteins of Rrs1p for executing the signal to regulate ribosome biogenesis . This family also plays a role in chromosome segregation .. +PF05891 DUF858; Hydroxy-O-Methy;
AdoMet dependent proline di-methyltransferase. Pfam-B_9735 (release 8.0). This protein is expressed in the tail neuron PVT and in uterine cells in C. elegans [worm-base]. In Saccharomyces cerevisiae this is AdoMet dependent proline di-methyltransferase. This enzyme catalyses the di-methylation of ribosomal proteins Rpl12 and Rps25 at N-terminal proline residues. The methyltransferases described here specifically recognise the N-terminal X-Pro-Lys sequence motif, and they may account for nearly all previously described eukaryotic protein N-terminal methylation reactions. A number of other yeast and human proteins also share the recognition motif and may be similarly modified . As with other methyltransferases, this family carries the characteristic GxGxG motif.. +PF05892 Trichovirus coat protein
Pfam-B_9763 (release 8.0). This family consists of several coat proteins which are specific to the ssRNA positive-strand, no DNA stage viruses such as the Trichovirus and Vitivirus.. +PF05893 Acyl-CoA reductase (LuxC)
Pfam-B_9766 (release 8.0). This family consists of several bacterial Acyl-CoA reductase (LuxC) proteins. The channelling of fatty acids into the fatty aldehyde substrate for the bacterial bioluminescence reaction is catalysed by a fatty acid reductase multienzyme complex, which channels fatty acids through the thioesterase (LuxD), synthetase (LuxE) and reductase (LuxC) components .. +PF05894 Podovirus DNA encapsidation protein (Gp16)
Pfam-B_9825 (release 8.0). This family consists of several DNA encapsidation protein (Gp16) sequences from the phi-29-like viruses. Gene product 16 catalyses the in vivo and in vitro genome-encapsidation reaction . . +PF05895 Siphovirus protein of unknown function (DUF859)
Pfam-B_9891 (release 8.0). This family consists of several uncharacterised proteins from the Siphoviruses as well as one bacterial sequence Swiss:Q8K6J6. Some of the members of this family are described as putative minor structural proteins.. +PF05896 Na(+)-translocating NADH-quinone reductase subunit A (NQRA)
Pfam-B_3622 (release 8.0). This family consists of several bacterial Na(+)-translocating NADH-quinone reductase subunit A (NQRA) proteins. The Na(+)-translocating NADH: ubiquinone oxidoreductase (Na(+)-NQR) generates an electrochemical Na(+) potential driven by aerobic respiration .. +PF05899 DUF861;
Protein of unknown function (DUF861). Pfam-B_2000 (release 8.0). This family consists of several proteins which seem to be specific to plants and bacteria. The function of this family is unknown.. +PF05901 Excalibur calcium-binding domain
Extracellular Ca2+-dependent nuclease YokF from Bacillus subtilis and several other surface-exposed proteins from diverse bacteria are encoded in the genomes in two paralogous forms that differ by a ~45 amino acid fragment, which comprises a novel conserved domain. Sequence analysis of this domain revealed a conserved DxDxDGxxCE motif, which is strikingly similar to the Ca2+-binding loop of the calmodulin-like EF-hand domains, suggesting an evolutionary relationship between them. Functions of many of the other proteins in which the novel domain, named Excalibur (extracellular calcium-binding region), is found, as well as a structural model of its conserved motif are consistent with the notion that the Excalibur domain binds calcium. This domain is but one more example of the diversity of structural contexts surrounding the EF-hand-like calcium-binding loop in bacteria. This loop is thus more widespread than hitherto recognised and the evolution of EF-hand-like domains is probably more complex than previously appreciated .. +PF05902 4.1 protein C-terminal domain (CTD)
At the C-terminus of all known 4.1 proteins is a sequence domain unique to these proteins, known as the C-terminal domain (CTD). Mammalian CTDs are associated with a growing number of protein-protein interactions, although such activities have yet to be associated with invertebrate CTDs. Mammalian CTDs are generally defined by sequence alignment as encoded by exons 18-21. Comparison of known vertebrate 4.1 proteins with invertebrate 4.1 proteins indicates that mammalian 4.1 exon 19 represents a vertebrate adaptation that extends the sequence of the CTD with a Ser/Thr-rich sequence. The CTD was first described as a 22/24-kDa domain by chymotryptic digestion of erythrocyte 4.1 (4.1R). CTD is thought to represent an independent folding structure which has gained function since the divergence of vertebrates from invertebrates . . +PF05903 DUF862;
PPPDE putative peptidase domain. The PPPDE superfamily (after Permuted Papain fold Peptidases of DsRNA viruses and Eukaryotes), consists of predicted thiol peptidases with a circularly permuted papain-like fold. The inference of the likely DUB function of the PPPDE superfamily proteins is based on the fusions of the catalytic domain to Ub-binding PUG (PUB)/UBA domains and a novel alpha-helical Ub-associated domain (the PUL domain, after PLAP, Ufd3p and Lub1p) .. +PF05904 Plant protein of unknown function (DUF863)
Pfam-B_7732 (release 8.0). This family consists of a number of hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown.. +PF05906 Herpesvirus-7 repeat of unknown function (DUF865)
Pfam-B_9911 (release 8.0). This family consists of a series of 12 repeats of 35 amino acids in length which are found exclusively in Herpesvirus-7. The function of this family is unknown.. +PF05907 Eukaryotic protein of unknown function (DUF866)
Pfam-B_8299 (release 8.0). This family consists of a number of hypothetical eukaryotic proteins of unknown function with an average length of around 165 residues.. +PF05908 Protein of unknown function (DUF867)
Pfam-B_7778 (release 8.0). This family consists of a number of bacterial and phage proteins with no known function and is present in Bacillus species and the Lambda-like viruses.. +PF05910 Plant protein of unknown function (DUF868)
Pfam-B_8013 (release 8.0). This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown.. +PF05911 Plant protein of unknown function (DUF869)
Pfam-B_8094 (release 8.0). This family consists of a number of sequences found in Arabidopsis thaliana, Oryza sativa and Lycopersicon esculentum (Tomato). The function of this family is unknown. . +PF05912 Caenorhabditis elegans protein of unknown function (DUF870)
Pfam-B_8400 (release 8.0). This family consists of a number of hypothetical proteins which seem to be specific to Caenorhabditis elegans. The function of this family is unknown.. +PF05913 Bacterial protein of unknown function (DUF871)
Pfam-B_8510 (release 8.0). This family consists of several conserved hypothetical proteins from bacteria and archaea. The function of this family is unknown.. +PF05914 RIB43A
Pfam-B_8571 (release 8.0). This family consists of several RIB43A-like eukaryotic proteins. Ciliary and flagellar microtubules contain a specialised set of protofilaments, termed ribbons, that are composed of tubulin and several associated proteins. RIB43A was first characterised in the unicellular biflagellate, Chlamydomonas reinhardtii although highly related sequences are present in several higher eukaryotes including humans. The function of this protein is unknown although the structure of RIB43A and its association with the specialised protofilament ribbons and with basal bodies is relevant to the proposed role of ribbons in forming and stabilising doublet and triplet microtubules and in organising their three-dimensional structure. Human RIB43A homologues could represent a structural requirement in centriole replication in dividing cells .. +PF05915 Eukaryotic protein of unknown function (DUF872)
Pfam-B_8741 (release 8.0). This family consists of several uncharacterised eukaryotic proteins. The function of this family is unknown.. +PF05916 DUF873;
GINS complex protein. Pfam-B_8759 (release 8.0). The eukaryotic GINS complex is essential for the initiation and elongation phases of DNA replication [1-3]. It consists of four paralogous protein subunits (Sld5, Psf1, Psf2 and Psf3), all of which are included in this family. The GINS complex is conserved from yeast to humans, and has been shown in human to bind directly to DNA primase .. +PF05917 Helicobacter pylori protein of unknown function (DUF874)
Pfam-B_8835 (release 8.0). This family consists of several hypothetical proteins specific to Helicobacter pylori. The function of this family is unknown.. +PF05918 Apoptosis inhibitory protein 5 (API5)
Pfam-B_8916 (release 8.0). This family consists of apoptosis inhibitory protein 5 (API5) sequences from several organisms. Apoptosis or programmed cell death is a physiological form of cell death that occurs in embryonic development and organ formation. It is characterised by biochemical and morphological changes such as DNA fragmentation and cell volume shrinkage. API5 is an anti apoptosis gene located in human chromosome 11, whose expression prevents the programmed cell death that occurs upon the deprivation of growth factors [1,2].. +PF05919 Mitovirus RNA-dependent RNA polymerase
Pfam-B_9906 (release 8.0). This family consists of several Mitovirus RNA-dependent RNA polymerase proteins. The family also contains fragment matches in the mitochondria of Arabidopsis thaliana.. +PF05920 Coprinus_mating;
Pfam-B_4610 (release 8.0). This is a homeobox transcription factor KN domain conserved from fungi to human and plants.. +PF05922 Subtilisin_N;
Peptidase inhibitor I9. Pfam-B_52 (release 8.0). This family includes the proteinase B inhibitor from Saccharomyces cerevisiae and the activation peptides from peptidases of the subtilisin family. The subtilisin propeptides are known to function as molecular chaperones, assisting in the folding of the mature peptidase , but have also been shown to act as 'temporary inhibitors' .. +PF05923 APC cysteine-rich region
This short region is found repeated in the mid region of the adenomatous polyposis proteins (APCs). In the human protein many cancer-linked SNPs are found near the first three occurrences of the motif. These repeats bind beta-catenin .. +PF05924 SAMP Motif
This short region is found repeated in the mid region of the adenomatous polyposis proteins (APCs). This motif binds axin .. +PF05925 Enterobacterial virulence protein IpgD
Pfam-B_1987 (release 9.0). This family consists of several enterobacterial IpgD like virulence factor proteins. In the Gram-negative pathogen Shigella flexneri, the virulence factor IpgD is translocated directly into eukaryotic cells and acts as a potent inositol 4-phosphatase that specifically dephosphorylates phosphatidylinositol 4,5-bisphosphate [PtdIns(4,5)P(2)] into phosphatidylinositol 5-monophosphate [PtdIns(5)P] that then accumulates. Transformation of PtdIns(4,5)P(2) into PtdIns(5)P by IpgD is responsible for dramatic morphological changes of the host cell, leading to a decrease in membrane tether force associated with membrane blebbing and actin filament remodelling .. +PF05926 Phage head completion protein (GPL)
Pfam-B_1860 (release 9.0). This family consists of several phage head completion proteins (GPL) as well as related bacterial sequences. Members of this family allow the completion of filled heads by rendering newly packaged DNA in the heads resistant to DNase. The protein is thought to bind to DNA filled capsids .. +PF05927 Penaeidin
Pfam-B_2675 (release 9.0). This family consists of several isoforms of the penaeidin protein which is specific to shrimps. Penaeidins, a unique family of antimicrobial peptides (AMPs) with both proline and cysteine-rich domains, were initially identified in the hemolymph of the Pacific white shrimp, Litopenaeus vannamei . . +PF05928 Zea mays MURB-like protein (MuDR)
Pfam-B_3145 (release 9.0). This family consists of several Zea mays specific MURB-like proteins. The transposition of Mu elements underlying Mutator activity in maize requires a transcriptionally active MuDR element. Despite variation in MuDR copy number and RNA levels in Mutator lines, transposition events are consistently late in plant development, and Mu excision frequencies are similar .. +PF05929 Phage capsid scaffolding protein (GPO) serine peptidase
Pfam-B_1730 (release 9.0). This family consists of several bacteriophage capsid scaffolding proteins (GPO) and some related bacterial sequences. GPO is thought to function in both the assembly of proheads and the cleavage of GPN . The family is found to function as a serine peptidase, with a conserved Asp, His and Ser catalytic triad, as in subtilisin, and as represented in MEROPS:S73. The family includes SwissProt:P25478 from Enterobacteria phage P2 which cleaves itself and then becomes the scaffold protein upon which the bacteriophage prohead is built - a mechanism quite common amongst phages .. +PF05930 AlpA;
Prophage CP4-57 regulatory protein (AlpA). Pfam-B_2048 (release 9.0). This family consists of several short bacterial and phage proteins which are related to the E. coli protein AlpA. AlpA suppresses two phenotypes of a delta lon protease mutant, overproduction of capsular polysaccharide and sensitivity to UV light . Several of the sequences in this family are thought to be DNA-binding proteins.. +PF05931 Staphylococcal AgrD protein
Pfam-B_2868 (release 9.0). This family consists of several AgrD proteins from many Staphylococcus species. The agr locus was initially described in Staphylococcus aureus as an element controlling the production of exoproteins implicated in virulence. Its pattern of action has been shown to be complex, upregulating certain extracellular toxins and enzymes expressed post-exponentially and repressing some exponential-phase surface components. AgrD encodes the precursor of the autoinducing peptide (AIP). The AIP derived from AgrD by the action of AgrB interacts with AgrC in the membrane to activate AgrA, which upregulates transcription both from promoter P2, amplifying the response, and from P3, initiating the production of a novel effector: RNAIII. In S. aureus, delta-hemolysin is the only translation product of RNA III and is not involved in the regulatory functions of the transcript, which is therefore the primary agent for modulating the expression of other operons controlled by agr .. +PF05932 Tir chaperone protein (CesT) family
Pfam-B_2921 (release 9.0). This family consists of a number of bacterial sequences which are highly similar to the Tir chaperone protein in E. coli. In many Gram-negative bacteria, a key indicator of pathogenic potential is the possession of a specialised type III secretion system, which is utilised to deliver virulence effector proteins directly into the host cell cytosol. Many of the proteins secreted from such systems require small cytosolic chaperones to maintain the secreted substrates in a secretion-competent state. CesT serves a chaperone function for the enteropathogenic Escherichia coli (EPEC) translocated intimin receptor (Tir) protein, which confers upon EPEC the ability to alter host cell morphology following intimate bacterial attachment. This family also contains several DspF and related sequences from several plant pathogenic bacteria. The "disease-specific" (dsp) region next to the hrp gene cluster of Erwinia amylovora is required for pathogenicity but not for elicitation of the hypersensitive reaction. DspF and AvrF are small (16 kDa and 14 kDa) and acidic with predicted amphipathic alpha helices in their C termini; they resemble chaperones for virulence factors secreted by type III secretion systems of animal pathogens .. +PF05933 Fungal ATP synthase protein 8 (A6L)
Pfam-B_2993 (release 9.0). This family consists of fungus specific ATP synthase protein 8 (EC:3.6.3.14). The family may be related to the ATP synthase protein 8 found in other eukaryotes Pfam:PF00895.. +PF05934 Mid-1-related chloride channel (MCLC)
Pfam-B_2711 (release 9.0). This family consists of several mid-1-related chloride channels. mid-1-related chloride channel (MCLC) proteins function as a chloride channel when incorporated in the planar lipid bilayer .. +PF05935 Arylsulfotransferase (ASST)
Pfam-B_3266 (release 9.0). This family consists of several bacterial Arylsulfotransferase proteins. Arylsulfotransferase (ASST) transfers a sulfate group from phenolic sulfate esters to a phenolic acceptor substrate . . +PF05936 Bacterial protein of unknown function (DUF876)
Pfam-B_3279 (release 9.0). This family consists of a series of hypothetical bacterial sequences of unknown function.. +PF05937 EB-1 Binding Domain
This region at the C-terminus of the APC proteins binds the microtubule-associating protein EB-1 . At the C-terminus of the alignment is also a Pfam:PF00595 binding domain. A short motif in the middle of the region appears to be found in the APC2 proteins (e.g Swiss:O95996).. +PF05938 Plant self-incompatibility protein S1
Pfam-B_3292 (release 9.0). This family consists of a series of plant proteins which are related to the Papaver rhoeas S1 self-incompatibility protein. Self incompatibility (SI) is the single most important outbreeding device found in angiosperms and is a mechanism that regulates the acceptance or rejection of pollen. S1 is known to exhibit specific pollen-inhibitory properties .. +PF05939 Phage minor tail protein
Pfam-B_3296 (release 9.0). This family consists of a series of phage minor tail proteins and related sequences from several bacterial species.. +PF05940 NnrS protein
Pfam-B_3395 (release 9.0). This family consists of several bacterial NnrS like proteins. NnrS is a putative heme-Cu protein (NnrS) and a member of the short-chain dehydrogenase family . Expression of nnrS is dependent on the transcriptional regulator NnrR, which also regulates expression of genes required for the reduction of nitrite to nitrous oxide, including nirK and nor. NnrS is a haem- and copper-containing membrane protein. Genes encoding putative orthologues of NnrS are sometimes but not always found in bacteria encoding nitrite and/or nitric oxide reductase .. +PF05941 Chordopoxvirus A20R protein
Pfam-B_3744 (release 9.0). This family consists of several Chordopoxvirus A20R proteins. The A20R protein is required for DNA replication, is associated with the processive form of the viral DNA polymerase, and directly interacts with the viral proteins encoded by the D4R, D5R, and H5R open reading frames. A20R may contribute to the assembly or stability of the multiprotein DNA replication complex .. +PF05942 Archaeal PaREP1/PaREP8 family
Pfam-B_2248 (release 9.0) & Pfam-B_9342 (release 9.0). This family consists of several archaeal PaREP1 and PaREP8 proteins the function of this family is unknown.. +PF05943 Protein of unknown function (DUF877)
Pfam-B_2566 (release 9.0). This family consists of a number of uncharacterised bacterial proteins. The function of this family is unknown.. +PF05944 Phage small terminase subunit
Pfam-B_2645 (release 9.0). This family consists of several phage small terminase subunit proteins as well as some related bacterial sequences .. +PF05946 Toxin-coregulated pilus subunit TcpA
Pfam-B_3639 (release 9.0). This family consists of toxin-coregulated pilus subunit (TcpA) proteins from Vibrio cholerae and related sequences. The major virulence factors of toxigenic Vibrio cholerae are cholera toxin (CT), which is encoded by a lysogenic bacteriophage (CTXPhi), and toxin-coregulated pilus (TCP), an essential colonisation factor which is also the receptor for CTXPhi. The genes for the biosynthesis of TCP are part of a larger genetic element known as the TCP pathogenicity island .. +PF05947 Bacterial protein of unknown function (DUF879)
Pfam-B_3751 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF05949 Bacterial protein of unknown function (DUF881)
Pfam-B_4053 (release 9.0). This family consists of a series of hypothetical bacterial proteins. One of the family members Swiss:Q45543 from Bacillus subtilis is thought to be involved in cell division and sporulation .. +PF05950 Orthopoxvirus A36R protein
Pfam-B_4070 (release 9.0). This family consists of several Orthopoxvirus A36R proteins. The A36R protein is predicted to be a type Ib membrane protein .. +PF05951 DUF882;
Bacterial protein of unknown function (DUF882). Pfam-B_4115 (release 9.0). This family consists of a series of hypothetical bacterial proteins of unknown function.. +PF05952 Bacillus competence pheromone ComX
Pfam-B_4222 (release 9.0). Natural genetic competence in Bacillus subtilis is controlled by quorum-sensing (QS). The ComP- ComA two-component system detects the signalling molecule ComX, and this signal is transduced by a conserved phosphotransfer mechanism. ComX is synthesised as an inactive precursor and is then cleaved and modified by ComQ before export to the extracellular environment .. +PF05953 Allatostatin
Pfam-B_4313 (release 9.0). This family consists of allatostatins, bombystatins, helicostatins, cydiastatins and schistostatin from several insect species. Allatostatins (ASTs) of the Tyr/Phe-Xaa-Phe-Gly Leu/Ile-NH2 family are a group of insect neuropeptides that inhibit juvenile hormone biosynthesis by the corpora allata .. +PF05954 Phage late control gene D protein (GPD)
Pfam-B_4333 (release 9.0) & Pfam-B_12199 (release 10.0). This family includes a number of phage late control gene D proteins and related bacterial sequences. This family also includes Bacteriophage Mu P proteins and related sequences.. +PF05955 Eq_herpes_Gp2;
Equine herpesvirus glycoprotein gp2. Pfam-B_4360 (release 9.0). This family consists of a number of glycoprotein gp2 sequences from equine herpesviruses.. +PF05956 APC basic domain
This region of the APC family of proteins is known as the basic domain. It contains a high proportion of positively charged amino acids and interacts with microtubules .. +PF05957 Bacterial protein of unknown function (DUF883)
Pfam-B_4421 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF05958 tRNA (Uracil-5-)-methyltransferase
Pfam-B_4661 (release 9.0). This family consists of (Uracil-5-)-methyltransferases EC:2.1.1.35 from bacteria, archaea and eukaryotes. A 5-methyluridine (m(5)U) residue at position 54 is a conserved feature of bacterial and eukaryotic tRNAs. The methylation of U54 is catalysed by the tRNA(m5U54)methyltransferase, which in Saccharomyces cerevisiae is encoded by the nonessential TRM2 gene. It is thought that tRNA modification enzymes might have a role in tRNA maturation not necessarily linked to their known catalytic activity .. +PF05959 Nucleopolyhedrovirus protein of unknown function (DUF884)
Pfam-B_4679 (release 9.0). This family consists of several hypothetical Nucleopolyhedrovirus proteins of unknown function.. +PF05960 Bacterial protein of unknown function (DUF885)
Pfam-B_4405 (release 9.0). This family consists of several hypothetical bacterial proteins several of which are putative membrane proteins.. +PF05961 Chordopoxvirus A13L protein
Pfam-B_5061 (release 9.0). This family consists of A13L proteins from the Chordopoxviruses. A13L or p8 is one of the three most abundant membrane proteins of the intracellular mature Vaccinia virus .. +PF05962 DUF886;
Pfam-B_5160 (release 9.0). HutD from Pseudomonas fluorescens SBW25 is a component of the histidine uptake and utilisation operon. HutD is operonic with the well characterised repressor protein HutC. Genetic analysis using transcriptional fusions (lacZ) and deletion mutants shows that hutD is necessary to maintain fitness in environments replete with histidine. Evidence outlined by Zhang & Rainey (2007) suggests that HutD functions as a governor that sets an upper bound on the level of hut operon transcription . The mechanistic basis is unknown, but in silico molecular docking studies based on the crystal structure of PA5104 (HutD from Pseudomonas aeruginosa) show that urocanate (the first breakdown product of histidine) docks with the active site of HutD.. +PF05963 Cytomegalo_US3;
Cytomegalovirus US3 protein. Pfam-B_7187 (release 9.0). US3 of human cytomegalovirus is an endoplasmic reticulum resident transmembrane glycoprotein that binds to major histocompatibility complex class I molecules and prevents their departure. The endoplasmic reticulum retention signal of the US3 protein is contained in the luminal domain of the protein .. +PF05964 F/Y-rich N-terminus
Pfam-B_1170 (release 8.0). This region is normally found in the trithorax/ALL1 family proteins. It is similar to SMART:SM00541.. +PF05965 F/Y rich C-terminus
Pfam-B_1170 (release 8.0). This region is normally found in the trithorax/ALL1 family proteins. It is similar to SMART:SM00542.. +PF05966 Chordopoxvirus A33R protein
Pfam-B_4799 (release 9.0). This family consists of several Chordopoxvirus A33R proteins. A33R plays a role in promoting Ab-resistant cell-to-cell spread of virus and interacts with A36R to incorporate the protein into the outer membrane of intracellular enveloped virions (IEV) .. +PF05968 Bacillus PapR protein
Pfam-B_4892 (release 9.0). This family consists of the Bacillus species specific PapR protein. The papR gene belongs to the PlcR regulon and is located 70 bp downstream from plcR. It encodes a 48-amino-acid peptide. Disruption of the papR gene abolishes expression of the PlcR regulon, resulting in a large decrease in haemolysis and virulence in insect larvae. A processed form of PapR activates the PlcR regulon by allowing PlcR to bind to its DNA target. This activating mechanism is strain specific .. +PF05969 DUF888;
Photosystem II complex subunit Ycf12. Pfam-B_4945 (release 9.0). Ycf12 has been identified as a core subunit in the photosystem II (PSII) complex [1-2]. PsbZ has been shown to be required for the association of PsbK and Ycf12 with PSII .. +PF05970 DUF889;
Pfam-B_4988 (release 9.0). This family includes homologues of the PIF1 helicase, which inhibits telomerase activity and is cell cycle regulated . This family includes a large number of largely uncharacterised plant proteins. This family includes a P-loop motif that is involved in nucleotide binding.. +PF05971 DUF890;
Protein of unknown function (DUF890). Pfam-B_5064 (release 9.0). This family consists of several conserved hypothetical proteins from both eukaryotes and prokaryotes. The function of this family is unknown.. +PF05972 APC 15 residue motif
This motif, known as the 15 aa repeat, is found in the APC protein family. They are involved in binding beta-catenin along with the Pfam:PF05923 repeats. Many human cancer mutations map to the region around these motifs, and may be involved in disrupting their binding of beta-catenin.. +PF05973 DUF891;
Phage derived protein Gp49-like (DUF891). Pfam-B_5075 (release 9.0) & Pfam-B_6067 (release 14.0). This family consists of hypothetical bacterial proteins of unknown function as well as phage Gp49 proteins.. +PF05974 Domain of unknown function (DUF892)
Pfam-B_5115 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF05975 Bacterial ABC transporter protein EcsB
Pfam-B_4764 (release 9.0). This family consists of several bacterial ABC transporter proteins which are homologous to the EcsB protein of Bacillus subtilis. EcsB is thought to encode a hydrophobic protein with six membrane-spanning helices in a pattern found in other hydrophobic components of ABC transporters .. +PF05977 DUF894;
Transmembrane secretion effector. Moxon SJ, Eberhardt R. Pfam-B_4880 (release 9.0). This is a family of transport proteins. Members of this family include a protein responsible for the secretion of the ferric chelator, enterobactin , and a protein involved in antibiotic resistance .. +PF05978 DUF895; UNC-93_Ce;
Ion channel regulatory protein UNC-93. Moxon SJ, Pollington J. Pfam-B_4965 (release 9.0). This family of proteins is a component of a multi-subunit protein complex which is involved in the coordination of muscle contraction. UNC-93 is most likely an ion channel regulatory protein .. +PF05979 Bacterial protein of unknown function (DUF896)
Pfam-B_5209 (release 9.0). In B. subtilis, one small SOS response operon under the control of LexA, the yneA operon, is comprised of three genes: yneA, yneB, and ynzC . This family consists of several short, hypothetical bacterial proteins of unknown function. These proteins are mainly found in gram-positive firmicutes. Structures show that the N-terminus is composed of two alpha helices forming a helix-loop-helix motif. The structure of ynzC from B. subtilis forms a trimeric complex . Structural modelling suggests this domain may bind nucleic acids . This family is also known as UPF0291.. +PF05980 toxin_7;
Pfam-B_5254 (release 9.0). This family consists of several short spider neurotoxin proteins including many from the Funnel-web spider.. +PF05981 CreA protein
Pfam-B_5258 (release 9.0). This family consists of several bacterial CreA proteins, the function of which is unknown.. +PF05982 Domain of unknown function (DUF897)
Pfam-B_8040 (release 9.0). Family of bacterial proteins with unknown function. +PF05983 MED7;
Pfam-B_5278 (release 9.0). This family consists of several eukaryotic proteins which are homologues of the yeast MED7 protein. Activation of gene transcription in metazoans is a multi-step process that is triggered by factors that recognise transcriptional enhancer sites in DNA. These factors work with co-activators such as MED7 to direct transcriptional initiation by the RNA polymerase II apparatus .. +PF05984 Cytomegalovirus UL20A protein
Pfam-B_5345 (release 9.0). This family consists of several Cytomegalovirus UL20A proteins. UL20A is thought to be a glycoprotein .. +PF05985 Ethanolamine ammonia-lyase light chain (EutC)
Pfam-B_5363 (release 9.0). This family consists of several bacterial ethanolamine ammonia-lyase light chain (EutC) EC:4.3.1.7 sequences. Ethanolamine ammonia-lyase is a bacterial enzyme that catalyses the adenosylcobalamin-dependent conversion of certain vicinal amino alcohols to oxo compounds and ammonia . . +PF05986 ADAM-TS Spacer 1
Pfam-B_1693 (release 8.0). This family represents the Spacer-1 region from the ADAM-TS family of metalloproteinases .. +PF05987 Bacterial protein of unknown function (DUF898)
Pfam-B_5347 (release 9.0). This family consists of several bacterial proteins of unknown function. Some of the family members are described as putative membrane proteins.. +PF05988 Bacterial protein of unknown function (DUF899)
Pfam-B_5437 (release 9.0). This family consists of several uncharacterised bacterial proteins of unknown function.. +PF05989 Chordopoxvirus A35R protein
Pfam-B_5472 (release 9.0). This family consists of several Chordopoxvirus sequences homologous to the Vaccinia virus A35R protein. The function of this family is unknown.. +PF05990 Alpha/beta hydrolase of unknown function (DUF900)
Pfam-B_5475 (release 9.0). This family consists of several hypothetical proteins of unknown function mostly found in Rhizobium species. Members of this family have an alpha/beta hydrolase fold.. +PF05991 DUF901;
YacP-like NYN domain. Pfam-B_5522 (release 9.0). This family consists of bacterial proteins related to YacP. This family is uncharacterised functionally, but it has been suggested that these proteins are nucleases due to them containing a NYN domain. NYN (for N4BP1, YacP-like Nuclease) domains were discovered by Anantharaman and Aravind . Based on gene neighborhoods it was suggested that the bacterial YacP proteins interact with the Ribonuclease III and TrmH methylase in a processome complex that catalyzes the maturation of rRNA and tRNA .. +PF05992 SbmA/BacA-like family
Pfam-B_8139 (release 9.0). The Rhizobium meliloti bacA gene encodes a function that is essential for bacterial differentiation into bacteroids within plant cells in the symbiosis between R. meliloti and alfalfa. An Escherichia coli homolog of BacA, SbmA, is implicated in the uptake of microcins and bleomycin. This family is likely to be a subfamily of the ABC transporter family.. +PF05993 Reovirus major virion structural protein Mu-1/Mu-1C (M2)
Pfam-B_5584 (release 9.0). This family consists of several Reovirus major virion structural protein Mu-1/Mu-1C (M2) sequences. This family is thought to play a role in host cell membrane penetration .. +PF05994 Cytoplasmic Fragile-X interacting family
Pfam-B_8072 (release 9.0). CYFIP1/2 (Cytoplasmic fragile X mental retardation interacting protein) like proteins form a highly conserved protein family . The function of CYFIPs is unclear, but CYFIP interaction with fragile X mental retardation interacting protein (FMRP) involves the domain of FMRP which also mediates homo- and heteromerization .. +PF05995 Cysteine dioxygenase type I
Pfam-B_8006 (release 9.0). Cysteine dioxygenase type I (EC:1.13.11.20) converts cysteine to cysteinesulphinic acid and is the rate-limiting step in sulphate production.. +PF05996 Ferredoxin-dependent bilin reductase
Pfam-B_5667 (release 9.0). This family consists of several different but closely related proteins which include phycocyanobilin:ferredoxin oxidoreductase EC:1.3.7.5 (PcyA), 15,16-dihydrobiliverdin:ferredoxin oxidoreductase EC:1.3.7.2 (PebA) and phycoerythrobilin:ferredoxin oxidoreductase EC:1.3.7.3 (PebB). Phytobilins are linear tetrapyrrole precursors of the light-harvesting prosthetic groups of the phytochrome photoreceptors of plants and the phycobiliprotein photosynthetic antennae of cyanobacteria, red algae, and cryptomonads. It is known that phytobilins are synthesised from heme via the intermediary of biliverdin IX alpha (BV), which is reduced subsequently by ferredoxin-dependent bilin reductases with different double-bond specificities . . +PF05997 Nucleolar protein,Nop52
Pfam-B_8003 (release 9.0). Nop52 is believed to be involved in the generation of 28S rRNA .. +PF05999 Herpesvirus U5-like family
Pfam-B_8027 (release 9.0). This family of Herpesvirus includes U4, U5 and UL27.. +PF06001 Domain of Unknown Function (DUF902)
Pfam-B_3539 (release 8.0). This domain of unknown function is found in several transcriptional co-activators including the CREB-binding protein, which is an acetyltransferase that acetylates histones, giving a specific tag for transcriptional activation. This short domain is found to the C-terminus of bromodomains. The 40 residue domain contains four conserved cysteines suggesting that it may be stabilised by a zinc ion. In CREB this domain is to the N-terminus of another zinc binding PHD domain.. +PF06002 Alpha-2,3-sialyltransferase (CST-I)
Pfam-B_6887 (release 9.0). This family consists of several alpha-2,3-sialyltransferase (CST-I) proteins largely found in Campylobacter jejuni.. +PF06003 Survival motor neuron protein (SMN)
Pfam-B_7026 (release 9.0). This family consists of several eukaryotic survival motor neuron (SMN) proteins. The Survival of Motor Neurons (SMN) protein, the product of the spinal muscular atrophy-determining gene, is part of a large macromolecular complex (SMN complex) that functions in the assembly of spliceosomal small nuclear ribonucleoproteins (snRNPs). The SMN complex functions as a specificity factor essential for the efficient assembly of Sm proteins on U snRNAs and likely protects cells from illicit, and potentially deleterious, non-specific binding of Sm proteins to RNAs . . +PF06004 Bacterial protein of unknown function (DUF903)
Pfam-B_7037 (release 9.0). This family consists of several small bacterial proteins several of which are classified as putative lipoproteins. The function of this family is unknown.. +PF06005 Protein of unknown function (DUF904)
Pfam-B_7038 (release 9.0). This family consists of several bacterial and archaeal hypothetical proteins of unknown function.. +PF06006 Bacterial protein of unknown function (DUF905)
Pfam-B_7072 (release 9.0). This family consists of several short hypothetical Enterobacteria proteins of unknown function. Structural analysis of the surface features of the protein YvyC has revealed a single cluster of highly conserved residues on the surface. Additionally, these residues fall into two groups which lie within the two largest of the three cavities identified over the surface. The conclusion from this is that these two cavities with, Leu 58, Glu 75, Ile 82, and Glu 83 and Pro 86, conserved, are likely to be important for the molecular function and reflect the cavities found on the surface of the FlaG proteins in Pfam:PF03646.. +PF06007 Phosphonate metabolism protein PhnJ
Pfam-B_7179 (release 9.0). This family consists of several bacterial phosphonate metabolism (PhnJ) sequences. The exact role that PhnJ plays in phosphonate utilisation is unknown.. +PF06008 Laminin Domain I
Pfam-B_1925 (release 8.0). coiled-coil structure. It has been suggested that the domains I and II from laminin A, B1 and B2 may come together to form a triple helical coiled-coil structure .. +PF06009 Laminin Domain II
Pfam-B_1925 (release 8.0). It has been suggested that the domains I and II from laminin A, B1 and B2 may come together to form a triple helical coiled-coil structure .. +PF06011 DUF907;
Transient receptor potential (TRP) ion channel. Moxon SJ, Mistry J, Wood V. Pfam-B_5564 (release 9.0). This family of proteins are transient receptor potential (TRP) ion channels. They are essential for cellular viability and are involved in cell growth and cell wall synthesis . The genes for these proteins are homologous to polycystic kidney disease related ion channel genes .. +PF06012 Domain of Unknown Function (DUF908)
Pfam-B_6534 (release 8.0). +PF06013 DUF909;
Proteins of 100 residues with WXG. Pfam-B_7198 (release 9.0). ESAT-6 is a small protein that appears to be of fundamental importance in virulence and protective immunity in Mycobacterium tuberculosis. Homologues have been detected in other Gram-positive bacterial species. It may represent a novel secretion system potentially driven by the Pfam:PF01580 domains in the YukA-like proteins .. +PF06014 Bacterial protein of unknown function (DUF910)
Pfam-B_7253 (release 9.0). This family consists of several short bacterial proteins of unknown function.. +PF06015 Chordopoxvirus A30L protein
Pfam-B_7254 (release 9.0). This family consists of several short Chordopoxvirus proteins which are homologous to the A30L protein of Vaccinia virus. The vaccinia virus A30L protein is required for the association of electron-dense, granular, proteinaceous material with the concave surfaces of crescent membranes, an early step in viral morphogenesis. A30L is known to interact with the G7L protein and it has been shown that the stability of each is dependent on its association with the other .. +PF06016 Reovirus core-spike protein lambda-2 (L2)
Pfam-B_7350 (release 9.0). This family consists of several Reovirus core-spike protein lambda-2 (L2) sequences. The reovirus L2 genome segment encodes the core spike protein lambda-2, which mediates enzymatic reactions in 5' capping of the viral plus-strand transcripts .. +PF06017 M_tail; Myosin_tail_2;
Pfam-B_12631 (release 8.0). +PF06018 CodY GAF-like domain
Pfam-B_7573 (release 9.0). This domain is a GAF-like domain found at the N-terminus of several bacterial GTP-sensing transcriptional pleiotropic repressor CodY proteins. Presumably this domain is involved in GTP binding. CodY has been found to repress the dipeptide transport operon (dpp) of Bacillus subtilis in nutrient-rich conditions . The CodY protein also has a repressor effect on many genes in Lactococcus lactis during growth in milk .. +PF06019 Phage GP30.8 protein
Pfam-B_7692 (release 9.0). This family consists of several GP30.8 proteins from the T4-like phages. The function of this family is unknown.. +PF06020 Drosophila roughex protein
Pfam-B_7712 (release 9.0). This family consists of several roughex (RUX) proteins specific to Drosophila species. Roughex can influence the intracellular distribution of cyclin A and is therefore defined as a distinct and specialised cell cycle inhibitor for cyclin A-dependent kinase activity . Rux is thought to regulate the metaphase to anaphase transition during development .. +PF06021 Glycine_acyl_tr;
Aralkyl acyl-CoA:amino acid N-acyltransferase. Pfam-B_7828 (release 9.0). This family consists of several mammalian specific aralkyl acyl-CoA:amino acid N-acyltransferase (glycine N-acyltransferase) proteins EC:2.3.1.13.. +PF06022 Plasmodium variant antigen protein Cir/Yir/Bir
Pfam-B_8754 (release 9.0). This family consists of several Cir, Yir and Bir proteins from the Plasmodium species P.chabaudi, P.yoelii and P.berghei.. +PF06023 Archaeal protein of unknown function (DUF911)
Pfam-B_8782 (release 9.0). This family consists of several archaeal proteins of unknown function.. +PF06024 Nucleopolyhedrovirus protein of unknown function (DUF912)
Pfam-B_8809 (release 9.0). This family consists of several Nucleopolyhedrovirus proteins of unknown function.. +PF06025 Domain of Unknown Function (DUF913)
Pfam-B_6534 (release 8.0). Members of this family are found in various ubiquitin protein ligases.. +PF06026 Ribose 5-phosphate isomerase A (phosphoriboisomerase A)
Pfam-B_5144 (release 9.0). This family consists of several ribose 5-phosphate isomerase A or phosphoriboisomerase A (EC:5.3.1.6) from bacteria, eukaryotes and archaea. . +PF06027 Eukaryotic protein of unknown function (DUF914)
Pfam-B_7017 (release 9.0). This family consists of several hypothetical proteins of unknown function. Some of the sequences in this family are annotated as being putative membrane proteins.. +PF06028 Alpha/beta hydrolase of unknown function (DUF915)
Pfam-B_7094 (release 9.0). This family consists of several bacterial proteins of unknown function. Members of this family have an alpha/beta hydrolase fold.. +PF06029 AlkA N-terminal domain
Pfam-B_13157 (release 8.0). +PF06030 Bacterial protein of unknown function (DUF916)
Pfam-B_7106 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06031 SERTA motif
Pfam-B_7533 (Release 9.0). This family consists of a novel motif designated as SERTA (for SEI-1, RBT1, and TARA), corresponding to the largest conserved region among TRIP-Br proteins . The function of this motif is uncertain, but the CDK4-interacting segment of p34SEI-1 (amino acid residues 44-161) includes most of the SERTA motif .. +PF06032 Protein of unknown function (DUF917)
Pfam-B_7195 (release 9.0). This family consists of hypothetical bacterial and archaeal proteins of unknown function.. +PF06033 Nucleopolyhedrovirus protein of unknown function (DUF918)
Pfam-B_7213 (release 9.0). This family consists of several Nucleopolyhedrovirus proteins with no known function.. +PF06034 Nucleopolyhedrovirus protein of unknown function (DUF919)
Pfam-B_7250 (release 9.0). This family consists of several short Nucleopolyhedrovirus proteins of unknown function.. +PF06035 DUF920; BTLCP;
Bacterial transglutaminase-like cysteine proteinase BTLCP. Moxon SJ, Sammut SJ, Eberhardt R. Pfam-B_7277 (release 9.0). Members of this family are predicted to be bacterial transglutaminase-like cysteine proteinases. They contain a conserved Cys-His-Asp catalytic triad. Their structure is predicted to be similar to that of Salmonella typhimurium N-hydroxyarylamine O-acetyltransferase Swiss:Q00267, in Pfam:PF00797, however they lack the sub-domain which is important for arylamine recognition .. +PF06037 Bacterial protein of unknown function (DUF922)
Pfam-B_7397 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06039 Malate:quinone oxidoreductase (Mqo)
Pfam-B_7465 (release 9.0). This family consists of several bacterial Malate:quinone oxidoreductase (Mqo) proteins (EC:1.1.99.16). Mqo takes part in the citric acid cycle. It oxidises L-malate to oxaloacetate and donates electrons to ubiquinone-1 and other artificial acceptors or, via the electron transfer chain, to oxygen. NAD is not an acceptor and the natural direct acceptor for the enzyme is most likely a quinone. The enzyme is therefore called malate:quinone oxidoreductase, abbreviated to Mqo. Mqo is a peripheral membrane protein and can be released from the membrane by addition of chelators . . +PF06040 Adenovirus_E3;
Adenovirus E3 protein. Pfam-B_7475 (release 9.0). This family consists of several Adenovirus E3 proteins. The E3 protein does not seem to be essential for virus replication in cultured cells suggesting that the protein may function in virus-host interactions . . +PF06041 Bacterial protein of unknown function (DUF924)
Pfam-B_7600 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06042 Bacterial protein of unknown function (DUF925)
Pfam-B_7663 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function. This family was recently identified as belonging to the nucleotidyltransferase superfamily .. +PF06043 Reovirus P9-like family
Pfam-B_8265 (release 9.0). +PF06044 Dam-replacing family
Pfam-B_8314 (release 9.0). Dam-replacing protein (DRP) is a restriction endonuclease that is flanked by pseudo-transposable small repeat elements. The replacement of Dam-methylase by DRP allows phase variation through slippage-like mechanisms in several pathogenic isolates of Neisseria meningitidis .. +PF06045 Rhamnogalacturonate lyase family
Pfam-B_8355 (release 9.0). Rhamnogalacturonate lyase (EC:4.2.2.-) degrades the rhamnogalacturonan I (RG-I) backbone of pectin . This family contains mainly members from plants, but also contains the plant pathogen Erwinia chrysanthemi.. +PF06046 Exocyst complex component Sec6
Pfam-B_8361 (release 9.0). Sec6 is a component of the multiprotein exocyst complex. Sec6 interacts with Sec8, Sec10 and Exo70. These exocyst proteins localise to regions of active exocytosis-at the growing ends of interphase cells and in the medial region of cells undergoing cytokinesis-in an F-actin-dependent and exocytosis- independent manner .. +PF06047 DUF926;
Ras-induced vulval development antagonist. Pfam-B_8083 (release 9.0). This family is from synthetic multi-vulval genes which encode chromatin-associated proteins involved in transcriptional repression. This protein has a role in antagonising Ras-induced vulval development .. +PF06048 Domain of unknown function (DUF927)
Pfam-B_8364 (release 9.0). Family of bacterial proteins of unknown function. The C-terminal half of this family contains a P-loop motif.. +PF06049 Coagulation Factor V LSPD Repeat
These repeats are found in coagulation factor V (five). The name LSPD derives from the conserved residues in the middle of the repeat. They occur in the B domain, which is cleaved prior to activation of the protein. It has been suggested that domain B brings domains A and C together for activation ( ).. +PF06050 2-hydroxyglutaryl-CoA dehydratase, D-component
Pfam-B_8369 (release 9.0). Degradation of glutamate via the hydroxyglutarate pathway involves the syn-elimination of water from 2-hydroxyglutaryl-CoA. This anaerobic process is catalysed by 2-hydroxyglutaryl-CoA dehydratase, an enzyme with two components (A and D) that reversibly associate during reaction cycles. This component contains one non-reducible [4Fe-4S]2+ cluster and a reduced riboflavin 5'-monophosphate .. +PF06051 Domain of Unknown Function (DUF928)
Pfam-B_8442 (release 9.0). Family of uncharacterised bacterial proteins.. +PF06052 3-hydroxyanthranilic acid dioxygenase
Pfam-B_8515 (release 9.0). In eukaryotes 3-hydroxyanthranilic acid dioxygenase (EC:1.13.11.6) is part of the kynurenine pathway for the degradation of tryptophan and the biosynthesis of nicotinic acid . The prokaryotic homolog is involved in the 2-nitrobenzoate degradation pathway .. +PF06053 Domain of unknown function (DUF929)
Pfam-B_8458 (release 9.0). Family of proteins from the archaeon Sulfolobus, with undetermined function.. +PF06054 Competence protein CoiA-like family
Pfam-B_8535 (release 9.0). Many of the members of this family are described as transcription factors. CoiA falls within a competence-specific operon in Streptococcus. CoiA is an uncharacterised protein.. +PF06055 Exopolysaccharide synthesis, ExoD
Pfam-B_8604 (release 9.0). Among the bacterial genes required for nodule invasion are the exo genes. These genes are involved in the production of an extracellular polysaccharide. Mutations in the exoD result in altered exopolysaccharide production and defects in nodule invasion .. +PF06056 Putative ATPase subunit of terminase (gpP-like)
Pfam-B_7152 (release 9.0). This family of proteins are annotated as ATPase subunits of phage terminase after . Terminases are viral proteins that are involved in packaging viral DNA into the capsid.. +PF06057 Bacterial virulence protein (VirJ)
Pfam-B_7524 (release 9.0). This family consists of several bacterial VirJ virulence proteins. VirJ is thought to be involved in the type IV secretion system. It is thought that the substrate proteins localised to the periplasm may associate with the pilus in a manner that is mediated by VirJ, and suggest a two-step process for type IV secretion in Agrobacterium .. +PF06058 Dcp1-like decapping family
Pfam-B_8271 (release 9.0). An essential step in mRNA turnover is decapping. In yeast, two proteins have been identified that are essential for decapping, Dcp1 (this family) and Dcp2 (Pfam:PF05026). The precise role of these proteins in the decapping reaction has not been established. Evidence suggests that Dcp1 may enhance the function of Dcp2 .. +PF06059 Domain of Unknown Function (DUF930)
Pfam-B_8283 (release 9.0). Family of bacterial proteins with undetermined function. All bacteria in this family are from the Rhizobiales order. . +PF06060 Pre-pro-megakaryocyte potentiating factor precursor (Mesothelin)
Pfam-B_8552 (release 9.0). This family consists of several mammalian pre-pro-megakaryocyte potentiating factor precursor (MPF) or mesothelin proteins. Mesothelin is a glycosylphosphatidylinositol-linked glycoprotein highly expressed in mesothelial cells, mesotheliomas, and ovarian cancer, but the biological function of the protein is not known [1,2].. +PF06061 Baculoviridae ME53
Pfam-B_8086 (release 9.0). ME53 is one of the major early-transcribed genes. The ME53 protein is reported to contain a putative zinc finger motif .. +PF06062 Uncharacterised protein family (UPF0231)
Pfam-B_9027 (release 9.0). Family of uncharacterised Proteobacteria proteins.. +PF06064 Host-nuclease inhibitor protein Gam
Pfam-B_9063 (release 9.0). The Gam protein inhibits RecBCD nuclease and is found in both bacteria and bacteriophage .. +PF06066 SepZ
Pfam-B_9064 (release 9.0). SepZ is a component of the type III secretion system used in bacteria. SepZ is a gene within the enterocyte effacement locus. SepZ mutants exhibit reduced invasion efficiency and lack of tyrosine phosphorylation of Hp90 .. +PF06067 Domain of unknown function (DUF932)
Pfam-B_9083 (release 9.0) & Pfam-B_002957 (release 23.0). Family of prokaryotic proteins with unknown function. Contains a number of highly conserved polar residues that could suggest an enzymatic activity.. +PF06068 TIP49 C-terminus
Pfam-B_9170 (release 9.0). This family consists of the C-terminal region of several eukaryotic and archaeal RuvB-like 1 (Pontin or TIP49a) and RuvB-like 2 (Reptin or TIP49b) proteins. The N-terminal domain contains the Pfam:PF00004 domain. In zebrafish, the liebeskummer (lik) mutation causes development of hyperplastic embryonic hearts. lik encodes Reptin, a component of a DNA-stimulated ATPase complex. Beta-catenin and Pontin, a DNA-stimulated ATPase that is often part of complexes with Reptin, are in the same genetic pathways. The Reptin/Pontin ratio serves to regulate heart growth during development, at least in part via the beta-catenin pathway . TBP-interacting protein 49 (TIP49) was originally identified as a TBP-binding protein, and two related proteins are encoded by individual genes, tip49a and b. Although the function of this gene family has not been elucidated, they are supposed to play a critical role in nuclear events because they interact with various kinds of nuclear factors and have DNA helicase activities. TIP49a has been suggested to act as an autoantigen in some patients with autoimmune diseases . . +PF06069 PerC transcriptional activator
Pfam-B_9117 (release 9.0). PerC is a transcriptional activator of EaeA/BfpA expression in enteropathogenic bacteria .. +PF06070 Herpesvirus large structural phosphoprotein UL32
Pfam-B_9125 (release 9.0). The large phosphorylated protein (UL32-like) of herpes viruses is the polypeptide most frequently reactive in immuno-blotting analyses with antisera when compared with other viral proteins .. +PF06071 DUF933;
Protein of unknown function (DUF933). Pfam-B_10000 (release 9.0). This domain is found at the C terminus of the YchF GTP-binding protein (Swiss:O13998) and is possibly related to the ubiquitin-like and MoaD/ThiS superfamilies. . +PF06072 Alphaherpesvirus tegument protein US9
Pfam-B_7164 (release 9.0). This family consists of several US9 and related proteins from the Alphaherpesviruses. The function of the US9 protein is unknown although in Bovine herpesvirus 5 Us9 is essential for the anterograde spread of the virus from the olfactory mucosa to the bulb .. +PF06073 Bacterial protein of unknown function (DUF934)
Pfam-B_8947 (release 9.0). This family consists of several bacterial proteins of unknown function. One of the members of this family Swiss:Q8YEW3 is thought to be an oxidoreductase.. +PF06074 Protein of unknown function (DUF935)
Pfam-B_10021 (release 9.0). This family consists of several bacterial proteins of unknown function as well as the Bacteriophage Mu gp29 protein Swiss:Q9T1W5.. +PF06075 Plant protein of unknown function (DUF936)
Pfam-B_10047 (release 9.0). This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown.. +PF06076 Orthopoxvirus F14 protein
Pfam-B_10072 (release 9.0). This family consists of several short Orthopoxvirus F14 proteins. The function of this protein is unknown.. +PF06078 Bacterial protein of unknown function (DUF937)
Pfam-B_7321 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06079 SHAPY;
Pfam-B_7593 (release 9.0). This family consists of several eukaryotic apyrase proteins (EC:3.6.1.5). The salivary apyrases of blood-feeding arthropods are nucleotide hydrolysing enzymes implicated in the inhibition of host platelet aggregation through the hydrolysis of extracellular adenosine diphosphate. . . +PF06080 Protein of unknown function (DUF938)
Pfam-B_8833 (release 9.0). This family consists of several hypothetical proteins from both prokaryotes and eukaryotes. The function of this family is unknown.. +PF06081 Bacterial protein of unknown function (DUF939)
Pfam-B_8873 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06082 Bacterial putative lipoprotein (DUF940)
Pfam-B_9128 (release 9.0). This family consists of hypothetical bacterial proteins several of which are described as putative lipoproteins.. +PF06083 Interleukin-17
Pfam-B_9152 (release 9.0). IL-17 is a potent proinflammatory cytokine produced by activated memory T cells . The IL-17 family is thought to represent a distinct signaling system that appears to have been highly conserved across vertebrate evolution .. +PF06084 Cytomegalovirus TRL10 protein
Pfam-B_8875 (release 9.0). This family consists of several Cytomegalovirus TRL10 proteins. TRL10 represents a structural component of the virus particle and like the other HCMV envelope glycoproteins, is present in a disulfide-linked complex . . +PF06085 Lipoprotein Rz1 precursor
Pfam-B_8925 (release 9.0). This family consists of several bacteria and phage lipoprotein Rz1 precursors. Rz1 is a proline-rich lipoprotein from bacteriophage lambda which is known to have fusogenic properties. Rz1-induced liposome fusion is thought to be mediated primarily by the generation of local perturbation in the bilayer lipid membrane and to a lesser extent by electrostatic forces . This family Rz1 and the Rz protein Rz (Pfam:PF03245) represent a unique example of two genes located in different reading frames in the same nucleotide sequence, which encode different proteins that are both required in the same physiological pathway .. +PF06086 Orthopoxvirus A26L/A30L protein
Pfam-B_8938 (release 9.0). This family consists of several Orthopoxvirus A26L and A30L proteins. The Vaccinia A30L gene is regulated by a late promoter and encodes a protein of approximately 9 kDa. It is thought that the A30L protein is needed for vaccinia virus morphogenesis, specifically the association of the dense viroplasm with viral membranes .. +PF06087 Tyrosyl-DNA phosphodiesterase
Pfam-B_8155 (release 9.0). Covalent intermediates between topoisomerase I and DNA can become dead-end complexes that lead to cell death. Tyrosyl-DNA phosphodiesterase can hydrolyse the bond between topoisomerase I and DNA .. +PF06088 Nucleopolyhedrovirus telokin-like protein-20 (TLP20)
Pfam-B_7657 (release 9.0). This family consists of several Nucleopolyhedrovirus telokin-like protein-20 (TLP20) sequences. The function of this family is unknown but TLP20 is known to share some antigenic similarities to the smooth muscle protein telokin although the amino acid sequence shows no homologies to telokin .. +PF06089 L-asparaginase II
Pfam-B_7673 (release 9.0). This family consists of several bacterial L-asparaginase II proteins. L-asparaginase (EC:3.5.1.1) catalyses the hydrolysis of L-asparagine to L-aspartate and ammonium. Rhizobium etli possesses two asparaginases: asparaginase I, which is thermostable and constitutive, and asparaginase II, which is thermolabile, induced by asparagine and repressed by the carbon source .. +PF06090 DUF941;
Inositol-pentakisphosphate 2-kinase. Pfam-B_9098 (release 9.0). This is a family of inositol-pentakisphosphate 2-kinases (EC 2.7.1.158) (also known as inositol 1,3,4,5,6-pentakisphosphate 2-kinase, Ins(1,3,4,5,6)P5 2-kinase and InsP5 2-kinase). This enzyme phosphorylates Ins(1,3,4,5,6)P5 to form Ins(1,2,3,4,5,6)P6 (also known as InsP6 or phytate). InsP6 is involved in many processes such as mRNA export, nonhomologous end-joining, endocytosis and ion channel regulation.. +PF06092 Enterobacterial putative membrane protein (DUF943)
Pfam-B_7843 (release 9.0). This family consists of several hypothetical putative membrane proteins from Escherichia coli, Yersinia pestis and Salmonella typhi.. +PF06093 Spt4/RpoE2 zinc finger
Pfam-B_7906 (release 9.0) & COG2093. This family consists of several eukaryotic transcription elongation Spt4 proteins as well as archaebacterial RpoE2 . Three transcription-elongation factors Spt4, Spt5, and Spt6 are conserved among eukaryotes and are essential for transcription via the modulation of chromatin structure. Spt4 and Spt5 are tightly associated in a complex, while the physical association of the Spt4-Spt5 complex with Spt6 is considerably weaker. It has been demonstrated that Spt4, Spt5, and Spt6 play roles in transcription elongation in both yeast and humans including a role in activation by Tat. It is known that Spt4, Spt5, and Spt6 are general transcription-elongation factors, controlling transcription both positively and negatively in important regulatory and developmental roles . RpoE2 is one of 13 subunits in the archaeal RNA polymerase. These proteins contain a C4-type zinc finger, and the structure has been solved in . The structure reveals that Spt4-Spt5 binding is governed by an acid-dipole interaction between Spt5 and Spt4, and the complex binds to and travels along the elongating RNA polymerase. The Spt4-Spt5 complex is likely to be an ancient, core component of the transcription elongation machinery.. +PF06094 AIG2-like family
Pfam-B_9771 (release 9.0). AIG2 is an Arabidopsis protein that exhibits RPS2- and avrRpt2-dependent induction early after infection with Pseudomonas syringae pv maculicola strain ES4326 carrying avrRpt2 .. +PF06096 Baculo_8Kda;
Baculoviridae 8.2 KDa protein. Pfam-B_8370 (release 9.0). Family of proteins from various Baculoviruses with undetermined function.. +PF06097 Bacterial protein of unknown function (DUF945)
Pfam-B_9171 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06098 Radial spoke protein 3
Pfam-B_9453 (release 9.0). This family consists of several radial spoke protein 3 (RSP3) sequences. Eukaryotic cilia and flagella present in diverse types of cells perform motile, sensory, and developmental functions in organisms from protists to humans. They are centred by precisely organised, microtubule-based structures, the axonemes. The axoneme consists of two central singlet microtubules, called the central pair, and nine outer doublet microtubules. These structures are well-conserved during evolution. The outer doublet microtubules, each composed of A and B sub-fibres, are connected to each other by nexin links, while the central pair is held at the centre of the axoneme by radial spokes. The radial spokes are T-shaped structures extending from the A-tubule of each outer doublet microtubule to the centre of the axoneme. Radial spoke protein 3 (RSP3), is present at the proximal end of the spoke stalk and helps in anchoring the radial spoke to the outer doublet. It is thought that radial spokes regulate the activity of inner arm dynein through protein phosphorylation and dephosphorylation .. +PF06099 Phenol hydroxylase subunit
Pfam-B_10062 (release 9.0). This family consists of several bacterial phenol hydroxylase subunit proteins which are part of a multicomponent phenol hydroxylase. Some bacteria can utilise phenol or some of its methylated derivatives as their sole source of carbon and energy. The first step in this process is the conversion of phenol into catechol. Catechol is then further metabolised via the meta-cleavage pathway into TCA cycle intermediates . . +PF06100 Streptococcal 67 kDa myosin-cross-reactive antigen like family
Pfam-B_9995 (release 9.0). Members of this family are thought to have structural features in common with the beta chain of the class II antigens, as well as myosin, and may play an important role in the pathogenesis .. +PF06101 Plant protein of unknown function (DUF946)
Pfam-B_10066 (release 9.0). This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown.. +PF06102 Domain of unknown function (DUF947)
Pfam-B_9959 (release 9.0). Family of eukaryotic proteins with unknown function.. +PF06103 Bacterial protein of unknown function (DUF948)
Pfam-B_10104 (release 9.0). This family consists of bacterial sequences several of which are thought to be general stress proteins.. +PF06105 Aph-1 protein
Pfam-B_7927 (release 9.0). This family consists of several eukaryotic Aph-1 proteins. Gamma-secretase catalyses the intramembrane proteolysis of Notch, beta-amyloid precursor protein, and other substrates as part of a new signaling paradigm and as a key step in the pathogenesis of Alzheimer's disease. It is thought that the presenilin heterodimer comprises the catalytic site and that a highly glycosylated form of nicastrin associates with it. Aph-1 and Pen-2, two membrane proteins genetically linked to gamma-secretase, associate directly with presenilin and nicastrin in the active protease complex. Co-expression of all four proteins leads to marked increases in presenilin heterodimers, full glycosylation of nicastrin, and enhanced gamma-secretase activity .. +PF06106 Staphylococcus protein of unknown function (DUF950)
Pfam-B_8992 (release 9.0). This family consists of several hypothetical proteins from different Staphylococcus species. The function of this family is unknown.. +PF06107 Bacterial protein of unknown function (DUF951)
Moxon SJ, Eberhardt R. Pfam-B_8994 (release 9.0). This family consists of several short hypothetical bacterial proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids .. +PF06108 Protein of unknown function (DUF952)
Pfam-B_8995 (release 9.0). This family consists of several hypothetical bacterial and plant proteins of unknown function.. +PF06109 Haemolysin E (HlyE)
Pfam-B_9001 (release 9.0). This family consists of several enterobacterial haemolysin (HlyE) proteins. Hemolysin E (HlyE) is a novel pore-forming toxin of Escherichia coli, Salmonella typhi, and Shigella flexneri. HlyE is unrelated to the well characterised pore-forming E. coli hemolysins of the RTX family, haemolysin A (HlyA), and the enterohaemolysin encoded by the plasmid borne ehxA gene of E. coli O157. However, it is evident that expression of HlyE in the absence of the RTX toxins is sufficient to give a hemolytic phenotype in E. coli. HlyE is a protein of 34 kDa that is expressed during anaerobic growth of E. coli. Anaerobic expression is controlled by the transcription factor, FNR, such that, upon ingestion and entry into the anaerobic mammalian intestine, HlyE is produced and may then contribute to the colonisation of the host .. +PF06110 Eukaryotic protein of unknown function (DUF953)
Pfam-B_9087 (release 9.0). This family consists of several hypothetical eukaryotic proteins of unknown function.. +PF06112 Gammaherpesvirus capsid protein
Pfam-B_9200 (release 9.0). This family consists of several Gammaherpesvirus capsid proteins. The exact function of this family is unknown.. +PF06113 Brain and reproductive organ-expressed protein (BRE)
Pfam-B_9280 (release 9.0). This family consists of several eukaryotic brain and reproductive organ-expressed (BRE) proteins. BRE is a putative stress-modulating gene, found able to down-regulate TNF-alpha-induced-NF-kappaB activation upon over expression. A total of six isoforms are produced by alternative splicing predominantly at either end of the gene. Compared to normal cells, immortalised human cell lines uniformly express higher levels of BRE. Peripheral blood monocytes respond to LPS by down-regulating the expression of all the BRE isoforms. It is thought that the function of BRE and its isoforms is to regulate peroxisomal activities .. +PF06114 Domain of unknown function (DUF955)
Family of bacterial and viral proteins with undetermined function. A conserved H-E-X-X-H motif is suggestive of a catalytic active site and shows similarity to Pfam:PF01435.. +PF06115 Domain of unknown function (DUF956)
Pfam-B_9146 (release 9.0). Family of bacterial sequences with undetermined function.. +PF06116 Transcriptional activator RinB
Pfam-B_9294 (release 9.0). This family consists of several Staphylococcus aureus bacteriophage RinB proteins and related sequences from their host. The int gene of staphylococcal bacteriophage phi 11 is the only viral gene responsible for the integrative recombination of phi 11. rinA and rinB, are both required to activate expression of the int gene .. +PF06117 Enterobacterial protein of unknown function (DUF957)
Pfam-B_9300 (release 9.0). This family consists of several hypothetical proteins from Escherichia coli, Salmonella typhi, Shigella flexneri and Proteus vulgaris. The function of this family is unknown.. +PF06119 DUF958;
Yeats C, Myerscough N. Pfam-B_1159 (release 8.0). This is a nidogen-like domain (NIDO) domain and is an extracellular domain found in nidogen and hypothetical proteins of unknown function .. +PF06120 Tail length tape measure protein
Pfam-B_10088 (release 9.0). This family consists of the tail length tape measure protein from bacteriophage HK97 and related sequences from Escherichia coli O157:H7.. +PF06121 Domain of Unknown Function (DUF959)
Pfam-B_25471 (release 8.0). This N-terminal domain is not expressed in the 'Short' isoform of Collagen A .. +PF06122 Conjugative relaxosome accessory transposon protein
Pfam-B_10166 (release 9.0). The TraH protein is thought to be a relaxosome accessory component, also necessary for transfer but not for H-pilus synthesis within the conjugative transposon .. +PF06123 Inner membrane protein CreD
Pfam-B_10187 (release 9.0). This family consists of several bacterial CreD or Cet inner membrane proteins. Dominant mutations of the cet gene of Escherichia coli result in tolerance to colicin E2 and increased amounts of an inner membrane protein with an Mr of 42,000. The cet gene is shown to be in the same operon as the phoM gene, which is required in a phoR background for expression of the structural gene for alkaline phosphatase, phoA. Although the Cet protein is not required for phoA expression, it has been suggested that the Cet protein has an enhancing effect on the transcription of phoA .. +PF06124 Staphylococcal protein of unknown function (DUF960)
Pfam-B_10198 (release 9.0). This family consists of several hypothetical proteins from several species of Staphylococcus. The function of this family is unknown.. +PF06125 Bacterial protein of unknown function (DUF961)
Pfam-B_10221 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06126 Herpesvirus Latent membrane protein 2
Pfam-B_9147 (release 9.0). Family of Kaposi's sarcoma-associated herpesvirus (HHV8) latent membrane protein.. +PF06127 Protein of unknown function (DUF962)
Pfam-B_10320 (release 9.0). This family consists of several eukaryotic and prokaryotic proteins of unknown function. The yeast protein Swiss:P25338 has been found to be non-essential for cell growth.. +PF06128 Shigella flexneri OspC protein
Pfam-B_10333 (release 9.0). This family consists of the Shigella flexneri specific protein OspC. The function of this family is unknown but it is thought that Osp proteins may be involved in post invasion events related to virulence. Since bacterial pathogens adapt to multiple environments during the course of infecting a host, it has been proposed that Shigella evolved a mechanism to take advantage of a unique intracellular cue, which is mediated through MxiE, to express proteins when the organism reaches the eukaryotic cytosol . . +PF06129 Chordopoxvirus G3 protein
Pfam-B_10417 (release 9.0). This family consists of several Chordopoxvirus specific G3 proteins. The function of this family is unknown.. +PF06130 Propanediol utilisation protein PduL
Pfam-B_10447 (release 9.0). +PF06131 Schizosaccharomyces pombe repeat of unknown function (DUF963)
Pfam-B_10581 (release 9.0). This family consists of a series of repeated sequences from one hypothetical protein (Swiss:Q96WV6) found in Schizosaccharomyces pombe. The function of this family is unknown.. +PF06133 Protein of unknown function (DUF964)
Pfam-B_10600 (release 9.0). This family consists of several relatively short bacterial and archaeal hypothetical sequences. The function of this family is unknown.. +PF06134 L-rhamnose isomerase (RhaA)
Pfam-B_10641 (release 9.0). This family consists of several bacterial L-rhamnose isomerase proteins (EC:5.3.1.14).. +PF06135 Bacterial protein of unknown function (DUF965)
Pfam-B_10661 (release 9.0). This family consists of several hypothetical bacterial proteins. The function of the family is unknown.. +PF06136 Domain of unknown function (DUF966)
Pfam-B_8637 (release 9.0). Family of plant proteins with unknown function.. +PF06138 Chordopoxvirus E11 protein
Pfam-B_10685 (release 9.0). This family consists of several Chordopoxvirus E11 proteins. The E11 gene of vaccinia virus encodes a 15-kDa polypeptide. Mutations in the E11 gene make the virus temperature-sensitive due to either the fact that virus infectivity requires a threshold level of active E11 protein or that E11 function is conditionally essential . . +PF06139 BphX-like
Pfam-B_8664 (release 9.0). Family of bacterial proteins located in the phenyl dioxygenase (bph) operon. The function of this family is unknown. . +PF06140 Interferon-induced 6-16 family
Pfam-B_9299 (release 9.0). +PF06141 Phage minor tail protein U
Pfam-B_9209 (release 9.0). Tail fibre component U of bacteriophage.. +PF06143 Baculovirus 11 kDa family
Pfam-B_9424 (release 9.0). Family of uncharacterised Baculovirus proteins that are all about 11 kDa in size.. +PF06144 DNA polymerase III, delta subunit
Pfam-B_9452 (release 9.0). DNA polymerase III, delta subunit (EC 2.7.7.7) is required for, along with delta' subunit, the assembly of the processivity factor beta(2) onto primed DNA in the DNA polymerase III holoenzyme-catalysed reaction . The delta subunit is also known as HolA.. +PF06145 Coronavirus nonstructural protein NS1
Pfam-B_9242 (release 9.0). Bovine coronavirus NS1 encodes a 4.9 kDa protein .. +PF06146 Phosphate-starvation-inducible E
Pfam-B_8639 (release 9.0). Phosphate-starvation-inducible E (PsiE) expression is under direct positive and negative control by PhoB and cAMP-CRP, respectively . The function of PsiE remains to be determined.. +PF06147 Protein of unknown function (DUF968)
Pfam-B_9463 (release 9.0). Family of uncharacterised prophage proteins that are also found in bacteria and eukaryotes.. +PF06148 COG (conserved oligomeric Golgi) complex component, COG2
Pfam-B_9559 (release 9.0). The COG complex comprises eight proteins COG1-8. The COG complex plays critical roles in Golgi structure and function . The proposed function of the complex is to mediate the initial physical contact between transport vesicles and their membrane targets. A comparable role in tethering vesicles has been suggested for at least six additional large multisubunit complexes, including the exocyst, a complex that mediates trafficking to the plasma membrane. COG2 structure reveals a six-helix bundle with few conserved surface features but a general resemblance to recently determined crystal structures of four different exocyst subunits. These bundles in COG2 may act as platforms for interaction with other trafficking proteins including SNAREs (soluble N-ethylmaleimide factor attachment protein receptors) and Rabs .. +PF06149 Protein of unknown function (DUF969)
Pfam-B_9723 (release 9.0). Family of uncharacterised bacterial membrane proteins.. +PF06150 ChaB
Pfam-B_7743 (release 9.0). This family of proteins contains a conserved 60 residue region. This protein is known as ChaB in E. coli and is found next to ChaA which is a cation transporter protein. ChaB may regulate ChaA function in some way.. +PF06151 Trehalose receptor
Pfam-B_9846 (release 9.0). In Drosophila, taste is perceived by gustatory neurons located in sensilla distributed on several different appendages throughout the body of the animal. This family represents the taste receptor sensitive to trehalose [1,2].. +PF06152 Phage minor capsid protein 2
Pfam-B_9879 (release 9.0). Family of related phage minor capsid proteins.. +PF06153 Protein of unknown function (DUF970)
Pfam-B_9915 (release 9.0). Family of uncharacterised bacterial proteins.. +PF06154 YagB/YeeU/YfjZ family
Pfam-B_7771 (release 9.0). This family of proteins includes three proteins from E. coli YagB, YeeU and YfjZ. The function of these proteins is unknown. They are about 120 amino acids in length.. +PF06155 Protein of unknown function (DUF971)
Pfam-B_10230 (release 9.0). This family consists of several short bacterial proteins and one sequence (Swiss:Q8RZ62) from Oryza sativa. The function of this family is unknown. . +PF06156 Protein of unknown function (DUF972)
Pfam-B_10235 (release 9.0). This family consists of several hypothetical bacterial sequences. The function of this family is unknown.. +PF06157 Protein of unknown function (DUF973)
Pfam-B_7947 (release 9.0). This family consists of several hypothetical archaeal proteins of unknown function.. +PF06159 Protein of unknown function (DUF974)
Pfam-B_9042 (release 9.0). Family of uncharacterised eukaryotic proteins.. +PF06160 Septation ring formation regulator, EzrA
Pfam-B_9703 (release 9.0). During the bacterial cell cycle, the tubulin-like cell-division protein FtsZ polymerises into a ring structure that establishes the location of the nascent division site. EzrA modulates the frequency and position of FtsZ ring formation .. +PF06161 Protein of unknown function (DUF975)
Pfam-B_8494 (release 9.0). Family of uncharacterised bacterial proteins.. +PF06162 Caenorhabditis elegans protein of unknown function (DUF976)
Pfam-B_10032 (release 9.0). This family consists of several hypothetical Caenorhabditis elegans proteins of unknown function.. +PF06163 Bacterial protein of unknown function (DUF977)
Pfam-B_10135 (release 9.0). This family consists of several hypothetical bacterial proteins from Escherichia coli and Salmonella typhi. The function of this family is unknown.. +PF06165 Glycosyltransferase family 36
Pfam-B_9110 (release 9.0). The glycosyltransferase family 36 includes cellobiose phosphorylase (EC:2.4.1.20), cellodextrin phosphorylase (EC:2.4.1.49), chitobiose phosphorylase (EC:2.4.1.-). Many members of this family contain two copies of this domain.. +PF06166 Protein of unknown function (DUF979)
Pfam-B_10323 (release 9.0). This family consists of several putative bacterial membrane proteins. The function of this family is unclear.. +PF06167 DUF980; MtfA;
Glucose-regulated metallo-peptidase M90. Pfam-B_8651 (release 9.0). MtfA (earlier known as YeeI) is a transcription factor A that binds Mlc (make large colonies), itself a repressor of glucose and hence a protein important in regulation of the phosphoenolpyruvate:glucose-phosphotransferase (ptsG) system, the major glucose transporter in E.coli. Mlc is a repressor of ptsG, and MtfA is found to bind and inactivate Mlc with high affinity . The membrane-bound protein EIICBGlc encoded by the ptsG gene is the major glucose transporter in Escherichia coli. MtfA is found to be a glucose-regulated peptidase , whose activity is regulated by binding to Mlc available in the cytoplasm, which in turn has been released from EIICBGlc during times when no glucose is taken up. A physiologically relevant target for this peptidase is not yet known.. +PF06168 Protein of unknown function (DUF981)
Pfam-B_8691 (release 9.0). Family of uncharacterised proteins found in bacteria and archaea.. +PF06169 Protein of unknown function (DUF982)
Moxon SJ, Eberhardt R. Pfam-B_10431 (release 9.0). This family consists of several hypothetical proteins from Rhizobium meliloti, Rhizobium loti and Agrobacterium tumefaciens. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids .. +PF06170 Protein of unknown function (DUF983)
Pfam-B_10629 (release 9.0). This family consists of several bacterial proteins of unknown function.. +PF06172 DUF985;
Cupin superfamily (DUF985). Pfam-B_9217 (release 9.0). Family of uncharacterised proteins found in bacteria and eukaryotes that belongs to the Cupin superfamily.. +PF06173 Protein of unknown function (DUF986)
Pfam-B_10711 (release 9.0). This family consists of several bacterial putative membrane proteins of unknown function.. +PF06174 Protein of unknown function (DUF987)
Pfam-B_9329 (release 9.0). Family of bacterial proteins that are related to the hypothetical protein yeeT.. +PF06175 tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE)
Pfam-B_10761 (release 9.0). This family consists of several bacterial tRNA-(MS IO A)-hydroxylase (MiaE) proteins. The modified nucleoside 2-methylthio-N-6-isopentenyl adenosine (ms2i6A) is present at position 37 (3' of the anticodon) of tRNAs that read codons beginning with U except tRNA(I,V Ser) in Escherichia coli. Salmonella typhimurium 2-methylthio-cis-ribozeatin (ms2io6A) is found in tRNA, probably in the corresponding species that have ms2i6A in E. coli. The miaE gene is absent in E. coli, a finding consistent with the absence of the hydroxylated derivative of ms2i6A in this species .. +PF06176 Lipopolysaccharide core biosynthesis protein (WaaY)
Pfam-B_10767 (release 9.0). This family consists of several bacterial lipopolysaccharide core biosynthesis proteins (WaaY or RfaY). The waaY, waaQ, and waaP genes are located in the central operon of the waa (formerly rfa) locus on the chromosome of Escherichia coli. This locus contains genes whose products are involved in the assembly of the core region of the lipopolysaccharide molecule. WaaY is the enzyme that phosphorylates HepII in this system .. +PF06177 DUF988;
Pfam-B_10800 (release 9.0). This family includes the queT gene encoding a hypothetical integral membrane protein with 5 predicted transmembrane regions. The queT genes in Firmicutes are often preceded by the PreQ1 (7-aminomethyl-7-deazaguanine) riboswitches of two distinct classes [1-2], suggesting involvement of the QueT transporters in uptake of a queuosine biosynthetic intermediate.. +PF06178 Oligogalacturonate-specific porin protein (KdgM)
Pfam-B_10852 (release 9.0). This family consists of several bacterial proteins which are homologous to the oligogalacturonate-specific porin protein KdgM (Swiss:Q934G3) from Erwinia chrysanthemi. The phytopathogenic Gram-negative bacteria Erwinia chrysanthemi secretes pectinases, which are able to degrade the pectic polymers of plant cell walls, and uses the degradation products as a carbon source for growth. KdgM is a major outer membrane protein, whose synthesis is strongly induced in the presence of pectic derivatives. KdgM behaves like a voltage-dependent porin that is slightly selective for anions and that exhibits fast block in the presence of trigalacturonate. In contrast to most porins, KdgM seems to be monomeric . . +PF06179 SURF5;
Surfeit locus protein 5 subunit 22 of Mediator complex. Pfam-B_10889 (release 9.0). This family consists of several eukaryotic Surfeit locus protein 5 (SURF5) sequences. The human Surfeit locus has been mapped on chromosome 9q34.1. The locus includes six tightly clustered housekeeping genes (Surf1-6), and the gene organisation is similar in human, mouse and chicken Surfeit locus. The Med22 subunit of Mediator complex is part of the essential core head region .. +PF06180 Cobalt chelatase (CbiK)
Pfam-B_10975 (release 9.0). This family consists of several bacterial cobalt chelatase (CbiK) proteins (EC:4.99.1.-).. +PF06181 Protein of unknown function (DUF989)
Pfam-B_11062 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function. The haem-binding domain towards the C-terminus has been merged into Cytochrome_C, Pfam:PF00034.. +PF06182 DUF990;
ABC-2 family transporter protein. Moxon SJ, Eberhardt R. Pfam-B_11079 (release 9.0). This family acts as the transmembrane domain (TMD) of ABC transporters [1,2]. The family includes proteins responsible for the transport of herbicides .. +PF06183 DinI-like family
Pfam-B_3085 (release 9.0). This family of short proteins includes DNA-damage-inducible protein I (DinI) and related proteins. The SOS response, a set of cellular phenomena exhibited by eubacteria, is initiated by various causes that include DNA damage-induced replication arrest, and is positively regulated by the co- protease activity of RecA. Escherichia coli DinI, a LexA-regulated SOS gene product, shuts off the initiation of the SOS response when overexpressed in vivo. Biochemical and genetic studies indicated that DinI physically interacts with RecA to inhibit its co-protease activity . The structure of DinI is known .. +PF06184 Potexvirus coat protein
Pfam-B_11093 (release 9.0). This family consists of several Potexvirus coat proteins.. +PF06185 DUF991;
Pfam-B_11108 (release 9.0). This family consists of several bacterial YecM proteins of unknown function.. +PF06186 Protein of unknown function (DUF992)
Pfam-B_11128 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06187 Protein of unknown function (DUF993)
Pfam-B_11260 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06188 HrpE/YscL/FliH and V-type ATPase subunit E
Pfam-B_11055 (release 9.0). This is a prokaryotic family that contains proteins of the FliH and HrpE/YscL family. These proteins are involved in type III secretion, which is the process that drives flagellar biosynthesis and mediates bacterial-eukaryotic interactions [1-2]. This family also contains V-type ATPase subunit E. This subunit appears to form a tight interaction with subunit G in the F0 complex . Subunits E and G may act together as stators to prevent certain subunits from rotating with the central rotary element . Pfam:PF01991 also contains V-type ATPase subunit E proteins.. +PF06189 5'-nucleotidase
Pfam-B_10008 (release 9.0). This family consists of both eukaryotic and prokaryotic 5'-nucleotidase sequences (EC:3.1.3.5).. +PF06191 Protein of unknown function (DUF995)
Pfam-B_11307 (release 9.0). Family of uncharacterised Proteobacteria proteins.. +PF06193 Orthopoxvirus A5L protein-like
Pfam-B_10342 (release 9.0). This family includes several Orthopoxvirus A5L proteins. The vaccinia virus WR A5L open reading frame (corresponding to open reading frame A4L in vaccinia virus Copenhagen) encodes an immunodominant late protein found in the core of the vaccinia virion. The A5 protein appears to be required for the immature virion to form the brick-shaped intracellular mature virion .. +PF06194 Phage Conserved Open Reading Frame 51
Pfam-B_11352 (release 9.0). Family of conserved bacteriophage open reading frames.. +PF06195 Protein of unknown function (DUF996)
Pfam-B_11375 (release 9.0). Family of uncharacterised bacterial and archaeal proteins.. +PF06196 Protein of unknown function (DUF997)
Pfam-B_11382 (release 9.0). Family of predicted bacterial membrane protein with unknown function.. +PF06197 Protein of unknown function (DUF998)
Pfam-B_11425 (release 9.0). Family of conserved archaeal proteins.. +PF06198 Protein of unknown function (DUF999)
Pfam-B_11426 (release 9.0). Family of conserved Schizosaccharomyces pombe proteins with unknown function.. +PF06199 Phage major tail protein 2
Pfam-B_11427 (release 9.0). Characterised members are major tail proteins from various phage, including lactococcal temperate bacteriophage TP901-1.. +PF06200 Zim;
Pfam-B_3326 (release 9.0). This short possible domain is found in a variety of plant transcription factors that contain GATA domains as well as other motifs. Although previously known as the Zim domain this is now called the tify domain after its most conserved amino acids. TIFY proteins can be further classified into two groups depending on the presence (group I) or absence (group II) of a C2C2-GATA domain. Functional annotation of these proteins is still poor, but several screens revealed a link between TIFY proteins of group II and jasmonic acid-related stress response.. +PF06201 DUF1000; Thioredox_dimer;
This family was formerly known as DUF1000. The full-length, Txnl1, protein which is a probable component of the 26S proteasome, uses its C-terminal, PITH, domain to associate specifically with the 26S proteasome. PITH derives from proteasome-interacting thioredoxin domain.. +PF06202 Amylo-alpha-1,6-glucosidase
Pfam-B_3607 (release 9.0). This family includes human glycogen branching enzyme Swiss:P35573. This enzyme contains a number of distinct catalytic activities. It has been shown for the yeast homologue Swiss:O93808 that mutations in this region disrupt the enzymes Amylo-alpha-1,6-glucosidase (EC:3.2.1.33).. +PF06203 CCT motif
Pfam-B_314 (release 9.0). This short motif is found in a number of plant proteins. It is rich in basic amino acids and has been called a CCT motif after Co, Col and Toc1 . The CCT motif is about 45 amino acids long and contains a putative nuclear localisation signal within the second half of the CCT motif . Toc1 mutants have been identified in this region.. +PF06204 Putative carbohydrate binding domain
Pfam-B_9110 (release 9.0). +PF06205 Glycosyltransferase 36 associated family
Pfam-B_9110 (release 9.0). +PF06206 DUF1001;
CpeT/CpcT family (DUF1001). Pfam-B_11004 (release 9.0). This family consists of proteins belonging to the CpeT/CpcT family. These proteins are around 200 amino acids in length. The proteins contain a conserved motif PYR in the amino terminal half of the protein that may be functionally important. The species distribution of the family is interesting. So far it is restricted to cyanobacteria, cryptomonads and plants. It has been shown that CpcT encodes a bilin lyase responsible for attachment of phycocyanobilin to the beta subunit of phycocyanin .. +PF06207 Protein of unknown function (DUF1002)
Pfam-B_10868 (release 9.0). This protein family has no known function. Its members are about 300 amino acids in length. It has so far been detected in Firmicute bacteria and some archaebacteria.. +PF06208 Borna disease virus G protein
Pfam-B_10516 (release 9.0). This family consists of Borna disease virus G glycoprotein sequences. Borna disease virus (BDV) infection produces a variety of clinical diseases, from behavioural illnesses to classical fatal encephalitis . G protein is important for viral entry into the host cell [2,3].. +PF06209 Cofactor of BRCA1 (COBRA1)
Pfam-B_11228 (release 9.0). This family consists of several cofactor of BRCA1 (COBRA1) like proteins. It is thought that COBRA1 along with BRCA1 is involved in chromatin unfolding. COBRA1 is recruited to the chromosome site by the first BRCT repeat of BRCA1, and is itself sufficient to induce chromatin unfolding. BRCA1 mutations that enhance chromatin unfolding also increase its affinity for, and recruitment of, COBRA1. It is thought that reorganisation of higher levels of chromatin structure is an important regulated step in BRCA1-mediated nuclear functions .. +PF06210 Protein of unknown function (DUF1003)
Pfam-B_10814 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06211 BMP and activin membrane-bound inhibitor (BAMBI) N-terminal domain
Pfam-B_11792 (release 9.0). This family consists of several eukaryotic BMP and activin membrane-bound inhibitor (BAMBI) proteins. Members of the transforming growth factor-beta (TGF-beta) superfamily, including TGF-beta, bone morphogenetic proteins (BMPs), activins and nodals, are vital for regulating growth and differentiation. BAMBI is related to TGF-beta-family type I receptors but lacks an intracellular kinase domain. BAMBI is co-expressed with the ventralising morphogen BMP4 during Xenopus embryogenesis and requires BMP signalling for its expression. The protein stably associates with TGF-beta-family receptors and inhibits BMP and activin as well as TGF-beta signalling .. +PF06212 GRIM-19 protein
Pfam-B_10760 (release 9.0). This family consists of several eukaryotic gene associated with retinoic-interferon-induced mortality 19 (GRIM-19) proteins. GRIM-19, was reported to encode a small protein primarily distributed in the nucleus and was able to promote cell death induced by IFN-ß and RA. A bovine homologue of GRIM-19 was co-purified with mitochondrial NADH:ubiquinone oxidoreductase (complex I) in bovine heart. Therefore, its exact cellular localisation and function are unclear. It has now been discovered that GRIM-19 is a specific interacting protein which negatively regulates Stat3 activity . . +PF06213 Cobalamin biosynthesis protein CobT
Pfam-B_10956 (release 9.0). This family consists of several bacterial cobalamin biosynthesis (CobT) proteins. CobT is involved in the transformation of precorrin-3 into cobyrinic acid . . +PF06214 Signaling lymphocytic activation molecule (SLAM) protein
Pfam-B_11112 (release 9.0). This family consists of several mammalian signaling lymphocytic activation molecule (SLAM) proteins. Optimal T cell activation and expansion require engagement of the TCR plus co-stimulatory signals delivered through accessory molecules. SLAM, a 70-kDa co-stimulatory molecule belonging to the Ig superfamily, is defined as a human cell surface molecule that mediates CD28-independent proliferation of human T cells and IFN-gamma production by human Th1 and Th2 clones . SLAM has also been recognised as a receptor for measles virus .. +PF06215 Infectious salmon anaemia virus haemagglutinin
Pfam-B_11275 (release 9.0). This family consists of several infectious salmon anaemia virus haemagglutinin proteins. Infectious salmon anaemia virus (ISAV), an orthomyxovirus-like virus, is an important fish pathogen in marine aquaculture . . +PF06216 Rice tungro bacilliform virus P46 protein
Pfam-B_11281 (release 9.0). This family consists of several Rice tungro bacilliform virus P46 proteins. The function of this family is unknown.. +PF06217 DUF1004;
GAGA binding protein-like family. Pfam-B_10604 (release 9.0). This family includes gbp a protein from Soybean that binds to GAGA element dinucleotide repeat DNA . It seems likely that this domain mediates DNA binding. This putative domain contains several conserved cysteines and a histidine suggesting this may be a zinc-binding DNA interaction domain.. +PF06218 Nitrogen permease regulator 2
Pfam-B_11335 (release 9.0). This family of regulators are involved in post-translational control of nitrogen permease.. +PF06219 Protein of unknown function (DUF1005)
Pfam-B_11366 (release 9.0). Family of plant proteins with undetermined function.. +PF06220 zf-U1; U1_C;
Pfam-B_10606 (release 9.0). This family consists of several U1 small nuclear ribonucleoprotein C (U1-C) proteins. The U1 small nuclear ribonucleoprotein (U1 snRNP) binds to the pre-mRNA 5' splice site (ss) at early stages of spliceosome assembly. Recruitment of U1 to a class of weak 5' ss is promoted by binding of the protein TIA-1 to uridine-rich sequences immediately downstream from the 5' ss. Binding of TIA-1 in the vicinity of a 5' ss helps to stabilise U1 snRNP recruitment, at least in part, via a direct interaction with U1-C, thus providing one molecular mechanism for the function of this splicing regulator . This domain is probably zinc-binding. It is found in multiple copies in some members of the family.. +PF06221 Zf-C2HC5;
Putative zinc finger motif, C2HC5-type. Pfam-B_11300 (release 9.0). This zinc finger appears to be common in activating signal cointegrator 1/thyroid receptor interacting protein 4.. +PF06222 Phage tail assembly chaperone
Pfam-B_11379 (release 9.0). +PF06223 Minor tail protein T
Pfam-B_8084 (release 9.0). Minor tail protein T is located at the distal end and is involved in the assembly of the initiator complex for tail polymerisation .. +PF06224 DUF1006;
Winged helix DNA-binding domain. Pfam-B_8424 (release 9.0). This family contains two copies of a winged helix domain.. +PF06226 Protein of unknown function (DUF1007)
Pfam-B_8585 (release 9.0). Family of conserved bacterial proteins with unknown function.. +PF06227 Orthopox_N1;
Pfam-B_10619 (release 9.0). This is a family of dsDNA viruses, with no RNA stage, Poxvirus proteins.. +PF06228 DUF1008;
Haem utilisation ChuX/HutX. Pfam-B_10620 (release 9.0). This family is found within haem utilisation operons. It has a similar structure to that of Pfam:PF05171. Pfam:PF05171 usually occurs as a duplicated domain, but this domain occurs as a single domain and forms a dimer. The organisation of the dimer is very similar to that of the duplicated Pfam:PF05171 domains [1,2]. It binds haem via conserved histidines .. +PF06229 FRG1-like family
Pfam-B_8590 (release 9.0). The human FRG1 gene maps to human chromosome 4q35 and has been identified as a candidate for facioscapulohumeral muscular dystrophy. Currently, the function of FRG1 is unknown .. +PF06230 Protein of unknown function (DUF1009)
Pfam-B_8705 (release 9.0). Family of uncharacterised bacterial proteins.. +PF06231 Protein of unknown function (DUF1010)
Pfam-B_11467 (release 9.0). Family of plasmid encoded proteins with unknown function.. +PF06232 Embryo-specific protein 3, (ATS3)
Pfam-B_11504 (release 9.0). Family of plant seed-specific proteins.. +PF06233 Usg-like family
Pfam-B_11528 (release 9.0). Family of bacterial proteins, referred to as Usg. Usg is found in the same operon as trpF, trpB, and trpA and is expressed in a coupled transcription-translation system .. +PF06234 Toluene-4-monooxygenase system protein B (TmoB)
Moxon SJ, Iyer LM, Burroughs AM, Aravind L. Pfam-B_10626 (release 9.0). This family consists of several Toluene-4-monooxygenase system protein B (TmoB) sequences. Pseudomonas mendocina KR1 metabolises toluene as a carbon source. The initial step of the pathway is hydroxylation of toluene to form p-cresol by a multicomponent toluene-4-monooxygenase (T4MO) system . TmoB adopts a ubiquitin fold . Although TmoB is a component of the T4MO system, its precise role remains unclear.. +PF06235 NADH dehydrogenase subunit 4L (NAD4L)
Pfam-B_10667 (release 9.0). This family consists of NADH dehydrogenase subunit 4L (NAD4L) proteins from the mitochondria of several parasitic flatworms.. +PF06236 Tyrosinase co-factor MelC1
Pfam-B_10673 (release 9.0). This family consists of several tyrosinase co-factor MELC1 proteins from a number of Streptomyces species. The melanin operon (melC) of Streptomyces antibioticus contains two genes, melC1 and melC2 (apotyrosinase). It is thought that MelC1 forms a transient binary complex with the downstream apotyrosinase MelC2 to facilitate the incorporation of copper ion and the secretion of tyrosinase indicating that MelC1 is a chaperone for the apotyrosinase MelC2 .. +PF06237 Protein of unknown function (DUF1011)
Pfam-B_11463 (release 9.0). Family of uncharacterised eukaryotic proteins.. +PF06238 Borrelia_lipopr;
Borrelia burgdorferi BBR25 lipoprotein. Pfam-B_15000 (release 9.0). This family consists of a number of lipoproteins from the Lyme disease spirochete Borrelia burgdorferi .. +PF06239 Evolutionarily conserved signalling intermediate in Toll pathway
Pfam-B_9306 (release 9.0). Activation of NF-kappaB as a consequence of signaling through the Toll and IL-1 receptors is a major element of innate immune responses. ECSIT plays an important role in signalling to NF-kappaB, functioning as the intermediate in the signaling pathways between TRAF-6 and MEKK-1 .. +PF06240 CoxG;
Carbon monoxide dehydrogenase subunit G (CoxG). Pfam-B_9339 (release 9.0). The CO dehydrogenase structural genes coxMSL are flanked by nine accessory genes arranged as the cox gene cluster. The cox genes are specifically and coordinately transcribed under chemolithoautotrophic conditions in the presence of CO as carbon and energy source .. +PF06241 Protein of unknown function (DUF1012)
Pfam-B_9320 (release 9.0). Family of uncharacterised proteins found in both eukaryotes and bacteria.. +PF06242 Protein of unknown function (DUF1013)
Pfam-B_9390 (release 9.0). Family of uncharacterised proteins found in Proteobacteria.. +PF06243 Phenylacetic acid degradation B
Pfam-B_9426 (release 9.0). Phenylacetic acid degradation protein B (PaaB) is thought to be part of a multicomponent oxygenase involved in phenylacetyl-CoA hydroxylation .. +PF06244 Protein of unknown function (DUF1014)
Moxon SJ, Coggill PC. Pfam-B_11009 (release 9.0). This family consists of several hypothetical eukaryotic proteins of unknown function.. +PF06245 Protein of unknown function (DUF1015)
Pfam-B_9451 (release 9.0). Family of proteins with unknown function found in archaea and bacteria.. +PF06246 Isy1-like splicing family
Pfam-B_9462 (release 9.0). Isy1 protein is important in the optimisation of splicing .. +PF06247 Plasmodium ookinete surface protein Pvs28
Pfam-B_11346 (release 9.0). This family consists of several ookinete surface protein (Pvs28) from several species of Plasmodium. Pvs25 and Pvs28 are expressed on the surface of ookinetes. These proteins are potential candidates for vaccine and induce antibodies that block the infectivity of Plasmodium vivax in immunised animals .. +PF06248 Centromere/kinetochore Zw10
Pfam-B_9476 (release 9.0). Zw10 and rough deal proteins are both required for correct metaphase check-pointing during mitosis [1,2]. These proteins bind to the centromere/kinetochore .. +PF06249 Ethanolamine utilisation protein EutQ
Pfam-B_11530 (release 9.0). The eut operon of Salmonella typhimurium encodes proteins involved in the cobalamin-dependent degradation of ethanolamine. The role of EutQ in this process is unclear .. +PF06250 Protein of unknown function (DUF1016)
Pfam-B_9571 (release 9.0). Family of uncharacterised proteins found in viruses, archaea and bacteria.. +PF06251 DUF1017;
Capsule biosynthesis GfcC. Pfam-B_9574 (release 9.0). Many bacteria are covered in a layer of surface-associated polysaccharide called the capsule. These capsules can be divided into four groups depending upon the organisation of genes responsible for capsule assembly, the assembly pathway and regulation . This family plays a role in group 4 capsule biosynthesis . These proteins have a beta-grasp fold . Two beta-grasp domains, D2 and D3, are arranged in tandem. There is a C-terminal amphipathic helix which packs against D3. A helical hairpin insert in D2 binds to D3 and constrains its position, a conserved arginine residue at the end of this hairpin is essential for structural integrity .. +PF06252 Protein of unknown function (DUF1018)
Pfam-B_11125 (release 9.0). This family consists of several bacterial and phage proteins of unknown function.. +PF06253 Trimethylamine methyltransferase (MTTB)
Pfam-B_11132 (release 9.0). This family consists of several trimethylamine methyltransferase (MTTB) (EC:2.1.1.-) proteins from numerous Rhizobium and Methanosarcina species.. +PF06254 Protein of unknown function (DUF1019)
Pfam-B_9681 (release 9.0). Family of uncharacterised proteins found in Proteobacteria.. +PF06255 Protein of unknown function (DUF1020)
Pfam-B_11136 (release 9.0). This family consists of several MafB proteins from Neisseria meningitidis and Neisseria gonorrhoeae. The function of this family is unknown.. +PF06256 Nucleopolyhedrovirus LEF-12 protein
Pfam-B_11198 (release 9.0). This family consists of several Nucleopolyhedrovirus late expression factor-12 (LEF-12) proteins. The function of this family is unknown [1,2].. +PF06257 Protein of unknown function (DUF1021)
Pfam-B_11556 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06258 DUF1022;
Mitochondrial fission ELM1. Moxon SJ, Eberhardt R. Pfam-B_10595 (release 9.0). In plants, this family is involved in mitochondrial fission. It binds to dynamin-related proteins and plays a role in their relocation from the cytosol to mitochondrial fission sites . Its function in bacteria is unknown.. +PF06259 DUF1023;
Alpha/beta hydrolase. Pfam-B_9720 (release 9.0). Members of this family are predicted to have an alpha/beta hydrolase fold. They contain a predicted Ser-His-Asp catalytic triad, in which the serine is likely to act as a nucleophile .. +PF06260 Protein of unknown function (DUF1024)
Pfam-B_11584 (release 9.0). This family consists of several hypothetical Staphylococcus aureus and Staphylococcus aureus phage phi proteins. The function of this family is unknown.. +PF06261 Actinobacillus actinomycetemcomitans leukotoxin activator LktC
Pfam-B_11552 (release 9.0). This family consists of several Actinobacillus actinomycetemcomitans leukotoxin activator (LktC) proteins. Actinobacillus actinomycetemcomitans is a Gram-negative bacterium that has been implicated in the etiology of several forms of periodontitis, especially localised juvenile periodontitis. LktC along with LktB and LktD are thought to be required for activation and localisation of the leukotoxin . . +PF06262 Possibl zinc metallo-peptidase
Pfam-B_9726 (release 9.0). This is possibly a family of bacterial zinc metallo-peptidases. Although they carry the HExxHxxGxxD motif, they are missing a final methionine which would class them as Met-zincins.. +PF06265 Protein of unknown function (DUF1027)
Pfam-B_11526 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06266 HrpF protein
Pfam-B_11646 (release 9.0). The species Pseudomonas syringae encompasses plant pathogens with differing host specificities and corresponding pathovar designations. P. syringae requires the Hrp (type III protein secretion) system, encoded by a 25-kb cluster of hrp and hrc genes, in order to elicit the hypersensitive response (HR) in nonhosts or to be pathogenic in hosts. The exact function of HrpF is unknown but the protein is needed for pathogenicity .. +PF06267 Family of unknown function (DUF1028)
Pfam-B_9747 (release 9.0). Family of bacterial and archaeal proteins with unknown function. Some members are associated with a C-terminal peptidoglycan binding domain. So perhaps this could be an enzyme involved in peptidoglycan metabolism.. +PF06268 Fascin domain
Pfam-B_11660 (release 9.0). This family consists of several eukaryotic fascin or singed proteins. The fascins are a structurally unique and evolutionarily conserved group of actin cross-linking proteins. Fascins function in the organisation of two major forms of actin-based structures: dynamic, cortical cell protrusions and cytoplasmic microfilament bundles. The cortical structures, which include filopodia, spikes, lamellipodial ribs, oocyte microvilli and the dendrites of dendritic cells, have roles in cell-matrix adhesion, cell interactions and cell migration, whereas the cytoplasmic actin bundles appear to participate in cell architecture . Dictyostelium hisactophilin, another actin-binding protein, is a submembranous pH sensor that signals slight changes of the H+ concentration to actin by inducing actin polymerisation and binding to microfilaments only at pH values below seven . Members of this family are histidine rich, typically contain the repeated motif of HHXH .. +PF06269 Protein of unknown function (DUF1029)
Pfam-B_11672 (release 9.0). This family consists of several short Chordopoxvirus proteins of unknown function.. +PF06270 Protein of unknown function (DUF1030)
Pfam-B_11673 (release 9.0). This family consists of several short Circovirus proteins of unknown function.. +PF06271 RDD family
Pfam-B_1111 (release 9.0). +PF06273 Plant_eIF4B;
Plant specific eukaryotic initiation factor 4B. Pfam-B_11679 (release 9.0). This family consists of several plant specific eukaryotic initiation factor 4B proteins.. +PF06275 Protein of unknown function (DUF1031)
Pfam-B_11618 (release 9.0). This family consists of several Lactococcus lactis bacteriophage and Lactococcus lactis proteins of unknown function.. +PF06276 Ferric iron reductase FhuF-like transporter
Pfam-B_11690 (release 9.0). This family consists of several bacterial ferric iron reductase protein (FhuF) sequences. FhuF is involved in the reduction of ferric iron in cytoplasmic ferrioxamine B . This family also includes the IucA and IucC proteins.. +PF06277 Ethanolamine utilisation protein EutA
Pfam-B_11716 (release 9.0). This family consists of several bacterial EutA ethanolamine utilisation proteins. The EutA protein is thought to protect the lyase (EutBC) from inhibition by CNB12 .. +PF06278 Protein of unknown function (DUF1032)
Pfam-B_11729 (release 9.0). This family consists of several conserved eukaryotic proteins of unknown function.. +PF06279 Protein of unknown function (DUF1033)
Pfam-B_11732 (release 9.0). This family consists of several hypothetical bacterial proteins. Many of the sequences in this family are annotated as putative DNA binding proteins but the function of this family is unknown.. +PF06280 Fn3-like domain (DUF1034)
This family consists of several domains of unknown function which are present in several bacterial and plant peptidases. This domain is found in conjunction with Pfam:PF00082, Pfam:PF02225 and is often found with Pfam:PF00746. This domain has a structure similar to an Fn3 domain .. +PF06281 Protein of unknown function (DUF1035)
Pfam-B_11733 (release 9.0). This family consists of several Sulfolobus and Sulfolobus virus proteins of unknown function.. +PF06282 Protein of unknown function (DUF1036)
Pfam-B_11760 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06283 DUF1037;
Trehalose utilisation. Pfam-B_11803 (release 9.0). This family consists of several bacterial ThuA like proteins. ThuA appears to be involved in utilisation of trehalose . The thuA and thuB genes form part of the trehalose/sucrose transport operon thuEFGKAB, which is located on the pSymB megaplasmid. The thuA and thuB genes are induced in vitro by trehalose but not by sucrose and the extent of its induction depends on the concentration of trehalose available in the medium .. +PF06284 Cytomegalo_UL84;
Cytomegalovirus UL84 protein. Pfam-B_11948 (release 9.0). This family consists of several Cytomegalovirus UL84 proteins. The open reading frame UL84 of human cytomegalovirus encodes a multifunctional regulatory protein which is required for viral DNA replication and binds with high affinity to the immediate-early transactivator IE2-p86 .. +PF06286 Coleoptericin
Pfam-B_11996 (release 9.0). This family consists of several insect Coleoptericin, Acaloleptin, Holotricin and Rhinocerosin proteins which are all known to be antibacterial proteins .. +PF06287 Protein of unknown function (DUF1039)
Pfam-B_12079 (release 9.0). This family consists of several hypothetical bacterial proteins from Escherichia coli and Citrobacter rodentium. The function of this family is unknown.. +PF06288 Protein of unknown function (DUF1040)
Pfam-B_12122 (release 9.0). This family consists of several bacterial YihD proteins of unknown function . . +PF06289 Flagellar protein (FlbD)
Pfam-B_12586 (release 9.0). This family consists of several bacterial FlbD flagellar proteins. The exact function of this family is unknown .. +PF06290 Plasmid SOS inhibition protein (PsiB)
Pfam-B_12789 (release 9.0). This family consists of several plasmid SOS inhibition protein (PsiB) sequences .. +PF06291 Bor protein
Pfam-B_12850 (release 9.0). This family consists of several Bacteriophage lambda Bor and Escherichia coli Iss proteins. Expression of bor significantly increases the survival of the Escherichia coli host cell in animal serum. This property is a well known bacterial virulence determinant; indeed, bor and its adjacent sequences are highly homologous to the iss serum resistance locus of the plasmid ColV2-K94, which confers virulence in animals. It has been suggested that lysogeny may generally have a role in bacterial survival in animal hosts, and perhaps in pathogenesis .. +PF06292 Domain of Unknown Function (DUF1041)
This family consists of several eukaryotic domains of unknown function. Members of this family are often found in tandem repeats and co-occur with Pfam:PF00168, Pfam:PF00130 and Pfam:PF00169 domains.. +PF06293 Lipopolysaccharide kinase (Kdo/WaaP) family
Krupa A, Srinivasan N. These lipopolysaccharide kinases are related to protein kinases Pfam:PF00069. This family includes the waaP (rfaP) gene product, which is required for the addition of phosphate to O-4 of the first heptose residue of the lipopolysaccharide (LPS) inner core region. It has previously been shown that WaaP is necessary for resistance to hydrophobic and polycationic antimicrobials in E. coli and that it is required for virulence in invasive strains of S. enterica .. +PF06294 Domain of Unknown Function (DUF1042)
Spef is a region of sperm flagellar proteins. It probably exerts a role in spermatogenesis in that the protein is expressed predominantly in adult tissue. It is present in the tails of developing and epididymal sperm internal to the fibrous sheath and around the dense outer fibres of the sperm flagellum . The amino-terminal domain (residues 1-110) shows a possible calponin homology (CH) domain; however Spef does not bind actin directly under in vitro conditions, so the function of the amino-terminal calponin-like domain is unclear . Transcription aberrations leading to a truncated protein result in immotile sperm .. +PF06295 Protein of unknown function (DUF1043)
Pfam-B_12007 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06296 Protein of unknown function (DUF1044)
Pfam-B_12045 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06297 PET Domain
This domain is suggested to be involved in protein-protein interactions . The family is found in conjunction with Pfam:PF00412.. +PF06298 Photosystem II protein Y (PsbY)
Pfam-B_12212 (release 9.0). This family consists of several bacterial and plant photosystem II protein Y (PsbY) sequences. PsbY is a manganese-binding protein that has an L-arginine metabolising enzyme activity .. +PF06299 Protein of unknown function (DUF1045)
Pfam-B_12127 (release 9.0). This family consists of several hypothetical proteins from Agrobacterium, Rhizobium and Brucella species. The function of this family is unknown.. +PF06300 Tsp45I type II restriction enzyme
Pfam-B_12151 (release 9.0). This family consists of several type II restriction enzymes.. +PF06301 Bacteriophage lambda Kil protein
Pfam-B_12201 (release 9.0). This family consists of several Bacteriophage lambda Kil protein like sequences from both phages and bacteria. Induction of a lambda prophage causes the death of the host cell even in the absence of phage replication and lytic functions due to expression of the lambda kil gene .. +PF06303 DUF1047;
Organiser of macrodomain of Terminus of chromosome. Pfam-B_12243 (release 9.0). This family, many of whose members are YcbG, organises the macrodomain Ter of the chromosome of bacteria such as E coli. In these bacteria, insulated macrodomains influence the segregation of sister chromatids and the mobility of chromosomal DNA. Organisation of the Terminus region (Ter) into a macrodomain relies on the presence of a 13 bp motif called matS repeated 23 times in the 800-kb-long domain. MatS sites are the main targets in the E. coli chromosome of YcbG or MatP (macrodomain Ter protein). MatP accumulates in the cell as a discrete focus that co-localises with the Ter macrodomain. The effects of MatP inactivation reveal its role as the main organiser of the Ter macrodomain: in the absence of MatP, DNA is less compacted, the mobility of markers is increased, and segregation of the Ter macrodomain occurs early in the cell cycle. A specific organisational system is required in the Terminus region for bacterial chromosome management during the cell cycle.. +PF06304 Protein of unknown function (DUF1048)
Pfam-B_12247 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06305 Protein of unknown function (DUF1049)
Pfam-B_12262 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06306 Beta-1,4-N-acetylgalactosaminyltransferase (CgtA)
Pfam-B_12320 (release 9.0). This family consists of several beta-1,4-N-acetylgalactosaminyltransferase proteins from Campylobacter jejuni .. +PF06307 Herpesvirus IR6 protein
Pfam-B_12322 (release 9.0). This family consists of several Herpesvirus IR6 proteins. The equine herpesvirus 1 (EHV-1) IR6 protein forms typical rod-like structures in infected cells, influences virus growth at elevated temperatures, and determines the virulence of EHV-1 Rac strains .. +PF06308 23S rRNA methylase leader peptide (ErmC)
Pfam-B_12332 (release 9.0). This family consists of several very short bacterial 23S rRNA methylase leader peptide (ErmC) sequences. ermC confers resistance to macrolide-lincosamide streptogramin B antibiotics by specifying a ribosomal RNA methylase, which results in decreased ribosomal affinity for these antibiotics. ermC expression is induced by exposure to erythromycin . . +PF06311 NUMB domain
This presumed domain is found in the Numb family of proteins adjacent to the PTB domain... +PF06312 Neurexophilin
Pfam-B_12369 (release 9.0). This family consists of mammalian neurexophilin proteins. Mammalian brains contain four different neurexophilin proteins. Neurexophilins form a family of related glycoproteins that are proteolytically processed after synthesis and bind to alpha-neurexins. The structure and characteristics of neurexophilins indicate that they function as neuropeptides that may signal via alpha-neurexins . . +PF06309 Torsin
Pfam-B_12047 (release 9.0). This family consists of several eukaryotic torsin proteins. Torsion dystonia is an autosomal dominant movement disorder characterised by involuntary, repetitive muscle contractions and twisted postures. The most severe early-onset form of dystonia has been linked to mutations in the human DYT1 (TOR1A) gene encoding a protein termed torsinA. While causative genetic alterations have been identified, the function of torsin proteins and the molecular mechanism underlying dystonia remain unknown. Phylogenetic analysis of the torsin protein family indicates these proteins share distant sequence similarity with the large and diverse family of (Pfam:PF00004) proteins. It has been suggested that torsins play a role in effectively managing protein folding and that possible breakdown in a neuroprotective mechanism that is, in part, mediated by torsins may be responsible for the neuronal dysfunction associated with dystonia .. +PF06313 Drosophila ACP53EA protein
Pfam-B_12718 (release 9.0). This family consists of several Drosophila ACP53EA accessory gland (seminal) proteins.. +PF06314 Acetoacetate decarboxylase (ADC)
Pfam-B_12720 (release 9.0). This family consists of several acetoacetate decarboxylase (ADC) proteins (EC:4.1.1.4).. +PF06315 Isocitrate dehydrogenase kinase/phosphatase (AceK)
Pfam-B_13100 (release 9.0). This family consists of several bacterial isocitrate dehydrogenase kinase/phosphatase (AceK) proteins (EC:2.7.1.116) .. +PF06316 Enterobacterial Ail/Lom protein
Pfam-B_12325 (release 9.0). This family consists of several bacterial and phage Ail/Lom-like proteins. The Yersinia enterocolitica Ail protein is a known virulence factor. Proteins in this family are predicted to consist of eight transmembrane beta-sheets and four cell surface-exposed loops. It is thought that Ail directly promotes invasion and loop 2 contains an active site, perhaps a receptor-binding domain . The phage protein Lom is expressed during lysogeny, and encode host-cell envelope proteins. Lom is found in the bacterial outer membrane, and is homologous to virulence proteins of two other enterobacterial genera. It has been suggested that lysogeny may generally have a role in bacterial survival in animal hosts, and perhaps in pathogenesis .. +PF06317 Arenavirus RNA polymerase
Pfam-B_12490 (release 9.0). This family consists of several Arenavirus RNA polymerase proteins (EC:2.7.7.48) .. +PF06319 Protein of unknown function (DUF1052)
Pfam-B_12539 (release 9.0). This family consists of several bacterial proteins of unknown function.. +PF06320 GCN5-like protein 1 (GCN5L1)
Pfam-B_12526 (release 9.0). This family consists of several eukaryotic GCN5-like protein 1 (GCN5L1) sequences. The function of this family is unknown [1,2].. +PF06321 Major fimbrial subunit protein (FimA)
Pfam-B_13339 (release 9.0). This family consists of several Porphyromonas gingivalis major fimbrial subunit protein (FimA) sequences. Fimbriae of Porphyromonas gingivalis, a periodontopathogen, play an important role in its adhesion to and invasion of host cells. The fimA genes encoding fimbrillin (FimA), a subunit protein of fimbriae, have been classified into five types, types I to V, based on nucleotide sequences. It has been found that type II FimA can bind to epithelial cells most efficiently through specific host receptors . Human dental plaque is a multispecies microbial biofilm that is associated with two common oral diseases, dental caries and periodontal disease. There is an inter-species contact-dependent communication system between P. gingivalis and S. cristatus that involves the Arc-A enzyme .. +PF06322 Phage NinH protein
Pfam-B_12973 (release 9.0). This family consists of several phage NinH proteins. The function of this family is unknown.. +PF06323 Phage antitermination protein Q
Pfam-B_12730 (release 9.0). This family consists of several phage antitermination protein Q and related bacterial sequences. Phage 82 gene Q encodes a phage-specific positive regulator of late gene expression, thought, by analogy to the corresponding gene of phage lambda, to be a transcription antiterminator . . +PF06324 Pigment-dispersing hormone (PDH)
Pfam-B_12230 (release 9.0). This family consists of several eukaryotic pigment-dispersing hormone (PDH) proteins. The pigment-dispersing hormone (PDH) is produced in the eyestalks of Crustacea where it induces light-adapting movements of pigment in the compound eye and regulates the pigment dispersion in the chromatophores .. +PF06325 Ribosomal protein L11 methyltransferase (PrmA)
Pfam-B_12272 (release 9.0). This family consists of several Ribosomal protein L11 methyltransferase (EC:2.1.1.-) sequences.. +PF06326 Vesiculovirus matrix protein
Pfam-B_13088 (release 9.0). This family consists of several Vesiculovirus matrix proteins. The matrix (M) protein of vesicular stomatitis virus (VSV) expressed in the absence of other viral components causes many of the cytopathic effects of VSV, including an inhibition of host gene expression and the induction of cell rounding. It has been shown that M protein also induces apoptosis in the absence of other viral components. It is thought that the activation of apoptotic pathways causes the inhibition of host gene expression and cell rounding by M protein .. +PF06327 Domain of Unknown Function (DUF1053)
This domain is found in Adenylate cyclases.. +PF06328 Ig-like C2-type domain
This domain is a ligand-binding immunoglobulin-like domain . The two cysteine residues form a disulphide bridge.. +PF06330 Trichodiene synthase (TRI5)
Pfam-B_13220 (release 9.0). This family consists of several fungal trichodiene synthase proteins (EC:4.2.3.6). TRI5 encodes the enzyme trichodiene synthase, which has been shown to catalyse the first step in the trichothecene pathways of Fusarium and Trichothecium species [1,2]. . +PF06331 REX1;
Transcription factor TFIIH complex subunit Tfb5. This family is a component of the general transcription and DNA repair factor IIH. TFB5 has been shown to be required for efficient recruitment of TFIIH to a promoter .. +PF06333 TRAP240; TRAP_240kDa; Med13;
Mediator complex subunit 13 C-terminal. Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med13 is part of the ancillary kinase module, together with Med12, CDK8 and CycC, which in yeast is implicated in transcriptional repression, though most of this activity is likely attributable to the CDK8 kinase. The large Med12 and Med13 proteins are required for specific developmental processes in Drosophila, zebrafish, and Caenorhabditis elegans but their biochemical functions are not understood .. +PF06334 Orthopoxvirus A47 protein
Pfam-B_13263 (release 9.0). This family consists of several Orthopoxvirus A47 proteins. The function of this family is unknown.. +PF06335 Protein of unknown function (DUF1054)
Pfam-B_13269 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06336 Coronavirus 5a protein
Pfam-B_13277 (release 9.0). This family consists of several Coronavirus 5a proteins. The function of this family is unknown .. +PF06337 DUF1055;
The DUSP (domain present in ubiquitin-specific protease) domain is found at the N-terminus of Ubiquitin-specific proteases. The structure of this domain has been solved . Its tripod-like structure consists of a 3-fold alpha-helical bundle supporting a triple-stranded anti-parallel beta-sheet .. +PF06338 ComK protein
Pfam-B_13324 (release 9.0). This family consists of several bacterial ComK proteins. The ComK protein of Bacillus subtilis positively regulates the transcription of several late competence genes as well as comK itself. It has been found that ClpX plays an important role in the regulation of ComK at the post-transcriptional level .. +PF06339 Ectoine synthase
Pfam-B_14542 (release 9.0). This family consists of several bacterial ectoine synthase proteins. The ectABC genes encode the diaminobutyric acid acetyltransferase (EctA), the diaminobutyric acid aminotransferase (EctB), and the ectoine synthase (EctC). Together these proteins constitute the ectoine biosynthetic pathway .. +PF06340 Vibrio cholerae toxin co-regulated pilus biosynthesis protein F
Pfam-B_13058 (release 9.0). This family consists of several Vibrio cholerae toxin co-regulated pilus biosynthesis protein F (TcpF) sequences. TcpF is known to be a secreted virulence protein but its exact function is unknown .. +PF06341 Protein of unknown function (DUF1056)
Pfam-B_13260 (release 9.0). This family consists of several putative head-tail joining bacteriophage proteins.. +PF06342 Alpha/beta hydrolase of unknown function (DUF1057)
Pfam-B_13294 (release 9.0). This family consists of several Caenorhabditis elegans specific proteins of unknown function. Members of this family have an alpha/beta hydrolase fold.. +PF06344 VPG_P3B;
Parechovirus Genome-linked protein. This family is of the Parechovirus genome-linked protein Vpg type P3B.. +PF06345 DRF Autoregulatory Domain
This motif is found in Diaphanous-related formins. It binds the N-terminal GTPase-binding domain; this link is broken when GTP-bound Rho binds to the GBD and activates the protein. The addition of DAD to mammalian cells induces actin filament formation, stabilises microtubules, and activates serum-response mediated transcription ( ).. +PF06346 Formin Homology Region 1
This region is found in some of the Diaphanous related formins (Drfs) ( ). It consists of low complexity repeats of around 12 residues.. +PF06347 Bacterial SH3 domain
Pfam-B_13248 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function. These are composed of SH3-like domains.. +PF06348 Protein of unknown function (DUF1059)
Pfam-B_13303 (release 9.0). This family consists of several short hypothetical archaeal proteins of unknown function.. +PF06350 HSL;
Hormone-sensitive lipase (HSL) N-terminus. Pfam-B_13329 (release 9.0). This family consists of several mammalian hormone-sensitive lipase (HSL) proteins (EC:3.1.1.-). Hormone-sensitive lipase, a key enzyme in fatty acid mobilisation, overall energy homeostasis, and possibly steroidogenesis, is acutely controlled through reversible phosphorylation by catecholamines and insulin .. +PF06351 Allene oxide cyclase
Pfam-B_13374 (release 9.0). This family consists of several plant specific allene oxide cyclase proteins (EC:5.3.99.6). The allene oxide cyclase (AOC)-catalysed step in jasmonate (JA) biosynthesis is important in the wound response of tomato . . +PF06353 Protein of unknown function (DUF1062)
Pfam-B_13377 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06355 Aegerolysin
Pfam-B_13415 (release 9.0). This family consists of several bacterial and eukaryotic Aegerolysin-like proteins. It has been found that aegerolysin and ostreolysin are expressed during formation of primordia and fruiting bodies. It has been suggested that these haemolysins play an important role in initial phase of fungal fruiting. The bacterial members of this family are expressed during sporulation . Ostreolysin was found cytolytic to various erythrocytes and tumour cells . It forms transmembrane pores 4 nm in diameter. The activity is inhibited by total membrane lipids, and modulated by lysophosphatides. The potential use of aegerolysins is reviewed with special emphasis on their properties which would allow their use in therapeutics.. +PF06356 Protein of unknown function (DUF1064)
Pfam-B_13437 (release 9.0). This family consists of several phage and bacterial proteins of unknown function.. +PF06357 Omega-atracotox;
Pfam-B_14633 (release 9.0). This family consists of several Hadronyche versuta (Blue mountains funnel-web spider) specific omega-atracotoxin proteins. Omega-Atracotoxin-Hv1a is an insect-specific neurotoxin whose phylogenetic specificity derives from its ability to antagonise insect, but not vertebrate, voltage-gated calcium channels. Two spatially proximal residues, Asn(27) and Arg(35), form a contiguous molecular surface that is essential for toxin activity. It has been proposed that this surface of the beta-hairpin is a key site for interaction of the toxin with insect calcium channels . . +PF06358 Protein of unknown function (DUF1065)
Pfam-B_14830 (release 9.0). This family consists of several Benyvirus proteins of unknown function.. +PF06360 Euplotes raikovi mating pheromone
Pfam-B_14708 (release 9.0). This family consists of several Euplotes raikovi mating pheromone proteins. Diffusible polypeptide pheromones, which distinguish otherwise morphologically identical vegetative cell types from one another, are produced by some species of ciliates. In the marine sand-dwelling protozoan ciliate Euplotes raikovi, pheromone molecules promote the vegetative reproduction (mitogenic proliferation or growth) of the same cells from which they originate. As, understandably, such autocrine pheromone activity is primary to that of targeting and inducing a foreign cell to mate (paracrine functions), this finding provides an example of how the original function of a molecule can be obscured during evolution by the acquisition of a new one .. +PF06361 Rice tungro bacilliform virus P12 protein
Pfam-B_14960 (release 9.0). This family consists of several Rice tungro bacilliform virus P12 proteins. The function of this family is unknown .. +PF06362 Protein of unknown function (DUF1067)
Pfam-B_15074 (release 9.0). This family consists of several hypothetical Mycobacterium leprae specific proteins. The function of this family is unknown.. +PF06363 Picornaviridae P3A protein
This family consists of the P3A protein of picornaviridae. P3A has been identified as a genome-linked protein (VPg) and is involved in replication ( ).. +PF06364 Protein of unknown function (DUF1068)
Pfam-B_14602 (release 9.0). This family consists of several hypothetical plant proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown.. +PF06365 CD34/Podocalyxin family
Pfam-B_14609 (release 9.0) & Pfam-B_17463 (release 8.0). This family consists of several mammalian CD34 antigen proteins. The CD34 antigen is a human leukocyte membrane protein expressed specifically by lymphohematopoietic progenitor cells. CD34 is a phosphoprotein. Activation of protein kinase C (PKC) has been found to enhance CD34 phosphorylation . This family contains several eukaryotic podocalyxin proteins. Podocalyxin is a major membrane protein of the glomerular epithelium and is thought to be involved in maintenance of the architecture of the foot processes and filtration slits characteristic of this unique epithelium by virtue of its high negative charge. Podocalyxin functions as an anti-adhesin that maintains an open filtration pathway between neighbouring foot processes in the glomerular epithelium by charge repulsion .. +PF06366 Flagellar protein FlhE
Pfam-B_14631 (release 9.0). This family consists of several Enterobacterial FlhE flagellar proteins. The exact function of this family is unknown .. +PF06367 Diaphanous FH3 Domain
This region is found in the Formin-like and diaphanous proteins [1,2].. +PF06368 Methylaspartate mutase E chain (MutE)
Pfam-B_14693 (release 9.0). This family consists of several methylaspartate mutase E chain proteins (EC:5.4.99.1). Glutamate mutase catalyses the first step in the fermentation of glutamate by Clostridium tetanomorphum. This is an unusual isomerisation in which L-glutamate is converted to threo-beta-methyl L-aspartate .. +PF06369 Sea anemone cytotoxic protein
Pfam-B_14701 (release 9.0). Sea anemones are a rich source of cytotoxic proteins. Cytolysins comprise a group of more than 30 highly basic proteins with molecular masses of about 20 kDa. Cytolysins isolated from the sea anemone, Heteractis magnifica, include magnificalysin I (HMg I), magnificalysin II (HMg II) and Heteractis magnifica toxin (HMgtxn). These are highly homologous at their N-terminals. HMg I and II have molecular masses of approximately 19 kDa, and pI values of 9.4 and 10.0, respectively. Cytolysins isolated from other sea anemones Actinia tenebrosa (Tenebrosin-C, TN-C), Actinia equina (Equinatoxin, EqT) and Stichodactyla helianthus (ShC) exhibit pore-forming, haemolytic, cytotoxic, and heart stimulatory activities .. +PF06370 Protein of unknown function (DUF1069)
Pfam-B_14815 (release 9.0). This family consists of several Maize streak virus 21.7 kDa proteins. The function of this family is unknown.. +PF06371 Diaphanous GTPase-binding Domain
This domain is bound to by GTP-attached Rho proteins, leading to activation of the Drf protein.. +PF06372 Gemin6 protein
Pfam-B_14816 (release 9.0). This family consists of several mammalian Gemin6 proteins. The exact function of Gemin6 is unknown but it has been found to form part of the Pfam:PF06003 complex. The SMN complex plays a key role in the biogenesis of spliceosomal small nuclear ribonucleoproteins (snRNPs) and other ribonucleoprotein particles .. +PF06373 Cocaine and amphetamine regulated transcript protein (CART)
Pfam-B_15325 (release 9.0). This family consists of several cocaine and amphetamine regulated transcript type I protein (CART) sequences. Cocaine and amphetamine regulated transcript (CART) peptide has been shown to be an anorectic peptide that inhibits both normal and starvation-induced feeding and completely blocks the feeding response induced by neuropeptide Y and regulated by leptin in the hypothalamus. The C-terminal part containing the three disulfide bridges is the biologically active part of the molecule affecting food intake. The solution structure of the active part of CART has a fold equivalent to other functionally distinct small proteins. CART consists mainly of turns and loops spanned by a compact framework composed by a few small stretches of antiparallel beta-sheet common to cystine knots . . +PF06374 NDUFC2;
NADH-ubiquinone oxidoreductase subunit b14.5b (NDUFC2). Pfam-B_15334 (release 9.0). This family consists of several NADH-ubiquinone oxidoreductase subunit b14.5b proteins (EC:1.6.5.3).. +PF06375 Bovine leukaemia virus receptor (BLVR)
Pfam-B_14559 (release 9.0). This family consists of several bovine specific leukaemia virus receptors which are thought to function as transmembrane proteins, although their exact function is unknown .. +PF06376 Protein of unknown function (DUF1070)
Pfam-B_14060 (release 9.0). This family consists of several short hypothetical plant proteins of unknown function.. +PF06377 Adipokinetic hormone
Pfam-B_14600 (release 9.0). This family consists of several insect adipokinetic hormone as well as the related crustacean red pigment concentrating hormone. Flight activity of insects comprises one of the most intense biochemical processes known in nature, and therefore provides an attractive model system to study the hormonal regulation of metabolism during physical exercise. In long-distance flying insects, such as the migratory locust, both carbohydrate and lipid reserves are utilised as fuels for sustained flight activity. The mobilization of these energy stores in Locusta migratoria is mediated by three structurally related adipokinetic hormones (AKHs), which are all capable of stimulating the release of both carbohydrates and lipids from the fat body .. +PF06378 Protein of unknown function (DUF1071)
Pfam-B_14587 (release 9.0). This family consists of several hypothetical bacterial and phage proteins of unknown function.. +PF06379 L-rhamnose-proton symport protein (RhaT)
Pfam-B_14617 (release 9.0). This family consists of several bacterial L-rhamnose-proton symport protein (RhaT) sequences [1,2].. +PF06380 Protein of unknown function (DUF1072)
Pfam-B_14592 (release 9.0). This family consists of several Barley yellow dwarf virus proteins of unknown function.. +PF06381 Protein of unknown function (DUF1073)
Pfam-B_14928 (release 9.0). This family consists of several hypothetical bacterial proteins. The function of this family is unknown.. +PF06382 Protein of unknown function (DUF1074)
Pfam-B_14962 (release 9.0). This family consists of several proteins which appear to be specific to Drosophila melanogaster. The function of this family is unknown.. +PF06384 Beta-catenin-interacting protein ICAT
Pfam-B_15027 (release 9.0). This family consists of several eukaryotic beta-catenin-interacting (ICAT) proteins. Beta-catenin is a multifunctional protein involved in both cell adhesion and transcriptional activation. Transcription mediated by the beta-catenin/Tcf complex is involved in embryological development and is upregulated in various cancers. ICAT selectively inhibits beta-catenin/Tcf binding in vivo, without disrupting beta-catenin/cadherin interactions .. +PF06385 Baculovirus LEF-11 protein
Pfam-B_15073 (release 9.0). This family consists of several Baculovirus LEF-11 proteins. The exact function of this family is unknown although it has been shown that LEF-11 is required for viral DNA replication during the infection cycle .. +PF06386 Gas vesicle synthesis protein GvpL/GvpF
Pfam-B_15376 (release 9.0). This family consists of several bacterial and archaeal gas vesicle synthesis protein (GvpL/GvpF) sequences. The exact function of this family is unknown.. +PF06387 D1 dopamine receptor-interacting protein (calcyon)
Pfam-B_15400 (release 9.0). This family consists of several D1 dopamine receptor-interacting (calcyon) proteins. D1/D5 dopamine receptors in the basal ganglia, hippocampus, and cerebral cortex modulate motor, reward, and cognitive behaviour. D1-like dopamine receptors likely modulate neocortical and hippocampal neuronal excitability and synaptic function via Ca(2+) as well as cAMP-dependent signaling . Defective calcyon proteins have been implicated in both attention-deficit/hyperactivity disorder (ADHD) and schizophrenia .. +PF06388 Protein of unknown function (DUF1075)
Pfam-B_14186 (release 9.0). This family consists of several eukaryotic proteins of unknown function.. +PF06389 Filovirus_VP24;
Filovirus membrane-associated protein VP24. Pfam-B_15734 (release 9.0). This family consists of several membrane-associated protein VP24 sequences from a variety of Ebola and Marburg viruses. The VP24 protein of Ebola virus is believed to be a secondary matrix protein and minor component of virions. VP24 possesses structural features commonly associated with viral matrix proteins and that VP24 may have a role in virus assembly and budding .. +PF06390 Neuroendocrine-specific golgi protein P55 (NESP55)
Pfam-B_16185 (release 9.0). This family consists of several mammalian neuroendocrine-specific golgi protein P55 (NESP55) sequences. NESP55 is a novel member of the chromogranin family and is a soluble, acidic, heat-stable secretory protein that is expressed exclusively in endocrine and nervous tissues, although less widely than chromogranins .. +PF06391 CDK-activating kinase assembly factor MAT1
Pfam-B_16773 (release 9.0). MAT1 is an assembly/targeting factor for cyclin-dependent kinase-activating kinase (CAK), which interacts with the transcription factor TFIIH . The domain found to the N-terminal side of this domain is a C3HC4 RING finger .. +PF06392 Acid shock protein repeat
Pfam-B_20230 (release 9.0). The Asr protein is synthesised as a precursor and the cleavage is essential for moderate to high acid tolerance .. +PF06393 BH3 interacting domain (BID)
Pfam-B_16321 (release 9.0). BID is a member of the BCL-2 superfamily of proteins, which are key regulators of programmed cell death, hence this family is related to Pfam:PF00452 . BID is a pro-apoptotic member of the Bcl-2 superfamily and as such possesses the ability to target intracellular membranes and contains the BH3 death domain. The activity of BID is regulated by a Caspase 8-mediated cleavage event, exposing the BH3 domain and significantly changing the surface charge and hydrophobicity, which causes a change of cellular localisation .. +PF06394 Pepsin inhibitor-3-like repeated domain
Pfam-B_13438 (release 9.0). Pepsin inhibitor-3 consists of two domains, each comprising an antiparallel beta-sheet flanked by an alpha-helix. In the enzyme-inhibitor complex, the N-terminal beta-strand of PI-3 pairs with one strand of the active site flap region of pepsin . The two domains are tandem repeats of sequence, and have therefore been termed repeated domains.. +PF06395 CDC24 Calponin
Pfam-B_32837 (release 8.0). Is a calponin homology domain.. +PF06396 Angiotensin II, type I receptor-associated protein (AGTRAP)
Pfam-B_15509 (release 9.0). This family consists of several angiotensin II, type I receptor-associated protein (AGTRAP) sequences. AGTRAP is known to interact specifically with the carboxyl-terminal cytoplasmic region of the angiotensin II type 1 (AT(1)) receptor to regulate different aspects of AT(1) receptor physiology [1,2]. The function of this family is unclear.. +PF06397 Desulfoferrodoxin, N-terminal domain
Pfam-B_11142 (release 9.0). Most members of this family are small (approximately 36 amino acids) proteins that form homodimeric complexes. Each subunit contains a high-spin iron atom tetrahedrally bound to four cysteinyl sulphur atoms. This family has a similar fold to the rubredoxin metal binding domain . It is also found as the N-terminal domain of desulfoferrodoxin, see (Pfam:PF01880).. +PF06398 Integral peroxisomal membrane peroxin
Pfam-B_56111 (release 8.0). Peroxisomes play diverse roles in the cell, compartmentalising many activities related to lipid metabolism and functioning in the decomposition of toxic hydrogen peroxide. Sequence similarity was identified between two hypothetical proteins and the peroxin integral membrane protein Pex24p .. +PF06399 GTP cyclohydrolase I feedback regulatory protein (GFRP)
Pfam-B_63435 (release 9.0). Tetrahydrobiopterin, the cofactor required for hydroxylation of aromatic amino acids, regulates its own synthesis via feedback inhibition of GTP cyclohydrolase I. This mechanism is mediated by the regulatory subunit called GTP cyclohydrolase I feedback regulatory protein (GFRP) .. +PF06400 Alpha-2-macroglobulin RAP, N-terminal domain
Pfam-B_44514 (release 9.0). The alpha-2-macroglobulin receptor-associated protein (RAP) is an intracellular glycoprotein that binds to the 2-macroglobulin receptor and other members of the low density lipoprotein receptor family. The protein inhibits binding of all currently known ligands of these receptors . The N-terminal domain is predominately alpha helical . Two different studies have provided conflicting domain boundaries [2,3].. +PF06401 Alpha-2-macroglobulin RAP, C-terminal domain
Pfam-B_44514 (release 9.0). The alpha-2-macroglobulin receptor-associated protein (RAP) is an intracellular glycoprotein that binds to the 2-macroglobulin receptor and other members of the low density lipoprotein receptor family. The protein inhibits binding of all currently known ligands of these receptors . Two different studies have provided conflicting domain boundaries [2,3].. +PF06403 Lamprin
Pfam-B_15493 (release 9.0). This family consists of several lamprin proteins from the Sea lamprey Petromyzon marinus. Lamprin, an insoluble non-collagen, non-elastin protein, is the major connective tissue component of the fibrillar extracellular matrix of lamprey annular cartilage. Although not generally homologous to any other protein, soluble lamprins contain a tandemly repeated peptide sequence (GGLGY) which is present in both silkmoth chorion proteins and spider dragline silk. Strong homologies to this repeat sequence are also present in several mammalian and avian elastins. It is thought that these proteins share a structural motif which promotes self-aggregation and fibril formation in proteins through interdigitation of hydrophobic side chains in beta-sheet/beta-turn structures, a motif that has been preserved in recognisable form over several hundred million years of evolution .. +PF06404 Phytosulfokine precursor protein (PSK)
Pfam-B_16071 (release 9.0). This family consists of several plant specific phytosulfokine precursor proteins. Phytosulfokines, are active as either a pentapeptide or a C-terminally truncated tetrapeptide. These compounds were first isolated because of their ability to stimulate cell division in somatic embryo cultures of Asparagus officinalis .. +PF06405 Red chlorophyll catabolite reductase (RCC reductase)
Pfam-B_15577 (release 9.0). This family consists of several red chlorophyll catabolite reductase (RCC reductase) proteins. Red chlorophyll catabolite (RCC) reductase (RCCR) and pheophorbide (Pheide) a oxygenase (PaO) catalyse the key reaction of chlorophyll catabolism, porphyrin macrocycle cleavage of Pheide a to a primary fluorescent catabolite (pFCC) . . +PF06406 StbA protein
Pfam-B_12747 (release 9.0). This family consists of several bacterial StbA plasmid stability proteins .. +PF06407 Borna disease virus P40 protein
Pfam-B_15995 (release 9.0). This family consists of several Borna disease virus P40 proteins. Borna disease (BD) is a persistent viral infection of the central nervous system caused by the single-negative-strand, nonsegmented RNA Borna disease virus (BDV). P40 is known to be a nucleoprotein .. +PF06409 Nuclear pore complex interacting protein (NPIP)
Pfam-B_16418 (release 9.0). This family consists of a series of primate specific nuclear pore complex interacting protein (NPIP) sequences. The function of this family is unknown but is well conserved from African apes to humans .. +PF06411 HdeA/HdeB family
Pfam-B_63431 (release 9.0). HdeA (hns-dependent expression protein A) is a single domain alpha-helical protein localised in the periplasmic space. HdeA is involved in acid resistance essential for infectivity of enteric bacterial pathogens. Functional studies demonstrate that HdeA is activated by a dimer-to-monomer transition at acidic pH, leading to suppression of aggregation by acid-denatured proteins. The gene encoding HdeA was initially identified as part of an operon regulated by the nucleoid protein H-NS [1,2]. This family also contains HdeB .. +PF06412 Conjugal transfer protein TraD
Pfam-B_11863 (release 9.0). This family contains bacterial TraD conjugal transfer proteins . Mutations in the TraD gene result in loss of transfer .. +PF06413 Neugrin
Pfam-B_11274 (release 9.0). This family consists of several mouse and human neugrin proteins. Neugrin and m-neugrin are mainly expressed in neurons in the nervous system, and are thought to play an important role in the process of neuronal differentiation .. +PF06414 Zeta toxin
Pfam-B_12374 (release 9.0). This family consists of several bacterial zeta toxin proteins. Zeta toxin is thought to be part of a postregulational killing system in bacteria. It relies on antitoxin/toxin systems that secure stable inheritance of low and medium copy number plasmids during cell division and kill cells that have lost the plasmid .. +PF06415 BPG-independent PGAM N-terminus (iPGM_N)
Pfam-B_1338 (release 10.0). This family represents the N-terminal region of the 2,3-bisphosphoglycerate-independent phosphoglycerate mutase (or phosphoglyceromutase or BPG-independent PGAM) protein (EC:5.4.2.1). The family is found in conjunction with Pfam:PF01676 (located in the C-terminal region of the protein).. +PF06416 Protein of unknown function (DUF1076)
Pfam-B_2653 (release 9.0). This family consists of several hypothetical bacterial proteins exclusive to Escherichia coli and Salmonella typhi. The function of this family is unknown.. +PF06417 Protein of unknown function (DUF1077)
Pfam-B_6645 (release 9.0). This family consists of several hypothetical eukaryotic proteins of unknown function.. +PF06418 CTP synthase N-terminus
Pfam-B_226 (release 10.0). This family consists of the N-terminal region of the CTP synthase protein (EC:6.3.4.2). This family is found in conjunction with Pfam:PF00117 located in the C-terminal region of the protein. CTP synthase catalyses the synthesis of CTP from UTP by amination of the pyrimidine ring at the 4-position .. +PF06419 Conserved oligomeric complex COG6
Pfam-B_10345 (release 8.0). COG6 is a component of the conserved oligomeric golgi complex, which is composed of eight different subunits and is required for normal golgi morphology and localisation.. +PF06420 Mitochondrial genome maintenance MGM101
Pfam-B_35151 (release 8.0). The mgm101 gene was identified as essential for maintenance of the mitochondrial genome in Saccharomyces cerevisiae . Based on its DNA-binding activity, and experimental work with a temperature-sensitive mgm101 mutant, it has been proposed that the mgm101 gene product performs an essential function in the repair of oxidatively damaged mitochondrial DNA .. +PF06421 GTP-binding protein LepA C-terminus
Pfam-B_425 (release 10.0). This family consists of the C-terminal region of several pro- and eukaryotic GTP-binding LepA proteins .. +PF06422 CDR ABC transporter
Pfam-B_1005 (release 8.0). Corresponds to a region of the PDR/CDR subgroup of ABC transporters comprising extracellular loop 3, transmembrane segment 6 and linker region.. +PF06423 GWT1
Pfam-B_15982 (release 8.0). Glycosylphosphatidylinositol (GPI) is a conserved post-translational modification to anchor cell surface proteins to plasma membrane in eukaryotes. GWT1 is involved in GPI anchor biosynthesis; it is required for inositol acylation in yeast [1-2].. +PF06424 PRP1 splicing factor, N-terminal
Pfam-B_6467 (release 8.0). This domain is specific to the N-terminal part of the prp1 splicing factor, which is involved in mRNA splicing (and possibly also poly(A)+ RNA nuclear export and cell cycle progression). This domain is specific to the N terminus of the RNA splicing factor encoded by prp1 . It is involved in mRNA splicing and possibly also poly(A)+ RNA nuclear export and cell cycle progression.. +PF06426 Serine acetyltransferase, N-terminal
Pfam-B_1192 (release 8.0). The N-terminal domain of serine acetyltransferase has a sequence that is conserved in plants and bacteria .. +PF06427 UDP-glucose:Glycoprotein Glucosyltransferase
Pfam-B_4648 (release 8.0). The N-terminal region of this group of proteins is required for correct folding of the ER UDP-Glc: glucosyltransferase.. +PF06428 GDP/GTP exchange factor Sec2p
Pfam-B_10665 (release 8.0). In Saccharomyces cerevisiae, Sec2p is a GDP/GTP exchange factor for Sec4p, which is required for vesicular transport at the post-Golgi stage of yeast secretion .. +PF06429 DUF1078;
Flagellar basal body rod FlgEFG protein C-terminal. Pfam-B_807 (release 10.0). This family consists of a number of C-terminal domains of unknown function. This domain seems to be specific to flagellar basal-body rod and flagellar hook proteins in which Pfam:PF00460 is often present at the extreme N terminus.. +PF06430 Lactococcus lactis RepB C-terminus
Pfam-B_717 (release 10.0). This family consists of the C-terminal region of RepB proteins from Lactococcus lactis (See Pfam:PF01051).. +PF06431 Polyomavirus large T antigen C-terminus
Pfam-B_214 (release 10.0). +PF06432 Phosphatidylinositol N-acetylglucosaminyltransferase
Pfam-B_33496 (release 8.0). Glycosylphosphatidylinositol (GPI) represents an important anchoring molecule for cell surface proteins. The first step in its synthesis is the transfer of N-acetylglucosamine (GlcNAc) from UDP-N-acetylglucosamine to phosphatidylinositol (PI). This step involves products of three or four genes in both yeast (GPI1, GPI2 and GPI3) and mammals (GPI1, PIG A, PIG H and PIG C), respectively.. +PF06433 Me-amine-deh_H;
Methylamine dehydrogenase heavy chain (MADH). Pfam-B_20644 (release 9.0). Methylamine dehydrogenase (EC:1.4.99.3) is a periplasmic quinoprotein found in several methylotrophic bacteria. Induced when grown on methylamine as a carbon source, MADH catalyses the oxidative deamination of amines to their corresponding aldehydes. MADH is a hetero-tetramer, comprised of two heavy chains (H) and two light chains (L). The H-chain forms a beta-propeller like structure .. +PF06434 Aconitate hydratase 2 N-terminus
Pfam-B_2605 (release 10.0). This family represents the N-terminal region of several bacterial Aconitate hydratase 2 proteins and is found in conjunction with Pfam:PF00330.. +PF06435 Repeat of unknown function (DUF1079)
Pfam-B_1911 (release 10.0). This family consists of several repeats of 31 residues in length and seems to be exclusive to Moraxella catarrhalis UspA proteins. The UspA1 and UspA2 proteins of Moraxella catarrhalis are structurally related and are exposed on the bacterial cell surface where they can function as adhesins . This family is commonly found with the Pfam:PF03895 family.. +PF06436 Pneumovirus matrix protein 2 (M2)
Pfam-B_2318 (release 10.0). This family consists of several Pneumovirus matrix glycoprotein M2 sequences. This family functions as a transcription processivity factor that is essential for virus replication .. +PF06437 IMP-specific 5'-nucleotidase
Pfam-B_43910 (release 8.0). The Saccharomyces cerevisiae ISN1 (YOR155c) gene encodes an IMP-specific 5'-nucleotidase, which catalyses degradation of IMP to inosine as part of the purine salvage pathway.. +PF06438 Heme-binding protein A (HasA)
Pfam-B_27216 (release 9.0). Free iron is limited in vertebrate hosts, thus an alternative to siderophores has been developed by pathogenic bacteria to access host iron bound in protein complexes. HasA is a secreted hemophore that has the ability to obtain iron from hemoglobin. Once bound to HasA, the heme is shuttled to the receptor HasR, which releases the heme into the bacterium .. +PF06439 Domain of Unknown Function (DUF1080)
Yeats C, Eberhardt R. This family has structural similarity to an endo-1,3-1,4-beta glucanase belonging to glycoside hydrolase family 16. However, the structure surrounding the active site differs from that of the endo-1,3-1,4-beta glucanase.. +PF06440 DNA polymerase III, theta subunit
Pfam-B_27631 (release 9.0). DNA polymerase III (EC 2.7.7.7) is comprised of three tightly associated subunits, alpha, epsilon and theta. This family contains the theta subunit. The structure of the theta subunit shows that the N-terminal two thirds is comprised of three helices while the C-terminal third is disordered . The function of the theta subunit is poorly understood, but the interaction of the theta subunit with the epsilon subunit is thought to enhance the 3' to 5' exonucleolytic proofreading activity of epsilon .. +PF06441 Epoxide hydrolase N terminus
This family represents the N-terminal region of the eukaryotic epoxide hydrolase protein. Epoxide hydrolases (EC:3.3.2.3) comprise a group of functionally related enzymes that catalyse the addition of water to oxirane compounds (epoxides), thereby usually generating vicinal trans-diols. EHs have been found in all types of living organisms, including mammals, invertebrates, plants, fungi and bacteria. In animals, the major interest in EH is directed towards their detoxification capacity for epoxides since they are important safeguards against the cytotoxic and genotoxic potential of oxirane derivatives that are often reactive electrophiles because of the high tension of the three-membered ring system and the strong polarization of the C--O bonds. This is of significant relevance because epoxides are frequent intermediary metabolites which arise during the biotransformation of foreign compounds . This family is often found in conjunction with Pfam:PF00561.. +PF06442 DHFR;
R67 dihydrofolate reductase. Pfam-B_27527 (release 9.0). R67 dihydrofolate reductase is a plasmid encoded enzyme that provides resistance to the antibacterial drug trimethoprim. The R67 dihydrofolate reductase does not share significant similarity to the chromosomal encoded dihydrofolate reductase .. +PF06443 SEF14-like adhesin
Pfam-B_36358 (release 9.0). Family of enterotoxigenic bacterial adhesins. . +PF06444 NADH dehydrogenase subunit 2 C-terminus
Pfam-B_1662 (release 10.0). This family consists of the C-terminal region specific to the eukaryotic NADH dehydrogenase subunit 2 protein and is found in conjunction with Pfam:PF00361.. +PF06445 AraC_E_bind;
GyrI-like small molecule binding domain. Pfam-B_36124 (release 9.0). This family contains the small molecule binding domain of a number of different bacterial transcription activators . This family also contains DNA gyrase inhibitors. The GyrI superfamily contains a diad of the SHS2 module, adapted for small-molecule binding . The GyrI superfamily includes a family of secreted forms that is found only in animals and the bacterial pathogen Leptospira . . +PF06446 Hepcidin
Pfam-B_41732 (release 9.0). Hepcidin is an antibacterial and antifungal protein expressed in the liver and is also a signaling molecule in iron metabolism. The hepcidin protein is cysteine-rich and forms a distorted beta-sheet with an unusual disulphide bond found at the turn of the hairpin .. +PF06448 Domain of Unknown Function (DUF1081)
This region is found in Apolipophorin proteins.. +PF06449 Mitochondrial domain of unknown function (DUF1082)
Pfam-B_2173 (release 10.0). This family consists of the C-terminal region of several plant mitochondria specific proteins. The function of this family is unknown. This family is found in conjunction with Pfam:PF02326.. +PF06450 Bacterial Na+/H+ antiporter B (NhaB)
Pfam-B_5993 (release 10.0). This family consists of several bacterial Na+/H+ antiporter B (NhaB) proteins. The exact function of this family is unknown [1,2].. +PF06451 Moricin
Pfam-B_56760 (release 9.0). Moricin is an antibacterial peptide that is highly basic. The structure of moricin reveals that it is comprised of a long alpha-helix. The N-terminus of the helix is amphipathic, and the C-terminus of the helix is predominately hydrophobic. The amphipathic N-terminal segment of the alpha-helix is mainly responsible for the increase in permeability of the bacterial membrane which kills the bacteria .. +PF06452 Domain of unknown function (DUF1083)
Pfam-B_2203 (release 10.0). This family consists of several domains of unknown function exclusively found in bacterial xylanase proteins (usually at the C-terminus) although it is tandemly repeated in a number of family members such as Swiss:P38535. This family is always found in conjunction with Pfam:PF00331 and usually with either Pfam:PF02018 or Pfam:PF00395. The function of this family is unknown.. +PF06453 Type II heat-labile enterotoxin , B subunit (LT-IIB)
Pfam-B_61882 (release 9.0). Family of B subunits from the type II heat-labile enterotoxin. The B subunits form a pentameric ring, which interacts with one A subunit. Thus, the structural arrangement of type I and type II heat-labile enterotoxins are very similar . . +PF06454 Protein of unknown function (DUF1084)
Pfam-B_12888 (release 10.0). This family consists of several hypothetical plant specific proteins of unknown function.. +PF06455 NADH dehydrogenase subunit 5 C-terminus
Pfam-B_3060 (release 10.0). This family represents the C-terminal region of several NADH dehydrogenase subunit 5 proteins and is found in conjunction with Pfam:PF00361 and Pfam:PF00662.. +PF06456 Arfaptin-like domain
Pfam-B_5314 (release 7.5). Arfaptin interacts with ARF1, a small GTPase involved in vesicle budding at the Golgi complex and immature secretory granules. The structure of arfaptin shows that upon binding to a small GTPase, arfaptin forms an elongated, crescent-shaped dimer of three-helix coiled-coils . The N-terminal region of ICA69 is similar to arfaptin . . +PF06457 Ectatomin
Pfam-B_63420 (release 9.0). Ectatomin is a toxic component from the Ectatomma tuberculatum ant venom. It is comprised of two subunits, A and B, which are homologous. The structure of ectatomin reveals that each subunit is comprised of two helices and a connecting hinge region, that forms a hairpin structure that is stabilised by disulphide bridges. The two hinges are connected by a disulphide bond .. +PF06458 DUF1085;
Pfam-B_4243 (release 10.0) & Galperin M. The MucBP (MUCin-Binding Protein) domain is found in a wide variety of bacterial proteins. The domain is found in bacterial peptidoglycan bound proteins and is often found in conjunction with Pfam:PF00746 and Pfam:PF00560.. +PF06459 Ryanodine Receptor TM 4-6
This region covers TM regions 4-6 of the ryanodine receptor 1 family.. +PF06460 Coronavirus NSP13
This family covers the NSP13 region of the coronavirus polyprotein. This protein is predicted to function as an mRNA cap-1 methyltransferase ( ).. +PF06461 Domain of Unknown Function (DUF1086)
This family consists of several eukaryotic domains of unknown function which are present in chromodomain helicase DNA binding proteins. This domain is often found in conjunction with Pfam:PF00176, Pfam:PF00271, Pfam:PF06465, Pfam:PF00385 and Pfam:PF00628.. +PF06462 Propeller
Probable beta-propeller.. +PF06463 Molybdenum Cofactor Synthesis C
This region contains two iron-sulphur (3Fe-4S) binding sites. Mutations in this region of Swiss:O14940 cause MOCOD (Molybdenum Co-Factor Deficiency) type A.. +PF06464 DMAP1-binding Domain
This domain binds DMAP1, a transcriptional co-repressor.. +PF06465 Domain of Unknown Function (DUF1087)
Members of this family are found in various chromatin remodelling factors and transposases. Their exact function is, as yet, unknown.. +PF06466 PCAF (P300/CBP-associated factor) N-terminal domain
This region is spliced out of Swiss:Q92830 isoform 2. It is predicted to be of a mixed alpha/beta fold - though predominantly helical.. +PF06467 zf_MYM; zf-MYM;
MYM-type Zinc finger with FCS sequence motif. MYM-type zinc fingers were identified in MYM family proteins . Human protein Swiss:Q14202 is involved in a chromosomal translocation and may be responsible for X-linked retardation in XQ13.1 . Swiss:Q9UBW7 is also involved in disease. In myeloproliferative disorders it is fused to FGF receptor 1 ; in atypical myeloproliferative disorders it is rearranged . Members of the family generally are involved in development. This Zn-finger domain functions as a transcriptional trans-activator of late vaccinia viral genes, and orthologues are also found in all nucleocytoplasmic large DNA viruses, NCLDV. This domain is also found fused to the C termini of recombinases from certain prokaryotic transposons .. +PF06468 Spondin_N
This conserved region is found in the N-terminal half of several Spondin proteins. Spondins are involved in patterning axonal growth trajectory through either inhibiting or promoting adhesion of embryonic nerve cells ( ). . +PF06469 Domain of Unknown Function (DUF1088)
This family is found in the neurobeachins. The function of this region is not known.. +PF06470 SMC proteins Flexible Hinge Domain
This family represents the hinge region of the SMC (Structural Maintenance of Chromosomes) family of proteins. The hinge region is responsible for formation of the DNA interacting dimer. It is also possible that the precise structure of it is an essential determinant of the specificity of the DNA-protein interaction ( ). . +PF06471 NSP11
This region of coronavirus polyproteins encodes the NSP11 protein.. +PF06472 Ald_N;
ABC transporter transmembrane region 2. This domain covers the transmembrane of a small family of ABC transporters and shares sequence similarity with Pfam:PF00664. Mutations in this domain in Swiss:P28288 are believed responsible for Zellweger Syndrome-2 ; mutations in Swiss:P33897 are responsible for recessive X-linked adrenoleukodystrophy . A Saccharomyces cerevisiae homolog is involved in the import of long-chain fatty acids .. +PF06473 FGF binding protein 1 (FGF-BP1)
Pfam-B_14221 (release 10.0). This family consists of several mammalian FGF binding protein 1. Fibroblast growth factors (FGFs) play important roles during fetal and embryonic development . Fibroblast growth factor-binding protein (FGF-BP) 1 is a secreted protein that can bind fibroblast growth factors (FGFs) 1 and 2 .. +PF06474 MltD lipid attachment motif
This short motif is a lipid attachment site.. +PF06475 DUF1089;
Putative glycolipid-binding. Pfam-B_14397 (release 10.0). This family has a novel fold known as a spiral beta-roll, consisting of a 15-stranded beta sheet wrapped around a single alpha helix. It forms dimers. It has some structural similarity to the E. coli lipoprotein localisation factors LolA, Swiss:P61316 and LolB, Swiss:P61320. Its structure suggests that it may have a role in glycolipid binding. Its genomic context supports a role in glycolipid metabolism .. +PF06476 Protein of unknown function (DUF1090)
Pfam-B_14862 (release 10.0). This family consists of several bacterial proteins of unknown function and is known as YqjC in E. coli.. +PF06477 Protein of unknown function (DUF1091)
Pfam-B_14929 (release 10.0). This is a family of uncharacterised proteins. Based on its distant similarity to Pfam:PF02221 and conserved pattern of cysteine residues it is possible that these domains are also lipid binding.. +PF06478 Coronavirus RPol N-terminus
This family covers the N-terminal region of the coronavirus RNA-directed RNA Polymerase.. +PF06479 Ribonuclease 2-5A
This domain is an endoribonuclease . Specifically it cleaves an intron from Hac1 mRNA in humans, which causes it to be much more efficiently translated.. +PF06480 FtsH Extracellular
This domain is found in the FtsH family of proteins. FtsH is the only membrane-bound ATP-dependent protease universally conserved in prokaryotes ( ). It only efficiently degrades proteins that have a low thermodynamic stability - e.g. it lacks robust unfoldase activity. This feature may be key and implies that this could be a criterion for degrading a protein. In Oenococcus oeni FtsH is involved in protection against environmental stress ( ), and shows increased expression under heat or osmotic stress. These two lines of evidence suggest that it is a fundamental prokaryotic self-protection mechanism that checks if proteins are correctly folded (personal obs: Yeats C). The precise function of this N-terminal region is unclear.. +PF06481 COX Aromatic Rich Motif
COX2 (Cytochrome O ubiquinol OXidase 2) is a major component of the respiratory complex during vegetative growth. It transfers electrons from a quinol to the binuclear centre of the catalytic subunit 1. The function of this region is not known.. +PF06482 Coll_NC10;
Collagenase NC10 and Endostatin. NC10 stands for Non-helical region 10 and is taken from Swiss:P39059. A mutation in this region in Swiss:P39060 is associated with an increased risk of prostate cancer. This domain is cleaved from the precursor and forms endostatin. Endostatin is a key tumour suppressor and has been used highly successfully to treat cancer. It is a potent angiogenesis inhibitor ( ). Endostatin also binds a zinc ion near the N-terminus; this is likely to be of structural rather than functional importance according to ( ).. +PF06483 Chitinase C
This ~170 aa region is found at the C-terminus of Pfam:PF00704.. +PF06484 Teneurin Intracellular Region
This family is found in the intracellular N-terminal region of the Teneurin family of proteins. These proteins are 'pair-rule' genes and are involved in tissue patterning, specifically probably neural patterning. The intracellular domain is cleaved in response to homophilic interaction of the extracellular domain, and translocates to the nucleus. Here it probably carries out some transcriptional regulatory activity ( ). The length of this region and the conservation suggests that there may be two structural domains here (personal obs:C Yeats).. +PF06485 Protein of unknown function (DUF1092)
Pfam-B_14522 (release 10.0). This family consists of several hypothetical proteins of unknown function all from photosynthetic organisms including plants and cyanobacteria.. +PF06486 Protein of unknown function (DUF1093)
Pfam-B_15034 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06487 Sin3 associated polypeptide p18 (SAP18)
Pfam-B_15078 (release 10.0). This family consists of several eukaryotic Sin3 associated polypeptide p18 (SAP18) sequences. SAP18 is known to be a component of the Sin3-containing complex which is responsible for the repression of transcription via the modification of histone polypeptides . SAP18 is also present in the ASAP complex which is thought to be involved in the regulation of splicing during the execution of programmed cell death .. +PF06488 Lactococcus lactis bacteriophage major structural protein
Pfam-B_13945 (release 10.0). This family consists of several Lactococcus lactis bacteriophage major structural proteins.. +PF06489 Orthopoxvirus A49R protein
Pfam-B_14072 (release 10.0). This family consists of several Orthopoxvirus A49R proteins. The function of this family is unknown.. +PF06490 Flagellar regulatory protein FleQ
Pfam-B_13480 (release 9.0). This domain is found at the N terminus of a subset of sigma54-dependent transcriptional activators that are involved in regulation of flagellar motility e.g. FleQ in Pseudomonas aeruginosa. It is clearly related to Pfam:PF00072, but lacks the conserved aspartate residue that undergoes phosphorylation in the classic two-component system response regulator (Pfam:PF00072).. +PF06491 DUF1094;
Disulphide isomerase. Pfam-B_14101 (release 10.0). This family of proteins has disulphide isomerase activity, EC:5.3.4.1. It has a similar fold to thioredoxin, with an alpha-beta-alpha-beta-alpha-beta-beta-alpha topology. It has a conserved CGC motif in the loop immediately downstream of the first beta strand. This motif is essential for activity .. +PF06493 Protein of unknown function (DUF1096)
Pfam-B_15011 (release 10.0). This family represents the N-terminal region of several proteins found in C. elegans. The family is often found with Pfam:PF02363.. +PF06495 Fruit fly transformer protein
Pfam-B_13780 (release 10.0). This family consists of transformer proteins from several Drosophila species and also from Ceratitis capitata (Mediterranean fruit fly). The transformer locus (tra) produces an RNA processing protein that alternatively splices the doublesex pre-mRNA in the sex determination hierarchy of Drosophila melanogaster .. +PF06496 Protein of unknown function (DUF1097)
Pfam-B_15055 (release 10.0). This family consists of several bacterial putative membrane proteins.. +PF06497 Protein of unknown function (DUF1098)
Pfam-B_15446 (release 10.0). This family consists of several hypothetical Baculovirus proteins of unknown function.. +PF06500 Alpha/beta hydrolase of unknown function (DUF1100)
Pfam-B_15719 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function. Members of this family have an alpha/beta hydrolase fold.. +PF06501 Human herpesvirus U55 protein
Pfam-B_15779 (release 10.0). This family consists of several human herpesvirus U55 proteins. The function of this family is unknown.. +PF06502 Equine infectious anaemia virus S2 protein
Pfam-B_15780 (release 10.0). This family consists of several equine infectious anaemia virus S2 proteins. The function of this family is unknown.. +PF06503 Protein of unknown function (DUF1101)
Pfam-B_15836 (release 10.0). This family consists of several hypothetical Fijivirus proteins of unknown function.. +PF06504 Replication protein C (RepC)
Pfam-B_15903 (release 10.0). This family consists of several bacterial replication protein C (RepC) sequences.. +PF06505 Activator of aromatic catabolism
Pfam-B_2890 (release 9.0). This domain is found at the N terminus of a subset of sigma54-dependent transcriptional activators in several proteobacteria, including activators of phenol degradation such as XylR. It is found adjacent to Pfam:PF02830.. +PF06506 Propionate catabolism activator
Pfam-B_10794 (release 9.0). This domain is found at the N terminus of several sigma54-dependent transcriptional activators including PrpR, which activates catabolism of propionate.. +PF06507 Auxin response factor
Pfam-B_2015 (release 9.0). A conserved region of auxin-responsive transcription factors.. +PF06508 ExsB;
Queuosine biosynthesis protein QueC. Pfam-B_715 (release 9.0). This family of proteins participate in the biosynthesis of 7-carboxy-7-deazaguanine. They catalyse the conversion of 7-deaza-7-carboxyguanine to preQ0 [1-3].. +PF06510 Protein of unknown function (DUF1102)
Pfam-B_16043 (release 10.0). This family consists of several hypothetical archaeal proteins of unknown function.. +PF06511 Invasion plasmid antigen IpaD
Pfam-B_16150 (release 10.0). This family consists of several invasion plasmid antigen IpaD proteins. Entry of Shigella flexneri into epithelial cells and lysis of the phagosome involve the IpaB, IpaC, and IpaD proteins, which are secreted by type III secretion machinery.. +PF06512 Sodium ion transport-associated
Pfam-B_16808 (release 10.0). Members of this family contain a region found exclusively in eukaryotic sodium channels or their subunits, many of which are voltage-gated. Members very often also contain between one and four copies of Pfam:PF00520 and, less often, one copy of Pfam:PF00612.. +PF06513 Repeat of unknown function (DUF1103)
Pfam-B_16075 (release 10.0). This family consists of several repeats of around 30 residues in length which are found specifically in mature-parasite-infected erythrocyte surface antigen proteins from Plasmodium falciparum. This family is often found in conjunction with Pfam:PF00226.. +PF06514 Photosystem II 12 kDa extrinsic protein (PsbU)
Pfam-B_13782 (release 10.0). This family consists of several photosystem II 12 kDa extrinsic protein (PsbU) proteins from cyanobacteria and algae. PsbU is an extrinsic protein of the photosystem II complex of cyanobacteria and red algae. PsbU is known to stabilise the oxygen-evolving machinery of the photosystem II complex against heat-induced inactivation . This family appears to be related to the Helix-hairpin-helix domain.. +PF06515 Borna disease virus P10 protein
Pfam-B_16237 (release 10.0). This family consists of several Borna disease virus P10 (or X) proteins. Borna disease virus (BDV) is unique among the non-segmented negative-strand RNA viruses of animals and man because it transcribes and replicates its genome in the nucleus of the infected cell. It has been suggested that the p10 protein plays a role in viral RNA synthesis or ribonucleoprotein transport .. +PF06516 Purine nucleoside permease (NUP)
Pfam-B_15961 (release 10.0). This family consists of several purine nucleoside permeases from both bacteria and fungi .. +PF06517 Orthopoxvirus A43R protein
Pfam-B_16577 (release 10.0). This family consists of several Orthopoxvirus A43R proteins. The function of this family is unknown.. +PF06518 Protein of unknown function (DUF1104)
Pfam-B_16082 (release 10.0). This family consists of several hypothetical proteins of unknown function which appear to be found largely in Helicobacter pylori.. +PF06519 TolA C-terminal
Pfam-B_16081 (release 10.0). This family consists of several bacterial TolA proteins as well as two eukaryotic proteins of unknown function. Tol proteins are involved in the translocation of group A colicins. Colicins are bacterial protein toxins, which are active against Escherichia coli and other related species (See Pfam:PF01024). TolA is anchored to the cytoplasmic membrane by a single membrane spanning segment near the N-terminus, leaving most of the protein exposed to the periplasm .. +PF06521 PAR1 protein
Pfam-B_16232 (release 10.0). This family consists of several plant specific PAR1 proteins from Nicotiana tabacum and Arabidopsis thaliana. The function of this family is unknown.. +PF06522 NADH-ubiquinone reductase complex 1 MLRQ subunit
Pfam-B_16238 (release 10.0). The MLRQ subunit of mitochondrial NADH-ubiquinone reductase complex I is nuclear and is found in plants , insects, fungi and higher metazoans . It appears to act within the membrane and, in mammals, is highly expressed in muscle and neural tissue, indicative of a role in ATP generation .. +PF06523 Protein of unknown function (DUF1106)
Pfam-B_16281 (release 10.0). This family consists of several hypothetical bacterial proteins found in Escherichia coli and Citrobacter rodentium. The function of this family is unknown.. +PF06524 NOA36 protein
Pfam-B_16330 (release 10.0). This family consists of several NOA36 proteins which contain 29 highly conserved cysteine residues. The function of this protein is unknown.. +PF06525 Sulfocyanin (SoxE)
Pfam-B_16349 (release 10.0). This family consists of several archaeal sulfocyanin (or blue copper protein) sequences from a number of Sulfolobus species.. +PF06526 Protein of unknown function (DUF1107)
Pfam-B_16434 (release 10.0). This family consists of several short, hypothetical bacterial proteins of unknown function.. +PF06527 TniQ
Pfam-B_16755 (release 10.0). This family consists of several bacterial TniQ proteins. TniQ along with TniA and B is involved in the transposition of the mercury-resistance transposon Tn5053 which carries the mer operon. It has been suggested that the tni genes are involved in the dissemination of integrons .. +PF06528 Phage P2 GpE
Pfam-B_15359 (release 10.0). This family consists of several phage and bacterial proteins which are closely related to the GpE tail protein from Phage P2.. +PF06529 Vertebrate interleukin-3 regulated transcription factor
Pfam-B_16154 (release 10.0). This family includes vertebrate transcription factors, some of which are regulated by IL-3/adenovirus E4 promoter binding protein . Others were found to strongly repress transcription in a DNA-binding-site-dependent manner .. +PF06530 Phage antitermination protein Q
Pfam-B_3979 (release 10.0). This family consists of several phage antitermination protein Q and related bacterial sequences. Antiterminator proteins control gene expression by recognising control signals near the promoter and preventing transcriptional termination which would otherwise occur at sites that may be a long way downstream . . +PF06531 Protein of unknown function (DUF1108)
Pfam-B_16830 (release 10.0). This family consists of several bacterial proteins from Staphylococcus aureus as well as a number of phage proteins. The function of this family is unknown.. +PF06532 Protein of unknown function (DUF1109)
Pfam-B_17952 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06533 Protein of unknown function (DUF1110)
Pfam-B_18243 (release 10.0). This family consists of hypothetical proteins specific to Oryza sativa. One sequence (Swiss:Q943P1) appears to be tandemly repeated.. +PF06534 Repulsive guidance molecule (RGM) C-terminus
Pfam-B_18263 (release 10.0). This family consists of several mammalian and one bird sequence from Gallus gallus (Chicken). This family represents the C-terminal region of several sequences but in others it represents the full protein. All of the mammalian proteins are hypothetical and have no known function but Swiss:Q8JG54 from the chicken is annotated as being a repulsive guidance molecule (RGM). RGM is a GPI-linked axon guidance molecule of the retinotectal system. RGM is repulsive for a subset of axons, those from the temporal half of the retina. Temporal retinal axons invade the anterior optic tectum in a superficial layer, and encounter RGM expressed in a gradient with increasing concentration along the anterior-posterior axis. Temporal axons are able to receive posterior-dependent information by sensing gradients or concentrations of guidance cues. Thus, RGM is likely to provide positional information for temporal axons invading the optic tectum in the stratum opticum .. +PF06535 Repulsive guidance molecule (RGM) N-terminus
Pfam-B_18263 (release 10.0). This family consists of the N-terminal region of several mammalian and one bird sequence from Gallus gallus (Chicken). All of the mammalian proteins are hypothetical and have no known function but Swiss:Q8JG54 from the chicken is annotated as being a repulsive guidance molecule (RGM). RGM is a GPI-linked axon guidance molecule of the retinotectal system. RGM is repulsive for a subset of axons, those from the temporal half of the retina. Temporal retinal axons invade the anterior optic tectum in a superficial layer, and encounter RGM expressed in a gradient with increasing concentration along the anterior-posterior axis. Temporal axons are able to receive posterior-dependent information by sensing gradients or concentrations of guidance cues. Thus, RGM is likely to provide positional information for temporal axons invading the optic tectum in the stratum opticum .. +PF06536 Avian adenovirus fibre
Pfam-B_16053 (release 10.0). This family contains avian adenovirus fibre proteins, which have been linked to variations in virulence . Avian adenoviruses possess penton capsomers that consist of a pentameric base associated with two fibres .. +PF06537 Protein of unknown function (DUF1111)
Pfam-B_16636 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06540 Galanin message associated peptide (GMAP)
Pfam-B_16759 (release 10.0). This family consists of several galanin message associated peptides. In rat preprogalanin, galanin is C-terminally flanked by a 60 amino acid long peptide: galanin message-associated peptide (GMAP). GMAP sequences in different species show high degree of homology, but the biological function of this family is unknown .. +PF06541 Protein of unknown function (DUF1113)
Pfam-B_17933 (release 10.0). This family consists of several bacterial proteins of unknown function.. +PF06542 DUF1114;
Regulator protein PHA-1. Vella Briffa B, Sammut SJ, Pollington J. Pfam-B_16084 (release 10.0). This family represents the protein product of the gene pha-1 which coordinates with lin-35 Rb during animal development. The protein is expressed during embryonic development and functions in the cytoplasm. PHA-1 acts in a parallel pathway with UBC-18 to regulate the activity of a common cellular target .. +PF06543 Lactococcus bacteriophage repressor
Pfam-B_16088 (release 10.0). This family represents the C-terminus of Lactococcus bacteriophage repressor proteins.. +PF06544 Protein of unknown function (DUF1115)
Vella Briffa B, Sammut SJ. Pfam-B_16104 (release 10.0). This family represents the C-terminus of hypothetical eukaryotic proteins of unknown function.. +PF06545 Protein of unknown function (DUF1116)
Pfam-B_16143 (release 10.0). This family contains hypothetical bacterial proteins of unknown function.. +PF06546 Vertebrate heat shock transcription factor
Pfam-B_16244 (release 10.0). This family represents the C-terminal region of vertebrate heat shock transcription factors. Heat shock transcription factors regulate the expression of heat shock proteins - a set of proteins that protect the cell from damage caused by stress and aid the cell's recovery after the removal of stress . This C-terminal region is found with the N-terminal Pfam:PF00447, and may contain a three-stranded coiled-coil trimerisation domain and a CE2 regulatory region, the latter of which is involved in sustained heat shock response .. +PF06547 Protein of unknown function (DUF1117)
Pfam-B_16251 (release 10.0). This family represents the C-terminus of a number of hypothetical plant proteins.. +PF06548 Kinesin-related
Pfam-B_16517 (release 10.0). This family represents a region within kinesin-related proteins from higher plants. Many family members also contain the Pfam:PF00225 domain. Kinesins are ATP-driven microtubule motor proteins that produce directed force . Some family members are associated with the phragmoplast, a structure composed mainly of microtubules that executes cytokinesis in higher plants .. +PF06549 Protein of unknown function (DUF1118)
Pfam-B_17963 (release 10.0). This family consists of several hypothetical plant proteins of unknown function.. +PF06550 Protein of unknown function (DUF1119)
Pfam-B_17985 (release 10.0). This family consists of several hypothetical archaeal proteins of unknown function.. +PF06551 Protein of unknown function (DUF1120)
Pfam-B_17948 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06552 Plant specific mitochondrial import receptor subunit TOM20
Pfam-B_17991 (release 10.0). This family consists of several plant specific mitochondrial import receptor subunit TOM20 (translocase of outer membrane 20 kDa subunit) proteins. Most mitochondrial proteins are encoded by the nuclear genome, and are synthesised in the cytosol. TOM20 is a general import receptor that binds to mitochondrial pre-sequences in the early step of protein import into the mitochondria . . +PF06553 BNIP3
Pfam-B_18014 (release 10.0). This family consists of several mammalian specific BCL2/adenovirus E1B 19-kDa protein-interacting protein 3 or BNIP3 sequences. BNIP3 belongs to the Bcl-2 homology 3 (BH3)-only family, a Bcl-2-related family possessing an atypical Bcl-2 homology 3 (BH3) domain, which regulates PCD from mitochondrial sites by selective Bcl-2/Bcl-XL interactions. BNIP3 family members contain a C-terminal transmembrane domain that is required for their mitochondrial localisation, homodimerisation, as well as regulation of their pro-apoptotic activities. BNIP3-mediated apoptosis has been reported to be independent of caspase activation and cytochrome c release and is characterised by early plasma membrane and mitochondrial damage, prior to the appearance of chromatin condensation or DNA fragmentation .. +PF06554 Olfactory marker protein
Pfam-B_18049 (release 10.0). This family consists of several olfactory marker proteins. Expression of the olfactory marker protein (OMP) is highly restricted to mature olfactory receptor neurons in virtually all vertebrate species from fish to man.. +PF06556 IAP-like protein p27 C-terminus
Pfam-B_18169 (release 10.0). This family represents the C-terminal region of the African swine fever virus IAP-like protein p27. This family is found in conjunction with Pfam:PF00653. It has been suggested that the family may be a host range gene involved in aspects of infection in the arthropod host, ticks of the genus Ornithodoros .. +PF06557 Protein of unknown function (DUF1122)
Pfam-B_18183 (release 10.0). This family consists of several hypothetical archaeal and bacterial proteins of unknown function.. +PF06558 Secretion monitor precursor protein (SecM)
Pfam-B_18197 (release 10.0). This family consists of several bacterial Secretion monitor precursor (SecM) proteins. SecM is known to regulate SecA expression. The eubacterial protein secretion machinery consists of a number of soluble and membrane associated components. One critical element is SecA ATPase, which acts as a molecular motor to promote protein secretion at translocation sites that consist of SecYE, the SecA receptor, and SecG and SecDFyajC proteins, which regulate SecA membrane cycling .. +PF06559 2'-deoxycytidine 5'-triphosphate deaminase (DCD)
Pfam-B_18211 (release 10.0). This family consists of several bacterial 2'-deoxycytidine 5'-triphosphate deaminase proteins (EC:3.5.4.13).. +PF06560 Glucose-6-phosphate isomerase (GPI)
Pfam-B_18250 (release 10.0). This family consists of several bacterial and archaeal glucose-6-phosphate isomerase (GPI) proteins (EC:5.3.1.9).. +PF06563 Protein of unknown function (DUF1125)
Pfam-B_18065 (release 10.0). This family consists of several short Lactococcus lactis and bacteriophage proteins. The function of this family is unknown.. +PF06564 YhjQ protein
Pfam-B_18260 (release 10.0). This family consists of several bacterial YhjQ proteins. The function of this family is unknown. However, the family does contain a P-loop sequence motif suggesting a nucleotide binding function.. +PF06565 Repeat of unknown function (DUF1126)
Pfam-B_18695 (release 10.0). This family consists of several eukaryote specific repeats of around 35 residues in length. The function of this family is unknown.. +PF06566 Chondroitin sulphate attachment domain
Pfam-B_16515 (release 10.0). This family represents the chondroitin sulphate attachment domain of vertebrate neural transmembrane proteoglycans that contain EGF modules. Evidence has been accumulated to support the idea that neural proteoglycans are involved in various cellular events including mitogenesis, differentiation, axonal outgrowth and synaptogenesis . This domain contains several potential sites of chondroitin sulphate attachment, as well as potential sites of N-linked glycosylation .. +PF06567 Neural chondroitin sulphate proteoglycan cytoplasmic domain
Pfam-B_16515 (release 10.0). This family represents the C-terminal cytoplasmic domain of vertebrate neural chondroitin sulphate proteoglycans that contain EGF modules. Evidence has been accumulated to support the idea that neural proteoglycans are involved in various cellular events including mitogenesis, differentiation, axonal outgrowth and synaptogenesis . This domain contains a number of potential sites of phosphorylation by protein kinase C .. +PF06568 Domain of unknown function (DUF1127)
Pfam-B_18606 (release 10.0). This family is found in several hypothetical bacterial proteins. In some cases it represents the C-terminal region whereas in others it represents the whole sequence.. +PF06569 Protein of unknown function (DUF1128)
Pfam-B_18651 (release 10.0). This family consists of several short, hypothetical bacterial proteins of unknown function.. +PF06570 Protein of unknown function (DUF1129)
Pfam-B_18737 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06572 Protein of unknown function (DUF1131)
Pfam-B_18811 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06573 Churchill protein
Pfam-B_19061 (release 10.0). This family consists of several eukaryotic Churchill proteins. This protein contains a novel zinc binding region that mediates FGF signaling during neural development (unpublished obs Sheng G and Stern C).. +PF06574 Flavokinase;
Moxon SJ, Mistry J, Eddy S. Pfam-B_18632 (release 10.0). This family corresponds to the N terminal domain of the bifunctional enzyme riboflavin kinase / FAD synthetase. These enzymes have both ATP:riboflavin 5'-phospho transferase and ATP:FMN-adenylyltransferase activity . They catalyse the 5'-phosphorylation of riboflavin to FMN and the adenylylation of FMN to FAD . This domain is thought to have the flavin mononucleotide (FMN) adenylyltransferase activity .. +PF06575 Protein of unknown function (DUF1132)
Pfam-B_19091 (release 10.0). This family consists of several hypothetical proteins from Neisseria meningitidis. The function of this family is unknown.. +PF06576 Protein of unknown function (DUF1133)
Pfam-B_19184 (release 10.0). This family consists of a number of hypothetical proteins from Escherichia coli O157:H7 and Salmonella typhi. The function of this family is unknown.. +PF06577 Protein of unknown function (DUF1134)
Pfam-B_19217 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06578 YOP proteins translocation protein K (YscK)
Pfam-B_19248 (release 10.0). This family consists of several YscK proteins. The function of this protein is unknown but it belongs to an operon involved in the secretion of Yop proteins across bacterial membranes.. +PF06579 Caenorhabditis elegans ly-6-related protein
Pfam-B_19267 (release 10.0). This family consists of several Caenorhabditis elegans specific ly-6-related HOT and ODR proteins. These proteins are involved in the olfactory system. Odr-2 mutants are known to be defective in the ability to chemotax to odorants that are recognised by the two AWC olfactory neurons. Odr-2 encodes a membrane-associated protein related to the Ly-6 superfamily of GPI-linked signaling proteins .. +PF06580 Histidine kinase
Pfam-B_794 (release 10.0). This family represents a region within bacterial histidine kinase enzymes. Two-component signal transduction systems such as those mediated by histidine kinase are integral parts of bacterial cellular regulatory processes, and are used to regulate the expression of genes involved in virulence . Members of this family often contain Pfam:PF02518 and/or Pfam:PF00672.. +PF06581 DUF1135;
Mad1 and Cdc20-bound-Mad2 binding. Pfam-B_19462 (release 10.0). This family is involved in the cell-cycle surveillance mechanism called the spindle checkpoint. This mechanism monitors the proper bipolar attachment of sister chromatids to spindle microtubules and ensures the fidelity of chromosome segregation during mitosis. A key player in mitosis is Mad2, and Mad2 exhibits an unusual two-state behaviour. A Mad1-Mad2 core complex recruits cytosolic Mad2 to kinetochores through Mad2 dimerisation and converts Mad2 to a conformer amenable to Cdc20 binding. p31comet inactivates the checkpoint by binding to Mad1- or Cdc20-bound Mad2 in such a way as to stop Mad2 activation and to promote the dissociation of the Mad2-Cdc20 complex .. +PF06582 Repeat of unknown function (DUF1136)
Pfam-B_19544 (release 10.0). This family consists of several eukaryote specific repeats of unknown function. This repeat seems to always be found with Pfam:PF00047.. +PF06583 Neogenin_C-term;
Pfam-B_16188 (release 10.0). This family represents the C-terminus of eukaryotic neogenin precursor proteins, which contains several potential phosphorylation sites . Neogenin is a member of the N-CAM family of cell adhesion molecules (and therefore contains multiple copies of Pfam:PF00047 and Pfam:PF00041) and is closely related to the DCC tumour suppressor gene product - these proteins may play an integral role in regulating differentiation programmes and/or cell migration events within many adult and embryonic tissues .. +PF06584 DIRP
Pfam-B_2017 (release 9.0). DIRP (Domain in Rb-related Pathway) is postulated to be involved in the Rb-related pathway, which is encoded by multiple eukaryotic genomes and is present in proteins including lin-9 of Caenorhabditis elegans, aly of fruit fly and mustard weed. Studies of lin-9 and aly of fruit fly proteins containing DIRP suggest that this domain might be involved in development. Aly, lin-9, act in parallel to, or downstream of, activation of MAPK by the RTK-Ras signalling pathway.. +PF06585 Haemolymph juvenile hormone binding protein (JHBP)
Pfam-B_19686 (release 10.0). This family consists of several insect-specific haemolymph juvenile hormone binding proteins (JHBP). Juvenile hormone regulates embryogenesis, maintains the status quo of larval development and stimulates reproductive maturation in the adult insect. JH is transported from the sites of its synthesis to target tissues by a haemolymph carrier called juvenile hormone-binding protein (JHBP). JHBP protects the JH molecules from hydrolysis by non-specific esterases present in the insect haemolymph . The crystal structure of the JHBP from Galleria mellonella shows an unusual fold consisting of a long alpha-helix wrapped in a much curved antiparallel beta-sheet. The folding pattern for this structure closely resembles that found in some tandem-repeat mammalian lipid-binding and bactericidal permeability-increasing proteins, with a similar organisation of the major cavity and a disulfide bond linking the long helix and the beta-sheet. It would appear that JHBP forms two cavities, only one of which, the one near the N- and C-termini, binds the hormone; binding induces a conformational change, of unknown significance . This family now includes DUF233, Pfam:PF03027.. +PF06586 TraK protein
Pfam-B_19687 (release 10.0). This family consists of several TraK proteins from Escherichia coli, Salmonella typhi and Salmonella typhimurium. TraK is known to be essential for pilus assembly but its exact role in this process is unknown .. +PF06587 Protein of unknown function (DUF1137)
Pfam-B_20097 (release 10.0). This family consists of several hypothetical proteins specific to Chlamydia species. The function of this family is unknown.. +PF06588 Muskelin N-terminus
Pfam-B_20299 (release 10.0). This family represents the N-terminal region of muskelin and is found in conjunction with several Pfam:PF01344 repeats. Muskelin is an intracellular, kelch repeat protein that is needed in cell-spreading responses to the matrix adhesion molecule, thrombospondin-1 .. +PF06589 Circumsporozoite-related antigen (CRA)
Pfam-B_19386 (release 10.0). This family consists of several circumsporozoite-related antigen (CRA) or exported protein-1 (EXP1) sequences found specifically in Plasmodium species. The function of this family is unknown.. +PF06590 PerB protein
Pfam-B_19494 (release 10.0). This family consists of several PerB or BfpV proteins found specifically in Escherichia coli. PerB is thought to play a role in regulating the expression of BfpA .. +PF06591 T4-like phage nuclear disruption protein (Ndd)
Pfam-B_19553 (release 10.0). This family consists of several nuclear disruption (Ndd) proteins from T4-like phages. Early in a bacteriophage T4 infection, the phage ndd gene causes the rapid destruction of the structure of the Escherichia coli nucleoid. The targets of Ndd action may be the chromosomal sequences that determine the structure of the nucleoid .. +PF06592 Protein of unknown function (DUF1138)
Pfam-B_19518 (release 10.0). This family consists of several hypothetical short plant proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown.. +PF06593 Raspberry bushy dwarf virus coat protein
Pfam-B_19630 (release 10.0). This family consists of several Raspberry bushy dwarf virus coat proteins.. +PF06594 HCBP_repeat;
Haemolysin-type calcium binding protein related domain. Pfam-B_20041 (release 10.0). This family consists of a number of bacteria specific domains which are found in haemolysin-type calcium binding proteins. This family is found in conjunction with Pfam:PF00353 and is often found in multiple copies.. +PF06595 Borna disease virus P24 protein
Pfam-B_20092 (release 10.0). This family consists of several Borna disease virus (BDV) P24 proteins. The function of this family is unknown.. +PF06596 Photosystem II reaction centre X protein (PsbX)
Pfam-B_20149 (release 10.0). This family consists of several photosystem II reaction centre X protein (PsbX) sequences from both prokaryotes and eukaryotes.. +PF06597 Clostridium P-47 protein
Pfam-B_20156 (release 10.0). This family consists of several P-47 proteins from various Clostridium species as well as two related sequences from Pseudomonas putida. The function of this family is unknown.. +PF06598 Chlorovirus glycoprotein repeat
Pfam-B_19883 (release 10.0). This family consists of a number of repeats found in Chlorovirus glycoproteins. The function of this family is unknown.. +PF06599 Protein of unknown function (DUF1139)
Pfam-B_20355 (release 10.0). This family consists of several hypothetical Fijivirus proteins of unknown function.. +PF06600 Protein of unknown function (DUF1140)
Pfam-B_20379 (release 10.0). This family consists of several short, hypothetical phage and bacterial proteins. The function of this family is unknown.. +PF06601 Orthopoxvirus F6 protein
Pfam-B_20433 (release 10.0). This family consists of several Orthopoxvirus F6L proteins, the function of which is unknown.. +PF06602 Myotubularin-like phosphatase domain
Pfam-B_795 (release 10.0). This family represents the phosphatase domain within eukaryotic myotubularin-related proteins. Myotubularin is a dual-specific lipid phosphatase that dephosphorylates phosphatidylinositol 3-phosphate and phosphatidylinositol (3,5)-bi-phosphate . Mutations in genes encoding myotubularin-related proteins have been associated with disease .. +PF06603 DUF1141;
UpxZ family of transcription anti-terminator antagonists. Pfam-B_19606 (release 10.0). The UpxZ family of proteins acts to inhibit transcription of heterologous capsular polysaccharide loci in Bacteroides species by interfering with the action of the UpxY family of transcription anti-terminators. As antagonists of polysaccharide locus-specific UpxY transcription anti-terminators, the UpxZ proteins exert a hierarchical level of regulation, ensuring that only one of the multiple phase-variable capsular polysaccharide loci per cell characteristic of this genus is transcribed at a time.. +PF06605 DUF1142;
Prophage endopeptidase tail. Pfam-B_16284 (release 10.0). This family is of prophage tail proteins that are probably acting as endopeptidases.. +PF06607 Prokineticin
Pfam-B_19802 (release 10.0). This family consists of several prokineticin proteins and related BM8 sequences. The suprachiasmatic nucleus (SCN) controls the circadian rhythm of physiological and behavioural processes in mammals. It has been shown that prokineticin 2 (PK2), a cysteine-rich secreted protein, functions as an output molecule from the SCN circadian clock. PK2 messenger RNA is rhythmically expressed in the SCN, and the phase of PK2 rhythm is responsive to light entrainment. Molecular and genetic studies have revealed that PK2 is a gene that is controlled by a circadian clock .. +PF06608 Protein of unknown function (DUF1143)
Pfam-B_19953 (release 10.0). This family consists of several hypothetical mammalian proteins (from mouse and human). The function of this family is unknown.. +PF06609 Fungal trichothecene efflux pump (TRI12)
Pfam-B_19969 (release 10.0). This family consists of several fungal specific trichothecene efflux pump proteins. Many of the genes involved in trichothecene toxin biosynthesis in Fusarium sporotrichioides are present within a gene cluster. It has been suggested that TRI12 may play a role in F. sporotrichioides self-protection against trichothecenes .. +PF06610 Protein of unknown function (DUF1144)
Pfam-B_20026 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06611 Protein of unknown function (DUF1145)
Pfam-B_20029 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06612 Protein of unknown function (DUF1146)
Pfam-B_20141 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06613 KorB C-terminal beta-barrel domain
Pfam-B_20369 (release 10.0). This family consists of several KorB transcriptional repressor proteins. The korB gene is a major regulatory element in the replication and maintenance of broad host-range plasmid RK2. It negatively controls the replication gene trfA, the host-lethal determinants kilA and kilB, and the korA-korB operon . This beta-barrel domain is found at the C-terminus of KorB.. +PF06614 Neuromodulin
Pfam-B_20438 (release 10.0). This family consists of several neuromodulin (Axonal membrane protein GAP-43) sequences and is found in conjunction with Pfam:PF00612. GAP-43 is a neuronal calmodulin-binding phosphoprotein that is concentrated in growth cones and pre-synaptic terminals .. +PF06615 Protein of unknown function (DUF1147)
Pfam-B_20563 (release 10.0). This family consists of several short Circovirus proteins of unknown function.. +PF06616 BsuBI/PstI restriction endonuclease C-terminus
Pfam-B_16289 (release 10.0). This family represents the C-terminus of bacterial enzymes similar to type II restriction endonucleases BsuBI and PstI (EC:3.1.21.4). The enzymes of the BsuBI restriction/modification (R/M) system recognise the target sequence 5'CTGCAG and are functionally identical with those of the PstI R/M system .. +PF06617 M-phase inducer phosphatase
Pfam-B_16267 (release 10.0). This family represents a region within eukaryotic M-phase inducer phosphatases (EC:3.1.3.48), which also contain the Pfam:PF00581 domain. These proteins are involved in the control of mitosis .. +PF06618 Protein of unknown function (DUF1148)
Pfam-B_20595 (release 10.0). This family consists of several Maize streak virus proteins of unknown function.. +PF06619 Protein of unknown function (DUF1149)
Pfam-B_20513 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06620 Protein of unknown function (DUF1150)
Pfam-B_20612 (release 10.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF06621 Single-minded protein C-terminus
Pfam-B_21144 (release 10.0). This family represents the C-terminal region of the eukaryotic single-minded (SIM) protein. Drosophila single-minded acts as a positive master gene regulator in central nervous system midline formation. There are two homologues in mammals: SIM1 and SIM2, which are members of the basic-helix-loop-helix PAS family of transcription factors. SIM1 and SIM2 are novel heterodimerisation partners for ARNT in vitro, and they may function both as positive and negative transcriptional regulators in vivo, during embryogenesis and in the adult organism . SIM2 is thought to contribute to some specific Down syndrome phenotypes . This family is found in conjunction with a Pfam:PF00989 domain and associated Pfam:PF00785 motif.. +PF06622 SepQ protein
Pfam-B_21060 (release 10.0). This family consists of several enterobacterial SepQ proteins from Escherichia coli and Citrobacter rodentium. The function of this family is unclear.. +PF06623 MHC_I C-terminus
Pfam-B_21327 (release 10.0). This family represents the C-terminal region of the MHC class I antigen. The family is found in conjunction with Pfam:PF00129 and Pfam:PF00047.. +PF06624 Ribosome associated membrane protein RAMP4
Pfam-B_20959 (release 10.0). This family consists of several ribosome associated membrane protein RAMP4 (or SERP1) sequences. Stabilisation of membrane proteins in response to stress involves the concerted action of a rescue unit in the ER membrane comprised of SERP1/RAMP4, other components of the translocon, and molecular chaperones in the ER .. +PF06625 Protein of unknown function (DUF1151)
Pfam-B_21020 (release 10.0). This family consists of several hypothetical eukaryotic proteins of unknown function.. +PF06626 Protein of unknown function (DUF1152)
Pfam-B_21029 (release 10.0). This family consists of several hypothetical archaeal proteins of unknown function.. +PF06627 Protein of unknown function (DUF1153)
Pfam-B_21038 (release 10.0). This family consists of several short, hypothetical bacterial proteins of unknown function.. +PF06628 Catalase-related immune-responsive
Vella Briffa B, Coggill P. Pfam-B_16304 (release 10.0). This family represents a small conserved region within catalase enzymes (EC:1.11.1.6). All members also contain the Catalase family, Pfam:PF00199 domain. Catalase decomposes hydrogen peroxide into water and oxygen, serving to protect cells from its toxic effects . This domain carries the immune-responsive amphipathic octa-peptide that is recognised by T cells .. +PF06629 MltA-interacting protein MipA
Pfam-B_8359 (release 9.0). This family consists of several bacterial MltA-interacting protein (MipA) like sequences. As well as interacting with the membrane-bound lytic transglycosylase MltA, MipA is known to bind to PBP1B, a bifunctional murein transglycosylase/transpeptidase. MipA is considered to be a structural protein mediating the assembly of MltA to PBP1B into a complex .. +PF06630 Enterobacterial exodeoxyribonuclease VIII
Pfam-B_11449 (release 9.0). This family consists of several Enterobacterial exodeoxyribonuclease VIII proteins.. +PF06631 Protein of unknown function (DUF1154)
Pfam-B_16329 (release 10.0). This family represents a small conserved region of unknown function within eukaryotic phospholipase C (EC:3.1.4.3). All members also contain Pfam:PF00387 and Pfam:PF00388.. +PF06632 DNA double-strand break repair and V(D)J recombination protein XRCC4
Pfam-B_21077 (release 10.0). This family consists of several eukaryotic DNA double-strand break repair and V(D)J recombination protein XRCC4 sequences. In the non-homologous end joining pathway of DNA double-strand break repair, the ligation step is catalysed by a complex of XRCC4 and DNA ligase IV. It is thought that XRCC4 and ligase IV are essential for alignment-based gap filling, as well as for final ligation of the breaks .. +PF06633 Protein of unknown function (DUF1155)
Pfam-B_21101 (release 10.0). This family consists of several Cucumber mosaic virus ORF IIB proteins. The function of this family is unknown.. +PF06634 Protein of unknown function (DUF1156)
Vella Briffa B, Eberhardt R. Pfam-B_16387 (release 10.0). This family represents a conserved region within hypothetical prokaryotic and archaeal proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids .. +PF06635 Nodulation protein NolV
Pfam-B_21143 (release 10.0). This family consists of several nodulation protein NolV sequences from different Rhizobium species . The function of this family is unclear.. +PF06636 Protein of unknown function (DUF1157)
Pfam-B_21250 (release 10.0). This family consists of several uncharacterised proteins from Melanoplus sanguinipes entomopoxvirus (MsEPV). The function of this family is unknown.. +PF06637 PV-1 protein (PLVAP)
Pfam-B_21397 (release 10.0). This family consists of several PV-1 (PLVAP) proteins which seem to be specific to mammals. PV-1 is a novel protein component of the endothelial fenestral and stomatal diaphragms . The function of this family is unknown.. +PF06638 Strabismus protein
Pfam-B_9849 (release 9.0). This family consists of several strabismus (STB) or Van Gogh-like (VANGL) proteins 1 and 2. The exact function of this family is unknown. It is thought, however, that the STB1 and STB2 genes may be potent tumour suppressor gene candidates .. +PF06639 Basal layer antifungal peptide (BAP)
Pfam-B_21444 (release 10.0). This family consists of several basal layer antifungal peptide (BAP) sequences specific to Zea mays. The BAP2 peptide exhibits potent broad-range activity against a range of filamentous fungi, including several plant pathogens . . +PF06640 P_prot_C-term;
P protein C-terminus. Pfam-B_16385 (release 10.0). This family represents the C-terminus of plant P proteins. The maize P gene is a transcriptional regulator of genes encoding enzymes for flavonoid biosynthesis in the pathway leading to the production of a red phlobaphene pigment , and P proteins are homologous to the DNA-binding domain of myb-like transcription factors . All members of this family contain the Pfam:PF00249 domain.. +PF06643 Protein of unknown function (DUF1158)
Pfam-B_21508 (release 10.0). This family consists of several enterobacterial YbdJ proteins. The function of this family is unknown. +PF06644 ATP11 protein
Moxon SJ, Hammonds G. Pfam-B_21093 (release 10.0). This family consists of several eukaryotic ATP11 proteins. In Saccharomyces cerevisiae, expression of functional F1-ATPase requires two proteins encoded by the ATP11 and ATP12 genes . Atp11p is a molecular chaperone of the mitochondrial matrix that participates in the biogenesis pathway to form F1, the catalytic unit of the ATP synthase .. +PF06645 Microsomal signal peptidase 12 kDa subunit (SPC12)
Pfam-B_21331 (release 10.0). This family consists of several microsomal signal peptidase 12 kDa subunit proteins. Translocation of polypeptide chains across the endoplasmic reticulum (ER) membrane is triggered by signal sequences. Subsequently, signal recognition particle interacts with its membrane receptor and the ribosome-bound nascent chain is targeted to the ER where it is transferred into a protein-conducting channel. At some point, a second signal sequence recognition event takes place in the membrane and translocation of the nascent chain through the membrane occurs. The signal sequence of most secretory and membrane proteins is cleaved off at this stage. Cleavage occurs by the signal peptidase complex (SPC) as soon as the lumenal domain of the translocating polypeptide is large enough to expose its cleavage site to the enzyme. The signal peptidase complex is possibly also involved in proteolytic events in the ER membrane other than the processing of the signal sequence, for example the further digestion of the cleaved signal peptide or the degradation of membrane proteins. Mammalian signal peptidase is a complex of five different polypeptide chains. This family represents the 12 kDa subunit (SPC12).. +PF06646 High affinity transport system protein p37
Pfam-B_21350 (release 10.0). This family consists of several high affinity transport system protein p37 sequences which are specific to Mycoplasma species. The p37 gene is part of an operon encoding two additional proteins which are highly similar to components of the periplasmic binding-protein-dependent transport systems of Gram-negative bacteria. It has been suggested that p37 is part of a homologous, high-affinity transport system in M. hyorhinis, a Gram-positive bacterium .. +PF06648 Protein of unknown function (DUF1160)
Pfam-B_21501 (release 10.0). This family consists of several hypothetical Baculovirus proteins of unknown function.. +PF06649 Protein of unknown function (DUF1161)
Pfam-B_21545 (release 10.0). This family consists of several short, hypothetical bacterial proteins of unknown function.. +PF06650 Protein of unknown function (DUF1162)
Pfam-B_16458 (release 10.0). This family represents a conserved region within several hypothetical eukaryotic proteins. Family members might be vacuolar protein sorting related-proteins. . +PF06651 Protein of unknown function (DUF1163)
Pfam-B_16509 (release 10.0). This family represents the C-terminus of hypothetical Arabidopsis thaliana proteins of unknown function. . +PF06652 Methuselah N-terminus
Pfam-B_3337 (release 10.0). This family represents the N-terminal region of the Drosophila specific Methuselah protein. Drosophila Methuselah (Mth) mutants have a 35% increase in average lifespan and increased resistance to several forms of stress, including heat, starvation, and oxidative damage. The protein affected by this mutation is related to G protein-coupled receptors of the secretin receptor family. Mth, like secretin receptor family members, has a large N-terminal ectodomain, which may constitute the ligand binding site . This family is found in conjunction with Pfam:PF00002.. +PF06653 DUF1164;
Tight junction protein, Claudin-like. Pfam-B_21553 (release 10.0). This is a family of probable membrane tight junction, Claudin-like, proteins.. +PF06656 Tenuivirus PVC2 protein
Pfam-B_15006 (release 10.0). This family consists of several Tenuivirus PVC2 proteins from Rice grassy stunt virus, Maize stripe virus and Rice hoja blanca virus. The function of this family is unknown.. +PF06657 DUF1167;
Centrosome microtubule-binding domain of Cep57. Pfam-B_9878 (release 9.0). This C-terminal region of Cep57 binds, nucleates and bundles microtubules. The N-terminal part, family Cep57_CLD, Pfam:PF14073, is the centrosome localisation domain Cep57 .. +PF06658 Protein of unknown function (DUF1168)
Pfam-B_9807 (release 9.0). This family consists of several hypothetical eukaryotic proteins of unknown function. . +PF06661 VirE3
Pfam-B_16550 (release 10.0). This family represents a conserved region within Agrobacterium tumefaciens VirE3. Agrobacterium tumefaciens (a plant pathogen) has a tumour-inducing (Ti) plasmid of which part, the transfer (T)-region, is transferred to plant cells during the infection process. Vir proteins mediate the processing of the T-region and the transfer of a single-stranded (ss) DNA copy of this region, the T-strand, into the recipient cells. VirE3 is a translocated effector protein, but its specific role has not been established .. +PF06662 C5-epim_C-term;
D-glucuronyl C5-epimerase C-terminus. Pfam-B_16571 (release 10.0). This family represents the C-terminus of D-glucuronyl C5-epimerase (EC:5.1.3.-). Glucuronyl C5-epimerases catalyse the conversion of D-glucuronic acid (GlcUA) to L-iduronic acid (IdceA) units during the biosynthesis of glycosaminoglycans .. +PF06663 Protein of unknown function (DUF1170)
Pfam-B_16582 (release 10.0). This family represents a conserved region of unknown function within MAGUIN, a neuronal membrane-associated guanylate kinase-interacting protein. This region is situated between the Pfam:PF00595 and Pfam:PF00169 domains . All family members also contain an N-terminal Pfam:PF00536 domain.. +PF06664 DUF1171; MIG-14_Ce;
Wnt-binding factor required for Wnt secretion. Vella Briffa B, Pollington JE. Pfam-B_16593 (release 10.0). MIG-14 is a Wnt-binding factor. Newly synthesised EGL-20/Wnt binds to MIG-14 in the Golgi, targeting the Wnt to the cell membrane for secretion. AP-2-mediated endocytosis and retromer retrieval at the sorting endosome would recycle MIG-14 to the Golgi, where it can bind to EGL-20/Wnt for the next cycle of secretion .. +PF06666 Protein of unknown function (DUF1173)
Pfam-B_16904 (release 10.0). This family contains a group of hypothetical bacterial proteins that contain three conserved cysteine residues towards the N-terminal. The function of these proteins is unknown.. +PF06667 Phage shock protein B
Pfam-B_21806 (release 10.0). This family consists of several bacterial phage shock protein B (PspB) sequences. The phage shock protein (psp) operon is induced in response to heat, ethanol, osmotic shock and infection by filamentous bacteriophages . Expression of the operon requires the alternative sigma factor sigma54 and the transcriptional activator PspF. In addition, PspA plays a negative regulatory role, and the integral-membrane proteins PspB and PspC play a positive one .. +PF06668 ITI_HC_C-term;
Inter-alpha-trypsin inhibitor heavy chain C-terminus. Pfam-B_16674 (release 10.0). This family represents the C-terminal region of inter-alpha-trypsin inhibitor heavy chains. Inter-alpha-trypsin inhibitors are glycoproteins with a high inhibitory activity against trypsin, built up from different combinations of four polypeptides: bikunin and the three heavy chains that belong to this family (HC1, HC2, HC3). The heavy chains do not have any protease inhibitory properties but have the capacity to interact in vitro and in vivo with hyaluronic acid, which promotes the stability of the extra-cellular matrix . All family members contain the Pfam:PF00092 domain.. +PF06669 Xylella fastidiosa surface protein related
Pfam-B_21796 (release 10.0). This family consists of several Xylella fastidiosa surface protein specific repeats which are found in conjunction with Pfam:PF05662, Pfam:PF05658 and Pfam:PF03895.. +PF06670 Microneme protein Etmic-2
Pfam-B_22009 (release 10.0). This family consists of several Microneme protein Etmic-2 sequences from Eimeria tenella. Etmic-2 is a 50 kDa acidic protein, which is found within the microneme organelles of Eimeria tenella sporozoites and merozoites .. +PF06671 Repeat of unknown function (DUF1174)
Pfam-B_22180 (release 10.0). This family consists of a number of Caenorhabditis elegans specific repeats of around 36 residues in length which are found in two hypothetical proteins. This family is found in conjunction with Pfam:PF00024.. +PF06672 Protein of unknown function (DUF1175)
Pfam-B_21722 (release 10.0). This family consists of several hypothetical bacterial proteins of around 210 residues in length. The function of this family is unknown.. +PF06673 Lactococcus lactis bacteriophage major capsid protein
Pfam-B_21754 (release 10.0). This family consists of several Lactococcus lactis bacteriophage major capsid proteins.. +PF06674 Protein of unknown function (DUF1176)
Pfam-B_21791 (release 10.0). This family consists of several hypothetical bacterial proteins of around 340 residues in length. Members of this family contain six highly conserved cysteine residues. The function of this family is unknown.. +PF06675 Protein of unknown function (DUF1177)
Pfam-B_21818 (release 10.0). This family consists of several hypothetical archaeal and bacterial proteins of around 300 residues in length. The function of this family is unknown.. +PF06676 Protein of unknown function (DUF1178)
Pfam-B_21872 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown.. +PF06677 Sjogren's syndrome/scleroderma autoantigen 1 (Autoantigen p27)
Pfam-B_21881 (release 10.0). This family consists of several Sjogren's syndrome/scleroderma autoantigen 1 (Autoantigen p27) sequences. It is thought that the potential association of anti-p27 with anti-centromere antibodies suggests that autoantigen p27 might play a role in mitosis .. +PF06678 Protein of unknown function (DUF1179)
Pfam-B_21899 (release 10.0). This family consists of several hypothetical Caenorhabditis elegans proteins of around 106 residues in length. The function of the family is unknown.. +PF06679 Protein of unknown function (DUF1180)
Pfam-B_21907 (release 10.0). This family consists of several hypothetical mammalian proteins of around 190 residues in length. The function of this family is unknown.. +PF06680 Protein of unknown function (DUF1181)
Pfam-B_21912 (release 10.0). This family consists of several hypothetical proteins of around 120 residues in length which are found specifically in Trypanosoma brucei. The function of this family is unknown.. +PF06681 Protein of unknown function (DUF1182)
Pfam-B_21992 (release 10.0). This family consists of several hypothetical proteins of around 360 residues in length and seems to be specific to Caenorhabditis elegans. The function of this family is unknown.. +PF06682 Protein of unknown function (DUF1183)
Pfam-B_22014 (release 10.0). This family consists of several eukaryotic proteins of around 360 residues in length. The function of this family is unknown.. +PF06683 Protein of unknown function (DUF1184)
Pfam-B_16725 (release 10.0). This family contains a number of hypothetical proteins of unknown function from Arabidopsis thaliana.. +PF06684 DUF1185;
Amino acid synthesis. Moxon SJ, Eberhardt R. Pfam-B_22398 (release 10.0). This family of proteins is structurally similar to proteins with the Bacillus chorismate mutase-like (BCM-like) fold. This structure, combined with its genomic context, suggest that it has a role in amino acid synthesis .. +PF06685 Protein of unknown function (DUF1186)
Pfam-B_22662 (release 10.0). This family consists of several hypothetical bacterial proteins of around 250 residues in length and is found in several Chlamydia and Anabaena species. The function of this family is unknown.. +PF06686 Stage III sporulation protein AC/AD protein family
Pfam-B_22771 (release 10.0), TIGRFAMs. This family consists of several bacterial stage III sporulation protein AC (SpoIIIAC) and SpoIIIAD sequences. The exact function of this family is unknown. SpoIIIAD is an uncharacterised protein which is part of the spoIIIA operon that acts at sporulation stage III as part of a cascade of events leading to endospore formation. The operon is regulated by sigmaG .. +PF06687 SUR7/PalI family
Pfam-B_22775 (release 10.0). This family consists of several fungal-specific SUR7 proteins. Its activity regulates expression of RVS161, a homologue of human endophilin, suggesting a function for both in endocytosis [1,2]. The protein carries four transmembrane domains and is thus likely to act as an anchoring protein for the eisosome to the plasma membrane. Eisosomes are the immobile protein complexes, that include the proteins Pil1 and Lsp1, which co-localise with sites of protein and lipid endocytosis at the plasma membrane. SUR7 protein may play a role in sporulation . This family also includes PalI which is part of a pH signal transduction cascade. Based on the similarity of PalI to the yeast Rim9 meiotic signal transduction component it has been suggested that PalI might be a membrane sensor for ambient pH .. +PF06688 Protein of unknown function (DUF1187)
Pfam-B_22781 (release 10.0). This family consists of several short, hypothetical bacterial proteins of around 62 residues in length. Members of this family are found in Escherichia coli and Salmonella typhi. The function of this family is unknown.. +PF06689 ClpX C4-type zinc finger
Pfam-B_465 (release 10.0). The ClpX heat shock protein of Escherichia coli is a member of the universally conserved Hsp100 family of proteins, and possesses a putative zinc finger motif of the C4 type. This presumed zinc binding domain is found at the N-terminus of the ClpX protein. ClpX is an ATPase which functions both as a substrate specificity component of the ClpXP protease and as a molecular chaperone. The molecular function of this domain is now known.. +PF06690 Protein of unknown function (DUF1188)
Pfam-B_22902 (release 10.0). This family consists of several hypothetical archaeal proteins of around 260 residues in length which seem to be specific to Methanobacterium, Methanococcus and Methanopyrus species. The function of this family is unknown.. +PF06691 Protein of unknown function (DUF1189)
Pfam-B_22923 (release 10.0). This family consists of several hypothetical bacterial proteins of around 260 residues in length. The function of this family is unknown.. +PF06692 Melon necrotic spot virus P7B protein
Pfam-B_22334 (release 10.0). This family consists of several Melon necrotic spot virus (MNSV) P7B proteins. The function of this family is unknown.. +PF06693 Protein of unknown function (DUF1190)
Pfam-B_22972 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 212 residues in length and is known as YjfM in Escherichia coli. The function of this family is unknown.. +PF06694 Plant nuclear matrix protein 1 (NMP1)
Pfam-B_22388 (release 10.0). This family consists of several plant specific nuclear matrix protein 1 (NMP1) sequences. Nuclear Matrix Protein 1 is a ubiquitously expressed 36 kDa protein, which has no homologues in animals and fungi, but is highly conserved among flowering and non-flowering plants. NMP1 is located both in the cytoplasm and nucleus, and the nuclear fraction is associated with the nuclear matrix. NMP1 is a candidate for a plant-specific structural protein with a function both in the nucleus and cytoplasm . . +PF06695 sm_multidrug_ex;
Putative small multi-drug export protein. Pfam-B_16747 (release 10.0). This family contains a small number of putative small multi-drug export proteins.. +PF06696 Streptococcal surface antigen repeat
Pfam-B_22674 (release 10.0). This family consists of a number of ~25 residue long repeats found commonly in Streptococcal surface antigens although one copy is present in the HPSR2-heavy chain potential motor protein of Giardia lamblia (Swiss:Q24984). This family is often found in conjunction with Pfam:PF00746.. +PF06697 Protein of unknown function (DUF1191)
Pfam-B_16754 (release 10.0). This family contains hypothetical plant proteins of unknown function.. +PF06698 Protein of unknown function (DUF1192)
Pfam-B_22780 (release 10.0). This family consists of several short, hypothetical, bacterial proteins of around 60 residues in length. The function of this family is unknown.. +PF06699 GPI biosynthesis protein family Pig-F
Pfam-B_8602 (release 8.0). PIG-F is involved in glycosylphosphatidylinositol (GPI) anchor biosynthesis [1-3].. +PF06701 Mib_herc2
Pfam-B_6026 (release 8.0). Named "mib/herc2 domain" in . Usually the protein also contains an E3 ligase domain (either Ring or Hect). . +PF06702 Protein of unknown function (DUF1193)
Pfam-B_16766 (release 10.0). This family represents the C-terminus of several hypothetical eukaryotic proteins of unknown function. Family members contain two conserved motifs: DRHHYE and QCC, as well as a number of conserved cysteine residues.. +PF06703 Microsomal signal peptidase 25 kDa subunit (SPC25)
Pfam-B_22374 (release 10.0). This family consists of several microsomal signal peptidase 25 kDa subunit proteins. Translocation of polypeptide chains across the endoplasmic reticulum (ER) membrane is triggered by signal sequences. Subsequently, signal recognition particle interacts with its membrane receptor and the ribosome-bound nascent chain is targeted to the ER where it is transferred into a protein-conducting channel. At some point, a second signal sequence recognition event takes place in the membrane and translocation of the nascent chain through the membrane occurs. The signal sequence of most secretory and membrane proteins is cleaved off at this stage. Cleavage occurs by the signal peptidase complex (SPC) as soon as the lumenal domain of the translocating polypeptide is large enough to expose its cleavage site to the enzyme. The signal peptidase complex is possibly also involved in proteolytic events in the ER membrane other than the processing of the signal sequence, for example the further digestion of the cleaved signal peptide or the degradation of membrane proteins. Mammalian signal peptidase is a complex of five different polypeptide chains. This family represents the 25 kDa subunit (SPC25).. +PF06705 SF-assemblin/beta giardin
Pfam-B_22934 (release 10.0). This family consists of several eukaryotic SF-assemblin and related beta giardin proteins. During mitosis the SF-assemblin-based cytoskeleton is reorganised; it divides in prophase and is reduced to two dot-like structures at each spindle pole in metaphase. During anaphase, the two dots present at each pole are connected again. In telophase there is an asymmetrical outgrowth of new fibres. It has been suggested that SF-assemblin is involved in re-establishing the microtubular root system characteristic of interphase cells after mitosis . . +PF06706 Citrus tristeza virus 6-kDa protein
Pfam-B_22973 (release 10.0). This family consists of several Citrus tristeza virus (CTV) 6-kDa, 51 residue long hydrophobic (P6) proteins. The function of this family is unknown.. +PF06707 Protein of unknown function (DUF1194)
Pfam-B_22986 (release 10.0). This family consists of several hypothetical Rhizobiales specific proteins of around 270 residues in length. The function of this family is unknown.. +PF06708 Protein of unknown function (DUF1195)
Pfam-B_22828 (release 10.0). This family consists of several plant specific hypothetical proteins of around 160 residues in length. The function of this family is unknown.. +PF06709 Protein of unknown function (DUF1196)
Pfam-B_23162 (release 10.0). This family consists of several hypothetical bacterial proteins of around 51 residues in length which seem to be specific to Vibrio cholerae. The function of this family is unknown.. +PF06711 Protein of unknown function (DUF1198)
Pfam-B_23016 (release 10.0). This family consists of several bacterial proteins of around 150 residues in length which are specific to Escherichia coli, Salmonella species and Yersinia pestis. The function of this family is unknown.. +PF06712 Protein of unknown function (DUF1199)
Pfam-B_23160 (release 10.0). This family consists of several hypothetical Feline immunodeficiency virus (FIV) proteins. Members of this family are typically around 67 residues long and are often annotated as ORF3 proteins. The function of this family is unknown.. +PF06713 DUF1200;
Pfam-B_23245 (release 10.0). This family consists of several hypothetical proteins specific to Oceanobacillus and Bacillus species. Members of this family are typically around 130 residues in length. The function of this family is unknown. Members of this family have a PH domain like structure .. +PF06714 Gp5 N-terminal OB domain
This domain is found at the N terminus of the Gp5 baseplate protein of bacteriophage T4. This domain binds to the Gp27 protein . This domain has the common OB fold .. +PF06715 Gp5 C-terminal repeat (3 copies)
This repeat composes the C-terminal part of the bacteriophage T4 baseplate protein Gp5. This region of the protein forms a needle like projection from the baseplate that is presumed to puncture the bacterial cell membrane. Structurally three copies of the repeated region trimerise to form a beta solenoid type structure . This family also includes repeats from bacterial Vgr proteins.. +PF06716 Protein of unknown function (DUF1201)
Pfam-B_23269 (release 10.0). This family consists of several Sugar beet yellow virus (SBYV) putative membrane-binding proteins of around 54 residues in length. The function of this family is unknown.. +PF06717 Protein of unknown function (DUF1202)
Pfam-B_23300 (release 10.0). This family consists of several hypothetical bacterial proteins of around 335 residues in length. Members of this family are found exclusively in Escherichia coli and Salmonella species and are often referred to as YggM proteins. The function of this family is unknown.. +PF06718 Protein of unknown function (DUF1203)
Pfam-B_23313 (release 10.0). This family consists of several hypothetical bacterial proteins of around 155 residues in length. Family members are present in Rhizobium, Agrobacterium and Streptomyces species.. +PF06719 AraC_N-term;
AraC-type transcriptional regulator N-terminus. Pfam-B_16798 (release 10.0). This family represents the N-terminus of bacterial ARAC-type transcriptional regulators. In E. coli, these regulate the L-arabinose operon through sensing the presence of arabinose, and when the sugar is present, transmitting this information from the arabinose-binding domains to the protein's DNA-binding domains . This family might represent the N-terminal arm of the protein, which binds to the C-terminal DNA binding domains to hold them in a state where the protein prefers to loop and remain non-activating . All family members contain the Pfam:PF00165 domain.. +PF06720 Bacteriophage phi-29 early protein GP16.7
Pfam-B_23362 (release 10.0). This family consists of several bacteriophage phi-29 early protein GP16.7 sequences of around 130 residues in length. The function of this family is unknown.. +PF06721 Protein of unknown function (DUF1204)
Pfam-B_16832 (release 10.0). This family represents the C-terminus of a number of Arabidopsis thaliana hypothetical proteins of unknown function. Family members contain a conserved DFD motif.. +PF06722 Protein of unknown function (DUF1205)
Pfam-B_16780 (release 10.0). This family represents a conserved region of unknown function within bacterial glycosyl transferases. Many family members contain Pfam:PF03033.. +PF06723 MreB/Mbl protein
Pfam-B_471 (release 10.0). This family consists of bacterial MreB and Mbl proteins as well as two related archaeal sequences. MreB is known to be a rod shape-determining protein in bacteria and goes to make up the bacterial cytoskeleton. Genes coding for MreB/Mbl are only found in elongated bacteria, not in coccoid forms. It has been speculated that constituents of the eukaryotic cytoskeleton (tubulin, actin) may have evolved from prokaryotic precursor proteins closely related to today's bacterial proteins FtsZ and MreB/Mbl .. +PF06724 Domain of Unknown Function (DUF1206)
This region consists of a pair of transmembrane helices and occurs three times in each of the family member proteins.. +PF06725 3D domain
This short presumed domain contains three conserved aspartate residues, hence the name 3D. It has been shown to be part of the catalytic double psi beta barrel domain of MltA .. +PF06726 Bladder cancer-related protein BC10
Wood V, Moxon SJ, Coggill PC. This family consists of a series of short proteins of around 90 residues in length. The human protein Swiss:O60629 or BC10 has been implicated in bladder cancer where the transcription of the gene coding for this protein is nearly completely abolished in highly invasive transitional cell carcinomas (TCCs) . The protein is a small globular protein containing two transmembrane helices, and it is a multiply edited transcript. All the editing sites are found in either the 5'-UTR or the N-terminal section of the protein, which is predicted to be outside the membrane. The three coding edits are all non-synonymous and predicted to encode exposed residues . The function of this family is unknown.. +PF06727 Protein of unknown function (DUF1207)
Pfam-B_23454 (release 10.0). This family consists of a number of hypothetical bacterial proteins of around 410 residues in length which seem to be specific to Chlamydia species. The function of this family is unknown.. +PF06728 GPI transamidase subunit PIG-U
Pfam-B_7677 (release 9.0). Many eukaryotic proteins are anchored to the cell surface via glycosylphosphatidylinositol (GPI), which is posttranslationally attached to the carboxyl-terminus by GPI transamidase. The mammalian GPI transamidase is a complex of at least four subunits, GPI8, GAA1, PIG-S, and PIG-T. PIG-U is thought to represent a fifth subunit in this complex and may be involved in the recognition of either the GPI attachment signal or the lipid portion of GPI . . +PF06729 NRIF3;
Kinetochore component, CENP-R. Pfam-B_23665 (release 10.0). This family consists of mammalian kinetochore sub-complex proteins CENP-R, also referred to as nuclear receptor co-activator NRIF3 proteins. NRIF3 exhibits a distinct receptor specificity in interacting with and potentiating the activity of only TRs and RXRs but not other examined nuclear receptors. NRIF3 is a co-regulator that possesses both transactivation and transrepression domains and/or functions. Collectively, the NRIF3 family of co-regulators may play dual roles in mediating both positive and negative regulatory effects on gene expression . CENP-R is one of the 15 components that make up the constitutive centromere associated complex (CCAN) part of the kinetochore. A sub-complex of CCAN, consisting of CENP-P/O/R/Q/U self-assembles on kinetochores with varying stoichiometry and undergoes a pre-mitotic maturation step. Kinetochore assembly is a cell cycle regulated multi-step process. The initial step occurs during interphase and involves loading of the 15-subunit constitutive centromere associated complex (CCAN). Kinetochores are multi-protein megadalton assemblies that are required for attachment of microtubules to centromeres and, in turn, the segregation of chromosomes in mitosis [2,3].. +PF06730 DUF1208;
Moxon SJ, Eberhardt R. Pfam-B_23546 (release 10.0). This family of proteins has a role in embryogenesis. During embryogenesis it is essential for ectoderm and axial mesoderm development . It may regulate cell proliferation and apoptosis .. +PF06732 Pescadillo N-terminus
Pfam-B_77615 (release 9.0). This family represents the N-terminal region of Pescadillo. Pescadillo protein localises to distinct substructures of the interphase nucleus including nucleoli, the site of ribosome biogenesis. During mitosis pescadillo closely associates with the periphery of metaphase chromosomes and by late anaphase is associated with nucleolus-derived foci and prenucleolar bodies. Blastomeres in mouse embryos lacking pescadillo arrest at morula stages of development, the nucleoli fail to differentiate and accumulation of ribosomes is inhibited. It has been proposed that in mammalian cells pescadillo is essential for ribosome biogenesis and nucleologenesis and that disruption to its function results in cell cycle arrest . This family is often found in conjunction with a Pfam:PF00533 domain.. +PF06733 DEAD_2
Pfam-B_1713 (release 10.0). This represents a conserved region within a number of RAD3-like DNA-binding helicases that are seemingly ubiquitous - members include proteins of eukaryotic, bacterial and archaeal origin. RAD3 is involved in nucleotide excision repair, and forms part of the transcription factor TFIIH in yeast .. +PF06734 UL97
Pfam-B_1747 (release 10.0). This family represents a conserved region within viral UL97 phosphotransferases. UL97 participates in the phosphorylation of the nucleoside analog ganciclovir (GCV) to produce GCV-monophosphate .. +PF06736 Protein of unknown function (DUF1211)
Pfam-B_2594 (release 10.0). This family represents a conserved region within a number of hypothetical proteins of unknown function found in eukaryotes, bacteria and archaea. These may possibly be integral membrane proteins.. +PF06737 Transglycosylase-like domain
This family of proteins is very likely to act as transglycosylase enzymes related to Pfam:PF00062 and Pfam:PF01464. These other families are weakly matched by this family, and include the known active site residues.. +PF06738 Protein of unknown function (DUF1212)
Pfam-B_1646 (release 10.0). This family represents a conserved region within a number of hypothetical membrane proteins of unknown function found in eukaryotes, bacteria and archaea.. +PF06739 Beta-propeller repeat
This family is related to Pfam:PF00400 and is likely to also form a beta-propeller. SBBP stands for Seven Bladed Beta Propeller.. +PF06740 Protein of unknown function (DUF1213)
Pfam-B_2177 (release 10.0). This family represents a short conserved repeat within Drosophila melanogaster proteins of unknown function. Approximately 50 copies of this repeat are present in each protein.. +PF06741 Ataxin-2-like; Ataxin-2_N;
Pfam-B_2543 (release 10.0). This domain is found associated with Lsm domain .. +PF06742 Protein of unknown function (DUF1214)
Pfam-B_2721 (release 10.0). This family represents the C-terminal region of several hypothetical proteins of unknown function. Family members are mostly bacterial, but a few are also found in eukaryotes and archaea.. +PF06743 FAST_Leu-rich;
FAST kinase-like protein, subdomain 1. Vella Briffa B, Fenech M. Pfam-B_2858 (release 10.0). This family represents a conserved region of eukaryotic Fas-activated serine/threonine (FAST) kinases (EC:2.7.1.-) that contains several conserved leucine residues. FAST kinase is rapidly activated during Fas-mediated apoptosis, when it phosphorylates TIA-1, a nuclear RNA-binding protein that has been implicated as an effector of apoptosis . Note that many family members are hypothetical proteins. This region is often found immediately N-terminal to the FAST kinase-like protein, subdomain 2.. +PF06744 Protein of unknown function (DUF1215)
Pfam-B_2952 (release 10.0). This family represents a conserved region situated towards the C-terminal end of several hypothetical bacterial proteins of unknown function. A few members resemble the ImcF protein, which has been proposed to be involved in Vibrio cholerae cell surface reorganisation that results in increased adherence to epithelial cells line and increased conjugation frequency.. +PF06745 KaiC
Pfam-B_2234 (release 10.0). This family represents a conserved region within bacterial and archaeal proteins, most of which are hypothetical. More than one copy is sometimes found in each protein. This family includes KaiC, which is one of the Kai proteins among which direct protein-protein association may be a critical process in the generation of circadian rhythms in cyanobacteria .. +PF06746 Protein of unknown function (DUF1216)
Pfam-B_3048 (release 10.0). This family represents a conserved region, within Arabidopsis thaliana proteins, of unknown function. Family members sometimes contain more than one copy. It has been reported that this domain will be found in other Brassicaceae.. +PF06747 CHCH domain
Westerman BA, Poutsma A, Steegers E, Oudejans CBM. we have identified a conserved motif in the LOC118487 protein that we have called the CHCH motif. Alignment of this protein with related members showed the presence of three subgroups of proteins, which are called the S (Small), N (N-terminal extended) and C (C-terminal extended) subgroups. All three sub-groups of proteins have in common that they contain a predicted conserved [coiled coil 1]-[helix 1]-[coiled coil 2]-[helix 2] domain (CHCH domain). Within each helix of the CHCH domain, there are two cysteines present in a C-X9-C motif. The N-group contains an additional double helix domain, and each helix contains the C-X9-C motif. This family contains a number of characterised proteins: Cox19 protein - a nuclear gene of Saccharomyces cerevisiae, codes for an 11-kDa protein (Cox19p) required for expression of cytochrome oxidase. Because cox19 mutants are able to synthesise the mitochondrial and nuclear gene products of cytochrome oxidase, Cox19p probably functions post-translationally during assembly of the enzyme. Cox19p is present in the cytoplasm and mitochondria, where it exists as a soluble intermembrane protein. This dual location is similar to what was previously reported for Cox17p, a low molecular weight copper protein thought to be required for maturation of the CuA centre of subunit 2 of cytochrome oxidase. Cox19p have four conserved potential metal ligands, these are three cysteines and one histidine. Mrp10 - belongs to the class of yeast mitochondrial ribosomal proteins that are essential for translation . Eukaryotic NADH-ubiquinone oxidoreductase 19 kDa (NDUFA8) subunit . The CHCH domain was previously called DUF657 .. +PF06748 Protein of unknown function (DUF1217)
Pfam-B_3199 (release 10.0). This family represents a conserved region that is found within bacterial proteins, most of which are hypothetical. Some members contain multiple copies.. +PF06749 Protein of unknown function (DUF1218)
Pfam-B_3286 (release 10.0). This family contains hypothetical plant proteins of unknown function. Family members contain a number of conserved cysteine residues.. +PF06750 Bacterial Peptidase A24 N-terminal domain
This family is found at the N-terminus of the pre-pilin peptidases (Pfam:PF01478). Its function has not been specifically determined; however some of the family have been characterised as bifunctional ( ), and this domain may contain the N-methylation activity (EC:2.1.1.-). It consists of an intracellular region between a pair of transmembrane regions. This region contains an invariant proline and two almost fully conserved disulphide bridges - hence the name DiS-P-DiS. The cysteines have been shown to be essential to the overall function of the enzyme in , but their role was incorrectly ascribed. . +PF06751 Ethanolamine ammonia lyase large subunit (EutB)
Pfam-B_6325 (release 10.0). This family consists of several bacterial ethanolamine ammonia lyase large subunit (EutB) proteins (EC:4.3.1.7). Ethanolamine ammonia-lyase is a bacterial enzyme that catalyses the adenosylcobalamin-dependent conversion of certain vicinal amino alcohols to oxo compounds and ammonia. The enzyme is a heterodimer composed of subunits of Mr approximately 55,000 (EutB) and 35,000 (EutC) . . +PF06752 E_Pc_C-term;
Enhancer of Polycomb C-terminus. Pfam-B_16811 (release 10.0). This family represents the C-terminus of eukaryotic enhancer of polycomb proteins, which have roles in heterochromatin formation . This family contains several conserved motifs.. +PF06753 Bradykinin
Pfam-B_7085 (release 10.0). This family consists of several bradykinin sequences. The skins of anuran amphibians, in addition to mucus glands, contain highly specialised poison glands, which, in reaction to stress or attack, exude a complex noxious cocktail of biologically active molecules. These secretions often contain a plethora of peptides among which bradykinin or structural variants have been identified .. +PF06754 Phosphonate metabolism protein PhnG
Pfam-B_6667 (release 10.0). This family consists of several bacterial phosphonate metabolism protein PhnG sequences. In Escherichia coli, the phn operon encodes proteins responsible for the uptake and breakdown of phosphonates. The exact function of PhnG is unknown, however it is thought likely that along with six other proteins PhnG makes up the C-P (carbon-phosphorus) lyase .. +PF06755 Protein of unknown function (DUF1219)
Pfam-B_4928 (release 10.0). This family consists of several hypothetical proteins which seem to be specific to the Enterobacteria Escherichia coli and Shigella flexneri. Family members are often known as YeeV proteins and are around 125 residues in length. The function of this family is unknown.. +PF06756 S19_C-term;
Chorion protein S19 C-terminal. Pfam-B_16839 (release 10.0). This family represents the C-terminal region of eukaryotic chorion protein S19. In Drosophilidae, the S19 gene is known to form part of an autosomal cluster that also contains s16, s15 and s18 . Note that members of this family contain a conserved PVA motif, and many contain Pfam:PF03964.. +PF06757 Insect allergen related repeat, nitrile-specifier detoxification
Pfam-B_5947 (release 10.0). This family exemplifies a case of novel gene evolution. The case in point is the arms-race between plants and their infective insective herbivores in the area of the glucosinolate-myrosinase system. Brassicas have developed the glucosinolate-myrosinase system as chemical defence mechanism against the insects, and consequently the insects have adapted to produce a detoxifying molecule, nitrile-specifier protein (NSP). NSP is present in the small white butterfly Pieris rapae. NSP is structurally different from and has no amino acid homology to any known detoxifying enzymes, and it appears to have arisen by a process of domain and gene duplication of a sequence of unknown function that is widespread in insect species and referred to as insect-allergen-repeat protein. Thus this family is found either as a single domain or as a multiple repeat-domain .. +PF06758 Repeat of unknown function (DUF1220)
Pfam-B_6292 (release 10.0). +PF06760 Protein of unknown function (DUF1221)
Pfam-B_16837 (release 10.0). This is a family of plant proteins, most of which are hypothetical and of unknown function. All members contain the Pfam:PF00069 domain, suggesting that they may possess kinase activity.. +PF06761 ImcF-related;
Intracellular multiplication and human macrophage-killing. Pfam-B_3476 (release 10.0). This family represents a conserved region within several bacterial proteins that resemble IcmF, which has been proposed to be involved in Vibrio cholerae cell surface reorganisation, resulting in increased adherence to epithelial cells and increased conjugation frequency. Note that many family members are hypothetical proteins.. +PF06762 DUF1222;
Lipase maturation factor. Vella Briffa B, Eberhardt R. Pfam-B_3454 (release 10.0). This family of transmembrane proteins includes the lipase maturation factor, LMF1. Lipoprotein lipase and hepatic lipase require LMF1 to fold into their active states [1,2]. The precise role of LMF1 in lipase folding has yet to be determined .. +PF06763 Prophage minor tail protein Z (GPZ)
Pfam-B_6085 (release 10.0). This family consists of several prophage minor tail protein Z like sequences from Escherichia coli, Salmonella typhimurium and Lambda-like bacteriophages.. +PF06764 Protein of unknown function (DUF1223)
Pfam-B_6655 (release 10.0). This family consists of several hypothetical proteins of around 250 residues in length which are found in both plants and bacteria. The function of this family is unknown.. +PF06766 Fungal hydrophobin
Pfam-B_3587 (release 10.0). This is a family of fungal hydrophobins that seems to be restricted to ascomycetes. These are small, moderately hydrophobic extracellular proteins that have eight cysteine residues arranged in a strictly conserved motif. Hydrophobins are generally found on the outer surface of conidia and of the hyphal wall, and may be involved in mediating contact and communication between the fungus and its environment . Note that some family members contain multiple copies.. +PF06767 Sif protein
Pfam-B_7884 (release 10.0). This family consists of several SifA and SifB and SseJ proteins which seem to be specific to the Salmonella species. SifA, SifB and SseJ have been demonstrated to localise to the Salmonella-containing vacuole (SCV) and to Salmonella-induced filaments (Sifs). Trafficking of SseJ and SifB away from the SCV requires the SPI-2 effector SifA. SseJ trafficking away from the SCV along Sifs is unnecessary for its virulence function .. +PF06769 DUF1224;
Plasmid encoded toxin Txe. Pfam-B_7662 (release 10.0). The plasmid encoded Axe-Txe proteins in Enterococcus faecium act as an antitoxin-toxin pair. When the plasmid is lost, the antitoxin (Axe) is degraded relatively quickly by host enzymes. This allows the toxin to interact with its intracellular target, thus killing the cell or impeding cell growth . This family contains many hypothetical proteins. This domain forms complexes with Axe antitoxins containing Pfam:PF02604.. +PF06770 Actin-rearrangement-inducing factor (Arif-1)
Pfam-B_6086 (release 10.0). This family consists of several Nucleopolyhedrovirus actin-rearrangement-inducing factor (Arif-1) proteins. In response to Autographa californica multicapsid nuclear polyhedrosis virus (AcMNPV) infection, a sequential rearrangement of the actin cytoskeleton occurs; this is induced by Arif-1 . Arif-1 is tyrosine phosphorylated and is located at the plasma membrane as a component of the actin rearrangement-inducing complex .. +PF06771 1111; Desmo_N-term;
Viral Desmoplakin N-terminus. Pfam-B_3693 (release 10.0). This family represents the N-terminus of viral desmoplakin. Desmoplakin is a component of mature desmosomes, which are the main adhesive junctions in epithelia and cardiac muscle. Desmoplakin is also essential for the maturation of adherens junctions . Note that many family members are hypothetical.. +PF06772 Bacterial low temperature requirement A protein (LtrA)
Pfam-B_8368 (release 10.0). This family consists of several bacteria specific low temperature requirement A (LtrA) protein sequences which have been found to be essential for growth at low temperatures in Listeria monocytogenes .. +PF06773 Bim protein N-terminus
Pfam-B_8427 (release 10.0). This family represents the N-terminal region of several mammal specific Bim proteins. The Bim protein is one of the BH3-only proteins, members of the Bcl-2 family that have only one of the Bcl-2 homology regions, BH3. BH3-only proteins are essential initiators of apoptotic cell death .. +PF06775 DUF1226;
Putative adipose-regulatory protein (Seipin). Pfam-B_8703 (release 10.0). Seipin is a protein of approximately 400 residues, in humans, which is the product of a gene homologous to the murine guanine nucleotide-binding protein (G protein) gamma-3 linked gene. This gene is implicated in the regulation of body fat distribution and insulin resistance and particularly in the auto-immune disease Berardinelli-Seip congenital lipodystrophy type 2. Seipin has no similarity with other known proteins or consensus motifs that might predict its function, but it is predicted to contain two transmembrane domains at residues 28-49 and 237-258, in human, and a third transmembrane domain might be present at residues 155-173. Seipin may also be implicated in Silver spastic paraplegia syndrome and distal hereditary motor neuropathy type V .. +PF06776 Invasion associated locus B (IalB) protein
Pfam-B_3703 (release 10.0). This family consists of several invasion associated locus B (IalB) proteins and related sequences. IalB is known to be a major virulence factor in Bartonella bacilliformis where it was shown to have a direct role in human erythrocyte parasitism. IalB is upregulated in response to environmental cues signaling vector-to-host transmission. Such environmental cues would include, but not be limited to, temperature, pH, oxidative stress, and haemin limitation. It is also thought that IalB would aid B. bacilliformis survival under stress-inducing environmental conditions . The role of this protein in other bacterial species is unknown.. +PF06777 Protein of unknown function (DUF1227)
Pfam-B_3660 (release 10.0). This family represents a conserved region within a number of eukaryotic DNA repair helicases (EC:3.6.1.-).. +PF06778 Chlorite dismutase
Pfam-B_3770 (release 10.0). This family contains chlorite dismutase enzymes of bacterial and archaeal origin. This enzyme catalyses the disproportionation of chlorite into chloride and oxygen . Note that many family members are hypothetical proteins.. +PF06779 Protein of unknown function (DUF1228)
Pfam-B_5646 (release 10.0). This family represents the N-terminus of several putative bacterial membrane proteins, which may be sugar transporters. Note that many family members are hypothetical proteins.. +PF06780 Erp_C-term;
Erp protein C-terminus. Pfam-B_4561 (release 10.0). This family represents the C-terminus of bacterial Erp proteins that seem to be specific to Borrelia burgdorferi (a causative agent of Lyme disease). Borrelia Erp proteins are particularly heterogeneous, which might enable them to interact with a wide variety of host components .. +PF06781 Uncharacterised protein family (UPF0233)
+PF06782 Uncharacterised protein family (UPF0236)
+PF06783 Uncharacterised protein family (UPF0239)
+PF06784 Uncharacterised protein family (UPF0240)
+PF06785 Uncharacterised protein family (UPF0242)
+PF06786 Uncharacterised protein family (UPF0253)
+PF06787 Uncharacterised protein family (UPF0254)
+PF06788 Uncharacterised protein family (UPF0257)
+PF06789 Uncharacterised protein family (UPF0258)
+PF06790 Uncharacterised protein family (UPF0259)
+PF06791 Prophage tail length tape measure protein
Pfam-B_3868 (release 10.0). This family represents a conserved region located towards the N-terminal end of prophage tail length tape measure protein (TMP). TMP is important for assembly of phage tails and involved in tail length determination. Mutated forms of TMP cause tail fibres to be shortened .. +PF06792 Uncharacterised protein family (UPF0261)
+PF06793 Uncharacterised protein family (UPF0262)
+PF06794 Uncharacterised protein family (UPF0270)
+PF06795 Erythrovirus X protein
Pfam-B_9167 (release 10.0). This family consists of several Erythrovirus X proteins which seem to be found exclusively in human parvovirus and human erythrovirus. The function of this family is unknown.. +PF06796 Periplasmic nitrate reductase protein NapE
Pfam-B_9066 (release 10.0). This family consists of several bacterial periplasmic nitrate reductase NapE proteins. Seven genes, napKEFDABC, encoding the periplasmic nitrate reductase system were cloned from the denitrifying phototrophic bacterium Rhodobacter sphaeroides f. sp. denitrificans IL106. NapE is thought to be a transmembrane protein .. +PF06797 Protein of unknown function (DUF1229)
Pfam-B_9402 (release 10.0). This family consists of several hypothetical proteins of around 415 residues in length which seem to be specific to the bacterium Leptospira interrogans.. +PF06798 PrkA serine protein kinase C-terminal domain
Pfam-B_3917 (release 10.0). This is a family of PrkA bacterial and archaeal serine kinases approximately 630 residues long. This family corresponds to the C-terminal domain .. +PF06799 Protein of unknown function (DUF1230)
Pfam-B_9232 (release 10.0). This family consists of several hypothetical plant and photosynthetic bacterial proteins of around 160 residues in length. The function of this family is unknown although looking at the species distribution the protein may play a part in photosynthesis.. +PF06800 Sugar transport protein
Pfam-B_4126 (release 10.0). This is a family of bacterial sugar transporters approximately 300 residues long. Members include glucose uptake proteins , ribose transport proteins, and several putative and hypothetical membrane proteins probably involved in sugar transport across bacterial membranes.. +PF06802 Protein of unknown function (DUF1231)
Pfam-B_9856 (release 10.0). This family consists of several Orthopoxvirus specific proteins predominantly of around 340 residues in length. This family contains both B17 and B15 proteins, the function of which are unknown.. +PF06803 Protein of unknown function (DUF1232)
Pfam-B_4265 (release 10.0). This family represents a conserved region of approximately 60 residues within a number of hypothetical bacterial and archaeal proteins of unknown function.. +PF06804 NlpB/DapX lipoprotein
Pfam-B_10405 (release 10.0). This family consists of a number of bacterial lipoproteins often known as NlpB or DapX. This lipoprotein is detected in outer membrane vesicles in Escherichia coli and appears to be nonessential .. +PF06805 Bacteriophage lambda tail assembly protein I
Moxon SJ, Iyer LM, Burroughs AM, Aravind L. Pfam-B_7725 (release 10.0). This family consists of tail assembly proteins from lambdoid and T1 phages and related prophages, e.g. the tail assembly protein I (TAPI). Members of this family contain a core ubiquitin fold domain . The exact function of TAPI is not clear but it is not incorporated into the mature tail. Gene neighborhoods reveal that TAPI co-occurs with genes encoding the host-specificity protein TapJ, and TapK, which contains a JAB metallopeptidase fused to an NlpC/P60 peptidase. It is proposed that the TAPI protein is processed by the peptidase domains of TapK .. +PF06806 Putative excisionase (DUF1233)
Pfam-B_9240 (release 10.0). This family consists of several putative phage excisionase proteins of around 80 residues in length.. +PF06807 Pre-mRNA cleavage complex II protein Clp1
Pfam-B_9787 (release 10.0). This family consists of several pre-mRNA cleavage complex II Clp1 (or HeaB) proteins. Six different protein factors are required in vitro for 3' end formation of mammalian pre-mRNAs by endonucleolytic cleavage and polyadenylation. Clp1 is a subunit of cleavage complex IIA, which is required for cleavage, but not for polyadenylation of pre-mRNA .. +PF06808 DctM-like transporters
Pfam-B_4075 (release 10.0). This family contains a diverse range of predicted transporter proteins. Including the DctM subunit of the bacterial and archaeal TRAP C4-dicarboxylate transport (Dct) system permease. In general, C4-dicarboxylate transport systems allow C4-dicarboxylates like succinate, fumarate, and malate to be taken up. TRAP C4-dicarboxylate carriers are secondary carriers that use an electrochemical H+ gradient as the driving force for transport. DctM is an integral membrane protein that is one of the constituents of TRAP carriers . Note that many family members are hypothetical proteins.. +PF06809 Neural proliferation differentiation control-1 protein (NPDC1)
Pfam-B_10407 (release 10.0). This family consists of several neural proliferation differentiation control-1 (NPDC1) proteins. NPDC1 plays a role in the control of neural cell proliferation and differentiation. It has been suggested that NPDC1 may be involved in the development of several secretion glands. This family also contains the C-terminal region of the C. elegans protein CAB-1 (Swiss:Q93249) which is known to interact with AEX-3 .. +PF06810 Phage minor structural protein GP20
Pfam-B_8431 (release 10.0). This family consists of several phage minor structural protein GP20 sequences of around 180 residues in length. The function of this family is unknown.. +PF06812 ImpA-rel_N-term;
ImpA-related N-terminal. Pfam-B_4308 (release 10.0). This family represents a conserved region located towards the N-terminal end of ImpA and related proteins. ImpA is an inner membrane protein, which has been suggested to be involved with proteins that are exported and associated with colony variations in Actinobacillus actinomycetemcomitans . Note that many family members are hypothetical proteins.. +PF06813 Nodulin-like
Pfam-B_4440 (release 10.0). This family represents a conserved region within plant nodulin-like proteins.. +PF06814 Lung seven transmembrane receptor
Pfam-B_4367 (release 10.0). This family represents a conserved region with eukaryotic lung seven transmembrane receptors and related proteins.. +PF06815 rvt_connect;
Reverse transcriptase connection domain. This domain is known as the connection domain. This domain lies between the thumb and palm domains .. +PF06816 NOD; NOD1;
NOTCH signalling plays a fundamental role during a great number of developmental processes in multicellular animals [1-2]. NOD and NODP represent a region present in many NOTCH proteins and NOTCH homologs in multiple species such as NOTCH2 and NOTCH3, LIN12, SC1 and TAN1. Role of NOD domain remains to be elucidated.. +PF06817 rvt_thumb;
Reverse transcriptase thumb domain. This domain is known as the thumb domain. It is composed of a four helix bundle .. +PF06818 Fez1
Pfam-B_4593 (release 10.0). This family represents the eukaryotic Fez1 protein. Fez1 contains a leucine-zipper region with similarity to the DNA-binding domain of the cAMP-responsive activating-transcription factor 5 . There is evidence that Fez1 inhibits cancer cell growth through regulation of mitosis, and that its alterations result in abnormal cell growth . Note that some family members contain more than one copy of this region.. +PF06819 Archaeal Peptidase A24 C-terminal Domain
This region is of unknown function but is found in some archaeal Pfam:PF01478. It is predicted to be of mixed alpha/beta secondary structure by JPred.. +PF06820 Tail_fib_C-term;
Putative prophage tail fibre C-terminus. Pfam-B_5030 (release 10.0). This family represents the C-terminus of a prophage tail fibre protein found mostly in E. coli. All family members contain a conserved RLGP motif.. +PF06821 DUF1234;
Vella Briffa B, Eberhardt R. Pfam-B_4941 (release 10.0). Members of this family have serine hydrolase activity. They contain a conserved serine hydrolase motif, GXSXG/A, where the serine is a putative nucleophile . This family has an alpha-beta hydrolase fold [2,3]. Eukaryotic members of this family have a conserved LXCXE motif, which binds to retinoblastomas. This motif is absent from prokaryotic members of this family .. +PF06822 Protein of unknown function (DUF1235)
Pfam-B_4988 (release 10.0). This family contains a number of viral proteins of unknown function.. +PF06823 Protein of unknown function (DUF1236)
Pfam-B_5056 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function. Some family members contain more than one copy of the region represented by this family.. +PF06824 Protein of unknown function (DUF1237)
Pfam-B_4981 (release 10.0). This family contains a number of hypothetical proteins of about 450 residues in length. Their function is unknown, and most are bacterial. However, structurally this family is part of the 6 hairpin glycosidase superfamily, suggesting a glycosyl hydrolase function.. +PF06825 Heat shock factor binding protein 1
Pfam-B_20266 (release 10.0). Heat shock factor binding protein 1 (HSBP1) appears to be a negative regulator of the heat shock response .. +PF06826 Predicted Permease Membrane Region
This family represents five transmembrane helices that are normally found flanking (five either side) a pair of Pfam:PF02080 domains. This suggests that the paired regions form a ten helical structure, probably forming the pore, whereas the Pfam:PF02080 binds a ligand for export or regulation of the pore. Swiss:Q8L3K8 is described as an aspartate-alanine antiporter ( ). In conjunction with Swiss:Q8L3K9 it forms a 'proton motive metabolic cycle catalysed by an aspartate-alanine exchange'. The general conservation of domain architecture in this family suggests that they are functional orthologues.. +PF06827 Zinc finger found in FPG and IleRS
This zinc binding domain is found at the C-terminus of isoleucyl tRNA synthetase and the enzyme Formamidopyrimidine-DNA glycosylase EC:3.2.2.23.. +PF06830 Root cap
Pfam-B_5867 (release 10.0). The cells at the periphery of the root cap are continuously sloughed off from the root into the mucilage, and are thought to be programmed to die . This family represents a conserved region approximately 60 residues in length within plant root cap proteins, which may be involved in the process.. +PF06831 Formamidopyrimidine-DNA glycosylase H2TH domain
Formamidopyrimidine-DNA glycosylase (Fpg) is a DNA repair enzyme that excises oxidised purines from damaged DNA. This family is the central domain containing the DNA-binding helix-two turn-helix domain .. +PF06832 Penicillin-Binding Protein C-terminus Family
This conserved region of approximately 90 residues is found in a sub-group of bacterial Penicillin-Binding Proteins (PBPs). A variable length loop region separates this region from the transpeptidase unit (Pfam:PF00905). It is predicted by PROF to be an all beta fold.. +PF06833 Malonate decarboxylase gamma subunit (MdcE)
Pfam-B_10907 (release 10.0). This family consists of several bacterial malonate decarboxylase gamma subunit proteins. Malonate decarboxylase of Klebsiella pneumoniae consists of four different subunits and catalyses the conversion of malonate plus H+ to acetate and CO2. The catalysis proceeds via acetyl and malonyl thioester residues with the phosphoribosyl-dephospho-CoA prosthetic group of the acyl carrier protein (ACP) subunit. MdcD and E together probably function as malonyl-S-ACP decarboxylase . . +PF06834 TraU protein
Pfam-B_10708 (release 10.0). This family consists of several bacterial TraU proteins. TraU appears to be more essential to conjugal DNA transfer than to assembly of pilus filaments .. +PF06835 DUF1239;
Lipopolysaccharide-assembly, LptC-related. Pfam-B_11065 (release 10.0). This family consists of several related groups of proteins one of which is the LptC family. LptC is involved in lipopolysaccharide-assembly on the outer membrane of Gram-negative organisms. The lipopolysaccharide component of the outer bacterial membrane is transported from its source of origin to the outer membrane by a set of proteins constituting a transport machinery that is made up of LptA, LptB, LptC, LptD, LptE. LptC is located on the inner membrane side of the intermembrane space.. +PF06836 Protein of unknown function (DUF1240)
Pfam-B_11130 (release 10.0). This family consists of a number of hypothetical putative membrane proteins which seem to be specific to Yersinia pestis. The function of this family is unknown.. +PF06837 Fijivirus P9-2 protein
Pfam-B_11357 (release 10.0). This family consists of several Fijivirus specific P9-2 proteins from Rice black streaked dwarf virus (RBSDV) and Fiji disease virus. The function of this family is unknown.. +PF06838 Alum_res;
Methionine gamma-lyase . Vella Briffa B, Haft D. Pfam-B_5307 (release 10.0). This is a putative pyridoxal 5'-phosphate-dependent methionine gamma-lyase enzyme involved in methionine catabolism.. +PF06839 GRF zinc finger
This presumed zinc binding domain is found in a variety of DNA-binding proteins. It seems likely that this domain is involved in nucleic acid binding. It is named GRF after three conserved residues in the centre of the alignment of the domain. This zinc finger may be related to Pfam:PF01396.. +PF06840 Protein of unknown function (DUF1241)
Pfam-B_11380 (release 10.0). This family consists of several programmed cell death 10 protein (PDCD10 or TFAR15) sequences. The function of this family is unknown.. +PF06841 T4_Gp19;
T4-like virus tail tube protein gp19. Pfam-B_11507 (release 10.0). This family consists of several tail tube protein gp19 sequences from the T4-like viruses [1,2]. This family also contains bacterial members which suggest lateral transfer of genes.. +PF06842 Protein of unknown function (DUF1242)
Pfam-B_11544 (release 10.0). This family consists of a number of eukaryotic proteins of around 72 residues in length. The function of this family is unknown.. +PF06844 Protein of unknown function (DUF1244)
Pfam-B_11743 (release 10.0). This family consists of several short bacterial proteins of around 100 residues in length. The function of this family is unknown.. +PF06847 Archaeal Peptidase A24 C-terminus Type II
This region is of unknown function but is found in some archaeal Pfam:PF01478. It is predicted to be of mixed alpha/beta secondary structure by Prof.. +PF06848 Disaggregatase related repeat
Moxon SJ, Mistry J, Adindla S. Pfam-B_11958 (release 10.0). This family consists of several repeats which seem to be specific to the Methanosarcina archaea species and are often found in multiple copies in disaggregatase proteins. Members of this family are also found in single copies in several hypothetical proteins. This repeat is also known as DNRLRE repeat and is predicted to form a mainly beta-strand structure with two alpha-helices [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. It is found in some cell surface proteins. . +PF06849 Protein of unknown function (DUF1246)
Pfam-B_5448 (release 10.0). This family represents the N-terminus of a number of hypothetical archaeal proteins of unknown function.. +PF06850 PHB_depo_C-term;
PHB de-polymerase C-terminus. Pfam-B_5697 (release 10.0). This family represents the C-terminus of bacterial poly(3-hydroxybutyrate) (PHB) de-polymerase. This degrades PHB granules to oligomers and monomers of 3-hydroxy-butyric acid.. +PF06851 Protein of unknown function (DUF1247)
Pfam-B_5762 (release 10.0). This family contains a number of hypothetical viral proteins of unknown function approximately 200 residues long.. +PF06852 Protein of unknown function (DUF1248)
Pfam-B_5811 (release 10.0). This family represents a conserved region within a number of proteins of unknown function that seem to be specific to C. elegans. Note that some family members contain more than one copy of this region.. +PF06853 Protein of unknown function (DUF1249)
Pfam-B_11475 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown.. +PF06854 Bacteriophage Gp15 protein
Pfam-B_11759 (release 10.0). This family consists of bacteriophage Gp15 proteins and related bacterial sequences. The function of this family is unknown. +PF06855 Protein of unknown function (DUF1250)
Pfam-B_11942 (release 10.0). This family consists of several short hypothetical bacterial proteins of around 70 residues in length. Members of this family seem to all belong to the order Bacillales or Lactobacillales. The function of this family is unknown.. +PF06856 Protein of unknown function (DUF1251)
Pfam-B_12000 (release 10.0). This family consists of the N-terminal region of several hypothetical Nucleopolyhedrovirus proteins of unknown function.. +PF06857 MdcD;
Malonate decarboxylase delta subunit (MdcD). Pfam-B_12010 (release 10.0). This family consists of several bacterial malonate decarboxylase delta subunit (MdcD) proteins. Malonate decarboxylase of Klebsiella pneumoniae consists of four different subunits and catalyses the conversion of malonate plus H+ to acetate and CO2. The catalysis proceeds via acetyl and malonyl thioester residues with the phosphoribosyl-dephospho-CoA prosthetic group of the acyl carrier protein (ACP) subunit. MdcC is the (apo) ACP subunit . The family also contains the CitD family of citrate lyase acyl carrier proteins.. +PF06858 Nucleolar GTP-binding protein 1 (NOG1)
Pfam-B_5853 (release 10.0). This family represents a conserved region of approximately 60 residues in length within nucleolar GTP-binding protein 1 (NOG1). In S. cerevisiae, the NOG1 gene has been shown to be essential for cell viability, suggesting that NOG1 may play an important role in nucleolar functions . Family members include eukaryotic, bacterial and archaeal proteins.. +PF06859 Bicoid-interacting protein 3 (Bin3)
Pfam-B_5564 (release 10.0). This family represents a conserved region of approximately 120 residues within eukaryotic Bicoid-interacting protein 3 (Bin3). Bin3, which shows similarity to a number of protein methyltransferases that modify RNA-binding proteins, interacts with Bicoid, which itself directs pattern formation in the early Drosophila embryo. The interaction might allow Bicoid to switch between its dual roles in transcription and translation . Note that family members contain a conserved HLN motif.. +PF06861 BALF1 protein
Pfam-B_12069 (release 10.0). This family consists of several BALF1 proteins which seem to be specific to the Lymphocryptoviruses. BALF1, inhibits the antiapoptotic activity of EBV BHRF1 and of KSBcl-2 .. +PF06862 Protein of unknown function (DUF1253)
Pfam-B_6227 (release 10.0). This family represents the C-terminal portion (approximately 500 residues) of several hypothetical eukaryotic proteins of unknown function.. +PF06863 Protein of unknown function (DUF1254)
Pfam-B_5911 (release 10.0). This family represents a conserved region about 130 residues long within hypothetical proteins of unknown function. Family members include eukaryotic, bacterial and archaeal proteins.. +PF06864 Pilin accessory protein (PilO)
Pfam-B_12430 (release 10.0). This family consists of several enterobacterial PilO proteins. The function of PilO is unknown although it has been suggested that it is a cytoplasmic protein in the absence of other Pil proteins, but PilO protein is translocated to the outer membrane in the presence of other Pil proteins. Alternatively, PilO protein may form a complex with other Pil protein(s). PilO has been predicted to function as a component of the pilin transport apparatus and thin-pilus basal body . This family does not seem to be related to Pfam:PF04350.. +PF06865 Protein of unknown function (DUF1255)
Pfam-B_12498 (release 10.0). This family consists of several conserved hypothetical bacterial proteins of around 95 residues in length. The function of this family is unknown. +PF06866 Protein of unknown function (DUF1256)
Pfam-B_12377 (release 10.0). This family consists of several uncharacterised bacterial proteins which seem to be specific to the orders Clostridia and Bacillales. Family members are typically around 180 residues in length. The function of this family is unknown. These proteins are related to peptidase family M63 and so may be peptidases.. +PF06868 Protein of unknown function (DUF1257)
Pfam-B_5975 (release 10.0). This family contains hypothetical proteins of unknown function that are approximately 120 residues long. Family members include eukaryotic and bacterial proteins.. +PF06869 Protein of unknown function (DUF1258)
Pfam-B_6065 (release 10.0). This family represents a conserved region approximately 260 residues long within a number of hypothetical proteins of unknown function that seem to be specific to C. elegans. Note that this family contains a number of conserved cysteine and histidine residues.. +PF06870 A49-like RNA polymerase I associated factor
Pfam-B_20222 (release 10.0). Saccharomyces cerevisiae A49 is a specific subunit associated with RNA polymerase I (Pol I) in eukaryotes. Pol I maintains transcription activities in A49 deletion mutants. However, such mutants are deficient in transcription activity at low temperatures. Deletion analysis of the fission yeast homolog indicates that only the C-terminal two thirds are required for function. Transcript analysis has demonstrated that A49 is maximising transcription of ribosomal DNA .. +PF06871 DUF1259;
Pfam-B_13298 (release 10.0). This family consists of several TraH proteins which seem to be specific to Agrobacterium and Rhizobium species. This protein is thought to be involved in conjugal transfer but its function is unknown. This family does not appear to be related to Pfam:PF06122.. +PF06872 EspG protein
Pfam-B_13549 (release 10.0). This family consists of several EspG like proteins from Citrobacter rodentium and Escherichia coli. EspG is secreted by the type III secretory system and is translocated into host epithelial cells. EspG is homologous with Shigella flexneri protein VirA and can rescue invasion in a Shigella virA mutant, indicating that these proteins are functionally equivalent in Shigella. EspG plays an accessory but as yet undefined role in EPEC virulence that may involve intestinal colonisation .. +PF06873 Cell surface immobilisation antigen SerH
Pfam-B_13151 (release 10.0). This family consists of several cell surface immobilisation antigen SerH proteins which seem to be specific to Tetrahymena thermophila. The SerH locus of Tetrahymena thermophila is one of several paralogous loci with genes encoding variants of the major cell surface protein known as the immobilisation antigen (i-ag) . . +PF06874 Firmicut_FBPase;
Firmicute fructose-1,6-bisphosphatase. Pfam-B_13194 (release 10.0). This family consists of several bacterial fructose-1,6-bisphosphatase proteins (EC:3.1.3.11) which seem to be specific to phylum Firmicutes. Fructose-1,6-bisphosphatase (FBPase) is a well known enzyme involved in gluconeogenesis . This family does not seem to be structurally related to Pfam:PF00316.. +PF06875 Plethodontid receptivity factor PRF
Pfam-B_13241 (release 10.0). This family consists of several plethodontid receptivity factor (PRF) proteins which seem to be specific to Plethodon jordani (Jordan's salamander). PRF is a courtship pheromone produced by males to increase female receptivity .. +PF06876 Plant self-incompatibility response (SCRL) protein
Pfam-B_13253 (release 10.0). This family consists of several Plant self-incompatibility response (SCRL) proteins. The male component of the self-incompatibility response in Brassica has been shown to be encoded by the S locus cysteine-rich gene (SCR). SCR is related, at the sequence level, to the pollen coat protein (PCP) gene family whose members encode small, cysteine-rich proteins located in the proteo-lipidic surface layer (tryphine) of Brassica pollen grains .. +PF06877 DUF1260;
Regulator of ribonuclease activity B. Pfam-B_13601 (release 10.0). This family of proteins regulate mRNA abundance by binding to RNaseE and inhibiting its endonucleolytic activity [1-2]. A subset of these proteins are predicted to function as immunity proteins .. +PF06878 Pkip-1 protein
Pfam-B_13784 (release 10.0). This family consists of several Pkip-1 proteins which seem to be specific to Nucleopolyhedroviruses. The function of this family is unknown although it has been found that Pkip-1 is not essential for virus replication in cell culture or by in vivo intrahaemocoelic injection . . +PF06880 Protein of unknown function (DUF1262)
Pfam-B_6733 (release 10.0). This family represents a conserved region within a number of proteins of unknown function that seem to be specific to Arabidopsis thaliana. Note that some family members contain more than one copy of this region.. +PF06881 RNA polymerase II transcription factor SIII (Elongin) subunit A
Pfam-B_6598 (release 10.0). This family represents a conserved region within RNA polymerase II transcription factor SIII (Elongin) subunit A. In mammals, the Elongin complex activates elongation by RNA polymerase II by suppressing transient pausing of the polymerase at many sites within transcription units. Elongin is a heterotrimer composed of A, B, and C subunits of 110, 18, and 15 kilodaltons, respectively. Subunit A has been shown to function as the transcriptionally active component of Elongin .. +PF06882 Protein of unknown function (DUF1263)
Pfam-B_6668 (release 10.0). This family represents a conserved region located towards the C-terminus of a number of proteins of unknown function that seem to be specific to Oryza sativa.. +PF06883 RNA polymerase I, Rpa2 specific domain
Pfam-B_4721 (release 10.0). This domain is found between domain 3 (Pfam:PF04565) and domain 5 (Pfam:PF04565), but shows no homology to domain 4 of Rpb2. The external domains in multisubunit RNA polymerase (those most distant from the active site) are known to demonstrate more sequence variability .. +PF06884 Protein of unknown function (DUF1264)
Pfam-B_6839 (release 10.0). This family contains a number of bacterial and eukaryotic proteins of unknown function that are approximately 200 residues long. Some family members are annotated as putative lipoproteins.. +PF06886 Targeting protein for Xklp2 (TPX2)
Pfam-B_6863 (release 10.0). This family represents a conserved region approximately 60 residues long within the eukaryotic targeting protein for Xklp2 (TPX2). Xklp2 is a kinesin-like protein localised on centrosomes throughout the cell cycle and on spindle pole microtubules during metaphase. In Xenopus, it has been shown that Xklp2 protein is required for centrosome separation and maintenance of spindle bi-polarity . TPX2 is a microtubule-associated protein that mediates the binding of the C-terminal domain of Xklp2 to microtubules. It is phosphorylated during mitosis in a microtubule-dependent way .. +PF06887 Protein of unknown function (DUF1265)
Vella Briffa B, Pollington JE. Pfam-B_7101 (release 10.0). This family represents a conserved region approximately 50 residues long within a number of proteins of unknown function that seem to be restricted to C. elegans. The GO annotation for this protein indicates that it is a protein involved in nematode larval development and has a positive regulation on growth rate.. +PF06888 Putative Phosphatase
Pfam-B_7115 (release 10.0). This family contains a number of putative eukaryotic acid phosphatases. Some family members represent the products of the PSI14 phosphatase family in Lycopersicon esculentum (Tomato) .. +PF06889 Protein of unknown function (DUF1266)
Pfam-B_13878 (release 10.0). This family consists of several hypothetical bacterial proteins of around 235 residues in length. Members of this family seem to be found exclusively in the Enterobacteria Salmonella typhimurium and Escherichia coli. The function of this family is unknown.. +PF06890 Bacteriophage Mu Gp45 protein
Pfam-B_10848 (release 10.0). This family consists of Bacteriophage Mu Gp45 related proteins from both phages and bacteria. The function of this family is unknown although it has been suggested that family members may be involved in baseplate assembly.. +PF06891 P2 phage tail completion protein R (GpR)
Pfam-B_10918 (release 10.0). This family consists of P2 phage tail completion protein R (GpR) like sequences. GpR is thought to be a tail completion protein which is essential for stable head joining .. +PF06892 Phage regulatory protein CII (CP76)
Pfam-B_13444 (release 10.0). This family consists of several phage regulatory protein CII (CP76) sequences which are thought to be DNA binding proteins which are involved in the establishment of lysogeny .. +PF06894 Bacteriophage lambda minor tail protein (GpG)
Pfam-B_11957 (release 10.0). This family consists of Bacteriophage lambda minor tail protein G and related sequences. The role of GpG in tail assembly is not known .. +PF06896 Protein of unknown function (DUF1268)
Pfam-B_11819 (release 10.0). This family consists of several bacterial and phage proteins of around 115 residues in length. The function of this family is unknown.. +PF06897 Protein of unknown function (DUF1269)
Pfam-B_14034 (release 10.0). This family consists of several bacterial and archaeal proteins of around 200 residues in length. The function of this family is unknown. The family carries a repeated glycine-zipper sequence-motif, GxxxGxxxG, where the x following the G is frequently found to be an alanine. As glycine-zippers occur in membrane proteins, this family is likely to be found spanning a membrane.. +PF06898 Putative stage IV sporulation protein YqfD
Pfam-B_13823 (release 10.0). This family consists of several putative bacterial stage IV sporulation (SpoIV) proteins. YqfD of Bacillus subtilis (Swiss:P54469) is known to be essential for efficient sporulation although its exact function is unknown .. +PF06899 WzyE protein
Pfam-B_13849 (release 10.0). This family consists of several WzyE proteins which appear to be specific to Enterobacteria. Members of this family are described as putative ECA polymerases, but this has been found to be incorrect . The function of this family is unknown.. +PF06900 Protein of unknown function (DUF1270)
Pfam-B_13907 (release 10.0). This family consists of several hypothetical Staphylococcus aureus and phage proteins of 53 residues in length. The function of this family is unknown.. +PF06901 RTX iron-regulated protein FrpC
Pfam-B_14005 (release 10.0). This family consists of several RTX iron-regulated FrpC proteins which appear to be found exclusively in Neisseria meningitidis. FrpC has been shown to be related to the RTX family of bacterial cytotoxins. FrpC is found in the meningococcal outer membrane. The function of this family is unknown although it is thought to be a virulence factor .. +PF06902 DUF1271;
Divergent 4Fe-4S mono-cluster. Pfam-B_13906 (release 10.0). Members of this family contain three highly conserved cysteine residues. This family includes proteins containing divergent domains which are most likely to bind to iron-sulfur clusters.. +PF06903 VirK protein
Pfam-B_13955 (release 10.0). This family consists of several bacterial VirK proteins of around 145 residues in length. The function of this family is unknown .. +PF06904 Extensin-like protein C-terminus
Pfam-B_6925 (release 10.0). This family represents the C-terminus (approx. 120 residues) of a number of bacterial extensin-like proteins. Extensins are cell wall glycoproteins normally associated with plants, where they strengthen the cell wall in response to mechanical stress . Note that many family members of this family are hypothetical.. +PF06905 FAIM;
Fas apoptotic inhibitory molecule (FAIM1). Pfam-B_13985 (release 10.0). This family consists of several fas apoptotic inhibitory molecule (FAIM1) proteins. FAIM expression is upregulated in B cells by anti-Ig treatment that induces Fas-resistance, and overexpression of FAIM diminishes sensitivity to Fas-mediated apoptosis of B and non-B cell lines. FAIM1 is highly evolutionarily conserved and is widely expressed in murine tissues, suggesting that FAIM plays an important role in cellular physiology .. +PF06906 Protein of unknown function (DUF1272)
Pfam-B_14128 (release 10.0). This family consists of several hypothetical bacterial proteins of around 80 residues in length. This family contains a number of conserved cysteine residues and its function is unknown.. +PF06907 Latexin
Pfam-B_14203 (release 10.0). This family consists of several animal specific latexin proteins. Latexin is a carboxypeptidase A inhibitor and is expressed in a cell type-specific manner in both central and peripheral nervous systems in the rat .. +PF06908 Protein of unknown function (DUF1273)
Pfam-B_14270 (release 10.0). This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown.. +PF06910 Male enhanced antigen 1 (MEA1)
Pfam-B_14358 (release 10.0). This family consists of several mammalian male enhanced antigen 1 (MEA1) proteins. The Mea-1 gene is found to be localised in primary and secondary spermatocytes and spermatids, but the protein products are detected only in spermatids. Intensive transcription of Mea-1 gene and specific localisation of the gene product suggest that Mea-1 may play an important role in the late stage of spermatogenesis . . +PF06911 Senescence-associated protein
Pfam-B_7525 (release 10.0). This family contains a number of plant senescence-associated proteins of approximately 450 residues in length. In Hemerocallis, petals have a genetically based program that leads to senescence and cell death approximately 24 hours after the flower opens, and it is believed that senescence proteins produced around that time have a role in this program . This family extends to the higher vertebrates where the full-length protein is often a Spartin, associated with mitochondrial membranes and transportation along microtubules .. +PF06912 Protein of unknown function (DUF1275)
Pfam-B_13896 (release 10.0). This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown although a few members are thought to be membrane proteins.. +PF06916 Protein of unknown function (DUF1279)
Pfam-B_7316 (release 10.0). This family represents the C-terminus (approx. 120 residues) of a number of eukaryotic proteins of unknown function.. +PF06917 Periplasmic pectate lyase
Pfam-B_14500 (release 10.0). This family consists of several Enterobacterial periplasmic pectate lyase proteins (EC:4.2.2.2). A major virulence determinant of the plant-pathogenic enterobacterium Erwinia chrysanthemi is the production of pectate lyase enzymes that degrade plant cell walls .. +PF06918 Protein of unknown function (DUF1280)
Pfam-B_7457 (release 10.0). This family represents a conserved region approximately 200 residues long within a number of proteins of unknown function that seem to be specific to C. elegans.. +PF06919 Phage Gp30.7 protein
Pfam-B_14625 (release 10.0). This family consists of several phage Gp30.7 proteins of 121 residues in length. Family members seem to be exclusively from the T4-like viruses. The function of this family is unknown.. +PF06920 Dedicator of cytokinesis
Pfam-B_7154 (release 10.0). This family represents a conserved region approximately 200 residues long within a number of eukaryotic dedicator of cytokinesis proteins. These are potential guanine nucleotide exchange factors, which activate some small GTPases by exchanging bound GDP for free GTP.. +PF06922 Citrus tristeza virus P13 protein
Pfam-B_14711 (release 10.0). This family consists of several Citrus tristeza virus (CTV) P13 13-kDa proteins. Citrus tristeza virus (CTV), a member of the closterovirus group, is one of the more complex single-stranded RNA viruses . The function of this family is unknown.. +PF06923 Glucitol operon activator protein (GutM)
Pfam-B_14714 (release 10.0). This family consists of several glucitol operon activator (GutM) proteins. Expression of the glucitol (gut) operon in Escherichia coli is regulated by an unusual, complex system which consists of an activator (encoded by the gutM gene) and a repressor (encoded by the gutR gene) in addition to the cAMP-CRP complex (CRP, cAMP receptor protein). Synthesis of the mRNA, which initiates at the promoter specific to the gutR gene, occurs within the gutM gene. Expressional control of the gut operon appears to occur as a consequence of the antagonistic action of the products of the autogenously regulated gutM and gutR genes .. +PF06924 Protein of unknown function (DUF1281)
Pfam-B_14730 (release 10.0). This family consists of several hypothetical enterobacterial proteins of around 170 residues in length. Members of this family are found in Escherichia coli, Salmonella typhimurium and Shigella species. The function of this family is unknown.. +PF06925 Monogalactosyldiacylglycerol (MGDG) synthase
Pfam-B_8075 (release 10.0). This family represents a conserved region of approximately 180 residues within plant and bacterial monogalactosyldiacylglycerol (MGDG) synthase (EC:2.4.1.46). In Arabidopsis, there are two types of MGDG synthase which differ in their N-terminal portion: type A and type B .. +PF06926 Putative replisome organiser protein C-terminus
Pfam-B_6517 (release 10.0). This family represents the C-terminus (approximately 100 residues) of a putative replisome organiser protein in Lactococcus bacteriophages .. +PF06929 Rotavirus VP3 protein
Pfam-B_14798 (release 10.0). This family consists of several Rotavirus specific VP3 proteins. VP3 is known to be a viral guanylyltransferase and is thought to possess methyltransferase activity and therefore VP3 is a predicted multifunctional capping enzyme . . +PF06930 Protein of unknown function (DUF1282)
Pfam-B_14843 (release 10.0). This family consists of several hypothetical proteins of around 200 residues in length. The function of this family is unknown although a number of family members are thought to be putative membrane proteins.. +PF06931 Mastadenovirus E4 ORF3 protein
Pfam-B_14868 (release 10.0). This family consists of several Mastadenovirus E4 ORF3 proteins. Early proteins E4 ORF3 and E4 ORF6 have complementary functions during viral infection. Both proteins facilitate efficient viral DNA replication, late protein expression, and prevention of concatenation of viral genomes. A unique function of E4 ORF3 is the reorganisation of nuclear structures known as PML oncogenic domains (PODs). The function of these domains is unclear, but PODs have been implicated in a number of important cellular processes, including transcriptional regulation, apoptosis, transformation, and response to interferon .. +PF06932 Protein of unknown function (DUF1283)
Pfam-B_15035 (release 10.0). This family consists of several hypothetical proteins of around 115 residues in length which seem to be specific to Enterobacteria. The function of the family is unknown.. +PF06933 Special lobe-specific silk protein SSP160
Pfam-B_14947 (release 10.0). This family consists of several special lobe-specific silk protein SSP160 sequences which appear to be specific to Chironomus (Midge) species [1,2].. +PF06934 Fatty acid cis/trans isomerase (CTI)
Pfam-B_14967 (release 10.0). This family consists of several fatty acid cis/trans isomerase proteins which appear to be found exclusively in bacteria of the orders Vibrionales and Pseudomonadales. Cis/trans isomerase (CTI) catalyses the cis-trans isomerisation of esterified fatty acids in phospholipids, mainly cis-oleic acid (C(16:1,9)) and cis-vaccenic acid (C(18:1,11)), in response to solvents. The CTI protein has been shown to be involved in solvent resistance in Pseudomonas putida .. +PF06935 Protein of unknown function (DUF1284)
Pfam-B_14822 (release 10.0). This family consists of several hypothetical bacterial and archaeal proteins of around 130 residues in length. The function of this family is unknown, although it is thought that they may be iron-sulphur binding proteins.. +PF06936 Selenoprotein S (SelS)
Pfam-B_15061 (release 10.0). This family consists of several mammalian selenoprotein S (SelS) sequences. SelS is a plasma membrane protein and is present in a variety of tissues and cell types . The function of this family is unknown.. +PF06937 EURL protein
Pfam-B_14914 (release 10.0). This family consists of several animal EURL proteins. EURL is preferentially expressed in chick retinal precursor cells as well as in the anterior epithelial cells of the lens at early stages of development. EURL transcripts are found primarily in the peripheral dorsal retina, i.e., the most undifferentiated part of the dorsal retina. EURL transcripts are also detected in the lens at stage 18 and remain abundant in the proliferating epithelial cells of the lens until at least day 11. The distribution pattern of EURL in the developing retina and lens suggests a role before the events leading to cell determination and differentiation . . +PF06938 Protein of unknown function (DUF1285)
Pfam-B_15060 (release 10.0). This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. The structures revealed a conserved core with domain duplication and a superficial similarity for the C-terminal domain to pleckstrin homology-like folds. The conservation of the domain-interface indicates a potential binding site that is likely to involve a nucleotide-based ligand, with genome-context and gene-fusion analyses additionally supporting a role for this family in signal transduction, possibly during oxidative stress.. +PF06939 Protein of unknown function (DUF1286)
Pfam-B_15105 (release 10.0). This family consists of several hypothetical archaeal proteins of around 120 residues in length. All members of this family seem to be Sulfolobus species specific. The function of this family is unknown.. +PF06940 Domain of unknown function (DUF1287)
Pfam-B_15116 (release 10.0). This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. This family is related to Pfam:PF00877.. +PF06941 5' nucleotidase, deoxy (Pyrimidine), cytosolic type C protein (NT5C)
Pfam-B_14848 (release 10.0). This family consists of several 5' nucleotidase, deoxy (Pyrimidine), cytosolic type C (NT5C) proteins. 5'(3')-Deoxyribonucleotidase is a ubiquitous enzyme in mammalian cells whose physiological function is not known . . +PF06942 GlpM protein
Pfam-B_15323 (release 10.0). This family consists of several bacterial GlpM membrane proteins. GlpM is a hydrophobic protein containing 109 amino acids. It is thought that GlpM may play a role in alginate biosynthesis in Pseudomonas aeruginosa .. +PF06943 LSD1 zinc finger
Pfam-B_15249 (release 10.0). This family consists of several plant specific LSD1 zinc finger domains. Arabidopsis lsd1 mutants are hyper-responsive to cell death initiators and fail to limit the extent of cell death. Superoxide is a necessary and sufficient signal for cell death propagation. LSD1 monitors a superoxide-dependent signal and negatively regulates a plant cell death pathway. LSD1 protein contains three zinc finger domains, defined by CxxCxRxxLMYxxGASxVxCxxC. It has been suggested that LSD1 defines a zinc finger protein subclass and that LSD1 regulates transcription, via either repression of a pro-death pathway or activation of an anti-death pathway, in response to signals emanating from cells undergoing pathogen-induced hypersensitive cell death . . +PF06945 Protein of unknown function (DUF1289)
Moxon SJ, Eberhardt R. Pfam-B_15170 (release 10.0). This family consists of a number of hypothetical bacterial proteins. The aligned region spans around 56 residues and contains 4 highly conserved cysteine residues towards the N-terminus. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids .. +PF06946 Phage holin
Pfam-B_15309 (release 10.0). This family consists of several Listeria bacteriophage holin proteins and related bacterial sequences. Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the build up of a holin oligomer which causes the lysis .. +PF06947 Protein of unknown function (DUF1290)
Pfam-B_15248 (release 10.0). This family consists of several bacterial small basic proteins of around 100 residues in length. The function of this family is unknown.. +PF06949 Protein of unknown function (DUF1292)
Pfam-B_15310 (release 10.0). This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown.. +PF06950 Protein of unknown function (DUF1293)
Pfam-B_15399 (release 10.0). This family consists of several bacterial and phage proteins of around 115 residues in length. The function of this family is unknown.. +PF06951 Group XII secretory phospholipase A2 precursor (PLA2G12)
Pfam-B_15422 (release 10.0). This family consists of several group XII secretory phospholipase A2 precursor (PLA2G12) (EC:3.1.1.4) proteins. Group XII and group V PLA(2)s are thought to participate in helper T cell immune response through release of immediate second signals and generation of downstream eicosanoids . . +PF06952 PsiA protein
Pfam-B_15432 (release 10.0). This family consists of several Enterobacterial PsiA proteins. The function of PsiA is unknown although it is thought that it may affect the generation of an SOS signal in Escherichia coli .. +PF06953 Arsenical resistance operon trans-acting repressor ArsD
Pfam-B_15383 (release 10.0). This family consists of several bacterial arsenical resistance operon trans-acting repressor ArsD proteins. ArsD is a trans-acting repressor of the arsRDABC operon that confers resistance to arsenicals and antimonials in Escherichia coli. It possesses two-pairs of vicinal cysteine residues, Cys(12)-Cys(13) and Cys(112)-Cys(113), that potentially form separate binding sites for the metalloids that trigger dissociation of ArsD from the operon. However, as a homodimer it has four vicinal cysteine pairs .. +PF06954 Resistin
Pfam-B_15476 (release 10.0). This family consists of several mammalian resistin proteins. Resistin is a 12.5-kDa cysteine-rich secreted polypeptide first reported from rodent adipocytes. It belongs to a multigene family termed RELMs or FIZZ proteins. Plasma resistin levels are significantly increased in both genetically susceptible and high-fat-diet-induced obese mice. Immunoneutralisation of resistin improves hyperglycemia and insulin resistance in high-fat-diet-induced obese mice, while administration of recombinant resistin impairs glucose tolerance and insulin action in normal mice. It has been demonstrated that increases in circulating resistin levels markedly stimulate glucose production in the presence of fixed physiological insulin levels, whereas insulin suppressed resistin expression. It has been suggested that resistin could be a link between obesity and type 2 diabetes .. +PF06955 Xyloglucan endo-transglycosylase (XET) C-terminus
Pfam-B_20045 (release 10.0). This family represents the C-terminus (approximately 60 residues) of plant xyloglucan endo-transglycosylase (XET). Xyloglucan is the predominant hemicellulose in the cell walls of most dicotyledons. With cellulose, it forms a network that strengthens the cell wall. XET catalyses the splitting of xyloglucan chains and the linking of the newly generated reducing end to the non-reducing end of another xyloglucan chain, thereby loosening the cell wall . Note that all family members contain the Pfam:PF00722 domain.. +PF06956 Regulator of RNA terminal phosphate cyclase
Pfam-B_17814 (release 10.0). RtcR is a sigma54-dependent enhancer binding protein that activates transcription of the rtcBA operon. The product of the rtcA gene is an RNA 3'-terminal phosphate cyclase . This domain is found at the N terminus of the RtcR sequence. RtcR, and other sigma54-dependent activators, contain Pfam:PF00158 in the central region of the protein sequence.. +PF06957 Coatomer (COPI) alpha subunit C-terminus
Pfam-B_20121 (release 10.0). This family represents the C-terminus (approximately 500 residues) of the eukaryotic coatomer alpha subunit. Coatomer (COPI) is a large cytosolic protein complex which forms a coat around vesicles budding from the Golgi apparatus. Such coatomer-coated vesicles have been proposed to play a role in many distinct steps of intracellular transport . Note that many family members also contain the Pfam:PF04053 domain.. +PF06958 S-type Pyocin
Pfam-B_20020 (release 10.0). This family represents a conserved region approximately 180 residues long within bacterial S-type pyocins. Pyocins are polypeptide toxins produced by, and active against, bacteria. S-type pyocins cause cell death by DNA breakdown due to endonuclease activity .. +PF06959 RecQ helicase protein-like 5 (RecQ5)
Pfam-B_20083 (release 10.0). This family represents a conserved region approximately 200 residues long within eukaryotic RecQ helicase protein-like 5 (RecQ5). The RecQ helicases have been implicated in DNA repair and recombination, and RecQ5 may have an important role in DNA metabolism .. +PF06961 Protein of unknown function (DUF1294)
Pfam-B_3405 (release 10.0). This family includes a number of hypothetical bacterial and archaeal proteins of unknown function.. +PF06962 Putative rRNA methylase
Pfam-B_3461 (release 10.0). This family contains a number of putative rRNA methylases. Note that many family members are hypothetical proteins.. +PF06963 Ferroportin1 (FPN1)
Pfam-B_3588 (release 10.0). This family represents a conserved region approximately 100 residues long within eukaryotic Ferroportin1 (FPN1), a protein that may play a role in iron export from the cell . This family may represent a number of transmembrane regions in Ferroportin1.. +PF06964 Alpha-L-arabinofuranosidase C-terminus
Pfam-B_3625 (release 10.0). This family represents the C-terminus (approximately 200 residues) of bacterial and eukaryotic alpha-L-arabinofuranosidase (EC:3.2.1.55). This catalyses the hydrolysis of nonreducing terminal alpha-L-arabinofuranosidic linkages in L-arabinose-containing polysaccharides .. +PF06965 Na+/H+ antiporter 1
Pfam-B_1828 (release 10.0). This family contains a number of bacterial Na+/H+ antiporter 1 proteins. These are integral membrane proteins that catalyse the exchange of H+ for Na+ in a manner that is highly dependent on the pH .. +PF06966 Protein of unknown function (DUF1295)
Pfam-B_3514 (release 10.0). This family contains a number of bacterial and eukaryotic proteins of unknown function that are approximately 300 residues long.. +PF06967 Mo-dependent nitrogenase C-terminus
Pfam-B_6998 (release 10.0). This family represents the C-terminus (approximately 80 residues) of a number of bacterial Mo-dependent nitrogenases. These are involved in nitrogen fixation in cyanobacteria . Note that many family members are hypothetical proteins.. +PF06968 Biotin and Thiamin Synthesis associated domain
Pfam-B_5417 (release 10.0). +PF06969 HemN C-terminal domain
Pfam-B_833 (release 10.0). Members of this family are all oxygen-independent coproporphyrinogen-III oxidases (HemN). This enzyme catalyses the oxygen-independent conversion of coproporphyrinogen-III to protoporphyrinogen-IX , one of the last steps in haem biosynthesis. The function of this domain is unclear, but comparison to other proteins containing a radical SAM domain (Pfam:PF04055) suggests it may be a substrate binding domain.. +PF06970 Replication initiator protein A (RepA) N-terminus
Pfam-B_1808 (release 10.0). This family of predicted proteins represents the N-terminus (approximately 80 residues) of replication initiator protein A (RepA), a DNA replication initiator in plasmids . Most proteins in this family are bacterial, but archaeal and eukaryotic members are also included .. +PF06971 Put_DNA-bind_C;
Putative DNA-binding protein N-terminus. Pfam-B_3389 (release 10.0). This family represents the N-terminus (approximately 50 residues) of a number of putative bacterial DNA-binding proteins.. +PF06972 Protein of unknown function (DUF1296)
Vella Briffa B, Eberhardt R. Pfam-B_4035 (release 10.0). This family represents a conserved region approximately 60 residues long within a number of plant proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids .. +PF06973 Domain of unknown function (DUF1297)
Pfam-B_3819 (release 10.0). This family represents the C-terminus (approximately 200 residues) of a number of archaeal proteins of unknown function. One member is annotated as being a possible carboligase enzyme.. +PF06974 Protein of unknown function (DUF1298)
Pfam-B_4362 (release 10.0). This family represents the C-terminus (approximately 170 residues) of a number of hypothetical plant proteins of unknown function.. +PF06975 Protein of unknown function (DUF1299)
Pfam-B_3488 (release 10.0). This family represents a conserved region approximately 50 residues long within a number of proteins of unknown function that seem to be specific to Arabidopsis thaliana. Note that many family members contain multiple copies of this region.. +PF06977 SdiA-regulated
Pfam-B_2520 (release 10.0). This family represents a conserved region within a number of hypothetical bacterial proteins that may be regulated by SdiA, a member of the LuxR family of transcriptional regulators . Some family members contain the Pfam:PF01436 repeat.. +PF06978 Ribonucleases P/MRP protein subunit POP1
Pfam-B_7848 (release 10.0). This family represents a conserved region approximately 150 residues long located towards the N-terminus of the POP1 subunit that is common to both the RNase MRP and RNase P ribonucleoproteins (EC:3.1.26.5) . These RNA-containing enzymes generate mature tRNA molecules by cleaving their 5' ends.. +PF06979 Protein of unknown function (DUF1301)
Pfam-B_8295 (release 10.0). This family contains a number of eukaryotic proteins of unknown function that are approximately 160 residues long.. +PF06980 Protein of unknown function (DUF1302)
Pfam-B_7023 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function that are approximately 600 residues long. Most family members seem to be from Pseudomonas.. +PF06983 3-demethylubiquinone-9 3-methyltransferase
Pfam-B_6583 (release 10.0). This family represents a conserved region approximately 100 residues long within a number of bacterial and archaeal 3-demethylubiquinone-9 3-methyltransferases (EC:2.1.1.64). Note that some family members contain more than one copy of this region, and that many members are hypothetical proteins.. +PF06984 Mitochondrial 39-S ribosomal protein L47 (MRP-L47)
Pfam-B_6890 (release 10.0). This family represents the N-terminal region (approximately 8 residues) of the eukaryotic mitochondrial 39-S ribosomal protein L47 (MRP-L47). Mitochondrial ribosomal proteins (MRPs) are the counterparts of the cytoplasmic ribosomal proteins, in that they fulfil similar functions in protein biosynthesis. However, they are distinct in number, features and primary structure .. +PF06985 Heterokaryon incompatibility protein (HET)
Pfam-B_8200 (release 10.0). This family represents a conserved region approximately 150 residues long within various heterokaryon incompatibility proteins that seem to be restricted to ascomycete fungi. Genetic differences in specific het genes prevent a viable heterokaryotic fungal cell from being formed by the fusion of filaments from two different wild-type strains . Many family members also contain the Pfam:PF00400 repeat and the Pfam:PF05729 domain.. +PF06986 Type-1V conjugative transfer system mating pair stabilisation
Pfam-B_8400 (release 10.0). TraN is a large cysteine-rich outer membrane protein involved in the mating-pair stabilisation (adhesin) component of the F-type conjugative plasmid transfer system. TraN is believed to interact with the core type IV secretion system apparatus through the TraV protein .. +PF06988 NifT/FixU protein
Pfam-B_5696 (release 10.0). This family consists of several NifT and FixU bacterial proteins. The function of NifT is unknown. It is thought that the protein may be involved in biosynthesis of the FeMo cofactor of nitrogenase, although perturbation of nifT expression in K. pneumoniae has only a limited effect on nitrogen fixation .. +PF06989 BAALC N-terminus
Pfam-B_5793 (release 10.0). This family represents the N-terminal region of the mammalian BAALC proteins. BAALC (brain and acute leukaemia, cytoplasmic) is highly conserved among mammals but evidently absent from lower organisms. Two isoforms are specifically expressed in neuroectoderm-derived tissues, but not in tumours or cancer cell lines of non-neural tissue origin. It has been shown that blasts from a subset of patients with acute leukaemia greatly overexpress eight different BAALC transcripts, resulting in five protein isoforms. Among patients with acute myeloid leukaemia, those overexpressing BAALC show distinctly poor prognosis, pointing to a key role of the BAALC products in leukaemia. It has been suggested that BAALC is a gene implicated in both neuroectodermal and hematopoietic cell functions .. +PF06990 Galactose-3-O-sulfotransferase
Pfam-B_6301 (release 10.0). This family consists of several mammalian galactose-3-O-sulfotransferase proteins. Gal-3-O-sulfotransferase is thought to play a critical role in 3'-sulfation of N-acetyllactosamine in both O- and N-glycans .. +PF06991 MFAP1_C;
Splicing factor, Prp19-binding domain. Pfam-B_8343 (release 10.0). This family represents the C-terminus (approximately 300 residues) of proteins that are involved as binding partners for Prp19 as part of the nuclear pore complex. The family in Drosophila is necessary for pre-mRNA splicing, and the human protein has been found in purifications of the spliceosome. In the past this family was thought, erroneously, to be associated with microfibrillin.. +PF06992 Replication protein P
Pfam-B_6611 (release 10.0). This family consists of several Bacteriophage lambda replication protein P like proteins. The bacteriophage lambda P protein promotes replication of the phage chromosome by recruiting a key component of the cellular replication machinery to the viral origin. Specifically, P protein delivers one or more molecules of Escherichia coli DnaB helicase to a nucleoprotein structure formed by the lambda O initiator at the lambda replication origin . . +PF06993 Protein of unknown function (DUF1304)
Pfam-B_7246 (release 10.0). This family consists of several hypothetical bacterial proteins of around 120 residues in length. The function of this family is unknown.. +PF06994 Involucrin
Pfam-B_8443 (release 10.0). This family represents a conserved region approximately 60 residues long, multiple copies of which are found within eukaryotic involucrin, and which is rich in glutamine and glutamic acid residues. Involucrin forms part of the insoluble cornified cell envelope (a specialised protective barrier) of stratified squamous epithelia . Members of this family seem to be restricted to mammals.. +PF06995 Phage P2 GpU
Pfam-B_7670 (release 10.0). This family consists of several bacterial and phage proteins of around 130 residues in length which seem to be related to the bacteriophage P2 GpU protein (Swiss:O64315) which is thought to be involved in tail assembly .. +PF06996 Protein of unknown function (DUF1305)
Pfam-B_9388 (release 10.0). This family consists of several hypothetical bacterial proteins of around 300 residues in length. The function of this family is unknown although one member (Swiss:Q93IT4) from Salmonella enterica is thought to be involved in virulence .. +PF06998 Protein of unknown function (DUF1307)
Pfam-B_10058 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. Some family members are described as putative lipoproteins but the function of the family is unknown.. +PF06999 Sucrase/ferredoxin-like
Pfam-B_8856 (release 10.0). This family contains a number of bacterial and eukaryotic proteins approximately 400 residues long that resemble ferredoxin and appear to have sucrolytic activity .. +PF07000 Protein of unknown function (DUF1308)
Pfam-B_13288 (release 10.0). This family consists of several hypothetical eukaryotic sequences of around 400 residues in length. The function of this family is unknown.. +PF07001 BAT2 N-terminus
Pfam-B_9101 (release 10.0). This family represents the N-terminus (approximately 200 residues) of the proline-rich protein BAT2. BAT2 is similar to other proteins with large proline-rich domains, such as some nuclear proteins, collagens, elastin, and synapsin .. +PF07002 Copine
Pfam-B_9705 (release 10.0). This family represents a conserved region approximately 180 residues long within eukaryotic copines. Copines are Ca(2+)-dependent phospholipid-binding proteins that are thought to be involved in membrane-trafficking, and may also be involved in cell division and growth .. +PF07004 DUF1309;
Sperm-tail PG-rich repeat. Pfam-B_8734 (release 10.0). This family represents a short conserved region carrying a PGP motif that is repeated in eukaryotic proteins of sperm-tails. Shippo orthologues from some species may include up to 40 Pro-Gly-Pro repeats.. +PF07005 Hop; PF07005;
Protein of unknown function, DUF1537. Pfam-B_8609 (release 10.0). This conserved region is found in proteins of unknown function in a range of Proteobacteria as well as the Gram-positive Oceanobacillus iheyensis.. +PF07006 Protein of unknown function (DUF1310)
Pfam-B_10153 (release 10.0). This family consists of several hypothetical proteins of around 125 residues in length. Members of this family seem to be specific to Listeria and Streptococcus species. The function of this family is unknown.. +PF07007 Protein of unknown function (DUF1311)
Pfam-B_10506 (release 10.0). This family consists of several bacterial proteins of around 120 residues in length. Members of this family contain four highly conserved cysteine residues. The function of this family is unknown.. +PF07009 Protein of unknown function (DUF1312)
Pfam-B_10829 (release 10.0). This family consists of several bacterial proteins of around 120 residues in length. The function of this family is unknown.. +PF07010 Endomucin
Pfam-B_10834 (release 10.0). This family consists of several mammalian endomucin proteins. Endomucin is an early endothelial-specific antigen that is also expressed on putative hematopoietic progenitor cells. . +PF07011 Protein of unknown function (DUF1313)
Pfam-B_10989 (release 10.0). This family consists of several hypothetical plant proteins of around 100 residues in length. The function of this family is unknown.. +PF07012 Curlin associated repeat
Pfam-B_10299 (release 10.0). This family consists of several bacterial repeats of around 30 residues in length. These repeats are often found in multiple copies in the curlin proteins CsgA and CsgB. Curli fibres are thin aggregative surface fibres, connected with adhesion, which bind laminin, fibronectin, plasminogen, human contact phase proteins, and major histocompatibility complex (MHC) class I molecules. Curli fibres are coded for by the csg gene cluster, which is comprised of two divergently transcribed operons. One operon encodes the csgB, csgA, and csgC genes, while the other encodes csgD, csgE, csgF, and csgG. The assembly of the fibres is unique and involves extracellular self-assembly of the curlin subunit (CsgA), dependent on a specific nucleator protein (CsgB). CsgD is a transcriptional activator essential for expression of the two curli fibre operons, and CsgG is an outer membrane lipoprotein involved in extracellular stabilisation of CsgA and CsgB . . +PF07013 Protein of unknown function (DUF1314)
Pfam-B_10999 (release 10.0). This family consists of several Alphaherpesvirus proteins of around 200 residues in length. The function of this family is unknown.. +PF07014 Hs1pro-1;
Hs1pro-1 protein C-terminus. Moxon SJ, Vella Briffa B. Pfam-B_11205 (release 10.0). This family represents the C-terminus (approximately 270 residues) of a number of plant Hs1pro-1 proteins, which are believed to confer nematode resistance .. +PF07015 VirC1 protein
Pfam-B_11309 (release 10.0). This family consists of several bacterial VirC1 proteins. In Agrobacterium tumefaciens, a cis-active 24-base-pair sequence adjacent to the right border of the T-DNA, called overdrive, stimulates tumour formation by increasing the level of T-DNA processing. It is thought that the virC operon which enhances T-DNA processing probably does so because the VirC1 protein interacts with overdrive. It has now been shown that the virC1 gene product binds to overdrive but not to the right border of T-DNA . . +PF07016 Cysteine-rich acidic integral membrane protein precursor
Pfam-B_11042 (release 10.0). This family consists of several 24 residue repeats from the Trypanosoma brucei cysteine-rich, acidic integral membrane protein precursor (CRAM). CRAM is concentrated in the flagellar pocket, an invagination of the cell surface of the trypanosome where endocytosis has been documented .. +PF07017 Antimicrobial peptide resistance and lipid A acylation protein PagP
Pfam-B_11014 (release 10.0). This family consists of several bacterial antimicrobial peptide resistance and lipid A acylation (PagP) proteins. The bacterial outer membrane enzyme PagP transfers a palmitate chain from a phospholipid to lipid A. In a number of pathogenic Gram-negative bacteria, PagP confers resistance to certain cationic antimicrobial peptides produced during the host innate immune response. . +PF07019 Rab5-interacting protein (Rab5ip)
Pfam-B_11031 (release 10.0). This family consists of several Rab5-interacting protein (RIP5 or Rab5ip ) sequences. The ras-related GTPase rab5 is rate-limiting for homotypic early endosome fusion. Rab5ip represents a novel rab5 interacting protein that may function on endocytic vesicles as a receptor for rab5-GDP and participate in the activation of rab5 .. +PF07020 Orthopoxvirus C10L protein
Pfam-B_12732 (release 10.0). This family consists of several Orthopoxvirus C10L proteins. C10L viral protein can play an important role in vaccinia virus evasion of the host immune system. This role may consist of the blockade of IL-1 receptors by the C10L protein, a homologue of the IL-1 Ra . . +PF07021 Methionine biosynthesis protein MetW
Pfam-B_11086 (release 10.0). This family consists of several bacterial and one archaeal methionine biosynthesis MetW proteins. Biosynthesis of methionine from homoserine in Pseudomonas putida takes place in three steps. The first step is the acylation of homoserine to yield an acyl-L-homoserine. This reaction is catalysed by the products of the metXW genes and is equivalent to the first step in enterobacteria, gram-positive bacteria and fungi, except that in these microorganisms the reaction is catalysed by a single polypeptide (the product of the metA gene in Escherichia coli and the met5 gene product in Neurospora crassa). In Pseudomonas putida, as in gram-positive bacteria and certain fungi, the second and third steps are a direct sulfhydrylation that converts the O-acyl-L-homoserine into homocysteine and further methylation to yield methionine. The latter reaction can be mediated by either of the two methionine synthetases present in the cells . . +PF07022 Bacteriophage CI repressor helix-turn-helix domain
Pfam-B_11145 (release 10.0). This family consists of several phage CI repressor proteins and related bacterial sequences. The CI repressor is known to function as a transcriptional switch, determining whether transcription is lytic or lysogenic .. +PF07023 Protein of unknown function (DUF1315)
Pfam-B_11170 (release 10.0). This family consists of several bacterial proteins of around 90 residues in length. The function of this family is unknown.. +PF07024 ImpE protein
Pfam-B_11208 (release 10.0). This family consists of several bacterial proteins including ImpE (Swiss:Q93EC9) from Rhizobium leguminosarum. It has been suggested that the imp locus is involved in the secretion to the environment of proteins, including periplasmic RbsB protein, that cause blocking of infection specifically in pea plants . The exact function of this family is unknown.. +PF07026 Protein of unknown function (DUF1317)
Pfam-B_12646 (release 10.0). This family consists of several hypothetical bacterial and phage proteins of around 60 residues in length. The function of this family is unknown.. +PF07027 Protein of unknown function (DUF1318)
Pfam-B_11321 (release 10.0). This family consists of several bacterial proteins of around 100 residues in length and is often known as YdbL. The function of this family is unknown.. +PF07028 Protein of unknown function (DUF1319)
Pfam-B_10434 (release 10.0). This family contains a number of viral proteins of unknown function approximately 200 residues long. Family members seem to be restricted to badnaviruses.. +PF07029 CryBP1 protein
Pfam-B_11415 (release 10.0). This family consists of several CryBP1 like proteins from Bacillus thuringiensis and Paenibacillus popilliae. Members of this family are thought to be involved in the overall toxicity of the bacteria to their hosts [1,2].. +PF07030 Protein of unknown function (DUF1320)
Pfam-B_13638 (release 10.0). This family consists of both hypothetical bacterial and phage proteins of around 145 residues in length. The function of this family is unknown.. +PF07032 Protein of unknown function (DUF1322)
Pfam-B_13233 (release 10.0). This family consists of several hypothetical 9.4 kDa Borrelia burgdorferi (Lyme disease spirochete) proteins of around 78 residues in length. The function of this family is unknown.. +PF07033 Orthopoxvirus B11R protein
Pfam-B_13245 (release 10.0). This family consists of several Orthopoxvirus B11R proteins of around 70 residues in length. The function of this family is unknown.. +PF07034 Origin recognition complex (ORC) subunit 3 N-terminus
Pfam-B_10452 (release 10.0). This family represents the N-terminus (approximately 300 residues) of subunit 3 of the eukaryotic origin recognition complex (ORC). Origin recognition complex (ORC) is composed of six subunits that are essential for cell viability. They collectively bind to the autonomously replicating sequence (ARS) in a sequence-specific manner and lead to the chromatin loading of other replication factors that are essential for initiation of DNA replication .. +PF07035 Colon cancer-associated protein Mic1-like
Pfam-B_10041 (release 10.0). This family represents the C-terminus (approximately 160 residues) of a number of proteins that resemble colon cancer-associated protein Mic1.. +PF07037 Putative transcription regulator (DUF1323)
Pfam-B_12705 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 120 residues in length. This family appears to have an HTH domain and is therefore likely to act as a transcriptional regulator.. +PF07038 Protein of unknown function (DUF1324)
Pfam-B_12735 (release 10.0). This family consists of several Circovirus proteins of around 60 residues in length. The function of this family is unknown.. +PF07039 SGF29 tudor-like domain
Pfam-B_13304 (release 10.0). This domain is found in the yeast protein SAGA-associated factor 29. This domain is related to members of the Tudor domain superfamily such as Pfam:PF05641. The SAGA complex is involved in RNA polymerase II-dependent transcriptional regulation. The membership of the tudor domain superfamily suggests this domain may bind to RNA.. +PF07040 Protein of unknown function (DUF1326)
Pfam-B_11597 (release 10.0). This family consists of several hypothetical bacterial proteins which seem to be found exclusively in Rhizobium and Ralstonia species. Members of this family are typically around 210 residues in length and contain 5 highly conserved cysteine residues at their N-terminus. The function of this family is unknown.. +PF07041 Protein of unknown function (DUF1327)
Pfam-B_11630 (release 10.0). This family consists of several hypothetical bacterial proteins of around 115 residues in length which seem to be specific to Escherichia coli. The function of this family is unknown.. +PF07042 TrfA protein
Pfam-B_12321 (release 10.0). This family consists of several bacterial TrfA proteins. The trfA operon of broad-host-range IncP plasmids is essential to activate the origin of vegetative replication in diverse species. The trfA operon encodes two ORFs. The first ORF is highly conserved and encodes a putative single-stranded DNA binding protein (Ssb). The second, trfA, contains two translational starts as in the IncP alpha plasmids, generating related polypeptides of 406 (TrfA1) and 282 (TrfA2) amino acids. TrfA2 is very similar to the IncP alpha product, whereas the N-terminal region of TrfA1 shows very little similarity to the equivalent region of IncP alpha TrfA1. This region has been implicated in the ability of IncP alpha plasmids to replicate efficiently in Pseudomonas aeruginosa .. +PF07043 Protein of unknown function (DUF1328)
Pfam-B_12535 (release 10.0). This family consists of several hypothetical bacterial proteins of around 50 residues in length. The function of this family is unknown.. +PF07044 Protein of unknown function (DUF1329)
Pfam-B_12608 (release 10.0). This family consists of several hypothetical bacterial proteins of around 475 residues in length. The majority of family members are from Pseudomonas species but the family also contains sequences from Shewanella oneidensis and Thauera aromatica.. +PF07045 Protein of unknown function (DUF1330)
Pfam-B_12466 (release 10.0). This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown.. +PF07046 Cytoplasmic repetitive antigen (CRA) like repeat
Pfam-B_12798 (release 10.0). This family consists of several repeats of around 42 residues in length. These repeated sequences are found in multiple copies in Trypanosoma cruzi antigens, Swiss:Q26907 contains 23 copies of this repeat.. +PF07047 Optic atrophy 3 protein (OPA3)
Pfam-B_12863 (release 10.0). This family consists of several optic atrophy 3 (OPA3) proteins. OPA3 deficiency causes type III 3-methylglutaconic aciduria (MGA) in humans. This disease manifests with early bilateral optic atrophy, spasticity, extrapyramidal dysfunction, ataxia, and cognitive deficits, but normal longevity . . +PF07048 Protein of unknown function (DUF1331)
Pfam-B_12865 (release 10.0). This family consists of several Circovirus proteins of around 35 residues in length. Members of this family are described as ORF-10 proteins and their function is unknown.. +PF07051 Ovarian carcinoma immunoreactive antigen (OCIA)
Pfam-B_13720 (release 10.0). This family consists of several ovarian carcinoma immunoreactive antigen (OCIA) and related eukaryotic sequences. The function of this family is unknown [1,2].. +PF07052 Hepatocellular carcinoma-associated antigen 59
Pfam-B_10544 (release 10.0). This family represents a conserved region approximately 100 residues long within mammalian hepatocellular carcinoma-associated antigen 59 and similar proteins. Family members are found in a variety of eukaryotes, mainly as hypothetical proteins.. +PF07054 Pericardin like repeat
Pfam-B_13724 (release 10.0). This family consists of several repeated sequences of around 34 residues in length. This repeat is found in multiple copies in the Drosophila pericardin and other extracellular matrix proteins.. +PF07055 scADH;
Enoyl reductase FAD binding domain. Vella Briffa B, Coggill P. Pfam-B_10602 (release 10.0). This family carries the region of the enzyme trans-2-enoyl-CoA reductase, at the very C-terminus, that binds to FAD. The activity was characterised in Euglena where an unusual fatty acid synthesis pathway in mitochondria performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. The full enzyme catalyses the reduction of enoyl-CoA to acyl-CoA. The conserved region is seen as the motif FGFxxxxxDY .. +PF07056 Protein of unknown function (DUF1335)
Pfam-B_10636 (release 10.0). This family represents a conserved region approximately 130 residues long within a number of proteins of unknown function that seem to be specific to the white spot syndrome virus (WSSV).. +PF07057 DNA helicase TraI
Pfam-B_10638 (release 10.0). This family represents a conserved region approximately 130 residues long within the bacterial DNA helicase TraI (EC:3.6.1.-). TraI is a bifunctional protein that catalyses the unwinding of duplex DNA as well as acts as a sequence-specific DNA trans-esterase, providing the site- and strand-specific nick required to initiate DNA transfer .. +PF07058 Myosin II heavy chain-like
Pfam-B_10658 (release 10.0). This family represents a conserved region within a number of myosin II heavy chain-like proteins that seem to be specific to Arabidopsis thaliana.. +PF07059 Protein of unknown function (DUF1336)
Pfam-B_10173 (release 10.0). This family represents the C-terminus (approximately 250 residues) of a number of hypothetical plant proteins of unknown function.. +PF07061 DUF1337;
Swi5 is involved in meiotic DNA repair synthesis and meiotic joint molecule formation . It is known to interact with Swi2, Rhp51 and Swi6 .. +PF07062 Clc-like
Pfam-B_11218 (release 10.0). This family contains a number of Clc-like proteins that are approximately 250 residues long.. +PF07063 Domain of unknown function (DUF1338)
Pfam-B_10864 (release 10.0). This domain is found in a variety of bacterial and fungal hypothetical proteins of unknown function. The structure of this domain has been solved by structural genomics. The structure implies a zinc-binding function, so it is a putative metal hydrolase (information derived from TOPSAN for PDB:3iuz).. +PF07064 DUF1339;
Vella Briffa B, Wood V. Pfam-B_11581 (release 10.0). RIC1 has been identified in yeast as a Golgi protein involved in retrograde transport to the cis-Golgi network. It forms a heterodimer with Rgp1 and functions as a guanyl-nucleotide exchange factor .. +PF07065 D123
Pfam-B_10915 (release 10.0). This family contains a number of eukaryotic D123 proteins approximately 330 residues long. It has been shown that mutated variants of D123 exhibit temperature-dependent differences in their degradation rate . D123 proteins are regulators of eIF2, the central regulator of translational initiation .. +PF07066 Phage_Lacto_M3;
Lactococcus phage M3 protein. Pfam-B_13997 (release 10.0). This family consists of several Lactococcus phage middle-3 (M3) proteins of around 160 residues in length. The function of this family is unknown.. +PF07067 Protein of unknown function (DUF1340)
Pfam-B_14083 (release 10.0). This family consists of several hypothetical Streptococcus thermophilus bacteriophage proteins of around 235 residues in length. The function of this family is unknown.. +PF07068 Major capsid protein Gp23
Pfam-B_12245 (release 10.0). This family contains a number of major capsid Gp23 proteins approximately 500 residues long, from T4-like bacteriophages.. +PF07069 Porcine reproductive and respiratory syndrome virus 2b
Pfam-B_13261 (release 10.0). This family consists of several Porcine reproductive and respiratory syndrome virus (PRRSV) ORF2b proteins. The function of this family is unknown; however, it is known that large amounts of 2b protein are present in the virion and it is thought that this protein may be an integral component of the virion .. +PF07070 SpoOM protein
Pfam-B_13263 (release 10.0). This family consists of several bacterial SpoOM proteins which are thought to control sporulation in Bacillus subtilis. Spo0M exerts certain negative effects on sporulation and its gene expression is controlled by sigmaH .. +PF07071 Protein of unknown function (DUF1341)
Pfam-B_14024 (release 10.0). This family consists of several hypothetical bacterial proteins of around 220 residues in length. The function of this family is unknown.. +PF07072 Protein of unknown function (DUF1342)
Pfam-B_14075 (release 10.0). This family consists of several hypothetical bacterial proteins of around 250 residues in length. Members of this family are often known as YacF after the Escherichia coli protein Swiss:P36680. The function of this family is unknown.. +PF07073 Modulator of Rho-dependent transcription termination (ROF)
Pfam-B_13280 (release 10.0). This family consists of several bacterial modulator of Rho-dependent transcription termination (ROF) proteins. ROF binds transcription termination factor Rho and inhibits Rho-dependent termination in vivo . . +PF07074 Translocon-associated protein, gamma subunit (TRAP-gamma)
Pfam-B_13437 (release 10.0). This family consists of several eukaryotic translocon-associated protein, gamma subunit (TRAP-gamma) sequences. The translocation site (translocon), at which nascent polypeptides pass through the endoplasmic reticulum membrane, contains a component previously called 'signal sequence receptor' that is now renamed as 'translocon-associated protein' (TRAP). The TRAP complex is comprised of four membrane proteins alpha, beta, gamma and delta which are present in a stoichiometric relation, and are genuine neighbours in intact microsomes. The gamma subunit is predicted to span the membrane four times . . +PF07075 Protein of unknown function (DUF1343)
Pfam-B_13635 (release 10.0). This family consists of several hypothetical bacterial proteins of around 400 residues in length. The function of this family is unknown.. +PF07076 Protein of unknown function (DUF1344)
Pfam-B_13761 (release 10.0). This family consists of several short, hypothetical bacterial proteins of around 80 residues in length. Members of this family are found in Rhizobium, Agrobacterium and Brucella species. The function of this family is unknown.. +PF07077 Protein of unknown function (DUF1345)
Pfam-B_13768 (release 10.0). This family consists of several hypothetical bacterial proteins of around 230 residues in length. The function of this family is unknown.. +PF07078 DUF1346;
Forty-two-three protein. Pfam-B_13991 (release 10.0). This family consists of several mammalian proteins of around 320 residues in length called 40-2-3 proteins. The function of this family is unknown.. +PF07079 Protein of unknown function (DUF1347)
Pfam-B_14317 (release 10.0). This family consists of several hypothetical bacterial proteins of around 610 residues in length. Members of this family are highly conserved and seem to be specific to Chlamydia species. The function of this family is unknown.. +PF07080 Protein of unknown function (DUF1348)
Pfam-B_14137 (release 10.0). This family consists of several highly conserved hypothetical proteins of around 150 residues in length. The function of this family is unknown.. +PF07081 Protein of unknown function (DUF1349)
Pfam-B_14150 (release 10.0). This family consists of several hypothetical bacterial proteins but contains one sequence (Swiss:P40893) from Saccharomyces cerevisiae. Members of this family are typically around 200 residues in length. The function of this family is unknown.. +PF07082 Protein of unknown function (DUF1350)
Pfam-B_14167 (release 10.0). This family consists of several hypothetical proteins from both cyanobacteria and plants. Members of this family are typically around 250 residues in length. The function of this family is unknown but the species distribution indicates that the family may be involved in photosynthesis.. +PF07083 Protein of unknown function (DUF1351)
Pfam-B_14178 (release 10.0). This family consists of several bacterial and phage proteins of around 230 residues in length. The function of this family is unknown.. +PF07084 Thyroid hormone-inducible hepatic protein Spot 14
Pfam-B_14186 (release 10.0). This family consists of several thyroid hormone-inducible hepatic protein (Spot 14 or S14) sequences. Mainly expressed in tissues that synthesise triglycerides, the mRNA coding for Spot 14 has been shown to be increased in rat liver by insulin, dietary carbohydrates, glucose in hepatocyte culture medium, as well as thyroid hormone. In contrast, dietary fats and polyunsaturated fatty acids, have been shown to decrease the amount of Spot 14 mRNA, while an elevated level of cAMP acts as a dominant negative factor. In addition, liver-specific factors or chromatin organisation of the gene have been shown to contribute to the regulation of its expression . Spot 14 protein is thought to be required for induction of hepatic lipogenesis .. +PF07085 DRTGG domain
+PF07086 Protein of unknown function (DUF1352)
Pfam-B_14369 (release 10.0). This family consists of several hypothetical eukaryotic proteins of around 190 residues in length. The function of this family is unknown.. +PF07087 Protein of unknown function (DUF1353)
Pfam-B_14433 (release 10.0). This family consists of several hypothetical bacterial proteins of around 100 residues in length. The function of this family is unknown.. +PF07088 GvpD gas vesicle protein
Pfam-B_14302 (release 10.0). This family consists of several archaeal GvpD gas vesicle proteins. GvpD is thought to be involved in the regulation of gas vesicle formation [1,2].. +PF07090 Protein of unknown function (DUF1355)
Pfam-B_14563 (release 10.0). This family consists of several hypothetical bacterial proteins of around 250 residues in length. The function of this family is unknown. The structure of this domain was solved by the Midwest Center for Structural Genomics (MCSG). The structure has been classified as part of the Class-I Glutamine amidotransferase superfamily.. +PF07091 Ribosomal RNA methyltransferase (FmrO)
Pfam-B_14605 (release 10.0). This family consists of several bacterial ribosomal RNA methyltransferase (aminoglycoside-resistance methyltransferase) proteins [1,2].. +PF07092 Protein of unknown function (DUF1356)
Pfam-B_14617 (release 10.0). This family consists of several hypothetical mammalian proteins of around 250 residues in length. The function of this family is unknown.. +PF07093 SGT1 protein
Pfam-B_14698 (release 10.0). This family consists of several eukaryotic SGT1 proteins. Human SGT1 or hSGT1 is known to suppress GCR2 and is highly expressed in the muscle and heart. The function of this family is unknown although it has been speculated that SGT1 may be functionally analogous to the Gcr2p protein of Saccharomyces cerevisiae which is known to be a regulatory factor of glycolytic gene expression .. +PF07094 Protein of unknown function (DUF1357)
Pfam-B_14833 (release 10.0). This family consists of several hypothetical bacterial proteins of around 225 residues in length. Members of this family appear to be specific to Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown.. +PF07095 Intracellular growth attenuator protein IgaA
Pfam-B_14923 (release 10.0). This family consists of several bacterial intracellular growth attenuator (IgaA) proteins. IgaA is involved in negative control of bacterial proliferation within fibroblasts. IgaA is homologous to the E. coli YrfF and P. mirabilis UmoB proteins. Whereas the biological function of YrfF is currently unknown, UmoB has been shown elsewhere to act as a positive regulator of FlhDC, the master regulator of flagella and swarming. FlhDC has been shown to repress cell division during P. mirabilis swarming, suggesting that UmoB could repress cell division via FlhDC. This biological function, if maintained in S. enterica, could sustain a putative negative control of cell division and growth exerted by IgaA in intracellular bacteria . . +PF07096 Protein of unknown function (DUF1358)
Pfam-B_14731 (release 10.0). This family consists of several hypothetical eukaryotic proteins of around 125 residues in length. The function of this family is unknown.. +PF07097 Protein of unknown function (DUF1359)
Pfam-B_14784 (release 10.0). This family consists of several hypothetical bacterial and phage proteins of around 100 residues in length. Members of this family seem to be found exclusively in Lactococcus lactis and the bacteriophages that infect this species. The function of this family is unknown.. +PF07098 Protein of unknown function (DUF1360)
Pfam-B_14863 (release 10.0). This family consists of several bacterial proteins of around 115 residues in length. Members of this family are found in Bacillus species and Streptomyces coelicolor, the function of the family is unknown.. +PF07099 Protein of unknown function (DUF1361)
Pfam-B_14870 (release 10.0). This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown although some members are annotated as being putative integral membrane proteins.. +PF07100 DUF1362;
Anabaena sensory rhodopsin transducer. Pfam-B_14972 (release 10.0). The family of bacterial Anabaena sensory rhodopsin transducers are likely to bind sugars or related metabolites. The entire protein is comprised of a single globular domain with an eight-stranded beta-sandwich fold. There are a few characteristics which define this beta-sandwich fold as being distinct from other so-named folds, and these are: 1) a well conserved tryptophan, usually following a polar residue, present at the start of the first strand; this tryptophan appears to be central to a hydrophobic interaction required to hold the two beta-sheets of the sandwich together, and 2) a nearly absolutely conserved asparagine located at the end of the second beta-strand, that hydrogen bonds with the backbone carbonyls of the residues 2 and 4 positions downstream from it, thereby stabilising the characteristic tight turn between strands 2 and 3 of the structure.. +PF07101 Protein of unknown function (DUF1363)
Pfam-B_14992 (release 10.0). This family consists of several Trypanosoma brucei putative variant specific antigen proteins of around 80 residues in length.. +PF07102 Protein of unknown function (DUF1364)
Pfam-B_14821 (release 10.0). This family consists of several bacterial and phage proteins of around 95 residues in length. The function of this family is unknown. . +PF07103 Protein of unknown function (DUF1365)
Pfam-B_14846 (release 10.0). This family consists of several bacterial and plant proteins of around 250 residues in length. The function of this family is unknown.. +PF07104 Protein of unknown function (DUF1366)
Pfam-B_14849 (release 10.0). This family consists of several hypothetical Streptococcus thermophilus bacteriophage proteins of around 130 residues in length. One of the sequences in this family, from phage Sfi11 (Swiss:O80186) is known as Gp149. The function of this family is unknown. . +PF07105 Protein of unknown function (DUF1367)
Pfam-B_14892 (release 10.0). This family consists of several highly conserved, hypothetical phage proteins of around 200 residues in length. The function of this family is unknown. Some proteins are annotated as IrsA (intracellular response to stress).. +PF07106 Tat binding protein 1(TBP-1)-interacting protein (TBPIP)
Pfam-B_14830 (release 10.0). This family consists of several eukaryotic TBP-1 interacting protein (TBPIP) sequences. TBP-1 has been demonstrated to interact with the human immunodeficiency virus type 1 (HIV-1) viral protein Tat, then modulate the essential replication process of HIV. In addition, TBP-1 has been shown to be a component of the 26S proteasome, a basic multiprotein complex that degrades ubiquitinated proteins in an ATP-dependent fashion. Human TBPIP interacts with human TBP-1 then modulates the inhibitory action of human TBP-1 on HIV-Tat-mediated transactivation .. +PF07107 Wound-induced protein WI12
Pfam-B_15477 (release 10.0). This family consists of several plant wound-induced protein sequences related to WI12 from Mesembryanthemum crystallinum (Swiss:Q9XES3). Wounding, methyl jasmonate, and pathogen infection are known to induce local WI12 expression. WI12 expression is also thought to be developmentally controlled in the placenta and developing seeds. WI12 preferentially accumulates in the cell wall and it has been suggested that it plays a role in the reinforcement of cell wall composition after wounding and during plant development . This family seems partly related to the NTF2-like superfamily.. +PF07108 PipA protein
Pfam-B_15507 (release 10.0). This family consists of several Salmonella PipA (pathogenicity island-encoded protein A) and related phage sequences. PipA is thought to contribute to enteric but not to systemic salmonellosis . The family carries a highly conserved HEXXH sequence motif along with several highly conserved glutamic acid residues which might be indicative of the family being a metallo-peptidase.. +PF07109 Magnesium-protoporphyrin IX methyltransferase C-terminus
Pfam-B_12015 (release 10.0). This family represents the C-terminus (approximately 100 residues) of bacterial and eukaryotic Magnesium-protoporphyrin IX methyltransferase (EC:2.1.1.11). This converts magnesium-protoporphyrin IX to magnesium-protoporphyrin IX methylester using S-adenosyl-L-methionine as a cofactor .. +PF07110 EthD domain
Pfam-B_15539 (release 10.0). This family consists of several bacterial sequences which are related to the EthD protein of Rhodococcus ruber (Swiss:Q93EX2). In Rhodococcus ruber, EthD is thought to be involved in the degradation of ethyl tert-butyl ether (ETBE). EthD synthesis is induced by ETBE but its exact function is unknown; it is, however, thought to be essential to the ETBE degradation system.. +PF07111 Alpha helical coiled-coil rod protein (HCR)
Pfam-B_15548 (release 10.0). This family consists of several mammalian alpha helical coiled-coil rod HCR proteins. The function of HCR is unknown but it has been implicated in psoriasis in humans and is thought to affect keratinocyte proliferation .. +PF07112 Protein of unknown function (DUF1368)
Pfam-B_14994 (release 10.0). This family consists of several proteins which seem to be specific to red algae plasmids. Members of this family are typically around 415 residues in length. The function of this family is unknown.. +PF07114 Protein of unknown function (DUF1370)
Pfam-B_15274 (release 10.0). This family consists of several hypothetical eukaryotic proteins of around 200 residues in length. Members of this family seem to be specific to mammals and their function is unknown.. +PF07115 Protein of unknown function (DUF1371)
Pfam-B_15275 (release 10.0). This family consists of several hypothetical bacterial proteins of around 110 residues in length. The function of this family is unknown but members seem to be specific to Borrelia burgdorferi (Lyme disease spirochete).. +PF07116 Protein of unknown function (DUF1372)
Pfam-B_15278 (release 10.0). This family consists of several Streptococcus bacteriophage sequences and related proteins from Streptococcus species. Members of this family are typically around 100 residues in length and their function is unknown.. +PF07117 Protein of unknown function (DUF1373)
Pfam-B_15084 (release 10.0). This family consists of several hypothetical proteins which seem to be specific to Oryzias latipes (Japanese ricefish). Members of this family are typically around 200 residues in length. The function of this family is unknown.. +PF07118 Protein of unknown function (DUF1374)
Pfam-B_15191 (release 10.0). This family consists of several hypothetical Sulfolobus virus proteins of around 100 residues in length. The function of this family is unknown.. +PF07119 Protein of unknown function (DUF1375)
Pfam-B_15247 (release 10.0). This family consists of several hypothetical, putative lipoproteins of around 80 residues in length. Members of this family seem to be specific to the Class Gammaproteobacteria. The function of this family is unknown.. +PF07120 Protein of unknown function (DUF1376)
Pfam-B_15380 (release 10.0). This family consists of several hypothetical bacterial proteins of around 95 residues in length. The function of this family is unknown.. +PF07122 Variable length PCR target protein (VLPT)
Pfam-B_15500 (release 10.0). This family consists of a number of 29 residue repeats which seem to be specific to the Ehrlichia chaffeensis variable length PCR target (VLPT) protein. Ehrlichia chaffeensis is a tick-transmitted rickettsial agent and is responsible for human monocytic ehrlichiosis (HME). The function of this family is unknown . . +PF07123 PsbW_2;
Photosystem II reaction centre W protein (PsbW). Pfam-B_15117 (release 10.0). This family consists of several plant specific photosystem II reaction centre W (PsbW) proteins. PsbW is a nuclear-encoded protein located in the thylakoid membrane of the chloroplast. PsbW is a core component of photosystem II but not photosystem I . This family does not appear to be related to Pfam:PF03912.. +PF07124 Phytoreovirus outer capsid protein P8
Pfam-B_15606 (release 10.0). This family consists of several Phytoreovirus outer capsid protein P8 sequences .. +PF07125 Protein of unknown function (DUF1378)
Pfam-B_15650 (release 10.0). This family consists of hypothetical bacterial and phage proteins of around 59 residues in length. Bacterial members of this family seem to be specific to Enterobacteria. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids .. +PF07126 Protein of unknown function (DUF1379)
Pfam-B_15837 (release 10.0). This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown.. +PF07127 Late nodulin protein
Pfam-B_15657 (release 10.0). This family consists of several plant specific late nodulin sequences which are homologous to the Pisum sativum (Garden pea) ENOD3 protein. ENOD3 is expressed in the late stages of root nodule formation and contains two pairs of cysteine residues towards the C-terminus which may be involved in metal-binding .. +PF07128 Protein of unknown function (DUF1380)
Pfam-B_15699 (release 10.0). This family consists of several hypothetical bacterial proteins of around 140 residues in length. Members of this family seem to be specific to Enterobacteria. The function of this family is unknown.. +PF07129 Protein of unknown function (DUF1381)
Pfam-B_15743 (release 10.0). This family consists of several hypothetical Staphylococcus aureus and Staphylococcus aureus bacteriophage proteins of around 65 residues in length. The function of this family is unknown.. +PF07130 YebG protein
Pfam-B_15760 (release 10.0). This family consists of several bacterial YebG proteins of around 75 residues in length. The exact function of this protein is unknown but it is thought to be involved in the SOS response. The induction of the yebG gene occurs as cells enter into the stationary growth phase and is dependent on cyclic AMP and H-NS . . +PF07131 Protein of unknown function (DUF1382)
Moxon SJ, Eberhardt R. Pfam-B_15770 (release 10.0). This family consists of several hypothetical Escherichia coli and bacteriophage lambda-like proteins of around 60 residues in length. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids .. +PF07133 Merozoite surface protein (SPAM)
Pfam-B_15860 (release 10.0). This family consists of several Plasmodium falciparum SPAM (secreted polymorphic antigen associated with merozoites) proteins. Variation among SPAM alleles is the result of deletions and amino acid substitutions in non-repetitive sequences within and flanking the alanine heptad-repeat domain. Heptad repeats in which the a and d position contain hydrophobic residues generate amphipathic alpha-helices which give rise to helical bundles or coiled-coil structures in proteins. SPAM is an example of a P. falciparum antigen in which a repetitive sequence has features characteristic of a well-defined structural element [1,2]. . +PF07134 Protein of unknown function (DUF1383)
Pfam-B_15868 (release 10.0). This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 375 residues in length. The function of this family is unknown.. +PF07136 Protein of unknown function (DUF1385)
Pfam-B_12671 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function approximately 300 residues in length. Some family members are predicted to be metal-dependent.. +PF07137 Violaxanthin de-epoxidase (VDE)
Pfam-B_12679 (release 10.0). This family represents a conserved region approximately 150 residues long within plant violaxanthin de-epoxidase (VDE). In higher plants, violaxanthin de-epoxidase forms part of a conserved system that dissipates excess energy as heat in the light-harvesting complexes of photosystem II (PSII), thus protecting them from photo-inhibitory damage .. +PF07138 Protein of unknown function (DUF1386)
Pfam-B_16196 (release 10.0). This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 350 residues in length. The function of this family is unknown.. +PF07139 Protein of unknown function (DUF1387)
Pfam-B_10471 (release 10.0). This family represents a conserved region approximately 300 residues long within a number of hypothetical proteins of unknown function that seem to be restricted to mammals.. +PF07140 Interferon gamma receptor (IFNGR1)
Pfam-B_15930 (release 10.0). This family consists of several eukaryotic and viral interferon gamma receptor proteins. Molecular interactions among cytokines and cytokine receptors in eukaryotes form the basis of many cell-signaling pathways relevant to immune function. Human interferon-gamma (IFN-gamma) signals through a multimeric receptor complex consisting of two different but structurally related transmembrane chains: the high-affinity receptor-binding subunit (IFN-gammaRalpha) and a species specific accessory factor (AF-1 or IFN-gammaRbeta). The vaccinia viral interferon gamma receptor has been shown to be secreted from infected cells during early infection . The structure has been halved such that the N-terminus of this family is now represented by Tissue_fac Pfam:PF01108.. +PF07141 Putative bacteriophage terminase small subunit
Pfam-B_15957 (release 10.0). This family consists of several putative Lactococcus bacteriophage terminase small subunit proteins. The exact function of this family is unknown.. +PF07142 Repeat of unknown function (DUF1388)
Pfam-B_16000 (release 10.0). This family consists of several repeats of around 29 residues in length. Members of this family are found in the variable surface lipoproteins in Mycoplasma bovis and in mammalian neurofilament triplet H (NefH or NF-H) proteins. This repeat contains several Lys-Ser-Pro (KSP) motifs and in NefH these are thought to function as the main target for neurofilament directed protein kinases in vivo .. +PF07143 Hydroxyneurosporene synthase (CrtC)
Pfam-B_16004 (release 10.0). This family consists of several purple photosynthetic bacterial hydroxyneurosporene synthase (CrtC) proteins. The enzyme catalyses the conversion of various acyclic carotenes including 1-hydroxy derivatives. This broad substrate specificity reflects the participation of CrtC in 1'-HO-spheroidene and in spirilloxanthin biosynthesis . This family also contains the members of the old Pfam family DUF2006. Structural characterisation of DUF2006 family member Swiss:Q82US3 has revealed a lipocalin-like fold with domain duplication.. +PF07145 Ataxin-2; Ataxin-2_C;
Ataxin-2 C-terminal region. The PABP-interacting motif PAM2 has been identified in various eukaryotic proteins as an important binding site for Pfam:PF00658. It has been found in a wide range of eukaryotic proteins . Strikingly, this motif appears to occur solely outside of globular domains .. +PF07146 Protein of unknown function (DUF1389)
Pfam-B_16027 (release 10.0). This family consists of several hypothetical bacterial proteins which seem to be specific to Chlamydia pneumoniae. Members of this family are typically around 400 residues in length. The function of this family is unknown.. +PF07147 Mitochondrial 28S ribosomal protein S30 (PDCD9)
Pfam-B_16045 (release 10.0). This family consists of several eukaryotic mitochondrial 28S ribosomal protein S30 (or programmed cell death protein 9 PDCD9) sequences. The exact function of this family is unknown although it is known to be a component of the mitochondrial ribosome and a component in cellular apoptotic signaling pathways .. +PF07148 Maltose operon periplasmic protein precursor (MalM)
Pfam-B_16111 (release 10.0). This family consists of several maltose operon periplasmic protein precursor (MalM) sequences. The function of this family is unknown .. +PF07149 Pes-10
Pfam-B_16134 (release 10.0). This family consists of several Caenorhabditis elegans pes-10 and related proteins. Members of this family are typically around 400 residues in length. The function of this family is unknown.. +PF07150 Protein of unknown function (DUF1390)
Pfam-B_16182 (release 10.0). This family consists of several Paramecium bursaria chlorella virus 1 (PBCV-1) proteins of around 250 residues in length. The function of this family is unknown.. +PF07151 Protein of unknown function (DUF1391)
Pfam-B_16216 (release 10.0). This family consists of several Enterobacterial proteins of around 50 residues in length. Members of this family are found in Escherichia coli and Salmonella typhi where they are often known as YdfA. The function of this family is unknown.. +PF07152 YaeQ protein
Pfam-B_16245 (release 10.0). This family consists of several hypothetical bacterial proteins of around 180 residues in length which are often known as YaeQ. YaeQ is homologous to RfaH, a specialised transcription elongation protein. YaeQ is known to compensate for loss of RfaH function .. +PF07153 Marek's disease-like virus SORF3 protein
Pfam-B_16263 (release 10.0). This family consists of several SORF3 proteins from the Marek's disease-like viruses. Members of this family are around 350 residues in length. The function of this family is unknown.. +PF07154 Protein of unknown function (DUF1392)
Pfam-B_16270 (release 10.0). This family consists of several hypothetical cyanobacterial proteins of around 150 residues in length which seem to be specific to Anabaena species. The function of this family is unknown.. +PF07155 DUF1393;
ECF-type riboflavin transporter, S component. Moxon SJ, Eberhardt R. Pfam-B_16301 (release 10.0). This family is the substrate-binding component (S component) of the energy coupling-factor (ECF)-type riboflavin transporter. It is a transmembrane protein which binds riboflavin, and is responsible for riboflavin-uptake by cells [1,2].. +PF07156 Prenylcysteine lyase
Pfam-B_12448 (release 10.0). This family contains prenylcysteine lyases (EC:1.8.3.5) that are approximately 500 residues long. Prenylcysteine lyase is a FAD-dependent thioether oxidase that degrades a variety of prenylcysteines, producing free cysteine, an isoprenoid aldehyde and hydrogen peroxide as products of the reaction . It has been noted that this enzyme has considerable homology with ClP55, a 55 kDa protein that is associated with chloride ion pumps .. +PF07157 DNA circularisation protein N-terminus
Pfam-B_12343 (release 10.0). This family represents the N-terminus (approximately 100 residues) of a number of phage DNA circularisation proteins.. +PF07158 Dicarboxylate carrier protein MatC N-terminus
Pfam-B_16346 (release 10.0). This family represents the N-terminal region of the bacterial dicarboxylate carrier protein MatC. The MatC protein is an integral membrane protein that could function as a malonate carrier .. +PF07159 Protein of unknown function (DUF1394)
Pfam-B_16260 (release 10.0). This family consists of several hypothetical eukaryotic proteins of around 320 residues in length. The function of this family is unknown.. +PF07160 Protein of unknown function (DUF1395)
Pfam-B_16376 (release 10.0). This family consists of several hypothetical eukaryotic proteins of around 250 residues in length. The function of this family is unknown.. +PF07161 Protein of unknown function (DUF1396)
Pfam-B_16343 (release 10.0). This family consists of several putative lipoproteins from Mycobacterium species. The function of this family is unknown.. +PF07162 B9;
Ciliary basal body-associated, B9 protein. Vella Briffa B, Coggill P. Pfam-B_12595 (release 10.0). The B9-C2 domain is found in proteins associated with the ciliary basal body. B9 domains were identified as a specific family of C2 domains . There are three sub-families represented by this family, notably, Mks1-Xbx7, Stumpy-Tza1 and Tza2 groups of proteins. Mutations in human Mks1 result in the developmental disorder Meckel-Gruber syndrome ; mutations in mouse Stumpy lead to perinatal hydrocephalus and severe polycystic kidney disease . All the three distinct types of B9-C2 proteins cooperatively localise to the basal body or centrosome of cilia. . +PF07163 Pex26 protein
Pfam-B_16379 (release 10.0). This family consists of Pex26 and related mammalian proteins. Pex26 is a type II peroxisomal membrane protein which recruits Pex6-Pex1 complexes to peroxisomes . Mutations in Pex26 can lead to human disorders .. +PF07165 Protein of unknown function (DUF1397)
Pfam-B_16395 (release 10.0). This family consists of several insect specific proteins. Swiss:Q25513 is annotated as being a haemolymph glycoprotein precursor. The function of this family is unknown .. +PF07166 Protein of unknown function (DUF1398)
Pfam-B_16404 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 130 residues in length. Members of this family seem to be found exclusively in Escherichia coli and Salmonella species. The function of this family is unknown.. +PF07167 Poly-beta-hydroxybutyrate polymerase (PhaC) N-terminus
Pfam-B_16456 (release 10.0). This family represents the N-terminal region of the bacterial poly-beta-hydroxybutyrate polymerase (PhaC). Polyhydroxyalkanoic acids (PHAs) are carbon and energy reserve polymers produced in some bacteria when carbon sources are plentiful and another nutrient, such as nitrogen, phosphate, oxygen, or sulfur, becomes limiting. PHAs composed of monomeric units ranging from 3 to 14 carbons exist in nature. When the carbon source is exhausted, PHA is utilised by the bacterium. PhaC links D-(-)-3-hydroxybutyrl-CoA to an existing PHA molecule by the formation of an ester bond . This family appears to be a partial segment of an alpha/beta hydrolase domain.. +PF07168 FAE_3-kCoA_syn1; Ureide_perm;
Pfam-B_11634 (release 10.0). Heterocyclic nitrogen compounds may serve as nitrogen sources or nitrogen transport compounds in plants that are not able to fix nitrogen. This family represents ureide permease, a transporter of a wide spectrum of oxo derivatives of heterocyclic nitrogen compounds, including allantoin, uric acid and xanthine; it has 10 putative transmembrane domains with a large cytosolic central domain containing a 'Walker A' motif. Ureide permease is likely to transport other purine degradation products when nitrogen sources are low. Transport is dependent on glucose and a proton gradient . The family is found in bacteria, plants and yeast.. +PF07171 MlrC C-terminus
Pfam-B_6316 (release 10.0). This family represents the C-terminus (approximately 200 residues) of the product of a bacterial gene cluster that is involved in the degradation of the cyanobacterial toxin microcystin LR. Many members of this family are hypothetical proteins.. +PF07172 Glycine rich protein family
Pfam-B_15819 (release 10.0). This family of proteins includes several glycine rich proteins as well as two nodulins 16 and 24. The family also contains proteins that are induced in response to various stresses.. +PF07173 Protein of unknown function (DUF1399)
Pfam-B_13062 (release 10.0). This family represents a conserved region approximately 150 residues long within a number of hypothetical plant proteins of unknown function.. +PF07174 Fibronectin-attachment protein (FAP)
Pfam-B_16585 (release 10.0). This family contains bacterial fibronectin-attachment proteins (FAP). Family members are rich in alanine and proline, are approximately 300 long, and seem to be restricted to mycobacteria. These proteins contain a fibronectin-binding motif that allows mycobacteria to bind to fibronectin in the extracellular matrix .. +PF07175 Osteoregulin
Pfam-B_16589 (release 10.0). This family represents a conserved region approximately 180 residues long within osteoregulin, a bone-remodelling protein expressed highly in osteocytes within trabecular and cortical bone. A conserved RGD motif is found towards the C-terminal end of this region, and this is potentially involved in integrin recognition .. +PF07176 Alpha/beta hydrolase of unknown function (DUF1400)
Pfam-B_16606 (release 10.0). This family contains a number of hypothetical proteins of unknown function that seem to be specific to cyanobacteria. Members of this family have an alpha/beta hydrolase fold.. +PF07177 Neuralized
Pfam-B_16611 (release 10.0). This family contains a conserved region approximately 60 residues long within eukaryotic neuralized and neuralized-like proteins. Neuralized belongs to a group of ubiquitin ligases and is required in a subset of Notch pathway-mediated cell fate decisions during development of the Drosophila nervous system . Some family members contain multiple copies of this region.. +PF07178 TraL protein
Pfam-B_16378 (release 10.0). This family consists of several bacterial TraL proteins. TraL is a predicted peripheral membrane protein which is thought to be involved in bacterial sex pilus assembly . The exact function of this family is unclear.. +PF07179 SseB protein N-terminal domain
Pfam-B_16678 (release 10.0). This family consists of several SseB proteins which appear to be found exclusively in Enterobacteria. SseB is known to enhance serine-sensitivity in Escherichia coli and is part of the Salmonella pathogenicity island 2 (SPI-2) translocon . This entry contains the presumed N-terminal domain of SseB.. +PF07180 Protein of unknown function (DUF1401)
Pfam-B_16789 (release 10.0). This family consists of several hypothetical bacterial proteins of around 135 residues in length. Members of this family appear to be found exclusively in the Enterobacteria Escherichia coli, Citrobacter rodentium and Salmonella typhi. The function of this family is unknown.. +PF07181 VirC2 protein
Pfam-B_16860 (release 10.0). This family consists of several VirC2 proteins which seem to be found exclusively in Agrobacterium species and Rhizobium etli. VirC2 is known to be involved in virulence in Agrobacterium species but its exact function is unclear [1,2].. +PF07182 Protein of unknown function (DUF1402)
Pfam-B_16561 (release 10.0). This family consists of several hypothetical bacterial proteins of around 310 residues in length. Members of this family seem to be found exclusively in Agrobacterium, Rhizobium and Brucella species. The function of this family is unknown.. +PF07183 Protein of unknown function (DUF1403)
Pfam-B_16581 (release 10.0). This family consists of several hypothetical bacterial proteins of around 320 residues in length. Members of this family are mainly found in Rhizobium and Agrobacterium species. The function of this family is unknown.. +PF07184 Citrus tristeza virus P33 protein
Pfam-B_16614 (release 10.0). This family consists of several Citrus tristeza virus (CTV) P33 proteins. The function of P33 is unclear although it is known that the protein is not needed for virion formation .. +PF07185 Protein of unknown function (DUF1404)
Pfam-B_16616 (release 10.0). This family consists of several archaeal proteins of around 180 residues in length. Members of this family seem to be found exclusively in Sulfolobus tokodaii and Sulfolobus solfataricus. The function of this family is unknown.. +PF07187 Protein of unknown function (DUF1405)
Pfam-B_16845 (release 10.0). This family consists of several bacterial and related archaeal proteins of around 180 residues in length. The function of this family is unknown.. +PF07188 Kaposi's sarcoma-associated herpesvirus (KSHV) K8 protein
Pfam-B_16868 (release 10.0). This family consists of Kaposi's sarcoma-associated herpesvirus (KSHV) K8 proteins. KSHV is a human Gammaherpesvirus related to Epstein-Barr virus (EBV) and herpesvirus saimiri. KSHV open reading frame K8 encodes a basic region-leucine zipper protein of 237 aa that homodimerises. K8 interacts and co-localises with human Pfam:PF04855, a cellular chromatin-remodelling factor, both in vivo and in vitro. K8 is thought to function as a transcriptional activator under specific conditions and its transactivation activity requires its interaction with the cellular chromatin remodelling factor hSNF5 . . +PF07189 Splicing factor 3B subunit 10 (SF3b10)
Pfam-B_16870 (release 10.0). This family consists of several eukaryotic splicing factor 3B subunit 10 (SF3b10) proteins. SF3b10 is a 10 kDa subunit of the splicing factor SF3b. SF3b associates with the splicing factor SF3a and a 12S RNA unit to form the U2 small nuclear ribonucleoproteins complex. SF3b10 and SF3b14b are also thought to facilitate the interaction of U2 with the branch site .. +PF07190 Protein of unknown function (DUF1406)
Pfam-B_16883 (release 10.0). This family consists of several Orthopoxvirus proteins of around 185 residues in length. Members of this family seem to be exclusive to Vaccinia, Camelpox and Cowpox viruses. Some family members are annotated as being C8 proteins but their function is unknown.. +PF07191 DUF1407;
Pfam-B_16889 (release 10.0). This family consists of several short, hypothetical bacterial proteins of around 70 residues in length. Members of this family have 8 highly conserved cysteine residues, which form two zinc ribbon domains.. +PF07192 SNURF/RPN4 protein
Pfam-B_16890 (release 10.0). This family consists of several mammalian SNRPN upstream reading frame (SNURF) proteins. SNURF or RPF4 is a RING-finger protein and a coregulator of androgen receptor-dependent transcription. It has been suggested that SNURF is involved in the regulation of processes required for late steps of spermatid maturation [1,2].. +PF07193 Protein of unknown function (DUF1408)
Pfam-B_16879 (release 10.0). This family consists of several hypothetical Lactococcus lactis and related phage proteins of around 75 residues in length. The function of this family is unknown.. +PF07194 P2 response regulator binding domain
Pfam-B_7970 (release 10.0). The response regulators for CheA bind to the P2 domain, which is found between Pfam:PF01627 and Pfam:PF02895 as either one or two copies. Highly flexible linkers connect P2 to the rest of CheA and impart remarkable mobility to the P2 domain. This feature is thought to enhance the inter CheA dimer phosphotransfer reactions within the signalling complex, thereby amplifying the phosphorylation signal .. +PF07195 Flagellar hook-associated protein 2 C-terminus
The flagellar hook-associated protein 2 (HAP2 or FliD) forms the distal end of the flagella, and plays a role in mucin specific adhesion of the bacteria . This alignment covers the C-terminal region of this family of proteins.. +PF07196 Flagellin hook IN motif
The function of this region is not clear, but it is found in many flagellar hook proteins, including FliD homologues ( ). It is normally repeated, but is also apparently seen as a singleton. A conserved IN is seen at the centre of the motif. The diversity of these motifs makes it likely that some members of the family are not identified.. +PF07197 Protein of unknown function (DUF1409)
Pfam-B_16557 (release 10.0). This family represents a short conserved region (approximately 50 residues long), sometimes repeated, within a number of hypothetical Oryza sativa proteins of unknown function.. +PF07198 Protein of unknown function (DUF1410)
Pfam-B_13132 (release 10.0). This family represents a conserved domain approximately 100 residues long, multiple copies of which are found within hypothetical Ureaplasma parvum proteins of unknown function, as well as related species.. +PF07199 Protein of unknown function (DUF1411)
Pfam-B_16764 (release 10.0). This family represents a conserved region approximately 150 residues long that is sometimes repeated within some Babesia bovis proteins of unknown function.. +PF07200 Modifier of rudimentary (Mod(r)) protein
Vella Briffa B, Wood V, Mistry J. Pfam-B_16631 (release 10.0). This family represents a conserved region approximately 150 residues long within a number of eukaryotic proteins that show homology with Drosophila melanogaster Modifier of rudimentary (Mod(r)) proteins. The N-terminal half of Mod(r) proteins is acidic, whereas the C-terminal half is basic , and both of these regions are represented in this family. Members of this family include the Vps37 subunit of the endosomal sorting complex ESCRT-I, a complex involved in recruiting transport machinery for protein sorting at the multivesicular body (MVB). The yeast ESCRT-I complex consists of three proteins (Vps23, Vps28 and Vps37). The mammalian homologue of Vps37 interacts with Tsg101 (Pfam: PF05743) through its mod(r) domain and its function is essential for lysosomal sorting of EGF receptors .. +PF07201 HrpJ-like domain
Pfam-B_16649 & Pfam-B_11026(release 10.0) & Pfam-B_1285(release 5.4). This family represents a conserved region approximately 200 residues long within a number of bacterial hypersensitivity response secretion protein HrpJ and similar proteins. HrpJ forms part of a type III secretion system through which, in phytopathogenic bacterial species, virulence factors are thought to be delivered to plant cells . This family also includes the InvE invasion protein from Salmonella. This protein is involved in host parasite interactions and mutations in the InvE gene render Salmonella typhimurium non-invasive . InvE S. typhimurium mutants fail to elicit a rapid Ca2+ increase in cultured cells, an important event in the infection procedure and internalisation of S. typhimurium into epithelial cells . This family includes bacterial SepL and SsaL proteins. SepL plays an essential role in the infection process of enterohemorrhagic Escherichia coli and is thought to be responsible for the secretion of EspA, EspD, and EspB . SsaL of Salmonella typhimurium is thought to be a component of the type III secretion system .. +PF07202 T-complex protein 10 C-terminus
Pfam-B_13039 (release 10.0). This family represents the C-terminus (approximately 180 residues) of eukaryotic T-complex protein 10. The T-complex is involved in spermatogenesis in mice .. +PF07203 Protein of unknown function (DUF1412)
Pfam-B_16907 (release 10.0). This family consists of several Caenorhabditis elegans proteins of around 70-75 residues in length. The function of this family is unknown.. +PF07204 Orthoreovirus membrane fusion protein p10
Pfam-B_16940 (release 10.0). This family consists of several Orthoreovirus membrane fusion protein p10 sequences. p10 is thought to be a multifunctional protein that plays a key role in virus-host interaction . . +PF07205 Domain of unknown function (DUF1413)
Pfam-B_16942 (release 10.0). This family consists of several hypothetical bacterial proteins which seem to be specific to firmicute species. Members of this family are typically around 100 residues in length. The function of this family is unknown.. +PF07206 Baculovirus late expression factor 10 (LEF-10)
Pfam-B_16893 (release 10.0). This family consists of several Baculovirus specific late expression factor 10 (LEF-10) sequences. LEF-10 is thought to be a late expressed structural protein although its exact function is unknown .. +PF07207 Light regulated protein Lir1
Pfam-B_16937 (release 10.0). This family consists of several plant specific light regulated Lir1 proteins. Lir1 mRNA accumulates in the light, reaching maximum and minimum steady-state levels at the end of the light and dark period, respectively. Plants germinated in the dark have very low levels of lir1 mRNA, whereas plants germinated in continuous light express lir1 at an intermediate but constant level. It is thought that lir1 expression is controlled by light and a circadian clock. The exact function of this family is unclear .. +PF07208 Protein of unknown function (DUF1414)
Pfam-B_16906 (release 10.0). This family consists of several hypothetical bacterial proteins of around 70 residues in length. Members of this family are often referred to as YejL. The function of this family is unknown.. +PF07209 Protein of unknown function (DUF1415)
Pfam-B_16932 (release 10.0). This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown.. +PF07210 Protein of unknown function (DUF1416)
Pfam-B_16939 (release 10.0). This family consists of several hypothetical bacterial proteins of around 100 residues in length. Members of this family appear to be Actinomycete specific. The function of this family is unknown.. +PF07212 Hyaluronidase;
Hyaluronidase protein (HylP). Pfam-B_16578 (release 10.0). This family consists of several phage associated hyaluronidase proteins (EC:3.2.1.35) which seem to be specific to Streptococcus pyogenes and Streptococcus pyogenes bacteriophages. The substrate of hyaluronidase is hyaluronic acid, a sugar polymer composed of alternating N-acetylglucosamine and glucuronic acid residues. Hyaluronic acid is found in the ground substance of human connective tissue and the vitreous of the eye and also is the sole component of the capsule of group A streptococci. The capsule has been shown to be an important virulence factor of this organism by virtue of its ability to resist phagocytosis. Production by S. pyogenes of both a hyaluronic acid capsule and hyaluronidase enzymatic activity capable of destroying the capsule is an interesting, yet-unexplained, phenomenon .. +PF07213 DAP10 membrane protein
Pfam-B_16910 (release 10.0). This family consists of several mammalian DAP10 membrane proteins. In activated mouse natural killer (NK) cells, the NKG2D receptor associates with two intracellular adaptors, DAP10 and DAP12, which trigger phosphatidyl inositol 3 kinase (PI3K) and Syk family protein tyrosine kinases, respectively. It has been suggested that the DAP10-PI3K pathway is sufficient to initiate NKG2D-mediated killing of target cells .. +PF07214 Protein of unknown function (DUF1418)
Pfam-B_16971 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 100 residues in length. Members of this family are often described as YbjC. In E. coli the ybjC gene is located downstream of nfsA (which encodes the major oxygen-insensitive nitroreductase). It is thought that nfsA and ybjC form an operon and its promoter is a class I SoxS-dependent promoter. The function of this family is unknown.. +PF07215 Protein of unknown function (DUF1419)
Pfam-B_16972 (release 10.0). This family consists of several bacterial proteins of around 110 residues in length. Members of this family seem to be specific to Agrobacterium species and to Rhizobium loti. The function of this family is unknown.. +PF07216 LcrG protein
Pfam-B_16974 (release 10.0). This family consists of several bacterial LcrG proteins. Yersiniae are equipped with the Yop virulon, an apparatus that allows extracellular bacteria to deliver toxic Yop proteins inside the host cell cytosol in order to sabotage the communication networks of the host cell or even to cause cell death. LcrG is a component of the Yop virulon involved in the regulation of secretion of the Yops . . +PF07217 Heterokaryon incompatibility protein Het-C
Pfam-B_16951 (release 10.0). In filamentous fungi, het loci (for heterokaryon incompatibility) are believed to regulate self/nonself-recognition during vegetative growth. As filamentous fungi grow, hyphal fusion occurs within an individual colony to form a network. Hyphal fusion can occur also between different individuals to form a heterokaryon, in which genetically distinct nuclei occupy a common cytoplasm. However, heterokaryotic cells are viable only if the individuals involved have identical alleles at all het loci . . +PF07218 Rhoptry-associated protein 1 (RAP-1)
Pfam-B_16981 (release 10.0). This family consists of several rhoptry-associated protein 1 (RAP-1) sequences which appear to be specific to Plasmodium falciparum .. +PF07219 HemY protein N-terminus
Pfam-B_16745 (release 10.0). This family represents the N-terminus (approximately 150 residues) of bacterial HemY porphyrin biosynthesis proteins. This is a membrane protein involved in a late step of protoheme IX synthesis .. +PF07220 Protein of unknown function (DUF1420)
Pfam-B_17056 (release 10.0). This family consists of several hypothetical putative lipoproteins which seem to be found specifically in the bacterium Leptospira interrogans. Members of this family are typically around 670 residues in length and their function is unknown.. +PF07221 N-acylglucosamine 2-epimerase (GlcNAc 2-epimerase)
Pfam-B_17012 (release 10.0). This family contains a number of eukaryotic and bacterial N-acylglucosamine 2-epimerase (GlcNAc 2-epimerase) enzymes (EC:5.3.1.8) approximately 500 residues long. This converts N-acyl-D-glucosamine to N-acyl-D-mannosamine.. +PF07222 Proacrosin binding protein sp32
Pfam-B_17278 (release 10.0). This family consists of several mammalian specific proacrosin binding protein sp32 sequences. sp32 is a sperm specific protein which is known to bind with 55- and 53-kDa proacrosins and the 49-kDa acrosin intermediate. The exact function of sp32 is unclear; it is thought, however, that the binding of sp32 to proacrosin may be involved in packaging the acrosin zymogen into the acrosomal matrix . . +PF07223 Protein of unknown function (DUF1421)
Pfam-B_17006 (release 10.0). This family represents a conserved region approximately 350 residues long within a number of plant proteins of unknown function.. +PF07224 Chlorophyllase
Pfam-B_17130 (release 10.0). This family consists of several plant specific Chlorophyllase proteins (EC:3.1.1.14). Chlorophyllase (Chlase) is the first enzyme involved in chlorophyll (Chl) degradation and catalyses the hydrolysis of ester bond to yield chlorophyllide and phytol .. +PF07225 NDUFB4;
NADH-ubiquinone oxidoreductase B15 subunit (NDUFB4). Pfam-B_17132 (release 10.0). This family consists of several NADH-ubiquinone oxidoreductase B15 subunit proteins (EC:1.6.5.3). . +PF07226 Protein of unknown function (DUF1422)
Pfam-B_17087 (release 10.0). This family consists of several hypothetical bacterial proteins of around 120 residues in length. The function of this family is unknown.. +PF07227 Protein of unknown function (DUF1423)
Pfam-B_17028 (release 10.0). This family represents a conserved region approximately 500 residues long within a number of Arabidopsis thaliana proteins of unknown function.. +PF07228 Stage II sporulation protein E (SpoIIE)
Pfam-B_17063 (release 10.0). This family contains a number of bacterial stage II sporulation E proteins (EC:3.1.3.16). These are required for formation of a normal polar septum during sporulation. The N-terminal region is hydrophobic and is expected to contain up to 12 membrane-spanning segments .. +PF07229 VirE2
Pfam-B_17380 (release 10.0). This family consists of several VirE2 proteins which seem to be specific to Agrobacterium tumefaciens and Rhizobium etli. VirE2 is known to interact, via its C terminus, with VirD4. Agrobacterium tumefaciens transfers oncogenic DNA and effector proteins to plant cells during the course of infection. Substrate translocation across the bacterial cell envelope is mediated by a type IV secretion (TFS) system composed of the VirB proteins, as well as VirD4, a member of a large family of inner membrane proteins implicated in the coupling of DNA transfer intermediates to the secretion machine. VirE2 is therefore thought to be a protein substrate of a type IV secretion system which is recruited to a member of the coupling protein superfamily . . +PF07230 Phage_T4_Gp20;
Bacteriophage T4-like capsid assembly protein (Gp20). Pfam-B_17388 (release 10.0). This family consists of several bacteriophage T4-like capsid assembly (or portal) proteins. The exact mechanism by which the double-stranded (ds) DNA bacteriophages incorporate the portal protein at a unique vertex of the icosahedral capsid is unknown. In phage T4, there is evidence that this vertex, constituted by 12 subunits of gp20, acts as an initiator for the assembly of the major capsid protein and the scaffolding proteins into a prolate icosahedron of precise dimensions. The regulation of portal protein gene expression is an important regulator of prohead assembly in bacteriophage T4 . This family represents the protease responsible for the proteolysis of head proteins, a critical step in the morphogenesis of many tailed phages. Cleavage facilitates the conversion of the prohead to the mature capsid. All these cleavages are carried out by action at consensus S/A/G-X-E recognition sequences at 39 cleavage sites. Evidence of multiple processing sites in nine phiKZ proteins appears to represent a built-in mechanism by which the phage ensures that the majority of the propeptide regions are removed, and emphasizes the essential nature of processing in phiKZ-head morphogenesis . The family is classified by MEROPS as a serine peptidase.. +PF07231 Nematode_res_N;
Pfam-B_17124 (release 10.0). This family represents the N-terminus (approximately 180 residues) of plant Hs1pro-1, which is believed to confer resistance to nematodes .. +PF07232 Putative rep protein (DUF1424)
Pfam-B_17284 (release 10.0). This family consists of several archaeal proteins of around 320 residues in length. Members of this family seem to be found exclusively in Halobacterium and Haloferax species. The function of this family is unknown. This protein is probably a rep protein due to conservation of functional motifs.. +PF07233 Protein of unknown function (DUF1425)
Pfam-B_17314 (release 10.0). This family consists of several hypothetical bacterial proteins of around 125 residues in length. Several members of this family are described as putative lipoproteins and are often known as YcfL. The function of this family is unknown.. +PF07234 Protein of unknown function (DUF1426)
Pfam-B_17431 (release 10.0). This family consists of several Banana bunchy top virus proteins of around 120 residues in length. Swiss:Q9IGU4 is annotated as a movement protein whereas most other family members are hypothetical. The function of this family is unknown.. +PF07235 Protein of unknown function (DUF1427)
Pfam-B_17474 (release 10.0). This family consists of several bacterial proteins of around 100 residues in length. The function of this family is unknown.. +PF07236 Phytoreovirus S7 protein
Pfam-B_17475 (release 10.0). This family consists of several Phytoreovirus S7 proteins which are thought to be viral core proteins .. +PF07237 Protein of unknown function (DUF1428)
Pfam-B_17402 (release 10.0). This family consists of several hypothetical bacterial and one archaeal sequence of around 120 residues in length. The function of this family is unknown.. +PF07238 PilZ domain
Pfam-B_17421 (release 10.0). PilZ is a c-di-GMP binding domain which is found C terminal to Pfam:PF07317. Proteins which contain PilZ are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias . This domain forms a beta barrel structure.. +PF07239 Outer membrane protein OpcA
Pfam-B_17433 (release 10.0). This family consists of several Neisseria species specific OpcA outer membrane proteins. Opc (formerly called 5C) is one of the major outer membrane proteins and has been shown to play an important role in meningococcal adhesion and invasion of both epithelial and endothelial cells .. +PF07240 Stress-inducible humoral factor Turandot
Pfam-B_17438 (release 10.0). This family consists of several Drosophila species specific Turandot proteins. The Turandot A (TotA) gene encodes a humoral factor, which is secreted from the fat body and accumulates in the body fluids. TotA is strongly induced upon bacterial challenge, as well as by other types of stress such as high temperature, mechanical pressure, dehydration, UV irradiation, and oxidative agents. It is also up-regulated during metamorphosis and at high age. Flies that over-express TotA show prolonged survival and retain normal activity at otherwise lethal temperatures. Although TotA is only induced by severe stress, it responds to a much wider range of stimuli than heat shock genes such as hsp70 or immune genes such as Cecropin A1 .. +PF07242 Protein of unknown function (DUF1430)
Pfam-B_17033 (release 10.0). This family represents the C-terminus (approximately 120 residues) of a number of hypothetical bacterial proteins of unknown function. These are possibly membrane proteins involved in immunity.. +PF07243 Phlebovirus glycoprotein G1
Pfam-B_17508 (release 10.0). This family consists of several Phlebovirus glycoprotein G1 sequences. Members of the Bunyaviridae family acquire an envelope by budding through the lipid bilayer of the Golgi complex. The budding compartment is thought to be determined by the accumulation of the two heterodimeric membrane glycoproteins G1 and G2 in the Golgi .. +PF07244 Surface antigen variable number repeat
This family is found primarily in bacterial surface antigens, normally as variable number repeats at the N-terminus. The C-terminus of these proteins is normally represented by Pfam:PF01103. The alignment centres on a -GY- or -GF- motif. Some members of this family are found in the mitochondria. It is predicted to have a mixed alpha/beta secondary structure.. +PF07245 Phlebovirus glycoprotein G2
Pfam-B_17508 (release 10.0). This family consists of several Phlebovirus glycoprotein G2 sequences. Members of the Bunyaviridae family acquire an envelope by budding through the lipid bilayer of the Golgi complex. The budding compartment is thought to be determined by the accumulation of the two heterodimeric membrane glycoproteins G1 and G2 in the Golgi .. +PF07246 Phlebovirus nonstructural protein NS-M
Pfam-B_17508 (release 10.0). This family consists of several Phlebovirus nonstructural NS-M proteins which represent the N-terminal region of the M polyprotein precursor. The function of this family is unknown.. +PF07247 Alcohol acetyltransferase
Pfam-B_17349 (release 10.0). This family contains a number of alcohol acetyltransferase (EC:2.3.1.84) enzymes approximately 500 residues long found in both bacteria and metazoa. These catalyse the esterification of isoamyl alcohol by acetyl coenzyme A .. +PF07248 Protein of unknown function (DUF1431)
Pfam-B_17470 (release 10.0). This family contains a number of Drosophila melanogaster proteins of unknown function. These contain several conserved cysteine residues.. +PF07249 Cerato-platanin
Pfam-B_17594 (release 10.0). This family contains a number of fungal cerato-platanin phytotoxic proteins approximately 150 residues long. Cerato-platanin contains four cysteine residues that form two disulphide bonds .. +PF07250 Glyoxal oxidase N-terminus
Pfam-B_17519 (release 10.0). This family represents the N-terminus (approximately 300 residues) of a number of plant and fungal glyoxal oxidase enzymes. Glyoxal oxidase catalyses the oxidation of aldehydes to carboxylic acids, coupled with reduction of dioxygen to hydrogen peroxide. It is an essential component of the extracellular lignin degradation pathways of the wood-rot fungus Phanerochaete chrysosporium .. +PF07252 Protein of unknown function (DUF1433)
Pfam-B_17690 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function approximately 100 residues in length.. +PF07253 Gypsy protein
Pfam-B_17444 (release 10.0). This family consists of several Gypsy/Env proteins from Drosophila and Ceratitis fruit fly species. Gypsy is an endogenous retrovirus of Drosophila melanogaster. Phylogenetic studies suggest that occasional horizontal transfer events of gypsy occur between Drosophila species. Gypsy possesses infective properties associated with the products of the envelope gene that might be at the origin of these interspecies transfers . This family contains many members with full-length matches; however, it also includes a number of very short sequences and short matches of sequences with other unrelated domains on them, which cannot be excluded. These matches may represent remnants of once-functional genes.. +PF07254 Protein of unknown function (DUF1434)
Pfam-B_17586 (release 10.0). This family consists of several hypothetical bacterial proteins of around 135 residues in length. Members of this family all appear to be Enterobacterial proteins. The function of this family is unknown.. +PF07255 Benyvirus 14KDa protein
Pfam-B_17595 (release 10.0). This family consists of several Benyvirus specific 14KDa proteins of around 125 residues in length. Members of this family contain 9 conserved cysteine residues. The function of this family is unknown.. +PF07256 Protein of unknown function (DUF1435)
Pfam-B_17631 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 80 residues in length. The function of this family is unknown.. +PF07258 HCaRG protein
Pfam-B_17801 (release 10.0). This family consists of several mammalian HCaRG(hypertension-related, calcium-regulated gene) proteins. HCaRG is negatively regulated by extracellular calcium concentration, and its basal mRNA levels are higher in hypertensive animals. HCaRG is a nuclear protein potentially involved in the control of cell proliferation . . +PF07259 ProSAAS precursor
Pfam-B_17813 (release 10.0). This family consists of several mammalian proSAAS precursor proteins. ProSAAS mRNA is expressed primarily in brain and other neuroendocrine tissues (pituitary, adrenal, pancreas); within brain, the mRNA is broadly distributed among neurons. ProSAAS is thought to be an endogenous inhibitor of prohormone convertase 1 and may function as a neuropeptide. N-terminal fragments of proSAAS in intracellular Pick Bodies (PBs) may cause a functional disturbance of neurons in Pick's disease .. +PF07260 Progressive ankylosis protein (ANKH)
Pfam-B_17517 (release 10.0). This family consists of several progressive ankylosis protein (ANK or ANKH) sequences. The ANK protein spans the outer cell membrane and shuttles inorganic pyrophosphate (PPi), a major inhibitor of physiologic and pathologic calcification, bone mineralisation and bone resorption . Mutations in ANK are thought to give rise to Craniometaphyseal dysplasia (CMD) which is a rare skeletal disorder characterised by progressive thickening and increased mineral density of craniofacial bones and abnormally developed metaphyses in long bones .. +PF07261 Replication initiation and membrane attachment
Pfam-B_17543 (release 10.0). This family consists of several bacterial replication initiation and membrane attachment (DnaB) proteins, as well as DnaD which is a component of the PriA primosome. The PriA primosome functions to recruit the replication fork helicase onto the DNA . The DnaB protein is essential for both replication initiation and membrane attachment of the origin region of the chromosome and plasmid pUB110 in Bacillus subtilis. It is known that there are two different classes (DnaBI and DnaBII) in the DnaB mutants; DnaBI is essential for both chromosome and pUB110 replication, whereas DnaBII is necessary only for chromosome replication . DnaD has been merged into this family. This family also includes Ftn6, a cyanobacterial-specific divisome component possibly playing a role at the interface between DNA replication and cell division . Ftn6 possesses a conserved domain localised within the N-terminus of the proteins. This domain, named FND, exhibits sequence and structure similarities with the DnaD-like domains Pfam:PF04271 now merged into Pfam:PF07261.. +PF07262 Protein of unknown function (DUF1436)
Pfam-B_17809 (release 10.0). This family consists of several hypothetical bacterial proteins of around 160 residues in length. The function of this family is unknown.. +PF07263 Dentin matrix protein 1 (DMP1)
Pfam-B_17812 (release 10.0). This family consists of several mammalian dentin matrix protein 1 (DMP1) sequences. The dentin matrix acidic phosphoprotein 1 (DMP1) gene has been mapped to human chromosome 4q21 . DMP1 is a bone and teeth specific protein initially identified from mineralised dentin. DMP1 is primarily localised in the nuclear compartment of undifferentiated osteoblasts. In the nucleus, DMP1 acts as a transcriptional component for activation of osteoblast-specific genes like osteocalcin. During the early phase of osteoblast maturation, Ca(2+) surges into the nucleus from the cytoplasm, triggering the phosphorylation of DMP1 by a nuclear isoform of casein kinase II. This phosphorylated DMP1 is then exported out into the extracellular matrix, where it regulates nucleation of hydroxyapatite. DMP1 is a unique molecule that initiates osteoblast differentiation by transcription in the nucleus and orchestrates mineralised matrix formation extracellularly, at later stages of osteoblast maturation . The DMP1 gene has been found to be ectopically expressed in lung cancer although the reason for this is unknown .. +PF07264 Etoposide-induced protein 2.4 (EI24)
Pfam-B_17540 (release 10.0). This family contains a number of eukaryotic etoposide-induced 2.4 (EI24) proteins approximately 350 residues long as well as bacterial CysZ proteins (formerly known as DUF540). In cells treated with the cytotoxic drug etoposide, EI24 is induced by p53 . It has been suggested to play an important role in negative cell growth control .. +PF07265 Tapetum specific protein TAP35/TAP44
Pfam-B_17587 (release 10.0). This family consists of several plant tapetum specific proteins. Members of this family are found in Arabidopsis thaliana, Brassica napus and Sinapis alba. Members of this family may be involved in sporopollenin formation and/or deposition .. +PF07267 Nucleopolyhedrovirus capsid protein P87
Pfam-B_17842 (release 10.0). This family consists of several Nucleopolyhedrovirus capsid protein P87 sequences. P87 is expressed late in infection and concentrated in infected cell nuclei .. +PF07268 Exported protein precursor (EppA/BapA)
Pfam-B_17945 (release 10.0). This family consists of a number of exported protein precursor (EppA and BapA) sequences which seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). bapA gene sequences are quite stable but the encoded proteins do not provoke a strong immune response in most individuals. Conversely, EppA proteins are much more antigenic but are more variable in sequence. It is thought that BapA and EppA play important roles during the Borrelia burgdorferi infectious cycle .. +PF07270 Protein of unknown function (DUF1438)
Pfam-B_18024 (release 10.0). This family consists of several hypothetical proteins of around 170 residues in length which appear to be mouse specific. The function of this family is unknown.. +PF07271 Cytadhesin P30/P32
Pfam-B_18052 (release 10.0). This family consists of several Mycoplasma species specific Cytadhesin P32 and P30 proteins. P30 has been found to be membrane associated and localised on the tip organelle. It is thought that it is important in cytadherence and virulence .. +PF07272 Orthoreovirus P17 protein
Pfam-B_18125 (release 10.0). This family consists of several Orthoreovirus P17 proteins. P17 is specified by ORF2 of the S1 gene and represents a nonstructural protein which associates with cell membranes .. +PF07273 Protein of unknown function (DUF1439)
Pfam-B_18280 (release 10.0). This family consists of several hypothetical bacterial proteins of around 190 residues in length. Several members of this family are annotated as being putative lipoproteins and are often known as YceB. The function of this family is unknown.. +PF07274 Protein of unknown function (DUF1440)
Pfam-B_17831 (release 10.0). This family contains a number of bacterial proteins of unknown function approximately 180 residues long. These are possibly integral membrane proteins.. +PF07275 Antirestriction protein (ArdA)
Pfam-B_17857 (release 10.0). This family consists of several bacterial antirestriction (ArdA) proteins. ArdA functions in bacterial conjugation to allow an unmodified plasmid to evade restriction in the recipient bacterium and yet acquire cognate modification .. +PF07276 Apopolysialoglycoprotein (PSGP)
Pfam-B_17916 (release 10.0). This family represents a series of 13-residue repeats found in the apopolysialoglycoprotein of Oncorhynchus mykiss (Rainbow trout) and Oncorhynchus masou (Cherry salmon). Polysialoglycoprotein (PSGP) of unfertilised eggs of rainbow trout consists of tandem repeats of a glycotridecapeptide, Asp-Asp-Ala-Thr*-Ser*-Glu-Ala-Ala-Thr*-Gly-Pro-Ser-Gly (* denotes the attachment site of a polysialoglycan chain). In response to egg activation, PSGP is discharged by exocytosis into the space between the vitelline envelope and the plasma membrane, i.e. the perivitelline space, where the 200-kDa PSGP molecules undergo rapid and dramatic depolymerisation by proteolysis into glycotridecapeptides .. +PF07277 SapC
Pfam-B_17881 (release 10.0). This family contains a number of bacterial SapC proteins approximately 250 residues long. In Campylobacter fetus, SapC forms part of a paracrystalline surface layer (S-layer) that confers serum resistance .. +PF07278 Protein of unknown function (DUF1441)
Pfam-B_17966 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 160 residues in length. The function of this family is unknown. However, it appears to be distantly related to other HTH families so may act as a transcriptional regulator.. +PF07279 Protein of unknown function (DUF1442)
Pfam-B_18012 (release 10.0). This family consists of several hypothetical Arabidopsis thaliana proteins of around 225 residues in length. The function of this family is unknown.. +PF07280 Protein of unknown function (DUF1443)
Pfam-B_18027 (release 10.0). This family consists of several Baculovirus proteins of around 55 residues in length. The function of this family is unknown.. +PF07281 Insulin-induced protein (INSIG)
Pfam-B_17905 (release 10.0). This family contains a number of eukaryotic Insulin-induced proteins (INSIG-1 and INSIG-2) approximately 200 residues long. INSIG-1 and INSIG-2 are found in the endoplasmic reticulum and bind the sterol-sensing domain of SREBP cleavage-activating protein (SCAP), preventing it from escorting SREBPs to the Golgi. Their combined action permits feedback regulation of cholesterol synthesis over a wide range of sterol concentrations [1,2].. +PF07282 Transposase_35;
Putative transposase DNA-binding domain. Pfam-B_4755 (release 10.0). This putative domain is found at the C-terminus of a large number of transposase proteins. This domain contains four conserved cysteines suggestive of a zinc binding domain. Given the need for transposases to bind DNA as well as the large number of DNA-binding zinc fingers we hypothesise this domain is DNA-binding.. +PF07283 Conjugal transfer protein TrbH
Pfam-B_17942 (release 10.0). This family contains TrbH, a bacterial conjugal transfer protein approximately 150 residues long. This contains a putative membrane lipoprotein lipid attachment site .. +PF07284 2-vinyl bacteriochlorophyllide hydratase (BCHF)
Pfam-B_17961 (release 10.0). This family contains the bacterial enzyme 2-vinyl bacteriochlorophyllide hydratase (EC:4.2.1.-) (approximately 150 residues long). This is involved in the light-independent bacteriochlorophyll biosynthesis pathway by adding water across the 2-vinyl group .. +PF07285 Protein of unknown function (DUF1444)
Pfam-B_18053 (release 10.0). This family contains several hypothetical bacterial proteins of unknown function that are approximately 250 residues long.. +PF07286 Protein of unknown function (DUF1445)
Pfam-B_18180 (release 10.0). This family represents a conserved region approximately 150 residues long within a number of hypothetical bacterial and eukaryotic proteins of unknown function.. +PF07287 Protein of unknown function (DUF1446)
Pfam-B_17949 (release 10.0). This family consists of several bacterial and plant proteins of around 400 residues in length. The function of this family is unknown.. +PF07288 Protein of unknown function (DUF1447)
Pfam-B_18163 (release 10.0). This family consists of several bacterial proteins of around 70 residues in length. The function of this family is unknown.. +PF07289 Protein of unknown function (DUF1448)
Pfam-B_18223 (release 10.0). This family consists of several eukaryotic proteins of around 375 residues in length. The function of this family is unknown. It appears that this family includes a divergent GRAM domain.. +PF07290 Protein of unknown function (DUF1449)
Pfam-B_18269 (release 10.0). This family consists of several bacterial proteins of around 210 residues in length. The function of this family is unknown.. +PF07291 Methylamine utilisation protein MauE
Pfam-B_18306 (release 10.0). This family consists of several bacterial methylamine utilisation MauE proteins. Synthesis of enzymes involved in methylamine oxidation via methylamine dehydrogenase (MADH) is encoded by genes present in the mau cluster. MauE and MauD are specifically involved in the processing, transport, and/or maturation of the beta-subunit, and the absence of each of these proteins leads to production of a non-functional beta-subunit which becomes rapidly degraded .. +PF07292 Nmi/IFP 35 domain (NID)
Pfam-B_17864 (release 10.0). This family represents a domain of approximately 90 residues that is tandemly repeated within interferon-induced 35 kDa protein (IFP 35) and the homologous N-myc-interactor (Nmi). This domain mediates Nmi-Nmi protein interactions and subcellular localisation .. +PF07293 Protein of unknown function (DUF1450)
Pfam-B_18439 (release 10.0). This family consists of several hypothetical bacterial proteins of around 80 residues in length. Members of this family contain four highly conserved cysteine residues. The function of this family is unknown.. +PF07294 Fibroin P25
Pfam-B_18451 (release 10.0). This family consists of several insect fibroin P25 proteins. Silk fibroin produced by the silkworm Bombyx mori consists of a heavy chain, a light chain, and a glycoprotein, P25. The heavy and light chains are linked by a disulfide bond, and P25 associates with disulfide-linked heavy and light chains by non-covalent interactions. P25 plays an important role in maintaining integrity of the complex .. +PF07295 Protein of unknown function (DUF1451)
Pfam-B_18524 (release 10.0). This family consists of several hypothetical bacterial proteins of around 160 residues in length. Members of this family contain four highly conserved cysteine residues toward the C-terminal region of the protein. The function of this family is unknown.. +PF07296 TraP protein
Pfam-B_18635 (release 10.0). This family consists of several bacterial conjugative transfer TraP proteins from Escherichia coli and Salmonella typhimurium. TraP appears to play a minor role in conjugation and may interact with TraB, which varies in sequence along with TraP, in order to stabilise the proposed transmembrane complex formed by the tra operon products .. +PF07297 Dolichol phosphate-mannose biosynthesis regulatory protein (DPM2)
Pfam-B_18649 (release 10.0). This family consists of several eukaryotic dolichol phosphate-mannose biosynthesis regulatory (DPM2) proteins. Biosynthesis of glycosylphosphatidylinositol and N-glycan precursor is dependent upon a mannosyl donor, dolichol phosphate-mannose (DPM). DPM2, an 84 amino acid membrane protein expressed in the endoplasmic reticulum (ER), makes a complex with DPM1 that is essential for the ER localisation and stable expression of DPM1. Moreover, DPM2 enhances binding of dolichol phosphate, a substrate of DPM synthase. Biosynthesis of DPM in mammalian cells is regulated by DPM2 .. +PF07298 NnrU protein
Pfam-B_18406 (release 10.0). This family consists of several plant and bacterial NnrU proteins. NnrU is thought to be involved in the reduction of nitric oxide. The exact function of NnrU is unclear. It is thought however that NnrU and perhaps NnrT are required for expression of both nirK and nor .. +PF07299 Fibronectin-binding protein (FBP)
Pfam-B_18450 (release 10.0). This family consists of several bacterial fibronectin-binding proteins which are thought to be involved in virulence in Listeria species [1,2].. +PF07301 Protein of unknown function (DUF1453)
Pfam-B_18607 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. Members of this family seem to be found exclusively in the Order Bacillales.. +PF07302 AroM protein
Pfam-B_18608 (release 10.0). This family consists of several bacterial and archaeal AroM proteins. In Escherichia coli the aroM gene is cotranscribed with aroL . The function of this family is unknown.. +PF07303 Occludin homology domain
Pfam-B_18556 (release 10.0). This domain represents a conserved region approximately 100 residues long within eukaryotic occludin proteins and the RNA polymerase II elongation factor ELL. Occludin is an integral membrane protein that localises to tight junctions , while ELL is an elongation factor that can increase the catalytic rate of RNA polymerase II transcription by suppressing transient pausing by polymerase at multiple sites along the DNA . This shared domain is thought to mediate protein interactions .. +PF07304 Steroid receptor RNA activator (SRA1)
Pfam-B_18506 (release 10.0). This family consists of several hypothetical mammalian steroid receptor RNA activator proteins. SRA-RNAs likely to encode stable proteins are widely expressed in breast cancer cell lines. SRA-RNA is a steroid receptor co-activator which acts as a functional RNA and is classified as belonging to the growing family of functional non-coding RNAs. . +PF07305 Protein of unknown function (DUF1454)
Pfam-B_18833 (release 10.0). This family consists of several Enterobacterial sequences of around 200 residues in length which are often known as YiiQ proteins. The function of this family is unknown.. +PF07306 Protein of unknown function (DUF1455)
Pfam-B_19038 (release 10.0). This family consists of several hypothetical putative outer membrane proteins which appear to be specific to Anaplasma marginale and Anaplasma ovis.. +PF07307 Heptaprenyl diphosphate synthase (HEPPP synthase) subunit 1
Pfam-B_18592 (release 10.0). This family contains subunit 1 of bacterial heptaprenyl diphosphate synthase (HEPPP synthase) (EC:2.5.1.30) (approximately 230 residues long). The enzyme consists of two subunits, both of which are required for catalysis of heptaprenyl diphosphate synthesis .. +PF07308 Protein of unknown function (DUF1456)
This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown.. +PF07309 Flagellar protein FlaF
Pfam-B_19331 (release 10.0). This family consists of several bacterial FlaF flagellar proteins. FlaF and FlaG are trans-acting, regulatory factors that modulate flagellin synthesis during flagellum biogenesis .. +PF07310 DUF1457;
Pfam-B_18761 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function approximately 200 residues long. This region is distantly similar to other PAS domains.. +PF07311 DUF1458;
Moxon SJ, Anantharaman V. Pfam-B_18876 (release 10.0). Dodecin is a flavin-binding protein, found in several bacteria and few archaea and represents a stand-alone version of the SHS2 domain . It most closely resembles the SHS2 domains of FtsA and Rpb7p, and represents a single domain small-molecule binding form .. +PF07312 Protein of unknown function (DUF1459)
Pfam-B_18877 (release 10.0). This family consists of several hypothetical Caenorhabditis elegans proteins of around 85 residues in length. The function of this family is unknown.. +PF07313 Protein of unknown function (DUF1460)
Pfam-B_18925 (release 10.0). This family consists of several hypothetical bacterial proteins of around 260 residues in length. The function of this family is unknown.. +PF07314 Protein of unknown function (DUF1461)
Pfam-B_18854 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function approximately 200 residues long. These are possibly integral membrane proteins.. +PF07315 Protein of unknown function (DUF1462)
Pfam-B_19094 (release 10.0). This family consists of several hypothetical bacterial proteins of around 100 residues in length. The function of this family is unknown.. +PF07316 Protein of unknown function (DUF1463)
Pfam-B_19113 (release 10.0). This family consists of several hypothetical bacterial proteins of around 140 residues in length. Members of this family seem to be found exclusively in Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown.. +PF07317 Flagellar regulator YcgR
Pfam-B_19142 (release 10.0). This domain is found N terminal to Pfam:PF07238. Proteins which contain YcgR domains are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias .. +PF07318 Protein of unknown function (DUF1464)
Pfam-B_19143 (release 10.0). This family consists of several hypothetical archaeal proteins of around 350 residues in length. The function of this family is unknown.. +PF07319 Primosomal protein DnaI N-terminus
Pfam-B_18931 (release 10.0). This family represents the N-terminus (approximately 120 residues) of bacterial primosomal DnaI proteins, although one family member appears to be of viral origin. DnaI is one of the components of the Bacillus subtilis replication restart primosome, and is required for the DnaB75-dependent loading of the DnaC helicase .. +PF07321 Type III secretion protein YscO
Pfam-B_19036 (release 10.0). This family contains the bacterial type III secretion protein YscO, which is approximately 150 residues long. YscO has been shown to be required for high-level expression and secretion of the anti-host proteins V antigen and Yops in Yersinia pestis .. +PF07322 Seadornavirus Vp10
Pfam-B_18930 (release 10.0). This family consists of several Seadornavirus Vp10 proteins found in the Banna and Kadipiro viruses. Members of this family are typically around 240 residues in length. The function of this family is unknown.. +PF07323 Protein of unknown function (DUF1465)
Pfam-B_19346 (release 10.0). This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown.. +PF07324 DiGeorge syndrome critical region 6 (DGCR6) protein
Pfam-B_19101 (release 10.0). This family contains DiGeorge syndrome critical region 6 (DGCR6) proteins (approximately 200 residues long) of a number of vertebrates. DGCR6 is a candidate for involvement in the DiGeorge syndrome pathology by playing a role in neural crest cell migration into the third and fourth pharyngeal pouches, the structures from which derive the organs affected in DiGeorge syndrome . Also found in this family is the Drosophila melanogaster gonadal protein gdl.. +PF07325 Curtovirus V2 protein
Pfam-B_19350 (release 10.0). This family consists of several Curtovirus V2 proteins. The exact function of V2 is unclear but it is known that the protein is required for a successful host infection process .. +PF07326 Protein of unknown function (DUF1466)
Pfam-B_19433 (release 10.0). This family consists of several hypothetical mammalian proteins of around 240 residues in length.. +PF07327 Neuroparsin
Pfam-B_19487 (release 10.0). This family consists of several locust specific neuroparsin proteins. Neuroparsins are produced by the A1 type of protocerebral median neurosecretory cells of the PI-CC system and display pleiotropic activities: inhibition of the effect of juvenile hormone, stimulation of fluid reabsorption of isolated recta, induction of an increase in hemolymph lipid and trehalose levels, and neurotrophic effects .. +PF07328 T-DNA border endonuclease VirD1
Pfam-B_19558 (release 10.0). This family consists of several T-DNA border endonuclease VirD1 proteins which appear to be found exclusively in Agrobacterium species. Agrobacterium, a plant pathogen, is capable of stably transforming the plant cell with a segment of its own DNA called T-DNA (transferred DNA). This process depends, among others, on the specialised bacterial virulence proteins VirD1 and VirD2 that excise the T-DNA from its adjacent sequences. VirD1 is thought to interact with VirD2 in this process .. +PF07330 Protein of unknown function (DUF1467)
Pfam-B_19588 (release 10.0). This family consists of several bacterial proteins of around 90 residues in length. The function of this family is unknown.. +PF07331 DUF1468;
Tripartite tricarboxylate transporter TctB family. Pfam-B_19347 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. This family was formerly known as DUF1468.. +PF07332 Protein of unknown function (DUF1469)
Pfam-B_19352 (release 10.0) & COG5393. This family consists of several hypothetical bacterial proteins of around 140 residues in length. The function of this family is unknown.. +PF07333 S locus-related glycoprotein 1 binding pollen coat protein (SLR1-BP)
Pfam-B_19392 (release 10.0). This family consists of a number of cysteine rich SLR1 binding pollen coat like proteins. Adhesion of pollen grains to the stigmatic surface is a critical step during sexual reproduction in plants. In Brassica, S locus-related glycoprotein 1 (SLR1), a stigma-specific protein belonging to the S gene family of proteins, has been shown to be involved in this step. SLR1-BP specifically binds SLR1 with high affinity. The SLR1-BP gene is specifically expressed in pollen at late stages of development and is a member of the class A pollen coat protein (PCP) family, which includes PCP-A1, an SLG (S locus glycoprotein)-binding protein .. +PF07334 Interferon-induced 35 kDa protein (IFP 35) N-terminus
Pfam-B_17864 (release 10.0). This family represents the N-terminus of interferon-induced 35 kDa protein (IFP 35) (approximately 80 residues long), which contains a leucine zipper motif in an alpha helical configuration . This family also includes N-myc-interactor (Nmi), a homologous interferon-induced protein.. +PF07335 Chitosanase;
Fungal chitosanase of glycosyl hydrolase group 75. Pfam-B_19431 (release 10.0). This family consists of several fungal chitosanase proteins. Chitin, xylan, 6-O-sulphated chitosan and O-carboxymethyl chitin are indigestible by chitosanase . EC:3.2.1.132. The mechanism is likely to be inverting, and the probable catalytic nucleophile base is Asp, with the probable catalytic proton donor being Glu. (see the Chitosanase web-page from CAZY).. +PF07336 Protein of unknown function (DUF1470)
Pfam-B_19432 (release 10.0). This family consists of several hypothetical bacterial proteins of around 180 residues in length. Members of this family are found in Streptomyces, Rhizobium, Ralstonia, Agrobacterium and Bradyrhizobium species. The function of this family is unknown.. +PF07337 DC-EC Repeat
This repeat is found in the CagY proteins - part of the CAG pathogenicity island - and involved in delivery of the protein CagA into host cells ( ). It forms part of a surface needle structure, and this repeat may form an alpha-helical rod structure ( ). A conserved -DC- and -EC- can be seen regularly spaced in the alignment.. +PF07338 Protein of unknown function (DUF1471)
Pfam-B_19452 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 90 residues in length. Some members of this family are annotated as ydgH precursors and contain two copies of this region, one at the N-terminus and the other at the C-terminus. The function of this family is unknown.. +PF07339 Protein of unknown function (DUF1472)
Pfam-B_19493 (release 10.0). This family consists of several Enterobacterial proteins of around 125 residues in length and contains 6 highly conserved cysteine residues. The function of this family is unknown.. +PF07340 Cytomegalovirus IE1 protein
Pfam-B_22587 (release 10.0). Expression from a human cytomegalovirus early promoter (E1.7) has been shown to be activated in trans by the IE2 gene product. Although the IE1 gene product alone had no effect on this early viral promoter, maximal early promoter activity was detected when both IE1 and IE2 gene products were present . The IE1 protein from cytomegalovirus is also known as UL123.. +PF07341 Protein of unknown function (DUF1473)
Pfam-B_19856 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. Members of this family seem to be found exclusively in Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown.. +PF07342 Protein of unknown function (DUF1474)
Pfam-B_19882 (release 10.0). This family consists of several bacterial proteins of around 100 residues in length. Members of this family seem to be found exclusively in Staphylococcus aureus. The function of this family is unknown.. +PF07343 Protein of unknown function (DUF1475)
Pfam-B_19887 (release 10.0). This family consists of several hypothetical plant proteins of around 250 residues in length. Members of this family seem to be found exclusively in Arabidopsis thaliana. The function of this family is unknown.. +PF07344 Amastin surface glycoprotein
Pfam-B_19245 (release 10.0). This family contains the eukaryotic surface glycoprotein amastin (approximately 180 residues long). In Trypanosoma cruzi, amastin is particularly abundant during the amastigote stage.. +PF07345 Domain of unknown function (DUF1476)
Pfam-B_19680 (release 10.0). This family consists of several hypothetical bacterial proteins of around 100 residues in length. Members of this family are found in Bradyrhizobium, Rhizobium, Brucella and Caulobacter species. The function of this family is unknown.. +PF07346 Protein of unknown function (DUF1477)
Pfam-B_19762 (release 10.0). This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 100 residues in length. The function of this family is unknown.. +PF07347 NADH:ubiquinone oxidoreductase subunit B14.5a (Complex I-B14.5a)
Pfam-B_19436 (release 10.0). This family contains the eukaryotic NADH:ubiquinone oxidoreductase subunit B14.5a (Complex I-B14.5a) (EC:1.6.5.3). This is approximately 100 residues long, and forms part of a multiprotein complex that resides on the inner mitochondrial membrane. The main function of the complex is the transport of electrons from NADH to ubiquinone, accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space .. +PF07348 Syd protein (SUKH-2)
Vella Briffa B, Zhang D, Aravind L. Pfam-B_19909 (release 10.0). This family contains a number of bacterial Syd proteins approximately 180 residues long. It has been suggested that Syd is loosely associated with the cytoplasmic surface of the cytoplasmic membrane, and that interaction with SecY may be involved in this membrane association . Operon analysis showed that Syd protein may function as immunity protein in bacterial toxin systems .. +PF07349 Protein of unknown function (DUF1478)
Pfam-B_20105 (release 10.0). This family consists of several hypothetical Sapovirus proteins of around 165 residues in length. The function of this family is unknown.. +PF07350 Protein of unknown function (DUF1479)
Pfam-B_20226 (release 10.0). This family consists of several hypothetical Enterobacterial proteins, of around 420 residues in length. Members of this family are often known as YbiU. The function of this family is unknown.. +PF07351 Protein of unknown function (DUF1480)
Pfam-B_20253 (release 10.0). This family consists of several hypothetical Enterobacterial proteins of around 80 residues in length. The function of this family is unknown.. +PF07352 Bacteriophage Mu Gam like protein
Pfam-B_19455 (release 10.0). This family consists of bacterial and phage Gam proteins. The gam gene of bacteriophage Mu encodes a protein which protects linear double stranded DNA from exonuclease degradation in vitro and in vivo .. +PF07353 Uroplakin II
Pfam-B_19993 (release 10.0). This family contains uroplakin II, which is approximately 180 residues long and seems to be restricted to mammals. Uroplakin II is an integral membrane protein, and is one of the components of the apical plaques of mammalian urothelium formed by the asymmetric unit membrane - this is believed to play a role in strengthening the urothelial apical surface to prevent the cells from rupturing during bladder distension .. +PF07354 Zona-pellucida-binding protein (Sp38)
Pfam-B_19996 (release 10.0). This family contains a number of zona-pellucida-binding proteins that seem to be restricted to mammals. These are sperm proteins that bind to the 90-kDa family of zona pellucida glycoproteins in a calcium-dependent manner . These represent some of the specific molecules that mediate the first steps of gamete interaction, allowing fertilisation to occur .. +PF07355 Glycine/sarcosine/betaine reductase selenoprotein B (GRDB)
Pfam-B_19711 (release 10.0). This family represents a conserved region approximately 350 residues long within the selenoprotein B component of the bacterial glycine, sarcosine and betaine reductase complexes.. +PF07356 Protein of unknown function (DUF1481)
Pfam-B_20042 (release 10.0). This family consists of several hypothetical bacterial proteins of around 230 residues in length. Members of this family are often referred to as YjaH and are found in the Orders Vibrionales and Enterobacteriales. The function of this family is unknown.. +PF07357 Dinitrogenase reductase ADP-ribosyltransferase (DRAT)
Pfam-B_20108 (release 10.0). This family consists of several bacterial dinitrogenase reductase ADP-ribosyltransferase (DRAT) proteins. Members of this family seem to be specific to Rhodospirillum, Rhodobacter and Azospirillum species. Dinitrogenase reductase ADP-ribosyl transferase (DRAT) carries out the transfer of the ADP-ribose from NAD to the Arg-101 residue of one subunit of the dinitrogenase reductase homodimer, resulting in inactivation of that enzyme. Dinitrogenase reductase-activating glycohydrolase (DRAG) removes the ADP-ribose group attached to dinitrogenase reductase, thus restoring nitrogenase activity. The DRAT-DRAG system negatively regulates nitrogenase activity in response to exogenous NH4+ or energy limitation in the form of a shift to darkness or to anaerobic conditions .. +PF07358 Protein of unknown function (DUF1482)
Pfam-B_20128 (release 10.0). This family consists of several Enterobacterial proteins of around 60 residues in length. The function of this family is unknown.. +PF07359 Liver-expressed antimicrobial peptide 2 precursor (LEAP-2)
Pfam-B_20235 (release 10.0). This family consists of several mammalian liver-expressed antimicrobial peptide 2 (LEAP-2) sequences. LEAP-2 is a cysteine-rich, and cationic protein. LEAP-2 contains a core structure with two disulfide bonds formed by cysteine residues in relative 1-3 and 2-4 positions. LEAP-2 is synthesised as a 77-residue precursor, which is predominantly expressed in the liver and highly conserved among mammals. The largest native LEAP-2 form of 40 amino acid residues is generated from the precursor at a putative cleavage site for a furin-like endoprotease. In contrast to smaller LEAP-2 variants, this peptide exhibits dose-dependent antimicrobial activity against selected microbial model organisms . The exact function of this family is unclear.. +PF07361 Cytochrome_b562;
Pfam-B_18074 (release 10.0). This family contains the bacterial cytochrome b562. This forms a four-helix bundle that non-covalently binds a single heme prosthetic group. .. +PF07362 Post-segregation antitoxin CcdA
Pfam-B_20349 (release 10.0). This family consists of several Enterobacterial post-segregation antitoxin CcdA proteins. The F plasmid-carried bacterial toxin, the CcdB protein, is known to act on DNA gyrase in two different ways. CcdB poisons the gyrase-DNA complex, blocking the passage of polymerases and leading to double-strand breakage of the DNA. Alternatively, in cells that overexpress CcdB, the A subunit of DNA gyrase (GyrA) has been found as an inactive complex with CcdB. Both poisoning and inactivation can be prevented and reversed in the presence of the F plasmid-encoded antidote, the CcdA protein .. +PF07363 Protein of unknown function (DUF1484)
Pfam-B_20389 (release 10.0). This family consists of several hypothetical bacterial proteins of around 110 residues in length. Members of this family appear to be found exclusively in Ralstonia solanacearum. The function of this family is unknown.. +PF07364 Protein of unknown function (DUF1485)
Pfam-B_20495 (release 10.0). This family consists of several hypothetical bacterial proteins of around 300 residues in length. Members of this family all appear to be in the Phylum Proteobacteria. The function of this family is unknown.. +PF07365 Alpha conotoxin precursor
Pfam-B_20562 (release 10.0). This family consists of several alpha conotoxin precursor proteins from a number of Conus species. The alpha-conotoxins are small peptide neurotoxins from the venom of fish-hunting cone snails which block nicotinic acetylcholine receptors (nAChRs) . . +PF07366 DUF1486;
SnoaL-like polyketide cyclase. Pfam-B_20348 (release 10.0), Pfam-B_4335 (release 18.0). This family includes SnoaL a polyketide cyclase involved in nogalamycin biosynthesis. This family was formerly known as DUF1486. The proteins in this family adopt a distorted alpha-beta barrel fold . Structural data together with site-directed mutagenesis experiments have shown that SnoaL has a different mechanism to that of the classical aldolase for catalysing intramolecular aldol condensation .. +PF07367 Fungal fruit body lectin
Pfam-B_20370 (release 10.0). This family consists of several fungal fruit body lectin proteins. Fruit body lectins are thought to have insecticidal activity [1,2] and may also function in capturing nematodes . . +PF07368 Protein of unknown function (DUF1487)
Pfam-B_20425 (release 10.0). This family consists of several uncharacterised proteins from Drosophila melanogaster. The function of this family is unknown.. +PF07369 Protein of unknown function (DUF1488)
Pfam-B_20604 (release 10.0). This family consists of several hypothetical bacterial proteins of around 85 residues in length. The function of this family is unknown.. +PF07370 Protein of unknown function (DUF1489)
Pfam-B_20654 (release 10.0). This family consists of several hypothetical bacterial proteins of around 150 residues in length. Members of this family seem to be found exclusively in the Class Alphaproteobacteria. The function of this family is unknown.. +PF07371 Protein of unknown function (DUF1490)
Pfam-B_20678 (release 10.0). This family consists of several hypothetical bacterial proteins of around 90 residues in length. Members of the family seem to be found exclusively in Mycobacterium species. The function of this family is unknown.. +PF07372 Protein of unknown function (DUF1491)
Pfam-B_20742 (release 10.0). This family consists of several bacterial proteins of around 115 residues in length. Members of this family seem to be found exclusively in the Class Alphaproteobacteria. The function of this family is unknown.. +PF07373 CAMP factor (Cfa)
Pfam-B_20762 (release 10.0). This family consists of several bacterial CAMP factor (Cfa) proteins which seem to be specific to Streptococcus species. The CAMP reaction is a synergistic lysis of erythrocytes by the interaction of an extracellular protein (CAMP factor) produced by some streptococcal species with the Staphylococcus aureus sphingomyelinase C (beta-toxin) . . +PF07374 Protein of unknown function (DUF1492)
Pfam-B_20776 (release 10.0). This family consists of several hypothetical, highly conserved Streptococcal and related phage proteins of around 100 residues in length. The function of this family is unknown. It appears to be distantly related to Pfam:PF08281.. +PF07376 Prosystemin
Pfam-B_20835 (release 10.0). This family consists of several plant specific prosystemin proteins. Prosystemin is the precursor protein of the 18 amino acid wound signal systemin which activates systemic defence in plant leaves against insect herbivores .. +PF07377 Protein of unknown function (DUF1493)
Pfam-B_20460 (release 10.0). This family consists of several bacterial proteins of around 115 residues in length. Members of this family seem to be found exclusively in Salmonella and Yersinia species and several have been described as being putative cytoplasmic proteins. The function of this family is unknown.. +PF07378 Flagellar protein FlbT
Pfam-B_20574 (release 10.0). This family consists of several FlbT proteins. FlbT is a post-transcriptional regulator of flagellin. FlbT is associated with the 5' untranslated region (UTR) of fljK (25 kDa flagellin) mRNA, and this association requires a predicted loop structure in the transcript. Mutations within this loop abolish FlbT association and result in increased mRNA stability. It is therefore thought that FlbT promotes the degradation of flagellin mRNA by associating with the 5' UTR .. +PF07379 Protein of unknown function (DUF1494)
Pfam-B_20601 (release 10.0). This family consists of several bacterial proteins of around 175 residues in length. Members of this family seem to be found exclusively in Chlamydia species. The function of this family is unknown.. +PF07380 Pneumovirus M2 protein
Pfam-B_20478 (release 10.0). This family consists of several Pneumovirus M2 proteins. The M2-1 protein of respiratory syncytial virus (RSV) is a transcription processivity factor that is essential for virus replication .. +PF07381 Winged helix DNA-binding domain (DUF1495)
Pfam-B_20509 (release 10.0). This family consists of several hypothetical archaeal proteins of around 110 residues in length. The structure of this domain possesses a winged helix DNA-binding domain suggesting these proteins are bacterial transcription factors.. +PF07382 Histone H1-like nucleoprotein HC2
Pfam-B_20179 (release 10.0). This family contains the bacterial histone H1-like nucleoprotein HC2 (approximately 200 residues long), which seems to be found mostly in Chlamydia. HC2 functions in DNA condensation, although it has been suggested that it also has other roles .. +PF07383 Protein of unknown function (DUF1496)
Pfam-B_20554 (release 10.0). This family consists of several bacterial proteins of around 90 residues in length. Members of this family seem to be found exclusively in the Orders Vibrionales and Enterobacteriales. The function of this family is unknown.. +PF07384 Protein of unknown function (DUF1497)
Pfam-B_20585 (release 10.0). This family consists of several phage and bacterial proteins of around 59 residues in length. Members of this family seem to be found exclusively in Lactococcus lactis and the bacteriophages that infect this organism. The function of this family is unknown.. +PF07385 Protein of unknown function (DUF1498)
Pfam-B_20610 (release 10.0). This family consists of several hypothetical bacterial proteins of around 225 residues in length. The function of this family is unknown.. +PF07386 Protein of unknown function (DUF1499)
Pfam-B_20643 (release 10.0). This family consists of several hypothetical bacterial and plant proteins of around 125 residues in length. The function of this family is unknown.. +PF07387 Seadornavirus VP7
Pfam-B_20657 (release 10.0). This family consists of several Seadornavirus specific VP7 proteins of around 305 residues in length. The function of this family is unknown. However, it appears to be distantly related to protein kinases.. +PF07388 Alpha-2,8-polysialyltransferase (POLYST)
Pfam-B_20294 (release 10.0). This family contains the bacterial enzyme alpha-2,8-polysialyltransferase (EC:2.4.99.-) (approximately 500 residues long). This catalyses the polycondensation of alpha-2,8-linked sialic acid required for the synthesis of polysialic acid (PSA) .. +PF07389 Protein of unknown function (DUF1500)
Pfam-B_20659 (release 10.0). This family consists of several Orthopoxvirus specific proteins of around 100 residues in length. The function of this family is unknown.. +PF07390 Mycoplasma P30 protein
Pfam-B_20617 (release 10.0). This family consists of several P30 proteins which seem to be specific to Mycoplasma agalactiae. P30 is a 30-kDa immunodominant antigen and is known to be a transmembrane protein .. +PF07391 NPR nonapeptide repeat (2 copies)
This is a nine-residue repeat, which I have called NPR after NonaPeptide Repeat. It is found in two malarial proteins and has the consensus EEhhEEhhP where h stands for a hydrophobic amino acid.. +PF07392 Cyclin-dependent kinase inhibitor 2a p19Arf N-terminus
Pfam-B_20449 (release 10.0). This family represents the N-terminus (approximately 50 residues) of cyclin-dependent kinase inhibitor 2a p19Arf, which seems to be restricted to mammals. This is a tumour-suppressor protein that has been shown to inhibit the growth of human tumour cells lacking functional p53 by inducing a transient G2 arrest and subsequently apoptosis .. +PF07393 Exocyst complex component Sec10
Pfam-B_20545 (release 10.0). This family contains the Sec10 component (approximately 650 residues long) of the eukaryotic exocyst complex, which specifically affects the synthesis and delivery of secretory and basolateral plasma membrane proteins .. +PF07394 Protein of unknown function (DUF1501)
Pfam-B_20578 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function approximately 400 residues long.. +PF07395 Mig-14
Pfam-B_20642 (release 10.0). This family contains a number of bacterial mig-14 proteins (approximately 270 residues long). In Salmonella, mig-14 contributes to resistance to antimicrobial peptides, although the mechanism is not fully understood .. +PF07396 Phosphate-selective porin O and P
Pfam-B_20808 (release 10.0). This family represents a conserved region approximately 400 residues long within the bacterial phosphate-selective porins O and P. These are anion-specific porins, the binding site of which has a higher affinity for phosphate than chloride ions. Porin O has a higher affinity for polyphosphates, while porin P has a higher affinity for orthophosphate . In P. aeruginosa, porin O was found to be expressed only under phosphate-starvation conditions during the stationary growth phase .. +PF07397 Repeat of unknown function (DUF1502)
Pfam-B_20836 (release 10.0). This family consists of a number of repeats of around 34 residues in length. Members of this family seem to be found exclusively in three hypothetical Murid herpesvirus 4 proteins. The function of this family is unknown.. +PF07398 MDMPI C-terminal domain
Pfam-B_20685 (release 10.0). This domain is found at the C-terminus of the mycothiol maleylpyruvate isomerase enzyme (MDMPI). The structure of this protein has been solved . This domain appears weakly similar to Pfam:PF08608.. +PF07399 Protein of unknown function (DUF1504)
Pfam-B_20945 (release 10.0). This family consists of several hypothetical bacterial proteins of around 440 residues in length. The function of this family is unknown.. +PF07400 Interleukin 11
Pfam-B_20854 (release 10.0). This family contains interleukin 11 (approximately 200 residues long). This is a secreted protein that stimulates megakaryocytopoiesis, resulting in increased production of platelets, as well as activating osteoclasts, inhibiting epithelial cell proliferation and apoptosis, and inhibiting macrophage mediator production. These functions may be particularly important in mediating the hematopoietic, osseous and mucosal protective effects of interleukin 11 . Family members seem to be restricted to mammals.. +PF07401 Bovine Lentivirus VIF protein
Pfam-B_21067 (release 10.0). This family consists of several Lentivirus viral infectivity factor (VIF) proteins. VIF is known to be essential for ability of cell-free virus preparation to infect cells . Members of this family are specific to Bovine immunodeficiency virus (BIV) and Jembrana disease virus which also infects cattle.. +PF07402 Human herpesvirus U26 protein
Pfam-B_21150 (release 10.0). This family consists of several Human herpesvirus U26 proteins of around 300 residues in length. The function of this family is unknown.. +PF07403 Protein of unknown function (DUF1505)
Pfam-B_21179 (release 10.0). This family consists of several uncharacterised Caenorhabditis elegans proteins of around 115 residues in length. Members of this family contain 6 highly conserved cysteine residues. The function of this family is unknown.. +PF07404 Telomere-binding protein beta subunit (TEBP beta)
Pfam-B_20928 (release 10.0). This family consists of several telomere-binding protein beta subunits which appear to be specific to the family Oxytrichidae. Telomeres are specialised protein-DNA complexes that compose the ends of eukaryotic chromosomes. Telomeres protect chromosome termini from degradation and recombination and act together with telomerase to ensure complete genome replication. TEBP beta forms a complex with TEBP alpha and this complex is able to recognise and bind ssDNA to form a sequence-specific, telomeric nucleoprotein complex that caps the very 3' ends of chromosomes . . +PF07405 Protein of unknown function (DUF1506)
Pfam-B_20962 (release 10.0). This family consists of several bacterial proteins of around 130 residues in length. Members of this family seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown.. +PF07406 NICE-3 protein
Pfam-B_21003 (release 10.0). This family consists of several eukaryotic NICE-3 and related proteins. The gene coding for NICE-3 is part of the epidermal differentiation complex (EDC) which comprises a large number of genes that are of crucial importance for the maturation of the human epidermis . The function of NICE-3 is unknown.. +PF07407 Seadornavirus VP6 protein
Pfam-B_21021 (release 10.0). This family consists of several VP6 proteins from the Banna virus as well as a related protein VP5 from the Kadipiro virus. Members of this family are typically of around 420 residues in length. The function of this family is unknown.. +PF07408 Protein of unknown function (DUF1507)
Pfam-B_21047 (release 10.0). This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown.. +PF07409 Phage protein GP46
Pfam-B_20855 (release 10.0). This family contains GP46 phage proteins (approximately 120 residues long).. +PF07410 Streptococcus thermophilus bacteriophage Gp111 protein
Pfam-B_20904 (release 10.0). This family consists of several Streptococcus thermophilus bacteriophage Gp111 proteins of around 110 residues in length. The function of this family is unknown.. +PF07411 Domain of unknown function (DUF1508)
Pfam-B_20853 (release 10.0). This family represents a series of bacterial domains of unknown function of around 50 residues in length. Members of this family are often found as tandem repeats and in some cases represent the whole protein. All member proteins are described as being hypothetical.. +PF07412 Geminin
Pfam-B_20861 (release 10.0). This family contains the eukaryotic protein geminin (approximately 200 residues long). Geminin inhibits DNA replication by preventing the incorporation of MCM complex into prereplication complex, and is degraded during the mitotic phase of the cell cycle. It has been proposed that geminin inhibits DNA replication during S, G2, and M phases and that geminin destruction at the metaphase-anaphase transition permits replication in the succeeding cell cycle .. +PF07413 Herpes_UL37;
Betaherpesvirus immediate-early glycoprotein UL37. Pfam-B_21151 (release 10.0). This family consists of several Betaherpesvirus immediate-early glycoprotein UL37 sequences. The human cytomegalovirus (HCMV) UL37 immediate-early regulatory protein is a type I integral membrane N-glycoprotein which traffics through the ER and the Golgi network .. +PF07415 Gammaherpesvirus latent membrane protein (LMP2) protein
Pfam-B_21212 (release 10.0). This family consists of several Gammaherpesvirus latent membrane protein (LMP2) proteins. Epstein-Barr virus is a human Gammaherpesvirus that infects and establishes latency in B lymphocytes in vivo. The latent membrane protein 2 (LMP2) gene is expressed in latently infected B cells and encodes two protein isoforms, LMP2A and LMP2B, that are identical except for an additional N-terminal 119 aa cytoplasmic domain which is present in the LMP2A isoform. LMP2A is thought to play a key role in either the establishment or the maintenance of latency and/or the reactivation of productive infection from the latent state. The significance of LMP2B and its role in pathogenesis remain unclear .. +PF07416 Crinivirus P26 protein
Pfam-B_21324 (release 10.0). This family consists of several Crinivirus P26 proteins which seem to be found exclusively in the Lettuce infectious yellows virus. The function of this family is unknown.. +PF07417 Transcriptional regulator Crl
Pfam-B_20900 (release 10.0). This family contains the bacterial transcriptional regulator Crl (approximately 130 residues long). This is a transcriptional regulator of the csgA curlin subunit gene for curli fibres that are found on the surface of certain bacteria .. +PF07418 Acidic phosphoprotein precursor PCEMA1
Pfam-B_20971 (release 10.0). This family consists of several acidic phosphoprotein precursor PCEMA1 sequences which appear to be found exclusively in Plasmodium chabaudi. PCEMA1 is an antigen that is associated with the membrane of the infected erythrocyte throughout the entire intraerythrocytic cycle . The exact function of this family is unclear. . +PF07419 PilM
Pfam-B_20906 (release 10.0). This family contains the bacterial protein PilM (approximately 150 residues long). PilM is an inner membrane protein that has been predicted to function as a component of the pilin transport apparatus and thin-pilus basal body .. +PF07420 Protein of unknown function (DUF1509)
Pfam-B_21379 (release 10.0). This family consists of several uncharacterised viral proteins from the Marek's disease-like viruses. Members of this family are typically around 400 residues in length. The function of this family is unknown.. +PF07421 Neurotensin/neuromedin N precursor
Pfam-B_20947 (release 10.0). This family contains the precursor of bacterial neurotensin/neuromedin N (approximately 170 residues long). This is the common precursor of two biologically active related peptides, neurotensin and neuromedin N. It undergoes tissue-specific processing leading to the formation in some tissues and cancer cell lines of large peptides ending with the neurotensin or neuromedin N sequence .. +PF07422 Sexual stage antigen s48/45 domain
Pfam-B_21037 (release 10.0) and Pfam-B_4621 (release 14.0). This family contains sexual stage s48/45 antigens from Plasmodium (approximately 450 residues long). These are surface proteins expressed by Plasmodium male and female gametes that have been shown to play a conserved and important role in fertilisation . This domain contains 6 conserved cysteines suggesting 3 disulphide bridges.. +PF07423 Protein of unknown function (DUF1510)
Pfam-B_21355 (release 10.0). This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown.. +PF07424 TrbM
Pfam-B_21098 (release 10.0). This family contains the bacterial protein TrbM (approximately 180 residues long). In Comamonas testosteroni T-2, TrbM is derived from the IncP1beta plasmid pTSA, which encodes the widespread genes for p-toluenesulfonate (TSA) degradation .. +PF07425 Pardaxin
Pfam-B_21422 (release 10.0). This family consists of several Pardaxin proteins. Pardaxin, a 33-amino-acid pore-forming polypeptide toxin isolated from the Red Sea Moses sole Pardachirus marmoratus, has a helix-hinge-helix structure. This is a common structural motif found both in antibacterial peptides that can act selectively on bacterial membranes (e.g., cecropin), and in cytotoxic peptides that can lyse both mammalian and bacterial cells (e.g., melittin). Pardaxin possesses a high antibacterial activity with a significantly reduced haemolytic activity towards human red blood cells compared with melittin . Pardaxin has also been found to have a shark repellent action .. +PF07426 Dynactin subunit p22
Pfam-B_21336 (release 10.0). This family contains p22, the smallest subunit of dynactin, a complex that binds to cytoplasmic dynein and is a required activator for cytoplasmic dynein-mediated vesicular transport. Dynactin localises to the cleavage furrow and to the midbodies of dividing cells, suggesting that it may function in cytokinesis . Family members are approximately 170 residues long.. +PF07428 15-O-acetyltransferase Tri3
Pfam-B_21449 (release 10.0). This family represents a conserved region approximately 400 residues long within 15-O-acetyltransferase (Tri3), which seems to be restricted to ascomycete fungi. In Fusarium sporotrichioides, this is required for acetylation of the C-15 hydroxyl group of trichothecenes in the biosynthesis of T-2 toxin .. +PF07429 Fuc4NAc_transf;
4-alpha-L-fucosyltransferase glycosyl transferase group 56. Pfam-B_21451 (release 10.0). This family contains the bacterial enzyme 4-alpha-L-fucosyltransferase (Fuc4NAc transferase) (EC 2.4.1.-) (approximately 360 residues long). This catalyses the synthesis of Fuc4NAc-ManNAcA-GlcNAc-PP-Und (lipid III) as part of the biosynthetic pathway of enterobacterial common antigen (ECA), a polysaccharide comprised of the trisaccharide repeat unit Fuc4NAc-ManNAcA-GlcNAc .. +PF07430 Phloem filament protein PP1
Pfam-B_20843 (release 10.0). This family represents a conserved region approximately 200 residues long, four copies of which are found within the plant phloem filament protein PP1. This is one of the constituents of the proteinaceous filaments found in the sieve elements of Cucurbita phloem .. +PF07431 Protein of unknown function (DUF1512)
Pfam-B_21354 (release 10.0). This family consists of several archaeal proteins of around 370 residues in length. The function of this family is unknown.. +PF07432 Histone H1-like protein Hc1
Pfam-B_21362 (release 10.0). This family consists of several bacterial histone H1-like Hc1 proteins. In Chlamydia, Hc1 is expressed in the late stages of the life cycle, concomitant with the reorganisation of chlamydial reticulate bodies into elementary bodies. This suggests that Hc1 protein plays a role in the condensation of chromatin during intracellular differentiation .. +PF07433 Protein of unknown function (DUF1513)
Pfam-B_21436 (release 10.0). This family consists of several bacterial proteins of around 360 residues in length. The function of this family is unknown.. +PF07434 CblD like pilus biogenesis initiator
Pfam-B_21452 (release 10.0). This family consists of several minor pilin proteins including CblD from Burkholderia cepacia which is known to be the initiator of pilus biogenesis . The family also contains a variety of Enterobacterial minor pilin proteins.. +PF07435 YycH protein
Vella Briffa B, Szurmant H, Mistry J. Pfam-B_21457 (release 10.0). This family contains the bacterial protein YycH which is approximately 450 residues long. YycH plays a role in signal transduction and is found immediately downstream of the essential histidine kinase YycG. YycG forms a two component system together with its cognate response regulator YycF. PhoA fusion studies have shown that YycH is transported across the cytoplasmic protein. It is postulated that YycH functions as an antagonist to YycG . The molecule is made up of three domains, and has a novel three-dimensional structure. The N-terminal domain features a calcium binding site and the central domain contains two conserved loop regions .. +PF07436 Curtovirus V3 protein
Pfam-B_21777 (release 10.0). This family consists of several Curtovirus V3 proteins of around 90 residues in length. The function of this family is unknown.. +PF07437 YfaZ_precursor;
Pfam-B_21552 (release 10.0). This family contains the precursor of the bacterial protein YfaZ (approximately 180 residues long). Many members of this family are hypothetical proteins.. +PF07438 Protein of unknown function (DUF1514)
Moxon SJ, Eberhardt R. Pfam-B_21857 (release 10.0). This family consists of several Staphylococcus aureus and related bacteriophage proteins of around 65 residues in length. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids .. +PF07439 Protein of unknown function (DUF1515)
Pfam-B_21875 (release 10.0). This family consists of several hypothetical bacterial proteins of around 130 residues in length. Members of this family seem to be found exclusively in Rhizobium species. The function of this family is unknown.. +PF07440 Caerin 1 protein
Pfam-B_22039 (release 10.0). This family consists of several caerin 1 proteins from Litoria species. The caerin 1 peptides are among the most powerful of the broad-spectrum antibiotic amphibian peptides .. +PF07441 SigmaK-factor processing regulatory protein BofA
Pfam-B_21773 (release 10.0). This family contains the sigmaK-factor processing regulatory protein BofA (Bypass-of-forespore protein A) (approximately 80 residues long). During sporulation in Bacillus subtilis, transcription is controlled in the developing sporangium by a cascade of sporulation-specific transcription factors (sigma factors). Following engulfment, processing of sigmaK is inhibited by BofA. It has been suggested that this effect is exerted by alteration of the level of the SpoIVFA protein .. +PF07442 Ponericin
Pfam-B_21790 (release 10.0). This family contains a number of ponericin peptides (approximately 30 residues long) from the venom of the predatory ant Pachycondyla goeldii. These peptides exhibit antibacterial and insecticidal properties, and may adopt an amphipathic alpha-helical structure in polar environments such as cell membranes .. +PF07444 Ycf66 protein N-terminus
Pfam-B_21840 (release 10.0). This family represents the N-terminus (approximately 80 residues) of Ycf66, a protein that seems to be restricted to eukaryotes that contain chloroplasts and to cyanobacteria.. +PF07445 Primosomal replication protein priB and priC
Pfam-B_22037 (release 10.0). This family contains the bacterial primosomal replication proteins priB and priC (approximately 180 residues long). In Escherichia coli, these function in the assembly of the primosome .. +PF07447 Matrix protein VP40
Pfam-B_22295 (release 10.0). This family contains viral VP40 matrix proteins that seem to be restricted to the Filoviridae. These play an important role in the assembly process of virus particles by interacting with cellular factors, cellular membranes, and the ribonucleoprotein particle complex. It has been shown that the N-terminal region of VP40 folds into a mixture of hexameric and octameric states - these may have distinct roles .. +PF07448 Secreted phosphoprotein 24 (Spp-24)
Pfam-B_22318 (release 10.0). This family represents a conserved region approximately 140 residues long within secreted phosphoprotein 24 (Spp-24), which seems to be restricted to vertebrates. This is a non-collagenous protein found in bone that is related in sequence to the cystatin family of thiol protease inhibitors. This suggests that Spp-24 could function to modulate the thiol protease activities known to be involved in bone turnover. It is also possible that the intact form of Spp-24 found in bone could be a precursor to a biologically active peptide that coordinates an aspect of bone turnover .. +PF07449 Hydrogenase-1 expression protein HyaE
Pfam-B_21851 (release 10.0). This family contains bacterial hydrogenase-1 expression proteins approximately 120 residues long. This includes the E. coli protein HyaE, and the homologous proteins HoxO of R. eutropha and HupG of R. leguminosarum. Deletion of the hoxO gene in R. eutropha led to complete loss of the uptake [NiFe] hydrogenase activity, suggesting that it has a critical role in hydrogenase assembly .. +PF07450 Formate hydrogenlyase maturation protein HycH
Pfam-B_22086 (release 10.0). This family contains the bacterial formate hydrogenlyase maturation protein HycH, which is approximately 140 residues long. This may be required for the conversion of a precursor form of the large subunit of hydrogenlyase 3 into a mature form .. +PF07451 Stage V sporulation protein AD (SpoVAD)
Pfam-B_22130 (release 10.0). This family contains the bacterial stage V sporulation protein AD (SpoVAD), which is approximately 340 residues long. This is one of six proteins encoded by the spoVA operon, which is transcribed exclusively in the forespore at about the time of dipicolinic acid (DPA) synthesis in the mother cell. The functions of the proteins encoded by the spoVA operon are unknown, but it has been suggested they are involved in DPA transport during sporulation .. +PF07452 CHRD domain
CHRD (after SWISS-PROT abbreviation for chordin) is a novel domain identified in chordin, an inhibitor of bone morphogenetic proteins. This family includes bacterial homologues. It is anticipated to have an immunoglobulin-like beta-barrel structure based on limited similarity to superoxide dismutases but, as yet, no clear functional prediction can be made. Its most conserved feature is a GE[I/L]RCG[V/I/L] motif towards its C-terminal end. Most bacterial proteins in this family have only one CHRD domain, whereas it is found repeated in many eukaryotic proteins such as human chordin (Swiss:Q9H2X0) and Drosophila SOG (Swiss:Q24025). .. +PF07453 NUMOD1 domain
+PF07454 Stage II sporulation protein P (SpoIIP)
Pfam-B_21650 (release 10.0). This family contains the bacterial stage II sporulation protein P (SpoIIP) (approximately 350 residues long). It has been shown that a block in polar cytokinesis in Bacillus subtilis is mediated partly by transcription of spoIID, spoIIM and spoIIP. This inhibition of polar division is involved in the locking in of asymmetry after the formation of a polar septum during sporulation . Engulfment in Bacillus subtilis is mediated by two complementary systems: the first includes the proteins SpoIID, SpoIIM and SpoIIP (DMP) which carry out the engulfment, and the second includes the SpoIIQ-SpoIIIAGH (Q-AH) zipper, that recruits other proteins to the septum in a second-phase of the engulfment. The course of events follows as the incorporation firstly of SpoIIB into the septum during division to serve directly or indirectly as a landmark for localising SpoIIM and then SpoIIP and SpoIID to the septum. SpoIIP and SpoIID interact together to form part of the DMP complex . SpoIIP itself has been identified as an autolysin with peptidoglycan hydrolase activity .. +PF07455 Phage polarity suppression protein (Psu)
Pfam-B_21666 (release 10.0). This family contains a number of phage polarity suppression proteins (Psu) (approximately 190 residues long). The Psu protein of bacteriophage P4 causes suppression of transcriptional polarity in Escherichia coli by overcoming Rho termination factor activity .. +PF07456 Heptaprenyl diphosphate synthase component I
Pfam-B_22032 (release 10.0). This family contains component I of bacterial heptaprenyl diphosphate synthase (EC:2.5.1.30) (approximately 170 residues long). This is one of the two dissociable subunits that form the enzyme, both of which are required for the catalysis of the biosynthesis of the side chain of menaquinone-7 .. +PF07457 Protein of unknown function (DUF1516)
Pfam-B_22136 (release 10.0). This family contains a number of hypothetical bacterial proteins of unknown function approximately 120 residues long.. +PF07458 Sperm protein associated with nucleus, mapped to X chromosome
Pfam-B_22197 (release 10.0). This family contains human sperm proteins associated with the nucleus and mapped to the X chromosome (SPAN-X) (approximately 100 residues long). SPAN-X proteins are cancer-testis antigens (CTAs), and thus represent potential targets for cancer immunotherapy because they are widely distributed in tumours but not in normal tissues, except testes. They are highly insoluble, acidic, and polymorphic .. +PF07459 CTX phage RstB protein
Pfam-B_22203 (release 10.0). This family contains a number of RstB proteins approximately 120 residues long, including RstB1 and RstB2, from the Vibrio cholerae phage CTX. Functional analyses indicate that rstB2 is required for integration of the CTXphi phage into the V. cholerae chromosome .. +PF07460 NUMOD3 motif (2 copies)
NUMOD3 is a DNA-binding motif found in homing endonucleases and related proteins. It occurs on its own or in tandem repeats in GIY-YIG (Pfam:PF01541) and HTH proteins. It constitutes a beta-turn-loop-helix subregion of the DNA-binding domain of I-TevI homing endonuclease (Swiss:P13299) .. +PF07461 Nicotine adenine dinucleotide glycohydrolase (NADase)
Pfam-B_21586 (release 10.0). This family consists of several bacterial nicotine adenine dinucleotide glycohydrolase (NGA) proteins which appear to be specific to Streptococcus pyogenes. NAD glycohydrolase (NADase) is a potential virulence factor. Streptococcal NADase may contribute to virulence by its ability to cleave beta-NAD at the ribose-nicotinamide bond, depleting intracellular NAD pools and producing the potent vasoactive compound nicotinamide .. +PF07462 Merozoite surface protein 1 (MSP1) C-terminus
Pfam-B_21542 (release 10.0). This family represents the C-terminal region of merozoite surface protein 1 (MSP1) which are found in a number of Plasmodium species. MSP-1 is a 200-kDa protein expressed on the surface of the P. vivax merozoite. MSP-1 of Plasmodium species is synthesised as a high-molecular-weight precursor and then processed into several fragments. At the time of red cell invasion by the merozoite, only the 19-kDa C-terminal fragment (MSP-119), which contains two epidermal growth factor-like domains, remains on the surface. Antibodies against MSP-119 inhibit merozoite entry into red cells, and immunisation with MSP-119 protects monkeys from challenging infections. Hence, MSP-119 is considered a promising vaccine candidate .. +PF07463 NUMOD4 motif
NUMOD4 is a putative DNA-binding motif found in homing endonucleases and related proteins .. +PF07464 Apolipophorin-III precursor (apoLp-III)
Pfam-B_31170 (release 10.0). This family consists of several insect apolipoprotein-III sequences. Exchangeable apolipoproteins constitute a functionally important family of proteins that play critical roles in lipid transport and lipoprotein metabolism. Apolipophorin III (apoLp-III) is a prototypical exchangeable apolipoprotein found in many insect species that functions in transport of diacylglycerol (DAG) from the fat body lipid storage depot to flight muscles in the adult life stage .. +PF07465 Photosystem I protein M (PsaM)
Pfam-B_21548 (release 10.0). This family consists of several plant and cyanobacterial photosystem I protein M (PsaM) sequences. PsaM forms part of the photosystem I complex and its binding is stabilised by PsaI .. +PF07466 Protein of unknown function (DUF1517)
Pfam-B_22018 (release 10.0). This family consists of several hypothetical glycine rich plant and bacterial proteins of around 300 residues in length. The function of this family is unknown.. +PF07467 Beta-lactamase inhibitor (BLIP)
Pfam-B_41444 (release 10.0). The structure of BLIP reveals two structural domains, which form a polar, concave surface that docks onto a predominantly polar, convex protrusion on beta-lactamase. The ability of BLIP to adapt to a variety of class A beta-lactamases is thought to be due to flexibility between these two domains .. +PF07468 Agglutinin
Pfam-B_57133 (release 10.0). +PF07469 Domain of unknown function (DUF1518)
Pfam-B_1525 (release 10.0). This domain, which is usually found tandemly repeated, is found in various receptor co-activating proteins.. +PF07470 Glycosyl Hydrolase Family 88
Pfam-B_10896 (release 10.0). Unsaturated glucuronyl hydrolase catalyses the hydrolytic release of unsaturated glucuronic acids from oligosaccharides (EC:3.2.1.-) produced by the reactions of polysaccharide lyases . . +PF07471 Phage DNA packaging protein Nu1
Pfam-B_11430 (release 10.0). Terminase, the DNA packaging enzyme of bacteriophage lambda, is a heteromultimer composed of subunits Nu1 and A. The smaller Nu1 terminase subunit has a low-affinity ATPase stimulated by non-specific DNA . . +PF07472 Fucose-binding lectin II (PA-IIL)
Pfam-B_17609 (release 10.0). In Pseudomonas aeruginosa the fucose-binding lectin II (PA-IIL) contributes to the pathogenic virulence of the bacterium. PA-IIL functions as a tetramer when binding fucose. Each monomer is comprised of a nine-stranded, antiparallel beta-sandwich arrangement and contains two calcium cations that mediate the binding of fucose in a recognition mode unique among carbohydrate-protein interactions .. +PF07473 Spasmodic peptide gm9a
Pfam-B_90829 (release 10.0). This family consists of several spasmodic peptide gm9a sequences. Conotoxin gm9a is a putative 27-residue polypeptide encoded by Conus gloriamaris and is known to be a homologue of the "spasmodic peptide", tx9a, isolated from the venom of the mollusk-hunting cone shell Conus textile . Upon injection of this venom component, normal mice are converted into behavioural phenocopies of a well-known mutant, the spasmodic mouse .. +PF07474 G2F domain
Nidogen, an invariant component of basement membranes, is a multifunctional protein that interacts with most other major basement membrane proteins. The G2 fragment or (G2F domain) contains binding sites for collagen IV and perlecan. The structure is composed of an 11-stranded beta-barrel with a central helix. This domain is structurally related to that of green fluorescent protein Pfam:PF01353. A large surface patch on the beta-barrel is conserved in all metazoan nidogens.. +PF07475 HPr Serine kinase C-terminal domain
This family represents the C terminal kinase domain of Hpr Serine/threonine kinase PtsK. This kinase is the sensor in a multicomponent phosphorelay system in control of carbon catabolic repression in bacteria . This kinase is unusual in that it recognises the tertiary structure of its target and is a member of a novel family unrelated to any previously described protein phosphorylating enzymes . X-ray analysis of the full-length crystalline enzyme from Staphylococcus xylosus at a resolution of 1.95 A shows the enzyme to consist of two clearly separated domains that are assembled in a hexameric structure resembling a three-bladed propeller .. +PF07476 Methylaspartate ammonia-lyase C-terminus
Methylaspartate ammonia-lyase EC:4.3.1.2 catalyses the second step of fermentation of glutamate. It is a homodimer. This family represents the C-terminal region of Methylaspartate ammonia-lyase and contains a TIM barrel fold similar to the Pfam:PF01188. This family represents the catalytic domain and contains a metal binding site .. +PF07477 Glycosyl hydrolase family 67 C-terminus
Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the C terminal region of alpha-glucuronidase which is mainly alpha-helical. It wraps around the catalytic domain (Pfam:PF07488), making additional interactions both with the N-terminal domain (Pfam:PF03648) of its parent monomer and also forming the majority of the dimer-surface with the equivalent C-terminal domain of the other monomer of the dimer .. +PF07478 D-ala D-ala ligase C-terminus
This family represents the C-terminal, catalytic domain of the D-alanine--D-alanine ligase enzyme EC:6.3.2.4. D-Alanine is one of the central molecules of the cross-linking step of peptidoglycan assembly. There are three enzymes involved in the D-alanine branch of peptidoglycan biosynthesis: the pyridoxal phosphate-dependent D-alanine racemase (Alr), the ATP-dependent D-alanine:D-alanine ligase (Ddl), and the ATP-dependent D-alanine:D-alanine-adding enzyme (MurF) .. +PF07479 NAD-dependent glycerol-3-phosphate dehydrogenase C-terminus
NAD-dependent glycerol-3-phosphate dehydrogenase (GPDH) catalyses the interconversion of dihydroxyacetone phosphate and L-glycerol-3-phosphate. This family represents the C-terminal substrate-binding domain .. +PF07481 Domain of Unknown Function (DUF1521)
This family of unknown function is found in a limited set of Bradyrhizobium proteins. There appears to be a periodic -DG- motif in it.. +PF07482 Domain of Unknown Function (DUF1522)
+PF07483 Tryptophan-rich Synechocystis species C-terminal domain
This domain is found at the C-terminus, normally between 2-3 copies, of a range of Synechocystis membrane proteins. This domain is fairly tryptophan rich as well. . +PF07484 Phage Tail Collar Domain
This region is occasionally found in conjunction with Pfam:PF03335. Most of the family appear to be phage tail proteins; however some appear to be involved in other processes. For instance Swiss:Q03314 from Rhizobium leguminosarum may be involved in plant-microbe interactions ( ). A related protein Swiss:Q9L3N1 is involved in the pathogenicity of Microcystis aeruginosa. The finding of this family in a structural component of the phage tail fibre baseplate (Swiss:P10930) suggests that its function is structural rather than enzymatic. Structural studies show this region consists of a helix and a loop ( ) and three beta-strands. This alignment does not catch the third strand as it is separated from the rest of the structure by around 100 residues. This strand is conserved in homologues but the intervening sequence is not. Much of the function of Swiss:P10930 appears to reside in this intervening region. In the tertiary structure of the phage baseplate this domain forms part of the 'collar'. The domain may bind SO4, however the residues accredited with this vary between the PDB file and the Swiss-Prot entry. The long unconserved region may be due to domain swapping in and out of a loop or reflective of rapid evolution. . +PF07485 DUF1259;
Domain of Unknown Function (DUF1259). This family is the lppY/lpqO homologue family.. +PF07486 Cell Wall Hydrolase
These enzymes have been implicated in cell wall hydrolysis, most extensively in Bacillus subtilis. For instance Swiss:P50739 is expressed during sporulation as an inactive form and then deposited on the cell outer cortex. During germination the enzyme is activated and hydrolyses the cortex( ). A similar role is carried out by the partially redundant Swiss:P42249 ( ). It is not clear whether these enzymes are amidases or peptidases.. +PF07487 SopE GEF domain
Pfam-B_18665 (release 7.8). This family represents the C-terminal guanine nucleotide exchange factor (GEF) domain of SopE. Salmonella typhimurium employs a type III secretion system to inject bacterial toxins into the host cell cytosol. These toxins transiently activate Rho family GTP-binding protein-dependent signaling cascades to induce cytoskeletal rearrangements. SopE, can activate Cdc42, an essential component of the host cellular signaling cascade, in a Dbl-like fashion despite its lack of sequence similarity to Dbl-like proteins, the Rho-specific eukaryotic guanine nucleotide exchange factors .. +PF07488 Glycosyl hydrolase family 67 middle domain
Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the central catalytic domain of alpha-glucuronidase .. +PF07489 Translocated intimin receptor (Tir) C-terminus
Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation . This family represents the Tir C-terminal domain which has been reported to bind uninfected host cells and beta-1 integrins although the role of intimin binding to integrins is unclear. This intimin C-terminal domain has also been shown to be sufficient for Tir recognition .. +PF07490 Translocated intimin receptor (Tir) N-terminus
Griffiths-Jones SR, Moxon SJ. Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation . This family represents the Tir N-terminal domain which is involved in Tir stability and Tir secretion .. +PF07491 Protein phosphatase inhibitor
Pfam-B_11090 (release 10.0). These proteins include Ypi1, (Swiss:P43587), a novel Saccharomyces cerevisiae type 1 protein phosphatase inhibitor and ppp1r11/hcgv (Swiss:O60927), annotated as having protein phosphatase inhibitor activity [2-3].. +PF07492 Neutral trehalase Ca2+ binding domain
Neutral trehalases mobilise trehalose accumulated by fungal cells as a protective and storage carbohydrate. This family represents a calcium-binding domain similar to EF hand. Residues 97 and 108 in Swiss:O42893 have been implicated in this interaction. It is thought that this domain may provide a general mechanism for regulating neutral trehalase activity in yeasts and filamentous fungi .. +PF07494 Two component regulator propeller
A large group of two component regulator proteins appear to have the same N-terminal structure of 14 tandem repeats. These repeats show homology to Pfam:PF01011 and Pfam:PF00400 indicating that they are likely to form a beta-propeller. This family has been built with artificially high cut-offs in order to avoid overlaps with other beta-propeller families. The fourteen repeats are likely to form two propellers; it is not clear if these structures are likely to recruit other proteins or interact with DNA.. +PF07495 Y_Y_Y domain
This domain is mostly found at the end of the beta propellers (Pfam:PF07494) in a family of two component regulators. However they are also found tandemly repeated in Swiss:Q891H4 without other signal conduction domains being present. It's named after the conserved tyrosines found in the alignment. The exact function is not known.. +PF07496 CW-type Zinc Finger
This domain appears to be a zinc finger. The alignment shows four conserved cysteine residues and a conserved tryptophan. It was first identified by , and is predicted to be a "highly specialised mononuclear four-cysteine zinc finger...that plays a role in DNA binding and/or promoting protein-protein interactions in complicated eukaryotic processes including ...chromatin methylation status and early embryonic development." Weak homology to Pfam:PF00628 further evidences these predictions (personal obs: C Yeats). Twelve different CW-domain-containing protein subfamilies are described, with different subfamilies being characteristic of vertebrates, higher plants and other animals in which this domain is found .. +PF07497 Rho termination factor, RNA-binding domain
Pfam-B_1610 (release 11.0). The Rho termination factor disengages newly transcribed RNA from its DNA template at certain, specific transcripts. It is thought that two copies of Rho bind to RNA and that Rho functions as a hexamer of protomers .. +PF07498 Rho termination factor, N-terminal domain
Pfam-B_1610 (release 11.0). The Rho termination factor disengages newly transcribed RNA from its DNA template at certain, specific transcripts. It is thought that two copies of Rho bind to RNA and that Rho functions as a hexamer of protomers . This domain is found to the N-terminus of the RNA binding domain (Pfam:PF07497).. +PF07499 RuvA, C-terminal domain
Pfam-B_1373 (release 11.0). Homologous recombination is a crucial process in all living organisms. In bacteria, the RuvA, RuvB, and RuvC proteins are involved in this process. More specifically the proteins process the Holliday junction DNA. RuvA is comprised of three distinct domains. The domain represents the C-terminal domain and plays a significant role in the ATP-dependent branch migration of the hetero-duplex through direct contact with RuvB . Within the Holliday junction, the C-terminal domain makes no interaction with DNA .. +PF07500 Transcription factor S-II (TFIIS), central domain
Pfam-B_1373 (release 11.0). Transcription elongation by RNA polymerase II is regulated by the general elongation factor TFIIS. This factor stimulates RNA polymerase II to transcribe through regions of DNA that promote the formation of stalled ternary complexes. TFIIS is composed of three structural domains, termed I, II, and III. The two C-terminal domains (II and III), this domain and Pfam:PF01096 are required for transcription activity .. +PF07501 G5 domain
This domain is found in a wide range of extracellular proteins. It is found tandemly repeated in up to 8 copies. It is found in the N-terminus of peptidases belonging to the M26 family which cleave human IgA. The domain is also found in proteins involved in metabolism of bacterial cell walls suggesting this domain may have an adhesive function.. +PF07502 MANSC;
This region of similarity, comprising 8 conserved cysteines, is found in the N-terminal region of several membrane-associated and extracellular proteins . Although formerly called MANSC (for motif at N terminus with seven cysteines) it has now been renamed MANEC (motif at N terminus with eight cysteines) by Richard Mitter and Stephen Fitzgerald after the discovery of an eighth conserved cysteine. It is postulated that this domain may play a role in the formation of protein complexes involving various protease activators and inhibitors .. +PF07503 HypF finger
The HypF family of proteins are involved in the maturation and regulation of hydrogenase ( ). In the N-terminus they appear to have two Zinc finger domains, as modelled by this family.. +PF07504 Fungalysin/Thermolysin Propeptide Motif
This motif is found in both the bacterial M4 peptidase propeptide and the fungal M36 propeptide. Its exact function is not clear, but it is likely to either inhibit the peptidase, so as to prevent its premature activation, or has a chaperone activity. Both of these roles have been ascribed to the M4 and M36 propeptides ( , ).. +PF07505 Phage protein Gp37/Gp68
Homologues of phage proteins Gp37 and Gp68 are found in several bacteria.. +PF07506 ParB;
RepB plasmid partitioning protein. This family includes proteins with sequence similarity to the RepB partitioning protein of the large Ti (tumour-inducing) plasmids of Agrobacterium tumefaciens[1-2].. +PF07507 WavE lipopolysaccharide synthesis
These proteins are encoded by putative wav gene clusters, which are responsible for the synthesis of the core oligosaccharide (OS) region of Vibrio cholerae lipopolysaccharide . . +PF07508 Recombinase
This domain is usually found associated with Pfam:PF00239 in putative integrases/recombinases of mobile genetic elements of diverse bacteria and phages.. +PF07509 Protein of unknown function (DUF1523)
+PF07510 Protein of unknown function (DUF1524)
COGs (COG3472) & PSI2 target BIG_246. This family of uncharacterised proteins contain a conserved HXXP motif. A similar motif is seen in protein families in the His-Me finger endonuclease superfamily which suggests this family of proteins may also act as endonucleases.. +PF07511 Protein of unknown function (DUF1525)
+PF07514 Putative helicase
Some members of this family have been annotated as helicases.. +PF07515 Protein of unknown function (DUF1528)
+PF07516 SecA Wing and Scaffold domain
SecA protein binds to the plasma membrane where it interacts with proOmpA to support translocation of proOmpA through the membrane. SecA protein achieves this translocation, in association with SecY protein, in an ATP dependent manner. This family is composed of two C-terminal alpha helical subdomains: the wing and scaffold subdomains .. +PF07517 SecA DEAD-like domain
SecA protein binds to the plasma membrane where it interacts with proOmpA to support translocation of proOmpA through the membrane. SecA protein achieves this translocation, in association with SecY protein, in an ATP dependent manner [1,2]. This domain represents the N-terminal ATP-dependent helicase domain, which is related to the Pfam:PF00270 .. +PF07519 Tannase and feruloyl esterase
This family includes fungal tannase and feruloyl esterase [2-3]. It also includes several bacterial homologues of unknown function.. +PF07520 Virulence factor SrfB
This family includes homologues of SrfB. SsrAB is a two-component regulatory system encoded within the Salmonella pathogenicity island SPI-2. Among the products of genes activated by SsrAB within epithelial and macrophage cells is Swiss:Q9KIJ9 . Homologues are found in several other proteobacteria.. +PF07521 RNA-metabolising metallo-beta-lactamase
Pfam-B_760 (release 11.0). The metallo-beta-lactamase fold contains five sequence motifs. The first four motifs are found in Pfam:PF00753 and are common to all metallo-beta-lactamases. The fifth motif appears to be specific to function. This entry represents the fifth motif from metallo-beta-lactamases involved in RNA metabolism .. +PF07522 DNA repair metallo-beta-lactamase
The metallo-beta-lactamase fold contains five sequence motifs. The first four motifs are found in Pfam:PF00753 and are common to all metallo-beta-lactamases. The fifth motif appears to be specific to function. This entry represents the fifth motif from metallo-beta-lactamases involved in DNA repair .. +PF07523 Bacterial Ig-like domain (group 3)
This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins.. +PF07524 Bromodomain associated
This domain is predicted to bind DNA and is often found associated with Pfam:PF00439 and in transcription factors. It has a histone-like fold.. +PF07525 Clip; SOCS_Clip;
The SOCS box acts as a bridge between specific substrate- binding domains and more generic proteins that comprise a large family of E3 ubiquitin protein ligases.. +PF07526 Associated with HOX
The function of this domain is unknown . It is often found in plant proteins associated with Pfam:PF00046.. +PF07527 Hairy Orange
The Orange domain is found in the Drosophila proteins Hesr-1, Hairy, and Enhancer of Split [1,2]. The Orange domain is proposed to mediate specific protein-protein interaction between Hairy and Scute .. +PF07528 DZF domain
The function of this domain is unknown . It is often found associated with Pfam:PF00098 or Pfam:PF00035. This domain has been predicted to belong to the nucleotidyltransferase superfamily .. +PF07529 HSA
This domain is predicted to bind DNA and is often found associated with helicases.. +PF07530 Associated with zinc fingers
The function of this domain is unknown. It is often found associated with Pfam:PF00096.. +PF07531 NHR1 homology to TAF
This corresponds to the region NHR1 that is conserved between the product of the nervy gene in Drosophila and the human mtg8b protein , which is hypothesised to be a transcription factor. . +PF07532 Bacterial Ig-like domain (group 4)
This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins.. +PF07533 TCH;
The function of this domain is unknown . It is often found associated with helicases and transcription factors.. +PF07534 TLD
This domain is predicted to be an enzyme and is often found associated with Pfam:PF01476. Its structure consists of a beta-sandwich surrounded by two helices and two one-turn helices .. +PF07535 DBF zinc finger
This domain is predicted to bind metal ions and is often found associated with Pfam:PF00533 and Pfam:PF02178. It was first identified in the Drosophila chiffon gene product , and is associated with initiation of DNA replication.. +PF07536 HWE histidine kinase
Two-component systems, consisting of a histidine kinase and a cognate response regulator protein, represent the best-known apparatus for transducing external cues into a physiological response in bacteria. The HWE domain is found in a subset of two-component system kinases, belonging to the same superfamily as Pfam:PF00512 . The family was defined by the presence of a highly conserved H residue in the kinase domain and a WxE motif in a C-terminal ATPase domain that is related to Pfam:PF02518. These proteins are found in a variety of alpha- and gamma-proteobacteria, with significant enrichment in the rhizobia.. +PF07537 CamS sex pheromone cAM373 precursor
Pfam-B_18913 (release 11.0). This family includes CamS (Swiss:Q8L313), from which Staphylococcus aureus sex pheromone staph-cAM373 is processed.. +PF07538 Clostridial hydrophobic W
A novel extracellular macromolecular system has been proposed based on the proteins containing ChW repeats . ChW stands for Clostridial hydrophobic with conserved W (tryptophan). This repeat was originally described in Clostridium acetobutylicum but is also found in other Gram-positive bacteria including Enterococcus faecalis, Streptococcus agalactiae and Streptomyces coelicolor.. +PF07539 Down-regulated in metastasis
Pfam-B_10642 (release 11.0). These eukaryotic proteins include DRIM (Down-Regulated In Metastasis) (Swiss:O75691), which is differentially expressed in metastatic and non-metastatic human breast carcinoma cells . It is believed to be involved in processing of non-coding RNA .. +PF07540 Nucleolar complex-associated protein
Pfam-B_8562 (release 11.0). Nucleolar complex-associated protein (Noc3p, Swiss:Q07896) is conserved in eukaryotes and has essential roles in replication and rRNA processing in Saccharomyces cerevisiae .. +PF07541 Eukaryotic translation initiation factor 2 alpha subunit
Pfam-B_5125 (release 11.0). These proteins share a region of similarity that falls towards the C terminus from Pfam:PF00575.. +PF07542 ATP12 chaperone protein
Pfam-B_6737 (release 11.0). Mitochondrial F1-ATPase is an oligomeric enzyme composed of five distinct subunit polypeptides. The alpha and beta subunits make up the bulk of protein mass of F1. In Saccharomyces cerevisiae both subunits are synthesised as precursors with amino-terminal targeting signals that are removed upon translocation of the proteins to the matrix compartment . These proteins include examples from eukaryotes and bacteria and may have chaperone activity, being involved in F1 ATPase complex assembly.. +PF07543 DUF1531;
Protein trafficking PGA2. Pfam-B_46790 (release 11.0). A Saccharomyces cerevisiae member of this family (PGA2) is an ER protein which has been implicated in protein trafficking .. +PF07544 CSE2;
RNA polymerase II transcription mediator complex subunit 9. Pfam-B_45625 (release 11.0). This family of Med9 proteins is conserved in yeasts. It forms part of the middle region of Mediator . Med9 has two functional domains. The species-specific amino-terminal half (aa 1-63) plays a regulatory role in transcriptional regulation, whereas this well-conserved carboxy-terminal half (aa 64-149) has a more fundamental function involved in direct binding to the amino-terminal portions of Med4 and Med7 and the assembly of Med9 into the Middle module. Also, some unidentified factor(s) in med9 extracts may impact the binding of TFIID to the promoter .. +PF07545 Vestigial/Tondu family
The mammalian TEF and the Drosophila scalloped genes belong to a conserved family of transcriptional factors that possesses a TEA/ATTS DNA-binding domain. Transcriptional activation by these proteins likely requires interactions with specific coactivators. In Drosophila, Scalloped (Sd) interacts with Vestigial (Vg) to form a complex, which binds DNA through the Sd TEA/ATTS domain. The Sd-Vg heterodimer is a key regulator of wing development, which directly controls several target genes and is able to induce wing outgrowth when ectopically expressed. This short conserved region is needed for interaction with Sd .. +PF07546 EMI domain
The Pfam alignment is truncated at the C-terminus and does not include the final cysteine defined in Callebaut et al . This is to stop the family overlapping with other domains.. +PF07547 RSD-2 N-terminal domain
This domain is found in three copies in the N-terminus of the C. elegans RSD-2 protein. RSD-2 (RNAi spreading defective) is involved in systemic RNAi . Mutations in the rsd-2 gene do not affect somatic genes but only germline expressed genes .. +PF07548 Chlamydia polymorphic membrane protein middle domain
This family contains several Chlamydia polymorphic membrane proteins. Chlamydia pneumoniae is an obligate intracellular bacterium and a common human pathogen causing infection of the upper and lower respiratory tract. This domain is found between the beta-helical repeats (Pfam:PF02415) and the C-terminal Pfam:PF03797. This domain is excised subsequent to secretion .. +PF07549 SecD/SecF GG Motif
This family consists of various prokaryotic SecD and SecF protein export membrane proteins. The SecD and SecF proteins are part of the multimeric protein export complex comprising SecA, D, E, F, G, Y, and YajC . SecD and SecF are required to maintain a proton motive force . This alignment encompasses a -GG- motif typically found in the N-terminal half of the SecD/SecF proteins .. +PF07550 Protein of unknown function (DUF1533)
This family consists of several hypothetical bacterial proteins and is around 60 residues in length. Its function is not known.. +PF07551 Protein of unknown function (DUF1534)
This family is found in a group of small bacterial proteins. Its function is not known.. +PF07552 Spore Coat Protein X and V domain
This family is found in the Bacillales coat protein X as a tandem repeat and also in coat protein V. The proteins are found in the insoluble fraction .. +PF07553 DUF1535;
Host cell surface-exposed lipoprotein. This is a family of lipoproteins that is involved in superinfection exclusion. Proteins in this family have been shown to act at the stage of DNA release from the phage head into the cell .. +PF07554 Uncharacterised Sugar-binding Domain
This domain is found in a wide variety of contexts, but mostly occurring in cell wall associated proteins. A lack of conserved catalytic residues suggests that it is a binding domain. From context, possible substrates are hyaluronate or fibronectin (personal obs: C Yeats). This is further evidenced by . Possibly the exact substrate is N-acetyl glucosamine. Finding it in the same protein as Pfam:PF05089 further supports this proposal. It is found in the C-terminal part of Swiss:O82833, which is removed during maturation ( ). Some of the proteins it is found in (e.g. Swiss:Q9RL69) are involved in methicillin resistance ( ). The name FIVAR derives from Found In Various Architectures. . +PF07555 Hyaluronidase_2;
beta-N-acetylglucosaminidase . Pfam-B_4394 (release 12.0). This family has previously been described as a hyaluronidase [1,2]. However, more recently it has been shown that this family has beta-N-acetylglucosaminidase activity .. +PF07556 Protein of unknown function (DUF1538)
This family contains several conserved glycines and phenylalanines.. +PF07557 Shugoshin C terminus
Shugoshin-like proteins contain this conserved sequence at the C terminus, which is rich in basic amino-acids. Shugoshin (Sgo1) protects Rec8 at centromeres during anaphase I (during meiosis) so that sister chromatids remain tethered . Sgo2 is a paralogue of Sgo1 and is involved in correctly orienting sister-centromeres .. +PF07558 Shugoshin N-terminal coiled-coil region
The Shugoshin protein is found to have this conserved N-terminal coiled-coil region and a highly conserved C-terminal basic region, family Shugoshin_C Pfam:PF07557. Shugoshin is a crucial target of Bub1 kinase function at kinetochores, necessary for both meiotic and mitotic localisation of shugoshin to the kinetochore . Human shugoshin is diffusible and mediates kinetochore-driven formation of kinetochore-microtubules during bipolar spindle assembly . Further, the primary role of shugoshin is to ensure bipolar attachment of kinetochores, and its role in protecting cohesion has co-developed to facilitate this process .. +PF07559 Flagellar basal body protein FlaE
This family consists of several bacterial FlaE flagellar proteins. These proteins are part of the flagellar basal body rod complex.. +PF07560 Domain of Unknown Function (DUF1539)
+PF07561 Domain of Unknown Function (DUF1540)
This family has four conserved cysteines, which is suggestive of a metal binding function.. +PF07562 ANF_assoc;
Nine Cysteines Domain of family 3 GPCR. This conserved sequence contains several highly-conserved Cys residues that are predicted to form disulphide bridges. It is predicted to lie outside the cell membrane, tethered to the Pfam:PF00003 in several receptor proteins.. +PF07563 Protein of unknown function (DUF1541)
This family consists of several hypothetical bacterial proteins and occurs as a tandem repeat.. +PF07564 Domain of Unknown Function (DUF1542)
This domain is found in several cell surface proteins. Some are involved in antibiotic resistance (e.g Swiss:Q9RL69 and Swiss:Q9LCJ9) and/or cellular adhesion (e.g. Swiss:Q931R6) .. +PF07565 Band 3 cytoplasmic domain
Pfam-B_1004 (release 3.0). This family contains the cytoplasmic domain of the Band 3 anion exchange proteins that exchange Cl-/HCO3-. Band 3 constitutes the most abundant polypeptide in the red blood cell membrane, comprising 25% of the total membrane protein. The cytoplasmic domain of band 3 functions primarily as an anchoring site for other membrane-associated proteins. Included among the protein ligands of cdb3 are ankyrin, protein 4.2, protein 4.1, glyceraldehyde-3-phosphate dehydrogenase (GAPDH), phosphofructokinase, aldolase, hemoglobin, hemichromes, and the protein tyrosine kinase (p72syk). . +PF07566 Domain of Unknown Function (DUF1543)
This domain is found as 1-2 copies in a small family of proteins of unknown function.. +PF07568 Histidine kinase
This is the dimerisation and phosphoacceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. It is usually found adjacent to a C-terminal ATPase domain (Pfam:PF02518). This domain is found in a wide range of Bacteria and also several Archaea.. +PF07569 TUP1-like enhancer of split
Pfam-B_7106 (release 12.0). The Hira proteins are found in a range of eukaryotes and are implicated in the assembly of repressive chromatin. These proteins also contain Pfam:PF00400.. +PF07571 Protein of unknown function (DUF1546)
Pfam-B_3691 (release 12.0). Associated with Pfam:PF02969 in Transcription initiation factor TFIID subunit 6 (TAF6).. +PF07572 Bucentaur or craniofacial development
Pfam-B_10149 (release 12.0). Bucentaur or craniofacial development protein 1 (BCNT) in ruminants has a different domain architecture to that in mouse and human. For this reason it has been used as a model for molecular evolution [1-3]. Both bovine and human BCNTs are phosphorylated by casein kinase II in vitro .. +PF07573 Nitrogen regulatory protein AreA N terminus
Pfam-B_11486 (release 11.0). The AreA nitrogen regulatory protein proteins (which are GATA type transcription factors) share a highly conserved N terminus and Pfam:PF00320 at the C terminus.. +PF07574 Nse1 non-SMC component of SMC5-6 complex
Pfam-B_24547 (release 11.0). S. cerevisiae Nse1 (Swiss:Q07913) forms part of a complex with SMC5-SMC6. This non-structural maintenance of chromosomes (SMC) complex plays an essential role in genomic stability, being involved in DNA repair and DNA metabolism [1,2]. It is conserved in eukaryotes from yeast to human.. +PF07575 Nuceloporin_Nup85; Nucelopor_Nup85;
Pfam-B_55990 (release 11.0). A family of nucleoporins conserved from yeast to human. The nuclear pore complex is a large assembly composed of two essential complexes: the heptameric Nup84 complex and the heteromeric Nic96-containing complex. The Nup84 complex is composed of one copy each of Nup84, Nup85, Nup120, Nup133, Nup145C, Sec13, and Seh1. The structure of a complex of Nup85 and Seh1 was solved . The N-terminus of Nup85 is inserted and forms a three-stranded blade that completes the Seh1 6-bladed beta-propeller in trans. Following its N-terminal insertion blade, Nup85 forms a compact cuboid structure composed of 20 helices, with two distinct modules, referred to as crown and trunk .. +PF07576 BRCA1-associated protein 2
Pfam-B_5419 (release 11.0). These proteins include BRCA1-associated protein 2 (BRAP2), which binds nuclear localisation signals (NLSs) in vitro and in yeast two-hybrid screening . These proteins share a region of sequence similarity at their N terminus. They also have Pfam:PF02148 at the C terminus.. +PF07577 Domain of Unknown Function (DUF1547)
This family appears to be found only in a small family of Chlamydia species.. +PF07578 Lipid A Biosynthesis N-terminal domain
This family is found at the N-terminus of a group of Chlamydial Lipid A biosynthesis proteins. It is also found by itself in a family of proteins of unknown function.. +PF07579 Domain of Unknown Function (DUF1548)
This family appears to be found only in a small family of Chlamydia proteins.. +PF07580 M26 IgA1-specific Metallo-endopeptidase C-terminal region
These peptidases, which cleave mammalian IgA, are found in Gram-positive bacteria. Often found associated with Pfam:PF00746, they may be attached to the cell wall.. +PF07581 The GLUG motif
This family is found in the IgA1 (M26) peptidases, which are attached to the cell wall peptidoglycan by an amide bond ( ). IgA1 protease selectively cleaves human IgA1 and is likely to be a pathogenicity factor in some pathogens ( ). This family is also found in various other contexts, including with Pfam:PF05860. It is named GLUG after the mostly conserved G-L-any-G motif.. +PF07582 AP endonuclease family 2 C terminus
This highly-conserved sequence is found at the C terminus of several apurinic/apyrimidinic (AP) endonucleases in a range of Gram-positive and Gram-negative bacteria. See also Pfam:PF01261.. +PF07583 DUF1549; PSC2;
Protein of unknown function (DUF1549). Blast single linkage clustering. A family of paralogues in the planctomyces.. +PF07584 DUF1550;
Aerotolerance regulator N-terminal. Blast clustering of Pirellula proteome. These proteins share a highly-conserved sequence at their N-terminus. They include several proteins from Rhodopirellula baltica and also several from proteobacteria. The proteins are produced by the Batl operon which appears to be important in pathogenicity and aerotolerance. This family is the conserved N-terminus, but the full length proteins carry multiple membrane-spanning domains . BatA ensures bacterial survival in the early stages of the infection process, when the infected sites are aerobic, and is produced under conditions of oxidative stress .. +PF07585 Protein of unknown function (DUF1551)
Blast clustering of Pirellula proteome. A family of proteins identified in Rhodopirellula baltica.. +PF07586 DUF1552;
Protein of unknown function (DUF1552). Blast clustering of Pirellula proteome. A family of proteins identified in Rhodopirellula baltica.. +PF07587 DUF1553;
Protein of unknown function (DUF1553). Blast clustering of Pirellula genome. A family of proteins found in Rhodopirellula baltica.. +PF07588 Protein of unknown function (DUF1554)
Blast clustering of Leptospira proteome. A family of proteins identified in Leptospira interrogans. . +PF07589 DUF1555;
Blast clustering of Pirellula genome. This motif has been identified in a wide range of bacteria at their C-terminus. It has been suggested that this is a protein sorting signal. Based on phylogenetic profiling it has been suggested that the EpsH family of proteins mediate this function .. +PF07590 Protein of unknown function (DUF1556)
Blast clustering of Pirellula proteome. +PF07591 DUF1557;
Pretoxin HINT domain. Blast clustering of Leptospira proteome.. A member of the HINT superfamily of proteases that is usually found N-terminal to the toxin module in polymorphic toxin systems. The domain is predicted to function in releasing the toxin domain by autoproteolysis .. +PF07592 Transposase_36;
Rhodopirellula transposase DDE domain. Blast clustering of Pirellula proteome. These transposases are found in the planctomycete Rhodopirellula baltica, the cyanobacterium Nostoc, and the Gram-positive bacterium Streptomyces.. +PF07593 ASPIC and UnbV
Blast clustering of Pirellula proteome. This conserved sequence is found associated with Pfam:PF00515 in several paralogous proteins in Rhodopirellula baltica. It is also found associated with Pfam:PF01839 in several eukaryotic integrin-like proteins (e.g. human ASPIC Swiss:Q9NQ78) and in several other bacterial proteins (e.g. Swiss:Q84HN1 ).. +PF07595 Planctomycete extracellular
Blast clustering of Pirellula proteome. This motif is conserved as the N terminus of several Rhodopirellula baltica proteins predicted to be extracellular.. +PF07596 DUF1559;
Protein of unknown function (DUF1559). Blast clustering of Pirellula proteome. A large family of paralogous proteins apparently unique to planctomycetes.. +PF07597 Protein of unknown function (DUF1560)
Blast clustering of Pirellula genome. Small family of short hypothetical proteins in Rhodopirellula baltica.. +PF07598 Protein of unknown function (DUF1561)
Blast clustering of Leptospira proteome. A family of paralogous proteins in Leptospira interrogans.. +PF07599 Protein of unknown function (DUF1563)
Blast clustering of Leptospira proteome. A small family of short hypothetical proteins in Leptospira interrogans.. +PF07600 Protein of unknown function (DUF1564)
Blast clustering of Leptospira proteome. A family of paralogous proteins in Leptospira interrogans. Several (e.g. Swiss:Q8F4V2) have been annotated as possible CopG-like transcriptional regulators (see Pfam:PF01402).. +PF07602 Protein of unknown function (DUF1565)
Blast clustering of Leptospira proteome. These proteins share a region of homology in their N termini, and are found in several phylogenetically diverse bacteria and in the archaeon Methanosarcina acetivorans. Some of these proteins also contain characterised domains such as Pfam:PF00395 (e.g. Swiss:Q8YWJ6) and Pfam:PF03422 (e.g. Swiss:Q9FBS2).. +PF07603 Protein of unknown function (DUF1566)
Blast clustering of the Leptospira proteome. These proteins of unknown function are found in Leptospira interrogans and in several gamma proteobacteria.. +PF07606 Protein of unknown function (DUF1569)
Blast clustering of Pirellula proteome. A family of hypothetical proteins identified in Rhodopirellula baltica.. +PF07607 Protein of unknown function (DUF1570)
Blast clustering of Pirellula proteome. A family of hypothetical proteins in Rhodopirellula baltica. This family carries a highly conserved HExxH sequence motif characteristic of members of the Peptidase clan MA.. +PF07608 Protein of unknown function (DUF1571)
Blast clustering of Pirellula proteome. A family of paralogous proteins in Rhodopirellula baltica.. +PF07609 Protein of unknown function (DUF1572)
Blast search with Q7UW06. These proteins, from several diverse bacteria, share a short conserved sequence towards their N termini.. +PF07610 Protein of unknown function (DUF1573)
Blast clustering of Pirellula proteome. These hypothetical proteins, from bacteria such as Rhodopirellula baltica, Bacteroides thetaiotaomicron, and Porphyromonas gingivalis, share a region of conserved sequence towards their N-termini.. +PF07611 Protein of unknown function (DUF1574)
A family of hypothetical proteins in Leptospira interrogans.. +PF07613 Protein of unknown function (DUF1576)
This small family is found in several undescribed proteins. The alignment is distinguished by the frequent occurrence of conserved glycine and aromatic residues.. +PF07614 Protein of unknown function (DUF1577)
Blast clustering of Leptospira proteome. A family of hypothetical proteins in Leptospira interrogans.. +PF07615 YKOF-related Family
+PF07617 Protein of unknown function (DUF1579)
Blast clustering of Pirellula proteome. A family of paralogous hypothetical proteins identified in Rhodopirellula baltica that also has members in Gloeobacter violaceus, Sinorhizobium meliloti and Agrobacterium tumefaciens.. +PF07618 Protein of unknown function (DUF1580)
Blast clustering of Pirellula proteome. A family of short hypothetical proteins found in Rhodopirellula baltica.. +PF07619 Protein of unknown function (DUF1581)
Blast clustering of Pirellula proteome. Several Rhodopirellula baltica proteins share this probable domain. Most of these proteins are predicted to be secreted or membrane-associated.. +PF07620 SLEI
Blast clustering of Leptospira proteome. This highly conserved sequence motif is found at the C terminus of several short hypothetical proteins in Leptospira spp and related organisms.. +PF07621 Protein of unknown function (DUF1582)
Blast clustering of Pirellula proteome. A family of hypothetical proteins in Rhodopirellula baltica.. +PF07622 Protein of unknown function (DUF1583)
Blast clustering of Pirellula proteome. Most of these Rhodopirellula baltica hypothetical proteins also match Pfam:PF07619.. +PF07623 DUF1584;
Protein of unknown function (DUF1584). Blast clustering of Pirellula proteome. This sequence motif is highly conserved in several short hypothetical proteins in Rhodopirellula baltica. It also is associated with Pfam:PF07621 in Swiss:Q7UJJ9.. +PF07624 DUF1585;
Protein of unknown function (DUF1585). Blast clustering of Pirellula proteome . A conserved sequence region at the C terminus of several cytochrome-like proteins in Rhodopirellula baltica.. +PF07625 Protein of unknown function (DUF1586)
Blast clustering of Pirellula proteome. A family of short hypothetical proteins in Rhodopirellula baltica.. +PF07626 DUF1587;
Protein of unknown function (DUF1587). Blast clustering of Pirellula proteome. A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07624.. +PF07627 DUF1588; PSC1;
Protein of unknown function (DUF1588). Blast clustering of Pirellula proteome. A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07626 and Pfam:PF07624.. +PF07628 Protein of unknown function (DUF1589)
Blast clustering of Pirellula proteome. A family of short hypothetical proteins in Rhodopirellula baltica.. +PF07629 Protein of unknown function (DUF1590)
Blast clustering of Pirellula proteome. These hypothetical proteins in Rhodopirellula baltica have a conserved C terminal region.. +PF07631 DUF1592;
Protein of unknown function (DUF1592). Blast clustering of Pirellula proteome. A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07627, Pfam:PF07626, and Pfam:PF07624.. +PF07632 Protein of unknown function (DUF1593)
Blast clustering of Pirellula proteome. A family of proteins in Rhodopirellula baltica that are predicted to be secreted. Also, a member has been identified in Caulobacter crescentus (Swiss:Q9AAT9). These proteins may be related to Pfam:PF01156.. +PF07634 RtxA repeat
This short repeat is found in the RtxA toxin family .. +PF07635 Cytochrom_C_p; PSC3;
Planctomycete cytochrome C. Blast clustering of Pirellula proteome. These proteins share a region of homology at their N-terminus that contains the C-{CPWHF}-{CPWR}-C-H-{CFYW} motif typical of cytochromes C, or CxxCH.. +PF07636 PSRT
Blast clustering of Pirellula proteome. This motif is found at the N terminus of several short hypothetical proteins in Rhodopirellula baltica and the predicted Arylsulfatase B (EC:3.1.6.12) Swiss:Q7UX97.. +PF07637 DUF1595;
Protein of unknown function (DUF1595). Blast clustering of Pirellula proteome . A family of proteins in Rhodopirellula baltica, associated with Pfam:PF07635, Pfam:PF07626, Pfam:PF07631, Pfam:PF07627, and Pfam:PF07624.. +PF07638 ECF sigma factor
Blast clustering of Pirellula proteome. These proteins are probably RNA polymerase sigma factors belonging to the extra-cytoplasmic function (ECF) subfamily and show sequence similarity to Pfam:PF04542 and Pfam:PF04545.. +PF07639 YTV
Blast clustering of Pirellula proteome. These hypothetical proteins in Rhodopirellula baltica contain several repeats of a sequence whose core is the residues YTV.. +PF07640 QPP
Blast clustering of Pirellula proteome. These Rhodopirellula baltica proteins share a highly conserved sequence, centred around an invariant QPP motif, at their N termini. This motif may represent an export signal.. +PF07642 Outer membrane protein family (DUF1597)
Blast clustering of Pirellula proteome. This family of proteins are likely to be outer membrane beta barrel proteins. Possibly acting as porins.. +PF07643 Protein of unknown function (DUF1598)
Blast clustering of Pirellula proteome. A family of Rhodopirellula baltica hypothetical proteins of about 500 amino acids in length.. +PF07644 Planctomycete PGAMP
Blast clustering of Pirellula proteome. This conserved sequence is centred around an invariant motif of PGAMP in several short hypothetical proteins from the planctomycete Rhodopirellula baltica. The motif also occurs twice in Swiss Q7UVK9.. +PF07645 Calcium-binding EGF domain
Pfam-B_330 (release 10.0) . +PF07646 Kelch motif
The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase for which a structure has been solved . The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415.. +PF07647 SAM domain (Sterile alpha motif)
+PF07648 Kazal-type serine protease inhibitor domain
Usually indicative of serine protease inhibitors. However, kazal-like domains are also seen in the extracellular part of agrins, which are not known to be protease inhibitors. Kazal domains often occur in tandem arrays. Small alpha+beta fold containing three disulphides.. +PF07649 C1-like domain
This short domain is rich in cysteines and histidines. The pattern of conservation is similar to that found in Pfam:PF00130.. +PF07650 KH domain
+PF07652 Flavivirus DEAD domain
Pfam-B_199 (release 3.0). +PF07651 ANTH domain
AP180 is an endocytotic accessory protein that has been implicated in the formation of clathrin-coated pits. The domain is involved in phosphatidylinositol 4,5-bisphosphate binding and is a universal adaptor for nucleation of clathrin coats [1,2].. +PF07653 Variant SH3 domain
SH3 (Src homology 3) domains are often indicative of a protein involved in signal transduction related to cytoskeletal organisation. First described in the Src cytoplasmic tyrosine kinase Swiss:P12931. The structure is a partly opened beta barrel.. +PF07654 Immunoglobulin C1-set domain
+PF00008 EGF-like domain
Swissprot_feature_table. There is no clear separation between noise and signal. Pfam:PF00053 is very similar, but has 8 instead of 6 conserved cysteines. Includes some cytokine receptors. The EGF domain misses the N-terminus regions of the Ca2+ binding EGF domains (this is the main reason of discrepancy between swiss-prot domain start/end and Pfam). The family is hard to model due to many similar but different sub-types of EGF domains. Pfam certainly misses a number of EGF domains. . +PF07655 Secretin N-terminal domain
This is a short domain found in bacterial type II/III secretory system proteins. The architecture of these proteins suggest that this family may be functionally analogous to Pfam:PF03958.. +PF07443 HepA-related protein (HARP)
Pfam-B_21761 (release 10.0). This family represents a conserved region approximately 60 residues long within eukaryotic HepA-related protein (HARP). This exhibits single-stranded DNA-dependent ATPase activity, and is ubiquitously expressed in human and mouse tissues . Family members may contain more than one copy of this region.. +PF07657 N terminus of Notch ligand
This entry represents a region of conserved sequence at the N terminus of several Notch ligand proteins.. +PF07659 Domain of Unknown Function (DUF1599)
+PF07660 Secretin and TonB N terminus short domain
This is a short domain found at the N-terminus of the Secretins of the bacterial type II/III secretory system as well as the TonB-dependent receptor proteins. These proteins are involved in TonB-dependent active uptake of selective substrates.. +PF07661 MORN repeat variant
This family represents an apparent variant of the Pfam:PF02493 repeat (personal obs:C Yeats).. +PF07662 Na+ dependent nucleoside transporter C-terminus
This family consists of nucleoside transport proteins. Swiss:Q62773 is a purine-specific Na+-nucleoside cotransporter localised to the bile canalicular membrane . Swiss:Q62674 is a Na+-dependent nucleoside transporter selective for pyrimidine nucleosides and adenosine; it also transports the anti-viral nucleoside analogues AZT and ddC . This alignment covers the C-terminus of this family of transporters.. +PF07663 Sorbitol phosphotransferase enzyme II C-terminus
TIGRFAMs, Griffiths-Jones SR, Yeats C. +PF07664 Ferrous iron transport protein B C terminus
Escherichia coli has an iron(II) transport system (feo) which may make an important contribution to the iron supply of the cell under anaerobic conditions . FeoB has been identified as part of this transport system. FeoB is a large 700-800 amino acid integral membrane protein. The N-terminus has been previously erroneously described as being ATP-binding . Recent work shows that it is similar to eukaryotic G-proteins and that it is a GTPase . . +PF07666 M penetrans paralogue family 26
These proteins include those ascribed to M penetrans paralogue family 26 in .. +PF07667 Protein of unknown function (DUF1600)
BLAST clustering of M. penetrans proteome. These proteins appear to be specific to Mycoplasma species.. +PF07668 M penetrans paralogue family 1
This family of paralogous proteins identified in Mycoplasma penetrans includes homologues of p35 . . +PF07669 Eco57I restriction-modification methylase
Homologues of the Escherichia coli Eco57I restriction-modification methylase are found in several phylogenetically diverse bacteria. The structure of TaqI has been solved .. +PF07670 Nucleoside recognition
This region in the nucleoside transporter proteins is responsible for determining nucleoside specificity in the human CNT1 and CNT2 proteins (e.g Swiss:O00337) . In the FeoB proteins (e.g. Swiss:O25396), which are believed to be Fe2+ transporters, it includes the membrane pore region, so the function of this region is likely to be more general than just nucleoside specificity . This family may represent the pore and gate, with a wide potential range of specificity. Hence its name 'Gate'.. +PF07671 Protein of unknown function (DUF1601)
This repeat is found in a small number of proteins and is apparently limited to Coxiella and related species.. +PF07672 MFS_Mycoplamsa;
Mycoplasma MFS transporter. BLAST clustering of Mycoplasma proteome. These proteins share some similarity with members of the Major Facilitator Superfamily (MFS).. +PF07673 Protein of unknown function (DUF1602)
Clustering of A. pernix proteome. +PF07675 Cleaved Adhesin Domain
This is a family of bacterial protein modules thought to function in various roles including cell adhesion, cell lysis and carbohydrate binding . The beta-sandwich jelly-roll topology of these modules is known as the galactose-binding domain-like superfamily, clan CL0202. A tandem repeat of these modules (either two or three repeats) constitute the haemagglutinin/adhesin (HA) regions of the gingipains, RgpA, Swiss:Q51816 and Kgp, Swiss:P72194 and Swiss:P72197 expressed by Porphyromonas gingivalis (Bacteroides gingivalis) . They form components of the major extracellular virulence complex RgpA-Kgp - a mixture of proteinases and adhesin domains . The adhesin domains in this complex are found in proteinase-cleaved forms when isolated from the cell surface . Haemagglutinin genes of P. gingivalis (hagA1 HAGA1_PORGI - Swiss:P59915 - and hagA2 HAGA2_PORGI - Swiss:Q51845) suggest that such proteins are composed of eight to ten tandem repeats of these adhesin modules . Genomic data predicts that homologous protein modules are also expressed by a number of other bacteria and form part of putative multi-domain proteins, eg. Swiss:Q26BR9 and Swiss:B0VGL6. These domains may be acting in concert with other adhesion modules thought to be part of these multi-domain proteins such as fibronectin type III, Pfam:PF00041, and Meprin, A5, mu (MAM), Pfam:PF00629, domains.. +PF07676 WD40-like Beta Propeller Repeat
Yeats C, Mistry J, Adindla S. This family appears to be related to the Pfam:PF00400 repeat. This repeat corresponds to the RIVW repeat identified in cell surface proteins [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16].. +PF07677 A2M2;
A-macroglobulin receptor. This family includes the receptor domain region of the alpha-2-macroglobulin family.. +PF07678 A2M3;
A-macroglobulin complement component. This family includes the complement components region of the alpha-2-macroglobulin family.. +PF07679 Immunoglobulin I-set domain
+PF07680 TQO small subunit DoxA
Thiosulphate:quinone oxidoreductase (TQO) is one of the early steps in elemental sulphur oxidation. A novel TQO enzyme was purified from the thermo-acidophilic archaeon Acidianus ambivalens and shown to consist of a large subunit (DoxD) and a smaller subunit (DoxA). The DoxD- and DoxA-like two subunits are fused together in a single polypeptide in Swiss:Q8AAF0.. +PF07681 DoxX
These proteins appear to have some sequence similarity with Pfam:PF04173 but their function is unknown .. +PF07682 Sulphur oxygenase reductase
The sulphur oxygenase/reductase (SOR) of the thermo-acidophilic archaeon Acidianus ambivalens is an unusual enzyme consisting of 24 identical subunits arranged in a perfectly symmetrical hollow sphere and containing a mononuclear non-heme iron centre (personal communication: A. Kletzin). At 85 degrees C in vitro, elemental sulphur is oxidised to sulphite, thiosulphate and hydrogen sulphide with no external cofactors needed. The proposed equation is: 4S + O2 + 4 H2O ---> 2 HSO3- + 2 H2S + 2 H+.. +PF07683 Cobalamin synthesis protein cobW C-terminal domain
Pfam-B_1247 (release 5.4). This is a large and diverse family of putative metal chaperones that can be separated into up to 15 subgroups. In addition to known roles in cobalamin biosynthesis and the activation of the Fe-type nitrile hydratase, this family is also known to be involved in the response to zinc limitation. The CobW subgroup involved in cobalamin synthesis represents only a small sub-fraction of the family .. +PF07684 NOD; NOD1;
NOTCH signalling plays a fundamental role during a great number of developmental processes in multicellular animals [1-2]. NOD and NODP represent a region present in many NOTCH proteins and NOTCH homologs in multiple species such as NOTCH2 and NOTCH3, LIN12, SC1 and TAN1. The role of the NOD and NODP domains remains to be elucidated.. +PF07685 CobB/CobQ-like glutamine amidotransferase domain
+PF07686 Immunoglobulin V-set domain
This domain is found in antibodies as well as neural protein P0 and CTL4 amongst others.. +PF07688 KaiA domain
The cyanobacterial clock proteins KaiA and KaiB are proposed as regulators of the circadian rhythm in cyanobacteria. The overall fold of the KaiA monomer is that of a four-helix bundle, which forms a dimer in the known structure .. +PF07689 KaiB domain
The cyanobacterial clock proteins KaiA and KaiB are proposed as regulators of the circadian rhythm in cyanobacteria. Mutations in both proteins have been reported to alter or abolish circadian rhythmicity. KaiB adopts an alpha-beta meander motif and is found to be a dimer .. +PF07690 Major Facilitator Superfamily
Pfam-B_5 (Release 13.0). +PF07691 PA14 domain
Rigden DJ, Mello LV, Galperin MY. This domain forms an insert in bacterial beta-glucosidases and is found in other glycosidases, glycosyltransferases, proteases, amidases, yeast adhesins, and bacterial toxins, including anthrax protective antigen (PA). The domain also occurs in a Dictyostelium prespore-cell-inducing factor Psi and in fibrocystin, the mammalian protein whose mutation leads to polycystic kidney and hepatic disease. The crystal structure of PA shows that this domain (named PA14 after its location in the PA20 pro-peptide) has a beta-barrel structure. The PA14 domain sequence suggests a binding function, rather than a catalytic role. The PA14 domain distribution is compatible with carbohydrate binding.. +PF07687 Peptidase_M20;
Peptidase dimerisation domain. Pfam-B_253 (release 4.0). This domain consists of 4 beta strands and two alpha helices which make up the dimerisation surface of members of the M20 family of peptidases . This family includes a range of zinc metallopeptidases belonging to several families in the peptidase classification . Family M20 are Glutamate carboxypeptidases. Peptidase family M25 contains X-His dipeptidases.. +PF07692 HCR1;
Low iron-inducible periplasmic protein. Pfam-B_60541 (release 13.0). In Chlamydomonas reinhardtii, the gene encoding Swiss:Q9LD42 is induced by iron deficiency . In green algae, this protein is periplasmic. The two paralogues FEA1 and FEA2 are the major proteins secreted by iron-deficient Chlamydomonas reinhardtii, and both are up-regulated in response to iron deficiency. FEA1 but not FEA2 is up-regulated by high CO2 concentration. Both FEA1 and FEA2 are secreted into the periplasmic space and genetic evidence confirms that their association with the cell is required for growth in low iron .. +PF07693 KAP family P-loop domain
The KAP (after Kidins220/ARMS and PifA) family of predicted NTPases are sporadically distributed across a wide phylogenetic range in bacteria and in animals. Many of the prokaryotic KAP NTPases are encoded in plasmids and tend to undergo disruption to form pseudogenes. A unique feature of all eukaryotic and certain bacterial KAP NTPases is the presence of two or four transmembrane helices inserted into the P-loop NTPase domain. These transmembrane helices anchor KAP NTPases in the membrane such that the P-loop domain is located on the intracellular side .. +PF07694 5TMR of 5TMR-LYT
This entry represents the transmembrane region of the 5TM-LYT (5TM Receptors of the LytS-YhcK type) .. +PF07695 7TM diverse intracellular signalling
This entry represents the transmembrane region of the 7TM-DISM (7TM Receptors with Diverse Intracellular Signalling Modules) .. +PF07696 7TMR-DISM extracellular 2
This entry represents one of two distinct types of extracellular domain found in the 7TM-DISM (7TM Receptors with Diverse Intracellular Signalling Modules) bacterial transmembrane proteins . It is possible that this domain adopts a jelly roll fold and acts as a receptor for carbohydrates and their derivatives .. +PF07697 7TM-HD extracellular
This entry represents the extracellular domain of the 7TM-HD (7TM Receptors with HD hydrolase) .. +PF07698 7TM receptor with intracellular HD hydrolase
These bacterial 7TM receptor proteins have an intracellular Pfam:PF01966. This entry corresponds to the 7 helix transmembrane domain. These proteins also contain an N-terminal extracellular domain.. +PF07699 GCC2 and GCC3
+PF07700 Heme NO binding
The HNOB (Heme NO Binding) domain, is a predominantly alpha-helical domain and binds heme via a covalent linkage to histidine. The HNOB domain is predicted to function as a heme-dependent sensor for gaseous ligands, and transduce diverse downstream signals, in both bacteria and animals.. +PF07701 Heme NO binding associated
The HNOBA domain is found associated with the HNOB domain and Pfam:PF00211 in soluble cyclases and signalling proteins. The HNOB domain is predicted to function as a heme-dependent sensor for gaseous ligands, and transduce diverse downstream signals, in both bacteria and animals.. +PF07702 UTRA domain
The UbiC transcription regulator-associated (UTRA) domain is a conserved ligand-binding domain that has a similar fold to Pfam:PF04345 . It is believed to modulate activity of bacterial transcription factors in response to binding small molecules .. +PF07703 Alpha-2-macroglobulin family N-terminal region
This family includes a region of the alpha-2-macroglobulin family.. +PF07704 Rv0623-like transcription factor
This entry represents the Rv0623-like (Swiss:P96913) family of transcription factors associated with the PSK operon .. +PF07705 DUF1604; APHP;
Cell adhesion related domain found in bacteria.. +PF07706 Aminotransferase ubiquitination site
Pfam-B_15367 (release 13.0). This segment contains a probable site of ubiquitination that ensures rapid degradation of tyrosine aminotransferase in rats. The half life of the enzyme in vivo is about 2-4 hours. In addition, unpublished information identifies at least 2 phosphorylation sites including CAPK at Ser29 and, at the other end of the protein, a casein kinase II site at S*QEECDK. This region of TAT is probably primarily related to regulatory events. Most other transaminases are much more stable and are not phosphorylated.. +PF07707 BTB And C-terminal Kelch
+PF07708 Tash protein PEST motif
This motif is found in the Tash AT-hook proteins of Theileria annulata. These proteins are transported to the host's nucleus and are likely to be involved in pathogenesis . It is also often found in conjunction with Pfam:PF04385. It is suggested that they may be 'part of PEST motifs' (a signal for rapid proteolytic degradation) in , though this is not definite. This motif is also found in other T. annulata proteins, which have no other known domains in (unpublished data: C Yeats).. +PF07709 Seven Residue Repeat
Associated with Pfam:PF02969. This repeat is found in some Plasmodium and Theileria proteins.. +PF07710 P53 tetramerisation motif
Pfam-B_782 (release 3.0). +PF07711 Rab geranylgeranyl transferase alpha-subunit, insert domain
Pfam-B_20675 (release 13.0) . Rab geranylgeranyl transferase (RabGGT) catalyses the addition of two geranylgeranyl groups to the C-terminal cysteine residues of Rab proteins, which is crucial for membrane association and function of these proteins in intracellular vesicular trafficking . This domain is inserted between Pfam:PF01239 repeats. This domain adopts an Ig-like fold and is thought to be involved in protein-protein interactions and might be involved in the recognition and binding of REP .. +PF07712 Stress up-regulated Nod 19
Pfam-B032880 release 13.0. +PF07713 Protein of unknown function (DUF1604)
This family is found at the N-terminus of several eukaryotic RNA processing proteins (e.g Swiss:Q8N3B7).. +PF07714 Protein tyrosine kinase
+PF07715 TonB-dependent Receptor Plug Domain
The Plug domain has been shown to be an independently folding subunit of the TonB-dependent receptors ( ). It acts as the channel gate, blocking the pore until the channel is bound by ligand. At this point it undergoes conformational changes and opens the channel.. +PF07716 Basic region leucine zipper
PfamB-200; Release 14.0;. +PF07717 DUF1605;
Oligonucleotide/oligosaccharide-binding (OB)-fold. This family is found towards the C-terminus of the DEAD-box helicases (Pfam:PF00270). In these helicases it is apparently always found in association with Pfam:PF04408. There do seem to be a couple of instances where it occurs by itself - e.g. Swiss:Q84VZ2. The structure PDB:3i4u adopts an OB-fold. This C-terminal domain of the yeast helicase contains an oligonucleotide/oligosaccharide-binding (OB)-fold which seems to be placed at the entrance of the putative nucleic acid cavity. It also constitutes the binding site for the G-patch-containing domain of Pfa1p. When found on DEAH/RHA helicases, this domain is central to the regulation of the helicase activity through its binding of both RNA and G-patch domain proteins .. +PF07718 DUF1606;
Coatomer beta C-terminal region. This family is found at the C-terminus of the coatomer beta subunit proteins (Beta-coat proteins). This C-terminal domain probably adapts the function of the N-terminal Pfam:PF01602 domain. . +PF07719 Tetratricopeptide repeat
Context matches from Pfam 14.0. This Pfam entry includes outlying Tetratricopeptide-like repeats (TPR) that are not matched by Pfam:PF00515.. +PF07720 Tetratricopeptide repeat
This Pfam entry includes tetratricopeptide-like repeats found in the LcrH/SycD-like chaperones .. +PF07721 Tetratricopeptide repeat
This Pfam entry includes tetratricopeptide-like repeats not detected by the Pfam:PF00515, Pfam:PF07719 and Pfam:PF07720 models.. +PF07722 Peptidase C26
These peptidases have gamma-glutamyl hydrolase activity; that is they catalyse the cleavage of the gamma-glutamyl bond in poly-gamma-glutamyl substrates. They are structurally related to Pfam:PF00117, but contain extensions in four loops and at the C terminus .. +PF07723 Leucine Rich Repeat
PfamB-152 (release 14.0). This Pfam entry includes some LRRs that fail to be detected with the Pfam:PF00560 model.. +PF07724 AAA domain (Cdc48 subfamily)
PfamB-40 (Release 14.0). This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model.. +PF07726 ATPase family associated with various cellular activities (AAA)
PfamB-40 (Release 14.0). This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model.. +PF07727 Reverse transcriptase (RNA-dependent DNA polymerase)
PfamB-40 (Release 14.0). A reverse transcriptase gene is usually indicative of a mobile element such as a retrotransposon or retrovirus. Reverse transcriptases occur in a variety of mobile elements, including retrotransposons, retroviruses, group II introns, bacterial msDNAs, hepadnaviruses, and caulimoviruses. This Pfam entry includes reverse transcriptases not recognised by the Pfam:PF00078 model.. +PF07728 AAA domain (dynein-related subfamily)
PfamB-136 (Release 14.0). This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model.. +PF07729 FCD domain
Pfam-B_117 (release 14.0). This domain is the C-terminal ligand binding domain of many members of the GntR family. This domain probably binds to a range of effector molecules that regulate the transcription of genes through the action of the N-terminal DNA-binding domain Pfam:PF00392. This domain is found in Swiss:P45427 and Swiss:P31460 that are regulators of sugar biosynthesis operons. It is also in the known structure of FadR where it binds to acyl-coA, the domain is alpha helical . This family has been named as FCD for (FadR C-terminal Domain).. +PF07730 Histidine kinase
This is the dimerisation and phosphoacceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536.. +PF07731 Multicopper oxidase
PfamB-49 (Release 14.0). This entry contains many divergent copper oxidase-like domains that are not recognised by the Pfam:PF00394 model.. +PF07732 Multicopper oxidase
PfamB-43 (Release 14.0). This entry contains many divergent copper oxidase-like domains that are not recognised by the Pfam:PF00394 model.. +PF07733 Bacterial DNA polymerase III alpha subunit
Pfam-B111 (Release 14.0). +PF07734 DUF1607;
Pfam-B128 (Release 14.0). Most of these proteins contain Pfam:PF00646 at the N terminus, suggesting that they are effectors linked with ubiquitination.. +PF07735 F-box associated
Pfam-B128 (Release 14.0). Most of these proteins contain Pfam:PF00646 at the N terminus, suggesting that they are effectors linked with ubiquitination.. +PF07736 Chorismate mutase type I
Chorismate mutase EC:5.4.99.5 catalyses the conversion of chorismate to prephenate in the pathway of tyrosine and phenylalanine biosynthesis. This enzyme is negatively regulated by tyrosine, tryptophan and phenylalanine [2,3].. +PF07737 Anthrax toxin lethal factor, N- and C-terminal domain
Pfam-B_23800 (release 14.0). The C-terminal domain is the catalytically active domain whereas the N-terminal domain is likely to be inactive.. +PF07738 Sad1 / UNC-like C-terminal
Pfam-B_5052 (release 14.0). The C. elegans UNC-84 protein is a nuclear envelope protein that is involved in nuclear anchoring and migration during development. The S. pombe Sad1 protein localises at the spindle pole body. UNC-84 and Sad1 share a common C-terminal region, that is often termed the SUN (Sad1 and UNC) domain [1-2]. In mammals, the SUN domain is present in two proteins, Sun1 and Sun2 . The SUN domain of Sun2 has been demonstrated to be in the periplasm .. +PF07725 Leucine Rich Repeat
PfamB-184 (release 14.0). This Pfam entry includes some LRRs that fail to be detected by the Pfam:PF00560 model.. +PF07739 TipAS antibiotic-recognition domain
Pfam-B_8785 (release 14.0). This domain is found at the C-terminus of some MerR family transcription factors. The domain has an alpha-helical globin-like fold . The family includes Mta a central regulator of multidrug resistance in Bacillus subtilis.. +PF07740 Ion channel inhibitory toxin
Pfam-B_20319 (release 14.0). This is a family of potent toxins that function as ion-channel inhibitors for several different ions. Omega-Grammotoxin SIA is a VSCC antagonist that inhibits neuronal N- and P-type VSCC responses . Huwentoxin-IV, from the Chinese bird spider, is a highly potent neurotoxin that specifically inhibits the neuronal tetrodotoxin-sensitive voltage-gated sodium channel in rat dorsal root ganglion neurons . Hainantoxin-4, from the venom of spider Selenocosmia hainana, adopts an inhibitor cystine knot structural motif like huwentoxin-IV, and is a potent antagonist that acts at site 1 on tetrodotoxin-sensitive (TTX-S) sodium channels . Study of the molecular nature of toxin-receptor interactions has helped elucidate the functioning of many ion-channels .. +PF07741 Brf1-like TBP-binding domain
Pfam-B_18050 (release 14.0). This region covers both the Brf homology II and III regions . This region is involved in binding TATA binding protein .. +PF07742 BTG family
Pfam-B_9208 (release 14.0). +PF07743 HSCB C-terminal oligomerisation domain
Pfam-B_6234 (release 14.0). This domain is the HSCB C-terminal oligomerisation domain and is found on co-chaperone proteins.. +PF07744 SPOC domain
The SPOC (Spen paralogue and orthologue C-terminal) domain is involved in developmental signalling .. +PF07745 Glycosyl hydrolase family 53
Pfam-B_5764 (release 14.0). This domain belongs to family 53 of the glycosyl hydrolase classification . These enzymes are endo-1,4-beta-galactanases (EC:3.2.1.89). The structure of this domain is known and has a TIM barrel fold.. +PF07746 Aromatic-ring-opening dioxygenase LigAB, LigA subunit
Pfam-B_18522 (release 14.0). This is a family of aromatic ring opening dioxygenases which catalyse the ring-opening reaction of protocatechuate and related compounds .. +PF07747 MTH865-like family
Pfam-B_25445 (release 14.0). This domain has an EF-hand like fold.. +PF07748 Glycosyl hydrolases family 38 C-terminal domain
Pfam-B_731 (release 3.0). Glycosyl hydrolases are key enzymes of carbohydrate metabolism.. +PF07749 Endoplasmic reticulum protein ERp29, C-terminal domain
Pfam-B_5062 (release 14.0). ERp29 is a ubiquitously expressed endoplasmic reticulum protein found in mammals. ERp29 is comprised of two domains. This domain, the C-terminal domain, has an all helical fold . ERp29 is thought to form part of the thyroglobulin folding complex .. +PF07750 GcrA cell cycle regulator
Pfam-B_23428 (release 14.0). GcrA is a master cell cycle regulator that, together with CtrA (see Pfam:PF00072 and Pfam:PF00486), is involved in controlling cell cycle progression and asymmetric polar morphogenesis . During this process, there are temporal and spatial variations in the concentrations of GcrA and CtrA. The variation in concentration produces time and space dependent transcriptional regulation of modular functions that implement cell-cycle processes . More specifically, GcrA acts as an activator of components of the replisome and the segregation machinery .. +PF07751 Abi-like protein
Pfam-B_8740 (release 14.0). This family, found in various bacterial species, contains sequences that are similar to the Abi group of proteins, which are involved in bacteriophage resistance mediated by abortive infection in Lactococcus species [1,2]. The proteins are thought to have helix-turn-helix motifs, found in many DNA-binding proteins, allowing them to perform their function .. +PF07752 DUF1608;
Fenech M, Eberhardt RY. Pfam-B_2293 (release 14.0). Archaeal S-layer proteins consist of two copies of this domain [1-2].. +PF07753 Protein of unknown function (DUF1609)
Pfam-B_2149 (release 14.0). This region is found in a number of hypothetical proteins thought to be expressed by the eukaryote Encephalitozoon cuniculi, an obligate intracellular microsporidial parasite. It is approximately 200 residues long.. +PF07754 Domain of unknown function (DUF1610)
Pfam-B_8731 (release 14.0). This zinc ribbon domain is found in archaeal species. It is likely to bind zinc via its four well-conserved cysteine residues.. +PF07755 Protein of unknown function (DUF1611)
Pfam-B_8752 (release 14.0). This region is found in a number of hypothetical bacterial and archaeal proteins. The region is approximately 350 residues long. A member of this family (Swiss:Q6M063) is thought to associate with another subunit to form an H+-transporting ATPase, but no evidence has been found to support this.. +PF07756 Protein of unknown function (DUF1612)
Pfam-B_8688 (release 14.0). This family includes sequences of largely unknown function but which share a number of features in common. They are expressed by bacterial species, and in many cases these bacteria are known to associate symbiotically with plants. Moreover, the majority are coded for by plasmids, which in many cases are known to confer on the organism the ability to interact symbiotically with leguminous plants. An example of such a plasmid is NGR234, which encodes Y4CF, a protein of unknown function that is a member of this family . Other members of this family are expressed by organisms with a documented genomic similarity to plant symbionts .. +PF07757 DUF1613;
Predicted AdoMet-dependent methyltransferase. Pfam-B_8934 (release 14.0). Proteins in this family have been predicted to function as AdoMet-dependent methyltransferases .. +PF07758 Protein of unknown function (DUF1614)
Pfam-B_8844 (release 14.0). This is a family of sequences coming from hypothetical proteins found in both bacterial and archaeal species.. +PF07759 Protein of unknown function (DUF1615)
Pfam-B_8943 (release 14.0). This is a family of proteins of unknown function expressed by various bacterial species. Some members of this family (e.g. Swiss:Q8Z8Z7, Swiss:Q8ZRF4) are thought to be lipoproteins. Another member of this family (Swiss:Q93SV8) is thought to be involved in photosynthesis .. +PF07760 Protein of unknown function (DUF1616)
Pfam-B_8886 (release 14.0). This is a family of sequences from hypothetical archaeal proteins. The region in question is approximately 330 amino acid residues long.. +PF07761 Protein of unknown function (DUF1617)
Pfam-B_8981 (release 14.0). This is a family of sequences from hypothetical bacterial and bacteriophage proteins. The region in question is approximately 150 residues long and is highly conserved throughout the family.. +PF07762 Protein of unknown function (DUF1618)
Pfam-B_8857 (release 14.0). The members of this family are mainly hypothetical proteins expressed by Oryza sativa.. +PF07763 FEZ-like protein
Pfam-B_8854 (release 14.0). This is a family of eukaryotic proteins thought to be involved in axonal outgrowth and fasciculation . The N-terminal regions of these sequences are less conserved than the C-terminal regions, and are highly acidic . The C. elegans homolog, UNC-76 (Swiss:Q7JNU9), may play structural and signalling roles in the control of axonal extension and adhesion (particularly in the presence of adjacent neuronal cells ) and these roles have also been postulated for other FEZ family proteins . Certain homologs have been definitively found to interact with the N-terminal variable region (V1) of PKC-zeta, and this interaction causes cytoplasmic translocation of the FEZ family protein in mammalian neuronal cells . The C-terminal region probably participates in the association with the regulatory domain of PKC-zeta . The members of this family are predicted to form coiled-coil structures [2,3], which may interact with members of the RhoA family of signalling proteins , but are not thought to contain other characteristic protein motifs . Certain members of this family are expressed almost exclusively in the brain, whereas others (such as FEZ2, Swiss:Q76LN0) are expressed in other tissues, and are thought to perform similar but unknown functions in these tissues .. +PF07764 Omega Transcriptional Repressor
Pfam-B_63922 (release 14.0). The omega transcriptional repressor regulates expression of genes involved in copy number control and stable maintenance of plasmids. The omega protein belongs to the structural superfamily of MetJ/Arc repressors featuring a ribbon-helix-helix DNA-binding motif with the beta-ribbon located in and recognising the major groove of operator DNA .. +PF07765 KIP1-like protein
Pfam-B_2332 (release 14.0). This is a family of sequences found exclusively in plants. They are similar to kinase interacting protein 1 (KIP1), which has been found to interact with the kinase domain of PRK1, a receptor-like kinase . This particular region contains two coiled-coils, which are described as motifs involved in protein-protein interactions . It has also been suggested that the protein's coiled-coils allow it to dimerise in vivo .. +PF07766 LETM1-like protein
Fenech M, Wood V, Mistry J. Pfam-B_2202 (release 14.0). Members of this family are inner mitochondrial membrane proteins which play a role in potassium and hydrogen ion exchange . Deletion of LETM1 is thought to be involved in the development of Wolf-Hirschhorn syndrome in humans .. +PF07767 P60;
Nop53 (60S ribosomal biogenesis). Fenech M, Wood V, Mistry J. Pfam-B_8778 (release 14.0). This nucleolar family of proteins are involved in 60S ribosomal biogenesis. They are specifically involved in the processing beyond the 27S stage of 25S rRNA maturation . This family contains sequences that bear similarity to the glioma tumour suppressor candidate region gene 2 protein (p60) . This protein has been found to interact with herpes simplex type 1 regulatory proteins .. +PF07768 PVL ORF-50-like family
Pfam-B_8834 (release 14.0). This is a family of sequences found in both bacteria and bacteriophages. This region is approximately 130 residues long and in some cases is found as part of the PVL (Panton-Valentine leukocidin) group of genes, which encode a member of the leukocidin group of bacterial toxins that kill leukocytes by creation of pores in the cell membrane . PVL appears to be a virulence factor associated with a number of human diseases .. +PF07769 psiF repeat
Pfam-B_8872 (release 14.0). This region is approximately 35 residues long. It is found repeated in a number of putative phosphate starvation-inducible proteins expressed by various bacterial species. psiF (Swiss:Q7AH28) is known to be an example of such phosphate starvation-inducible proteins .. +PF07771 Tick salivary peptide group 1
Pfam-B_2333 (release 14.0). This contains a group of peptides derived from a salivary gland cDNA library of the tick Ixodes scapularis . Also present are peptides from a related tick species, Ixodes ricinus. They are characterised by a putative signal peptide indicative of secretion and conserved cysteine residues.. +PF07773 Protein of unknown function (DUF1619)
Pfam-B_8790 (release 14.0). This is a family of sequences derived from hypothetical eukaryotic proteins. The region in question is approximately 330 residues long and has a cysteine rich amino-terminus.. +PF07774 Protein of unknown function (DUF1620)
Pfam-B_8944 (release 14.0). These sequences are mainly derived from predicted eukaryotic proteins. The region in question lies towards the C-terminus of these large proteins and is approximately 300 amino acid residues long.. +PF07775 PaRep2b protein
Pfam-B_4098 (release 14.0). This is a family of proteins, expressed in the crenarchaeon Pyrobaculum aerophilum, whose members are variable in length and level of conservation. The presence of numerous frameshifts and internal stop codons in multiple alignments are thought to indicate that most family members are no longer functional . . +PF07776 Zinc-finger associated domain (zf-AD)
Pfam-B_14442 (release 14.0). The zf-AD domain, also known as ZAD, forms an atypical treble-cleft-like zinc co-ordinating fold. The zf-AD domain is thought to be involved in mediating dimer formation, but does not bind to DNA .. +PF07777 G-box binding protein MFMR
Pfam-B_5000 (release 14.0). This region is found to the N-terminus of the Pfam:PF00170 transcription factor domain. It is between 150 and 200 amino acids in length. The N-terminal half is rather rich in proline residues and has been termed the PRD (proline rich domain) , whereas the C-terminal half is more polar and has been called the MFMR (multifunctional mosaic region). It has been suggested that this family is composed of three sub-families called A, B and C , classified according to motif composition. It has been suggested that some of these motifs may be involved in mediating protein-protein interactions . The MFMR region contains a nuclear localisation signal in bZIP opaque and GBF-2 . The MFMR also contains a transregulatory activity in TAF-1. The MFMR in CPRF-2 contains cytoplasmic retention signals .. +PF07778 Mis6;
Pfam-B_17323 (release 13.0). Mis6 is an essential centromere connector protein acting during G1-S phase of the cell cycle. Mis6 is thought to be required for recruiting CENP-A, the centromere-specific histone H3 variant, an important event for centromere function and chromosome segregation during mitosis [1,2].. +PF07779 10 TM Acyl Transferase domain found in Cas1p
Cas1p protein of Cryptococcus neoformans is required for the synthesis of O-acetylated glucuronoxylomannans, a constituent of the capsule, and is critical for its virulence . The multi TM domain of the Cas1p was unified with the 10 TM Sugar Acyltransferase superfamily . This superfamily is comprised of members from the OatA, MdoC, OpgC, NolL and GumG families in addition to the Cas1p family . The Cas1p protein has an N-terminal PC-Esterase domain with the opposing Acyl esterase activity .. +PF07780 Spb1 C-terminal domain
Pfam-B_5001 (release 14.0). This presumed domain is found at the C-terminus of a family of FtsJ-like methyltransferases. Members of this family are involved in 60S ribosomal biogenesis, for example Swiss:P25582 .. +PF07781 Reovirus minor core protein Mu-2
Pfam-B_9308 (release 14.0). This family represents the Reovirus core protein Mu-2. Mu-2 is a microtubule associated protein and is thought to play a key role in the formation and structural organisation of reovirus inclusion bodies . . +PF07782 DC-STAMP-like protein
Pfam-B_9122 (release 14.0). This is a family of sequences which are similar to a region of the dendritic cell-specific transmembrane protein (DC-STAMP, Swiss:Q9H295). This is thought to be a novel receptor protein that shares no identity with other multimembrane-spanning proteins . It is thought to have seven putative transmembrane regions , two of which are found in the region featured in this family. DC-STAMP is also described as having potential N-linked glycosylation sites and a potential phosphorylation site for PKC , but these are not conserved throughout the family.. +PF07784 Protein of unknown function (DUF1622)
Pfam-B_9062 (release 14.0). This is a family of 14 highly conserved sequences, from hypothetical proteins expressed by both bacterial and archaeal species.. +PF07785 Protein of unknown function (DUF1623)
Pfam-B_9100 (release 14.0). The members of this family are all derived from relatively short hypothetical proteins thought to be expressed by various Nucleopolyhedroviruses.. +PF07786 Protein of unknown function (DUF1624)
Pfam-B_9180 (release 14.0). These sequences are found in hypothetical proteins of unknown function expressed by bacterial and archaeal species. The region in question is approximately 230 residues long.. +PF07787 Protein of unknown function (DUF1625)
Pfam-B_8907 (release 14.0). Sequences making up this family are derived from hypothetical proteins expressed by both prokaryotic and eukaryotic species. The region in question is approximately 250 residues long.. +PF07788 Protein of unknown function (DUF1626)
Pfam-B_9705 (release 14.0). This is a family consisting of sequences from hypothetical proteins of unknown function expressed by certain species of archaebacteria. One member (Swiss:Q9YCN7) is thought to be similar to tropomyosin .. +PF07789 Protein of unknown function (DUF1627)
Pfam-B_9837 (release 14.0). This is a group of sequences found in hypothetical proteins predicted to be expressed in a number of bacterial species. The region in question is approximately 150 amino acid residues long.. +PF07790 Protein of unknown function (DUF1628)
Pfam-B_5107 (release 14.0). The sequences making up this family are derived from hypothetical proteins of unknown function expressed by various archaeal species. The region in question is approximately 160 residues long.. +PF07791 Protein of unknown function (DUF1629)
Pfam-B_9012 (release 14.0). This family consists of sequences from hypothetical proteins thought to be expressed by two members of the Xanthomonas genus. The region in question is 125 amino acid residues long.. +PF07792 DUF1630;
Docking domain of Afi1 for Arf3 in vesicle trafficking. Pfam-B_9160 (release 14.0). This domain occurs at the N-terminus of Afi1, an Arf3p-interacting protein that is necessary for vesicle trafficking in yeast. This domain is the interacting region of the protein which binds to Arf3, the highly conserved small GTPases (ADP-ribosylation factors). Afi1 is distributed asymmetrically at the plasma membrane and is required for polarized distribution of Arf3 but not of an Arf3 guanine nucleotide-exchange factor, Yel1p. However, Afi1 is not required for targeting of Arf3 or Yel1p to the plasma membrane. Afi1 functions as an Arf3 polarization-specific adapter and participates in development of polarity. Although Arf3 is the homologue of human Arf6 it does not function in the same way, not being necessary for endocytosis or for mating factor receptor internalization. In the S phase, however, it is concentrated at the plasma membrane of the emerging bud. Because of its polarized localisation and its critical function in the normal budding pattern of yeast, Arf3 is probably a regulator of vesicle trafficking, which is important for polarized growth.. +PF07793 Protein of unknown function (DUF1631)
Pfam-B_9170 (release 14.0). The members of this family are sequences derived from a group of hypothetical proteins expressed by certain bacterial species. The region concerned is approximately 440 amino acid residues in length.. +PF07794 Protein of unknown function (DUF1633)
Pfam-B_9750 (release 14.0). This family contains sequences derived from a group of hypothetical proteins expressed by Arabidopsis thaliana. These sequences are highly similar and the region concerned is about 100 residues long.. +PF07795 Protein of unknown function (DUF1635)
Pfam-B_9707 (release 14.0). The members of this family include sequences that are parts of hypothetical proteins expressed by plant species. The region in question is about 170 amino acids long.. +PF07796 Protein of unknown function (DUF1638)
Pfam-B_6091 (release 14.0) & Pfam-B_3149 (release 23.0). This family contains sequences covering an approximately 270 amino acid stretch of a group of hypothetical proteins. These proteins are expressed by archaeal species of the Methanosarcina genus.. +PF07797 Protein of unknown function (DUF1639)
Pfam-B_6036 (release 14.0). This approximately 50 residue region is found in a number of sequences derived from hypothetical plant proteins. This region features a highly basic 5 amino-acid stretch towards its centre.. +PF07798 Protein of unknown function (DUF1640)
Pfam-B_6194 (release 14.0). This family consists of sequences derived from hypothetical eukaryotic proteins. A region approximately 100 residues in length is featured. . +PF07799 Protein of unknown function (DUF1643)
Pfam-B_9851 (release 14.0). The members of this family are all sequences found within hypothetical proteins expressed by various bacterial species. The region concerned is approximately 150 residues long.. +PF07800 Protein of unknown function (DUF1644)
Pfam-B_5078 (release 14.0). This family consists of sequences found in a number of hypothetical plant proteins of unknown function. The region of interest contains nine highly conserved cysteine residues and is approximately 160 amino acids in length, and is probably a zinc-binding domain.. +PF07801 Protein of unknown function (DUF1647)
Fenech M, Pollington J. Pfam-B_5249 (release 14.0). The sequences making up this family are all derived from hypothetical proteins expressed by C. elegans. The region in question is approximately 160 amino acids long. The GO annotation for this protein indicates the protein to be involved in nematode larval development and to have a positive regulation on growth rate.. +PF07802 GCK domain
Pfam-B_8992 (release 14.0). This domain is found in proteins carrying other domains known to be involved in intracellular signalling pathways (such as Pfam:PF00071) indicating that it might also be involved in these pathways. It has 4 highly conserved cysteine residues, suggesting that it can bind zinc ions. Moreover, it is found repeated in some members of this family (such as Swiss:Q9LMF3); this may indicate that these domains are able to interact with one another, raising the possibility that this domain mediates heterodimerisation.. +PF07803 GSG1-like protein
Pfam-B_9727 (release 14.0). This family contains sequences bearing similarity to a region of GSG1 (Swiss:Q9Z1H7), a protein specifically expressed in testicular germ cells . It is possible that overexpression of the human homolog may be involved in tumourigenesis of human testicular germ cell tumours . The region in question has four highly-conserved cysteine residues.. +PF07804 HipA-like C-terminal domain
Pfam-B_8632 (release 14.0). The members of this family are similar to a region close to the C-terminus of the HipA protein expressed by various bacterial species (for example Swiss:P23874). This protein is known to be involved in high-frequency persistence to the lethal effects of inhibition of either DNA or peptidoglycan synthesis . When expressed alone, it is toxic to bacterial cells , but it is usually tightly associated with HipB , and the HipA-HipB complex may be involved in autoregulation of the hip operon. The hip proteins may be involved in cell division control and may interact with cell division genes or their products .. +PF07805 HipA-like N-terminal domain
Pfam-B_8632 (release 14.0). The members of this family are similar to a region close to the N-terminus of the HipA protein expressed by various bacterial species (for example Swiss:P23874). This protein is known to be involved in high-frequency persistence to the lethal effects of inhibition of either DNA or peptidoglycan synthesis . When expressed alone, it is toxic to bacterial cells , but it is usually tightly associated with HipB , and the HipA-HipB complex may be involved in autoregulation of the hip operon. The hip proteins may be involved in cell division control and may interact with cell division genes or their products .. +PF07806 Nodule-specific GRP repeat
Pfam-B_8942 (release 14.0). The region featured in this family is found repeated in a number of plant proteins, some of which are expressed specifically in nodules formed during symbiotic interactions with certain bacterial species . Some of these proteins are also termed glycine-rich proteins (GRPs), due to the presence of a glycine-rich C-terminal region in their structures . Bacterial infection is required for the induction of nodule-specific GRP genes, and it is thought that nodule-specific GRPs may play non-redundant roles required at specific stages of nodule development . Members of this group of proteins may be cytosolic, whereas others are thought to be membrane-associated .. +PF07807 RED-like protein C-terminal region
Pfam-B_9789 (release 14.0). This family contains sequences that are similar to the C-terminal region of Red protein (Swiss:Q13123). This and related proteins are thought to be localised to the nucleus, and contain a RED repeat which consists of a number of RE and RD sequence elements . The region in question has several conserved NLS sequences . The function of Red protein is unknown, but efficient sequestration to nuclear bodies suggests that its expression may be tightly regulated or that the protein self-aggregates extremely efficiently .. +PF07808 RED-like protein N-terminal region
Pfam-B_9780 (release 14.0). This family contains sequences that are similar to the N-terminal region of Red protein (Swiss:Q13123). This and related proteins contain a RED repeat which consists of a number of RE and RD sequence elements . The region in question has several conserved NLS sequences and a putative trimeric coiled-coil region , suggesting that these proteins are expressed in the nucleus . The function of Red protein is unknown, but efficient sequestration to nuclear bodies suggests that its expression may be tightly regulated or that the protein self-aggregates extremely efficiently .. +PF07809 RTP801 C-terminal region
Pfam-B_5179 (release 14.0). The members of this family are sequences similar to the C-terminal region of RTP801, the protein product of a hypoxia-inducible factor 1 (HIF-1)- responsive gene . Two members of this family expressed by Drosophila melanogaster, Scylla (Swiss:Q9NHN4) and Charybde (Swiss:Q9NHN5), are designated by the GenBank as Hox targets . RTP801 is thought to be involved in various cellular processes . Its overexpression caused the apoptosis- resistant phenotype in cycling cells, and apoptosis sensitivity in growth arrested cells . Moreover, the protein product of the mouse homolog of RTP801 (dig2 (Swiss:Q9D3F7)) is thought to be induced by diverse apoptotic signals, and also by dexamethasone treatment .. +PF07810 TMC domain
Pfam-B_5063 (release 14.0). These sequences are similar to a region conserved amongst various protein products of the transmembrane channel-like (TMC) gene family, such as Transmembrane channel-like protein 3 (Swiss:Q7TN63) and EVIN2 (Swiss:Q8IU68) - this region is termed the TMC domain . Mutations in these genes are implicated in a number of human conditions, such as deafness and epidermodysplasia verruciformis . TMC proteins are thought to have important cellular roles, and may be modifiers of ion channels or transporters .. +PF07811 TadE-like protein
Pfam-B_9054 (release 14.0). The members of this family are similar to a region of the protein product of the bacterial tadE locus (Swiss:Q9S4A6). In various bacterial species, the tad locus is closely linked to flp-like genes, which encode proteins required for the production of pili involved in adherence to surfaces . It is thought that the tad loci encode proteins that act to assemble or export an Flp pilus in various bacteria . All tad loci but TadA have putative transmembrane regions , and in fact the region in question in this family has a high proportion of hydrophobic amino acid residues.. +PF07812 TfuA-like protein
Pfam-B_9826 (release 14.0). This family consists of a group of sequences that are similar to a region of TfuA protein (Swiss:Q52872). This protein is involved in the production of trifolitoxin (TFX), a gene-encoded, post-translationally modified peptide antibiotic . The role of TfuA in TFX synthesis is unknown, and it may be involved in other cellular processes .. +PF07813 LTXXQ motif family protein
Pfam-B_6101 (release 14.0). This protein family includes two copies of a five residue motif is found in a number of bacterial proteins bearing similarity to the protein CpxP (Swiss:P32158). This is a periplasmic protein that aids in combating extracytoplasmic protein-mediated toxicity, and may also be involved in the response to alkaline pH . Another member of this family, Spy (Swiss:P77754) is also a periplasmic protein that may be involved in the response to stress . The homology between CpxP and Spy may indicate that these two proteins are functionally related .. +PF07814 Wings apart-like protein regulation of heterochromatin
Pfam-B_9039 (release 14.0). This family contains sequences expressed in eukaryotic organisms bearing high similarity to the WAPL conserved region of D. melanogaster wings apart-like protein. This protein is involved in the regulation of heterochromatin structure . hWAPL (Swiss:Q7Z5K2), the human homologue, is found to play a role in the development of cervical carcinogenesis, and is thought to have similar functions to Drosophila wapl protein . Malfunction of the hWAPL pathway is thought to activate an apoptotic pathway that consequently leads to cell death .. +PF07815 Abl-interactor HHR
Pfam-B_9732 (release 14.0). The region featured in this family is found towards the N-terminus of a number of adaptor proteins that interact with Abl-family tyrosine kinases . More specifically, it is termed the homeo-domain homologous region (HHR), as it is similar to the DNA-binding region of homeo-domain proteins . Other homeo-domain proteins have been implicated in specifying positional information during embryonic development, and in the regulation of the expression of cell-type specific genes . The Abl-interactor proteins are thought to coordinate the cytoplasmic and nuclear functions of the Abl-family kinases, and seem to be involved in cytoskeletal reorganisation, but their precise role remains unclear .. +PF07816 Protein of unknown function (DUF1645)
Pfam-B_8798 (release 14.0). These sequences are derived from a number of hypothetical plant proteins. The region in question is approximately 270 amino acids long. Some members of this family are annotated as yeast pheromone receptor proteins AR781 but no literature was found to support this.. +PF07817 GLE1-like protein
Pfam-B_9182 (release 14.0). The members of this family are sequences that are similar to the human protein GLE1 (Swiss:O75458). This protein is localised at the nuclear pore complexes and functions in poly(A)+ RNA export to the cytoplasm [1,2].. +PF07818 HCNGP-like protein
Pfam-B_9462 (release 14.0). This family comprises sequences bearing significant similarity to the mouse transcriptional regulator protein HCNGP (Swiss:Q02614). This protein is localised to the nucleus and is thought to be involved in the regulation of beta-2-microglobulin genes.. +PF07819 PGAP1-like protein
Pfam-B_9244 (release 14.0). The sequences found in this family are similar to PGAP1 (Swiss:Q765A7). This is an endoplasmic reticulum membrane protein with a catalytic serine containing motif that is conserved in a number of lipases. PGAP1 functions as a GPI inositol-deacylase; this deacylation is important for the efficient transport of GPI-anchored proteins from the endoplasmic reticulum to the Golgi body .. +PF07820 TraC-like protein
Pfam-B_9690 (release 14.0). The members of this family are sequences that are similar to TraC (Swiss:Q84HT8). The gene encoding this protein is one of a group of genes found on plasmid p42a of Rhizobium etli CFN42 that are thought to be involved in the process of plasmid self-transmission. Mobilisation of plasmid p42a is of importance as it is required for transfer of plasmid p42a, which is also known as plasmid pSym as it carries most of the genes required for nodulation and nitrogen fixation by the symbiotic bacterium. The predicted protein products of p42a are similar to known transfer proteins of Agrobacterium tumefaciens plasmid pTiC58 .. +PF07821 Alpha-amylase C-terminal beta-sheet domain
Pfam-B_1278 (release 14.0). This domain is organised as a five-stranded anti-parallel beta-sheet [1,2]. It is the probable result of a decay of the common-fold.. +PF07822 Neurotoxin B-IV-like protein
Pfam-B_66513 (release 14.0). The members of this family resemble neurotoxin B-IV (Swiss:P01525), which is a crustacean-selective neurotoxin produced by the marine worm Cerebratulus lacteus. This highly cationic peptide is approximately 55 residues and is arranged to form two antiparallel helices connected by a well-defined loop in a hairpin structure. The branches of the hairpin are linked by four disulphide bonds. Three residues identified as being important for activity, namely Arg-17, -25 and -34, are found on the same face of the molecule, while another residue important for activity, Trp30, is on the opposite side. The protein's mode of action is not entirely understood, but it may act on voltage-gated sodium channels, possibly by binding to an as yet uncharacterised site on these proteins. Its site of interaction may also be less specific, for example it may interact with negatively charged membrane lipids .. +PF07823 Cyclic phosphodiesterase-like protein
Pfam-B_73368 (release 14.0). Cyclic phosphodiesterase (CPDase, Swiss:O04147) is involved in the tRNA splicing pathway. This protein exhibits a bilobal arrangement of two alpha-beta modules. Two antiparallel helices are found on the outer side of each lobe and frame an antiparallel beta-sheet that is wrapped around an accessible cleft. Moreover, the beta-strands of each lobe interact with the other lobe. The central water-filled cavity houses the enzyme's active site .. +PF07824 Type III secretion chaperone domain
Pfam-B_32938 (release 14.0). Type III secretion chaperones are involved in delivering virulence effector proteins from bacterial pathogens directly into eukaryotic cells. The chaperones may prevent aggregation and degradation of their substrates, may target the effector to the secretion apparatus, and may ensure a secretion-component unfolded conformation of their specific substrate. One member of this family, SigE (Swiss:O30917) forms homodimers in crystal. The monomers have a novel fold with an alpha-beta(3)-alpha-beta(2)-alpha topology .. +PF07825 Excisionase-like protein
Pfam-B_46296 (release 14.0). The phage-encoded excisionase protein (Xis, Swiss:P03699) is involved in excisive recombination by regulating the assembly of the excisive intasome and by inhibiting viral integration. It adopts an unusual 'winged'-helix structure in which two alpha helices are packed against two extended strands. Also present in the structure is a two-stranded anti-parallel beta-sheet, whose strands are connected by a four-residue 'wing'. During interaction with DNA, helix alpha2 is thought to insert into the major groove, while the wing contacts the adjacent minor groove or phosphodiester backbone. The C-terminal region of Xis is involved in interaction with phage-encoded integrase (Int), and a putative C-terminal alpha helix may fold upon interaction with Int and/or DNA .. +PF07826 IMP cyclohydrolase-like protein
Pfam-B_50235 (release 14.0). This enzyme (Swiss:O27099) may catalyse the cyclization of 5-formylamidoimidazole-4-carboxamide ribonucleotide to inosine monophosphate (IMP), a reaction which is important in de novo purine biosynthesis in archaeal species. This single domain protein is arranged to form an overall fold that consists of a four-layered alpha-beta-beta-alpha core structure. The two antiparallel beta-sheets pack against each other and are covered by alpha-helices on one face of the molecule. The protein is structurally similar to members of the N-terminal nucleophile (NTN) hydrolase superfamily. A deep pocket was in fact found on the surface of IMP cyclohydrolase in a position equivalent to that of active sites of NTN-hydrolases, but an N-terminal nucleophile could not be found. Therefore, it is thought that this enzyme is structurally but not functionally similar to members of the NTN-hydrolase family .. +PF07827 KNTase C-terminal domain
Pfam-B_29524 (release 14.0). Kanamycin nucleotidyltransferase (KNTase) is involved in conferring resistance to aminoglycoside antibiotics and catalyses the transfer of a nucleoside monophosphate group from a nucleotide to kanamycin. This enzyme is dimeric with each subunit being composed of two domains. The C-terminal domain contains five alpha helices, four of which are organised into an up-and-down alpha helical bundle. Residues found in this domain may contribute to this enzyme's active site .. +PF07828 PA-IL-like protein
Pfam-B_99281 (release 14.0). The members of this family are similar to the galactophilic lectin-1 expressed by P. aeruginosa (PA-IL, Swiss:Q05097). Lectins recognising specific carbohydrates found on the surface of host cells are known to be involved in the initiation of infections by this organism. The protein is thought to be organised into an extensive network of beta-sheets, as is the case with many other lectins .. +PF07829 Alpha-A conotoxin PIVA-like protein
Pfam-B_46690 (release 14.0). Alpha-A conotoxin PIVA (Swiss:P55963) is the major paralytic toxin found in the venom produced by the piscivorous snail Conus purpurascens. This peptide acts by blocking the acetylcholine binding site of the nicotinic acetylcholine receptor at the neuromuscular junction . The overall shape of the peptide is described as an "iron" with a highly charged hydrophilic loop of 15S-19R forming the "handle" domain that is exposed to the exterior of the protein. The stability of the conotoxin is primarily governed by three disulphide bonds. A triangular structural motif formed by residues 19R, 12H and 6Y is thought to constitute a "binding core" that is important in binding to the acetylcholine receptor .. +PF07830 Protein serine/threonine phosphatase 2C, C-terminal domain
Pfam-B_5253 (release 14.0). Protein phosphatase 2C (PP2C) is involved in regulating cellular responses to stress in various eukaryotes. It consists of two domains: an N-terminal catalytic domain and a C-terminal domain characteristic of mammalian PP2Cs. This domain consists of three antiparallel alpha helices, one of which packs against two corresponding alpha-helices of the N-terminal domain. The C-terminal domain does not seem to play a role in catalysis, but it may provide protein substrate specificity due to the cleft that is created between it and the catalytic domain .. +PF07831 Pyrimidine nucleoside phosphorylase C-terminal domain
Pfam-B_1661 (release 14.0). This domain is found at the C-terminal end of the large alpha/beta domain making up various pyrimidine nucleoside phosphorylases [1,2]. It has slightly different conformations in different members of this family. For example, in pyrimidine nucleoside phosphorylase (PYNP, Swiss:P77826) there is an added three-stranded anti-parallel beta sheet as compared to other members of the family, such as E. coli thymidine phosphorylase (TP, Swiss:P07650) . The domain contains an alpha/ beta hammerhead fold and residues in this domain seem to be important in formation of the homodimer .. +PF07832 Cfr10I/Bse634I restriction endonuclease
Pfam-B_46671 (release 14.0). Cfr10I (Swiss:P56200) and Bse634I (Swiss:Q8RT53) are two Type II restriction endonucleases. They exhibit a conserved tetrameric architecture that is of functional importance, wherein two dimers are arranged 'back-to-back' with their putative DNA-binding clefts facing opposite directions. These clefts are formed between two monomers that interact, mainly via hydrophobic interactions supported by a few hydrogen bonds, to form a U-shaped dimer. Each monomer is folded to form a compact alpha-beta structure, whose core is made up of a five-stranded mixed beta-sheet.The monomer may be split into separate N-terminal and C-terminal subdomains at a hinge located in helix alpha3 .. +PF07833 Copper amine oxidase N-terminal domain
Pfam-B_46519 (release 14.0). Copper amine oxidases catalyse the oxidative deamination of primary amines to the corresponding aldehydes, while reducing molecular oxygen to hydrogen peroxide. These enzymes are dimers of identical subunits, each comprising four domains. The N-terminal domain, which is absent in some amine oxidases, consists of a five-stranded antiparallel beta sheet twisted around an alpha helix. The D1 domains from the two subunits comprise the 'stalk' of the mushroom-shaped dimer, and interact with each other but do not pack tightly against each other [1,2].. +PF07834 RanGAP1 C-terminal domain
Pfam-B_23411 (release 14.0). Ran-GTPase activating protein 1 (RanGAP1, Swiss:P46061) is a GTPase activator for the nuclear Ras-related regulatory protein Ran, converting it to the putatively inactive GDP-bound state. Its C-terminal domain is required for RanGAP1 localisation at the vertebrate nuclear pore complex, and is sumoylated by the small ubiquitin-related modifier protein (SUMO-1, Swiss:Q93068). This domain is composed almost entirely of helical substructures that are organised into an alpha-alpha superhelix fold, with the exception of the peptide containing the lysine residue required for SUMO-1 conjugation .. +PF07835 Bacterial aa3 type cytochrome c oxidase subunit IV
Pfam-B_86185 (release 14.0). Bacterial cytochrome c oxidase is found bound to the cell membrane, where it is involved in the generation of the transmembrane proton electrochemical gradient. It is composed of four subunits. Subunit IV consists of one transmembrane helix that does not interact directly with the other subunits, but maintains its position by indirect contacts via phospholipid molecules found in the structure. The function of subunit IV is as yet unknown .. +PF07836 DmpG-like communication domain
Pfam-B_1675 (release 14.0). This domain is found towards the C-terminal region of various aldolase enzymes. It consists of five alpha-helices, four of which form an antiparallel helical bundle that plugs the C-terminus of the N-terminal TIM barrel domain . The communication domain is thought to play an important role in the heterodimerisation of the enzyme .. +PF07837 Formiminotransferase domain, N-terminal subdomain
Pfam-B_4434 (release 14.0). The formiminotransferase (FT) domain of formiminotransferase- cyclodeaminase (FTCD) forms a homodimer, and each protomer comprises two subdomains. The N-terminal subdomain is made up of a six-stranded mixed beta-pleated sheet and five alpha helices, which are arranged on the external surface of the beta sheet. This, in turn, faces the beta-sheet of the C-terminal subdomain to form a double beta-sheet layer. The two subdomains are separated by a short linker sequence, which is not thought to be any more flexible than the remainder of the molecule. The substrate is predicted to form a number of contacts with residues found in both the N-terminal and C-terminal subdomains .. +PF07839 Plant calmodulin-binding domain
Pfam-B_9279 (release 14.0). The sequences featured in this family are found repeated in a number of plant calmodulin-binding proteins (such as Swiss:Q8W235, Swiss:Q84ZT8 and Swiss:Q8H6X1), and are thought to constitute the calmodulin-binding domains [1,2]. Binding of the proteins to calmodulin depends on the presence of calcium ions [1,2]. These proteins are thought to be involved in various processes, such as plant defence responses and stolonisation or tuberization .. +PF07840 FadR C-terminal domain
Pfam-B_11411 (release 14.0). This family contains sequences that are similar to the fatty acid metabolism regulator protein (FadR, Swiss:P09371). This functions as a dimer, with each monomer being composed of an N-terminal DNA-binding domain and a regulatory C-terminal domain. A linker comprising two short alpha helices joins the two domains. In the C-terminal domain, an antiparallel array of six alpha helices forms a barrel-like structure, while a seventh alpha helix forms a 'lid' at the end closest to the N-terminal domain. This structure was found to be similar to that of the C-terminal domain of the Tet repressor. Long-chain acyl-CoA thioesters interact directly and reversibly with the C-terminal domain, and this interaction affects the structure and therefore the DNA binding properties of the N-terminal domain .. +PF07841 DM4/DM12 family
Pfam-B_5243 (release 14.0). This family contains sequences derived from hypothetical proteins expressed by two insect species, D. melanogaster and A. gambiae. The region in question is approximately 115 amino acid residues long and contains four highly- conserved cysteine residues.. +PF07842 GC-rich sequence DNA-binding factor-like protein
Fenech M, Mistry J, Wood V. Pfam-B_9357 (release 14.0) & Pfam-B_9894 (release 19.0). Sequences found in this family are similar to a region of a human GC-rich sequence DNA-binding factor homolog (Swiss:Q9Y5B6). This is thought to be a protein involved in transcriptional regulation due to partial homologies to a transcription repressor and histone-interacting protein . This family also contains tuftelin interacting protein 11 which has been identified as both a nuclear and cytoplasmic protein, and has been implicated in the secretory pathway. Sip1, a septin interacting protein is also a member of this family.. +PF07843 Protein of unknown function (DUF1634)
Pfam-B_9594 (release 14.0). This family contains many hypothetical bacterial and archaeal proteins. A few members of this family are annotated as being putative transmembrane proteins, and the region in question in fact contains many hydrophobic residues.. +PF07845 Protein of unknown function (DUF1636)
Pfam-B_9608 (release 14.0). The sequences featured in this family are derived from a number of hypothetical prokaryotic proteins. The region in question is approximately 130 amino acids long.. +PF07846 Metallothio_7;
Metallothionein family. Pfam-B_9622 (release 14.0). The sequences making up Metallothio_Cad are found repeated in metallothionein proteins expressed by several different Tetrahymena species. Metallothioneins are low molecular mass, cysteine-rich metal-binding proteins that are thought to be involved in the regulation of levels of trace metals, and detoxification of these metals when present in excess . Some of the metallothioneins found in this family (for example, Swiss:Q8T6B3) are known to be induced by cadmium and are thought to be involved in the cellular sequestration of toxic metal ions. The high proportion of cysteine residues allows the metal ions to be bound by the formation of clusters of metal-thiolate complexes . Tetrahymena spp. metallothioneins differ from other eukaryotic metallothioneins mainly in the length of their sequences and in the cysteine-containing motifs they exhibit.. +PF07847 Protein of unknown function (DUF1637)
Pfam-B_6051 (release 14.0). This family contains many eukaryotic hypothetical proteins. The region featured in this family is approximately 120 residues long. According to InterPro annotation, some members of this family may belong to the cupin superfamily.. +PF07848 PaaX-like protein
Pfam-B_9563 (release 14.0). This family contains proteins that are similar to the product of the paaX gene of Escherichia coli (Swiss:P76086). This protein is involved in the regulation of expression of a group of proteins known to participate in the metabolism of phenylacetic acid . In fact, some members of this family are annotated by InterPro as containing a winged helix DNA-binding domain (Interpro:IPR009058).. +PF07849 Protein of unknown function (DUF1641)
Pfam-B_9217 (release 14.0). Archaeal and bacterial hypothetical proteins are found in this family, with the region in question being approximately 40 residues long.. +PF07850 Renin receptor-like protein
Pfam-B_9266 (release 14.0). The sequences featured in this family are similar to a region of the human renin receptor (Swiss:Q8NG15) that bears a putative transmembrane spanning segment . The renin receptor is involved in intracellular signal transduction by the activation of the ERK1/ERK2 pathway, and it also serves to increase the efficiency of angiotensinogen cleavage by receptor-bound renin, therefore facilitating angiotensin II generation and action on a cell surface .. +PF07851 TMPIT-like protein
Pfam-B_9674 (release 14.0). A number of members of this family are annotated as being transmembrane proteins induced by tumour necrosis factor alpha, but no literature was found to support this.. +PF07852 Protein of unknown function (DUF1642)
Pfam-B_9838 (release 14.0). The sequences making up this family are derived from various hypothetical phage and prophage proteins. The region in question is approximately 140 amino acids long.. +PF07853 Protein of unknown function (DUF1648)
Pfam-B_9801 (release 14.0). Members of this family are hypothetical proteins expressed by either bacterial or archaeal species. Some of these are annotated as being transmembrane proteins, and in fact many of these sequences contain a high proportion of hydrophobic residues.. +PF07854 Protein of unknown function (DUF1646)
Pfam-B_9337 (release 14.0). Some of the members of this family are hypothetical bacterial and archaeal proteins, but others are annotated as being cation transporters expressed by the archaebacterium Methanosarcina mazei (Swiss:Q8PXG5, Swiss:Q8PXG7 and Swiss:Q8PXG8).. +PF07855 Protein of unknown function (DUF1649)
Pfam-B_9402 (release 14.0). This family is made up of sequences derived from hypothetical eukaryotic proteins of unknown function. . +PF07856 DUF1650; Orai-1_Ce;
Mediator of CRAC channel activity. Fenech M, Pollington JE. Pfam-B_9685 (release 14.0). ORAI-1 is a protein homologue of Drosophila Orai and human Orai1, Orai2 and Orai3. ORAI-1 GFP reporters are co- expressed with STIM-1 (ER CA(2+) sensors) in the gonad and intestine. The protein has four predicted transmembrane domains with a highly conserved region between TM2 and TM3. This conserved domain is thought to function in channel regulation. ORAI1- related proteins are required for the production of the calcium channel, CRAC, along with STIM1-related proteins .. +PF07857 CEO family (DUF1632)
Pfam-B_9654 (release 14.0). These sequences are found in hypothetical eukaryotic proteins of unknown function. The region concerned is approximately 280 residues long. This family has been termed the CEO family for C. elegans ORF .. +PF07858 Limonene-1,2-epoxide hydrolase catalytic domain
Pfam-B_15033 (release 14.0). Epoxide hydrolases catalyse the hydrolysis of epoxides to corresponding diols, which is important in detoxification, synthesis of signal molecules, or metabolism. Limonene-1,2- epoxide hydrolase (LEH) differs from many other epoxide hydrolases in its structure and its novel one-step catalytic mechanism. Its main fold consists of a six-stranded mixed beta-sheet, with three N-terminal alpha helices packed to one side to create a pocket that extends into the protein core. A fourth helix lies in such a way that it acts as a rim to this pocket. Although mainly lined by hydrophobic residues, this pocket features a cluster of polar groups that lie at its deepest point and constitute the enzyme's active site .. +PF07859 alpha/beta hydrolase fold
Pfam-B_100 (release 15.0). This catalytic domain is found in a very wide range of enzymes.. +PF07860 WisP family C-Terminal Region
This family is found at the C-terminus of the Tropheryma whipplei WisP family proteins ( ).. +PF07861 WisP family N-Terminal Region
This family is found at the N-terminus of the Tropheryma whipplei WisP family proteins ( ).. +PF07862 Nitrogen fixation protein of unknown function
This domain is found in the Cyanobacteria, and may be involved in nitrogen fixation, but no role has been assigned ( ).. +PF07863 Homologues of TraJ from Bacteroides conjugative transposon
Members of this family have been implicated as being involved in an unusual form of DNA transfer (conjugation) in Bacteroides ( ). The family has been named CtnDOT_TraJ to avoid confusion with other conjugative transfer systems.. +PF07864 Protein of unknown function (DUF1651)
This is a family containing bacterial proteins of unknown function.. +PF07865 Protein of unknown function (DUF1652)
This is a family containing hypothetical bacterial proteins.. +PF07866 Protein of unknown function (DUF1653)
This is a family of hypothetical bacterial proteins of unknown function.. +PF07867 Protein of unknown function (DUF1654)
This family consists of proteins from the Pseudomonadaceae.. +PF07868 Protein of unknown function (DUF1655)
This protein is found in some prophages found in Lactobacillales lactis ( ).. +PF07869 Protein of unknown function (DUF1656)
This family contains bacterial proteins, many of which are hypothetical. Some proteins in this family are putative membrane proteins.. +PF07870 Protein of unknown function (DUF1657)
This domain appears to be restricted to the Bacillales.. +PF07871 Protein of unknown function (DUF1658)
This family of small proteins seems to be found in several places in the Coxiella genome.. +PF07872 Protein of unknown function (DUF1659)
This family consists of hypothetical bacterial proteins of unknown function.. +PF07873 YabP family
This family of proteins is involved in spore coat assembly during the process of sporulation .. +PF07874 Prophage protein (DUF1660)
This protein is found in Lactobacillae prophages.. +PF07875 Coat F domain
The Coat F proteins contribute to the Bacillales spore coat. It occurs multiple times in the genomes it is found in.. +PF07876 Stress responsive A/B Barrel Domain
The function of this family is unknown, but it is upregulated in response to salt stress in Populus balsamifera ( ). It is also found at the C-terminus of a fructose 1,6-bisphosphate aldolase from Hydrogenophilus thermoluteolus (Swiss:Q9ZA13; ). Swiss:Q93NG5 is found in the pA01 plasmid, which encodes genes for molybdopterin uptake and degradation of plant alkaloid nicotine. The structure of one has been solved (Swiss:Q9LUV2) and the domain forms an a/b barrel dimer ( ). Although there is a clear duplication within the domain it is not obviously detectable in the sequence.. +PF07877 Protein of unknown function (DUF1661)
This is a family containing bacterial proteins of unknown function. Many of the proteins in this family are hypothetical.. +PF07878 Protein of unknown function (DUF1662)
This family contains bacterial proteins of unknown function. This domain belongs to the Ribbon-helix-helix superfamily suggesting these may be DNA-binding proteins.. +PF07879 PHB/PHA accumulation regulator DNA-binding domain
This domain is found at the N-terminus of the Polyhydroxyalkanoate (PHA) synthesis regulators. These regulators have been shown to directly bind DNA and PHA ( ). The invariant nature of this domain compared to the C-terminal Pfam:PF05233 domain(s) suggests that it contains the DNA-binding function.. +PF07880 Bacteriophage T4 gp9/10-like protein
Pfam-B_73396 (release 14.0). The members of this family are similar to gene products 9 (gp9) and 10 (gp10) of bacteriophage T4. Both proteins are components of the viral baseplate . Gp9 (Swiss:P10927) connects the long tail fibres of the virus to the baseplate and triggers tail contraction after viral attachment to a host cell. The protein is active as a trimer, with each monomer being composed of three domains. The N-terminal domain consists of an extended polypeptide chain and two alpha helices. The alpha1 helix from each of the three monomers in the trimer interacts with its counterparts to form a coiled-coil structure. The middle domain is a seven-stranded beta-sandwich that is thought to be a novel protein fold. The C-terminal domain is thought to be essential for gp9 trimerisation and is organised into an eight- stranded antiparallel beta-barrel, which was found to resemble the 'jelly roll' fold found in many viral capsid proteins. The long flexible region between the N-terminal and middle domains may be required for the function of gp9 to transmit signals from the long tail fibres . Together with gp11, gp10 (Swiss:P10928) initiates the assembly of wedges that then go on to associate with a hub to form the viral baseplate .. +PF07881 L-fucose isomerase, first N-terminal domain
Pfam-B_11456 (release 14.0). The members of this family are similar to L-fucose isomerase expressed by E. coli (Swiss:P11552, EC:5.3.1.3). This enzyme corresponds to glucose-6-phosphate isomerase in glycolysis, and converts an aldo-hexose to a ketose to prepare it for aldol cleavage. The enzyme is a hexamer, with each subunit being wedge-shaped and composed of three domains. Both domains 1 and 2 contain central parallel beta-sheets with surrounding alpha helices. Domain 1 demonstrates the beta-alpha-beta-alpha-beta Rossmann fold. The active centre is shared between pairs of subunits related along the molecular three-fold axis, with domains 2 and 3 from one subunit providing most of the substrate-contacting residues, and domain 1 from the adjacent subunit contributing some other residues .. +PF07882 L-fucose isomerase, second N-terminal domain
Pfam-B_11456 (release 14.0). The members of this family are similar to L-fucose isomerase expressed by E. coli (Swiss:P11552, EC:5.3.1.3). This enzyme corresponds to glucose-6-phosphate isomerase in glycolysis, and converts an aldo-hexose to a ketose to prepare it for aldol cleavage. The enzyme is a hexamer, with each subunit being wedge-shaped and composed of three domains. Both domains 1 and 2 contain central parallel beta- sheets with surrounding alpha helices. The active centre is shared between pairs of subunits related along the molecular three-fold axis, with domains 2 and 3 from one subunit providing most of the substrate-contacting residues .. +PF07883 Cupin domain
Pfam-B_81 (release 15.0). This family represents the conserved barrel domain of the 'cupin' superfamily ('cupa' is the Latin term for a small barrel).. +PF07884 Vitamin K epoxide reductase family
Vitamin K epoxide reductase (VKOR) recycles reduced vitamin K, which is used subsequently as a co-factor in the gamma-carboxylation of glutamic acid residues in blood coagulation enzymes. VKORC1 is a member of a large family of predicted enzymes that are present in vertebrates, Drosophila, plants, bacteria and archaea . Four cysteine residues and one residue, which is either serine or threonine, are identified as likely active-site residues . In some plant and bacterial homologues the VKORC1 homologous domain is fused with domains of the thioredoxin family of oxidoreductases .. +PF07885 Ion channel
Pfam-B_55 (release 15.0). This family includes the two membrane helix type ion channels found in bacteria.. +PF07886 BA14K-like protein
Pfam-B_4068 (release 14.0). The sequences found in this family are similar to the BA14K proteins expressed by Brucella abortus (Swiss:Q44701) and by Brucella suis (Swiss:Q8FVU0). BA14K was found to be strongly immunoreactive; it induces both humoral and cellular responses in hosts throughout the infective process .. +PF07887 Calmodulin binding protein-like
Pfam-B_4579 (release 14.0). The members of this family are putative or actual calmodulin binding proteins expressed by various plant species. Some members (for example, Swiss:Q8H6T7), are known to be involved in the induction of plant defence responses . However, their precise function in this regard is as yet unknown.. +PF07888 CoCoA;
Calcium binding and coiled-coil domain (CALCOCO1) like. Pfam-B_4504 (release 14.0). Proteins found in this family are similar to the coiled-coil transcriptional coactivator protein coexpressed by Mus musculus (CoCoA/CALCOCO1, Swiss:Q8CGU1). This protein binds to a highly conserved N-terminal domain of p160 coactivators, such as GRIP1 (Swiss:Q61026), and thus enhances transcriptional activation by a number of nuclear receptors. CALCOCO1 has a central coiled-coil region with three leucine zipper motifs, which is required for its interaction with GRIP1 and may regulate the autonomous transcriptional activation activity of the C-terminal region .. +PF07889 Protein of unknown function (DUF1664)
Pfam-B_4797 (release 14.0). The members of this family are hypothetical plant proteins of unknown function. The region featured in this family is approximately 100 amino acids long.. +PF07890 DUF1665;
Fenech M, Mistry J, Wood V. Pfam-B_9434 (release 14.0). Rrp15p is required for the formation of 60S ribosomal subunits .. +PF07891 Protein of unknown function (DUF1666)
Pfam-B_9387 (release 14.0). These sequences are derived from hypothetical plant proteins of unknown function. The region in question is approximately 250 residues long.. +PF07892 Protein of unknown function (DUF1667)
Pfam-B_9631 (release 14.0). Hypothetical archaeal and bacterial proteins make up this family. A few proteins are annotated as being potential metal-binding proteins, and in fact the members of this family have four highly conserved cysteine residues, but no further literature evidence was found in this regard.. +PF07893 Protein of unknown function (DUF1668)
Pfam-B_5066 (release 14.0). The hypothetical proteins found in this family are expressed by Oryza sativa and are of unknown function.. +PF07894 Protein of unknown function (DUF1669)
Pfam-B_9335 (release 14.0). This family is composed of sequences derived from hypothetical eukaryotic proteins of unknown function. Some members of this family are annotated as being potential phospholipases but no literature was found to support this.. +PF07895 Protein of unknown function (DUF1673)
Pfam-B_4746 (release 14.0). This family contains hypothetical proteins of unknown function expressed by two archaeal species.. +PF07896 Protein of unknown function (DUF1674)
Pfam-B_4326 (release 14.0). The members of this family are sequences derived from hypothetical eukaryotic and bacterial proteins. The region in question is approximately 60 residues long.. +PF07897 Protein of unknown function (DUF1675)
Pfam-B_4280 (release 14.0). The members of this family are sequences derived from hypothetical plant proteins of unknown function. One member of this family (Swiss:Q9SFV5) is annotated as a putative RNA-binding protein, but no evidence was found to support this.. +PF07898 Protein of unknown function (DUF1676)
Pfam-B_4779 (release 14.0). This family contains sequences derived from proteins of unknown function expressed by Drosophila melanogaster and Anopheles gambiae.. +PF07899 Frigida-like protein
Pfam-B_4728 (release 14.0). This family is composed of plant proteins that are similar to FRIGIDA protein expressed by Arabidopsis thaliana (Swiss:Q9FDW0). This protein is probably nuclear and is required for the regulation of flowering time in the late-flowering phenotype. It is known to increase RNA levels of flowering locus C. Allelic variation at the FRIGIDA locus is a major determinant of natural variation in flowering time .. +PF07900 Protein of unknown function (DUF1670)
Pfam-B_9559 (release 14.0). The hypothetical eukaryotic proteins found in this family are of unknown function.. +PF07901 Protein of unknown function (DUF1672)
Pfam-B_9698 (release 14.0). This family is composed of hypothetical bacterial proteins of unknown function.. +PF07902 gp58-like protein
Pfam-B_4289 (release 14.0). Sequences found in this family are derived from a number of bacteriophage and prophage proteins. They are similar to gp58 (Swiss:Q38355), a minor structural protein of Lactococcus delbrueckii bacteriophage LL-H .. +PF07903 PaRep2a protein
Pfam-B_4102 (release 14.0). This is a family of proteins expressed by the crenarchaeon Pyrobaculum aerophilum. The members are highly variable in length and level of conservation. The presence of numerous frameshifts and internal stop codons in multiple alignments are thought to indicate that most family members are no longer functional .. +PF07904 CT20;
Chromatin modification-related protein EAF7. The S. cerevisiae member of this family Swiss:P53911 is part of NuA4, the only essential histone acetyltransferase complex in Saccharomyces cerevisiae involved in global histone acetylation .. +PF07905 Purine catabolism regulatory protein-like family
Pfam-B_4388 (release 14.0). The bacterial proteins found in this family are similar to the purine catabolism regulatory protein expressed by Bacillus subtilis (PucR, Swiss:O32138). PucR is thought to be a transcriptional activator involved in the induction of the purine degradation pathway, and may contain a LysR-like DNA-binding domain. It is similar to LysR-type regulators in that it represses its own expression . The other members of this family are also annotated as being putative regulatory proteins.. +PF07906 ShET2 enterotoxin, N-terminal region
Pfam-B_4512 (release 14.0). The members of this family are sequences that are similar to the N-terminal half of the ShET2 enterotoxin produced by Shigella flexneri (Swiss:Q47635) and Escherichia coli (Swiss:Q47634). This protein was found to confer toxigenicity in the Ussing chamber, and the N-terminal region was found to be important for the protein's enterotoxic effect. It is thought to be a hydrophobic protein that forms inclusion bodies within the bacterial cell, and may be secreted by the Mxi system . Most members of this family are annotated as putative enterotoxins, but one member (Swiss:Q8X606) is a regulator of acetyl CoA synthetase, and another two members (Swiss:P76205 and Swiss:P23325) are annotated as ankyrin-like regulatory proteins and contain Ank repeats (Pfam:PF00023).. +PF07907 YibE/F-like protein
Pfam-B_4781 (release 14.0). The sequences featured in this family are similar to two proteins expressed by Lactococcus lactis, YibE (Swiss:Q9CHC5) and YibF (Swiss:Q9CHC4). Most of the members of this family are annotated as being putative membrane proteins, and in fact the sequences contain a high proportion of hydrophobic residues.. +PF07908 D-aminoacylase, C-terminal region
Pfam-B_13711 (release 14.0). D-aminoacylase (Swiss:Q9AGH8, EC:3.5.1.81) hydrolyses a wide variety of N-acyl derivatives of neutral D-amino acids, in a zinc-dependent manner. The enzyme is composed of a small beta-barrel domain and a larger catalytic alpha/beta-barrel. The C-terminal region featured in this family forms part of the beta-barrel domain, together with a short N-terminal segment. The beta-strands of both barrels were found to superimpose well. The small beta-barrel domain does not seem to contribute to the substrate-binding site or to be involved in the catalytic process .. +PF07909 Protein of unknown function (DUF1663)
Pfam-B_4106 (release 14.0). The members of this family are hypothetical proteins expressed by Trypanosoma cruzi, a eukaryotic parasite that causes Chagas' disease in humans. This region is found as multiple copies per protein.. +PF07910 DUF1671;
Peptidase family C78. Pfam-B_9699 (release 14.0). This family formerly known as DUF1671 has been shown to be a cysteine peptidase called (Ufm1)-specific protease .. +PF07911 Protein of unknown function (DUF1677)
Pfam-B_4922 (release 14.0). The sequences found in this family are all derived from hypothetical plant proteins of unknown function. The region features a number of highly conserved cysteine residues.. +PF07912 ERp29, N-terminal domain
Pfam-B_28781 (release 14.0). ERp29 (Swiss:P52555) is a ubiquitously expressed endoplasmic reticulum protein, and is involved in the processes of protein maturation and protein secretion in this organelle [1,2]. The protein exists as a homodimer, with each monomer being composed of two domains. The N-terminal domain featured in this family is organised into a thioredoxin-like fold that resembles the a domain of human protein disulphide isomerase (PDI) . However, this domain lacks the C-X-X-C motif required for the redox function of PDI; it is therefore thought that ERp29's function is similar to the chaperone function of PDI . The N-terminal domain is exclusively responsible for the homodimerisation of the protein, without covalent linkages or additional contacts with other domains .. +PF07913 Protein of unknown function (DUF1678)
Pfam-B_4886 (release 14.0). This family is composed of uncharacterized proteins expressed by Methanopyrus kandleri, a hyperthermophilic archaebacterium. . +PF07914 Protein of unknown function (DUF1679)
Pfam-B_4694 (release 14.0). The region featured in this family is found in a number of C. elegans proteins, in one case (Swiss:Q19034) as a repeat. In many of the family members, this region is associated with the CHK region described by SMART as being found in ZnF_C4 and HLH domain-containing kinases. In fact, one member of this family (Swiss:Q9GUC1) is annotated as being a member of the nuclear hormone receptor family, and contains regions typical of such proteins (Interpro:IPR000536, Interpro:IPR008946, and Interpro:IPR001628).. +PF07915 Glucosidase II beta subunit-like protein
Pfam-B_9407 (release 14.0). The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing . The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum . Mutations in the gene coding for PRKCSH have been found to be involved in the development of autosomal dominant polycystic liver disease (ADPLD), but the precise role the protein has in the pathogenesis of this disease is unknown . This family also includes an ER sensor for misfolded glycoproteins and is therefore likely to be a generic sugar binding domain.. +PF07916 TraG-like protein, N-terminal region
Fenech M, Mistry J, Coggill P. Pfam-B_4841 (release 14.0) & Pfam-B_689 (release 23.0). The bacterial sequences found in this family are similar to the N-terminal region of the TraG protein (Swiss:P33790). This is a membrane-spanning protein, with three predicted transmembrane segments and two periplasmic regions . TraG protein is known to be essential for DNA transfer in the process of conjugation, with the N-terminal portion being required for F pilus assembly [1,2]. The protein is thought to interact with the periplasmic domain of TraN (Swiss:P24082) to stabilise mating-cell interactions .. +PF07918 CAP160 repeat
Pfam-B_9359 (release 14.0). The region featured in this family is repeated in spinach cold acclimation protein CAP160 (Swiss:O50054). CAP160 is induced during periods of drought stress; its precise function is unknown but it has been implicated in the stabilisation of membranes, cytoskeletal elements, and ribosomes. By acting as a compatible solute, it may reduce the toxic effects of cellular solutes that accumulate at high concentration during dehydration; it may also function as an enzyme that produces such a solute . Other members of this family are also induced by water stress, abscisic acid, and/or low temperature, such as desiccation-responsive protein 29B (Swiss:Q04980) and CDet11-24 protein (Swiss:O23764).. +PF07919 DUF1683;
Gryzun, putative trafficking through Golgi. Fenech M, Pollington J. Pfam-B_9179 (release 14.0). The proteins featured in this family are all eukaryotic, and many of them are annotated as being Gryzun. Gryzun is distantly related to, but distinct from, the Trs130 subunit of the TRAPP complex but is absent from S. cerevisiae. RNAi of human Gryzun (Swiss:Q7Z392) blocks Golgi exit. Thus the family is likely to be involved with trafficking of proteins through membranes, perhaps as part of the TRAPP complex.. +PF07920 Protein of unknown function (DUF1684)
Pfam-B_9328 (release 14.0). The sequences featured in this family are found in hypothetical archaeal and bacterial proteins of unknown function. The region in question is approximately 200 amino acids long.. +PF07921 Fibritin C-terminal region
Pfam-B_31175 (release 14.0). This family features sequences bearing similarity to the C-terminal portion of the bacteriophage T4 protein fibritin (Swiss:P10104). This protein is responsible for attachment of long tail fibres to virus particle, and forms the 'whiskers' or fibres on the neck of the virion. The region seen in this family contains an N-terminal coiled-coil portion and the C-terminal globular foldon domain (residues 457-486), which is essential for fibritin trimerisation and folding . This domain consists of a beta-hairpin; three such hairpins come together in a beta-propeller-like arrangement in the trimer, which is stabilised by hydrogen bonds, salt bridges and hydrophobic interactions .. +PF07922 Glycosyltransferase family 52
Pfam-B_2778 (release 14.0). This family features glycosyltransferases belonging to glycosyltransferase family 52 , which have alpha-2,3- sialyltransferase (EC:4.2.99.4) and alpha-glucosyltransferase (EC 2.4.1.-) activity. For example, beta-galactoside alpha-2,3- sialyltransferase expressed by Neisseria meningitidis (Swiss:P72097) is a member of this family and is involved in a step of lipooligosaccharide biosynthesis requiring sialic acid transfer; these lipooligosaccharides are thought to be important in the process of pathogenesis . This family includes several bacterial lipooligosaccharide sialyltransferases similar to the Haemophilus ducreyi LST protein. Haemophilus ducreyi is the cause of the sexually transmitted disease chancroid and produces a lipooligosaccharide (LOS) containing a terminal sialyl N-acetyllactosamine trisaccharide . . +PF07923 N1221-like protein
Pfam-B_9309 (release 14.0). The sequences featured in this family are similar to a hypothetical protein product of ORF N1221 in the CPT1-SPC98 intergenic region of the yeast genome (Swiss:P53917). This encodes an acidic polypeptide with several possible transmembrane regions .. +PF07924 Nuclease A inhibitor-like protein
Pfam-B_43172 (release 14.0). This family consists of protein sequences that are similar to the nuclease A inhibitor expressed by bacteria of the genus Anabaena (NuiA, Swiss:Q44296). This sequence is organised to form an alpha-beta-alpha sandwich fold, which is similar to the PR-1-like fold. NuiA interacts with nuclease A by means of residues located at one end of the molecule, including residues making up the loop between helices III and IV and the loop between strands C and D. The mechanism of inhibition of nuclease A by NuiA is as yet incompletely understood .. +PF07925 Reovirus RNA-dependent RNA polymerase lambda 3
Pfam-B_9372 (release 14.0). The sequences in this family are similar to the reoviral minor core protein lambda 3 (Swiss:P17378), which functions as a RNA-dependent RNA polymerase within the protein capsid. It is organised into 3 domains. N- and C-terminal domains create a 'cage' that encloses a conserved central catalytic domain within a hollow centre; this catalytic domain is arranged to form 'fingers', 'palm' and 'thumb' subdomains. Unlike other RNA polymerases, like HIV reverse transcriptase and T7 RNA polymerase, lambda 3 protein binds template and substrate with only localised rearrangements, and catalytic activity can occur with little structural change. However, the structure of the catalytic complex is similar to that of other polymerase catalytic complexes with known structure .. +PF07926 TPR/MLP1/MLP2-like protein
Pfam-B_9285 (release 14.0). The sequences featured in this family are similar to a region of human TPR protein (Swiss:P12270) and to yeast myosin-like proteins 1 (MLP1, Swiss:Q02455) and 2 (MLP2, Swiss:P40457). These proteins share a number of features; for example, they all have coiled-coil regions and all three are associated with nuclear pores [1,2,3]. TPR is thought to be a component of nuclear pore complex- attached intra-nuclear filaments , and is implicated in nuclear protein import . Moreover, its N-terminal region is involved in the activation of oncogenic kinases, possibly by mediating the dimerisation of kinase domains or by targeting these kinases to the nuclear pore complex . MLP1 and MLP2 are involved in the process of telomere length regulation, where they are thought to interact with proteins such as Tel1p and modulate their activity .. +PF07927 YcfA-like protein
Pfam-B_2914 (release 14.0). The viral, archaeal and bacterial proteins making up this family are similar to the YcfA protein expressed by E. coli (Swiss:Q9F561). Most of these proteins are hypothetical proteins of unknown function.. +PF07928 Vps54-like protein
Pfam-B_9294 (release 14.0). This family contains various proteins that are homologs of the yeast Vps54 protein, such as the rat homolog (Swiss:Q9JMK8), the human homolog (Swiss:Q86YF7), and the mouse homolog (Swiss:Q8R3X1). In yeast, Vps54 associates with Vps52 and Vps53 proteins to form a trimolecular complex that is involved in protein transport between Golgi, endosomal, and vacuolar compartments . All Vps54 homologs contain a coiled coil region (not found in the region featured in this family) and multiple dileucine motifs .. +PF07929 Plasmid pRiA4b ORF-3-like protein
Pfam-B_4929 (release 14.0). Members of this family are similar to the protein product of ORF-3 (Swiss:Q44206) found on plasmid pRiA4 in the bacterium Agrobacterium rhizogenes. This plasmid is responsible for tumourigenesis at wound sites of plants infected by this bacterium, but the ORF-3 product does not seem to be involved in the pathogenetic process . Other proteins found in this family are annotated as being putative TnpR resolvases (Swiss:Q9LCU7, Swiss:Q50439), but no further evidence was found to back this. Moreover, another member of this family is described as a probable lexA repressor (Swiss:Q7UEI4) and in fact carries a LexA DNA binding domain (Pfam:PF01726), but no references were found to expand on this.. +PF07930 D-aminopeptidase, domain B
Pfam-B_29283 (release 14.0). D-aminopeptidase (Swiss:Q9ZBA9) is a dimeric enzyme with each monomer being composed of three domains. Domain B is organised to form a beta barrel made up of eight antiparallel beta strands. It is connected to domain A, the catalytic domain, by an eight-residue sequence, and also interacts with both domains A and C via non-covalent bonds. Domain B probably functions in maintaining domain C in a good position to interact with domain A .. +PF07931 Chloramphenicol phosphotransferase-like protein
Pfam-B_29509 (release 14.0). The members of this family are all similar to chloramphenicol 3-O phosphotransferase (CPT, Swiss:Q56148) expressed by Streptomyces venezuelae. Chloramphenicol (Cm) is a metabolite produced by this bacterium that can inhibit ribosomal peptidyl transferase activity and therefore protein production. By transferring a phosphate group to the C-3 hydroxyl group of Cm, CPT inactivates this potentially lethal metabolite .. +PF07932 D-aminopeptidase, domain C
Pfam-B_29283 (release 14.0). D-aminopeptidase (Swiss:Q9ZBA9) is a dimeric enzyme with each monomer being composed of three domains. Domain C is organised to form a beta barrel made up of eight antiparallel beta strands. It is connected to domain B by a short linker sequence, and interacts extensively with the domain A, the catalytic domain. The gamma loop of domain C forms part of the wall of the catalytic pocket; domain C is in fact thought to confer substrate and inhibitor specificity to the enzyme .. +PF07933 Protein of unknown function (DUF1681)
Pfam-B_4989 (release 14.0). This family is composed of sequences derived from a number of hypothetical eukaryotic proteins of unknown function.. +PF07934 8-oxoguanine DNA glycosylase, N-terminal domain
Pfam-B_29151 (release 14.0). The presence of 8-oxoguanine residues in DNA can give rise to G-C to T-A transversion mutations. This enzyme is found in archaeal, bacterial and eukaryotic species, and is specifically responsible for the process which leads to the removal of 8-oxoguanine residues. It has DNA glycosylase activity (EC:3.2.2.23) and DNA lyase activity (EC:4.2.99.18) . The region featured in this family is the N-terminal domain, which is organised into a single copy of a TBP-like fold. The domain contributes residues to the 8-oxoguanine binding pocket .. +PF07935 ORF D-335-like protein
Pfam-B_4933 (release 14.0). The sequences featured in this family are similar to a probable integrase (Swiss:P20214) expressed by the SSV1 virus of the archaebacterium Sulfolobus shibatae. This protein may be necessary for the integration of the virus into the host genome by a process of site-specific recombination .. +PF07936 BDS_I_II;
Potassium-channel blocking toxin. Pfam-B_56105 (release 14.0). This family features the antihypertensive and antiviral proteins BDS-I (Swiss:P11494) and BDS-II (Swiss:P59084) expressed by Anemonia sulcata. BDS-I is organised into a triple-stranded antiparallel beta-sheet, with an additional small antiparallel beta-sheet at the N-terminus . Both peptides are known to specifically block the Kv3.4 potassium channel, and thus bring about a decrease in blood pressure . Moreover, they inhibit the cytopathic effects of mouse hepatitis virus strain MHV-A59 on mouse liver cells, by an unknown mechanism .. +PF07937 Protein of unknown function (DUF1686)
Pfam-B_5313 (release 14.0). The members of this family are all hypothetical proteins of unknown function expressed by the eukaryotic parasite Encephalitozoon cuniculi GB-M1. The region in question is approximately 250 amino acids long.. +PF07938 Fungal fucose-specific lectin
Pfam-B_48600 (release 14.0). Lectins are involved in many recognition events at the molecular or cellular level. These fungal lectins, such as Aleuria aurantia lectin (AAL, Swiss:P18891), specifically recognise fucosylated glycans. AAL is a dimeric protein, with each monomer being organised into a six-bladed beta-propeller fold and a small antiparallel two-stranded beta-sheet. The beta-propeller fold is important in fucose recognition; five binding pockets are found between the propeller blades. The small beta-sheet, on the other hand, is involved in the dimerisation process . . +PF07939 Protein of unknown function (DUF1685)
Pfam-B_5502 (release 14.0). The members of this family are hypothetical eukaryotic proteins of unknown function. The region in question is approximately 100 amino acid residues long.. +PF07940 Heparinase II/III-like protein
Pfam-B_5577 (release 14.0). This family features sequences that are similar to a region of the Flavobacterium heparinum proteins heparinase II (Swiss:Q46080) and heparinase III (Swiss:Q59289). The former is known to degrade heparin and heparan sulphate, whereas the latter predominantly degrades heparan sulphate. Both are secreted into the periplasmic space upon induction with heparin .. +PF07941 Potassium channel Kv1.4 tandem inactivation domain
Pfam-B_7603 (release 14.0). This family features the tandem inactivation domain found at the N-terminus of the Kv1.4 potassium channel. It is composed of two subdomains. Inactivation domain 1 (ID1, residues 1-38) consists of a flexible N-terminus anchored at a 5-turn helix, and is thought to work by occluding the ion pathway, as is the case with a classical ball domain. Inactivation domain 2 (ID2, residues 40-50) is a 2.5 turn helix with a high proportion of hydrophobic residues that probably serves to attach ID1 to the cytoplasmic face of the channel. In this way, it can promote rapid access of ID1 to the receptor site in the open channel. ID1 and ID2 function together to bring about fast inactivation of the Kv1.4 channel, which is important for the channel's role in short-term plasticity .. +PF07942 N2227-like protein
Pfam-B_5433 (release 14.0). This family features sequences that are similar to a region of hypothetical yeast gene product N2227 (Swiss:P53934). This is thought to be expressed during meiosis and may be involved in the defence response to stressful conditions .. +PF07943 Penicillin-binding protein 5, C-terminal domain
Pfam-B_1086 (release 14.0). Penicillin-binding protein 5 expressed by E. coli (Swiss:P04287) functions as a D-alanyl-D-alanine carboxypeptidase. It is composed of two domains that are oriented at approximately right angles to each other. The N-terminal domain (Pfam:PF00768) is the catalytic domain. The C-terminal domain featured in this family is organised into a sandwich of two anti-parallel beta-sheets, and has a relatively hydrophobic surface as compared to the N-terminal domain. Its precise function is unknown; it may mediate interactions with other cell wall-synthesising enzymes, thus allowing the protein to be recruited to areas of active cell wall synthesis. It may also function as a linker domain that positions the active site in the catalytic domain closer to the peptidoglycan layer, to allow it to interact with cell wall peptides .. +PF07944 Putative glycosyl hydrolase of unknown function (DUF1680)
Pfam-B_4918 (release 14.0). The members of this family are sequences derived from hypothetical bacterial and eukaryotic proteins of unknown function. One member of this family is annotated as a possible arabinosidase, but no references were found to back this. These proteins are related to a large family of glycosyl hydrolases.. +PF07945 Janus-atracotoxin
Pfam-B_50381 (release 14.0). This family includes three peptides secreted by the spider Hadronyche versuta (Swiss:P82226, Swiss:P82227, Swiss:P82228). These are insect-selective, excitatory neurotoxins that may function by antagonising muscle acetylcholine receptors, or acetylcholine receptor subtypes present in other invertebrate neurons . Janus atracotoxin-Hv1c (J-ACTX-Hv1c, Swiss:P82228) is organised into a disulphide-rich globular core (residues 3-19) and a beta-hairpin (residues 20-34). There are 4 disulphide bridges, one of which is a vicinal disulphide bridge; this is known to be unimportant in the maintenance of structure but critical for insecticidal activity .. +PF07946 Protein of unknown function (DUF1682)
Pfam-B_4955 (release 14.0). The members of this family are all hypothetical eukaryotic proteins of unknown function. One member (Swiss:Q920S6) is described as being an adipocyte-specific protein, but no evidence of this was found.. +PF07947 YhhN-like protein
Pfam-B_5325 (release 14.0). The members of this family are similar to the hypothetical protein yhhN expressed by E. coli (Swiss:P37616). Many of the members of this family are annotated as being possible transmembrane proteins, and in fact they all have a high proportion of hydrophobic residues.. +PF07948 Nairovirus M polyprotein-like
Pfam-B_5426 (release 14.0). The sequences in this family are similar to the Dugbe virus M polyprotein precursor (Swiss:Q02004), which includes glycoproteins G1 and G2. Both are thought to be inserted in the membrane of the Golgi complex of the infected host cell, and G1 is known to have a role in infection of vertebrate hosts .. +PF07949 YbbR-like protein
Pfam-B_4990 (release 14.0). The members of this family are all hypothetical bacterial proteins of unknown function, and are similar to the YbbR protein expressed by Bacillus subtilis (Swiss:O34659, Swiss:O87088). One member (Swiss:Q97EN2) is annotated as an uncharacterized secreted protein, whereas another member (Swiss:P43521) is described as a hypothetical protein in the 5'region of the def gene of Thermus thermophilus, which encodes a deformylase , but no further information was found in either case. This region is found repeated up to four times in many members of this family.. +PF07950 Protein of unknown function (DUF1691)
This family of fungal proteins is uncharacterised. Each protein contains two copies of this region.. +PF07951 Clostridium neurotoxin, C-terminal receptor binding
Pfam-B_3087 (release 15.0). The Clostridium neurotoxin family is composed of tetanus neurotoxins and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein: the N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains . This domain is the C-terminal receptor binding domain, which adopts a modified beta-trefoil fold with a six stranded beta-barrel and a beta-hairpin triplet capping the domain . The first step in the intoxication process is a binding event between this domain and the pre-synaptic nerve ending .. +PF07952 Clostridium neurotoxin, Translocation domain
Pfam-B_4943 (release 15.0). The Clostridium neurotoxin family is composed of tetanus neurotoxin and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein: the N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains . Subsequent to cell surface binding and receptor mediated endocytosis of the neurotoxin, an acid induced conformational change in the neurotoxin translocation domain is believed to allow the domain to penetrate the endosome and form a pore, thereby facilitating the passage of the catalytic domain across the membrane into the cytosol . The structure of the translocation domain reveals a pair of helices that are 105 Angstroms long and is structurally distinct from other pore forming toxins .. +PF07953 Clostridium neurotoxin, N-terminal receptor binding
Pfam-B_1058 (release 15.0). The Clostridium neurotoxin family is composed of tetanus neurotoxin and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein: the N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains . This domain is the N-terminal receptor binding domain, which is comprised of two seven-stranded beta-sheets sandwiched together to form a jelly roll motif . The role of this domain in receptor binding appears to be indirect.. +PF07954 Protein of unknown function (DUF1689)
Family of fungal proteins with unknown function. A member of this family has been found to localise in the mitochondria .. +PF07955 Protein of unknown function (DUF1687)
This is a putative redox protein which is predicted to have a thioredoxin fold containing a single active cysteine .. +PF07956 Protein of Unknown function (DUF1690)
Family of uncharacterised fungal proteins.. +PF07957 Ribosomal_MRP8;
Protein of unknown function (DUF3294). This family was annotated as mitochondrial Ribosomal protein MRP8, based on the presumed similarity of the S.cerevisiae protein to an E.coli mitochondrial ribosomal protein; however, this similarity is spurious, and the function is not known [Wood, V].. +PF07958 Protein of unknown function (DUF1688)
A family of uncharacterised proteins.. +PF07959 L-fucokinase
Pfam-B_121298 (release 15.0). In the salvage pathway of GDP-L-fucose, free cytosolic fucose is phosphorylated by L-fucokinase to form L-fucose-L-phosphate, which is then further converted to GDP-L-fucose in the reaction catalysed by GDP-L-fucose pyrophosphorylase .. +PF07960 CBP4
The CBP4 in S. cerevisiae is essential for the expression and activity of ubiquinol-cytochrome c reductase [1,2]. This family appears to be fungal specific.. +PF07961 MBA1-like protein
Mba1 is an inner membrane protein that is part of the mitochondrial protein export machinery . It binds to the large subunit of mitochondrial ribosomes and cooperates with the C-terminal ribosome-binding domain of Oxa1, which is a central component of the insertion machinery of the inner membrane. In the absence of both Mba1 and the C-terminus of Oxa1, mitochondrial translation products fail to be properly inserted into the inner membrane and serve as substrates of the matrix chaperone Hsp70 . It is proposed that Mba1 functions as a ribosome receptor that cooperates with Oxa1 in the positioning of the ribosome exit site to the insertion machinery of the inner membrane .. +PF07962 Replication Fork Protection Component Swi3
Pfam-B_9217 (release 15.0). Replication fork pausing is required to initiate recombination events. More specifically, Swi1 is required for recombination near the mat1 locus. Swi3 has been found to co-purify with Swi1. Swi3, together with Swi1, defines a fork protection complex that coordinates leading- and lagging-strand synthesis and stabilises stalled replication forks . The Swi1-Swi3 complex is required for accurate replication, fork protection and replication checkpoint signalling [1,2]. +PF07963 Prokaryotic N-terminal methylation motif
Pfam-B_6484 (release 14.0). This short motif directs methylation of the conserved phenylalanine residue. It is most often found at the N-terminus of pilins and other proteins involved in secretion, see Pfam:PF00114, Pfam:PF05946, Pfam:PF02501 and Pfam:PF07596.. +PF07964 Rec10 / Red1
Rec10 / Red1 is involved in meiotic recombination and chromosome segregation during homologous chromosome formation. This protein localises to the synaptonemal complex in S. cerevisiae and the analogous structures (linear elements) in S. pombe . This family is currently only found in fungi.. +PF07965 Integrin beta tail domain
Pfam-B_1876 (release 14.0). This is the beta tail domain of the Integrin protein. Integrins are receptors which are involved in cell-cell and cell-extracellular matrix interactions.. +PF07966 A1 Propeptide
Pfam-B_386 (release 15.0). Most eukaryotic endopeptidases (Merops Family A1) are synthesised with signal and propeptides. The animal pepsin-like endopeptidase propeptides form a distinct family of propeptides, which contain a conserved motif approximately 30 residues long. In pepsinogen A, the first 11 residues of the mature pepsin sequence are displaced by residues of the propeptide. The propeptide contains two helices that block the active site cleft, in particular the conserved Asp11 residue, in pepsin, hydrogen bonds to a conserved Arg residues in the propeptide. This hydrogen bond stabilises the propeptide conformation and is probably responsible for triggering the conversion of pepsinogen to pepsin under acidic conditions [1,2].. +PF07967 C3HC zinc finger-like
This zinc-finger like domain is distributed throughout the eukaryotic kingdom in NIPA (Nuclear interacting partner of ALK) proteins. NIPA is implicated in performing some sort of antiapoptotic role in nucleophosmin-anaplastic lymphoma kinase (ALK) mediated signaling events . The domain is often repeated, with the second domain usually containing a large insert (approximately 90 residues) after the first three cysteine residues. The Schizosaccharomyces pombe protein containing this domain (Swiss:O94506) is involved in mRNA export from the nucleus .. +PF07968 Leukocidin/Hemolysin toxin family
+PF07969 Amidohydrolase family
Pfam-B_751 (release 15.0). +PF07970 DUF1692; Erv41; Erv46;
Endoplasmic reticulum vesicle transporter . Pfam-B_2028 (release 16.0). This family is conserved from plants and fungi to humans. Erv46 works in close conjunction with Erv41 and together they form a complex which cycles between the endoplasmic reticulum and Golgi complex. Erv46-41 interacts strongly with the endoplasmic reticulum glucosidase II. Mammalian glucosidase II comprises a catalytic alpha-subunit and a 58 kDa beta subunit, which is required for ER localisation. All proteins identified biochemically as Erv41p-Erv46p interactors are localised to the early secretory pathway and are involved in protein maturation and processing in the ER and/or sorting into COPII vesicles for transport to the Golgi .. +PF07971 Glycosyl hydrolase family 92
Pfam-B_1199 (release 16.0). Members of this family are alpha-1,2-mannosidases, enzymes which remove alpha-1,2-linked mannose residues from Man(9)(GlcNAc)(2) by hydrolysis. They are critical for the maturation of N-linked oligosaccharides and ER-associated degradation .. +PF07972 NrdI Flavodoxin like
Pfam-B_1603 (release 16.0). +PF07973 Threonyl and Alanyl tRNA synthetase second additional domain
Pfam-B_270 (release 16.0). The catalytically active form of threonyl/alanyl tRNA synthetase is a dimer. Within the tRNA synthetase class II dimer, the bound tRNA interacts with both monomers making specific interactions with the catalytic domain, the C-terminal domain, and this domain (the second additional domain). The second additional domain is comprised of a pair of perpendicularly orientated antiparallel beta sheets, of four and three strands, respectively, that surround a central alpha helix that forms the core of the domain .. +PF07974 EGF-like domain
Pfam-B_80 (Release 16.0). This family contains EGF domains found in a variety of extracellular proteins.. +PF07975 TFIIH C1-like domain
Pfam-B_10678 (release 16.0). The carboxyl-terminal region of TFIIH is essential for transcription activity. This region binds three zinc atoms through two independent domains. The first contains a C4 zinc finger motif, whereas the second is characterised by a CX(2)CX(2-4)FCADCD motif. The solution structure of the second C-terminal domain revealed homology with the regulatory domain of protein kinase C (Pfam:PF00130) .. +PF07976 Phenol hydroxylase, C-terminal dimerisation domain
Pfam-B_19435 (release 16.0). Phenol hydroxylase acts as a homodimer to hydroxylate phenol to catechol or a similar product. The enzyme is comprised of three domains. The first two domains form the active site. The third domain, this domain, is involved in forming the dimerisation interface. The domain adopts a thioredoxin-like fold . . +PF07977 FabA-like domain
This enzyme domain has a HotDog fold.. +PF07978 NIPSNAP
Pfam-B_3436 (release 16.0). Members of this family include many hypothetical proteins. It also includes members of the NIPSNAP family which have putative roles in vesicular transport . This domain is often found in duplicate. . +PF07979 Intimin C-type lectin domain
Pfam-B_1879 (Release 16.0). This domain is found at the C-terminus of intimin. Its structure has been solved and shown to have a C-lectin type of structure . Intimin is a bacterial adhesion molecule involved in intimate attachment of enteropathogenic and enterohemorrhagic Escherichia coli to mammalian host cells. Intimin targets the translocated intimin receptor (Tir), which is exported by the bacteria and integrated into the host cell plasma membrane.. +PF07980 SusD_RagB;
Pfam-B_1855 (release 16.0). This family includes several hypothetical proteins. It also contains RagB, Swiss:Q9ZA59, a protein involved in signalling and SusD, Swiss:Q8A1G2, an outer membrane protein involved in nutrient binding .. +PF07981 Plasmodium repeat_MYXSPDY
Pfam-B_3138 (release 16.0). This repeat is found in two hypothetical Plasmodium proteins.. +PF07982 Herpes UL74 glycoproteins
Pfam-B_3076 (release 16.0). Members of this family are viral glycoproteins that form part of an envelope complex .. +PF07983 X8 domain
Pfam-B_374 (Release 16.0). The X8 domain contains at least 6 conserved cysteine residues that presumably form three disulphide bridges. The domain is found in an Olive pollen allergen as well as at the C-terminus of several families of glycosyl hydrolases . This domain may be involved in carbohydrate binding. This domain is characteristic of GPI-anchored domains [4,5].. +PF07984 Domain of unknown function (DUF1693)
Pfam-B_3630 (release 16.0). This family contains many hypothetical proteins. It also includes four nematode prion-like proteins. This domain has been identified as part of the nucleotidyltransferase superfamily .. +PF07985 SRR1
Pfam-B_29119 (release 16.0). SRR1 proteins are signalling proteins involved in regulating the circadian clock in Arabidopsis .. +PF07986 Tubulin binding cofactor C
Pfam-B_4111 (release 16.0). Members of this family are involved in the folding pathway of tubulins and form a beta helix structure .. +PF07987 Bacterial_GLE1;
Domain of unkown function (DUF1775). Pfam-B_12641 (release 16.0). Domain found in bacteria with undetermined function. Its structure has been determined and is an immunoglobulin-like fold.. +PF07988 Wos2;
Pfam-B_4851 (release 16.0). This region of Myb proteins has previously been described as the transcriptional activation domain present in the vertebrate c-Myb and A-Myb, but neither vertebrate B-Myb proteins nor Myb proteins of invertebrates. Because vertebrate B-Myb (but neither A-Myb nor c-Myb) can partially complement Drosophila Myb null mutants, this region appears to have been a relatively recent insertion.. +PF07989 Spindle_assoc;
Microtubule associated. Pfam-B_45034 (release 16.0). This presumed domain has been identified in two microtubule associated proteins in Schizosaccharomyces pombe, Mto1 and Pcp1. Mto1 has been identified in association with spindle pole body and non-spindle pole body microtubules . The pericentrin homolog Pcp1 is also associated with the fungal centrosome or spindle pole body (SPB) .. +PF07990 Nucleic acid binding protein NABP
Pfam-B_10222 (release 16.0). Many members of this family are putative nucleic acid binding proteins. One member of this family has been partially characterised and contains two putative phosphorylation sites and a possible dimerisation / leucine zipper domain.. +PF07991 Acetohydroxy acid isomeroreductase, catalytic domain
Prodom_2380 (release 99.1). Acetohydroxy acid isomeroreductase catalyses the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine.. +PF07992 Pyridine nucleotide-disulphide oxidoreductase
This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain.. +PF07993 Male sterility protein
Pfam-B_1115 (release 6.4). This family represents the C-terminal region of the male sterility protein in a number of arabidopsis and drosophila. A sequence-related jojoba acyl CoA reductase is also included.. +PF07994 Myo-inositol-1-phosphate synthase
Pfam-B_959 (release 4.1). This is a family of myo-inositol-1-phosphate synthases. Inositol-1-phosphate synthase catalyses the conversion of glucose-6-phosphate to inositol-1-phosphate, which is then dephosphorylated to inositol . Inositol phosphates play an important role in signal transduction.. +PF07995 Glucose / Sorbosone dehydrogenase
Pfam-B_1863 (release 16.0). Members of this family are glucose/sorbosone dehydrogenases that possess a beta-propeller fold.. +PF07996 Type IV secretion system proteins
Pfam-B_4497 (release 16.0). Members of this family are components of the type IV secretion system. They mediate intracellular transfer of macromolecules via a mechanism ancestrally related to that of bacterial conjugation machineries .. +PF07997 Protein of unknown function (DUF1694)
Pfam-B_4517 (release 16.0). This family contains many hypothetical proteins.. +PF07998 DUF1695;
Peptidase family M54. Pfam-B_4509 (release 16.0). This is a family of metallopeptidases. Two human proteins have been reported to degrade synthetic substrates and peptides .. +PF07999 Retrotransposon hot spot protein
Pfam-B_4567 (release 16.0). Members of this family are retrotransposon hot spot proteins. They are associated with polymorphic subtelomeric regions in Trypanosoma. These proteins contain a P-loop motif.. +PF08000 DUF1696;
Pfam-B_4657 (release 16.0). This family contains many bacterial hypothetical proteins. The structures of Swiss:A1SD03, PDB:3hsa, and Swiss:A3QB43, PDB:3dcx, show similarities to the PH or pleckstrin homology domain. First evidence of PH-like domains in bacteria suggests role in cell envelope stress response .. +PF08001 CMV US
Pfam-B_4698 (release 16.0). This is a family of unique short (US) cytoplasmic glycoproteins which are expressed in cytomegalovirus .. +PF08002 Protein of unknown function (DUF1697)
Pfam-B_4800 (release 16.0). This family contains many hypothetical bacterial proteins.. +PF08003 DUF1698;
Protein of unknown function (DUF1698). Pfam-B_4787 (release 16.0). This family contains many hypothetical proteins. It also includes two putative methyltransferase proteins, Swiss:Q8EEE6 and Swiss:Q88MX8.. +PF08004 Protein of unknown function (DUF1699)
Pfam-B_4896 (release 16.0). This family contains many archaeal proteins which have very conserved sequences. . +PF08005 PHR domain
This domain is called PHR as it was originally found in the proteins PAM (Swiss:O75592), highwire (Swiss:Q9NB71) and RPM (Swiss:Q17551) . This domain can be duplicated in the highwire, PAM and RPM sequences. The C-terminal region of the protein BTBD1 includes the PHR domain and is known to interact with Topoisomerase I, an enzyme which relaxes DNA supercoils .. +PF08006 Protein of unknown function (DUF1700)
Pfam-B_5023 (release 16.0). This family contains many hypothetical bacterial proteins and two putative membrane proteins (Swiss:Q6GFD0 and Swiss:Q6G806). . +PF08007 DUF1701;
Cupin superfamily protein. Pfam-B_5011 (release 16.0). This family contains many hypothetical proteins that belong to the cupin superfamily.. +PF08008 Viral cysteine rich
Pfam-B_4965 (release 16.0). Members of this family are polydna viral proteins that contain a cysteine rich motif . Some members of this family have multiple copies of this domain.. +PF08009 TOM13;
CDP-alcohol phosphatidyltransferase 2. Pfam-B_51131 (release 16.0). This domain is found on CDP-alcohol phosphatidyltransferases. These enzymes catalyse the displacement of CMP from a CDP-alcohol by a second alcohol with formation of a phosphodiester bond and concomitant breaking of a phosphoride anhydride bond.. +PF08010 Bacteriophage protein GP30.3
Pfam-B_5273 (release 16.0). Proteins in this family are bacteriophage GP30.3 proteins. Their function is poorly characterised .. +PF08011 Protein of unknown function (DUF1703)
Pfam-B_5377 (release 16.0). This family contains many hypothetical bacterial proteins. It has been identified as a member of the PD-(D/E)XK nuclease superfamily through transitive meta profile searches . DUF1703 has the predicted secondary structure pattern of the restriction endonuclease-like fold core and contains an additional beta-strand at the C-terminus .. +PF08012 Protein of unknown function (DUF1702)
Pfam-B_5312 (release 16.0). This family of proteins contains many bacterial proteins that are encoded by the UnbL gene. The function of these proteins is unknown.. +PF08013 Tagatose 6 phosphate kinase
Pfam-B_5149 (release 16.0). Proteins in this family are tagatose 6 phosphate kinases. . +PF08014 Domain of unknown function (DUF1704)
Pfam-B_5490 (release 16.0). This family contains many hypothetical proteins.. +PF08015 Fungal mating-type pheromone
This family corresponds to mating-type pheromone proteins. The homobasidiomycetes, or mushroom fungi, have arguably the most complex mating system of all known organisms. Many species possess a mating system known as bifactorial incompatibility, where two unlinked loci control the mating -type of an individual incompatibility loci (the A and B mating-type loci). Each A mating-type sublocus encodes a pair of divergently transcribed homeodomain transcription factors while the genes responsible for B mating-type activity encode lipopeptide pheromones and G-protein -coupled pheromone receptors .. +PF08016 Polycystin cation channel
This family contains the cation channel region of PKD1 and PKD2 proteins.. +PF08017 Fibrinogen binding protein
Pfam-B_4323 (release 16.0). Proteins in this family bind to fibrinogen. Members of this family includes the fibrinogen receptor, FbsA, (Swiss:Q8GIU3) which mediates platelet aggregation .. +PF08018 Frog antimicrobial peptide
This family includes antimicrobial peptides secreted from skins of frogs. The secretion of antimicrobial peptides from the skins of frogs plays an important role in the self defense of these frogs. Structural characterization of these peptides showed that they belonged to four known families: the brevinin-1 family, the esculentin-2 family, the ranatuerin-2 family and the temporin family .. +PF08019 Domain of unknown function (DUF1705)
Pfam-B_1101 (release 16.0). Some members of this family are putative bacterial membrane proteins. This domain is found immediately N terminal to the sulfatase domain in many sulfatases.. +PF08020 Protein of unknown function (DUF1706)
Pfam-B_5540 (release 16.0). This family contains many hypothetical proteins from bacteria and yeast.. +PF08021 Siderophore-interacting FAD-binding domain
+PF08022 FAD-binding domain
Pfam-B_728 (release 4.2). +PF08023 Frog antimicrobial peptide
This family consists of the major classes of antimicrobial peptides secreted from the skin of frogs that protect the frogs against invading microbes. They are typically between 10-50 amino acids long and are derived from proteolytic cleavage of larger precursors. Major classes of peptides such esculentin, gaegurin, brevinin, rugosin and ranatuerin are included in this family .. +PF08024 Ant antimicrobial peptide
Short protein clustering. This family consists of the ponericin family of antimicrobial peptides isolated from predatory ant Pachycondyla goeldii. The ponericin peptides may adopt amphipathic alpha-helical structure in polar environments. In the ant colony, these peptides exhibit a defensive role against microbial pathogens arising from prey introduction and/or ingestion .. +PF08025 Spider antimicrobial peptide
Short protein clustering. This family includes antimicrobial peptides isolated from the crude venom of the wolf spider Oxyopes kitabensis. These peptides, known as oxyopinins, are the largest linear cationic amphipathic peptides chemically characterised and exhibit disrupting activities towards biological membranes .. +PF08026 Bee antimicrobial peptide
Short protein clustering. This family consists of antimicrobial peptides produced by bees. These peptides have strong antimicrobial and some anti-fungal activity and have homology to abaecin, which is the largest proline-rich antimicrobial peptide isolated from the European bumblebee Bombus pascuorum .. +PF08027 Albumin I
Pfam-B_100627 (release 16.0). The albumin I protein, a hormone-like peptide, stimulates kinase activity upon binding a membrane bound 43 kDa receptor. The structure of this domain reveals a knottin-like fold, comprised of three beta strands .. +PF08028 Acyl-CoA dehydrogenase, C-terminal domain
Pfam-B_8101 (release 16.0). +PF08029 HisG, C-terminal domain
Pfam-B_1550 (release 16.0). +PF08030 Ferric reductase NAD binding domain
+PF08031 Berberine and berberine like
Pfam-B_649 (release 16.0). This domain is found in the berberine bridge and berberine bridge- like enzymes which are involved in the biosynthesis of numerous isoquinoline alkaloids. They catalyse the transformation of the N-methyl group of (S)-reticuline into the C-8 berberine bridge carbon of (S)-scoulerine .. +PF01238 Phosphomannose isomerase type I
This is a family of Phosphomannose isomerase type I enzymes (EC 5.3.1.8).. +PF08032 RNA 2'-O ribose methyltransferase substrate binding
Pfam-B_742 (release 16.0). This domain is a RNA 2'-O ribose methyltransferase substrate binding domain.. +PF08033 Sec23/Sec24 beta-sandwich domain
+PF08034 Trematode eggshell synthesis protein
This domain has been identified in a number of distantly related species of trematodes. This protein domain is crucial for eggshell synthesis in trematodes (Ebersberger I).. +PF08035 Opioids neuropeptide
This family corresponds to the conserved YGG motif that is found in a wide variety of opioid neuropeptides such as enkephalin.. +PF08036 Diapausin family of antimicrobial peptide
Short protein clustering. This family consists of diapausin-related antimicrobial peptides. Diapause during periods of environmental adversity is an essential part of the life cycle of many organisms with the molecular basis being different among animals. Diapause-specific peptides provide anti-fungal activity and act as N-type voltage-gated calcium channel blocker .. +PF08037 Attractin family
Short protein clustering. This family consists of the attractin family of water-borne pheromone. Mate attraction in Aplysia involves a long-distance water-borne signal in the form of the attractin peptide, that is released during egg laying. These peptides contain 6 conserved cysteines and are folded into 2 antiparallel helices. The second helix contains the IEECKTS sequence conserved in Aplysia attractins .. +PF08038 TOM7 family
Short protein clustering. This family consists of TOM7 family of mitochondrial import receptors. TOM7 forms part of the translocase of the outer mitochondrial membrane (TOM) complex and it appears to function as a modulator of the dynamics of the mitochondrial protein transport machinery by promoting the dissociation of subunits of the outer membrane translocase .. +PF08039 Mit_preoteolip;
Mitochondrial proteolipid. Short protein clustering. This family consists of proteins with similarity to the mitochondrial proteolipids. Mitochondrial proteolipid consists of about 60 amino acids residues and is about 6.8 kDa in size .. +PF08040 MNLL subunit
Short protein clustering. This family consists of the MNLL subunits of NADH-ubiquinone oxidoreductase complex. NADH-ubiquinone oxidoreductase is involved in the transfer of electrons from NADH to the electron transport chain. This oxidation of NADH is coupled to proton transfer across the membrane, generating a proton motive force that is utilised for the synthesis of ATP . MNLL subunit is one of the many subunits found in the complex and it contains a mitochondrial import sequence. However, the role of MNLL subunit is unclear .. +PF08041 PetM family of cytochrome b6f complex subunit 7
Short protein clustering. This family consists of the PetM family of cytochrome b6f complex subunit IV. The cytochrome b6f complex consists of 7 subunits and contains 2 beta hemes and 1 chlorophyll alpha per cytochrome f. It is highly active in transferring electrons from decylplastoquinol to oxidised plastocyanin .. +PF08042 PqqA family
Short protein clustering. This family consists of proteins belonging to the coenzyme Pyrroloquinoline quinone A (pqqA) family. PQQ is the non-covalently bounded prosthetic group of many quinoproteins catalysing reactions in the periplasm of Gram-negative bacteria. PQQ is formed by the fusion of glutamate and tyrosine and synthesis of PQQ require the proteins encoded by the pqqABCDEF operon but details of the biosynthetic pathway are unclear .. +PF08043 Actin_bind_SAA;
The repeat has the consensus sequence GDV(K/Q/R)(T/S/G)X(R/K/T) WLFETXPLD. This repeat motif is typically found in the N-terminus of the proteins, with a copy number between 2 and 28 repeats. Direct evidence for binding to and stabilising F-actin has been found in the human protein Swiss:Q702N9. The homologues in mouse and chicken localise in the adherens junction complex of the intercalated disc in cardiac muscle and in the myotendon junction of skeletal muscle. mXin may co-localise with Vinculin which is known to attach the actin to the cytoplasmic membrane . It has been shown that the amino-terminus of human xin (CMYA1) binds the EVH1 domain of Mena/VASP/EVL, and the carboxy-terminus binds the, for the filamin family unique, domain 20 of filaminC . This confirms the proposed role of xin repeat containing proteins as F-actin-binding adapter proteins.. +PF08044 Domain of unknown function (DUF1707)
This domain is found in a variety of Actinomycetales proteins. All of the proteins containing this domain are hypothetical and probably membrane bound or associated. Currently, the function of this domain is unclear.. +PF08045 Cell division control protein 14, SIN component
Cdc14 is a component of the septation initiation network (SIN) and is required for the localisation and activity of Sid1. Sid1 is a protein kinase that localises asymmetrically to one spindle pole body (SPB) in anaphase and disappears prior to cell separation .. +PF08046 IlvGEDA operon leader peptide
Short protein clustering. This family consists of the leader peptides of ilvGEDA operon. The expression of the ilvGEDA operon of E coli K-12 is multivalently controlled by the three branched -chain amino acids. Regulation is thought to occur by attenuation of transcription in response to the changing levels of the cognate tRNAs. Transcription of this operon is usually terminated at the end of the leader (regulatory) region .. +PF08047 Histidine operon leader peptide
Short protein clustering. This family consists of the leader peptide of the histidine (his) operon. The his operon contains all the genes necessary for histidine biosynthesis. The region corresponding to the untranslated 5' end of the transcript, named the his leader region, displays the typical features of the T box transcriptional attenuation mechanism which is involved in the regulation of many amino acid biosynthetic operons .. +PF08048 Tap RepA1 leader peptide
Short protein clustering. This family consists of the RepA1 leader peptides. The frequency of replication of IncFII plasmid NR1 during the cell division cycle is regulated by the control of the synthesis of the plasmid-specific replication initiation protein (RepA1). When RepA1 is synthesised, it binds to the plasmid replication origin (ori) and effects the assembly of a replication complex composed of host proteins that mediate the replication of the plasmid . The tap gene encodes a 24-amino acids protein. The translation of tap is required for translation of repA.. +PF08049 IlvB leader peptide
Short protein clustering. This family consists of the leader peptides of the ilvB operon. This region encodes a potential leader polypeptide containing 32 amino acids, 12 of which are the regulatory amino acids valine and leucine. A model for the multivalent regulation of this operon by valyl- and leucyl-tRNA is proposed on the basis of the mutually exclusive formation of five strong stem-and-loop structures in the leader mRNA .. +PF08050 Tetracycline resistance leader peptide
Short protein clustering. This family consists of the tetracycline resistance leader peptide. The presence of 3 inverted repeats which can form 2 different conformations of mRNA suggests that the tetracycline resistance (TcR) region is regulated by a translational attenuation mechanism. A Rho-independent transcriptional terminator structure is present immediately after the translational stop codon of the TET protein .. +PF08051 Erythromycin resistance leader peptide
Short protein clustering. This family consists of erythromycin resistance gene leader peptides. These leader peptides are involved in the translational attenuation of erythromycin resistance genes. Interestingly, the consensus sequence of peptides conferring erythromycin resistance is similar to that of the leader peptides, thus indicating that a similar type of interaction between the nascent peptide and antibiotics can occur in both cases . This family also includes a small number of regions from within larger proteins from actinomycetes.. +PF08052 PyrBI operon leader peptide
Short protein clustering. This family consists of the pyrBI operon leader peptides. The expression of the pyrBI operon, which encodes the subunits of the pyrimidine biosynthetic enzyme aspartate transcarbamylase, is regulated primarily through a UTP-sensitive transcriptional attenuation control mechanism. In this mechanism, the concentration of UTP determines the extent of coupling between transcription and translation within the pyrBI leader region, hence determining the level of rho-independent transcriptional termination at an attenuator preceding the pyrB gene .. +PF08053 Tryptophanese operon leader peptide
Short protein clustering. This family consists of the tryptophanase (tna) operon leader peptide. Tna catalyses the degradation of L-tryptophan to indole, pyruvate and ammonia, enabling the bacteria to utilise tryptophan as a source of carbon, nitrogen and energy. The tna operon of E. coli contains two major structural genes, tnaA and tnaB. Preceding tnaA in the tna operon is a 319-nucleotide transcribed regulatory region that contains the coding region for a 24-residue leader peptide, TnaC. The RNA sequence in the vicinity of the tnaC stop codon is rich in cytidylate residues, which is required for efficient Rho-dependent termination in the leader region of the tna operon .. +PF08054 Leucine operon leader peptide
Short protein clustering. This family consists of the leucine operon leader peptide. The leucine operon is involved in the control of the biosynthesis of leucine. Four adjacent leucine codons within the leucine leader RNA are critically important in transcription attenuation-mediated control of leucine operon expression in bacteria. The leader RNA contains translational start and stop signals, a cluster of four leucine codons and overlapping regions of dyad symmetry that are capable of forming stem-and-loop structures .. +PF08055 Tryptophan leader peptide
Short protein clustering. This family consists of the tryptophan (trp) leader peptides. Tryptophan accumulation is the principal event resulting in downregulation of transcription of the structural genes of the trp operon. The leader peptide of the trp operon forms mutually exclusive secondary structures that would either result in the termination of transcription of the trp operon when tryptophan is in plentiful supply or vice versa .. +PF08056 Tryptophan operon leader peptide
Short protein clustering. This family consists of the tryptophan operon leader peptides. The tryptophan operon is regulated by transcription attenuation in response to changes in the level of tryptophan. The transcript of the leader peptide can adopt alternative mutually-exclusive secondary structures that would either result in termination of transcription of the tryptophan structural genes or in transcription of the entire operon .. +PF08057 Erythromycin resistance leader peptide
Short protein clustering. This family consists of erythromycin resistance gene leader peptides. These leader peptides are involved in the transcriptional attenuation control of the synthesis of the macrolide-lincosamide-streptogramin B resistance protein. It acts as a transcriptional attenuator, in contrast to other inducible erm genes. The mRNA leader sequence can fold in either of two mutually exclusive conformations, one of which is postulated to form in the absence of induction, and to contain two rho factor-independent terminators. .. +PF08058 Nuclear pore complex component
Wood V, Mistry J, Novatchkova M. Proteins containing this domain are components of the nuclear pore complex . One member of this family is Nucleoporin POM34 (Swiss: Q12445) which is thought to have a role in anchoring peripheral Nups into the pore and mediating pore formation .. +PF08059 SEP domain
Pfam-B_1894 (release 16.0). The SEP domain is named after Saccharomyces cerevisiae Shp1, Drosophila melanogaster eyes closed gene (eyc), and vertebrate p47. In p47, the SEP domain has been shown to bind to and inhibit the cysteine protease cathepsin L . Most SEP domains are succeeded closely by a UBX domain .. +PF08060 NOSIC (NUC001) domain
This is the central domain in Nop56/SIK1-like proteins .. +PF08061 P68HR (NUC004) repeat
This short region is found in two copies in p68-like RNA helicases .. +PF08062 P120R (NUC006) repeat
This characteristic repeat of proliferating cell nuclear antigen P120 is found in three copies .. +PF08063 PADR1 (NUC008) domain
This domain is found in poly(ADP-ribose)-synthetases . The function of this domain is unknown.. +PF08064 UME (NUC010) domain
This domain is characteristic of UVSB PI-3 kinase, MEI-41 and ESR1 .. +PF08065 K167R (NUC007) repeat
This family represents the K167/Chmadrin repeat . The function of this repeat is unknown.. +PF08066 PMC2NT (NUC016) domain
This domain is found at the N-terminus of 3'-5' exonucleases with HRDC domains, and also in putative exosome components .. +PF08067 ROKNT (NUC014) domain
This presumed domain is found at the N-terminus of RNP K-like proteins that also contains KH domains Pfam:PF00013 .. +PF08068 DKCLD (NUC011) domain
This is a TruB_N/PUA domain associated N-terminal domain of Dyskerin-like proteins .. +PF08069 Ribosomal S13/S15 N-terminal domain
This domain is found at the N-terminus of ribosomal S13 and S15 proteins. This domain is also identified as NUC021 .. +PF08070 DTHCT (NUC029) region
The DTHCT region is the C-terminal part of DNA gyrases B / topoisomerase IV / HATPase proteins . This region is composed of quite low complexity sequence.. +PF08071 RS4NT (NUC023) domain
This is the N-terminal domain of Ribosomal S4 / S4e proteins. This domain is associated with S4 and KOW domains .. +PF08072 BDHCT (NUC031) domain
This is a C-terminal domain in Bloom's syndrome DEAD helicase subfamily .. +PF08073 CHDNT (NUC034) domain
The CHDNT domain is found in PHD/RING finger and chromo domain-associated helicases .. +PF08074 CHDCT2 (NUC038) domain
The CHDCT2 C-terminal domain is found in PHD/RING finger and chromo domain-associated CHD-like helicases .. +PF08075 NOPS (NUC059) domain
This domain is found at the C-terminus of NONA and PSP1 proteins adjacent to 1 or 2 Pfam:PF00076 domains .. +PF08076 Tetracycline resistance determinant leader peptide
Short protein clustering. This family consists of the tetracycline resistance determinant tet(M) leader peptides. A short open reading frame corresponding to a 28 amino acid peptide which contains a number of inverted repeat sequences was found immediately upstream of the tet(M). Transcriptional analysis has found that expression of tet(M) resulted from an extension of a small transcript representing the upstream leader region into the resistance determinant. Thus this leader sequence is responsible for transcriptional attenuation and thus regulation of the transcription of tet(M) .. +PF08077 Chloramphenicol resistance gene leader peptide
Short protein clustering. This family consists of chloramphenicol (Cm) resistance gene leader peptides. Inducible resistance to Cm in both Gram positive and Gram negative bacteria is controlled by translation attenuation. In translation attenuation, the ribosome-binding-site (RBS) for the resistance determinant is sequestered in a secondary structure domain within the mRNA. Preceding the secondary structure is a short, translated ORF termed the leader. Ribosome stalling in the leader causes the destabilization of the downstream secondary structure, allowing initiation of translation of the Cm resistance gene .. +PF08078 PsaX family
Short protein clustering. This family consists of the PsaX family of photosystem I (PSI) protein subunits. PSI is a large multi-subunit pigment protein complex embedded in the thylakoid membranes of green plants and cyanobacteria. PsaX is one of the 12 protein subunits found in PSI and these subunits are arranged as monomers or trimers within the membrane as shown by the structure of the trimeric complex from Synechococcus elongatus .. +PF08079 Ribosomal L30 N-terminal domain
This presumed domain is found at the N-terminus of Ribosomal L30 proteins and has been termed RL30NT or NUC018 .. +PF08080 RNPHF zinc finger
This domain is a putative zinc-binding domain (CHHC motif) in RNP H and F. The domain is often associated with Pfam:PF00076.. +PF08081 RBM1CTR (NUC064) family
This C-terminal region is found in RBM1-like RNA binding hnRNPs .. +PF08082 PRO8NT (NUC069), PrP8 N-terminal domain
The PRO8NT domain is found at the N-terminus of pre-mRNA splicing factors of PRO8 family . The NLS or nuclear localisation signal for these spliceosome proteins begins at the start and runs for 60 residues. N-terminal to this domain is a highly variable proline-rich region .. +PF08083 PROCN (NUC071) domain
The PROCN domain is the central domain in pre-mRNA splicing factors of PRO8 family .. +PF08084 PROCT (NUC072) domain
The PROCT domain is the C-terminal domain in pre-mRNA splicing factors of PRO8 family .. +PF08085 Entericidin EcnA/B family
Short protein clustering. This family consists of the entericidin antidote/toxin peptides. The entericidin locus is activated in stationary phase under high osmolarity conditions by rho-S and simultaneously repressed by the osmoregulatory EnvZ/OmpR signal transduction pathway. The entericidin locus encodes tandem paralogous genes (ecnAB) and directs the synthesis of two small cell-envelope lipoproteins which can maintain plasmids in bacterial population by means of post-segregational killing .. +PF08086 Ergtoxin family
Short protein clustering. This family consists of ergtoxin peptides which are toxins secreted by the scorpions. The ergtoxins are capable of blocking the function of K+ channels. More than 100 ergtoxins have been found from scorpion venoms and they have been classified into three subfamilies according to their primary structures .. +PF08087 Conotoxin O-superfamily
Short protein clustering. This family consists of members of the conotoxin O-superfamily. The O-superfamily of conotoxins consists of 3 groups of Conus peptides that belong to the same structural group. These 3 groups differ in their pharmacological properties: the omega-conotoxins which inhibit calcium channels, the delta-conotoxins which slow down the inactivation rate of voltage-sensitive sodium channels and the muO-conotoxins which block the voltage-sensitive sodium currents .. +PF08088 Conotoxin I-superfamily
Short protein clustering. This family consists of the I-superfamily of conotoxins. This is a new class of peptides in the venom of some Conus species. These toxins are characterised by four disulfide bridges and inhibit or modify ion channels of nerve cells. The I-superfamily conotoxins are found in five or six major clades of cone snails and could possibly be found in many more species .. +PF08089 Huwentoxin-II family
Short protein clustering. This family consists of the huwentoxin-II (HWTX-II) family of toxins secreted by spiders. These toxins are found in venom that is secreted by the bird spider Selenocosmia huwena Wang. The HWTX-II adopts a novel scaffold different from the ICK motif that is found in other huwentoxins. HWTX-II consists of 37 amino acid residues including six cysteines involved in three disulfide bridges .. +PF08090 Enterotoxin_ST;
Heat stable E.coli enterotoxin 1. Short protein clustering. Heat-stable toxin 1 of entero-aggregative E.coli (EAST1) is a small toxin. It is not, however, solely associated with entero-aggregative E.coli but also with many other diarrhoeic E. coli families. Some studies have established the role of EAST1 in some human outbreaks of diarrhoea. Isolates from farm animals have been shown to carry the astA gene coding for EAST1. However, the relation between the presence of EAST1 and disease is not conclusive .. +PF08091 Spider insecticidal peptide
Short protein clustering. This family consists of insecticidal peptides isolated from venom of spiders of Aptostichus schlingeri and Calisoga sp. Nine insecticidal peptides were isolated from the venom of the Aptostichus schlingeri spider and seven of these toxins cause flaccid paralysis to insect larvae within 10 min of injection. However, all nine peptides were lethal within 24 hours .. +PF08092 Magi peptide toxin family
Short protein clustering. This family consists of Magi peptide toxins (Magi 1, 2 and 5) isolated from the venom of Hexathelidae spider. These insecticidal peptide toxins bind to sodium channels and induce flaccid paralysis when injected into lepidopteran larvae. However, these peptides are not toxic to mice when injected intracranially at 20 pmol/g .. +PF08093 Magi 5 toxic peptide family
Short protein clustering. This family consists of toxic peptides (Magi 5) found in the venom of the Hexathelidae spider. Magi 5 is the first spider toxin with binding affinity to site 4 of a mammalian sodium channel and the toxin has an insecticidal effect on larvae, causing paralysis when injected into the larvae .. +PF08094 Conotoxin TVIIA/GS family
Short protein clustering. This family consists of conotoxins isolated from the venom of cone snail Conus tulipa and Conus geographus. Conotoxin TVIIA, isolated from Conus tulipa displays little sequence homology with other well-characterised pharmacological classes of peptides, but displays similarity with conotoxin GS, a peptide from Conus geographus. Both these peptides block skeletal muscle sodium channels and also share several biochemical features and represent a distinct subgroup of the four-loop conotoxins .. +PF08095 Hefutoxin family
Short protein clustering. This family consists of the hefutoxins that are found in the venom of the scorpion Heterometrus fulvipes. These toxins, kappa-hefutoxin1 and kappa-hefutoxin2, exhibit no homology to any known toxins. The hefutoxins are potassium channel toxins .. +PF08096 Bombolitin family
Short protein clustering. This family consists of the bombolitin peptides that are found in the venom of the bumblebee Megabombus pennsylvanicus. Bombolitins are structurally and functionally very similar. They lyse erythrocytes and liposomes, release histamine from rat peritoneal mast cells, and stimulate phospholipase A2 from different sources .. +PF08097 Conotoxin T-superfamily
Short protein clustering. This family consists of the T-superfamily of conotoxins. Eight different T-superfamily peptides from five Conus species were identified. These peptides share a consensus signal sequence, and a conserved arrangement of cysteine residues. T-superfamily peptides were found expressed in venom ducts of all major feeding types of Conus, suggesting that the T-superfamily is a large and diverse group of peptides, widely distributed in the 500 different Conus species .. +PF08098 Anemonia sulcata toxin III family
Short protein clustering. This family consists of the Anemonia sulcata toxin III (ATX III) neurotoxin family. ATX III is a neurotoxin that is produced by sea anemone; it adopts a compact structure containing four reverse turns and two other chain reversals, but no regular alpha-helix or beta-sheet. A hydrophobic patch found on the surface of the peptide may constitute part of the sodium channel binding surface .. +PF08099 Scorpion calcine family
Short protein clustering. This family consists of the calcine family of scorpion toxins. The calcine family consists of Maurocalcine and Imperatoxin. These toxins have been shown to be potent effectors of ryanodine-sensitive calcium channels from skeletal muscles. These toxins are thus useful for dihydropyridine receptor/ryanodine receptor interaction studies [1,2].. +PF08100 Dimerisation domain
Pfam-B_455 (Release 16.0). This domain is found at the N-terminus of a variety of plant O-methyltransferases. It has been shown to mediate dimerisation of these proteins .. +PF08101 Domain of unknown function (DUF1708)
Mistry J, Wood V, Novatchkova M. This is a yeast domain of unknown function.. +PF08102 Scorpion antimicrobial peptide
Short protein clustering. This family consists of antimicrobial peptides secreted by scorpions. Novel antimicrobial peptides have been isolated from scorpions, namely the opistoporin and the pandinin . These peptides form essentially helical structures and demonstrate high antimicrobial activity against Gram-negative and Gram-positive bacteria respectively.. +PF08103 Uperin family
Short protein clustering. This family consists of the uperin family of antimicrobial peptides. Uperin is a wide-spectrum antibiotic peptide isolated from the Australian toadlet, Uperoleia mjobergii. Being only 17 amino acid residues long, it is smaller than most other wide-spectrum antibiotic peptides isolated from amphibians. Uperin adopts a well-defined amphipathic alpha-helix with distinct hydrophilic and hydrophobic faces .. +PF08104 Ponericin L family
Short protein clustering. This family consists of the ponericin L family of antimicrobial peptides that are isolated from the venom of the predatory ant Pachycondyla goeldii. Ponericin L family shares similarities with dermaseptins. Ponericin L may adopt an amphipathic alpha-helical structure in polar environments and these peptides exhibit a defensive role against microbial pathogens arising from prey introduction and/or ingestion .. +PF08105 Metchnikowin family
Short protein clustering. This family consists of the metchnikowin family of antimicrobial peptides from Drosophila. Metchnikowin is a proline-rich peptide whose expression is immune-inducible. Induction of the metchnikowin gene expression can be mediated either by the TOLL pathway or by the imd gene product. The metchnikowin peptide is unique among the Drosophila antimicrobial peptides in that it is active against both bacteria and fungi .. +PF08106 Formaecin family
Short protein clustering. This family consists of the formaecin family of antimicrobial peptides isolated from the bulldog ant Myrmecia gulosa in response to bacterial infection. Formaecins are inducible peptide antibiotics and are active against growing Escherichia coli but were inactive against other Gram-negative and Gram-positive bacteria. Formaecin peptides are 16 amino acids long, are rich in proline and have N-acetylgalactosamine O-linked to a conserved threonine .. +PF08107 Pleurocidin family
Short protein clustering. This family consists of the pleurocidin family of antimicrobial peptides. Pleurocidins are found in the skin mucous secretions of the winter flounder (Pleuronectes americanus) and these peptides exhibit antimicrobial activity against Escherichia coli. Pleurocidin is predicted to assume an amphipathic alpha-helical conformation similar to other linear antimicrobial peptides and may play a role in innate host defense .. +PF08108 Halocidin family
Short protein clustering. This family consists of the halocidin family of antimicrobial peptides. Halocidins are isolated from the haemocytes of the tunicate, Halocynthia aurantium. They are dimeric structures which are formed via a disulfide linkage between cysteines of two different-sized monomers. Halocidins have been shown to have strong antimicrobial activities against a wide variety of pathogenic bacteria and could be ideal candidates as peptide antibiotics against multidrug-resistant bacteria .. +PF08109 Lactocin 705 family
Short protein clustering. This family consists of lactocin 705 which is a bacteriocin produced by Lactobacillus casei CRL 705. Lactocin 705 is a class IIb bacteriocin, whose activity depends upon the complementation of two peptides (705-alpha and 705-beta) of 33 amino acid residues each. Lactocin 705 is active against several Gram-positive bacteria, including food-borne pathogens and is a good candidate to be used for biopreservation of fermented meats .. +PF08110 Ocellatin family
Short protein clustering. This family consists of the ocellatin family of antimicrobial peptides. Ocellatins are produced from the electrical-stimulated skin secretions of the South American frog, Leptodactylus ocellatus. The family consists of three structurally related peptides, ocellatin 1, ocellatin 2 and ocellatin 3. These peptides present hemolytic activity against human erythrocytes and are also active against Escherichia coli .. +PF08111 Pea-VEAacid family
Short protein clustering. This family consists of the PEA-VEAacid neuropeptides family. These neuropeptides are isolated from the abdominal perisympathetic organs of the American cockroach. These peptides are found together with Pea-YLS-amide and Pea-SKNacid, giving a unique neuropeptide pattern in abdominal perisympathetic organs. The functions of these neuropeptides are unknown .. +PF08112 ATP synthase epsilon subunit
Short protein clustering. This family consists of epsilon subunits of the ATP synthase. The ATP synthase complex is composed of an oligomeric transmembrane sector (CF0), and a catalytic core (CF1). CF1 is composed of 5 subunits, of which the epsilon subunit functions as a potent inhibitor of ATPase activity in both soluble and bound CF1. Only when the epsilon inhibition is disabled is high ATPase activity detected in ATPase . +PF08113 Cytochrome c oxidase subunit IIa family
Short protein clustering. This family consists of the cytochrome c oxidase subunit IIa family. The bax-type cytochrome c oxidase from Thermus thermophilus is known as a two subunit enzyme. From its crystal structure, it was discovered that an additional transmembrane helix 'subunit IIa' spans the membrane. This subunit consists of 34 residues forming one helix across the membrane. The presence of this subunit seems to be important for the function of cytochrome c oxidases .. +PF08114 ATPase proteolipid family
Short protein clustering. This family consists of small proteolipids associated with the plasma membrane H+ ATPase. Two proteolipids (PMP1 and PMP2) are associated with the ATPase and both genes are similarly expressed in the wild-type strain of yeast; no modification of the level of transcription of one PMP gene is detected in a strain deleted of the other. Though both proteolipids show similarity with other small proteolipids associated with other cation-transporting ATPases, their functions remain unclear .. +PF08115 SFI toxin family
Short protein clustering. This family consists of the SFI family of spider toxins. This family of toxins might share structural, evolutionary and functional relationships with other small, highly structurally constrained spider neurotoxins. These toxins are highly selective agonists/antagonists of different voltage-dependent calcium channels and are extremely valuable reagents in the analysis of neuromuscular function .. +PF08116 PhTx neurotoxin family
Short protein clustering. This family consists of PhTx insecticidal neurotoxins that are found in the venom of the Brazilian spider Phoneutria nigriventer. The venom of Phoneutria nigriventer contains numerous neurotoxic polypeptides of 30-140 amino acids which exert a range of biological effects. While some of these neurotoxins are lethal to mice after intracerebroventricular injections, others are extremely toxic to insects of the orders Diptera and Dictyoptera but have much weaker toxic effects on mice .. +PF08117 Ptu family
Short protein clustering. This family consists of toxic peptides that are isolated from the saliva of assassin bugs. The saliva contains a complex mixture of proteins that are used by the bug either to immobilise the prey or to digest it. One of the proteins (Ptu1) has been purified and shown to block reversibly the N-type calcium channels and to be less specific for the L- and P/Q- type calcium channels expressed in BHK cells .. +PF08118 Yeast mitochondrial distribution and morphology (MDM) proteins
Pfam-B_37122 (release 16.0). Proteins in this family are yeast mitochondrial inner membrane proteins MDM31 and MDM32. These proteins are required for the maintenance of mitochondrial morphology, and the stability of mitochondrial DNA .. +PF08119 Scorpion acidic alpha-KTx toxin family
Short protein clustering. This family consists of acidic alpha-KTx short chain scorpion toxins. These toxins named parabutoxins, block voltage-gated K channels and have extremely low pI values. Furthermore, they lack the crucial pore-plugging lysine. In addition, the second important residue of the dyad, the hydrophobic residue (Phe or Tyr) is also missing .. +PF08120 Tamulustoxin family
Short protein clustering. This family consists of the tamulustoxins which are found in the venom of the Indian red scorpion (Mesobuthus tamulus). Tamulustoxin shares no similarity with other scorpion venom toxins, although the positions of its six cysteine residues suggest that it shares the same structural scaffold. Tamulustoxin acts as a potassium channel blocker .. +PF08121 Waglerin family
Short protein clustering. This family consists of the lethal peptides (waglerins) that are found in the venom of Trimeresurus wagleri. Waglerins are 22-24 residue lethal peptides and are competitive antagonists of the muscle nicotinic receptor (nAChR). Waglerin-1 possesses a distinctive selectivity for the alpha-epsilon interface binding site of the mouse nAChR .. +PF08122 NADH-ubiquinone oxidoreductase B12 subunit family
Short protein clustering. This family consists of the NADH-ubiquinone oxidoreductase B12 subunit proteins. NADH is the central source of electrons in the mitochondrial and bacterial respiration. NADH-ubiquinone oxidoreductase is involved in the transfer of electrons from NADH to the electron transport chain. This oxidation of NADH is coupled to proton transfer across the membrane, generating a proton motive force that is utilised for the synthesis of ATP. The function of this subunit is unclear .. +PF08123 Histone methylation protein DOT1
Pfam-B_12064 (release 16.0). The DOT1 domain regulates gene expression by methylating histone H3 . H3 methylation by DOT1 has been shown to be required for the DNA damage checkpoint in yeast .. +PF08124 Polysaccharide lyase family 8, N terminal alpha-helical domain
Pfam-B_2438 (release 16.0). This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen.. +PF08125 Mannitol dehydrogenase C-terminal domain
+PF08126 Propeptide_C25
This is found at the N terminal end of some of the members of the C25 peptidase family (PF01364). Little is known about the function of this motif.. +PF08127 Peptidase family C1 propeptide
This motif is found at the N terminal of some members of the Peptidase_C1 family (Pfam:PF00112) and is involved in activation of this peptidase .. +PF08129 Alpha/beta enterocin family
Short protein clustering. This family consists of the alpha and beta enterocins and lactococcin G peptides. These peptides have some antimicrobial properties; they inhibit the growth of Enterococcus spp. and a few other gram-positive bacteria. These peptides act as pore-forming toxins that create cell membrane channels through a barrel-stave mechanism and thus produce an ionic imbalance in the cell. This family of antimicrobial peptides belongs to the class II group of bacteriocins .. +PF08130 Type A lantibiotic family
Short protein clustering. This family consists of the type A lantibiotic peptides. Both Pep5 and epicidin-280 are ribosomally-synthesised antimicrobial peptides produced by Gram-positive bacteria that are characterised by the presence of lanthionine and/or methyllanthionine residues. The lantibiotics family has a highly specific activity against multi-drug resistant bacteria and has potential to be utilised in a wide range of medical applications [1,2].. +PF08131 Defensin-like peptide family
Short protein clustering. This family consists of the defensin-like peptides (DLPs) isolated from platypus venom. These DLPs show similar three-dimensional fold to that of beta-defensin-12 and sodium-channel neurotoxin Shl. However the side chains known to be functionally important to beta-defensin-12 and Shl are not conserved in DLPs. This suggests a different biological function. Consistent with this contention, DLPs have been shown to possess no anti-microbial properties and have no observable activity on rat dorsal-root-ganglion sodium-channel currents .. +PF08132 S-adenosyl-l-methionine decarboxylase leader peptide
Short protein clustering. This family consists of the S-adenosyl-l-methionine decarboxylase (AdoMetDC) leader peptides. AdoMetDC is a key regulatory enzyme in the biosynthesis of polyamines. All expressed plant AdoMetDC mRNA 5' leader sequences contain a highly conserved pair of overlapping upstream ORFs (uORFs) that overlap by one base. Sequences of the small uORFs are highly conserved between monocot, dicot and gymnosperm AdoMetDC mRNA species, suggesting a translational regulatory mechanism .. +PF08133 Anticodon nuclease activator family
Short protein clustering. This family consists of the anticodon nuclease activator proteins. Pre-existing host tRNAs are reprocessed during bacteriophage T4 infection of certain Escherichia coli strains. In this pathway, tRNA(Lys) is cleaved 5' by the anticodon nuclease to the wobble base and is later restored in polynucleotide kinase and RNA ligase reactions .. +PF08134 cIII protein family
Short protein clustering. This family consists of the cIII family of regulatory proteins. The lambda CIII protein has 54 amino acids and it forms an amphipathic helix within its amino acid sequence. Lambda cIII stabilises the lambda cII protein and the host sigma factor 32, responsible for transcribing genes of the heat shock regulon . . +PF08135 Major transforming protein E5 family
Short protein clustering. This family consists of the major transforming proteins (E5) of the bovine papilloma virus (BPV). The equine sarcoid is one of the most common dermatological lesions in equids. It is a benign, locally invasive dermal fibroblastic lesion and studies have shown an association of the lesions with BPV. E5 is a short hydrophobic membrane protein localising to the Golgi apparatus and other intracellular membranes. It binds to and constitutively activates the platelet-derived growth factor-beta in transformed cells. This stimulation activates a receptor signaling cascade which results in an intracellular growth stimulatory signal .. +PF08136 30S ribosomal protein subunit S22 family
Short protein clustering. This family consists of the 30S ribosomal proteins subunit S22 polypeptides. This polypeptide is 47 amino acids in length and has a molecular weight of about 5 kDa. The S22 subunit is a component of the stationary-phase-specific ribosomal protein and is assembled in the ribosomal particles in the stationary phase. This subunit along with other stationary-phase-specific ribosomal proteins result in compositional changes of ribosomes during the stationary phase. The significance of this change is not clear as yet .. +PF08137 DVL family
Short protein clustering. This family consists of the DVL family of proteins. In a gain-of-function genetic screen for genes that influence fruit development in Arabidopsis, DEVIL (DVL) gene was identified. DVL is a small protein and overexpression of the protein results in pleiotropic phenotypes featured by shortened stature, rounder rosette leaves, clustered inflorescences, shortened pedicles, and siliques with pronged tips. DVL family is a novel class of small polypeptides and the overexpression phenotypes suggest that these polypeptides may have a role in plant development .. +PF08138 Sex peptide (SP) family
Short protein clustering. This family consists of Sex Peptides (SP) that are found in Drosophila. On mating, the Drosophila female decreases her remating rate and increases her egg-laying rate due, in part, to the transfer of SP from the male to the female. SP are found in seminal fluids transferred from the male to the female during mating. The male seminal fluid proteins are referred to as accessory gland proteins (Acps). The SP is one of the most interesting Acps and plays an important role in reproduction .. +PF08139 VirB;
Prokaryotic membrane lipoprotein lipid attachment site. Short protein clustering. In prokaryotes, membrane lipoproteins are synthesized with a precursor signal peptide, which is cleaved by a specific lipoprotein signal peptidase (signal peptidase II). The peptidase recognizes a conserved sequence and cuts upstream of a cysteine residue to which a glyceride-fatty acid lipid is attached .. +PF08140 Crustacean cuticle protein repeat
Short protein clustering. This family consists of the cuticle proteins from the Cancer pagurus and the Homarus americanus. These proteins are isolated from the calcified regions of the crustacean and they contain two copies of an 18 residue sequence motif, which thus far has been found only in crustacean calcified exoskeletons .. +PF08141 Small acid-soluble spore protein H family
Short protein clustering. This family consists of the small acid-soluble spore proteins (SASP) of the H type (sspH). SspH are unique to spores of Bacillus subtilis and are expressed only in the forespore compartment during sporulation of this organism. The sspH genes are monocistronic and are recognised by the forespore-specific sigma factor for RNA polymerase - sigma-G. The specific role of this protein is unclear but is thought to play a role in sporulation under conditions different from that of the common laboratory tests of spore properties .. +PF08142 AARP2CN (NUC121) domain
This domain is the central domain of AARP2. It is weakly similar to the GTP-binding domain of elongation factor TU .. +PF08143 CBFNT (NUC161) domain
This N terminal domain is found in proteins of CARG-binding factor A-like proteins .. +PF08144 CPL (NUC119) domain
This C terminal domain is found in Penguin-like proteins associated with Pumilio like repeats .. +PF08145 BOP1NT (NUC169) domain
This N terminal domain is found in BOP1-like WD40 proteins .. +PF08146 BP28CT (NUC211) domain
This C terminal domain is found in BAP28-like nucleolar proteins .. +PF08147 DBP10CT (NUC160) domain
This C terminal domain is found in the Dbp10p subfamily of hypothetical RNA helicases . . +PF08148 DSHCT (NUC185) domain
This C terminal domain is found in DOB1/SK12/helY-like DEAD box helicases .. +PF08149 BING4CT (NUC141) domain
This C terminal domain is found in the BING4 family of nucleolar WD40 repeat proteins .. +PF08150 FerB (NUC096) domain
This is central domain B in proteins of the Ferlin family .. +PF08151 FerI (NUC094) domain
This domain is present in proteins of the Ferlin family. It is often located between two C2 domains .. +PF08152 GUCT (NUC152) domain
This is the C terminal domain found in the RNA helicase II / Gu protein family .. +PF08153 NGP1NT (NUC091) domain
This N terminal domain is found in a subfamily of hypothetical nucleolar GTP-binding proteins similar to human NGP1 .. +PF08154 NLE (NUC135) domain
This domain is located N terminal to WD40 repeats. It is found in the microtubule-associated protein Swiss:Q12024 . . +PF08155 NOGCT (NUC087) domain
This C terminal domain is found in the NOG subfamily of nucleolar GTP-binding proteins .. +PF08156 NOP5NT (NUC127) domain
This N terminal domain is found in RNA-binding proteins of the NOP5 family .. +PF08157 NUC129 domain
This C terminal domain is found in a novel family of hypothetical nucleolar proteins .. +PF08158 NUC130/3NT domain
This N terminal domain is found in a novel nucleolar protein family .. +PF08159 NUC153 domain
This small domain is found in a novel nucleolar family . . +PF08161 NUC173 domain
This is the central domain of a novel family of hypothetical nucleolar proteins .. +PF08163 NUC194 domain
This is domain B in the catalytic subunit of DNA-dependent protein kinases.. +PF08164 Apoptosis-antagonizing transcription factor, C-terminal
This C terminal domain is found in traube proteins . This is the domain of the AATF proteins that interacts with BLOS2 or Ceap, that functions as an adaptor in processes such as protein and vesicle processing and transport, and perhaps transcription.. +PF08165 FerA (NUC095) domain
This is central domain A in proteins of the Ferlin family .. +PF08166 NUC202 domain
This domain is found in a novel family of nucleolar proteins .. +PF08167 NUC201;
rRNA processing/ribosome biogenesis. Rix1 is a nucleoplasmic particle involved in rRNA processing/ribosome assembly [1,2]. It associates with two other proteins, Ipi1 and Ipi3, to form the RIX1 complex that allows Rea1 - the AAA ATPase - to associate with the 60S ribosomal subunit. More than 170 assembly factors are involved in the construction and maturation of yeast ribosomes, and after these factors have completed their function they need to be released from the pre-ribosomes. Rea1 induces the release of the assembly protein complex in a mechanical fashion . This family is usually associated with NUC202, Pfam:PF08166.. +PF08168 NUC205 domain
This domain is found in a novel family of nucleolar proteins .. +PF08169 RBB1NT (NUC162) domain
This domain is found N terminal to the ARID/BRIGHT domain in DNA-binding proteins of the Retinoblastoma-binding protein 1 family .. +PF08170 POPLD (NUC188) domain
This domain is found in POP1-like nucleolar proteins .. +PF08171 Mad3_like;
Mad3/BUB1 homology region 2. Pfam-B_113144 (release 16.0). This domain is found in checkpoint proteins which are involved in cell division. This region has been shown to be necessary and sufficient for the binding of MAD3 to BUB3 in Saccharomyces cerevisiae. This domain is present in BUB1 which also binds BUB3 .. +PF08172 CASP C terminal
Pfam-B_7701 (release 16.0). This domain is the C-terminal region of the CASP family of proteins. It is a Golgi membrane protein which is thought to have a role in vesicle transport .. +PF08173 Membrane bound YbgT-like protein
Short protein clustering. This family contains a set of membrane proteins, typically 33 amino acids long. The family has no known function, but the protein is found in the operon CydAB in E. coli. Members have a consensus motif (MWYFXW) which is rich in aromatic residues. The protein forms a single membrane-spanning helix. This family seems to be restricted to Proteobacteria .. +PF08174 DUF1709;
Cell division protein anillin. Pfam-B_55293 (release 16.0). Anillin is a protein involved in septin organisation during cell division. It is an actin binding protein that is localised to the cleavage furrow, and it maintains the localisation of active myosin, which ensures the spatial control of concerted contraction during cytokinesis .. +PF08175 Small acid-soluble spore protein O family
Short protein clustering. This family consists of the small acid-soluble spore proteins (SASP) O type (sspO). SspO (originally cotK) are unique to the spores of Bacillus subtilis and are expressed only in the forespore compartment of sporulating cells of this organism. The sspO is the first gene in a likely operon with sspP and transcription of this gene is primarily by RNA polymerase with the forespore-specific sigma factor, sigma-G. Mutation deleting sspO causes the loss of the SspO from the forespore but had no discernible effect on sporulation, spore properties or spore germination .. +PF08176 Small acid-soluble spore protein K family
Short protein clustering. This family consists of the small acid-soluble spore proteins (SASP) belonging to the K type (sspK). The sspK are unique to the spores of Bacillus subtilis and are expressed only in the forespore compartment of sporulating cells of this organism. The sspK gene is monocistronic and transcription is primarily by the RNA polymerase with the forespore-specific sigma factor, sigma-G. Mutation deleting sspK results in loss of SspK from the spore but had no discernible effect on sporulation, spore properties or spore germination .. +PF08177 Small acid-soluble spore protein N family
Short protein clustering. This family consists of the small acid-soluble spore protein (SASP) N type (sspN). SspN is a 48 residues protein that is expressed only in the forespore compartment of sporulating Bacillus subtilis. The sspN gene is recognised equally by both sigma-G and sigma-F. The role of SspN is still not well-defined .. +PF08178 GnsA/GnsB family
Short protein clustering. This family consists of the GnsA/GnsB family. GnsA and GnsB are multicopy suppressors of the secG null mutation. These proteins participate in the synthesis of phospholipids, suggesting the functional relationship between SecG and membrane phospholipids. Overexpression of gnsA and gnsB causes a remarkable increase in the unsaturated fatty acid content. However, the gnsA-gnsB double null mutant exhibits no effect. Both proteins are predicted to possess a helix-turn-helix structure .. +PF08179 Small acid-soluble spore protein P family
Short protein clustering. This family consists of the small acid-soluble spore proteins (SASP) P type (sspP). sspP is expressed only in the forespore compartment of the sporulating cell. sspP is also expressed under sigma-G control from the same promoter as sspO. Mutations deleting sspP causes no discernible effect on sporulation, spore properties or spore germination .. +PF08180 B melanoma antigen family
Short protein clustering. This family consists of the B melanoma antigen (BAGE) peptides. The BAGE gene encodes a human tumour antigen that is recognised by a cytolytic T lymphocyte. BAGE genes are expressed in melanomas, bladder and lung carcinomas and in a few tumours of other histological types .. +PF08181 DegQ (SacQ) family
Short protein clustering. This family consists of the DegQ (formerly sacQ) regulatory peptides. The DegQ family of peptides control the rates of synthesis of a class of both secreted and intracellular degradative enzymes in Bacillus subtilis. DegQ is 46 amino acids long and activates the synthesis of degradative enzymes. The expression of this peptide was shown to be subjected both to catabolite repression and DegS-DegU-mediated control. Thus allowing an increase in the rate of synthesis of degQ under conditions of nitrogen starvation .. +PF08182 Pedibin/Hym-346 family
Short protein clustering. This family consists of the pedibin and Hym-346 signaling peptides. These two peptides have been isolated from Hydra vulgaris and Hydra magnipapillata. Experiments have indicated that both cause a reduction in the positional value gradient, the principle patterning process governing the maintenance of form in the adult hydra. The peptides cause an increase in the rate of foot regeneration following bisection of the body column. Thus both play important signaling roles in patterning processes in cnidaria and maybe in more complex metazoans .. +PF08183 Stage V sporulation protein family
Short protein clustering. This family consists of the stage V sporulation (SpoV) proteins of Bacillus subtilis which includes SpoVM. SpoVM is a small, 26 residue-long protein that is produced in the mother cell chamber of the sporangium during the process of sporulation in B. subtilis. SpoVM forms an amphipathic alpha-helix and is recruited to the polar septum shortly after the sporangium undergoes asymmetric division. The function of SpoVM depends on proper subcellular localisation .. +PF08184 Cuticle protein 7 isoform family
Short protein clustering. This family consists of cuticle protein 7 isoforms that are isolated from the carapace cuticle of a juvenile horseshoe crab, Limulus polyphemus. There are 3 isoforms of cuticle protein 7. The 3 isoforms are N-terminally blocked but could be deblocked by treatment with pyroglutaminase, showing that the N-terminal residue is a pyroglutamine residue . . +PF08186 Wound-inducible basic protein family
Short protein clustering. This family consists of the wound-inducible basic proteins from plants. The metabolic activities of plants are dramatically altered upon mechanical injury or pathogen attack. A large number of proteins accumulates at wound or infection sites, such as the wound-inducible basic proteins. These proteins are small, 47 amino acids in length, have no signal peptides and are hydrophilic and basic .. +PF08187 Myoactive tetradecapeptides family
Short protein clustering. This family consists of myoactive tetradecapeptides that are isolated from the gut of earthworms, Eisenia foetida and Pheretima vitata. These peptides were termed ETP and PTP respectively. Both peptides showed a potent excitatory action on spontaneous contractions of the anterior gut. These peptides show similarity to Molluscan tetradecapeptides and arthropodan tridecapeptides .. +PF08188 Spermatozal protamine family
Short protein clustering. This family consists of the spermatozal protamines. Spermatozal protamines play an important role in remodelling of the sperm chromatin during mammalian spermiogenesis. Nuclear elongation and chromatin condensation are concomitant with modifications in the basic protein complement associated with DNA. Somatic histones are initially replaced by testis -specific histone variants, then by transitional proteins, and ultimately by protamines .. +PF08189 Meleagrin/Cygnin family
Short protein clustering. This family consists of meleagrin and cygnin basic peptides that are isolated from turkey and black swan respectively. Both peptides are low in molecular weight and contains three disulphide bonds with high concentrations of aromatic residues. These peptides show similarity to transferrins and probably play some vital role in avian eggs but the exact function is still unknown .. +PF08190 Nop17p;
pre-RNA processing PIH1/Nop17. Pfam-B_10462 (release 16.0). This domain is involved in pre-rRNA processing . It has been shown to be required either for nucleolar retention or correct assembly of the box C/D snoRNP in Saccharomyces cerevisiae . The C-terminal region of this family has similarity to the CS domain Pfam:PF04969.. +PF08191 LRR adjacent
Mistry J, Schubert WD. Pfam-B_1177 (release 16.0). These are small, all beta strand domains, structurally described for the protein Internalin (InlA) and related proteins InlB, InlE, InlH from the pathogenic bacterium Listeria monocytogenes. Their function appears to be mainly structural: They are fused to the C-terminal end of leucine-rich repeats (LRR), significantly stabilising the LRR, and forming a common rigid entity with the LRR. They are themselves not involved in protein-protein-interactions but help to present the adjacent LRR-domain for this purpose. These domains belong to the family of Ig-like domains in that they consist of two sandwiched beta sheets that follow the classical connectivity of Ig-domains. The beta strands in one of the sheets is, however, much smaller than in most standard Ig-like domains, making it somewhat of an outlier .. +PF08192 Peptidase family S64
Mistry J, Rawlings N. This family of fungal proteins is involved in the processing of membrane bound transcription factor Stp1 . The processing causes the signalling domain of Stp1 to be passed to the nucleus where several permease genes are induced. The permeases are important for uptake of amino acids, and processing of Stp1 only occurs in an amino acid-rich environment. This family is predicted to be distantly related to the trypsin family (MEROPS:S1) and to have a typical trypsin-like catalytic triad .. +PF08193 DUF1711;
INO80 complex subunit Ies4. The INO80 ATPase is a member of the SNF2 family of ATPases and functions as an integral component of a multisubunit ATP-dependent chromatin remodelling complex. This family of proteins corresponds to the fungal Ies4 subunit of INO80.. +PF08194 DIM protein
Short protein clustering. Drosophila immune-induced molecules (DIMs) are short proteins induced during the immune response of Drosophila. This family includes DIMs 1 to 4 that have masses below 5 kDa .. +PF08195 TRI9 protein
Short protein clustering. Putative gene of 129 bp in the Trichothecene gene cluster of Fusarium sporotrichioides and F. graminearum. Encoding a predicted protein of 43 amino acids which function is unknown [1,2].. +PF08196 UL2 protein
Short protein clustering. Orf UL2 of Human cytomegalovirus (HCMV) which is a short protein of unknown function . +PF08197 pORF2a truncated protein
Short protein clustering. Most isolated ORF2 of TT virus (TTV) encode a 49 amino acids protein (pORF2a) because of an in-frame stop codon. ORF2s isolated from G1 TTV encode 202 amino acids protein (pORF2ab) . . +PF08198 Thymopoietin protein
Short protein clustering. Short protein of 49 amino acid isolated from bovine spleen cells . Thymopoietins (TMPOs) are a group of ubiquitously expressed nuclear proteins. They are suggested to play an important role in nuclear envelope organisation and cell cycle control .. +PF08199 Bacteriophage E2-like protein
Short protein clustering. Short conserved protein described in Lactococcus Bacteriophage c2 of 37 amino acids .. +PF08200 Bacteriophage 1.1 Protein
Short protein clustering. Gene 1.1 in Bacteriophage T7 encodes a 42 amino acid protein, rich in basic amino acids suggesting its interaction with nucleic acids . Many homologs are present in different T7 and T3-like bacteriophage.. +PF08201 BssC/TutF protein
Short protein clustering. BssC short protein (57 amino acids) has been described as the gamma-subunit of benzylsuccinate synthase from Thauera aromatica strain K172 . TutF has been identified and described as highly similar to BssC in T.aromatica strain T1 . . +PF08202 Mis12_component;
Mis12-Mtw1 protein family. Pfam-B_127825 (release 16.0). Mis12-Mtw1 is a eukaryotic conserved kinetochore protein that is involved in chromosome segregation .. +PF08203 Yeast RNA polymerase I subunit RPA14
This is a family of yeast proteins. A14 is one of the final two subunits of Saccharomyces cerevisiae RNA polymerase I and is proposed to play a role in the recruitment of pol I to the promoter .. +PF08204 CD47 immunoglobulin-like domain
Pfam-B_2739 (release 7.5). This family represents the CD47 leukocyte antigen V-set like Ig domain [1,2].. +PF08205 CD80-like C2-set immunoglobulin domain
Pfam-B_280 (release 17.0). These domains belong to the immunoglobulin superfamily.. +PF08206 Ribonuclease B OB domain
Pfam-B_484 (release 17.0). This family includes the N-terminal OB domain found in ribonuclease B proteins in one or two copies.. +PF08207 Elongation factor P (EF-P) KOW-like domain
+PF08208 RNA_polI_final;
DNA-directed RNA polymerase I subunit RPA34.5. This is a family of proteins conserved from yeasts to human. Subunit A34.5 of RNA polymerase I is a non-essential subunit which is thought to help Pol I overcome topological constraints imposed on ribosomal DNA during the process of transcription .. +PF08209 Sgf11 (transcriptional regulation protein)
The Sgf11 family is a SAGA complex subunit in Saccharomyces cerevisiae. The SAGA complex is a multisubunit protein complex involved in transcriptional regulation. SAGA combines proteins involved in interactions with DNA-bound activators and TATA-binding protein (TBP), as well as enzymes for histone acetylation and deubiquitylation .. +PF08210 APOBEC-like N-terminal domain
A mechanism of generating protein diversity is mRNA editing. Members of this family are C-to-U editing enzymes. The N-terminal domain of APOBEC-1 like proteins is the catalytic domain, while the C-terminal domain is a pseudocatalytic domain. More specifically, the catalytic domain is a zinc dependent deaminase domain and is essential for cytidine deamination. APOBEC-3 like members contain two copies of this domain. RNA editing by APOBEC-1 requires homodimerisation and this complex interacts with RNA binding proteins to form the editosome (and references therein). This family also includes the functionally homologous activation induced deaminase (AID), which is essential for the development of antibody diversity in B lymphocytes, and the sea lamprey PmCDA1 and PmCDA2, which are predicted to play an AID-like role in the adaptive immune response of jawless vertebrates . Divergent members of this family are present in various eukaryotes such as Nematostella, C. elegans, Micromonas and Emiliania, and prokaryotes such as Wolbachia and Pseudomonas brassicacearum .. +PF08211 Cytidine and deoxycytidylate deaminase zinc-binding region
Pfam-B_8221 (release 16.0). +PF08212 Lipocalin-like domain
Pfam-B_2479 (Release 17.0). Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The structure is an eight-stranded beta barrel.. +PF08213 Mitochondrial domain of unknown function (DUF1713)
This domain is found at the C terminal end of mitochondrial proteins of unknown function.. +PF08214 DUF1714; RTT109;
Histone acetylation protein. Histone acetylation is required in many cellular processes including transcription, DNA repair, and chromatin assembly. This family contains the fungal KAT11 protein (previously known as RTT109) which is required for H3K56 acetylation. Loss of KAT11 results in the loss of H3K56 acetylation, both on bulk histone and on chromatin . KAT11 and H3K56 acetylation appear to correlate with actively transcribed genes and associate with the elongating form of Pol II in yeast . This family also incorporates the p300/CBP histone acetyltransferase domain which has different catalytic properties and cofactor regulation to KAT11 .. +PF08216 DUF1716;
Catenin-beta-like, Arm-motif containing nuclear. Pfam-B_13045 (release 16.0). CTNNBL is a family of eukaryotic nuclear proteins of the catenin-beta-like 1 type that contain an armadillo motif. A human nuclear protein with this domain (Swiss:Q8WYA6) is thought to have a role in apoptosis . The interaction of CTNNBL1 with its known partners (the Prp19-CDC5L complex and AID) is mediated by recognition of NLS (nuclear localisation signal) motifs. The RNA-splicing factor Prp31 is also an interactor, with recognition also occurring through the NLS. CTNNBL1 uses its central armadillo (ARM) domain to bind NLS-containing partners [2,3].. +PF08217 Fungal domain of unknown function (DUF1712)
The function of this family of proteins is unknown.. +PF08218 Citrate lyase ligase C-terminal domain
Pfam-B_3588 (release 16.0). This family is composed of the C-terminal domain of citrate lyase ligase EC:6.2.1.22.. +PF08219 Outer membrane protein TOM13
The TOM13 family of proteins are mitochondrial outer membrane proteins that mediate the assembly of beta-barrel proteins .. +PF08220 DeoR-like helix-turn-helix domain
+PF08221 RNA polymerase III subunit RPC82 helix-turn-helix domain
Pfam-B_9884 (release 8.0). This family consists of several DNA-directed RNA polymerase III polypeptides which are related to the Saccharomyces cerevisiae RPC82 protein. RNA polymerase C (III) promotes the transcription of tRNA and 5S RNA genes. In Saccharomyces cerevisiae, the enzyme is composed of 15 subunits, ranging from 160 to about 10 kDa . This region is probably a DNA-binding helix-turn-helix.. +PF08222 CodY helix-turn-helix domain
Pfam-B_7573 (release 9.0). This family consists of the C-terminal helix-turn-helix domain found in several bacterial GTP-sensing transcriptional pleiotropic repressor CodY proteins. CodY has been found to repress the dipeptide transport operon (dpp) of Bacillus subtilis in nutrient-rich conditions . The CodY protein also has a repressor effect on many genes in Lactococcus lactis during growth in milk .. +PF08223 PaaX-like protein C-terminal domain
Pfam-B_9563 (release 14.0). This family contains proteins that are similar to the product of the paaX gene of Escherichia coli (Swiss:P76086). This protein is involved in the regulation of expression of a group of proteins known to participate in the metabolism of phenylacetic acid . . +PF08224 Domain of unknown function (DUF1719)
Mistry J, Myung-il K. Pfam-B_27966 (release 16.0). This is a domain of unknown function. It may have a role in ATPase activation.. +PF08225 Pseudin antimicrobial peptide
Short protein clustering. Pseudins are a subfamily of the FSAP family (Frog Secreted Active Peptides) extracted from the skin of the paradoxical frog Pseudis paradoxa (Pseudidae). The pseudins belong to the class of cationic, amphipathic-helical antimicrobial peptides .. +PF08226 Domain of unknown function (DUF1720)
Pfam-B_19709 (release 16.0). This domain is found in different combinations with cortical patch components EF hand, SH3 and ENTH and is therefore likely to be involved in cytoskeletal processes. This family contains many hypothetical proteins.. +PF08227 DUF1721;
DASH complex subunit Hsk3 like. The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. This family also includes several higher eukaryotic proteins. However, other DASH subunits do not appear to be conserved in higher eukaryotes.. +PF08228 RNase P subunit Pop3
This family of fungal proteins form a subunit of RNase P, the ribonucleoprotein enzyme that cleaves the leader sequence of precursor tRNAs to generate mature tRNAs. The structure of Pop3 has been assigned the L7Ae/L30e fold . This RNA-binding fold is also present in human RNase P subunit Rpp38, raising the possibility that Pop3p and Rpp38 are functional homologs.. +PF08229 ER membrane protein SH3
This family of proteins are membrane localised chaperones that are required for correct plasma membrane localisation of amino acid permeases (AAPs) . SH3 prevents AAPs proteins from aggregating and assists in their correct folding. In the absence of SH3, AAPs are retained in the ER.. +PF08230 Cpl-7 lysozyme C-terminal domain
This domain was originally found in the C-terminal moiety of the Cpl-7 lysozyme encoded by the Streptococcus pneumoniae bacteriophage Cp-7 (Swiss:P19385). It is assumed that these repeats represent cell wall binding motifs although no direct evidence has been obtained so far.. +PF08231 SYF2 splicing factor
Pfam-B_11988 (release 17.0). Proteins in this family are involved in cell cycle progression and pre-mRNA splicing .. +PF08232 Striatin family
Pfam-B_7946 (release 17.0). Striatin is an intracellular protein which has a caveolin-binding motif, a coiled-coil structure, a calmodulin-binding site, and a WD (Pfam:PF00400) repeat domain . It acts as a scaffold protein and is involved in signalling pathways .. +PF08234 Chromosome segregation protein Spc25
Pfam-B_14484 (release 16.0). This is a family of chromosome segregation proteins. It contains Spc25, which is a conserved eukaryotic kinetochore protein involved in cell division. In fungi the Spc25 protein is a subunit of the Nuf2-Ndc80 complex , and in vertebrates it forms part of the Ndc80 complex .. +PF08235 LNS2 (Lipin/Ned1/Smp2)
Pfam-B_2646 (release 16.0). This domain is found in Saccharomyces cerevisiae protein SMP2, proteins with an N-terminal lipin domain (Pfam: PF04571) and phosphatidylinositol transfer proteins . SMP2 (also known as PAH1) is involved in plasmid maintenance and respiration , and has been identified as a Mg2+-dependent phosphatidate phosphatase (EC:3.1.3.4) that contains a haloacid dehalogenase (HAD)-like domain . Lipin proteins are involved in adipose tissue development and insulin resistance .. +PF08236 SRI (Set2 Rpb1 interacting) domain
Pfam-B_106465 (release 17.0) & pdb_2a7o. The SRI (Set2 Rpb1 interacting) domain mediates RNA polymerase II interaction and couples histone H3 K36 methylation with transcript elongation . This domain is conserved from yeast to humans. Members of this family form a compact, closed three-helix bundle, with an up-down-up topology. The first and second helices are antiparallel to each other and are of similar length; the third helix, which is packed across helices alpha1 and alpha2 is slightly shorter, consisting of only 15 amino acids. Most conserved hydrophobic residues are largely buried in the interior of the structure and form an extensive and contiguous hydrophobic core that stabilises the packing of the three-helix bundle. This domain mediates RNA polymerase II interaction and couples histone H3 K36 methylation with transcript elongation .. +PF08237 PE-PPE domain
This domain is found C terminal to the PE (Pfam:PF00934) and PPE (Pfam:PF00823) domains. The secondary structure of this domain is predicted to be a mixture of alpha helices and beta strands .. +PF08238 Sel1 repeat
Pfam-B_49 (Release 17.0). This short repeat is found in the Sel1 protein . It is related to TPR repeats.. +PF08239 Bacterial SH3 domain
Pfam-B_178 (Release 17.0). +PF08240 Alcohol dehydrogenase GroES-like domain
Pfam-B_7 (Release17.0). This is the catalytic domain of alcohol dehydrogenases. Many of them contain an inserted zinc binding domain. This domain has a GroES-like structure [1-2].. +PF08241 Methyltransferase domain
Pfam-B_21 (release 17.0). Members of this family are SAM dependent methyltransferases.. +PF08242 Methyltransferase domain
Pfam-B_46 (release 17.0). Members of this family are SAM dependent methyltransferases.. +PF08243 SPT2 chromatin protein
This family includes the Saccharomyces cerevisiae protein SPT2 which is a chromatin protein involved in transcriptional regulation .. +PF08244 glycosyl_hydro2; Glyco_hydro_32;
Glycosyl hydrolases family 32 C terminal. This domain corresponds to the C terminal domain of glycosyl hydrolase family 32. It forms a beta sandwich module .. +PF08245 Mur ligase middle domain
Pfam-B_26 (release 17.0). +PF08246 Cathepsin propeptide inhibitor domain (I29)
Pfam-B_14 (release 17.0) . This domain is found at the N-terminus of some C1 peptidases such as Cathepsin L where it acts as a propeptide. There are also a number of proteins that are composed solely of multiple copies of this domain such as the peptidase inhibitor salarin Swiss:Q70SU8. This family is classified as I29 by MEROPS.. +PF08247 ENOD40 protein
Short protein clustering. Rohrig et al. reported the in vitro translation of two peptides of 12 and 24 amino acids from the short, overlapping ORFs of soybean ENOD40 mRNA . The putative role of the enod40 genes has been in favour of organogenesis, such as induction of the cortical cell divisions that lead to initiation of nodule primordia, in developing lateral roots and embryonic tissues. This supports the hypothesis for a role of enod40 in lateral organ development .. +PF08248 Tryptophyllin-3 skin active peptide
Short protein clustering. PdT-3 or Tryptophyllin-3 peptide is a subfamily of the family Tryptophyllin and of the superfamily FSAP (Frog Skin Active Peptide). Originally identified in skin extracts of Neotropical leaf frogs, Phyllomedusa sp. This subfamily has an average length of 13 amino acids. The pharmacological activity of the tryptophyllins remains to be established but it seems that these peptides possess an action on liver protein synthesis and body weight .. +PF08249 Mastoparan protein
Short protein clustering. Mastoparans are a family of tetradecapeptides from wasp venom, that have been shown to directly activate GTP-binding regulatory proteins. These peptides show selectivity among G proteins: they strongly activate Go and Gi but not Gs or Gt. The peptides of this family are composed of 14 amino acids but they can assume different structures .. +PF08250 Sperm-activating peptides
Short protein clustering. The sperm-activating peptides (SAPs) are isolated in egg-conditioned media (egg jelly) of sea urchins. SAPs have several effects on sea urchin spermatozoa: stimulate sperm respiration and motility through intracellular alkalinization, transient elevation of cAMP, cGMP and Ca++levels in sperm cells [1,2].. +PF08251 Mastoparan peptide
Short protein clustering. Mastoparan (MP) peptides I, II and III are extracted from the venom gland of the Neotropical social wasp Protopolybia exigua (Saussure). They are tetradecapeptides presenting from seven to ten hydrophobic amino acid residues and from two to four lysine residues in their primary sequences. These peptides cause the degranulation of mast cells. Protopolybia-MP-I also acts by causing hemolysis in erythrocytes.. +PF08252 arg-2/CPA1 leader peptide
Short protein clustering. In this family there are leader peptides involved in the regulation of the glutaminase subunit (small subunit) of arginine-specific carbamoyl phosphate synthetase. In Neurospora crassa it is a small upstream ORF of 24 codons above the arg-2 locus . In yeast it is the leader peptide of the CPA1 gene. The 5' region of CPA1 mRNA contains a 25 codon upstream open reading frame. The leader peptide, the product of the upstream open reading frame, plays an essential, negative role in the specific repression of CPA1 by arginine .. +PF08253 Erm Leader peptide
Short protein clustering. These short proteins are Leader peptides (15-19 amino acids) of erm genes that code for resistance determinants in Staphylococcus aureus .. +PF08254 Threonine leader peptide
Short protein clustering. Threonine leader peptide of the Threonine operon thrA1A2BC. It has been sequenced in different bacteria: E. coli, Serratia marcescens, Salmonella typhi [1,2].. +PF08255 Trp-operon Leader Peptide
Short protein clustering. The tryptophan operon regulatory region of C. freundii's (leader transcript) encodes a 14-residue peptide containing characteristic tandem tryptophan residues. It is about 10 nucleotides shorter than those of E. coli and S. typhimurium .. +PF08256 Aurein-like antibiotic peptide
Short protein clustering. This family of antibacterial peptides are secreted from the granular dorsal glands of the Green and Golden Bell Frog Litoria aurea, Southern Bell Frog L. raniformis, Blue Mountains tree-frog Litoria citropa (genus Litoria) and frogs from genus Uperoleia. They are a part of the FSAP peptide family. Amongst the more active of these are aurein 1.2, aurein 2.2 and aurein 3.1; caerin 1.1, maculatin 1.1, uperin 3.6 ; citropin 1.1, citropin 1.2, citropin 1.3 and a minor peptide are wide-spectrum antibacterial peptides .. +PF08257 Sulfakinin family
Short protein clustering. The sulfakinin (SK) family of neuropeptides have only been identified in crustaceans and insects. For most species there is the potential for producing two sulfakinin peptides one have a short sulfakinin sequence The function of the sulfakinins is difficult to assess. For the American cockroach, various forms of the endogenous sulfakinins have been shown to be active on the hindgut, and also on the heart. In C. vomitoria the peptides act as neurotransmitters or neuromodulators, linking the brain with all thoracic and abdominal ganglia. In adults of P. monodon they appear to be restricted to a few neurones in the brain with a neural pathway extending along to the ventral thoracic and abdominal ganglia .. +PF08258 WWamide peptide
Short protein clustering. This family contains neuropeptides, isolated from ganglia of the African giant snail, Achatina fulica. Each peptide has a Trp residue at both the N- and C-termini. Purified WWamide-1, -2 and -3 showed an inhibitory effect on the phasic contractions of the anterior byssus retractor muscle (ABRM) .. +PF08259 Periviscerokinin family
Short protein clustering. Abdominal Perisympathetic organs of insects contain Periviscerokinins neuropeptides of about 11 amino acids. . +PF08260 Insect kinin peptide
Short protein clustering. These neuropeptides are the first members of the insect kinin-family isolated from the American cockroach. Their occurrence in the retrocerebral complex suggests a physiological role as a neurohormone. The C-terminal sequence Phe-X-Ser-Trp-Gly-NH2 characterised the peptides as members of the insect kinin family. Data suggest a possible involvement of insect kinins in water-balance by regulating the osmoregulation. These peptides have length from 6 to 14 amino acids .. +PF08261 Carcinustatin peptide
Short protein clustering. A total of 20 peptides of the superfamily allostatin were isolated from the shore crab Carcinus maenas. They are named carcinustatin 1 to 20 and their length ranges from 5 to 27 amino acids. This family includes carcinustatin 8,9,15 and 16.. +PF08262 Leucophaea maderae tachykinin-related peptide
Short protein clustering. These peptides are designated Leucophaea maderae tachykinin-related peptides (Lem TRPs). Some were isolated from the midgut of L. maderae, whereas others appear to be brain specific. The Lem TRPs of the brain are myotropic and induce increases in the amplitude and frequency of spontaneous contractions and tonus of hindgut muscle in L. maderae . They were also isolated from brain-corpora, cardiaca-corpora, allata-suboesophageal ganglion extracts of the Locusta migratoria. They stimulate visceral muscle contractions of the oviduct and the foregut of Locusta migratoria .. +PF08263 Leucine rich repeat N-terminal domain
Pfam-B_35 (release 17.0). Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the N-terminus of tandem leucine rich repeats.. +PF08264 Anticodon-binding domain of tRNA
Pfam-B_23 (Release 17.0). This domain is found mainly in hydrophobic tRNA synthetases. The domain binds to the anticodon of the tRNA.. +PF08265 YL1 nuclear protein C-terminal domain
Pfam-B_3088 (release 8.0). This domain is found in proteins of the YL1 family . These proteins have been shown to be DNA-binding and may be a transcription factor . This domain is found in proteins that are not YL1 proteins.. +PF08266 Cadherin-like
Pfam-B_179 (release 17.0). This cadherin domain is usually the most N-terminal copy of the domain. . +PF08267 Cobalamin-independent synthase, N-terminal domain
Pfam-B_553 (release 17.0). The N-terminal domain and C-terminal domains of cobalamin-independent synthases together define a catalytic cleft in the enzyme. The N-terminal domain is thought to bind the substrate, in particular, the negatively charged polyglutamate chain. The N-terminal domain is also thought to stabilise a loop from the C-terminal domain .. +PF08268 F-box associated domain
Pfam-B_322 (release 17.0). +PF08269 Cache domain
Pfam-B_865 (release 17.0). +PF08270 M protein trans-acting positive regulator (MGA) PRD domain
Pfam-B_5126 (release 7.7). Mga is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions . This corresponds to the PRD like region.. +PF08271 TFIIB_Zn_Ribbon;
Pfam-B_1298 (release 17.0). The transcription factor TFIIB contains a zinc-binding motif near the N-terminus. This domain is involved in the interaction with RNA pol II and TFIIF and plays a crucial role in selecting the transcription initiation site. The domain adopts a zinc ribbon like structure .. +PF08272 Topoisomerase I zinc-ribbon-like
Pfam-B_5615 (release 17.0). Some Proteobacteria topoisomerase I contain two zinc-ribbon-like domains at the C-terminus that are structurally homologous to Pfam:PF01396. However, this domain no longer binds zinc. Indeed, only one of the four cysteine residues remains .. +PF08273 Zinc-binding domain of primase-helicase
Pfam-B_18441 (release 17.0). +PF08274 PhnA Zinc-Ribbon
+PF08275 DNA primase catalytic core, N-terminal domain
Pfam-B_313 (release 17.0). +PF08276 PAN-like domain
Pfam-B_291 (release 17.0). +PF08277 PAN-like domain
Pfam-B_1455 (release 17.0). +PF08278 DNA primase DnaG DnaB-binding
Pfam-B_3213 (release 17.0). Eubacterial DnaG primases interact with several factors to form the replisome. One of these factors is DnaB, a helicase. This domain has been demonstrated to be responsible for the interaction between DnaG and DnaB .. +PF08279 HTH domain
Pfam-B_125 (Release 17.0). This family includes helix-turn-helix domains in a wide variety of proteins.. +PF08280 M protein trans-acting positive regulator (MGA) HTH domain
Pfam-B_5126 (release 7.7). Mga is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions .. +PF08281 Sigma-70, region 4
Pfam-B_125 (Release 17.0). Region 4 of sigma-70 like sigma-factors are involved in binding to the -35 promoter element via a helix-turn-helix motif .. +PF08282 haloacid dehalogenase-like hydrolase
Pfam-B_66 (Release 17.0). This family contains haloacid dehalogenase-like hydrolase enzymes.. +PF08283 Geminivirus rep protein central domain
Pfam-B_286 (release 2.1). This is the central domain of the geminivirus rep proteins .. +PF08284 Retroviral aspartyl protease
Single domain aspartyl proteases from retroviruses, retrotransposons, and badnaviruses (plant dsDNA viruses). These proteases are generally part of a larger polyprotein; usually pol, more rarely gag. Retroviral proteases appear to be homologous to a single domain of the two-domain eukaryotic aspartyl proteases. +PF08285 Dolichol-phosphate mannosyltransferase subunit 3 (DPM3)
This family corresponds to subunit 3 of dolichol-phosphate mannosyltransferase, an enzyme which generates mannosyl donors for glycosylphosphatidylinositols, N-glycan and protein O- and C-mannosylation. DPM3 is an integral membrane protein and plays a role in stabilising the dolichol-phosphate mannosyl transferase complex .. +PF08286 Spc24 subunit of Ndc80
Spc24 is a component of the evolutionarily conserved kinetochore-associated Ndc80 complex and is involved in chromosome segregation . +PF08287 Spc19;
Spc19 is a component of the DASH complex. The DASH complex associates with the spindle pole body and is important for spindle and kinetochore integrity during cell division .. +PF08288 PIGA (GPI anchor biosynthesis)
Pfam-B_6971 (release 17.0). This domain is found on phosphatidylinositol n-acetylglucosaminyltransferase proteins. These proteins are involved in GPI anchor biosynthesis and are associated with the disease paroxysmal nocturnal haemoglobinuria .. +PF08289 Influenza Matrix protein (M1) C-terminal domain
Pfam-B_30 (Release 17.0). This region is thought to be a second domain of the M1 matrix protein.. +PF08290 Hepatitis core protein, putative zinc finger
This short region is found at the N-terminus of some hepatitis core proteins. Its conservation of four cys and his suggests a zinc binding domain.. +PF08291 Peptidase M15
+PF08292 RNA polymerase III subunit Rpc25
Pfam-B_9841 (release 17.0). Rpc25 is a strongly conserved subunit of RNA polymerase III and has homology to Rpa43 in RNA polymerase I, Rpb7 in RNA polymerase II and the archaeal RpoE subunit. Rpc25 is required for transcription initiation and is not essential for the elongating properties of RNA polymerase III .. +PF08293 Mit_rib_S27;
Mitochondrial ribosomal subunit S27. Pfam-B_31036 (release 17.0). This family of proteins corresponds to mitochondrial ribosomal subunit S27 in prokaryotes and to subunit S33 in humans . It is a small 106 residue protein.The evolutionary history of the mitoribosomal proteome that is encoded by a diverse subset of eukaryotic genomes, reveals an ancestral ribosome of alpha-proteobacterial descent that more than doubled its protein content in most eukaryotic lineages. Several new MRPs have originated via duplication of existing MRPs as well as by recruitment from outside of the mitoribosomal proteome .. +PF08294 TIM21
TIM21 interacts with the outer mitochondrial TOM complex and promotes the insertion of proteins into the inner mitochondrial membrane .. +PF08295 HDAC_interact;
Sin3 family co-repressor. Pfam-B_2731 (release 17.0). This domain is found on transcriptional regulators. It forms interactions with histone deacetylases .. +PF08297 U3_snoRNA;
U3 snoRNA associated. This family of proteins is associated with U3 snoRNA . U3 snoRNA is required for nucleolar processing of pre-18S ribosomal RNA.. +PF08298 PrkA AAA domain
Pfam-B_3917 (release 10.0). This is a family of PrkA bacterial and archaeal serine kinases approximately 630 residues long. This is the N-terminal AAA domain .. +PF08299 Bacterial dnaA protein helix-turn-helix
+PF08300 Hepatitis C virus non-structural 5a zinc finger domain
The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. This domain corresponds to the N-terminal zinc binding domain .. +PF08301 Hepatitis C virus non-structural 5a domain 1b
The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. This region corresponds to the 1b domain .. +PF08302 Fungal tRNA ligase phosphodiesterase domain
Pfam-B_49998 (release 17.0). This domain is found in fungal tRNA ligases and has cyclic phosphodiesterase activity . tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns.. +PF08303 tRNA ligase kinase domain
Pfam-B_49998 (release 17.0). This domain is found in fungal tRNA ligases and has kinase activity . tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. This family contains a P-loop motif.. +PF08305 NPCBM/NEW2 domain
This novel putative carbohydrate binding module (NPCBM) domain is found at the N-terminus of glycosyl hydrolase family 98 proteins. This domain has also been called the NEW2 domain (Naumoff DG. Phylogenetic analysis of alpha-galactosidases of the GH27 family. Molecular Biology (Engl Transl). (2004)38:388-399.). +PF08306 Glycosyl hydrolase family 98
This domain is the putative catalytic domain of glycosyl hydrolase family 98 proteins.. +PF08307 Glycosyl hydrolase family 98 C-terminal domain
This putative domain is found at the C-terminus of glycosyl hydrolase family 98 proteins. This domain is not expected to form part of the catalytic activity.. +PF08308 PEGA domain
This domain is found in both archaea and bacteria and has similarity to S-layer (surface layer) proteins. It is named after the characteristic PEGA sequence motif found in this domain. The secondary structure of this domain is predicted to be beta-strands [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16].. +PF08309 LVIVD repeat
This repeat is found in bacterial and archaeal cell surface proteins, many of which are hypothetical. The secondary structure corresponding to this repeat is predicted to comprise 4 beta-strands which may associate to form a beta-propeller [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. The repeat copy number varies from 2-14. This repeat is sometimes found with the PKD domain Pfam:PF00801.. +PF08310 LGFP repeat
This 54 amino acid repeat is found in many hypothetical proteins. Several hypothetical proteins from C.glutamicum and C.efficiens along with PS1 protein contain this repeat region. The N-terminus region of PS1 contains an esterase domain which transfers corynomycolic acid. The C-terminus region consists of 4 tandem LGFP repeats. It is hypothesised that the PS1 proteins in Corynebacterium, when associated with the cell wall, may be anchored via the LGFP tandem repeats that may be important for maintaining cell wall integrity [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. Deletion of Swiss:Q01377 protein results in a 10-fold increase in the cell volume of the organism and infers the corresponding proteins involvement in the cell shape formation . The secondary structure of each repeat is predicted to comprise two beta-strands and one alpha-helix [Adindla et al. 2004].. +PF08311 Mad3/BUB1 homology region 1
Pfam-B_3330 (release 17.0). Proteins containing this domain are checkpoint proteins involved in cell division. This region has been shown to be essential for the binding of BUB1 and MAD3 to CDC20p .. +PF08312 cwf21 domain
Pfam-B_14400 (release 17.0). The cwf21 family is involved in mRNA splicing. It has been isolated as a subcomplex of the splicosome in Schizosaccharomyces pombe . The function of the cwf21 domain is to bind directly to the spliceosomal protein Prp8. Mutations in the cwf21 domain prevent Prp8 from binding . The structure of this domain has recently been solved which shows this domain to be composed of two alpha helices.. +PF08313 SCA7, zinc-binding domain
Pfam-B_21229 (release 17.0). This domain is found in the protein Sgf73/Sca7 which is a component of the multihistone acetyltransferase complexes SAGA and SILK . This domain is also found in Ataxin-7, a human protein which in its polyglutamine expanded pathological form, is responsible for the neurodegenerative disease spinocerebellar ataxia 7 (SCA7) . Ataxin-7 is an integral component of the mammalian SAGA-like complexes, the TATA-binding protein-free TAF-containing complex (TFTC) and the SPT3/TAF9/GCN5 acetyltransferase complex (STAGA). This domain is a minimal domain in ataxin-7-like proteins that is required for interaction with TFTC/STAGA subunits and is conserved highly through evolution. The domain contains a conserved Cys(3)His motif that binds zinc, thus indicating this to be a new zinc-binding domain .. +PF08314 Secretory pathway protein Sec39
Mistry J, Wood V, Schmitt HD. Mnaimneh et al identified Sec39p as a protein involved in ER-Golgi transport in a large scale promoter shut down analysis of essential yeast genes. Kraynack et al. (2005) showed that Sec39p (Dsl3p) is required for Golgi-ER retrograde transport and is part of a very stable protein complex that also includes Dsl1p (in mammals ZW10), Tip20p (Rint-1) and the ER localized Q-SNARE proteins Ufe1p (syntaxin-18), Sec20p and Use1p. This was confirmed in a genome-wide analysis of protein complexes by Gavin et al (2006) .. +PF08315 cwf18 pre-mRNA splicing factor
Pfam-B_19718 (release 17.0). The cwf18 family is involved in mRNA splicing. It has been isolated as a subcomplex of the spliceosome in Schizosaccharomyces pombe .. +PF08316 Pal1 cell morphology protein
Pal1 is a membrane associated protein that is involved in the maintenance of cylindrical cellular morphology. It localises to sites of active growth. Pal1 physically interacts and displays overlapping localisation with the Huntingtin-interacting-protein (Hip1)-related protein Sla2p/End4p .. +PF08317 Spc7 kinetochore protein
This domain is found in cell division proteins which are required for kinetochore-spindle association .. +PF08318 Sec38;
COG4 transport protein. This region is found in yeast oligomeric golgi complex component 4 which is involved in ER to Golgi and intra-Golgi transport .. +PF08320 PIG-X / PBN1
Mammalian PIG-X and yeast PBN1 are essential components of glycosylphosphatidylinositol-mannosyltransferase I . These enzymes are involved in the transfer of sugar molecules.. +PF08321 PPT1;
PPP5 TPR repeat region. Pfam-B_6912 (release 17.0). This region is specific to the PPP5 subfamily of serine/threonine phosphatases and contains TPR repeats.. +PF08323 Starch synthase catalytic domain
Pfam-B_148 (Release 17.0). +PF08324 PUL domain
The PUL (PLAP, Ufd3p and Lub1p) domain is a novel alpha-helical Ub-associated domain. It directly binds to Cdc48, a chaperone-like AAA ATPase that collects ubiquitylated substrates .. +PF08325 WLM domain
This is a predicted metallopeptidase domain called WLM (Wss1p-like metalloproteases). These are linked to the Ub-system by virtue of fusions with the UB-binding PUG (PUB), Ub-like, and Little Finger domains. More specifically, genetic evidence implicates the WLM family in de-SUMOylation .. +PF08326 Acetyl-CoA carboxylase, central region
Pfam-B_2008 (release 18.0). The region featured in this family is found in various eukaryotic acetyl-CoA carboxylases, N-terminal to the catalytic domain (Pfam:PF01039). This enzyme (EC:6.4.1.2) is involved in the synthesis of long-chain fatty acids, as it catalyses the rate-limiting step in this process.. +PF08327 Activator of Hsp90 ATPase homolog 1-like protein
Pfam-B_4145 (release 18.0). This family includes eukaryotic, prokaryotic and archaeal proteins that bear similarity to a C-terminal region of human activator of 90 kDa heat shock protein ATPase homolog 1 (AHSA1/p38, Swiss:O95433). This protein is known to interact with the middle domain of Hsp90, and stimulate its ATPase activity . It is probably a general upregulator of Hsp90 function, particularly contributing to its efficiency in conditions of increased stress . p38 is also known to interact with the cytoplasmic domain of the VSV G protein, and may thus be involved in protein transport . It has also been reported as being underexpressed in Down's syndrome. This region is found repeated in two members of this family (Swiss:Q8XY04 and Swiss:Q6MH87).. +PF08328 Adenylosuccinate lyase C-terminal
Pfam-B_1176 (release 18.0). This domain is found at the C-terminus of adenylosuccinate lyase(ASL; PurB in E. coli). It has been identified in bacteria, eukaryotes and archaea and is found together with the lyase domain Pfam:PF00206. ASL catalyses the cleavage of succinylaminoimidazole carboxamide ribotide to aminoimidazole carboxamide ribotide and fumarate and the cleavage of adenylosuccinate to adenylate and fumarate .. +PF08329 Chitinase A, N-terminal domain
Pfam-B_1049 (release 18.0). This domain is found in a number of bacterial chitinases and similar viral proteins. It is organised into a fibronectin III module domain-like fold, comprising only beta strands. Its function is not known, but it may be involved in interaction with the enzyme substrate, chitin [1,2]. It is separated by a hinge region from the catalytic domain (Pfam:PF00704); this hinge region is probably mobile, allowing the N-terminal domain to have different relative positions in solution .. +PF08331 Domain of unknown function (DUF1730)
Pfam-B_1023 (release 18.0). This domain of unknown function occurs in Iron-sulfur cluster-binding proteins together with the 4Fe-4S binding domain (Pfam:PF00037).. +PF08332 Calcium/calmodulin dependent protein kinase II Association
Pfam-B_1025 (release 18.0). This domain is found at the C-terminus of the Calcium/calmodulin dependent protein kinases II (CaMKII). These proteins also have a Ser/Thr protein kinase domain (Pfam:PF00069) at their N-terminus . The function of the CaMKII association domain is the assembly of the single proteins into large (8 to 14 subunits) multimers .. +PF08333 Protein of unknown function (DUF1725)
Pfam-B_2110 (release 18.0). This family include many eukaryotic and one bacterial sequence. Many of its members are annotated as being putative L1 retrotransposons or LINE-1 reverse transcriptase homologs. The region in question is found repeated in some family members.. +PF08334 GSPII_G;
Type II secretion system (T2SS), protein G. Pfam-B_1144 (release 18.0). The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for the transport of proteins across the outer membrane first exported to the periplasm by the Sec or Tat translocon in Gram-negative (diderm) bacteria [1,2]. The T2SG family includes proteins such as EpsG (P45773) in Vibrio cholera, XcpT also called PddA (Q00514) in Pseudomonas aeruginosa or PulG (P15746)in Klebsiella pneumoniae. The PulG is thought to be anchored in the inner membrane with its C-terminus directed towards the periplasme . Together with other members of the Type II secretion machinery, it is thought to assemble into a pilus-like structure that may function as a dynamic mechanism to push secreted proteins out of the cell. The polypeptide is organized into a long N-terminal alpha-helix followed by a loop region that separates it from a C-terminal anti-parallel beta-sheet .. +PF08335 GlnD PII-uridylyltransferase
Pfam-B_2147 (release 18.0). This is a family of bifunctional uridylyl-removing enzymes/uridylyltransferases (UR/UTases, GlnD) that are responsible for the modification (EC:2.7.7.59) of the regulatory protein P-II, or GlnB (e.g. Swiss:P05826, Pfam:PF00543). In response to nitrogen limitation, these transferases (e.g. Swiss:P27249) catalyse the uridylylation of the PII protein, which in turn stimulates deadenylylation of glutamine synthetase (GlnA). Deadenylylated glutamine synthetase is the more active form of the enzyme . Moreover, uridylylated PII can act together with NtrB and NtrC to increase transcription of genes in the sigma54 regulon, which include glnA and other nitrogen-level controlled genes . It has also been suggested that the product of the glnD gene is involved in other physiological functions such as control of iron metabolism in certain species . The region described in this family is found in many of its members to be C-terminal to a nucleotidyltransferase domain (Pfam:PF01909), and N-terminal to an HD domain (Pfam:PF01966) and two ACT domains (Pfam:PF01842) .. +PF08336 Prolyl 4-Hydroxylase alpha-subunit, N-terminal region
Pfam-B_2013 (release 18.0). The members of this family are eukaryotic proteins, and include all three isoforms of the prolyl 4-hydroxylase alpha subunit. This enzyme (EC:1.14.11.2) is important in the post-translational modification of collagen, as it catalyses the formation of 4-hydroxyproline. In vertebrates, the complete enzyme is an alpha2-beta2 tetramer; the beta-subunit is identical to protein disulphide isomerase [1-4]. The function of the N-terminal region featured in this family does not seem to be known.. +PF08337 Plexin cytoplasmic RasGAP domain
Pfam-B_3123 (release 18.0). This family features the C-terminal regions of various plexins (e.g. Swiss:P51805). Plexins are receptors for semaphorins, and plexin signalling is important in path finding and patterning of both neurons and developing blood vessels [1,2]. The cytoplasmic region, which has been called a SEX domain in some members of this family , is involved in downstream signalling pathways, by interaction with proteins such as Rac1, RhoD, Rnd1 and other plexins . This domain acts as a RasGAP domain .. +PF08338 Domain of unknown function (DUF1731)
Pfam-B_1045 (release 18.0). This domain of unknown function appears towards the C-terminus of proteins of the NAD dependent epimerase/dehydratase family (Pfam:PF01370) in bacteria, eukaryotes and archaea. Many of the proteins in which it is found are involved in cell-division inhibition.. +PF08339 RTX C-terminal domain
Pfam-B_2178 (release 18.0). This family describes the C-terminal region of various bacterial haemolysins and leukotoxins, which belong to the RTX family of toxins. These are produced by various Gram negative bacteria, such as E. coli (Swiss:P09983) and Actinobacillus pleuropneumoniae (Swiss:P15377). RTX toxins may interact with lipopolysaccharide (LPS) to functionally impair and eventually kill leukocytes . This region is found in association with the RTX N-terminal domain (Pfam:PF02382) and multiple hemolysin-type calcium-binding repeats (Pfam:PF00353).. +PF08340 Domain of unknown function (DUF1732)
Wuster A, Eberhardt R. Pfam-B_1065 (release 18.0). This domain of unknown function is often found at the C-terminus of bacterial proteins, many of which are hypothetical, including proteins of the YicC family which have Pfam:PF03755 at the N-terminus. These include a protein important in the stationary phase of growth, and required for growth at high temperature . Structural modelling suggests this domain may bind nucleic acids .. +PF08341 Fibronectin-binding protein signal sequence
Pfam-B_4004 (release 18.0). This domain is found near the N-terminus of fibronectin-binding proteins in Streptococcus where it functions as a signal sequence .. +PF08343 Ribonucleotide reductase N-terminal
Pfam-B_1066 (release 18.0). This domain is found at the N-terminus of bacterial ribonucleoside-diphosphate reductases (ribonucleotide reductases, RNRs) which catalyse the formation of deoxyribonucleotides . It occurs together with the RNR all-alpha domain (Pfam:PF00317) and the RNR barrel domain (Pfam:PF02867).. +PF08344 Transient receptor ion channel II
Pfam-B_1032 (release 18.0). This domain is found in the transient receptor ion channel (Trp) family of proteins. There is strong evidence that Trp proteins are structural elements of calcium-ion entry channels activated by G protein-coupled receptors . This domain does not tend to appear with the TRP domain (Pfam:PF06011) but is often found to the C-terminus of Ankyrin repeats (Pfam:PF00023).. +PF08345 Flagellar M-ring protein C-terminal
Pfam-B_1149 (release 18.0). This domain is found in bacterial flagellar M-ring (FliF) proteins together with the YscJ/FliF domain (Pfam:PF01514).. +PF08346 AntA/AntB antirepressor
Pfam-B_2097 (release 18.0). In E. coli the two proteins AntA and AntB have 62% amino acid identities near their N termini. AntA appears to be encoded by a truncated and divergent copy of AntB. The two proteins are homologous to putative antirepressors found in numerous bacteriophages, such as the hypothetical antirepressor protein encoded by the gene LO142 of the bacteriophage 933W .. +PF08347 N-terminal CTNNB1 binding
Pfam-B_2064 (release 18.0). This region tends to appear at the N-terminus of proteins also containing DNA-binding HMG (high mobility group) boxes (Pfam:PF00505) and appears to bind the armadillo repeat of CTNNB1 (beta-catenin), forming a stable complex. Signaling by Wnt through TCF/LCF is involved in developmental patterning, induction of neural tissues, cell fate decisions and stem cell differentiation . Isoforms of HMG T-cell factors lacking the N-terminal CTNNB1-binding domain cannot fulfill their role as transcriptional activators in T-cell differentiation [1,2].. +PF08348 YheO;
YheO-like PAS domain. Pfam-B_2023 (release 18.0). This family contains various hypothetical bacterial proteins that are similar to the E. coli protein YheO (Swiss:P64624). Their function is unknown, but are likely to be involved in signalling based on the presence of this PAS domain.. +PF08349 Protein of unknown function (DUF1722)
Pfam-B_4169 (release 18.0). This domain of unknown function is found in bacteria and archaea and is homologous to the hypothetical protein ybgA from E. coli.. +PF08350 Domain of unknown function (DUF1724)
Pfam-B_1158 (release 18.0). This domain of unknown function has so far only been found at the C-terminus of archaean proteins, including several transcriptional regulators of the ArsR family (see Pfam:PF01022).. +PF08351 Domain of unknown function (DUF1726)
Wuster A, Eberhardt R. Pfam-B_3131 (release 18.0). This domain of unknown function is often found at the N-terminus of proteins containing Pfam:PF05127. Its fold resembles that of Pfam:PF05127, but it does not appear to bind ATP .. +PF08352 Oligopeptide/dipeptide transporter, C-terminal region
Pfam-B_3025 (release 18.0). This family features a region found towards the C-terminus of oligopeptide ABC transporter ATP binding proteins, immediately following the ATP-binding domain (Pfam:PF00005). All characterised members appear able to be involved in the transport of oligopeptides or dipeptides. Some are important for sporulation or antibiotic resistance. Some dipeptide transporters also act on the heme precursor delta-aminolevulinic acid.. +PF08353 Domain of unknown function (DUF1727)
Pfam-B_2131 (release 18.0). This domain of unknown function is found at the C-terminus of bacterial proteins which include UDP-N-acetylmuramyl tripeptide synthase and the related Mur ligase.. +PF08354 Domain of unknown function (DUF1729)
Pfam-B_3179 (release 18.0). This domain of unknown function is found in fatty acid synthase beta subunits together with the MaoC-like domain (Pfam:PF01575) and the Acyltransferase domain (Pfam:PF00698) . The domain has been identified in fungi and bacteria.. +PF08355 EF hand associated
Pfam-B_4111 (release 18.0). This region typically appears on the C-terminus of EF hands in GTP-binding proteins such as Arht/Rhot (may be involved in mitochondrial homeostasis and apoptosis ). The EF hand associated region is found in yeast, vertebrates and plants.. +PF08356 EF hand associated
Pfam-B_3018 (release 18.0). This region predominantly appears near EF-hands (Pfam:PF00036) in GTP-binding proteins. It is found in all three eukaryotic kingdoms.. +PF08357 SEFIR domain
Pfam-B_33671 (release 17.0). This family comprises IL17 receptors (IL17Rs, e.g. Swiss:Q60943) and SEF proteins (e.g. Swiss:Q8QHJ9). The latter are feedback inhibitors of FGF signalling and are also thought to be receptors. Due to its similarity to the TIR domain (Pfam:PF01582), the SEFIR region is thought to be involved in homotypic interactions with other SEFIR/TIR-domain-containing proteins. Thus, SEFs and IL17Rs may be involved in TOLL/IL1R-like signalling pathways .. +PF08358 Carlavirus coat
Pfam-B_2014 (release 18.0). This domain is found together with the viral coat protein domain (Pfam:PF00286) in coat/capsid proteins of Carlaviruses infecting plants.. +PF08359 YsiA-like protein, C-terminal region
Pfam-B_20730 (release 17.0). The members of this family are thought to be TetR-type transcriptional regulators that bear particular similarity to YsiA (Swiss:P94548), a hypothetical protein expressed by B. subtilis.. +PF08360 QacR-like protein, C-terminal region
Pfam-B_96140 (release 17.0). This family features the C-terminal region of a number of proteins that bear similarity to the QacR protein (Swiss:P23217), a transcriptional regulator of the TetR family. QacR is able to bind various environmental agents, which include a number of cationic lipophilic compounds, and thus regulate the transcription of QacA (Swiss:P23215), a multidrug efflux pump . The C-terminal region contains the multifaceted, expansive drug-binding pocket, which is composed of several separate, but linked, binding sites .. +PF08361 MAATS-type transcriptional repressor, C-terminal region
Pfam-B_3020 (release 18.0). This family is named after the various transcriptional regulatory proteins that it contains, including MtrR (Swiss:Q6RV06), AcrR (Swiss:P34000), ArpR (Swiss:Q9KJC4), TtgR (Swiss:Q9AIU0) and SmeT (Swiss:Q8KLP4). These are members of the TetR family of transcriptional repressors, that are involved in the control of expression of multidrug resistance proteins [1,2,3].. +PF08362 YcdC-like protein, C-terminal region
Pfam-B_4012 (release 17.0). This family comprises proteins that belong to the TetR family of transcriptional regulators. They bear particular similarity to YcdC (Swiss:P75899), a putative HTH-containing protein. This family features the C-terminal region of these sequences, which does not include the helix-turn-helix.. +PF08363 Glucan-binding protein C
Pfam-B_3074 (release 18.0). This domain is found in the Streptococcus Glucan-binding protein C (GbpC) and also in surface protein antigen (Spa)-family proteins which show sequence similarity to GbpC .. +PF08364 Bacterial translation initiation factor IF-2 associated region
Pfam-B_3037 (release 18.0). Most of the sequences in this alignment come from bacterial translation initiation factors (IF-2, also Pfam:PF04760), but the domain is also found in the eukaryotic translation initiation factor 4 gamma in yeast and in a hypothetical Euglenozoa protein of unknown function.. +PF08365 Insulin-like growth factor II E-peptide
Pfam-B_4175 (release 18.0). This domain is found at the C-terminal domain of the insulin-like growth factor II (IGF-2, also see Pfam:PF00049) in vertebrates and seems to represent the E-peptide [1,2].. +PF08366 LLGL2
Pfam-B_4088 (release 18.0). This domain is found in lethal giant larvae homolog 2 (LLGL2) proteins and syntaxin-binding proteins like tomosyn . It has been identified in eukaryotes and tends to be found together with WD repeats (Pfam:PF00400).. +PF08367 Peptidase M16C associated
Pfam-B_3062 (release 18.0). This domain appears in eukaryotes as well as bacteria and tends to be found near the C-terminus of the metalloprotease M16C (Pfam:PF05193).. +PF08368 FAST kinase-like protein, subdomain 2
Vella Briffa B, Fenech M. Pfam-B_2858 (release 10.0). This family represents a conserved region of eukaryotic Fas-activated serine/threonine (FAST) kinases (EC:2.7.1.-) that contains several conserved leucine residues. FAST kinase is rapidly activated during Fas-mediated apoptosis, when it phosphorylates TIA-1, a nuclear RNA-binding protein that has been implicated as an effector of apoptosis . Note that many family members are hypothetical proteins. This subdomain is often found associated with the FAST kinase-like protein, subdomain 2.. +PF08369 Proto-chlorophyllide reductase 57 kD subunit
Pfam-B_2047 (release 18.0). This domain is found in bacteria and plant chloroplast proteins. It often appears at the C-terminal of Nitrogenase component 1 type Oxidoreductases (Pfam:PF00148) and sometimes independently in bacterial proteins such as the Proto-chlorophyllide reductase 57 kD subunit of the Cyanobacterium Synechocystis.. +PF08370 Plant PDR ABC transporter associated
Pfam-B_2126 (release 18.0). This domain is found on the C-terminus of ABC-2 type transporter domains (Pfam:PF01061). It seems to be associated with the plant pleiotropic drug resistance (PDR) protein family of ABC transporters. Like in yeast, plant PDR ABC transporters may also play a role in the transport of antifungal agents [1, also Pfam:PF06422]. The PDR family is characterised by a configuration in which the ABC domain is nearer the N-terminus of the protein than the transmembrane domain .. +PF08372 Plant phosphoribosyltransferase C-terminal
Pfam-B_3195 (release 18.0). This domain is found at the C-terminus of phosphoribosyltransferases and phosphoribosyltransferase-like proteins. It contains putative transmembrane regions. It often appears together with calcium-ion dependent C2 domains (Pfam:PF00168).. +PF08373 RAP domain
Pfam-B_5583 (release 17.0). This domain is found in various eukaryotic species, where it is found in proteins that are important in various parasite-host cell interactions. It is thought to be an RNA-binding domain . The domain is involved in plant defence in response to bacterial infection [2,3].. +PF08374 Protocadherin
Pfam-B_4100 (release 18.0). The structure of protocadherins is similar to that of classic cadherins (Pfam:PF00028), but particularly on the cytoplasmic domains they also have some unique features. They are expressed in a variety of organisms and are found in high concentrations in the brain where they seem to be localised mainly at cell-cell contact sites. Their expression seems to be developmentally regulated .. +PF08375 Proteasome regulatory subunit C-terminal
Pfam-B_4098 (release 18.0). This eukaryotic domain is found at the C-terminus of 26S proteasome regulatory subunits such as the non-ATPase Rpn3 subunit which is essential for proteasomal function . It occurs together with the PCI/PINT domain (Pfam:PF01399).. +PF08376 Nitrate and nitrite sensing
Pfam-B_37103 (release 17.0). The nitrate- and nitrite sensing domain (NIT) is found in receptor components of signal transducing pathways in bacteria which control gene expression, cellular motility and enzyme activity in response to nitrate and nitrite concentrations. The NIT domain is predicted to be all alpha-helical in structure .. +PF08377 MAP2/Tau projection domain
Pfam-B_26981 (release 17.0). This domain is found in the MAP2/Tau family of proteins which includes MAP2, MAP4, Tau, and their homologs. All isoforms contain a conserved C-terminal domain containing tubulin-binding repeats (Pfam:PF00418), and a N-terminal projection domain of varying size. This domain has a net negative charge and exerts a long-range repulsive force. This provides a mechanism that can regulate microtubule spacing which might facilitate efficient organelle transport [1,2].. +PF08378 Nuclease-related domain
Pfam-B_9750 (release 17.0). The nuclease-related domain (NERD) is found in a range of bacterial as well as archaeal and plant proteins. It has distant similarity to endonucleases (hence its name) and its predicted secondary structure is helix - sheet - sheet - sheet - sheet - weak sheet/long loop - helix - sheet - sheet. The majority of NERD-containing proteins are single-domain, but in several cases proteins containing NERD have additional domains which in 75% of cases are involved in DNA processing .. +PF08379 Bacterial transglutaminase-like N-terminal region
Pfam-B_2190 (release 18.0). This region is found towards the N-terminus of various archaeal and bacterial hypothetical proteins. Some of these are annotated as being transglutaminase-like proteins, and in fact contain a transglutaminase-like superfamily domain (Pfam:PF01841).. +PF08381 DZC;
Transcription factor regulating root and shoot growth via Pin3. Pfam-B_2116 (release 18.0). The BREVIS RADIX (BRX) domain was characterised as being a transcription factor in plants regulating the extent of cell proliferation and elongation in the growth zone of the root [1,2]. BRX is rate limiting for auxin-responsive gene-expression by mediating cross-talk with the brassino-steroid pathway. BRX has a ubiquitous, although quantitatively variable role in modulating the growth rate in both the root and the shoot . The family features a short region of alpha-helix, approximately 60 residues in length, which is found repeated up to three times . BRX is expressed in the vasculature and is rate-limiting for transcriptional auxin action .. +PF08383 Maf N-terminal region
Pfam-B_3103 (release 18.0). This region is found in various leucine zipper transcription factors of the Maf family. These are implicated in the regulation of insulin gene expression , in erythroid differentiation , and in differentiation of the neuroretina .. +PF08384 Pro-opiomelanocortin, N-terminal region
Pfam-B_1053 (release 18.0). This family features the N-terminal peptide of pro-opiomelanocortin (NPP). It is thought to represent an important pituitary peptide, given its high yield from pituitary glands, and exhibits a potent in vitro aldosterone-stimulating activity .. +PF08385 Dynein heavy chain, N-terminal region 1
Pfam-B_3094 (release 18.0). Dynein heavy chains interact with other heavy chains to form dimers, and with intermediate chain-light chain complexes to form a basal cargo binding unit . The region featured in this family includes the sequences implicated in mediating these interactions . It is thought to be flexible and not to adopt a rigid conformation .. +PF08386 TAP-like protein
Pfam-B_3096 (release 18.0). This is a family of putative bacterial peptidases and hydrolases that bear similarity to a tripeptidyl aminopeptidase isolated from Streptomyces lividans (Swiss:Q54410). A member of this family (Swiss:Q6E3K7) is thought to be involved in the C-terminal processing of propionicin F, a bacteriocidin characterised from Propionibacterium freudenreichii .. +PF08387 FBD
Pfam-B_1153 (release 18.0). This region is found in F-box (Pfam:PF00646) and other domain containing plant proteins; it is repeated in two family members. Its precise function is unknown, but it is thought to be associated with nuclear processes . In fact, several family members are annotated as being similar to transcription factors.. +PF08388 Group II intron, maturase-specific domain
Pfam-B_4063 (release 18.0). This region is found mainly in various bacterial and archaeal species, but a few members of this family are expressed by fungal and chlamydomonal species. It has been implicated in the binding of intron RNA during reverse transcription and splicing .. +PF08389 Exportin 1-like protein
Pfam-B_4058 (release 18.0). The sequences featured in this family are similar to a region close to the N-terminus of yeast exportin 1 (Xpo1, Crm1, Swiss:P14068). This region is found just C-terminal to an importin-beta N-terminal domain (Pfam:PF03810) in many members of this family. Exportin 1 is a nuclear export receptor that interacts with leucine-rich nuclear export signal (NES) sequences, and Ran-GTP, and is involved in translocation of proteins out of the nucleus [1,2].. +PF08390 TRAM1-like protein
Pfam-B_3108 (release 18.0). This family comprises sequences that are similar to human TRAM1 (Swiss:Q15629). This is a transmembrane protein of the endoplasmic reticulum, thought to be involved in the membrane transfer of secretory proteins . The region featured in this family is found N-terminal to the longevity-assurance protein region (Pfam:PF03798).. +PF08391 Ly49-like protein, N-terminal region
Pfam-B_1187 (release 18.0). The sequences making up this family are annotated as, or are similar to, Ly49 receptors (e.g. Swiss:P20937). These are type II transmembrane receptors expressed by mouse natural killer (NK) cells. They are classified as being activating (e.g.Ly49D and H) or inhibitory (e.g. Ly49A and G), depending on their effect on NK cell function . They are members of the C-type lectin receptor superfamily , and in fact in many family members this region is found immediately N-terminal to a lectin C-type domain (Pfam:PF00059).. +PF08392 FAE1/Type III polyketide synthase-like protein
Pfam-B_1177 (release 18.0). The members of this family are described as 3-ketoacyl-CoA synthases, type III polyketide synthases, fatty acid elongases and fatty acid condensing enzymes, and are found in both prokaryotic and eukaryotic (mainly plant) species. The region featured in this family contains the active site residues, as well as motifs involved in substrate binding .. +PF08393 Dynein heavy chain, N-terminal region 2
Pfam-B_3094 (release 18.0). Dyneins are described as motor proteins of eukaryotic cells, as they can convert energy derived from the hydrolysis of ATP to force and movement along cytoskeletal polymers, such as microtubules. This region is found C-terminal to the dynein heavy chain N-terminal region 1 (Pfam:PF08385) in many members of this family. No functions seem to have been attributed specifically to this region.. +PF08394 Archaeal TRASH domain
Pfam-B_18882 (release 17.0). This region is found in the C-terminus of a number of archaeal transcriptional regulators. It is thought to function as a metal-sensing regulatory module .. +PF08395 7tm Chemosensory receptor
This family includes a number of gustatory and odorant receptors mainly from insect species such as A. gambiae and D. melanogaster. They are classified as G-protein-coupled receptors (GPCRs), or seven-transmembrane receptors. They show high sequence divergence, consistent with an ancient origin for the family [1,2].. +PF08396 Spider toxin omega agatoxin/Tx1 family
Mondal S, Ramakumar S. The Tx1 family lethal spider neurotoxin induces excitatory symptoms in mice .. +PF08397 IRSp53/MIM homology domain
Pfam-B_4120 (release 18.0). The N-terminal predicted helical stretch of the insulin receptor tyrosine kinase substrate p53 (IRSp53) is an evolutionary conserved F-actin bundling domain involved in filopodium formation. The domain has been named IMD after the IRSp53 and missing in metastasis (MIM) proteins in which it occurs. Filopodium-inducing IMD activity is regulated by Cdc42 and Rac1 and is SH3-independent .. +PF08398 Parvovirus coat protein VP1
Pfam-B_2198 (release 18.0). This is the N-terminal region of the Parvovirus VP1 coat protein. Also see Parvovirus coat protein VP2 (Pfam:PF00740).. +PF08399 VWA N-terminal
Pfam-B_2075 (release 18.0). This domain is found at the N-terminus of proteins containing von Willebrand factor type A (VWA, Pfam:PF00092) and Cache (Pfam:PF02743) domains. It has been found in vertebrates, Drosophila and C. elegans but has not yet been identified in other eukaryotes. It is probably involved in the function of some voltage-dependent calcium channel subunits .. +PF08400 Prophage tail fibre N-terminal
Pfam-B_3101 (release 18.0). This domain is found at the N-terminus of prophage tail fibre proteins.. +PF08401 Domain of unknown function (DUF1738)
Pfam-B_3014 (release 18.0). This region is found in a number of bacterial hypothetical proteins. Some members are annotated as being similar to replication primases, and in fact this region is often found together with the Toprim domain (Pfam:PF01751).. +PF08402 TOBE domain
Pfam-B_4178 (release 18.0). The TOBE domain (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulphate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. In this family a strong RPE motif is found at the presumed N-terminus of the domain.. +PF08403 Amino acid permease N-terminal
Pfam-B_3112 (release 18.0). This domain is found to the N-terminus of the amino acid permease domain (Pfam:PF00324) in metazoan Na-K-Cl cotransporters.. +PF08404 Baculoviridae P74 N-terminal
Pfam-B_3059 (release 18.0). This domain is found at the N-terminus of P74 occlusion-derived virus (ODV) envelope proteins which are required for oral infectivity. The envelope proteins are found in baculoviruses which are insect pathogens. The C-terminus of P74 is anchored to the membrane whereas the N-terminus is exposed to the virion surface. Furthermore P74 is unusual for a virus envelope protein as it lacks an N-terminal localisation signal sequence. Also see Pfam:PF04583.. +PF08405 Viral polyprotein N-terminal
Pfam-B_4167 (release 18.0). This domain is found at the N-terminus of non-structural viral polyproteins of the Caliciviridae subfamily.. +PF08406 CbbQ/NirQ/NorQ C-terminal
Pfam-B_3065 (release 18.0). This domain is found at the C-terminus of proteins of the CbbQ/NirQ/NorQ family of proteins which play a role in the post-translational activation of Rubisco . It is also found in the Thauera aromatica TutH protein which is similar to the CbbQ/NirQ/NorQ family , as well as in putative chaperones. The ATPase family associated with various cellular activities (AAA) Pfam:PF07728 is found in the same bacterial and archaeal proteins as the domain described here.. +PF08407 Chitin synthase N-terminal
Pfam-B_1105 (release 18.0). This is the N-terminal domain of Chitin synthase (Pfam:PF01644).. +PF08408 DNA polymerase family B viral insert
Pfam-B_3028 (release 18.0). This viral domain is found between the exonuclease domain of the DNA polymerase family B (Pfam:PF03104) and the Pfam:PF00136 domain, connecting the two.. +PF08409 Domain of unknown function (DUF1736)
Pfam-B_4104 (release 18.0). This domain of unknown function is found in various hypothetical metazoan proteins.. +PF08410 Domain of unknown function (DUF1737)
Pfam-B_2030 (release 18.0). This domain of unknown function is found at the N-terminus of bacterial and viral hypothetical proteins.. +PF08411 Exonuclease C-terminal
Pfam-B_3061 (release 18.0). This bacterial domain is found at the C-terminus of Exodeoxyribonuclease I/Exonuclease I (Pfam:PF00929), which is a single-strand specific DNA nuclease affecting recombination and expression pathways. The exonuclease I protein in E. coli is associated with DNA deoxyribophosphodiesterase (dRPase) .. +PF08412 Ion transport protein N-terminal
Pfam-B_4115 (release 18.0). This metazoan domain is found to the N-terminus of Pfam:PF00520 in voltage- and cyclic nucleotide-gated K/Na ion channels.. +PF08414 Respiratory burst NADPH oxidase
Pfam-B_2127 (release 18.0). This domain is found in plant proteins such as respiratory burst NADPH oxidase proteins which produce reactive oxygen species as a defence mechanism. It tends to occur to the N-terminus of an EF-hand (Pfam:PF00036), which suggests a direct regulatory effect of Ca2+ on the activity of the NADPH oxidase in plants .. +PF08415 Nonribosomal peptide synthase
Pfam-B_1156 (release 18.0). This domain is found in bacterial nonribosomal peptide synthetases (NRPS). NRPS are megaenzymes organised as iterative modules, one for each amino acid to be built into the peptide product . NRPS modules are involved in epothilone biosynthesis (EpoB), myxothiazol biosynthesis (MtaC and MtaD), and other functions . The NRPS domain tends to be found together with the condensation domain (Pfam:PF00668) and the phosphopantetheine binding domain (Pfam:PF00550).. +PF08416 Phosphotyrosine-binding domain
Pfam-B_3174 (release 18.0). The phosphotyrosine-binding domain (PTB, also phosphotyrosine-interaction or PI domain) in the protein tensin tends to be found at the C-terminus. Tensin is a multi-domain protein that binds to actin filaments and functions as a focal-adhesion molecule (focal adhesions are regions of plasma membrane through which cells attach to the extracellular matrix). Human tensin has actin-binding sites, an SH2 (Pfam:PF00017) domain and a region similar to the tumour suppressor PTEN . The PTB domain interacts with the cytoplasmic tails of beta integrin by binding to an NPXY motif .. +PF08417 Pheophorbide a oxygenase
Pfam-B_3102 (release 18.0). This domain is found in bacterial and plant proteins to the C-terminus of a Rieske 2Fe-2S domain (Pfam:PF00355). One of the proteins the domain is found in is Pheophorbide a oxygenase (PaO) which seems to be a key regulator of chlorophyll catabolism. Arabidopsis PaO (AtPaO) is a Rieske-type 2Fe-2S enzyme that is identical to Arabidopsis accelerated cell death 1 and homologous to lethal leaf spot 1 (LLS1) of maize , in which the domain described here is also found.. +PF08418 DNA polymerase alpha subunit B N-terminal
Pfam-B_4046 (release 18.0). This is the eukaryotic DNA polymerase alpha subunit B N-terminal domain which is involved in complex formation . Also see Pfam:PF04058.. +PF08421 Putative zinc binding domain
Pfam-B_2038 (release 18.0). This domain is found at the N-terminus of bacterial methyltransferases and contains four conserved cysteines suggesting a potential zinc binding domain.. +PF08423 Rad51
Pfam-B_684 (release 17.0). Rad51 is a DNA repair and recombination protein and is a homologue of the bacterial ATPase RecA protein.. +PF08424 DUF1740;
NRDE-2, necessary for RNA interference. Pfam-B_21376 (release 17.0). This is a family of eukaryotic proteins. Eukaryotic cells express a wide variety of endogenous small regulatory RNAs that regulate heterochromatin formation, developmental timing, defence against parasitic nucleic acids, and genome rearrangement. Many small regulatory RNAs are thought to function in nuclei, and in plants and fungi small interfering (si)RNAs associate with nascent transcripts and direct chromatin and/or DNA modifications. This family protein, NRDE-2, is required for small interfering (si)RNA-mediated silencing in nuclei. NRDE-2 associates with the Argonaute protein NRDE-3 within nuclei and is recruited by NRDE-3/siRNA complexes to nascent transcripts that have been targeted by RNA interference, RNAi, the process whereby double-stranded RNA (dsRNA) directs the sequence-specific degradation of mRNA .. +PF08426 ICE2
ICE2 is a fungal ER protein which has been shown to play an important role in forming/maintaining the cortical ER . It has also been identified as a protein which is necessary for nuclear inner membrane targeting .. +PF08427 Domain of unknown function (DUF1741)
Pfam-B_35314 (release 17.0). This is a eukaryotic domain of unknown function.. +PF08428 Rib/alpha-like repeat
Pfam-B_3139 (release 18.0). The region featured in this family is found repeated in a number of bacterial surface proteins, such as Rib (Swiss:P72362) and alpha (Swiss:Q02192). These are expressed by group B streptococci, and Rib is thought to confer protective immunity.. +PF08429 PLU-1-like protein
Pfam-B_4023 (release 18.0). Sequences in this family bear similarity to the central region of PLU-1 (Swiss:Q9Y3Q5). This is a nuclear protein that may have a role in DNA-binding and transcription, and is closely associated with the malignant phenotype of breast cancer . This region is found in various other Jumonji/ARID domain-containing proteins (see Pfam:PF02373, Pfam:PF01388).. +PF08430 Forkhead N-terminal region
Pfam-B_3191 (release 18.0). The region described in this family is found towards the N-terminus of various eukaryotic fork head/HNF-3-related transcription factors (which contain the Pfam:PF00250 domain). These proteins play key roles in embryogenesis, maintenance of differentiated cell states, and tumorigenesis .. +PF08432 DUF1742;
AAA-ATPase Vps4-associated protein 1. Vps Four-Associated 1, Vfa1, in yeast, is an endosomal protein that interacts with the AAA-ATPase Vps4. It would seem to be involved in regulating the trafficking of other proteins to the endocytic vacuole . There is a CCCH zinc finger at the N-terminus.. +PF08433 Chromatin associated protein KTI12
Pfam-B_11625 (release 17.0). This is a family of chromatin associated proteins which interact with the Elongator complex, a component of the elongating form of RNA polymerase II . The Elongator complex has histone acetyltransferase activity.. +PF08434 Calcium-activated chloride channel
Pfam-B_3091 (release 18.0). The CLCA family of calcium-activated chloride channels has been identified in many epithelial and endothelial cell types as well as in smooth muscle cells and has four or five putative transmembrane regions. Additionally to their role as chloride channels some CLCA proteins function as adhesion molecules and may also have roles as tumour suppressors . The domain described here is found at the N-terminus of CLCAs.. +PF08435 Calici_coat_N;
Calicivirus coat protein C-terminal. Pfam-B_108 (release 18.0). This is the calicivirus coat protein (Pfam:PF00915) C-terminal region.. +PF08436 1-deoxy-D-xylulose 5-phosphate reductoisomerase C-terminal
Pfam-B_445 (release 18.0). This domain is found to the C-terminus of Pfam:PF02670 domains in bacterial and plant 1-deoxy-D-xylulose 5-phosphate reductoisomerases which catalyse the formation of 2-C-methyl-D-erythritol 4-phosphate from 1-deoxy-D-xylulose-5-phosphate in the presence of NADPH .. +PF08437 Glyco_transf_8N;
Glycosyl transferase family 8 C-terminal. Pfam-B_3038 (release 18.0). This domain is found at the C-terminus of the Pfam:PF01501 domain in bacterial glucosyltransferase and galactosyltransferase proteins.. +PF08438 GTPase of unknown function C-terminal
Pfam-B_4095 (release 18.0). This domain is found at the C-terminus of Pfam:PF01926 in archaeal and eukaryotic GTP-binding proteins. The C-terminal domain of the GTP-binding proteins is necessary for the complete activity of the protein of interacting with the 50S ribosome and binding of both adenine and guanine nucleotides, with a preference for guanine nucleotides.. +PF08439 Oligopeptidase F
Pfam-B_679 (release 18.0). This domain is found to the N-terminus of the Pfam:PF01432 domain in bacterial and archaeal proteins including Oligoendopeptidase F. An example of this protein is Lactococcus lactis PepF .. +PF08440 Potyviridae polyprotein
Pfam-B_237 (release 18.0). This domain is found in polyproteins of the viral Potyviridae taxon.. +PF08441 Integrin alpha
Pfam-B_609 (release 18.0). This domain is found in integrin alpha and integrin alpha precursors to the C terminus of a number of Pfam:PF01839 repeats and to the N-terminus of the Pfam:PF00357 cytoplasmic region. This region is composed of three immunoglobulin-like domains.. +PF08442 ATP-grasp domain
+PF08443 RimK-like ATP-grasp domain
This ATP-grasp domain is found in the ribosomal S6 modification enzyme RimK .. +PF08444 Aralkyl acyl-CoA:amino acid N-acyltransferase, C-terminal region
Pfam-B_7828 (release 9.0). This family features the C-terminal region of several mammalian specific aralkyl acyl-CoA:amino acid N-acyltransferase (glycine N-acyltransferase) proteins EC:2.3.1.13.. +PF08445 FR47-like protein
Pfam-B_71946 (release 17.0). The members of this family are similar to the C-terminal region of the D. melanogaster hypothetical protein FR47 (Swiss:Q9VR51). This protein has been found to consist of two N-acyltransferase-like domains swapped with the C-terminal strands.. +PF08446 PAS fold
Pfam-B_437 (Release 18.0). The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs . The PAS fold appears in archaea, eubacteria and eukarya.. +PF08447 PAS fold
Pfam-B_64 (Release 18.0). The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs . The PAS fold appears in archaea, eubacteria and eukarya.. +PF08448 PAS fold
Pfam-B_493 (Release 18.0). The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs . The PAS fold appears in archaea, eubacteria and eukarya.. +PF08449 UAA transporter family
Pfam-B_606 (release 18.0). This family includes transporters with a specificity for UDP-N-acetylglucosamine .. +PF08450 SMP-30/Gluconolaconase/LRE-like region
Pfam-B_3630 (release 7.0). This family describes a region that is found in proteins expressed by a variety of eukaryotic and prokaryotic species. These proteins include various enzymes, such as senescence marker protein 30 (SMP-30, Swiss:Q15493), gluconolactonase (Swiss:Q01578) and luciferin-regenerating enzyme (LRE, Swiss:Q86DU5). SMP-30 is known to hydrolyse diisopropyl phosphorofluoridate in the liver, and has been noted as having sequence similarity, in the region described in this family, with PON1 (Swiss:P52430) and LRE .. +PF08451 Adenosine/AMP deaminase N-terminal
Pfam-B_3145 (release 18.0). This domain is found to the N-terminus of the Adenosine/AMP deaminase domain (Pfam:PF00962) in metazoan proteins such as the Cat eye syndrome critical region protein 1 and its homologues.. +PF08452 DNA polymerase family B exonuclease domain, N-terminal
Pfam-B_3196 (release 18.0). This domain is found in viral DNA polymerases to the N-terminus of DNA polymerase family B exonuclease domains (Pfam:PF03104).. +PF08453 Peptidase family M9 N-terminal
Pfam-B_4156 (release 18.0). This domain is found in microbial collagenase metalloproteases to the N-terminus of Pfam:PF01752. . +PF08454 RyR and IP3R Homology associated
Pfam-B_4135 (release 18.0). This eukaryotic domain is found in ryanodine receptors (RyR) and inositol 1,4,5-trisphosphate receptors (IP3R) which together form a superfamily of homotetrameric ligand-gated intracellular Ca2+ channels . There seems to be no known function for this domain . Also see the IP3-binding domain Pfam:PF01365 and Pfam:PF02815.. +PF08455 Bacterial SNF2 helicase associated
Pfam-B_3199 (release 18.0). This domain is found in bacterial proteins of the SWF/SNF/SWI helicase family to the N-terminus of the SNF2 family N-terminal domain (Pfam:PF00176) and together with the Helicase conserved C-terminal domain (Pfam:PF00271). The function of the domain is not clear .. +PF08456 Viral methyltransferase C-terminal
Pfam-B_2153 (release 18.0). This domain is found to the C-terminus of the viral methyltransferase domain (Pfam:PF01660) in single-stranded-RNA positive-strand viruses with no DNA stage in the Virgaviridae family.. +PF08457 Sfi1 spindle body protein
Pfam-B_54813 (release 17.0). This is a family of fungal spindle pole body proteins that play a role in spindle body duplication. They contain binding sites for calmodulin-like proteins called centrins which are present in microtubule-organising centres.. +PF08458 Plant pleckstrin homology-like region
Pfam-B_7298 (release 8.0). This family describes a pleckstrin homology (PH)-like region found in several plant proteins of unknown function.. +PF08459 UvrC Helix-hairpin-helix N-terminal
Pfam-B_288 (release 18.0). This domain is found in the C subunits of the bacterial and archaeal UvrABC system which catalyses nucleotide excision repair in a multi-step process. UvrC catalyses the first incision on the fourth or fifth phosphodiester bond 3' and on the eighth phosphodiester bond 5' from the damage that is to be excised . The domain described here is found to the N-terminus of a helix hairpin helix (Pfam:PF00633) motif and also co-occurs with the Pfam:PF01541 catalytic domain which is found at the N-terminus of the same proteins.. +PF08460 Bacterial SH3 domain
Pfam-B_1108 (Release 18.0). +PF08462 Carmovirus coat protein
Pfam-B_4180 (release 18.0). This domain is found to the C-terminus of the Pfam:PF00729 domain in Carmoviruses.. +PF08463 EcoEI R protein C-terminal
Pfam-B_4136 (release 18.0). The restriction enzyme EcoEI recognises 5'-GAGN(7)ATGC-3' and is composed of the three proteins R, M, and S. The domain described here is found at the C-terminus of the R protein (HsdR) which is required for both nuclease and ATPase activity [1,2].. +PF08464 Geminivirus AC4/5 conserved region
Pfam-B_4177 (release 18.0). This domain is found in replication initiator (Rep) associated proteins such as AC5 in the Geminivirus/Begomovirus.. +PF08465 Thymidine kinase from Herpesvirus C-terminal
Pfam-B_4030 (release 18.0). This domain is found towards the C terminus in Herpesvirus Thymidine kinases.. +PF08466 Inward rectifier potassium channel N-terminal
Pfam-B_4080 (release 18.0). This metazoan domain is found to the N-terminus of the Pfam:PF01007 domain in Inward rectifier potassium channels (KIR2 or IRK2).. +PF08467 Luteovirus RNA polymerase P1-P2/replicase
Pfam-B_4011 (release 18.0). This domain is found in RNA-dependent RNA polymerase P1-P2 fusion/replicase proteins in plant Luteoviruses.. +PF08468 Methyltransferase small domain N-terminal
Pfam-B_4172 (release 18.0). This domain is found to the N-terminus of the methyltransferase small domain (Pfam:PF05175) in bacterial proteins .. +PF08469 Nucleoside triphosphatase I C-terminal
Pfam-B_4183 (release 18.0). This viral domain is found to the C-terminus of Poxvirus nucleoside triphosphatase phosphohydrolase I (NPH I) together with the helicase conserved C-terminal domain (Pfam:PF00271).. +PF08470 Nontoxic nonhaemagglutinin C-terminal
Pfam-B_4024 (release 18.0). Bacteria of the Clostridium genus produce protein neurotoxins, which are complexes consisting of neurotoxin (NT), haemagglutinin (HA), nontoxic nonhaemagglutinin (NTNH), and RNA [1, 2]. The domain described here is found at the C-terminus of the NTNH component.. +PF08471 Class II vitamin B12-dependent ribonucleotide reductase
Pfam-B_4121 (release 18.0). This domain is found to the N-terminus of the ribonucleotide reductase barrel domain (Pfam:PF02867). It occurs in bacterial class II ribonucleotide reductase proteins which depend upon coenzyme B12 (deoxyadenosylcobalamine) .. +PF08472 Sucrose-6-phosphate phosphohydrolase C-terminal
Pfam-B_4159 (release 18.0). This is the Sucrose-6-phosphate phosphohydrolase (S6PP or SPP) C-terminal domain as found in plant sucrose phosphatases. These enzymes irreversibly catalyse the last step in sucrose synthesis following the formation of Sucrose-6-Phosphate via sucrose-phosphate synthase (SPS).. +PF08473 Neuronal voltage-dependent calcium channel alpha 2acd
Pfam-B_4072 (release 18.0). This eukaryotic domain has been found in the neuronal voltage-dependent calcium channel (VGCC) alpha 2a, 2c, and 2d subunits. It is also found in other calcium channel alpha-2 delta subunits to the N-terminus of a Cache domain (Pfam:PF02743).. +PF08474 Myelin transcription factor 1
Pfam-B_4029 (release 18.0). This domain is found in the myelin transcription factor 1 (MYT1) of chordates. MYT1 contains C2HC zinc finger domains (Pfam:PF01530) and is expressed in developing neurons of the central nervous system where it is involved in the selection of neuronal precursor cells .. +PF08461 Ribonuclease R winged-helix domain
N-terminus of DUF128 family. This domain is found at the amino terminus of Ribonuclease R and a number of presumed transcriptional regulatory proteins from archaebacteria.. +PF08475 Viral capsid protein 91 N-terminal
Pfam-B_4034 (release 18.0). This domain is found in Baculoviridae including the nucleopolyhedrovirus at the N-terminus of the viral capsid protein 91 (VP91) .. +PF08476 Viral D10 N-terminal
Pfam-B_4155 (release 18.0). This domain is found on the N-terminus of the viral protein D10 (VD10) and the related MutT motif proteins . The VD10 protein is probably essential for virus replication and is often found to the N-terminus of a Pfam:PF00293 domain.. +PF08477 Miro-like protein
Pfam-B_1154 (release 17.0). Mitochondrial Rho proteins (Miro-1, Swiss:Q8IXI2, and Miro-2, Swiss:Q8IXI1), are atypical Rho GTPases. They have a unique domain organisation, with tandem GTP-binding domains and two EF hand domains (Pfam:PF00036), that may bind calcium. They are also larger than classical small GTPases. It has been proposed that they are involved in mitochondrial homeostasis and apoptosis .. +PF08478 POTRA domain, FtsQ-type
Pfam-B_1605 (release 7.0). FtsQ/DivIB bacterial division proteins (Pfam:PF03799) contain an N-terminal POTRA domain (for polypeptide-transport-associated domain). This is found in different types of proteins, usually associated with a transmembrane beta-barrel. FtsQ/DivIB may have chaperone-like roles, which has also been postulated for the POTRA domain in other contexts .. +PF08479 POTRA domain, ShlB-type
The POTRA domain (for polypeptide-transport-associated domain) is found towards the N-terminus of ShlB family proteins (Pfam:PF03865). ShlB is important in the secretion and activation of the haemolysin ShlA. It has been postulated that the POTRA domain has a chaperone-like function over ShlA; it may fold back into the C-terminal beta-barrel channel .. +PF08480 Disaggregatase related
Pfam-B_4000 (release 18.0). This domain is found in disaggregatases and several hypothetical proteins of the archaeal genus Methanosarcina. Disaggregatases cause aggregates to separate into single cells and contain parallel beta-helix repeats. Also see Pfam:PF06848.. +PF08481 GBS Bsp-like repeat
Pfam-B_2122 (release 18.0). This domain is found as a repeat in a number of Streptococcus proteins including some hypothetical proteins and Bsp. Bsp is a protein of group B Streptococcus (GBS) which might control cell morphology .. +PF08482 ATP-dependent helicase C-terminal
Pfam-B_2170 (release 18.0). This domain is found near the C-terminus of bacterial ATP-dependent helicases such as HrpB.. +PF08483 IstB_N;
IstB-like ATP binding N-terminal. Pfam-B_3188 (release 18.0). This bacterial domain is found to the N-terminus of the Pfam:PF01695 like ATP binding domain in proteins which are putative transposase subunits .. +PF08484 C-methyltransferase C-terminal domain
Pfam-B_2106 (release 18.0). This domain is found in bacterial C-methyltransferase proteins. This domain is found C-terminal to methyltransferase domains such as Pfam:PF08241 or Pfam:PF08242. But this domain is not a methyltransferase.. +PF08485 Polysaccharide biosynthesis protein C-terminal
Pfam-B_4073 (release 18.0). This domain is found to the C-terminus of the Pfam:PF02719 domain in bacterial polysaccharide biosynthesis enzymes including the capsule protein CapD and several putative epimerases/dehydratases.. +PF08486 Stage II sporulation protein
Pfam-B_1108 (release 18.0). This domain is found in the stage II sporulation protein SpoIID. SpoIID is necessary for membrane migration as well as for some of the earlier steps in engulfment during bacterial endospore formation . The domain is also found in amidase enhancer proteins. Amidases, like SpoIID, are cell wall hydrolases .. +PF08487 Vault protein inter-alpha-trypsin domain
Pfam-B_2015 (release 18.0). Inter-alpha-trypsin inhibitors (ITIs) consist of one light chain and a variable set of heavy chains. ITIs play a role in extracellular matrix (ECM) stabilisation and tumour metastasis as well as in plasma protease inhibition . The vault protein inter-alpha-trypsin (VIT) domain described here is found to the N-terminus of a von Willebrand factor type A domain (Pfam:PF00092) in ITI heavy chains (ITIHs) and their precursors.. +PF08488 Wall-associated kinase
Pfam-B_4138 (release 18.0). This domain is found together with the eukaryotic protein kinase domain Pfam:PF00069 in plant wall-associated kinases (WAKs) and related proteins. WAKs are serine-threonine kinases which might be involved in signalling to the cytoplasm and are required for cell expansion .. +PF08489 Domain of unknown function (DUF1743)
Pfam-B_4001 (release 18.0). This domain of unknown function is found in many hypothetical proteins and predicted DNA-binding proteins such as transcription-associated proteins. It is found in bacteria and archaea.. +PF08490 Domain of unknown function (DUF1744)
Pfam-B_5917 (release 18.0). This domain is found on the epsilon catalytic subunit of DNA polymerase. It is found C terminal to Pfam:PF03104 and Pfam:PF00136.. +PF08491 Squalene epoxidase
Pfam-B_3107 (release 18.0). This domain is found in squalene epoxidase (SE) and related proteins which are found in taxonomically diverse groups of eukaryotes and also in bacteria. SE was first cloned from Saccharomyces cerevisiae where it was named ERG1. It contains a putative FAD binding site and is a key enzyme in the sterol biosynthetic pathway . Putative transmembrane regions are found to the protein's C-terminus.. +PF08492 SRP72 RNA-binding domain
Pfam-B_7529 (Release 18.0). This region has been identified as the binding site of the SRP72 protein to SRP RNA .. +PF08493 Aflatoxin regulatory protein
Pfam-B_2081 (release 18.0). This domain is found in the aflatoxin regulatory protein (AflR) which is involved in the regulation of the biosynthesis of aflatoxin in the fungal genus Aspergillus . It occurs together with the fungal Zn(2)-Cys(6) binuclear cluster domain (Pfam:PF00172).. +PF08494 DEAD/H associated
Pfam-B_4150 (release 18.0). This domain is found in ATP-dependent helicases as well as a number of hypothetical proteins together with the helicase conserved C-terminal domain (Pfam:PF00270) and the Pfam:PF00271 domain.. +PF08495 DUF1745;
The FIST N domain is a novel sensory domain, which is present in signal transduction proteins from Bacteria, Archaea and Eukarya. Chromosomal proximity of FIST-encoding genes to those coding for proteins involved in amino acid metabolism and transport suggest that FIST domains bind small ligands, such as amino acids .. +PF08496 Peptidase family S49 N-terminal
Pfam-B_4027 (release 18.0). This domain is found to the N-terminus of bacterial signal peptidases of the S49 family (Pfam:PF01343) [1,2].. +PF08497 Radical SAM N-terminal
Pfam-B_2018 (release 18.0). This domain tends to occur to the N-terminus of the Pfam:PF04055 domain in hypothetical bacterial proteins.. +PF08498 Sterol methyltransferase C-terminal
Pfam-B_3143 (release 18.0). This domain is found to the C-terminus of a methyltransferase domain (Pfam:PF08241) in fungal and plant sterol methyltransferases .. +PF08499 3'5'-cyclic nucleotide phosphodiesterase N-terminal
Pfam-B_3045 (release 18.0). This domain is found to the N-terminus of the calcium/calmodulin-dependent 3'5'-cyclic nucleotide phosphodiesterase domain (Pfam:PF00233).. +PF08500 Tombusvirus p33
Pfam-B_2139 (release 18.0). Tombusviruses, which replicate in a wide range of plant hosts, replicate with the help of viral replicase protein including the overlapping p33 and p92 proteins which contain the domain described here .. +PF08501 Shikimate dehydrogenase substrate binding domain
Pfam-B_99 (release 18.0). This domain is the substrate binding domain of shikimate dehydrogenase .. +PF08502 LeuA allosteric (dimerisation) domain
Pfam-B_223 (release 18.0). This is the C-terminal regulatory (R) domain of alpha-isopropylmalate synthase, which catalyses the first committed step in the leucine biosynthetic pathway. This domain is an internally duplicated structure with a novel fold . It comprises two similar units that are arranged such that the two alpha-helices pack together in the centre, crossing at an angle of 34 degrees, sandwiched between the two three-stranded, antiparallel beta-sheets. The overall domain is thus constructed as a beta-alpha-beta three-layer sandwich .. +PF08503 DapD_N;
Tetrahydrodipicolinate succinyltransferase N-terminal. Pfam-B_4065 (release 18.0). This domain is found at the N-terminus of tetrahydrodipicolinate N-succinyltransferase (DapH) which catalyses the acylation of L-2-amino-6-oxopimelate to 2-N-succinyl-6-oxopimelate in the meso-diaminopimelate/lysine biosynthetic pathway of bacteria, blue-green algae, and plants . The N-terminal domain as defined here contains three alpha-helices and two twisted hairpin loops .. +PF08504 Runx inhibition domain
Pfam-B_4085 (release 18.0). This domain lies to the C-terminus of Runx-related transcription factors and homologous proteins (AML, CBF-alpha, PEBP2). Its function might be to interact with functional cofactors .. +PF08505 DSL1;
Mitochondrial Myo2 receptor-related protein. Myo2p, a class V myosin, is essential for mitochondrial distribution, class V being vital for organelle distribution in S. cerevisiae. It is the myosin essential for mitochondrial distribution. The established mechanism for distribution of cellular components by class V myosins is that they interact with the cargo at the C-terminal tail domain and transport it along the actin cytoskeleton using the N-terminal motor domain. Cargo-specific myosin receptors act as the link between the myosin tail and cargo. Myo2 binds with MMR1 (mitochondrial Myo2p receptor-related 1), the receptor on cargo, via the C-terminal domain.. +PF08506 Cse1
Pfam-B_9217 (release 17.0). This domain is present in Cse1 nuclear export receptor proteins. Cse1 mediates the nuclear export of importin alpha. This domain contains HEAT repeats .. +PF08507 COPI associated protein
Proteins in this family colocalise with COPI vesicle coat proteins .. +PF08508 Fungal domain of unknown function (DUF1746)
This is a fungal domain of unknown function.. +PF08509 Adenylate cyclase G-alpha binding domain
This fungal domain is found in adenylate cyclase and interacts with the alpha subunit of heterotrimeric G proteins .. +PF08510 PIG-P
PIG-P (phosphatidylinositol N-acetylglucosaminyltransferase subunit P) is an enzyme involved in GPI anchor biosynthesis .. +PF08511 COQ9
COQ9 is an enzyme that is required for the biosynthesis of coenzyme Q . It may either catalyse a reaction in the coenzyme Q biosynthetic pathway or have a regulatory role.. +PF08512 DUF1747;
Histone chaperone Rttp106-like. This family includes Rttp106, a histone chaperone involved in heterochromatin-mediated silencing . This domain belongs to the Pleckstrin homology domain superfamily.. +PF08513 LisH
Pfam-B_8344 (release 17.0). The LisH (lis homology) domain mediates protein dimerisation and tetramerisation. The LisH domain is found in Sif2, a component of the Set3 complex which is responsible for repressing meiotic genes. It has been shown that the LisH domain helps mediate interaction with components of the Set3 complex .. +PF08514 STAG domain
Pfam-B_4766 (release 17.0). STAG domain proteins are subunits of cohesin complex - a protein complex required for sister chromatid cohesion in eukaryotes. The STAG domain is present in Schizosaccharomyces pombe mitotic cohesin Psc3, and the meiosis specific cohesin Rec11. Many organisms express a meiosis-specific STAG protein, for example, mice and humans have a meiosis specific variant called STAG3, although budding yeast does not have a meiosis specific version .. +PF08515 Transforming growth factor beta type I GS-motif
Pfam-B_630 (release 18.0). This motif is found in the transforming growth factor beta (TGF-beta) type I which regulates cell growth and differentiation. The name of the GS motif comes from its highly conserved GSGSGLP signature in the cytoplasmic juxtamembrane region immediately preceding the protein's kinase domain. Point mutations in the GS motif modify the signaling ability of the type I receptor .. +PF08516 ADAM cysteine-rich
Pfam-B_197 (release 18.0). ADAMs are membrane-anchored proteases that proteolytically modify cell surface and extracellular matrix (ECM) in order to alter cell behaviour. It has been shown that the cysteine-rich domain of ADAM13 regulates the protein's metalloprotease activity .. +PF08517 Ataxin-1 and HBP1 module (AXH)
Pfam-B_5484 (release 18.0). AXH is a protein-protein and RNA binding motif found in Ataxin-1 (ATX1). ATX1 is responsible for the autosomal-dominant neurodegenerative disorder Spinocerebellar ataxia type-1 (SCA1) in humans. The AXH module has also been identified in the apparently unrelated transcription factor HBP1 which is thought to be involved in the architectural regulation of chromatin and in specific gene expression .. +PF08518 Spa2 homology domain (SHD) of GIT
Pfam-B_13873 (release 18.0). GIT proteins are signaling integrators with GTPase-activating function which may be involved in the organisation of the cytoskeletal matrix assembled at active zones (CAZ). The function of the CAZ might be to define sites of neurotransmitter release. Mutations in the Spa2 homology domain (SHD) domain of GIT1 described here interfere with the association of GIT1 with Piccolo, beta-PIX, and focal adhesion kinase .. +PF08519 Replication factor RFC1 C terminal domain
Pfam-B_5399 (release 17.0). This is the C terminal domain of replication factor C, RFC1. RFC complexes hydrolyse ATP and load sliding clamps such as PCNA (proliferating cell nuclear antigen) onto double-stranded DNA. RFC1 is essential for RFC function in vivo .. +PF08520 Fungal protein of unknown function (DUF1748)
This is a family of fungal proteins of unknown function.. +PF08521 Two-component sensor kinase N-terminal
Pfam-B_4164 (release 18.0). This domain is found in bacterial two-component sensor kinases towards the N-terminus.. +PF08522 Domain of unknown function (DUF1735)
Pfam-B_2199 (release 18.0). This domain of unknown function is found in a number of bacterial proteins including acylhydrolases.. +PF08523 Multiprotein bridging factor 1
Pfam-B_4141 (release 18.0). This domain is found in the multiprotein bridging factor 1 (MBF1) which forms a heterodimer with MBF2. It has been shown to make direct contact with the TATA-box binding protein (TBP) and interacts with Ftz-F1, stabilising the Ftz-F1-DNA complex . It is also found in the endothelial differentiation-related factor (EDF-1). Human EDF-1 is involved in the repression of endothelial differentiation, interacts with CaM and is phosphorylated by PKC . The domain is found in a wide range of eukaryotic proteins including metazoans, fungi and plants. A helix-turn-helix motif (Pfam:PF01381) is found to its C-terminus.. +PF08524 rRNA processing
This is a family of proteins that are involved in rRNA processing . In a localisation study they were found to localise to the nucleus and nucleolus . The family also includes other metazoa members from plants to mammals where the protein has been named BR22 and is associated with TTF-1, thyroid transcription factor 1 . In the lungs, the family binds TTF-1 to form a complex which influences the expression of the key lung surfactant protein-B (SP-B) and -C (SP-C), the small hydrophobic surfactant proteins that maintain surface tension in alveoli .. +PF08525 Opacity-associated protein A N-terminal motif
This family includes the Haemophilus influenzae opacity-associated protein. This protein is required for efficient nasopharyngeal mucosal colonisation, and its expression is associated with a distinctive transparent colony phenotype. OapA is thought to be a secreted protein, and its expression exhibits high-frequency phase variation [1,2]. This motif occurs at the N-terminus of these proteins. It contains a conserved histidine followed by a run of hydrophobic residues.. +PF08526 Protein-arginine deiminase (PAD) N-terminal domain
Pfam-B_2195 (release 6.4). This family represents the N-terminal non-catalytic domain of protein-arginine deiminase. This domain has a cupredoxin-like fold.. +PF08527 Protein-arginine deiminase (PAD) middle domain
Pfam-B_2195 (release 6.4). This family represents the central non-catalytic domain of protein-arginine deiminase. This domain has an immunoglobulin-like fold.. +PF08528 Nrm1;
In metazoans, cyclin-dependent kinase (CDK)-dependent phosphorylation of the retinoblastoma tumour suppressor protein (Rb) alleviates repression of E2F and thereby activates G1/S transcription. The cell size regulator Whi5 appears to be an analogous target of CDK activity during G1 phase .. +PF08529 NusA N-terminal domain
Pfam-B_407 (Release 18.0). This domain represents the RNA polymerase binding domain of NusA.. +PF08530 X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain
This domain contains a beta sandwich domain.. +PF08531 Alpha-L-rhamnosidase N-terminal domain
Pfam-B_8527 (release 8.0). This family consists of bacterial rhamnosidase A and B enzymes. This domain is probably involved in substrate recognition.. +PF08532 Beta-galactosidase trimerisation domain
Pfam-B_2131 (release 5.4). This is the non-catalytic domain B of beta-galactosidase enzymes belonging to the glycosyl hydrolase 42 family. This domain is related to glutamine amidotransferase enzymes, but the catalytic residues are replaced by non-functional amino acids. This domain is involved in trimerisation .. +PF08533 Beta-galactosidase C-terminal domain
Pfam-B_2131 (release 5.4). This domain is found at the C-terminus of beta-galactosidase enzymes that belong to the glycosyl hydrolase 42 family .. +PF08535 KorB domain
Pfam-B_20369 (release 10.0). This family consists of several KorB transcriptional repressor proteins. The korB gene is a major regulatory element in the replication and maintenance of broad host-range plasmid RK2. It negatively controls the replication gene trfA, the host-lethal determinants kilA and kilB, and the korA-korB operon . This domain includes the DNA-binding HTH motif .. +PF08536 Plant_TF;
Whirly transcription factor. Mistry J, Pachon DMR. This family contains the plant whirly transcription factors.. +PF08537 Fungal Nap binding protein NBP1
NBP1 is a nuclear protein which has been shown in Saccharomyces cerevisiae to be essential for the G2/M transition of the cell cycle.. +PF08538 Protein of unknown function (DUF1749)
This is a plant and fungal family of unknown function. This family contains many hypothetical proteins.. +PF08539 HbrB-like
HbrB is involved in hyphal growth and polarity .. +PF08540 Hydroxymethylglutaryl-coenzyme A synthase C terminal
+PF08541 ACP_C;
3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III C terminal . Pfam-B_67 (release 18.0). This domain is found on 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III EC:2.3.1.41, the enzyme responsible for initiating the chain of reactions of the fatty acid synthase in plants and bacteria.. +PF08542 Replication factor C C-terminal domain
Pfam-B_930 (release 18.0). This is the C-terminal domain of RFC (replication factor-C) protein of the clamp loader complex which binds to the DNA sliding clamp (proliferating cell nuclear antigen, PCNA). The five modules of RFC assemble into a right-handed spiral, which results in only three of the five RFC subunits (RFC-A, RFC-B and RFC-C) making contact with PCNA, leaving a wedge-shaped gap between RFC-E and the PCNA clamp-loader complex. The C-terminal is vital for the correct orientation of RFC-E with respect to RFC-A .. +PF08543 Phosphomethylpyrimidine kinase
Pfam-B_787 (release 18.0). This enzyme EC:2.7.4.7 is part of the Thiamine pyrophosphate (TPP) synthesis pathway, TPP is an essential cofactor for many enzymes .. +PF08544 GHMP kinases C terminal
This family includes homoserine kinases, galactokinases and mevalonate kinases.. +PF08545 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III
Pfam-B_135 (release 18.0). This domain is found on 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III EC:2.3.1.180, the enzyme responsible for initiating the chain of reactions of the fatty acid synthase in plants and bacteria.. +PF08546 Ketopantoate reductase PanE/ApbA C terminal
Pfam-B_396 (release 18.0). This is a family of 2-dehydropantoate 2-reductases also known as ketopantoate reductases, EC:1.1.1.169. The reaction catalysed by this enzyme is: (R)-pantoate + NADP(+) <=> 2-dehydropantoate + NADPH. ApbA catalyses the NADPH reduction of ketopantoic acid to pantoic acid in the alternative pyrimidine biosynthetic (APB) pathway . ApbA and PanE are allelic . ApbA, the ketopantoate reductase enzyme, is required for the synthesis of thiamine via the APB biosynthetic pathway .. +PF08547 Complex I intermediate-associated protein 30 (CIA30)
This protein is associated with mitochondrial Complex I intermediate-associated protein 30 (CIA30) in human and mouse. The family is also present in Schizosaccharomyces pombe which does not contain the NADH dehydrogenase component of complex I, or many of the other essential subunits. This means it is possible that this family of protein may not be directly involved in oxidative phosphorylation .. +PF08548 Peptidase M10 serralysin C terminal
Mistry J, Rawlings ND. Serralysins are peptidases related to mammalian matrix metallopeptidases (MMPs). The peptidase unit is found at the N terminal while this domain at the C terminal forms a corkscrew and is thought to be important for secretion of the protein through the bacterial cell wall. This domain contains the calcium ion binding domain Pfam:PF00353.. +PF08549 DUF1750; SWI-SNF_ssr4;
Fungal domain of unknown function (DUF1750). This is a fungal domain of unknown function.. +PF08550 Fungal protein of unknown function (DUF1752)
This is a family of fungal proteins of unknown function. This short section domain is bounded by two highly conserved tryptophans. The family contains Swiss:P34072 that is thought to be a negative regulator of the RAS-cAMP pathway in S. cerevisiae. The Sch. pombe member is a GAF1 transcription factor Swiss:Q10280 that is also associated with the zinc finger family GATA Pfam:PF00320.. +PF08551 Eukaryotic integral membrane protein (DUF1751)
Pfam-B_13217 (release 18.0). This domain is found in eukaryotic integral membrane proteins. Swiss:Q12239, a Saccharomyces cerevisiae protein, has been shown to localise COP II vesicles .. +PF08552 DUF1753;
Inositolphosphorylceramide synthase subunit Kei1. Kei1 is a subunit of Saccharomyces cerevisiae inositol phosphorylceramide (IPC) synthase . It is localised to the Golgi and is cleaved by the late Golgi processing endopeptidase Kex2 . Kei1 is essential for both the activity and the Golgi localization of IPC synthase .. +PF08553 VID27 cytoplasmic protein
This is a family of fungal and plant proteins and contains many hypothetical proteins. VID27 is a cytoplasmic protein that plays a potential role in vacuolar protein degradation.. +PF08555 Eukaryotic family of unknown function (DUF1754)
Daub J, Mistry J, Wood V. Pfam-B_10536 (release 18.0). This is a eukaryotic protein family of unknown function.. +PF08557 Sphingolipid Delta4-desaturase (DES)
Daub J, Mistry J, Wood V. Pfam-B_9504 (release 18.0). Sphingolipids are important membrane signalling molecules involved in many different cellular functions in eukaryotes. Sphingolipid delta 4-desaturase catalyses the formation of (E)-sphing-4-enine . Some proteins in this family have bifunctional delta 4-desaturase/C-4-hydroxylase activity. Delta 4-desaturated sphingolipids may play a role in early signalling required for entry into meiotic and spermatid differentiation pathways during Drosophila spermatogenesis . This small domain associates with FA_desaturase Pfam:PF00487 and appears to be specific to sphingolipid delta 4-desaturase.. +PF08558 Telomere repeat binding factor (TRF)
Pfam-B_8956 (release 18.0) . Telomere repeat binding factor (TRF) family proteins are important for the regulation of telomere stability. The two related human TRF proteins hTRF1 and hTRF2 form homodimers and bind directly to telomeric TTAGGG repeats via the myb DNA binding domain Pfam:PF00249 at the carboxy terminus . TRF1 is implicated in telomere length regulation and TRF2 in telomere protection . Other telomere complex associated proteins are recruited through their interaction with either TRF1 or TRF2. The fission yeast protein Taz1p (telomere-associated in Schizosaccharomyces pombe) has similarity to both hTRF1 and hTRF2 and may perform the dual functions of TRF1 and TRF2 at fission yeast telomeres . This domain is composed of multiple alpha helices arranged in a solenoid conformation similar to TPR repeats. The fungal members have now also been found to carry two double strand telomeric repeat binding factors .. +PF08559 Cut8;
Cut8 six-helix bundle. Daub J, Wood V, Eberhardt R. In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome . Cut8 comprises three functional domains. An N-terminal lysine-rich segment (Pfam:PF14482) which binds to the proteasome when ubiquitinated, a central dimerisation domain (Pfam:PF14483) and a C-terminal six-helix bundle (this entry), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding . Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 . Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome . In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 . Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum .. +PF08560 Protein of unknown function (DUF1757)
This family of proteins are about 150 amino acids in length and have no known function.. +PF08561 Mitochondrial ribosomal protein L37
This family includes yeast MRPL37 a mitochondrial ribosomal protein .. +PF08562 Crisp
This domain is found on Crisp proteins which contain Pfam:PF00188 and has been termed the Crisp domain. It is found in the mammalian reproductive tract and the venom of reptiles, and has been shown to regulate ryanodine receptor Ca2+ signalling . It contains 10 conserved cysteines which are all involved in disulphide bonds and is structurally related to the ion channel inhibitor toxins BgK and ShK .. +PF08563 P53 transactivation motif
Pfam-B_3515 (release 19.0). The binding of the p53 transactivation domain by regulatory proteins regulates p53 transcription activation. This motif is comprised of a single amphipathic alpha helix and contains a highly conserved sequence [1-2].. +PF08564 Cdc37;
Cdc37 C terminal domain. Pfam-B_3345 (release 6.5). Cdc37 is a protein required for the activity of numerous eukaryotic protein kinases. This domain corresponds to the C terminal domain whose function is unclear. It is found C terminal to the Hsp90 chaperone (heat shock protein 90) binding domain Pfam:PF08565 and the N terminal kinase binding domain of Cdc37 Pfam:PF03234 .. +PF08565 Cdc37;
Cdc37 Hsp90 binding domain. Pfam-B_3345 (release 6.5). Cdc37 is a molecular chaperone required for the activity of numerous eukaryotic protein kinases. This domain corresponds to the Hsp90 chaperone (heat shock protein 90) binding domain of Cdc37 . It is found between the N terminal Cdc37 domain Pfam:PF03234, which is predominantly involved in kinase binding, and the C terminal domain of Cdc37 Pfam:PF08564 whose function is unclear.. +PF08566 Mitochondrial import protein Pam17
The presequence translocase-associated motor (PAM) drives the completion of preprotein translocation into the mitochondrial matrix. The Pam17 subunit is required for formation of a stable complex between cochaperones Pam16 and Pam18 and promotes the association of Pam16-Pam18 with the presequence translocase . Mitochondria lacking Pam17 are selectively impaired in the import of matrix proteins .. +PF08567 TFIIH p62 subunit, N-terminal domain
Pfam-B_31040 (release 19.0). The N-terminal domain of the TFIIH basal transcription factor complex p62 subunit (BTF2-p62) forms an interaction with the 3' endonuclease XPG, which is essential for activity. The 3' endonuclease XPG is a major component of the nucleotide excision repair machinery. The structure of the N-terminal domain reveals that it adopts a pleckstrin homology (PH) fold [1,2].. +PF08568 DUF1760;
Uncharacterised protein family, YAP/Alf4/glomulin. Mistry J, Wood V, Lonsdale D. This entry contains a number of protein families with apparently unrelated functions. These include the YAP binding proteins of yeasts. These are stress response and redox homeostasis proteins, induced by hydrogen peroxide or induced in response to alkylating agent methyl methanesulphonate (MMS) [1,2]. The family includes Aberrant root formation protein 4 (Alf4) of Arabidopsis thaliana (Mouse-ear cress), which is required for the initiation of lateral roots independent from auxin signalling. It may also function in maintaining the pericycle in the mitotically competent state needed for lateral root formation . The family includes glomulin (FAP68), which is essential for normal development of the vasculature and may represent a naturally occurring ligand of the immunophilins FKBP59 and FKBP12 [4,5].. +PF08569 Mo25-like
Pfam-B_5502 (release 18.0). Mo25-like proteins are involved in both polarised growth and cytokinesis. In fission yeast Mo25 is localised alternately to the spindle pole body and to the site of cell division in a cell cycle dependent manner [1,2].. +PF08570 Protein of unknown function (DUF1761)
Pfam-B_85869 (release 19.0). Family of conserved fungal and bacterial membrane proteins with unknown function.. +PF08571 Yos1-like
Pfam-B_23321 (release 19.0). In yeast, Yos1 is a subunit of the Yip1p-Yif1p complex and is required for transport between the endoplasmic reticulum and the Golgi complex. Yos1 appears to be conserved in eukaryotes .. +PF08572 pre-mRNA processing factor 3 (PRP3)
Pfam-B_7232 (release 18.0). Pre-mRNA processing factor 3 (PRP3) is a U4/U6-associated splicing factor. The human PRP3 has been implicated in autosomal retinitis pigmentosa .. +PF08573 DNA repair protein endonuclease SAE2/CtIP C-terminus
SAE2 is a protein involved in repairing meiotic and mitotic double-strand breaks in DNA. It has been shown to negatively regulate DNA damage checkpoint signalling . SAE2 is homologous to the CtIP proteins in mammals and an homologous protein in plants. Crucial sequence motifs that are highly conserved are the CxxC and the RHR motifs in this C-terminal part of the protein . It is now known to be an endonuclease. In budding yeast, genetic evidence suggests that the SAE2 protein is essential for the processing of hairpin DNA intermediates and meiotic double-strand breaks by Mre11/Rad50 complexes. SAE2 binds DNA and exhibits endonuclease activity on single-stranded DNA independently of Mre11/Rad50 complexes, but hairpin DNA structures are cleaved cooperatively in the presence of Mre11/Rad50 or Mre11/Rad50/Xrs2. Hairpin structures are not processed at the tip by SAE2 but rather at single-stranded DNA regions adjacent to the hairpin. The catalytic activities of SAE2 are important for its biological functions .. +PF08574 Protein of unknown function (DUF1762)
This is a family of proteins of unknown function.\. Swiss:Q07532 is known to interact with RNA polymerase II and deletion of this protein results in hypersensitivity to the K1 killer toxin .. +PF08576 Eukaryotic protein of unknown function (DUF1764)
This is a family of eukaryotic proteins of unknown function. This family contains many hypothetical proteins.. +PF08577 PI31_Prot_Reg;
PI31 proteasome regulator . PI31 is a cellular regulator of proteasome formation and of proteasome-mediated antigen processing .. +PF08578 Protein of unknown function (DUF1765)
This region represents a conserved region found in hypothetical proteins from fungi, mycetozoa and entamoebidae.. +PF08579 Mitochondrial ribonuclease P subunit (RPM2)
Ribonuclease P (RNase P) generates mature tRNA molecules by cleaving their 5' ends. RPM2 is a protein subunit of the yeast mitochondrial RNase P. It has the ability to act as transcriptional activator in the nucleus where it plays a role in defining the steady-state levels of mRNAs for some nucleus-encoded mitochondrial components .. +PF08580 Yeast cortical protein KAR9
The KAR9 protein in Saccharomyces cerevisiae is a cytoskeletal protein required for karyogamy, correct positioning of the mitotic spindle and for orientation of cytoplasmic microtubules . KAR9 localises at the shmoo tip in mating cells and at the tip of the growing bud in anaphase .. +PF08581 Tup N-terminal
Pfam-B_9595 (release 19.0). The N-terminal domain of the Tup protein has been shown to interact with the Ssn6 transcriptional co-repressor .. +PF08583 UPF0287;
Cytochrome c oxidase biogenesis protein Cmc1 like. Cmc1 is a metallo-chaperone like protein which is known to localise to the inner mitochondrial membrane in Saccharomyces cerevisiae. It is essential for full expression of cytochrome c oxidase and respiration . Cmc1 contains two Cx9C motifs and is able to bind copper(I). Cmc1 is thought to play a role in mitochondrial copper trafficking and transfer to cytochrome c oxidase .. +PF08584 Ribonuclease P 40kDa (Rpp40) subunit
The tRNA processing enzyme ribonuclease P (RNase P) consists of an RNA molecule and at least eight protein subunits. Subunits hpop1, Rpp21, Rpp29, Rpp30, Rpp38, and Rpp40 (this entry) are involved in extensive, but weak, protein-protein interactions in the holoenzyme complex .. +PF08585 Domain of unknown function (DUF1767)
Eukaryotic domain of unknown function. This domain is found to the N-terminus of the nucleic acid binding domain.. +PF08586 RSC complex, Rsc14/Ldb7 subunit
RSC is an ATP-dependent chromatin remodelling complex found in yeast. The RSC components Rsc7/Npl6 and Rsc14/Ldb7 interact physically and/or functionally with Rsc3, Rsc30, and Htl1 to form a module important for a broad range of RSC functions .. +PF08587 Ubiquitin associated domain (UBA)
Pfam-B_10238 (Release 18.0). This is a UBA (ubiquitin associated) domain . Ubiquitin is involved in intracellular proteolysis.. +PF08588 Protein of unknown function (DUF1769)
Family of fungal protein with unknown function.. +PF08589 Fungal protein of unknown function (DUF1770)
The function of this family is unknown. These proteins are rather dissimilar except for a single strongly conserved motif (PDLRFEQ).. +PF08590 Domain of unknown function (DUF1771)
Pfam-B_10757 (release 18.0). This domain is always found adjacent to Pfam:PF01713.. +PF08591 Ribonucleotide reductase inhibitor
This family includes S. pombe Spd1. Spd1p inhibits fission yeast RNR activity by interacting with the Cdc22p .. +PF08592 Domain of unknown function (DUF1772)
This domain is of unknown function.. +PF08593 DUF1773;
Meiotically up-regulated glycoproteins C-terminal. Pfam-B_26890 (release 18.0). This is the C-terminal part of some meiotically up-regulated gene products from fission yeast. The actual function is not yet known but the proteins are likely to be cell-surface glycoproteins.. +PF08594 Uncharacterised protein family (UPF0300)
Pfam-B_20198 (release 18.0). This family of proteins appear to be specific to S. pombe.. +PF08595 RXT2-like, N-terminal
The family represents the N-terminal region of RXT2-like proteins. In S. cerevisiae, RXT2 has been demonstrated to be involved in conjugation with cellular fusion (mating) and invasive growth . A high throughput localisation study has localised RXT2 to the nucleus .. +PF08596 Lethal giant larvae(Lgl) like, C-terminal
The Lethal giant larvae (Lgl) tumour suppressor family is conserved from yeast to mammals. The Lgl family functions in cell polarity, at least in part, by regulating SNARE-mediated membrane delivery events at the cell surface . The N-terminal half of Lgl members contains WD40 repeats (see Pfam:PF00400), while the C-terminal half appears specific to the family .. +PF08597 Translation initiation factor eIF3 subunit
This is a family of proteins which are subunits of the eukaryotic translation initiation factor 3 (eIF3). In yeast it is called Hcr1. The Saccharomyces cerevisiae protein Swiss:Q05775 has been shown to be required for processing of 20S pre-rRNA and binds to 18S rRNA and eIF3 subunits Rpg1p and Prt1p .. +PF08598 Sds3-like
Repression of gene transcription is mediated by histone deacetylases containing repressor-co-repressor complexes, which are recruited to promoters of target genes via interactions with sequence-specific transcription factors. The co-repressor complex contains a core of at least seven proteins .\. This family represents the conserved region found in Sds3, Dep1 and BRMS1-homologue p40 proteins.. +PF08599 Nbs1_N;
DNA damage repair protein Nbs1. This C terminal region of the DNA damage repair protein Nbs1 has been identified to be necessary for the binding of Mre11 and Tel1 .. +PF08600 Rsm1-like
Rsm1 is a protein involved in mRNA export from the nucleus . +PF08601 Transcription factor PAP1
Pfam-B_20528 (release 18.0). The transcription factor Pap1 regulates antioxidant-gene transcription in response to H2O2 . This region is cysteine rich. Alkylation of cysteine residues following treatment with a cysteine alkylating agent can mask the accessibility of the nuclear exporter Crm1, triggering nuclear accumulation and Pap1 dependent transcriptional expression .. +PF08602 Mgr1-like, i-AAA protease complex subunit
The S. cerevisiae Mgr1 protein has been shown to be required for mitochondrial viability in yeast lacking mitochondrial DNA. It is a mitochondrial inner membrane protein, which interacts with Yme1 and is a new subunit of the i-AAA protease complex .. +PF08603 CAP;
Adenylate cyclase associated (CAP) C terminal. +PF08604 Nucleoporin Nup153-like
Pfam-B_56527 (release 19.0). This family contains both the nucleoporin Nup153 from human and Nup153 from fission yeast. These have been demonstrated to be functionally equivalent .. +PF08605 Fungal Rad9-like Rad53-binding
In Saccharomyces cerevisiae, Rad9 is a key adaptor protein in DNA damage checkpoint pathways. DNA damage induces Rad9 phosphorylation, and Rad53 specifically associates with this region of Rad9, when phosphorylated, via Rad53 Pfam:PF00498 domains . This region is structurally composed of a pair of TUDOR domains .. +PF08606 Prp19/Pso4-like
Pfam-B_6384 (release 18.0). This region is found specifically in PRP19-like proteins.\. The region represented by this family covers the sequence implicated in self-interaction and a coiled-coil motif . PRP19-like proteins form an oligomer that is necessary for spliceosome assembly .. +PF08534 Redoxin
This family of redoxins includes peroxiredoxin, thioredoxin and glutaredoxin proteins.. +PF08608 Wyosine base formation
Some proteins in this family appear to be important in wyosine base formation in a subset of phenylalanine specific tRNAs. It has been proposed that they participate in converting tRNA(Phe)-m(1)G(37) to tRNA(Phe)-yW .. +PF08609 Nucleotide exchange factor Fes1
Pfam-B_36022 (release 18.0). Fes1 is a cytosolic homologue of Sls1, an ER protein which has nucleotide exchange factor activity. Fes1 in yeast has been shown to bind to the molecular chaperone Hsp70 and has adenyl-nucleotide exchange factor activity .. +PF08610 Peroxisomal membrane protein (Pex16)
Pex16 is a peripheral protein located at the matrix face of the peroxisomal membrane .. +PF08611 Fungal protein of unknown function (DUF1774)
This is a fungal family of unknown function.. +PF08612 TATA_RF;
TATA-binding related factor (TRF) of subunit 20 of Mediator complex. This family of proteins is related to TATA-binding protein (TBP). TBP is a highly conserved RNA polymerase II general transcription factor that binds to the core promoter and initiates assembly of the preinitiation complex. Human TRF has been shown to associate with an RNA polymerase II-SRB complex . This Med20 subunit of Mediator is found in the non-essential part of the head .. +PF08613 Cyclin
Pfam-B_6792 (release 18.0). This family includes many different cyclin proteins. Members include the G1/S-specific cyclin pas1 , and the phosphate system cyclin PHO80/PHO85 .. +PF08614 Autophagy protein 16 (ATG16)
Autophagy is a ubiquitous intracellular degradation system for eukaryotic cells.\. During autophagy, cytoplasmic components are enclosed in autophagosomes and delivered to lysosomes/vacuoles. ATG16 (also known as Apg16) has been shown to bind to Apg5 and is required for the function of the Apg12p-Apg5p conjugate in the yeast autophagy pathway .. +PF08615 RNase_H1_sml;
Ribonuclease H2 non-catalytic subunit (Ylr154p-like). Pfam-B_36578 (release 18.0). This entry represents the non-catalytic subunit of RNase H2, which in S. cerevisiae is Ylr154p/Rnh203p Swiss:Q12338. Whereas bacterial and archaeal RNases H2 are active as single polypeptides, the Saccharomyces cerevisiae homologue, Rnh2Ap, when expressed in Escherichia coli, fails to produce an active RNase H2. For RNase H2 activity three proteins are required [Rnh2Ap (Rnh201p), Ydr279p (Rnh202p) and Ylr154p (Rnh203p)]. Deletion of any one of the proteins or mutations in the catalytic site in Rnh2A leads to loss of RNase H2 activity . RNase H2 is an endonuclease that specifically degrades the RNA of RNA:DNA hybrids. It participates in DNA replication, possibly by mediating the removal of lagging-strand Okazaki fragment RNA primers during DNA replication.. +PF08616 SPB_interacting;
Stabilization of polarity axis. Swiss:Q99222 has been shown to interact with the outer plaque of the spindle pole body . In Aspergillus nidulans the protein member is necessary for stabilization of the polarity axes during septation, and in S. cerevisiae it functions as a polarisation-specific docking factor .. +PF08617 Kinase binding protein CGI-121
CGI-121 has been shown to bind to the p53-related protein kinase (PRPK) . PRPK is a novel protein kinase which binds to and induces phosphorylation of the tumour suppressor protein p53. CGI-121 is part of a conserved protein complex, KEOPS. The KEOPS complex is involved in telomere uncapping and telomere elongation . Interestingly this family also includes archaeal homologues, formerly in the DUF509 family. A structure for these proteins has been solved by structural genomics.. +PF08618 Transcription factor Opi1
Opi1 is a leucine zipper containing yeast transcription factor that negatively regulates phospholipid biosynthesis . It represses the expression of several UAS(INO) cis acting element containing genes and its activity is mediated by phosphorylations catalysed by protein kinase A, protein kinase C and casein kinase II .. +PF08619 Alkali metal cation/H+ antiporter Nha1 C terminus
The C terminus of the plasma membrane Nha1 antiporter plays an important role in the immediate cell response to hypo-osmotic shock which prevents an excessive loss of ions and water . This domain is found with Pfam:PF00999.. +PF08620 RPAP1-like, C-terminal
Inhibition of RPAP1 synthesis in Saccharomyces cerevisiae results in changes in global gene expression that are similar to those caused by the loss of the RNAPII subunit Rpb11 .\. This entry represents the C-terminal region that contains the motif GLHHH. This region is conserved from yeast to humans.. +PF08621 RPAP1-like, N-terminal
Inhibition of RPAP1 synthesis in Saccharomyces cerevisiae results in changes in global gene expression that are similar to those caused by the loss of the RNAPII subunit Rpb11 .\. This entry represents the N-terminal region of RPAP-1 that is conserved from yeast to humans.. +PF08622 Svf1-like
Family of proteins that are involved in survival during oxidative stress .. +PF08623 TATA-binding protein interacting (TIP20)
TIP120 (also known as cullin-associated and neddylation-dissociated protein 1) is a TATA binding protein interacting protein that enhances transcription .. +PF08624 Chromatin remodelling complex Rsc7/Swp82 subunit
Pfam-B_56720 (release 18.0). This family has been identified as a subunit of chromatin remodelling complexes. Saccharomyces cerevisiae Swiss:P32832 and its paralogue Swiss:P43554 have been identified as subunits of the RSC chromatin remodelling complex, and SWI/SNF chromatin remodelling complex respectively .. +PF08625 Utp13 specific WD40 associated domain
Pfam-B_8625 (release 19.0). Utp13 is a component of the five protein Pwp2 complex that forms part of a stable particle subunit independent of the U3 small nucleolar ribonucleoprotein that is essential for the initial assembly steps of the 90S pre-ribosome . Pwp2 is capable of interacting directly with the 35 S pre-rRNA 5' end .. +PF08626 Trs120;
Transport protein Trs120 or TRAPPC9, TRAPP II complex subunit. Pfam-B_15686 (release 19.0). This region is found at the N terminal of Saccharomyces cerevisiae Trs120 protein (Swiss:Q04183). Trs120 is a subunit of the multiprotein complex TRAPP (transport particle protein) which functions in ER to Golgi traffic . Trs120 is specific to the larger TRAPP complex, TRAPP II, along with Trs65p and Trs130p(TRAPPC10). It is suggested that Trs120p is required for the stability of the Trs130p subunit, suggesting that these two proteins might interact in some way . It is likely that there is a complex function for TRAPP II in multiple pathways .. +PF08627 CRT-like
Pfam-B_67420 (release 18.0). This region is found in proteins related to Plasmodium falciparum chloroquine resistance transporter (CRT).. +PF08628 Sorting nexin C terminal
Pfam-B_5897 (release 19.0). This region is found at the C terminal of proteins belonging to the sorting nexin family. It is found on proteins which also contain Pfam:PF00787.. +PF08629 PDE8 phosphodiesterase
Pfam-B_72889 (release 17.0). This region is found in members of the PDE8 phosphodiesterase family . It is found with Pfam:PF00233.. +PF08630 Dfp1/Him1, central region
Pfam-B_28140 (release 19.0). This is the middle region described by Ogino et al . This region, together with the C-terminal zinc finger (Pfam:PF07535) is essential for the mitotic and kinase activation functions of Dfp1/Him1 .. +PF08631 Meiosis protein SPO22/ZIP4 like
SPO22/ZIP4 in yeast is a meiosis specific protein involved in sporulation . It has been shown to regulate crossover distribution by promoting synaptonemal complex formation .. +PF08632 Zds1_C;
Activator of mitotic machinery Cdc14 phosphatase activation C-term. Pfam-B_44907 (release 19.0). This region of the Zds1 protein is critical for sporulation and has also been shown to suppress the calcium sensitivity of Zds1 deletions . The C-terminal motif is common to both Zds1 and Zds2 proteins, both of which are putative interactors of Cdc55 and are required for the completion of mitotic exit and cytokinesis. They both contribute to timely Cdc14 activation during mitotic exit and are required downstream of separase to facilitate nucleolar Cdc14 release .. +PF08633 Rox3 mediator complex subunit
The mediator complex is part of the RNA polymerase II holoenzyme. Rox3 is a subunit of the mediator complex.. +PF08634 Mitochondrial protein Pet127
Pet127 has been implicated in mitochondrial RNA stability and/or processing and is localised to the mitochondrial membrane . The Pet127 family is part of the PD-(D/E)XK nuclease superfamily including a full set of active site residues.. +PF08635 Putative oxidoreductase C terminal
This is the C terminal of a family of putative oxidoreductases.. +PF08636 ER protein Pkr1
Pkr1 has been identified as an ER protein of unknown function.. +PF08637 ATP synthase regulation protein NCA2
Pfam-B_15813 (release 19.0). NCA2 has been shown to be required for the regulation of ATP synthase subunits Atp6p and Atp8p in Saccharomyces cerevisiae .. +PF08638 MED14;
Mediator complex subunit MED14. Pfam-B_13303 (release 19.0). Saccharomyces cerevisiae RGR1 mediator complex subunit affects chromatin structure, transcriptional regulation of diverse genes and sporulation, required for glucose repression, HO repression, RME1 repression and sporulation . This subunit is also found in higher eukaryotes and Med14 is the agreed unified nomenclature for this subunit. Med14 is found in the tail region of Mediator .. +PF08639 DNA replication regulator SLD3
The SLD3 DNA replication regulator is required for loading and maintenance of Cdc45 on chromatin during DNA replication .. +PF08640 U3 small nucleolar RNA-associated protein 6
Pfam-B_8720 (release 19.0). This is a family of U3 nucleolar RNA-associated proteins which are involved in nucleolar processing of pre-18S ribosomal RNA .. +PF08641 Kinetochore protein Mis14 like
Mis14 is a kinetochore protein which is known to be recruited to kinetochores independently of CENP-A .. +PF08642 Histone deacetylation protein Rxt3
Rxt3 has been shown in yeast to be required for histone deacetylation .. +PF08643 Fungal family of unknown function (DUF1776)
Mistry J, Groocock L. This is a fungal family of unknown function. One of the proteins in this family Swiss:P32792 has been localised to the mitochondria .. +PF08644 FACT complex subunit (SPT16/CDC68)
Pfam-B_4478 (release 19.0). Proteins in this family are subunits of the FACT complex. The FACT complex plays a role in transcription initiation and promotes binding of TATA-binding protein (TBP) to a TATA box in chromatin .. +PF08645 Polynucleotide kinase 3 phosphatase
Pfam-B_6220 (release 19.0). Polynucleotide kinase 3 phosphatases play a role in the repair of single breaks in DNA induced by DNA-damaging agents such as gamma radiation and camptothecin . . +PF08646 Replication factor-A C terminal domain
Pfam-B_3457 (release 19.0). This domain is found at the C terminal of replication factor A. Replication factor A (RPA) binds single-stranded DNA and is involved in replication, repair, and recombination of DNA .. +PF08647 BRE1 E3 ubiquitin ligase
Pfam-B_35727 (release 19.0). BRE1 is an E3 ubiquitin ligase that has been shown to act as a transcriptional activator through direct activator interactions .. +PF08648 Protein of unknown function (DUF1777)
This is a family of eukaryotic proteins of unknown function. Some of the proteins in this family are putative nucleic acid binding proteins.. +PF08649 DASH complex subunit Dad1
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules . Dad1 remains bound to kinetochores throughout the cell cycle and its association is dependent on Mis6 and Mal2 .. +PF08650 DASH complex subunit Dad4
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3].. +PF08651 DASH complex subunit Duo1
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3].. +PF08652 RAI1 like PD-(D/E)XK nuclease
Pfam-B_13095 (release 19.0). +PF08653 DASH complex subunit Dam1
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules . . +PF08654 DASH complex subunit Dad2
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3].. +PF08655 DASH complex subunit Ask1
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules . . +PF08656 DASH complex subunit Dad3
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3].. +PF08657 DASH complex subunit Spc34
The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis . In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules .. +PF08658 Rad54 N terminal
Pfam-B_26946 (release 19.0). This is the N terminal of the DNA repair protein Rad54 .. +PF08659 KR domain
This enzymatic domain is part of bacterial polyketide synthases and catalyses the first step in the reductive modification of the beta-carbonyl centres in the growing polyketide chain. It uses NADPH to reduce the keto group to a hydroxy group .. +PF08660 Oligosaccharide biosynthesis protein Alg14 like
Pfam-B_12992 (release 19.0). Alg14 is involved in dolichol-linked oligosaccharide biosynthesis and anchors the catalytic subunit Alg13 to the ER membrane .. +PF08661 Replication factor A protein 3
Replication factor A is involved in eukaryotic DNA replication, recombination and repair.. +PF08662 Eukaryotic translation initiation factor eIF2A
Pfam-B_7957 (release 19.0). This is a family of eukaryotic translation initiation factors.. +PF08663 HalX domain
HalX is a domain of unknown function, previously (mis)annotated as HoxA-like transcriptional regulator.. +PF08664 YcbB domain
YcbB is a DNA-binding domain .. +PF08665 PglZ domain
This family is a member of the Alkaline phosphatase clan.. +PF08666 SAF domain
This domain family includes a range of different proteins, such as antifreeze proteins, flagellar FlgA proteins and CpaB pilus proteins.. +PF08667 BetR domain
This family includes an N-terminal helix-turn-helix domain.. +PF08668 HDOD domain
+PF08669 Glycine cleavage T-protein C-terminal barrel domain
Pfam-B_933 (release 4.0). This is a family of glycine cleavage T-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. The T-protein is an aminomethyl transferase. . +PF08670 MEKHLA domain
The MEKHLA domain shares similarity with the PAS domain and is found in the 3' end of plant HD-ZIP III homeobox genes, and bacterial proteins.. +PF08671 Anti-repressor SinI
SinR is a pleiotropic regulator of several late growth processes. It is a tetrameric DNA binding protein whose activity is down-regulated through the formation of a SinI:SinR protein complex. When complexed with SinI, the SinR tetramer is disrupted such that it is no longer able to bind DNA.. +PF08672 Anaphase promoting complex (APC) subunit 2
The anaphase promoting complex or cyclosome (APC2) is an E3 ubiquitin ligase which is part of the SCF family of ubiquitin ligases. Ubiquitin ligases catalyse the transfer of ubiquitin from the ubiquitin conjugating enzyme (E2), to the substrate protein.. +PF08673 Phosphoserine phosphatase RsbU, N-terminal domain
RsbU is a phosphoserine phosphatase which acts as a positive regulator of the general stress-response factor of gram positive organisms, sigma-B. The phosphatase activity of RsbU is stimulated by association with the RsbT kinase. Deletions in the N terminal domain are deleterious to the activity of RsbU .. +PF08674 Acetylcholinesterase tetramerisation domain
The acetylcholinesterase tetramerisation domain is found at the C terminus and forms a left handed superhelix.. +PF08675 RNA binding domain
This domain corresponds to the RNA binding domain of Poly(A)-specific ribonuclease (PARN).. +PF08676 MutL C terminal dimerisation domain
MutL and MutS are key components of the DNA repair machinery that corrects replication errors . MutS recognises mispaired or unpaired bases in a DNA duplex and in the presence of ATP, recruits MutL to form a DNA signaling complex for repair. The N terminal region of MutL contains the ATPase domain and the C terminal is involved in dimerisation .. +PF08677 GP11 baseplate wedge protein
GP11 is a viral structural protein that connects short tail fibres to the baseplate. The tail region is responsible for attachment to the host bacteria during infection.. +PF08678 Rsbr N terminal
Rsbr is a regulator of the RNA polymerase sigma factor subunit sigma(B).\. The structure of the N terminal domain belongs to the globin fold superfamily .. +PF08679 Dissimilatory sulfite reductase D (DsrD)
The structure of the DsrD protein has shown it to contain a winged-helix motif similar to those found in DNA binding proteins . The structure suggests a possible role for DsrD in transcription or translation of genes which catalyse dissimilatory sulfite reduction.. +PF08680 Protein of unknown function (DUF1779)
This is a family of uncharacterised proteins. The structure of the ywmB protein from Bacillus subtilis has shown it to adopt an alpha/beta fold.. +PF08681 Protein of unknown function (DUF1778)
This is a family of uncharacterised proteins. The structure of one of the hypothetical proteins in this family has been solved and it forms a helix structure which may form interactions with DNA.. +PF08682 Protein of unknown function (DUF1780)
This is a family of uncharacterised proteins. The structure of a hypothetical protein from Pseudomonas aeruginosa has shown it to adopt an alpha/beta fold.. +PF08683 DUF1781; CKK;
Microtubule-binding calmodulin-regulated spectrin-associated. This is the C-terminal domain of a family of eumetazoan proteins collectively defined as calmodulin-regulated spectrin-associated, or CAMSAP, proteins. CAMSAP proteins carry an N-terminal region that includes the CH domain, a central region including a predicted coiled-coil and this C-terminal, or CKK, domain - defined as being present in CAMSAP, KIAA1078 and KIAA1543. The C-terminal domain is the part of the CAMSAP proteins that binds to microtubules. The domain appears to act by producing inhibition of neurite extension, probably by blocking microtubule function. CKK represents a domain that has evolved with the metazoa. The structure of a murine hypothetical protein from RIKEN cDNA has shown the domain to adopt a mainly beta barrel structure with an associated alpha-helical hairpin.. +PF08684 DNA mimic ocr
The structure of an ocr protein from bacteriophage T7 has shown that this protein mimics the size and shape of a bent DNA molecule . ocr has also been shown to be an inhibitor of the complex type I DNA restriction enzymes .. +PF08685 GON domain
Mistry J, Rawlings ND. The GON domain is found in the ADAMTS (a disintegrin and metalloproteinase domain with thrombospondin type-1 modules) family of proteins. It contains several conserved cysteine residues.. +PF08686 PLAC (protease and lacunin) domain
Mistry J, Rawlings ND. The PLAC (protease and lacunin) domain is a short six-cysteine region that is usually found at the C terminal of proteins. It is found in a range of proteins including PACE4 (paired basic amino acid cleaving enzyme 4) and the extracellular matrix protein lacunin .. +PF08687 Apx/Shroom domain ASD2
Mistry J, Hildebrand JD. This region is found in the actin binding protein Shroom which mediates apical constriction in epithelial cells and is required for neural tube closure.. +PF08688 Apx/Shroom domain ASD1
Mistry J, Hildebrand JD. This region is found in the actin binding protein Shroom which mediates apical constriction in epithelial cells and is required for neural tube closure. ASD1 has been implicated directly in F-actin binding.. +PF08689 Mediator complex subunit Med5
The mediator complex is required for the expression of nearly all RNA pol II dependent genes in Saccharomyces cerevisiae. Deletion of the MED5 gene leads to increased transcription of nuclear genes encoding components of the oxidative phosphorylation machinery, and decreased transcription of mitochondrial genes encoding components of the same machinery . There is no orthologue from pombe, and this subunit appears to be fungal specific .. +PF08690 GET complex subunit GET2
This family corresponds to the GET complex subunit GET2. The GET complex is involved in the retrieval of ER resident proteins from the Golgi .. +PF08691 DNA repair proteins Nse5 and Nse6
Nse5 and Nse6 are non essential nuclear proteins that are critical for chromosome segregation in fission yeast . Nse5 forms a dimer with Nse6 and facilitates DNA repair as part of the Smc5-Smc6 holocomplex.. +PF08692 Mitochondrial protein Pet20
Pet20 is a mitochondrial protein which is thought to play a role in the correct assembly/maintenance of mitochondrial components .. +PF08693 Transmembrane alpha-helix domain
SKG6/Axl2 are membrane proteins that show polarised intracellular localisation . SKG6_Tmem is the highly conserved transmembrane alpha-helical domain of SKG6 and Axl2 proteins , . The full-length fungal protein has a negative regulatory function in cytokinesis .. +PF08694 DUF1782;
Ubiquitin-fold modifier-conjugating enzyme 1. Ubiquitin-like (UBL) post-translational modifiers are covalently linked to most, if not all, target protein(s) through an enzymatic cascade analogous to ubiquitylation, consisting of E1 (activating), E2 (conjugating), and E3 (ligating) enzymes. Ubiquitin-fold modifier 1 (Ufm1) a ubiquitin-like protein is activated by a novel E1-like enzyme, Uba5, by forming a high-energy thioester bond. Activated Ufm1 is then transferred to its cognate E2-like enzyme, Ufc1, in a similar thioester linkage. This family represents the E2-like enzyme.. +PF08695 DUF1783;
Cytochrome oxidase complex assembly protein 1. Coa1 is an inner mitochondrial membrane protein that associates with Shy1 and is required for cytochrome oxidase complex IV assembly. It contains a conserved hydrophobic segment (amino acids 74-92) with the potential to form a membrane-spanning helix. The N-terminus of Coa1 is rich in positively charged amino acids and could form an amphipathic alpha helix, characteristic of a mitochondrial presequence. A cleavage site for the mitochondrial processing peptidase is predicted adjacent to the presequence. Upon in vitro import into mitochondria, Coa1 is processed to a mature form, indicating that it possesses a cleavable presequence . The eukaryotic cytochrome oxidase complex consists of 12-13 subunits, with three mitochondrial encoded subunits, Cox1-Cox3, forming the core enzyme. Translation of the Cox1 transcript requires the two promoters, Pet309 and Mss51, and the latter has an additional role in translational elongation. Coa1 is necessary for linking the activity of Mss51 to Cox1 insertion into the assembly complex .. +PF08696 DNA replication factor Dna2
Pfam-B_8878 (release 19.0). Dna2 is a DNA replication factor with single-stranded DNA-dependent ATPase, ATP-dependent nuclease, ( 5'-flap endonuclease) and helicase activities. It is required for Okazaki fragment processing and is involved in DNA repair pathways .. +PF08698 DUF1784;
Fcf2 pre-rRNA processing. Pfam-B_13623 (release 19.0). This is a family of eukaryotic nucleolar proteins that are involved in pre-rRNA processing .. +PF08699 Domain of unknown function (DUF1785)
Pfam-B_1585 (release 19.0). This region is found in argonaute proteins and often co-occurs with Pfam:PF02179 and Pfam:PF02171.. +PF08700 Vps51/Vps67
This family includes a presumed domain found in a number of components of vesicular transport. The VFT tethering complex (also known as GARP complex, Golgi associated retrograde protein complex, Vps53 tethering complex) is a conserved eukaryotic docking complex which is involved in recycling of proteins from endosomes to the late Golgi . Vps51 (also known as Vps67) is a subunit of VFT and interacts with the SNARE Tlg1 . Cog1_N is the N-terminus of the Cog1 subunit of the eight-unit Conserved Oligomeric Golgi (COG) complex that participates in retrograde vesicular transport and is required to maintain normal Golgi structure and function. The subunits are located in two lobes and Cog1 serves to bind the two lobes together probably via the highly conserved N-terminal domain of approximately 85 residues .. +PF08701 GNL3L/Grn1 putative GTPase
Pfam-B_22650 (release 19.0). Grn1 (yeast) and GNL3L (human) are putative GTPases which are required for growth and play a role in processing of nucleolar pre-rRNA . This family contains a potential nuclear localisation signal.. +PF08702 Fibrinogen alpha/beta chain family
Fibrinogen is a protein involved in platelet aggregation and is essential for the coagulation of blood. This domain forms part of the central coiled coil region of the protein which is formed from two sets of three non-identical chains (alpha, beta and gamma).. +PF08703 PLC-beta C terminal
This domain corresponds to the alpha helical C terminal domain of phospholipase C beta.. +PF08704 tRNA methyltransferase complex GCD14 subunit
Pfam-B_5615 (release 19.0). GCD14 is a subunit of the tRNA methyltransferase complex and is required for 1-methyladenosine modification and maturation of initiator methionyl-tRNA .. +PF08705 Gag protein p6
HIV protein p6 contains two late-budding domains (L domains) which are short sequence motifs essential for viral particle release. p6 interacts with the endosomal sorting complex and represents a docking site for several cellular and binding factors . The PTAP motif interacts with the cellular budding factor TSG101 . This domain is also found in some chimpanzee immunodeficiency virus (SIV-cpz) proteins.. +PF08706 D5 N terminal like
This domain is found in D5 proteins of DNA viruses and in bacteriophage P4 DNA primases.. +PF08707 Primase C terminal 2 (PriCT-2)
This alpha helical domain is found at the C terminal of primases.. +PF08708 Primase C terminal 1 (PriCT-1)
This alpha helical domain is found at the C terminal of primases.. +PF08709 Inositol 1,4,5-trisphosphate/ryanodine receptor
This domain corresponds to the ligand binding region on inositol 1,4,5-trisphosphate receptor, and the N terminal region of the ryanodine receptor. Both receptors are involved in Ca2+ release. They can couple to the activation of neurotransmitter-gated receptors and voltage-gated Ca2+ channels on the plasma membrane, thus allowing the endoplasmic reticulum to discriminate between different types of neuronal activity .. +PF08710 nsp9 replicase
nsp9 is a single-stranded RNA-binding viral protein likely to be involved in RNA synthesis . Its structure comprises a single beta barrel .. +PF08711 TFIIS;
TFIIS helical bundle-like domain. pdb_1wjt & Pfam-B_7936 (release 8.0). Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function . Mediator exists in two major forms in human cells: a smaller form that interacts strongly with pol II and activates transcription, and a large form that does not interact strongly with pol II and does not directly activate transcription. Notably, the 'small' and 'large' Mediator complexes differ in their subunit composition: the Med26 subunit preferentially associates with the small, active complex, whereas cdk8, cyclin C, Med12 and Med13 associate with the large Mediator complex . This family includes the C terminal region of a number of eukaryotic hypothetical proteins which are homologous to the Saccharomyces cerevisiae protein IWS1. IWS1 is known to be a Pol II transcription elongation factor and interacts with Spt6 and Spt5 [5,6].. +PF08712 Scaffold protein Nfu/NifU N terminal
This domain is found at the N terminus of NifU and NifU related proteins, and in the human Nfu protein. Both of these proteins are thought to be involved in the assembly of iron-sulphur clusters .. +PF08713 DNA alkylation repair enzyme
Proteins in this family are predicted to be DNA alkylation repair enzymes. The structure of a hypothetical protein in this family shows it to adopt a supercoiled alpha helical structure.. +PF08714 Formaldehyde-activating enzyme (Fae)
Formaldehyde-activating enzyme is an enzyme required for energy metabolism and formaldehyde detoxification. It catalyses the condensation of formaldehyde and tetrahydromethanopterin to methylene tetrahydromethanopterin .. +PF08715 Papain like viral protease
This family of viral proteases are similar to the papain protease and are required for proteolytic processing of the replicase polyprotein. The structure of this protein has shown it adopts a fold similar to that of de-ubiquitinating enzymes .. +PF08716 nsp7 replicase
nsp7 (non structural protein 7) has been implicated in viral RNA replication and is predominantly alpha helical in structure . It forms a hexadecameric supercomplex with nsp8 that adopts a hollow cylinder-like structure . The dimensions of the central channel and positive electrostatic properties of the cylinder imply that it confers processivity on RNA-dependent RNA polymerase .. +PF08717 nsp8 replicase
Viral nsp8 (non structural protein 8) forms a hexadecameric supercomplex with nsp7 that adopts a hollow cylinder-like structure . The dimensions of the central channel and positive electrostatic properties of the cylinder imply that it confers processivity on RNA-dependent RNA polymerase .. +PF08718 Glycolipid transfer protein (GLTP)
GLTP is a cytosolic protein that catalyses the intermembrane transfer of glycolipids .. +PF08719 Domain of unknown function (DUF1768)
This is a domain of unknown function. It is alpha helical in structure. The GO annotation for this protein suggests it is involved in nematode larval development and has a positive regulation on growth rate.. +PF08720 FluC_stalk;
Influenza C hemagglutinin stalk. This domain corresponds to the stalk segment of hemagglutinin in influenza C virus. It forms a coiled coil structure .. +PF08721 TnsA_C;
TnsA endonuclease C terminal. The Tn7 transposase is composed of proteins TnsA and TnsB. DNA breakage at the 5' end of the transposon is carried out by TnsA, and breakage and joining at the 3' end is carried out by TnsB. The C terminal domain of TnsA binds DNA.. +PF08722 TnsA_N;
TnsA endonuclease N terminal. The Tn7 transposase is composed of proteins TnsA and TnsB. DNA breakage at the 5' end of the transposon is carried out by TnsA, and breakage and joining at the 3' end is carried out by TnsB. The N terminal domain of TnsA is catalytic.. +PF08723 Gag protein p15
Gag p15 is a viral membrane-binding matrix protein which is alpha helical in structure.. +PF08724 Rep protein catalytic domain like
Adeno-associated virus (AAV) Replication (Rep) protein is essential for viral replication and integration. The catalytic domain has DNA binding and endonuclease activity.. +PF08725 Integrin beta cytoplasmic domain
Integrins are a group of transmembrane proteins which function as extracellular matrix receptors and in cell adhesion. Integrins are ubiquitously expressed and are heterodimeric, each composed of an alpha and beta subunit. Several variations of the alpha and beta subunits exist, and association of different alpha and beta subunits can have a different binding specificity. This domain corresponds to the cytoplasmic domain of the beta subunit.. +PF08726 efhand_Ca_insen;
Ca2+ insensitive EF hand. EF hands are helix-loop-helix binding motifs involved in the regulation of many cellular processes. EF hands usually bind to Ca2+ ions which causes a major conformational change that allows the protein to interact with its designated targets. This domain corresponds to an EF hand which has partially or entirely lost its calcium-binding properties. The calcium insensitive EF hand is still able to mediate protein-protein recognition .. +PF08727 Poliovirus 3A protein like
This domain is found in positive-strand RNA viruses. The 3A protein is a critical component of the poliovirus replication complex, and is also an inhibitor of host cell ER to Golgi transport.. +PF08728 CRT10
CRT10 is a transcriptional regulator of ribonucleotide reductase (RNR) genes . RNR catalyses the rate limiting step in dNTP synthesis. Mutations in CRT10 have been shown to enhance hydroxyurea resistance .. +PF08729 HPC2; HRD;
HPC2 and ubinuclein domain. Mistry J, Wood V, Balaji S, Iyer LM, Aravind L. HPC2 (Histone promoter control 2) is required for cell-cycle regulation of histone transcription . It regulates transcription of the histone genes during the S-phase of the cell cycle by repressing transcription at other cell cycle stages. HPC2 mutants display synthetic interactions with FACT complex which allows RNA Pol II to elongate through nucleosomes . Hpc2 is one of the proteins of one of the multi-subunit complexes that mediate replication-independent nucleosome assembly, along with histone chaperone proteins. The Hip4 sequence from Schizosaccharomyces pombe is an integral component of this complex that is required for transcriptional silencing at multiple loci . HPC2, ubinuclein/yemanuclein, and the cell cycle regulator FLJ25778 share a conserved domain that is predicted to bind histone tails . This domain is also referred to as the HRD or Hpc2-related domain.. +PF08730 Rad33
Rad33 is involved in nucleotide excision repair (NER). NER is the main pathway for repairing DNA lesions induced by UV. Cells deleted for RAD33 display intermediate UV sensitivity that is epistatic with NER .. +PF08731 Transcription factor AFT
AFT (activator of iron transcription) is an iron regulated transcriptional activator that regulates the expression of genes involved in iron homeostasis . This family includes the paralogous pair of transcription factors AFT1 and AFT2.. +PF08732 HIM1
HIM1 (high induction of mutagenesis protein 1) plays a role in the control of spontaneous and induced mutagenesis . It is thought to participate in the control of processing of mutational intermediates appearing during error-prone bypass of DNA damage.. +PF08733 PalH/RIM21
PalH (also known as RIM21) is a transmembrane protein required for proteolytic cleavage of Rim101/PacC transcription factors which are activated by C terminal proteolytic processing. Rim101/PacC family proteins play a key role in pH-dependent responses and PalH has been implicated as a pH sensor .. +PF08734 GYD domain
This protein is found in a range of bacteria. It is usually less than 100 amino acids in length. The function of the protein is unknown. It may belong to the dimeric alpha/beta barrel superfamily.. +PF08735 Putative pyruvate format-lyase activating enzyme (DUF1786)
This family is annotated as pyruvate formate-lyase activating enzyme (EC:1.97.1.4) in UniProt. It is not clear where this annotation comes from.. +PF08736 FERM adjacent (FA)
This region is found adjacent to Band 4.1 / FERM domains (Pfam:PF00373) in a subset of FERM containing proteins. The region has been hypothesised to play a role in regulatory adaptation, based on similarity to other protein kinase substrates .. +PF08737 Rgp1
Rgp1 forms a heterodimer with Ric1 (Pfam:PF07064) which associates with Golgi membranes and functions as a guanyl-nucleotide exchange factor .. +PF08738 Gon7 family
In S. cerevisiae Gon7 is a member of the KEOPS protein complex, a complex proposed to be involved in transcription and promoting telomere uncapping and telomere elongation .. +PF08740 BCS1 N terminal
Pfam-B_10126 (release 19.0). This domain is found at the N terminal of the mitochondrial ATPase BCS1. It encodes the import and intramitochondrial sorting for the protein .. +PF08741 YwhD family
PSI2 target PSI-blast from BH3813. This family of proteins are currently uncharacterised. They are around 170 amino acids in length.. +PF08742 DUF1787;
This domain contains 8 conserved cysteine residues, but this family only contains 7 of them due to overlaps with other domains. It is found in disease-related proteins including von Willebrand factor, Alpha tectorin, Zonadhesin and Mucin. It is often found on proteins containing Pfam:PF00094 and Pfam:PF01826.. +PF08743 SUMO_ligase; Nse4;
Nse4 is a component of the Smc5/6 DNA repair complex. It forms interactions with Smc5 and Nse1 . The exact function of this highly conserved C-terminal domain is not known.. +PF08744 Plant transcription factor NOZZLE
Pfam-B_86265 (release 19.0). NOZZLE is a transcription factor that plays a role in patterning the proximal-distal and adaxial-abaxial axes .. +PF08745 UPF0278 family
Members of this family are uncharacterised proteins about 200 amino acids in length.. +PF08746 RING-like domain
This is a zinc finger domain that is related to the C3HC4 RING finger domain (Pfam:PF00097).. +PF08747 Domain of unknown function (DUF1788)
PSI2 target Npun02004481. Putative uncharacterised domain in proteins of length around 200 amino acids.. +PF08748 Domain of unknown function (DUF1789)
PSI2 target CAE43632.1. Putative uncharacterised domain found in phage-related conserved hypothetical protein from Bordetella.. +PF08750 CNP1-like family
PSI2 target CAB84161.1. This family of proteins are likely to be lipoproteins. CNP1 (cryptic neisserial protein) has been expressed in E. coli and shown to be localised periplasmically .. +PF08751 TrwC relaxase
Relaxases are DNA strand transferases which function during the conjugative cell to cell DNA transfer. TrwC binds to the origin of transfer (oriT) and melts the double helix.. +PF08752 Gamma-COP;
Coatomer gamma subunit appendage platform subdomain. COPI-coated vesicles function in retrograde transport from the Golgi to the ER, and in intra-Golgi transport. This is the platform subdomain of the coatomer gamma subunit appendage domain. It carries a protein-protein interaction site at UniProt:P53620, residue W776, which in yeast binds to the ARFGAP Glo3p, and in mammalian gamma-COP binds to a Glo3p orthologue, ARFGAP2 .. +PF08753 NikR C terminal nickel binding domain
NikR is a transcription factor that regulates nickel uptake. It consists of two dimeric DNA binding domains separated by a tetrameric regulatory domain that binds nickel. This domain corresponds to the C terminal regulatory domain which contains four nickel binding sites at the tetramer interface .. +PF08755 Hemimethylated DNA-binding protein YccV like
YccV is a hemimethylated DNA binding protein which has been shown to regulate dnaA gene expression . The structure of one of the hypothetical proteins in this family has been solved and it forms a beta sheet structure with a terminating alpha helix.. +PF08756 YfkB-like domain
PSI2 structural target yfkB. This protein is adjacent to YfkA in B. subtilis. In other bacterial species it is fused to this protein. As YfkA contains a Radical SAM domain it suggests this domain interacts with them.. +PF08757 CotH protein
PSI2 structural target cotH. Members of this family include the spore coat protein H (cotH).. +PF08758 Cadherin prodomain like
Cadherins are a family of proteins that mediate calcium dependent cell-cell adhesion. They are activated through cleavage of a prosequence in the late Golgi. This domain corresponds to the folded region of the prosequence, and is termed the prodomain. The prodomain shows structural resemblance to the cadherin domain, but lacks all the features known to be important for cadherin-cadherin interactions .. +PF08759 Domain of unknown function (DUF1792)
PSI2 structural target AAO75156.1. This putative domain is probably misannotated as a glycosyl transferase 8 family member. This domain is found at the C-terminus of proteins such as Swiss:Q97P75 that also contain the glycosyl transferase domain at the N-terminus.. +PF08760 Domain of unknown function (DUF1793)
PSI2 structural target AAO78587.1. This presumed domain is found at the C-terminus of a glutaminase protein from fungi . This domain is also found as a single domain protein in Bacteroides thetaiotaomicron.. +PF08761 dUTPase
2-Deoxyuridine 5-triphosphate nucleotidohydrolase (dUTPase) catalyses the hydrolysis of dUTP to dUMP and pyrophosphate (EC:3.6.1.23). Members of this family have a novel all-alpha fold and are unrelated to the all-beta fold found in dUTPases of the majority of organisms . This family contains both dUTPases and homologues of dUTPase, including dCTPase of phage T4.. +PF08762 CRPV capsid protein like
This is a family of capsid proteins found in positive stranded ssRNA viruses such as cricket paralysis virus (CRPV). It forms an all beta sheet structure .. +PF08763 Voltage gated calcium channel IQ domain
Voltage gated calcium channels control cellular calcium entry in response to changes in membrane potential. The isoleucine-glutamine (IQ) motif in the voltage gated calcium channel IQ domain interacts with hydrophobic pockets of Ca2+/calmodulin . The interaction regulates two self-regulatory calcium dependent feedback mechanisms, calcium dependent inactivation (CDI), and calcium-dependent facilitation (CDF).. +PF08764 Staphylococcus aureus coagulase
Staphylococcus aureus secretes a cofactor called coagulase. Coagulase is an extracellular protein that forms a complex with human prothrombin, and activates it without the usual proteolytic cleavages. The resulting complex directly initiates blood clotting.. +PF08765 Mor transcription activator family
Mor (Middle operon regulator) is a sequence specific DNA binding protein. It mediates transcription activation through its interactions with the C-terminal domains of the alpha and sigma subunits of bacterial RNA polymerase. The N terminal region of Mor is the dimerisation region, and the C terminal contains a helix-turn-helix motif which binds DNA.. +PF08766 DEK C terminal domain
DEK is a chromatin associated protein that is linked with cancers and autoimmune disease. This domain is found at the C terminal of DEK and is of clinical importance since it can reverse the characteristic abnormal DNA-mutagen sensitivity in fibroblasts from ataxia-telangiectasia (A-T) patients . The structure of this domain shows it to be homologous to the E2F/DP transcription factor family . This domain is also found in chitin synthase proteins like Swiss:Q8TF96, and in protein phosphatases such as Swiss:Q6NN85.. +PF08767 CRM1 C terminal
CRM1 (also known as Exportin1) mediates the nuclear export of proteins bearing a leucine-rich nuclear export signal (NES). CRM1 forms a complex with the NES containing protein and the small GTPase Ran. This region forms an alpha helical structure formed by six helical hairpin motifs that are structurally similar to the HEAT repeat, but share little sequence similarity to the HEAT repeat .. +PF08768 Domain of unknown function (DUF1794)
Mistry J, Pollington JE. This domain forms a beta barrel structure but the function is unknown. The GO annotation for this protein indicates that the protein has a function in nematode larval development and has a positive regulation on growth rate.. +PF08769 Sporulation initiation factor Spo0A C terminal
The response regulator Spo0A is comprised of a phosphoacceptor domain and a transcription activation domain. This domain corresponds to the transcription activation domain and forms an alpha helical structure comprising 6 alpha helices. The structure contains a helix-turn-helix and binds DNA .. +PF08770 Sulphur oxidation protein SoxZ
SoxZ forms an anti parallel beta structure and forms a complex with SoxY. Sulphur oxidation occurs at the thiol of a conserved cysteine residue of the SoxY subunit .. +PF08771 Rapamycin binding domain
This domain forms an alpha helical structure and binds to rapamycin .. +PF08772 Nin one binding (NOB1) Zn-ribbon like
This domain corresponds to a zinc ribbon and is found on the RNA binding protein NOB1 (Nin one binding).. +PF08773 Cathepsin C exclusion domain
Cathepsin C (dipeptidyl peptidase I) is the physiological activator of a group of serine proteases. This domain corresponds to the exclusion domain whose structure excludes the approach of a polypeptide apart from its termini. It forms an enclosed beta barrel structure composed from 8 anti-parallel beta strands . Based on a structural comparison and interaction data, it is suggested that the exclusion domain originates from a metallo-protease inhibitor .. +PF08774 VRR-NUC domain
+PF08775 ParB family
ParB is a component of the par system which mediates accurate DNA partition during cell division. It recognises A-box and B-box DNA motifs. ParB forms an asymmetric dimer with 2 extended helix-turn-helix (HTH) motifs that bind to A-boxes. The HTH motifs emanate from a beta sheet coiled coil DNA binding module . Both DNA binding elements are free to rotate around a flexible linker, this enables them to bind to complex arrays of A- and B-box elements on adjacent DNA arms of the looped partition site .. +PF08776 VASP tetramerisation domain
Vasodilator-stimulated phosphoprotein (VASP) is an actin cytoskeletal regulatory protein. This region corresponds to the tetramerisation domain which forms a right handed alpha helical coiled coil structure .. +PF08777 RNA binding motif
This domain is found in protein La which functions as an RNA chaperone during RNA polymerase III transcription, and can also stimulate translation initiation. It contains a five stranded beta sheet which forms an atypical RNA recognition motif .. +PF08778 HIF-1 alpha C terminal transactivation domain
Hypoxia inducible factor-1 alpha (HIF-1 alpha) is the regulatory subunit of the heterodimeric transcription factor HIF-1. It plays a key role in cellular response to low oxygen tension. This region corresponds to the C terminal transactivation domain.. +PF08779 SARS coronavirus X4 like
The structure of the coronavirus X4 protein (also known as 7a and U122) shows similarities to the immunoglobulin like fold and suggests a binding activity to integrin I domains . In SARS-CoV- infected cells, the X4 protein is expressed and retained intra-cellularly within the Golgi network . X4 has been implicated to function during the replication cycle of SARS-CoV .. +PF08780 Nucleotidyltransferase substrate binding protein like
Nucleotidyltransferases (EC 2.7.7) comprise a large enzyme family with diverse roles in polynucleotide synthesis and modification. This domain is structurally related to kanamycin nucleotidyltransferase (KNTase) and forms a complex with HI0073, a sequence homolog of the nucleotide-binding domain of this nucleotidyltransferase superfamily .. +PF08781 Transcription factor DP
DP forms a heterodimer with E2F and regulates genes involved in cell cycle progression. The transcriptional activity of E2F is inhibited by the retinoblastoma protein which binds to the E2F-DP heterodimer and negatively regulates the G1-S transition.. +PF08782 c-SKI Smad4 binding domain
c-SKI is an oncoprotein that inhibits TGF-beta signaling through interaction with Smad proteins . This domain binds to Smad4 . +PF08783 DWNN domain
DWNN is a ubiquitin like domain found at the N terminus of the RBBP6 family of splicing-associated proteins . The DWNN domain is independently expressed in higher vertebrates so it may function as a novel ubiquitin-like modifier of other proteins .. +PF08784 Replication protein A C terminal
This domain corresponds to the C terminal of the single stranded DNA binding protein RPA (replication protein A). RPA is involved in many DNA metabolic pathways including DNA replication, DNA repair, recombination, cell cycle and DNA damage checkpoints.. +PF08785 Ku C terminal domain like
The non-homologous end joining (NHEJ) pathway is one method by which double stranded breaks in chromosomal DNA are repaired. Ku is a component of a multi-protein complex that is involved in the NHEJ. Ku has affinity for DNA ends and recruits the DNA-dependent protein kinase catalytic subunit (DNA-PKcs). This domain is found at the C terminal of Ku which binds to DNA-PKcs .. +PF08786 Domain of unknown function (DUF1795)
This is a bacterial domain of unknown function. It forms an antiparallel beta sheet structure and contains some alpha helical regions.. +PF08787 Alginate lyase
Alginate lyases are enzymes that degrade the linear polysaccharide alginate. They cleave the glycosidic linkage of alginate through a beta-elimination reaction. This family forms an all beta fold and is different to the all alpha fold of Pfam:PF05426.. +PF08788 NHR2 domain like
The NHR2 (Nervy homology 2) domain is found in the ETO protein where it mediates oligomerisation and protein-protein interactions. It forms an alpha-helical tetramer .. +PF08789 PBCV-specific basic adaptor domain
The small PBCV-specific basic adaptor domain is found fused to S/T protein kinases and the 2-Cysteine domain .. +PF08790 LYAR-type C2HC zinc finger
This C2HC zinc finger is found in LYAR proteins such as Swiss:Q08288 which are involved in cell growth regulation.. +PF08792 A2L zinc ribbon domain
This zinc ribbon domain is found associated with some viral A2L transcription factors .. +PF08793 2-cysteine adaptor domain
The virus-specific 2-cysteine adaptor domain is found fused to OTU/A20-like peptidases and S/T protein kinases. The domain associations of these proteins indicate that they might function as viral adaptors connecting the kinases and OTU/A20 peptidases to specific targets .. +PF08794 Lipoprotein GNA1870 C terminal like
GNA1870 is a surface exposed lipoprotein in Neisseria meningitidis that is a potent antigen of Meningococcus. The structure of the C terminal domain consists of an anti-parallel beta barrel overlaid by a short alpha helical region .. +PF08795 Putative papain-like cysteine peptidase (DUF1796)
+PF08796 Protein of unknown function (DUF1797)
This is a domain of unknown function. It forms a central anti-parallel beta sheet with flanking alpha helical regions.. +PF08797 HIRAN domain
The HIRAN domain (HIP116, Rad5p N-terminal) is found in the N-terminal regions of the SWI2/SNF2 proteins typified by HIP116 and Rad5p. The HIRAN domain is found as a standalone protein in several bacteria and prophages, or fused to other catalytic domains, such as a nuclease of the restriction endonuclease fold and TDP1-like DNA phosphoesterases, in the eukaryotes . It has been predicted that this domain functions as a DNA-binding domain that probably recognises features associated with damaged DNA or stalled replication forks . +PF08798 CRISPR associated protein
This domain forms an anti-parallel beta strand structure with flanking alpha helical regions.. +PF08799 pre-mRNA processing factor 4 (PRP4) like
This small domain is found on PRP4 ribonucleoproteins. PRP4 is a U4/U6 small nuclear ribonucleoprotein that is involved in pre-mRNA processing.. +PF08800 VirE N-terminal domain
PSI2 target AAO76744.1. This presumed domain is found at the N-terminus of VirE proteins.. +PF08801 Nup133_N;
Nup133 N terminal like. Nup133 is a nucleoporin that is crucial for nuclear pore complex (NPC) biogenesis. The N terminal forms a seven-bladed beta propeller structure . This family now contains other sized nucleoporins, including Nup155, Nup8, Nup132, Nup15 and Nup170.. +PF08802 Cytochrome B6-F complex Fe-S subunit
The cytochrome B6-F complex mediates electron transfer between photosystem II (PSII) and photosystem I (PSI), cyclic electron flow around PSI, and state transitions. This domain corresponds to the alpha helical transmembrane domain of the cytochrome B6-F complex iron-sulphur subunit.. +PF08803 Putative mono-oxygenase ydhR
ydhR is a homodimeric protein that comprises a central four-stranded beta sheet and four surrounding alpha helices . It shows structural homology to the ActVA-Orf6 and YgiN proteins which indicates it could be a mono-oxygenase.. +PF08804 gp32 DNA binding protein like
gp32 is a single stranded (ss) DNA binding protein in bacteriophage T4 that is essential for DNA replication, recombination and repair. The ssDNA binding cleft of gp32 comprises regions from three structural subdomains .. +PF08805 PilS N terminal
Type IV pili are bacterial virulence-associated adhesins that promote bacterial attachment to host cells. In Salmonella typhi, the structural pilin protein PilS interacts with the cystic fibrosis transmembrane conductance regulator . Mutagenesis studies suggest that residues on an alpha-beta loop and the C terminal disulphide-bonded region of PilS might be involved in binding specificity of the pilus .. +PF08806 Sep15/SelM redox domain
Sep15 and SelM are eukaryotic selenoproteins that have a thioredoxin-like domain and a surface accessible active site redox motif . This suggests that they function as thiol-disulphide isomerases involved in disulphide bond formation in the endoplasmic reticulum .. +PF08807 Bacterial domain of unknown function (DUF1798)
This domain is found in many hypothetical proteins. The structure of one of the proteins in this family has been solved and it adopts an all alpha helical fold.. +PF08808 RES domain
PSI2 target CAE41587.1. This presumed domain contains 3 highly conserved polar groups that could form an active site. These are an arginine, glutamate and serine, hence the RES domain. The domain is found widely distributed in bacteria. The domain is about 150 residues in length.. +PF08809 Phage related hypothetical protein (DUF1799)
PSI2 target CAE43631.1. Members of this family are about 100 amino acids in length and are uncharacterised.. +PF08810 Kinase associated protein B
This bacterial protein forms an anti-parallel beta sheet with an extending alpha helical region.. +PF08811 Protein of unknown function (DUF1800)
PSI2 target AAK23953.1. This is a family of large bacterial proteins of unknown function.. +PF08812 YtxC-like family
PSI2 target YtxC B.subtilis. This family includes proteins similar to B. subtilis YtxC an uncharacterised protein.. +PF08813 Phage tail protein
PSI2 target CAE43633.1. This family of proteins includes phage tail proteins. They probably include bacterial Ig-like domains related to Pfam:PF02368, which also includes a number of phage tail invasin proteins.. +PF08814 XisH protein
PSI2 target ZP_00111899.1. The fdxN element, along with two other DNA elements, is excised from the chromosome during heterocyst differentiation in cyanobacteria. The xisH as well as the xisF and xisI genes are required .. +PF08815 Nuclear receptor coactivator
This region is found on eukaryotic nuclear receptor coactivators and forms an alpha helical structure.. +PF08816 Inhibitor of vertebrate lysozyme (Ivy)
This bacterial family is a strong inhibitor of vertebrate lysozyme.. +PF08817 WXG100 protein secretion system (Wss), protein YukD
Mistry J, Desvaux M, Burroughs AM, Iyer LM, Aravind L. The YukD protein family members participate in the formation of a translocon required for the secretion of WXG100 proteins (Pfam:PF06013) in monoderm bacteria, with the WXG100 protein secretion system (Wss). Like the cytoplasmic protein EsaC in Staphylococcus aureus, YukD was hypothesized to play a role of a chaperone. YukD adopts a ubiquitin-like fold . Usually, ubiquitin covalently binds to protein and flags them for protein degradation, however conjugation assays have indicated that the classical YukD lacks the capacity for covalent bond formation with other proteins . In contrast to the situation in firmicutes, YukD-like proteins in actinobacteria are often fused to a transporter involved in the ESAT-6/ESX/Wss secretion pathway [6,7]. Members of the YukD family are also associated in gene neighborhoods with other enzymatic members of the ubiquitin signaling and degradation pathway such as the E1, E2 and E3 trienzyme complex that catalyze ubiquitin transfer to substrates, and the JAB family metallopeptidases that are involved in its release . This suggests that a subset of the YukD family in bacteria are conjugated and released from proteins as in the eukaryotic ubiquitin-mediated signaling and degradation pathway .. +PF08818 Domain of unknown function (DU1801)
PSI2 target AAO81511.1. This large family of bacterial proteins is uncharacterised. They contain a presumed domain about 110 amino acids in length.. +PF08819 Domain of unknown function (DUF1802)
The function of this family is unknown. This region is found associated with a Pfam:PF04471 suggesting they could be part of a restriction modification system... +PF08820 Domain of unknown function (DUF1803)
PSI2 target AAO81393.1. This small domain is found in one or two copies in proteins from bacteria. The function of this domain is unknown.. +PF08821 CGGC domain
PSI2 target AAB98576.1. This putative domain contains a quite highly conserved sequence of CGGC in its central region. The domain has many conserved cysteines and histidines suggestive of a zinc binding function.. +PF08822 Protein of unknown function (DUF1804)
PSI2 target CAB84459.1. This family of bacterial proteins is uncharacterised.. +PF08823 Putative peptidoglycan binding domain
This family may be a peptidoglycan binding domain.. +PF08824 Serine rich protein interaction domain
This is a serine rich domain that is found in the docking protein p130(cas) (Crk-associated substrate). This domain folds into a four helix bundle which is associated with protein-protein interactions .. +PF08825 E2 binding domain
E1 and E2 enzymes play a central role in ubiquitin and ubiquitin-like protein transfer cascades. This is an E2 binding domain that is found on NEDD8 activating E1 enzyme. The domain resembles ubiquitin, and recruits the catalytic core of the E2 enzyme Ubc12 in a similar manner to that in which ubiquitin interacts with ubiquitin binding domains .. +PF08826 DMPK coiled coil domain like
This domain is found in the myotonic dystrophy protein kinase (DMPK) and adopts a coiled coil structure. It plays a role in dimerisation .. +PF08827 Domain of unknown function (DUF1805)
This domain is found in bacteria and archaea and has an N terminal tetramerisation region that is composed of beta sheets.. +PF08828 Doublesex dimerisation domain
Doublesex (DSX) is a transcription factor that regulates somatic sexual differences in Drosophila. The structure of this domain has revealed a novel dimeric arrangement of ubiquitin-associated folds that has not previously been identified in a transcription factor .. +PF08829 Alpha C protein N terminal
The alpha C protein (ACP) is found in Streptococcus and acts as an invasin which plays a role in the internalisation and translocation of the organism across human epithelial surfaces. Group B Streptococcus is the leading cause of diseases including bacterial pneumonia, sepsis and meningitis. The N terminal of ACP is associated with virulence and forms a beta sandwich and a three helix bundle [1-3].. +PF08830 Protein of unknown function (DUF1806)
This is a bacterial family of uncharacterised proteins. The structure of one of the proteins in this family has been solved and it adopts a beta barrel-like structure.. +PF08831 Class II MHC-associated invariant chain trimerisation domain
The class II associated invariant chain peptide is required for folding and localisation of MHC class II heterodimers. This domain is involved in trimerisation of the ectodomain and interferes with DM/class II binding. The trimeric protein forms a cylindrical shape which is thought to be important for interactions between the invariant chain and class II molecules .. +PF08832 Steroid receptor coactivator
This domain is found in steroid/nuclear receptor coactivators and contains two LXXLL motifs that are involved in receptor binding . The family includes SRC-1/NcoA-1, NcoA-2/TIF2, pCIP/ACTR/GRIP-1/AIB1.. +PF08833 Axin beta-catenin binding domain
This domain is found on the scaffolding protein Axin which is a component of the beta-catenin destruction complex. It competes with the tumour suppressor adenomatous polyposis coli protein (APC) for binding to beta-catenin .. +PF08837 Protein of unknown function (DUF1810)
This is a family of uncharacterised proteins. The structure of one of the members in this family has been solved and it adopts a mainly alpha helical structure.. +PF08838 Protein of unknown function (DUF1811)
This is a bacterial family of uncharacterised proteins. Some of the proteins are annotated as being transcriptional regulators (see Swiss:Q4MQL7, Swiss:Q65MA2). The structure of one of the proteins in this family has revealed a beta-barrel like structure with helix-turn-helix like motif.. +PF08839 DNA replication factor CDT1 like
CDT1 is a component of the replication licensing system and promotes the loading of the mini-chromosome maintenance complex onto chromatin. Geminin is an inhibitor of CDT1 and prevents inappropriate re-initiation of replication on an already fired origin. This region of CDT1 binds to Geminin .. +PF08840 BAAT / Acyl-CoA thioester hydrolase C terminal
Pfam-B_4571 (release 20.0). This catalytic domain is found at the C terminal of acyl-CoA thioester hydrolases and bile acid-CoA:amino acid N-acetyltransferases (BAAT).. +PF08841 Diol dehydratase reactivase ATPase-like domain
Diol dehydratase (DDH, EC:4.2.1.28) and its isofunctional homologue glycerol dehydratase (GDH, EC:4.2.1.30) are enzymes which catalyse the conversion of glycerol, 1,2-propanediol, and 1,2-ethanediol to aldehydes . These reactions require coenzyme B12. Cleavage of the Co-C bond of coenzyme B12 by substrates or coenzyme analogues results in inactivation during which coenzyme B12 remains tightly bound to the apoenzyme. This family comprises the large subunit of the diol dehydratase and glycerol dehydratase reactivating factors whose function is to reactivate the holoenzyme by exchange of a damaged cofactor for intact coenzyme.. +PF08842 DUF1812; Mfa;
Fimbrillin-A associated anchor proteins Mfa1 and Mfa2. PSI2 target AAO79331.1. This family of proteins may be lipoproteins principally from bacilli. They are between 300 and 400 residues. Many Bacteroides-like bacterial species, including Porphyromonas gingivalis, the causal agent of periodontal infection, carry at least two types of fimbriae, namely FimA and Mfa1 fimbriae, following the names of their major subunit proteins . Normally, FimA fimbriae are long filaments that are easily detached from cells, whereas Mfa1 fimbriae are short filaments that are tightly bound to cells; however, in the absence of Mfa2 protein, the Mfa1 fimbriae are also very long and are not attached. Mfa2 and Mfa1 are associated with each other in whole P. gingivalis cells to the extent that Mfa2 is located on the cell surface and probably associated with Mfa1 fimbriae in such a way that it anchors the Mfa1 fimbriae to the cell surface and regulates Mfa1 filament length .. +PF08843 Nucleotidyl transferase of unknown function (DUF1814)
PSI2 target CAD86002.1. This large family of proteins are largely uncharacterised. Some are annotated as abortive infective proteins but support for this annotation could not be found. This family was recently identified as belonging to the nucleotidyltransferase superfamily .. +PF08844 Domain of unknown function (DUF1815)
PSI2 target ZP_00111304.2 (BIG_33). This presumed domain is about 100 amino acids in length and is functionally uncharacterised.. +PF08845 DUF1813;
Toxin SymE, type I toxin-antitoxin system. PSI2 target AAC77303.1. SymE (SOS-induced yjiW gene with similarity to MazE ) is an SOS-induced toxin. It inhibits cell growth, decreases protein synthesis and increases RNA degradation. It may play a role in the recycling of RNAs damaged under SOS response-inducing conditions. It is predicted to have an AbrB fold, similar to that of the antitoxin MazE. Its translation is repressed by the antisense RNA SymR, which acts as an antitoxin [1,2].. +PF08846 Domain of unknown function (DUF1816)
PSI2 target ZP_00109395.2 BIG_34. Swiss:Q4C9H3 is associated with the Pfam:PF01383 domain suggesting this presumed domain could have a role in phycobilisomes.. +PF08847 Domain of unknown function (DUF1817)
PSI2 target ZP_00111140.1 BIG_36. Members of this family are functionally uncharacterised.. +PF08848 Domain of unknown function (DUF1818)
PSI2 target ZP_00110314.1 BIG_37. This presumed domain is found in a small family of cyanobacterial proteins. These proteins are functionally uncharacterised.. +PF08849 Putative inner membrane protein (DUF1819)
PSI2 target ZP_00108899.1 BIG_41. These proteins are functionally uncharacterised. Several are annotated as putative inner membrane proteins.. +PF08850 Domain of unknown function (DUF1820)
PSI2 target AAG07366.1 BIG_46. This family includes small functionally uncharacterised proteins around 100 amino acids in length.. +PF08852 Protein of unknown function (DUF1822)
PSI2 target ZP_00109005.1 BIG_39. This family of proteins are functionally uncharacterised.. +PF08853 Domain of unknown function (DUF1823)
PSI2 target ZP_00108651.1 BIG_42. This presumed domain is functionally uncharacterised.. +PF08854 Domain of unknown function (DUF1824)
This uncharacterised family of proteins are principally found in cyanobacteria.. +PF08855 Domain of unknown function (DUF1825)
This uncharacterised family of proteins are principally found in cyanobacteria.. +PF08856 Protein of unknown function (DUF1826)
These proteins are functionally uncharacterised.. +PF08857 Putative ParB-like nuclease
PSI2 target AAG07772.1 BIG_47. This domain is probably distantly related to Pfam:PF02195, suggesting these uncharacterised proteins have a nuclease function.. +PF08858 IDEAL domain
This short domain is found at the C-terminus of proteins in the UPF0302 family. The domain is named after the sequence of the most conserved region in some members. The function of this domain is unknown.. +PF08859 DGC domain
This domain appears to be a zinc binding domain from the conservation of four potential chelating cysteines. The domain is named after a conserved central motif. The function of this domain is unknown.. +PF08860 Domain of unknown function (DUF1827)
This presumed domain has no known function.. +PF08861 Domain of unknown function DUF1828
This presumed domain is functionally uncharacterised.. +PF08862 Domain of unknown function DUF1829
This short domain is usually associated with Pfam:PF08861.. +PF08863 YolD-like protein
Members of this family are functionally uncharacterised. However it has been predicted that these proteins are functionally equivalent to the UmuD subunit of polymerase V from gram-negative bacteria .. +PF08864 UPF0302 domain
This family is known as UPF0302. It is currently uncharacterised.. +PF08865 Domain of unknown function (DUF1830)
This family of short proteins is functionally uncharacterised.. +PF08866 Putative amino acid metabolism
Solution of the structure of the Lactobacillus plantarum protein from this family has indicated a potential new fold with remote similarities to TBP-like (TATA-binding protein) structures. This similarity, in combination with genomic context analysis, leads us to propose an involvement in amino-acid metabolism. The potentially novel fold is an alpha + beta fold comprising two beta sheets packed against a single helix. The enzyme is present in the cytosol.. +PF08867 FRG domain
This presumed domain contains a conserved N-terminal (F/Y)RG motif. It is functionally uncharacterised.. +PF08868 YugN-like family
This family of proteins related to B. subtilis YugN are functionally uncharacterised.. +PF08869 XisI protein
The fdxN element, along with two other DNA elements, is excised from the chromosome during heterocyst differentiation in cyanobacteria. The xisH as well as the xisF and xisI genes are required .. +PF08870 Domain of unknown function (DUF1832)
This family of proteins are functionally uncharacterised.. +PF08872 KGK domain
This presumed domain is found in one or two copies in cyanobacterial proteins. It is named after a short sequence motif.. +PF08873 Domain of unknown function (DUF1834)
This family of proteins are functionally uncharacterised. One member is the Gp37 protein from the FluMu prophage.. +PF08874 Domain of unknown function (DUF1835)
This family of proteins are functionally uncharacterised.. +PF08875 Domain of unknown function (DUF1833)
This family of proteins are functionally uncharacterised and are predicted to adopt an all-beta fold . They are often found in gene neighborhoods containing genes for an NlpC peptidase and a Ubiquitin domain predicted to be involved in tail assembly . . +PF08876 Domain of unknown function (DUF1836)
This family of proteins are functionally uncharacterised.. +PF08877 MepB protein
MepB is a functionally uncharacterised protein in the mepRAB gene cluster of Staphylococcus aureus.. +PF08878 Domain of unknown function (DUF1837)
This family of proteins are functionally uncharacterised.. +PF08879 WRC
The WRC domain, named after the conserved Trp-Arg-Cys motif, contains two distinctive features: a putative nuclear localisation signal and a zinc-finger motif (C3H). It is suggested that the WRC domain functions in DNA binding .. +PF08880 QLQ
The QLQ domain is named after the conserved Gln, Leu, Gln motif. The QLQ domain is found at the N-terminus of SWI2/SNF2 protein, which has been shown to be involved in protein-protein interactions. This domain has thus been postulated to be involved in mediating protein interactions .. +PF08881 CNVH;
CyanoVirin-N Homology domains are found in the sugar-binding antiviral protein cyanovirin-N (CVN) as well as filamentous ascomycetes and in the fern Ceratopteris richardii.. +PF08882 Acetone carboxylase gamma subunit
Acetone carboxylase is the key enzyme of bacterial acetone metabolism, catalysing the condensation of acetone and CO(2) to form acetoacetate.. +PF08883 Dopa 4,5-dioxygenase family
This family of proteins are related to Swiss:P87064 a DOPA 4,5-dioxygenase that is involved in synthesis of betalain. DOPA-dioxygenase is the key enzyme involved in betalain biosynthesis. It converts 3,4-dihydroxyphenylalanine to betalamic acid, a yellow chromophore.. +PF08884 Flagellin D3 domain
This domain is found in the central portion of bacterial flagellin FliC. The domain contains a structural motif called a beta-folium fold . Although no specific function is assigned to this domain its deletion leads to a reduction in filament stability .. +PF08885 GSCFA family
This family of proteins are functionally uncharacterised. They have been named GSCFA after a highly conserved N-terminal motif in the alignment. Distant similarity to the Pfam:PF00657 lipases suggests these proteins are likely to be enzymes.. +PF08886 Glutamate-cysteine ligase
This is a rare family of glutamate--cysteine ligases, EC:6.3.2.2, demonstrated first in Thiobacillus ferrooxidans and present in a few other Proteobacteria . It is the first of two enzymes for glutathione biosynthesis. It is also called gamma-glutamylcysteine synthetase. The structure of this family has been solved, and is similar to that of human glutathione synthetase and very different to gamma-glutamylcysteine synthetase from Escherichia coli.. +PF08887 GAD-like domain
This domain is functionally uncharacterised, but it appears to be distantly related to the GAD domain Pfam:PF02938.. +PF08888 HopJ type III effector protein
Pathovars of Pseudomonas syringae interact with their plant hosts via the action of Hrp outer protein (Hop) effector proteins, injected into plant cells by the type III secretion system. The proteins in this family are called HopJ after the original member HopPmaJ .. +PF08889 WbqC-like protein family
This family of proteins are functionally uncharacterised. However it is found in an O-antigen gene cluster in E. coli and other bacteria suggesting a role in O-antigen production. Feng et al. suggest that wbnG may code for a glycine transferase .. +PF08890 Phage XkdN-like protein
This family of proteins are functionally uncharacterised. They are found in prophage sequence in various bacteria.. +PF08891 YfcL protein
This family of proteins are functionally uncharacterised. They are related to the short YfcL protein from E. coli.. +PF08892 YqcI/YcgG family
This family of proteins are functionally uncharacterised. The family includes YqcI and YcgG from B. subtilis. The alignment contains a conserved FPC motif at the N-terminus and CPF at the C-terminus.. +PF08893 Domain of unknown function (DUF1839)
This family of proteins are functionally uncharacterised.. +PF08894 Protein of unknown function (DUF1838)
This family of proteins are functionally uncharacterised.. +PF08895 Domain of unknown function (DUF1840)
This family of proteins are functionally uncharacterised.. +PF08896 Domain of unknown function (DUF1842)
This domain is found at the N-terminus of proteins that are functionally uncharacterised.. +PF08897 Domain of unknown function (DUF1841)
This family of proteins are functionally uncharacterised.. +PF08898 Domain of unknown function (DUF1843)
This domain is found at the C-terminus of a family of proteins that are functionally uncharacterised. The presumed domain is about 60 amino acid residues in length and is found independently in some proteins.. +PF08899 Domain of unknown function (DUF1844)
This family of proteins are functionally uncharacterised.. +PF08900 Domain of unknown function (DUF1845)
This family of proteins are functionally uncharacterised.. +PF08901 Protein of unknown function (DUF1847)
This family of proteins are functionally uncharacterised. They contain 4 N-terminal cysteines that may form a zinc binding domain.. +PF08902 Domain of unknown function (DUF1848)
This family of proteins are functionally uncharacterised. The C-terminus contains a cluster of cysteines that are similar to the iron-sulfur cluster found at the N-terminus of Pfam:PF04055.. +PF08903 Domain of unknown function (DUF1846)
This family of proteins are functionally uncharacterised. Some members of the family are annotated as ATP-dependent peptidases. However, we can find no support for this annotation.. +PF08904 Domain of unknown function (DUF1849)
This family of proteins are functionally uncharacterised.. +PF08905 Domain of unknown function (DUF1850)
This family of proteins are functionally uncharacterised. Some members of this family appear to be misannotated as RocC an amino acid transporter from B. subtilis.. +PF08906 Domain of unknown function (DUF1851)
This domain is found at the C-terminus of a variety of proteins that are functionally uncharacterised.. +PF08907 Domain of unknown function (DUF1853)
This family of proteins are functionally uncharacterised.. +PF08908 Domain of unknown function (DUF1852)
This family of proteins are functionally uncharacterised.. +PF08909 Domain of unknown function (DUF1854)
This potential domain is functionally uncharacterised. It is found at the C-terminus of a number of ATP transporter proteins suggesting this domain may be involved in ligand binding.. +PF08910 DUF1855; Aida_N; Aida-C2;
Mistry J, Sammut SJ, Coggill P, Zhang D, Eberhardt R. This is the N-terminal domain of the axin interactor, dorsalization-associated protein family .. +PF08911 NUP50 (Nucleoporin 50 kDa)
Nucleoporin 50 kDa (NUP50) acts as a cofactor for the importin-alpha:importin-beta heterodimer, which in turn allows for transportation of many nuclear-targeted proteins through nuclear pore complexes. The C terminus of NUP50 binds importin-beta through RAN-GTP, the N terminus binds the C terminus of importin-alpha, while a central domain binds importin-beta. NUP50:importin-alpha:importin-beta then binds cargo and can stimulate nuclear import. The N-terminal domain of NUP50 is also able to actively displace nuclear localisation signals from importin-alpha .. +PF08912 Rho Binding
Rho Binding Domain is responsible for the recognition and binding of Rho binding domain-containing proteins (such as ROCK) to Rho, resulting in activation of the GTPase which in turn modulates the phosphorylation of various signalling proteins. This domain is within an amphipathic alpha-helical coiled-coil and interacts with Rho through predominantly hydrophobic interactions .. +PF08913 Vinculin Binding Site
Vinculin binding sites are predominantly found in talin and talin-like molecules, enabling binding of vinculin to talin, stabilising integrin-mediated cell-matrix junctions. Talin, in turn, links integrins to the actin cytoskeleton. The consensus sequence for Vinculin binding sites is LxxAAxxVAxxVxxLIxxA, with a secondary structure prediction of four amphipathic helices. The hydrophobic residues that define the VBS are themselves 'masked' and are buried in the core of a series of helical bundles that make up the talin rod .. +PF08914 Rap1 Myb domain
+PF08915 Archaea-specific editing domain of threonyl-tRNA synthetase
Archaea-specific editing domain of threonyl-tRNA synthetase, with marked structural similarity to D-amino acids deacylases found in eubacteria and eukaryotes. This domain can bind D-amino acids, and ensures high fidelity during translation. It is especially responsible for removing incorrectly attached serine from tRNA-Thr. The domain forms a fold that can be defined as two layers of beta-sheets (a three-stranded sheet and a five-stranded sheet), with two alpha-helices located adjacent to the five-stranded sheet .. +PF08916 Phenylalanine zipper
The phenylalanine zipper consists of aromatic side chains from ten phenylalanine residues that are stacked within a hydrophobic core. This zipper mediates dimerisation of various proteins, such as APS, SH2-B and Lnk .. +PF08917 Transforming growth factor beta receptor 2 ectodomain
The Transforming growth factor beta receptor 2 ectodomain is a compact fold consisting of nine beta-strands and a single helix stabilised by a network of six intra strand disulphide bonds. The folding topology includes a central five-stranded antiparallel beta-sheet, eight-residues long at its centre, covered by a second layer consisting of two segments of two-stranded antiparallel beta-sheets (beta1-beta4, beta3-beta9) .. +PF08918 PhoQ Sensor
The PhoQ Sensor is required for the virulence of various Gram-negative bacteria by allowing interaction of PhoPQ with the intracellular membrane, resulting in remodelling of the bacterial cell surface and subsequent bacterial resistance to host antimicrobial peptides. The domain contains a major flat acidic surface, which binds to at least 3 calcium ions, neutralising the domain's negative charge and allowing interaction with the negatively charged membrane .. +PF08919 F-actin binding
The F-actin binding domain forms a compact bundle of four antiparallel alpha-helices, which are arranged in a left-handed topology. Binding of F-actin to the F-actin binding domain may result in cytoplasmic retention and subcellular distribution of the protein, as well as possible inhibition of protein function .. +PF08920 Splicing factor 3B subunit 1
This family consists of several eukaryotic splicing factor 3B subunit 1 proteins, which associate with p14 through a C-terminus beta-strand that interacts with beta-3 of the p14 RNA recognition motif (RRM) beta-sheet, which is in turn connected to an alpha-helix by a loop that makes extensive contacts with both the shorter C-terminal helix and RRM of p14. This subunit is required for 'A' splicing complex assembly (formed by the stable binding of U2 snRNP to the branchpoint sequence in pre-mRNA) and 'E' splicing complex assembly .. +PF08921 Domain of unknown function (DUF1904)
This domain is found in a set of hypothetical bacterial proteins.. +PF08922 Domain of unknown function (DUF1905)
This domain is found in a set of hypothetical bacterial proteins.. +PF08923 Mitogen-activated protein kinase kinase 1 interacting
Mitogen-activated protein kinase kinase 1 interacting protein is a small subcellular adaptor protein required for MAPK signaling and ERK1/2 activation. The overall topology of this domain has a central five-stranded beta-sheet sandwiched between a two alpha-helix and a one alpha-helix layer .. +PF08924 Domain of unknown function (DUF1906)
This domain is found in a set of uncharacterised hypothetical bacterial proteins.. +PF08925 Domain of Unknown Function (DUF1907)
The structure of this domain displays an alpha-beta-beta-alpha four layer topology, with an HxHxxxxxxxxxH motif that coordinates a zinc ion, and an acetate anion at a site that likely supports the enzymatic activity of an ester hydrolase .. +PF08926 Domain of unknown function (DUF1908)
This domain is found in a set of hypothetical/structural eukaryotic proteins.. +PF08928 Domain of unknown function (DUF1910)
This domain is found in a set of hypothetical bacterial proteins.. +PF08929 Domain of unknown function (DUF1911)
This domain is found in a set of hypothetical bacterial proteins.. +PF08930 Domain of unknown function (DUF1912)
This domain has no known function. It is found in various Streptococcal proteins.. +PF08931 Domain of unknown function (DUF1913)
This domain has no known function. It is found in various putative receptor proteins from Lactococcus bacteriophages.. +PF08932 Domain of unknown function (DUF1914)
This domain has no known function. It is found in various putative receptor proteins from Lactococcus bacteriophages.. +PF08933 Domain of unknown function (DUF1864)
This domain has no known function. It is found in various hypothetical and conserved domain proteins.. +PF08934 Rb C-terminal domain
The Rb C-terminal domain is required for high-affinity binding to E2F-DP complexes and for maximal repression of E2F-responsive promoters, thereby acting as a growth suppressor by blocking the G1-S transition of the cell cycle. This domain has a strand-loop-helix structure, which directly interacts with both E2F1 and DP1, followed by a tail segment that lacks regular secondary structure .. +PF08935 DUF1865;
Viral protein VP4 subunit. This domain is predominantly found in viral proteins from the family Picornaviridae. It is VP4 of the viral polyprotein which, in poliovirus, is part of the capsid that consists of 60 copies each of four proteins VP1, VP2, VP3, and VP4 arranged on an icosahedral lattice . VP4 is on the inside and differs from the others in being small, myristoylated and having an extended structure. Productive infection involves the externalisation of the VP4, which is cleaved from the rest, along with the N-terminus of VP1. There thus seem to be three stages of the virus, ie a multi-step process for cell entry involving RNA translocation through a membrane channel formed by the externalised N termini of VP1 .. +PF08936 Carboxysome Shell Carbonic Anhydrase
Carboxysome Shell Carbonic Anhydrase is a bacterial carbonic anhydrase localised in the carboxysome, where it converts bicarbonate ions to carbon dioxide for use in carbon fixation. It contains three domains, these being: (1) an N-terminal domain composed primarily of four alpha-helices; (2) a catalytic domain containing a tightly bound zinc ion and (3) a C-terminal domain with weak structural similarity to the catalytic domain .. +PF08937 MTH538 TIR-like domain (DUF1863)
This domain adopts the flavodoxin fold, that is, five parallel beta-strands and four helical segments. The structure is a three-layer sandwich with alpha-1 and alpha-4 on one side of the beta-sheet, and alpha-2 and alpha-3 on the other side. Probable role in signal transduction as a phosphorylation-independent conformational switch protein . This domain is similar to the TIR domain.. +PF08938 DUF1916;
Mistry J, Sammut SJ, Eberhardt R. This domain is found at the N-terminus of HBS1 proteins. It interacts with the ribosomal protein rpS3 at the mRNA entry site .. +PF08939 Domain of unknown function (DUF1917)
This domain is found in various hypothetical and basophilic leukaemia proteins. It has no known function.. +PF08940 Domain of unknown function (DUF1918)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF08941 USP8 interacting
This domain interacts with the UBP deubiquitinating enzyme USP8.. +PF08942 Domain of unknown function (DUF1919)
This domain has no known function. It is found in various hypothetical and putative bacterial proteins.. +PF08943 CsiD
This family consists of various bacterial proteins pertaining to the non-haem Fe(II)-dependent oxygenase family. Exact function is unknown, but a putative role includes involvement in the control of utilisation of gamma-aminobutyric acid .. +PF08944 NADPH oxidase subunit p47Phox, C terminal domain
The C terminal domain of the phagocyte NADPH oxidase subunit p47Phox contains conserved PxxP motifs that allow binding to SH3 domains, with subsequent activation of the NADPH oxidase, and generation of superoxide, which plays a crucial role in host defense against microbial infection .. +PF08945 Bcl-x interacting, BH3 domain
This domain is a long alpha helix, required for interaction with Bcl-x. It is found in BAM, Bim and Bcl2-like protein 11 . This domain is also known as the BH3 domain between residues 146 and 161.. +PF08946 Osmosensory transporter coiled coil
The osmosensory transporter coiled coil is a C-terminal domain found in various bacterial osmoprotective transporters, such as ProP, Proline/betaine transporter, Proline permease 2 and the citrate proton symporters. It adopts an antiparallel coiled-coil structure, and is essential for osmosensory and osmoprotectant transporter function .. +PF08947 BPS (Between PH and SH2)
The BPS (Between PH and SH2) domain, comprised of 2 beta strands and a C-terminal helix, is an approximately 45 residue region found in the adaptor proteins Grb7/10/14 that mediates inhibition of the tyrosine kinase domain of the insulin receptor by binding of the N-terminal portion of the BPS domain to the substrate peptide groove of the kinase, acting as a pseudosubstrate inhibitor .. +PF08948 Domain of unknown function (DUF1859)
This domain has no known function. It is predominantly found in the N-terminus of bacteriophage spike proteins .. +PF08949 Domain of unknown function (DUF1860)
This domain has no known function. It is predominantly found in the C-terminus of bacteriophage spike proteins .. +PF08950 Protein of unknown function (DUF1861)
This hypothetical protein, found in bacteria and in the eukaryote Leishmania, has no known function.. +PF08951 Enterocin A Immunity
Mistry J, Sammut SJ, Coggill P. Gram-positive lactobacilli produce bacteriocins to kill closely-related competitor species . To protect themselves from the bacteriocidal activity of this molecule they co-express an immunity protein (for discussion of this operon see Bacteriocin_IIc Pfam:PF10439). The immunity protein structure is a soluble, cytoplasmic, antiparallel four alpha-helical globular bundle with a fifth, more flexible and more divergent C-terminal helical hair-pin . The C-terminal hair-pin recognises the C-terminus of the producer bacteriocin and this interaction is sufficient to dis-orient the bacteriocin within the membrane and close up the permeabilising pore that on its own the bacteriocin creates . These immunity proteins interact in the same way with other bacteriocins, family Bacteriocin_II, Pfam:PF01721. Since many enterococci can produce more than one bacteriocin it seems likely that the whole operon can be carried on transferable plasmids .. +PF08952 Domain of unknown function (DUF1866)
This domain, found in Synaptojanin, has no known function.. +PF08953 Domain of unknown function (DUF1899)
This set of domains is found in various eukaryotic proteins. Function is unknown.. +PF08954 Domain of unknown function (DUF1900)
This domain is predominantly found in the structural protein coronin, and is duplicated in some sequences. It has no known function .. +PF08955 DUF1901;
BofC C-terminal domain. The C-terminal domain of the bacterial protein 'bypass of forespore C' contains a three-stranded beta-sheet and three alpha-helices. Its exact function is, as yet, unknown .. +PF08956 Domain of unknown function (DUF1869)
This domain is found in a set of hypothetical bacterial proteins.. +PF08958 Domain of unknown function (DUF1871)
This set of hypothetical proteins is produced by prokaryotes pertaining to the Bacillus genus.. +PF08960 Domain of unknown function (DUF1874)
This domain is found in a set of hypothetical viral and bacterial proteins.. +PF08961 Domain of unknown function (DUF1875)
The MIT domain, found in Nuclear receptor-binding factor 2, has no known function.. +PF08962 Domain of unknown function (DUF1876)
This domain is found in a set of hypothetical bacterial proteins.. +PF08963 Protein of unknown function (DUF1878)
This domain is found in a set of hypothetical bacterial proteins.. +PF08964 DUF1881;
Beta/Gamma crystallin. Mistry J, Sammut SJ, Eberhardt R. This family of beta/gamma crystallins includes the N-terminal domain of Dictyostelium discoideum Calcium-dependent cell adhesion molecule 1 (Swiss:P54657), which mediates cell-cell adhesion through homophilic interactions .. +PF08965 Domain of unknown function (DUF1870)
This domain is found in a set of hypothetical bacterial proteins. It contains a helix-turn-helix domain so may be a DNA-binding protein.. +PF08966 Domain of unknown function (DUF1882)
This domain is found in a set of hypothetical bacterial proteins.. +PF08967 Domain of unknown function (DUF1884)
This domain is found in a set of hypothetical bacterial proteins. It shows similarity to the N-terminus of ATP-synthase.. +PF08968 Domain of unknown function (DUF1885)
This domain is found in a set of hypothetical proteins produced by bacteria of the Bacillus genus.. +PF08969 DUF1873;
USP8 dimerisation domain. This domain is predominantly found in the amino terminal region of Ubiquitin carboxyl-terminal hydrolase 8 (USP8). It forms a five helical bundle that dimerises .. +PF08970 Sporulation inhibitor A
Members of this protein family contain two antiparallel alpha helices that are linked by a highly structured inter-helix loop to form a helical hairpin; the structure is stabilised by numerous hydrophobic and electrostatic interactions. These sporulation inhibitors are antikinases that bind to the histidine kinase KinA phosphotransfer domain and act as a molecular barricade that inhibit productive interaction between the ATP binding site and the phosphorylatable KinA His residue. This results in the inhibition of sporulation (by preventing phosphorylation of spo0A) .. +PF08971 Glycogen synthesis protein
Members of this family are involved in glycogen synthesis in Enterobacteria. The structure of the polypeptide chain comprises a bundle of two parallel amphipathic helices, alpha-1 and alpha-3, and a short hydrophobic helix alpha-2 sandwiched between them .. +PF08972 Domain of unknown function (DUF1902)
Members of this family of prokaryotic proteins adopt a fold consisting of one alpha-helix and four beta-strands. Their function has not, as yet, been elucidated .. +PF08973 DUF1893;
Domain of unknown function (DUF1893). Mistry J, Sammut SJ, Iyer, LM. A member of the deaminase fold that binds an unknown ligand in the crystal structure. The protein is ADP-ribosylated at a conserved aspartate . Contextual analysis suggests that the domain is likely to bind NAD or ADP ribose either to sense redox states or to function as a regulatory ADP ribosyltransferase .. +PF08974 Domain of unknown function (DUF1877)
This domain is found in a set of hypothetical bacterial proteins.. +PF08975 DUF1868;
Domain of unknown function (DUF1868). This group of 2H-phosphodiesterases comprises a single family typified by the protein mlr3352 from M.loti. Members are also present in various alpha-proteobacteria, Synechocystis, Streptococcus and Chilo iridescent virus. The presence of a member of this predominantly bacterial group in a large eukaryotic DNA virus represents a potential case of horizontal transfer from a bacterial source into a virus. Several proteins of bacterial origin have been noticed in the insect viruses (L.M.Iyer, E.V.Koonin and L.Aravind, unpublished observations) and these appear to have been acquired from endo-symbiotic or parasitic bacteria that share the same host cells with the viruses. Presence of 2H proteins in the proteomes of large DNA viruses (e.g. T4 57B protein and the Fowl-pox virus FPV025) may point to some role for these proteins in regulating the viral tRNA metabolism. Each member of this family contains an internal duplication, each of which contains an HXTX motif that defines the family.. +PF08976 Domain of unknown function (DUF1880)
This domain is found predominantly in DJ binding protein. It has no known function.. +PF08977 Bypass of Forespore C, N terminal
The N-terminal domain of 'bypass of forespore C' is composed of a four-stranded beta-sheet covered by an alpha-helix. The beta-sheet has a beta2-beta1-beta4-beta3 topology, where strands beta1 and beta2 and strands beta3 and beta4 are connected by beta-turns, whereas strands beta2 and beta3 are joined by an alpha-helix that runs across one face of the beta-sheet. This domain is similar to the third immunoglobulin G-binding domain of protein G from Streptococcus, the latter belonging to a large and diverse group of cell surface-associated proteins that bind to immunoglobulins. It has been hypothesised that this domain may be a mediator of protein-protein interactions involved in proteolytic events at the cell surface .. +PF08978 Reoviridae VP9
This domain is found in various VP9 viral outer-coat proteins. It has no known function.. +PF08979 Domain of unknown function (DUF1894)
Members of this family have an important role in methanogenesis. They assume an alpha-beta globular structure consisting of six beta-strands and three alpha-helices forming the secondary structural topological arrangement of alpha1-beta1-alpha2-beta2-beta3-beta4-beta5-beta6-alpha3 .. +PF08980 Domain of unknown function (DUF1883)
This domain is found in a set of hypothetical bacterial proteins.. +PF08982 Domain of unknown function (DUF1857)
This domain has no known function. It is found in various hypothetical bacterial and fungal proteins.. +PF08983 Domain of unknown function (DUF1856)
This domain has no known function. It is found in the C-terminal segment of various vasopressin receptors.. +PF08984 Domain of unknown function (DUF1858)
This domain has no known function. It is found in various hypothetical bacterial proteins.. +PF08985 Domain of unknown function (DUF1888)
This domain is found in a set of hypothetical bacterial proteins.. +PF08986 Domain of unknown function (DUF1889)
This domain is found in a set of hypothetical bacterial proteins.. +PF08987 Protein of unknown function (DUF1892)
Members of this family, that are synthesised by Saccharomycetes, adopt a structure consisting of a four-stranded beta-sheet, with strand order beta2-beta1-beta4-beta3, and two alpha-helices, with an overall topology of beta-beta-alpha-beta-beta-alpha. They have no known function .. +PF08988 Protein of unknown function (DUF1895)
The YscE protein, produced by the pathogen Yersinia, assumes a secondary structure composed of two anti-parallel alpha-helices separated by a flexible loop. The function of this protein is, as yet, unknown .. +PF08989 Domain of unknown function (DUF1896)
This domain is found in a set of hypothetical bacterial proteins.. +PF08990 Erythronolide synthase docking
The N terminal docking domain found in modular polyketide synthase assumes an alpha-helical structure, wherein two alpha-helices are connected by a short loop. Two such N-terminal domains dimerise to form amphipathic parallel alpha-helical coiled coils: dimerisation is essential for protein function .. +PF08991 Domain of unknown function (DUF1903)
Members of this family adopt a coiled coil structure, with two antiparallel alpha-helices that are tightly strapped together by two disulfide bridges at each end. The protein sequence shows a cysteine motif, required for the stabilisation of the coiled-coil-like structure. Additional inter-helix hydrophobic contacts impart stability to this scaffold. The precise function of this eukaryotic domain is, as yet, unknown .. +PF08992 Quinohemoprotein amine dehydrogenase, gamma subunit
Members of this family contain a cross-linked, proteinous quinone cofactor, cysteine tryptophylquinone, which is required for catalysis of the oxidative deamination of a wide range of aliphatic and aromatic amines. The domain assumes a globular secondary structure, with two short alpha-helices having many turns and bends .. +PF08993 T4-helicase_N;
T4 gene Gp59 loader of gp41 DNA helicase. Bacteriophage T4 gene-59 helicase assembly protein is required for recombination-dependent DNA replication, which is the predominant mode of DNA replication in the late stage of T4 infection. T4 gene-59 helicase assembly protein accelerates the loading of the T4 gene-41 helicase during DNA synthesis by the T4 replication system in vitro. T4 gene-59 helicase assembly protein binds to both T4 gene-41 helicase and T4 gene-32 single-stranded DNA binding protein, and to single and double-stranded DNA. The structure of T4 gene-59 helicase assembly protein reveals a novel alpha-helical bundle fold with two domains of similar size, this being the N-terminal domain that consists of six alpha-helices linked by loop segments and short turns. The surface of the domain contains large regions of exposed hydrophobic residues and clusters of acidic and basic residues. This domain has structural similarity to members of the high-mobility-group (HMG) family of DNA minor groove binding proteins including rat HMG1A and lymphoid enhancer-binding factor, and is required for binding of the helicase to the DNA minor groove .. +PF08994 T4-helicase_C;
T4 gene Gp59 loader of gp41 DNA helicase C-term. Bacteriophage T4 gene-59 helicase assembly protein is required for recombination-dependent DNA replication, which is the predominant mode of DNA replication in the late stage of T4 infection. T4 gene-59 helicase assembly protein accelerates the loading of the T4 gene-41 helicase during DNA synthesis by the T4 replication system in vitro. T4 gene-59 helicase assembly protein binds to both T4 gene-41 helicase and T4 gene-32 single-stranded DNA binding protein, and to single and double-stranded DNA. The structure of T4 gene-59 helicase assembly protein reveals a novel alpha-helical bundle fold with two domains of similar size, this being the C-terminal domain that consists of seven alpha-helices with short intervening loops and turns. The surface of the domain contains large regions of exposed hydrophobic residues and clusters of acidic and basic residues. The hydrophobic region on the 'bottom' surface of the domain near the C-terminal helix binds the leading strand DNA, whilst the hydrophobic region on the 'top' surface of the domain lies between the two arms of the fork DNA, allowing for T4 gene 41 helicase binding and assembly into a hexameric complex around the lagging strand .. +PF08995 Necrosis inducing protein-1
Necrosis inducing protein-1, a fungal avirulence protein produced by plants, consists of two parts containing beta-sheets of two and three anti-parallel strands, respectively. Five intramolecular disulfide bonds stabilise these parts and their position with respect to each other, providing a high level of stability .. +PF08996 DNA Polymerase alpha zinc finger
The DNA Polymerase alpha zinc finger domain adopts an alpha-helix-like structure, followed by three turns, all of which involve proline. The resulting motif is a helix-turn-helix motif, in contrast to other zinc finger domains, which show anti-parallel sheet and helix conformation. Zinc binding occurs due to the presence of four cysteine residues positioned to bind the metal centre in a tetrahedral coordination geometry. Function of this domain is uncertain: it has been proposed that the zinc finger motif may be an essential part of the DNA binding domain .. +PF08997 Ubiquinol-cytochrome C reductase complex, 6.4kD protein
The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is an essential component of the mitochondrial cellular respiratory chain. This family represents the 6.4kD protein, which may be closely linked to the iron-sulphur protein in the complex and function as an iron-sulphur protein binding factor .. +PF08998 Bacterial epsilon antitoxin
The epsilon antitoxin, produced by various prokaryotes, forms part of a postsegregational killing system which is involved in the initiation of programmed cell death of plasmid-free cells. The protein is folded into a three-helix bundle that directly interacts with the zeta toxin, inactivating it .. +PF08999 Surfactant protein C, N terminal propeptide
The N-terminal propeptide of surfactant protein C adopts an alpha-helical structure, with turn and extended regions. Its main function is the stabilisation of metastable surfactant protein C (SP-C), since the latter can irreversibly transform from its native alpha-helical structure to beta-sheet aggregates and form amyloid-like fibrils. The correct intracellular trafficking of proSP-C has also been reported to depend on the propeptide .. +PF09000 Cytotoxic
The cytotoxic domain confers cytotoxic activity to proteins, enabling the formation of nucleolytic breaks in 16S ribosomal RNA. The structure of the domain reveals a highly twisted central beta-sheet elaborated with a short N-terminal alpha-helix .. +PF09001 Domain of unknown function (DUF1890)
This domain is found in a set of hypothetical archaeal proteins.. +PF09002 Domain of unknown function (DUF1887)
This domain is found in a set of hypothetical bacterial proteins.. +PF09003 Bacteriophage lambda integrase, N-terminal domain
The amino terminal domain of bacteriophage lambda integrase folds into a three-stranded, antiparallel beta-sheet that packs against a C-terminal alpha-helix, adopting a fold that is structurally related to the three-stranded beta-sheet family of DNA-binding domains (which includes the GCC-box DNA-binding domain and the N-terminal domain of Tn916 integrase). This domain is responsible for high-affinity binding to each of the five DNA arm-type sites and is also a context-sensitive modulator of DNA cleavage .. +PF09004 Domain of unknown function (DUF1891)
This domain is found in a set of hypothetical eukaryotic proteins.. +PF09005 Domain of unknown function (DUF1897)
This domain is found in Psi proteins produced by Drosophila, and in various eukaryotic hypothetical proteins. It has no known function.. +PF09006 Lung surfactant protein D coiled-coil trimerisation
This domain, predominantly found in lung surfactant protein D, forms a triple-helical parallel coiled coil, and mediates trimerisation of the protein .. +PF09007 EBP50, C-terminal
This C terminal domain allows interaction of EBP50 with FERM (four-point one ERM) domains, resulting in the activation of Ezrin-radixin-moesin (ERM), with subsequent cytoskeletal modulation and cellular growth control .. +PF09008 Head binding
The head binding domain found in the Phage P22 tailspike protein contains two regular beta-sheets, A and B, oriented nearly perpendicular to each other and composed of five and three strands respectively. The topology of the strands is exclusively antiparallel. The tailspike protein trimerises through this domain, and the direction of the strands with respect to the molecular triad is almost parallel for beta-sheet A, whereas beta-sheet B is perpendicular to the triad, forming a dome-like structure. This domain is dispensable for thermostability and SDS resistance of the intact protein, and its deletion has only minor effects on tailspike folding kinetics .. +PF09009 Exotoxin A catalytic
Members of this family, which are found in prokaryotic exotoxin A, catalyse the transfer of ADP ribose from nicotinamide adenine dinucleotide (NAD) to elongation factor-2 in eukaryotic cells, with subsequent inhibition of protein synthesis .. +PF09010 Anti-Sigma Factor A
Anti-sigma factor A is a transcriptional inhibitor that inhibits sigma 70-directed transcription by weakening its interaction with the core of the host's RNA polymerase. It is an all-helical protein, composed of six helical segments and intervening loops and turns, as well as a helix-turn-helix DNA binding motif, although neither free anti-sigma factor nor anti-sigma factor bound to sigma-70 has been shown to interact directly with DNA. In solution, the protein forms a symmetric dimer of small (10.59 kDa) protomers, which are composed of helix and coil regions and are devoid of beta-strand/sheet secondary structural elements .. +PF09011 DUF1898;
Mistry J, Sammut SJ, Coggill P. This short 71 residue domain is an HMG-box domain. HMG-box domains mediate re-modelling of chromatin-structure. Mammalian HMG-box proteins are of two types: those that are non-sequence-specific DNA-binding proteins with two HMG-box domains and a long highly acidic C-tail; and a diverse group of sequence-specific transcription factor-proteins with either a single HMG-box or up to six copies, and no acidic C-tail .. +PF09012 DUF1920;
FeoC like transcriptional regulator. This family contains several transcriptional regulators, including FeoC, which contain a HTH motif. FeoC acts as a [Fe-S] dependent transcriptional repressor .. +PF09013 YopH, N-terminal
The N-terminal domain of YopH is a compact structure composed of four alpha-helices and two beta-hairpins. Helices alpha-1 and alpha-3 are parallel to each other and antiparallel to helices alpha-2 and alpha-4. This domain targets YopH for secretion from the bacterium and translocation into eukaryotic cells, and has phosphotyrosyl peptide-binding activity, allowing for recognition of p130Cas and paxillin .. +PF09014 Beta-2-glycoprotein-1 fifth domain
The fifth domain of beta-2-glycoprotein-1 (b2GP-1) is composed of four well-defined anti-parallel beta-strands and two short alpha-helices, as well as a long highly flexible loop. It plays an important role in the binding of b2GP-1 to negatively charged compounds and subsequent capture for binding of anti-b2GP-1 antibodies .. +PF09015 NgoMIV restriction enzyme
Members of this family are prokaryotic DNA restriction enzymes, exhibiting an alpha/beta structure, with a central region comprising a mixed six-stranded beta-sheet with alpha-helices on each side. A long 'arm' protrudes out of the core of the domain between strands beta2 and beta3 and is mainly involved in the tetramerisation interface of the protein. These restriction enzymes recognise the double-stranded sequence GCCGGC and cleave after G-1 .. +PF09016 Pas factor saposin fold
Members of this family adopt a compact structure comprising five alpha helices. Charged and polar residues are exposed mostly on the surface, while most of the hydrophobic residues are buried inside the hydrophobic core of the helical bundle. The precise function of this domain is unknown, but it has been shown to induce secretion of periplasmic proteins, especially collagenase .. +PF09017 Microbial transglutaminase
Microbial transglutaminase (MTG) catalyses an acyl transfer reaction by means of a Cys-Asp diad mechanism, in which the gamma-carboxyamide groups of peptide-bound glutamine residues act as the acyl donors. The MTG molecule forms a single, compact domain belonging to the alpha+beta folding class, containing 11 alpha-helices and 8 beta-strands. The alpha-helices and the beta-strands are concentrated mainly at the amino and carboxyl ends of the polypeptide, respectively. These secondary structures are arranged so that a beta-sheet is surrounded by alpha-helices, which are clustered into three regions .. +PF09018 P3 major capsid protein
The P3 major capsid protein adopts a 'double-barrel' structure comprising two eight-stranded viral beta-barrels or jelly rolls, each of which contains a 12-residue alpha-helix. This protein then trimerises through a 'trimerisation loop' sequence, and is incorporated within the viral capsid .. +PF09019 EcoRII C terminal
The C-terminal catalytic domain of the Restriction Endonuclease EcoRII has a restriction endonuclease-like fold with a central five-stranded mixed beta-sheet surrounded on both sides by alpha-helices. It cleaves DNA specifically at single 5' CCWGG sites .. +PF09020 YopE, N terminal
The N terminal YopE domain targets YopE for secretion from the bacterium and translocation into eukaryotic cells .. +PF09021 HutP
The HutP protein family regulates the expression of Bacillus 'hut' structural genes by an anti-termination complex, which recognises three UAG triplet units, separated by four non-conserved nucleotides on the RNA terminator region. L-histidine and Mg2+ ions are also required. These proteins exhibit the structural elements of alpha/beta proteins, arranged in the order: alpha-alpha-beta-alpha-alpha-beta-beta-beta in the primary structure, and the four antiparallel beta-strands form a beta-sheet in the order beta1-beta2-beta3-beta4, with two alpha-helices each on the front (alpha1 and alpha2) and at the back (alpha3 and alpha4) of the beta-sheet .. +PF09022 Staphostatin A
The staphostatin A polypeptide chain folds into a slightly deformed, eight-stranded beta-barrel, with strands beta-4 through beta-8 forming an antiparallel sheet while the N-terminus forms a psi-loop motif. Members of this family constitute a class of cysteine protease inhibitors distinct in the fold and the mechanism of action from any known inhibitors of these enzymes .. +PF09023 Staphostatin B
+PF09025 YopR Core
The YopR core domain, predominantly found in the Yersinia pestis virulence factor YopR, is composed of five alpha-helices, four of which are arranged in an antiparallel bundle. Little is known about this domain, though it may contribute to the virulence of the protein YopR .. +PF09026 Cenp-B_dimeris;
Centromere protein B dimerisation domain. The centromere protein B (CENP-B) dimerisation domain is composed of two alpha-helices, which are folded into an antiparallel configuration. Dimerisation of CENP-B is mediated by this domain, in which monomers dimerise to form a symmetrical, antiparallel, four-helix bundle structure with a large hydrophobic patch in which 23 residues of one monomer form van der Waals contacts with the other monomer. This CENP-B dimer configuration may be suitable for capturing two distant CENP-B boxes during centromeric heterochromatin formation .. +PF09027 GTPase binding
The GTPase binding domain binds to the G protein Cdc42, inhibiting both its intrinsic and stimulated GTPase activity. The domain is largely unstructured in the absence of Cdc42 .. +PF09028 Mac 1
The bacterial protein Mac 1 adopts an alpha/beta fold, with 14 beta strands and 9 alpha helices. The N-terminal domain is made up predominantly of alpha helices, whereas the C-terminal domain consists predominantly of beta sheets. Mac 1 blocks polymorphonuclear opsonophagocytosis, inhibits the production of reactive oxygen species and contains IgG endopeptidase activity .. +PF09029 5-aminolevulinate synthase presequence
The N terminal presequence domain found in 5-aminolevulinate synthase exists as an amphipathic helix, with a positively charged surface provided by lysine residues and no stable helix at the N-terminus. The domain is essential for the import process by which ALAS is transported into the mitochondria: translocase of the outer membrane (Tom) and translocase of the inner membrane protein complexes appear responsible for recognition and import through the mitochondrial membrane. The protein Tom20 is anchored to the mitochondrial outer membrane, and its interaction with presequences is thought to be the recognition step which allows subsequent import .. +PF09030 Creb binding
The Creb binding domain assumes a structure comprising three alpha-helices which pack in a bundle, exposing a hydrophobic groove between alpha-1 and alpha-3 within which complementary domains found in the protein 'activator for thyroid hormone and retinoid receptors' (ACTR) can dock. Docking of these domains is required for the recruitment of RNA polymerase II and the basal transcription machinery .. +PF09032 Siah interacting protein, N terminal
The N terminal domain of Siah interacting protein (SIP) adopts a helical hairpin structure with a hydrophobic core stabilised by a classic knobs-and-holes arrangement of side chains contributed by the two amphipathic helices. Little is known about this domain's function, except that it is crucial for interactions with Siah. It has also been hypothesised that SIP can dimerise through this N terminal domain .. +PF09033 DNA Fragmentation factor 45kDa, C terminal domain
The C terminal domain of DNA Fragmentation factor 45kDa (DFF-C) consists of four alpha-helices, which are folded in a helix-packing arrangement, with alpha-2 and alpha-3 packing against a long C-terminal helix (alpha-4). The main function of this domain is the inhibition of DFF40 by binding to its C-terminal catalytic domain through ionic interactions, thereby inhibiting the fragmentation of DNA in the apoptotic process. In addition to blocking the DNase activity of DFF40, the C-terminal region of DFF45 is also important for the DFF40-specific folding chaperone activity, as demonstrated by the ability of DFF45 to refold DFF40 .. +PF09034 TRADD, N-terminal domain
The N terminal domain of 'Tumour necrosis factor receptor type 1 associated death domain protein' (TRADD) folds into an alpha-beta sandwich with a four-stranded beta sheet and six alpha helices, each forming one layer of the structure. The domain allows docking of TRADD onto 'tumour necrosis factor receptor-associated factor' (TRAF): the binding is at the beta-sandwich domain, away from the coiled-coil domain. Binding ensures the recruitment of cIAPs to the signaling complex, which may be important for direct caspase-8 inhibition and the immediate suppression of apoptosis at the apical point of the cascade .. +PF09035 Excisionase from transposon Tn916
The phage-encoded excisionase protein Tn916-Xis adopts a winged-helix structure that consists of a three-stranded anti-parallel beta-sheet that packs against a helix-turn-helix (HTH) motif and a third C-terminal alpha-helix. It is encoded by Tn916, which also codes for the integrase Tn916-Int. The protein interacts with DNA by the insertion of helix alpha-2 into the major groove and the contact of the hairpin that connects strands beta-2 and beta-3 with the adjacent phosphodiester backbone and/or minor groove. Tn916-Xis stimulates phage excision and inhibits viral integration by stabilising distorted DNA structures .. +PF09036 Bcr-Abl oncoprotein oligomerisation domain
The Bcr-Abl oncoprotein oligomerisation domain consists of a short N-terminal helix (alpha-1), a flexible loop and a long C-terminal helix (alpha-2). Together these form an N-shaped structure, with the loop allowing the two helices to assume a parallel orientation. The monomeric domains associate into a dimer through the formation of an antiparallel coiled coil between the alpha-2 helices and domain swapping of two alpha-1 helices, where one alpha-1 helix swings back and packs against the alpha-2 helix from the second monomer. Two dimers then associate into a tetramer. The oligomerisation domain is essential for the oncogenicity of the Bcr-Abl protein .. +PF09037 Stf0 sulphotransferase
Members of this family are essential for the biosynthesis of sulpholipid-1 in prokaryotes. They adopt a structure that belongs to the sulphotransferase superfamily, consisting of a single domain with a core four-stranded parallel beta-sheet flanked by alpha-helices .. +PF09038 Tumour suppressor p53-binding protein-1 Tudor
Members of this family consist of ten beta-strands and a carboxy-terminal alpha-helix. The amino-terminal five beta-strands and the C-terminal five beta-strands adopt folds that are identical to each other. This domain is essential for the recruitment of proteins to double stranded breaks in DNA, which is mediated by interaction with methylated Lys 79 of histone H3 .. +PF09039 Mu_I-gamma;
Mu DNA binding, I gamma subdomain. Members of this family are responsible for binding the DNA attachment sites at each end of the Mu genome. They adopt a secondary structure comprising a four helix bundle tightly packed around a hydrophobic core consisting of aliphatic and aromatic amino acid residues. Helices 1 and 2 are oriented antiparallel to each other. Helix 3 crosses helices 1 and 2 at angles of 60 and 120 degrees, respectively. Excluding the C-terminal helix 4, the fold of the I-gamma subdomain is remarkably similar to that of the homeodomain family of helix-turn-helix DNA-binding proteins, although their amino acid sequences are completely unrelated .. +PF09040 Gastric H+/K+-ATPase, N terminal domain
Members of this family adopt an alpha-helical conformation under hydrophobic conditions. The domain contains tyrosine residues, phosphorylation of which regulates the function of the ATPase. Additionally, the domain also interacts with various structural proteins, including the spectrin-binding domain of ankyrin III .. +PF09041 Aurora-A binding
The Aurora-A binding domain binds to two distinct sites on the Aurora kinase: the upstream residues bind at the N-terminal lobe, whilst the downstream residues bind in an alpha-helical conformation between the N- and C-terminal lobes. The two Aurora-A binding motifs are connected by a flexible linker that is variable in length and sequence across species. Binding of the domain results in strong activation of Aurora-A and protection from deactivating dephosphorylation by phosphatase PP1 .. +PF09042 Titin Z
The titin Z domain, that recognises and binds to the C-terminal calmodulin-like domain of alpha-actinin-2 (Act-EF34), adopts a helical structure, and binds in a groove formed by the two planes between the helix pairs of Act-EF34. This interaction is essential for sarcomere assembly .. +PF09043 D-Lysine 5,6-aminomutase alpha subunit
Members of this family are involved in the 1,2 rearrangement of the terminal amino group of DL-lysine and of L-beta-lysine, using adenosylcobalamin (AdoCbl) and pyridoxal-5'-phosphate as cofactors. The structure is predominantly a PLP-binding TIM barrel domain, with several additional alpha-helices and beta-strands at the N and C termini. These helices and strands form an intertwined accessory clamp structure that wraps around the sides of the TIM barrel and extends up toward the Ado ligand of the Cbl cofactor, providing most of the interactions observed between the protein and the Ado ligand of the Cbl, suggesting that its role is mainly in stabilising AdoCbl in the precatalytic resting state .. +PF09044 Kp4
Members of this fungal family of toxins specifically inhibit voltage-gated calcium channels in mammalian cells. They adopt an alpha/beta-sandwich structure, comprising a five-stranded antiparallel beta-sheet with two antiparallel alpha-helices lying at approximately 45 degrees to these strands .. +PF09045 L27_2
The L27_2 domain is a protein-protein interaction domain capable of organising scaffold proteins into supramolecular assemblies by formation of heteromeric L27_2 domain complexes. L27_2 domain-mediated protein assemblies have been shown to play essential roles in cellular processes including asymmetric cell division, establishment and maintenance of cell polarity, and clustering of receptors and ion channels. Members of this family form specific heterotetrameric complexes, in which each domain contains three alpha-helices. The two N-terminal helices of each L27_2 domain pack together to form a tight, four-helix bundle in the heterodimer, whilst the third helix of each L27_2 domain forms another four-helix bundle that assembles the two units of the heterodimer into a tetramer .. +PF09046 AvrPtoB E3 ubiquitin ligase
The E3 ubiquitin ligase domain found in the bacterial protein AvrPtoB inhibits immunity-associated programmed cell death (PCD) when translocated into plant cells, probably by recruiting E2 enzymes and transferring ubiquitin molecules to cellular proteins involved in regulation of PCD and targeting them for degradation. The structure of this domain reveals a globular fold centred on a four-stranded beta-sheet that packs against two helices on one face and has three very extended loops connecting the elements of secondary structure, with remarkable homology to the RING-finger and U-box families of proteins involved in ubiquitin ligase complexes in eukaryotes .. +PF09047 MEF2 binding
The myocyte enhancer factor-2 (MEF2) binding domain, predominantly found in the calcineurin-binding protein CABIN 1, adopts an amphipathic alpha-helical structure, which allows it to bind a hydrophobic groove on the MEF2S domain, forming a triple-helical interaction. Interaction of this domain with MEF2 causes repression of transcription .. +PF09048 Cro
Members of this family are involved in the repression of transcription by binding as a homodimer to palindromic DNA operator sites in phage lambda: they repress genes expressed in early phage development and are necessary for the late stage of lytic growth. These proteins have a secondary structure consisting of three alpha-helices and three beta-sheets, and dimerise through interactions between the two antiparallel beta-strands .. +PF09049 Stannin transmembrane
Members of this family consist of a single highly hydrophobic transmembrane helix that transverses the lipid bilayer at a 20 degree angle with respect to the membrane normal. They contain a conserved cysteine residue (Cys32) that, together with Cys34 found in the stannin unstructured linker domain, constitutes the putative trimethyltin-binding site that resides at the end of the transmembrane domain close to the lipid/solvent interface .. +PF09050 Stannin unstructured linker
Members of this family are unstructured, acting as connectors of the stannin helical domains. They contain a conserved CXC metal-binding motif and a putative 14-3-3-zeta binding domain. Upon coordinating dimethyltin, considerable structural or dynamic changes in the flexible loop region of SNN may take place, recruiting other binding partners such as 14-3-3-zeta, and thereby initiating the apoptotic cascade .. +PF09051 Stannin cytoplasmic
Members of this family consist of a distorted cytoplasmic helix that is partially absorbed into the plane of the lipid bilayer with a tilt angle of approximately 80 degrees from the membrane normal. They interact with the surface of the lipid bilayer, and contribute to the initiation of the apoptotic cascade on binding of the unstructured linker domain to dimethyltin .. +PF09052 Salmonella invasion protein A
Salmonella invasion protein A is an actin-binding protein that contributes to host cytoskeletal rearrangements by stimulating actin polymerisation and counteracting F-actin destabilising proteins. Members of this family possess an all-helical fold consisting of eight alpha-helices arranged so that six long, amphipathic helices form a compact fold that surrounds a final, predominantly hydrophobic helix in the middle of the molecule .. +PF09053 CagZ
CagZ is a 23 kDa protein consisting of a single compact L-shaped domain, composed of seven alpha-helices that run antiparallel to each other. 70% of the residues are in alpha-helix conformation and no beta-sheet is present. CagZ is essential for the translocation of the pathogenic protein CagA into host cells .. +PF09055 Nickel-containing superoxide dismutase
Nickel containing superoxide dismutase (NiSOD) is a metalloenzyme containing a hexameric assembly of right-handed 4-helix bundles of up-down-up-down topology with an N-terminal His-Cys-X-X-Pro-Cys-Gly-X-Tyr motif that chelates the active site Ni ions. NiSOD catalyses the disproportionation of superoxide to peroxide and molecular oxygen through alternate oxidation and reduction of Ni, protecting cells from the toxic products of aerobic metabolism .. +PF09056 Prokaryotic phospholipase A2
The prokaryotic phospholipase A2 domain is predominantly found in bacterial and fungal phospholipases, as well as various hypothetical and putative proteins. It enables the liberation of fatty acids and lysophospholipid by hydrolysing the 2-ester bond of 1,2-diacyl-3-sn-phosphoglycerides. The domain adopts an alpha-helical secondary structure, consisting of five alpha-helices and two helical segments .. +PF09057 Second Mitochondria-derived Activator of Caspases
Second Mitochondria-derived Activator of Caspases promotes apoptosis by activating caspases in the cytochrome c/Apaf-1/caspase-9 pathway, and by opposing the inhibitory activity of inhibitor of apoptosis proteins (XIAP-BIR3). The protein assumes an elongated three-helix bundle structure, and forms a dimer in solution .. +PF09058 L27_1
The L27 domain is a protein interaction module that exists in a large family of scaffold proteins, functioning as an organisation centre of large protein assemblies required for the establishment and maintenance of cell polarity. L27 domains form specific heterotetrameric complexes, in which each domain contains three alpha-helices .. +PF09059 TyeA
Members of this family are composed of two pairs of parallel alpha-helices, and interact with the bacterial protein YopN via hydrophobic residues located on the helices. Association of TyeA with the C terminus of YopN is accompanied by conformational changes in both polypeptides that create order out of disorder: the resulting structure then serves as an impediment to type III secretion of YopN .. +PF09060 L27_N
The L27_N domain plays a role in the biogenesis of tight junctions and in the establishment of cell polarity in epithelial cells. Each L27_N domain consists of three alpha-helices, the first two of which form an antiparallel coiled-coil. Two L27 domains come together to form a four-helical bundle with the antiparallel coiled-coils formed by the first two helices. The third helix of each domain forms another coiled-coil packing at one end of the four-helix bundle, creating a large hydrophobic interface: the hydrophobic interactions are the major force that drives heterodimer formation .. +PF09062 PI-PfuI Endonuclease subdomain
The endonuclease subdomain, found in the prokaryotic protein ribonucleotide reductase, assumes an alpha-beta-beta-alpha-beta-beta-alpha-alpha topology. The four stranded beta-sheet forms a saddle-shaped surface and assembles together through an interface made of alpha-helices. The presence of 14 basic residues on the surface of the beta-sheets suggests that this large groove may be involved in DNA binding .. +PF09063 Phage PP7 coat protein
Members of this family form the capsid of P. aeruginosa phage PP7. They adopt a secondary structure consisting of a six stranded beta sheet and an alpha helix .. +PF09064 Thrombomodulin like fifth domain, EGF-like
Members of this family adopt a fold similar to other EGF domains, with a flat major and a twisted minor beta sheet. Disulphide pairing, however, is not of the usual 1-3, 2-4, 5-6 type; rather 1-2, 3-4, 5-6 pairing is found. Its extended major sheet (strands beta-2 and beta-3 and the connecting loop) projects into thrombin's active site groove. This domain is required for interaction of thrombomodulin with thrombin, and subsequent activation of protein-C .. +PF09065 Haemadin
Members of this family adopt a secondary structure consisting of five short beta-strands (beta1-beta5), which are arranged in two antiparallel distorted sheets formed by strands beta1-beta4-beta5 and beta2-beta3 facing each other. This beta-sandwich is stabilised by six enclosed cysteines arranged in a [1-2, 3-5, 4-6] disulphide pairing resulting in a disulphide-rich hydrophobic core that is largely inaccessible to bulk solvent. The close proximity of disulfide bonds [3-5] and [4-6] organises haemadin into four distinct loops. The N-terminal segment of this domain binds to the active site of thrombin, inhibiting it .. +PF09066 Beta2-adaptin appendage, C-terminal sub-domain
Members of this family adopt a structure consisting of a 5 stranded beta-sheet, flanked by one alpha helix on the outer side, and by two alpha helices on the inner side. This domain is required for binding to clathrin, and its subsequent polymerisation. Furthermore, a hydrophobic patch present in the domain also binds to a subset of D-phi-F/W motif-containing proteins that are bound by the alpha-adaptin appendage domain (epsin, AP180, eps15) .. +PF09067 Erythropoietin receptor, ligand binding
Members of this family interact with erythropoietin (EPO), with subsequent initiation of the downstream chain of events associated with binding of EPO to the receptor, including EPO-induced erythroblast proliferation and differentiation through induction of the JAK2/STAT5 signaling cascade. The domain adopts a secondary structure composed of a short amino-terminal helix, followed by two beta-sandwich regions .. +PF09068 efhand_1; EF_hand_2;
+PF09069 efhand_2;
+PF09070 PFU (PLAA family ubiquitin binding)
Pfam-B_5813 (release 20.0). This domain is found N terminal to Pfam:PF08324 and binds to ubiquitin .. +PF09071 Alpha-amylase, C terminal
Members of this family, which are found in the prokaryotic protein glycosyltrehalose trehalohydrolase, assume a gamma-crystallin-type fold with a five-stranded anti-parallel beta-sheet that packs against the C-terminal side of a beta-alpha barrel. This domain is common to family 13 glycosidases and typically contains a five to ten strand beta-sheet, however its precise fold varies .. +PF09072 Translation machinery associated TMA7
TMA7 plays a role in protein translation. Deletion of the TMA7 gene results in altered protein synthesis rates .. +PF09073 BUD22
BUD22 has been shown in yeast to be a nuclear protein involved in bud-site selection. It plays a role in positioning the proximal bud pole signal . More recently it has been shown to be involved in ribosome biogenesis [2,3].. +PF09074 Mer2
Mer2 (Rec107) forms part of a complex that is required for meiotic double strand DNA break formation. Mer2 increases in abundance and is phosphorylated during the prophase phase of cell division . Blocking double strand break formation results in delayed dephosphorylation and dissociation of Mer2 from the chromosome .. +PF09075 Heat-stable enterotoxin B, secretory
Members of this family assume a helical secondary structure, with two alpha helices forming a disulphide crosslinked alpha-helical hairpin. The disulphide bonds are crucial for the toxic activity of the protein, and are required for maintenance of the tertiary structure, and subsequent interaction with the particulate form of guanylate cyclase, increasing cyclic GMP levels within the host intestinal epithelial cells .. +PF09076 Sklp_toxin;
Beta/Gamma crystallin. Sammut SJ, Eberhardt R. Members of this family assume a beta-gamma-crystallin fold [1,2], wherein nine beta-strands are connected by loops, and are separated into two sheets, each sheet forming the Greek key motif. The two Greek key motifs face each other in the global topology. The three-dimensional structure of the molecule is a 'sandwich'-shaped beta-barrel structure: hydrophobic side-chains are packed in the large interface area of the beta-sheets. In Streptomyces killer toxin-like protein, the domain confers a cytocidal effect to the toxin, causing cell death in both budding and fission yeasts, and morphological changes in yeasts and filamentous fungi . This family also includes chitin-binding antifungal proteins [2-3].. +PF09077 Mu B transposition protein, C terminal
The C terminal domain of the B transposition protein from Bacteriophage Mu comprises four alpha-helices arranged in a loosely packed bundle, where helix alpha1 runs parallel to alpha3, and anti-parallel to helices alpha2 and alpha4. The domain allows for non-specific binding of Mu to double-stranded DNA, allowing for integration into the bacterial genome, and mediates dimerisation of the protein .. +PF09078 CheY binding
Members of this family adopt a secondary structure consisting of an open-face beta/alpha sandwich, with four antiparallel beta-strands and two alpha-helices. They bind to a corresponding domain on CheY, with subsequent phosphorylation of the CheY Asp57 residue, and activation of CheY, which then affects flagellar rotation .. +PF09079 CDC6, C terminal
The C terminal domain of CDC6 assumes a winged helix fold, with a five alpha-helical bundle (alpha15-alpha19) structure, backed on one side by three beta strands (beta6-beta8). It has been shown that this domain acts as a DNA-localisation factor, however its exact function is, as yet, unknown. Putative functions include: (1) mediation of protein-protein interactions and (2) regulation of nucleotide binding and hydrolysis. Mutagenesis studies have shown that this domain is essential for appropriate Cdc6 activity .. +PF09080 K cyclin, C terminal
Members of this family adopt a secondary structure consisting of a five alpha-helix cyclin fold. Interaction with cyclin dependent kinases (CDKs) at a PSTAIRE sequence motif within the catalytic cleft of CDK results in the regulation of CDK activity .. +PF09081 Domain of unknown function (DUF1921)
This domain, which is found in a set of prokaryotic amylases, has no known function .. +PF09082 Domain of unknown function (DUF1922)
Members of this family consist of a beta-sheet region followed by an alpha-helix and an unstructured C-terminus. The beta-sheet region contains a CXCX...XCXC sequence with Cys residues located in two proximal loops and pointing towards each other. The precise function of this set of bacterial proteins is, as yet, unknown .. +PF09083 Domain of unknown function (DUF1923)
Members of this family are found in maltosyltransferases, and adopt a secondary structure consisting of eight antiparallel beta-strands, which form an open-sided 'jelly roll' Greek key beta-barrel. Their exact function is, as yet, unknown .. +PF09084 NMT1/THI5 like
Pfam-B_2797 (release 20.0). This family contains the NMT1 and THI5 proteins. These proteins are proposed to be required for the biosynthesis of the pyrimidine moiety of thiamine . They are regulated by thiamine .. +PF09085 Adhesion molecule, immunoglobulin-like
Members of this family are found in a set of mucosal cellular adhesion proteins and adopt an immunoglobulin-like beta-sandwich structure, with seven strands arranged in two beta-sheets in a Greek-key topology. They are essential for recruitment of lymphocytes to specific tissues .. +PF09086 Domain of unknown function (DUF1924)
This domain is found in a set of bacterial proteins, including Cytochrome c-type protein. It is functionally uncharacterised.. +PF09087 Cyclomaltodextrinase, N-terminal
Members of this family assume a beta-sandwich structure composed of the eight antiparallel beta-strands. A ten residue linker is also present at the C-terminal end, which connects the N terminal domain to a distal domain in the protein. This domain participates in oligomerisation of the protein, wherein the N-terminal domain of one subunit contacts the active centre of the other subunit, and is also required for binding of cyclodextrin to substrate .. +PF09088 MIF4G like
Members of this family are involved in mediating U snRNA export from the nucleus. They adopt a highly helical structure, wherein the polypeptide chain forms a right-handed solenoid. At the tertiary level, the domain is composed of a superhelical arrangement of successive antiparallel pairs of helices .. +PF09089 Phage short tail fibre protein gp12, middle domain
Members of this family adopt a right-handed triple-stranded beta-helix fold, and are found in the middle of the phage short tail fibre protein gp12 .. +PF09090 MIF4G like
Members of this family are involved in mediating U snRNA export from the nucleus. They adopt a highly helical structure, wherein the polypeptide chain forms a right-handed solenoid. At the tertiary level, the domain is composed of a superhelical arrangement of successive antiparallel pairs of helices .. +PF09092 Lyase, N terminal
Members of this family are predominantly found in chondroitin ABC lyase I, and adopt a jelly-roll fold topology consisting of a two-layered bent beta-sheet sandwich with one short alpha-helix. The convex beta sheet is composed of five antiparallel strands, whilst the concave beta-sheet contains five antiparallel beta-strands with a loop between two consecutive strands folding back onto the concave surface. This domain is required for binding of the protein to long glycosaminoglycan chains .. +PF09093 Lyase, catalytic
Members of this family are predominantly found in chondroitin ABC lyase I, and adopt a helical structure, with fifteen alpha-helices which are at least two turns long and several short helical turns. The bulk of the domain is formed by ten alpha-helices forming five hairpin-like pairs and arranged into an incomplete toroid, the (alpha/alpha)5 fold. Additionally, two long and two short alpha-helices at the N terminus of the domain wrap around the toroid. At the C-terminal end of the toroid there is one additional short alpha-helix. This domain is required for degradation of polysaccharides containing 1,4-beta-D-hexosaminyl and 1,3-beta-D-glucoronosyl or 1,3-alpha-L-iduronosyl linkages to disaccharides containing 4-deoxy-beta-D-gluc-4-enuronosyl groups .. +PF09094 Domain of unknown function (DUF1925)
Members of this family, which are found in a set of prokaryotic transferases, adopt an immunoglobulin/albumin-binding domain-like fold, with a bundle of three alpha-helices. Their function is, as yet, unknown .. +PF09095 Domain of unknown function (DUF1926)
Members of this family, which are found in a set of prokaryotic transferases, adopt a beta-sandwich fold, in which two layers of anti-parallel beta-sheets are arranged in a nearly parallel fashion. The exact function of this family is, as yet, unknown, however it has been proposed that they may play a role in transglycosylation reactions .. +PF09096 Baseplate structural protein, domain 2
Members of this family adopt a beta barrel structure with a Greek key topology, which is topologically similar to the FMN-binding split barrel. They are structural components of the viral baseplate, predominantly found in the structural protein gp27 .. +PF09097 Baseplate structural protein, domain 1
Members of this family adopt a beta barrel structure with a Greek key topology, which is topologically similar to the FMN-binding split barrel. They are structural components of the viral baseplate, predominantly found in the structural protein gp27 .. +PF09098 Quinohemoprotein amine dehydrogenase A, alpha subunit, haem binding
Sammut SJ, Eberhardt R. Members of this family are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase. They have a predominantly alpha-helical structure and can be divided into two subdomains, each binding a haem C group via a conserved CXXCH motif [1,2].. +PF09099 DUF1927;
Quinohemoprotein amine dehydrogenase, alpha subunit domain III. Sammut SJ, Eberhardt R. Members of this family, which are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase, adopt an immunoglobulin-like beta-sandwich fold, with seven strands arranged into two beta sheets; the fold is possibly related to the immunoglobulin and/or fibronectin type III superfamilies. The precise function of this domain has not, as yet, been defined [1,2].. +PF09100 DUF1928;
Quinohemoprotein amine dehydrogenase, alpha subunit domain IV. Sammut SJ, Eberhardt R. Members of this family, which are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase, adopt an immunoglobulin-like beta-sandwich fold, with seven strands arranged into two beta sheets; the fold is possibly related to the immunoglobulin and/or fibronectin type III superfamilies. The precise function of this domain has not, as yet, been defined [1,2].. +PF09101 Exotoxin A binding
Members of this family are found in Pseudomonas aeruginosa exotoxin A, and are responsible for binding of the toxin to the alpha-2-macroglobulin receptor, with subsequent internalisation into endosomes. The domain adopts a thirteen-strand antiparallel beta jelly roll topology, which belongs to the Concanavalin A-like lectins/glucanases fold superfamily .. +PF09102 Exotoxin A, targeting
Members of this family are found in Pseudomonas aeruginosa exotoxin A, and are responsible for transmembrane targeting of the toxin, as well as transmembrane translocation of the catalytic domain into the cytoplasmic compartment. A furin cleavage site is present within the domain: cleavage generates a 37 kDa carboxy-terminal fragment, which includes the enzymatic domain, which is then translocated into the cytoplasm. The domain adopts a helical structure, with six alpha-helices forming a bundle .. +PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1
Members of this family assume an OB fold, which consists of a highly curved five-stranded beta-sheet that closes on itself to form a beta-barrel. OB1 has a shallow groove formed by one face of the curved sheet and is demarcated by two loops, one between beta 1 and beta 2 and another between beta 4 and beta 5, which allows for weak single strand DNA binding. The domain also binds the 70-amino acid DSS1 (deleted in split-hand/split foot syndrome) protein, which was originally identified as one of three genes that map to a 1.5-Mb locus deleted in an inherited developmental malformation syndrome .. +PF09104 BRCA2, oligonucleotide/oligosaccharide-binding, domain 3
Members of this family assume an OB fold, which consists of a highly curved five-stranded beta-sheet that closes on itself to form a beta-barrel. OB3 has a pronounced groove formed by one face of the curved sheet and is demarcated by two loops, one between beta 1 and beta 2 and another between beta 4 and beta 5, which allows for strong ssDNA binding .. +PF09105 Elongation factor SelB, winged helix
Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding .. +PF09106 Elongation factor SelB, winged helix
Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding .. +PF09107 Elongation factor SelB, winged helix
Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding .. +PF09108 Switch protein XOL-1, N-terminal
Members of this family, which are required for the formation of the active site of the sex-determining protein Xol-1, adopt a secondary structure consisting of five alpha helices and six antiparallel beta sheets, in a beta-alpha-beta-beta-beta-alpha-beta-alpha-alpha-alpha-beta arrangement. The fold of this family is similar to that found in ribosomal protein S5 domain 2-like .. +PF09109 Switch protein XOL-1, GHMP-like
Members of this family, which are required for the formation of the active site of the sex-determining protein Xol-1, adopt a secondary structure consisting of five alpha helices and seven antiparallel beta sheets, in a beta-alpha-beta-alpha-alpha-alpha-beta-beta-alpha-beta-beta-beta arrangement. The fold of this family is structurally similar to that found in the C-terminal domain of GHMP Kinase .. +PF09110 HAND
The HAND domain adopts a secondary structure consisting of four alpha helices, three of which (H2, H3, H4) form an L-like configuration. Helix H2 runs antiparallel to helices H3 and H4, packing closely against helix H4, whilst helix H1 reposes in the concave surface formed by these three helices and runs perpendicular to them. The domain confers DNA and nucleosome binding properties to the protein .. +PF09111 SLIDE
The SLIDE domain adopts a secondary structure comprising a main core of three alpha-helices. It has a role in DNA binding, contacting DNA target sites similar to c-Myb (Pfam:PF00249) repeats or homeodomains .. +PF09112 PngaseF_N;
Peptide-N-glycosidase F, N terminal. Members of this family adopt an eight-stranded antiparallel beta jelly roll configuration, with the beta strands arranged into two sheets. They are similar in topology to many viral capsid proteins, as well as lectins and several glucanases. The domain allows the protein to bind sugars and catalyses the complete removal of N-linked oligosaccharide chains from glycoproteins .. +PF09113 PngaseF_C;
Peptide-N-glycosidase F, C terminal. Members of this family adopt an eight-stranded antiparallel beta jelly roll configuration, with the beta strands arranged into two sheets. They are similar in topology to many viral capsid proteins, as well as lectins and several glucanases. The domain allows the protein to bind sugars and catalyses the complete removal of N-linked oligosaccharide chains from glycoproteins .. +PF09114 Transcription factor MotA, activation domain
Members of this family of viral protein domains are implicated in transcriptional activation. They are almost completely alpha-helical, with five alpha-helices and a short, two-stranded, beta-ribbon. Four alpha helices (alpha1, alpha3, alpha4 and alpha5) are amphipathic and pack their hydrophobic surfaces around the central helix alpha2 .. +PF09115 DNA polymerase III, delta subunit, C terminal
Members of this family, which are predominantly found in prokaryotic DNA polymerase III, assume an alpha helical structure, with a core of five alpha helices, and an additional small helix. They are essential for the formation of the polymerase clamp loader .. +PF09116 gp45 sliding clamp, C terminal
Members of this family are essential for the interaction of the gp45 sliding clamp with the corresponding polymerase. They adopt a DNA clamp fold, consisting of two alpha helices and two beta sheets - the fold is duplicated and has internal pseudo two-fold symmetry .. +PF09117 MiAMP1
MiAMP1 is a highly basic protein from the nut kernel of Macadamia integrifolia which inhibits the growth of several microbial plant pathogens in vitro while having no effect on mammalian or plant cells. It consists of eight beta-strands which are arranged in two Greek key motifs. These Greek key motifs then associate to form a Greek key beta-barrel .. +PF09118 Domain of unknown function (DUF1929)
Members of this family adopt a secondary structure consisting of a bundle of seven, mostly antiparallel, beta-strands surrounding a hydrophobic core. The 7 strands are arranged in 2 sheets, in a Greek-key topology. Their precise function has not, as yet, been defined, though they are mostly found in sugar-utilising enzymes, such as galactose oxidase .. +PF09119 SicP binding
Members of this family bind the chaperone SicP, which is required both to maintain the stability of SptP, as well as to ensure the eventual secretion of the protein. The domain is found in the Salmonella effector protein SptP, which interacts with SicP chaperone dimers mainly through four regions of its chaperone-binding domain. The structure of the SptP-SicP complex contains four molecules of SicP, aligned in a linear fashion and arranged in two sets of tightly bound homodimers that bind two SptP molecules. The SicP homodimers do not interact with each other, but are held together by a molecular interface formed between two SptP molecules. Each SptP molecule is wrapped around by three SicP chaperones (two chaperones from one homodimer and a third one from the opposite homodimer pair) .. +PF09121 Tower
Members of this family adopt a secondary structure consisting of a pair of long, antiparallel alpha-helices (the stem) that support a three-helix bundle (3HB) at their end. The 3HB contains a helix-turn-helix motif and is similar to the DNA binding domains of the bacterial site-specific recombinases, and of eukaryotic Myb and homeodomain transcription factors. The Tower domain has an important role in the tumour suppressor function of BRCA2, and is essential for appropriate binding of BRCA2 to DNA .. +PF09122 Domain of unknown function (DUF1930)
Members of this family are found in 3-mercaptopyruvate sulfurtransferase, and have no known function. They adopt a structure consisting of a four-stranded antiparallel beta-sheet and an alpha-helix, arranged in a beta(2)-alpha-beta(2) fashion, and bearing a remarkable structural similarity to the FK506-binding protein class of peptidylprolyl cis/trans-isomerase .. +PF09123 Domain of unknown function (DUF1931)
Members of this family, which are found in a set of hypothetical bacterial proteins, contain a core of six alpha-helices, where one central helix is surrounded by the other five. The exact function of this family has not, as yet, been determined .. +PF09124 T4 recombination endonuclease VII, dimerisation
Members of this family, which are predominantly found in Bacteriophage T4 recombination endonuclease VII, adopt a helical secondary structure, with three alpha helices oriented parallel to each other. They mediate dimerisation of the protein, as well as binding to the DNA major groove .. +PF09125 Cytochrome C oxidase subunit II, transmembrane
Members of this family adopt a tertiary structure consisting of two antiparallel transmembrane helices, in a transmembrane helix hairpin fold .. +PF09126 Restriction endonuclease NaeI
Members of this family adopt a secondary structure consisting of nine alpha-helices, six 3-10 helices and 13 beta-strands. They bind two GCC-CGG recognition sequences to cleave DNA into blunt-ended products .. +PF09127 Leukotriene A4 hydrolase, C-terminal
Members of this family adopt a structure consisting of two layers of parallel alpha-helices, five in the inner layer and four in the outer, arranged in an antiparallel manner, with perpendicular loops containing short helical segments on top. They are required for the formation of a deep cleft harbouring the catalytic Zn2+ site in Leukotriene A4 hydrolase .. +PF09128 Regulator of G protein signalling-like domain
Members of this family adopt a structure consisting of twelve helices that fold into a compact domain that contains the overall structural scaffold observed in other RGS proteins and three additional helical elements that pack closely to it. Helices 1-9 comprise the RGS (Pfam:PF00615) fold, in which helices 4-7 form a classic antiparallel bundle adjacent to the other helices. Like other RGS structures, helices 7 and 8 span the length of the folded domain and form essentially one continuous helix with a kink in the middle. Helices 10-12 form an apparently stable C-terminal extension of the structural domain, and although other RGS proteins lack this structure, these elements are intimately associated with the rest of the structural framework by hydrophobic interactions. Members of the family bind to active G-alpha proteins, promoting GTP hydrolysis by the alpha subunit of heterotrimeric G proteins, thereby inactivating the G protein and rapidly switching off G protein-coupled receptor signalling pathways .. +PF09129 Cholesterol oxidase, substrate-binding
The substrate-binding domain found in Cholesterol oxidase is composed of an eight-stranded mixed beta-pleated sheet and six alpha-helices. This domain is positioned over the isoalloxazine ring system of the FAD cofactor bound by FAD_binding_4 (PF:PF01565) and forms the roof of the active site cavity, allowing for catalysis of oxidation and isomerisation of cholesterol to cholest-4-en-3-one .. +PF09130 Domain of unknown function (DUF1932)
This domain is found in a set of hypothetical prokaryotic proteins. Its exact function has not, as yet, been described.. +PF09131 Bacillus thuringiensis delta-Endotoxin, middle domain
Members of this family adopt a structure consisting of three four-stranded beta-sheets, each with a Greek key fold, with internal pseudo threefold symmetry. Thus they act as a receptor binding beta-prism, binding to insect-specific receptors of gut epithelial cells .. +PF09132 BmKX
Members of this family assume a structure adopted by most short-chain scorpion toxins, consisting of a cysteine-stabilised alpha/beta scaffold consisting of a short 3-10-helix and a two-stranded antiparallel beta-sheet. They are predominantly found in short-chain scorpion toxins, and their biological method of action has not, as yet, been defined .. +PF09133 SANTA (SANT Associated)
The SANTA domain (SANT Associated domain) is approximately 90 amino acids in length and is conserved in Eukaryota. It is sometimes found in association with the SANT domain (Pfam:PF00249, also known as Myb-like DNA-binding domain) implying a putative function in regulating chromatin remodelling . Sequence analysis has shown that the SANTA domain is likely to form four central beta-sheets with three flanking alpha-helices . Many conserved hydrophobic residues are present, implying a possible role in protein-protein interactions .. +PF09134 Invasin, domain 3
Members of this family adopt a structure consisting of an immunoglobulin-like beta-sandwich, with seven strands in two beta-sheets, arranged in a Greek-key topology. It forms part of the extracellular region of the protein, which can be expressed as a soluble protein (Inv497) that binds integrins and promotes subsequent uptake by cells when attached to bacteria .. +PF09135 Alb1
Alb1 is a nuclear shuttling factor involved in ribosome biogenesis .. +PF09136 Glucodextranase, domain B
Members of this family adopt a structure consisting of seven/eight-strand antiparallel beta-sheets, in a Greek-key topology, similar to the immunoglobulin beta-sandwich fold. They act as cell wall anchors, where they interact with the S-layer present in the cell wall of Gram-positive bacteria by hydrophobic interactions. In glucodextranase, Domain B is buried in the S-layer, and a flexible linker located between domain B and the catalytic unit confers motion to the catalytic unit, which is capable of efficient hydrolysis of the substrates located close to the cell surface .. +PF09137 Glucodextranase, domain N
Members of this family, which are uniquely found in bacterial and archaeal glucoamylases and glucodextranases, adopt a structure consisting of 17 antiparallel beta-strands. These beta-strands are divided into two beta-sheets, and one of the beta-sheets is wrapped by an extended polypeptide, which appears to stabilise the domain. Members of this family are mainly concerned with catalytic activity, hydrolysing alpha-1,6-glucosidic linkages of dextran to release beta-D-glucose from the non-reducing end via an inverting reaction mechanism .. +PF09138 Urm1 (Ubiquitin related modifier)
Pfam-B_16507 (release 20.0). Urm1 is a ubiquitin related protein that modifies proteins in the yeast ubiquitin-like pathway urmylation . Structural comparisons and phylogenetic analysis of the ubiquitin superfamily have indicated that Urm1 has the most conserved structural and sequence features of the common ancestor of the entire superfamily .. +PF09139 Mitochondrial matrix Mmp37
Pfam-B_15301 (release 20.0). MMp37 is a mitochondrial matrix protein that functions in the translocation of proteins across the mitochondrial inner membrane . It has been shown that MMP37 proteins possess the NTase fold but they have only one active site carboxylate and thus probably are not able to carry out an enzymatic reaction. These potentially non-active members of the NTase fold superfamily may bind ATP, hydrolysis of which is necessary for the translocation of proteins through the membrane .. +PF09140 ATPase MipZ
Mistry J, Thanbichler M. Pfam-B_23525 (release 20.0). MipZ is an ATPase that forms a complex with the chromosome partitioning protein ParB near the chromosomal origin of replication . It is responsible for the temporal and spatial regulation of FtsZ ring formation .. +PF09141 Talin, middle domain
Members of this family adopt a structure consisting of five alpha helices that fold into a bundle. They contain a Vinculin binding site (VBS) composed of a hydrophobic surface spanning five turns of helix four. Activation of the VBS causes subsequent recruitment of Vinculin, which enables maturation of small integrin/talin complexes into more stable adhesions. Formation of the complex between VBS and Vinculin requires prior unfolding of this middle domain: once released from the talin hydrophobic core, the VBS helix is then available to induce the 'bundle conversion' conformational change within the vinculin head domain thereby displacing the intramolecular interaction with the vinculin tail, allowing vinculin to bind actin .. +PF09142 tRNA Pseudouridine synthase II, C terminal
The C terminal domain of tRNA Pseudouridine synthase II adopts a PUA (Pfam:PF01472) fold, with a four-stranded mixed beta-sheet flanked by one alpha-helix on each side. It allows for binding of the enzyme to RNA, as well as stabilisation of the RNA molecule .. +PF09143 AvrPphF-ORF-2
Members of this family of plant pathogenic proteins adopt an elongated structure somewhat reminiscent of a mushroom that can be divided into 'stalk' and 'head' subdomains. The stalk subdomain is composed of the N-terminal helix (alpha1) and beta strands beta3-beta4. An antiparallel beta sheet (beta5, beta7-beta8) forms the base of the head subdomain that interacts with the stalk. A pair of twisted antiparallel beta sheets (beta1 and beta6; beta2 and beta9/9') supported by alpha2 form the dome of the head. The head subdomain possesses weak structural similarity with the catalytic portion of a number of ADP-ribosyltransferase toxins .. +PF09144 Yersinia pseudotuberculosis mitogen
Members of this family of Yersinia pseudotuberculosis mitogens adopt a sandwich structure consisting of nine strands in two beta sheets, in a jelly-roll topology. As with other superantigens, they are able to excessively activate T cells by binding to the T cell receptor .. +PF09145 Ubiquitin-associated
Ubiquitin associated domains contain approximately 40 residues and bind ubiquitin noncovalently. They adopt a secondary structure consisting of three alpha-helices, and have been identified in various modular proteins involved in protein trafficking, clathrin assembly/disassembly, DNA repair, proteasomal degradation, and cell cycle regulation .. +PF09147 Domain of unknown function (DUF1933)
Members of this family are predominantly found in carbapenam synthetase, and are composed of two antiparallel six-stranded beta-sheets that form a sandwich, flanked on each side by two alpha-helices. Their exact function has not, as yet, been determined .. +PF09148 Domain of unknown function (DUF1934)
Members of this family are found in a set of hypothetical bacterial proteins. Their precise function has not, as yet, been defined.. +PF09149 Domain of unknown function (DUF1935)
Members of this family are found in various bacterial and eukaryotic hypothetical proteins, as well as in the cysteine protease calpain. Their exact function has not, as yet, been defined.. +PF09150 Orange carotenoid protein, N-terminal
Members of this family adopt an alpha-helical structure consisting of two four-helix bundles. They are predominantly found in prokaryotic orange carotenoid protein, and carotenoid binding proteins .. +PF09151 Domain of unknown function (DUF1936)
This domain is found in a set of hypothetical Archaeal proteins. Its exact function has not, as yet, been defined. It possesses a zinc ribbon fold.. +PF09152 Domain of unknown function (DUF1937)
This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been described.. +PF09153 Domain of unknown function (DUF1938)
Members of this family, which are predominantly found in the archaeal protein O6-alkylguanine-DNA alkyltransferase, adopt a secondary structure consisting of a three stranded antiparallel beta-sheet and three alpha helices. Their exact function has not, as yet, been defined, though it has been postulated that they confer thermostability to the archaeal protein .. +PF09154 Domain of unknown function (DUF1939)
Members of this family, which are predominantly found in Archaeal amylase, adopt a secondary structure consisting of an eight-stranded antiparallel beta-sheet containing a Greek key motif. Their exact function has not, as yet, been determined .. +PF09155 Domain of unknown function (DUF1940)
Members of this family adopt a secondary structure consisting of six alpha helices, in which four long helices (alpha1, alpha2, alpha5, alpha6) form a left-handed, antiparallel alpha helical bundle. The function of this family of Archaeal hypothetical proteins has not, as yet, been defined .. +PF09156 Anthrax toxin lethal factor, middle domain
Members of this family, which are predominantly found in anthrax toxin lethal factor, adopt a structure consisting of a core of antiparallel beta sheets and alpha helices. They form a long deep groove within the protein that anchors the 16-residue N-terminal tail of MAPKK-2 before cleavage. It has been noted that this domain resembles the ADP-ribosylating toxin from Bacillus cereus, but the active site has been modified to augment substrate recognition .. +PF09157 Pseudouridine synthase II TruB, C-terminal
Members of this family adopt a secondary structure consisting of a four-stranded beta sheet and one alpha helix. They are predominantly RNA-binding domains, mostly found in Pseudouridine synthase II TruB .. +PF09158 Bacteriophage T4 MotA, C-terminal
Members of this family adopt a compact alpha/beta structure comprising three alpha-helices and six beta-strands in the order: alpha1-beta1-beta2-beta3-beta4-alpha2-beta5-beta6-alpha3. The beta-strands form a single anti-parallel beta-sheet and the three alpha-helices pack side-by-side onto one surface of the beta-sheet. In this architecture, the domain's hydrophobic core is at the sheet-helix interface, and the second surface of the beta-sheet is completely exposed. The domain is a DNA-binding motif, with a consensus sequence containing nine base pairs (5'-TTTGCTTTA-3'), that appears to bind to various mot boxes, allowing access to the minor groove towards the 5'-end of this sequence and the major groove towards the 3'-end .. +PF09159 Mitochondrial resolvase Ydc2 / RNA splicing MRS1
Members of this family adopt a secondary structure consisting of two beta sheets and one alpha helix, arranged as a beta-alpha-beta motif. Each beta sheet has five strands, arranged in a 32145 order, with the second strand being antiparallel to the rest. Mitochondrial resolvase Ydc2 is capable of resolving Holliday junctions and cleaves DNA after 5'-CT-3' and 5'-TT-3' sequences . This family also contains the mitochondrial RNA-splicing protein MRS1 which is involved in the excision of group I introns [2-3].. +PF09160 FimH, mannose binding
Members of this family adopt a secondary structure consisting of a beta sandwich, with nine strands arranged in two sheets in a Greek key topology. They are predominantly found in bacterial mannose-specific adhesins, since they are capable of binding to D-mannose .. +PF09162 Tap, RNA-binding
Members of this family adopt a structure consisting of an alpha+beta sandwich with an antiparallel beta-sheet, arranged in a 2(beta-alpha-beta) motif. They are mainly found in mRNA export factors, and mediate the sequence nonspecific nuclear export of cellular mRNAs as well as the sequence-specific export of retroviral mRNAs bearing the constitutive transport element .. +PF09163 Formate dehydrogenase N, transmembrane
Members of this family are predominantly found in the beta subunit of formate dehydrogenase, and consist of a single transmembrane helix. They act as a transmembrane anchor, and allow for conduction of electrons within the protein .. +PF09164 Vitamin D binding protein, domain III
Members of this family are predominantly found in Vitamin D binding protein, and adopt a multihelical structure. They are required for formation of an actin 'clamp', allowing the protein to bind to actin .. +PF09165 Ubiquinol-cytochrome c reductase 8 kDa, N-terminal
Members of this family adopt a structure consisting of many antiparallel beta sheets, with few alpha helices, in a non-globular arrangement. They are required for proper functioning of the respiratory chain .. +PF09166 Biliverdin reductase, catalytic
Members of this family adopt a structure consisting of four alpha helices and six beta sheets, in an alpha-beta-alpha-alpha-alpha-beta-beta-beta-beta-beta arrangement. They contain a catalytic active site, capable of reducing the gamma-methene bridge of the open tetrapyrrole, biliverdin IX alpha, to bilirubin with the concomitant oxidation of a NADH or NADPH cofactor .. +PF09167 Domain of unknown function (DUF1942)
Members of this family of bacterial proteins assume a beta-sandwich structure consisting of two antiparallel beta-sheets similar to an immunoglobulin-like fold, with an additional small, antiparallel beta-sheet. The longer-stranded beta-sheet is made up of four antiparallel beta-strands. The shorter-stranded beta-sheet consists of five beta-strands, four of these beta-strands form an antiparallel beta-sheet. The exact function of this family of proteins is unknown, though a putative role includes involvement in host-bacterial interactions involved in endocytosis or phagocytosis, possibly during bacterial internalisation .. +PF09168 X-Prolyl dipeptidyl aminopeptidase PepX, N-terminal
Members of this family adopt a secondary structure consisting of a helical bundle of eight alpha helices and three beta strands, the last alpha helix connecting to the first strand of the catalytic domain. The first strand of the N-terminus also forms a small parallel beta sheet with strand 5' of catalytic domain. The domain mediates dimerisation of the protein, with two proline residues present in the domain being critical for interaction .. +PF09169 BRCA2, helical
Members of this family adopt a helical structure, consisting of a four-helix cluster core (alpha 1, alpha 8, alpha 9, alpha 10) and two successive beta-hairpins (beta 1 to beta 4). An approx. 50-amino acid segment that contains four short helices (alpha 2 to alpha 4), meanders around the surface of the core structure. In BRCA2, the alpha 9 and alpha 10 helices pack with BRCA-2_OB1 (Pfam:PF09103) through van der Waals contacts involving hydrophobic and aromatic residues, and also through side-chain and backbone hydrogen bonds. The domain binds the 70-amino acid DSS1 (deleted in split-hand/split foot syndrome) protein, which was originally identified as one of three genes that map to a 1.5-Mb locus deleted in an inherited developmental malformation syndrome .. +PF09170 DUF1879;
CST, Suppressor of cdc thirteen homolog, complex subunit STN1. STN1 is a component of the CST complex, a complex that binds to single-stranded DNA and is required for protecting telomeres from DNA degradation. The CST complex binds single-stranded DNA with high affinity in a sequence-independent manner, while isolated subunits bind DNA with low affinity on their own. In addition to telomere protection, the CST complex probably has a more general role in DNA metabolism at non-telomeric sites.. +PF09171 Domain of unknown function (DUF1886)
This domain is predominantly found in the Archaeal protein N-glycosylase/DNA lyase.. +PF09172 Domain of unknown function (DUF1943)
Members of this family adopt a structure consisting of several large open beta-sheets. Their exact function has not, as yet, been determined .. +PF09173 Initiation factor eIF2 gamma, C terminal
Members of this family, which are found in the initiation factors eIF2 and EF-Tu, adopt a structure consisting of a beta barrel with Greek key topology. They are required for formation of the ternary complex with GTP and initiator tRNA .. +PF09174 Maf1 regulator
Maf1 is a negative regulator of RNA polymerase III . It targets the initiation factor TFIIIB .. +PF09175 Domain of unknown function (DUF1944)
Members of this family adopt a structure consisting of several large open beta-sheets. Their exact function has not, as yet, been determined .. +PF09176 Methylene-tetrahydromethanopterin dehydrogenase, N-terminal
Members of this family adopt an alpha-beta structure, with a core comprising three alpha/beta/alpha layers, in which each sheet contains four strands. They are predominantly found in prokaryotic methylene-tetrahydromethanopterin dehydrogenase, which catalyses the dehydrogenation of methylene-tetrahydromethanopterin and the reversible dehydrogenation of methylene-H(4)F .. +PF09177 Syntaxin 6, N-terminal
Members of this family, which are found in the amino terminus of various SNARE proteins, adopt a structure consisting of an antiparallel three-helix bundle. Their exact function has not been determined, though it is known that they regulate the SNARE motif, as well as mediate various protein-protein interactions involved in membrane-transport .. +PF09178 Domain of unknown function (DUF1945)
Members of this family, which are predominantly found in prokaryotic 4-alpha-glucanotransferase, adopt a structure composed of six antiparallel beta-strands, four of which form a beta-sheet and another two form a type I' beta-hairpin. The role of this family of domains, has not, as yet, been defined .. +PF09179 DUF1946;
TilS substrate binding domain. This domain is found in the tRNA(Ile) lysidine synthetase (TilS) protein.. +PF09180 Prolyl-tRNA synthetase, C-terminal
Members of this family are predominantly found in prokaryotic prolyl-tRNA synthetase. They contain a zinc binding site, and adopt a structure consisting of alpha helices and antiparallel beta sheets arranged in 2 layers, in a beta-alpha-beta-alpha-beta motif .. +PF09181 Prolyl-tRNA synthetase, C-terminal
Members of this family are predominantly found in prokaryotic prolyl-tRNA synthetase. They contain a zinc binding site, and adopt a structure consisting of alpha helices and antiparallel beta sheets arranged in 2 layers, in a beta-alpha-beta-alpha-beta motif .. +PF09182 Bacterial purine repressor, N-terminal
+PF09183 Domain of unknown function (DUF1947)
Members of this family are found in a set of hypothetical Archaeal proteins. Their exact function has not, as yet, been defined.. +PF09184 PPP4R2
PPP4R2 (protein phosphatase 4 core regulatory subunit R2) is the regulatory subunit of the histone H2A phosphatase complex. It has been shown to confer resistance to the anticancer drug cisplatin in yeast , and may confer resistance in higher eukaryotes.. +PF09185 Domain of unknown function (DUF1948)
Members of this family of Mycoplasma hypothetical proteins adopt a helical structure, with one central alpha-helix surrounded by five others, in a NusB-like fold. Their function has not, as yet, been determined .. +PF09186 Domain of unknown function (DUF1949)
Members of this family pertain to a set of functionally uncharacterised hypothetical bacterial proteins. They adopt a ferredoxin-like fold, with a beta-alpha-beta-beta-alpha-beta arrangement .. +PF09187 Domain of unknown function(DUF1950)
Members of this family pertain to a set of functionally uncharacterised hypothetical eukaryotic proteins .. +PF09188 Domain of unknown function (DUF1951)
Members of this family of Mycoplasma hypothetical proteins adopt a helical structure, with a buried central helix. Their function has not, as yet, been determined.. +PF09189 Domain of unknown function (DUF1952)
Members of this family are found in various Thermus thermophilus proteins. Their exact function has not, as yet, been determined.. +PF09190 DALR domain
This DALR domain is found in cysteinyl-tRNA-synthetases .. +PF09191 CD4, extracellular
Members of this family adopt an immunoglobulin-like beta-sandwich, with seven strands in 2 beta sheets, in a Greek key topology. They are predominantly found in the extracellular portion of CD4 proteins, where they enable interaction with major histocompatibility complex class II antigens .. +PF09192 Actin-fragmin kinase, catalytic
Members of this family assume a secondary structure consisting of eight beta strands and 11 alpha-helices, organised in two lobes. They are predominantly found in actin-fragmin kinase, where they act as a catalytic domain that mediates the phosphorylation of actin .. +PF09193 Cholecystokinin A receptor, N-terminal
Members of this family are found in the extracellular region of the cholecystokinin A receptor, where they adopt a tertiary structure consisting of a few helical turns and a disulphide-crosslinked loop. They are required for interaction of the cholecystokinin A receptor with its corresponding hormonal ligand .. +PF09194 Restriction endonuclease BsobI
Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence CYCGRG (where Y = T/C, and R = A/G) and cleave after C-1. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates .. +PF09195 Restriction endonuclease BglII
Members of this family are predominantly found in prokaryotic restriction endonuclease BglII, and adopt a structure consisting of an alpha/beta core containing a six-stranded beta-sheet surrounded by five alpha-helices, two of which are involved in homodimerisation of the endonuclease. They recognise the double-stranded DNA sequence AGATCT and cleave after A-1, resulting in specific double-stranded fragments with terminal 5'-phosphates .. +PF09196 Domain of unknown function (DUF1953)
This domain is found in the Archaeal protein maltooligosyl trehalose synthase produced by Sulfolobus spp. Its function has not, as yet, been defined.. +PF09197 Rap1, DNA-binding
Members of this family, which are predominantly found in the yeast protein rap1, assume a secondary structure consisting of a three-helix bundle and an N-terminal arm. They contain an Arg-Asp-Arg-Lys sequence that interacts with an ACACC region in the 3' region of the DNA-binding site .. +PF09198 Bacteriophage T4 beta-glucosyltransferase
Members of this family are DNA-modifying enzymes encoded by bacteriophage T4 that transfer glucose from uridine diphosphoglucose to 5-hydroxymethyl cytosine bases of phage T4 DNA .. +PF09199 Domain of unknown function (DUF1954)
Members of this family are found in various staphylococcal toxins, and adopt an OB fold, wherein the domain folds into a five-stranded beta-barrel. The exact manner in which they confer pathogenic properties to the protein has not, as yet, been determined .. +PF09200 Monellin
Monellin, a protein produced by the West African plant Dioscoreophyllum cumminsii, is approximately 70,000 times sweeter than sucrose on a molar basis. The protein adopts an alpha-beta structure, with a cystatin-like fold, where each helix packs against a coiled antiparallel beta-sheet .. +PF09201 SRX
Members of this family, which are predominantly found in eukaryotic signal recognition particle receptor alpha, consist of a central six-stranded anti-parallel beta-sheet sandwiched by helix alpha1 on one side and helices alpha2-alpha4 on the other. They interact with the small GTPase SR-beta, forming a complex that matches a class of small G protein-effector complexes, including Rap-Raf, Ras-PI3K(gamma), Ras-RalGDS, and Arl2-PDE(delta) .. +PF09202 Rio2, N-terminal
Members of this family are found in Rio2, and are structurally homologous to the winged helix (wHTH) domain. They adopt a structure consisting of four alpha helices followed by two beta strands and a fifth alpha helix. The domain confers DNA binding properties to the protein, as per other winged helix domains .. +PF09203 MspA
MspA is a membrane porin produced by Mycobacteria, allowing hydrophilic nutrients to enter the bacterium. The protein forms a tightly interconnected octamer with eightfold rotation symmetry that resembles a goblet and contains a central channel. Each subunit fold contains a beta-sandwich of Ig-like topology and a beta-ribbon arm that forms an oligomeric transmembrane barrel .. +PF09204 ColicinD;
Bacterial self-protective colicin-like immunity. Colicin D, which is synthesised by various prokaryotes, adopts an antiparallel four helical bundle fold: the helices are tightly packed, forming a compact cylindrical molecule. The protein specifically cleaves the anticodon loop of all four tRNA-Arg isoacceptors, thereby inactivating prokaryotic protein synthesis and leading to cell death . This family also contains immunity proteins to klebicins and microcins. Many bacteria produce proteins that destroy their competitors. Colicin D is one such. The immunity proteins are expressed on the same operon as their cognate bacteriocins and protect the expressing bacterium from the effects of its own bacteriocin .. +PF09205 Domain of unknown function (DUF1955)
Members of this family are found in hypothetical proteins synthesised by the Archaeal organism Sulfolobus. Their exact function has not, as yet, been determined.. +PF09206 Alpha-L-arabinofuranosidase B, catalytic
Members of this family, which are present in fungal alpha-L-arabinofuranosidase B, adopt a beta-sandwich fold similar to that of Concanavalin A-like lectins/glucanase. The beta-sandwich fold consists of two anti-parallel beta-sheets with seven and six strands, respectively. In addition, there are four helices outside of the beta-strands. The beta-sandwich strands are closely packed and curved with a jelly roll topology, creating a small catalytic pocket. The domain catalyses the hydrolysis of alpha-1,2-, alpha-1,3- and alpha-1,5-L-arabinofuranosidic bonds in L-arabinose-containing hemicelluloses such as arabinoxylan and L-arabinan .. +PF09207 Yeast killer toxin
Members of this family, which are produced by Williopsis fungi, adopt a secondary structure consisting of eight strands in two beta sheets, in a Greek-key topology .. +PF09208 Restriction endonuclease MspI
Members of this family of prokaryotic restriction endonucleases recognise the palindromic tetranucleotide sequence 5'-CCGG and cleave between the first and second nucleotides, leaving 2 base 5' overhangs. They fold into an alpha/beta architecture, with a five-stranded mixed beta-sheet sandwiched on both sides by alpha-helices .. +PF09209 Domain of unknown function (DUF1956)
Members of this family are found in various prokaryotic transcriptional regulator proteins. Their exact function has not, as yet, been identified.. +PF09210 Domain of unknown function (DUF1957)
This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been defined.. +PF09211 Domain of unknown function (DUF1958)
Members of this functionally uncharacterised family are found in prokaryotic penicillin-binding protein 4.. +PF09212 Carbohydrate binding module 27
Members of this family are carbohydrate binding modules that bind to beta-1, 4-mannooligosaccharides, carob galactomannan, and konjac glucomannan, but not to cellulose (insoluble and soluble) or soluble birchwood xylan. They adopt a beta sandwich structure comprising 13 beta strands with a single, small alpha-helix and a single metal atom .. +PF09213 M3
Members of this family of viral chemokine binding proteins adopt a structure consisting of two different beta-sandwich domains of partial topological similarity to immunoglobulin-like folds. They bind with the CC-chemokine MCP-1, acting as cytokine decoy receptors .. +PF09214 Bacteriophage Prd1, adsorption protein P2
Members of this family form a set of bacteriophage adsorption proteins, composed mainly of beta-strands whose complicated topology forms an elongated seahorse-shaped molecule with a distinct head, containing a pseudo-beta propeller structure with approximate 6-fold symmetry, and tail. They are required for the attachment of the phage to the host conjugative DNA transfer complex. This is a poorly understood large transmembrane complex of unknown architecture, with at least 11 different proteins .. +PF09215 Bacteriophage T4, Gp8
Members of this family of viral baseplate structural proteins adopt a structure consisting of a three-layer beta-sandwich with two finger-like loops containing an alpha-helix at the opposite sides of the sandwich. The two peripheral, five-stranded, antiparallel beta-sheets are stacked against the middle, four-stranded, antiparallel beta-sheet. Attachment of this family of proteins to the baseplate during assembly creates a binding site for subsequent attachment of Gp6 .. +PF09216 Pfg27
Members of this family are essential for gametocytogenesis in Plasmodium falciparum. They contain a fold composed of two pseudo dyad-related repeats of the helix-turn-helix motif, serving as a platform for RNA and Src homology-3 (SH3) binding .. +PF09217 Restriction endonuclease EcoRII, N-terminal
The N-terminal effector-binding domain of the Restriction Endonuclease EcoRII has a DNA recognition fold, allowing for binding to 5'-CCWGG sequences. It assumes a structure composed of an eight-stranded beta-sheet with the strands in the order of b2, b5, b4, b3, b7, b6, b1 and b8. They are mostly antiparallel to each other except that b3 is parallel to b7. Alternatively, it may also be viewed as consisting of two mini beta-sheets of four antiparallel beta-strands, sheet I from beta-strands b2, b5, b4, b3 and sheet II from strands b7, b6, b1, b8, folded into an open mixed beta-barrel with a novel topology. Sheet I has a simple Greek key motif while sheet II does not .. +PF09218 Domain of unknown function (DUF1959)
This domain is found in a set of uncharacterised Archaeal hypothetical proteins. Its function has not, as yet, been described.. +PF09220 L-A virus, major coat protein
Members of this family form the major coat protein of the Saccharomyces cerevisiae L-A virus .. +PF09221 Bacterioc_AS-48;
Bacteriocin class IId cyclical uberolysin-like. Sammut SJ, Coggill P, Eberhardt R. Members of this family are membrane-interacting peptides, produced by Firmicutes that display a broad anti-microbial spectrum against Gram-positive and Gram-negative bacteria. They adopt a helical structure, with four or five alpha helices forming a Saposin-like fold [2,5]. The structure has been found to be cyclical [1, 3, 5]. It should be pointed out that one reference implies that both circularin A and gassericin A are class V or IIc-type bacteriocins; however, we find that these two proteins fall into different Pfam families, this one and BacteriocIIc_cy, Pfam:PF12173.. +PF09222 Fimbrial adhesin F17-AG, lectin domain
Members of this family are carbohydrate-specific lectin domains found in bacterial fimbrial adhesins. They adopt a compact, elongated structure consisting of a beta-sandwich with two major sheets: one consisting of five long strands in mixed orientations, and a front sheet with four antiparallel strands, forming an immunoglobulin-like fold .. +PF09223 YodA lipocalin-like domain
Members of this family of prokaryotic domains have been identified as part of the response of bacteria to a challenge with the toxic heavy metal cadmium. They are able to bind to cadmium, and ensure its subsequent elimination .. +PF09224 Domain of unknown function (DUF1961)
Members of this family are found in a set of hypothetical bacterial proteins. Their exact function has not, as yet, been determined.. +PF09225 Restriction endonuclease PvuII
Members of this family are predominantly found in prokaryotic restriction endonuclease PvuII. They recognise the double-stranded DNA sequence 5'-CAGCTG-3' and cleave after G-3, resulting in specific double-stranded fragments with terminal 5'-phosphates .. +PF09226 Restriction endonuclease HincII
Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence 5'-GTYRAC-3' and cleave after Y-3. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates .. +PF09227 Domain of unknown function (DUF1962)
Members of this family of fungal domains are functionally uncharacterised .. +PF09228 Prokaryotic Transcriptional repressor TraM
Members of this family of transcriptional repressors adopt a T-shaped structure, with a core composed of two antiparallel alpha-helices. These proteins can be divided into two parts, a 'globular head' and an 'elongated tail', and they negatively regulate conjugation and the expression of tra genes by antagonising traR/AAI-dependent activation .. +PF09229 Activator of Hsp90 ATPase, N-terminal
Members of this family, which are predominantly found in the protein 'Activator of Hsp90 ATPase' adopt a secondary structure consisting of an N-terminal alpha-helix leading into a four-stranded meandering antiparallel beta-sheet, followed by a C-terminal alpha-helix. The two helices are packed together, with the beta-sheet curving around them. They bind to the molecular chaperone HSP82 and stimulate its ATPase activity .. +PF09230 DNA fragmentation factor 40 kDa
Members of this family of eukaryotic apoptotic proteins induce DNA fragmentation and chromatin condensation during apoptosis .. +PF09231 Rice dwarf virus p3
Members of this family are core structural proteins found in the double-stranded RNA virus Phytoreovirus. They are large proteins without apparent domain division, with a number of all-alpha regions and one all beta domain near the C-terminal end .. +PF09232 Caenorhabditis elegans Her-1
Her-1 adopts an all-helical structure with two subdomains: residues 19-80 comprise a left-handed three-helix bundle with an overhand connection between the second and third helices, whilst residues 81-164 comprise a left-handed anti-parallel four-helix bundle in which the first helix consists of four consecutive turns of 3-10-helix. Fourteen Cys are conserved in all known HER-1 sequences and form seven disulfide bonds. The protein dictates male development in Caenorhabditis elegans, probably by playing a direct role in cell signaling during C. elegans sex determination. It also inhibits the function of tra-2a .. +PF09233 Restriction endonuclease EcoRV
Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence 5'-GATATC-3' and cleave after T-3. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates .. +PF09234 Domain of unknown function (DUF1963)
This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been described.. +PF09235 Ste50p, sterile alpha motif
The fungal Ste50p SAM domain consists of five helices, which form a compact, globular fold. It is required for mediation of homodimerisation and heterodimerisation (and in some cases oligomerisation) of the protein .. +PF09236 Alpha-haemoglobin stabilising protein
Alpha-haemoglobin stabilising protein (AHSP) acts as a molecular chaperone for free alpha-haemoglobin, preventing the harmful aggregation of alpha-haemoglobin during normal erythroid cell development: it specifically protects free alpha-haemoglobin from precipitation. AHSP adopts a helical secondary structure consisting of an elongated antiparallel three alpha-helix bundle .. +PF09237 GAGA factor
Members of this family bind to a 5'-GAGAG-3' DNA consensus binding site, and contain a Cys2-His2 zinc finger core as well as an N-terminal extension containing two highly basic regions. The zinc finger core binds in the DNA major groove and recognises the first three GAG bases of the consensus in a manner similar to that seen in other classical zinc finger-DNA complexes. The second basic region forms a helix that interacts in the major groove recognising the last G of the consensus, while the first basic region wraps around the DNA in the minor groove and recognises the A in the fourth position of the consensus sequence .. +PF09238 Interleukin-4 receptor alpha chain, N-terminal
Members of this family are related in overall topology to fibronectin type III modules and fold into a sandwich comprising seven antiparallel beta sheets arranged in a three-strand and a four-strand beta-pleated sheet. They are required for binding of interleukin-4 to the receptor alpha chain, which is a crucial event for the generation of a Th2-dominated early immune response .. +PF09239 Topoisomerase VI B subunit, transducer
Members of this family adopt a structure consisting of a four-stranded beta-sheet backed by three alpha-helices, the last of which is over 50 amino acids long and extends from the body of the protein by several turns. This domain has been proposed to mediate intersubunit communication by structurally transducing signals from the ATP binding and hydrolysis domains to the DNA binding and cleavage domains of the gyrase holoenzyme .. +PF09240 Interleukin-6 receptor alpha chain, binding
Members of this family adopt a structure consisting of an immunoglobulin-like beta-sandwich, with seven strands in two beta-sheets, in a Greek-key topology. They are required for binding to the cytokine Interleukin-6 .. +PF09241 Herpesviridae viral cyclin
Members of this family of viral cyclins adopt a helical structure consisting of five alpha-helices, with one helix surrounded by the others. They specifically activate CDK6 of host cells to a very high degree .. +PF09242 Flavocytochrome c sulphide dehydrogenase, flavin-binding
Members of this family adopt a structure consisting of a beta(3,4)-alpha(3) core, and an alpha+beta sandwich. They are required for binding to flavin, and subsequent electron transfer .. +PF09243 Mitochondrial small ribosomal subunit Rsm22
Pfam-B_8789 (release 20.0). Rsm22 has been identified as a mitochondrial small ribosomal subunit and is a methyltransferase. In Schizosaccharomyces pombe, Rsm22 is tandemly fused to Cox11 (a factor required for copper insertion into cytochrome oxidase) and the two proteins are proteolytically cleaved after import into the mitochondria .. +PF09244 Domain of unknown function (DUF1964)
Members of this family of bacterial domains adopt a beta-sandwich fold, with Greek-key topology. They are C-terminal to the catalytic sucrose phosphorylase beta/alpha barrel domain, and are functionally uncharacterised .. +PF09245 Mycoplasma arthritidis-derived mitogen
Mycoplasma arthritidis-derived mitogen (MA-Mit) adopts a completely alpha-helical structure consisting of ten alpha helices. It is a superantigen that can activate large fractions of T cells bearing particular TCR V-beta elements. Two MA-Mit molecules form an asymmetric dimer and cross-link two MHC antigens to form a dimerised MA-Mit-MHC complex .. +PF09246 PHAT
The PHAT (pseudo-HEAT analogous topology) domain assumes a structure consisting of a layer of three parallel helices packed against a layer of two antiparallel helices, into a cylindrical shaped five-helix bundle. It is found in the RNA-binding protein Smaug, where it is essential for high-affinity RNA binding .. +PF09247 TATA box-binding protein binding
Members of this family adopt a structure consisting of three alpha helices and a beta-hairpin. They bind to TATA box-binding protein (TBP), inhibiting TBP interaction with the TATA element, thereby resulting in shutting down of gene transcription .. +PF09248 Domain of unknown function (DUF1965)
Members of this family of fungal domains adopt a structure that consists of an alpha/beta motif. Their exact function has not, as yet, been determined .. +PF09249 tRNA nucleotidyltransferase, second domain
Members of this family adopt a structure consisting of a five helical bundle core. They are predominantly found in Archaeal tRNA nucleotidyltransferase, following the catalytic nucleotidyltransferase domain .. +PF09250 Bifunctional DNA primase/polymerase, N-terminal
Members of this family adopt a structure consisting of a core of antiparallel beta sheets. They are found in various bacterial hypothetical proteins, and have been shown to harbour both primase and polymerase activities .. +PF09251 Salmonella phage P22 tail-spike
Members of this family of viral domains adopt a structure consisting of a single-stranded right-handed beta-helix, which in turn is made of parallel beta-strands and short turns. They are required for recognition of the O-antigenic repeating units of the cell surface, and for subsequent infection of the bacterial cell .. +PF09252 Allergen Fel d I-B chain
Members of this family of cat allergens adopt a helical structure consisting of eight alpha helices, in a Uteroglobin-like fold. They are one of the most important causes of allergic asthma worldwide .. +PF09253 Pollen allergen ole e 6
Members of this family consist of two nearly antiparallel alpha-helices, that are connected by a short loop and followed by a long, unstructured C-terminal tail. They are highly allergenic, primarily mediating olive allergy .. +PF09254 Restriction endonuclease FokI, C terminal
Members of this family are predominantly found in prokaryotic restriction endonuclease FokI, and adopt a structure consisting of an alpha/beta/alpha core containing a five-stranded beta-sheet. They recognise the double-stranded DNA sequence 5'-GGATG-3' and cleave DNA phosphodiester groups 9 base pairs away on this strand and 13 base pairs away on the complementary strand .. +PF09255 Caf1 Capsule antigen
Members of this family are predominantly found in the F1 capsule antigen Caf1 synthesised by Yersinia bacteria. They adopt a structure consisting of seven strands arranged in two beta-sheets, in a Greek-key topology, and mediate targeting of the bacterium to sites of infection .. +PF09256 BAFF-R, TALL-1 binding
Members of this family, which are predominantly found in the tumour necrosis factor receptor superfamily member 13c, BAFF-R, are required for binding to tumour necrosis factor ligand TALL-1 .. +PF09257 BCMA, TALL-1 binding
Members of this family, which are predominantly found in the tumour necrosis factor receptor superfamily member 17, BCMA, are required for binding to tumour necrosis factor ligand TALL-1 .. +PF09258 EXTL2;
Glycosyl transferase family 64 domain. Members of this family catalyse the transfer reaction of N-acetylglucosamine and N-acetylgalactosamine from the respective UDP-sugars to the non-reducing end of [glucuronic acid]beta 1-3[galactose]beta 1-O-naphthalenemethanol, an acceptor substrate analog of the natural common linker of various glycosylaminoglycans. They are also required for the biosynthesis of heparan-sulphate .. +PF09259 Fungal immunomodulatory protein Fve
Fve is a major fruiting body protein from Flammulina velutipes, a mushroom possessing immunomodulatory activity. It stimulates lymphocyte mitogenesis, suppresses systemic anaphylaxis reactions and oedema, enhances transcription of IL-2, IFN-gamma and TNF-alpha, and haemagglutinates red blood cells. It appears to be a lectin with specificity for complex cell-surface carbohydrates. Fve adopts a tertiary structure consisting of an immunoglobulin-like beta-sandwich, with seven strands arranged in two beta sheets, in a Greek-key topology. It forms a non-covalently linked homodimer containing no Cys, His or Met residues; dimerisation occurs by 3-D domain swapping of the N-terminal helices and is stabilised predominantly by hydrophobic interactions .. +PF09260 Domain of unknown function (DUF1966)
This domain is found in various fungal alpha-amylase proteins. Its exact function has not, as yet, been defined .. +PF09261 Alpha mannosidase, middle domain
Members of this family adopt a structure consisting of three alpha helices, in an immunoglobulin/albumin-binding domain-like fold. They are predominantly found in the enzyme alpha-mannosidase .. +PF09262 Peroxisome biogenesis factor 1, N-terminal
Members of this family adopt a double psi beta-barrel fold, similar in structure to the Cdc48 N-terminal domain. It has been suggested that this domain may be involved in interactions with ubiquitin, ubiquitin-like protein modifiers, or ubiquitin-like domains, such as Ubx. Furthermore, the domain may possess a putative adaptor or substrate binding site, allowing for peroxisomal biogenesis, membrane fusion and protein translocation .. +PF09263 Peroxisome biogenesis factor 1, N-terminal
Members of this family adopt a Cdc48 domain 2-like fold, with a beta-alpha-beta(3) arrangement. It has been suggested that this domain may be involved in interactions with ubiquitin, ubiquitin-like protein modifiers, or ubiquitin-like domains, such as Ubx. Furthermore, the domain may possess a putative adaptor or substrate binding site, allowing for peroxisomal biogenesis, membrane fusion and protein translocation .. +PF09264 Vibrio cholerae sialidase, lectin insertion
Members of this family are predominantly found in Vibrio cholerae sialidase, and adopt a beta sandwich structure consisting of 12-14 strands arranged in two beta-sheets. They bind to lectins with high affinity helping to target the protein to sialic acid-rich environments, thereby enhancing the catalytic efficiency of the enzyme .. +PF09265 Cytokinin dehydrogenase 1, FAD and cytokinin binding
Members of this family adopt an alpha+beta sandwich structure with an antiparallel beta-sheet, in a ferredoxin-like fold. They are predominantly found in plant cytokinin dehydrogenase 1, where they are capable of binding both FAD and cytokinin substrates. The substrate displays a 'plug-into-socket' binding mode that seals the catalytic site and precisely positions the carbon atom undergoing oxidation in close contact with the reactive locus of the flavin .. +PF09266 Viral DNA topoisomerase I, N-terminal
Members of this family are predominantly found in viral DNA topoisomerase, and assume a beta(2)-alpha-beta-alpha-beta(2) fold, with a left-handed crossover between strands beta2 and beta3 .. +PF09267 Dictyostelium STAT, coiled coil
Members of this family are found in Dictyostelium STAT proteins and adopt a structure consisting of four long alpha-helices, folded into a coiled coil. They are responsible for nuclear export of the protein .. +PF09268 Clathrin, heavy-chain linker
Members of this family adopt a structure consisting of alpha-alpha superhelix. They are predominantly found in clathrin, where they act as a heavy-chain linker domain .. +PF09269 Domain of unknown function (DUF1967)
Members of this family contain a four-stranded beta sheet and three alpha helices flanked by an additional beta strand. They are predominantly found in the bacterial GTP-binding protein Obg, and are still functionally uncharacterised .. +PF09270 Beta-trefoil;
Beta-trefoil DNA-binding domain. Members of this family of DNA binding domains adopt a beta-trefoil fold, that is, a capped beta-barrel with internal pseudo threefold symmetry. In the DNA-binding protein LAG-1, it also is the site of mutually exclusive interactions with NotchIC (and the viral protein EBNA2) and co-repressors (SMRT/N-Cor and CIR) .. +PF09271 LAG1, DNA binding
Members of this family are found in various eukaryotic hypothetical proteins and in the DNA-binding protein LAG-1. They adopt a beta sandwich structure, with nine strands in two beta-sheets, in a Greek-key topology, and allow for DNA binding. This domain is also known as RHR-N (Rel-homology region) as it is related to Rel domain proteins.. +PF09272 Hepsin, SRCR
Members of this family form an extracellular domain of the serine protease hepsin. They are formed primarily by three elements of regular secondary structure: a 12-residue alpha helix, a twisted five-stranded antiparallel beta sheet, and a second, two-stranded, antiparallel sheet. The two beta-sheets lie at roughly right angles to each other, with the helix nestled between the two, adopting an SRCR fold. The exact function of this domain has not been identified, though it may serve to orient the protease domain or place it in the vicinity of its substrate .. +PF09273 Rubisco LSMT substrate-binding
Members of this family adopt a multihelical structure, with an irregular array of long and short alpha-helices. They allow binding of the protein to substrate, such as the N-terminal tails of histones H3 and H4 and the large subunit of the Rubisco holoenzyme complex .. +PF09274 ParG
Members of this family of plasmid partition proteins adopt a ribbon-helix-helix fold, with a core of four alpha-helices. They are an essential component of the DNA partition complex of the multidrug resistance plasmid TP228 .. +PF09275 Pertussis toxin S4 subunit
Members of this family of Bordetella pertussis toxins adopt a structure consisting of an OB fold, with a closed or partly opened beta-barrel in a Greek-key topology .. +PF09276 Pertussis toxin S5 subunit
Members of this family of Bordetella pertussis toxins adopt a structure consisting of an OB fold, with a closed or partly opened beta-barrel in a Greek-key topology .. +PF09277 Erythronolide synthase, docking
Members of this family of docking domains are found in prokaryotic erythronolide synthase. They adopt a structure consisting of a bundle of four alpha-helices, and mediate homodimerisation of the protein, stabilising the resulting complex .. +PF09278 MerR, DNA binding
Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold .. +PF09279 efhand_like;
Phosphoinositide-specific phospholipase C, efhand-like. Members of this family are predominantly found in phosphoinositide-specific phospholipase C. They adopt a structure consisting of a core of four alpha helices, in an EF like fold, and are required for functioning of the enzyme .. +PF09280 XPC-binding domain
Members of this family adopt a structure consisting of four alpha helices, arranged in an array. They bind specifically and directly to the xeroderma pigmentosum group C protein (XPC) to initiate nucleotide excision repair .. +PF09281 Taq polymerase, exonuclease
Members of this family are found in prokaryotic Taq DNA polymerase, where they assume a ribonuclease H-like motif. The domain confers 5'-3' exonuclease activity to the polymerase .. +PF09282 Mago binding
Members of this family adopt a structure consisting of a small globular all-beta-domain, with a three-stranded beta-sheet and a contiguous beta-hairpin. They bind to Mago alpha-helices via extensive electrostatic interactions and at a beta2-beta3 loop via hydrophobic interactions .. +PF09284 Rhamnogalacturonase B, N-terminal
Members of this family are found in prokaryotic Rhamnogalacturonase B, and adopt a structure consisting of a beta supersandwich, with eighteen strands in two beta-sheets. The exact function of the domain is unknown, but a putative role includes carbohydrate-binding .. +PF09285 Elongation factor P, C-terminal
Members of this family of nucleic acid binding domains are predominantly found in elongation factor P, where they adopt an OB-fold, with five beta-strands forming a beta-barrel in a Greek-key topology .. +PF09286 Pro-kumamolisin, activation domain
Members of this family are found in various subtilase propeptides, and adopt a ferredoxin-like fold, with an alpha+beta sandwich. Cleavage of the domain results in activation of the peptide .. +PF09287 CEP-1, DNA binding
Members of this family of DNA-binding domains are found in the transcription factor CEP-1. They adopt a beta sandwich structure, with nine strands in two beta-sheets, in a Greek-key topology .. +PF09288 Fungal ubiquitin-associated domain
Members of this family of ubiquitin binding domains adopt a structure consisting of a three alpha-helix bundle. They are predominantly found in fungal ubiquitin-protein ligases .. +PF09289 Follistatin/Osteonectin-like EGF domain
Members of this family are predominantly found in osteonectin and follistatin and adopt an EGF-like fold [1,2].. +PF09290 Prokaryotic acetaldehyde dehydrogenase, dimerisation
Members of this family are found in prokaryotic acetaldehyde dehydrogenase (acylating), and adopt a structure consisting of an alpha-beta-alpha-beta(3) core. They mediate dimerisation of the protein .. +PF09291 Domain of unknown function (DUF1968)
Members of this family are found in mammalian T-cell antigen receptor, and adopt an immunoglobulin-like beta-sandwich fold, with seven strands in two beta-sheets in a Greek-key topology. Their exact function has not, as yet, been determined.. +PF09292 Endonuclease VIII-like 1, DNA bind
Members of this family are predominantly found in Endonuclease VIII-like 1 and adopt a glucocorticoid receptor-like fold. They allow for DNA binding .. +PF09293 T4 RNase H, C terminal
Members of this family are found in T4 RNaseH ribonuclease, and adopt a SAM domain-like fold, consisting of a bundle of four/five helices. These residues may have a role in providing a docking site for other proteins or enzymes in the replication fork .. +PF09294 Interferon-alpha/beta receptor, fibronectin type III
Members of this family adopt a secondary structure consisting of seven beta-strands arranged in an immunoglobulin-like beta-sandwich, in a Greek-key topology. They are required for binding to interferon-alpha .. +PF09295 ChAPs (Chs5p-Arf1p-binding proteins)
Pfam-B_11349 (release 19.0). ChAPs (Chs5p-Arf1p-binding proteins) are required for the export of specialised cargo from the Golgi. They physically interact with Chs3, Chs5 and the small GTPase Arf1, and they also form interactions with each other .. +PF09296 NADH pyrophosphatase-like rudimentary NUDIX domain
The N-terminal domain in NADH pyrophosphatase, which has a rudimentary Nudix fold according to SCOP.. +PF09297 NADH pyrophosphatase zinc ribbon domain
This domain is found in between two duplicated NUDIX domains. It has a zinc ribbon structure.. +PF09298 DUF1969;
Fumarylacetoacetase N-terminal. Sammut SJ, Eberhardt R. The N-terminal domain of fumarylacetoacetate hydrolase is functionally uncharacterised, and adopts a structure consisting of an SH3-like barrel .. +PF09299 Mu transposase, C-terminal
Members of this family are found in various prokaryotic integrases and transposases. They adopt a beta-barrel structure with Greek-key topology .. +PF09300 Tectiviridae, minor capsid
Members of this family form the minor capsid protein of various Tectiviridae .. +PF09301 Domain of unknown function (DUF1970)
Members of this family consist of various uncharacterised viral hypothetical proteins.. +PF09302 XLF (XRCC4-like factor)
Mistry J, Wood V, Hentges P, Doherty A. XLF (also called Cernunnos) interacts with the XRCC4-DNA ligase IV complex to promote DNA non-homologous end-joining. It directly interacts with the XRCC4-Ligase IV complex and siRNA-mediated downregulation of XLF in human cell lines leads to radio-sensitivity and impaired DNA non-homologous end-joining . This family contains Nej1 (non-homologous end-joining factor) , and Lif1 .. +PF09303 KCNMB2, ball and chain domain
Members of this family are found in the cytoplasmic N-terminus of KCNMB2, the beta-2 subunit of large conductance calcium and voltage-activated potassium channels. They are responsible for the fast inactivation of these channels .. +PF09304 Cortexillin I, coiled coil
Members of this family are predominantly found in the actin-bundling protein Cortexillin I from Dictyostelium discoideum. They adopt a structure consisting of an 18-heptad-repeat alpha-helical coiled-coil, and are a prerequisite for the assembly of Cortexillin I .. +PF09305 TACI, cysteine-rich domain
Members of this family are predominantly found in tumour necrosis factor receptor superfamily, member 13b (TACI), and are required for binding to the ligands APRIL and BAFF .. +PF09306 Bacteriophage, scaffolding protein
Members of this family of scaffolding proteins are produced by various bacteriophages .. +PF09307 CLIP, MHC2 interacting
Members of this family are found in class II invariant chain-associated peptide (CLIP), and are required for association with class II major histocompatibility complex (MHC) in the MHC class II processing pathway .. +PF09308 LuxQ, periplasmic
Members of this family constitute the periplasmic sensor domain of the prokaryotic protein LuxQ, and assume a structure consisting of two tandem Per/ARNT/Simple-minded (PAS) folds .. +PF09309 FCP1, C-terminal
The C-terminal domain of FCP-1 is required for interaction with the carboxy terminal domain of RAP74. Interaction relies extensively on van der Waals contacts between hydrophobic residues situated within alpha-helices in both domains .. +PF09310 POU domain, class 2, associating factor 1
Members of this family are transcriptional coactivators that specifically associate with either OCT1 or OCT2, through recognition of their POU domains. They are essential for the response of B-cells to antigens and required for the formation of germinal centres .. +PF09311 Rabaptin-like protein
Members of this family are predominantly found in Rabaptin and allow for binding to the GTPase Rab5. This interaction is necessary and sufficient for Rab5-dependent recruitment of Rabaptin5 to early endosomal membranes .. +PF09312 SurA N-terminal domain
This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment.. +PF09313 Domain of unknown function (DUF1971)
Pfam-B_3000 (release 20.0). Members of this family of functionally uncharacterised domains are predominantly found in bacterial Tellurite resistance protein.. +PF09314 Domain of unknown function (DUF1972)
Pfam-B_3020 (release 20.0). Members of this family of functionally uncharacterised domains are found in bacterial glycosyltransferases and rhamnosyltransferases.. +PF09315 Domain of unknown function (DUF1973)
Pfam-B_3022 (release 20.0). Members of this family of functionally uncharacterised domains are found in various eukaryotic calcium-dependent chloride channels.. +PF09316 C-myb, C-terminal
Pfam-B_3027 (release 20.0). Members of this family are predominantly found in the proto-oncogene c-myb and the viral transforming protein myb. Truncation of the domain results in 'activation' of c-myb and subsequent tumourigenesis .. +PF09317 Domain of unknown function (DUF1974)
Pfam-B_3029 (release 20.0). Members of this family of functionally uncharacterised domains are predominantly found in various prokaryotic acyl-coenzyme a dehydrogenases. . +PF09318 Domain of unknown function (DUF1975)
Pfam-B_3057 (release 20.0). Members of this family of functionally uncharacterised domains are predominantly found in the N-terminal region of various prokaryotic alpha-glucosyltransferases.. +PF09320 Domain of unknown function (DUF1977)
Pfam-B_3043 (release 20.0). Members of this family of functionally uncharacterised domains are predominantly found in dnaj-like proteins.. +PF09321 Domain of unknown function (DUF1978)
Pfam-B_3044 (release 20.0). Members of this family are found in various hypothetical proteins produced by the bacterium Chlamydia pneumoniae. Their exact function has not, as yet, been identified.. +PF09322 Domain of unknown function (DUF1979)
Pfam-B_3053 (release 20.0). Members of this family of functionally uncharacterised domains are found in various Oryza sativa mutator-like transposases.. +PF09323 Domain of unknown function (DUF1980)
Pfam-B_3062 (release 20.0). Members of this family are found in a set of prokaryotic hypothetical proteins. Their exact function has not, as yet, been defined.. +PF09324 Domain of unknown function (DUF1981)
Pfam-B_3041 (release 20.0). Members of this family of functionally uncharacterised domains are found in various plant and yeast protein transport proteins.. +PF09325 Vps5 C terminal like
Pfam-B_6206 (release 20.0). Vps5 is a sorting nexin that functions in membrane trafficking. This is the C terminal dimerisation domain .. +PF09326 Domain of unknown function (DUF1982)
Pfam-B_3077 (release 20.0). Members of this family of functionally uncharacterised domains are found in the C-terminal region of various prokaryotic NADH dehydrogenases.. +PF09327 Domain of unknown function (DUF1983)
Pfam-B_3073 (release 20.0). Members of this family of functionally uncharacterised domains are found in various bacteriophage host specificity proteins.. +PF09328 DUF1984;
Domain of unknown function (DUF1984). Pfam-B_3070 (release 20.0). Members of this family of functionally uncharacterised domains are found at the C-terminus of plant phytochelatin synthases.. +PF09329 Primase zinc finger
Pfam-B_9710 (release 20.0). This zinc finger is found in yeast Mcm10 proteins and DnaG-type primases .. +PF09330 D-lactate dehydrogenase, membrane binding
Members of this family are predominantly found in prokaryotic D-lactate dehydrogenase, forming the cap-membrane-binding domain, which consists of a large seven-stranded antiparallel beta-sheet flanked on both sides by alpha-helices. They allow for membrane association .. +PF09331 Domain of unknown function (DUF1985)
Pfam-B_3094 (release 20.0). Members of this family of functionally uncharacterised domains are found in a set of Arabidopsis thaliana hypothetical proteins.. +PF09332 Mcm10 replication factor
Mcm10 is a eukaryotic DNA replication factor that regulates the stability and chromatin association of DNA polymerase alpha .. +PF09333 ATG C terminal domain
Pfam-B_61662 (release 20.0). ATG2 (also known as Apg2) is a peripheral membrane protein. It functions in both cytoplasm to vacuole targeting and autophagy .. +PF09334 tRNA synthetases class I (M)
Pfam-B_107 (release 20.0). This family includes methionyl tRNA synthetases.. +PF09335 SNARE associated Golgi protein
This is a family of SNARE associated Golgi proteins. The yeast member of this family (Swiss:P36164) localises with the t-SNARE Tlg2 .. +PF09336 Vps4 C terminal oligomerisation domain
Pfam-B_8681 (release 20.0). This domain is found at the C terminal of ATPase proteins involved in vacuolar sorting. It forms an alpha helix structure and is required for oligomerisation .. +PF09337 His(2)-Cys(2) zinc finger
This domain binds to histone upstream activating sequence (UAS) elements that are found in histone gene promoters .. +PF09338 Glycine/sarcosine/betaine reductase component B subunits
Pfam-B_25756 (release 20.0). This is a family of glycine reductase, sarcosine reductase and betaine reductases. These enzymes catalyse the following reactions. sarcosine reductase: Acetyl phosphate + methylamine + thioredoxin disulphide = N-methylglycine + phosphate + thioredoxin Acetyl phosphate + NH(3) + thioredoxin disulphide = glycine + phosphate + thioredoxin. betaine reductase: Acetyl phosphate + trimethylamine + thioredoxin disulphide = N,N,N-trimethylglycine + phosphate + thioredoxin .. +PF09339 IclR helix-turn-helix domain
Pfam-B_70 (release 18.0). +PF09340 Histone acetyltransferase subunit NuA4
Pfam-B_29415 (release 20.0). The NuA4 histone acetyltransferase (HAT) multisubunit complex is responsible for acetylation of histone H4 and H2A N-terminal tails in yeast . NuA4 complexes are highly conserved in eukaryotes and play primary roles in transcription, cellular response to DNA damage, and cell cycle control .. +PF09341 Transcription factor Pcc1
Pcc1 is a transcription factor that functions in regulating genes involved in cell cycle progression and polarised growth .. +PF09342 Domain of unknown function (DUF1986)
Mistry J, Rawlings ND. Pfam-B_99782 (release 20.0). This domain is found in serine proteases and is predicted to contain disulphide bonds (see Swiss:P98159).. +PF09343 CHP2217;
Conserved hypothetical protein 2217 (DUF2460). This model represents a family of conserved hypothetical proteins. It is usually (but not always) found in apparent phage-derived regions of bacterial chromosomes.. +PF09344 CT1975-like protein
CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family is represented by CT1975 of Chlorobium tepidum.. +PF09345 Domain of unknown function (DUF1987)
This family of proteins are functionally uncharacterised.. +PF09346 DUF1988;
SMI1 / KNR4 family (SUKH-1). Proteins in this family are involved in the regulation of 1,3-beta-glucan synthase activity and cell-wall formation . Genome contextual information showed that SMI1 are primary immunity proteins in bacterial toxin systems . . +PF09347 Domain of unknown function (DUF1989)
This family of proteins are functionally uncharacterised.. +PF09348 Domain of unknown function (DUF1990)
This family of proteins are functionally uncharacterised.. +PF09349 DUF1991;
The proteins in this family are OHCU decarboxylase - enzymes of the purine catabolism that catalyse the conversion of OHCU into S(+)-allantoin . This is the third step of the conversion of uric acid (a purine derivative) to allantoin. Step one is catalysed by urate oxidase (Pfam:PF01014) and step two is catalysed by HIUases (Pfam:PF00576).. +PF09350 Domain of unknown function (DUF1992)
This family of proteins are functionally uncharacterised.. +PF09351 Domain of unknown function (DUF1993)
This family of proteins are functionally uncharacterised.. +PF09353 Domain of unknown function (DUF1995)
This family of proteins are functionally uncharacterised.. +PF09354 HNF3 C-terminal domain
This presumed domain is found in the C-terminal region of Hepatocyte Nuclear Factor 3 alpha and beta chains. Its specific function is uncertain. The N-terminal region of this presumed domain contains an EH1 (engrailed homology 1) motif, that is characterised by the FxIxxIL sequence .. +PF09355 Phage protein Gp19/Gp15/Gp42
This family of proteins are functionally uncharacterised. They are found in a variety of bacteriophage.. +PF09356 Phage conserved hypothetical protein BR0599
This entry describes a family of proteins found almost exclusively in phage or in prophage regions of bacterial genomes, including the phage-like Rhodobacter capsulatus gene transfer agent, which packages DNA. An apparent exception is Wolbachia pipientis wMel, a bacterial endosymbiont of the fruit fly, which has several candidate phage-related genes physically separate from obvious prophage regions.. +PF09357 RteC protein
Human colonic Bacteroides species harbor a family of large conjugative transposons, called tetracycline resistance (Tcr) elements. Activities of these elements are enhanced by pregrowth of bacteria in medium containing tetracycline, indicating that at least some Tcr element genes are regulated by tetracycline. An insertional disruption in the rteC gene abolished self-transfer of the Tcr element to Bacteroides recipients, indicating that the gene was essential for self-transfer .. +PF09358 Ubiquitin-activating enzyme e1 C-terminal domain
This presumed domain found at the C-terminus of Ubiquitin-activating enzyme e1 proteins is functionally uncharacterised.. +PF09359 VTC domain
This presumed domain is found in the yeast vacuolar transport chaperone proteins VTC2, VTC3 and VTC4. This domain is also found in a variety of bacterial proteins.. +PF09360 Iron-binding zinc finger CDGSH type
The CDGSH-type zinc finger domain binds iron rather than zinc as a redox-active pH-labile 2Fe-2S cluster. The conserved sequence C-X-C-X2-(S/T)-X3-P-X-C-D-G-(S/A/T)-H is a defining feature of this family . The domain is oriented towards the cytoplasm and is tethered to the mitochondrial membrane by a more N-terminal domain found in higher vertebrates, MitoNEET_N, Pfam:PF10660 . The domain forms a uniquely folded homo-dimer and spans the outer mitochondrial membrane, orienting the iron-binding residues towards the cytoplasm .. +PF09361 Phasin protein
This entry describes a group of small proteins found associated with inclusions in bacterial cells. Most associate with polyhydroxyalkanoate (PHA) inclusions, the most common of which consist of polyhydroxybutyrate (PHB). These are designated granule-associate proteins or phasins.. +PF09362 Domain of unknown function (DUF1996)
This family of proteins are functionally uncharacterised.. +PF09363 XFP C-terminal domain
Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyceraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 .. +PF09364 XFP N-terminal domain
Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyceraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 . This family is distantly related to transketolases e.g. Pfam:PF02779.. +PF09365 CHP02453;
Conserved hypothetical protein (DUF2461). Members of this family are widely (though sparsely) distributed bacterial proteins, about 230 residues in length. All members have a motif RxxRDxRFxxx[DN]KxxY. The function of this protein family is unknown.. +PF09366 Protein of unknown function (DUF1997)
This family of proteins are functionally uncharacterised.. +PF09367 CpeS-like protein
This family, that includes CpeS proteins, is functionally uncharacterised.. +PF09368 Sas10_Utp3_C;
Sas10 C-terminal domain. Sas10 is an Essential subunit of U3-containing Small Subunit (SSU) processome complex involved in the production of the 18S rRNA and assembly of the small ribosomal subunit.. +PF09369 Domain of unknown function (DUF1998)
This family of proteins are functionally uncharacterised. They are mainly found in helicase proteins so could be RNA binding. This family includes a probable zinc binding motif at its C-terminus.. +PF09370 TIM-barrel signal transduction protein
This domain is likely to have a TIM barrel fold related to IGPS. Although this family of proteins are functionally uncharacterised this domain is found as an N-terminal domain of sigma 54 -dependent transcriptional activators (enhancer-binding proteins) suggesting a potential role in signal recognition/receiving and signal transduction.. +PF09371 Tex-like protein N-terminal domain
This presumed domain is found at the N-terminus of Swiss:Q45388. This protein defines a novel family of prokaryotic transcriptional accessory factors .. +PF09372 PRANC domain
This presumed domain is found at the C-terminus of a variety of Pox virus proteins. The PRANC (Pox proteins Repeats of ANkyrin - C terminal) domain is also found on its own in some proteins. The function of this domain is unknown, but it appears to be related to the F-box domain and may play a similar role.. +PF09373 Pseudomurein-binding repeat
Pfam-B_12784 (Release 21.0). Methanothermobacter thermautotrophicus is a methanogenic Gram-positive microorganism with a cell wall consisting of pseudomurein. This repeat specifically binds to pseudomurein. This repeat is found at the N terminus of PeiW and PeiP which are pseudomurein binding phage proteins.. +PF09374 Predicted Peptidoglycan domain
Pfam-B_8737 (release 8.0). This family contains a potential peptidoglycan binding domain.. +PF09375 Imelysin
The imelysin peptidase was first identified in Pseudomonas aeruginosa. The active site residues have not been identified. However, His201 and Glu204 are completely conserved in the family and occur in an HXXE motif that is also found in family M14.. +PF09376 NurA domain
This family includes NurA a nuclease exhibiting both single-stranded endonuclease activity and 5'-3' exonuclease activity on single-stranded and double-stranded DNA from the hyperthermophilic archaeon Sulfolobus acidocaldarius .. +PF09377 SBDS protein C-terminal domain
This family is highly conserved in species ranging from archaea to vertebrates and plants. The family contains several Shwachman-Bodian-Diamond syndrome (SBDS) proteins from both mouse and humans. Shwachman-Diamond syndrome is an autosomal recessive disorder with clinical features that include pancreatic exocrine insufficiency, haematological dysfunction and skeletal abnormalities. Members of this family play a role in RNA metabolism .. +PF09378 HAS barrel domain
The HAS barrel is named after HerA-ATP Synthase. In ATP synthases, this domain is implicated in the assembly of the catalytic toroid and docking of accessory subunits, such as the subunit of the ATP synthase complex. Similar roles in docking of the functional partner, the NurA nuclease, and assembly of the HerA toroid complex appear likely for the HAS-barrel of the HerA family .. +PF09379 FERM N-terminal domain
This domain is the N-terminal ubiquitin-like structural domain of the FERM domain.. +PF09380 FERM C-terminal PH-like domain
Pfam-B_851 (release 2.1). +PF09381 Outer membrane protein G (OmpG)
Porins are channel proteins in the outer membrane of gram negative bacteria which mediate the uptake of molecules required for growth and survival. Escherichia coli OmpG forms a 14 stranded beta-barrel and in contrast to most porins, appears to function as a monomer . The central pore of OmpG is wider than other E. coli porins and it is speculated that it may form a non-specific channel for the transport of larger oligosaccharides .. +PF09382 RQC domain
Pfam-B_571 (release 21.0). This DNA-binding domain is found in the RecQ helicase among others and has a helix-turn-helix structure. The RQC domain, found only in RecQ family enzymes, is a high affinity G4 DNA binding domain .. +PF09383 NIL domain
Pfam-B_524 (release 21.0). This domain is found at the C-terminus of ABC transporter proteins involved in D-methionine transport as well as a number of ferredoxin-like proteins. This domain is likely to act as a substrate binding domain. The domain has been named after a conserved sequence in some members of the family.. +PF09384 U3_snoRNA_C;
Pfam-B_7112 (release 21.0). U3 snoRNA is ubiquitous in eukaryotes and is required for nucleolar processing of pre-18S ribosomal RNA . It is a component of the ribosomal small subunit (SSU) processome. UTP15 is needed for optimal pre-ribosomal RNA transcription by RNA polymerase I, together with a subset of U3 proteins required for transcription (t-UTPs) . This entry represents the C terminal of UTP15, and is found adjacent to WD40 repeats (Pfam:PF00400).. +PF09385 Histidine kinase N terminal
This domain is found at the N terminal of sensor histidine kinase proteins.. +PF09386 Antitoxin ParD
ParD is a plasmid anti-toxin that forms a ribbon-helix-helix DNA binding structure . It stabilises plasmids by inhibiting ParE toxicity in cells that express ParD and ParE. ParD forms a dimer and also regulates its own promoter (parDE).. +PF09387 Mitochondrial RNA binding protein MRP
MRP1 and MRP2 are mitochondrial RNA binding proteins that form a heteromeric complex. The MRP1/MRP2 heterotetrameric complex binds to guide RNAs and stabilises them in an unfolded conformation suitable for RNA-RNA hybridisation. Each MRP subunit adopts a 'whirly' transcription factor fold .. +PF09388 Spo0E like sporulation regulatory protein
Spore formation is an extreme response to starvation and can also be a component of disease transmission. Sporulation is controlled by an expanded two-component system where starvation signals result in sensor kinase activation and phosphorylation of the master sporulation response regulator Spo0A. Phosphatases such as Spo0E dephosphorylate Spo0A thereby inhibiting sporulation. This is a family of Spo0E-like phosphatases. The structure of a Bacillus anthracis member of this family has revealed an anti-parallel alpha-helical structure .. +PF09390 Protein of unknown function (DUF1999)
This family contains a putative Fe-S binding reductase (Swiss:Q72J89) whose structure adopts an alpha and beta fold.. +PF09391 Protein of unknown function (DUF2000)
This is a family of proteins of unknown function. The structure of one of the proteins in this family has been shown to adopt an alpha beta fold.. +PF09392 Type III secretion needle MxiH like
Type III secretion systems are essential virulence determinants for many gram-negative bacterial pathogens. MxiH is an extracellular alpha helical needle that is required for translocation of effector proteins into host cells . Once inside, the effector proteins subvert normal cell function to aid infection.. +PF09393 Protein of unknown function (DUF2001)
This family includes phage-like element PBSX protein (Swiss:P54332) whose structure adopts a beta barrel flanked with alpha helical regions.. +PF09394 Chagasin_I42;
Chagasin family peptidase inhibitor I42. Chagasin is a cysteine peptidase inhibitor which forms a beta barrel structure .. +PF09396 Thrombin light chain
Thrombin is an enzyme that cleaves bonds after Arg and Lys, converts fibrinogen to fibrin and activates factors V, VII, VIII. Prothrombin is activated on the surface of a phospholipid membrane where factor Xa removes the activation peptide and cleaves the remaining part into light and heavy chains. This domain corresponds to the light chain of thrombin.. +PF09397 Ftsk gamma domain
This domain directs oriented DNA translocation and forms a winged helix structure . Mutated proteins with substitutions in the FtsK gamma DNA-recognition helix are impaired in DNA binding .. +PF09398 FOP N terminal dimerisation domain
Fibroblast growth factor receptor 1 (FGFR1) oncogene partner (FOP) is a centrosomal protein that is involved in anchoring microtubules to subcellular structures. This domain includes a Lis-homology motif. It forms an alpha helical bundle and is involved in dimerisation .. +PF09399 SARS lipid binding protein
This is a family of proteins found in SARS coronavirus. The protein has a novel fold which forms a dimeric tent-like beta structure with an amphipathic surface, and a central hydrophobic cavity that binds lipid molecules . This cavity is likely to be involved in membrane attachment .. +PF09400 Protein of unknown function (DUF2002)
This is a family of putative cytoplasmic proteins. The structures of these proteins form an antiparallel beta sheet and contain some alpha helical regions.. +PF09401 RNA synthesis protein NSP10
Non-structural protein 10 (NSP10) is involved in RNA synthesis. It is synthesised as a polyprotein whose cleavage generates many non-structural proteins. NSP10 contains two zinc binding motifs and forms two anti-parallel helices which are stacked against an irregular beta sheet . A cluster of basic residues on the protein surface suggests a nucleic acid-binding function.. +PF09402 MAN1_C;
Man1-Src1p-C-terminal domain. MAN1 is an integral protein of the inner nuclear membrane which binds to chromatin associated proteins and plays a role in nuclear organisation. The C terminal nucleoplasmic region forms a DNA binding winged helix and binds to Smad . This C-terminal tail is also found in S. cerevisiae and is thought to consist of three conserved helices followed by two downstream strands .. +PF09403 Adhesion protein FadA
FadA (Fusobacterium adhesin A) is an adhesin which forms two alpha helices.. +PF09404 Eukaryotic protein of unknown function (DUF2003)
This is a family of proteins of unknown function which adopt an alpha helical and beta sheet structure.. +PF09405 CASC3/Barentsz eIF4AIII binding
This domain is found on CASC3 (cancer susceptibility candidate gene 3 protein) which is also known as Barentsz (Btz). CASC3 is a component of the EJC (exon junction complex) which is a complex that is involved in post-transcriptional regulation of mRNA in metazoa. The complex is formed by the association of four proteins (eIF4AIII, Barentsz, Mago, and Y14), mRNA, and ATP. This domain wraps around eIF4AIII and stacks against the 5' nucleotide .. +PF09406 Protein of unknown function (DUF2004)
This is a family of proteins with unknown function. The structure of one of the proteins in this family has revealed a novel alpha-beta fold .. +PF09407 Protein of unknown function (DUF2005)
This is a family of proteins with unknown function.. +PF09408 Spike receptor binding domain
Spike is an envelope glycoprotein which aids viral entry into the host cell. This domain corresponds to the immunogenic receptor binding domain of the protein which binds to angiotensin-converting enzyme 2 (ACE2) .. +PF09409 PUB domain
The PUB (also known as PUG) domain is found in peptide N-glycanase where it functions as a AAA ATPase binding domain . This domain is also found on other proteins linked to the ubiquitin-proteasome system.. +PF09411 Lipid A 3-O-deacylase (PagL)
PagL is an outer membrane protein with lipid A 3-O-deacylase activity. It forms an 8 stranded beta barrel structure .. +PF09412 Endoribonuclease XendoU
This is a family of endoribonucleases involved in RNA biosynthesis which has been named XendoU in Xenopus laevis. XendoU is a U-specific metal dependent enzyme that produces products with a 2'-3' cyclic phosphate termini.. +PF09413 Domain of unknown function (DUF2007)
This is a family of proteins with unknown function.. +PF09414 RNA ligase
This is a family of RNA ligases. The enzyme repairs RNA strand breaks in nicked DNA:RNA and RNA:RNA but not in DNA:DNA duplexes.. +PF09415 DUF2008;
CENP-S associating Centromere protein X. The centromere, essential for faithful chromosome segregation during mitosis, has a network of constitutive centromere-associated (CCAN) proteins associating with it during mitosis. So far in vertebrates at least 15 centromere proteins have been identified, which are divided into several subclasses based on functional and biochemical analyses. These provide a platform for the formation of a functional kinetochore during mitosis. CENP-S is one that does not associate with the CENP-H-containing complex but rather interacts with CENP-X to form a stable assembly of outer kinetochore proteins that functions downstream of other components of the CCAN. This complex may directly allow efficient and stable formation of the outer kinetochore on the CCAN platform.. +PF09416 RNA helicase (UPF2 interacting domain)
UPF1 is an essential RNA helicase that detects mRNAs containing premature stop codons and triggers their degradation. This domain contains 3 zinc binding motifs and forms interactions with another protein (UPF2) that is also involved in nonsense-mediated mRNA decay (NMD) .. +PF09418 Protein of unknown function (DUF2009)
Pfam-B_18128 (release 21.0). This is a eukaryotic family of proteins with unknown function.. +PF09419 DUF2010;
Mitochondrial PGP phosphatase. Pfam-B_22310 (release 21.0). This is a family of proteins that acts as a mitochondrial phosphatase in cardiolipin biosynthesis. Cardiolipin is a unique dimeric phosphoglycerolipid predominantly present in mitochondrial membranes. The inverted phosphatase motif includes the highly conserved DKD triad .. +PF09420 Ribosome biogenesis protein Nop16
Pfam-B_6406 (release 21.0). Nop16 is a protein involved in ribosome biogenesis.. +PF09421 Frequency clock protein
The frequency clock protein is the central component of the frq-based circadian negative feedback loop and regulates various aspects of the circadian clock in Neurospora crassa . This protein has been shown to interact with itself via a coiled-coil .. +PF09422 WTX protein
The WTX protein is found to be inactivated in one third of Wilms tumours . The WTX protein is functionally uncharacterised.. +PF09423 PhoD-like phosphatase
+PF09424 Yqey-like protein
The function of this domain found in the YqeY protein is uncertain.. +PF09425 Divergent CCT motif
This short motif is found in a number of plant proteins. It appears to be related to the N-terminal half of the CCT motif. The CCT motif is about 45 amino acids long and contains a putative nuclear localisation signal within the second half of the CCT motif .. +PF09426 Vacuolar R-SNARE Nyv1 N terminal
Pfam-B_50964 (release 21.0). This domain corresponds to the N terminal domain of vacuolar R-SNARE Nyv1 which adopts a longin fold . In yeast it has been shown that this domain is sufficient to direct the transport of Nyv1 to limiting membrane of the vacuole .. +PF09427 SREBP_C;
Domain of unknown function (DUF2014) . Pfam-B_71890 (release 21.0). This domain is found at the C terminal of a family of ER membrane bound transcription factors called sterol regulatory element binding proteins (SREBP).. +PF09428 Fungal protein of unknown function (DUF2011)
This is a family of fungal proteins whose function is unknown.. +PF09429 WW domain binding protein 11
Pfam-B_13108 (release 21.0). The WW domain is a small protein module with a triple-stranded beta-sheet fold. This is a family of WW domain binding proteins.. +PF09430 Protein of unknown function (DUF2012)
Pfam-B_49614 (release 21.0). This is a eukaryotic family of uncharacterised proteins.. +PF09431 Protein of unknown function (DUF2013)
Pfam-B_11317 (release 21.0). This region is found at the C terminal of a group of cytoskeletal proteins.. +PF09432 Tho complex subunit THP2
The THO complex plays a role in coupling transcription elongation to mRNA export. It is composed of subunits THP2, HPR1, THO2 and MFT1 .. +PF09435 Fungal protein of unknown function (DUF2015)
This is a fungal family of uncharacterised proteins.. +PF09436 Domain of unknown function (DUF2016)
A predicted alpha+beta domain that is usually fused N-terminal to the JAB metallopeptidase. This protein in turn is found in conserved gene neighborhoods that include genes encoding the bacterial homologs of the ubiquitin modification system such as the E1, E2 and Ub proteins . The domain is also known as the JAB-N domain.. +PF09437 Pombe specific 5TM protein
+PF09438 Domain of unknown function (DUF2017)
Mistry J, Iyer LM, Burroughs AM, Aravind L. This is an alpha-helical domain found in gene neighborhoods that contain genes encoding ubiquitin, cysteine synthases and JAB peptidases .. +PF09439 Signal recognition particle receptor beta subunit
Pfam-B_7840 (release 21.0). The beta subunit of the signal recognition particle receptor (SRP) is a transmembrane GTPase which anchors the alpha subunit to the endoplasmic reticulum membrane .. +PF09440 eIF3 subunit 6 N terminal domain
Pfam-B_4886 (release 21.0). This is the N terminal domain of subunit 6 translation initiation factor eIF3.. +PF09441 ARS binding protein 2
This DNA-binding protein binds to the autonomously replicating sequence (ARS) binding element. It may play a role in regulating the cell cycle response to stress signals .. +PF09442 Domain of unknown function (DUF2018)
Acid-adaptive protein possibly of physiological significance when H.pylori colonises the human stomach, which adopts a unique four alpha-helical triangular conformations. The biologically active form is thought to be a tetramer. The protein is expressed along with six other proteins, some of which are related to iron storage and haem biosynthesis .. +PF09443 Cripto_Frl-1_Cryptic (CFC)
CFC domain is one half of the membrane protein Cripto, a protein overexpressed in many tumours [1,2] and structurally similar to the C-terminal extracellular portions of Jagged 1 and Jagged 2 . CFC is approx 40-residues long, compacted by three internal disulphide bridges, and binds Alk4 via a hydrophobic patch. CFC is structurally homologous to the VWFC-like domain .. +PF09444 MRC1-like domain
This putative domain is found to be the most conserved region in mediator of replication checkpoint protein 1.. +PF09445 RNA cap guanine-N2 methyltransferase
Pfam-B_9480 (Release 21.0). RNA cap guanine-N2 methyltransferases such as Schizosaccharomyces pombe Tgs1 and Giardia lamblia Tgs2 catalyse methylation of the exocyclic N2 amine of 7-methylguanosine .. +PF09446 VMA21-like domain
This presumed short domain appears to contain two potential transmembrane helices. VMA21 is localised in the ER where it is needed as an accessory factor for assembly of the V0 component of the vacuolar ATPase .. +PF09447 Cnl2/NKP2 family protein
This family includes the Cnl2 kinetochore protein .. +PF09448 Methylmuconolactone methyl-isomerase
MmlI is a short, approx 115 residue, protein of two alpha helices and four beta strands. It is involved in the catabolism of methyl-substituted aromatics via a modified oxo-adipate pathway in bacteria. The enzyme appears to be monomeric in some species and tetrameric in others . The known structure shows two copies of the protein form a dimeric alpha beta barrel.. +PF09449 Domain of unknown function (DUF2020)
Protein of unknown function found in bacteria.. +PF09450 Domain of unknown function (DUF2019)
Protein of unknown function found in bacteria.. +PF09451 Autophagy-related protein 27
+PF09452 ESCRT-I subunit Mvb12
The endosomal sorting complex required for transport (ESCRT) complexes play a critical role in receptor down-regulation and retroviral budding. A new component of the ESCRT-I complex was identified , multivesicular body sorting factor of 12 kD (Mvb12), which binds to the coiled-coil domain of the ESCRT-I subunit vacuolar protein sorting 23 (Vps23) .. +PF09453 HIRA B motif
The HirA B (Histone regulatory homologue A binding) motif is the essential binding interface between HIRA Pfam:PF07569 and ASF1a, of approx. 40 residues. It forms an antiparallel beta-hairpin that binds perpendicular to the strands of the beta-sandwich of ASF1a N-terminal core domain, via beta-sheet, salt bridge and van der Waals interactions . The two histone chaperone proteins, HIRA and ASF1a, form a heterodimer with histones H3 and H4. HIRA is the human orthologue of Hir proteins known to silence histone gene expression and create transcriptionally silent heterochromatin in yeast, flies, plants and humans. The yeast CAF1B proteins which bind H3 also carry this motif at their very C-terminus. . +PF09454 Vps23 core domain
ESCRT complexes form the main machinery driving protein sorting from endosomes to lysosomes. The core domain of the Vps23 subunit of the heterotrimeric ESCRT-I complex is a helical hairpin sandwiched in a fan-like formation between two other helical hairpins from Vps28 (Pfam:PF03997) and Vps37. Vps23 gives ESCRT-I complex its stability .. +PF09455 CRISPR-associated (Cas) DxTHG family
CRISPR is a term for Clustered Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR associated) proteins. The family describes Cas proteins of about 400 residues that include the motif [VIL]-D-x-[ST]-H-[GS]. The CRISPR and associated proteins are thought to be involved in the evolution of host resistance. The exact molecular function of this family is currently unknown.. +PF09456 RcsC Alpha-Beta-Loop (ABL)
This domain is found in the C-terminus of the phospho-relay kinase RcsC between Pfam:PF00512 and Pfam:PF00072, and forms a discrete alpha/beta/loop structure .. +PF09457 FIP domain
The FIP domain is the Rab11-binding domain (RBD) at the C-terminus of a family of Rab11-interacting proteins (FIPs). The Rab proteins constitute the largest family of small GTPases (>60 members in mammals). Among them Rab11 is a well characterised regulator of endocytic and recycling pathways. Rab11 associates with a broad range of post-Golgi organelles, including recycling endosomes .. +PF09458 H-type lectin domain
The H-type lectin domain is a unit of six beta chains, combined into a homo-hexamer. It is involved in self/non-self recognition of cells, through binding with carbohydrates . It is sometimes found in association with the F5_F8_type_C domain Pfam:PF00754.. +PF09459 Ethylbenzene dehydrogenase
Ethylbenzene dehydrogenase is a heterotrimer of three subunits that catalyses the anaerobic degradation of hydrocarbons. The alpha subunit contains the catalytic centre as a Molybdenum cofactor-complex. This removes an electron-pair from the hydrocarbon and passes it along an electron transport system involving iron-sulphur complexes held in the beta subunit and a Haem b molecule contained in the gamma subunit. The electron-pair is then subsequently passed to an as yet unknown receiver . The enzyme is found in a variety of different bacteria.. +PF09460 Saf-pilin pilus formation protein
This domain consists of the adjacent Saf-Nte and Saf-pilin chains of the pilus-forming complex. Pilus assembly in Gram-negative bacteria involves a Donor-strand exchange mechanism between the C- and the N-termini of this domain. The C-terminal subunit forms an incomplete Ig-fold which is then complemented by the 10-18 residue N-terminus of another, incoming, pilus subunit which is not involved in the Ig-fold. The N-terminus sequences contain a motif of alternating hydrophobic residues that occupy the P2 to P5 binding pockets in the groove of the first pilus subunit .. +PF09461 Phytotoxin PcF protein
PcF is a 52 residue protein factor of two alpha helices, containing a 4-hydroxyproline and three cysteine bridges. The presence of the hydroxyproline is unique in relation to other fungal phytotoxic proteins. The protein has a high content of acidic side-chains implying a lack of binding with lipid-rich components of membranes and appears to be an extracellular phytotoxin that causes leaf necrosis in strawberries.. +PF09462 Mus7/MMS22 family
This family includes a conserved region from the Mus7 protein . Mus7 is involved in the repair of replication-associated DNA damage in the fission yeast Schizosaccharomyces pombe. Mus7 functions in the same pathway as Mus81, a subunit of the Mus81-Eme1 structure-specific endonuclease, which has been implicated in the repair of the replication-associated DNA damage . The MMS22 proteins are involved in repairing double-stranded DNA breaks created by the cleavage reaction of topoisomerase II .. +PF09463 Opy2 protein
Opy2p acts as a membrane anchor in the HOG signalling pathway .. +PF09465 Lamin-B receptor of TUDOR domain
The Lamin-B receptor, found on the TUDOR domain Pfam:PF00567, is a chromatin and lamin binding protein in the inner nuclear membrane. It is one of the integral inner Nuclear Envelope membrane proteins responsible for targeting nuclear membranes to chromatin, being a downstream effector of Ran, a small Ras-like nuclear GTPase which regulates NE assembly. Lamin-B receptor interacts with Importin beta, a Ran-binding protein, thereby directly contributing to the fusion of membrane vesicles and the formation of the NE .. +PF09466 Hypothetical protein Yqai
This hypothetical protein is expressed in bacteria, particularly Bacillus subtilis. It forms a homo-dimer, with each monomer containing an alpha helix and four beta strands.. +PF09467 Hypothetical protein Yopt
This hypothetical protein is expressed in bacteria, particularly Bacillus subtilis. It forms homo-dimers, with each monomer consisting of one alpha helix and three beta strands.. +PF09468 Ydr279p protein family (RNase H2 complex component)
RNases H are enzymes that specifically hydrolyse RNA when annealed to a complementary DNA and are present in all living organisms. In yeast RNase H2 is composed of a complex of three proteins (Rnh2Ap, Ydr279p and Ylr154p), this family represents the homologues of Ydr279p . It is not known whether non yeast proteins in this family fulfil the same function.. +PF09469 Cordon-bleu ubiquitin-like domain
The Cordon-bleu protein domain is highly conserved among vertebrates. The sequence contains three repeated lysine, arginine, and proline-rich regions, the KKRAP motif. The exact function of the protein is unknown but it is thought to be involved in mid-brain neural tube closure. It is expressed specifically in the node . This domain has a ubiquitin-like fold.. +PF09470 Telethonin protein
Telethonin is a 167-residue protein which complexes with the large muscle protein, titin. The very N-terminus of titin, composed of two immunoglobulin-like (Ig) domains, referred to as Z1 and Z2, interacts with the N-terminal region (residues 1-53) of telethonin, mediating the antiparallel assembly of two Z1Z2 domains. The C-terminus of the telethonin appears to induce dimerisation of this 2:1 titin/telethonin structure which thus forms a complex necessary for myofibril assembly and maintenance of the intact Z-disk of skeletal and cardiac muscles .. +PF09471 IgA Peptidase M64
This is a family of highly selective metallo-endopeptidases. The primary structure of the Clostridium ramosum IgA proteinase shows no significant overall similarity to any other known metallo-endopeptidase .. +PF09472 Tetrahydromethanopterin S-methyltransferase, F subunit (MtrF)
Many archaea have evolved energy-yielding pathways marked by one-carbon biochemistry featuring novel cofactors and enzymes. This domain is mostly found in MtrF, where it covers the entire length of the protein. This polypeptide is one of eight subunits of the N5-methyltetrahydromethanopterin: coenzyme M methyltransferase complex found in methanogenic archaea. This is a membrane-associated enzyme complex that uses methyl-transfer reactions to drive a sodium-ion pump. MtrF itself is involved in the transfer of the methyl group from N5-methyltetrahydromethanopterin to coenzyme M. Subsequently, methane is produced by two-electron reduction of the methyl moiety in methyl-coenzyme M by another enzyme, methyl-coenzyme M reductase. In some organisms this domain is found at the C terminal region of what appears to be a fusion of the MtrA and MtrF proteins. The function of these proteins is unknown, though it is likely that they are involved in C1 metabolism.. +PF09474 type_III_YscX;
Type III secretion system YscX (type_III_YscX). Members of this family are encoded within bacterial type III secretion gene clusters. Among all species with type III secretion, those with this protein are found among those that target animal rather than plant cells. The member of this family in Yersinia was shown by mutation to be required for type III secretion of Yops effector proteins and therefore is believed to be part of the secretion machinery.. +PF09475 dot_icm_IcmQ;
Dot/Icm secretion system protein (dot_icm_IcmQ). Proteins in this entry are the IcmQ component of Dot/Icm secretion systems, as found in the obligate intracellular pathogens Legionella pneumophila and Coxiella burnetii. While this system resembles type IV secretion systems and has been called a form of type IV, the literature now seems to favour calling this the Dot/Icm system. This protein was shown to be essential for translocation.. +PF09476 pilus_cpaD; Pilus_cpaD;
Pilus biogenesis CpaD protein (pilus_cpaD). Proteins in this entry consist of a pilus biogenesis protein, CpaD, from Caulobacter, and homologues in other bacteria, including three in the root nodule bacterium Bradyrhizobium japonicum. The molecular function of the homologues is not known.. +PF09477 type_III_yscG; Type_III_yscG;
Bacterial type III secretion system chaperone protein (type_III_yscG). YscG is a molecular chaperone for YscE, where both are part of the type III secretion system that in Yersinia is designated Ysc (Yersinia secretion). The secretion system delivers effector proteins, designated Yops (Yersinia outer proteins), in Yersinia. This entry consists of YscG from Yersinia and functionally equivalent type III secretion proteins in other species: e.g. AscG in Aeromonas and LscG in Photorhabdus luminescens.. +PF09478 Carbohydrate binding domain CBM49
Mistry J, Urbanowicz B. Pfam-B_6310 (release 21.0). This domain is found at the C terminal of cellulases and in vitro binding studies have shown it to bind to crystalline cellulose .. +PF09479 flg_new;
Listeria-Bacteroides repeat domain (List_Bact_rpt). This model describes a conserved core region of about 43 residues, which occurs in at least two families of tandem repeats. These include 78-residue repeats which occur from 2 to 15 times in some proteins of Bacteroides forsythus ATCC 43037, and 70-residue repeats found in families of internalins of Listeria species. Single copies are found in proteins of Fibrobacter succinogenes, Geobacter sulfurreducens, and a few other bacteria.. +PF09480 Type III secretion system protein PrgH-EprH (PrgH)
In Salmonella, the gene encoding this protein is part of a four-gene operon PrgHIJK, while in other organisms it is found in type III secretion operons. PrgH has been shown to be required for type III secretion and is a structural component of the needle complex, which is the core component of type III secretion systems.. +PF09481 CRISPR_cse1;
CRISPR-associated protein Cse1 (CRISPR_cse1). Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry, represented by CT1972 from Chlorobaculum tepidum, is found in the CRISPR/Cas subtype Ecoli regions of many bacteria (most of which are mesophiles), and not in Archaea. It is designated Cse1.. +PF09482 Bacterial type III secretion apparatus protein (OrgA_MxiK)
This protein is encoded by genes which are found in type III secretion operons, and has been shown to be essential for the invasion phenotype in Salmonella and a component of the secretion apparatus. The protein is known as OrgA in Salmonella due to its oxygen-dependent expression pattern in which low-oxygen levels up-regulate the gene. In Shigella the gene is called MxiK and has been shown to be essential for the proper assembly of the needle complex, which is the core component of type III secretion systems.. +PF09483 Type III secretion protein (HpaP)
This entry represents proteins encoded by genes which are always found in type III secretion operons, although their function in the processes of secretion and virulence is unclear. Hpa stands for Hrp-associated gene, where Hrp stands for hypersensitivity response and virulence. see also PMID:18584024. +PF09484 cas_TM1802;
CRISPR-associated protein TM1802 (cas_TM1802). Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This minor cas protein is found in at least five prokaryotic genomes: Methanosarcina mazei, Sulfurihydrogenibium azorense, Thermotoga maritima, Carboxydothermus hydrogenoformans, and Dictyoglomus thermophilum, the first of which is archaeal while the rest are bacterial.. +PF09485 CRISPR_cse2;
CRISPR-associated protein Cse2 (CRISPR_cse2). Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family of proteins, represented by CT1973 from Chlorobaculum tepidum, is encoded by genes found in the CRISPR/Cas subtype Ecoli regions of many bacteria (most of which are mesophiles), and not in Archaea. It is designated Cse2.. +PF09486 Bacterial type III secretion protein (HrpB7)
This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow range of species including Xanthomonas, Burkholderia and Ralstonia.. +PF09487 Bacterial type III secretion protein (HrpB2)
This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow group of species including Xanthomonas, Burkholderia and Ralstonia.. +PF09488 osmo_MPGsynth;
Mannosyl-3-phosphoglycerate synthase (osmo_MPGsynth). This family consists of examples of mannosyl-3-phosphoglycerate synthase (MPGS), which together with mannosyl-3-phosphoglycerate phosphatase (MPGP) EC:2.4.1.217, comprises a two-step pathway for mannosylglycerate biosynthesis. Mannosylglycerate is a compatible solute that tends to be restricted to extreme thermophiles of archaea and bacteria. Note that in Rhodothermus marinus, this pathway is one of two; the other is condensation of GDP-mannose with D-glycerate by mannosylglycerate synthase.. +PF09489 Probable cobalt transporter subunit (CbtB)
This entry represents a family of proteins which have been proposed to act as cobalt transporters acting in concert with vitamin B12 biosynthesis systems. Evidence for this assignment includes 1) prediction of a single transmembrane segment and a C-terminal histidine-rich motif likely to be a metal-binding site, 2) positional gene linkage with known B12 biosynthesis genes, 3) upstream proximity of B12 transcriptional regulatory sites, 4) the absence of other known cobalt import systems and 5) the obligate co-localisation with a protein (CbtA) predicted to have five additional transmembrane segments.. +PF09490 Probable cobalt transporter subunit (CbtA)
This entry represents a family of proteins which have been proposed to act as cobalt transporters acting in concert with vitamin B12 biosynthesis systems. Evidence for this assignment includes 1) prediction of five transmembrane segments, 2) positional gene linkage with known B12 biosynthesis genes, 3) upstream proximity of B12 transcriptional regulatory sites, 4) the absence of other known cobalt import systems and 5) the obligate co-localisation with a small protein (CbtB) having a single additional transmembrane segment and a C-terminal histidine-rich motif likely to be a metal-binding site.. +PF09491 AlwI restriction endonuclease
This family includes the AlwI (recognises GGATC), Bsp6I (recognises GC^NGC), BstNBI (recognises GASTC), PleI (recognises GAGTC) and MlyI (recognises GAGTC) restriction endonucleases.. +PF09492 pec_lyase;
Members of this family are isozymes of pectate lyase (EC:4.2.2.2), also called polygalacturonic transeliminase and alpha-1,4-D-endopolygalacturonic acid lyase.. +PF09493 CHP02450_Tryp;
Tryptophan-rich protein (DUF2389). Members of this family are small hypothetical proteins of 60 to 100 residues from Cyanobacteria and some Proteobacteria. Prochlorococcus marinus strains have two members, other species one only. Interestingly, of the eight most conserved residues, four are aromatic and three are invariant tryptophans. It appears all species that encode this protein can synthesise tryptophan de novo.. +PF09494 Slx4 endonuclease
The Slx4 protein is a heteromeric structure-specific endonuclease found from fungi to mammals. Slx4 with Slx1 acts as a nuclease on branched DNA substrates, particularly simple-Y, 5'-flap, or replication fork structures by cleaving the strand bearing the 5' non-homologous arm at the branch junction and thus generating ligatable nicked products from 5'-flap or replication fork substrates .. +PF09495 UPF0390;
Protein of unknown function (DUF2462). This protein is highly conserved, but its function is unknown. It can be isolated from HeLa cell nucleoli and is found to be homologous with Leydig cell tumour protein whose function is unknown [1, supplementary Table I].. +PF09496 Cenp-O;
Cenp-O kinetochore centromere component. This eukaryotic protein is a component of the inner kinetochore subcomplex of the centromere. It has been shown to be involved in chromosome segregation via regulation of the spindle in both yeast and human .. +PF09497 Transcription mediator complex subunit Med12
Med12 is a negative regulator of the Gli3-dependent sonic hedgehog signalling pathway via its interaction with Gli3 within the RNA polymerase II transcriptional Mediator. A complex is formed between Med12, Med13, CDK8 and CycC which is responsible for suppression of transcription. This subunit forms part of the Kinase section of Mediator .. +PF09498 CHP02448;
Protein of unknown function (DUF2388). This family consists of small hypothetical proteins, about 100 amino acids in length. The family includes five members (three in tandem) in Pseudomonas aeruginosa PAO1 and in Pseudomonas putida (strain KT2440), four in Pseudomonas syringae DC3000, and single members in several other Proteobacteria. The function is unknown.. +PF09499 ApaLI-like restriction endonuclease
This family includes R.ApaLI and R.XbaI restriction endonucleases. ApaLI recognises and cleaves the sequence GTGCAC.. +PF09500 yiiD_Cterm;
Putative thioesterase (yiiD_Cterm). This entry consists of a broadly distributed uncharacterised domain often found as a standalone protein. The member from Shewanella oneidensis is described from crystallography work as a putative thioesterase because it belongs to the HotDog clan of enzymes. About half of the members of this family are fused to an Acetyltransf_1 domain Pfam:PF00583.. +PF09501 Bac_small_yrzI;
Probable sporulation protein (Bac_small_yrzI). Members of this family are very small proteins, about 47 residues each, in the genus Bacillus. Single members are found in Bacillus subtilis and Bacillus halodurans, while arrays of six members in tandem are found in Bacillus cereus and Bacillus anthracis. An EIxxE motif present in most members of this family resembles cleavage sites by the germination protease GPR in a number of small acid-soluble spore proteins (SASP). A role in sporulation is possible.. +PF09502 Bacterial type III secretion protein (HrpB4)
This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow range of species including Xanthomonas, Burkholderia and Ralstonia.. +PF09504 Bsp6I restriction endonuclease
This family includes the Bsp6I (recognises and cleaves GC^NGC) restriction endonucleases.. +PF09505 Dimethylamine methyltransferase (Dimeth_PyL)
This family consists of dimethylamine methyltransferases from the genus Methanosarcina. It is found in three nearly identical copies in each of Methanosarcina acetivorans, Methanosarcina barkeri, and Methanosarcina mazei. It is one of a suite of three non-homologous enzymes with a critical UAG-encoded pyrrolysine residue in these species (along with trimethylamine methyltransferase and monomethylamine methyltransferase). It demethylates dimethylamine, leaving monomethylamine, and methylates the prosthetic group of the small corrinoid protein MtbC. The methyl group is then transferred by methylcorrinoid:coenzyme M methyltransferase to coenzyme M. Note that the pyrrolysine residue is variously translated as K or X, or as a stop codon that truncates the sequence.. +PF09506 Glucosylglycerol-phosphate phosphatase (Salt_tol_Pase)
Proteins in this family are glucosylglycerol-phosphate phosphatases, with the gene symbol stpA (Salt Tolerance Protein A). A motif characteristic of acid phosphatases is found, but otherwise this family shows little sequence similarity to other phosphatases. This enzyme acts on the glucosylglycerol phosphate, product of glucosylglycerol phosphate synthase and immediate precursor of the osmoprotectant glucosylglycerol.. +PF09508 CHP02336;
Lacto-N-biose phosphorylase. The gene which codes for this protein in gut-bacteria is located in a novel putative operon for galactose metabolism. The protein appears to be a carbohydrate-processing phosphorolytic enzyme (EC:2.4.1.211), unlike either glycoside hydrolases or glycoside lyase. Intestinal colonisation by bifidobacteria is important for human health, especially in pediatrics, because colonisation seems to prevent infection by some pathogenic bacteria that cause diarrhoea or other illnesses. The operon seems to be involved in intestinal colonisation by bifidobacteria mediated by metabolism of mucin sugars. In addition, it may also resolve the question of the nature of the bifidus factor in human milk as the lacto-N-biose structure found in milk oligosaccharides.. +PF09507 DNA polymerase subunit Cdc27
This protein forms the C subunit of DNA polymerase delta. It carries the essential residues for binding to the Pol1 subunit of polymerase alpha, from residues 293-332, which are characterised by the motif D--G--VT, referred to as the DPIM motif. The first 160 residues of the protein form the minimal domain for binding to the B subunit, Cdc1, of polymerase delta, the final 10 C-terminal residues, 362-372, being the DNA sliding clamp, PCNA, binding motif.. +PF09509 Hypoth_ymh;
Protein of unknown function (Hypoth_ymh). This entry consists of a relatively rare prokaryotic protein family (about 8 occurrences per 200 genomes). Genes for members of this family appear to be associated variously with phage and plasmid regions, restriction system loci, transposons, and housekeeping genes. Their function is unknown.. +PF09510 Rtt102p-like transcription regulator protein
This protein is found in fungi. The family includes Rtt102p, a transcription regulator protein which appears to be integrally associated with both the Swi-Snf and the RSC chromatin remodelling complexes .. +PF09511 RNA ligase
TIGRFAMs, Coggill P, Mistry J, Wood V. TIGRFAMs & Pfam-B_49998 (release 17.0). Members of this family include T4 phage proteins with ATP-dependent RNA ligase activity. Host defence to phage may include cleavage and inactivation of specific tRNA molecules; members of this family act to reverse this RNA damage. The enzyme is adenylated, transiently, on a Lys residue in a motif KXDGSL. This family also includes fungal tRNA ligases that have adenylyltransferase activity . tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns.. +PF09512 Thiamine-precursor transporter protein (ThiW)
Levels of thiamine pyrophosphate (TPP) or thiamine regulate transcription or translation of a number of thiamine biosynthesis, salvage, or transport genes in a wide range of prokaryotes. The mechanism involves direct binding, with no protein involved, to a structural element called THI found in the untranslated upstream region of thiamine metabolism gene operons. This element is called a riboswitch and is seen also for other metabolites such as FMN and glycine. This protein family consists of proteins identified in operons controlled by the THI riboswitch and designated ThiW. The hydrophobic nature of this protein and reconstructed metabolic background suggests that this protein acts in transport of a thiazole precursor of thiamine.. +PF09514 SSXRD motif
Pfam-B_23332 (Release 21.0). SSX1 can repress transcription, and this has been attributed to a putative Kruppel associated box (KRAB) repression domain at the N-terminus. However, from the analysis of deletion constructs further repression activity was found at the C-terminus of SSX1, which has been called the SSXRD (SSX Repression Domain). The potent repression exerted by full-length SSX1 appears to localise to this region .. +PF09515 Thiamine transporter protein (Thia_YuaJ)
Members of this protein family have been assigned as thiamine transporters by a phylogenetic analysis of families of genes regulated by the THI element, a broadly conserved RNA secondary structure element through which thiamine pyrophosphate (TPP) levels can regulate transcription of many genes related to thiamine transport, salvage, and de novo biosynthesis. Species with this protein always lack the ThiBPQ ABC transporter. In some species (e.g. Streptococcus mutans and Streptococcus pyogenes), yuaJ is the only THI-regulated gene. Evidence from Bacillus cereus indicates thiamine uptake is coupled to proton translocation.. +PF09516 CfrBI restriction endonuclease
This family includes the CfrBI (recognises and cleaves C^CWWGG) restriction endonuclease.. +PF09517 Eco29kI restriction endonuclease
This family includes the Eco29kI (recognises and cleaves CCGC^GG) restriction endonuclease.. +PF09518 HindIII restriction endonuclease
This family includes the HindIII (recognises and cleaves A^AGCTT) restriction endonuclease.. +PF09519 HindVP restriction endonuclease
This family includes the HindVP (recognises GRCGYC but the cleavage site is unknown) restriction endonucleases.. +PF09520 RE_MjaII;
Type II restriction endonuclease, TdeIII. This family includes many TdeIII restriction endonucleases that recognise and cleave at GGNCC sites. TdeIII cleave unmethylated double-stranded DNA .. +PF09521 NgoPII restriction endonuclease
This family includes the NgoPII (recognises and cleaves GG^CC) restriction endonuclease.. +PF09522 R.Pab1 restriction endonuclease
+PF09523 CHP02444;
Protein of unknown function (DUF2390). Members of this family are bacterial hypothetical proteins, about 160 amino acids in length, found in various proteobacteria, including members of the genera Pseudomonas and Vibrio. The C-terminal region is poorly conserved and is not included in the model.. +PF09524 Conserved phage C-terminus (Phg_2220_C)
This entry represents the conserved C-terminal domain of a family of proteins found exclusively in bacteriophage and in bacterial prophage regions. The functions of this domain and the proteins containing it are unknown.. +PF09526 CHP02443;
Probable metal-binding protein (DUF2387). Members of this family are small proteins, about 70 residues in length, with a basic triplet near the N-terminus and a probable metal-binding motif CPXCX(18)CXXC. Members are found in various proteobacteria.. +PF09527 Putative F0F1-ATPase subunit (ATPase_gene1)
This model represents a protein found encoded in F1F0-ATPase operons in several genomes, including Methanosarcina barkeri (archaeal) and Chlorobium tepidum (bacterial). It is a small protein (about 100 amino acids) with long hydrophobic stretches and is presumed to be a subunit of the enzyme.. +PF09528 Ehrlichia tandem repeat (Ehrlichia_rpt)
This entry represents 77 residues of an 80 amino acid (240 nucleotide) tandem repeat, found in a variable number of copies in an immunodominant outer membrane protein of Ehrlichia chaffeensis, a tick-borne obligate intracellular pathogen.. +PF09529 intg_mem_TP0381;
Integral membrane protein (intg_mem_TP0381). This entry represents a family of hydrophobic proteins with seven predicted transmembrane alpha helices. Members are found in Bacillus subtilis (ywaF), TP0381 from Treponema pallidum (TP0381), Streptococcus pyogenes, Rhodococcus erythropolis, etc.. +PF09531 Nucleoporin protein Ndc1-Nup
Ndc1 is a nucleoporin protein that is a component of the Nuclear Pore Complex, and, in fungi, also of the Spindle Pole Body. It consists of six transmembrane segments, three lumenal loops, both concentrated at the N-terminus and cytoplasmic domains largely at the C-terminus, all of which are well conserved.. +PF09532 DFDF;
The FDF domain, so called because of the conserved FDF at its N termini, is an entirely alpha-helical domain with multiple exposed hydrophilic loops . It is found at the C terminus of Scd6p-like SM domains . It is also found with other divergent Sm domains and in proteins such as Dcp3p and FLJ21128, where it is found N terminal to the YjeF-N domain, a novel Rossmann fold domain .. +PF09533 CHP02269_MYXXA;
Predicted lipoprotein of unknown function (DUF2380). This family consists of at least 9 paralogs in Myxococcus xanthus, a member of the Deltaproteobacteria. One appears truncated toward the N-terminus; the others are predicted lipoproteins. The function is unknown.. +PF09534 Tryptophan-associated transmembrane protein (Trp_oprn_chp)
Members of this family are predicted transmembrane proteins with four membrane-spanning helices. Members are found in the Actinobacteria (Mycobacterium, Corynebacterium, Streptomyces), always associated with genes for tryptophan biosynthesis.. +PF09535 Protein of unknown function (Gmx_para_CXXCG)
This entry consists of at least 10 paralogous proteins from Myxococcus xanthus and that lack detectable sequence similarity to any other protein family. An imperfectly conserved CXXCG motif, a probable binding site, appears twice in the multiple sequence alignment.. +PF09536 Mxa_TIGR02265;
Protein of unknown function (DUF2378). This family consists of a set of at least 17 paralogous proteins in Myxococcus xanthus DK 1622 and 12 in Stigmatella aurantiaca DW4/3-1. Members are about 200 amino acids in length. The function is unknown.. +PF09537 CHP2284;
Domain of unknown function (DUF2383). Members of this protein family are found mostly in the Proteobacteria, although one member is found in the marine planctomycete Pirellula sp. strain 1. The function is unknown.. +PF09538 Protein of unknown function (FYDLN_acid)
Members of this family are bacterial proteins with a conserved motif [KR]FYDLN, sometimes flanked by a pair of CXXC motifs, followed by a long region of low complexity sequence in which roughly half the residues are Asp and Glu, including multiple runs of five or more acidic residues. The function of members of this family is unknown.. +PF09539 CHP02301;
Protein of unknown function (DUF2385). Members of this uncharacterised protein family are found in a number of alphaproteobacteria, including root nodule bacteria, Brucella suis, Caulobacter crescentus, and Rhodopseudomonas palustris. Conserved residues include two well-separated cysteines, suggesting a disulfide bond. The function is unknown.. +PF09543 CHP02267_MYXXA;
Protein of unknown function (DUF2379). This family consists of at least 7 paralogs in Myxococcus xanthus and 6 in Stigmatella aurantiaca, both members of the Deltaproteobacteria. The function is unknown.. +PF09544 Mxa_TIGR02268;
Protein of unknown function (DUF2381). This family consists of at least 8 paralogs in Myxococcus xanthus, a member of the Deltaproteobacteria. The function is unknown.. +PF09545 AccI restriction endonuclease
This family includes the AccI (recognises and cleaves GT^MKAC) restriction endonuclease.. +PF09546 spore_III_AE;
Stage III sporulation protein AE (spore_III_AE). This represents the stage III sporulation protein AE, which is encoded in a spore formation operon spoIIIAABCDEFGH under the control of sigma G. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species.. +PF09547 spore_IV_A;
Stage IV sporulation protein A (spore_IV_A). SpoIVA is designated stage IV sporulation protein A. It acts in the mother cell compartment and plays a role in spore coat morphogenesis. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species.. +PF09548 spore_III_AB;
Stage III sporulation protein AB (spore_III_AB). SpoIIIAB represents the stage III sporulation protein AB, which is encoded in a spore formation operon: spoIIIAABCDEFGH that is under sigma G regulation . A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species.. +PF09549 Bpu10I restriction endonuclease
This family includes the Bpu10I (recognises and cleaves CCTNAGC (-5/-2)) restriction endonucleases.. +PF09550 CHP2216_phage;
Conserved hypothetical phage protein (DUF2376). This entry describes a family of proteins found exclusively in phage or in prophage regions of bacterial genomes, including the phage-like Rhodobacter capsulatus gene transfer agent, which packages DNA.. +PF09551 spore_II_R;
Stage II sporulation protein R (spore_II_R). SpoIIR is designated stage II sporulation protein R. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. SpoIIR is a signalling protein that links the activation of sigma E to the transcriptional activity of sigma F during sporulation.. +PF09552 BstXI restriction endonuclease
This family includes the BstXI (recognises and cleaves CCANNNNN^NTGG) restriction endonuclease.. +PF09553 Eco47II restriction endonuclease
This family includes the Eco47II (which recognises GGNCC, but the cleavage site is unknown) restriction endonuclease.. +PF09554 HaeII restriction endonuclease
This family includes the HaeII (recognises and cleaves RGCGC^Y) restriction endonuclease.. +PF09556 HaeIII restriction endonuclease
This family includes the HaeIII (recognises and cleaves GG^CC) restriction endonuclease.. +PF09557 CHP2271_C;
Domain of unknown function (DUF2382). This entry describes an uncharacterized domain, sometimes found in association with a PRC-barrel domain Pfam:PF05239 (which is also found in rRNA processing protein RimM and in a photosynthetic reaction centre complex protein). This domain is found in proteins from Bacillus subtilis, Deinococcus radiodurans, Nostoc sp. PCC 7120, Myxococcus xanthus, and several other species. The function is not known.. +PF09558 CHP02922;
Protein of unknown function (DUF2375). TIGRFAMs, Coggill P. Two members of this family are found in Colwellia psychrerythraea (strain 34H / ATCC BAA-681) and one each in various other species of Colwellia and Shewanella. One member from C. psychrerythraea is of special interest because it is preceded by the same cis-regulatory site as a number of genes that have the PEP-CTERM domain described by PEP_anchor (IPR013424).. +PF09559 Cas6 Crispr
The Cas6 Crispr family of proteins averaging 140 residues are characterised by having a GhGxxxxxGhG motif, where h indicates a hydrophobic residue, at the C-terminus . The CRISPR-Cas system is possibly a mechanism of defence against invading pathogens and plasmids that functions analogously to the RNA interference (RNAi) systems in eukaryotes .. +PF09560 Spo_YunB; Spo_yunB;
Sporulation protein YunB (Spo_YunB). Spo_YunB is the sporulation protein YunB. In Bacillus subtilis its expression is controlled by sigmaE. The gene YunB seems to code for a protein involved, at least indirectly, in the pathway leading to the activation of sigmaK. Inactivation of YunB delays sigmaK activation and results in reduced sporulation efficiency.. +PF09561 HpaII restriction endonuclease
This family includes the HpaII (recognises and cleaves C^CGG) restriction endonuclease.. +PF09562 LlaMI restriction endonuclease
This family includes the LlaMI (recognises and cleaves CC^NGG) restriction endonuclease.. +PF09563 LlaJI restriction endonuclease
This family includes the LlaJI (recognises GACGC) restriction endonucleases.. +PF09564 NgoBV restriction endonuclease
This family includes the NgoBV (recognises GGNNCC but cleavage site is unknown) restriction endonuclease.. +PF09565 NgoFVII restriction endonuclease
This family includes the NgoFVII (recognises GCSGC but cleavage site unknown) restriction endonuclease.. +PF09566 SacI restriction endonuclease
This family includes the SacI (recognises and cleaves GAGCT^C) restriction endonuclease.. +PF09567 MamI restriction endonuclease
This family includes the MamI (recognises and cleaves GATNN^NNATC) restriction endonuclease.. +PF09568 MjaI restriction endonuclease
This family includes the MjaI (recognises CTAG but cleavage site unknown) restriction endonuclease.. +PF09569 ScaI restriction endonuclease
This family includes the ScaI (recognises and cleaves AGT^ACT) restriction endonuclease.. +PF09570 SinI restriction endonuclease
This family includes the SinI (recognises and cleaves G^GWCC) restriction endonuclease.. +PF09571 XcyI restriction endonuclease
This family includes the XcyI (recognises and cleaves C^CCGGG) restriction endonucleases.. +PF09572 XamI restriction endonuclease
This family includes the XamI (recognises GTCGAC but cleavage site unknown) restriction endonuclease.. +PF09573 TaqI restriction endonuclease
This family includes the TaqI (recognises and cleaves T^CGA) restriction endonuclease.. +PF09574 Short_TIGR02808;
Protein of unknown function (Duf2374). This very small protein (about 46 amino acids) consists largely of a single predicted membrane-spanning region. It is found in Photobacterium profundum SS9 and in three species of Vibrio, always near periplasmic nitrate reductase genes, but far from the periplasmic nitrate reductase genes in Aeromonas hydrophila ATCC 7966.. +PF09575 Spore_SspJ;
Small spore protein J (Spore_SspJ). Spore_SspJ represents a group of small acid-soluble proteins (SASP) from Bacillus sp., which are present in spores but not in growing cells. The sspJ gene is transcribed in the forespore compartment by RNA polymerase with the forespore-specific sigmaG. Loss of SspJ causes a slight decrease in the rate of spore outgrowth in an otherwise wild-type background.. +PF09577 Sporulation protein YpjB (SpoYpjB)
These proteins are found in the endospore-forming bacteria which include Bacillus species. In Bacillus subtilis, ypjB was found to be part of the sigma-E regulon. Sigma-E is a sporulation sigma factor that regulates expression in the mother cell compartment. Null mutants of ypjB show a sporulation defect, but this gene is not, however, a part of the endospore formation minimal gene set.. +PF09578 Spore cortex protein YabQ (Spore_YabQ)
This protein is predicted to span the membrane several times. It is only found in genomes of species that perform sporulation, such as Bacillus subtilis, Clostridium tetani, and other members of the Firmicutes (low-GC Gram-positive bacteria). Mutation of this sigmaE-dependent gene blocks development of the spore cortex. The length of the C-terminal region, which includes some hydrophobic regions, is variable.. +PF09579 Sporulation protein YtfJ (Spore_YtfJ)
Proteins in this family are encoded by bacterial genomes if, and only if, the species is capable of endospore formation. YtfJ was confirmed in spores of B. subtilis; it appears to be expressed in the forespore under control of SigF.. +PF09580 Sporulation lipoprotein YhcN/YlaJ (Spore_YhcN_YlaJ)
This entry contains YhcN and YlaJ, which are predicted lipoproteins that have been detected as spore proteins but not vegetative proteins in Bacillus subtilis. Both appear to be expressed under control of the RNA polymerase sigma-G factor. The YlaJ-like members of this family have a low-complexity, strongly acidic, 40-residue C-terminal domain.. +PF09581 Stage III sporulation protein AF (Spore_III_AF)
This family represents the stage III sporulation protein AF (Spore_III_AF) of the bacterial endospore formation program, which exists in some but not all members of the Firmicutes (formerly called low-GC Gram-positives). The C-terminal region of these proteins is poorly conserved.. +PF09582 Iron only nitrogenase protein AnfO (AnfO_nitrog)
Proteins in this entry include Anf1 from Rhodobacter capsulatus (Rhodopseudomonas capsulata) and AnfO from Azotobacter vinelandii. They are found exclusively in species which contain the iron-only nitrogenase, and are encoded immediately downstream of the structural genes for the nitrogenase enzyme in these species.. +PF09583 Phage shock protein G (Phageshock_PspG)
This protein was previously designated as YjbO in Escherichia coli. It is found only in genomes that have the phage shock operon (psp), but it is only rarely encoded near other psp genes. The psp regulon is upregulated in response to a number of stress conditions, including ethanol, expression of the filamentous phage secretin protein IV and other secretins and heat shock.. +PF09584 Phage shock protein PspD (Phageshock_PspD)
Members of this family are phage shock protein PspD, found in a minority of bacteria that carry the defining genes of the phage shock regulon (pspA, pspB, pspC, and pspF). It is found in Escherichia coli, Yersinia pestis, and closely related species, where it is part of the phage shock operon. It is known to be expressed but its function is unknown.. +PF09585 Conserved hypothetical protein (Lin0512_fam)
This family consists of few members, broadly distributed. It occurs so far in several Firmicutes (twice in Oceanobacillus), one Cyanobacterium, one alpha Proteobacterium, and (with a long prefix) in plants. The function is unknown. The alignment includes a well conserved motif GxGxDxHG near the N-terminus.. +PF09586 Bacterial membrane protein YfhO
Pfam-B_2727 (release 21.0). This protein is a conserved membrane protein . The yfhO gene is transcribed in Difco sporulation medium and the transcription is affected by the YvrGHb two-component system. Some members of this family have been annotated as glycosyl transferases of the PMT family.. +PF09587 Bacterial capsule synthesis protein PGA_cap
Pfam-B_1441 (release 21.0). This protein is a putative poly-gamma-glutamate capsule biosynthesis protein found in bacteria. Poly-gamma-glutamate is a natural polymer that may be involved in virulence and may help bacteria survive in high salt concentrations. It is a surface-associated protein .. +PF09588 YqaJ-like viral recombinase domain
Pfam-B_3587 (release 21.0). This protein family is found in many different bacterial species but is of viral origin. The protein forms an oligomer and functions as a processive alkaline exonuclease that digests linear double-stranded DNA in a Mg(2+)-dependent reaction. It has a preference for 5'-phosphorylated DNA ends. It thus forms part of the two-component SynExo viral recombinase functional unit .. +PF09589 HrpA pilus formation protein
Pfam-B_3574 (release 21.0). HrpA is an essential component of the type III secretion system (TTSS) which pathogens use to inject virulence factors directly into their host cells, and to cause disease. The TTSS has an Hrp pilus appendage for channelling effector proteins through the plant cell wall and this pilus elongates by the addition of HrpA pilin subunits at the distal end .. +PF09590 Lentivirus surface glycoprotein
Pfam-B_3269 (release 21.0). This protein is found in feline immunodeficiency retrovirus. It represents the surface glycoprotein which is found in the polyprotein C-terminal to the Env protein.. +PF09591 UPF0328;
Protein of unknown function (DUF2463). Pfam-B_3132 (release 21.0). This protein is found in eukaryotic, parasitic microsporidia. Its function is unknown.. +PF09592 Protein of unknown function (DUF2031)
Pfam-B_2630 (release 21.0). This protein is expressed in Plasmodium; its function is unknown. It may be the product of gene family pyst-b .. +PF09593 Phage_C1;
Beta-satellite pathogenicity beta C1 protein. Pfam-B_991 (release 21.0). Cotton leaf-curl disease - CLCuD - is of major economic importance in cotton-growing areas of the far-east. The infectious agent appears to be a single-stranded DNA molecule of approx 1350 nucleotides in length, which, when inoculated with the Begomovirus into cotton, induces symptoms typical of CLCuD. This molecule requires the Begomovirus for replication and encapsidation . DNA beta encodes a single protein, betaC1. The intracellular distribution of betaC1 is consistent with the hypothesis that it has a role in transporting the DNA A of Begomovirus from the nuclear site of replication to the plasmodesmatal exit sites of the infected cell. The DNA beta-encoded protein, betaC1, is the determinant of both pathogenicity and suppression of gene silencing .. +PF09594 Protein of unknown function (DUF2029)
Pfam-B_1780 (release 21.0). This is a putative transmembrane protein from bacteria. It is likely to be conserved between Mycobacterium species .. +PF09595 Metaviral_G glycoprotein
Pfam-B_922 (release 21.0). This is a viral attachment glycoprotein from region G of metaviruses. It is high in serine and threonine suggesting it is highly glycosylated .. +PF09596 MamL-1 domain
The MamL-1 domain is a polypeptide of up to 70 residues, numbers 15-67 of which adopt an elongated kinked helix that wraps around ANK and CSL forming one of the complexes in the build-up of the Notch transcriptional complex for recruiting general transcription factors.. +PF09597 IGR protein motif
This domain is found in fungal proteins and contains a conserved IGR motif. Its function is unknown.. +PF09598 Stm1
Pfam-B_39435 (release 21.0). This region is found at the N terminal of the Stm1 protein. Stm1 is a G4 quadruplex and purine motif triplex nucleic acid-binding protein. It has been implicated in many biological processes including apoptosis and telomere biosynthesis. Stm1 is known to interact with CDC13 , and is known to associate with ribosomes and nuclear telomere cap complexes .. +PF09599 Salmonella-Shigella invasin protein C (IpaC_SipC)
This entry represents a family of proteins associated with bacterial type III secretion systems, which are injection machines for virulence factors into host cell cytoplasm. Characterized members of this protein family are known to be secreted and are described as invasins, including IpaC from Shigella flexneri and SipC from Salmonella typhimurium. Members may be referred to as invasins, pathogenicity island effectors, and cell invasion proteins.. +PF09600 Cyd operon protein YbgE (Cyd_oper_YbgE)
This entry describes a small protein of unknown function, about 100 amino acids in length, essentially always found in an operon with CydAB, subunits of the cytochrome d terminal oxidase. It appears to be an integral membrane protein. It is found so far only in the Proteobacteria.. +PF09601 Chp_urease_rgn;
Protein of unknown function (DUF2459). This conserved hypothetical protein of unknown function is found in several Proteobacteria. Its function is unknown and its genome context is not well-conserved. It is found amid urease genes in at least one species.. +PF09602 Polyhydroxyalkanoic acid inclusion protein (PhaP_Bmeg)
This entry describes a protein found in polyhydroxyalkanoic acid (PHA) gene regions and incorporated into PHA inclusions in Bacillus cereus and Bacillus megaterium. The role of the protein may include amino acid storage.. +PF09603 Fibrobacter succinogenes major domain (Fib_succ_major)
This domain of about 175 to 200 amino acids is found, in from one to five copies, in over 50 proteins in Fibrobacter succinogenes S85, an obligate anaerobe of the rumen. Many members of this family have an apparent lipoprotein signal sequence. Conserved cysteine residues, suggestive of disulfide bond formation, are also consistent with an extracytoplasmic location for this domain. This domain can also be found in small numbers of proteins in Chlorobium tepidum and Bacteroides thetaiotaomicron.. +PF09604 F subunit of K+-transporting ATPase (Potass_KdpF)
This entry describes a very small integral membrane peptide KdpF, a subunit of the K(+)-translocating Kdp complex. It is found upstream of the KdpA subunit (IPR004623). Because of its very small size and highly hydrophobic character, it is sometimes missed in genome annotation.. +PF09605 Hypothetical bacterial integral membrane protein (Trep_Strep)
This family consists of strongly hydrophobic proteins about 190 amino acids in length with a strongly basic motif near the C-terminus. It is found in rather few species, but in paralogous families of 12 members in the oral pathogenic spirochaete Treponema denticola and 2 in Streptococcus pneumoniae R6.. +PF09606 ARC105; ARC105_Med_act;
ARC105 or Med15 subunit of Mediator complex non-fungal. The approx. 70 residue Med15 domain of the ARC-Mediator co-activator is a three-helix bundle with marked similarity to the KIX domain. The sterol regulatory element binding protein (SREBP) family of transcription activators use the ARC105 subunit to activate target genes in the regulation of cholesterol and fatty acid homeostasis. In addition, Med15 is a critical transducer of gene activation signals that control early metazoan development .. +PF09607 Brinker DNA-binding domain
This DNA-binding domain is the first approx. 100 residues of the N-terminal end of Brinker. The structure of this domain in complex with DNA consists of four alpha-helices that contain a helix-turn-helix DNA recognition motif specific for GC-rich DNA. The Brinker nuclear repressor is a major element of the Drosophila Decapentaplegic morphogen signalling pathway .. +PF09608 Putative transmembrane protein (Alph_Pro_TM)
This family consists of predicted transmembrane proteins of about 270 amino acids. Members are found, so far, only among the Alphaproteobacteria and only once in each genome.. +PF09609 CRISPR-associated protein, GSU0054 family (Cas_GSU0054)
This entry represents a rare CRISPR-associated protein. So far, members are found in Geobacter sulfurreducens and in two unpublished genomes: Gemmata obscuriglobus and Actinomyces naeslundii. CRISPR-associated proteins typically are found near CRISPR repeats and other CRISPR-associated proteins, have low levels of sequence identity, have sequence relationships that suggest lateral transfer, and show some sequence similarity to DNA-active proteins such as helicases and repair proteins.. +PF09610 Mycoplasma virulence signal region (Myco_arth_vir_N)
This entry represents the N-terminal region of a family of large, virulence-associated proteins in Mycoplasma arthritidis and smaller proteins in Mycoplasma capricolum. It includes a probable signal sequence or signal anchor, which, in most instances, has four consecutive Lys residues before the hydrophobic stretch.. +PF09611 CRISPR-associated protein (Cas_Csy1)
CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2465 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy1, for CRISPR/Cas Subtype Ypest protein 1.. +PF09612 Bacterial protein of unknown function (HtrL_YibB)
The protein from this rare, uncharacterized protein family is designated HtrL or YibB in E. coli, where its gene is found in a region of LPS core biosynthesis genes . Homologues are found in Shigella flexneri, Campylobacter jejuni, and Caenorhabditis elegans only. The htrL gene may represent an insertion to the LPS core biosynthesis region, rather than an LPS biosynthetic protein.. +PF09613 Bacterial type III secretion protein (HrpB1_HrpK)
This family of proteins is encoded by genes found within type III secretion operons in a limited range of species including Xanthomonas, Ralstonia and Burkholderia.. +PF09614 CRISPR-associated protein (Cas_Csy2)
CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2464 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy2, for CRISPR/Cas Subtype Ypest protein 2.. +PF09615 CRISPR-associated protein (Cas_Csy3)
CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2463 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy3, for CRISPR/Cas Subtype Ypest protein 3.. +PF09617 CRISPR-associated protein GSU0053 (Cas_GSU0053)
This entry is found in CRISPR-associated (cas) proteins in the genomes of Geobacter sulfurreducens PCA and Desulfotalea psychrophila LSv54 (both Desulfobacterales from the Deltaproteobacteria), Gemmata obscuriglobus (a Planctomycete), and Actinomyces naeslundii MG1 (Actinobacteria).. +PF09618 CRISPR-associated protein (Cas_Csy4)
CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This protein family, typified by YPO2462 of Yersinia pestis, is a CRISPR-associated (Cas) family strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy4, for CRISPR/Cas Subtype Ypest protein 4.. +PF09619 Type III secretion system lipoprotein chaperone (YscW)
This entry is encoded within type III secretion operons. The protein has been characterised as a chaperone for the outer membrane pore component YscC. YscW is a lipoprotein which is itself localized to the outer membrane and, it is believed, facilitates the oligomerisation and localisation of YscC.. +PF09620 CRISPR-associated protein (Cas_csx3)
This entry is encoded in CRISPR-associated (cas) gene clusters, near CRISPR repeats, in the genomes of several different thermophiles: Archaeoglobus fulgidus (archaeal), Aquifex aeolicus (Aquificae), Dictyoglomus thermophilum (Dictyoglomi), and a thermophilic Synechococcus (Cyanobacteria). It is not yet assigned to a specific CRISPR/cas subtype (hence the x designation csx3).. +PF09621 Type III secretion system regulator (LcrR)
This family of proteins are encoded within type III secretion operons and have been characterised in Yersinia as a regulator of the Low-Calcium Response (LCR).. +PF09622 CHP02587;
Putative integral membrane protein (DUF2391). This entry is found in Nostoc sp. PCC 7120, Agrobacterium tumefaciens, Rhizobium meliloti, and Gloeobacter violaceus in a conserved two-gene neighbourhood. Proteins containing this entry appear to span the membrane seven times.. +PF09623 CRISPR-associated protein NE0113 (Cas_NE0113)
Members of this minor CRISPR-associated (Cas) protein family are encoded in cas gene clusters in Vibrio vulnificus YJ016, Nitrosomonas europaea ATCC 19718, Mannheimia succiniciproducens MBEL55E, and Verrucomicrobium spinosum.. +PF09624 CHP02588;
Protein of unknown function (DUF2393). The function of this protein is unknown. It is always found as part of a two-gene operon with IPR013416, a protein that appears to span the membrane seven times. It has so far been found in the bacteria Nostoc sp. PCC 7120, Agrobacterium tumefaciens, Rhizobium meliloti, and Gloeobacter violaceus.. +PF09625 VP9 protein
VP9 is a protein containing a ferredoxin fold. Two dimers come together to form one asymmetric unit which possesses a DNA recognition fold and specific metal binding sites possibly for zinc. It is postulated that being a non-structural protein VP9 is involved in the transcriptional regulation of the White spot syndrome virus, WSSV, from which it comes. WSSV is the major viral pathogen in shrimp aquaculture . VP9 is found N-terminal to the Pfam:PF07056 domain.. +PF09626 Dihaem cytochrome c
Dihaem cytochrome c (DHC) is a soluble c-type cytochrome that folds into two distinct domains, each binding a single haem group and connected by a small linker region. Despite little sequence similarity, the N-terminal domain (residues 12-75) is a class I type cytochrome c, that binds one of the haems, but the domain surrounding the other haem is structurally unique. DHC binds electrostatically to an oxygen-binding protein, sphaeroides haem protein (SHP), as a component of a conserved electron transfer pathway. DHC acts as the physiological electron donor for SHP during phototrophic growth . In certain species DHC is found upstream of Pfam:PF01292.. +PF09627 PrgU-like protein
This hypothetical protein of 125 residues is expressed in bacteria but is thought to be plasmid in origin. It forms a six beta-strand barrel with three accompanying alpha helices and is probably a homo-dimer in the cell. It may be involved in pheromone-inducible conjugation .. +PF09628 Yvfg;
Yvfg is a hypothetical protein of 71 residues expressed in some bacteria. The monomer consists of two parallel alpha helices, and the protein crystallises as a homo-dimer.. +PF09629 YorP protein
YorP is a 71 residue protein found in bacteria. As it is also found in a bacteriophage it might be of viral origin. The structure is of an alpha helix between two of five beta strands. The function is unknown.. +PF09630 Domain of unknown function (DUF2024)
This protein of 86 residues is expressed in bacteria. It consists of four alpha helices and two beta strands. Its function is unknown. One UniProt entry gives the gene name as Traf5.. +PF09631 Sen15 protein
The Sen15 subunit of the tRNA intron-splicing endonuclease is one of the two structural subunits of this hetero-tetrameric enzyme. Residues 36-157 of this subunit possess a novel homodimeric fold. Each monomer consists of three alpha-helices and a mixed antiparallel/parallel beta-sheet. Two monomers of Sen15 fold with two monomers of Sen34, one of the two catalytic subunits, to form an alpha2-beta2 tetramer as part of the functional endonuclease assembly.. +PF09632 Rac1-binding domain
The Rac1-binding domain is the C-terminal portion of YpkA from Yersinia. It is an all-helical molecule consisting of two distinct subdomains connected by a linker. The N-terminal end, residues 434-615, consists of six helices organised into two three-helix bundles packed against each other. This region is involved with binding to GTPases. The C-terminal end, residues 705-732, is a novel and elongated fold consisting of four helices clustered into two pairs, and this fold carries the helix implicated in actin activation. Rac1-binding domain mimics host guanine nucleotide dissociation inhibitors (GDIs) of the Rho GTPases, thereby inhibiting nucleotide exchange in Rac1 and causing cytoskeletal disruption in the host . It is usually found downstream of Pfam:PF00069.. +PF09633 Protein of unknown function (DUF2023)
This protein of approx. 120 residues consists of three beta strands and five alpha helices, thought to fold into a homo-dimer. It is expressed in bacteria.. +PF09634 Protein of unknown function (DUF2025)
This protein is produced from gene PA1123 in Pseudomonas. It contains three alpha helices and six beta strands and is thought to be monomeric. It appears to be present in the biofilm layer and may be a lipoprotein.. +PF09635 MetRS-N binding domain
The MetRS-N domain binds an Arc1-P domain in a tetrameric complex resembling a classical GST homo-dimer. Domain-swapping between symmetrically related MetRS-N and Arc1p-N domains generates a 2:2 tetramer held together by van der Waals forces. This domain is necessary for formation of the aminoacyl-tRNA synthetase complex necessary for tRNA nuclear export and shuttling as part of the translational apparatus. The domain is associated with Pfam:PF09334.. +PF09636 XkdW protein
This protein of approx. 100 residues contains two alpha helices and two beta strands and is probably monomeric. It is expressed in bacteria but is probably viral in origin. Its function is unknown.. +PF09637 Med18 protein
Med18 is one subunit of Mediator, a head-module multiprotein complex, that stimulates basal RNA polymerase II (Pol II) transcription. Med18 consists of an eight-stranded beta-barrel with a central pore and three flanking helices. It complexes with Med8 and Med20 proteins by forming a heterodimer of two-fold symmetry with Med20 and binding the C-terminal alpha-helix region of Med8 across the top of its barrel. This complex creates a multipartite TBP-binding site that can be modulated by transcriptional activators .. +PF09638 Ph1570 protein
This is a hypothetical protein from Pyrococcus horikoshii of unknown function. It contains six alpha helices and eight beta strands and is thought to be monomeric.. +PF09639 YjcQ protein
YjcQ is a protein of approx. 100 residues containing four alpha helices and three beta strands. It is expressed in bacteria and also in viruses. It appears to be under the regulation of SigD RNA polymerase which is responsible for the expression of many genes encoding cell-surface proteins related to flagellar assembly, motility, chemotaxis and autolysis in the late exponential growth phase. The exact function of YjcQ is unknown . However, it is thought to be a prophage head protein in viruses .. +PF09640 Domain of unknown function (DUF2027)
This protein domain is of unknown function, though putatively involved in DNA mismatch repair. It is associated with Pfam:PF01713.. +PF09641 Protein of unknown function (DUF2026)
This protein of approx. 100 residues is found in bacteria. It contains up to five alpha helices and up to seven beta strands and is probably monomeric. Its function is unknown. It is cited as a major prophage head protein , so might generally be of viral origin.. +PF09642 YonK protein
YonK protein is expressed by the bacterial prophage SPbetaC . It is a 63 residue protein that associates into a homo-octamer in the form of a beta-stranded barrel with four outer helical features at points of the compass. Its function is unknown.. +PF09643 YopX protein
YopX is a protein that is largely helical, with three identical chains probably complexing into a twelve-chain structure.. +PF09644 Mg296 protein
This protein of 129 residues is expressed in bacteria. It consists of three identical chains of five alpha helices. Two copies of each chain associate into a complex of six units of possible biological significance but of unknown function.. +PF09645 F-112 protein
F-112 protein is of 70-110 residues and is found in viruses. Its winged-helix structure suggests a DNA-binding function.. +PF09646 Gp37 protein
This protein of 154 residues consists of a unit of helices and beta sheets that crystallises into a beautiful asymmetrical dodecameric barrel-structure, of two six-membered rings one on top of the other. It is expressed in bacteria but is of viral origin as it is found in phage BcepMu and is probably a pathogenesis factor .. +PF09648 YycH protein
Pfam-B_6483 (Release 21.0). This domain is exclusively found in YycI proteins in the low GC content Gram positive species. These two domains share the same structural fold with domains two and three of YycH Pfam:PF07435. Both, YycH and YycI are always found in pair on the chromosome, downstream of the essential histidine kinase YycG. Additionally, both proteins share a function in regulating the YycG kinase with which they appear to form a ternary complex. Lastly, the two proteins always contain an N-terminal transmembrane helix and are localized to the periplasmic space as shown by PhoA fusion studies.. +PF09649 Histone chaperone domain CHZ
This domain is highly conserved from yeasts to humans and is part of the chaperone protein HIRIP3 in vertebrates which interacts with the H3.3 chaperone HIRA, implicated in histone replacement during transcription. N- and C- termini of Chz family members are relatively divergent but do contain similar acidic stretches rich in Glu/Asp residues, characteristic of all histone chaperones .. +PF09650 Putative polyhydroxyalkanoic acid system protein (PHA_gran_rgn)
Proteins in this entry are encoded by genes involved in either polyhydroxyalkanoic acid (PHA) biosynthesis or utilisation, including proteins found at the surface of PHA granules. These proteins have so far been found in the Pseudomonadales, Xanthomonadales, and Vibrionales, all of which belong to the Gammaproteobacteria.. +PF09651 CRISPR-associated protein (Cas_APE2256)
This entry represents a conserved region of about 150 amino acids found in at least five archaeal and three bacterial species. These species all contain CRISPRs (Clustered Regularly Interspaced Short Palindromic Repeats). In six of eight species, the protein is encoded in the vicinity of a CRISPR/Cas locus.. +PF09652 Putative CRISPR-associated protein (Cas_VVA1548)
This entry represents a conserved region of about 95 amino acids found exclusively in species with CRISPRs (Clustered Regularly Interspaced Short Palindromic Repeats). In all bacterial species that contain this entry, the genes encoding the proteins are in the midst of a cluster of cas (CRISPR-associated) genes.. +PF09654 CHP02652;
Protein of unknown function (DUF2396). These conserved hypothetical proteins have so far been found only in the Cyanobacteria. They are about 170 amino acids long and contain a CxxCx(14)CxxH motif near the N-terminus.. +PF09655 Conserved nitrate reductase-associated protein (Nitr_red_assoc)
Proteins in this entry are found in the Cyanobacteria, and are mostly encoded near nitrate reductase and molybdopterin biosynthesis genes. Molybdopterin guanine dinucleotide is a cofactor for nitrate reductase. These proteins are sometimes annotated as nitrate reductase-associated proteins, though their function is unknown.. +PF09656 CHP02611;
Putative transmembrane protein (PGPGW). Proteins in this entry are putative Actinobacterial proteins of about 150 amino acids in length, with three predicted transmembrane helices and an unusual motif with consensus sequence PGPGW.. +PF09657 CRISPR-associated protein Csx8 (Cas_Csx8)
Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes proteins of unknown function which are encoded in the midst of a cas gene operon.. +PF09658 CRISPR-associated protein (Cas_Csx9)
Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes archaeal proteins encoded in cas gene regions.. +PF09659 CRISPR-associated protein (Cas_Csm6)
Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins.. +PF09660 CHP02677;
Protein of unknown function (DUF2397). Proteins in this entry are encoded within a conserved four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Betaproteobacteria).. +PF09661 CHP02678;
Protein of unknown function (DUF2398). Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Betaproteobacteria).. +PF09662 Phenylphosphate carboxylase gamma subunit (Phenyl_P_gamma)
Members of this protein family are the gamma subunit of phenylphosphate carboxylase. Phenol (hydroxybenzene) is converted to phenylphosphate, then para-carboxylated by this four-subunit enzyme, with the release of phosphate, to 4-hydroxybenzoate. The enzyme contains neither biotin nor thiamin pyrophosphate. The gamma subunit has no known homologues.. +PF09663 Amidohydrolase ring-opening protein (Amido_AtzD_TrzD)
Members of this family are ring-opening amidohydrolases, including cyanuric acid amidohydrolase (EC:3.5.2.15) (AtzD and TrzD) and barbiturase. Note that barbiturase does not act as defined for EC:3.5.2.1 (barbiturate + water = malonate + urea) but rather catalyses the ring opening of barbituric acid to ureidomalonic acid.. +PF09664 CHP02679;
Protein of unknown function C-terminus (DUF2399). Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Beta-proteobacteria). Just the C-terminal region is included here.. +PF09665 Type II restriction endonuclease (RE_Alw26IDE)
Members of this entry are type II restriction endonucleases of the Alw26I/Eco31I/Esp3I family. Characterised specificities of the three members are GGTCTC, CGTCTC and the shared subsequence GTCTC.. +PF09666 Sororin protein
Sororin is an essential, cell cycle-dependent mediator of sister chromatid cohesion . The protein is nuclear in interphase cells, dispersed from the chromatin in mitosis, and interacts with the cohesin complex .. +PF09667 Domain of unknown function (DUF2028)
This region of similarity is found in the vertebrate homologues of the drosophila Bobby Sox.. +PF09668 Aspartyl protease
Pfam-B_9589 (release 20.0). This family of eukaryotic aspartyl proteases have a fold similar to retroviral proteases which implies they function proteolytically during regulated protein turnover .. +PF09669 Phage regulatory protein Rha (Phage_pRha)
Members of this protein family are found in temperate phage and bacterial prophage regions. Members include the product of the rha gene of the lambdoid phage phi-80, a late operon gene. The presence of this gene interferes with infection of bacterial strains that lack integration host factor (IHF), which regulates the rha gene. It is suggested that Rha is a phage regulatory protein.. +PF09670 CRISPR-associated protein (Cas_Cas02710)
Members of this family are found, exclusively in the vicinity of CRISPR repeats and other CRISPR-associated (cas) genes, in Methanothermobacter thermautotrophicus (Methanobacterium thermoformicicum), Thermus thermophilus (Deinococcus-Thermus), Chloroflexus aurantiacus (Chloroflexi), and Thermomicrobium roseum (Thermomicrobia).. +PF09671 Spore coat protein (Spore_GerQ)
Members of this protein family are the spore coat protein GerQ of endospore-forming Firmicutes (low GC Gram-positive bacteria). This protein is cross-linked by a spore coat-associated transglutaminase.. +PF09673 Type-F conjugative transfer system pilin assembly protein
This entry represents TrbC, a protein that is an essential component of the F-type conjugative pilus assembly system for the transfer of plasmid DNA. The N-terminal portion of these proteins is heterogeneous.. +PF09674 CHP02757;
Protein of unknown function (DUF2400). Members of this uncharacterised protein family are found sporadically, so far only among spirochetes, epsilon and delta proteobacteria, and Bacteroides. The function is unknown and its gene neighbourhoods show little conservation.. +PF09675 Chlamydia-phage Chp2 scaffold (Chlamy_scaf)
Members of this entry are encoded by genes in chlamydia-phage such as Chp2. These viruses have around eight genes and obligately infect intracellular bacterial pathogens of the genus Chlamydia. This protein is annotated as VP3 or structural protein (as if a protein of mature viral particles), however, it is displaced from procapsids as DNA is packaged, and therefore is more correctly described as a scaffolding protein.. +PF09676 Type IV conjugative transfer system lipoprotein (TraV)
This entry includes TraV, which is a component of conjugative type IV secretion system. TraV is an outer membrane lipoprotein that is believed to interact with the secretin TraK. The alignment contains three conserved cysteines in the N-terminal half.. +PF09677 Type-F conjugative transfer system protein (TrbI_Ftype)
This entry represents TrbI, an essential component of the F-type conjugative transfer system for plasmid DNA transfer that has been shown to be localized to the periplasm.. +PF09678 Cytochrome c oxidase caa3 assembly factor (Caa3_CtaG)
Members of this family are the CtaG protein required for assembly of active cytochrome c oxidase of the caa3 type, as found in Bacillus subtilis.. +PF09679 Type-F conjugative transfer system pilin chaperone (TraQ)
This entry represents TraQ, a protein that makes a specific interaction with pilin (TraA) to aid its transfer through the inner membrane during the process of F-type conjugative pilus assembly.. +PF09680 Protein of unknown function (Tiny_TM_bacill)
This entry represents a family of hypothetical proteins, half of which are 40 residues or less in length. Members are found only in spore-forming species. A Gly-rich variable region is followed by a strongly conserved, highly hydrophobic region, predicted to form a transmembrane helix, ending with an invariant Gly. The consensus for this stretch is FALLVVFILLIIV.. +PF09681 N-terminal phage replisome organiser (Phage_rep_org_N)
This entry represents the N-terminal domain of a small family of phage proteins. The protein contains a region of low-complexity sequence that reflects DNA direct repeats able to function as an origin of phage replication. The region is N-terminal to the low-complexity region.. +PF09682 Phage holin protein (Holin_LLH)
This entry identifies a family of putative phage holin from a number of phage and prophage regions of Gram-positive bacteria. Like other holins, it is small (about 100 amino acids) with stretches of hydrophobic sequence and is encoded adjacent to lytic enzymes.. +PF09683 Bacteriocin (Lactococcin_972)
These sequences represent bacteriocins related to lactococcin. Members tend to be found in association with a seven transmembrane putative immunity protein.. +PF09684 Phage tail protein (Tail_P2_I)
These sequences represent the family of phage P2 protein I and related tail proteins from a number of temperate phage of Gram-negative bacteria.. +PF09685 Tic20-like protein
TIGRFAMs & Jackhmmer:D3PVW8. Chloroplast function requires the import of nuclear encoded proteins from the cytoplasm across the chloroplast double membrane. This is accomplished by two protein complexes, the Toc complex located at the outer membrane and the Tic complex located at the inner membrane. The Toc complex recognises specific proteins by a cleavable N-terminal sequence and is primarily responsible for translocation through the outer membrane, while the Tic complex translocates the protein through the inner membrane. This entry represents Tic20, a core member of the Tic complex. This protein is deeply embedded in the inner envelope membrane and is thought to function as a protein-conducting component of the Tic complex. This family also includes many proteins of unknown function from non-synthetic organisms.. +PF09686 Plasmid protein of unknown function (Plasmid_RAQPRD)
This entry identifies a family of proteins, which are about 100 amino acids in length, including a predicted signal sequence and a perfectly conserved motif RAQPRD towards the C terminus. Members are found in the Pseudomonas putida TOL plasmid pWW0 and in cryptic plasmid regions of Salmonella enterica subsp. enterica serovar Typhi and Pseudomonas syringae DC3000. The function of these proteins is unknown.. +PF09687 P_fal_TIGR01639; PHIST_a_b; PHIST_a_c; PRESAC; DUF3837; PRESAC;
Plasmodium RESA N-terminal. The short, four-helical domain first identified in the Plasmodium export proteins PHISTa and PHISTc has been extended to become this six-helical PRESAN domain identified in the P. falciparum-specific RESA-type (Ring-infected erythrocyte surface antigen) proteins in association with the DnaJ domain. Overall, at least 67 proteins have been detected in P. falciparum with complete copies of the PRESAN domain. No versions of this domain were detected in other apicomplexan genera, suggesting that the domain was 'invented' after the divergence of the lineage leading to the genus Plasmodium undergoing a dramatic proliferation only in P. falciparum. A secondary structure-prediction derived from the multiple alignment of the PRESAN family reveals that it is composed of an all-helical fold with six conserved helical segments. There is some evidence it might localise to membranes .. +PF09688 CHP1606_PLAF3D7;
Protein of unknown function (Wx5_PLAF3D7). This set of protein sequences represent a family of at least four proteins in Plasmodium falciparum (isolate 3D7). An interesting feature is five perfectly conserved Trp residues.. +PF09689 Plasmodium yoelii repeat (PY_rept_46)
This repeat is found in the products of only 2 genes in Plasmodium yoelii, in each of these proteins it is repeated 9 times. It is found in no other organism.. +PF09690 Plasmodium yoelii subtelomeric region (PYST-C1)
This group of sequences are defined by the N-terminal domain of a paralogous family of Plasmodium yoelii genes preferentially located in the subtelomeric regions of the chromosomes. There are no obvious homologues to these genes in any other organism. The C-terminal portions of the genes that contain this domain are divergent and some contain other yoelii-specific paralogous domains such as PYST-C2 (IPR006491).. +PF09691 Bacterial chaperone lipoprotein (PulS_OutS)
This family comprises lipoproteins from four gamma proteobacterial species: PulS protein of Klebsiella pneumoniae (P20440), the OutS protein of Erwinia chrysanthemi (Q01567) and Pectobacterium chrysanthemi, and the functionally uncharacterized E. coli protein EtpO. PulS and OutS have been shown to interact with and facilitate insertion of secretins into the outer membrane, suggesting a chaperone-like, or piloting function for members of this family.. +PF09692 Argonaute siRNA chaperone (ARC) complex subunit Arb1
Arb1 is required for histone H3 Lys9 (H3-K9) methylation, heterochromatin assembly and siRNA generation in fission yeast .. +PF09693 Phage uncharacterised protein (Phage_XkdX)
This entry identifies a family of small (about 50 amino acid) phage proteins, found in at least 12 different phage and prophage regions of Gram-positive bacteria. In a number of these phage, the gene for this protein is found near the holin and endolysin genes.. +PF09694 Bacterial protein of unknown function (Gcw_chp)
This entry represents a conserved hypothetical protein about 240 residues in length found so far in Proteobacteria including Shewanella oneidensis and Ralstonia solanacearum, usually as part of a paralogous family. The function is unknown.. +PF09695 Bacterial protein of unknown function (YtfJ_HI0045)
These are sequences from gamma proteobacteria that are related to the E. coli protein, YtfJ.. +PF09696 Ctf8
Pfam-B_46199 (release 21.0). Ctf8 (chromosome transmissions fidelity 8) is a component of the Ctf18 RFC-like complex which is a DNA clamp loader involved in sister chromatid cohesion.. +PF09697 Protein of unknown function (Porph_ging)
This family of proteins of unknown function is found in Porphyromonas gingivalis (Bacteroides gingivalis).. +PF09698 Geobacter CxxxxCH...CXXCH motif (GSu_C4xC__C2xCH)
This motif occurs from three to eight times in eight different proteins of Geobacter sulfurreducens. The final CXXCH motif matches the cytochrome c family haem-binding site signature, suggesting that the sequence may be involved in haem-binding.. +PF09699 Doubled CXXCH motif (Paired_CXXCH_1)
This entry represents a domain of about 41 amino acids that contains, among other motifs, two copies of the motif CXXCH associated with haem binding. This domain is predicted to be a high molecular weight c-type cytochrome and is often found in multiple copies. Members are found mostly in species of Shewanella, Geobacter, and Vibrio.. +PF09700 CRISPR-associated protein (Cas_Cmr3)
CRISPR is a term for Clustered Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR associated) proteins. This highly divergent family, found in at least ten different archaeal and bacterial species, is represented by TM1793 from Thermotoga maritima.. +PF09701 CRISPR-associated protein (Cas_Cmr5)
CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family, represented by TM1791.1 of Thermotoga maritima, is found in both archaeal and bacterial species.. +PF09702 CRISPR-associated protein (Cas_Csa5)
CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry represents a minor family of Cas proteins found in various species of Sulfolobus and Pyrococcus (all archaeal). It is found with two different CRISPR loci in Sulfolobus solfataricus.. +PF09703 CRISPR-associated protein (Cas_Csa4)
CRISPR loci appear to be mobile elements with a wide host range. This entry represents a protein that tends to be found near CRISPR repeats. The species range for this protein, so far, is exclusively archaeal. It is found so far in only four different species, and includes two tandem genes in Pyrococcus furiosus DSM 3638. CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins.. +PF09704 CRISPR-associated protein (Cas_Cas5)
CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This small Cas family is represented by CT1134 of Chlorobium tepidum.. +PF09706 CRISPR-associated protein (Cas_CXXC_CXXC)
CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes a conserved region of about 65 amino acids from an otherwise highly divergent protein found in a minority of CRISPR-associated protein regions. This region features two motifs of CXXC.. +PF09707 CRISPR-associated protein (Cas_Cas2CT1978)
This entry represents a minor branch of the Cas2 family of CRISPR-associated protein which are found in IPR003799. Cas proteins are found adjacent to a characteristic short, palindromic repeat cluster termed CRISPR, a probable mobile DNA element.. +PF09709 CRISPR-associated protein (Cas_Csd1)
CRISPR loci appear to be mobile elements with a wide host range. This entry represents proteins that tend to be found near CRISPR repeats. The species range, so far, is exclusively bacterial and mesophilic, although CRISPR loci are particularly common among the archaea and thermophilic bacteria. Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins.. +PF09710 Treponema clustered lipoprotein (Trep_dent_lipo)
This entry represents a family of six predicted lipoproteins from a region of about 20 tandemly arranged genes in the Treponema denticola genome. Two other neighbouring genes share the lipoprotein signal peptide region but do not show more extensive homology. The function of this locus is unknown.. +PF09711 CRISPR-associated protein (Cas_Csn2)
CRISPR loci appear to be mobile elements with a wide host range. This entry represents proteins found only in CRISPR-containing species, near other CRISPR-associated proteins (cas). The species range so far for these proteins is pathogenic bacteria only. Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats).. +PF09712 Poly(R)-hydroxyalkanoic acid synthase subunit (PHA_synth_III_E)
This entry represents the PhaE subunit of the heterodimeric class (class III) of polymerase for poly(R)-hydroxyalkanoic acids (PHAs), carbon and energy storage polymers of many bacteria. The most common PHA is polyhydroxybutyrate but about 150 different constituent hydroxyalkanoic acids (HAs) have been identified in various species.. +PF09713 Plant protein 1589 of unknown function (A_thal_3526)
This plant-specific family of proteins is defined by an uncharacterised region 57 residues in length. It is found toward the N terminus of most proteins that contain it. Examples include at least several proteins from Arabidopsis thaliana and Oryza sativa. The function of the proteins is unknown.. +PF09715 Plasmodium protein of unknown function (Plasmod_dom_1)
These sequences represent an uncharacterised family consisting of a small number of hypothetical proteins of the malaria parasite Plasmodium falciparum (isolate 3D7).. +PF09716 Malarial early transcribed membrane protein (ETRAMP)
These sequences represent a family of proteins from the malaria parasite Plasmodium falciparum, several of which have been shown to be expressed specifically in the ring stage as well as the rodent parasite Plasmodium yoelii. A homologue from Plasmodium chabaudi was localized to the parasitophorous vacuole membrane. Members have an initial hydrophobic, Phe/Tyr-rich, stretch long enough to span the membrane, a highly charged region rich in Lys, a second putative transmembrane region and a second highly charged, low complexity sequence region. Some members have up to 100 residues of additional C-terminal sequence. These genes have been shown to be found in the sub-telomeric regions of both Plasmodium falciparum and P. yoelii chromosomes.. +PF09717 Plasmodium falciparum domain of unknown function (CPW_WPC)
This group of sequences is defined by a domain of about 61 residues in length with six well-conserved cysteine residues and six well-conserved aromatic sites. The domain can be found in tandem repeats, and is known so far only in Plasmodium falciparum. It is named for motifs of CPxxW and (less well conserved) WPC. Its function is unknown.. +PF09718 Lambda phage tail tape-measure protein (Tape_meas_lam_C)
This represents a relatively well-conserved region near the C terminus of the tape measure protein of a lambda and related phage. The protein, which controls phage tail length, is typically about 1000 residues in length. Both low-complexity sequence and insertion/deletion events appear common in this family. Mutational studies suggest a ruler or template role in the determination of phage tail length. Similar behaviour is attributed to proteins from distantly related or unrelated families in other phage.. +PF09719 Putative redox-active protein (C_GCAxxG_C_C)
This entry represents a putative redox-active protein of about 140 residues, with four perfectly conserved Cys residues. It includes a CGAXXG motif. Most members are found within one or two loci of transporter or oxidoreductase genes. A member from Geobacter sulfurreducens, located in a molybdenum transporter operon, has a TAT (twin-arginine translocation) signal sequence for Sec-independent transport across the plasma membrane, a hallmark of bound prosthetic groups such as FeS clusters.. +PF09720 Stabl_TIGR02574;
Putative addiction module component. This entry defines several short bacterial proteins, typically about 75 amino acids long, which are always found as part of a pair (at least) of small genes. The other protein in the pair always belongs to a family of plasmid stabilisation proteins (IPR007712). It is likely that this protein and its partner comprise some form of addiction module - a pair of genes consisting of a stable toxin and an unstable antitoxin which mediate programmed cell death - although these gene-pairs are usually found on the bacterial main chromosome.. +PF09721 Transmembrane exosortase (Exosortase_EpsH)
Members of this family are designated exosortase, analogous to sortase in cell wall sorting mediated by LPXTG domains in Gram-positive bacteria. The phylogenetic distribution of the proteins in this entry is nearly perfectly correlated with the distribution of the proteins having the PEP-CTERM anchor motif, IPR013424. Members of this entry are integral membrane proteins with eight predicted transmembrane helices in common. Some members of this family have long trailing sequences past the region described by this model. This model does not include the region of the first predicted transmembrane region. The best characterised member is EpsH of Methylobacillus sp. 12S, where it is part of a locus associated with biosynthesis of the exopolysaccharide methanol-an.. +PF09722 CHP2293;
Protein of unknown function (DUF2384). Proteins in this family are found almost exclusively in the Proteobacteria, but also in Gloeobacter violaceus PCC 7421, a cyanobacterium. The function is unknown.. +PF09723 CxxC_CxxC_SSSS;
This entry represents a region of about 41 amino acids found in a number of small proteins in a wide range of bacteria. The region usually begins with the initiator Met and contains two CxxC motifs separated by 17 amino acids. One protein in this entry has been noted as a putative regulatory protein, designated FmdB. Most proteins in this entry have a C-terminal region containing highly degenerate sequence.. +PF09724 Uncharacterized conserved protein (DUF2036)
This family of proteins includes members ranging in size from approximately 300 to 460 residues. There are a number of well-conserved domains along the length.. +PF09725 Folate-sensitive fragile site protein Fra10Ac1
This entry represents the full-length proteins in which, in higher eukaryotes, the nested domain EDSLL lies. Fra10Ac1 is a highly conserved protein, of unknown function that is nuclear and highly expressed in brain .. +PF09726 Transmembrane protein
This entry is a highly conserved protein present in eukaryotes.. +PF09727 Cortactin-binding protein-2
This entry is the first approximately 250 residues of cortactin-binding protein 2. In addition to being a positional candidate for autism this protein is expressed at highest levels in the brain in humans. The human protein has six associated ankyrin repeat domains Pfam:PF00023 towards the C-terminus which act as protein-protein interaction domains .. +PF09728 Myosin-like coiled-coil protein
Taxilin contains an extraordinarily long coiled-coil domain in its C-terminal half and is ubiquitously expressed. It is a novel binding partner of several syntaxin family members and is possibly involved in Ca2+-dependent exocytosis in neuroendocrine cells . Gamma-taxilin, described as leucine zipper protein Factor Inhibiting ATF4-mediated Transcription (FIAT), localises to the nucleus in osteoblasts and dimerises with ATF4 to form inactive dimers, thus inhibiting ATF4-mediated transcription .. +PF09729 Gti1/Pac2 family
In S. pombe the gti1 protein promotes the onset of gluconate uptake upon glucose starvation . In S. pombe the Pac2 protein controls the onset of sexual development, by inhibiting the expression of ste11, in a pathway that is independent of the cAMP cascade .. +PF09730 Microtubule-associated protein Bicaudal-D
BicD proteins consist of three coiled-coil domains and are involved in dynein-mediated minus end-directed transport from the Golgi apparatus to the endoplasmic reticulum (ER). For full functioning they bind with GSK-3beta Pfam:PF05350 to maintain the anchoring of microtubules to the centromere. It appears that amino-acid residues 437-617 of BicD and the kinase activity of GSK-3 are necessary for the formation of a complex between BicD and GSK-3beta in intact cells .. +PF09731 Mitochondrial inner membrane protein
Mitofilin controls mitochondrial cristae morphology. Mitofilin is enriched in the narrow space between the inner boundary and the outer membranes, where it forms a homotypic interaction and assembles into a large multimeric protein complex . The first 78 amino acids contain a typical amino-terminal-cleavable mitochondrial presequence rich in positive-charged and hydroxylated residues and a membrane anchor domain. In addition, it has three centrally located coiled coil domains .. +PF09732 Cactin;
Cactus-binding C-terminus of cactin protein. CactinC_cactus is the C-terminal 200 residues of the cactin protein which are necessary for the association of cactin with IkappaB-cactus as one of the intracellular members of the Rel complex. The Rel (NF-kappaB) pathway is conserved in invertebrates and vertebrates. In mammals, it controls the activities of the immune and inflammatory response genes as well as viral genes, and is critical for cell growth and survival. In Drosophila, the Rel pathway functions in the innate cellular and humoral immune response, in muscle development, and in the establishment of dorsal-ventral polarity in the early embryo . Most members of the family also have a Cactin_mid domain Pfam:PF10312 further upstream.. +PF09733 VEFS-Box of polycomb protein
The VEFS-Box (VRN2-EMF2-FIS2-Su(z)12) box is the C-terminal region of these proteins, characterised by an acidic cluster and a tryptophan/methionine-rich sequence, the acidic-W/M domain . Some of these sequences are associated with a zinc-finger domain about 100 residues towards the N-terminus. This protein is one of the polycomb cluster of proteins which control HOX gene transcription as it functions in heterochromatin-mediated repression .. +PF09734 RNA polymerase III transcription factor (TF)IIIC subunit
TFIIIC1 is a multisubunit DNA binding factor that serves as a dynamic platform for assembly of pre-initiation complexes on class III genes. This entry represents the tau 95 subunit which holds a key position in TFIIIC, exerting both upstream and downstream influence on the TFIIIC-DNA complex by rendering the complex more stable. Once bound to tDNA-intragenic promoter elements, TFIIIC directs the assembly of TFIIIB on the DNA, which in turn recruits the RNA polymerase III (pol III) and activates multiple rounds of transcription.. +PF09735 Membrane-associated apoptosis protein
Expression of this protein was found to be markedly reduced in patients with Alzheimer's disease . It is involved in the regulation of actin polymerisation in the brain as part of a WAVE2 signalling complex .. +PF09736 DUF2050;
Pre-mRNA-splicing factor of RES complex. This entry is characterised by proteins with alternating conserved and low-complexity regions. Bud13 together with Snu17p and a newly identified factor, Pml1p/Ylr016c, form a novel trimeric complex called the RES complex (pre-mRNA retention and splicing complex). Subunits of this complex are not essential for viability of yeasts but they are required for efficient splicing in vitro and in vivo. Furthermore, inactivation of this complex causes pre-mRNA leakage from the nucleus. Bud13 contains a unique, phylogenetically conserved C-terminal region of unknown function .. +PF09737 De-etiolated protein 1 Det1
+PF09738 Double stranded RNA binding protein (DUF2051)
This is a novel protein identified as interacting with the leucine-rich repeat domain of human flightless-I, FliI protein.. +PF09739 DUF2044;
Mini-chromosome maintenance replisome factor. This entry is of proteins of approximately 600 residues in length containing alternating regions of conservation and low complexity. The Arabidopsis protein is a replisome factor found to bind with the mini-chromosome maintenance, MCM-binding, complex and is crucial for efficient DNA replication.. +PF09740 Uncharacterized conserved protein (DUF2043)
This is a 100 residue conserved region of a family of proteins found from fungi to humans. This region contains three conserved Cysteines and a motif of {CP}{y/l}{HG}.. +PF09741 Uncharacterized conserved protein (DUF2045)
This entry is the conserved 250 residues of proteins of approximately 450 amino acids. It contains several highly conserved motifs including a CVxLxxxD motif. The function is unknown.. +PF09742 Dyggve-Melchior-Clausen syndrome protein
Dymeclin (Dyggve-Melchior-Clausen syndrome protein) contains a large number of leucine and isoleucine residues and a total of 17 repeated dileucine motifs. It is characteristically about 700 residues long and present in plants and animals. Mutations in the gene coding for this protein in humans give rise to the disorder Dyggve-Melchior-Clausen syndrome (DMC, MIM 223800) which is an autosomal-recessive disorder characterised by the association of a spondylo-epi-metaphyseal dysplasia and mental retardation . DYM transcripts are widely expressed throughout human development and Dymeclin is not an integral membrane protein of the ER, but rather a peripheral membrane protein dynamically associated with the Golgi apparatus . . +PF09743 Uncharacterized conserved protein (DUF2042)
This entry is the conserved N-terminal 300 residues of a group of proteins found from protozoa to Humans. The function is unknown.. +PF09744 JNK_SAPK-associated protein-1
This is the N-terminal 200 residues of a set of proteins conserved from yeasts to humans. Most of the proteins in this entry have an RhoGEF Pfam:PF00621 domain at their C-terminal end.. +PF09745 Coiled-coil domain-containing protein 55 (DUF2040)
This entry is a conserved domain of approximately 130 residues of proteins conserved from fungi to humans. The proteins do contain a coiled-coil domain, but the function is unknown.. +PF09746 Tumour-associated protein
Membralin is evolutionarily highly conserved; though it seems to represent a unique protein family. The protein appears to contain several transmembrane regions. In humans it is expressed in certain cancers, particularly ovarian cancers . Membralin-like gene homologues have been identified in plants including grape, cotton and tomato .. +PF09747 Coiled-coil domain containing protein (DUF2052)
This entry is of sequences of two conserved domains separated by a region of low complexity, spanning some 200 residues. The function is unknown.. +PF09748 Transcription factor subunit Med10 of Mediator complex
Med10 is one of the protein subunits of the Mediator complex, tethered to Rgr1 protein. The Mediator complex is required for the transcription of most RNA polymerase II (Pol II)-transcribed genes. Med10 specifically mediates basal-level HIS4 transcription via Gcn4, and, additionally, there is a putative requirement for Med10 in Bas2-mediated transcription . Med10 is part of the middle region of Mediator .. +PF09749 Uncharacterised conserved protein
This entry is of proteins of approximately 300 residues conserved from plants to humans. It contains two conserved motifs, HxSL and FHVSL. The function is unknown.. +PF09750 DRY;
Alternative splicing regulator . This entry represents the conserved N-terminal region of SWAP (suppressor-of-white-apricot protein) proteins. This region contains two highly conserved motifs, viz: DRY and EERY, which appear to be the sites for alternative splicing of exons 2 and 3 of the SWAP mRNA . These proteins are thus thought to be involved in auto-regulation of pre-mRNA splicing. Most family members are associated with two Surp domains Pfam:PF01805 and an Arginine-serine-rich binding region towards the C-terminus.. +PF09751 Nuclear protein Es2
This entry is of a family of proteins of approximately 500 residues with alternating regions of low complexity and conservation where the domain similarities are strong. Apart from a predicted coiled-coil domain, no other known functional domains have been characterised. The protein appears to be expressed in the nucleus and particularly highly in the pons sub-region of the brain. The protein is clearly necessary for normal development of the nervous system .. +PF09752 Uncharacterized conserved protein (DUF2048)
The proteins in this family are conserved from plants to vertebrates. The function is unknown.. +PF09753 Membrane fusion protein Use1
This entry is of a family of proteins all approximately 300 residues in length. The proteins have a single C-terminal trans-membrane domain and a SNARE [soluble NSF (N-ethylmaleimide-sensitive fusion protein) attachment protein receptor] domain of approximately 60 residues. The SNARE domains are essential for membrane fusion and are conserved from yeasts to humans. Use1 is one of the three protein subunits that make up the SNARE complex and it is specifically required for Golgi-endoplasmic reticulum retrograde transport.. +PF09754 HCCA3;
This PAC2 (Proteasome assembly chaperone) family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 247 and 307 amino acids in length. These proteins function as a chaperone for the 26S proteasome. The 26S proteasome mediates ubiquitin-dependent proteolysis in eukaryotic cells. A number of studies including very recent ones have revealed that assembly of its 20S catalytic core particle is an ordered process that involves several conserved proteasome assembly chaperones (PACs). Two heterodimeric chaperones, PAC1-PAC2 and PAC3-PAC4, promote the assembly of rings composed of seven alpha subunits .. +PF09755 Uncharacterized conserved protein H4 (DUF2046)
This is the conserved N-terminal 350 residues of a family of proteins of unknown function possibly containing a coiled-coil domain.. +PF09756 DDRGK domain
This is a family of proteins of approximately 300 residues, found in plants and vertebrates. They contain a highly conserved DDRGK motif.. +PF09757 Arb2 domain
A second fission yeast Argonaute complex (Argonaute siRNA chaperone, ARC) that contains two previously uncharacterized proteins, Arb1 and Arb2, both of which are required for histone H3 Lys9 (H3-K9) methylation, heterochromatin assembly and siRNA generation . This family includes a region found in Arb2 and the Hda1 protein.. +PF09758 Uncharacterised conserved protein
This entry represents an N-terminal region of approximately 150 residues of a family of proteins of unknown function. It contains a highly conserved FPL motif.. +PF09759 Spinocerebellar ataxia type 10 protein domain
This is the conserved C-terminal 100 residues of Ataxin-10. Ataxin-10 belongs to the family of armadillo repeat proteins and in solution it tends to form homotrimeric complexes, which associate via a tip-to-tip association in a horseshoe-shaped contact with the concave sides of the molecules facing each other. This domain may represent the homo-association site since that is located near the C-terminus of Ataxin-10. The protein does not contain a signal sequence for secretion or any subcellular compartment confirming its cytoplasmic localisation, specifically to the olivocerebellar region .. +PF09762 Coiled-coil domain-containing protein (DUF2037)
This entry represents the conserved N-terminal 200 residues of a family of proteins conserved from plants to vertebrates. In Drosophila it comes from the Fidipidine gene, and is of unknown function.. +PF09763 Sec3;
Exocyst complex component Sec3. This entry is the conserved middle and C-terminus of the Sec3 protein. Sec3 binds to the C-terminal cytoplasmic domain of GLYT1 (glycine transporter protein 1). Sec3 is the exocyst component that is closest to the plasma membrane docking site and it serves as a spatial landmark in the plasma membrane for incoming secretory vesicles. Sec3 is recruited to the sites of polarised membrane growth through its interaction with Rho1p, a small GTP-binding protein.. +PF09764 WDYHV;
N-terminal glutamine amidase. This protein is conserved from plants to humans. It represents a family of N terminal glutamine amidases. The enzyme removes the NH2 group from a Gln, at the N-terminal, rendering it a Glu.. +PF09765 WD-repeat region
This entry is of a region of approximately 100 residues containing three WD repeats and six cysteine residues possibly as three cystine-bridges. These regions are contained within the Fancl protein in humans which is the putative E3 ubiquitin ligase subunit of the FA complex (Fanconi anaemia). Eight subunits of the Fanconi anaemia gene products form a multisubunit nuclear complex which is required for mono-ubiquitination of a downstream FA protein, FANCD2. The WD repeats are required for interaction with other subunits of the FA complex.. +PF09766 Fms-interacting protein
This entry carries part of the crucial 144 N-terminal residues of the FmiP protein, which is essential for the binding of the protein to the cytoplasmic domain of activated Fms-molecules in M-CSF induced haematopoietic differentiation of macrophages. The C-terminus contains a putative nuclear localisation sequence and a leucine zipper which suggest further, as yet unknown, nuclear functions. The level of FMIP expression might form a threshold that determines whether cells differentiate into macrophages or into granulocytes.. +PF09767 Predicted membrane protein (DUF2053)
This entry is of the conserved N-terminal 150 residues of proteins conserved from plants to humans. The function is unknown although some annotation suggests it to be a transmembrane protein.. +PF09768 Ku70-bp;
Peptidase M76 family. This is a family of metalloproteases. Proteins in this family are also annotated as Ku70-binding proteins.. +PF09769 Apolipoprotein O
Members of this family promote cholesterol efflux from macrophage cells. They are present in various lipoprotein complexes, including HDL, LDL and VLDL. The apoprotein is secreted by a microsomal triglyceride transfer protein (MTTP)-dependent mechanism, probably as a VLDL-associated protein that is subsequently transferred to HDL .. +PF09770 Topoisomerase II-associated protein PAT1
Members of this family are necessary for accurate chromosome transmission during cell division . . +PF09771 Transmemb_18; Tmem18A;
Transmembrane protein 188. The function of this family of transmembrane proteins has not, as yet, been determined.. +PF09772 Transmemb_26;
Transmembrane protein 26. The function of this family of transmembrane proteins has not, as yet, been determined.. +PF09773 Meckelin (Transmembrane protein 67)
Members of this family are thought to be related to the ciliary basal body. Defects result in Meckel syndrome type 3, [MIM:607361], an autosomal recessive disorder characterised by a combination of renal cysts and variably associated features including developmental anomalies of the central nervous system (typically encephalocele), hepatic ductal dysplasia and cysts, and polydactyly. Joubert syndrome type 6 [MIM:610688] is also a manifestation of certain mutations; it is an autosomal recessive congenital malformation of the cerebellar vermis and brainstem with abnormalities of axonal decussation (crossing in the brain) affecting the corticospinal tract and superior cerebellar peduncles. Individuals with Joubert syndrome have motor and behavioral abnormalities, including an inability to walk due to severe clumsiness and 'mirror' movements, and cognitive and behavioural disturbances .. +PF09774 Caffeine-induced death protein 2
Members of this family of proteins mediate the disruption of the DNA replication checkpoint (S-M checkpoint) mechanism caused by caffeine.. +PF09775 Keratinocyte-associated protein 2
Members of this family comprise various keratinocyte-associated proteins. Their exact function has not, as yet, been determined.. +PF09776 Mitochondrial ribosomal protein L55
Members of this family are involved in mitochondrial biogenesis and G2/M phase cell cycle progression. They form a component of the mitochondrial ribosome large subunit (39S) which comprises a 16S rRNA and about 50 distinct proteins.. +PF09777 Osteopetrosis-associated transmembrane protein 1 precursor
Members of this family of proteins are required for osteoclast and melanocyte maturation and function. Mutations give rise to autosomal recessive osteopetrosis [MIM:259700]; also called autosomal recessive Albers-Schonberg disease.. +PF09778 Guanylylate cyclase
Members of this family of proteins catalyse the conversion of guanosine triphosphate (GTP) to 3',5'-cyclic guanosine monophosphate (cGMP) and pyrophosphate.. +PF09779 DUF2349;
Ima1 N-terminal domain. This domain occurs at the N-terminus of the Schizosaccharomyces pombe inner nuclear membrane protein, Ima1. Ima1 interacts with other inner nuclear membrane proteins [1-2].. +PF09781 NADH:ubiquinone oxidoreductase, NDUFB5/SGDH subunit
Members of this family mediate the transfer of electrons from NADH to the respiratory chain. The immediate electron acceptor for the enzyme is believed to be ubiquinone, the reaction that occurs being: NADH + ubiquinone = NAD(+) + ubiquinol .. +PF09782 NADH:ubiquinone oxidoreductase, NDUFB6/B17 subunit
Members of this family mediate the transfer of electrons from NADH to the respiratory chain. The immediate electron acceptor for the enzyme is believed to be ubiquinone, the reaction that occurs being: NADH + ubiquinone = NAD(+) + ubiquinol .. +PF09783 Vacuolar import and degradation protein
Members of this family are involved in the negative regulation of gluconeogenesis. They are required for both proteasome-dependent and vacuolar catabolite degradation of fructose-1,6-bisphosphatase (FBPase), where they probably regulate FBPase targeting from the FBPase-containing vesicles to the vacuole .. +PF09784 Mitochondrial ribosomal protein L31
Pfam-B_24102 (release 21.0). This is a family of mitochondrial ribosomal proteins. L31 is essential for mitochondrial function in yeast .. +PF09785 Prp31 C terminal domain
Pfam-B_7665 (release 21.0). This is the C terminal domain of the pre-mRNA processing factor Prp31. Prp31 is required for U4/U6.U5 tri-snRNP formation . In humans this protein has been linked to autosomal dominant retinitis pigmentosa .. +PF09786 Cytochrome B561, N terminal
Members of this family are found in the N terminal region of cytochrome B561, as well as in various other putative uncharacterised proteins.. +PF09787 Golgin subfamily A member 5
Members of this family of proteins are involved in maintaining Golgi structure. They stimulate the formation of Golgi stacks and ribbons, and are involved in intra-Golgi retrograde transport. Two main interactions have been characterised: one with RAB1A that has been activated by GTP-binding and another with isoform CASP of CUTL1 .. +PF09788 Transmemb_55A; Tmem55A;
Transmembrane protein 55A. Members of this family catalyse the hydrolysis of the 4-position phosphate of phosphatidylinositol 4,5-bisphosphate, in the reaction: 1-phosphatidyl-myo-inositol 4,5-bisphosphate + H(2)O = 1-phosphatidyl-1D-myo-inositol 5-phosphate + phosphate.. +PF09789 Uncharacterized coiled-coil protein (DUF2353)
Members of this family of uncharacterised proteins have no known function.. +PF09790 Hyccin
Members of this family of proteins may have a role in the beta-catenin-Tcf/Lef signaling pathway, as well as in the process of myelination of the central and peripheral nervous system. Defects in Hyccin are the cause of hypomyelination with congenital cataracts [MIM:610532]. This disorder is characterised by congenital cataracts, progressive neurologic impairment, and diffuse myelin deficiency. Affected individuals experience progressive pyramidal and cerebellar dysfunction, muscle weakness and wasting prevailing in the lower limbs .. +PF09791 Oxidoreductase-like protein, N-terminal
Members of this family are found in the N terminal region of various oxidoreductase like proteins. Their exact function is, as yet, unknown.. +PF09792 DUF2295;
Ubiquitin 3 binding protein But2 C-terminal domain. Pfam-B_45554 (release 21.0). This family is of proteins conserved in yeasts. It binds to Uba3 and is involved in the NEDD8 signalling pathway . This family represents a presumed C-terminal domain.. +PF09793 Anticodon-binding domain
This domain of approximately 100 residues is conserved from plants to humans. It is frequently found in association with Lsm domain-containing proteins. It is an anticodon-binding domain of a prolyl-tRNA synthetase, whose PDB structure is available under the identifier 1h4q.. +PF09794 Transport protein Avl9
Pfam-B_12001 (release 21.0). Avl9 is a protein involved in exocytic transport from the Golgi. It has been speculated that Avl9 could play a role in deforming membranes for vesicle fission and/or in recruiting cargo .. +PF09795 Autophagy-related protein 31
Pfam-B_60001 (release 21.0). Autophagy is an intracellular degradation system that responds to nutrient starvation. Cis1/Atg31 has been shown to be required for autophagosome formation in Saccharomyces cerevisiae . It interacts with Atg17 .. +PF09796 Ubiquinol-cytochrome-c reductase complex subunit (QCR10)
The QCR10 family of proteins are a component of the ubiquinol-cytochrome c reductase complex (also known as complex III or cytochrome b-c1 complex). This complex is located on the inner mitochondrial membrane and it couples electron transfer from ubiquinol to cytochrome. This subunit (QCR10) is required for stable association of the iron-sulfur protein with the complex .. +PF09797 N-acetyltransferase B complex (NatB) non catalytic subunit
Pfam-B_12009 (release 21.0). This is the non-catalytic subunit of the N-terminal acetyltransferase B complex (NatB). The NatB complex catalyses the acetylation of the amino-terminal methionine residue of all proteins beginning with Met-Asp or Met-Glu and of some proteins beginning with Met-Asn or Met-Met. In Saccharomyces cerevisiae this subunit is called MDM20 and in Schizosaccharomyces pombe it is called Arm1. NatB acetylates the Tpm1 protein and regulates tropomyosin-actin interactions. This subunit is required by the NatB complex for the N-terminal acetylation of Tpm1 .. +PF09798 DNA damage checkpoint protein
Pfam-B_41058 (release 21.0). This is a family of proteins which regulate checkpoint kinases. In Schizosaccharomyces pombe this protein is called Rad26 and in Saccharomyces cerevisiae it is called LCD1 .. +PF09799 Transmemb_17; Tmem17;
Predicted membrane protein. KOGs (KOG4694), (KOG4502). This is a 100 amino acid region of a family of proteins conserved from nematodes to humans. It is predicted to be a transmembrane region but its function is not known.. +PF09801 Integral membrane protein S linking to the trans Golgi network
Members of this family are integral membrane proteins involved in protein trafficking between the late Golgi and endosome. They may also serve as a receptor for ADP-ribosylation factor-related protein 1 (ARFRP1) . Sys1p is a small integral membrane protein with four predicted transmembrane domains that localises to the Trans Golgi network TGN in yeast and human cells .. +PF09802 Preprotein translocase subunit Sec66
Members of this family of proteins are a component of the heterotetrameric Sec62/63 complex composed of SEC62, SEC63, SEC66 and SEC72. The Sec62/63 complex associates with the Sec61 complex to form the Sec complex. Sec66 is involved in SRP-independent post-translational translocation across the endoplasmic reticulum and functions together with the Sec61 complex and KAR2 in a channel-forming translocon complex. Furthermore, Sec66 is also required for growth at elevated temperatures .. +PF09803 Uncharacterized conserved protein (DUF2346)
Members of this family of proteins have no known function.. +PF09804 Uncharacterized conserved protein (DUF2347)
Members of this family of hypothetical proteins have no known function.. +PF09805 Nucleolar protein 12 (25kDa)
Members of this family of proteins are part of the yeast nuclear pore complex-associated pre-60S ribosomal subunit . The family functions as a highly conserved exonuclease that is required for the 5'-end maturation of 5.8S and 25S rRNAs, demonstrating that 5'-end processing also has a redundant pathway. Nop25 binds late pre-60S ribosomes, accompanying them from the nucleolus to the nuclear periphery; and there is evidence for both physical and functional links between late 60S subunit processing and export .. +PF09806 Cyclin-dependent kinase 2-associated protein
Members of this family of proteins are cell-growth suppressors, associating with and influencing the biological activities of important cell cycle regulators in the S phase including monomeric non-phosphorylated cyclin-dependent kinase 2 (CDK2) and DNA polymerase alpha/primase. An association between mutations in the gene coding for this protein and oral cancer has been described.. +PF09807 Uncharacterized conserved protein (DUF2348)
Members of this family of putative uncharacterized proteins have no known function.. +PF09808 Small nuclear RNA activating complex (SNAPc), subunit SNAP43
Members of this family are part of the SNAPc complex required for the transcription of both RNA polymerase II and III small-nuclear RNA genes. They bind to the proximal sequence element (PSE), a non-TATA-box basal promoter element common to these 2 types of genes. Furthermore, they also recruit TBP and BRF2 to the U6 snRNA TATA box.. +PF09809 MRP_L27;
Mitochondrial ribosomal protein L27. Members of this family of proteins are components of the mitochondrial ribosome large subunit. They are also involved in apoptosis and cell cycle regulation.. +PF09810 Morph_protein1;
Exonuclease V - a 5' deoxyribonuclease. Exonuclease V is a monomeric 5' deoxyribonuclease that is localised in the nucleus. It degrades single-stranded, but not double-stranded, DNA from the 5'-end, and the products are dinucleotides, except the 3'-terminal tri- and tetranucleotides, which are not degraded. The initial hydrolytic cut of exonuclease V on the dephosphorylated substrate produces a mixture of dinucleoside monophosphates and trinucleoside diphosphates. The enzyme is processive in action . Exo5 is specific for single-stranded DNA and does not hydrolyze RNA. However, Exo5 has the capacity to slide across 5' double-stranded DNA or 5' RNA sequences and resume cutting two nucleotides downstream of the double-stranded-to-single-stranded junction or RNA-to-DNA junction, respectively .. +PF09811 Essential protein Yae1, N terminal
Members of this family are found in the N terminal region of the essential protein Yae1. Their exact function has not, as yet, been determined. The family DUF1715, Pfam:PF08215 has now been merged into this family.. +PF09812 MRP_L28;
Mitochondrial ribosomal protein L28. Members of this family are components of the mitochondrial large ribosomal subunit. Mature mitochondrial ribosomes consist of a small (37S) and a large (54S) subunit. The 37S subunit contains at least 33 different proteins and 1 molecule of RNA (15S). The 54S subunit contains at least 45 different proteins and 1 molecule of RNA (21S) .. +PF09813 Coiled-coil domain-containing protein 56
Members of this family of proteins have no known function.. +PF09814 DUF2351;
HECT-like Ubiquitin-conjugating enzyme (E2)-binding. HECT_2 is a family of UbcH10-binding proteins.. +PF09815 XK-related protein
Members of this family comprise various XK-related proteins, that are involved in sodium-dependent transport of neutral amino acids or oligopeptides. These proteins are responsible for the Kx blood group system - defects result in McLeod syndrome [MIM:314850], an X-linked multi-system disorder characterised by late onset abnormalities in the neuromuscular and hematopoietic systems .. +PF09816 RNA polymerase II transcription elongation factor
Members of this family act as transcriptional transactivators of ELL and ELL2 elongation activities . Eaf proteins form a stable heterodimer complex with ELL proteins to facilitate the binding of RNA polymerase II to activate transcription elongation. The N-terminus of approx 120 residues is globular and highly conserved .. +PF09817 Uncharacterized conserved protein (DUF2352)
Members of this family of uncharacterised proteins have no known function.. +PF09818 Predicted ATPase of the ABC class
Members of this family include various bacterial predicted ABC class ATPases.. +PF09819 ABC-type cobalt transport system, permease component
Members of this family of prokaryotic proteins include various hypothetical proteins as well as ABC-type cobalt transport systems.. +PF09820 Predicted AAA-ATPase
Pfam-B_5377 (release 16.0). This family contains many hypothetical bacterial proteins. This family was previously the N-terminal part of the Pfam DUF1703 (Pfam:PF08011) family before it was split into two. This region is predicted to be an AAA-ATPase domain .. +PF09821 ABC_transp; AAA_36;
C-terminal AAA-associated domain. This had been thought to be an ATPase domain of ABC-transporter proteins. However, only one member has any trans-membrane regions. It is associated with an upstream ATP-binding cassette family, Pfam:PF00005.. +PF09822 ABC-type uncharacterized transport system
This domain is found in various eukaryotic and prokaryotic intra-flagellar transport proteins involved in gliding motility, as well as in several hypothetical proteins.. +PF09823 Domain of unknown function (DUF2357)
This entry was previously the N terminal portion of DUF524 (Pfam:PF04411) before it was split into two. This domain has no known function. It is predicted to adopt an all beta secondary structure pattern followed by mainly alpha-helical structures .. +PF09824 ArsR transcriptional regulator
Members of this family of archaeal proteins are conserved transcriptional regulators belonging to the ArsR family.. +PF09825 Biotin-protein ligase, N terminal
The function of this structural domain is unknown. It is found to the N terminus of the biotin protein ligase catalytic domain.. +PF09826 Beta propeller domain
Members of this family comprise secreted bacterial proteins containing C-terminal beta-propeller domain distantly related to WD-40 repeats.. +PF09827 CRISPR associated protein Cas2
Members of this family of bacterial proteins comprise various hypothetical proteins, as well as CRISPR (clustered regularly interspaced short palindromic repeats) associated proteins, conferring resistance to infection by certain bacteriophages.. +PF09828 Chromate resistance exported protein
Members of this family of bacterial proteins are involved in the reduction of chromate accumulation and are essential for chromate resistance.. +PF09829 Uncharacterized protein conserved in bacteria (DUF2057)
This domain, found in various prokaryotic proteins, has no known function.. +PF09830 ATP adenylyltransferase
Members of this family of proteins catabolise Ap4N nucleotides (where N is A,C,G or U). Additionally they catalyse the conversion of adenosine-5-phosphosulfate (AMPs) plus Pi to ADP plus sulphate, the exchange of NDP and phosphate and the synthesis of Ap4A from AMPs plus ATP .. +PF09831 Uncharacterized protein conserved in bacteria (DUF2058)
This domain, found in various prokaryotic proteins, has no known function.. +PF09832 Uncharacterized protein conserved in bacteria (DUF2059)
This domain, found in various prokaryotic proteins, has no known function.. +PF09834 Predicted membrane protein (DUF2061)
This domain, found in various prokaryotic proteins, has no known function.. +PF09835 Uncharacterized protein conserved in bacteria (DUF2062)
This domain, found in various prokaryotic proteins, has no known function.. +PF09836 Uncharacterized protein conserved in bacteria (DUF2063)
This domain, found in various prokaryotic proteins, has no known function.. +PF09837 Uncharacterized protein conserved in bacteria (DUF2064)
This family has structural similarity to proteins in the nucleotide-diphospho-sugar transferases superfamily. The similarity suggests that it is an enzyme with a sugar substrate.. +PF09838 Uncharacterized protein conserved in bacteria (DUF2065)
This domain, found in various prokaryotic proteins, has no known function.. +PF09839 Uncharacterized protein conserved in bacteria (DUF2066)
This domain, found in various prokaryotic proteins, has no known function.. +PF09840 Uncharacterized protein conserved in archaea (DUF2067)
This domain, found in various archaeal proteins, has no known function.. +PF09842 Predicted membrane protein (DUF2069)
This domain, found in various prokaryotes, has no known function.. +PF09843 Predicted membrane protein (DUF2070)
This domain of unknown function is found in various bacterial hypothetical proteins, as well as in prokaryotic polyketide synthase.. +PF09844 Uncharacterized conserved protein (COG2071)
This conserved protein (similar to YgjF), found in various prokaryotes, has no known function.. +PF09845 Zn-ribbon containing protein (DUF2072)
This archaeal protein has no known function.. +PF09846 Uncharacterized protein conserved in archaea (DUF2073)
This archaeal protein has no known function.. +PF09847 Predicted permease (DUF2074)
This domain, found in various archaeal hypothetical proteins, has no known function.. +PF09848 Uncharacterized conserved protein (DUF2075)
This domain, found in various prokaryotic proteins (including putative ATP/GTP binding proteins), has no known function.. +PF09849 Uncharacterized protein conserved in bacteria (DUF2076)
This domain, found in various hypothetical prokaryotic proteins, has no known function. The domain, however, is found in various periplasmic ligand-binding sensor proteins.. +PF09850 Uncharacterized protein conserved in bacteria (DUF2077)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09851 DUF2078;
Short C-terminal domain. +PF09852 Predicted membrane protein (DUF2079)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09853 Putative transposon-encoded protein (DUF2080)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09855 Nucleic-acid-binding protein containing Zn-ribbon domain (DUF2082)
This domain, found in various hypothetical prokaryotic proteins, as well as some Zn-ribbon nucleic-acid-binding proteins has no known function.. +PF09856 Predicted transcriptional regulator (DUF2083)
This domain is found in various prokaryotic transcriptional regulatory proteins belonging to the XRE family. Its exact function is, as yet, unknown.. +PF09857 Uncharacterized protein conserved in bacteria (DUF2084)
This domain, found in various hypothetical prokaryotic proteins, as well as proteins belonging to the UPF0386 family, has no known function.. +PF09858 Predicted membrane protein (DUF2085)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09859 DUF2086;
Oxygenase, catalysing oxidative methylation of damaged DNA. This family of bacterial sequences is predicted to catalyse oxidative de-methylation of damaged bases in DNA.. +PF09860 Uncharacterized protein conserved in bacteria (DUF2087)
This domain, found in various hypothetical prokaryotic proteins and transcriptional activators, has no known function. Structural modelling suggests this domain may bind nucleic acids .. +PF09861 Domain of unknown function (DUF2088)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09862 Protein of unknown function (DUF2089)
This domain, found in various hypothetical prokaryotic proteins, has no known function. This domain is a zinc-ribbon.. +PF09863 Uncharacterized protein conserved in bacteria (DUF2090)
This domain, found in various prokaryotic carbohydrate kinases, has no known function.. +PF09864 DUF2091;
Membrane-bound lysozyme-inhibitor of c-type lysozyme. Lysozymes are ancient and important components of the innate immune system of animals that hydrolyse peptidoglycan, the major bacterial cell wall polymer. Various mechanisms have evolved by which bacteria can evade this bactericidal enzyme, one being the production of lysozyme inhibitors. MliC (membrane bound lysozyme inhibitor of c-type lysozyme) of E. coli and Pseudomonas aeruginosa, possess lysozyme inhibitory activity and confer increased lysozyme tolerance upon expression in E. coli . Structural analyses show that the invariant loop of MliC plays a crucial role in the inhibition of the lysozyme by its insertion into the active site cleft of the lysozyme, where the loop forms hydrogen and ionic bonds with the catalytic residues .. +PF09865 Predicted periplasmic protein (DUF2092)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09866 Uncharacterized protein conserved in bacteria (DUF2093)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09867 Uncharacterized protein conserved in bacteria (DUF2094)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09868 Uncharacterized protein conserved in archaea (DUF2095)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09869 Uncharacterized protein conserved in archaea (DUF2096)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09870 Uncharacterized protein conserved in archaea (DUF2097)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09871 Uncharacterized protein conserved in archaea (DUF2098)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09872 Uncharacterized protein conserved in archaea (DUF2099)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09873 Uncharacterized protein conserved in archaea (DUF2100)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09874 Predicted membrane protein (DUF2101)
This domain, found in various archaeal and bacterial proteins, has no known function.. +PF09875 Uncharacterized protein conserved in archaea (DUF2102)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09876 Predicted metal-binding protein (DUF2103)
This domain, found in various putative metal binding prokaryotic proteins, has no known function.. +PF09877 Predicted membrane protein (DUF2104)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09878 Predicted membrane protein (DUF2105)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09879 Predicted membrane protein (DUF2106)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09880 Predicted membrane protein (DUF2107)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09881 Predicted membrane protein (DUF2108)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09882 Predicted membrane protein (DUF2109)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09883 Uncharacterized protein conserved in archaea (DUF2110)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09884 Uncharacterized protein conserved in archaea (DUF2111)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09885 Uncharacterized protein conserved in archaea (DUF2112)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09886 Uncharacterized protein conserved in archaea (DUF2113)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09887 Uncharacterized protein conserved in archaea (DUF2114)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09888 Uncharacterized protein conserved in archaea (DUF2115)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09889 Uncharacterized protein containing a Zn-ribbon (DUF2116)
This domain, found in various hypothetical archaeal proteins, has no known function. Structural modelling suggests this domain may bind nucleic acids .. +PF09890 Uncharacterized protein conserved in archaea (DUF2117)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09891 Uncharacterized protein conserved in archaea (DUF2118)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09892 Uncharacterized protein conserved in archaea (DUF2119)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09893 Uncharacterized protein conserved in archaea (DUF2120)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09894 Uncharacterized protein conserved in archaea (DUF2121)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09895 RecB-family nuclease (DUF2122)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09897 Uncharacterized protein conserved in archaea (DUF2124)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09898 Uncharacterized protein conserved in bacteria (DUF2125)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09899 Putative amidoligase enzyme (DUF2126)
Members of this family of bacterial domains are predominantly found in transglutaminase and transglutaminase-like proteins. Their exact function is, as yet, unknown, but they are likely to act as amidoligase enzymes. Proteins in this family are found in conserved gene neighborhoods encoding a glutamine amidotransferase-like thiol peptidase (in proteobacteria) or an Aig2 family cyclotransferase protein (in firmicutes) .. +PF09900 Predicted membrane protein (DUF2127)
COGs (COG4331&COG3305). This domain, found in various hypothetical prokaryotic and archaeal proteins, has no known function.. +PF09902 Uncharacterized protein conserved in bacteria (DUF2129)
This domain, found in various hypothetical prokaryotic proteins, has no known function. Structural modelling suggests this domain may bind nucleic acids .. +PF09903 Uncharacterized protein conserved in bacteria (DUF2130)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09904 DUF2131;
Winged helix-turn helix. This family, found in various hypothetical prokaryotic proteins, is a probable winged helix DNA-binding domain.. +PF09905 Uncharacterized conserved protein (DUF2132)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09906 Uncharacterized protein conserved in bacteria (DUF2135)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09907 Uncharacterized protein conserved in bacteria (DUF2136)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09909 Uncharacterized protein conserved in bacteria (DUF2138)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09910 Uncharacterized protein conserved in archaea (DUF2139)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09911 Uncharacterized protein conserved in bacteria (DUF2140)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09912 Uncharacterized protein conserved in bacteria (DUF2141)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09913 Predicted membrane protein (DUF2142)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09916 Uncharacterized protein conserved in bacteria (DUF2145)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09917 Uncharacterized protein conserved in bacteria (DUF2147)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09918 Uncharacterized protein containing a ferredoxin domain (DUF2148)
This domain, found in various hypothetical bacterial proteins containing a ferredoxin domain, has no known function.. +PF09919 Uncharacterized conserved protein (DUF2149)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09920 Uncharacterized protein conserved in archaea (DUF2150)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09921 Uncharacterized protein conserved in archaea (DUF2153)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09922 Cell wall-active antibiotics response protein (DUF2154)
+PF09923 Uncharacterized protein conserved in bacteria (DUF2155)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09924 Uncharacterized conserved protein (DUF2156)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09925 Predicted membrane protein (DUF2157)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09926 Uncharacterized small protein (DUF2158)
Members of this family of prokaryotic proteins have no known function.. +PF09928 Predicted small integral membrane protein (DUF2160)
The members of this family of hypothetical prokaryotic proteins have no known function. It is thought that they are transmembrane proteins, but their function has not been inferred yet.. +PF09929 Uncharacterized conserved protein (DUF2161)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09930 Predicted transporter (DUF2162)
Members of this family of bacterial proteins are thought to be membrane transporters, but their exact function has not, as yet, been elucidated.. +PF09931 Uncharacterized conserved protein (DUF2163)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09932 Uncharacterized conserved protein (DUF2164)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09933 Predicted small integral membrane protein (DUF2165)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09935 Protein of unknown function (DUF2167)
This domain, found in various hypothetical membrane-anchored prokaryotic proteins, has no known function.. +PF09936 DUF2168;
SAM-dependent RNA methyltransferase. This family has a Rossmanoid fold, with a deep trefoil knot in its C-terminal region. It has structural similarity to RNA methyltransferases, and is likely to function as an S-adenosyl-L-methionine (SAM)-dependent RNA 2'-O methyltransferase .. +PF09937 Uncharacterized protein conserved in bacteria (DUF2169)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09938 Uncharacterized protein conserved in bacteria (DUF2170)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09939 Uncharacterized protein conserved in bacteria (DUF2171)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09940 Domain of unknown function (DUF2172)
This domain, found in various hypothetical prokaryotic proteins, has no known function. An aminopeptidase domain is conserved within the family, but its relevance has not been established yet.. +PF09941 Uncharacterized conserved protein (DUF2173)
This domain, found in various hypothetical prokaryotic proteins, has no known function.. +PF09943 Uncharacterized protein conserved in archaea (DUF2175)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09945 Predicted membrane protein (DUF2177)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09946 Predicted membrane protein (DUF2178)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09947 Uncharacterized protein conserved in archaea (DUF2180)
This domain, found in various hypothetical archaeal proteins, has no known function. A few of the family members contain a zinc finger domain.. +PF09948 Predicted metal-binding integral membrane protein (DUF2182)
This domain, found in various hypothetical bacterial membrane proteins having predicted metal-binding properties, has no known function.. +PF09949 Uncharacterized conserved protein (DUF2183)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09950 Uncharacterized protein conserved in bacteria (DUF2184)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09951 Protein of unknown function (DUF2185)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09952 Uncharacterized protein conserved in bacteria (DUF2186)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09953 Uncharacterized protein conserved in bacteria (DUF2187)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09954 Uncharacterized protein conserved in bacteria (DUF2188)
COGs (COG4876) & Jackhmmer:B5ZC26. This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09955 Predicted integral membrane protein (DUF2189)
Members of this family are found in various hypothetical prokaryotic proteins, as well as putative cytochrome c oxidases. Their exact function has not, as yet, been established.. +PF09956 Uncharacterized conserved protein (DUF2190)
This domain, found in various hypothetical prokaryotic proteins, as well as in some putative RecA/RadA recombinases, has no known function.. +PF09957 Uncharacterized protein conserved in bacteria (DUF2191)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09958 Uncharacterized protein conserved in archaea (DUF2192)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09959 Uncharacterized protein conserved in archaea (DUF2193)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09960 Uncharacterized protein conserved in bacteria (DUF2194)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09961 Uncharacterized protein conserved in bacteria (DUF2195)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09962 Uncharacterized conserved protein (DUF2196)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09963 Uncharacterized protein conserved in bacteria (DUF2197)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09964 Uncharacterized protein conserved in bacteria (DUF2198)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09965 Uncharacterized protein conserved in bacteria (DUF2199)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09966 Uncharacterized protein conserved in bacteria (DUF2200)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09967 VWA-like domain (DUF2201)
This domain, found in various hypothetical bacterial proteins, has no known function. However, it is clearly related to the VWA domain.. +PF09968 Uncharacterized protein domain (DUF2202)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09969 Uncharacterized conserved protein (DUF2203)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09970 Nucleotidyl transferase of unknown function (DUF2204)
This domain, found in various hypothetical archaeal proteins, has no known function. However, this family was identified as belonging to the nucleotidyltransferase superfamily .. +PF09971 Predicted membrane protein (DUF2206)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09972 Predicted membrane protein (DUF2207)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09973 Predicted membrane protein (DUF2208)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09974 Uncharacterized protein conserved in archaea (DUF2209)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09976 DUF2133;
Tetratricopeptide repeat. +PF09977 DUF2134;
Putative Tad-like Flp pilus-assembly. This domain, found in various hypothetical prokaryotic proteins, is likely to be involved in Flp pilus biogenesis.. +PF09979 Uncharacterized protein conserved in bacteria (DUF2213)
Members of this family of bacterial proteins comprise various hypothetical and phage-related proteins. The exact function of these proteins has not, as yet, been determined.. +PF09980 Predicted membrane protein (DUF2214)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09981 Uncharacterized protein conserved in bacteria (DUF2218)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09982 Uncharacterized protein conserved in bacteria (DUF2219)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09983 Uncharacterized protein conserved in bacteria C-term(DUF2220)
This domain, found in various hypothetical bacterial proteins, has no known function. The family represents just the C-terminus.. +PF09984 Uncharacterized signal transduction histidine kinase domain (DUF2222)
Members of this family of domains are found in various BarA-like signal transduction histidine kinases, which are involved in the regulation of carbon metabolism via the csrA/csrB regulatory system. The role of this domain has not, as yet, been established.. +PF09985 Domain of unknown function (DUF2223)
Members of this family are found in various prokaryotic membrane-anchored proteins predicted to be involved in the regulation of amylopullulanase.. +PF09986 Uncharacterized protein conserved in bacteria (DUF2225)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09987 Uncharacterized protein conserved in archaea (DUF2226)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF09988 Uncharacterized metal-binding protein (DUF2227)
Members of this family of hypothetical bacterial proteins possess metal binding properties; however, their exact function has not, as yet, been determined.. +PF09989 CoA enzyme activase uncharacterised domain (DUF2229)
Members of this family include various bacterial hypothetical proteins, as well as CoA enzyme activases. The exact function of this domain has not, as yet, been defined.. +PF09990 Predicted membrane protein (DUF2231)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09991 Predicted membrane protein (DUF2232)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09992 Predicted periplasmic protein (DUF2233)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09994 Uncharacterized alpha/beta hydrolase domain (DUF2235)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09995 Uncharacterized protein conserved in bacteria (DUF2236)
This domain, found in various hypothetical bacterial proteins, has no known function. This family contains a highly conserved arginine and histidine that may be active site residues for an as yet unknown catalytic activity.. +PF09996 Uncharacterized protein conserved in bacteria (DUF2237)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09997 Predicted membrane protein (DUF2238)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09998 Uncharacterized protein conserved in bacteria (DUF2239)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF09999 Uncharacterized protein conserved in archaea (DUF2240)
This domain, found in various hypothetical archaeal proteins, has no known function.. +PF10000 DUF2241;
This domain, found in various hypothetical bacterial proteins, has no known function. However, its structure is similar to the ACT domain which suggests that it binds to amino acids and regulates other protein activity. This family was formerly known as DUF2241.. +PF10001 Uncharacterized protein conserved in bacteria (DUF2242)
This domain is found in various hypothetical bacterial proteins, and has no known function.. +PF10002 Predicted membrane protein (DUF2243)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10003 Integral membrane protein (DUF2244)
This domain, found in various bacterial hypothetical and putative membrane proteins, has no known function.. +PF10004 Uncharacterized protein conserved in bacteria (DUF2247)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10005 Uncharacterized protein conserved in bacteria (DUF2248)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10006 Uncharacterized conserved protein (DUF2249)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10007 Uncharacterized protein conserved in archaea (DUF2250)
Members of this family of hypothetical archaeal proteins have no known function.. +PF10008 Uncharacterized protein conserved in bacteria (DUF2251)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10009 Uncharacterized protein conserved in bacteria (DUF2252)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10011 Predicted membrane protein (DUF2254)
Members of this family of bacterial proteins comprise various hypothetical and putative membrane proteins. Their exact function has not, as yet, been defined.. +PF10012 Uncharacterized protein conserved in bacteria (DUF2255)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10013 Uncharacterized protein conserved in bacteria (DUF2256)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10014 DUF2257; BsmA;
This family contains 2-oxoglutarate (2OG) and Fe-dependent dioxygenases. It includes L-isoleucine dioxygenase (IDO) .. +PF10015 Uncharacterized protein conserved in archaea (DUF2258)
Members of this family of hypothetical archaeal proteins have no known function. Structural modelling suggests this domain may bind nucleic acids .. +PF10016 Predicted secreted protein (DUF2259)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10017 DUF2260;
Histidine-specific methyltransferase, SAM-dependent. The mycobacterial members of this family are expressed from part of the ergothioneine biosynthetic gene cluster. EGTD is the histidine methyltransferase that transfers three methyl groups to the alpha-amino moiety of histidine, in the first stage of the production of this histidine betaine derivative that carries a thiol group attached to the C2 atom of an imidazole ring .. +PF10018 VDRIP;
Vitamin-D-receptor interacting Mediator subunit 4. Members of this family function as part of the Mediator (Med) complex, which links DNA-bound transcriptional regulators and the general transcription machinery, particularly the RNA polymerase II enzyme. They play a role in basal transcription by mediating activation or repression according to the specific complement of transcriptional regulators bound to the promoter .. +PF10020 Uncharacterized protein conserved in bacteria (DUF2262)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10021 Uncharacterized protein conserved in bacteria (DUF2263)
This domain, found in various hypothetical bacterial and eukaryotic proteins, has no known function.. +PF10022 Uncharacterized protein conserved in bacteria (DUF2264)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10023 Predicted aminopeptidase (DUF2265)
Members of this family of bacterial proteins comprise various hypothetical proteins and putative aminopeptidases. Their exact function has not, as yet, been defined.. +PF10025 Uncharacterized conserved protein (DUF2267)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10026 Predicted Zn-dependent protease (DUF2268)
This domain, found in various hypothetical bacterial proteins, as well as predicted zinc dependent proteases, has no known function.. +PF10027 Predicted integral membrane protein (DUF2269)
Members of this family of bacterial hypothetical integral membrane proteins have no known function.. +PF10028 Predicted integral membrane protein (DUF2270)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10029 Predicted periplasmic protein (DUF2271)
This domain, found in various hypothetical bacterial proteins and misannotated lysozyme proteins, has no known function.. +PF10030 Uncharacterized protein conserved in bacteria (DUF2272)
Members of this family of hypothetical bacterial proteins have no known function. However, given its similarity to the CHAP domain it seems likely that this is an enzyme involved in cleaving peptidoglycan.. +PF10031 Small integral membrane protein (DUF2273)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10032 Phosphate transport (Pho88)
Members of this family of proteins are involved in regulating inorganic phosphate transport, as well as telomere length regulation and maintenance .. +PF10033 Autophagy-related protein 13
Members of this family of phosphoproteins are involved in cytoplasm to vacuole transport (Cvt), and more specifically in Cvt vesicle formation. They are probably involved in the switching machinery regulating the conversion between the Cvt pathway and autophagy. Finally, ATG13 is also required for glycogen storage .. +PF10034 DUF2211;
Q-cell neuroblast polarisation. Dpy-19, formerly known as DUF2211, is a transmembrane domain family that is required to orient the neuroblast cells, QR and QL accurately on the anterior-posterior axis: QL and QR are born in the same anterior-posterior position, but polarise and migrate left-right asymmetrically, QL migrating towards the posterior and QR migrating towards the anterior. It is also required, with unc-40, to express mab-5 correctly in the Q cell descendants . The Dpy-19 protein derives from the C. elegans DUMPY mutant, Swiss:P34413.. +PF10035 Uncharacterized protein conserved in bacteria (DUF2179)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10036 Putative carnitine deficiency-associated protein
This family of proteins conserved from nematodes to humans is of approximately 250 amino acids. It is purported to be carnitine deficiency-associated protein but this could not be confirmed. It carries a characteristic RLL sequence-motif. The function is unknown.. +PF10037 MRP_S27;
Mitochondrial 28S ribosomal protein S27. Members of this family of small ribosomal proteins possess one of three conserved blocks of sequence found in proteins that stimulate the dissociation of guanine nucleotides from G-proteins, leaving open the possibility that MRP-S27 might be a functional partner of GTP-binding ribosomal proteins .. +PF10038 Protein of unknown function (DUF2274)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10039 Predicted integral membrane protein (DUF2275)
This domain, found in various hypothetical bacterial proteins and in the RNA polymerase sigma factor, has no known function.. +PF10040 Uncharacterized conserved protein (DUF2276)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10041 Uncharacterized conserved protein (DUF2277)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10042 Uncharacterized conserved protein (DUF2278)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10043 Predicted periplasmic lipoprotein (DUF2279)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10044 Retinal tissue protein
Rtp is a family of proteins of approximately 112 amino acids in length which is conserved from nematodes to humans. The proposed tertiary structure is of almost entirely alpha helix interrupted only by loops located at proline residues. Three sites in the protein sequence reveal two types of possible post-translation modification. A serine residue, at position 41, is a candidate for protein kinase C phosphorylation. Glycine residues at position 69 and 91 are probable sites for acetylation by covalent amide linkage of myristate via N-myristoyl transferase. Rtp is differentially expressed in the trout retina between parr and smolt developmental stages (smoltification). It is likely to be a house-keeping protein .. +PF10045 Uncharacterized conserved protein (DUF2280)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10046 Biogenesis of lysosome-related organelles complex-1 subunit 2
Members of this family of proteins play a role in cellular proliferation, as well as in the biogenesis of specialized organelles of the endosomal-lysosomal system.. +PF10047 Protein of unknown function (DUF2281)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10048 Predicted integral membrane protein (DUF2282)
Members of this family of hypothetical bacterial proteins and putative signal peptide proteins have no known function.. +PF10049 Protein of unknown function (DUF2283)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10050 Predicted metal-binding protein (DUF2284)
Members of this family of metal-binding hypothetical bacterial proteins have no known function.. +PF10051 Uncharacterized protein conserved in archaea (DUF2286)
Members of this family of hypothetical archaeal proteins have no known function.. +PF10052 Protein of unknown function (DUF2288)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10053 Uncharacterized conserved protein (DUF2290)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10054 Predicted periplasmic lipoprotein (DUF2291)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10055 Uncharacterized small protein (DUF2292)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10056 Uncharacterized conserved protein (DUF2293)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10057 Uncharacterized conserved protein (DUF2294)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10058 Predicted integral membrane metal-binding protein (DUF2296)
This domain, found in various hypothetical bacterial and eukaryotic metal-binding proteins, has no known function.. +PF10060 Uncharacterized membrane protein (DUF2298)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10061 Uncharacterized conserved protein (DUF2299)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10062 Predicted secreted protein (DUF2300)
This domain, found in various bacterial hypothetical and putative signal peptide proteins, has no known function.. +PF10063 Uncharacterized integral membrane protein (DUF2301)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10065 Uncharacterized conserved protein (DUF2303)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10066 Uncharacterized conserved protein (DUF2304)
Members of this family of hypothetical archaeal proteins have no known function.. +PF10067 Predicted membrane protein (DUF2306)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10069 DUF2308;
Sensory domain found in DIguanylate Cyclases & Two-component systems. DICT is a sensory domain found associated with GGDEF, EAL, HD-GYP, STAS, and two component systems . It assumes an alpha+beta fold with a 4-stranded beta-sheet and might have a role in light response .. +PF10069 Uncharacterized protein conserved in bacteria (DUF2309)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10071 Zn-ribbon-containing, possibly nucleic-acid-binding protein (DUF2310)
Members of this family of proteobacterial zinc ribbon proteins are thought to bind to nucleic acids, however their exact function has not as yet been defined.. +PF10073 Uncharacterized protein conserved in bacteria (DUF2312)
Members of this family of hypothetical bacterial proteins have no known function. Structural modelling suggests this domain may bind nucleic acids .. +PF10074 Uncharacterized conserved protein (DUF2285)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10075 COP9 signalosome, subunit CSN8
This PCI_Csn8 domain is conserved from plants to humans. It is a signature protein motif found in components of CSN (COP9 signalosome). It functions as a structural scaffold for subunit-subunit interactions within the complex and is a key regulator of photomorphogenic development .. +PF10076 Uncharacterized protein conserved in bacteria (DUF2313)
Members of this family of proteins comprise various hypothetical and putative bacteriophage tail proteins.. +PF10077 Uncharacterized protein conserved in bacteria (DUF2314)
This domain is found in various bacterial hypothetical proteins, as well as putative ankyrin repeat proteins. The exact function of the domains comprising this family has not, as yet, been determined.. +PF10078 Uncharacterized protein conserved in bacteria (DUF2316)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10079 Uncharacterized protein conserved in bacteria (DUF2317)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10080 Predicted membrane protein (DUF2318)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10081 DUF2319;
Alpha/beta-hydrolase family. This is a family of alpha/beta hydrolases which may function as lipases. This domain is the catalytic domain and includes the catalytic triad and the GXSXG sequence motif which is a characteristic of these enzymes .. +PF10082 Uncharacterized protein conserved in bacteria (DUF2320)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10083 Uncharacterized protein conserved in bacteria (DUF2321)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10084 Uncharacterized protein conserved in bacteria (DUF2322)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10086 Putative membrane peptidase family (DUF2324)
This domain, found in various hypothetical bacterial proteins, has no known function. This family appears to be related to the prenyl protease 2 family Pfam:PF02517, suggesting this family may be peptidases.. +PF10087 Uncharacterized protein conserved in bacteria (DUF2325)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10088 Uncharacterized protein conserved in bacteria (DUF2326)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10090 Uncharacterized protein conserved in bacteria (DUF2328)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10091 DUF2329;
Putative glucoamylase. The structure of UniProt:Q5LIB7 has an alpha/alpha toroid fold and is similar structurally to a number of glucoamylases. Most of these structural homologues are glucoamylases, involved in breaking down complex sugars (e.g. starch). The biologically relevant state is likely to be monomeric. The putative active site is located at the centre of the toroid with a well defined large cavity.. +PF10092 Uncharacterized protein conserved in bacteria (DUF2330)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10093 Uncharacterized protein conserved in bacteria (DUF2331)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10094 Uncharacterized protein conserved in bacteria (DUF2332)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10095 Uncharacterized protein conserved in bacteria (DUF2333)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10096 Uncharacterized protein conserved in bacteria (DUF2334)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10097 Predicted membrane protein (DUF2335)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10098 Uncharacterized protein conserved in bacteria (DUF2336)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10099 DUF2337;
Anti-sigma-K factor rskA. This domain, formerly known as DUF2337, is the anti-sigma-K factor, RskA. In Mycobacterium tuberculosis the protein positively regulates expression of the antigenic proteins MPB70 and MPB83 .. +PF10100 Uncharacterized protein conserved in bacteria (DUF2338)
Members of this family of hypothetical bacterial proteins have no known function.. +PF10101 Predicted membrane protein (DUF2339)
This domain, found in various hypothetical bacterial proteins, has no known function.. +PF10102 Domain of unknown function (DUF2341)
Members of this family are found in various bacterial proteins, including MotA/TolQ/ExbB proton channels and other transport proteins. The exact function of this set of domains has not, as yet, been determined.. +PF10103 Uncharacterised conserved protein (DUF2342)
Members of this family of bacterial hypothetical and uncharacterised proteins have no known function.. +PF10104 DUF2343;
Di-sulfide bridge nucleocytoplasmic transport domain. COGs (COG5085), KOGs (KOG4503). Brr6_like_C_C is the highly conserved C-terminal region of a group of proteins found in fungi. It carries four highly conserved cysteine residues. It is suggested that members of the family interact with each other via di-sulfide bridges to form a complex which is involved in nucleocytoplasmic transport . Brr6 in yeast is an essential integral membrane protein of the NE-ER, with two predicted transmembrane domains, and is a dosage suppressor of Apq12, Pfam:PF12716 .. +PF10105 Uncharacterized protein conserved in bacteria (DUF2344)
This domain, found in various hypothetical bacterial proteins and Radical Sam domain proteins, has no known function.. +PF10106 Uncharacterized protein conserved in bacteria (DUF2345)
Members of this family are found in various bacterial hypothetical proteins, as well as Rhs element Vgr proteins.. +PF10107 Endonuclease related to archaeal Holliday junction resolvase
This domain is found in various predicted bacterial endonucleases which are distantly related to archaeal Holliday junction resolvases.. +PF10108 Exon_PolB;
Predicted 3'-5' exonuclease related to the exonuclease domain of PolB. This domain is found in various prokaryotic 3'-5' exonucleases and hypothetical proteins.. +PF10109 Mu-like prophage FluMu protein gp41
Jackhmmer:Q1GH51.1(1-98). Members of this family of prokaryotic proteins include various gp41 proteins and related sequences .. +PF10110 Membrane domain of glycerophosphoryl diester phosphodiesterase
Members of this family comprise the membrane domain of the prokaryotic enzyme glycerophosphoryl diester phosphodiesterase.. +PF10111 GlycosTransf;
Glycosyltransferase like family 2. Members of this family of prokaryotic proteins include putative glucosyltransferase, which are involved in bacterial capsule biosynthesis .. +PF10112 5-bromo-4-chloroindolyl phosphate hydrolysis protein
Members of this family of prokaryotic proteins mediate the hydrolysis of 5-bromo-4-chloroindolyl phosphate bonds.. +PF10113 Fibrillarin-like archaeal protein
Members of this family of proteins include archaeal fibrillarin homologs.. +PF10114 Hist_Kin_Sens;
Sensory domain found in PocR. PocR, a ligand binding domain, has a novel variant of the PAS-like Fold . Evidence suggests that it binds small hydrocarbon derivatives such as 1,3-propanediol . . +PF10115 Transcriptional activator HlyU
This domain, found in various hypothetical prokaryotic proteins, has no known function. One of the sequences in this family corresponds to the transcriptional activator HlyU, indicating a possible similar role in other members.. +PF10116 Protein required for attachment to host cells
Members of this family of bacterial proteins are required for the attachment of the bacterium to host cells .. +PF10117 McrBC 5-methylcytosine restriction system component
Members of this family of bacterial proteins modify the specificity of mcrB restriction by expanding the range of modified sequences restricted .. +PF10118 Predicted metal-dependent hydrolase
Members of this family of proteins comprise various bacterial transition metal-dependent hydrolases.. +PF10119 Predicted methyltransferase regulatory domain
Members of this family of domains are found in various prokaryotic methyltransferases, where they regulate the activity of the methyltransferase domain.. +PF10120 MethylPyrKinase;
Members of this family of archaeal and bacterial proteins are likely to be aldolases.. +PF10122 Mu-like prophage protein Com
Members of this family of proteins comprise the translational regulator of mom .. +PF10123 Mu-like prophage I protein
Members of this family of proteins comprise various viral Mu-like prophage I proteins.. +PF10124 Mu-like prophage major head subunit gpT
Members of this family of proteins comprise various caudoviral prophage proteins, including the Mu-like prophage major head subunit gpT.. +PF10125 NADH dehydrogenase I, subunit N related protein
This family comprises a set of NADH dehydrogenase I, subunit N related proteins found in archaea. Their exact function has not, as yet, been determined.. +PF10126 Uncharacterized protein, homolog of nitrogen regulatory protein PII
This domain, found in various hypothetical archaeal proteins, has no known function. It is distantly similar to the nitrogen regulatory protein PII.. +PF10127 Predicted nucleotidyltransferase
Members of this family of bacterial proteins catalyze the transfer of nucleotide residues from nucleoside diphosphates or triphosphates into dimer or polymer forms.. +PF10128 Glucose-6-phosphate dehydrogenase subunit
Members of this family are found in various prokaryotic OpcA and glucose-6-phosphate dehydrogenase proteins. The exact function of the domain is, as yet, unknown.. +PF10129 OpgC protein
This domain is found in various hypothetical and OpgC prokaryotic proteins. It is likely to act as an acyltransferase enzyme.. +PF10130 PIN domain
Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases).. +PF10131 6-pyruvoyl-tetrahydropterin synthase related domain; membrane protein
This domain is found in various bacterial hypothetical membrane proteins, as well as in tetratricopeptide TPR_2 repeat protein. The exact function of the domain has not, as yet, been established.. +PF10133 Predicted RNA-binding protein
Members of this family of bacterial proteins are thought to have RNA-binding properties, however, their exact function has not, as yet, been defined.. +PF10134 Replication initiator protein A
Members of this family of bacterial proteins are single-stranded DNA binding proteins that are involved in DNA replication, repair and recombination.. +PF10135 Rod binding protein
Members of this family are involved in the assembly of the prokaryotic flagellar rod.. +PF10136 Site-specific recombinase
Members of this family of bacterial proteins are found in various putative site-specific recombinase transmembrane proteins.. +PF10137 Predicted nucleotide-binding protein containing TIR-like domain
Members of this family of bacterial nucleotide-binding proteins contain a TIR-like domain. Their exact function has not, as yet, been defined.. +PF10138 Tellurium_res;
vWA found in TerF C terminus . vWA domain fused to TerD domain typified by the TerF protein . Sometimes found as solos. . +PF10139 Putative bacterial virulence factor
Members of this family of prokaryotic proteins include various putative virulence factor effector proteins. Their exact function is, as yet, unknown.. +PF10140 essB;
WXG100 protein secretion system (Wss), protein YukC. Members of this family of proteins include predicted membrane proteins homologous to YukC in B. subtilis. The YukC protein family would participate in the formation of a translocon required for the secretion of WXG100 proteins (Pfam:PF06013) in monoderm bacteria, the WXG100 protein secretion system (Wss). This family includes EssB in Staphylococcus aureus.. +PF10141 Single-strand DNA-specific exonuclease, C terminal domain
Members of this set of prokaryotic domains are found in a set of single-strand DNA-specific exonucleases, including RecJ. Their exact function has not, as yet, been determined.. +PF10142 PhoPQ-activated pathogenicity-related protein
Members of this family of bacterial proteins are involved in the virulence of some pathogenic proteobacteria .. +PF10143 2,3-bisphosphoglycerate-independent phosphoglycerate mutase
Members of this family are found in various bacterial 2,3-bisphosphoglycerate-independent phosphoglycerate mutase enzymes, which catalyse the interconversion of 2-phosphoglycerate and 3-phosphoglycerate in the reaction: [2-phospho-D-glycerate + 2,3-diphosphoglycerate = 3-phospho-D-glycerate + 2,3-diphosphoglycerate].. +PF10144 Bacterial virulence factor haemolysin
Members of this family of bacterial proteins are membrane proteins that effect the expression of haemolysin under anaerobic conditions .. +PF10145 Phage-related minor tail protein
Members of this family are found in putative phage tail tape measure proteins. . +PF10146 Zinc finger-containing protein
This is a family of proteins which appears to have a highly conserved zinc finger domain at the C terminal end, described as -C-X2-CH-X3-H-X5-C-X2-C-. The structure is predicted to contain a coiled coil. Members are annotated as being tumour-associated antigen HCA127 in humans but this could not be confirmed.. +PF10147 Growth arrest and DNA-damage-inducible proteins-interacting protein 1
Members of this family of proteins act as negative regulators of G1 to S cell cycle phase progression by inhibiting cyclin-dependent kinases. Inhibitory effects are additive with GADD45 proteins but occur also in the absence of GADD45 proteins. Furthermore, they act as a repressor of the orphan nuclear receptor NR4A1 by inhibiting AB domain-mediated transcriptional activity .. +PF10148 Schwannomin-interacting protein 1
Members of this family are coiled-coil proteins involved in linking membrane proteins to the cytoskeleton.. +PF10149 NAcGluc_Transf;
Transmembrane protein 231. This is a family of transmembrane proteins, given the number 231, of unknown function. It is conserved in eukaryotes.. +PF10150 Ribonuclease E/G family
Pfam-B_234 (Release 21.0). Ribonuclease E and Ribonuclease G are related enzymes that cleave a wide variety of RNAs .. +PF10151 Uncharacterised conserved protein (DUF2359)
This is a 450 amino acid region of a family of proteins conserved from insects to humans. The mouse protein, Q8BM55, is annotated as being a putative Vitamin K-dependent carboxylation gamma-carboxyglutamic (GLA) domain containing protein, but this could not be confirmed. The function is not known.. +PF10152 Predicted coiled-coil domain-containing protein (DUF2360)
This is the conserved 140 amino acid region of a family of proteins conserved from nematodes to humans. One C. elegans member is annotated as a Daf-16-dependent longevity protein 1 but this could not be confirmed. The function is unknown.. +PF10153 Uncharacterised conserved protein (DUF2361)
This is a region of 120 amino acids that is conserved in a family of proteins found from plants to fungi. The function is not known.. +PF10154 Uncharacterized conserved protein (DUF2362)
This is a family of proteins conserved from nematodes to humans. The function is not known.. +PF10155 Uncharacterized conserved protein (DUF2363)
This is a region of 120 amino acids of a family of proteins conserved from plants to humans. The function is not known.. +PF10156 DUF2364;
Subunit 17 of Mediator complex. This Mediator complex subunit was formerly known as Srb4 in yeasts or Trap80 in Drosophila and human. The Med17 subunit is located within the head domain and is essential for cell viability to the extent that a mutant strain of S. cerevisiae lacking it shows all RNA polymerase II-dependent transcription ceasing at non-permissive temperatures.. +PF10157 Uncharacterized conserved protein (DUF2365)
This is a family of conserved proteins found from nematodes to humans. The function is unknown.. +PF10158 Tumour suppressor protein
This is a region of 130 amino acids that is the most conserved region of hypothetical proteins involved in loss of heterozygosity and thus tumour suppression . The exact function is not known.. +PF10159 Kinase phosphorylation protein
This is a glycine-rich domain that is the most highly conserved region of a family of proteins that in vertebrates are associated with tumours in multiple myelomas. The region may contain phosphorylation sites for several protein kinases, as well as N-myristoylation sites and nuclear localisation signals, so it might act as a signal molecule in the nucleus .. +PF10160 Tmem40;
Predicted membrane protein. This is a region of 280 amino acids from a group of proteins conserved from plants to humans. It is predicted to be a membrane protein but its function is otherwise unknown.. +PF10161 Putative mitochondrial precursor protein
This is a family of small conserved proteins found from nematodes to humans. The C-terminal region is rich in asparagine. Members are putatively assigned to be mitochondrial precursor proteins but this could not be confirmed.. +PF10162 G8 domain
This domain is found in disease proteins PKHD1 and KIAA1199 and is named G8 after its 8 conserved glycines. It is predicted to contain 10 beta strands and an alpha helix.. +PF10163 Transcription factor e(y)2
EnY2 is a small transcription factor which is combined in a complex with the TAFII40 protein . The protein is conserved from paramecium to humans.. +PF10164 Uncharacterized conserved protein (DUF2367)
This is a highly conserved family of proteins which contains three pairs of cysteine residues within a length of 42 amino acids and is rich in proline residues towards the N-terminus. The function is unknown. Several members are putatively assigned as brain protein i3 but this was not validated.. +PF10165 Guanine nucleotide exchange factor synembryn
Ric8 is involved in the EGL-30 neurotransmitter signalling pathway . It is a guanine nucleotide exchange factor that regulates neurotransmitter secretion.. +PF10166 Uncharacterised conserved protein (DUF2368)
This family is conserved from nematodes to humans. The function is not known.. +PF10167 Uncharacterised conserved protein
This is the N-terminal 80 residues of a family of proteins conserved from plants to humans. It contains a characteristic NEP sequence motif. The function is not known.. +PF10168 Nuclear pore component
Nup88 can be divided into two structural domains; the N-terminal two-thirds of the protein has no obvious structural motifs but is the region for binding to Nup98, one of the components of the nuclear pore. The C-terminal end is a predicted coiled-coil domain . Nup88 is overexpressed in tumour cells .. +PF10169 Learning-associated protein
This is a family of 121-amino acid secretory proteins. Laps functions in the regulation of neuronal cell adhesion and/or movement and synapse attachment . Laps binds to the ApC/EBP (Aplysia CCAAT/enhancer binding protein) promoter and activates the transcription of ApC/EBP mRNA .. +PF10170 Cysteine-rich domain
This is the N-terminal approximately 100 amino acids of a family of proteins found from nematodes to humans. It contains between six and eight highly conserved cysteine residues and a characteristic DPF sequence motif. One member is putatively named as receptor for egg jelly protein but this could not be confirmed.. +PF10171 Uncharacterised conserved protein (DUF2366)
This is a family of proteins conserved from nematodes to humans. The function is not known.. +PF10172 Det1 complexing ubiquitin ligase
DDA1 (De-etiolated 1, Damaged DNA binding protein 1 associated 1) protein binds strongly with DDB1 and Det1 forming a DDD complex which is part of the ubiquitin conjugation system .. +PF10173 DUF2343;
Mitochondrial K+-H+ exchange-related. The members of this family function as mitochondrial potassium-hydrogen exchange transporters. The family is part of a large mitochondrial KHE protein complex.. +PF10174 RIM-binding protein of the cytomatrix active zone
This is a family of proteins that form part of the CAZ (cytomatrix at the active zone) complex which is involved in determining the site of synaptic vesicle fusion . The C-terminus is a PDZ-binding motif that binds directly to RIM (a small G protein Rab-3A effector). The family also contains four coiled-coil domains .. +PF10175 M-phase phosphoprotein 6
This is a family of M-phase phosphoprotein 6s which is necessary for generation of the 3' end of the 5.8S rRNA precursor. It preferentially binds to poly(C) and poly(U).. +PF10176 Protein of unknown function (DUF2370)
This family is conserved from fungi to humans. The human member is annotated as a Golgi-associated protein-Nedd4 WW domain-binding protein but this could not be confirmed.. +PF10177 Uncharacterised conserved protein (DUF2371)
This is a family of proteins conserved from nematodes to humans. The function is not known.. +PF10178 Uncharacterised conserved protein (DUF2372)
This family consists of proteins found from plants to humans. The function is not known.. +PF10179 Uncharacterised conserved protein (DUF2369)
This is a proline-rich region of a group of proteins found from plants to fungi. The function is not known.. +PF10180 Uncharacterised conserved protein (DUF2373)
This is the C-terminal conserved region of a family of proteins found from fungi to humans. The function is not known.. +PF10181 GPI-GlcNAc transferase complex, PIG-H component
PIG-H is a family of conserved proteins that complexes with three other proteins to form the GPI-GnT (glycosylphosphatidylinositol anchor biosynthesis transferase) complex. It appears to be a peripheral membrane protein facing the cytoplasm involved in the first step in GPI anchor formation.. +PF10182 Flo11 domain
Pfam-B_18862 (Release 22.0). This presumed domain is found at the N-terminus of the S. cerevisiae Flo11 protein. Flo11 is required for diploid pseudohyphal formation and haploid invasive growth. It belongs to a family of proteins involved in invasive growth, cell-cell adhesion, and mating, many of which can substitute for each other under abnormal conditions .. +PF10183 ESSS subunit of NADH:ubiquinone oxidoreductase (complex I)
This subunit is part of the mitochondrial NADH:ubiquinone oxidoreductase (complex I). It carries mitochondrial import sequences .. +PF10184 Uncharacterized conserved protein (DUF2358)
DUF2358 is a family of conserved proteins found from plants to humans. The function is unknown.. +PF10185 Chaperone for wingless signalling and trafficking of LDL receptor
Mesd is a family of highly conserved proteins found from nematodes to humans. The final C-terminal residues, KEDL, are the endoplasmic reticulum retention sequence as it is an ER protein specifically required for the intracellular trafficking of members of the low-density lipoprotein family of receptors (LDLRs) . The N- and C-terminal sequences are predicted to adopt a random coil conformation, with the exception of an isolated predicted helix within the N-terminal region. The central folded domain flanked by natively unstructured regions is the necessary structure for facilitating maturation of LRP6 (Low-Density Lipoprotein Receptor-Related Protein 6 Maturation) .. +PF10186 DUF2355;
UV radiation resistance protein and autophagy-related subunit 14. KOGs (KOG4398), Wood V. The Atg14 or Apg14 proteins are hydrophilic proteins with a predicted molecular mass of 40.5 kDa, and have a coiled-coil motif at the N terminus region. Yeast cells with mutant Atg14 are defective not only in autophagy but also in sorting of carboxypeptidase Y (CPY), a vacuolar-soluble hydrolase, to the vacuole. Subcellular fractionation indicates that Apg14p and Apg6p are peripherally associated with a membrane structure(s). Apg14p was co-immunoprecipitated with Apg6p, suggesting that they form a stable protein complex. These results imply that Apg6/Vps30p has two distinct functions: in the autophagic process and in the vacuolar protein sorting pathway. Apg14p may be a component specifically required for the function of Apg6/Vps30p through the autophagic pathway . There are 17 auto-phagosomal component proteins which are categorized into six functional units, one of which is the AS-PI3K complex (Vps30/Atg6 and Atg14). The AS-PI3K complex and the Atg2-Atg18 complex are essential for nucleation, and the specific function of the AS-PI3K apparently is to produce phosphatidylinositol 3-phosphate (PtdIns(3)P) at the pre-autophagosomal structure (PAS). The localisation of this complex at the PAS is controlled by Atg14 . Autophagy mediates the cellular response to nutrient deprivation, protein aggregation, and pathogen invasion in humans, and malfunction of autophagy has been implicated in multiple human diseases including cancer. This effect seems to be mediated through direct interaction of the human Atg14 with Beclin 1 in the human phosphatidylinositol 3-kinase class III complex .. +PF10187 N-terminal domain of NEFA-interacting nuclear protein NIP30
This is the N-terminal 100 amino acids of a family of proteins conserved from plants to humans. The full-length protein has putatively been called NEFA-interacting nuclear protein NIP30, however no reference could be found to confirm this.. +PF10188 Organic solute transport protein 1
Oscp1 is a family of proteins conserved from plants to humans. It is called organic solute transport protein or oxido-red- nitro domain-containing protein 1, however no reference could be found to confirm the function of the protein.. +PF10189 Conserved protein (DUF2356)
This is a 200 amino acid region of a family of proteins conserved from plants to humans. Some members have been putatively annotated as being integrator complex subunit 3 but this could not be confirmed. The function is unknown.. +PF10190 Tmem170;
Putative transmembrane protein 170. Tmem170 is a family of putative transmembrane proteins conserved from nematodes to humans. The protein is only of approximately 130 amino acids in length. The function is unknown.. +PF10191 DUF2354;
Golgi complex component 7 (COG7). COG7 is a component of the conserved oligomeric Golgi complex which is required for normal Golgi morphology and localisation. Mutation in COG7 causes a congenital disorder of glycosylation .. +PF10192 Rhodopsin-like GPCR transmembrane domain
This region of 270 amino acids is the seven transmembrane alpha-helical domains included within five GPCRRHODOPSN4 motifs of a G-protein-coupled-receptor (GPCR) protein, conserved from nematodes to humans. GPCRs are integral membrane receptors whose intracellular actions are mediated by signalling pathways involving G proteins and downstream secondary messengers .. +PF10193 Telomere length regulation protein
This family is the central conserved 110 amino acid region of a group of proteins called telomere-length regulation or clock abnormal protein-2 which are conserved from plants to humans. The full-length protein regulates telomere length and contributes to silencing of sub-telomeric regions. In vitro the protein binds to telomeric DNA repeats.. +PF10195 DNA-binding nuclear phosphoprotein p8
P8 is a short 80-82 amino acid protein that is conserved from nematodes to humans. It carries at least one protein kinase C domain suggesting a possible role in signal transduction and it is thought to be a phosphoprotein, but the sites of phosphorylation and the kinases involved remain to be determined .. +PF10197 N-terminal domain of CBF1 interacting co-repressor CIR
This is a 45 residue conserved region at the N-terminal end of a family of proteins referred to as CIRs (CBF1-interacting co-repressors). CBF1 (centromere-binding factor 1) acts as a transcription factor that causes repression by binding specifically to GTGGGAA motifs in responsive promoters, and it requires CIR as a co-repressor. CIR binds to histone deacetylase and to SAP30 and serves as a linker between CBF1 and the histone deacetylase complex .. +PF10198 Histone acetyltransferases subunit 3
Ada3 is a family of proteins conserved from yeasts to humans . It is an essential component of the Ada transcriptional coactivator (alteration/deficiency in activation) complex. Ada3 plays a key role in linking histone acetyltransferase-containing complexes to p53 (tumour suppressor protein) thereby regulating p53 acetylation, stability and transcriptional activation following DNA damage .. +PF10199 KYY;
Alpha and gamma adaptin binding protein p34. p34 is a protein involved in membrane trafficking. It is known to interact with both alpha and gamma adaptin . It has been speculated that p34 may play a chaperone role such as preventing the soluble adaptors from co-assembling with soluble clathrin, or helping to remove the adaptors from the coated vesicle. Another possible function is in aiding the recruitment of soluble adaptors onto the membrane .. +PF10200 NADH:ubiquinone oxidoreductase, NDUFS5-15kDa
This is a family of short, approximately 105 amino acid residue, proteins which form part of NADH:ubiquinone oxidoreductase complex I. Complex I is the first multisubunit inner membrane protein complex of the mitochondrial electron transport chain and it transfers two electrons from NADH to ubiquinone. The protein carries four highly conserved cysteine residues but these do not appear to be in a configuration which would favour metal binding so the exact function of the protein is uncertain .. +PF10203 Cytochrome c oxidase assembly protein PET191
Pet191_N is the conserved N-terminal of a family of conserved proteins found from nematodes to humans. It carries six highly conserved cysteine residues. Pet191 is required for the assembly of active cytochrome c oxidase but does not form part of the final assembled complex .. +PF10204 Dual oxidase maturation factor
DuoxA (Dual oxidase maturation factor) is the essential protein necessary for the final release of DUOX2 (an NADPH:O2 oxidoreductase flavoprotein) from the endoplasmic reticulum. Dual oxidases (DUOX1 and DUOX2) constitute the catalytic core of the hydrogen peroxide generator, which generates H2O2 at the apical membrane of thyroid follicular cells, essential for iodination of thyroglobulin by thyroid peroxidases. DuoxA carries five membrane-integral regions including a reverse signal-anchor with external N-terminus (type III) and two N-glycosylation sites . It is conserved from nematodes to humans.. +PF10205 Predicted coiled-coil domain-containing protein
This is the N-terminal 100 amino acid domain of a family of proteins conserved from nematodes to humans. It carries a characteristic KLRAQ sequence-motif. The function is not known.. +PF10206 Mitochondrial F1F0-ATP synthase, subunit f
This is a family of small proteins of approximately 110 amino acids, which are highly conserved from nematodes to humans. Some members of the family have been annotated in Swiss-Prot as being the f subunit of mitochondrial F1F0-ATP synthase but this could not be confirmed. The sequence has a well-conserved WRW motif. The exact function of the protein is not known.. +PF10208 Degradation arginine-rich protein for mis-folding
This is a family of small proteins of approximately 170 residues which contain four di-sulfide bridges that are highly conserved from nematodes to humans. Armet is a soluble protein resident in the endoplasmic reticulum and induced by ER stress. It appears to be involved with dealing with mis-folded proteins in the ER, thus in quality control of ER stress .. +PF10209 Uncharacterized conserved protein (DUF2340)
This is a family of small proteins of approximately 150 amino acids of unknown function.. +PF10210 Mitochondrial 28S ribosomal protein S32
This entry is of a family of short, approximately 100 amino acid residues, proteins which are mitochondrial 28S ribosomal proteins named as MRP-S32. Their exact function could not be confirmed.. +PF10211 Axonemal dynein light chain
Axonemal dynein light chain proteins play a dynamic role in flagellar and cilia motility. Eukaryotic cilia and flagella are complex organelles consisting of a core structure, the axoneme, which is composed of nine microtubule doublets forming a cylinder that surrounds a pair of central singlet microtubules. This ultra-structural arrangement seems to be one of the most stable micro-tubular assemblies known and is responsible for the flagellar and ciliary movement of a large number of organisms ranging from protozoan to mammals. This light chain interacts directly with the N-terminal half of the heavy chains .. +PF10212 Predicted coiled-coil domain-containing protein
This is the C-terminal 500 amino acids of a family of proteins with a predicted coiled-coil domain conserved from nematodes to humans. It carries a characteristic TTKRSYEDQ sequence-motif. The function is not known.. +PF10213 Mitochondrial ribosomal subunit protein
This is a conserved region of approx. 125 residues of one of the proteins that makes up the small subunit of the mitochondrial ribosome. In Saccharomyces cerevisiae the protein is MRP-S24 whereas in humans it is MRP-S28. The human mitochondrial ribosome has 29 distinct proteins in the small subunit and these have homologues in, for example, Drosophila melanogaster, Caenorhabditis elegans, and in the genomes of several fungi .. +PF10214 RNA polymerase I-specific transcription-initiation factor
RNA polymerase I-specific transcription-initiation factor Rrn6 and Rrn7 represent components of a multisubunit transcription factor essential for the initiation of rDNA transcription by Pol I . These proteins are found in fungi.. +PF10215 Oligosaccaryltransferase
Ost4 is a very short, approximately 30 residues, enzyme found from fungi to vertebrates. It is a member of the ER oligosaccharyltransferase complex, EC 2.4.1.119, that catalyses the asparagine-linked glycosylation of proteins. It appears to be an integral membrane protein that mediates the en bloc transfer of a preassembled high-mannose oligosaccharide onto asparagine residues of nascent polypeptides as they enter the lumen of the rough endoplasmic reticulum (RER).. +PF10216 CO2 hydration protein (ChpXY)
This small family of proteins includes paralogues ChpX and ChpY in Synechococcus sp. PCC7942 and other cyanobacteria, associated with distinct NAD(P)H dehydrogenase complexes. These proteins collectively enable light-dependent CO2 hydration and CO2 uptake; loss of both blocks growth at low CO2 concentrations.. +PF10217 Uncharacterized conserved protein (DUF2039)
This entry is a region of approximately 100 residues containing three pairs of cysteine residues. The region is conserved from plants to humans but its function is unknown.. +PF10218 Uncharacterized conserved protein (DUF2054)
This entry contains 14 conserved cysteines, three of which are CC-dimers. The region is of approximately 200 residues in length but its function is unknown.. +PF10220 Uncharacterized conserved protein (DUF2146)
This is a family of proteins conserved from plants to humans. In Dictyostelium it is annotated as Mss11p but this could not be confirmed. Mss11p is required for the activation of pseudo-hyphal and invasive growth by Ste12p in yeast.. +PF10221 Cell cycle and development regulator
This is a set of proteins conserved from worms to humans. The proteins are a PAN GU kinase substrate, Mat89Bb, essential for S-M cycles of early Drosophila embryogenesis, Xenopus embryonic cell cycles and morphogenesis, and cell division in cultured mammalian cells.. +PF10222 Uncharacterized conserved protein (DUF2152)
This is a family of proteins conserved from worms to humans. Its function is unknown.. +PF10223 Uncharacterized conserved protein (DUF2181)
This is a region of approximately 250 residues conserved from worms to humans. Its function is unknown.. +PF10224 Predicted coiled-coil protein (DUF2205)
This entry represents a highly conserved 100 residue region which is likely to be a coiled-coil structure. The exact function is unknown.. +PF10225 Uncharacterized conserved protein (DUF2215)
This entry is the central 200 residues of a family of proteins conserved from worms to humans. The function is unknown.. +PF10226 Uncharacterized conserved proteins (DUF2216)
This is the conserved N-terminal half of proteins which are found from worms to humans. Some annotation suggests it might be PKR, the Hepatitis delta antigen-interacting protein A, but this could not be confirmed.. +PF10228 Uncharacterised conserved protein (DUF2228)
This is a family of conserved proteins of approximately 700 residues found from worms to humans.. +PF10229 Uncharacterized conserved protein (DUF2246)
This is a family of proteins conserved from worms to humans of approximately 300 residues. The function is unknown.. +PF10230 Uncharacterised conserved protein (DUF2305)
This family of proteins is conserved from plants to humans. The function is unknown.. +PF10231 Uncharacterised conserved protein (DUF2315)
This is a family of small conserved proteins found from worms to humans. The function is not known.. +PF10232 Arc32;
Mediator of RNA polymerase II transcription complex subunit 8. Arc32, or Med8, is one of the subunits of the Mediator complex of RNA polymerase II. The region conserved contains two alpha helices putatively necessary for binding to other subunits within the core of the Mediator complex . The N-terminus of Med8 binds to the essential core Head part of Mediator and the C-terminus hinges to Med18 on the non-essential part of the Head that also includes Med20 .. +PF10233 Uncharacterized conserved protein CG6151-P
This is a family of small, less than 200 residue long, proteins which are named as CG6151-P proteins that are conserved from fungi to humans. The function is unknown. The fungal members have a characteristic ICP sequence motif. Some members are annotated as putative clathrin-coated vesicle protein but this could not be defined.. +PF10234 Clusterin-associated protein-1
This protein is conserved from worms to humans. The protein of 413 amino acids contains a central coiled-coil domain, possibly the region that binds to clusterin. Cluap1 expression is highest in the nucleus and gradually increases during late S to G2/M phases of the cell cycle and returns to the basal level in the G0/G1 phases. In addition, it is upregulated in colon cancer tissues compared to corresponding non-cancerous mucosa. It thus plays a crucial role in the life of the cell .. +PF10235 Microtubule-associated protein CRIPT
The CRIPT protein is a cytoskeletal protein involved in microtubule production. The C-terminal domain is essential for binding to the PDZ3 domain of the SAP90 protein, one of a super-family of PDZ-containing proteins that play an important role in coupling the membrane ion channels with their signalling partners. SAP90 is concentrated in the post synaptic density of glutamatergic neurons .. +PF10236 Mitochondrial ribosomal death-associated protein 3
This is a family of conserved proteins which were originally described as death-associated-protein-3 (DAP-3). The proteins carry a P-loop DNA-binding motif, and induce apoptosis . DAP3 has been shown to be a pro-apoptotic factor in the mitochondrial matrix and to be crucial for mitochondrial biogenesis and so has also been designated as MRP-S29 (mitochondrial ribosomal protein subunit 29).. +PF10237 DPPF;
Probable N6-adenine methyltransferase. This is a protein of approximately 200 residues which is conserved from plants to humans . It contains a highly conserved QFW motif close to the N-terminus and a DPPF motif in the centre. The DPPF motif is characteristic of N-6 adenine-specific DNA methylases, and this family is found in eukaryotes .. +PF10238 E2F-associated phosphoprotein
This entry represents the conserved C-terminal portion of an E2F binding protein. E2F transcription factors play an essential role in cell proliferation and apoptosis and their activity is frequently deregulated in human cancers. E2F activity is regulated by a variety of mechanisms, frequently mediated by proteins binding to individual members or a subgroup of the family. EAPP interacts with a subset of E2F factors and influences E2F-dependent promoter activity. EAPP is present throughout the cell cycle but disappears during mitosis .. +PF10239 FAM98AB;
Protein of unknown function (DUF2465). FAM98A and B proteins are found from worms to humans but their function is unknown. This entry is of a family of proteins that is rich in glycines.. +PF10240 Fam125A;
Protein of unknown function (DUF2464). This is a family of proteins conserved from worms to humans. Members have been annotated as FAM125A proteins, but their function is unknown.. +PF10241 Uncharacterized conserved protein
This is a family of short proteins which are conserved over a region of 80 residues. There is a characteristic KxDL motif towards the C-terminus. The function is unknown.. +PF10242 Lipoma HMGIC fusion partner-like protein
This is a group of proteins expressed from a series of genes referred to as Lipoma HMGIC fusion partner-like. The proteins carry four highly conserved transmembrane domains in this entry. In certain instances, e.g. in LHFPL5, mutations cause deafness in humans and hypospadias , and LHFPL1 is transcribed in six liver tumour cell lines .. +PF10243 Microtubule-binding protein MIP-T3
This protein, which interacts with both microtubules and TRAF3 (tumour necrosis factor receptor-associated factor 3), is conserved from worms to humans. The N-terminal region is the microtubule binding domain and is well-conserved; the C-terminal 100 residues, also well-conserved, constitute the coiled-coil region which binds to TRAF3. The central region of the protein is rich in lysine and glutamic acid and carries KKE motifs which may also be necessary for tubulin-binding, but this region is the least well-conserved .. +PF10244 Mitochondrial ribosomal subunit
MRP-L51 is a family of small proteins from the intact 55 S mitochondrial ribosome . It has otherwise been referred to as bMRP-64 . The exact function of this family is not known.. +PF10245 Mitochondrial 28S ribosomal protein S22
This is the conserved N-terminus and central portion of the mitochondrial small subunit 28S ribosomal protein S22. Mammalian mitochondria carry out the synthesis of 13 polypeptides that are essential for oxidative phosphorylation and, hence, for the synthesis of the majority of the ATP used by eukaryotic organisms. The number of proteins produced by prokaryotes is smaller, reflected in the lower number of ribosomal proteins present in them .. +PF10246 Mitochondrial ribosomal protein MRP-S35
This is a family of short mitochondrial ribosomal proteins, less than 200 amino acids long, that are highly conserved from worms to humans. The structure has previously been referred to as MRP-S18 but the current numbering fits the preferred nomenclature from these authors.. +PF10247 Mit_gmP;
Reactive mitochondrial oxygen species modulator 1. This is a family of small, approximately 100 amino acid, proteins found from yeasts to humans. The majority of endogenous reactive oxygen species (ROS) in cells are produced by the mitochondrial respiratory chain. An increase or imbalance in ROS alters the intracellular redox homeostasis, triggers DNA damage, and may contribute to cancer development and progression . Members of this family are mitochondrial reactive oxygen species modulator 1 (Romo1) proteins that are responsible for increasing the level of ROS in cells. Increased Romo1 expression can have a number of other effects including: inducing premature senescence of cultured human fibroblasts [2,3] and increased resistance to 5-fluorouracil .. +PF10248 Myelodysplasia-myeloid leukemia factor 1-interacting protein
This entry is the conserved central region of a group of proteins that are putative transcriptional repressors . The structure contains a putative 14-3-3 binding motif involved in the subcellular localisation of various regulatory molecules, and it may be that interaction with the transcription factor DREF could be regulated through this motif. DREF regulates proliferation-related genes in Drosophila . Mlf1IP is expressed in both the nuclei and the cytoplasm and thus may have multi-functions .. +PF10249 NADH-ubiquinone oxidoreductase subunit 10
NDUFB10 is a family of conserved proteins of up to 180 residues. It is one of the 41 protein subunits within the hydrophobic fraction of the NADH:ubiquinone oxidoreductase (complex I), a multiprotein complex located in the inner mitochondrial membrane whose main function is the transport of electrons from NADH to ubiquinone, which is accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space. NDUFB10 is encoded in the nucleus.. +PF10250 GDP-fucose protein O-fucosyltransferase
This is a family of conserved proteins representing the enzyme responsible for adding O-fucose to EGF (epidermal growth factor-like) repeats. Six highly conserved cysteines are present in O-FucT-1 as well as a DXD-like motif (ERD), conserved in mammals, Drosophila, and C. elegans. Both features are characteristic of several glycosyltransferase families. The enzyme is a membrane-bound protein released by proteolysis and, as for most glycosyltransferases, is strongly activated by manganese .. +PF10251 Presenilin enhancer-2 subunit of gamma secretase
This entry is a short 101 peptide protein which is the smallest subunit of the gamma-secretase aspartyl protease complex that catalyses the intramembrane cleavage of a subset of type I transmembrane proteins. The other active constituents of the complex are presenilin (PS) nicastrin and anterior pharynx defective-1 (APH-1) protein. PEN-2 adopts a hairpin orientation in the membrane with its N- and C-terminal domains facing the luminal/extracellular space, and the C-terminal domain maintains PS stability within the complex .. +PF10252 Casein kinase substrate phosphoprotein PP28
This domain is a region of 70 residues conserved in proteins from plants to humans and contains a serine/arginine rich motif. In rats the full protein is a casein kinase substrate, and this region contains phosphorylation sites for both cAMP-dependent protein kinase and casein kinase II .. +PF10253 PRCC_Cterm;
Mitotic checkpoint regulator, MAD2B-interacting. This family constitutes the major, conserved, portion of PRCC proteins. In humans this family interacts with MAD2B, the mitotic checkpoint protein [1,2]. In Schizosaccharomyces pombe this protein is part of the Cwf-complex that is known to be involved in pre-mRNA splicing .. +PF10254 PACS-1 cytosolic sorting protein
PACS-1 is a cytosolic sorting protein that directs the localisation of membrane proteins in the trans-Golgi network (TGN)/endosomal system. PACS-1 connects the clathrin adaptor AP-1 to acidic cluster sorting motifs contained in the cytoplasmic domain of cargo proteins such as furin, the cation-independent mannose-6-phosphate receptor and in viral proteins such as human immunodeficiency virus type 1 Nef .. +PF10255 RNA polymerase I-associated factor PAF67
RNA polymerase I is a multisubunit enzyme and its transcription competence is dependent on the presence of PAF67 . This family of proteins is conserved from worms to humans.. +PF10256 QRDY;
Golgin subfamily A member 7/ERF4 family. KOGs (KOG4069 & KOG4101). This family of proteins includes Golgin subfamily A member 7 proteins as well as Ras modification protein ERF4.. +PF10257 Retinoic acid induced 16-like protein
This is the conserved N-terminal 450 residues of a family of proteins described as retinoic acid-induced protein 16-like proteins. The exact function is not known. The proteins are found from worms to humans.. +PF10258 PHAX RNA-binding domain
RNA_GG_bind is the highly conserved U3 snoRNA-binding domain of PHAX (phosphorylated adaptor for RNA export) whose function is to transport U3 snoRNA from the nucleus after transcription . It is characterised by having two pairs of adjacent glycines, as GGx12GG.. +PF10259 Rogdi leucine zipper containing protein
This is a family of conserved proteins which have been suggested as containing leucine-zipper domains. A leucine zipper domain is a region of 30 amino acids with leucines repeating every seven or eight residues; these proteins do have many such leucines. The protein in Drosophila comes from the gene ROGDI.. +PF10260 Uncharacterized conserved domain (SAYSvFN)
This domain of approximately 75 residues contains a highly conserved SATSv/iFN motif. The function is unknown but the domain is conserved from plants to humans.. +PF10261 Inositol phospholipid synthesis and fat-storage-inducing TM
This is a family of transmembrane proteins which are variously annotated as possibly being inositol phospholipid synthesis protein and fat-storage-inducing. The members are conserved from yeasts to humans and are localised to the endoplasmic reticulum where they are involved in triglyceride lipid droplet formation .. +PF10262 SelT;
KOGs (KOG3286) & COG3526. This entry is an approximately 100 residue region of selenoprotein-T, conserved from plants to humans. The protein binds to UDP-glucose:glycoprotein glucosyltransferase (UGTR), the endoplasmic reticulum (ER)-resident protein, which is known to be involved in the quality control of protein folding . Selenium (Se) plays an essential role in cell survival and most of the effects of Se are probably mediated by selenoproteins, including selenoprotein T. However, despite its binding to UGTR and that its mRNA is up-regulated in extended asphyxia, the function of the protein and hence of this region of it is unknown . Selenoprotein W contains selenium as selenocysteine in the primary protein structure and levels of this selenoprotein are affected by selenium .. +PF10263 SprT-like family
This family represents a domain found in eukaryotes and prokaryotes. The domain contains a characteristic motif of the zinc metallopeptidases. This family includes the bacterial SprT protein.. +PF10264 Winged helix Storkhead-box1 domain
This is the conserved N-terminal winged helix domain of Storkhead-box1 protein which is likely to be a DNA binding domain. In humans the full-length protein controls polyploidization of extravillus trophoblast and is implicated in pre-eclampsia.. +PF10265 Uncharacterized conserved protein (DUF2217)
This is a family of conserved proteins of 500-600 residues found from worms to humans. Its function is not known.. +PF10266 Hereditary spastic paraplegia protein strumpellin
This is a family of proteins conserved from plants to humans, in which two closely situated point mutations in the human protein lead to the condition of hereditary spastic paraplegia. Strumpellin contains one known domain called a spectrin repeat that consists of three alpha-helices of a characteristic length wrapped in a left-handed coiled coil. The spectrin proteins have multiple copies of this repeat, which can then form multimers in the cell. Spectrin associates with the cell membrane via spectrin repeats in the ankyrin protein. The spectrin repeat is a structural platform for cytoskeletal protein assemblies.. +PF10267 Tmcc1; Transmemb_cc2; Tmemcc2;
Predicted transmembrane and coiled-coil 2 protein. This family of transmembrane coiled-coil containing proteins is conserved from worms to humans. Its function is unknown.. +PF10268 Tmem161AB; Transmemb_161AB; Tmem161AB;
Predicted transmembrane protein 161AB. Transmemb_161AB is a family of conserved proteins found from worms to humans. Members are putative transmembrane proteins but otherwise the function is not known.. +PF10269 Tmem185A;
Transmembrane Fragile-X-F protein . This is a family of conserved transmembrane proteins that appear in humans to be expressed from a region upstream of the FragileXF site and to be intimately linked with the Fragile-X syndrome. Absence of TMEM185A does not necessarily lead to developmental delay, but might in combination with other, yet unknown, factors. Otherwise, the lack of the TMEM185A protein is either disposable (redundant) or its function can be complemented by the highly similar chromosome 2 retro-pseudogene product, TMEM185B .. +PF10270 Tmem32; Tmemb_32;
Membrane magnesium transporter. This entry represents a novel family of membrane magnesium transporters (MMgT) . The proteins, MMgT1 and MMgT2, are localised to the Golgi complex and post-Golgi vesicles, including the early endosomes, suggesting that they may provide regulated pathways for Mg(2+) transport in the Golgi and post-Golgi organelles of epithelium-derived cells .. +PF10271 Putative transmembrane protein
This is a family of conserved proteins found from worms to humans. They are putative transmembrane proteins but the function is unknown.. +PF10272 Putative transmembrane protein precursor
This is a family of proteins conserved from worms to humans. The proteins are purported to be transmembrane protein-precursors but the function is unknown.. +PF10273 Pre-rRNA-processing protein TSR2
This entry represents the central conserved section of a family of proteins described as pre-rRNA-processing protein TSR2. The region has a distinctive WGG motif but the function is unknown.. +PF10274 Parkin co-regulated protein
This family of proteins is transcribed anti-sense along the DNA to the Parkin gene product and the two appear to be transcribed under the same promoter. The protein has predicted alpha-helical and beta-sheet domains which suggest its function is in the ubiquitin/proteasome system . Mutations in parkin are the genetic cause of early-onset and autosomal recessive juvenile parkinsonism.. +PF10275 Otubain;
Peptidase C65 Otubain. This family of proteins conserved from plants to humans is a highly specific ubiquitin iso-peptidase that removes ubiquitin from proteins. The modification of cellular proteins by ubiquitin (Ub) is an important event that underlies protein stability and function in eukaryote being a dynamic and reversible process. Otubain carries several key conserved domains: (i) the OTU (ovarian tumour domain) in which there is an active cysteine protease triad (ii) a nuclear localisation signal, (iii) a Ub interaction motif (UIM)-like motif phi-xx-A-xxxs-xx-Ac (where phi indicates an aromatic amino acid, x indicates any amino acid and Ac indicates an acidic amino acid), (iv) a Ub-associated (UBA)-like domain and (v) the LxxLL motif.. +PF10276 Zinc-finger domain
This is a short zinc-finger domain conserved from fungi to humans. It is Cx8Hx14Cx2C.. +PF10277 Frag1/DRAM/Sfk1 family
KOGs (KOG3979) & KOGs (KOG4320) & Pfam-B_15139 (release 21.0). This family includes Frag1, DRAM and Sfk1 proteins. Frag1 (FGF receptor activating protein 1) is a protein that is conserved from fungi to humans. There are four potential iso-prenylation sites throughout the peptide, viz CILW, CIIW and CIGL. Frag1 is a membrane-spanning protein that is ubiquitously expressed in adult tissues suggesting an important cellular function . Dram is a family of proteins conserved from nematodes to humans with six hydrophobic transmembrane regions and an Endoplasmic Reticulum signal peptide. It is a lysosomal protein that induces macro-autophagy as an effector of p53-mediated death, where p53 is the tumour-suppressor gene that is frequently mutated in cancer. Expression of Dram is stress-induced . This region is also part of a family of small plasma membrane proteins, referred to as Sfk1, that may act together with or upstream of Stt4p to generate normal levels of the essential phospholipid PI4P, thus allowing proper localisation of Stt4p to the actin cytoskeleton [3-4].. +PF10278 Mediator of RNA pol II transcription subunit 19
Med19 represents a family of conserved proteins which are members of the multi-protein co-activator Mediator complex. Mediator is required for activation of RNA polymerase II transcription by DNA binding transactivators .. +PF10279 Latarcin precursor
This family represents the precursor proteins for a number of short antimicrobial peptides called Latarcins. Latarcins were discovered in the venom of the spider Lachesana tarabaevi . Latarcins are likely to adopt amphipathic alpha-helical structure in the plasma membrane.. +PF10280 HSPC296_Med11;
Mediator complex protein . Mediator is a large, modular protein complex that is conserved from yeast to human and conveys regulatory signals from DNA-binding transcription factors to RNA polymerase II. Not only are the polypeptides conserved but the structural organisation is also largely conserved. One or two subunits are either fungal or vertebral specific but Med11 is one of the subunits that is conserved from fungi to humans . Med11 appears to be necessary for the full and successful assembly of the core head sub-region .. +PF10281 Putative stress-responsive nuclear envelope protein
Pfam-B_11056 (release 21.0). This family of proteins found in fungi is a putative stress-responsive nuclear envelope protein Ish1 .. +PF10282 DUF2394; Muc_lac_enz;
Lactonase, 7-bladed beta-propeller. Pfam-B_1372 (release 21.0). This entry contains bacterial 6-phosphogluconolactonases (6PGL)YbhE-type (EC:3.1.1.31) which hydrolyse 6-phosphogluconolactone to 6-phosphogluconate. The entry also contains the fungal muconate lactonising enzyme carboxy-cis,cis-muconate cyclase (EC:5.5.1.5) and muconate cycloisomerase (EC:5.5.1.1), which convert cis,cis-muconates to muconolactones and vice versa as part of the microbial beta-ketoadipate pathway. Structures of proteins in this family have revealed a 7-bladed beta-propeller fold .. +PF10283 Zinc-finger (CX5CX6HX5H) motif
Pfam-B_93850 (release 21.0). This domain is a zinc-finger motif that in humans is part of the APLF, aprataxin- and PNK-like forkhead association domain-containing protein. The ZnF is highly conserved both in primary sequence and in the spacing between the putative zinc coordinating residues and is configured CX5CX6HX5H. Many of the proteins containing the APLF-like ZnF are involved in DNA strand break repair and/or contain domains implicated in DNA metabolism.. +PF10284 Luciferase helical bundle domain
This domain is found associated with the catalytic domain of dinoflagellate luciferase . Luciferase is involved in catalysing the light emitting reaction in bioluminescence. The structure of this domain has been solved . This domain has a three helix bundle structure that holds four important histidines that are thought to play a role in the pH regulation of the enzyme.. +PF10285 Luciferase catalytic domain
This domain is the catalytic domain of dinoflagellate luciferase . Luciferase is involved in catalysing the light emitting reaction in bioluminescence. The structure of this domain has been solved . The core part of the domain is a 10 stranded beta barrel that is structurally similar to lipocalins and FABP .. +PF10287 Putative TOS1-like glycosyl hydrolase (DUF2401)
Pfam-B_11571 (release 21.0). This family of proteins is conserved in fungi. One member is annotated putatively as OPEL, a house-keeping protein, but this could not be confirmed. It contains 5 highly conserved cysteines two of which form a characteristic CGC sequence motif. It has recently been shown that this family is related to known glycosyl hydrolases .. +PF10288 Protein of unknown function (DUF2392)
Pfam-B_10085 (release 21.0). This is a family of proteins conserved from plants to humans. The function is not known. It carries a characteristic GRG sequence motif.. +PF10290 Glycine-rich protein domain (DUF2403)
Pfam-B_11570 (release 21.0). This domain is found in the N-terminal region of members of DUF2401 Pfam:PF10287. The function of this glycine-rich region is unknown.. +PF10291 SAFF;
Muniscin C-terminal mu homology domain. Yang H, Pfam-B_7632 (release 21.0). The muniscins are a family of endocytic adaptors that is conserved from yeast to humans. This C-terminal domain is structurally similar to mu homology domains, and is the region of the muniscin proteins involved in the interactions with the endocytic adaptor-scaffold proteins Ede1-eps15. This interaction influences muniscin localisation. The muniscins provide a combined adaptor-membrane-tubulation activity that is important for regulating endocytosis.. +PF10292 Srab;
Serpentine type 7TM GPCR receptor class ab chemoreceptor. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srab is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' . The expression pattern of the srab genes is biologically intriguing. Of the six promoters successfully expressed in transgenic organisms, one was exclusively expressed in the tail phasmid neurons, two were exclusively expressed in a head amphid neuron, and two were expressed both in the head and tail neurons as well as a limited number of other cells .. +PF10293 Domain of unknown function (DUF2405)
Pfam-B_12420 (release 21.0). This is a conserved region of a family of proteins conserved in fungi. The function is unknown.. +PF10294 Putative methyltransferase
Pfam-B_19672 (Release 21.0). +PF10295 Uncharacterised protein (DUF2406)
Pfam-B_13850 (release 21.0). This is a family of small proteins conserved in fungi. The function is not known.. +PF10296 Putative integral membrane protein conserved region (DUF2404)
Pfam-B_12178 (release 21.0). This domain is conserved from plants to humans. The function is not known.. +PF10297 Minimal binding motif of Hap4 for binding to Hap2/3/5
In Saccharomyces cerevisiae, the haem-activated protein complex Hap2/3/4/5 plays a major role in the transcription of genes involved in respiration . Hap4_Hap_bind is the essential domain of Hap4 which allows it to associate with Hap2, Hap3 and Hap5 to form the Hap complex .. +PF10298 WhiA N-terminal LAGLIDADG-like domain
This domain is found at the N terminal of sporulation factor WhiA. This domain is related to the LAGLIDADG Homing endonuclease domain while the C terminal domain of WhiA is predicted to be a DNA binding helix-turn-helix domain .. +PF10300 Deme6; IML2;
Protein of unknown function (DUF3808). Pfam-B_15386 (release 21.0). This is a family of proteins conserved from fungi to humans. Members of this family also carry a TPR_2 domain Pfam:PF07719 at their C-terminus.. +PF10302 DUF2407 ubiquitin-like domain
Pfam-B_17915 (release 21.0). This is a family of proteins found in fungi. The function is not known. This domain is related to the ubiquitin domain.. +PF10303 Protein of unknown function (DUF2408)
Pfam-B_16841 (release 21.0). This is a family of proteins conserved in fungi. The function is unknown.. +PF10304 Domain of unknown function (DUF2411)
Pfam-B_15078 (release 21.0). This is a 38 residue domain that is found in proteins at the extreme C-terminal end of some HEAT repeats Pfam: PF02985. The function of this domain is not known.. +PF10305 RNA pol II promoter Fmp27 protein domain
Pfam-B_15444 (release 21.0). Fmp27_SW is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation . It contains characteristic SW and GKG sequence motifs.. +PF10306 Hypothetical protein FLILHELTA
Pfam-B_18082 (release 21.0). This is a family of conserved proteins found in fungi. It contains a characteristic FL(I)LHE(L)TA sequence motif, where the bracketed residues are I, L or V. The function is not known.. +PF10307 Hypothetical protein (DUF2410)
Pfam-B_19378 (release 21.0). This is a family of proteins conserved in fungi. The function is not known. There are two characteristic sequence motifs, GGWW and TGR.. +PF10309 Protein of unknown function (DUF2414)
Pfam-B_22455 (release 21.0). This is a family of proteins conserved from fungi to mammals. One mouse member is referred to as ELG protein but this is not a homologue of human ELG protein. The function is not known.. +PF10310 Protein of unknown function (DUF2413)
Pfam-B_20450 (release 21.0). This is a family of proteins conserved in fungi. The function is not known.. +PF10311 Increased loss of mitochondrial DNA protein 1
Pfam-B_22448 (release 21.0). This is a family of proteins of approximately 200 residues that are conserved in fungi. Ilm1 is part of the peroxisome, a complex that is the sole site of beta-oxidation in Saccharomyces cerevisiae and known to be required for optimal growth in the presence of fatty acid. Ilm1 may participate in the control of the C16/C18 ratio since it interacts strongly with Mga2p, a transcription factor that controls expression of Ole1, the sole fatty acyl desaturase in S. cerevisiae responsible for conversion of the saturated fatty acids stearate (C18) and palmitate (C16) to oleate and palmitoleate, respectively .. +PF10312 Conserved mid region of cactin
Pfam-B_20647 (release 21.0). This is the conserved middle region of a family of proteins referred to as cactins. The region contains two of three predicted coiled-coil domains. Most members of this family have a CactinC_cactus Pfam:PF09732 domain at the C-terminal end. Upstream of Mid_cactin in Drosophila members are a serine-rich region, some non-typical RD motifs and three predicted bipartite nuclear localisation signals, none of which are well-conserved. Cactin associates with IkappaB-cactus as one of the intracellular members of the Rel (NF-kappaB) pathway which is conserved in invertebrates and vertebrates. In mammals, this pathway controls the activities of the immune and inflammatory response genes as well as viral genes, and is critical for cell growth and survival. In Drosophila, the Rel pathway functions in the innate cellular and humoral immune response, in muscle development, and in the establishment of dorsal-ventral polarity in the early embryo .. +PF10313 Uncharacterised protein domain (DUF2415)
Pfam-B_25751 (release 21.0). This is a short, 30 residue domain, from a family of proteins conserved in fungi. The function is unknown. There is a characteristic DLL sequence motif.. +PF10315 Protein of unknown function (DUF2416)
Pfam-B_28778 (release 21.0). This is a family of conserved proteins found in fungi. The function is not known.. +PF10316 Srbc;
Serpentine type 7TM GPCR chemoreceptor Srbc . Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srbc is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10317 Srd;
Serpentine type 7TM GPCR chemoreceptor Srd. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srd is part of the larger Str superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10318 Srh;
Serpentine type 7TM GPCR chemoreceptor Srh. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srh is part of the Str superfamily of chemoreceptors . Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10319 Srj;
Serpentine type 7TM GPCR chemoreceptor Srj. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srj is part of the Str superfamily of chemoreceptors. The srj family is designated as the out-group based on its location in preliminary phylogenetic analyses of the entire superfamily . Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10320 Srsx;
Serpentine type 7TM GPCR chemoreceptor Srsx. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srsx is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10321 Srt;
Serpentine type 7TM GPCR chemoreceptor Srt. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srt is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10322 Sru;
Serpentine type 7TM GPCR chemoreceptor Sru. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Sru is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10323 Srv;
Serpentine type 7TM GPCR chemoreceptor Srv. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srv is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10324 Srw;
Serpentine type 7TM GPCR chemoreceptor Srw. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srw is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' . The genes encoding Srw do not appear to be under as strong an adaptive evolutionary pressure as those of Srz .. +PF10325 Srz;
Serpentine type 7TM GPCR chemoreceptor Srz. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srz is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' . The genes encoding Srz appear to be under strong adaptive evolutionary pressure .. +PF10326 Str;
Serpentine type 7TM GPCR chemoreceptor Str. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Str is a member of the Str superfamily of chemoreceptors. Almost a quarter (22.5%) of str and srj family genes and pseudogenes in C. elegans appear to have been newly formed by gene duplications since the species split . Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10327 Serpentine_Sri;
Serpentine type 7TM GPCR chemoreceptor Sri. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Sri is part of the Str superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10328 Serpentine_Srx;
Serpentine type 7TM GPCR chemoreceptor Srx. Thomas JH, Robertson H. Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type . Srx is part of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' .. +PF10329 Region of unknown function (DUF2417)
Pfam-B_22799 (release 21.0). This is a region of a family of proteins conserved in fungi some of whose members also have the Abhydrolase_1, Pfam:PF00561, domain in their sequence. The function of this region is not known.. +PF10330 Putative Sin3 binding protein
Pfam-B_24989 (release 21.0). This is a family of the conserved N-terminal end of a group of proteins conserved in fungi. It is likely to be a Sin3 binding protein. Sin3p does not bind DNA directly even though the yeast SIN3 gene functions as a transcriptional repressor. Sin3p is part of a large multiprotein complex . Stb3 appears to bind directly to ribosomal RNA Processing Elements (RRPE) although there are no obvious domains which would accord with this, implying that Stb3 may be a novel RNA-binding protein .. +PF10332 Protein of unknown function (DUF2418)
Pfam-B_29723 (release 21.0). This is a conserved 100 residue central region of a family of proteins found in fungi. It carries a characteristic EYD sequence motif. The function is not known.. +PF10333 GPI-Mannosyltransferase II co-activator
Pfam-B_50403 (release 21.0). Pga1 is found only in yeasts and not in mammals. It localises in the ER as a glycosylated integral membrane protein. It binds to the GPI-mannosyltransferase II subunit of the GPI and it is responsible for the second mannose addition to GPI precursors. The GPI-anchoring complex is a glycolipid that functions as a membrane anchor for many cell-surface proteins .. +PF10334 Protein of unknown function (DUF2421)
Pfam-B_39020 (release 21.0). This is a family of proteins conserved in fungi. The function is not known.. +PF10335 Putative nucleotidyltransferase substrate binding domain
This domain is found associated with presumed nucleotidyltransferase domains and seems to be distantly related to other helical substrate binding domains.. +PF10336 Protein of unknown function (DUF2420)
Pfam-B_32350 (release 21.0). This is a family of proteins conserved in fungi. The function is not known.. +PF10337 Protein of unknown function (DUF2422)
Pfam-B_42729 (release 21.0). This is a family of proteins conserved in fungi. The function is not known. This family is the C-terminal half of some member proteins which contain the DUF2421 Pfam:PF10334 domain at their N-terminus.. +PF10338 Protein of unknown function (DUF2423)
Pfam-B_46946 (release 21.0). This is a family of proteins conserved in fungi. The function is not known.. +PF10339 Yeast-specific zinc responsive
Pfam-B_50673 (release 21.0). This is a small family of proteins from Saccharomyces and related species. The function is not known but member proteins are highly induced in zinc-depleted conditions [1,2] and have increased expression in NAP1-deletion mutants . The S. cerevisiae genes are named VEL by association with Velum formation in the wine making process http://www.ajevonline.org/content/48/1/55.abstract. +PF10340 Protein of unknown function (DUF2424)
Pfam-B_51256 (release 21.0). This is a family of proteins conserved in yeasts. The function is not known.. +PF10341 Est3;
Shelterin complex subunit, TPP1/ACD. TPP1 is a component of the telomerase holoenzyme, involved in telomere replication. It has been demonstrated that TPP1 dimerises and binds to DNA and RNA. Furthermore, TPP1 stimulates the dissociation of RNA/DNA hetero-duplexes [1,2]. Yeast telomerase protein TPP1 (Est3 in yeast) is a novel type of GTPase . The key residues in Swiss:Q03096 are an Asp at residue 86 and the Arg at residue 110. The Asp is totally conserved in the family, whereas the Arg is not so well conserved. The N-terminal of TPP1 is likely to be the binding surface for TINF2, whereas the C-terminus probably binds to POT1, thereby tethering POT1 to the shelterin complex . The complex bound to telomeric DNA increases the activity and processivity of the human telomerase core enzyme, thus helping to maintain the length of the telomeres [5,6]. This domain is conserved from fungi to mammals, hence family Telomere_Pot1 has been merged into the family . The human shelterin complex includes six proteins: telomere repeat binding factor 1 (TRF1), TRF2, repressor/activator protein 1 (RAP1), TRF1-interacting nuclear protein 2 (TIN2), TIN2-interacting protein 1 (TPP1) and protection of telomeres 1 (POT1) .. +PF10342 Drmip_Hesp; Drmip_MAPK;
Ser-Thr-rich glycosyl-phosphatidyl-inositol-anchored membrane family. Pfam-B_42324 (release 21.0). Some members of this family appear to be serine- threonine-rich membrane-anchored proteins, anchored by glycosyl-phosphatidylinositol. In A. fumigatus these proteins play a role in fungal cell wall organisation. In Lentinula edodes this family is involved in fruiting body formation, and may have a more general role in signalling in other organisms as it interacts with MAPK. The family is also found in archaea and bacteria.. +PF10343 Protein of unknown function (DUF2419)
Pfam-B_35257 (release 21.0). This is a family of conserved proteins found from plants to humans. The function is not known. A few members are annotated as being cobyrinic acid a,c-diamide synthetase but this could not be confirmed.. +PF10344 DUF2425;
Mitochondrial protein from FMP27. Pfam-B_54917 (release 21.0). This family contains mitochondrial FMP27 proteins which in yeasts together with SEN1 are long genes that exist in a looped conformation, effectively bringing together their promoter and terminator regions. Pol-II is located at both ends of FMP27 when this gene is transcribed from a GAL1 promoter under induced and non-induced conditions . The exact function of the Fmp27 protein is not certain.. +PF10345 Cohesin loading factor
Cohesin_load is a common cohesin loading factor protein that is conserved in fungi. It is associated with the cohesin complex and is required in G1 for cohesin binding to chromosomes but dispensable in G2 when cohesion has been established. It is referred to as both Ssl3, in pombe, and Scc4, in S.cerevisiae. It complexes with Mis4 .. +PF10346 Conidiation protein 6
Pfam-B_35316 (release 21.0). Con-6 is the conserved N-terminal region of a family of small proteins found in fungi . It is expressed at approximately 6 hours after the induction of development and is induced just prior to major constriction-chain growth .. +PF10347 RNA pol II promoter Fmp27 protein domain
Pfam-B_5282 (release 21.0). Fmp27_GFWDK is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation . It contains characteristic GFWDK sequence motifs. Some members are associated with domain Fmp27_SW (Pfam:PF10305) towards the N terminus.. +PF10348 Domain of unknown function (DUF2427)
Pfam-B_52268 (release 21.0). This is the N-terminal region of a family of proteins conserved in fungi. Several members are annotated as being Ftp1 but this could not be confirmed. The function is not known.. +PF10349 WW-domain ligand protein
Pfam-B_5077 (release 21.0). The WWbp domain is characterised by several short PY and PT-like motifs of the PPPPY form. These appear to bind directly to the WW domains of WWP1 and WWP2 and other such diverse proteins as dystrophin and YAP (Yes-associated protein). This is the WW-domain binding protein WWbp via PY and PY_like motifs. The presence of a phosphotyrosine residue in the pWBP-1 peptide abolishes WW domain binding which suggests a potential regulatory role for tyrosine phosphorylation in modulating WW domain-ligand interactions. Given the likelihood that WWP1 and WWP2 function as E3 ubiquitin-protein ligases, it is possible that initial substrate-specific recognition occurs via WW domain-substrate protein interaction followed by ubiquitin transfer and subsequent proteolysis . This domain lies just downstream of the GRAM (Pfam:PF02893) in many members.. +PF10350 Putative death-receptor fusion protein (DUF2428)
Pfam-B_6748 (release 21.0). This is a family of proteins conserved from plants to humans. The function is not known. Several members have been annotated as being HEAT repeat-containing proteins while others are designated as death-receptor interacting proteins, but neither of these could be confirmed.. +PF10351 Golgi-body localisation protein domain
Pfam-B_6317 (release 21.0). This is the C-terminus of a family of proteins conserved from plants to humans. The plant members are localised to the Golgi proteins and appear to regulate membrane trafficking, as they are required for rapid vesicle accumulation at the tip of the pollen tube . The C-terminus probably contains the Golgi localisation signal and it is well-conserved.. +PF10353 Protein of unknown function (DUF2430)
Pfam-B_67886 (release 21.0). This is a family of short, 111 residue, proteins found in S. pombe. The function is not known.. +PF10354 Domain of unknown function (DUF2431)
Pfam-B_6967 (release 21.0). This is the N-terminal domain of a family of proteins found from plants to humans. The function is not known.. +PF10355 Protein of unknown function (Ytp1)
Pfam-B_7247 (release 21.0). This is a family of proteins found in fungi. The region appears to contain regions similar to mitochondrial electron transport proteins. The C-terminal domain is hydrophobic and negatively charged. There are consensus sites for both N-linked glycosylation and cAMP-dependent protein kinase phosphorylation .. +PF10356 Protein of unknown function (DUF2034)
This protein is expressed in fungi but its function is unknown.. +PF10357 Domain of Kin17 curved DNA-binding protein
Pfam-B_7469 (release 21.0). Kin17_mid is the conserved central 169 residue region of a family of Kin17 proteins. Towards the N-terminal end there is a zinc-finger domain, and in human and mouse members there is a RecA-like domain further downstream. The Kin17 protein in humans forms intra-nuclear foci during cell proliferation and is re-distributed in the nucleoplasm during the cell cycle .. +PF10358 Eeig1;
N-terminal C2 in EEIG1 and EHBP1 proteins. Wood V, Coggill PC, Zhang D, Aravind L. Pfam-B_7857 (release 21.0). This version of the C2 domain was initially identified in the vertebrate estrogen early-induced gene 1 (EEIG1) , and its Drosophila ortholog required for uptake of dsRNA via the endocytotic machinery to induce RNAi silencing . It is also in C.elegans ortholog Sym-3 (SYnthetic lethal with Mec-3) and the mammalian protein EHBP1 (EH domain Binding Protein-1) that regulates endocytotic recycling and two plant proteins, RPG that regulates Rhizobium-directed polar growth and PMI1 (Plastid Movement Impaired 1) that is essential for intracellular movement of chloroplasts in response to blue light . . +PF10359 RNA pol II promoter Fmp27 protein domain
Pfam-B_8838 (release 21.0). Fmp27_WPPW is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation . It contains characteristic HQR and WPPW sequence motifs and is towards the C-terminal in members which contain Fmp27_SW Pfam:PF10305.. +PF10360 Protein of unknown function (DUF2433)
Pfam-B_83000 (release 21.0). This is a conserved 120 residue region of a family of proteins found in fungi. The function is not known.. +PF10361 Protein of unknown function (DUF2434)
Pfam-B_84994 (release 21.0). This is a family of proteins conserved in fungi. The function is not known.. +PF10363 DUF2435; Pmp3;
Protein of unknown function (DUF2435). Pfam-B_7476 (release 21.0). This is a conserved region of approximately 400 residues which is found only in eukaryotes. It is associated with HEAT domains Pfam:PF02985 in all members. The function is not known.. +PF10364 Putative capsular polysaccharide synthesis protein
Pfam-B_99492 (release 21.0). Found only in Vibrio species, pombe and one other fungus, this is the N-terminal 150 residues of a family of proteins of unknown function. There is a characteristic NKWYS sequence motif.. +PF10365 Domain of unknown function (DUF2436)
Pfam-B_5683 (Release 22.0). This domain is found on peptidase C25 proteins and has no known function.. +PF10366 Vacuolar sorting protein 39 domain 1
This domain is found on the vacuolar sorting protein Vps39 which is a component of the C-Vps complex . Vps39 is thought to be required for the fusion of endosomes and other types of transport intermediates with the vacuole . In Saccharomyces cerevisiae, Vps39 has been shown to stimulate nucleotide exchange . The precise function of this domain has not been characterised.. +PF10367 Vacuolar sorting protein 39 domain 2
This domain is found on the vacuolar sorting protein Vps39 which is a component of the C-Vps complex . Vps39 is thought to be required for the fusion of endosomes and other types of transport intermediates with the vacuole . In Saccharomyces cerevisiae, Vps39 has been shown to stimulate nucleotide exchange . This domain is involved in localisation and in mediating the interactions of Vps39 with Vps11 .. +PF10368 Putative cell-wall binding lipoprotein
YkyA is a family of proteins containing a lipoprotein signal and a hydrolase domain. It is similar to cell wall binding proteins and might also be recognisable by a host immune defence system. It is thus likely to belong to pathways important for pathogenicity .. +PF10369 Small subunit of acetolactate synthase
ALS_ss_C is the C-terminal half of a family of proteins which are the small subunits of acetolactate synthase. Acetolactate synthase is a tetrameric enzyme, containing probably two large and two small subunits, which catalyses the first step in branched-chain amino acid biosynthesis. This reaction is sensitive to certain herbicides .. +PF10370 Domain of unknown function (DUF2437)
This is the N-terminal 50 amino acids of a group of bacterial proteins annotated as fumarylacetoacetate hydrolase-containing enzymes. In most cases members are associated with FAA_hydrolase Pfam:PF01557 further towards the C-terminus.. +PF10371 Domain of unknown function
EKR is a short, 33 residue, domain found in bacterial and some lower eukaryotic species which lies between a POR (pyruvate ferredoxin/flavodoxin oxidoreductase) Pfam:PF01558 and the 4Fe-4S binding domain Fer4 Pfam:PF00037. It contains a characteristic EKR sequence motif. The exact function of this domain is not known.. +PF10372 Bacterial membrane-spanning protein N-terminus
YojJ is the N-terminus of a family of bacterial proteins some of which are associated with DUF147 Pfam:PF02457 towards the C-terminus. It is a putative membrane-spanning protein.. +PF10373 Est1 DNA/RNA binding domain
Pfam-B_24280 (release 22.0). Est1 is a protein which recruits or activates telomerase at the site of polymerisation . This is the DNA/RNA binding domain of EST1 .. +PF10374 Telomerase activating protein Est1
Pfam-B_39673 (release 22.0). Est1 is a protein which recruits or activates telomerase at the site of polymerisation .. +PF10375 GRIP-related Arf-binding domain
The GRAB (GRIP-related Arf-binding) domain is towards the C-terminus of Rud3 type proteins. This domain is related to the GRIP domain, but the conserved tyrosine residue found at position 4 in all GRIP domains is replaced by a leucine residue. The Arf small GTPase is localised to the cis-Golgi where it recruits proteins via their GRAB domain, as part of the transport of cargo from the endoplasmic reticulum to the plasma membrane .. +PF10376 Double-strand recombination repair protein
Mei5 is one of a pair of meiosis-specific proteins which facilitate the loading of Dmc1 on to Rad51 on DNA at double-strand breaks during recombination. Recombination is carried out by a large protein complex based around the two RecA homologues, Rad51 and Dmc1. This complex may play both a catalytic and a structural role in the interaction between homologous chromosomes during meiosis. Mei5 is seen to contain a coiled-coil region.. +PF10377 Autophagy-related protein 11
Pfam-B_21462 (release 21.0). The function of this family is conflicting. In the fission yeast, Schizosaccharomyces pombe, this protein has been shown to interact with the telomere cap complex [1,2]. However, in budding yeast, Saccharomyces cerevisiae, this protein is called ATG11 and is shown to be involved in autophagy .. +PF10378 RMM;
Putative RRM domain . Griffiths-Jones S, Coggill PC. This is a putative RRM, RNA-binding, domain found only in fungi. It occurs in proteins annotated as Nrd1 yeast proteins, which are known to carry RRM domains. It is not homologous with any of the other RRM domains, eg RRM_1 Pfam:PF00076.. +PF10379 Virulence protein nec1
Mistry J, Morningstar A. Pfam-B_11405 (release 21.0). This is a family of virulence proteins that are found in pathogenic Streptomyces species.. +PF10380 Transcription factor CRF1
Pfam-B_25525 (release 21.0). CRF1 is a transcription factor that co-represses ribosomal genes with FHL1 via the TOR signalling pathway and protein kinase A .. +PF10381 Autophagocytosis associated protein C-terminal
Pfam-B_10019 (release 7.3). Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the vacuole. The small C-terminal domain is likely to be a distinct binding region for the stability of the autophagosome complex . It carries a highly characteristic conserved FLKF sequence motif.. +PF10382 Protein of unknown function (DUF2439)
Pfam-B_19050 (release 22.0). Proteins in this family have been implicated in telomere maintenance in Saccharomyces cerevisiae and in meiotic chromosome segregation in Schizosaccharomyces pombe . +PF10383 Transcription-silencing protein Clr2
Clr2 is a chromatin silencing protein, one of a quartet of proteins forming the core of SHREC, a multienzyme effector complex that mediates hetero-chromatic transcriptional gene silencing in fission yeast. Clr2 does not have any obvious well-conserved domains but, along with the other core proteins, binds to the histone deacetylase Clr3, and on its own might also have a role in chromatin organisation at the cnt domain, the site of kinetochore assembly.. +PF10384 Centromere protein Scm3
Pfam-B_19394 (release 21.0). Scm3 is a centromere protein that has been shown in Saccharomyces cerevisiae to be required for G2/M progression and Cse4 localisation . The C terminal region of Scm3 proteins is variable in size and sometimes consists of DNA binding motifs .. +PF10385 RNA polymerase beta subunit external 1 domain
RNA polymerases catalyse the DNA-dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared with three in eukaryotes (not including mitochondrial or chloroplast polymerases). This domain in prokaryotes spans the gap between domains 4 and 5 of the yeast protein. It is also known as the external 1 region of the polymerase and is bound in association with the external 2 region .. +PF10386 Protein of unknown function (DUF2441)
This is a family of highly conserved, predicted, proteins from Bacillus species. The structure forms a homo-dimer. The function is unknown.. +PF10387 Protein of unknown function (DUF2442)
Gene3D, pdb_2auw & Pfam-B_2245 (release 23.0). This family of bacterial and fungal proteins has several members annotated as being putative molybdopterin-guanine dinucleotide biosynthesis protein A; however this could not be verified. Hence the function is not known. This family also includes the DUF3532 that was found to be related and was merged into this family. Members of this family also fall into the NE0471 N-terminal domain-like superfamily, a family of proteins with a unique fold in SCOP:143880.. +PF10388 EAL-domain associated signalling protein domain
In Bacillus species this highly conserved region of the YkuI protein lies immediately downstream of the EAL (diguanylate cyclase/phosphodiesterase domain 2) Pfam:PF00563 domain so that together they form a monomer which dimerises for its enzymatic action. The region contains three alpha helices and five beta strands and is the C-terminal half of the structure.. +PF10389 Bacteriophage coat protein B
CoatB is a single filamentous bacteriophage alpha helix of approximately 44 residues. It is likely to assemble into a complex of 35 monomers in a Catherine-wheel like formation . It is the major coat protein of the virion.. +PF10390 RNA polymerase II elongation factor ELL
ELL is a family of RNA polymerase II elongation factors. It is bound stably to elongation-associated factors 1 and 2, EAFs, and together these act as a strong regulator of transcription activity by direct interaction with Pol II. ELL binds to pol II on its own but the affinity is greatly increased by the cooperation of EAF . Some members carry an Occludin domain Pfam:PF07303 just downstream. There is no S. cerevisiae member.. +PF10391 Fingers domain of DNA polymerase lambda
DNA polymerases catalyse the addition of dNMPs onto the 3-prime ends of DNA chains. There is a general polymerase fold consisting of three subdomains that have been likened to the fingers, palm, and thumb of a right hand. DNA_pol_lambd_f is the central three-helical region of DNA polymerase lambda referred to as the F and G helices of the fingers domain. Contacts with DNA involve this conserved helix-hairpin-helix motif in the fingers region which interacts with the primer strand. This motif is common to several DNA binding proteins and confers a sequence-independent interaction with the DNA backbone .. +PF10392 Golgi transport complex subunit 5
Pfam-B_24958 (release 21.0). The COG complex, the peripheral membrane oligomeric protein complex involved in intra-Golgi protein trafficking, consists of eight subunits arranged in two lobes bridged by Cog1. Cog5 is in the smaller, B lobe, bound in with Cog6-8, and is itself bound to Cog1 as well as, strongly, to Cog7.. +PF10393 Trimeric coiled-coil oligomerisation domain of matrilin
This short domain is a coiled coil structure and has a single cysteine residue at the start which is likely to form a di-sulfide bridge with a corresponding cysteine in an upstream EGF (Pfam:PF00008) domain thereby spanning a VWA (Pfam:PF00092) domain. All three domains can be associated together as in the cartilage matrix protein matrilin, where this domain is likely to be responsible for oligomerisation .. +PF10394 Histone acetyl transferase HAT1 N-terminus
This domain is the N-terminal half of the structure of histone acetyl transferase HAT1. It is often found in association with the C-terminal part of the GNAT Acetyltransf_1 (Pfam:PF00583) domain. It seems to be motifs C and D of the structure. Histone acetyltransferases (HATs) catalyse the transfer of an acetyl group from acetyl-CoA to the lysine E-amino groups on the N-terminal tails of histones. HATs are involved in transcription since histones tend to be hyper-acetylated in actively transcribed regions of chromatin, whereas in transcriptionally silent regions histones are hypo-acetylated .. +PF10395 Utp8 family
Pfam-B_24590 (release 22.0). Utp8 is an essential component of the nuclear tRNA export machinery in Saccharomyces cerevisiae. It is a tRNA binding protein that acts at a step between tRNA maturation /aminoacylation, and translocation of the tRNA across the nuclear pore complex .. +PF10396 GTP-binding protein TrmE N-terminus
This family represents the shorter, B, chain of the homo-dimeric structure which is a guanine nucleotide-binding protein that binds and hydrolyses GTP. TrmE is homologous to the tetrahydrofolate-binding domain of N,N-dimethylglycine oxidase and indeed binds formyl-tetrahydrofolate. TrmE actively participates in the formylation reaction of uridine and regulates the ensuing hydrogenation reaction of a Schiff's base intermediate. This B chain is the N-terminal portion of the protein consisting of five beta-strands and three alpha helices and is necessary for mediating dimer formation within the protein .. +PF10397 Adenylosuccinate lyase C-terminus
This is the C-terminal seven alpha helices of the structure whose full length represents the enzyme adenylosuccinate lyase. This sequence lies C-terminal to the conserved motif necessary for beta-elimination reactions. Adenylosuccinate lyase catalyses two steps in the synthesis of purine nucleotides: the conversion of succinylaminoimidazole-carboxamide ribotide into aminoimidazole-carboxamide ribotide, the eighth step of the de novo pathway, and the formation of adenosine monophosphate (AMP) from adenylosuccinate, the second step in the conversion of inosine monophosphate into AMP .. +PF10398 Protein of unknown function (DUF2443)
This is a small family of highly conserved proteins from bacteria, in particular Helicobacter species. The structure is a bundle of alpha helices. The function is not known.. +PF10399 Ubiquitinol-cytochrome C reductase Fe-S subunit TAT signal
This is the N-terminal region of the E or R chain, Ubiquitinol-cytochrome C reductase Fe-S subunit, of the hetero-hexameric cytochrome bc1 complex. This region is a TAT-signal region. The cytochrome bc1 complex is an oligomeric membrane protein complex that is a component of respiratory and photosynthetic electron transfer chains. The enzyme couples the transfer of electrons from ubiquinol to cytochrome c with the generation of a proton gradient across the membrane . The motif is also associated with Rieske (Pfam:PF00355), UCR_TM (Pfam:PF02921) and Ubiq-Cytc-red_N (Pfam:PF09165).. +PF10400 Virulence activator alpha C-term
This structure is homo-dimeric, and the domain here is the C-terminal half of the structure, often associated with PadR upstream, (Pfam:PF03551), which is a transcriptional regulator.. +PF10401 Interferon-regulatory factor 3
This is the interferon-regulatory factor 3 chain of the hetero-dimeric structure which also contains the shorter chain CREB-binding protein. These two subunits make up the DRAF1 (double-stranded RNA-activated factor 1). Viral dsRNA produced during viral transcription or replication leads to the activation of DRAF1. The DNA-binding specificity of DRAF1 correlates with transcriptional induction of ISG (interferon-alpha,beta-stimulated gene). IRF-3 preexists in the cytoplasm of uninfected cells and translocates to the nucleus following viral infection. Translocation of IRF-3 is accompanied by an increase in serine and threonine phosphorylation, and association with the CREB coactivator occurs only after infection.. +PF10403 Rad4 beta-hairpin domain 1
This short domain is found in the Rad4 protein. This domain binds to DNA .. +PF10404 Rad4 beta-hairpin domain 2
This short domain is found in the Rad4 protein. This domain binds to DNA .. +PF10405 Rad4 beta-hairpin domain 3
This short domain is found in the Rad4 protein. This domain binds to DNA .. +PF10406 Transcription factor TFIID complex subunit 8 C-term
This is the C-terminal, Delta, part of the TAF8 protein . The N-terminal is generally the histone fold domain, Bromo_TP (Pfam:PF07524). TAF8 is one of the key subunits of the transcription factor for pol II, TFIID. TAF8 is one of the several general cofactors which are typically involved in gene activation to bring about the communication between gene-specific transcription factors and components of the general transcription machinery .. +PF10407 Cdc14 phosphatase binding protein N-terminus
Wood V, Pfam-B_23062 (release 22.0). Cytokinesis in yeasts involves a family of proteins whose essential function is to bind Cdc14-family phosphatase and prevent this from being sequestered and inhibited in the nucleolus. This is the highly conserved N-terminus of a family of proteins which act as cytokinesis checkpoint controls by allowing cells to cope with cytokinesis defects. These proteins are required for rDNA silencing and mini-chromosome maintenance .. +PF10408 Ubiquitin elongating factor core
Wood V, Pfam-B_4085 (release 22.0). This is the most conserved part of the core region of Ufd2P ubiquitin elongating factor or E4, running from helix alpha-11 to alpha-38. It consists of 31 helices of variable length connected by loops of variable size forming a compact unit; the helical packing pattern of the compact unit consists of five structural repeats that resemble tandem Armadillo (ARM) repeats. This domain is involved in ubiquitination as it binds Cdc48p and escorts ubiquitinated proteins from Cdc48p to the proteasome for degradation. The core is structurally similar to the nuclear transporter protein importin-alpha. The core is associated with the U-box at the C-terminus, Pfam:PF04564, which has ligase activity.. +PF10409 C2 domain of PTEN tumour-suppressor protein
This is the C2 domain-like domain, in greek key form, of the PTEN protein, phosphatidyl-inositol triphosphate phosphatase, and it is the C-terminus. This domain may well include a CBR3 loop which means it plays a central role in membrane binding. This domain associates across an extensive interface with the N-terminal phosphatase domain DSPc (Pfam:PF00782) suggesting that the C2 domain productively positions the catalytic part of the protein onto the membrane .. +PF10410 DnaB-helicase binding domain of primase
This domain is the C-terminal region three-helical domain of primase . Primases synthesise short RNA strands on single-stranded DNA templates, thereby generating the hybrid duplexes required for the initiation of synthesis by DNA polymerases. Primases are recruited to single-stranded DNA by helicases, and this domain is the region of the primase which binds DnaB-helicase . It is associated with the Toprim domain (Pfam:PF01751) which is the central catalytic core.. +PF10411 Disulfide bond isomerase protein N-terminus
This is the N-terminal domain of the disulfide bond isomerase DsbC. The whole molecule is V-shaped, where each arm is a DsbC monomer of two domains linked by a hinge; and the N-termini of each monomer join to form the dimer interface at the base of the V, so are vital for dimerisation . DsbC is required for disulfide bond formation and functions as a disulfide bond isomerase during oxidative protein-folding in bacterial periplasm. It also has chaperone activity .. +PF10412 Type IV secretion-system coupling protein DNA-binding domain
The plasmid conjugative coupling protein TrwB forms hexamers from six structurally very similar protomers . This hexamer contains a central channel running from the cytosolic pole (made up by the AADs) to the membrane pole ending at the transmembrane pore shaped by 12 transmembrane helices, rendering an overall mushroom-like structure. The TrwB_AAD (all-alpha domain) domain appears to be the DNA-binding domain of the structure. TrwB, a basic integral inner-membrane nucleoside-triphosphate-binding protein, is the structural prototype for the type IV secretion system coupling proteins, a family of proteins essential for macromolecular transport between cells and export .. +PF10413 Amino terminal of the G-protein receptor rhodopsin
Rhodopsin is the archetypal G-protein-coupled receptor. Such receptors participate in virtually all physiological processes, as signalling molecules. They utilise heterotrimeric guanosine triphosphate (GTP)-binding proteins to transduce extracellular signals to intracellular events. Rhodopsin is important because of the pivotal role it plays in visual signal transduction. Rhodopsin is a dimeric transmembrane protein and its intradiskal surface consists of this amino terminal domain and three loops connecting six of the seven transmembrane helices. The N-terminus is a compact domain of alpha-helical regions with breaks and bends at proline residues outside the membrane . The transmembrane part of rhodopsin is represented by 7tm_1 (Pfam:PF00001). The N-terminal domain is extracellular and is necessary for successful dimerisation and molecular stability .. +PF10414 Sirohaem synthase dimerisation region
Bacterial sulfur metabolism depends on the iron-containing porphinoid sirohaem. CysG, S-adenosyl-L-methionine (SAM)-dependent bis-methyltransferase, dehydrogenase and ferrochelatase, synthesises sirohaem from uroporphyrinogen III via reactions which encompass two branchpoint intermediates in tetrapyrrole biosynthesis, diverting flux first from protoporphyrin IX biosynthesis and then from cobalamin (vitamin B12) biosynthesis. CysG is a dimer of two structurally similar protomers held together asymmetrically through a number of salt-bridges across complementary residues in the CysG_dimeriser region to produce a series of active sites, accounting for CysG's multifunctionality, catalysing four diverse reactions: two SAM-dependent methylations, NAD+-dependent tetrapyrrole dehydrogenation and metal chelation. The CysG_dimeriser region holding the two protomers together is of 74 residues .. +PF10415 Fumarase C C-terminus
Fumarase C catalyses the stereo-specific interconversion of fumarate to L-malate as part of the Kreb's cycle. The full-length protein forms a tetramer with visible globular shape. FumaraseC_C is the C-terminal 65 residues referred to as domain 3. The core of the molecule consists of a bundle of 20 alpha-helices from the five-helix bundle of domain 2. The projections from the core of the tetramer are generated from domains 1 and 3 of each subunit . FumaraseC_C does not appear to be part of either the active site or the activation site but is helical in structure forming a little bundle.. +PF10416 Transcription-initiator DNA-binding domain IBD
In Trichomonas vaginalis, thought to be the earliest extant eukaryote, the sole initiator element for control of the start of transcription is Inr, and this is recognised by the initiator binding protein IBP39. IBP39 contains an N-terminal Inr binding domain, IBD, connected via a flexible, proteolytically sensitive, linker (residues 127-145) to a C-terminal domain. The IBD structure reveals a winged-helix-wing conformation with each element binding to DNA, the central helix-turn-helix contributing the majority of the specificity-determining contacts with the Inr core motif TCAPy(T/A). The binding of IBP39 to the Inr directly recruits RNA polymerase II and in this way initiates transcription .. +PF10417 C-terminal domain of 1-Cys peroxiredoxin
This is the C-terminal domain of 1-Cys peroxiredoxin (1-cysPrx), a member of the peroxiredoxin superfamily which protect cells against membrane oxidation through glutathione (GSH)-dependent reduction of phospholipid hydroperoxides to corresponding alcohols . The C-terminal domain is crucial for providing the extra cysteine necessary for dimerisation of the whole molecule. Loss of the enzyme's peroxidase activity is associated with oxidation of the catalytic cysteine, upstream of this domain; and glutathionylation, presumably through its disruption of protein structure, facilitates access for GSH, resulting in spontaneous reduction of the mixed disulfide to the sulfhydryl and consequent activation of the enzyme . The domain is associated with family AhpC-TSA, Pfam:PF00578, which carries the catalytic cysteine.. +PF10418 Iron-sulfur cluster binding domain of dihydroorotate dehydrogenase B
Lactococcus lactis is one of the few organisms with two dihydroorotate dehydrogenases, DHODs, A and B . The B enzyme is a prototype for DHODs in Gram-positive bacteria that use NAD+ as the second substrate. DHODB is a hetero-tetramer composed of a central homodimer of PyrDB subunits resembling the DHODA structure and two PyrK subunits along with three different cofactors: FMN, FAD, and a [2Fe-2S] cluster. The [2Fe-2S] iron-sulfur cluster binds to this C-terminal domain of the PyrK subunit, which is at the interface between the flavin and NAD binding domains and contains three beta-strands. The four cysteine residues at the N-terminal part of this domain are the ones that bind, in pairs, to the iron-sulfur cluster. The conformation of the whole molecule means that the iron-sulfur cluster is localised in a well-ordered part of this domain close to the FAD binding site . The FAD and NAD binding domains are FAD_binding_6, Pfam:PF00970 and NAD_binding_1, Pfam:PF00175.. +PF10419 TFIIIC_subunit;
Pfam-B_14433 (release 21.0). This is a family of proteins subunits of TFIIIC . TFIIIC in yeast and humans is required for transcription of tRNA and 5 S RNA genes by RNA polymerase III. Yeast members of this family are fused to phosphoglycerate mutase domain.. +PF10420 Cytokine interleukin-12p40 C-terminus
IL12p40_C is the largely beta stranded C-terminal, D3, domain of interleukin-12p40 or interleukin-12B. This interleukin is produced on stimulation by macrophage-engulfed micro-organisms and other stimuli, when it dimerises with interleukin-12p35 to form a heterodimer which then binds to receptors on natural killer cells to activate them to destroy the micro-organisms . This domain contains two disulfide bridges, one of which serves to bind p40 to p35 and the other to hold the beta strands within the domain together. The cupped shape of the p35 binding interface matches the elbow-like bend between D2 and D3 in p40 . The domain is often associated with family fn3, Pfam:PF00041.. +PF10421 2'-5'-oligoadenylate synthetase 1, domain 2, C-terminus
This is the largely alpha-helical, C-terminal half of 2'-5'-oligoadenylate synthetase 1, being described as domain 2 of the enzyme and homologous to a tandem ubiquitin repeat. It carries the region of enzymic activity between 320 and 344 at the extreme C-terminal end . Oligoadenylate synthetases are antiviral enzymes that counteract viral attack by degrading viral RNA. The enzyme uses ATP in 2'-specific nucleotidyl transfer reactions to synthesise 2'-5'-oligoadenylates, which activate latent ribonuclease, resulting in degradation of viral RNA and inhibition of virus replication . This domain is often associated with NTP_transf_2 Pfam:PF01909.. +PF10422 Monopolin complex subunit LRS4
Pfam-B_63451 (release 22.0). Monopolin is a protein complex, originally identified in Saccharomyces cerevisiae, that is required for the segregation of homologous centromeres to opposite poles of a dividing cell during meiosis I . The orthologous complex in Schizosaccharomyces pombe is not required for meiosis I chromosome segregation, but is proposed to play a similar physiological role in clamping microtubule binding sites . In S. cerevisiae this subunit is called LRS4, and in S. pombe it is known as Mde4.. +PF10423 Bacterial AMP nucleoside phosphorylase N-terminus
This is the N-terminal domain of bacterial AMP nucleoside phosphorylase (AMNp). The N- and C-termini form distinct domains which intertwine with each other to form a stable monomer which associates with five other monomers to yield the active hexamer. The N-terminus consists of a long helix and a four-stranded sheet with a novel topology. The C-terminus binds the nucleoside whereas the N-terminus acts as the enzymatic regulatory domain. AMNp (EC:3.2.2.4) catalyses the hydrolysis of AMP to form adenine and ribose 5-phosphate, thereby regulating intracellular AMP levels .. +PF10425 C-terminus of bacterial fibrinogen-binding adhesin
This is the C-terminal half of a bacterial fibrinogen-binding adhesin SdrG. SdrG is a Gram-positive cell-wall-anchored adhesin that allows attachment of the bacterium to host tissues via specific binding to the beta-chain of human fibrinogen (Fg). SdrG binds to its ligand with a dynamic "dock, lock, and latch" mechanism which represents a general mode of ligand-binding for structurally related cell wall-anchored proteins in most Gram-positive bacteria. The C-terminal part of SdrG(276-596) is integral to the folding of the immunoglobulin-like whole to create the docking grooves necessary for Fg binding. The domain is associated with families of Cna_B, Pfam:PF05738 .. +PF10426 Recombination-activating protein 1 zinc-finger domain
This is a C2-H2 zinc-finger domain closely resembling the classical TFIIIA-type zinc-finger, CX3FX5LX2-3H, despite having a valine and a tyrosine at the core instead of a phenylalanine and a leucine, hence CX3VX1LX2YX2H. The structure, nevertheless, contains the characteristic two-stranded beta-sheet and alpha-helix of a classical zinc-finger. The domain binds one zinc and, in complex with the zinc-RING-finger domain, helps to stabilise the whole of the dimerisation region of recombination activating protein 1 (RAG1) . The function of the whole is to bind double-stranded DNA.. +PF10427 Argonaute hook
This region has been called the argonaute hook . It has been shown to bind to the Piwi domain Pfam:PF02171 of Argonaute proteins.. +PF10428 RAM signalling pathway protein
Pfam-B_35594 (release 22.0). SOG2 proteins in Saccharomyces cerevisiae are involved in cell separation and cytokinesis .. +PF10429 Nuclear pore RNA shuttling protein Mtr2
Mtr2 is a monomeric, dual-action, RNA-shuttle protein found in yeasts. Transport across the nuclear-cytoplasmic membrane is via the macro-molecular membrane-spanning nuclear pore complex, NPC. The pore is lined by a subset of NPC members called nucleoporins that present FG (Phe-Gly) receptors, characteristically GLFG and FXFG motifs, for shuttling RNAs and proteins. RNA cargo is bound to soluble transport proteins (nuclear export factors) such as Mex67 in yeasts, and TAP in metazoa, which pass along the pore by binding to successive FG receptors. Mtr2 when bound to Mex67 maximises this FG-binding. Mtr2 also acts independently of Mex67 in transporting the large ribosomal RNA subunit through the pore .. +PF10430 Tie-2 Ig-like domain 1
+PF10431 C-terminal, D2-small domain, of ClpB protein
This is the C-terminal domain of ClpB protein, referred to as the D2-small domain, and is a mixed alpha-beta structure. Compared with the D1-small domain (included in AAA, Pfam:PF00004) it lacks the long coiled-coil insertion, and instead of helix C4 contains a beta-strand (e3) that is part of a three stranded beta-pleated sheet. In Thermophilus the whole protein forms a hexamer with the D1-small and D2-small domains located on the outside of the hexamer, with the long coiled-coil being exposed on the surface. The D2-small domain is essential for oligomerisation, forming a tight interface with the D2-large domain of a neighbouring subunit and thereby providing enough binding energy to stabilise the functional assembly . The domain is associated with two Clp_N, Pfam:PF02861, at the N-terminus as well as AAA, Pfam:PF00004 and AAA_2, Pfam:PF07724.. +PF10432 Bacterial phospho-glucose isomerase C-terminal region
This is the C-terminal half of a bacterial phospho-glucose isomerase EC:5.3.1.9 protein which is similar to eukaryote homologues to the extent that the sequence includes the cluster of threonines and serines that forms the sugar phosphate-binding site in conventional PGI. This domain contributes a good proportion of the active catalytic site residues. This PGI uses the same catalytic mechanisms for both glucose ring-opening and isomerisation for the interconversion of glucose 6-phosphate to fructose 6-phosphate . It is associated with family SIS, Pfam:PF01380.. +PF10433 MMS1;
Mono-functional DNA-alkylating methyl methanesulfonate N-term. Pfam-B_64607 (release 22.0). MMS1 is a protein that protects against replication-dependent DNA damage in Saccharomyces cerevisiae . MMS1 belongs to the DDB1 family of cullin 4 adaptors and the two proteins are homologous. MMS1 bridges the interaction of MMS22 and Crt10 with Cul8/Rtt101 . Cul8/Rtt101 is a cullin protein involved in the regulation of DNA replication subsequent to DNA damage. The N-terminal region of MMS1 and the C-terminal of MMS22 are required for the MMS1-MMS22 interaction . The human HIV-1 virion-associated protein Vpr assembles with DDB1 through interaction with DCAF1 (chromatin assembly factor) to form an E3 ubiquitin ligase that targets cellular substrates for proteasome-mediated degradation and subsequent G2 arrest .. +PF10434 Monopolin complex protein MAM1
Pfam-B_58835 (release 22.0). Monopolin is a protein complex, originally identified in Saccharomyces cerevisiae, that is required for the segregation of homologous centromeres to opposite poles of a dividing cell during meiosis I . MAM1 is required in S. cerevisiae for monopolar attachment .. +PF10435 Beta-galactosidase, domain 2
This is the second domain of the five-domain beta-galactosidase enzyme that altogether catalyses the hydrolysis of beta(1-3) and beta(1-4) galactosyl bonds in oligosaccharides as well as the inverse reaction of enzymatic condensation and trans-glycosylation. This domain is made up of 16 antiparallel beta-strands and an alpha-helix at its C terminus. The fold of this domain appears to be unique. In addition, the last seven strands of the domain form a subdomain with an immunoglobulin-like (I-type Ig) fold in which the first strand is divided between the two beta-sheets. In Penicillium spp. this strand is interrupted by a 12-residue insertion which forms an additional edge-strand to the second beta-sheet of the sub-domain. The remainder of the second domain forms a series of beta-hairpins at its N terminus, four strands of which are contiguous with part of the Ig-like sub-domain, forming in total a seven-stranded antiparallel beta-sheet. This domain is associated with family Glyco_hydro_35, Pfam:PF01301, which is N-terminal to it, but itself has no metazoan members.. +PF10436 Mitochondrial branched-chain alpha-ketoacid dehydrogenase kinase
Catabolism and synthesis of leucine, isoleucine and valine are finely balanced, allowing the body to make the most of dietary input but removing excesses to prevent toxic build-up of their corresponding keto-acids. This is the butyryl-CoA dehydrogenase, subunit A domain 3, a largely alpha-helical bundle of the enzyme BCDHK. This enzyme is the regulator of the dehydrogenase complex that breaks branched-chain amino-acids down, by phosphorylating and thereby inactivating it when synthesis is required. The domain is associated with family HATPase_c Pfam:PF02518 which is towards the C-terminal.. +PF10437 Bacterial lipoate protein ligase C-terminus
This is the C-terminal domain of a bacterial lipoate protein ligase. There is no conservation between this C-terminus and that of vertebrate lipoate protein ligase C-termini, but both are associated with the domain BPL_LipA_LipB Pfam:PF03099, further upstream. This domain is required for adenylation of lipoic acid by lipoate protein ligases. The domain is not required for transfer of lipoic acid from the adenylate to the lipoyl domain. Upon adenylation, this domain rotates 180 degrees away from the active site cleft. Therefore, the domain does not interact with the lipoyl domain during transfer.. +PF10438 Cyclo-malto-dextrinase C-terminal domain
This domain is at the very C-terminus of cyclo-malto-dextrinase proteins and consists of 8 beta strands, is largely globular and appears to help stabilise the active sites created by upstream domains, Cyc-maltodext_N Pfam:PF09087, and Alpha-amylase Pfam:PF00128. Cyclo-malto-dextrinases hydrolyse cyclodextrans to maltose and glucose and catalyse trans-glycosylation of oligosaccharides to the C3-, C4- or C6-hydroxyl groups of various acceptor sugar molecules.. +PF10439 Bacteriocin class II with double-glycine leader peptide
This is a family of bactericidal bacteriocins secreted by Streptococcal species in order to kill off closely-related competitor Gram-positives. The sequence includes the peptide precursor, this being cleaved off proteolytically at the double-glycine. The family does not carry the YGNGVXC motif characteristic of pediocin-like Bacteriocins, Bacteriocin_II Pfam:PF01721. The producer bacteria are protected from the effects of their own bacteriocins by production of a specific immunity protein which is co-transcribed with the genes encoding the bacteriocins, eg family EntA_Immun Pfam:PF08951. The bacteriocins are structurally more specific than their immunity-protein counterparts. Typically, production of the bacteriocin gene is from within an operon carrying up to 6 genes including a typical two-component regulatory system (R and H), a small peptide pheromone (C), and a dedicated ABC transporter (A and B) as well as an immunity protein . The ABC transporter is thought to recognise the N termini of both the pheromone and the bacteriocins and to transport these peptides across the cytoplasmic membrane, concurrent with cleavage at the conserved double-glycine motif. Cleaved extracellular C can then bind to the sensor kinase, H, resulting in activation of R and up-regulation of the entire gene cluster via binding to consensus sequences within each promoter . It seems likely that this whole regulon is carried on a transmissible plasmid which is passed between closely related Firmicute species since many clinical isolates from different Firmicutes can produce at least two bacteriocins, and the same bacteriocins can be produced by different species.. +PF10440 Ubiquitin-binding WIYLD domain
This presumed domain has been predicted to contain three alpha helices. The domain was named the WIYLD domain based on the pattern of most conserved residues . It binds ubiquitin. In the Arabidopsis thaliana histone-lysine N-methyltransferase SUVR4, Swiss:Q8W595, binding of ubiquitin to this domain stimulates enzymatic activity and converts its activity from a strict dimethylase to a di/trimethylase .. +PF10441 Urb2/Npa2 family
Pfam-B_28626 (Release 22.0). This family includes the Urb2 protein from yeast that is involved in ribosome biogenesis .. +PF10442 FIST C domain
The FIST C domain is a novel sensory domain, which is present in signal transduction proteins from Bacteria, Archaea and Eukarya. Chromosomal proximity of FIST-encoding genes to those coding for proteins involved in amino acid metabolism and transport suggest that FIST domains bind small ligands, such as amino acids .. +PF10443 RNA12 protein
Pfam-B_18000 (Release 22.0). This family includes RNA12 from S. cerevisiae. That protein contains an RRM domain. This region is C-terminal to that and includes a P-loop motif suggesting this region binds to NTP. The RNA12 protein is involved in pre-rRNA maturation .. +PF10444 DUF2455;
Nbl1 / Borealin N terminal. Mistry J, Hartsuiker E, Wood V. Nbl1 is a subunit of the conserved CPC, the chromosomal passenger complex, which regulates mitotic chromosome segregation. In Fungi and Animalia, this complex consists of the kinase Aurora B/AIR-2/Ipl1p, INCENP/ICP-1/Sli15p, and Survivin/BIR-1/Bir1p. In Animalia, a fourth subunit (Borealin/Dasra/CSC-1) is required for targeting CPC to centromeres and central spindles. Nbl1 has been shown in budding yeast to be essential for viability, and for CPC localisation, stability, integrity, and function . The N terminus of Borealin is homologous to Nbl1 . This family contains both Nbl1, and the N terminal region of Borealin.. +PF10445 Protein of unknown function (DUF2456)
Hartsuiker E, Wood V, Mistry J. Pfam-B_97171 (release 22.0). This is a family of uncharacterised proteins.. +PF10446 Protein of unknown function (DUF2457)
Hartsuiker E, Wood V, Mistry J. This is a family of uncharacterised proteins.. +PF10447 Exosome component EXOSC1/CSL4
Pfam-B_6887 (release 22.0). This family of proteins are components of the exosome 3'->5' exoribonuclease complex. The exosome mediates degradation of unstable mRNAs that contain AU-rich elements (AREs) within their 3' untranslated regions .. +PF10448 POC1;
20S proteasome chaperone. Pfam-B_75798 (release 22.0). This family contains chaperones of the 20S proteasome which function in early 20S proteasome assembly. The structures of two of the proteins in this family (DMP1 and DMP2) have been solved, and they closely resemble that of the mammalian proteasome assembling chaperone PAC3, although there is little sequence similarity between them .. +PF10450 POC4;
In yeast, POC1 is a chaperone of the 20S proteasome which functions in early 20S proteasome assembly.. +PF10451 Telomere regulation protein Stn1
Pfam-B_51291 (release 21.0). The budding yeast protein Stn1 is a DNA-binding protein which has specificity for telomeric DNA. Structural profiling has predicted an OB-fold . This domain is the N-terminal part of the molecule, which adopts the OB fold. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution .. +PF10452 TORC1 subunit TCO89
Pfam-B_61649 (release 22.0). TCO89 is a component of the TORC1 complex. TORC1 is responsible for a wide range of rapamycin-sensitive cellular activities.. +PF10453 Nuclear fragile X mental retardation-interacting protein 1 (NUFIP1)
Proteins in this family have been implicated in the assembly of the large subunit of the ribosome and in telomere maintenance . Some proteins in this family contain a CCCH zinc finger. This family contains a protein called human fragile X mental retardation-interacting protein 1, which is known to bind RNA and is phosphorylated upon DNA damage .. +PF10454 Protein of unknown function (DUF2458)
Hartsuiker H, Wood V, Mistry J. This is a family of uncharacterised proteins.. +PF10455 Bin/amphiphysin/Rvs domain for vesicular trafficking
Pfam-B_12557 (release 22.0). This Pfam entry includes proteins that are not matched by Pfam:PF03114.. +PF10456 WASP-binding domain of Sorting nexin protein
Pfam-B_43522 (release 20.0). The C-terminal region of the Sorting nexin group of proteins appears to carry a BAR-like (Bin/amphiphysin/Rvs) domain. This domain is very diverse and the similarities with other BAR domains are few. In the Sorting nexins it is associated with family PX, Pfam:PF00787.13, and in combination with PX appears to be necessary to bind WASP along with p85 to form a multimeric signalling complex .. +PF10457 Cholesterol-capturing domain
Pfam-B_16187 (release 22.0). Human meta-static lymph node (MLN) 64 is a late endosomal membrane protein, and carries this MENTAL (MLN64 N-terminal) domain at its N-terminus. The domain is composed of four trans-membrane helices with three short intervening loops . The function of the domain is to capture cholesterol and pass it to the associated START domain Pfam:PF01852 for transfer to a cytosolic acceptor protein or membrane. In mammals, the MENTAL domain is involved in the localisation of MLN64 and MENTHO in late endosomes, and also in homo- and hetero-interactions of these two proteins .. +PF10458 Valyl tRNA synthetase tRNA binding arm
This domain is found at the C-terminus of Valyl tRNA synthetases.. +PF10459 Peptidase S46
Rawlings N, Mistry J. Dipeptidyl-peptidase 7 (DPP-7) is the best characterised member of this family. It is a serine peptidase that is located on the cell surface and is predicted to have two N-terminal transmembrane domains.. +PF10460 Peptidase M30
Rawlings N, Mistry J. This family contains the metallopeptidase hyicolysin. Hyicolysin has a zinc ion which is liganded by two histidine and one glutamate residue.. +PF10461 Peptidase S68
Rawlings N, Mistry J. This family of serine peptidases contains PIDD proteins. PIDD forms a complex with RAIDD and procaspase-2 that is known as the 'PIDDosome'. The PIDDosome forms when DNA damage occurs and either activates NF-kappaB, leading to cell survival, or caspase-2, which leads to apoptosis.. +PF10462 Peptidase M66
Rawlings N, Mistry J. This family of metallopeptidases contains StcE, a virulence factor found in Shiga toxigenic Escherichia coli organisms. StcE peptidase cleaves C1 esterase inhibitor .. +PF10463 Peptidase U49
Rawlings N, Mistry J. This family contains Lit peptidase from Escherichia coli. Lit protease functions in bacterial cell death in response to infection by bacteriophage T4. Following binding of Gol peptide to domains II and III of elongation factor Tu, the Lit peptidase cleaves domain I of the elongation factor. This prevents binding of guanine nucleotides, shuts down translation and leads to cell death.. +PF10464 Peptidase U40
Rawlings N, Mistry J. This family contains P5 murein endopeptidase from bacteriophage phi-6. P5 murein endopeptidase has lytic activity against several gram-negative bacteria. It is thought that the enzyme cleaves the cell wall peptide bridge formed by meso-2,6-diaminopimelic acid and D-Ala. +PF10465 Peptidase_I24;
PinA peptidase inhibitor . Rawlings N, Mistry J. PinA inhibits the endopeptidase La. It binds to the La homotetramer but does not interfere with the ATP binding site or the active site of La.. +PF10466 Saccharopepsin inhibitor I34
The saccharopepsin inhibitor is highly specific for the aspartic peptidase saccharopepsin. It is largely unstructured in the absence of saccharopepsin, but in the presence, the inhibitor undergoes a conformation change forming an almost perfect alpha-helix from Asn2 to Met32 in the active site cleft of the peptidase.. +PF10467 Peptidase inhibitor clitocypin
Rawlings N, Mistry J. Clitocypin binds and inhibits cysteine proteinases. It has no similarity to any other known cysteine proteinase inhibitors but bears some similarity to a lectin-like family of proteins from mushrooms .. +PF10468 Carboxypeptidase inhibitor I68
Rawlings N, Mistry J. This is a family of tick carboxypeptidase inhibitors.. +PF10469 AKAP7 2'5' RNA ligase-like domain
AKAP7_NLS is the N-terminal domain of the cyclic AMP-dependent protein kinase A, PKA, anchor protein AKAP7. This protein anchors PKA for its role in regulating PKA-mediated gene transcription in both somatic cells and oocytes . AKAP7_NLS carries the nuclear localisation signal (NLS) KKRKK, that indicates the cellular destiny of this anchor protein . Binding to the regulatory subunits RI and RII of PKA is mediated via the family AKAP7_RIRII_bdg, at the C-terminus. This family represents a region that contains two 2'5' RNA ligase like domains Pfam:PF02834. Presumably this domain carries out some as yet unknown enzymatic function.. +PF10470 PKA-RI-RII subunit binding domain of A-kinase anchor protein
AKAP7_RIRII_bdg is the C-terminal domain of the cyclic AMP-dependent protein kinase A, PKA, anchor protein AKAP7. This protein anchors PKA, for its role in regulating PKA-mediated gene transcription in both somatic cells and oocytes, by binding to its regulatory subunits, RI and RII, hence being known as a dual-specific AKAP . The 25 crucial amino acids of RII-binding domains in general form structurally conserved amphipathic helices with unrelated sequences; hydrophobic amino acid residues form the backbone of the interaction and hydrogen bond- and salt-bridge-forming amino acid residues increase the affinity of the interaction . The N-terminus, of family AKAP7_NLS, carries the nuclear localisation signal.. +PF10471 Anaphase-promoting complex APC subunit 1
The anaphase-promoting complex (APC) or cyclosome is a cell cycle-regulated ubiquitin-protein ligase that regulates important events in mitosis such as the initiation of anaphase and exit from telophase. The APC, in conjunction with other enzymes, assembles multi-ubiquitin chains on a variety of regulatory proteins thereby targeting them for proteolysis by the 26S proteasome. CDC26 is one of the nine or so subunits identified within APC but its exact function is not known . The APC/C becomes active at the metaphase/anaphase transition and remains active during G1 phase. One mechanism linked to activation of the APC/C is phosphorylation. The yeast APC/C is composed of at least 13 subunits, but the function of many of the subunits is unknown. Hcn1 is the smallest subunit of the S. pombe APC/C, and is found to be essential for cell viability, APC/C integrity, and proper APC/C regulation. In addition, Hcn1 phosphorylation indicates a specific role for the phosphorylation of this subunit late in the cell cycle .. +PF10472 eIF2-alpha phosphatase phosphorylation constitutive repressor
This is the conserved N-terminal domain of CReP, constitutive repressor of eIF2-alpha phosphorylation/protein phosphatase 1, catalytic subunit. It functions in the dephosphorylation of eIF2-alpha under basal conditions in the absence of stress. In response to translation inhibition, there is reduced synthesis of the labile CReP that contributes to elevated levels of eIF2-alpha phosphorylation . The C-terminus, family PP1c, is shared with the apoptosis-associated protein Gadd34 and herpes simplex virus .. +PF10473 Cenp-F_leu_zip;
Leucine-rich repeats of kinetochore protein Cenp-F/LEK1. Cenp-F, a centromeric kinetochore, microtubule-binding protein consisting of two 1,600-amino acid-long coils, is essential for the full functioning of the mitotic checkpoint pathway [1,2]. There are several leucine-rich repeats along the sequence of LEK1 that are considered to be zippers, though they do not appear to be binding DNA directly in this instance .. +PF10474 Protein of unknown function C-terminus (DUF2451)
This protein is found in eukaryotes but its function is not known. The C-terminal part of some members is DUF2450.. +PF10475 Protein of unknown function N-terminal domain (DUF2450)
This protein is found in eukaryotes but its function is not known. The C-terminal part of some members is DUF2451.. +PF10476 Protein of unknown function C-terminus (DUF2448)
The family DUF2349 is the N-terminal part of this family. This protein is found in eukaryotes but its function is not known.. +PF10477 Nucleocytoplasmic shuttling protein for mRNA cap-binding EIF4E
EIF4E-T is the transporter protein for shuttling the mRNA cap-binding protein EIF4E protein, targeting it for nuclear import. EIF4E-T contains several key binding domains including two functional leucine-rich NESs (nuclear export signals) between residues 438-447 and 613-638 in the human protein. The other two binding domains are an EIF4E-binding site, between residues 27-42 in Q9EST3, and a bipartite NLS (nuclear localisation signals) between 194-211, and these lie in family EIF4E-T_N. EIF4E is the eukaryotic translation initiation factor 4E that is the rate-limiting factor for cap-dependent translation initiation .. +PF10479 Fragile site-associated protein C-terminus
This is the conserved C-terminal half of the protein KIAA1109 which is the fragile site-associated protein FSA . Genome-wide-association studies showed this protein to be linked to the susceptibility to coeliac disease . The protein may also be associated with polycystic kidney disease .. +PF10480 Beta-1 integrin binding protein
ICAP-1 is a serine/threonine-rich protein that binds to the cytoplasmic domains of beta-1 integrins in a highly specific manner, binding to a NPXY sequence motif on the beta-1 integrin. The cytoplasmic domains of integrins are essential for cell adhesion, and the fact that phosphorylation of ICAP-1 is regulated by interaction with the cell-matrix implies an important role of ICAP-1 during integrin-dependent cell adhesion . Overexpression of ICAP-1 strongly reduces the integrin-mediated cell spreading on extracellular matrix and inhibits both Cdc42 and Rac1. In addition, ICAP-1 induces release of Cdc42 from cellular membranes and prevents the dissociation of GDP from this GTPase . An additional function of ICAP-1 is to promote differentiation of osteoprogenitors by supporting their condensation through modulating the integrin high affinity state ,. +PF10481 Cenp-F_N;
Cenp-F N-terminal domain. Mitosin or centromere-associated protein-F (Cenp-F) is found bound across the centromere as one of the proteins of the outer layer of the kinetochore . Most of the kinetochore/centromere functions appear to depend upon binding of the C-terminal part of the molecule, whereas the N-terminal part, here, may be a cytoplasmic player in controlling the function of microtubules and dynein .. +PF10482 Tumour-suppressor protein CtIP N-terminal domain
CtIP is predominantly a nuclear protein that complexes with both BRCA1 and the BRCA1-associated RING domain protein (BARD1). At the protein level, CtIP expression varies with cell cycle progression in a pattern identical to that of BRCA1. Thus, the steady-state levels of CtIP polypeptides, which remain low in resting cells and G1 cycling cells, increase dramatically as dividing cells traverse the G1/S boundary. CtIP can potentially modulate the functions ascribed to BRCA1 in transcriptional regulation, DNA repair, and/or cell cycle checkpoint control . This N-terminal domain carries a coiled-coil region and is essential for homodimerisation of the protein . The C-terminal domain is family Pfam:PF08573.. +PF10483 Hap2_elong;
Elongator subunit Iki1. Coggill P, Eberhardt R. Pfam-B_26773 (release 22.0). This family is a component of the RNA polymerase II elongator complex [1,2]. This complex is involved in elongation of RNA polymerase II transcription and in modification of wobble nucleosides in tRNA [3,4].. +PF10484 Mitochondrial ribosomal protein S23
MRP-S23 is one of the proteins that makes up the 55S ribosome in eukaryotes from nematodes to humans. It does not appear to carry any common motifs, either RNA binding or ribosomal protein motifs . All of the mammalian MRPs are encoded in nuclear genes that are evolving more rapidly than those encoding cytoplasmic ribosomal proteins. The MRPs are imported into mitochondria where they assemble coordinately with mitochondrially transcribed rRNAs into ribosomes that are responsible for translating the 13 mRNAs for essential proteins of the oxidative phosphorylation system . MRP-S23 is significantly up-regulated in uterine cancer cells .. +PF10486 DUF2447;
Phosphoinositide 3-kinase gamma adapter protein p101 subunit. Class I PI3Ks are dual-specific lipid and protein kinases involved in numerous intracellular signaling pathways. Class IB PI3K, p110gamma, is mainly activated by seven-transmembrane G-protein-coupled receptors (GPCRs), through its regulatory subunit p101 and G-protein beta-gamma subunits .. +PF10487 Nucleoporin subcomplex protein binding to Pom34
This is one of the many peptides that make up the nucleoporin complex (NPC), and is found across eukaryotes . The Nup188 subcomplex (Nic96p-Nup188p-Nup192p-Pom152p) is one of at least six that make up the NPC, and as such is symmetrically localised on both faces of the NPC at the nuclear end, being integrally bound to the C-terminus of Pom34p .. +PF10488 Phosphatase-1 catalytic subunit binding region
This conserved C-terminus appears to be a protein phosphatase-1 catalytic subunit (PP1C) binding region, which may in some circumstances also be retroviral in origin since it is found in both herpes simplex virus and in mouse and man. This domain is found in Gadd-34 apoptosis-associated proteins as well as the constitutive repressor of eIF2-alpha phosphorylation/protein phosphatase 1, regulatory (inhibitor) subunit 15b, otherwise known as CReP. Diverse stressful conditions are associated with phosphorylation of the {alpha} subunit of eukaryotic translation initiation factor 2 (eIF2{alpha}) on serine 51. This signaling event, which is conserved from yeast to mammals, negatively regulates the guanine nucleotide exchange factor, eIF2-B and inhibits the recycling of eIF2 to its active GTP bound form. In mammalian cells eIF2{alpha} phosphorylation emerges as an important event in stress signaling that impacts on gene expression at both the translational and transcriptional levels .. +PF10489 Ret finger protein-like 3 antisense
This short transcript is purported to be the antisense protein of exon 2 of RFPL3 gene, however this was not confirmed. Since the RFPL3 (ret-finger protein-like 3) gene is expressed in testis the suggestion is that this may have a role in the antisense regulation of the RFPL genes. RFPL transcripts encode proteins with tripartite structure of RING finger, coiled-coil, and B30-2 domains, which are characteristic of the RING-B30 family. Each of these domains is thought to mediate protein-protein interactions by promoting homo- or heterodimerisation .. +PF10490 Rb-bdg_C_Cenp-F;
Rb-binding domain of kinetochore protein Cenp-F/LEK1. Cenp-F, a centromeric kinetochore, microtubule-binding protein consisting of two 1,600-amino acid-long coils, is essential for the full functioning of the mitotic checkpoint pathway [1,2]. This domain is at the very C-terminus of the C-terminal coiled-coil, and is one of the key Rb-binding domains .. +PF10491 NLS-binding and DNA-binding and dimerisation domains of Nrf1
In Drosophila, the erect wing (ewg) protein is required for proper development of the central nervous system and the indirect flight muscles. The fly ewg gene encodes a novel DNA-binding domain that is also found in four genes previously identified in sea urchin, chicken, zebrafish, and human . Nuclear respiratory factor-1 is a transcriptional activator that has been implicated in the nuclear control of respiratory chain expression in vertebrates. The first 26 amino acids of nuclear respiratory factor-1 are required for the binding of dynein light chain. The interaction with dynein light chain is observed for both ewg and Nrf-1, transcription factors that are structurally and functionally similar between humans and Drosophila . The highest level of expression of both ewg and Nrf-1 was found in the central nervous system, somites, first branchial arch, optic vesicle, and otic vesicle. In the mouse Nrf-1 protein, Swiss-Prot:Q8C4C0, there is also an NLS domain at 88-116, and a DNA binding and dimerisation domain at 127-282. Ewg is a site-specific transcriptional activator, and evolutionarily conserved regions of ewg contribute both positively and negatively to transcriptional activity .. +PF10492 Nrf1 activator activation site binding domain
In Drosophila, the erect wing (ewg) protein is required for proper development of the central nervous system and the indirect flight muscles. The fly ewg gene encodes a novel DNA-binding domain that is also found in four genes previously identified in sea urchin, chicken, zebrafish, and human . Nuclear respiratory factor-1 is a transcriptional activator that has been implicated in the nuclear control of respiratory chain expression in vertebrates. The first 26 amino acids of nuclear respiratory factor-1 are required for the binding of dynein light chain. The interaction with dynein light chain is observed for both ewg and Nrf-1, transcription factors that are structurally and functionally similar between humans and Drosophila . The highest level of expression of both ewg and Nrf-1 was found in the central nervous system, somites, first branchial arch, optic vesicle, and otic vesicle. In the mouse Nrf-1 protein, Swiss:Q8C4C0, there is an activation domain at 303-469, the most conserved part of which is this domain 446-469. Ewg is a site-specific transcriptional activator, and evolutionarily conserved regions of ewg contribute both positively and negatively to transcriptional activity . The family Nrf1_DNA-bind is associated with this domain towards the N-terminal, as is the N terminal of the activation domain.. +PF10493 Rough deal protein C-terminal region
Buljan M, Coggill P. Rod, the Rough deal protein, displays a dynamic intracellular staining pattern, localising first to kinetochores in pro-metaphase, but moving to kinetochore microtubules at metaphase. Early in anaphase the protein is once again restricted to the kinetochores, where it persists until the end of telophase. This behaviour is in all respects similar to that described for ZW10 , and indeed the two proteins function together, localisation of each depending upon the other . These two proteins are found at the kinetochore in complex with a third, Zwilch, in both flies and humans. The C-terminus is the most conserved part of the protein. During pro-metaphase, the ZW10-Rod complex, dynein/dynactin, and Mad2 all accumulate on unattached kinetochores; microtubule capture leads to Mad2 depletion as it is carried off by dynein/dynactin; ZW10-Rod complex accumulation continues, replenishing kinetochore dynein. The continuing recruitment of the ZW10-Rod complex during metaphase may serve to maintain adequate dynein/dynactin complex on kinetochores for assisting chromatid movement during anaphase . The ZW10-Rod complex acts as a bridge whose association with Zwint-1 links Mad1 and Mad2, components that are directly responsible for generating the diffusible 'wait anaphase' signal, to a structural, inner kinetochore complex containing Mis12 and KNL-1AF15q14, the last of which has been proved to be essential for kinetochore assembly in C. elegans. Removal of ZW10 or Rod inactivates the mitotic checkpoint .. +PF10494 Serine-threonine protein kinase 19
This serine-threonine protein kinase number 19 is expressed from the MHC and predominantly in the nucleus. Protein kinases are involved in signal transduction pathways and play fundamental roles in the regulation of cell functions. This is a novel Ser/Thr protein kinase, that has Mn2+-dependent protein kinase activity that phosphorylates alpha-casein at Ser/Thr residues and histone at Ser residues. It can be covalently modified by the reactive ATP analogue 5'-p-fluorosulfonylbenzoyladenosine in the absence of ATP, and this modification is prevented in the presence of 1 mM ATP, indicating that the kinase domain is capable of binding ATP .. +PF10495 Pericentrin-AKAP-450 domain of centrosomal targeting protein
This domain is a coiled-coil region close to the C-terminus of centrosomal proteins that is directly responsible for recruiting AKAP-450 and pericentrin to the centrosome. Hence the suggested name for this region is a PACT domain (pericentrin-AKAP-450 centrosomal targeting). This domain is also present at the C-terminus of coiled-coil proteins from Drosophila and S. pombe, and that from the Drosophila protein is sufficient for targeting to the centrosome in mammalian cells. The function of these proteins is unknown but they seem good candidates for having a centrosomal or spindle pole body location. The final 22 residues of this domain in AKAP-450 appear specifically to be a calmodulin-binding domain indicating that this member at least is likely to contribute to centrosome assembly .. +PF10496 SNARE-complex protein Syntaxin-18 N-terminus
This is the conserved N-terminal of Syntaxin-18. Syntaxin-18 is found in the SNARE complex of the endoplasmic reticulum and functions in the trafficking between the ER intermediate compartment and the cis-Golgi vesicle. In particular, the N-terminal region is important for the formation of ER aggregates . More specifically, syntaxin-18 is involved in endoplasmic reticulum-mediated phagocytosis, presumably by regulating the specific and direct fusion of the ER with the plasma or phagosomal membranes .. +PF10497 Zinc-finger domain of monoamine-oxidase A repressor R1
R1 is a transcription factor repressor that inhibits monoamine oxidase A gene expression. This domain is a four-CXXC zinc finger putative DNA-binding domain found at the C-terminal end of R1. The domain carries 12 cysteines of which four pairs are of the CXXC type .. +PF10498 Intra-flagellar transport protein 57
Eukaryotic cilia and flagella are specialised organelles found at the periphery of cells of diverse organisms. Intra-flagellar transport (IFT) is required for the assembly and maintenance of eukaryotic cilia and flagella, and consists of the bidirectional movement of large protein particles between the base and the distal tip of the organelle. IFT particles contain multiple copies of two distinct protein complexes, A and B, which contain at least 6 and 11 protein subunits. IFT57 is part of complex B but is not, however, required for the core subunits to stay associated . This protein is known as Huntington-interacting protein-1 in humans.. +PF10500 Nuclear RNA-splicing-associated protein
SR-25, otherwise known as ADP-ribosylation factor-like factor 6-interacting protein 4, is expressed in virtually all tissues. At the N-terminus there is a repeat of serine-arginine (SR repeat), and towards the middle of the protein there are clusters of both serines and of basic amino acids. The presence of many nuclear localisation signals strongly implies that this is a nuclear protein that may contribute to RNA splicing . SR-25 is also implicated, along with heat-shock-protein-27, as a mediator in the Rac1 (GTPase ras-related C3 botulinum toxin substrate 1) signalling pathway .. +PF10501 Ribosomal_S39;
Ribosomal subunit 39S. The 39S ribosomal protein appears to be a subunit of one of the larger mitochondrial 66S or 70S units . Under conditions of ethanol-stress in rats the larger subunit is largely dissociated into its smaller components . In E. coli, in the absence of the enzyme pseudouridine synthase (RluD), there is an accumulation of 50S and 30S subunits and the appearance of abnormal particles (62S and 39S), with concomitant loss of 70S ribosomes .. +PF10502 Signal peptidase, peptidase S26
Mistry J. Rawlings N. This is a family of membrane signal serine endopeptidases which function in the processing of newly-synthesised secreted proteins. Peptidase S26 removes the hydrophobic, N-terminal, signal peptides as proteins are translocated across membranes. The active site residues take the form of a catalytic dyad that is Ser, Lys in subfamily S26A; the Ser is the nucleophile in catalysis, and the Lys is the general base.. +PF10503 Esterase PHB depolymerase
Mistry J, Fushinobu S. This family of proteins include acetyl xylan esterases (AXE), feruloyl esterases (FAE), and poly(3-hydroxybutyrate) (PHB) depolymerases.. +PF10504 Protein of unknown function (DUF2452)
This protein is found in eukaryotes but its function is unknown.. +PF10505 NMDA receptor-regulated gene protein 2 C-terminus
Buljan M, Coggill P, Eberhardt R. The transition of neuronal cells from pre-cursor to mature state is regulated by the N-methyl-d-aspartate (NMDA) receptor, a glutamate-gated ion channel that is permeable to Ca2+. NMDA receptors probably mediate this activity by permitting expression of NARG2. NARG2 is transiently expressed, being a regulatory protein that is present in the nucleus of dividing cells and then down-regulated as progenitors exit the cell cycle and begin to differentiate. NARG2 contains repeats of (S/T)PXX, (11 in mouse, six in human), a putative DNA-binding motif that is found in many gene-regulatory proteins including Kruppel, Hunchback and Antennapedia . This C-terminal domain belongs to the PD-(D/E)XK nuclease superfamily .. +PF10506 PDZ domain of MCC-2 bdg protein for Usher syndrome
The protein has a high homology to the tumour suppressor MCC (mutated in colon cancer; or MCC1 hereafter) and was named MCC2. MCC2 protein binds the first PDZ domain of AIE-75 with its C-terminal amino acids -DTFL. A possible role of MCC2 as a tumor suppressor has been put forward. The carboxyl terminus of the predicted protein was DTFL which matched the consensus motif X-S/T-X-phi (phi: hydrophobic amino acid residue) for binding to the PDZ domain of AIE-75.. +PF10507 Protein of unknown function (DUF2453)
Some members of this family are purported to contain GAF domains but this could not be confirmed. The function is not known. It is likely to be a transmembrane protein.. +PF10508 Proteasom_PSMB5;
Proteasome non-ATPase 26S subunit. The 26S proteasome, a eukaryotic ATP-dependent, dumb-bell shaped, protease complex with a molecular mass of approx 2000 kDa consists of a central 20S proteasome, functioning as a catalytic machine, and two large V-shaped terminal modules, having possible regulatory roles, composed of multiple subunits of 25-110 kDa attached to the central portion in opposite orientations. It is responsible for degradation of abnormal intracellular proteins, including oxidatively damaged proteins, and may play a role as a component of a cellular anti-oxidative system. Expression of catalytic core subunits including PSMB5 and peptidase activities of the proteasome were elevated following incubation with 3-methylcholanthrene. The 20S proteasome comprises a cylindrical stack of four rings, two outer rings formed by seven alpha-subunits (alpha1-alpha7) and two inner rings of seven beta-subunits (beta1-beta7). Two outer rings of alpha subunits maintain structure, while the central beta rings contain the proteolytic active core subunits beta1 (PSMB6), beta2 (PSMB7), and beta5 (PSMB5). Expression of PSMB5 can be altered by chemical reactants, such as 3-methylcholanthrene .. +PF10509 Galactokinase galactose-binding signature
PROSITE_PS00106, Pfam-B_2277 (release 22.0). This is the highly conserved galactokinase signature sequence which appears to be present in all galactokinases irrespective of how many other ATP binding sites, etc that they carry . The function of this domain appears to be to bind galactose , and the domain is normally at the N-terminus of the enzymes, EC:2.7.1.6 . This domain is associated with the families GHMP_kinases_C, Pfam:PF08544 and GHMP_kinases_N, Pfam:PF00288.. +PF10510 Phosphatidylinositol-glycan biosynthesis class S protein
PIG-S is one of several key, core, components of the glycosylphosphatidylinositol (GPI) trans-amidase complex that mediates GPI anchoring in the endoplasmic reticulum. Anchoring occurs when a protein's C-terminal GPI attachment signal peptide is replaced with a pre-assembled GPI . Mammalian GPITransamidase consists of at least five components: Gaa1, Gpi8, PIG-S, PIG-T, and PIG-U, all five of which are required for function. It is possible that Gaa1, Gpi8, PIG-S, and PIG-T form a tightly associated core that is only weakly associated with PIG-U. The exact function of PIG-S is unclear .. +PF10511 Trappin protein transglutaminase binding domain
Trappin-2, itself a protease inhibitor, has this unique N-terminal domain that enables it to become cross-linked to extracellular matrix proteins by transglutaminase . This domain contains several repeated motifs with the consensus sequence Gly-Gln-Asp-Pro-Val-Lys, and these together can anchor the whole molecule to extracellular matrix proteins, such as laminin, fibronectin, beta-crystallin, collagen IV, fibrinogen, and elastin, by transglutaminase-catalysed cross-links. The whole domain is rich in glutamine and lysine, thus allowing transglutaminase(s) to catalyse the formation of an intermolecular epsilon-(gamma-glutamyl)lysine isopeptide bond . Cementoin is associated with the WAP family, Pfam:PF00095, at the C-terminus.. +PF10512 Cell division cycle-associated protein 8
The chromosomal passenger complex of Aurora B kinase, INCENP, and Survivin has essential regulatory roles at centromeres and the central spindle in mitosis. Borealin is also a member of the complex. Approximately half of Aurora B in mitotic cells is complexed with INCENP, Borealin, and Survivin. Depletion of Borealin by RNA interference delays mitotic progression and results in kinetochore-spindle mis-attachments and an increase in bipolar spindles associated with ectopic asters .. +PF10513 Enhancer of polycomb-like
Pfam-B_3033 (release 22.0). This is a family of EPL1 (Enhancer of polycomb-like) proteins. The EPL1 protein is a member of a histone acetyltransferase complex which is involved in transcriptional activation of selected genes .. +PF10514 Pro-apoptotic Bcl-2 protein, BAD
BAD is a Bcl-2 homology domain 3 (BH3)-only pro-apoptotic member of the Bcl-2 protein family that is regulated by phosphorylation in response to survival factors . Binding of BAD to mitochondria is thought to be exclusively mediated by its BH3 domain. Membrane localisation of BAD mediates membrane translocation of Bcl-XL. The C-terminal part of BAD is sufficient for membrane binding. There are two segments with differing lipid-binding preferences, LBD1 and LBD2, that are responsible for this binding: (i) LBD1 located in the proximity of the BH3 domain (amino acids 122-131) and (ii) LBD2, the putative C-terminal alpha-helix-5 . Phosphorylation-regulated 14-3-3 protein binding may expose the cholesterol-preferring LBD1 and bury the LBD2, thereby mediating translocation of BAD to raft-like micro-domains .. +PF10515 beta-amyloid precursor protein C-terminus
PROSITE_PS00320, Pfam-B_2082 (release 22.0). This is the amyloid, C-terminal, protein of the beta-Amyloid precursor protein (APP) which is a conserved and ubiquitous transmembrane glycoprotein strongly implicated in the pathogenesis of Alzheimer's disease but whose normal biological function is unknown. The C-terminal 100 residues are released and aggregate into amyloid deposits which are strongly implicated in the pathology of Alzheimer's disease plaque-formation. The domain is associated with family A4_EXTRA, Pfam:PF02177, further towards the N-terminus.. +PF10516 SHNi-TPR
Pfam-B_14727 (release 22.0). SHNi-TPR family members contain a reiterated sequence motif that is an interrupted form of TPR repeat .. +PF10517 Electron transfer DM13
The DM13 domain is a component of a novel electron-transfer system potentially involved in oxidative modification of animal cell-surface proteins . It contains a nearly absolutely conserved cysteine, which could be involved in a redox reaction, either as a naked thiol group or through binding a prosthetic group like heme .. +PF10518 TAT (twin-arginine translocation) pathway signal sequence
+PF10520 Kua-ubiquitin conjugating enzyme hybrid localisation domain
+PF10521 Protein of unknown function (DUF2454)
Pfam-B_82729 (release 22.0). A Schizosaccharomyces pombe member of this family is known to interact with Tel2. Tel2 is a component of the TOR complexes .. +PF10522 RII binding domain
Vijayaraghavan et al. Mol Endocrinology 13(5):705-717 (1999). This domain is found in a wide variety of AKAPs (A kinase anchoring proteins) .. +PF10523 BEN domain
The BEN domain is found in diverse animal proteins such as BANP/SMAR1, NAC1 and the Drosophila mod(mdg4) isoform C, in the chordopoxvirus virosomal protein E5R and in several proteins of polydnaviruses. Computational analysis suggests that the BEN domain mediates protein-DNA and protein-protein interactions during chromatin organisation and transcription .. +PF10524 Nuclear factor I protein pre-N-terminus
The Nuclear factor I (NFI) family of site-specific DNA-binding proteins (also known as CTF or CAAT box transcription factor) functions both in viral DNA replication and in the regulation of gene expression in higher organisms. The N-terminal 200 residues contain the DNA-binding and dimerisation domain, but also has an 8-47 residue highly conserved region 5' of this, whose function is not known. Deletion of the N-terminal 200 amino acids removes the DNA-binding activity, dimerisation-ability and the stimulation of adenovirus DNA replication .. +PF10525 Engrailed homeobox C-terminal signature domain
PROSITE_PS00033, Pfam-B_11539 (release 22.0). Engrailed homeobox proteins are characterised by the presence of a conserved region of some 20 amino-acid residues located at the C-terminal of the 'homeobox' domain. This domain of approximately 20 residues forms a kind of a signature pattern for this subfamily of proteins .. +PF10528 GLEYA domain
This presumed domain is found in fungal adhesins and is related to the PA14 domain.. +PF10529 Histidine-rich Calcium-binding repeat region
This is a histidine-rich calcium binding repeat which appears in proteins called histidine-rich-calcium binding proteins (HRC). HRC is a high capacity, low affinity Ca2+-binding protein, residing in the lumen of the sarcoplasmic reticulum. HRC binds directly to triadin. This binding interaction occurs between the histidine-rich region of HRC and multiple clusters of charged amino acids, named as the KEKE motifs, in the lumenal domain of triadin. The region in which this repeat is found in many copies is long and variable but is the acidic region of the protein. There is also a cysteine-rich region further towards the C-terminus . HRC may regulate sarcoplasmic reticular calcium transport and play a critical role in maintaining calcium homeostasis and function in the heart. HRC is a candidate regulator of sarcoplasmic reticular calcium uptake .. +PF10530 Toxin with inhibitor cystine knot ICK or Knottin scaffold
Spider toxins of the CSTX family are ion channel toxins containing an inhibitor cystine knot (ICK) structural motif or Knottin scaffold. The four disulfide bonds present in the CSTX spider toxin family are arranged in the following pattern: 1-4, 2-5, 3-8 and 6-7. CSTX-1 is the most important component of C. salei venom in terms of relative abundance and toxicity and therefore is likely to contribute significantly to the overall toxicity of the whole venom. CSTX-1 blocked rat neuronal L-type, but no other types of HVA Cav channels . Interestingly, the omega-toxins from Phoneutria nigriventer venom (another South American species also belonging to the Ctenidae family) are included as they carry the same disulfide bond arrangement. It has been suggested that CSTX-1 may interact with Cav channels. Calcium ion voltage channel heteromultimer containing an L-type pore-forming alpha1-subunit is the most probable candidate for the molecular target of CSTX-1 and these toxins .. +PF10531 SLBB domain
+PF10532 Plant specific N-all beta domain
This domain was identified by Babu and colleagues . It is found associated with the WRKY domain Pfam:PF03106.. +PF10533 Plant zinc cluster domain
This zinc binding domain was identified by Babu and colleagues and found associated with the WRKY domain Pfam:PF03106 .. +PF10534 Connector enhancer of kinase suppressor of ras
The CRIC - Connector enhancer of kinase suppressor of ras - domain functions as a scaffold in several signal cascades and acts on proliferation, differentiation and apoptosis.. +PF10536 Plant mobile domain
This domain was identified by Babu and colleagues in a variety of transposases .. +PF10537 ATP-utilising chromatin assembly and remodelling N-terminal
ACF (for ATP-utilising chromatin assembly and remodelling factor) is a chromatin-remodelling complex that catalyses the ATP-dependent assembly of periodic nucleosome arrays. The WAC (WSTF/Acf1/cbp146) domain is an approximately 110-residue module present at the N-termini of Acf1-related proteins in a variety of organisms. The DNA-binding region of Acf1 includes the WAC domain, which is necessary for the efficient binding of ACF complex to DNA.. +PF10538 Immunoreceptor tyrosine-based activation motif
Signal transduction by T and B cell antigen receptors and certain receptors for Ig Fc regions involves a conserved sequence motif, termed an immunoreceptor tyrosine-based activation motif (ITAM). It is also found in the cytoplasmic domain of apoptosis receptor.. +PF10539 Development and cell death domain
The DCD domain is found in plant proteins involved in development and cell death. The DCD domain is an approximately 130 amino acid long stretch that contains several mostly invariable motifs. These include a FGLP and a LFL motif at the N-terminus and a PAQV and a PLxE motif towards the C-terminus of the domain. The DCD domain is present in proteins with different architectures. Some of these proteins contain additional recognisable motifs, like the KELCH repeats or the ParB domain.. +PF10540 Munc13 (mammalian uncoordinated) homology domain
Munc13 proteins constitute a family of three highly homologous molecules (Munc13-1, Munc13-2 and Munc13-3) with homology to Caenorhabditis elegans unc-13p. Munc13 proteins contain a phorbol ester-binding C1 domain and two C2 domains, which are Ca2+/phospholipid binding domains. Sequence analyses have uncovered two regions called Munc13 homology domains 1 (MHD1) and 2 (MHD2) that are arranged between two flanking C2 domains. MHD1 and MHD2 domains are present in a wide variety of proteins from Arabidopsis thaliana, C. elegans, Drosophila melanogaster, mouse, rat and human, some of which may function in a Munc13-like manner to regulate membrane trafficking. The MHD1 and MHD2 domains are predicted to be alpha-helical.. +PF10541 Nuclear envelope localisation domain
The KASH (for Klarsicht/ANC-1/Syne-1 homology) or KLS domain is a highly hydrophobic nuclear envelope localisation domain of approximately 60 amino acids comprising a 20-amino-acid transmembrane region and a 30-35-residue C-terminal region that lies between the inner and the outer nuclear membranes . During meiotic prophase, telomeres cluster to form a bouquet arrangement of chromosomes. SUN and KASH domain proteins form complexes that span both membranes of the nuclear envelope. The KASH domain links the dynein motor complex of the microtubules, through the outer nuclear membrane to the Sad1 domain in the inner nuclear membrane which then interacts with the bouquet proteins Bqt1 and Bqt2 that are complexed with Bqt4, Rap1 and Taz1 and attached to the telomere . SUN domain-containing proteins are essential for recruiting KASH domain proteins at the outer nuclear membrane, and KASH domains provide a generic NE tethering device for functionally distinct proteins whose cytoplasmic domains mediate nuclear positioning, maintain physical connections with other cellular organelles, and possibly even influence chromosome dynamics .. +PF10542 Vitelline membrane cysteine-rich region
In Drosophila melanogaster the vitelline membrane (VM) is the first layer of the eggshell produced by the follicular epithelium. It is composed of at least four different proteins. VM proteins are similarly organised with a central highly conserved 38-amino acid domain which is flanked by unrelated regions. The domain contains three highly conserved cysteines.. +PF10543 ORF6N domain
This domain was identified by Iyer and colleagues .. +PF10544 T5orf172 domain
This domain was identified by Iyer and colleagues .. +PF10545 Alcohol dehydrogenase transcription factor Myb/SANT-like
The myb/SANT-like domain in Adf-1 (MADF) is an approximately 80-amino-acid module that directs sequence specific DNA binding to a site consisting of multiple tri-nucleotide repeats. The MADF domain is found in one or more copies in eukaryotic and viral proteins and is often associated with the BESS domain. It is likely that the MADF domain is more closely related to the myb/SANT domain than it is to other HTH domains.. +PF10546 P63C domain
This domain was identified by Iyer and colleagues .. +PF10547 P22_AR N-terminal domain
This domain was identified by Iyer and colleagues .. +PF10548 P22AR C-terminal domain
This domain was identified by Iyer and colleagues . It is found associated with Pfam:PF10547.. +PF10549 ORF11CD3 domain
This domain was identified by Iyer and colleagues .. +PF10550 Conantokin-G mollusc-toxin
The conantokins are a family of neuroactive peptides found in the venoms of fish-hunting cone snails. They possess a high content of gamma-carboxyglutamic acid (Gla) (4-5 residues), a non-standard amino-acid made by the post-translational modification of glutamate (Glu) residue. Conantokins are the only natural biochemically characterised peptides known to be N-methyl-D-aspartate (NMDA) receptor antagonists.. +PF10551 MULE transposase domain
This domain was identified by Babu and colleagues .. +PF10552 ORF6C domain
This domain was identified by Iyer and colleagues .. +PF10553 MSV199 domain
This domain was identified by Iyer and colleagues .. +PF10554 Ash protein family
This family was identified by Iyer and colleagues . It includes the Ash protein from bacteriophage P4.. +PF10555 Phospho-N-acetylmuramoyl-pentapeptide-transferase signature 1
Phospho-N-acetylmuramoyl-pentapeptide-transferase (EC 2.7.8.13) (mraY) is a bacterial enzyme responsible for the formation of the first lipid intermediate of the cell wall peptidoglycan synthesis. It catalyses the formation of undecaprenyl-pyrophosphoryl-N-acetylmuramoyl-pentapeptide from UDP-MurNAc-pentapeptide and undecaprenyl-phosphate. It is an integral membrane protein with probably ten transmembrane domains. This domain is located at the end of the first cytoplasmic loop and the beginning of the second transmembrane domain.. +PF10557 Cullin protein neddylation domain
This is the neddylation site of cullin proteins which are a family of structurally related proteins containing an evolutionarily conserved cullin domain. With the exception of APC2, each member of the cullin family is modified by Nedd8 and several cullins function in Ubiquitin-dependent proteolysis, a process in which the 26S proteasome recognises and subsequently degrades a target protein tagged with K48-linked poly-ubiquitin chains. Cullins are molecular scaffolds responsible for assembling the ROC1/Rbx1 RING-based E3 ubiquitin ligases, of which several play a direct role in tumorigenesis. Nedd8/Rub1 is a small ubiquitin-like protein, which was originally found to be conjugated to Cdc53, a cullin component of the SCF (Skp1-Cdc53/CUL1-F-box protein) E3 Ub ligase complex in Saccharomyces cerevisiae, and Nedd8 modification has now emerged as a regulatory pathway of fundamental importance for cell cycle control and for embryogenesis in metazoans. The only identified Nedd8 substrates are cullins. Neddylation results in covalent conjugation of a Nedd8 moiety onto a conserved cullin lysine residue .. +PF10558 Mitochondrial 18 KDa protein (MTP18)
This family of proteins are mitochondrial 18KDa proteins that are often misannotated as carbonic anhydrases. It was shown that knockdown of MTP18 protein results in a cytochrome c release from mitochondria and consequently leads to apoptosis . Overexpression studies suggest that MTP18 is required for mitochondrial fission .. +PF10559 Plug_Sec61p;
Plug domain of Sec61p. The Sec61/SecY translocon mediates translocation of proteins across the membrane and integration of membrane proteins into the lipid bilayer. The structure of the translocon revealed a plug domain blocking the pore on the lumenal side. The plug is unlikely to be important for sealing the translocation pore in yeast but it plays a role in stabilising Sec61p during translocon formation. The domain runs from residues 52-74 .. +PF10561 Uncharacterised protein family UPF0565
This family of proteins has no known function.. +PF10562 Calmodulin-binding domain C0 of NMDA receptor NR1 subunit
Pfam-B_7118 (release 22.0). This is a very short highly conserved domain that is C-terminal to the cytosolic transmembrane region IV of the NMDA-receptor 1. It has been shown to bind Calmodulin-Calcium with high affinity. The ionotropic N-methyl-D-aspartate receptor (NMDAR) is a major source of calcium flux into neurons in the brain and plays a critical role in learning, memory, neural development, and synaptic plasticity. Calmodulin (CaM) regulates NMDARs by binding tightly to the C0 and C1 regions of their NR1 subunit. The conserved tryptophan is considered to be the anchor residue .. +PF10563 Cadmium carbonic anhydrase repeat
This domain is the cadmium carbonic anhydrase repeat unit of the beta-carbonic anhydrase of a marine diatom , that uses both zinc and cadmium for catalysis of the reversible hydration of carbon dioxide for use in inorganic carbon acquisition for photosynthesis (thus being a cambialistic enzyme). Compared with alpha- and gamma-carbonic anhydrases that use three histidines to coordinate the zinc-atom, this beta-carbonic anhydrase has two cysteines and one histidine, and rapidly binds cadmium .. +PF10564 Sialic-acid binding micronemal adhesive repeat
This domain is a novel carbohydrate-binding domain found on micronemal proteins. Micronemal proteins (MICs) are released onto the parasite surface just before invasion of host cells and play important roles in host cell recognition, attachment and penetration. Toxoplasma gondii can infect and replicate within all nucleated cells . This domain interacts with sialylated oligosaccharides; the protein in Toxoplasma gondii is a monomer but several MAR domains are carried on the protein. Each MAR domain contains one central sialic acid-binding pocket .. +PF10565 N-methyl D-aspartate receptor 2B3 C-terminus
Pfam-B_53396 (release 22.0). This domain is found at the C-terminus of many NMDA-receptor proteins, many of which also carry the Ligated ion-channel family Pfam:PF00060 further upstream as well as the ANF_receptor family Pfam:PF01094. This region is predicted to be a large extra-cellular domain of the NMDA receptor proteins, being highly hydrophilic, and is thought to be integrally involved in the function of the receptor. The region also carries a number of potential N-glycosylation sites .. +PF10566 Glycoside hydrolase 97
This domain is the catalytic region of the bacterial glycosyl-hydrolase family 97. This central part of the GH97 family protein sequences represents a typical and complete (beta/alpha)8-barrel or catalytic TIM-barrel type domain. The N- and C-terminal parts of the sequences, mainly consisting of beta-strands, form two additional non-catalytic domains . In all known glycosidases with the (beta-alpha)8-barrel fold, the amino acid residues at the active site are located on the C-termini of the beta-strands [2,3].. +PF10567 RNA-recognition motif
Griffiths-Jones S, Coggill P. Pfam-B_57293 (release 21.0). This conserved domain is found in fungal proteins and appears to be involved in RNA-processing. It binds to poly-adenylated RNA, interacts genetically with mRNA 3'-end processing factors, copurifies with the nuclear cap-binding protein Cbp20p, and is found in complexes containing other translation factors, such as EIF4G as in Swiss:P39935 and Swiss:P39936.. +PF10568 Outer mitochondrial membrane transport complex protein
Pfam-B_30563 (release 22.0). The TOM37 protein is one of the outer membrane proteins that make up the TOM complex for guiding cytosolic mitochondrial beta-barrel proteins from the cytosol across the outer mitochondrial membrane into the intramembrane space. In conjunction with TOM70 it guides peptides without an MTS into TOM40, the protein that forms the passage through the outer membrane . It has homology with Metaxin-1, also part of the outer mitochondrial membrane beta-barrel protein transport complex .. +PF10569 Alpha-macro-globulin thiol-ester bond-forming region
This short highly conserved region of proteinase-binding alpha-macro-globulins contains the cysteine and a glutamine of a thiol-ester bond that is cleaved at the moment of proteinase binding, and mediates the covalent binding of the alpha-macro-globulin to the proteinase. The GCGEQ motif is highly conserved.. +PF10570 Myelin-PO_N;
Myelin-PO cytoplasmic C-term p65 binding region. Pfam-B_1437 (release 22.0), PROSITE_PS00568. Myelin protein zero is the major myelin protein in the peripheral central nervous system and is essential for normal myelination. The family is a single-pass transmembrane molecule containing one Ig-like loop in the extracellular domain and this highly basic 69 residue C-terminal cytoplasmic domain which is the region that interacts with protein p65 .. +PF10571 Uncharacterised protein family UPF0547
This domain contains a zinc-ribbon motif.. +PF10572 Uncharacterised protein family UPF0556
This family of proteins has no known function.. +PF10573 Uncharacterised protein family UPF0561
This family of proteins has no known function.. +PF10574 Uncharacterised protein family UPF0552
This family of proteins has no known function.. +PF10576 Iron-sulfur binding domain of endonuclease III
Escherichia coli endonuclease III (EC 4.2.99.18) is a DNA repair enzyme that acts both as a DNA N-glycosylase, removing oxidised pyrimidines from DNA, and as an apurinic/apyrimidinic (AP) endonuclease, introducing a single-strand nick at the site from which the damaged base was removed. Endonuclease III is an iron-sulfur protein that binds a single 4Fe-4S cluster. The 4Fe-4S cluster does not seem to be important for catalytic activity, but is probably involved in the proper positioning of the enzyme along the DNA strand . The 4Fe-4S cluster is bound by four cysteines which are all located in a 17 amino acid region at the C-terminal end of endonuclease III. A similar region is also present in the central section of mutY and in the C-terminus of ORF-10 and of the Micro-coccus UV endonuclease .. +PF10577 Uncharacterised protein family UPF0560
This family of proteins has no known function.. +PF10578 Seminal vesicle protein repeat
+PF10579 Rapsyn N-terminal myristoylation and linker region
Neuromuscular junction formation relies upon the clustering of acetylcholine receptors and other proteins in the muscle membrane. Rapsyn is a peripheral membrane protein that is selectively concentrated at the neuromuscular junction and is essential for the formation of synaptic acetylcholine receptor aggregates. Acetylcholine receptors fail to aggregate beneath nerve terminals in mice where rapsyn has been knocked out. The N-terminal six amino acids of rapsyn are its myristoylation site, and myristoylation is necessary for the targeting of the protein to the membrane .. +PF10580 Gap junction protein N-terminal region
+PF10581 Synapsin N-terminal
This highly conserved domain of synapsin proteins has a serine at position 9 or 10 which is a phosphorylation site. The domain appears to be the part of the molecule that binds to calmodulin .. +PF10582 Gap junction channel protein cysteine-rich domain
Pfam-B_1437 (release 22.0), PROSITE_PS00408. +PF10583 Involucrin of squamous epithelia N-terminus
Pfam-B_7423 (release 22.0), PROSITE_PS00795. This is the N-terminal three beta strands of involucrin, a protein present in keratinocytes of epidermis and other stratified squamous epithelia. Involucrin first appears in the cell cytosol, but ultimately becomes cross-linked to membrane proteins by transglutaminase thus helping in the formation of an insoluble envelope beneath the plasma membrane. Apigenin is a plant-derived flavonoid that has significant promise as a skin cancer chemopreventive agent. It has been found that apigenin regulates normal human keratinocyte differentiation by suppressing it and this is associated with reduced cell proliferation without apoptosis . The downstream part of the protein is represented by the family Involucrin, Pfam:PF00904.. +PF10584 Proteasome subunit A N-terminal signature
This domain is conserved in the A subunits of the proteasome complex proteins.. +PF10585 Ubiquitin-activating enzyme active site
Ubiquitin-activating enzyme (E1 enzyme) activates ubiquitin by first adenylating with ATP its C-terminal glycine residue and thereafter linking this residue to the side chain of a cysteine residue in E1, yielding an ubiquitin-E1 thiolester and free AMP. Later the ubiquitin moiety is transferred to a cysteine residue on one of the many forms of ubiquitin-conjugating enzymes (E2) . This domain carries the last of five conserved cysteines that is part of the active site of the enzyme, responsible for ubiquitin thiolester complex formation, the active site being represented by the sequence motif PICTLKNFP .. +PF10587 Eukaryotic elongation factor 1 beta central acidic region
Pfam-B_9497 (release 22.0), PROSITE_PS00824. +PF10588 NADH-ubiquinone oxidoreductase-G iron-sulfur binding region
Pfam-B_202 (release 22.0), PROSITE_PS00642. +PF10589 NADH-ubiquinone oxidoreductase-F iron-sulfur binding region
Pfam-B_339 (release 22.0), PROSITE_PS00645. +PF10590 Pyridoxine 5'-phosphate oxidase C-terminal dimerisation region
Pfam-B_685 (release 22.0), PROSITE_PS01064. Pyridoxine 5'-phosphate oxidase (PNPOx) catalyses the terminal step in the biosynthesis of pyridoxal 5'-phosphate (PLP), a cofactor used by many enzymes involved in amino acid metabolism. The enzyme oxidises either the 4'-hydroxyl group of pyridoxine 5'-phosphate (PNP) or the 4'-primary amine of pyridoxamine 5'-phosphate (PMP) to an aldehyde. PNPOx is a homodimeric enzyme with one flavin mononucleotide (FMN) molecule non-covalently bound to each subunit. This domain represents one of the two dimerisation regions of the protein, located at the edge of the dimer interface, at the C-terminus, being the last three beta strands, S6, S7, and S8 along with the last three residues to the end. In Swiss:P21159, S6 runs from residues 178-192, S7 from 200-206 and S8 from 211-215. The extended loop of residues 167-177 may well be involved in the pocket formed between the two dimers that positions the FMN molecule .. +PF10591 Secreted protein acidic and rich in cysteine Ca binding region
Pfam-B_3882 (release 22.0), PROSITE_PS00613. The SPARC_Ca_bdg domain of Secreted Protein Acidic and Rich in Cysteine is responsible for the anti-spreading activity of human urothelial cells. It is rich in alpha-helices. This extracellular calcium-binding domain contains two EF-hands that each coordinates one Ca2+ ion, forming a helix-loop-helix structure that not only drives the conformation of the protein but is also necessary for biological activity. The anti-spreading activity was dependent on the coordination of Ca2+ by a Glu residue at the Z position of EF-hand 2 .. +PF10592 AIPR protein
This family of proteins was identified as an abortive infection phage resistance protein often found in restriction modification system operons.. +PF10593 Z1 domain
This uncharacterised domain was identified by Iyer and colleagues . It is found associated with a helicase domain of superfamily type II.. +PF10595 Uncharacterised protein family UPF0564
This family of proteins has no known function. However, one of the members, Swiss:Q22CP8, is annotated as an EF-hand family protein.. +PF10596 U6-snRNA interacting domain of PrP8
This domain incorporates the interacting site for the U6-snRNA as part of the U4/U6.U5 tri-snRNPs complex of the spliceosome, and is the prime candidate for the role of cofactor for the spliceosome's RNA core. The essential spliceosomal protein Prp8 interacts with U5 and U6 snRNAs and with specific pre-mRNA sequences that participate in catalysis. This close association with crucial RNA sequences, together with extensive genetic evidence, suggests that Prp8 could directly affect the function of the catalytic core, perhaps acting as a splicing cofactor .. +PF10597 U5-snRNA binding site 2 of PrP8
The essential spliceosomal protein Prp8 interacts with U5 and U6 snRNAs and with specific pre-mRNA sequences that participate in catalysis . This close association with crucial RNA sequences, together with extensive genetic evidence, suggests that Prp8 could directly affect the function of the catalytic core, perhaps acting as a splicing cofactor .. +PF10598 RNA recognition motif of the spliceosomal PrP8
The large RNA-protein complex of the spliceosome catalyses pre-mRNA splicing. One of the most conserved core proteins is PrP8 which occupies a central position in the catalytic core of the spliceosome, and has been implicated in several crucial molecular rearrangements that occur there, and has recently come under the spotlight for its role in the inherited human disease, Retinitis Pigmentosa . The RNA-recognition motif of PrP8 is highly conserved and provides a possible RNA binding centre for the 5-prime SS, BP, or 3-prime SS of pre-mRNA which are known to contact with Prp8. The most conserved regions of an RRM are defined as the RNP1 and RNP2 sequences. Recognition of RNA targets can also be modulated by a number of other factors, most notably the two loops beta1-alpha1, beta2-beta3 and the amino acid residues C-terminal to the RNP2 domain .. +PF10599 Retro-transposon transporting motif
This is the highly conserved C-terminal motif GRKIxxxxxRRKx of nucleoporins that plays a critical and unique role in the nuclear import of retro-transposons in both yeasts and higher organisms. It would appear that the arginine residues at positions 2 and 9-10 constitute a bipartite nuclear localisation signal, with two basic peptide motifs separated by an interchangeable spacer sequence, that is crucial for the retro-transposon activity .. +PF10600 PDZ-associated domain of NMDA receptors
Pfam-B_10923 (release 22.0). This domain is found in higher eukaryotes between the second and third PDZ domains, Pfam:PF00595, of glutamate receptor like proteins. Its exact function is not known.. +PF10601 LITAF-like zinc ribbon domain
Clustering of trematode sequences. Members of this family display a conserved zinc ribbon structure with the motif C-XX-C- separated from the more C-terminal HX-C(P)X-C-X4-G-R motif by a variable region of usually 25-30 (hydrophobic) residues. Although it belongs to one of the zinc finger's fold groups (zinc ribbon), this particular domain was first identified in LPS-induced tumour necrosis alpha factor (LITAF) which is produced in mammalian cells after being challenged with lipopolysaccharide (LPS) . The hydrophobic region probably inserts into the membrane rather than traversing it. Such an insertion brings together the N- and C-terminal C-XX-C motifs to form a compact Zn2+-binding structure .. +PF10602 26S proteasome subunit RPN7
Pfam-B_4112 (release 22.0). RPN7 (known as the non ATPase regulatory subunit 6 in higher eukaryotes) is one of the lid subunits of the 26S proteasome and has been shown in Saccharomyces cerevisiae to be required for structural integrity . The 26S proteasome is involved in the ATP-dependent degradation of ubiquitinated proteins.. +PF10604 Polyketide cyclase / dehydrase and lipid transport
Lakshminarayan L, Mistry J. This family contains polyketide cyclases/dehydrases which are enzymes involved in polyketide synthesis. It also includes other proteins of the START superfamily .. +PF10605 3HB-oligomer hydrolase (3HBOH)
FIG094011 (Release 2.0). D-(-)-3-hydroxybutyrate oligomer hydrolase (also known as 3HB-oligomer hydrolase) functions in the degradation of poly-3-hydroxybutyrate (PHB). It catalyses the hydrolysis of D(-)-3-hydroxybutyrate oligomers (3HB-oligomers) into 3HB-monomers.. +PF10606 Homer-binding domain of metabotropic glutamate receptor
Pfam-B_17370 (release 22.0). This is the proline-rich region of metabotropic glutamate receptor proteins that binds Homer-related synaptic proteins. The Homer proteins form a physical tether linking mGluRs with the inositol trisphosphate receptors (IP3R) that appears to be due to the proline-rich "Homer ligand" (PPXXFr). Activation of PI turnover triggers intracellular calcium release . MGluR function is altered in the mouse model of human Fragile X syndrome mental retardation, a disorder caused by loss of function mutations in the Fragile X mental retardation gene Fmr1. Homer 3 (and to a lesser extent Homer 1b/c) has been shown to form a multimeric complex with mGlu1a and the IP3 receptor, indicating that Homers may play a role in the localisation of receptors to their signalling partners .. +PF10607 RanBPM_CRA;
CTLH/CRA C-terminal to LisH motif domain. Pfam-B_3765 (release 22.0), UPF0559. RanBPM is a scaffolding protein and is important in regulating cellular function in both the immune system and the nervous system. This domain is at the C-terminus of the proteins and is the binding domain for the CRA motif (for CT11-RanBPM), which is comprised of approximately 100 amino acids at the C terminal of RanBPM. It was found to be important for the interaction of RanBPM with fragile X mental retardation protein (FMRP), but its functional significance has yet to be determined . This region contains CTLH and CRA domains annotated by SMART; however, these may be a single domain, and it is referred to as a C-terminal to LisH motif .. +PF10608 Polyubiquitination (PEST) N-terminal domain of MAGUK
Pfam-B_18049 (release 22.0). The residues upstream of this domain are the probable palmitoylation sites, particularly two cysteines. The domain has a putative PEST site at the very start that seems to be responsible for poly-ubiquitination . PEST domains are polypeptide sequences enriched in proline (P), glutamic acid (E), serine (S) and threonine (T) that target proteins for rapid destruction. The whole domain, in conjunction with a C-terminal domain of the longer protein, is necessary for dimerisation of the whole protein .. +PF10609 ParA/MinD ATPase like
Pfam-B_177 (release 21.0). This family contains ATPases involved in plasmid partitioning . It also contains the cytosolic Fe-S cluster assembling factor NBP35 which is required for biogenesis and export of both ribosomal subunits .. +PF10610 Thin aggregative fimbriae synthesis protein
FIGFAMs, Mistry J, Coggill P. FIG009025 (Release 2.0). Fimbriae are cell-surface protein polymers, of eg. E coli and Salmonella spp, that mediate interactions important for host and environmental persistence, development of biofilms, motility, colonisation and invasion of cells, and conjugation. Four general assembly pathways for different fimbriae have been proposed, one of which is extracellular nucleation-precipitation (ENP), that differs from the others in that fibre-growth occurs extracellularly. Thin aggregative fimbriae (Tafi) are the only fimbriae dependent on the ENP pathway. Tafi were first identified in Salmonella spp and the controlling operon termed agf; however subsequent isolation of the homologous operon in E coli led to its being called csg. Tafi are known as curli because, in the absence of extracellular polysaccharides, their morphology appears curled; however, when expressed with such polysaccharides their morphology appears as a tangled amorphous matrix. The gene agfC is found to be transcribed at low levels, localised to the periplasm in a mature form, and in combination with AgfE is important for AgfA extracellular assembly, which facilitates the synthesis of Tafi. The genes involved in Tafi production are organised into two adjacent divergently transcribed operons, agfBAC and agfDEFG, both of which are required for biosynthesis and assembly .. +PF10611 Protein of unknown function (DUF2469)
FIG004032 (Release 2.0). Member proteins often found in Actinomycetes clustered with signal peptidase and/or RNAse-HII.. +PF10612 Spore coat protein Z
FIGFAMs, Mistry J, Coggill P. FIG014057 (Release 2.0). This family has members annotated as Spore coat protein Z, otherwise known as CotZ. It is a cysteine-rich spore coat family, and along with CotY is necessary for assembly of intact exosporium.. +PF10613 Ligated ion channel L-glutamate- and glycine-binding site
Pfam-B_203 (release 22.0). This region, sometimes called the S1 domain, is the luminal domain just upstream of the first, M1, transmembrane region of transmembrane ion-channel proteins, and it binds L-glutamate and glycine . It is found in association with Lig_chan, Pfam:PF00060.. +PF10614 Tafi-CsgF;
Type VIII secretion system (T8SS), CsgF protein. FIGFAMs, Mistry J, Coggill P, Desvaux M. FIG077109 (Release 2.0). The extracellular nucleation-precipitation (ENP) pathway or Type VIII secretion system (T8SS) in Gram-negative (diderm) bacteria is responsible for the secretion and assembly of prepilins for fimbriae biogenesis, the prototypical curli. Besides the T2SS that can be involved in the assembly of prototypical Type 4 pilus, the T4SS that can be involved in the biogenesis of the prototypical pilus T, the T3SS involved in the assembly of the injectisome and the T7SS involved in the formation of the prototypical Type 1 pilus, the T8SS differs in that fibre-growth occurs extracellularly. The curli, also called thin aggregative fimbriae (Tafi), are the only fimbriae dependent on the T8SS. Tafi were first identified in Salmonella spp and the controlling operon termed agf; however subsequent isolation of the homologous operon in E coli led to its being called csg. In the absence of extracellular polysaccharides Tafi appear curled, although when expressed with such polysaccharides their morphology appears as a tangled amorphous matrix . CsgF is one of three putative curli assembly factors appearing to act as a nucleator protein. Unlike eukaryotic amyloid formation, curli biogenesis is a productive pathway requiring a specific assembly machinery .. +PF10615 Protein of unknown function (DUF2470)
FIGFAMs, Mistry J, Coggill P. FIG076093 (Release 2.0). This family is a putative haem-iron utilisation family, as many members are annotated as being pyridoxamine 5'-phosphate oxidase-related, FMN-binding; however this could not be confirmed.. +PF10616 Protein of unknown function (DUF2471)
FIGFAMs, Mistry J, Coggill P. FIG076041 (Release 2.0). The function of this family is unknown. Members all come from Burkholderia spp. Swiss:A2WH83 is annotated as Serine/threonine-protein kinase, but this could not be confirmed.. +PF10617 Protein of unknown function (DUF2474)
FIGfam, Mistry J, Coggill P. FIG073099 (Release 2.0). This family of short proteins has no known function.. +PF10618 DUF2473;
Phage tail tube protein. FIG030252 (Release 2.0). This bacterial family of proteins contains phage tail tube proteins related to the Mu phage protein Swiss:P79679 . Bacteriophage Mu has an icosahedral head and contractile tail. The tail is composed of an outer sheath and an inner tube.. +PF10620 Phosphoribosyl-dephospho-CoA transferase MdcG
FIG003211 (Release 2.0). MdcG is a phosphoribosyl-dephospho-CoA transferase that is involved in the biosynthesis of the prosthetic group of malonate decarboxylase . Malonate decarboxylase from Klebsiella pneumoniae contains an acyl carrier protein (MdcC) to which a 2'-(5''-phosphoribosyl)-3'-dephospho-CoA prosthetic group is attached via phosphodiester linkage. MdcG catalyses the following reaction: 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA + apo-[acyl-carrier-protein] = holo-[acyl-carrier-protein] + diphosphate.. +PF10621 zf-Fe2-S2-FpoO;
F420H2 dehydrogenase subunit FpoO . FIGfam, Mistry J, Coggill P. FIG062058 (Release 2.0). This is the FpoO subunit of F420H2 dehydrogenase, an enzyme which oxidises reduced coenzyme F420. Reduced coenzyme F420 is a universal electron carrier in methanogens.. +PF10622 Energy-converting hydrogenase B subunit P (EhbP)
FIG124174 (Release 2.0). Ehb (energy-converting hydrogenase B) is a methanogenic archaeal enzyme that functions in one of the metabolic pathways involved in methanol reduction to methane. This family contains subunit P of Ehb.. +PF10623 Plasmid conjugative transfer protein PilI
FIG136100 (Release 2.0). The thin pilus of plasmid R64 belongs to the type IV family and is required for liquid matings. pilI is one of 14 genes that have been identified as being involved in biogenesis of the R64 thin pilus .. +PF10624 Plasmid conjugative transfer entry exclusion protein TraS
FIG105028 (Release 2.0). Entry exclusion (Eex) is a process which prevents redundant transfer of DNA between donor cells. TraS is a protein involved in Eex. It blocks redundant conjugative DNA synthesis and transport between donor cells, and it is suggested that TraS interferes with a signalling pathway that is required to trigger DNA transfer . TraS on the recipient cell is known to form an interaction with TraG on the donor cell .. +PF10625 Universal stress protein B (UspB)
FIG002192 (Release 2.0). UspB in Escherichia coli is a 14kDa protein which is predicted to be an integral membrane protein. Overexpression of UspB results in cell death in stationary phase, and mutants of uspB are sensitive to ethanol exposure during stationary phase .. +PF10626 Conjugative transposon protein TraO
FIG055244 (Release 2.0). This is a family of conjugative transposon proteins.. +PF10627 Curli assembly protein CsgE
FIG091002 (Release 2.0). Curli are a class of highly aggregated surface fibres that are part of a complex extracellular matrix. They promote biofilm formation in addition to other activities. CsgE is a non-structural protein involved in curli biogenesis . CsgE forms an outer membrane complex with the curli assembly proteins CsgG and CsgF .. +PF10628 Outer spore coat protein E (CotE)
FIG006437 (Release 2.0). CotE is a morphogenic protein that is required for the assembly of the outer coat of the endospore and spore resistance to lysozyme . CotE also regulates the expression of cotA, cotB, cotC and other genes encoding spore outer coat proteins . The timing of cotE expression has been shown in Bacillus subtilis to affect spore coat morphology but not lysozyme resistance .. +PF10629 Protein of unknown function (DUF2475)
This family of proteins has no known function.. +PF10630 Protein of unknown function (DUF2476)
This is a family of proteins of unknown function. The family is rich in proline residues.. +PF10631 Protein of unknown function (DUF2477)
This is a family of proteins with no known function. The family is rich in proline residues.. +PF10632 He_PIG associated, NEW1 domain of bacterial glycohydrolase
Naumoff D, Coggill P. Pfam-B_97991 (release 22.0). The English-language version of the first reference can be found on pages 388-399 of the above. This domain has been named NEW1 but its actual function is not known. It is found on proteins which are bacterial galactosidases . The domain is associated with the He_PIG family, Pfam:PF05345, a putative Ig-containing domain.. +PF10633 NPCBM-assoc;
NPCBM-associated, NEW3 domain of alpha-galactosidase. Naumoff D, Coggill P. Pfam-B_97993 (release 22.0). The English-language version of the first reference can be found on pages 388-399 of the above. This domain has been named NEW3 but its actual function is not known. It is found on proteins which are bacterial galactosidases . The domain is associated with the NPCBM family, Pfam:PF08305, a novel putative carbohydrate binding module found at the N-terminus of glycosyl hydrolases.. +PF10634 Fe2+ transport protein
FIG005173 (Release 2.0). This is a bacterial family of periplasmic proteins that are thought to function in high-affinity Fe2+ transport.. +PF10635 DisA bacterial checkpoint controller linker region
The DisA protein is a bacterial checkpoint protein that dimerises into an octameric complex. The protein consists of three distinct domains: the first, N-terminal region, from 1-145 is globular and is represented by family DisA_N, Pfam:PF02457; the next 146-289 residues is this domain that consists of an elongated bundle of three alpha helices (alpha-6, alpha-10, and alpha-11), one side of which carries an additional three helices (alpha7-9), thus forming a spine-like linker between domains 1 and 3. The C-terminal residues of domain 3 are family HHH, Pfam:PF00633, the specific DNA-binding domain. The octameric complex thus has structurally linked nucleotide-binding and DNA-binding HhH domains and the nucleotide-binding domains are bound to a cyclic di-adenosine phosphate such that DisA is a specific di-adenylate cyclase. The di-adenylate cyclase activity is strongly suppressed by binding to branched DNA, but not to duplex or single-stranded DNA, suggesting a role for DisA as a monitor of the presence of stalled replication forks or recombination intermediates via DNA structure-modulated c-di-AMP synthesis .. +PF10636 Hemin uptake protein hemP
FIG024330 (Release 2.0). This is a bacterial family of proteins that are involved in the uptake of the iron source hemin .. +PF10637 Oxoglutarate and iron-dependent oxygenase degradation C-term
Pfam-B_18095 (release 22.0). Ofd1 is a prolyl 4-hydroxylase-like 2-oxoglutarate-Fe(II) dioxygenase that accelerates the degradation of Sre1N in the presence of oxygen. The domain is conserved from yeasts to humans. Yeast Sre1 is the orthologue of mammalian sterol regulatory element binding protein (SREBP), and it responds to changes in oxygen-dependent sterol synthesis as an indirect measure of oxygen availability. However, unlike the prolyl 4-hydroxylases that regulate mammalian hypoxia-inducible factor, Ofd1 uses multiple domains to regulate Sre1N degradation by oxygen; the Ofd1 N-terminal dioxygenase domain is required for oxygen sensing and this Ofd1 C-terminal domain accelerates Sre1N degradation in yeasts .. +PF10638 Spindle body associated protein C-terminus
This C-terminal domain of spindle-body-associated protein Sfi1 has an important role to play in the bridge-splitting during bi-polar spindle assembly, and this separation event possibly requires interaction with integral components of the nuclear envelope, such as the Mps2-Bbp1 complex . Centrally to this domain is a region carrying centrin-binding repeats with repeating units containing tryptophan, family Sfi1_central, Pfam:PF08457.. +PF10639 Uncharacterised protein family UPF0546
This family of proteins has no known function. Many members are annotated as potential transmembrane proteins.. +PF10640 mRNA capping enzyme N-terminal, ATPase and guanylyltransferase
This domain is the N-terminus of the large subunit viral mRNA capping enzyme, and carries both the ATPase and the guanylyltransferase activities of the enzyme. The guanylyltransferase enzymatic region runs from residues 242 (leucine)-273(arginine) , the core of the active site being the lysine residue at 260 . The ATPase activity is at the very N-terminal part of the domain .. +PF10642 Mitochondrial import receptor subunit or translocase
Pfam-B_89651 (release 22.0). This protein family is very short and is only found in yeasts. Tom5 is one of three very small translocases of the mitochondrial outer membrane. Tom5 links mitochondrial preprotein receptors to the general import pore . Although Tom5 has allegedly been identified in vertebrates this could not be confirmed.. +PF10643 Photosystem P840 reaction-centre cytochrome c-551
FIGFam, Mistry J, Coggill P. FIG055090 (Release 2.0). A photosynthetic reaction-centre complex is found in certain green sulphur bacteria such as Chlorobium vibrioforme which are anaerobic photo-auto-trophic organisms. The primary electron donor is P840, a probable B-Chl a dimer, and the primary electron acceptor is a B-Chl monomer. Also on the donor side c-type cytochromes are known to function as electron donors to photo-oxidised P840. This family is thus the secondary endogenous donor of the photosynthetic reaction-centre complex and is a membrane-bound cytochrome containing a single haem group.. +PF10644 Misat_Myo_SegII;
Misato Segment II tubulin-like domain. Pfam-B_7826 (release 21.0). The misato protein contains three distinct, conserved domains, segments I, II and III. Segments I and III are common to Tubulins Pfam:PF00091, but segment II aligns with myosin heavy chain sequences from D. melanogaster (PIR C35815), rabbit (SP P04460), and human (PIR S12458). Segment II of misato is a major contributor to its greater length compared with the various tubulins. The most significant sequence similarities to this 54-amino acid region are from a motif found in the heavy chains of myosins from different organisms. A comparison of segment II with the vertebrate myosin heavy chains reveals that it is homologous to a myosin peptide in the hinge region linking the S2 and LMM domains. Segment II also contains heptad repeats which are characteristic of the myosin tail alpha-helical coiled-coils . This myosin-like homology may be due only to the fact that both myosin and Misato carry coiled-coils, which appear similar but are not necessarily homologous (Wood V, personal communication).. +PF10645 Carbohydrate binding
Pfam-B_63360 (release 22.0). This is a carbohydrate binding domain which has been shown in Schizosaccharomyces pombe to be required for septum localisation .. +PF10646 GerMN;
Sporulation and spore germination. The GerMN domain is a region of approximately 100 residues that is found, duplicated, in the Bacillus GerM protein and is implicated in both sporulation and spore germination. The domain is found in a number of different bacterial species both alone and in association with other domains such as Amidase_3 Pfam:PF01520, Gmad1 and Gmad2. It is predicted to have a novel alpha-beta fold.. +PF10647 Lipoprotein LpqB beta-propeller domain
The Gmad1 domain is found associated with the GerMN family, Pfam:PF10646, in bacterial spore formation. It is predicted to have a beta-propeller fold and to have a passive binding role rather than a catalytic function owing to the low number of conserved hydrophilic residues.. +PF10648 Immunoglobulin-like domain of bacterial spore germination
This domain is found linked to the GerMN domain Pfam:PF10646 in some bacterial proteins. It is predicted to contain an immunoglobulin-like all-beta fold.. +PF10649 Protein of unknown function (DUF2478)
FIGfam, Mistry J, Coggill P. FIG046046 (Release 2.0) . This is a family of hypothetical bacterial proteins found in the vicinity of Molybdenum ABC transporter ATP-binding gene-products MobA MobB and MobC. However the function could not be confirmed. This family appears to belong to the P-loop superfamily by alignment to Pfam:PF03266. However, the characteristic P-loop sequence motif appears to have diverged beyond recognition in this family.. +PF10650 Putative zinc-finger domain
This domain is conserved in fungi and might be a zinc-finger domain as it contains three conserved Cs and an H in the C-x8-C-x5-C-x3-H conformation typical of a zinc-finger.. +PF10651 Domain of unknown function (DUF2479)
FIGfam, Mistry J, Coggill P. FIG015005 (Release 2.0). This domain is found in phage from a number of different bacteria. It is purported to be a putative long tail fibre (Bacteriophage A118) protein, but this could not be confirmed.. +PF10652 Protein of unknown function (DUF2480)
FIGfam, Mistry J, Coggill P. FIG020045 (Release 2.0) . All the members of this family are uncharacterised proteins, but the environment in which they are found on the bacterial genome suggests a function as a glucose-6-phosphate isomerase (EC 5.3.1.9). This could not, however, be confirmed.. +PF10653 Protein gp45 of Bacteriophage A118
FIGfam, Mistry J, Coggill P. FIG0160151 (Release 2.0). This domain is found in bacteriophage and is thought to have a gp45 function within the phage tail-fibre system.. +PF10654 DUF2482;
Protein of unknown function (DUF2481) . FIGfam, Mistry J, Coggill P. FIG020094 (Release 2.0). This is a hypothetical protein family homologous to Lmo2305 in Bacteriophage A118 systems.. +PF10655 Hypothetical protein of unknown function (DUF2482)
FIGfam, Mistry J, Coggill P. FIG030041 (Release 2.0). All the members of this very small, very short family are derived from bacteriophages, of the SA bacteriophages 11, Mu50B, system, and from the Staphylococcal_phi-Mu50B-like_prophages subsystem. All members are hypothetical proteins.. +PF10656 Hypothetical protein of unknown function (DUF2483)
FIGfam, Mistry J, Coggill P. FIG032091 (Release 2.0). This is a family of proteins found in bacteriophage particularly of the SA bacteriophages 11, Mu50B, family, homologous to phi-ETA orf16.. +PF10657 Photosystem P840 reaction centre protein PscD
FIGfam, Mistry J, Coggill P. FIG031038 (Release 2.0). The photosynthetic reaction centres (RCs) of aerotolerant organisms contain a heterodimeric core, built up of two strongly homologous polypeptides each of which contributes five transmembrane peptide helices to hold a pseudo-symmetric double set of redox components. Two molecules of PscD are housed within a subunit. PscD may be involved in stabilising the PscB component since it is found to co-precipitate with FMO (Fenna-Mathews-Olson BChl a-protein) and PscB. It may also be involved in the interaction with ferredoxin .. +PF10658 Protein of unknown function (DUF2484)
FIGfam, Mistry J, Coggill P. FIG030013 (Release 2.0). A role of this family in UDP-N-acetylenolpyruvoylglucosamine reductase, as MurB, could not be confirmed.. +PF10659 Trypanosome variant surface glycoprotein C-terminal domain
Pfam-B_1351 (release 3.0). The trypanosome parasite expresses these proteins to evade the immune response.. +PF10660 Iron-containing outer mitochondrial membrane protein N-terminus
MitoNEET_N is the N-terminal region of the MitoNEET and Miner-type proteins that carry a zf-CDGSH, Pfam:PF09360, redox-active 2Fe-2S cluster. The whole protein regulates oxidative capacity. The domain is an anchor sequence that tethers the protein to the outer membrane.. +PF10661 WXG100 protein secretion system (Wss), protein EssA
FIGfam, Mistry J, Coggill P, Desvaux M. FIG043089 (Release 2.0). The WXG100 protein secretion system (Wss) is responsible for the secretion of WXG100 proteins (Pfam:PF06013) such as ESAT-6 and CFP-10 in Mycobacterium tuberculosis or EsxA and EsxB in Staphylococcus aureus. In S. aureus, the Wss seems to be encoded by a locus of eight CDS, called ess (eSAT-6 secretion system). This locus encodes, amongst several other proteins, EssA, a protein predicted to possess one transmembrane domain. Due to its predicted membrane location and its absolute requirement for WXG100 protein secretion, it has been speculated that EssA could form a secretion apparatus in conjunction with the polytopic membrane protein EsaA, YukC (Pfam:PF10140) and YukAB, which is a membrane-bound ATPase containing Ftsk/SpoIIIE domains (Pfam:PF01580) called EssC in S. aureus and Snm1/Snm2 in Mycobacterium tuberculosis. Proteins homologous to EssA, YukC, EsaA and YukD seem absent from mycobacteria .. +PF10662 Ethanolamine utilisation - propanediol utilisation
FIGfam, Mistry J, Coggill P. FIG047026 (Release 2.0). Members of this family function in ethanolamine and propanediol degradation pathways; however, the exact roles of these proteins are poorly understood [1-3].. +PF10664 DUF2485; NADHqo1-M; NADHdh-M;
Cyanobacterial and plastid NDH-1 subunit M. FIGfam, Mistry J, Coggill P. FIG006356 (Release 2.0). The proton-pumping NADH:ubiquinone oxidoreductase catalyses the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 sub-complexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. 
Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit . The cyanobacterial NDH-1 complex contains additional subunits, NdhM and NdhN, compared with the minimal set of the bacterial enzyme and these seem to be specific for thylakoid-located NDH-1 of photosynthetic organisms . The three subunits of NDH-1, NdhM, NdhN and NdhO are essential for effecting cyclic electron flow around photosystem I, by supplying extra-ATP for photosynthesis in both plastids and cyanobacteria [3, 4].. +PF10665 Phage_Gp9;
Minor capsid protein. FIGfam, Mistry J, Coggill P. FIG016324 (Release 2.0). This is a putative tail-knob or minor capsid protein from bacteriophages.. +PF10666 Phage protein Gp14
FIGfam, Mistry J, Coggill P. This phage protein family is of unknown function but is expressed from within a cluster of tail- and base plate-producing genes .. +PF10667 Protein of unknown function (DUF2486)
FIGfam, Mistry J, Coggill P. FIG008383 (Release 2.0). This family is made up of members from various Burkholderia spp. The function is unknown.. +PF10668 Phage terminase small subunit
FIGfams, Mistry J, Coggill P. FIG022212 (Release 2.0). This family of small highly conserved proteins come from a subset of Firmicute species. Its putative function is as a phage terminase small subunit.. +PF10669 Protein gp23 (Bacteriophage A118)
FIGfam, Mistry J, Coggill P. FIG018382 (Release 2.0). This is the highly conserved family of the major tail subunit protein.. +PF10670 NikM;
Domain of unknown function (DUF4198). Pfam-B_42996 (release 22.0). This family was previously misannotated in Pfam as NikM.. +PF10671 Toxin co-regulated pilus biosynthesis protein Q
FIGfam, Mistry J, Coggill P. FIG032035 (Release 2.0). The toxin-coregulated pilus (TCP) of Vibrio cholerae and the soluble TcpF protein that is secreted via the TCP biogenesis apparatus are essential for intestinal colonisation in the disease of cholera. TcpQ is part of an outer membrane complex of the TCP biogenesis apparatus, comprised of TcpC and TcpQ, and the TcpQ is required for proper localisation of TcpC to the outer membrane. The domain is found in other Proteobacterial species apart from Vibrio.. +PF10672 S-adenosylmethionine-dependent methyltransferase
Members of this family are S-adenosylmethionine-dependent methyltransferases from gamma-proteobacterial species. The diversity in the roles of methylation is matched by the almost bewildering number of methyltransferase enzymes that catalyse the methylation reaction. Although several classes of methyltransferase enzymes are known, the great majority of methylation reactions are catalysed by the S-adenosylmethionine-dependent methyltransferases.. +PF10673 Protein of unknown function (DUF2487)
This is a bacterial family of uncharacterised proteins.. +PF10674 DUF2488;
Protein of unknown function (DUF2488). This protein is conserved in the green lineage and located in the chloroplast.. +PF10675 Protein of unknown function (DUF2489)
This is a bacterial family of uncharacterised proteins.. +PF10676 Spore germination protein gerPA/gerPF
This is a bacterial family of proteins that are required for the formation of functionally normal spores. Proteins in this family may be involved in establishing normal coat structure and/or permeability which could control the access of germinants to their receptor.. +PF10677 Protein of unknown function (DUF2490)
This is a bacterial family of uncharacterised proteins. They appear to belong to the outer membrane beta barrel superfamily.. +PF10678 Protein of unknown function (DUF2492)
This is a bacterial family of uncharacterised proteins.. +PF10679 Protein of unknown function (DUF2491)
This is a bacterial family of uncharacterised proteins.. +PF10680 RNA polymerase I specific transcription initiation factor
Pfam-B_44021 (release 22.0). Initiation of transcription of ribosomal DNA (rDNA) in yeast involves an interaction of upstream activation factor (UAF) with the upstream element of the promoter, to form a stable UAF-template complex. UAF, together with the TATA-binding transcription initiation factor protein TBP, then recruits an essential core factor to the promoter, to form a stable preinitiation complex . This Rrn9 domain, which seems to be constrained to fungi, is the two highly conserved regions of proteins which form one of the subunits of UAF and appears to be the region responsible for the interaction with TBP. The family includes the S.pombe Arc1 protein, Swiss:Q10204, which is found to be essential for the accumulation of condensin at kinetochores .. +PF10681 Chaperone for protein-folding within the ER, fungal
Pfam-B_27706 (release 22.0). This conserved fungal family is an essential molecular chaperone in the endoplasmic reticulum. Molecular chaperones transiently interact with unfolded proteins to inhibit their self-aggregation and to support their folding and/or assembly. Rot1 is a general chaperone with some substrate specificity, its substrates being the structurally unrelated Kre5 Kre6 Big1 Atg22, which are type I, type II, and polytopic membrane proteins. The dependencies of each for Rot1 do not share similarities. However, their folding does require BiP, and one of these proteins was simultaneously associated with both Rot1 and BiP. In addition, Rot1 may cooperate with BiP/Kar2 in the folding of Kre6 .. +PF10682 Glycoprotein of human cytomegalovirus HHV-5
This is glycoprotein UL40 from human cytomegalovirus or herpesvirus 5. The signal sequence of the UL40 polypeptide contains an HLA-E ligand identical with HLA-Cw*0304. The first 37 residues of UL40, including this ligand, are predicted to encode a signal peptide. The virus thus prevents the lysis by NK (natural killer) cells of the cell it has invaded [1-2].. +PF10683 Hermes_DBD;
Hermes transposase DNA-binding domain . This domain confers specific DNA-binding on Hermes transposase .. +PF10684 Putative biofilm-dependent modulation protein
This is a family of tightly conserved proteins from Enterobacteriaceae which are annotated as being biofilm-dependent modulation protein homologues.. +PF10685 Stress-induced bacterial acidophilic repeat motif
This repeat is found in proteins which are expressed under conditions of stress in bacteria. The repeat contains a highly conserved, characteristic sequence motif, KGG, that is also recognised by plants and lower eukaryotes and repeated in their LEA (late embryogenesis abundant) family of proteins, thereby rendering those proteins bacteriostatic. An example of such an LEA family is LEA_5, Pfam:PF00477. Further downstream from this motif is a Walker A, nucleotide binding, motif GXXXXGK(S,T), that in YciG of E coli, eg Swiss:Q8X7B4, is QSGGNKSGKS [URL]. YciG is expressed as part of a three-gene operon, yciGFE, and this operon is induced by stress and is regulated by RpoS, which controls the general stress-response in E coli. YciG was shown to be important for stationary-phase resistance to thermal stress and in particular to acid stress.. +PF10686 Protein of unknown function (DUF2493)
Members of this family are all Proteobacteria. The function is not known.. +PF10688 Bacterial inner membrane protein
This is a family of inner membrane proteins. Many of the members are YgjV protein.. +PF10689 Protein of unknown function (DUF2496)
This family consists of proteins from Gammaproteobacteria spp. Many members are annotated as being like the E coli protein YbaM.. +PF10690 Myticin pre-proprotein from the mussel
Myticin is a cysteine-rich peptide produced in three isoforms, A, B and C, by Mytilus galloprovincialis, the Mediterranean mussel. Some isoforms show antibacterial activity against gram-positive bacteria, while others are additionally active against the fungus Fusarium oxysporum and a gram-negative bacterium, Escherichia coli D31. Myticin-prepro is the precursor peptide. The mature molecule, named myticin, consists of 40 residues, with four intramolecular disulfide bridges and a cysteine array in the primary structure different from that of previously characterised cysteine-rich antimicrobial peptides. The first 20 amino acids are a putative signal peptide, and the antimicrobial peptide sequence is a 36-residue C-terminal extension. Such a structure suggests that myticins are synthesised as prepro-proteins that are then processed by various proteolytic events before storage in the haemocytes as the active peptide. Myticin precursors are expressed mainly in the haemocytes. The family Mytilin has been merged into this family.. +PF10691 Protein of unknown function (DUF2497)
Members of this family belong to the Alphaproteobacteria. The function of the family is not known.. +PF10692 Protein of unknown function (DUF2498)
Members of this family are Gammaproteobacteria. Many are annotated as like E coli protein YciN. The function is not known.. +PF10693 Protein of unknown function (DUF2499)
Members of this family are found in plants, lower eukaryotes, and bacteria and the chloroplast where it is annotated as Ycf49 or Ycf49-like. The function is not known though several members are annotated as putative membrane proteins.. +PF10694 Protein of unknown function (DUF2500)
The members of this family are largely confined to the Gammaproteobacteria. The function is not known.. +PF10696 Protein of unknown function (DUF2501)
Members of this family are all Proteobacteria. Several are annotated as being YjjA or YjjA-like, but this protein is uncharacterised.. +PF10697 Protein of unknown function (DUF2502)
Members of this family are all Gammaproteobacteria. The function is not known.. +PF10698 Protein of unknown function (DUF2505)
Members of this family are all Actinobacteria. The function is not known.. +PF10699 Male gamete fusion factor
The gene encoding Arabidopsis HAP2 is allelic with GCS1 (Generative cell-specific protein 1). HAP2 is expressed only in the haploid sperm and is required for efficient guidance of the pollen tube to the ovules. In Arabidopsis the protein is a predicted membrane protein with an N-terminal secretion signal, a single transmembrane domain and a C-terminal histidine-rich domain . HAP2-GCS1 is found from plants to lower eukaryotes and is necessary for the fusion of the gametes in fertilisation. It is involved in a novel mechanism for gamete fusion where a first species-specific protein binds male and female gamete membranes together after which a second, broadly conserved protein, either directly or indirectly, causes fusion of the two membranes together. The broadly conserved protein is represented by this HAP2-GCS1 domain, conserved from plants to lower eukaryotes . In Plasmodium berghei the protein is expressed only in male gametocytes and gametes, having a male-specific function during the interaction with female gametes, and being indispensable for parasite fertilisation. The gene in plants and eukaryotes might well have originated from acquisition of plastids from red algae .. +PF10702 Protein of unknown function (DUF2507)
This family is conserved in Firmicutes. The function is not known.. +PF10703 Molybdenum cofactor biosynthesis protein F
MoaF protein is essential for the production of the monoamine-inducible 30kDa protein in Klebsiella . It is necessary for reconstituting organoautotrophic growth in Ralstonia eutropha . It is conserved in Proteobacteria and some lower eukaryotes. The operon regulating the Moa genes is responsible for molybdenum cofactor biosynthesis.. +PF10704 Protein of unknown function (DUF2508)
This family is conserved in Firmicutes. Several members are annotated as being the protein YaaL. The function is not known.. +PF10705 Chloroplast protein precursor Ycf15 putative
In some species of plants the ycf15 gene is probably not a protein-coding gene because the protein in these species has premature stop codons. Most of the members of the family are hypothetical or uncharacterised .. +PF10706 Aminoglycoside-2''-adenylyltransferase
This family is conserved in Bacteria. It confers resistance to kanamycin, gentamicin, and tobramycin . The protein is also produced by plasmids in various bacterial species and confers resistance to essentially all clinically available aminoglycosides except streptomycin, and it eliminates the synergism between aminoglycosides and cell-wall active agents .. +PF10707 PhoP regulatory network protein YrbL
This is a family of proteins that are activated by PhoP. PhoP protein controls the expression of a large number of genes that mediate adaptation to low Mg2+ environments and/or virulence in several bacterial species. YrbL is proposed to be acting in a loop activity with PhoP and PmrA analogous to the multicomponent loop in Salmonella where the PhoP-dependent PmrD protein activates the regulatory protein PmrA, and the activated PmrA then represses transcription from the PmrD promoter which harbours binding sites for both the PhoP and PmrA proteins. Expression of YrbL is induced in low Mg2+ in a PhoP-dependent fashion and repressed by Fe3+ in a PmrA-dependent manner .. +PF10708 Protein of unknown function (DUF2510)
This is a family of proteins conserved in Actinobacteria. Many members are annotated as putative membrane proteins but this could not be confirmed.. +PF10709 Protein of unknown function (DUF2511)
This family is conserved in bacteria. The function is not known.. +PF10710 Protein of unknown function (DUF2512)
Proteins in this family are predicted to be integral membrane proteins, and many of them are annotated as being YndM protein. They are all found in Firmicutes. The true function is not known.. +PF10711 Hypothetical protein (DUF2513)
This family is found in bacteria. The function is not known.. +PF10712 NAD-specific glutamate dehydrogenase
The members of this family are annotated as being NAD-specific glutamate dehydrogenase encoded in antisense gene pair with DnaK-J .. +PF10713 Protein of unknown function (DUF2509)
This family is conserved in Proteobacteria. The function is not known but many of the members are annotated as protein YgdB.. +PF10714 Late embryogenesis abundant protein 18
This is a family of late embryogenesis-abundant proteins. There is high accumulation of this protein in dry seeds, and in the roots of full-grown plants in response to dehydration and ABA (abscisic acid application) treatments . This LEA protein disappears after germination. It accumulates in growing regions of well irrigated hypocotyls and meristems suggesting a role in seedling growth resumption on rehydration . As a group the LEA proteins are highly hydrophilic, contain a high percentage of glycine residues, lack Cys and Trp residues and do not coagulate upon exposure to high temperature, and for these reasons are considered to be members of a group of proteins called hydrophilins . Expression of the protein is negatively regulated during etiolating growth, particularly in roots, in contrast to its expression patterns during normal growth .. +PF10715 Endoribonuclease RegB T4-bacteriophage encoded
The RegB endoribonuclease encoded by bacteriophage T4 is a unique sequence-specific nuclease that cleaves in the middle of GGAG or, in a few cases, GGAU tetranucleotides, preferentially those found in the Shine-Dalgarno regions of early phage mRNAs. Phage RB49 in addition to gpRegB utilises Escherichia coli endoribonuclease E for the degradation of its transcripts for gene regB. The deduced primary structure of RegB proteins of 32 phages studied is almost identical to that of T4, while the sequences of RegB encoded by phages RB69, TuIa and RB49 show substantial divergence from their T4 counterpart.. +PF10716 NADH dehydrogenase transmembrane subunit
The NdhL family is a component of the NDH-1L complex that is one of the proton-pumping NADH:ubiquinone oxidoreductases that catalyse the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. NDH-1L is essential for photoheterotrophic cell growth. NdhL appears to contain two transmembrane helices and it is necessary for the functioning, though not the correct assembly, of the NDH-1 complex in Synechocystis 6803. The conservation between cyanobacteria and green plants suggests that chloroplast NDH-1 complexes contain related subunits .. +PF10717 Occlusion-derived virus envelope protein ODV-E18
This family of occlusion-derived viral envelope proteins are detected in viral-induced intranuclear microvesicles and are not detected in the plasma membrane, cytoplasmic membranes, or the nuclear envelope. The ODV-E18 protein is encoded by baculovirus late genes with transcription initiating from a TAAG motif. It exists as a dimer in the ODV envelope and contains a hydrophobic domain which is putatively acting as a target or retention signal for intranuclear microvesicles .. +PF10718 Hypothetical chloroplast protein Ycf34
This family is of proteins annotated as hypothetical chloroplast protein YCF34. The function is not known.. +PF10719 Late competence development protein ComFB
This family is conserved in bacteria. Some members, with three conserved cysteines, are annotated as late competence development protein ComFB.. +PF10720 Protein of unknown function (DUF2515)
This family is conserved in Firmicutes. Several members are annotated as YppC. The function is not known.. +PF10721 Protein of unknown function (DUF2514)
This family is conserved in bacteria and some viruses. The function is not known.. +PF10722 Putative bacterial sensory transduction regulator
YbjN is a putative sensory transduction regulator protein found in Proteobacteria. As it is a multi-copy suppressor of the coenzyme A-associated temperature sensitivity in temperature-sensitive mutant strains of Escherichia coli the suggestion is that it both helps CoA-A1 and possibly works as a general stabiliser for some other unstable proteins . This family was expanded to subsume other related families: DUF1790, DUF1821 and DUF2596.. +PF10723 Replication regulatory protein RepB
This is a family of proteins which regulate replication of rolling circle replication (RCR) plasmids that have a double-strand replication origin (dso). Regulation of replication of RCR plasmids occurs mainly at initiation of leading strand synthesis at the dso, such that Rep protein concentration controls plasmid replication .. +PF10724 Protein of unknown function (DUF2516)
This family is conserved in Actinobacteria. The function is not known.. +PF10725 Protein of unknown function (DUF2517)
This family is conserved in Proteobacteria. Several members are annotated as being protein YbfA. The function is not known.. +PF10726 Protein of unknown function (DUF2518)
This family is conserved in Cyanobacteria. Several members are annotated as the protein Ycf51. The function is not known.. +PF10727 Rossmann-like domain
This family of proteins contain a Rossmann-like domain.. +PF10728 Domain of unknown function (DUF2520)
This presumed domain is found C-terminal to a Rossmann-like domain suggesting that these proteins are oxidoreductases.. +PF10729 Cell division activator CedA
CedA is made up of four antiparallel beta-strands and an alpha-helix. It activates cell division by inhibiting chromosome over-replication. This is mediated by binding to dsDNA via the beta-sheet. [1,2].. +PF10730 Protein of unknown function (DUF2521)
Family of unknown function specific to Bacillus.. +PF10731 Thrombin inhibitor from mosquito
Members of this family are all inhibitors of thrombin, the peptidase that is at the end of the blood coagulation cascade and which creates the clot by cleaving fibrinogen. The interaction between thrombin and fibrinogen involves two different areas of contact - via the thrombin active site and via a second substrate-binding site known as an exosite. The inhibitor acts by blocking the exosite, rather than by interacting with the active site. The inhibitors are from mosquitoes that feed on human blood and which, by inhibiting thrombin, prevent the blood from clotting and keep it flowing.. +PF10732 Protein of unknown function (DUF2524)
This family of proteins with unknown function appears to be restricted to Bacillaceae bacteria. . +PF10733 Protein of unknown function (DUF2525)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. The family has a highly conserved sequence.. +PF10734 Protein of unknown function (DUF2523)
This is a family of phage related proteins whose function is uncharacterised.. +PF10735 Protein of unknown function (DUF2526)
This family of proteins with unknown function is restricted to Enterobacteriaceae. The family has a highly conserved sequence.. +PF10736 Protein of unknown function (DUF2627)
This family of proteins with unknown function appears to be restricted to a family of Enterobacterial proteins. It has a highly conserved sequence.. +PF10737 Spore germination protein GerPC
GerPC is required for the formation of functionally normal spores. The gerP locus encodes a number of proteins which are thought to be involved in the establishment of normal spore coat structure and/or permeability, which allows the access of germinants to their receptor .. +PF10738 Probable lipoprotein LpqN
This family is conserved in Mycobacteriaceae and is likely to be a lipoprotein .. +PF10739 Protein of unknown function (DUF2550)
This family is conserved in Corynebacterineae. The function is not known though most members are annotated as either secreted, or membrane, proteins.. +PF10740 Protein of unknown function (DUF2529)
This family is conserved in the Bacillales. The function is not known. Several members are annotated as being YWJG, a protein expressed downstream of pyrG, a gene encoding for cytidine triphosphate synthetase.. +PF10741 GspM_II;
Type II secretion system (T2SS), protein M subtype b. Mistry J, Coggill P, Desvaux M. The T2SMb family is conserved in Proteobacteria and Actinobacteria, and differs from the T2SM proteins in Vibrio spp. (Pfam:PF04612).. +PF10742 Protein of unknown function (DUF2555)
This family is conserved in Cyanobacteria. The function is not known.. +PF10743 Regulatory phage protein cox
This family of phage Cox proteins is expressed by Enterobacteria phages. The Cox protein is a 79-residue basic protein with a predicted strong helix-turn-helix DNA-binding motif. It inhibits integrative recombination and it activates site-specific excision of the HP1 genome from the Haemophilus influenzae chromosome, Hp1. Cox appears to function as a tetramer. Cox binding sites consist of two direct repeats of the consensus motif 5'-GGTMAWWWWA, one Cox tetramer binding to each motif. Cox binding interferes with the interaction of HP1 integrase with one of its binding sites, IBS5. This competition is central to directional control. Both Cox binding sites are needed for full inhibition of integration and for activating excision, because it plays a positive role in assembling the nucleoprotein complexes that produce excisive recombination, by inducing the formation of a critical conformation in those complexes .. +PF10744 Med1-Trap220;
Mediator of RNA polymerase II transcription subunit 1. Pfam-B_51442 (release 22.0). Mediator complexes are basic necessities for linking transcriptional regulators to RNA polymerase II. This domain, Med1, is conserved from plants to fungi to humans and forms part of the Med9 submodule of the Srb/Med complex. It is one of three subunits essential for viability of the whole organism via its role in environmentally-directed cell-fate decisions . Med1 is part of the tail region of the Mediator complex .. +PF10745 Protein of unknown function (DUF2530)
This family of proteins with unknown function appears to be restricted to mycobacteria.. +PF10746 Phage holin family 6
Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis.. +PF10747 Protein of unknown function (DUF2522)
This family of proteins with unknown function appears to be restricted to Bacillus.. +PF10748 Protein of unknown function (DUF2531)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10749 Protein of unknown function (DUF2534)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10750 Protein of unknown function (DUF2536)
This family of proteins with unknown function appears to be restricted to Bacillus spp. Structural modelling suggests this domain may bind nucleic acids .. +PF10751 Protein of unknown function (DUF2535)
This family of proteins with unknown function appears to be restricted to Bacillus spp.. +PF10752 Protein of unknown function (DUF2533)
This family of proteins with unknown function appears to be restricted to Bacillus spp.. +PF10753 Protein of unknown function (DUF2566)
This family is conserved in Enterobacteriaceae. The function is not known.. +PF10754 Protein of unknown function (DUF2569)
This family is conserved in bacteria. The function is not known, but several members are annotated as being YdgK or a homologue thereof.. +PF10756 DUF2581;
This domain has a bacterial type PH domain structure. This domain was previously known as DUF2581. This family is conserved in the Actinomycetales. Although several members are annotated as RbiX homologues, RbiX being a putative regulator of riboflavin biosynthesis, the function could not be confirmed.. +PF10755 Protein of unknown function (DUF2585)
This family is conserved in Proteobacteria. The function is not known.. +PF10757 Biofilm formation regulator YbaJ
YbaJ regulates biofilm formation. It also has an important role in the regulation of motility in the biofilm. YbaJ functions in increasing conjugation, aggregation and decreasing the motility, resulting in an increase of biofilm . +PF10758 Protein of unknown function (DUF2586)
This bacterial family of proteins has no known function.. +PF10759 Protein of unknown function (DUF2587)
This is a bacterial family of proteins with no known function.. +PF10761 Protein of unknown function (DUF2590)
This family of proteins has no known function.. +PF10762 Protein of unknown function (DUF2583)
Some members in this family of proteins are annotated as YchH however currently no function is known.. +PF10763 Protein of unknown function (DUF2584)
This bacterial family of proteins have no known function.. +PF10764 Inhibitor of sigma-G Gin
Gin allows sigma-F to delay late forespore transcription by preventing sigma-G from taking over before the cell has reached a critical stage of development. Gin is also known as CsfB .. +PF10765 Protein of unknown function (DUF2591)
This bacterial family of proteins has no known function.. +PF10766 Protein of unknown function (DUF2592)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10767 Protein of unknown function (DUF2593)
This family of proteins appear to be restricted to Enterobacteriaceae. Some members in the family are annotated as YbjO however currently there is no known function.. +PF10768 Class II flagellar assembly regulator
The FliX protein is possibly a transient component of the flagellum that is required for the assembly process. FliX may contribute to the targeting or assembly of the P- and L-ring protein monomers at the cell pole. The family carries a potential N-terminal signal sequence and at least one transmembrane domain indicating that it might function either in or in association with the cell membrane .. +PF10769 Protein of unknown function (DUF2594)
This family of proteins with unknown function appear to be restricted to Enterobacteriaceae.. +PF10771 Protein of unknown function (DUF2582)
This family is conserved in bacteria and archaea. The function is not known.. +PF10772 Protein of unknown function (DUF2597)
This family of proteins has no known function.. +PF10774 BssS;
Domain of unknown function (DUF4226). This family of mycobacterial proteins are uncharacterised.. +PF10775 ATP synthase complex subunit h
Subunit h is a component of the yeast mitochondrial F1-F0 ATP synthase. It is essential for the correct assembly and functioning of this enzyme. Subunit h occupies a central place in the peripheral stalk between the F1 sector and the membrane .. +PF10776 Protein of unknown function (DUF2600)
This is a bacterial family of proteins. Some members in the family are annotated as YtpB however currently no function is known.. +PF10777 Inner membrane protein YlaC
Members of this family include proteins annotated as inner membrane protein YlaC in E. coli and Salmonella. The function of this family is unknown.. +PF10778 Halocarboxylic acid dehydrogenase DehI
Haloacid dehalogenases catalyse the removal of halides from organic haloacids. DehI can process both L- and D-substrates. A crucial aspartate residue is predicted to activate a water molecule for nucleophilic attack of the substrate chiral centre resulting in an inversion of the configuration of either L- or D-substrates in contrast to D-only enzymes .. +PF10779 Haemolysin XhlA
XhlA is a cell-surface associated haemolysin that lyses the two most prevalent types of insect immune cells (granulocytes and plasmatocytes) as well as rabbit and horse erythrocytes . This family has had DUF1267, Pfam:PF06895, merged into it.. +PF10780 39S ribosomal protein L53/MRP-L53
MRP-L53 is also known as Mrp144. It is part of the 39S ribosome .. +PF10781 Dextransucrase DSRB
DSRB is a novel dextransucrase which produces a dextran different from the typical dextran, as it contains (1-6) and (1-2) linkages, when this strain is grown in the presence of sucrose .. +PF10782 Protein of unknown function (DUF2602)
This bacterial family of proteins has no known function.. +PF10783 Protein of unknown function (DUF2599)
This family is conserved in Actinobacteria. The function is not known.. +PF10784 Plasmid stability protein
This family is conserved in the Enterobacteriales. It is a putative plasmid stability protein in that it is expressed from the operon involved in stability, but its actual function has not yet been characterised.. +PF10785 NADH-ubiquinone oxidoreductase complex I, 21 kDa subunit
This family is the N-terminal domain of NADH-ubiquinone oxidoreductase 21 kDa subunits from fungi, lower metazoa and plants.. +PF10786 Glucose-6-phosphate 1-dehydrogenase (EC 1.1.1.49)
This family is conserved in Firmicutes and Proteobacteria. Several members are annotated as being glucose-6-phosphate 1-dehydrogenase (EC:1.1.1.49) but this could not be confirmed.. +PF10787 Uncharacterised protein from bacillus cereus group
This family is conserved in the Bacillus cereus group. Several members are called YfmQ but the function is not known.. +PF10788 Protein of unknown function (DUF2603)
This family is conserved in Epsilon-proteobacteria. The function is not known.. +PF10789 Phage RNA polymerase binding, RpbA
Upon infection the phage-encoded RpbA protein binds to the ADP-ribosylated core RNA polymerase and modulates function to preferentially bind T4 promoters. This is a non-essential protein to the phage life cycle.. +PF10790 Protein of Unknown function (DUF2604)
Family of bacterial proteins with undetermined function.. +PF10791 Mitochondrial F1-F0 ATP synthase subunit F of fungi
The membrane bound F1-FO-type H+ ATP synthase of mitochondria catalyses the terminal step in oxidative respiration converting the generation of the electrochemical gradient into ATP for cellular biosynthesis. The general structure and the core subunits of the enzyme are highly conserved in both prokaryotic and eukaryotic organisms.. +PF10792 Protein of unknown function (DUF2605)
This family is conserved in Cyanobacteria. The function is not known.. +PF10793 Gloverin-like protein
This family of proteins are Gloverin-like. Gloverin is a 13.8kDa inducible antibacterial insect protein which inhibits the synthesis of vital outer membrane proteins leading to a permeable outer membrane. Gloverin contains a large number of glycine residues .. +PF10794 Protein of unknown function (DUF2606)
Family of bacterial proteins with unknown function. These proteins have been classified as membrane proteins. +PF10795 Protein of unknown function (DUF2607)
This family is conserved in Gammaproteobacteria. The function is not known.. +PF10796 Sigma-S stabilisation anti-adaptor protein
This family is conserved in Enterobacteriaceae. It is one of a series of proteins, expressed by these bacteria in response to stress, that help to regulate Sigma-S, the stationary phase sigma factor of Escherichia coli and Salmonella. IraP is essential for Sigma-S stabilisation in some but not all starvation conditions .. +PF10797 Protein of unknown function
This family is conserved in Firmicutes and Proteobacteria. The function is not known but several members are annotated as being homologues of E coli YhfT, a protein thought to be involved in fatty acid oxidation.. +PF10798 Biofilm development protein YmgB/AriR
YmgB is part of the three gene cluster ymgABC which has a role in biofilm development and stability. YmgB represses biofilm formation in rich medium containing glucose, decreases cellular motility and also protects the cell from acid which indicates that YmgB has an important function in acid-resistance . YmgB binds as a dimer to genes which are important for biofilm formation via a ligand. Due to its important function in acid resistance it is also known as AriR (regulator of acid resistance influenced by indole) .. +PF10799 Biofilm formation protein (YliH/bssR)
YliH is induced in biofilms and is involved in repression of motility in the biofilms . YliH is also known as bssR (regulator of biofilm through signal secretion).. +PF10800 Protein of unknown function (DUF2528)
This family of proteins has no known function. Some of the sequences are annotated as ea10 however the function of this protein is unknown.. +PF10801 Protein of unknown function (DUF2537)
This bacterial family of proteins has no known function.. +PF10802 Protein of unknown function (DUF2540)
This family of proteins with unknown function appears to be restricted to Methanococcus.. +PF10803 DUF2539;
Spore germination GerPB. Members of this family are required for formation of functionally normal spores. They may be involved in the establishment of spore coat structure or permeability .. +PF10804 Protein of unknown function (DUF2538)
This family of proteins has no known function.. +PF10805 Protein of unknown function (DUF2730)
This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF10806 Protein of unknown function (DUF2731)
This eukaryotic family of proteins has no known function.. +PF10807 Protein of unknown function (DUF2541)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. All proteins are annotated as YaaI precursor however currently no function is known.. +PF10808 Protein of unknown function (DUF2542)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. The family has a highly conserved sequence.. +PF10809 Protein of unknown function (DUF2732)
This family of proteins has no known function.. +PF10810 Protein of unknown function (DUF2545)
This family of proteins with unknown function is restricted to Enterobacteriaceae. The sequence is highly conserved.. +PF10811 Protein of unknown function (DUF2532)
This bacterial family of proteins has no known function.. +PF10812 Protein of unknown function (DUF2561)
This family of proteins with unknown function appears to be restricted to Mycobacterium spp.. +PF10813 Protein of unknown function (DUF2733)
This viral family of proteins has no known function.. +PF10814 Protein of unknown function (DUF2562)
This protein of unknown function appears to be restricted to Mycobacterium spp.. +PF10815 ComZ
ComZ is part of a two gene operon. It affects competence regulation by negatively affecting the transcription of the ComG operon. ComZ contains a leucine zipper motif .. +PF10816 Domain of unknown function (DUF2760)
Gunasekaran P, Mistry J. Pfam-B_001564 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF10817 Protein of unknown function (DUF2563)
This family of proteins with unknown function appears to be restricted to Mycobacterium.. +PF10818 Protein of unknown function (DUF2547)
This bacterial family of proteins has no known function.. +PF10819 Protein of unknown function (DUF2564)
This family of proteins with unknown function appears to be restricted to Bacillus spp.. +PF10820 Protein of unknown function (DUF2543)
This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. The family has a highly conserved sequence.. +PF10821 Protein of unknown function (DUF2567)
This is a bacterial family of proteins with unknown function.. +PF10823 Protein of unknown function (DUF2568)
One member in this family is annotated as yrdB which is part of a four gene operon however currently no function is known.. +PF10824 Protein of unknown function (DUF2580)
This family of proteins with unknown function appears to be mainly found in actinobacteria.. +PF10825 Protein of unknown function (DUF2752)
Gunasekaran P, Mistry J. Pfam-B_001601 (release 23.0). This family is conserved in bacteria. Many members are annotated as being putative membrane proteins.. +PF10826 Protein of unknown function (DUF2551)
This Archaeal family of proteins has no known function.. +PF10827 Protein of unknown function (DUF2552)
This bacterial family of proteins has no known function.. +PF10828 Protein of unknown function (DUF2570)
This is a family of proteins with unknown function.. +PF10829 Protein of unknown function (DUF2554)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10830 Protein of unknown function (DUF2553)
This family of bacterial proteins has no known function.. +PF10831 Protein of unknown function (DUF2556)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10832 Protein of unknown function (DUF2559)
This family of proteins appear to be restricted to Enterobacteriaceae. The sequences are annotated as yhfG however currently no function is known.. +PF10833 Protein of unknown function (DUF2572)
This bacterial family of proteins has no known function.. +PF10834 Protein of unknown function (DUF2560)
This family of proteins has no known function.. +PF10835 Protein of unknown function (DUF2573)
Some members in this bacterial family of proteins are annotated as YusU however no function is currently known. This family of proteins appears to be restricted to Bacillus spp.. +PF10836 Protein of unknown function (DUF2574)
This family of proteins appears to be restricted to Enterobacteriaceae. Members of the family are annotated as yehE however currently no function is known.. +PF10837 Protein of unknown function (DUF2575)
This family of proteins appears to be restricted to Enterobacteriaceae. Members in the family are annotated as yaaY but currently there is no known function.. +PF10838 Protein of unknown function (DUF2677)
Members in this family of proteins are annotated as UL121 however currently no function is known.. +PF10839 Protein of unknown function (DUF2647)
This eukaryotic family of proteins are annotated as ycf68 but have no known function.. +PF10840 Protein of unknown function (DUF2645)
This family of proteins appear to be restricted to Enterobacteriaceae. Some members in the family are annotated as YjeO however no function for this protein is currently known.. +PF10841 Protein of unknown function (DUF2644)
This family of proteins with unknown function appear to be restricted to Pasteurellaceae.. +PF10842 Protein of unknown function (DUF2642)
This family of proteins with unknown function appear to be restricted to Bacillus spp.. +PF10843 Protein of unknown function (DUF2578)
This is a Saccharomycete family of proteins with unknown function. The protein in S. cerevisiae is strongly induced in response to many stress conditions and is repressed in drug resistant yeast strains.. +PF10844 Protein of unknown function (DUF2577)
This family of proteins has no known function. +PF10845 Protein of unknown function (DUF2576)
The function of this viral family of proteins is unknown.. +PF10846 Protein of unknown function (DUF2722)
This eukaryotic family of proteins has no known function.. +PF10847 Protein of unknown function (DUF2656)
This bacterial family of proteins has no known function.. +PF10848 Protein of unknown function (DUF2655)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10849 Protein of unknown function (DUF2654)
Some members in this family of proteins are annotated as a-gt.4 however currently no function is known.. +PF10850 Protein of unknown function (DUF2653)
This family of proteins with unknown function appears to be restricted to Bacillus spp.. +PF10851 Protein of unknown function (DUF2652)
This family of proteins has no known function.. +PF10852 Protein of unknown function (DUF2651)
This family of proteins with unknown function appears to be restricted to Bacillus spp.. +PF10853 Protein of unknown function (DUF2650)
This family of proteins with unknown function appear to be restricted to Caenorhabditis elegans.. +PF10854 Protein of unknown function (DUF2649)
Members in this family of proteins are annotated as Plectrovirus orf 10 transmembrane proteins however currently no function is known.. +PF10855 Protein of unknown function (DUF2648)
This family of proteins with unknown function appears to be restricted to Bacillales Staphylococcus.. +PF10856 Protein of unknown function (DUF2678)
This family of proteins has no known function.. +PF10857 Protein of unknown function (DUF2701)
This viral family of proteins has no known function.. +PF10858 Protein of unknown function (DUF2659)
This bacterial family of proteins has no known function.. +PF10859 Protein of unknown function (DUF2660)
This is a family of proteins with unknown function.. +PF10860 Protein of unknown function (DUF2661)
This viral family of proteins have no known function.. +PF10861 Protein of Unknown function (DUF2784)
Gunasekaran P, Mistry J. Pfam-B_001600 (release 23.0). This is a family of uncharacterised proteins. The function is not known; however, it is conserved in Bacteria.. +PF10862 DUF2662;
FcoT-like thioesterase domain. Proteins in this family have a HotDog fold. This family was formerly known as domain of unknown function 2662 (DUF2662). The structure of Rv0098 from M. tuberculosis suggested a thioesterase function. Assays showed that this protein was a thioesterase with a preference for long chain fatty acyl groups . The maximal Kcat was observed for palmitoyl-CoA although longer and shorter molecules were also cleaved. In solution this protein forms a homo-hexameric complex.. +PF10863 Protein of unknown function (DUF2702)
This eukaryotic family of proteins has no known function.. +PF10864 Protein of unknown function (DUF2663)
Some members in this family of proteins are annotated as YpbF however currently no function is known.. +PF10865 Domain of unknown function (DUF2703)
This family of protein has no known function, but it may be distantly related to the thioredoxin fold. It contains the CXXC motif that is characteristic of thioredoxins.. +PF10866 Protein of unknown function (DUF2704)
This viral family of proteins has no known function.. +PF10867 Protein of unknown function (DUF2664)
This family of proteins is a viral family, annotated as UL96. Currently no function is known.. +PF10868 Protein of unknown function (DUF2667)
This family of proteins with unknown function appears to be restricted to Arabidopsis thaliana.. +PF10869 Protein of unknown function (DUF2666)
This Archaeal family of proteins has no known function.. +PF10870 Protein of unknown function (DUF2729)
This viral family of proteins has no known function.. +PF10871 Protein of unknown function (DUF2748)
This is a bacterial family of proteins with unknown function.. +PF10872 Protein of unknown function (DUF2740)
This family of proteins with unknown function has a highly conserved sequence.. +PF10873 Protein of unknown function (DUF2668)
Members in this family of proteins are annotated as Cysteine and tyrosine-rich protein 1, however currently no function is known.. +PF10874 Protein of unknown function (DUF2746)
This family of proteins has no known function.. +PF10875 Protein of unknown function (DUF2670)
This bacterial family of proteins has no known function.. +PF10876 Protein of unknown function (DUF2669)
This family of proteins has no known function.. +PF10877 Protein of unknown function (DUF2671)
This family of proteins with unknown function appears to be restricted to Rickettsia spp.. +PF10878 Protein of unknown function (DUF2672)
This family of proteins with unknown function appear to be restricted to Rickettsiae.. +PF10879 Protein of unknown function (DUF2674)
This family of proteins with unknown function appears to be restricted to Rickettsia spp.. +PF10880 Protein of unknown function (DUF2673)
This family of proteins with unknown function appears to be restricted to Rickettsiae spp.. +PF10881 Protein of unknown function (DUF2726)
This bacterial family of proteins has no known function.. +PF10882 DUF2679;
This family of proteins with unknown function appear to be related to bacterial PH domains. This family was formerly known as DUF2679.. +PF10883 Protein of unknown function (DUF2681)
This family of proteins is found in bacteria. Proteins in this family are typically between 81 and 117 amino acids in length.. +PF10884 Protein of unknown function (DUF2683)
This family of proteins with unknown function appears to be restricted to Methanosarcinaceae.. +PF10885 Protein of unknown function (DUF2684)
Members in this family of proteins are annotated as yqgD however currently no function is known.. +PF10886 Protein of unknown function (DUF2685)
Members in this family of proteins are annotated as uvdY.-2 which is an open reading frame within uvsY. However currently there is no known function.. +PF10887 Protein of unknown function (DUF2686)
Some members in this family of proteins are annotated as yjfZ however currently no function is known.. +PF10888 Protein of unknown function (DUF2742)
Members in this family of phage proteins are the product of the gene phiRv1, however no function is known.. +PF10890 Protein of unknown function (DUF2741)
Members in this family of proteins are annotated as ubiquinol-cytochrome C reductase however this cannot be confirmed.. +PF10891 Protein of unknown function (DUF2719)
This family of proteins with unknown function appears to be restricted to Nucleopolyhedrovirus.. +PF10892 Protein of unknown function (DUF2688)
Members in this family of proteins are annotated as KleB however currently no function is known.. +PF10893 Protein of unknown function (DUF2724)
This is a family of proteins with unknown function.. +PF10894 Protein of unknown function (DUF2689)
Members in this family of proteins are annotated as TrbD however currently no function is known.. +PF10895 Protein of unknown function (DUF2715)
This family of proteins with unknown function appears to be restricted to Treponema pallidum.. +PF10896 Protein of unknown function (DUF2714)
This family of proteins with unknown function appears to be restricted to Mycoplasmataceae.. +PF10897 Protein of unknown function (DUF2713)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10898 Protein of unknown function (DUF2716)
This bacterial family of proteins has no known function.. +PF10899 Protein of unknown function (DUF2743)
This is a bacterial family of proteins with unknown function.. +PF10901 Protein of unknown function (DUF2690)
This bacterial family of proteins has no known function.. +PF10902 Protein of unknown function (DUF2693)
This viral family of proteins has no known function.. +PF10903 Protein of unknown function (DUF2691)
This bacterial family of proteins has no known function.. +PF10904 Protein of unknown function (DUF2694)
This family of proteins with unknown function appears to be restricted to Mycobacterium spp.. +PF10905 Protein of unknown function (DUF2695)
This bacterial family of proteins has no known function.. +PF10906 Protein of unknown function (DUF2697)
This is a eukaryotic family of proteins with unknown function.. +PF10907 Protein of unknown function (DUF2749)
This bacterial family of proteins appears to come from the Trb operon however currently no function is known.. +PF10908 Protein of unknown function (DUF2778)
Gunasekaran P, Mistry J. Pfam-B_001575 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF10909 Protein of unknown function (DUF2682)
This viral family of proteins has no known function.. +PF10910 Protein of unknown function (DUF2744)
This is a viral family of proteins with unknown function.. +PF10911 Protein of unknown function (DUF2717)
Members in this family of proteins are annotated as gene 6.5 protein however currently there is no known function.. +PF10912 Protein of unknown function (DUF2700)
This family of proteins with unknown function appears to be restricted to Caenorhabditis elegans.. +PF10913 Protein of unknown function (DUF2706)
This family of proteins with unknown function appears to be restricted to Rickettsia spp.. +PF10914 Protein of unknown function (DUF2781)
Gunasekaran P, Mistry J. Pfam-B_001738 (release 23.0). This is a eukaryotic family of uncharacterised proteins. Some of the proteins in this family are annotated as membrane proteins.. +PF10915 Protein of unknown function (DUF2709)
This bacterial family of proteins has no known function.. +PF10916 Protein of unknown function (DUF2712)
This family of proteins with unknown function appears to be restricted to Bacillales.. +PF10917 Protein of unknown function (DUF2708)
This family of proteins with unknown function appears to be restricted to Caenorhabditis elegans.. +PF10918 Protein of unknown function (DUF2718)
This viral family of proteins has no known function.. +PF10920 Protein of unknown function (DUF2705)
This bacterial family of proteins has no known function.. +PF10921 Protein of unknown function (DUF2710)
This family of proteins with unknown function appears to be restricted to Mycobacteriaceae.. +PF10922 Protein of unknown function (DUF2745)
This is a viral family of proteins with unknown function.. +PF10923 P-loop Domain of unknown function (DUF2791)
Gunasekaran P, Mistry J. Pfam-B_001611 (release 23.0). This is a family of proteins found in archaea and bacteria. This domain contains a P-loop motif suggesting it binds to a nucleotide such as ATP.. +PF10924 Protein of unknown function (DUF2711)
Some members in this family of proteins are annotated as ywbB however currently there is no known function.. +PF10925 Protein of unknown function (DUF2680)
Members in this family of proteins are annotated as yckD however currently no function is known.. +PF10926 Protein of unknown function (DUF2800)
Gunasekaran P, Mistry J. Pfam-B_001630 (release 23.0). This is a family of uncharacterised proteins found in bacteria and viruses. Some members of this family are annotated as being Phi APSE P51-like proteins.. +PF10927 Protein of unknown function (DUF2738)
This is a viral family of proteins with unknown function.. +PF10928 Protein of unknown function (DUF2810)
Gunasekaran P, Mistry J. Pfam-B_001682 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF10929 Protein of unknown function (DUF2811)
Gunasekaran P, Mistry J. Pfam-B_001693 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF10930 Protein of unknown function (DUF2737)
This family of proteins has no known function.. +PF10931 Protein of unknown function (DUF2735)
Some members in this family of proteins are annotated as glutamine synthetase translation inhibitor however this function can not be confirmed.. +PF10932 Protein of unknown function (DUF2783)
Gunasekaran P, Mistry J. Pfam-B_001590 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF10933 Protein of unknown function (DUF2827)
Gunasekaran P, Mistry J. Pfam-B_001771 (release 23.0). This is a family of uncharacterised proteins found in Burkholderia.. +PF10934 Protein of unknown function (DUF2634)
Some members in this family of proteins are annotated as phage related, xkdS however currently there is no known function.. +PF10935 Protein of unknown function (DUF2637)
This family of proteins has no known function.. +PF10936 Protein of unknown function DUF2617
This bacterial family of proteins has no known function.. +PF10937 Protein of unknown function (DUF2638)
This family of proteins has no known function.. +PF10938 YfdX protein
YfdX is a protein of unknown function found in Proteobacteria. The gene coding for this protein is regulated by EvgA in E. coli.. +PF10939 Protein of unknown function (DUF2631)
This is a bacterial family of proteins with unknown function.. +PF10940 Protein of unknown function (DUF2618)
This bacterial family of proteins has no known function. The sequences within the family are highly conserved.. +PF10941 Protein of unknown function DUF2620
This is a bacterial family of proteins with unknown function.. +PF10942 Protein of unknown function (DUF2619)
This bacterial family of proteins has no known function.. +PF10943 Protein of unknown function (DUF2632)
This is a family of membrane proteins with unknown function.. +PF10944 Protein of unknown function (DUF2630)
This bacterial family of proteins has no known function.. +PF10945 Protein of unknown function (DUF2629)
Some members in this family of proteins are annotated as yhjR however currently no function is known.. +PF10946 Protein of unknown function DUF2625
Some members in this family of proteins are annotated as ybfG however currently no function is known.. +PF10947 Protein of unknown function (DUF2628)
Some members in this family of proteins are annotated as yigF however currently no function is known.. +PF10948 Protein of unknown function (DUF2635)
This is a family of phage proteins with unknown function.. +PF10949 Protein of unknown function (DUF2777)
This family of proteins with unknown function appears to be restricted to Bacillus cereus.. +PF10950 Protein of unknown function (DUF2775)
This eukaryotic family of proteins has no known function.. +PF10951 Protein of unknown function (DUF2776)
This bacterial family of proteins has no known function.. +PF10952 Protein of unknown function (DUF2753)
This bacterial family of proteins has no known function.. +PF10953 Protein of unknown function (DUF2754)
This family of proteins with unknown function appear to be restricted to Enterobacteriaceae.. +PF10954 Protein of unknown function (DUF2755)
Some members in this family of proteins are annotated as YaiY however no function is known. The family appears to be restricted to Enterobacteriaceae.. +PF10955 Protein of unknown function (DUF2757)
Members in this family of proteins are annotated as YabK however currently no function is known.. +PF10956 Protein of unknown function (DUF2756)
Some members in this family of proteins are annotated yhhA however currently no function is known. The family appears to be restricted to Enterobacteriaceae.. +PF10957 Protein of unknown function (DUF2758)
This family of proteins has no known function.. +PF10958 Protein of unknown function (DUF2759)
This family of proteins with unknown function appear to be restricted to Bacillaceae.. +PF10959 Protein of unknown function (DUF2761)
Members in this family of proteins are annotated as KleF however no function is known.. +PF10960 Protein of unknown function (DUF2762)
Members in this family of proteins are annotated as holin-like protein BhlA however this cannot be confirmed.. +PF10961 Protein of unknown function (DUF2763)
This eukaryotic family of proteins has no known function.. +PF10962 Protein of unknown function (DUF2764)
This bacterial family of proteins has no known function.. +PF10963 Protein of unknown function (DUF2765)
This family of proteins has no known function.. +PF10964 Protein of unknown function (DUF2766)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10965 Protein of unknown function (DUF2767)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10966 Protein of unknown function (DUF2768)
This family of proteins with unknown function appear to be restricted to Bacillus spp.. +PF10967 Protein of unknown function (DUF2769)
This family of proteins have no known function.. +PF10968 Protein of unknown function (DUF2770)
Members in this family of proteins are annotated as yceO however currently no function is known.. +PF10969 Protein of unknown function (DUF2771)
This bacterial family of proteins has no known function.. +PF10970 Protein of unknown function (DUF2772)
Members in this family of proteins are annotated as spore germination protein GE however this cannot be confirmed.. +PF10971 Protein of unknown function (DUF2773)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF10972 Protein of unknown function (DUF2803)
Pfam-B_1049 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF10973 Protein of unknown function (DUF2799)
Pfam-B_1111 (release 23.0). Some members in this family of proteins are annotated as yfiL which has no known function.. +PF10974 Protein of unknown function (DUF2804)
Pfam-B_1045 (release 23.0). This is a family of proteins with unknown function.. +PF10975 Protein of unknown function (DUF2802)
Pfam-B_1126 (release 23.0). This bacterial family of proteins has no known function.. +PF10976 Protein of unknown function (DUF2790)
Pfam-B_1206 (release 23.0). This family of proteins with unknown function appear to be restricted to Pseudomonadaceae.. +PF10977 Protein of unknown function (DUF2797)
Pfam-B_1162 (release 23.0). This family of proteins has no known function.. +PF10978 Protein of unknown function (DUF2785)
Pfam-B_1219 (release 23.0). Some members in this family are annotated as hypothetical membrane spanning proteins however this cannot be confirmed. The family has no known function.. +PF10979 Protein of unknown function (DUF2786)
Pfam-B_1231 (release 23.0). This family of proteins has no known function.. +PF10980 Protein of unknown function (DUF2787)
Pfam-B_1249 (release 23.0). This bacterial family of proteins has no known function.. +PF10981 Protein of unknown function (DUF2788)
Pfam-B_1255 (release 23.0). This bacterial family of proteins have no known function.. +PF10982 Protein of unknown function (DUF2789)
Pfam-B_1269 (release 23.0). This bacterial family of proteins has no known function.. +PF10983 Protein of unknown function (DUF2793)
Pfam-B_1370 (release 23.0). This is a bacterial family of proteins with unknown function.. +PF10984 Protein of unknown function (DUF2794)
Pfam-B_1384 (release 23.0). This is a bacterial family of proteins with unknown function.. +PF10985 Protein of unknown function (DUF2805)
Pfam-B_001474 (release 23.0). This is a bacterial family of proteins with unknown function.. +PF10986 Protein of unknown function (DUF2796)
Pfam-B_1354 (release 23.0). This bacterial family of proteins has no known function.. +PF10987 Protein of unknown function (DUF2806)
Pfam-B_001505 (release 23.0). This bacterial family of proteins has no known function.. +PF10988 Protein of unknown function (DUF2807)
Pfam-B_001516 (release 23.0). This bacterial family of proteins shows structural similarity to other pectin lyase families. Although structures from this family align with acetyl-transferases, there is no conservation of catalytic residues found. It is likely that the function is one of cell-adhesion. In PDB:3jx8, it is interesting to note that the sequence contains several well defined sequence repeats, centred around GSG motifs defining the tight beta turn between the two sheets of the super-helix; there are 8 such repeats in the C-terminal half of the protein, which could be grouped into 4 repeats of two. It seems likely that this family belongs to the superfamily of trimeric autotransporter adhesins (TAAs), which are important virulence factors in Gram-negative pathogens. In the case of Parabacteroides distasonis, which is a component of the normal distal human gut microbiota, TAA-like complexes probably modulate adherence to the host (information derived from TOPSAN).. +PF10989 Protein of unknown function (DUF2808)
Pfam-B_001529 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF10990 Protein of unknown function (DUF2809)
Pfam-B_001533 (release 23.0). Some members in this family of proteins are annotated as yjgA however currently no function for the protein is known.. +PF10991 Protein of unknown function (DUF2815)
Pfam-B_002212 (release 23.0). This is a phage related family of proteins with unknown function.. +PF10992 Protein of unknown function (DUF2816)
Pfam-B_002257 (release 23.0). This eukaryotic family of proteins has no known function.. +PF10993 Protein of unknown function (DUF2818)
Pfam-B_002280 (release 23.0). This bacterial family of proteins has no known function.. +PF10994 Protein of unknown function (DUF2817)
Pfam-B_002258 (release 23.0). This family of proteins has no known function.. +PF10995 Protein of unknown function (DUF2819)
Pfam-B_002304 (release 23.0). This bacterial family of proteins has no known function.. +PF10996 Beta-Casp domain
The beta-CASP domain is found C terminal to the beta-lactamase domain in pre-mRNA 3'-end-processing endonuclease. The active site of this enzyme is located at the interface of these two domains .. +PF10997 Protein of unknown function (DUF2837)
Pfam-B_002349 (release 23.0). This bacterial family of proteins has no known function.. +PF10998 Protein of unknown function (DUF2838)
Pfam-B_002381 (release 23.0). This bacterial family of proteins has no known function.. +PF10999 Protein of unknown function (DUF2839)
Pfam-B_002396 (release 23.0). This bacterial family of unknown function appears to be restricted to Cyanobacteria.. +PF11000 Protein of unknown function (DUF2840)
Pfam-B_002399 (release 23.0). This bacterial family of proteins have no known function.. +PF11001 Protein of unknown function (DUF2841)
Pfam-B_002409 (release 23.0). This family of proteins with unknown function are all present in yeast.. +PF11002 RFPL defining motif (RDM)
Mistry J, Bonnefont J. The RDM domain is found on RFPL (Ret finger protein like) proteins. In humans, RFPL transcripts can be detected at the onset of neurogenesis in differentiating human embryonic stem cells, and in the developing human neocortex . The RDM domain is thought to have emerged from a neofunctionalisation event. It is found N terminal to the SPRY domain (Pfam:PF00622).. +PF11003 Protein of unknown function (DUF2842)
Pfam-B_002411 (release 23.0). This bacterial family of proteins have no known function.. +PF11004 DUF2843;
3-deoxy-D-manno-oct-2-ulosonic acid (Kdo) hydroxylase. Pfam-B_002426 (release 23.0). This is a family of 3-deoxy-D-manno-oct-2-ulosonic acid 3-hydroxylases, which catalyse the conversion of 3-deoxy-D-manno-oct-2-ulosonic acid (Kdo) to D-glycero-D-talo-oct-2-ulosonic acid (Ko). It contains a potential iron-binding motif, HXDX(n)H (n>40). Hydroxylation activity is iron-dependent .. +PF11005 Protein of unknown function (DUF2844)
Pfam-B_002433 (release 23.0). This bacterial family of proteins has no known function.. +PF11006 Protein of unknown function (DUF2845)
Pfam-B_002437 (release 23.0). This bacterial family of proteins has no known function.. +PF11007 Spore coat associated protein JA (CotJA)
CotJA is part of the CotJ operon which contains CotJA and CotJC. The operon encodes spore coat proteins. Interaction of CotJA with CotJC is required for the assembly of both CotJA and CotJC into the spore coat .. +PF11008 Protein of unknown function (DUF2846)
Pfam-B_002451 (release 23.0). Some members in this family of proteins with unknown function are annotated as lipoproteins however this cannot be confirmed.. +PF11009 Protein of unknown function (DUF2847)
Pfam-B_002453 (release 23.0). Some members in this bacterial family of proteins with unknown function are annotated as YtxJ, a putative general stress protein. This cannot be confirmed.. +PF11010 Protein of unknown function (DUF2848)
Pfam-B_002463 (release 23.0). This bacterial family of proteins has no known function.. +PF11011 Protein of unknown function (DUF2849)
Pfam-B_002560 (release 23.0). This bacterial family of proteins has no known function.. +PF11012 Protein of unknown function (DUF2850)
Pfam-B_002587 (release 23.0). This family of proteins with unknown function appear to be restricted to Vibrionaceae.. +PF11013 Protein of unknown function (DUF2851)
Pfam-B_002589 (release 23.0). This bacterial family of proteins has no known function.. +PF11014 Protein of unknown function (DUF2852)
Pfam-B_002611 (release 23.0). This bacterial family of proteins has no known function.. +PF11015 Protein of unknown function (DUF2853)
Pfam-B_002619 (release 23.0). This bacterial family of proteins has no known function.. +PF11016 Protein of unknown function (DUF2854)
Pfam-B_002643 (release 23.0). This family of proteins has no known function.. +PF11017 Protein of unknown function (DUF2855)
Pfam-B_002665 (release 23.0). This family of proteins has no known function.. +PF11018 Pupal cuticle protein C1
Insect cuticles are composite structures whose mechanical properties are optimised for biological function. The major components are the chitin filament system and the cuticular proteins, and the cuticle's properties are determined largely by the interactions between these two sets of molecules. The proteins can be ordered by species.. +PF11019 Protein of unknown function (DUF2608)
This family is conserved in Bacteria. The function is not known.. +PF11020 Domain of unknown function (DUF2610)
This family is conserved in Proteobacteria. One member is annotated as being elongation factor P but this could not be confirmed. This domain is related to the Ribbon-helix-helix superfamily so may be a DNA-binding protein.. +PF11021 Protein of unknown function (DUF2613)
This is a family of putative small secreted proteins expressed by Actinobacteria. The function is not known.. +PF11022 Protein of unknown function (DUF2611)
This family is conserved in the Dikarya of Fungi. The function is not known.. +PF11023 Protein of unknown function (DUF2614)
This is a family of proteins conserved in the Bacillaceae family. Some members are annotated as being protein YgzB. The function is not known.. +PF11024 Dispersed gene family protein 1 of Trypanosoma cruzi region 4
Pfam-B_187 (release 23.0). This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. Other domains on this protein include DGF-1_N, DGF-1_2, and DGF-1_5. This domain is just downstream from the C-terminus, but not the C-terminus of proteins, also annotated as being DGF-1, that constitute family DGF-1_C.. +PF11025 Glycoprotein GP40 of Cryptosporidium
Pfam-B_197 (release 23.0). This family is highly conserved in Cryptosporidium spp. Many members are annotated as being a 60 kDa glycoprotein.. +PF11026 Protein of unknown function (DUF2721)
Pfam-B_520 (release 23.0). This family is conserved in bacteria. The function is not known.. +PF11027 Protein of unknown function (DUF2615)
This small, approximately 100 residue, family is conserved from worms to humans. It is cysteine-rich with a characteristic FDxCEC sequence motif. The function is not known.. +PF11028 Protein of unknown function (DUF2723)
Pfam-B_590 (release 23.0). This family is conserved in bacteria. The function is not known.. +PF11029 DAZ associated protein 2 (DAZAP2)
DAZ associated protein 2 has a highly conserved sequence throughout evolution including a conserved polyproline region and several SH2/SH3 binding sites. It occurs as a single copy gene with a four-exon organisation and is located on chromosome 12. It encodes a ubiquitously expressed protein and binds to DAZ and DAZL1 through DAZ repeats [1,2].. +PF11030 Nucleocapsid protein N
This is the N protein of the nucleocapsid. The nucleocapsid functions to protect the RNA against nuclease degradation and to promote its reverse transcription. The NC protein promotes viral RNA dimerisation and encapsidation and initiates reverse transcription by activating the annealing of the primer tRNA to the initiation site.. +PF11031 Bacteriophage T holin
Bacteriophage effects host lysis with T holin along with an endolysin. T disrupts the membrane allowing sequential events which lead to the attack of the peptidoglycan. T has an unusual periplasmic domain which transduces environmental information for the real-time control of lysis timing.. +PF11032 Apolipoprotein M (ApoM)
ApoM is a 25 kDa plasma protein associated with high-density lipoproteins (HDLs). ApoM is important in the formation of pre-ss-HDL and also in increasing cholesterol efflux from macrophage foam cells . Lipoproteins consist of lipids solubilized by apolipoproteins. ApoM lacks an external amphipathic motif and is uniquely secreted to plasma without cleavage of its terminal signal peptide .. +PF11033 Competence protein J (ComJ)
ComJ is a competence specific protein .. +PF11034 Protein of unknown function (DUF2823)
This family of proteins are possible glucose repressible proteins however this cannot be confirmed. Currently, no function is known.. +PF11035 Small nuclear RNA activating complex subunit 2-like
This family of proteins is SnAPC subunit 2-like. SnAPC allows the transcription of human small nuclear RNA genes to occur by recognition of the proximal sequence element .. +PF11036 Virulence promoting factor
YqgB encodes adaptive factors that act in synergy with vqfZ, enabling the bacteria to cope with the physical environment in vivo, facilitating colonisation of the host.. +PF11037 Insulin-resistance promoting peptide in skeletal muscle
Musclin is a muscle derived secretory peptide which induces insulin resistance in vitro. It encodes a 130 amino acid sequence including a NH(2) terminal 30 amino acid signal sequence. Musclin expression level is tightly regulated by nutritional changes [1,2].. +PF11038 Dispersed gene family protein 1 of Trypanosoma cruzi region 5
Pfam-B_157 (release 23.0). This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. Other domains on this protein include DGF-1_N, DGF-1_2, and DGF-1_4. This domain is just downstream from the C-terminus, but not the C-terminus of proteins, also annotated as being DGF-1, that constitute family DGF-1_C.. +PF11039 Protein of unknown function (DUF2824)
This family of proteins has no known function. Some members in the family are annotated as the P22 head assembly protein gp14 however this cannot be confirmed.. +PF11040 Dispersed gene family protein 1 of Trypanosoma cruzi C-terminus
Pfam-B_30 (release 23.0). This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. This is the very C-terminal part of the protein.. +PF11041 Protein of unknown function (DUF2612)
This is a phage protein family expressed from a range of Proteobacteria species. The function is not known.. +PF11042 Protein of unknown function (DUF2750)
Pfam-B_609 (release 23.0). This family is conserved in Proteobacteria. The function is not known.. +PF11043 Protein of unknown function (DUF2856)
Some members in this viral family of proteins with unknown function are annotated as Abc2 however this cannot be confirmed.. +PF11044 Plectrovirus spv1-c74 ORF 12 transmembrane protein
This is a family of proteins expressed by Plectroviruses. The plectroviruses are single-stranded DNA viruses belonging to the Inoviridae. Except that it is a putative transmembrane protein the function is not known.. +PF11045 Putative inner membrane protein of Enterobacteriaceae
This family is conserved in the Enterobacteriaceae. It is a putative inner membrane protein, named YbjM, but the function is not known.. +PF11046 Transcriptional repressor of hyc and hyp operons
This family is conserved in Proteobacteria. It is likely to be the transcriptional repressor molecule for the hyc and hyp operons, which express, amongst others, the protein HycA. This protein may be harnessed for the reduction of technetium oxide, an unwelcome product of radio-nucleotide bioaccumulation. HycA produces formate hydrogenlyase, one of the key proteins necessary for metal compound reduction .. +PF11047 Salmonella outer protein D
SopD is a type III virulence effector protein whose structure consists of 38% alpha-helix and 26% beta-strand.. +PF11049 Glycoprotein K1 of Kaposi's sarcoma-associated herpes virus
Pfam-B_38 (release 23.0). This is a highly glycosylated cytoplasmic and membrane protein similar to the immunoglobulin receptor family that is expressed as an inducible early-lytic-cycle gene product in primary effusion lymphoma cell-lines. This domain would appear to be the cytoplasmic region of the protein .. +PF11050 Virus envelope protein E26
E26 is a multifunctional protein. One form of E26 associates with viral DNA or DNA binding proteins, while a second form associates with intracellular membranes .. +PF11051 Mannosyltransferase putative
Pfam-B_379 (release 23.0). This family is conserved in fungi. Several members are annotated as being alpha-1,3-mannosyltransferase but this could not be confirmed.. +PF11052 Trans-sialidase of Trypanosoma hydrophobic C-terminal
Pfam-B_103 (release 23.0). This is a highly conserved sequence motif that is the very C-terminus of a number of more diverse proteins from Trypanosoma cruzi. All members of the family are annotated putatively as being trans-sialidase but this appears to be a diverse group.. +PF11053 Terminase DNA packaging enzyme
Phage T4 terminase functions in packaging concatemeric DNA. The T4 terminase is composed of a large subunit, gp17, and a small subunit, gp16. The role of gp16 is not well characterised however it is known that it binds to double-stranded DNA but not single-stranded DNA.. +PF11054 Sporozoite TA4 surface antigen
This family of proteins is a Eukaryotic family of surface antigens. One of the better characterised members of the family is the sporulated TA4 antigen. The TA4 gene encodes a single polypeptide of 25 kDa which contains a 17 kDa and an 8 kDa polypeptide.. +PF11055 Glucose signalling factor 2
Gsf2 is localised to the ER and functions to promote the secretion of certain hexose transporters .. +PF11056 Recombination, repair and ssDNA binding protein UvsY
UvsY protein enhances the rate of single-stranded-DNA-dependent ATP hydrolysis by UvsX protein. The enhancement of ATP hydrolysis by UvsY protein is shown to result from the ability of UvsY protein to increase the affinity of UvsX protein for single-stranded DNA.. +PF11057 Cortexin of kidney
In the middle of cortexin protein there is a single membrane-spanning domain which indicates that this protein may be a membrane protein involved in intracellular or extracellular signalling of the kidney or brain, since it is expressed specifically in the kidneys and brain only. The protein is highly conserved among species . Cortexin is also thought to be important to neurons of both the developing and adult cerebral cortex .. +PF11058 Antirestriction protein Ral
Ral alleviates restriction and enhances modification by the E. coli restriction and modification system.. +PF11059 Protein of unknown function (DUF2860)
Pfam-B_002667 (release 23.0). This bacterial family of proteins has no known function.. +PF11060 Protein of unknown function (DUF2861)
Pfam-B_002683 (release 23.0). This bacterial family of proteins has no known function.. +PF11061 Protein of unknown function (DUF2862)
Pfam-B_003005 (release 23.0). This family of proteins has no known function.. +PF11062 Protein of unknown function (DUF2863)
Pfam-B_002981 (release 23.0). This bacterial family of proteins have no known function.. +PF11064 Protein of unknown function (DUF2865)
Pfam-B_002953 (release 23.0). This bacterial family of proteins has no known function.. +PF11065 Protein of unknown function (DUF2866)
Pfam-B_002950 (release 23.0). This bacterial family of proteins have no known function.. +PF11066 Protein of unknown function (DUF2867)
Pfam-B_002931 (release 23.0). This bacterial family of proteins have no known function.. +PF11067 Protein of unknown function (DUF2868)
Pfam-B_002930 (release 23.0). Some members in this family of proteins with unknown function are annotated as putative membrane proteins. However, this cannot be confirmed.. +PF11068 DUF2869;
Pfam-B_002915 (release 23.0). The structure of a representative of this family has been solved (pdb:4dci) and found to form a tetrameric structure of prefoldin-like architecture with the beta-barrel core and helical coiled coil tentacles. This suggests that this family may act as molecular chaperones.. +PF11069 Protein of unknown function (DUF2870)
Pfam-B_002904 (release 23.0). This is a eukaryotic family of proteins with unknown function.. +PF11070 Protein of unknown function (DUF2871)
Pfam-B_002884 (release 23.0). This family of proteins has no known function.. +PF11071 Protein of unknown function (DUF2872)
Pfam-B_002883 (release 23.0). This bacterial family of proteins has no known function.. +PF11072 Protein of unknown function (DUF2859)
Gunasekaran P, Mistry J. Pfam-B_001915 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11073 Rift valley fever virus non structural protein (NSs) like
Gunasekaran P, Mistry J. Pfam-B_001643 (release 23.0). This family contains several Phlebovirus non structural proteins which act as a major determinant of virulence by antagonising interferon beta gene expression .. +PF11074 Domain of unknown function(DUF2779)
Gunasekaran P, Mistry J. Pfam-B_001581 (release 23.0). This domain is conserved in bacteria. The function is not known.. +PF11075 Protein of unknown function VcgC/VcgE (DUF2780)
Gunasekaran P, Mistry J. Pfam-B_001695 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11076 Putative inner membrane protein YbhQ
This family is conserved in Proteobacteria. The function is not known but most members are annotated as being inner membrane protein YbhQ.. +PF11077 Protein of unknown function (DUF2616)
This cysteine-rich family is expressed by the double-stranded Nucleopolyhedrovirus, a member of the Baculoviridae family of dsDNA viruses. The function is not known.. +PF11078 Optomotor-blind protein N-terminal region
Pfam-B_595 (release 23.0). This family is conserved in Drosophila spp. Optomotor-blind is one of the essential toolkit proteins for coordinating development in diverse animal taxa, and in Drosophila it plays a key role in establishing the abdominal pigmentation pattern, in development of the central nervous system and leg and wing imaginal disc-formation of Drosophila melanogaster. This is the N-terminal region of the protein and does not include the T-box-containing transcription factor that plays a part in DNA-binding.. +PF11079 Bacterial protein YqhG of unknown function
This family of putative proteins is conserved in the Bacillaceae family of the Firmicutes. The function is not known.. +PF11080 Protein of unknown function (DUF2622)
This family is conserved in the Enterobacteriaceae family. Several members are named as YdiZ, a putative cytoplasmic protein. The function is not known.. +PF11081 Protein of unknown function (DUF2890)
Pfam-B_629 (release 23.0). This family is conserved in dsDNA adenoviruses of vertebrates. The function is not known.. +PF11082 Protein of unknown function (DUF2880)
Pfam-B_001492 (release 23.0). This bacterial family of proteins has no known function.. +PF11083 Lantibiotic streptin immunity protein
Pfam-B_43518 (release 22.0). Streptococcal species produce a lantibiotic, streptin, in a similar manner to the production of nisin and subtilin by other lactic acid bacteria, in order to compete against competing bacteria within the environment. The immunity protein protects the bacterium from destruction by its own lantibiotic. In general, there is little homology between the immunity proteins of different genera of bacteria.. +PF11084 Protein of unknown function (DUF2621)
Mistry J, Coggill P. This family is conserved in the Bacillaceae family. Several members are named as YneK. The function is not known.. +PF11085 Conserved membrane protein YqhR
This family is conserved in the Bacillaceae family of the Firmicutes. The function is not known.. +PF11086 Protein of unknown function (DUF2878)
Pfam-B_001539 (release 23.0). This bacterial family of proteins has no known function. Some members annotate the proteins as the permease component of a Mn2+/Zn2+ transport system however this cannot be confirmed.. +PF11087 Protein of unknown function (DUF2881)
This viral family of proteins has no known function. Some members are annotated as p34 however this cannot be confirmed.. +PF11088 Glycoprotein encoding membrane proteins RL5A and RL6
RL5A and RL6 are part of the RL11 family which are predicted to encode membrane glycoproteins. Two adjacent open reading frames potentially encode a domain that is the hallmark of proteins encoded by the RL11 family.. +PF11089 Exopolysaccharide production repressor
SyrA is a small protein located in the cytoplasmic membrane that lacks an apparent DNA binding domain. SyrA mediates the transcriptional up-regulation of exo genes involved in the biosynthesis of the symbiotic exopolysaccharide succinoglycan. It does this through a mechanism which requires a two component system .. +PF11090 Protein of unknown function (DUF2833)
This family of proteins with unknown function are found in the bacteriophage T7. Some of the members of this family are annotated as gene 13 protein.. +PF11091 Tail-tube assembly protein
This tail tube protein is also referred to as Gp48. It is required for the assembly and length regulation of the tail tube of bacteriophage T4 .. +PF11092 Neuronal protein 3.1 (p311)
P311 has several PEST-like motifs and is found in neuron and muscle cells. P311 could have some function in myo-fibroblast transformation and prevention of fibrosis . It has also been identified as a potential regulator of alveolar generation .. +PF11093 Mitochondrial export protein Som1
Som1 is a component of the mitochondrial protein export system. The various Som1 proteins exhibit a highly conserved region and a pattern of cysteine residues . Stabilisation of Som1 occurs through an interaction between Som1 and Imp1, a peptidase required for proteolytic processing of certain proteins during their transport across the mitochondrial membrane . This suggests that Som1 represents a third subunit of the Imp1 peptidase complex . +PF11094 Membrane-associated tegument protein
The UL11 gene product of herpes simplex virus is a membrane-associated tegument protein that is incorporated into the HSV virion and functions in viral envelopment . UL11 is acylated which is crucial for lipid raft association .. +PF11095 Gem-associated protein 7 (Gemin7)
Gemin7 is a novel component of the survival of motor neuron complex which functions in the assembly of spliceosomal small nuclear ribonucleoproteins. Gemin7 interacts with several Sm proteins of spliceosomal small nuclear ribonucleoproteins, especially SmE .. +PF11097 Protein of unknown function (DUF2883)
This family of proteins have no known function but appear to be restricted to phage.. +PF11098 Chlorosome envelope protein C
Chlorosomes are light-harvesting antennae found in green bacteria. CsmC is one of the proteins that exists in the chlorosome envelope. CsmC has been shown to exist as a homomultimer with CsmD in the chlorosome envelope . CsmC is thought to be important in chlorosome elongation and shape .. +PF11099 Apoptosis regulator M11L like
Apoptosis regulators function to modulate the apoptotic cascades and thereby favour productive viral replication. M11L inhibits mitochondrial-dependant apoptosis by mimicking and competing with host proteins for the binding and blocking of Bak and Bax, two executioner proteins .. +PF11100 Conjugal transfer protein TrbE
TrbE is essential for conjugation and phage adsorption. It contains four common motifs and one conserved domain .. +PF11101 Protein of unknown function (DUF2884)
Pfam-B_001481 (release 23.0). Some members in this bacterial family of proteins are annotated as YggN which currently has no known function.. +PF11102 DUF2886; Cap_synth_GfcC;
Group 4 capsule polysaccharide formation lipoprotein gfcB. Pfam-B_1366 (release 23.0). This family includes lipoprotein GfcB (YmcC), involved in group 4 capsule polysaccharide formation .. +PF11103 Protein of unknown function (DUF2887)
Pfam-B_1330 (release 23.0). This bacterial family of proteins has no known function. These proteins may be distantly related to the PD(D/E)XK superfamily.. +PF11104 Competence_A;
Type IV pilus assembly protein PilM;. The type IV pilus assembly protein PilM is required for competency and pilus biogenesis [1-2]. It binds to PilN and ATP .. +PF11105 Arthropod cardioacceleratory peptide 2a
CCAP exerts a reversible and dose-dependant cardio-stimulatory effect on the semi-isolated heart of experimental beetles. CCAP also increases free hemolymph sugar concentration in young larvae and adults of the meal-worm beetle .. +PF11106 Exopolysaccharide production protein YjbE
YjbE is part of a four gene operon which is involved in exopolysaccharide production. The expression of YjbE is higher than the rest of the operon yjbEFGH. It appears to be restricted to Enterobacteriaceae .. +PF11107 Fanconi anemia group F protein (FANCF)
FANCF regulates its own expression by methylation at both mRNA and protein levels. Methylation-induced inactivation of FANCF has an important role on the occurrence of ovarian cancers by disrupting the FA-BRCA pathway .. +PF11108 Viral glycoprotein L
GL forms a complex with gH, a glycoprotein known to be essential for entry of HSV-1 into cells and virus-induced cell fusion . It is a hetero-oligomer of gH and gL which is incorporated into virions and transported to the cell surface which acts during entry of virus into cells . +PF11109 Orexigenic neuropeptide Qrfp/P518
Qrfp/P518 has a direct role in maintaining bone mineral density . Qrfp has also been found to be important in energy homeostasis by regulating appetite and energy expenditure in mice . The c-terminal 28 residues are the functional 26RFa .. +PF11110 Baseplate hub distal subunit
These baseplate proteins are also referred to as Gp28. Gp28 is the structural component of the central part of the bacteriophage T4 baseplate, which possesses a hydrophobic region and is membrane bound . Gp28 forms a complex with gp27 which is another structural component of the baseplate .. +PF11111 Centromere protein M (CENP-M)
The prime candidate for specifying centromere identity is the array of nucleosomes assembled with CENP-A . CENP-A recruits a nucleosome associated complex (NAC) comprised of CENP-M along with two other proteins . Assembly of the CENP-A NAC at centromeres is partly dependent on CENP-M. The CENP-A NAC is essential, as disruption of the complex causes errors of chromosome alignment and segregation that preclude cell survival .. +PF11112 Pyocin activator protein PrtN
PrtN is a transcriptional activator for pyocin synthesis genes . It activates the expression of various pyocin genes by interaction with the DNA sequences conserved in the 5' noncoding regions of the pyocin genes .. +PF11113 Head assembly gene product
This head assembly protein is also referred to as gene product 40 (Gp40). A specific gp20-gp40 membrane insertion structure constitutes the T4 prohead assembly initiation complex . This protein in T4 stimulates head formation .. +PF11114 Minor_capsid; Minor_capsid-2;
Minor capsid protein. Most of the members of this family are annotated as being minor capsid proteins. The genomes carrying the genes usually have three similar proteins adjacent to each other, hence this one being named as No.2.. +PF11115 Protein of unknown function (DUF2623)
This family is conserved in the Enterobacteriaceae family. Several members are named as YghW. The function is not known.. +PF11116 Protein of unknown function (DUF2624)
This family is conserved in the Bacillaceae family. Several members are named as YqfT. The function is not known.. +PF11117 Protein of unknown function (DUF2626)
This family is conserved in the Bacillaceae family. Several members are named as YqgY. The function is not known.. +PF11118 Protein of unknown function (DUF2627)
This family is conserved in the Bacillaceae family. Several members are named as YqzF. The function is not known.. +PF11119 Protein of unknown function (DUF2633)
This family is conserved largely in the Bacillaceae family. Several members are named as YfgG. The function is not known.. +PF11120 Protein of unknown function (DUF2636)
This family is conserved in the Enterobacteriaceae family. Several members are named as being YhjT, but the function is not known.. +PF11121 Protein of unknown function (DUF2639)
This family is conserved in the Bacillaceae family. Several members are named as being YflJ, but the function is not known.. +PF11122 Inner spore coat protein D
This family is conserved in the Enterobacteriaceae family. CotD is an inner spore coat protein that is expressed in the middle phase of mother cell gene expression. Along with CotD, CotH, CotS and CotT it is assumed to assemble into the loose skeleton of the matrix, between the shells of SpoIVA and CotE. Coat proteins do not share much sequence similarity between species, but this does not imply they do not share secondary, tertiary, or quaternary features .. +PF11123 DNA packaging protein
This DNA packaging protein is also referred to as gene 18 product (gp18). This protein is required for DNA packaging and functions in a complex with gp19 .. +PF11124 Inorganic phosphate transporter Pho86
Pho86p is an ER protein which is produced in response to phosphate starvation. It is essential for growth when phosphate levels are limiting . Pho86p is also involved in the regulation of Pho84p, a high-affinity phosphate transporter which is localised to the endoplasmic reticulum (ER) in low phosphate medium. When the level of phosphate increases Pho84p is transported to the vacuole. Pho86p is required for packaging of Pho84p in to COPII vesicles .. +PF11125 Protein of unknown function (DUF2830)
Several members in this viral family of proteins are annotated as lysis proteins.. +PF11126 Transcriptional regulator DsbA
DsbA is a double stranded binding protein found in bacteriophage T4 which is involved in transcriptional regulation. DsbA, along with other viral proteins, interacts with the host RNA polymerase core enzyme enabling initiation of transcription. DsbA acts as an enhancer protein of late genes in vitro. The protein consists of mainly alpha helices .. +PF11127 Protein of unknown function (DUF2892)
Pfam-B_604 (release 23.0). This family is conserved in bacteria. The function is not known.. +PF11128 Plant viral coat protein nucleocapsid
Pfam-B_645 (release 23.0). This family of nucleocapsid proteins is from ssRNA negative-strand viruses of plant origin.. +PF11129 Rev protein of equine infectious anaemia virus
Pfam-B_124 (release 23.0). The sequence of this family is highly conserved and carries a nuclear export signal from residues 31-55, and RNA binding/nuclear localisation signals of RRDR at residue 76 and KRRRK at residue 159. Rev is an essential regulatory protein required for nucleocytoplasmic transport of incompletely spliced viral mRNAs that encode structural proteins. Rev has been shown to down-regulate the expression of viral late genes and alter sensitivity to Gag-specific cytotoxic-T-lymphocytes (CTL). Equine infectious anaemia virus (EIAV) exhibits a high rate of genetic variation in vivo, and results in a clinically variable disease in infected horses.. +PF11130 F pilus assembly Type-IV secretion system for plasmid transfer
Pfam-B_678 (release 23.0). This family of TraC proteins is conserved in Proteobacteria. TraC is a cytoplasmic, peripheral membrane protein and is one of the proteins encoded by the F transfer region of the conjugative plasmid that is required for the assembly of F pilin into the mature F pilus structure. F pili are filamentous appendages that help establish the physical contact between donor and recipient cells involved in the conjugation process .. +PF11131 Rap-phr extracellular signalling
PhrC and PhrF stimulate ComA-dependent gene expression to different levels and are both required for full expression of genes activated by ComA, which activates the expression of genes involved in competence development and the production of several secreted products .. +PF11132 Transcriptional regulator protein (SplA)
The SplA protein functions in trans as a negative regulator of the level of splB-lacZ expression in the developing forespore .. +PF11133 Head fiber protein
This head fiber protein is also referred to as Gp8.5. Gp8.5 is a structural protein in phage. It is a dispensable head protein.. +PF11134 Phage stabilisation protein
Members of this family are phage proteins that are probably involved with stabilising the condensed DNA within the capsid .. +PF11135 Protein of unknown function (DUF2888)
Some members in this family of proteins with unknown function are annotated as immediate early protein ICP-18 however this cannot be confirmed.. +PF11136 Protein of unknown function (DUF2889)
Pfam-B_001473 (release 23.0). This bacterial family of proteins has no known function.. +PF11137 Protein of unknown function (DUF2909)
Pfam-B_764 (release 23.0). This is a family of proteins conserved in Proteobacteria of unknown function.. +PF11138 Protein of unknown function (DUF2911)
Pfam-B_001491 (release 23.0). This bacterial family of proteins has no known function.. +PF11139 Protein of unknown function (DUF2910)
Pfam-B_001487 (release 23.0). Some members in this bacterial family annotate the proteins as cytochrome C biogenesis proteins however this cannot be confirmed. Currently no function for this family is known.. +PF11140 Protein of unknown function (DUF2913)
Pfam-B_001499 (release 23.0). This family of proteins with unknown function appear to be restricted to Gammaproteobacteria.. +PF11141 Protein of unknown function (DUF2914)
Pfam-B_001640 (release 23.0). This bacterial family of proteins has no known function.. +PF11142 Protein of unknown function (DUF2917)
Pfam-B_001647 (release 23.0). This bacterial family of proteins appears to be restricted to Proteobacteria.. +PF11143 Protein of unknown function (DUF2919)
Pfam-B_001684 (release 23.0). This bacterial family of proteins has no known function. Some members are annotated as YfeZ however this cannot be confirmed.. +PF11144 Protein of unknown function (DUF2920)
Pfam-B_001778 (release 23.0). This bacterial family of proteins has no known function.. +PF11145 Protein of unknown function (DUF2921)
Pfam-B_001920 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11146 Protein of unknown function (DUF2905)
Pfam-B_542 (release 23.0). This is a conserved family of bacterial proteins of unknown function.. +PF11148 Protein of unknown function (DUF2922)
Pfam-B_001999 (release 23.0). This bacterial family of proteins has no known function.. +PF11149 Protein of unknown function (DUF2924)
Pfam-B_002010 (release 23.0). This bacterial family of proteins has no known function.. +PF11150 Protein of unknown function (DUF2927)
Pfam-B_739 (release 23.0). This family is conserved in Proteobacteria. Several members are described as being putative lipoproteins, but otherwise the function is not known.. +PF11151 Protein of unknown function (DUF2929)
Pfam-B_002101 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes.. +PF11152 Protein of unknown function (DUF2930)
Pfam-B_002135 (release 23.0). This family of proteins has no known function.. +PF11153 Protein of unknown function (DUF2931)
Pfam-B_002146 (release 23.0). Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. Currently, there is no known function.. +PF11154 Protein of unknown function (DUF2934)
Pfam-B_002301 (release 23.0). This bacterial family of proteins has no known function.. +PF11155 Domain of unknown function (DUF2935)
Pfam-B_002056 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes. The structure of this protein has been solved and each domain is composed of four alpha helices. A metal cluster composed of iron and magnesium lies between the two domains.. +PF11157 Protein of unknown function (DUF2937)
Pfam-B_002314 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11158 Protein of unknown function (DUF2938)
Pfam-B_002317 (release 23.0). This bacterial family of proteins has no known function. Some members are thought to be membrane proteins however this cannot be confirmed.. +PF11159 Protein of unknown function (DUF2939)
Pfam-B_002321 (release 23.0). This bacterial family of proteins has no known function.. +PF11160 Protein of unknown function (DUF2945)
Pfam-B_002448 (release 23.0). This family of proteins has no known function.. +PF11161 Protein of unknown function (DUF2946)
Pfam-B_002487 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11162 Protein of unknown function (DUF2946)
Pfam-B_002520 (release 23.0). This family of proteins has no known function.. +PF11163 Protein of unknown function (DUF2947)
Pfam-B_002524 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11164 Protein of unknown function (DUF2948)
Pfam-B_002527 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11165 Protein of unknown function (DUF2949)
Pfam-B_002571 (release 23.0). This family of proteins with unknown function appear to be restricted to Cyanobacteria.. +PF11166 Protein of unknown function (DUF2951)
Pfam-B_002585 (release 23.0). This family of proteins has no known function. It has a highly conserved sequence.. +PF11167 Protein of unknown function (DUF2953)
Pfam-B_002617 (release 23.0). This family of proteins has no known function.. +PF11168 Protein of unknown function (DUF2955)
Pfam-B_002614 (release 23.0). Some members in this family of proteins with unknown function annotate the proteins as membrane protein. However, this cannot be confirmed.. +PF11169 Protein of unknown function (DUF2956)
Pfam-B_002632 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11170 Protein of unknown function (DUF2957)
Pfam-B_002671 (release 23.0). Some members annotate the proteins to be putative lipoproteins however this cannot be confirmed. Currently no function is known for this family of proteins.. +PF11171 Protein of unknown function (DUF2958)
Pfam-B_002712 (release 23.0). Some members are annotated as lipoproteins however this cannot be confirmed. This family of proteins has no known function.. +PF11172 Protein of unknown function (DUF2959)
Pfam-B_002747 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11173 Protein of unknown function (DUF2960)
Pfam-B_002756 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11174 Protein of unknown function (DUF2970)
Pfam-B_713 (release 23.0). This short family is conserved in Proteobacteria. The function is not known.. +PF11175 Protein of unknown function (DUF2961)
Pfam-B_002770 (release 23.0). This family of proteins has no known function.. +PF11176 Protein of unknown function (DUF2962)
Pfam-B_002773 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11177 Protein of unknown function (DUF2964)
Pfam-B_002804 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11178 Protein of unknown function (DUF2963)
Pfam-B_002790 (release 23.0). This family of proteins with unknown function appears to be restricted to Mollicutes.. +PF11179 Protein of unknown function (DUF2967)
Pfam-B_002838 (release 23.0). This family of proteins with unknown function appears to be restricted to Drosophila.. +PF11180 Protein of unknown function (DUF2968)
Pfam-B_002850 (release 23.0). This family of proteins has no known function.. +PF11181 Heat induced stress protein YflT
YflT is a heat induced protein.. +PF11182 Alginate O-acetyl transferase AlgF
AlgF is essential for the addition of O-acetyl groups to alginate, an extracellular polysaccharide. The presence of O-acetyl groups plays an important role in the ability of the polymer to act as a virulence factor .. +PF11183 Polymyxin resistance protein PmrD
PmrB forms a two-component system (TCS) with PmrA that allows Gram-negative bacteria to survive the cationic antimicrobial peptide polymyxin G . The TCS is linked to another one via the polymyxin resistance protein PmrD. PmrD is the first protein identified to mediate the connectivity between the two TCSs. It binds to the N terminal domain of the PmrA response regulator which prevents its dephosphorylation, thereby promoting the transcription of genes involved in polymyxin resistance .. +PF11184 Protein of unknown function (DUF2969)
Pfam-B_002861 (release 23.0). This family of proteins with unknown function appears to be restricted to Lactobacillales.. +PF11185 Protein of unknown function (DUF2971)
Pfam-B_002776 (release 23.0). This bacterial family of proteins has no known function.. +PF11186 Protein of unknown function (DUF2972)
Pfam-B_002895 (release 23.0). Some members in this family of proteins with unknown function are annotated as sugar transferase proteins, however this cannot be confirmed.. +PF11187 Protein of unknown function (DUF2974)
Pfam-B_002933 (release 23.0). This bacterial family of proteins has no known function.. +PF11188 Protein of unknown function (DUF2975)
Pfam-B_2875 & Pfam-B_3379 (release 23.0) & JH:B0MX27. This family of bacterial proteins have no known function. These proteins are likely to be integral membrane proteins. The proteins contain a highly conserved glutamic acid close to their C-terminus.. +PF11189 Protein of unknown function (DUF2973)
Pfam-B_002929 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently they have no known function.. +PF11190 Protein of unknown function (DUF2976)
Pfam-B_002963 (release 23.0). This family of proteins has no known function. Some members are annotated as membrane proteins however this cannot be confirmed.. +PF11191 Protein of unknown function (DUF2782)
Gunasekaran P, Mistry J. Pfam-B_001700 (release 23.0). This is a bacterial family of proteins whose function is unknown.. +PF11192 Protein of unknown function (DUF2977)
Pfam-B_002980 (release 23.0). This family of proteins has no known function.. +PF11193 Protein of unknown function (DUF2812)
Gunasekaran P, Mistry J. Pfam-B_001697 (release 23.0). This is a bacterial family of uncharacterised proteins, however some members of this family are annotated as membrane proteins.. +PF11195 Protein of unknown function (DUF2829)
Gunasekaran P, Mistry J. Pfam-B_001848 (release 23.0). This is an uncharacterised family of proteins found in bacteria and bacteriophages.. +PF11196 Protein of unknown function (DUF2834)
Gunasekaran P, Mistry J. Pfam-B_001850 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11197 Protein of unknown function (DUF2835)
Gunasekaran P, Mistry J. Pfam-B_001851 (release 23.0). This is a bacterial family of uncharacterised proteins. One member of this family (Swiss:A4VM42) is annotated as the A subunit of Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV).. +PF11198 Protein of unknown function (DUF2857)
Gunasekaran P, Mistry J. Pfam-B_001886 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11199 Protein of unknown function (DUF2891)
Gunasekaran P, Mistry J. Pfam-B_001921 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11200 Protein of unknown function (DUF2981)
Pfam-B_003040 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11201 Protein of unknown function (DUF2982)
Pfam-B_003059 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11202 DUF2983;
Phosphoribosyl transferase (PRTase). This PRTase family has a C terminal RNA binding Pelota domain . These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribonucleoside involved in stress response .. +PF11203 Protein of unknown function (DUF2984)
Pfam-B_003006 (release 23.0). Some members in this bacterial family of proteins are annotated as membrane proteins however this cannot be confirmed.. +PF11204 Protein of unknown function (DUF2985)
Pfam-B_003090 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11205 Protein of unknown function (DUF2987)
Pfam-B_003120 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11207 Protein of unknown function (DUF2989)
Pfam-B_003139 (release 23.0). Some members in this bacterial family of proteins are annotated as lipoproteins however this cannot be confirmed.. +PF11208 Protein of unknown function (DUF2992)
Pfam-B_003142 (release 23.0). This bacterial family of proteins has no known function. However, the cis-regulatory yjdF motif, just upstream from the gene encoding the proteins for this family, is a small non-coding RNA, Rfam:RF01764. The yjdF motif is found in many Firmicutes, including Bacillus subtilis. In most cases, it resides in potential 5' UTRs of homologues of the yjdF gene whose function is unknown. However, in Streptococcus thermophilus, a yjdF RNA motif is associated with an operon whose protein products synthesise nicotinamide adenine dinucleotide (NAD+). Also, the S. thermophilus yjdF RNA lacks typical yjdF motif consensus features downstream of and including the P4 stem. Thus, if yjdF RNAs are riboswitch aptamers, the S. thermophilus RNAs might sense a distinct compound that structurally resembles the ligand bound by other yjdF RNAs. On the other hand, perhaps these RNAs have an alternative solution forming a similar binding site, as is observed with some SAM riboswitches .. +PF11209 Protein of unknown function (DUF2993)
Pfam-B_003144 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11210 Protein of unknown function (DUF2996)
Pfam-B_003176 (release 23.0). This family of proteins has no known function.. +PF11211 Protein of unknown function (DUF2997)
Pfam-B_003181 (release 23.0). This family of proteins has no known function.. +PF11212 Protein of unknown function (DUF2999)
Pfam-B_003194 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11213 Protein of unknown function (DUF3006)
Pfam-B_003197 (release 23.0). This family of proteins has no known function.. +PF11214 Mediator complex subunit 2
This family of mediator complex subunit 2 proteins is conserved in fungi. Cyclin-dependent kinase CDK8 or Srb10 interacts with and phosphorylates Med2. Post-translational modifications of Mediator subunits are important for regulation of gene expression .. +PF11215 Protein of unknown function (DUF3010)
Pfam-B_003238 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11216 Protein of unknown function (DUF3012)
Pfam-B_003254 (release 23.0). This family of proteins with unknown function is restricted to Gammaproteobacteria.. +PF11217 Protein of unknown function (DUF3013)
Pfam-B_003257 (release 23.0). This bacterial family of proteins with unknown function appear to be restricted to Firmicutes.. +PF11218 Protein of unknown function (DUF3011)
Pfam-B_003246 (release 23.0). This bacterial family of proteins has no known function. Most members belong to Proteobacteria.. +PF11219 Protein of unknown function (DUF3014)
Pfam-B_003267 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11220 Protein of unknown function (DUF3015)
Pfam-B_003292 (release 23.0). This bacterial family of proteins has no known function.. +PF11221 Subunit 21 of Mediator complex
Med21 has been known as Srb7 in yeasts, hSrb7 in humans and Trap 19 in Drosophila. The heterodimer of the two subunits Med7 and Med21 appears to act as a hinge between the middle and the tail regions of Mediator .. +PF11222 Protein of unknown function (DUF3017)
Pfam-B_003304 (release 23.0). This bacterial family of proteins with unknown function appear to be restricted to Actinobacteria.. +PF11223 Protein of unknown function (DUF3020)
This family of fungal proteins is conserved towards the C-terminus of HMG domains. The function is not known.. +PF11224 Protein of unknown function (DUF3023)
Pfam-B_003319 (release 23.0). This bacterial family of proteins with unknown function appear to be restricted to Alphaproteobacteria.. +PF11225 Protein of unknown function (DUF3024)
Pfam-B_003325 (release 23.0). This family of proteins has no known function.. +PF11226 Protein of unknown function (DUF3022)
Pfam-B_003318 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11227 Protein of unknown function (DUF3025)
Pfam-B_003327 (release 23.0). Some members in this bacterial family of proteins are annotated as transmembrane proteins however this cannot be confirmed. Currently this family of proteins has no known function.. +PF11228 Protein of unknown function (DUF3027)
Pfam-B_003334 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11229 Protein of unknown function (DUF3028)
Pfam-B_003337 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11230 Protein of unknown function (DUF3029)
Pfam-B_003347 (release 23.0). Some members in this family of proteins are annotated as ykkI. Currently no function is known.. +PF11231 Protein of unknown function (DUF3034)
Pfam-B_003362 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11232 Med25_PTOV1;Med25_PTOV-SD2;
Mediator complex subunit 25 PTOV activation and synapsin 2. Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-active part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function . The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA domain, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This family is the combined PTOV and SD2 domains, the PTOV domain being the domain through which Med25 co-operates with the histone acetyltransferase CBP, but the function of the SD2 domain is unclear .. +PF11233 Protein of unknown function (DUF3035)
Pfam-B_003372 (release 23.0). This family of proteins with unknown function appear to be restricted to Alphaproteobacteria.. +PF11235 Mediator complex subunit 25 synapsin 1
The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA, domain, this SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. The function of the SD domains is unclear .. +PF11236 Protein of unknown function (DUF3037)
Pfam-B_003382 (release 23.0). This bacterial family of proteins has no known function.. +PF11237 Protein of unknown function (DUF3038)
Pfam-B_003387 (release 23.0). This family of proteins with unknown function appear to be restricted to Cyanobacteria.. +PF11238 Protein of unknown function (DUF3039)
Pfam-B_003408 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11239 Protein of unknown function (DUF3040)
Pfam-B_003409 (release 23.0). Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed.. +PF11240 Protein of unknown function (DUF3042)
Pfam-B_003420 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes.. +PF11241 Protein of unknown function (DUF3043)
Pfam-B_003428 (release 23.0). Some members in this family of proteins with unknown function are annotated as membrane proteins. This cannot be confirmed.. +PF11242 Protein of unknown function (DUF2774)
This is a viral family of proteins with unknown function.. +PF11243 Protein of unknown function (DUF3045)
Members in this family of proteins are annotated as gene protein 30.1. Currently no function is known.. +PF11244 Mediator complex subunit 25 C-terminal NR box-containing
The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA, domain, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and this C-terminal NR box-containing domain (646-650) from 646-747. The NR box of MED25 is critical for its recruitment to the promoter, probably through an interaction with pre bound RAR .. +PF11245 Protein of unknown function (DUF2544)
This is a bacterial family of proteins with unknown function.. +PF11246 Base plate wedge protein 53
The baseplate of bacteriophage T4 controls host cell recognition, attachment, tail sheath contraction and viral DNA ejection. The structure of the baseplate suggests a mechanism of baseplate structural transition during the initial stages of T4 infection. The baseplate is assembled from six identical wedges that surround the central hub. Gp53, along with other T4 gene products, combine sequentially to assemble a wedge .. +PF11247 Protein of unknown function (DUF2675)
Members in this family of proteins are annotated as Gene protein 5.5. Currently no function is known.. +PF11248 Protein of unknown function (DUF3046)
Pfam-B_3651 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11249 Protein of unknown function (DUF3047)
Pfam-B_3654 (release 23.0). This bacterial family of proteins has no known function.. +PF11250 Protein of unknown function (DUF3049)
Pfam-B_3659 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11251 Protein of unknown function (DUF3050)
Pfam-B_3661 (release 23.0). This bacterial family of proteins has no known function.. +PF11252 Protein of unknown function (DUF3051)
Pfam-B_3671 (release 23.0). This viral family of proteins has no known function.. +PF11253 Protein of unknown function (DUF3052)
Pfam-B_3674 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11254 Protein of unknown function (DUF3053)
Pfam-B_3678 (release 23.0). Some members in this family of proteins are annotated as the membrane protein YiaF. No function is currently known.. +PF11255 Protein of unknown function (DUF3054)
Pfam-B_3684 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known.. +PF11256 Protein of unknown function (DUF3055)
Pfam-B_3685 (release 23.0). This family of proteins with unknown function appear to be restricted to Firmicutes.. +PF11258 Protein of unknown function (DUF3048)
Pfam-B_3658 (release 23.0). Some members in this bacterial family of proteins are annotated as YerB. However currently no function is known.. +PF11259 Protein of unknown function (DUF3060)
Pfam-B_3702 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed.. +PF11260 Major ampullate spidroin 1 and 2
Pfam-B_001419 (release 23.0). Dragline silk is composed of two proteins, major ampullate spidroin 1 (MaSp1) and major ampullate spidroin 2 (MaSp2) . MaSp1 contains five alpha-helices . Only the C-terminus of the proteins are shown.. +PF11261 Interferon regulatory factor 2-binding protein zinc finger
Pfam-B_1430 (release 23.0). IRF-2BP1 and IRF-2BP2 are nuclear transcriptional repressor proteins and can inhibit both enhancer-activated and basal transcription. They both contain N-terminal zinc finger represented in this family and C-terminal RING finger domains .. +PF11262 Transcription factor/nuclear export subunit protein 2
Pfam-B_002604 (release 23.0). THO and TREX form a eukaryotic complex which functions in messenger ribonucleoprotein metabolism and plays a role in preventing the transcription-associated genetic instability [1,2]. Tho2, along with four other subunits forms THO . +PF11263 Borrelia burgdorferi attachment protein P66
Pfam-B_003349 (release 23.0). P66 is an outer membrane protein in Borrelia burgdorferi, the agent of Lyme disease. P66 has a role in the attachment of Borrelia burgdorferi to human cell-surface receptors .. +PF11264 Thylakoid formation protein
Pfam-B_003380 (release 23.0). THF1 is localised to the outer plastid membrane and the stroma. THF1 has a role in sugar signalling . THF1 is also thought to have a role in chloroplast and leaf development . THF1 has been shown to play a crucial role in vesicle-mediated thylakoid membrane biogenesis .. +PF11265 Mediator complex subunit 25 von Willebrand factor type A
The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA domain which is this one, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This VWA or von Willebrand factor type A domain when bound to RAR and the histone acetyltransferase CBP is responsible for recruiting Med1 to the rest of the Mediator complex .. +PF11266 Protein of unknown function (DUF3066)
Pfam-B_3735 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11267 Protein of unknown function (DUF3067)
Pfam-B_3740 (release 23.0). This family of proteins has no known function.. +PF11268 Protein of unknown function (DUF3071)
Pfam-B_3805 (release 23.0). Some members in this family of proteins are annotated as DNA-binding proteins however this cannot be confirmed. Currently no function is known.. +PF11269 Protein of unknown function (DUF3069)
Pfam-B_3783 (release 23.0). This family of proteins with unknown function appear to be restricted to Gammaproteobacteria.. +PF11270 Protein of unknown function (DUF3070)
Pfam-B_3804 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11271 Protein of unknown function (DUF3068)
Pfam-B_3769 (release 23.0). Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed.. +PF11272 Protein of unknown function (DUF3072)
Pfam-B_3823 (release 23.0). This bacterial family of proteins has no known function.. +PF11273 Protein of unknown function (DUF3073)
Pfam-B_3852 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11274 Protein of unknown function (DUF3074)
Pfam-B_3858 (release 23.0). This eukaryotic family of proteins has no known function but appears to be part of the START superfamily.. +PF11275 Protein of unknown function (DUF3077)
Pfam-B_3820 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11276 Protein of unknown function (DUF3078)
Pfam-B_3846 (release 23.0). This bacterial family of proteins has no known function.. +PF11277 Mediator complex subunit 24 N-terminal
This subunit of the Mediator complex appears to be conserved only from insects to humans. It is essential for correct retinal development in fish. Subunit composition of the mediator contributes to the control of differentiation in the vertebrate CNS as there are divergent functions of the mediator subunits Crsp34/Med27, Trap100/Med24, and Crsp150/Med14 .. +PF11278 Protein of unknown function (DUF3079)
Pfam-B_3866 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11279 Protein of unknown function (DUF3080)
Pfam-B_3870 (release 23.0). Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. Currently this family has no known function.. +PF11280 Protein of unknown function (DUF3081)
Pfam-B_3884 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11281 Protein of unknown function (DUF3083)
Pfam-B_3898 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11282 Protein of unknown function (DUF3082)
Pfam-B_3896 (release 23.0). This family of proteins has no known function.. +PF11283 Protein of unknown function (DUF3084)
Pfam-B_3912 (release 23.0). This bacterial family of proteins has no known function.. +PF11284 Protein of unknown function (DUF3085)
Pfam-B_3922 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11285 Protein of unknown function (DUF3086)
Pfam-B_3929 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11286 Protein of unknown function (DUF3087)
Pfam-B_3938 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11287 Protein of unknown function (DUF3088)
Pfam-B_3952 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11288 Protein of unknown function (DUF3089)
Pfam-B_3953 (release 23.0). This family of proteins has no known function but appears to have an alpha/beta hydrolase domain and so is likely to be enzymatic.. +PF11289 Protein of unknown function (DUF3092)
Pfam-B_3988 (release 23.0). This viral family of proteins has no known function.. +PF11290 Protein of unknown function (DUF3090)
Pfam-B_3954 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11291 Protein of unknown function (DUF3091)
Pfam-B_3979 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11292 Protein of unknown function (DUF3093)
Pfam-B_4007 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria. Some members are annotated as alanine rich membrane proteins however this cannot be confirmed.. +PF11293 Protein of unknown function (DUF3094)
Pfam-B_4017 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11294 Protein of unknown function (DUF3095)
Pfam-B_4020 (release 23.0). Some members in this bacterial family of proteins are annotated as adenylyl cyclase however this cannot be confirmed. Currently no function is known.. +PF11295 Protein of unknown function (DUF3096)
Pfam-B_4028 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11296 Protein of unknown function (DUF3097)
Pfam-B_4031 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11297 Protein of unknown function (DUF3098)
Pfam-B_4061 (release 23.0). This bacterial family of proteins has no known function.. +PF11298 Protein of unknown function (DUF3099)
Pfam-B_4064 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known.. +PF11299 Protein of unknown function (DUF3100)
Pfam-B_4068 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known.. +PF11300 Protein of unknown function (DUF3102)
Pfam-B_4016 (release 23.0). This family of proteins has no known function.. +PF11301 Protein of unknown function (DUF3103)
Pfam-B_4046 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11302 Protein of unknown function (DUF3104)
Pfam-B_4053 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11303 Protein of unknown function (DUF3105)
Pfam-B_4062 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known.. +PF11304 Protein of unknown function (DUF3106)
Pfam-B_4069 (release 23.0). Some members in this family of proteins are annotated as transmembrane proteins however this cannot be confirmed. Currently no function is known.. +PF11305 Protein of unknown function (DUF3107)
Pfam-B_3881 (release 23.0). Some members in this family of proteins are annotated as ATP-binding proteins however this cannot be confirmed. Currently no function is known.. +PF11306 Protein of unknown function (DUF3108)
Pfam-B_3856 (release 23.0). This is a bacterial family of putative lipoproteins. The structure for Swiss:Q64U78, PDB:3fzx, the first structural template for this large family including several homologues in the human gut microbiome and in metagenomic datasets, folds into a beta barrel that topologically looks like a small-scale porin (such as FepA). Swiss:Q64U78 is a putative exported protein, and this fold is of the YmcC-like type, with a predicted signal peptide SpI cleavage site AGAMA|QNQDC, and a Phobius server prediction of non-cytoplasmic localisation for amino acids 21-236. The possibility of it being a membrane protein can be ruled out by the hydrophilic nature of the solvent exposed surface outside the barrels. Analysis of sequence conservation suggests that an area near Glu172/Trp206 is potentially interesting. These two residues are also conserved in Dali hit PDB:2in5, a hypothetical lipoprotein classified as a new YmcC-like fold in SCOP (SCOP:159271, with a 12-stranded meander beta-sheet folded into a deformed beta-barrel) despite large structural differences between the two structures, suggesting similarity in function.. +PF11307 Protein of unknown function (DUF3109)
Pfam-B_4077 (release 23.0). This bacterial family of proteins has no known function.. +PF11308 DUF3111; GHL;
Glycosyl hydrolases related to GH101 family, GHL1-GHL3. Pfam-B_4091 (release 23.0). This family of bacterial and lower eukaryote glycosyl hydrolases is related to CAZy family GH101, and is made up of sub-families GHL1-GHL3. In the example Swiss:C02A26, the substrate-binding Asp is residue 596, the nucleophilic Asp is residue 706, and the proton donor Glu is residue 747.. +PF11309 Protein of unknown function (DUF3112)
Pfam-B_4107 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11310 Protein of unknown function (DUF3113)
Pfam-B_4134 (release 23.0). This family of proteins has no known function. It has a highly conserved sequence.. +PF11311 Protein of unknown function (DUF3114)
Pfam-B_4178 (release 23.0). Some members in this family of proteins with unknown function are annotated as cytosolic proteins. This cannot be confirmed.. +PF11312 Protein of unknown function (DUF3115)
Pfam-B_4191 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11313 Protein of unknown function (DUF3116)
Pfam-B_4194 (release 23.0). This family of proteins with unknown function appears to be restricted to Bacillales.. +PF11314 Protein of unknown function (DUF3117)
Pfam-B_4211 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11315 Mediator complex subunit 30
Pfam-B_28118 (release 23.0). Med30 is a metazoan-specific subunit of Mediator, having no homologues in yeasts.. +PF11316 DUF3118;
Putative rhamnosyl transferase . Pfam-B_4218 (release 23.0). Most members of this family are uncharacterised, but one is a putative side-chain-rhamnosyl transferase .. +PF11317 Protein of unknown function (DUF3119)
Pfam-B_4223 (release 23.0). This family of proteins has no known function.. +PF11318 Protein of unknown function (DUF3120)
Pfam-B_4230 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11319 Protein of unknown function (DUF3121)
Pfam-B_4233 (release 23.0). Some members in this family of proteins with unknown function are annotated as phospholipase proteins however this cannot be confirmed. Currently this family has no known function.. +PF11320 Protein of unknown function (DUF3122)
Pfam-B_4242 (release 23.0). This family of proteins with unknown function appear to be restricted to Cyanobacteria.. +PF11321 Protein of unknown function (DUF3123)
Pfam-B_4246 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11322 Protein of unknown function (DUF3124)
Pfam-B_4248 (release 23.0). This bacterial family of proteins has no known function.. +PF11323 Protein of unknown function (DUF3125)
Pfam-B_4250 (release 23.0). This family of proteins with unknown function appears to be restricted to Staphylococcus.. +PF11324 Protein of unknown function (DUF3126)
Pfam-B_4268 (release 23.0). This family of proteins with unknown function appear to be restricted to Alphaproteobacteria.. +PF11325 Domain of unknown function (DUF3127)
Pfam-B_4273 (release 23.0). This bacterial family of proteins has no known function. However, it does show distant similarity to Pfam:PF00436, with proteins such as Swiss:D1W984 being similar to both families. This suggests that this family may have a DNA-binding function.. +PF11326 Protein of unknown function (DUF3128)
Pfam-B_4309 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11327 Protein of unknown function (DUF3129)
Pfam-B_4316 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11328 Protein of unknown function (DUF3130)
Pfam-B_4322 (release 23.0). This bacterial family of proteins has no known function.. +PF11329 Protein of unknown function (DUF3131)
Pfam-B_4335 (release 23.0). This bacterial family of proteins has no known function.. +PF11330 Protein of unknown function (DUF3132)
Pfam-B_4348 (release 23.0). This viral family of proteins are 55kDa. No function is currently known.. +PF11331 Protein of unknown function (DUF3133)
Pfam-B_4400 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11332 Protein of unknown function (DUF3134)
Pfam-B_4408 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11333 Protein of unknown function (DUF3135)
Pfam-B_4409 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11334 Protein of unknown function (DUF3136)
Pfam-B_4419 (release 23.0). This family of proteins with unknown function appear to be restricted to Cyanobacteria.. +PF11335 Protein of unknown function (DUF3137)
Pfam-B_4422 (release 23.0). This bacterial family of proteins has no known function.. +PF11336 Protein of unknown function (DUF3138)
Pfam-B_4423 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11337 Protein of unknown function (DUF3139)
Pfam-B_4425 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes.. +PF11338 Protein of unknown function (DUF3140)
Pfam-B_4435 (release 23.0). Some members in this family of proteins are annotated as DNA binding proteins. No function is currently known.. +PF11339 Protein of unknown function (DUF3141)
Pfam-B_4443 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11340 Protein of unknown function (DUF3142)
Pfam-B_4454 (release 23.0). This bacterial family of proteins has no known function.. +PF11341 Protein of unknown function (DUF3143)
Pfam-B_4460 (release 23.0). This family of proteins has no known function.. +PF11342 Protein of unknown function (DUF3144)
Pfam-B_4465 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11343 Protein of unknown function (DUF3145)
Pfam-B_4467 (release 23.0). This family of proteins with unknown function appear to be restricted to Actinobacteria.. +PF11344 Protein of unknown function (DUF3146)
Pfam-B_4468 (release 23.0). This family of proteins with unknown function appear to be restricted to Cyanobacteria.. +PF11345 Protein of unknown function (DUF3147)
Pfam-B_4475 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known.. +PF11346 Protein of unknown function (DUF3149)
Pfam-B_4495 (release 23.0). This bacterial family of proteins has no known function.. +PF11347 Protein of unknown function (DUF3148)
Pfam-B_4488 (release 23.0). This family of proteins has no known function.. +PF11348 Protein of unknown function (DUF3150)
Pfam-B_4471 (release 23.0). This bacterial family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11349 Protein of unknown function (DUF3151)
Pfam-B_4506 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11350 Protein of unknown function (DUF3152)
Pfam-B_4512 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function.. +PF11351 Protein of unknown function (DUF3154)
Pfam-B_4516 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11352 Protein of unknown function (DUF3155)
Pfam-B_4534 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11353 Protein of unknown function (DUF3153)
Pfam-B_4513 (release 23.0). This family of proteins with unknown function appear to be restricted to Cyanobacteria. Some members are annotated as membrane proteins however this cannot be confirmed.. +PF11354 Protein of unknown function (DUF3156)
Pfam-B_4555 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11355 Protein of unknown function (DUF3157)
Pfam-B_4561 (release 23.0). This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11356 Type IV pilus biogenesis
Pfam-B_3750 (release 23.0). Type IV pili are required for auto-agglutination, twitching motility, biofilm formation, adherence and DNA uptake during transformation . PilP is an inner membrane protein, required for pilus expression and transformation . PilP interacts with PilQ which suggests that the two proteins may have coordinated activity in functions such as pilus extrusion/retraction .. +PF11357 Cell cycle regulatory protein
Pfam-B_3875 (release 23.0). Speedy (Spy1) is a cell cycle regulatory protein which activates CDK2, the major kinase that allows progression through G1/S phase and further replication events . Spy1 expression overcomes a p27-induced cell cycle arrest to allow for DNA synthesis, so cell cycle progression occurs due to an interaction between Spy1 and p27 . Spy1 is also known as Ringo protein A.. +PF11358 Protein of unknown function (DUF3158)
Pfam-B_3964 (release 23.0). Some members in this family of proteins are annotated as integrase regulator R however this cannot be confirmed. This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11359 Glycoprotein UL132
Pfam-B_4015 (release 23.0). Glycoprotein UL132 is a low-abundance structural component of Human cytomegalovirus (HCMV) . The function of this protein is not fully understood.. +PF11360 Protein of unknown function (DUF3110)
Pfam-B_4086 (release 23.0). This family of proteins has no known function.. +PF11361 Protein of unknown function (DUF3159)
Pfam-B_4163 (release 23.0). Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. Currently this family of proteins has no known function.. +PF11362 Protein of unknown function (DUF3161)
Pfam-B_4173 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11363 Protein of unknown function (DUF3164)
Pfam-B_4249 (release 23.0). This family of proteins has no known function.. +PF11364 Protein of unknown function (DUF3165)
Pfam-B_4331 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function.. +PF11365 Protein of unknown function (DUF3166)
Pfam-B_4333 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11367 Protein of unknown function (DUF3168)
Pfam-B_4337 (release 23.0). This family of proteins has no known function but is likely to be a component of bacteriophage.. +PF11368 Protein of unknown function (DUF3169)
Pfam-B_4342 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function.. +PF11369 Protein of unknown function (DUF3160)
Pfam-B_4384 (release 23.0). This family of proteins has no known function.. +PF11371 Protein of unknown function (DUF3172)
Pfam-B_4527 (release 23.0). This family of proteins has no known function.. +PF11372 Domain of unknown function (DUF3173)
Pfam-B_4543 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes. These proteins appear to be distantly related to HHH domains and are therefore likely to be DNA-binding.. +PF11373 Protein of unknown function (DUF3175)
Pfam-B_4566 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11374 Protein of unknown function (DUF3176)
Pfam-B_4567 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11375 Protein of unknown function (DUF3177)
Pfam-B_4580 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function.. +PF11376 Protein of unknown function (DUF3179)
Pfam-B_4591 (release 23.0). This family of proteins has no known function.. +PF11377 Protein of unknown function (DUF3180)
Pfam-B_4592 (release 23.0). Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function.. +PF11378 Protein of unknown function (DUF3181)
Pfam-B_4595 (release 23.0). This family of proteins has no known function.. +PF11379 Protein of unknown function (DUF3182)
Pfam-B_4440 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11380 Protein of unknown function (DUF3184)
Pfam-B_4192 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11381 Protein of unknown function (DUF3185)
Pfam-B_4606 (release 23.0). Some members in this bacterial family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known.. +PF11382 Protein of unknown function (DUF3186)
Pfam-B_4607 (release 23.0). This bacterial family of proteins has no known function.. +PF11383 Protein of unknown function (DUF3187)
Pfam-B_4660 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria. These proteins are likely to be outer membrane proteins.. +PF11384 Protein of unknown function (DUF3188)
Pfam-B_4573 (release 23.0). This bacterial family of proteins has no known function.. +PF11385 Protein of unknown function (DUF3189)
Pfam-B_4499 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes. +PF11386 Vitelline envelope receptor for lysin
Pfam-B_1349 (release 23.0). VERL, the egg vitelline envelope (VE) receptor for lysin, is a giant unbranched glycoprotein comprising 30% of the vitelline envelope. Lysin binds to VERL and creates a hole as VERL molecules lose cohesion and splay apart. These proteins are important in the mediation of fertilisation . +PF11387 Protein of unknown function (DUF2795)
Pfam-B_1395 (release 23.0). This family of proteins has no known function.. +PF11388 Phagosome trafficking protein DotA
Pfam-B_001493 (release 23.0). DotA is essential for intracellular growth in Legionella . DotA is thought to play an important role in regulating initial phagosome trafficking decisions either upon or immediately after macrophage uptake .. +PF11389 Leptospira porin protein OmpL1
Pfam-B_001515 (release 23.0). OmpL1 is a member of the outer membrane (OM) proteins in the mammalian pathogen Leptospira. Specifically, it is a porin .. +PF11390 NADH-dependant formate dehydrogenase delta subunit FdsD
Pfam-B_1352 (release 23.0). FdsD is the delta subunit of the enzyme formate dehydrogenase. This subunit may play a role in maintaining the quaternary structure by means of electrostatic interactions with the other subunits . The delta subunit is not involved in the active centre of the enzyme .. +PF11391 Protein of unknown function (DUF2798)
Pfam-B_1194 (release 23.0). This family of proteins has no known function.. +PF11392 Protein of unknown function (DUF2877)
Pfam-B_002434 (release 23.0). This bacterial family of proteins are putative carboxylase proteins however this cannot be confirmed.. +PF11393 Macrophage killing protein with similarity to conjugation protein
Pfam-B_002787 (release 23.0). IcmL contains two amphipathic beta-sheet regions, required for the pore-forming ability which may be related to the transfer of this protein into a host cell membrane . The icmL gene shows significant similarity to plasmid genes involved in conjugation however IcmL is thought to be required for macrophage killing. It is unknown whether conjugation plays a role in macrophage killing .. +PF11394 Protein of unknown function (DUF2875)
Pfam-B_002814 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11395 Protein of unknown function (DUF2873)
Pfam-B_002845 (release 23.0). This viral family of proteins has no known function.. +PF11396 Protein of unknown function (DUF2874)
Pfam-B_002962 (release 23.0). This bacterial family of proteins is probably periplasmic and of unknown function. There may be between one and six copies of this domain per sequence.. +PF11397 Glycosyltransferase (GlcNAc)
Pfam-B_002901 (release 23.0). GlcNAc is an enzyme that carries out the first glycosylation step of hydroxylated Skp1, a ubiquitous eukaryotic protein, in the cytoplasm .. +PF11398 Protein of unknown function (DUF2813)
Pfam-B_002207 (release 23.0). This entry contains YjbD from Escherichia coli (Swiss:P75828), which is annotated as a nucleotide triphosphate hydrolase.. +PF11399 Protein of unknown function (DUF3192)
Pfam-B_002991 (release 23.0). Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed.. +PF11401 Tetrabrachion
Tetrabrachion forms a parallel right-handed coiled coil structure with hydrophobic interactions and salt bridges forming a thermostable tetrameric structure. It contains large hydrophobic cavities. No function is known for this family of proteins .. +PF11402 Antifungal protein
Antifungal protein consists of five antiparallel beta strands which are highly twisted creating a beta barrel stabilised by four internal disulphide bridges . A cationic site adjacent to a hydrophobic stretch on the protein surface may constitute a phospholipid binding site .. +PF11403 Yeast metallothionein
Metallothioneins are characterised by an abundance of cysteine residues and a lack of generic secondary structure motifs. This protein functions in primary metal storage, transport and detoxification . For the first 40 residues in the protein the polypeptide wraps around the metal by forming two large parallel loops separated by a deep cleft containing the metal cluster .. +PF11404 Potassium voltage-gated channel
Fast inactivation of voltage-dependant potassium channels occurs by a 'ball-and-chain'-type mechanism. It controls membrane excitability and signal propagation in central neurons . Inactivation is regulated by protein phosphorylation where phosphorylation of serine residues leads to a reduction of the fast inactivation .. +PF11405 Bromelain_inhib;
Bromelain inhibitor VI. Bromelain inhibitor VI is a double-chain inhibitor consisting of a 11-residue and a 41-residue chain. This protein is the 41-residue heavy chain which is joined to the 11-residue chain by disulphide bonds. The inhibitor acts to inhibit the cysteine proteinase bromelain .. +PF11406 Antimicrobial peptide tachystatin A
Tachystatin A contains a cysteine-stabilised triple-stranded beta-sheet and shows features common to membrane-interactive peptides. Tachystatin A is thought to have an antimicrobial activity similar to defensins. Tachystatin A is also a chitin-binding peptide .. +PF11407 Type II restriction enzyme MunI
Type II restriction enzyme MunI recognises the palindromic sequence C/AATTG. It makes contact with the DNA via the major groove .. +PF11408 Sgs1 RecQ helicase
RecQ helicases unwind DNA in an ATP-dependent manner. Sgs1 has a HRDC (helicase and RNaseD C-terminal) domain which modulates the helicase function via auxiliary contacts to DNA . . +PF11409 Smad anchor for receptor activation (SARA)
Smad proteins mediate transforming growth factor-beta (TGF-beta) signaling from the transmembrane serine-threonine receptor kinases to the nucleus . SARA recruits Smad2 to the TGF-beta receptors for phosphorylation .. +PF11410 Antifungal peptide
This peptide has six cysteines involved in three disulphide bonds. It contains a global fold which involves a cysteine-knotted three-stranded antiparallel beta-sheet along with a flexible loop and four beta-reverse turns. It also has an amphiphilic character which is the main structural basis of its biological function .. +PF11411 DNA ligase IV
DNA ligase IV along with Xrcc4 functions in DNA non-homologous end joining. This process is required to mend double-strand breaks. Upon ligase binding to an Xrcc4 dimer, the helical tails unwind leading to a flat interaction surface .. +PF11412 Disulphide bond corrector protein DsbC
DsbC rearranges incorrect disulphide bonds during oxidative protein folding. It is activated by the N-terminal domain of DsbD, a transmembrane electron transporter. DsbD binds to a DsbC dimer and selectively activates it using electrons from the cytoplasm .. +PF11413 Hypoxia-inducible factor-1
HIF-1 is a transcriptional complex and controls cellular systemic homeostatic responses to oxygen availability . In the presence of oxygen HIF-1 alpha is targeted for proteasomal degradation by pHVL, a ubiquitination complex .. +PF11414 Adenomatous polyposis coli tumour suppressor protein
The tumour suppressor protein, APC, has a nuclear export activity as well as many different intracellular functions. The structure consists of three alpha-helices forming two separate antiparallel coiled coils .. +PF11415 Termicin;
Antifungal peptide termicin. Termicin is a cysteine-rich antifungal peptide which exhibits antibacterial activity. A cysteine stabilised alpha beta motif is formed due to an alpha-helical segment and a two-stranded antiparallel beta-sheet .. +PF11416 Integral membrane protein Sed5p
Sed5p interacts with Sly1p , a positive regulator of intracellular membrane fusion, allowing SM proteins to stay associated with the assembling fusion machinery. This allows for participation in late fusion steps .. +PF11417 Loader and inhibitor of phage G40P
G39P inhibits the initiation of DNA replication by blocking G40P replicative helicase. G39P has a bipartite structure consisting of a folded N-terminal domain and an unfolded C-terminal domain. The C terminal is essential for helicase interaction .. +PF11418 Phi29 scaffolding protein
This protein is also referred to as gp7. The protein contains a DNA-binding function and may have a role in mediating the structural transition from prohead to mature virus and also scaffold release. Gp7 is arranged within the capsid as a series of concentric shells .. +PF11419 Protein of unknown function (DUF3194)
This family of proteins has no known function however the structure has been determined. The protein consists of two alpha-helices packed on the same side of a central beta-hairpin .. +PF11420 Bacteriocin subtilosin A
Subtilosin A is a bacteriocin from Bacillus subtilis. The protein has a cyclized peptide backbone and forms three cross-links between the sulphurs of Cys13, Cys7 and Cys4 and the alpha-positions of Phe22, Thr28 and Phe31 .. +PF11421 ATP synthase F1 beta subunit
The NMR solution structure of the protein in SDS micelles was found to contain two helices, an N-terminal amphipathic alpha-helix and a C-terminal alpha-helix separated by a large unstructured internal domain. The N-terminal alpha-helix is the Tom20 receptor binding site whereas the C-terminal alpha-helix is located upstream of the mitochondrial processing peptidase cleavage site .. +PF11422 Initiator binding protein 39 kDa
IBP39 recognises the initiator which is solely responsible for transcription start site selection. IBP39 contains an N-terminal Inr binding domain connected to a C-terminal domain. The C domain structure indicates that it interacts with the T. vaginalis RNAP II large subunit C-terminal domain. Binding of IBP39 to Inr recruits RNAP II and initiates transcription .. +PF11423 Regulatory protein Mnt
Mnt is a repressor which is involved in the genetic switch between lysogenic and lytic growth in bacteriophage P22. The C-terminal domain of the protein consists of a dimer of two antiparallel coiled coils with a right handed twist, which is both stronger and has closer inter-helical separation compared with those found in left-handed coiled coils .. +PF11424 Protein of unknown function (DUF3195)
This archaeal family of proteins has no known function.. +PF11426 Tn7_TnsC;
Tn7 transposition regulator TnsC. TnsC is a molecular switch that regulates transposition and interacts with TnsA which is a component of the transposase. The two proteins interact via the residues 504-555 on TnsC. The TnsA/TnsC interaction is very important in Tn7 transposition .. +PF11427 Tc3_transposase;
Tc3 is a transposase with a specific DNA-binding domain which contains three alpha-helices, two of which form a helix-turn-helix motif which makes four base-specific contacts with the major groove. The N-terminus makes contacts with the minor groove. There is a base specific recognition between Tc3 and the transposon DNA. The DNA binding domain forms a dimer in which each monomer binds a separate transposon end. This implies that the dimer has a role in synapsis and is necessary for the simultaneous cleavage of both transposon termini .. +PF11428 Protein of unknown function (DUF3196)
This protein is the product of the gene MPN330 and is thought to be involved in a cellular function that has yet to be characterised. The protein has 11 helices and a novel fold . No function is currently known for this protein.. +PF11429 Colicin D
Colicin D is a tRNase which kills sensitive E.coli cells via a specific tRNA cleavage. It targets the four isoaccepting tRNAs for Arg and cleaves the phosphodiester bond between positions 38 and 39 at the 3' junction of the anticodon stem and the loop .. +PF11430 Programmed cell death activator EGL-1
Initiation of programmed cell death in C.elegans occurs by the binding of EGL-1 to CED-9 which disrupts a complex involving CED-4/CED-9 and allows CED-4 to activate CED-3, a caspase. It is the C terminal domain of EGL-1 which is involved in the formation of the complex with CED-9. The formation of the complex induces structural rearrangements in CED-9 and EGL-1 adopts an extended alpha-helical conformation .. +PF11431 Membrane transport protein MerF
The mercury transport membrane protein, MerF has a core helix-loop-helix domain. It has two vicinal pairs of cysteine residues which are involved in the transport of Hg(II) across the membrane and are exposed to the cytoplasm .. +PF11432 Protein of unknown function (DUF3197)
This bacterial family of proteins has no known function.. +PF11433 Protein of unknown function (DUF3198)
Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently, this archaeal family has no known function.. +PF11434 Chemotaxis-inhibiting protein CHIPS
The chemotaxis inhibitory protein, CHIPS, is an excreted virulence factor which acts by binding to C5a and formylated peptide receptor (FPR), blocking phagocyte responses. A fragment of CHIPS, which contains residues 31-121, comprises an alpha helix packed onto a four stranded anti-parallel beta-sheet. Most of the conserved residues of CHIPS are present in the alpha-helix .. +PF11435 RNA binding protein She2p
She2p is a RNA binding protein which binds to RNA via a helical hairpin. The protein is required for the actin dependent transport of ASH1 mRNA in yeast, a form of mRNP translocation. ASH1 mRNP requires recognition of zip code elements by the RNA binding protein She2p. She2p contains a globular domain consisting of a bundle of five alpha-helices .. +PF11436 Protein of unknown function (DUF3199)
Some members in this family of proteins with unknown function are annotated as YqbG however this cannot be confirmed. Currently the protein has no known function.. +PF11437 Vanadium-binding protein 2
The Vanadium binding protein, Vanabin2, contains four alpha-helices connected by nine disulphide bonds. Vanadium accumulates in Ascidians however the biological reason remains unclear .. +PF11438 36-mer N-terminal peptide of the N protein (N36)
The arginine-rich motif of the N protein is involved in transcriptional antitermination of phage lambda. N36 forms a complex with boxB RNA by binding tightly to the major groove of the boxB hairpin via hydrophobic and electrostatic interactions forming a bent alpha helix .. +PF11439 DUF3200;
Type III secretion system filament chaperone CesA. This family represents a chaperone protein for the type III secretion system - TTSS - translocon protein EspA, to prevent the latter's self-polymerisation. The TTSS is a highly specialised bacterial protein secretory pathway, similar in many ways to the flagellar system, that is essential for the pathogenesis of many Gram-negative bacteria. The twenty or so proteins making up the TTSS apparatus, referred to as the needle complex, allow the injection of virulence proteins (known as effectors) directly into the cytoplasm of the eukaryotic host cells they infect; however, the injection process itself is mediated by a subset of extracellular proteins that are secreted by the needle complex to the bacterial surface and assembled into the type III translocon - EspA. EspB and EspD. EspA polymerises into an extracellular filament, and, as with other fibrous proteins, is apt to undergo massive polymerisation when overexpressed. CesA is the secretion chaperone protein that binds to EspA. CesA is dimeric and helical, and it traps EspA in a monomeric state and inhibits its polymerisation.. +PF11440 DNA alpha-glucosyltransferase
The T4 bacteriophage of E.coli protects its DNA via two glycosyltransferases which glucosylate 5-hydroxymethyl cytosines (5-HMC) using UDP-glucose. These two proteins are the retaining alpha-glucosyltransferase (AGT) and the inverting beta-glucosyltransferase (BGT). The proteins in this family are AGT. AGT adopts the GT-B fold and binds both the sugar donor and acceptor to the C-terminal domain. There is evidence for a role of AGT in the base-flipping mechanism and for its specific recognition of the acceptor base .. +PF11441 Pilot protein MxiM
MxiM, a Shigella pilot protein, is essential for the assembly and membrane association of the Shigella secretin MxiD. MxiM contains an orthologous secretin component and has a specific binding domain for the acyl chains of bacterial lipids . The C terminal domain of MxiD hinders lipid binding to MxiM .. +PF11442 Protein of unknown function (DUF2826)
Gunasekaran P, Mistry J. Pfam-B_001753 (release 23.0). This is a family of uncharacterised proteins that is highly conserved in Trypanosoma cruzi.. +PF11443 Domain of unknown function (DUF2828)
Gunasekaran P, Mistry J. Pfam-B_001814 (release 23.0). This is an uncharacterised domain found in eukaryotes and viruses.. +PF11444 Protein of unknown function (DUF2895)
Gunasekaran P, Mistry J. Pfam-B_002001 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11445 Protein of unknown function (DUF2894)
Gunasekaran P, Mistry J. Pfam-B_001968 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11446 Protein of unknown function (DUF2897)
Gunasekaran P, Mistry J. Pfam-B_002015 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11447 Protein of unknown function (DUF3201)
This archaeal family of proteins has no known function.. +PF11448 Protein of unknown function (DUF3005)
Gunasekaran P, Mistry J. Pfam-B_3492 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11449 Protein of unknown function (DUF2899)
Gunasekaran P, Mistry J. Pfam-B_002023 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11450 Protein of unknown function (DUF3008)
Gunasekaran P, Mistry J. Pfam-B_3521 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11452 Protein of unknown function (DUF3000)
Gunasekaran P, Mistry J. Pfam-B_3481 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11453 Protein of unknown function (DUF2950)
Gunasekaran P, Mistry J. Pfam-B_002484 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11454 Protein of unknown function (DUF3016)
Gunasekaran P, Mistry J. Pfam-B_3517 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11455 Protein of unknown function (DUF3018)
Gunasekaran P, Mistry J. Pfam-B_3532 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11456 Protein of unknown function (DUF3019)
Gunasekaran P, Mistry J. Pfam-B_3539 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11457 Protein of unknown function (DUF3021)
Gunasekaran P, Mistry J. Pfam-B_3526 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11458 Membrane-integrating protein Mistic
Mistic is an integral membrane protein that folds autonomously into the membrane. The protein forms a helical bundle with a polar lipid-facing surface. Mistic can be used for high-level production of other membrane proteins in their native conformations .. +PF11459 Protein of unknown function (DUF2893)
Gunasekaran P, Mistry J. Pfam-B_001947 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11460 Protein of unknown function (DUF3007)
Gunasekaran P, Mistry J. Pfam-B_3514 (release 23.0). This is a family of uncharacterised proteins found in bacteria and eukaryotes.. +PF11461 Rab interacting lysosomal protein
RILP contains a domain which contains two coiled-coil regions and is found mainly in the cytosol. RILP is recruited onto late endosomal and lysosomal membranes by Rab7 and acts as a downstream effector of Rab7. This recruitment process is important for phagosome maturation and fusion with late endosomes and lysosomes .. +PF11462 Protein of unknown function (DUF3203)
This family of proteins with unknown function appears to be restricted to Gammaproteobacteria.. +PF11463 R.HinP1I restriction endonuclease
Hinp1I is a type II restriction endonuclease, recognising and cleaving a palindromic tetranucleotide sequence (G/CGC) resulting in 2 nt 5' overhanging ends . HINP1I has a conserved catalytic core domain containing an active site motif SDC18QXK and a DNA-binding domain .. +PF11464 Rabenosyn Rab binding domain
Rabenosyn-5 (Rbsn) is a multivalent effector which interacts with the Rab family. Rbsn contains distinct Rab4 and Rab5 binding sites within residues 264-500 and 627-784 respectively . Rab proteins are GTPases involved in the regulation of all stages of membrane trafficking .. +PF11465 Natural killer cell receptor 2B4
2B4 is a transmembrane receptor which is expressed primarily on natural killer cells. It plays a role in activating NK-mediated cytotoxicity through its interaction with CD48 on target cells in a subset of CD8 T cells . The structure of 2B4 consists of an immunoglobulin variable domain fold and contains two beta-sheets. One of the beta-sheets, the six-stranded sheet, contains structural features that may have a role in ligand recognition and receptor function .. +PF11466 Prion-like protein Doppel
Dpl is a homologue related to the prion protein (PrP). Dpl is toxic to neurons and is expressed in the brains of mice that do not express PrP. In DHPC and SDS micelles, Dpl shows about 40% alpha-helical structure however in aqueous solution it consists of a random coil. The alpha helical segment can adopt a transmembrane localisation also in a membrane . The unprocessed Dpl protein is thought to possess a possible channel formation mechanism which may be related to toxicity through direct interaction with cell membranes and damage to the cell membrane .. +PF11467 Lens epithelium-derived growth factor (LEDGF)
LEDGF is a chromatin-associated protein that protects cells from stress-induced apoptosis. It is the binding partner of HIV-1 integrase in human cells. The integrase binding domain (IBD) of LEDGF is a compact right-handed bundle composed of five alpha-helices. The residues essential for the interaction with the integrase are present in the inter-helical loop regions of the bundle structure .. +PF11468 Aromatic prenyltransferase Orf2
In vivo Orf2 attaches a geranyl group to a 1,3,6,8-tetrahydroxynaphthalene-derived polyketide during naphterpin biosynthesis . In vitro, Orf2 catalyses carbon-carbon based and carbon-oxygen based prenylation of hydroxyl-containing aromatic acceptors of synthetic, microbial and plant origin .. +PF11469 DUF3204;
This is a family of archaeal ribonuclease_III proteins.. +PF11470 TUG;
GLUT4 regulating protein TUG. TUG is a GLUT4 regulating protein and functions to retain membrane vesicles containing GLUT4 intracellularly. TUG releases the GLUT4 containing vesicles to the cellular exocytic machinery in response to insulin stimulation which allows translocation to the plasma membrane . TUG has an N-terminal ubiquitin-like domain (UBL1) which in similar proteins appears to participate in protein-protein interactions . The region does have an area of negative electrostatic potential and increased backbone motility which leads to suggestions of a potential protein-protein interaction site .. +PF11471 DUF3205;
Maltoporin periplasmic N-terminal extension. Pollington J, Coggill P. This domain would appear to be the periplasmic, N-terminal extension of the outer membrane maltoporins, Pfam:PF02264, LamB.. +PF11472 Protein of unknown function (DUF3206)
This bacterial family of proteins has no known function.. +PF11473 RNA binding protein B2
B2 is expressed by the insect Flock House virus (FHV) as a counter-defense mechanism against antiviral RNA silencing during infection. In vitro, B2 binds to dsRNA as a dimer and inhibits the cleavage of it by Dicer. B2 blocks cleavage of the FHV genome by Dicer and also the incorporation of FHV small interfering RNAs into the RNA-induced silencing complex .. +PF11474 Telomerase reverse transcriptase TEN domain
This is the N terminal domain of the protein telomerase reverse transcriptase called TEN. The TEN domain is able to bind both RNA and telomeric DNA and contributes towards telomerase catalysis. The TEN domain has a structure that consists of a core beta sheet surrounded by seven alpha helices and a short beta hairpin .. +PF11475 Virion protein N terminal domain
This is the N terminal domain of a family of virion proteins which contains a zinc finger domain. Currently no function is known.. +PF11476 Toxoplasma gondii micronemal protein 1 TgMIC1
TgMIC1 is released as part of a complex by Toxoplasma gondii prior to invasion. The complex which consists of TgMIC4-MIC1-MIC6 participates in host cell attachment and penetration and is critical in invasion. This is the C terminal domain of TgMIC1 which has a Galectin-like fold which interacts with and stabilises TgMIC6 providing a mechanism for an exit from the early secretory compartments and trafficking of the complex to micronemes .. +PF11477 Sialyltransferase PMO188
PMO188 is a sialyltransferase from P.multocida. It transfers sialic acid from cytidine 5'-monophosphonuraminic acid to an acceptor sugar . It has important catalytic residues such as Asp141, His311, Glu338, Ser355 and Ser356 .. +PF11478 Antimicrobial chitin binding protein tachystatin B
Tachystatin B is an antimicrobial chitin binding peptide and consists of two isotopes B1 and B2.Both structures contain a short antiparallel beta sheet with an inhibitory cysteine knot motif. Tyr(14) and Arg(17) are thought to be the essential residues for chitin binding .. +PF11479 RNA silencing suppressor P21
P21 is produced by Beet yellows virus to suppress the antiviral silencing response mounted by the host. P21 acts by binding directly to siRNA which is a mediator in the process. P21 has an octameric ring structure with a large central cavity . . +PF11480 Colicin-E5 Imm protein
Imms bind specifically to cognate colicins in order to protect their host cells . Imm-E5 is a specific inhibitor protein of colicin E5. It binds to E5 C-terminal ribonuclease domain (CRD) to prevent cell death. The binding mode of E5-CRD and Imm-E5 mimics that of mRNA and tRNA suggesting an evolutionary pathway from the RNA-RNA interaction through the RNA-protein interaction of tRNA/E5-CRD .. +PF11482 Protein of unknown function (DUF3208)
This bacterial family of proteins has no known function.. +PF11483 Protein of unknown function (DUF3209)
This family of proteins has no known function.. +PF11485 Protein of unknown function (DUF3211)
This archaeal family of proteins has no known function.. +PF11486 Protein of unknown function (DUF3212)
Members in this family of proteins are annotated as YfmB however currently no function for this protein is known.. +PF11487 Type II restriction enzyme SfiI
SfiI is a restriction enzyme that can cleave two DNA sites simultaneously to leave 3-base 3' overhangs. It acts as a homo-tetramer and recognises a specific eight base-pair palindromic DNA sequence. After binding two copies of its recognition sequence, SfiI becomes activated leading to cleavage of all four DNA strands. The structure of SfiI consists of a central twisted beta-sheet surrounded by alpha-helices.. +PF11488 Transcriptional regulatory protein LGE1
This family of proteins is conserved from fungi to human. In yeasts it is involved in the ubiquitination of histones H2A and H2B. This ubiquitination step is a vital one in the regulation of the transcriptional activity of RNA polymerase II. In S. cerevisiae, Rad6 and Bre1 are present in a complex, also containing Lge1, that is required for H2B ubiquitination. Bre1 is the H2B ubiquitin ligase that interacts with acidic activators, such as Gal4, and recruits Rad6 and its binding partner Lge1 to target promoters . In S. pombe the equivalent protein to Lge1 appears to be Shf1.. +PF11489 Protein of unknown function (DUF3210)
This is a family of proteins conserved in yeasts. The function is not known. The Schizosaccharomyces pombe member is Swiss:O94497 and the Saccharomyces cerevisiae member is Swiss:P40563.. +PF11490 DNA_pol3_alph_N;
DNA polymerase III polC-type N-terminus II. Pfam-B_853 (release 23.0). This is the second N-terminal domain, NII domain, of the DNA polymerase III polC subunit A that is found only in Firmicutes. DNA polymerase polC-type III enzyme functions as the 'replicase' in low G + C Gram-positive bacteria . Purine asymmetry is a characteristic of organisms with a heterodimeric DNA polymerase III alpha-subunit constituted by polC which probably plays a direct role in the maintenance of strand-biased gene distribution; since, among prokaryotic genomes, the distribution of genes on the leading and lagging strands of the replication fork is known to be biased . It has been predicted that the N-terminus of polC folds into two globular domains, NI and NII. A predicted hydrophobic surface patch suggests this domain may be involved in protein binding . This domain is associated with DNA_pol3_alpha Pfam:PF07733 and DNA_pol3_a_NI Pfam:PF14480.. +PF11491 Protein of unknown function (DUF3213)
The backbone structure of this family of proteins has been determined however the function remains unknown. The protein has an alpha and beta structure with a ferredoxin-like fold .. +PF11492 Cricket paralysis virus, VP4
This is a family of minor capsid proteins, known as VP4, from the dicistroviridae. The dicistroviridae is a group of small, RNA-containing viruses that are closely structurally related to the picornaviridae. VP4 is a short, extended polypeptide chain found within the viral capsid, at the interface between the external protein shell and packaged RNA genome .. +PF11493 Thylakoid soluble phosphoprotein TSP9
The plant-specific protein, TSP9 is phosphorylated and released in response to changing light conditions from the photosynthetic membrane. The protein resembles the characteristics of transcription/translation regulatory factors. The structure of the protein is predicted to consist of a random coil .. +PF11494 Ta0938
Ta0938 is a protein of unknown function however the structure has been determined. The protein has a novel fold and a putative Zn-binding motif. The structure has two different parts, one region contains a beta sheet flanked by two alpha helices and the other contains a bundle of loops which contain all cysteines in the protein .. +PF11495 Archaeal transcriptional regulator TrmB
TrmB is an alpha-glucoside sensing transcriptional regulator. The protein is the transcriptional repressor for gene cluster encoding trehalose/maltose ABC transporter in T.litoralis and P.furiosus . TrmB has lost its DNA binding domain but retained its sugar recognition site. A nonreducing glucosyl residue is shared by all substrates bound to TrmB which suggests that it is a common recognition motif .. +PF11496 Class II histone deacetylase complex subunits 2 and 3
This family of class II histone deacetylase complex subunits HDA2 and HDA3 is found in fungi, The member from S. pombe is referred to as Ccq1 in Swiss:Q10432. These proteins associate with HDA1 to generate the activity of the HDA1 histone deacetylase complex. HDA1 interacts with itself and with the HDA2-HDA3 subcomplex to form a probable tetramer and these interactions are necessary for catalytic activity. The HDA1 histone deacetylase complex is responsible for the deacetylation of lysine residues on the N-terminal part of the core histones (H2A, H2B, H3 and H4). Histone deacetylation gives a tag for epigenetic repression and plays an important role in transcriptional regulation, cell cycle progression and developmental events. HDA2 and HDA3 have a conserved coiled-coil domain towards their C-terminus .. +PF11497 NADH-quinone oxidoreductase chain 15
This protein, Nqo15, is a part of respiratory complex 1 which is a complex that plays a central role in cellular energy production in both bacteria and mitochondria. Nqo15 has a similar fold to Frataxin, the mitochondrial iron chaperone. This protein may have a role in iron-sulphur cluster regeneration in the complex. This domain represents more than half the molecular mass of the entire complex .. +PF11498 Transcriptional activator LAG-3
The C.elegans Notch pathway, involved in the control of growth, differentiation and patterning in animal development, relies on either of the receptors GLP-1 or LIN-12 . Both these receptors promote signalling by the recruitment of LAG-3 to target promoters, where it then acts as a transcriptional activator. LAG-3 works as a ternary complex together with the DNA binding protein, LAG-1 .. +PF11500 Spindle pole body formation-associated protein
This is the central coiled-coil region of cut12 also found in other fungi, barring S. cerevisiae. The full protein has two predicted coiled-coil regions, and one consensus phosphorylation site for p34cdc2 and two for MAP kinase. During fission yeast mitosis, the duplicated spindle pole bodies (SPBs) nucleate microtubule arrays that interdigitate to form the mitotic spindle. Cut12 is localised to the SPB throughout the cell cycle, predominantly around the inner face of the interphase SPB, adjacent to the nucleus . Cut12 associates with Fin1 and is important in this context for the activity of Plo1 .. +PF11501 Non structural protein Nsp1
Nsp1 is the N-terminal cleavage product from the viral replicase that mediates RNA replication and processing . The specific function of the protein is unknown however the structure has been determined. The protein has a novel alpha/beta fold formed by a 6 stranded beta barrel with an alpha helix covering one end of the barrel and another helix alongside the barrel . Nsp1 could be involved in the degradation of mRNA.. +PF11502 B-cell lymphoma 9 protein
The Wnt pathway plays a role in embryonic development, stem cell growth and tumorigenesis. BCL9 associates with beta-catenin and Tcf in the nucleus when the Wnt pathway is stimulated leading to the transactivation of Wnt target genes .. +PF11503 Protein of unknown function (DUF3215)
This family of proteins with unknown function appears to be restricted to Saccharomycetaceae.. +PF11504 Colicin Ia
Colicins are toxic molecules secreted to kill other bacteria in times of stress. Colicin Ia kills susceptible E.coli cells by binding to the colicin I receptor leading to the formation of a voltage-dependant ion channel. The protein can be divided into three domains, a translocation domain, a receptor binding domain and a channel forming domain .. +PF11505 Protein of unknown function (DUF3216)
This family of archaeal proteins with unknown function appears to be restricted to Thermococcaceae.. +PF11506 Protein of unknown function (DUF3217)
This family of proteins with unknown function appears to be restricted to Mycoplasma. Some members in this family of proteins are annotated as MG376 however this cannot be confirmed.. +PF11507 Ebola virus-specific transcription factor VP30
VP30 is a nucleocapsid-associated Ebola virus-specific transcription factor . It acts by stabilising nascent mRNA in Ebola virus replication. The C terminal domain of VP30 folds into a dimeric helical assembly. VP30 assembles into hexamers in solution by an N-terminal oligomerisation domain which activates the transcription function of the protein. The oligomerisation is mediated by hydrophobic amino acids at 94-112 .. +PF11508 Protein of unknown function (DUF3218)
This family of proteins with unknown function appears to be restricted to Pseudomonas.. +PF11510 Fanconi Anaemia group E protein FANCE
Fanconi Anaemia (FA) is a cancer predisposition disorder. In response to DNA damage, the FA core complex monoubiquitinates the downstream FANCD2 protein. The protein FANCE has an important role in DNA repair as it is the FANCD2-binding protein in the FA core complex so it represents the link between the FA core complex and FANCD2 . The sequence shown is the C terminal domain of the protein which consists predominantly of helices and does not contain any beta-strand. The fold of the polypeptide is a continuous right-handed solenoidal pattern from the N terminal to the C terminal end .. +PF11511 Intrinsic membrane protein PufX
PufX organises RC-LH1, the photosynthesis reaction centre-light harvesting complex 1 core complex of Rhodobacter sphaeroides . It also facilitates the exchange of quinol for quinone between the reaction centre and cytochrome bc(1) complexes. In organic solvent, PufX contains two hydrophobic helices which are flanked by unstructured regions and connected by a helical bend .. +PF11512 Agrobacterium tumefaciens protein Atu4866
Atu4866 is a protein with unknown function from Agrobacterium tumefaciens however the structure has been determined. Atu4866 adopts a streptavidin-like fold and has a beta-barrel/sandwich which is formed by eight antiparallel beta-strands . Atu4866 has a potential ligand-binding site where it has a stretch of conserved residues on the surface .. +PF11513 Thermoplasma acidophilum protein TA0956
TA0956 is a protein from Thermoplasma acidophilum which currently has no known function however the structure has been determined. The protein has a two-layered alpha/beta-sandwich topology and is a putative Elongation factor 1-alpha binding motif .. +PF11514 Protein of unknown function (DUF3219)
This family of proteins with unknown function appears to be restricted to Bacillaceae. Some members in this family of proteins are annotated as YkvR however this cannot be confirmed.. +PF11515 Mouse development and cellular proliferation protein Cullin-7
The Cullin Ring Ligase family member, Cul7, is required for normal mouse development and cellular proliferation. Cul7 has a CPH domain which is a p53 interaction domain. The CPH domain interaction surface of P53 is present in the tetramerisation domain .. +PF11516 Protein of unknown function (DUF3120)
This family of proteins with unknown function appears to be restricted to Bordetella.. +PF11517 Nuclear abundant poly(A) RNA-bind protein 2 (Nab2)
Nab2 is a yeast heterogeneous nuclear ribonucleoprotein that modulates poly(A) tail length and mRNA. This is the N terminal domain of the protein which mediates interactions with the C-terminal globular domain, Myosin-like protein 1 and the mRNA export factor, Gfd1. The N-terminal domain of Nab2 shows a structure of a helical fold. The N terminal domain of Nab2 is thought to mediate protein-protein interactions that facilitate the nuclear export of mRNA . An essential hydrophobic Phe73 patch on the N terminal domain is thought to be an important component of the interface between Nab2 and Mlp1 .. +PF11518 Protein of unknown function (DUF3221)
This family of proteins with unknown function appears to be restricted to Bacillus. Some members in this family of proteins are annotated as YobA however this cannot be confirmed. YobA is a protein with unknown function.. +PF11519 Protein of unknown function (DUF3222)
This family of proteins with unknown function appears to be restricted to Rhodopseudomonas.. +PF11520 Chromatin protein Cren7
Cren7 is a chromatin protein found in Crenarchaeota and has a higher affinity for double-stranded DNA than for single-stranded DNA. The protein contains negative DNA supercoils and is associated with genomic DNA in vivo. Cren7 interacts with duplex DNA through a beta-sheet and a long flexible loop. The function has not been completely determined but it is thought that the protein may have a role similar to that of archaeal proteins in Euryarchaea .. +PF11521 C-terminal general transcription factor TFIIE alpha
TFIIE is compiled of two subunits, alpha and beta. This family of proteins are the C terminal domain of the alpha subunit of the protein which is the largest subunit and contains several functional domains which are important for basal transcription and cell growth. The C terminal end of the protein binds directly to the amino-terminal PH domain of p62/Tfb1 (of IIH) which is involved in the recruitment of the general transcription factor IIH to the transcription preinitiation complex. P53 competes for the same binding site as TFIIE alpha which shows their structural similarity. Like p53, TFIIE alpha 336-439 can activate transcription in vivo .. +PF11522 Yeast phosphatidylinositol-4-OH kinase Pik1
Pik1 is a regulator of membrane traffic and participates in the mating-pheromone signal-transduction cascade. The protein is localised to the nucleus and cytoplasm in the Golgi. Pik1 is thought to have an actin-independent role in membrane transport .. +PF11523 Protein of unknown function (DUF3223)
This family of proteins has no known function.. +PF11524 Selenium binding protein
Selenium is an important nutrient that needs to be regulated since lack of the nutrient leads to cell abnormalities and high concentrations are toxic. SeBP regulates the level of free selenium in the cell by sequestering the nutrient during transport. SeBP acts as a pentamer and delivers the selenium to the selenophosphate synthetase enzyme . Each subunit is composed of an alpha helix on top of a four stranded twisted beta sheet, stabilised by hydrogen bonds .. +PF11525 Copper resistance protein K
CopK is a periplasmic dimeric protein which is strongly up-regulated in the presence of copper, leading to a high periplasmic accumulation . CopK has two different binding sites for Cu(I), each with a different affinity for the metal. Binding of the first Cu(I) ion induces a conformational change of CopK which involves dissociation of the dimeric apo-protein. Binding of a second Cu(I) further increases the plasticity of the protein. CopK has features that are common with functionally related proteins such as a structure consisting of an all-beta fold and a methionine-rich Cu(I) binding site .. +PF11526 Subunit of cleavage factor IA Pcf11
Pcf11 is a subunit of an essential polyadenylation factor in Saccharomyces cerevisiae, CFIA. Pcf11 binds to Clp1, another subunit of CFIA whose interaction is responsible for maintaining a tight coupling between the Clp1 nucleotide binding subunit and the other components of the polyadenylation machinery .. +PF11527 The ARF-like 2 binding protein BART
BART binds specifically to ARL2.GTP with a high affinity however it does not bind to ARL2.GDP. It is thought that this specific interaction is due to BART being the first identified ARL2-specific effector. The function is not completely characterised . BART is predominantly cytosolic but can also be found to be associated with mitochondria. BART is also involved in binding to the adenine nucleotide transporter ANT1 .. +PF11528 Protein of unknown function (DUF3224)
This bacterial family of proteins has no known function.. +PF11529 Melampsora lini avirulence protein AvrL567-A
AvrL567-A is a protein from Melampsora lini, a fungal pathogen of flax, which induces plant disease resistance in flax plants . The protein has a novel fold .. +PF11530 Minor type IV pilin, PilX
PilX is a protein from Neisseria meningitidis which is crucial for the formation of bacterial aggregates and adhesion to human cells . The structure of PilX is similar to all pilins as it has the common alpha/beta roll fold. PilX subunits have surface-exposed motifs which are thought to stabilise bacterial aggregates against pilus retraction. It also illustrates how a minor pilus component can modulate the virulence properties of pili which have a simple composition and structure .. +PF11531 Coactivator-associated arginine methyltransferase 1 N terminal
CARM1 is an arginine methyltransferase which methylates a variety of different proteins and plays a role in gene expression. This is the N terminal domain of the protein which has a PH domain, normally present to regulate protein-protein interactions. A molecular switch is also present on the N terminal domain .. +PF11532 Heterogeneous nuclear ribonucleoprotein M
HnRNP M is a splicing regulatory factor that binds to the auxiliary RNA cis-element ISE/ISS-2 which promotes splicing of exon IIIb and silencing of exon IIIC in the fibroblast growth factor receptor 2 (FGFR2) . By binding to ISE/ISS-3, HnRNP M plays a role in the regulation of alternative splicing in FGFR2 as it induces exon skipping and promotes exon inclusion .. +PF11533 Protein of unknown function (DUF3225)
This bacterial family of proteins has no known function.. +PF11534 Hexameric tyrosine-coordinated heme protein (HTHP)
HTHP is from the marine bacterium Silicibacter pomeroyi and has peroxidase and catalase activity. HTHP consists of six monomers which each binds a solvent accessible heme group and is stabilised by the interaction of three neighbouring monomers . The heme iron is penta-coordinated with a tyrosine residue as proximal ligand .. +PF11535 Calcium binding
CcbP is a Ca(2+) binding protein which, in Anabaena, is thought to bind Ca(2+) by protein surface charge. When bound to Ca(2+), the protein becomes more compact and the level of free calcium decreases. The free Ca(2+) concentration which is regulated by CcbP is critical for the differentiation process . Calcium signalling is widespread in bacterial species, and prokaryotic cells like eukaryotes are equipped with all the elements to maintain Ca2+ homeostasis .. +PF11536 Protein of unknown function (DUF3226)
This archaeal family of proteins has no known function.. +PF11537 Protein of unknown function (DUF3227)
This archaeal family of proteins has no known function.. +PF11539 Protein of unknown function (DUF3228)
This family of proteins has no known function.. +PF11538 Snurportin1
Snurportin1 is a novel nuclear import receptor which contains an N-terminal importin beta binding domain which is essential for its function of a snRNP-specific nuclear import receptor . Snurportin1 interacts with m3G-cap where it enhances the m3G-cap dependent nuclear import of U snRNPs in Xenopus laevis oocytes and digitonin-permeabilized HeLa cells .. +PF11540 Cytoplasmic dynein 1 intermediate chain 2
Intermediate chain IC 2 forms part of the complex cytoplasmic dynein 1 along with a heavy chain (HC), two light intermediate chains (LICs) and three light chains (LCs). The complex is responsible for hydrolysing ATP to generate force toward the minus end of microtubules . IC binds to the HC via the N terminal binding domain on the HC and ICs contain binding sites for the LCs. The ICs are responsible for binding to kinetochores and the Golgi apparatus through an interaction with the p150Glued subunit of dynactin which is another complex . . +PF11542 Mitochondrial division protein 1
Mdv1 is a component of the mitochondrial fission machinery in Saccharomyces cerevisiae. The protein is also involved in peroxisome proliferation . Mdv1 along with Fis1 is also involved in controlling division dependent on Dnm1, a GTPase involved in the mediation of mitochondrial division. In this role, Mdv1 is the linker between Fis1 and Dnm1. Mdv1 plays a key role in the regulation of Dnm1 self-assembly .. +PF11543 Nuclear pore localisation protein NPL4
Npl4 is part of the heterodimer UN along with Ufd1 which is involved in the recruitment of p97, an AAA ATPase, for tasks involving the ubiquitin pathway. Npl4 has a ubiquitin-like domain which has within its structure a beta-grasp fold with a helical insert .. +PF11544 Spindle pole body component Spc42p
Spc42p is a 42-kD component of the S.cerevisiae spindle body that localises to the electron dense central region of the SPB. Spc42p is a phosphoprotein which forms a polymeric layer at the periphery of the SPB central plaque. This functions during SPB duplication and also facilitates the attachment of the SPB to the nuclear membrane .. +PF11545 Cell surface heme-binding protein Shp
Shp is part of a complex which functions in heme uptake in Streptococcus pyogenes. During which, Shp transfers its heme to HtsA which is a component of an ABC transporter. The heme binding region of Shp contains an immunoglobulin-like beta-sandwich fold and has a unique heme-iron coordination with the axial ligands being two methionine residues from the same Shp molecule . Surrounding the heme pocket, there is a negative surface which may serve as a docking interface for heme transfer .. +PF11546 Staphylococcal complement inhibitor SCIN
SCIN is released by Staphylococcus aureus to counteract the host immune defense. The protein binds to and inhibits C3 convertases on the bacterial surface, reducing phagocytosis and blocking downstream effector functions by C3b deposition on its surface . An 18 residue stretch 31-48 is crucial for SCIN activity .. +PF11547 E3 ubiquitin ligase EDD
EDD, the E3 ubiquitin ligase from the HECT ligases, contains an N-terminal ubiquitin-associated domain which binds ubiquitin. Ubiquitin is recognised by helices alpha-1 and -3 in the UBA domain. EDD is involved in DNA damage repair pathways and binds to mono-ubiquitinated proteins .. +PF11548 Protein-tyrosine phosphatase receptor IA-2
IA-2 is a protein-tyrosine phosphatase receptor that upon exocytosis, the cytoplasmic domain is cleaved and moves to the nucleus where it enhances transcription of the insulin gene . The mature exodomain of IA-2 participates in adhesion to the extracellular matrix and is self-proteolyzed in vitro by reactive oxygen species which may be a new shedding mechanism .. +PF11549 Protein transport protein SEC31
Sec31 is involved in COPII coat formation as it forms through the sequential binding of three cytoplasmic proteins: Sar1, Sec23/24 and Sec13/31. Sec13/31 is recruited by the pre-budding complex and polymerisation of Sec13/31 occurs to form an octahedral cage that is the outer shell of the COPII coat . Sec13/31 is a hetero-tetramer which is organised as a linear array of alpha-solenoid and beta-propeller domains to form a rod in which twenty-four copies assemble to form the COPII cub-octahedron .. +PF11550 Intracellular growth locus C protein
IglC protein is involved in the escape of F.tularensis live vaccine strain . It has been shown that the expression of IglC is essential for F.tularensis to induce macrophage apoptosis . IglC adopts a beta-sandwich conformation that has no similarity to any known protein structure .. +PF11551 Outer membrane protein Omp28
Omp28 is a 28-kDa outer membrane protein from Porphyromonas gingivalis. Omp28 is thought to be a surface adhesion/receptor protein. Omp28 is expressed in a wide distribution of P.gingivalis strains .. +PF11553 Protein of unknown function (DUF3231)
This bacterial family of proteins has no known function.. +PF11554 Protein of unknown function (DUF3232)
This bacterial family of proteins has no known function.. +PF11555 EGFR receptor inhibitor Mig-6
When the kinase domain of EGFR binds to segment one of Mitogen induced gene 6 (Mig-6), EGFR becomes inactive due to the conformation it adopts which is Src/CDK like. The binding of the two proteins prevents EGFR acting as a cyclin-like activator for other kinase domains. The structure of Mig-6(1) consists of alpha helices-G and -H with a polar surface and hydrophobic residues for interactions with EGFR. A critical step for the activation of EGFR is the formation of an asymmetric dimer involving the kinase domains of the protein. Since Mig-6 binds to the kinase domain it blocks this process and EGFR becomes inactive .. +PF11556 Erythrocyte binding antigen 175
EBA-175 is involved in the formation of a tight junction, a necessary step in invasion. This family represents the region VI which is a cysteine rich domain essential for EBA-175 trafficking. The structure is a homodimer that contains a five-alpha-helical core stabilised by four disulphide bridges .. +PF11557 Protein of unknown function (DUF3233)
Pfam-B_5068 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11558 Het-s 218-289
This family of proteins is residues 218-289 of Het-s, a protein of Podospora anserina. Het-s plays a role in heterokaryon incompatibility which prevents different forms of parasitism . This region of the protein is the C-terminal end and is unstructured in solution but forms infectious fibrils in vitro which has a structure consisting of a left-handed beta solenoid which contains two windings per molecule .. +PF11559 Afadin- and alpha -actinin-Binding
This family is found in mammals where it is localised at cell-cell adherens junctions , and in Sch. pombe and other fungi where it anchors spindle-pole bodies to spindle microtubules . It is a coiled-coil structure, and in pombe, it is required for anchoring the minus end of spindle microtubules to the centrosome equivalent, the spindle-pole body. The name ADIP derives from the family being composed of Afadin- and alpha -Actinin-Binding Proteins Localised at Cell-Cell Adherens Junctions.. +PF11560 Lamina-associated polypeptide 2 alpha
LAPs are components of the nuclear lamina which supports the nuclear envelope. LAP2alpha is a non-membrane-associated member of the LAP family which is unique. This family of proteins is the C terminal domain of LAP2alpha which consists of residues 459-693 and constitutes a dimeric structure with an antiparallel coiled coil. LAP2alpha is involved in cell-cycle regulation and chromatin organisation and preferentially binds to lamin A/C .. +PF11561 Single strand annealing-weakened 1
This family of yeast proteins is involved in single-strand-annealing, or SSA. SSA entails multiple steps: end resection and ssDNA formation; annealing of complementary ssDNAs; removal of 3' single-stranded non-homologous tails; gap fill-in synthesis; and ligation. Saw1 in combination with Slx4 catalyses the 3' non-homologous tail removal during recombination. Saw1 interacts physically with Rad1/Rad10, Msh2/Msh3, and Rad52 proteins, and works by targeting Rad1/Rad10 to Rad52-coated recombination intermediates .. +PF11563 Protoglobin
Pollington J, Eberhardt R. This family includes protoglobin from Methanosarcina acetivorans C2A. It is also found near the N-terminus of the Haem-based aerotactic transducer HemAT in Bacillus subtilis (Swiss:O07621). It is part of the haemoglobin superfamily. Protoglobin has specific loops and an amino-terminal extension which leads to the burying of the haem within the matrix of the protein. Protoglobin-specific apolar tunnels allow the access of O2, CO and NO to the haem distal site . In HemAT it acts as an oxygen sensor domain .. +PF11564 Restriction endonuclease BpuJI - N terminal
BpuJI is a restriction endonuclease which recognises the asymmetric sequence 5'-CCCGT and cuts at multiple sites in the surrounding area of the target sequence. This family of proteins is the N terminal domain of BpuJI which has DNA recognition functions. The recognition domain has two subdomains D1 and D2. The recognition of the target sequence occurs through major groove contacts of amino acids on the helix-turn-helix region and the N-terminal arm .. +PF11565 Alpha helical Porin B
Porin B is a porin from Corynebacterium glutamicum which allows the exchange of material across the mycolic acid layer which is the protective nonpolar barrier. Porin B has an alpha helical core structure consisting of four alpha-helices surrounding a nonpolar interior. There is a disulphide bridge between helices 1 and 4 to form a stable covalently bound ring . The channel of PorB is oligomeric .. +PF11566 Inhibitor_PI31;
PI31 proteasome regulator N-terminal. PI31 is a regulatory subunit of the immuno-proteasome which is an inhibitor of the 20 S proteasome in vitro. PI31 is also an F-box protein Fbxo7.Skp1 binding partner which requires an N terminal FP domain in both proteins for the interaction to occur via the FP beta sheets. The structure of PI31 FP domain contains a novel alpha/beta-fold and two intermolecular contact surfaces . This is the N-terminal domain of the members.. +PF11567 Plasmodium falciparum UIS3 membrane protein
UIS3 is a membrane protein essential for sporozoite development in infected hepatocytes. This family is residues 130-229 of the Plasmodium falciparum UIS3 protein which is compact and has an all alpha-helical structure. PfUIS3(130-229) interacts with lipids, phospholipid lysosomes, the human liver fatty acid-binding protein and with the lipid phosphatidylethanolamine. The interaction with liver fatty acid-binding protein provides the parasite with a method to import essential fatty acids/lipids during rapid growth phases of sporozoites .. +PF11568 Mediator complex subunit 29
Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-active part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function . Med29, along with Med11 and Med28, in mammals, is part of the core head-region of the complex. Med29 is the apparent orthologue of the Drosophila melanogaster Intersex protein, which interacts directly with, and functions as a transcriptional coactivator for, the DNA-binding transcription factor Doublesex, so it is likely that mammalian Med29 serves as a target for one or more DNA-binding transcriptional activators .. +PF11569 Homeodomain leucine-zipper encoding, Homez
Homez contains two leucine zipper-like motifs and an acidic domain and belongs to the superfamily of homeobox-containing proteins. The presence of leucine zippers suggests that Homez can function as a homo or heterodimer in the nucleus . It is thought that the first leucine zipper and homeodomain 1 (HD1) of Homez are responsible for dimerisation and HD2 has a specific DNA-binding activity. Homez is also thought to function as a transcriptional repressor due to the acidic region in its C-terminal domain . Homez is involved in a complex regulatory network .. +PF11570 Coiled-coil receptor-binding R-domain of colicin E2
E2 is a DNase which utilises the outer membrane receptor BtuB to bind to and enter the cell. This family of proteins is E2R135 (residues 321-443) which is the part of E2 which is responsible for binding to BtuB in a coiled coil formation .. +PF11571 Mediator complex subunit 27
Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function . Mediator exists in two major forms in human cells: a smaller form that interacts strongly with pol II and activates transcription, and a large form that does not interact strongly with pol II and does not directly activate transcription. The ubiquitous expression of Med27 mRNA suggests a universal requirement for Med27 in transcriptional initiation. Loss of Crsp34/Med27 decreases amacrine cell number, but increases the number of rod photoreceptor cells .. +PF11572 Protein of unknown function (DUF3234)
This bacterial family of proteins has no known function. Some members in this family of proteins are annotated as TTHA0547 however this cannot be confirmed.. +PF11573 Mediator complex subunit 23
Med23 is one of the subunits of the Tail portion of the Mediator complex that regulates RNA polymerase II activity. Med23 is required for heat-shock-specific gene expression, and has been shown to mediate transcriptional activation of E1A in mice.. +PF11574 Protein of unknown function (DUF3235)
Some members in this family of proteins with unknown function are annotated as RpfA however this cannot be confirmed.. +PF11575 FhuF 2Fe-2S C-terminal domain
Pfam-B_11690 (release 9.0). This family consists of several bacterial ferric iron reductase protein (FhuF) sequences. FhuF is involved in the reduction of ferric iron in cytoplasmic ferrioxamine B . This domain is the C-terminal domain that contains 4 conserved cysteine residues that are found to be part of a 2Fe-2S cluster .. +PF11576 Protein of unknown function (DUF3236)
This family of proteins with unknown function appears to be restricted to Methanobacteria.. +PF11577 NF-kappa-B essential modulator NEMO
NEMO is a regulatory protein which is part of the IKK complex along with the catalytic IKKalpha and beta kinases. The IKK complex phosphorylates IkappaB targeting it for degradation which results in the release of NF-kappaB which initiates the inflammatory response, cell proliferation or cell differentiation . NEMO activates the IKK complex's activity by associating with the unphosphorylated IKK kinase C termini. The core domain of NEMO is a dimer which binds to two fragments of IKK .. +PF11578 Protein of unknown function (DUF3237)
This family of proteins has no known function. +PF11579 Protein of unknown function (DUF3238)
This family of proteins with unknown function appears to be restricted to Bacillus cereus.. +PF11580 Protein of unknown function (DUF3239)
This bacterial family of proteins may be membrane proteins however this cannot be confirmed. Currently there is no known function.. +PF11581 Antagonist of EGFR signalling, Argos
Argos is a natural secreted antagonist of EGFR signalling which functions by binding growth factor ligands that activate EGFR by forming a clamp like structure using three disulphide-bonded beta-sheet domains .. +PF11582 Protein of unknown function (DUF3240)
This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11583 P-aminobenzoate N-oxygenase AurF
AurF is a metalloenzyme which is involved in the biosynthesis of antibiotic aureothin by catalysing the formation of p-nitrobenzoic acid from p-aminobenzoic acid. AurF is a non-heme di-iron monooxygenase which creates nitroarenes via the sequential oxidation of aminoarenes .. +PF11584 Proteinaceous host-selective toxin ToxA
ToxA is produced by particular Pyrenophora tritici-repentis races and is a proteinaceous host-selective toxin. It is necessary and sufficient to cause cell death in sensitive wheat cultivars. ToxA adopts a single-domain, beta-sandwich fold which has novel topology. The protein is directly involved in recognition events required for ToxA action. It is thought to be distantly related to FnIII proteins, gaining entry to the host via an integrin-like receptor .. +PF11585 Insect antimicrobial peptide, stomoxyn
Stomoxyn, localised in the gut epithelium, is an insect antimicrobial peptide which functions in killing a range of microorganisms, parasites and some viruses. Stomoxyn has a structure consisting of a random coil in water however in TFE it adopts a stable helical structure. Stomoxyn is thought to have a similar function to cecropin A from Hyalophora cecropia due to structural similarities .. +PF11586 Protein of unknown function (DUF3242)
This protein from Thermotoga maritima is a hypothetical ORFan protein, TM1622, whose structure has been determined. The protein is composed of seven beta strands and three alpha helices .. +PF11587 Major prion protein bPrPp - N terminal
This family represents the N-terminal domain (1-30) of the bovine prion protein (bPrPp). The protein's structure consists of mainly alpha helices. BPrPp forms a stable helix which inserts in a transmembrane location in the bilayer, with the N-terminal (1-30) functioning as a cell-penetrating peptide .. +PF11588 Protein of unknown function (DUF3243)
This family of proteins with unknown function appears to be restricted to Firmicutes.. +PF11589 Domain of unknown function (DUF3244)
This domain adopts an immunoglobulin-like beta-sandwich fold and structurally is most similar to fibronectin.. +PF11590 DNA polymerase catalytic subunit Pol
This family of proteins represents the catalytic subunit, Pol, of the Herpes simplex virus DNA polymerase. Pol binds UL42, making up the DNA polymerase. UL42 is a processivity subunit which binds to the C-terminal of Pol in a similar way that the cell cycle regulator p21 binds to PCNA .. +PF11591 Ferredoxin chloroplastic transit peptide
The structure of chloroplast ferredoxin in water is unstructured, however in a 30:70 molar-ratio mixture of 2,2,2-trifluoroethanol, residues 3 to 13 form an alpha-helix. The rest of the peptide remains unstructured . This family is the N-terminal of the [2Fe-2S] ferredoxin from C.reinhardtii. This protein catalyses the final reaction in a pathway which allows the production of H(2) from water in the chloroplast . . +PF11592 Central core of the bacterial effector protein AvrPto
This family of proteins represents the bacterial effector protein AvrPto from Pseudomonas syringae. This is the central core region of the protein which consists of a three-helix bundle motif. AvrPto is part of a type III secretion system from P.syringae which is involved in the bacterial speck disease of tomato. In resistant plants, AvrPto interacts with the host Pto kinase, which elicits an antibacterial defense response. In plants lacking resistance, the Pto kinase is not present and AvrPto acts as a virulence factor, promoting bacterial growth .. +PF11593 Mediator complex subunit 3 fungal
Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function . Mediator subunit Hrs1/Med3 is a physical target for Cyc8-Tup1, a yeast transcriptional co-repressor .. +PF11594 Mediator complex subunit 28
Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function . Subunit Med28 of the Mediator may function as a scaffolding protein within Mediator by maintaining the stability of a submodule within the head module, and components of this submodule act together in a gene-regulatory programme to suppress smooth muscle cell differentiation. Thus, mammalian Mediator subunit Med28 functions as a repressor of smooth muscle-cell differentiation, which could have implications for disorders associated with abnormalities in smooth muscle cell growth and differentiation, including atherosclerosis, asthma, hypertension, and smooth muscle tumours .. +PF11595 Protein of unknown function (DUF3245)
This is a family of proteins conserved in fungi. The function is not known, and there is no S. cerevisiae member.. +PF11596 Protein of unknown function (DUF3246)
This is a small family of fungal proteins one of whose members, Swiss:A3LUS4 from Pichia stipitis is described as being an extremely serine rich protein-mucin-like protein.. +PF11597 Mediator complex subunit 13 N-terminal
Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med13 is part of the ancillary kinase module, together with Med12, CDK8 and CycC, which in yeast is implicated in transcriptional repression, though most of this activity is likely attributable to the CDK8 kinase. The large Med12 and Med13 proteins are required for specific developmental processes in Drosophila, zebrafish, and Caenorhabditis elegans but their biochemical functions are not understood .. +PF11598 Cartilage oligomeric matrix protein
This family of proteins represents the five-stranded coiled-coil domain of cartilage oligomeric matrix protein (COMP). This region has a binding site between two internal rings formed by Leu37 and Thr40 . +PF11599 RRNA methyltransferase AviRa
This family of proteins represents the methyltransferase AviRa from Streptomyces viridochromogenes. This protein mediates the resistance to the antibiotic avilamycin. AviRa methylates a specific guanine base within the peptidyl-transferase loop of the 23S ribosomal RNA .. +PF11600 Chromatin assembly factor 1 complex p150 subunit, N-terminal
CAF-1_p150 is a polypeptide subunit of CAF-1, which functions in depositing newly synthesised and acetylated histones H3/H4 into chromatin during DNA replication and repair . CAF-1_p150 includes the HP1 interaction site, the PEST, KER and ED interacting sites. CAF-1_p150 interacts directly with newly synthesised and acetylated histones through the acidic KER and ED domains. The PEST domain is associated with proteins that undergo rapid proteolysis .. +PF11601 Shal-type voltage-gated potassium channels
This family of proteins represents Shal-type voltage-gated potassium channels which interact with Kv channel-interacting proteins to modulate cell surface expression and function of Kv4 channels. The interaction of the Shal-type protein Kv4.2 and the Kv interacting protein KChiP1 forms a structure which is like the structure between calmodulin and its target peptides when they interact. Interactions of an N terminal alpha helix in Kv4.2 and a C terminal alpha helix in KChIP1 are essential for the modulation of Kv4.2 by KChIPs .. +PF11602 ATPase P4 of dsRNA bacteriophage phi-12
P4 is a packaging motor which is involved in the packaging of phi-12 genome into preformed capsids using ATP. P4 is located at the vertices of the icosahedral capsid. ATP drives RNA translocation through cooperative conformational changes .. +PF11603 Regulatory protein Sir1
Sir1p interacts with the BAH domain of the Orc1p subunit of the origin recognition complex (ORC) resulting in the establishment of silent chromatin at HMR and HML in S.cerevisiae . The amino acids from the ORC interaction region of Sir1p are presented on a conserved, convex surface that forms a complementary interface with the Orc1 BAH domain, critical for transcriptional silencing .. +PF11604 Copper binding periplasmic protein CusF
CusF is a periplasmic protein involved in copper and silver resistance in Escherichia coli. CusF forms a five-stranded beta-barrel OB fold. Cu(I) binds to H36, M47 and M49 which are conserved residues in the protein .. +PF11605 Vacuolar protein sorting protein 36 Vps36
Vps36 is a subunit of ESCRT-II, a protein involved in driving protein sorting from endosomes to lysosomes. The GLUE domain of Vps36 allows for a tight interaction to occur between the protein and Vps28, a subunit of ESCRT-I. This interaction is critical for ubiquitinated cargo progression from early to late endosomes .. +PF11606 Family 31 carbohydrate binding protein
This family of proteins represents the family 31 carbohydrate-binding module of beta-1,2-xylanase. This protein is from Alcaligenes sp. strain XY-234. The AlcCBM31 module makes a beta-sandwich structure with an immunoglobulin fold and contains two intra-molecular disulfide bonds. AlcCBM31 shows affinity with only beta-1,3-xylan .. +PF11607 Protein of unknown function (DUF3247)
This family of proteins is the protein product of the gene XC5848 from Xanthomonas campestris. The protein has no known function however its structure has been determined. The protein adopts a Lsm fold however differences with the fold were observed at the N-terminal and internal regions .. +PF11608 Limkain b1
This family of proteins represents Limkain b1, which is a novel human autoantigen, localised to a subset of ABCD3 and PXF marked peroxisomes. Limkain b1 may be a relatively common target of human autoantibodies reactive to cytoplasmic vesicle-like structures .. +PF11609 Protein of unknown function (DUF3248)
This family of proteins is thought to be the product of the gene TT1592 from Thermus thermophilus however this cannot be confirmed. Currently there is no known function.. +PF11610 Scaffold protein Ste5-Fus5 binding region
This family of proteins represents the Fus5 binding region of Ste5. Ste5 functions in the yeast mating pathway and is required for signalling through the mating response MAPK pathway. Ste5 has separate binding sites for each member of the MAPK cascade. This region of Ste5 allosterically activates autophosphorylation of Fus3, a mitogen-activated protein kinase. Auto-activated Fus3 has a negative regulatory role, and promotes Ste5 phosphorylation which leads to a decrease in pathway transcriptional output .. +PF11611 TRF2;
Domain of unknown function (DUF4352). Members of this family are putative lipoproteins that fall into the Antigen MPT63/MPB63 (immunoprotective extracellular protein) superfamily.. +PF11612 GspJ;
Type II secretion system (T2SS), protein J. Pollington J, Desvaux M. The T2SJ proteins are pseudopilins, which are targeted to the membrane in E. Coli. T2SJ forms a complex with T2SI (Pfam:PF02501) and T2SK (Pfam:PF03934) which is part of the Type II secretion apparatus involved in the translocation of proteins across the outer membrane in E.coli. The T2SK-I-J complex has quasihelical characteristics .. +PF11613 Agonist of corticotropin releasing factor R2, Urocortin-2
This family of proteins represents urocortin 2, a member of the corticoliberin family which is a selective agonist of corticotropin releasing factor 2. The backbone of the protein is mainly alpha-helical but it contains a helix-loop-helix motif .. +PF11614 Bre5;
IG-like fold at C-terminal of FixG, putative oxidoreductase. Pollington J, Coggill P. This domain is part of a transmembrane protein, FixG, itself part of the FixGHIS operon closely associated with the FixNOPQ operon that is the symbiotically essential cbb3-type haem-copper oxidase complex. FixG expression is induced by oxygen-deprivation. This C-terminal domain adopts an E-set Ig-like fold.. +PF11615 Protein of unknown function (DUF3249)
This family of proteins represents the gene product of the protein CAF4, the yeast protein YKR036c. This protein contains seven WD40 repeats in its C terminus. The function however is unknown .. +PF11616 WD repeat binding protein EZH2
This family of proteins represents Enhancer of zeste homolog 2, (EZH2) a 30 residue peptide which binds to a WD-repeat domain of EED by residues 39-68. EED is a component of PRC2 complex which is involved in gene expression . This interaction is required for the HMTase activity of PRC2 .. +PF11617 Protein metal binding site
This family of proteins represents a unique protein copper binding site that involves a tryptophan metabolite, kynurenine in the protein MopE. The production of kynurenine by modification of tryptophan and its involvement in copper binding is an innate property of MopE .. +PF11618 Protein of unknown function (DUF3250)
This family of proteins represents a protein with unknown function. It may be the C2 domain from KIAA1005 however this cannot be confirmed.. +PF11619 Transcription factor P53 - C terminal domain
This family of proteins is the C terminal domain of the transcription factor P53. While the rest of the protein is quite conserved between the different transcription factors such as p53 and p73, the C terminal domain is highly divergent. The DM-p53 structure is characterized by an additional N-terminal beta-strand and a C-terminal helix .. +PF11620 GA-binding protein alpha chain
This family of proteins represents the transcription factor GABP alpha. This alpha domain is a five-stranded beta-sheet crossed by a distorted helix termed an OST domain. The surface of the GABP alpha OST domain contains two clusters of negatively-charged residues suggesting there are positively-charged partner proteins. The OST domain binds to the CH1 and CH3 domains of the co-activator histone acetyltransferase CBP/p300, a direct link between GABP and transcriptional machinery has been made .. +PF11621 C3 binding domain 4 of IgG-bind protein SBI
This family of proteins represents Sbi domain IV which binds the central complement protein C3. Sbi-IV interacts with Sbi-III to induce a consumption of complement via alternative pathway activation . When not interacting with Sbi-III, Sbi-IV inhibits the alternative pathway without complement consumption. The structure of Sbi-IV consists of a three-helix bundle fold .. +PF11622 Protein of unknown function (DUF3251)
This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. Some members of this family are annotated as putative lipoprotein YajI however this cannot be confirmed.. +PF11623 Protein of unknown function (DUF3252)
This family of proteins has no known function. Some members are annotated as Ssl0352 however this cannot be confirmed. Currently there is no known function.. +PF11624 MHC class I-like protein M157
This family of proteins represents M157, a divergent form of MHC class I-like proteins which is the protein product of the mouse cytomegalovirus. This protein is unique in its ability to engage both activating (Ly49H) and inhibitory (Ly49I) natural killer cell receptors. M157 is involved in intra- and intermolecular interactions within and between its domains to form a compact MHC-like molecule .. +PF11625 Protein of unknown function (DUF3253)
This bacterial family of proteins has no known function.. +PF11626 Rap1_C; TRF2IP; Yippee-Rap1;
TRF2-interacting telomeric protein/Rap1 - C terminal domain. This family of proteins represents the C-terminal domain of the protein Rap-1, which plays a distinct role in silencing at the silent mating-type loci and telomeres . The Rap-1 C terminus adopts an all-helical fold. Rap1 carries out its function by recruiting the Sir3 and Sir4 proteins to chromatin via its C terminal domain . Rap1 is otherwise known as TRF2-interacting protein, as it is one of the six subunit components of the Shelterin complex. Shelterin protects telomere ends from attack by DNA-repair mechanisms [2,3,4,5].. +PF11627 Nuclear factor hnRNPA1
This family of proteins represents hnRNPA1, a nuclear factor that binds to Pol II transcripts. The family of hnRNP proteins are involved in numerous RNA-related activities .. +PF11628 T-cell surface glycoprotein CD3 zeta chain
The incorporation of the zetazeta signalling module requires one basic TCR alpha and two zetazeta aspartic acid TM residues . The structure of the zetazeta(TM) dimer consists of a left-handed coiled coil with polar contacts. Two aspartic acids are critical for zetazeta dimerisation and assembly with TCR .. +PF11629 C terminal SARAH domain of Mst1
This family of proteins represents the C terminal SARAH domain of Mst1. SARAH controls apoptosis and cell cycle arrest via the Ras, RASSF, MST pathway. The Mst1 SARAH domain interacts with Rassf1 and Rassf5 by forming a heterodimer which mediates the apoptosis process .. +PF11630 Protein of unknown function (DUF3254)
This family of proteins is most likely a family of anti-lipopolysaccharide factor proteins however this cannot be confirmed.. +PF11631 Protein of unknown function (DUF3255)
Members in this family of proteins are annotated as YxeF however no function is currently known. The family appears to be restricted to Bacillus.. +PF11632 Lactococcin G-beta
This family of proteins is LcnG-beta, which with LcnG-alpha constitute the two-peptide bacteriocin lactococcin G (LcnG). This family of proteins represents the N terminal domain which has an alpha-helical structure and is amphiphilic. Both peptides have a GxxxG motif which they use for interaction through a helix-helix structure .. +PF11633 Nsp3;
Single-stranded poly(A) binding domain. This family of proteins represents Nsp3c, the product of ORF1a in group 2 coronavirus. The domain exhibits a macrodomain fold containing the nsp3 residues 528 to 648, with a flexibly extended N-terminal tail from residues 513 to 527 and a C-terminal flexible tail of residues 649 to 651. SUD-M(527-651) binds single-stranded poly(A); the contact area with this RNA on the protein surface, and the electrophoretic mobility shift assays confirm that SUD-M has higher affinity for purine bases than for pyrimidine bases.. +PF11634 Nuclease inhibitor from bacteriophage T4
This family of proteins represents IPI from bacteriophage T4. This protein is a nuclease inhibitor which is injected by T4 to protect its DNA from gmrS/gmrD CT of pathogenic Escherichia coli into the infected host . The structure of this protein consists of two small beta-sheets flanked by N and C termini by alpha-helices. The protein has a gmrS/gmrD hydrophobic binding site .. +PF11635 Mediator complex subunit 16
Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med16 is one of the subunits of the Tail portion of the Mediator complex and is required for lipopolysaccharide gene-expression . Several members including the human protein, Swiss:Q9Y2X0, have one or more WD40 domains on them, Pfam:PF00400.. +PF11636 Troponin I residues 1-32
This family of proteins represents the cardiac N-extension of troponin I. This region of the protein (1-32) interacts with the N-lobe of cTnC and modulates myofilament calcium(2) sensitivity . . +PF11637 ATP-dependant DNA helicase UvsW
This family of proteins represents the DNA helicase UvsW from bacteriophage T4. The protein is a member of the monomeric SF2 helicase superfamily and shows structural homology to the eukaryotic SF2 helicase Rad54. UvsW is thought to have a role in recombination and the rescue of stalled replication forks .. +PF11638 DnaA N-terminal domain
pdb_2jmp & Jackhmmer:B3CS04. This family of proteins represents the N-terminal domain of DnaA, a protein involved in the initiation of bacterial chromosomal replication. The structure of this domain is known . It is also found in three copies in some proteins such as Swiss:B5V0X4. The exact function of this domain is uncertain but it has been suggested to play a role in oligomerisation.. +PF11639 REDY-like protein HapK
This family of proteins represents HapK, a protein of unknown function, with two homologues PigK and RedY. The monomer structure of the protein contains a four-stranded anti parallel beta-sheet, three alpha-helices and a short C terminal tail which it uses for dimer formation . The surface of HapK has a deep cavity which consists of a kinked helix and a beta-four strand. HapK could be involved in prodigiosin biosynthesis, specifically the binding of a bipyrrole intermediate such as HBM or MBM .. +PF11640 Telomere-length maintenance and DNA damage repair
Pfam-B_6865 (release 23.0). ATM is a large protein kinase, in humans, critical for responding to DNA double-strand breaks (DSBs). Tel1, the orthologue from budding yeast, also regulates responses to DSBs. Tel1 is important for maintaining viability and for phosphorylation of the DNA damage signal transducer kinase Rad53 (an orthologue of mammalian CHK2). In addition to functioning in the response to DSBs, numerous findings indicate that Tel1/ATM regulates telomeres. The overall domain structure of Tel1/ATM is shared by proteins of the phosphatidylinositol 3-kinase (PI3K)-related kinase (PIKK) family, but this family carries a unique and functionally important TAN sequence motif, near its N-terminal, LxxxKxxE/DRxxxL. which is conserved specifically in the Tel1/ATM subclass of the PIKKs. The TAN motif is essential for both telomere length maintenance and Tel1 action in response to DNA damage . It is classified as an EC:2.7.11.1.. +PF11641 Glycosylphosphatidylinositol-anchored merozoite surface protein
This family of proteins represents the core region of Bd37, a surface antigen of B.divergens which is GPI-anchored at the surface of the merozoite. The structure of the protein consists of mainly alpha folds and has three sub domains .. +PF11642 Mite allergen Blo t 5
This family of proteins is Blo t 5, an allergen protein from Blomia tropicalis mites. This protein shows strong reactivity with IgE in asthmatic and rhinitis patients. The structure of the protein contains three alpha helices which form a coiled-coil .. +PF11644 Protein of unknown function (DUF3256)
This family of proteins with unknown function appears to be restricted to Bacteroidales.. +PF11645 DUF3257;
PD-(D/E)XK endonuclease. This family of endonucleases includes a group I intron-encoded endonuclease . This family belongs to the PD-(D/E)XK superfamily [2-3].. +PF11646 Protein of unknown function DUF3258
This viral family are possible phage integrase proteins however this cannot be confirmed.. +PF11647 C-terminal region of Pasteurella multocida toxin residues 569-1285
This family represents the C terminal region of Pasteurella multocida toxin (PMT) which displays a Trojan horse-like shape with three domains, C1, C2 and C3. The C3 domain possesses the Cys-His-Asp catalytic triad. PMT is an enzyme toxin carrying the cysteine protease-like catalytic triad which functions on the cytoplasmic face of the plasma membrane of target cells .. +PF11648 C-terminal domain of RIG-I
This family of proteins represents the regulatory domain RD of RIG-I, a protein which initiates a signalling cascade that provides essential antiviral protection for the host. The RD domain binds viral RNA, activating the RIG-I ATPase by RNA-dependant dimerisation. The structure of RD contains a zinc-binding domain and is thought to confer ligand specificity .. +PF11649 Virus neck protein
This family of proteins represents gene product 14, a major component of the neck in T4-like viruses along with gene product 13. Gene product 14 is rich in beta-sheets. The formation of the neck to the head of the bacteriophage is crucial for the tail attachment .. +PF11650 P22 tail accessory factor
This tail accessory factor of the P22 virus is also referred to as gene product 4 (Gp4). The protein's structure consists of 60% alpha helices. Gp4 is the first tail accessory factor to be added to newly DNA-filled capsids during P22-morphogenesis. In solution, the protein acts as a monomer and has low structural stability. The interaction of gp4 with the portal protein involves the binding of two non-equivalent sets of six gp4 proteins . Gp4 acts as a structural adaptor for gp10 and gp26, the other tail accessory factors .. +PF11651 P22 coat protein - gene protein 5
This family of proteins represents gene product 5 from bacteriophage P22. This protein is involved in the formation of the pro-capsid shells in the bacteriophage. In total, there are 415 molecules of the coat protein which are arranged in an icosahedral shell .. +PF11652 Protein of unknown function (DUF3259)
This eukaryotic family of proteins has no known function.. +PF11653 Bacteriophage T7 virion assembly protein
This family of proteins represents the gene product 7.3 from T7 bacteriophage. The protein is localised to the tail and is thought to be important in virion assembly. Particles assembled in the absence of the protein fail to adsorb to cells .. +PF11654 Protein of unknown function (DUF2665)
Some members in this family are annotated as Non classical export proteins, however no specific function is known.. +PF11655 Protein of unknown function (DUF2589)
This family of proteins has no known function.. +PF11656 ComReg_Spx;
YjbD family (DUF3811). This is a family of proteobacteria proteins of unknown function. This family is unrelated to Pfam:PF03960 which contains a set of transcription factors that are also named YjbD.. +PF11657 Transcriptional activator TraM
TraM is required for quorum dependence. It binds to and in-activates TraR which controls the replication of the tumour-inducing virulence plasmid. TraM interacts in a two-step process with DNA-TraR to form a large, stable anti-activation complex [1,2].. +PF11658 Protein of unknown function (DUF3260)
Pfam-B_003054 (release 23.0). Some members in this family of proteins are annotated as YhjU however this cannot be confirmed. Currently this family has no known function.. +PF11659 Protein of unknown function (DUF3261)
Pfam-B_003077 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11660 Protein of unknown function (DUF3262)
Pfam-B_003096 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11661 Protein of unknown function (DUF2986)
Pfam-B_003109 (release 23.0). This family of proteins has no known function.. +PF11662 Protein of unknown function (DUF3263)
Pfam-B_003189 (release 23.0). This family of proteins with unknown function appears to be restricted to Actinobacteria.. +PF11663 Toxin with endonuclease activity YhaV
Pfam-B_003231 (release 23.0). YhaV causes reversible bacteriostasis and is part of a toxin-antitoxin system in Escherichia coli along with PrlF. The toxicity of YhaV is counteracted by PrlF by the formation of a tight complex which binds to the promoter of the prlF-yhaV operon. In vitro, YhaV also has endonuclease activity .. +PF11665 Protein of unknown function (DUF3265)
Pfam-B_001494 (release 23.0). This family of proteins with unknown function appear to be restricted to Vibrio.. +PF11666 Protein of unknown function (DUF2933)
Pfam-B_002197 (release 23.0). This bacterial family of proteins has no known function.. +PF11667 Protein of unknown function (DUF3267)
Pfam-B_002418 (release 23.0). This family of proteins has no known function.. +PF11668 HCMV glycoprotein pUL130
Pfam-B_002736 (release 23.0). This family of proteins represents pUL130 from Human cytomegalovirus, a glycoprotein secreted from infected cells that is incorporated into the virion envelope as a Golgi-matured form. The protein promotes endothelial cell infection through a producer cell modification of the virion .. +PF11669 WW domain-binding protein 1
Pfam-B_003402 (release 23.0). This family of proteins represents WBP-1, a ligand of the WW domain of Yes-associated protein. This protein has a proline-rich domain. WBP-1 does not bind to the SH3 domain .. +PF11670 Major surface protein 1a (MSP1a)
Pfam-B_001532 (release 23.0). MSP1a is part of the A.marginale major surface protein 1 (MSP1) complex and exists as a heterodimer with MSP1b. The complex has adhesive functions in bovine erythrocytes invasion .. +PF11671 Complementary sex determiner protein
Pfam-B_4678 (release 23.0). This family of proteins represents the complementary sex determiner in the honeybee. In the honeybee, the mechanism of sex determination depends on the csd gene which produces an SR-type protein. Males are hemizygous or homozygous while females are heterozygous for the csd gene. Heterozygosity generates an active protein which initiates female development .. +PF11672 Protein of unknown function (DUF3268)
Pfam-B_4693 (release 23.0). This family of proteins has no known function.. +PF11673 Protein of unknown function (DUF3269)
Pfam-B_4706 (release 23.0). This family of proteins has no known function.. +PF11674 Protein of unknown function (DUF3270)
Pfam-B_4714 (release 23.0). This family of proteins with unknown function appears to be restricted to Streptococcus.. +PF11675 Protein of unknown function (DUF3271)
Pfam-B_4697 (release 23.0). This family of proteins with unknown function appears to be restricted to Plasmodium.. +PF11676 Protein of unknown function (DUF3272)
Pfam-B_4726 (release 23.0). This family of proteins with unknown function appears to be restricted to Streptococcus.. +PF11677 Protein of unknown function (DUF3273)
Pfam-B_4727 (release 23.0). Some members in this family of proteins are annotated as multi-transmembrane proteins however this cannot be confirmed. Currently this family has no known function.. +PF11678 Protein of unknown function (DUF3274)
Pfam-B_4733 (release 23.0). This bacterial family of proteins has no known function.. +PF11679 Protein of unknown function (DUF3275)
Pfam-B_4743 (release 23.0). This family of proteins with unknown function appear to be restricted to Proteobacteria.. +PF11680 Protein of unknown function (DUF3276)
Pfam-B_4744 (release 23.0). This bacterial family of proteins has no known function.. +PF11681 Protein of unknown function (DUF3277)
Pfam-B_4749 (release 23.0). This family of proteins represents a putative bacteriophage protein. No function is currently known.. +PF11682 Protein of unknown function (DUF3279)
Pfam-B_4753 (release 23.0). This family of proteins with unknown function appears to be restricted to Enterobacteriaceae.. +PF11683 Protein of unknown function (DUF3278)
Pfam-B_4766 (release 23.0). This bacterial family of proteins has no known function.. +PF11684 Protein of unknown function (DUF2380)
Pfam-B_4754 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11685 Protein of unknown function (DUF3281)
Pfam-B_4757 (release 23.0). This family of bacterial proteins has no known function.. +PF11686 Protein of unknown function (DUF3283)
Pfam-B_4778 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11687 Domain of unknown function (DUF3284)
Pfam-B_4781 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes.. +PF11688 Protein of unknown function (DUF3285)
Pfam-B_4791 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11690 Protein of unknown function (DUF3287)
Pfam-B_4801 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11691 Protein of unknown function (DUF3288)
Pfam-B_4815 (release 23.0). This family of proteins with unknown function appears to be restricted to Cyanobacteria.. +PF11692 Protein of unknown function (DUF3289)
Pfam-B_4824 (release 23.0). This family of proteins with unknown function appears to be restricted to Proteobacteria.. +PF11693 Protein of unknown function (DUF2990)
Pfam-B_4826 (release 23.0). This family of proteins represents a fungal protein with unknown function.. +PF11694 Protein of unknown function (DUF3290)
Pfam-B_4838 (release 23.0). This family of proteins with unknown function appears to be restricted to Firmicutes.. +PF11695 Domain of unknown function (DUF3291)
Pfam-B_4872 (release 23.0). This bacterial family of proteins has no known function.. +PF11696 Protein of unknown function (DUF3292)
Pfam-B_4874 (release 23.0). This eukaryotic family of proteins has no known function.. +PF11697 Protein of unknown function (DUF3293)
Pfam-B_4879 (release 23.0). This bacterial family of proteins has no known function.. +PF11698 V-ATPase subunit H
Pfam-B_2481 (release 6.5). The yeast Saccharomyces cerevisiae vacuolar H+-ATPase (V-ATPase) is a multisubunit complex responsible for acidifying organelles. It functions as an ATP dependent proton pump that transports protons across a lipid bilayer. This domain corresponds to the C terminal domain of the H subunit of V-ATPase. The N-terminal domain is required for the activation of the complex whereas the C-terminal domain is required for coupling ATP hydrolysis to proton translocation .. +PF11699 Mif2;
CENP-C_C is a C-terminal family of fungal and eukaryote proteins necessary for centromere formation. CENP-C is the inner-kinetochore centromere (CEN) binding protein. In the budding-yeast, Mif2, the yeast homologue, binds in the CDEIII region of the centromere, and has been shown to recruit a substantial subset of all inner and outer kinetochore proteins . Mif2 adopts a cupin fold and is extremely similar both in polypeptide chain conformation and in dimer geometry to the dimerisation domain of a bacterial transcription factor . The Mif2 dimer appears to be part of an enhanceosome-like structure that nucleates kinetochore assembly in budding yeast . This C-terminal domain is the region via which CENP-C localises to centromeres throughout the cell cycle [2,3].. +PF11700 Vacuole effluxer Atg22 like
Pfam-B_14077 (release 22.0). Autophagy is a major survival mechanism in which eukaryotes recycle cellular nutrients during stress conditions. Atg22, Avt3 and Avt4 are partially redundant vacuolar effluxes, which mediate the efflux of leucine and other amino acids resulting from autophagy . This family also includes other transporter proteins.. +PF11701 Myosin-binding striated muscle assembly central
The UNC-45 or small muscle protein 1 of C.elegans is expressed in two forms from different genomic positions in mammals, as a general tissue protein UNC-45a and a specific form Unc-45b expressed only in striated and skeletal muscle. All members carry up to three amino-terminal tetratricopeptide repeat (TPR) domains towards their N-terminal, a UCS domain at the C-terminal that contains a number of Arm repeats Pfam:PF00514 and this central region of approximately 400 residues. Both the general form and the muscle form of UNC-45 function in myotube formation through cell fusion. Myofibril formation requires both GC and SM UNC-45, consistent with the fact that the cytoskeleton is necessary for the development and maintenance of organised myofibrils . The S. pombe Rng3p, is crucial for cell shape, normal actin cytoskeleton, and contractile ring assembly, and is essential for assembly of the myosin II-containing progenitors of the contractile ring. Widespread defects in the cytoskeleton are found in null mutants of all three fungal proteins . Mammalian Unc45 is found to act as a specific chaperone during the folding of myosin and the assembly of striated muscle by forming a stable complex with the general chaperone Hsp90. The exact function of this central region is not known .. +PF11702 Protein of unknown function (DUF3295)
This family is conserved in fungi but the function is not known.. +PF11703 UPF0506
This uncharacterised family is found in Schistosoma genomes. Although uncharacterised it appears to belong to the knottin fold. The sequence is composed of two repeats of a 6 cysteine motif.. +PF11704 Vesicle coat protein involved in Golgi to plasma membrane transport
Pfam-B_3276 (release 23.0), ADDA_17305. In yeast cells this family functions in the regulated delivery of Gap1p (a general amino acid permease) to the cell surface, perhaps as a component of a post-Golgi secretory-vesicle coat complex . Birt-Hogg-Dube (BHD)4 syndrome is an autosomal dominant disorder characterised by hamartomas of skin follicles, lung cysts, spontaneous pneumothorax, and renal cell carcinoma. Folliculin is the protein from the BHD4 gene and is found to have no significant homology to any other human proteins. It is expressed in most tissues. These same symptoms also occur in TSC or tuberous sclerosis complex, suggesting that the same pathway is involved, and it is likely that the target is the down-stream Tor2 - an essential gene. Folliculin appears to bind Tor2, and down-regulation of Tor2 activity leads to up-regulation of nitrogen responsive genes including membrane transporters and amino acid permeases .. +PF11705 DNA-directed RNA polymerase III subunit Rpc31
Pfam-B_203281 (release 23.0). RNA polymerase III contains seventeen subunits in yeasts and in human cells. Twelve of these are akin to RNA polymerase I or II and the other five are RNA pol III-specific, and form the functionally distinct groups (i) Rpc31-Rpc34-Rpc82, and (ii) Rpc37-Rpc53. Rpc31, Rpc34 and Rpc82 form a cluster of enzyme-specific subunits that contribute to transcription initiation in S.cerevisiae and H.sapiens. There is evidence that these subunits are anchored at or near the N-terminal Zn-fold of Rpc1, itself prolonged by a highly conserved but RNA polymerase III-specific domain .. +PF11706 CGNR zinc finger
Pfam-B_19432 (release 10.0). This family consists of a C-terminal zinc finger domain. It seems likely to be DNA-binding given the conservation of many positively charged residues. The domain is named after a highly conserved motif found in many members of the family.. +PF11707 Ribosome 60S biogenesis N-terminal
Pfam-B_2493 (release 23.0). Npa1p is required for ribosome biogenesis and operates in the same functional environment as Rsa3p and Dbp6p during early maturation of 60S ribosomal subunits . The protein partners of Npa1p include eight putative helicases as well as the novel Npa2p factor. Npa1p can also associate with a subset of H/ACA and C/D small nucleolar RNPs (snoRNPs) involved in the chemical modification of residues in the vicinity of the peptidyl transferase centre . The protein has also been referred to as Urb1, and this domain at the N-terminal is one of several conserved regions along the length.. +PF11708 Pre-mRNA splicing Prp18-interacting factor
Pfam-B_999 (release 23.0). The spliceosome, an assembly of snRNAs (U1, U2, U4/U6, and U5) and proteins, catalyses the excision of introns from pre-mRNAs in two successive trans-esterification reactions. Step 2 depends upon integral spliceosome constituents such as U5 snRNA and Prp8 and non-spliceosomal proteins Prp16, Slu7, Prp18, and Prp22. ATP hydrolysis by the DEAH-box enzyme Prp16 promotes a conformational change in the spliceosome that leads to protection of the 3'ss from targeted RNase H cleavage. This change, which probably reflects binding of the 3'ss PyAG in the catalytic centre of the spliceosome, requires the ordered recruitment of Slu7, Prp18, and Prp22 to the spliceosome. There is a close functional relationship between Prp8, Prp18, and Slu7, and Prp18 interacts with Slu7, so that together they recruit Prp22 to the spliceosome. Most members of the family carry a zinc-finger of the CCHC-type upstream of this domain.. +PF11709 Mitochondrial ribosomal protein subunit
Pfam-B_4972 (release 23.0). This family is the mitochondrial ribosomal small-subunit protein Mrp51. Its function is not entirely clear, but deletion of the MRP51 gene completely blocked mitochondrial gene expression.. +PF11710 G protein-coupled glucose receptor regulating Gpa2
Pfam-B_11657 (release 23.0). Git3 is one of six proteins required for glucose-triggered adenylate cyclase activation, and is a G protein-coupled receptor responsible for the activation of adenylate cyclase through Gpa2 - heterotrimeric G protein alpha subunit, part of the glucose-detection pathway. Git3 contains seven predicted transmembrane domains, a third cytoplasmic loop and a cytoplasmic tail . This is the conserved N-terminus of these proteins, and the C-terminal conserved region is now in family Git3_C.. +PF11711 Inner membrane protein import complex subunit Tim54
Pfam-B_3533 (release 23.0). Mitochondrial function depends on the import of hundreds of different proteins synthesised in the cytosol. Protein import is a multi-step pathway which includes the binding of precursor proteins to surface receptors, translocation of the precursor across one or both mitochondrial membranes, and folding and assembly of the imported protein inside the mitochondrion. Most precursor proteins carry amino-terminal targeting signals, called pre-sequences, and are imported into mitochondria via import complexes located in both the outer and the inner membrane (IM). The IM complex, TIM, is made up of at least two proteins which mediate translocation of proteins into the matrix by removing their signal peptide and another pair of proteins, Tim54 and Tim22, that insert the polytopic proteins, that carry internal targetting information, into the inner membrane .. +PF11712 Endoplasmic reticulum-based factor for assembly of V-ATPase
Pfam-B_2410 (release 23.0). The yeast vacuolar proton-translocating ATPase (V-ATPase) is the best characterised member of the V-ATPase family. A total of thirteen genes are required for encoding the subunits of the enzyme complex itself and an additional three for providing factors necessary for the assembly of the whole. Vma12 is one of these latter, all three of which are localised to the endoplasmic reticulum .. +PF11713 Peptidase C80 family
This family belongs to cysteine peptidase family C80.. +PF11714 Thrombin inhibitor Madanin
Members of this family are the peptidase inhibitor madanin proteins. These proteins were isolated from tick saliva .. +PF11715 Nucleoporin Nup120/160
Pfam-B_1841 (release 23.0). Nup120 is conserved from fungi to plants to humans, and is homologous with the Nup160 of vertebrates. The nuclear pore complex, or NPC, mediates macromolecular transport across the nuclear envelope. Deletion of the NUP120 gene causes clustering of NPCs at one side of the nuclear envelope, moderate nucleolar fragmentation and slower cell growth . The vertebrate NPC is estimated to contain between 30 and 60 different proteins, most of which are not known. Two important ones in creating the nucleoporin basket are Nup98 and Nup153, and Nup120, in conjunction with Nup133, interacts with these two and itself plays a role in mRNA export . Nup160, Nup133, Nup96, and Nup107 are all targets of phosphorylation. The phosphorylation sites are clustered mainly at the N-terminal regions of these proteins, which are predicted to be natively disordered. The entire Nup107-160 subcomplex is stable throughout the cell cycle, thus it seems unlikely that phosphorylation affects interactions within the Nup107-160 subcomplex, but rather that it regulates the association of the subcomplex with the NPC and other proteins .. +PF11716 Mycothiol maleylpyruvate isomerase N-terminal domain
+PF11717 RNA binding activity-knot of a chromodomain
Pfam-B_4165 (release 22.0). This is a novel knotted tudor domain which is required for binding to RNA. The knot influences the loop conformation of the helical turn Ht2 - residues 61-63 - that is located at the side opposite the knot in the tudor domain-chromodomain; stabilisation of Ht2 is essential for RNA binding .. +PF11718 Pre-mRNA 3'-end-processing endonuclease polyadenylation factor C-term
Pfam-B_2254 (release 23.0). This is the C-terminal conserved region of the pre-mRNA 3'-end-processing of the polyadenylation factor CPSF-73/CPSF-100 proteins. The exact function of this domain is not known.. +PF11719 DNA replication and checkpoint protein
Pfam-B_1966 (release 23.0). Genome duplication is precisely regulated by cyclin-dependent kinases CDKs, which bring about the onset of S phase by activating replication origins and then prevent relicensing of origins until mitosis is completed. The optimum sequence motif for CDK phosphorylation is S/T-P-K/R-K/R, and Drc1-Sld2 is found to have at least 11 potential phosphorylation sites. Drc1 is required for DNA synthesis and S-M replication checkpoint control. Drc1 associates with Cdc2 and is phosphorylated at the onset of S phase when Cdc2 is activated. Thus Cdc2 promotes DNA replication by phosphorylating Drc1 and regulating its association with Cut5 . Sld2 and Sld3 represent the minimal set of S-CDK substrates required for DNA replication .. +PF11720 Peptidase inhibitor I78 family
This family includes Aspergillus elastase inhibitor and belongs to MEROPS peptidase inhibitor family I78.. +PF11721 Di-glucose binding within endoplasmic reticulum
Pfam-B_783 (release 23.0) pdb_2jwp. Malectin is a membrane-anchored protein of the endoplasmic reticulum that recognises and binds Glc2-N-glycan. It carries a signal peptide from residues 1-26, a C-terminal transmembrane helix from residues 255-274, and a highly conserved central part of approximately 190 residues followed by an acidic, glutamate-rich region. Carbohydrate-binding is mediated by the four aromatic residues, Y67, Y89, Y116, and F117 and the aspartate at D186. NMR-based ligand-screening studies have shown binding of the protein to maltose and related oligosaccharides, on the basis of which the protein has been designated "malectin", and its endogenous ligand is found to be Glc2-high-mannose N-glycan .. +PF11722 CCCH zinc finger in TRM13 protein
This domain is found at the N-terminus of TRM13 methyltransferase proteins. It is presumed to be a zinc binding domain.. +PF11723 Homotrimeric ring hydroxylase
Pfam-B_24837 (release 22.0). This domain is found on aromatic hydroxylating enzymes such as 2-oxo-1,2-dihydroquinoline 8-monooxygenase from Pseudomonas putida and carbazole 1,9a-dioxygenase from Janthinobacterium. These enzymes are homotrimers and are distantly related to the typical oxygenase . This domain is found C terminal to the Rieske domain which binds an iron-sulphur cluster.. +PF11724 YvbH-like oligomerisation region
This region is found at the C-terminus of a group of bacterial PH domains. This region is composed of a helical hairpin that appears to mediate oligomerisation based on the known structure. This elaboration of the bacterial PH domain is only found in Bacillales.. +PF11725 Pathogenicity factor
Pfam-B_735 (release 23.0). This family is secreted by gram-negative Gammaproteobacteria such as Pseudomonas syringae of tomato and the fire blight plant pathogen Erwinia amylovora, amongst others. It is an essential pathogenicity factor of approximately 198 kDa. Its injection into the host-plant is dependent upon the bacterial type III or Hrp secretion system . The family is long and carries a number of predicted functional regions, including an ERMS or endoplasmic reticulum membrane retention signal at both the C- and the N-termini, a leucine-zipper motif from residues 539-560, and a nuclear localisation signal at 1358-1361. This conserved AvrE-family of effectors is among the few that are required for full virulence of many phytopathogenic pseudomonads, erwinias and pantoeas .. +PF11726 Protein of unknown function (DUF3296)
Pfam-B_768 (release 23.0). This family is expressed in Gammaproteobacteria. One of the E coli members is annotated as yagK, but otherwise the function is not known.. +PF11727 Invariant surface glycoprotein
Pfam-B_785 (release 23.0). This family is found in Trypanosome species, and appears to be one of two invariant surface glycoproteins, ISG65 and ISG75, that are found in the mammalian stage of the parasitic protozoan. The sequence suggests the two families are polypeptides with N-terminal signal sequences, hydrophilic extracellular domains, single trans-membrane alpha-helices and short cytoplasmic domains. They are both expressed in the bloodstream form but not in the midgut stage. Both polypeptides are distributed over the entire surface of the parasite [1,2].. +PF11728 DUF939 C-terminal domain
This region is a presumed intracellular domain found in a set of bacterial presumed transporter proteins. The region is about 160 amino acids in length.. +PF11729 nodavirus capsid protein
Pfam-B_805 (release 23.0). The capsid or coat protein of this family is expressed in Nodaviridae, that are ssRNA positive-strand viruses, with no DNA stage. These viruses are the causative agents of viral nervous necrosis in marine fish.. +PF11730 Protein of unknown function (DUF3297)
Pfam-B_797 (release 23.0). This family is expressed in Proteobacteria and Actinobacteria. The function is not known.. +PF11731 Pathogenicity locus
Pfam-B_826 (release 23.0). Cdd1 is expressed as part of the pathogenicity locus operon in several different orders of bacteria . Many members of the family are annotated as being putative mitomycin resistance proteins but this could not be confirmed.. +PF11732 Transcription- and export-related complex subunit
Pfam-B_819 (release 23.0). The THO/TREX complex is the transcription- and export-related complex associated with spliceosomes that preferentially deal with spliced mRNAs as opposed to unspliced mRNAs. Thoc2 plays a role in RNA polymerase II (RNA pol II)-dependent transcription and is required for the stability of DNA repeats . In humans, the TREX complex is comprised of the exon-junction-associated proteins Aly/REF and UAP56 together with the THO proteins THOC1 (hHpr1/p84), Thoc2 (hRlr1), THOC3 (hTex1), THOC5 (fSAP79), THOC6 (fSAP35), and THOC7 (fSAP24). Although much evidence indicates that the function of the TREX complex as an adaptor between the mRNA and components of the export machinery is conserved among eukaryotes, in Drosophila the majority of mRNAs can be exported from the nucleus independently of the THO complex .. +PF11733 Non-capsid protein NP1
Pfam-B_837 (release 23.0). This family is the non-capsid protein NP1 of the ssDNA, Parvovirinae virus Bocavirus of cattle and humans.. +PF11734 TilS substrate C-terminal domain
This domain is found in the tRNA(Ile) lysidine synthetase (TilS) protein.. +PF11735 Cryptococcal mannosyltransferase 1
Pfam-B_916 (release 23.0). The capsule of pathogenic fungi is a complex polysaccharide whose formation is determined by a number of enzymes including, most importantly, alpha-1,3-mannosyltransferase 1, EC:2.4.1.- .. +PF11736 Protein of unknown function (DUF3299)
Pfam-B_876 (release 23.0). This is a family of bacterial proteins of unknown function.. +PF11737 Protein of unknown function (DUF3300)
Pfam-B_886 (release 23.0). This hypothetical bacterial gene product has a long hydrophobic segment and is thus likely to be a membrane protein.. +PF11738 Protein of unknown function (DUF3298)
Pfam-B_854 (release 23.0). This family of bacterial protein C-terminal regions is highly conserved but the function is not known. Several members are annotated as being endo-1,4-beta-xylanase-like, but this could not be confirmed, and the structure can be defined as a heat-shock cognate 70kd protein 44kd ATPase.. +PF11739 Dicarboxylate transport
Pfam-B_935 (release 23.0). In certain bacterial families this protein is expressed from the ydbH gene, and there is a suggestion that this is a form of DctA or dicarboxylate transport protein. Dicarboxylate transport proteins are found in aerobic bacteria which grow on succinate or other C4-dicarboxylates .. +PF11740 Plasmid replication region DNA-binding N-term
Pfam-B_844 (release 23.0). The broad host-range plasmid RK2 is able to replicate in and be inherited in a stable manner in diverse Gram-negative bacterial species. It encodes a number of co-ordinately regulated operons including a central control korF1 operon that represses the kfrA operon. The KfrA polypeptide is a site-specific DNA-binding protein whose operator overlaps the kfrA promoter. The N-terminus, containing an helix-turn-helix motif, is essential for function. Downstream from this family is an extended coiled-coil domain containing a heptad repeat segment which is probably responsible for formation of multimers, and may provide an example of a bridge to host structures required for plasmid partitioning .. +PF11741 AMIN domain
Pfam-B_11438 (release 23.0). This N-terminal domain of various bacterial protein families is crucial for the targetting of periplasmic or extracellular proteins to specific regions of the bacterial envelope. AMIN is derived from the N-terminal domain of AmiC, an N-acetylmuramoyl-l-alanine amidase of Escherichia coli which localises to the septal ring during division and plays a key role in the separation of daughter cells. The AMIN domain is present in several protein families besides amidases suggesting that AMIN may represent a general targetting determinant involved in the localisation of periplasmic protein complexes .. +PF11742 Protein of unknown function (DUF3302)
Pfam-B_953 (release 23.0). This family of unknown function is expressed by proteobacteria.. +PF11743 Protein of unknown function (DUF3301)
Pfam-B_952 (release 23.0). This family is conserved in Proteobacteria, but the function is not known.. +PF11744 Aluminium activated malate transporter
+PF11745 Protein of unknown function (DUF3304)
Pfam-B_956 (release 23.0). This is a family of bacterial proteins of unknown function.. +PF11746 Protein of unknown function (DUF3303)
Pfam-B_958 (release 23.0). Several members are annotated as being LysM domain-like proteins, but these did not match any LysM domains reported in the literature.. +PF11747 Killing trait
Pfam-B_983 (release 23.0). RebB is one of three proteins necessary for the production of R- bodies, refractile inclusion bodies produced by a small number of bacterial species, essential for the expression of the killing trait of the endosymbiont bacteria that produce them for attack upon the host Paramecium. R-bodies are highly insoluble protein ribbons which coil into cylindrical structures in the cell and the genes for their synthesis and assembly are encoded on a plasmid. One of these three proteins is RebB.. +PF11748 Protein of unknown function (DUF3306)
Pfam-B_984 (release 23.0). This family of proteobacterial species proteins has no known function.. +PF11749 Protein of unknown function (DUF3305)
Pfam-B_976 (release 23.0). Several members of this family are annotated as being molybdopterin-guanine dinucleotide biosynthesis protein A; however, this could not be confirmed. The family is found in proteobacteria.. +PF11750 Protein of unknown function (DUF3307)
Pfam-B_1017 (release 23.0). This family of bacterial proteins has no known function.. +PF11751 Protein of unknown function (DUF3308)
Pfam-B_993 (release 23.0). Some members of this family of bacterial proteins are annotated as being one of the several TonB-dependent siderophore receptors, but this could not be confirmed.. +PF11752 Protein of unknown function (DUF3309)
Pfam-B_1113 (release 23.0). This family is conserved in bacteria but its function is not known.. +PF11753 Protein of unknown function (DUF3310)
Pfam-B_1078 (release 23.0). This is a family of conserved bacteriophage proteins of unknown function.. +PF11754 Velvet factor
Pfam-B_963 (release 23.0). The velvet factor is conserved in many fungal species and is found to have gained different roles depending on the organism's need, expanding the conserved role in developmental programmes . The velvet factor orthologues can be adapted to the fungal-specific life cycle and may be involved in diverse functions such as sclerotia formation and toxin production, as in A. parasiticus , nutrition-dependent sporulation, as in A. fumigatus , or the microconidia-to-macroconidia ratio and cell wall formation, as in the heterothallic fungus Fusarium verticillioides .. +PF11755 Protein of unknown function (DUF3311)
Pfam-B_1042 (release 23.0). This is a family of short bacterial proteins of unknown function.. +PF11756 Nitrous oxide-stimulated promoter
Pfam-B_1030 (release 23.0). The function of ygaB is not known but it is a promoter that is stimulated by the presence of nitrous oxide . It is regulated by the gene-product of the bacterial nsrR gene.. +PF11757 Suppressor of RNA silencing P21-like
Pfam-B_1073 (release 23.0). This is a large family of putative suppressors of RNA silencing proteins, P20-P25, from ssRNA positive-strand viruses such as Closterovirus, Potyvirus and Cucumovirus families. RNA silencing is one of the major mechanisms of defence against viruses, and, in response, some viruses have evolved or acquired functions for suppression of RNA silencing. These counter-defencive viral proteins with RNA silencing suppressor (RSS) activity were originally discovered in the members of plant virus genera Potyvirus and Cucumovirus. Each of the conserved blocks of amino acids found in P21-like proteins corresponds to a computer-predicted alpha-helix, with the most C-terminal element being 42 residues long. This suggests conservation of the predominantly alpha-helical secondary structure in the P21-like proteins.. +PF11758 Aureocin-like type II bacteriocin
This is a small family of type II bacteriocins usually encoded on a plasmid. Characteristically the members are small, cationic, rich in Lys and Try, and bring about a generalised membrane permeabilisation leading to leakage of ions. The family includes aureocin A, lacticins Q and Z, and BhtB as well as an archaeal member.. +PF11759 Keratin-associated matrix
The major structural proteins of mammalian hair are the hair keratin intermediate filaments (KIFs) and the keratin-associated proteins (KRTAPs). In the hair cortex, hair keratins are embedded in an inter-filamentous matrix consisting of KRTAPs which are essential for the formation of a rigid and resistant hair shaft as a result of disulfide bonds between cysteine residues. There are essentially three groups of KRTAPs, viz: the high-sulfur (HS) and ultra-high-sulfur (UHS) KRTAPs (cysteine content: 16-30 and >30 mol%, respectively) and the high-glycine/tyrosine (HGT: 35-60 mol% glycine and tyrosine) KRTAPs.. +PF11760 Cobalamin synthesis G N-terminal
Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process . Within the cobalamin synthesis pathway CbiG catalyses both the opening of the lactone ring and the extrusion of the two-carbon fragment of cobalt-precorrin-5A from C-20 and its associated methyl group (deacylation) to give cobalt-precorrin-5B . The N-terminal of the enzyme is conserved in this family, and the C-terminal and the mid-sections are conserved independently in other families, CbiG_C and CbiG_mid, although the distinct function of each region is unclear.. +PF11761 Cobalamin biosynthesis central region
Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process .. +PF11762 L-arabinose isomerase C-terminal domain
This is a family of L-arabinose isomerases, AraA, EC:5.3.1.4. These enzymes catalyse the reaction: L-arabinose <=> L-ribulose. This reaction is the first step in the pathway of L-arabinose utilisation as a carbon source: after entering the cell, L-arabinose is converted into L-ribulose by the L-arabinose isomerase enzyme . This is a C-terminal non catalytic domain.. +PF11763 Cell-wall adhesin ligand-binding C-terminal
Pfam-B_85585 (release 23.0). The DIPSY domain is characterised by the distinctive D*I*PSY motif at the very C-terminus of yeast cell-wall glycoproteins. It appears not to be conserved in any other species, however. In fungi, cell adhesion is required for flocculation, mating and virulence, and is mediated by covalently bound cell wall proteins termed adhesins. Map4, an adhesin required for mating in Schizosaccharomyces pombe, is N-glycosylated and O-glycosylated, and is an endogenous substrate for the mannosyl transferase Oma4p. Map4 has a modular structure with an N-terminal signal peptide, a serine and threonine (S/T)-rich domain that includes nine repeats of 36 amino acids (rich in serine and threonine residues, but lacking glutamines), and a C-terminal DIPSY domain with no glycosyl-phosphatidyl inositol (GPI)-anchor signal. The N-terminal S/T-rich regions, are required for cell wall attachment, but the C-terminal DIPSY domain is required for agglutination and mating in liquid and solid media .. +PF11764 COMPASS (Complex proteins associated with Set1p) component N
Pfam-B_7375 (release 23.0). The n-SET or N-SET domain is a component of the COMPASS complex, associated with SET1, conserved in yeasts and in other eukaryotes up to humans. The COMPASS complex functions to methylate the fourth lysine of Histone 3 and for the silencing of genes close to the telomeres of chromosomes . This domain promotes trimethylation in conjunction with an RRM domain and is necessary for binding of the Spp1 component of COMPASS into the complex .. +PF11765 Hyphally regulated cell wall protein N-terminal
The proteins in this family are all fungal and largely annotated as being hyphally regulated cell wall proteins, and several are listed as the enzyme EC:3.2.1.18. This enzyme is acetylneuraminyl hydrolase or exo-alpha-sialidase, that hydrolyses glycosidic linkages of terminal sialic acid residues in oligosaccharides, glycoproteins, glycolipids, colominic acid and synthetic substrates.. +PF11766 Cell-wall agglutinin N-terminal ligand-sugar binding
This is likely to be the sugar or ligand binding domain of the yeast alpha-agglutinins.. +PF11767 Histone lysine methyltransferase SET associated
Pfam-B_8752 (release 23.0). SET domains are protein lysine methyltransferase enzymes. SET domains appear to be protein-protein interaction domains. A subset of SET domains have been called PR domains. The SET domain consists of two regions known as N-SET and SET-C. SET-C forms an unusual and conserved knot-like structure of probably functional importance. Additionally to SET-N and SET-C, an insert region (SET-I) and flanking regions of high structural variability form part of the overall structure . This domain is found in fungi associated with SET and N-SET domains.. +PF11768 Protein of unknown function (DUF3312)
Pfam-B_5984 (release 23.0). This is a eukaryotic family of uncharacterised proteins. This family shows similarity to WD40 repeat proteins.. +PF11769 Protein of unknown function (DUF3313)
Pfam-B_1303 (release 23.0). This is a bacterial family of proteins which are annotated as putative lipoproteins.. +PF11770 GRB2-binding adapter (GAPT)
This is a family of transmembrane proteins which bind the growth factor receptor-bound protein 2 (GRB2) in B cells . In contrast to other transmembrane adaptor proteins, GAPT is not phosphorylated upon BCR ligation. It associates with GRB2 constitutively through its proline-rich region .. +PF11771 Protein of unknown function (DUF3314)
This small family contains human, mouse and fish members but the function is not known.. +PF11772 DNA-directed RNA polymerase subunit beta
Pfam-B_4675 (release 23.0). This short 60-residue long bacterial family is the beta subunit of the DNA-directed RNA polymerase, likely to be EC:2.7.7.6. It is membrane-bound and is referred to by the name EpuA.. +PF11773 Type II secretory pathway pseudopilin
Pfam-B_4690 (release 23.0). The secreton (type II secretion) and type IV pilus biogenesis branches of the general secretory pathway in Gram-negative bacteria share many features that suggest a common evolutionary origin. Five components of the secreton, the pseudopilins, are similar to subunits of type IV pili. Pseudopilin PulG is one of the secreton pseudopilins, and is found to assemble into pilus-like bundles . PulG interacts with proteins H, I and J within the multi-protein complex as well as blocking extracellular secretion and reducing the amount of PulE protein as well as the amounts of PulL, PulM, PulC and PulD when G is over-expressed . In Klebsiella the pilus-like structure is composed largely of PulG .. +PF11774 Lsr2
Lsr2 is a small, basic DNA-bridging protein present in Mycobacterium and related actinomycetes. It is a functional homologue of the H-NS-like proteins . H-NS proteins play a role in nucleoid organisation and also function as a pleiotropic regulator of gene expression .. +PF11775 Cobalamin biosynthesis protein CobT VWA domain
Pfam-B_10956 (release 9.0). This family consists of several bacterial cobalamin biosynthesis (CobT) proteins. CobT is involved in the transformation of precorrin-3 into cobyrinic acid . . +PF11776 Domain of unknown function (DUF3315)
This is a Proteobacterial family of uncharacterised proteins. Some of the proteins in this family are annotated as being putative membrane proteins.. +PF11777 Protein of unknown function (DUF3316)
Pfam-B_4718 (release 23.0). This family of bacterial proteins has no known function. Several members are, however, annotated as being putative acyl-CoA synthetase, but this could not be confirmed.. +PF11778 Septation initiation
Pfam-B_41015 (release 23.0). This family is required for activation of the spg1 GTPase signalling cascade which leads to the initiation of septation and the subsequent termination of mitosis. It may act as a scaffold at the spindle pole body to which other components of the spg1 signalling cascade attach in pombe [1,2,3]. In S.cerevisiae it is both required for the proper formation of the spindle pole body outer plaque and may also connect the outer plaque to the central plaque embedded in the nuclear envelope .. +PF11779 Protein of unknown function (DUF3317)
Pfam-B_3618 (release 23.0). This is a short family of proteins conserved from fungi and plants to human. One each of the human and mouse members is annotated as being androgen down-regulated protein expressed in mouse prostate, with a potential signal transduction function, and all appear to be membrane proteins.. +PF11780 Protein of unknown function (DUF3318)
Pfam-B_1341 (release 23.0). This is a bacterial family of uncharacterised proteins.. +PF11781 RNA polymerase I-specific transcription initiation factor Rrn7
Pfam-B_4705 (release 23.0). Rrn7 is a transcription binding factor that associates strongly with both Rrn6 and Rrn11 to form a complex which itself binds the TATA-binding protein and is required for transcription by the core domain of the RNA PolI promoter [1,2]. . +PF11782 Protein of unknown function (DUF3319)
Pfam-B_4745 (release 23.0). This is a family of short bacterial proteins, a few of which are annotated as being minor tail protein. Otherwise the function is unknown.. +PF11783 Cytochrome c bacterial
Pfam-B_4681 (release 23.0). This is a family of long bacterial cytochrome c proteins, found in Proteobacteria and Chlorobi families.. +PF11784 Protein of unknown function (DUF3320)
Pfam-B_4770 (release 23.0). This family is conserved in Proteobacteria and Chlorobi families. Many members are annotated as being putative DNA helicase-related proteins.. +PF11785 Aft1 osmotic stress response (OSM) domain
This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The OSM domain has been shown to be involved in the osmotic stress response .. +PF11786 Aft1 HRA domain
This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The HRA domain is involved in meiotic recombination. It has been shown to be necessary and sufficient to activate recombination .. +PF11787 Aft1 HRR domain
This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The HRR domain is involved in meiotic recombination. It has been shown to be necessary and sufficient to repress recombination .. +PF11788 39S mitochondrial ribosomal protein L46
Pfam-B_1897 (release 23.0). This is the L46 subunit of the mammalian mitochondrial ribosome, conserved from plants and fungi.. +PF11789 Zinc-finger of the MIZ type in Nse subunit
Pfam-B_1696 (release 23.0). Nse1 and Nse2 are novel non-SMC subunits of the fission yeast Smc5-6 DNA repair complex. This family is the zinc-finger domain similar to the MIZ type of zinc-finger .. +PF11790 Glycosyl hydrolase catalytic core
Pfam-B_1680 (release 23.0), IPR013781. This family is probably a glycosyl hydrolase, and is conserved in fungi and some Proteobacteria. The pombe member is annotated as being from IPR013781.. +PF11791 Aconitate B N-terminal domain
Pfam-B_2605 (release 10.0). This family represents the N-terminal domain of Aconitase B.. +PF11792 Baculoviridae late expression factor 5 C-terminal domain
Pfam-B_5141 (release 7.6). This C-terminal domain is likely to be a zinc-binding domain.. +PF11793 FANCL C-terminal domain
This domain is found at the C-terminus of the Fancl protein in humans which is the putative E3 ubiquitin ligase subunit of the FA complex (Fanconi anaemia). Eight subunits of the Fanconi anaemia gene products form a multisubunit nuclear complex which is required for mono-ubiquitination of a downstream FA protein, FANCD2.. +PF11794 4-hydroxyphenylacetate 3-hydroxylase N terminal
Pfam-B_3148 (release 6.5). HpaB Swiss:Q57160 encodes part of the 4-hydroxyphenylacetate 3-hydroxylase from Escherichia coli. HpaB is part of a heterodimeric enzyme that also requires HpaC. The enzyme is NADH-dependent and uses FAD as the redox chromophore. This family also includes PvcC Swiss:O30372, which may play a role in one of the proposed hydroxylation steps of pyoverdine chromophore biosynthesis . . +PF11795 Uncharacterized protein conserved in bacteria N-term (DUF3322)
This domain, found in various hypothetical bacterial proteins, has no known function. The family represents just the N-terminus.. +PF11796 Protein of unknown function N-terminus (DUF3323)
Proteins in this entry are encoded within a conserved four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Beta-proteobacteria).. +PF11797 Protein of unknown function C-terminal (DUF3324)
Pfam-B_7106 (release 9.0). This family consists of several hypothetical bacterial proteins of unknown function.. +PF11798 IMS family HHH motif
Pfam-B_1349 (release 2.1). These proteins are involved in UV protection, eg (Swiss:P07375).. +PF11799 impB/mucB/samB family C-terminal domain
Pfam-B_1349 (release 2.1). These proteins are involved in UV protection (Swiss).. +PF11800 Replication protein C C-terminal region
Pfam-B_4463 (release 6.6). Replication protein C is involved in the early stages of viral DNA replication.. +PF11801 Tom37 C-terminal domain
Pfam-B_30563 (release 22.0). The TOM37 protein is one of the outer membrane proteins that make up the TOM complex for guiding cytosolic mitochondrial beta-barrel proteins from the cytosol across the outer mitochondrial membrane into the intermembrane space. In conjunction with TOM70 it guides peptides without an MTS into TOM40, the protein that forms the passage through the outer membrane . It has homology with Metaxin-1, also part of the outer mitochondrial membrane beta-barrel protein transport complex .. +PF11802 Centromere-associated protein K
CENP-K is one of seven new CENP-A-nucleosome distal (CAD) centromere components (the others being CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S) that are identified as assembling on the CENP-A nucleosome associated complex, NAC. The CENP-A NAC is essential, as disruption of the complex causes errors of chromosome alignment and segregation that preclude cell survival despite continued centromere-derived mitotic checkpoint signalling. CENP-K is centromere-associated through its interaction with one or more components of the CENP-A NAC.. +PF11803 UDP-glucuronate decarboxylase N-terminal
Pfam-B_36254 (release 23.0). The N-terminus of the UDP-glucuronate decarboxylases may be involved in localisation to the perinuclear Golgi membrane.. +PF11804 Protein of unknown function (DUF3325)
PFAM-B_2004 (release 23.0). This family of short proteins are functionally uncharacterized. This family is restricted to Alpha-, Beta- and Gamma-proteobacteria.. +PF11805 Protein of unknown function (DUF3326)
PFAM-B_2030 (release 23.0). This protein is functionally uncharacterized. It is about 300-500 amino acids in length. This family is found in plants and bacteria.. +PF11806 Domain of unknown function (DUF3327)
PFAM-B_2060 (release 23.0). +PF11807 Domain of unknown function (DUF3328)
PFAM-B_2062 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in eukaryotes.. +PF11808 Domain of unknown function (DUF3329)
PFAM-B_2082 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in bacteria.. +PF11809 Domain of unknown function (DUF3330)
PFAM-B_2077 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in bacteria.. +PF11810 Domain of unknown function (DUF3332)
PFAM-B_2104 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in bacteria.. +PF11811 Domain of unknown function (DUF3331)
PFAM-B_2106 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family vary in length from 96 to 160 amino acids.. +PF11812 Domain of unknown function (DUF3333)
PFAM-B_2108 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in bacteria. This presumed domain is typically between 116 to 159 amino acids in length.. +PF11813 Protein of unknown function (DUF3334)
PFAM-B_2118 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family are typically between 227 to 238 amino acids in length.. +PF11814 Peptidase_C39 like family
PFAM-B_2152 (release 23.0). +PF11815 Domain of unknown function (DUF3336)
PFAM-B_2157 (release 23.0). This family of proteins are functionally uncharacterised. This family is found in bacteria and eukaryotes. This presumed domain is typically between 143 to 227 amino acids in length.. +PF11816 Domain of unknown function (DUF3337)
PFAM-B_2058 (release 23.0). This family of proteins are functionally uncharacterised. This family is only found in eukaryotes. This presumed domain is typically between 285 to 342 amino acids in length.. +PF11817 Foie gras liver health family 1
Pfam-B_4417 (release 23.0). Mutating the gene foie gras in zebrafish has been shown to affect development; the mutants develop large, lipid-filled hepatocytes in the liver, resembling those in individuals with fatty liver disease . Foie-gras protein is long and has several well-defined domains though none of them has a known function. We have annotated this one as the first . The C-terminus of this region contains TPR repeats.. +PF11818 C-terminal domain of tail specific protease (DUF3340)
PFAM-B_2330 (release 23.0). This presumed domain is found at the C-terminus of tail specific proteases. Its function is unknown. This family is found in bacteria and eukaryotes. This presumed domain is typically between 88 to 187 amino acids in length.. +PF11819 Domain of unknown function (DUF3338)
PFAM-B_2474 (release 23.0). This family of proteins are functionally uncharacterised. This family is found in eukaryotes. This presumed domain is about 130 amino acids in length.. +PF11820 Protein of unknown function (DUF3339)
PFAM-B_2694 (release 23.0). This family of proteins are functionally uncharacterised. This family is found in eukaryotes. Proteins in this family are about 70 amino acids in length.. +PF11821 Protein of unknown function (DUF3341)
PFAM-B_2731 (release 23.0). This family of proteins are functionally uncharacterised. This family is found in bacteria. Proteins in this family are about 170 amino acids in length.. +PF11822 Domain of unknown function (DUF3342)
PFAM-B_2751 (release 23.0). This family of proteins are functionally uncharacterised. This family is found in bacteria. This presumed domain is typically between 170 to 303 amino acids in length. The N-terminal half of this family is a BTB-like domain.. +PF11823 Protein of unknown function (DUF3343)
PFAM-B_2956 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 78 to 102 amino acids in length.. +PF11824 Protein of unknown function (DUF3344)
PFAM-B_3041 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 367 to 1857 amino acids in length.. +PF11825 DUF3345;
Nuclear/hormone receptor activator site AF-1. PFAM-B_3322 (release 23.0). Nuclear receptors (NRs) are a family of ligand-inducible transcription factors, and, like other transcription factors, they contain a distinct DNA binding domain that allows for target gene recognition and several activation domains that possess the ability to activate transcription . One of these activation domains is at the N-terminus, although there are two distinct motifs within this domain, between residues 20-36 and between 74 and the end of this domain, which are the binding regions. One of the co-activators is TIF1beta, which appears to bind at the first motif .. +PF11826 Protein of unknown function (DUF3346)
PFAM-B_3462 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 231 to 659 amino acids in length.. +PF11827 Protein of unknown function (DUF3347)
PFAM-B_3580 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 169 to 570 amino acids in length.. +PF11828 Protein of unknown function (DUF3348)
PFAM-B_3615 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 244 to 323 amino acids in length.. +PF11829 Protein of unknown function (DUF3349)
PFAM-B_3716 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 99 to 124 amino acids in length.. +PF11830 Domain of unknown function (DUF3350)
PFAM-B_3789 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 50 to 64 amino acids in length.. +PF11831 DUF3351;
pre-mRNA splicing factor component. PFAM-B_3985 (release 23.0). This family is a region of the Myb-Related Cdc5p/Cef1 proteins, in fungi, and is part of the pre-mRNA splicing factor complex.. +PF11832 Protein of unknown function (DUF3352)
PFAM-B_2160 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 538 to 575 amino acids in length.. +PF11833 Protein of unknown function (DUF3353)
PFAM-B_2231 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 205 to 258 amino acids in length.. +PF11834 Domain of unknown function (DUF3354)
PFAM-B_2265 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 60 amino acids in length.. +PF11835 Domain of unknown function (DUF3355)
PFAM-B_2268 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 111 to 177 amino acids in length.. +PF11836 Protein of unknown function (DUF3356)
PFAM-B_2406 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 104 to 119 amino acids in length.. +PF11837 Domain of unknown function (DUF3357)
PFAM-B_2464 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 96 to 119 amino acids in length.. +PF11838 DUF3358;
ERAP1-like C-terminal domain. PFAM-B_2558 (release 23.0). This large domain is composed of 16 alpha helices organized as 8 HEAT-like repeats. This domain forms a concave face that faces towards the active site of the peptidase.. +PF11839 Protein of unknown function (DUF3359)
PFAM-B_2625 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 80 amino acids in length.. +PF11840 Protein of unknown function (DUF3360)
PFAM-B_2754 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 489 to 517 amino acids in length.. +PF11841 Domain of unknown function (DUF3361)
PFAM-B_2780 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 154 to 168 amino acids in length.. +PF11842 Domain of unknown function (DUF3362)
PFAM-B_2839 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 117 to 158 amino acids in length.. +PF11843 Protein of unknown function (DUF3363)
PFAM-B_2310 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 323 to 658 amino acids in length.. +PF11844 Domain of unknown function (DUF3364)
PFAM-B_2336 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 60 amino acids in length.. +PF11845 Protein of unknown function (DUF3365)
PFAM-B_2563 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 198 to 657 amino acids in length.. +PF11846 Domain of unknown function (DUF3366)
PFAM-B_2678 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 200 amino acids in length.. +PF11847 Domain of unknown function (DUF3367)
PFAM-B_2726 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 667 to 694 amino acids in length.. +PF11848 Domain of unknown function (DUF3368)
PFAM-B_2745 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is about 50 amino acids in length.. +PF11849 Domain of unknown function (DUF3369)
PFAM-B_2927 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 170 amino acids in length. The domain appears to be related to the GAF domain.. +PF11850 Protein of unknown function (DUF3370)
PFAM-B_3037 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 452 to 532 amino acids in length.. +PF11851 Domain of unknown function (DUF3371)
PFAM-B_3115 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 125 to 142 amino acids in length.. +PF11852 Domain of unknown function (DUF3372)
PFAM-B_3259 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This presumed domain is about 170 amino acids in length.. +PF11853 Protein of unknown function (DUF3373)
PFAM-B_3442 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 472 to 574 amino acids in length.. +PF11854 Protein of unknown function (DUF3374)
PFAM-B_3548 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 665 to 712 amino acids in length.. +PF11855 Protein of unknown function (DUF3375)
PFAM-B_3589 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 479 to 499 amino acids in length.. +PF11856 Protein of unknown function (DUF3376)
PFAM-B_3667 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 770 to 1142 amino acids in length.. +PF11857 Domain of unknown function (DUF3377)
PFAM-B_3829 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 70 amino acids in length.. +PF11858 Domain of unknown function (DUF3378)
PFAM-B_3989 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 80 amino acids in length.. +PF11859 Protein of unknown function (DUF3379)
PFAM-B_2469 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 234 to 251 amino acids in length.. +PF11860 Protein of unknown function (DUF3380)
PFAM-B_2757 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 194 to 284 amino acids in length. This protein is found associated with Pfam:PF01471.. +PF11861 Domain of unknown function (DUF3381)
PFAM-B_2792 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 156 to 174 amino acids in length. This domain is found associated with Pfam:PF07780, Pfam:PF01728.. +PF11862 Domain of unknown function (DUF3382)
PFAM-B_2882 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 100 amino acids in length. This domain is found associated with Pfam:PF02653.. +PF11863 Protein of unknown function (DUF3383)
PFAM-B_3017 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 356 to 501 amino acids in length.. +PF11864 Domain of unknown function (DUF3384)
PFAM-B_3114 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 422 to 486 amino acids in length. This domain is found associated with Pfam:PF02145.. +PF11865 Domain of unknown function (DUF3385)
PFAM-B_3188 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 160 to 172 amino acids in length. This domain is found associated with Pfam:PF00454, Pfam:PF02260, Pfam:PF02985, Pfam:PF02259 and Pfam:PF08771.. +PF11866 Protein of unknown function (DUF3386)
PFAM-B_3390 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are about 220 amino acids in length.. +PF11867 Domain of unknown function (DUF3387)
PFAM-B_3465 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 255 to 340 amino acids in length. This domain is found associated with Pfam:PF04851, Pfam:PF04313.. +PF11868 Protein of unknown function (DUF3388)
PFAM-B_3650 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 261 to 275 amino acids in length. This protein is found associated with Pfam:PF01842.. +PF11869 Protein of unknown function (DUF3389)
PFAM-B_3739 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 80 amino acids in length.. +PF11870 Domain of unknown function (DUF3390)
PFAM-B_3832 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 90 amino acids in length. This domain is found associated with Pfam:PF02589.. +PF11871 Domain of unknown function (DUF3391)
PFAM-B_2190 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is typically between 122 to 139 amino acids in length. This domain is found associated with Pfam:PF01966.. +PF11872 Protein of unknown function (DUF3392)
PFAM-B_2322 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 110 amino acids in length.. +PF11873 Domain of unknown function (DUF3393)
PFAM-B_2361 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is typically between 188 to 206 amino acids in length. This domain is found associated with Pfam:PF01464.. +PF11874 Domain of unknown function (DUF3394)
PFAM-B_2758 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 190 amino acids in length. This domain is found associated with Pfam:PF06808.. +PF11875 Domain of unknown function (DUF3395)
PFAM-B_2767 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 147 to 176 amino acids in length. This domain is found associated with Pfam:PF00226.. +PF11876 Protein of unknown function (DUF3396)
PFAM-B_2995 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 302 to 382 amino acids in length.. +PF11877 Protein of unknown function (DUF3397)
PFAM-B_3446 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 114 to 128 amino acids in length.. +PF11878 Domain of unknown function (DUF3398)
PFAM-B_3712 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 100 amino acids in length.. +PF11879 Domain of unknown function (DUF3399)
PFAM-B_3857 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 100 amino acids in length. This domain is found associated with Pfam:PF02214, Pfam:PF00520.. +PF11880 Domain of unknown function (DUF3400)
PFAM-B_3996 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 50 amino acids in length. This domain is found associated with Pfam:PF02754, Pfam:PF02913, Pfam:PF01565.. +PF11881 Domain of unknown function (DUF3401)
PFAM-B_2478 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 231 to 250 amino acids in length. This domain is found associated with Pfam:PF02145, Pfam:PF00595.. +PF11882 Domain of unknown function (DUF3402)
PFAM-B_2702 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 350 to 473 amino acids in length. This domain is found associated with Pfam:PF07923.. +PF11883 Domain of unknown function (DUF3403)
PFAM-B_2739 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00069, Pfam:PF08276, Pfam:PF00954, Pfam:PF01453.. +PF11884 Domain of unknown function (DUF3404)
PFAM-B_2879 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 260 amino acids in length. This domain is found associated with Pfam:PF02518, Pfam:PF00512.. +PF11885 Protein of unknown function (DUF3405)
PFAM-B_3057 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 636 to 810 amino acids in length.. +PF11886 Domain of unknown function (DUF3406)
PFAM-B_3286 (release 23.0). This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 270 amino acids in length. This domain is found associated with Pfam:PF04548.. +PF11887 Protein of unknown function (DUF3407)
PFAM-B_3559 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 360 to 454 amino acids in length. This protein is found associated with Pfam:PF02470.. +PF11888 Protein of unknown function (DUF3408)
PFAM-B_3594 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 128 to 160 amino acids in length.. +PF11889 Domain of unknown function (DUF3409)
PFAM-B_3824 (release 23.0). This domain is functionally uncharacterised. This domain is found in viruses. This presumed domain is about 60 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF05550, Pfam:PF05578.. +PF11890 Domain of unknown function (DUF3410)
PFAM-B_1956 (release 23.0). This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 90 amino acids in length. This domain is found associated with Pfam:PF02826, Pfam:PF00389. This domain has a conserved RRE sequence motif.. +PF11891 Domain of unknown function (DUF3411)
PFAM-B_1986 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 168 to 186 amino acids in length. This domain has a conserved RYQ sequence motif.. +PF11892 Domain of unknown function (DUF3412)
PFAM-B_1106 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 120 amino acids in length. This domain is found associated with Pfam:PF03641.. +PF11893 Domain of unknown function (DUF3413)
PFAM-B_1403 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 250 amino acids in length. This domain is found associated with Pfam:PF00884.. +PF11894 Protein of unknown function (DUF3414)
PFAM-B_1638 (release 23.0). This family of proteins are functionally uncharacterised. The family is found in eukaryotes and has a conserved LLG sequence motif.. +PF11895 Domain of unknown function (DUF3415)
PFAM-B_1962 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00141.. +PF11896 Domain of unknown function (DUF3416)
PFAM-B_601 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 190 amino acids in length. This domain is found associated with Pfam:PF00128.. +PF11897 Protein of unknown function (DUF3417)
PFAM-B_724 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 145 to 860 amino acids in length. This protein is found associated with Pfam:PF00343. This protein has a conserved AYF sequence motif.. +PF11898 Domain of unknown function (DUF3418)
PFAM-B_1028 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 582 to 594 amino acids in length. This domain is found associated with Pfam:PF07717, Pfam:PF00271, Pfam:PF04408.. +PF11899 Protein of unknown function (DUF3419)
PFAM-B_1329 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 398 to 802 amino acids in length.. +PF11900 Domain of unknown function (DUF3420)
PFAM-B_1362 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00023.. +PF11901 Protein of unknown function (DUF3421)
PFAM-B_1420 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 119 to 296 amino acids in length.. +PF11902 Protein of unknown function (DUF3422)
PFAM-B_513 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 426 to 444 amino acids in length.. +PF11903 Protein of unknown function (DUF3423)
PFAM-B_670 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 73 to 118 amino acids in length. This protein appears to be related to ribbon-helix-helix DNA-binding domains, suggesting these proteins may also bind DNA.. +PF11904 DUF3424;
PFAM-B_942 (release 23.0). This domain, and the associated ANK family repeat Pfam:PF00023 domain, together act as a chaperone for biogenesis and folding of the DP receptor for prostaglandin D2.. +PF11905 Domain of unknown function (DUF3425)
PFAM-B_1128 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 120 to 143 amino acids in length.. +PF11906 Protein of unknown function (DUF3426)
PFAM-B_1212 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 262 to 463 amino acids in length.. +PF11907 Domain of unknown function (DUF3427)
PFAM-B_1236 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 243 to 275 amino acids in length. This domain is found associated with Pfam:PF04851, Pfam:PF00271.. +PF11909 NADH-quinone oxidoreductase cyanobacterial subunit N
The proton-pumping NADH:ubiquinone oxidoreductase catalyzes the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 subcomplexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G), the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit . 
The cyanobacterial NDH-1 complex contains additional subunits, NdhM and NdhN, compared with the minimal set of the bacterial enzyme and these seem to be specific for thylakoid-located NDH-1 of photosynthetic organisms .. +PF11910 Cyanobacterial and plant NDH-1 subunit O
The proton-pumping NADH:ubiquinone oxidoreductase catalyzes the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 subcomplexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1, 2]. 
The three nuclear-encoded subunits NdhM, NdhN and NdhO are vital for the functional integrity of the plastidial complex.. +PF11911 Protein of unknown function (DUF3429)
PFAM-B_1072 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 147 to 245 amino acids in length.. +PF11912 Protein of unknown function (DUF3430)
PFAM-B_1305 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 209 to 265 amino acids in length.. +PF11913 Protein of unknown function (DUF3431)
PFAM-B_1346 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 291 to 390 amino acids in length. This protein has a conserved NLRC sequence motif.. +PF11914 Domain of unknown function (DUF3432)
PFAM-B_1326 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 100 amino acids in length. This domain is found associated with Pfam:PF00096. This domain has two conserved sequence motifs: YPSPV and PSP.. +PF11915 Protein of unknown function (DUF3433)
PFAM-B_1502 (release 23.0). This is a family of functionally uncharacterised proteins. The family is found in eukaryotes, and represents the conserved central region of the member proteins.. +PF11916 DUF3434;
Vacuolar protein 14 C-terminal Fig4p binding. PFAM-B_1661 (release 23.0). Vac14 is a scaffold for the Fab1 kinase complex, a complex that allows for the dynamic interconversion of PI3P and PI(3,5)P2p (phosphoinositide phosphate (PIP) lipids, that are generated transiently on the cytoplasmic face of selected intracellular membranes). This interconversion is regulated by at least five proteins in yeast: the lipid kinase Fab1p, lipid phosphatase Fig4p, the Fab1p activator Vac7p, the Fab1p inhibitor Atg18p, and Vac14p, a protein required for the activity of both Fab1p and Fig4p. The C-terminal region of Vac14 binds to Fig4p. The full length Vac14 in yeasts is likely to be a protein carrying a succession of HEAT repeats, most of which have now degenerated. This regulatory system is crucial for the proper functioning of the mammalian nervous system.. +PF11917 Protein of unknown function (DUF3435)
PFAM-B_1788 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 435 to 791 amino acids in length. This family is related to Pfam:PF00589 suggesting it may be an integrase enzyme.. +PF11918 Domain of unknown function (DUF3436)
PFAM-B_18 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF03572. This domain has two conserved sequence motifs: DPRL and SYEP.. +PF11919 Domain of unknown function (DUF3437)
PFAM-B_1910 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 142 to 163 amino acids in length.. +PF11920 Protein of unknown function (DUF3438)
PFAM-B_1942 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 276 to 307 amino acids in length.. +PF11921 Domain of unknown function (DUF3439)
PFAM-B_1105 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 46 to 94 amino acids in length. This domain is found associated with Pfam:PF01462, Pfam:PF00560.. +PF11922 Domain of unknown function (DUF3440)
PFAM-B_1674 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 53 to 190 amino acids in length. This domain is found associated with Pfam:PF01507. This domain has a conserved KND sequence motif.. +PF11923 Domain of unknown function (DUF3441)
PFAM-B_1795 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in archaea and eukaryotes. This domain is typically between 104 to 119 amino acids in length. This domain is found associated with Pfam:PF05833, Pfam:PF05670. This domain has two conserved residues (P and G) that may be functionally important.. +PF11924 Protein of unknown function (DUF3442)
PFAM-B_890 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 334 to 948 amino acids in length.. +PF11925 Protein of unknown function (DUF3443)
PFAM-B_1634 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 400 to 434 amino acids in length. This protein has two conserved sequence motifs: NPV and DNNG.. +PF11926 Domain of unknown function (DUF3444)
PFAM-B_1267 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 210 amino acids in length. This domain is found associated with Pfam:PF00226. This domain has two conserved sequence motifs: FSH and FSH.. +PF11927 Protein of unknown function (DUF3445)
PFAM-B_501 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 264 to 418 amino acids in length. This protein has a conserved RLP sequence motif. This protein has two completely conserved R residues that may be functionally important.. +PF11928 Domain of unknown function (DUF3446)
PFAM-B_833 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 80 to 99 amino acids in length. This domain is found associated with Pfam:PF00096. This domain has a single completely conserved residue P that may be functionally important.. +PF11929 Domain of unknown function (DUF3447)
PFAM-B_10 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00023. This domain has a conserved SHN sequence motif. It seems likely that this region represents divergent Ankyrin repeats.. +PF11931 Domain of unknown function (DUF3449)
PFAM-B_769 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 181 to 207 amino acids in length. This domain has two conserved sequence motifs: PIP and CEICG. The domain carries a zinc-finger domain of the C2H2-type.. +PF11932 Protein of unknown function (DUF3450)
PFAM-B_773 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are about 260 amino acids in length.. +PF11933 Domain of unknown function (DUF3451)
PFAM-B_877 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 199 to 238 amino acids in length. This domain is found associated with Pfam:PF06512, Pfam:PF00520. This domain has a conserved ADD sequence motif.. +PF11934 Domain of unknown function (DUF3452)
PFAM-B_1048 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 124 to 150 amino acids in length. This domain is found associated with Pfam:PF01858, Pfam:PF01857. This domain has a single completely conserved residue W that may be functionally important.. +PF11935 Domain of unknown function (DUF3453)
PFAM-B_1335 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 239 to 261 amino acids in length.. +PF11936 Domain of unknown function (DUF3454)
PFAM-B_1847 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 60 amino acids in length. This domain is found associated with Pfam:PF00066, Pfam:PF00008, Pfam:PF06816, Pfam:PF07684, Pfam:PF00023.. +PF11937 Protein of unknown function (DUF3455)
PFAM-B_1386 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 174 to 251 amino acids in length.. +PF11938 TLR4 regulator and MIR-interacting MSAP
PFAM-B_1799 (release 23.0). This family of proteins, found from plants to humans, is PRAT4 (A and B), a Protein Associated with Toll-like receptor 4. The Toll family of receptors - TLRs - plays an essential role in innate recognition of microbial products, the first line of defence against bacterial infection. PRAT4A influences the subcellular distribution and the strength of TLR responses and alters the relative activity of each TLR. PRAT4B regulates TLR4 trafficking to the cell surface and the extent of its expression there. TLR4 recognizes lipopolysaccharide (LPS), one of the most immuno-stimulatory glycolipids constituting the outer membrane of the Gram-negative bacteria. This family has also been described as a SAP-like MIR-interacting protein family.. +PF11939 Protein of unknown function (DUF3457)
PFAM-B_1822 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 162 to 197 amino acids in length. This protein has a conserved CSL sequence motif.. +PF11940 Domain of unknown function (DUF3458)
PFAM-B_160 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 402 to 419 amino acids in length. This domain is found associated with Pfam:PF01433. This domain has a conserved FSAPV sequence motif.. +PF11941 Domain of unknown function (DUF3459)
PFAM-B_897 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 110 amino acids in length. This domain is found associated with Pfam:PF00128, Pfam:PF02922.. +PF11942 Spt5 transcription elongation factor, acidic N-terminal
This is the very acidic N-terminal region of the early transcription elongation factor Spt5 . The Spt5-Spt4 complex regulates early transcription elongation by RNA polymerase II and has an imputed role in pre-mRNA processing via its physical association with mRNA capping enzymes. The actual function of this N-terminal domain is not known although it is dispensable for binding to Spt4 .. +PF11943 Protein of unknown function (DUF3460)
PFAM-B_2362 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 70 amino acids in length. This protein has a conserved WDK sequence motif.. +PF11944 Protein of unknown function (DUF3461)
PFAM-B_2125 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 130 amino acids in length. This protein has two conserved sequence motifs: KFK and HLE.. +PF11945 DUF3462;
WAHD domain of WASH complex. PFAM-B_2071 (release 23.0). This domain forms part of the WASH-complex of domains and proteins that activates the Arp2/3 complex, see Pfam:PF04062. The Arp2/3 complex regulates endocytosis, sorting, and trafficking within the cell. The WAHD domain attaches to the FAM21 proteins via its N-terminal residues and to the microtubules via its C-terminal residues.. +PF11946 Domain of unknown function (DUF3463)
PFAM-B_2277 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 140 amino acids in length. This domain is found associated with Pfam:PF04055. This domain has two conserved sequence motifs: CTPWG and PCYL, plus a highly conserved CxxCxxHC motif.. +PF11947 Protein of unknown function (DUF3464)
PFAM-B_2676 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 137 to 196 amino acids in length.. +PF11948 Protein of unknown function (DUF3465)
PFAM-B_2827 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 131 to 151 amino acids in length. This protein has a conserved HWTH sequence motif.. +PF11949 Protein of unknown function (DUF3466)
PFAM-B_2541 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 564 to 612 amino acids in length.. +PF11950 Protein of unknown function (DUF3467)
PFAM-B_2299 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in bacteria, archaea and viruses. Proteins in this family are typically between 101 to 118 amino acids in length.. +PF11951 DUF3468;
Fungal specific transcription factor domain. PFAM-B_2291 (release 23.0). Members of this family are likely to be transcription factors. This protein is found in fungi. Proteins in this family are typically between 454 to 826 amino acids in length. This protein is found associated with Pfam:PF00172.. +PF11952 Protein of unknown function (DUF3469)
PFAM-B_2159 (release 23.0). This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 108 to 439 amino acids in length.. +PF11953 Domain of unknown function (DUF3470)
PFAM-B_2503 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00037. This domain has a single completely conserved residue N that may be functionally important.. +PF11954 Domain of unknown function (DUF3471)
PFAM-B_2961 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 98 to 114 amino acids in length. This domain is found associated with Pfam:PF00144.. +PF11955 Plant organelle RNA recognition domain
Moxon SJ, Barkan A, Coggill P. Pfam-B_1780 (release 8.0). This family, which was previously known as DUF860, has been shown to be a component of group II intron ribonucleoprotein particles in maize chloroplasts. The domain is required for the splicing of the introns with which it associates, and promotes splicing in the context of a heterodimer with the RNase III-domain protein RNC1. All of the members are predicted to localise to mitochondria or chloroplasts . It seems likely that most PORR proteins function in organellar RNA metabolism .. +PF11956 Ankyrin-G binding motif of KCNQ2-3
Pfam-B_10256 (release 23.0). Interactions with ankyrin-G are crucial to the localisation of voltage-gated sodium channels (VGSCs) at the axon initial segment and for neurons to initiate action potentials. This conserved 9-amino acid motif ((V/A)P(I/L)AXXE(S/D)D) is required for ankyrin-G binding and functions to localise sodium channels to a variety of 'excitable' membrane domains both inside and outside of the nervous system . This motif has also been identified in the potassium channel 6TM proteins KCNQ2 and KCNQ3 , that correspond to the M channels that exert a crucial influence over neuronal excitability. KCNQ2/KCNQ3 channels are preferentially localised to the surface of axons both at the axonal initial segment and more distally, and this axonal initial segment targeting of surface KCNQ channels is mediated by these ankyrin-G binding motifs of KCNQ2 and KCNQ3 . KCNQ3 is a major determinant of M channel localisation to the AIS, rather than KCNQ2 . Phylogenetic analysis reveals that anchor motifs evolved sequentially in chordates (NaV channel) and jawed vertebrates (KCNQ2/3) .. +PF11957 THO complex subunit 1 transcription elongation factor
Pfam-B_2059 (release 23.0). The THO complex plays a role in coupling transcription elongation to mRNA export. It is composed of subunits THP2, HPR1, THO2 and MFT1 . The THO complex is a nuclear complex that is required for transcription elongation through genes containing tandemly repeated DNA sequences. The THO complex is also part of the TREX (TRanscription EXport) complex that is involved in coupling transcription to export of mRNAs to the cytoplasm [2,3].. +PF11958 Domain of unknown function (DUF3472)
PFAM-B_2598 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria, eukaryotes and viruses. This domain is typically between 174 to 190 amino acids in length. This domain has a single completely conserved residue G that may be functionally important.. +PF11959 Domain of unknown function (DUF3473)
PFAM-B_3065 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 130 amino acids in length. This domain is found associated with Pfam:PF01522. This domain has two completely conserved residues (P and H) that may be functionally important.. +PF11960 Domain of unknown function (DUF3474)
PFAM-B_3095 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 126 to 140 amino acids in length. This domain is found associated with Pfam:PF00487.. +PF11961 Domain of unknown function (DUF3475)
PFAM-B_3098 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 60 amino acids in length. This domain is found associated with Pfam:PF05003.. +PF11962 DUF3476;
Peptidase_G2, IMC autoproteolytic cleavage domain. PFAM-B_3119 (release 23.0). This domain is found at the very C-terminus of bacteriophage parallel beta-helical tailspike proteins. It carries the enzymic residues that induce autoproteolytic cleavage to bring about maturation of the folding process of the helix in a chaperone-like manner. The domain thus mediates the assembly of a large tailspike protein and then releases itself after maturation. These C-terminal regions that autoproteolytically release themselves after maturation are exchangeable between functionally unrelated N-terminal proteins and have been identified in a number of bacteriophage tailspike proteins .. +PF11963 Protein of unknown function (DUF3477)
PFAM-B_3147 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in viruses. Proteins in this family are typically between 246 to 7162 amino acids in length. This protein is found associated with Pfam:PF08716, Pfam:PF01661, Pfam:PF05409, Pfam:PF08717, Pfam:PF01831, Pfam:PF08715, Pfam:PF08710.. +PF11964 DUF3478;
PFAM-B_640 (release 23.0). These proteins adopt an alpha/beta SpoIIAA-like fold, similar to that found in STAT (Pfam:PF01740). They adopt open and closed conformations arising from different arrangements of their alpha-2 and alpha-3 helices. They may be membrane associated and may function as carriers of non-polar compounds .. +PF11965 Domain of unknown function (DUF3479)
PFAM-B_1065 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is about 160 amino acids in length. This domain is found associated with Pfam:PF02514.. +PF11966 Fibronectin-binding repeat
Pfam-B_5998 (release 23.0). Streptococcal surface repeat domain - SSURE - is a protein fragment found to bind to extracellular matrix protein fibronectin but not to collagen or submaxillary mucin in Streptococci. Anti-SSURE antibodies recognised the corresponding protein on the surface of streptococcal cells. The full-length proteins are thus fibronectin-binding surface adhesins.. +PF11967 Recombination protein O N terminal
Recombination protein O (RecO) is involved in DNA repair and Pfam:PF00470 pathway recombination. This domain forms a beta barrel structure.. +PF11968 Putative methyltransferase (DUF3321)
Pfam-B_6141 (release 23.0). This family is broadly conserved, being found in fungi, plants, arthropods and up to primates. It may be a methyltransferase.. +PF11969 Scavenger mRNA decapping enzyme C-term binding
Pfam-B_9894 (release 8.0). This family consists of several scavenger mRNA decapping enzymes (DcpS) and is the C-terminal region. DcpS is a scavenger pyrophosphatase that hydrolyses the residual cap structure following 3' to 5' decay of an mRNA. The association of DcpS with 3' to 5' exonuclease exosome components suggests that these two activities are linked and there is a coupled exonucleolytic decay-dependent decapping pathway. The C-terminal domain contains a histidine triad (HIT) sequence with three histidines separated by hydrophobic residues. The central histidine within the DcpS HIT motif is critical for decapping activity and defines the HIT motif as a new mRNA decapping domain, making DcpS the first member of the HIT family of proteins with a defined biological function.. +PF11970 G protein-coupled glucose receptor regulating Gpa2 C-term
Pfam-B_11657 (release 23.0). Git3 is one of six proteins required for glucose-triggered adenylate cyclase activation, and is a G protein-coupled receptor responsible for the activation of adenylate cyclase through Gpa2 - heterotrimeric G protein alpha subunit, part of the glucose-detection pathway. Git3 contains seven predicted transmembrane domains, a third cytoplasmic loop and a cytoplasmic tail . This family is the conserved C-terminal domain of the member proteins.. +PF11971 CAMSAP CH domain
This domain is the N-terminal CH domain from the CAMSAP proteins.. +PF11972 HTH DNA binding domain
Pfam-B_8688 (release 14.0). This is a helix-turn-helix DNA binding domain.. +PF11973 NQRA C-terminal domain
Pfam-B_3622 (release 8.0). This family consists of the C-terminal domain of several bacterial Na(+)-translocating NADH-quinone reductase subunit A (NQRA) proteins. The Na(+)-translocating NADH: ubiquinone oxidoreductase (Na(+)-NQR) generates an electrochemical Na(+) potential driven by aerobic respiration .. +PF11974 Alpha-2-macroglobulin MG1 domain
This is the N-terminal MG1 domain from alpha-2-macroglobulin .. +PF11975 Family 4 glycosyl hydrolase C-terminal domain
+PF11976 Ubiquitin-2 like Rad60 SUMO-like
Wood V, Chahwan C, Coggill P. The small ubiquitin-related modifier SUMO-1 is a Ub/Ubl family member, and although SUMO-1 shares structural similarity to Ub, SUMO's cellular functions remain distinct insomuch as SUMO modification alters protein function through changes in activity, cellular localisation, or by protecting substrates from ubiquitination. Rad60 family members contain functionally enigmatic, integral SUMO-like domains (SLDs). Despite their divergence from SUMO, each Rad60 SLD interacts with a subset of SUMO pathway enzymes: SLD2 specifically binds the SUMO E2 conjugating enzyme (Ubc9), whereas SLD1 binds the SUMO E1 (Fub2, also called Uba2) activating and E3 (Pli1, also called Siz1 and Siz2) specificity enzymes. Structural analysis of PDB:2uyz reveals a mechanistic basis for the near-synonymous roles of Rad60 and SUMO in survival of genotoxic stress and suggests unprecedented DNA-damage-response functions for SLDs in regulating SUMOylation. The Rad60 branch of this family is also known as RENi (Rad60-Esc2-Nip45), and biologically it should be two distinct families SUMO and RENi (Rad60-Esc2-Nip45).. +PF11977 Zc3h12a-like Ribonuclease NYN domain
This domain is found in the Zc3h12a protein which has been shown to be a ribonuclease that controls the stability of a set of inflammatory genes. It has been suggested that this domain belongs to the PIN domain superfamily. This domain has also been identified as part of the NYN domain family.. +PF11978 Shoulder domain
This domain is found in the Major Vault Protein and has been called the shoulder domain . This family includes two bacterial proteins Swiss:A6FXE2 and Swiss:A1ZGE7. This suggests that some bacteria may possess vault particles.. +PF11979 Domain of unknown function (DUF3480)
PFAM-B_2031 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 350 to 362 amino acids in length. This domain is found associated with Pfam:PF01363.. +PF11980 Domain of unknown function (DUF3481)
Pfam-B_2819 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00754, Pfam:PF00431, Pfam:PF00629. This domain has two completely conserved residues (Y and E) that may be functionally important.. +PF11981 Domain of unknown function (DUF3482)
PFAM-B_3168 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 289 to 301 amino acids in length. This domain is found associated with Pfam:PF01926. The central region of these proteins contains a hydrophobic region that is similar to Pfam:PF05433.. +PF11982 Domain of unknown function (DUF3483)
PFAM-B_3204 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 230 amino acids in length. This domain is found associated with Pfam:PF02754.. +PF11983 Domain of unknown function (DUF3484)
PFAM-B_3216 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 65 to 81 amino acids in length. This domain is found associated with Pfam:PF02491.. +PF11984 Protein of unknown function (DUF3485)
PFAM-B_3236 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 223 to 526 amino acids in length. This protein is found associated with Pfam:PF09721.. +PF11985 Protein of unknown function (DUF3486)
PFAM-B_3271 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are about 190 amino acids in length.. +PF11986 Influenza A Proapoptotic protein
PB1-F2 is a protein found in almost all known strains of Influenza A virus - a negative sense ssRNA Orthomyxovirus . It originates from translation of the viral polymerase gene in an alternative reading frame . PB1-F2 consists of two independent structural domains, two closely neighboring short helices at the N terminus, and an extended C-terminal helix . Although the protein has originally been described to induce apoptosis, it has now been shown that PB1-F2 more likely acts as an apoptosis promoter in concert with other apoptosis-inducing agents . PB1-F2 promotes apoptosis by localising to the mitochondria where it destabilises the membrane. This will cause release of cytochrome C which activates the caspase cascade of apoptosis through the endogenous pathway . In this way it acts like the Bcl-2 protein family which are physiological apoptotic regulators in cells .. +PF11987 Translation-initiation factor 2
IF-2 is a translation initiator in each of the three main phylogenetic domains (Eukaryotes , Bacteria and Archaea ). IF2 interacts with formylmethionine-tRNA, GTP, IF1, IF3 and both ribosomal subunits . Through these interactions, IF2 promotes the binding of the initiator tRNA to the A site in the smaller ribosomal subunit and catalyses the hydrolysis of GTP following initiation-complex formation .. +PF11988 Retrograde transport protein Dsl1 N terminal
Dsl1 is a peripheral membrane protein required for transport between the Golgi and the endoplasmic reticulum . It is localised to the ER membrane, and in vitro it specifically binds to coatomer, the major component of the protein coat of COPI vesicles . It is comprised primarily of alpha helical bundles . It complexes with another subunit of the Dsl1p complex called Tip20 which forms heterodimers by pairing the N termini of each protein . A central disorganised region between the N and C termini of Dsl1 contains binding sites for coatomer . The C terminus of Dsl1 contains a binding site to the Sec39 subunit of the Dsl1p complex .. +PF11989 Retrograde transport protein Dsl1 C terminal
Dsl1 is a peripheral membrane protein required for transport between the Golgi and the endoplasmic reticulum . It is localised to the ER membrane, and in vitro it specifically binds to coatomer, the major component of the protein coat of COPI vesicles . Binding sites for coatomer are found on a disorganised region between the C and N termini of Dsl1 . The C terminal domain is involved in binding to the Sec39 subunit of the Dsl1p complex . The N terminal complexes with another subunit of the Dsl1p complex called Tip20 which forms heterodimers by pairing the N termini of each protein .. +PF11990 Protein of unknown function (DUF3487)
PFAM-B_2242 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 121 to 136 amino acids in length. This protein has a conserved RLN sequence motif.. +PF11991 Tryptophan dimethylallyltransferase
PFAM-B_2054 (release 23.0). This family of proteins represents tryptophan dimethylallyltransferase (EC:2.5.1.34), which catalyses the first step of ergot alkaloid biosynthesis . Ergot alkaloids, which are produced by endophyte fungi, can enhance plant host fitness, but also cause livestock toxicosis to host plants. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 390 to 465 amino acids in length.. +PF11992 Domain of unknown function (DUF3488)
PFAM-B_3123 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 323 to 339 amino acids in length. This domain is found associated with Pfam:PF01841. This domain has a conserved PLW sequence motif. This domain contains 6 transmembrane helices.. +PF11993 Ribosomal S4P (gammaproteobacterial)
PFAM-B_3290 (release 23.0). This family of proteins are ribosomal SSU S4 p proteins. This protein is found in gamma-proteobacteria. Proteins in this family are typically between 162 to 178 amino acids in length.. +PF11994 Protein of unknown function (DUF3489)
PFAM-B_3545 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 84 to 211 amino acids in length. This protein has a single completely conserved residue W that may be functionally important.. +PF11995 Domain of unknown function (DUF3490)
PFAM-B_3558 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 160 amino acids in length. This domain is found associated with Pfam:PF00225. This domain has two conserved sequence motifs: EVE and ESA.. +PF11996 Protein of unknown function (DUF3491)
PFAM-B_3393 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 286 to 3225 amino acids in length. This protein is found associated with Pfam:PF04488.. +PF11997 Domain of unknown function (DUF3492)
PFAM-B_2107 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 259 to 282 amino acids in length. This domain is found associated with Pfam:PF00534. This domain has two conserved sequence motifs: GGVS and EHGIY.. +PF11998 Protein of unknown function (DUF3493)
PFAM-B_3788 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 79 to 331 amino acids in length.. +PF11999 Protein of unknown function (DUF3494)
PFAM-B_3080 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 243 to 678 amino acids in length. This protein has a single completely conserved residue G that may be functionally important.. +PF12000 DUF3495;
Glycosyl transferase family 4 group. PFAM-B_3335 (release 23.0). This domain is found associated with Pfam:PF00534.. +PF12001 Domain of unknown function (DUF3496)
PFAM-B_3407 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 110 amino acids in length.. +PF12002 MgsA AAA+ ATPase C terminal
The MgsA protein possesses DNA-dependent ATPase and ssDNA annealing activities . MgsA contributes to the recovery of stalled replication forks and therefore prevents genomic instability caused by aberrant DNA replication . Additionally, MgsA may play a role in chromosomal segregation . This is consistent with a report that MgsA co-localises with the replisome and affects chromosome segregation . This domain represents the C terminal region of MgsA.. +PF12003 Domain of unknown function (DUF3497)
PFAM-B_3419 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 213 to 257 amino acids in length. This domain is found associated with Pfam:PF02793, Pfam:PF00002, Pfam:PF01825. This domain has a single completely conserved residue W that may be functionally important.. +PF12004 Domain of unknown function (DUF3498)
PFAM-B_3438 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 433 to 538 amino acids in length. This domain is found associated with Pfam:PF00616, Pfam:PF00168. This domain has two conserved sequence motifs: DLQ and PLSFQNP.. +PF12005 Protein of unknown function (DUF3499)
PFAM-B_3439 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 125 to 163 amino acids in length.. +PF12006 Protein of unknown function (DUF3500)
PFAM-B_3479 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 335 to 438 amino acids in length. This protein has a conserved GHH sequence motif. This protein has two completely conserved G residues that may be functionally important.. +PF12007 Protein of unknown function (DUF3501)
PFAM-B_3488 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are about 200 amino acids in length. The structure of protein Swiss:Q63J81 from B. pseudomallei has been solved. This protein contains two domains, domain I (1:31, 46:81) is a helical domain, domain II (32:45,82-193) is a mainly beta protein with a beta barrel. According to crystal contacts the protein probably functions as a dimer. The gene neighbourhood analysis suggests that this protein may be functionally related to rubrerythrin and ferredoxin. The wedge surface between the two domains might be functionally important. The fold of this protein could best be described as a circularly permuted C2-like fold (details derived from TOPSAN).. +PF12008 Type I restriction and modification enzyme - subunit R C terminal
This enzyme has been characterised and shown to belong to a new family of the type I class of restriction and modification enzymes. This family is involved in bacterial defence by making double strand breaks in specific double stranded DNA sequences, e.g. that of invading bacteriophages. EcoR124 is made up of three subunits, HsdR, HsdS and HsdM. The R subunit has ATPase and restriction endonuclease activity. This domain is the C terminal of the R subunit .. +PF12009 Telomerase ribonucleoprotein complex - RNA binding domain
Telomeres in most organisms are comprised of tandem simple sequence repeats . The total length of telomeric repeat sequence at each chromosome end is determined in a balance of sequence loss and sequence addition . One major influence on telomere length is the enzyme telomerase . It is a reverse transcriptase that adds these simple sequence repeats to chromosome ends by copying a template sequence within the RNA component of the enzyme . The RNA binding domain of telomerase - TRBD - is made up of twelve alpha helices and two short beta sheets . How telomerase and associated regulatory factors physically interact and function with each other to maintain appropriate telomere length is poorly understood. It is known however that TRBD is involved in formation of the holoenzyme (which performs the telomere extension) in addition to recognition and binding of RNA .. +PF12010 Domain of unknown function (DUF3502)
PFAM-B_3448 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 140 amino acids in length. This domain is found associated with Pfam:PF01547.. +PF12011 Domain of unknown function (DUF3503)
PFAM-B_2686 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in viruses. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF00271.. +PF12012 Domain of unknown function (DUF3504)
PFAM-B_2196 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 156 to 173 amino acids in length.. +PF12013 Protein of unknown function (DUF3505)
PFAM-B_2856 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 247 to 1018 amino acids in length. This region contains two segments that are likely to be C2H2 zinc binding domains.. +PF12014 Domain of unknown function (DUF3506)
PFAM-B_3293 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 131 to 148 amino acids in length. This domain has a conserved KLTGD sequence motif.. +PF12015 Domain of unknown function (DUF3507)
PFAM-B_3482 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 180 amino acids in length. This domain has a conserved ENL sequence motif.. +PF12016 Stonin 2
Stonin 2 is involved in clathrin mediated endocytosis . It binds to Eps15 by its highly conserved NPF motif. The complex formed has been shown to directly associate with the clathrin adaptor complex AP-2, and to localize to clathrin-coated pits (CCPs) . In addition, stonin2 was recently identified as a specific sorting adaptor for synaptotagmin, and may thus regulate synaptic vesicle recycling .. +PF12017 Transposase_37;
PFAM-B_3357 (release 23.0). Proteins in this family are transposases found in insects. This region is about 230 amino acids in length and is found associated with Pfam:PF05485.. +PF12018 Domain of unknown function (DUF3508)
PFAM-B_3527 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 280 amino acids in length. This domain has two conserved sequence motifs: GFC and GLL. This family is also known as UPF0704.. +PF12019 Type II transport protein GspH
GspH is involved in bacterial type II export systems . Like all pilins, GspH has an N terminus alpha helix . This helix is followed by nine beta strands forming two beta sheets, one of five antiparallel strands and one of four antiparallel strands . GspH is a minor pseudopilin; it is expressed much less than other pseudopilins in the type II secretion pilus (major pilins) . The function and localisation of minor pseudopilins are still to be fully unraveled . It has been suggested that some minor pseudopilins may assemble either into the base or the tip of pili, or both. They function as initiators or regulators of pilus biogenesis and dynamics, and/or as adaptors between various pseudopilin components and other members of the T2SS .. +PF12020 TAFA family
PFAM-B_3899 (release 23.0). This family of secreted proteins are brain specific and thought to be chemokines . These proteins are found in vertebrates. Proteins in this family are typically between 94 to 133 amino acids in length and contain a number of conserved cysteines.. +PF12021 Protein of unknown function (DUF3509)
PFAM-B_2180 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 92 to 110 amino acids in length. This protein has two completely conserved residues (G and R) that may be functionally important.. +PF12022 Domain of unknown function (DUF3510)
PFAM-B_2857 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 130 amino acids in length. This domain is found associated with Pfam:PF06148.. +PF12023 Domain of unknown function (DUF3511)
PFAM-B_3314 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain has two completely conserved residues (Y and K) that may be functionally important.. +PF12024 Domain of unknown function (DUF3512)
PFAM-B_3525 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 231 to 249 amino acids in length. This domain is found associated with Pfam:PF00439.. +PF12025 Phage protein C
PFAM-B_3530 (release 23.0). This family of phage proteins is functionally uncharacterised. Proteins in this family are typically between 68 to 86 amino acids in length.. +PF12026 Domain of unknown function (DUF3513)
PFAM-B_3541 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 192 to 218 amino acids in length. This domain is found associated with Pfam:PF00018, Pfam:PF08824. This domain has a conserved QPP sequence motif.. +PF12027 Protein of unknown function (DUF3514)
PFAM-B_3570 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 368 to 823 amino acids in length.. +PF12028 Protein of unknown function (DUF3515)
PFAM-B_3590 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 166 to 214 amino acids in length. This protein has a conserved RCG sequence motif.. +PF12029 Domain of unknown function (DUF3516)
PFAM-B_3601 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 460 to 473 amino acids in length. This domain is found associated with Pfam:PF00270, Pfam:PF00271.. +PF12030 Domain of unknown function (DUF3517)
PFAM-B_3933 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 340 amino acids in length. This domain is found associated with Pfam:PF00443.. +PF12031 Domain of unknown function (DUF3518)
PFAM-B_3830 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 260 amino acids in length. This domain is found associated with Pfam:PF01388.. +PF12032 Regulatory CLIP domain of proteinases
CLIP is a regulatory domain which controls the proteinase action of various proteins of the trypsin family, e.g. easter and pap2. The CLIP domain remains linked to the protease domain after cleavage of a conserved residue which retains the protein in zymogen form. It is named CLIP because it can be drawn in the shape of a paper clip. It has many disulphide bonds and highly conserved cysteine residues, and so it folds extensively. . +PF12033 Protein of unknown function (DUF3519)
PFAM-B_2444 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 117 to 1154 amino acids in length. This protein has a single completely conserved residue Q that may be functionally important.. +PF12034 Domain of unknown function (DUF3520)
PFAM-B_3604 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 180 amino acids in length. This domain is found associated with Pfam:PF00092.. +PF12035 Protein of unknown function (DUF3521)
PFAM-B_3612 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 42 to 74 amino acids in length.. +PF12036 Protein of unknown function (DUF3522)
PFAM-B_3665 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 220 to 787 amino acids in length.. +PF12037 Domain of unknown function (DUF3523)
PFAM-B_3746 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 257 to 277 amino acids in length. This domain is found associated with Pfam:PF00004. This domain has a conserved LER sequence motif.. +PF12038 Domain of unknown function (DUF3524)
PFAM-B_3749 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF00534. This domain has two conserved sequence motifs: HENQ and FNS. This domain has a single completely conserved residue S that may be functionally important.. +PF12039 Protein of unknown function (DUF3525)
PFAM-B_3833 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in viruses. Proteins in this family are about 360 amino acids in length.. +PF12040 Domain of unknown function (DUF3526)
PFAM-B_3851 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 149 to 170 amino acids in length. This domain has a single completely conserved residue P that may be functionally important.. +PF12041 Transcriptional regulator DELLA protein N terminal
Gibberellins are plant hormones which have great impact on growth signalling. DELLA proteins are transcriptional regulators of growth related proteins which are downregulated when gibberellins bind to their receptor GID1. GID1 forms a complex with DELLA proteins and signals them towards 26S proteasome. The N terminal of DELLA proteins contains conserved DELLA and VHYNP motifs which are important for GID1 binding and proteolysis of the DELLA proteins. . +PF12042 Tubuliform egg casing silk strands structural domain
Spiders use fibroins to make silk strands. This family includes tubuliform silk fibroins which are used to protect egg cases. This domain is a structural domain which is found in repeats of up to 20 in many individuals (although this is not necessarily the case). RP1 makes up structural domains in the N terminal while RP2 makes up structural domains in the C terminal. . +PF12043 Domain of unknown function (DUF3527)
PFAM-B_3945 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 120 amino acids in length. This domain has a conserved CDCGGWD sequence motif.. +PF12044 Putative peptidase family
PFAM-B_3942 (release 23.0). This family of proteins is functionally uncharacterised. However, it does contain an HEXXH motif characteristic of metallopeptidases. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 625 to 773 amino acids in length.. +PF12045 Protein of unknown function (DUF3528)
PFAM-B_3981 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 185 to 298 amino acids in length. This protein is found associated with Pfam:PF00046.. +PF12046 Protein of unknown function (DUF3529)
PFAM-B_3346 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 176 to 190 amino acids in length.. +PF12047 Cytosine specific DNA methyltransferase replication foci domain
This domain is part of a cytosine specific DNA methyltransferase enzyme. It functions non-catalytically to target the protein towards replication foci. This allows the DNMT1 protein to methylate the correct residues. This domain targets DMAP1 and HDAC2 to the replication foci during the S phase of mitosis. They are thought to have some importance in conversion of critical histone lysine moieties. . +PF12048 Protein of unknown function (DUF3530)
PFAM-B_2450 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 272 to 336 amino acids in length. These proteins are distantly related to alpha/beta hydrolases so they may act as enzymes.. +PF12049 Protein of unknown function (DUF3531)
PFAM-B_2583 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 149 to 199 amino acids in length.. +PF12051 Protein of unknown function (DUF3533)
PFAM-B_2348 (release 23.0). This family of transmembrane proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 393 to 772 amino acids in length.. +PF12052 Voltage gated calcium channel subunit beta domain 4Aa N terminal
The beta subunit of voltage gated calcium channels is coded for by four genes 1-4. Gene 4 can produce two types of beta4A domain (beta4Aa and beta4Ab) according to how the gene splicing is carried out. This family is part of the beta4Aa N terminal domain. It is made up of an alpha helix and a beta strand. It is thought to regulate the channel properties through protein-protein interactions with non Ca channel proteins. . +PF12053 Domain of unknown function (DUF3534)
PFAM-B_2753 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 150 amino acids in length. This domain is found associated with Pfam:PF00595. This domain has a conserved GILD sequence motif.. +PF12054 Domain of unknown function (DUF3535)
PFAM-B_2858 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 439 to 459 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF02985, Pfam:PF00176. This domain has two completely conserved residues (P and K) that may be functionally important.. +PF12055 Domain of unknown function (DUF3536)
PFAM-B_3129 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 274 to 285 amino acids in length. This domain is found associated with Pfam:PF03065.. +PF12056 Protein of unknown function (DUF3537)
PFAM-B_3199 (release 23.0). This family of transmembrane proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 427 to 453 amino acids in length.. +PF12057 Domain of unknown function (DUF3538)
PFAM-B_3373 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 120 amino acids in length. This domain is found associated with Pfam:PF00240. This domain has a conserved SDL sequence motif.. +PF12058 Protein of unknown function (DUF3539)
PFAM-B_3564 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 90 amino acids in length. This protein has a conserved NHP sequence motif.. +PF12059 Protein of unknown function (DUF3540)
PFAM-B_2948 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 212 to 238 amino acids in length. This protein has a conserved SCL sequence motif.. +PF12060 Domain of unknown function (DUF3541)
PFAM-B_2172 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 230 amino acids in length.. +PF12061 Protein of unknown function (DUF3542)
PFAM-B_2032 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in eukaryotes and viruses. Proteins in this family are typically between 516 to 1283 amino acids in length. This protein is found associated with Pfam:PF00931.. +PF12062 heparan sulfate-N-deacetylase
PFAM-B_2134 (release 23.0). This family of proteins are heparan sulfate N-deacetylase enzymes. This protein is found in eukaryotes. This enzyme is often found associated with Pfam:PF00685.. +PF12063 Domain of unknown function (DUF3543)
PFAM-B_2213 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 217 to 291 amino acids in length. This domain is found associated with Pfam:PF00069. This domain has a single completely conserved residue A that may be functionally important.. +PF12064 Domain of unknown function (DUF3544)
PFAM-B_3553 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 198 to 216 amino acids in length. This domain is found associated with Pfam:PF00628, Pfam:PF01753, Pfam:PF00439, Pfam:PF00855.. +PF12065 Protein of unknown function (DUF3545)
PFAM-B_3270 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 60 to 77 amino acids in length. This protein has two completely conserved residues (R and L) that may be functionally important.. +PF12066 Domain of unknown function (DUF3546)
PFAM-B_3237 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 93 to 114 amino acids in length. This domain has two completely conserved Y residues that may be functionally important.. +PF12067 DUF3547;
Sox C-terminal transactivation domain. PFAM-B_3310 (release 23.0). This domain is found at the C-terminus of the Sox family of transcription factors. It is found associated with Pfam:PF00505. It binds to the Armadillo repeats (Pfam:PF00514) in Catenin beta-1 (CTNNB1), which is involved in transcriptional regulation . It functions as a transactivating domain (TAD) .. +PF12068 Domain of unknown function (DUF3548)
PFAM-B_3247 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 184 to 216 amino acids in length. This domain is found associated with Pfam:PF00566. This domain is found at the N-terminus of GYP7 proteins.. +PF12069 Protein of unknown function (DUF3549)
PFAM-B_2034 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 340 amino acids in length. This protein has a conserved LDE sequence motif.. +PF12070 Protein of unknown function (DUF3550/UPF0682)
PFAM-B_2472 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 249 to 606 amino acids in length.. +PF12071 Protein of unknown function (DUF3551)
PFAM-B_3610 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 79 to 104 amino acids in length. This protein has a single completely conserved residue C that may be functionally important.. +PF12072 Domain of unknown function (DUF3552)
PFAM-B_3508 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is about 200 amino acids in length. This domain is found associated with Pfam:PF00013, Pfam:PF01966. This domain has a single completely conserved residue A that may be functionally important.. +PF12073 Protein of unknown function (DUF3553)
PFAM-B_3361 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 60 amino acids in length. This protein has two conserved sequence motifs: GQVQS and TVNF.. +PF12074 Domain of unknown function (DUF3554)
PFAM-B_2029 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 287 to 356 amino acids in length. This domain is found associated with Pfam:PF02985.. +PF12075 KN motif
PFAM-B_3795 (release 23.0). This small motif is found at the N-terminus of Kank proteins and has been called the KN (for Kank N-terminal) motif. This protein is found in eukaryotes. Proteins in this family are typically between 413 to 1202 amino acids in length. This protein is found associated with Pfam:PF00023. This protein has two conserved sequence motifs: TPYG and LDLDF. Kank1 was obtained by positional cloning of a tumor suppressor gene in renal cell carcinoma, while the other members were found by homology search. The family is involved in the regulation of actin polymerization and cell motility through signaling pathways containing PI3K/Akt and/or unidentified modulators/effectors .. +PF12076 WAX2 C-terminal domain
Pfam-B_3756 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF04116. This domain has a conserved LEGW sequence motif. This region has similarity to short chain dehydrogenases .. +PF12077 Transmembrane protein of unknown function (DUF3556)
PFAM-B_2567 (release 23.0). This family of transmembrane proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 576 to 592 amino acids in length.. +PF12078 Domain of unknown function (DUF3557)
PFAM-B_2154 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 150 amino acids in length.. +PF12079 Protein of unknown function (DUF3558)
PFAM-B_3489 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 177 to 195 amino acids in length.. +PF12080 GldM C-terminal domain
PFAM-B_3275 (release 23.0). This domain is found in bacteria at the C-terminus of the GldM protein. This domain is typically between 169 to 182 amino acids in length. This domain has two completely conserved residues (Y and N) that may be functionally important. GldM, is named for the member from Cytophaga johnsonae (Flavobacterium johnsoniae), which is required for a type of rapid gliding motility found in certain members of the Bacteroidetes .. +PF12081 GldM N-terminal domain
PFAM-B_3275 (release 23.0). This domain is found in bacteria at the N-terminus of the GldM protein. This domain is typically between 169 to 182 amino acids in length. This domain has two completely conserved residues (Y and N) that may be functionally important. GldM, is named for the member from Cytophaga johnsonae (Flavobacterium johnsoniae), which is required for a type of rapid gliding motility found in certain members of the Bacteroidetes .. +PF12083 Domain of unknown function (DUF3560)
PFAM-B_2138 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 120 amino acids in length. This domain has a conserved GHHSE sequence motif.. +PF12084 Protein of unknown function (DUF3561)
PFAM-B_2401 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 110 amino acids in length.. +PF12085 Protein of unknown function (DUF3562)
PFAM-B_3549 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 62 to 84 amino acids in length. This protein has two completely conserved residues (A and Y) that may be functionally important.. +PF12086 Protein of unknown function (DUF3563)
PFAM-B_3639 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 50 amino acids in length. This protein has conserved AYL and DLE sequence motifs.. +PF12087 Protein of unknown function (DUF3564)
PFAM-B_3736 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 118 to 142 amino acids in length. This protein has a conserved WSRE sequence motif.. +PF12088 Protein of unknown function (DUF3565)
PFAM-B_3033 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 30 to 78 amino acids in length. This protein has two conserved sequence motifs: WVA and CGH.. +PF12089 Transmembrane domain of unknown function (DUF3566)
PFAM-B_3331 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 136 to 304 amino acids in length. This region represents a transmembrane region found at the C-terminus of the proteins.. +PF12090 Spt20 family
PFAM-B_3386 (release 23.0). This presumed domain is found in the Spt20 proteins from both human and yeast. The Spt20 protein is part of the SAGA complex which is a large complex mediating histone deacetylation. Yeast Spt20 has been shown to play a role in structural integrity of the SAGA complex as no intact SAGA could be purified in spt20 deletion strains.. +PF12091 Protein of unknown function (DUF3567)
PFAM-B_3056 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 90 amino acids in length. This protein has a conserved EIVDK sequence motif.. +PF12092 Protein of unknown function (DUF3568)
PFAM-B_3573 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 130 amino acids in length.. +PF12093 Coronavirus NS8 protein
Pfam-B_2038 (Release 23.0). This family of proteins is functionally uncharacterised. This protein is found in coronaviruses. Proteins in this family are typically between 39 to 121 amino acids in length. This protein has two conserved sequence motifs: EDPCP and INCQ.. +PF12094 Protein of unknown function (DUF3570)
PFAM-B_3745 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 396 to 444 amino acids in length.. +PF12095 Protein of unknown function (DUF3571)
PFAM-B_3506 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 85 to 97 amino acids in length.. +PF12096 Protein of unknown function (DUF3572)
PFAM-B_2130 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 100 amino acids in length.. +PF12097 Protein of unknown function (DUF3573)
PFAM-B_2286 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 372 to 530 amino acids in length.. +PF12098 Protein of unknown function (DUF3574)
PFAM-B_3542 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 144 to 163 amino acids in length. This protein has a conserved TPRF sequence motif.. +PF12099 Protein of unknown function (DUF3575)
PFAM-B_2229 (release 23.0). This family of proteins is functionally uncharacterised. This family is only found in bacteria. Proteins in this family are typically between 187 to 236 amino acids in length.. +PF12100 Domain of unknown function (DUF3576)
PFAM-B_2102 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 100 amino acids in length. This domain has a single completely conserved residue G that may be functionally important.. +PF12101 Protein of unknown function (DUF3577)
PFAM-B_2116 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 143 to 307 amino acids in length.. +PF12102 Domain of unknown function (DUF3578)
PFAM-B_2328 (release 23.0). This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 177 to 191 amino acids in length.. +PF12103 Surface lipoprotein of Spirochaetales order
Lipl32 is an outer membrane surface lipoprotein of Leptospira like bacteria.. +PF12104 T cell CD4 receptor C terminal region
This domain is the C terminal domain of the CD4 T cell receptor. The C terminal domain is the cytoplasmic domain which relays the signal for T cell activation. This process involves co-receptor internalisation. This domain is involved in binding to the N terminal of Lck co-receptor in a Zn2+ clasp structure.. +PF12105 SpoU, rRNA methylase, C-terminal
PFAM-B_2024 (release 23.0). This domain is found in bacteria. This domain is about 60 amino acids in length. This domain is found in association with Pfam:PF00588. This domain has a conserved LFE sequence motif. Some members of the Pfam family SpoU_methylase, Pfam:PF00588, carry this very distinctive sequence region at their extreme C-terminus. The exact function of this region is not known.. +PF12106 Colicin C terminal ribonuclease domain
Colicin is a protein produced by bacteria with Col plasmids. Its function is to attack E. coli through actions on its inner membrane ion channels or through ribonuclease or deoxyribonuclease actions. The C terminal domain is the ribonuclease domain. It specifically cleaves tRNA anticodons which recognise codons in the form NAY (N:any nucleotide, A:adenosine, Y:pyrimidine) which corresponds to Tyrosine, Histidine, Asparagine and Aspartic Acid. E5-CRD can be referred to as an RNA restriction enzyme that specifically recognizes and cleaves single-stranded GU sequences. . +PF12107 Plasminogen (Pg) ligand in fibrinolytic pathway
Pg is an important mediator of angiostatin production in the fibrinolytic pathway. Pg is made up of five subunit kringle molecules (Pg-K1 to Pg-K5), of which the first three make the protein angiostatin. VEK-30 is a domain of the group A streptococcal protein PAM. It binds to Pg-K2 of angiostatin and activates the molecule to mediate its anti-angiogenic effects. VEK-30 binds to angiostatin via a C terminal lysine with argininyl and glutamyl side chain residues known as a 'through space isostere'. . +PF12108 Splicing factor SF3a60 binding domain
This domain is found in eukaryotes. This domain is about 30 amino acids in length. This domain has a single completely conserved residue Y that may be functionally important. SF3a60 makes up the SF3a complex with SF3a66 and SF3a120. This domain is the binding site of SF3a60 for SF3a120. The SF3a complex is part of the spliceosome, a protein complex involved in splicing mRNA after transcription.. +PF12109 CXCR4 Chemokine receptor N terminal
CXCR4 and its ligand stromal cell-derived factor-1 (a.k.a. CXCL12) are essential for proper fetal development. CXCR4 is also the major coreceptor for T-tropic strains of human immunodeficiency virus 1 (HIV-1), and SDF-1 inhibits HIV-1 infection. Additionally, SDF-1 and CXCR4 mediate cancer cell migration and metastasis. The N terminal domain of most chemokine receptors is the ligand binding domain and so the N terminal domain of CXCR4 is the binding site for SDF-1. . +PF12110 Nuclear protein 96
Nup96 (often known by the name of its yeast homolog Nup145C) is part of the Nup84 heptameric complex in the nuclear pore complex. Nup96 complexes with Sec13 in the middle of the heptamer. The function of the heptamer is to coat the curvature of the nuclear pore complex between the inner and outer nuclear membranes. Nup96 is predicted to be an alpha helical solenoid. The interaction between Nup96 and Sec13 is the point of curvature in the heptameric complex. . +PF12111 Polyribonucleotide phosphorylase C terminal
PNPase regulates the expression of small non-coding RNAs that control expression of outer-membrane proteins. The enzyme also affects complex processes, such as the tissue-invasive virulence of Salmonella enterica and the regulation of a virulence-factor secretion system in Yersinia. In Escherichia coli, PNPase is involved in the quality control of ribosomal RNA precursors and is required for growth following cold shock. This family contains the C terminal protomer domain of the PNPase core. The function of the C terminal protomer is to catalyse phosphorolysis through its two active sites. . +PF12112 Protein of unknown function (DUF3579)
PFAM-B_2027 (release 23.0). This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 98 to 126 amino acids in length. This protein has a conserved FRP sequence motif.. +PF12113 SVM protein signal sequence
PFAM-B_2829 (release 23.0). This region is presumed to be a signal peptide sequence found in Sequence-variable mosaic (SVM) proteins . This domain is found in phytoplasmas. This presumed signal sequence is about 30 amino acids in length.. +PF12114 Period protein 2/3C-terminal region
PFAM-B_2037 (release 23.0). This domain is found in eukaryotes. This domain is typically between 164 to 200 amino acids in length. This domain is found associated with Pfam:PF08447.. +PF12115 Salivary protein of 15kDa inhibits CD4+ T cell activation
PFAM-B_2039 (release 23.0). This is a family of 15kDa salivary proteins from Acari Arachnids that is induced on feeding and assists the parasite to remain attached to its arthropod host. By repressing calcium fluxes triggered by TCR engagement, Salp15 inhibits CD4+ T cell activation. Salp15 shows weak similarity to Inhibin A, a member of the TGF-beta superfamily that inhibits the production of cytokines and the proliferation of T cells.. +PF12116 Stage III sporulation protein D
PFAM-B_2045 (release 23.0). This stage III sporulation protein is a small DNA-binding family that is essential for gene expression of the mother-cell compartment during sporulation. The domain is found in bacteria and viruses, and is about 40 amino acids in length. It has a conserved RGG sequence motif.. +PF12117 DUF_B2046;
Protein of unknown function (DUF3580). PFAM-B_2046 (release 23.0). This domain is found in viruses, and is about 120 amino acids in length. It is found in association with Pfam:PF01057.. +PF12118 SprA-related family
PFAM-B_2057 (release 23.0). This protein is found in bacteria. Proteins in this family are typically between 234 to 465 amino acids in length. There is a conserved GEV sequence motif. Most members are annotated as being SprA-related.. +PF12119 Protein of unknown function (DUF3581)
PFAM-B_2081 (release 23.0). This protein is found in bacteria. Proteins in this family are about 240 amino acids in length.. +PF12120 DNApol_Rpb2_rif; RNApol_Rpb2_rif;
Rifampin ADP-ribosyl transferase. This protein is found in bacteria. Proteins in this family are typically between 136 to 150 amino acids in length. The opportunistic pathogen Mycobacterium smegmatis is resistant to rifampin because of the presence of a chromosomally encoded rifampin ADP-ribosyltransferase (Arr-ms). Arr-ms is a small enzyme whose activity thus renders rifamycin antibiotics ineffective .. +PF12121 Dermaseptin
This protein is found in eukaryotes. Proteins in this family are typically between 30 to 76 amino acids in length. This protein is found associated with Pfam:PF03032. This domain is part of a dermaseptin protein which is used as an antimicrobial agent. The full protein is almost completely defined in an alpha helical domain. It creates high levels of disorder at the level of the phospholipid head group of bacterial membranes suggesting that it partitions into the bilayer where it severely disrupts membrane packing.. +PF12122 Protein of unknown function (DUF3582)
PFAM-B_2087 (release 23.0). This domain is found in bacteria, and is approximately 130 amino acids in length. It is found associated with Pfam:PF01694. There is a conserved ASW sequence motif. This domain has a single completely conserved residue F that may be functionally important.. +PF12123 N-acetylmuramoyl-l-alanine amidase
This domain is found in bacteria and viruses. This domain is about 50 amino acids in length. This domain is classified with the enzyme classification code EC:3.5.1.28. This domain is the C terminal of the enzyme which hydrolyses the link between N-acetylmuramoyl residues and L-amino acid residues in certain cell-wall glycopeptides.. +PF12124 Coronavirus polyprotein cleavage domain
This domain is found in SARS coronaviruses, and is about 70 amino acids in length. It is found associated with various other coronavirus proteins due to the polyprotein nature of most viral translation. PL2pro is a domain of the non-structural protein nsp3. The domain performs three of the cleavages required to separate the translated polyprotein into its distinct proteins.. +PF12125 D domain of beta-TrCP
This domain is found in eukaryotes, and is approximately 40 amino acids in length. It is found associated with Pfam:PF00646, Pfam:PF00400. The protein that contains this domain functions as a ubiquitin ligase. Ubiquitination is required to direct proteins towards the proteasome for degradation. This protein is part of the WD40 class of F box proteins. The D domain of these F box proteins is involved in mediating the dimerisation of the protein. Dimerisation is necessary to polyubiquitinate substrates so this D domain is vital in directing substrates towards the proteasome for degradation.. +PF12126 Protein of unknown function (DUF3583)
PFAM-B_2092 (release 23.0). This domain is found in eukaryotes, and is typically between 302 and 338 amino acids in length. It is found in association with Pfam:PF00097 and Pfam:PF00643. Most members are promyelocytic leukemia proteins, and this family lies towards the C-terminus.. +PF12127 SigmaW regulon antibacterial
PFAM-B_2114 (release 23.0). This protein is found in bacteria. Proteins in this family are about 330 amino acids in length. The operon from which this protein is derived confers immunity for the host species to a broad range of antibacterial compounds, unlike the specific immunity proteins that are linked to and co-regulated with their antibiotic-synthesis proteins.. +PF12128 Protein of unknown function (DUF3584)
PFAM-B_2142 (release 23.0). This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 943 to 1234 amino acids in length. This family contains a P-loop motif suggesting it is a nucleotide binding protein. It may be involved in replication.. +PF12129 Male germ-cell putative homeodomain transcription factor
PFAM-B_2140 (release 23.0). This domain is found in bacteria and eukaryotes, and is typically between 101 and 140 amino acids in length. Phtf proteins do not display any sequence similarity to known or predicted proteins, but their conservation among species suggests an essential function. The 84 kDa Phtf1 protein is an integral membrane protein, anchored to a cell membrane by six to eight trans-membrane domains, that is associated with a domain of the endoplasmic reticulum (ER) juxtaposed to the Golgi apparatus. It is present during meiosis and spermiogenesis, and, by the end of spermiogenesis, is released from the mature spermatozoon within the residual bodies . Phtf1 enhances the binding of FEM1B -feminisation homologue 1B - to cell membranes. Fem-1 was initially identified in the signaling pathway for sex determination, as well as being implicated in apoptosis, but its biochemical role is still unclear, and neither FEM1B nor PHTF1 is directly implicated in apoptosis in spermatogenesis. It is the ANK domain of FEM1B that is necessary for the interaction with the N-terminal region of Phtf1 .. +PF12130 Protein of unknown function (DUF3585)
PFAM-B_2156 (release 23.0). This domain is found in eukaryotes. This domain is typically between 135 and 149 amino acids in length and is found associated with Pfam:PF00307.. +PF12131 Protein of unknown function (DUF3586)
PFAM-B_2164 (release 23.0). This domain is found in eukaryotes. This domain is about 80 amino acids in length and is found associated with Pfam:PF08246, and Pfam:PF00112.. +PF12132 Protein of unknown function (DUF3587)
PFAM-B_2181 (release 23.0). This protein is found in viruses. Proteins in this family are typically between 209 and 248 amino acids in length.. +PF12133 Open reading frame 6 from SARS coronavirus
PfamB-2188 (release 23.0). This family is found in Coronaviruses. Proteins in this family are typically between 42 to 63 amino acids in length.. +PF12134 PRP8 domain IV core
This domain is found in eukaryotes, and is about 20 amino acids in length. It is found associated with Pfam:PF10597, Pfam:PF10596, Pfam:PF10598, Pfam:PF08083, Pfam:PF08082, Pfam:PF01398, Pfam:PF08084. There is a conserved LILR sequence motif. The domain is a selenomethionine domain in a subunit of the spliceosome. The function of PRP8 domain IV is believed to be interaction with the spliceosomal core.. +PF12135 Sialidase enzyme penultimate C terminal domain
This domain is found in bacteria and eukaryotes, and is about 30 amino acids in length. The protein from which this domain is found is a sialidase enzyme which is used by virulent bacteria as a toxin. It is the penultimate C terminal domain.. +PF12136 RNA polymerase Rpo13 subunit HTH domain
This domain is found in archaea, and is about 40 amino acids in length. It has a single completely conserved residue E that may be functionally important. It is found in the archaeal DNA dependent RNA polymerase. The domain is a 'helix-turn-helix' (HTH) domain in the Rpo13 subunit of the RNA polymerase. This domain is involved in downstream DNA binding, and the entire subunit has also been implicated in contacting transcription factor II B.. +PF12137 RNA polymerase recycling family C-terminal
PFAM-B_2201 (release 23.0). This domain is found in bacteria. This domain is about 360 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF00176. The function of this domain is not known, but structurally it forms an alpha-beta fold in nature with a central beta-sheet flanked by helices and loops, the beta-sheet being mainly antiparallel and flanked by four alpha helices, among which the two longer helices exhibit a coiled-coil arrangement.. +PF12138 Spherulation-specific family 4
PFAM-B_2198 (release 23.0). This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 250 and 398 amino acids in length. There is a conserved NPG sequence motif and there are two completely conserved G residues that may be functionally important. Starvation will often induce spherulation - the production of spores - and this process may involve DNA-methylation. Changes in the methylation of spherulin4 are associated with the formation of spherules, but these changes are probably transient. Methylation of the gene accompanies its transcriptional activation, and spherulin4 mRNA is only detectable in late spherulating cultures and mature spherules. It is a spherulation-specific protein.. +PF12139 Adenosine-5'-phosphosulfate reductase beta subunit
PFAM-B_2202 (release 23.0). This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 112 to 142 amino acids in length. This family is found in association with Pfam:PF00037, and has a conserved FPIRTT sequence motif. The whole beta subunit has the enzymic properties of EC:1.8.99.2.. +PF12140 Protein of unknown function (DUF3588)
PFAM-B_2205 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 866 amino acids in length, and the family is found in association with Pfam:PF02820. The exact function of this family is not known.. +PF12141 Protein of unknown function (DUF3589)
PFAM-B_2240 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 541 and 717 amino acids in length. The function of this family is not known.. +PF12142 Polyphenol oxidase middle domain
PFAM-B_2219 (release 23.0). This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length, and the family is found in association with Pfam:PF00264. Most members are annotated as being polyphenol oxidases, and many are from plants or plastids. There is a conserved DWL sequence motif which gives the family its name.. +PF12143 Protein of unknown function (DUF_B2219)
PFAM-B_2219 (release 23.0). This domain family is found in eukaryotes, and is typically between 138 and 152 amino acids in length, and the family is found in association with Pfam:PF00264. Many members are plant or plastid polyphenol oxidases, and there is a highly conserved sequence motif: KFDV, from which the name derives. This is the C-terminal domain of these oxidases.. +PF12144 Eukaryotic Mediator 12 catenin-binding domain
PFAM-B_2250 (release 23.0). +PF12145 Eukaryotic Mediator 12 subunit domain
PFAM-B_2250 (release 23.0). This domain is found in eukaryotes, and is typically between 325 and 354 amino acids in length. The function of this particular region of the Mediator subunit Med12 is not known, but there is a conserved sequence motif: LCEWAV, from which the name derives.. +PF12146 Putative lysophospholipase
PFAM-B_2243 (release 23.0). This domain is found in bacteria and eukaryotes and is approximately 110 amino acids in length. It is found in association with Pfam:PF00561. Many members are annotated as being lysophospholipases, and others as alpha-beta hydrolase fold-containing proteins.. +PF12147 Hydrolase_5;
Putative methyltransferase. PFAM-B_2243 (release 23.0). This domain is found in bacteria and eukaryotes and is approximately 110 amino acids in length. It is found in association with Pfam:PF00561. The family shows homology to methyltransferases.. +PF12148 Protein of unknown function (DUF3590)
This domain is found in eukaryotes, and is typically between 83 and 97 amino acids in length. It is found in association with Pfam:PF00097, Pfam:PF02182, Pfam:PF00628, Pfam:PF00240. There are two conserved sequence motifs: RAR and NYN. The domain is part of the protein NIRF which has zinc finger and ubiquitinating domains. The function of this domain is likely to be mainly structural, however this has not been confirmed.. +PF12149 Herpes simplex virus virion protein 16 C terminal
This domain is found in viruses, and is about 30 amino acids in length. It is found in association with Pfam:PF02232. This domain is the C terminal of the HSV virion protein 16. This protein is a transcription promoter. The C terminal domain is the carboxyl subdomain of the acidic transcriptional activation domain. The protein binds to DNA binding proteins to carry out its function. Such proteins include TATA binding protein, CBP, TBP-binding protein, etc.. +PF12150 Cytosolic motility protein
This domain family is found in eukaryotes, and is approximately 50 amino acids in length. These proteins are found in nematodes. They complex with MSP (major sperm protein) to allow motility. Their action is quite similar to the action of bacterial actin molecules.. +PF12151 Mannan-binding protein
This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a single completely conserved residue G that may be functionally important. The domain occurs in two types of proteins. In mannan binding proteins, it forms a homodimeric molecule which complexes into a homo-octamer. In thiamidases it occurs without repeats but in the presence of other domains. MVL is distinct amongst other oligomannoside binding proteins in that it exhibits specificity for certain tetrasaccharides. Each molecule of MVL has four distinct carbohydrate binding sites.. +PF12152 Eukaryotic translation initiation factor 4G1
This domain is found in eukaryotes, and is about 80 amino acids in length. It is found in association with Pfam:PF02854. This domain is part of the protein eIF_4G. It binds to eIF_4E by wrapping around its N terminal to form the eIF_4F complex. This complex binds various eIF_4E-BPs (binding proteins) to regulate initiation of translation.. +PF12153 LPS binding domain of CAP18 (C terminal)
This domain family is found in eukaryotes, and is approximately 30 amino acids in length, and the family is found in association with Pfam:PF00666. CAP18 is a protein which is derived from rabbit granulocytes. It has two domains, an N terminal DUF and a C terminal Gram negative LPS binding domain. This domain is the C terminal domain.. +PF12154 Glycoprotein B N-terminal antigenic domain of HCMV
PFAM-B_2260 (release 23.0). This domain is found in viruses, and is approximately 40 amino acids in length. The domain is found in association with Pfam:PF00606. There are two conserved sequence motifs: SVS and TSS. This family is the amino-terminal antigenic domain of glycoprotein B of human cytomegalovirus.. +PF12155 NADH dehydrogenase subunit 2 N-terminal
PFAM-B_2270 (release 23.0). This domain is found in eukaryotes, and is approximately 90 amino acids in length. It is found associated with Pfam:PF00361. All members are annotated as being NADH dehydrogenase subunit 2, and this region is the N-terminus.. +PF12156 Putative metal-binding domain of cation transport ATPase
PFAM-B_2287 (release 23.0). This domain is found in bacteria, and is approximately 90 amino acids in length. It is found associated with Pfam:PF00403, Pfam:PF00122, Pfam:PF00702. The cysteine-rich nature and composition suggest this might be a cation-binding domain; most members are annotated as being cation transport ATPases.. +PF12157 Protein of unknown function (DUF3591)
PFAM-B_2298 (release 23.0). This domain is found in eukaryotes and is typically between 445 to 462 amino acids in length. Most members are annotated as being transcription initiation factor TFIID subunit 1, and this region is the conserved central portion of these proteins.. +PF12158 Protein of unknown function (DUF3592)
PFAM-B_2016 (release 23.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 150 and 242 amino acids in length.. +PF12159 Protein of unknown function (DUF3593)
PFAM-B_2028 (release 23.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 98 and 228 amino acids in length. There is a conserved LHG sequence motif.. +PF12160 Fibrinogen alpha C domain
This domain family is found in eukaryotes, and is approximately 70 amino acids in length, and the family is found in association with Pfam:PF08702. This domain is the C terminal domain of fibrinogen in mammals. The domain lies in the C terminal half of the alpha C region in these proteins. The function of the domain is that of intramolecular and intermolecular interactions to form fibrin.. +PF12161 HsdM N-terminal domain
PFAM-B_2036 (release 23.0). This domain is found at the N-terminus of the methylase subunit of Type I DNA methyltransferases. This domain family is found in bacteria and archaea, and is typically between 123 and 138 amino acids in length. The family is found in association with Pfam:PF02384. Mutations in this region of EcoKI methyltransferase Swiss:P08957 abolish the normally strong preference of this system for methylating hemimethylated substrate . The structure of this domain has been shown to be all alpha-helical.. +PF12162 STAT1 TAZ2 binding domain
This domain family is found in eukaryotes, and is approximately 20 amino acids in length, and the family is found in association with Pfam:PF02865, Pfam:PF00017, Pfam:PF01017, Pfam:PF02864. This domain is the C terminal domain of STAT1. This domain binds selectively to the TAZ2 domain of CRB (CREB-binding protein). In this process it becomes a transcriptional activator and can initiate transcription of certain genes.. +PF12163 DNA replication regulator
This family of proteins is found exclusively in epsilon-proteobacteria. Proteins in this family are approximately 180 amino acids in length. The structure of HobA is a modified Rossmann fold consisting of a five-stranded parallel beta-sheet (beta1-5) flanked on one side by alpha-2, alpha-3 and alpha-6 helices and alpha-4 and alpha-5 on the other. The alpha-1 helix is extended away from and has minimal interaction with the globular part of the protein. Four monomers interact to form a tetrameric molecule. Four calcium atoms bind to the tetramer and these binding sites may have functional relevance. The function of HobA is to regulate DNA replication and it does this by binding to DNA-A, but the exact mechanism of how this regulation occurs is purely speculative. +PF12164 SporV_proteinAA;
Stage V sporulation protein AA. This domain family is found in bacteria - primarily Firmicutes, and is approximately 90 amino acids in length. There is a single completely conserved residue G that may be functionally important. Most annotation associated with this domain suggests that it is involved in the fifth stage of sporulation, however there is little publication to back this up.. +PF12165 Domain of unknown function (DUF3594)
PFAM-B_2040 (release 23.0). This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00628.. +PF12166 Protein of unknown function (DUF3595)
PFAM-B_2166 (release 23.0). This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 578 and 2525 amino acids in length.. +PF12167 Domain of unknown function (DUF3596)
PFAM-B_2234 (release 23.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00589. It is likely that this domain acts to bind DNA.. +PF12168 DNA polymerase III subunits tau domain IV DnaB-binding
PFAM-B_2296 (release 23.0). This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau.. +PF12169 DNA polymerase III subunits gamma and tau domain III
PFAM-B_2296 (release 23.0). This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau.. +PF12170 DNA polymerase III tau subunit V interacting with alpha
PFAM-B_2296 (release 23.0). This domain family is found in bacteria, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. The extreme C-terminal region of this domain 5 is the part which interacts with the alpha subunit of the DNA polymerase III holoenzyme.. +PF12171 Zinc-finger double-stranded RNA-binding
PFAM-B_2302 (release 23.0). This domain family is found in archaea and eukaryotes, and is approximately 30 amino acids in length. The mammalian members of this group occur multiple times along the protein, joined by flexible linkers, and are referred to as JAZ - dsRNA-binding ZF protein - zinc-fingers. The JAZ proteins are expressed in all tissues tested and localise in the nucleus, particularly the nucleolus. JAZ preferentially binds to double-stranded (ds) RNA or RNA/DNA hybrids rather than DNA. In addition to binding double-stranded RNA, these zinc-fingers are required for nucleolar localisation.. +PF12172 Rubredoxin-like zinc ribbon domain (DUF35_N)
Pfam-B_1390 (release 4.2). This domain has no known function and is found in conserved hypothetical archaeal and bacterial proteins. The domain is duplicated in Swiss:O53566. The structure of a DUF35 representative reveals two long N-terminal helices followed by a rubredoxin-like zinc ribbon domain represented in this family and a C-terminal OB fold domain. Zinc is chelated by the four conserved cysteines in the alignment.. +PF12173 Bacteriocin class IIc cyclic gassericin A-like
PfamB_170026 (release 23.0), manual. This class of bacteriocins was previously described as class V. The members include gassericin A, acidocin B and butyrovibriocin AR10, all of which are hydrophobic cyclical structures. The N- and C-termini are covalently linked, and the circular molecule is resistant to several proteases and peptidases. The immunity protein that protects Lactobacillus gasseri from the toxic effects of its bacteriocin, gassericin A, has been identified. It is found to be a small positively-charged hydrophobic peptide of 53 amino acids containing a putative transmembrane segment - a structure unlike that of the more common immunity proteins as found in Pfam:PF08951.. +PF12174 RCD1-SRO-TAF4 (RST) plant domain
Overmyer K, Coggill P. Pfam-B_8550 (release 23.0). This domain is found in plant RCD1, SRO and TAF4 proteins, hence its name of RST. It is required for interaction with multiple plant transcription factors. Radical-Induced Cell Death1 (RCD1) is an important regulator of stress and hormonal and developmental responses in Arabidopsis thaliana, as is its closest homologue, SRO1 - Similar To RCD-One1. TBP-Associated Factor 4 (TAF4) and TAF4-b are components of the transcription initiation factor complex TFIID.. +PF12175 White spot syndrome virus structural envelope protein VP
This family of proteins is found in viruses. Proteins in this family are approximately 210 amino acids in length. There is a conserved NNT sequence motif. These proteins are structural envelope proteins in viruses. This is the beta barrel C terminal domain. There is a protruding N terminal domain which completes the proteins. Three of four envelope proteins in white spot syndrome virus share sequence homology with each other and are present in this family - VP24, VP26 and VP28. VP19 is the other major envelope protein but shares no sequence homology with the other proteins. These proteins are essential for entry into cells of the crustacean host.. +PF12176 Methanol-cobalamin methyltransferase B subunit
This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 460 amino acids in length. MtaB folds as a TIM barrel and contains a novel zinc-binding motif. Zinc(II) lies at the bottom of a funnel formed at the C-terminal beta-barrel end and ligates to two cysteinyl sulfurs (Cys-220 and Cys-269) and one carboxylate oxygen (Glu-164). The function of this protein is to catalyse the cleavage of the C-O bond in methanol by an SN2 mechanism. It complexes with MtaA and MtaC to perform this function.. +PF12177 Prohormone convertase enzyme
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF01483, Pfam:PF00082. There are two completely conserved residues (Y and D) that may be functionally important. This protein is the C terminal domain of a prohormone convertase enzyme which targets hormones in dense core secretory granules. This C terminal tail domain is the domain responsible for targeting these dense core secretory granules. The domain adopts an alpha helical structure.. +PF12178 Chromosome passenger complex (CPC) protein INCENP N terminal
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. INCENP is a regulatory protein in the chromosome passenger complex. It is involved in regulation of the catalytic protein Aurora B. It performs this function in association with two other proteins - Survivin and Borealin. These proteins form a tight three-helical bundle. The N terminal domain is the domain involved in formation of this three helical bundle.. +PF12179 I-kappa-kinase-beta NEMO binding domain
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00069. These proteins are involved in inflammatory reactions. They cause release of NF-kappa-B into the nucleus of inflammatory cells and upregulation of transcription of proinflammatory cytokines. They perform this function by phosphorylating I-kappa-B proteins which are targeted for degradation to release NF-kappa-B. This kinase (I-kappa-kinase-beta) is found in association with IKK-alpha and NEMO (NF-kappa-B essential modulator). This domain is the binding site of IKK-beta for NEMO.. +PF12180 TSG101 and ALIX binding domain of CEP55
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. This domain is the active domain of CEP55. CEP55 is a protein involved in cytokinesis, specifically in abscission of the plasma membrane at the midbody. To perform this function, CEP55 complexes with ESCRT-I (by a Proline rich sequence in its TSG101 domain) and ALIX. This is the domain on CEP55 which binds to both TSG101 and ALIX. It also acts as a hinge between the N and C termini. This domain is called EABR.. +PF12181 DNA binding domain of the motility gene repressor (MogR)
This domain family is found in bacteria, and is approximately 150 amino acids in length. MogR is involved in repression of transcription of the flagellar gene in Listeria bacteria. This allows a phenotypical switch from an extracellular bacterium to an intracellular pathogen. MogR binds AT rich flagellar gene promoter regions upstream of the flagellar gene. These regions follow the pattern 5'-TTTTNNNNNAAAA-3'. This domain is the DNA binding domain of MogR.. +PF12182 Lipoprotein;
Bacterial lipoprotein. This domain family is found in bacteria, and is approximately 60 amino acids in length. There is a single completely conserved Y residue that may be functionally important. This domain is from a bacterial lipoprotein, a major virulence factor in Gram negative bacteria.. +PF12183 Restriction endonuclease NotI
This family of proteins is found in bacteria. Proteins in this family are typically between 270 and 341 amino acids in length. There is a conserved CPF sequence motif. The type IIP restriction enzyme, NotI, is a homodimer that recognizes the 8 bp DNA sequence 5'-GC/GGCCGC-3' and cleaves both strands of DNA to create 5', 4 base cohesive overhangs.. +PF12185 Nup358/RanBP2 E3 ligase domain
This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00638, Pfam:PF00641, Pfam:PF00160. There are two conserved sequence motifs: TFFC and EDF. Nup358/RanBP2 is a nucleoporin involved in ubiquitination of many different protein targets from various cellular pathways. It complexes with Ubc9, SUMO-1 and RanGAP1 to perform this function. This is the ligase domain which binds to Ubc9.. +PF12186 Acyl-CoA dehydrogenase C terminal
This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF02770, Pfam:PF00441, Pfam:PF02771. There is a conserved ARRL sequence motif. The C terminal domain is an alpha helical domain. The flavin ring of Acyl-CoA dehydrogenase is buried in the crevice between the two alpha helical domains and the beta-sheet domain of one subunit, and the adenosine pyrophosphate moiety is stretched into the subunit junction of a neighbouring subunit, composed of two C terminal domains.. +PF12187 Viral/Archaeal nuclease
This family of proteins is found in archaea and viruses. Proteins in this family are typically between 211 and 244 amino acids in length. These proteins are nucleases from fuselloviruses and Sulfolobus archaea.. +PF12188 STAT2_Cterm;
Signal transducer and activator of transcription 2 C terminal. This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF02865, Pfam:PF00017, Pfam:PF01017, Pfam:PF02864. There is a conserved DLP sequence motif. STATs are involved in transcriptional regulation and are the only regulators known to be modulated by tyrosine phosphorylation. STAT2 forms a trimeric complex with STAT1 and IRF-9 (Interferon Regulatory Factor 9), on activation of the cell by interferon, which is called ISGF3 (Interferon-stimulated gene factor 3). The C terminal domain of STAT2 contains a nuclear export signal (NES) which allows export of STAT2 into the cytoplasm along with any complexed molecules.. +PF12189 Single-strand DNA-binding protein
This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved IELE sequence motif. VirE1 is an acidic chaperone protein which binds to VirE2, a ssDNA binding protein. These proteins are virulence factors of the plant pathogens Agrobacteria. VirE1 competes for the ssDNA binding site of VirE2.. +PF12190 Fungal protease inhibitor
This protein family is found in eukaryotes, and is approximately 50 amino acids in length. These proteins are fungal protease inhibitors.. +PF12191 Tumour necrosis factor receptor stn_TNFRSF12A_TNFR domain
This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 184 amino acids in length. This is the stn_TNFRSF12A_TNFR domain from the tumour necrosis factor receptor. The function of this domain is unknown.. +PF12192 Fungal calcium binding protein
This domain is found in eukaryotes, and is approximately 60 amino acids in length. There is a single completely conserved residue C that may be functionally important. This is a calcium binding domain from the fungal protein CBP (calcium binding protein). This protein is a virulence factor with unknown virulence mechanisms. CBP complexes as a highly intertwined homodimer. Each monomer is comprised of four alpha helices which adopt the saposin fold, characteristic of a protein family that binds to membranes and lipids.. +PF12193 Sulf_coatprot_C;
Sulfolobus virus coat protein C terminal. This domain family is found in viruses, and is approximately 70 amino acids in length. It is the C terminal of a coat protein in sulfolobus viruses.. +PF12194 ste5minscaffold;
Protein kinase Fus3-binding. This domain family is found in eukaryotes, and is approximately 190 amino acids in length. This domain is the penultimate C terminal domain from the protein ste5 which co-catalyses the phosphorylation of fus3 by ste7. It is involved in the MAPK pathways. This domain is the minimal scaffold domain of ste5. It binds to the mitogen activated protein kinase fus3 before it is phosphorylated.. +PF12195 Endosialidase; Endobetabarrel;
Beta barrel domain of bacteriophage endosialidase. This domain family is found in bacteria and viruses, and is approximately 80 amino acids in length. This domain is the beta barrel domain of bacteriophage endosialidase which represents one of the two sialic acid binding sites of the enzyme. The domain is nested in the beta propeller domain of the endosialidase enzyme. The endosialidase protein complexes to form homotrimeric molecules.. +PF12196 FHA Ki67 binding domain of hNIFK
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00076. There are two conserved sequence motifs: TPVCTP and LERRKS. This domain is found on the human nucleolar protein hNIFK. It binds to the fork-head-associated domain of human Ki67. High-affinity binding requires sequential phosphorylation by two kinases, CDK1 and GSK3, yielding pThr238, pThr234 and pSer230. This interaction is involved in cell cycle regulation.. +PF12197 Bacillus cereus group antimicrobial protein
This domain is found in bacteria, and is approximately 40 amino acids in length. This domain is found in Bacillus cereus group bacteria. It is an antimicrobial protein.. +PF12198 Theoretical tuberculin protein
This domain family is found in bacteria, and is approximately 30 amino acids in length. This protein is a theoretical model of the tuberculin protein from Mycobacterium tuberculosis.. +PF12199 Extracellular fibrinogen binding protein C terminal
This domain family is found in bacteria, and is approximately 70 amino acids in length. There is a conserved VLK sequence motif. It is the C terminal domain of bacterial extracellular fibrinogen binding protein. It contains a helical motif involved in complement regulation. This motif binds to complement and changes its conformation to a form which cannot activate downstream components of the complement cascade.. +PF12200 Domain of unknown function (DUF3597)
This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 126 and 281 amino acids in length. The function of this domain is unknown. The structure of this domain has been found to contain five helices with a long flexible loop between helices one and two.. +PF12201 Bcl2-interacting killer, BH3-domain containing
This is a family of pro-apoptotic Bcl-x proteins, B cell leukaemia/lymphoma 2, or BIKs. BIK proteins rely for their activity upon an intact BH3 domain lying between residues 48 and 80, as in UniProt:Q13323.. +PF12202 Oxidative-stress-responsive kinase 1 C terminal
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00069. There is a single completely conserved residue F that may be functionally important. OSR1 is involved in the signalling cascade which activates Na/K/2Cl cotransporter during osmotic stress. This domain is the C terminal domain of OSR1 which recognises a motif (Arg-Phe-Xaa-Val) on the OSR1-activating protein WNK1.. +PF12203 Glutamine rich N terminal domain of histone deacetylase 4
This domain is found in eukaryotes, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00850. The domain forms an alpha helix which complexes to form a tetramer. The glutamine rich domains have many intra- and inter-helical interactions which are thought to be involved in reversible assembly and disassembly of proteins. The domain is part of histone deacetylase 4 (HDAC4) which removes acetyl groups from histones. This restores their positive charge to allow stronger DNA binding thus restricting transcriptional activity.. +PF12204 Domain of unknown function (DUF3598)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 230 and 398 amino acids in length. These proteins are formed entirely from B sheets which form a barrel structure similar to those seen in the lipocalin superfamily.. +PF12205 G protein-coupled receptor kinase-interacting protein 1 C term
This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF01412, Pfam:PF00023, Pfam:PF08518. GIT1 plays an important role in cell adhesion, motility, cytoskeletal remodeling and membrane trafficking. To perform this function, it localises p21-activated kinase (PAK) and PAK-interactive exchange factor to focal adhesions. Its activation is regulated by interaction between its paxillin-binding C terminal and the LD motifs of paxillin. The C terminal folds into a four helix bundle.. +PF12206 Domain of unknown function (DUF3599)
This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. This domain is the phage-like element pbsx protein xkdh.. +PF12207 Domain of unknown function (DUF3600)
This family of proteins is found in bacteria. Proteins in this family are approximately 230 amino acids in length. This domain is the C terminal of the putative ecf-type sigma factor negative effector.. +PF12208 Domain of unknown function (DUF3601)
This domain family is found in bacteria, and is approximately 80 amino acids in length.. +PF12209 Leucine permease transcriptional regulator helical domain
This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF03399. This domain is a helical domain in the middle of leucine permease transcriptional regulator.. +PF12210 Hepatocyte growth factor-regulated tyrosine kinase substrate
This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00790, Pfam:PF01363, Pfam:PF02809. This domain is the helical region of Hrs which forms the core complex of ESCRT with STAM.. +PF12211 Low molecular weight S layer protein N terminal
This family of proteins is found in bacteria. Proteins in this family are typically between 328 and 381 amino acids in length. There is a conserved LGDG sequence motif. Clostridial species have a layer of surface proteins surrounding their membrane. This layer is comprised of a high molecular weight protein and a low molecular weight protein. This domain is the N terminal domain of the low molecular weight protein. It is a structural domain.. +PF12212 Piwi/Argonaute/Zwille siRNA-binding domain
This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. There is a conserved LKDIL sequence motif. There is a single completely conserved residue L that may be functionally important. This domain is part of an Argonaute protein. It is an siRNA binding domain.. +PF12213 DNA polymerases epsilon N terminal
This domain is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF04042. There is a single completely conserved residue F that may be functionally important. This domain is the N terminal domain of DNA polymerase epsilon subunit B. It forms a primarily alpha helical structure in which four helices are arranged in two hairpins with connecting loops containing beta strands which form a short parallel sheet. DNA polymerase epsilon is required in DNA replication for synthesis of the leading strand. This domain has close structural relation to AAA+ protein C terminal domains.. +PF12214 Cell cycle regulated microtubule associated protein
PFAM-B_2368 (release 23.0). This domain is found in eukaryotes. This domain is typically between 127 to 182 amino acids in length. This domain is found associated with Pfam:PF06886. This domain is found in the protein TPX2 (a.k.a p100) which is involved in cell cycling. It is only expressed between the start of the S phase and completion of cytokinesis. The microtubule-associated protein TPX2 has been reported to be crucial for mitotic spindle formation. This domain is close to the C terminal of TPX2. The protein importin alpha regulates the activity of TPX2 by binding to the nuclear localisation signal in this domain.. +PF12215 beta-Glucocerebrosidase 2 N terminal
PFAM-B_2416 (release 23.0). This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 320 to 354 amino acids in length. This domain is found associated with Pfam:PF04685. This domain is found in the protein beta-Glucocerebrosidase 2. It is found just after the extreme N terminus. This protein is located in the ER. The N terminal is thought to be the luminal domain while the C terminal is the cytosolic domain. The catalytic domain of GBA-2 is unknown.. +PF12216 Immune evasion protein
PFAM-B_2496 (release 23.0). This protein is found in archaea and viruses. Proteins in this family are typically between 265 and 342 amino acids in length. The proteins in this family are, or are related to, the m04 encoded protein gp34 of pathogenic microorganisms such as murine cytomegalovirus. m06 and m152 genes are expressed earlier in the intracellular replication phases of these microorganisms' life cycles. They function to inhibit MHC-1 loading and export. gp34 is theorized to prevent immune reactions from NK cells which would ordinarily recognise and attack cells lacking MHC.. +PF12217 Catalytic beta propeller domain of bacteriophage endosialidase
This domain family is found in bacteria and viruses, and is typically between 443 and 460 amino acids in length. This domain is the highly conserved beta propeller of bacteriophage endosialidase which represents the catalytically active part of the enzymes. This core domain forms stable SDS-resistant trimers. There is a nested beta barrel domain in this domain (Pfam:PF12195). The endosialidase protein complexes to form a homotrimeric molecule.. +PF12218 N terminal extension of bacteriophage endosialidase
This domain family is found in bacteria and viruses, and is approximately 70 amino acids in length. This domain is found in the bacteriophage protein endosialidase. The two N-terminal domains (this domain and the beta propeller) assemble in the compact 'cap' whereas the C-terminal domain forms an extended tail-like structure. The very N-terminal part of the 'cap' region (residues 246 to 312) holds the only alpha-helix of the protein and is presumably the residual part of the deleted N-terminal head-binding domain. The endosialidase protein complexes to form homotrimeric molecules.. +PF12219 Catalytic domain of bacteriophage endosialidase
This domain family is found in bacteria and viruses, and is approximately 160 amino acids in length. There are two conserved sequence motifs: VSR and YGA. This domain is the C terminal domain of the bacteriophage protein endosialidase. The endosialidase protein forms homotrimeric molecules and this domain complexes into a tail-spike stalk. The stalk region folds in a triple beta-helix that is interrupted by a small triple beta-prism domain. The tail-spike is a multifunctional protein device used by the phage to fulfill the following functions: (i) to adsorb to the bacterial polySia capsule (ii) to de-polymerise the capsule to gain access to the outer bacterial membrane, and finally (iii) to mediate tight adhesion to the membrane, a prerequisite for the initiation of the infection cycle.. +PF12220 U1 small nuclear ribonucleoprotein of 70kDa MW N terminal
PFAM-B_2533 (release 23.0). This domain is found in eukaryotes. This domain is about 90 amino acids in length. This domain is found associated with Pfam:PF00076. This domain is part of U1 snRNP, which is the pre-mRNA binding protein of the penta-snRNP spliceosome complex. It extends over a distance of 180 A from its RNA binding domain, wraps around the core domain of U1 snRNP consisting of the seven Sm proteins and finally contacts U1-C, which is crucial for 5'-splice-site recognition.. +PF12221 Bacterial membrane protein N terminal
PFAM-B_2550 (release 23.0). This domain is found in bacteria. This domain is typically between 65 to 81 amino acids in length. This domain is found associated with Pfam:PF01145. This domain is the N terminal of the bacterial membrane protein HflK. HflK complexes with HflC to form a membrane protease which is modulated by the GTPase HflX. The N terminal domain of HflK is the membrane spanning region which anchors the protein in the bacterial membrane.. +PF12222 Peptide N-acetyl-beta-D-glucosaminyl asparaginase amidase A
PFAM-B_2578 (release 23.0). This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 558 and 775 amino acids in length. There is a conserved TGG sequence motif. PNGase A is a protein which cleaves glycopeptides.. +PF12223 Protein of unknown function (DUF3602)
PFAM-B_2582 (release 23.0). This domain family is found in eukaryotes, and is typically between 78 and 89 amino acids in length.. +PF12224 Putative amidoligase enzyme
This family of proteins is likely to act as amidoligase enzymes. Proteins in this family are found in conserved gene neighborhoods encoding a glutamine amidotransferase-like thiol peptidase (in proteobacteria) or an Aig2 family cyclotransferase protein (in firmicutes).. +PF12225 Methylene-tetrahydrofolate reductase C terminal
PFAM-B_2600 (release 23.0). This family is found in bacteria and archaea, and is approximately 100 amino acids in length. There is a conserved NGPCGG sequence motif. This family is the C terminal of methylene-tetrahydrofolate reductase. This protein reduces FAD using the reducing equivalents from reduced FAD, subsequently reduces tetrahydrofolate. The C terminal of MTHFR contains the FAD binding site and is the catalytic portion of the enzyme.. +PF12226 Turkey astrovirus capsid protein
PFAM-B_2608 (release 23.0). This family of proteins is found in viruses. Proteins in this family are typically between 241 and 261 amino acids in length. These proteins are capsid proteins from various astrovirus strains.. +PF12227 Protein of unknown function (DUF3603)
PFAM-B_2609 (release 23.0). This protein is found in bacteria and eukaryotes. Proteins in this family are about 250 amino acids in length.. +PF12228 Protein of unknown function (DUF3604)
PFAM-B_2610 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 621 and 693 amino acids in length.. +PF12229 Putative peptidoglycan binding domain
+PF12230 Pre-mRNA splicing factor PRP21 like protein
PFAM-B_2642 (release 23.0). This domain family is found in eukaryotes, and is typically between 212 and 238 amino acids in length. The family is found in association with Pfam:PF01805. There are two completely conserved residues (W and H) that may be functionally important. PRP21 is required for assembly of the prespliceosome and it interacts with U2 snRNP and/or pre-mRNA in the prespliceosome. This family also contains proteins similar to PRP21, such as the mammalian SF3a. SF3a also interacts with U2 snRNP from the prespliceosome, converting it to its active form.. +PF12231 Rap1-interacting factor 1 N terminal
Assefa S, Gavin OL, Eberhardt R. PFAM-B_2647 (release 23.0). This domain family is found in eukaryotes, and is typically between 135 and 146 amino acids in length. Rif1 is a protein which interacts with Rap1 to regulate telomere length. Interaction with telomeres limits their length. The N terminal region contains many HEAT- and ARMADILLO- type repeats. These are helical folds which form extended curved proteins or RNA interface surfaces.. +PF12232 Myogenic determination factor 5
PFAM-B_2654 (release 23.0). This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00010, Pfam:PF01586. There is a conserved CSD sequence motif. Myf5 is responsible for directing cells to the skeletal myocyte lineage during development. Myf5 is likely to act in a similar way to the other MRF4 proteins such as MyoD which perform the same function. These are histone acetyltransferases and histone deacetylases which activate and repress genes involved in the myocyte lineage.. +PF12233 Human adult T cell leukemia/lymphoma virus protein
PFAM-B_2655 (release 23.0). This family of proteins is found in viruses. Proteins in this family are approximately 100 amino acids in length. p12I binds to the immature beta and gamma-c chains of the interleukin-2 receptor retarding their translocation to the plasma membrane. p12I forms dimers which bind to these chains.. +PF12234 RAVE protein 1 C terminal
PFAM-B_2692 (release 23.0). This domain family is found in eukaryotes, and is typically between 621 and 644 amino acids in length. This family is the C terminal region of the protein RAVE (regulator of the ATPase of vacuolar and endosomal membranes). Rav1p is involved in regulating the glucose dependent assembly and disassembly of vacuolar ATPase V1 and V0 subunits.. +PF12235 Fragile X-related 1 protein C terminal
PFAM-B_2701 (release 23.0). This domain family is found in eukaryotes, and is typically between 126 and 160 amino acids in length. The family is found in association with Pfam:PF05641, Pfam:PF00013. This family is the C terminal region of the fragile X related 1 protein FXR1P. FXR1P contains two KH domains and a RGG box that are characteristic motifs in RNA-binding proteins as well as nuclear localization and export signals. FXR1P is thought to regulate mRNA transport and translation.. +PF12236 Bacteriophage head to tail connecting protein
PFAM-B_2709 (release 23.0). This family of head-tail connector proteins is found in bacteria and viruses. Proteins in this family are typically between 516 and 555 amino acids in length. This protein is found in Phage T7 and T3 among others.. +PF12237 Phosphorylated CTD interacting factor 1 WW domain
PFAM-B_2805 (release 23.0). This domain family is found in bacteria and eukaryotes, and is approximately 180 amino acids in length. This domain is the WW domain of PCIF1. PCIF1 interacts with phosphorylated RNA polymerase II carboxy-terminal domain (CTD). The WW domain of PCIF1 can directly and preferentially bind to the phosphorylated CTD compared to the unphosphorylated CTD. PCIF1 binds to the hyperphosphorylated RNAP II (RNAP IIO) in vitro and in vivo. Double immunofluorescence labeling in HeLa cells demonstrated that PCIF1 and endogenous RNAP IIO are co-localized in the cell nucleus. Thus, PCIF1 may play a role in mRNA synthesis by modulating RNAP IIO activity.. +PF12238 Merozoite surface antigen 2c
PFAM-B_2755 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 263 and 318 amino acids in length. There is a conserved SFT sequence motif. MSA-2 is a plasma membrane glycoprotein which can be found in Babesia bovis species.. +PF12239 Protein of unknown function (DUF3605)
PFAM-B_2795 (release 23.0). This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 161 and 256 amino acids in length.. +PF12240 Angiomotin C terminal
PFAM-B_2808 (release 23.0). This domain family is found in eukaryotes, and is typically between 197 and 211 amino acids in length. This family is the C terminal region of angiomotin. Angiomotin regulates the action of angiogenesis inhibitor angiostatin. The C terminal region of angiomotin appears to be involved in directing the protein chemotactically.. +PF12241 Trans-2-enoyl-CoA reductase catalytic region
Vella Briffa B, Coggill P. Pfam-B_10602 (release 10.0). This family of trans-2-enoyl-CoA reductases, EC:1.3.1.44, carries the catalytic sites of the enzyme, characterised by the conserved sequence motifs: YNThhhFxK, and YShAPxR. In Euglena, where the enzyme has been characterised, it catalyses the reduction of enoyl-CoA to acyl-CoA in an unusual fatty acid pathway in mitochondria. The whole pathway performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation.. +PF12242 NAD(P)H binding domain of trans-2-enoyl-CoA reductase
Vella Briffa B, Coggill P. Pfam-B_10602 (release 10.0). This family carries the region of the enzyme trans-2-enoyl-CoA reductase, EC:1.3.1.44, which binds NAD(P)H. The activity of the enzyme was characterised in Euglena where an unusual fatty acid synthesis pathway in the mitochondria performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. The full enzyme catalyses the reduction of enoyl-CoA to acyl-CoA. The binding site is conserved as GA/CSpGYG, where p is any polar residue.. +PF12243 CTD kinase subunit gamma CTK3
Pfam-B_12814 (release 23.0). The C-terminal domain kinase (CTDK-1) is a three-subunit complex comprised of Ctk1, Ctk2, and Ctk3, that plays a key role in regulation of transcription and translation and in coordinating these two processes. Both Ctk2 and Ctk3 are regulated at the level of protein turnover, and are unstable proteins processed through a ubiquitin-proteasome pathway. Their physical interaction is required to protect both subunits from degradation, and both Ctk2 and Ctk3 are required for Ctk1 CTD kinase activation. The mammalian P-TEFb is mirrored by the combined complexes in yeast of the CTDK1 and the Bur1/2.. +PF12244 Protein of unknown function (DUF3606)
PFAM-B_2813 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 85 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF12245 DUF3607;
Bacterial Ig-like domain (group 3). PFAM-B_2816 (release 23.0). This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins.. +PF12246 Temperature dependent protein affecting M2 dsRNA replication
PFAM-B_2862 (release 23.0). This domain family is found in eukaryotes, and is typically between 231 and 255 amino acids in length. There is a single completely conserved residue P that may be functionally important. MKT1 is required for maintenance of K2 toxin above 30 degrees C in strains with the L-A-HN variant of the L-A double-stranded RNA virus of Saccharomyces cerevisiae. MKT1 is a 93 kDa protein with serine-rich regions and the retroviral protease signature, DTG. This family is the C terminal region of MKT1.. +PF12247 Temperature dependent protein affecting M2 dsRNA replication
PFAM-B_2862 (release 23.0). This domain family is found in eukaryotes, and is typically between 231 and 255 amino acids in length. There is a single completely conserved residue P that may be functionally important. MKT1 is required for maintenance of K2 toxin above 30 degrees C in strains with the L-A-HN variant of the L-A double-stranded RNA virus of Saccharomyces cerevisiae. MKT1 is a 93 kDa protein with serine-rich regions and the retroviral protease signature, DTG. This family is the N terminal region of MKT1.. +PF12248 Farnesoic acid 0-methyl transferase
PFAM-B_2872 (release 23.0). This domain family is found in bacteria and eukaryotes, and is approximately 110 amino acids in length. Farnesoic acid O-methyl transferase (FAMeT) is the enzyme that catalyses the formation of methyl farnesoate (MF) from farnesoic acid (FA) in the biosynthetic pathway of juvenile hormone (JH).. +PF12249 Arabinofuranosyltransferase A C terminal
PFAM-B_2900 (release 23.0). This domain family is found in bacteria, and is typically between 179 and 190 amino acids in length. This family is the C terminal region of AftA. The enzyme catalyses the addition of the first key arabinofuranosyl residue from the sugar donor beta-D-arabinofuranosyl-1-monophosphoryldecaprenol to the galactan domain of the cell wall, thus priming the galactan for further elaboration by the arabinofuranosyltransferases. The C terminal region is predicted to be directed towards the periplasm.. +PF12250 Arabinofuranosyltransferase N terminal
PFAM-B_2900 (release 23.0). This domain family is found in bacteria, and is typically between 430 and 441 amino acids in length. This family is the N terminal region of AftA. The enzyme catalyses the addition of the first key arabinofuranosyl residue from the sugar donor beta-D-arabinofuranosyl-1-monophosphoryldecaprenol to the galactan domain of the cell wall, thus priming the galactan for further elaboration by the arabinofuranosyltransferases. The N terminal region has been predicted to span 11 transmembrane regions.. +PF12251 snRNA-activating protein of 50kDa MW C terminal
PFAM-B_2919 (release 23.0). This domain family is found in eukaryotes, and is typically between 196 and 207 amino acids in length. There is a conserved CEH sequence motif. SNAP50 is part of the snRNA-activating protein complex which activates RNA polymerases II and III. There is a cysteine-histidine cluster which contains two possible zinc finger motifs.. +PF12252 Dot/Icm substrate protein
PFAM-B_2926 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 397 and 1543 amino acids in length. This family is the SidE protein in the Dot/Icm pathway of Legionella pneumophila bacteria. There is little literature describing the family.. +PF12253 CAF1B;
Chromatin assembly factor 1 subunit A. The CAF-1 or chromatin assembly factor-1 consists of three subunits, and this is the first, or A . The A domain is uniquely required for the progression of S phase in mouse cells , independent of its ability to promote histone deposition but dependent on its ability to interact with HP1 - heterochromatin protein 1-rich heterochromatin domains next to centromeres that are crucial for chromosome segregation during mitosis. This HP1-CAF-1 interaction module functions as a built-in replication control for heterochromatin, which, like a control barrier, has an impact on S-phase progression in addition to DNA-based checkpoints .. +PF12254 DNA polymerase alpha subunit p180 N terminal
PFAM-B_2966 (release 23.0). This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00136, Pfam:PF08996, Pfam:PF03104. This family is the N terminal of DNA polymerase alpha subunit p180 protein. The N terminal contains the catalytic region of the alpha subunit.. +PF12255 Insecticide toxin TcdB middle/C-terminal region
PFAM-B_3032 (release 23.0). This domain family is found in bacteria, and is approximately 150 amino acids in length. The family is found in association with Pfam:PF03534. This family is the C-terminal-sided middle region of the bacterial insecticide toxin TcdB.. +PF12256 Insecticide toxin TcdB middle/N-terminal region
PFAM-B_3032 (release 23.0). This domain family is found in bacteria and archaea, and is typically between 164 and 180 amino acids in length. The family is found in association with Pfam:PF05593. This family is the N-terminal-sided middle region of the bacterial insecticide toxin TcdB. This region appears related to the FG-GAP repeat Pfam:PF01839.. +PF12257 Protein of unknown function (DUF3608)
PFAM-B_3083 (release 23.0). This domain family is found in eukaryotes, and is approximately 280 amino acids in length. The family is found in association with Pfam:PF00610.. +PF12258 Microcephalin protein
PFAM-B_3105 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 384 and 835 amino acids in length. Microcephalin is involved in determining the size of the brain in animals. It is a protein, which if expressed homozygously causes the organism to have the condition microcephaly. Organisms expressing the mutated form of this protein in a homozygous manner develop a condition called microcephaly - a drastically reduced brain mass and volume. Microcephalin is predicted to contain three BRCA1 C-terminal domains, the first of which is the probable microcephaly mutation site.. +PF12259 Protein of unknown function (DUF3609)
PFAM-B_3173 (release 23.0). This domain family is found in eukaryotes and viruses, and is typically between 348 and 360 amino acids in length.. +PF12260 Protein-kinase domain of FAM69
Assefa S, Gavin OL, Coggill P. PFAM-B_3196 (release 23.0). This is the C-terminal region of a family of FAM69 proteins from Metazoa and Viridiplantae that are active protein-kinases. The family members have a short transmembrane helix close to the N-terminus, and thereafter are highly enriched with cysteines. FAM69 proteins are localised to the endoplasmic reticulum. Many members also have a short EF-hand, calcium-binding, domain just upstream of the kinase domain. The exact function of the more N-terminal family is uncertain.. +PF12261 Thermostable hemolysin
PFAM-B_3198 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 200 and 228 amino acids in length. T_hemolysin is a pore-forming toxin of bacteria, able to lyse erythrocytes from a number of mammalian species.. +PF12262 Bacterial virulence factor lipase N-terminal
PFAM-B_3205 (release 23.0). This domain family is found in bacteria, and is typically between 258 and 271 amino acids in length. There are two conserved sequence motifs: DGT and DGWST. This family is the N-terminal region of bacterial virulence factor lipase. The N-terminal region contains a potential signalling sequence.. +PF12263 Protein of unknown function (DUF3611)
PFAM-B_3207 (release 23.0). This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 180 and 205 amino acids in length. There are two completely conserved residues (W and G) that may be functionally important.. +PF12264 VSL_protease; Peptidase_C03;
Waikavirus capsid protein 1. Assefa S, Gavin OL, Eberhardt R. PFAM-B_3208 (release 23.0). The rice tungro spherical waikavirus polyprotein is cleaved into 7 proteins, including three capsid proteins, by the tungro spherical virus-type peptidase Pfam:PF12381. This family represents the capsid protein 1 [1,2].. +PF12265 Histone-binding protein RBBP4 or subunit C of CAF1 complex
Pfam-B_318 (release 23.0). The CAF-1 complex is a conserved heterotrimeric protein complex that promotes histone H3 and H4 deposition onto newly synthesized DNA during replication or DNA repair; specifically it facilitates replication-dependent nucleosome assembly with the major histone H3 (H3.1). This domain is an alpha helix which sits just upstream of the WD40 seven-bladed beta-propeller in the human RbAp46 protein. RbAp46 folds into the beta-propeller and binds histone H4 in a groove formed between this N-terminal helix and an extended loop inserted into blade six .. +PF12266 Protein of unknown function (DUF3613)
PFAM-B_3240 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 126 amino acids in length.. +PF12267 Protein of unknown function (DUF3614)
PFAM-B_3244 (release 23.0). This family of proteins is found in viruses. Proteins in this family are typically between 162 and 495 amino acids in length.. +PF12268 Protein of unknown function (DUF3612)
PFAM-B_3220 (release 23.0). This domain family is found in bacteria, and is approximately 180 amino acids in length. The family is found in association with Pfam:PF01381.. +PF12269 CpG binding protein zinc finger C terminal domain
PFAM-B_3253 (release 23.0). This domain family is found in eukaryotes, and is approximately 240 amino acids in length. This domain is the zinc finger domain of a CpG binding DNA methyltransferase protein. It contains a CxxC motif which forms the zinc finger and binds to DNA.. +PF12270 Cytochrome c oxidase subunit IV
PFAM-B_3280 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. This family is the fourth subunit of the cytochrome c oxidase complex. This subunit does not have a catalytic capacity but instead, is required for assembly and/or stability of the complex.. +PF12271 Chitin synthase III catalytic subunit
PFAM-B_3298 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 288 and 332 amino acids in length. This family is the catalytic domain of chitin synthase III. Chitin is a major component of fungal cell walls and this enzyme is responsible for its formation.. +PF12272 Protein of unknown function (DUF3610)
PFAM-B_3173 (release 23.0). This domain family is found in eukaryotes, and is typically between 146 and 160 amino acids in length. There are two conserved sequence motifs: FNN and IDS.. +PF12273 Chitin synthesis regulation, resistance to Congo red
Pfam-B_2935 (release 23.0). RCR proteins are ER membrane proteins that regulate chitin deposition in fungal cell walls. Although chitin, a linear polymer of beta-1,4-linked N-acetylglucosamine, constitutes only 2% of the cell wall it plays a vital role in the overall protection of the cell wall against stress, noxious chemicals and osmotic pressure changes. Congo red is a cell wall-disrupting benzidine-type dye extensively used in many cell wall mutant studies that specifically targets chitin in yeast cells and inhibits growth. RCR proteins render the yeasts resistant to Congo red by diminishing the content of chitin in the cell wall . RCR proteins probably regulate chitin synthase III and interact directly with ubiquitin ligase Rsp5, and the VPEY motif is necessary for this, via interaction with the WW domains of Rsp5 .. +PF12274 Protein of unknown function (DUF3615)
PFAM-B_3306 (release 23.0). This domain family is found in bacteria and eukaryotes, and is typically between 86 and 97 amino acids in length. There is a conserved FAE sequence motif. There is a single completely conserved residue F that may be functionally important.. +PF12275 Protein of unknown function (DUF3616)
PFAM-B_3338 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 335 and 392 amino acids in length. There is a conserved GLRGPV sequence motif.. +PF12276 Protein of unknown function (DUF3617)
PFAM-B_3343 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 155 and 179 amino acids in length. There is a single completely conserved residue C that may be functionally important.. +PF12277 Protein of unknown function (DUF3618)
PFAM-B_3350 (release 23.0). This domain family is found in bacteria, and is approximately 50 amino acids in length.. +PF12278 Sex determination protein N terminal
PFAM-B_3366 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 168 and 410 amino acids in length. This family is the N terminal end of the sex determination protein of many different animals. It plays a role in the gender determination of around 20% of all animals.. +PF12279 Protein of unknown function (DUF3619)
PFAM-B_3353 (release 23.0). This protein is found in bacteria. Proteins in this family are about 140 amino acids in length. This protein has two conserved sequence motifs: AAR and DDLP.. +PF12280 Brain specific membrane anchored protein
PFAM-B_3398 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 285 and 331 amino acids in length. BSMAP has a putative transmembrane domain and is predicted to be a type I membrane glycoprotein.. +PF12281 Protein of unknown function (DUF3620)
PFAM-B_3384 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 281 and 358 amino acids in length. There are two completely conserved residues (G and P) that may be functionally important.. +PF12282 Signal transduction histidine kinase
PFAM-B_3401 (release 23.0). This domain is found in bacteria. This domain is about 150 amino acids in length. This domain is found associated with Pfam:PF07568, Pfam:PF08448, Pfam:PF02518. This domain has a single completely conserved residue P that may be functionally important. This family is mostly annotated as a histidine kinase involved in signal transduction but there is little published evidence to support this.. +PF12283 Bacteriophage protein K
PFAM-B_3455 (release 23.0). This family of proteins is found in viruses. Proteins in this family are approximately 60 amino acids in length. This family is a protein expressed by bacteriophages which has an unknown function. There is evidence that it is non-essential for in vivo production of a mature phage.. +PF12284 Hox protein A13 N terminal
PFAM-B_3464 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 149 and 306 amino acids in length. The family is found in association with Pfam:PF00046. This family is the N terminal of the Hox gene protein involved in formation of the digital arch of the hands and feet as well as in correct genital formation. Mutation of the protein is associated with hand-foot-genital syndrome.. +PF12285 Protein of unknown function (DUF3621)
PFAM-B_3468 (release 23.0). This family of proteins is found in viruses. Proteins in this family are typically between 49 and 62 amino acids in length. There are two conserved sequence motifs: QPLDLS and EQQ.. +PF12286 Protein of unknown function (DUF3622)
PFAM-B_3490 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 72 and 107 amino acids in length. There is a conserved VSK sequence motif.. +PF12287 Cytoplasmic activation/proliferation-associated protein-1 C term
PFAM-B_3504 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 343 and 708 amino acids in length. This family is the C terminal region of caprin-1. Caprin-1 is a protein involved in regulating cellular proliferation. In mutated phenotypes, the G1 phase of the cell cycle is greatly lengthened, impairing normal proliferation. The C terminal region of caprin-1 contains RGG motifs which are characteristic of RNA binding domains. It is possible that caprin-1 functions through an RNA binding mechanism.. +PF12288 Carboxysome shell peptide mid-region
PFAM-B_3507 (release 23.0). This domain family is found in bacteria and eukaryotes, and is approximately 430 amino acids in length. This family is annotated frequently as a carboxysome shell peptide, however there is little publication to confirm this.. +PF12289 Rotavirus VP1 structural protein
PFAM-B_3516 (release 23.0). This domain family is found in viruses, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF02123. VP1 is a structural protein of the inner core layer of the rotavirus virion. It complexes with VP2 and Vp3 to form this layer.. +PF12290 topoisom_IIA_B; Topoisom_IIA_B;
Protein of unknown function (DUF3802). PFAM-B_3547 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 114 and 143 amino acids in length. There is a conserved KNLFD sequence motif.. +PF12291 Protein of unknown function (DUF3623)
PFAM-B_3578 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 261 and 345 amino acids in length.. +PF12292 Protein of unknown function (DUF3624)
PFAM-B_3603 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There is a conserved GRC sequence motif.. +PF12293 Protein of unknown function (DUF3625)
PFAM-B_3621 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 327 and 360 amino acids in length.. +PF12294 Protein of unknown function (DUF3626)
PFAM-B_3627 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 294 and 374 amino acids in length.. +PF12295 Symplekin tight junction protein C terminal
PFAM-B_3634 (release 23.0). This domain family is found in eukaryotes, and is approximately 180 amino acids in length. There is a single completely conserved residue P that may be functionally important. Symplekin has been localized, by light and electron microscopy, to the plaque associated with the cytoplasmic face of the tight junction-containing zone (zonula occludens) of polar epithelial cells and of Sertoli cells of testis. However, both the mRNA and the protein can also be detected in a wide range of cell types that do not form tight junctions. Careful analyses have revealed that the protein occurs in all these diverse cells in the nucleoplasm, and only in those cells forming tight junctions is it recruited, partly but specifically, to the plaque structure of the zonula occludens. . +PF12296 Hydrophobic surface binding protein A
PFAM-B_3635 (release 23.0). This protein is found in eukaryotes. Proteins in this family are typically between 171 and 275 amino acids in length. Although the HsbA amino acid sequence suggests that HsbA may be hydrophilic, HsbA adsorbed to hydrophobic PBSA (Polybutylene succinate-co-adipate) surfaces in the presence of NaCl or CaCl2. When HsbA was adsorbed on the hydrophobic PBSA surfaces, it promoted PBSA degradation via the CutL1 polyesterase. CutL1 interacts directly with HsbA attached to the hydrophobic QCM electrode surface. These results suggest that when HsbA is adsorbed onto the PBSA surface, it recruits CutL1, and that when CutL1 is accumulated on the PBSA surface, it stimulates PBSA degradation.. +PF12297 Ellis van Creveld protein 2 like protein
PFAM-B_3668 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 571 and 1310 amino acids in length. There are two conserved sequence motifs: LPA and ELH. EVC2 is implicated in Ellis van Creveld chondrodysplastic dwarfism in humans. Mutations in this protein can give rise to this congenital condition. LIMBIN is a protein which shares around 80% sequence homology with EVC2 and it is implicated in a similar condition in bovine chondrodysplastic dwarfism.. +PF12298 Eukaryotic mitochondrial regulator protein
PFAM-B_3672 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 168 and 381 amino acids in length. Bot1p localizes to the mitochondria in live cells and cofractionates with purified mitochondrial ribosomes. Bot1p has a novel function in the control of cell respiration by acting on the mitochondrial protein synthesis machinery. Observations also indicate that in fission yeast, alterations of mitochondrial function are linked to changes in cell cycle and cell morphology control mechanisms.. +PF12299 Protein of unknown function (DUF3627)
PFAM-B_3687 (release 23.0). This domain family is found in bacteria and viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF02498.. +PF12300 Protein of unknown function (DUF3628)
PFAM-B_3692 (release 23.0). This domain family is found in bacteria, and is typically between 153 and 183 amino acids in length. The family is found in association with Pfam:PF00270, Pfam:PF00271.. +PF12301 CD99 antigen like protein 2
PFAM-B_3714 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 165 and 237 amino acids in length. CD99L2 and CD99 are involved in trans-endothelial migration of neutrophils in vitro and in the recruitment of neutrophils into inflamed peritoneum.. +PF12302 Protein of unknown function (DUF3629)
PFAM-B_3723 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 256 and 292 amino acids in length.. +PF12304 Beta-casein like protein
PFAM-B_3744 (release 23.0). This protein is found in eukaryotes. Proteins in this family are typically between 216 to 240 amino acids in length. This protein has two conserved sequence motifs: VLR and TRIY. BCLP is associated with cell morphology and a regulation of growth pattern of tumor. It is found in adenocarcinomas of uterine cervical tissues.. +PF12305 Protein of unknown function (DUF3630)
PFAM-B_3766 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a single completely conserved residue D that may be functionally important.. +PF12306 Inclusion body protein
PFAM-B_3786 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 173 and 191 amino acids in length. PixA is thought to be specifically produced in Xenorhabdus nematophila. It is an inclusion body protein.. +PF12307 Protein of unknown function (DUF3631)
PFAM-B_3787 (release 23.0). This protein is found in bacteria. Proteins in this family are typically between 180 to 701 amino acids in length.. +PF12308 Neurogenesis glycoprotein
PFAM-B_3817 (release 23.0). This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF02191. There are two conserved sequence motifs: SAQ and VQN. Noelin-1 is a glycoprotein which is secreted mainly by postmitotic neurogenic tissues in the developing central and peripheral nervous systems, first appearing after neural tube closure. It is likely that it forms large multimeric complexes. It has a divergent function in neurogenesis. In animal caps neuralized by expression of noggin, co-expression of Noelin-1 causes expression of neuronal differentiation markers several stages before neurogenesis normally occurs in this tissue. Finally, only secreted forms of the protein can activate sensory marker expression, while all forms of the protein can induce early neurogenesis.. +PF12309 KIF-1 binding protein C terminal
PFAM-B_3821 (release 23.0). This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 365 and 621 amino acids in length. There is a conserved LLP sequence motif. KBP is a binding partner for KIF1Balpha that is a regulator of its transport function and thus represents a type of kinesin interacting protein.. +PF12310 Transcription factor protein N terminal
PFAM-B_3835 (release 23.0). This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00178. There is a conserved PAVIVE sequence motif. Elf-1 is an immune cell specific transcription factor. It is found in T cells, B cells, megakaryocytes, and mast cells and is involved in the control of transcription for various immune proteins. These include IL-2, GM-CSF, IL-5, IL-2 receptor alpha chain, and CD4 in T cells, IgH, blk, and lyn in B cells, TdT in T and B cells, IL-3 in megakaryocytes, and SCL and Fc-epsilon-RI alpha chain in mast cells.. +PF12311 Protein of unknown function (DUF3632)
PFAM-B_3839 (release 23.0). This domain family is found in eukaryotes, and is approximately 170 amino acids in length. There is a conserved ALE sequence motif.. +PF12312 NeA_polyprotein;
Nepovirus subgroup A polyprotein . PFAM-B_3843 (release 23.0). This family of proteins is found in viruses. Proteins in this family are typically between 259 and 1110 amino acids in length. The family is found in association with Pfam:PF03688, Pfam:PF03689, Pfam:PF03391. This family is one of the polyproteins expressed by Nepoviruses in subgroup A.. +PF12313 NPR1/NIM1 like defence protein C terminal
PFAM-B_3871 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 251 and 588 amino acids in length. The family is found in association with Pfam:PF00023, Pfam:PF00651. There are two conserved sequence motifs: LENRV and DLN. NPR1 (NIM1) is a defence protein in many plant species.. +PF12314 Inner membrane complex protein
PFAM-B_3921 (release 23.0). This domain is found in bacteria and eukaryotes. This domain is about 120 amino acids in length. This family is the inner membrane complex of parasitic organisms. This is a cytoskeletal structure associated with the pellicle of these parasites.. +PF12315 Protein of unknown function (DUF3633)
PFAM-B_3949 (release 23.0). This domain family is found in bacteria and eukaryotes, and is approximately 210 amino acids in length. The family is found in association with Pfam:PF00412.. +PF12316 Segment polarity protein dishevelled (Dsh) C terminal
PFAM-B_3969 (release 23.0). This domain family is found in eukaryotes, and is typically between 177 and 207 amino acids in length. The family is found in association with Pfam:PF00778, Pfam:PF02377, Pfam:PF00610, Pfam:PF00595. The segment polarity gene dishevelled (dsh) is required for pattern formation of the embryonic segments. It is involved in the determination of body organisation through the Wingless pathway (analogous to the Wnt-1 pathway).. +PF12317 Intraflagellar transport complex B protein 46 C terminal
PFAM-B_3974 (release 23.0). This family of proteins is found in eukaryotes. Proteins in this family are typically between 298 and 416 amino acids in length. IFT46 is a flagellar protein of complex B. Like all IFT proteins, it is required for transport of IFT particles into the flagella.. +PF12318 Membrane bound FAD containing D-sorbitol dehydrogenase
PFAM-B_3647 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 168 and 189 amino acids in length. There is a conserved ALM sequence motif. This family is a membrane protein (FAD-SLDH) involved in oxidation of D-sorbitol to L-sorbose.. +PF12319 Tryptophan-Threonine-rich plasmodium antigen C terminal
PFAM-B_3676 (release 23.0). This protein is found in eukaryotes. Proteins in this family are typically between 254 to 536 amino acids in length. This family is the C terminal of a surface antigen of malarial Plasmodium species. It is currently being targeted for use as part of a subunit vaccine against Plasmodium falciparum, the main species involved in causing human malaria.. +PF12320 Type 5 capsule protein repressor C-terminal domain
PFAM-B_3790 (release 23.0). This domain is found in bacteria and archaea. This domain is about 90 amino acids in length. This domain is found associated with Pfam:PF00149. SbcD works in complex with SbcC (SbcDC) which is a transcription regulator. It down-regulates transcription of arl and mgr to inhibit type 5 capsule protein production. It acts as part of the SOS pathway of bacteria.. +PF12321 Protein of unknown function (DUF3634)
PFAM-B_3607 (release 23.0). This family of proteins is found in bacteria. Proteins in this family are typically between 103 and 114 amino acids in length.. +PF12322 T4 bacteriophage base plate protein
PFAM-B_3861 (release 23.0). This protein is found in viruses. Proteins in this family are typically between 208 to 249 amino acids in length. This protein has a single completely conserved residue S that may be functionally important. This family includes the two base plate proteins in T4 bacteriophages. These are gp51 and gp26, encoded by late genes.. +PF12323 HTH_14;
Helix-turn-helix domain. Pfam-B_1210 (release 3.0) & Pfam-B_4602 (Release 7.5). This is the N terminal helix-turn-helix domain of Transposase_2 Pfam:PF01385.. +PF12324 Helix-turn-helix domain of alkylmercury lyase
Pfam-B_3505 (release 6.5). Alkylmercury lyase (EC:4.99.1.2) cleaves the carbon-mercury bond of organomercurials such as phenylmercuric acetate. This is the N terminal helix-turn-helix domain associated with Pfam:PF03243.. +PF12325 TATA element modulatory factor 1 TATA binding
Pfam-B_97264 (release 23.0). This is the C-terminal conserved coiled coil region of a family of TATA element modulatory factor 1 proteins conserved in eukaryotes . The proteins bind to the TATA element of some RNA polymerase II promoters and repress their activity by competing with the binding of TATA binding protein. TMF1_TATA_bd is the most conserved part of the TMFs . TMFs are evolutionarily conserved golgins that bind Rab6, a ubiquitous ras-like GTP-binding Golgi protein, and contribute to Golgi organisation in animal and plant cells. The Rab6-binding domain appears to be the same region as this C-terminal family .. +PF12326 N-glycosylation protein
Pfam-B_29822 (release 23.0). This family is not required for survival of S.cerevisiae, but its deletion leads to heightened sensitivity to oxidative stress. It appears to be involved in N-glycosylation, and resides in the endoplasmic reticulum.. +PF12327 FtsZ family, C-terminal domain
This family includes the bacterial FtsZ family of proteins. Members of this family are involved in polymer formation. FtsZ is the polymer-forming protein of bacterial cell division. It is part of a ring in the middle of the dividing cell that is required for constriction of cell membrane and cell envelope to yield two daughter cells. FtsZ is a GTPase, like tubulin. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea.. +PF12328 Rpp20 subunit of nuclear RNase MRP and P
Pfam-B_33537 (release 23.0). The nuclear RNase P of Saccharomyces cerevisiae is made up of at least nine protein subunits; Pop1, Pop3, Pop4, Pop5, Pop6, Pop7, Pop8, Rpr2 and Rpp1. Many of these subunits seem to be present also in the RNase MRP, with the exception of Rpr2 (Rpp21) which is unique to RNase P. Human nuclear RNase P and MRP appear to contain at least 10 protein subunits, Rpp14, Rpp20, Rpp21, Rpp25, Rpp29, Rpp30, Rpp38, Rpp40, hPop1 and hPop5, although there is recent evidence that not all of these subunits are shared between P and MRP. Archaeal RNase P has at least four protein subunits homologous to eukaryotic RNase P/MRP proteins . In the yeast RNase P, Pop6 and Pop7 (the Rpp20 homologue) interact with each other and they are both interaction partners of Pop4 ; in the human MRP Rpp25 and Rpp20 interact with each other and Rpp25 binds to Rpp29 (Pop4) .. +PF12329 TATA element modulatory factor 1 DNA binding
This is the middle region of a family of TATA element modulatory factor 1 proteins conserved in eukaryotes that contains at its N-terminal section a number of leucine zippers that could potentially form coiled coil structures. The whole proteins bind to the TATA element of some RNA polymerase II promoters and repress their activity by competing with the binding of TATA binding protein. TMFs are evolutionarily conserved golgins that bind Rab6, a ubiquitous ras-like GTP-binding Golgi protein, and contribute to Golgi organisation in animal and plant cells.. +PF12330 Domain of unknown function (DUF3635)
This family may be a potential Haspin-related leucine-zipper. A leucine zipper was proposed to be present towards the C-terminus of human Haspin, (up-stream of the current family) ; however, as this domain would appear to span several helices and be largely within a loop structure , the actual zipper might be further downstream, and be this family, which is the very C-terminal part of the Sch. pombe sequence.. +PF12331 Protein of unknown function (DUF3636)
This domain family is found in eukaryotes, and is approximately 160 amino acids in length.. +PF12333 Rix1 complex component involved in 60S ribosome maturation
This domain family is found in eukaryotes, and is typically between 91 and 105 amino acids in length. This family is the N terminal of Ipi1, a component of the Rix1 complex which works in conjunction with Rea1 to mature the 60S ribosome.. +PF12334 Rickettsia outer membrane protein B
This domain family is found in bacteria, and is approximately 220 amino acids in length. The family is found in association with Pfam:PF03797. This family is the middle region of one of the outer membrane proteins of Rickettsia which is involved in adhesion to eukaryotic cells for uptake.. +PF12335 Myotubularin protein
This domain family is found in eukaryotes, and is approximately 220 amino acids in length. The family is found in association with Pfam:PF02141, Pfam:PF03456, Pfam:PF03455. This family is the middle region of SBF2, a member of the myotubularin family. Myotubularin-related proteins have been suggested to work in phosphoinositide-mediated signalling events that may also convey control of myelination. Mutations of SBF2 are implicated in Charcot-Marie-Tooth disease.. +PF12336 SOX transcription factor
This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00505. There are two conserved sequence motifs: KKDK and LPG. This family is made up of SOX transcription factors. These are involved in upregulation of nestin, a neural promoter.. +PF12337 Protein of unknown function (DUF3637)
This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00073, Pfam:PF08935.. +PF12338 Ribulose-1,5-bisphosphate carboxylase small subunit
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00101. There is a conserved APF sequence motif. There are two completely conserved residues (L and P) that may be functionally important. This family is the small subunit of ribulose-1,5-bisphosphate.. +PF12339 DNA-J related protein
This domain family is found in bacteria, and is approximately 130 amino acids in length. The family is found in association with Pfam:PF00226. There is a conserved YYLD sequence motif. Most of the sequences in this family are annotated as DNA-J related proteins but there is little publication to back this up.. +PF12340 Protein of unknown function (DUF3638)
This domain family is found in eukaryotes, and is approximately 230 amino acids in length. There are two conserved sequence motifs: LLE and NMG.. +PF12341 Protein of unknown function (DUF3639)
This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00400. There are two completely conserved residues (E and R) that may be functionally important.. +PF12342 Protein of unknown function (DUF3640)
This family of proteins is found in viruses. Proteins in this family are typically between 25 and 211 amino acids in length.. +PF12343 Cold shock protein DEAD box A
This domain family is found in bacteria, and is typically between 68 and 89 amino acids in length. The family is found in association with Pfam:PF00270, Pfam:PF00271, Pfam:PF03880. This family is the C terminal region of DEAD box A, a protein expressed under conditions of cold shock which is involved in various cellular processes such as transcription, translation and DNA recombination.. +PF12344 Ultra-violet resistance protein B
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00271, Pfam:PF02151, Pfam:PF04851. There are two conserved sequence motifs: YAD and RRR. This family is the C terminal region of the UvrB protein which conveys mutational resistance against UV light to various different species.. +PF12345 Protein of unknown function (DUF3641)
This domain family is found in bacteria and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF04055. This family consists of proteins which are commonly annotated as Radical SAM domains but there is little annotation to back this up.. +PF12346 HJURP_repeat;
Holliday junction recognition protein-associated repeat. Vertebral Holliday junction recognition proteins carry an SCM3 domain at their N-terminus as do the eukaryotic fungi, but they also carry this central, conserved region. The function of this family is not known. Further downstream there is also a repeated domain, also of unknown function. Investigation of Scm3 and associated proteins is likely to be directly relevant to understanding the mechanism of HJURP-mediated CENP-A chromatin assembly at human centromeres.. +PF12347 HJURP_repeat;
Holliday junction regulator protein family C-terminal repeat. Although this family is conserved in the Holliday junction regulator, HJURP, proteins in higher eukaryotes, alongside an Scm3, Pfam:PF10384, family, its exact function is not known. The C-terminal region of Scm3 proteins has been evolving rapidly, and this short repeat at the C-terminal end can be present in up to two copies in the higher eukaryotes.. +PF12348 CLASP N terminal
This region is found at the N terminal of CLIP-associated proteins (CLASPs). CLASPs are widely conserved microtubule plus-end-tracking proteins that regulate the stability of dynamic microtubules . In yeast, Drosophila, and Xenopus, a single CLASP orthologue is present. In mammals, a second paralogue (CLASP2) exists which has some functional overlap with CLASP1 .. +PF12349 SREBP-CAP_SSD;
Sterol-sensing domain of SREBP cleavage-activation. Sterol regulatory element-binding proteins (SREBPs) are membrane-bound transcription factors that promote lipid synthesis in animal cells. They are embedded in the membranes of the endoplasmic reticulum (ER) in a helical hairpin orientation and are released from the ER by a two-step proteolytic process. Proteolysis begins when the SREBPs are cleaved at Site-1, which is located at a leucine residue in the middle of the hydrophobic loop in the lumen of the ER . Upon proteolytic processing SREBP can activate the expression of genes involved in cholesterol biosynthesis and uptake. SCAP stimulates cleavage of SREBPs via fusion of their two C-termini . This domain is the transmembrane region that traverses the membrane eight times and is the sterol-sensing domain of the cleavage protein . WD40 domains are found towards the C-terminus.. +PF12350 CTD kinase subunit gamma CTK3 C-terminus
Pfam-B_12814 (release 23.0). The C-terminal domain kinase (CTDK-1), is a three-subunit complex comprised of Ctk1, Ctk2, and Ctk3, that plays a key role in regulation of transcription and translation and in coordinating these two processes. Both Ctk2 and Ctk3 are regulated at the level of protein turnover, and are unstable proteins processed through a ubiquitin-proteasome pathway. Their physical interaction is required to protect both subunits from degradation, and both Ctk2 and Ctk3 are required for Ctk1 CTD kinase activation . The mammalian P-TEFb is mirrored by the combined complexes in yeast of the CTDK1 and the Bur1/2 . It is not clear what independent function this C-terminal domain has.. +PF12351 Ca2+ regulator and membrane fusion protein Fig1
During the mating process of yeast cells, two Ca2+ influx pathways become activated. The resulting elevation of cytosolic free Ca2+ activates downstream signaling factors that promote long term survival of unmated cells. Fig1 is a regulator of the low affinity Ca2+ influx system (LACS) , and is also required for efficient membrane fusion during yeast mating .. +PF12352 Snare region anchored in the vesicle membrane C-terminus
Within the SNARE proteins interactions in the C-terminal half of the SNARE helix are critical to the driving of membrane fusion; whereas interactions in the N-terminal half of the SNARE domain are important for promoting priming or docking of the vesicle Pfam:PF05008.. +PF12353 Eukaryotic translation initiation factor 3 subunit G
This domain family is found in eukaryotes, and is approximately 130 amino acids in length. The family is found in association with Pfam:PF00076. This family is subunit G of the eukaryotic translation initiation factor 3. Subunit G is required for eIF3 integrity.. +PF12354 Bacterial adhesion/invasion protein N terminal
This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00560, Pfam:PF08191, Pfam:PF09479. There are two completely conserved residues (I and F) that may be functionally important. Internalin mediates bacterial adhesion and invasion of epithelial cells in the human intestine through specific interaction with its host cell receptor E-cadherin. This family is the N terminal of internalin, the cap domain of the protein. The cap domain is conserved between different internalin types. The cap domain does not interact with E cadherin, therefore its function is presumably structural: capping the hydrophobic core.. +PF12355 Down syndrome cell adhesion molecule C terminal
This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00047, Pfam:PF07679, Pfam:PF00041. The Down syndrome cell adhesion molecule (Dscam) belongs to a family of cell membrane molecules involved in the differentiation of the nervous system. This is the C terminal cytoplasmic tail region of Dscam.. +PF12356 Protein of unknown function (DUF3643)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 217 and 4852 amino acids in length. There is a conserved TLA sequence motif.. +PF12357 Phospholipase D C terminal
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00168, Pfam:PF00614. There is a conserved FPD sequence motif. This family is the C terminal of phospholipase D. PLD is a major plant lipid-degrading enzyme which is involved in signal transduction.. +PF12358 Protein of unknown function (DUF3644)
This domain family is found in bacteria, and is typically between 65 and 80 amino acids in length.. +PF12359 Protein of unknown function (DUF3645)
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. There is a conserved HPD sequence motif.. +PF12360 Paired box protein 7
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00046, Pfam:PF00292. Pax7 belongs to a family of genes that encode paired-box-containing transcription factors involved in the control of developmental processes. Pax7 has a distinct role in the specification of myogenic satellite cells.. +PF12361 Duffy-antigen binding protein
This family of proteins is found in eukaryotes. Proteins in this family are typically between 449 and 1061 amino acids in length. The family is found in association with Pfam:PF05424. There are two conserved sequence motifs: NKNGG and QKHDF. This family is part of the Duffy-antigen binding protein of Plasmodium spp. This protein is an antigen on these parasites which enable them to invade erythrocytes.. +PF12362 DNA polymerase III gamma and tau subunits C terminal
This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00004. The proteins in this family are frequently annotated as the gamma and tau subunits of DNA polymerase III, however there is little accompanying literature to back this up.. +PF12363 Phage protein
This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 119 and 164 amino acids in length. The proteins in this family are frequently annotated as phage proteins, however there is little accompanying literature to back this up or to describe the nature of these phage proteins.. +PF12364 Protein of unknown function (DUF3648)
This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 53 and 3115 amino acids in length. There are two completely conserved residues (A and F) that may be functionally important.. +PF12365 Protein of unknown function (DUF3649)
This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length.. +PF12366 Cancer susceptibility candidate 1
This domain family is found in eukaryotes, and is typically between 216 and 263 amino acids in length. Casc1 has many SNPs associated with cancer susceptibility.. +PF12367 Pyruvate ferredoxin oxidoreductase beta subunit C terminal
This domain family is found in bacteria and archaea, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF02775. There are two completely conserved residues (A and G) that may be functionally important. PFO is involved in carbon dioxide fixation via a reductive TCA cycle. It forms a heterodimer (alpha/beta). The beta subunit has binding motifs for Fe-S clusters and thiamine pyrophosphate.. +PF12368 Protein of unknown function (DUF3650)
This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00581. There is a single completely conserved residue N that may be functionally important.. +PF12369 Gonadotropin hormone receptor transmembrane region
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00560, Pfam:PF00001. There are two completely conserved C residues that may be functionally important. This family contains the transmembrane region of Follicular stimulating hormone and luteinizing hormone - the two major gonadotropin hormone receptors. These receptors are G protein coupled receptors involved in development and maturation of germ cells in both fecund genders. The transmembrane region is conserved between the two different receptors while the extracellular ligand binding domains are less well conserved.. +PF12371 Protein of unknown function (DUF3651)
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. This family is frequently annotated as a membrane protein but there is little associated literature to back this up.. +PF12372 Huntingtin protein region
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF02985. This family is in the middle region of the Huntingtin protein associated with Huntington's disease. The protein is of unknown function, however it is known that a polyglutamine (CAG) repeat in the gene coding for it results in the development of Huntington's disease.. +PF12373 Major surface glycoprotein 2 C terminal
This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF02349. This family is the C terminal of major surface glycoprotein 2 of virulent bacteria. It is a virulence factor antigen.. +PF12374 Double-sex mab3 related transcription factor 1
This domain family is found in eukaryotes, and is typically between 61 and 73 amino acids in length. The family is found in association with Pfam:PF00751. This family is a transcription factor involved in sex determination. The proteins in this family contain a zinc finger-like DNA-binding motif, DM domain.. +PF12375 Phage protein
This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 112 and 194 amino acids in length.. +PF12376 Protein of unknown function (DUF3654)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 193 and 612 amino acids in length.. +PF12377 Duffy binding protein N terminal
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF05424. This family contains the N terminus of the Duffy receptor binding domain.. +PF12378 Trypsin-sensitive surface-exposed protein
This domain family is found in bacteria, and is typically between 67 and 79 amino acids in length. This family contains trypsin-sensitive surface-exposed proteins called cytadhesins. Cytadhesins are virulence factor proteins which mediate attachment of bacterial cells to host cells for invasion.. +PF12379 Protein of unknown function (DUF3655)
This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF08716, Pfam:PF01661, Pfam:PF05409, Pfam:PF06471, Pfam:PF08717, Pfam:PF06478, Pfam:PF09401, Pfam:PF06460, Pfam:PF08715, Pfam:PF08710.. +PF12380 Gill-associated viral 3C-like peptidase
a positive-stranded RNA virus of prawns, that has been called yellow head virus protease and gill-associated virus 3C-like peptidase. The GAV cysteine protease is predicted to be the key enzyme in the processing of the GAV replicase polyprotein precursors, pp1a and pp1ab. This protease employs a Cys(2968)-His(2879) catalytic dyad.. +PF12381 Tungro spherical virus-type peptidase
This is the protease for self-cleavage of the positive single-stranded polyproteins of a number of plant viral genomes. The protease activity of the polyprotein is at the C-terminal end, adjacent to the putative RNA polymerase.. +PF12382 Retrotransposon peptidase
This is a small family of fungal retroviral aspartyl peptidases.. +PF12383 Severe acute respiratory syndrome coronavirus 3b protein
This family of proteins is found in viruses. Proteins in this family are typically between 32 and 154 amino acids in length. This family contains the SARS coronavirus 3b protein which is predominantly localized in the nucleolus, and induces G0/G1 arrest and apoptosis in transfected cells.. +PF12384 Ty3 transposon peptidase
Ty3 is a gypsy-type, retrovirus-like, element found in the budding yeast. The Ty3 aspartyl protease is required for processing of the viral polyprotein into its mature species.. +PF12385 Papain-like cysteine protease AvrRpt2
This is a family of cysteine proteases, found in actinobacteria, proteobacteria and firmicutes. Papain-like cysteine proteases play a crucial role in plant-pathogen/pest interactions. On entering the host they act on non-self substrates, thereby manipulating the host to evade proteolysis . AvrRpt2 from Pseudomonas syringae pv. tomato DC3000 triggers resistance to P. syringae-2-dependent defence responses, including hypersensitive cell death, by cleaving the Arabidopsis RIN4 protein which is monitored by the cognate resistance protein RPS2 .. +PF12386 Pseudomurein endo-isopeptidase Pei
This peptidase has the catalytic triad C-H-D at the C-terminal end, a triad similar to that in thiol proteases and animal transglutaminases. It catalyses the in vitro lysis of M. marburgensis cells under reducing conditions and exhibits characteristics of metal-activated peptidases.. +PF12387 Pestivirus NS2 peptidase
The pestivirus NS2 peptidase is responsible for single cleavage between NS2 and NS3 of the bovine viral diarrhea virus polyprotein, a cleavage that is correlated with cytopathogenicity . The peptidase is activated by its interaction with 'J-domain protein interacting with viral protein' - Jiv. [2, 1].. +PF12388 Dual-action HEIGH metallo-peptidase
The catalytic triad for this family of proteases is HE-H-H, which in many members is in the sequence motif HEIGH.. +PF12389 Camelysin metallo-endopeptidase
+PF12390 Selenocysteine synthase N terminal
This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF03841. There is a single completely conserved residue P that may be functionally important. This family is the N terminal region of selenocysteine synthase which catalyses the conversion of seryl-tRNA(Sec) into selenocysteyl-tRNA(Sec).. +PF12391 Protocatechuate 3,4-dioxygenase beta subunit N terminal
This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00775. There are two completely conserved residues (Y and R) that may be functionally important. This family is the N terminal region of the beta subunit of protocatechuate 3,4-dioxygenase. This enzyme utilizes a mononuclear, non-heme Fe3+ centre to catalyse metabolic cellular reactions.. +PF12392 Collagenase
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF01136.. +PF12393 Dr family adhesin
This domain family is found in bacteria, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF04619. This family is the Dr-family adhesin expressed by uropathogenic E. coli.. +PF12394 Protein of unknown function (DUF3657)
This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF05057.. +PF12395 Protein of unknown function
This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF08874. There are two completely conserved residues (D and R) that may be functionally important.. +PF12396 Protein of unknown function (DUF3659)
This domain family is found in bacteria and eukaryotes, and is approximately 70 amino acids in length.. +PF12397 U3 small nucleolar RNA-associated protein 10
This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF08146. This family is the protein associated with U3 snoRNA which is involved in the processing of pre-rRNA.. +PF12398 Receptor serine/threonine kinase
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00954, Pfam:PF01453, Pfam:PF00069, Pfam:PF08276. There is a conserved ELPL sequence motif.. +PF12399 Branched-chain amino acid ATP-binding cassette transporter
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00005. There is a conserved AYLG sequence motif. This family is the C terminal of an ATP dependent branched-chain amino acid transporter.. +PF12400 Vacuolar membrane protein
This domain family is found in eukaryotes, and is typically between 123 and 138 amino acids in length.. +PF12401 Protein of unknown function (DUF2662)
This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00498.. +PF12402 NocA-like zinc-finger protein 1
This domain family is found in eukaryotes, and is typically between 42 and 57 amino acids in length. There is a conserved GAY sequence motif. There is a single completely conserved residue G that may be functionally important. Nlz1 self-associated via its C terminus, interacted with Nlz2, and bound to histone deacetylases.. +PF12403 Paired-box protein 2 C terminal
This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00292. This family is the C terminal of the paired-box protein 2 which is a transcription factor involved in embryonic development and organogenesis.. +PF12404 Peptidase
This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00883. There is a conserved WAF sequence motif.. +PF12406 Surface protein
This family of proteins is found in eukaryotes. Proteins in this family are typically between 131 and 312 amino acids in length.. +PF12407 Homeobox protein
This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00046. This family is a homeobox protein involved in differentiation of embryonic cells to form the abdominal region.. +PF12408 Ribose-5-phosphate isomerase
This domain family is found in bacteria, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF02502. There are two completely conserved residues (D and F) that may be functionally important.. +PF12409 P_ATPase;
P5-type ATPase cation transporter. This domain family is found in eukaryotes, and is typically between 110 and 126 amino acids in length. The family is found in association with Pfam:PF00122, Pfam:PF00702. P-type ATPases comprise a large superfamily of proteins, present in both prokaryotes and eukaryotes, that transport inorganic cations and other substrates across cell membranes.. +PF12410 Poxvirus DNA dependent RNA polymerase 30kDa subunit
This family of proteins is found in viruses. Proteins in this family are typically between 193 and 259 amino acids in length. The family is found in association with Pfam:PF01096. There are two conserved sequence motifs: GIEYSKD and LRY. This family is N terminal of the 30 kDa subunit of poxvirus DNA-d-RNA-pol. It has structural similarity to the eukaryotic transcriptional elongation factor SII.. +PF12411 Choline sulfatase enzyme C terminal
This domain family is found in bacteria, eukaryotes and viruses, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00884. There are two completely conserved residues (R and W) that may be functionally important. This family is the C terminal of choline sulfatase, the enzyme responsible for catalysing the conversion of choline-O-sulfate and, at a lower rate, phosphorylcholine, into choline.. +PF12412 Protein of unknown function (DUF3667)
This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF12413 Homeobox protein distal-less-like N terminal
This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00046. This family is the N terminal of a homeobox protein involved in embryonic development and adult neural regeneration.. +PF12414 Calcitonin gene-related peptide regulator C terminal
This domain family is found in eukaryotes, and is typically between 69 and 99 amino acids in length. The family is found in association with Pfam:PF00076. This family is the C terminal of Fox-1, a protein involved in the regulation of calcitonin gene-related peptide to mediate the neuron-specific splicing pattern. Fox-1, with Fox-2, functions to repress exon 4 inclusion.. +PF12415 Poxvirus DNA dependent RNA polymerase
This domain family is found in viruses, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF04566, Pfam:PF00562, Pfam:PF04567, Pfam:PF04560, Pfam:PF04565. This family is the second largest subunit of the poxvirus DNA dependent RNA polymerase. It has structural similarity to the second-largest RNA polymerase subunits of eubacteria, archaebacteria, and eukaryotes.. +PF12416 Cep120 protein
This family includes the Cep120 protein which is associated with centriole structure and function .. +PF12417 Zinc finger protein
This domain family is found in eukaryotes, and is typically between 64 and 80 amino acids in length.. +PF12418 Acyl-CoA dehydrogenase N terminal
This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF02770, Pfam:PF00441, Pfam:PF02771. This family is one of the enzymes involved in AcylCoA interaction in beta-oxidation.. +PF12419 SNF2 Helicase protein
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00271, Pfam:PF00176. Most of the proteins in this family are annotated as SNF2 helicases but there is little accompanying literature to confirm this.. +PF12420 Protein of unknown function
This domain family is found in eukaryotes, and is typically between 96 and 116 amino acids in length.. +PF12421 Fibronectin type III protein
This domain family is found in bacteria and viruses, and is typically between 126 and 146 amino acids in length. The family is found in association with Pfam:PF09327, Pfam:PF00041. There are two completely conserved G residues that may be functionally important. Many of the proteins in this family are annotated as fibronectin type III however there is little accompanying literature to confirm this.. +PF12422 Condensin II non structural maintenance of chromosomes subunit
This domain family is found in eukaryotes, and is approximately 150 amino acids in length. This family is part of a non-SMC subunit of condensin II which is involved in maintenance of the structural integrity of chromosomes. Condensin II is made up of SMC (structural maintenance of chromosomes) and non-SMC subunits. The non-SMC subunits bind to the catalytic ends of the SMC subunit dimer. The condensin holocomplex is able to introduce superhelical tension into DNA in an ATP hydrolysis- dependent manner, resulting in the formation of positive supercoils in the presence of topoisomerase I and of positive knots in the presence of topoisomerase II.. +PF12423 Kinesin protein 1B
This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00225, Pfam:PF00498. KIF1B is an anterograde motor for transport of mitochondria in axons of neuronal cells.. +PF12424 Plasma membrane calcium transporter ATPase C terminal
This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00689, Pfam:PF00122, Pfam:PF00702, Pfam:PF00690. There is a conserved QTQ sequence motif. This family is the C terminal of a calcium transporting ATPase located in the plasma membrane.. +PF12425 Protein of unknown function (DUF3673)
This domain family is found in eukaryotes, and is approximately 50 amino acids in length.. +PF12426 RNA dependent RNA polymerase
This domain family is found in viruses, and is approximately 40 amino acids in length. There is a conserved MFNLKF sequence motif. There are two completely conserved residues (E and P) that may be functionally important.. +PF12427 Branched-chain amino acid aminotransferase
This domain family is found in bacteria, and is typically between 23 and 35 amino acids in length. The family is found in association with Pfam:PF01063. There is a conserved TRT sequence motif.. +PF12428 Protein of unknown function (DUF3675)
This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00097. There are two completely conserved residues (R and L) that may be functionally important.. +PF12429 Protein of unknown function (DUF3676)
This domain family is found in eukaryotes, and is approximately 230 amino acids in length.. +PF12430 Abscisic acid G-protein coupled receptor
This domain family is found in eukaryotes, and is typically between 177 and 216 amino acids in length. This family is part of the abscisic acid (ABA) G-protein coupled receptor. ABA is a stress hormone in plants.. +PF12431 Transcriptional regulator
This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00072. There is a single completely conserved residue G that may be functionally important. CitT is a transcriptional regulator which allows transcription of the citM gene which codes for the secondary transporter in the Mg-citrate transport complex.. +PF12432 Protein of unknown function (DUF3677)
This domain family is found in eukaryotes, and is approximately 80 amino acids in length.. +PF12433 Parvovirus non-structural protein 1
This family of proteins is found in viruses. Proteins in this family are typically between 109 and 668 amino acids in length. Parvoviral NSPs regulate host gene expression through histone acetylation.. +PF12434 Malate dehydrogenase enzyme
This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00390, Pfam:PF03949, Pfam:PF01515. There is a conserved AAL sequence motif. There is a single completely conserved residue R that may be functionally important. Malate dehydrogenase is one of the enzymes involved in the citric acid cycle in mitochondria. It converts malate to oxaloacetate using NAD as a cofactor.. +PF12435 Protein of unknown function (DUF3678)
This domain family is found in eukaryotes, and is approximately 40 amino acids in length.. +PF12436 USP7;
ICP0-binding domain of Ubiquitin-specific protease 7. This domain is one of two C-terminal domains on the much longer ubiquitin-specific proteases. This particular one is found to interact with the herpesvirus 1 trans-acting transcriptional protein ICP0/VMW110.. +PF12437 Glutamine synthetase type III N terminal
This domain family is found in bacteria and eukaryotes, and is approximately 160 amino acids in length. The family is found in association with Pfam:PF00120. This family is the N terminal region of glutamine synthetase type III which is one of the enzymes responsible for generation of glutamine through conversion of glutamate to glutamine by the incorporation of ammonia (NH3).. +PF12438 Protein of unknown function (DUF3679)
This domain family is found in bacteria, and is approximately 60 amino acids in length.. +PF12439 Glycogen debranching enzyme N terminal
This domain family is found in bacteria and archaea, and is typically between 218 and 229 amino acids in length. The family is found in association with Pfam:PF06202. Glycogen debranching enzyme catalyses the debranching of amylopectin in glycogen. This is done by transferring three glucose subunits of glycogen from one parallel chain to another. This has the effect of enabling the glucose residues to become more accessible for glycolysis.. +PF12440 Melanoma associated antigen family N terminal
This domain family is found in eukaryotes, and is typically between 82 and 96 amino acids in length. The family is found in association with Pfam:PF01454. This family is the N terminal of various melanoma associated antigens. These are tumour rejection antigens which are expressed on HLA-A1 of tumour cells and they are recognised by cytotoxic T lymphocytes (CTLs).. +PF12441 Protein of unknown function (DUF3680)
This domain family is found in bacteria and archaea, and is approximately 40 amino acids in length.. +PF12442 Protein of unknown function (DUF3681)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 112 and 212 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF12443 AT-hook-containing transcription factor
This domain family is found in eukaryotes, and is approximately 110 amino acids in length. This family contains a transcription factor which regulates the expression of the costimulatory molecules on lymphocytes.. +PF12444 Sox developmental protein N terminal
This domain family is found in eukaryotes, and is typically between 69 and 88 amino acids in length. The family is found in association with Pfam:PF00505. There are two conserved sequence motifs: YDW and PVR. This family contains Sox8, Sox9 and Sox10 proteins which have structural similarity. Sox proteins are involved in developmental processes.. +PF12445 Flagellin protein
This domain family is found in bacteria, and is typically between 125 and 147 amino acids in length. The family is found in association with Pfam:PF00669, Pfam:PF00700. There are two completely conserved G residues that may be functionally important. This family is the flagellin motor protein which confers motility to bacterial cells.. +PF12446 Protein of unknown function (DUF3682)
This domain family is found in eukaryotes, and is typically between 125 and 136 amino acids in length.. +PF12447 Protein of unknown function (DUF3683)
This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF02754, Pfam:PF01565, Pfam:PF02913.. +PF12448 Kinesin associated protein
This domain family is found in eukaryotes, and is typically between 143 and 173 amino acids in length. The family is found in association with Pfam:PF04849. This family is a region of the protein milton. Milton recruits the heavy chain of kinesin to mitochondria to allow the motor movement function of kinesin.. +PF12449 Protein of unknown function (DUF3684)
This domain family is found in eukaryotes, and is typically between 1072 and 1090 amino acids in length.. +PF12450 von Willebrand factor
This domain family is found in bacteria, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00092. There are two conserved sequence motifs: STF and DVD. There are two completely conserved residues (E and N) that may be functionally important. In hemostasis, platelet adhesion to the damaged vessel wall is mediated by several proteins, including von Willebrand factor. In solution vWF becomes immobilized via its A3 domain on the fibrillar collagen of the vessel wall and acts as an intermediary between collagen and the platelet receptor glycoprotein Ibalpha (GPIbalpha), which is the only platelet receptor that does not require prior activation for bond formation.. +PF12451 Vacuolar protein sorting protein 11 C terminal
This domain family is found in eukaryotes, and is approximately 50 amino acids in length. Vps 11 is one of the evolutionarily conserved class C vacuolar protein sorting genes (c-vps: vps11, vps16, vps18, and vps33), whose products physically associate to form the c-vps protein complex required for vesicle docking and fusion.. +PF12452 Protein of unknown function (DUF3685)
This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. There are two completely conserved residues (L and D) that may be functionally important.. +PF12453 Protein tyrosine phosphatase N terminal
This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00041. There is a single completely conserved residue L that may be functionally important. This family consists of various protein tyrosine phosphatase haematopoietic receptors, e.g. CD45, which dephosphorylate growth stimulating proteins. This limits growth signalling in haematopoietic cells.. +PF12454 GPI-anchored cell wall organization protein
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. Ecm33 is an essential cell wall component and is important for cell wall integrity.. +PF12455 Dynein associated protein
This domain family is found in eukaryotes, and is approximately 280 amino acids in length. The family is found in association with Pfam:PF01302. There is a single completely conserved residue E that may be functionally important. Dynactin has been associated with Dynein, a motor protein which is involved in organelle transport, mitotic spindle assembly and chromosome segregation. Dynactin anchors Dynein to specific subcellular structures.. +PF12456 Inositol phosphatase
This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF02383. hSac2 functions as an inositol polyphosphate 5-phosphatase.. +PF12457 Tuftelin interacting protein N terminal
This domain family is found in eukaryotes, and is typically between 99 and 114 amino acids in length. The family is found in association with Pfam:PF08697, Pfam:PF01585. There are two completely conserved residues (G and F) that may be functionally important. TIP is involved in enamel assembly by interacting with one of the major proteins responsible for biomineralisation of enamel - tuftelin.. +PF12458 ATPase involved in DNA repair
This domain family is found in bacteria, and is approximately 450 amino acids in length. There are two conserved sequence motifs: DVF and SPNGED.. +PF12459 D-Ala-teichoic acid biosynthesis protein
This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are two completely conserved residues (L and Y) that may be functionally important.. +PF12460 MMS19_N;
RNAPII transcription regulator C-terminal. MMS19 is required for both nucleotide excision repair (NER) and RNA polymerase II (RNAP II) transcription. This C-terminal domain, along with the N-terminal, MMS19_N, forms part of a silencing complex in fission yeast that contains Dos2, Rik1, Mms19 and Cdc20 (the catalytic subunit of DNA polymerase-epsilon). This complex regulates RNA polymerase II (RNA Pol II) activity in heterochromatin and is required for DNA replication and heterochromatin assembly. This domain apparently shares homology with some HEAT repeat sequences.. +PF12461 Protein of unknown function (DUF3688)
This domain family is found in bacteria and viruses, and is typically between 79 and 104 amino acids in length. There is a conserved YRW sequence motif. There is a single completely conserved residue Y that may be functionally important.. +PF12462 Nucleolin_N;
DNA helicase IV / RNA helicase N terminal. This domain family is found in bacteria, and is approximately 170 amino acids in length. This family is found in bacterial DNA helicase IV, at the N-terminus of Pfam:PF00580.. +PF12463 Protein of unknown function (DUF3689)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 399 and 797 amino acids in length.. +PF12464 Maltose acetyltransferase
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00132. Mac uses acetyl-CoA as acetyl donor to acetylate cytoplasmic maltose.. +PF12465 Proteasome beta subunits C terminal
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00227. There is a conserved GTT sequence motif. There is a single completely conserved residue Y that may be functionally important. This family includes the C terminal of the beta-type subunits of the proteasome, a multimeric complex that degrades proteins into peptides as part of the MHC class I-mediated Ag-presenting pathway.. +PF12466 Glutamate dehydrogenase N terminal
This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF05088. There is a conserved ALR sequence motif. Glutamate dehydrogenase (GDH) is a homohexameric, mitochondrial enzyme that reversibly catalyses the oxidative deamination of L-glutamate to 2-oxoglutarate using either NADP(H) or NAD(H) with comparable efficacy.. +PF12467 Cucumber mosaic virus 1a protein family
This domain family is found in viruses, and is typically between 156 and 171 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01660. 1a protein is the major virulence factor of the cucumber mosaic virus (CMV). The Ns strain of CMV causes necrotic lesions to Nicotiana spp. while other strains cause systemic mosaic. The determinant of the pathogenesis of these different strains is the specific amino acid residue at the 461 residue of the 1a protein.. +PF12468 Type III secretion system leucine rich repeat protein
This domain family is found in bacteria, and is approximately 50 amino acids in length. There are two completely conserved residues (Y and W) that may be functionally important. This family consists of leucine-rich repeat proteins involved in type III secretion.. +PF12469 CRISPR-associated protein
This domain family is found in bacteria and archaea, and is typically between 101 and 138 amino acids in length. The proteins in this family are frequently annotated as CRISPR-associated proteins however there is little accompanying literature to confirm this.. +PF12470 Suppressor of Fused Gli/Ci N terminal binding domain
This domain family is found in eukaryotes, and is typically between 192 and 219 amino acids in length. The family is found in association with Pfam:PF05076. There is a conserved HGRHFT sequence motif. This family is the C terminal domain of the Suppressor of Fused protein (Su(fu)). Su(fu) is a repressor of the Gli and Ci transcription factors of the Hedgehog signalling cascade. It functions by binding these proteins and preventing their translocation to the nucleus. The C terminal domain is only found in eukaryotic Su(fu) proteins; it is not present in bacterial homologues. The C terminal domain binds to the N terminal of Gli/Ci while the N terminal of Su(fu) binds to the C terminal of Gli/Ci. This dual binding mechanism is likely an evolutionary advancement in this signalling cascade which is not present in bacterial homologues.. +PF12471 GTP cyclohydrolase N terminal
This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. This family is the N terminal of GTP cyclohydrolase, the rate limiting enzyme in the synthesis of tetrahydrobiopterin.. +PF12472 Phage related protein
This domain family is found in bacteria and viruses, and is approximately 60 amino acids in length.. +PF12473 Kinesin protein
This domain family is found in eukaryotes, and is typically between 131 and 151 amino acids in length. The family is found in association with Pfam:PF00225, Pfam:PF00498. There is a single completely conserved residue W that may be functionally important.. +PF12474 Polo kinase kinase
This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00069. Polo-like kinase 1 (Plx1) is essential during mitosis for the activation of Cdc25C, for spindle assembly, and for cyclin B degradation. This family is Polo kinase kinase (PKK) which phosphorylates Polo kinase and Polo-like kinase to activate them. PKK is a serine/threonine kinase.. +PF12475 Amdovirus non-structural protein
This domain family is found in viruses, and is approximately 50 amino acids in length. This family contains proteins of each of the four types of Amdovirus non-structural protein.. +PF12476 Protein of unknown function (DUF3696)
This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length.. +PF12477 Sex factor F TraW protein N terminal
This domain family is found in bacteria, and is approximately 30 amino acids in length. There is a single completely conserved residue G that may be functionally important. The traW gene of the E. coli K-12 sex factor, F, encodes one of the numerous proteins required for conjugative transfer of this plasmid.. +PF12478 Ubiquitin-associated protein 2
This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00627. There are two conserved sequence motifs: AVEMPG and QFG.. +PF12479 Protein of unknown function (DUF3698)
This domain family is found in eukaryotes, and is typically between 89 and 105 amino acids in length.. +PF12480 Protein of unknown function (DUF3699)
This domain family is found in eukaryotes, and is approximately 80 amino acids in length.. +PF12481 Aluminium induced protein
This domain family is found in eukaryotes, and is approximately 120 amino acids in length. There are two conserved sequence motifs: YGL and LRDR. This family is related to GATase enzyme domains.. +PF12482 Phage integrase protein
This domain family is found in bacteria, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00589.. +PF12483 E3 Ubiquitin ligase
This domain family is found in bacteria, archaea and eukaryotes, and is typically between 150 and 163 amino acids in length. There is a single completely conserved residue E that may be functionally important. GIDE is an E3 ubiquitin ligase which is involved in inducing apoptosis.. +PF12484 Polymorphic PE/PPE proteins C terminal
This domain family is found in bacteria, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00823. There is a conserved SVP sequence motif. There is a single completely conserved residue W that may be functionally important. The proteins in this family are PE/PPE proteins implicated in immunostimulation and virulence.. +PF12485 Lymphocyte signaling adaptor protein
This domain family is found in eukaryotes, and is typically between 144 and 156 amino acids in length. The family is found in association with Pfam:PF07647, Pfam:PF07653. There is a conserved LGKK sequence motif. SLY contains a Src homology 3 domain and a sterile alpha motif, suggesting that it functions as a signaling adaptor protein in lymphocytes.. +PF12486 ImpA domain protein
This family of proteins is found in bacteria. Proteins in this family are typically between 207 and 469 amino acids in length. The family is found in association with Pfam:PF06812.. +PF12487 Protein of unknown function (DUF3703)
This family of proteins is found in bacteria. Proteins in this family are typically between 113 and 135 amino acids in length.. +PF12488 Protein of unknown function (DUF3704)
This domain family is found in eukaryotes, and is approximately 30 amino acids in length.. +PF12489 Nuclear coactivator
This domain family is found in eukaryotes, and is typically between 127 and 138 amino acids in length. This family is ARA70, a nuclear coactivator which interacts with peroxisome proliferator-activated receptor gamma (PPARgamma) to regulate transcription and the addition of the PPARgamma ligand (prostaglandin J2) enhances this interaction.. +PF12490 Breast carcinoma amplified sequence 3
This domain family is found in eukaryotes, and is typically between 229 and 245 amino acids in length. The proteins in this family have been shown to be proto-oncogenes implicated in the development of breast cancer.. +PF12491 Apolipoprotein B100 C terminal
This domain family is found in eukaryotes, and is approximately 60 amino acids in length. There are two conserved sequence motifs: QLS and LIDL. ApoB100 has an essential role in the assembly and secretion of triglyceride-rich lipoproteins and lipid transport.. +PF12493 Protein of unknown function (DUF3709)
This domain family is found in bacteria, and is approximately 30 amino acids in length. There are two conserved sequence motifs: RCLMK and LIEL.. +PF12494 Protein of unknown function (DUF3695)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 157 and 192 amino acids in length. There is a single completely conserved residue D that may be functionally important.. +PF12495 Vegetative insecticide protein 3A N terminal
This family of proteins is found in bacteria. Proteins in this family are typically between 170 and 789 amino acids in length. The family is found in association with Pfam:PF02018. Vip3A represents a novel class of proteins insecticidal to lepidopteran insect larvae.. +PF12496 Bcl2-/adenovirus E1B nineteen kDa-interacting protein 2
This domain family is found in eukaryotes, and is typically between 119 and 133 amino acids in length. There is a conserved HGGY sequence motif. This family is Bcl2-/adenovirus E1B nineteen kDa-interacting protein 2. It interacts with pro- and anti- apoptotic molecules in the cell.. +PF12497 Estrogen receptor beta
This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00104, Pfam:PF00105. There is a conserved IPS sequence motif. There are two completely conserved residues (Y and W) that may be functionally important. ERbeta binds estrogens with an affinity similar to that of ERalpha, and activates expression of reporter genes containing estrogen response elements in an estrogen-dependent manner. ERbeta acts as a transcription factor once bound to its ligand and it can dimerise with ERalpha.. +PF12498 Basic leucine-zipper C terminal
This family of proteins is found in eukaryotes. Proteins in this family are typically between 174 and 411 amino acids in length. The family is found in association with Pfam:PF00170. There is a conserved KVK sequence motif. There is a single completely conserved residue K that may be functionally important. Various bZIP proteins have been found and shown to play a role in seed-specific gene expression. bZIP binds to the alpha-globulin gene promoter, but not to promoters of other major storage genes such as glutelin, prolamin and albumin.. +PF12499 Pherophorin
This domain family is found in eukaryotes, and is typically between 147 and 160 amino acids in length. The proteins in this family are frequently annotated as pherophorins however there is little accompanying literature to confirm this.. +PF12500 DUF3706;
TRSP domain C terminus to PRTase_2. Anantharaman V, Gavin OL. This domain occurs C-terminal to PRTase_2 and has highly conserved GXXE and TRSP signatures. It is found in bacteria. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribonucleoside involved in stress response.. +PF12501 Phosphate ATP-binding cassette transporter
This domain family is found in bacteria, and is typically between 143 and 173 amino acids in length. The family is found in association with Pfam:PF00528. There is a single completely conserved residue P that may be functionally important.. +PF12502 Protein of unknown function (DUF3710)
This family of proteins is found in bacteria. Proteins in this family are typically between 237 and 284 amino acids in length. There are two conserved sequence motifs: DLG and DGPRW.. +PF12503 Cucumber mosaic virus 1a protein C terminal
This domain family is found in viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01660. There is a conserved GLG sequence motif. 1a protein is the major virulence factor of the cucumber mosaic virus (CMV). The Ns strain of CMV causes necrotic lesions to Nicotiana spp. while other strains cause systemic mosaic. The determinant of the pathogenesis of these different strains is the specific amino acid residue at the 461 residue of the 1a protein.. +PF12505 Protein of unknown function (DUF3712)
This domain family is found in eukaryotes, and is approximately 130 amino acids in length.. +PF12506 Protein of unknown function (DUF3713)
This family of proteins is found in bacteria. Proteins in this family are typically between 92 and 1225 amino acids in length. There is a single completely conserved residue S that may be functionally important.. +PF12507 Human Cytomegalovirus UL139 protein
This family of proteins is found in eukaryotes and viruses. Proteins in this family are approximately 140 amino acids in length. UL139 product shared sequence homology with human CD24, a signal transducer modulating B-cell activation responses, and the sequences in the G1c variant of UL139 contained a specific attachment site of prokaryotic membrane lipoprotein lipid.. +PF12508 Protein of unknown function (DUF3714)
This domain family is found in bacteria, and is approximately 200 amino acids in length.. +PF12509 Protein of unknown function (DUF3715)
This domain family is found in eukaryotes, and is approximately 170 amino acids in length.. +PF12510 Smoothelin cytoskeleton protein
This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00307. Smoothelin is a cytoskeletal protein specifically expressed in differentiated smooth muscle cells and has been shown to co-localize with smooth muscle alpha actin.. +PF12511 Protein of unknown function (DUF3716)
This domain family is found in eukaryotes, and is approximately 60 amino acids in length.. +PF12512 Protein of unknown function (DUF3717)
This family of proteins is found in bacteria. Proteins in this family are typically between 75 and 117 amino acids in length. There is a conserved AIN sequence motif. There are two completely conserved residues (L and Y) that may be functionally important.. +PF12513 Mitochondrial degradasome RNA helicase subunit C terminal
This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00271. The yeast mitochondrial degradosome (mtEXO) is an NTP-dependent exoribonuclease involved in mitochondrial RNA metabolism. mtEXO is made up of two subunits: an RNase (DSS1) and an RNA helicase (SUV3). These co-purify with mitochondrial ribosomes.. +PF12514 Protein of unknown function (DUF3718)
This domain family is found in bacteria and viruses, and is approximately 70 amino acids in length. There is a single completely conserved residue C that may be functionally important.. +PF12515 Ca2+-ATPase N terminal autoinhibitory domain
This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00689, Pfam:PF00122, Pfam:PF00702, Pfam:PF00690. There is a conserved RRFR sequence motif. There are two completely conserved residues (F and W) that may be functionally important. This family is the N terminal autoinhibitory domain of an endosomal Ca2+-ATPase.. +PF12516 Protein of unknown function (DUF3719)
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved HLR sequence motif. There are two completely conserved residues (W and H) that may be functionally important.. +PF12517 Protein of unknown function (DUF3720)
This domain family is found in eukaryotes, and is approximately 100 amino acids in length. There are two completely conserved A residues that may be functionally important.. +PF12518 Protein of unknown function
This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. There is a conserved WMPC sequence motif. There are two completely conserved residues (A and C) that may be functionally important.. +PF12519 Protein of unknown function (DUF3722)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 415 and 473 amino acids in length.. +PF12520 Protein of unknown function (DUF3723)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 374 and 1069 amino acids in length. There is a conserved LGF sequence motif.. +PF12521 Protein of unknown function (DUF3724)
This domain family is found in viruses, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF00073. There is a single completely conserved residue Y that may be functionally important.. +PF12522 Cytomegalovirus glycoprotein N terminal
This domain family is found in viruses, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF03554. This family is an envelope glycoprotein of human cytomegalovirus (HCMV).. +PF12523 Protein of unknown function (DUF3725)
This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF01577. There is a conserved FLE sequence motif.. +PF12524 dsDNA virus glycoprotein L C terminal
This domain family is found in viruses, and is typically between 55 and 80 amino acids in length. The family is found in association with Pfam:PF05259. This family is the C terminal of glycoprotein L from various types of double stranded DNA viruses (dsDNA).. +PF12525 Protein of unknown function (DUF3726)
This domain family is found in bacteria and eukaryotes, and is approximately 80 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF12526 Protein of unknown function (DUF3729)
This family of proteins is found in viruses. Proteins in this family are typically between 145 and 1707 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01661, Pfam:PF05417, Pfam:PF01660, Pfam:PF00978. There is a single completely conserved residue L that may be functionally important.. +PF12527 Protein of unknown function (DUF3727)
This domain family is found in bacteria and eukaryotes, and is approximately 100 amino acids in length.. +PF12528 Prepilin peptidase dependent protein C (DUF3728)
This family of proteins is found in bacteria. Proteins in this family are typically between 106 and 121 amino acids in length. The family is found in association with Pfam:PF07963. There are two completely conserved C residues that may be functionally important. This family is frequently annotated as prepilin peptidase dependent protein C.. +PF12529 Xylosyltransferase C terminal
This domain family is found in eukaryotes, and is typically between 169 and 183 amino acids in length. The family is found in association with Pfam:PF02485. There is a single completely conserved residue G that may be functionally important. Xylosyltransferases are enzymes involved in the biosynthesis of the glycosaminoglycan linker region in proteoglycans.. +PF12530 Protein of unknown function (DUF3730)
This domain family is found in eukaryotes, and is typically between 220 and 262 amino acids in length.. +PF12531 DNA-K related protein
This domain family is found in bacteria, and is approximately 250 amino acids in length. There are two conserved sequence motifs: RPG and WRR. The proteins in this family are frequently annotated as DNA-K related proteins however there is little accompanying literature to confirm this.. +PF12532 Protein of unknown function (DUF3732)
This domain family is found in bacteria and eukaryotes, and is typically between 180 and 198 amino acids in length. There is a conserved DQP sequence motif.. +PF12533 Neuronal helix-loop-helix transcription factor
This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found C-terminal to Pfam:PF00010. There is a single completely conserved residue W that may be functionally important. Neuronal basic helix-loop-helix (bHLH) transcription factors such as neuroD and neurogenin have been shown to play important roles in neuronal development.. +PF12534 Leucine-rich repeat containing protein 8
This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00560. There are two completely conserved residues (W and Y) that may be functionally important. Many of the proteins in this family are annotated as leucine-rich repeat containing protein 8 however there is little accompanying literature to back this up.. +PF12535 Hydrolase of X-linked nucleoside diphosphate N terminal
This family of proteins is found in eukaryotes. Proteins in this family are typically between 847 and 5344 amino acids in length. These enzymes hydrolyse the molecular motif of a nucleoside diphosphate linked to some other moiety, X.. +PF12536 Patatin phospholipase
This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF01734. There are two completely conserved residues (F and G) that may be functionally important. The proteins in this family are frequently annotated as patatin family phospholipases however there is little accompanying literature to confirm this.. +PF12537 Protein of unknown function (DUF3735)
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved LSG sequence motif. There is a single completely conserved residue G that may be functionally important.. +PF12538 DNA transporter
This domain family is found in bacteria, and is typically between 107 and 121 amino acids in length. The family is found in association with Pfam:PF01580. The FtsK/SpoIIIE family of DNA transporters are responsible for translocating missegregated chromosomes after the completion of cell division.. +PF12539 Chromosome segregation protein Csm1/Pcs1
Saccharomyces cerevisiae Csm1 is part of the monopolin complex. Csm1 forms a complex with Mde4 and promotes monoorientation during meiosis . Csm1 also plays a mitotic role in DNA replication . This family also contains the Schizosaccharomyces pombe homologue to Csm1, Pcs1. Pcs1 forms a complex with Mde4 and acts in the central kinetochore domain to clamp microtubule binding sites together . The two complexes (Csm1/Lrs4 and Pcs1/Mde4) contribute to the prevention of merotelic attachment .. +PF12540 Protein of unknown function (DUF3736)
This domain family is found in eukaryotes, and is typically between 135 and 160 amino acids in length.. +PF12541 Protein of unknown function (DUF3737)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 281 and 297 amino acids in length.. +PF12542 Pre-mRNA splicing factor
This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF10197. There is a single completely conserved residue Y that may be functionally important. Cwc25 has been identified to associate with pre-mRNA splicing factor Cef1/Ntc85, a component of the Prp19-associated complex (NTC) involved in spliceosome activation. Cwc25 is neither tightly associated with NTC nor required for spliceosome activation, but is required for the first catalytic reaction.. +PF12543 Protein of unknown function (DUF3738)
This family of proteins is found in bacteria. Proteins in this family are typically between 251 and 457 amino acids in length.. +PF12544 Lysine-2,3-aminomutase
This domain family is found in bacteria, archaea and eukaryotes, and is typically between 111 and 127 amino acids in length. The family is found in association with Pfam:PF04055. LAM catalyses the interconversion of L-alpha-lysine and L-beta-lysine, which proceeds by migration of the amino group from C2 to C3 concomitant with cross-migration of the 3-pro-R hydrogen of L-alpha-lysine to the 2-pro-R position of L-beta-lysine.. +PF12545 Filamentous haemagglutinin family outer membrane protein
This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF05860.. +PF12546 Blue/Ultraviolet sensing protein C terminal
This domain family is found in eukaryotes, and is typically between 113 and 125 amino acids in length. The family is found in association with Pfam:PF03441, Pfam:PF00875. Cryptochromes are blue/ultraviolet-A light sensing photoreceptors involved in regulating various growth and developmental responses in plants.. +PF12547 Capicua transcriptional repressor modulator
This family of proteins is found in eukaryotes. Proteins in this family are typically between 49 and 781 amino acids in length. There is a conserved IQT sequence motif. ATXN1 directly binds Capicua and modulates Capicua repressor activity in Drosophila and mammalian cells. The polyglutamine expanded mutant type of ATXN-1 does not bind Capicua with as high affinity as wild-type ATXN-1. It is associated with spinocerebellar ataxia type 1 (SCA1).. +PF12548 Sulfatase protein
This domain family is found in eukaryotes, and is typically between 144 and 173 amino acids in length. The family is found in association with Pfam:PF00884.. +PF12549 Tyrosine hydroxylase N terminal
This domain family is found in eukaryotes, and is approximately 30 amino acids in length. There is a single completely conserved residue G that may be functionally important. Tyrosine hydroxylase converts L-tyrosine to L-DOPA in the catecholamine synthesis pathway.. +PF12550 Transcriptional activator of glycolytic enzymes
This domain family is found in eukaryotes, and is approximately 80 amino acids in length. This family activates the transcription of glycolytic enzymes.. +PF12551 Poly-beta-hydroxybutyrate polymerase N terminal
This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF07167, Pfam:PF00561. There is a single completely conserved residue W that may be functionally important. PHBC is the third enzyme of the poly-beta-hydroxybutyrate biosynthetic pathway.. +PF12552 Protein of unknown function (DUF3741)
This domain family is found in eukaryotes, and is approximately 50 amino acids in length.. +PF12553 Protein of unknown function (DUF3742)
This domain family is found in bacteria, and is approximately 50 amino acids in length. There is a single completely conserved residue Y that may be functionally important.. +PF12554 DUF3743;
Mitotic-spindle organizing gamma-tubulin ring associated. The name MOZART is derived from letters of 'mitotic-spindle organizing proteins associated with a ring of gamma-tubulin'. This family operates as part of the gamma-tubulin ring complex, gamma-TuRC, one of the complexes necessary for chromosome segregation. This complex is located at centrosomes and mediates the formation of bipolar spindles in mitosis; it consists of six subunits. However, unlike the other four known subunits, this family does not carry the conserved 'Spc97-Spc98' GCP domain, so the TUBCGP nomenclature cannot be used for it. MOZART1 is required for gamma-TuRC recruitment to centrosomes .. +PF12555 Thiamine pyrophosphokinase C terminal
This domain family is found in bacteria, and is approximately 50 amino acids in length. The proteins in this family catalyse the pyrophosphorylation of thiamine in yeast and synthesize thiamine pyrophosphate (TPP), a thiamine coenzyme.. +PF12556 Cobaltochelatase CobS subunit N terminal
This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF07728. There are two completely conserved residues (P and F) that may be functionally important. This family is the N terminal of the CobS subunit of cobaltochelatase. Cobaltochelatase belongs to the AAA+ superfamily of proteins. CobS and CobT form a chaperone like complex.. +PF12557 Cob(I)alamin adenosyltransferase N terminal
This domain family is found in bacteria and eukaryotes, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF02572. Cob(I)alamin adenosyltransferase adenosylates Co(I) in an ATP-dependent manner in the conversion of aquacobalamin to its coenzyme form. This is the third step in this process, after two steps involved in the reduction of Co(III) to Co(I).. +PF12558 ATP-binding cassette cobalt transporter
This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00005. There is a conserved REP sequence motif. There is a single completely conserved residue P that may be functionally important. The proteins in this family are frequently annotated as ABC Cobalt transporters however there is little accompanying literature to confirm this.. +PF12559 Serine endopeptidase inhibitors
This family includes both microviridins and marinostatins. It seems likely that in both cases it is the C-terminus which becomes the active inhibitor after post-translational modifications of the full-length pre-peptide. It is the ester linkages within the key 12-residue region that circularise the molecule, giving it its inhibitory conformation [1, 2, 3].. +PF12560 Protein of unknown function (DUF3745)
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00097, Pfam:PF10426.. +PF12561 ToxR activated gene A lipoprotein
This domain family is found in bacteria, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF10462. There is a conserved GAG sequence motif. This family is a bacterial lipoprotein.. +PF12562 Protein of unknown function (DUF3746)
This domain family is found in viruses, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF04595.. +PF12563 Hemolytic toxin N terminal
This domain family is found in bacteria, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF07968, Pfam:PF00652. This family is a bacterial virulence factor - hemolysin - which forms pores in erythrocytes and causes them to lyse.. +PF12564 Type III restriction/modification enzyme methylation subunit
This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF01555. There are two completely conserved residues (F and S) that may be functionally important. This family is a bacterial phage resistance protein. It functions in a type III restriction/modification enzyme complex. It is part of the methylation subunit of the complex. It binds DNA and methylates it.. +PF12565 Protein of unknown function (DUF3747)
This family of proteins is found in bacteria. Proteins in this family are typically between 215 and 413 amino acids in length. There is a conserved DSNGYS sequence motif.. +PF12566 Protein of unknown function (DUF3748)
This domain family is found in bacteria and eukaryotes, and is approximately 120 amino acids in length.. +PF12567 Leukocyte receptor CD45
This family of proteins is found in eukaryotes. Proteins in this family are typically between 77 and 1130 amino acids in length. The family is found in association with Pfam:PF00041. CD45 plays a critical role in T-cell receptor (TCR)-mediated signaling. CD45 interacts with SKAP55 which is a transcriptional activator of IL-2.. +PF12568 Acetyltransferase (GNAT) domain
This domain family is found in bacteria, and is approximately 40 amino acids in length. The proteins in this family are acetyltransferases of the GNAT family.. +PF12569 NMDA receptor-regulated protein 1
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF07719, Pfam:PF00515. There is a single completely conserved residue L that may be functionally important. NARP1 is the mammalian homologue of a yeast N-terminal acetyltransferase that regulates entry into the G(0) phase of the cell cycle.. +PF12570 Protein of unknown function (DUF3750)
This family of proteins is found in bacteria. Proteins in this family are typically between 175 and 265 amino acids in length.. +PF12571 Phage tail-collar fibre protein
This domain family is found in bacteria and viruses, and is approximately 160 amino acids in length. There are two completely conserved residues (K and W) that may be functionally important. The members are annotated as being putative phage tail or tail-collar proteins.. +PF12572 Protein of unknown function (DUF3752)
This domain family is found in eukaryotes, and is typically between 140 and 163 amino acids in length.. +PF12573 2-oxoisovalerate dehydrogenase E1 alpha subunit N terminal
This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00676. There are two conserved sequence motifs: VPEP and RPG. This family is the alpha subunit of the E1 component of 2-oxoisovalerate dehydrogenase. This is the enzyme complex responsible for metabolism of pyruvate, 2-oxoglutarate, branched chain 2-oxo acids and acetoin. The E1 component is a heterotetramer of alpha2beta2. The homodimerised beta subunits are flanked by two alpha subunits in a 'vise' structure.. +PF12574 120 KDa Rickettsia surface antigen
This domain family is found in bacteria, and is approximately 40 amino acids in length. This family is a Rickettsia surface antigen of 120 KDa which may be used as an antigen for immune response against the bacterial species.. +PF12575 Protein of unknown function (DUF3753)
This family of proteins is found in viruses. Proteins in this family are approximately 70 amino acids in length. There is a conserved YLK sequence motif. There are two completely conserved residues (D and F) that may be functionally important.. +PF12576 Protein of unknown function (DUF3754)
This domain family is found in bacteria, archaea and eukaryotes, and is typically between 135 and 166 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF12577 PPAR gamma N-terminal region
Peroxisome proliferator-activated receptors (PPAR) are nuclear hormone receptors that control the expression of genes involved in lipid homeostasis in mammals. This sequence region is found at the N-terminus of these proteins. The family is found in association with Pfam:PF00104, Pfam:PF00105. It is not clear if this region is a separate protein domain.. +PF12578 Myotubularin-associated protein
This domain family is found in eukaryotes, and is typically between 115 and 138 amino acids in length. Myotubularin is a dual-specific phosphatase that dephosphorylates phosphatidylinositol 3-phosphate and phosphatidylinositol (3,5)-bisphosphate. 3-PAP is a catalytically inactive member of the myotubularin gene family, which coprecipitates lipid phosphatidylinositol 3-phosphate-3-phosphatase activity from lysates of human platelets.. +PF12579 Protein of unknown function (DUF3755)
This domain family is found in eukaryotes, and is approximately 40 amino acids in length. There is a single completely conserved residue N that may be functionally important.. +PF12580 Tripeptidyl peptidase II
This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF00082. Tripeptidyl peptidase II (TPPII) is a crucial component of the proteolytic cascade acting downstream of the 26S proteasome in the ubiquitin-proteasome pathway. It is an amino peptidase belonging to the subtilase family removing tripeptides from the free N terminus of oligopeptides.. +PF12581 Protein of unknown function (DUF3756)
This domain family is found in viruses, and is approximately 40 amino acids in length.. +PF12582 Protein of unknown function (DUF3757)
This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 154 amino acids in length.. +PF12583 Tripeptidyl peptidase II N terminal
This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF00082. Tripeptidyl peptidase II (TPPII) is a crucial component of the proteolytic cascade acting downstream of the 26S proteasome in the ubiquitin-proteasome pathway. It is an amino peptidase belonging to the subtilase family removing tripeptides from the free N terminus of oligopeptides.. +PF12584 DUF3758;
Trafficking protein particle complex subunit 10, TRAPPC10. This domain forms part of the TRAPP complex for mediating vesicle docking and fusion in the Golgi apparatus. The fungal version is referred to as Trs130, and an alternative vertebrate alias is TMEM1 [1,2].. +PF12585 Protein of unknown function (DUF3759)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 107 and 132 amino acids in length. There is a single completely conserved residue H that may be functionally important.. +PF12586 Protein of unknown function (DUF3760)
This domain family is found in eukaryotes, and is typically between 46 and 64 amino acids in length.. +PF12587 Protein of unknown function (DUF3761)
This family of proteins is found in bacteria. Proteins in this family are typically between 100 and 157 amino acids in length.. +PF12588 Phophatidylserine decarboxylase
This domain family is found in bacteria and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF02666. Phosphatidylserine decarboxylase (PSD) is an important enzyme in the synthesis of phosphatidylethanolamine in both prokaryotes and eukaryotes.. +PF12589 Methyltransferase involved in Williams-Beuren syndrome
This domain family is found in eukaryotes, and is typically between 72 and 83 amino acids in length. The family is found in association with Pfam:PF08241. This family is made up of S-adenosylmethionine-dependent methyltransferases . The proteins are deleted in Williams-Beuren syndrome (WBS), a complex developmental disorder with multisystemic manifestations including supravalvular aortic stenosis (SVAS) and a specific cognitive phenotype .. +PF12590 Acyl-ATP thioesterase
This domain family is found in bacteria and eukaryotes, and is typically between 120 and 131 amino acids in length. The family is found in association with Pfam:PF01643. The plant acyl-acyl carrier protein (ACP) thioesterases (TEs) have roles in fatty acid synthesis.. +PF12591 Protein of unknown function (DUF3762)
This domain family is found in viruses, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF05533.. +PF12592 Protein of unknown function (DUF3763)
This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF07728. There is a single completely conserved residue F that may be functionally important.. +PF12593 Microcystin synthetase C terminal
This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF08242, Pfam:PF00501. There is a conserved YAN sequence motif. Microcystins form a large family of small cyclic heptapeptides harbouring extensive modifications in amino acid residue composition and functional group chemistry. These peptide hepatotoxins contain a range of non-proteinogenic amino acids and unusual peptide bonds, and are typically N-methylated. They are synthesized on large enzyme complexes consisting of non-ribosomal peptide synthetases and polyketide synthases. This family is made up of the C terminal of microcystin synthetase, one of the proteins involved in this synthesis pathway.. +PF12594 Protein of unknown function (DUF3764)
This family of proteins is found in bacteria. Proteins in this family are typically between 89 and 101 amino acids in length.. +PF12595 Rhomboid serine protease
This domain family is found in eukaryotes, and is approximately 210 amino acids in length. The family is found in association with Pfam:PF01694. Rhomboid is a seven-transmembrane spanning protein that resides in the Golgi and acts as a serine protease to cleave Spitz.. +PF12596 87kDa_TransP;
This domain family is found in eukaryotes, and is typically between 78 and 110 amino acids in length. The family is found in association with Pfam:PF05485. There are two completely conserved residues (D and G) that may be functionally important. This family is an 87kDa transposase protein which catalyses both the precise and imprecise excision of a nonautonomous P transposable element.. +PF12597 Protein of unknown function (DUF3767)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 112 and 199 amino acids in length.. +PF12598 T-box transcription factor
This domain family is found in eukaryotes, and is typically between 77 and 89 amino acids in length. The family is found in association with Pfam:PF00907. There are two completely conserved residues (S and P) that may be functionally important. T-box genes encode transcription factors involved in morphogenesis and organogenesis of vertebrates and invertebrates. +PF12599 Protein of unknown function (DUF3768)
This family of proteins is found in bacteria. Proteins in this family are typically between 108 and 129 amino acids in length. There are two conserved sequence motifs: NDP and RVLT.. +PF12600 Protein of unknown function (DUF3769)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 560 and 931 amino acids in length.. +PF12601 Rubivirus non-structural protein
This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF05407. The rubella virus (RUB) nonstructural (NS) protein (NSP) ORF encodes a protease that cleaves the NSP precursor (240 kDa) at a single site to produce two products.. +PF12602 Fertility inhibition protein N terminal
This domain family is found in bacteria, and is typically between 62 and 102 amino acids in length. The family is found in association with Pfam:PF04352. The FinOP (fertility inhibition) system of F-like plasmids consists of an antisense RNA (FinP) and a 22 kDa protein (FinO) which act in concert to prevent the translation of TraJ, the positive regulator of the transfer operon.. +PF12603 Protein of unknown function (DUF3770)
This domain family is found in viruses, and is approximately 250 amino acids in length. The family is found in association with Pfam:PF04196.. +PF12604 Tail fiber protein gp37 C terminal
This domain family is found in bacteria and viruses, and is typically between 49 and 166 amino acids in length. The family is found in association with Pfam:PF03906. In T-even phages, gp37 and gp38 are components of the tail fiber that are critical for phage-host interaction.. +PF12605 Casein kinase 1 gamma C terminal
This domain family is found in eukaryotes, and is typically between 54 and 99 amino acids in length. The family is found in association with Pfam:PF00069. CK1gamma is a membrane-bound member of the CK1 family. Gain-of-function and loss-of-function experiments show that CK1gamma is both necessary and sufficient to transduce LRP6 signalling in vertebrates and Drosophila cells.. +PF12606 Tumour necrosis factor receptor superfamily member 19
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 49 and 288 amino acids in length. There are two completely conserved residues (K and Y) that may be functionally important. The members of tumor necrosis factor receptor (TNFR) superfamily have been designated as the "guardians of the immune system" due to their roles in immune cell proliferation, differentiation, activation, and death (apoptosis). The messenger RNA of RELT is especially abundant in hematologic tissues such as spleen, lymph node, and peripheral blood leukocytes as well as in leukemias and lymphomas. RELT is able to activate the NF-kappaB pathway and selectively binds tumor necrosis factor receptor-associated factor 1.. +PF12607 Protein of unknown function (DUF3772)
This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00924.. +PF12608 Protein of unknown function (DUF3773)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are approximately 110 amino acids in length.. +PF12609 Wound-induced protein
This family of proteins is found in eukaryotes. Proteins in this family are typically between 81 and 97 amino acids in length. The proteins in the family are often annotated as wound-induced proteins however there is little accompanying literature to confirm this.. +PF12610 Suppressor of cytokine signalling
This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF07525, Pfam:PF00017. The suppressors of cytokine signaling (SOCS) family play important roles in regulating a variety of signal transduction pathways that are involved in immunity, growth and development of organisms.. +PF12611 Protein of unknown function (DUF3766)
This domain family is found in bacteria, and is approximately 20 amino acids in length. There is a conserved FTNID sequence motif. There is a single completely conserved residue T that may be functionally important.. +PF12612 Tubulin folding cofactor D C terminal
This domain family is found in eukaryotes, and is typically between 182 and 199 amino acids in length. The family is found in association with Pfam:PF02985. There is a single completely conserved residue R that may be functionally important. Tubulin folding cofactor D does not co-polymerise with microtubules either in vivo or in vitro, but instead modulates microtubule dynamics by sequestering beta-tubulin from GTP-bound alphabeta-heterodimers in microtubules.. +PF12613 Flagellin structural protein
This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00669, Pfam:PF00700. This family is the bacterial flagellin structural protein. It is involved with cell motility.. +PF12614 Ribosome recycling factor
This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 130 amino acids in length. There are two conserved sequence motifs: LPS and LKR. Overproduction of ribosome recycling factor (RRF) reduces tna operon expression and increases the rate of cleavage of TnaC-tRNA(2)(Pro), relieving the growth inhibition associated with plasmid-mediated tnaC overexpression.. +PF12615 F sex factor protein N terminal
This domain family is found in bacteria, and is typically between 96 and 107 amino acids in length. The family is found in association with Pfam:PF10412. TraD is a cytoplasmic membrane protein with possible DNA binding domains. It is part of the bacterial F sex factor complex.. +PF12616 Protein of unknown function (DUF3775)
This domain family is found in bacteria, and is approximately 80 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF12617 Iron-Sulfur binding protein C terminal
This domain family is found in bacteria and eukaryotes, and is typically between 179 and 201 amino acids in length. The family is found in association with Pfam:PF00037. LdpA (light-dependent period) plays a role in controlling the redox state in cyanobacteria to modulate its circadian clock. LdpA is a protein with Iron-Sulfur cluster-binding motifs.. +PF12618 Protein of unknown function (DUF3776)
This domain family is found in eukaryotes, and is approximately 100 amino acids in length.. +PF12619 Mini-chromosome maintenance protein 2
This domain family is found in eukaryotes, and is typically between 138 and 153 amino acids in length. The family is found in association with Pfam:PF00493. Mini-chromosome maintenance (MCM) proteins are essential for DNA replication. These proteins use ATPase activity to perform this function.. +PF12620 Protein of unknown function (DUF3778)
This domain family is found in eukaryotes, and is typically between 48 and 61 amino acids in length. There is a conserved LRF sequence motif.. +PF12621 Phosphate metabolism protein
This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF02714. There are two completely conserved residues (W and D) that may be functionally important. This family is likely to be involved in phosphate metabolism however there is little accompanying literature to confirm this.. +PF12622 mRNA biogenesis factor
The full-length Wbp11 proteins carry several copies of a PPGPPP motif throughout their length. This motif is thought to be necessary for folding of the molecule as it helps to bind the WW domain, Wbp11, Pfam:PF09429 . This domain together with Wbp11 may function as components of an mRNA factory in the nucleus.. +PF12623 RNA repair, ligase-Pnkp-associating, region of Hen1
This domain is the N-terminal region of the bacterial Hen1 protein. This protein forms a stable hetero-tetramer with Pnkp. The hetero-tetramer was able to repair transfer RNAs cleaved by ribotoxins in vitro . This domain provides the ligase activity of the hetero-tetramer.. +PF12624 N-terminal region of Chorein, a TM vesicle-mediated sorter
Pfam-B_PB000002 (release 24.0). Although mutations in the full-length vacuolar protein sorting 13A (VPS13A) protein in vertebrates lead to the disease of chorea-acanthocytosis, the exact function of any of the regions within the protein is not yet known. This region is the proposed leucine zipper at the N-terminus. The full-length protein is a transmembrane protein with a presumed role in vesicle-mediated sorting and intracellular protein transport.. +PF12625 Arabinose-binding domain of AraC transcription regulator, N-term
Pfam-B_PB000001 (release 24.0). AraC is a bacterial transcriptional regulatory protein with a DNA-binding domain at the C-terminus, HTH_AraC, Pfam:PF00165, and this dimerisation domain which harbours the arabinose-binding pocket at the N-terminus. AraC positively and negatively regulates expression of the proteins required for the uptake and catabolism of the sugar L-arabinose [1,2,3].. +PF12626 Polymerase A arginine-rich C-terminus
Pfam-B_105 (release 24.0). The C-terminus of polymerase A in E coli is arginine-rich and is necessary for full functioning of the enzyme.. +PF12627 Probable RNA and SrmB- binding site of polymerase A
Pfam-B_105 (release 24.0). This region encompasses much of the RNA and SrmB binding motifs on polymerase A.. +PF12628 Falstatin, cysteine peptidase inhibitor
This family of peptidase inhibitors is expressed from plasmodial protozoal species. Falstatin is found to be a potent reversible inhibitor of the P. falciparum cysteine proteases falcipain-2 and falcipain-3, as well as other parasite- and non-parasite-derived cysteine proteases, but is only a relatively weak inhibitor of the P. falciparum cysteine proteases falcipain-1 and dipeptidyl aminopeptidase 1. Thus, P. falciparum requires expression of falstatin to limit proteolysis by certain host or parasite cysteine proteases during erythrocyte invasion.. +PF12629 Poxvirus poly(A) polymerase C-terminal domain
This domain is found at the C-terminus of the pox virus PolyA polymerase protein .. +PF12630 Poxvirus poly(A) polymerase N-terminal domain
This domain is found at the N-terminus of the pox virus Poly(A) polymerase protein . According to SCOP this domain contains a helix-hairpin-helix motif.. +PF12631 Catalytic cysteine-containing C-terminus of GTPase, MnmE
Pfam-B_102 (release 24.0). This short C-terminal region contains the only cysteine present in these proteins. It is proposed that MnmE is a tRNA-modifying enzyme and that Cys-451 functions as a catalytic residue in the modification reaction.. +PF12632 Mysoin-binding motif of peroxisomes
Vezatin is a peroxisome transmembrane receptor that is involved in membrane-membrane and cell-cell adhesions. In the movement of peroxisomes it binds to class V and class VIIa myosins to guide the organelle through the microtubules and allow pathogens to internalise themselves into host cells . Vezatin is crucial for spermatozoan production . In mouse cells it interacts with the cadherin-catenin complex bridging it to the C-terminal FERM domain of myosin VIIA .. +PF12633 Adenylate cyclase NT domain
+PF12634 Inheritance of peroxisomes protein 1
Inp1 is a family of peripheral membrane proteins of peroxisomes. Inp1p binds Pex25p, Pex30p, and Vps1p, all of which are involved in controlling peroxisome division. The levels of Inp1p vary with the cell cycle, and Inp1 acts as a factor that retains peroxisomes in cells and controls peroxisome division . Inp1p promotes the retention of peroxisomes in mother cells and buds of budding yeast by attaching peroxisomes to as-yet-unidentified cortical structures .. +PF12635 Protein of unknown function (DUF3780)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 189 and 206 amino acids in length. There are two conserved sequence motifs: PEERWWL and GWR. This family is found in a very sporadic set of bacterial species, suggesting that it may have been horizontally transferred. One protein is annotated as plasmid borne.. +PF12636 Protein of unknown function (DUF3781)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 82 and 98 amino acids in length. There are two conserved sequence motifs: GKNWY and ITA.. +PF12637 TSCPD domain
This family of proteins is found in bacteria, archaea and viruses. The domain is found in isolation in many proteins where it has a conserved C-terminal motif TSCPD after which the domain is named. Most copies of the domain possess 4 conserved cysteines that may be part of an Iron-sulfur cluster. This domain is found at the C-terminus of some ribonucleoside-diphosphate reductase enzymes.. +PF12638 Staygreen protein
This family of proteins have been implicated in chlorophyll degradation [1,2]. Intriguingly members of this family are also found in non-photosynthetic bacteria.. +PF12639 Colicin-DNAse;
DNase/tRNase domain of colicin-like bacteriocin. Colicin-like bacteriocins are complex structures with an N-terminal beta-barrel translocation domain (Pfam:PF09000), a long double-alpha-helical receptor-binding domain (Pfam:PF11570) and this C-terminal RNAse/DNase domain with endonuclease activity. Their competitor bacteriocidal action is by a process that involves binding to a surface receptor, entering the cell, and, finally, killing it. The lethal action of colicin E3 is a specific cleavage in the ribosomal decoding A site. The crystal structure of colicin E3 reveals a Y-shaped molecule with the receptor binding domain forming a 100 Angstrom long stalk and the two globular heads of the translocation domain and this catalytic domain comprising the two arms .. +PF12640 UPF0489 domain
This family is probably an enzyme which is related to the Arginase family.. +PF12641 Flavodoxin domain
This family represents a flavodoxin domain.. +PF12642 Conjugative transposon protein TcpC
This family of proteins are annotated as conjugative transposon protein TcpC. The transfer clostridial plasmid (tcp) locus is part of some conjugative antibiotic resistance and virulence plasmids. TcpC was one of five genes whose products had low-level sequence identity to Tn916 proteins, having similarity to ORF13 homologues from Tn916, Tn5397, and CW459tet. This family of proteins is found in bacteria. Proteins in this family are typically between 302 and 351 amino acids in length.. +PF12643 MazG-like family
This family of short proteins are distantly related to the MazG enzyme. This suggests that these proteins are enzymes that catalyse a related reaction.. +PF12644 Protein of unknown function (DUF3782)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 91 and 186 amino acids in length.. +PF12645 Helix-turn-helix domain
This domain appears to be a helix-turn-helix domain suggesting that this might be a transcriptional regulatory protein. Some members of this family are annotated as conjugative transposon domains.. +PF12646 Domain of unknown function (DUF3783)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length.. +PF12647 RNHCP domain
This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 143 amino acids in length. There is a conserved RNHCP sequence motif.. +PF12648 TcpE family
This family of proteins includes TcpE a conjugative transposon membrane protein. This family of proteins is found in bacteria. Proteins in this family are typically between 122 and 168 amino acids in length.. +PF12650 Domain of unknown function (DUF3784)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 96 and 110 amino acids in length.. +PF12651 Ribbon-helix-helix domain
This short bacterial protein contains a ribbon-helix-helix domain that is likely to be DNA-binding.. +PF12652 CotJB protein
CotJ is a sigma E-controlled operon involved in the spore coat of Bacillus subtilis . This protein has been identified as a spore coat protein .. +PF12653 Protein of unknown function (DUF3785)
+PF12654 Domain of unknown function (DUF3786)
Pfam-B_16102 (release 23.0) . This presumed domain is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 201 and 257 amino acids in length. Some proteins also contain an iron-sulfur cluster.. +PF12655 Domain of unknown function (DUF3787)
This family of proteins is functionally uncharacterised. This family of proteins is found in Clostridia. Proteins in this family are approximately 60 amino acids in length. There is a conserved TAAW sequence motif that may be functionally important.. +PF12656 DExH-box splicing factor binding site
Pfam-B_900 (release 24.0). Yeast Spp2, a G-patch protein and spliceosome component, interacts with the ATP-dependent DExH-box splicing factor Prp2 . As this interaction involves the G-patch sequence in Spp2 and is required for the recruitment of Prp2 to the spliceosome before the first catalytic step of splicing, it is proposed that Spp2 might be an accessory factor that confers spliceosome specificity on Prp2 .. +PF12657 Transcription factor IIIC subunit delta N-term
In humans there are six subunits of transcription factor IIIC, and this one is the 90 kDa subunit; whereas in fungi the complex resolves into nine different subunits and this is No. 9 in yeasts . The whole subunit is involved in RNA polymerase III-mediated transcription. It is possible that this N-terminal domain interacts with TFIIIC subunit 8 .. +PF12658 Telomere capping, CST complex subunit
Stn1 and Ten1 are DNA-binding proteins with specificity for telomeric DNA substrates and both protect chromosome termini from unregulated resection and regulate telomere length. Stn1 complexes with Ten1 and Cdc13 to function as a telomere-specific replication protein A (RPA)-like complex . These three interacting proteins associate with the telomeric overhang in budding yeast, whereas a single protein known as Pot1 (protection of telomeres-1) performs this function in fission yeast, and a two-subunit complex consisting of POT1 and TPP1 associates with telomeric ssDNA in humans. S.pombe has Stn1- and Ten1-like proteins that are essential for chromosome end protection. Stn1 orthologues exist in all species that have Pot1, whereas Ten1-like proteins can be found in all fungi. Fission yeast Stn1 and Ten1 localise at telomeres in a manner that correlates with the length of the ssDNA overhang, suggesting that they specifically associate with the telomeric ssDNA. Two separate protein complexes are required for chromosome end protection in fission yeast. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution . Ten1 is one of the three components of the CST complex, which, in conjunction with the Shelterin complex helps protect telomeres from attack by DNA-repair mechanisms .. +PF12659 Telomere capping C-terminal wHTH
This domain consists of tandem winged helix-turn-helix motifs. Stn1 and Ten1 are DNA-binding proteins with specificity for telomeric DNA substrates and both protect chromosome termini from unregulated resection and regulate telomere length. Stn1 complexes with Ten1 and Cdc13 to function as a telomere-specific replication protein A (RPA)-like complex . These three interacting proteins associate with the telomeric overhang in budding yeast, whereas a single protein known as Pot1 (protection of telomeres-1) performs this function in fission yeast, and a two-subunit complex consisting of POT1 and TPP1 associates with telomeric ssDNA in humans. S.pombe has Stn1- and Ten1-like proteins that are essential for chromosome end protection. Stn1 orthologues exist in all species that have Pot1, whereas Ten1-like proteins can be found in all fungi. Fission yeast Stn1 and Ten1 localise at telomeres in a manner that correlates with the length of the ssDNA overhang, suggesting that they specifically associate with the telomeric ssDNA. Two separate protein complexes are required for chromosome end protection in fission yeast. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution .. +PF12660 Putative zinc-finger of transcription factor IIIC complex
This zinc-finger domain is at the very C-terminus of a number of different TFIIIC subunit proteins. This domain might be involved in protein-DNA and/or protein-protein interactions .. +PF12661 Human growth factor-like EGF
Wouters M, Coggill P. hEGF, or human growth factor-like EGF, domains have six conserved residues disulfide-bonded into the characteristic 'ababcc' pattern. They are involved in growth and proliferation of cells, in proteins of the Notch/Delta pathway, neuregulin and selectins. hEGFs are also found in mosaic proteins with four-disulfide laminin EGFs such as aggrecan and perlecan. The core fold of the EGF domain consists of two small beta-hairpins packed against each other. Two major structural variants have been identified based on the structural context of the C-terminal Cys residue of disulfide 'c' in the C-terminal hairpin: hEGFs and cEGFs. In hEGFs the C-terminal thiol resides in the beta-turn, resulting in shorter loop-lengths between the Cys residues of disulfide 'c', typically C[8-9]XC. These shorter loop-lengths are also typical of the four-disulfide EGF domains, laminin and integrin. Tandem hEGF domains have six linking residues between terminal cysteines of adjacent domains. hEGF domains may or may not bind calcium in the linker region. hEGF domains with the consensus motif CXD4X[F,Y]XCXC are hydroxylated exclusively in the Asp residue.. +PF12662 Complement Clr-like EGF-like
Wouters M, Coggill P. cEGF, or complement Clr-like EGF, domains have six conserved cysteine residues disulfide-bonded into the characteristic pattern 'ababcc'. They are found in blood coagulation proteins such as fibrillin, Clr and Cls, thrombomodulin, and the LDL receptor. The core fold of the EGF domain consists of two small beta-hairpins packed against each other. Two major structural variants have been identified based on the structural context of the C-terminal cysteine residue of disulfide 'c' in the C-terminal hairpin: hEGFs and cEGFs. In cEGFs the C-terminal thiol resides on the C-terminal beta-sheet, resulting in long loop-lengths between the cysteine residues of disulfide 'c', typically C[10+]XC. These longer loop-lengths may have arisen by selective cysteine loss from a four-disulfide EGF template such as laminin or integrin. Tandem cEGF domains have five linking residues between terminal cysteines of adjacent domains. cEGF domains may or may not bind calcium in the linker region. cEGF domains with the consensus motif CXN4X[F,Y]XCXC are hydroxylated exclusively on the asparagine residue.. +PF12663 Protein of unknown function (DUF3788)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 137 and 149 amino acids in length. This family may be distantly related to RelE proteins.. +PF12664 Protein of unknown function (DUF3789)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two completely conserved residues (V and C) that may be functionally important.. +PF12666 PrgI family protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 116 and 146 amino acids in length. This protein is found in an operon that is part of a Type IV secretion system.. +PF12667 NigD-like protein
JCSG target Q5LAY5_BACFN. This family of proteins is functionally uncharacterised. This family of proteins is found in Bacteroides species. Proteins in this family are typically between 234 and 260 amino acids in length. These proteins possess an N-terminal lipoprotein attachment site. The family includes NigD a protein found in the Nig operon that encodes a bacteriocin called nigrescin. It has been suggested that NigD may be the immunity protein for nigrescin (NigC) because it is directly downstream .. +PF12668 Protein of unknown function (DUF3791)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 71 and 125 amino acids in length.. +PF12669 Virus attachment protein p12 family
This family of proteins are related to Virus attachment protein p12 from the African swine fever virus. The family appears to contain an N-terminal signal peptide followed by a short cysteine rich region. The cysteine rich region is extremely variable and it is possible that only the N-terminal region is homologous.. +PF12670 Protein of unknown function (DUF3792)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. These proteins are integral membrane proteins.. +PF12671 Putative amidase domain
+PF12672 Protein of unknown function (DUF3793)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 211 amino acids in length. There are two conserved sequence motifs: PHE and LGYP.. +PF12673 Domain of unknown function (DUF3794)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF01476.. +PF12674 Putative zinc ribbon domain
This domain appears to be a zinc binding DNA-binding domain.. +PF12675 Protein of unknown function (DUF3795)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 99 and 171 amino acids in length. This protein is likely to be zinc binding given the conserved cysteines.. +PF12676 Protein of unknown function (DUF3796)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF12677 Domain of unknown function (DUF3797)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 50 amino acids in length. There is a conserved CGN sequence motif.. +PF12678 RING-H2 zinc finger
There are 8 cysteine/ histidine residues which are proposed to be the conserved residues involved in zinc binding. The protein, of which this domain is the conserved region, participates in diverse functions relevant to chromosome metabolism and cell cycle control .. +PF12679 ABC-2 family transporter protein
This family is related to the ABC-2 membrane transporter family .. +PF12680 SnoaL-like domain
This family contains a large number of proteins that share the SnoaL fold.. +PF12681 Glyoxalase-like domain
This domain is related to the Glyoxalase domain Pfam:PF00903.. +PF12682 Flavodoxin
This is a family of flavodoxins. Flavodoxins are electron transfer proteins that carry a molecule of non-covalently bound FMN.. +PF12683 Protein of unknown function (DUF3798)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 247 and 417 amino acids in length. Most of the proteins in this family have an N-terminal lipoprotein attachment site. These proteins have distant similarity to periplasmic ligand binding families such as Pfam:PF02608, which suggests that this family have a similar role.. +PF12684 PDDEXK-like domain of unknown function (DUF3799)
Jackhmmer:JCSG target 392282. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 265 and 420 amino acids in length. It appears that these proteins are distantly related to the PDDEXK superfamily and so these domains are likely to be nucleases. This family has a C-terminal cysteine cluster similar to that found in Pfam:PF01930.. +PF12685 SpoIIIAH-like protein
Stage III sporulation protein AH (SpoIIIAH) is a protein that is involved in forespore engulfment. It forms a channel with SpoIIIAH that is open on the forespore end and closed (or gated) on the mother cell end. This allows sigma-E-directed gene expression in the mother-cell compartment of the sporangium to trigger the activation of sigma-G forespore-specific gene expression by a pathway of intercellular signaling. This family of proteins is found in bacteria, archaea and eukaryotes and so must have a wider function than in sporulation. Proteins in this family are typically between 174 and 223 amino acids in length.. +PF12686 Protein of unknown function (DUF3800)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 215 and 302 amino acids in length. There is a DE motif at the N-terminus and a QXXD motif at the C-terminus that may be functionally important.. +PF12687 Protein of unknown function (DUF3801)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 158 and 187 amino acids in length. This family includes the PcfB protein.. +PF12688 Tetratrico peptide repeat
BH0479 of Bacillus halodurans is a hypothetical protein which contains a tetratrico peptide repeat (TPR) structural motif. The TPR motif is often involved in mediating protein-protein interactions. This protein is likely to function as a dimer. The first 48 amino acids are not present in the clone construct. This Pfam entry includes tetratricopeptide-like repeats not detected by the Pfam:PF00515, Pfam:PF07719, Pfam:PF07720 and Pfam:PF07221 models.. +PF12689 Acid Phosphatase
This family contains phosphatase enzymes and other proteins of the HAD superfamily. It includes MDP-1 which is a eukaryotic magnesium-dependent acid phosphatase [1-2].. +PF12690 Intracellular proteinase inhibitor
This is a bacterial domain which has been named BsuPI in Bacillus subtilis. This domain is found in Swiss:P39804, where it has been suggested to regulate the major intracellular proteinase (ISP-1) activity in vivo . The structure of proteins in this family adopt a beta barrel topology.. +PF12691 Minor capsid protein from bacteriophage
This family is from one of three adjacent genes, all of which are involved in formation of the minor phage capsid.. +PF12692 S-adenosyl-L-methionine methyltransferase
This domain is found in bacterial proteins. The structure of the proteins in this family suggest that they function as a methyltransferase.. +PF12693 GspL periplasmic domain
This domain is the periplasmic domain of the GspL/EpsL family proteins. These proteins are involved in type II secretion systems.. +PF12694 Putative molybdenum carrier
The structure of proteins in this family contain central beta strands with flanking alpha helices. The structure is similar to that of a molybdenum cofactor carrier protein.. +PF12695 Alpha/beta hydrolase family
Jackhmmer:Q186B9_CLOD6. This family contains a diverse range of alpha/beta hydrolase enzymes.. +PF12696 TraM recognition site of TraD and TraG
Pfam-B_1146 (release 5.4). This family includes both TraG and TraD as well as VirD4 proteins. TraG is essential for DNA transfer in bacterial conjugation. These proteins are thought to mediate interactions between the DNA-processing (Dtr) and the mating pair formation (Mpf) systems . This domain interacts with the relaxosome component TraM via the latter's tetramerisation domain. TraD is a hexameric ring ATPase that forms the cytoplasmic face of the conjugative pore .. +PF12697 Alpha/beta hydrolase family
Jackhmmer:Q186D8_CLOD6. This family contains alpha/beta hydrolase enzymes of diverse specificity.. +PF12698 ABC-2 family transporter protein
Jackhmmer:Q17ZU3_CLOD6. This family is related to the ABC-2 membrane transporter family Pfam:PF01061 .. +PF12699 phiKZ-like phage internal head proteins
Hardies SC, Coggill P. Phage internal head proteins (IP) are proteins that are encoded by a bacteriophage and assembled into the mature virion inside the capsid head. The most analogous characterised IP proteins are those of bacteriophage T4, which are known to be proteolytically processed during phage maturation, and then subsequently injected into the host cell during infection. The phiKZ_IP family consists of internal head proteins encoded by phiKZ-like phages. Each phage encodes three to six members of this family . Members of the family reside in the head and are cleaved during phage maturation to separate an N-terminal propeptide from a C-terminal domain. The C-terminal domain remains in the mature capsid. The N-terminal propeptide domain is either mostly or completely removed from the mature capsid. In one case, an unrelated polypeptide is embedded in the propeptide and also remains in the mature capsid. The phiKZ-like IP proteins are not discernibly homologous to the T4 IP proteins, and it is not known if the phiKZ-like IP proteins are injected into the host cell, or have some other function within the head. The alignment and HMM model exclude most of the propeptide region, but include the cleavage sites. The first 100 residues, including the cleavage sites, constitute the most conservative part of the seed alignment.. +PF12700 HlyD family secretion protein
Jackhmmer:Q182V7_CLOD6. This family is related to Pfam:PF00529.. +PF12701 Scd6-like Sm domain
The Scd6-like Sm domain is found in Scd6p from S. cerevisiae, Rap55 from the newt Pleurodeles walt, and its orthologs from fungi, animals, plants and apicomplexans . The domain is also found in Dcp3p and the human EDC3/FLJ21128 protein where it is fused to the Rossmanoid YjeF-N domain [1,2]. In addition both EDC3 and Scd6p are found fused to the FDF domain [1,2].. +PF12702 DUF3803;
JCSG_target_392987_3hty Pfam-B_17140 (release 24.0). This is a family of proteins of 115 residues on average. The family has two highly conserved tryptophan residues. The fold is very similar to the lipocalin-like fold from several comparable structures.. +PF12703 Toxin of toxin-antitoxin type 1 system
Gardner P, Coggill P. This family is the toxin of a type 1 toxin-antitoxin system which is found in a relatively widespread range of bacterial species. The species distribution suggests frequent horizontal gene transfer. In a type 1 system, as characterised for the plasmid-encoded E coli hok/sok system, the toxin-encoding stable mRNA encodes a protein which rapidly leads to cell death unless the translation is suppressed by a short-lived small RNA. The plasmid-encoded module prevents the growth of plasmid-free offspring, thus ensuring the persistence of the plasmid in the population. Plasmid-free cells arising after cell-division will be killed because the stable mRNA toxin is present while the comparably unstable anti-toxin is rapidly degraded. Where the system is transcribed chromosomally, the mechanism is poorly understood .. +PF12704 MacB-like periplasmic core domain
This family represents the periplasmic core domain found in a variety of ABC transporters. The structure of this family has been solved for the MacB protein . Some structural similarity was found to the periplasmic domain of the AcrB multidrug efflux transporter.. +PF12705 PD-(D/E)XK nuclease superfamily
Jackhmmer:Q18AP1_CLOD6. Members of this family belong to the PD-(D/E)XK nuclease superfamily. +PF12706 Beta-lactamase superfamily domain
Jackhmmer:Q189N7_CLOD6. This family is part of the beta-lactamase superfamily and is related to Pfam:PF00753.. +PF12707 Protein of unknown function (DUF3804)
This family is approximately 130 residues. Dali search indicates this protein carries a NTF2-fold with a hydrophobic cavity as a structural homologue to 1JB2, 2R4I, 3FSD and 2UX0. In this hydrophobic cavity, Arg 118 provides the H-bonding force to hold a PEG molecule from crystallisation. The interface interaction suggests that the biomolecule of Swiss:Q46KI2 is a dimer. Two members of the family are annotated as putative EF-Tu domain 2 but there is no match to this family so this is likely to be a false assignment. There are two highly conserved tryptophan residues towards the C-terminal end of the family.. +PF12708 Pectate lyase superfamily protein
Jackhmmer:Q184L0_CLOD6. This family of proteins possesses a beta helical structure like Pectate lyase. This family is most closely related to glycosyl hydrolase family 28.. +PF12709 Central kinetochore-associated
This is a family of proteins integrally involved in the central kinetochore. Slk19 is a yeast member and it may play an important role in the timing of nuclear migration. It may also participate, directly or indirectly, in the maintenance of centromeric tensile strength during mitotic stagnation, for instance during activation of checkpoint controls, when cells need to preserve nuclear integrity until cell cycle progression can be resumed .. +PF12710 haloacid dehalogenase-like hydrolase
Jackhmmer:Q18AI1_CLOD6. +PF12711 Kinesin motor
This family is closely related to Kinesin-related, Pfam:PF06548.. +PF12712 Domain of unknown function (DUF3805)
This family represents the N-terminal domain of the structure. In two related Bacteroides species the gene lies immediately upstream from a putative ATP binding component of an ATP transporter and a putative histidinol phosphatase. The structure of this domain is strikingly similar to the N-terminal structure of 1tui, also of unknown function. The domain carries four conserved tryptophan residues.. +PF12713 Domain of unknown function (DUF3806)
This family represents the C-terminal domain of the structure. In two related Bacteroides species the gene lies immediately upstream from a putative ATP binding component of an ATP transporter and a putative histidinol phosphatase. The structure of this domain is strikingly similar to the N-terminal structure of 1ma7 whose C-terminal domain is a phage integrase, Pfam:PF00589.. +PF12714 TILa domain
Pfam-B_897 (release 5.2). This cysteine rich domain occurs alongside the TIL Pfam:PF01826 domain and is likely to be a distantly related relative.. +PF12715 Abhydrolase family
This is a family of probable bacterial abhydrolases.. +PF12716 Nuclear pore assembly and biogenesis
This is a family of conserved fungal proteins involved in nuclear pore assembly . Apq12 is an integral membrane protein of the nuclear envelope (NE) and endoplasmic reticulum. Its absence leads to a partial block in mRNA export and cold-sensitive defects in the growth and localisation of a subset of nucleoporins, particularly those asymmetrically localised to the cytoplasmic fibrils . The defects in nuclear pore assembly appear to be due to defects in regulating membrane fluidity .. +PF12717 non-SMC mitotic condensation complex subunit 1
Pfam-B_410 (release 24.0). The three non-SMC (structural maintenance of chromosomes) subunits of the mitotic condensation complex are Cnd1-3. The whole complex is essential for viability and the condensing of chromosomes in mitosis.. +PF12718 Tropomyosin like
This family is a set of eukaryotic tropomyosins. Within the yeast Tmp1 and Tmp2, biochemical and sequence analyses indicate that Tpm2p spans four actin monomers along a filament, whereas Tpm1p spans five. Despite its shorter length, Tpm2p can compete with Tpm1p for binding to F-actin. Over-expression of Tpm2p in vivo alters the axial budding of haploids to a bipolar pattern, and this can be partially suppressed by co-over-expression of Tpm1p. This suggests distinct functions for the two tropomyosins, and indicates that the ratio between them is important for correct morphogenesis . The family also contains higher eukaryote Tmp3 members.. +PF12719 Nuclear condensing complex subunits, C-term domain
Pfam-B_484 (release 24.0). The Cnd1-3 proteins are the three non-SMC (structural maintenance of chromosomes) proteins that go to make up the mitotic condensation complex along with the two SMC protein families, XCAP-C and XCAP-E, (or in the case of fission yeast, Cut3 and Cut14). The five-member complex seems to be conserved from yeasts to vertebrates. This domain is the C-terminal, cysteine-rich domain of Cnd3. The complex shuttles between the nucleus, during mitosis, and the cytoplasm during the rest of the cycle. Thus this family is made up of the C-termini of XCAP-Gs, Ycg1 and Ycs5 members.. +PF12720 Protein of unknown function (DUF3807)
This is a family of conserved fungal proteins of unknown function.. +PF12721 RIP homotypic interaction motif
RIP proteins are receptor-interacting serine/threonine-protein kinases or cell death proteins . This interacting domain is involved in virus recognition. The RHIM domain is necessary for the recruitment of RIP and RIP3 by the IFN-inducible protein DNA-dependent activator of IRFs (DAI), also known as DLM-1 or Z-DNA binding protein (ZBP1). Both the RIP kinases contribute to DAI-induced NF-kappaB activation. RIP3 undergoes auto phosphorylation on binding to DAI .. +PF12722 High-temperature-induced dauer-formation protein
Hid1 (high-temperature-induced dauer-formation protein 1) represents proteins of approximately 800 residues long and is conserved from fungi to humans. It contains up to seven potential transmembrane domains separated by regions of low complexity. Functionally it might be involved in vesicle secretion or be an inter-cellular signalling protein or be a novel insulin receptor .. +PF12723 Protein of unknown function (DUF3809)
Jackhmmer:NP_295729.1. This family of proteins is functionally uncharacterised. This family of proteins is found in Deinococci bacteria. Proteins in this family are typically between 117 and 157 amino acids in length.. +PF12724 Flavodoxin domain
Jackhmmer:Q186N5_CLOD6. This is a family of flavodoxins. Flavodoxins are electron transfer proteins that carry a molecule of non-covalently bound FMN.. +PF12725 Protein of unknown function (DUF3810)
Jackhmmer:Q185R6_CLOD6. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 333 and 377 amino acids in length. There is a conserved HEXXH sequence motif that is characteristic of metallopeptidases. This family may therefore belong to an as yet uncharacterised family of peptidase enzymes.. +PF12726 SEN1 N terminal
Pfam-B_2547 (release 24.0). This domain is found at the N terminal of the helicase SEN1. SEN1 is a Pol II termination factor for noncoding RNA genes . The N terminal of SEN1, unlike the C terminal, is not required for growth .. +PF12727 PBP superfamily domain
Jackhmmer:Q18A58_CLOD6. This family belongs to the periplasmic binding domain superfamily. It is often associated with a helix-turn-helix domain.. +PF12728 Helix-turn-helix domain
Jackhmmer:Q18A58_CLOD6. This domain is a DNA-binding helix-turn-helix domain.. +PF12729 4HB_MCP_2;
Four helix bundle sensory module for signal transduction. This family is a four helix bundle that operates as a ubiquitous sensory module in prokaryotic signal-transduction. The 4HB_MCP is always found between two predicted transmembrane helices indicating that it detects only extracellular signals. In many cases the domain is associated with a cytoplasmic HAMP domain suggesting that most proteins carrying the bundle might share the mechanism of transmembrane signalling which is well-characterised in E coli chemoreceptors.. +PF12730 ABC-2 family transporter protein
Jackhmmer:Q18D57_CLOD6. This family is related to the ABC-2 membrane transporter family Pfam:PF01061 .. +PF12731 Mating-type protein beta 1
Pfam-B_4610 (release 8.0). This domain is found in some fungi and is the C-terminus of a homeodomain-containing transcription factor protein involved in mating.. +PF12732 YtxH-like protein
Jackhmmer:Q18C91_CLOD6. This family of proteins is found in bacteria. Proteins in this family are typically between 100 and 143 amino acids in length. The N-terminal region is the most conserved. Proteins in this family are functionally uncharacterised.. +PF12733 Cadherin-like beta sandwich domain
Aravind L, Coggill P. This domain is found in several bacterial, metazoan and chlorophyte algal proteins. A profile-profile comparison recovered the cadherin domain and a comparison of the predicted structure of this domain with the crystal structure of the cadherin showed a congruent seven stranded secondary structure. The domain is widespread in bacteria and seen in the firmicutes, actinobacteria, certain proteobacteria, bacteroides and chlamydiae with an expansion in Clostridium. In contrast, it is limited in its distribution in eukaryotes suggesting that it was derived through lateral transfer from bacteria. In prokaryotes, this domain is widely fused to other domains such as FNIII (Fibronectin Type III), TIG, SLH (S-layer homology), discoidin, cell-wall-binding repeat domain and alpha-amylase-like glycohydrolases. These associations are suggestive of a carbohydrate-binding function for this cadherin-like domain. In animal proteins it is associated with an ATP-grasp domain.. +PF12734 Cysteine-rich TM module stress tolerance
Aravind L, Coggill P. The members of this family are short cysteine-rich membrane proteins that most probably dimerise together to form a transmembrane sulfhydryl-lined pore. The CYSTM module is always present at the extreme C-terminus of the protein in which it is present. Furthermore, like the yeast prototypes, the majority of the proteins also possess a proline/glutamine-rich segment upstream of the CYSTM module that is likely to form a polar, disordered head in the cytoplasm. The presence of an atypical well-conserved acidic residue at the C-terminal end of the TM helix suggests that this might interact with a positively charged moiety in the lipid head group. Consistently across the eukaryotes, the different versions of the CYSTM module appear to have roles in stress-response or stress-tolerance, and, more specifically, in resistance to deleterious substances, implying that these might be general functions of the whole family.. +PF12735 TRAPP trafficking subunit Trs65
This family is one of the subunits of the TRAPP Golgi trafficking complex . TRAPP subunits are found in two different sized complexes, TRAPP I and TRAPP II. While both complexes contain the same seven subunits, Bet3p, Bet5p, Trs20p, Trs23p, Trs31p, Trs33p and Trs85p, with TRAPPC human equivalents, TRAPP II has the additional three subunits, Trs65p, Trs120p and Trs130p . While it has been implicated in cell wall biogenesis and stress response, the role of Trs65 in TRAPP II is supported by the findings that the protein co-localises with Trs130p, and deletion of TRS65 in yeast leads to a conditional lethal phenotype if either one of the other TRAPP II-specific subunits is modified . Furthermore, the trs65 mutant has reduced Ypt31/32p guanine nucleotide exchange, GEF, activity .. +PF12736 Cell-cycle sustaining, positive selection,
Aravind L, Coggill P. The 'CABIT' domain (for 'cysteine-containing, all-beta in Themis') is found in a newly identified gene family that has three mammalian homologues (Themis, Icb1 and 9130404H23Rik) that encode proteins with two CABIT domains and a highly conserved proline-rich region. In contrast, Fam59A, Fam59B and related proteins from mammals to cnidarians, including the insect Serrano proteins, have a single copy of the CABIT domain, a proline-rich region and often a C-terminal SAM (sterile-motif) domain. Multiple-sequence alignment has predicted that the CABIT domain adopts an all-strand structure with at least 12 strands, ie a dyad of six-stranded beta-barrel units. The CABIT domain contains a nearly absolutely conserved cysteine residue which is likely to be central to its function. CABIT domain proteins function downstream of tyrosine kinase signalling and interact with GRB2.. +PF12737 C-terminal domain of homeodomain 1
Pfam-B_4610 (release 8.0). Mating in fungi is controlled by the loci that determine the mating type of an individual, and only individuals with differing mating types can mate. Basidiomycete fungi have evolved a unique mating system, termed tetrapolar or bifactorial incompatibility, in which mating type is determined by two unlinked loci; compatibility at both loci is required for mating to occur. The multi-allelic tetrapolar mating system is considered to be a novel innovation that could have only evolved once, and is thus unique to the mushroom fungi. This domain is C-terminal to the homeodomain transcription factor region.. +PF12738 twin BRCT domain
Pfam-B_181 (release 24.0). This is a BRCT domain that appears in duplicate in most member sequences. BRCT domains are peptide- and phosphopeptide-binding modules. BRCT domains are present in a number of proteins involved in DNA checkpoint controls and DNA repair [1,2].. +PF12739 ER-Golgi trafficking TRAPP I complex 85 kDa subunit
This family is one of the subunits of the TRAPP Golgi trafficking complex. TRAPP subunits are found in two different sized complexes, TRAPP I and TRAPP II, and this Trs85 is in the smaller complex. TRAPP I, but not TRAPP II, functions in ER-Golgi transport . Trs85p was reported to function in the cytosol-to-vacuole targeting pathway, suggesting a role for this subunit in autophagy as well as in secretion . The overall architecture of TRAPP I shows the other components to be Bet3p (TRAPPC3), Bet5p (TRAPPC1), Trs20p (TRAPPC2) , Trs23p (TRAPPC4), Trs31p (TRAPPC5), Trs33p (TRAPPC6a and b) and Trs85p.. +PF12740 Chlorophyllase enzyme
This family consists of several chlorophyllase and chlorophyllase-2 (EC:3.1.1.14) enzymes. Chlorophyllase (Chlase) is the first enzyme involved in chlorophyll (Chl) degradation and catalyses the hydrolysis of an ester bond to yield chlorophyllide and phytol . The family includes both plant and Amphioxus members.. +PF12741 Susd and RagB outer membrane lipoprotein
This is a family of SusD-like proteins, one member of which, BT1043 (Swiss:Q8A8X4), is an outer membrane lipoprotein involved in host glycan metabolism. The structures of this and SusD-homologues in the family are dominated by tetratrico peptide repeats that may facilitate association with outer membrane beta-barrel transporters required for glycan uptake. The structure of BT1043 complexed with N-acetyllactosamine reveals that recognition is mediated via hydrogen bonding interactions with the reducing end of beta-N-acetylglucosamine, suggesting a role in binding glycans liberated from the mucin polypeptide. Mammalian distal gut bacteria have an expanded capacity to utilize glycans. In the absence of dietary sources, some species rely on host-derived mucosal glycans. The ability of Bacteroides thetaiotaomicron, a prominent human gut symbiont, to forage host glycans contributes to both its ability to persist within an individual host and its ability to be transmitted naturally to new hosts at birth.. +PF12742 Gryzun, putative Golgi trafficking
Members of this family are involved in Golgi trafficking.. +PF12743 Oestrogen-type nuclear receptor final C-terminal
This is the very C-terminal region of a subfamily of nuclear receptors that includes oestrogen receptors and other subfamily 3 group A members. The actual function of this region is not known, but the domain is absent from all the other types of nuclear receptors. Oestrogen receptors modulate AP-1-dependent transcription through two distinct mechanisms: via protein-protein interactions on DNA; and via non-genomic actions. The mechanism used depends on the cellular localisation of the receptor. In addition to the more extensively studied cross-talk on DNA, additional non-genomic actions might be very important in target tissues in which membrane-associated ERs are found. These non-genomic actions probably contribute to the overall physiological responses mediated by ligand-bound ERs and might possibly be mediated via this C-terminal domain.. +PF12744 Autophagy protein Atg19, Atg8-binding
Autophagy is generally known as a process involved in the degradation of bulk cytoplasmic components that are non-specifically sequestered into an autophagosome, where they are sequestered into double-membrane vesicles and delivered to the degradative organelle, the lysosome/vacuole, for breakdown and eventual recycling of the resulting macromolecules. In contrast to autophagy, however, the Cvt pathway is a highly selective process that involves the sequestration of at least two specific cargos that are resident vacuolar hydrolases, aminopeptidase I (Ape1) and alpha-mannosidase (Ams1). These proteins are sequestered within a double-membrane vesicle, termed a Cvt vesicle. The Cvt vesicle is fairly consistent in size, and is much smaller than the autophagosome, being 140-160 nm in diameter. The prApe1 is sequestered within either Cvt vesicles or autophagosomes, depending on the nutrient conditions, and delivered to the vacuole. Autophagy and the Cvt pathway are topologically and mechanistically similar and share most of the same machinery. The Ape1 complex is ultimately enwrapped within either Cvt vesicles or autophagosomes at the perivacuolar PAS. The receptor protein Atg19 binds to the Ape1 complex through the prApe1 propeptide to form the Cvt complex in the cytosol. In the absence of Atg19, prApe1 can form an Ape1 complex, but does not localise at the PAS. Atg19 is a peripheral membrane protein with differing binding sites for both Ape1 and Ams1. The Atg8-binding region in the yeast proteins is this very C-terminal residues .. +PF12745 Anticodon binding domain of tRNAs
Pfam-B_20896 (release 24.0). This is an HGTP_anticodon binding domain, found largely on Gcn2 proteins which bind tRNA to down regulate translation in certain stress situations.. +PF12746 GNAT acetyltransferase
Many of the members are annotated as being Zwittermicin A resistance proteins, whereas others are listed as being GNAT acetyltransferases. The family has similarities to the GNAT acetyltransferase family.. +PF12747 DdrB-like protein
This family includes the Deinococcus DdrB protein which is a ssDNA binding protein. This family also includes some possibly distantly related cyanobacterial proteins. However, these are not strongly supported. The structure of DdrB is known.. +PF12749 Eukaryotic metallothionein
This is a family of eukaryotic metallothioneins.. +PF12750 Maff2 family
Jackhmmer:Q187G1_CLOD6. This family of short membrane proteins are related to the protein Maff2. Maff2 lies just outside the direct repeats of a tetracycline resistance transposable element. This protein may contain transmembrane helices.. +PF12751 Vacuolar segregation subunit 7
Pfam-B_10847 (release 24.0). Vac7 is localised at the vacuole membrane, a location which is consistent with its involvement in vacuole morphology and inheritance . Vac7 has been shown to function as an upstream regulator of the Fab1 lipid kinase pathway . The Fab1 lipid pathway is important for correct regulation of membrane trafficking events.. +PF12752 SUZ domain
The SUZ domain is a conserved RNA-binding domain found in eukaryotes and enriched in positively charged amino acids. It was first characterized in the C.elegans protein Szy-20 where it has been shown to bind RNA and allow their localization to the centrosome. Warning- the domain has a compositionally biased character.. +PF12753 Nuclear pore complex subunit Nro1
Pfam-B_4826 (release 24.0). +PF12754 Cell-cycle control medial ring component
During size-dependent cell cycle transitions controlled by the ubiquitous cyclin-dependent kinase Cdk1, Blt1 has been shown to co-localise with Cdr2 in the medial interphase nodes, as well as with Mid1 which was previously shown to localise to similar interphase structures. Physical interactions between Blt1-Mid1, Blt1-Cdr2 and Cdr2-Mid1 were detected, indicating that medial cortical nodes are formed by the ordered, Cdr2-dependent assembly of multiple interacting proteins during interphase. Q5KIH8.1/10-346; Q5KIH8.1/10-90;. +PF12755 Vacuolar 14 Fab1-binding region
Vac14 is a scaffold for the Fab1 kinase complex, a complex that allows for the dynamic interconversion of PI3P and PI(3,5)P2p (phosphoinositide phosphate (PIP) lipids, that are generated transiently on the cytoplasmic face of selected intracellular membranes). This interconversion is regulated by at least five proteins in yeast: the lipid kinase Fab1p, lipid phosphatase Fig4p, the Fab1p activator Vac7p, the Fab1p inhibitor Atg18p, and Vac14p, a protein required for the activity of both Fab1p and Fig4p. This domain appears to be the one responsible for binding to Fab1. The full length Vac14 in yeasts is likely to be a protein carrying a succession of HEAT repeats, most of which have now degenerated. This regulatory system is crucial for the proper functioning of the mammalian nervous system.. +PF12756 C2H2 type zinc-finger (2 copies)
Pfam-B_88 (release 24.0). This family contains two copies of a C2H2-like zinc finger domain.. +PF12757 Protein of unknown function (DUF3812)
Pfam-B_8029 (release 24.0). This is a family of fungal proteins whose function is not known.. +PF12758 Protein of unknown function (DUF3813)
PfamB_1273 (release 24.0). This is an uncharacterised family of Bacillus proteins.. +PF12759 InsA_C;
InsA C-terminal domain. This short domain is found at the C-terminus of the InsA protein. This domain contains a helix-turn-helix domain.. +PF12760 Zn_ribbon_3;
Transposase zinc-ribbon domain. Pfam-B_3 (Release 24.0). This zinc binding domain is found in a range of transposase proteins such as ISSPO8, ISSOD11, ISRSSP2 etc. It is likely a zinc-binding beta ribbon domain that could bind the DNA.. +PF12761 E3;
Actin cytoskeleton-regulatory complex protein END3. Pfam-B_51079 (release 24.0). Endocytosis is accomplished through the sequential recruitment at endocytic sites of proteins that drive cargo sorting, membrane invagination and vesicle release . End3p is part of the coat module protein complex Pan1, along with Pan1p, Sla1p, and Sla2p . The proteins in this complex are regulated by phosphorylation events. End3p also regulates the cortical actin cytoskeleton [3,4]. The subunits of the Pan1 complex are homologous to mammalian intersectin.. +PF12762 Transposase_38;
ISXO2-like transposase domain. Pfam-B_3 (Release 24.0). This domain probably functions as an integrase that is found in a wide variety of transposases, including ISXO2.. +PF12763 efhand_3;
Cytoskeletal-regulatory complex EF hand. Pfam-B_51079 (release 24.0). This is an efhand family from the N-terminal of actin cytoskeleton-regulatory complex END3 and similar proteins from fungi and closely related species.. +PF12764 Glycine-rich region of argonaut
Pfam-B_7248 (release 24.0). This domain is often found at the very N-terminal of argonaut-like proteins.. +PF12765 HEAT repeat associated with sister chromatid cohesion
Pfam-B_443 (release 24.0). This HEAT repeat is found most frequently in sister chromatid cohesion proteins such as Nipped-B. HEAT repeats are found tandemly repeated in many proteins, and they appear to serve as flexible scaffolding on which other components can assemble.. +PF12766 Pyridoxamine 5'-phosphate oxidase
Pfam-B_2486 (release 24.0). Pyridoxamine 5'-phosphate oxidase catalyses the oxidation of pyridoxamine-5-P (PMP) and pyridoxine-5-P (PNP) to pyridoxal-5-P (PLP), the terminal step in the de novo biosynthesis of PLP in Escherichia coli and part of the salvage pathway of this coenzyme in both E. coli and mammalian cells. This region is the flavoprotein FMN-binding domain.. +PF12767 Transcriptional regulator of RNA polII, SAGA, subunit
Pfam-B_319 (release 24.0). The yeast SAGA complex is a multifunctional coactivator that regulates transcription by RNA polymerase II [1,2]. It is formed of five major modular subunits and shows a high degree of structural conservation to human TFTC and STAGA . The complex can also be conceived of as consisting of two histone-fold-containing core subunits, and this family is one of these. As a family it is likely to carry binding regions for interactions with a number of the other components of the complex.. +PF12768 Cortical protein marker for cell polarity
Pfam-B_2071 (release 24.0). Diploid yeast cells repeatedly polarize and bud from their poles, due probably to the presence of highly stable membrane markers, and Rax2 is one such marker. It is inherited immutably at the cell cortex for multiple generations, and has a half-life exceeding several generations. The persistent inheritance of cortical protein markers would provide a means of coupling a cell's history with the future development of a precise morphogenetic form . Both Rax1 and Rax2 localise to the distal pole as well as to the division site and they interact both with each other and with Bud8p and Bud9p in the establishment and/or maintenance of the cortical markers for bipolar budding . Thus Rax2 is likely to control cell polarity during vegetative growth, and in fission yeast this is done by regulating the localisation of for3p .. +PF12769 Domain of unknown function (DUF3814)
Pfam-B_10 (release 24.0). This is a domain of unknown function. It is often found in combination with Pfam:PF05222, Pfam:PF01262 and Pfam:PF02233 on alanine dehydrogenase and pyridine nucleotide transhydrogenase enzymes.. +PF12770 CHAT domain
Pfam-B_4 (Release 24.0). These proteins appear to be related to peptidases in peptidase clan CD that includes the caspases. This domain has been termed the CHAT domain for Caspase HetF Associated with Tprs. This family has been identified as a sister group to the separins .. +PF12771 Starch-binding associating with outer membrane
JCSG structure (Target 390167). SusD is a secreted starch-binding protein with an N-terminal lipid tail that allows it to associate with the outer membrane.. +PF12772 Growth hormone receptor binding
Pfam-B_7 (release 24.0). Growth hormone receptor binding protein is produced either by proteolysis of the GHR (growth hormone receptor) at the cell surface thereby releasing its extracellular domain, the GHBP (growth hormone-binding protein), or, in rodents, by alternative processing of the GHR transcript. The sheddase proteolytic enzyme responsible for the cleavage is TACE (tumour necrosis factor-alpha-converting enzyme) [1,2]. Growth hormone (GH) binding to GH receptor (GHR) is the initial step that leads to the physiological functions of the hormone . The biological effects of GHBP are determined by the serum levels of growth hormone (GH), which can vary. Low levels of GH can result in a dwarf phenotype and have been positively correlated with an increased life expectancy. High levels of GH can lead to gigantism or a clinical syndrome termed acromegaly and have been implicated in diabetic eye and kidney damage .. +PF12773 Double zinc ribbon
Pfam-B_12 (Release 24.0). This family consists of a pair of zinc ribbon domains.. +PF12774 Hydrolytic ATP binding site of dynein motor region D1
Pfam-B_14 (release 24.0). The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D1 unit of the motor and contains the hydrolytic ATP binding site . . +PF12775 P-loop containing dynein motor region D3
Pfam-B_14 (release 24.0). The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D3 and is an ATP binding site .. +PF12776 Myb/SANT-like DNA-binding domain
Pfam-B_16 (Release 24.0). This presumed domain appears to be related to other Myb/SANT like DNA binding domains. In particular Pfam:PF10545 seems most related. This family is greatly expanded in plants and appears in several proteins annotated as transposon proteins.. +PF12777 Microtubule-binding stalk of dynein motor
Pfam-B_14 (release 24.0). The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This family is the region between D4 and D5 and is the two predicted alpha-helical coiled coil segments that form the stalk supporting the ATP-sensitive microtubule binding component .. +PF12778 PXPV repeat (3 copies)
Pfam-B_15 (Release 24.0). This short repeat is found in multiple copies in a variety of Burkholderia proteins. The function of this region is unknown.. +PF12779 YXWGXW repeat (2 copies)
Pfam-B_15 (Release 24.0). This short repeat contains the motif YXWXXGXW where X can be any amino acid. It is generally found in 2-5 copies in short secreted bacterial proteins. Its function is as yet unknown.. +PF12780 P-loop containing dynein motor region D4
Pfam-B_14 (release 24.0). The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D4 ATP-binding region of the motor .. +PF12781 ATP-binding dynein motor region D5
Pfam-B_14 (release 24.0). The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D5 ATP-binding region of the motor, but has lost its P-loop .. +PF12782 Invertebrate innate immunity transcript family
Pfam-B_9 (release 24.0). The immune response of the purple sea urchin appears to be more complex than previously believed in that it uses immune-related gene families homologous to vertebrate Toll-like and NOD/NALP-like receptor families as well as C-type lectins and a rudimentary complement system. In addition, the species also produces this unusual family of mRNAs, also known as 185/333, which is strongly upregulated in response to pathogen challenge .. +PF12783 Guanine nucleotide exchange factor in Golgi transport N-terminal
Pfam-B_13 (release 24.0). The full-length Sec7 functions proximally in the secretory pathway as a protein binding scaffold for the coat protein complexes COPII-COPI. The COPII-COPI-protein switch is necessary for maturation of the vesicular-tubular cluster, VTC, intermediate compartments for Golgi compartment biogenesis. This N-terminal domain however does not appear to be binding either of the COP or the ARF . . +PF12784 PD-(D/E)XK nuclease family transposase
Pfam-B_5 (Release 24.0). Members of this family belong to the PD-(D/E)XK nuclease superfamily . These proteins are transposase proteins.. +PF12785 Variant erythrocyte surface antigen-1
Pfam-B_22 (release 24.0). This family represents the N-terminal of the variant erythrocyte surface antigen 1, versions a and b, of Babesia. Babesia bovis is a tick-borne, intra-erythrocytic, protozoal parasite of cattle that shares many lifestyle parallels with the most virulent of the human malarial parasites, Plasmodium falciparum. Babesia uses antigenic variation to establish consistent infections of long duration. The two variants of VESA1, a and b, are expressed from different but closely related genes, and variation is achieved through the involvement of a segmental gene conversion mechanism and low-frequency epigenetic in situ switching of transcriptional activity from the VESA1 gene-pair to a possible other gene pair.. +PF12786 GB virus C genotype envelope
Pfam-B_19 (release 24.0). This is the envelope protein from the ssRNA GB virus genotype C.. +PF12787 EcsC protein family
Jackhmmer:Q186V8_CLOD6. Proteins in this family are related to EcsC from B. subtilis. This protein is found in an operon with EcsA and EcsB which are components of an ABC transport system . The function of this protein is unknown.. +PF12788 YmaF family
This family of proteins contain 6 HXH motifs and is named after the B. subtilis YmaF protein. It seems likely that these are involved in metal binding. The function of this protein is unknown.. +PF12789 Phage tail repeat like
This family largely contains proteins from the eukaryote Trichomonas vaginalis. These proteins contain multiple HXH repeats. Some proteins in this family are annotated as having phage tail repeats. The function of this family is unknown.. +PF12790 Type VI secretion lipoprotein
Pfam-B_27 (release 24.0). One of the virulence mechanisms of E coli is the production of toxins which it produces from dedicated machineries called secretion systems. Seven secretion systems have been described, which assemble from 3 up to more than 20 subunits. These secretion systems derive from or have co-evolved with bacterial organelles such as ABC transporters (type I), type IV pili (type 2), flagella (type 3), or conjugative machines (type IV). The type VI secretion system (T6SS) is present in most pathogens that have contact with animals, plants, or humans. SciN is a lipoprotein tethered to the outer membrane and expressed in the periplasm of E coli and is essential for T6S-dependent secretion of the Hcp-like SciD protein and for biofilm formation.. +PF12791 Anti-sigma factor N-terminus
Borovok I, Coggill P. The heat shock genes in B. subtilis can be classified into several groups according to their regulation , and the sigma gene, sigI, of Bacillus subtilis belongs to the group IV heat-shock response genes and has many orthologues in the bacterial phylum Firmicutes . Regulation of sigma factor I is carried out by RsgI from the same operon, and this N-terminal cytoplasmic portion of RsgI ('upstream' of the single transmembrane helix) has been shown to interact directly with Sigma-I .. +PF12792 CSS motif domain associated with EAL
This family with its characteristic highly conserved CSS sequence motif is found N-terminal to the EAL, Pfam:PF00563, domain in many cyclic diguanylate phosphodiesterases.. +PF12793 Sugar transport-related sRNA regulator N-term
Pfam-B_33 (release 24.0). Small, non-coding RNA molecules play important regulatory roles in a variety of physiological processes in bacteria. SgrR_N is the N-terminus of a family of proteins which regulate the transcription of these sRNAs, in particular SgrS. SgrR_N contains a helix-turn-helix motif characteristic of winged-helix DNA-binding transcriptional regulators. SgrS is a small RNA required for recovery from glucose-phosphate stress in bacteria . In examining the regulation of sgrR expression it was found that SgrR negatively auto-regulates its own transcription in the presence and absence of stress, and thus SgrR coordinates the response to glucose-phosphate stress by binding specifically to sgrS promoter DNA .. +PF12794 Mechanosensitive ion channel inner membrane domain 1
Pfam-B_24 (release 24.0). The small mechanosensitive channel, MscS, is a part of the turgor-driven solute efflux system that protects bacteria from lysis in the event of osmotic shock. The MscS protein alone is sufficient to form a functional mechanosensitive channel gated directly by tension in the lipid bilayer. The MscS proteins are heptamers of three transmembrane subunits with seven converging M3 domains, and this domain is one of the inner membrane domains.. +PF12795 Mechanosensitive ion channel porin domain
Pfam-B_24 (release 24.0). The small mechanosensitive channel, MscS, is a part of the turgor-driven solute efflux system that protects bacteria from lysis in the event of osmotic shock. The MscS protein alone is sufficient to form a functional mechanosensitive channel gated directly by tension in the lipid bilayer. The MscS proteins are heptamers of three transmembrane subunits with seven converging M3 domains, and this MscS_porin is towards the N-terminal of the molecules. The high concentration of negative charges at the extracellular entrance of the pore helps select the cations for efflux.. +PF12796 Ankyrin repeats (3 copies)
Jackhmmer:Q183I8_CLOD6. +PF12797 4Fe-4S binding domain
This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich.. +PF12798 4Fe-4S binding domain
This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich.. +PF12799 Leucine Rich repeats (2 copies)
Leucine rich repeats are short sequence motifs present in a number of proteins with diverse functions and cellular locations. These repeats are usually involved in protein-protein interactions. Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains.. +PF12800 4Fe-4S binding domain
This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich.. +PF12801 4Fe-4S binding domain
Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich.. +PF12802 MarR family
The Mar proteins are involved in the multiple antibiotic resistance, a non-specific resistance system. The expression of the mar operon is controlled by a repressor, MarR. A large number of compounds induce transcription of the mar operon. This is thought to be due to the compound binding to MarR, and the resulting complex stops MarR binding to the DNA. With the MarR repression lost, transcription of the operon proceeds . The structure of MarR is known and shows MarR as a dimer with each subunit containing a winged-helix DNA binding motif.. +PF12803 mRNA (guanine-7-)methyltransferase (G-7-MTase)
The Sendai virus RNA-dependent RNA polymerase complex, which consists of L and P proteins, participates in the synthesis of viral mRNAs that possess a methylated cap structure. The N-terminal of the L protein acts as the RNA-dependent RNA polymerase part of the molecule, family Paramyx_RNA_pol, Pfam:PF00946. This domain is the C-terminal part of the L protein and it catalyses cap methylation through its mRNA (guanine-7-)methyltransferase (G-7-MTase) activity .. +PF12804 MobA-like NTP transferase domain
This family includes the MobA protein (Molybdopterin-guanine dinucleotide biosynthesis protein A). The family also includes a wide range of other NTP transferase domain.. +PF12805 FUSC-like inner membrane protein yccS
Pfam-B_45 (release 24.0). This family has similarities to the fusaric acid resistance protein family. The proteins are lodged in the inner membrane.. +PF12806 Acetyl-CoA dehydrogenase C-terminal like
Pfam-B_46 (release 24.0). This domain would appear to be the very C-terminal region of many bacterial acetyl-CoA dehydrogenases.. +PF12807 Translation initiation factor eIF3 subunit 135
Translation initiation factor eIF3 is a multi-subunit protein complex required for initiation of protein biosynthesis in eukaryotic cells. The complex promotes ribosome dissociation, the binding of the initiator methionyl-tRNA to the 40 S ribosomal subunit, and mRNA recruitment to the ribosome. The protein product from TIF31 genes in yeast is p135 which associates with the eIF3 but does not seem to be necessary for protein translation initiation .. +PF12808 Mto1_bdg;
Micro-tubular organiser Mto1 C-term Mto2-binding region. Wood V, Coggill P, Eberhardt R. Pfam-B_28820 (release 24.0). The C-terminal region of the micro-tubular organiser protein 1 (mto1) is the binding domain for attachment to Mto2p. The full-length Mto1 protein is required for microtubule nucleation from non-spindle pole body MTOCs in fission yeast . The interaction of Mto2p with this region of Mto1 is critical for anchoring the cytokinetic actin ring to the medial region of the cell and for proper coordination of mitosis with cytokinesis .. +PF12809 Eukaryotic metallothionein
This is a family of eukaryotic metallothioneins.. +PF12810 Glycine rich protein
This family of proteins is greatly expanded in Trichomonas vaginalis. The proteins are composed of several glycine rich motifs interspersed through the sequence. Although many proteins in the family have been annotated by similarity, given the biased composition of the sequences these annotations are unlikely to be functionally relevant.. +PF12811 Bax inhibitor 1 like
The Bax-inhibitor-1 region of the receptor molecules is conserved from bacteria to humans.. +PF12812 PDZ-like domain
Pfam-B_17100 (release 24.0). PDZ domains are found in diverse signalling proteins in bacteria, yeasts, plants, insects and vertebrates. This is a family of PDZ-like domains from bacteria, plants and fungi.. +PF12813 XPG domain containing
Pfam-B_10579 (release 24.0). This family is largely of fungal proteins and is related to the XP-G protein family.. +PF12814 Meiotic cell cortex C-terminal pleckstrin homology
Pfam-B_1220 (release 24.0). The PH domain of these largely fungal proteins is necessary for the cortical localisation of the protein during meiosis, since the overall function of the protein is to anchor dynein at the cell cortex during the horsetail phase. During prophase I of fission yeast, horsetail nuclear movement occurs, and this starts when all the telomeres become bundled at the spindle pole body - SPB. Subsequent to this, the nucleus undergoes a dynamic oscillation, resulting in elongated nuclear morphology. Horsetail nuclear movement is thought to be predominantly due to the pulling of astral microtubules that link the SPB to cortical microtubule-attachment sites at the opposite end of the cell; the pulling force is believed to be provided by cytoplasmic dynein and dynactin.. +PF12815 Spt5 C-terminal nonapeptide repeat binding Spt4
Pfam-B_197031 (release 23.0). The C-terminal domain of the transcription elongation factor protein Spt5 is necessary for binding to Spt4 to form the functional complex that regulates early transcription elongation by RNA polymerase II. The complex may be involved in pre-mRNA processing through its association with mRNA capping enzymes. This CTD domain carries a regular nonapeptide repeat that can be present in up to 18 copies, as in S. pombe . The repeat has a characteristic TPA motif.. +PF12816 Golgi CORVET complex core vacuolar protein 8
Pfam-B_90 (release 24.0). Vps8 is one of the Golgi complex components necessary for vacuolar sorting . Eukaryotic cells contain a highly dynamic endo-membrane system, in which individual organelles keep their identity despite continuous vesicle generation and fusion. Vesicles that bud from a donor membrane are targeted and delivered to each individual organelle, where they release their cargo after fusion with the acceptor membrane. Vps8 is the core component of the endosomal tethering complex CORVET (class C core vacuole/endosome tethering). Vps8 co-operates with Vps21-GTP to mediate endosomal clustering in a reaction that is dependent on Vps3. Vps8 is the only CORVET subunit that is enriched on late endosomes, suggesting that it is a marker for the maturation of late endosomes. Late endosomes form intralumenal vesicles, and the resulting multivesicular bodies fuse with the vacuole to release their cargoes .. +PF12818 dsDNA viral tegument protein
Pfam-B_48 (release 24.0). This is a family of tegument proteins from double-stranded DNA herpesvirus and related viral species.. +PF12819 Carbohydrate-binding protein of the ER
Pfam-B_41 (release 24.0). Malectin is a membrane-anchored protein of the endoplasmic reticulum that recognises and binds Glc2-N-glycan. The domain is found on a number of plant receptor kinases.. +PF12820 Serine-rich domain associated with BRCT
Pfam-B_51 (release 24.0). This domain is found on BRCA1 proteins.. +PF12821 Protein of unknown function (DUF3815)
This family of membrane proteins is functionally uncharacterised.. +PF12822 Protein of unknown function (DUF3816)
This family of proteins is functionally uncharacterised but are likely to be membrane transporters. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 177 and 208 amino acids in length. A subset of this family is associated with the TM1506 proteins. In this context, transport through the channel is predicted to be regulated by the TM1506 protein by either regulating redox potential or modification of substrates . +PF12823 Domain of unknown function (DUF3817)
Pfam-B_123 (release 24.0). This domain is of unknown function. It is sometimes found adjacent to Pfam:PF07690 and Pfam:PF03176 which are both transporter domains.. +PF12824 Mitochondrial ribosomal protein subunit L20
Pfam-B_1364 (release 24.0). This family is the essential mitochondrial ribosomal protein subunit L20 of fungi.. +PF12825 Domain of unknown function in PX-proteins (DUF3818)
Pfam-B_972 (release 24.0). This domain is found on proteins carrying a PX domain. Its function is unknown.. +PF12826 Helix-hairpin-helix motif
Jackhmmer:DISA_CLOD6. The HhH domain of DisA, a bacterial checkpoint control protein, is a DNA-binding domain .. +PF12827 Peroxisomal biogenesis protein family
Pfam-B_15020 (release 24.0). Peroxin-22 is a integral peroxisomal membrane protein family. The N-terminus is in the matrix and the C-terminus is in the cytosol. The N-terminus carries a 25-amino acid peroxisome membrane-targeting signal. It interacts with the ubiquitin-conjugating peripheral peroxisomal membrane enzyme Pex4p anchoring it at the peroxisomal membrane. Both Pex proteins are involved at the same stage of peroxisome biogenesis.. +PF12828 PX-associated
Pfam-B_972 (release 24.0). This domain is associated with the PX domain.. +PF12829 Transcriptional regulation of mitochondrial recombination
Pfam-B_7788 (release 24.0). This family is involved in the transcriptional regulation of recombination in the mitochondria.. +PF12830 Sister chromatid cohesion C-terminus
Pfam-B_443 (release 24.0). This domain lies towards the C-terminus of nipped-B or sister chromatid cohesion proteins.. +PF12831 FAD dependent oxidoreductase
Pfam-B_47 (release 24.0). This family of proteins contains FAD dependent oxidoreductases and related proteins.. +PF12832 MFS_1 like family
Pfam-B_20770 (release 24.0). In fungal members this domain is found at the C-terminus of putative transporter proteins.. +PF12833 Helix-turn-helix domain
+PF12834 Integrase_l_N;
Phage integrase, N-terminal. Pfam-B_50 (release 24.0). This is a family of DNA-binding prophage integrases. It is found largely in Proteobacteria.. +PF12835 Integrase
Pfam-B_50 (release 24.0). This is a family of DNA-binding prophage integrases found in Proteobacteria.. +PF12836 Helix-hairpin-helix motif
The HhH domain is a short DNA-binding domain .. +PF12837 4Fe-4S binding domain
This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich.. +PF12838 4Fe-4S dicluster domain
Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters.. +PF12840 Helix-turn-helix domain
This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins.. +PF12841 YvrJ protein family
This family of short proteins are related to B. subtilis YvrJ protein. None of the members of this family have been functionally characterised.. +PF12842 Domain of unknown function (DUF3819)
Pfam-B_986 (release 24.0). This is an uncharacterised domain that is found on the CCR4-Not complex component Not1. Not1 is a global regulator of transcription that affects genes positively and negatively and is thought to regulate transcription factor TFIID .. +PF12843 Protein of unknown function (DUF3820)
Pfam-B_72 (release 24.0). This is a bacterial family that is functionally uncharacterised.. +PF12844 Helix-turn-helix domain
Members of this family contain a DNA-binding helix-turn-helix domain.. +PF12845 TBD domain
The Tbk1/Ikki binding domain (TBD) is a 40 amino acid domain able to bind kinases, and has been found to be essential for poly(I:C)-induced IRF activation . The domain is found in SINTBAD, TANK and NAP1 proteins. This domain is predicted to form an a-helix with residues essential for kinase binding clustering on one side .. +PF12846 AAA-like domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins.. +PF12847 Methyltransferase domain
Proteins in this family function as methyltransferases.. +PF12848 ABC transporter
This domain is related to Pfam:PF00005.. +PF12849 PBP superfamily domain
This domain belongs to the periplasmic binding protein superfamily.. +PF12850 Calcineurin-like phosphoesterase superfamily domain
Members of this family are part of the Calcineurin-like phosphoesterase superfamily.. +PF12851 TET_DSBH;
Oxygenase domain of the 2OGFeDO superfamily . A double-stranded beta helix (DSBH) fold domain of the 2-oxoglutarate (2OG)-Fe(II)-dependent dioxygenase (2OGFeDO) superfamily found in various eukaryotes, bacteria and bacteriophages . Members of this family catalyze nucleic acid modifications, such as thymidine hydroxylation during base J synthesis in kinetoplastids , and the conversion of 5 methyl-cytosine (5-mC) to 5-hydroxymethyl-cytosine (hmC) , or further oxidation to 5-formylcytosine (5fC) and 5-carboxylcytosine (5caC) . Metazoan TET proteins contain a cysteine-rich region inserted into the core of the DSBH fold. Vertebrate TET proteins are oncogenes that are mutated in various myeloid cancers . Fungal and algal versions of this family are linked to a predicted transposase and show lineage-specific expansions .. +PF12852 Cupin
Pfam-B_918 (release 24.0). This is a family of bacterial and eukaryotic proteins that belong to the Cupin superfamily. Some of the proteins in this family are annotated as being members of the AraC family of transcription factors, in which case this domain corresponds to the ligand binding domain.. +PF12853 C-terminal of NADH-ubiquinone oxidoreductase 21 kDa subunit
This family is the C-terminal domain of NADH-ubiquinone oxidoreductase 21 kDa subunits from fungi.. +PF12854 PPR repeat
Pfam-B_105542 (release 24.0). This family matches additional variants of the PPR repeat that were not captured by the model for Pfam:PF01535. The exact function is not known.. +PF12855 Life-span regulatory factor
Pfam-B_42039 (release 24.0). This family is involved in the chronological life-span of S. cerevisiae. Over-expression leads to an extended viability of wild-type strains, indicating a role in regulation.. +PF12856 Anaphase-promoting complex subunit 9
Apc9 is one of the subunits of the anaphase-promoting complex, or cyclosome , which is essential for regulating entry into anaphase and exit from mitosis. The APC is a ubiquitin-protein ligase complex. All APC subunits are members of the cullin family proteins, which bind to a ring-finger subunit via a conserved cullin domain . The APC is made up of four parts, the third of which is a tetratricopeptide repeat arm (TPR) that contains Apc9 .. +PF12857 TOBE-like domain
Pfam-B_1384 (release 24.0). The TOBE domain (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulfate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain.. +PF12859 Anaphase-promoting complex subunit 1
Apc1 is the largest of the subunits of the anaphase-promoting complex or cyclosome. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1,2]. Infection of human fibroblasts with human cytomegalovirus (HCMV) leads to cell cycle dysregulation, which is associated with the inactivation of the anaphase-promoting complex .. +PF12860 PAS fold
Pfam-B_21375 (release 24.0). The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs . The PAS fold appears in archaea, eubacteria and eukarya.. +PF12861 Anaphase-promoting complex subunit 11 RING-H2 finger
Apc11 is one of the subunits of the anaphase-promoting complex or cyclosome . The APC subunits are cullin family proteins with ubiquitin ligase activity . Polyubiquitination marks proteins for degradation by the 26S proteasome and is carried out by a cascade of enzymes that includes ubiquitin-activating enzymes (E1s), ubiquitin-conjugating enzymes (E2s), and ubiquitin ligases (E3s). Apc11 acts as an E3 enzyme and is responsible for recruiting E2s to the APC and for mediating the subsequent transfer of ubiquitin to APC substrates in vivo. In Saccharomyces cerevisiae this RING-H2 finger protein defines the minimal ubiquitin ligase activity of the APC, and the integrity of the RING-H2 finger is essential for budding yeast cell viability .. +PF12862 Anaphase-promoting complex subunit 5
Apc5 is a subunit of the anaphase-promoting complex/cyclosome (APC/C) which is a multi-subunit ubiquitin ligase that mediates the proteolysis of cell cycle proteins in mitosis and G1. Although it does not harbour a classical RNA binding domain, Apc5 binds the poly(A) binding protein (PABP), which directly binds the internal ribosome entry site (IRES) of growth factor 2 mRNA. PABP was found to enhance IRES-mediated translation, whereas Apc5 over-expression counteracted this effect. In addition to its association with the APC/C complex, Apc5 binds much heavier complexes and co-sediments with the ribosomal fraction [1,2]. The N-terminus of Afi1 serves to stabilise the union between Apc4 and Apc5, both of which lie towards the bottom-front of the APC . This region of the Apc5 member proteins carries a TPR-like motif.. +PF12863 Domain of unknown function (DUF3821)
This is a domain largely confined to sequences from Methanomicrobiales found on putative lipases. The function is not known.. +PF12864 Protein of unknown function (DUF3822)
This is a family of uncharacterised bacterial proteins. However, structural-similarity searches indicate the family takes on an actin-like ATPase fold.. +PF12866 Protein of unknown function (DUF3823)
This is a family of uncharacterised proteins from Bacteroidetes. It has characteristic DN and DR sequence-motifs. The function is not known.. +PF12867 DinB superfamily
The DinB family are an uncharacterised family of potential enzymes. The structure of these proteins is composed of a four helix bundle .. +PF12868 Domain of unknwon function (DUF3824)
This is a repeating domain found in fungal proteins. It is proline-rich, and the function is not known.. +PF12869 tRNA_anti-like
JCSG_target_390051_3f1z. This is a family of bacterial, archaeal and viral proteins that is related to the tRNA_anti family Pfam:PF01336. The major characteristic of families like tRNA_anti is their OB-fold, and many of them bind DNA.. +PF12870 Lumazine-binding domain
JCSG_target_391926_3k7c. This is a family of putative lipoproteins from bacteria. Many members of the family are defined as having a lumazine-binding domain. Lumazine is a fluorescent accessory protein having 6,7-dimethyl-8-(1'-D-ribityl) lumazine (DMRL) as its authentic chromophore; it modulates the emission of bacterial luciferase to shorter wavelengths with increasing luminous strength. The family is related to the NTF2-like transpeptidase family.. +PF12871 Pre-mRNA-splicing factor 38-associated hydrophilic C-term
This domain is a hydrophilic region found at the C-terminus of plant and metazoan pre-mRNA-splicing factor 38 proteins. The function is not known.. +PF12872 OST_LOTUS;
OST-HTH/LOTUS domain. A predicted RNA-binding domain found in insect Oskar and vertebrate TDRD5/TDRD7 proteins that nucleate or organize structurally related ribonucleoprotein (RNP) complexes, the polar granule and nuage, is poorly understood . The domain adopts the winged helix-turn-helix fold and binds RNA with a potential specificity for dsRNA . In eukaryotes this domain is often combined in the same polypeptide with protein-protein- or lipid-interaction domains that might play a role in anchoring these proteins to specific cytoskeletal structures. Thus, proteins with this domain might have a key role in the recognition and localization of dsRNA, including miRNAs, rasiRNAs and piRNAs hybridized to their targets. In other cases, this domain is fused to ubiquitin-binding, E3 ligase and ubiquitin-like domains indicating a previously under-appreciated role for ubiquitination in regulating the assembly and stability of nuage-like RNP complexes. Both bacteria and eukaryotes encode a conserved family of proteins that combines this predicted RNA-binding domain with a previously uncharacterized RNAse domain belonging to the superfamily that includes the 5'->3' nucleases, PIN and NYN domains .. +PF12873 Domain of unknown function (DUF3825)
Potential uncharacterized enzymatic domain associated with bacterial Pfam:PF12872 domains. Has conserved residues suggestive of an enzymatic role probably related to RNA metabolism.. +PF12874 Zinc-finger of C2H2 type
This is a zinc-finger domain with the CxxCx(12)Hx(6)H motif, found in multiple copies in a wide range of proteins from plants to metazoans. Some member proteins, particularly those from plants, are annotated as being RNA-binding.. +PF12875 Protein of unknown function (DUF3826)
JCSG_target_393061_3g6i. This is a putative sugar-binding family.. +PF12876 Sugar-binding cellulase-like
JCSG_target_394744_3gyc. This is a putative cellulase family.. +PF12877 Domain of unknown function (DUF3827)
Zenonos ZA, Mistry J. This family contains the Swiss:Q9HCM3 protein which has been found to be fused to the BRAF gene in many cases of pilocytic astrocytomas. The fusion is due mainly to a tandem duplication of 2 Mb at 7q34 [1-2]. Although nothing is known about the function of Swiss:Q9HCM3 protein, the BRAF protein is a well characterised oncoprotein. It is a serine/threonine protein kinase which is implicated in MAP/ERK signalling, a critical pathway for the regulation of cell division, differentiation and secretion [1-3].. +PF12878 SICA extracellular beta domain
The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. There can be between 1 and 10 copies of this cysteine-rich domain .. +PF12879 SICA C-terminal inner membrane domain
The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. The C-terminal domain is thought to remain in the erythrocyte, found in juxtaposition to the single transmembrane domain. To date, all full length proteins contain a single copy of this domain .. +PF12881 NUT protein N terminus
Zenonos ZA, Mistry J. This family includes the NUT protein. The gene encoding for NUT protein (Nuclear Testis protein) is found fused to BRD3 or BRD4 genes, in some aggressive types of carcinoma, due to chromosomal translocations [1-2]. Proteins of the BRD family contain two bromodomains that bind transcriptionally active chromatin through associations with acetylated histones H3 and H4 [1-2]. Such proteins are crucial for the regulation of cell cycle progression. On the other hand, little is known about NUT protein. NUT is known to have a Nuclear Export Sequence (NES) as well as a Nuclear Localization Signal (NLS), both located towards the C-terminal end of the protein [1-2]. A fused NUT-GFP protein showed either cytoplasmic or nuclear localization, suggesting that it is subject to nuclear/cytoplasmic shuttling. Consistent with this possibility, treatment with leptomycin B an inhibitor of CRM1-dependent nuclear export resulted in re-distribution of NUT-GFP to the nucleus [3-4]. Inspection of NUT revealed a C-terminal sequence similar to known nuclear export sequences (NES) which are often regulated by phosphorylation [3-4].. +PF12882 NUT protein C terminal
Zenonos ZA, Mistry J. This family includes the NUT protein. The gene encoding for NUT protein (Nuclear Testis protein) is found fused to BRD3 or BRD4 genes, in some aggressive types of carcinoma, due to chromosomal translocations [1-2]. Proteins of the BRD family contain two bromodomains that bind transcriptionally active chromatin through associations with acetylated histones H3 and H4 [1-2]. Such proteins are crucial for the regulation of cell cycle progression. On the other hand, little is known about NUT protein. NUT is known to have a Nuclear Export Sequence (NES) as well as a Nuclear Localization Signal (NLS) [1-2], both located C-terminal to this domain. A fused NUT-GFP protein showed either cytoplasmic or nuclear localization, suggesting that it is subject to nuclear/cytoplasmic shuttling. Consistent with this possibility, treatment with leptomycin B an inhibitor of CRM1-dependent nuclear export resulted in re-distribution of NUT-GFP to the nucleus [3-4]. Inspection of NUT revealed a C-terminal sequence similar to known nuclear export sequences (NES) which are often regulated by phosphorylation [3-4].. +PF12883 Protein of unknown function (DUF3828)
JCSG_target_392985_3kzt. This is a family of bacterial proteins of unknown function.. +PF12884 Transducer of regulated CREB activity, N terminus
Zenonos ZA, Mistry J. This family includes the N terminal region of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-dependent transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) . The proteins display a highly conserved predicted N-terminal coiled-coil domain and an invariant sequence matching a protein kinase A (PKA) phosphorylation consensus sequence (RKXS) . The coiled-coil structure interacts with the bZIP domain of CREB . This interaction may occur via ionic bonds because it is disrupted under high-salt conditions . In addition to CREB-binding, the N-terminal region plays a role in the tetramer formation of TORCs , but the physiological function of the multimeric complex has not been clarified yet.. +PF12885 Transducer of regulated CREB activity middle domain
Zenonos ZA, Mistry J. This family includes the region between the N and C terminus of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-dependent transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) . Although the C- and N- terminal domains of these proteins have been well characterised [1-2], no functional role has been assigned to the central region, yet.. +PF12886 Transducer of regulated CREB activity, C terminus
Zenonos ZA, Mistry J. This family includes the C terminal region of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-dependent transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) . The C terminus region is negatively charged, resembling the transcription activation domains. When this domain, from all three human TORC proteins, was expressed as fusion proteins with the DNA-binding domain of GAL4 (GAL4-BD), and tested for induction of a minimal promoter linked to GAL4-binding sites (UAS-GAL4), UAS-GAL4 was potently induced by GAL4-BD fusions containing the C-terminal portion of all three human TORCs .. +PF12887 SICA extracellular alpha domain
The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. This domain is typically found at the N-terminus, with 1 or 2 copies per protein. The domain is cysteine-rich and similar to PFAM:PF12878 .. +PF12888 Lipid-binding putative hydrolase
JCSG_target_393040_3h3i. This is a small family of lipid-binding proteins found in Bacteroidetes.. +PF12889 Protein of unknown function (DUF3829)
JCSG_target_393163_3iee. This is a small family of proteins from several bacterial species, whose function is not known. It may, however, be related to the GvpL_GvpF family of proteins, Pfam:PF06386.. +PF12890 Dihydro-orotase-like
This is a small family of dihydro-orotase-like proteins from various bacteria.. +PF12891 Glycoside hydrolase family 44
This is a family of bacterial glycoside hydrolases formerly known as cellulase family J, and now known as Cel44A. It is one of the major enzymatic components of the cellulosome of Clostridium thermocellum strain F1 and of many other Firmicutes.. +PF12892 T surface-antigen of pili
The FCT and equivalent region genes of Streptococcus pyogenes and other related bacteria encode surface proteins that include fibronectin- and collagen-binding proteins and the serological markers known as T antigens. Some of these proteins give rise to pilus-like appendages . The FctA family is found in many Firmicutes and related bacteria. In S. pyogenes, the pili have a role in bacterial adherence and colonisation of human tissues .. +PF12893 Putative lumazine-binding
This is a family of uncharacterised proteins. However, the family belongs to the NTF2-like superfamily of various enzymes, and some of the members of the family are putative dehydrogenases.. +PF12894 Anaphase-promoting complex subunit 4 WD40 domain
Apc4 contains an N-terminal propeller-shaped WD40 domain. The N-terminus of Afi1 serves to stabilise the union between Apc4 and Apc5, both of which lie towards the bottom-front of the APC.. +PF12895 Anaphase-promoting complex, cyclosome, subunit 3
Apc3, otherwise known as Cdc27, is one of the subunits of the anaphase-promoting complex or cyclosome. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1, 2]. The protein members of this family contain TPR repeats just as those of Apc7 do, and it appears that these TPR units bind the C-termini of the APC co-activators CDH1 and CDC20 .. +PF12896 Anaphase-promoting complex, cyclosome, subunit 4
Apc4 is one of the larger of the subunits of the anaphase-promoting complex or cyclosome. This family represents the long domain downstream of the WD40 repeat/s that are present on the Apc4 subunits. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1,2]. Results in C.elegans show that the primary essential role of the spindle assembly checkpoint is not in the chromosome segregation process itself but rather in delaying anaphase onset until all chromosomes are properly attached to the spindle. The APC/C is likely to be required for all metaphase-to-anaphase transitions in a multicellular organism .. +PF12897 Alanine-glyoxylate amino-transferase
JCSG_target_390749_3ez1. These proteins catalyse the reversible transfer of an amino group from the amino acid substrate to an acceptor alpha-keto acid . They require pyridoxal 5'-phosphate (PLP) as a cofactor to catalyse this reaction. Trans-amination reactions are of central importance in amino acid metabolism and in links to carbohydrate and fat metabolism. This class of aminotransferases acts as dimers in a head-to-tail configuration .. +PF12898 Stc1 domain
The domain contains 8 conserved cysteines that may bind to zinc. In S. pombe this protein acts as a protein linker which links the chromatin modifying CLRC complex to RNAi by tethering it to the RITS complex. The region is reported as a LIM domain here, but has a slightly different arrangement of its CxxC pairs from the Pfam LIM domain Pfam:PF00412, hence why it is not part of that family . The tandem zinc-finger structure could mediate protein-protein interactions.. +PF12899 Alkaline and neutral invertase
This is a family of bacterial and plant alkaline and neutral invertases, EC:3.2.1.26, previously known as Invertase_neut Pfam:PF04853.. +PF12900 Pyridoxamine 5'-phosphate oxidase
Pyridoxamine 5'-phosphate oxidase is a FMN flavoprotein that catalyses the oxidation of pyridoxamine-5-P (PMP) and pyridoxine-5-P (PNP) to pyridoxal-5-P (PLP). This entry contains several pyridoxamine 5'-phosphate oxidases, and related proteins.. +PF12901 SUZ-C motif
The SUZ-C domain is a conserved motif found in one or more copies in several RNA-binding proteins. It is always found at the C-terminus of the protein and appears to be required for localization of the protein to specific subcellular structures. It was first characterized in the C.elegans protein Szy-20 which localizes to the centrosome. It is widely distributed in eukaryotes.. +PF12902 Ferritin-like
JCSG_target_390707_3hli. This is a family of bacterial ferritin-like substances that also includes a C-terminal domain of VioB, polyketide synthase enzymes, that make up one of the key components of the violacein biosynthesis pathway. Violacein is a purple-coloured, broad-spectrum antibacterial pigment.. +PF12903 Protein of unknown function (DUF3830)
JCSG_target_392654_3kop. This is a family of bacterial and archaeal proteins, the structure for one of whose members has been characterised. PDB:3kop from Swiss:A0JVT3 probably adopts a new hexameric form compared to previous structures. The putative active site is near the domain interface. 3kop is most closely related, structurally, to PDB:1zx8, where the potential active site is located near residues E51 and Y53 (conserved in 1zx8). Beyond the two residues above, the other residues are not conserved. Also the shape of the active site differs from that of 1zx8. PDB:1zx8 belongs to family DUF369, Pfam:PF04126, which is part of the cyclophilin-like clan.. +PF12904 Putative collagen-binding domain of a collagenase
JCSG_target_393067_3kzs. This domain is likely to be the collagen-binding domain of a family of bacterial collagenase enzymes. It is the C-terminal part of the PDB:3kzs structure determined from Swiss:Q8A905 (information derived from TOPSAN).. +PF12905 Endo-alpha-N-acetylgalactosaminidase
Virulence of pathogenic organisms such as the Gram-positive Streptococcus pneumoniae is largely determined by the ability to degrade host glycoproteins and to metabolise the resultant carbohydrates. This family is the enzymatic region, EC:3.2.1.97, of the cell surface proteins that specifically cleave Gal-beta-1,3-GalNAc-alpha-Ser/Thr (T-antigen, galacto-N-biose), the core 1 type O-linked glycan common to mucin glycoproteins. This reaction is exemplified by the S. pneumoniae protein Swiss:B2DRU5, where Asp764 is the catalytic nucleophile-base and Glu796 the catalytic proton donor.. +PF12906 RING-variant domain
+PF12907 Zinc-binding
This is small family of metazoan zinc-binding proteins.. +PF12909 Protein of unknown function (DUF3832)
JCSG_target_391895_3k6q. This is a family of proteins from bacteria and archaea of unknown function. The N-terminal part of the structure from Swiss:Q0AZ30 shows remote homology to the N-terminus of the bacterial toxin/antitoxin 'addiction module', and the C-terminus is distantly related to the TTHA1013/TTHA0281 superfamily.. +PF12910 Antitoxin of toxin-antitoxin stability system N-terminal
JCSG_target_391895_3k6q. This domain appears to be the N-terminus of the RelB antitoxin of toxin-antitoxin stability system or prevent-host death system. Together RelE toxin and the RelB antitoxin form a non-toxic complex. Although toxin-antitoxin gene cassettes were first found in plasmids, it is clear that these loci are abundant in free-living prokaryotes, including many pathogenic bacteria, and these toxin-antitoxin loci provide a control mechanism that helps free-living prokaryotes cope with nutritional stress [1,2].. +PF12911 N-terminal TM domain of oligopeptide transport permease C
Pfam-B_1473 (release 24.0). Oligopeptide permeases (Opp) have been identified in numerous gram-negative and -positive bacteria. These transport systems belong to the superfamily of highly conserved ATP-binding cassette transporters. Typically, Opp importers comprise a complex of five proteins. The oligopeptide-binding protein OppA is responsible for the capture of peptides from the external medium. Two integral highly hydrophobic membrane spanning proteins, OppB and OppC, form a channel through the membrane used for peptide translocation. This N-terminal domain appears to be the first TM domain of the molecule .. +PF12912 N-term_NLPC_P60;
NLPC_P60 stabilising domain, N term. Pfam-B_845 (release 24.0). This domain, at the N-terminus, appears to be the stabilising domain for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The next domain is an SH3b1, the third an SH3b2 and the last, the C-terminal region, the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN).. +PF12913 SH3 domain of the SH3b1 type
JCSG_target_405735_3m1u. This domain appears to be an SH3 domain of the SH3b1-type, and is just C-terminal to an N-terminal domain that is probably the stabilising domain for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The next domain is an SH3b2 and the last, the C-terminal region, is the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN).. +PF12914 SH3 domain of SH3b2 type
JCSG_target_405735_3m1u. This domain appears to be an SH3 domain of the SH3b2-type, and is the second SH3 domain to be found, downstream of an N-terminal domain that is probably the stabilising domain, for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The last, the C-terminal region, is the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN).. +PF12915 Protein of unknown function (DUF3833)
Pfam-B_259 (release 24.0). This is a family of uncharacterised proteins found in Proteobacteria.. +PF12916 Protein of unknown function (DUF3834)
This family is likely to be related to solute-binding lipo-proteins.. +PF12917 HD_1;
HD containing hydrolase-like enzyme . This is a family of bacterial and archaeal hydrolases.. +PF12918 TcdB toxin N-terminal helical domain
This is a short helical bundle domain found associated with the catalytic domain of the TcdB toxin from C. difficile . The function of this domain is unknown, but it may be involved in substrate recognition.. +PF12919 TcdA/TcdB catalytic glycosyltransferase domain
This domain represents the N-terminal glycosyltransferase from a set of toxins found in some bacteria. This domain in TcdB glycosylates the host RhoA protein.. +PF12920 TcdA/TcdB pore forming domain
This family represents the most conserved region within the C. difficile Toxin A and Toxin B pore forming region.. +PF12921 Mitochondrial ATPase expression
ATP13 is necessary for the expression of subunit 9 of mitochondrial ATPase. The protein has a basic amino terminal signal sequence that is cleaved upon import into mitochondria .. +PF12922 non-SMC mitotic condensation complex subunit 1, N-term
Pfam-B_15091 (release 24.0). The three non-SMC (structural maintenance of chromosomes) subunits of the mitotic condensation complex are Cnd1-3. The whole complex is essential for viability and the condensing of chromosomes in mitosis. This is the conserved N-terminus of the subunit 1.. +PF12923 Ribosomal RNA-processing protein 7 (RRP7)
Pfam-B_1057 (release 24.0). RRP7 is an essential protein in yeast that is involved in pre-rRNA processing and ribosome assembly . It is speculated to be required for correct assembly of rpS27 into the pre-ribosomal particle [1-2].. +PF12924 Copper-binding of amyloid precursor, CuBD
This short domain, part of the extra-cellular N-terminus of the amyloid precursor protein, APP, can bind both copper and zinc, CuBD. The structure of Cu2+-bound CuBD reveals that the metal ligands are His147, His151, Tyr168 and two water molecules, which are arranged in a square pyramidal geometry. The structure of Cu+-bound CuBD is almost identical to the Cu2+-bound structure except for the loss of one of the water ligands. The geometry of the site is unfavourable for Cu+, thus providing a mechanism by which CuBD could readily transfer Cu ions to other proteins.. +PF12925 E2 domain of amyloid precursor protein
The E2 domain is the largest of the conserved domains of the amyloid precursor protein. The structure of E2 consists of two coiled-coil sub-structures connected through a continuous helix, and bears an unexpected resemblance to the spectrin family of protein structures. E2 can reversibly dimerise in solution, and the dimerisation occurs along the longest dimension of the molecule in an antiparallel orientation, which enables the N-terminal substructure of one monomer to pack against the C-terminal substructure of a second monomer. The high degree of conservation of residues at the putative dimer interface suggests that the E2 dimer observed in the crystal could be physiologically relevant. Heparan sulfate proteoglycans, the putative ligands for the precursor present in extracellular matrix, bind to E2 at a conserved and positively charged site near the dimer interface .. +PF12926 Mitotic-spindle organizing gamma-tubulin ring associated
FAM128A and FAM128B proteins have been re-named MOZART2A and B. The name MOZART is derived from letters of 'mitotic-spindle organizing proteins associated with a ring of gamma-tubulin'. This family operates as part of the gamma-tubulin ring complex, gamma-TuRC, one of the complexes necessary for chromosome segregation. This complex is located at centrosomes and mediates the formation of bipolar spindles in mitosis; it consists of six subunits. However, unlike the other four known subunits, the MOZART proteins, both 1 and 2, do not carry the conserved 'Spc97-Spc98' GCP domain, so the TUBCGP nomenclature cannot be used for it. The exact function of MOZART2 is not clear .. +PF12927 Domain of unknown function (DUF3835)
Pfam-B_14632 (release 24.0). This is a C-terminal domain conserved in fungi.. +PF12928 tRNA-splicing endonuclease subunit sen54 N-term
Pfam-B_644 (release 24.0). This is an N-terminal family of archaeal and metazoan sen54 proteins that forms one of the tRNA-splicing endonuclease subunits.. +PF12929 Stretch-activated Ca2+-permeable channel component
Pfam-B_1244 (release 24.0). MID1 is a yeast Saccharomyces cerevisiae gene encoding a plasma membrane protein required for Ca2+ influx induced by the mating pheromone, alpha-factor. Mid1 protein plays a crucial role in supplying Ca2+ during the mating process. Mid1 is composed of 548-amino-acid residues with four hydrophobic regions named H1, H2, H3 and H4, and two cysteine-rich regions (C1 and C2) at the C-terminal. This family contains the H3, H4, C1 and C2 regions. suggesting that H1 is a signal sequence responsible for the alpha-factor-induced Mid1 delivery to the plasma membrane. The region from H1 to H3 is required for the localisation of Mid1 in the plasma and ER membranes. Trafficking of Mid1-GFP to the plasma membrane is dependent on the N-glycosylation of Mid1 and the transporter protein Sec12. These findings suggest that the trafficking of Mid1-GFP to the plasma membrane requires a Sec12-dependent pathway from the ER to the Golgi, and that Mid1 is recruited via a Sec6- and Sec7-independent pathway from the Golgi to the plasma membrane.. +PF12930 Family of unknown function (DUF3836)
Family of uncharacterised proteins found in Bacteroidales species. Test.. +PF12931 Sec23-binding domain of Sec16
Sec16 is a multi-domain vesicle coat protein. The C-terminal region is the part that binds to Sec23, a COPII vesicle coat protein. This association is part of the transport vesicle coat structure .. +PF12932 Vesicle coat trafficking protein Sec16 mid-region
Sec16 is a multi-domain vesicle coat protein. This central region is the functional part of the molecules and thus is vital for the family's role in mediating the movement of protein-cargo between the organelles of the secretory pathway .. +PF12933 FTO catalytic domain
This domain is the catalytic AlkB-like domain from the FTO protein . This domain catalyses a demethylase activity with a preference for 3-methylthymidine.. +PF12934 FTO C-terminal domain
This domain is found at the C-terminus of the FTO protein which was shown to be associated with increased BMI and obesity risk in humans. The N-terminal domain of this protein is a DNA demethylase and this domain is found to associate with the N-terminal domain in the crystal structure . This domain is alpha helical with three helices that form a bundle .. +PF12935 Vesicle coat trafficking protein Sec16 N-terminus
Sec16 is a multi-domain vesicle coat protein. The overall function of Sec16 is in mediating the movement of protein-cargo between the organelles of the secretory pathway. Over-expression of truncated mutants of only the N-terminus are lethal, and this portion does not appear to be essential for function so may act as a stabilising region .. +PF12936 KRI1-like family C-terminal
Pfam-B_8372 (release 7.7). The yeast member of this family (Kri1p) is found to be required for 40S ribosome biogenesis in the nucleolus . This is the C-terminal domain of the family.. +PF12937 F-box-like
Pfam-B_22368 (release 24.0). This is an F-box-like family.. +PF12938 M domain of GW182
+PF12939 Domain of unknown function (DUF3837)
A small, compact all-alpha helical domain of unknown function. This domain is currently only found in Clostridiales species. . +PF12940 Recombination-activation protein 1 (RAG1)
This family contains recombination activating protein 1, which is the catalytic component of the RAG complex. The RAG complex is a multi-protein complex that mediates DNA cleavage during V(D)J (variable-diversity-joining) recombination . RAG1 mediates DNA-binding to the conserved recombination signal sequences (RSS) . Many of the proteins in this family are fragments.. +PF12941 DUF3838;
HCV NS5a protein C-terminal region. This is a family of proteins found in the hepatitis C virus. This family contains the C-terminal region of the NS5A protein. The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525.. +PF12942 Archaeal ammonia monooxygenase subunit A (AmoA)
This is an archaeal family that contains ammonia monooxygenase subunit A. Ammonia monooxygenase is an enzyme that oxidises ammonia to nitrite and nitrate, thus playing a significant role in the nitrogen cycle. Ammonia-oxidising archaea (AOA) are widespread in marine environments .. +PF12943 Protein of unknown function (DUF3839)
This is a family of uncharacterised proteins that are found in Trichomonas.. +PF12944 Protein of unknown function (DUF3840)
This is a family of uncharacterised proteins found in hepatitis A viruses.. +PF12945 Flagellar protein YcgR
Mistry J, Auchincloss A. This domain is found N terminal to Pfam:PF07238. Proteins which contain YcgR domains are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias . This family contains members not captured by Pfam:PF07317.. +PF12946 MSP1 EGF domain 1
This EGF-like domain is found at the C-terminus of the malaria parasite MSP1 protein. MSP1 is the merozoite surface protein 1. This domain is part of the C-terminal fragment that is proteolytically processed from the rest of the protein and is left attached to the surface of the invading parasite.. +PF12947 EGF domain
This family includes a variety of EGF-like domain homologues. This family includes the C-terminal domain of the malaria parasite MSP1 protein .. +PF12948 MSP7-like protein C-terminal domain
MSP7 is a protein family from the malaria parasite that has been found to be associated with processed fragments from the MSP1 protein in a complex involved in red blood cell invasion.. +PF12949 SAP_2;
Mistry J, Sazer S, Wood V. This is a HeH domain. HeH domains form helix-extended loop-helix (HeH) structures. This domain is closely related to Pfam:PF03020 and Pfam:PF02037.. +PF12950 TaqI-like C-terminal specificity domain
This domain is found at the C-terminus of the TaqI protein and is involved in DNA-binding and substrate recognition.. +PF12951 Autotransporter-associated beta strand repeat
This model represents a core 32-residue region of a class of bacterial protein repeat found in one to 30 copies per protein. Most proteins with a copy of this repeat have domains associated with membrane autotransporters (Pfam:PF03797). The repeats occur with a periodicity of 60 to 100 residues. A pattern of sequence conservation is that every second residue is well-conserved across most of the domain. These repeats are likely to have a beta-helical structure.. +PF12952 Domain of unknown function (DUF3841)
This presumed domain is around 190 amino acids in length. As yet no function has been given to any member of the family.. +PF12953 Domain of unknown function (DUF3842)
This short protein is found mainly in firmicute bacteria. It is functionally uncharacterised.. +PF12954 Protein of unknown function (DUF3843)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences .. +PF12955 Domain of unknown function (DUF3844)
This presumed domain is found in fungal species. It contains 8 largely conserved cysteine residues. This domain is found in proteins that are thought to be found in the endoplasmic reticulum.. +PF12956 Domain of Unknown Function with PDB structure
JCSG structure PDB:3GF6. Member PDB:3GF6 has statistically significant similarity to the TNF-like jelly roll fold, which may indicate an immunomodulatory function or a bioadhesion role . +PF12957 Domain of unknown function (DUF3846)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . This domain is found associated with a Pfam:PF07275-like domain. This suggests that this family may also be involved in evading host restriction.. +PF12958 Protein of unknown function (DUF3847)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences .. +PF12959 Protein of unknown function (DUF3848)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . This domain is frequently seen with DUF3849.. +PF12960 Protein of unknown function (DUF3849)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . This domain is frequently seen with DUF3848.. +PF12961 Domain of Unknown Function with PDB structure (DUF3850)
JCSG structure PDB:3IUW. The search results from NCBI sequence alignment indicate a conserved domain belonging to the ASCH superfamily . Dali search results show that the protein is structurally similar to the PUA domain, suggesting it may be involved in RNA recognition. It has been reported that the deletion of PUA genes results in impaired growth (RluD) and competitive disadvantage (TruB) in Escherichia coli. Suggestions have been put forward that, apart from their usual catalytic role, certain PUS enzymes (e.g. TruB) may also act as chaperones for RNA folding. The interface interaction indicates that the biomolecule of protein NP_809782.1 should be a dimer.. +PF12962 Protein of unknown function (DUF3851)
JCSG - Joint Center for Structural Genomics. A family of uncharacterised proteins found by clustering human gut metagenomic sequences .. +PF12963 Protein of unknown function (DUF3852)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . This domain is frequently seen with DUF3848.. +PF12964 Protein of unknown function (DUF3853)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences .. +PF12965 Domain of unknown function (DUF3854)
JCSG - Joint Center for Structural Genomics. A family of uncharacterised proteins found by clustering human gut metagenomic sequences . This domain is likely to be related to the Toprim domain.. +PF12966 N-ATPase, AtpR subunit
Membrane protein with three predicted transmembrane segments, two of which contain conserved Arg residues. AtpR genes are found in the N-ATPase (archaeal-type F1-Fo-ATPase) operons and are predicted to interact with the conserved Glu/Asp residues in the c subunits, regulating the assembly and/or function of the membrane-embedded ring of 'c' (proteolipid) subunits (PFAM:PF00137).. +PF12967 Domain of Unknown Function with PDB structure (DUF3855)
JCSG structure PDB:1O22. Family based on orphan protein (TM0875) from Thermotoga maritima that has been structurally determined as PDB:1O22. The TM0875 gene of Thermotoga maritima encodes a hypothetical protein NP_228683 of unknown function. Analysis of TM0875 genomic context reveals the presence of MMT1 (a predicted Co/Zn/Cd cation transporter) and an inactive homolog of metal-dependent proteases. 1O22 shows weak structural similarity with the phosphoribosylformylglycinamidine synthase 1t4a (Dali Z-scr=4.6), the yggU protein (PDB structure:1n91; with DALI Z-scr=3), and with the thioesterase superfamily member (PDB structure 2cy9 - found using FATCAT), even though they have very low sequence identity.. +PF12968 Domain of Unknown Function (DUF3856)
JCSG structure PDB:2HR2. TPR-like protein. The 2hr2 structure belongs to the SCOP all alpha class, TPR-like superfamily, CT2138-like family. A DALI search gives hits with the putative peptidyl-prolyl isomerase 2fbn (Z=16), the SGTA protein (Z=16), the PLCR protein 2qfc (Z=16), a putative FK506-binding protein (PDB:1qz2-A; DALI Z-score 15.3; RMSD 2.9; 16% sequence identity within 132 superimposed residues), and with the tetratricopeptide repeats of the protein phosphatase 5 (PDB:2bug; DALI Z-score 15.1; RMSD 2.5; 19% sequence identity within 117 superimposed residues).. +PF12969 Domain of Unknown Function with PDB structure (DUF3857)
JCSG structure PDB:3KD4. This family is based on the first domain of the PDB structure PDB:3KD4(residues 1-228). It is structurally similar to domains in other hydrolases, eg. M1 family aminopeptidase (3ebi, Z=10, rmsd 3.6A for 152 CA, seq id 12%), despite lack of any significant sequence similarity.. +PF12970 Domain of Unknown Function with PDB structure (DUF3858)
JCSG structure PDB:3KD4. This family is based on the third domain of the PDB structure 3KD4(residues 410-525). It is structurally similar to part of neuropilin-2 (Z=4.6, rmsd 3.6A for 83 CA, 7% seq id). This domain and the second domain appears to be part of peptide-n-glycanase (1x3w, 2g9f).. +PF12971 Alpha-N-acetylglucosaminidase (NAGLU) N-terminal domain
Pfam-B_6295 (release 7.7). Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate . Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations . The structure shows that the enzyme is composed of three domains. This N-terminal domain has an alpha-beta fold .. +PF12972 Alpha-N-acetylglucosaminidase (NAGLU) C-terminal domain
Pfam-B_6295 (release 7.7). Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate . Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations . The structure shows that the enzyme is composed of three domains. This C-terminal domain has an all alpha helical fold .. +PF12973 ChrR Cupin-like domain
Members of this family are part of the cupin superfamily. This family includes the transcriptional activator ChrR.. +PF12974 ABC transporter, phosphonate, periplasmic substrate-binding protein
This is a family of periplasmic proteins which are part of the transport system for alkylphosphonate uptake.. +PF12975 Domain of unknown function (DUF3859)
This short domain is functionally uncharacterised.. +PF12976 Domain of Unknown Function with PDB structure (DUF3860)
JCSG structure PDB:2OD5. A protein family created to cover PDB:2OD5. 2OD5 is a hypothetical protein (JCVI_PEP_1096688149193) from an environmental metagenome (unidentified marine microbe).. +PF12977 Domain of Unknown Function with PDB structure (DUF3861)
JCSG structure PDB:3CJL. The 3cjl structure is likely a representative of a new fold with some resemblance to 3-helical bundle folds such as the serum albumin-like fold of SCOP. No significant hits reported by a Dali search. This protein is the first structural representative of a small (about 60 proteins) family of proteins that are found among proteo- and enterobacteria (REF http://www.topsan.org/Proteins/JCSG/3CJL).. +PF12978 Domain of Unknown Function with PDB structure (DUF3862)
JCSG structure PDB:3D4E. PDB:3D4E shares structural similarity with beta-lactamase inhibitory proteins (BLIP) which already include 1XXM, 1S0W, 1JTG, 2G2U, 2G2W, 2B5R, and 3due. All of these structures are involved in beta-lactamase inhibitor complexes. (REF http://www.topsan.org/Proteins/JCSG/3d4e). +PF12979 Domain of Unknown Function with PDB structure (DUF3863)
JCSG structure PDB:3LM3. Domain based on 1-364 domain of PDB:3LM3 which is encoded by the BDI_3119 gene from Parabacteroides distasonis atcc 8503.. +PF12980 Domain of Unknown Function with PDB structure (DUF3864)
JCSG structure PDB:3LM3. Domain based on 366-449 domain of PDB:3LM3 which is encoded by the BDI_3119 gene from Parabacteroides distasonis atcc 8503.. +PF12981 Domain of Unknown Function with PDB structure (DUF3865)
JCSG structure PDB:3B5P. Family based on PDB:3B5P encoded by ZP_00108531 from nitrogen-fixing cyanobacterium Nostoc punctiforme pcc 73102 is a CADD-like protein of unknown function. Superposition between protein structures encoded by CT610 from Chlamydia trachomatis (PDB code 1rwc), pyrroloquinolinquinone synthase C (PqqC, PDB code 1otv) and ZP_00108531 revealed that putative active sites in CT610 and ZP_00108531 are identical. ( REF: http://www.topsan.org/Proteins/JCSG/3B5P).. +PF12982 Protein of unknown function (DUF3866)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 352 and 374 amino acids in length.. +PF12983 Protein of unknown function (DUF3867)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 190 amino acids in length.. +PF12984 Domain of unknown function, B. Theta Gene description (DUF3868)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_1065. Based on Bacteroides thetaiotaomicron gene BT_1065, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12985 Domain of unknown function (DUF3869)
Ellrott K, Bakolitsa C. JCSG structure PDB:3KOG. A family based on the N-terminal domain of 3KOG, which shows weak but consistent remote homology with adhesive families such as immunoglobulins and cadherins, suggesting it might form an attachment module.. +PF12986 Domain of unknown function (DUF3870)
Ellrott K, Bakolitsa C. JCSG structure PDB:3KOG. A family based on the C-terminal domain of 3KOG which shows structural similarity to pore-forming proteins , suggesting it may have a lytic function. . +PF12987 Domain of unknown function, B. Theta Gene description (DUF3871)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_2984. Based on Bacteroides thetaiotaomicron gene BT_2984, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12988 Domain of unknown function, B. Theta Gene description (DUF3872)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_2593. Based on Bacteroides thetaiotaomicron gene BT_2593, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12989 Domain of unknown function, B. Theta Gene description (DUF3873)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_2286. Based on Bacteroides thetaiotaomicron gene BT_2286, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12990 Domain of unknown function from B. Theta Gene description (DUF3874)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_4228. Based on Bacteroides thetaiotaomicron gene BT_4228, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12991 Domain of unknown function, B. Theta Gene description (DUF3875)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_4769. Based on Bacteroides thetaiotaomicron gene BT_4769, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12992 Domain of unknown function, B. Theta Gene description (DUF3876)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_0092. Based on Bacteroides thetaiotaomicron gene BT_0092, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2].. +PF12993 Domain of unknown function, E. rectale Gene description (DUF3877)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: EUBREC_0237. Based on Eubacterium rectale gene EUBREC_0237. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture .. +PF12994 Domain of unknown function, E. rectale Gene description (DUF3878)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: EUBREC_0973. Based on Eubacterium rectale gene EUBREC_0973. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture .. +PF12995 Domain of unknown function, E. rectale Gene description (DUF3879)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: EUBREC_1343. Based on Eubacterium rectale gene EUBREC_1343. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture .. +PF12996 DUF based on E. rectale Gene description (DUF3880)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: EUBREC_3218. Based on Eubacterium rectale gene EUBREC_3218. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture .. +PF12997 Domain of unknown function, E. rectale Gene description (DUF3881)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: EUBREC_3695. Based on Eubacterium rectale gene EUBREC_3695. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture .. +PF12998 Inhibitor of growth proteins N-terminal histone-binding
Pfam-B_205 (release 24.0). Histones undergo numerous post-translational modifications, including acetylation and methylation, at residues which are then probable docking sites for various chromatin remodelling complexes. Inhibitor of growth proteins (INGs) specifically bind to residues that have been thus modified. INGs carry a well-characterised C-terminal PHD-type zinc-finger domain, binding with lysine 4-tri-methylated histone H3 (H3K4me3), as well as this N-terminal domain that binds unmodified H3 tails. Although these two regions can bind histones independently, together they increase the apparent association of the ING for the H3 tail.. +PF12999 Glucosidase II beta subunit-like
The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing . The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum .. +PF13000 Acetyl-coenzyme A transporter 1
The mouse Acatn is a 61 kDa hydrophobic protein with six to 10 transmembrane domains. It appears to promote 9-O-acetylation in gangliosides.. +PF13001 Proteasome stabiliser
Pfam-B_682 (release 24.0). The proteasome consists of two subunits, and the capacity of the proteasome to degrade protein depends crucially on the interaction between these two subunits. This interaction is affected by a wide range of factors including metabolites, such as ATP, and proteasome-associated proteins such as Ecm29. Ecm29 stabilises the interaction between the two subunits.. +PF13002 Arrestin_N terminal like
This is a family of proteins related to the Arrestin_N terminal family.. +PF13003 Ribosomal protein L1
This family includes putative ribosomal L1 and L10 proteins and fragments.. +PF13004 Bacteroidetes-Associated Carbohydrate-binding Often N-terminal
The BACON (Bacteroidetes-Associated Carbohydrate-binding Often N-terminal) domain is an all-beta domain found in diverse architectures, principally in combination with carbohydrate-active enzymes and proteases. These architectures suggest a carbohydrate-binding function which is also supported by the nature of BACON's few conserved amino-acids. The phyletic distribution of BACON and other data tentatively suggest that it may frequently function to bind mucin.. +PF13005 HTH_Tnp_IS66;
zinc-finger binding domain of transposase IS66 . This is a zinc-finger region of the N-terminus of the insertion element IS66 transposase.. +PF13006 Insertion element 4 transposase N-terminal
This family represents the N-terminal region of proteins carrying the transposase enzyme, DDE_Tnp_1 (that was Transposase_11), Pfam:PF01609, at the C-terminus. The full-length members are Insertion Element 4, IS4. Within the collection of E.coli strains, ECOR, the number of IS4 elements varies from zero to 14, with an average of 5 copies/strain .. +PF13007 Transposase C of IS166 homeodomain
This is a leucine-zipper-like or homeodomain-like region of transposase TnpC of insertion element IS66.. +PF13008 Zinc-binding domain of Paramyxoviridae V protein
The Paramyxoviridae, which include such respiroviruses as para-influenzae and measles, produce phosphoproteins - protein P - that are integral to the polymerase transcription-replication complex. Protein P consists of two functionally distinct moieties, an N-terminal PNT, and a C-terminal PCT . The P gene region transcribes proteins from all three ORFs, and the V protein consists of the PNT moiety and a more C-terminal 2-zinc-binding domain. This conserved region consists of the two-zinc-binding section sandwiched between beta sheets 6 and 7 of the overall V protein. It is the binding of this core domain of V protein with the DDB1 protein (part of the ubiquitin-ligase complex) of eukaryotes which represents the key element of the virus-host protein interaction . In the Henipavirus family which includes Nipah and Hendra viruses, the V protein is able to block IFN (interferon) signalling by preventing IFN-induced STAT phosphorylation and nuclear translocation . The P gene of morbillivirus is co-transcriptionally edited leading to a V protein being produced.. +PF13009 Putative phage integrase
Pfam-B_5288 (release 24.0). This family is found in association with IS elements.. +PF13010 Primase helical domain
This alpha helical domain is found in a set of bacterial plasmid replication proteins . The domain is found to the C-terminus of the primase/polymerase domain. Mutants of this domain are defective in template binding, dinucleotide formation and conformation change prior to DNA extension .. +PF13011 leucine-zipper of insertion element IS481
This is the upstream region of the conjoined ORF AB of insertion element 481. The significance of IS481 in the detection of Bordetella pertussis is discussed in . The B portion of the ORF AB carries the transposase activity in family rve, PFAM:PF00665.. +PF13012 Maintenance of mitochondrial structure and function
This is C-terminal to the Mov24 region of the yeast proteasomal subunit Rpn11 and seems likely to regulate the mitochondrial fission and tubulation processes, i.e. the outer mitochondrial membrane proteins. This function appears to be unrelated to the proteasome activity of the N-terminal region .. +PF13013 F-box-like domain
The F-box domain has a role in mediating protein-protein interactions in a variety of contexts, such as polyubiquitination, transcription elongation, centromere binding and translational repression.. +PF13014 KH domain
KH motifs bind RNA in vitro . This RNA-binding domain is required for the efficient anchoring of ASH1-mRNA to the distal tip of the daughter cell. ASH1 is a specific repressor of transcription that localizes asymmetrically to the daughter cell nucleus. RNA localisation is a widespread mechanism for achieving localised protein synthesis .. +PF13015 Glucosidase II beta subunit-like protein
The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing . The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum . The beta-subunit confers substrate specificity for di- and monoglucosylated glycans on the glucose-trimming activity of the alpha-subunit .. +PF13016 Cys-rich Gliadin N-terminal
This is a cysteine-rich N-terminal region of gliadin and avenin plant proteins. The exact function is not known.. +PF13017 piRNA pathway germ-plasm component
Maelstrom is a germ-plasm component protein, that is shown to be functionally involved in the piRNA pathway. It is conserved throughout Eukaryota, though it appears to have been lost from all examined teleost fish species. The domain architecture shows that it is coupled with several DNA- and RNA- related domains such as HMG box, SR-25-like and HDAC_interact domains. Sequence analysis and fold recognition have found a distant similarity between Maelstrom domain and the DnaQ 3'-5' exonuclease family with the RNase H fold (Exonuc_X-T, Pfam:PF00929); notably, that the Maelstrom domains from basal eukaryotes contain the conserved 3'-5' exonuclease active site residues (Asp-Glu-Asp-His-Asp, DEDHD). However, the animal and some amoeba maelstrom contain another set of conserved residues (Glu-His-His-Cys-His-Cys, EHHCHC). This evolutionary link together with structural examinations leads to the hypothesis that Maelstrom domains may have a potential nuclease-transposase activity or RNA-binding ability that may be implicated in piRNA biogenesis. A protein function evolution mode, namely "active site switch", has been proposed , in which the amoeba Maelstrom domains are the possible evolutionary intermediates due to their harbouring of the specific characteristics of both 3'-5' exonuclease and Maelstrom domains.. +PF13018 Extended Signal Peptide of Type V secretion system
Coggill P, Desvaux M. This conserved domain is called ESPR for Extended Signal Peptide Region. It is present at the N-terminus of the signal peptides of proteins belonging to the Type V secretion systems, including the autotransporters (T5aSS), TpsA exoproteins of the two-partner system (T5bSS) and trimeric autotransporters (TAAs). So far, the ESPR is present only in Gram-negative bacterial proteins originating from the classes Beta- and Gamma-proteobacteria. ESPR severely impairs inner membrane translocation, suggesting that it adopts a particular conformation or it interacts with a cytoplasmic or inner membrane co-factor, prior to exportation. Deletion of ESPR causes mis-folding of the TAAs passenger domain in the periplasm , substantially impairing its translocation across the outer membrane .. +PF13019 Ubiquitin-like;
Telomere stability and silencing. Pfam-B_2457 (release 24.0). Sde2 has been identified in fission yeast as an important factor in telomere formation and maintenance. This is a more N-terminal domain on these nuclear proteins, and is essential for telomeric silencing and genomic stability.. +PF13020 Domain of unknown function (DUF3883)
This domain is uncharacterised. It is found on restriction endonucleases.. +PF13021 Domain of unknown function (DUF3885)
Pfam-B_1173 (release 24.0). A putative Rac prophage DNA binding protein. This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved YDDRG sequence motif. There is a single completely conserved residue D that may be functionally important.. +PF13022 Helix-turn-helix of insertion element transposase
This is a family of largely phage proteins which are likely to be helix-turn-helix insertion elements.. +PF13023 HD domain
HD domains are metal dependent phosphohydrolases.. +PF13024 Protein of unknown function (DUF3884)
Pfam-B_1352 (release 24.0). This family of proteins is functionally uncharacterised. However several proteins are annotated as Tagatose 1,6-diphosphate aldolase, but evidence to support this could not be found. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 106 amino acids in length. There are two completely conserved residues (Y and F) that may be functionally important.. +PF13025 Protein of unknown function (DUF3886)
Pfam-B_1536 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two completely conserved L residues that may be functionally important.. +PF13026 Protein of unknown function (DUF3887)
Pfam-B_1534 (release 24.0). This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 128 and 167 amino acids in length. The proteins in this family contain an N-terminal lipid attachment site.. +PF13027 Protein of unknown function (DUF3888)
Pfam-B_1080 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 111 and 149 amino acids in length.. +PF13028 Protein of unknown function (DUF3889)
Pfam-B_1146 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two completely conserved residues (A and Y) that may be functionally important.. +PF13029 Domain of unknown function (DUF3890)
Pfam-B_1148 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 70 amino acids in length.. +PF13030 Protein of unknown function (DUF3891)
Pfam-B_1216 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 250 amino acids in length.. +PF13031 Protein of unknown function (DUF3892)
Pfam-B_1252 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 87 and 104 amino acids in length.. +PF13032 Domain of unknown function (DUF3893)
Pfam-B_1590 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 123 and 144 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF13033 Protein of unknown function (DUF3894)
Pfam-B_1594 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 66 and 79 amino acids in length. There are two conserved sequence motifs: FNIC and MALLNLT.. +PF13034 Protein of unknown function (DUF3895)
Pfam-B_1598 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two completely conserved residues (Y and L) that may be functionally important.. +PF13035 Protein of unknown function (DUF3896)
Pfam-B_1603 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13036 Protein of unknown function (DUF3897)
This is a bacterial family of uncharacterised proteins. Some of the proteins in this family are annotated as putative lipoproteins.. +PF13037 Domain of unknown function (DUF3898)
Pfam-B_1179 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. There are two conserved sequence motifs: DFG and FEKG.. +PF13038 Domain of unknown function (DUF3899)
Pfam-B_1174 (release 24.0). Putative Tryptophanyl-tRNA synthetase.. +PF13039 Protein of unknown function (DUF3900)
Pfam-B_1279 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 360 amino acids in length.. +PF13040 Protein of unknown function (DUF3901)
Pfam-B_1316 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a single completely conserved residue L that may be functionally important.. +PF13041 PPR repeat family
This repeat has no known function. It is about 35 amino acids long and is found in up to 18 copies in some proteins. The family appears to be greatly expanded in plants and fungi. The repeat has been called PPR .. +PF13042 Protein of unknown function (DUF3902)
Pfam-B_1357 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. There is a conserved LGI sequence motif.. +PF13043 Domain of unknown function (DUF3903)
Pfam-B_1600 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length.. +PF13044 Protein of unknown function (DUF3904)
Pfam-B_1386 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in viruses. Proteins in this family are typically between 437 and 448 amino acids in length.. +PF13045 Protein of unknown function (DUF3905)
Pfam-B_1447 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length.. +PF13046 Protein of unknown function (DUF3906)
Pfam-B_1532 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved EKK sequence motif.. +PF13047 Protein of unknown function (DUF3907)
Pfam-B_1274 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 160 amino acids in length. There is a conserved AYTG sequence motif.. +PF13048 Protein of unknown function (DUF3908)
Pfam-B_1533 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 140 amino acids in length. There is a single completely conserved residue Y that may be functionally important.. +PF13049 Protein of unknown function (DUF3910)
Pfam-B_1539 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length.. +PF13050 Protein of unknown function (DUF3911)
Pfam-B_1540 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13051 Protein of unknown function (DUF3912)
Pfam-B_1615 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13052 Protein of unknown function (DUF3913)
Pfam-B_1619 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13053 Protein of unknown function (DUF3914)
Pfam-B_1562 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two conserved sequence motifs: KFDIR and DLW.. +PF13054 Protein of unknown function (DUF3915)
Pfam-B_1549 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF13055 Protein of unknown function (DUF3917)
Pfam-B_1608 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length.. +PF13056 Protein of unknown function (DUF3918)
Pfam-B_1567 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two completely conserved residues (G and R) that may be functionally important.. +PF13057 Protein of unknown function (DUF3919)
Pfam-B_1479 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 251 and 262 amino acids in length. There is a conserved YLNG sequence motif.. +PF13058 Protein of unknown function (DUF3920)
Pfam-B_1595 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length.. +PF13059 Protein of unknown function (DUF3992)
Pfam-B_1628 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 87 and 98 amino acids in length.. +PF13060 Protein of unknown function (DUF3921)
Pfam-B_1624 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13061 Protein of unknown function (DUF3923)
Pfam-B_1586 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13062 Protein of unknown function (DUF3924)
Pfam-B_1601 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13063 Protein of unknown function (DUF3925)
Pfam-B_1644 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length.. +PF13064 Protein of unknown function (DUF3927)
Pfam-B_1668 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 50 amino acids in length. There is a conserved SVL sequence motif. There is a single completely conserved residue D that may be functionally important.. +PF13065 Protein of unknown function (DUF3928)
Pfam-B_1675 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length.. +PF13066 Protein of unknown function (DUF3929)
Pfam-B_1716 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length.. +PF13067 Protein of unknown function (DUF3930)
Pfam-B_1721 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 51 and 67 amino acids in length.. +PF13068 Protein of unknown function (DUF3932)
Pfam-B_1731 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13069 Protein of unknown function (DUF3933)
Pfam-B_1720 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13070 Protein of unknown function (DUF3934)
Pfam-B_1719 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two conserved sequence motifs: GTG and SKG.. +PF13071 Protein of unknown function (DUF3935)
Pfam-B_1715 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two conserved sequence motifs: FVF and LGV.. +PF13072 Protein of unknown function (DUF3936)
Pfam-B_1705 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved GKAW sequence motif. There is a single completely conserved residue G that may be functionally important.. +PF13073 Protein of unknown function (DUF3937)
Pfam-B_1711 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13074 Protein of unknown function (DUF3938)
Pfam-B_1607 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length.. +PF13075 Protein of unknown function (DUF3939)
Pfam-B_1535 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length.. +PF13076 Protein of unknown function (DUF3940)
Pfam-B_1673 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 50 amino acids in length.. +PF13077 Protein of unknown function (DUF3909)
Pfam-B_1537 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length.. +PF13078 Protein of unknown function (DUF3942)
Pfam-B_1722 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length.. +PF13079 Protein of unknown function (DUF3916)
Pfam-B_1564 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. There is a single completely conserved residue S that may be functionally important.. +PF13080 Protein of unknown function (DUF3926)
Pfam-B_1663 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 46 and 63 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF13081 Domain of unknown function (DUF3941)
Pfam-B_1728 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 30 amino acids in length. There is a conserved YSK sequence motif.. +PF13082 Protein of unknown function (DUF3931)
Pfam-B_1734 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13083 KH domain
+PF13084 Domain of unknown function (DUF3943)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length.. +PF13085 2Fe-2S iron-sulfur cluster binding domain
The 2Fe-2S ferredoxin family have a general core structure consisting of beta(2)-alpha-beta(2) which is a beta-grasp type fold. The domain is around one hundred amino acids with four conserved cysteine residues to which the 2Fe-2S cluster is ligated.. +PF13086 AAA domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins.. +PF13087 AAA domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins.. +PF13088 BNR repeat-like domain
This family of proteins contains BNR-like repeats suggesting these proteins may act as sialidases.. +PF13089 Polyphosphate kinase N-terminal domain
Splitting PF02503 into domains. Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules.. +PF13090 Polyphosphate kinase C-terminal domain
Splitting PF02503 into domains. Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. This C-terminal domain has a structure similar to phospholipase D.. +PF13091 PLD-like domain
+PF13092 Kinetochore complex Sim4 subunit Fta1
CENP-L is one of the components that assembles onto the CENP-A-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC . Fta1 is the equivalent component of the fission yeast Sim4 complex . The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals.. +PF13093 CENP-U;
Kinetochore complex Fta4 of Sim4 subunit, or CENP-50. Fission yeast has three kinetochore protein complexes. Two complexes, Sim4 and Ndc80-MIND-Spc7 (NMS), are constitutive components, whereas the third complex, DASH, is transiently associated with kinetochores only in mitosis and is required for precise chromosome segregation. The Sim4 complex functions as a loading dock for the DASH complex. Sim4 consists of a number of different proteins including Ftas 1-7 and Dad1 .. +PF13094 CENP-Q, a CENPA-CAD centromere complex subunit
CENP-Q is one of the components that assembles onto the CENPA-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENPA nucleosomes directly recruit a proximal CENPA-nucleosome-associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENPA NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENPA-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC . Fta7 is the equivalent component of the fission yeast Sim4 complex .. +PF13095 Kinetochore Sim4 complex subunit FTA2
Fission yeast has three kinetochore protein complexes. Two complexes, Sim4 and Ndc80-MIND-Spc7 (NMS), are constitutive components, whereas the third complex, DASH, is transiently associated with kinetochores only in mitosis and is required for precise chromosome segregation. The Sim4 complex functions as a loading dock for the DASH complex. Sim4 consists of a number of different proteins including Ftas 1-7 and Dad1 . The equivalent higher eukaryotic protein is CENP-P. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC .. +PF13096 ShortName;
CENP-A-nucleosome distal (CAD) centromere subunit, CENP-P. CENP-P is one of the components that assembles onto the CENP-A-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC . Fta7 is the equivalent component of the fission yeast Sim4 complex .. +PF13097 CENP-A nucleosome associated complex (NAC) subunit
CENP-U is one of the components that assembles onto the CENP-A-nucleosome associated complex (NAC). The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC . FTA4 is the equivalent component of the fission yeast Sim4 complex. . +PF13098 Thioredoxin-like domain
+PF13099 Domain of unknown function (DUF3944)
This short domain is sometimes found N terminal to Pfam:PF03981.. +PF13100 OstA-like protein
This is a family of OstA-like proteins that are related to Pfam:PF03968.. +PF13101 Protein of unknown function (DUF3945)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . This is a C-terminal repeated region.. +PF13102 DUF3946;
Phage integrase SAM-like domain. JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . This family appears related to the N-terminal domain of phage integrases.. +PF13103 TonB C terminal
This family contains TonB members that are not captured by Pfam:PF03544.. +PF13104 Protein of unknown function (DUF3956)
Pfam-B_1228 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length.. +PF13105 Protein of unknown function (DUF3959)
Pfam-B_1424 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 260 amino acids in length.. +PF13106 Domain of unknown function (DUF3961)
Pfam-B_1483 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 40 amino acids in length.. +PF13107 Protein of unknown function (DUF3964)
Pfam-B_1516 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two conserved sequence motifs: FYF and AFW.. +PF13108 Protein of unknown function (DUF3969)
Pfam-B_1576 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length.. +PF13109 AsmA-like C-terminal region
Jackhammer-O25308 (H pylori). This family is similar to the C-terminal of the AsmA protein of E. coli.. +PF13110 Protein of unknown function (DUF3966)
Pfam-B_1525 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 86 amino acids in length.. +PF13111 Protein of unknown function (DUF3962)
Pfam-B_1505 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 233 and 796 amino acids in length. There is a conserved FSY sequence motif.. +PF13112 Protein of unknown function (DUF3965)
Pfam-B_1524 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 380 amino acids in length.. +PF13113 Protein of unknown function (DUF3970)
Pfam-B_1596 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved NPKY sequence motif.. +PF13114 RecO N terminal
This entry contains members that are not captured by Pfam:PF11967.. +PF13115 YtkA-like
+PF13116 Protein of unknown function
Jackhammer_O25308 (H.pylori). Some members of this family are related to the AsmA family proteins.. +PF13117 Cag pathogenicity island protein Cag12
This is a Proteobacterial family of Cag pathogenicity island proteins.. +PF13118 Protein of unknown function (DUF3972)
This is a Proteobacterial family of unknown function. Some of the proteins in this family are annotated as being kinesin-like proteins.. +PF13119 Domain of unknown function (DUF3973)
Pfam-B_1636 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved YCI sequence motif.. +PF13120 Domain of unknown function (DUF3974)
Pfam-B_1643 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length.. +PF13121 Domain of unknown function (DUF3976)
Pfam-B_1743 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length.. +PF13122 Protein of unknown function (DUF3977)
Pfam-B_1744 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13123 Protein of unknown function (DUF3978)
Pfam-B_1745 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length.. +PF13124 Protein of unknown function (DUF3963)
Pfam-B_1512 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 85 amino acids in length. There is a conserved DIQKW sequence motif.. +PF13125 Protein of unknown function (DUF3958)
Pfam-B_1404 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. There are two conserved sequence motifs: RLF and TWH.. +PF13126 Protein of unknown function (DUF3975)
Pfam-B_1736 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length.. +PF13127 Protein of unknown function (DUF3955)
Pfam-B_966 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 68 and 87 amino acids in length. There are two completely conserved residues (G and E) that may be functionally important.. +PF13128 Protein of unknown function (DUF3954)
Pfam-B_934 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 60 amino acids in length.. +PF13129 Protein of unknown function (DUF3953)
Pfam-B_875 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 47 and 76 amino acids in length.. +PF13130 Domain of unknown function (DUF3952)
Pfam-B_704 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. There is a conserved VMSAS sequence motif.. +PF13131 Protein of unknown function (DUF3951)
Pfam-B_698 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 56 and 71 amino acids in length. There is a conserved YTP sequence motif.. +PF13132 Domain of unknown function (DUF3950)
Pfam-B_688 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 30 amino acids in length. There is a conserved NFS sequence motif.. +PF13133 Protein of unknown function (DUF3949)
Pfam-B_636 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 69 and 87 amino acids in length.. +PF13134 Protein of unknown function (DUF3948)
Pfam-B_550 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length.. +PF13135 Protein of unknown function (DUF3947)
Pfam-B_493 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13136 Protein of unknown function (DUF3984)
Pfam-B_3236 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 393 and 442 amino acids in length.. +PF13137 Protein of unknown function (DUF3983)
Pfam-B_2658 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 40 amino acids in length. There is a conserved AWRN sequence motif.. +PF13138 Protein of unknown function (DUF3982)
Pfam-B_2022 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 47 and 73 amino acids in length. There are two conserved sequence motifs: EKL and EIP.. +PF13139 Domain of unknown function (DUF3981)
Pfam-B_1754 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length.. +PF13140 Domain of unknown function (DUF3980)
Pfam-B_1748 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length.. +PF13141 Protein of unknown function (DUF3979)
Pfam-B_1747 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF13142 Domain of unknown function (DUF3960)
Pfam-B_1431 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 72 and 89 amino acids in length.. +PF13143 Protein of unknown function (DUF3986)
Pfam-B_362 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length.. +PF13144 SAF-like
The members of this family are similar to those in the SAF family, and include flagellar basal-body proteins and pilus-assembly proteins.. +PF13145 PPIC-type PPIASE domain
+PF13146 TRL-like protein family
This family includes the Swiss:O87326 TRL protein that is found in a locus that includes several tRNAs. The function of this protein is not known . The proteins in this family usually have a lipoprotein attachment site at their N-terminus.. +PF13147 Amidohydrolase
This family of enzymes is part of a large metal dependent hydrolase superfamily . The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. The adenine deaminase reaction is important for adenine utilisation as a purine and also as a nitrogen source . This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25. These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit . Dihydroorotases (EC:3.5.2.3) are also included [4-5].. +PF13148 Protein of unknown function (DUF3987)
JCSG - Joint Center for Structural Genomics. A family of uncharacterised proteins found by clustering human gut metagenomic sequences .. +PF13149 Protein of unknown function (DUF3988)
JCSG - Joint Center for Structural Genomics. A family of uncharacterised proteins found by clustering human gut metagenomic sequences .. +PF13150 Protein of unknown function (DUF3989)
JCSG - Joint Center for Structural Genomics. A family of uncharacterised proteins found by clustering human gut metagenomic sequences .. +PF13151 Protein of unknown function (DUF3990)
JCSG - Joint Center for Structural Genomics. A family of uncharacterised proteins found by clustering human gut metagenomic sequences .. +PF13152 Protein of unknown function (DUF3967)
Pfam-B_1529 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 173 and 249 amino acids in length.. +PF13153 Protein of unknown function (DUF3985)
Pfam-B_3329 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length.. +PF13154 Protein of unknown function (DUF3991)
This family of proteins is often associated with family Toprim, Pfam:PF01751.. +PF13155 Toprim-like
This is a family of Toprim-like proteins.. +PF13156 Restriction endonuclease
Prokaryotic family found in type II restriction enzymes containing the hallmark (D/E)-(D/E)XK active site. Presence of catalytic residues implicates this region in the enzymatic cleavage of DNA [1,2]. +PF13157 Protein of unknown function (DUF3992)
Pfam-B_480 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 98 and 122 amino acids in length. There is a single completely conserved residue T that may be functionally important.. +PF13158 Protein of unknown function (DUF3993)
Pfam-B_782 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 160 amino acids in length.. +PF13159 Domain of unknown function (DUF3994)
Pfam-B_903 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 97 and 111 amino acids in length.. +PF13160 Protein of unknown function (DUF3995)
Pfam-B_958 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 138 and 149 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important.. +PF13161 Protein of unknown function (DUF3996)
Pfam-B_998 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 172 and 203 amino acids in length.. +PF13162 Protein of unknown function (DUF3997)
Pfam-B_1597 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length.. +PF13163 Protein of unknown function (DUF3999)
Pfam-B_2128 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 440 and 470 amino acids in length. There is a single completely conserved residue D that may be functionally important.. +PF13164 Protein of unknown function (DUF4002)
Pfam-B_3350 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 112 and 125 amino acids in length. There are two completely conserved C residues that may be functionally important.. +PF13165 Protein of unknown function (DUF4001)
Pfam-B_3337 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are at least two pairs of cysteine residues in this short family of proteins.. +PF13166 AAA domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. This family includes the PrrC protein that is thought to be the active component of the anticodon nuclease .. +PF13167 GTP-binding GTPase N-terminal
This is the N-terminal region of GTP-binding HflX-like proteins. The full-length members bind and interact with the 50S ribosome and are GTPases, hydrolysing GTP/GDP/ATP/ADP. This N-terminal region is necessary for stability of the whole protein.. +PF13168 Poxvirus B22R protein C-terminal
Pfam-B_3510 (release 7.3). This is the highly conserved C-terminal region of poxvirus proteins from, e.g., Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses.. +PF13169 Poxvirus B22R protein N-terminal
Pfam-B_3510 (release 7.3). This is the highly conserved N-terminal region of poxvirus proteins from, e.g., Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses.. +PF13170 Protein of unknown function (DUF4003)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 327 and 345 amino acids in length.. +PF13171 Protein of unknown function (DUF4004)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 210 amino acids in length.. +PF13172 PepSY-associated TM helix
This family represents a conserved TM helix found in bacteria and archaea.. +PF13173 AAA domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily.. +PF13174 Tetratricopeptide repeat
+PF13175 AAA ATPase domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily.. +PF13176 Tetratricopeptide repeat
+PF13177 DNA polymerase III, delta subunit
DNA polymerase III, delta subunit (EC 2.7.7.7) is required for, along with delta' subunit, the assembly of the processivity factor beta(2) onto primed DNA in the DNA polymerase III holoenzyme-catalysed reaction . The delta subunit is also known as HolA.. +PF13178 Protein of unknown function (DUF4005)
Pfam-B_2171 (release 24.0). This is a C-terminal region of plant IQ-containing putative calmodulin-binding proteins.. +PF13179 Family of unknown function (DUF4006)
This is a family of short, approx 65 residue-long, bacterial proteins of unknown function.. +PF13180 PDZ domain
+PF13181 Tetratricopeptide repeat
+PF13182 Protein of unknown function (DUF4007)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 284 and 326 amino acids in length. This domain is found associated with Pfam:PF01507 in some proteins, suggesting a functional link.. +PF13183 4Fe-4S dicluster domain
Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters.. +PF13184 NusA-like KH domain
+PF13185 GAF domain
+PF13186 DUF4008;
Iron-sulfur cluster-binding domain. This domain occurs as an additional C-terminal iron-sulfur cluster binding domain in many radical SAM domain, Pfam:PF04055 proteins. The domain occurs in a number of proteins that modify a protein to become an active enzyme, or a peptide to become a ribosomal natural product. The domain is named SPASM because it occurs in the maturases of Subtilosin, PQQ, Anaerobic Sulfatases, and Mycofactocin.. +PF13187 4Fe-4S dicluster domain
+PF13188 PAS domain
+PF13189 Cytidylate kinase-like family
This family includes enzymes related to cytidylate kinase.. +PF13190 PDGLE domain
This short presumed domain is usually found on its own. However, it is also found associated with Pfam:PF01891 suggesting it may have a role in cobalt uptake. The domain is named after a short motif found within many members of the family.. +PF13191 AAA ATPase domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily.. +PF13192 Thioredoxin domain
+PF13193 DUF4009;
AMP-binding enzyme C-terminal domain. Mistry J, Eberhardt R. This is a small domain that is found C terminal to Pfam:PF00501. It has a central beta sheet core that is flanked by alpha helices.. +PF13194 Domain of unknown function (DUF4010)
This is a family of putative membrane proteins found in archaea and bacteria. It is sometimes found C terminal to Pfam:PF02308.. +PF13195 Protein of unknown function (DUF4011)
This family of proteins is found in archaea and bacteria. Many members are annotated as being putative DNA helicase-related proteins.. +PF13196 Protein of unknown function (DUF4012)
This is a family of uncharacterised proteins found in archaea and bacteria.. +PF13197 Protein of unknown function (DUF4013)
This is a family of uncharacterised proteins that is found in archaea and bacteria.. +PF13198 Protein of unknown function (DUF4014)
Pfam-B_4873 (release 24.0). This is a bacterial and viral family of uncharacterised proteins.. +PF13199 Glycosyl hydrolase family 66
Pfam-B_3959 (Release 24.0). This family is a set of glycosyl hydrolase enzymes including cycloisomaltooligosaccharide glucanotransferase (EC:2.4.1.-) and dextranase (EC:3.2.1.11) activities.. +PF13200 Putative glycosyl hydrolase domain
Pfam-B_597 (Release 24.0). This domain is related to other known glycosyl hydrolases suggesting this domain is also involved in carbohydrate break down.. +PF13201 Putative glycoside hydrolase xylanase
This is a family of putative bacterial xylanases. Comparative structural data from TOPSAN indicates there to be a C-terminal carbohydrate binding domain similar to those of carbohydrate enzymes such as glucanase and xylanase. There is also structural similarity of the N-terminal domain, according to TOPSAN, to endo-1,4-beta-xylanase (from Streptomyces sviceus) and beta-xylosidase (from Magnetospirillum magnetotacticum MS-1). The N-terminal domain fold is an immunoglobulin-like beta-sandwich.. +PF13202 EF_hand_3;
+PF13203 Putative metallopeptidase domain
This domain, found in various hypothetical bacterial proteins, has no known function. However, it is related to Pfam:PF01435.. +PF13204 Hydrolase_6;
Protein of unknown function (DUF4038). A family of putative cellulases. . +PF13205 Bacterial Ig-like domain
+PF13206 Trypanosomal VSG domain
This family represents the B-type variant surface glycoproteins from trypanosomal parasites. This family is related to Pfam:PF00913.. +PF13207 AAA domain
+PF13208 DUF4016;
Anantharaman V, Aldam G, Mistry J. The TerB-N domain is found N-terminal to TerB, and TerB-C containing proteins . It has a predominantly alpha-helical structure and contains an absolutely conserved glutamate . The presence of a conserved acidic residue suggests that it might chelate metal like TerB . These proteins occur in a two-gene operon containing an AAA+ ATPase and SF-II DNA helicase suggesting a role in stress response or phage defense .. +PF13209 Protein of unknown function (DUF4017)
Pfam-B_2009 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13210 Domain of unknown function (DUF4018)
Pfam-B_2010 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 190 amino acids in length.. +PF13211 Protein of unknown function (DUF4019)
Pfam-B_2012 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 130 and 183 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF13212 Domain of unknown function (DUF4020)
Pfam-B_2014 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 176 and 195 amino acids in length.. +PF13213 Protein of unknown function (DUF4021)
Pfam-B_2025 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved YGM sequence motif.. +PF13214 Protein of unknown function (DUF4022)
Pfam-B_2027 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 85 amino acids in length.. +PF13215 Protein of unknown function (DUF4023)
Pfam-B_2030 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved KLP sequence motif.. +PF13216 Protein of unknown function (DUF4024)
Pfam-B_2031 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved RDE sequence motif.. +PF13217 Protein of unknown function (DUF4025)
Pfam-B_2033 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved EGT sequence motif.. +PF13218 Protein of unknown function (DUF4026)
Pfam-B_2037 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 450 amino acids in length. The family is found in association with Pfam:PF10077.. +PF13219 Protein of unknown function (DUF4027)
Pfam-B_2038 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved CLGGF sequence motif.. +PF13220 Protein of unknown function (DUF4028)
Pfam-B_2040 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 67 and 93 amino acids in length. There are two conserved sequence motifs: IVKI and YVKKWF.. +PF13221 Protein of unknown function (DUF4029)
Pfam-B_2041 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 119 amino acids in length.. +PF13222 Protein of unknown function (DUF4030)
Pfam-B_2044 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 164 and 197 amino acids in length.. +PF13223 Protein of unknown function (DUF4031)
Pfam-B_2059 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 91 and 130 amino acids in length. There is a conserved HYD sequence motif.. +PF13224 Domain of unknown function (DUF4032)
Pfam-B_2062 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 170 amino acids in length. The family is found in association with Pfam:PF06293.. +PF13225 Domain of unknown function (DUF4033)
Pfam-B_2072 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 80 amino acids in length.. +PF13226 Domain of unknown function (DUF4034)
Pfam-B_2075 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 280 amino acids in length. There is a conserved PRW sequence motif.. +PF13227 Protein of unknown function (DUF4035)
Pfam-B_2076 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 67 and 93 amino acids in length.. +PF13228 Domain of unknown function (DUF4037)
Pfam-B_2110 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 100 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF13229 Right handed beta helix region
This region contains a parallel beta helix region that shares some similarity with Pectate lyases.. +PF13230 GATase_II;
Glutamine amidotransferases class-II. This family captures members that are not found in Pfam:PF00310.. +PF13231 Dolichyl-phosphate-mannose-protein mannosyltransferase
This family contains members that are not captured by Pfam:PF02366.. +PF13232 LYR-motif-like;
This is a family of proteins carrying the LYR motif of family Complex1_LYR, Pfam:PF05347, likely to be involved in Fe-S cluster biogenesis in mitochondria.. +PF13233 Complex1_LYR-like
This is a family of proteins carrying the LYR motif of family Complex1_LYR, Pfam:PF05347, likely to be involved in Fe-S cluster biogenesis in mitochondria.. +PF13234 rRNA-processing arch domain
Pfam-B_8473 (release 24). +PF13236 Clustered mitochondria
The CLU domain (CLUstered mitochondria) is a eukaryotic domain found in proteins from fungi, protozoa, plants to humans. It is required for correct functioning of the mitochondria and mitochondrial transport [1,2] although the exact function of the domain is unknown . In Dictyostelium the full-length protein is required for a very late step in fission of the outer mitochondrial membrane suggesting that mitochondria are transported along microtubules, as in mammalian cells, rather than along actin filaments, as in budding yeast . Disruption of the protein impaired cytokinesis and caused mitochondria to cluster at the cell centre . It is likely that CLU functions in a novel pathway that positions mitochondria within the cell based on their physiological state. Disruption of the CLU pathway may enhance oxidative damage, alter gene expression, cause mitochondria to cluster at microtubule plus ends, and lead eventually to mitochondrial failure .. +PF13237 4Fe-4S dicluster domain
This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich.. +PF13238 AAA domain
+PF13239 2TM domain
This short region contains two transmembrane alpha helices that are found associated with a wide range of other domains. This domain may be involved in cell lysis or peptidoglycan turnover.. +PF13240 zinc-ribbon domain
This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR. Pfam:PF12773.. +PF13241 Hydrolase_like;
Putative NAD(P)-binding. This domain is found in fungi, plants, archaea and bacteria.. +PF13242 HAD-hyrolase-like
+PF13243 Prenyltransferase-like
+PF13244 Domain of unknown function (DUF4040)
+PF13245 Part of AAA domain
+PF13246 Putative hydrolase of sodium-potassium ATPase alpha subunit
This is a putative hydrolase of the sodium-potassium ATPase alpha subunit.. +PF13247 4Fe-4S dicluster domain
Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters.. +PF13248 zinc-ribbon domain
This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR. Pfam:PF12773.. +PF13249 Prenyltransferase-like
+PF13250 Domain of unknown function (DUF4041)
Pfam-B_2162 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and viruses, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF10544.. +PF13251 Domain of unknown function (DUF4042)
Pfam-B_2172 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 180 amino acids in length.. +PF13252 Protein of unknown function (DUF4043)
Pfam-B_2174 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 369 and 424 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF13253 Protein of unknown function (DUF4044)
Pfam-B_2177 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 56 amino acids in length. There is a single completely conserved residue M that may be functionally important.. +PF13254 Domain of unknown function (DUF4045)
Pfam-B_2180 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is typically between 384 and 430 amino acids in length.. +PF13255 Protein of unknown function (DUF4046)
Pfam-B_2182 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 64 and 331 amino acids in length.. +PF13256 Domain of unknown function (DUF4047)
Pfam-B_2183 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length. There are two conserved sequence motifs: TEA and FPKT.. +PF13257 Domain of unknown function (DUF4048)
Pfam-B_2186 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is typically between 228 and 257 amino acids in length.. +PF13258 Domain of unknown function (DUF4049)
Pfam-B_2191 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 310 and 324 amino acids in length.. +PF13259 Protein of unknown function (DUF4050)
Pfam-B_2193 (release 24.0). This family of proteins is functionally uncharacterized. This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 173 amino acids in length. There are two conserved sequence motifs: IPL and FLVD.. +PF13260 Protein of unknown function (DUF4051)
Pfam-B_2194 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13261 Protein of unknown function (DUF4052)
Pfam-B_2197 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 220 amino acids in length.. +PF13262 Protein of unknown function (DUF4054)
Pfam-B_2204 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 120 and 152 amino acids in length.. +PF13263 PHP-associated
This is a subunit, probably the alpha, of bacterial and eukaryotic DNA polymerase III, associated with the PHP domain, Pfam:PF02811.. +PF13264 Domain of unknown function (DUF4055)
Pfam-B_2501 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 140 amino acids in length.. +PF13265 Protein of unknown function (DUF4056)
Pfam-B_2502 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 355 and 380 amino acids in length.. +PF13266 Protein of unknown function (DUF4057)
Pfam-B_2503 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 279 and 322 amino acids in length.. +PF13267 Protein of unknown function (DUF4058)
Pfam-B_2520 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 244 and 264 amino acids in length.. +PF13268 Protein of unknown function (DUF4059)
Pfam-B_2521 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved DKT sequence motif.. +PF13269 Protein of unknown function (DUF4060)
Pfam-B_2524 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There are two conserved sequence motifs: VEVV and SYVAT.. +PF13270 Domain of unknown function (DUF4061)
Pfam-B_2526 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 90 amino acids in length. There is a conserved AFG sequence motif.. +PF13271 Domain of unknown function (DUF4062)
Pfam-B_2536 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 80 amino acids in length. There is a conserved SST sequence motif.. +PF13272 Protein of unknown function (DUF4063)
Pfam-B_3026 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 95 and 123 amino acids in length. There is a conserved RRA sequence motif.. +PF13273 Protein of unknown function (DUF4064)
+PF13274 Protein of unknown function (DUF4065)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and viruses. Proteins in this family are typically between 155 and 202 amino acids in length.. +PF13275 S4 domain
The S4 domain is a small domain consisting of 60-65 amino acid residues that was detected in the bacterial ribosomal protein S4.. +PF13276 HTH-like domain
This domain contains a predicted helix-turn-helix suggesting a DNA-binding function.. +PF13277 YmdB-like protein
This family of putative phosphoesterases contains the B. subtilis protein YmdB Swiss:O31775.. +PF13278 Putative amidotransferase
This domain contains similarities to other amidotransferase families such as Pfam:PF00117. Some members of the family lack the likely catalytic residues.. +PF13279 Thioesterase-like superfamily
This family contains a wide variety of enzymes, principally thioesterases. These enzymes are part of the Hotdog fold superfamily .. +PF13280 WYL domain
This presumed domain is around 170 amino acids in length. It is found to the C-terminus of a DNA-binding helix-turn-helix domain. This domain may be involved in binding to an as yet unknown ligand that allows a transcriptional regulation response to that molecule. There are a number of proteins that contain two tandem copies of this domain such as Swiss:Q47P13. This suggests that this domain may form a dimeric arrangement.. +PF13281 Domain of unknown function (DUF4071)
Pfam-B_2008 (release 24.0). This domain is found at the N-terminus of many serine-threonine kinase-like proteins.. +PF13282 Domain of unknown function (DUF4070)
Pfam-B_2005 (release 24.0). This is a bacterial domain often found at the C-terminus of Radical_SAM methylases.. +PF13283 Bacteriophage N adsorption protein A C-term
Pfam-B_2015 (release 24.0). The function of this domain is unknown but it is found at the C-terminus of bacteriophage N4 adsorption protein A, in association with an N-terminal region of TPR repeats.. +PF13284 Domain of unknown function (DUF4072)
Pfam-B_2021 (release 24.0). This short domain is normally found at the very N-terminus of Hydrolases Pfam:PF00702.. +PF13285 Domain of unknown function (DUF4073)
Pfam-B_2039 (release 24.0). This family is frequently found at the C-terminus of bacterial proteins carrying the family, Metallophos Pfam:PF00149.. +PF13286 Phosphohydrolase-associated domain
Pfam-B_2016 (release 24.0). This domain is found on bacterial and archaeal metal-dependent phosphohydrolases.. +PF13287 Fn3 associated
+PF13288 DXP reductoisomerase C-terminal domain
This is the C-terminal domain of the 1-deoxy-D-xylulose-5-phosphate reductoisomerase enzyme. This domain forms a left handed super-helix.. +PF13289 SIR2-like domain
This family of proteins are related to the sirtuins.. +PF13290 Chitobiase/beta-hexosaminidase C-terminal domain
+PF13291 ACT domain
ACT domains bind to amino acids and regulate associated enzyme domains. These ACT domains are found at the C-terminus of the RelA protein.. +PF13292 1-deoxy-D-xylulose-5-phosphate synthase
This family contains 1-deoxyxylulose-5-phosphate synthase (DXP synthase), an enzyme which catalyses the thiamine pyrophosphate-dependent acyloin condensation reaction between carbon atoms 2 and 3 of pyruvate and glyceraldehyde 3-phosphate, to yield 1-deoxy-D-xylulose-5-phosphate, a precursor in the biosynthetic pathway to isoprenoids, thiamine (vitamin B1), and pyridoxol (vitamin B6).. +PF13293 Domain of unknown function (DUF4074)
Pfam-B_2050 (release 24.0). This family is found at the C-terminal of Homeobox proteins in Metazoa.. +PF13294 Domain of unknown function (DUF4075)
Pfam-B_2028 (release 24.0). The members of this family are putative mature parasite-infected erythrocyte surface antigen protein from Bacillus spp.. +PF13295 Domain of unknown function (DUF4077)
Pfam-B_2043 (release 24.0). This is the N-terminal region of methyl-accepting chemotaxis proteins from Bacillus spp. The function is not known.. +PF13296 Putative type VI secretion system Rhs element Vgr
Pfam-B_2052 (release 24.0). This is a family of putative type VI secretion system Rhs element Vgr proteins from Proteobacteria.. +PF13297 DUF4076; Sde2_C;
Telomere stability C-terminal. Pfam-B_2053 (release 24.0). This short C-terminal domain is found in higher eukaryotes further downstream from the Sde2 family, Pfam:PF13019. It is found in all Sde2-related proteins except those from fission yeast, fly, and mosquito. Its exact function in telomere formation and maintenance has not yet been established.. +PF13298 DNA polymerase Ligase (LigD)
This is the N-terminal region of ATP-dependent DNA ligase.. +PF13299 Cleavage and polyadenylation factor 2 C-terminal
Pfam-B_2065 (release 24.0). This family lies at the C-terminus of many fungal and plant cleavage and polyadenylation specificity factor subunit 2 proteins. The exact function of the domain is not known, but is likely to function as a binding domain for the protein within the overall CPSF complex .. +PF13300 Domain of unknown function (DUF4078)
Pfam-B_3305 (release 24.0). This family is found from fungi to humans, but its exact function is not known.. +PF13301 Protein of unknown function (DUF4079)
This is an uncharacterised family of proteins.. +PF13302 Acetyltransferase (GNAT) domain
This domain catalyses N-acetyltransferase reactions.. +PF13303 Phosphotransferase system, EIIC
The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The sugar-specific permease of the PTS consists of three domains (IIA, IIB and IIC). The IIC domain catalyses the transfer of a phosphoryl group from IIB to the sugar substrate.. +PF13304 AAA domain
+PF13305 WHG domain
This presumed domain is around 80 amino acids in length. It is found to the C-terminus of a DNA-binding helix-turn-helix domain. This domain may be involved in binding to an as yet unknown ligand that allows a transcriptional regulation response to that molecule. The domain is named WHG after three conserved residues near the C-terminus of the domain.. +PF13306 Leucine rich repeats (6 copies)
This family includes a number of leucine rich repeats. This family contains a large number of BSPA-like surface antigens from Trichomonas vaginalis.. +PF13307 Helicase C-terminal domain
This domain is the second of two tandem AAA domains found in a wide variety of helicase enzymes.. +PF13308 YARHG domain
This presumed extracellular domain is about 70 amino acids in length. It is named YARHG after a conserved motif in the sequence. This domain is associated with peptidases and bacterial kinase proteins. Its molecular function is unknown.. +PF13309 HTH domain
This domain is a helix-turn-helix domain that is likely to act as a DNA-binding domain.. +PF13310 Virulence protein RhuM family
There are currently no experimental data for members of this group or their homologues. However, these proteins are implicated in virulence/pathogenicity because RhuM is encoded in the SPI-3 pathogenicity island in Salmonella typhimurium [1-2].. +PF13311 Protein of unknown function (DUF4080)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences .. +PF13312 Domain of unknown function (DUF4081)
Pfam-B_2088 (release 24.0). This domain is often found N-terminal to the GNAT acetyltransferase domain, Pfam:PF00583 and FR47, Pfam:PF08445.. +PF13313 Domain of unknown function (DUF4082)
Pfam-B_2054 (release 24.0). This family appears to be a parallel beta-helix repeated region that sits between successive Cadherin domains, Pfam:PF00028.. +PF13314 Domain of unknown function (DUF4083)
Pfam-B_2061 (release 24.0). This is a family of very short, approximately 60 residue, proteins from Firmicutes, that are all putatively annotated as being MutT/Nudix. However, the characteristic Nudix motif of GX(5)EX(7)REUXEE is absent.. +PF13315 Protein of unknown function (DUF4085)
Pfam-B_2570 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 101 and 269 amino acids in length.. +PF13316 Protein of unknown function (DUF4087)
Pfam-B_3066 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 140 and 280 amino acids in length. There is a conserved RCGW sequence motif.. +PF13317 Protein of unknown function (DUF4088)
Pfam-B_3345 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 258 and 300 amino acids in length.. +PF13318 Protein of unknown function (DUF4089)
Pfam-B_3700 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF13319 Protein of unknown function (DUF4090)
Pfam-B_3702 (release 24.0). This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length.. +PF13320 Domain of unknown function (DUF4091)
Pfam-B_3704 (release 24.0). This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 70 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF13321 Domain of unknown function (DUF4084)
Pfam-B_2026 (release 24.0). This family of Firmicute proteins is frequently associated with the EAL, GGDEF and PAS families, Pfam:PF00563, Pfam:PF00990, and Pfam:PF00989. The exact function is not known.. +PF13322 Domain of unknown function (DUF4092)
Pfam-B_2068 (release 24.0). This family is found in Proteobacteria. The function is not known.. +PF13323 N-terminal domain with HPIH motif
Pfam-B_2165 (release 24.0). This family is found in fungi on proteins carrying the PAS, Pfam:PF00989, domain. There is a well-conserved characteristic HPIH motif, but the function is not known.. +PF13324 Grap2 and cyclin-D-interacting
Pfam-B_2169 (release 24.0). GCIP, or Grap2 and cyclin-D-interacting protein, is found in eukaryotes, and in the protein Swiss:O95273, residues 149-190 constitute a helix-loop-helix domain, residues 190-240 an acidic region, and 240-261 a leucine zipper domain. GCIP interacts with full-length Grap2 protein and with the COOH-terminal unique and SH3 domains (designated QC domain) of Grap2. It is potentially involved in the regulation of cell differentiation and proliferation through Grap2 and cyclin D-mediated signalling pathways . In mice, it is involved in G1/S-phase progression of hepatocytes, which in older animals is associated with the development of liver tumours. In vitro it acts as an inhibitory HLH protein, for example, blocking transcription of the HNF-4 promoter. In its function as a cyclin D1-binding protein it is able to reduce CDK4-mediated phosphorylation of the retinoblastoma protein and to inhibit E2F-mediated transcriptional activity . GCIP has also been shown to interact physically with Rad (Ras associated with diabetes), Rad being important in regulating cellular senescence .. +PF13325 N-terminal region of micro-spherule protein
Pfam-B_2099 (release 24.0). This domain is found in plants and higher eukaryotes, and is the N-terminal region of micro-spherule proteins which repress the transactivation activities of Nrf1 (p45 nuclear factor-erythroid 2 (p45 NF-E2)-related factor 1) . In conjunction with DIPA the full-length protein acts as a transcription repressor . The exact function of the region is not known.. +PF13326 Photosystem II Pbs27
This family of proteins contains Pbs27, a highly conserved component of photosystem II. Pbs27 is comprised of four helices arranged in a right handed up-down-up-down fold, with a less ordered region located at the N-terminus .. +PF13327 Type III secretion system subunit
Pfam-B_2123 (release 24.0). This is a family of bacterial putative type III secretion apparatus proteins associated with the locus of enterocyte effacement (LEE).. +PF13328 HD domain
HD domains are metal dependent phosphohydrolases.. +PF13329 Autophagy-related protein 2 CAD motif
Pfam-B_2170 (release 24.0). The Atg2 protein, an integral membrane protein, is required for a range of functions including the regulation of autophagy in conjunction with the Atg1-Atg13 complex. Atg2 binds Atg9. The precise function of this region, with its characteristic highly conserved CAD sequence motif, is not known.. +PF13330 Mucin-2 protein WxxW repeating region
Pfam-B_2205 (release 24.0). This family is a repeating region found on mucins 2 and 5. The function is not known, but the repeat can be present in up to 32 copies, as in Swiss:C3Y5K5, from Branchiostoma floridae. The region carries a highly conserved WxxW sequence motif and also has at least six well conserved cysteine residues.. +PF13331 Domain of unknown function (DUF4093)
This domain lies at the C-terminus of primase proteins carrying the TOPRIM, Pfam:PF01751, domain. The exact function of the domain is not known.. +PF13332 Haemagluttinin repeat
+PF13333 Integrase core domain
+PF13334 Domain of unknown function (DUF4094)
Pfam-B_2504 (release 24.0). This domain is found in plant proteins that often carry a galactosyltransferase domain, Pfam:PF01762, at their C-terminus.. +PF13335 Magnesium chelatase, subunit ChlI
This is a family of putative bacterial magnesium chelatase subunit ChlI proteins. The domain lacks the P-loop region present at the N-terminal of Mg_chelatase, Pfam:PF01078.. +PF13336 Acetyl-CoA hydrolase/transferase C-terminal domain
This family contains several enzymes which take part in pathways involving acetyl-CoA. Acetyl-CoA hydrolase EC:3.1.2.1 (Swiss:P32316) catalyses the formation of acetate from acetyl-CoA, CoA transferase (CAT1) EC:2.8.3.- (Swiss:P38946) produces succinyl-CoA, and acetate-CoA transferase EC:2.8.3.8 (Swiss:Q59323) utilises acyl-CoA and acetate to form acetyl-CoA.. +PF13337 Putative ATP-dependent Lon protease
This is a family of proteins that are annotated as ATP-dependent Lon proteases.. +PF13338 Domain of unknown function (DUF4095)
+PF13339 Apoptosis antagonizing transcription factor
Pfam-B_2199 (release 24.0). The N-terminal and leucine-zipper region of the apoptosis antagonizing transcription factor-Che1.. +PF13340 Putative transposase of IS4/5 family (DUF4096)
+PF13341 RAG2 PHD domain
This domain is found at the C-terminus of the RAG2 protein. The structure of this domain has been shown bound to histone H3 trimethylated at lysine 4 (H3K4me3) .. +PF13342 C-terminal repeat of topoisomerase
+PF13343 Bacterial extracellular solute-binding protein
This family includes bacterial extracellular solute-binding proteins.. +PF13344 Haloacid dehalogenase-like hydrolase
This family is part of the HAD superfamily.. +PF13345 Domain of unknown function (DUF4098)
This domain is a C-terminal repeat found in many bacterial species.. +PF13346 ABC-2 family transporter protein
This family is related to the ABC-2 membrane transporter family Pfam:PF01061 .. +PF13347 MFS/sugar transport protein
This family is part of the major facilitator superfamily of membrane transport proteins.. +PF13348 Tyrosine phosphatase family C-terminal region
+PF13349 Domain of unknown function (DUF4097)
+PF13350 Tyrosine phosphatase family
This family is closely related to the Pfam:PF00102 and Pfam:PF00782 families.. +PF13351 Protein of unknown function (DUF4099)
JCSG - Joint Center for Structural Genomics. A family of uncharacterized proteins found by clustering human gut metagenomic sequences . The C-terminal repeat region of this family is DUF4098, Pfam:PF13345.. +PF13352 Protein of unknown function (DUF4100)
This is a family of uncharacterised proteins found in Physcomitrella.. +PF13353 4Fe-4S single cluster domain
This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich.. +PF13354 Beta-lactamase enzyme family
This family is closely related to Beta-lactamase, Pfam:PF00144, the serine beta-lactamase-like superfamily, which contains the distantly related Pfam:PF00905 and PF00768 D-alanyl-D-alanine carboxypeptidase.. +PF13355 Protein of unknown function (DUF4101)
This is a family of uncharacterised proteins, and is sometimes found in combination with Pfam:PF00226.. +PF13356 Domain of unknown function (DUF4102)
This presumed domain is found at the N-terminus of a wide variety of phage integrase proteins.. +PF13358 DDE superfamily endonuclease
This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction.. +PF13359 DDE_4;
DDE superfamily endonuclease. This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction.. +PF13360 PQQ-like domain
This domain contains several repeats of the PQQ repeat.. +PF13361 UvrD-like helicase C-terminal domain
This domain is found at the C-terminus of a wide variety of helicase enzymes. This domain has a AAA-like structural fold.. +PF13362 Toprim domain
The toprim domain is found in a wide variety of enzymes involved in nucleic acid manipulation .. +PF13363 Beta-galactosidase, domain 3
This is the second domain of the five-domain beta-galactosidase enzyme that altogether catalyses the hydrolysis of beta(1-3) and beta(1-4) galactosyl bonds in oligosaccharides as well as the inverse reaction of enzymatic condensation and trans-glycosylation. This domain has an Ig-like fold .. +PF13364 Beta-galactosidase jelly roll domain
This domain is found in beta galactosidase enzymes. It has a jelly roll fold .. +PF13365 Trypsin-like peptidase domain
This family includes trypsin like peptidase domains.. +PF13366 PD-(D/E)XK nuclease superfamily
Members of this family belong to the PD-(D/E)XK nuclease superfamily. +PF13367 Protease prsW family
This is a family of putative peptidases, possibly belonging to the MEROPS M79 family. Swiss:B7GHM8, PrsW, appears to be a member of a widespread family of membrane proteins that includes at least one previously known protease. PrsW appears to be responsible for Site-1 cleavage of the RsiW anti-sigma factor, the cognate anti-sigma factor, and it senses antimicrobial peptides that damage the cell membrane and other agents that cause cell envelope stress, The three acidic residues, E75, E76 and E95 in Swiss:B7GHM8, appear to be crucial since their mutation to alanine renders the protein inactive. Based on predictions of the bioinformatics programme TMHMM it is likely that these residues are located on the extracytoplasmic face of PrsW placing them in a position to act as a sensor for cell envelope stress .. +PF13368 Topoisomerase C-terminal repeat
This domain is repeated up to five times to form the C-terminal region of bacterial topoisomerase immediately downstream of the zinc-finger motif.. +PF13369 Transglutaminase-like superfamily
+PF13370 4Fe-4S single cluster domain
+PF13371 Tetratricopeptide repeat
+PF13372 DUF4104;
Coggill P, Eberhardt R. This domain forms an 18-stranded beta-barrel pore which is likely to act as an alginate export channel .. +PF13373 DUF2407 C-terminal domain
Pfam-B_17915 (release 21.0). This is a family of proteins found in fungi. The function is not known. There is a characteristic GFDRL sequence motif.. +PF13374 Tetratricopeptide repeat
+PF13375 RnfC Barrel sandwich hybrid domain
This domain is part of the barrel sandwich hybrid superfamily. It is found at the N-terminus of the RnfC Electron transport complex protein. It appears to be most related to the N-terminal NQRA domain (Pfam:PF05896).. +PF13376 Bacteriocin-protection, YdeI or OmpD-Associated
This is a family of archaeal and bacterial proteins predicted to be periplasmic. YdeI is important for resistance to polymyxin B in broth and for bacterial survival in mice upon oral, but not intraperitoneal inoculation, suggesting a role for YdeI in the gastrointestinal tract of mice . Production of the ydeI gene is regulated by the Rcs (regulator of capsule synthesis) phospho-relay system pathway independently of RcsA, and additionally transcription of the protein is regulated by the stationary-phase sigma factor, RpoS (sigma-S) . YdeI confers protection against cationic AMPs (Antimicrobial peptides) or bacteriocins in conjunction with the general porin Omp, thus justifying its name of OmdA, for OmpD-Associated protein .. +PF13377 Periplasmic binding protein-like domain
This domain is found in a variety of transcriptional regulatory proteins. It is related to bacterial periplasmic binding proteins, although this domain is unlikely to be found in the periplasm. This domain likely acts to bind a small molecule ligand that the DNA-binding domain responds to.. +PF13378 Enolase C-terminal domain-like
This domain appears at the C-terminus of many of the proteins that carry the MR_MLE, Pfam:PF01188 and MR_MLE_N Pfam:PF02746 domains. EC:4.2.1.40.. +PF13379 NMT1-like family
This family is closely related to the Pfam:PF09084 family.. +PF13380 CoA binding domain
This domain has a Rossmann fold and is found in a number of proteins including succinyl CoA synthetases, malate and ATP-citrate ligases.. +PF13382 Adenine deaminase C-terminal domain
This family represents a C-terminal region of the adenine deaminase enzyme.. +PF13383 Methyltransferase domain
This family appears to be a methyltransferase domain.. +PF13384 Homeodomain-like domain
+PF13385 Concanavalin A-like lectin/glucanases superfamily
This domain belongs to the Concanavalin A-like lectin/glucanases superfamily.. +PF13386 Cytochrome C biogenesis protein transmembrane region
Coggill P, Eberhardt R. +PF13387 Domain of unknown function (DUF4105)
This is a family of uncharacterised bacterial proteins. There is a highly conserved histidine residue and a well-conserved NCT motif.. +PF13388 Protein of unknown function (DUF4106)
This family of proteins are found in large numbers in the Trichomonas vaginalis proteome. The function of this protein is unknown.. +PF13389 Protein of unknown function (DUF4107)
This family of putative proteins are found in Trichomonas vaginalis in large numbers. The function of this protein is unknown.. +PF13390 Protein of unknown function (DUF4108)
This family of putative proteins are found in Trichomonas vaginalis in large numbers. The function of this protein is unknown.. +PF13391 HNH endonuclease
Pfam-B_14 (Rel 25.0). +PF13392 HNH endonuclease
This is a zinc-binding loop of Fold group 7 as found in endo-deoxy-ribonucleases and HNH nucleases.. +PF13393 Histidyl-tRNA synthetase
This is a family of class II aminoacyl-tRNA synthetase-like and ATP phosphoribosyltransferase regulatory subunits.. +PF13394 4Fe-4S single cluster domain
+PF13395 HNH endonuclease
This HNH nuclease domain is found in CRISPR-related proteins.. +PF13396 Phospholipase_D-nuclease N-terminal
This family is often found at the very N-terminus of proteins from the phospholipase_D-nuclease family, PLDc, Pfam:PF00614. However, a large number of members are full-length within this family.. +PF13397 Domain of unknown function (DUF4109)
This is a family of bacterial proteins with several highly conserved characteristic sequence motifs, such as: APR, WxC and ERR. The function is not known.. +PF13398 Peptidase M50B-like
This is a family of bacterial and plant peptidases in the same family as MEROPS:M50B.. +PF13399 LytR cell envelope-related transcriptional attenuator
This family appears at the C-terminus of members of the LytR_cpsA_psr, Pfam:PF03816, family. +PF13400 Putative Flp pilus-assembly TadE/G-like
This is an N-terminal domain on a family of putative Flp pilus-assembly proteins. The exact function is not known. The Flp-pilus biogenesis genes include the Tad genes, and some members of this family are putatively assigned as being TadG [1,2].. +PF13401 AAA domain
+PF13402 Peptidase M60-like family
This family of peptidases contains a zinc metallopeptidase motif (HEXXHX(8,28)E) and possesses mucinase activity .. +PF13403 Hint domain
This domain is found in inteins.. +PF13404 AsnC-type helix-turn-helix domain
+PF13405 EF_hand_4;
+PF13406 Transglycosylase SLT domain
This family is related to the SLT domain Pfam:PF01464.. +PF13407 Periplasmic binding protein domain
This domain is found in a variety of bacterial periplasmic binding proteins.. +PF13408 Recombinase zinc beta ribbon domain
This short bacterial protein contains a zinc ribbon domain that is likely to be DNA-binding. This domain is found in site specific recombinase proteins. This family appears most closely related to Pfam:PF04606.. +PF13409 Glutathione S-transferase, N-terminal domain
This family is closely related to Pfam:PF02798.. +PF13410 Glutathione S-transferase, C-terminal domain
This domain is closely related to Pfam:PF00043.. +PF13411 MerR HTH family regulatory protein
+PF13412 Winged helix-turn-helix DNA-binding
+PF13413 Helix-turn-helix domain
This domain is a helix-turn-helix domain that probably binds to DNA.. +PF13414 TPR repeat
+PF13415 Galactose oxidase, central domain
+PF13416 Bacterial extracellular solute-binding protein
This family includes bacterial extracellular solute-binding proteins.. +PF13417 Glutathione S-transferase, N-terminal domain
+PF13418 Galactose oxidase, central domain
+PF13419 Haloacid dehalogenase-like hydrolase
+PF13420 Acetyltransferase (GNAT) domain
+PF13421 SPFH domain-Band 7 family
+PF13422 Domain of unknown function (DUF4110)
Pfam-B_8504 (release 24.0). This is a family that is found predominantly at the C-terminus of Kelch-containing proteins. However, the exact function of this region is not known.. +PF13423 Ubiquitin carboxyl-terminal hydrolase
+PF13424 Tetratricopeptide repeat
+PF13425 O-antigen ligase like membrane protein
+PF13426 PAS domain
+PF13427 Domain of unknown function (DUF4111)
Although the exact function of this domain is not known it frequently appears downstream of the family, Nucleotidyltransferase, Pfam:PF01909. It is also found in species associated with methicillin-resistant bacteria.. +PF13428 Tetratricopeptide repeat
+PF13429 Tetratricopeptide repeat
+PF13430 Domain of unknown function (DUF4112)
This family has several highly conserved GD sequence-motifs of unknown function. The family is found in bacteria, archaea and fungi.. +PF13431 Tetratricopeptide repeat
+PF13432 Tetratricopeptide repeat
+PF13433 Periplasmic binding protein domain
This domain is found in a variety of bacterial periplasmic binding proteins.. +PF13434 L_oxygenase;
L-lysine 6-monooxygenase (NADPH-requiring). This is a family of Rossmann fold oxidoreductases that catalyses the NADPH-dependent hydroxylation of lysine at the N6 position, EC:1.14.13.59.. +PF13435 Cytochrome c554 and c-prime
This family is a tetra-haem cytochrome involved in the oxidation of ammonia. It is found in both phototrophic and denitrifying bacteria.. +PF13436 Glycine-zipper containing OmpA-like membrane domain
+PF13437 HlyD family secretion protein
This is a family of largely bacterial haemolysin translocator HlyD proteins.. +PF13438 Domain of unknown function (DUF4113)
Although the function is not known this domain occurs almost invariably at the very C-terminus of the IMS family DNA-polymerase repair proteins, IMS, Pfam:PF00817.. +PF13439 Glycosyltransferase Family 4
+PF13440 Polysaccharide biosynthesis protein
+PF13441 YMGG-like Gly-zipper
+PF13442 Cytochrome C oxidase, cbb3-type, subunit III
+PF13443 Cro/C1-type HTH DNA-binding domain
This is a helix-turn-helix domain that probably binds to DNA.. +PF13444 Acetyltransferase (GNAT) domain
This family contains proteins with N-acetyltransferase functions.. +PF13445 zf-RING_LisH;
RING-type zinc-finger. Pfam-B_49 (release 24.0). This zinc-finger is a typical RING-type of plant ubiquitin ligases .. +PF13446 A repeated domain in UCH-protein
Pfam-B_2127 (release 24.0). This is a repeated domain found in de-ubiquitinating proteins. Its exact function is not known although it is likely to be involved in the binding of the Ubps in the complex with Rsp5 and Rup1.. +PF13447 Seven times multi-haem cytochrome CxxCH
This domain carries up to seven CxxCH repeated sequence motifs, characteristic of multi-haem cytochromes.. +PF13448 Domain of unknown function (DUF4114)
This is a repeated domain that is found towards the C-terminal of many different types of bacterial proteins. There are highly conserved glutamate and aspartate residues suggesting that this domain might carry enzymic activity.. +PF13449 Esterase-like activity of phytase
This is a repeated domain that carries several highly conserved Glu and Asp residues indicating the likelihood that the domain incorporates the enzymic activity of the PLC-like phospho-diesterase part of the proteins. . +PF13450 NAD(P)-binding Rossmann-like domain
+PF13451 Probable zinc-binding domain
This is a probable zinc-binding domain with two CxxC sequence motifs, found in various families of bacteria.. +PF13452 zf-MaoC;
N-terminal half of MaoC dehydratase. It is clear from the structures of bacterial members of MaoC dehydratase, Pfam:PF01575, that the full-length functional dehydratase enzyme is made up of two structures that dimerise to form a whole. Divergence of the N- and C- monomers in higher eukaryotes has led to two distinct domains, this one and MaoC_dehydratas. However, in order to function as an enzyme both are required together.. +PF13453 Transcription factor zinc-finger
+PF13454 FAD-NAD(P)-binding
+PF13455 Meiotically up-regulated gene 113
Pfam-B_48720 (release 24.0). This is a family of fungal proteins found to be up-regulated in meiosis.. +PF13456 Reverse transcriptase-like
This domain is found in plants and appears to be part of a retrotransposon.. +PF13457 SH3-like domain
+PF13458 Periplasmic binding protein
This family includes a diverse range of periplasmic binding proteins.. +PF13459 4Fe-4S single cluster domain
+PF13460 NADH(P)-binding
+PF13461 Cell-wall surface anchor repeat
+PF13462 Thioredoxin
+PF13463 Winged helix DNA-binding domain
+PF13464 Domain of unknown function (DUF4115)
This short domain is often found at the C-terminus of proteins containing a helix-turn-helix domain. The function of this domain is unknown.. +PF13465 Zinc-finger double domain
+PF13466 STAS domain
The STAS (after Sulphate Transporter and AntiSigma factor antagonist) domain is found in the C-terminal region of Sulphate transporters and bacterial antisigma factor antagonists. It has been suggested that this domain may have a general NTP binding function .. +PF13467 Ribbon-helix-helix domain
This short bacterial protein contains a ribbon-helix-helix domain that is likely to be DNA-binding.. +PF13468 Glyoxalase-like domain
This domain is related to the Glyoxalase domain Pfam:PF00903.. +PF13469 Sulfotransferase family
+PF13470 PIN domain
Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases).. +PF13471 Transglutaminase-like superfamily
This family includes uncharacterised proteins that are related to the transglutaminase like domain Pfam:PF01841.. +PF13472 GDSL-like Lipase/Acylhydrolase family
This family of presumed lipases and related enzymes are similar to Pfam:PF00657.. +PF13473 Cupredoxin-like domain
The cupredoxin-like fold consists of a beta-sandwich with 7 strands in 2 beta-sheets, which is arranged in a Greek-key beta-barrel.. +PF13474 SnoaL-like domain
This family contains a large number of proteins that share the SnoaL fold.. +PF13475 Domain of unknown function (DUF4116)
+PF13476 AAA domain
+PF13477 Glycosyl transferase 4-like
+PF13478 XdhC Rossmann domain
This entry is the rossmann domain found in the Xanthine dehydrogenase accessory protein.. +PF13479 AAA domain
This AAA domain is found in a wide variety of presumed phage proteins.. +PF13480 Acetyltransferase (GNAT) domain
This family contains proteins with N-acetyltransferase functions.. +PF13481 AAA domain
This AAA domain is found in a wide variety of presumed DNA repair proteins.. +PF13482 RNase_H superfamily
+PF13483 Beta-lactamase superfamily domain
This family is part of the beta-lactamase superfamily and is related to Pfam:PF00753.. +PF13484 4Fe-4S double cluster binding domain
+PF13485 Peptidase MA superfamily
+PF13486 Reductive dehalogenase subunit
This family is most frequently associated with a Fer4 iron-sulfur cluster towards the C-terminal region.. +PF13487 HD domain
HD domains are metal dependent phosphohydrolases.. +PF13488 Glycine zipper
+PF13489 Methyltransferase domain
This family appears to be a methyltransferase domain.. +PF13490 Putative zinc-finger
This is a putative zinc-finger found in some anti-sigma factor proteins.. +PF13491 Domain of unknown function (DUF4117)
This family is frequently found on DNA-translocase FtsK proteins at the N-terminus. The function is not known but might well be enzymatic.. +PF13492 GAF domain
+PF13493 Domain of unknown function (DUF4118)
This domain is found in a wide variety of bacterial signalling proteins. It is likely to be a transmembrane domain involved in ligand sensing.. +PF13494 Domain of unknown function, B. Theta Gene description (DUF4119)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_0594. Based on Bacteroides thetaiotaomicron gene BT_0594, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. +PF13495 Phage_integr_N2;
Phage integrase, N-terminal SAM-like domain. +PF13496 Domain of unknown function (DUF4120)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_2585. Based on Bacteroides thetaiotaomicron gene BT_2585, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2].. +PF13497 Domain of unknown function (DUF4121)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_2588. Based on Bacteroides thetaiotaomicron gene BT_2588, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2].. +PF13498 Domain of unknown function (DUF4122)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_2607. Based on Bacteroides thetaiotaomicron gene BT_2607, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2].. +PF13499 EF_hand_5;
+PF13500 AAA domain
This domain is found in a number of proteins involved in cofactor biosynthesis such as dethiobiotin synthase and cobyric acid synthase. This domain contains a P-loop motif.. +PF13501 Sulfur oxidation protein SoxY
This domain is found in the sulfur oxidation protein SoxY. It is closely related to the Desulfoferrodoxin family Pfam:PF01880. Dissimilatory oxidation of thiosulfate is carried out by the ubiquitous sulfur-oxidizing (Sox) multi-enzyme system. In this system, SoxY plays a key role, functioning as the sulfur substrate-binding protein that offers its sulfur substrate, which is covalently bound to a conserved C-terminal cysteine, to another oxidizing Sox enzyme . The structure of this domain shows an Ig-like fold .. +PF13502 AsmA-like C-terminal region
This family is similar to the C-terminal of the AsmA protein of E. coli.. +PF13503 Domain of unknown function (DUF4123)
This presumed domain is functionally uncharacterised. It is about 120 amino acids in length and contains several conserved motifs that may be functionally important. This domain is sometimes associated with the FHA domain.. +PF13504 Leucine rich repeat
+PF13505 Outer membrane protein beta-barrel domain
This domain is found in a wide range of outer membrane proteins. This domain assumes a membrane bound beta-barrel fold.. +PF13506 Glycosyl transferase family 21
This is a family of ceramide beta-glucosyltransferases - EC:2.4.1.80.. +PF13507 CobB/CobQ-like glutamine amidotransferase domain
This family captures members that are not found in Pfam:PF00310, Pfam:PF07685 and Pfam:PF13230.. +PF13508 Acetyltransferase (GNAT) domain
This domain catalyses N-acetyltransferase reactions.. +PF13509 S1 domain
The S1 domain was originally identified as a repeat motif in the ribosomal S1 protein. It was later identified in a wide range of proteins. The S1 domain has an OB-fold structure. The S1 domain is involved in nucleic acid binding.. +PF13510 2Fe-2S iron-sulfur cluster binding domain
The 2Fe-2S ferredoxin family have a general core structure consisting of beta(2)-alpha-beta(2) which is a beta-grasp type fold. The domain is around one hundred amino acids with four conserved cysteine residues to which the 2Fe-2S cluster is ligated. This cluster appears within sarcosine oxidase proteins.. +PF13511 Domain of unknown function (DUF4124)
This presumed domain is found in a variety of bacterial proteins. It is found associated at the N-terminus associated with other domains such as the SLT domain and glutaredoxin domains in some proteins. The function of this domain is unknown, but it may have an Ig-like fold.. +PF13512 Tetratricopeptide repeat
+PF13513 HEAT-like repeat
The HEAT repeat family is related to armadillo/beta-catenin-like repeats (see Pfam:PF00514). These EZ repeats are found in subunits of cyanobacterial phycocyanin lyase and other proteins and probably carry out a scaffolding role.. +PF13514 AAA domain
This domain is found in a number of double-strand DNA break proteins. This domain contains a P-loop motif.. +PF13515 Fusaric acid resistance protein-like
+PF13516 Leucine Rich repeat
+PF13517 Repeat domain in Vibrio, Colwellia, Bradyrhizobium and Shewanella
This domain of about 100 residues is found in multiple (up to 35) copies in long proteins from several species of Vibrio, Colwellia, Bradyrhizobium, and Shewanella (hence the name VCBS) and in smaller copy numbers in proteins from several other bacteria. The large protein size and repeat copy numbers, species distribution, and suggested activities of several member proteins suggests a role for this domain in adhesion (TIGR).. +PF13518 Helix-turn-helix domain
This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding.. +PF13519 von Willebrand factor type A domain
+PF13520 Amino acid permease
+PF13521 AAA domain
+PF13522 Glutamine amidotransferase domain
This domain is a class-II glutamine amidotransferase domain found in a variety of enzymes, such as asparagine synthetase and glutamine--fructose-6-phosphate transaminase.. +PF13523 Acetyltransferase (GNAT) domain
This domain catalyses N-acetyltransferase reactions.. +PF13524 Glycosyl transferases group 1
+PF13525 Outer membrane lipoprotein
This outer membrane lipoprotein carries a TPR-like region towards its N-terminal. YfiO in E.coli is one of three outer membrane lipoproteins that form a multicomponent YaeT complex in the outer membrane of Gram-negative bacteria that is involved in the targeting and folding of beta-barrel outer membrane proteins. YfiO is the only essential lipoprotein component of the complex. It is required for the proper assembly and/or targeting of outer membrane proteins to the outer membrane. Through its interactions with NlpB it maintains the functional integrity of the YaeT complex.. +PF13526 Protein of unknown function (DUF4125)
+PF13527 Acetyltransferase (GNAT) domain
This domain catalyses N-acetyltransferase reactions.. +PF13528 Glycosyl transferase family 1
+PF13529 Peptidase_C39 like family
+PF13530 Sterol carrier protein domain
+PF13531 Bacterial extracellular solute-binding protein
This family includes bacterial extracellular solute-binding proteins.. +PF13532 2OG-Fe(II) oxygenase superfamily
+PF13533 Biotin-lipoyl like
+PF13534 4Fe-4S dicluster domain
This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich.. +PF13535 ATP-grasp domain
This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity.. +PF13536 Multidrug resistance efflux transporter
This is a membrane protein family acting as a multidrug resistance efflux transporter.. +PF13537 Glutamine amidotransferase domain
This domain is a class-II glutamine amidotransferase domain found in a variety of enzymes such as asparagine synthetase and glutamine-fructose-6-phosphate transaminase.. +PF13538 UvrD-like helicase C-terminal domain
This domain is found at the C-terminus of a wide variety of helicase enzymes. This domain has a AAA-like structural fold.. +PF13539 D-alanyl-D-alanine carboxypeptidase
This family resembles VanY, Pfam:PF02557, which is part of the peptidase M15 family.. +PF13540 Regulator of chromosome condensation (RCC1) repeat
+PF13541 Subunit ChlI of Mg-chelatase
+PF13542 Helix-turn-helix domain of transposase family ISL3
+PF13543 SAM like domain present in kinase suppressor RAS 1
+PF13544 Type IV pilin N-term methylation site GFxxxE
This short sequence motif appears at the N-terminus of type IV prokaryotic filamentous adhesins or pilins. The N-terminal residue, which is methylated, is hydrophobic (generally a phenylalanine or a methionine), and this leader peptide is hydrophilic. The fifth residue of the mature sequence is a glutamate which seems to be required for the methylation step.. +PF13545 Crp-like helix-turn-helix domain
This family represents a crp-like helix-turn-helix domain that is likely to bind DNA.. +PF13546 DDE superfamily endonuclease
This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction.. +PF13547 GTA TIM-barrel-like domain
This domain is found in the gene transfer agent protein. An unusual system of genetic exchange exists in the purple nonsulfur bacterium Rhodobacter capsulatus. DNA transmission is mediated by a small bacteriophage-like particle called the gene transfer agent (GTA) that transfers random 4.5-kb segments of the producing cell's genome to recipient cells, where allelic replacement occurs . The genes involved in this process appear to be found widely in bacteria . According to the SUPERFAMILY database this domain has a TIM barrel fold.. +PF13548 Domain of unknown function (DUF4126)
+PF13549 ATP-grasp domain
This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity.. +PF13550 Putative phage tail protein
This putative domain is found in the large gene transfer agent protein. These produce defective phage like particles. This domain is similar to other phage-tail protein families.. +PF13551 Winged helix-turn helix
This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding.. +PF13552 Protein of unknown function (DUF4127)
This family of uncharacterised bacterial proteins are about 500 amino acids in length.. +PF13553 Function to find
Weichenberger CX, D'Osualdo A. Joint Center of Structural Genomics (JCSG). The function to find (FIIND) was initially discovered in two proteins, NLRP1 (aka NALP1, CARD7, NAC, DEFCAP) and CARD8 (aka TUCAN, Cardinal). NLRP1 is a member of the Nod-like receptor (NLR) protein superfamily and is involved in apoptosis and inflammation. To date, it is the only NLR protein known to have a FIIND domain. The FIIND domain is also present in the CARD8 protein where, like in NLRP1, it is followed by a C-terminal CARD domain. Both proteins are described to form an "inflammasome", a macro-molecular complex able to process caspase 1 and activate pro-IL1beta. The FIIND domain is present in only a very small subset of the kingdom of life, comprising primates, rodents (mouse, rat), carnivores (dog) and a few more, such as horse. The function of this domain is yet to be determined. Publications describing the newly discovered NLRP1 protein failed to identify it as a separate domain; for example, it was taken as part of the adjacent leucine rich repeat domain (LRR). Upon discovery of CARD8 it was noted that the N-terminal region shared significant sequence identity with an undescribed region in NLRP1. Before getting its final name, FIIND, this domain was termed NALP1-associated domain (NAD).. +PF13554 Bacteriophage related domain of unknown function
Wahab A, Serrano P, Geralt M, Wuthrich K. Bordetella bronchiseptica RB50 PDB:2L25. The three-dimensional structure of NP_888769.1 (PDB:2L25) reveals a tail terminator protein gpU fold, which suggests that the protein could have a bacteriophage origin.. +PF13555 P-loop containing region of AAA domain
+PF13556 PucR C-terminal helix-turn-helix domain
This helix-turn-helix domain is often found at the C-terminus of PucR-like transcriptional regulators such as Swiss:O32138 and is likely to be DNA-binding.. +PF13557 Putative MetA-pathway of phenol degradation
+PF13558 Putative exonuclease SbcCD, C subunit
Possible exonuclease SbcCD, C subunit, on AAA proteins.. +PF13559 Domain of unknown function (DUF4129)
This presumed domain is found at the C-terminus of proteins that contain a transglutaminase core domain. The function of this domain is unknown. The domain has a conserved TXXE motif.. +PF13560 Helix-turn-helix domain
This domain is a helix-turn-helix domain that probably binds to DNA.. +PF13561 Enoyl-(Acyl carrier protein) reductase
+PF13562 Sugar nucleotidyl transferase
This is a probable sugar nucleotidyl transferase family.. +PF13563 2'-5' RNA ligase superfamily
This family contains proteins related to Pfam:PF02834. These proteins are likely to be enzymes, but they may not share the RNA ligase activity.. +PF13564 DoxX-like family
This family of uncharacterised proteins are related to DoxX Pfam:PF07681.. +PF13565 Homeodomain-like domain
+PF13566 Domain of unknown function (DUF4130
+PF13567 Domain of unknown function (DUF4131)
This domain is frequently found to the N-terminus of the Competence domain, Pfam:PF03772.. +PF13568 Outer membrane protein beta-barrel domain
This domain is found in a wide range of outer membrane proteins. This domain assumes a membrane bound beta-barrel fold.. +PF13569 Domain of unknown function (DUF4132)
This domain might be involved in the biosynthesis of the molybdopterin cofactor in E.coli.. +PF13570 YWTD;
Coggill P, Eberhardt R. +PF13571 Domain of unknown function (DUF4133)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_0094. Based on Bacteroides thetaiotaomicron gene BT_0094, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture.. +PF13572 Domain of unknown function (DUF4134)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_0095. Based on Bacteroides thetaiotaomicron gene BT_0095, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture.. +PF13573 PbH1;
Coggill P, Eberhardt R. This repeat occurs several times in SprB, a cell surface protein involved in gliding motility in the bacterium Flavobacterium johnsoniae . +PF13574 Metallo-peptidase family M12B Reprolysin-like
This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B.. +PF13575 Domain of unknown function (DUF4135)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 380 amino acids in length. The family is found in association with Pfam:PF05147. This domain may be involved in synthesis of a lantibiotic compound.. +PF13576 Pentapeptide repeats (9 copies)
+PF13577 SnoaL-like domain
This family contains a large number of proteins that share the SnoaL fold.. +PF13578 Methyltransferase domain
This family appears to be a methyltransferase domain.. +PF13579 Glycosyl transferase 4-like domain
+PF13580 SIS domain
SIS (Sugar ISomerase) domains are found in many phosphosugar isomerases and phosphosugar binding proteins. SIS domains are also found in proteins that regulate the expression of genes involved in synthesis of phosphosugars.. +PF13581 Histidine kinase-like ATPase domain
+PF13582 Metallo-peptidase family M12B Reprolysin-like
This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B.. +PF13583 Metallo-peptidase family M12B Reprolysin-like
This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B.. +PF13584 Oxygen tolerance
This family of proteins carries up to three membrane spanning regions and is involved in tolerance to oxygen in Bacteroides spp.. +PF13585 C-terminal domain of CHU protein family
The function of this C-terminal domain is not known; there are several conserved tryptophan and asparagine residues.. +PF13586 Transposase DDE domain
Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis.. +PF13587 N-terminal domain of DJ-1_PfpI family
This domain is found at the N-terminus of proteins from the DJ-1_PfpI family, Pfam:PF01965. The exact function is not known.. +PF13588 Type I restriction enzyme R protein N terminus (HSDR_N)
This family consists of a number of N terminal regions found in type I restriction enzyme R (HSDR) proteins. Restriction and modification (R/M) systems are found in a wide variety of prokaryotes and are thought to protect the host bacterium from the uptake of foreign DNA . Type I restriction and modification systems are encoded by three genes: hsdR, hsdM, and hsdS. The three polypeptides, HsdR, HsdM, and HsdS, often assemble to give an enzyme (R2M2S1) that modifies hemimethylated DNA and restricts unmethylated DNA .. +PF13589 Histidine kinase-, DNA gyrase B-, and HSP90-like ATPase
This family represents, additionally, the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90.. +PF13590 Domain of unknown function (DUF4136)
This domain is found in bacterial lipoproteins. The function is not known.. +PF13591 MerR HTH family regulatory protein
+PF13592 Winged helix-turn helix
This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding.. +PF13593 SBF-like CPA transporter family (DUF4137)
These family members are membrane transporter proteins of the CPA and AT superfamily.. +PF13594 Amidohydrolase
This family of enzymes are a part of a large metal dependent hydrolase superfamily . The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source . This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit . Dihydroorotases (EC:3.5.2.3) are also included [4-5].. +PF13595 Domain of unknown function (DUF4138)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_4780. Based on Bacteroides thetaiotaomicron gene BT_4780, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2].. +PF13596 PAS domain
+PF13597 Anaerobic ribonucleoside-triphosphate reductase
+PF13598 Domain of unknown function (DUF4139)
This family is usually found at the C-terminus of proteins.. +PF13599 Pentapeptide repeats (9 copies)
+PF13600 N-terminal domain of unknown function (DUF4140)
This family is often found at the N-terminus of its member proteins, with DUF4139, Pfam:PF13598, at the C-terminus.. +PF13601 Winged helix DNA-binding domain
+PF13602 Zinc-binding dehydrogenase
+PF13603 Leucyl-tRNA synthetase, Domain 2
This is a family of the conserved region of Leucine-tRNA ligase or Leucyl-tRNA synthetase, EC:6.1.1.4.. +PF13604 AAA domain
This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. There is a Walker A and Walker B.. +PF13605 Domain of unknown function (DUF4141)
Ellrott K, Bakolitsa C. Bacteroides thetaiotaomicron: BT_4772. Based on Bacteroides thetaiotaomicron gene BT_4772, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2].. +PF13606 Ankyrin repeat
Ankyrins are multifunctional adaptors that link specific proteins to the membrane-associated, spectrin- actin cytoskeleton. This repeat-domain is a 'membrane-binding' domain of up to 24 repeated units, and it mediates most of the protein's binding activities.. +PF13607 Succinyl-CoA ligase like flavodoxin domain
This domain contains the catalytic domain from Succinyl-CoA ligase alpha subunit and other related enzymes. A conserved histidine is involved in phosphoryl transfer.. +PF13608 Protein P3 of Potyviral polyprotein
This is the P3 protein section of the Potyviridae polyproteins. The function is not known except that the protein is essential to viral survival.. +PF13609 Gram-negative porin
+PF13610 DDE domain
This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26.. +PF13611 Serine peptidase of plant viral polyprotein, P1
Rawlings N, Coggill P. This family is the P1 protein of the Potyviridae polyproteins that is a serine peptidase at the N-terminus. The catalytic triad in Swiss:Q65730, the ssRNA positive-strand Brome streak mosaic rymovirus, is His-311, Asp-322 and Ser-355.. +PF13612 Transposase DDE domain
Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contains three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction .. +PF13613 DDE_4_2;
Helix-turn-helix of DDE superfamily endonuclease. This domain is the probable DNA-binding region of transposase enzymes, necessary for efficient DNA transposition. Most of the members derive from the IS superfamily IS5 and rather fewer from IS4.. +PF13614 AAA domain
This family includes a wide variety of AAA domains including some that have lost essential nucleotide binding residues in the P-loop.. +PF13615 Putative alanine racemase
This is a family of eukaryotic proteins which are putatively alanine racemase.. +PF13616 PPIC-type PPIASE domain
Rotamases increase the rate of protein folding by catalysing the interconversion of cis-proline and trans-proline.. +PF13617 YnbE-like lipoprotein
This family includes lipoproteins similar to E. coli YnbE Swiss:P64448. Proteins in this family are typically 60 amino acids in length and contain an N-terminal lipid attachment site, which has been included in the alignment to increase sensitivity. The specific function of these proteins is unknown.. +PF13618 Gluconate 2-dehydrogenase subunit 3
This family corresponds to subunit 3 of the Gluconate 2-dehydrogenase enzyme that catalyses the conversion of gluconate to 2-dehydro-D-gluconate EC:1.1.99.3.. +PF13619 KTSC domain
This short domain is named after Lysine tRNA synthetase C-terminal domain. It is found at the C-terminus of some Lysyl tRNA synthetases as well as a single domain in bacterial proteins. The domain is about 60 amino acids in length and contains a reasonably conserved YXY motif in the centre of the sequence. The function of this domain is unknown but it could be an RNA binding domain.. +PF13620 Carboxypeptidase regulatory-like domain
+PF13621 Cupin-like domain
This cupin like domain shares similarity to the JmjC domain.. +PF13622 Thioesterase-like superfamily
This family contains a wide variety of enzymes, principally thioesterases. These enzymes are part of the Hotdog fold superfamily .. +PF13623 SurA N-terminal domain
This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment.. +PF13624 SurA N-terminal domain
This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment.. +PF13625 Helicase conserved C-terminal domain
This domain family is found in a wide variety of helicases and helicase-related proteins.. +PF13627 Prokaryotic lipoprotein-attachment site
In prokaryotes, membrane lipoproteins are synthesized with a precursor signal peptide, which is cleaved by a specific lipoprotein signal peptidase (signal peptidase II). The peptidase recognizes a conserved sequence and cuts upstream of a cysteine residue to which a glyceride-fatty acid lipid is attached .. +PF13628 Domain of unknown function (DUF4142)
This is a bacterial family of unknown function.. +PF13629 Pilus formation protein N terminal region
+PF13630 SdpI/YhfL protein family
This family of proteins includes the SdpI and YhfL proteins from B. subtilis. The SdpI protein is a multipass integral membrane protein that protects toxin-producing cells from being killed. Killing is mediated by the exported toxic protein SdpC an extracellular protein that induces the synthesis of an immunity protein .. +PF13631 Cytochrome b(N-terminal)/b6/petB
+PF13632 Glycosyl transferase family group 2
Members of this family of prokaryotic proteins include putative glucosyltransferases, which are involved in bacterial capsule biosynthesis .. +PF13633 Prokaryotic N-terminal methylation site
This short motif directs methylation of the conserved phenylalanine residue. It is most often found at the N-terminus of pilins and other proteins involved in secretion, see Pfam:PF00114, Pfam:PF05946, Pfam:PF02501 and Pfam:PF07596. It is often described as TypeIV_pilin_GFxxxE.. +PF13634 Nucleoporin FG repeat region
This family includes a number of FG repeats that are found in nucleoporin proteins. This family includes the yeast nucleoporins Nup116, Nup100, Nup49, Nup57 and Nup 145.. +PF13635 Domain of unknown function (DUF4143)
This domain is almost always found C-terminal to an ATPase core family.. +PF13636 pre-rRNA processing and ribosome biogenesis
This family represents an evolutionarily conserved sequence motif of a set of proteins that are involved in pre-rRNA processing and ribosome biogenesis in S. cerevisiae.. +PF13637 Ankyrin repeats (many copies)
+PF13638 PIN domain
Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases).. +PF13639 Ring finger domain
+PF13640 2OG-Fe(II) oxygenase superfamily
This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily .. +PF13641 Glycosyltransferase like family 2
Members of this family of prokaryotic proteins include putative glucosyltransferase, which are involved in bacterial capsule biosynthesis .. +PF13642 protein structure with unknown function
Serrano P, Geralt M, Mohanty B, Horst R, Wuthrich K. Shewanella amazonensis SB2B PDB:2L6O. A family based on the three-dimensional structure of YP_926445.1 (PDB:2L6O). +PF13643 Domain of unknown function (DUF4145)
This domain is found in a variety of restriction endonuclease enzymes. The exact function of this domain is uncertain.. +PF13644 DKNYY family
This family represents a group of proteins found enriched in fusobacteria. These proteins contain many repeats of a DKNXXYY motif. The repeats are spaced at about 35 amino acid residues intervals. These proteins are likely to be associated with the membrane. The specific function of these proteins is unknown.. +PF13645 L,D-transpeptidase catalytic domain
This family is related to Pfam:PF03734.. +PF13646 HEAT repeats
This family includes multiple HEAT repeats.. +PF13647 Glycosyl hydrolase family 80 of chitosanase A
Naumoff D, Coggill P. This is a small family of bacterial chitosanases.. +PF13648 Lipocalin-like domain
+PF13649 Methyltransferase domain
This family appears to be a methyltransferase domain.. +PF13650 Aspartyl protease
This family consists of predicted aspartic proteases, typically from 180 to 230 amino acids in length, in MEROPS clan AA. This model describes the well-conserved 121-residue C-terminal region. The poorly conserved, variable length N-terminal region usually contains a predicted transmembrane helix.. +PF13651 Adenine-specific methyltransferase EcoRI
This methylase recognizes the double-stranded sequence GAATTC, causes specific methylation on A-3 on both strands, and protects the DNA from cleavage by the EcoRI endonuclease.. +PF13652 Domain of unknown function (DUF4146)
This is a family of short proteins which appear to be pre-cursors. All members are from Pseudomonas spp. The function is not known.. +PF13653 Glycerophosphoryl diester phosphodiesterase family
This family also includes glycerophosphoryl diester phosphodiesterases as well as agrocinopine synthase, the similarity to GDPD has been noted . This family appears to have weak but not significant matches to mammalian phospholipase C Pfam:PF00388, which suggests that this family may adopt a TIM barrel fold.. +PF13654 AAA domain
This family includes a wide variety of AAA domains including some that have lost essential nucleotide binding residues in the P-loop.. +PF13655 N-terminal domain of reverse transcriptase
This domain is found at the N-terminus of bacterial reverse transcriptases.. +PF13656 RNA polymerase Rpb3/Rpb11 dimerisation domain
The two eukaryotic subunits Rpb3 and Rpb11 dimerise to form a platform onto which the other subunits of the RNA polymerase assemble (D/L in archaea). The prokaryotic equivalent of the Rpb3/Rpb11 platform is the alpha-alpha dimer. The dimerisation domain of the alpha subunit/Rpb3 is interrupted by an insert domain (Pfam:PF01000). Some of the alpha subunits also contain iron-sulphur binding domains (Pfam:PF00037). Rpb11 is found as a continuous domain. Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, Rpb11 subunits from eukaryotes, RpoD subunits from archaeal spp, and RpoL subunits from archaeal spp. Many of the members of this family carry only the N-terminal region of Rpb11.. +PF13657 HipA N-terminal domain
This domain is found to the N-terminus of HipA-like proteins. It is also found in isolation in some proteins.. +PF13659 Methyltransferase domain
This family contains methyltransferase domains.. +PF13660 Domain of unknown function (DUF4147)
This domain is frequently found at the N-terminus of proteins carrying the glycerate kinase-like domain MOFRL, Pfam:PF05161.. +PF13661 2OG-Fe(II) oxygenase superfamily
This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily .. +PF13662 Toprim domain
The toprim domain is found in a wide variety of enzymes involved in nucleic acid manipulation .. +PF13663 Domain of unknown function (DUF4148)
+PF13664 Domain of unknown function (DUF4149)
+PF13665 Domain of unknown function (DUF4150)
+PF13667 ThiC-associated domain
This domain is most frequently found at the N-terminus of the ThiC family of proteins, Pfam:PF01964. The function is not known.. +PF13668 Ferritin-like domain
This family contains ferritins and other ferritin-like proteins such as members of the DPS family and bacterioferritins.. +PF13669 Glyoxalase/Bleomycin resistance protein/Dioxygenase superfamily
+PF13670 Peptidase propeptide and YPEB domain
This region is likely to have a protease inhibitory function (personal obs:C Yeats). The name is derived from Peptidase & Bacillus subtilis YPEB.. +PF13671 AAA domain
This family of domains contain only a P-loop motif, that is characteristic of the AAA superfamily. Many of the proteins in this family are just short fragments so there is no Walker B motif.. +PF13672 Protein phosphatase 2C
Protein phosphatase 2C is a Mn++ or Mg++ dependent protein serine/threonine phosphatase.. +PF13673 Acetyltransferase (GNAT) domain
This family contains proteins with N-acetyltransferase functions such as Elp3-related proteins.. +PF13675 Type IV pili methyl-accepting chemotaxis transducer N-term
This domain is found on many type IV pili methyl-accepting chemotaxis transducer proteins where there is also a HAMP, signature towards the C-terminus.. +PF13676 TIR domain
This is a family of bacterial Toll-like receptors.. +PF13677 Membrane MotB of proton-channel complex MotA/MotB
This is the MotB member of the E.coli MotA/MotB proton-channel complex that forms the stator of the bacterial membrane flagellar motor. Key residues act as a plug to prevent premature proton flow. The plug is in the periplasm just C-terminal to the MotB TM, consisting of an amphipathic alpha helix flanked by Pro-52 and Pro-65, eg in Swiss:D3V2T1. In addition to the Pro residues, Ile-58, Tyr-61, and Phe 62 are also essential for plug function .. +PF13678 NFkB-p65-degrading zinc protease
Rawlings N, Coggill P. This family of bacterial metallo-peptidases is thought to compromise the inflammatory response by degrading p65 thereby down-regulating the NF-kappaB signalling pathway . NF-kappa-B is a pleiotropic transcription factor which is present in almost all cell types and is involved in many biological processes such as inflammation, immunity, differentiation, cell growth, tumorigenesis and apoptosis. NF-kappa-B is a homo- or heterodimeric complex formed by the Rel-like domain-containing proteins RELA/p65, RELB, NFKB1/p105, NFKB1/p50, REL and NFKB2/p52; and the heterodimeric p65-p50 complex appears to be most abundant one.. +PF13679 Methyltrans_27;
Methyltransferase domain. This family appears to be a methyltransferase domain.. +PF13680 Protein of unknown function (DUF4152)
This family of proteins is functionally uncharacterised. This family of proteins is found in archaea. Proteins in this family are approximately 230 amino acids in length. The structure of PF2046 from Pyrococcus furiosus has been solved. It shows an RNaseH like fold that conserves critical catalytic residues . This suggests that these proteins may cleave nucleic acid.. +PF13681 Type IV pilus assembly protein PilX C-term
This family is likely to be the C-terminal region of type IV pilus assembly PilX or PilW proteins.. +PF13682 MCPsignal_assoc;
Chemoreceptor zinc-binding domain. The chemoreceptor zinc-binding domain (CZB) is found in bacterial signal transduction proteins - most frequently receptors involved in chemotaxis and motility, but also in c-di-GMP signalling and nitrate/nitrite-sensing. Originally discovered in the cytoplasmic chemoreceptor TlpD from Helicobacter pylori, it is often found C-terminal to the MCPsignal domain in cytoplasmic chemoreceptor proteins. The CZB domain contains a core sequence motif, Hxx[WFYL]x21-28Cx[LFMVI]Gx[WFLVI]x18-27HxxxH. The highly-conserved H-C-H-H residues of this motif are believed to coordinate zinc; mutating the latter two histidines of the motif to alanines abolishes Zn binding. This domain binds zinc with high affinity, with a Kd in the femtomolar range. Although the function of the CZB domain is not yet known, scientists have speculated that it may function as either an unknown signal input domain, based on its frequent association with signalling output domains, or as a domain that helps to stabilise protein tertiary or quaternary structure.. +PF13683 Integrase core domain
+PF13684 Dihydroxyacetone kinase family
This is the kinase domain of the dihydroxyacetone kinase family.. +PF13685 Iron-containing alcohol dehydrogenase
+PF13686 DsrE/DsrF/DrsH-like family
DsrE is a small soluble protein involved in intracellular sulfur reduction . The family also includes YrkE proteins.. +PF13687 Domain of unknown function (DUF4153)
Members of this family are annotated as putative inner membrane proteins.. +PF13688 Peptidase_M84;
Metallo-peptidase family M12. +PF13689 Domain of unknown function (DUF4154)
This family of proteins is found in bacteria. Proteins in this family are typically between 172 and 207 amino acids in length. Many members are annotated as valyl-tRNA synthetase but this could not be confirmed.. +PF13690 Chemotaxis phosphatase CheX
CheX is very closely related to the CheC chemotaxis phosphatase, but it dimerises in a different way, via a continuous beta sheet between the subunits. CheC and CheX both dephosphorylate CheY, although CheC requires binding of CheD to achieve the activity of CheX. The ability of bacteria to modulate their swimming behaviour in the presence of external chemicals (nutrients and repellents) is one of the most rudimentary behavioural responses known, but the individual components are very sensitively tuned .. +PF13691 tRNase Z endonuclease
This is a family of tRNase Z enzymes that are closely related structurally to the Lactamase_B family members. tRNase Z is the endonuclease that is involved in tRNA 3'-end maturation through removal of the 3'-trailer sequences from tRNA precursors. The fission yeast Schizosaccharomyces pombe contains two candidate tRNase Zs encoded by two essential genes. The first, Swiss:Q10155, is targeted to the nucleus and has an SV40 nuclear localisation signal at its N-terminus, consisting of four consecutive arginine and lysine residues between residues 208 and 211 (KKRK) that is critical for the NLS function. The second, Swiss:P87168, is targeted to the mitochondria, with an N-terminal mitochondrial targeting signal within the first 38 residues .. +PF13692 Glycosyl transferases group 1
+PF13693 Winged helix-turn-helix DNA-binding
+PF13694 Sec63/Sec62 complex-interacting family
This is a family of closely related Hph proteins that are integral endoplasmic reticulum (ER) membrane proteins required for yeast survival under environmental stress conditions. They interact with several subunits of the Sec63/Sec62 complex that mediates post-translational translocation of proteins into the ER. Cells with mutant Hph1 and Hph2 proteins revealed phenotypes resembling those of mutants defective for vacuolar proton ATPase (V-ATPase) activity. The yeast V-ATPase is a multisubunit complex whose function, structure, and assembly have been well characterized. Cells with impaired V-ATPase activity fail to acidify the vacuole, cannot grow at alkaline pH, and are sensitive to high concentrations of extracellular calcium .. +PF13695 Zinc-binding domain
This is a family with several pairs of CxxC motifs possibly representing a multiple zinc-binding region. Only one pair of cysteines is associated with a highly conserved histidine residue.. +PF13696 Zinc knuckle
This is a zinc-binding domain of the form CxxCxxxGHxxxxC from a variety of different species.. +PF13698 Domain of unknown function (DUF4156)
The function of this family is unknown but members are annotated as putative lipoprotein outer membrane proteins.. +PF13699 Domain of unknown function (DUF4157)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 80 amino acids in length. This domain contains an HEXXH motif that is characteristic of many families of metallopeptidases. However, no peptidase activity has been shown for this domain.. +PF13700 Domain of unknown function (DUF4158)
The exact function of this domain is not clear, but it frequently occurs as an N-terminal region of transposase 3 or IS3 family of insertion elements.. +PF13701 Transposase DDE domain group 1
Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis.. +PF13702 Lysozyme-like
+PF13703 PepSY-associated TM helix
This family represents a conserved TM helix found in bacteria and archaea.. +PF13704 Glycosyl transferase family 2
Members of this family of prokaryotic proteins include putative glucosyltransferases,. +PF13705 TRC8 N-terminal domain
This region is found at the N-terminus of the TRC8 protein Swiss:Q8WU17. TRC8 is an E3 ubiquitin-protein ligase also known as RNF139. This region contains 12 transmembrane domains. This region has been suggested to contain a sterol sensing domain . It has been found that TRC8 protein levels are sterol responsive and that it binds and stimulates ubiquitylation of the endoplasmic reticulum anchor protein INSIG .. +PF13706 PepSY-associated TM helix
This family represents a conserved TM helix found in bacteria and archaea.. +PF13707 RloB-like protein
This family includes the RloB protein that is found within a bacterial restriction modification operon. This family includes the AbiLii protein that is found as part of a plasmid encoded phage abortive infection mechanism . Deletion within abiLii abolished the phage resistance. The family includes some proteins annotated as CRISPR Csm2 proteins.. +PF13708 Methyltransferase domain
This family contains methyltransferase domains.. +PF13709 Domain of unknown function (DUF4159)
Members of this family are hypothetical proteins. TM prediction shows them to have two transmembrane regions, with a cytosolic region of about 25 amino acids between the two, and an N-terminus outside the membrane.. +PF13710 ACT domain
ACT domains bind to amino acids and regulate associated enzyme domains. These ACT domains are found at the C-terminus of the RelA protein.. +PF13711 Domain of unknown function (DUF4160)
+PF13712 Glycosyltransferase like family
Members of this family of prokaryotic proteins include putative glucosyltransferases, which are involved in bacterial capsule biosynthesis.. +PF13713 Transcription factor BRX N-terminal domain
The BREVIS RADIX (BRX) domain was characterised as being a transcription factor in plants regulating the extent of cell proliferation and elongation in the growth zone of the root [1,2]. BRX is rate limiting for auxin-responsive gene-expression by mediating cross-talk with the brassino-steroid pathway. BRX has a ubiquitous, although quantitatively variable role in modulating the growth rate in both the root and the shoot . This family features a short region, also alpha-helical, N-terminal to the repeated alpha-helices of family BRX, Pfam:PF08381 . BRX is expressed in the vasculature and is rate-limiting for transcriptional auxin action .. +PF13714 Phosphoenolpyruvate phosphomutase
This domain includes the enzyme Phosphoenolpyruvate phosphomutase (EC:5.4.2.9). This protein Swiss:O86937 has been characterised as catalysing the formation of a carbon-phosphorus bond by converting phosphoenolpyruvate (PEP) to phosphonopyruvate (P-Pyr) . This enzyme has a TIM barrel fold.. +PF13715 Cna_B_2;
Domain of unknown function (DUF4480). Coggill P, Eberhardt R. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF07715 and Pfam:PF00593. There is a single completely conserved residue G that may be functionally important.. +PF13716 Divergent CRAL/TRIO domain
This family includes divergent members of the CRAL-TRIO domain family. This family includes ECM25 that contains a divergent CRAL-TRIO domain identified by Gallego and colleagues .. +PF13717 zinc-ribbon domain
This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR, Pfam:PF12773.. +PF13718 GNAT acetyltransferase 2
Coggill P, Eberhardt R. This domain has N-acetyltransferase activity [1,2]. It has a GCN5-related N-acetyltransferase (GNAT) fold .. +PF13719 zinc-ribbon domain
This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR, Pfam:PF12773.. +PF13720 Udp N-acetylglucosamine O-acyltransferase; Domain 2
This is domain 2, or the C-terminal domain, of Udp N-acetylglucosamine O-acyltransferase. This enzyme is a zinc-dependent enzyme that catalyses the deacetylation of UDP-3-O-((R)-3-hydroxymyristoyl)-N-acetylglucosamine to form UDP-3-O-(R-hydroxymyristoyl)glucosamine and acetate.. +PF13721 SecD export protein N-terminal TM region
This domain appears to be the first transmembrane region of the SecD export protein. SecD is directly involved in protein secretion and important for the release of proteins that have been translocated across the cytoplasmic membrane.. +PF13722 C-terminal domain on CstA (DUF4161)
This domain is found at the C-terminus of most known CstA domain-containing proteins. The function is not known.. +PF13723 Beta-ketoacyl synthase, N-terminal domain
+PF13724 DNA-binding domain
Coggill P, Eberhardt R. Pfam-B_65234 (release 24.0). This domain, often found on ovate proteins, binds to single-stranded and double-stranded DNA. Binding to DNA is not sequence-specific .. +PF13725 Possible tRNA binding domain
Coggill P, Eberhardt R. This domain, found at the C-terminus of tRNA(Met) cytidine acetyltransferase, may be involved in tRNA-binding .. +PF13726 Na+-H+ antiporter family
This family includes integral membrane proteins, some of which are Na+-H+ antiporters .. +PF13727 CoA-binding domain
+PF13728 F plasmid transfer operon protein
TraF protein undergoes proteolytic processing associated with export. The 19 amino acids at the amino terminus of the polypeptides appear to constitute a typical membrane leader peptide - not included in this family, while the remainder of the molecule is predicted to be primarily hydrophilic in character . F plasmid TraF and TraH are required for F pilus assembly and F plasmid transfer, and they are both localised to the outer membrane in the presence of the complete F transfer region, especially TraV, the putative anchor .. +PF13729 F plasmid transfer operon, TraF, protein
+PF13730 Helix-turn-helix domain
+PF13731 WxL domain surface cell wall-binding
The WxL motif appears in two or three copies in these bacterial proteins and confers a cell surface localisation function. It seems likely that this region is the cell wall-binding domain of gram-positive bacteria, and may interact with the peptidoglycan .. +PF13732 Domain of unknown function (DUF4162)
This domain is found at the C-terminus of bacterial ABC transporter proteins. The function is not known.. +PF13733 N-terminal region of glycosyl transferase group 7
This is the N-terminal half of a family of galactosyltransferases from a wide range of Metazoa with three related galactosyltransferases activities, all three of which are possessed by one sequence in some cases. EC:2.4.1.90, N-acetyllactosamine synthase; EC:2.4.1.38, Beta-N-acetylglucosaminyl-glycopeptide beta-1,4- galactosyltransferase; and EC:2.4.1.22 Lactose synthase. Note that N-acetyllactosamine synthase is a component of Lactose synthase along with alpha-lactalbumin, in the absence of alpha-lactalbumin EC:2.4.1.90 is the catalysed reaction.. +PF13734 Spi protease inhibitor
This family includes the inhibitor Spi and the pro-peptides of streptopain (SpeB). SpeB is produced as a 43 kDa pre-pro-protein, which is secreted via the recently described Sec secretory pathway Exportal. There is tight coupling between this inhibitor and its associated protease: the gene for the inhibitor Spi is located directly downstream from the gene for the streptococcal cysteine protease SpeB, and the sequence of the inhibitor is very similar to that of the SpeB propeptide. This is an example of an inhibitor molecule that is a structural homologue of the cognate propeptide, and is genetically linked to the protease gene .. +PF13735 tRNA nucleotidyltransferase domain 2 putative
+PF13737 Transposase DDE domain
Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis.. +PF13738 Pyridine nucleotide-disulphide oxidoreductase
+PF13739 Domain of unknown function (DUF4163)
The structure of this domain is an alpha-beta two-layer sandwich, identified from a Fervidobacterium nodosum Rt17-B1 like protein. The function is not known except that it is found in association with Heat-shock cognate 70kd protein 44kd ATPase, Pfam:PF11738.. +PF13740 ACT domain
ACT domains bind to amino acids and regulate associated enzyme domains.. +PF13741 Mitochondrial ribosomal protein S25
PfamB-B_2836 (release 25.0). This is the family of fungal 37S mitochondrial ribosomal S25 proteins.. +PF13742 OB-fold nucleic acid binding domain
This family contains OB-fold domains that bind to nucleic acids.. +PF13743 Thioredoxin
+PF13744 Helix-turn-helix domain
Members of this family contain a DNA-binding helix-turn-helix domain.. +PF13745 HxxPF-repeated domain
This family is found in non-ribosomal peptide synthetase proteins, and can occur up to twelve times.. +PF13746 4Fe-4S dicluster domain
This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich.. +PF13747 Domain of unknown function (DUF4164)
This is a family of short, approx 100 residue-long, bacterial proteins of unknown function. There are several conserved LE/LD sequence pairs.. +PF13748 ABC transporter transmembrane region
This family represents a unit of six transmembrane helices.. +PF13749 ATP-dependent DNA helicase recG C-terminal
This domain may well interact selectively and non-covalently with ATP, adenosine 5'-triphosphate, a universally important coenzyme and enzyme regulator.. +PF13750 Bacterial Ig-like domain (group 3)
This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins.. +PF13751 Transposase DDE domain
Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis.. +PF13752 Domain of unknown function (DUF4165)
+PF13753 Putative flagellar system-associated repeat
This family appears to be a repeated unit that can occur up to 29 times in these outer membrane proteins. It is putatively associated with a novel flagellar system.. +PF13754 Bacterial Ig-like domain (group 3)
This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins.. +PF13755 Sensor N-terminal transmembrane domain
Coggill P, Eberhardt R. This domain is found at the N-terminus of the sensor component of the two-component regulatory system. It includes a transmembrane region and part of the periplasmic region, which is likely to be involved in stimulus sensing .. +PF13756 Stimulus-sensing domain
Coggill P, Eberhardt R. This domain is found in the periplasmic region of the sensor component of the two-component regulatory system. The periplasmic region is likely to be involved in stimulus sensing .. +PF13757 Vault protein inter-alpha-trypsin domain
Inter-alpha-trypsin inhibitors (ITIs) consist of one light chain and a variable set of heavy chains. ITIs play a role in extracellular matrix (ECM) stabilisation and tumour metastasis as well as in plasma protease inhibition . The vault protein inter-alpha-trypsin (VIT) domain described here is found to the N-terminus of a von Willebrand factor type A domain (Pfam:PF00092) in ITI heavy chains (ITIHs) and their precursors.. +PF13758 Prefoldin subunit
This family includes prefoldin subunits that are not detected by Pfam:PF02996.. +PF13759 Putative 2OG-Fe(II) oxygenase
Coggill P, Eberhardt R. This family has structural similarity to the 2OG-Fe(II) oxygenase superfamily.. +PF13761 Domain of unknown function (DUF4166)
Coggill P, Eberhardt R. This domain is often found at the C-terminus of proteins containing Pfam:PF03435.. +PF13762 Mitochondrial splicing apparatus component
MNE1 is a novel component of the mitochondrial splicing apparatus responsible for the processing of a COX1 group I intron in yeast . Yeast cells lacking MNE1 are deficient in intron splicing in the gene encoding the Cox1 subunit of cytochrome oxidase but do contain wild-type levels of the bc1 complex.. +PF13763 Domain of unknown function (DUF4167)
+PF13764 E3 ubiquitin-protein ligase UBR4
This is a family of E3 ubiquitin ligase enzymes.. +PF13765 SPRY-associated domain
SPRY and PRY domains occur on PYRIN proteins. Their function is not known.. +PF13766 2-enoyl-CoA Hydratase C-terminal region
This is the C-terminal region of enoyl-CoA hydratase.. +PF13767 Domain of unknown function (DUF4168)
+PF13768 von Willebrand factor type A domain
+PF13769 Virulence factor
This domain is found in conserved virulence factors . It is often found in association with Pfam:PF02985 and Pfam:PF08712.. +PF13770 Domain of unknown function (DUF4169)
+PF13771 PHD-like zinc-binding domain
The members of this family are annotated as containing PHD domain, but the zinc-binding region here is not typical of PHD domains. The conformation here is a well-conserved cysteine-histidine rich region spanning 90 residues, where the Cys and His are arranged as HxxC(31)CxxC(6)CxxCxxxxCxxxxHxxC (21)CxxH.. +PF13772 AIG2-like family
This family is found in bacteria and metazoa.. +PF13773 Domain of unknown function (DUF4170)
+PF13774 Regulated-SNARE-like domain
Longin is one of the approximately 26 components required for transporting proteins from the ER to the plasma membrane, via the Golgi apparatus. It is necessary for the steps of the transfer from the ER to the Golgi complex . Longins are the only R-SNAREs that are common to all eukaryotes, and they are characterised by a conserved N-terminal domain with a profilin-like fold called a longin domain .. +PF13775 Domain of unknown function (DUF4171)
This short family is frequently found at the N-terminus of Homeobox proteins.. +PF13776 Domain of unknown function (DUF4172)
The family is often found in association with Pfam:PF02661.. +PF13777 Domain of unknown function (DUF4173)
This domain of unknown function contains multiple predicted transmembrane domains.. +PF13778 Domain of unknown function (DUF4174)
This domain of unknown function is found in a putative tumour suppressor gene and in a ligand for the urokinase-type plasminogen activator receptor, which plays a role in cellular migration and adhesion [2,3].. +PF13779 Domain of unknown function (DUF4175)
+PF13780 Domain of unknown function (DUF4176)
+PF13781 DoxX-like family
This family of uncharacterised proteins are related to DoxX Pfam:PF07681.. +PF13782 Stage V sporulation protein AB
This family of proteins is required for sporulation .. +PF13783 Domain of unknown function (DUF4177)
+PF13784 Fic/DOC family N-terminal
This domain is found at the N-terminus of the Fic/DOC family, Pfam:PF02661.. +PF13785 Domain of unknown function (DUF4178)
+PF13786 Domain of unknown function (DUF4179)
+PF13787 Protein of unknown function with HXXEE motif
This domain contains an HXXEE motif, another conserved histidine and a YXPG motif. Its function is unknown.. +PF13788 Domain of unknown function (DUF4180)
+PF13789 Domain of unknown function (DUF4181)
+PF13790 Domain of unknown function (DUF4182)
This protein of unknown function contains a number of highly conserved cysteine residues, which may form disulphide bonds.. +PF13791 Sigma factor regulator C-terminal
This family is the C-terminal domain of a sigma factor regulator, this may represent a sensory domain .. +PF13792 Sulfate transporter N-terminal domain with GLY motif
This domain is found usually at the N-terminus of sulfate-transporter proteins. It carries a highly conserved GLY sequence motif, but the function of the domain is not known.. +PF13793 N-terminal domain of ribose phosphate pyrophosphokinase
This family is frequently found N-terminal to the Pribosyltran, Pfam:PF00156.. +PF13794 tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE)-like
+PF13795 HupE / UreJ protein
These proteins contain many conserved histidines that may be involved in nickel binding.. +PF13796 Putative sensor
This family is often found at the N-terminus of proteins containing Pfam:PF07730 and Pfam:PF02518. The N-termini of proteins containing these two domains often function in stimulus sensing.. +PF13797 Post-transcriptional regulator
This family includes post-transcriptional regulators .. +PF13798 Protein of unknown function with PCYCGC motif
This domain contains a PCYCGC motif and four other conserved cysteines. Its function is unknown.. +PF13799 Domain of unknown function (DUF4183)
This domain of unknown function contains a highly conserved ING motif.. +PF13800 Sigma factor regulator N-terminal
Coggill P, Eberhardt R. This domain is found near the N-terminus of a sigma factor regulator. The N-terminus is responsible for interaction with the sigma factor .. +PF13801 Heavy-metal resistance
This is a metal-binding protein which is involved in resistance to heavy-metal ions [1,2]. The protein forms a four-helix hooked hairpin, consisting of two long alpha helices each flanked by a shorter alpha helix. It binds a metal ion in a type-2 like centre . It contains two copies of an LTXXQ motif.. +PF13802 Galactose mutarotase-like
This family is found N-terminal to glycosyl-hydrolase domains, and appears to be similar to the galactose mutarotase superfamily.. +PF13803 Domain of unknown function (DUF4184)
This domain of unknown function contains several highly conserved histidines.. +PF13804 Retro-transcribing viruses envelope glycoprotein
This family comes from human endogenous retrovirus K envelope glycoproteins.. +PF13805 Eisosome component PIL1
In the budding yeast, S. cerevisiae, Pil1 and another cytoplasmic protein, Lsp1, together form large immobile assemblies at the plasma membrane that mark sites for endocytosis, called eisosomes. Endocytosis functions to recycle plasma membrane components, to regulate cell-surface expression of signalling receptors and to internalise nutrients in all eukaryotic cells.. +PF13806 Rieske-like [2Fe-2S] domain
+PF13807 G-rich domain on putative tyrosine kinase
This domain is found between two families, Wzz, Pfam:PF02706 and CbiA Pfam:PF01656. There is a highly conserved GNVR sequence motif which characterises this domain. The function is not known.. +PF13808 DDE_Tnp_1-associated
This domain is frequently found N-terminal to the transposase, IS family DDE_Tnp_1, Pfam:PF01609 and its relatives.. +PF13809 Tubulin like
Many of the residues conserved in Tubulin, Pfam:PF00091, are also highly conserved in this family.. +PF13810 Domain of unknown function (DUF4185)
+PF13811 Domain of unknown function (DUF4186)
+PF13812 Pentatricopeptide repeat domain
This family matches additional variants of the PPR repeat that were not captured by the model for Pfam:PF01535. The exact function is not known.. +PF13813 Membrane bound O-acyl transferase family
+PF13814 Replication-relaxation
This family includes proteins which are essential for plasmid replication and plasmid DNA relaxation .. +PF13815 Iguana/Dzip1-like DAZ-interacting protein N-terminal
The DAZ gene-product - Deleted in Azoospermia - and a closely related sequence are required early in germ-cell development in order to maintain germ-cell populations. This family is the N-terminal region that is the only part of the protein in some fungi and lower metazoa.. +PF13816 Haem-containing dehydratase
This family includes aldoxime dehydratase, EC:4.99.1.5. This is a haem-containing enzyme, which catalyses the dehydration of aldoximes to their corresponding nitrile . It also includes phenylacetaldoxime dehydratase, EC:4.99.1.7. This haem-containing enzyme catalyses the dehydration of Z-phenylacetaldoxime to phenylacetonitrile . The enzyme forms an elliptic beta barrel, composed of eight beta-strands, flanked by alpha-helices .. +PF13817 IS66 C-terminal element
+PF13820 Putative nucleic acid-binding region
This is a family of putative nucleic acid-binding proteins. Several members are annotated as being nuclear receptor coactivator 6 proteins but this could not be confirmed.. +PF13821 Domain of unknown function (DUF4187)
This family is found at the very C-terminus of proteins that carry a G-patch domain, Pfam:PF01585. The domain is short and cysteine-rich.. +PF13822 Acyl-CoA carboxylase epsilon subunit
This family includes the epsilon subunits of propionyl-CoA carboxylase, EC:6.4.1.3, and acetyl-CoA carboxylase, EC:6.4.1.2. These enzymes are involved in the biosynthesis of long-chain fatty acids. The epsilon subunit is necessary for an efficient interaction between the alpha and beta subunits of these enzymes .. +PF13823 Alcohol dehydrogenase GroES-associated
This short domain is frequently found at the N-terminus of the alcohol dehydrogenase GroES-like domain, Pfam: PF08240.. +PF13824 Zinc-finger of mitochondrial splicing suppressor 51
Mss51 regulates the expression of cytochrome oxidase, so this domain is probably DNA-binding.. +PF13825 Paramyxovirus structural protein V/P N-terminus
This family consists of several Paramyxoviridae structural protein P and V sequences . From a structural point of view, P is the best-characterised protein of the replicative complex. P is organised into two moieties that are functionally and structurally distinct: a C-terminal moiety (PCT) and an N-terminal moiety (PNT). PCT is the most conserved in sequence and contains all regions required for virus transcription, whereas PNT, which is poorly conserved, provides several additional functions required for replication . P protein plays a crucial role in the enzyme by positioning L onto the N/RNA template through an interaction with the C-terminal domain of N. Without P, L is not functional. The N, P, and L proteins of SeV and measles and mumps viruses are functionally equivalent. However, sequence identity between proteins from these viruses is limited, and the viruses have been placed in different genera (Respirovirus, Morbilivirus, and Rubulavirus, respectively). SeV P protein (568 aa) is a modular protein with distinct functional domains. The N-terminal part of P (PNT) is a chaperone for N and prevents it from binding to non-viral RNA in the infected cell .. +PF13826 Domain of unknown function (DUF4188)
+PF13827 Domain of unknown function (DUF4189)
This domain of unknown function contains six well-conserved cysteine residues.. +PF13828 Domain of unknown function (DUF4190)
This integral membrane domain is functionally uncharacterised. One of the membrane helices contains two GXXG motifs that are usually associated with dimerisation.. +PF13829 Domain of unknown function (DUF4191)
+PF13830 Domain of unknown function (DUF4192)
+PF13831 PHD-finger
PHD folds into an interleaved type of Zn-finger chelating 2 Zn ions in a similar manner to that of the RING and FYVE domains . Several PHD fingers have been identified as binding modules of methylated histone H3 .. +PF13832 PHD-zinc-finger like domain
+PF13833 EF_hand_6;
+PF13834 Domain of unknown function (DUF4193)
This domain of unknown function contains four conserved cysteines and a conserved histidine, including a CXXXXH motif.. +PF13835 Domain of unknown function (DUF4194)
+PF13836 Domain of unknown function (DUF4195)
This family is found at the N-terminus of metazoan proteins that carry PHD-like zinc-finger domains. The function is not known.. +PF13837 Myb/SANT-like DNA-binding domain
This presumed domain appears to be related to other Myb/SANT-like DNA binding domains. In particular Pfam:PF10545 seems most related. This family is greatly expanded in plants and appears in several proteins annotated as transposon proteins.. +PF13838 Clathrin-H-link
This short domain is found on clathrins, and often appears on proteins directly downstream from the Clathrin-link domain Pfam:PF09268.. +PF13839 GDSL/SGNH-like Acyl-Esterase family found in Pmr5 and Cas1p
N-terminal C rich potential sugar binding domain followed by the PC-Esterase domain .. +PF13840 ACT domain
The ACT domain is a structural motif of 70-90 amino acids that functions in the control of metabolism, solute transport and signal transduction. They are thus found in a variety of different proteins in a variety of different arrangements . In mammalian phenylalanine hydroxylase the domain forms no contacts but promotes an allosteric effect despite the apparent lack of ligand binding .. +PF13841 Beta defensin
The beta defensins are antimicrobial peptides implicated in the resistance of epithelial surfaces to microbial colonisation .. +PF13842 DDE_Tnp_1-like zinc-ribbon
This zinc-ribbon domain is frequently found at the C-terminal of proteins derived from transposable elements.. +PF13843 Transposase IS4
+PF13844 Glycosyl transferase family 41
This family of glycosyltransferases includes O-linked beta-N-acetylglucosamine (O-GlcNAc) transferase, an enzyme which catalyses the addition of O-GlcNAc to serine and threonine residues [1,2]. In addition to its function as an O-GlcNAc transferase, human OGT, Swiss:O15294, also appears to proteolytically cleave the epigenetic cell-cycle regulator HCF-1 .. +PF13845 Septum formation
This domain is found in a protein which is predicted to play a role in septum formation during cell division .. +PF13846 Domain of unknown function (DUF4196)
Pfam-B_104093 (release 24.0). This is a short region of ccdc82_homologues that is conserved from Schizo. pombe up to humans. The function is not known.. +PF13847 Methyltransferase domain
This family appears to have methyltransferase activity.. +PF13848 Thioredoxin-like domain
+PF13850 Endoplasmic Reticulum-Golgi Intermediate Compartment (ERGIC)
This family is the N-terminal of ERGIC proteins , ER-Golgi intermediate compartment clusters, otherwise known as Ervs, and is associated with family COPIIcoated_ERV, Pfam:PF07970.. +PF13851 Growth-arrest specific micro-tubule binding
This family is the highly conserved central region of a number of metazoan proteins referred to as growth-arrest proteins. In mouse, Gas8 is predominantly a testicular protein, whose expression is developmentally regulated during puberty and spermatogenesis. In humans, it is absent in infertile males who lack the ability to generate gametes. The localisation of Gas8 in the motility apparatus of post-meiotic gametocytes and mature spermatozoa, together with the detection of Gas8 also in cilia at the apical surfaces of epithelial cells lining the pulmonary bronchi and Fallopian tubes suggests that the Gas8 protein may have a role in the functioning of motile cellular appendages . Gas8 is a microtubule-binding protein localised to regions of dynein regulation in mammalian cells.. +PF13852 Protein of unknown function (DUF4197)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 228 and 249 amino acids in length.. +PF13853 Olfactory receptor
The members of this family are transmembrane olfactory receptors.. +PF13854 Kelch motif
The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase for which a structure has been solved . The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415.. +PF13855 Leucine rich repeat
Jackhmer:JCS-Target417241. +PF13856 ATP-binding sugar transporter from pro-phage
Members of this short family are putative ATP-binding sugar transporter-like protein.. +PF13857 Ankyrin repeats (many copies)
+PF13858 Protein of unknown function (DUF4199)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 167 and 182 amino acids in length.. +PF13859 BNR repeat-like domain
This family of proteins contains BNR-like repeats suggesting these proteins may act as sialidases.. +PF13860 FlgD Ig-like domain
This domain has an immunoglobulin-like beta sandwich fold. It is found in the FlgD protein, the flagellar hook capping protein. The structure for this domain shows that it is inserted within a TUDOR-like beta barrel domain .. +PF13861 FlgD Tudor-like domain
This domain has a tudor domain-like beta barrel fold. It is found in the FlgD protein the flagellar hook capping protein. The structure for this domain shows that it contains a nested Ig-like domain within it . However in some firmicute proteins this inserted domain is absent such as Q67K21.. +PF13862 p21-C-terminal region-binding protein
This family of p21-binding proteins is important as a modulator of p21 activity. The domain binds the C-terminal region of p21 in a ternary complex with CDK2, which results in inhibition of the kinase activity of CDK2.. +PF13863 Domain of unknown function (DUF4200)
This family is found in eukaryotes. It is a coiled-coil domain of unknown function.. +PF13864 Calmodulin-binding
+PF13865 C-terminal duplication domain of Friend of PRMT1
Fop, or Friend of Prmt1, proteins are conserved from fungi and plants to vertebrates. There is little that is actually conserved except for this C-terminal LDXXLDAYM region (where X is any amino acid). The Fop proteins themselves are nuclear proteins localised to regions with low levels of DAPI, with a punctate/speckle-like distribution. Fop is a chromatin-associated protein and it colocalises with facultative heterochromatin. It is critical for oestrogen-dependent gene activation .. +PF13866 SAP30 zinc-finger
SAP30 is a subunit of the histone deacetylase complex, and this domain is a zinc-finger. Solution of the structure shows a novel fold comprising two beta-strands and two alpha-helices with the zinc organising centre showing remote resemblance to the treble clef motif. In silico analysis of the structure revealed a highly conserved surface dominated by basic residues. NMR-based analysis of potential ligands for the SAP30 zn-finger motif indicated a strong preference for nucleic acid substrates. The zinc-finger of SAP3 probably functions as a double-stranded DNA-binding motif, thereby expanding the known functions of both SAP30 and the mammalian Sin3 co-repressor complex .. +PF13867 Sin3 binding region of histone deacetylase complex subunit SAP30
This C-terminal domain of the SAP30 proteins appears to be the binding region for Sin3.. +PF13868 Tumour suppressor, Mitostatin
Trichoplein or mitostatin, was first defined as a meiosis-specific nuclear structural protein. It has since been linked with mitochondrial movement. It is associated with the mitochondrial outer membrane, and over-expression leads to reduction in mitochondrial motility whereas lack of it enhances mitochondrial movement. The activity appears to be mediated through binding the mitochondria to the actin intermediate filaments (IFs) .. +PF13869 Nucleotide hydrolase
Nudix hydrolases are found in all classes of organism and hydrolyse a wide range of organic pyrophosphates, including nucleoside di- and triphosphates, di-nucleoside and diphospho-inositol polyphosphates, nucleotide sugars and RNA caps, with varying degrees of substrate specificity.. +PF13870 Domain of unknown function (DUF4201)
This is a family of coiled-coil proteins from eukaryotes. The function is not known.. +PF13871 Helicase_C-like
Strawberry notch proteins carry DExD/H-box groups and Helicase_C domains. These proteins promote the expression of diverse targets, potentially through interactions with transcriptional activator or repressor complexes .. +PF13872 P-loop containing NTP hydrolase pore-1
+PF13873 Myb/SANT-like DNA-binding domain
This presumed domain appears to be related to other Myb/SANT like DNA binding domains. This family is greatly expanded in arthropods and higher eukaryotes.. +PF13874 Nucleoporin complex subunit 54
This is the human Nup54 subunit of the nucleoporin complex, equivalent to Nup57 of yeast. Nup54, Nup58 and Nup62 all have similar affinities for importin-beta. It seems likely that they are the only FG-repeat nucleoporins of the central channel, and as such they would form a zone of equal affinity spanning the central channel. The diffusion of importin-beta import complexes through the central channel may be a stochastic process as the affinities are similar, whereas movement from cytoplasmic fibrils to the central channel and from the central channel to the nuclear basket would be facilitated by the subtle differences in affinity between them [1,2].. +PF13875 Domain of unknown function (DUF4202)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 187 and 205 amino acids in length. There are two conserved sequence motifs: LED and KMS. The function of these proteins is unknown, although many are incorrectly annotated as glutamyl tRNA synthetases.. +PF13876 Phage protein (N4 Gp49/phage Sf6 gene 66) family
This family of phage proteins is functionally uncharacterised. The family includes bacteriophage Sf6 gene 66 Swiss:Q716B1 as well as phage N4 GP49 protein Swiss:A0MZD7. Proteins in this family are typically between 87 and 154 amino acids in length. There is a conserved NGF sequence motif.. +PF13877 Potential Monad-binding region of RPAP3
This domain is found at the C-terminus of RNA-polymerase II-associated proteins. These proteins bind to Monad and are involved in regulating apoptosis. They contain TPR-repeats towards the N-terminus.. +PF13878 zinc-finger of acetyl-transferase ESCO
+PF13879 KIAA1430 homologue
This is a family of KIAA1430 homologues. The function is not known.. +PF13880 ESCO1/2 acetyl-transferase
+PF13881 Ubiquitin-2 like Rad60 SUMO-like
+PF13882 Bravo-like intracellular region
This is the very C-terminal intracellular region of neural adhesion molecule L1 proteins that are also known as Bravo or NrCAM. It lies upstream of the IG and Fn3 domains and has the highly conserved motif FIGEY. The function is not known.. +PF13883 Pyridoxamine 5'-phosphate oxidase
+PF13884 Chaperone of endosialidase
This is the very C-terminal, chaperone, domain of the bacteriophage protein endosialidase. It releases itself, via the serine-lysine dyad at the N-terminus, from the remainder of the end-tail-spike. Cleavage occurs after the threonine which is the final residue of the End-tail-spike family, Pfam:PF12219. The endosialidase protein forms homotrimeric molecules in bacteriophages . The catalytic dyad allows this portion of the molecule to be cleaved from the more N-terminal region such that the latter can fold and presumably bind to DNA.. +PF13885 Keratin, high sulfur B2 protein
+PF13886 Domain of unknown function (DUF4203)
This is the N-terminal region of 7tm proteins. The function is not known.. +PF13887 Myelin gene regulatory factor -C-terminal domain 1
This domain is found just downstream of Peptidase_S74, Pfam:PF13884. The function is not known.. +PF13888 Myelin gene regulatory factor C-terminal domain 2
This domain is found further downstream of Peptidase_S74, Pfam:PF13884, and MRF_C1, Pfam:PF13887. The function is not known.. +PF13889 DUF4204;
Chromosome segregation during meiosis. The proteins come from eukaryotes, plants and animals, and are necessary for chromosome segregation during meiosis.. +PF13890 Rab3 GTPase-activating protein catalytic subunit
This family is the probable catalytic subunit of the GTPase activating protein that has specificity for Rab3 subfamily (RAB3A, RAB3B, RAB3C and RAB3D). It is likely to convert active Rab3-GTP to the inactive form Rab3-GDP. Rab3 proteins are involved in regulated exocytosis of neurotransmitters and hormones. The Rab3 GTPase-activating complex is a heterodimer composed of RAB3GAP and RAB3-GAP150. This complex interacts with DMXL2 [1,2,3].. +PF13891 Potential DNA-binding domain
This domain is likely to be the DNA-binding domain of chromatin re-modelling proteins and helicases.. +PF13892 DNA-binding domain
DBINO is a DNA-binding domain found on global transcription activator SNF2L1 proteins and chromatin re-modelling proteins.. +PF13893 RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain)
JCSG:Target_421317_RF20609A. The RRM motif is probably diagnostic of an RNA binding protein. RRMs are found in a variety of RNA binding proteins, including various hnRNP proteins, proteins implicated in regulation of alternative splicing, and protein components of snRNPs. The motif also appears in a few single stranded DNA binding proteins.. +PF13894 C2H2-type zinc finger
This family contains a number of divergent C2H2 type zinc fingers.. +PF13895 Immunoglobulin domain
This domain contains immunoglobulin-like domains.. +PF13896 Glycosyl-transferase for dystroglycan
This glycosyl-transferase brings about the glycosylation of the alpha-dystroglycan subunit. Dystroglycan is an integral member of the skeletal muscular dystrophin glycoprotein complex, which links dystrophin to proteins in the extracellular matrix [1,2].. +PF13897 Golgi-dynamics membrane-trafficking
Sec14-like Golgi-trafficking domain The GOLD domain is always found combined with lipid- or membrane-association domains .. +PF13898 Domain of unknown function (DUF4205)
The proteins in this family are uncharacterised but often named FAM188B.. +PF13899 Thioredoxin-like
Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond.. +PF13900 Putative binding domain
This short domain is often found nested inside other longer domains. The function is not known, but the domain carries a highly conserved GVQW motif. The members are rich in proline and cysteine. This may be a binding domain.. +PF13901 Domain of unknown function (DUF4206)
This is a family of cysteine-rich proteins. Many members also carry a pleckstrin-homology domain, Pfam:PF00169. +PF13902 R3H-associated N-terminal domain
This family is found at the N-terminus of R3H, Pfam:PF01424, domain-containing proteins. The function is not known.. +PF13903 PMP-22/EMP/MP20/Claudin tight junction
Members of this family are claudins, that form tight junctions between cells.. +PF13904 Domain of unknown function (DUF4207)
This family is found in eukaryotes; it has several conserved tryptophan residues. The function is not known.. +PF13905 Thioredoxin-like
Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond.. +PF13906 C-terminus of AA_permease
This is the C-terminus of AA-permease enzymes that is not captured by the models Pfam:PF00324 and Pfam:PF13520.. +PF13907 Domain of unknown function (DUF4208)
This domain is found at the C-terminus of chromodomain-helicase-DNA-binding proteins. The exact function of the domain is undetermined.. +PF13908 Wnt and FGF inhibitory regulator
Shisa is a transcription factor-type molecule that physically interacts with immature forms of the Wnt receptor Frizzled and the FGF receptor within the endoplasmic reticulum to inhibit their post-translational maturation and trafficking to the cell surface.. +PF13909 C2H2-type zinc-finger domain
+PF13910 Domain of unknown function (DUF4209)
This short domain is found in bacteria and eukaryotes, though not in yeasts or Archaea. It carries a highly conserved RNxxxHG sequence motif.. +PF13911 AhpC/TSA antioxidant enzyme
This family contains proteins related to alkyl hydro-peroxide reductase (AhpC) and thiol specific antioxidant (TSA).. +PF13912 C2H2-type zinc finger
+PF13913 zinc-finger of a C2HC-type
This family contains a number of divergent C2H2 type zinc fingers.. +PF13914 Phostensin PP1-binding and SH3-binding region
Phostensin has been identified as a PP1 regulatory protein binding PP1 at the KISF motif. The domain also appears to carry an incomplete SH3-binding domain PxRxP further upstream. It is likely that Phostensin targets PP1 to the F-actin cytoskeleton . Phostensin binds to actin and decreases the elongation and depolymerisation rates of actin filament pointed ends .. +PF13915 Domain of unknown function (DUF4210)
This short domain is found in fungi, plants and animals, and the proteins appear to be necessary for chromosome segregation during meiosis.. +PF13916 PP1-regulatory protein, Phostensin N-terminal
Phostensin has been identified as a PP1 regulatory protein binding protein. This domain is N-terminal to the PP1- and SH3-binding regions though may carry an additional SH3-binding motif. It is likely that Phostensin targets PP1 to the F-actin cytoskeleton . Phostensin binds to actin and decreases the elongation and depolymerisation rates of actin filament pointed ends .. +PF13917 Zinc knuckle
The zinc knuckle is a zinc binding motif composed of the following CX2CX4HX4C where X can be any amino acid. The motifs are mostly from retroviral gag proteins (nucleocapsid). Prototype structure is from HIV. Also contains members involved in eukaryotic gene regulation, such as C. elegans GLH-1. Structure is an 18-residue zinc finger.. +PF13918 PLD-like domain
+PF13919 Asx-hm;
Iyer LM, Aravind L, Godzik A, Coggill P. A conserved alpha helical domain with a characteristic LXXLL motif . The LXXLL motif is detected in diverse transcription factors, coactivators and corepressors and is implicated in mediating interactions between them . The ASXH domain is found in animals, fungi and plants and is predicted to play a role in mediating contact between transcription factors and chromatin-associated complexes. In Drosophila Asx and Human ASXL1, the ASXH domain is predicted to mediate interactions with the Calypso and BAP1 deubiquitinases (DUBs) which further belong to the UCHL5/UCH37 clade of DUBs .. +PF13920 Zinc finger, C3HC4 type (RING finger)
+PF13921 Myb-like DNA-binding domain
This family contains the DNA binding domains from Myb proteins, as well as the SANT domain family .. +PF13922 PHD domain of transcriptional enhancer, Asx
This is the DNA-binding domain on the additional sex combs-like 1 proteins. The Asx protein acts as an enhancer of trithorax and polycomb in displaying bidirectional homoeotic phenotypes in Drosophila, suggesting that it is required for maintenance of both activation and silencing of Hox genes. Asx is required for normal adult haematopoiesis and its function depends on its cellular context.. +PF13923 Zinc finger, C3HC4 type (RING finger)
+PF13924 Lipocalin-like domain
This family includes domains distantly related to lipocalins. However, they do contain the important GXW motif in the first strand. The protein in this family include aln5 Swiss:B6SEG2 which is involved in biosynthesis of alnumycin . The family also includes the ZFK protein from Trypanosoma brucei which is a protein kinase. This domain is at the C-terminus of that protein . The domain is also found as the C-terminal domain in StiJ a protein involved in producing stigmatellin. This domain has been assumed to catalyse a final cyclisation reaction .. +PF13925 con80 domain of Katanin
The con80 domain of katanin is the C-terminal region of the protein that binds to the N-terminal domain of katanin-p60, the catalytic ATPase. The complex associates with a specific subregion of the mitotic spindle leading to increased microtubule disassembly and targeting of p60 to the spindle poles . The assembly and function of the mitotic spindle requires the activity of a number of microtubule-binding proteins. Katanin, a heterodimeric microtubule-severing ATPase, is found localized at mitotic spindle poles. A proposed model is that katanin is targeted to spindle poles through a combination of direct microtubule binding by the p60 subunit and through interactions between the WD40 domain and an unknown protein .. +PF13926 Domain of unknown function (DUF4211)
+PF13927 Immunoglobulin domain
This family contains immunoglobulin-like domains.. +PF13928 Flocculin type 3 repeat
This repeat is found in the Flocculation protein FLO9 Swiss:P39712 close to its C-terminus.. +PF13929 mRNA stabilisation
This domain is an mRNA stabilisation factor .. +PF13930 DNA/RNA non-specific endonuclease
+PF13931 Kinesin-associated microtubule-binding
This domain binds to microtubules [1,2].. +PF13932 GidA associated domain 3
The GidA associated domain 3 is a motif that has been identified at the C-terminus of protein GidA. It consists of 4 helices, the last three being rather short and forming small bundle at the top end of the first longer one. It is here named helical domain 3 because in GidA it is preceded by two other C-terminal helical domains (based on crystal structures [1,2]). GidA is a tRNA modification enzyme found in bacteria and mitochondria. Based on mutational analysis this domain has been suggested to be implicated in binding of the D-stem of tRNA and to be responsible for the interaction with protein MnmE . Structures of GidA in complex with either tRNA or MnmE are missing. Reported to bind to Pfam family MnmE, Pfam:PF12631.. +PF13933 Putative peptidase family
This family of putative peptidases are closely related to the M35 family Pfam:PF02102. In this family the metal binding HEXXH motif is replaced with HRXXH. The exact function of these proteins is unknown. Members of this family are found to be fungal allergens.. +PF13934 Nuclear pore complex assembly
ELYS (embryonic large molecule derived from yolk sac) is conserved from fungi such as Aspergillus nidulans and Schizosaccharomyces pombe to human . It is important for the assembly of the nuclear pore complex .. +PF13935 Ead/Ea22-like protein
This family contains phage proteins and bacterial proteins that are likely to represent integrated phage proteins. This family includes the Lambda phage Ea22 early protein as well as the Bacteriophage P22 Ead protein.. +PF13936 Helix-turn-helix domain
This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding.. +PF13937 Domain of unknown function (DUF4212)
This family includes several putative integral membrane proteins.. +PF13938 Domain of unknown function (DUF4213)
This domain of unknown function has an enolase N-terminal domain-like fold. Its genomic context suggests that it may have a role in anaerobic vitamin B12 biosynthesis. This domain is often found at the N-terminus of proteins containing DUF364, Pfam:PF04016.. +PF13939 Toxin TisB, type I toxin-antitoxin system
TisB (toxicity-induced by SOS B) is an SOS-induced toxic peptide. It is a hydrophobic membrane-spanning protein which inhibits cell growth . Its expression is inhibited by the antisense RNA IstR-1, which acts as an antitoxin .. +PF13940 Toxin Ldr, type I toxin-antitoxin system
This family includes the Ldr (long direct repeat) toxins. In Escherichia coli there are four Ldr toxins, LdrA, LdrB, LdrC and LdrD. These toxins inhibit cell growth, decrease cell viability and cause nucleoid condensation. LdrD expression is inhibited by the antisense RNA RdlD, which functions as an antitoxin .. +PF13941 MutL protein
This small family includes GlmL/MutL from Clostridium tetanomorphum and Clostridium cochlearium. GlmL is located between the genes for the two subunits, epsilon (GlmE) and sigma (GlmS), of the coenzyme-B12-dependent glutamate mutase (methylaspartate mutase), the first enzyme in a pathway of glutamate fermentation. Members show significant sequence similarity to the hydantoinase branch of the hydantoinase/oxoprolinase family .. +PF13942 YfhG lipoprotein
This family includes the YfhG protein from E. coli Swiss:P0AD44. Members of this family have an N-terminal lipoprotein attachment site. The members of this family are functionally uncharacterized.. +PF13943 WPP domain
+PF13944 Lipocalin-like domain
Jackhmmer:JCSG_Target393211_GS13544C. +PF13945 Salt tolerance down-regulator
NST1 is a family of proteins that seem to be involved, directly or indirectly, in the salt sensitivity of some cellular functions in yeast. It does this without affecting sodium accumulation. It negatively affects salt-tolerance through an interaction with the splicing factor Msl1p. This interaction stresses the importance of efficient RNA processing under salt stress conditions .. +PF13946 Domain of unknown function (DUF4214)
Pfam-B_781 (release 24.0). This domain is found on a variety of different proteins including transferases, and allergen V5/Tpx-1 related proteins.. +PF13947 Wall-associated receptor kinase galacturonan-binding
This cysteine-rich GUB_WAK_bind domain is the extracellular part of this serine/threonine kinase that binds to the cell-wall pectins.. +PF13948 Domain of unknown function (DUF4215)
The function of this family is unknown.. +PF13949 ALIX V-shaped domain binding to HIV
The binding of the LYPxL motif of late HIV p6Gag and EIAV p9Gag to this domain is necessary for viral budding. This domain is generally central between an N-terminal Bro1 domain, Pfam:PF03097 and a C-terminal proline-rich domain. The retroviruses thus used this domain to hijack the ESCRT system of the cell.. +PF13950 UDP-glucose 4-epimerase C-term subunit
This domain is the very C-terminal subunit of UDP-glucose 4-epimerase.. +PF13952 Domain of unknown function (DUF4216)
This DUF is sometimes found at the C-terminal end of proteins carrying a Transposase_21 domain, Pfam:PF02992.. +PF13953 PapC C-terminal domain
The PapC C-terminal domain is a structural domain found at the C-terminus of the E. coli PapC protein. Pili are assembled using the chaperone usher system. In E.coli this is composed of the chaperone PapD and the usher PapC. This domain represents the C-terminal domain from PapC and its homologues. This domain has a beta-sandwich structure similar to the plug domain of PapC .. +PF13954 PapC N-terminal domain
The PapC N-terminal domain is a structural domain found at the N-terminus of the E. coli PapC protein. Pili are assembled using the chaperone usher system. In E.coli this is composed of the chaperone PapD and the usher PapC. This domain represents the N-terminal domain from PapC and its homologues. This domain is involved in substrate binding .. +PF13955 Toxin Fst, type I toxin-antitoxin system
Fst (faecalis plasmid stabilization toxin), also known as RNA I, is a toxic peptide. Its N-terminus forms a transmembrane alpha helix, its C terminus is disordered and is likely to be cytosolic. Its translation is inhibited by the antisense RNA, RNA II, which acts as an antitoxin [1,2].. +PF13956 Toxin Ibs, type I toxin-antitoxin system
The Ibs (induction brings stasis) proteins are a family of toxic peptides. Their expression is inhibited by the Sib antisense RNAs, which act as antitoxins .. +PF13957 Toxin YafO, type II toxin-antitoxin system
YafO is a toxin which inhibits protein synthesis. It acts as a ribosome-dependent mRNA interferase. It forms part of a type II toxin-antitoxin system, where the YafN protein acts as an antitoxin [1,2]. This domain forms complexes with yafN antitoxins containing Pfam:PF02604.. +PF13958 Toxin ToxN, type III toxin-antitoxin system
ToxN acts as a toxin, it is part of a type III toxin-antitoxin system. It acts as a ribosome independent endoribonuclease. It interacts with, and is inhibited by, the RNA antitoxin, ToxI [1,2]. Three ToxN monomers bind to three ToxI monomers to create a trimeric ToxN-ToxI complex .. +PF13959 Domain of unknown function (DUF4217)
This short domain is found at the C-terminus of many helicase proteins.. +PF13960 Domain of unknown function (DUF4218)
+PF13961 Domain of unknown function (DUF4219)
This domain is very short and is found at the N-terminal of many Gag-pol polyprotein and related proteins. There is a highly conserved YxxWxxxM sequence motif.. +PF13962 Domain of unknown function
The PGG domain is named for the highly conserved sequence motif found at the start of the domain. The function is not known.. +PF13963 Transposase-associated domain
+PF13964 Kelch motif
+PF13965 dsRNA-gated channel SID-1
This is a family of proteins that are transmembrane dsRNA-gated channels. They passively transport dsRNA into cells and do not act as ATP-dependent pumps . They are required for systemic RNA interference [2,3].. +PF13966 zinc-binding in reverse transcriptase
This domain would appear to be a zinc-binding region of a putative reverse transcriptase.. +PF13967 Late exocytosis, associated with Golgi transport
This family represents the first three transmembrane regions of 11-TM proteins involved in vesicle transport. In S. cerevisiae these proteins are members of the yeast facilitator superfamily and are integral membrane proteins localised to the cell periphery, in particular to the bud-neck region. The distribution is consistent with a role in late exocytosis which is in agreement with the proteins' ability to substitute for the function of Sro7p, required for the sorting of the protein Enap1 into Golgi-derived vesicles destined for the cell surface .. +PF13968 Domain of unknown function (DUF4220)
This family is found in plants and is often associated with DUF294, Pfam:PF04578.. +PF13969 Pab87 octamerisation domain
This domain was first characterised as the C-terminal domain of Pab87 serine protease from Pyrococcus abyssi . The domain is reported to play a crucial role in Pab87 octamerisation and active site compartmentalisation. Its up-and-down 8-stranded beta-barrel 3D structure is reminiscent of the one found in lipocalins.. +PF13970 Domain of unknown function (DUF4221)
JCSG_target-390208:A6KZ57. This family of bacterial proteins contains highly conserved asparagine and cysteine residues. The function is not known.. +PF13971 MEI4-Rec24;
Meiosis-specific protein Mei4. Wood V, Coggill P, Eberhardt R. Pfam-B_78600 (release 24.0). This family of meiosis specific proteins is required for correct meiotic chromosome segregation and recombination . It is required for meiotic DNA double-strand break (DSB) formation .. +PF13972 Bacterial transcriptional repressor
This family of bacterial transcriptional repressors is characterised by the short approximately 50 amino acid stretch of residues constituting the helix-turn-helix DNA binding motif, around the YRFhY motif. The target proteins that are repressed are involved in the transcriptional control of multi-drug efflux pumps, pathways for the biosynthesis of antibiotics, response to osmotic stress and toxic chemicals, control of catabolic pathways, differentiation processes, and pathogenicity. The regulatory network in which TetR itself is involved is in being released in the presence of tetracycline, binding to the target operator, and repressing tetA transcription .. +PF13973 Domain of unknown function (DUF4222)
This short protein is likely to be of phage origin. For example it is found in the Swiss:B6DZ51 Enterobacteria phage YYZ-2008. It is largely found in enteric bacteria. The molecular function of this protein is unknown.. +PF13974 YebO-like protein
This short protein is uncharacterized. It seems likely to be of phage origin as it is found in Swiss:Q9MCU2 and Swiss:Q9MCS4. The protein is also found in a variety of enteric bacteria.. +PF13975 gag-polyprotein putative aspartyl protease
This family of putative aspartyl proteases is found pre-dominantly in retroviral proteins.. +PF13976 GAG-pre-integrase domain
This domain is found associated with retroviral insertion elements and lies just upstream of the integrase region on the polyproteins.. +PF13977 Bacterial transcriptional repressor
This family of bacterial transcriptional repressors is characterised by the short approximately 50 amino acid stretch of residues constituting the helix-turn-helix DNA binding motif, around the YRFhY motif. The target proteins that are repressed are involved in the transcriptional control of multi-drug efflux pumps, pathways for the biosynthesis of antibiotics, response to osmotic stress and toxic chemicals, control of catabolic pathways, differentiation processes, and pathogenicity . Another target protein is BetI, an osmoprotectant which controls the choline-glycine betaine pathway in E.coli .. +PF13978 Protein of unknown function (DUF4223)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. These proteins are likely to be lipoproteins (attachment site currently included in alignment).. +PF13979 SopA-like catalytic domain
This domain is found in the E. coli Type III secretion effector proteins SopA and NleL [1,2]. These proteins have been shown to act as E3 ubiquitin ligase enzymes. This domain contains the active site cysteine residue.. +PF13980 Uncharacterised protein family (UPF0370)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved DWP sequence motif.. +PF13981 SopA-like central domain
This domain is found in the E. coli Type III secretion effector proteins SopA and NleL [1,2]. These proteins have been shown to act as E3 ubiquitin ligase enzymes.. +PF13982 YbfN-like lipoprotein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. Members of this family are lipoproteins.. +PF13983 YsaB-like lipoprotein
This family of proteins is functionally uncharacterised. These proteins are related to E.coli YsaB Swiss:Q0TBP2. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. These proteins are lipoproteins.. +PF13984 MsyB protein
The MsyB protein has been found to be able to restore protein export defects caused by a temperature-sensitive secY or secA mutation . However, its exact molecular function is still unknown, but it may play a role in protein export. Proteins in this family are approximately 120 amino acids in length. This family of proteins is found in bacteria.. +PF13985 YbgS-like protein
This family of proteins is functionally uncharacterised. The family includes the YbgS protein from E. coli Swiss:P0AAV6. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. Some members of this family are annotated as homeobox protein, but this annotation cannot be verified.. +PF13986 Domain of unknown function (DUF4224)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 50 amino acids in length. The protein is likely to be of phage origin and is found as protein Gp02 Swiss:C8CLF5 in the Xylella phage Xfas53 .. +PF13987 YedD-like protein
This family of proteins related to the YedD protein is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. These proteins are lipoproteins.. +PF13988 Protein of unknown function (DUF4225)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 182 and 282 amino acids in length.. +PF13989 YejG-like protein
The YejG protein family is a group of functionally uncharacterised proteins related to Swiss:P0AD21. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length.. +PF13990 YjcZ-like protein
This family of proteins is functionally uncharacterised. The family includes the YjcZ protein from E. coli Swiss:P39267. This family of proteins is found in enteric bacteria. Proteins in this family are approximately 300 amino acids in length. There are two conserved sequence motifs: FGD and MPR.. +PF13991 BssS protein family
The BssS protein family is a group of proteins that are involved in regulation of biofilm formation . Proteins in this family are approximately 80 amino acids in length.. +PF13992 YecR-like lipoprotein
The YecR-like family of lipoproteins includes the YecR protein from E. coli Swiss:P76308. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 110 amino acids in length.. +PF13993 YccJ-like protein
The YccJ-like family of proteins includes the E. coli YccJ protein Swiss:P0AB14 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF13994 PgaD-like protein
This family includes the PgaD protein from E. coli Swiss:P69432. The homopolymer poly-beta-1,6-N-acetyl-D-glucosamine (beta-1,6-GlcNAc; PGA) serves as an adhesin for the maintenance of biofilm structural stability in eubacteria. The pgaABCD operon is required for its synthesis and export. It has been shown that PgaD is essential for this process .. +PF13995 YebF-like protein
The YebF-like protein family appears to be a group of colicin immunity proteins. As well as YebF the family includes cmi, the colicin M immunity protein . This domain family is found in bacteria, and is approximately 80 amino acids in length. The alignment contains two conserved cysteine residues that form a disulphide bond in the solved structure .. +PF13996 YobH-like protein
The YobH-like protein family includes the YobH protein from E. coli Swiss:Q2MB16 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There are two conserved sequence motifs: GYG and GLGL.. +PF13997 YqjK-like protein
The YqjK-like protein family includes the E. coli YqjK protein Swiss:Q47710 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a single completely conserved residue R that may be functionally important.. +PF13998 MgrB protein
The MgrB protein is a short lipoprotein. The mgrB gene has a mg2+ responsive promoter . Deletion of mgrB results in a potent increase in PhoP-regulated transcription . The PhoQ/PhoP signaling system responds to low magnesium and the presence of certain cationic antimicrobial peptides. Over-expression of mgrB decreased transcription at both high and low concentrations of magnesium. Localization and bacterial two-hybrid studies suggest that MgrB resides in the inner-membrane and interacts directly with PhoQ. This domain family is found in bacteria, and is approximately 40 amino acids in length. There are two conserved sequence motifs: CDQ and GIC.. +PF13999 MarB protein
The MarB protein is found in the multiple antibiotic resistance (mar) locus in Escherichia coli. The MarB protein is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved GSDKSD sequence motif.. +PF14000 DNA packaging protein FI
This family includes the lambda phage DNA-packaging protein FI. Proteins in this family are typically between 124 and 140 amino acids in length. There is a conserved EEE sequence motif.. +PF14001 YdfZ protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved YDRNRN sequence motif. The E. coli protein has been shown to bind selenium .. +PF14002 YniB-like protein
The YniB-like protein family includes the E. coli YniB protein Swiss:P76208 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 180 amino acids in length. This family of proteins are integral membrane proteins.. +PF14003 YlbE-like protein
The YlbE-like protein family includes the B. subtilis protein YlbE Swiss:O34958 which is functionally uncharacterised. This family of cytosolic proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There is a conserved WYR sequence motif.. +PF14004 Protein of unknown function (DUF4227)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF14005 YpjP-like protein
The YpjP-like protein family includes the B. subtilis YpjP protein Swiss:P54172 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 200 amino acids in length.. +PF14006 YqzL-like protein
The YqzL-like protein family includes the B. subtilis YqzL protein Swiss:C0H452 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length.. +PF14007 YtpI-like protein
The YtpI-like protein family includes the B. subtilis YtpI protein Swiss:O34922 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 101 amino acids in length.. +PF14008 Iron/zinc purple acid phosphatase-like protein C
This domain is found at the C-terminus of Purple acid phosphatase proteins.. +PF14009 Domain of unknown function (DUF4228)
This domain is found in plants. The function is not known.. +PF14010 Phosphoenolpyruvate carboxylase
This family of phosphoenolpyruvate carboxylases is based on sequences not picked up by the model for PEPcase, PF00311. Most of the family members are from Archaea.. +PF14011 EspG family
This family of proteins contains the EspG1, EspG2 and EspG3 proteins from M. tuberculosis. These proteins are involved in the ESAT-6 secretion system 1 (ESX-1) of Mycobacterium tuberculosis which is important for virulence and intercellular spread . Proteins in this family are typically between 254 and 295 amino acids in length.. +PF14012 Protein of unknown function (DUF4229)
This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 122 amino acids in length.. +PF14013 MT0933-like antitoxin protein
This family of proteins contains the MT0933 protein Swiss:O05901 which has been identified as an antitoxin to protein MT0934 . This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 90 amino acids in length.. +PF14014 Protein of unknown function (DUF4230)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 203 and 228 amino acids in length.. +PF14015 Protein of unknown function (DUF4231)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 148 and 288 amino acids in length.. +PF14016 Protein of unknown function (DUF4232)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 177 and 242 amino acids in length. Many members of this family are lipoproteins.. +PF14017 Protein of unknown function (DUF4233)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 122 and 147 amino acids in length. Proteins in this family are integral membrane proteins.. +PF14018 Domain of unknown function (DUF4234)
This presumed integral membrane protein domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 70 amino acids in length.. +PF14019 Protein of unknown function (DUF4235)
This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 88 and 119 amino acids in length.. +PF14020 Protein of unknown function (DUF4236)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 69 and 402 amino acids in length.. +PF14021 Protein of unknown function (DUF4237)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 123 and 781 amino acids in length.. +PF14022 Protein of unknown function (DUF4238)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 274 and 374 amino acids in length.. +PF14023 Protein of unknown function (DUF4239)
Jackhmmer:Catenulispora acidiphila . This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 254 and 270 amino acids in length.. +PF14024 Protein of unknown function (DUF4240)
This presumed domain is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 169 and 263 amino acids in length. This domain is often associated with the WGR domain Pfam:PF05406.. +PF14025 Protein of unknown function (DUF4241)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 205 and 315 amino acids in length. There is a conserved GDG sequence motif at the C-terminus.. +PF14026 Protein of unknown function (DUF4242)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 90 and 170 amino acids in length. There is a single completely conserved residue C that may be functionally important.. +PF14027 Protein of unknown function (DUF4243)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 348 and 477 amino acids in length.. +PF14028 SpaB C-terminal domain
This presumed domain is found at the C-terminus of the SpaB protein Swiss:P39774. SpaB is involved in the synthesis of the lantibiotic subtilin. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 317 and 1029 amino acids in length. The family is often found in association with Pfam:PF04737, Pfam:PF04738. This domain is found in isolation in some proteins. This domain is also found in EpiB involved in epidermin biosynthesis.. +PF14029 Protein of unknown function (DUF4244)
Jackhmmer:Catenulispora acidiphila . This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 66 and 95 amino acids in length. There is a conserved EYA sequence motif.. +PF14030 Protein of unknown function (DUF4245)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 188 and 235 amino acids in length.. +PF14031 Putative serine dehydratase domain
This domain is found at the C-terminus of yeast D-serine dehydratase . Structures have been solved for two bacterial members of this family. The yeast protein has been shown to be a zinc-dependent enzyme.. +PF14032 PknH-like extracellular domain
This domain is functionally uncharacterised. It is found as the periplasmic domain of the bacterial protein kinase PknH . The domain is also found in isolation in numerous proteins, for example the lipoproteins lpqQ, lprH, lppH and lpqA from M. tuberculosis. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 268 amino acids in length. There are two completely conserved C residues that are likely to form a disulphide bond. A second pair of cysteines, which is less well conserved, probably forms a second disulphide bond. It seems likely that this domain functions to bind some as yet unknown ligand.. +PF14033 Protein of unknown function (DUF4246)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and fungi. Proteins in this family are typically between 392 and 644 amino acids in length.. +PF14034 Sporulation protein YtrH
This family of proteins is involved in sporulation. It may contribute to the formation and stability of the thick peptidoglycan layer between the two membranes of the spore, known as the cortex . In Bacillus subtilis its expression is regulated by sigma-E .. +PF14035 YlzJ-like protein
The YlzJ-like protein family includes the B. subtilis YlzJ protein Swiss:C0H413, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 72 amino acids in length. There are two completely conserved residues (L and G) that may be functionally important.. +PF14036 YlaH-like protein
The YlaH-like protein family includes the B. subtilis YlaH protein Swiss:O07632, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a conserved LGFA sequence motif.. +PF14037 YoqO-like protein
The YoqO-like protein family includes the B. subtilis YoqO protein Swiss:O31923, which is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 120 amino acids in length. There are two completely conserved residues (I and Y) that may be functionally important.. +PF14038 YqzE-like protein
The YqzE-like protein family includes the B. subtilis YqzE protein Swiss:O32020, which is functionally uncharacterised. It is a part of the ComG operon, which is regulated by the competence transcription factor ComK . This family of proteins is found in bacteria. Proteins in this family are typically between 49 and 66 amino acids in length.. +PF14039 YusW-like protein
The YusW-like protein family includes the B. subtilis YusW protein Swiss:O32189, which is functionally uncharacterised. This family of proteins is found in bacteria, and is approximately 90 amino acids in length.. +PF14040 Deoxyribonuclease NucA/NucB
Members of this family act as deoxyribonucleases .. +PF14041 LppP/LprE lipoprotein
The family includes putative lipoproteins LppP and LprE from species of Mycobacterium. LppP is required for optimal growth of M. tuberculosis .. +PF14042 Domain of unknown function (DUF4247)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 143 and 271 amino acids in length.. +PF14043 WVELL protein
This family includes the B. subtilis YfjH protein Swiss:O31578, which is functionally uncharacterised. This is not a homologue of E. coli YfjH, a synonym for IscX, which belongs to Pfam:PF04384. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length and contain a highly conserved WVELL motif.. +PF14044 NETI protein
This family includes the B. subtilis YebG protein Swiss:O34700, which is functionally uncharacterised. This is not a homologue of E. coli YebG, which belongs to Pfam:PF07130. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 66 amino acids in length and contain a conserved NETI motif.. +PF14045 YIEGIA protein
This family includes the B. subtilis YphB protein Swiss:P50742, which is functionally uncharacterised. Its expression is regulated by the sporulation transcription factor sigma-F, however it is not essential for sporulation or germination . This is not a homologue of E. coli YphB, which belongs to Pfam:PF01263. This family of proteins is found in bacteria. Proteins in this family are typically between 276 and 300 amino acids in length and contain a conserved YIEGIA motif.. +PF14046 Nuclear receptor repeat
Joint Center of Structural Genomics (JCSG). This is a repeat domain involved in dimerisation of nuclear receptor proteins and in transcriptional regulation in general. It contains a Leu-Xaa-Xaa-Leu-Leu motif which has been characterized for the orphan nuclear receptor Dax-1, which represses the constitutively expressed protein Ad4BP/SF-1. The LXXLL motif plays an important role in binding of Dax-1 to Ad4BP/SF-1 . The domain is subject to structure determination by the Joint Center of Structural Genomics.. +PF14047 Dppa2/4 conserved region
Joint Center of Structural Genomics (JCSG). This domain has been characterized in the finding of a developmental pluripotency associated gene (Dppa) in the lower vertebrate Xenopus laevis . Previous to this discovery, Dppa genes were known only in higher vertebrates. The domain is subject to structure determination by the Joint Center of Structural Genomics.. +PF14048 C-terminal domain of methyl-CpG binding protein 2 and 3
Joint Center of Structural Genomics (JCSG). CpG-methylation is a frequently occurring epigenetic modification of vertebrate genomes resulting in transcriptional repression. This domain was found at the C-terminus of the methyl-CpG-binding domain (MBD) containing proteins MBD2 and MBD3 , the latter was shown to not bind directly to methyl-CpG DNA but rather interact with components of the NuRD/Mi2 complex , an abundant deacetylase complex. The domain is subject to structure determination by the Joint Center of Structural Genomics.. +PF14049 Dppa2/4 conserved region in higher vertebrates
Joint Center of Structural Genomics (JCSG). Developmental pluripotency associated genes (Dppa) in lower vertebrates have remained undetected until the discovery of a Dppa homologue in Xenopus laevis , reporting a new domain termed Dppa2/4 conserved region (DCR). In higher vertebrate Dppa proteins the DCR domain is located next to the here-reported domain. The domain is subject to structure determination by the Joint Center of Structural Genomics.. +PF14050 N-terminal conserved domain of Nudc.
Joint Center of Structural Genomics (JCSG). The N-terminus of nuclear distribution gene C homolog (NUDC) proteins contains a highly conserved region consisting of a predicted three helix bundle. In the human homolog this segment has been targeted for structure determination by the Joint Center for Structural Genomics. NUDC forms a complex with other NUD proteins and is involved in several cellular division activities. Recently it was shown that NUDC regulates platelet-activating factor (PAF) acetylhydrolase with PAF being a pro-inflammatory secondary lipidic messenger .. +PF14051 N-terminal domain of DPF2/REQ.
Joint Center of Structural Genomics (JCSG). This putative domain has been detected on the human DPF2 protein and was subsequently targeted for structure determination by the Joint Center for Structural Genomics (JCSG). Possibly, the C-terminus extends by 30 amino acids and forms a separate domain. DPF2 interacts with estrogen related receptor alpha (Err-alpha), an orphan receptor which acts as a regulator in energy metabolism . It was also identified as an adaptor molecule that links nuclear factor kappa-light-chain-enhancer of activated B cells (NF-kappa-B) dimer RelB/p52 and switch/sucrose-nonfermentable (SWI/SNF) chromatin remodeling factor .. +PF14052 Capsule assembly protein Wzi
Many bacteria are covered in a layer of surface-associated polysaccharide called the capsule. These capsules can be divided into four groups depending upon the organisation of genes responsible for capsule assembly, the assembly pathway and regulation . This family plays a role in group 1 capsule biosynthesis. It is likely to be involved in the later stages of capsule assembly. It is likely to consist of a beta-barrel structure .. +PF14053 Domain of unknown function (DUF4248)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 86 amino acids in length.. +PF14054 Domain of unknown function (DUF4249)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 279 and 365 amino acids in length. There are two completely conserved residues (C and G) that may be functionally important.. +PF14055 NVEALA protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 75 and 92 amino acids in length. There is a conserved NVEALA sequence motif.. +PF14056 Domain of unknown function (DUF4250)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There are two completely conserved residues (N and R) that may be functionally important.. +PF14057 GGGtGRT protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 330 amino acids in length and contain many highly conserved residues including a GGGtGRT motif.. +PF14058 PcfK-like protein
The PcfK-like protein family includes the Enterococcus faecalis PcfK protein Swiss:Q82YK9 which is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 137 and 257 amino acids in length. There are two completely conserved residues (D and L) that may be functionally important.. +PF14059 Domain of unknown function (DUF4251)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 164 and 196 amino acids in length.. +PF14060 Domain of unknown function (DUF4252)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 182 amino acids in length.. +PF14061 Polycomb-like MTF2 factor 2
Joint Center of Structural Genomics (JCSG). Mammalian Polycomb-like gene MTF2/PCL2 forms a complex with Polycomb repressive complex-2 (PRC2) and collaborates with PRC1 to achieve repression of Hox gene expression . The human MTF2 gene is expressed in three splicing variants, each of them contains the short C-terminal domain defined here. The domain is subject to structure determination by the Joint Center of Structural Genomics.. +PF14062 Domain of unknown function (DUF4253)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length.. +PF14063 Protein of unknown function (DUF4254)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 195 and 207 amino acids in length.. +PF14064 HmuY protein
HmuY is a novel heme-binding protein that recruits heme from host carriers and delivers it to its cognate outer-membrane transporter, the TonB-dependent receptor HmuR. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 278 amino acids in length.. +PF14065 Protein of unknown function (DUF4255)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 190 and 320 amino acids in length.. +PF14066 Protein of unknown function (DUF4256)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 190 amino acids in length.. +PF14067 LssY C-terminus
This domain is found at the C-terminus of Legionella LssY proteins, which may be a part of the type I secretion system . This domain is functionally uncharacterised. This domain is found in bacteria, and is typically between 182 and 195 amino acids in length. It is often found in association with Pfam:PF09335 and PF01569. There are two completely conserved residues (P and W) that may be functionally important.. +PF14068 Putative membrane protein
This family of bacterial proteins is functionally uncharacterised. Proteins in this family are approximately 100 amino acids in length. There is a conserved FGIGF sequence motif, and many members are putative membrane proteins.. +PF14069 Stage VI sporulation protein F
The sporulation-specific SpoVIF (YjcC) protein of Bacillus subtilis is essential for the development of heat-resistant spores. Its expression is governed by SigK [1,2].. +PF14070 Putative motility protein
This family of proteins is regulated in B. subtilis by SigD, and is likely to be involved in motility or flagellin production. Proteins in this family are approximately 60 amino acids in length, and contain two highly conserved asparagine residues.. +PF14071 Putative coat protein
This is a family of putative bacterial coat proteins. Proteins in this family are approximately 140 amino acids in length.. +PF14072 DNA-sulfur modification-associated
This is a family of bacterial proteins likely to be necessary for binding to DNA and recognising the modification sites. Members are found in bacteria, archaea and on viral plasmids, and are typically between 354 and 474 amino acids in length. There is a conserved DGQHR sequence motif.. +PF14073 Centrosome localisation domain of Cep57
The CLD or centrosome localisation domain of Cep57 is found at the N-terminus, and lies approximately between residues 58 and 239. This region lies within the first alpha-helical coiled-coil segment of Cep57, and localises to the centrosome internally to gamma-tubulin, suggesting that it is either on both centrioles or on a centromatrix component. This N-terminal region can also multimerise with the N-terminus of other Cep57 molecules. The C-terminal part, Family Cep57_MT_bd, Pfam:PF06657, is the microtubule-binding region of Cep57.. +PF14074 Protein of unknown function (DUF4257)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF14075 Ubinuclein conserved middle domain
Joint Center of Structural Genomics (JCSG). Ubinuclein 1 and 2 (UBN1, UBN2) are members of a histone chaperone complex involved in the formation of a certain type of facultative heterochromatin, called senescence-associated heterochromatin foci (SAHF) . The domain described here is conserved in many eukaryotes such as human, rat, drosophila, and zebra-fish and has been targeted for protein structure determination by the Joint Center for Structural Genomics.. +PF14076 Domain of unknown function (DUF4258)
Jackhmmer:Chitinophaga pinensis . This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 95 and 124 amino acids in length.. +PF14077 Alternative WD40 repeat motif
Joint Center of Structural Genomics (JCSG). WD repeats are short subdomains of about 40 amino acids and fold into 4 antiparallel beta hairpins. This domain here has been detected on the C-terminus of WD repeat-containing protein 18 during target selection by the Joint Center for Structural Genomics.. +PF14078 Domain of unknown function (DUF4259)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 118 and 145 amino acids in length.. +PF14079 Domain of unknown function (DUF4260)
This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 114 and 126 amino acids in length. There is a conserved GLK sequence motif.. +PF14080 Domain of unknown function (DUF4261)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 80 amino acids in length.. +PF14081 Domain of unknown function (DUF4262)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 147 and 227 amino acids in length. Swiss:Q50763 is incorrectly annotated as the KatG protein.. +PF14082 Domain of unknown function (DUF4263)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 244 and 403 amino acids in length.. +PF14083 PGDYG protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. There is a conserved PGDYG motif.. +PF14084 Protein of unknown function (DUF4264)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14085 Domain of unknown function (DUF4265)
Jackhmmer:Chitinophaga pinensis . This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 139 and 168 amino acids in length.. +PF14086 Domain of unknown function (DUF4266)
This presumed lipoprotein domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 50 amino acids in length.. +PF14087 Domain of unknown function (DUF4267)
This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 126 and 142 amino acids in length.. +PF14088 Domain of unknown function (DUF4268)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 151 and 387 amino acids in length.. +PF14089 KinB-signalling pathway activation in sporulation
This family of small proteins is found in the membrane and is necessary for kinase KinB signalling during sporulation. There is a conserved GFF sequence motif. The initiation of sporulation in Bacillus subtilis is dependent on the phosphorylation of the Spo0A transcription factor mediated by the phospho-relay and by two major kinases, KinA and KinB.. +PF14090 Helix-turn-helix domain
This helix-turn-helix domain is often found in phage proteins and is likely to be DNA-binding.. +PF14091 Domain of unknown function (DUF4269)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 176 and 187 amino acids in length. There is a conserved KTE sequence motif.. +PF14092 Domain of unknown function (DUF4270)
This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 444 and 534 amino acids in length.. +PF14093 Domain of unknown function (DUF4271)
This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 221 and 326 amino acids in length.. +PF14094 Domain of unknown function (DUF4272)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 221 and 399 amino acids in length.. +PF14096 Domain of unknown function (DUF4274)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 80 amino acids in length.. +PF14097 Stage V sporulation protein AE1
Members of this family are all described as putative stage V sporulation protein AE, although this could not be confirmed. Proteins in this family are approximately 190 amino acids in length.. +PF14098 Small, acid-soluble spore protein I
This family of proteins is putatively assigned as a small, acid-soluble spore protein 1. Proteins in this family are approximately 70 amino acids in length. There is a conserved LPGLGV sequence motif.. +PF14099 Polysaccharide lyase
This family includes heparin lyase I, EC:4.2.2.7. Heparin lyase I depolymerises heparin by cleaving the glycosidic linkage next to an iduronic acid moiety [1,2]. The structure of heparin lyase I consists of a beta-jelly roll domain with a long, deep substrate-binding groove and an unusual thumb domain containing many basic residues extending from the main body of the enzyme . This family also includes glucuronan lyase, EC:4.2.2.14 . The structure of glucuronan lyase is a beta-jelly roll .. +PF14100 Methane oxygenase PmoA
This family is a putative methane oxygenase . +PF14101 Domain of unknown function (DUF4275)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length.. +PF14102 Capsule biosynthesis CapC
This family of proteins plays a role in capsule biosynthesis. They are essential for gamma-polyglutamic acid (PGA) production .. +PF14103 Domain of unknown function (DUF4276)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 190 and 224 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF14104 Domain of unknown function (DUF4277)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 110 amino acids in length. There is a conserved NGLGF sequence motif.. +PF14105 Domain of unknown function (DUF4278)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 58 and 136 amino acids in length. There is a single completely conserved residue R that may be functionally important.. +PF14106 Domain of unknown function (DUF4279)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 134 and 145 amino acids in length.. +PF14107 Domain of unknown function (DUF4280)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 129 and 456 amino acids in length. There is a single completely conserved residue C that may be functionally important.. +PF14108 Domain of unknown function (DUF4281)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 147 and 232 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important.. +PF14109 GldH lipoprotein
Members of this protein family are predicted lipoproteins, exclusive to the Bacteroidetes phylum. Proteins in this family are typically between 155 and 167 amino acids in length. Members include GldH, a protein linked to a type of rapid surface gliding motility found in certain Bacteroidetes, such as Flavobacterium johnsoniae and Cytophaga hutchinsonii . Gliding motility appears closely linked to chitin utilization in the model species Flavobacterium johnsoniae. Not all Bacteroidetes with members of this protein family may have gliding motility.. +PF14110 Domain of unknown function (DUF4282)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 93 and 155 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF14111 Domain of unknown function (DUF4283)
This domain family is found in plants, and is approximately 100 amino acids in length. Considering the very diverse range of other domains it is associated with it is possible that this domain is a binding/guiding region. There are two highly conserved tryptophan residues.. +PF14112 Domain of unknown function (DUF4284)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 124 and 142 amino acids in length.. +PF14113 Domain of unknown function (DUF4285)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 157 and 206 amino acids in length.. +PF14114 Domain of unknown function (DUF4286)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 100 and 112 amino acids in length.. +PF14115 YuzL-like protein
The YuzL-like protein family includes the B. subtilis YuzL protein Swiss:C0H3R0 which is functionally uncharacterised. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length.. +PF14116 YyzF-like protein
The YyzF-like protein family includes the B. subtilis YyzF protein Swiss:C0H3T9 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14117 Domain of unknown function (DUF4287)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 70 and 180 amino acids in length.. +PF14118 YfzA-like protein
The YfzA-like protein family includes the B. subtilis YfzA protein Swiss:C0H3X6 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length.. +PF14119 Domain of unknown function (DUF4288)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF14120 YhzD-like protein
The YhzD-like protein family includes the B. subtilis YhzD protein Swiss:C0H3Y1 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved GKL sequence motif.. +PF14121 Domain of unknown function (DUF4289)
This family of membrane beta-barrel proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 655 and 722 amino acids in length. Swiss:Q2S343 is identified by Gene3D as a membrane bound beta-barrel.. +PF14122 YokU-like protein
The YokU-like protein family includes the B. subtilis YokU protein Swiss:C0H434 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two conserved CXXC sequence motifs.. +PF14123 Domain of unknown function (DUF4290)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 200 and 221 amino acids in length. There are two conserved sequence motifs: EYGR and KLWD.. +PF14124 Domain of unknown function (DUF4291)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 190 and 214 amino acids in length. There are two conserved sequence motifs: VYQAY and RMTW.. +PF14125 Domain of unknown function (DUF4292)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 243 and 287 amino acids in length.. +PF14126 Domain of unknown function (DUF4293)
This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 136 and 154 amino acids in length.. +PF14127 Domain of unknown function (DUF4294)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 192 and 226 amino acids in length.. +PF14128 Domain of unknown function (DUF4295)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are two completely conserved residues (K and Y) that may be functionally important.. +PF14129 Domain of unknown function (DUF4296)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length.. +PF14130 Domain of unknown function (DUF4297)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is typically between 207 and 221 amino acids in length.. +PF14131 Domain of unknown function (DUF4298)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 94 and 105 amino acids in length. There are two completely conserved residues (Y and D) that may be functionally important.. +PF14132 Domain of unknown function (DUF4299)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 275 and 313 amino acids in length. There are two conserved sequence motifs: RGF and DAY. There are two completely conserved residues (P and D) that may be functionally important.. +PF14133 Domain of unknown function (DUF4300)
This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 281 and 303 amino acids in length. There are two conserved sequence motifs: NCR and PYQ.. +PF14134 Domain of unknown function (DUF4301)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 505 and 516 amino acids in length.. +PF14135 Domain of unknown function (DUF4302)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 344 and 443 amino acids in length. There are two completely conserved residues (R and L) that may be functionally important.. +PF14136 Domain of unknown function (DUF4303)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 169 and 192 amino acids in length.. +PF14137 Domain of unknown function (DUF4304)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 223 amino acids in length.. +PF14138 Cytochrome c oxidase assembly protein COX16
This family represents homologues of COX16 which has been shown to be involved in assembly of cytochrome oxidase . Proteins in this family are typically between 106 and 134 amino acids in length.. +PF14139 YpzG-like protein
The YpzG-like protein family includes the B. subtilis YpzG protein Swiss:C0H444 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a conserved QVNG sequence motif.. +PF14140 YpzI-like protein
The YpzI-like protein family includes the B. subtilis YpzI protein Swiss:C0H446 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length.. +PF14141 YqzM-like protein
The YqzM-like protein family includes the B. subtilis YqzM protein Swiss:C0H453 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length.. +PF14142 YrzO-like protein
The YrzO-like protein family includes the B. subtilis YrzO protein Swiss:C0H458 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length.. +PF14143 YrhC-like protein
The YrhC-like protein family includes the B. subtilis YrhC protein Swiss:O05395 which is functionally uncharacterised. YrhC is on the same operon as the MccA and MccB genes, which are involved in the conversion of methionine to cysteine. Expression of this operon is repressed in the presence of sulphate or cysteine . This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF14144 Seed dormancy control
This family of plant proteins appears to be a highly specific controller of seed dormancy.. +PF14145 YrhK-like protein
The YrhK-like protein family includes the B. subtilis YrhK protein Swiss:O05401 which is functionally uncharacterised. Its expression is under the control of the motility sigma factor sigma-D . This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length.. +PF14146 Domain of unknown function (DUF4305)
This family includes the B. subtilis YdiK protein Swiss:O05524, which is functionally uncharacterised. This is not a homologue of E. coli YdiK, which belongs to Pfam:PF01594. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14147 Sporulation protein YhaL
This family of proteins is involved in sporulation. In B. subtilis its expression is regulated by the early mother-cell-specific transcription factor sigma-E .. +PF14148 YhdB-like protein
The YhdB-like protein family includes the B. subtilis YhdB protein Swiss:O07530, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 57 and 82 amino acids in length. There are two conserved sequence motifs: LMVRT and FLHAY.. +PF14149 YhfH-like protein
The YhfH-like protein family includes the B. subtilis YhfH protein Swiss:O07606, which is functionally uncharacterised. Its expression is repressed by the Spx paralogue MgsR, which regulates genes involved in stress response . This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 53 amino acids in length.. +PF14150 YesK-like protein
The YesK-like protein family includes the B. subtilis YesK protein Swiss:O31514, which is functionally uncharacterised. Its expression is regulated by the sporulation-specific sigma factor sigma-E . This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length.. +PF14151 YfhD-like protein
The YfhD-like protein family includes the B. subtilis YfhD protein Swiss:O31572, which is functionally uncharacterised. Its expression is regulated by the sporulation-specific sigma factor sigma-F . This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF14152 YfhE-like protein
The YfhE-like protein family includes the B. subtilis YfhE protein Swiss:O31573, which is functionally uncharacterised. Its expression may be regulated by the sigma factor sigma-B, which regulates the expression of stress-response proteins . This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved QEV sequence motif.. +PF14153 Spore coat protein CotO
Bacillus spores are protected by a protein shell consisting of over 50 different polypeptides, known as the coat. This family of proteins has an important morphogenetic role in coat assembly, it is involved in the assembly of at least 5 different coat proteins including CotB, CotG, CotS, CotSA and CotW. It is likely to act at a late stage of coat assembly .. +PF14154 Domain of unknown function (DUF4306)
This family includes the B. subtilis YjdJ protein Swiss:O05524, which is functionally uncharacterised. This is not a homologue of E. coli YjdJ, which belongs to Pfam:PF00583. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 152 amino acids in length.. +PF14155 Domain of unknown function (DUF4307)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 132 and 153 amino acids in length. There is a single completely conserved residue C that may be functionally important.. +PF14156 Antirepressor AbbA
This family inactivates the repressor AbrB, which represses genes switched on during the transition from the exponential to the stationary phase of growth. It binds to AbrB and prevents it from binding to DNA .. +PF14157 YmzC-like protein
The YmzC-like protein family includes the B. subtilis YmzC protein Swiss:O31797, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 91 amino acids in length. There is a conserved ELR sequence motif.. +PF14158 YndJ-like protein
The YndJ-like protein family includes the B. subtilis YndJ protein Swiss:O31813, which is functionally uncharacterised. This family is found in bacteria and archaea, and is typically between 222 and 269 amino acids in length. There are two completely conserved G residues that may be functionally important.. +PF14159 DUF4308;
CAAD domains of cyanobacterial aminoacyl-tRNA synthetase. This domain is present in aminoacyl-tRNA synthetases (aaRSs), enzymes that couple tRNAs to their cognate amino acids . aaRSs from cyanobacteria containing the CAAD (for cyanobacterial aminoacyl-tRNA synthetases appended domain) protein domains are localised in the thylakoid membrane. The domain bears two putative transmembrane helices and is present in glutamyl-, isoleucyl-, leucyl-, and valyl-tRNA synthetases, the latter of which has probably recruited the domain more than once during evolution.. +PF14160 Centrosome-associated C terminus
This is the C-terminus of a family of proteins that colocalise with the centrosome/microtubule organisation centre in interphase and at the spindle poles in mitosis.. +PF14161 Centrosome-associated N terminus
This is the N-terminus of a family of proteins that colocalise with the centrosome/microtubule organisation centre in interphase and at the spindle poles in mitosis.. +PF14162 YozD-like protein
The YozD-like protein family includes the B. subtilis YozD protein Swiss:O31863, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14163 Superinfection exclusion protein B
This family includes superinfection exclusion proteins. These proteins prevent the growth of superinfecting phage which are insensitive to repression. It aborts lytic development of superinfecting phage [1-3].. +PF14164 YqzH-like protein
The YqzH-like protein family includes the B. subtilis YqzH protein Swiss:O32014, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14165 YtzH-like protein
The YtzH-like protein family includes the B. subtilis YtzH protein Swiss:O32066, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There is a conserved DIL sequence motif.. +PF14166 YueH-like protein
The YueH-like protein family includes the B. subtilis YueH protein Swiss:O32093, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF14167 YfkD-like protein
The YfkD-like protein family includes the B. subtilis YfkD protein Swiss:O34579, which is functionally uncharacterised. Its expression is regulated by the sigma factor sigma-B, which regulates the expression of stress-response proteins, and by the forespore-specific sigma factor sigma-G [1,2]. This family of proteins is found in bacteria. Proteins in this family are typically between 254 and 265 amino acids in length.. +PF14168 YjzC-like protein
The YjzC-like protein family includes the B. subtilis YjzC protein Swiss:O34585, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14169 Cold-inducible protein YdjO
This family includes the B. subtilis YdjO protein Swiss:O34759, which is functionally uncharacterised. This is not a homologue of E. coli YdjO, Swiss:P76210. B. subtilis YdjO is cold-inducible . Its expression is induced by the extracytoplasmic function sigma factor sigma-W . This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length.. +PF14171 Toxin SpoIISA, type II toxin-antitoxin system
SpoIISA is a toxin which causes lysis of vegetatively growing cells. It forms part of a type II toxin-antitoxin system, where the SpoIISB protein, Pfam:PF14185, acts as an antitoxin. It is a transmembrane protein, with a cytoplasmic domain accounting for approximately two-thirds of the protein. The structure of the cytoplasmic domain resembles that of the GAF domains, Pfam: PF01590. SpoIISB binds to the cytoplasmic domain of SpoIISA with high affinity .. +PF14172 Domain of unknown function (DUF4309)
This family includes the B. subtilis YjgB protein Swiss:O34960, which is functionally uncharacterised. This is not a homologue of E. coli YjgB, Swiss: P27250. Expression of B. subtilis YjgB is regulated by the alternative transcription factor sigma-B . This family is found in bacteria, and is approximately 140 amino acids in length.. +PF14173 ComG operon protein 7
This family is required for DNA-binding during transformation of competent bacterial cells .. +PF14174 YycC-like protein
The YycC-like protein family includes the B. subtilis YycC protein Swiss:P37481, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a conserved HIL sequence motif.. +PF14175 YaaC-like Protein
The YaaC-like protein family includes the B. subtilis YaaC protein Swiss:P37526, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 320 and 333 amino acids in length.. +PF14176 YxiJ-like protein
The YxiJ-like protein family includes the B. subtilis YxiJ protein Swiss:P42320, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF14177 YkyB-like protein
The YkyB-like protein family includes the B. subtilis YkyB protein Swiss:P42430, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. There are two conserved sequence motifs: NRHAKTA and HLG.. +PF14178 YppF-like protein
The YppF-like protein family includes the B. subtilis YppF protein Swiss:P50834, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved LLDF sequence motif.. +PF14179 YppG-like protein
The YppG-like protein family includes the B. subtilis YppG protein Swiss:P50835, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 115 and 181 amino acids in length. There are two completely conserved residues (F and G) that may be functionally important.. +PF14181 YqfQ-like protein
The YqfQ-like protein family includes the B. subtilis YqfQ protein Swiss:P54474, also known as VrrA, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 146 and 237 amino acids in length. There are two conserved sequence motifs: QYGP and PKLY.. +PF14182 YgaB-like protein
The YgaB-like protein family includes the B. subtilis YgaB protein Swiss:P71080, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length.. +PF14183 YwpF-like protein
The YwpF-like protein family includes the B. subtilis YwpF protein Swiss:P94588, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 146 and 167 amino acids in length. There is a conserved IIN sequence motif.. +PF14184 Regulatory protein YrvL
YrvL prevents expression and activity of the YrvI sigma factor. It may function as an anti-sigma factor [1,2]. +PF14185 Antitoxin SpoIISB, type II toxin-antitoxin system
Members of this family act as antitoxins. They bind to the SpoIISA toxin, Pfam:PF14171. They are disordered proteins which adopt structure only when bound to SpoIISA .. +PF14186 Cytoskeletal adhesion
This is the C-terminal domain of the axin-interacting protein family, and is a distinct version of the C2 domain. This domain is critical for interactions with the cytoskeleton in the context of cellular adhesion points.. +PF14187 Domain of unknown function (DUF4310)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 231 amino acids in length.. +PF14188 Domain of unknown function (DUF4311)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 260 amino acids in length.. +PF14189 Domain of unknown function (DUF4312)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 99 and 118 amino acids in length.. +PF14190 Domain of unknown function (DUF4313)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 136 and 171 amino acids in length.. +PF14191 YodL-like
The YodL-like protein family includes the B. subtilis YodL protein Swiss:O30472, which is functionally uncharacterised. This domain family is found in bacteria, and is approximately 100 amino acids in length. There are two completely conserved residues (Y and D) that may be functionally important.. +PF14192 Domain of unknown function (DUF4314)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 56 and 93 amino acids in length.. +PF14193 Domain of unknown function (DUF4315)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length.. +PF14194 Cysteine-rich VLP
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. It contains 6 conserved cysteines and a conserved VLP sequence motif.. +PF14195 Domain of unknown function (DUF4316)
This domain is functionally uncharacterised. This domain is found in bacteria, and is typically between 56 and 95 amino acids in length.. +PF14196 L-2-amino-thiazoline-4-carboxylic acid hydrolase
This family of enzymes catalyses the conversion of L-2-amino-delta2-thiazoline-4-carboxylic acid (L-ATC) to N-carbamoyl-L-cysteine . It cleaves the carbon-sulphur bond in the ring structure of L-ATC to produce N-carbamoyl-L-cysteine .. +PF14197 Centrosome localisation domain of PPC89
The N-terminal region of the fission yeast spindle pole body protein PPC89 has low similarity to the human Cep57 protein. The CLD or centrosome localisation domain of Cep57 and PPC89 is found at the N-terminus. This region localises to the centrosome internally to gamma-tubulin, suggesting that it is either on both centrioles or on a centromatrix component. This N-terminal region can also multimerise with the N-terminus of other Cep57 molecules. The C-terminal part, Family Cep57_MT_bd, Pfam:PF06657, is the microtubule-binding region of Cep57 and PPC89.. +PF14198 Transposon-encoded protein TnpV
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 114 and 125 amino acids in length.. +PF14199 Domain of unknown function (DUF4317)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 225 and 451 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF14200 Ricin-type beta-trefoil lectin domain-like
+PF14201 Domain of unknown function (DUF4318)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There is a single completely conserved residue F that may be functionally important.. +PF14202 Transposon-encoded protein TnpW
This family of proteins is found in bacteria. Proteins in this family are typically between 54 and 75 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF14203 Domain of unknown function (DUF4319)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There are two completely conserved residues (E and K) that may be functionally important.. +PF14204 Ribosomal L18 C-terminal region
This domain is the C-terminal end of ribosomal L18/L5 proteins.. +PF14205 Cysteine-rich KTR
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 60 amino acids in length. There are 4 conserved cysteines and a conserved KTR sequence motif.. +PF14206 Cysteine-rich CPCC
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 68 and 104 amino acids in length. There are six conserved cysteines and a conserved CPCC sequence motif.. +PF14207 DpnD/PcfM-like protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 57 and 153 amino acids in length. There are two completely conserved residues (E and A) that may be functionally important.. +PF14208 Domain of unknown function (DUF4320)
This family of proteins is found in bacteria. Proteins in this family are typically between 120 and 131 amino acids in length. There are two completely conserved residues (G and Y) that may be functionally important.. +PF14209 Domain of unknown function (DUF4321)
This family of proteins is functionally uncharacterised. It is found in bacteria, and is approximately 50 amino acids in length.. +PF14210 Domain of unknown function (DUF4322)
This presumed domain is functionally uncharacterised. This domain family is found in archaea, and is approximately 60 amino acids in length. There is a conserved QTV sequence motif.. +PF14213 Domain of unknown function (DUF4325)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 99 and 341 amino acids in length.. +PF14214 Helitron helicase-like domain at N-terminus
This family is found in Helitrons, recently recognised eukaryotic transposons that are predicted to amplify by a rolling-circle mechanism. In many instances a protein-coding gene is disrupted by their insertion.. +PF14215 bHLH-MYC and R2R3-MYB transcription factors N-terminal
This is the N-terminal region of a family of MYB and MYC transcription factors. The DNA-binding HLH domain is further downstream, Pfam:PF00010. Members of the MYB and MYC family regulate the biosynthesis of phenylpropanoids in several plant species (DOI:10.1007/s11295-009-0232-y).. +PF14216 Domain of unknown function (DUF4326)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 100 and 162 amino acids in length. There are two completely conserved residues (P and C) that may be functionally important.. +PF14217 Domain of unknown function (DUF4327)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length.. +PF14218 Circadian oscillating protein COP23
This family includes the circadian oscillating protein COP23 from Cyanothece sp. (strain PCC 8801), Swiss:Q54702. The levels of this peripheral membrane protein display a circadian oscillation .. +PF14219 Domain of unknown function (DUF4328)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 218 and 342 amino acids in length.. +PF14220 Domain of unknown function (DUF4329)
This domain is functionally uncharacterised. It is found in bacteria and eukaryotes, and is approximately 130 amino acids in length. It is often found in association with Pfam:PF05593 and Pfam:PF03527. There is a single completely conserved residue D and a highly conserved HTH motif which may be functionally important.. +PF14221 Domain of unknown function (DUF4330)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 165 and 177 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF14222 Cell morphogenesis N-terminal
This family is the conserved N-terminal region of proteins that are involved in cell morphogenesis.. +PF14223 gag-polypeptide of LTR copia-type
This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type.. +PF14224 Domain of unknown function (DUF4331)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 223 and 526 amino acids in length. There is a conserved FPY sequence motif.. +PF14225 Cell morphogenesis C-terminal
This family is the conserved C-terminal region of proteins that are involved in cell morphogenesis.. +PF14226 non-haem dioxygenase in morphine synthesis N-terminal
This is the highly conserved N-terminal region of proteins with 2-oxoglutarate/Fe(II)-dependent dioxygenase activity.. +PF14227 gag-polypeptide of LTR copia-type
This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type.. +PF14228 Cell morphogenesis central region
This family is the conserved central region of proteins that are involved in cell morphogenesis.. +PF14229 Domain of unknown function (DUF4332)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 134 and 356 amino acids in length. This domain contains helix-hairpin-helix motifs.. +PF14230 Domain of unknown function (DUF4333)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 140 and 255 amino acids in length. There are two completely conserved C residues that may be functionally important.. +PF14231 GXWXG protein
This domain is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. There is a conserved GXWXG motif. This domain is frequently found at the N-terminus of Pfam:PF14232.. +PF14232 Domain of unknown function (DUF4334)
This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. This domain is frequently found at the C-terminus of Pfam:PF14231.. +PF14233 Domain of unknown function (DUF4335)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 204 and 480 amino acids in length. There are two completely conserved residues (G and D) that may be functionally important.. +PF14234 Domain of unknown function (DUF4336)
+PF14235 Domain of unknown function (DUF4337)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 201 amino acids in length. There is a single completely conserved residue Q that may be functionally important.. +PF14236 Domain of unknown function (DUF4338)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 206 and 475 amino acids in length.. +PF14237 Domain of unknown function (DUF4339)
This domain is found in bacteria, archaea and eukaryotes, and is approximately 50 amino acids in length. There are two completely conserved residues (G and W) that may be functionally important.. +PF14238 Domain of unknown function (DUF4340)
This domain is found in bacteria, and is typically between 183 and 196 amino acids in length.. +PF14239 RRXRR protein
This domain is found in bacteria, eukaryotes and viruses, and is approximately 180 amino acids in length. It contains a conserved RRXRR motif. It is often found in association with Pfam:PF01844.. +PF14240 YHYH protein
This domain family is found in bacteria, eukaryotes and viruses, and is typically between 141 and 198 amino acids in length. There is a conserved YHYH sequence motif.. +PF14241 Domain of unknown function (DUF4341)
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF04143. There are a number of conserved glycine residues that may be functionally important.. +PF14242 Domain of unknown function (DUF4342)
This family of proteins is found in bacteria. Proteins in this family are typically between 97 and 206 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF14243 Domain of unknown function (DUF4343)
This domain family is found in bacteria, eukaryotes and viruses, and is typically between 127 and 142 amino acids in length.. +PF14244 gag-polypeptide of LTR copia-type
This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type.. +PF14245 Type IV pilin PilA
Jackhmmer:Q59589, Jackhmmer:B0C6E0. This family consists of proteins which form type IV pili. In M. xanthus these pili are required for social motility [1,2].. +PF14246 AefR-like transcriptional repressor, C-terminal region
This family comprises the C-terminal domain of transcriptional regulators of the TetR family. It includes the AefR transcriptional regulator from P. syringae . It is found in association with Pfam:PF00440.. +PF14247 Domain of unknown function (DUF4344)
This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 247 and 291 amino acids in length. There is a conserved EED sequence motif.. +PF14248 Domain of unknown function (DUF4345)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 125 and 141 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF14249 Tocopherol cyclase
This family contains tocopherol cyclases. These enzymes are involved in the synthesis of tocopherols and tocotrienols (vitamin E) .. +PF14250 AbrB-like transcriptional regulator
This family of DNA-binding proteins is likely to act as a transcriptional regulator . This family does not include E.coli AbrB, Swiss:P75747, which belongs to Pfam:PF05145.. +PF14251 Domain of unknown function (DUF4346)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 127 and 502 amino acids in length. There are two conserved sequence motifs: LDP and DHA. Many members of this family have been annotated as dihydropteroate synthases, however no experimental evidence can be found for this and Swiss:Q57571 has been shown not to possess dihydropteroate synthase activity .. +PF14252 Domain of unknown function (DUF4347)
This domain family is found in bacteria and eukaryotes, and is approximately 160 amino acids in length. There are two completely conserved residues (C and G) that may be functionally important.. +PF14253 Bacteriophage abortive infection AbiH
This family of proteins confers resistance to bacteriophage .. +PF14254 Domain of unknown function (DUF4348)
+PF14255 Cysteine-rich CPXCG
This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There are 5 conserved cysteines which occur in a CPXCG motif and a DCXXCCXP motif.. +PF14256 YwiC-like protein
The YwiC-like protein family includes the B. subtilis YwiC protein Swiss:P46909, which is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF14257 Domain of unknown function (DUF4349)
This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 282 and 353 amino acids in length. There is a single completely conserved residue D that may be functionally important.. +PF14258 Domain of unknown function (DUF4350)
This domain family is found in bacteria, archaea and eukaryotes, and is approximately 70 amino acids in length.. +PF14259 RNA recognition motif (a.k.a. RRM, RBD, or RNP domain)
JCSG:Target_421663_WS20613B. +PF14260 C4-type zinc-finger of DNA polymerase delta
In fission yeast this zinc-finger domain appears to be the region of Pol3 that binds directly to the B-subunit, Cdc1. Pol delta is a hetero-tetrameric enzyme comprising four evolutionarily well-conserved proteins: the catalytic subunit Pol3 and three smaller subunits Cdc1, Cdc27 and Cdm1.. +PF14261 Domain of unknown function (DUF4351)
This domain is found in bacteria, and is approximately 60 amino acids in length.. +PF14262 Domain of unknown function (DUF4353)
This family is found in bacteria and archaea, and is typically between 262 and 279 amino acids in length.. +PF14263 Domain of unknown function (DUF4354)
JCSG:Target416839_SP17692A. Several members of this family are annotated as being ATP/GTP-binding site motif A (P-loop) proteins, but this could not be confirmed. The one PDB:3NRF structure solved for this family exhibits an immunoglobulin-like beta-sandwich fold. Crystal packing suggests that a tetramer is a significant oligomerisation state, and a disulfide bridge is formed between Cys 125 at the C-terminal end of the monomer, and Cys 69.. +PF14264 Glucosyl transferase GtrII
This family includes glucosyl transferase II from the Shigella phage SfII, Swiss:O21944, which mediates seroconversion of S. flexneri when the phage is integrated into the host chromosome .. +PF14265 Domain of unknown function (DUF4355)
This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 180 and 214 amino acids in length.. +PF14266 Domain of unknown function (DUF4356)
This family of proteins is found in bacteria. Proteins in this family are approximately 540 amino acids in length.. +PF14267 Domain of unknown function (DUF4357)
This domain family is found in bacteria and archaea, and is approximately 60 amino acids in length. There are two completely conserved residues (G and W) that may be functionally important.. +PF14268 YoaP-like
The YoaP-like domain is found at the C-terminus of the B. subtilis YoaP protein Swiss:O34983. It is found in bacteria and archaea, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00583. There is a single completely conserved residue A that may be functionally important.. +PF14269 Arylsulfotransferase (ASST)
JCSG:Target416597_Pfam-B_1234 (release 25.0). +PF14270 Domain of unknown function (DUF4358)
This domain family is found in bacteria, and is approximately 110 amino acids in length.. +PF14271 Domain of unknown function (DUF4359)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. There are two completely conserved residues (P and S) that may be functionally important.. +PF14272 Glycine-rich SFCGS
This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. There are a number of highly conserved motifs including an SFCGSGGAGA motif.. +PF14273 Domain of unknown function (DUF4360)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 200 and 228 amino acids in length. There is a conserved GCP sequence motif near the N-terminus.. +PF14274 Domain of unknown function (DUF4361)
JCSG:Target_416718_SP15308B. +PF14275 Domain of unknown function (DUF4362)
This family of proteins is found in bacteria. Proteins in this family are typically between 93 and 146 amino acids in length. There is a conserved IRIV sequence motif.. +PF14276 Domain of unknown function (DUF4363)
This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length.. +PF14277 Domain of unknown function (DUF4364)
This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 180 amino acids in length.. +PF14278 Transcriptional regulator C-terminal region
JCSG:Target403231_MJ9673J Pfam-B_17743 (release 25.0). This domain is a tetracycline repressor, domain 2, or C-terminus.. +PF14279 HNH endonuclease
This domain is related to other HNH domain families such as Pfam:PF01844, suggesting that these proteins have a nucleic acid cleaving function.. +PF14280 Domain of unknown function (DUF4365)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 182 and 530 amino acids in length. There is a single completely conserved residue D that may be functionally important.. +PF14281 PD-(D/E)XK nuclease superfamily
Members of this family belong to the PD-(D/E)XK nuclease superfamily.. +PF14282 FlxA-like protein
This family includes FlxA from E. coli, Swiss:P77609. The expression of FlxA is regulated by the FliA sigma factor, a transcription factor specific for class 3 flagellar operons. However FlxA is not required for flagellar function or formation .. +PF14283 Domain of unknown function (DUF4366)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 227 and 387 amino acids in length.. +PF14284 PcfJ-like protein
The PcfJ-like protein family includes the E. faecalis PcfJ protein Swiss:Q5G3N2, which is functionally uncharacterised. It is found in bacteria and viruses, and is typically between 159 and 170 amino acids in length. There is a conserved HCV sequence motif.. +PF14285 Domain of unknown function (DUF4367)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 229 and 435 amino acids in length.. +PF14286 DHHW protein
This family of proteins is found in bacteria. Proteins in this family are typically between 366 and 404 amino acids in length. There is a conserved DHHW motif.. +PF14287 Domain of unknown function (DUF4368)
This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00239 and Pfam:PF07508. There is a single completely conserved residue G that may be functionally important.. +PF14288 1,3-beta-glucan synthase subunit FKS1, domain-1
The FKS1_dom1 domain is likely to be the 'Class I' region just N-terminal to the first set of transmembrane helices that is involved in 1,3-beta-glucan synthesis itself . This family is found on proteins with family Glucan_synthase, Pfam:PF02364.. +PF14289 Domain of unknown function (DUF4369)
This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00578.. +PF14290 Domain of unknown function (DUF4370)
+PF14291 Domain of unknown function (DUF4371)
+PF14292 SusE outer membrane protein
This family includes the SusE outer membrane protein from Bacteroides thetaiotaomicron, Swiss:Q45769. This protein has a role in starch utilisation, but is not essential for growth on starch .. +PF14293 YWFCY protein
This family is found in bacteria, and is approximately 60 amino acids in length. There is a conserved YWFCY motif. It is often found in association with Pfam:PF02534.. +PF14294 Domain of unknown function (DUF4372)
This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF01609. There is a single completely conserved residue G that may be functionally important.. +PF14295 PAN domain
+PF14296 O-antigen polysaccharide polymerase Wzy
This family includes O-antigen polysaccharide polymerases . These enzymes link O-units via a glycosidic linkage to form a long O-antigen . These enzymes vary in specificity and sequence .. +PF14297 Domain of unknown function (DUF4373)
This domain is found in bacteria, eukaryotes and viruses, and is approximately 90 amino acids in length.. +PF14298 Domain of unknown function (DUF4374)
This family of proteins is found in bacteria. Proteins in this family are typically between 406 and 466 amino acids in length.. +PF14299 Phloem protein 2
Phloem protein 2 (PP2) is one of the most abundant and enigmatic proteins in the phloem sap. PP2 is translocated in the assimilate stream where its lectin activity or RNA-binding properties can exert effects over long distances .. +PF14300 Domain of unknown function (DUF4375)
This family of proteins is found in bacteria. Proteins in this family are typically between 156 and 204 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF14301 Domain of unknown function (DUF4376)
This domain family is found in bacteria and viruses, and is approximately 110 amino acids in length.. +PF14302 Domain of unknown function (DUF4377)
This domain family is found in bacteria and archaea, and is approximately 80 amino acids in length.. +PF14303 No apical meristem-associated C-terminal domain
This domain is found in a number of different types of plant proteins including NAM-like proteins.. +PF14304 Transcription termination and cleavage factor C-terminal
The C-terminal section of CSTF proteins is a discrete structure that is crucial for mRNA 3'-end processing. This domain interacts with Pcf11 and possibly PC4, thus linking CstF2 to transcription, transcriptional termination, and cell growth.. +PF14305 Glycos_tran_Wfd;
Eberhardt R, Iyer LM, Abhiman S, Burroughs AM, Aravind L. A member of the ATP-grasp fold predicted to be involved in the biosynthesis of cell surface polysaccharides such as the O-antigen in proteobacteria, the capsule in firmicutes and the polyglutamate chain of teichuronopeptide . . +PF14306 PUA-like domain
This PUA like domain is found at the N-terminus of ATP-sulfurylase enzymes.. +PF14307 Glycosyltransferase WbsX
Members of this family are found within O-antigen biosynthesis clusters in Gram negative bacteria, where they are predicted to function as glycosyltransferases [1,2].. +PF14308 X-domain of DnaJ-containing
In certain plant and yeast proteins, the DnaJ-1 proteins have a three-domain structure. The x-domain lies between the N-terminal DnaJ and the C-terminal Z domains. The exact function is not known.. +PF14309 Domain of unknown function (DUF4378)
+PF14310 Fibronectin type III-like domain
This domain has a fibronectin type III-like structure . It is often found in association with Pfam:PF00933 and Pfam:PF01915. Its function is unknown.. +PF14311 Domain of unknown function (DUF4379)
This domain is found in bacteria, eukaryotes and viruses, and is approximately 60 amino acids in length. It contains a CXXCXH motif and a CPXC motif.. +PF14312 FG-GAP repeat
+PF14313 N-terminal region of Paramyxovirinae phosphoprotein (P)
The soyouz module moiety is the N-terminal region of the phosphoprotein (P) from the subfamily Paramyxovirinae of the family Paramyxoviridae viruses. The main genera in this subfamily include the Rubulaviruses, avulaviruses, respiroviruses, henipaviruses, and morbilliviruses, all of which are enveloped viruses with a non-segmented, negative, single-stranded RNA genome encapsidated by the nucleoprotein (N) within a helical nucleocapsid.. +PF14314 Virus-capping methyltransferase
This is the methyltransferase region of the Mononegavirales single-stranded RNA viral RNA polymerase enzymes. This region is involved in the mRNA-capping of the virion particles.. +PF14315 Domain of unknown function (DUF4380)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 288 and 372 amino acids in length. There are two completely conserved residues (G and E) that may be functionally important.. +PF14316 Domain of unknown function (DUF4381)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 158 and 180 amino acids in length.. +PF14317 YcxB-like protein
The YcxB-like protein family includes the B. subtilis YcxB protein Swiss:Q08793, which is a functionally uncharacterised transmembrane protein. This family of proteins is found in bacteria, and is approximately 60 amino acids in length.. +PF14318 Mononegavirales mRNA-capping region V
This V domain of L RNA-polymerase carries a new motif, GxxTx(n)HR, that is essential for mRNA cap formation. Nonsegmented negative-sense (NNS) RNA viruses, Mononegavirales, cap their mRNA by an unconventional mechanism. Specifically, 5'-monophosphate mRNA is transferred to GDP derived from GTP through a reaction that involves a covalent intermediate between the large polymerase protein L and mRNA. The V region is essential for this process .. +PF14319 Transposase zinc-binding domain
This domain is likely to be a zinc-binding domain. It is found at the N-terminus of transposases belonging to the IS91 family.. +PF14320 Phosphoprotein P region PCT disordered
The N-terminal half of the phosphoprotein P of the Paramyxovirinae viruses. The very first 60 residues have been built as the family Soyouz-module, Pfam:PF14313. The remaining part of the region, here, is disordered, and is liable to induced folding under the right physiological conditions. The region undergoes an unstructured-to-structured transition upon binding to Measles virus tail, C, unstructured region.. +PF14321 Domain of unknown function (DUF4382)
This family is found in bacteria and archaea, and is typically between 142 and 161 amino acids in length.. +PF14322 Starch-binding associating with outer membrane
JCSG:Target390309 Jackhmmer:Q8A1B4. SusD is a secreted starch-binding protein with an N-terminal lipid tail that allows it to associate with the outer membrane.. +PF14323 GxGYxY sequence motif in domain of unknown function
This family carries a characteristic sequence motif, GxGYxYP, but is of unknown function. Associated families are sugar-processing domains.. +PF14324 PINIT domain
The PINIT domain is a protein domain that is found in PIAS proteins . The PINIT domain is about 180 amino acids in length.. +PF14325 Domain of unknown function (DUF4383)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 137 and 164 amino acids in length.. +PF14326 Domain of unknown function (DUF4384)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 80 amino acids in length.. +PF14327 Hinge domain of cleavage stimulation factor subunit 2
The hinge domain of cleavage stimulation factor subunit 2 proteins, CSTF2, is necessary for binding to the subunit CstF-77 within the polyadenylation complex and subsequent nuclear localisation. This suggests that nuclear import of a pre-formed CSTF complex is an essential step in polyadenylation. Accurate and efficient polyadenylation is essential for transcriptional termination, nuclear export, translation, and stability of eukaryotic mRNAs. CSTF2 is an important regulatory subunit of the polyadenylation complex.. +PF14328 Domain of unknown function (DUF4385)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 149 and 163 amino acids in length.. +PF14329 Domain of unknown function (DUF4386)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 214 and 245 amino acids in length.. +PF14330 Domain of unknown function (DUF4387)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are approximately 110 amino acids in length. There is a conserved RSKN sequence motif.. +PF14331 ImcF-related N-terminal domain
This domain is found in bacterial ImcF (intracellular multiplication and human macrophage-killing) proteins. It is found to the N-terminus of the ImcF-related domain, Pfam:PF06761.. +PF14332 Domain of unknown function (DUF4388)
This domain family is found in bacteria, and is typically between 102 and 135 amino acids in length.. +PF14333 Domain of unknown function (DUF4389)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 104 and 223 amino acids in length. There is a single completely conserved residue R that may be functionally important.. +PF14334 Domain of unknown function (DUF4390)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 192 and 203 amino acids in length.. +PF14335 Domain of unknown function (DUF4391)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 220 and 257 amino acids in length.. +PF14336 Domain of unknown function (DUF4392)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 282 and 585 amino acids in length. There are two completely conserved G residues that may be functionally important.. +PF14337 Domain of unknown function (DUF4393)
This family of proteins is found in bacteria, archaea and viruses. Proteins in this family are typically between 254 and 285 amino acids in length.. +PF14338 Mrr N-terminal domain
This domain is found at the N-terminus of the Mrr restriction endonuclease catalytic domain, Pfam:PF04471 [1,2]. Fold recognition analysis predicts that it is a diverged member of the winged helix variant of helix turn helix proteins. It may play a role in DNA sequence recognition .. +PF14339 Domain of unknown function (DUF4394)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 262 and 476 amino acids in length.. +PF14340 Domain of unknown function (DUF4395)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 142 and 168 amino acids in length. There are two completely conserved C residues that may be functionally important.. +PF14341 PilX N-terminal
This domain is found at the N-terminus of the PilX prepilin-like proteins which are involved in type 4 fimbrial biogenesis .. +PF14342 Domain of unknown function (DUF4396)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 167 and 310 amino acids in length.. +PF14343 PrcB C-terminal
This domain is found at the C-terminus of Treponema denticola PrcB, Swiss:B8YNY4. PrcB interacts with the PrtP protease (dentilisin) and is required for the stability of the protease complex .. +PF14344 Domain of unknown function (DUF4397)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 120 amino acids in length.. +PF14345 GDYXXLXY protein
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 171 and 199 amino acids in length. It contains a conserved GDYXXLXY motif.. +PF14346 Domain of unknown function (DUF4398)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 127 and 269 amino acids in length.. +PF14347 Domain of unknown function (DUF4399)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 135 and 1079 amino acids in length.. +PF14348 Domain of unknown function (DUF4400)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 209 and 249 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF14349 Motility related/secretion protein
This domain is found repeated three times in the N-terminal half of the gliding motility-related SprA proteins. The role of this domain in motility is uncertain . It is also found in proteins required for secretion .. +PF14350 Beta protein
This family includes the beta protein from Bacteriophage T4, Swiss:P13057. Beta protein prevents the gop protein, Swiss:P13058, from killing the bacterial host cell .. +PF14351 Domain of unknown function (DUF4401)
This family of proteins is found in bacteria. Proteins in this family are typically between 357 and 735 amino acids in length. The family is found in association with Pfam:PF09925. There is a single completely conserved residue K that may be functionally important.. +PF14352 Domain of unknown function (DUF4402)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 155 and 182 amino acids in length.. +PF14353 CpXC protein
This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea, and is typically between 122 and 134 amino acids in length. It contains four conserved cysteines forming two CpXC motifs.. +PF14354 Restriction alleviation protein Lar
This family includes the restriction alleviation protein Lar encoded by the Rac prophage of Escherichia coli, Swiss:P33229. This protein modulates the activity of the Escherichia coli restriction and modification system .. +PF14355 Abortive infection C-terminus
This domain is found at the C-terminus of the Lactococcus lactis abortive infection protein Abi-859, Swiss:Q48620. This protein confers bacteriophage resistance .. +PF14356 Domain of unknown function (DUF4403)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 455 and 518 amino acids in length. There is a single completely conserved residue W that may be functionally important.. +PF14357 Domain of unknown function (DUF4404)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two completely conserved residues (P and G) that may be functionally important.. +PF14358 Domain of unknown function (DUF4405)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. There are two conserved histidines that may be functionally important. This family is N-terminally truncated compared to other members of the clan.. +PF14359 Domain of unknown function (DUF4406)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 98 and 145 amino acids in length.. +PF14360 PAP2 superfamily C-terminal
This family is closely related to the C-terminal region of PAP2.. +PF14361 RsbT co-antagonist protein rsbRD N-terminal domain
This domain is found at the N-terminus of a number of anti-sigma-factor antagonist proteins including B. subtilis RsbRD, Swiss:P54504. These proteins are negative regulators of the general stress transcription factor sigma(B) . It is found in association with Pfam:PF01740.. +PF14362 Domain of unknown function (DUF4407)
This family of proteins is found in bacteria. Proteins in this family are typically between 366 and 597 amino acids in length. There is a single completely conserved residue R that may be functionally important.. +PF14363 Domain associated at C-terminal with AAA
This domain is found in association with the AAA family, Pfam:PF00004.. +PF14364 Domain of unknown function (DUF4408)
This domain is found at the N-terminus of members of the DUF761 family Pfam:PF05553. Many members are plant proteins.. +PF14365 Domain of unknown function (DUF4409)
+PF14366 Domain of unknown function (DUF4410)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 238 amino acids in length.. +PF14367 Domain of unknown function (DUF4411)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 153 and 170 amino acids in length. There is a single completely conserved residue D that may be functionally important.. +PF14368 Probable lipid transfer
The members of this family are probably involved in lipid transfer. The family has several highly conserved cysteines, paired in various ways.. +PF14369 zinc-finger
+PF14370 C-terminal topoisomerase domain
This domain is found at the C-terminal of topoisomerase and other similar enzymes.. +PF14371 Domain of unknown function (DUF4412)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is typically between 75 and 104 amino acids in length.. +PF14372 Domain of unknown function (DUF4413)
This domain is part of an RNase-H fold section of longer proteins some of which are transposable elements possibly of the Pong type, since some members are putative Tam3 transposases.. +PF14373 Superinfection immunity protein
This family includes the E. coli bacteriophage T4 superinfection immunity (imm) protein, Swiss:P08986. When E. coli is sequentially infected with two T-even type bacteriophage the DNA of the superinfecting phage is excluded from the host, into the periplasmic space. The immunity protein plays a role in this process .. +PF14374 60S ribosomal protein L4 C-terminal domain
This family is found at the very C-terminus of 60S ribosomal L4 proteins.. +PF14375 Cysteine-rich CWC
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 74 and 102 amino acids in length. It contains eight conserved cysteines, including a conserved CWC sequence motif.. +PF14376 Haem-binding domain
This domain contains a potential haem-binding motif, CXXCH . This family is found in association with Pfam:PF00034 and Pfam:PF03150.. +PF14377 Domain of unknown function (DUF4414)
This family is frequently found on DNA binding proteins of the URE-B1 type and on ligases.. +PF14378 PAP2 superfamily
+PF14379 MYB-CC type transfactor, LHEQLE motif
This family is found towards the C-terminus of Myb-CC type transcription factors, and carries a highly conserved LHEQLE sequence motif.. +PF14380 Wall-associated receptor kinase C-terminal
This WAK_assoc domain is cysteine-rich and lies C-terminal to the binding domain, GUB_WAK_bind, Pfam:PF13947.. +PF14381 Ethylene-responsive protein kinase Le-CTR1
EDR1 regulates disease resistance and ethylene-induced senescence, and is also involved in stress response signalling and cell death regulation .. +PF14382 ECR11_N;
Exosome complex exonuclease RRP4 N-terminal region. ECR1_N is an N-terminal region of the exosome complex exonuclease RRP proteins. It is a G-rich domain which structurally is a rudimentary single hybrid fold with a permuted topology.. +PF14383 DUF761-associated sequence motif
This family is found frequently at the N-terminus of family DUF3741, Pfam:PF12552.. +PF14384 Domain of unknown function (DUF4415)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 82 and 104 amino acids in length.. +PF14385 Domain of unknown function (DUF4416)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 176 and 187 amino acids in length. There is a conserved DPG sequence motif.. +PF14386 Domain of unknown function (DUF4417)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 220 and 340 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF14387 Domain of unknown function (DUF4418)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 132 and 150 amino acids in length.. +PF14388 Domain of unknown function (DUF4419)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 348 and 454 amino acids in length.. +PF14389 Leucine-zipper of ternary complex factor MIP1
This leucine-zipper is towards the N-terminus of MIP1 proteins. These proteins, here largely from plants, are subunits of the TORC2 (rictor-mTOR) protein complex controlling cell growth and proliferation . The leucine-zipper is likely to be the region that interacts with plant MADS-box factors ,. +PF14390 Domain of unknown function (DUF4420)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 310 and 334 amino acids in length.. +PF14391 Domain of unknown function (DUF4421)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 336 and 370 amino acids in length.. +PF14392 Zinc knuckle
The zinc knuckle is a zinc binding motif composed of the following CX2CX4HX4C where X can be any amino acid. This particular family is found in plant proteins.. +PF14393 Domain of unknown function (DUF4422)
This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 255 and 371 amino acids in length.. +PF14394 Domain of unknown function (DUF4423)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 170 amino acids in length.. +PF14395 Phage phiEco32-like COOH.NH2 ligase-type 2
Iyer LM, Abhiman S, Burroughs AM, Aravind L. A family of COOH-NH2 ligases/GCS superfamily found in the neighborhood of YheC/D-like ATP-grasp and the CotE family of proteins in the firmicutes. Contextual analysis suggests that it might be involved in cell wall modification and spore coat biosynthesis .. +PF14396 Cystic fibrosis TM conductance regulator (CFTR), regulator domain
+PF14397 Sugar-transfer associated ATP-grasp
Iyer LM, Abhiman S, Burroughs AM, Aravind L. A member of the ATP-grasp fold predicted to be involved in the biosynthesis of cell surface polysaccharides .. +PF14398 YheC/D like ATP-grasp
Iyer LM, Abhiman S, Burroughs AM, Aravind L. A member of the ATP-grasp fold predicted to be involved in the modification/biosynthesis of spore-wall and capsular proteins .. +PF14399 BtrH;
NlpC/p60-like transpeptidase. Iyer LM, Abhiman S, Burroughs AM, Aravind L, Eberhardt R. Members of this family are often found in the gene neighbourhood, or fused to, non-ribosomal peptide synthetases. They are predicted to function as transpeptidases in peptide metabolite biosynthesis .. +PF14400 Inactive transglutaminase fused to 7 transmembrane helices
Iyer LM, Abhiman S, Burroughs AM, Aravind L. A family of inactive transglutaminases fused to seven transmembrane helices. The transglutaminase domain is predicted to be extracellularly located. Members of this family are associated in gene neighborhoods with a pepsin-like peptidase and an ATP-grasp of the RimK-family. The ATP-grasp is predicted to modify the 7TM protein or a cofactor that interacts with it .. +PF14401 RimK-like ATPgrasp N-terminal domain
Iyer LM, Abhiman S, Burroughs AM, Aravind L. An uncharacterized alpha+beta fold domain that is mostly fused to a RimK-like ATP-grasp and is found in bacteria and euryarchaea. Members of this family are almost always associated in gene neighborhoods with a GNAT-like acetyltransferase fused to a papain-like peptidase. Additionally M20-like peptidases, GCS2, 4Fe-4S Ferredoxins, a distinct metal-sulfur cluster protein and ribosomal proteins are found in the gene neighborhoods. Contextual analysis suggests a role for these in peptide biosynthesis .. +PF14402 7 transmembrane helices usually fused to an inactive transglutaminase
Iyer LM, Abhiman S, Burroughs AM, Aravind L. A family of seven transmembrane helices fused to an inactive transglutaminase domain. The transglutaminase domain is predicted to be extracellularly located. Members of this family are associated in gene neighborhoods with a pepsin-like peptidase and an ATP-grasp of the RimK-family. The ATP-grasp is predicted to modify the 7TM protein or a cofactor that interacts with it .. +PF14403 Circularly permuted ATP-grasp type 2
Iyer LM, Abhiman S, Burroughs AM, Aravind L. Circularly permuted ATP-grasp prototyped by Roseiflexus RoseRS_2616 that is associated in gene neighborhoods with a GCS2-like COOH-NH2 ligase, alpha/beta hydrolase fold peptidase, GAT-II -like amidohydrolase, and M20 peptidase. Members of this family are predicted to be involved in the biosynthesis of small peptides .. +PF14404 Ribosomally synthesized peptide in Streptomyces species
Iyer LM, Abhiman S, Burroughs AM, Aravind L. A ribosomally synthesized peptide related to microviridin and marinostatin, usually in the gene neighborhood of one or more RimK-like ATP-grasp. The gene-context suggests that it is further modified by the ATP-grasp. The peptide is predicted to function in a defensive or developmental role, or as an antibiotic .. +PF14406 Ribosomally synthesized peptide in Bacteroidetes
Iyer LM, Abhiman S, Burroughs AM, Aravind L. Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp, and an ABC ATPase fused to a papain-like domain. It is often present in multiple tandem gene copies. The gene contexts suggest that it is modified by the ATP-grasp as in the biosynthesis of microviridin and marinostatin. They might function in defense or development or as peptide antibiotics .. +PF14407 Ribosomally synthesized peptide prototyped by Frankia Franean1_4349.
Iyer LM, Abhiman S, Burroughs AM, Aravind L. Ribosomally synthesized peptide linked to cyclases in chloroflexi. It may have a link to cyclic nucleotide signaling .. +PF14408 Ribosomally synthesized peptide in actinomycetes
Iyer LM, Abhiman S, Burroughs AM, Aravind L. Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp and an aspartyl-O-methylase. Gene contexts suggest that it is further modified by the ATP-grasp and the methylase. It might function in defense or development, or as a peptide antibiotic .. +PF14409 Ribosomally synthesized peptide in Herpetosiphon
Iyer LM, Abhiman S, Burroughs AM, Aravind L. Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp, and an ABC ATPase fused to a papain-like domain. It is often present in multiple tandem gene copies. Gene contexts suggest that it is modified by the ATP-grasp. It might function in defense or development, or as a peptide antibiotic .. +PF14410 HNH/ENDO VII superfamily nuclease with conserved GHE residues
Zhang D, Iyer LM, Aravind L. A predicted nuclease of the HNH/EndoVII superfamily of the treble clef fold which is closely related to the NucA-like family. The name is derived from the conserved G, H and E residues. It is found in several bacterial polymorphic toxin systems . Some GH-E members preserve the conserved cysteines of the treble-clef suggesting that they might represent potential evolutionary intermediates from a classical HNH domain to the derived NucA-like form .. +PF14411 A nuclease of the HNH/ENDO VII superfamily with conserved LHH
Zhang D, Iyer LM, Aravind L. LHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif, LHH. It is found in bacterial polymorphic toxin systems and functions as a toxin module. Like WHH and AHH, LHH nuclease contain 4 conserved histidines of which, the first one is predicted to bind metal-ion and other three ones are involved in activation of water molecule for hydrolysis.. +PF14412 A nuclease family of the HNH/ENDO VII superfamily with conserved AHH
Zhang D, Iyer LM, Aravind L. AHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif, AHH. It is found in bacterial polymorphic toxin systems and functions as a toxin module. Like WHH and LHH, the AHH nuclease contains 4 conserved histidines of which, the first one is predicted to bind a metal-ion and the other three ones are involved in activation of a water molecule for hydrolysis.. +PF14413 Thg1 C terminal domain
Thg1 polymerases contain an additional region of conservation C-terminal to the core palm domain that comprises 5 helices and two strands . This region has several well-conserved charged residues, including a basic residue found towards the end of the first helix of this unit that might contribute to the Thg1-specific active site . This C-terminal module of Thg1 is predicted to form a helical bundle that functions equivalently to the fingers of the other nucleic acid polymerases, probably in interacting with the template HtRNA .. +PF14414 A nuclease of the HNH/ENDO VII superfamily with conserved WHH
Zhang D, Iyer LM, Aravind L;. WHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif WHH. It is found in bacterial polymorphic toxin systems and functions as a toxin module. WHH is the shortest version of HNH nuclease families. Like AHH and LHH, the WHH nuclease contains 4 conserved histidines of which the first one is predicted to bind a metal-ion and other three ones are involved in activation of water molecule for hydrolysis .. +PF14415 Domain of unknown function (DUF4424)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 310 and 361 amino acids in length.. +PF14416 PMR5 N terminal Domain
The plant family with PMR5, ESK1, TBL3 etc have a N-terminal C rich predicted sugar binding domain followed by the PC-Esterase (acyl esterase) domain .. +PF14417 MEDS: MEthanogen/methylotroph, DcmR Sensory domain
MEDS is prototyped by DcmR and is likely to function with the PocR domain in certain organisms in sensing hydrocarbon derivatives. The MEDS domain occurs fused to Histidine Kinase and as a standalone version . Sequence analysis shows that it is a catalytically inactive version of the P-loop NTPase domain of the RecA superfamily .. +PF14418 OST-HTH Associated domain
OHA occurs with OST-HTH . . +PF14419 SPOUT_MTase_11;
AF2226-like SPOUT RNA Methylase fused to THUMP. SPOUT superfamily RNA methylase fused to RNA binding THUMP domain .. +PF14420 Clr5 domain
This domain is found at the N-terminus of the Clr5 protein which has been shown to be involved in silencing in fission yeast. This domain has been found to often be associated with proteins that contain ankyrin repeats and large regions of disordered sequence .. +PF14421 CDD_CDA_1;
A distinct subfamily of CDD/CDA-like deaminases. Iyer LM, Zhang D, Aravind L. A distinct branch of the CDD/CDA-like deaminases prototyped by Leishmania LmjF36.5940. Members of this family are widely distributed across several microbial eukaryotes such as kinetoplastids, chlorophyte algae, stramenopiles and the alveolate Perkinsus. Domain architectures suggest that these proteins might possess mRNA editing or DNA mutagenizing activity .. +PF14423 Immunity protein Imm5
Iyer LM, Zhang D, Aravind L.. A predicted Immunity protein, with an all-alpha fold, present in bacterial polymorphic toxin systems as an immediate neighbor of the toxin .. +PF14424 DEAM-TOXIN1;
The BURPS668_1122 family of deaminases. Iyer LM, Zhang D, Aravind L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Burkholderia BURPS668_1122 . Members of this family are found as toxins in polymorphic toxin systems in a wide range of bacteria and in the eukaryote Perkinsus. Members of this family typically possess a DxE catalytic motif in Helix-2 of the core fold instead of the more common C[H]xE motif. The Perkinsus versions are predicted to be inactive .. +PF14425 Immunity protein Imm3
Iyer LM, Zhang D, Aravind L. A predicted Immunity protein, with a mostly all-alpha fold, present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene .. +PF14426 Immunity protein Imm2
Iyer LM, Zhang D, Aravind L. A predicted Immunity protein, with a mostly all-alpha fold, present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene .. +PF14427 Pput2613-DEAM;
Pput_2613-like deaminase. Iyer LM, Zhang D, Aravind L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Pseudomonas Pput_2613 . Members of this family are predicted to function as toxins in bacterial polymorphic toxin systems .. +PF14428 SCP1201-DEAM;
SCP1.201-like deaminase. Iyer LM, Zhang D, Aravind, L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Streptomyces SCP1.201 . Members of this family are predicted to function as toxins in bacterial polymorphic toxin systems .. +PF14429 C2 domain in Dock180 and Zizimin proteins
The Dock180/Dock1 and Zizimin proteins are atypical GTP/GDP exchange factors for the small GTPases Rac and Cdc42 and are implicated in cell-migration and phagocytosis. Across all Dock180 proteins, two regions are conserved: C-terminus termed CZH2 or DHR2 (or the Dedicator of cytokinesis) whereas CZH1/DHR1 contain a new family of the C2 domain .. +PF14430 Immunity protein Imm1
Iyer LM, Zhang D, Aravind L. A predicted immunity protein, with an alpha+beta fold and a conserved C-terminal tryptophan residue. The protein is present in a wide range of bacteria in polymorphic toxin systems as an immediate gene neighbor of the toxin gene .. +PF14431 YwqJ-like deaminase
Iyer LM, Zhang D, Aravind L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Bacillus YwqJ . Members of this family are present in a wide phyletic range of bacteria and a few basidiomycetes. Bacterial versions are predicted to function as toxins in bacterial polymorphic toxin systems .. +PF14432 DYW_Deaminase;
DYW family of nucleic acid deaminases. Iyer LM, Zhang D, Aravind, L. A family of nucleic acid deaminases prototyped by the plant PPR DYW proteins that are implicated in chloroplast and mitochondrial RNA transcript maturation by numerous C to U editing events . The name derives from the DYW motif present at the C-terminus of the classical plant PPR DYW deaminases. Members of this family are present in bacteria, plants, Naegleria, and fungi . Plants and Naegleria show lineage-specific expansions of this family. The classical DYW family contain an additional C-terminal metal-binding cluster composed of 2 histidines and a CxC motif and are often fused to PPR repeats. Ascomycete versions, which are independent lateral transfers, contain a large insert within the domain and are often fused to ankyrin repeats. Bacterial versions are predicted to function as toxins in polymorphic toxin systems .. +PF14433 SUKH-3 immunity protein
Zhang D, Iyer LM, Aravind L. This family belongs to the SUKH superfamily and functions as immunity proteins in bacterial toxin systems .. +PF14434 Immunity protein Imm6
Iyer LM, Zhang D, Aravind L. A predicted immunity protein, with an alpha+beta fold (mostly alpha helices). The protein is present in polymorphic toxin systems as an immediate gene neighbor of the toxin gene .. +PF14435 SUKH-4 immunity protein
Zhang D, Iyer LM, Aravind L. This family belongs to the SUKH superfamily and functions as immunity proteins in bacterial toxin systems .. +PF14436 Bacterial EndoU nuclease
Zhang D, Iyer LM, Aravind L. This is a bacterial version of EndoU nuclease. It is found at the C-terminal region of polymorphic toxin proteins. . +PF14437 MafB19-like deaminase
Iyer LM, Zhang D, Aravind L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Neisseria MafB19 . Members of this family are present in a wide phyletic range of bacteria and are predicted to function as toxins in bacterial polymorphic toxin systems .. +PF14438 Ataxin 2 SM domain
Anantharaman V, Eberhardt R. This SM domain is found in Ataxin-2 .. +PF14439 Bd3614-like deaminase
Iyer LM, Zhang D, Aravind L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Bdellovibrio Bd3614 . They are typified by a distinct N-terminal globular domain. The Bdellovibrio version occurs in a predicted operon with a 23S rRNA G2445-modifying methylase suggesting that it might be involved in RNA editing .. +PF14440 Xanthomonas XOO_2897-like deaminase
Iyer LM, Zhang D, Aravind L. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Xanthomonas XOO_2897 . Members of this family are present in a wide phyletic range of bacteria and are predicted to function as toxins in bacterial polymorphic toxin systems . The Xanthomonas XOO_2897 lacks an immunity protein and is predicted to be deployed against its eukaryotic host .. +PF14441 OTT_1508-like deaminase
Iyer LM, Zhang D, Aravind L, Eberhardt R. A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Orientia OTT_1508 . Members of this family are present in a wide phyletic range of bacteria,including several intracellular parasites and eukaryotes such as fungi, Leishmania, Selaginella, and some apicomplexa. In bacteria, these deaminases are predicted to function as toxins in bacterial polymorphic toxin systems . Versions in intracellular bacteria lack immunity proteins and are likely to be deployed against their eukaryotic hosts. Eukaryotic versions are predicted to function as nucleic acid (either DNA or RNA) deaminases. Among eukaryotes, some fungi show lineage-specific expansions of this family. Many fungal versions are fused to a distinct N-terminal globular domain. Various fungal versions are fused to domains involved in chromatin function. Apicomplexan versions are fused to tRNA guanine transglycosylase domain . . +PF14442 Bd3614-N;
Bd3614-like deaminase N-terminal. Iyer LM, Zhang D, Aravind L. This is a globular domain that occurs N-terminal to the Bd3614-like deaminases, which are predicted to be involved in RNA editing .. +PF14443 DBC1
DBC1 and its homologs from diverse eukaryotes are a catalytically inactive version of the Nudix hydrolase (MutT) domain . DBC1 is predicted to bind NAD metabolites and regulate the activity of SIRT1 or related deacetylases by sensing the soluble products or substrates of the NAD-dependent deacetylation reaction .. +PF14444 S1-like
S1-like RNA binding domain found in DBC1 . +PF14445 Prokaryotic RING finger family 2
Burroughs AM, Iyer LM, Aravind L. RING finger family found sporadically in bacteria and archaea, and associated with other components of the ubiquitin-based signaling and degradation system, including ubiquitin and the E1 and E2 proteins. The bacterial versions contain transmembrane helices .. +PF14446 Prokaryotic RING finger family 1
Burroughs AM, Iyer LM, Aravind L. RING finger family found sporadically in bacteria and archaea, and associated in gene neighborhoods with other components of the ubiquitin-based signaling and degradation system, including ubiquitin, the E1 and E2 proteins and the JAB-like metallopeptidase. The bacterial versions contain transmembrane helices .. +PF14447 Prokaryotic RING finger family 4
Burroughs AM, Iyer LM, Aravind L. RING finger family domain found sporadically in bacteria. The finger is fused to an N-terminal alpha-helical domain, ROT/Trove-like repeats and a C-terminal TerD domain . The architecture suggests a possible role in an RNA-processing complex .. +PF14448 NUC_N;
Zhang D, Iyer LM, Aravind L, Eberhardt R. Zhang D, Iyer LM, Aravind L. This is a conserved short region that is found in many bacterial polymorphic toxin proteins . It is often located before C-terminal nuclease domains .. +PF14449 Pre-toxin TG
Zhang D, Iyer LM, Aravind L, Eberhardt R. Zhang D, Iyer LM, Aravind L. PT-TG is a conserved region found in many bacterial toxin proteins. It could function as a linker that links N-terminal secretion-related domain and C-terminal toxin domain. It contains a TG motif . . +PF14450 Cell division protein FtsA
Pfam-B_1177 (release 5.4). FtsA is essential for bacterial cell division, and co-localises to the septal ring with FtsZ. It has been suggested that the interaction of FtsA-FtsZ has arisen through coevolution in different bacterial strains . The FtsA protein contains two structurally related actin-like ATPase domains which are also structurally related to the ATPase domains of HSP70 (see PF00012). FtsA has a SHS2 domain PF02491 inserted in to the RnaseH fold PF02491 .. +PF14451 Mut7-C ubiquitin
Iyer LM, Burroughs AM, Aravind L, Eberhardt R. This member of the ubiquitin superfamily is found at the N-terminus of Mut7-C like RNAses, suggestive of an RNA-binding role .. +PF14452 Bac_multiUb;
Iyer LM, Burroughs AM, Aravind L, Eberhardt R. A ubiquitin superfamily domain that is often present in multiple tandem copies in the same polypeptide. Members of this family are associated in gene neighborhoods, or on occasions fused to, bacterial homologs of components of ubiquitin-dependent modification system such as the E1, E2 and JAB metallopeptidase enzymes and a distinct metal-binding domain . The E2/UBC fold domain appears to be inactive. The JAB domain in these operons is usually fused to the E1 domain .. +PF14453 ThiS-like ubiquitin
Iyer LM, Burroughs AM, Aravind L. A member of the ubiquitin superfamily that is often fused to the ThiF-like (E1)- ubiquitin activating enzyme and is present in gene neighborhoods with components of the thiamine biosynthesis pathway .. +PF14454 Prokaryotic Ubiquitin
Iyer LM, Burroughs AM, Aravind L. A Ubiquitin-superfamily protein that is present across several bacterial lineages, and found in gene neighborhoods with components of the ubiquitin modification system such as the E1, E2 and JAB proteins, and a novel alpha-helical protein, which is predicted to be enzymatic .. +PF14455 Predicted metal binding domain
Iyer LM, Burroughs AM, Aravind L. A predicted metal-binding domain that is found in gene-neighborhood associations with genes encoding components of the bacterial homologs of the ubiquitin modification pathway including the E1, E2, JAB metallopeptidase and ubiquitin proteins. The domain is characterised by a conserved motif with a CxxxxxEYHxxxxH signature.. +PF14456 Alpha-helical domain 2
Iyer LM, Burroughs AM, Aravind L. An alpha-helical domain found in gene neighborhoods encoding genes containing bacterial homologs of components of the ubiquitin modification pathway such as the E1, E2, Ub and JAB peptidase proteins.. +PF14457 Prokaryotic E2 family A
Iyer LM, Burroughs AM, Aravind L. A member of the E2/UBC superfamily of proteins found in several bacteria. The active site residues are very similar to the eukaryotic E2 proteins [1,2]. Members of this family are usually fused to E1 and JAB domains C-terminal to the E2 domain. The protein is usually in the gene neighborhood of a gene encoding a distinct metallobetalactamase family protein .. +PF14460 Prokaryotic E2 family D
Iyer LM, Burroughs AM, Aravind L. A member of the E2/UBC superfamily of proteins found in several bacteria. Members of this family lack the conserved histidine of the classical E2-fold. However, they have an absolutely conserved histidine carboxyl-terminal to the conserved cysteine [1,2]. Members of this family are usually present in a conserved gene neighborhood with genes encoding members of the Ub modification pathway such as the E1, Ub and JAB proteins. These neighborhoods also contain a gene encoding a rapidly diverging alpha-helical protein .. +PF14459 Prokaryotic E2 family C
Iyer LM, Burroughs AM, Aravind L. A divergent member of the E2/UBC superfamily of proteins found in bacteria. Members of the family contain a conserved cysteine in place of the histidine of the classical E2/UBC proteins [1,2]. Members of this family are usually fused to an E1 domain at their C-terminus. The protein is usually in the gene neighborhood of a gene encoding a JAB peptidase and another encoding a predicted metal binding domain .. +PF14461 Prokaryotic E2 family B
Iyer LM, Burroughs AM, Aravind L. A member of the E2/UBC superfamily of proteins found in several bacteria. The active site residues are similar to the eukaryotic E2 proteins but lack the conserved asparagine [1,2]. Members of this family are usually fused to an E1 domain at the C-terminus. The protein is usually in the gene neighborhood of a gene encoding a member of the pol-beta nucleotidyltransferase superfamily . Many of the operons in this family are in ICE-like mobile elements and plasmids .. +PF14462 Prokaryotic E2 family E
Iyer LM, Burroughs AM, Aravind L. A member of the E2/UBC superfamily of proteins found in diverse bacteria. Analysis of the active site residues suggest that members of this family are inactive as they lack the characteristic catalytic residues of the E2 enzymes [1,2]. They are usually fused to or in the neighborhood of a multi/poly ubiquitin domain protein. Other proteins of the ubiquitin modification pathway such as the E1 and JAB proteins are also found in its gene neighborhood along with a distinct predicted metal-binding protein.. +PF14463 E1 N-terminal domain
Iyer LM, Burroughs AM, Aravind L. An uncharacterized alpha/beta domain fused to E1 proteins. This protein is usually present in gene neighborhoods with genes encoding a JAB protein and a predicted metal-binding protein. In related E1 proteins, the E1-N domain is replaced by an E2/UBC superfamily domain .. +PF14464 Prokaryotic homologs of the JAB domain
Iyer LM, Burroughs AM, Aravind L. These are metalloenzymes that function as the ubiquitin isopeptidase/ deubiquitinase in the ubiquitin-based signaling and protein turnover pathways in eukaryotes . Prokaryotic JAB domains are predicted to have a similar role in their cognates of the ubiquitin modification pathway [2,3]. The domain is widely found in bacteria, archaea and phages where they are present in several gene contexts in addition to those that correspond to the prokaryotic cognates of the eukaryotic Ub pathway. Other contexts in which JAB domains are present include gene neighbor associations with ubiquitin fold domains in cysteine and siderophore biosynthesis, and phage tail morphogenesis, where they are shown or predicted to process the associated ubiquitin [2,4]. A distinct family, the RadC-like JAB domains are widespread in bacteria and are predicted to function as nucleases . In halophilic archaea the JAB domain shows strong gene-neighborhood associations with a nucleotidyltransferase suggesting a role in nucleotide metabolism .. +PF14465 NFRKB Winged Helix-like
de novo domain prediction, confirmed by X-ray structure determination. This domain covers regions 370-495 of human nuclear factor related to kappaB binding (NFRKB) protein.. +PF14466 Domain of unknown function (DUF4425)
BVU_3708 from Bacteroides vulgatus, JCSG target GS13500. A small family of bacterial proteins, found in several Bacteroides species. Structure determination (NMR and Xray) shows an immunoglobulin beta barrel fold. Multiple homologs have been found in human gut metagenomics data sets.. +PF14467 Domain of unknown function (DUF4426)
Pseudomonas aeruginosa PA0388, JCSG target SP19004A. Members of this entry are found mostly in g-proteobacteria, especially in Vibrio. Strangely enough, there seems to be one eukaryotic homolog in Nematostella vectensis (NEMVEDRAFT_v1g226006), where the PA0388-like domain is fused with a domain homologous to the Methionine biosynthesis protein MetW (see below). In several Pseudomonas species, but also in Vibrio vulnificus and Azotobacter vinelandii PA0388 homologs are genomic neighbors of Nucleoside 5-triphosphatase RdgB (dHAPTP, dITP, XTP-specific) (EC 3.6.1.15) and Methionine biosynthesis protein MetW. On the other hand, in most Vibrio species it appears as a part of a conserved operon involved in possible response to stress.. +PF14468 Protein of unknown function (DUF4427)
PSYMP_19184 [Pseudomonas syringae pv. morsprunorum str. M302280PT]. This domain is often found at the C-terminal of proteins with Pfam:PF10899 domain, for instance in STY1911 protein from a multiple drug resistant Salmonella enterica serovar Typhi CT18.. +PF14469 28 kDa A-kinase anchor
Jaroszewski L, Godzik A. Q86UN6 A-kinase anchor protein 14 isoform a [Homo sapiens]. 28 kDa AKAP (AKAP28) is highly enriched in human airway axonemes. The mRNA for AKAP28 is up-regulated as primary airway cells differentiate and is specifically expressed in tissues containing cilia and/or flagella . Homologs of AKAP28 are present in all animals and in some, including mice, the AKAP28-like domain is preceded by another uncharacterized domain . +PF14470 Bacterial PH domain
Proteins in this family are distantly related to PH domains.. +PF14471 Domain of unknown function (DUF4428)
This putative zinc finger domain is found in uncharacterised bacterial proteins.. +PF14472 Domain of unknown function (DUF4429)
This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and viruses, and is approximately 90 amino acids in length. This domain is often found in two tandem copies.. +PF14473 RD3 protein
RD3 is a human protein that is found preferentially expressed in the retina . Mutations in RD3 cause Leber Congenital Amaurosis type 12 .. +PF14474 RTC4-like domain
This presumed domain is found in the RTC4 protein from yeasts. In Saccharomyces cerevisiae, Cdc13 binds telomeric DNA to recruit telomerase and to "cap" chromosome ends. RTC4 was identified in a screen to identify novel proteins and pathways that cap telomeres, or that respond to uncapped telomeres . This domain is also found in proteins that contain a DNA-binding myb domain.. +PF14475 Sec1-binding region of Mso1
Mso1p is a component of the secretory vesicle docking complex whose function is closely associated with that of Sec1p. It is a small hydrophilic protein that is enriched in the microsomal membrane fraction , and this binding domain is towards the N-terminus of Mso1. The yeast Sec1p protein functions in the docking of secretory transport vesicles to the plasma membrane . Mso1p and Sec1p interact at sites of exocytosis and the Mso1p-Sec1p interaction site depends on a functional Rab GTPase Sec4p and its GEF Sec2p . The C-terminal region of Mso1 (not built) assists in targetting Sec1 to the sites of polarised membrane transport .. +PF14476 Petal formation-expressed
Pfam-B_480 (release 25.0). The members of this plant family from Arabidopsis thaliana appear to be proteins found in the chloroplast, expressed in the pollen tube during the petal differentiation and expansion stage. The function is not known.. +PF14477 Membrane-polarising domain of Mso1
Mso1p is a component of the secretory vesicle docking complex whose function is closely associated with that of Sec1p. It is a small hydrophilic protein that is enriched in the microsomal membrane fraction . The yeast Sec1p protein functions in the docking of secretory transport vesicles to the plasma membrane . Mso1p and Sec1p interact at sites of exocytosis and the Mso1p-Sec1p interaction site depends on a functional Rab GTPase Sec4p and its GEF Sec2p . This C-terminal region of Mso1 assists in targetting Sec1 to the sites of polarised membrane transport, the SNARES and Sec4 .. +PF14478 Domain of unknown function (DUF4430)
JCSG-Target:417407-SP17946A. Although this family has overlaps with SLBB, the majority of its sequences are unique. Several family members, eg UniProtKB:A0RGA8, that do not overlap have an LPXTG-cell wall anchor at their C-terminus, a SSF_Family 10_polysaccharide_lyase or Glycosyltransferase structure associated with them in the middle region, as shown by InterPro, as well as this domain at the N-terminus.. +PF14479 Prion-inhibition and propagation
Greenwald J, Coggill P. Pfam-B_407 (release 25.0). This N-terminal region, HeLo, has a prion-inhibitory effect in cis on its own prion-forming domain (PFD) and in trans on HET-s prion propagation . The domain is found exclusively in the fungal kingdom. Its structure, as it occurs in the HET-s/HET-S proteins, consists of two bundles of alpha-helices that pack into a single globular domain . The domain boundary determined from its structure and from protease-resistance experiments overlaps with the C-terminal prion-forming domain of HET-s (PF11558). The HeLo domains of HET-s and HET-S are very similar and their few differences (and not the prion-forming domains) determine the compatibility-phenotype of the fungi in which the proteins are expressed. The mechanism of the HeLo domain-function in heterokaryon-incompatibility is still under investigation, however the HeLo domain is found in similar protein architectures as other cell death and apoptosis-inducing domains. The only other HeLo protein to which a function has been associated is LopB from L. maculans . Although its specific role in L. maculans is unknown, LopB- mutants have impaired ability to form lesions on oilseed rape. The HeLo domain is not related to the HET domain (PF06985) which is another domain involved in heterokaryon incompatibility.. +PF14480 DNA polymerase III polC-type N-terminus I
Pfam-B_853 (release 23.0). This is the first N-terminal domain, NI domain, of the DNA polymerase III polC subunit A that is found only in Firmicutes. DNA polymerase polC-type III enzyme functions as the 'replicase' in low G + C Gram-positive bacteria . Purine asymmetry is a characteristic of organisms with a heterodimeric DNA polymerase III alpha-subunit constituted by polC which probably plays a direct role in the maintenance of strand-biased gene distribution; since, among prokaryotic genomes, the distribution of genes on the leading and lagging strands of the replication fork is known to be biased . It has been predicted that the N-terminus of polC folds into two globular domains, NI and NII. A predicted patch of electrostatic potential at the surface of this domain suggests a possible involvement in nucleic acid binding . This domain is associated with DNA_pol3_alpha Pfam:PF07733 and DNA_pol3_a_NI Pfam:PF11490.. +PF14481 Type 4 fimbrial biogenesis protein PilY2
Jaroszewski L, Godzik A. Pseudomonas aeruginosa PAO1 gene PA4555, JCSG target SP18988A. Members of this family were experimentally shown to be involved in fimbrial biogenesis, but its exact role appears to be unknown.. +PF14482 Cut8 proteasome-binding domain
In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome . Cut8 comprises three functional domains. An N-terminal lysine-rich segment (this entry) which binds to the proteasome when ubiquitinated, a central dimerisation domain (Pfam:PF14483) and a C-terminal six-helix bundle (Pfam:PF08559), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding . Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 . Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome . In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 . Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum .. +PF14483 Cut8 dimerisation domain
In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome . Cut8 comprises three functional domains. An N-terminal lysine-rich segment (Pfam:PF14482) which binds to the proteasome when ubiquitinated, a central dimerisation domain (this entry) and a C-terminal six-helix bundle (Pfam:PF08559), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding . Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 . Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome . In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 . Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum .. +PF14484 Fish-specific NACHT associated domain
This domain is frequently found associated with the NACHT domain (Pfam:PF05729) in fish and other vertebrates .. +PF14485 Domain of unknown function (DUF4431)
+PF14486 Domain of unknown function (DUF4432)
JCSG_target390294_A6THE6. +PF14487 Domain of unknown function (DUF4433)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 201 and 230 amino acids in length. There is a single completely conserved residue E that may be functionally important. This family is distantly similar to Pfam:PF01885 suggesting these may be ADP-ribosylases.. +PF14488 Domain of unknown function (DUF4434)
JCSG_Target_393000_GS13553A. +PF14489 QueF-like protein
This protein is involved in the biosynthesis of queuosine. In some proteins this domain appears to be fused to Pfam:PF06508.. +PF14490 Helix-hairpin-helix containing domain
This presumed domain contains at least one helix-hairpin-helix motif. This domain is often found in RecD helicases.. +PF14491 Protein of unknown function (DUF4435)
This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 285 and 362 amino acids in length. This domain is sometimes associated with AAA domains.. +PF14492 Elongation Factor G, domain II
This domain is found in Elongation Factor G. It shares a similar structure with domain V (Pfam:PF00679).. +PF14493 Helix-turn-helix domain
This presumed domain is found at the C-terminus of a large number of helicase proteins.. +PF14494 Domain of unknown function (DUF4436)
Pfam-B_6430 (release 25.0). This is a family of membrane and transmembrane proteins from mycobacterial and related species. The function is not known.. +PF14495 Cytochrome c-550 domain
This domain is a heme binding cytochrome known as cytochrome c550, or cytochrome c549, or PsbV .. +PF14496 C-terminal novel E3 ligase, LRR-interacting
Jackhmmer:E7K2H2_PDB:3ckd. This NEL or novel E3 ligase domain is found at the C-terminus of bacterial virulence factors. Its sequence is different from those of the eukaryotic HECT and RING-finger E3 ligases, and it subverts the host ubiquitination process. At the N-terminus of the family-members there is a series of LRR repeats, and the NEL domain interacts with the most N-terminal repeat. The key residue for the ligation step is the cysteine, eg found at position 386 in UniProtKB:E7K2H2. The LRR section sequesters this active site until invasion has occurred .. +PF14497 Glutathione S-transferase, C-terminal domain
This domain is closely related to Pfam:PF00043.. +PF14498 Glycosyl hydrolase family 65, N-terminal domain
This domain represents a domain found to the N-terminus of the glycosyl hydrolase 65 family catalytic domain.. +PF14499 Domain of unknown function (DUF4437)
This family of proteins is found in bacteria. Proteins in this family are typically between 152 and 283 amino acids in length.. +PF14500 Dos2-interacting transcription regulator of RNA-Pol-II
This domain, along with the C-terminal part, Pfam:PF12460 , is an essential component of a silencing complex in fission yeast that contains Dos2, Rik1, Mms19 and Cdc20 (the catalytic subunit of DNA polymerase-epsilon). This complex regulates RNA polymerase II (RNA Pol II) activity in heterochromatin and is required for DNA replication and heterochromatin assembly .. +PF14501 GHKL domain
This family represents the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90.. +PF14502 Helix-turn-helix domain
+PF14503 YhfZ C-terminal domain
This domain is often found in association with the helix-turn-helix domain HTH_41 (Pfam:PF14502). It includes YhfZ proteins from Escherichia coli and Shigella flexneri.. +PF14504 CAP-associated N-terminal
JCSG:target_417453-SP18049A. The function of this domain is unknown, but it is found towards the N-terminus of bacterial proteins carrying the CAP domain, Pfam:PF00188. All members that do not otherwise carry an additional Cu_amine_oxidN1, Pfam:PF07833, domain are likely to be extracellular as they start with a signal-peptide. Most other non-bacterial proteins with the CAP domain are allergenic .. +PF14505 Domain of unknown function (DUF4438)
+PF14506 CppA N-terminal
This is the N-terminal domain of the CppA protein found in species of Streptococcus. CppA is a putative C3-glycoprotein degrading proteinase, involved in pathogenicity [1,2]. It is often found associated with Pfam:PF14507.. +PF14507 CppA C-terminal
This is the C-terminal domain of the CppA protein found in species of Streptococcus. CppA is a putative C3-glycoprotein degrading proteinase, involved in pathogenicity [1,2]. It is often found associated with Pfam:PF14506.. +PF14508 Glycosyl-hydrolase 97 N-terminal
This N-terminal domain of glycosyl-hydrolase-97 contributes part of the active site pocket. It is also important for contact with the catalytic and C-terminal domains of the whole [2,3].. +PF14509 Glycosyl-hydrolase 97 C-terminal, oligomerisation
Glycosyl-hydrolase-97 is made up of three tightly linked and highly conserved globular domains. The C-terminal domain is found to be necessary for oligomerisation of the whole molecule in order to create the active-site pocket and the Ca++-binding site.. +PF14510 ABC-transporter extracellular N-terminal
Pfam-B_101 (release 25.0). This domain is found at the N-terminus of ABC-transporter proteins from fungi, plants to higher eukaryotes. It would appear to be an extracellular domain.. +PF14511 Type II restriction endonuclease EcoO109I
Coggill P, Eberhardt R. pdb_1wtd-Jackhmmer:Q9RPJ3. This is a family of Type II restriction endonucleases.. +PF14512 Putative TM nitroreductase
Jackhmmer:Q9X1S2_pdb:1vkw. Compared with the more traditional NADH oxidase/flavin reductase family, this family is a duplication, consisting of two similar domains arranged as the subunits of the dimeric NADH oxidase/flavin reductase with one conserved active site.. +PF14513 Diacylglycerol kinase N-terminus
This domain is found at the N-terminus of diacylglycerol kinases.. +PF14514 Transcriptional regulator, TetR, C-terminal
This family comprises proteins that belong to the TetR family of transcriptional regulators. This family features the C-terminal region of these sequences, which does not include the N-terminal helix-turn-helix.. +PF14515 Haem-oxygenase-associated N-terminal helices
Jackhmmer:Q9HY91, pdb_3bjdA. This domain represents a pair of alpha helices, which are found at the N-terminus of some Haem-oxygenase globular domain.. +PF14516 AAA-like domain
This family of proteins are part of the AAA superfamily.. +PF14517 Tachylectin
This family of lectins binds N-acetylglucosamine and N-acetylgalactosamine and may be involved in innate immunity [1-3]. It has a five-bladed beta-propeller structure with five carbohydrate-binding sites, one per beta sheet .. +PF14518 Iron-containing redox enzyme
Pfam-B_412 (release 26.0). The CADD, Chlamydia protein associating with death domains, crystal structure reveals a dimer of seven-helical bundles. Each bundle contains a di-iron centre adjacent to an internal cavity that forms an active site similar to that of methane mono-oxygenase hydrolase .. +PF14519 Macro-like domain
Jachmmer_A6ZME4, pdb_1njr. This domain is an ADP-ribose binding module. It is found in a number of yeast proteins.. +PF14520 Helix-hairpin-helix domain
+PF14521 Lysine-specific metallo-endopeptidase
Pfam-B_2237 (release 26.0), pdb_1g12 . This is the catalytic region of aspzincins, a group of lysine-specific metallo-endopeptidases in the MEROPS:M35 family. They exhibit the following active-site architecture. The active site is composed of two helices and a loop region and includes the HExxH and GTxDxxYG motifs. In UniProt:P81054, His117, His121 and Asp130 coordinate to the catalytic zinc ligands. An electrostatically negative region composed of Asp154 and Glu157 attracts a positively charged Lys side chain of a substrate in a specific manner .. +PF14522 Cytochrome c7
This family includes cytochromes c7 and c7-type. In cytochromes c7 all three haems are bis-His co-ordinated. In c7-type the last haem is His-Met co-ordinated .. +PF14523 Syntaxin-like protein
This domain includes syntaxin-like domains including from the Vam3p protein .. +PF14524 Wzt C-terminal domain
This domain is found at the C-terminus of the Wzt protein . The crystal structure of C-Wzt(O9a) reveals a beta sandwich with an immunoglobulin-like topology that contains the O-antigenic polysaccharide binding pocket. This domain is often associated with the ABC-transporter domain.. +PF14525 AraC-binding-like domain
This domain is related to the AraC ligand binding domain Pfam:PF02311.. +PF14526 Integron-associated effector binding protein
This family contains Cass2 from Vibrio cholerae, an integron-associated protein that has been shown to bind cationic drug compounds with submicromolar affinity. Cass2 has been proposed to be representative of a larger family of independent effector-binding proteins associated with lateral gene transfer within Vibrio and other closely-related species.. +PF14527 WhiA LAGLIDADG-like domain
This domain is found within the sporulation regulator WhiA. It is a LAGLIDADG superfamily like domain [1-2].. +PF14528 LAGLIDADG-like domain
This domain is part of the LAGLIDADG superfamily .. +PF14529 Endonuclease-reverse transcriptase
This domain represents the endonuclease region of retrotransposons from a range of bacteria, archaea and eukaryotes. These are enzymes largely from class EC:2.7.7.49.. +PF14530 Domain of unknown function (DUF4439)
This domain has a ferritin-like fold.. +PF14531 Kinase-like
This family includes the pseudokinases ROP2 and ROP8 from Toxoplasma gondii (Swiss:Q06AK3 and Swiss:O15693). These proteins have a typical bilobed protein kinase fold, but lack catalytic activity .. +PF14532 Sigma-54 interaction domain
+PF14533 Ubiquitin-specific protease C-terminal
Pfam-B_1954 (release 25.0). This C-terminal domain on many long ubiquitin-specific proteases has no known function.. +PF14534 Domain of unknown function (DUF4440)
+PF14535 AMP-binding enzyme C-terminal domain
This is a small domain that is found C terminal to Pfam:PF00501. It has a central beta sheet core that is flanked by alpha helices.. +PF14536 Domain of unknown function (DUF4441)
Pfam-B_1275 (release 25.0). This family is largely made up of uncharacterised proteins from the Ciliophora. The function is not known.. +PF14537 Cytochrome c3
+PF14538 Raptor N-terminal CASPase like domain
This domain is found at the N-terminus of the Raptor protein. It has been identified to have a CASPase like structure . It conserves the characteristic cys/his dyad of the caspases suggesting it may have a peptidase activity.. +PF14539 Domain of unknown function (DUF4442)
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 139 and 165 amino acids in length. There is a conserved PYF sequence motif. There is a single completely conserved residue N that may be functionally important.. +PF14540 Nucleotidyltransferase-like
Structural comparisons with PDB:1kny indicate that this N-terminal domain resembles a nucleotidyltransferase fold.. +PF14541 Xylanase inhibitor C-terminal
The N- and C-termini of the members of this family are jointly necessary for creating the catalytic pocket necessary for cleaving xylanase. Phytopathogens produce xylanase that destroys plant cells, so its destruction through proteolysis is vital for plant-survival.. +PF14542 GCN5-related N-acetyl-transferase
This family of GCN5-related N-acetyl-transferases bind both CoA and acetyl-CoA. They are characterised by highly conserved glycine, a cysteine residue in the acetyl-CoA binding site near the acetyl group, their small size compared with other GNATs and a lack of an obvious substrate-binding site. It is proposed that they transfer an acetyl group from acetyl-CoA to one or more unidentified aliphatic amines via an acetyl (cysteine) enzyme intermediate. The substrate might be another macromolecule.. +PF14543 Xylanase inhibitor N-terminal
The N- and C-termini of the members of this family are jointly necessary for creating the catalytic pocket necessary for cleaving xylanase. Phytopathogens produce xylanase that destroys plant cells, so its destruction through proteolysis is vital for plant-survival.. +PF14544 Domain of unknown function (DUF4443)
This is a family of archaeal proteins. The domain is a putative gyrase domain.. +PF14545 BCAP_N;
Dof, BCAP, and BANK (DBB) motif,. PROSITE, Pfam-B_2980 (release 26.0). The DBB domain is named from the Drosophila (Downstream of FGFR - Dof, also known as Heartbroken or Stumps) protein, the BANKS and BCAP, both signalling in B-cell pathway, proteins. This domain defines a minimal region required for mediating Dof dimerisation. Since this domain can interact both with itself and with a region in the C-terminal part of the molecule, it may mediate either intermolecular or intramolecular interactions . Mutants lacking this domain disrupt FGFR signal transduction and fibroblast growth-factor signalling .. +PF14547 Hydrophobic seed protein
This domain has a four-helix bundle structure. It contains four disulfide bonds, of which three function to keep the C- and N-terminal parts of the molecule in place .. +PF14549 DNA-binding transcriptional regulator Cro
Bacteriophage P22 Cro protein represses genes normally expressed in early phage development and is necessary for the late stage of lytic growth. It does this by binding to the OL and OR operator-regions normally used by the repressor protein for lysogenic maintenance.. +PF14550 Putative phage protease XkdF
Pfam-B_5816 (release 26.0). This domain is largely found on phage proteins. In a number of cases the domain is associated with a SAM-dependent methyltransferase.. +PF14551 MCM N-terminal domain
This family contains the N-terminal region of MCM proteins. This region is composed of three structural domains. Firstly a four helical bundle, secondly a zinc binding motif and thirdly an OB-like fold .. +PF14552 Tautomerase enzyme
CATH:3c6vA00, Pfam-B_819 (release 26.0). +PF14553 YqbF, hypothetical protein domain
This N-terminal domain is found in Bacillus and related spp. The function is not known.. +PF14554 VEGF heparin-binding domain
This short domain is found at the C-terminus of VEGF. It has been shown to have heparin binding activity.. +PF14555 UBA-like domain
+PF14556 AF2331-like
AF2331-like is a 11-kDa orphan protein of unknown function from Archaeoglobus fulgidus. The structure consists of an alpha + beta fold formed by an unusual homodimer, where the two core beta-sheets are interdigitated, containing strands alternating from both subunits. AF2331 contains multiple negatively charged surface clusters and is located on the same operon as the basic protein AF2330. It is suggested that AF2331 and AF2330 may form a charge-stabilized complex in vivo, though the role of the negatively charged surface clusters is not clear.. +PF14557 Putative AphA-like transcriptional regulator
Members of this family are putative transcriptional regulators that appear to be related to the Pfam:PF03551 family. This family includes AphA-like members.. +PF14558 ML-like domain
This domain is distantly similar to Pfam:PF02221 and conserves its pattern of conserved cysteines. This suggests that this domain may be involved in lipid binding.. +PF14559 Tetratricopeptide repeat
+PF14560 Ubiquitin-like domain
This entry contains ubiquitin-like domains [1-2].. +PF14561 Tetratricopeptide repeat
+PF14562 Restriction endonuclease BglI
This restriction endonuclease binds DNA as a dimer. BglI recognises and cleaves the interrupted DNA sequence GCCNNNNNGGC and cleaves between the fourth and fifth unspecified base pair to produce 3' overhanging ends .. +PF14563 Domain of unknown function (DUF4444)
This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved LIPL sequence motif. There are two completely conserved G residues that may be functionally important.. +PF14564 Membrane binding
This family includes the C-terminal domain of Dictyostelium discoideum Calcium-dependent cell adhesion molecule 1 (Swiss:P54657), which has an immunoglobulin-like fold. It tethers the protein to the cell membrane .. +PF14565 Interleukin 22 IL-10-related T-cell-derived-inducible factor
Interleukin-22 is distantly related to interleukin (IL)-10, and is produced by activated T cells. IL-22 is a ligand for CRF2-4, a member of the class II cytokine receptor family.. +PF14566 Inositol hexakisphosphate
Pfam-B_194 (release 26.0). Inositol hexakisphosphate, often called phytate, is found in abundance in seeds and acting as an inorganic phosphate reservoir. Phytases are phosphatases that hydrolyze phytate to less-phosphorylated myo-inositol derivatives and inorganic phosphate. The active-site sequence (HCXXGXGR) of the phytase identified from the gut micro-organism Selenomonas ruminantium forms a loop (P loop) at the base of a substrate binding pocket that is characteristic of protein tyrosine phosphatases (PTPs). The depth of this pocket is an important determinant of the substrate specificity of PTPs. In humans this enzyme is thought to aid bone mineralization and salvage the inositol moiety prior to apoptosis .. +PF14567 SMI1-KNR4 cell-wall
Pfam-B_7167 (release 26.0). Members of this family are related to the SMI1/KNR4-like or SUKH superfamily of proteins.. +PF14568 SMI1-KNR4 cell-wall
Pfam-B_725 (release 26.0). Members of this family are related to the SMI1/KNR4-like or SUKH superfamily of proteins.. +PF14569 Zinc-binding RING-finger
This RING/U-box type zinc-binding domain is frequently found in the catalytic subunit (irx3) of cellulose synthase. The enzymic class is EC:2.4.1.12, whereby the synthase removes the glucose from UDP-glucose and adds it to the growing cellulose, thereby releasing UDP. The domain-structure is treble-clef like (PDB:1weo).. +PF14570 RING/Ubox like zinc-binding domain
+PF14571 Stress-induced protein Di19, C-terminal
C-terminal domain of Di19, a protein that increases the sensitivity of plants to environmental stress, such as salinity, drought, osmotic stress and cold. the protein is also induced by an increased supply of stress-related hormones such as abscisic acid ABA and ethylene . There is a zinc-finger at the N-terminus, zf-Di19, Pfam:PF05605.. +PF14572 Phosphoribosyl synthetase-associated domain
pdb_2c4k; Jackhmmer:Q14558. This family includes several examples of enzymes from class EC:2.7.6.1, phosphoribosyl-pyrophosphate transferase.. +PF14573 Acyl-carrier
+PF14574 Domain of unknown function (DUF4445)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 525 and 664 amino acids in length. The family is found in association with Pfam:PF00111.. +PF14575 Ephrin type-A receptor 2 transmembrane domain
Epha2_TM represents the left-handed dimer transmembrane domain of EphA2 receptor. This domain oligomerises and is important for the active signalling process.. +PF14576 Sieve element occlusion N-terminus
Sieve element occlusion (SEO) proteins, or forisomes, are phloem proteins which accumulate during sieve element differentiation . This domain represents the N-terminus of SEO proteins.. +PF14577 Sieve element occlusion C-terminus
Sieve element occlusion (SEO) proteins, or forisomes, are phloem proteins which accumulate during sieve element differentiation . This domain represents the C-terminus of SEO proteins.. +PF14578 Elongation factor Tu domain 4
Elongation factor Tu consists of several structural domains, and this is usually the fourth.. +PF14579 Helix-hairpin-helix motif
The HHH domain is a short DNA-binding domain .. +PF14580 Leucine-rich repeat
+PF14581 SseB protein C-terminal domain
This family consists of several SseB proteins which appear to be found exclusively in Enterobacteria. SseB is known to enhance serine-sensitivity in Escherichia coli and is part of the Salmonella pathogenicity island 2 (SPI-2) translocon. This presumed domain is found at the C-terminus of SseB proteins.. +PF14582 Metallophosphoesterase, calcineurin superfamily
Members of this family are part of the Calcineurin-like phosphoesterase superfamily.. +PF14583 Oligogalacturonate lyase
This is a family of oligogalacturonate lyases, referred to more generally as pectate lyase family 22. These proteins fold into 7-bladed beta-propellers.. +PF14584 Protein of unknown function (DUF4446)
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 165 and 176 amino acids in length.. +PF14585 CagY type 1 repeat
This repeat is found at the N-terminus of the CagY proteins - part of the CAG pathogenicity island - and involved in delivery of the protein CagA into host cells ( ).. +PF14586 Class I Histocompatibility antigen, NKG2D ligand, domains 1 and 2
Members of this family are known as retinoic-acid-inducible proteins. They are ligands for the activating immunoreceptor NKG2D, which is widely expressed on natural killer cells, T cells, and macrophages.. +PF14587 O-Glycosyl hydrolase family 30
+PF14588 YjgF/chorismate_mutase-like, putative endoribonuclease
YjgF_Endoribonuc is a putative endoribonuclease. The structure is of beta-alpha-beta-alpha-beta(2) domains common both to bacterial chorismate mutase and to members of the YjgF family. These proteins form trimers with a three-fold symmetry with three closely-packed beta-sheets. The YjgF family is a large, widely distributed family of proteins of unknown biochemical function that are highly conserved among eubacteria, archaea and eukaryotes .. +PF14589 Polysulfide reductase
CATH:2vpzC00, Pfam-B_200168. Bacterial polysulfide reductase is an integral membrane protein complex responsible for quinone-coupled reduction of polysulfide, a process important in extreme environments such as deep-sea vents and hot springs. Polysulfides are a class of compounds composed of chains of sulfur atoms, which in their simplest form are present as an anion with general formula Sn(2-). In nature, polysulfides are found in particularly high concentrations in extreme volcanic or geothermically active environments. Here, the reduction and oxidation of polysulfides are vital processes for many bacteria and are essential steps in the global sulfur cycle. In particular, the reduction of polysulfide to hydrogen sulfide in these environments is usually linked to energy-generating respiratory processes, supporting growth of many microorganisms, particularly hyperthermophiles.. +PF14590 Domain of unknown function (DUF4447)
This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length.. +PF14591 NTP_transf_5;
Members of this family are of unknown function.. +PF14592 Chondroitinase B
This family includes chondroitinases. These enzymes cleave the glycosaminoglycan dermatan sulfate .. +PF14593 PH domain
+PF14594 Siphovirus ReqiPepy6 Gp37-like protein
This family includes numerous phage proteins from Siphoviruses. The function of this protein is uncertain, but it is related to Pfam:PF06605. In Rhodococcus phage ReqiPepy6 this protein is called Gp37 .. +PF14595 Thioredoxin
+PF14596 STAT6 C-terminal
This family represents the C-terminus of mammalian STAT6 (Signal transducer and activator of transcription 6), it contains an LXXLL motif which binds to NCOA1 (Nuclear receptor coactivator 1) .. +PF14597 Metallo-beta-lactamase superfamily
This is a small family of putative metal-dependent hydrolases.. +PF14598 PAS domain
This family includes the PAS-B domain of NCOA1 (Nuclear receptor coactivator 1), which binds to an LXXLL motif in the C-terminal region of STAT6 (Signal transducer and activator of transcription 6) .. +PF14599 Zinc-ribbon
This is a typical zinc-ribbon finger, with each pair of zinc-ligands coming from more-or-less either side of two knuckles. It is found in eukaryotes.. +PF14600 Cellulose-binding domain
This C-terminal domain belongs to the CAZy family of carbohydrate-binding domains that are associated with glycosyl-hydrolases. It is suggested to bind cellulose.. +PF14601 DNA_binding protein, TFX, C-term
This is the C-terminal region of TFX-like DNA-binding proteins.. +PF14602 Hexapeptide repeat of succinyl-transferase
+PF14603 Helically-extended SH3 domain
This domain is the 70 C-terminal residues of ADAP - Adhesion and de-granulation promoting adapter protein. It shows homology to SH3 domains; however, conserved residues of the fold are absent. It thus represents an altered SH3 domain fold. An N-terminal, amphipathic, helix makes extensive contacts to residues of the regular SH3 domain fold thereby creating a composite surface with unusual surface properties. The domain can no longer bind conventional proline-rich peptides . There are key phosphorylation sites within the two hSH3 domains and it would appear that binding at these sites does not materially affect the folding of these regions although the equilibrium towards the unfolded state may be slightly altered . The binding partners of the hSH3 domains are still unknown .. +PF14604 Variant SH3 domain
Jackhmmer, JCSG:target_422527. +PF14605 Nup53/35/40-type RNA recognition motif
Jackhmmer, JCSG:target_422743. +PF14606 GDSL-like Lipase/Acylhydrolase family
Jackhmmer, JCSG:target_416889. +PF14607 N-terminus of Esterase_SGNH_hydro-type
Jackhmmer, JCSG_target_416889. This domain lies upstream of SGNH hydrolase, but its function is not known. There is a highly conserved GxDLY sequence-motif.. +PF14608 Zinc finger C-x8-C-x5-C-x3-H type
Pfam-B_880 (release 26.0). This is a zinc-finger of the type C-x8-C-x5-C-x3-H.. +PF14609 gamma-Tubulin ring complex non-core subunit mod21
Pfam-B_276835 (release 26.0). GCP5-Mod21 is a non-core subunit of the larger gamma-tubulin ring complex that effects microtubule nucleation from both centrosomal and non-centrosomal sites. This subunit, unlike GCP2 and GCP3 and others, is not thought to be essential for viability in the fission yeast, and may not be expressed in very high concentrations. Fission yeast can form a large gamma-Tubulin complex C similar to that found in higher eukaryotes and this complex is important for maintaining normal levels of microtubule nucleation in vivo .. +PF14610 GPI-anchored_2;
Protein of unknown function (DUF4448). Pfam-B_5686 (release 26.0). This is a family of predicted membrane glycoproteins from fungi. However there appears, visually, to be some similarity with the family of GPI-anchored fungal proteins, Pfam:PF10342.. +PF14611 Mitochondrial inner-membrane-bound regulator
Pfam-B_1679 (release 26.0). SLS is a fungal domain found bound to the mitochondrial inner-membrane . It reacts physically with fungal Kar2p to promote translocation across the endoplasmic-reticulum membrane. This action appeared to be mediated via the promotion of the Sec63p-mediated activation of Kar2p's ATPase activity. This indicates that the Sls1p protein is a GrpE-like protein in the endoplasmic reticulum. In S.cerevisiae the SLS1 gene (ScSLS1) is not essential but is also involved in ERAD and folding [2,3].. +PF14612 IEC3 subunit of the Ino80 complex, chromatin re-modelling
Pfam-B_3771 (release 26.0). This is a family of fungal chromatin re-modelling proteins found in one of the chromatin-central complexes, Ino80. The function was identified in Schizosaccharomyces pombe but there is no orthologue in S. cerevisiae. . +PF14613 Protein of unknown function (DUF4449)
Pfam-B_1378 (release 26.0). This is a fungal DUF of unknown function.. +PF14614 Domain of unknown function (DUF4450)
JCSG:Target_393004-GS13576A. This is a family of bacterial proteins of unknown function.. +PF14615 Ribosome-assembly protein 3
Pfam-B_11864 (release 26.0). This is a family of 60S ribosome-assembly proteins, from fungi.. +PF14616 Domain of unknown function (DUF4451)
Pfam-B_5126 (release 26.0). This is family of fungal proteins up-regulated during meiosis.. +PF14617 U3-containing 90S pre-ribosomal complex subunit
Pfam-B_3046 (release 26.0). This is a family of fungal and plant CMS1-like proteins. The family has similarity to the DEAD-box helicases. . +PF14618 Domain of unknown function (DUF4452)
Pfam-B_6056 (release 26.0). This fungal family has no known function. However, it is rich in paired, as CXXC, cysteines and histidines, but these do not fall in the conformation that might suggest zinc-binding.. +PF14619 Snf2-ATP coupling, chromatin remodelling complex
Pfam-B_4045 (release 26.0). This domain appears to play a crucial role in chromatin remodelling for yeast SWI/SNF. It binds histones. It is required for mobilising nucleosomes and lies within the catalytic subunit of the yeast SWI/SNF. It is found to be universally conserved .. +PF14620 YpeB sporulation
Pfam-B_309 (release 26.0). YPEB is a protein that is necessary for the functioning of SleB during spore-cortex hydrolysis.. +PF14621 RFX5 DNA-binding domain
Pfam-B_20855 (release 26.0). RFX5 and RFXAP reveals molecular details associated with MHCII gene expression.. +PF14622 Ribonuclease-III-like
Pfam-B_6419 (release 26.0). Members of this family are involved in rDNA transcription and rRNA processing. They probably also cleave a stem-loop structure at the 3' end of U2 snRNA to ensure formation of the correct U2 3' end; they are involved in polyadenylation-independent transcription termination. Some members may be mitochondrial ribosomal protein subunit L15, others may be 60S ribosomal protein L3.. +PF14623 Hint-domain
This short domain is a conserved region of intein-containing proteins from lower eukaryotes. +PF14624 VWA / Hh protein intein-like
Buerglin T, Coggill P. VWA-Hint proteins carry this conserved domain of around 300 residues, now named the Vwaint domain. Such proteins do not seem to have a signal peptide for secretion. Generally, this domain lies between the N-terminal VWA domain and the more C-terminal 'Vint'-type Hint domain. The exact function of this domain is not known.. +PF14625 Lustrin, cysteine-rich repeated domain
This repeated domain is found in proteins from lower eukaryotes in lustrin, perlucin, pearl nacre, and other similar protein-types. Each repeat lies between Kunitz-BPTI repeats, in certain species, which are also cysteine-rich. The cysteines may form the disulfide bonds observed for other members of this superfamily.. +PF14626 Zc3h12a-like Ribonuclease NYN domain
This family is found to be a divergent form of the NYN-domain- containing RNAse family.. +PF14627 Domain of unknown function (DUF4453)
This short domain is found only on a small subgroup of proteins from Gram-negative Proteobacteria that also carry a YARHG domain, Pfam:PF13308. They carry three conserved tryptophan and three conserved cysteine residues.. +PF14628 Domain of unknown function (DUF4454)
This C-terminal domain is found only on a small subgroup of proteins from Gram-positive Clostridiales that also carry a YARHG domain, Pfam:PF13308.. +PF14629 Origin recognition complex (ORC) subunit 4 C-terminus
This entry represents the C-terminus of origin recognition complex subunit 4 [1,2].. +PF14630 Origin recognition complex (ORC) subunit 5 C-terminus
This entry represents the C-terminus of origin recognition complex subunit 5 .. +PF14631 Fanconi anaemia protein FancD2 nuclease
The Fanconi anaemia protein FancD2 is a nuclease necessary for the repair of DNA interstrand-crosslinks.. +PF14632 Acidic N-terminal SPT6
pfam-B_9510 (release 26.0). The N-terminus of SPT6 is highly acidic. The full SPT6 protein is a transcription regulator, but the exact function of this acidic region is not certain.. +PF14633 SH2 domain
pfam-B_9510 (release 26.0). +PF14634 zinc-RING finger domain
+PF14635 Helix-hairpin-helix motif\
Pfam-B_9510 (release 26.0). +PF14636 Folliculin-interacting protein N-terminus
This is the N-terminus of folliculin-interacting proteins [1,2].. +PF14637 Folliculin-interacting protein middle domain
This is the middle domain of folliculin-interacting proteins [1,2].. +PF14638 Folliculin-interacting protein C-terminus
This is the C-terminus of folliculin-interacting proteins [1,2]. This region is responsible for binding to folliculin .. +PF14639 Holliday-junction resolvase-like of SPT6
Pfam-B_9510 (release 26.0). The YqgF domain of SPT6 proteins is homologous to the E.coli RuvC but its putative catalytic site lacks the carboxylate side chains critical for coordinating magnesium ions that mediate phosphodiester bond-cleavage . +PF14640 Transmembrane protein 223
+PF14641 HtH;
Helix-turn-helix DNA-binding domain of SPT6. Pfam-B_9510 (release 26.0). This helix-turn-helix represents the first of two DNA-binding domains on the SPT6 proteins.. +PF14642 FAM47 family
The function of this Chordate family of proteins is not known.. +PF14643 Domain of unknown function (DUF4455)
This domain family is found in bacteria and eukaryotes, and is approximately 480 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important.. +PF14644 Domain of unknown function (DUF4456)
This domain family is found in bacteria and eukaryotes, and is approximately 210 amino acids in length. There is a single completely conserved residue E that may be functionally important.. +PF14645 Chibby family
This family includes the eukaryotic chibby proteins. These proteins inhibit the wingless/Wnt pathway by binding to beta-catenin and inhibiting beta-catenin-mediated transcriptional activation. Chibby is Japanese for small, and is named after the RNAi phenotype seen in Drosophila .. +PF14646 MYCBP-associated protein family
This family of eukaryotic proteins includes the mammalian MYCBP-associated proteins. These proteins may be involved in synaptic processes and may have a role in spermatogenesis .. +PF14647 FAM91 N-terminus
+PF14648 FAM91 C-terminus
+PF14649 Spatacsin C-terminus
This family includes the C-terminus of spatacsin.. +PF14650 FAM75 family
+PF14651 Lipocalin / cytosolic fatty-acid binding protein family
Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The family also encompasses the enzyme prostaglandin D synthase (EC:5.3.99.2).. +PF14652 Domain of unknown function (DUF4457)
This family of proteins is found in eukaryotes. It is found repeated several times in the vertebrate KIAA0556 proteins.. +PF14653 Insulin growth factor-like family
This family includes the insulin growth factor-like proteins. These proteins are potential ligands for the IGFLR1 cell membrane receptor .. +PF14654 Mucin_C;
Mucin, catalytic, TM and cytoplasmic tail region. Pfam-B_ 91014 (release 26.0). This family represents the non-tandem repeat domain including cleavage site, the transmembrane helix domain, and the cytoplasmic tail of epiglycanin and related mucins .. +PF14655 Rab3 GTPase-activating protein regulatory subunit N-terminus
This family includes the N-terminus of the Rab3 GTPase-activating protein non-catalytic subunit. Rab3 GTPase-activating protein is a GTPase activating protein with specificity for Rab3 subfamily .. +PF14656 Rab3 GTPase-activating protein regulatory subunit C-terminus
This family includes the C-terminus of the Rab3 GTPase-activating protein non-catalytic subunit. Rab3 GTPase-activating protein is a GTPase activating protein with specificity for Rab3 subfamily .. +PF14657 AP2-like DNA-binding integrase domain
This family includes AP2-like domains found in a variety of phage integrase proteins. Presumably these domains are DNA-binding.. +PF14658 efhand_7;
+PF14659 Phage integrase, N-terminal SAM-like domain
This domain is found in a variety of phage integrase proteins.. +PF14660 Domain of unknown function (DUF4458)
This domain is found in tandem repeats on the N-terminus of secreted LRR proteins from human-associated Bacteroidetes. Domain boundaries are based on the JCSG-solved 3D structure of JCSG target SP16667A (BT_0210). +PF14661 HAUS augmin-like complex subunit 6 N-terminus
This family includes the N-terminus of HAUS augmin-like complex subunit 6. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2].. +PF14662 Coiled-coil region of CCDC155
This is a small family of eukaryotic proteins of unknown function. This is the central coiled-coil region.. +PF14663 Rapamycin-insensitive companion of mTOR RasGEF_N domain
Pfam-B_389 (release 26.0). Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTORC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the more conserved central section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin.. +PF14664 Rapamycin-insensitive companion of mTOR, N-term
Pfam-B_389 (release 26.0). Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTORC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the N-terminal conserved section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin.. +PF14665 Rapamycin-insensitive companion of mTOR, phosphorylation-site
Pfam-B_389 (release 26.0). Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTORC1 and mTORC2, important for nutrient- and growth-factor signalling. This short region is the phosphorylation site. Rictor does interact with 14-3-3 in a Thr1135-dependent manner. Rictor can be inhibited by short-term rapamycin treatment showing that Thr1135 is an mTORC1-regulated site.. +PF14666 Rapamycin-insensitive companion of mTOR, middle domain
Pfam-B_389 (release 26.0). Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTORC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the more conserved central section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin.. +PF14667 Polysaccharide biosynthesis C-terminal domain
This family represents the C-terminal integral membrane region of polysaccharide biosynthesis proteins.. +PF14668 Rapamycin-insensitive companion of mTOR, domain 5
Pfam-B_389 (release 26.0). Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTORC1 and mTORC2, important for nutrient and growth-factor signalling. These long eukaryotic proteins carry several well-conserved domains, and this is No.5.. +PF14669 Putative aspartate racemase
Pfam-B_34791 (release 26.0). This is a small family of vertebrate putative aspartate racemases. The family lies on TOPAZ 1 proteins.. +PF14670 Coagulation Factor Xa inhibitory site
Jackhmmer CATH:3kl6_B_0. This short domain on coagulation enzyme factor Xa is found to be the target for a potent inhibitor of coagulation, TAK-442 .. +PF14671 Dual specificity protein phosphatase, N-terminal half
Jackhmmer:CATH:1ohe_A_01. The active core of the dual specificity protein phosphatase is made up of two globular domains both with the DSP-like fold. This family represents the N-terminal half of the core. These domains are arranged in tandem, and are associated via an extensive interface to form a single globular whole. The conserved PTP signature motif (Cys-[X]5-Arg) that defines the catalytic centre of all PTP-family members is located within the C-terminal domain, family DSPc, Pfam:PF00782. Although the centre of the catalytic site is formed from DSPc, two loops from the N-terminal domain, DSPn, also contribute to the catalytic site, facilitating peptide substrate specificity .. +PF14672 Late cornified envelope
This is a family of late cornified envelope proteins that are expressed in skin .. +PF14673 Domain of unknown function (DUF4459)
Pfam-B_10980 (release 26.0). This family appears only on sequences from Salmonella spp. These sequences also all carry a YARHG domain, Pfam:PF13308.. +PF14674 FANCI solenoid 1 cap
This is the solenoid 1 cap (S1-cap) domain of the Fanconi anemia group I protein .. +PF14675 FANCI solenoid 1
This is the solenoid 1 (S1) domain of the Fanconi anemia group I protein .. +PF14676 FANCI solenoid 2
This is the solenoid 2 (S2) domain of the Fanconi anemia group I protein .. +PF14677 FANCI solenoid 3
This is the solenoid 3 (S3) domain of the Fanconi anemia group I protein .. +PF14678 FANCI solenoid 4
This is the solenoid 4 (S4) domain of the Fanconi anemia group I protein .. +PF14679 FANCI helical domain 1
This is the helical domain 1 (HD1) of the Fanconi anemia group I protein .. +PF14680 FANCI helical domain 2
This is the helical domain 2 (HD2) of the Fanconi anemia group I protein .. +PF14681 Uracil phosphoribosyltransferase
This family includes the enzyme uracil phosphoribosyltransferase (EC:2.4.2.9). This enzyme catalyzes the first step of UMP biosynthesis.. +PF14682 Sporulation initiation phospho-transferase B, C-terminal
Sporulation initiation phospho-transferase B or SpoOB is part of a phospho-relay that initiates sporulation in Bacillus subtilis. Spo0B is a two-domain protein consisting of an N-terminal alpha-helical hairpin domain and a C-terminal alpha/beta domain, represented by this family. Two subunits of Spo0B dimerise by a parallel association of helical hairpins to form a novel four-helix bundle from which the active histidine - involved in the auto-phosphorylation - protrudes. In the phospho-relay, the signal-receptor histidine kinases are dephosphorylated by a common response regulator, Spo0F. Spo0B then takes phosphorylated Spo0F as substrate thereby mediating the transfer of a phosphoryl group to Spo0A, the ultimate transcription factor.. +PF14683 Polysaccharide lyase family 4, domain III
CBM-like is domain III of rhamnogalacturonan lyase (RG-lyase). The full-length protein specifically recognises and cleaves alpha-1,4 glycosidic bonds between l-rhamnose and d-galacturonic acids in the backbone of rhamnogalacturonan-I, a major component of the plant cell wall polysaccharide, pectin. This domain possesses a jelly roll beta-sandwich fold structurally homologous to carbohydrate binding modules (CBMs), and it carries two sulfate ions and a hexa-coordinated calcium ion.. +PF14684 Tricorn protease C1 domain
This domain is the C1 core domain of tricorn protease. This is a mixed alpha-beta domain .. +PF14685 Tricorn protease PDZ domain
This domain is the PDZ domain of tricorn protease .. +PF14686 Polysaccharide lyase family 4, domain II
FnIII-like is domain II of rhamnogalacturonan lyase (RG-lyase). The full-length protein specifically recognises and cleaves alpha-1,4 glycosidic bonds between l-rhamnose and d-galacturonic acids in the backbone of rhamnogalacturonan-I, a major component of the plant cell wall polysaccharide, pectin. This domain displays an immunoglobulin-like or more specifically Fibronectin-III type fold and shows highest structural similarity to the C-terminal beta-sandwich subdomain of the pro-hormone/propeptide processing enzyme carboxypeptidase gp180 from duck. It serves to assist in producing the deep pocket, with domain III, into which the substrate fits .. +PF14687 Domain of unknown function (DUF4460)
This domain family is found in eukaryotes, and is typically between 103 and 119 amino acids in length. There is a conserved HPD sequence motif. There are two completely conserved residues (N and F) that may be functionally important.. +PF14688 Domain of unknown function (DUF4461)
This domain family is found in eukaryotes, and is approximately 310 amino acids in length.. +PF14689 Sensor_kinase_SpoOB-type, alpha-helical domain
Sporulation initiation phospho-transferase B or SpoOB is part of a phospho-relay that initiates sporulation in Bacillus subtilis. Spo0B is a two-domain protein consisting of an N-terminal alpha-helical hairpin domain and a C-terminal alpha/beta domain. Two subunits of Spo0B dimerise by a parallel association of helical hairpins to form a novel four-helix bundle from which the active histidine - involved in the auto-phosphorylation - protrudes. In the phospho-relay, the signal-receptor histidine kinases are dephosphorylated by a common response regulator, Spo0F. Spo0B then takes phosphorylated Spo0F as substrate thereby mediating the transfer of a phosphoryl group to Spo0A, the ultimate transcription factor. The exact function of this alpha-helical domain is not known; it does not always occur just as the N-terminal domain of SPOB_ab, Pfam:PF14682. SCOP describes this domain as a histidine kinase-like fold lacking the kinase ATP-binding site.. +PF14690 zinc-finger of transposase IS204/IS1001/IS1096/IS1165
+PF14691 Dihydroprymidine dehydrogenase domain II, 4Fe-4S cluster
Domain II of the enzyme dihydropyrimidine dehydrogenase binds FAD. Dihydropyrimidine dehydrogenase catalyses the first and rate-limiting step of pyrimidine degradation by converting pyrimidines to the corresponding 5,6-dihydro compounds . This domain carries two Fe4-S4 clusters.. +PF14692 Domain of unknown function (DUF4462)
This domain family is found in eukaryotes, and is approximately 30 amino acids in length.. +PF14693 ShortName;
Ribosomal protein TL5, C-terminal domain. This family contains the C-terminal domain of ribosomal protein TL5. The N-terminal domain, which binds to 5S rRNA, is contained in family Ribosomal_L25p, Pfam:PF01386. Full length (N- and C-terminal domain) homologues of TL5 are also known as CTC proteins. TL5 or CTC are not found in Eukarya or Archaea. In some Bacteria, including E. coli, this ribosomal subunit occurs as a single domain protein (named Ribosomal subunit L25), where the only domain is homologous to TL5 N-terminal domain (hence included in family Pfam:PF01386). The function of the C-terminal domain of TLC is at present unknown.. +PF14694 Lines N-terminus
This family represents the N-terminus of protein lines . In Drosophila this protein is involved in embryonic segmentation and may function as a transcriptional regulator [2-3].. +PF14695 Lines C-terminus
This family represents the C-terminus of protein lines . In Drosophila this protein is involved in embryonic segmentation and may function as a transcriptional regulator [2-3].. +PF14696 Hydroxyphenylpyruvate dioxygenase, HPPD, N-terminal
This domain is one of two barrel-shaped regions that together form the active enzyme, 4-hydroxyphenylpyruvic acid dioxygenase, EC:1.13.11.27. As can be deduced from the disposition of the various Glyoxalase families, _2, _3 and _4 in Pfam, Pfam:PF00903, Pfam:PF12681, Pfam:PF13468, Pfam:PF13669, these two regions are similar to be indicative of a gene-duplication event. At the individual sequence level slight differences in conformation have given rise to slightly different functions. In the case of UniProt:P80064, 4-hydroxyphenylpyruvic acid dioxygenase catalyses the formation of homogentisate from 4-hydroxyphenylpyruvate, and the pyruvate part of the HPPD substrate (4-hydroxyphenylpyruvate), derived from L-tyrosine, and the O2 molecule occupy the three free coordination sites of the catalytic iron atom in the C-terminal domain. In plants and photosynthetic bacteria, the tyrosine degradation pathway is crucial because homogentisate, a tyrosine degradation product, is a precursor for the biosynthesis of photosynthetic pigments, such as quinones or tocopherols .. +PF14697 4Fe-4S dicluster domain
Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters.. +PF14698 Argininosuccinate lyase C-terminal
This domain is found at the C-terminus of argininosuccinate lyase [1-2].. +PF14699 N-terminal domain from the human glycogen debranching enzyme
Jackhammer:GDE_HUMAN. This domain is found at the very N-terminus of the eukaryotic variant of the glycogen debranching enzyme (GDE), where it is immediately followed by the aldolase-like domain. The eukaryotic GDE performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the N- and C-terminal halves of the eukaryotic GDE enzyme, respectively. The hGDE_N domain is involved in the glucosyltransferase activity, probably as a substrate binding module (by analogy to other glucosyltransferases). +PF14700 DNA-directed RNA polymerase N-terminal
This is the N-terminal domain of DNA-directed RNA polymerase. This domain has a role in interaction with regions of upstream promoter DNA and the nascent RNA chain, leading to the processivity of the enzyme . In order to make mRNA transcripts the RNA polymerase undergoes a transition from the initiation phase (which only makes short fragments of RNA) to an elongation phase. This domain undergoes a structural change in the transition from initiation to elongation phase. The structural change results in abolition of the promoter binding site, creation of a channel accommodating the heteroduplex in the active site and formation of an exit tunnel which the RNA transcript passes through after peeling off the heteroduplex .. +PF14701 glucanotransferase domain of human glycogen debranching enzyme
Jackhammer:GDE_HUMAN. This is a glucanotransferase catalytic domain of the eukaryotic variant of the glycogen debranching enzyme (GDE). The eukaryotic GDEs perform two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the N- and C-terminal halves of eukaryotic GDE enzymes, respectively. The hDGE_amylase domain is a catalytic domain responsible for the glucanotransferase function. It belongs to the alpha-amylase clan and is predicted to have the structure of an 8-stranded alpha/beta barrel (TIM barrel) where strands are interrupted by long loops and additional mini-domains. In most other amylases, the catalytic domain is followed by a beta-barrel substrate binding domain, but the presence of such a domain cannot be verified in the human (and other eukaryotic) GDE enzymes. +PF14702 central domain of human glycogen debranching enzyme
Jackhammer:GDE_HUMAN. This is a central domain of the eukaryotic variant of the glycogen debranching enzyme (GDE). The eukaryotic GDE performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the N- and C-terminal halves of the eukaryotic GDE enzyme, respectively. The hGDE_central domain follows the glucanotransferase domain and precedes the glucosidase (GDE_N) domain. It is very likely that the current definition contains two or more domains; by analogy with bacterial GDEs, this domain should be involved in substrate binding either for the N-terminal glucanotransferase and/or the C-terminal glucosidase (or both) . +PF14703 Domain of unknown function (DUF4463)
This is a cytosolic (predicted) domain present in integral membrane proteins, such as TM63C_HUMAN TRANSMEMBRANE PROTEIN 63C. This domain usually precedes a DUF221 (PF02714) domain and follows a RSN1_TM (PF13967) domain. Fold recognition programs consistently and with high significance predict this domain to be distantly homologous to RNA binding proteins from the RRM clan.. +PF14704 Dermatopontin
Members of this family mediate cell adhesion via cell surface integrin binding . They also induce haemagglutination and aggregation of amebocytes [2-3].. +PF14705 Costars
This domain is found both alone and at the C-terminus of actin-binding Rho-activating protein (ABRA). It binds to actin, and in muscle regulates the actin cytoskeleton and cell motility [1-2]. It has a winged helix-like fold consisting of three alpha-helices and four antiparallel beta strands. Unlike typical winged helix proteins it does not bind to DNA, but contains a hydrophobic groove which may be responsible for interaction with other proteins .. +PF14706 Transposase DNA-binding
This domain occurs at the C-terminus of transposases including E. coli tnpA (Swiss:Q46731). TnpA encodes a transposase and an inhibitor protein, the inhibitor only differs from the transposase by the absence of the N-terminal 55 amino acids, which includes most of this domain . This domain consists of alpha helices and turns, and functions as a DNA-binding domain .. +PF14707 C-terminal region of aryl-sulfatase
+PF14709 double strand RNA binding domain from DEAD END PROTEIN 1
A C-terminal domain in human dead end protein 1 (DND1_HUMAN) homologous to double strand RNA binding domains (PF00035, PF00333) . +PF14710 Respiratory nitrate reductase alpha N-terminal
This is the N-terminal tail of the respiratory nitrate reductase alpha chain. The nitrate reductase complex is a dimer of heterotrimers each consisting of an alpha, beta and gamma chain. The N-terminal tail of the alpha chain interacts with the beta chain and contributes to the stability of the heterotrimer .. +PF14711 Respiratory nitrate reductase beta C-terminal
This domain occurs near the C-terminus of the respiratory nitrate reductase beta chain. The nitrate reductase complex is a dimer of heterotrimers each consisting of an alpha, beta and gamma chain. This domain plays a role in the interactions between subunits and shielding of the Fe-S clusters . +PF14712 Snapin/Pallidin
This family of proteins includes Snapin, this protein is associated with the SNARE complex, which mediates synaptic vesicle docking and fusion . It also includes the yeast snapin-like protein SNN1, which is a part of a complex involved in endosomal cargo sorting . The family also includes pallidin, a component of a complex involved in biogenesis of lysosome-related organelles .. +PF14713 Domain of unknown function (DUF4464)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 224 and 241 amino acids in length. There is a conserved YID sequence motif.. +PF14714 KH-domain-like of EngA bacterial GTPase enzymes, C-terminal
The KH-like domain at the C-terminus of the EngA subfamily of essential bacterial GTPases has a unique domain structure position. The two adjacent GTPase domains (GD1 and GD2), two domains of family MMR_HSR1, Pfam:PF01926, pack at either side of the C-terminal domain. This C-terminal domain resembles a KH domain but is missing the distinctive RNA recognition elements. Conserved motifs of the nucleotide binding site of GD1 are integral parts of the GD1-KH domain interface, suggesting the interactions between these two domains are directly influenced by the GTP/GDP cycling of the protein. In contrast, the GD2-KH domain interface is distal to the GDP binding site of GD2.. +PF14715 N-terminal domain of cytochrome oxidase-cbb3, FixP
Pfam-B_28684 (release 26.0). This is the N-terminal domain of FixP, the cytochrome oxidase type-cbb3. the exact function is not known.. +PF14716 Helix-hairpin-helix domain
+PF14717 Domain of unknown function (DUF4465)
JCSG structure SP13250B. A large family of uncharacterized proteins mostly from human gut bacteroides, but also some environmental and water bacteria (Planctomycetes) as well as metagenomic samples. Most proteins from this family are secreted or located on the outer surface and may participate in cell-cell interactions or cell-nutrient interactions. This function is supported by a solved structure of a Bacteroides ovatus homolog, which adopts a galactose binding (jelly-roll) beta barrel structure. +PF14718 Soluble lytic murein transglycosylase L domain
Soluble lytic murein transglycosylase (SLT) consists of three domains, an N-terminal U domain, an L domain (linker domain) and a C-terminal domain (C). The L domain may be involved in the interaction of the enzyme with peptidoglycan .. +PF14719 Phosphotyrosine interaction domain (PTB/PID)
+PF14720 NiFe/NiFeSe hydrogenase small subunit C-terminal
This domain is found at the C-terminus of hydrogenase small subunits including periplasmic [NiFeSe] hydrogenase small subunit, uptake hydrogenase small subunit and periplasmic [NiFe] hydrogenase small subunit. This C-terminal domain binds two of the three iron-sulfur clusters in this enzyme [1-3].. +PF14721 Apoptosis-inducing factor, mitochondrion-associated, C-term
Jackhmmer:JCSG-Target_422903. This C-terminal domain appears to be a dimerisation domain of the mitochondrial apoptosis-inducing factor 1 protein. The domain also appears at the C-terminus of FAD-dependent pyridine nucleotide-disulfide oxidoreductases. Apoptosis inducing factor (AIF) is a bifunctional mitochondrial flavoprotein critical for energy metabolism and induction of caspase-independent apoptosis. On reduction with NADH, AIF undergoes dimerisation and forms tight, long-lived FADH2-NAD charge-transfer complexes proposed to be functionally important.. +PF14722 SSFA2_N;
Ki-ras-induced actin-interacting protein-IP3R-interacting domain. This family includes the N-terminus of the actin-interacting protein sperm-specific antigen 2, or KRAP (Ki-ras-induced actin-interacting protein) . This region is found to be the residues that interact with inositol 1,4,5-trisphosphate receptor (IP3R). KRAP was first localised as a membrane-bound form with extracellular regions suggesting it might be involved in the regulation of filamentous actin and signals from the outside of the cells . It has now been shown to be critical for the proper subcellular localisation and function of IP3R. Inositol 1,4,5-trisphosphate receptor functions as the Ca2+ release channel on specialised endoplasmic reticulum membranes, so the subcellular localisation of IP3R is crucial for its proper function .. +PF14723 Sperm-specific antigen 2 C-terminus
This family includes the C-terminus of the actin-interacting protein sperm-specific antigen 2 .. +PF14724 Mitochondrial-associated sphingomyelin phosphodiesterase
The GO annotation for this family indicates that it is a single-pass membrane protein, and it appears to be found in mitochondrial membranes. Sphingolipids play important roles in regulating cellular responses, and although mitochondria contain sphingolipids, direct regulation of their levels in mitochondria or mitochondria-associated membranes is mostly unclear. Sphingomyelin phosphodiesterases catalyse the hydrolysis of sphingomyelin to ceramide and phosphocholine, and these metabolites are involved in signalling pathways.. +PF14725 Domain of unknown function (DUF4466)
Jackhmmer-JCSG:target_419245-SP18803A. +PF14726 Rotatin, an armadillo repeat protein, centriole functioning
Pfam-B_645 (release 26.0). Rotatin and its homologues such as Ana3 in Drosophila are found to be essential for centriole function . A deficiency of rotatin in mice leads to randomised heart tube looping, defects in embryonic turning , and abnormal expression of HNF3beta, lefty, and nodal. Thus it is required for left-right and axial patterning. Ana3 - the Drosophila homologue - is present in centrioles and basal bodies, is required for the structural integrity of both centrioles and basal bodies and for centriole cohesion. Rotatin also localises to centrioles and basal bodies and appears to be essential for cilia function . This family represents the N-terminal domain.. +PF14727 PTHB1 N-terminus
This family includes the N-terminus of PTHB1 protein. This protein forms a part of the BBSome complex, which is required for ciliogenesis .. +PF14728 PTHB1 C-terminus
This family includes the C-terminus of PTHB1 protein. This protein forms a part of the BBSome complex, which is required for ciliogenesis .. +PF14729 Domain of unknown function with cystatin-like fold (DUF4467)
JCSG target SP18127A; Pfam-B_491 (release 26.0). Large family of predicted lipoproteins from Gram-positive bacteria. Experimentally determined structure shows a cystatin-like fold, allowing us to classify this family in the NFT2 clan, despite lack of any detectable sequence similarity between members of this family and other families in this clan. +PF14730 Domain of unknown function (DUF4468) with TBP-like fold
Jackhammer: JCSG target SP13279C. A large family of (predicted) secreted proteins with unknown functions from human gut and oral cavity. Typically forms an N-terminal domain with FMN binding domain at the C-terminus. Experimentally determined 3D structure of this domain shows a variant of a TATA box binding-like fold, but no detectable sequence similarity to other proteins with this fold. +PF14731 Staphopain proregion
This domain is the proregion of the cysteine protease staphopain. Like many papain type peptidases, staphopain is synthesised as an inactive precursor and cleavage of the proregion is required for activation. This proregion has a half-barrel or barrel-sandwich hybrid fold. The proregion blocks the active site cleft of the mature enzyme on one side of the nucleophilic cysteine . +PF14732 Ubiquitin/SUMO-activating enzyme ubiquitin-like domain
This is the C-terminal domain of ubiquitin-activating enzyme and SUMO-activating enzyme 2. It is structurally similar to ubiquitin. This domain is involved in E1-SUMO-thioester transfer to the SUMO E2 conjugating protein .. +PF14733 AP2-coincident C-terminal
Woodcroft B, Eberhardt R. This family is found at the C-terminus of apicomplexan proteins containing the AP2 domain (Pfam:PF00847).. +PF14734 Domain of unknown function (DUF4469) with IG-like fold
Jackhammer:JCSG target GS13689A. A C-terminal domain in a large family of (predicted) secreted proteins with unknown functions from human gut bacteroides. +PF14735 HAUS augmin-like complex subunit 4
This family includes HAUS augmin-like complex subunit 4. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2].. +PF14736 N_Asn_aminohyd;
Protein N-terminal asparagine amidohydrolase. This family of enzymes catalyse the deamidation of N-terminal asparagines in peptides and proteins to aspartic acid [1-2].. +PF14737 Domain of unknown function (DUF4470)
This family is conserved from fungi to Metazoa and includes plants. The function is not known, but several members have zinc-finger domain, zf-MYND, Pfam:PF01753, at their very C-terminus. Others are also associated with DUF1279, Pfam:PF06916.. +PF14738 Solute carrier (proton/amino acid symporter), TRAMD3 or PAT1
PAT1 (proton amino acid transporter 1), also known as TRAMD3 of AAT-1, is the molecular correlate of the intestinal imino acid carrier. It is a proton-amino acid co-transporter having a stoichiometry of 1:1. Due to its mechanism, PAT1 activity increases at acidic pH, which correlates well with the acidic micro-climate close to the brush-border in the intestine. Glycine, proline, and alanine are the preferred substrates of the transporter. The maximum velocity is similar for the three substrates. All substrates are transported with low affinity, showing Km values in the range of 2-10 mM. The transporter does not discriminate between L- and D-isoforms of these amino acids; in addition, beta-alanine is transported with similar affinity as alpha-alanine. Similar to the IMINO transporter, the amino acid analog MeAIB is recognized by PAT1. The transporter is strongly expressed in the small intestine, colon, kidney, and brain.. +PF14739 Domain of unknown function (DUF4472)
This family is specific to the Chordates. Some members also carry Kinesin-motor domains at their N-terminus, Kinesin, Pfam:PF00225.. +PF14740 Domain of unknown function (DUF4471)
This family is conserved from fungi to Metazoa and includes plants. The function is not known, but several members have zinc-finger domain, zf-MYND, Pfam:PF01753, at their very C-terminus. Others are also associated with DUF1279, Pfam:PF06916. This domain is more C-terminal in many members to DUF4470, Pfam:PF14737.. +PF14741 N-terminal glycosyl-hydrolase-114-associated domain
Naumoff D, Coggill P. This short domain is also a very small family found at the N-terminus of GH114, glycosyl-hydrolases.. +PF14742 N-terminal domain of (some) glycogen debranching enzymes
Jackhammer:YP_001865398. This domain is found on the N-terminal of some glycogen debranching enzymes and is usually followed by the GDE_C (PF06202) and in this sense it is analogous (but probably not homologous) to the GDE_N (PF12439). Its exact function is unknown. +PF14743 DNA ligase OB-like domain
This domain has an OB-like fold, but does not appear to be related to Pfam:PF03120. It is found at the C-terminus of the ATP dependent DNA ligase domain Pfam:PF01068 [1-3].. +PF14744 WASH complex subunit 7
This family is the central, conserved region of proteins that form subunit 7 of the WASH complex . In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes .. +PF14745 WASH complex subunit 7, N-terminal
This family is the conserved N-terminal region of proteins that form subunit 7 of the WASH complex . In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes .. +PF14746 WASH complex subunit 7, C-terminal
This family is the conserved C-terminal region of proteins that form subunit 7 of the WASH complex . In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes . The C-terminus is predicted to include a transmembrane region.. +PF14747 Domain of unknown function (DUF4473)
Pfam-B_8489 (release 26.0). This short family is largely confined to Caenorhabditis proteins. The function is not known. There are two well-conserved aspartate residues.. +PF14748 Pyrroline-5-carboxylate reductase dimerisation
Pyrroline-5-carboxylate reductase consists of two domains, an N-terminal catalytic domain (Pfam:PF03807) and a C-terminal dimerisation domain. This is the dimerisation domain .. +PF14749 Acyl-coenzyme A oxidase N-terminal
Acyl-coenzyme A oxidase consists of three domains. An N-terminal alpha-helical domain, a beta sheet domain (Pfam:PF02770) and a C-terminal catalytic domain (Pfam:PF01756). This entry represents the N-terminal alpha-helical domain .. +PF14750 Integrator complex subunit 2
This family of proteins are subunits of the integrator complex involved in snRNA transcription and processing .. +PF14751 Domain of unknown function (DUF4474)
Jackhmmer:JCSG target SP18061A. Domain found at the N-terminus of a few families of uncharacterized Clostridia proteins. Typically followed by a proline-rich domain or other kinds of repeats. +PF14752 Retinol binding protein receptor
Proteins in this family function as retinol binding protein receptors .. +PF14753 Domain of unknown function (DUF4475)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 99 and 305 amino acids in length.. +PF14754 PPRSV-IRF3_ant;
Papain-like auto-proteinase. Pfam-B_8065 (release 26.0). The replicase polyproteins of the Nidoviruses such as, porcine arterivirus PRRSV, equine arterivirus EAV, human coronavirus 229E, and severe acute respiratory syndrome coronavirus (SARS-CoV), are predicted to be cleaved into 14 non-structural proteins (nsps) by the nsp4 main proteinase Pfam:PF05579 and three accessory proteinases residing in nsp1-alpha, nsp1-beta and nsp2. This family is the two nsp1 proteins that together act in a papain-like way to separate off the rest of the various functional domains of the polyprotein. Once inside the host cell, this nsp1 interferes with the regulation of interferon, thereby enabling the virus to replicate.. +PF14755 Intracellular membrane remodeller
Pfam-B_2813 (release 26.0). This domain represents subunit nsp3 of the RNA-arteriviruses, such as porcine arterivirus PRRSV and equine arterivirus EAV, and is a tetraspanning transmembrane protein that contains a cluster of four highly conserved cysteine residues. These are predicted to reside in the first luminal domain of the protein. Arterivirus nsp3 proteins are uniformly predicted to contain four transmembrane helices, with the N and C termini of the protein residing in the cytoplasm. NSP3 are localised to the ER and appear to be essential for formation of double-membrane vesicles that originate from the ER during the life-cycle of the virus.. +PF14756 Peptidase_C33-associated domain
Pfam-B_535 (release 26.0). The nsps or non-structural protein subunits of the arteriviral polyproteins such as porcine arterivirus PRRSV and equine arterivirus EAV are auto-cleaved into functional units. the function of this particular domain is not known.. +PF14757 Immunogenic region of nsp2 protein of arterivirus polyprotein
Pfam-B_58 (release 26.0). This domain is in a non-essential part of the nsp2 (non-structural protein) subunit section of the arterivirus polyprotein. This domain carries seven small sequence-regions that are predicted to be potential B-cell epitopes.. +PF14758 Non-essential region of nsp2 of arterivirus polyprotein
Pfam-B_6704 (release 26.0). This non-essential region of the nsp2 subunit of the arterivirus polyprotein of arteriviruses such as porcine arterivirus PRRSV and equine arterivirus EAV may offer immunogenic surfaces to B-cells. It is associated with Peptidase_C33, Pfam:PF05412.. +PF14759 Reductase C-terminal
This domain occurs at the C-terminus of various reductase enzymes, including putidaredoxin reductase, ferredoxin reductase, 3-phenylpropionate/cinnamic acid dioxygenase ferredoxin--NAD(+) reductase component, benzene 1,2-dioxygenase system ferredoxin--NAD(+) reductase subunit, rhodocoxin reductase, biphenyl dioxygenase system ferredoxin--NAD(+) reductase component, rubredoxin-NAD(+) reductase and toluene 1,2-dioxygenase system ferredoxin--NAD(+) reductase component. In putidaredoxin reductase this domain is involved in dimerisation . In the FAD-containing NADH-ferredoxin reductase (BphA4) it is responsible for interaction with the Rieske-type [2Fe-2S] ferredoxin (BphA3) .. +PF14760 Rnk N-terminus
This domain occurs at the N-terminus of Rnk, an RNA polymerase-interacting protein of the GreA/GreB family (Pfam:PF01272). It has a coiled coil structure .. +PF14761 Hermansky-Pudlak syndrome 3
This domain is at the N-terminus of these vertebrate proteins. This region carries the clathrin-binding motif LLDFE at residues 172-176 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 .. +PF14762 Hermansky-Pudlak syndrome 3, middle region
This domain is downstream of the N-terminus of these vertebrate proteins. This region carries a number of tyrosine sorting motifs and one of two di-leucine sorting boxes at residues 542-548 as well as a peroxisomal matrix targeting motif at residues 614-623 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 .. +PF14763 Hermansky-Pudlak syndrome 3, C-terminal
This domain is downstream of the mid domain family, Pfam:PF14762, of these vertebrate proteins. This region carries a number of tyrosine sorting motifs and the second of two di-leucine sorting boxes at residues 711-717 as well as the ER membrane-retention signal KKPL at residues 1000-1003 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 .. +PF14764 AP-5 complex subunit, vesicle trafficking
This family would appear to be the second of the two larger subunits of the fifth Adaptor-Protein complex, AP-5. Adaptor protein (AP) complexes facilitate the trafficking of cargo from one membrane compartment of the cell to another by recruiting other proteins to particular types of vesicles. AP-5 is involved in trafficking proteins from endosomes towards other membranous compartments . There are genetic links between AP-5 and hereditary spastic paraplegia, a group of human genetic disorders characterised by progressive spasticity in the lower limbs .. +PF14765 Polyketide synthase dehydratase
Pfam-B_852 (release 26.0). This is the dehydratase domain of polyketide synthases . Structural analysis shows these DH domains are double hotdogs in which the active site contains a histidine from the N-terminal hotdog and an aspartate from the C-terminal hotdog. Studies have uncovered that a substrate tunnel formed between the DH domains may be essential for loading substrates and unloading products .. +PF14766 Replication protein A interacting N-terminal
This family of proteins represents the N-terminal domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. The N-terminal domain is responsible for interaction with importin beta [1-2].. +PF14767 Replication protein A interacting middle
This family of proteins represents the middle domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. This domain is responsible for interaction with RPA [1-2].. +PF14768 Replication protein A interacting C-terminal
This family of proteins represents the C-terminal domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. The C-terminal domain is a putative zinc finger [1-2].. +PF14769 Flagellar C1a complex subunit C1a-32
Jackhmmer:Q6P047, Pfam-B_2704 (release 26.0). This family represents one small subunit, C1a-32, of the C1a projection (the seventh projection of flagella) . Numerous studies have indicated that each of the seven projections associated with the central pair of microtubules in flagella plays a distinct role in regulating eukaryotic ciliary/flagellar motility. The C1a projection is a complex of proteins including PF6, C1a-86, C1a-34, C1a-32, C1a-18, and calmodulin. C1a projection is involved in modulating flagellar beat frequency and this is mediated via the C1a-34, C1a-32, and C1a-18 sub-complex by modulating the activity of both the inner and outer dynein arms .. +PF14770 Transmembrane protein 18
The function of this family is not known, however it is predicted to be a three-pass membrane protein.. +PF14771 Domain of unknown function (DUF4476)
+PF14772 Sperm tail
NYD-SP28 is expressed in a development-dependent manner, localised in spermatogenic cell cytoplasm and human spermatozoa tail. It is post-translationally modified during sperm capacitation and ultimately contributes to the success of fertilisation .. +PF14773 Helicase-associated putative binding domain, C-terminal
Jackhmmer:A4D997, Pfam-B_8865 (release 26.0). The function of this short, serine-rich C-terminal region is not known. However, as it is frequently found at the very C-terminus of P-loop containing nucleoside triphosphate hydrolases, it might possibly be a binding domain.. +PF14774 FAM177 family
This family of proteins is found in eukaryotes. Proteins in this family are typically between 134 and 205 amino acids in length.. +PF14775 Sperm tail C-terminal domain
NYD-SP28 is expressed in a development-dependent manner, localised in spermatogenic cell cytoplasm and human spermatozoa tail. It is post-translationally modified during sperm capacitation and ultimately contributes to the success of fertilisation . This short region is found at the very C-terminus of family members of family NYD-SP28, Pfam:PF14772.. +PF14776 Cation-channel complex subunit UNC-79
This family is a component of a cation-channel complex.. +PF14777 Cilia BBSome complex subunit 10
Jackhmmer:A8MTZ0, Pfam-B_35417 (release 26.0). The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme . BBIP10 localises to the primary cilium, and is present exclusively in ciliated organisms. It is required for cytoplasmic microtubule polymerisation and acetylation, two functions not shared with any other BBSome subunits. BBIP10 physically interacts with HDAC6. BBSome-bound BBIP10 may therefore function to couple acetylation of axonemal microtubules and ciliary membrane growth . The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells, has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction .. +PF14778 Olfactory receptor 4-like
In C.elegans, odr-4 and odr-8 are required for localising a subset of odorant GPCRs to the cilia of olfactory neurons . Olfactory receptors (ORs) are synthesised in endoplasmic reticulum of the olfactory neurons, trafficked to the cell surface membrane and transported to the tip of the olfactory cilium, where they bind with odorants. Various accessory proteins are required for proper targetting of different ORs to the cell membrane. ODR-4 was the first accessory protein to be described.. +PF14779 Ciliary BBSome complex subunit 1
The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme . The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells, has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction . BBS1 predominantly localizes to the basal body and/or transitional zone of ciliated cells. It has been found in a heptameric complex with BBS2, BBS5, BBS7, BBS8, and BBS9, termed the BBSome. Mutations in BBS1 can lead to retinal inadequacy .. +PF14780 Domain of unknown function (DUF4477)
Jackhmmer:Q6NW34, Pfam-B_4074 (release 26.0). +PF14781 Ciliary BBSome complex subunit 2, N-terminal
Pfam-B_5448 (release 26.0). The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme . The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells, has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction . BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia .. +PF14782 Ciliary BBSome complex subunit 2, C-terminal
Pfam-B_5884 (release 26.0). The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme . The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells, has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction . BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia .. +PF14783 Ciliary BBSome complex subunit 2, middle region
Pfam-B_5884 (release 26.0). The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme . The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells, has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction . BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia .. +PF14784 C-terminal domain of the ECSIT protein
Jackhammer:Q9BQ95:268-396. This family represents the C-terminal domain of the evolutionarily conserved signaling intermediate in Toll pathway protein, an adapter protein of the Toll-like and IL-1 receptor signaling pathway, which is involved in the activation of NF-kappa-B via MAP3K1. This domain is missing in isoform 2. Fold recognition suggests that this domain may be distantly homologous to the pleckstrin homology domain. +PF14785 Maltose transport system permease protein MalF P2 domain
This is the second periplasmic domain (P2 domain) of the maltose transport system permease protein MalF [1-2].. +PF14786 DEATH_2;
CATH:1d2z_B_00, Pfam-B_14779 (release 26.0). This Tube-Death domain has an insertion between helices 2 and 3, and a C-terminal tail compared with the Death domain of Pelle proteins in Drosophila. The two N-terminal Death domains of the serine/threonine kinase Pelle and the adaptor protein Tube interact to form a six-helix bundle fold arranged in an open-ended linear array with plastic interfaces mediating their interactions. This interaction leads to the nuclear translocation of the transcription factor Dorsal and activation of zygotic patterning genes during Drosophila embryogenesis, and is assisted by the significant and indispensable contacts in the heterodimer contributed by the insertion and C-terminal tail described above .. +PF14787 GAG-polyprotein viral zinc-finger
+PF14788 EF hand
+PF14789 Tetrahydrodipicolinate N-succinyltransferase middle
This is the middle domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase .. +PF14790 Tetrahydrodipicolinate N-succinyltransferase N-terminal
This is the N-terminal domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase .. +PF14791 DNA polymerase beta thumb
The catalytic region of DNA polymerase beta is split into three domains. An N-terminal fingers domain, a central palm domain and a C-terminal thumb domain. This entry represents the thumb domain .. +PF14792 DNA polymerase beta palm
The catalytic region of DNA polymerase beta is split into three domains. An N-terminal fingers domain, a central palm domain and a C-terminal thumb domain. This entry represents the palm domain .. +PF14793 Domain of unknown function (DUF4478)
This domain is found in bacteria, and is approximately 110 amino acids in length. It is found in association with Pfam:PF03641 and Pfam:PF11892.. +PF14794 Domain of unknown function (DUF4479)
This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF01588.. +PF14795 Leucine-tRNA synthetase-specific domain
This short region is found only in leucyl-tRNA synthetases. It is flexibly linked to the enzyme-core by beta-ribbon structures . +PF14796 Clathrin-adaptor complex-3 beta-1 subunit C-terminal
Pfam-B_195384 (release 26.0). This domain lies at the C-terminus of the clathrin-adaptor protein complex-3 beta-1 subunit. The AP-3 complex is associated with the Golgi region of the cell as well as with more peripheral structures. The AP-3 complex may be directly involved in trafficking to lysosomes or alternatively it may be involved in another pathway, but that mis-sorting in that pathway may indirectly lead to defects in pigment granules .. +PF14797 Serine-rich region of AP3B1, clathrin-adaptor complex
Pfam-B_195384 (release 26.0). This short low-complexity, highly serine-rich region lies on clathrin-adaptor complex 3 beta-1 subunit proteins, between family Adaptin_N, Pfam:PF01602 and a C-terminal domain, AP3B1_C,Pfam:PF14796.. +PF14798 Calcium homeostasis modulator
This family of proteins control cytosolic calcium concentration. They are transmembrane proteins which may be pore-forming ion channels .. +PF14799 FAM195 family
+PF14800 Domain of unknown function (DUF4481)
+PF14801 tRNA methyltransferase complex GCD14 subunit N-term
This is the N-terminal domain of GCD14, itself a subunit of the tRNA methyltransferase complex that is required for 1-methyladenosine modification and maturation of initiator methionyl-tRNA . The exact function of the N-terminus is not known but it is necessary for maintaining the overall folding and for full enzymatic activity.. +PF14802 TMEM192 family
The function of this family of transmembrane proteins is unknown. In vertebrates, proteins in this family are located in the lysosomal membrane and late endosome [1-2]. In Arabidopsis, a member of this family has been found to weakly interact with FRIGIDA, a determinant of flowering time .. +PF14803 Nudix N-terminal
This domain occurs at the N-terminus of several Nudix (Nucleoside Diphosphate linked to X) hydrolases.. +PF14804 Jag N-terminus
This domain is found at the N-terminus of proteins containing Pfam:PF13083 and Pfam:PF01424, including the jag proteins.. +PF14805 Tetrahydrodipicolinate N-succinyltransferase N-terminal
This is the N-terminal domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase .. +PF14806 Coatomer beta subunit appendage platform
This family is found at the C-terminus of the coatomer beta subunit proteins (Beta-coat proteins). It is a platform domain on the appendage that carries a highly conserved tryptophan.. +PF14807 Adaptin AP4 complex epsilon appendage platform
Pfam-B_21377 (release 26.0). This domain is found at the C terminal of clathrin-adaptor epsilon subunit, and at the C-terminus of the appendage on the platform domain.. +PF14808 TMEM164 family
This family of proteins is found in eukaryotes. Proteins in this family are typically between 214 and 330 amino acids in length. There are two conserved sequence motifs: LNPCH and DPF.. +PF14809 C1 domain of tRNA-guanine transglycosylase dimerisation
This short region of the tRNA-guanine transglycosylase enzyme acts as the dimerisation domain of the whole protein .. +PF14810 Patch-forming domain C2 of tRNA-guanine transglycosylase
Domain C2 of tRNA-guanine transglycosylase is formed by a four-stranded anti-parallel beta-sheet lined with two alpha helices. It has conserved basic residues on the surface of the beta-sheets as does the C-terminal domain PUA, Pfam:PF01472. The catalytic domain, TGT has conserved basic residues on the outer surface of the N-terminal three-stranded beta sheet, which closes the barrel, and it is postulated that these basic residues from the three domains form a continuous, positively charged patch to which the tRNA binds .. +PF14811 Protein of unknown function TPD sequence-motif
This is a family of eukaryotic proteins of unknown function. A few members have an associated zinc-finger domain. All members carry a highly conserved TPD sequence-motif.. +PF14812 Transmembrane domain of transglycosylase PBP1 at N-terminal
CATH:3fwl_A_01, Pfam-B_367 (release 26.0). This is the N-terminal, transmembrane, domain of the transglycosylases (penicillin-binding proteins), the multi-domain membrane proteins essential for cell wall synthesis that are targeted by penicillin antibiotics. The TM domain is a single helix, several of whose residues lie in close proximity to hydrophobic residues in the TGT domain. The TM helix seems to be necessary for stabilizing the protein-membrane interaction, and the resulting orientation limits the interaction between PBP1b and lipid II in the membrane in a 2D lateral diffusion fashion .. +PF14813 NADH dehydrogenase 1 beta subcomplex subunit 2
This family represents an accessory subunit of the mitochondrial membrane respiratory chain NADH dehydrogenase (Complex I), that is believed not to be involved in catalysis [1-2].. +PF14814 Bifunctional transglycosylase second domain
UB2H is the second domain of the transglycosylases, or penicillin-binding proteins (PBP1bs), the multi-domain membrane proteins essential for cell wall synthesis that are targeted by penicillin antibiotics. The exact function of the UB2H domain is uncertain, but it may act as the binding component of PBP1b with different binding partners, or it may participate in the regulation between DNA repair and/or synthesis and cell wall formation during the bacterial cell cycle .. +PF14815 NUDIX domain
+PF14816 Family of unknown function, FAM178
+PF14817 HAUS augmin-like complex subunit 5
This family includes HAUS augmin-like complex subunit 5. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2].. +PF14818 Domain of unknown function (DUF4482)
This family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF11365.. +PF14819 Nitrile reductase, 7-cyano-7-deazaguanine-reductase N-term
The QueF monomer is made up of two ferredoxin-like domains aligned together with their beta-sheets that have additional embellishments. This subunit is composed of a three-stranded beta-sheet and two alpha-helices. QueF reduces a nitrile bond to a primary amine. The two monomer units together create suitable substrate-binding pockets .. +PF14820 Small proline-rich 2
This family of small proteins is rich in proline, cysteine and glutamate. They contain a tandemly repeated nonamer, PKCPEPCPP . They are components of the cornified envelope of keratinocytes .. +PF14821 Threonine synthase N terminus
This domain is found at the N-terminus of many threonine synthase enzymes .. +PF14822 Vasohibin
This family of proteins function as angiogenesis inhibitors in animals [1-2].. +PF14823 Sirohaem biosynthesis protein C-terminal
This domain is the C-terminus of a multifunctional enzyme which catalyses the biosynthesis of sirohaem. Both of the catalytic activities of this enzyme, precorrin-2 dehydrogenase (EC:1.3.1.76) and sirohydrochlorin ferrochelatase (EC:4.99.1.4), are located in the N-terminal domain of this enzyme, Pfam:PF13241 .. +PF14824 Sirohaem biosynthesis protein central
This is the central domain of a multifunctional enzyme which catalyses the biosynthesis of sirohaem. Both of the catalytic activities of this enzyme, precorrin-2 dehydrogenase (EC:1.3.1.76) and sirohydrochlorin ferrochelatase (EC:4.99.1.4), are located in the N-terminal domain of this enzyme, Pfam:PF13241 .. +PF14825 Domain of unknown function (DUF4483)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 326 amino acids in length. There is a single completely conserved residue N that may be functionally important.. +PF14826 FACT complex subunit SPT16 N-terminal lobe domain
The FACT or facilitator of chromatin transcription complex binds to and alters the properties of nucleosomes. This family represents the N-terminal lobe of the NTD, or N-terminal domain, and acts as a protein-protein interaction domain presumably with partners outside of the FACT complex . Knockout of the whole NTD domain, 1-450 residues in UniProt:P32558, in yeast serves to render the cells sensitive to DNA replication stress but is not lethal. The C-terminal half of NTD is structurally similar to aminopeptidases, and the most highly conserved surface residues line a cleft equivalent to the aminopeptidase substrate-binding site, family peptidase_M24, Pfam:PF00557 .. +PF14827 Sensory domain of two-component sensor kinase
CATH:3by8_A_00, Pfam-B_120 (release 26.0). Cache_3 is the periplasmic sensor domains of sensor histidine kinase of E. coli DcuS. This domain forms one of the components of the two-component signalling system that allows bacteria to adapt to changing environments. The ability of bacteria to monitor and adapt to their environment is crucial to their survival, and two-component signal transduction systems mediate most of these adaptive responses. One component is a histidine kinase sensor - this domain - most commonly part of a homodimeric transmembrane sensor protein, and the second component is a cytoplasmic response regulator. The two components interact in tandem through a phospho-transfer cascade .. +PF14828 Amnionless
The amnionless protein forms a complex with cubilin. This complex is necessary for vitamin B12 uptake .. +PF14829 Glycerol-3-phosphate acyltransferase N-terminal
GPAT_N is the N-terminal domain of glycerol-3-phosphate acyltransferases, and it forms a four-helix bundle . Glycerol-3-phosphate (1)-acyltransferase (G3PAT) catalyses the incorporation of an acyl group from either acyl-acyl carrier proteins or acyl-CoAs into the sn-1 position of glycerol 3-phosphate to yield 1-acylglycerol-3-phosphate. G3PATs can either be selective, preferentially using the unsaturated fatty acid, oleate (C18:1), as the acyl donor, or non-selective, using either oleate or the saturated fatty acid, palmitate (C16:0), at comparable rates. The differential substrate-specificity for saturated versus unsaturated fatty acids seen within this enzyme family has been implicated in the sensitivity of plants to chilling temperatures . The exact function of this domain is not known. It lies upstream of family Acyltransferase, Pfam:PF01553.. +PF14830 Haemocyanin beta-sandwich
This antiparallel beta sandwich domain occurs in mollusc haemocyanins. Each mollusc haemocyanin contains several globular oxygen binding functional units. Each unit consists of an alpha-helical copper binding domain (Pfam:PF00264) and an antiparallel beta sandwich domain [1-2].. +PF14831 Domain of unknown function (DUF4484)
KOGs (KOG4704), PF09804. This domain is found, in a few members, at the C-terminus of family Avl9, Pfam:PF09794. The function is not known.. +PF14832 Putative oxalocrotonate tautomerase enzyme
4-oxalocrotonate tautomerase enzyme is involved in the anthranilate synthase pathway.1. +PF14833 NAD-binding of NADP-dependent 3-hydroxyisobutyrate dehydrogenase
3-Hydroxyisobutyrate is a central metabolite in the valine catabolic pathway, and is reversibly oxidized to methylmalonate semi-aldehyde by a specific dehydrogenase belonging to the 3-hydroxyacid dehydrogenase family. The reaction is NADP-dependent and this region of the enzyme binds NAD. The NAD-binding domain of 6-phosphogluconate dehydrogenase adopts a Rossmann fold .. +PF14834 Glutathione S-transferase, C-terminal domain
GST conjugates reduced glutathione to a variety of targets including S-crystallin from squid, the eukaryotic elongation factor 1-gamma, the HSP26 family of stress-related proteins and auxin-regulated proteins in plants. Stringent starvation proteins in E. coli are also included in the alignment but are not known to have GST activity. The glutathione molecule binds in a cleft between N and C-terminal domains. The catalytically important residues are proposed to reside in the N-terminal domain .. +PF14835 zf-RING of BARD1-type protein
The RING domain of the breast and ovarian cancer tumour-suppressor BRCA1 interacts with multiple cognate proteins, including the RING protein BARD1. Proper function of the BRCA1 RING domain is critical, as evidenced by the many cancer-predisposing mutations found within this domain. A dimer is formed between the RING domains of BRCA1 and BARD1. The BRCA1-BARD1 structure provides a model for its ubiquitin ligase activity, illustrates how the BRCA1 RING domain can be involved in associations with multiple protein partners and provides a framework for understanding cancer-causing mutations at the molecular level . The corresponding BRCA1-RING domain is on family zf-C3HC4_2, Pfam:PF13923.. +PF14836 Ubiquitin-like domain
This ubiquitin-like domain is found in several ubiquitin carboxyl-terminal hydrolases and in gametogenetin-binding protein.. +PF14837 Integrator complex subunit 5 N-terminus
This family of proteins represents the N-terminus of subunit 5 of the integrator complex involved in snRNA transcription and processing .. +PF14838 Integrator complex subunit 5 C-terminus
This family of proteins represents the C-terminus of subunit 5 of the integrator complex involved in snRNA transcription and processing .. +PF14839 DOR family
This family of proteins regulate autophagy and gene transcription .. +PF14840 Processivity clamp loader gamma complex DNA pol III C-term
This domain lies at the C-terminus of the delta subunit of the DNA polymerase III clamp loader gamma complex. Within the complex the several C-terminal domains, of gamma, delta and delta' form a helical scaffold, on which the rest of the subunits are hung. The gamma complex, an AAA+ ATPase, is the bacterial homologue of the eukaryotic replication factor C that loads the sliding clamp (beta, homologous to PCNA) onto DNA.. +PF14841 FliG middle domain
This is the middle domain of the flagellar rotor protein FliG [1-2].. +PF14842 FliG N-terminal domain
This is the N-terminal domain of the flagellar rotor protein FliG .. +PF14843 Growth factor receptor domain IV
This is the fourth extracellular domain of receptor tyrosine protein kinases. Interaction between this domain and the furin-like domain (Pfam:PF00757) regulates the binding of ligands to the receptor L domains (Pfam:PF01030) .. +PF14844 PH domain associated with Beige/BEACH
This PH domain is found in proteins containing the Beige/BEACH domain (Pfam:PF02138), it immediately precedes the Beige/BEACH domain .. +PF14845 beta-acetyl hexosaminidase like
+PF14846 Domain of unknown function (DUF4485)
This family is found in eukaryotes, and is approximately 90 amino acids in length.. +PF14847 Ras-binding domain of Byr2
CATH:1k8r_B_00, Pfam-B_3317 (release 26.0). This domain is the binding/interacting region of several protein kinases, such as the Schizosaccharomyces pombe Byr2. Byr2 is a Ser/Thr-specific protein kinase acting as mediator of signals for sexual differentiation in S. pombe by initiating a MAPK module, which is a highly conserved element in eukaryotes. Byr2 is activated by interacting with Ras, which then translocates the molecule to the plasma membrane. Ras proteins are key elements in intracellular signaling and are involved in a variety of vital processes such as DNA transcription, growth control, and differentiation. They function like molecular switches cycling between GTP-bound 'on' and GDP-bound 'off' states .. +PF14848 DNA-binding domain
JCSG_Target_393235 / GS13689A, Pfam-B_2593 (release 26.0). +PF14849 YidC periplasmic domain
This is the periplasmic domain of YidC, a bacterial membrane protein which is required for the insertion and assembly of inner membrane proteins [1,2].. +PF14850 DNA-binding domain of Proline dehydrogenase
This domain lies at the N-terminus of bifunctional proline-dehydrogenases and is found to bind DNA.. +PF14851 FAM176 family
Members of the FAM176 family regulate autophagy and apoptosis [1-2].. +PF14852 Fis1 N-terminal tetratricopeptide repeat
The mitochondrial fission protein Fis1 consists of two tetratricopeptide repeats. This domain is the N-terminal tetratricopeptide repeat [1-2]. +PF14853 Fis1 C-terminal tetratricopeptide repeat
The mitochondrial fission protein Fis1 consists of two tetratricopeptide repeats. This domain is the C-terminal tetratricopeptide repeat [1-2]. +PF14854 Leucine rich adaptor protein
+PF14855 Pilus-assembly fibrillin subunit, chaperone
Pfam-B_9717 (release 26.0). PapJ is part of the Pap pilus assembly complex that plays an auxiliary role by ensuring the proper integration of PapA into the fimbrial shaft. PapA is the major shaft protein of the pilus.. +PF14856 Ecp2;
Pathogen effector; putative necrosis-inducing factor. Stergiopoulos I, Coggill P. The domain corresponds to the mature part of the Ecp2 effector protein from the tomato pathogen Cladosporium fulvum. Effectors are low molecular weight proteins that are secreted by bacteria, oomycetes and fungi to manipulate their hosts and adapt to their environment. Ecp2 is a 165 amino acid secreted protein that was originally identified as a virulence factor in C. fulvum, since disruption reduces virulence of the fungus on tomato plants. We have recently determined that Ecp2 is a member of a novel, widely distributed and highly diversified within the fungal kingdom multigene superfamily, which we have designated Hce2, for Homologs of C. fulvum Ecp2 effector. Although Ecp2 is present in most organisms as a small secreted protein, the mature part of this protein can be found fused to other protein domains, including the fungal Glycoside Hydrolase family 18, Glyco_hydro_18 Pfam:PF00704 and other, unknown, protein domains. The intrinsic function of Ecp2 remains unknown but it is postulated that it is a necrosis-inducing factor in plants that serves pathogenicity on the host.. +PF14857 TMEM151 family
This family of proteins is found in eukaryotes. Proteins in this family are typically between 338 and 558 amino acids in length.. +PF14858 Domain of unknown function (DUF4486)
This domain family is found in eukaryotes, and is typically between 542 and 565 amino acids in length.. +PF14859 Colicin M
Colicin M is a toxin produced by, and active against, Escherichia coli. It catalyses the hydrolysis of lipid I and lipid II peptidoglycan intermediates, therefore inhibiting peptidoglycan biosynthesis and leading to lysis of the bacterial cells .. +PF14860 DrrA phosphatidylinositol 4-phosphate binding domain
This domain binds to phosphatidylinositol 4-phosphate. It is found in Legionella pneumophila DrrA, a protein involved in the redirection of endoplasmic reticulum-derived vesicles to the Legionella-containing vacuoles [1,2].. +PF14861 Plant antimicrobial peptide
This family includes plant antimicrobial peptides [1-2]. They adopt an alpha-helical hairpin fold stabilised by two disulphide bonds .. +PF14862 Big defensin
Big defensins are antimicrobial peptides. They consist of a hydrophobic N-terminal half, which is active against Gram-positive bacteria, and a cationic C-terminal half, which is active against Gram-negative bacteria. The C-terminal half adopts a beta-defensin-like structure [1,2].. +PF14863 Alkyl sulfatase dimerisation
This domain is found in alkyl sulfatases such as the Pseudomonas aeruginosa SDS hydrolase (Swiss:Q9I5I9), where it acts as a dimerisation domain . +PF14864 Alkyl sulfatase C-terminal
This domain is found at the C-terminus of alkyl sulfatases. Together with the N-terminal catalytic domain, this domain forms a hydrophobic chute and may recruit hydrophobic substrates .. +PF14865 Macin
The macins are antimicrobial proteins [1-3]. They form a disulphide-stabilised alpha-beta motif .. +PF14866 Potassium channel toxin
This family includes scorpion potassium channel toxins [1-2].. +PF14867 Lantibiotic alpha
Lantibiotics are two-component lanthionine-containing peptide antibiotics active on Gram-positive bacteria [1-2].. +PF14868 Domain of unknown function (DUF4487)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 209 and 938 amino acids in length. There is a conserved WCF sequence motif. There is a single completely conserved residue W that may be functionally important.. +PF14869 Domain of unknown function (DUF4488)
Pfam_7936 (release 26.0). In most members this family covers almost the whole sequence, but a few member-sequences also carry a TonB_C domain, PF03544.. +PF14870 PSII_YCF48;
Photosynthesis system II assembly factor YCF48. Pfam-B_547 (release 26.0). YCF48 is one of several assembly factors of the photosynthesis system II. The photosynthesis system II occurs in Cyanobacteria that are Gram-negative bacteria performing oxygenic photosynthesis. One of the three membranes surrounding these bacteria is the inner thylakoid membrane (TM) system that is localised within the cell and houses the large pigment-protein complexes of the photosynthetic electron transfer chain, i.e. Photosystem (PS) II, PSI, the cytochrome b6f complex, and the ATP synthase. YCF48 is necessary for efficient assembly and repair of the PSII . YCF48 is found predominantly in the thylakoid membrane . It is a BNR repeat protein.. +PF14871 Hypothetical glycosyl hydrolase 6
GHL6 is a family of hypothetical glycoside hydrolases.. +PF14872 Hypothetical glycoside hydrolase 5
GHL5 is a family of hypothetical glycoside hydrolases.. +PF14873 N-terminal domain of BNR-repeat neuraminidase
Pfam-B_13890 (release 26.0). This domain is usually found at the N-terminus of the BNR-repeat neuraminidase protein family.. +PF14874 Flagellar-associated PapD-like
Pfam-B_1987 (release 26.0). This domain is a putative PapD periplasmic pilus chaperone protein family.. +PF14875 N-term cysteine-rich ER, FAM69
The FAM69 family of cysteine-rich type II transmembrane proteins localise to the endoplasmic reticulum (ER) in cultured cells, probably via N-terminal di-arginine motifs. These proteins carry at least 14 luminal cysteines which are conserved in all FAM69s. There are currently few indications of the involvement of FAM69 members in human diseases . It would appear that FAM69 proteins are predicted to have a protein kinase structure and function. Analysis of three-dimensional structure models and conservation of the classic catalytic motifs of protein kinases in four of human FAM69 proteins suggests they might have retained catalytic phosphotransferase activity. An EF-hand Ca2+-binding domain, inserted within the structure of the kinase domain, suggests they function as Ca2+-dependent kinases (unpublished).. +PF14876 RSF1P;
Respiratory growth transcriptional regulator. Pfam-B_36578 (release 26.0). This is a family of transcriptional regulators that determine the transition from fermentative activity to growth on glycerol .. +PF14877 Mitochondrial translation initiation factor
Pfam-B_22619 (release 26.0). This is a family of mitochondrial initiation factors IF3.. +PF14878 Death-like domain of SPT6
Pfam-B_9510 (release 26.0). This DLD domain maintains the characteristic overall topology of death domains, as it consists of a six-helix bundle with three stacked antiparallel helices and an additional helix inserted between the final two helices of the bundle. Although it is unlikely that the Spt6 DLD functions in an apoptotic process in yeast, its prominent location and the observation that it displays the most highly conserved region of the Spt6 surface suggest that it mediates important intermolecular interactions [1,2].. +PF14879 Domain of unknown function (DUF4489)
Pfam-B_28643 (release 26.0). +PF14880 Cytochrome oxidase c assembly
Pfam-B_122767 (release 26.0). COX14 plays an essential role in cytochrome oxidase assembly. The COX14 product is a low-molecular weight membrane protein of mitochondria, but it is not a subunit of cytochrome oxidase . Orthology-prediction methods have identified the vertebrate C12orf62 orthologues to be orthologues of the yeast COX14 .. +PF14881 Tubulin domain
This family includes the tubulin alpha, beta and gamma chains, as well as the bacterial FtsZ family of proteins. Misato from Drosophila and Dml1p from fungi are descendants of an ancestral tubulin-like protein, and exhibit regions with similarity to members of a GTPase family that includes eukaryotic tubulin and prokaryotic FtsZ. Dml1p and Misato have been co-opted into a role in mtDNA inheritance in yeast, and into a cell division-related mechanism in flies, respectively. Dml1p might additionally function in the partitioning of the mitochondrial organelle itself, or in the segregation of chromosomes, thereby explaining its essential requirement. This domain is subject to extensive post-translational modifications.. +PF14882 Hypothetical glycosyl hydrolase 12
GHL12 is a family of hypothetical glycoside hydrolases.. +PF14883 Hypothetical glycosyl hydrolase family 13
GHL13 is a family of hypothetical glycoside hydrolases.. +PF14884 Type I membrane glycoproteins cell-cell fusogen
Podbilewicz B, Coggill P. Podbilewicz B, , Pfam-B_25631 (release 26.0). EFF-AFF was first identified when EFF1 mutants were found to block cell fusion in all epidermal and vulval epithelia in the worm . However, fusion between the anchor cell and the utse syncytium that establishes a continuous uterine-vulval tube proceeds normally in eff-1 mutants and thus Aff1 was established as necessary for this and the fusion of heterologous cells in C. elegans . The transmembrane forms of FF proteins, like most viral fusogens, possess an N-terminal signal sequence followed by a long extracellular portion, a predicted transmembrane domain, and a short intracellular tail. A striking conservation in the position and number of all 16 cysteines in the extracellular portion of FF proteins from different nematode species suggests that these proteins are folded in a similar 3D structure that is essential for their fusogenic activity . C. elegans AFF-1 and EFF-1 proteins are essential for developmental cell-to-cell fusion and can merge insect cells. Thus FFs comprise an ancient family of cellular fusogens that can promote fusion when expressed on a viral particle .. +PF14885 Hypothetical glycosyl hydrolase family 15
GHL15 is a family of hypothetical glycoside hydrolases.. +PF14886 FAM183A and FAM183B related
The function of this family of metazoan sequences is not known.. +PF14887 HMG (high mobility group) box 5
Nucleolar transcription factor/upstream binding factor contains six HMG box domains. This is the fifth HMG box domain in these proteins. This domain has lost DNA-binding ability .. +PF14888 Penicillin-binding protein Tp47 domain C
Domain C is the largest domain in this unusual penicillin-binding protein (PBP), Tp47. This domain is mainly characterised by an immunoglobulin fold with two opposing beta-sheets that form the typical barrel-like structure. In contrast to the classical immunoglobulin fold, however, this has an additional beta-strand inserted after strand 3. Also, the strands are connected by rather large loops. Helices are inserted between strands 2 and 3 and between strands 4 and 5. Domain C interacts with domain B via a surface that has a slightly concave, goblet-like shape. Tp47 is unusual in that it displays β-lactamase activity, and thus it does not fit the classical structural and mechanistic paradigms for PBPs, and thus Tp47 appears to represent a new class of PBP .. +PF14889 Penicillin-binding protein Tp47 domain a
This is the first domain in this unusual penicillin-binding protein (PBP), Tp47 is mainly composed of beta-strands and is sequentially non-contiguous. The first three domains in Tp47 interact with each other through intimate domain-domain interfaces. Domain A contacts domain B through its N-terminal segment. Domain A also interacts tightly with domain C. Tp47 is unusual in that it displays β-lactamase activity, and thus it does not fit the classical structural and mechanistic paradigms for PBPs, and thus Tp47 appears to represent a new class of PBP . . +PF14890 Intein splicing domain
Inteins are segments of protein which excise themselves from a precursor protein and mediate the rejoining of the remainder of the precursor (the extein). Most inteins consist of a splicing domain which is split into two segments by a homing endonuclease domain. This domain represents the splicing domain .. +PF14891 Effector protein
This family of proteins contains an HEXXH motif, typical of zinc metallopeptidases. The family includes the E. coli effector protein NleD, which cleaves and inactivates c-Jun N-terminal kinase (JNK) .. +PF14892 Domain of unknown function (DUF4490)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 101 and 220 amino acids in length. In mice, a member of this family whose expression is induced by p53 may play a role in DNA damage response .. +PF14893 PNMA
The PNMA family includes paraneoplastic antigens Ma 1, 2 and 3, found in the serum of patients with paraneoplastic neurological disorders [1,2]. The family also includes modulator of apoptosis 1, which has a role in death receptor-dependent apoptosis .. +PF14894 Lsm C-terminal
This domain is found at the C-terminus of archaeal Lsm (like-Sm) proteins .. +PF14895 Protein phosphatase 1 inhibitor
This family of proteins interacts with and inhibits the phosphatase activity of protein phosphatase 1 (PP1) complexes .. +PF14896 EmbC C-terminal domain
Pfam-B_4670 (release 7.5). Arabinosyltransferase is involved in arabinogalactan (AG) biosynthesis pathway in mycobacteria. AG is a component of the macromolecular assembly of the mycolyl-AG-peptidoglycan complex of the cell wall. This enzyme has important clinical applications as it is believed to be the target of the antimycobacterial drug Ethambutol . This domain represents the C-terminal extracellular domain that is likely to bind to carbohydrate .. +PF14897 EpsG family
This family of proteins are related to the EpsG protein from B. subtilis Swiss:P71056. These proteins are likely glycosyl transferases belonging to the membrane protein GT-C clan.. +PF14898 Domain of unknown function (DUF4491)
This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 107 amino acids in length. There is a conserved EYY sequence motif.. +PF14899 Domain of unknown function (DUF4492)
This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. The function of these proteins is unknown.. +PF14900 Domain of unknown function (DUF4493)
This family of proteins is found in bacteria. Proteins in this family are typically between 264 and 710 amino acids in length. Many of these proteins have a lipid attachment site suggesting they are lipoproteins.. +PF14901 Cleavage inducing molecular chaperone
Coggill P, Hetherington K. Pfam-B_1192 (release 26.0). Jiv90 is a fragment of the DnaJ protein in eukaryotes and in J-domain protein interacting with viral protein (Jiv) located in the N terminal region of the pestivirus viral polypeptide. The viral protein interacts stably with non structural (NS) protein NS2, causing a conformational change in NS2-NS3 and stimulates NS2-NS3 cleavage in trans. Cleavage of NS2-NS3 increases cytopathogenicity and consequently aids viral replication. Jiv therefore acts as a regulating cofactor for NS2 auto-protease. The efficient release of NS3 from the viral polypeptide by Jiv is considered crucial to the pestivirus cytopathogenicity . In eukaryotes, it usually lies 40 residues downstream of DnaJ family Pfam:PF00226. However, the function in eukaryotes is still unknown.. +PF14902 Domain of unknown function (DUF4494)
This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 172 amino acids in length. There are two conserved sequence motifs: VDA and EAE. There is a single completely conserved residue E that may be functionally important.. +PF14903 WG containing repeat
+PF14904 Family of unknown function
Coggill P, Hetherington K. Function of this protein family is not known.. +PF14905 Outer membrane protein beta-barrel family
This family includes proteins annotated as TonB dependent receptors. But it is also likely to contain other membrane beta barrel proteins of other functions.. +PF14906 Domain of unknown function (DUF4495)
This domain family is found in eukaryotes, and is typically between 322 and 336 amino acids in length. There are two conserved sequence motifs: QMW and DLW. Proteins in this family vary in length from 793 to 1184 amino acids.. +PF14907 Uncharacterised nucleotidyltransferase
This family is likely to be an uncharacterised group of nucleotidyltransferases.. +PF14908 Domain of unknown function (DUF4496)
This domain family is found in eukaryotes, and is typically between 134 and 154 amino acids in length. Proteins in this family vary in length between 264 and 772 amino acid residues.. +PF14909 Spermatogenesis-assoc protein 6
This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family has similarity to the motor domain of kinesin related proteins and with the Caenorhabditis elegans neural calcium sensor protein (NCS-2).. +PF14910 S-phase genomic integrity recombination mediator, N-terminal
MMS22L (Methyl methanesulfonate-sensitivity protein 22-like) is found in yeast, plants and vertebrates, and is integrally concerned with DNA forking and repair mechanisms during replication. MMS22L complexes with TONSL and this complex accumulates at regions of ssDNA associated with distressed replication forks or at processed DNA breaks. Its depletion results in high levels of endogenous DNA double-strand breaks caused by an inability to complete DNA synthesis after replication fork collapse . Thus the complex mediates recovery from replication stress and homologous recombination in vertebrates, yeasts and plants [2,3]. This family is the more N-terminal region of the proteins.. +PF14911 S-phase genomic integrity recombination mediator, C-terminal
MMS22L (Methyl methanesulfonate-sensitivity protein 22-like) is found in yeast, plants and vertebrates, and is integrally concerned with DNA forking and repair mechanisms during replication. MMS22L complexes with TONSL and this complex accumulates at regions of ssDNA associated with distressed replication forks or at processed DNA breaks. Its depletion results in high levels of endogenous DNA double-strand breaks caused by an inability to complete DNA synthesis after replication fork collapse . Thus the complex mediates recovery from replication stress and homologous recombination in vertebrates, yeasts and plants [2,3]. This family is the more C-terminal region of the proteins.. +PF14912 Testicular haploid expressed repeat
This repeat is the only conserved part of the THEG proteins from vertebrate spermatids. Both human and mouse THEG are specifically expressed in the nucleus of haploid male germ cells and are involved in the regulation of nuclear functions [1,2]. Although the differential gene expression of THEG in spermatid-Sertoli cell co-culture supports the relevance of germ cell-Sertoli cell interaction for gene regulation during spermatogenesis, THEG was not found to be essential for spermatogenesis in mice .. +PF14913 DPCD protein family
Coggill P, Hetherington K. This protein is found in eukaryotes and a mutation in this protein is thought to cause Primary Ciliary Dyskinesia (PCD) . This protein is 203 amino acids in length, 23 kDa in size and its function remains unknown. The gene that encodes this protein is a candidate gene for PCD and is expressed during ciliogenesis. PCD affects the airways and reproductive organs, and probing Northern blots show DPCD expression in humans is highest in the testes. Additionally, there is no indication of major splice variants .. +PF14914 LRRC37A/B like protein 1 C-terminal domain
Coggill P, Hetherington K. This family represents the C-terminal domain of the putative Leucine Rich Repeat Containing protein 37A or protein 37B (LRRC37A/B) found in eukaryotes. The Leucine Rich Repeats (LRR) lies in the central region. The gene that encodes this protein is found in the chromosomal position 17q11.2, and its microdeletion results in the disease, neurofibromatosis type-1 (NF1) . The function of the protein, LRRC37B is unknown, however experimental data shows expression in the aorta, heart, skeletal muscle, liver and brain during gestation .. +PF14915 CCDC144C protein coiled-coil region
Coggill P, Hetherington K. This family includes the human protein CCDC144C and the ankyrin repeat domain-containing protein 26-like 1 found in eukaryotes. Its function remains unknown, however, it is known to contain a coiled-coil domain which corresponds to this region. The ankyrin repeat which features in this protein is a common amino acid motif.. +PF14916 Coiled-coil domain of unknown function
This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The function is not known and the proteins carry no other domains.. +PF14917 Coiled coil protein 74, C terminal
Pfam-B_23141 (release 26.0). This is a C-terminal conserved domain of coiled-coil proteins from vertebrates. The function is not known. Expression levels in humans are elevated in breast cancer [].. +PF14918 MDM2-binding
MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle . MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner . MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells . MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) . It is unclear which regions of MTBP interact with which binding-partner. See PF14919, PF14920. . +PF14919 MDM2-binding
MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle . MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner . MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells . MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) . It is unclear which regions of MTBP interact with which binding-partner. See PF14918, PF14920.. +PF14920 MDM2-binding
MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle . MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner . MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells . MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) . It is unclear which regions of MTBP interact with which binding-partner. See PF14918, PF14919.. +PF14921 Adenomatosis polyposis coli down-regulated 1
The domain is duplicated in most members of this family. APCDD is directly regulated by the beta-catenin/Tcf complex, and its elevated expression promotes proliferation of colonic epithelial cells in vitro and in vivo . APCDD1 has an N-terminal signal-peptide and a C-terminal transmembrane region. The domain is rich in cysteines, there being up to 12 such residues, a structural motif important for interaction between Wnt ligands and their receptors. APCDD1 is expressed in a broad repertoire of cell types, indicating that it may regulate a diverse range of biological processes controlled by Wnt signalling .. +PF14922 Protein of unknown function
This is a family of eukaryotic proteins. Most members carry a highly distinctive, conserved sequence motif of FWWh, where h represents a hydrophobic residue. The function of the family is not known.. +PF14923 Coiled-coil protein 142
The function of this coiled-coil domain-containing family is not known. It is found in eukaryotes.. +PF14924 Protein of unknown function (DUF4497)
This domain family is found in eukaryotes, and is typically between 107 and 123 amino acids in length. There are two completely conserved G residues that may be functionally important.. +PF14925 Domain of unknown function
Members of this family carry two distinct, highly conserved sequence motifs, CPPPLYYTHL and HPHLAWLY. The family is found in eukaryotes, and the function is not known. This family lies at the C-terminus of members.. +PF14926 Domain of unknown function (DUF4498)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 308 amino acids in length.. +PF14927 Neurensin
The neurensin family includes the neuronal membrane proteins neurensin-1 and neurensin-2 . Neurensin-1 plays a role in neurite extension .. +PF14928 Short tail fibre protein receptor-binding domain
This domain is a receptor binding domain found on bacteriophage short tail fibre proteins. It contains a zinc-binding site and a potential lipopolysaccharide-binding site .. +PF14929 TAF RNA Polymerase I subunit A
Coggill P, Hetherington K. TATA box binding protein associated factor RNA Polymerase I subunit A is found in eukaryotes and is encoded by the gene TAF1A in humans. Its function is to aid transcription of DNA into RNA by binding to the promoter at the -10 TATA box site. It is a component of the transcription factor SL1/TIF-IB complex, involved in PIC assembly (preinitiation complex) during RNA polymerase I-dependent transcription. The rate of PIC formation depends on the rate of association of this protein. This protein also stabilises nucleolar transcription factor 1/UBTF on rDNA.. +PF14930 Quinohemoprotein amine dehydrogenase, alpha subunit domain II
This is the second domain of the alpha subunit of quinohemoprotein amine dehydrogenase [1,2]. +PF14931 Intraflagellar transport complex B, subunit 20
IFT20 is subunit 20 of the intraflagellar transport complex B . The intraflagellar transport complex assembles and maintains eukaryotic cilia and flagella. IFT20 is localised to the Golgi complex and is anchored there by the Golgi polypeptide, GMAP210, whereas all other subunits except IFT172 localise to cilia and the peri-basal body or centrosomal region at the base of cilia [1,2,3]. IFT20 accompanies Golgi-derived vesicles to the point of exocytosis near the basal bodies where the other IFT polypeptides are present, and where the intact IFT particle is assembled in association with the inner surface of the cell membrane. Passage of the IFT complex then follows, through the flagellar pore recognition site at the transition region, into the ciliary compartment. There also appears to be a role of intraflagellar transport (IFT) polypeptides in the formation of the immune synapse in non ciliated cells. The flagellum, in addition to being a sensory and motile organelle, is also a secretory organelle . A number of IFT components are expressed in haematopoietic cells, which have no cilia, indicating an unexpected role of IFT proteins in immune synapse-assembly and intracellular membrane trafficking in T lymphocytes; this suggests that the immune synapse could represent the functional homologue of the primary cilium in these cells [6,7].. +PF14932 HAUS augmin-like complex subunit 3
This domain is subunit three of the augmin complex found from Drosophila to humans . The HAUS-augmin complex is made up of eight subunits. The augmin complex interacts with gamma-TuRC, and attenuation of this interaction severely impairs spindle MT generation. Furthermore, we provide evidence that human augmin plays critical and non-redundant roles in the kinetochore-MT attachment and also central spindle formation during anaphase in human cells. The HAUS complex is required for mitotic spindle assembly and for maintenance of centrosome integrity .. +PF14933 CEP19-like protein
Coggill P, Hetherington K. This family includes the centrosomal protein of 19 kDa found in eukaryotes. In humans, it is encoded for by the gene CEP19 which is also known as C3orf34. These proteins localize in the centrosomes. Centrosomes are dynamic organelles that assemble around the centrioles. They organise the microtubule cytoskeleton and mitotic spindle apparatus and are required for cell division and cell migration. C3orf34 localizes near the centrosome in early interphase, to spindle poles during mitosis, and to distinct foci oriented towards the midbody at telophase .. +PF14934 Domain of unknown function (DUF4499)
Coggill P, Hetherington K. This family contains a protein found in eukaryotes. Transmembrane protein C10orf57 is encoded for by the gene chromosome 10 open reading frame 57 (C10orf57) located in chromosomal position 10q22.3. The exact function of this protein is still unknown, however it is thought to be an integral membrane protein. The protein sequence is 123 amino acids in length and has a mass of approximately 14.2 kDa. The family also includes some longer proteins that possess an N-terminal dehydrogenase domain, Pfam:PF01073.. +PF14935 Transmembrane protein 138
Coggill P, Hetherington L. This family of proteins is found in eukaryotes and members are approximately 160 amino acids in length. There are two conserved sequence motifs: YYY and DPR. This transmembrane protein belongs to a family found in eukaryotes and is involved in the biogenesis and degradation of ciliated cells . Mutations in this protein cause the disease Joubert syndrome (JBTS) where the cilia become non-motile. Ciliopathy can be severe since cilia provide the cell with large amounts of information through signals. Ciliopathy can affect cell behaviour as the appropriate signals between the cell and its environment are not made, which can affect cell survival.. +PF14936 Tumour protein p53-inducible protein 11
Coggill P, Hetherington K. TP53 is a tumour suppressor gene, when switched on it suppresses tumour development by inducing stable growth arrest or cell apoptosis . The tumour protein TP53 inducible protein 11 encoded for by the gene TP53I11, has a protein sequence of 189 amino acids in length and 21 kDa in mass. The role of this protein is thought to negatively regulate cell proliferation in response to stress, and therefore suppress tumour formation .. +PF14937 Domain of unknown function (DUF4500)
Coggill P, Hetherington K. This family is found in eukaryotes. The function of this protein remains unknown. The gene which encodes for this protein is named chromosome 6 open reading frame 162 (C6orf162) and is found between the chromosomal positions 6q15-q16.1. It is thought that this protein may be an important part of membrane function.. +PF14938 Soluble NSF attachment protein, SNAP
The soluble NSF attachment protein (SNAP) proteins are involved in vesicular transport between the endoplasmic reticulum and Golgi apparatus . They act as adaptors between SNARE (integral membrane SNAP receptor) proteins and NSF (N-ethylmaleimide-sensitive factor) . They are structurally similar to TPR repeats .. +PF14939 ShortName;
DDB1-and CUL4-substrate receptor 15, WD repeat. DCAFs, Ddb1- and Cul4-associated factors, are substrate receptors for the Cul4-Ddb1 Ubiquitin Ligase. There are 18 different factors, the majority of which are WD40-repeat-proteins .. +PF14940 Transmembrane 219
Coggill P, Hetherington K. This protein belongs to a family found in eukaryotes. Proteins in this family are typically between 240 and 315 amino acids in length. The domains in this family vary in length from 202 to 249 amino acids. Its exact function remains unknown, however, it is thought to have a role as a transmembrane protein. More specifically, it is possible that this transmembrane protein may have a role as an insulin-like growth factor binding protein 3-receptor (IGFBP-3R). This receptor binds to the ligand, insulin growth factor 3, which is a p53-induced, apoptosis factor important for cancer prevention .. +PF14941 Transcriptional regulator, Out at first
Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 198 and 332 amino acids in length. The domains in this family vary in length from 239 to 242 amino acids. The gene, OAF (out at first), which encodes this protein, has a promoter which may help mediate regulation of neighbouring genes . An alternative name for this protein is HCV NS5A-transactivated protein 13 target protein 2, which stands for Hepatitis C virus nonstructural 5A-transactivated protein 13 target protein 2. NS5A inhibits double-stranded-RNA-activated protein kinase (PKR) activity, which is thought to allow Hepatitis C Virus replication to continue in the presence of an alpha interferon (IFN)-induced antiviral response .. +PF14942 Organelle biogenesis, Muted-like protein
Coggill P, Hetherington P. The protein is a coiled-coil protein and belongs to a family found in eukaryotes. It undergoes alternative splicing forming two isoforms. The larger isoform is 187 amino acids long in protein sequence length and 21 kDa in mass. The smaller isoform is 110 amino acids long in protein sequence length and 12 kDa in mass. This protein associates with other proteins in order to form biogenesis of lysosome-related organelles complex-1 BLOC1 complex. BLOC-1 is required for the normal biogenesis of specialized organelles of the endosomal-lysosomal system .. +PF14943 Mitochondrial ribosome subunit S26
This family of proteins corresponds to mitochondrial ribosomal subunit S26 in eukaryotes . +PF14944 Tongue Cancer Chemotherapy Resistant Protein 1
Coggill P, Hetherington K. This family of proteins are found in eukaryotes. Tongue Cancer Chemotherapy Resistant-associated Protein 1 (TCRP1) is resistant to the chemotherapy drug, cisplatin, which induces apoptosis in tumour cells. There is suggestion that TCRP1 can be targeted to reverse chemotherapy resistance. The precise mechanism of TCRP1 inducing resistance against chemotherapy is still not clear, but it is thought that TCRP1 alters cell signalling pathways affecting apoptosis or DNA repair capacity. Proteins in this family are typically between 194 and 235 amino acids in length .. +PF14945 Normal lung function maintenance, Low in Lung Cancer 1 protein
Coggill P, Hetherington K. This protein is part of a family found in eukaryotes. It is 137 amino acids long in protein sequence length and mass is approximately 15.7 kDa. The protein is present in the normal lung epithelium, but absent or downregulated in most primary non-small lung cancers. The gene is known as Low in Lung Cancer 1 (LLC1). This protein is thought to have a role in the maintenance of normal lung function and its absence may lead to lung tumourigenesis .. +PF14946 Domain of unknown function (DUF4501)
Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 167 and 308 amino acids in length. The exact function of this protein remains unknown, but it is thought to be a single-pass membrane protein. This family contains many highly conserved cysteine residues.. +PF14947 Winged helix-turn-helix
This winged helix-turn-helix domain contains an extended C-terminal alpha helix which is responsible for dimerisation of this domain .. +PF14948 RESP18 domain
This domain is found in the glucocorticoid-responsive protein regulated endocrine-specific protein 18 (RESP18) and in the N-terminal extracellular region of receptor-type tyrosine-protein phosphatases containing the protein-tyrosine phosphatase receptor IA-2 domain (Pfam:PF11548) [1,2].. +PF14949 ARF7 effector protein C-terminus
This family represents the C-terminus of the ARF7 effector protein (ARF7EP). ARF7EP interacts with ADP-ribosylation factor-like protein 14 and unconventional myosin-Ie and through this interaction controls movement of MHC-II-containing vesicles along the actin cytoskeleton in dendritic cells . It contains a conserved CXCXXXXCXXCXXXCXXCXXXXCXXXCXC motif in its C-terminal half.. +PF14950 Domain of unknown function (DUF4502)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 181 and 876 amino acids in length.. +PF14951 Domain of unknown function (DUF4503)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 313 and 876 amino acids in length.. +PF14952 Putative treble-clef, zinc-finger, Zn-binding
This domain resembles the zinc-binding domain of prokaryotic topoisomerases, family DNA_ligase_ZBD Pfam:PF03119. The function of the eukaryotic proteins it is carried on is not known.. +PF14953 Domain of unknown function (DUF4504)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 253 and 329 amino acids in length. There are two conserved sequence motifs: LLGYP and SFS.. +PF14954 Limb expression 1
This entry represents the limb expression 1 (LIX1) family .. +PF14955 Mitochondrial ribosome subunit S24
This family of proteins corresponds to mitochondrial ribosomal subunit S24 in eukaryotes [1-2].. +PF14956 Domain of unknown function (DUF4505)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 166 and 225 amino acids in length.. +PF14957 Cdc42 effector
The Cdc42 effector (CEP) or binder of Rho GTPases (BORG) proteins are involved in the organisation of the actin cytoskeleton . They may function as negative regulators of Rho GTPase signaling .. +PF14958 Domain of unknown function (DUF4506)
This domain family is found in eukaryotes, and is approximately 140 amino acids in length.. +PF14959 gamma-Secretase-activating protein C-term
GSAP, or gamma-secretase-activating protein, also known as PION, regulates gamma-secretase activity. The holo-protein is a large, approx 850 residue protein that is rapidly cleaved to an active 16 kDa C-terminal fragment that is the stable, predominant form. GSAP is expressed in inclusion bodies and is important in brain function. It dramatically and selectively increases neurotoxic beta-Amyloid production in the brain through a mechanism involving its interactions with both gamma-secretase and its substrate, the amyloid precursor protein C-terminal fragment (APP-CTF). Accumulation of neurotoxic beta-Amyloid is a major hallmark of Alzheimer's disease. Formation of beta-Amyloid is catalysed by gamma-secretase, a protease with numerous substrates that catalyses the intra-membrane cleavage of integral membrane proteins such as Notch receptors and APP (beta-amyloid precursor protein) . The secondary structure of GSAP is largely alpha-helical, lacking well-defined tertiary structure. GSAP represents a type of gamma-secretase regulator that directs enzyme specificity by interacting with a specific substrate .. +PF14960 ATP synthase regulation
Members of this family are subunits of mitochondrial ATP synthase (F-ATPase) [1-2] and vacuolar ATPase (V-ATPase) . In F-ATPase, this subunit regulates mitochondrial ATP synthase population .. +PF14961 Broad-minded protein
Broad-minded protein (BROMI) interacts with cell cycle-related kinase (CCRK), together these proteins regulate ciliary membrane and axonemal growth .. +PF14962 Mitochondria Localisation Sequence
Coggill P, Hetherington K. This family contains a protein found in eukaryotes. Proteins in this family are typically between 240 and 613 amino acids in length. The family is found in association with Pfam:PF07992. This protein family is an N-terminal domain for the mitochondrial localisation sequence for an apoptosis-inducing factor . The protein is also known as Corneal endothelium-specific protein 1 or as Ovary-specific acidic protein. It is thought to be important for membrane function and is expressed in the ovary and corneal endothelium.. +PF14963 Calcium signal-modulating cyclophilin ligand
Calcium signal-modulating cyclophilin ligand was originally identified in a screen for cyclophilin B-interacting proteins. It is likely to be involved in calcium signalling . It has also been shown to interact with many other signalling molecules including proto-oncogene tyrosine-protein kinase LCK, tumor necrosis factor receptor superfamily member 13B and EGFR [2-4].. +PF14964 Domain of unknown function (DUF4507)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 346 and 434 amino acids in length.. +PF14965 Negative regulator of p53/TP53
Coggill P, Hetherington K. This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 213 and 245 amino acids in length. It is found in various tissues, including the brain, liver and kidneys. It was first discovered as a functional unknown gene, murine brain I3 (BRI3). This protein is also known as HCCRBP-1 and it plays a role in tumourigenesis, as it binds to an oncogene, HCCR-1, and acts as a negative regulator of p53/TP53 tumour suppressor. BRI3BP induces tumourigenesis by activating protein kinase C (PKC) activity but decreasing the pro-apoptotic PKC-alpha and PKC-delta isoform levels. BRI3BP is over-expressed in many tumours .. +PF14966 DNA repair REX1-B
This family of proteins includes Chlamydomonas reinhardtii REX1-B (Required for Excision 1-B) which is involved in a light-independent DNA repair pathway .. +PF14967 FAM70 protein
This family of proteins is found in eukaryotes. Proteins in this family are typically between 241 and 349 amino acids in length. The function of this family is unknown.. +PF14968 Coiled coil protein 84
The function of this coiled-coil domain-containing family is not known. It is found in eukaryotes.. +PF14969 Domain of unknown function (DUF4508)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 253 amino acids in length.. +PF14970 Domain of unknown function (DUF4509)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 212 and 449 amino acids in length. There is a conserved WLL sequence motif.. +PF14971 Domain of unknown function (DUF4510)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 242 and 452 amino acids in length. There are two conserved sequence motifs: LEA and WMD.. +PF14972 Mitochondrial morphogenesis regulator
This family of proteins regulate mitochondrial morphogenesis via a mechanism which is independent of mitofusins and dynamin-related protein 1 .. +PF14973 TERF1-interacting nuclear factor 2 N-terminus
This is the N-terminus of TERF1-interacting nuclear factor 2. It is required for the formation of the shelterin complex. The shelterin complex is involved in the protection and maintenance of telomeres [1-3].. +PF14974 Domain of unknown function (DUF4511)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 116 and 127 amino acids in length.. +PF14975 Domain of unknown function (DUF4512)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 74 and 104 amino acids in length. There are two completely conserved residues (C and P) that may be functionally important.. +PF14976 FAM72 protein
This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 264 amino acids in length. The function of this family is unknown.. +PF14977 FAM194 protein
This family is found in eukaryotes, and is approximately 210 amino acids in length. There is a conserved YPSG sequence motif. The function of this family is unknown.. +PF14978 Mitochondrial ribosome protein 63
This family of proteins is present in the intact 55S subunit of the mitochondrial ribosome. It is not known if it belongs to the 28S or to the 39S subunit .. +PF14979 Transmembrane 52
This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 160 and 236 amino acids in length. There is a conserved LLCG sequence motif. The function of this family is unknown.. +PF14980 TIP39 peptide
+PF14981 FAM165 family
This family of proteins known as FAM165 are found in eukaryotes. Members of this family are as yet uncharacterised. Proteins in this family are typically short membrane proteins between 55 and 70 amino acids in length.. +PF14982 UPF0731 family
The UPF0731 family of uncharacterised proteins is found in mammals.. +PF14983 Domain of unknown function (DUF4513)
This family of uncharacterised proteins is found in chordates.. +PF14984 CD24 protein
+PF14985 TM140 protein family
This family of uncharacterised membrane proteins are called transmembrane protein 140. They are found in mammals.. +PF14986 Domain of unknown function (DUF4514)
This family of uncharacterised proteins are found in mammals.. +PF14987 NADH dehydrogenase 1 alpha subcomplex subunit 3
This family of proteins are accessory subunits of the mitochondrial membrane respiratory chain NADH dehydrogenase (Complex I). This subunit is not believed to be catalytic [1-2].. +PF14988 Domain of unknown function (DUF4515)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 198 and 469 amino acids in length. There are two completely conserved L residues that may be functionally important.. +PF14989 Coiled-coil domain containing 32
Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 188 amino acids in length. The gene that encodes this protein is C15orf57 but its protein product is called Protein CCDC32 (Coiled-coil domain containing 32). The exact function of this protein is still unknown.. +PF14990 Domain of unknown function (DUF4516)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 56 and 69 amino acids in length.. +PF14991 Protein melan-A
+PF14992 TMCO5 family
The TMCO5 family includes human transmembrane and coiled-coil domain-containing proteins 5A and 5B.. +PF14993 Neuropeptide S precursor protein
+PF14994 Testis-specific gene 13 protein
This family of uncharacterised proteins are found in chordates. In humans this gene is found to be expressed specifically in the testes.. +PF14995 Transmembrane protein
Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 138 and 164 amino acids in length. There are two completely conserved residues (H and E) that may be functionally important and four transmembrane helices. The domains in this family vary in length from 124 to 126 amino acids. The precise function of the protein family is still unknown.. +PF14996 Retinal Maintenance
Coggill P, Hetherington K. RMP is encoded for by a gene, C8orf37. Mutations in the gene cause two types of retinal dystrophies: cone-rod dystrophy type 16 (CORD16) and retinitis pigmentosa type 64 (RP64). CORD16 affects the cone receptors which detect red, green or blue wavelengths of light and RP64 affects the cone receptors first and then the rod receptors. Both of these affect the photo-receptors in the eye leading to colour blindness or blindness respectively .. +PF14997 CECR6/TMEM121 family
This family includes Cat eye syndrome critical region protein 6, a protein which has been identified in a screen for candidate genes for the developmental disorder Cat Eye Syndrome (CES) . It also includes the TMEM121 transmembrane proteins. The function of this family is unknown.. +PF14998 Transcription Regulator
Coggill P, Hetherington K. The precise function of this family is not clear, but it is thought to play a role in somitogenesis, development and transcriptional repression. Ripply is also known by an alternative name, Bowline. Bowline, is an associate protein of the transcriptional co-repressor XGrg-4 . This family contains two conserved sequence motifs: WRPW and FPVQATI. The WRPW motif is thought to be required for binding to tle/groucho proteins . Ripply3 is also known as Down Syndrome Critical Region Protein 6 homolog . This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 154 amino acids in length.. +PF14999 Shadow of prion protein, neuroprotective
Coggill P, Hetherington K. This protein family is a Prion-like protein and its function is neuroprotective and similar to PrP(C)-like. Shadoo is mainly expressed in the brain, and highly expressed in the hippocampus, the area of the brain which co-ordinates memory as well as spatial memory and navigation. This protein may also alter the biological actions of normal and abnormal Prion Protein (PrP) which lead to lethal neurodegenerative diseases . This family of proteins is found in eukaryotes. Proteins in this family are approximately 150 amino acids in length, of which the first 90 are alanine rich.. +PF15000 Tumour suppressor candidate 2
This family of proteins are candidate tumour suppressors [1-2].. +PF15001 AP-5 complex subunit sigma-1
This family of proteins are subunits of the adaptor protein complex AP-5 .. +PF15002 ERK and JNK pathways, inhibitor
Coggill P, Hetherington K. This coiled-coiled domain, CCDC134, is a secretory protein that inhibits Mitogen activated protein kinase (MAPK) pathways such as Raf-1/MEK/ERK and JNK/SAPK but not p38. CCDC134 is widely expressed in normal adult tissues, tumour tissues and cell lines, which shows its importance in cell signal transduction pathways, transcription regulation and therefore cell survival . Additionally, CCDC134 is known to bind to a transcription adaptor, hADA2a, which forms part of the general control nonderepressible 5 (GCN5) histone acetyltransferase complex. Acetylation usually 'switches genes on' for transcription. Moreover, knocking out CCDC134 suppressed hADA2a-induced cell apoptosis activity and G1/S cell cycle arrest suggesting its importance in cell survival . This family of proteins is found in eukaryotes. Proteins in this family are typically between 188 and 257 amino acids in length. This family is a coiled-coil domain containing protein 134 (CCDC134) whereby the coiled-coiled domain is a ubiquitous motif involved in oligomerisation.. +PF15003 HAUS augmin-like complex subunit 2
Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 291 amino acids in length. HAUS augmin-like complex subunit 2 is alternatively called centrosomal protein of 27 kDa (CEP27). It is localised in the microtubule organising centre, the centrosome. These microtubules are part of the cytoskeleton and give the cell its shape, provide it with a platform for motility and are crucial for mitosis . This protein is part of the HAUS augmin-like complex. This interacts with the gamma-tubulin ring complex (gamma-TuRC) which is required for spindle generation. HAUS2 may also increase the tension between spindle and kinetochore allowing for chromosome segregation during mitosis . This protein is involved in mitotic spindle assembly, maintenance of centrosome integrity and completion of cytokinesis.. +PF15004 Myeloma-overexpressed-like
This family of proteins is found in eukaryotes. It includes human myeloma-overexpressed gene 2 protein. Proteins in this family are typically between 45 and 74 amino acids in length. There are two conserved sequence motifs: MKP and DEMF. The function of this family is unknown.. +PF15005 Izumo sperm-egg fusion
Coggill P, Hetherington K. Izumo is a molecule with a single immunoglobulin (Ig) domain. It is thought that Izumo binds to putative Izumo receptors on the oocyte. Izumo is not detectable on the surface of fresh sperm but becomes exposed only after an exocytotic process, the acrosome reaction, has occurred. Studies have shown that knock-out mice (Izumo-/- males) were sterile despite normal mating behaviour and ejaculation, indicating the importance of the protein in fertilization . There are cysteine residues thought to form a disulphide bridge. Izumo is a typical type I membrane glycoprotein with one immunoglobulin-like domain and a putative N-glycoside link motif (Asn 204) . There is a conserved GCL sequence motif. Izumo expression has been found to be testis-specific [1,2]. This family of proteins is found in eukaryotes and are typically between 193 and 305 amino acids in length.. +PF15006 Domain of unknown function (DUF4517)
Coggill P, Hetherington K. The function of this protein remains unknown. This family of proteins is found in eukaryotes and are typically between 160 and 182 amino acids in length.. +PF15007 Centrosomal spindle body, CEP44
Coggill P, Hetherington K. CEP44 is a coiled coil domain found localised in the centrosome and spindle poles.. +PF15008 Domain of unknown function (DUF4518)
Coggill P, Hetherington K. The precise function of this protein family is unknown but it is thought to be involved in apoptosis regulation.. +PF15009 Transmembrane protein 173
Transmembrane protein 173, also known as stimulator of interferon genes protein (STING), is a transmembrane adaptor protein which is involved in innate immune signalling processes. It induces expression of type I interferons (IFN-alpha and IFN-beta) via the NF-kappa-B and IRF3 pathways in response to non-self cytosolic RNA and dsDNA [1-4].. +PF15010 Putative cell signalling
Coggill P, Hetherington K. The precise function of this protein family is unknown, however studies have shown it undergoes Protein N-myristoylation; a type of lipid modification in eukaryotic and viral proteins. Protein N-myristoylation is usually an irreversible co-translational protein modification which is useful in cell signal transduction pathways . This indicates that FAM131 may have some sort of role in cell signalling due to its ability to be myristoylated. This family of proteins is found in eukaryotes and are typically between 257 and 361 amino acids in length.. +PF15011 Casein Kinase 2 substrate
Coggill P, Hetherington K. It is suggested that CK2S (C10orf109) is important in the regulation of cancer cell proliferation. Studies have indicated that CK2S is the downstream target of a protein kinase, casein kinase 2 (CK2), which is upregulated in cancer cells. CK2S has been found to be upregulated in cancer cells. The precise mechanism of CK2 targetting CK2S is not well characterised. It is found to be localised in the nucleus and cytoplasm . This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 221 amino acids in length. There is a single completely conserved residue P that may be functionally important.. +PF15012 Domain of unknown function (DUF4519)
This family of proteins is found in eukaryotes. Proteins in this family are typically between and 59 amino acids in length. There are two conserved sequence motifs: KET and VLP. There is a single completely conserved residue P that may be functionally important.. +PF15013 CCSMST1 family
This family of proteins was discovered in a screen of Bos taurus placental ESTs. The B. taurus member of this family was named cattle cerebrum and skeletal muscle-specific transcript 1 . This family of proteins is found in eukaryotes. Proteins in this family are typically between 97 and 157 amino acids in length. There is a single completely conserved residue D that may be functionally important. The function of this family is unknown.. +PF15014 Ceroid-lipofuscinosis neuronal protein 5
+PF15015 Spermatogenesis-associated, N-terminal
NYD-SP12, also known as SPATA16, is a germ-cell specific participant in the Golgi apparatus, and its expression is confined to spermatogenic epithelium, not being found in interstitial cells . Computer analysis of the protein-sequence showed that NYD-SP12 contains a cluster of phosphorylation sites for protein kinase C as well as for cyclic nucleotide-dependent protein kinases [2,3]. It is postulated that since the mutation of some Golgi apparatus’ proteins are responsible for male infertility that NYD-SP12 might play a role in modification and sorting of acrosomal enzymes . OMIM:102530.. +PF15016 Domain of unknown function (DUF4520)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 197 and 638 amino acids in length. This is the C-terminal domain of the member proteins.. +PF15017 Drug resistance and apoptosis regulator
Coggill P, Hetherington K. AF1q is an oncogenic factor involved in leukaemia development, thyroid tumourigenesis, and breast cancer metastasis. AF1q plays a critical role in the regulation of apoptosis and drug resistance. It was initially identified as a mixed-lineage leukaemia fusion partner (MLL11) in infant acute myelomonocytic leukemia carrying t(1;11)(q21;q23) translocation. It is located in chromosome 1 band 21 . AF1Q may be a novel mediator of metastasis promotion in human breast cancer through regulation of the MMP pathway and RhoC expression . This family of proteins is found in eukaryotes. Proteins in this family are typically between 25 and 482 amino acids in length.. +PF15018 TRP-interacting helix
Coggill P, Hetherington K. This highly conserved motif is thought to be a transmembrane helix that binds to transient receptor potential (TRP) calcium channel. It is known that proline-rich proteins inactivate tannins found in food compounds, and it is putatively thought that PRR24 does too. This is important since tannins often inhibit the uptake of iron . InaF is a protein required for TRP calcium channel function in Drosophila [2,3]. TRP-related channels have been suggested to mediate store-operated calcium entry, important for Ca2+ homeostasis in a wide variety of cell types . The amino acid sequence of PRR-24 contains two completely conserved Y residues that may be functionally important. This domain family is found in eukaryotes, and is approximately 40 amino acids in length.. +PF15019 FTDALS;
C9orf72-like protein family. Coggill P, Hetherington K. The precise function of this family is unknown but members have been found to be localised in the cytoplasm of brain tissue. Defects in the gene, C9orf72, are the cause of frontotemporal dementia and/or amyotrophic lateral sclerosis (FTDALS) which is an autosomal dominant neurodegenerative disorder. The disorder is caused by a large expansion of a GGGGCC hexa-nucleotide within the first C9orf72 intron located between the first and the second non-coding exons. The expansion leads to the loss of transcription of one of the two transcripts encoding isoform 1 and to the formation of nuclear RNA foci . This domain family is found in eukaryotes, and is typically between 230 and 250 amino acids in length. There is a single completely conserved residue F that may be functionally important.. +PF15020 Cation channel sperm-associated protein subunit delta
The CATSPER (cation channel of sperm) complex is a tetrameric complex consisting of CATSPER1, CATSPER2, CATSPER3 and CATSPER4, it functions as an alkalinisation-activated calcium channel. This complex requires several auxiliary subunits, including CATSPERD. CATSPERD is essential for the cation channel function and may play a role in channel assembly or transport .. +PF15021 Protein of unknown function (DUF4521)
This family of vertebrate proteins is functionally uncharacterised. The family includes the Chromosome 20 protein C20orf196.. +PF15022 Protein of unknown function (DUF4522)
This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. In human this protein is known as C4orf36.. +PF15023 Protein of unknown function (DUF4523)
This family of proteins is functionally uncharacterised. This family of proteins is found in mammals.. +PF15024 Glycosyltransferase family 18
Enzymes belonging to glycosyltransferase family 18 (alpha-1,6-mannosylglycoprotein 6-beta-N-acetylglucosaminyltransferase) contribute to the creation of branches in complex-type N-glycans. This domain is responsible for the catalytic activity of the enzyme .. +PF15025 Domain of unknown function (DUF4524)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 197 and 638 amino acids in length. This is the N-terminal domain of the member proteins. The human gene is from C5orf34.. +PF15026 FAM74 protein
This family of uncharacterised proteins are found in humans and are known as FAM74 proteins. Members of this family contain several short protein repeats.. +PF15027 Domain of unknown function (DUF4525)
This domain is found in eukaryotes. It is often found at the N-terminus of glycosyltransferase family 18 enzymes (Pfam:PF15024). It is also found in coiled-coil domain-containing protein 126.. +PF15028 Pre-T-cell antigen receptor
The pre-T-cell antigen receptor (pre-TCR), expressed by immature thymocytes, has a pivotal role in early T-cell development, including TCR beta-selection, survival and proliferation of CD4(-)CD8(-) double-negative thymocytes, and subsequent alpha/beta T-cell lineage differentiation . This protein contains an immunoglobulin domain .. +PF15029 Protein of unknown function (DUF4526)
This family of proteins is functionally uncharacterised. This family of proteins is found in mammals and includes the human integral membrane protein TMEM174 protein.. +PF15030 Protein of unknown function (DUF4527)
This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates.. +PF15031 Domain of unknown function (DUF4528)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 95 and 154 amino acids in length. This family includes Human C15orf61.. +PF15032 Protein of unknown function (DUF4529)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. The proteins contain a conserved VLPPLK sequence motif.. +PF15033 Kinocilin protein
This family of kinocilin proteins is found in vertebrates. In mouse it has been shown that this protein is expressed primarily in the kinocilium of sensory cells in the inner ear .. +PF15034 KRTAP type 7 family
This family of keratin-associated proteins is found in vertebrates.. +PF15035 Ciliary rootlet component, centrosome cohesion
+PF15036 Interleukin 34
+PF15037 Interleukin-17 receptor extracellular region
This domain is found at the N-terminus (extracellular region) of interleukin-17 receptor C and Interleukin-17 receptor E. This is the presumed ligand-binding domain . Human putative interleukin-17 receptor E-like consists only of this domain.. +PF15038 Jiraiya
Jiraiya inhibits bone morphogenetic protein (BMP) signaling during embryogenesis . The human member of this family is TMEM221.. +PF15039 Domain of unknown function (DUF4530)
This family of proteins is found in eukaryotes. Proteins in this family are typically around 140 amino acids in length. The human member of this family is C19orf69.. +PF15040 Humanin family
This family of proteins is found exclusively in humans. Humanin is a short anti-apoptotic peptide that interacts with Bax .. +PF15041 Domain of unknown function (DUF4531)
This family of uncharacterised proteins is found in mammals. This family includes the human protein C19orf71.. +PF15042 Late cornified envelope-like proline-rich protein 1
This family of uncharacterised proteins is found in mammals.. +PF15043 CB1 cannabinoid receptor-interacting protein 1
This family of proteins interacts with cannabinoid receptor 1 (CNR1) and attenuates CNR1-mediated tonic inhibition of voltage-gated calcium channels .. +PF15044 Mitochondrial function, CLU-N-term
CLU_N is the N-terminal domain of the Clueless protein, also known as TIF31-like in other organisms. The function of this domain is not known. This family is found in association with Pfam:PF13236.. +PF15045 Clathrin-binding box of Aftiphilin, vesicle trafficking
Aftiphilin forms a stable complex with p200 and gamma-synergin. This family contains a clathrin box, with two identified clathrin-binding motifs. This family of proteins is found in eukaryotes.. +PF15046 Protein of unknown function (DUF4532)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes.. +PF15047 Protein of unknown function (DUF4533)
This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. This family includes two human proteins: C12orf60 and C12orf69.. +PF15048 Organic solute transporter subunit beta protein
+PF15049 Protein of unknown function (DUF4534)
This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. Proteins in this family are typically between 170 and 190 amino acids in length. The protein includes the human integral membrane TMEM217 protein.. +PF15050 SCIMP protein
This family contains the SCIMP proteins, which are transmembrane adaptor proteins involved in major histocompatibility complex class II signaling .. +PF15051 FAM198 protein
This family of proteins is found in eukaryotes. The function of this family is unknown. Murine FAM198B is downregulated by FGFR signalling .. +PF15052 TMEM169 protein family
Coggill P, Hetherington K. This domain is thought to be structured transmembrane helices and includes the intermediary cytoplasmic domain. It is found in eukaryotes, and is approximately 130 amino acids in length.. +PF15053 Mjmu-R1-like protein family
Coggill P, Hetherington K. This protein family is thought to have a role in spermatogenesis. This family of proteins is found in eukaryotes. In humans, it is found in chromosome 17 open reading frame 75 (C17orf75). Proteins in this family are typically between 217 and 399 amino acids in length.. +PF15054 Domain of unknown function (DUF4535)
Coggill P, Hetherington K. This family includes the uncharacterised protein C7orf73 that is found in eukaryotes. Members are generally less than 100 residues in length. Although the precise function of the domain is still unknown, members have a predicted N-terminal signal peptide sequence which suggests they are short secreted peptides.. +PF15055 Domain of unknown function (DUF4536)
Coggill P, Hetherington K. This domain family is thought to be a transmembrane helix. It is found in eukaryotes, and is approximately 50 amino acids in length. In humans, it is located in the chromosomal position, C9orf123.. +PF15056 Neuritin protein family
Coggill P, Hetherington K. The domain family Neuritin1 (NRN1) is a GPI-anchored protein expressed in post-mitotic-differentiating neurons in the developing nervous system . NRN1 is a glutamate and neurotrophin receptor target encoding a neuronal protein that functions extracellularly to modulate neurite outgrowth (OMIM:607409). This family of proteins is found in eukaryotes. Proteins in this family are typically between and 158 amino acids in length.. +PF15057 Domain of unknown function (DUF4537)
The function of this domain family is unknown. It is found in eukaryotes, and is typically between 119 and 141 amino acids in length. In humans, it is found in the chromosomal position C11orf16.. +PF15058 Speriolin N terminus
This family represents the N-terminus of the sperm centrosome protein speriolin [1-2].. +PF15059 Speriolin C-terminus
This family represents the C-terminus of the sperm centrosome protein speriolin [1-2].. +PF15060 Differentiation and proliferation regulator
Coggill P, Hetherington K. Pancreatic progenitor cell differentiation and proliferation factor-like protein (PPDFL) is alternatively named Exocrine differentiation and proliferation factor-like protein. PPDFL regulates exocrine cell fate. This protein is highly expressed in exocrine progenitor cells which eventually differentiate to form exocrine pancreatic cells .. +PF15061 Domain of unknown function (DUF4538)
Coggill P, Hetherington K. This protein family is thought to be a transmembrane helix. Its function remains unknown. This family of proteins is found in eukaryotes. Proteins in this family are typically between 58 and 87 amino acids in length.. +PF15062 Haemopoietic lineage transmembrane helix
Coggill P, Hetherington K. ADP-ribosylation factor-like protein 6-interacting protein 6 (ARP6) is a transmembrane helix present in the J2E erythro-leukaemic cell line, but not its myeloid variants. In tissues, ARL-6 mRNA was most abundant in brain and kidney. While ARL-6 protein was predominantly cytosolic, it is known to bind to SEC61-beta subunit of a protein conducting channel SEC61p .. +PF15063 Thyroid cancer protein 1
Thyroid cancer protein 1 (TC1) is thought to decrease apoptosis and increase cell proliferation. It is found to be expressed in thyroid papillary carcinoma . This suggests its importance in thyroid cancer. The molecular mechanism of TC1 involves up-regulating cell signalling through ERK-1/2 signalling pathway and it positively regulates transition between the G1 and S phase in the cell cycle . It is thought to positively regulate Wnt/beta-catenin signalling pathway by interacting with its repressor . In humans, it is located in the chromosomal position, C8orf4. This family of proteins is found in eukaryotes and contains a conserved NIF sequence motif.. +PF15064 Cation channel sperm-associated protein subunit gamma
This family represents the gamma subunit of the CATSPER, or cation channel sperm-associated protein complex. The complex appears only to be expressed in the flagellum of sperm. The complex is activated at alkaline intracellular pH, and being restricted to the flagellum is the mediating calcium channel.. +PF15065 Lysosomal transcription factor, NCU-G1
NCU-G1 is a set of highly conserved nuclear proteins rich in proline with a molecular weight of approximately 44 kDa. Especially high levels are detected in human prostate, liver and kidney. NCU-G1 is a dual-function family capable of functioning as a transcription factor as well as a nuclear receptor co-activator by stimulating the transcriptional activity of peroxisome proliferator-activated receptor-alpha (PPAR-alpha) .. +PF15066 Cancer-associated gene protein 1 family
CAGE-1 is a family of proteins overexpressed in tumour tissues compared with surrounding tissues. CAGE-1 gene showed testis-specific expression among normal tissues and displayed wide expression in a variety of cancer cell lines and cancer tissues . CAGE-1 is predominantly expressed during post-meiotic stages. It localises to the acrosomal matrix and acrosomal granule showing it to be a component of the acrosome of mammalian spermatids and spermatozoa .. +PF15067 FAM124 family
Coggill P, Hetherington K. The exact function of this protein family remains unknown. This family of proteins is found in eukaryotes. Proteins in this family are approximately 480 amino acids in length. There is a conserved LFL sequence motif.. +PF15068 FAM101 family
This protein family includes the actin regulators, Refilin A and B, however the exact function of this protein family remains unknown. Refilin is thought to stabilise peri-nuclear actin filament bundles, important in fibroblasts. Refilin is important as changes in localisation and shape in the nucleus plays a role in cellular and developmental processes .. +PF15069 FAM163 family
Coggill P, Hetherington K. This protein family is alternatively named Neuroblastoma-derived secretory proteins. Highly expressed in neuroblastoma compared to other tissues, suggesting that it may be used as a marker for metastasis in bone marrow .. +PF15070 Putative golgin subfamily A member 2-like protein 5
The function of the GOLGA2L5 protein family remains unknown. This family of proteins is thought to be found in the Golgi apparatus of eukaryotes. Proteins in this family are typically between and 840 amino acids in length.. +PF15071 Transmembrane family 220, helix
Coggill P, Hetherington K. Transmembrane 220 (TMEM220) is a domain of unknown function. It is thought to be a transmembrane helix. The length of this protein is typically between 150 and 160 amino acids. In humans, it is found in the chromosomal position 17p13.1.. +PF15072 Domain of unknown function (DUF4539)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 230 and 625 amino acids in length.. +PF15073 Domain of unknown function (DUF4540)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 302 amino acids in length. In humans, it is found in the chromosomal position, C7orf72.. +PF15074 Domain of unknown function (DUF4541)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 100 and 163 amino acids in length. There is a conserved KLHRDDR sequence motif. There is a single completely conserved residue Y that may be functionally important. In humans, the gene is found in the chromosomal location, C5orf49.. +PF15075 Domain of unknown function (DUF4542)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 123 and 173 amino acids in length. There is a conserved IPPYN sequence motif. The gene that encodes this protein in humans, is found in the chromosomal position, C17orf98.. +PF15076 Domain of unknown function (DUF4543)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 90 amino acids in length. The human member of this family is C17orf67.. +PF15077 Domain of unknown function (DUF4544)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 118 and 256 amino acids in length. The human member of this family is C11orf85.. +PF15078 Domain of unknown function (DUF4545)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 417 amino acids in length. The human member of this family is C1orf141.. +PF15079 Domain of unknown function (DUF4546)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 88 and 212 amino acids in length. The human member of this family is C1orf49.. +PF15080 Domain of unknown function (DUF4547)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 144 and 206 amino acids in length. The human member of this family is C3orf43.. +PF15081 Domain of unknown function (DUF4548)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 178 amino acids in length. The human member of this family is C1orf105.. +PF15082 Domain of unknown function (DUF4549)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 143 and 1871 amino acids in length. The human member of this family is C6orf183.. +PF15083 Colipase-like
Eberhardt RY, Coggill P, Hetherington K. This is a family of colipase-like proteins.. +PF15084 Domain of unknown function (DUF4550)
This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 100 amino acids in length. This domain contains an N-terminal HXE motif.. +PF15085 Neuropeptide FF
Eberhardt RY, Coggill P, Hetherington K. +PF15086 Uncharacterised protein family UPF0542
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved LSWKL sequence motif. This family includes human protein C5orf43.. +PF15087 Protein of unknown function (DUF4551)
This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. This family includes human protein C12orf56.. +PF15088 NADH dehydrogenase [ubiquinone] 1 subunit C1, mitochondrial
Eberhardt RY, Coggill P, Hetherington K. +PF15089 Domain of unknown function (DUF4552)
This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. Proteins in this family are typically between 425 and 649 amino acids in length.. +PF15090 Domain of unknown function (DUF4553)
This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes the human protein C10orf12.. +PF15091 Domain of unknown function (DUF4554)
This family of proteins is functionally uncharacterised. This family of proteins is found in some vertebrates. This family includes human protein C11orf80.. +PF15092 Uncharacterised protein family UPF0728
This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. There is a conserved GPY sequence motif.. +PF15093 Domain of unknown function (DUF4555)
This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. This family includes the human protein C7orf31.. +PF15094 Domain of unknown function (DUF4556)
This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes human protein C1orf127.. +PF15095 Interleukin 33
Eberhardt RY, Coggill P, Hetherington K. +PF15096 G6B family
Eberhardt RY, Coggill P, Hetherington K. +PF15097 Immunoglobulin J chain
Eberhardt RY, Coggill P, Hetherington K. +PF15098 TMEM89 protein family
The function of this family of transmembrane proteins, TMEM89, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are approximately 159 amino acids in length.. +PF15099 Phosphoinositide-interacting protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family, PIRT, is not known, however it is predicted to be a multi-pass membrane protein. This family of proteins is thought to have a role in positively regulating TRPV1 channel activity via phosphatidylinositol 4,5-bisphosphate (PIP2). This family of proteins is found in eukaryotes. Proteins in this family are located in the cell membrane . Proteins in this family are approximately 140 amino acids in length.. +PF15100 TMEM187 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family, TMEM187, is not known, however it is predicted to be a multi-pass membrane protein. Members of this family are as yet uncharacterised. This protein family is also alternatively named ITBA1. This family of proteins is found in eukaryotes. Proteins in this family are typically between 239 and 267 amino acids in length.. +PF15101 Domain of unknown function (DUF4557)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved TVF sequence motif.. +PF15102 TMEM154 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins has not, as yet, been determined. However, it is thought to be a therapeutic target for ovine lentivirus infection . This family of proteins is found in eukaryotes and members are typically between 138 and 320 amino acids in length.. +PF15103 G0/G1 switch protein 2
Eberhardt RY, Coggill P, Hetherington K. This family of proteins regulate apoptosis by binding to Bcl-2 and preventing the formation of the anti-apoptotic BAX-BCL2 heterodimers .. +PF15104 Domain of unknown function (DUF4558)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 78 and 121 amino acids in length. One member is annotated as being a flagellar associated protein.. +PF15105 TMEM61 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 150 and 211 amino acids in length.. +PF15106 TMEM156 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, TMEM 156, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins are found in eukaryotes. Proteins in this family are approximately 310 amino acids in length. In humans, the gene encoding this protein is located in the chromosomal position, 4p14.. +PF15107 FAM216B protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of proteins, FAM216B, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins are found in eukaryotes. Proteins in this family are approximately 150 amino acids in length. In humans, the gene encoding this protein is located in the position, C13orf30.. +PF15108 Voltage-dependent calcium channel gamma-like subunit protein family
Eberhardt RY, Coggill P, Hetherington K. This family of transmembrane proteins, TMEM37, has a role in stabilising the calcium channel in an inactivated (closed) state. It is a subunit of the L-type calcium channels. This family of proteins are found in eukaryotes. Proteins in this family are approximately 210 amino acids in length.. +PF15109 TMEM125 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, TMEM125, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 55 and 232 amino acids in length.. +PF15110 TMEM141 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, TMEM141, has not, as yet, been determined. Members of this family remain uncharacterised. TMEM141 protein family is found in eukaryotes. Proteins in this family are typically between 103 and 124 amino acids in length. There are two completely conserved residues (C and W) that may be functionally important.. +PF15111 TMEM101 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, TMEM101, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 127 and 257 amino acids in length.. +PF15112 Domain of unknown function (DUF4559)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein CXorf38.. +PF15113 TMEM117 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 181 and 504 amino acids in length.. +PF15114 Uncharacterised protein family UPF0640
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 70 and 80 amino acids in length. There are two conserved sequence motifs: PGK and YRFLP.. +PF15115 Domain of unknown function with conserved HDNR motif
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 219 amino acids in length. There is a conserved HDNR sequence motif. The function is not known.. +PF15116 CAMPATH-1 antigen
Eberhardt RY, Coggill P, Hetherington K. +PF15117 Uncharacterised protein family UPF0697
This family of uncharacterised proteins is found in vertebrates. Proteins in this family are typically around 100 amino acids in length.. +PF15118 Domain of unknown function (DUF4560)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 66 and 78 amino acids in length. There are two conserved sequence motifs: FCK and RTL.. +PF15119 Apolipoprotein C4
Eberhardt RY, Coggill P, Hetherington K. +PF15120 Domain of unknown function (DUF4561)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes.. +PF15121 TMEM71 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family, TMEM71, is not known, however it is predicted to be a transmembrane protein. This family of proteins is found in eukaryotes and located in the cell membrane. Proteins in this family vary between 41 and 291 amino acids in length.. +PF15122 TMEM206 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, TMEM206, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are approximately 350 amino acids in length.. +PF15123 Domain of unknown function (DUF4562)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved HRYQNPW sequence motif. This family includes the human protein C4orf45.. +PF15124 Domain of unknown function (DUF4563)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C3orf24.. +PF15125 TMEM238 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, TMEM238, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 61 and 153 amino acids in length.. +PF15127 Protein of unknown function (DUF4565)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C2orf88.. +PF15128 T-cell leukemia translocation-altered
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is required for osteoclastogenesis .. +PF15129 FAM150 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins known as FAM150 is found in eukaryotes. Members of this family are as yet uncharacterised. Proteins in this family are approximately 143 amino acids in length. The function of this family has not, as yet, been determined, however it is predicted to be a secretory protein family.. +PF15130 Domain of unknown function (DUF4566)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein C6orf62.. +PF15131 Domain of unknown function (DUF4567)
This family of proteins is functionally uncharacterised. This family of proteins is found in some mammals.. +PF15132 Domain of unknown function (DUF4568)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes.. +PF15133 Domain of unknown function (DUF4569)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein CXorf21.. +PF15134 Domain of unknown function (DUF4570)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes.. +PF15135 Uncharacterised protein UPF0515
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There are two conserved sequence motifs: PLT and HSC.. +PF15136 Uncharacterised protein family UPF0449
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved LPTRP sequence motif.. +PF15137 Domain of unknown function (DUF4571)
This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes human protein C21orf62.. +PF15138 Syncollin
Eberhardt RY, Coggill P, Hetherington K. This family has a role in zymogen granule exocytosis [1-2].. +PF15139 Domain of unknown function (DUF4572)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 220 amino acids in length.. +PF15140 Domain of unknown function (DUF4573)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically approximately 360 amino acids in length.. +PF15141 Domain of unknown function (DUF4574)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 86 amino acids in length.. +PF15142 INCA1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins inhibits cyclin-dependent kinase activity [1-2].. +PF15143 Domain of unknown function (DUF4575)
This family of uncharacterised proteins is found in eukaryotes.. +PF15144 Domain of unknown function (DUF4576)
This family of uncharacterised proteins is found in eukaryotes.. +PF15145 Domain of unknown function (DUF4577)
Eberhardt RY, Coggill P, Hetherington K. The function of this family of proteins has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically 128 amino acids in length.. +PF15146 Fanconi anemia-associated
Eberhardt RY, Coggill P, Hetherington K. This family of proteins plays a role in the Fanconi anemia-associated DNA damage response .. +PF15147 Domain of unknown function (DUF4578)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 44 and 137 amino acids in length.. +PF15148 Apolipoprotein F
Eberhardt RY, Coggill P, Hetherington K. +PF15149 Cation channel sperm-associated protein subunit beta protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of transmembrane proteins, CATSPERB, has not, as yet, been determined. However, it is thought to play a role in sperm hyperactivation by associating with CATSPER1 . This family of proteins is found in eukaryotes. Proteins in this family are typically between 220 and 1107 amino acids in length.. +PF15150 Phorbol-12-myristate-13-acetate-induced
Eberhardt RY, Coggill P, Hetherington K. This family carries a BH3 domain between residues 23 and 40.. +PF15151 Response gene to complement 32 protein family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 44 and 130 amino acids in length. There is a conserved KLGDT sequence motif.. +PF15152 Kisspeptin
Eberhardt RY, Coggill P, Hetherington K. +PF15153 Cytokine-like protein 1
Eberhardt RY, Coggill P, Hetherington K. The function of this family of proteins, CYTL1, has not, as yet, been determined. However it is thought to be a secretory protein expressed in CD34+ haemopoietic cells . This family of proteins is found in eukaryotes. Proteins in this family are typically between 134 and 145 amino acids in length. There are two conserved sequence motifs: PPTCYSR and DDC.. +PF15155 MORF4 family-associated protein1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 127 amino acids in length.. +PF15156 Ceroid-lipofuscinosis neuronal protein 6
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 190 and 310 amino acids in length.. +PF15157 IQ-like
Eberhardt RY, Coggill P, Hetherington K. This family of proteins includes Human IQ domain-containing protein J (IQCJ).. +PF15158 Domain of unknown function (DUF4579)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 192 and 239 amino acids in length. The human member of this family is C8orfK29.. +PF15159 Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y
Eberhardt RY, Coggill P, Hetherington K. This family of proteins represents subunit Y of the GPI-N-acetylglucosaminyltransferase (GPI-GnT) complex. It may regulate activity of the complex by binding the catalytic subunit, PIG-A .. +PF15160 Spermatogenesis-associated serine-rich protein 1
Eberhardt RY, Coggill P, Hetherington K. Spermatogenesis-associated serine-rich protein 1 is a serine-rich protein differentially expressed during spermatogenesis .. +PF15161 Neuropeptide-like
Eberhardt RY, Coggill P, Hetherington K. This family contains putative neuropeptides .. +PF15162 Domain of unknown function (DUF4580)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 63 and 185 amino acids in length.. +PF15163 Meiosis-expressed
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is essential for spermiogenesis .. +PF15164 Williams-Beuren syndrome chromosomal region 28 protein homologue
WBS28 is an integral membrane family. These proteins have been identified as being linked to Williams-Beuren syndrome, OMIM:194050. This family of proteins is found in eukaryotes, and are typically 266 amino acids in length.. +PF15165 Meiotic recombination protein REC114-like
Eberhardt RY, Coggill P, Hetherington K. REC114-like members are necessary for meiotic DNA double-strand break formation. It functions in conjunction with Mei4. This family of proteins is found in eukaryotes. Proteins in this family are typically between 43 and 259 amino acids in length.. +PF15167 Domain of unknown function (DUF4581)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically 131 amino acids in length.. +PF15168 Triple QxxK/R motif-containing protein family
Eberhardt RY, Coggill P, Hetherington K. TRIQK member-proteins share a characteristic triple repeat of the sequence QXXK/R, as well as a hydrophobic C-terminal region. Xenopus and mouse triqk genes are broadly expressed throughout embryogenesis, and mtriqk is also generally expressed in mouse adult tissues. TRIQK proteins are localized to the endoplasmic reticulum membrane. This family is found in eukaryotes and members are typically between and 86 amino acids in length.. +PF15169 Domain of unknown function (DUF4564)
This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C17orf62.. +PF15170 Calcium/calmodulin-dependent protein kinase II inhibitor
Eberhardt RY, Coggill P, Hetherington K. CaM-KIIN is the inhibitor of Calcium/calmodulin-dependent protein kinase II (CaMKII). CaMKII plays a central part in long-term potentiation, which underlies some forms of learning and memory. CaM-KIIN is a natural, specific inhibitor of CaMKII . This family is found in eukaryotes.. +PF15171 Neuropeptide secretory protein family, NPQ, spexin
Eberhardt RY, Coggill P, Hetherington K. Spexin, alternatively named NPQ, is a peptide hormone and is derived from a pro-hormone. This family of proteins has a role in inducing stomach wall contraction and is expressed in the submucosal layer of the mouse oesophagus and stomach. Spexin, like most peptide hormones, is a ligand for G-protein coupled receptors . Spexin is also thought to have a role in controlling arterial blood pressure as well as salt and water balance .. +PF15172 Prolactin-releasing peptide
Eberhardt RY, Coggill P, Hetherington K. +PF15173 FAM180 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 182 amino acids in length. There are two conserved sequence motifs: ELAS and DFE. The function of this family is unknown.. +PF15174 Prion-related protein testis-specific
Eberhardt RY, Coggill P, Hetherington K. PRNT is a family of prion-related proteins expressed in the testis [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 52 and 94 amino acids in length.. +PF15175 Spermatogenesis-associated protein 24
Eberhardt RY, Coggill P, Hetherington K. This family of proteins bind to DNA and to TBP (TATA box binding protein), TATA-binding protein (TBP)-related protein 2 (TRF2) and several polycomb factors. It is likely to function as a transcription regulator [1-2].. +PF15176 Leucine-rich repeat family 19 TM domain
Eberhardt RY, Coggill P, Hetherington K. LRR19-TM is the single-span transmembrane region of LRRC19, a leucine-rich repeat protein family. LRRC19 functions as a transmembrane receptor inducing pro-inflammatory cytokines. This suggests its role in innate immunity . This family of proteins is found in eukaryotes.. +PF15177 Interleukin-28A
Eberhardt RY, Coggill P, Hetherington K. The protein family, Interleukin-28A, plays an important role in modulating the immune system. This protein family is induced by viral infection and interacts with a class II receptor . This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 195 amino acids in length.. +PF15178 Mitochondrial import receptor subunit TOM5 homolog
Eberhardt RY, Coggill P, Hetherington K. This is a family of transmembrane proteins thought to form part of the pre-protein translocase complex of the outer mitochondrial membrane (TOM complex) . This family of proteins is found in eukaryotes. Proteins in this family are approximately 50 amino acids in length.. +PF15179 Myc target protein 1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is regulated by the c-Myc oncoprotein. It regulates the expression of several other c-Myc target genes .. +PF15180 Neuropeptides B and W
Eberhardt RY, Coggill P, Hetherington K. The function of this family, NPBW, which includes Neuropeptides B and W, is thought to be involved in activating G-protein coupled receptors, GPR7 and GPR8. It is thought to play a regulatory role in the organisation of neuroendocrine signals accessing the anterior pituitary gland. It is predicted that this effect will stimulate the increase in water-drinking and food-intake. This suggests it plays a role in the hypothalamic response to stress. This family of proteins is found in eukaryotes [1,2].. +PF15181 Spermatid-specific manchette-related protein 1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins, SMRP1, is thought to have a role in spermatogenesis and may be involved in differentiation or function of ciliated cells . This family of proteins is found in eukaryotes. Proteins in this family are typically approximately 260 amino acids in length.. +PF15182 Otospiralin
Eberhardt RY, Coggill P, Hetherington K. This family of proteins, Otospiralin, has a role in maintaining the neurosensory epithelium of the inner ear [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 90 amino acids in length.. +PF15183 Melanocortin-2 receptor accessory protein family
Eberhardt RY, Coggill P, Hetherington K. This family is thought to be involved in cell trafficking. It is required for MC2R expression in certain cell types, suggesting that it is involved in the processing, trafficking or function of MC2R. MRAP may be involved in the intracellular trafficking pathways in adipocyte cells . This family of proteins is found in eukaryotes. Proteins in this family are typically between 47 and 205 amino acids in length.. +PF15184 Mitochondrial import receptor subunit TOM6 homolog
Eberhardt RY, Coggill P, Hetherington K. TOMM6 forms part of the pre-protein translocase complex of the outer mitochondrial membrane (TOM complex) . This family of proteins is found in eukaryotes. Proteins in this family are typically between 43 and 74 amino acids in length.. +PF15185 Bcl-2-modifying factor, apoptosis
Eberhardt RY, Coggill P, Hetherington K. BMF is thought to play a role in inducing apoptosis. It is thought to bind to Bcl-2 proteins . This family of proteins is found in eukaryotes. Proteins in this family are typically between 75 and 190 amino acids in length. There are two conserved sequence motifs: GNA and DQF.. +PF15186 Testis-expressed sequence 13 protein family
Eberhardt RY, Coggill P, Hetherington K. The function of this family of proteins has not, as yet, been determined. However, members are thought to be encoded for by spermatogonially-expressed, germ-cell-specific genes . This family of proteins is found in eukaryotes. Proteins in this family are typically between 177 and 384 amino acids in length. There are two conserved sequence motifs: FIN and LAL.. +PF15187 Oesophageal cancer-related gene 4
Eberhardt RY, Coggill P, Hetherington K. Augurin is alternatively named oesophageal cancer-related gene 4 protein. The function of this family of transmembrane proteins is to induce the senescence of oligodendrocyte and neural precursor cells, characterised by G1 arrest, RB1 dephosphorylation and accelerated CCND1 and CCND3 proteasomal degradation. Augurin has been found to stimulate the release of ACTH via the release of hypothalamic CRF. This family of proteins is found in eukaryotes. Proteins in this family are typically 145 amino acids in length.. +PF15188 Coiled-coil domain-containing protein 167
Eberhardt RY, Coggill P, Hetherington K. The function of this family of coiled-coil domains, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 103 amino acids in length.. +PF15189 Domain of unknown function (DUF4582)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 126 and 788 amino acids in length. In humans, it is encoded for on the chromosomal position, C17orf104.. +PF15190 Domain of unknown function (DUF4583)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins, also known as UPF0694, is found in eukaryotes. Proteins in this family are around 135 amino acids in length. In humans, it is found on the chromosomal position, C14orf109.. +PF15191 Synaptonemal complex central element protein 3
Eberhardt RY, Coggill P, Hetherington K. +PF15192 TMEM213 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 154 amino acids in length. The function of this family is unknown.. +PF15193 FAM24 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 101 amino acids in length. There are two conserved sequence motifs: FDLRT and CLY. The function of this family is unknown.. +PF15194 TMEM191C family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 302 amino acids in length. There are two conserved sequence motifs: QDC and RLF. The function of this family is unknown.. +PF15195 TMEM210 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 149 amino acids in length. The function of this family is unknown.. +PF15196 Activator of apoptosis harakiri
Eberhardt RY, Coggill P, Hetherington K. +PF15197 Leukemia-associated protein 2
Eberhardt RY, Coggill P, Hetherington K. +PF15198 Dexamethasone-induced
Eberhardt RY, Coggill P, Hetherington K. +PF15199 D-amino acid oxidase activator
Eberhardt RY, Coggill P, Hetherington K. +PF15200 Keratinocyte differentiation-associated
Eberhardt RY, Coggill P, Hetherington K. +PF15201 Progressive rod-cone degeneration
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is involved in vision .. +PF15202 Adipogenin
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is involved in the stimulation of adipocyte differentiation and development .. +PF15203 TMEM95 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 102 and 231 amino acids in length. There is a conserved LGG sequence motif. The function of this family is unknown.. +PF15204 Kita-kyushu lung cancer antigen 1
Eberhardt RY, Coggill P, Hetherington K. This is a family of cancer antigens .. +PF15205 Placenta-specific protein 9
Eberhardt RY, Coggill P, Hetherington K. This family of proteins was identified as being enriched in placenta .. +PF15206 FAM209 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 170 amino acids in length. The function of this family is unknown.. +PF15207 TMEM240 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 54 and 175 amino acids in length. The function of this family is unknown.. +PF15208 Rab15 effector
Eberhardt RY, Coggill P, Hetherington K. This family of proteins has a role in receptor recycling from the endocytic recycling compartment .. +PF15209 Interleukin 31
Eberhardt RY, Coggill P, Hetherington K. +PF15210 Surfactant-associated protein 2
Eberhardt RY, Coggill P, Hetherington K. +PF15211 VEGF co-regulated chemokine 1
Eberhardt RY, Coggill P, Hetherington K. +PF15212 Spermatogenesis-associated protein 19, mitochondrial
Eberhardt RY, Coggill P, Hetherington K. +PF15213 CMT1A duplicated region transcript 4 protein
Eberhardt RY, Coggill P, Hetherington K. +PF15214 Peroxisomal testis-specific protein 1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is testis-specific .. +PF15215 Follicular dendritic cell secreted peptide
Eberhardt RY, Coggill P, Hetherington K. +PF15216 Thymic stromal lymphopoietin
Eberhardt RY, Coggill P, Hetherington K. +PF15217 TSC21 family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is testis-specific .. +PF15218 Spermatogenesis-associated protein 25
Eberhardt RY, Coggill P, Hetherington K. This family of proteins may be involved in spermatogenesis .. +PF15219 Testis-expressed 12
Eberhardt RY, Coggill P, Hetherington K. +PF15220 Hypoxia-inducible lipid droplet-associated
Eberhardt RY, Coggill P, Hetherington K. This family of proteins stimulate intracellular lipid accumulation, function as autocrine growth factors and enhance cell growth [1-2].. +PF15221 Lens epithelial cell protein LEP503
Eberhardt RY, Coggill P, Hetherington K. This protein may be involved in lens epithelial cell differentiation [1-2].. +PF15222 Kidney androgen-regulated
Eberhardt RY, Coggill P, Hetherington K. The function of this family is unknown.. +PF15223 Domain of unknown function (DUF4584)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are approximately 835 amino acids in length. The family is found in association with Pfam:PF02437.. +PF15224 Scrapie-responsive protein 1
Eberhardt RY, Coggill P, Hetherington K. This protein family has an important function in acting against the prion protein, Scrapie [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 98 amino acids in length.. +PF15225 Interleukin 32
Eberhardt RY, Coggill P, Hetherington K. +PF15226 HCF-1 beta-propeller-interacting protein family
Eberhardt RY, Coggill P, Hetherington K. HPIP is a small cellular polypeptide that binds to the beta-propeller domain of HCF-1. HPIP regulates HCF-1 activity by modulating its subcellular localisation. HCF-1 is a cellular protein required by VP16 to activate the herpes simplex virus- immediate-early genes. VP16 is a component of the viral tegument and, after release into the cell, binds to HCF-1 and translocates to the nucleus to form a complex with the POU domain protein Oct-1 and a VP16-responsive DNA sequence. HPIP-mediated export may provide the pool of cytoplasmic HCF-1 required for import of virion-derived VP16 into the nucleus .. +PF15227 zinc finger of C3HC4-type, RING
This is a family of primate-specific Ret finger protein-like (RFPL) zinc-fingers of the C3HC4 type. Ret finger protein-like proteins are primate-specific target genes of Pax6, a key transcription factor for pancreas, eye and neocortex development . This domain is likely to be DNA-binding . This zinc-finger domain together with the RDM domain, Pfam:PF11002, forms a large zinc-finger structure of the RING/U-Box superfamily. RING-containing proteins are known to exert an E3 ubiquitin protein ligase activity with the zinc-finger structure being mandatory for binding to the E2 ubiquitin-conjugating enzyme .. +PF15228 Death-associated protein
Eberhardt RY, Coggill P, Hetherington K. +PF15229 POM121 family
Eberhardt RY, Coggill P, Hetherington K. +PF15230 Serine/arginine repetitive matrix protein C-terminus
Eberhardt RY, Coggill P, Hetherington K. This domain is found near to the C-terminus of Serine/arginine repetitive matrix proteins 3 and 4.. +PF15231 Variable charge X/Y family
Eberhardt RY, Coggill P, Hetherington K. The variable charge X/Y (VCX/VCY) family of proteins has members on the Human X and Y chromosomes, is expressed in male germ cells and may play a role in spermatogenesis or in sex ratio distortion .. +PF15232 Domain of unknown function (DUF4585)
Eberhardt RY, Coggill P, Hetherington K. The function of this protein domain family is yet to be characterised. It is putatively thought to lie in the C-terminal domain of the DNA nucleotide repair protein, Xeroderma pigmentosa complementation group A (XPA). The function of XPA is to bind to DNA and repair any mismatched base pairs. This domain family is often found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved DPE sequence motif. In humans, this protein is encoded for in the chromosomal position, Chromosome 5 open reading frame 65. Mutations in the gene lead to myelodysplastic syndromes, where there is inefficient stem cell production in the bone marrow. This suggests that the protein may have a role in forming blood cells .. +PF15233 Synaptonemal complex central element protein 1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins includes synaptonemal complex central element protein 1, a component of the synaptonemal complex involved in meiosis, and synaptonemal complex central element protein 1-like, which may be involved in meiosis [1-2].. +PF15234 Linker for activation of T-cells
Eberhardt RY, Coggill P, Hetherington K. +PF15235 G protein-regulated inducer of neurite outgrowth C-terminus
Eberhardt RY, Coggill P, Hetherington K. This represents the C-terminus of the G protein-regulated inducer of neurite outgrowth proteins .. +PF15236 Coiled-coil domain-containing protein 66
Eberhardt RY, Coggill P, Hetherington K. This protein family, named Coiled-coil domain-containing protein 66 (CCDC) refers to a protein domain found in eukaryotes, and is approximately 160 amino acids in length. CCDC66 protein is detected mainly in the inner segments of photoreceptors in many vertebrates including mice and humans. It has been found in dogs, that a mutation in the CCDC66 gene causes generalized progressive retinal atrophy (gPRA). This shows that the protein encoded for by this gene is vital for healthy vision and guards against photoreceptor cell degeneration. The structure of CCDC66 proteins includes a heptad repeat pattern which contains at least one coiled-coil domain. There are at least two or more alpha-helices which form a cable-like structure .. +PF15237 PTRF/SDPR family
Eberhardt RY, Coggill P, Hetherington K. This family of proteins includes muscle-related coiled-coil protein (MURC), protein kinase C delta-binding protein (PRKCDBP), polymerase I and transcript release factor (PTRF) and serum deprivation-response protein (SDPR). MURC activates the Rho/ROCK pathway . PRKCDBP appears to act as an immune potentiator . PTRF is involved in caveolae formation and function . SDPR is involved in the targeting of protein kinase Calpha to caveolae .. +PF15238 FAM181
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 256 and 426 amino acids in length.. +PF15239 Domain of unknown function (DUF4586)
Eberhardt RY, Coggill P, Hetherington K. This protein family, refers to a domain of unknown function. The precise role of this protein domain remains to be elucidated. This family of proteins is found in eukaryotes and are typically between 256 and 320 amino acids in length. There is a single completely conserved residue, phenylalanine (F), that may be functionally important. In humans, the protein is found in the position, chromosome 4 open reading frame 47.. +PF15240 Proline-rich
Eberhardt RY, Coggill P, Hetherington K. This family includes several eukaryotic proline-rich proteins.. +PF15241 Cylicin N-terminus
Eberhardt RY, Coggill P, Hetherington K. This is the N-terminus of cylicin proteins, which may play a role in spermatid differentiation .. +PF15242 Family of FAM53
Eberhardt RY, Coggill P, Hetherington K. The FAM53 protein family refers to a family of proteins, which bind to a transcriptional regulator that modulates cell proliferation . It is known to be highly important in neural tube development . It is found in eukaryotes and is typically between 303 and 413 amino acids in length.. +PF15243 Anaphase-promoting complex subunit 15
Eberhardt RY, Coggill P, Hetherington K. This is a component of the anaphase promoting complex/cyclosome .. +PF15244 Hydroxy-steroid dehydrogenase
Eberhardt RY, Coggill P, Hetherington K. This family also goes by the name of Spermatogenesis-associated protein 7 or SPAT7. It is an aldo-keto reductase (AKR) human type 3 3-alpha-hydroxy-steroid dehydrogenase (H3-alpha-HSD3, AKR1C2), and it plays a crucial role in the regulation of the intracellular concentrations of testosterone and 5-alpha-dihydrotestosterone (5-alpha-DHT), two steroids directly linked to the aetiology and the progression of many prostate diseases and cancer [1,2]. Mutations in the gene cause Leber congenital amaurosis (LCA) and juvenile retinitis pigmentosa (RP), the most common hereditary causes of visual impairment in infants and children .. +PF15245 Transcription cofactor vestigial-like protein 4
Eberhardt RY, Coggill P, Hetherington K. These proteins act as transcriptional enhancer factor (TEF-1) cofactors .. +PF15246 Nck-associated protein 5, Peripheral clock protein
Eberhardt RY, Coggill P, Hetherington K. NCKAP5 is short for Nck-associated protein 5, which is also known as the Peripheral clock protein. NCKAP5 is a protein family, which interacts with the SH3-containing region of the adaptor protein Nck. Nck is a protein that interacts with receptor tyrosine kinases and guanine nucleotide exchange factor Sos. The role of Nck can be thought of as similar to Grb2. The role of NCKAP5 is to assist Nck with its adaptor protein role .. +PF15247 Histone RNA hairpin-binding protein RNA-binding domain
Eberhardt RY, Coggill P, Hetherington K. This family represents the RNA-binding domain of histone RNA hairpin-binding protein .. +PF15248 Domain of unknown function (DUF4587)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function. The precise function of this protein domain remains to be elucidated. This domain family is found in eukaryotes, and is typically between 64 and 79 amino acids in length. There are two conserved sequence motifs: QNAQ and HHH. In humans, it is found in the position, chromosome 21 open reading frame 58.. +PF15249 Glioma tumor suppressor candidate region
Eberhardt RY, Coggill P, Hetherington K. This domain family is found in eukaryotes, and is typically between 105 and 124 amino acids in length. There is a single completely conserved residue F that may be functionally important. Mutations in the gene for this protein in humans lead to the development of oligodendrogliomas. There is evidence that this protein interacts with SH3 domains .. +PF15250 Raftlin
Eberhardt RY, Coggill P, Hetherington K. This family of proteins plays a role in the formation and/or maintenance of lipid rafts .. +PF15251 Domain of unknown function (DUF4588)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 200 and 274 amino acids in length. There is a conserved LYK sequence motif. There is a single completely conserved residue A that may be functionally important.. +PF15252 Domain of unknown function (DUF4589)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function. The precise function of the protein domain remains to be elucidated. This family of proteins is found in eukaryotes and are typically between 215 and 293 amino acids in length. The protein contains two conserved sequence motifs: SSS and KST.. +PF15253 SCL-interrupting locus protein N-terminus
Eberhardt RY, Coggill P, Hetherington K. +PF15254 Coiled-coil domain-containing protein 14
Eberhardt RY, Coggill P, Hetherington K. This protein family, Coiled-coil domain-containing protein 14 (CCDC14) is a domain of unknown function. This family of proteins is found in eukaryotes. Proteins in this family are typically between 301 and 912 amino acids in length.. +PF15255 WASH complex subunit CAP-Z interacting, central region
Eberhardt RY, Coggill P, Hetherington K. This domain is found on WASH complex subunits FAM21 and CAP-ZIP proteins, as well as on VPEF (vaccinia virus penetration factor). This family of proteins is found in eukaryotes. Proteins in this family are typically between 305 and 1321 amino acids in length. The exact function of this region is not known.. +PF15256 SPATIAL
Eberhardt RY, Coggill P, Hetherington K. SPATIAL (stromal protein associated with thymii and lymph node) proteins may be involved in spermatid differentiation .. +PF15257 Domain of unknown function (DUF4590)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins remains to be characterised and is a domain of unknown function. This domain family is found in eukaryotes, and is approximately 120 amino acids in length. There are two conserved sequence motifs: CCE and PCY. In humans, the gene encoding this protein lies in the position, chromosome 1 open reading frame 173.. +PF15258 Protein family of FAM222A
Eberhardt RY, Coggill P, Hetherington K. This protein family, FAM222A, is a domain of unknown function. This family of proteins is found in eukaryotes and its members are typically between 411 and 562 amino acids in length. In humans, the gene encoding this protein domain lies in the position, chromosome 12 open reading frame 34.. +PF15259 G-2 and S-phase expressed 1
Eberhardt RY, Coggill P, Hetherington K. This family is the N-terminus of GTSE1 proteins. GTSE-1 (G2 and S phase-expressed-1) protein is specifically expressed during S and G2 phases of the cell cycle. It is mainly localised to the microtubules and when overexpressed delays the G2 to M transition. The full protein negatively regulates p53 transactivation function, protein levels, and p53-dependent apoptosis. This domain family is found in eukaryotes, and is approximately 140 amino acids in length. There is a conserved FDFD sequence motif.. +PF15260 Protein family FAM219A
Eberhardt RY, Coggill P, Hetherington K. This protein family, FAM219A is a domain of unknown function. This protein family has been found in eukaryotes. Proteins in this family are typically between 144 and 191 amino acids in length. There are two conserved sequence motifs: QLL and LDE.. +PF15261 Domain of unknown function (DUF4591)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function. It is found in eukaryotes, and is approximately 120 amino acids in length. In humans, the gene encoding this protein lies in the position chromosome 11 open reading frame 63.. +PF15262 Domain of unknown function (DUF4592)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function, which lies to the N-terminus of the protein. This domain family is found in eukaryotes, and is typically between 114 and 130 amino acids in length. There are two completely conserved residues (L and A) that may be functionally important. In humans, the gene that encodes this protein lies in the position, chromosome 2 open reading frame 55.. +PF15263 Domain of unknown function (DUF4593)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a putative uncharacterised protein family. Its existence is uncertain and its precise function is unknown. This family of proteins is thought to be found in eukaryotes. Proteins in this family are estimated to be around 155 amino acids in length.. +PF15264 Tumour suppressing sub-chromosomal transferable candidate 4
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is expressed from a gene cluster where in humans the TSSC4 gene is not imprinted [1,2]. This same cluster is associated with the Beckwith-Wiedemann syndrome . This domain family is found in eukaryotes, and is typically between 120 and 147 amino acids in length. There is a conserved YSL sequence motif.. +PF15265 FAM186A;
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function. This family of proteins is found in eukaryotes and are typically between 441 and 534 amino acids in length.. +PF15266 Domain of unknown function (DUF4594)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function. The protein family is found in eukaryotes, and is typically between 170 and 183 amino acids in length. In humans, the gene encoding this protein lies in the position, chromosome 15 open reading frame 52.. +PF15268 Dapper
Eberhardt RY, Coggill P, Hetherington K. This is a family of signalling proteins [1-2]. They act in a diverse range of signaling pathways and have a range of binding partners. They act as homo- and heterodimers .. +PF15269 Zinc-finger
Eberhardt RY, Coggill P, Hetherington K. This is a family of eukaryotic zinc-fingers.. +PF15270 Metallo-carboxypeptidase inhibitor
Pfam-B_261362 (release 26.0). ACI44, a metallo-carboxypeptidase inhibitor, is one member of a battery of selective inhibitors protecting roundworms of the genus Ascaris, common parasites of the human gastrointestinal tract, from host enzymes and the immune system .. +PF15271 Spindle pole body component BBP1, Mps2-binding protein
Pfam-B_31027 (release 26.0). This N-terminal domain of BBP1, a spindle pole body component, interacts directly, though transiently, with the polo-box domain of Cdc5p. Full-length BBP1 localises at the cytoplasmic side of the central plaque periphery of the spindle pole body (SPB) and plays an important role in inserting a duplication plaque into the nuclear envelope and assembling a functional inner plaque . Although not a membrane protein itself, BBP1 binds to Mps2 as well as to Spc29 and the half-bridge protein Kar1, thus providing a model for how the SPB core is tethered within the nuclear envelope and to the half-bridge .. +PF15272 Spindle pole body component BBP1, C-terminal
Pfam-B_58229 (release 26.0). This C-terminal domain of BBP1, a spindle pole body component, carries coiled-coils that are necessary for the localisation of BBP1 to the spindle pole body (SPB) . Although not a membrane protein itself, BBP1 binds to Mps2 as well as to Spc29 and the half-bridge protein Kar1, thus providing a model for how the SPB core is tethered within the nuclear envelope and to the half-bridge . +PF15273 NHS-like
Eberhardt RY, Coggill P, Hetherington K. This family of proteins includes Nance-Horan syndrome protein (NHS) .. +PF15274 Muscular LMNA-interacting protein
Eberhardt RY, Coggill P, Hetherington K. MLIP is a Muscle-enriched A-type Lamin-interacting Protein, an innovation of amniotes, and is expressed ubiquitously and most abundantly in heart, skeletal, and smooth muscle. MLIP interacts directly and co-localises with lamin A and C in the nuclear envelope. MLIP also co-localises with promyelocytic leukemia (PML) bodies within the nucleus. PML, like MLIP, is only found in amniotes, suggesting that a functional link between the nuclear envelope and PML bodies may exist through MLIP .. +PF15275 PEHE domain
Eberhardt RY, Coggill P, Hetherington K. This domain was first identified in drosophila MSL1 (male-specific lethal 1) . In drosophila it binds to the histone acetyltransferase males-absent on the first protein (MOF) and to protein male-specific lethal-3 (MSL3) [2-3].. +PF15276 Protein phosphatase 1 binding
Eberhardt RY, Coggill P, Hetherington K. This domain contains a protein phosphatase 1 (PP1) binding site .. +PF15277 Exocyst complex component SEC3 N-terminal PIP2 binding PH
This is the N-terminal domain of fungal and eukaryotic Sec3 proteins. Sec3 is a component of the exocyst complex that is involved in the docking of exocytic vesicles with fusion sites on the plasma membrane. This N-terminal domain contains a cryptic pleckstrin homology (PH) fold, and all six positively charged lysine and arginine residues in the PH domain predicted to bind the PIP2 head group are conserved. The exocyst complex is essential for many exocytic events, by tethering vesicles at the plasma membrane for fusion. In fission yeast, polarised exocytosis for growth relies on the combined action of the exocyst at cell poles and myosin-driven transport along actin cables .. +PF15278 Sec3 exocyst complex subunit
This small Sec3 C-terminal domain family is based around the fission yeast protein, and is rather shorter than the budding yeast/vertebrate domain Sec3_C family, Pfam:PF09763. In fact it is only this coiled-coil region that they carry in common. The full length fission yeast, UniProtKB:Q10324, protein Sec3 is redundant with Exo70 for viability and for the localisation of other exocyst subunits, suggesting that these components act as exocyst tethers at the plasma membrane. Sec3, Exo70 and Sec5 are transported by the myosin V Myo52 along actin cables. The exocyst holo-complex, including Sec3 and Exo70, is present on exocytic vesicles, which can reach cell poles by either myosin-driven transport or random walk .. +PF15279 Sine oculis-binding protein
Eberhardt RY, Coggill P, Hetherington K. SOBP is associated with syndromic and nonsyndromic intellectual disability. It carries a zinc-finger of the zf-C2H2 type at the N-terminus, and a highly characteristic C-terminal PhPhPhPhPhPh motif. The deduced 873-amino acid protein contains an N-terminal nuclear localisation signal (NLS), followed by 2 FCS-type zinc finger motifs, a proline-rich region (PR1), a putative RNA-binding motif region, and a C-terminal NLS embedded in a second proline-rich motif. SOBP is expressed in various human tissues, including developing mouse brain at embryonic day 14. In postnatal and adult mouse brain SOBP is expressed in all neurons, with intense staining in the limbic system. Highest expression is in layer V cortical neurons, hippocampus, pyriform cortex, dorsomedial nucleus of thalamus, amygdala, and hypothalamus. Postnatal expression of SOBP in the limbic system corresponds to a time of active synaptogenesis. The family is also referred to as Jackson circler, JXC1. In seven affected siblings from a consanguineous Israeli Arab family with mental retardation, anterior maxillary protrusion, and strabismus mutations were found in this protein [1,2].. +PF15280 Protein aurora borealis N-terminus
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is required for the activation of the protein kinase Aurora-A .. +PF15281 Consortin C-terminus
Eberhardt RY, Coggill P, Hetherington K. Consortin is a trans-Golgi network cargo receptor involved in targeting connexins to the plasma membrane .. +PF15282 BMP-2-inducible protein kinase C-terminus
Eberhardt RY, Coggill P, Hetherington K. This family represents the C-terminus of BMP2K and related proteins [1-2].. +PF15283 Domain of unknown function (DUF4595) with porin-like fold
JCSG target SP16885A/PDB 4ghb. Large family of predicted secreted proteins mostly from CFG group, but also from Burkholderia, Pseudomonas and Streptomyces. Function of these proteins is not known. A 3D structure of a representative of this family from Bacteroides uniformis was solved by JCSG and deposited to PDB as 4ghb. There is some overlap with RHS-repeat (PF05593) family despite lack of obvious repeats in the structure. +PF15284 Phage-encoded virulence factor
Pfam-B_45688 (release 26). PAGK represents a new family of virulence factors that is translocated into the host cytoplasm via bacterial outer membrane vesicles (OMV). Members are small proteins composed of ~70 amino acids. In Salmonella they are secreted independently of the SPI-2 type-III secretion system, T3SS. The OMV functions as a vehicle for transferring virulence determinants to the cytoplasm of the infected host cell. OMVs are released from the cell envelopes of Gram-negative bacteria and comprise a variety of outer membrane and periplasmic constituents, including proteins, phospholipids, lipopolysaccharides, and DNA .. +PF15285 Beclin-1 BH3 domain, Bcl-2-interacting
The BH3 domain is a short motif known to bind to Bcl-xLs. This interaction is important in apoptosis.. +PF15286 Apoptosis regulator M11, B cell 2 leukaemia/lymphoma like
Pfam:PF02180. Bcl-2_3 is a small family of eukaryotic proteins associated with autophagy. The family is found in association with Pfam:PF00452,. +PF15287 KRBA1 family repeat
KRBA1 is a short repeating motif found in mammalian proteins. It is characterised by a highly conserved sequence of residues, SSPLxxLxxCLK. The function of the repeat, which can be present in up to seven copies, is unknown as is the function of the full length proteins.. +PF15288 Zinc knuckle
This Zinc knuckle is found in FAM90A mammalian proteins.. +PF15289 Regulatory factor X-associated C-terminal binding domain
This C-terminal domain of Regulatory factor X-associated protein binds to RFXANK [1,2], the Ankyrin-repeat regulatory factor X proteins. RFXA is part of the RFX complex. Mutants of either RFXAP or RFXANK protein fail to bind to each other. RFX5 binds only to the RFXANK-RFXAP scaffold and not to either protein alone, and neither the scaffold nor RFX5 alone can bind DNA. The binding of the RFXANK-RFXAP scaffold to RFX5 leads to a conformational change in the latter that exposes the DNA-binding domain of RFX5. The DNA-binding domain of RFX5 anchors the RFX complex to MHC class II X and S promoter boxes .. +PF15290 Golgi-localised syntaxin-1-binding clamp
Eberhardt RY, Coggill P, Hetherington K. Syntaphilin or Syntabulin is a family of eukaryotic proteins. Syntaphilin binds to syntaxin-1 thereby inhibiting SNARE complex formation by absorbing free syntaxin-1. So it is a syntaxin-1 clamp that controls SNARE assembly.. +PF15291 Dermcidin, antibiotic peptide
Dermcidin is a family of peptides produced in the sweat to protect against pathogenic Gram-positive bacteria.. +PF15292 Treslin N-terminus
Eberhardt RY, Coggill P, Hetherington K. This family represents the N-terminus of treslin, a checkpoint regulator which plays a role in DNA replication preinitiation complex formation [1-2].. +PF15293 Nuclear fragile X mental retardation-interacting protein 2
Eberhardt RY, Coggill P, Hetherington K. +PF15294 Leucine zipper
Eberhardt RY, Coggill P, Hetherington K. This family includes Leucine zipper transcription factor-like protein 1 (LZTFL1) and Leucine zipper protein 2 (LUZP2) .. +PF15295 Coiled-coil domain-containing protein 50 N-terminus
Eberhardt RY, Coggill P, Hetherington K. +PF15296 Codanin-1 C-terminus
Eberhardt RY, Coggill P, Hetherington K. This domain is found near to the C-terminus of codanin-1 . . +PF15297 Cytoskeleton-associated protein 2 C-terminus
Eberhardt RY, Coggill P, Hetherington K. This family includes the C-terminus of CKAP2 and CKAP2L. CKAP2 is a microtubule associated protein which stabilises microtubules .. +PF15298 AJAP1/PANP C-terminus
Eberhardt RY, Coggill P, Hetherington K. This family includes the C-terminus of adherens junction-associated protein 1 (AJAP1) and of PILR-associating neural protein (PANP). AJAP1 inhibits cell adhesion and migration . PANP is a ligand for the immune inhibitory receptor paired immunoglobulin-like type 2 receptor alpha .. +PF15299 Amyotrophic lateral sclerosis 2 chromosomal region candidate gene 8
Eberhardt RY, Coggill P, Hetherington K. This domain is found in amyotrophic lateral sclerosis 2 chromosomal region candidate gene 8 protein .. +PF15300 INTS6/SAGE1/DDX26B/CT45 C-terminus
Eberhardt RY, Coggill P, Hetherington K. This domain is found at the C-terminus of integrator complex subunit 6 (INTS6), sarcoma antigen 1 (SAGE1), protein DDX26B (DDX26B) and members of the cancer/testis antigen family 45.. +PF15301 SLAIN motif-containing family
Eberhardt RY, Coggill P, Hetherington K. The SLAIN motif containing family is named after the presence of a SLAIN motif in SLAIN1 . They are a family of microtubule plus-end tracking proteins .. +PF15302 P33 mono-oxygenase
Eberhardt RY, Coggill P, Hetherington K. This family of proteins contains a flavine-containing mono-oxygenase motif. It may have a role in the regulation of neuronal survival, differentiation and axonal outgrowth .. +PF15303 E3 ubiquitin-protein ligase Arkadia N-terminus
Eberhardt RY, Coggill P, Hetherington K. This domain is found at the N-terminus of E3 ubiquitin-protein ligase Arkadia .. +PF15304 A-kinase anchor protein 2 C-terminus
Eberhardt RY, Coggill P, Hetherington K. This family includes the C-terminus of A-kinase anchor protein 2 (AKAP2). It includes the site where the regulatory subunits (RII) of protein kinase AII binds .. +PF15305 Intraflagellar transport protein 43
Eberhardt RY, Coggill P, Hetherington K. Intraflagellar transport protein 43 (IFT43) is a subunit of the IFT complex A (IFT-A) machinery of primary cilia .. +PF15306 LIN37
Eberhardt RY, Coggill P, Hetherington K. LIN37 is a component of the DREAM (or LINC) complex which represses cell cycle-dependent genes in quiescent cells and plays a role in the cell cycle-dependent activation of G2/M genes [1-2].. +PF15307 Sperm acrosome-associated protein 7
SPACA7 is a family of eukaryotic proteins expressed in the testes. Proteins in this family are typically between 104 and 195 amino acids in length. There is a conserved DEIL sequence motif. The function is not known.. +PF15308 CEP170 C-terminus
Eberhardt RY, Coggill P, Hetherington K. This family includes the C-terminus of centrosomal protein of 170 kDa (CEP170) .. +PF15309 ALMS motif
Eberhardt RY, Coggill P, Hetherington K. This domain is found at the C-terminus of Alstrom syndrome protein 1 (ALMS1), KIAA1731 and C10orf90 [1-2].. +PF15310 Vitamin A-deficiency (VAD) rat model signalling
VAD1-2 is a family of proteins found in eukaryotes. The family is expressed in testes and is involved in signalling during spermatogenesis.. +PF15311 Hydrolethalus syndrome protein 1 C-terminus
Eberhardt RY, Coggill P, Hetherington K. +PF15312 Junctional sarcoplasmic reticulum protein
JSRP, junctional sarcoplasmic reticulum protein 1, or junctional-face membrane protein of 45 kDa homologue, is a family of eukaryotic proteins. The family is localised to the junctional face membrane of the skeletal muscle sarcoplasmic reticulum (SR); it colocalises with its Ca2+-release channel (the ryanodine receptor), and interacts with calsequestrin and the skeletal-muscle dihydro-pyridine receptor Cav1. It is key for the functional expression of voltage-dependent Ca2+ channels.. +PF15313 Hexamethylene bis-acetamide-inducible protein
Eberhardt RY, Coggill P, Hetherington K. HEXIM is a transcriptional regulator that functions as a general RNA polymerase II transcription inhibitor. In cooperation with 7SK snRNA it sequesters P-TEFb in a large inactive 7SK snRNP complex preventing RNA polymerase II phosphorylation and subsequent transcriptional elongation. HEXIM may also regulate NF-kappa-B, ESR1, NR3C1 and CIITA-dependent transcriptional activity.. +PF15314 Proline-rich acidic protein 1, pregnancy-specific uterine
PRAP, or proline-rich acidic protein 1, is a family of eukaryotic proteins. PRAP is abundantly expressed in the epithelial cells of the human liver, kidney, gastrointestinal tract, and cervix. It is significantly down-regulated in hepatocellular carcinoma and right colon adenocarcinoma compared with the respective adjacent normal tissues. In the mouse it is expressed in the epithelial cells of the mouse and rat gastrointestinal tracts, and pregnant mouse uterus. This article describes the isolation, distribution, and functional characterization of the human homologue. PRAP was abundantly expressed in the epithelial cells of the human liver, kidney, gastrointestinal tract, and cervix. PRAP plays an important role in maintaining normal growth suppression .. +PF15315 Facioscapulohumeral muscular dystrophy candidate 2
This family of proteins is found in eukaryotes. The family is localised close to the D4Z4 repeats on chromosome 4 and 10 that are associated with the autosomal dominant facioscapulohumeral muscular dystrophy (FSHD). FRG2 are transcriptionally upregulated in FSHD myoblast cultures suggesting involvement in the pathogenesis of FSHD .. +PF15316 MyoD family inhibitor
Eberhardt RY, Coggill P, Hetherington K. Members of this family inhibit the transactivation activity of the MyoD family of myogenic factors . They affect axin-mediated regulation of the Wnt and JNK signaling pathways , and regulate expression from viral promoters .. +PF15317 Cardiac transcription factor regulator, Developmental protein
Eberhardt RY, Coggill P, Hetherington K. The family of proteins are cardiac transcription regulators, named Lbh, short for Limb, bud and heart. They regulate embryological development in the heart . More specifically, in humans, they may act as transcriptional activators in MAPK signaling pathway to mediate cellular functions . This family of proteins is found in eukaryotes. Proteins in this family are typically between 92 and 116 amino acids in length.. +PF15318 Putative Bcl-2 like protein of testis
This family of proteins is found in eukaryotes. The family may represent a set of Bcl-2-like proteins involved in apoptosis, see UniProt:Q9BQM9.. +PF15319 RAD9, RAD1, HUS1-interacting nuclear orphan protein
RHINO, or RAD9, RAD1, HUS1-interacting nuclear orphan, is a family of eukaryotic proteins . Under genotoxic stresses such as ionizing radiation during the S phase, RHINO plays a role in DNA damage response signalling. It is recruited to sites of DNA damage through interaction with the 9-1-1 cell-cycle checkpoint response complex and TOPBP1 in a ATR-dependent (ataxia telangiectasia and Rad3-related) manner. It is required for the progression of the G1 to S phase transition of breast cancer cells, and it is known to play a role in the stimulation of CHEK1 phosphorylation. It interacts with RAD9A, RAD18, TOPBP1 and UBE2N .. +PF15320 mRNA cap methylation, RNMT-activating mini protein
This family of proteins is found in eukaryotes. Proteins in this family are typically between 102 and 154 amino acids in length. There is a single completely conserved residue D that may be functionally important. RAM is a family of eukaryotic proteins that are an obligate component of the mammalian cap methyltransferase, RNMT (RNA guanine-7 methyltransferase). RAM consists of an N-terminal RNMT-activating domain and a C-terminal RNA-binding domain. Either RAM or RNMT independently have rather weak binding affinity for RNA, but together their RNA affinity is significantly increased. RAM is necessary for efficient cap methylation, maintaining mRNA expression levels, for mRNA translation and for cell viability.. +PF15321 ATPase family AAA domain containing 4
ATAD4 is a family of proteins found in eukaryotes. The family is also known as PRR15L, or proline-rich 15-like. ATAD4 is expressed almost exclusively in post-mitotic cells both during foetal development and in adult tissues, such as the intestinal epithelium and the testis. Its expression in mouse and human gastrointestinal tumours is linked, directly or indirectly, to the disruption of the Wnt signaling pathway.. +PF15322 Protein missing in infertile sperm 1, putative
This family of proteins is found in eukaryotes. Proteins in this family are typically between 249 and 341 amino acids in length.. +PF15323 Developmental protein
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. These proteins have an important role to play in developmental biology, particularly embryogenesis. It plays an important role in cell survival and axial pattern . It is also thought to be a crucial subunit in the tRNA splicing ligase complex . Proteins in this family are typically between 141 and 232 amino acids in length. There are two conserved sequence motifs: HPE and PQR.. +PF15324 Hedgehog signalling target
Eberhardt RY, Coggill P, Hetherington K. TALPID3 is a family of eukaryotic proteins that are targets for Hedgehog signalling. Mutations in this gene noticed first in chickens lead to multiple abnormalities of development.. +PF15325 Modulator of retrovirus infection
MRI, or modulator of retrovirus infection, is a family of eukaryotic proteins that regulate the activity of the proteasome in the uncoating of retroviruses .. +PF15326 Testis expressed sequence 15
TEX15 is a family of eukaryotic proteins that is required for chromosomal synapsis and meiotic recombination. TEX15 regulates the loading of DNA repair proteins onto sites of double-stranded-breaks and, thus, its absence causes a failure in meiotic recombination . Two polymorphisms in the TEX15 gene could be considered the genetic risk factors for spermatogenic failure in the Chinese Han population .. +PF15327 Tankyrase binding protein C terminal domain
Eberhardt RY, Coggill P, Hetherington K. This protein domain family is found at the C-terminal end of the Tankyrase binding protein in eukaryotes. The precise function of this protein is still unknown. However, it is known to interact with the enzyme tankyrase, a telomeric poly(ADP-ribose) polymerase, by binding to it. Tankyrin catalyses poly(ADP-ribose) chain formation onto proteins. More specifically, it binds to the ankyrin domain in tankyrase . The protein domain is approximately 170 amino acids in length and contains two conserved sequence motifs: FPG and LKA.. +PF15328 Putative GRINL1B complex locus protein 2
Eberhardt RY, Coggill P, Hetherington K. This protein family is named Putative GRINL1B complex locus protein 2. GRINL1B is short for: glutamate receptor, ionotropic, N-methyl D-aspartate-like 1B. The name indicates what sort of receptor it is thought to be, a ligand gated ion channel specific to the neurotransmitter Glutamate. This family of proteins is found in eukaryotes. Proteins in this family are typically between 325 and 463 amino acids in length. The protein is thought to be the product of a pseudogene with a role in helping assemble a gene transcription unit .. +PF15330 SHP2-interacting transmembrane adaptor protein, SIT
SIT, or SHP2-interacting transmembrane adaptor protein, is a disulfide-linked dimer that regulates human T Cell activation.. +PF15331 Cellular tumour antigen p53-inducible 5
TP53IP5 suppresses cell growth, and its intracellular location and expression change in a cell-cycle-dependent manner.. +PF15332 Lck-interacting transmembrane adapter 1
LIME1 is a family of eukaryotic transmembrane adaptors. It plays an important role in linking BCR stimulation to B-cell activation and is expressed in primary B cells. LIME localises to lipid rafts in T cells in response to TCR stimulation , and is phosphorylated by Lck and recruits signalling molecules such as Lck, PI3K, Grb2, Gads, and SHP-2 . LIME acts as the transmembrane adaptor linking BCR-induced membrane-proximal signalling to B-cell activation .. +PF15333 TATA box-binding protein-associated factor 1D
TAF1D is a family of eukaryotic proteins that are members of the SL1 complex. The SL1 complex includes TBP and TAF1A, TAF1B and TAF1C, and plays a role in RNA polymerase I transcription [1,2]. Alternative names have included 'JOSD3, Josephin domain containing 3'.. +PF15334 Aurora kinase A and ninein interacting protein
AIB is a family of eukaryotic proteins necessary for the adequate functioning of Aurora-A, a protein involved in chromosome alignment, centrosome maturation, mitotic spindle assembly and aspects of tumourigenesis. AIB is likely to act as a regulator of Aurora-A activity.. +PF15335 Caspase activity and apoptosis inhibitor 1
CAAP1, or caspase activity and apoptosis inhibitor 1, is a family of eukaryotic proteins involved in the regulation of apoptosis. It modulates a caspase-10 dependent mitochondrial caspase-3/9 feedback amplification loop.. +PF15336 Autism susceptibility gene 2 protein
Auts2, or FBRSL2, Fibrosin-1-like protein 2, is a family of eukaryotic proteins associated both with a susceptibility to autism and with influencing the number of corpora lutea produced by breeding sows .. +PF15337 Vascular protein family Vasculin-like 1
Eberhardt RY, Coggill P, Hetherington K. GC-rich promoter-binding protein 1-like 1 or Vasculin-like protein family 1, is likely to be a transcription factor. The domain family is found in eukaryotes, and is approximately 90 amino acids in length.. +PF15338 p53-regulated apoptosis-inducing protein 1
TPIP1 is a family of eukaryotic proteins whose expression is induced by wild-type p53. Ectopically expressed TPIP1, which is localised within mitochondria, leads to apoptotic cell death through dissipation of mitochondrial A(psi)m. Phosphorylation of p53 Ser-46 regulates the transcriptional activation of TPIP1, thereby mediating p53-dependent apoptosis.. +PF15339 Acrosome formation-associated factor
Afaf is a family of single pass type I membrane proteins. Afaf is a vesicle factor derived from the early endosome trafficking pathway that is involved in the biogenesis of the acrosome on the maturing spermatozoon head.. +PF15340 Cooperator of PRMT5 family
COPR5 is a family of histone H4-binding proteins expressed in the nucleus. It interacts with the N-terminus of histone H4 thereby mediating the association between histone H4 and PRMT5, the Janus kinase-binding protein 1 that catalyses the formation of symmetric dimethyl-arginine residues in proteins. COPR5 is specifically required for histone H4 'Arg-3' methylation mediated by PRMT5, but not histone H3 'Arg-8' methylation, suggesting that it modulates the substrate specificity of PRMT5. This family of proteins is found in eukaryotes.. +PF15341 Ribosome biogenesis protein SLX9
Eberhardt RY, Coggill P, Hetherington K. SLX9 is present in pre-ribosomes from an early stage and is implicated in the processing events that remove the ITS1 spacer sequences. In eukaryotes, biogenesis of ribosomes starts in the nucleolus with transcription by RNA polymerase I of a large precursor RNA molecule, called 35S pre-rRNA in yeast, in which the 18S, 5.8S, and 25S mature rRNAs reside, while RNA polymerase III transcribes a 3'-extended pre-5S rRNA. The 35S precursor also contains external transcribed spacer elements (5' and 3'-ETS) at either end as well as internal transcribed spacers (ITS1 and ITS2) that separate the mature sequences .. +PF15342 FAM212 family
Eberhardt RY, Coggill P, Hetherington K. This domain family is found in eukaryotes, and is approximately 60 amino acids in length.. +PF15343 Decidual protein induced by progesterone family
DEPP is a family of proteins expressed in various tissues, including pancreas, placenta, ovary, testis and kidney. High levels are found during the first trimester. Its expression is induced by progesterone, testosterone and, to a much lower extent, oestrogen. The family is alternatively known as fasting-induced gene protein, FIG.. +PF15344 FAM217 family
This family of proteins is found in eukaryotes. Proteins in this family are typically between 329 and 507 amino acids in length. There is a conserved YPDFLP sequence motif.. +PF15345 Transmembrane protein 51
This family of proteins is found in eukaryotes. Proteins in this family are typically between 233 and 253 amino acids in length.. +PF15346 Arginine and glutamate-rich 1
ARGLU, arginine and glutamate-rich 1 protein family, is required for the oestrogen-dependent expression of ESR1 target genes. It functions in cooperation with MED1. The family of proteins is found in eukaryotes.. +PF15347 Phosphoprotein associated with glycosphingolipid-enriched
PAG, or Cbp/PAG (Csk binding protein/phospho-protein associated with glycosphingolipid-enriched microdomains) is a transmembrane family that has a negative regulatory role in T-cell activation through being an adapter for C-terminal Src kinase, Csk. This family of proteins is found in eukaryotes.. +PF15348 Gemini of Cajal bodies-associated protein 8
Eberhardt RY, Coggill P, Hetherington K. GEMIN8 proteins are found in the nuclear bodies called gems (Gemini of Cajal bodies) that are often in proximity to Cajal (coiled) bodies themselves. They are also found in the cytoplasm . The family is part of the SMN (survival motor neurone) complex that plays an essential role in spliceosomal snRNP assembly in the cytoplasm and is required for pre-mRNA splicing in the nucleus. GEMIN8 binds directly to SMN1 and mediates the interaction of the GEMIN6-GEMIN7 heterodimer .. +PF15349 DDB1- and CUL4-associated factor 16
DCA16 is a family of eukaryotic proteins that interacts with DDB1 and CUL4A. The family may function as a substrate receptor for the CUL4-DDB1 E3 ubiquitin-protein ligase complex .. +PF15350 Ewing's tumour-associated antigen 1 homologue
This family of proteins is found in eukaryotes, where members are expressed at high levels in the brain, liver, kidney and Ewing tumour cell lines. Proteins in this family are typically between 648 and 898 amino acids in length.. +PF15351 Junctional protein associated with coronary artery disease
JCAD is a component of VE-cadherin-based cell-cell junctions in endothelial cells. The cell-cell or adherens junction is an adhesion complex that plays a crucial role in the organisation and function of epithelial and endothelial cellular sheets. These junctions join the actin cytoskeleton to the plasma membrane to form adhesive contacts between cells or between cells and extracellular matrix. The junctions also mediate both cell adhesion and cell-signalling. JCAD localises close to the apical membrane in epithelial cells. This family is found in eukaryotes.. +PF15352 Susceptibility to monomelic amyotrophy
This family of proteins is associated with a susceptibility to monomelic amyotrophy.. +PF15353 Headcase protein family homologue
Eberhardt RY, Coggill P, Hetherington K. HECA was characterised first in Drosophila where it regulates the proliferation and differentiation of cells during adult morphogenesis. In humans, HECA affects cell cycle progression and proliferation in head and neck cancer cells. It slows down cell division of oral squamous cell carcinoma cells and may thereby act as a tumour-suppressor in head and neck cancers. . +PF15354 Kidney-associated antigen 1
KAAG1, kidney-associated antigen 1, or RU2AS (RU2 antisense gene protein) has been found in mammals. It is expressed in testis and kidney, and, at lower levels, in urinary bladder and liver. It is expressed by a high proportion of tumours of various histologic origin, including melanomas, sarcomas and colorectal carcinomas.. +PF15355 Stretch-responsive small skeletal muscle X protein, Chisel
The murine X-linked gene Chisel (Csl/Smpx) is selectively expressed in cardiac and skeletal muscle cells. It localises to the costameric cytoskeleton of muscle cells through its association with focal adhesion proteins, where it may participate in regulating the dynamics of actin through the Rac1/p38 kinase pathway. Thus it is implicated in the maintenance of muscle integrity and in responses to biomechanical stress.. +PF15356 Psoriasis susceptibility locus 2
SPR1 is psoriasis susceptibility locus 2 protein family.. +PF15357 Psoriasis susceptibility 1 candidate 1
This family is considered a candidate for susceptibility to psoriasis.. +PF15358 Testis-specific serine kinase substrate
TSKS, testis-specific serine kinase substrate, is expressed in the testis and is downregulated in cancerous testicular tissue, in comparison with adjacent normal tissue. TSKS expression is very low to undetectable in seminoma, teratocarcinoma, embryonal, and Leydig cell tumours, while high in testicular tissue adjacent to tumours which contain pre-malignant carcinoma in situ . Recently it has been shown in human testis to be localised to the equatorial segment of ejaculated human sperm. The finding of a TSKS family member in mature sperm suggests that this family of kinases might play a role in sperm function . TSKS is localised during spermiogenesis to the centrioles of post-meiotic spermatids, where it reaches its greatest concentration during the period of flagellogenesis .. +PF15359 Carnitine deficiency-associated protein 3
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 128 and 251 amino acids in length. CDV3 is also known as TPP36 - tyrosine-phosphorylated protein 36. The function is not known.. +PF15360 APJ endogenous ligand
Apelin is among the most potent stimulators of cardiac contractility known. The apelin-APJ signaling pathway is an important novel mediator of cardiovascular control . Apelin is an adipokine secreted by adipocytes where it is co-expressed with apelin receptor (APJ) in adipocytes. It suppresses adipogenesis through MAPK kinase/ERK dependent pathways and prevents lipid droplet fragmentation, thereby inhibiting basal lipolysis through AMP kinase dependent enhancement of perilipin expression. It also inhibits hormone-stimulated acute lipolysis through decreasing perilipin phosphorylation. Apelin induces a decrease of free fatty acid release via its dual inhibition on adipogenesis and lipolysis . As a vaso-active and vascular cell growth-regulating peptide Apelin is a target of the BMP pathway, the TGF-beta/bone morphogenic protein (BMP) system - a major pathway for angiogenesis .. +PF15361 Resistance to inhibitors of cholinesterase homologue 3
Eberhardt RY, Coggill P, Hetherington K. RIC3 is a protein associated with nicotinic acetylcholine receptors (nAChRs), neurotransmitter-gated ion channels expressed at the neuromuscular junction and within the central and peripheral nervous systems. It can enhance functional expression of multiple nAChR subtypes. RIC3 promotes functional expression of homomeric alpha-7 and alpha-8 nicotinic acetylcholine receptors at the cell surface.. +PF15362 Enamelin
ENAMELIN is involved in the mineralisation and structural organisation of enamel. It is necessary for the extension of enamel during the secretory stage of dental enamel formation. The proteins are expressed in teeth, particularly in odontoblasts, ameloblasts and cementoblasts.. +PF15363 Domain of unknown function (DUF4596)
Eberhardt RY, Coggill P, Hetherington K. This domain family is found in eukaryotes, and is approximately 50 amino acids in length. There is a conserved ELET sequence motif. There are two completely conserved residues (S and E) that may be functionally important.. +PF15364 PAXIP1-associated-protein-1 C term PTIP binding protein
Eberhardt RY, Coggill P, Hetherington K. This protein domain family is the C-terminal domain of PAXIP1-associated-protein-1, which also goes by the name PTIP-associated protein 1. This family of proteins is found in eukaryotes. The function of this protein is to localise at the site of DNA damage and form foci with PTIP at the DNA break point. Furthermore, studies have shown that depletion of PA1 increases cellular sensitivity to ionizing radiation. Proteins in this family are typically between 122 and 254 amino acids in length .. +PF15365 Proline-rich nuclear receptor coactivator
Eberhardt RY, Coggill P, Hetherington K. The PNRC family, proline-rich nuclear receptor coactivator, is found in eukaryotes, and is approximately 60 amino acids in length. There is a conserved YAG sequence motif.. +PF15366 Domain of unknown function (DUF4597)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 63 and 76 amino acids in length. There is a conserved TPPTPT sequence motif.. +PF15367 Calcium-binding and spermatid-specific protein 1
CABS1 is a family of proteins found in eukaryotes. It is also known as NYD-SP26. It binds calcium and is specifically expressed in the elongate spermatids and then localised into the principal piece of flagella of matured spermatozoa.. +PF15368 Spermatogenesis family BioT2
BioT2 is a family of eukaryotic proteins expressed only in the testes. BioT2 is found abundantly in five types of murine cancer cell lines, suggesting it plays a role in testes development as well as tumourigenesis [1,2,3].. +PF15369 Uncharacterised protein KIAA1328
Eberhardt RY, Coggill P, Hetherington K. The function of this protein family remains uncharacterised. This family of proteins is found in eukaryotes.. +PF15370 Domain of unknown function (DUF4598)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 159 and 251 amino acids in length.. +PF15371 Domain of unknown function (DUF4599)
The function of this family of eukaryotic proteins is not known.. +PF15372 Domain of unknown function (DUF4600)
+PF15373 Domain of unknown function (DUF4601)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function, which is found in eukaryotes. In humans, the gene encoding this protein is found in the position, chromosome 19 open reading frame 45.. +PF15374 Coiled-coil domain-containing protein 71L
Eberhardt RY, Coggill P, Hetherington K. The protein family, Coiled-coil domain-containing protein 71L, is a domain of unknown function, which is found in eukaryotes.. +PF15375 Domain of unknown function (DUF4602)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 173 and 294 amino acids in length. This family includes Human C1orf131.. +PF15376 Domain of unknown function (DUF4603)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function. In particular, this domain lies at the C-terminal end of a protein found in eukaryotes.. +PF15377 Domain of unknown function (DUF4604)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 141 and 174 amino acids in length and contain a conserved LSF sequence motif.. +PF15378 Domain of unknown function (DUF4605)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 82 and 137 amino acids in length.. +PF15379 Domain of unknown function (DUF4606)
This domain family is found in eukaryotes, and is approximately 100 amino acids in length.. +PF15380 Domain of unknown function (DUF4607)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 207 and 359 amino acids in length.. +PF15381 Domain of unknown function (DUF4608)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 85 and 157 amino acids in length.. +PF15382 Domain of unknown function (DUF4609)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 70 and 139 amino acids in length.. +PF15383 Transmembrane protein 237
Eberhardt RY, Coggill P, Hetherington K. This protein family is found in eukaryotes. The function of this protein is to aid the production of new cilia in ciliogenesis. Mutations in the protein cause a disease, named Joubert syndrome type 14 (JBTS14) and also affect cell signalling using the Wnt pathway . Proteins in this family are typically between 203 and 512 amino acids in length. There are two completely conserved G residues that may be functionally important.. +PF15384 Domain of unknown function (DUF4610)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 164 and 206 amino acids in length. There is a conserved NPG sequence motif.. +PF15385 Specifically androgen-regulated gene protein
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes, the function of this protein is still unknown but it is thought to be an androgen receptor. Protein expression is up-regulated in the presence of androgens, but not in the presence of glucocorticoids. SARG tends to be highly expressed in prostate tissue . Proteins in this family are typically between 340 and 587 amino acids in length. There is a conserved EETI sequence motif.. +PF15386 PRR14;
Drosophila Tantalus-like. Iyer LM, Aravind L, Eberhardt RY, Coggill P, Hetherington K. An alpha+beta fold domain found in metazoan proteins such as Drosophila Tantalus . Drosophila Tantalus binds the chromatin protein Additional sex combs (Asx) and also binds DNA in vitro .. +PF15387 Domain of unknown function (DUF4611)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 71 and 100 amino acids in length. There is a conserved AKR sequence motif.. +PF15388 Protein Family FAM117
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function found in eukaryotes. Proteins in this family are typically between 269 and 453 amino acids in length. There are two conserved sequence motifs: RRT and TQT.. +PF15389 Domain of unknown function (DUF4612)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 109 and 323 amino acids in length.. +PF15390 Domain of unknown function (DUF4613)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 625 and 725 amino acids in length.. +PF15391 Domain of unknown function (DUF4614)
This domain family is found in eukaryotes, and is approximately 180 amino acids in length. There is a conserved EALT sequence motif.. +PF15392 Joubert syndrome-associated
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is a domain of unknown function, which is found in eukaryotes. However, mutations in the gene lead to Joubert's Syndrome, indicating that the protein that the gene encodes for is vital for correct ciliogenesis .. +PF15393 Domain of unknown function (DUF4615)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 161 and 229 amino acids in length. There is a single completely conserved residue F that may be functionally important.. +PF15394 Domain of unknown function (DUF4616)
Eberhardt RY, Coggill P, Hetherington K. This protein family is a domain of unknown function found at the C-terminal domain of the proteins. This protein family is found in eukaryotes. Proteins in this family are typically between 166 and 538 amino acids in length.. +PF15395 Domain of unknown function (DUF4617)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 702 and 1745 amino acids in length.. +PF15396 Protein Family FAM60A
Eberhardt RY, Coggill P, Hetherington K. This protein family, FAM60A, is found in eukaryotes. It is known to be a cell cycle protein that binds to the promoter of a gene transcription repressor complex, named SIN4-HDAC complex. This means that FAM60A has an important role to play in 'switching on' gene expression . Proteins in this family are typically between 179 and 324 amino acids in length.. +PF15397 Domain of unknown function (DUF4618)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 238 and 363 amino acids in length. There are two conserved sequence motifs: EYP and KCTPD.. +PF15398 Domain of unknown function (DUF4619)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 128 and 299 amino acids in length.. +PF15399 Domain of unknown function (DUF4620)
+PF15400 Testis-expressed sequence 33 protein family
This family of proteins is found in eukaryotes. Proteins in this family are typically between 147 and 280 amino acids in length. There are two conserved sequence motifs: NIRH and SYT. The function is not known.. +PF15401 Tryptophan-ring motif of head of Trimeric autotransporter adhesin
TAA-head_Trp-ring is the tryptophan-ring motif of some Gram-negative Enterobacteriaceae. The Trp-ring folds into a beta-meander type on the top of the head domain of its trimeric autotransporter adhesin proteins. In conjunction with the GIN domain it is thought to be the region of the head that adheres to fibronectin.. +PF15402 N-terminus of kinetochore NMS complex subunit Spc7
+PF15403 HiaBD2_N domain of Trimeric autotransporter adhesin (GIN)
HiaBD2_N may represent the GIN domain of the Head region of TAAs - trimeric autotransporter adhesins. Not all TAAs carry this domain; however, in those that do, the GIN in combination with the Trp-ring domain is necessary for adhesion to fibronectin in the host cell.. +PF15404 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15405 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15406 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15407 Sporulation protein family 7
Pfam-B_51974 (release 26.0). Spo7_2 constitutes a different set of fungal and related species from those found in Spo7. This domain is found in general at the N-terminus. In many members the domain is associated with a Pleckstrin-homology - PH - domain.. +PF09061 Stirrup
Bio::Pfam::PfamLiveDBManager=HASH(0x4ef6130). The Stirrup domain, found in the prokaryotic protein ribonucleotide reductase, has a molecular mass of 9 kDa and is folded into an alpha/beta structure. It allows for binding of the reductase to DNA via electrostatic interactions, since it has a predominance of positive charges distributed on its surface .. +PF15408 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15409 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15410 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15411 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15412 Binding domain of Nse4/EID3 to Nse3-MAGE
This family includes Nse4 and EID3 members , that bind over this region to the Nse3 pocket, in MAGE family Pfam:PF01454 .. +PF15413 Pleckstrin homology domain
This Pleckstrin homology domain is found in some fungal species.. +PF15414 Protein of unknown function (DUF4621)
JCSG:Target_394740-GS13541A. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 350 amino acids in length.. +PF15415 Protein of unknown function (DUF4622)
JCSG:Target_390149-GS13960A. This family of proteins is found in bacteria. Proteins in this family are typically between 348 and 360 amino acids in length.. +PF15416 Domain of unknown function (DUF4623)
This family of proteins is found in bacteria. Proteins in this family are approximately 470 amino acids in length. There are two conserved sequence motifs: HLL and RYL.. +PF15417 Domain of unknown function (DUF4624)
JCSG:Target_390388-GS13780A. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length.. +PF15418 Domain of unknown function (DUF4625)
JCSG:Target_390125-GS13882B. This family contains a likely bacterial Ig-like fold, suggesting it may be a family of lipoproteins.. +PF15419 Leukemia NUP98 fusion partner 1
This family of proteins includes leukemia NUP98 fusion partner 1, the gene encoding this protein is involved in a chromosomal translocation with the NUP98 locus in a form of T-cell acute lymphoblastic leukemia .. +PF15420 Alpha/beta-hydrolase family N-terminus
This is the N-terminal transmembrane domain of a family of alpha/beta hydrolases which may function as lipases. The C-terminal domain (Pfam:PF10081) is the catalytic domain .. +PF15421 Putative polysaccharide deacetylase
JCSG:Target_416920-SP13771A. +PF15422 Domain of unknown function (DUF4626)
+PF15423 FLYWCH-type zinc finger-containing protein
This family is the N-terminus of some FLYWCH-zinc-finger proteins, found in eukaryotes. The family is found in association with Pfam:PF04500. There are two conserved sequence motifs: EQE and QEPS.. +PF15424 Odontogenic ameloblast-associated family
+PF15425 Domain of unknown function (DUF4627)
This family of proteins is found in bacteria. Proteins in this family are approximately 230 amino acids in length. There is a conserved WYK sequence motif.. +PF15427 S100P-binding protein
S100PBPR is a family of proteins found in eukaryotes, and localised to cell nuclei where S100P is also present, and the two proteins co-immunoprecipitate. S100P is a member of the S100 family of calcium-binding proteins and there have been several recent reports of its over-expression in pancreatic ductal adenocarcinoma. In situ hybridisation shows S100PBPR transcripts to be found in islet cells but not duct cells of the healthy pancreas. An interaction between S100P and S100PBPR may be involved in early pancreatic cancer.. +PF15428 Immunity protein 14
A predicted immunity protein with mostly all-beta fold and several conserved hydrophobic residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI1 or Tox-HNH family . The protein is also found in heterogeneous polyimmunity loci.. +PF15429 Domain of unknown function (DUF4628)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 152 and 673 amino acids in length.. +PF15430 Single domain von Willebrand factor type C
+PF15431 Transmembrane protein 190
+PF15432 Accessory Sec secretory system ASP3
Sec-ASP3 is a family of bacterial proteins involved in the Sec secretory system. The family forms part of the accessory SecA2/SecY2 system specifically required to export GspB, a serine-rich repeat cell-wall glycoprotein adhesin encoded upstream in the same operon.. +PF15433 Mitochondrial 28S ribosomal protein S31
Eberhardt RY, Coggill P, Hetherington K. MRP-S31 is the mitochondrial 28S ribosomal subunit S31. This family of proteins is found in eukaryotes. Proteins in this family are typically between 246 and 395 amino acids in length. There are two conserved sequence motifs: RHFMELV and GLSKN.. +PF15434 Family 104
This family of proteins is found in eukaryotes. Proteins in this family are typically between 113 and 185 amino acids in length. There is a conserved SLQ sequence motif.. +PF15435 UNC119-binding protein C5orf30 homologue
UNC119_bdg is a family of eukaryotic proteins that probably plays a role in trafficking of proteins, via interaction with unc119 family cargo adapters. The family may play a role in ciliary membrane localisation.. +PF15436 Plasminogen-binding protein pgbA N-terminal
PGBA_N is an N-terminal family of bacterial proteins that bind plasminogen. This activity was identified in Helicobacter pylori where it is thought to contribute to the virulence of this bacterium. Both PgbA and PgbB are surface-exposed proteins that mediate binding to plasminogen such that it can be converted into plasmin in the presence of a Pg activator.. +PF15437 Plasminogen-binding protein pgbA C-terminal
PGBA_C is a C-terminal family of bacterial proteins that bind plasminogen. This activity was identified in Helicobacter pylori where it is thought to contribute to the virulence of this bacterium. Both PgbA and PgbB are surface-exposed proteins that mediate binding to plasminogen such that it can be converted into plasmin in the presence of a plasminogen activator.. +PF15438 Antigenic membrane protein of phytoplasma
Phyto-Amp is a family of phytopathogenic wall-less bacterial antigenic membrane proteins . The bacteria are limited to the phloem and pose a major threat to agriculture worldwide. They are transmitted in a persistent, propagative manner by phloem-sucking Hemipteran insects. Phytoplasma membrane proteins are in direct contact with hosts and are assumed to be involved in determining vector specificity. Phyto-Amp is thought to be one family of proteins that mediates such specificity. The proteins appear to be encoded by circular extrachromosomal elements, at least one of which is a plasmid .. +PF15439 Neuronal tyrosine-phosphorylated phosphoinositide-3-kinase adapter
NYAP_N is an N-terminal family of eukaryotic proteins that are substrates of tyrosine kinase in the brain. When first identified, the family members were referred to as unconventional myosin XVI, or Myr 8 . However, proteins have now been identified as being integrally involved in neuronal function and morphogenesis. The family is involved in both the activation of phosphoinositide 3-kinase (PI3K) and the recruitment of the downstream effector WAVE complex to the close vicinity of PI3K; it also appears to regulate the brain size and neurite outgrowth in mice .. +PF15440 THRAP3/BCLAF1 family
This family includes thyroid hormone receptor-associated protein 3 (THRAP3), which is a spliceosome component and a subunit of the TRAP complex which plays a role in pre-mRNA splicing and in mRNA decay . It also includes the transcriptional repressor Bcl-2-associated transcription factor 1 (BCLAF1) .. +PF15441 Rho guanine nucleotide exchange factor 5/35
This family includes Rho guanine nucleotide exchange factor 5 and Rho guanine nucleotide exchange factor 35.. +PF15442 Domain of unknown function (DUF4629)
This domain family is found in eukaryotes, and is approximately 150 amino acids in length. There are two conserved sequence motifs: MHML and LGKK.. +PF15443 Domain of unknown function (DUF4630)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 124 and 286 amino acids in length.. +PF15444 Transmembrane protein 247
This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 197 and 222 amino acids in length. The function of this family is unknown.. +PF15445 acidic terminal segments, variant surface antigen of PfEMP1
ATS is the intracellular and relatively conserved acidic terminal segment of the Plasmodium falciparum erythrocyte membrane protein-1 (PfEMP1) . This domain appears to be present in all variants of the highly polymorphic PfEMP1 proteins.. +PF15446 PHD/FYVE-zinc-finger like domain
Pfam-B_5236 (release 26.0). This family appears to be a combination domain of several consecutive zinc-binding regions.. +PF15447 N-terminal segments of PfEMP1
NTS, the N-terminal segment, is the most variable part of the variant surface antigen family of Plasmodium falciparum, the erythrocyte membrane protein-1 (PfEMP1) proteins. PfEMP1 is an important target for protective immunity and is implicated in the pathology of malaria through its ability to adhere to host endothelial receptors . A structural and functional study of the N-terminal domain of PfEMP1 from the VarO variant comprising the N-terminal segment (NTS) and the first DBL domain (DBL1α1), shows this region is directly implicated in rosetting. NTS, previously thought to be a structurally independent component of PfEMP1, forms an integral part of the DBL1α domain that is found to be the important heparin-binding site . This family is closely associated with PFEMP, Pfam:PF03011, and Duffy_binding, Pfam:PF05424.. +PF15448 N-terminal segments of P. falciparum erythrocyte membrane protein
NTS_2 is a family of the most variable part of the variant surface antigen family of Plasmodium falciparum, the erythrocyte membrane protein-1 (PfEMP1) . However, in this group of proteins conservation is high. PfEMP1 is an important target for protective immunity and is implicated in the pathology of malaria through its ability to adhere to host endothelial receptors.. +PF15449 Retinal protein
This family of proteins is found in the photoreceptor cells of the retina [1,2]. Mutations of the gene encoding this protein have been associated with retinal disorders such as retinitis pigmentosa and late-onset progressive retinal atrophy [1-4]. The function of this family of proteins is unknown, but it is likely to be important in the development and function of the retina [2-3].. +PF15450 Domain of unknown function (DUF4631)
This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 394 and 668 amino acids in length.. +PF15451 Domain of unknown function (DUF4632)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 59 and 190 amino acids in length.. +PF15452 Neuronal tyrosine-phosphorylated phosphoinositide-3-kinase adapter
NYAP_C is a C-terminal family of eukaryotic proteins that are substrates of tyrosine kinase in the brain. When first identified, the family members were referred to as unconventional myosin XVI, or Myr 8 . However, proteins have now been identified as being integrally involved in neuronal function and morphogenesis. The family is involved in both the activation of phosphoinositide 3-kinase (PI3K) and the recruitment of the downstream effector WAVE complex to the close vicinity of PI3K; it also appears to regulate the brain size and neurite outgrowth in mice .. +PF15453 Protein incorporated later into Tight Junctions
Pilt is a family of eukaryotic tight junction-proteins that binds to guanylate-kinase. Pilt is a component of TJs (Tight junctions) rather than AJs (Adhesin junctions). The protein is incorporated into TJs after TJ strands are formed, thereby suggesting the name Pilt for 'protein incorporated later into TJs'. Pilt binds to the guanylate-kinase region of hDlg otherwise known as Disk large homologue .. +PF15454 Late endosomal/lysosomal adaptor and MAPK and MTOR activator
LAMTOR is a family of eukaryotic proteins that have otherwise been referred to as Lipid raft adaptor protein p18, Late endosomal/lysosomal adaptor and MAPK and MTOR activator 1, and Protein associated with DRMs and endosomes. It is found to be one of three small proteins constituting the Rag complex or Ragulator that interact with each other, localise to endosomes and lysosomes, and play positive roles in the MAPK pathway. The complex does this by interacting with the Rag GTPases, recruiting them to lysosomes, and bringing about mTORC1 activation.. +PF15455 Proline-rich 19
This family includes proline-rich protein 19.. +PF15456 Up-regulated During Septation
Uds1 is a domain family found mostly in fungi, and is typically between 120 and 138 amino acids in length. The GO annotation for the S.pombe protein describes the protein as barrier septum assembly involved in cell cycle cytokinesis, GO:0071937. Many of the uncharacterised members are listed as being involucrin repeat proteins, but this can not be substantiated.. +PF15457 Type III T3SS secreted effector HopW1-1/HopPmaA
HopW1-1 is a family of bacterial modular P. syringae Avr effectors that induce accumulation of the signal molecule salicylic acid (SA) and the transcripts of HWI1 (HOPW1-1-INDUCED GENE1) in Arabidopsis. Thus HopW1-1 elicits a resistance response in Arabidopsis .. +PF15458 Nineteen complex-related protein 2
NTR2 or Nineteen complex-related protein 2 is a family of largely fungal and plant proteins that form a complex with the DExD/H-box RNA helicase Prp43. Along with NTR1 it is an accessory factor of Prp43 in catalysing spliceosome disassembly. Disassembly of the spliceosome after completion of the splicing reaction is necessary for recycling of splicing factors to promote efficient splicing . NTR2 and NTR1 associate with a post-splicing complex containing the excised intron and the spliceosomal U2, U5, and U6 snRNAs, that supports a link with a late stage in the pre-mRNA splicing process .. +PF15459 60S ribosome biogenesis protein Rrp14
Pfam-B_10508 (release 26.0). RRP14 is a family of nucleolar 60S ribosomal biogenesis proteins from eukaryotes. RRP14 functions in ribosome synthesis as it is required for the maturation of both small and large subunit rRNAs and it helps to prevent premature cleavage of the pre-rRNA at site C2 . It also plays a role in cell polarity and/or spindle positioning.. +PF15460 Something about silencing, SAS, complex subunit 4
SAS4 is a family of largely fungal silencing regulators. This silencing is mediated by chromatin. SAS4 specifically silences the yeast mating-type genes HML and HMR . SAS4 is found to be one subunit of a complex, the SAS complex, that interacts with chromatin assembly factor Asf1p, and asf1 mutants show silencing defects similar to mutants in the SAS complex. Thus, ASF1-dependent chromatin-assembly may mediate the role of the SAS complex in silencing . Co-expression of Sas2, SAS4, and Sas5 in Escherichia coli leads to formation of a stable SAS complex that acetylates histones. SAS4 is essential for the acetyltransferase activity of Sas2, and Sas5 is also important .. +PF15461 Beta-carotene 15,15'-dioxygenase
BCD is a family of proteins found in bacteria and archaea that catalyse or regulate the conversion of beta-carotene to retinal . Characterisation of BCD proteins shows them to cleave beta-carotene at its central double bond (15,15′) to yield two molecules of all-trans-retinal. However, the oxygen atom of retinal originated not from water but from molecular oxygen, suggesting that the enzyme was a beta-carotene 15,15′-dioxygenase, rather than a mono-oxygenase that catalyzes the same biochemical reaction [2,3].. +PF15462 Bartter syndrome, infantile, with sensorineural deafness (Barttin)
Barttin is a family of mammalian proteins that are chloride ion channel beta-subunits crucial for renal Cl-re-absorption and inner ear K+ secretion. Bartter syndrome is a term covering a heterogeneous group of autosomal recessive salt-losing nephropathies that are caused by disturbed transepithelial sodium chloride re-absorption in the distal nephron. Mutations in the BCD proteins lead to sensorial deafness.. +PF15463 Extracellular mutant protein 11
ECM11 is a family of largely fungal proteins. ECM11 interacts with Cdc6, an essential protein involved in the initiation of DNA replication, and is a nuclear protein involved in maintaining chromatin structure . It was previously identified as a protein involved in yeast cell wall biogenesis and organisation, but is also found to be required in meiosis where its function is related to DNA replication and crossing-over .. +PF15464 Domain of unknown function (DUF4633)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 94 and 123 amino acids in length.. +PF15465 Domain of unknown function (DUF4634)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 98 and 133 amino acids in length.. +PF15466 Domain of unknown function (DUF4635)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 120 and 154 amino acids in length. There are two conserved sequence motifs: LEQ and DLE.. +PF15467 Secretogranin-3
Secretogranin_3 is a family of vertebrate proteins that is one of the granin family. Granins are rich in acidic amino acids, exhibit aggregation at low pH, and possess a high capacity for calcium binding. Because granins are restricted in their localisation to secretory granules of neuroendocrine cells, two interesting characteristics of their sorting mechanisms have been observed. These are, first, that they aggregate on low pH/high calcium concentrations and second that two of them carry an N-terminal disulfide loop, mutations in which lead to mis-sorting. Thus, granins are thought to be essential for the sorting of secretory proteins at the trans-Golgi network. Chromogranin A (CgA) binds to SGIII in secretory granules of endocrine cells . SGIII directly binds to cholesterol components of the secretory granule membrane and targets CgA to secretory granules in pituitary and pancreatic endocrine cells . Mutations in the SGIII gene may influence the risk of obesity through possible regulation of hypothalamic neuropeptide secretion .. +PF15468 Domain of unknown function (DUF4636)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 196 and 244 amino acids in length.. +PF15469 Exocyst complex component Sec5
Pfam-B_353125 (release 26.0). This conserved Sec5 family of eukaryotic proteins does not represent the Sec5-Ral binding site.. +PF15470 Domain of unknown function (DUF4637)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 142 and 178 amino acids in length.. +PF15471 Transmembrane protein family 171
This family of proteins is found in eukaryotes. TMEM171 is also known as parturition-related protein 2. Proteins in this family are typically between 242 and 326 amino acids in length.. +PF15472 Domain of unknown function (DUF4638)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 240 and 272 amino acids in length.. +PF15473 PEST, proteolytic signal-containing nuclear protein family
Eberhardt RY, Coggill P, Hetherington K. PCNP is a PEST-containing nuclear protein that is ubiquitinated by NIRF, a Np95/ICBP90-like RING finger protein. PEST sequences, which are rich in proline (P), glutamic acid (E), serine (S) and threonine (T), are found in a number of short-lived proteins, such as transcription factors and cell cycle-associated proteins. Their function is generally controlled by proteolysis, mostly via ubiquitin-mediated degradation. Thus, NIRF and PCNP are a ubiquitin ligase and its substrate, respectively, that may constitute a novel signalling pathway with some relation to cell proliferation .. +PF15474 Meiotically up-regulated gene family
This protein was identified as being up-regulated during meiosis in S.pombe. This family of proteins is found largely in plants and fungi. Proteins in this family are typically between 128 and 920 amino acids in length.. +PF15475 Transmembrane protein C12orf23, UPF0444
This family of proteins is found in eukaryotes. Proteins in this family are typically between 94 and 119 amino acids in length.. +PF15476 Histone deacetylase complex subunit SAP25
SAP25 is a family of proteins found in eukaryotes. SAP25 is a core component of the mSin3 co-repressor complex whose subcellular location is regulated by PML. mSin3, the transcriptional co-repressor, is associated with histone deacetylases (HDACs) and is utilised by many DNA-binding transcriptional repressors. SAP25 is a nucleo-cytoplasmic shuttling protein that is actively exported from the nucleus by a CRM1-dependent mechanism. It binds to the PAH1 domain of mSin3A, associates with the mSin3A-HDAC complex in vivo, and represses transcription when tethered to DNA [1,2].. +PF15477 Small acidic protein family
This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a single completely conserved residue G that may be functionally important.. +PF15478 Family of unknown function with LKAAEAR motif
This family of proteins is found in eukaryotes. Proteins in this family are typically between 119 and 235 amino acids in length. There is a conserved LKAAEAR sequence motif.. +PF15479 Domain of unknown function (DUF4639)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 161 and 601 amino acids in length.. +PF15480 Domain of unknown function (DUF4640)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 99 and 306 amino acids in length.. +PF15481 Chondroitin proteoglycan 4
CPG4 is a domain family found in nematodes of one of nine core chondroitin proteoglycans. Vertebrates produce multiple chondroitin sulfate proteoglycans that play important roles in development and tissue mechanics. In the nematode Caenorhabditis elegans, the chondroitin chains lack sulfate but nevertheless play essential roles in embryonic development and vulval morphogenesis. CPG4 has the largest predicted mass of the C. elegans CPGs at 84 kDa. The majority of its 35 predicted glycosaminoglycan attachment sites reside in the COOH-terminal half of the protein, of which four sites were confirmed by DTT modification . The family is rich in conserved cysteines.. +PF15482 Coiled-coil domain-containing glutamate-rich protein family 1
This is a family of coiled-coil family proteins found in eukaryotes. Proteins in this family are typically between 160 and 397 amino acids in length.. +PF15483 Domain of unknown function (DUF4641)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 201 and 519 amino acids in length.. +PF15484 Domain of unknown function (DUF4642)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 115 and 196 amino acids in length.. +PF15485 Domain of unknown function (DUF4643)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 254 and 462 amino acids in length.. +PF15486 Domain of unknown function (DUF4644)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 143 and 191 amino acids in length.. +PF15487 FAM220 family
This protein family is a domain of unknown function which is found in eukaryotes. Proteins in this family are typically between 217 and 277 amino acids in length. There are two completely conserved residues (S and L) that may be functionally important.. +PF15488 Domain of unknown function (DUF4645)
This family of proteins is found in eukaryotes. Proteins in this family are typically between 200 and 298 amino acids in length.. +PF15489 CST, telomere maintenance, complex subunit CTC1
Pfam-B_19246 (release 26.0). CTC1 is one of the three components of the CST complex that assists Shelterin to protect the ends of telomeres from attack by DNA-repair mechanisms. Mutations in human CTC1 have been recognised as contributing to cerebroretinal microangiopathy.. +PF15490 Telomere-capping, CST complex subunit
Ten1_2 is a family of primarily plant and vertebrate telomere-capping proteins that is evolutionarily related to the mostly fungal family of Ten1, Pfam:PF12658.. +PF15491 CST, telomere maintenance, complex subunit CTC1
CTC1 is one of the three components of the CST complex that assists Shelterin to protect the ends of telomeres from attack by DNA-repair mechanisms. This family largely represents sequences from plant species.. +PF15492 Neuroblastoma-amplified sequence, N terminal
Nbas_N is an N-terminal family of metazoan sequences. This domain lies at the N-terminal of several WD40-containing proteins. The human protein is over-expressed in neuroblastoma cells .. +PF15493 Domain of unknown function, YrpD
JCSG:Target-418961/SP17457A. This family of proteins is found in bacteria. Proteins in this family are typically between 236 and 351 amino acids in length. The member from Bacillus subtilis, UniProtKB:O05411, is named YrpD.. +PF15494 Scavenger receptor cysteine-rich domain
SRCR_2 is a scavenger receptor cysteine-rich domain family found largely on vertebrate sequences up-stream of the trypsin-like transmembrane serine protease, Spinesin.. +PF15495 Major fimbrial subunit protein type IV, Fimbrillin, C-terminal
JCSG:Target-417041/SP13489F. Fimbrillin_C is a C-terminal family of major fimbrial subunit protein type IV proteins largely from Bacillus species. The family is associated with family P_gingi_FimA, Pfam:PF06321.. +PF15496 Domain of unknown function (DUF4646)
Pfam-B_61885 (release 26.0). This is a family of proteins largely from fungi. The function is not known.. +PF15497 snRNA-activating protein complex subunit 19, SNAPc subunit 19
Eberhardt RY, Coggill P, Hetherington K. SNAPc19 is a family of proteins found in eukaryotes. It is one of the five core components of the snRNA-activating protein complex or SNAPc that helps direct the nucleation of RNA polymerases II and III. The core RNA polymerase II snRNA promoters consist of a single essential element, the proximal sequence element (PSE), whereas the core RNA polymerase III snRNA promoters consist of both a PSE and a TATA box. The SNAPc binds to the PSE of both of these. SNAPc recognises the PSE sequence common to all human snRNA genes, irrespective of polymerase specificity. SNAPc is also known as the PSE transcription factor (PTF) or PSE-binding protein (PBP). The human SNAP19 and SNAP45 subunits are dispensable for transcription in vitro and are not as widely conserved as the other three, SNAP190, SNAP43 and SNAP50, suggesting that these vertebrate-specific SNAPc subunits may have adapted specialised regulatory roles for snRNA gene transcription .. +PF15498 Nephrin and CD2AP-binding protein, Dendrin
Eberhardt RY, Coggill P, Hetherington K. Dendrin is a family of eukaryotic proteins found in the podocytes of the kidneys. Dendrin, originally identified in telencephalic dendrites, is a constituent of the slit diaphragm, SD, complex of podocytes, where it directly binds to nephrin and CD2AP. Kidney podocytes and their slit diaphragms (SDs) form the final barrier to urinary protein loss. SD proteins also participate in intracellular signalling pathways. Dendrin appears to prevent programmed cell death (apoptosis) through its binding to nephrin. The SD protein nephrin serves as a component of a signalling complex that directly links podocyte junctional integrity to actin cytoskeletal dynamics. Thus, dendrin is identified as an SD family with proapoptotic signalling properties that accumulates in the podocyte nucleus in response to glomerular injury.. +PF15499 Ubiquitin-specific peptidase-like, SUMO isopeptidase
Rawlings N, Coggill P. Peptidase_C98 is a small family of SUMO - small ubiquitin-related modifier - isopeptidases found in eukaryotes. Reversible attachment of SUMO is an essential protein modification in all eukaryotic cells. The family neither binds nor cleaves ubiquitin, but is a potent SUMO isopeptidase, and the invariant residues required for SUMO binding and cleavage, in UniProtKB:Q5W0Q7, are Cys-236, His-456 and Asp-472, all of which are fully conserved in the family. Member proteins are low-abundance proteins that colocalise with coilin in Cajal bodies. Peptidase_C98 depletion does not affect global sumoylation, but causes striking coilin mis-localisation and impairs cell proliferation, functions that are not dependent on the catalytic activity. Thus, Peptidase_C98 represents a third type of SUMO protease, with essential functions in Cajal body biology.. +PF15500 Putative RNase-like toxin
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and conserved cysteine, . +PF15501 Nuclear protein MDM1
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is present in the nucleus . The function of MDM1 is not known.. +PF15502 M-phase-specific PLK1-interacting protein
Eberhardt RY, Coggill P,. +PF15503 PPP1R35;
Protein phosphatase 1 regulatory subunit 35 C-terminus. Eberhardt RY, Coggill P,. This is the C-terminus of protein phosphatase 1 regulatory subunit 35. This protein interacts with and inhibits the serine/threonine-protein phosphatase PPP1CA .. +PF15504 Domain of unknown function (DUF4647)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 282 and 480 amino acids in length.. +PF15505 Domain of unknown function (DUF4648)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 115 and 207 amino acids in length.. +PF15506 OCC1 family
Eberhardt RY, Coggill P. The human member of this family, overexpressed in colon carcinoma 1 protein (Swiss:Q8TAD7) has been shown to be overexpressed in several colon carcinomas .. +PF15507 Domain of unknown function (DUF4649)
Pfam-B_83 (release 26.0). This family of Firmicute sequences has members that are annotated as ribose-phosphate pyrophosphokinase; however there is no evidence for this attribution. Member proteins are all shorter than 100 residues in length.. +PF15508 beta subunit of N-acylethanolamine-hydrolyzing acid amidase
De Vivo M, Coggill P. NAAA-beta is a family of vertebrate sequences that form the beta subunit of vertebrate N-acylethanolamine-hydrolyzing acid amidase, a member of the choloylglycine hydrolase acid ceramidase family. The alpha subunit is represented by family CBAH, Pfam:PF02275.. +PF15509 Domain of unknown function (DUF4650)
Pfam-B_31507 (release 26.0). This family of vertebrate proteins lies to the C-terminus of Ubiquitin-specific peptidase-like protein family peptidase_C98, Pfam:PF15499. It might be acting as the exosite for the peptidase.. +PF15510 Centromere kinetochore component W
Pfam-B_49340 (release 26.0). CENP-W is a family of vertebrate kinetochore proteins that associates directly with CENP-T. CENP-W members are histone-fold proteins. The histone fold region is critical for binding to centromeric DNA. Importantly, the CENP-T-W complex does not directly associate with CENP-A, but with histone H3 in the centromere region. CENP-T and -W form a hetero-tetramer with CENP-S and -X and bind to a ~100 bp region of nucleosome-free DNA forming a nucleosome-like structure. The DNA-CENP-T-W-S-X complex is likely to be associated with histone H3-containing nucleosomes rather than with CENP-nucleosomes.. +PF15511 Centromere kinetochore component CENP-T
Pfam-B_9162 (release 26.0). CENP-T is a family of vertebrate kinetochore proteins that associates directly with CENP-W. The N-terminus of CENP-T proteins interacts directly with the Ndc80 complex in the outer kinetochore. Importantly, the CENP-T-W complex does not directly associate with CENP-A, but with histone H3 in the centromere region. CENP-T and -W form a hetero-tetramer with CENP-S and -X and bind to a ~100 bp region of nucleosome-free DNA forming a nucleosome-like structure. The DNA-CENP-T-W-S-X complex is likely to be associated with histone H3-containing nucleosomes rather than with CENP-nucleosomes.. +PF15512 Chromatin assembly factor complex 1 subunit p60, C-terminal
Pfam-B_74766 (release 26.0). CAF-1_p60_C is a family of vertebrate proteins that is involved in chromatin assembly. CAF-1_p60 is one of the three subunits of the CAF-1 complex, and this domain binds to the C-terminal region of CAF-1_p150, family Pfam:PF12253. The N-terminal part of the CAF-1_p60 proteins is a WD-repeat structure, Pfam:PF00400.. +PF15513 Domain of unknown function (DUF4651)
JCSG target SP18156A. A family of short, secreted proteins specific to the Streptococcus genus, with distant homologs, not recognized by this HMM, found in other cocci. In all sequenced genomes, proteins from this family appear in a conserved genomic context with a thioredoxin, tRNA synthase and tRNA binding protein, but the functional implication of this is unclear. +PF15514 Restriction endonuclease ThaI
This family of restriction endonucleases belongs to the PD-(D/E)XK superfamily. It cuts the recognition site CG^CG leaving blunt ends .. +PF15515 MvaI/BcnI restriction endonuclease family
This family of proteins includes the restriction endonucleases MvaI and BcnI. These enzymes both function as monomers. MvaI cleaves the sequence CC/WGG, where W is an A or a T nucleotide, leaving sticky ends. BcnI cleaves the sequence CC/SGG, where S is G or C, leaving sticky ends [1-2].. +PF15516 BpuSI N-terminal domain
This is the N-terminal (nuclease) domain of the BpuSI restriction endonuclease .. +PF15517 TBP-interacting protein N-terminus
This is the N-terminal restriction endonuclease-like domain found in several archaeal TATA-binding protein (TBP)-interacting proteins .. +PF15518 L protein N-terminus
This endonuclease domain is found at the N-terminus of many bunyavirus L proteins .. +PF15519 linker between RRM2 and RRM3 domains in RBM39 protein
Jackhmmer:Q14498, residues 339-411. A conserved linker between the second and the third RRM domain in human RBM39 (CAPER) protein, also present in other RNA binding proteins, especially those involved in RNA splicing. This linker was implicated in interactions with ESR1 and ESR2. Preliminary results from JCSG suggest that this is a structured domain with a well defined fold.. +PF15520 Putative toxin 40
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the type 2 secretion system . . +PF15521 Putative toxin 41
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin contains two structural domains, an N-terminal alpha/beta domain and a C-terminal all-beta domain. The domain contains conserved GxR, RxxxoH GxE and GxxH motifs and a conserved histidine residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the Photorhabdus virulence cassette (PVC)-type export system . . +PF15522 Putative toxin 42
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system . . +PF15523 Putative toxin 44
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha helical fold and conserved [DNE]xxH motif and arginine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, or Photorhabdus virulence cassette (PVC)-type secretion system . . +PF15524 Putative toxin 45
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly all-beta fold and a conserved ExD motif and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 7 or TcdB/TcaC-type secretion system . . +PF15525 Domain of unknown function (DUF4652)
JCSG target SP18005A. This family of uncharacterised proteins from Clostridia and Bacilli classes has an unusual structure of three beta propeller repeats that do not form a barrel, as in well known 6-, 7- etc beta propeller barrels, but instead are stacked in a three-layer beta-sheet sandwich. The function of all the proteins from this family is unknown.. +PF15526 Putative toxin 46
An RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, with two conserved lysine residues and [DS]xDxxxH, RxG[ST] and RxxD motifs. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 4, type 5 or type 7 secretion system . This is also referred to as the E. cloacae CdiAC and has been shown to target tRNAs. . +PF15527 Putative toxin 47
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly beta fold and two conserved histidines, two aspartates and a glutamate residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 5 secretion system . . +PF15528 Putative toxin 48
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved ND and DxxR motifs and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or TcdB/TcaC secretion system . . +PF15529 Putative toxin 49
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved ND and DxxR motifs and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or TcdB/TcaC secretion system. Interestingly, the toxin is also found in type-II toxin-antitoxin systems . . +PF15530 Putative toxin 50
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly all-beta fold and conserved FGPY motif and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 5 secretion system . . +PF15531 Putative toxin 51
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and conserved aspartate and glutamate residues, and an RxW motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system . . +PF15532 Putative toxin 53
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and two conserved histidines present in an RxH and THIP motif. The domain additionally has a highly conserved arginine residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6 or type 7 secretion system . . +PF15533 Putative toxin 54
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and [DN]xHxxK and DxxxD motifs. It is usually exported by the Type 2 secretory system . . +PF15534 Putative toxin 56
A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains a conserved histidine residue and a KH motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2 secretion system . . +PF15535 Putative toxin 57
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and a conserved glutamate residue, and [KR] and Hx[DH] motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system . . +PF15536 Putative toxin 58
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved aspartate, arginine, histidine and cysteine residues that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system . . +PF15537 Putative toxin 59
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold with two conserved histidine residues. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2 or TcdB/TcaC-type secretion system . A member of this family, the Pseudomonas RhsT-C, has been experimentally characterized. . +PF15538 Putative toxin 61
A predicted toxin domain found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold with a conserved glutamine residue and a [KR]STxxPxxDxx[ST] motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system . . +PF15539 CAF1 complex subunit p150, region binding to CAF1-p60 at C-term
CAF1-p150_C2 is part of the binding region of the CAF1 complex p150 subunit to the p60 subunit. The CAF1 complex is essential in human cells for the de novo deposition of histones H3 and H4 at the DNA replication fork [1,2].. +PF15540 Putative toxin 62
A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains two conserved aspartates, a glutamate, a histidine and an arginine residue and an RT motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6 or type 7 secretion system . . +PF15541 Putative toxin 63
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system . . +PF15542 Putative toxin 64
A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains two conserved histidine, a serine, two lysine, and a threonine residue and a HxVP motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6, type 7, and MuF-type secretion system . . +PF15543 Putative toxin 65
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system . . +PF15544 Putative toxin 66
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system . . +PF15545 Putative toxin 67
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and HxR and HxxxH motifs that is usually exported by the type 2 and type 6 secretion system . . +PF15546 Domain of unknown function (DUF4653)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 93 and 229 amino acids in length.. +PF15547 Domain of unknown function (DUF4654)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 169 amino acids in length. There is a conserved IDC sequence motif.. +PF15548 Domain of unknown function (DUF4655)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 533 and 570 amino acids in length.. +PF15549 DPPA3;
PGC7/Stella/Dppa3 domain . Iyer LM, Aravind L, Eberhardt RY, Coggill P. The domain belongs to a fast evolving family known only from the placental mammals [1-3]. The PGC7/Stella/Dppa3 protein protects imprinted regions from demethylation post-fertilization . This suggests that it might bind methylated DNA sequences directly . The conserved core includes a positively charged helical segment and a C-terminal CXCXXC motif that is predicted to chelate a metal ion . Most placental mammals contain 3-6 paralogs of this domain family. The CXCXXC motif is also conserved in a subset of fungal MBD4-like proteins .. +PF15550 Draxin
Eberhardt RY, Coggill P. This family of proteins inhibit Wnt signaling and act as chemorepulsive axon guidance molecules [1-2].. +PF15551 Domain of unknown function (DUF4656)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 286 and 398 amino acids in length.. +PF15552 Domain of unknown function (DUF4657)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 305 and 370 amino acids in length.. +PF15553 Testis-expressed protein 19
Eberhardt RY, Coggill P. This family of proteins is expressed in testis .. +PF15554 FSIP1 family
Eberhardt RY, Coggill P. +PF15555 Domain of unknown function (DUF4658)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 161 amino acids in length.. +PF15556 ZW10 interactor
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 127 and 281 amino acids in length.. +PF15557 CAF1 complex subunit p150, region binding to PCNA
CAF1-p150_N is part of the N-terminus of the CAF1 complex p150 subunit that binds to PCNA - proliferating cell nuclear antigen. The PCNA mediates the connection between CAF-1 and the DNA replication fork. The CAF1 complex is essential in human cells for the de novo deposition of histones H3 and H4 at the DNA replication fork [1,2].. +PF15558 Domain of unknown function (DUF4659)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 427 and 674 amino acids in length. There are two completely conserved residues (D and I) that may be functionally important.. +PF15559 Domain of unknown function (DUF4660)
Eberhardt RY, Coggill P. This family of proteins is found in eukaryotes. Proteins in this family are typically between 93 and 189 amino acids in length.. +PF15560 Immunity protein 8
A predicted immunity protein with an alpha+beta fold and several conserved charged and hydrophobic residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family . The protein is also found in heterogeneous polyimmunity loci. . +PF15561 Immunity protein 9
A predicted immunity protein with an alpha+beta fold and several conserved polar and hydrophobic residues. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems . . +PF15562 Immunity protein 10
A predicted immunity protein with two transmembrane helices, and a WxW motif and a conserved arginine between the two helices. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems . . +PF15563 Immunity protein 11
A predicted immunity protein with an alpha+beta fold and a conserved HxxRN motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems . . +PF15564 Immunity protein 13
A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in heterogeneous polyimmunity loci of polymorphic toxin systems . +PF15565 Immunity protein 16
A predicted immunity protein with a mostly alpha-helical fold and a conserved DxG motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-SHH family of HNH/Endonuclease VII fold nucleases . . +PF15566 Immunity protein 18
A predicted immunity protein with an alpha+beta fold and a conserved histidine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox12 or Ntox37 or Ntox7 families . . +PF15567 Immunity protein 19
A predicted immunity protein with an alpha+beta fold and a conserved tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a protease domain such as Tox-PL1 and Ntox40. In some instances, it is also fused to a papain-like toxin, ADP-ribosyl glycohydrolase and a S8-like peptidase . Based on these associations the domain is likely to be a protease inhibitor. . +PF15568 Immunity protein 20
A predicted immunity protein with an alpha+beta fold and conserved GR, and GxK motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family of nucleases . . +PF15569 Immunity protein 21
A predicted immunity protein with an alpha+beta fold and conserved phenylalanine and tryptophan residues and a GGD motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox19 family . . +PF15570 Immunity protein 24
A predicted immunity protein with an alpha+beta fold with conserved tryptophan, proline, aspartate, serine and arginine residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-AHH family of HNH/Endonuclease VII fold nucleases . The gene for this toxin is also found in heterogeneous polyimmunity loci. . +PF15571 Immunity protein 25
A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI1, Tox-URI2 or Tox-ParBL1 families . The gene for this toxin is also found in heterogeneous polyimmunity loci that show variations in structure even between closely related strains. . +PF15572 Immunity protein 26
A predicted immunity protein with an alpha+beta fold and a conserved C-terminal tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-ColE3 family . . +PF15573 Immunity protein 27
A predicted immunity protein with an alpha+beta fold and a conserved KxGDxxK motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems . . +PF15574 Immunity protein 28
A predicted immunity protein with an all alpha-helical fold and a conserved HRG motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems . . +PF15575 Immunity protein 29
A predicted immunity protein with an all alpha-helical fold and a conserved proline residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-REase-1 or Tox-REase-6 families . . +PF15576 Domain of unknown function (DUF4661)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are typically between 281 and 302 amino acids in length.. +PF15577 Spc7_C2
Spc7_C2 is a short family to the C-terminus of fungal Spc7 proteins. The Ndc80-MIND-Spc7 complex plays a role in kinetochore function during late meiotic prophase and throughout the mitotic cell cycle . The N-terminal region of Spc7 co-localises with the mitotic spindle, and it has been argued that Spc7 has the potential to associate with spindle microtubules and that this association is regulated by the C-terminal part of the Spc7 protein [2,3]. However, this family represents only the conserved region towards the end of the C-terminus; the majority of the C-terminal part is in family Spc7, Pfam:PF08317.. +PF15578 Domain of unknown function (DUF4662)
Eberhardt RY, Coggill P, Hetherington K. This family of proteins is found in eukaryotes. Proteins in this family are approximately 290 amino acids in length.. +PF15579 Immunity protein 32
A predicted immunity protein with an alpha+beta fold and conserved tryptophan and phenylalanine residues, and a GT motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-REase-5 family .. +PF15580 Immunity protein 33
A predicted immunity protein with an alpha+beta fold and a conserved tryptophan, and WE and PGW motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox24 or Ntox10 families .. +PF15581 Immunity protein 35
A predicted immunity protein with an alpha+beta fold and YxxxD, WxG, KxxxE motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene .. +PF15582 Immunity protein 40
A predicted immunity protein with an alpha+beta fold and a conserved YxC motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-JAB1 family . The immunity protein typically contains a signal peptide and a lipobox.. +PF15583 Immunity protein 41
A predicted immunity protein with an alpha+beta fold and a conserved glutamate residue. The domain is often fused to one or more immunity domains in polyimmunity proteins . . +PF15584 Immunity protein 44
A predicted immunity protein with a mostly all-beta fold and GxxE, WxDxRY motifs and a glutamate residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox48 family. This domain is often fused to the Imm71 immunity domain .. +PF15585 Immunity protein 46
A predicted immunity protein with an alpha+beta fold and a conserved GxaG motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a Tox-REase-3 domain . . +PF15586 Immunity protein 47
A predicted immunity protein with an alpha+beta fold and a conserved Wea (a: aromatic) motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox7 family . . +PF15587 Immunity protein 48
A predicted immunity protein with an alpha+beta fold and a conserved lysine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family . The protein is also found in heterogeneous polyimmunity loci.. +PF15588 Immunity protein 7
A predicted immunity protein with a mostly all-beta fold and a conserved arginine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a Pput_2613 deaminase domain . The protein is also found in heterogeneous polyimmunity loci. . +PF15589 Immunity protein 12
A predicted immunity protein with an alpha+beta fold and conserved WxG and YxxxC motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the NGO1392-family of HNH/Endonuclease VII fold nucleases . . +PF15590 Immunity protein 15
A predicted immunity protein with an alpha+beta fold and a conserved aspartate and GGxP motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox10 or Tox-ParB families .. +PF15591 Immunity protein 17
A predicted immunity protein with a mostly all-beta fold and a conserved GxS motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox17 or Ntox7 families . . +PF15592 Immunity protein 22
A predicted immunity protein with an alpha+beta fold and a conserved SF motif and tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox21, Ntox29 or Tox-ART-RSE-like ADP-ribosyltransferase families . . +PF15593 Immunity protein 23
A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox18 family . . +PF15594 Immunity protein 30
A predicted immunity protein with an all-beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-HHH or Ntox24 families .. +PF15595 Immunity protein 31
A predicted immunity protein with an alpha+beta fold and a conserved tryptophan and Dx[DE] motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-RES or Tox-URI1 families. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems .. +PF15596 Immunity protein 34
A predicted immunity protein with a mostly alpha-helical fold and conserved aspartate and cysteine residues and an SE motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the LD-peptidase or Tox-Caspase families .. +PF15597 Immunity protein 36
A predicted immunity protein with an alpha+beta fold and a conserved [DE]R motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox13 or Ntox40 families . In some proteins this domain is fused to the Imm38 -like (PFAM:PF15599) immunity domain.. +PF15598 Immunity protein 37
A predicted immunity protein with an alpha+beta fold and a conserved arginine. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox40 family .. +PF15599 Immunity protein 38
A predicted immunity protein with an alpha+beta fold and a conserved E+G and ExxY motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox40, Tox-CdiAC and Tox-ARC families . The protein is also found in polyimmunity loci in polymorphic toxin systems.. +PF15600 Immunity protein 39
A predicted immunity protein with an alpha+beta fold and a conserved DxEA motif and arginine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-ColD family .. +PF15601 Immunity protein 42
A predicted immunity protein with an alpha+beta fold and conserved tyrosine and tryptophan residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-REase-10 family .. +PF15602 Immunity protein 43
A predicted immunity protein with a mostly alpha-helical fold and conserved arginine and phenylalanine residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox48 family . This domain is often fused to the Imm72 immunity domain.. +PF15603 Immunity protein 45
A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-ARC family. This domain is also found in heterogeneous polyimmunity loci .. +PF15604 Putative toxin 43
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly all-alpha helical fold and a conserved HxxD motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion systems . . +PF15605 Putative toxin 52
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all alpha-helical fold and conserved aspartate and glutamate residues, and K[DE] and [DN]HxxE motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5 or type 7 secretion system . . +PF15606 Putative toxin 55
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha helical fold and conserved lysine and cysteine residues, and GNxxD and WxCxH motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system . . +PF15607 Putative toxin 60
A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha-helical fold with conserved DxK, GNxxxG, and DxxxD motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6 or type 7 secretion system . . +PF15608 PELOTA RNA binding domain
This RNA binding Pelota domain is at the C terminus of a PRTase family . These PRTase+Pelota genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo-nucleoside involved in stress response . . +PF15609 Phosphoribosyl transferase
This PRTase family, with C terminal TRSP domain, are related to OPRTases , and are predicted to use Orotate as substrate. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response .. +PF15610 PRTase ComF-like
This PRTase family is related to the ComF PRTases . These genes are found in the smaller biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response .. +PF15611 EH_Signature domain
This domain with a strongly conserved glutamate at the N-terminus and a histidine at the C-terminus , is found in a SWI2/SNF2 four gene operon . Its strict-neighborhood association with SWI2/SNF2 ATPase strongly suggests a function in conjunction with it . The other genes in the operon are an OmpA protein and a TM protein . This has a DNA related function along with the TerY-P triad .. +PF15612 WSTF, HB1, Itc1p, MBD9 motif 1
A conserved alpha helical motif that along with the WHIM2 and WHIM3 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins .Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins . The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes . The conserved basic residue in WHIM1 is involved in packing with the DDT motif. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA . . +PF15613 WSTF, HB1, Itc1p, MBD9 motif 2
A conserved alpha helical motif that along with the WHIM1 and WHIM3 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins . Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins . The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes . The acidic residue from the GxD signature of WHIM2 is a major determinant of the interaction between the ISWI and WHIM motifs. The N-terminal portion of the WHIM2 motif also contacts the inter-nucleosomal linker DNA. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA .. +PF15614 WSTF, HB1, Itc1p, MBD9 motif 3
A conserved alpha helical motif that along with the WHIM1 and WHIM2 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins . Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins . The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes . WHIM3 along with WHIM2-N constitutes the inter-nucleosomal linker DNA binding site in the major groove of DNA. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA .. +PF15615 TerB-C domain
TerB-C occurs C terminal of TerB in TerB-N containing proteins. This domain displays multiple conserved acidic residues (TerBC) . The presence of conserved acidic residues in both TerB-N and TerB-C suggests that they, like the TerB domain, might also chelate metals. These two domains might also occur together in the same protein independently of TerB .. +PF15616 TerY-C metal binding domain
TerY-C is found C terminal to TerY-like vWA domains in some proteins . It has 8 conserved metal chelating cysteines or histidines . It occasionally occurs as solos .. +PF15617 C-C_Bond_Lyase of the TIM-Barrel fold
This family of TIM-Barrel fold C-C bond lyase is related to Citrate -lyase. These genes are found in the biosynthetic operon, with other enzymatic domains, associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response .. +PF15619 Ciliary protein causing Leber congenital amaurosis disease
Eberhardt RY, Coggill P, Hetherington K. Lebercilin is a family of eukaryotic ciliary proteins. Mutations in the gene, LCA5, are implicated in the disease Leber congenital amaurosis. In photoreceptors, lebercilin is uniquely localised at the cilium that bridges the inner and outer segments. Lebercilin functions as an integral element of selective protein transport through photoreceptor cilia. Lebercilin specifically interacts with the intraflagellar transport (IFT), and disruption of IFT can lead to Leber congenital amaurosis.. +PF15620 Centromere assembly component CENP-C middle DNMT3B-binding region
Pfam-B_64009 (release 26.0). CENP-C is a component of the centromere assembly complex in eukaryotes. CENP-C recruits the DNA methyltransferases DNMT3B, in order to establish the necessary epigenetic DNA-methylation essential for maintenance of chromatin structure and genomic stability. This middle region of CENP-C is the binding-domain for DNMT3B. Binding of CENP-C and DNMT3B to DNA occurs at both centromeric and peri-centromeric satellite repeats. CENP-C and DNMT3B regulate the histone code in these regions [1,2].. +PF15621 Proline-rich submaxillary gland androgen-regulated family
Eberhardt RY, Coggill P, Hetherington K. SMR is a family of proteins found in eukaryotes. The family of SMR proteins is expressed in the submaxillary gland. SMR members may play a role in protection or detoxification.. +PF15622 Kinetochore assembly subunit CENP-C N-terminal
Pfam-B_21609 (release 26.0). CENP-C is a vertebrate family that forms a core component of the centromeric chromatin. On depletion of CENP-C proper formation of both centromeres and kinetochores is prevented. The N-terminal of CENP-C is necessary for recruitment of some but not all components of the Mis12 complex of the kinetochore [1,2].. +PF15623 Cancer/testis gene family 47
Eberhardt RY, Coggill P, Hetherington K. CT47 is a family of proteins found in eukaryotes. Proteins in this family are typically between 262 and 291 amino acids in length. There is a conserved HIL sequence motif. The function of this family is not known.. +PF15624 Kinetochore CENP-C fungal homologue, Mif2, N-terminal
PB002175 (release 26.0). Mif2_N is a family of fungal proteins homologous to mammalian CENP-C. On depletion of CENP-C proper formation of both centromeres and kinetochores is prevented. The N-terminal of CENP-C is necessary for recruitment of some but not all components of the Mis12 complex of the kinetochore [1,2].. +PF15625 CC2D2A N-terminal C2 domain
Many ciliary proteins are involved in ciliogenesis and implicated in ciliopathies. A recent study has shown that many of them contain various new versions of C2 domains which are predicted to mediate membrane localizations for Y-shaped linkers of transition zone of cilia . This is the first C2 domain of ciliary CC2D2A proteins which also have another C2 domain (CC2D2AC-C2) and a new inactive transglutaminase-like peptidase domain (CC2D2A-TGL).. +PF15626 single CXXC unit
This is a solo version of the zf-CXXC domain with a conserved CXXCXXCX(n)C, zinc-binding motif. This is, thus far, only detected in the plant lineage in diverse chromatin proteins . Structural comparisons show that the mono-CXXC is homologous to the structural- zinc binding domain of medium chain dehydrogenases . The regular zf-CXXC domain binds nonmethyl-CpG dinucleotides.. +PF15627 CEP76 C2 domain
Many ciliary proteins are involved in ciliogenesis and implicated in ciliopathies. A recent study has shown that many of them contain various new versions of C2 domains which are predicted to mediate membrane localizations for Y-shaped linkers of transition zone of cilia . This is the new C2 domain that is contained by ciliary CEP76 proteins .. +PF15628 RRM in Demeter
This is a predicted RRM-fold domain present at the C-terminus of Demeter-like glycosylases . These proteins are involved in DNA demethylation in plants where they catalyze removal of the 5mC base and subsequently cleave the backbone through lyase activity. Orthologs of Demeter are present in plants and stramenopiles. The RRM fold domain is predicted to facilitate interaction of the catalytic domain with ssDNA or regulatory RNA .. +PF15629 Permuted single zf-CXXC unit
This is a permuted version of a single unit of the zf-CXXC domain that is detected in the Demeter-like proteins of land plants. Structural comparisons show that the mono-CXXC is homologous to the structural-zinc binding domain of medium chain dehydrogenases . The classical zf-CXXC domain binds nonmethyl-CpG dinucleotides.. +PF15630 Kinetochore component CENP-S
CENP-S is a family of vertebrate and fungal kinetochore component proteins. CENP-S complexes with CENP-X to form a stable CENP-T-W-S-X heterotetramer.. +PF15631 NTF2 fold immunity protein
A predicted immunity protein of the NTF2 fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-NucA family . This domain is also fused to ankyrin repeats and the PFAM:PF14025.. +PF15632 ATP-grasp in the biosynthetic pathway with Ter operon
This ATP-grasp family is related to carbamoyl phosphate synthetase. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo-nucleoside involved in stress response . In press. Mol. BioSyst. 2012, DOI:10.1039/C2MB25239B. "Ter-dependent stress response systems: novel pathways related to metal sensing, production of a nucleoside-like metabolite, and DNA-processing" Anantharaman V, Iyer LM, Aravind L;. +PF15633 HYD1 signature containing ADP-ribosyltransferase
A predicted toxin of the ADP-ribosyltransferase superfamily present in bacterial polymorphic toxin systems. The domain has characteristic histidine, tyrosine and aspartate residues that comprise the active site. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, or type 7 secretion system .. +PF15634 HYE1 signature containing ADP-ribosyltransferase
A predicted toxin of the ADP-ribosyltransferase superfamily present in bacterial polymorphic toxin systems. The domain has characteristic histidine, tyrosine and glutamate residues that comprise the active site .. +PF15635 GHH signature containing HNH/Endo VII superfamily nuclease toxin 2
A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic s[AGP]HH signature motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type secretion system .. +PF15636 GHH signature containing HNH/Endo VII superfamily nuclease toxin
A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic sG[HQ]H signature motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, type 7 or TcdB/TcaC-type secretion system. The metazoan teneurin proteins possess an inactive version of this domain at their C-terminus .. +PF15637 HNH/Endo VII superfamily nuclease toxin with a HHH motif
A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with characteristic conserved s[GD]xxR and HHH motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion system .. +PF15638 Metallopeptidase toxin 2
A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems .. +PF15639 Metallopeptidase toxin 3
A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems .. +PF15640 Metallopeptidase toxin 4
A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems .. +PF15641 Metallopeptidase toxin 5
A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems .. +PF15642 Toxin in Odyssella and Amoebophilus
A predicted all-alpha fold toxin present in bacterial polymorphic toxin systems of the endosymbionts Odyssella and Amoebophilus .. +PF15643 Papain fold toxin 2
A papain fold toxin domain found in bacterial polymorphic toxin systems . . +PF15644 Papain fold toxin 1
A papain fold toxin domain found in bacterial polymorphic toxin systems. In these systems they might function either as a releasing peptidase or toxin .. +PF15645 Dermonecrotoxin of the Papain-like fold
A papain fold toxin domain found in bacterial polymorphic toxin systems . . +PF15646 Restriction endonuclease fold toxin 2
A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 7 or PrsW-peptidase dependent secretion system .. +PF15647 Restriction endonuclease fold toxin 3
A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, type 7 or PrsW-peptidase dependent secretion system .. +PF15648 Restriction endonuclease fold toxin 5
A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, or PrsW-peptidase dependent secretion system. Versions of this domain are also found in caudoviruses .. +PF15649 Restriction endonuclease fold toxin 7
A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, or type 7 secretion system .. +PF15650 Restriction endonuclease fold toxin 9
A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system .. +PF15651 Salivary glad secreted protein domain toxin
An alpha+beta fold domain with four conserved cysteine residues and a conserved [DE]xx[ND] motif. This domain is mainly present at the C-terminus of RHS repeats containing proteins in insects and crustaceans. Although no bacterial homologs have been identified, the domain architecture suggests an origin from bacterial polymorphic toxin systems . . +PF15652 HNH/Endo VII superfamily toxin with a SHH signature
A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with two conserved histidine residues. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6 or type 7 secretion system .. +PF15653 URI fold toxin 2
A predicted toxin of the URI nuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system .. +PF15654 Toxin with a conserved tryptophan and TIP tripeptide motif
A predicted toxin domain with two membrane spanning alpha helices and RxxR, Wx[ST]IP motifs. The domain is present in bacterial polymorphic toxin systems. The toxin is usually exported by the type 2 or Photorhabdus virulence cassette (PVC)-type secretion system .. +PF15655 NTF2 fold immunity protein
A predicted immunity protein of the NTF2 fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-JAB-2 family .. +PF15656 Toxin with a H, D/N and C signature
A predicted alpha/beta fold peptidase domain with a strongly conserved triad of a histidine, aspartate/asparagine and cysteine residues that are predicted to comprise the active site of the predicted peptidase. Proteins bearing this predicted toxin domain are particularly common in both intracellular and extracellular pathogens .. +PF15657 HNH/Endo VII superfamily nuclease toxins
A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic conserved [ED]H motif and two histidine residues. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion system .. +PF15658 Latrotoxin C-terminal domain
A toxin domain present in arthropod alphaproteobacterial, gammaproteobacterial endosymbionts and also at the C-termini of the latrotoxins of the black widow spider. The domain is characterized by a conserved, hydrophobic helix and is predicted to associate with the cell membrane .. +PF15659 JAB-like toxin 1
+PF15660 Immunity protein 49
diff -r 000000000000 -r 68a3648c7d91 pfam_annot/pfamA.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/pfamA.txt Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,14831 @@ +1 PF00389 2-Hacid_dh 2-Hacid_DH; D-isomer specific 2-hydroxyacid dehydrogenase, catalytic domain Finn RD, Griffiths-Jones SR anon Prosite Domain This family represents the largest portion of the catalytic domain of 2-hydroxyacid dehydrogenases as the NAD binding domain is inserted within the structural domain. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null --hand HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.50 0.71 -4.69 98 16860 2012-10-02 14:31:05 2003-04-07 12:59:11 25 50 4524 180 4691 12854 6366 308.20 17 85.12 CHANGED lllhp....sh..pptshphlcc.........plphtp....shsp-..clhcthps..s-ulhstsps.....plspcll.pth..spLKlluptusGhDslDlcsAsc+GIhVsNsPs.ssspulAEhsluhllulsRclspspppl+pGpapppthhshphtspshsllGhsthGttssthtpthththhhhshhhs.pttpttthhhht.thhh...................psspllshps.tshppthhhtpptttthhsshhlsssttsshhststhtshtpptthsssshssppp.sshtp.LhshsNVllTPHluus..TpEAppshutpsspslhphhpG....psstssVs 
.......................................................................................h.t.hpt..............h..pl.phhp.............hspp....ph..h....p...t..l...t.s...........s..-..s.l.h..s.p.st.s...........l..s.p..c..ll...pth............spL+..hl.u....p....h.u...s.G....h....-....s..l....D....l.s.s....A...p.c..........p...........G...........IhV............s........N.s......P.s......s...s..s....p......u.....V....A..EhsluhlLshsRplspsppph+pGpapttshhshthtstphtslshsthsthhtthtpthththhhh.hh.ttttttttthh..t..h..........................................................................................................................................................tst.hh.htshts.ppthhhspphhthhhsshhlhssttsshhppshhtshhpttthsssshss.tptsshtpsLhphsNVllTPHluus..T.tE.up.pp.hs.p...t.s.hpslh.p.hh.ps......t....p.l.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 1361 2755 3878 +2 PF00198 2-oxoacid_dh 2-oxoacid dehydrogenases acyltransferase (catalytic domain) Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain These proteins contain one to three copies of a lipoyl binding domain followed by the catalytic domain. 
23.00 23.00 23.10 23.50 22.90 22.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.30 0.70 -5.09 100 10039 2012-10-02 12:01:53 2003-04-07 12:59:11 18 49 3997 65 2920 7770 5825 224.80 35 47.20 CHANGED ssstpplPlsshR+slAcphspStp.shPphshs...s-l-hspLhplRp.p..................lppph..tt........KlohhshllKAsuhALccaPhlNush..s..s-s.....llhpcplslGlAVsos.....pG..LlVPVl+sscptulhplupclpcLsp+ARss+.Lpss-hpG.GTFTlSNlGsh.G.sphhoPIIN..PpsAILulGpl.pcpP.ls.tssp.........lshpph.....hsloLohDHRllDGAsuucFlppl+.chLE..sPttll.l .......................................................t....ppl.h.stlR+tlApphhcuhp.ss.s..p...lThh...s-lDhst...lhshRp.p.......................................hpp.t.htcpps..............KLohhsahsKA.lstAL..+..c.a.P.tlNuuh.........s...............scs........llh+ph...hs..lG.......lAV..s...T..s............pG.....L.lV..P...V.l+..ss...-.p.h.ultpls.p.........c.l...pcLup....+.......AR.cG...K..Ls.....s...p-h..pG...GTFTI....oNhG...s........h....G..uh..h..T.PI....l.Ns..Pps.A.I.L.G.luph..tp+......P.ls....hssp................................lshcsh...h.LuLSaDHRllDGtpuu.pFLspl+.phLE..sPtthl............................. 0 940 1801 2459 +3 PF04029 2-ph_phosp 2-phosphosulpholactate phosphatase Kerrison ND, Finn RD anon COG2045 Family Thought to catalyse 2-phosphosulpholactate = sulpholactate + phosphate. Probable magnesium cofactor. Involved in the second step of coenzyme M biosynthesis. Inhibited by vanadate in Methanococcus jannaschii. Also known as the ComB family [1]. 
21.70 21.70 24.00 23.70 21.40 20.80 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.39 0.70 -5.40 61 532 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 495 17 238 524 260 226.60 27 93.52 CHANGED plplhhsscth.tt........pssVVIDVLRAToTIssALpsGAc.tlhPsssl--Ahthtpt......cshLluGE.RsGh+ltGFDhGNSPh-hp......tppl.....p.GKpllhoTTNGT+Alpcsp.sAp..pllhuuhlNtpAVschltpt.....scs...lhlVsuGhp.GpaolEDhlsAGhlhptLhpptt...........thsDtshsAhtlapp......ttshhphlppusHup+LtpLGh....pcDlca.CsphDhhslVPhh..psGhlhtt .............................h.....................spsslVlDVLRAooslssAlssGAp.....pl.hss..t.sh..-.c.Ahthtpp.............pshllsGE..Rsu..h..pl.p.......GFDh.uNSPhphp...................t..ptl.......p..G.+p.llhoTTNG.Tpu.lppst...pu..p....plls.u.uhhNspAlAchltpt.......scs..............lhllsuG..p..c.........G..........p...........h...........ul...............EDhlsAGhlhptLtpptt..............................hsDtuhsAhthapp...............ttshh.p.hlp..p.uspupcLhp.h.Gh........pcDlph..Cs.ph.chhshVPhh..ptt....th...................................................... 0 95 178 222 +4 PF03171 2OG-FeII_Oxy 2OG-Fe(II) oxygenase superfamily Aravind L anon Aravind L Domain This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily [1]. This family includes the C-terminal of prolyl 4-hydroxylase alpha subunit. The holoenzyme has the activity EC:1.14.11.2 catalysing the reaction: Procollagen L-proline + 2-oxoglutarate + O2 <=> procollagen trans- 4-hydroxy-L-proline + succinate + CO2. The full enzyme consists of a alpha2 beta2 complex with the alpha subunit contributing most of the parts of the active site [3]. The family also includes lysyl hydrolases, isopenicillin synthases and AlkB. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.51 0.72 -3.71 138 7390 2012-10-10 13:59:34 2003-04-07 12:59:11 15 72 1609 66 3461 10006 2827 101.90 28 28.36 CHANGED sspthplspYs...................thshuhssHsDs...................shlTllhp...........psuGLplhpps.....................thhsssshss..uhllshG-.h.hhossthpushH+lhssp...............tpsRhShshFhps ..................................................................s...hpl.tYP.h.........................................psphshG.h...s.s.....Ho.Dh........................................................shl.TlLhp.......................................sps.uGLQ..l..hpps.............................................................pWls.l.s.s.h.s......s.....u.......hllN.l.G.......Dh..h.....p............h...h.......o.....N.....u.....t..h......c......S....s...h...HRVhsst....................................ttsRh.SlshFhp........................................................ 0 752 2020 2858 +5 PF01073 3Beta_HSD 3-beta hydroxysteroid dehydrogenase/isomerase family Finn RD, Bateman A anon Pfam-B_504 (release 3.0) Family The enzyme 3 beta-hydroxysteroid dehydrogenase/5-ene-4-ene isomerase (3 beta-HSD) catalyses the oxidation and isomerisation of 5-ene-3 beta-hydroxypregnene and 5-ene-hydroxyandrostene steroid precursors into the corresponding 4-ene-ketosteroids necessary for the formation of all classes of steroid hormones. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.68 0.70 -5.70 12 1802 2012-10-10 17:06:42 2003-04-07 12:59:11 14 22 886 0 831 37083 19903 231.50 24 68.31 CHANGED lVTGGuGFlGppIlphLlptc..ltElRlhD......hthssph.p.chtphpst....hlpGDlpDtpplcpAhpGsslVIHTAullDlhG.....hhpppplhcVNlpGTpsll-AChpsuVphhlYTSShpVlGPN.hucslhsGcEppsapss..apcsYspSKthAEchVLtANG..h+sGu.phhTCALRP.hIaGcGsphlhstl.pshcps.hhhthuptpsh.s.VYVsNlAWuHlLAA+sLpss......tstltGphYalsDsoPpppYt-hshplh+shGhchss.h....hPh .......................................................................................hVTGus..G...F..l...G.......t..p...l..l.......p..t.L.....l...p......p......s.............................h..........p....l......p.....s.....h....c.......................................h..t........t..............t................................................t..........t..h..t.....................hh.p...u...D.......l.......p.........s...............p.........t.......l.........t...........p..............A..........h...............p...............s...............s...............c...........s.............V.......h.......H.......s..........A...........u.............s...........h...h.s............................s...t.....p...........h.......h....p...s.....N.....l.......p....G.......T.....p..s...l.....l........c..............A.........s............t............p...............s..............s................V..............p.............p.......l..........V.........a........T...S...o......h..........s......s............l...........h.............s..................................................t...........p...........s............l.........h..............s.........s................s...............E.............p..........h........s........h................................h.........s..........h........Y........s.....p......o...K....h....h........A..E.......p....h....l........h.........t.........t.........s.....t......................
.......................................t...........................h....h.....o..s..s.....l...R........P...h........h..I........a..........G.......................s...........p...........p........................h........h..........s..........t............h.......h........p........h..........h.......p.........t............t........................h..........h.......h.......h.......s.........................p............................h......s..........h.....s....a........l...............t......N...l..s........h......H...l..........h.........A....h..........p....t......h...............................................G.....p.................a........h.....s.........t.................h..............................h........................................................................................................................................................................................................ 0 251 444 652 +6 PF04419 4F5 4F5 protein family Bateman A, Wood V anon Wood V Family Members of this family are short proteins that are rich in aspartate, glutamate, lysine and arginine. Although the function of these proteins is unknown, they are found to be ubiquitously expressed [1]. 22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild --amino -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -7.95 0.72 -3.22 40 536 2012-10-03 11:22:52 2003-04-07 12:59:11 9 10 268 1 333 500 2 37.30 34 47.06 CHANGED MuRGsQ+ptsRc+stKKptt.t......spsspsphpssp+t..p .....MuRGsQ+chuRpKNtKKpsp.t......t+ptp-thsssp+t..p.............................. 0 100 162 256 +7 PF03061 4HBT Thioesterase superfamily Bateman A anon Pfam-B_2758 (release 6.4) Domain This family contains a wide variety of enzymes, principally thioesterases. This family includes 4HBT (EC 3.1.2.23) which catalyses the final step in the biosynthesis of 4-hydroxybenzoate from 4-chlorobenzoate in the soil dwelling microbe Pseudomonas CBS-3. 
This family includes various cytosolic long-chain acyl-CoA thioester hydrolases. Long-chain acyl-CoA hydrolases hydrolyse palmitoyl-CoA to CoA and palmitate, they also catalyse the hydrolysis of other long chain fatty acyl-CoA thioesters. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.14 0.72 -3.85 140 18406 2012-10-02 20:54:35 2003-04-07 12:59:11 17 57 4420 294 5240 15763 6423 79.30 17 47.56 CHANGED hGhlaGGsh....hshh-pusshhhtphstt..............hsssshphslsahcs.sphup.hlpspuplhchG+sshhhphclhspssthhs .................................................GhlaGGhh......h.s.h.h.-.....p.ss.u....h...s...s.t..phstt.................................................tsss.shph........s.lsa..hc...s..s..p..h..........u........p...h.....l.....ps......pu.....p.....l..h......c..h.....G..p..p.sh..hhphclhsppt....h.......................................... 1 1455 3083 4266 +8 PF02872 5_nucleotid_C 5_nucleotidaseC; 5'-nucleotidase, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_1318 (release 3.0) Domain \N 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.11 0.71 -4.25 119 7487 2009-01-15 18:05:59 2003-04-07 12:59:11 13 118 3115 27 1573 5671 648 166.60 21 29.23 CHANGED slu.......psth.......................t.sh....tp-ss....luslls-uh................tttsss...............c.............lulhss.G.sl........Rs..s........................tG.s...lTht-lh.slhPFs.Np........lhshplsGp.plpphLE...........................tsssthh............phu.........G.lch.ph.Dhsps.................upRlssl..................sup....sl..-sspp..Yplssss.......a.hs....sGucs..a..h......htp 
...............................................................................................th........htpss.....hspllssA......................tth.s.ps........................s...................luhh...ss..G..sl..........................Rs..sh..............................tG..s........lThp...slh..sl....hP....as...Ns..............l.ssh.c.loGp.p.l.+..ch.LE..........................................psthh...................................................plh.......G..lpa...p.h....chs.ps.............................................up.R..ltsl.................................................pGc.................sl......-....s..s...pp....Yplusss.............a..hu...........s.G.Gst...a......h................................................... 0 507 918 1282 +9 PF00003 7tm_3 7 transmembrane sweet-taste receptor of 3 GCPR Sonnhammer ELL anon Prosite Family This is a domain of seven transmembrane regions that forms the C-terminus of some subclass 3 G-coupled-protein receptors. It is often associated with a downstream cysteine-rich linker domain, NCD3G Pfam:PF07562, which is the human sweet-taste receptor, and the N-terminal domain, ANF_receptor Pfam:PF01094. The seven TM regions assemble in such a way as to produce a docking pocket into which such molecules as cyclamate and lactisole have been found to bind and consequently confer the taste of sweetness [1]. 
25.80 25.80 25.80 26.30 25.70 25.70 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.91 0.70 -5.02 109 4297 2009-09-14 23:22:24 2003-04-07 12:59:11 17 59 213 0 2615 3557 7 227.40 29 32.05 CHANGED hGhhh.slhlh...........slFhpappTPlV+usst.pLsallLhulhlsahsshhal.ucPs................shs.........Chl+phhhulsFslshSslLsKohp.lhh..........hFc..tst..stt..hhh......................sttphhhlhhhshl.Q..lllsslWls..hsPP..................................ht.......sht..tptp.llltC.........p..s.sshshh.............hhLuYhshLhllshhhAa........................hsRclP...csFNEAKaIsFoMhlhshlWlsFlP.hahss......pu.p.....................................apssl.shullhSuhuhLhslFhPKsalI ..............................................................................................hhhh..shhlh...........hlFl.....p.a..p..s.T...Pl.V+...Asst...pLsallLhulhls.ahs.........s.hh...a.l....u.c.P.s.......................................hs........................C......h.lRph...h.hGlsFslsh..S.....slLsKThp..lhh.................................................h.Fc.....tsp..s.tp....hhp.............................................stt.hhl...lh.hhshl.Q...............ll....lsshWls.......hsPP.......h.............................................................pt............shp....sp.tp....ll...l.....p...C..................p...p....s.oshuh..h....................................ssL.G..Y.h.slLh.l.h.s.hhhAa........................hsRplP...-sF.NEAK..a..I.sFo..M...............hs..h..s....h.l..W....l.sF..l...P...h...ah.s.o...pu.p.....................................hh.s..s..s.h...hu..ll.hSu..h.sh.Lss...lFhPKsall......................................................................... 0 448 660 1746 +11 PF01661 Macro DUF27;A1pp; Macro domain Bateman A, Mistry J, Wood V anon Pfam-B_434 (release 4.1) Domain This domain is an ADP-ribose binding module. It is found in a number of otherwise unrelated proteins. It is found at the C-terminus of the macro-H2A histone protein Swiss:Q02874. 
This domain is found in the non-structural proteins of several types of ssRNA viruses such as NSP3 from alphaviruses Swiss:P03317. This domain is also found on its own in a family of proteins from bacteria Swiss:P75918, archaebacteria Swiss:O59182 and eukaryotes Swiss:Q17432. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.40 0.71 -4.19 124 5297 2012-10-02 00:07:53 2003-04-07 12:59:11 16 119 2806 93 1542 4535 236 112.30 29 10.96 CHANGED VNAANssLhs.....GGG.VsuAI++tuG.spltpts.pph......tt......stsGpAhlTsuhpLs......s+aVIHs..VG..Phap.....spppptchLtssYcssLpl........s..pcpslc.....................olAhPsISoGlaGaPh-cusplh ..........................................................VNAANsp.h.h.t.........G.uG..V.s..sAI.........pct..u..G..sp.l..t..p.t.s....pp..h............s...........................h.s.Gp...A..h.l..T...p...u..h..s.Ls...............s+hlI..Hs..VG...Ph.hp...................spppptphL...ts..sY...cssLph..............s..pp.p.s.hp...................................................................olAh.....P.s.I.SoGl..a.uhPhppusph..................................................................................................................... 0 543 862 1216 +12 PF02177 APP_N A4_EXTRA; Amyloid A4 N-terminal heparin-binding SMART anon Alignment kindly provided by SMART Domain This N-terminal domain of APP, amyloid precursor protein, is the heparin-binding domain of the protein. this region is also responsible for stimulation of neurite outgrowth. The structure reveals both a highly charged basic surface that may interact with glycosaminoglycans in the brain and an abutting hydrophobic surface that is proposed to play an important functional role such as in dimerisation or ligand-binding. 
Structural similarities with cysteine-rich growth factors, taken together with its known growth-promoting properties, suggest the APP N-terminal domain could function as a growth factor in vivo [1]. 22.10 21.60 22.10 31.90 19.70 21.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.61 0.72 -4.10 15 356 2009-01-15 18:05:59 2003-04-07 12:59:11 11 16 88 12 114 317 3 98.20 60 14.97 CHANGED a-P.........pVAhhCG+hshahs...hpsGpWhsDssst...tsChpscp.-ILcYC+KlYPchsITNlVEuu.p.VpIssWCchsp...spC+s....sHhVpPYRCLsGcFhS-ALLVP- ............A.EPQlAMFC.G+LN....MHhN...........lQsGKWEsDPoGT...KoCltoKE.tlLQ.YCQE.hY..PELQITNVVEAN....QP..VoIpNWC++u+....KpCKs.....HhVlPa+CLVGEFVSDALLVP-............................... 0 20 28 61 +13 PF00962 A_deaminase Adenosine/AMP deaminase Bateman A anon Sarah Teichmann Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.85 0.70 -5.52 15 4245 2012-10-03 00:45:34 2003-04-07 12:59:11 17 22 2418 67 1678 3809 1081 319.30 23 76.53 CHANGED sl.Ks-hHlHtsushs.cpLLchh+pphht..stssh.p.tphlphtps.csLphhhhshslsshsl+.t........................................................................................h.hhpssRhhsh.ptcchhtstsphlEshapPhhps..........shssp.hsthclhpthlsGhDps-cEst..hss+hhhsch.p.....sppaspph...scshshYh.h..hhsshslhs................phppptthsslhhpsHuGEsushpplhsAhhh....htuccIuHGltltccPhL.thht.p...........QIslphsPlSNssLthhtshccpPlhpahccGlsVSLuTDDPhhFphT...LhcEYslAspsa...thop..s-hsclA+NSlhtSuhscctKpchLuc 
........................................................................................................................................................................................h.t.s-lHhH....lsGslp....phl...hc.l..sp....p..t..t...........h.....t..............................................h.........p..........h.......h....h.....p.........h....h.....t....t...........t.......s....L..............p...a........h....p..h..h..s..h.s.h.tslt..............................................................................................................................................p.ps.hp.t.hs.h.t....h.h.p.c.h....t.....p...s...s.ltahElh...a.s.P.....hhs....................................pts.........h....s...h...t......t......s...l..p...s....l.......h.p..u..h...c......p....u.......p...c...p.hs..............lps.p.h.....l.....h.s..h...Rp..............stph...s...p....ph................hp...hh....h...t...a....t....p............hlsul.sl..s..u.s.E...s....h.sst................ahp.h..h..p..p...s....p.p.t.......s..h..t..hssHA.......G..E........s........s........s........s........p........p...l..h..p...Alth.........................hts.c..R..I.uH....Gl.....p.....h......h.....c..........c.......................t..........L.....h......p..h...l...h..p..p........................................pIs..l...Eh....CPh.....S..Nh.p............h...........t..h...............h..............t....s.......h.............t....p........H........P....l.....t.......p.....h...........h.........p.........t.G.........l.....................s........o...lsoD.DPsh........sss.....................l.cEat.hs.t.....pth....shs.........pp...ht.p.h................s.h.Nul..phua.hst..p.p.Kpthh......................................................................................................... 
0 516 944 1371 +14 PF01490 Aa_trans Transmembrane amino acid transporter protein Bashton M, Bateman A anon Pfam-B_419 (release 4.0) Family This transmembrane region is found in many amino acid transporters including UNC-47 and MTR. UNC-47 encodes a vesicular amino butyric acid (GABA) transporter, (VGAT). UNC-47 is predicted to have 10 transmembrane domains Swiss:P34579 [1]. MTR is a N system amino acid transporter system protein involved in methyltryptophan resistance Swiss:P38680. Other members of this family include proline transporters and amino acid permeases. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.16 0.70 -5.99 24 6390 2012-10-03 01:44:59 2003-04-07 12:59:11 13 58 501 0 4195 7221 166 337.70 15 78.72 CHANGED pssoshpushpllsuhlGsGlLuLPaA.hpphGhlsGlhhllhhuhlohaohhlLspshph.........tppttoYt-ls...........tphhGsth.hhlhshs.hlp.lGhsluYhlhsupsl.slhpshh....tt.......h.h..ppshahhlhshlhhsLo.hlsshstLuhhSlluslhhhh.....................hshlhhhhhshshhsshshshhsh........thphhphhhulGslsFAasspshlhsIpssh+s.Ppp........cshhhuhhhsslhahhhGhhGYhsFGsssts..sllhshs.p......hhshsplhlsltllhuhsl.hhPlhphl-phlh...................................................................................s.hps.hhthh...chhhRshlVlhohhlA.lhhPhhstllullGuhushsloalhPshhahphhtsphhshphhhh...hthhhhhhulhhhhhushshlh 
........................................................................................................................................................................t...t...shh.hhp..........sh.....l..Gs.G.l...L.u..h...P.....h...u....h.t..................p.........h......G.....h.....h.................u..h.....h....h..h..h.........h...h...u..h.hs..h....a.s....h.h.l.l....h.....p.s.h....................................t....h..sa...t..phs.......................................................thh.h..G.....hh....th....hh.t....h..h.h........h...h........h.......s.h..............sh.s....ah....lh....h.u.p..h....t...h..h..t.....t..........................................................h...p...h....h....h..h.....h...h...h..h..h.h...h...h...s....h.......s.....h......h.....t.....p...h...p..........l....t..h....h.u..h....h.uhh.hhh.h...........................................................hhh...h..h..h...h..h....................h......................t.h...........................................t.............t....h......h.....s....h....s...h....h...h.....a........u......a.............................s...p.......................h...h...l.................s....p....h...............p...............p....s......pp..........................ts.......h....h......h..s.....hh.......h...s..h.h.h.....a.....h..h...........h.........u.h...h...u.............Y............h....t.....a.........G......s..........t......s....t...s...............s.....l.hh.s..h.s..............................h.h.h...s.......h....h...h....h...h....t.h...h.h...s.....h....s........l.......h....h.........s..h...h..t...h...l...t.hhh.......................................................................................................................................................................................................................................t.........h.......h.h...........hh.h.h..p..h...h..h..h....h..h...s............hh...lu....h....h...h..P.........h..s...........l....h...........u...h..h...........
...G.uh...h..ss................hsh...h.......hP..s.h..h...hh.......h...........h..h......................t..........................................................h.............................................................h..h.h..h..h.h..s.hh....h........h.............................................................................................................................................................................................. 2 1380 2401 3501 +15 PF00004 AAA ATPase family associated with various cellular activities (AAA) Sonnhammer ELL anon Prosite Family AAA family proteins often perform chaperone-like functions that assist in the assembly, operation, or disassembly of protein complexes [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.75 0.71 -3.95 207 52090 2012-10-05 12:31:07 2003-04-07 12:59:11 24 426 6164 264 20937 92205 29927 130.80 25 20.90 CHANGED lLLhGPPGsGKThlA+ulAsp...............h.............s....hs.................a.hplsusplh.t................ahGtutppl+plapp.A....cpts........s.sllFlDElDulssp+................t..tppshspLLsthDshpst..........................s.lhlluATNc..s-tl-sALh.h.uRh-phlhhsh 
....................................................................................................................................hlLhGP.P..G....s.........G....K.........T....h........l....A....c..........u...l...A...pp...........................h..............................s................tp.......................................................................h...h......p...l....s...s...u....s....h.....h.....pt..............................................................................................................................................................a..h..G......p....h....t....p......c......l......+......p......l.....h..cp....s..........cpps...............................sll...F...l.....D...E....l....c...s...ls.sst.......................................tt.t....t.t...p...s....h.....s....t...L....l......s..t......h...-...s.h.p........................................................................................h...h...h..l.....u...A.....T....st.............p..........l.....s........s.lh.........tR.hph......h................................................................................................................................................................... 0 6957 12667 17600 +16 PF00696 AA_kinase aakinase; Amino acid kinase family Bateman A, Birney E, Griffiths-Jones SR anon Pfam-B_100 (release 2.1) Family This family includes kinases that phosphorylate a variety of amino acid substrates, as well as uridylate kinase and carbamate kinase. This family includes: Aspartokinase EC:2.7.2.4, Swiss:P00561. Acetylglutamate kinase EC:2.7.2.8, Swiss:Q07905. Glutamate 5-kinase EC:2.7.2.11, Swiss:P07005. Uridylate kinase EC:2.7.4.-, Swiss:P29464. Carbamate kinase EC:2.7.2.2, Swiss:O96432. 
24.90 24.90 25.00 24.90 24.70 24.80 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.76 0.70 -4.83 135 25691 2009-01-15 18:05:59 2003-04-07 12:59:11 23 74 5328 286 5789 16900 9693 240.60 20 64.39 CHANGED phh..V.lKlGGssls.st...........t.lp.plspplt......t.......................hhpt........shcl.llVsu...uG.....s....hssthhpt.............t..........................t.h..ptt.................tltp....................httlhst.hpphss.thhshhhps......tsh.shth....................................tptlpphlp..p..s.hlsllsuhh.....ss....ps....th..................sthssDtsAshlAttlpA...c..lhhh..oDVsGlastpP.....su.phlsplshp-htphh......................psGhtshh.tuhpss.pp..s..shplhlhs .....................................................................................................hlVlKhGGsuls.st......................................p.lp.p.ls...p.p.lt.......p...........................................ht.pt.....................................Gh..cl..llV....pu.....uG........s.....hssthhtt..........................................................................................................t..h.h.h.pt.t.....t..........................................................thtp..................................................hh.s..hlsth.....h......p.t...h...s...s.......hs....h.sh..h.hpp................ts.h...s.t.t.h...hth..........................................t.....hs.tp.tlp.p.h.Lc......p.......s.tls..l.h.suh........ss...............ps...th....................................shhssDpsAu.hlA...t.....t..l....p...A.............D...hlllh........T-V..-.G..l....as.s.s...Pth............s....s....u...p........h...l...s....p..l..s..h..p..-..hhchh..........................................................tG.ht.sh.hts....Ahphs.pp....t.s..h.shhlh.............................................................................. 
0 1858 3683 4867 +17 PF03109 ABC1 ABC1 family Bateman A anon Pfam-B_339 (release 6.5) Family This family includes ABC1 from yeast [1] and AarF from E. coli [2]. These proteins have a nuclear or mitochondrial subcellular location in eukaryotes. The exact molecular functions of these proteins is not clear, however yeast ABC1 suppresses a cytochrome b mRNA translation defect and is essential for the electron transfer in the bc 1 complex [1] and E. coli AarF is required for ubiquinone production [2]. It has been suggested that members of the ABC1 family are novel chaperonins [1]. These proteins are unrelated to the ABC transporter proteins. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.40 0.71 -4.13 32 5521 2012-10-02 22:05:25 2003-04-07 12:59:11 11 49 2872 0 2412 4867 2734 116.90 31 21.56 CHANGED ppLG.pshcchFppF-ppPlAuASluQVH+ApLps........GppVAVKVQ+PslppphptDlthhphlsphhpth..t........clttllc-hpcpLhtElDahpEApssc+htcthpch.....shlplPpla .................................................t.thG.tsh.p.p.hF..s..pF-...t...p.P..lAuASIu......Q..............V..HpAp.L+s.....................................Gc..c....V.sVKVp..+Psltp.................hlpt.Dlpl....l......p...hl.u.......p.h....h....p...t....h.h.st...................hhc.hp...pll.......cEhp..........c.......plh.........pE...........lDhhpEAtNs.pphp...c..p..hp.sp............s.hl..hlPcl........................................................ 0 828 1543 2052 +18 PF01842 ACT ACT domain Bateman A anon Bateman A Domain This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine [1]. 
Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585 20.80 20.20 20.80 20.20 20.70 20.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.83 0.72 -4.43 180 20885 2012-10-02 00:29:19 2003-04-07 12:59:11 20 130 4518 161 5944 23355 10616 64.10 16 16.98 CHANGED thlt...l.tst.-c.sGlluclhshlucpslNlpplpttsstt...........thhhhhhhssppshpphhcplcphhs ......................h..l.h..h...Dc.PGllucls.shlu.c..p..slsls.ph.p...psstp......................hthhhhhh...hspt.t.tthht.h.....t............................................. 0 1838 3898 5060 +19 PF04083 Abhydro_lipase abhydro_lipase; Partial alpha/beta-hydrolase lipase region Wood V, Finn R anon Pfam-B_267 (release 7.3); Family This family corresponds to a N-terminal part of an alpha/beta hydrolase domain. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.17 0.72 -4.61 82 1362 2012-10-03 11:45:05 2003-04-07 12:59:11 11 14 288 4 980 1460 9 63.00 36 14.19 CHANGED sscllppaGYssEpHpVpTcDGYlLslaRIstsp...........................................t.tspPs...VhL.HGLlsSSssWlhss ........................phlptaGYssEpHp.VpTcDGYlLslaRIPpsp.......................................................................................................ttt.ttt+..sVhLp..HGLlssSssWlhp.h........................................................................... 
0 284 440 775 +20 PF00583 Acetyltransf_1 Acetyltransf; Acetyltransferase (GNAT) family Bateman A anon MRC-LMB Genome group Family This family contains proteins with N-acetyltransferase functions such as Elp3-related proteins. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.44 0.72 -3.95 241 50804 2012-10-02 22:59:21 2003-04-07 12:59:11 19 363 5227 314 15580 72453 11235 83.90 18 39.27 CHANGED hptsspllGhsththhtp................................................ttstltslhVp......ssa+spG.lGptLlpthhchstp..hs.......hpplthts.tts.Ntt...uhphYp.+hGFp ......................................................................................................h...tspllG...h....h..h..h...hh..p.............................................................................ttht.l...t....p..l..h..V.p...................s...p...h...+........s..p..........G....l......G....p......t....L....l........p.....t......h....h.....p..h....upp.....ts..............................h.p..p..l....h....L..t.s.....tt.......p....st............uh..t.hY.p..chGa................................................... 1 4938 9501 12964 +21 PF01648 ACPS 4'-phosphopantetheinyl transferase superfamily Bateman A anon Pfam-B_1679 (release 4.1) & Pfam-B_3672 (Release 7.5) Family Members of this family transfers the 4'-phosphopantetheine (4'-PP) moiety from coenzyme A (CoA) to the invariant serine of Pfam:PF00550. This post-translational modification renders holo-ACP capable of acyl group activation via thioesterification of the cysteamine thiol of 4'-PP [1]. This superfamily consists of two subtypes: The ACPS type such as Swiss:P24224 and the Sfp type such as Swiss:P39135. The structure of the Sfp type is known [3], which shows the active site accommodates a magnesium ion. The most highly conserved regions of the alignment are involved in binding the magnesium ion. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.84 0.71 -4.27 154 7983 2009-01-15 18:05:59 2003-04-07 12:59:11 15 48 4744 99 1916 5342 1894 110.90 21 48.33 CHANGED slGlDlEph.....pt................................thtp..lhpph.h...sspEhphltphss....................................thhhthWohKEAhhKuhs....t..................h..hsthphphtss.................................................h..htttttphpht...hphttptthssshs .....................................................................lGlDltcl......pph..............................ppspp...h..h...c+l...h...os.s.E.h.phhp.shpt...........................................................................h.p.hh.s.thausK.EAhhKAhG......pG.......................................th..hshpp.l.plhps...............................................t.................................................................................................................................................................................. 0 622 1194 1612 +22 PF01064 Activin_recp Activin types I and II receptor domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_338 (release 3.0) Domain This Pfam entry consists of both TGF-beta receptor types. This is an alignment of the hydrophilic cysteine-rich ligand-binding domains, Both receptor types, (type I and II) posses a 9 amino acid cysteine box, with the the consensus CCX{4-5}CN. The type I receptors also possess 7 extracellular residues preceding the cysteine box. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.21 0.72 -11.34 0.72 -3.34 26 942 2012-10-03 01:43:02 2003-04-07 12:59:11 18 17 116 67 464 892 0 83.70 25 16.38 CHANGED lpCh.Css..........Cs..pss.....ppCps...................suhCasthphspst...hphhpp....GChshppts......hhCps..................sss...spshtCCp.sDaCNcshp..h ........................................................Ch.Cp.......C....pss.........tpCps...................sstCasshpp...sss........hphhpp..........GChstt.tps.................hpCts.............................psps.........spsh.h..C.Cs...s-hCNpph....h................................................. 0 82 118 270 +23 PF00441 Acyl-CoA_dh_1 Acyl-CoA_dh; Acyl-CoA dehydrogenase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain C-terminal domain of Acyl-CoA dehydrogenase is an all-alpha, four helical up-and-down bundle. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.47 0.71 -4.11 58 26994 2012-10-01 23:33:27 2003-04-07 12:59:11 19 118 3319 208 9422 25135 11877 150.80 24 33.41 CHANGED spGathshps.lshpRhslustshGhsppsl-pshpascpR..psFG.p.lhshpslpppLAchtsclcus.+hhshpsup...thcpspss........tttsuh.sKhhuschstclsppAhplhGuhGh.......h.pchs..lp+haR-s+shpIht.GosEl.tttllu+phh 
..........................................................................................................................spGhthhhp...t..LshsR..l...s........l....u....s....t.....u....l...G..........h....u........p..t......u.....h..c..t.....s.....h..p.....Y...u...p..p....R....................p....p...........F............G......p.............s...............l......s........p......h......p...s..........l..............p......p...p........L.......A.......c...h...t.....s...p..l..p....s.......u.....+.......h.......h.......s......h....p.s.A.t..................th...-...p...upss................................shtsu.h....s........K........h.....h...s....s.....c...t..............u...h........p....s....s..s...p...A.l.Q......l.....h...........G....G.h..Ga..............................h..p-hs..........lt+..h...h.....R....-....s.......+....l....h...p...lhp....G.ospl..p.t...hluc...h................................................................ 0 2614 5667 7871 +24 PF01757 Acyl_transf_3 DUF33; Acyltransferase family Bashton M, Bateman A anon Pfam-B_708 (release 4.2) Family This family includes a range of acyltransferase enzymes. This domain is found in many as yet uncharacterised C. elegans proteins and it is approximately 300 amino acids long. 
31.00 31.00 31.00 31.00 30.90 30.90 hmmbuild -o /dev/null --hand HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.29 0.70 -5.61 111 12610 2012-10-02 17:00:17 2003-04-07 12:59:11 17 41 3761 0 3737 10989 1698 328.50 15 74.75 CHANGED hhtlD.....hlRsluhlhllh.hHshh...hhhh.shhh......................................hhsthslslFhhlS..Gahh...................................hpttshhphhhcRhh+lh....lshlhhshlhhhhthhhhththhthhhhhhhh.............................................................................................................................................hhshhW........alhsh......hhhhllhshh...............hhhhpphtthhhhhhhhhhhhhhhhhhhhhh...................hth..hhh.............................hhhahhGhhhsphttphptthh..............hhhhhhhhshhhhhhhhhhhhhht................................h..hhthhhhhhshhhhhhhshhhtpht.....thh..............hlthlu.phShslYlhH...........hhlhhhhtthhhhh.................................hhhhhhhhhhslhlsh....lhhh ...................................................................h...l-slRuluhlhVlh...hH....hhh...............hh.h.......................................................................................................huh..h..u...V.s..l..FFhl.S..Galhh.................................................................t.tp..h.....s.h..t..p....a...h..h.......+.....Rh.hR.lh................hsh.l.h...h...h.........h..l.....h.........h.........h........h......h.........h......h........h.....h......t......h.....h....h...h....t.....h..t...h.....h.hh.h..............................................................................................................................................................................................................t....h..h..h..p.....h...W.................hLss...........hhha..ll...h..sll............................................hhh.h.....t......p........h....t.......t.........h.........h......h......h.........h......h.......h.........h...h....h......h.....h....h.....s....h...h...h.
..h...h...h..h..h................................................h....................h..h..........................................................hhha..hhG....h........h......h....s....h....h....h...h...t....h...t...t..t...h........................................................................h.h.h..h.h....h..h..h.....h......h....h.s..h..h..h..h.h.h..h.h.hhhh.....................................................................h..h.....h....h...h....h......h....h.....h.....h..h....s..h....h....h...h...h....h...h.h..h...t.h....p..ht.................hh........................................h.ht.hlu..p.h.S..a.u....l....Y....L..h..H................................h..l...h...h..h..h.t..t..h..h.hth...................................................................hhh.hh.hh.h.h.h.s.hhh.sh....hh.................................................................................................................................................. 0 1161 2171 3133 +25 PF00928 Adap_comp_sub Adaptor complexes medium subunit family Finn RD, Bateman A, Coggill P anon Pfam-B_1007 (release 3.0) Family This family also contains members which are coatomer subunits. 
20.80 20.80 20.90 20.90 19.80 20.70 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.86 0.70 -5.29 46 2208 2012-10-02 01:13:52 2003-04-07 12:59:11 16 30 497 31 1352 2129 25 258.90 25 54.82 CHANGED hsh+sss.hp....appNElalDllEc....l.ss.lhsp.sGp...llpu-.lpGplphpshLsG.hPplplulsc.h.............................................sh.l--hpFHtCVphsp.a.cp....................p+hlsFlPPDG..cFpL......hpY...+l.......................................................spp....hthPhpl.hsphp........................................................p.tst.schchhlphctp......htp...sspslhlplPl....Pps......................................ssssshps.stG....pspapsppp...hLhWsI....tclss......p...hp......................................................u.plphssthp...................phss............................lslpFplshhssSul...........................pVchlcl..h..t......pshpsh+hV+YhTpus.paplRh ................................................................................................................................hp.t.s.hp....apps.ElalDllEp....................l.s.........h.l..............hst.....s.....G..p....................llpuc....ltGpl....phps.h..Lo...G...hP..-...lp..lulsst................................................................................................................ttshtl-D..spF..H...CV..chsc..F...-s....................................................c+hl......uF......l.......P.....P.....D......G.....pFp......L...........hp........Y....Rh.........................................................................................................................................................................ssp.........h.l.h.pl..ps.hp.......................................................p....up..s+h-h.hl...p.hctp..........................htp.....hspsV.lpl...P.h....Pps............................................................................ssssp..hps.....s.G................php.aps.ppp....slhWpI.....tphss..........tp.....sh
p.......................................................................................................u.ph..ph..s.s..t...st.t..........................th.s.........................................................................................................lslpFp.....l....s......h..s...sS.Gl................pV..chlpl....p................tthpsh.h.V+a.spst...h....................................................................................................... 0 456 720 1076 +26 PF00107 ADH_zinc_N adh_zinc; Zinc-binding dehydrogenase Sonnhammer ELL anon Prosite Family \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.35 0.71 -4.60 108 42002 2012-10-10 17:06:42 2003-04-07 12:59:11 21 474 5352 463 12979 36140 9081 125.70 20 28.57 CHANGED ulGhhulphA+shG.......spllssstspp+hchAcphG...Astslsspptt......................hhppltchssu.........GsDsll-ssG.ssshppulphlphsGplshlGhss............tshshshhtlhhpphslhGshhss........pphtphlphlhp .......................................................................ulGhhul.p.h....A.+..t.hG...........scV.l....u....s........s....t...........s.........s......p.........+........h.........c.......h.........A.....c......p...........h.....G..........A......s........p......s....l....s......ppts.................................................hhptl..t..c..h.sss.......................G.s..D...h..s..l...-.....s.......s.....G..........................s............t...........s............h.....p............t.......s.....l.....p...........h.....l........p................s........G.........p......l......s.h.l.G.hss........................t..h...h...s.....h..................h....h........h..p...t..h.p..l.h.....G..h..hht............t...........t................................................................................... 
0 3503 7483 10666 +27 PF02682 AHS1 DUF213; Allophanate hydrolase subunit 1 Mian N, Bateman A anon COG2049 Family This family is the first subunit of allophanate hydrolase. 20.40 20.40 20.40 21.10 19.70 19.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.08 0.71 -4.85 158 3092 2012-10-02 15:38:38 2003-04-07 12:59:11 11 20 2321 14 753 2183 1289 203.30 32 52.43 CHANGED h..p.lpshG......-pulllphs.pt.hs.......shtppl....hsltptlppt.......hsulh.-llPuhsolhlpa-.shth............sht..plt....stl....pph.hpp...ttspttsst..............p...........lclPVtYssphG......................DLtpVAchsGLo.hc-VlphHosspYpVhhlG.FtPGFsYLuulstp...L.t.sPRpssPRtplPuGSVuIAGpQoulYPhsSPGGWplIG+Tsh..tlF.Ds.pp.psP...sll ...............................................................chh.hG-pulllc....ht........lsh..........shp.tcl....hsltptLppt................hssll...-l.lPuhs....slhlhac..stt.l..............................shp...slh..ptL..............pph..hpp.............tpshpsss.............................................+h...............lcIPVhYs..s.th...G..........................DLp.VAchsG.Lo...hcpVlchHoss...pY....hVhhlG.FhPGFP....YLsulspp..........L..t.sPRpspPRhtlPuGSVGIuGspoGlYP.hs.oPGGWQlIG+T.s..l...plFcs.pc.p..h....................... 
0 200 423 624 +28 PF03915 AIP3 Actin interacting protein 3 Wood V, Griffiths-Jones SR anon Pfam-B_38461 (release 7.2) Family \N 24.50 24.50 24.80 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.37 0.70 -5.57 18 245 2009-09-10 22:05:34 2003-04-07 12:59:11 8 6 183 3 166 277 0 303.00 27 37.23 CHANGED FL+lssKsKKshlsh...slo.hspLRLLFlEKFAYSPGss..sFP-IYIpDPpouVpYELE-..Lp.DlK-GolLsLphcp..................c.ssshssLpc.hlcsl+pplscppssl..................hpclpsh.tsssh.hsp.ssuss.....sttspppssstt.ttps..................hpElpslc+-Lull+QhasstpsslpsolsslhpKlssh+s.sulsss.....ssSsRuYh-pupscLu--SDpLls+VDDLQDllEsLRKDVApRGVRPsc+QL-slsK-lppApc-LpcMppaIspEKPhWKKIWEsEL-+VCEEQQFLsLQEDLshDLp-DLcKApETFsLVEQsocEQtKssups................+s+sl..lslscPG.ohpph+DtVLsEVpuLpPsHESRlEAIERAEKLRp+E+chp+sscFpcELusFVEcuKLKKSGGlEElERhR+tKD- .....................................................................................FLQhtscsK+hhlsp........tlo.hsplphh....FlcpF....shp..t.p....chP..p..IYIpDs..ps.lhaELED......lp.-l+D+olLhL..c................................................................................................................................................................................................................................................tphptlp.-lt.h+p......h.pt..tthtt.htshh.p.h.t.t..htt....s..t..............ttt.hhpttpp.htt.s.tl.tc...lp-LpshlEth+tD....VhpRtsps..ptlcth...ltthptpltphp.hh.t.p..hpphhptEhp.l..sptpp.hp....c.h....th.t.htth.thh..ht...h.................................................................................................................................................................................................. 
0 57 95 141 +29 PF00842 Ala_racemase_C Ala_racemase; Alanine racemase, C-terminal domain Bateman A, Finn RD anon Pfam-B_1496 (release 2.1) Domain \N 25.00 25.00 26.50 25.50 24.90 24.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.61 0.71 -4.73 121 5638 2009-01-15 18:05:59 2003-04-07 12:59:11 16 16 4223 90 1060 3928 1372 125.90 36 32.58 CHANGED lhs.Lcucllpl+pl.psGps..lGYGtsapspcss.plAslslGYADG..hs.Rthus.ps................l....hlsGpps..Pll....GRlsMDhhhlDlo...........ph...ssp.G........-pVhlhG.tp......hss--lActssTIsYEllsplut.Rl.Rhah ......................shoLpoplltV+pl.p.s..G.-..s..VGYGs.sap..u..pc.cp..pluslslGYAD..G..as.Rph..ss..ts...........................V.......ll.......s....G....p....cs....sll....GRVsMD.hhVDls.......................p.sphG......sp.VhLaG..pp........................lsl--lAph.s.s.TI.sYEl.l.stl.sp..RlsRha.h.......................... 0 358 686 893 +30 PF01168 Ala_racemase_N UPF0001; Alanine racemase, N-terminal domain Finn RD anon Manual Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.87 0.70 -4.79 613 12691 2012-10-03 05:58:16 2003-04-07 12:59:11 15 33 4649 104 2910 10839 5318 218.10 20 66.44 CHANGED lsLsslppNlpthppts....s..........ss.lhAVlKusuYutuhhpsut.h..t.u.....s..c.....shulsplpEAl........................tLc....ps..............tl.hh..........ppl..hhp.......slhhslsshcth.p.tlsptttt................tpslplhlpl-o.Ghp........RhG.........hs..s........pth.........phhp...tlp...........s.....lplpGlhoHhus.u............-....p.t........tpp.p.hpph....pphhptl........t.....t...............hhshusSushhht.........h......p...hsh.VRsGhslaGh...ps 
.................................................................................................................shttlppNlpthppts...ts............ss.lhA..VsK.s.......hsts.h...h.p.hs...h.....................t......t..u.....h.....p.....................t.h..u.....s.p..l.p..Euh........................................................................pLp..psh.................lh....h....h.....................pp....l...........h.hp..........t...sh...h.....h....s...l.s...s.h.chh...p..tlpptt................................t.ps.lp..l..hl..p.....l.....s............s...uhp.................RhG..................lp...s......cch................phhp.......plpp..................h..ss......lp.l.t.Gl..hohhuts........................................-c.p...............htp....p...h....pp.h......................pphh.ptl..............pt..t......................hlS.h.u.sS..ushhht..................p...hhsh...VRhGhslaG.t.............................................................................................. 0 960 1857 2470 +31 PF01315 Ald_Xan_dh_C Aldehyde oxidase and xanthine dehydrogenase, a/b hammerhead domain Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.35 0.72 -3.99 30 5239 2009-09-12 04:57:33 2003-04-07 12:59:11 17 77 1902 114 1766 4735 2068 108.20 30 12.95 CHANGED TGcAhYsDDlshssst.ahshltSshAHA+IhuIDsspAhshPGVlullTucDlsstspshh....s.hsp.......lhAc-cVphhGpslAhVlAcspttAccAAcLVpl-YEcLs ............................................oGcupYs...-D...h...s....h....s....s....h....h...a..sth.l.pSs.h.A.H.A+......I..t...........u.lDs.st.A..t.......s.hPGVl..s..llT..tp..D...lss...h........t....hssh...........h..tsp........................hh..s.......s...s.......c..V.c....ahGpslA.hVlAcotttAccAsphl..c...l-Y-.L............................................ 
0 567 1035 1411 +32 PF02738 Ald_Xan_dh_C2 Molybdopterin-binding domain of aldehyde dehydrogenase Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 19.60 19.60 19.70 19.60 19.10 19.50 hmmbuild -o /dev/null HMM SEED 547 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.43 0.70 -6.19 30 9467 2009-01-15 18:05:59 2003-04-07 12:59:11 13 89 2059 116 3266 9151 5213 368.20 19 64.65 CHANGED Alc......ppshhsph............hphhppGD...sspuFspADt.......................llcuphphupQpahahETpuulAl.c.p-sclplasSTQsPphspphlApsLGlPtp+lclhspclGGGFG.pcscshshsshsALAAh+htRPV+hhhsRpEchhho.utRHshhhph+lGhsp-G+ltAlchchhssuGshsshu.slh.puhhts....ssYchsslchsuhsstTNhssss.AaR.GaGssQuhauhEshlcclAppLGl-s.ElRchNhh...cts.tsha.spphsshshhp...th-cshppspappRp.......ttlcp.....hshsp.pahc+Gluhsssphshshsss...tusutlplhsDGo...lhlphGus-hGQGhpTphtQlsAcsL.........ulshccIplppss.TspsPpusuTuuSpuoshsGtAltpAscplpc+Lp.htsphht.........shsacch........sssuhsssls.sApuhatss...........s.shphtt...s..spsh.hasaGssssEVElDshTGcscll+s.hlhDsGpslNPtlshGQIcGuhsQGhGhhhhEElhas.ssG.lhspuhhsYcIPshs-lP 
......................................................................................................thh.........................................................................................hp.........h................hEs....hs...............t...............t......t..........t.....h.......hh...s.Q.........................t.....h.s...h.....h.....t.....h..................t..................t....l.....h............hG.GuFG.....t..........................................................s...h.....h.....h......s..h...h.....h.........t.....t.......s.....lp.................h...sR..p.h..t....t....................p.h..p......h...u......h..p.......t.......p.......G............l.............u....ht.....h......t........................h.....................t......................h....................................t.......................................................................................................Y......h......t..h...h..t...................h...s............................s..h.....h.R.....u.....u.s.....s.hh.h.E.hhp.h.A..tht......h..ssht.hR.hph......................................................................................................................................................................................................................................................................................................................................................................................................................t.........s.....l..........h...........s..Gp......h..l...sh..p..GpG.h...Thh.t...h..hup.h................................th.s....p..t..l.......h...................s......t..............t...................h.................s............................s.....h........u......S.t.............h.s...h...................s....s........h..t........h..h....h....t.........................................................h........................................................h......................................................
.................................................................s.......s.....s......h..s.....l...t.l..s....................s..h...........p..l.p..h.................h.s.h..G.......lNs.h..tQ..............h.Guh.h...u.h.u..h.tt..........h.................................................t..........h..................................................................................................................................... 0 967 1893 2579 +33 PF00248 Aldo_ket_red aldo_ket_red; Aldo/keto reductase family Finn RD, Griffiths-Jones SR anon Prosite Domain This family includes a number of K+ ion channel beta chain regulatory domains - these are reported to have oxidoreductase activity [2]. 20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.57 0.70 -5.58 61 28932 2009-09-12 09:01:48 2003-04-07 12:59:11 16 110 4596 377 9885 23584 7566 269.00 25 87.49 CHANGED pluhGshthst.............thspppshph...lctAl.ctGhs.....hlDTAph.........Y...........utuEph.lGc...hLpp.........+cplhlsoKs..................................ht.shstp.....plppslcpSLccLph.-hlD.lhhlHtsssths..........................hp-sh......ptlcchhp....pGpl+tlGlS...shs.hpplpph........tpttht.....hsssQspashh...........tthppplhp..hspp..p.slsll.uauslut...............GhLptt..........................................................................thhphlpplucp.hsh.ohsplulpaslp..............s.....................tsssslsGs..sshpclcpslpsh...ph.pLsppphstlcplht 
.........................................................................................................luhGsh.th.........................ths.t.p.p.s.h.ph.......lctAl....-t...G.hp...........................hhD.T.A.ph.........................................Y...........................p.sE...p......h....lGc.....ulpp.................................h.R-c....l....h......l......sTKh..........................................................................................................h.....t..s....h..s.hc...................plh.p.u.l-.p.S.L...c.........+...L..........t.........h....D..........Y.....l..............D....L...a....h....l.H...h.....s..s...s..t..ss...............................................................................hp-..sh...................pAh.p.c.l...hc........................pG...+..l.+.t.........l.....G.....lS.....................Nas...sp.p...lpph.....................................t.ps.tht..................hs.s..s..Q.s.c.hshh................................h..p..p..p...l..ls.............hspp.......p..sl.s...hh..ua.....o......P.Lu.p..........................................................G...hl.t...t....................................................................................................................................................................h...pt.l.p..p.....l..A.....p.....c...h.....s......t...o...s...u........Q........l.....s.....L...t.Wh..l....................................t......................................................................................................t.s.s.ssIs.uu........sp.p..p..l...p..p....N.h.tsh........sh...pLotc-httlpth..t................................................................................................................................................................................................. 
0 3142 5960 8241 +34 PF01263 Aldose_epim Aldose 1-epimerase Finn RD, Bateman A anon Prosite Family \N 21.10 21.10 21.20 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -12.04 0.70 -5.31 66 8201 2012-10-02 23:57:29 2003-04-07 12:59:11 15 27 3660 74 1995 6054 1148 286.20 18 90.64 CHANGED phlslpss.pthpsplhphGAplhShpsss.t...t-llhshsp.....ta.tt........ushlsthAsRl.ttGta.hpGtshplshNs................stpshHGtsps...phWpltphp.........stsphphh..sssc.ps...aPtplpsplsapLsspst...Lplphcupsst....cshshshs.HsYFNls.........hptpplplpu..pphhphss...hs....hshptssh.sh...ppsphlspch............thDpsahhss..........hphshhhhptspshtlchpsspsshhlassshht.................hhhpttuhshpsphhssssspsphs....hhLpsGEpaptphthp ..........................................................................................................t..hplpss...thps..p.l.h.s.h.GAplhu.hphss.....................pcll..h..s..hss..........t..h.tt......................hu..s.h.l.s.hus..Rl..tt........u...t.h...........h............t...G....p....sh.p.l...shsp......................ssp.s.h.HGh..scs........................p.Wp.lhpps............................ptlphs.h.p..................ssp..ts..................a...P...t......sh............plplpap.....L............ss.......s......s................lp.....lp.h..psp...s......s.............psh......shsh..s.Hs......Y.Fs.ls.....................hp.p.pp....lp....l...su.....t.thh..........h...sp...............h....s............t...........h...t.h.t...s....s.....s....h....-h........ppst...l.s.tp.............................................th..Dcs.ahh.t..............................................phthh.h.h..p...........p...p..t....h..t.l..p..h....t....s...s...t.......s..l.......l..a....ss...s.ht.....................................................................hhth..s...h.s.h..ts.p..h..h..s.s.p..s..t.......p.....s.....ths........hhLtss.p..hp..........................................................................................
....................................................... 0 599 1167 1644 +35 PF03155 Alg6_Alg8 ALG6, ALG8 glycosyltransferase family Mifsud W anon Pfam-B_3941 (release 6.5) Family N-linked (asparagine-linked) glycosylation of proteins is mediated by a highly conserved pathway in eukaryotes, in which a lipid (dolichol phosphate)-linked oligosaccharide is assembled at the endoplasmic reticulum membrane prior to the transfer of the oligosaccharide moiety to the target asparagine residues. This oligosaccharide is composed of Glc(3)Man(9)GlcNAc(2). The addition of the three glucose residues is the final series of steps in the synthesis of the oligosaccharide precursor. Alg6 transfers the first glucose residue, and Alg8 transfers the second one [1]. In the human alg6 gene, a C->T transition, which causes Ala333 to be replaced with Val, has been identified as the cause of a congenital disorder of glycosylation, designated as type Ic OMIM:603147 [2]. 25.00 25.00 26.20 26.00 22.80 23.30 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.88 0.70 -5.65 9 764 2012-10-03 03:08:05 2003-04-07 12:59:11 10 16 267 0 527 725 13 353.20 28 86.10 CHANGED hsslhl+hh..LhPh.tp......sDFEspRpWhtIThsLPlupWYhpuosp....WsLDYPPhhAYapalLuhlupa.h.sschls.hpshGaES.sp..hlF.RhoVIhs-lLhhls.lhhaspsh...th.+..spppphhsuhhlLhpPGLllIDHhHFQYNulhhGlhlhuIsslhpcp.alhuAhhFslulsFKphhLYhAPsaFsaLLtp.Clp.sp....ht...........taspllpLussVlusFslhahPahhh....ppl.QVLpRLFPFsRGLhcshhA.NFWshaNsh.Kltphlsh.................................................plt..lohhhTllu.LPuhlhLahcPp....+hFhhuLshCuhu.FLFuapVHEKuILLslLPhslLhsttst..h.hhhhhsssuhaSLaPLLh+sshhh.phlhhhsa.hh.....hs........................p...thhh..hphlphhalluhhslhhh.phlp.hl...s.spKaP.LhllLsushuslshhhha...lah.h 
.....................................................h....hh+hh..l..s...t............sDaEspRpWhtlT.pLPlppW....Y..h.t..........WsLD.YPPhhA............aapahhu...huth....h...ssthht..................p..s.....h.shts..th...hhF.RhoVlhs-hlhhhs......lh.......h.h......h......................................t....................................t........h...............h.h.....llh.suhlllDahHFQ............YNs.h..hGhhlhul..hh...t.............tp..................h....h.....uu.hhFshhlshKph.LYhu.sh.hha.LLtt....sh.............t...........................................................hhphh.tluhhshhshhh.h.hPa.h............................tth.QlhpRlFPh.tRGLhc...shhA.NhWshhsh..h......+hh..hh.....................................................h...hshh..hT.....h.h...h....................shh.....l....h.h......p..........sp...........................th.h.hslh.su...hu....FhFuapVHEKulLls......hlPh...sll..h...t.................................................hh...hhh..huhh...ohhP..LLhps..t.h..h.hhhhhha.hhh......................................................................................................t.....h.h........h..h..h..h......h..h...h......th...h.....hh...............p...h..l..hh.s.hs..th...h........h............................................................................... 
0 184 291 438 +36 PF00245 Alk_phosphatase alk_phosphatase; Alkaline phosphatase Finn RD anon Prosite Domain \N 19.80 19.80 19.80 19.90 19.70 19.70 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.42 0.70 -5.79 10 3561 2012-10-03 20:55:17 2003-04-07 12:59:11 15 33 1898 200 1070 2985 602 329.70 28 81.45 CHANGED AKNVIlhlGDGMGVuolTAARhLKsptcsphs.-h.LuhDchPhsGhu+TYssDpp......VsDSAAoATAahCGVKTspssIGVo............spGs-V..hSVLEtAKcAGKusGlVTTTclsHATPAuhhAHsssRshhussss.....ssthhptG.....ptDIucQLlssh...cIDVlLGGGR+aFh.pspss.......tttGhRhDGRNLlcEa......K+pGapYVhsRp...pLlpspssp.sp.LLGLFtsuchta-l.....................pccsspsPSLtEMT-sAl+lLo.+Np......+GFFLhVEGuRID+ucH-scshtALsEsltFDpAlctAh-hs..p-c-TLllVTADHoHs......aohGuYs.+sspshGLusupsssDttshpll.asp...GsGtslcs.........................tt+.slst.scthts.apppuslshsocsHoGEDVsVaAhGPpAc.......hl+GlpEQTplu+sMstAhsL .....................................................................................KNlIhhlGD.G.Mu.s...hsssR...h......t..t.....t.................t......s................................................................................ht...........h.......-t...........h....h.u...........psas.st.........................................lsDSAssuTAh.sGhK..o....ss..sl..u.ls...................................pt...p...h........olh..c..h.A.c..t..t..G.h.ss.GhVsT.scl...pcATPAuh.hAHsspRp..s..s...............................................................p.hhpt.......................h-..V.hhGG..G..t..p...a................................................................t.hp.s.t.s.L..h.pth.............p..t....t....u.....h....p....h.s......s.t.t...........th......t.............p.....t..s.....p....................l.l...G....L.....F......s....s...s.p........h.......ht...h................................................................................p..t....t.....t.....t.....P.........o..L...tpMsptA..l..p..h..L.....p.....p..st....................pG.FFLh....VE....G...upIDhttHsss......h..t.....
......ht-hhth-pAlphA....hp......h.s.........p...p.......t......p....TL.llsTADHsps.......................s.t...............................t.......................................s.........h.........t............t.................................t.....................................p...........h..h..................h.........................s....................................................................................................................................................................................................t...s...tt...Hsutp..l...lh.......A..h.G..s.t..t.t........................h...G.h-ps.h.hh.hh.h...................................................................................................... 0 299 560 845 +37 PF02806 Alpha-amylase_C alpha-amylase_C; Alpha amylase, C-terminal all-beta domain Sonnhammer ELL, Griffiths-Jones SR anon ref [2] Domain Alpha amylase is classified as family 13 of the glycosyl hydrolases. The structure is an 8 stranded alpha/beta barrel containing the active site, interrupted by a ~70 a.a. calcium-binding domain protruding between beta strand 3 and alpha helix 3, and a carboxyl-terminal Greek key beta-barrel domain. 21.20 17.00 21.20 17.10 21.10 16.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.74 0.72 -3.85 246 4792 2012-10-02 20:10:03 2003-04-07 12:59:11 13 63 3353 197 1256 4323 339 94.60 25 14.30 CHANGED W....ssssslluFtR.............stt....hlllhN...hss..hppsh.psulP..sG....p........Yp-ll....susttph....us.......s......t.......h..............h...pGp........h..plpl....Pshusls.ht.h..p .........................Wl.hpcsspslluFhRt.......................spt.t...llslhN...Fs.s.s.....s.p.p...sY..pl.GlP...sG..........p........Yp.-ll................NoDs.tpa....uGs......sh..spttp................lpt...............................hs..cGp..............l.plsl....Psh...uslhht...tt............................................................. 
0 397 756 1059 +38 PF05111 Amelin Ameloblastin precursor (Amelin) Moxon SJ anon Pfam-B_6419 (release 7.7) Family This family consists of several mammalian Ameloblastin precursor (Amelin) proteins. Matrix proteins of tooth enamel consist mainly of amelogenin but also of non-amelogenin proteins, which, although their volumetric percentage is low, have an important role in enamel mineralisation. One of the non-amelogenin proteins is ameloblastin, also known as amelin and sheathlin. Ameloblastin (AMBN) is one of the enamel sheath proteins which is though to have a role in determining the prismatic structure of growing enamel crystals [1]. 25.00 25.00 30.00 29.90 16.90 16.90 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.48 0.70 -5.73 4 141 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 79 0 27 129 0 207.90 45 100.34 CHANGED MSASKIPLFKMKDLILhLsLLKMShAVPAFPQQPGs.....PGMASLSLETMRQLGSLQGLNsLSQYSRaGFGKShNSLWhpGLLPPHSSFPWhRPREHETQ...............QPSLQPpQPGhKPFLQPTAsTuhQsTsQKsuPQPPhH.GpLPLQ-uELPhscp.QVAPSEpPPsPELPshDFADPQhPo..........................lFQIARhISRGPMPQNKsS.LYPGMhYhoYGANQL...............GGRssPhAYGulFPGFGGhR.slcthPpNPsMGGDFTLEFDSPVAATKGPEKGEGGAQGSPl.EApssc.ENPALLSplAPGAHsGLLAhPpDsIPuhARGPuG+.pthL.uVTPAAADPLhTPELAEVYETYGADhTTsLG.ptEATMDoTMoPDT.QT.MPGNKspQPQhhH-AWHFQEP .....................................................................................................................................................................................................................................................................................................................................................shtuhP.NPshGGDFTLEaDSPsAuTKGPEpt....EGGspssPh.-spsss.EsPAhLsEht..PsshuG.LLA.PpsslPsLsRuPuGp.+t...tVTPAsADPLhTPtLA-sYcTYsuD.TTsls.pcEs..Th..DsThsPso.po..hPtN+sppPphhpcs.................... 
0 2 2 6 +39 PF01510 Amidase_2 N-acetylmuramoyl-L-alanine amidase Bateman A anon Pfam-B_735 (release 4.0) Domain This family includes zinc amidases that have N-acetylmuramoyl-L-alanine amidase activity EC:3.5.1.28. This enzyme domain cleaves the amide bond between N-acetylmuramoyl and L-amino acids in bacterial cell walls (preferentially: D-lactyl-L-Ala). The structure is known for the bacteriophage T7 structure and shows that two of the conserved histidines are zinc binding. 21.10 21.10 21.10 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.26 0.71 -4.10 117 7328 2009-01-15 18:05:59 2003-04-07 12:59:11 20 149 3627 173 1411 5665 2129 140.50 22 44.18 CHANGED ttsttth..psphlllHpTssss..sts.t..............hhphps..............pssuhH...ahlst......cGp.....l..h.ph...hshs..........thuhH.su............................................h........Nst....s..lGl.phht.u....................................................sssQh.ps...ht..pL..hthl.........ppa.....s.ls.p........................plh..uH.......pcls...................sPush ...................................................tt.........phphlllHpTu.s.ss..spstt.h....................h...ts...............................pplusHal.lsp...................cG..p............l...h..ph...........lshs......................phuWH.AGs...........................................................................................h.Nsp.........S....IGI.Eltsst.........................................................................ttsh.ss.sph..ps.........ht....pL....sttl.............hppa...s..ls.p.............................plh.uHp-luspp...............sPu..h..................................................... 0 418 807 1119 +40 PF01520 Amidase_3 N-acetylmuramoyl-L-alanine amidase Bateman A anon Pfam-B_888 (release 4.0) Family This enzyme domain cleaves the amide bond between N-acetylmuramoyl and L-amino acids in bacterial cell walls. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.22 0.71 -4.49 103 7380 2012-10-02 19:46:12 2003-04-07 12:59:11 13 197 3575 11 1463 5592 1358 194.90 29 50.10 CHANGED lslDsGH.....G....Gp.Ds.....................G.....Ahu....ss....hh.EcslsLpl.upplt.ph.LpppG....hpVhhTRss.D.....t.h................sLppRsph..........A.....sp...hpuDlalSlHtN.uh............ssss......pGsplah...................................................ptptspupplApt...ltpp.ltpththt.......scG.......l+tss....................hhll..cp..sph.....PulLlEhGFloN.p-tphlpssphppp.lApuIspults ...............................................................................lhlDsGH.....G......Gp...Ds.................................G........AlG..................su......hp..EKclsL.p.l.Ap.clp...ph.Lpp.pG.......hclh.h.T.Rss..D..............hhl....................................................sL..p.p.Rs.ph...............A.........pp..........ppAD...lFlSIH.ss..uh...............................................ss..ss..s........pGsplah................................................................................................................................t.tph..ppuhplA....pt....lhp...p...lt..p..h.s.t.hp...................s+s.......................scp.us............................................hhVL....+p.....ssh............P.ul.LlEh..GFloN..sp.-..p.p.h.L.ts.sp.ap..pp.lApuIhpGlh.p....................................................................................................................... 0 519 1014 1265 +41 PF01593 Amino_oxidase Flavin containing amine oxidoreductase Bashton M, Bateman A anon Pfam-B_606 (release 4.1)b Domain This family consists of various amine oxidases, including maze polyamine oxidase (PAO) [1] and various flavin containing monoamine oxidases (MAO). The aligned region includes the flavin binding site of these enzymes. The family also contains phytoene dehydrogenases and related enzymes. 
In vertebrates MAO plays an important role regulating the intracellular levels of amines via there oxidation; these include various neurotransmitters, neurotoxins and trace amines [2]. In lower eukaryotes such as aspergillus and in bacteria the main role of amine oxidases is to provide a source of ammonium [3]. PAOs in plants, bacteria and protozoa oxidase spermidine and spermine to an aminobutyral, diaminopropane and hydrogen peroxide and are involved in the catabolism of polyamines [1]. Other members of this family include tryptophan 2-monooxygenase, putrescine oxidase, corticosteroid binding proteins and antibacterial glycoproteins. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.44 0.70 -5.42 84 8963 2012-10-10 17:06:42 2003-04-07 12:59:11 19 111 2576 276 3844 15154 7049 357.50 13 77.77 CHANGED luGLsAAppLtpt....Gh..............pVhllEAp-RlGGRlt..ohptt.....shhh-hGsphhh.s.ss.hhphhtphshp.phthh.tstthhhthphttt.hhthssth.........................................t.hhshtphhphttthtsthhthtpthhphtttsh...tphhhppshtphhht.....................................................................................................htshhhsthth..hthttust.hthhthth.shpptthhhshtt....hshhsthhsthts..ts...plphsppVpplphps.............ttststttsu..........thpsDt.Vlsssshsshp..............thhPthst.t.phh.pththssss.plhlphppthh.pt...................th.shhhsss...............ththhhhshphsss.....ttsshhshlhssts.....thhtshsccphhptlhpplpphhs.....th.ssh...............t..hppWtpsthstusashhsstss.....hhsthtssh.....................slahAGstsstth..sslpGAlpSGhpsAppll 
..........................................................................................................................................................................................................................................................uGLssA...h.....L.....t.p.t...........uh................................plhl.hE......t......p....s........p.h.G.G.+....h.............s..h.....t..t...................sh..h....h..-.h....G...s...t.....h....h................................h.........t...h.....h.....t...............t.......h....h......................t................h.............................................................................................................................................................................................................................................................................................................................................................................................t.................................................t....h.............................................................................................................................................................................................................h...........h...h...............................t.................h............................h......................................................................................................h..................t........................................t..h........t.......h.......t...h...h...h...t........t..............plp..h..s...p.........V...pp.lp.h..ps......................................tt.h..t..l..h..h..t..sst..................................................hp.s-h..l......lh.s.s.s.h.t..hht........................................h...s.......h.s..t.................t....h........h...t........t.....h................h.....t...s.........s....hh..h....h..........h....t..t.......h.h..............................................................................th.
hh.t......................................h......hh......t..................................h..h.....h......h..h...h..t........................................h..t...h....s.............p.....t.....h....h....t.........h.h.....t......l.t.p....h..hs.................t.................................................................t.h..........................h...........h..............h...t................t....................h..............h..t...............................................................................tlh.hsGt....h..t...........................h.h..ssh.su...s...................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1229 2344 3195 +42 PF00501 AMP-binding AMP-binding enzyme Finn RD, Eberhardt R anon Prosite Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.67 0.70 -6.09 354 67453 2012-10-02 15:58:18 2003-04-07 12:59:11 23 2256 6205 141 23415 62718 26362 376.00 19 52.12 CHANGED 
hppps...pptP.cps.....Al....h..............tspplTYp-LppcusplAphL..t.p..h.Gl.p..tsc.h..Vulhh.pss-hlluhlAlh+sGushlPl..ss......shss.....cclthhlpcu.......psphll...s.............................................t.......hhhhp.......t....tt...................................................t.....sch...Ahll..Y..TSGoT....GpPKGVhloH.psl....hsh.hs.............ht.......tt........hh.hth.ss.......hsashu...h.htlhssLhtGu............................slllhpt..t.hs....thh...hp....hlp.c......hplT...hhhssPshhp.hl.......hp.t...t........................................ths...................s.L......+h..................................................l..h.......s..uGpslssphht.hpphhtt........lhss.YG.TEssshs.....................hhh...................ttt...hs.l.G...pP..l.....s.ssphtll..Dtt.tp.l...s.....sG.s...GE...lh.lp....Gs....s..lhcGYh........s.c...P.ch.....Tscp.ah......t...........................tt.ha+TGDlup.h.p.....D.G....h.lthlGRhc.c.lKlp .......................................................................................................................................................................................h..........s...pt..Al...h..............................tttpho....a...t..p.....l....t....p..........p....s....p..p.h....A....t....h....L...........h.....t.................h.....G......l....t..............................s.......sh....................V.u...l.h...h...........p..s......................p..............h....h...........h......s.....h..h..u.h...........h.........p....h...............G....s......s....h.........s....s.....l.......s...s.....................................t.h..s..........................p...p...l.....t.....h.....h..l.p...p..s........................t..s.p....hll.................................................................................................................................................................................h.....h.....h.........h....p.............................t.............................................................
.......................................................................................................................................................................ts......pch.....s...h....l..h......a........T........S.....G..o.....T..................G......p....P..........K............G..........V...h.....h.....s.....+.....tsl....................hs.t.h.h.t.h.........h.............t.......................................-....hh..hth...hs......................h.h..a..s....hu................h....h.t..hh..s..s.......l.....h....t.....G..u...........................................s.h...h.l.h.......s.......t.....h...............tth..................hp...............................hlp....p........................h.....p..s..o............h.h.h.s..s...P.s..hhp..hl.............................hp..t....................................................................................................................................shs...............s...l...+.h.................................................................................................................................................................................h...h.......................s.......uG..p....s...l..s...s......p......h..h......p......t......h..t..p.hhs.....................................l.h..p.s...Y.....G.................T.Essshs...............................sh.................................................tt....p.h.s.s...h..G.................ts.......h........................s...s...h......p...h...t......ll................D.t...p................h..p..h...........s..................................ut..............GE................l..h....l.p...............Gs.............s......l.h..p.........G......Y...h...........................p...p.........s...ph.....................o..t..pt...hh...s...........................................................................tt..h.a..c...T...G..D..luh..h..p..............t.........c..G..........................h...l....h....h..h.G.Rh..c..c...lhh.t....
............................................................................................................................................................................................................ 0 6394 13492 19232 +43 PF05195 AMP_N Aminopeptidase P, N-terminal domain Finn RD anon Manual Domain This domain is structurally very similar [1] to the creatinase N-terminal domain (Pfam:PF01321). However, little or no sequence similarity exists between the two families. 20.40 20.40 21.80 20.90 20.20 19.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.84 0.71 -4.77 154 2593 2012-10-02 11:23:57 2003-04-07 12:59:11 11 16 2060 51 934 2167 1097 135.80 28 29.60 CHANGED hstptatpRRpcLhppht................sulsllsuuspthRs.sDscYs.FRQ-Sp.........FaYL..................TG...asEP-ulhll.........ptp......t.lLFspt+-............ptElWsGhRhG.-sAhctausDpAasls-...lsphL.sphlpstp..t...laash..s.pssthc ...........................................tppatp.RR.ptLhp.pht......................suhsll.uus.ph...h..R....s..sD.s.c.Y.s.FRQ..sSs..............FaYl...................TG....h...s...E...P-.ulllL........................ststs......cslLFscs..+c.................ptElWh.Gt.Rh..u........-..t...A.pph.G.l..Dcsh..shs-.......ls..phL..phl..tths..h...lahs............t......................................................................... 
0 292 563 795 +44 PF03098 An_peroxidase Animal haem peroxidase Bateman A anon Prosite Family \N 19.30 19.30 19.70 19.80 19.00 19.10 hmmbuild -o /dev/null HMM SEED 530 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.83 0.70 -6.13 123 2338 2009-01-15 18:05:59 2003-04-07 12:59:11 10 150 421 314 1401 2365 62 385.50 22 55.96 CHANGED YRoh-GpsNNh..ppP.thG..ssss.asRh.lsstY..tcuhspsts............tth.LPs...........sRt.l..Spplh...tptp.......................hssssph....ohhhh.au.QalsHDhshssps.................C.t............t..shs....l.ls.sDshhs..........sh.hhRoh.s...ss.t.....................tpQ..............................lNphTualDuS.lYGsspptsppLRs...hpt.....GpL+...........ss.s...hhshs.........p.h..................hhhsG-tRssp.suLsshpslahREHNplAcpLpp.............h......................sPc..........................................................................................WsDEp..laQcARpllsA.........................h.Q+.................................IshpEalPtlL.Gp.p...shph.hh..............................................................hps..............Ys.slssslssEFtsuAaRahHohlssthhhhtpph................................tl....lpcthassp...hhpp.........................ls....hl.hGhhs.....................................psutphc...phhssplpppl..a...phstt........h.DLsulsIpRuR.D+GlssYNpaRchhs......Lpshpoac-ltt..........hssppl.............................hppLpplY...................................sss-slDlalGulsEp.h...Gu..............hhGsoh......ssllspQFp+h+pGDRFaa....E...ss..........................s.FTtpplpplp.....ps.oLscllscs.....h.th...tp..ht..ssF 
..............................................................................................................................................t..h.R...h...s...................................................hP............st..l....hh.....................................................................................................................t......s.hh.......h.au...phh.a.ph..s.....................................................................................................................................................................................................hs..hothlDhu.lYGp.......s........t..t..tl.....R.......t..........G.hht............................................................................................................................h..h..u..p...h.h.....s......hl.....h.hthlahR.HN................h...lsp..L.tt......................................p..p......................................................................................................................................................h..ss-p.....l.....a....ppu.......Rhl.hh...u..........................h..p......................................l.sh.-al.hlh.s..........ht............................................................................................h.s................hp.....thsstl..s.F.shs.a+.a......Hshl.s.h.hhstph..........................................h........p.h..h....t..........h..h.t.......................................t...........hh...th.t............................................................................t.hst......t..ltp.h........h.........t......................cl..shsltcuR.c............h.....s.ls.shsphR.t...h.h.s....................................h......hp....s..ap-h..........................p.....t..h...............................................tthtthY...........................................................................tp..pt..l-la.GhhhEt...........st.................h.G.hh..............................phhh.ph.....t..............
h.h...uD...+...hhh.t.....p...........................t.h.s...th..t.l.........p...sh.plhh................................................................................................................................................................................................................................................................................. 0 473 646 1103 +45 PF01821 ANATO Anaphylotoxin-like domain Bateman A, SMART anon Prosite Domain C3a, C4a and C5a anaphylatoxins are protein fragments generated enzymatically in serum during activation of complement molecules C3, C4, and C5. They induce smooth muscle contraction. These fragments are homologous to a three-fold repeat in fibulins. 21.40 21.40 21.40 21.50 20.90 21.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.24 0.72 -3.82 65 552 2009-01-15 18:05:59 2003-04-07 12:59:11 13 71 77 20 202 520 0 35.50 37 3.14 CHANGED CChDGhptss..hspoC-cRsthl.........tpsppChpsFhpCC ...CCt-Ghptts..hspoCcpRsthl............ps.tCtpsFhpCC..... 0 14 34 79 +46 PF01094 ANF_receptor Receptor family ligand binding region Bateman A, Finn RD anon Prosite Family This family includes extracellular ligand binding domains of a wide range of receptors. This family also includes the bacterial amino acid binding proteins of known structure. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.96 0.70 -5.66 119 7015 2012-10-02 13:57:41 2003-04-07 12:59:11 23 144 390 116 4127 13698 3388 312.90 15 40.55 CHANGED thttAhphAlcclNpsss.hh...sshplshthtss.............................sstt.....ssshstphhtpptlhull..Gsssos.sstssuplssth..pl.Phloh.........................u.usssthss....tp..ashhhR..sssssptt.spuhscllpp..................asWppluhlhssssh...spth.......hpslppthpph.th.hhth.........tth.spspshp..shhptlpp.....pps+lllhtstssph......ptlhpps.......t.th..ttta.halhhshh.pt.....................................htstphtcshpuslshphhtstt.thpphhpphp...........................................................................ttt.shhshhhYDulhhhApAlpphhpppssthtst......................................................................s.tlh.phhps...........spa.......pGhsGplphsptssph.shthhhhphp ...........................................................................................................................h.huhpt.h.s...t...p.......h.................t....h..p..l..t....h.....h....p.......................................................................h............................................t.................h...h..u...lh.......G..s......s.st..................s..........................h......s....p..h...h.....t....h..h............pl....P................l.....o...h....................................................s...s....s...s.......h..ss...................t.p...as...........h.hp....hh.s..........s.....t...............t..............s....t....u......h...h....p....l.l.pt...................a.t..W.....p......h...l..s.h.l...h.....s...s.s..ph.........h..h.........................................hp.t.h.pp.....h...pp.....t........h.shth...............................h...t.....t..t...p.............p.......th.......p....l..p.p.......................t..............p.....sp...l.l..l..l..h...s..p..t.p.th.............................................
ttlhpt.h....................th.......t....t....h....h..a....l.h..s.s....h.h....t...............................................................................................h.h.t............h..t....s.....h...h......s.....h.....t.......h..................p............p..................t...h.....p...p....a...h...t.p.h.p...............................................................................................................................................t..h......t.................s..h...h....a....s..AV.....h...s...h...A..h.....A.l..p.p.h...h...t...p..................................................................................................s.....p...l..h...p.hh..h.p.......................................h..p.h.................................................p.s....s.G..p.....h...p..ts...t..............h.................................................................................................................................................................................................................................................... 0 856 1347 2917 +47 PF00023 Ank ank; Ankyrin repeat Bateman A, Sonnhammer ELL anon Swissprot_feature_table Repeat Ankyrins are multifunctional adaptors that link specific proteins to the membrane-associated, spectrin- actin cytoskeleton. This repeat-domain is a 'membrane-binding' domain of up to 24 repeated units, and it mediates most of the protein's binding activities. Repeats 13-24 are especially active, with known sites of interaction for the Na/K ATPase, Cl/HCO(3) anion exchanger, voltage-gated sodium channel, clathrin heavy chain and L1 family cell adhesion molecules. The ANK repeats are found to form a contiguous spiral stack such that ion transporters like the anion exchanger associate in a large central cavity formed by the ANK repeat spiral, while clathrin and cell adhesion molecules associate with specific regions outside this cavity [2][3]. 
20.60 14.70 20.60 14.70 20.50 14.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.19 0.73 -7.67 0.73 -3.89 1072 8812 2012-10-02 12:10:21 2003-04-07 12:59:11 25 2020 897 214 5230 139773 9187 32.40 29 5.11 CHANGED pGpTsLHhAspps......................phpllchLlp....tuushshpsp ..............................................................................G.TsLH.h.A..stps............................................................................phcll.chLlp.........pGAshph......................................................... 1 2564 3256 4303 +48 PF00191 Annexin annexin; Annexin Finn RD anon Prosite Family This family of annexins also includes giardin that has been shown to function as an annexin [1]. 20.90 20.90 20.90 20.90 20.20 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.96 0.72 -4.07 177 6936 2009-01-15 18:05:59 2003-04-07 12:59:11 15 27 307 347 3328 6752 29 64.80 29 64.50 CHANGED hDAp.hLcpAhcshG...Tc-psllcIlsoRoptphppIpptYpptau.+sLpcslp.s-hSGcacchLlsL .....................................-Ap..hLc.pu..h..+.G..h.G.............TD.E....ps...l...l....c..Il..s..s...R..o.p..t..p...l..p....p....Ip.p...tY.........p........p........h..a..s.....+......s..L.tc.....slc...s.......-..h....S.G.ca.cchllsl..................... 1 791 1389 2220 +49 PF03861 ANTAR ANTAR domain Yeats C anon [1] Domain ANTAR (AmiR and NasR transcription antitermination regulators) is an RNA-binding domain found in bacterial transcription antitermination regulatory proteins. The majority of the domain consists of a coiled-coil. 
20.10 20.10 20.10 20.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.33 0.72 -4.39 179 2458 2009-09-15 11:05:26 2003-04-07 12:59:11 9 45 1401 6 888 2155 145 56.00 30 24.33 CHANGED phpplcptLpsRc........hI-pAKGlLMpp.ps..ls.EpcAachLRctuhppshpltclAcpll .................................htplpptLpsR+.............ll-pAKGlLMpp.ps..lo.EpcAachLpctuMcpphplt-lAptll................ 1 317 595 768 +50 PF04729 ASF1_hist_chap Anti-silence; ASF1 like histone chaperone Mifsud W anon Pfam-B_3167 (release 7.5) Family This family includes the yeast and human ASF1 protein. These proteins have histone chaperone activity [1]. ASF1 participates in both the replication-dependent and replication-independent pathways. The structure three-dimensional has been determined as a a compact immunoglobulin-like beta sandwich fold topped by three helical linkers [2]. 20.70 20.70 22.80 23.80 20.60 19.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.76 0.71 -4.58 24 449 2011-09-19 02:26:53 2003-04-07 12:59:11 8 4 313 29 308 425 2 145.60 53 66.27 CHANGED M.S...hVslpslplh.sNPusFpsPapFcIoFEslpsLp-D.....LEWKllYVGS...............................Apspca...............................................DQhLDslhVGPlst.GhppFsFcussPshppIP....p-llGVTllLlossY+spEFlRVGYYVNNpYp-pELcENPPs.....ps.l-+lpRpIlsscPRVTRFsIsW- ................................MuhVplhsVtVL.sNPu.FhsPapFE.IoFEClE.sLpcD...............LEW...KllYVGS...............................................AcSc...ca..................................................DQ.LDSlLVGPl.Ps.Gh........ppF.lFpA..............Ds.Psssh.IP...psDhlGVTVlL.lT.C.oYcGpEFlRVGYYVNNE.Y.s..s..t..EL......+.....E.s......PP.s..............KP....h.-+lpRNIL.AppPRVTRFtIsW-........................... 0 115 168 246 +52 PF02822 Antistasin Antistasin family Bateman A anon PDB Family Members of this family are inhibitors of trypsin family proteases. 
This domain is highly disulphide bonded. The domain is also found in some large extracellular proteins in multiple copies. 21.20 21.20 21.40 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.67 0.72 -3.63 60 460 2009-09-11 13:25:17 2003-04-07 12:59:11 9 70 93 8 298 535 14 27.10 43 9.73 CHANGED Csthp..C...phtCshGappD.p.sG.Cp...h.CpC ..................Cstlt....C..........phtC.s.aGFppD.p.pGCp...h..CcC 0 107 126 218 +53 PF00847 AP2 AP2-domain; AP2 domain Bateman A anon Pfam-B_409 (release 3.0) Domain This 60 amino acid residue domain can bind to DNA [2] and is found in transcription factor proteins. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.71 0.72 -3.81 206 5868 2012-10-03 08:51:45 2003-04-07 12:59:11 15 52 728 5 2816 5799 189 52.10 39 17.05 CHANGED sth+GV..phcp.hs+...Ws..Aplh................t...pphhlGpass..-..cAtpshptsthphpst .....................tYRGV....Rp+t....hG+...........Ws..AEI+c..s.........................p....pRlWLGT...FsT..sE....cAA+AYDtAAhph+G....................... 0 596 1740 2384 +54 PF02424 ApbE ApbE family Bateman A anon Pfam-B_1963 (release 5.4) Family This prokaryotic family of lipoproteins are related to ApbE from Salmonella typhimurium. ApbE is involved in thiamine synthesis [1]. More specifically is may be involved in the conversion of aminoimidazole ribotide (AIR) to 4-amino-5-hydroxymethyl-2-methyl pyrimidine (HMP). 
20.00 20.00 20.20 20.10 19.00 19.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.54 0.70 -5.10 245 4122 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 2989 11 985 3311 1196 240.90 28 74.58 CHANGED lpclsp.hSs..a..c..ss...S-lsclNpsss.hp..h..tlss-hhp..llppuhclsphosG...sFDsTl.GPLh.sLW.......uFssp............thPs...tppls................pthth..sshpp.hh............ttphhhppsshtlDLuuIAKGausDplsph.L..ppt..........Glp.shlVphG.G-ltsh..Gpp....pup.........s.......Wplul..p..s..P..............................ppt..........phht..hl.....plps.t...uluTS..G.sYcpaa.....s..Gp+apHllDP...pTGhPhp...pphsSVTVl..usss....ht.....ADuhuTulh..lh.G.....cp.uhphscp....ps.........ls.s......hhl ..............................................................................tthcphhSsa.p.....ts.....Spl...clNp.....ts......u....h......p.....sh........tl.s.t.phhp.llp.huhchuttosG....thDlol.GPLl.phW.........thG.pt................sphPs.......tp.pl..p............................................thh....t..h.....s.s.hpplhh.....................tppp..l.hhpp....ss.htlDLuulAKGYusDplsp.....h.....L..cpp.......................................Glp...shllsl.G.....G.s.lhsh......Gtp........sp.sp.........................s...............WpluI...p..c...P..p............................................tps.shhs.....hl....plss...t................ulsTS...G.sYc.pah..p....hs...............Gpc..apHllDP..........pTGh.....Plp...pp.....lsS.V.T.Vl.....u.s..s.u.............h.p............A.DuhuTulh.....sh..G.........pp....uhphhpp......ts....lt.shhl............................................... 2 336 675 848 +55 PF04049 APC8 Anaphase promoting complex subunit 8 / Cdc23 Wood V, Finn RD anon Pfam-B_13808 (release 7.3); Family The anaphase-promoting complex is composed of eight protein subunits, including BimE (APC1), CDC27 (APC3), CDC16 (APC6), and CDC23 (APC8). 
20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.99 0.71 -4.46 26 298 2009-09-11 06:08:17 2003-04-07 12:59:11 8 107 258 0 221 308 6 146.10 29 24.42 CHANGED Mssp.......slpcl+ppLhtushpho-+sLhQSAKWu........AEhL.uLs.........................................ps.hsspsstpsttp...........spEh.-tYLLAKSYFDs+EYDRsAahLcssp...............................................SpcuhFLtLYupYluGEK+cpE-sptslssts.........sstNcchs ................................................h...........htpl+hpLhtuhhphscRsLhpuuKWu.....................AEh.L.uLs..................................................................................s....h.ts.ss.ptt.t...............................p-h.stYh..LAKoYFDs+EYDRsAahlps..sp....................................................op+uh..F.LthYupYLuGEK+..+p-.-p.t.hss.p................................................................................................................. 0 76 124 184 +56 PF04106 APG5 Autophagy protein Apg5 Wood V, Finn RD anon Pfam-B_12134 (release 7.3); Family Apg5 is directly required for the import of aminopeptidase I via the cytoplasm-to-vacuole targeting pathway [1]. 
20.30 20.30 21.40 21.20 20.20 19.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.15 0.71 -4.83 34 341 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 279 5 222 339 2 200.20 30 65.17 CHANGED WahPlGlLaDLhsssssh........................................................PWplplHass....aPsptLl.hssc..........cslcstahsslKEAsal+sGs.......scsIhshscpDpppLWpulhs..cshcpFhpIspKLh............................s.psth+pIPlRlYlsss..................hthlQphlpsh.................tpsp.h.TLt-hLpphLPshh.................................................stp..........ptphspsllpGIclsh..csPlp.lscphsasDsFL....aIslhhh ........................................WahPlGlLaDlhs.sss.h..................................................................................................................................................................................................................PWplsl.Haps....aPpp.Ll..hssp.......................cslcstFhsslKE.......ADhl+pts........psl.s.hp+cDp.ppLWtul.s..c........cac.pFh..tlsp+Lh.....................................................................................s.ttssh+pI.....PlRlYhsss..................................................................h.hlQphh..t..sh...........................................tpsp.p.TLschLpplh.Pshh...............................................................................................................................tp..................t.thspshlpG..l.pssh....csPlt.lscphu.asDsFLalslh............................................. 1 76 119 183 +57 PF04602 Arabinose_trans arab_transf; Mycobacterial cell wall arabinan synthesis protein Waterfield DI, Finn RD anon Pfam-B_4670 (release 7.5) Family Arabinosyltransferase is involved in arabinogalactan (AG) biosynthesis pathway in mycobacteria. AG is a component of the macromolecular assembly of the mycolyl-AG-peptidoglycan complex of the cell wall. 
This enzyme has important clinical applications as it is believed to be the target of the antimycobacterial drug Ethambutol [1]. 25.00 25.00 28.00 27.90 17.90 22.20 hmmbuild -o /dev/null HMM SEED 657 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.09 0.70 -6.34 7 497 2012-10-03 03:08:05 2003-04-07 12:59:11 7 7 176 8 90 457 0 565.40 47 61.09 CHANGED cussps.RIARhVAslAGlhGhlLslssPLLPVpQTTATlpWPQ....sGplsplTAPLluhsPpsLslTlPCpAsAsLPsss....GlVlSTsPttGh-AscsGLhlcsspssVsVshRssVsssAPRstVsu....ssCphlchhAsssussA-FVGlst.....suGp.......ss-hRPQlsGVFTDLpsPAssGLuhpssIDTRF.TuPTsLKhhAMlLGlssslhAllALthLDphsttth+h...hlPhtW.................hssthsDssVlusLllWHllGAsoSDDGY.LThARVup+AGYhANYYRaFGosEuPFsWYaslLAhhApVSTAulWMRLPshlsuluCWLlISRpVLPRLGsu...lsssRsAhaTAGuhFLAAWLPFNNGLRPEPlIALGlLlTWsLVERAluhpRLhPsAlAhllAhhTlsluPpGLIAlAALLsGuRsltplltRR+ptsGhhs.lAPLhAAsoVlhhllF+-QTlATVlEus+lKhsVGPolsWapEaLRYYaLhlto.sDGSluRRFAVLshlLCLFsslhhhLRRu+lPGsApGPsWRLIGhThhohLhLhFTPTKWshpFGsFAGLuGulGALsAhAsophuL+oRRNhslalsAlLFVLAhuhAulNGWaYVSNaGVPW.st.PhlstaslTohFLsLollsuLlAuWhHFph 
.............................t.........h..+hhAhluG...llGh..lLslhsPlLPVpQssAplsWPQ...............sG...p.h.s.....sls..APLluhsPhshsholPC.ssht.....s...Lss..st................slllu.ThP.tt..u...c.A...st...p...uLhlps........s........t......s......t......lsls.Rssslh.s.sshppl...u....ssC.ptlpltustst....stA......phsGl.t.............sGs.....................................s........shRPQlsGlFTDL.......p.......s.....s.......A......s....s.....G..........L...shpsslDoRasooPTsLKhhshll.ulhh..slsuLlALhhLDp.h.-Gt..pth.c..................hhP.spW...........................................................hp....phhDssVlusLlhWHhlGANoSDDGYlLsMARVupcAGYhuNYaRWFGsPEuPF.GW.YYslLAhhspVSs.Aol.W.MRLPsLhsulssWhllSRcVlPRLG.u....ltss+..sAh..WsAAhVFLuhWlPaNNGLRPEPlIALGsLl.TWs.lERulutp.RLhP.uAlAslhAuhTLssuPoGLhAlusLls.uhpslh.pllh.+.............R.............p..................t......h.........................G.............h.h....s....h...lA......PhhAAuollhhslFtDQTLuslhEus.+l+ttlGP...ohsWapEhlRY.hLh.h.o.sDGSluRRFulLhhhlsL.hssl......hhhLR......+..t.RlP..GsAtGPs.hRl....lGhh....hhohhhhhFTPTKWsHHFGlaAGluuul..uAls.slshuth.sh+.sp...RNRshah..u.u..l......l.F.l..hAhuhuuhNGWWYVusaGVPW.st.Ptltuhsh...oohhL...s....Ls.llshhhAsW.Hhh.......................................... 0 15 61 81 +58 PF03079 ARD ARD/ARD' family Mifsud W anon Pfam-B_2276 (release 6.4) Family The two acireductone dioxygenase enzymes (ARD and ARD', previously known as E-2 and E-2') from Klebsiella pneumoniae share the same amino acid sequence Swiss:Q9ZFE7, but bind different metal ions: ARD binds Ni2+, ARD' binds Fe2+. ARD and ARD' can be experimentally interconverted by removal of the bound metal ion and reconstitution with the appropriate metal ion. The two enzymes share the same substrate, 1,2-dihydroxy-3-keto-5-(methylthio)pentene, but yield different products. ARD' yields the alpha-keto precursor of methionine (and formate), thus forming part of the ubiquitous methionine salvage pathway that converts 5'-methylthioadenosine (MTA) to methionine. 
This pathway is responsible for the tight control of the concentration of MTA, which is a powerful inhibitor of polyamine biosynthesis and transmethylation reactions [1,2]. ARD yields methylthiopropanoate, carbon monoxide and formate, and thus prevents the conversion of MTA to methionine. The role of the ARD catalysed reaction is unclear: methylthiopropanoate is cytotoxic, and carbon monoxide can activate guanylyl cyclase, leading to increased intracellular cGMP levels [1,2]. This family also contains other members, whose functions are not well characterised. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.82 0.71 -4.28 11 964 2012-10-10 13:59:34 2003-04-07 12:59:11 9 12 776 3 436 1144 153 149.50 30 82.56 CHANGED phahh--psp.Dp+lPccpsPtchhphpcLsclu.hhha+lsscsppsscchpplhch+shshhsh..........spssscphPpa--KlcpFaEEHlHsD-ElRahlcGsGYFDV+..sps-sWIRlhlcpGDLIllPsGIhHRFTsssssalKAhRLFsssstWss .................................................................................................................p.t.t...p.hs..t......t...........tlt..h..h......ch.s..t.s..................s.s.p.p..h..t.hltthp.sh..t.h..................l.hshps.s..t..P.s....h....-....p.h....lppF.hpE....H.hH.s.D.-.E.lR.ahl.sGpG..h..F.s.l+.....sp.s.....ct....ahclhscp.GDLIslPAGhhHh.F..s.h.s.s........s.s.h..lp.A............lRlFssp.sWss.............................. 0 130 246 354 +59 PF01412 ArfGap Putative GTPase activating protein for Arf Ponting CP, Schultz J, Bork P anon SMART Domain Putative zinc fingers with GTPase activating proteins (GAPs) towards the small GTPase, Arf. The GAP of ARD1 stimulates GTPase hydrolysis for ARD1 but not ARFs. 
27.50 27.50 27.50 27.90 27.00 26.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.97 0.71 -4.31 141 3394 2009-01-15 18:05:59 2003-04-07 12:59:11 13 165 341 30 2020 3186 33 114.20 32 18.59 CHANGED pphlppltp...h.ssN.phCsDC...s..stsP.pWsSls.hG.....lalClcCSGl...HRs.LG..s.......HlS+V+SlsLD..p..Wsscplphhpth.GNp.pssphacsp.....................sst..........ssppphcpaI.ppKYtp+t.as..pst .....................................p..hhpplhp..h.ssN...phCsDC....s............u.s....P...pWASls.hG............lhlClcCS..Gl.H.R..s.LG.s.............................H...lS.+V.......+....S..ls.LD........s.....W.ss.p.......p..l......p......h..........h.pph...GN.s.t.ss.p..haEsp................................t...s.ss.......................ssppphcp.aIpsKYpp+tah..t.................................................................................... 0 652 1022 1510 +60 PF01388 ARID ARID/BRIGHT DNA binding domain Bateman A anon [2] Domain This domain is know as ARID for AT-Rich Interaction Domain [1], and also known as the BRIGHT domain [3]. 21.00 21.00 21.00 21.30 20.80 20.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.86 0.72 -4.13 53 1730 2009-01-15 18:05:59 2003-04-07 12:59:11 16 96 289 17 1077 1669 7 89.90 29 8.00 CHANGED pspspppppFlcplhpFhcppGss..........h.tchPt................lst+.lDLapLaphVpchGGhc......pVscp+p........WsclupcL..shss.....ttssu...........ppL+stYt+hLhsYE ..................................................t.....tctpFlppLhpFhc.p.p...G...s.s................................h..p+.h.Ph....................................l..s.t+.LDLapLaphV.p.ch.G.G.hp....................p.V.sp..p.+t.............................W.p.clsppL..sh..s.s........s.pssu...................s.sL+ptYp+h.L.hsaE........................ 
1 277 474 782 +61 PF04683 Proteasom_Rpn13 ARM_1; Proteasome complex subunit Rpn13 ubiquitin receptor Waterfield DI, Finn RD, Coggill P, Wood V anon Pfam-B_4497 (release 7.5) Family This family was thought originally to be involved in cell-adhesion [1,2], but the members are now known to be proteasome subunit Rpn13, a novel ubiquitin receptor. The 26S proteasome is a huge macromolecular protein-degradation machine consisting of a proteolytically active 20S core, in the form of four disc-like proteins, and one or two 19S regulatory particles. The regulatory particle(s) sit on the top and or bottom of the core, de-ubiquitinate the substrate peptides, unfold them and guide them into the narrow channel through the centre of the core. Rpn13 and its homologues dock onto the regulatory particle through the N-terminal region which binds Rpn2. The C-terminal part of the domain binds de-ubiquitinating enzyme Uch37/UCHL5 and enhances its isopeptidase activity. Rpn13 binds ubiquitin via a conserved amino-terminal region called the pleckstrin-like receptor for ubiquitin, termed Pru, domain [4]. The domain forms two contiguous anti-parallel beta-sheets with a configuration similar to the pleckstrin-homology domain (PHD) fold [5]. Rpn13's ability to bind ubiquitin and the proteasome subunit Rpn2/S1 simultaneously supports evidence of its role as a ubiquitin receptor. Finally, when complexed to di-ubiquitin, via the Pru, and Uch37 via the C-terminal part, it frees up the distal ubiquitin for de-ubiquitination by the Uch37 [5]. 
25.00 25.00 25.70 25.50 22.50 24.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.14 0.72 -3.90 78 393 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 279 4 246 369 1 86.10 40 26.93 CHANGED G+hphc....s......phVpPcscKG.hlhlhpsc.-....sl............h+FpWpsRss.......spsEp.......-lll..hPs-spFpclsps............psGRVasL.+Fpsusp+.hFFWhQ-t ..............G+hplc....s......phVpP-t+KG.hlaltp......s-.D.......sL.................lHFsW+sRss.........spsEc................DLIl..hPsDspFp+V..sps............soGRValL.KF.pu.....uspR...hFFWhQ-................... 0 78 132 204 +62 PF00514 Arm Armadillo_seg; Armadillo/beta-catenin-like repeat Bateman A, Ponting C, Schultz J, Bork P anon SMART Repeat Approx. 40 amino acid repeat. Tandem repeats form super-helix of helices that is proposed to mediate interaction of beta-catenin with its ligands. CAUTION: This family does not contain all known armadillo repeats. 20.60 20.30 20.60 20.30 20.50 20.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.45 0.73 -7.89 0.73 -3.96 242 12724 2012-10-11 20:00:57 2003-04-07 12:59:11 18 321 451 497 7518 13312 326 40.80 24 18.23 CHANGED ssp.ptphll..psGslshLlpLLp......sps.plpppAshuLsNlss ..................tptphll..psGs.lshLlpLLp...............sss.pl.pp.p.AshALsNls....................... 0 2088 3612 5404 +63 PF00339 Arrestin_N arrestin; Arrestin (or S-antigen), N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Ig-like beta-sandwich fold. Scop reports duplication with C-terminal domain. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.74 0.71 -4.39 42 2044 2012-10-02 22:29:00 2003-04-07 12:59:11 24 33 301 26 1377 2143 4 138.80 19 29.63 CHANGED hslhls....p.chlaasG..-sl......sGpVll.sspp.hcs.RtlhlplpGts+sshpc.cshshphpc.............t...p....shhchpphLhpp..............h.hGsasaPFpaplP...ss.PsShpuphG........tlcYpl+shl....-ts.hchspp...pppshhVlphlshs ..........................................................................................................................h.ht....p..t......h..ss.......p.l............sGhlh..l....p......t.....h......ph...p..t.............l.hlphput..hcht....h...p...p.....ps..h...s..p...tt.......................................................................p.t....th.h.p.hp.t.t....l.h.tp..........................................h.........G..p.a..p.....a.P.F.ph.p......l.....P.......t.s..h....P.....s..S..h..p....h.p....G......................................tl.pYp.l.cuhl...............cps.....t..h...p......hpp.hhllp.h........................................................................... 0 452 648 1092 +64 PF02752 Arrestin_C arrestin_C; Arrestin (or S-antigen), C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Ig-like beta-sandwich fold. Scop reports duplication with N-terminal domain. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.69 0.71 -4.06 173 1981 2012-10-02 22:29:00 2003-04-07 12:59:11 17 29 291 26 1311 1936 0 151.20 19 30.82 CHANGED tsstlphplslsppuas.Gpslslplpls.......N.pos.hs.........lpplphpLhpphpahspt.......................................................................ppp.ptpphlsp..pttssl....................................tstppsphp..hpl...........plP...........................................................slss...os.........................................................psphIplpYplclplphs........................t..plphplPlhl..hps...sh ..............................................................................................................................................t.ps.lplphsls+..ps.as.....G.-s...lsl.p.h.pls.......N.po..s.hs.....................lp...plcsp...lh.pph...shhsp...............................................................................................................................................................................................ppp.phpphlsp..tpstsh............................................................ssss..shp.......hpl......lP..........................................................................................................................slss.......ohh.........................................................psphIpl.pYp.lc....lplpls.........................................t...httcltl..clPlhlhp...................................................................................................... 0 417 606 1041 +65 PF04959 ARS2 Arsenite-resistance protein 2 Moxon SJ anon Pfam-B_5269 (release 7.6) Family Arsenite is a carcinogenic compound which can act as a co-mutagen by inhibiting DNA repair. Arsenite-resistance protein 2 is thought to play a role in arsenite resistance [1]. 
21.10 21.10 21.20 21.70 20.90 21.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.52 0.70 -4.15 8 282 2009-09-11 08:45:49 2003-04-07 12:59:11 8 12 212 1 189 287 1 200.40 25 26.08 CHANGED MPNRCGlIHVRGPhPsN.RITpsE..............................VsEapKoaEEKLuPLLuh+-sLSE-EApKMG+KDPEpEVEKFVoANTQELuKDKWLCPLSGKKFKGPEFVRKHIhNKHu-KlEEV+KEV-FFNNYLtDAKRPuLPE.KPh.sPGsst..ssshuP.uhs.YsPQ..sPQuhhPaGtP...RPPhhGa........s....GGPsFPPs.aG.......................uGRGNYDsFRGQssthuhP+s.R.hcGs .........................................................................................................................................................................h......p..a...+s..h-pKl..h.Lp....p...p.................-....h..pK..hGt.K...s.....s...-....p....tl.Echlp...t.s..pc..scs..K.....ahC..Ph..s.......u..KhFK..us-Fl+KHlhpKHs....Ehl....c....c..l...+...p....E...l...t...aaNN...Yl.....h...D...Pp+..P.s.h...sc.........t...t.....s....s.s..............................t.s......s.......a....t.....................h.................................................................................................................................................................................................................................................................. 0 65 107 155 +66 PF01037 AsnC_trans_reg ASNC_trans_reg; AsnC family Finn RD, Bateman A anon Pfam-B_773 (release 3.0) Family The AsnC family is a family of similar bacterial transcription regulatory proteins. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.17 0.72 -4.27 263 13775 2012-10-02 00:20:33 2003-04-07 12:59:11 16 36 2996 126 3987 9832 2190 73.20 21 46.93 CHANGED ltlplp.pp.........hcphtctltp......hPEVhpsatloG.shDall+lhs.pshpshpphltp...ltplssltps.poplllpp .......................................................lplp.pp.........hpphtptlpp...........hs....E..V..h.psahl.oG..chDall+lhs.p......sh.pphp..chltch....l...t.p...lsu...l.pps.pohllhp............................ 1 944 2215 3156 +67 PF05118 Asp_Arg_Hydrox Aspartyl/Asparaginyl beta-hydroxylase Finn RD anon Pfam-B_2775 (release 7.7) Family Iron (II)/2-oxoglutarate (2-OG)-dependent oxygenases catalyse oxidative reactions in a range of metabolic processes. Proline 3-hydroxylase hydroxylates proline at position 3, the first of a 2-OG oxygenase catalysing oxidation of a free alpha-amino acid. The structure of proline 3-hydroxylase contains the conserved motifs present in other 2-OG oxygenases including a jelly roll strand core and residues binding iron and 2-oxoglutarate, consistent with divergent evolution within the extended family. This family represent the arginine, asparagine and proline hydroxylases. The aspartyl/asparaginyl beta-hydroxylase (EC:1.14.11.16) specifically hydroxylates one aspartic or asparagine residue in certain epidermal growth factor-like domains of a number of proteins [1]. 20.10 20.10 20.10 20.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.92 0.71 -4.41 11 1101 2012-10-10 13:59:34 2003-04-07 12:59:11 10 40 713 5 388 906 1501 146.90 31 44.69 CHANGED pLppNWptIR-EuLtLts...httuttas-tuasshhKssWcpahLhhhsstpssAtphs.PpTssLLcplPpspu.................shFupL.PGuclhPHpDPasus.....lRhHLGLsTP.s-tChIcV-spcpsW+-GEsllFD-oa.H.utNpo-psRllLhhDl.+P. 
............................................................................h..ptpW..IRpEh..tl................t.........t...........s.........s..........s........h......ct...sWcp..F..hL..h........hh...s.t............t.h....ss.....s.t.............p..hC....P.....pTs.pl..l.p..p..l.Pshpu............................AhFuhL..sPGscls.Hc..s...P....hsup.........lRhH.L..GL.h.......sP.....p.............s......t............C....hl.....p..V.....s...s.....p.....p.....h....s....W.c.....-.....Gcs.llFD.-o.a.H.shN..c.....s...s.....p......s...RllLh.sDl.+P............... 0 129 195 286 +68 PF01177 Asp_Glu_race Asp/Glu/Hydantoin racemase Finn RD, Bateman A anon Prosite Family This family contains aspartate racemase, maleate isomerases EC:5.2.1.1 [1], glutamate racemase, hydantoin racemase and arylmalonate decarboxylase EC:4.1.1.76 [2]. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.87 0.70 -4.43 177 8372 2012-10-03 04:39:14 2003-04-07 12:59:11 17 13 4320 108 2004 6434 2979 210.50 21 83.24 CHANGED lGhl..sstushshh...ppl.........tphhsst.......................hlhhtssp.............hhptt.............................h..hhh..............pssptl.t.......tssc................................sllluCsos.shhtshp.....tt....thP...l.ls.hscss............ht........th.t......thp+...lullus..hsshtsthhpphlpp..G.....l........p..t.......th........shhp...hup.hsst...htthhpthp............ppss....-sllLu.CTphshh.phhpth..t..........h.......llDsspshsp....ts..h 
...................................................................lGhh..Sshuslohh....cpl....................hc.hlsppp...................hl.hhtsss...........thP..........ppstpp......................................lhph.hh....................p.h.sp.tL.p........tssc.............................................hlllAC..N..Tu..p..s......s.s.h.thh........................pp.h..slP...............l..lu..lhpsu............................hc..................shtp......stsp+..........lull...uT......psTh...p.s..s...h...a...p...pt..lpph..s......l........................p..t..................th..hh............pls-.........tup..hpsp........shphltphlp................h.........ppss.............-sllLG.CT.chsll.hs....l..pphh.t............l.......llDostthsct..h..................................................................... 0 548 1190 1650 +69 PF01400 Astacin Astacin (Peptidase family M12A) Bateman A anon Swissprot Domain The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. Members of this family contain two conserved disulphide bridges, these are joined 1-4 and 2-3. Members of this family have an amino terminal propeptide which is cleaved to give the active protease domain. All other linked domains are found to the carboxyl terminus of this domain. This family includes: Astacin Swiss:P07584, a digestive enzyme from Crayfish. Meprin, Swiss:Q16819, a multiple domain membrane component that is constructed from a homologous alpha and beta chain. Proteins involved in morphogenesis such as Swiss:P13497, and Tolloid from drosophila Swiss:P25723. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.34 0.71 -4.91 26 2029 2012-10-03 04:41:15 2003-04-07 12:59:11 19 142 328 13 1290 1935 24 172.70 33 37.99 CHANGED ppWsps......IPYhlssshsspp+shlppAhpcacppTCl+Fh.ps.......scsshlhhhpssGCh.ShVG+p..sGt.QslSlG.pGC..phGllsHElhHAlGFaHEpoRsDRDcalsI.apNI.sut.tsFpKhs.spssshGhsYDYsSlMHYsstuFSpss.hsTlhs+sst...hpssIGQRhphShhDlpplNphYpCss .................................................................................................h..W.........lPYh.l....s.....s..p..........h........s..t...t...........p..p......t......h.l..hpAhppa...p.p......p......T......C.lc..Fh.ps.........................pppt....h..l.........h......h..............p......s.......s....G...C..h....S...h.l....G..............+............p.......s...........G............t........Q...t..l.....S......l............s.......t............s........C.................p........h.......G.............h.l....hHEl...hHulGFaHEpoR......s.......D......R......D.........p...a....Vp.I.......h...p........N...I.....................s.......s..h........p.......h................N...............F.......t.....c.......h..............p.....t....p.s........s...............s................h.........s.h.........s..........YD..asSl.....MH..Yu.t......h........s.............F.....o.......t.....s.......s.........................s........T.......Ihsh.s...........................ts.l.G...Q..R.....t.hSt..hDlh.......pls..thY.pC..t................................................................................... 0 532 658 1119 +70 PF02178 AT_hook AT hook motif SMART anon Alignment kindly provided by SMART Motif At hooks are DNA binding motifs with a preference for A/T rich regions. 
14.50 0.10 14.50 4.00 14.40 -999999.99 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.40 0.75 -6.23 0.75 -3.59 40 2152 2009-11-26 15:21:48 2003-04-07 12:59:11 14 167 542 4 1233 2051 224 12.60 52 7.35 CHANGED p+tRGRPpKstsp ...tRtRGRP+Kst..t.... 6 380 638 979 +71 PF03029 ATP_bind_1 ATP-bind; Conserved hypothetical ATP binding protein Griffiths-Jones SR anon Pfam-B_1301 (release 6.4) & Pfam-B_2154 (Release 8.0) Family Members of this family are found in a range of archaea and eukaryotes and have hypothesised ATP binding activity. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.56 0.70 -4.83 30 1967 2012-10-05 12:31:07 2003-04-07 12:59:11 12 16 660 9 1105 2459 404 206.90 26 79.02 CHANGED VsGssGuGKTTassulschh.hpth...hlNLDPus..h.a.s.hslc-hlohsclhp-..hslGPNGulhhuh-...........................ahphsl...h...tccl...........................tt..s...hhLFDTPGQlEhathhsshs.hhp.h....phshssVhLlDopphsc...sssahushh.s..hhh.hplPalsslsKhDlhs.........h.hh.t........................hppLspslsphl-phshs.phhssuspsppuhpsLlshl-pthpt 
............................................................lhGshGsG..........K..........TTas......t.slpp.hh..................h....p.......................................................................................................................................................................h....c..-......h.ls.h.t.slhp-..................hslu.s.p..u...u.h.h...hu.....h.-......................................................ahph...sl................pppl.....................................................................hh..LFss.....P...G...Q..h..............c......hah.h.h.s..s..hs..............................phsl.s..slh.L....l...D.o....p..p.h.s.s.........ss.cahss..h...............h...h..............hc.l.P.al.ss.lsKh...Dhhp...........................................................................................................................pthspsltthlp.h.....s..s.....hhs..s...t...p.p.t.pu.hptlhthlcps...h...................................................................................... 
0 386 688 948 +72 PF00306 ATP-synt_ab_C ATP synthase alpha/beta chain, C terminal domain Finn RD, Griffiths-Jones SR anon Pfam-B_15 (release 1.0) Domain \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.85 0.71 -3.60 150 20853 2009-09-12 08:32:44 2003-04-07 12:59:11 22 34 11148 406 3509 16060 5135 103.40 33 21.66 CHANGED tplusplpth....LtpacELpshsp....h..hG.-tL.u-pD.+h...h...Lp..+.uc+lcc.hLpQ....stassh...........s.................................lpc...plphh.....h.tllph..hpphs.pth.ttl........t............hpphcphtp ......................................csAppl+psLppY+ELp-lls........l..lGh......D-Lo-pD.+hp...lsRA++lpc.aLpQ...shassp.....s..t.phhs........................................................................lt-...slpsh.....h.tlhp..G...hD..pl....P.ppth..hhh.t..sh..s.......................tt......................................................................................................... 0 1226 2225 2960 +73 PF02874 ATP-synt_ab_N ATP synthase alpha/beta family, beta-barrel domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes the ATP synthase alpha and beta subunits the ATP synthase associated with flagella. 20.90 19.00 20.90 19.00 20.80 18.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.90 0.72 -3.76 196 22044 2012-10-02 13:55:04 2003-04-07 12:59:11 18 33 11118 393 3707 16715 4934 65.20 30 14.01 CHANGED lsplhGsVlslthst.s.hsslhsslplp..c...........tp.hh.h.uhshpLtsspVtslshssTsGlpc.GspVhsTG ..................................................p.hGsVlsV.......t.......s...........s......p.....h......sl..s..shtschhc.................................s..ss.h.tlt.pLtssp..Vtslshu.s..o..........cGlpc.G.pVhcTG.................. 
0 1311 2362 3125 +74 PF04718 ATP-synt_G ATPsynth_g; Mitochondrial ATP synthase g subunit Waterfield DI, Finn RD anon Pfam-B_5977 (release 7.5) Family The Fo sector of the ATP synthase is a membrane bound complex which mediates proton transport. It is composed of nine different polypeptide subunits (a, b, c, d, e, f, g F6, A6L). The function of subunit g is currently unknown. The conserved region covers all but the very N-terminus of the member sequences. No prokaryotic members have been identified thus far [1]. 25.00 25.00 26.40 26.20 24.20 24.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.28 0.72 -3.37 37 369 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 264 0 237 356 3 99.30 30 72.48 CHANGED sscssphsst.slhau+stlphh+phh+sE.LsPP.s.uchpphhpshhphhp............hpsss......hh..hp..pls..hc-ushsulhusEllsaFhlGElIGR.RpllGYps .................................................................h.thssthsst.slhau+...stlp.hhhhhh+sE..lsPP.osuch...phhpslhphhp.................hpsss............h+...pls.............sc-shhsul..husEllsWFhlGElIGR.tpllGYp................... 0 67 119 188 +75 PF03768 Attacin_N Attacin, N-terminal region Finn RD anon Pfam-B_2791 (release 7.0) Family This family includes attacin and sarcotoxin, but not diptericin (which share similarity to the C-terminal region of attacin). All members of this family are insect antibacterial proteins which are induced by the fat body and subsequently released into secreted into the hemolymph where they act synergistically to kill the invading microorganism [1]. 23.90 23.90 25.30 35.40 23.70 23.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.85 0.72 -4.10 17 105 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 32 0 30 120 0 66.10 44 30.45 CHANGED htGolTsNscGuussss+lshs.sscshhsAlGuV.hsssp+....hussTtGlshs.NssGHGhSLo+o+ .......huGSlouNPsGGusAplclshs.lGssptsslGpVFAuuNop......uGPVTsGssluhNssGHGhSLT+TH.. 
0 7 9 20 +76 PF03797 Autotransporter Autotransporter beta-domain Bateman A anon [1] Domain Secretion of protein products occurs by a number of different pathways in bacteria. One of these pathways known as the type V pathway was first described for the IgA1 protease [2]. The protein component that mediates secretion through the outer membrane is contained within the secreted protein itself, hence the proteins secreted in this way are called autotransporters. This family corresponds to the presumed integral membrane beta-barrel domain that transports the protein. This domain is found at the C terminus of the proteins it occurs in. The N terminus contains the variable passenger domain that is translocated across the membrane. Once the passenger domain is exported it is cleaved auto-catalytically in some proteins, in others a different protease is used and in some cases no cleavage occurs [1]. 22.00 20.80 22.00 20.80 21.90 20.70 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.69 0.70 -5.04 212 8209 2012-10-03 17:14:36 2003-04-07 12:59:11 14 136 1526 14 904 6497 851 238.90 16 26.88 CHANGED sstpsshWspshushtp....pspsstsuhp......tphsGhtlGs-tth.......s.sshhlGhshuhspuphp....................s.stsupspspshtsuhYsph............................psshhlsuths..auhhcp.......thts..............tptpupapspshssplpsuaphth..........pshtlpPhsplpahtsptss..apEps.....t.hshpht.pshpslpsplGlchphpht..............................thpsh.hphshtap.hsstppsstss.....t......hsht..hscsshthpsGsphpls.pphsl.hhshs.....tptspss 
..............................................................................................s...t.shWhphhusp.p........ts......s..s........thp...................tphtshpl..GhDhhh.................s.sph..hh..Gh.h.huhspsphp..............................ssttup.s.ps...pu...ht..huhYush...............................tsshalcsh..hp..hshh.pt..........t.ts...........................t.psph.p.u.pshsss..lcs..G.h.p.ath..............pshhlpP....s.p.l.pa....t..hpt.ss..ap-ss.......................s..hp.h.p..s.pshpshts....plG...hch.shp.hpht......................phpsh..hphshhpp...hssstp...ht...hss..............................shttt.hst.s..th..phthGh.ph..p.hs....pphsh..hhshs........h......ttt................................................................................................. 0 205 439 657 +77 PF03547 Mem_trans Auxin_eff; Membrane transport protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs & Pfam-B_5261 (Release 7.5) Family This family includes auxin efflux carrier proteins and other transporter proteins from all domains of life. 
25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -11.80 0.70 -6.17 25 9093 2012-10-02 17:06:44 2003-04-07 12:59:11 13 27 3153 0 2760 7555 2542 180.00 10 85.92 CHANGED sshpsll.lhllhllG.Yhus+h.plhss-pspslNchVhhhulPsLhFpplupssshphhhphhhhslhhhlhshlshhlsall.....s+.hh+hshchptslhshushsNssslGlPlLtulaGs...................tuhuhhlhhsslsslhhaolhh..................hlhp.putph.h.tt.s........................................................................................................................................................................................................................................................................................................................................................hhh.h..hhhh+hlhsPsshusllullhuhl.sahhsh.h..................phltpolslLusuulPhshhsLGlhLuhp.hhhshssthsth.........hhlRhllsPhlhlshshhhu..l.s..hhpshllp............sAhPsuhsshlhAppaslp.chsSsslhhuhllulhshslah 
............................................................................................................................................................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..........................................................................................................................................................................h................................................................................................................................................................................................................. 0 820 1729 2289 +78 PF02310 B12-binding B12 binding domain Bateman A, Mian N anon Pfam-B_359 (release 5.2) Domain This domain binds to B12 (adenosylcobamide)[1-3], it is found in several enzymes, such as glutamate mutase Swiss:Q05488, methionine synthase Swiss:Q99707 and methylmalonyl-CoA mutase Swiss:P22033. It contains a conserved DxHxxGx(41)SxVx(26)GG motif, which is important for B12 binding [2]. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.47 0.71 -4.24 133 8147 2009-01-15 18:05:59 2003-04-07 12:59:11 14 80 2972 71 3031 7333 2597 115.00 22 16.83 CHANGED hpslhssstschaslGht.hlushlcp.t.Ga.........-Vh..hl.......shps...ps................................cpllp.tltph.ps-l..l.ul.....Ss..........hssshsth..tclhct......hcphtsplhlllGGshsshs.ph...t........hu.sshhhGpG.csshtslh ...................................................................................................s.phlhuphttDsHD.l.G....tp.ll....ush....l....ps....t...Ga............-.Vh......l.......s...h.hh.....ss.....................................................................................-cllc...tA.h.cp...ps-..l......l.Gl..........Sul.....................hss...s...h...s.t...h....hp.l.hct...............hcp..t....t....s....l.h..ll.lG.G.s.h..s...s....tsh.....t...................t.s...................h....................................................... 
0 1180 2176 2679 +79 PF02607 B12-binding_2 B12 binding domain Bateman A, Eberhardt R anon Bateman A Domain This B12 binding domain is found in methionine synthase EC:2.1.1.13 Swiss:Q99707, and other shorter proteins that bind to B12. This domain is always found to the N-terminus of Pfam:PF02310. The structure of this domain is known [1], it is a 4 helix bundle. Many of the conserved residues in this domain are involved in B12 binding, such as those in the MXXVG motif. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.39 0.72 -3.95 255 3838 2009-01-15 18:05:59 2003-04-07 12:59:11 12 34 2594 16 1269 3424 1047 78.30 34 9.66 CHANGED ttphhpplhpullph-ppthpphlpp...........sl.p..........hsshpllpchlhsshpclGchap.psphhlsp.hhuuphhcpslshlhs ...........................................ltc+LppullcG.pchlppssc-..........................Ah.pp.s.........................hpPlclIpssLhsGMsh.VG-LFs..pGchFLPp..VlpSAcs.MKpAVuhLpP............................. 0 482 904 1109 +80 PF02362 B3 B3 DNA binding domain Bashton M, Bateman A anon Pfam-B_582 (release 5.2) Family This is a family of plant transcription factors with various roles in development, the aligned region corresponds the B3 DNA binding domain as described in [1] this domain is found in VP1/AB13 transcription factors [2]. Some proteins also have a second AP2 DNA binding domain Pfam:PF00847 such as RAV1 Swiss:Q9ZWM9 [1]. DNA binding activity was demonstrated by [3]. 
30.10 30.10 30.10 30.10 29.90 30.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.41 0.72 -4.37 114 2169 2012-10-02 12:51:43 2003-04-07 12:59:11 16 44 101 2 1310 2264 0 98.00 24 23.90 CHANGED Fhphh.....h.ssshpp.....shl...slPppF..sppp.........th..........tt.plhlps..pG............ppWphph.........pppspt.....hhls.......pGWppFspspsLp.sGDhlsFp..h..tt.psh..hlplhpts ..............................................Khh.h.oss.spt......s.th......slPcpaucph..........................t................tt..plh.h.cD..hpG.............ppWph+a...h.....................pspspp.......hhLs.......sGW.pp.FVpsppLt.sGD..sllFh......h....ps...pph..hltlh...s......................... 0 219 746 1021 +81 PF01313 Bac_export_3 Bacterial export proteins, family 3 Finn RD, Bateman A anon Pfam-B_898 (release 3.0) Family This family includes the following members; FliQ, MopD, HrcS, Hrp, YopS and SpaQ All of these members export proteins, that do not possess signal peptides, through the membrane. Although the proteins that these exporters move may be different, the exporters are thought to function in similar ways [1]. 25.00 25.00 25.00 25.30 23.60 24.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.49 0.72 -4.30 202 3289 2012-10-03 02:46:00 2003-04-07 12:59:11 14 2 2171 0 652 1501 334 75.20 37 85.64 CHANGED p.l..l...slsppAlhlslhluuPhllsuLlVGLllulhQAsTQIpEQTLoFlPKllulhlslhlhusWhhspl.hsaspp .......t..llthsppAhhlsLhluuPhlllAlllGLllulhQAsTQIpEpTLoFlPKlluV.hlslhlhusWhhshLhsasp.t........... 0 203 390 513 +82 PF02673 BacA Bacitracin resistance protein BacA Mian N, Bateman A anon COG1968 Family Bacitracin resistance protein (BacA) is a putative undecaprenol kinase. BacA confers resistance to bacitracin, probably by phosphorylation of undecaprenol [1]. 
20.80 20.80 20.90 20.90 20.60 20.00 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.61 0.70 -5.19 287 4568 2012-10-03 02:02:08 2003-04-07 12:59:11 13 2 3924 0 995 3043 2915 255.80 36 94.07 CHANGED pAllLGllpGlTEFLPlSSoGHL.llssplluhp................s.uhsFslllQlGollAVllhFhcclhplhtshhpt...................................tptst+hhhhllluslP.ssllG.ll.hcchIc.....shh.t..shh..........h...luhsLllhGlllhhu-phspp......................hcshcplshpsAlhlGlsQslA.L.l.PGsSRSGuTIsuuLhlGhs.RcsAAcFSFlLulPshhuA..uhhclh....................c.hhp...........hss..s.sh....h.lhlGhls...uFlsuhlslchhlcalp+p...shhsFuhYRlllG....lll ................................................................................AllLGlVEGlTEFLPlSSTGHl.Il.s.s.c.l.lshp...............................shspsFpllIQLGulLAVllhahc+lhtlht.thhtt.........................................tptshphhhpllluhlPusl..l.G....ll.......hc..D..h..Ic............shh........ssh..................s..VuhsLllsG.llhlhs-...phppp.........................................................................thpslccloappAhhIGhh.Q.sLA.l.l.PGhSRSGuTIsGGllhGh.s.Rp.sAu-FSFhLul...PshhGA.ss.lclh.........................K..hhph..............hss....s...sh...........shh.hlGhlsAFlluhluI+hhlpalp+t....satsFuhYRlllGhl............................... 0 336 666 853 +83 PF01011 PQQ Bacterial_PQQ; PQQ enzyme repeat Bateman A anon Pfam-B_1319 (release 3.0) Repeat The family represent a single repeat of a beta propeller. This propeller has been found in several enzymes which utilise pyrrolo-quinoline quinone as a prosthetic group. 20.30 20.10 20.30 20.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.89 0.73 -8.05 0.73 -4.17 130 3998 2012-10-05 17:30:42 2003-04-07 12:59:11 16 133 1153 135 877 8732 2893 36.40 26 10.04 CHANGED sthhhs..shsGtlhAlDspT.GchhWphpsssss.stshs ................h.hh...shsu.lhAlDhpT..GchhWphphssss.................. 
0 220 462 667 +84 PF03704 BTAD BAD; Bacterial transcriptional activator domain Yeats C anon Yeats C Family Found in the DNRI/REDD/AFSR family of regulators. This region of AFSR (Swiss:P25941) along with the C terminal region is capable of independently directing actinorhodin production. This family contains TPR repeats. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.57 0.71 -3.89 64 2191 2012-10-11 20:00:58 2003-04-07 12:59:11 12 200 672 3 901 2383 154 140.00 25 19.33 CHANGED lDlppFcch.sttGptshttG.c.tpAsptLppALuLW+GssLuslts......tshhpspst+LcEtRlpsh-tph-....scLpLG...Rtp..chlsELpsLlspaPh+EphatpLMhALaRsGRpu-ALpsYcclRptLs-ELGl.-PuspLppLpptlL ..................................................................hDh.tFtth...htt..u..thh..t.t....t....p............t.t.uh..th..h..pp..AL..s.........L.........a..+..G...s.s.Ls.s.hss....................h..h...p...s..t..t.t....p...L..c....c...h...pl..p....s..hc..p...hs..c.....ttl.thG..............ct.s......ps..ls.t..L.pp.lls........tcPh..c..EphhttLhtALh.psGR...p....uc...ALps.Ycch+phL...t.c.E.LGl.-.PusplptLhptlL.................................... 0 399 719 858 +85 PF01426 BAH BAH domain Bateman A, Aravind L anon [2] Domain This domain has been called BAH (Bromo adjacent homology) domain and has also been called ELM1 and BAM (Bromo adjacent motif) domain. The function of this domain is unknown but may be involved in protein-protein interaction [3]. 
20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.87 0.71 -4.35 41 2185 2009-01-15 18:05:59 2003-04-07 12:59:11 13 109 308 34 1324 2175 2 128.50 19 14.46 CHANGED tphplG-hVhlpsss....................................scs.....hhlsplhcl.......................................................................................................................................hpsssss.......thlc..spha..........hRPp-s....hhspthsp.......p..Elahosc...ptsh.hpslpu+CpVhhtsch.stp..h....................tsshF...aCchhYss.pptsFppls ...............................................................................................................h..hpl.G.DhVhl.psps.......................................................................sp.........h..lu..cIpcl.......................................................................................................................................hpsppsp................hh.hp...spWa...............................................h+Pc-s...............t.tcthtp..............................pElF.h...osp..........h-shs....h.s.s.l..h.GK..CpVhh.h..p..c.....h.pp.h...psht..................................................ppcsa...hhphhYs..pptth..................................................................... 0 367 643 1003 +86 PF01145 Band_7 SPFH domain / Band 7 family Bateman A, Finn RD anon Bateman A Family This family has been called SPFH [1], Band 7 or PHB domain. Recent phylogenetic analysis has shown this domain to be a slipin or Stomatin-like integral membrane domain conserved from protozoa to mammals. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.28 0.71 -4.37 120 13903 2012-10-01 22:02:33 2003-04-07 12:59:11 20 43 4514 5 4366 10740 5942 188.90 21 54.61 CHANGED hhlss..................sphull...hp..hGchpp..............................shpsG....................hph.hhP....................hhpphh...hhshp.hpphphss................................................sshopDt...........hslsl...shsl.pa..+l..........................scshphhtph..............t.ps.......hpphlpshlpssl+shl....uphsh...p-lh.........ss.....................+.splspplppplp..........................................................pp.....................hpphG......................lp.lh..slp.lpclphsp...phtps...lppphtupppt......................ptphtpuctcs ..............................................................................................................hVtpsptull..........hp........hG+hpp..................................hh.psG....................................lph..hlP.................................hl..cplt......................hlshc...hpshchss..............................................................................................................................................psl.Tp...D.p................................ssl.p.l.......ss.s..l..ha....+l................................pcstph..hhsl.........................................................ps..............hc.p.t....lpp....h...spss...L....Rsll..............................Gph..sl.............-cll....................................................sp...........................................R.pp....ls.pp.l.pp.t..ls........................................................................................pp............................h..s..s...a..G.....................................lp..lh.....clp...l.p.......clp..sp..........plppu...............ht.pph.pAcppp......................pAthhpAcu..t...................................................
.......................................................................................................................................................................................................................... 0 1454 2620 3579 +87 PF03594 BenE Benzoate membrane transport protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.30 20.30 20.30 20.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.20 0.70 -5.91 9 1251 2012-10-03 01:44:59 2003-04-07 12:59:11 8 2 1036 0 255 994 220 346.90 45 93.78 CHANGED sshs.SsllAG.hlAslluYuuslsIhapAA...psAtsossQhuSWlhululuhulsulhLShRa+sPlloAWSTPGsALLlouhsshslsEAlGAalVuuhLlhLsGlousFs+llppIPtulAuAMhAGILhtFulpshpAlsspPhLshsMlhsYLLsRpauPRYuVhhlLlsGlshuhhhGphphsslsh..clupPpalsPsFShtAhlsLALPLhLVuhsuQ.lPGhAlL+usGY.psPsuPllssTGLAShlsAPhGuholsLAAIoAAICpGP-AH.Dss+RYhAulhsGhFYllhGlFuuolluLFuuLPtsllshLAGLALLGulusuLttAhp--ppR-..........AAllTFllTASGhohlGlGuAFWGLlhGhlshhl .......................................s.sslhAG.hlAlLlGYuS.ShsllaQAA...tsuG...AosuQluuWhhALuluMGloolh.Lo.l..hYRsPl.lsAWS.TP.....G...A...ALL...l.o..u..lt..G..h........o.....h.s....-AlGsF.l....lsssLlllsGloGhFsRLhp.h.IPtulA..uAMLAGI...LLp.............FGl..psFsu...lssp.s...L....shsMlhsaL.....l...s...+...th...s...P...R..Y..........A.....l.....lu.....s.h.....lh.....Gl...s.....l...s....h..h.....t....G...p.l..s..h...s.s......l.t.h.....ph.s...hP..sa..l...sPpFS.hushlul.A.l.............PLFlVTMASQNhPGlAsh+AsGY.....p..........sPsuPllshTGLhuLlhuPFGsaulslAAITAA.IC.tus-AH.D.scRahAuhssGlFYllsGlFGuslsulhsALPhshlthLAGLALLuoIuuuLhpAh.p..s..p.cpR-..........AAllsFLVTASGloLhGIGSAFWGLluGhlshh.................................................................................................. 0 54 114 193 +88 PF02944 BESS BESS motif Bateman A anon Bateman A Motif The BESS motif is named after the proteins in which it is found (BEAF [2], Suvar(3)7 [3] and Stonewall [1]). 
The motif is 40 amino acid residues long and is composed of two predicted alpha helices. Based on the protein in which it is found and the presence of conserved positively charged residues it is predicted to be a DNA binding domain. This domain appears to be specific to drosophila. 20.80 20.80 20.80 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.34 0.72 -4.37 43 441 2009-01-15 18:05:59 2003-04-07 12:59:11 15 14 44 0 223 465 0 36.70 27 9.80 CHANGED pDsDp.hFLhSlhPtl+pLsspp+hch+hclhpllhch .....ssDp.hFLhSlhPhl+pLss.pp+hch+hclhpllh-...... 0 53 68 167 +89 PF02369 Big_1 Bacterial Ig-like domain (group 1) Bateman A anon Bateman A Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in bacterial surface proteins such as intimins and invasins involved in pathogenicity. 30.40 30.40 30.40 30.40 30.30 30.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.43 0.72 -4.10 20 5706 2012-10-03 16:25:20 2003-04-07 12:59:11 11 124 515 5 385 5116 64 96.30 26 34.44 CHANGED hls..phpAsloplh.......Asss-ssTlTAoVpDtsGsPlssppVoF.....ssstssLsss.....shTcssGhAploLouo......psGstsVoAols.sssss.pspsV .................................s.....tuph.s......h.h...t....hhA..s...sss.s.so.lpA..sVp.D...s...p.G...N.s.l..s..s..t.sVsF............ssssss...ls.s.s......................spTss.sGh.A.p..l.o..l....o.u.s.........p.s..G.s..hs.V.oAo.ls..ssssp.p.................................. 0 82 201 296 +90 PF02785 Biotin_carb_C Biotin carboxylase C-terminal domain Griffiths-Jones SR anon ref [1] Domain Biotin carboxylase is a component of the acetyl-CoA carboxylase multi-component enzyme which catalyses the first committed step in fatty acid synthesis in animals, plants and bacteria. Most of the active site residues reported in reference [1] are in this C-terminal domain. 
20.70 20.70 20.70 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.09 0.72 -4.06 109 10242 2009-01-15 18:05:59 2003-04-07 12:59:11 14 97 4650 113 3077 8078 3751 106.20 38 14.54 CHANGED EsRlhAE..DPtps.....F.hPus.G.plsthp.h...............P............susulRlDou.ltp.....Gs.plssaYDsMlAKlIsa..ussRppAlp+hppAL.schp..ltG..lpTNlsaLppllppspFtsu.....phsTsal- ................................................EsRIsAE......DPtps.........F..hPus.G.......c.l.p..php..h...................................P.........................uG..GV...RlDou..l..h.s................Gh..slsPaYDSMluK.l...............Is..a......G.p..s.....Rpp.Alt+hppAL..p.E..h..h.............l..c.....G.......l......p......T.Nlsahh.p.llps..sF.tsG.........shsTpal-....................... 0 942 1879 2586 +91 PF02012 BNR BNR/Asp-box repeat Bateman A anon Bateman A Repeat Members of this family contain multiple BNR (bacterial neuraminidase repeat) repeats or Asp-boxes. The repeats are short, however the repeats are never found closer than 40 residues together suggesting that the repeat is structurally longer. These repeats are found in many glycosyl hydrolases as well as other extracellular proteins of unknown function. 23.00 12.80 23.00 12.80 22.90 12.70 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.75 0.76 -6.08 0.76 -3.20 188 830 2012-10-02 00:45:24 2003-04-07 12:59:11 15 46 547 34 224 1738 836 12.00 57 2.20 CHANGED hhSpDsGpTWpt ...hSsDsG+TWp... 0 106 173 212 +92 PF00528 BPD_transp_1 BPD_transp; Binding-protein-dependent transport system inner membrane component Bateman A anon LMB bacterial genome group and Prosite Family The alignments cover the most conserved region of the proteins, which is thought to be located in a cytoplasmic loop between two transmembrane domains. The members of this family have a variable number of transmembrane helices. 
23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.97 0.71 -4.78 81 156339 2012-10-03 05:18:07 2003-04-07 12:59:11 17 117 4892 36 36523 106567 36248 195.90 15 65.39 CHANGED ulhlGhhsu...hhhsphhcphlhshh.hhh.....slPshh.......lhhl......lh..............................................shht.tshhs.h..lhlhhhhhsshshhhptthlp.tlspshhcsucshGhsphphlhphhlPsuhsslhsshhhshsts.ltssshhphlhs.........lGhhhhpuhhshshs.h.................................hhhhhhslhhlllsllhshlhphls.+hpt ............................................................................................................................................................................................h.hGh.hhu.................h..ht..h....h....h....p..t....h...h...t...h...h...h........l...h.h.........s.l..P..sll............................h.hhh.................h.h.hh.....................................................................................................................................................................................................................h.t.h.ht....h.....s...h...h...s....s..........l..l...s....h....s....h....h...h......h.......s......h......h.......h...h......h....h..t....s....s....l.p....s....l...s..p...s....h.......h-..A............A.p.s.....h.G.s.............s.p....h.p.hh.h.clh..L.P...t..s...h....s...s...l.l....s...s.h...h...h.s.h...s......t...s....l...s......s......h....s...h...s...t......h......l........s......................................h.s.t...h...h....h....t.....s.......h.....h.....t....h....t...h.s.hh.......................................................h.hh.h.s..h.s..l.h...h...l...l...l..s.h.l..h...h.h....t.h.h......t............................................................................................................................................ 
0 10630 22134 29422 +93 PF02237 BPL_C Biotin protein ligase C terminal domain Bateman A anon Bateman A Domain The function of this structural domain is unknown. It is found to the C terminus of the biotin protein ligase catalytic domain Pfam:PF01317. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.15 0.72 -4.27 50 3072 2012-10-01 19:11:18 2003-04-07 12:59:11 12 11 2996 75 760 2212 402 47.40 30 14.56 CHANGED hlGcpVplpt..tstpl..pGhspuI.DcpGtLllctssu.............hctlhuG-l.ht ...................lG+tVplhh.....ssppl.....pGlupuI.DcpGtLllcpssG.......................hp.s.lhuG-lsl................ 0 221 459 621 +94 PF03099 BPL_LplA_LipB BPL_LipA_LipB; Biotin/lipoate A/B protein ligase family Bateman A, Reche P anon Reche P Domain This family includes biotin protein ligase, lipoate-protein ligase A and B. Biotin is covalently attached at the active site of certain enzymes that transfer carbon dioxide from bicarbonate to organic acids to form cellular metabolites. Biotin protein ligase (BPL) is the enzyme responsible for attaching biotin to a specific lysine at the active site of biotin enzymes. Each organism probably has only one BPL. Biotin attachment is a two step reaction that results in the formation of an amide linkage between the carboxyl group of biotin and the epsilon-amino group of the modified lysine [2]. Lipoate-protein ligase A (LPLA) catalyses the formation of an amide linkage between lipoic acid and a specific lysine residue in lipoate dependent enzymes [3]. The unusual biosynthesis pathway of lipoic acid is mechanistically intertwined with attachment of the cofactor [5]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.78 0.71 -4.11 66 11826 2012-10-02 14:22:40 2003-04-07 12:59:11 14 48 4791 113 3031 8364 4175 120.70 19 40.71 CHANGED huppt.sst............hthpthppsslhlsccpouGp.....tsacs.hu.slhaollht.t....p...h..h..shthshssh.csL.....................t..s..h......h.sDlhh............ss+Kl..uGlhhch......pt...ttthpthslslshs ................................................................................thh...p.p...s.....s...l......l.......s..c.p..p..o..G....Gp..............ts.a...p..s..............ts...s...lhhol.....l.....h......h......t.......p...t.......................................htth...........shhht.h.shl..csL.........................................................t..u.s.p.sp.......hp..h.N.D...l..hl.........................................ss.+.Kl......uGlhhch................pp......sst.hp....t..h.s.lslshs........................................................ 0 1020 1916 2578 +95 PF02485 Branch Core-2/I-Branching enzyme Mian N, Bateman A anon Pfam-B_842 (release 5.4) Family This is a family of two different beta-1,6-N-acetylglucosaminyltransferase enzymes, I-branching enzyme (eg Swiss:Q06430) and core-2 branching enzyme (eg Swiss:Q02742). I-branching enzyme is responsible for the production of the blood group I-antigen during embryonic development [1]. Core-2 branching enzyme forms crucial side-chain branches in O-glycans [2]. 
23.40 23.40 24.30 23.50 23.10 22.30 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -5.22 47 1645 2009-01-15 18:05:59 2003-04-07 12:59:11 16 15 375 10 869 1568 50 236.40 20 55.03 CHANGED lAFhals.+GsLPhh.lW-pFhp...sH.cs..haslYlHsp.ss.hpp.h.t........sshF.sR.Is.SptVsaGphohlsA-++LLAsALh.D..sNchFlLLSpSClPLhsFsplapalhps..spSFl-shspss.ttp.sRas.p......hhP..clphp...............caRKGSQWhtlsRphAhhl..l..tDshhashFppaC...................................ssChsDEHYhsTl.........lshh..hsstssNpol.......TalcWs........pttsHPtpat....hpslo.chlpplp ......................................................lAahhhs..p...s....h.hh....hhhphh.....h....t...............pshas.lalDtc....s..t.ht.p.th.t......................ps.h.....s.s..h.h..h...s.......p..........t..l..ha..ush.ohlpAphphh..........tsh..........L.....p......................s......h.....s......h....caal.L.SusD..hPlpo.p...p...........l.h...phl..........p.....t...........................t......p...p.................s.....h..h...p..p..h...p......t..s............s...h.h...........h.p....t+....hp.h.............hh......hthp.................................................................thphhtG....S.t............Wh..sLo.....Rpas....pal...l..........pD..ph...hhphh...phhp....................................psh.h..s...-Epa..atTl........................lp.....t........ht.....p.....s..h....h...s.ps.l.............................phh...pWs.............................tph.......t.hs.t.h....h.................................................................................................................... 0 213 446 651 +96 PF00533 BRCT BRCA1 C Terminus (BRCT) domain Bateman A anon [3] Family The BRCT domain is found predominantly in proteins involved in cell cycle checkpoint functions responsive to DNA damage. The BRCT domain of XRCC1 forms a homodimer in the crystal structure. This suggests that pairs of BRCT domains associate as homo- or heterodimers. 
BRCT domains are often found as tandem-repeat pairs [2]. Structures of the BRCA1 BRCT domains revealed a basis for a widely utilised head-to-tail BRCT-BRCT oligomerisation mode [3]. This conserved tandem BRCT architecture facilitates formation of the canonical BRCT phospho-peptide interaction cleft at a groove between the BRCT domains. Disease associated missense and nonsense mutations in the BRCA1 BRCT domains disrupt peptide binding by directly occluding this peptide binding groove, or by disrupting key conserved BRCT core folding determinants [5]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.45 0.72 -3.83 114 8489 2012-10-02 11:51:29 2003-04-07 12:59:11 21 297 4740 142 3410 8863 1950 78.30 25 11.50 CHANGED tttphhpshphhl.......tthpphp+ppl...pphlpphGupl.........................tsph.sppssall....ssps...................tt.tc.............httshphshtlls.pWlhcsl .............................................................................................s.....htGhshll...........sush.p..p..h.s.R..s.ch..............cph.l.p.p.h.G.u.+l................................................................................ssol....o...c.c...T..s...a.ll..............sGcs................................................su..sK.......................................ht.cA..p........p....h....G......l...p....l..l.s-p.hhp............................................................................................. 0 1124 1906 2771 +97 PF04089 BRICHOS BRICHOS domain Sanchez-Pulido L anon Sanchez-Pulido L Domain The BRICHOS domain is about 100 amino acids long. It is found in a variety of proteins implicated in dementia, respiratory distress and cancer. 
Its exact function is unknown; roles that have been proposed for it include (a) in targeting of the protein to the secretory pathway, (b) intramolecular chaperone-like function, and (c) assisting the specialised intracellular protease processing system [1]. This C-terminal domain is embedded in the endoplasmic reticulum lumen, and binds to the N-terminal, transmembrane, SP_C, Pfam:PF08999, provided that it is in non-helical conformation. Thus the Brichos domain of proSP-C is a chaperone that induces alpha-helix formation of an aggregation-prone TM region [2]. 21.10 21.10 21.10 21.30 20.90 20.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.12 0.72 -4.12 40 591 2009-09-14 09:13:14 2003-04-07 12:59:11 9 13 95 6 287 508 1 93.50 25 36.19 CHANGED huuucsushlaDappslsAh+shstppCYlhphspstlPshpsLtchhhph......ptpsth.ssphhcpphh.....sspplpDhshLG.tI.tpLCtshPhYhl .....................ssucsushl.aDFp..ps..loAhhs.h.......shp.......pCalhthsps.hl.s.cslhchhhph...................pttshh.p..ohh..hpcphh.....sspplp-...h...s..L.G...hI.hpL..CpshshYhh....................... 
0 42 57 123 +98 PF04427 Brix Brix domain Bateman A anon Dlakic M Domain \N 28.10 28.10 28.30 28.30 27.70 28.00 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.40 0.71 -4.64 149 1923 2010-01-08 13:34:18 2003-04-07 12:59:11 13 24 441 1 1305 1849 23 196.60 24 58.96 CHANGED l..hsu................p...psspphpphhcDLpplh..Psup........phs+tp......h.php......chhchst.ps....ssslllh..pppct.psstlhls+h.....spGPoh....pFp............l......ps...hphhc-l..........t...............Phllhsshtsp....................hphlpphhpshFss........hs..t.......phc+llshp.t.....................p.........pch................................IhhRpa.h...............................p........................................................................................................hpLpElGPRh.slc...........lhcl ................................................hpup..pss.tphpphhp-lpt.....lh...Psup.....php+tp...................t..plp....................phl.phsp.ps....soslllh..............pp....p..cp.....p....s.......ss......L......hls+h........PpG.Poh........pFpl......pshphhc.-.lhtthptsh.....spp..................................................PhLlhsshtsp........................hphltp.hh.tp.lFss...........sh.............thc+Vlshs.t...................p...cch.................................................................I.hRpa.h.......hhp.............p..............p.................................................................................................................................................................hpLtElGPRhslplhc........................................................................................................................... 0 469 738 1083 +99 PF03097 BRO1 BRO1-like domain Bateman A, Kim J, Mistry J anon Bateman A Domain This domain is found in a number proteins including Rhophilin Swiss:Q61085 and BRO1 Swiss:P48582. It is known to have a role in endosomal targeting. 
ESCRT-III subunit Snf7 binds to a conserved hydrophobic patch in the BRO1 domain that is required for protein complex formation and for the protein-sorting function of BRO1 [2]. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.12 0.70 -5.71 70 1087 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 297 31 739 1088 8 341.70 21 47.18 CHANGED shlslshKcop..pl-..hspsLppaIpppY...spsssh...a.....pcclpplspLR....pshhs.........psstsulch........................LhcYaspLphLpt+h..P....sppht.l...........pFsW.aDuh...........................tpst......shspps..ltaE+uslLFNluAlaoplAspp..........sp...p.ss-GlKpAsshFQpAAGsF....palp-s...........hhpsPoh....D..lstpsLpsLtpLhL..........................AQAQEshhtKslt...s................shcsullAK.......LuspsuphYpps.........hpshppsshtt...............................................hp........tpWhshlphKtta......apAlApYatuhsh.....................................pppppaGct.......l...................................Ac...............................LptAhptlp-Ahphtth...................ttttlhpslpthpsplppcLpptp+D.........NDhIYhp.lPs.ssls...slt.shshs+sl.shsp.h............pt.p...ssclFppLlPhsltput......ohas-cpsphlppph 
............................................................................................................................................+....s......hp..h.....h.thl.t.h...tp...................h............tpth.t.thtt.hR....................pthht............t...thhph...................................lh.pYhs..L.hl....t..+h.....s..............tpp.t...l.............................................Fp.W...hssh..............................................................ptt.................hs.ps.....ltaEhsslLaNluulhophus.pt................................sp.....s..sp.p...u.h....+.t.A...hphap....p......A.A..Gha........palpcp..............................................h..psss.......D.....hs.pslp....sL.plhL................................................................A.QAQE..shh.tKsht.......p...........................phpssl......lA.+.......lus...ps...sphYppu..........hpt.hp..p.....s..s..t..t..................................................................................................................h.........ppWhthl........phKtth......atAhAph..atuhth......................................ppppph...Gptl...........................................................up...............................L.ptA.phhp.puht.tt.............................................t...h.t.hpthh....ptlp...p.phpphp+-.........N-hlY.h.p.lPs......t.h...................ht...h.hst..sh......................................a..thhP.th....t.ht...........h........................................................................................................................................................................................ 0 248 387 590 +100 PF00439 Bromodomain bromodomain; Bromodomain Finn RD anon Prosite Domain Bromodomains are 110 amino acid long domains, that are found in many chromatin associated proteins. Bromodomains can interact specifically with acetylated lysine [3]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.74 0.72 -4.06 69 7921 2009-01-15 18:05:59 2003-04-07 12:59:11 20 370 355 282 4778 7646 90 84.20 26 10.39 CHANGED hpplh........p...phhppt...st.F.t.......stpthssYhp.......hl...ppPhsLspIpp.......+l...csspYp...s...................................h..tpahpDhphlhpNuhpa........st.tss.hhp....tupp ................................................................h...........h.ppp........huh...s...FhpPV........................stp..p..h..P..-.Y.a.c.........................lI............+p.PM....DLs.Tl.c.c..................................+l...............cs..p....p....Y.p.......s............................................................................................................................h...pca.h..........p.....Dh.pL.......hhpNshtY.............Nt..sso.lhp.u..h............................. 0 1623 2367 3612 +101 PF03909 BSD BSD domain Yeats C anon [1] Domain This domain contains a distinctive -FW- motif. It is found in a family of eukaryotic transcription factors as well as a set of proteins of unknown function. 20.40 20.40 20.70 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.85 0.72 -4.23 81 1184 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 308 4 822 1161 12 60.20 21 16.77 CHANGED hst.ppph.sh...t.p........tcphpplLppsss.LpphhpchV.......PptlscppFWppYFhththhptptspp ....................................s......ph.ph...p.p........hc.hppllppsPt...lpphatchV............Pptls.c....p.p....FWpp.aFhthhhht.ptt................ 0 250 448 674 +102 PF03092 BT1 BT1 family Mifsud W anon Pfam-B_1804 (release 6.4) Family Members of this family are transmembrane proteins. Several are Leishmania putative proteins that are thought to be pteridine transporters. 
One such protein Swiss:Q25272, previously termed (and is still annotated as) ORFG, was shown to encode a biopterin transport protein using null mutants [1], thus being subsequently renamed BT1. The significant similarity of ORFG/BT1 to Trypanosoma brucei ESAG10 (a putative transmembrane protein and another member of this family) was previously noted [2]. This family also contains five putative Arabidopsis thaliana proteins of unknown function. In addition, it also contains two predicted prokaryotic proteins (from the cyanobacteria Synechocystis and Synechococcus). 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 433 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.50 0.70 -5.77 12 726 2012-10-03 03:33:39 2003-04-07 12:59:11 11 11 173 0 486 924 508 341.70 21 71.34 CHANGED pGlus.lhphss.hhhp-chGlssuthQtLsslushsWslKshhuhlsDs.ashhGYp+R.YhhlSslhG.h.uhhauLLsuh.sSsshAuhhlhLuohuhA.sDlls-uhhschhRppPps..usuh.ShhWhh.hlGullushhsGsLs-thtsphshhloAsl.hlshl..................suhhlhcp..........................................................................................Eshthscs...sphhs.....+cslsp.W+hhhh.............ssIhtssL..............hhshashslslspA...................hFYhsTspht.....FohpFhspVt.llGsluuLlGVslasthhpphsaRhhhhloslhpsluulhDlIlVc+hNhhlGIsDa.hallGDullhplshhltaMPhlVLhuRLCPpG.EuslaALlhuhhsLGpssSutLGulLhcahh...hTpss.....asNLshLlllsslss.LlslPLshLL.s.t...c.hDtsschspcts 
........................................................................................................................................................................................................................................h..hsh..hhpp.hthp..ss..t.hth.....ht..shh.t.h.PWs.h.Ks...hhGh.loDs..h..s..l..h.G.a...+.....R+sY.h.h.l..u....t...hl...s.......h....h.........h..h............h.u..h..h...............s......t..............................................h.............................h......h......h..........h..h....h...h......s......h....s..hs..........h.....-.....l.h.-..uh...h..s.......p.......................t...............s..................................t............s............t............h.........................o...hh......h.h.........h...u.s..l.....h..........s............h.h.....s..........G........h.......h.......p.................h.......................p..................h.......h..h............hh...h.h...h............................................h...h........................................................................................................................................................................................................................................h.t...hthh.....................................................lh.shh........................................hh.h.h...h.........ts...........................................h..ah..t....h..............h.s...............hhs......hht..hhs.......thh.hhuhhhap...h..h.....t..p...........h...s....h..+.............h..h....h....h..t.h.h.....h...h..h.h.h.........l......t...h.s.....h........h............ls....st..hah.hh..t.hl.....phh..hthhPhhhh.........sphsP...u......E..ushauhhhuh.shu..h.ut.hu.h.l.thh..............hstt................aptl.hhlhht.h....hhsl.h......hl..........................tt...........................................................................................................................................................................................
....................................................................................................................................................................................... 0 200 338 460 +103 PF00651 BTB BTB/POZ domain Bateman A, Bardwell VJ anon Prosite Domain The BTB (for BR-C, ttk and bab) [1] or POZ (for Pox virus and Zinc finger) [2] domain is present near the N-terminus of a fraction of zinc finger (Pfam:PF00096) proteins and in proteins that contain the Pfam:PF01344 motif such as Kelch and a family of pox virus proteins. The BTB/POZ domain mediates homomeric dimerisation and in some instances heteromeric dimerisation [2]. The structure of the dimerised PLZF BTB/POZ domain has been solved and consists of a tightly intertwined homodimer. The central scaffolding of the protein is made up of a cluster of alpha-helices flanked by short beta-sheets at both the top and bottom of the molecule [3]. POZ domains from several zinc finger proteins have been shown to mediate transcriptional repression and to interact with components of histone deacetylase co-repressor complexes including N-CoR and SMRT [4,5,6]. The POZ or BTB domain is also known as BR-C/Ttk or ZiN. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.48 0.72 -4.08 96 15670 2012-10-02 01:20:04 2003-04-07 12:59:11 26 989 511 79 10101 15120 88 105.10 20 20.57 CHANGED hsp.pppt...hsDlslhlt.......pphpuH+slLuupSsYFpshFpsp.................tpts...........l.hpslssp..shptlLcah..Ysupl.h.............tpsltplLphA.phhpltslhptCpphlhpph ..............................................................h.pt....hs.D..ls.l..hlt...................tpph....sH......+...........s................l...............L...u....u...p.......S.s......a..F.....c..s...h..hpss........................................tcpp...........................tlp.l......p...s.......l......s.......s..p...........s...hp...t.l.....L..c...a..h.......Y...o....u...p...l.phs..................................tps....l...........p....l...l......t...s...A...p....hh......p..l......t..t..l.hphstphh....h..................................................................... 0 2968 4235 7306 +104 PF03437 BtpA BtpA family Finn RD anon Pfam-B_4453 (release 6.6) Family The BtpA protein is tightly associated with the thylakoid membranes, where it stabilises the reaction centre proteins of photosystem I. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.39 0.70 -5.13 6 562 2012-10-03 05:58:16 2003-04-07 12:59:11 10 3 505 0 178 689 382 246.50 36 93.56 CHANGED pcKPlIGVVHLhPLPGSspasu...........sLstVID+Alp-Apslp-uGhDAlIlENaGDtPa.Kp.Vs.tTVuAMolIssclpp-VulPlGINVLRNDuluAhuIAhulsAcFIRVNlLoGsthoDpGIlEGsAtELh+h++hL....su+.lclLADVtVKHAhahus..lpsslhDTlER..uhADAVIloGpsTGucsDl--LchAKcsss...sPVllGSGVs.cNlpphhphADGhIlGThlK+sGph.N.lDh-Rspplschscc ...............................................................................................................................pKslI.uMlHLpsLPGsPtass...........shptll-+Ahc-htsLpsGGVDulhhpN.ashP.a.h..p..c.....l.t.scssuuMuhl...htpl..tp.p.l..p........l.P.....h.....GVNVLh.DshuuhslAhAssA.cFIR......p.ha.sG.sas.uD....Glh.-.sssu-hlRa...p+pl....s.As.pl+llh.slhsctu..sh..L...u..s........c..s..l..........s..p.h.s+.s.sh.p...stsDAlh...Vo...G...hs.sG..s....p....s....s.........t...........L....c....p.l..+csss..........tsPVl.ssoGV.s.h.-.N..l.pc...L.s.h..ADGsl...luos.h....Kc....c...G.hhtN.....................V...D.tRVppFMctlp.p........................... 0 60 104 151 +105 PF03131 bZIP_Maf bZIP Maf transcription factor Mifsud W, Eberhardt R anon Pfam-B_482 (release 6.5) Family Maf transcription factors contain a conserved basic region leucine zipper (bZIP) domain, which mediates their dimerisation and DNA binding property [1]. Thus, this family is probably related to Pfam:PF00170. This family also includes the DNA_binding domain of Skn-1 (Swiss:P34707), this domain lacks the leucine zipper found in other bZip domains, and binds DNA is a monomer [2,3]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.04 0.72 -3.69 15 969 2012-10-02 13:17:30 2003-04-07 12:59:11 12 8 158 8 512 2370 5 91.10 34 20.20 CHANGED hSD-cLlohoVRELNRpL...+GhocEElh+LKQ+RRTLKNRGYApsCRhKRlpQ+csLcpc+scLppplcpLppEhuthppEpDulpt+hptLt .......................................os-pllshsVc-h...NchL........ctLo..c-plthl.....+.p....hR...Rp..h...KNR..shAQs....CRp++l...pphppLE....p...-h.pp.......L.ppc.h-cLh.p.Ep....s.p.h.tp.phcth+p+hpth.h................................ 0 105 151 298 +106 PF00168 C2 C2 domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.90 4.50 20.90 6.10 20.80 -999999.99 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.89 0.72 -4.14 382 23398 2012-10-10 12:23:49 2003-04-07 12:59:11 25 644 543 119 13594 21474 315 86.10 20 17.18 CHANGED LplplhpApsL.shc..........................................hps..psDPYVplplts.......tp...........................h+Tcshcps..hNPhWs.....Epatap..h..sphpp......LplpVh.Dpcthst...............cchlGpsp ............................................................................................................................................ltlhpu....p...sL.....s.h.s....................................................................................hts....ts..D....P...a.l.p..l..p..lt.s...........pp......................................................................................t+T......c...s.....h....p.....p....s.....h.........s..P..h....as..............-.p..h..t.ap...h.............t.p.hpp.......................Lp.l.p...Vh....D..t..c.t.htt...................schlGph.t............................................................... 
0 3560 5729 9404 +108 PF02743 Cache_1 Cache; Cache domain Bateman A anon [1] Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.49 0.72 -4.42 19 6574 2012-10-01 23:40:40 2003-04-07 12:59:11 13 246 1855 16 2046 5858 159 78.30 20 11.74 CHANGED lT-PYh-ss...........ssphVlThuhPlhs............ttphhGVluhDlslcsLhphhpplplGtpGYsFllstsGpllsHPspcshscpttt. ............................................................otsYh..s.t............................st..p..h..h..l..oh..u..t..P.l..hs.....................................................s.u.p.h..h...G...V....l..u....h....D..l....s....l....s........p....l....t...p....h....l...p....p.....h.......p..........h......s...p..s....G....a.s....h.l...l..s.....p.....s.....G.p..ll.s...+..sp.p.........h................................ 1 687 1216 1644 +109 PF04857 CAF1 CAF1 family ribonuclease Bateman A anon Pfam-B_1567 (release 7.5) Family The major pathways of mRNA turnover in eukaryotes initiate with shortening of the polyA tail. CAF1 Swiss:P39008 encodes a critical component of the major cytoplasmic deadenylase in yeast. Both Caf1p is required for normal mRNA deadenylation in vivo and localises to the cytoplasm. Caf1p copurifies with a Ccr4p-dependent polyA-specific exonuclease activity. Some members of this family include and inserted RNA binding domain Pfam:PF01424. This family of proteins is related to other exonucleases Pfam:PF00929 (Bateman A pers. obs.). The crystal structure of Saccharomyces cerevisiae Pop2 (Swiss:P39008) has been resolved at 2.3 Angstrom…resolution [3]. 
23.50 23.50 23.60 24.00 23.30 23.10 hmmbuild -o /dev/null --hand HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.68 0.70 -5.06 27 1134 2012-10-03 01:22:09 2003-04-07 12:59:11 15 22 328 16 721 1069 11 271.30 26 67.58 CHANGED pcVWppNhpcphphlpphlcphs...alAhDTEFsGllscshtp......htsss-.pYptL+pNVsthpllQlGLohh.....sppuphss..........shs.........................sapaNFp.Fshcp........chh.stcSIchLpppGhDFpcppcpGlshtphs.......................................................................................................................................................................................................................................................................................................phlhsSsllhst.p..lpWlsapusYDhuaLl+llp.s......tpLPppht-FhphlpthF.Pp....lYDlKhlhp..h..........................................................................hp...................ph....uLpclA-tLplpR.......................................................................................................sGp.tHpAGuDoLlTstsFhc 
...................................................................................................................................................................................................-VhtpNhppp.hthlpp..h.l..p..phs....alAh........Ds.EFs..Gls....s..p..shst................htsss-.p.Y.p...t.l+p..s....s.c..hh.pl..lQlGlohh...............sppsphs..........t.hs...................................................................................sapFN.Fp...Fshpp.........................chh..stsSlph.Lt......p......p......G.h....s......Fpch..pp..G..Ishhphs..................................................................................................................................................................................................................................................................................................................phh.h..h.Ss.l..l.ht.......p.....h.h.h.lsa..p..uhhDh.salh+hhhs..............sLPps...t.pFhphl...phhF..Pt..........laDhKalhp..t......................................................................................................................................................................t.psuL.pplt..c.t.L.thpp...........................................................................................................................................................................................................................................s....HpAG.DuhhTu.sah........................................................................................................................................ 0 236 379 557 +110 PF03135 CagE_TrbE_VirB CagE, TrbE, VirB family, component of type IV transporter system Mifsud W anon Pfam-B_843 (release 6.5) Family This family includes the Helicobacter pylori protein CagE Swiss:Q48252, which together with other proteins from the cag pathogenicity island (PAI), encodes a type IV transporter secretion system. 
The precise role of CagE is not known, but studies in animal models have shown that it is essential for pathogenesis in Helicobacter pylori induced gastritis and peptic ulceration [1]. Indeed, the expression of the cag PAI has been shown to be essential for stimulating human gastric epithelial cell apoptosis in vitro [2]. Similar type IV transport systems are also found in other bacteria. This family includes the TrbE Swiss:P54910 and VirB Swiss:P05353 proteins from the respective trb and Vir conjugal transfer systems in Agrobacterium tumefaciens. Homologues of VirB proteins from other species are also members of this family, e.g. VirB from Brucella suis Swiss:Q9RPY1. 20.50 20.50 20.60 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.95 0.71 -4.56 15 1506 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 797 0 316 1404 70 190.30 20 25.11 CHANGED phLuphpp.psltas-lLpFhsphls.Gppp.shtlsps..hlDshls......uplhhscct...........hhhcpssp....ppasuhlul+-Y.sscopsshlsslLptchEhllhpoFshhs+ppupshlp...ppphhstscputspltclsptlcphsusphshGhap.olhlaAcshppLccpstcspstLpspGhlustEoluh-suaaupLPuNhphpsR .....................................................................................................t.........pth.hsc.hpahphhls..sp.p...lhhspt....lsthls.......sphh..hstcp...............h...h.p..t.t.t......................ppasshl.s.lp....p.Y.s.phpsshhsh.hh......t........h.........s......h...p...hhhhpsa....p...hhs.......ppp....s..................hsh....lp...t....................p.............p...p.....................hh..............p....h....................s.................s.....s...u....ts...p......h....................t.-.hs.p......ul..p..p.lss..sph.s.h.G.a....pholh............lh..............u.......c....s............h....c.....plcppspts.t.sh.l...p.s...t.G....hhs..h.h-..sls.h.s..u..aauplPuphhhp.R...................................... 
0 58 171 237 +111 PF02515 CoA_transf_3 CAIB-BAIF; CoA-transferase family III Mian N, Bateman A, Heider J anon Pfam-B_887 (release 5.4) Family CoA-transferases are found in organisms from all lines of descent. Most of these enzymes belong to two well-known enzyme families, but recent work on unusual biochemical pathways of anaerobic bacteria has revealed the existence of a third family of CoA-transferases. The members of this enzyme family differ in sequence and reaction mechanism from CoA-transferases of the other families. Currently known enzymes of the new family are a formyl-CoA: oxalate CoA-transferase, a succinyl-CoA: (R)-benzylsuccinate CoA-transferase, an (E)-cinnamoyl-CoA: (R)-phenyllactate CoA-transferase, and a butyrobetainyl-CoA: (R)-carnitine CoA-transferase. In addition, a large number of proteins of unknown or differently annotated function from Bacteria, Archaea and Eukarya apparently belong to this enzyme family. Properties and reaction mechanisms of the CoA-transferases of family III are described and compared to those of the previously known CoA-transferases. 
20.70 20.70 20.70 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.21 0.71 -5.06 118 8297 2009-01-15 18:05:59 2003-04-07 12:59:11 12 30 2042 90 3017 7416 4895 185.50 29 47.03 CHANGED lsLDL+sspG+tlhpcLlp....pADVllcNh+PGshc+lGLsh...csLc....phN.....PcLlhsuloGaG.p....sG..Phss..psuaDh.shpAhoGl.....hsh.s..........sss.s...P.hhsuhs..lsDhsu.uhhuuh...ulLAALh....pRp..c.oGp..Gph....l-lohh-s.shth.hs.hhhthhts.s.th.t..........Gstp...........sssssh..shaps....tDG.......alsl..u.shssphWpth .................................................lsLDL+..s..scGpphht.cLl.t..............pA....D....Vll..EN..a+....P.G..sh.....c.+..h.Glsh......-.s.Lp......th..N.........P.+....Lla..ss.l.o.GaG..p............s.G.....P.h.ss....p.s..u......a.............Dh.l.h..p.A.h.uGh...........................hsh..s........G...........................ss....s...............P...h.....h.su...ss...........lu.D...hss.G.h.h.ush...ulL.u....ALh..............pRp......c......o........G.....c..............G.....ph..............lDsuhh-s...shs..h........h.t............h..h..h....t...h..h..t..s..u....t.st................t.h.................uttp.........................sshssh...ssaps...............tD..G...............hlhl..u...shpst..at.............................................................................. 0 656 1710 2447 +112 PF02888 CaMBD Calmodulin binding domain Bateman A anon Psi-blast P70604/413-489 Family Small-conductance Ca2+-activated K+ channels (SK channels) are independent of voltage and gated solely by intracellular Ca2+. These membrane channels are heteromeric complexes that comprise pore-forming alpha-subunits and the Ca2+-binding protein calmodulin (CaM) [1]. CaM binds to the SK channel through this the CaM-binding domain (CaMBD), which is located in an intracellular region of the alpha-subunit immediately carboxy-terminal to the pore. Channel opening is triggered when Ca2+ binds the EF hands in the N-lobe of CaM. 
The structure of this domain complexed with CaM is known [1]. This domain forms an elongated dimer with a CaM molecule bound at each end; each CaM wraps around three alpha-helices, two from one CaMBD subunit and one from the other. 25.00 25.00 28.80 28.10 23.70 22.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.63 0.72 -4.17 5 334 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 83 7 163 297 0 74.40 61 13.92 CHANGED DoQLTKEhKNAAAsVLQETWhIY....KHT+h.p+t-ppRlRKHQRKFLpAIHp...FRoVKhEpRKlsEQsNohsDluKs+pl ............DTQLTKR.lK.NAAANVLRETWLIY....KaT+Ll.......KKhDpu+VR+HQRKFLpA.I..Hp........LRpVKh-QRKLsDQANTLVDluKhQsl.................... 0 31 45 98 +113 PF01302 CAP_GLY CAP-Gly domain Bateman A, Finn RD anon Prosite Domain Cytoskeleton-associated proteins (CAPs) are involved in the organisation of microtubules and transportation of vesicles and organelles along the cytoskeletal network. A conserved motif, CAP-Gly, has been identified in a number of CAPs, including CLIP-170 and dynactins. The crystal structure of Caenorhabditis elegans F53F4.3 protein Swiss:Q20728 CAP-Gly domain was recently solved [2]. The domain contains three beta-strands. The most conserved sequence, GKNDG, is located in two consecutive sharp turns on the surface, forming the entrance to a groove [2]. 
24.90 24.90 25.10 24.90 24.60 24.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.56 0.72 -4.27 146 2400 2009-09-10 15:43:18 2003-04-07 12:59:11 20 120 319 45 1448 2206 31 68.20 38 10.48 CHANGED lG.pRl.pl...........tstthGsl+a......lG.psp.hss............................G.............h......WlGlEh.Dp....s....h.....GK....NDGo..l.pGh+YF..pC........p.sp.....tGhFl+spplp ...................................lGsRV.l............ssschGsl+a......lG...tsp.hss....................................G................h......WsGVEL..Dc.....P....t.............GK...................NDGo........V......p.........G......h+....YF..pC...........p..sp......tGlFspss+l.p.............................. 0 487 694 1078 +114 PF01039 Carboxyl_trans Carboxyl transferase domain Finn RD, Bateman A anon Pfam-B_299 (release 3.0) Family All of the members in this family are biotin dependent carboxylases. The carboxyl transferase domain carries out the following reaction; transcarboxylation from biotin to an acceptor molecule. There are two recognised types of carboxyl transferase. One of them uses acyl-CoA and the other uses 2-oxoacid as the acceptor molecule of carbon dioxide. All of the members in this family utilise acyl-CoA as the acceptor molecule. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 493 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.40 0.70 -6.15 34 11662 2012-10-02 13:07:06 2003-04-07 12:59:11 17 72 5959 146 2951 11205 7464 323.30 27 68.57 CHANGED tpascG+hssc-Rl-lll-sGS.Fsph-shhtpcssphuh...c..hPssullTGhGsltGptshlhupDhsshGGshushputK...lsch.chAlps...............GtPhlslsDuuGA....c.t-GVpsLpGhGpIFtpsspASu.sIPpIollhGsssGGuuY.PuLsDhslhVcs.tuhhalTGPsllc..............pVhG..EphospphGGuptHhtpoGluHhsupsD.-ulphl+chlSalP...sst.......ssPlhtshDssc+..............shlPss.ppsYDsRplIptlsD.............pupFhEhpssaApslVsGhARlsGhsVGllANpsp......................AG.sL..cSu.KsAcFlchCss.hslPllhLsshsGFhsGpcpEasGIl+aGAKllhAhucusVPtlolIst..cuaGGualVMsupthsschh...huassAcluVMGscGAssIlaRcchttts.........p.pth....................hppphtchccphsssYhssuptasDsllcPschRs+lshshphhhpcpt.hh.h+p+tpl .......................................................................................................................................................c..................p....h...................h...............t.....................................h...........t............................t.............................t......t....................................ssul....l.sG....h...G....p....l......p.............G.......h...................s.......h.l.hs.....D..hs.............hh.uGo.hush.sucK..........ls+hh-hAhpp...................................p.h.P.l..l.hl..s.s..S.GG.........A.......R...........h.....Q.......E............u......hh..............u......L.......h..............t...h..u..+...l..............t......s.........t..p.....h..p...........t..t...........ls.I.o.l....l.h.sP..s..sG..G..s.A...h.s..h......h.s..D.......h.......h.......lh..p.....uhl.hhsGPcVl.c...............................p.s..h....s......E......h....................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 955 1849 2497 +115 PF00755 Carn_acyltransf Choline/Carnitine o-acyltransferase Bateman A anon Pfam-B_438 (release 2.1) Family \N 19.40 19.40 20.50 19.40 18.70 19.20 hmmbuild -o /dev/null HMM SEED 591 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.77 0.70 -6.43 17 1813 2012-10-02 12:01:53 2003-04-07 12:59:11 15 34 362 41 1101 1697 26 442.50 26 79.93 CHANGED 
sLP+LPVPsLpcTls+YLc.ulcPlhs-.-paccopplspcFss...shGppLQchLhphuttp......sNWlscaWhphhYLphR.hPlslNSs.h..sshsph.hpsp.........psQhtpAuplhpshlpahptlcpcpl...........sphhtG....................ttPlsMsQapplFsosRl...PGhp...pDslhphtcsc.......HllVlp+spaaplclhcs..suphlo.s-ltpQLppIhppupp.psh.tsluhLTopsRspWApsRptLhpsss..NpcsLctI-pulFslsLDcs..................pss.cs....t..hphh.........htss.p...ssutNRWaDKohphllspsGpsGhshEHossDGhsllplh-h.h.hspphhc......................phhts.s......lshPp+Lpaplssp.hpspIpputpphpthhs-L-lhshpFpsaGKshlKp.pplSPDuFIQlAlQLAaY+.haG+hssTYEoAosRhFtcGRTETlRosos-uhcFVpuM...........s..sstott-.............+hpLhpcAsppHsphhcpshpGpGlDRHLhuLphlupt.....pslphP..c...hahcpsathu...sshhlS.....os..plssphhhhhs...aGPVssDGYGlsYphp.ppplhhslSuapSsspTsup+asphLpcAhp-hts ..........................................................................................................................................................LP.p.lPlP.lppThp...pa.lt..shps..l..........h........s...................p..............p..............h..............t......p..........h..........pt...h....sppF.t.......................t.u.........LpthL....ht....t.........................psa.........l..............t....p....hW.p.......h.h....ht....c.........sl.hss..............h.........................................................t...t....h..h.uuth.h..s.h...h..ah..t..lcpttl..........................s.h.hht...............................................Phs.tpa.h...hFsssRl...P...t.t........tD.h..t.......tsp......................HlhVhpps.pha..hh...h..ht.................s...p........h..p.......tpl.....p....hp........l...h...........p....t..s.t.....................................................t..........................lu.hLTst....tRs....Wu.ph+p....hh.t...tt.......NtpsL.p.h.lcp.uhhhlsLDp.............................t.t...............................................tt.h.h......tp.s...hsRWaDK....s.hp...h.ll...tsG...............p.h.uh..hEH.s.....h.Du..hhphh...p.hh..........t...........................................t.t.
...............s.s..ptl.pa..p.....hs..t........h..t........lt.......u.....t.th.t..t.hhpphp.h.h..h...a.t.t.......a.G..+.t..h..hKp...th..S.PDuhlQhs..hQ.hAa.a.p........h.s........p......................sY..Ess.h+hFhpGR.T-.....sh..R.sso.pshta...sps..h..................................................p.......t...s...tp.............................................................................hhthhptAhptH..th.h..t......s.....GtGhDRHLhsLh.h.t...............................................t....P.....t................h......s.......ath.......tp..h.p...................hs...........ph.s.s...h.......h...h...s....a.us.................s...................pG..aG..hsY......p...h...hthssh........................h..pst......th...l.t.h.ph......................................................................................................................... 1 386 559 894 +116 PF03378 CAS_CSE1 CAS/CSE protein, C-terminus Mifsud W anon Pfam-B_3786 (release 6.6) Family Mammalian cellular apoptosis susceptibility (CAS) proteins are homologous to the yeast chromosome-segregation protein, CSE1 [1]. This family aligns the C-terminal halves (approximately). CAS is involved in both cellular apoptosis and proliferation [2,3]. Apoptosis is inhibited in CAS-depleted cells, while the expression of CAS correlates to the degree of cellular proliferation. Like CSE1, it is essential for the mitotic checkpoint in the cell cycle (CAS depletion blocks the cell in the G2 phase), and has been shown to be associated with the microtubule network and the mitotic spindle [3], as is the protein MEK, which is thought to regulate the intracellular localisation (predominantly nuclear vs. predominantly cytosolic) of CAS. In the nucleus, CAS acts as a nuclear transport factor in the importin pathway [4]. The importin pathway mediates the nuclear transport of several proteins that are necessary for mitosis and further progression. 
CAS is therefore thought to affect the cell cycle through its effect on the nuclear transport of these proteins [4]. Since apoptosis also requires the nuclear import of several proteins (such as P53 and transcription factors), it has been suggested that CAS also enables apoptosis by facilitating the nuclear import of at least a subset of these essential proteins [5]. 19.60 19.60 19.60 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.31 0.70 -6.18 16 387 2012-10-11 20:00:58 2003-04-07 12:59:11 10 14 287 3 271 370 12 387.00 29 44.71 CHANGED ERlLhlpcsssp.slhsss-lsPhsppLLspLFphlphssS....pENEalMKClMRVLhllp-ullPh.ssslLs+LssIhphluKNPSNP+FtaYhFEulushlRhsspusss...sFEpuLaPsFssILppDVp.....................EFhPYV.........FQlhu......tLLEhss.ssslPs.sYhsLhsslL..................sPshWEppGNlPuLlRLLpAhIt+uuppI..sssspLpslLGIFQKLluSKs.sDppGF.LLpullpphPssslp.YhtpIhpllFpRLQsSKTs+FhpphlhFhphhss+p.....GushhIphh-slQsslFs.lh.plllP-hpKlutsl-RKlssluhTKhL.sEo.Ahhsp.YtKhWuhhhpuLlpLhchPspsss....c--hls.tDss.hGassuFopLssstpptcDPhP-..lsDsKhalup.LpchsptpsG+lsshlsppLss-uppsLtpY 
...............................................................................E+lhhh...p...t..s....t..tt...l....h.stscl.tPhs.p.LLppLFphl.ph.sso...............tENEalM+..slMRs.h.hhp........-sh.h..sh.hs.sllsp.Lhp.......hl.thls+N....PS.pPcFsHYhFEuluhh.....l+..h.ssp...s...ss.s..h.....phEps..LassFp.tILppDVp.....................EFhPYl..............FQ...lhu......hLLEh...p..................s......sl........P.p.sYhsLhs.LL..........................p.P.slW..-.pp.G....Nl.PuLlRLLpuhlp+ss..ptl..............ht..t..s.pltslLGlF.Q.+LluoKs...s-....pt...GF........LLpslltph...s...p...lp...a...h.tp.IhhllhpR.L.Q.s....u....+T...s.......c..ahpphl.hF..hs..hhshph.....................usshllphh.-.pl...Q....s..t............hFs.lhp...pl...h...lP....ph..p.p..l....................s.t...h...-.+Klssluho+hL..s.p.s.....h.hs....p.....h.tphWs...hhpuLlt...lhp...........s............t.s.sh...................pcc...h.........h..s....t.............-.s.....................u.......a.ts.u.......aspLshst.t.t..h..Dshsp....l...t...s...sp..alsp...Ltphs.tt.p.s.sth.thht..h................................................................................... 0 103 159 224 +117 PF00690 Cation_ATPase_N Na_K_ATPase_N; Cation transporter/ATPase, N-terminus Bateman A, Griffiths-Jones SR anon Pfam-B_138 (release 2.1) Domain Members of this families are involved in Na+/K+, H+/K+, Ca++ and Mg++ transport. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.10 0.72 -4.51 168 9625 2009-01-15 18:05:59 2003-04-07 12:59:11 21 104 3314 73 3641 8446 271 66.10 25 7.18 CHANGED hpphsscplhppL.......p.ss.......ppGL.op....ppsppRh....ppaG.Nplt.ptctpshhthhlppa.psshlllLlhuullS ...............................................s.pplhpph.................p...s......................ppG....L...os...............p-s......p....c..Rh.................ppa.G.....N.........p..l......s...tpc.....tpsh.......hh..hhhpp.a.ps..shh.h...lLhhuAllS................................... 
0 1148 2096 2947 +118 PF01545 Cation_efflux Cation efflux family Bateman A anon Pfam-B_232 (release 4.0) Family Members of this family are integral membrane proteins, that are found to increase tolerance to divalent metal ions such as cadmium, zinc, and cobalt. These proteins are thought to be efflux pumps that remove these ions from cells. 22.80 22.80 22.80 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.80 0.70 -5.34 89 12806 2012-10-02 19:55:49 2003-04-07 12:59:11 16 54 4824 10 4122 10055 1117 268.50 19 81.33 CHANGED lhlulhhshh.hslhclhsuhhs.sShulluDuhashhDhhuthlslhuhphu...................p+.sssppa.......sa..Gat+hEslsulhsulhllhh.uhhhhhp....ulpphlps....................tphphshh.hhh...................h.sllu.....lslshhhhhhhpp.......................................................................................................................................hp..ots....l.pusthchhs...Dshso....luslluhllhhhh.......................hhh..................hDslsulllulhllhsuhplh+puht.Llsts.sssthh...pplpphl...........hsslh.slpcl+hhphGs...phhlslplp.hsssh...shpphcp..........ltpplcptlppphstltp...h..lp.hpsttptp 
....................................................................................................h.hhuhhhshh.hh.lhch...h...s...u...h...hs...s.......ShulluDuhc.lsDh............hu.hl.............s..lh..u..h......phu...............................................................p+...s.s....s.tpa.......................sa...........Ga......t.+......hEh.l.uuhhs.u.l.........h..lh..hh..u...h.h.l.lh.c.......ulp+.l...hps.................................................................ptlp.ss...hh...hhh............................................................................uhlu........ll.s...s..h.h..hhhhhtc.......................................................................................................................................................................................................................................................................................t......................................................................................................................................hp...s.s.......l...p...ush..h..c..hhs....Ds.lso....................luslluhllhhhs.........................................................shhh..........................................hDslhulllul.hllh.s..uhplh..pc...uhphLhpt...s....st....ph..........................ppl.p.phl.p.........................hstlh...s.l...c..c..l..+..s.h.......p..hus...........phhhsh+l....h.s..sph...............h..ph.pt.........................................lhpplpp.hl.....tp.p.h......t.lt.t....hh.lp..h-.....t.......................................................................................................................................... 1 1327 2487 3430 +119 PF04586 Peptidase_U35 Caudo_protease; Caudovirus prohead protease Waterfield DI, Finn RD anon Pfam-B_4836 (release 7.5) Family Family of Caudovirus prohead proteases also found in a number of bacteria possibly as the result of horizontal transfer. 
20.70 20.70 20.80 20.90 20.60 20.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.00 0.71 -4.22 36 1699 2012-10-01 19:43:34 2003-04-07 12:59:11 12 10 1302 0 286 1376 536 154.10 23 52.03 CHANGED psh-l+stp...pst.....lpGYAshFsp.sp.........htEhltsGAFspsLtp.ts....lhhLasHDt.sp.slGpsps.........cLppDspGLchchcl..sssstuc-l.hphl+pGslsuhShGFpsh.....ppphptps.........hRplpcl-.Lh..ElSlVoh.PAhscuplp......tpshpphtph ............................................................................t.....p..t........tt...hhhpGaushas....s.................ht-hlt.s.sA.h.p..tsh.tp...s..........lhhLa.......pH........c..........s.........p.........s...l...Gpsp...........pl.p.......D.......spG.L.hhcsch.......ss.......s.....s.......tuc-h...h...t...t...l..+....s..G...s.l.suh..S..hGFpst...............p.phppss.......................t........hpplpchc.Lh..ElSl...V..o...h...PA..spsplp......hpt......t.......................................... 0 81 174 230 +120 PF01607 CBM_14 Chitin_bind_2; Chitin binding Peritrophin-A domain Tellam RL, Hutter H, Bateman A anon [1] Domain This domain is called the Peritrophin-A domain and is found in chitin binding proteins particularly peritrophic matrix proteins of insects and animal chitinases. Copies of the domain are also found in some baculoviruses. Relevant references that describe proteins with this domain include [1-3]. It is an extracellular domain that contains six conserved cysteines that probably form three disulphide bridges. Chitin binding has been demonstrated for a protein containing only two of these domains [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.46 0.72 -4.04 209 6920 2012-10-01 20:20:38 2003-04-07 12:59:11 19 153 308 1 4290 7189 6 53.80 23 23.27 CHANGED Cst..........tsGhhss......ssC.spa.hhCt......supsh.......hhpCssG.hhFst.....ptptCshspp.........spC .............................C..........tu.h.hsp......spsC...spa.ahCh.........s.G.p.sh................hhpC.s........s........G........h......hF...st...................ptp...t....Cshspp..........tC....................... 0 1391 1754 3585 +121 PF00942 CBM_3 Cellulose_bind; CBD_3; Cellulose binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1126 (release 3.0) Domain \N 20.60 20.60 20.70 20.80 20.10 19.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -9.79 0.72 -3.96 29 651 2012-10-01 21:34:18 2003-04-07 12:59:11 13 120 165 35 192 639 2 83.10 27 12.10 CHANGED l.hcsusss.ussNtlcs+hplpN.oGssuhsLsclplRYaash-t.thstsahsc.sslu.........susl.hshsphss.ssu.ssaYlEls ................h.hpsssss..sssspIpsphplhN.oGssslsLsclplRYaash-.t..st...s.tshs..sDauplu..............su..sl.p.....tp.hsp.lpsstss..AshYlElu............................ 0 101 152 164 +122 PF02018 CBM_4_9 CBD_6; CBM_4; Carbohydrate binding domain Bateman A anon Chris Ponting Domain This family includes diverse carbohydrate binding domains. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.94 0.71 -4.14 47 1727 2012-10-03 19:46:52 2003-04-07 12:59:11 12 250 613 41 590 1782 213 134.50 15 21.89 CHANGED sshlhsssFEs...................shssWtspsss.............ssssss.....sGphslplsspss.....sasuhhhphs..stlppGpsYplShhsptsss........pplplplphpss.........shpthts......thshs.spWpplpss.aThs...sssssshlhlps..........ssss ...........................................................................thl.NssF-p..............................sh.ss...Wp..s....hs.ss...........................sshsss..................sGs..h.s..l..t....l......s...s...tss..........shss...h..h.h.p..hs....hslp..t....Gp.s.YplShhs+ssss.................tplplplptsss...................htth.ts...............tsshs..spWpp.h..p.hs.ashs.....ssssps.lhht.......sts.............................................................. 0 271 468 540 +123 PF03422 CBM_6 Carbohydrate binding module (family 6) Bateman A anon Pfam-B_1231 (release 6.6) Family \N 21.00 21.00 21.20 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.65 0.71 -3.97 41 1755 2012-10-03 19:46:52 2003-04-07 12:59:11 10 402 465 60 713 1810 331 121.80 21 17.74 CHANGED cAEsastt..uGlshpppss..t.....sGhslshhssG-Wlsast..lchssuushphp.spVAsssus...usl-lclsu.....suslluolsl.ss.TGuWpsa.......tsspsslshss.Gs+slhLshsust......hhNlDahpFsp ...........................................................ht.......t.u.hphtspst....t...........sGhsl...u..h...h.p.s...G....-..a..l..pas....Vs.hs...s..u...u..s......h.....s.hp..h.+s..A.ss.sss...........uslpl.p.lsu...............ss.h.l...u..s.hs.l...s......s.....TGu.......W.p....s..a......................ps.h..s..s.s..l..s..hss...G.s.p.s..lhlhhsuss............hNlDhhph........................................... 
0 338 629 686 +124 PF00571 CBS CBS domain Bateman A anon [1] Domain CBS domains are small intracellular modules that pair together to form a stable globular domain [2]. This family represents a single CBS domain. Pairs of these domains have been termed a Bateman domain [6]. CBS domains have been shown to bind ligands with an adenosyl group such as AMP, ATP and S-AdoMet [5]. CBS domains are found attached to a wide range of other protein domains suggesting that CBS domains may play a regulatory role making proteins sensitive to adenosyl carrying ligands. The region containing the CBS domains in Cystathionine-beta synthase is involved in regulation by S-AdoMet [4]. CBS domain pairs from AMPK bind AMP or ATP [5]. The CBS domains from IMPDH and the chloride channel CLC2 bind ATP [5]. 24.00 16.50 24.00 16.50 23.90 16.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.50 0.72 -4.16 850 77225 2010-01-08 14:28:41 2003-04-07 12:59:11 23 464 5124 378 23594 56987 15502 57.10 19 26.04 CHANGED lpcl..hsp..............sshs..ls.s....sslp.cshp..hht.....cpp......hp.tls.V.....ls......cp...........scll..Gl.lohpDllpthht ............................................................................th.hp.................phhs....ls.s..........tslp..cshp.....hhp...........................cpp............................hptlsV.......................ls.......................................-p.............................spll.....GllohcDlhp....t........................................ 0 7547 14981 19849 +125 PF02754 CCG DUF224; Cysteine-rich domain Bateman A anon Bateman A Family The key element of this family is the CX31-38CCX33-34CXXC sequence motif normally found at the C-terminus in archaeal and bacterial Hdr-like proteins [2]. There may be one or two copies, and the motif is probably an iron-sulfur binding cluster. 
In some instances one of the cysteines is replaced by an aspartate, and aspartate can in principle also function as a ligand of an iron-sulfur cluster [2]. The family includes a subunit from heterodisulphide reductase and a subunit from glycolate oxidase [1] Swiss:P52074 and glycerol-3-phosphate dehydrogenase. 20.60 11.80 20.60 11.80 20.50 11.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.31 0.72 -3.98 61 13777 2009-01-15 18:05:59 2003-04-07 12:59:11 11 93 2942 0 4139 10623 3794 86.00 20 33.05 CHANGED lsaasuC..hhcst........h.pstphhtplhshhshch.h..ptptCCGusuhhsst...pt......huh....plsppplpphpc...h.s.....s-hllssCssChhplc ...........................................lsaassC...hhchh...........................ttps.t.p.s.h..t...p.l.h...t..t...t........s..h.....c.....l...h.....h............t.....t....p...t....C...C.Gt.su.t.h..s.sp..............pt.........hup..............ph..s.pp....p.l....c....t.h.pp...............t...t...........................sc..h..l..lsss..s.s.Chhtl.t....................................................... 1 1454 2943 3646 +126 PF03379 CcmB CcmB protein Mifsud W anon Pfam-B_3059 (release 6.6) Family CcmB is the product of one of a cluster of Ccm genes that are necessary for cytochrome c biosynthesis in eubacteria. Expression of these proteins is induced when the organisms are grown under anaerobic conditions with nitrate or nitrite as the final electron acceptor. CcmB is required for the export of haem to the periplasm. 
23.10 23.10 23.10 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.43 0.70 -5.13 13 1869 2012-10-03 10:13:34 2003-04-07 12:59:11 8 2 1817 0 379 1383 1059 208.50 38 96.40 CHANGED hhsllpR-L+lAhRssushhssLhFFLlVlsLhPlulGP-splLuRIAPGIlWluALLusLLuL-RlFtsDaEDGSL-...lhhsshPLthllluKshAHWllTGLPLllsuPLhuLLLsLshsuassLhhTLLLGTPsLShlGulGuALTVGL+RGG..lLLuLLlLPLhIPlLIFusuAlpsuuhs...hshss.hhlLuuhhlsslsLuPFAsAAALRl ..................................................................htlhth-Lpluh..R...p...tuplh....ssLhFF.Ll.V....Is..L.F.PL...u..l....G....P......-.......s..p.....L.......L........s..+...lu.PGll.......W.luALL.uuL.L...u.L..-RL.FR.....sDhpDG..oLEp..lhL...t..s..hP..........L..s.hllLuKlhA...H...W....l.lo.G.L..P.L.ll.lu.P.l..l..u.l.h.L...s.....h.....s...s..t....u....h....t....h....h....h.L....o..L.L..L...G.T..P.s.L.u.h..lG.AlGsA.LTlGL++G.G....lL...Lul..LlLP.L.hlPlLIFu.su...Ahsuushs......hshs...u.......L....t.l....LuAhhhh....s....h.s.L.s.PhAhuuuL+........................................................ 0 109 232 305 +127 PF04103 CD20 CD20-like family Bateman A, Moxon SJ, Pollington J, Finn RD anon Pfam-B_1979 (rel 7.3), Pfam-B_10092 (rel 9.0) Family This family includes the CD20 protein and the beta subunit of the high affinity receptor for IgE Fc. The high affinity receptor for IgE is a tetrameric structure consisting of a single IgE-binding alpha subunit, a single beta subunit, and two disulfide-linked gamma subunits. The alpha subunit of Fc epsilon RI and most Fc receptors are homologous members of the Ig superfamily. By contrast, the beta and gamma subunits from Fc epsilon RI are not homologous to the Ig superfamily. Both molecules have four putative transmembrane segments and a probably topology where both amino- and carboxy termini protrude into the cytoplasm [1]. This family also includes LR8 like proteins from humans, mice and rats. 
The function of the human LR8 protein is unknown although it is known to be strongly expressed in the lung fibroblasts [2]. This family also includes sarcospan is a transmembrane component of dystrophin-associated glycoprotein. Loss of the sarcoglycan complex and sarcospan alone is sufficient to cause muscular dystrophy. The role of the sarcoglycan complex and sarcospan is thought to be to strengthen the dystrophin axis connecting the basement membrane with the cytoskeleton [3]. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.79 0.71 -4.28 80 1022 2012-10-02 01:14:40 2003-04-07 12:59:11 10 9 113 4 533 1008 0 133.70 19 47.50 CHANGED sLGsl...QIhlGlhplslG..hlhhhhh.s.........hhhhsGhPaWuulhalluGsLu.lsuppcss....phllpsslshNllSslsAhsulllhshslthtp.hhth.........................................tshhhstph.......htulhsslLlhslLphhlulshshhss ........................................LGhh...QlhlGlhhh.sh.G...hh.h.hh.h.hs..................hhht.uh.sh.W.u..uh..h..hll.uG.lu.lhs.....tppsp...........phh.hp..s.phshs.l.lus.hh..u.h.......s........uhhlh..shsh......th.t.h.............................................................................................................................h..............hhsl.hhhh.hhshlphhlsh..shh.................................................................................... 
0 125 173 258 +128 PF05179 CDC73 Cdc73; RNA pol II accessory factor, Cdc73 family Wood V, Bateman A anon Pfam-B_6394 (release 7.7) Family \N 25.00 25.00 25.70 27.20 22.40 24.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.79 0.70 -5.04 6 336 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 288 1 245 334 3 259.70 26 58.02 CHANGED Tcsplhlpstchh.thslhshhpuspspp-.s+sp.ssss.sostscc.plctpptp..upaspaspcphss..p.scthcIsshGSh+GssLsulcpG...........................h.....ssupu+tstss.suK+ssRsPIIllPSAsoSLIThhNlK-hLp-hpaVP..sst++tpGsp+ssplslQ++psp.p........ThsacVlDsspKLp.P--WDRVVAVFVhG.sWQFKsW.W...s.sPs-IFs+IpuFal+aspD..cssssVppWNVchlplSpsKRHhDRsVhpphWEoLE+altp+ ........................................................................................................................................................................................................................................................................................t........................................h.st.hh..t.........t...htthp..s.shup....h...t.t....s....h........hppt..........................................................................h.....s.s.t..s..t.....sp...p.........t.......p.....p.t.p....p..s..PI...I...lls.s..u..s........o..S..LlshhNsKp..hL.p-..tp..a.ls........spp..t...p...t....p.........s....s...t...p.p..s.l.h........lp..+......p.h.p.ph..............................shp...ah...lV..Ds..s...t.ph..........p.....s...........c.WsRVVAVF..sp.....Gts...W...QFKsa....W.........s.s.P.....s.....-lFp+...........lpGaalpacsp.......phstp.V..ppW.................s..Vphl..p..l.......sc................p..K...............RahD+tsh.pFWcpl-.chhh............................. 
0 84 131 200 +129 PF01066 CDP-OH_P_transf CDP-alcohol phosphatidyltransferase Finn RD, Bateman A anon Pfam-B_651 (release 3.0) Family All of these members have the ability to catalyse the displacement of CMP from a CDP-alcohol by a second alcohol with formation of a phosphodiester bond and concomitant breaking of a phosphoride anhydride bond. 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.44 0.72 -3.71 124 11822 2009-01-15 18:05:59 2003-04-07 12:59:11 16 43 4879 0 3729 8699 4901 147.30 20 63.29 CHANGED hhhhPNh..lT...h....hslhh.uhhsshhhhhs.....ph..........................................hhushhhhlshlhDslDGtlARthspsSt....hGthlDshsD........thshshhh....................................................................hslhhhhshhhsshhhhhhht .............................................................h...hPNhlT.l....hp.lhh.....s...h.h..h.hh.hh.hs.....th..................................................................................................hhusllhhlusl...hDhl.....DGhlAR....p.h..s.t.s..op............hGthLDs..luD.....plh.hs..sshhhhh.........................................................................................................h.h...............................hhhhhhhhhhhhh.......................................................................................................................................................................................................................................................... 
0 1273 2397 3165 +130 PF00150 Cellulase cellulase; Cellulase (glycosyl hydrolase family 5) Sonnhammer ELL anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.05 0.70 -5.20 65 5309 2012-10-03 05:44:19 2003-04-07 12:59:11 13 205 1466 165 2106 7158 527 265.90 14 56.85 CHANGED lstsGpsh.....phhGhsst.........W..tsth.....stpshhphhpshGhNslRlshs......h.........ssah.....s.s.sh........s.......thhsclcpllshu.hspGhYl.IlDhHp........................................ts.tsshss............spsh................Fpp..........h..lAs+a...ssss.pllaE..lhNEPps.st..........stW...............stlpshsppslssIRss.uss...phIllsssp..............................Wus..........ssst.shs.......P..................ts..........tsplhaohHhYs.ssphs.t....................................................spthpsthphh.hspGh.slhluEaGss.....ssss.................tspsstW......lshh......ppp.slshsh.Wshssps ...........................................................................................................................................h.............................................................pp.h.p...h.h....p..s....hG.h.shl.R.l..s.ht.........................................h.................tth.........................t..sh.....................................hst........................................s.h.h.p..t...l..c..p........h...l.p.h......s...pp..t...G..l..h....l.....l...l.....-....h....Hs.................................................h.......................................................t..t.............t.....p.......tt...............................st.ph.............................................................................ht.p.............................h.h....p....t....l....u....p........p........a................s.....p.....s...s....hl.......h........a.-...l.hN....E...Pts........................................................................t..h...t....t.h..h....p...phh...p..t..l+..
........t...h....s.sp.........thl....hl..s...s....t..............................................................................ast.................................t.t......hhp................P.....................................t.p..h.h.....h......s.h.H.....Y....s.................................................................................................................t.t.......h.p..t..h...h....t.......h.......h..p..t...s......h....s...h...hls..E......aGh......................t.......................................h....................ht.h...................t.......................h......t................................................................................................................................................................................................................................... 0 829 1465 1881 +131 PF04218 CENP-B_N CENP-B N-terminal DNA-binding domain Bateman A anon Bateman A Domain Centromere Protein B (CENP-B) is a DNA-binding protein localised to the centromere. Within the N-terminal 125 residues, there is a DNA-binding region, which binds to a corresponding 17bp CENP-B box sequence. CENP-B dimers either bind two separate DNA molecules or alternatively, they may bind two CENP-B boxes on one DNA molecule, with the intervening stretch of DNA forming a loop structure. The CENP-B DNA-binding domain consists of two repeating domains, RP1 and RP2. This family corresponds to RP1 has been shown to consist of four helices in a helix-turn-helix structure [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.33 0.72 -4.54 6 534 2012-10-04 14:01:11 2003-04-07 12:59:11 8 19 145 3 370 898 32 51.80 26 11.42 CHANGED ++pRssLThcEKlclIpphE-sp..S+sslA+caslstoTlpsIlcpKcplLpth ..........................p.+ps..Lolc-Klcll.pp.l..-p.Gp......s.pspluccaGl.scoTlps.lhK.s.+cclh...h.............. 
1 79 158 248 +132 PF04734 Ceramidase_alk Neutral/alkaline non-lysosomal ceramidase Mifsud W anon Pfam-B_3385 (release 7.5) Family This family represents a group of neutral/alkaline ceramidases found in both bacteria and eukaryotes [1,2,3]. 19.40 19.40 19.40 19.50 19.10 19.30 hmmbuild -o /dev/null HMM SEED 674 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.05 0.70 -6.65 42 810 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 474 3 419 771 118 444.80 26 82.17 CHANGED sYllGsGpADITGPssElshhGYAshpQhusGl+pRlauRAFIlu..ppss.........pRhValshDsshhspuV+hsVLctLps..phs.shYscpNVslouTHoHuGPGGahpYhLhpl.....oohGFscpsapAlV-GIlhSIp+AHpsLp....PGplhhups-ltsAslNRSshAY.sNPpcERupY......stsVDKphTlL+hsc.ssspslGslsWFuVHsTSMsssNpLlSGDNKGhAAalaEcphp.....................................................................sp.ss.tssFVAuFuQoNsGDsSPNlhGshC.p.oG..Cph.pSoCs.utsthChupGP..tt..sthcSsplIGc+QaptAppLas.....susp.lsGs..VcshHtalDhsshshs..........sthssss..h+TCsAAhGaSFAAG.ToDGP........G................h....FsFsQuss....pssP.............hWphlpshl..t.PotcpppCQtPKPI..LLssGphs.PYsWsPsIlslQllRlGpLhllusPuEhTTMuGRRhRcsltsshtssh................p.pVVluGhuNsYupYlsT.EEYslQRYEGASTLaGPaTLsAYhp.htphhsuLssu......tss....ssGP......pPPs.p.scplohhsuV.laDstPhspsFGDVhsp..ssss.YphG.-s.VsssFhuuNPRN..sL+p-sTahtVE+hp...................tsssWpsVtsDsDWshhacWc.......Rsssh.s..tScsTlpWpI.........PpsstsGsYRl+aaG.shKshhsu....lpsapGsopsFpVt 
..................................................................................................hGhshhDhT...s.........h..thshhG.Y..up......p.ph..s..tGlcp+lhuRualht........tt.....................tp+hlh.ls.hDhhhh......p.....t.....lp.....tlhp.p.ltt.......tht.s..a..p..pp...sl...hlsuTHoHu.u......P......u....u..h....h....t.....h.h..h.th.........s.t.....u...a...p.t..........h..p.h.l.V.s.u.....lh...pul.pApps..lt..........s.upl.h.sp.s..............pl....s...ss..h.N.R.......s.........................s....a...........t....NP........t...pt.tth.......................ttssDtphshlph.p.......ss....p.....h.Ghl..sa.ass.Hs....s.s..h.t......s.s......s...p.....hlouD...hG.h.sshhhEp.ht.......................................................................................................................................................................h.......up.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s................................................................................................................................ 
1 157 259 363 +133 PF03859 CG-1 CG-1 domain Bouche N, Bateman A anon Pfam-B_18451 (Release 7.1) Domain CG-1 domains are highly conserved domains of about 130 amino-acid residues containing a predicted bipartite NLS and named after a partial cDNA clone isolated from parsley encoding a sequence-specific DNA-binding protein [1]. CG-1 domains are associated with CAMTA proteins (for CAlModulin -binding Transcription Activator) that are transcription factors containing a calmodulin -binding domain and ankyrins (ANK) motifs [2]. 23.10 23.10 23.80 30.40 22.80 22.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.57 0.71 -4.56 15 270 2009-09-10 19:07:15 2003-04-07 12:59:11 11 34 93 0 157 282 2 111.00 51 10.85 CHANGED lhpEs+pRWL+sp............ElhtILt..sac+athh...pssp+PtSGSlhLasRKVlRhFRKDGHsW+KKKDGKTl+EAHE+LKVGs...................l-sLpCYYAHu..-psssFpRRsYWLLppshpcIVLVHYhcVp ........................................h...p.phRW.psp............EIsthLh....saccap...h.....pP.sRPt............sGSlhLasRKhl+..aRK.DGasW+K+KDGKT...s+EsH.KLKVtu...................h-s..LashYsHu......p.sPsFpRRsYWLLp.ps..cIVLVHYhpV.............................. 0 32 82 120 +134 PF00307 CH actinin-binding; Calponin homology (CH) domain Finn RD anon Prosite Domain The CH domain is found in both cytoskeletal proteins and signal transduction proteins [1]. The CH domain is involved in actin binding in some members of the family. However in calponins there is evidence that the CH domain is not involved in its actin binding activity [4]. Most member proteins have from two to four copies of the CH domain, however some proteins such as calponin and Swiss:P15498 have only a single copy. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.54 0.72 -4.01 194 10780 2012-10-03 10:10:54 2003-04-07 12:59:11 26 564 496 152 5898 10465 47 104.80 20 12.85 CHANGED ccshlpWls.ptht..t....................................................................th.tlp.......sh.tpslpDGhhLspLlctl.pP...ph.........h.shppl..........p...........pphcNhphslp...hspc.hGhshhh........sspDlh.....pss.p........llshlhplhphht ................................................................................................................................................................................................pthhpWhp.ptht....t..................................................................................................th.plp.................sh..tpsh.p......D.G.hsL..s..tL....l....c.t..l...p.P.....sh........................................l..s.h.ppl..................................................p..................p...thc.Nl..p.h.slp...............hsc......p..........h.Gl.phhh...............sscDls............css.p.............llshlhplhth..t.............................................................................................. 0 1688 2487 4051 +135 PF04420 CHD5 CHD5-like protein Bateman A, Wood V, Mistry J anon Wood V Family Members of this family are probably coiled-coil proteins that are similar to the CHD5 (Congenital heart disease 5) protein. In Saccharomyces cerevisiae this protein localises to the ER and is thought to play a homeostatic role [2]. 
29.30 29.30 30.00 29.50 28.70 29.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.77 0.71 -4.57 27 271 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 233 14 174 269 0 152.20 27 75.98 CHANGED lhslFhl.lhphLlsshtsutlspllhhhh.t.......spphpptpphpcElhpl+pEhsshSuQDcaAKWsKLpRch-Klpp-lcphspplsuppsphcthlphhhhlhssshhhhLphaatKsPlahLP...pshhPhhlchllu.....hPpushG..........................uVSlshWhhssss .................................lhhh.lhh.llssh.ss...hlsphl..h..........ppssppppph+tEltph+cEhssl...Ss...tD...EFA+aA...K...L...cR+hsKhpccLcs...........hspphsuppsphchhlshshhlhpsshhhhLhhhat.psPlhhlP...psah..h.lphllu........FPpsshG........................uVulssWhhsCt........................................ 1 44 83 135 +136 PF03067 Chitin_bind_3 Chitin binding domain Bateman A anon Pfam-B_2364 (release 6.4) Domain This domain is found associated with a wide variety of cellulose binding domain. This domain however is a chitin binding domain. This domain is found in isolation in baculoviral spheroidins and spindolins, protein of unknown function. 
21.20 21.20 21.40 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.76 0.71 -4.02 128 1556 2010-01-08 15:51:16 2003-04-07 12:59:11 10 34 838 15 481 1278 28 176.70 28 53.03 CHANGED HGalpp.....PsuRshhCth.ss....................t....t...............................tsu.sspap.psh.Eusts..............h.pts.s..........DGplCuA.Gs.........s...phss....lD.ts...ss.pWtps...sl..........pssts..hshpaphT.AsH..tsshacaalT...Kssasssps..LshssL-hh.sh.........tt.t...........ts...ssstpa....ssslPp.......RsGppVlhshWphu...Dsss...................uFYsCsDVs ...............................................................................HGalpp.....P.s.SRshhCt.sss.......................................................s.....psG...ssta.ps..pul.Eusts...........................h.tts..s.......DGplsuu.Gs................t........thss.L.D.tps....us...cW.h+s..sl......................psG.s......hshpWphT...As..H....ts........upacaY.l..T........Kssa....s...s...sps....Lsh..ssh-hh...sh.t..h.....st.t...............................................................s.....sssssa......psslPss........RsG.hpV......l.hshWphu...D..oss..............................................................uFYsshDVs........................................................................... 0 173 287 412 +137 PF00379 Chitin_bind_4 insect_cuticle; Insect cuticle protein Finn RD anon Prosite Family Many insect cuticular proteins include a 35-36 amino acid motif known as the R&R consensus. The extensive conservation of this region led to the suggestion that it functions to bind chitin. Provocatively, it has no sequence similarity to the well-known cysteine-containing chitin-binding domain found in chitinases and some peritrophic membrane proteins. Chitin binding has been shown experimentally for this region [1]. 
Thus arthropods have two distinct classes of chitin binding proteins, those with the chitin-binding domain found in lectins, chitinases and peritrophic membranes (cysCBD) and those with the cuticular protein chitin-binding domain (non-cysCBD) [1]. 21.50 21.50 21.50 21.50 21.00 21.40 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.69 0.72 -3.70 157 3539 2009-01-15 18:05:59 2003-04-07 12:59:11 18 25 87 0 2490 3895 1 53.00 32 25.39 CHANGED YpasYpsssu.tt.....pppup...............tsstslp.GsYoahssD.Gp.hhsVp.Y.sA.Dcs.G.F ..........................................YpasY..ps...pD..s...ps..............pppsp............................sussVp.GsYo..h..hssD.Gp..hpsVp.Y.sA.Dc.s.GF.......... 0 942 1256 2169 +138 PF04968 CHORD CHORD Finn RD anon Pfam-B_1217 (release 7.0) Family CHORD represents a Zn binding domain. Silencing of the C. elegans CHORD-containing gene results in semisterility and embryo lethality, suggesting an essential function of the wild-type gene in nematode development [1]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.57 0.72 -3.73 47 678 2009-09-11 00:43:51 2003-04-07 12:59:11 7 14 243 3 422 649 3 62.90 45 38.49 CHANGED hstpCpNtGCsppap.ssc....s.scssChaHPGsPlFH-uhKtWoCCpp+...shDFs-FhpI.GCsp.GpHs .................h.htCpppGCu..p....pas....s..s..p...............s...pc-sCtYHPG..sP..lFH.........-uh...K..............uWSCCcc+...shDFspFLsI.GCsp.GpH.................. 0 135 212 319 +139 PF02017 CIDE-N CIDE-N domain Bateman A anon [3] Domain This domain is found in CAD nuclease Swiss:O76075 , ICAD Swiss:O00273 the inhibitor of CAD nuclease. The two proteins interact through this domain. 
21.80 21.80 21.90 22.20 20.80 21.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.51 0.72 -4.21 7 382 2012-10-03 10:59:06 2003-04-07 12:59:11 10 9 81 7 199 371 0 76.00 38 26.28 CHANGED p.+Ph+lpshcpsh++GVsApSLpELlsKst-hhtlsp..tssoLsLtEDGT.V-sE-aF.sLscsTchhlLttGppWps ....................+Ph+lpstpRsh++GlsA.soLpELhp..K..........sp..........c..t..L.tls................tsssL.VL..-.EDGTtV-sE-YFpsLssNTphhlLppGppWp................... 1 36 51 107 +140 PF02487 CLN3 CLN3 protein Mian N, Bateman A anon Pfam-B_1060 (release 5.4) Family This is a family of proteins from the CLN3 gene. A missense mutation of glutamic acid (E) to lysine (K) at position 295 in the human protein (Swiss:Q13286) has been implicated in Juvenile neuronal ceroid lipofuscinosis (Batten disease) [1]. 24.50 24.50 25.00 25.10 23.60 24.40 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.29 0.70 -5.69 2 477 2012-10-03 03:33:39 2003-04-07 12:59:11 12 13 234 0 293 463 9 272.00 30 81.25 CHANGED .hhhFWLhGLhNNh.YVVhLSAAhDIlu...............P..shs.......pSl....VLLADIhPoLhIKLhuPhhlchl.YS.Rlhs.hhhushuhhLVuF.+slhssLhGlshASISSGhGEVTFLpLTtaY.phslshWSSGTGGAGlhGuhSYhhLTp.htlssphTLLsh.hlP..hhh.aaFhLpSs-sp.shtp.pt.spAcps.lss..ss.o+ss......Sop.pl.pphphh+tLla.YhVPLshVYhhEYhINQulh.hLhF..s............hpatp.YhhYthLYQhGVFlSRS.hHhhRhR.halLAhLQslNLshhllpsWF.hh.S.ahVhllIhYEGhLGGAuYVNTFhNIh.ppsspcpEFAMuAssIuDohGl.LuuLLuLsLcshLC+hp 
.....................................................s.......................................................................................................................lllsslhPshhh+h..h..P.h.hh...hl.a.......Rhhh.hh.hthh.uhh.......hl....uh..........s..............t.......s......h................h..t......l..hGlhhsShuuuhGEhshLt.h.st.ha.........................s...l.shauuGTGhuGlhGuh.a.h.hhpt...hths.t...shh......h.s....h.a......hh.....h.....l..............................................................................................................................................................................................................................................h.......t.....p.h......h....h..........h....hh..hh.hPLh.hVYh...hE...Yhlspul..h..lha................p.h..t.t...Y.hh.thhY....Q.......h.....G....VFhSRS....o.........h...............h.........h..........p........h...........p......hhh....shhQh...h.hhh...h.h..........hts...........h...h..................h.......h.........p....................h.......hlh.hl....hhh.GhhGGhsYVNs.a.............h............h........p.........................................t..............p..phshshhs.s.shuh..hushh.......................................................... 0 118 172 253 +141 PF02861 Clp_N Clp amino terminal domain Bateman A anon Pfam-B_102 (release 6.0) Family This short domain is found in one or two copies at the amino terminus of ClpA and ClpB proteins from bacteria and eukaryotes. The function of these domains is uncertain but they may form a protein binding site [1]. 
20.70 7.40 20.70 7.40 20.60 7.30 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.23 0.72 -4.03 167 16656 2009-09-14 13:11:56 2003-04-07 12:59:11 15 55 4576 80 4358 12959 3258 51.20 24 11.11 CHANGED A.pphApphscpalssEHLLluLlppspuhstplLpphGlshpplcptlpphhs ...................Atp.Apphsc.palssEHlLluL....l.......c....p....s..p.......u........s..s......s....p..l....L..p....p.h...G.l.s..h..p.pl.cpt.lpphh................................. 0 1453 2953 3792 +142 PF02353 CMAS Mycolic acid cyclopropane synthetase Bashton M, Bateman A anon Pfam-B_862 (release 5.2) Family This family consist of Cyclopropane-fatty-acyl-phospholipid synthase or CFA synthase EC:2.1.1.79 this enzyme catalyse the reaction: S-adenosyl-L-methionine + phospholipid olefinic fatty acid <=> S-adenosyl-L-homocysteine + phospholipid cyclopropane fatty acid. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.72 0.70 -5.32 12 4148 2012-10-10 17:06:42 2003-04-07 12:59:11 15 22 2427 44 1236 14323 7479 263.10 35 68.06 CHANGED pchcpphcslptHYDlSsDFFpLaLDPohTYSCAYFcc.......sDhTL-EAQhsKlDlhLcKLpLpPG.pLLDlGCGWGuhhh+AspcYDVpVlGlTLScpQhphspptlsphshtcphclhLtsac-hs-....hD+lVSlGhFEHh.......Gh-pYssaFptsapll.ssGhhLLHoIsshc.cphs.tth..........cFIsp.IFPGGpLPolptl.p.pspcsGFplhchpsLp.HYAcTLchWs-sLptph-cAhsl.huEchhchahhYLsuCAttFRhGhlsltQh 
......................................................................................p...ppshpsl.ptH.Y...D...l....u....N..-...a..ap.h.a..L..D.s.s....M....p..Y.S......C..Aa...a....c.s......................................s..c...p....o....L.....-...p...A....Q............s.....K..l....c...h.......l....h......c...K.....L.....p.....L.........c.......P......G.........t......c.....l.....LDI..G..C..G...W...G...u...h.....s......h...h...A.....A.....c.....c.........Y......G...s....p...V...s..G.......l.......T......l......S.p......-......Q....h.......p.........h....u............p.......p.....+..........h....c............p......h.........s......h......t..........c.........p..........l...........p.......l.......h.....L......p.....D...........Y......R........-......l.....s..s.................pa..D..+.......I....V......S.....l....t...M..h...E...H..V.......................G...h.....c.....p.....a....s...s....a....F....p...p...l....p...c...h.L.c.s.......s...G..h..h..........l........l......H.....s....I....s....t..t....c......p....t....h...t...s............................................sa.....I..p...+...Y.....I......F.....P......G......G.....h.....L...P.....o.....l.....p.....p...l...h.....p...........h......p...c...s.......s.....h........p.....l......p...c.h....c..s......h....t....h....H...Y......s......c..T.L.pt..W.......tcp..ap............s.......p...h...............s..................c............l....h...................t............h........h..............s...........-........c........a........h.......R........h....W.ph....Y....LsuCssuFct..t.tlplhQh.................................................................................................................................................... 0 353 757 1043 +144 PF00780 CNH CNH domain SMART anon Alignment kindly provided by SMART Family Domain found in NIK1-like kinase, mouse citron and yeast ROM1, ROM2. Unpublished observations. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.79 0.70 -4.98 62 1933 2012-10-05 17:30:42 2003-04-07 12:59:11 17 73 285 0 1105 1799 2 265.10 21 23.92 CHANGED sssh...stspplllG.T-pGlalhsh.............t..ht.sphhph.t......lpQltVlpphslLlhl........u......s....cpLhsasLssLpstp.........t.....................plsts+ssphhs...........sshstsphLssuh+p.....slhlhchhp.shppht......................chhp-..hths................s.shslphh.......csplClGs...........ppsFcllsls.p......sts..tslh.p.sstt..........h....tthpslshhpls.......p.s...chLLCasphuhaVst...pGt......psc....shtlpWss...tPpuhsh.htsalluFps.sh.lElRslpss.....cl...hpplssppl+hlssss ............................................................................................................................................h......t...lllG.s-.p.G...l...hhhph........................................t.sph..p.hhs................hpQltl....l..pt..s........l.Llsl................u...................................s......pplhha.LstLpsp..t..............................................................................................pl.schc.G.sthhp.............hsth....pt....h....p........h...L..s..sAl+p.............plhlhp.h..t.....t...s..htph..............................................................p.hp-...h.p.hs................s.shslshh............p.p...lslGh.....................................sss.Fp..hl.sls..p...............uss.....slh.....sp.p..........................................t..h.....p..s.h...t.lh.pls...............................p..s........chL.l....s....a.....s.....p....huhaVst.............pGc....................hsc......p..lpW.s.t.........h.Pp.u...h..............s..h...t.........t......s.......lhu.a...tp....pu...l-l+.s...lpss......pl........hppht.h.pphphh....s............................................................................... 
1 320 476 785 +145 PF00027 cNMP_binding Cyclic nucleotide-binding domain Sonnhammer ELL, Finn RD anon Prosite Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.62 0.72 -4.17 342 26013 2009-01-15 18:05:59 2003-04-07 12:59:11 24 502 4562 267 10760 22035 3366 90.20 18 23.38 CHANGED hpphptGphlhppG-....sstlallhpGplclhptspssp............................hthhtsGshhGchshhtsp.........tsssshA.....hsssplhtlsppphpplhpppsp .......................................................h...httGph.l.h..p...p.......G.-...........ss..p...h.......ah..l.h.p..G.......p......l..c......l...h.t.t...s...p..s...s.pt......................................................................................................hht.hh.t..s....G....s.....h..h..G...c......h....u..l..hpsp..........................psss.spA.............hs.s..s.p.l.h.tl.s.t.p...phpp..lhtp...t......................................................... 0 4474 6939 9042 +146 PF02629 CoA_binding DUF184; CoA binding domain Mian N, Bateman A, Griffiths-Jones SR anon COG1832 Domain This domain has a Rossmann fold and is found in a number of proteins including succinyl CoA synthetases, malate and ATP-citrate ligases. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.99 0.72 -3.47 70 6798 2012-10-10 17:06:42 2003-04-07 12:59:11 14 37 4404 64 1756 6726 3805 96.70 32 28.56 CHANGED scpsclhlhGhosp..phs.aphppthphshp.hlhulsPp.csG.pplt.........GlPlatslc-hhcch......t..sssulIhV....PushAtcsl.Etlc.Asl+slVslo.G .......................................cpo+Vll.G..ho...G....p..pth..a...+.s....p...p...t....h....s......h.......G.......sp....hlh..G...l.sPt...+....s....G....p.p..ht..........................................GlP.V...a...s..s..l.c...-..shpph....s......ss.sulIhV......P...s...sh...s...t...c...ulhE.ul.....-..AslchllslTtG.............................................. 
0 610 1119 1487 +147 PF01144 CoA_trans Coenzyme A transferase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.19 0.70 -5.16 43 6881 2012-10-04 00:26:15 2003-04-07 12:59:11 18 14 2405 102 2146 5554 1112 208.60 24 73.96 CHANGED hpphspAlu.plcDGthlhlG....hG.hP.sLlsslhcps.....hsshshlpsss..Gh....hGluslhhstplccsl....ssh.hsp.......shhhspphhsuph.phchhsQGshh-thhhGGstls..u.lsssslG.shhttt......................phhshs..........Gtthllptuhpsslshl+ttptDthuplhacsostphs.shhs...........sthlT.hpV.cls...hsph.s.phhhPGlhl-pllts ..................................................................................h..phtptlu.tl....pDGthl.s.....l...G........hG..h.........P......sLl....s.s.l.h..pp.......................psh.shl.psps.....Gh...............hG.l.G...h.......h...p.p..t..p....l..cphl..........ssh..ssp.............shhhspt.hhs...u...p...h....ph.t.hspGs...hs-hhh.hGuht.....ls..G.lss.h.u...l...s...s.htst................................................................................pl.h..s..hs.................G..t..h...h.L.h..t.t....u..h.p.h.s.l.sl......h..c.t.pp.s....Dt.....pu.....plhh....c....psh..h..s......sshs.......................shlT.lpV.c..ls........h.s...p...h.......s....p...hhPGlhV-plh..t.................................................. 0 620 1257 1756 +148 PF02514 CobN-Mg_chel cobN-Mg_chel; CobN/Magnesium Chelatase Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_647 (release 5.4) Family This family contains a domain common to the cobN protein and to magnesium protoporphyrin chelatase. CobN is implicated in the conversion of hydrogenobyrinic acid a,c-diamide to cobyrinic acid [1]. Magnesium protoporphyrin chelatase is involved in chlorophyll biosynthesis [2]. 
18.40 18.40 18.50 18.50 18.20 18.30 hmmbuild -o /dev/null --hand HMM SEED 1098 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.88 0.70 -13.70 0.70 -7.17 129 1875 2009-01-15 18:05:59 2003-04-07 12:59:11 11 21 1069 0 690 1853 1652 877.50 30 85.15 CHANGED lhpYhttGG......tcNhpshlphLspph..........thts..........tsPhthPp.h.GlY+.st.........thhtt.t.h.t.....................t..tssVGllh.aRuhhhuushshhDul..lcsLEpcG.hsslslassu.........tstst..ltphhht...........tlD.sllshsuFulsssss.t............uhphL.ppLsVPllpslhh..hpohcpWpsuspGLs..sh-luhpVslPElDGtltsh.slut+pt........s...t..h.....tthtPhs-Rlchlss+stpalpL+cpsss-K+lAlllaNaP.sp..pupl.GsAs...hLDs..sSlhplLptLpppGYsl..........s...lPps.scpLhctlh.tthsss..h.st.t.h.p......hh.hshpcYhpaasp...Ls.thppclpppWGssPG..........t....hlh....t...............hhlsGlphG.NlhlulQPsRG.....hpt..Ds.h...phhHssslsPsHpYlAaYhWLcct.....apADAllHhGsHGoLEaLPGKplGLSssC.aP-hllGslPplYsYhlNsPuEGs...AKRRutAsllsHLoPPlspAuL..YssLtcLcphlccYppst....t.sst..ctptl...tppIhphsp.phsLsp.-...........................................................................................................................shtphlsp..lcs...aLp-lcps.IspGLHlhG..psPp.......s-ph...sphlhshhphs............sh.t..h.....................................thtttphhphlpthstth......t..................................................................................................................................................................................................................................................................................................................................................................................h..hh.thh.tl.....ssspElsuLLpALsGcalsPGPuGsPsRs..-lLPTGRNhauhDPptlPopsAaphGpchA-pLLc.cahp-p..Gc..aPcslulsLWGossh+otG-slAQhLtLlGV+P.....laD.ssGRVsslElIPLs-L.........GRPRIDVslplSGlFRDhFPphlpLlDcAlphlAs..................h.D.Ess-.Nhl+ppsht..............t.t.s.....p.uthRlFussPGs..YGu.G.lsthl-uu.sWcscs-Lu-sYlstpuaAYG.......t.s...s........tu...........p...pspc................s
hcptLpsl-sshpsp-opE...hslhDsDcYapahGGlstAlcpls..Gp.pPshYhuDps...pssps+l+oLp-pls+.hRoRlLNP+WlcGMhcHGYcGAtElutpl-thaGasATs.stVscahacplscsalhDcp...hpcalpp.tNPhAhpshspRLLEAtpRGhWp.sssphlc ..................................................................................h...ah..Gu.......tNhtthh..hh.t.h......................................s..hs..t......u........l..ht..........................................................................................................tts.hlullh..+shh.ss.s.hshhp...slhptl......ct.tG.hps.lslas.u...........................t.............l.ph.h.t.....................ss.hlls.hh.t....hs.hss.......................t.thh...tth..s.hP.l.hpsl.......htshttW.t...t...s.....pGls.......sh...phshpls.l.PEhDGtl.sh.shuhpp................t......h.......th.s..cRhthlsthshpahpLpphsstp++lAllhhsaP..s.......t.....psplusu..s....hLDs..t..S...hhtlLpthp..ttGYpl.............t.......h..........P...............t.................s....sptLhp.lh...........s.....s...............................hsht...pY.th..htt...l....thtpth....tt.WG.s.s...............................t......hh..........t..tt.......................hhlsuhphG...NlhlhlQPstG........ht.t....ss...t.hHs...shsP.HtYhAhYhWlpp..............................atu-AllHhGpHGsLEalPGKthulSt.t.C.aP-hhlGslP.lY.ahsssPuEus...AKRRuhA..sllsalsPPhtpAth..Yt.t.ltcL.cpllsca.ph.........sst......ch..l..............tppIhph..ht..thtlpp-h............th...............................................................................................................................................t.thltc..lcthLt-lcp.t........l.tGLHlhGps.s.p.......s.pt.h...hthlhshhp..................sl..............................................................................................................................t....ht.ht......t...................................................................................................................................................................................................................................
.....................................................................................................................................................................................h.....hhtt.h..h.tl...tsstEhtslhpuLpGtal.suPuGsPhRs..plLPTGRNh.auhDsptlPo.hAhphGhthAppllp.pahtc........p..Gp.aPcslulslWGossh+otGpsl.AphLhLhGlcP.....hhs..st............tRl.......s.sh.cl.ls.....tpL......................sRP......R.l..DVslp.lSGhFRDhFs.p.hthlDpAlphsAt...........................................h...-.Es.c.N.ltt+s.t...................................t.s.................ptuthRlFustsGs...Y.Gu.G.lpthl-s......t...tW...........ps...............ct-....Luc.sahshtuaAYu..............t..t................u...........t......ttt.........................hhpttLpphphshps.-spE...hslhsss...caapa.GGhhtsspthp...........G......p...t.......st....h......a..hsDp.s...p...pp..sphcslp-phth.hRs+hlNPcWhpuhhpHGYc.Guh-h.s.t.pls.hhGa...........sAT...s.thl.....s...sahap.tltpsalhD.t...hpphhtp...N.PhAhpphs...t+hlEA.pR......uhWp.ss.p...t............................................................................ 1 220 463 581 +149 PF02492 cobW CobW/HypB/UreG, nucleotide-binding domain Bateman A, Mian N, Bashton M anon Pfam-B_428 (release 4.0) & Pfam-B_1247 (release 5.4) Domain This domain is found in HypB, a hydrogenase expression / formation protein, and UreG a urease accessory protein. Both these proteins contain a P-loop nucleotide binding motif [2,3]. HypB has GTPase activity and is a guanine nucleotide binding protein [3]. It is not known whether UreG binds GTP or some other nucleotide. Both enzymes are involved in nickel binding. HypB can store nickel and is required for nickel dependent hydrogenase expression [1]. UreG is required for functional incorporation of the urease nickel metallocenter.[4] GTP hydrolysis may required by these proteins for nickel incorporation into other nickel proteins [1]. 
This family of domains also contains P47K (Swiss:P31521), a Pseudomonas chlororaphis protein needed for nitrile hydratase expression, and the cobW gene product (Swiss:P29937), which may be involved in cobalamin biosynthesis in Pseudomonas denitrificans [5]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.41 0.71 -4.83 62 9942 2012-10-05 12:31:07 2003-04-07 12:59:11 14 44 3597 7 2693 11049 3134 177.40 27 56.79 CHANGED sshslsG.lGuGKTTLLpcll.....pp...h+lAVIhN-hs..sshDuphl..........pps..ss...............llplssGshCpo.lptDh.hslpsltcht.......................................phDhllIEssG.lssPssh......................shthDshls....llDs.........................scscchspph.ptlthADllllNKsDLssts.tshcthtpphcplssptpllh ......................................................................................................................hhllsGaL.......G...uGKT....TL...LpclL........................p.pps....h.+lAV...I..h....N.....-.......hu.....t......s.....s...l.D.....u.....p.....l....l.......................................................tps......ss.....................lhp.l.s...s....G......C...h.....C.C....o.....hp.......t....D....h....h.s...l.p.s....l....hcht...............................................................................................................................................................ph.D.....h.l.l.....I....Eso.G....lsp.P.s.sl.......h.....................................h.........h..t..l......c...s......h..l.s.........l.lDs....h...........................................................................................sp.t..p..s...h..s...p...h.h...h..p..Q..l...t..h..A...D.........l.........l.........llNKs.D...L..ss.....t.......p........p..thp...p.th.p.p.lsspt.h..t..................................................................................................................... 
0 789 1689 2240 +150 PF00135 COesterase Carboxylesterase family Sonnhammer ELL anon Prosite Domain \N 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 535 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.71 0.70 -5.80 123 8736 2012-10-03 11:45:05 2003-04-07 12:59:11 23 92 1650 371 4719 13851 2441 405.50 22 85.47 CHANGED hhhhhhhhhhhhhhhsstss.........................ltst......GplcGh.hts.tt.....t..h.sFhGIPaAcPPlGpLRFctPpP.....sps....W.pslhcuophsstChQ.sphh...................hpsshs...................SED.CLYLNlasPphspps............................................hPVhVaIaGGuFhh..........Gsu........s.hsusthhhpcsVllVohsYRLGshGFLohsspphs......................GNhGLhDQhhALcWVppNIssFGGDPsplTlFGcSAGuuSVshhhlS..........ts...cuLF++AIh.SGsshssas.hpsss..hphspplApthGCsssssp..................plhcCL+.ptsspcllsspt..hhhthhshhs...............hhP.............sl-us..................hlscpP......tphhppu.hp.plshll.GssppEGhhhhshhhtt.tt.........t.p...................................hhphhtthhh.hspht........tpplhptYhss.pt..st.pph.pthtplhuDhhahts.hhthhtphppsssss.YhYpFsapss........hthh.thtsssHus-ltalFstshhtt.th.........................ppcctt...cphhphassFApsGsP...............s...t.stWsshspp.....t.hthhhtstpthhtpp.......hpaa 
..............................................................................................h........................................................h..h.........u.h...G.................................................h...........ahGlPa.A.tsP..h.....t....................................R.a.......t......................P.....................................h.....p...t.......h......p....s.......p.....p..................s........s..........s.Q.................................................................................................sE...D...C...L...h..L........N..l..a..s.P..t......t..t..tt...........................................................hP...V.hla.........l.....a................G.G..u..a....................................Gsu........................h..t.....s.....t...........h.....h.............t......t......s......l......l.....l......Vohs....YR.....l......G........h.......h.............G....F.....h...s....h......s.....t.t.............................uN..h.....G.......l...h........D....Q......h....h.....A.L....p....W..l........p........c.......N........I..t...........t..........F.........G............G...........D....P............p...p..l..T................l....h....G.p...S.A.......Gu........h...u..l..t..h.h...hhs....................s.s...............ps........L...F..........p...p..u........I..h......S...............G..........s.........h...h....................................h.....h..t...................thh..th...s.....p.....h..s.ht....tt...................................................hp.p..L....p...t.h.s......p..p...l..ht...........................................................P...........................................h.l.-.s......................................................hls...p.......................th..h......t.......t.......t.............h.............p...........h.......s.......h..hh...G......s.p....p..Es................h..h.h..........................................................................................................
h..........h....................................t.h..h......a........t.................................................................................................................t.h......t...hhs....-..h......h.h.....h....s.........................h............................................h................t...........t.............................t....t..............................ahY.ath.s.................................................................t.s..H.s..-..h....hh...a..t.............................................................t..h.......t.h.thhhs.Fs..t..t..s....ps...........................................a.................................................................................................................................................................................................................................................................................... 2 1694 2505 3925 +151 PF01484 Col_cuticle_N Nematode cuticle collagen N-terminal domain Bateman A, Bashton M anon Pfam-B_200 (release 4.0) Family The function of this domain is unknown. It is found in the N-terminal region of nematode cuticle collagens, see Pfam:PF01391. Cuticle is a tough elastic structure secreted by hypodermal cells and is primarily composed of collagen proteins [2]. 20.80 20.80 20.90 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.38 0.72 -4.28 113 1198 2009-01-15 18:05:59 2003-04-07 12:59:11 12 22 21 0 1054 927 0 51.90 23 16.32 CHANGED hss..usshSslullus.llslshlasplpshpsplps-hpth+spuc-hWs-h .....h..shhusshSslAllss.llslPhlashlpplpsplps-lshh+sp.......u...cshWpch........... 0 359 519 1054 +152 PF01391 Collagen Collagen triple helix repeat (20 copies) Bateman A, Eddy SR anon Swissprot Repeat Members of this family belong to the collagen superfamily [1]. Collagens are generally extracellular structural proteins involved in formation of connective tissue structure. 
The alignment contains 20 copies of the G-X-Y repeat that forms a triple helix. The first position of the repeat is glycine, the second and third positions can be any residue but are frequently proline and hydroxyproline. Collagens are post translationally modified by proline hydroxylase to form the hydroxyproline residues. Defective hydroxylation is the cause of scurvy. Some members of the collagen superfamily are not involved in connective tissue structure but share the same triple helical structure. 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -11.88 0.72 -11.88 0.72 -4.43 41 44425 2009-09-12 04:47:45 2003-04-07 12:59:11 13 899 1492 46 19240 35044 5977 63.30 39 36.42 CHANGED GssG.sGssGssGssGssGssGssGssGssGssG.sGsPGssG.sGssG.sGssGssGts .............................................................................................G........G........s......G................s.....G.....................G...............t......G...........s.....G..............t........G............s......G...............t......G....s....t......G................t.......G............s......G........s.....t...G.........s.....G..........s....G.................G........G..G..h............................................................................................ 0 3964 5810 11551 +153 PF03772 Competence Competence protein Bateman A anon COG0658 Family Members of this family are integral membrane proteins with 6 predicted transmembrane helices. Some members of this family have been shown to be essential for bacterial competence in uptake of extracellular DNA [1,4]. These proteins may transport DNA across the cell membrane. These proteins contain a highly conserved motif in the amino terminal transmembrane region that has two histidines that may form a metal binding site. 
20.20 20.20 20.30 20.60 20.10 20.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.75 0.70 -5.34 123 4251 2009-01-15 18:05:59 2003-04-07 12:59:11 11 14 4131 0 907 3604 477 262.70 21 39.09 CHANGED LlhG-+s.tlspphhptappsGlsHLlAISGhHlullss....hhhhlhphh................hssptht..hhhulhhhhh...Y.shlsGhss.sshRAhlMhslhhhuhhh...t+c....hsshssLshuhhllL.lhsPhslhssGF.LSFhAlhullhhh........thhpphhthhthhhh............................sshss...plssh..PlhhhhFtplSh.hullsNLlsl.Pl.huhll....lPhhlhuhlhhhh..............shuthhhh....hh..th......lphlhthh.phhup........hsh.....lhh..stsshhhhhhhhhhlhhhhhhhthh ..............................................lhhGc.p.p.tls.p.c.......h.pphhppsGlsHLhAlSGhHluhhhs.......hhhhl.httl...................................................h.s.ptht........hhhshhhhhhY.s.h.L.sGhss...uslRAhlhhs.lhh.h.h.h.hh........t.p.p..............hs.shp.s...lshshh.h.ll.lhsPhhlhshGFhLShhAshsllhhh............................thh...t.h.hth..h......hh..h..............................................................................hs.hsh.plshh..PlhhhhFtphsh....hu.......hls...Nllhl.Pl..hshlh...lPh...hlhshlh.hhh..........................lsthhhh.................................hh......shh.......hthlhh.h...l....phhsp..............h.........hhh...s.t..sh.hhh.lh.h.hhhhhhh.hh.....h......................................................................... 0 309 604 770 +154 PF05071 NDUFA12 Complex1_17_2kD; NADH ubiquinone oxidoreductase subunit NDUFA12 Bateman A anon COG3761 Family This family contains the 17.2 kD subunit of complex I (NDUFA12) and its homologues. The family also contains a second related eukaryotic protein of unknown function, e.g. Swiss:Q9BV02. 
21.30 21.30 22.00 21.50 20.40 20.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -11.06 0.72 -3.57 88 843 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 595 0 467 747 993 102.70 29 67.27 CHANGED GphVGcDchGNpYYcst...........stc+RWVlYs.uhs.......E..ASpVsP-WHGWLHaphDpsPo...pp.shst+sWpc..sHp.NhT.GTstA..YtPtG.....uh.....t......psh.ptD.YcAWsP ..................phVGpDchGNpYYEst...............pthhtp+RWV..Y..s.stt............-........uopl..P......P..pW...........HuWL+at..t-pP.Po........pp...t...h...t.......c...p.a.pp....+.p..Nho...so...tt..Yhs.s...................................................................................................................... 0 150 256 364 +155 PF00329 Complex1_30kDa complex1_30Kd; Respiratory-chain NADH dehydrogenase, 30 Kd subunit Finn RD anon Prosite Family \N 21.10 21.10 21.30 21.70 20.60 21.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.32 0.72 -3.78 131 5889 2009-01-15 18:05:59 2003-04-07 12:59:11 14 20 4153 16 1220 4082 2058 105.60 34 33.03 CHANGED plhphL+pp...thshLsslsusDhhptt........chplsYp.lhsh.....tpspp........ltl+stl.st..cssplsSlsslasuAsahEREsaDhaGlpFpG.HPc......h+.Rllhs.-s...a.s.......aPLRK .........................................................hLh........tashLh.slsuhDhtsts..................caslhYp...Lh.s.h......p.p.s.pc....................lpl.K.l.....hs..st...........ps............s......p.......lP...Sls..la..uAsWhEREsaDMaGIhasG.HP.c........L+.RIl...h.s..ps...W.G.......aPLRK.......................... 0 403 782 1023 +156 PF02950 Conotoxin Conotoxin Bateman A anon Pfam-B_529 (release 6.4) Domain Conotoxins are small snail toxins that block ion channels. 
25.10 25.10 25.20 25.10 25.00 24.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.34 0.72 -3.18 112 926 2012-10-01 22:06:18 2003-04-07 12:59:11 12 1 54 27 2 953 0 70.10 26 96.65 CHANGED hKLshlhllslLLlo.......ss..stsspt.sp.......p.tpthpphpssp..pphh...ptp..h.....................hsppCCsh.....hC........hCh ........................KLss.lLhlslLLhs.....hsshthsu-pssc....................p.scp.t.p.chpssc....p.p.th...hh..........................htppCCsh......hCt.......h.sC...................... 0 0 0 1 +157 PF05019 Coq4 Coenzyme Q (ubiquinone) biosynthesis protein Coq4 Wood V anon Pfam-B_14948 (release 7.6) Family Coq4p was shown to peripherally associate with the matrix face of the mitochondrial inner membrane. The putative mitochondrial- targeting sequence present at the amino-terminus of the polypeptide efficiently imported it to mitochondria. The function of Coq4p is unknown, although its presence is required to maintain a steady-state level of Coq7p, another component of the Q biosynthetic pathway [1]. The overall structure of Coq4 is alpha helical and shows resemblance to haemoglobin/myoglobin (information from TOPSAN). 
20.50 20.50 20.50 20.80 20.00 20.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.36 0.70 -5.44 27 475 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 363 8 319 505 118 198.00 31 79.73 CHANGED YsuHlPL....sshp+hhLssGSulsulhcPcR.uDhIAsLGEsTuh.hhLppL+cpMhsDspGRpIL+-+PRIoopoLshstLpphPcsThGtsYspaLccpsVoPDoRssV+alDD.EhAYVhpRYREsHDFhHslhuhPhsh.GElAlKhhEhhNhGLPMshLuulhushRL+spp+pphhphYlPWAl+sGhpu+.......sLlNVYWEchhEpDls-LRcELGIp.sPc ............................................................................tph.hhhhuuhhu.hh.pP.c.tp.....l.shhuE.so.uh...hl..phh.ppMhpsspGpplLp-+.Pclssps...l...........s..hstL......ps.L..Pps...olG........tsYhpaLcp..............ps.lo........P...D........s.........R.s..s......l........c.......a.l....-.....-..........-......h.....A..Ylh..pRaR-sHD.haHslh.G.hs.h.s.h.....GElulKhhE...hh........p...h.......t..l......P.......h...sh...L....s.....ul...h.u...s.h.......c.........L............p.....t...p..t.p...........t........h........hp...........h.h..l......P...W....A..hcs....G....hp.u.c..............lhslhaEc.hh-pslpclRpcLsIp............................................................................................ 1 90 172 272 +158 PF03471 CorC_HlyC Transporter associated domain Bateman A anon Bateman A Domain This small domain is found in a family of proteins with the Pfam:PF01595 domain and two CBS domains with this domain found at the C-terminus of the proteins, the domain is also found at the C terminus of some Na+/H+ antiporters. This domain is also found in CorC that is involved in Magnesium and cobalt efflux. The function of this domain is uncertain but might be involved in modulating transport of ion substrates. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.46 0.72 -4.20 174 11377 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 4360 35 2334 7354 2259 80.60 23 18.84 CHANGED pphssssallsGphslc-lpchhsl.p....lsp.c..c.....hc.TluGhlhpplsclPp..G-......ph........p...s...............hp.hpVhchcsp+..Ipplclphhp ....................................p..hscssallcGpss.lc-...l.sch.h....sl..c.........ls-..-............-..............hc..TluGhl...hp..tl...s..c.l.Pp...hG-.......pl................p....s..................................hp..hplh....ch.-.sp+....lpplplph..t................................ 0 682 1425 1903 +159 PF02389 Cornifin Cornifin (SPRR) family Bateman A anon Pfam-B_1215 (release 5.2) Family SPRR genes (formerly SPR) encode a novel class of polypeptides (small proline rich proteins) that are strongly induced during differentiation of human epidermal keratinocytes in vitro and in vivo. The most characteristic feature of the SPRR gene family resides in the structure of the central segments of the encoded polypeptides that are built up from tandemly repeated units of either eight (SPRR1 and SPRR3) or nine (SPRR2) amino acids with the general consensus XKXPEPXX where X is any amino acid [1]. 
24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.63 0.71 -12.31 0.71 -4.54 5 373 2009-09-13 06:27:35 2003-04-07 12:59:11 10 30 87 0 232 424 33 118.10 16 44.10 CHANGED cQHQVKQPCQPPPQEsFVPp.TKEPCHocVPpPGNT........................KlP-sGsThV.EsshT........................KVPEPspTKVPEPCpoKVPEPspTKVPEPCsTKVPEPsYPKVPEPupsKVPE.G..................................PsHsKsPEPGasKVPEPGhPKVPEPCQo+VPEPCPSTVTPusAQQKTK ................................................................................................................................................................................................................t........th.tst.....st.hs.t....t.....th.p....st..ts..ths..t.t.....th.t..st........th.s..t....t....th.....t..................................................................................................................t................................................................. 0 106 118 167 +160 PF02628 COX15-CtaA Cytochrome oxidase assembly protein Mian N, Bateman A anon COG1612 Family This is a family of integral membrane proteins. CtaA is required for cytochrome aa3 oxidase assembly in Bacillus subtilis [1]. COX15 is required for cytochrome c oxidase assembly in yeast (Swiss:P40086). 
21.50 21.50 21.50 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.24 0.70 -5.55 146 2337 2012-10-03 10:28:09 2003-04-07 12:59:11 10 13 2048 0 837 1935 3330 287.80 24 85.08 CHANGED thhhhhshhhshsllllGuhTRLTsSGLuCs-.WPsChGt..hsPhsptp.............................hth....ahEahHRhhuthlGllhlshhlhsh.............h.h.h...tthh.hh.shhlhhLlshQuhlGhhhVp.s....l...............p.hh.lssHLhhuhhlhuhlhhhshphhth............................hsh..tthphhshh.sls.llhlplhhGu..hluu.pAuh.ss...................st.h..h..................t.h...........................phhHRhhAhlshhhhlhlhhhhhhtt.........tthpphuhhh..hsllhh.QlslGlsslhh.tlPl.h...lulhHphsAslLlsshl .........................................t.hhhhhsshhhhslllhGuhs..RlTsSGLuCss.W.PhCt.Gt...hsPhstt.t.................................................................hphhhEasHRhhuthlGl.hhlhhslhsh..............................................tt.tth......tpphh...hh.shhhhsh.lshQ...............uhl..G...hhs..VthsL.....................................................................sshh...lu...h...HLh.hu..hl.h...hus.hh.h.hshtlhp.....................................tp.hh..tth+..h.h..s....hh.hhh..hlhlt.l...h....h.Gu....hV..uu....p...A.uh.sh...............................t..h.h...........................h.....hp..thl..........................................phsH.RhhAhh.shshlhhhhhhshpth..........................tthtthshhh..hhllhl..Qs.hlG..lhslh.........h.....tls...........l...h............lu.hhHthhushLhshh.h................................................................................................... 0 262 541 716 +161 PF02936 COX4 Cytochrome c oxidase subunit IV Bateman A anon PSI-blast P00423 Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit IV. The Dictyostelium member of this family is called COX VI Swiss:P26310. 
The yeast protein Swiss:P53077 appears to be the yeast COX IV subunit. 23.50 23.50 23.60 24.00 23.30 23.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.77 0.71 -4.45 10 463 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 283 51 246 436 0 129.20 29 76.99 CHANGED chVuptshupPshsDl.-pWtshsp..pEpssllssLR-+pKssWpsLSh-EKKAlYhISFspphsc.httssGEhKhlhusslhslulohslhslh+hhshPphP+Ths+EWQcussEhhhspchNPlsG.huptYchcsph.p .......................t.t..u.shhhshp.ch..s...........p.-...ss.hpsL+-+pKssWppLohpEKpAhYhluFspphsc....hpt.sssEhp...plhs.ss..hhhluhohhlhhh..+ha......s.h..s...P.pThsc.E.Wpttp.chhh...s.hphNPlpG..uttashpt..................................................... 0 62 111 185 +162 PF03626 COX4_pro Prokaryotic Cytochrome C oxidase subunit IV Finn RD anon Pfam-B_3217 (release 7.0) Family Cytochrome c oxidase (COX) is a multi-subunit enzyme complex that catalyses the final step of electron transfer through the respiratory chain on the mitochondrial inner membrane. This family is composed of cytochrome c oxidase subunit 4 from prokaryotes. 25.10 25.10 25.20 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.75 0.72 -3.89 250 2280 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1772 0 517 1230 725 81.10 30 74.43 CHANGED ahhsallhllLTs..lshhls...........................hts.......hh.s.hshshsll..hh.lAhlpshlhlhaFMHlpt.spp.........hhphllhshllhsslhlhh ........hhGFlLullLTl.lsFhhs...........................hts.......sh...o..ssh..lshl...ls.hAllQshlpLlhFhHhsp.psctt.p....hhthlFsll..l..lsh.lllG.olWlh................ 0 131 290 402 +163 PF02937 COX6C Cytochrome c oxidase subunit VIc Bateman A anon PSI-blast P04038 Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIc. 
21.40 21.40 21.40 22.60 20.50 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.37 0.72 -4.09 8 161 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 101 51 89 163 0 69.70 43 80.44 CHANGED ushLsKPQMRGLLu+RL+hHlVsAFlluLGVsAhaKFusA-PRK+AYADFY+NYDAMK-FEcM+cAGlFQSVK ..........s...lsKPt.MRGLLs.+pl+hclssAhslo.lssusha......KasVs-.PRK+AYADFY+NYDuhK-FEcM..+c..AGlFpSs.............. 0 23 28 53 +164 PF02935 COX7C Cytochrome c oxidase subunit VIIc Bateman A anon PSI-blast P00430 Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIIc. The yeast member of this family is called COX VIII Swiss:P04039. 25.00 25.00 25.10 25.00 24.70 24.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.21 0.72 -4.17 14 253 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 202 51 155 269 0 68.00 35 72.16 CHANGED Mlupp.....sRRssspulRt.+...........atpGPsp.........NlPFpVpN.KahlhshhhsFhusGFusPFlllcaQLhKp .............................tpt.....sR.p.h.ss..s...s.h..Rt.p.................asEGPtp............NLPF....s..l.....ps..K.ahhh..hhahsahusGF.usPFh.ls.haQhhK........... 0 42 76 124 +165 PF04516 CP2 CP2 transcription factor Mifsud W anon Pfam-B_2156 (release 7.5) Family This family represents a conserved region in the CP2 transcription factor family. 
25.00 25.00 28.30 28.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.70 0.70 -5.31 29 661 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 171 0 367 552 0 208.40 37 38.82 CHANGED oh..p.p.h....h.shhs.tspphshst............tpphcFphsLpAPTAhhp+tsE.PlTYLNKGQuYslolh-s..ss.sshpssp...hR.ohlRlsFc--cpppc.tppWphW+.......u+Q+suK.psl-hsDhpp.......shsplp.lEpsua..suhsVhWssst.s...EssIhlphNhLSTDFSppK..GVKGlPlRlphcT.hhss..........tssssctschsaCpVKlF+D+GAERKlpsDht+lpKphtKh+ ......................................................tttpt.......................................ttspt.sFpYsLpAspS.t.K.t-tshTYLNp...G..Q.Y...t.l.plh-s..tt..s.t.htthp...........V+.SllhVVFc-c+.p...p.phpphchW+........pp.s....pRllDl.D.h..........Shshl.p.......hp.psu...NulpFhWsssc......cspl.FI..pVp.C.lST-..FospK..G...KGlPh....plQIDTap.sp................................p.sc.lHpA.CQIKVF.p.sKGA-RK.h+s-ccKhpK+psp..t.......................... 0 66 102 219 +166 PF00118 Cpn60_TCP1 cpn60_TCP1; TCP-1/cpn60 chaperonin family Sonnhammer ELL, Finn RD anon Prosite Family This family includes members from the HSP60 chaperone family and the TCP-1 (T-complex protein) family. 
21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 485 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.32 0.70 -5.88 94 23232 2009-01-15 18:05:59 2003-04-07 12:59:11 19 54 11481 758 4711 17190 6810 316.10 36 86.46 CHANGED lu......chlpooLGPpGh......sKhlhp..tstphhlTsDGsoIlcclcl.pcPs....AcllhpsupspscpsGDGTTosllLsspllppu.phl...pt.GlcPpplhcGhchAhptslchL......pph..shpsp..............pplhplupsuhsu+..........phlupllscAl...h.............................hclsp....ltlhphpsuph..pcoplh..cGlh.l..c+shhss.......ph.........pph...css.plllhstslph..ps.........t..hts.tth.th.t.p.t.lhphlcplhcts.hsl.................l......lspcsl.....ss.uhphLspsslhulpcl............ccpplccluhhoGup.hlsp..............tslp.s.s.......LGpsppl....plsp..cphsh.lp................................................s...................hsspssolllpGsopthlcEhccslcDAlsss+sslc..tst..llsGGGss.hpluptL.........chsps............hssc.........pph....ulchhupALcths+tLApNuGh.ss..........hpllsp.lps..............tpsp..............thGlshtssp......hh.D.hhptG......llDshpVpppslptAspsAs.hlLplDplltstp 
..........................................................................................................................................................................................................................................................................................................................................................................To...........AT..VL......A..p..u.l.lp....E.G.h+.sV....................sA...Gh.......NPh.....s..l.....+.R..G..I-.+.A.l......tssl..cpL....................cph.u.h...spsp................................ctIA..pV.usl.SAssc......................................p.p.lG..p..l...IA-AM..................................................................cKVGp........-GVITl.E..-.ups..................-...p..Lph........sEG.Mp.F......D+.GYlSsYh.............................pph................t.htl.........-sP.aILlh...-.c........K.lSsh..pp....................................................................................l..l..sh.L.-.tlh....p.tu..ps.L.............................l.........Il.u.-.......c.l........-s-AL.s.sL.....l.h.N.....p.h.p..uhhp......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................... 0 1615 2762 3850 +167 PF02787 CPSase_L_D3 Carbamoyl-phosphate synthetase large chain, oligomerisation domain Griffiths-Jones SR anon ref [1] Domain Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. 25.30 25.30 25.30 25.80 25.10 25.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.40 0.71 -4.35 95 8742 2009-01-15 18:05:59 2003-04-07 12:59:11 14 76 7559 40 1436 7541 2587 120.70 43 14.93 CHANGED lhp.p...LppPsspRlahltcAh.+pGhol-claclT.pID.aFLpplcpllphE.cplpp.....hth.sp.......hLpcsKphGFSDppIA....pl.......h..st........scpplRph.RpchslhPsaKhVDTCAuEFtu.pTPYaYSTY ......................................pcLpcPsDcRhFhlAs.......Al......+t....G..a..o........l-cla...-LT.KIDhW.FLpKhcpIlchp...pp......Lcph...................pslsh-...............hLppAKphGFSD+pIA......th.....................................h......ps...........................oEhs.VRch.R.c..c.h..sI.pP...hhKplDTsAAEasu...sTsYhY.TY................................ 0 471 910 1212 +168 PF04969 CS CS domain Finn RD, Fenech M, Eberhardt R anon Pfam-B_1217 (release 7.0) Domain The CS and CHORD (Pfam:PF04968) are fused into a single polypeptide chain in metazoans but are found in separate proteins in plants; this is thought to be indicative of an interaction between CS and CHORD [1]. It has been suggested that the CS domain is a binding module for HSP90, implying that CS domain-containing proteins are involved in recruiting heat shock proteins to multiprotein assemblies [2]. 
Two CS domains are found at the C-terminus of Ubiquitin carboxyl-terminal hydrolase 19 (USP19) (Swiss:O94966), these domains may play a role in the interaction of USP19 with cellular inhibitor of apoptosis 2 [3]. 21.30 10.00 21.30 10.10 21.20 9.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.72 0.72 -3.43 100 2605 2012-10-02 21:54:05 2003-04-07 12:59:11 11 115 372 25 1627 2732 72 77.20 20 23.00 CHANGED spasWtQo.spVtlsl.l.sss....tcslplphpp.pp...lplt.............lp.st...p.hhhpsc.LhspIss-cSpaplpss........plplpLpK ...........................................phpWtQ...ots.p.......V.h.....lpl...l.sst..............tc.s..l......p.....V...phpp...p.p.......lplp..................................................hp.ttt.......pthhh....p..h......c.....L..h..pt.Ip..s.....-..pS...p..a...plpss.........plp.lpLpK....................................................... 0 581 857 1254 +169 PF00988 CPSase_sm_chain Carbamoyl-phosphate synthase small chain, CPSase domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_345 (release 3.0) Domain The carbamoyl-phosphate synthase domain is in the amino terminus of protein. Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines [1]. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00289. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. 
21.10 21.10 22.80 22.30 20.70 19.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.42 0.71 -4.64 134 6372 2009-01-15 18:05:59 2003-04-07 12:59:11 17 41 5635 40 1365 5095 2458 123.10 45 25.61 CHANGED hpAhLlLEDGolapGp.uhGu...p..u..pshGElVFNTuMTGYQEllTDPSYsGQIlshTaPhIGNhGlNtpD...........hE..S......p.....pstspGlll+-hspt..sSsa+up.poLspaLpppslsGIsGlDTRuLs++lRp.pGshpuhIs ...................................thLhLtDG.s.hapG..u.hG.u..............p......t.............t.sh..GEl............VFsTuMTG.YQEhlTDP...SYp..sQIlshTY..Ph...IGNhGl.ss-.D..........................................hE..S............p..........plpssGllVp-h.s.pp....sSNaR.sp......poLscaLcpcslsGIu.GIDTRtLT+hl.R-.p.G.shpGtI.h.................................................................................................. 0 437 856 1143 +170 PF03178 CPSF_A CPSF A subunit region Mifsud W anon Pfam-B_1224 (release 6.5) Family This family includes a region that lies towards the C-terminus of the cleavage and polyadenylation specificity factor (CPSF) A (160 kDa) subunit. CPSF is involved in mRNA polyadenylation and binds the AAUAAA conserved sequence in pre-mRNA. CPSF has also been found to be necessary for splicing of single-intron pre-mRNAs [1]. The function of the aligned region is unknown but may be involved in RNA/DNA binding. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.87 0.70 -5.34 80 1132 2012-10-05 17:30:42 2003-04-07 12:59:11 10 23 315 30 802 1130 24 296.80 22 26.18 CHANGED puslpllss........hp.........sl......s..shpL..p.p.sEtshulppsphp.spt....................t.cp....hllVGTuhshspc..sp............Gplh.lacltp...................cLchltcpcl.cGs...ssAlsth..pG+.llsuhG.pplhlaclsccp.....Llthu.hptsh...hlsslps.......h.......ss....hllluDhh.cSlthl...tapp.........-sp.hhhhucDhp..sh.lsssphL.Dt.c.ollsuDptsNlallch...s.cssps.ssp...........................+L.ppupFalGchsssh..hsll.....t.p.........................llauohpGolG..hllP.lsccshchhppLppplpsph..............slsGhs.pta.....Ruhh................s+sllDG-Llcp.......ahp ..............................................................................................................................................l.lhs..p...hp......hl...p..thph...t...t...tEhsh.......shh.s.p...http..........................................pthhhVGTuhsh..s..p.tsp........................Gtlh.laph.p..............................................................cL.phl..tcp.ph....c.ss............shulsth........pG....+.llsul....u....pp......lh...laph..spcp..............................Lh..ths.h.tph.........hlhtlpsh...................................ts.....hll.lu..Dlh.c.S.lhhl......tapt................-sp.....l..........hu.c..Dht.........s..t..h..ss...s..s..phl..Dt....c..sh.......lsuDp...................sNlhlhph..........s..s.psspp.ptp...................................................................................................+Lthhsp..aal..G....ch.ls.sh...p....ss....lh.......tpps..........................................................................................llau.T..l.p.GslG..hl...hs..l...scp..p........hchh.tl..ptp.lt..p.....h........................................................s..h...s.Gh.s..h...ta.............Rs..hh.s.......................................scshlDG-Lhppa..
................................................................................................................................ 1 286 462 673 +171 PF00313 CSD 'Cold-shock' DNA-binding domain Finn RD anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.10 0.72 -4.19 40 14886 2012-10-03 20:18:02 2003-04-07 12:59:11 17 69 3908 61 3740 7958 3141 65.20 46 67.33 CHANGED hpGsVKWFNscKGFGFIs.--Gst.DVFVHaSuIpssG.....a+oLpEGpcVpF-lpp..Gs+GspAsNVpsh ..............................pGpV..K.WF...Ns.p..K....G..F..GFIs.......s.......-..........s......G..........s..........t.....D..VFVH..a........SA..I.p.s.sG..........a.+oL......p..E.G.Q..c.............V.pF-..lpp.........Gp+G..s...p.A.sNVh..h............................. 0 1082 2120 2935 +172 PF04442 CtaG_Cox11 Cytochrome c oxidase assembly protein CtaG/Cox11 Kerrison ND anon DOMO:DM04116; Family Cytochrome c oxidase assembly protein is essential for the assembly of functional cytochrome oxidase protein.\ In eukaryotes it is an integral protein of the mitochondrial inner membrane. Cox11 is essential for the insertion of Cu(I) ions to form the CuB site. This is essential for the stability of other structures in subunit I, for example haems a and a3, and the magnesium/manganese centre. Cox11 is probably only required in sub-stoichiometric amounts relative to the structural units [1]. The C terminal region of the protein is known to form a dimer. Each monomer coordinates one Cu(I) ion via three conserved cysteine residues (111, 208 and 210) in Saccharomyces cerevisiae (Swiss:P19516). Met 224 is also thought to play a role in copper transfer or stabilising the copper site [2]. 
25.00 25.00 26.00 27.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.05 0.71 -4.42 135 1018 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 946 2 456 917 1462 143.90 41 68.61 CHANGED hVP.LYclFCc.sTGhsGpTp..............t.ssstph...........ts..RplpVcFsAssssshPWcFcPpppplpV+PGEsshshYpApN.occslsGpAs.sVsPspAutYFsKlcCFCFspQsLpsGEph-MPVhFalDP-lscD.....lcslTLSYTFFcs .....................hVPLYchhCp...sTGhsGpstp.............pttssp.............hsts.....Rp...l...pVpFsAssssshPWcFpPpppplp...............VpPGEsshshYpApN.o.scslhGpAshsV..sPtpAut.........YFsKl.pCFCFpcQpLpsGE.ph-MPVhFalDP-hspD.....lcslTLSYTFFc.......... 0 129 252 360 +173 PF01148 CTP_transf_1 Cytidylyltrans; Cytidylyltransferase family Finn RD, Bateman A anon Pfam-B_921 (release 3.0) Family The members of this family are integral membrane protein cytidylyltransferases. The family includes phosphatidate cytidylyltransferase EC:2.7.7.41 as well as Sec59 from yeast. Sec59 is a dolichol kinase EC:2.7.1.108. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.96 0.70 -4.73 48 6283 2012-10-02 14:06:56 2003-04-07 12:59:11 15 17 4727 0 1821 4702 2431 255.40 24 86.77 CHANGED htRhlsshhhlslhhhhlhhut..........hhhhhllhhlshhshhEhhphhthphhthhhhhs......................h.hhhhhhshhhhhh..h..hhthhhhhh.hhhhlhh.....................htthhhhhh.hhhhshhh.............................h.hht..htGhhhhlhshhllhssDshAYhhG+taG+p....hhPplSPsKTlEGhlGGhlsullhuhlhhhhht..........................................................hshhthlhlullsulhuhhGDlhcShhKRphtlKD.GphlPGHGGlhDRhDuhlhsuslhahhhhhh ....................................................................................................................................................................................Rhhsu.h.lhls.hhl.h.h.l.hhss...................hhhh.h.hh.h...h.l....s......h...luhh.E.hh...p..h.h.....t.h.p.t...h...p.h........h..hhh........................................................h..hh....h..h...h..h.....h...h....h.....h.............h...h.....................h........h...........h.....h.........h.....h......h.......h.....h....h....h..h.....h.h...h.lhhhh..........................................................t..ht.p..h..h...h...s..l..h...s..h..h..h.h.shhh.....................................................................................hh.h.h..t...s...h.........t....G..h......h...h....l.....l..h..h...h...h.l.lh..s....sDsuAYh.....h.....G.....+.....h......a.....G......+......+....Kl..............hP........p.........l.......S....Ps.K...ThEGhlGG.ll..s.uh...ll..u..h..h..h..s.h.h..hs...............................................................................................................................................................................................hs.hht...h.l.l.h..u.ll..h.u.l...h.u...hGDLh.EShhKRphG...l.K.........DS...G...p...llP.G.HG.....G....lLDRhDS..h........hhssPlhhhhh...h...............................................
................................ 1 615 1145 1531 +174 PF04145 Ctr Ctr copper transporter family Bateman A anon Pfam-B_3006 (release 7.3) Family The redox active metal copper is an essential cofactor in critical biological processes such as respiration, iron transport, oxidative stress protection, hormone production, and pigmentation. A widely conserved family of high-affinity copper transport proteins (Ctr proteins) mediates copper uptake at the plasma membrane. A series of clustered methionine residues in the hydrophilic extracellular domain, and an MXXXM motif in the second transmembrane domain, are important for copper uptake. These methionine probably coordinate copper during the process of metal transport. 22.10 22.10 22.20 22.20 21.80 22.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.75 0.71 -4.14 116 1128 2009-01-15 18:05:59 2003-04-07 12:59:11 10 21 303 3 804 1068 7 123.20 22 63.53 CHANGED MsMh....Fphsh.ssslLFpsWpss..otutahsohlhlhlluhhhchLchhppphcpthh......tp........thtt......t...............................................................................thphhtclhp.....uhLah......lphsluYh..LM.....LlsMoaNshlhlulllGh....slGhhhF ...................................................................................h.Mh.....F.hsh.p....hsl..laptWphp...............o..s..thhhuhlslhhluhhhEhLphhpt.ht..t.t.hh........tt.........................................................................................................................................................................................................................................................h..h.chhp.....shlah......lphsluYh..lM........LhhMoaNshlhlullhGhhlGhhh................................................................................................................... 
1 242 446 686 +175 PF00394 Cu-oxidase Multicopper oxidase Finn RD, Griffiths-Jones SR anon Prosite Domain Many of the proteins in this family contain multiple similar copies of this plastocyanin-like domain. 20.30 19.90 20.30 19.90 20.20 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.94 0.71 -4.33 72 9871 2012-10-02 17:41:00 2003-04-07 12:59:11 17 57 2516 325 2092 9906 260 140.70 23 37.09 CHANGED cphslhluDWY.cpsspsh...h.ts............s..sDuhlINGpst...s....................................hhslsVpsGKpYRlRll.ssuspsshsFpIsuH.phollEs......DGsasp..Phss-slpIhsGQpaulllsA...sp.s...sssYaltst........h..s....htsssssulLcYpsu ........................................................................................................................t..hhlh.pDaa...pstps....h.p...h.t..tt.s...th...............................thhssp..h.lh..N.G.............................................................................................................................sss...h.s.s.p...sh..p...s..t.h.s...............p.l.h...ll..s..t.u...s.p...c.s....h....s......a.....hI..........s.........G........H..............h.l..ltu..........c.....G....p.a.ss..............s.h..s...l....-.....s.....h..h........l.t.s.G.............pthssl...hsh......pp............ss.Y....hhhs..........................................s.................................................... 0 504 1249 1757 +176 PF03712 Cu2_monoox_C Copper type II ascorbate-dependent monooxygenase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of members of this family adopt the same PNGase F-like fold. 
20.50 20.50 20.60 20.90 20.20 20.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.01 0.71 -4.88 25 590 2009-01-15 18:05:59 2003-04-07 12:59:11 10 37 157 18 376 631 214 149.50 26 25.41 CHANGED pa.AGlhhLhsshh....sIPPspssash-ssC.hpp.scshaP......FAhpsHsHthG+hlsuhphRssp.....hphlu+psshsP..QtFYslc..phlcVpsGDtLsscCoY-op..sp.........spssthGtospDEMCshYlhYYs.....-ssphpshtsChustssp...hhpshssts ...............................................AGhh.hsh........hIPPsppsh...p.shCphpp..sp.hhs...............FAhhhH.....sHhhG+..tlps..h.phR.ssp.......................phphlscss.......sas......thQt...h.h.....lp.......p.lsl.hs...........GDhL.....hscCsasop....s+.............................spsThhGh..ssp-EMChhalhY.Ys......t...p.h..h....t..t................h.................................... 0 189 216 300 +177 PF01082 Cu2_monooxygen Copper type II ascorbate-dependent monooxygenase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of members of this family adopt the same PNGase F-like fold. 21.10 21.10 21.60 21.50 20.10 20.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.20 0.71 -4.20 38 534 2009-01-15 18:05:59 2003-04-07 12:59:11 15 24 136 18 344 574 28 124.70 27 21.58 CHANGED spph-lthpsl..hPsp.sssYaCphh+lss....p+aaIlpaEPlhst......shlHHMlLatCs......pshsp..................sstC.....h.tphthC.......spllhAWAhGutshta.PccVGhslGtst.s.s+allLEVHYss..sshpsups .............................h...h-lhh.sl...hPpp..pssYhChhh...cls......pcpal..l.p.a.-Phhs................shVHHhll......at...Cs........psh.sph......................st.C...........hs.tph.t..sC................ppll..hAWAh.Gut....s..hph..Pc..csGhslGsss...sspYlhLplHYss..sth....th.......................... 0 156 180 269 +178 PF02845 CUE CUE domain SMART anon Alignment kindly provided by SMART Family CUE domains have been shown to bind ubiquitin [3-4]. 
It has been suggested that CUE domains are related to Pfam:PF00627 [4] and this has been confirmed by the structure of the domain [5]. CUE domains also occur in two protein of the IL-1 signal transduction pathway, tollip and TAB2 [2]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -7.57 0.72 -4.44 132 1581 2012-10-01 23:03:33 2003-04-07 12:59:11 11 43 284 10 1034 1491 4 41.20 24 8.01 CHANGED ppstl..pplpphFPslspphIptsL.ppsssl-tslstlLphs ............thl..pplpphFPs.lspphlppsLt..tpsssl-tslspLLph.......... 1 287 499 788 +179 PF00190 Cupin_1 Seedstore_11s;Cupin; Cupin Griffiths-Jones SR anon Prosite Domain This family represents the conserved barrel domain of the 'cupin' superfamily [1] ('cupa' is the Latin term for a small barrel). This family contains 11S and 7S plant seed storage proteins, and germins. Plant seed storage proteins provide the major nitrogen source for the developing plant. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.78 0.71 -4.76 225 3563 2012-10-10 13:59:34 2003-04-07 12:59:11 17 24 631 221 1395 6143 837 141.10 18 62.47 CHANGED hshtpssp....hhpspuGtl...pthsspphs.hhtphthsththth.pssulhsP+ap..sAspllaVhcGpuhhshlssss...........................................................................................p.hppc......lppGDlhslPtGhshhhhsst..psshthsshssssss...................lst.hhtpsFhlusppsp 
........................................................................................................p.t.su..h...p...h.s.s..pphs........tth..t..h..s..tsth....h.tss.u.h.h.....P...Hap..pA..s....c..lhal...l...p.G.......p..uhlsh.l.ssss......................................................................................................................................ph.hspp......lptGD..lhh......hPt..G...h..s..a.a.h.h...s.......s........t......s....s...sh..t.l..h.h...h..p...sss..t...................................hs..hh.psa.hs.................................................................................................. 0 198 814 1141 +180 PF04889 Cwf_Cwc_15 Cwf15/Cwc15 cell cycle control protein Mifsud W anon Pfam-B_6589 (release 7.6) Family This family represents Cwf15/Cwc15 (from Schizosaccharomyces pombe and Saccharomyces cerevisiae respectively) and their homologues. The function of these proteins is unknown, but they form part of the spliceosome and are thus thought to be involved in mRNA splicing [1]. 
27.20 27.20 28.10 27.50 27.00 27.10 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.91 0.70 -4.66 30 382 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 282 0 279 376 9 193.80 37 96.06 CHANGED MTTAHR....PTac.PA+GtpspuG......otpasSRsLPuHTpLK...........hRps.GQssp-ch.......t...p+DLRsEL.ctEtpptscc..........................thhth-stspppshch....spcp.cc..h.........................K+.h.............tpspshDADc........tp.............ssssD--.................uD...........s--DEsttLhtELE+IK+ERtEcct+c-cEtttpctcp+cpclhpGNPLLN.................ttssFslKRRWDDDVVFKNpA+s..cpt+c.pFlNDslRS-FHKKFhsKYl+ ...................................................MTTAtRPTacsAp.Gtpst..tt........otthpsR.LP.uH...........TplK..................hRps..sQ.s.....s.t-h.................p..t+-h+tEL.ptEttthtpp...................................................ttt.t......t.t.................................................................................................st..shDuDs.s..............................................tppp-p-................tc.........psc-D-s.ttLh..tELp+IK+ERtpcpt+cEtcpttp-tc.+.tplhpGNPL.LN.........................tsh..plKR..RWDDDV.V..F......K...NpA....+u....cptc......c....FlNDhlRS-FH++FhpKYl+....................................... 0 99 157 228 +181 PF04677 CwfJ_C_1 CwfJ_N_1; Protein similar to CwfJ C-terminus 1 Kerrison ND anon DOMO:DM04663; Family This region is found in the N terminus of Schizosaccharomyces pombe protein CwfJ (Swiss:Q09909). CwfJ is part of the Cdc5p complex involved in mRNA splicing [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.63 0.71 -4.46 8 549 2012-10-01 23:45:21 2003-04-07 12:59:11 10 23 278 0 412 1265 265 125.80 29 20.81 CHANGED spcp++hpps.csChFClsssslccHLlVSlGppsYluLPc.ssL...........spsHslIlPlpHhsss.hols-....-lh-EIppF+KuLstM.asupspDslFaEhs...spRs.HhplpsIPlPpphuch..ushhFp ..................................h...pp.ttt.spChaC....h...s....s.....s.....p.....h.....t....p...H....l...ll.o.l.Gsc.............sY.L.s..lsp..ssL....................ssG..HslIlPlpH.hssh.......sphsc................-sh.c......E....l....p......p....a.....+....p....u....L....p.c..M.....a..p....s.......p....s....p...s.......s.......l.....h..a...Eps..............t+.p......hHhthps.....lP..lPtphsphh...a............................................................ 0 144 225 337 +182 PF04676 CwfJ_C_2 CwfJ_N_2; Protein similar to CwfJ C-terminus 2 Kerrison ND anon DOMO:DM04663; Family This region is found in the N terminus of Schizosaccharomyces pombe protein CwfJ (Swiss:Q09909). CwfJ is part of the Cdc5p complex involved in mRNA splicing [1]. 21.30 21.30 21.50 21.30 21.10 20.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.14 0.72 -3.50 48 528 2009-01-15 18:05:59 2003-04-07 12:59:11 9 19 269 0 399 536 6 99.10 26 16.57 CHANGED castpt.hchtshpt...........hp..hspshsYFhVpls......................hsp..shhphl-.....................cpp..cFslp..FuRcVlus..lLsL.pRhpW+c..stpsppcEctcstpF+ptacsF.DaT ................................................h.ph.t.........pshcp..hs.pshsYFtVphs.............................hss....shs.Hhlc..............................................cpt..c.FPhp..FGR..-Vluu..hLs.lt.s....+...tW+p.....sppstc-cppcstpF+ct..ac.aDaT.......................... 0 140 219 326 +183 PF01705 CX CX module Hutter H, Bateman A anon Hutter H Family This domain has no known function. It is found in several C. elegans proteins. 
The domain contains 6 conserved cysteines that probably form three disulphide bridges. 25.00 25.00 25.80 26.40 24.00 24.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.31 0.72 -4.00 14 114 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 8 0 108 106 0 59.90 32 25.21 CHANGED YYWsspa....lpsscpP.shCEYpIs-..-DtELpNVsFsNGo+PpSlhFuCsst.ppCCGh-CCssh ....................YYWttta.......hts.pts..hCpa.lsp..pDhp..lpslpFssGop...P.pslhFuCtts.ppCCGhcCCt....... 0 30 40 108 +184 PF04673 Cyclase_polyket cyclase_polyket; Polyketide synthesis cyclase Mifsud W anon Pfam-B_5596 (release 7.5) Family This family represents a number of cyclases involved in polyketide synthesis in a number of actinobacterial species. 23.70 23.70 25.00 53.00 23.30 23.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.28 0.72 -3.95 23 122 2009-09-11 03:24:20 2003-04-07 12:59:11 7 2 90 1 40 120 0 96.20 46 87.19 CHANGED MsPususcVAclFuEpD...sTELP+lhGspRRpLFpa+s.LYFHLh-.ssscssssltpA+sHPpFhclSccLpsalssYDP.sTWRuPpD.AMAppFYpWsA .....MsPususcVAclFA-SD...uoELP+hhGVpRRpLFpacs.LYhHLlE..s....c....csssttltps..+.s..cP...cFhclSccLpsalssYDP.tTWRuPpD.AMAppFYpWp... 0 10 30 38 +185 PF00134 Cyclin_N cyclin; Cyclin, N-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Cyclins regulate cyclin dependent kinases (CDKs). Swiss:P22674 is a Uracil-DNA glycosylase that is related to other cyclins [4]. Cyclins contain two domains of similar all-alpha fold, of which this family corresponds with the N-terminal domain. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.54 0.71 -4.57 127 6213 2012-10-03 00:42:12 2003-04-07 12:59:11 18 86 547 190 3707 6602 83 123.90 20 31.34 CHANGED -IapahpphEtp...t.s.saht.pp.....ls..pMRsILlDWLlc.VppcacLh.ETLaLslshlDRFLutp......l.+pcLQLlGlsuhhlAuK........aEEh...............hsPplp-ashlo.....Ds.sas...............pcpllpMEphlLpsLpapls ......................................................................................................................hp....th.Rt.h.h...h...p..a....l............h..........p........l..t....p.p....h...p...........l........t..t...p..T.h..h.h...u.l......s.....h.h.......-.......R...F......htpt..s.....................lpp...p....p......h.......p...L....l....u......s..ssl.h..........l.....Au...K.............................h..E..Eh...................................................s.s..p.l..p.c.h..h.....h.hs.........cp..shs.................................................tpp..lhphEh.hlLpt.Lpapl....................................................................................... 0 1218 1928 2867 +186 PF02276 CytoC_RC Photosynthetic reaction centre cytochrome C subunit Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_5109 (release 5.2) Family Photosynthesis in purple bacteria is dependent on light-induced electron transfer in the reaction centre (RC), coupled to the uptake of protons from the cytoplasm. The RC contains a cytochrome molecule which re-reduces the oxidised electron donor. 
22.40 22.40 22.50 26.80 20.30 22.30 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.26 0.70 -5.32 25 104 2012-10-01 23:37:15 2003-04-07 12:59:11 13 3 88 20 28 104 73 250.30 32 79.94 CHANGED h.ppP.ss-shQoG.RGsGMptsp.scslsp.sss..shss.stsssss..usPpAu-lYpNVp.VLGDLosupFsRLMsAhTpWVu.Pc-GCsYCHss.p..sausDshYTKhVuRpMlpMTQplNssWss.HV...upsGVTCYTCHRGpPVPstlWap..ssshssuhtGhsssQNtuss..............ss.saoSLPsDshp.aL....L-.....scsI......+Vpshsuhs...ssssc...s.lppAEhTYuLMhHhSsSLGVNCTaCHNoRAFa-W.sQuTPQRssAahGIcMVR-lNssYltPL.pshhPspRLGPh.GDssKlsCtTCHpGshpPLtGsshlcDaPELAss .......................................P.....s.QhG.RGhuh......t...t......t..s.s...s....stshAsp.hapNVp.VL.tcls.spFs+lMsuhopWlu...pps..CsYCHs....t.......shAsDshapKhVuRpMlpMTpplNs...pW.....ps..Hs.......stsG....V...oCYTCHRGpshPt.hW...hp.......tt.h....t..ps..t.................s.thsuLP.Dshp.aL.....t......tt.l......pl.s........puhs.....t..tt...s.hppsEhoauLM..ahSsuLGVsCsaCHNopsFhsh.p..p..ssPp+shuhhulpMsp-lNtpah.sh.t..hPt..phh......G...s-s.+hsChTCHpGh.pPL.G..h.phhtpa..L..s................................... 0 14 24 25 +187 PF05038 Cytochrom_B558a cytochr_b558a; Cytochrome Cytochrome b558 alpha-subunit Moxon SJ anon Pfam-B_5327 (release 7.7) Family Cytochrome b-245 light chain (p22-phox) is one of the key electron transfer elements of the NADPH oxidase in phagocytes [1]. 
25.00 25.00 26.50 25.80 21.30 23.30 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.15 0.71 -4.66 4 85 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 62 1 43 84 0 159.20 60 95.33 CHANGED GpIEWAMWANEQALAoGLILlsGGIVusAGpFppW.FGAYuIAAGVLVCLLEYPRGKRsKG.oThERsGQ+hLTtsVKshGPLoRNYYlRAhLHLulsVPuGFlLATILGssCLsIASlIYLhAAl+GEpWpPI.s+.c-.Rs.VGtoIKpPPoNPPPRPPsEhR+KsuE-.sssA.........NPhsVTspsV ............GpIEWAMWANEQALAuGl.lLlsGGIVu.......sAG.pFptW.F....uA...YuI..sAGVhVCLLEYPRG.KRpKG.oTMERsGQ+YhTslVKhFGPLT...RNYYlRAhLHhh.LuVPuGFLLATILGTsCLuIAShIYLLAAlRGEpWp.PIE......s.+scp..RsplGsoIKpPPoNPPPRPPs-sR+K.s-t.t.ss...........NPhsVp.................................... 0 14 17 26 +188 PF01820 Dala_Dala_lig_N Dala_Dala_ligas; D-ala D-ala ligase N-terminus Bateman A, Moxon SJ anon PSI-BLAST 2dln Family This family represents the N-terminal region of the D-alanine--D-alanine ligase enzyme EC:6.3.2.4 which is thought to be involved in substrate binding [2]. D-Alanine is one of the central molecules of the cross-linking step of peptidoglycan assembly. There are three enzymes involved in the D-alanine branch of peptidoglycan biosynthesis: the pyridoxal phosphate-dependent D-alanine racemase (Alr), the ATP-dependent D-alanine:D-alanine ligase (Ddl), and the ATP-dependent D-alanine:D-alanine-adding enzyme (MurF) [3]. 
21.60 21.60 23.00 21.60 21.30 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.29 0.71 -3.78 176 7730 2009-01-15 18:05:59 2003-04-07 12:59:11 16 22 4347 83 1553 5359 3003 82.10 29 32.44 CHANGED h+..lullhGGpSsE+-VSltSApslhpuL....p.p..ppaclh.......l.hls.cpG.tahhhtt..t.ht..t......................................................htphDllFslLHGs..hGEDGslQGlL-hhslPYsGs ............................................................plsllhGGhSsE+plSl.Sutslhtsl..................p......tth.p.sh...............h...h.....p....t................................................................................................................................................thsh...ha.hlHG....GE..DGslQGhhchhtlPasGs........................................ 0 540 1043 1317 +189 PF01113 DapB_N DapB; Dihydrodipicolinate reductase, N-terminus Finn RD, Bateman A, Studholme, DJ anon Prosite Domain Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The N-terminal domain of DapB binds the dinucleotide NADPH. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.49 0.71 -4.20 99 5206 2012-10-10 17:06:42 2003-04-07 12:59:11 15 17 4187 43 1251 5533 3347 112.90 28 41.03 CHANGED l+lslsGAsGRMGppllcslpp.p......sshpLsuul-css......t.shs...........t.....sl.l...sclppshsp..sD..........VllDFT.pPpushpplchshpt.shshVlGTTGa...opcphpplpp.hu...cc..lsllhusNa ........................................h+lslhG.stG+MG....p....t.l.l.p..slt.p...t........p.s..h.p.L..s..u..s..l-pss...............................................................t.ht......h...s...l.....l............ss..l..s...s.l.hsp.........sD.......................................V.ll.......D.F.......T.....p..P..p...s....s........h.....p..p...l..p...h....s...h...pp...s..........h.....s......h.V..l......G...T.T.G.h........sptt..h..t...t.lpp...hu...pp..........hsh.lhusNh........................................................................................... 0 372 808 1057 +190 PF01682 DB DB module Hutter H, Bateman A anon [1] Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 12 conserved cysteines that probably form six disulphide bridges. This domain is found associated with ig Pfam:PF00047 and fn3 Pfam:PF00041 domains, as well as in some lipases Pfam:PF00657. 25.00 25.00 28.80 25.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.97 0.72 -11.48 0.72 -3.96 26 351 2009-01-15 18:05:59 2003-04-07 12:59:11 14 38 33 0 314 315 0 96.30 27 28.50 CHANGED CCpspt.lsstChp.hCsapsh......h.shhhtsspCsh..phsplhpCAupscDHosCCtcpuVs.......spChshCp.p.s......hsshthshhsChp.phsshhpCFh ..................................CCppps..l.sstChs.hC..sapth.......................p.h.hhtsspCsh..phsphhpCAup..G..+.............D..HopCCt..cpGls.....................spChshCpsp.s......hsslthshhsChp.thpshhpCa............................................ 
0 111 144 302 +191 PF05011 DBR1 Lariat debranching enzyme, C-terminal domain Wood V, Bateman A anon Pfam-B_9676 (release 7.6) Domain This presumed domain is found at the C-terminus of lariat debranching enzyme. This domain is always found in association with Pfam:PF00149. 21.10 21.10 22.50 21.40 20.20 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.26 0.71 -4.31 24 333 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 279 0 244 330 6 132.20 28 26.79 CHANGED lPc....os+s..T+FLALDKCLPtRcFLQll-lsstsss............hpLpYD.EWLAIh+shsphhp......l.sshss.sstspG....c.tacshhE-ppphVpEcl.ttscLpl.PcNFshTAPsacsu..sth...sst.PstatNPQTspFCcLLGlcshh .....................................h......tsps...T+FLALDKCLPc....R...c....F.....LQl..l-ltstssss............................................pLpYD.EW...L.....uIh+shpshhs..............................l.t.p.........stp.t..................hts.....p.pthp.l..pp...l...........t...t..p......htl....P.p....sF.....T.ss.s.ass....s...............................hhNPQTttasthlsl....h....................................................... 0 86 135 205 +192 PF03107 C1_2 DC1; C1 domain Bateman A anon Pfam-B_16 (release 6.5) Domain This short domain is rich in cysteines and histidines. The pattern of conservation is similar to that found in Pfam:PF00130, therefore we have termed this domain DC1 for divergent C1 domain. This domain probably also binds to two zinc ions. The function of proteins with this domain is uncertain, however this domain may bind to molecules such as diacylglycerol (A Bateman pers. obs.). This family are found in plant proteins. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.83 0.72 -3.83 90 558 2012-10-02 13:15:50 2003-04-07 12:59:11 11 57 24 0 370 664 0 31.30 34 11.07 CHANGED hhCslC.c+phssh......hYpC.ppss........aslHsp.Cu ..........CslC.ccp..lssp.....hhYpC..pcCs........aslHsp.Cs. 
0 312 351 358 +193 PF00383 dCMP_cyt_deam_1 dCMP_cyt_deam; Cytidine and deoxycytidylate deaminase zinc-binding region Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.79 0.72 -4.37 59 17292 2012-10-02 00:10:39 2003-04-07 12:59:11 17 96 5079 154 4759 12505 5590 105.20 26 47.17 CHANGED hp.cchhhphAhthAppuh..stsphsVGAllVp..sspllupGhNtp.tuts......................................................shHAEhsAltpAsp....t.ph........................s..sslal.............ThpPCs.h......Csphllpt.u.lp+Vlhst ..................................................................................................................................................p.hhphAlp.h.Appuh.......st.sphsV.G.A.ll..V...........p....................s........s........p..........l...........l..........u.........p..G.h.N.t.p.....t.s.ts.............................................................................................................................s.hHAEh.p...Alpp......Aspt....t...t.ph............................................................................................................t.s.....sslYV.......................................T.l...pP..Csh................Cspsll...ps...t..ltclhhh.t............................................................................................................ 0 1638 3063 4050 +194 PF05026 DCP2 Dcp2, box A domain Wood V anon Pfam-B_10622 (release 7.6) Domain This domain is always found to the amino terminal side of Pfam:PF00293. This domain is specific to mRNA decapping protein 2 and this region has been termed Box A [2]. Removal of the cap structure is catalysed by the Dcp1-Dcp2 complex [3]. 
28.00 28.00 29.10 29.90 26.80 27.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.02 0.72 -4.09 23 313 2009-09-11 21:45:37 2003-04-07 12:59:11 8 12 259 7 225 319 2 85.10 41 13.28 CHANGED cchL-DLssRFIlNlPpE-LsolERlhFQlEEApWFYpDFlR.hsP..tLPshsh+sFupplhp+....CPLl.pa......ttshcpALpcFtpYK ...........clLDDLssRFIlNlPpEE...l....p....sh.........RlhFQlEpAaWFY.DFhp.............t.......t.s......P......sLP.sh.s.l+sFut..t.............l.F....p+......CPh..Ltta......ttchcchhpcappYK............... 0 83 130 190 +195 PF03607 DCX Doublecortin Griffiths-Jones SR anon PROSITE Family \N 21.40 21.40 22.10 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.58 0.72 -4.38 53 1063 2009-01-15 18:05:59 2003-04-07 12:59:11 12 41 119 7 602 931 2 60.20 32 14.44 CHANGED lllsp.+phpoa-slLsclTc....plpLs.tG.VRplaThcG.+plsslcc.LpsGpsYVsu.upE.tFK ......................hhlsp.+ph+oF-slLs-lTc.......tlp..L.....s...pG..V+plY.Th-G.+p.....l..ssLp-..l...t..-...u.ps..aVss.u..E..F+........... 0 142 196 349 +196 PF03455 dDENN dDENN domain Callebaut I anon Callebaut I Domain This region is always found associated with Pfam:PF02141. It is predicted to form a globular domain [1]. This domain is predicted to be completely alpha helical. Although not statistically supported it has been suggested that this domain may be similar to members of the Rho/Rac/Cdc42 GEF family [1]. 
21.10 21.10 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.47 0.72 -3.90 46 1393 2009-01-15 18:05:59 2003-04-07 12:59:11 14 84 197 2 782 1322 6 70.50 27 5.94 CHANGED lsp.plpcsFLphhsp..lhtsYcsalph.p..tt...............tFpppuFlcs.pspshp.....pFlpphhco...QhFspFIcc+ ...........................sttlptsFl+hhsp...lht..s......Y....c.palphhp...tt.p............................................sF.pp....p.uFLps...psps.p.......pFl.pphh.cT...QhFttFIpp+............................................ 0 198 284 494 +197 PF02791 DDT DDT domain Iyer LM, Aravind L, Bateman A anon [1] Family The DDT domain is named after (DNA binding homeobox and Different Transcription factors) and is approximately 60 residues in length [1]. Along with the WHIM motifs, it comprises an entirely alpha helical module found in diverse eukaryotic chromatin proteins [2]. Based on the structure of Ioc3, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [2][3]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. In particular, the DDT domain, in combination with the WHIM1 and WHIM2 motifs form the SLIDE domain binding pocket [2]. 20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.84 0.72 -4.20 42 658 2012-10-07 04:36:59 2003-04-07 12:59:11 12 81 241 0 419 641 5 61.50 26 4.04 CHANGED sp.shschL......tlacFLpsFuclLpLssF.....oh--FtpAltspssp....LhsElHhsLl+h..llps.ps ............s.pthuchL................hlacFLpsFuclLtLs.sh...................ol.-.s.h.ppAL.......h.spssp......................hhsElphsLL+h..lhpt.t..................... 2 113 213 327 +198 PF00270 DEAD DEAD/DEAH box helicase Bateman A, Bruskiewich R, Sonnhammer ELL anon Published_alignment Domain Members of this family include the DEAD and DEAH box helicases. 
Helicases are involved in unwinding nucleic acids. The DEAD box helicases are involved in various aspects of RNA metabolism, including nuclear transcription, pre mRNA splicing, ribosome biogenesis, nucleocytoplasmic transport, translation, RNA decay and organellar gene expression. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null --hand HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.48 0.71 -4.76 182 63415 2012-10-05 12:31:07 2003-04-07 12:59:11 24 708 5596 153 23464 73599 16995 166.80 22 21.58 CHANGED osl.Q...tpsls...hlh..p.s............cD.ll........stutTGoGKThsahl...shl...ptlht............................................................................................................t................................................................................................................pslllsPT+pLspQhhpph...pphhph..............h.l...........psshl...hG........shs..hp.pptthl.........ps.scllluTPsc.....lhshlpp....th.th........hp.lph..lVlDEscphhs.........sats....plpp......lhpt..h....................t.................hp.hlhhSAThs.pslpcl 
............................................................................................................................................................................................................................................................Q...tp..s.l.........hh......p..s...............................ps....h..l.....................st.u...t..T...........G...o....GK............T.....h.....s...h..h..l........s.h..l........p..t..l.....pt...t...........................................................................................................................................................................................................................................................................................................................................................................psl..l...l...s...P...s...+.....t....L.......A...t....Q..........h.......h.......p...p....h...........p.p..h.h.tt................................t.h.......................................................ps.s..hl................hG...................................s.hs.........hp.......p..p...hphl...............................pt.t....s...c..l.........l....l..u.....T.s..ut.............................l..h..s...h...lpp...............h...th..................................pp.....l....p....h.........l.........l.l..D.E...s..c..c...h.h.s..................................sh.t..................................p.l.p.p..........................lh.ph.....h.....................................p........................................hp..hlhh..S.A.Ths..tt....h................................................................................................................................................................................................................................................................................................................... 0 8187 14068 19550 +199 PF00531 Death death; Death domain Bateman A, Griffiths-Jones SR anon Reference [1] and [2]. 
Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.70 0.72 -4.21 88 3125 2012-10-01 21:41:45 2003-04-07 12:59:11 17 542 171 79 1743 2836 0 80.60 19 10.09 CHANGED pplhphl..sphhsppWpplu.cpL...slspsplcphcp...ps......ptshplLptWtpp......puolssLhpsLpphstpcssct.lpphh ................................t......h....sp.hspsW...p..pL....A..ppL.......sh.s..ppplp..tlcp.............pp...t....ptshp...lLptWtpp..........t.....t.s......u..........s......l.......s..p..LhpsLpp..hs..ppchsc..lt...t.......................... 0 804 885 1205 +200 PF04626 DEC-1_C Dec-1 protein, C terminal region Kerrison ND anon DOMO:DM04594; Family The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing [1]. Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). Alternative splicing generates different carboxyl terminal ends in different protein isoforms, so this is region is the most C terminal region that is present in the main isoforms. 25.00 25.00 88.40 36.70 24.70 24.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.84 0.71 -3.99 2 24 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 11 0 11 30 0 120.90 52 10.72 CHANGED MPSMMQREVEDEDNKAEDDLVGEAGPQMPENEGTARHKVDALGVGGNKRKKSKSKSAPPTVINYYYAAPQRPVVQSYGTSYGGGGYGSNAYGVPRPVNSYQSQGYRAAVGNDEVDEMLRQHQTMARTINPKQ ....................t.....Qpt.p.tppsct.p..-sll.GEAtPQMPEstGpARHK.VDhLGlGGs+..RKKSK....upou.PsVINYYauuP....p.............h...........h.....s.............SYGTSYG...GGGYGS...NAYGs..........h.s...NsYQ...t......GYR.AAVGNDElDpMLRQHQTMAps......p............... 
0 3 3 8 +201 PF02141 DENN DENN (AEX-3) domain Mian N, Bateman A anon IPR001194 Family DENN (after differentially expressed in neoplastic vs normal cells) is a domain which occurs in several proteins involved in Rab- mediated processes or regulation of MAPK signalling pathways [1]. 23.40 23.40 23.40 23.40 23.30 23.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.14 0.71 -4.57 40 1791 2012-10-02 14:18:06 2003-04-07 12:59:11 16 97 243 2 1055 1806 19 178.90 27 16.35 CHANGED hsPcshCllS+hsahssF.cchLstlhphhtpus..............hslcsaltshltpsshPsPGpshph.p...husp-hhhhspPt.sspL..Ph..pssshp.....tLapsLuscNllpLasssLh.Ep+IlhhSpchshLopsscAlsuLLaPhpWQasYIPlLPspLl-sL.sAPsPFllGlpuphhs.....hhps.s-lllV..DLDss ......................................................................................................................................h...pshsl.lS.p.hshhpha.p..........c...hLt...lhphhh.ps.......................................................l.p.t...h..l..tp......l..hph.s...h..P.s.s....u..ps.hhh...........................ts.t.thh..h..h....pt..P......sspL.......sh.......pshsh.p.........................l...h..p.tL.u..l..c..sl..l..p........lh.....sshLh..Ec+l..lhhSp.ch.................s..h......Lo....t.ssc...u........l..su..ll..........a..P..hp....W...p....a...s..a...IP...lL..P..s...p.....h.....h...c.........h.l....s...............u..PsP.........al.lGlp.u..p..hhp.....................t-l..l.hl..DlDs............................................. 0 352 490 745 +202 PF00610 DEP Domain found in Dishevelled, Egl-10, and Pleckstrin (DEP) Ponting C, Schultz J, Bork P, Martemyanov K, Thorner J anon SMART Domain The DEP domain [1] is responsible for mediating intracellular protein targeting and regulation of protein stability in the cell [2-3]. The DEP domain is present in a number of signaling molecules, including Regulator of G protein Signaling (RGS) proteins, and has been implicated in membrane targeting [4-5]. 
New findings in yeast, however, demonstrate a major role for a DEP domain in mediating the interaction of an RGS protein to the C-terminal tail of a GPCR, thus placing RGS in close proximity with its substrate G protein alpha subunit [6-7]. 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.43 0.72 -4.23 133 2616 2009-01-15 18:05:59 2003-04-07 12:59:11 16 134 308 16 1545 2411 15 72.20 25 9.41 CHANGED ulplpcc+h..thps..atpsFsGs-hVcWLh........h..pt.htsRpcAlpluptLl..............ppGhlpplpscp.t..............Ft-s.thhYpF .......................lps+ch...hh..ps..h..psF...s.....G.s...-lV-WLh......................p.h.....p....hps.RpcAlpluptL.l....................................................cpG..hlpp.lssct.p...........................Fp-s.thhYpF..................................... 0 410 638 1072 +203 PF02272 DHHA1 DHHA1 domain Bateman A anon Bateman A Family This domain is often found adjacent to the DHH domain Pfam:PF01368 and is called DHHA1 for DHH associated domain. This domain is diagnostic of DHH subfamily 1 members [1]. This domains is also found in alanyl tRNA synthetase e.g. Swiss:P00957, suggesting that this domain may have an RNA binding function. The domain is about 60 residues long and contains a conserved GG motif. 21.20 16.00 21.20 16.90 21.10 15.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.45 0.72 -4.27 133 12856 2009-01-15 18:05:59 2003-04-07 12:59:11 14 38 4786 16 3060 9901 3277 66.90 22 10.25 CHANGED ppsllhhs......pssphpsssRs.pslshp.......pllpphtt.hsh...tGGG+spsAuushppsp...........lpphlptlpp ...................................t...hllhs.........ttss....p...l...p...hssR....S..h.ps..lshp...........pllc.ph..t........h.h.....pGGGHsts..A..us.s.hpsss.............lpphhpth..t........................... 
0 1073 2042 2615 +204 PF04922 DIE2_ALG10 DIE2/ALG10 family Wood V, Bateman A anon Pfam-B_9570 (release 7.6) Family The ALG10 protein from Saccharomyces cerevisiae encodes the alpha-1,2 glucosyltransferase of the endoplasmic reticulum. This protein has been characterised in rat as potassium channel regulator 1 [2]. 19.70 19.70 29.00 23.10 18.30 18.20 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.52 0.70 -5.44 5 365 2012-10-03 03:08:05 2003-04-07 12:59:11 7 9 246 0 265 367 4 294.00 27 77.66 CHANGED s+hVPEPYMDEIFHlsQAQpYCcGcao..pWDPhITTPPGLYllSlu...sLpPu..hhusSsloh........LRhlNhLsuV..hhhshLlhRhIplhN.t.u..slsahAloLusaPlLaFFoFLYYTDluSlhhVLhuh...LshsaGsh+s...SAFhuslSshF....RQTNIIWsuFlAso.hhs.phu.tp.pllQcphs-...........LRohlpaLK.........hFl+.....SlccFSsLlL.....................PYhhlhluFhlFllWN.GuIVLGDKSuHpAuLHluQIFYFhsFsAhFSaPhaISsNhl+Hh++p..lp+phsppShlllulVhLlsaF.ThVHPFLLADNRHYTFYlWRRllsp..+hlh+ahLsP.......uYlauh....ashtslosp...........hs+loWpLLahlsTlloLVPuPLlEFRYYILPYllWRL 
.............................................................s.......ah....DEhFHl.Qs.tYhp........t.....pa.............WDshITT.PGL....Yh.h.uhh............h.sh..h......t..h..s.....................................LR.hs..h..hh..h.......h..h..hh.h.....p.......h............................h...t........................................................................................h..........s....h....s..l.....hhP.haha.hLaYTDhhShhh....ll..h...................h....h...........t.....h.....................s.s.......h....h...u.hh.......uhhh........RQTNlhWhh....h.h.hs................................h..........................................................................................................................................................h...........................................sah.h.hhhFhhFlh.hN.tulsl..................Gc+ptH.s.s..hHhsQhhYh...hhh.h.h...s.h....s...h............h...........................................h......h..............................h.........................................................h........h........h............h....h....h....h........hhh..ohh......H.ahLADNRHYhFYlaphhh............hhthh..hhs.......................hYhhsh...........h..h.h...h......................................................h.......hh..h..h.shhhshhst.LhE.RYahlPhhhh...................................................... 2 82 142 217 +205 PF01843 DIL DIL domain Bateman A anon [1] Family The DIL domain has no known function. 
20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.40 0.72 -4.15 56 1158 2009-09-11 05:00:57 2003-04-07 12:59:11 14 77 276 3 698 1062 2 99.80 30 7.74 CHANGED plhsQLapaIss.hhNsLlhR+sh...soappGhpl+.hslspl-cWscstshpts...shppLcplhQAspLLpl.pKpphp.-hchltphCs...sLsstQlh+llstYpssshp .................................QlhsQlFhhlss.hhN..........s.Llh..R+ch..................soasp.Ghpl+.hsls.pLE.cWhcs...ps.......ht.u.................uhppLp............l.hQAspLLph..p.....K..tp.p...shphlpshC...........................tLost.Qlh+lls.Ytssp..t....................................................... 0 167 330 521 +206 PF03018 Dirigent disease_resp; Dirigent-like protein Griffiths-Jones SR anon Pfam-B_835 (release 6.4) Family This family contains a number of proteins which are induced during disease response in plants. Members of this family are involved in lignification. 27.60 27.60 28.30 28.30 25.30 27.40 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.86 0.71 -4.68 86 847 2009-09-13 07:42:01 2003-04-07 12:59:11 9 14 70 0 466 810 1 136.60 29 66.27 CHANGED phscl+hYh.HDhls......G.sssTu.stVssssssst.................tFGslsVhDDsLTcGsshs..Sp.h....VGRAQGhYlhsupp.sh.....shhhuhohVFss..Gcas....GSTlslhGts.h.hsps..REhslVGGTGpFRhARGashh+Th.............phpssssllchs.lplh..h ..........................................................................................phphYhH-hht...........u.sssos.s.lstsstss..............................hFGs.l.sVhDDsLTpGss.hs........Sp..h........VGRAQGhYhhs.u...ts...sh....shhhshs.hsFp....s...s..cap............G...STlslhGt.s.......h..hsps....+Eh.ulVGGT..GpFphA+Ga...shhpoh.............t.tsssshhchslhl......................... 0 35 260 373 +207 PF02377 Dishevelled Dishevelled specific domain Mian N, Bateman A anon Pfam-B_1381 (release 5.2) Family This domain is specific to the signaling protein dishevelled. 
The domain is found adjacent to the PDZ domain Pfam:PF00595, often in conjunction with DEP (Pfam:PF00610) and DIX (Pfam:PF00778). 19.70 19.70 19.70 19.70 18.70 18.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.64 0.72 -3.77 4 215 2009-01-15 18:05:59 2003-04-07 12:59:11 10 14 74 0 107 198 0 70.60 48 10.92 CHANGED pR-RsRRRsp.EpAshhpGpsh.GcpcRcsus.h-SSSTlLSSELEooShhDS-EDDohSRhSSSTEQSSuSRL .....................pRERsRRRpp.....E.cs...s....+.h...NGps..t...u.c.p..c..R...c.hu.u....h-SSSTlMSSELESTSFhDSDEDD.o..h.S...RhS......SSTEQSSuSRL..................... 0 17 27 58 +208 PF02916 DNA_PPF DNA polymerase processivity factor Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 25.10 26.10 24.70 24.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.20 0.72 -4.00 11 530 2012-10-02 11:47:48 2003-04-07 12:59:11 10 9 359 26 20 382 453 114.60 54 23.78 CHANGED hKhpKhThslLh.huhlsusshhuhtphl..hs+hsssosYSE..hshSsllstDssIpDlsplpSlluPsssDs-.....lstlpp.stslplssspos.sa.sA.pShlsucs+ulVh ..............KKAcKFTlhLLVhSlLVSSVoLFAlQQFVsLTNRLNuTSNYSE..Yol..SVsVhADS-IcNVoQL..o..S..Vs..APTus.DsENI.pcLLuDIK.o.opssDLTVs...p.So.SYLAAYKSLIuGE.oKAIVL........................................... 0 2 4 14 +209 PF01965 DJ-1_PfpI ThiJ; DJ-1/PfpI family Bateman A, Enright A, Ouzounis C anon Enright A Family The family includes the protease PfpI Swiss:Q51732 [1]. This domain is also found in transcriptional regulators such as Swiss:Q9RJG8. This N-terminal region of the full-length AdpA proteins is necessary for dimerisation of the molecule. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.94 0.71 -4.74 39 9251 2012-10-03 00:28:14 2003-04-07 12:59:11 19 42 4024 121 2389 11671 1005 150.20 21 67.66 CHANGED plsslusppst......hpsp.s...........hplpsDtslsclsspp........aDslllPGGhssscpLt............ttlhchl+chhpp.uphluAICpuP.hlLhsssllp....................G++hTua.shps........hpttstphh-..t.Vs....hss.............llTutssssshpFshpllctLt ................................................................................t.........................h.t.t.t.................ht.l.t..s..s.t.s.......l..s..c.l.p....s.sc....................aD.ull..l.P....G.....G...h.u...s..s..p....s..Lp.c.....................................ppl.h.p...h....l.....c.p....h....h..........p..........s.....s..........K............lu...A.lCpuP..th....L....s...s.s.....s.l.hp.........................................G.+.p..h.T.u...a..s....s..h.pp..........................th.p.t..h..u..s...p..a...h...c.............t...tss.......................hDt............................................pllTupsPsss.thuhtllp.l............................................................................................ 0 717 1413 1949 +210 PF00751 DM DM-domain; DM DNA binding domain Bateman A anon [1] Family The DM domain is named after dsx and mab-3 [1]. dsx contains a single amino-terminal DM domain, whereas mab-3 contains two amino-terminal domains. The DM domain has a pattern of conserved zinc chelating residues C2H2C4 [2]. The dsx DM domain has been shown to dimerise and bind palindromic DNA [3]. 21.70 21.70 22.20 21.70 21.00 21.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.05 0.72 -4.43 26 1027 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 215 1 380 930 0 45.10 62 16.36 CHANGED RsPpCuRCRNHGl.hs.LKGHK+h.Cpa+sCpCpKCtLlt-RQ+lMAAQ ........R.PpCARCRNHGl...hosLKGHKRa.Cc..........aR-CpCpK.......CpLlsERQRVMAAQ........... 
0 112 146 271 +211 PF01068 DNA_ligase_A_M DNA_ligase; ATP dependent DNA ligase domain Finn RD, Bateman A anon Pfam-B_788 (release 3.0) Domain This domain belongs to a more diverse superfamily, including Pfam:PF01331 and Pfam:PF01653 [3]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.23 0.71 -4.95 46 3956 2012-10-02 00:43:09 2003-04-07 12:59:11 16 83 2053 23 1751 4137 1269 187.60 23 33.16 CHANGED PMLAp.hp..shtphhtph...................ttshhsEhKaDGtR..splH..t.pssphphaSRshcshTt........phsclhphlt.pthhssh.p..hlLDGElls..hs.pptphhPFttltp+h.+pph..........t..............phslshalFDlLhh.........sGps.L.......hphslpcR+plLpphh...................psclhlspthpssshc-lpchhcpslppGtEGlllKs...ssuhYcsu..+R.spsWlKlK .................................................................................................................................stsahhEhKaD..G.hR........s.ht....................h...ps.....s.......p........h........plh.S......R....s....sc.shos..................................th.s.p.l..h...t....hh..........th.....h...........h..........t..t.............p.............hlLD..G..Ells.............h........t..p.........t.....t......h...h.s...F.pt.ltpp.h......ptph...................................................................phtlphhsFDllhh..................................sG.ps...l....................hph...sh...pcR.+...p...h...L..cp..hh......................................s..s..s.p...h...p.h..s....t...t......h...............s......p............s..............p...p.........h..t.......p.........h.......h.....p........p.......s....h.......p.......t...u.......h.......E..GlhhKp..............hsu...Y.....psG.....+R.....tsWhKlK.................................................................................................. 
1 555 1043 1451 +212 PF04679 DNA_ligase_A_C ATP dependent DNA ligase C terminal region Kerrison ND anon DOMO:DM04655; Family This region is found in many but not all ATP-dependent DNA ligase enzymes (EC:6.5.1.1). It is thought to constitute part of the catalytic core of ATP dependent DNA ligase [1]. 23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.41 0.72 -3.58 150 2662 2009-01-15 18:05:59 2003-04-07 12:59:11 10 60 1269 10 1353 2742 442 102.70 27 15.67 CHANGED ppsth....GuLLLusacs...........sp.......LthlG+VGTGFsspphpcLtppLp.slt.....tsps.Ph...........t........sshW.l..........cPpl...VsEVpa.s-hTts..................G.....pLRaPp.ah.tlRpDK ........................................tuhhuuhLlGsacst...........................sp.......................................hhhlu.+s.uoG.ao-ppl.pp.lp..pp.Lp...sht...........hppt.sh...................................t..........tssha..l................................cPph....VhElpa.s.ph.stu...........................G...slRa.Pp.ah...+lRpDK.............................. 0 429 801 1111 +213 PF04675 DNA_ligase_A_N DNA ligase N terminus Kerrison ND anon DOMO:DM04655; Family This region is found in many but not all ATP-dependent DNA ligase enzymes (EC:6.5.1.1). It is thought to be involved in DNA binding and in catalysis. In human DNA ligase I (Swiss:P18858), and in Saccharomyces cerevisiae (Swiss:P04819), this region was necessary for catalysis, and separated from the amino terminus by targeting elements. In vaccinia virus (Swiss:P16272) this region was not essential for catalysis, but deletion decreases the affinity for nicked DNA and decreased the rate of strand joining at a step subsequent to enzyme-adenylate formation [1]. 
21.40 21.40 21.40 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.99 0.71 -4.46 154 1767 2009-01-15 18:05:59 2003-04-07 12:59:11 9 63 1048 8 992 1785 366 162.80 20 23.21 CHANGED h.atpls.chhppl....pp.....souRhph.................sphlsshh...cp...h.ttp................lssslalhhspl.hPshp.sp.clGlup.phLh+.slupshs..hs.pplcp......thpp...........................hGDlu.sstphh......pppp.h.h...........psLTlp..c.VhppLpclAphsG..........ps......SppcKhphlppLhsps..ss.................tEt+allRhlhscLRlG.luctslhsAlup ........................................................h.ths.phhptl.........pt..................sstp.th.........................hp.hltphh.....pp...h.t.t........................hh.shhhhht.h.l.hs..........ht..t.................h...hsl.tp.p....h.lhc.hhsp..hhs......hs.....tthct..................phtt..........................................................hG..Dlu.tshtthh..............................pppp..h..................ssLTlp....p.V.ptLpplup...h.st......................................t..........spppph.p.h.......ltpl....hp.ps..ss.......................pEt+all.Rh.l.t.t.............c.l.R..l.G.hupphlhpAlu........................................... 1 319 572 820 +214 PF00875 DNA_photolyase DNA photolyase Bateman A, Griffiths-Jones SR anon Pfam-B_777 (release 3.0) Domain This domain binds a light harvesting cofactor. 
24.70 24.70 24.80 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.76 0.71 -4.50 143 3841 2012-10-02 18:00:56 2003-04-07 12:59:11 13 21 2592 52 1233 3283 3027 163.30 23 33.49 CHANGED slhWFR.cDLRlpDNsALh....tA......hps...s..t.......llsla..lh-sp.........................ussptt..aLhcuLpsLpppL....pphG...spLhlhpG.s.......stphls....pl.sp........phs.sssVahscchpshtpppDpplpptLp.p.........slph..ppapsp.hLhp...Ptpl.......s...psu.psacVFTPFh+thhp....ph.............shss..Pp .................................lhWFR.cD.LRlpDN.ALt.....................tA............hps.....s..st................................l.ls.la..lhsPp..........t.t.......................................ustphp...alhpsLpsLppsL....tp.h...s..............hs..L...hlhp.u.s.................................shphl.....pl...sp....................................ph.....s.....ss.....pl.....a.hs.tp.h.........p.s...p.p.pR.D..p..t.l..p..p...t..hp..p.................t..s..l.th..pth.p.s.p...hlht...s.tpl...h............s......tps..p.aplFosF.hpthhp.hh.................h.h.................................................................... 0 406 772 1048 +215 PF00136 DNA_pol_B DNA polymerase family B Sonnhammer ELL anon Prosite Family This region of DNA polymerase B appears to consist of more than one structural domain, possibly including elongation, DNA-binding and dNTP binding activities. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.64 0.70 -5.71 67 6234 2012-10-02 01:06:00 2003-04-07 12:59:11 16 79 2425 290 1405 5186 2614 299.90 23 42.52 CHANGED Gpph+lhshLLctstpcshllPspppt......................................................................pptsYpGAhVl-Php.Ghapsslh....llDFuSLYPSIIhsaN.....LCaoTllts......................................................................................................thsphp.p........chhp..hh..t....hhsp..splppulLspLLcphlp......hR+th+pthtpsps.hp............phlhDtpQhAlKlssNSsYGhhGsssu.hL.shslAtolTshGRphlppTpchlcph.........h...................................shcVlYGDTDSlalphss.sh......................ptshphucchsptlsppl........hhpslcLEhEKsaptllLls.KK+YhGlhh..........t..sch.hKGl-hVR+sssphspphhppllchlhpcpsssps.......httlp............hhhchlps.......................htp.h.tttl..slsphlhoptLo..+shssYpspp...........Hlplst+htpcs...........tphPtluDRlsYVllps......................................................tptsh.hchAccsp.ah............l.lDscaYls.plhtslpplhpslhhs.................sshtpsphltthh.pp 
.................................................................................................................................................................................................................................................................................h.s.h.V....hp.s.......s.h.....a...t......l......hhDatuLYP.SIh.shp.............l.s.sshh..........................................................................................................................................................................................................................................................................................................................p..h.s...l...l...sth...ltphht.........................................hRpt.h.+...p......h.tt...t..............................hhs...Q....Ah.....KlhhNuh.YGhhG.s.t............t.u.....h.h...s.h...lA.s.slThhG......R.phlppscph.lct....................................................................................................................th.p.lhY..GDTDSh.hl....h.......................................................................t....t..h.t.t.h...ht...p.........................................t......phch-..th....a...........hhh.......................t....K.........K........p...Yh.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................................................................................................................................................................
. 0 474 778 1158 +216 PF03104 DNA_pol_B_exo1 DNA_pol_B_exo; DNA polymerase family B, exonuclease domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family This domain has 3' to 5' exonuclease activity and adopts a ribonuclease H type fold. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.10 0.70 -5.57 32 3652 2012-10-03 01:22:09 2003-04-07 12:59:11 14 66 1910 166 1392 3971 1871 275.30 17 26.07 CHANGED hsp-stsshhpshshpsYFYhts.cspp..phtp......................................hhcthpsthhplchlp+.............psh.h.ts......hh+l.hss...h......plpp.h..............sthpha................................EhslshhtRahlDp.slhshsWhplpts............................ht.sphch.h.hpsl.sh.tpt......sthplhuFDIEshtttt....FP..-sppD.lltIShhh.t.s.......................hssshpphhaoltsps...............................sstlh.a..ssEhclLptahphlpphcP-llsuYNhssFDhsYlhsRsptl.......hshthp.htph..tth..............................ppppphphsGtlthDhathhpcch........phsSYpLssV 
......................................................................................................................................................................h...........................................................................................................................................................................................................................................................................................................hhtl.hhtt..t.h....................tht..p...ltt.........................................thhp.ha..................................................................................................Es.sl..................+ahh-p....t....lh.....s.......s.W.h..php.t.................................................................................p...h...h.........t...p.h.....s.h...t.................................sshp.h....h.uh..DI.E.s.ttp.t..........................t...p...lh.t..I..u..........................................................httshtp.hh.h.h.l..t.s.p.s............................................................................................................shpl.....h......ssE....hpl.Lp.........thh.........t..hh....t....p...h..........c...P...D....l....l.h..GaN......l..p..FDh.h.lhpR..........s.p...t.h...........................h.t..h.t..........t..........ph.....tt.t..t..h.......................................................p.t..thh.p.htGplhhDhh.p.hhppth.........phtoapLpsl.................................................................................................. 
0 451 750 1140 +217 PF04081 DNA_pol_delta_4 DNA polymerase delta, subunit 4 Wood V, Finn RD anon Pfam-B_25322 (release 7.3); Family \N 25.00 25.00 27.80 27.60 24.50 24.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.68 0.71 -3.85 3 194 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 154 0 132 178 1 116.20 30 71.61 CHANGED sG+KssI+DVaPVVVRcEusQuHp.K...uEpuP..............h+p-.ELu.tlEEP.......WNQIcuERhuEsl...HsEslTclEhlLRaFDlou+YGPClGITRLQRWpRAKphGLNPP.EVhpVLhL+EGDsEsRh+ESLaH .....................................................................................................tt.tt.................................................................................................................................................................pt..sthE.clLRpFDls.pYGPClGloRLcRWcRAppLGLs.PP.EVhslL.tcps.c................................ 0 38 71 100 +218 PF00772 DnaB DnaB-like helicase N terminal domain Bateman A anon Pfam-B_1000 (release 2.1) Domain The hexameric helicase DnaB unwinds the DNA duplex at the Escherichia coli chromosome replication fork. Although the mechanism by which DnaB both couples ATP hydrolysis to translocation along DNA and denatures the duplex is unknown, a change in the quaternary structure of the protein involving dimerisation of the N-terminal domain has been observed and may occur during the enzymatic cycle. This N-terminal domain is required both for interaction with other proteins in the primosome and for DnaB helicase activity [1]. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.18 0.72 -4.15 156 5852 2009-01-15 18:05:59 2003-04-07 12:59:11 16 42 4498 45 1143 3975 2460 101.40 33 21.41 CHANGED t.Pps...h.-AEpulLGulLh..s..sc..shsplhshL.p...s-cFapttHphIFcshhcLhpps...pslDhloltppL.....cppsplcp.h.uGhsYLspLspssso...ss.slptYAc.llp- ................................Ppsl.-AEpuVLGulhl......c......s-..th.s...p.l...t..-..h..l...p....scDFYpts.HchIF.pshhcLhpps........csl.Dh.lT.l...tppL.........pp..ps.p.L.c.p....l....G.....G...hsYL.....s.cL....s.ps.s.Po....uA...NlphYAc.IVp-............................................................. 1 376 752 973 +219 PF03796 DnaB_C DnaB-like helicase C terminal domain Bateman A, Eberhardt R anon Pfam-B_1000 (release 2.1) Domain The hexameric helicase DnaB unwinds the DNA duplex at the Escherichia coli chromosome replication fork. Although the mechanism by which DnaB both couples ATP hydrolysis to translocation along DNA and denatures the duplex is unknown, a change in the quaternary structure of the protein involving dimerisation of the N-terminal domain has been observed and may occur during the enzymatic cycle. This C-terminal domain contains an ATP-binding site and is therefore probably the site of ATP hydrolysis. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null --hand HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.57 0.70 -5.50 36 6233 2012-10-05 12:31:07 2003-04-07 12:59:11 10 50 4730 57 1194 6420 5596 259.00 43 57.26 CHANGED GlsTGah-LDphos..Ghppu-LIIlAARPuMGKTAFAlslApslAhcp..............p.tsVulFSLEMuscQLshRhluspupl.......ssppLRs.Gpls..cc-ap+lspshspLscts.laIDDosslolsplRu+sRRL+pp..tslslllIDYLQLhpust...ts-s.....RppElopISRuLKsLA+ELslPVlALSQLSRslEpRs.DKRPhLSDLRESG.............................................................................................................................................................................................................................................................................................................................................................................................................................................uIEQD.....ADlVhFlaR--hYpp................cspttt..................................hsElI.........luKp..RsGssG.............oVp.....LtFpspas+Fssltt 
...................................................................................................................................................................................................................................................................................GlsTG.ap-L.D.c.........h.T..u..G...h....p.t.u-.L.....IIlA........ARPuM..GKT.sFA....lNlups.s.A...h....p.p......................p....tsVs.lF..SLE.....M.u.u...c.......Q.....l...s...h....R......h.........l...u.....u.....t......u.pl.........................st.p+..l.....R......s......G......p......L...s.............-...-...-...............W............t.....+...l......s...t....s...h....s.......p......L........p....c...t........s......l.a..I..D......D.....o...s...u........l...o....h...s..-....l..R.........u.+....s....R....R....l....t..............p...................c....................t.........s.......l.s......lI.lID.Y....L....Q....L....h.p.uss.............ts-s...........Rp...pE.l.uE...I.S...RsLK.s...LA..K....EL...plPVlALS....QLs.R..u...l.............E....p......R...............s................D.......K....R.....P....h...h.........S......D..LRE..S...G..............................................................................................................................................................................................................................................................................................................................................................................................................................................u...I.E..Q.D.....ADllh...F.lY....R....-....-..h..Y.p..p...............p...s...p.t..ps........................................................hsElI...luKpR.N.G.s.h.G.....o.Vc...LtF.tpas+Fssh..t......................................................................................................................... 
0 402 793 1014 +220 PF00226 DnaJ DnaJ domain Birney E, Finn RD anon Prosite Domain DnaJ domains (J-domains) are associated with hsp70 heat-shock system and it is thought that this domain mediates the interaction. DnaJ-domain is therefore part of a chaperone (protein folding) system. The T-antigens, although not in Prosite are confirmed as DnaJ containing domains from literature [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.02 0.72 -4.23 257 27964 2012-10-01 22:35:57 2003-04-07 12:59:11 26 628 5589 64 12023 23598 6923 62.00 36 16.78 CHANGED -aYclLGlsp.........sAopp-IK+AYR+LAhpaHPD+Ntsss.......................ApccFcclscAYclLoDsp+.RptYD ............................................shYplL..Glsp.............................sA..s.....t...p.-....I....K.........+...A...YR...+L..u..h........c...a.....H.....P..D+sssss...........................................................Apc+.....F.p..........c.....l.......s....c.....AY..-........l.........L.s.D.pp...Rt.YD.................................................................... 0 4164 7147 9951 +221 PF03351 DOMON DOMON domain Aravind L, Coggill P anon Aravind L Domain The DOMON (named after dopamine beta-monooxygenase N-terminal) domain is 110-125 residues long. It is predicted to form an all beta fold with up to 11 strands and is secreted to the extracellular compartment. The beta-strand folding produces a hydrophobic pocket which appears to bind soluble haem. This is consistent with the predominant architectures where the protein is associated with cytochromes or enzymatic domains whose activity involves redox or electron transfer reactions potentially as a direct participant in the electron transfer process. 
The DOMON domain superfamily, of which this is just one member, shows (1) multiple hydrophobic residues that contribute to the hydrophobic core of the strands of the beta-sandwich, and small residues found at the boundaries of strands and loops, (2) a strongly conserved charged residue (usually arginine/lysine) at the end of strand 9, which possibly stabilises the loop between 9 and 10, and (3) a polar residue (usually histidine, lysine or arginine), that interacts or coordinates with ligands [1]. The suggested superfamily includes both haem- and sugar-binding members: the haem-binding families being the ethyl-Benzoate dehydrogenase family EB_dh, Pfam:PF09459, the cellobiose dehydrogenase family CBDH and this family, and the sugar-binding families being the xylanases, CBM_4_9, Pfam:PF02018. The common feature of the superfamily is the 11-beta-strand structure, although the first and eleventh strands are not well conserved either within families or between families. 25.40 25.40 25.50 25.50 25.30 25.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.66 0.71 -4.19 83 1117 2009-09-13 17:03:55 2003-04-07 12:59:11 12 71 205 0 875 1100 7 114.80 18 23.84 CHANGED ps..sphplpWphs.tspplphplp..sp.....ssasulGFSs...pst.MsssDhllshsp.s...upsplpDta..........sssts.sphD...p.....pshphhs...st.psshhphpFpRplso.....s-s.pDhtl.tsssh.plla.AhG ............................................t...th.ltaph.....t.....s.p...t.l.phplp........sp........ssaluhGhSs............stt..M.......s.su.D.hhlshhs.s........sps..h.lp-ha....................sssp..s....sphD....t..............psh.ph..hp.................s.t.....p...s.s..h..h.hhpF.p.....R.lts.........s-s..t-h.l.....stsh..hllhu...................................................................... 
0 378 488 741 +222 PF04124 Dor1 Dor1-like family Wood V, Finn RD anon Pfam-B_12640 (release 7.3); Family Dor1 is involved in vesicle targeting to the yeast Golgi apparatus and complexes with a number of other trafficking proteins, which include Sec34 and Sec35 [1]. 19.80 19.80 19.80 19.90 19.70 19.70 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.94 0.70 -5.98 7 407 2012-10-03 17:31:52 2003-04-07 12:59:11 7 12 287 0 282 582 28 253.10 23 60.75 CHANGED lpcLsspsl-pLc+...c.ttLsp-ttpl.tphpsLuhsNY+shlcsAcsppshhpphss.ctphssLhppl......................scLpptspcF.ppupplsEp.....p+hsphhhpppsplh-lLElPpLMs............................pClcpu..haccAL-LpuaspRLppphsp...PllpslssplcpshtphLspLlppLcss.lpLspsl+llsYLR+h.sshscsp.LRhpFLps..p-thLps....hlp.ls.sss..hlpphIphhRsphachlhQYhulFs..-ssh........h.sp.shssstlhs.ashsphoshhphlEthl.+t...lus.lcplhhphhhht.uFthsstDhcuhhs.hhpphlhpphppslppsh- ..............................................................................................................................h.p.......lt.p.tth..phptls....tpht.hlts...tpt....th..t..h...t.h....ttp.htt.l.....t.t.l.....................................................................thtt.......t.h....th....p.t..t.p......pp.........................p.p.s....hhpp.pp..l...-....lL-.lPpLhp....................................tshpt..s.............app.u.l.p..l..ta.hp.p.L...tp.............hh........s....p..............s......lhp..........tl.....tps.t..t...h.t.hh.t.p.Llt...L.p.t......lpL....stsl..+.hlsaLR+.h....s................h............p................t................p....hp...........hhtt......ptthhtt............................................................t.s..........hl...p+hlph...hR.phas....hls.Ypsl...Fs....pt....................................t.........th...........s.tlhp...a.....p.ht.h.h...ph.lct..L.ph............t.thpplh.phhhhs.uhshhshDht..h...........................................................................................................
.................................... 0 96 157 238 +223 PF04173 DoxD TQO small subunit DoxD Kletzin A, Studholme DJ anon [2] Family Swiss:P97207 is a subunit of the terminal quinol oxidase present in the plasma membrane of Acidianus ambivalens, with calculated molecular mass of 20.4 kDa [1]. Thiosulphate:quinone oxidoreductase (TQO) is one of the early steps in elemental sulphur oxidation. A novel TQO enzyme was purified from the thermo-acidophilic archaeon Acidianus ambivalens and shown to consist of a large subunit (DoxD) and a smaller subunit (DoxA). The DoxD- and DoxA-like two subunits are fused together in a single polypeptide in Swiss:Q8AAF0. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.96 0.71 -4.57 7 130 2012-10-02 13:32:46 2003-04-07 12:59:11 8 6 104 0 41 859 161 154.60 30 51.50 CHANGED ah..lRlslGhhahsAhlR+tlLpPuKLsPsSouYVGtKhlpFLPpu.ushKshL.hll.s.sLLhshLlhFohlEhlhGLhhllGhhTRLsulsshshuhGhhLuAhWLGoTC.DEWQIuhLhsuuuhhlhhoGutph.ulDhlLh+Khpp...t.hlhlhp.........al.Lh ................................................................h.lRhshGahahsuhhR+h...l....h...t....s.....KLsP.s.u....stalG.Kh..p......aLP.pu.....hs.......h.....+....s........hl.thll.s.sl....L..a.......th.h....l..lF.oh...l...Eh.lhG.Lh.lllGlhTR....L...s...u.......l.......s...s........h...h....L....u....h....s.......l...h........L........u........u........u.....W..h..G.s.......T...C...h.....D.......E....W.......p....l.....u......h...l..h.....h.....u.su....h.sl..h..h..s..G...uGt.a...SlDhh.lh.p+ht......................hhhh............................................ 0 17 29 37 +224 PF00930 DPPIV_N DPPIV_N_term; Dipeptidyl peptidase IV (DPP IV) N-terminal region Finn RD, Bateman A anon Pfam-B_1017 (release 3.0) Family This family is an alignment of the region to the N-terminal side of the active site. The Prosite motif does not correspond to this Pfam entry. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.13 0.70 -5.86 43 2539 2012-10-05 17:30:42 2003-04-07 12:59:11 16 64 1098 252 1068 2839 947 293.20 19 41.08 CHANGED SsDtphlLlths........hpp.aRaShpusaalaDlps...........tphppLsss...........t.lphspaSPs..GptluaVhsNNlYlp...phsss..pthplTs..DG.....psslh.....NGlsDWVYEEE.huspsuhWWSPDup..........tlAahchs-otVshhphshassptt..........YPcshph+...YPKAGssN.spVpLhlhsl...psspsh......pltsss..tsp..DhYlspVsW..ssc.....sc..lhlphhsRtQsthplhhs...-h............ts.spsshhp.......p-ssssWl....chpptshhh.........pssspalhhsp.+sG....apHLhhassssp.....................t...tlTpGsW-V....hp.lhuhDtppshlYFtusc..cs....Ptp+plYplshp.......ssph..ppLosstspp........sssFSss.spaalhsapuPssPh ..................................................................................................................................................................................................................th...sst...s..........p........l........s....ah..................p......t......s.....l....ah......................t..tt..........t.htlT..........su...........hh...........................G....s.....t...h.....hh......p...............-..E..........h......t......................u.......h.....h...Wussup...................................hlhh.hphs.p..s.l..h..h....................................................t..th.........Y.Ph..........sG...t.....s....s.hp....l.hh.h.ph.........tt.t............................................h.....................t.............h...l......s..t...htW......ss......................tp..hh.h.hhsR....p..p...lhhh.....s................................................................................tp...tt.....hh.............................ppp.st..tal.........p....t.t.hhh....................................................sspph..hhh.p....+s....G.......ap....HLahhshsstt..................................................................hp.....tlT.....p.G.p.apV........
..................tp...hht...hs...pp...pp....h...la...Fhusp.....pp............sh..p.....p.....p...lYp.l.shs................................ssth....ppl.o..s.p.t..spp..............................s.ss.hSs......s.....t.p.....ahl...hp.s.t.................................................................................................................................................................... 0 360 605 877 +225 PF05186 Dpy-30 Dpy-30 motif Wood V, Bateman A anon Pfam-B_13490 (release 7.7) Motif This motif is found in a wide variety of domain contexts. It is found in the Dpy-30 proteins hence the motifs name. It is about 40 residues long and is probably formed of two alpha-helices. It may be a dimerisation motif analogous to Pfam:PF02197 (Bateman A pers obs). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.99 0.72 -4.47 14 749 2012-10-01 20:11:07 2003-04-07 12:59:11 8 40 244 4 498 708 14 41.20 37 11.81 CHANGED ss+pYLspsVsPhLlpGLstlA+pRPpDPlpaLApaLh+pps ..................h+pYL.p.p.p.Vs.PsLhpGLs.pls......+..p........+P...s......DPl...c.aLApaLhcpp....... 0 197 267 394 +226 PF01414 DSL Delta serrate ligand Ponting CP, Schultz J, Bork P anon SMART Domain \N 28.90 28.90 29.50 28.90 28.40 28.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.66 0.72 -3.97 18 522 2012-10-03 09:47:55 2003-04-07 12:59:11 14 157 113 2 298 473 0 61.00 42 8.53 CHANGED Wppshasusps....clcYphRssCD-pYYGpuCspFCRPRDDtFGHaoCsppGpKhC.sGWpGp.C ..................Wpt...tsths.....plcaphRlhC.c-pYYGp..sCspaC.+P....R.D.D.h.FG.HYsCs.p.s.Gs+..sChsGWpG..C........... 0 117 141 227 +227 PF01666 DX DX module Hutter H, Bateman A anon [1] Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 6 conserved cysteines that probably form three disulphide bridges. 
25.00 25.00 25.70 25.70 24.00 24.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.98 0.72 -3.54 5 33 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 5 0 33 30 0 78.20 28 17.46 CHANGED PYhTppKCospcsIPhc.t.auFCDsDTGRluILGchpl......cGs-Np-sc.RYCooN+DCoso....oVCVh........hssssupCascP .......YpTshsCssspslstsap.auFCcs-Tp+lhllGphsh......sGpch...pchp..ppCshNpDC...up.s....pV.CVh...........s.ppthCa.sP............................. 0 7 12 33 +228 PF00782 DSPc Dual specificity phosphatase, catalytic domain SMART anon Alignment kindly provided by SMART Domain Ser/Thr and Tyr protein phosphatases. The enzyme's tertiary fold is highly similar to that of tyrosine-specific phosphatases, except for a "recognition" region [2]. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.73 0.71 -4.57 24 6758 2012-10-02 20:12:17 2003-04-07 12:59:11 15 122 1439 108 3888 7101 596 125.70 20 31.76 CHANGED lYLuutssst....shhpthslshllNlstths................sphphhplP.lpD..........ppssplstahspshpFIc.ps...ppputpVLVHCtAGlSRSAolslAYLMpppsh.shs-Ahphl......+p+R.stlsPNhsFhtQLhpacpp ................................................................................................................................................tt...tlph...l.l..s.hs.....t..................................................t.t....h....p.h..h.p.l...h.D.............................................................tt....sh.......t..h....h......p...p....s....h..p..h...lc....ps...................pp..p...s........s........p.........V....l..V..H.........C....t....s...G........l.uRS........us..........l.........l....h.....A.....Y.....L....h.......p............p......t.............t.............h....s.............h...........p..-.A.h..p.h.l.....................+pp.R....s...h..h.........s.....s...ahttL......t.................................................. 
0 1325 2005 2947 +229 PF00035 dsrm Double-stranded RNA binding motif Eddy SR anon Published_alignment Domain Sequences gathered for seed by HMM_iterative_training Putative motif shared by proteins that bind to dsRNA. At least some DSRM proteins seem to bind to specific RNA targets. Exemplified by Staufen, which is involved in localisation of at least five different mRNAs in the early Drosophila embryo. Also by interferon-induced protein kinase in humans, which is part of the cellular response to dsRNA. 23.00 21.00 23.00 21.00 22.90 20.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -9.21 0.72 -3.39 110 8915 2012-10-02 17:51:51 2003-04-07 12:59:11 20 142 4747 68 3095 7369 1675 65.70 27 21.06 CHANGED sts...hLpc.hspptt......hthththhtppssspt...tFts...plpl........ss.pths....pGtu.......p.sK.KpAcppAAppuLppL ...............................................KotLpE.hhp.t.pt.....................hshtap.h.lp.ppGs....s+p....p..pFss..............plpl.......................ss..pp..hs.........pGpG.............p.SK.KpAc......ppAAppALp.............. 0 830 1497 2299 +230 PF01951 Archease DUF101; Archease protein family (MTH1598/TM1083) Enright A, Ouzounis C, Bateman A, Anantharaman V anon Enright A Domain This archease family of proteins [1], has two SHS2 domains [2], with one inserted into another. It is predicted to be an enzyme [2]. It is predicted to act as a chaperone in DNA/RNA metabolism [1]. 
25.00 25.00 41.10 41.10 24.60 24.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.78 0.71 -4.22 96 503 2012-10-02 11:08:51 2003-04-07 12:59:11 11 6 472 2 280 485 93 134.80 29 85.80 CHANGED acal-H..TADltlcuhGsoLcEsFpsAuhAhhslhs.-hsplc...........sc.cp...hclplp.upDh-sLLacaLsELlahhcsc.thlhpc...hclp.hs..........................................thplcupshGEphc.p+Ht..hts-lKAlTYathc............................lpp.psst..........................apspsllDl ..............achl-H..TADlt.lcuaGsol-EsFppuuhuhhshhs...D.hsplc...........st..pp.....hplchp....ucDh-sLLacaLsEllahhss.c...thlh.pc....hclp..hc.............................................thplcupshGEphsht..+H....t..sElKAlTYpthp............................lpp..ppst..............................acstlllDl...................................... 0 104 172 233 +231 PF02575 YbaB_DNA_bd DUF149; YbaB/EbfC DNA-binding family Mian N, Bateman A, Eberhardt R anon COG0718 Family This is a family of DNA-binding proteins. Members of this family form homodimers which bind DNA via a tweezer-like structure [1-3]. The conformation of the DNA is changed when bound to these proteins [3]. In bacteria, these proteins may play a role in DNA replication-recovery following DNA damage [1]. 25.20 25.20 25.20 25.30 25.00 25.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.97 0.72 -4.10 139 4505 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 4084 9 1047 2557 1477 92.10 35 83.07 CHANGED hcQApp.hQpp....hpchQccLsphplpGpuGuG....hVpVphsGctclhslpIcspll.c......-DhEhLpDLlhuAhN-Ahp+scphtpp...chsphou.Gh..shP ..................................MKQAQp.MQcc....MpchQc-L....up...hclsGpuGuG....lVpVoh....s....G....ppplpclcIcssll...-...............-DhEhLpDLlhuAhN-Ahp+s-ctppc...chuphou.Gh.t......................... 
0 372 713 898 +232 PF02580 Tyr_Deacylase DUF154; D-Tyr-tRNA(Tyr) deacylase Mian N, Bateman A, Moxon SJ anon COG1490 Family This family comprises of several D-Tyr-tRNA(Tyr) deacylase proteins. Cell growth inhibition by several d-amino acids can be explained by an in vivo production of d-aminoacyl-tRNA molecules. Escherichia coli and yeast cells express an enzyme, d-Tyr-tRNA(Tyr) deacylase, capable of recycling such d-aminoacyl-tRNA molecules into free tRNA and d-amino acid. Accordingly, upon inactivation of the genes of the above deacylases, the toxicity of d-amino acids increases. Orthologues of the deacylase are found in many cells [1]. 25.00 25.00 25.50 25.50 24.10 24.10 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.71 0.71 -4.21 37 3993 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 3782 94 988 2599 777 142.60 45 94.69 CHANGED +sVlQRVppApVsV....-sc........llGpI....spGlllLlGltcs.....Dop-chchhscKllslRlF.....-D-psK.hNhSlpDls.GplLlVSQFTLhu-spKGpRPsFppuuss-pAptLY-pFsphl+pts..............................pVcsGpFGAcMpVpLsNDGPVTlll-s .........................................+sllQRVppAsVs..V..........................-uc............lsGp.I.........spG....LllLlGlpps.....................Ds....c.....p.....cs.....c.....hl...........scKlhshRlF......................pD..-...p....G..........K.....MNhSl.p-l.............s.....Gp.....lL...l................VSQ..FTLhADT.+K.Gp.R.PuFsp.....AAs....P-tAptLY-hFs.p.p.h+ppt...h............................pVpTGpFGAcM.pVs.LlNDGPVTlhL-............................................ 0 335 604 813 +233 PF02583 Trns_repr_metal DUF156; Metal-sensitive transcriptional repressor Mian N, Bateman A, Eberhardt R anon COG1937 Family This is a family of metal-sensitive repressors, involved in resistance to metal ions. Members of this family bind copper, nickel or cobalt ions via conserved cysteine and histidine residues. 
In the absence of metal ions, these proteins bind to promoter regions and repress transcription. When bound to metal ions they are unable to bind DNA, leading to transcriptional derepression [1-5]. 21.20 21.20 21.30 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.84 0.72 -3.79 147 4323 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 2741 7 851 2245 340 83.60 30 88.61 CHANGED tpp+ccll.pRL+RIc...GQlcGl.p+Ml.Ep...cc..Ch-lLpQluAl+uAls....pltthllcsHlcpClt.pshps.......tppp...ptlcElhphlp+h ...........p+ppll...sRL+RIcGQlcul.p+Ml.Ec.....c.c...Ct-lLpQluAl+uAls....slhtt..llcpHlp.cCls..pshpp.........tppc...pplc-hhphlpp.............................. 0 298 568 728 +234 PF02588 DUF161 Uncharacterized BCR, YitT family COG1284 Mian N, Bateman A anon Yeats C Family This is probably a bacterial ABC transporter permease (personal obs:Yeats C). 21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.64 0.72 -3.68 189 12563 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 2439 0 2056 8039 1861 84.60 21 58.10 CHANGED phlhhlhGuhlhuhGhshhhtss.hssGGhssluhllp.phhs...................lshuhhhhlls.lslhlhuhhh...................................h............ph.........................................................slhollshhlhuhhl .....................hlhhlhGuh.lh.ulG.l.s.hh.htss.h..ssGG...hssl.uhllp..p..hhs...........................l.s.h.uhhh..hh...lN.hsllll.uhhh.........................................................................h............sh.................................................................................slhollshhlhuhh...................................................................................... 
0 729 1389 1730 +235 PF02639 DUF188 Uncharacterized BCR, YaiI/YqxD family COG1671 Mian N, Bateman A anon COG1671 Family \N 24.30 24.30 24.30 24.70 24.10 24.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.41 0.71 -4.70 167 2006 2012-10-03 20:43:45 2003-04-07 12:59:11 9 4 1980 0 382 1203 282 129.60 39 85.25 CHANGED hlh+sApRtpl.lhlVANphlphPs...............p.shlctlhVssGhDsADshIscpspsGDlVlTuDIPLAucllpKGuhslsP+GchaopcNItptLshRchMpclRsu..Gh.......o.GGPssauppDRppFtssLD+hlt+ ...............................lh+sAcRhp.l.lslVANps..h.t.s.ss.............................p..phlcslhV.ss.GhDsADpcIVppscsG..D.....l..VITtDIPLAutllcKGu.hsLsP+GchYosssIcptLshRshhsplRtu.....Gh....p....TuGP.sshopcDRptFtspL-+hl................ 0 115 236 310 +236 PF02641 DUF190 Uncharacterized ACR, COG1993 Mian N, Bateman A anon COG1993 Family \N 20.50 20.50 20.50 20.50 20.20 19.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.33 0.72 -4.14 5 725 2012-10-01 21:59:08 2003-04-07 12:59:11 10 8 534 4 284 539 18 99.30 27 76.82 CHANGED lKpKLLRIYouEs-+aEGcPhYKsllc+L+E.cGIcGATVaRGIsG.YGK++clHuc-lFpLSscLPVllElVDccEsIpRsLccl+EhhKs.GLITlEcVcVh .............................t....hLplahuEp...c+......h..p..u.cP.la.c.tllchh+c.tul.A.G.AT.VhRGltG..aGp..pp.h.l.Hs..schh.pL...u..p...c.lP.l.slphVDstcclpthls.p.l.p.phh....pp....u...LlTh-.s...h................................... 0 107 189 248 +237 PF02958 EcKinase DUF227; Ecdysteroid kinase Bateman A, Eberhardt R anon Pfam-B_2081 (release 6.4) Family This family includes ecdysteroid 22-kinase, an enzyme responsible for the phosphorylation of ecdysteroids (insect growth and moulting hormones) at C-22, to form physiologically inactive ecdysteroid 22-phosphates [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -12.02 0.70 -5.27 54 1726 2012-10-02 22:05:25 2003-04-07 12:59:11 15 28 286 0 1069 4192 1235 241.30 19 65.13 CHANGED pG-NYsShhhRlplch.........p.spsppp.......hohllKs............h..tpstttphhp.phtlFppEhshYppllPchcplh.cps.............s..hphhscshhhp.....tp.p....phllhEDLs.pGapsssR.hpsLsh-cschslcKLApaHAsShshtp...p.s..htph...pGhhppthhps......pshh...pshhpshhchh.pphsththht.....p+lpplts..phhcphhphhp..........sssspasVLsHGDhWsNNlMFcYcsps...p.p-shhlDFQhspauSPuhDLhYhlhoSsp.-h.+hpph-pllphYappLhc.pLctLsa .............................................................................................................................................................thhs.hhph.h.h..................t...t.t.....p..................hshhlKh.........................................tp.......hht....t....t..h..a....t..p..Eh.....hY..pp..hhP..thtth.tt.............................h..hspshhsp.................................t............hll..h.EDL....p..........t....u...a..p......h.s.c...ht.s.h......s......hpc....h.ph.slppLAphHAhoh...shtp..........p..........................t......h........s...h.h....t..t........h.....t..t..........................t.....h..h........t..th..h.p..t..hh.p...h.h.......t..t...........t.h.........................................tth.p.t..h....t......p.h..h...p....h..h.p.hht................tttth....p...s...l....s.....HGDha..............hs..Nhha.......p.......h.........s......tt......t........................................p...........s............h...hlD...........a....Q..hsthu....s....s....shD....l....h...a....h...l...h......s...s...h....p......ph.....p...............pp...h....pt..h.l...phYaptLhp.lt..................................................... 1 287 431 878 +238 PF02995 DUF229 Protein of unknown function (DUF229) Bateman A anon Pfam-B_1566 (release 6.4) Family Members of this family are uncharacterised. 
They are 500-1200 amino acids in length and share a long region conservation that probably corresponds to several domains. The Go annotation for the protein indicates that it is involved in nematode larval development and has a positive regulation on growth rate. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.73 0.70 -6.08 17 540 2012-10-03 20:55:17 2003-04-07 12:59:11 12 17 54 0 424 1427 722 390.00 22 71.40 CHANGED c.hphppCspsp.hhspphs.phpphhlphpt.....hh.....tshpChY.pphtRtpst...pshhhh.phhth...hppsh.l.ss...................................s-hhpspChp.shsphh.............pDshtalpp.....ph..p.............ptpss.ptcc.SVhllGlDSlS+hphhRphP+shpalp.phsahEh.GYNKVGDNohPNLlslLoG...........hsp.thptsp....psttshDphsaIWKpFppt.GYtThauEDt..sshssFsY.....p.GFpcpPsDaYhRPhhhthEpphphhtp.ths....ChGp+.tpphlh-ahtpFh.+apsph....hFuahWssphoH-.hphssth.................DcthhpaLpphccpuhh-solllhhSDHG...........................................................hRaGphcpsh....pGhhEERhPhhhlhhPpah+..cpaPphhpNLphNpc+LoosaDlHtTLccllpLsshsctphps...t.ps.............+shSLFhPlPpp.RoCtpAsI.p+aCsCpshpplssss.....hhpphuptlVpplNchlts.........pthCpsLpLpplp 
..........................................................................................................................................t.................................................................h.C.h.....h.h.........................................................t.....................................................................phh.h.C......t.............................hh..............................................t...sVhhhslDShSphp.hh.R...............p..hPcshp........al........p...p...............................s........h......h.ph..uaN...p........l.u..-soh..sNhhslhsG............................stt.th.t.h.......t.t.hDth.s.hla.ppapp.......t.GYh....Th....a....uED..............ths.h.ap..................h.GF...p..P.s.....D..aYh.....R.sh....h....h....t..hp..p......ht..............th...................C.h.s.tc..hphhh-..a.h....t............phh.tt.a....pp..................hFuh....h..a.....s...p...h....sH....s...h.p.hh.thh.............................................................Dpt...hh.phl.pp.h.pp..p.t..................................hpp..ohllhhuDH.G.......................................................................................................................................................hR......a..u.......t..h.t...p.t.................pG..h.h...E..c....p.....Ph...hh.l...hlP.hh+..............pp..h.sphhp...sLp.Npp+Ls...............os...a.Dl+tT.L.......hcll.p.htt.....t......................t.......................................ps.S.Lh....l.P.p..RsCt.ps.s..I.....p.aChC......ht..t.h....p.p............................h.thuphh...lt.......hNphh.t....................Ct.hplt...h......................................................................................................................................................................................................................... 
0 173 212 373 +239 PF01697 Glyco_transf_92 DUF23; Glycosyltransferase family 92 Bashton M, Bateman A, Eberhardt R anon Pfam-B_1694 (release 4.1) Domain Members of this family act as galactosyltransferases, belonging to glycosyltransferase family 92 [1,2]. The aligned region contains several conserved cysteine residues and several charged residues that may be catalytic residues. This is supported by the inclusion of this family in the GT-A glycosyl transferase superfamily. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.81 0.70 -5.10 52 812 2012-10-03 05:28:31 2003-04-07 12:59:11 22 17 132 0 706 894 66 240.60 15 52.56 CHANGED +slslCluPlass-s....phhpalphh+lt..Gus+hhlYhts.hspphhpllcpY.pct...Gh.lplp.a..................................hp.ptph.h+spthuhsDChlp..h+ttscahuhhDlD-hlhsps..phh.....pphpphhpshts.phhthphpstthhp..tp.hssh...sh..h............................t..................................KsllcPpplsthhhHhshph.t.....................s.thhplpp..h.....cs.p.............................................................................hhpht...phhtch.hpphhhshhhpshppphht 
.....................................................................hhhCh.t.shhht....t....plhpalth.hphh................Gs..s+h.hh.Y....ts......h...sp.p..h..hc..l..lc....Y..pt...........G.h..l..p..l...h..sh................................................................t.p..t.th..h..hts..phhsh...sDCl..h+................p.............t......p...s.......c........ahshhDlDEhl....h..s....hp..........................p..h...t....ph.h.p...p...h...t...............t...........h........t...t......h...t........hpphhh....t..........t...t..hs..th............t....t...h...p.h.hh.th........t..t...............................................................K.hlhps.....ptl.....t..h.h.h.H..h.....sh.phh................................t.h..h..........h...Hhp...................................................................................................................................................hh.......................................................................................................................................................................................... 1 240 340 659 +241 PF02996 Prefoldin DUF232; Prefoldin subunit Bateman A, Moxon SJ anon Pfam-B_1664 (release 6.4) Family This family comprises of several prefoldin subunits. The biogenesis of the cytoskeletal proteins actin and tubulin involves interaction of nascent chains of each of the two proteins with the oligomeric protein prefoldin (PFD) and their subsequent transfer to the cytosolic chaperonin CCT (chaperonin containing TCP-1). Electron microscopy shows that eukaryotic PFD, which has a similar structure to its archaeal counterpart, interacts with unfolded actin along the tips of its projecting arms. In its PFD-bound state, actin seems to acquire a conformation similar to that adopted when it is bound to CCT [1]. 
23.20 23.20 23.20 24.00 23.00 23.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.16 0.71 -4.46 37 1254 2012-10-02 17:27:01 2003-04-07 12:59:11 12 25 488 2 812 1220 90 116.60 19 50.84 CHANGED hcttlpplpsphsplppthsclcpshpslcslpp........spshchhlslusslahpupl..cssc.....lllplGsshhlEhshc-Ahchlcc+lpplpcphcplppplpplppphsphttphpphtpp ...................................................p...tphppphpplptthsc....hppsh.cslpplpp..........................tppsh.c...hhl.sl.s......ss..h.a......sp.............upl..ss..s..sc............................Vhlt...lG..ss..hhlEhshc-Ahphlcc+.....lptlp.pplcp....lp....pplpt....h....ppphp...hht...htph......................................................... 0 274 444 656 +244 PF03080 DUF239 DUF239; Glucoamylase; Domain of unknown function (DUF239) Griffiths-Jones SR anon Pfam-B_913 (release 6.4) Family This is a family of plant and bacterial proteins, a small number of which are putative carboxy-terminal peptidases (see for example Swiss:Q9XIN9). 
22.00 21.50 22.40 21.90 21.70 21.30 hmmbuild --amino -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.71 0.70 -5.28 54 602 2009-01-15 18:05:59 2003-04-07 12:59:11 10 22 47 0 366 574 2 189.70 33 56.66 CHANGED aaGspAslsVasPpltp..t..paShuplal.sGs..stp...hssIpAGWtVpPplaGDspsRhasaW...cshpts.GCYNhhCs...GFV.Qsspp.lslGsslpssSs.hsst.p.htlphhlh+...D.psGNWWLp..h.ts........hlGYWPupLFsp..lsst.AshlpaGGpVhsst......st..pssP.MGSG+.Fstp.s..htcAuahpslp.llDtss...phhss......psh..hssp.p.CYslpsht.......................hGth..haYG...GP .............................................hhGspuslslap.Pplpt...s...phShuplalhsGs...ttp..............hss.IpsGWp..............V.........PphY...s...Ds..ts+hahaW....o.............t............-............sh....pts....GC.a....Nh...Cs....GFl.Qs..s.pp..hshGss.lt...shSs....hsut.p....htlplhlap.................D.tpGsWWlt....h.sst......hlG.....YWPttLFs..........lt.p....t....As.hlpaGGplhssp.....s..sss...MGSGp....astp...s...htpuuahpslp.hhstst...phhs.........h...hsppsp.CYslt.h...................t..t.h.hhaGGP................................................. 0 63 183 236 +245 PF03087 DUF241 Arabidopsis protein of unknown function Griffiths-Jones SR anon Pfam-B_1563 (release 6.5) Family This family represents a number of Arabidopsis proteins. Their functions are unknown. 
23.10 23.10 23.20 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.62 0.70 -4.87 11 556 2012-10-01 23:20:42 2003-04-07 12:59:11 9 4 17 0 392 492 0 161.50 25 80.30 CHANGED huLusLpELYcslpchLchssoppthtppp....hhEchLDuSlclLDlCussRDlhspl+EplpsLQSuLRR+c..ut.....lcsclcuYls.RKpl+KEhpKhltuLKphpst.............t...ptsslsslhcpshtholslh+olhphLSs.....scs.h.hpstLhshhhhppstt.pstt...............hcsEhpplDtthpt....sppphhcclcphEhs...........lc-lEcpLEuLa+pLIpsRVSLLNI ..............................................................t.....h.tlh.thtphh.hs...st.....................h-t.h-t.l.llDhpsh.hp-.h.th+t.htphp.sl++tc.....................tspl.t..t.hht......h.+ph.p...Ktht+..h.tthtt.t.............................................hhthhtps...hsh....shh.pshh..hlst...................th.h.hsphh........p.t...................................................................................................................................................ppl.p.tlc.s...............................................ltthEttht.laRpLlpsRs...slLN....................................................................................... 0 16 176 308 +246 PF03103 DUF243 Domain of unknown function (DUF243) Bateman A anon Pfam-B_1157 (release 6.5) Family This family of uncharacterised proteins is only found in fly proteins. It is found associated with YLP motifs Pfam:PF02757 in some proteins. 25.00 25.00 51.50 46.00 19.40 23.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.57 0.72 -3.87 31 401 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 29 0 246 417 0 96.70 39 34.43 CHANGED llpKchYlHssP.--.E-......tptth.susspKpY+llFIKAPss..ssspAslthspstsEEKTllYVLsKKs-ttp.sttls.s.tsspsuKPEVaFIKY .........lpKcFYsasAP.E-s.-p.....ts......tphh....susspKsh+VlFIKuPps..sh.psA.s.lthsp.tssEp+TsIYVLsKps-.sshsp.pls...s.psspssKPEVaFlKY............. 
0 45 60 165 +247 PF03140 DUF247 Plant protein of unknown function Mifsud W anon Pfam-B_1292 (release 6.5) Family The function of the plant proteins constituting this family is unknown. 24.80 24.80 24.90 25.50 24.10 24.50 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.35 0.70 -5.46 48 1014 2009-01-15 18:05:59 2003-04-07 12:59:11 10 19 25 0 634 924 0 301.50 22 82.37 CHANGED IaRVPtpl+.ch..sp.cuY.pPplVSlGPY.H+u..pppL................psMEpaKhRhLpphls+ss.............tslpphlsslpslEpcs.Rss....Ys-sss................hssc-...FlcMllLDGCFlLchhhthsp......................stsDs.lashphhhsh...l..ppDhlLLENQlPaFVLccLhphhp.................................ttpspssLsplshp....ah.............t.shthss......thhtptps............pHlLcLh+pshl......ss.tp......tsttshpp.t...........................................phlhsAs-LcpsGV+F..+ppcs.............sp........................hhD.lpF..c....pG.s......LclPplhlc-sTpplhtNLlAFEQs.psss...............sshlTuYlhFMssLlsoscDVslL.pccGllcshlGs-.....p-Vuc.hFsp.Lscs......ss.hs.hcs...salss....lhpplspappp....php.....phhApl++.paFssPWshhuh...lAAlllllhT 
.........................................................................................................h...h.........p....t.a..PthlulG..P.h...+tt......t.th...........................h-phKhhhhtthhtt.t.......................htthht...ht..t........ph..+t.............Ytt...............................ht.tpp.....hh.hhhhDus...Fll.hh....h.................................................................................t..................h...............l.....DhhhlENQlP....hhl.l.pt.lhth.........................................................................h....thhht......hh...................................................................................Hhh.chhh...hh..............................................................................................................................................................h.s.stpLt.t.sG.lph...t.ttt.........................................................................hhs..lph...p............tu..h....................lp..lP.l.....l.pt..st.hhhNhhAhEtt..t...........................t..hssY...h.hhs.l.lss..pDlthL..hpptl.l......p.h...h...t......s.p............pt.ssp..hhpt.lspt.............................hh..hs..............hh.t.........hht.tlpt....ah.pp.........h.............................hhh.t...hh......a...h...s...h.hhs..hhshhhh.......................................................... 0 33 377 521 +248 PF03141 Methyltransf_29 DUF248; Putative S-adenosyl-L-methionine-dependent methyltransferase Mifsud W, Moxon SJ, Eberhardt R anon Pfam-B_1462 (release 6.5) Family This family is a putative S-adenosyl-L-methionine (SAM)-dependent methyltransferase [1,2]. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.73 0.70 -6.17 22 712 2012-10-10 17:06:42 2003-04-07 12:59:11 11 15 46 0 435 2409 358 380.90 35 73.10 CHANGED -YhPChDsppshph....spcphpapERHCPs..........pcphpCLlPsP.cGYKsPlpWPcSRDhlWasNlPHs+...LsppKusQNWlphpG-hhpFPGGGTpF.pGAspYI-pluphls.......hsuplRssLDlGC.GVASFGAaLLs+sllTMShAP...+DsHEuQlQFALERGlPAhluVh...uTpRLPaPSpuFDhsHCSRChIsWppp-GhhLhElsRlLRPGGYalhSusPlh.........+pcp-.ppthcchpslscplCWchlsccsp......hsIWpKPhss.sChtp.Rpt..spPslCc.sc-sDssWYsphcsClo.hP-s.......ptsustlp.WPpRLpssPsRlps.....h.t.sh-tFcpDschWpcpVstYhclhp.hlppsclRNVMDMsAshGGFAAALtch...lWVMNVVPs.....ssssTLslIY-RGLlGsYHDWCEuFSTYPRTYDLlHAssLFShhp......pR.Csh.psILLEMDRILRPpGtlIIRDph-llscVcc.lspuh+Wcsphp-p...ccss....pEplLhspK .................................................................................................hhPC.s.......t.......ht.h..hERpCP..............t...Clls.P..sYt..............Ph.WPtS.......+.......c.h...hhtNh..sa.t...ls....p..t.ppW....h....ptp..hh.....F.P.u.u...u.o.....F...t..G.s..........t..Y...l....p.................l......t.p......hhs...........................st..th...R...s.....sLDh...GC...G...................V....A.......S....a....G...u..h...L....h.....p.....c......s....l...l..s....h...S..h......AP........p..D..t....H....p....s....Q....l......Q......F....A......L......E....R.....G.......l....P......A....h....l...u......s..h.......u..o..p....+.....L.P.a.P.u.p.s...FD..hs......H..C..u..R..C..h...l.....s.....W...t....t......................s....G..h...........h...L.h.Els.RlLRPGG...a..a..l...ho...u...s..h...............................t...........t..t.....pt..h....t.h.....s.pt....hC...Wphh....p..tp......hslapK..sp..pCh......pt........hCp..t.p.ss..s.t...sW......................h.hpsCht..s.............t....aPtRh...s....hltt........t...t.p.att.Dpph.....Wpphltthh.h...........l.........t.......sp...............hRNlhDMpAhhGGFAAA.L..h..p........lW.VMNVVP.......
............tt.sTLslIa-RGLl......G....hhp........DW...CEuFsT.YPRTY.DLlHAstl..hoh.h...p......p+....Cp.h...lhlEhDRILRPtGh.hllRDp.thl.plpt...hhtthpWp...s...h..h.....t......p................hh............................................................................................................................. 0 54 278 354 +249 PF03164 Mon1 DUF254; Trafficking protein Mon1 Bateman A, Wood V anon Edwards YJK Family Members of this family have been called SAND proteins [4] although these proteins do not contain a SAND domain. In Saccharomyces cerevisiae a protein complex of Mon1 and Ccz1 functions with the small GTPase Ypt7 to mediate vesicle trafficking to the vacuole [7]. The Mon1/Ccz1 complex is conserved in eukaryotic evolution and members of this family (previously known as DUF254) are distant homologues to domains of known structure that assemble into cargo vesicle adapter (AP) complexes [5]. [3] describes orthologues in Fugu rubripes. 20.40 20.40 20.50 21.20 19.60 20.30 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.53 0.70 -5.87 7 443 2009-09-11 23:32:11 2003-04-07 12:59:11 9 13 291 0 297 444 4 359.80 32 71.63 CHANGED s.hsEthptpcKplFlLSEAGKPIaopa.Gs--tlsShhGlhpAlVSahpsst...sslpShpupup+lsFLp+SPLlLVusScospS.stpLhppLthlYtQIlShLTtsplp+lFpp+.pNaDLRRLLuGoEphhcsLl......pphsps..shLhsulpslPLssohR-tlossltp.......sphcsLlFulllAt.s+LlshVRhKchh....LHstDLpLlhsLlus..ps..csuEsWsPlCLP+FNssGFhaAalua.Lss....s.ssCLlLlSscR-sFFshpss+pclhp+Lcc.pthhpsLtcshpp........staplpplG..hPpL+HFLYKs.......KpssQassPthchshpstpEpp.......RL.ulYppLHs+l+p......sRshphhhchsp+-s.................................LhAWVTssF-LYhhhs.PlsoKshslpsVpKLl+Wl+KEEsRLFIhsshoa 
..........................................................................t......hptppKHhalLSpA.GKPla........o.pa....G...........s.p....p.....hl.................s.shh....Glhpsl..l....S......ah.p.s..st.......................stlp...sh.......p..u..ss......h...+hV.al..p.cuPLhLVul..............S....p...........h..........t..........po..................t.p...........Lp.....tpL.ph...lahQIlShLThsplp+lFpp+..saDLR.+hLt.Go...-thh.ssLl..................................pthsps....s...s....hl..hs.ul.ps..l...Lt..t.s...hRptlsshl...p.................................spstsllaullhst..spllsl.lp.....+.p..h........................L+PsDLpLlhshl.s.............pt..s....h.........p..............s..u...........EsWhPlCLPp.FNssGahasalsa..lp.......................................thsl....lLlSs..p..+....-..s..F.at....lpp.....h+...ppl........pt...lpp.tsshtt.....lt..puh.pt..........................................hp...h..t...l.s........hs...l.pHFl..YKs........+t.s....Qa..s.....sph......psshtp...t..ppp.................................+.Lhth..Ypp...La........s...ph+s..........................................t.t..h.......+..hhhh.....h..s...ppts..................................................................hhsWh............Ts.FEL..Y......hsh.s.....s.......s.s+.s......hhts.hpclhpWhpppcp.clFl......................................................................................................... 0 103 166 245 +250 PF03194 LUC7 DUF259; LUC7 N_terminus Mifsud W anon Pfam-B_2902 (release 6.5) Family This family contains the N terminal region of several LUC7 protein homologues and only contains eukaryotic proteins. LUC7 has been shown to be a U1 snRNA associated protein [1] with a role in splice site recognition [2]. The family also contains human and mouse LUC7 like (LUC7L) proteins [3] and human cisplatin resistance-associated overexpressed protein (CROP) [4]. 
33.00 33.00 33.50 34.40 32.30 32.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.90 0.70 -4.85 8 689 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 292 0 416 616 10 242.10 34 76.98 CHANGED sDphRphLDQLMG...osRsscpp+st..l+asDccVC+uaLlshCPHDlFssT+hD.LGsCsKlH-.ph+t-YEpAs+pccah..aEh-hhchlpp.........................hlsDsD++lchu+pRLccspE-pss.sss.p..scpltslscc...........................IschLscsEsLGccGcV--u.clhcclEcL+sc+pcltc..................................................ps+sssPusu.h....................................spQKL+VCElCGAaLultDsDcRLADHFsGKhHLGYsplR-pltELccstsc.........+pc-Rcc+t......hsspRph .......................................t....tthL-pLMG...........tt.p....s.....h..s.pctp...........lpa.s.D.cV............C+.aLhshCPH-l..hs..sT............+....D......LG................C..............K.l.H...s...........t.....L+tpY....E...t...us...c.p...t....ch.........h.....aEh-...hhchLpp.........................hls-s-R+lchuccRL.....tpop...........cc.....h....s...s...t....s....s...s.p.......tcclppLscc................................Isp.hL.......tc.sE.pL.Gt...pGpV--up.ph.hpclE...pl+tc+cchcp...............................................................................................................................h....p..s.....t.h..s.....s...........................................................................................tppKLcVC-VCuAa..Lul..tDs..-..pRl....s..DHhhGKhHlGahpl..RcplpcLpcphtp......................................ppptptt.............................................................................................................................................. 0 139 215 329 +251 PF01657 Stress-antifung DUF26; Salt stress response/antifungal Bashton M, Bateman A, Eberhardt R anon Pfam-B_980 (release 4.1) Family This domain is often found in association with the kinase domains Pfam:PF00069 or Pfam:PF07714. In many proteins it is duplicated. 
It contains six conserved cysteines which are involved in disulphide bridges [1]. It has a role in salt stress response [2] and has antifungal activity [3]. 25.00 25.00 25.20 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -11.24 0.72 -3.70 175 2349 2009-01-15 18:05:59 2003-04-07 12:59:11 12 43 34 4 1517 2447 0 101.20 22 40.19 CHANGED hphC.....sssth.s.....sss.a.ppslpsllssLss..p.ust..............hassu.......s.......sss....pVYuls.Cps.D.l....s....s....ss..CpsC.lspuhppht....p.C......st....ppsuhlh.....hss.ChlRYs.ts.F ..................................................t...............t........sss.a..ppslp...pl.hs.s..Lss.pssss.....................ttFsssp..................s...st..sss........plYu.lspC.....p..s..D....l.....s......s.....ss....Cp..sC.lss.uhs.pl.......ph.......C.............ss.....pps..u...tlh........hsp...ChlRYp...................................... 0 213 869 1163 +252 PF03195 DUF260 Protein of unknown function DUF260 Mifsud W anon Pfam-B_2998 (release 6.5) Family \N 21.50 21.50 22.10 24.70 21.00 19.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.22 0.72 -3.73 46 709 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 35 0 506 727 0 98.50 43 47.36 CHANGED sCAACKaLRR.+Csss.ClhAPYF.....Ps..sp.s...ppFtslH+lFGsuNlsKhLppl..sspp.R.....scuhsSlhYEApuRhpDPVhGssGhlhpLppplpphps.-lshspppl ...............................sCuACKhLRR.+Csps.ClhAPYF................Ps......sp..s...p+FsslHKlFGASNlsKhLp....cl.............P..pp..R.....s-AssSlsYEA...........puRl+DPVY.GCVGhI.tLQpplpplps-Lshhpt..h............................................ 
0 69 316 409 +254 PF03268 DUF267 Caenorhabditis protein of unknown function, DUF267 Mifsud W anon Pfam-B_4201 (release 6.5) Family \N 25.00 25.00 48.50 36.40 21.30 20.90 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.07 0.70 -5.60 7 50 2012-10-01 21:54:26 2003-04-07 12:59:11 9 2 6 0 49 42 0 323.90 31 88.29 CHANGED lLGsa+allKhosLDCSppu........+lpulhsplhslshlshhhhRhhhhhph-upsLohuWAEsNhFuFhulpuhshulsLauWTKsuhlspahp+LtclR.LRlpsNpc.hDpYtph+hchFlaSh.allshhupAIashlp.pKIhhussshs.shhhhh.hh..hhshahshlpLshahLlphulsREhcaFNpELEcAppsKpLpsssllpcFsaRQtcllchlp.sscpLpsasussPLFhahuLhNulalho...hhs.lsslYhIhlhh.LhulIhhshhhLhPAuhVQ-plhpTo+ILMssp-FcpSKDspVYpTYRhMlDRSh+spophhVlsuhsIs.pshphAhFlIPNls .................................................lhG.achhh+hohLDC.shhs........phpthhstlhulhllhhhhh+.hhhh.hth..pup.LShsWAEushauFhuhpuhhsuhslhsWTppshl.pa.cpLsplRhLRlpss.pp.hDsYptl+h+Ahlhsh..hhsshhupu....lashhp.p+lhhu......sspss....hhahhhshlshlshah..sslsLshYhLlpsulsRElcaFNcELccApccKpL.................ps.slLpcFshRQt-llchlphsNcpLssasshuPhFhhh......uhlNusYlso...F.hsslPslahlhLhh.lhuslhhshhhLhPsutlQcplppTucILhsscphcpspDsplapTYRlMlDRs.+scopltVlsuFslsppshstAhFhlPNls........ 
0 15 20 49 +255 PF03269 DUF268 Caenorhabditis protein of unknown function, DUF268 Mifsud W anon Pfam-B_4252 (release 6.5) Family \N 21.10 21.10 22.00 27.00 20.90 20.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.14 0.71 -4.69 6 72 2012-10-10 17:06:42 2003-04-07 12:59:11 9 5 28 0 60 76 45 152.30 40 44.35 CHANGED DGhSGVVlGShpPWVEVpALppG.......sspILTVEYNs..LsIpEcF+-R..lSSIhPhDFspNacpYusoFDFAASFSSIEHsGLGRYGDPlDPIGDLREMLKIKCsLK.GGLLFLGlPlGpDAl.aNsHRIYGslRLAMMhhGFEWIsTFSG-oEpuhDLoupcL+ccsLFuhsQpTLVLRKL ........................................................pshlhuS...PhhEh.ul.pG..............AtplLolp.s...lph....psp..hooh...-Fspp..a.p.pastp.FDFsuohooIEHsGLGRYGDPlD.PhGDl+thhcl+ClLKpGG................LLFLulPlG.s.DultaNAHRIYGslR.LsMhh.Ga-hlsoaut.ppp...ph.................................................................. 0 18 24 60 +256 PF03312 DUF272 Protein of unknown function (DUF272) Mifsud W, Pollington J anon Pfam-B_3609 (release 6.5) Family This family of proteins is restricted to C.elegans and has no known function. The protein contains a ubiquitin fold. The GO annotation for the protein indicates that it has a function in nematode larval development. 25.00 25.00 25.10 27.90 23.50 24.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.81 0.71 -3.91 15 49 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 5 0 48 51 0 125.30 30 28.11 CHANGED FLWlsDpppcolh.pospasLphGHFF-GhFpcpssG.p..WpChcYl+pIctll.cGtlsss......KlplpsslppapPtsssp+.aPpsaucalGhllDsc...sKLstsCs.G+pVplptp+ls...tcpasWhVocll ............................aLWlhDpppculh....ho..........p....papLthGHFFEGhFpcptsu..+............WpC...p.....cYh+tl.pt..L..l..cGslsts......+I.lplplpp.a.pPs.sss.pc.aspshucalGcll...-tp..................s+Ls.tsss.G+...plplphtp...lt...ppsahWhVscl.................................................. 
0 14 16 48 +257 PF03409 Glycoprotein DUF274; Glycoprotein_Ce; Transmembrane glycoprotein Finn RD, Pollington J anon Pfam-B_4416 (release 6.6) Family This family of proteins has some GO annotations for positive regulation of growth rate and nematode larval development. This is probably a family of membrane glycoproteins [1]. 19.50 19.50 19.50 21.00 19.30 19.20 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.11 0.70 -5.71 17 125 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 5 0 124 122 0 283.90 24 79.27 CHANGED s-sshplYl.AuuDcsphLppI...hlTssspshoLspLpss...pssG.hpsas..lsssshloTo.sssshtsLsGhIYloospQtpDss.FtVYslpss.pslshs....sspoTllhLNT.h........stPhtoShlophsQsssoslhhYtGhPtDshpphs......sphFsNPhhhps.........tsphFFssVEshpl.uLssaYl+ohs.slsFhlcstahs...hsshsTTus.sTTGhhMp..spssssh.sVNhtpDss.hs.GsSGs.lsutls..tusloVsh.ssssshppshsssp.hs..saphshhu.pshplsST..sshsGpaYlQYFshpGs..s...oooshssp.sssp..........................................................lpTTTKuusslplhhShhlhhhh.h ...................................................................................sshplYl.ApsDs..sthLppI...hhsssst.ph...........oL.pLtt.....ps.sG...p.sh...lp..s.shhlsos.sss.hptLsGhlYloo.tQhp.....ss................s.F.Vhslptt.p.lpht.......psThlhLNoth...........P...sohlophtQs.ss..h.ha.uhPtst.p..t......p.hFpNPh.h............................haFsplE.hpl.sh.haYhps..s.shph.lpstahs...................ps.hs..Touh.sTTGhhhp..s....pph.slphhp.....D.p.....hsGhsGh.l..........s...h....splsh.....t..s.....t....t..sh...tt.........h.h.h.s.pp..hplps.s..s...s.G.aah.....QYahhps............ss.s..s..............................................................................ooTt......................hh..................................................................................................................................................................................................................................... 
0 26 36 124 +258 PF03357 Snf7 DUF279; SNF7; ESCRT-III; Snf7 Mifsud W, Moxon SJ, Mistry J, Wood V anon Pfam-B_1641 (release 6.6) Family This family of proteins are involved in protein sorting and transport from the endosome to the vacuole/lysosome in eukaryotic cells. Vacuoles/lysosomes play an important role in the degradation of both lipids and cellular proteins. In order to perform this degradative function, vacuoles/lysosomes contain numerous hydrolases which have been transported in the form of inactive precursors via the biosynthetic pathway and are proteolytically activated upon delivery to the vacuole/lysosome. The delivery of transmembrane proteins, such as activated cell surface receptors to the lumen of the vacuole/lysosome, either for degradation/downregulation, or in the case of hydrolases, for proper localisation, requires the formation of multivesicular bodies (MVBs). These late endosomal structures are formed by invaginating and budding of the limiting membrane into the lumen of the compartment. During this process, a subset of the endosomal membrane proteins is sorted into the forming vesicles. Mature MVBs fuse with the vacuole/lysosome, thereby releasing cargo containing vesicles into its hydrolytic lumen for degradation. Endosomal proteins that are not sorted into the intralumenal MVB vesicles are either recycled back to the plasma membrane or Golgi complex, or remain in the limiting membrane of the MVB and are thereby transported to the limiting membrane of the vacuole/lysosome as a consequence of fusion. Therefore, the MVB sorting pathway plays a critical role in the decision between recycling and degradation of membrane proteins [1]. A few archaeal sequences are also present within this family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.96 0.71 -4.87 34 2816 2012-10-03 05:15:35 2003-04-07 12:59:11 16 31 380 24 1871 2693 44 169.10 19 70.47 CHANGED cshhpLccshctlc+cpcplEpplcc.ctpl....+chtpp.....ts...........KctAhhhLKc++phEpplsphtsphssl-phthslcshpssppshsuMptuscshKsh..ppphcl-clcclM--hp-ph-htcpIpEslucshs.....sshDE--lptEL-pLtpE.h.pp...................sspLPssPossh .......................................pLcpshc..pLp+p...t...p.........pl..cpp.cp....p..tpl.........+ch..h.pp......sp..............................pptA.hh....h.h+pthp....h.c.pp.hpphh..........stts.p....l.ct.hthplp.................st.ps...........ppps...hpu.hpt....uscs....hc.ph...pp..p.h..s......l.p..........c......lpplh..c-h.p.c...p..h.ph....h....s............l..p-....h..l..ss...s....h.s...............sp.h-.....E........-...E....l......-...t...E..l.p......p..l.h..pE...ht.p.................................t.ph.s.ths....h......................................................................... 1 636 1020 1514 +259 PF03380 DUF282 Caenorhabditis protein of unknown function, DUF282 Mifsud W anon Pfam-B_2840 (release 6.6) Family \N 22.80 21.80 22.80 21.80 20.30 18.00 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.08 0.72 -4.21 29 67 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 7 0 65 80 0 37.50 43 23.36 CHANGED PCosCsK.IYDssCQGhGlPShtsaCssAu-VslsYolGs .......sCssCsp.lY-s..sCpGhGlPshhsaCsTAuElslpYolG...... 0 23 24 65 +260 PF03368 Dicer_dimer DUF283; dsRNA_bind; Dicer dimerisation domain Bateman A, Mistry J, Eberhardt R anon Bateman A Domain This domain is found in members of the Dicer protein family which function in RNA interference, an evolutionarily conserved mechanism for gene silencing using double-stranded RNA (dsRNA) molecules. It is essential for the activity of Dicer [1,2]. It is a divergent double stranded RNA-binding domain [3]. 
The N-terminal alpha helix of this domain is in a different orientation to that found in canonical dsRNA-binding domains. This results in a change of charge distribution at the potential dsRNA-binding surface and in the N- and C-termini of the domain being in close proximity [4]. This domain has weak dsRNA-binding activity. It mediates heterodimerisation of Dicer proteins with their respective protein partners [4]. 22.60 22.60 23.10 22.70 22.50 22.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.00 0.72 -4.21 29 491 2009-01-15 18:05:59 2003-04-07 12:59:11 9 58 217 1 308 546 1 94.40 28 5.81 CHANGED AlshLt+YCupLPpDsasphpPpaphtpts.s...........thhs.plhLP..lsusl+p.lhGp.shpsp+hAKpsAAapACptLachG.LsD+LlPl..hccphtt .............................................ulshlp+YCupL........P.s..D.t...........a....s.p.h.p..P.paphpphp..s.............................................thhs.plhLP.....hsuP..l...+p...lhG....shs........spchA+psAAhpACppL+ch...G...tLs.D.pLhPh..hpc...t.......... 0 93 165 251 +261 PF03382 DUF285 Mycoplasma protein of unknown function, DUF285 Mifsud W anon Pfam-B_2864 (release 6.6) Family This region appears distantly related to leucine rich repeats. 
25.00 10.00 25.20 10.00 24.70 9.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.06 0.71 -11.96 0.71 -4.54 147 2904 2012-10-02 21:32:02 2003-04-07 12:59:11 9 141 295 0 944 3084 4291 103.50 29 50.46 CHANGED oshpthF..tss...p.......ph........s.psl.......ssWD.....TSsVTsMssMFtsAp...sF..Nps.I....u........s.W.sTSsV........ssM.stMF....tsAps..............F.NQ...sl....us............WssSsVpsMppMFps...AssF.Nps...l....u..sW..ss.usV.pshp.....tMFtsu..ps...Fsps.l......s.sW ......................................................................................................................................................................................................sl........spa..s......s.S.pV.....s.s..M.pt........M......F..t........s......s.....p..........s.h.........s.ts....l......u................s.W..s.s...S..s...V..............ssM..s..tMF..........ts.s...p.s...................Fsp......sl....us.............................W..s.....s..S..p.V.........s.....s......M.p..tM.Fts............sp.sa...s..ts..........l....s...pW..sh.....pp..h.........t.....h................................................................................................................. 0 393 623 794 +262 PF01060 DUF290 Worm_family_2; Transthyretin-like family Bateman A anon [1] Family This family called family 2 in [1], has weak similarity to transthyretin (formerly called pre-albumin) which transports thyroid hormones.\ The specific function of this protein is unknown. 
24.90 24.90 24.90 25.20 24.80 24.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.84 0.72 -3.97 60 474 2009-01-15 18:05:59 2003-04-07 12:59:11 18 15 21 0 419 374 1 79.90 33 48.62 CHANGED +G+LhCsscP..spslp.V+La-c-.p........s.DchLspspT..cssGpFplpGspsEl....os.........I-P..hlpIaHcCsstsh.......Cp.+chplpIP .................pGpLhCs.spP.....ss...slp.V+La-cD.p..........................s.D..-lls.p.shT....cssGpFplpG.s..p.sEh.......ss..................I-P....hlpIaHcCsstsh................Cp.+phph.lP............................... 0 150 203 419 +263 PF03478 DUF295 Protein of unknown function (DUF295) Bateman A anon Pfam-B_790 (release 7.0) Family This family of proteins are found in plants. The function of the proteins is unknown. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.81 0.72 -4.60 168 1123 2009-09-11 23:18:59 2003-04-07 12:59:11 13 33 21 0 656 979 2 57.40 22 14.95 CHANGED hhclp..sl....Gc.cslFl.up.spshsss..s...pp...............u.lc.sNslY.....Fs....cs.............................shsl..asl ................hhclp..sL....G.s.csl.....Fl....Gp..s......p.uhs.hs....u.....pp......hs.......u..lc..ssslY...Fh.....cs......................................h.................................................... 0 78 186 369 +264 PF03556 Cullin_binding DUF298; Cullin binding Finn RD, Mistry J, Wood V, Eberhardt R anon Pfam-B_3021 (release 7.0) Family This domain binds to cullins and to Rbx-1, components of an E3 ubiquitin ligase complex for neddylation [1-3]. Neddylation is the process by which the C-terminal glycine of the ubiquitin-like protein Nedd8 is covalently linked to lysine residues in a protein through an isopeptide bond. The structure of this domain is composed entirely of alpha helices [1,2]. 
27.90 27.90 28.80 28.30 27.80 27.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.92 0.71 -3.95 46 733 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 292 14 456 685 5 115.20 37 44.82 CHANGED lpcl+pplsplcp.....cL..........ptDspt............F+clYcaoFsaup...cps.....Q+sLsl-hAlshWpLlh..........................ssp.h..............shlctWhcFL.ppp...............pc+u....................Is+DoWshhLpFspph...p.tshosYD.--uAWPsllD-FVEahc ................................................................................pclpspl.spLcppL......................p...-.ssp...........F+chYpasFsau+.....pts.......Q+uL..cl-hAlshWpLlL............................stp..a..........shlshWhpFL.ppp...........................ph+s...........................Is+DpWs.lL-Fsppl.......s..s-hSNYD..-...-...........G.A.WPsLlD-FVEah.............................. 0 137 223 342 +265 PF03619 Solute_trans_a DUF300; Organic solute transporter Ostalpha Finn RD, Eberhardt R anon Pfam-B_3382 (release 7.0) Family This family is a transmembrane organic solute transport protein. In vertebrates these proteins form a complex with Ostbeta, and function as bile transporters [1]. In plants they may transport brassinosteroid-like compounds and act as regulators of cell death [2]. 
20.70 20.70 20.80 21.10 20.50 20.60 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.75 0.70 -5.08 55 925 2009-01-15 18:05:59 2003-04-07 12:59:11 11 17 295 0 664 890 27 242.80 29 59.14 CHANGED sphshh..luuhhslhAhhlShatIhpHLhpYppPppQRhllR.........ILhMVPlYAlsSaluLlh.ptu.........hah-slR-sYEAFlIYsFhsLLhsaLGG....Ep....slhthhp.t+.shp........ashPl.................t.hh.cs.............................hhs.pahphsKtGlLQY...........................sllKPlhsllsllhphhGhY.tpuphshstuahalsllhNhSlolALYsLslFYhshcc..-LpPacPlhKFLslKsllFhSFWQGlllulLsth..Gllpsttthtt..........plusulQshLlClEMhhhAlsHhaAFshpsY .................................................................h.....hhush.hshhshhlohh.lh...HhhtYp...p...P......................Q.+..h..ll...R.........Ilhh.....lPlY.uh..Salu..Lhh..pts.............hahsslR-sYE..Aa.slY...sFhtLhhp.alGu..............cp...........sl.ht...hp...sc..hp................psh.sh....................................hhh...h....s...............................................................................................hhhs.....thht.......hhKhu.slQa...........................s.ll+Plh..sll..s..llhph.h...G...hY..p.........-...up......h......s....hp.t..ua...halsllhNlS..........h..sh.AL....YsLhlFa.hsh..+c.............Lp...s....h.p.P..l.hKFl.s..lK.h..l.lF..h...........oaW..Qu.hhl.u.lL.th.....Gh....l.sh...t................p..lssulps.h.llClEMhhhulhthauFshpsY............................................. 2 236 372 538 +266 PF03703 bPH_2 DUF304; Bacterial PH domain Yeats C, Bateman A anon Yeats C Domain Domain found in uncharacterised family of membrane proteins. 1-3 copies found in each protein, with each copy flanked by transmembrane helices. Members of this family have a PH domain like structure [2]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.37 0.72 -3.96 219 4794 2012-10-04 00:02:24 2003-04-07 12:59:11 9 20 1523 0 1147 3777 643 80.00 18 42.58 CHANGED h+sht..apl..osc+.ltlpt.Ghhs..+cppplshsclpslph.p..l.RhhGh.uclslphssss....................plphhslspsccltphl ..........................h.hp...atl....pscp..lh..lpp..G.l.ht.......+c..p..thlshpR.lQslsh..pp.u.hltRhhGl..sslplpouuss...................................h.lsh.lsh.ppspplht.................................................. 0 385 845 1038 +267 PF03713 DUF305 Domain of unknown function (DUF305) Yeats C, Bateman A anon Yeats C Domain Domain found in small family of bacterial secreted proteins with no known function. Also found in Paramecium bursaria chlorella virus 1. This domain is short and found in one or two copies. The domain has a conserved HH motif that may be functionally important. This domain belongs to the ferritin superfamily. It contains two sequence similar repeats each of which is composed of two alpha helices. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.20 0.71 -4.39 86 1321 2012-10-01 21:25:29 2003-04-07 12:59:11 8 8 713 3 551 1532 484 122.80 24 68.56 CHANGED DhsFhppMlsHHpQAlcMuphsh...p+upssplcsLAppIhsuQpsEIspMpuWLptWstssssss................................................t.ttttttssMs.......GMhostphspLpsupG.............sshDphFLphMIsHHpGAlsMApstl.........p...pu.pssplp.plApsIlssQpsEIspMpphL ............................................................................a...h..ap..u.l.hs................t....th.ths.tl..tpt.pht.h..hh.................................................................................t....tth...............th..hpt........h.t...h.t.t.h..t........................sphDhhFhph.M.l.s.H.HpGAl.t..MA.p.h.t.l............p......u..ps.s.p...h.cplApp...llpsQptEIttMpth............................................................................................................................. 0 158 358 476 +268 PF03754 DUF313 Domain of unknown function (DUF313) Finn RD anon Pfam-B_2540 (release 7.0) Family Family of proteins from Arabidopsis thaliana with uncharacterised function. 24.40 24.40 24.40 24.50 24.30 24.20 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.27 0.71 -4.16 14 95 2012-10-02 12:51:43 2003-04-07 12:59:11 8 4 11 0 70 93 0 102.50 29 36.72 CHANGED pphPcWLlpsM+chpG...pcP.+LIhcKs.LhpTDl.ssspuRL...uhPhspllpsDFLT.-EpRhl.......................c-ct.sscshGlsshLV-sctpcaslpLK+WsM......pushpYsLspGWNsVVcs .................................P..lhphhpphpu...p-s..+hlhpKp.LhtoDl.ssspsRL....hPhp.plhp.p..-......FLTppEpchl..................................p.p.pt...t..pcthGltshlls........ph....pph.tlph++WpM........pts..YsL..psWp.pllp.s............ 
0 21 40 47 +269 PF03761 DUF316 Domain of unknown function (DUF316) Finn RD, Bateman A, Pollington J anon Pfam-B_2972 (release 7.0) Family This family of proteins with unknown function are from Caenorhabditis elegans. The protein has GO references indicating the protein is a positive regulator of growth rate and is also involved in nematode larval development. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.91 0.70 -5.06 9 172 2012-10-02 13:45:52 2003-04-07 12:59:11 10 6 7 0 171 227 0 232.30 16 67.50 CHANGED hlhlhhhllhhshssspcLo.cENppRhpoCGpchls.ps...........ssshhsptpppsWhlhsthpstsptphh........suohISsRHllTsuplhhsscptW.h.....cpssspssCsus..pL.VP.-lLcclcl......sphpsppshptpls+AhlhshC......h..t.phttsPMllElcts.....sssshsCLuscspp.hccs-hlcsYGl......psstchhcpphslsss........s.hphsosthhsptctuGsLlpphss+hsllGhtussshtspt......sthFaslsp.hpppICchsGIC ............................................h.........................tpLs.pENt.h.tpCG.......................................................................................................ts..Whh..h...h..t...................................sushISsRHllsss..p...hh..h.......p.....t........p........t.....h....t......................t..h....t.p.Ct..t.t......h..........ls.p.p..h.h.p.p.h.h....................t................h....hp.hhhhthC......................................t....h....ll...Ehpps........................h.....t....sCl.s.......p.......p....................tp......h..p.hath......................tt...h......t...hph.t.h......................t.h.ht......t.t.....s......tp....Gus.hlt........h..s.sc...p..l.lGl...hs.tss..t..........................hah..p.hth.htpplCphsGlC.................................................................................................................................................................................................... 
0 23 38 171 +270 PF03777 DUF320 Small secreted domain (DUF320) Yeats C anon Yeats C Domain Small domain found in a family of secreted streptomyces proteins. It occurs singly or as a pair. Many of the domains have two cysteines that may form a disulphide bridge. 20.20 20.20 22.10 20.50 19.90 19.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -8.88 0.72 -4.20 17 305 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 44 0 123 327 0 60.30 52 54.99 CHANGED usuusAtu..susApGuAssSPGVlSGNsVQlPVHVPVNVCGNTVsVlGlLNPAhGNtCsNs .................s..uuhAtu..suuA.pG.....s.As.u..SPG....VlSG..NsVQlPVcVPVNVCGNoVsVlGlLNPAaGNsCsN.............. 0 47 99 123 +271 PF03860 DUF326 Domain of Unknown Function (DUF326) Yeats C anon Yeats C Repeat This family is a small cysteine-rich repeat. The cysteines mostly follow a C-X(2)-C-X(3)-C-X(2)-C-X(3) pattern, though they often appear at other positions in the repeat as well. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.56 0.73 -7.95 0.73 -3.88 20 528 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 287 12 108 324 6 22.60 39 31.24 CHANGED spchpppCt....chCpcCuctCppps .....pchCphCA....chCppCA-tCtch..... 0 38 77 102 +272 PF03935 SKN1 DUF338; Beta-glucan synthesis-associated protein (SKN1) Finn RD, Moxon SJ anon DOMO_DM01831 Family This family consists of the beta-glucan synthesis-associated proteins KRE6 and SKN1. Beta1,6-Glucan is a key component of the yeast cell wall, interconnecting cell wall proteins, beta1,3-glucan, and chitin. It has been postulated that the synthesis of beta1,6-glucan begins in the endoplasmic reticulum with the formation of protein-bound primer structures and that these primer structures are extended in the Golgi complex by two putative glucosyltransferases that are functionally redundant, Kre6 and Skn1. 
This is followed by maturation steps at the cell surface and by coupling to other cell wall macromolecules [1]. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.72 0.70 -6.35 26 512 2012-10-02 19:29:29 2003-04-07 12:59:11 10 17 128 0 415 1147 311 346.50 30 68.38 CHANGED asLhh--cEsDDaLHNPDP..csph-+p+hhh-hchhs+RuhsslhGllhLhluhlslFlshPlLTao...sssp+tss..............pshphloshpYPhLuuIR.ouLlDPDTPpsAho+pup.c.GppWpLVFSDEFNs-GRTFY-GDDQFaTAsDlHYsATpDLEWYDPDAl.TTtNGTLplRhDAFpNHsLhYRSGMlQSWNKhCFT.tGhlElSAsLPshGsluGLWPGlWTMGNLGRPGYhAoTEGlWPYSY.-uCDsGITPNQSSsDG.....................ISYLPGQRLssCTC.sGEDHPsP.....G.sGRGAPEIDllEuphssshs.........lGsASQShQlAPFDIWYhPDY...salplYsposTshNoYsGGPFQQAlSulTsLNssWY..tt..su.ttFQpYuaEYh...N-ccsG.......YlsWaVGcpsTaTlhupALtPNG..NIutRhIScEPMSlIhNLGlSsNWAhIDWtpLh..FPssMpIDYVRlYQspsphs....VsCDPs-YPTh-YIppH.ssYpNsNLTsW....ppuGY..oaPKNpLhss.C .......................................................................................................h....................................................................................................................................................................h...lD..pT...P......th.........h.......s......p.......s.........tth.L..............lhSDE..F.....p........................s.....R......o.......F..........h....G.....-..D.........h.a...p...u......c.....h.........................s.s...t..s....h.E.....h.....Y.............s............p...h...h.......s.T...............t................s.....G.......h............h..................h........p..................t..........................................................................t................t..................h..........................a.......puu................M................l...p............o.....W................N.................p.................hC..ap.....t........G..hl..Eh.............p.........hpL.........P..............s..............................
.....s..t.......h...G....................h......WPuhW.h.G..N..Lu....Rs.h.a..uospt..h......WPaoY.ptC-.....shh...s.......................................................................+lstCss.....s....t.........t.s............uRuAPEIDhhEuth..................................hSpS..hQhu...P.............................................................................s.....p..h........s......................h........p..........p........uhSs.........ht.th.a..............................a.hathE......ah......................p.u..........lpW.h.tt...p.......a...p.....h..s..........t.u.....h......t.....s..ss..........p........s..........+.......h...........E.......P.......M...................hlhN..huh..S...sa...............t.....................................p...............l................hP....h.lDalRlYQ...........s...t...p.......lsCDP.s..a.P..T.p.aIt.t..a...t.Y..s..p......................................................................................................................... 0 198 310 400 +273 PF03990 DUF348 Domain of unknown function (DUF348) Yeats C anon Yeats C Domain This domain normally occurs as tandem repeats; however it is found as a single copy in the S. cerevisiae DNA-binding nuclear protein YCR593 (Swiss:P25357). This protein is involved in sporulation part of the SET3C complex, which is required to repress early/middle sporulation genes during meiosis ([2]). The bacterial proteins are likely to be involved in a cell wall function as they are found in conjunction with the Pfam:PF07501 domain, which is involved in various cell surface processes. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.69 0.72 -4.38 167 1769 2009-01-15 18:05:59 2003-04-07 12:59:11 9 27 678 0 449 1328 21 42.70 28 24.80 CHANGED sVslsl.-Gcpp.plh...TtusTVs-lL....pptsls.l........s.ppDplsPuhss ............Vslsl.sGcpp.plh...Tss.sTVs-hL......pptsls.l........s.spDtVsPuhs.................. 
0 167 335 408 +274 PF03993 DUF349 Domain of Unknown Function (DUF349) Yeats C anon Yeats C Domain This domain is found singly or as up to five tandem repeats in a small set of bacterial proteins. There are two or three alpha-helices, and possibly a beta-strand. 21.40 21.40 21.60 21.40 20.60 21.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.62 0.72 -3.82 128 2482 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 722 0 644 2200 886 77.50 24 48.45 CHANGED -sLWpR.FcsApcta.cp+pthhpphcpppppNhptKctLlpcA.Eplts.ssc.....hpsstpph+pl.ccW+slG....plP.Rcps .................................-tLWp+.Fpsspstahct+pt.th.......pptcppppp.shptKc....tLlpc....A.c.tLss..ssc................hpsstpph+pL.h....c....cW+.s.lG....tls.+c.............................. 0 229 508 614 +275 PF04008 Adenosine_kin DUF355; Adenosine specific kinase Bateman A, Lott S, Mistry J anon COG1839 Family The structure of a member of this family from the hyperthermophilic archaeon Pyrobaculum aerophilum contains a modified histidine residue which is interpreted as stable phosphorylation [1]. In vitro binding studies confirmed that adenosine and AMP but not ADP or ATP bind to the protein [1]. 25.00 25.00 34.70 34.70 20.10 17.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.85 0.71 -4.72 34 283 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 277 26 124 228 14 154.30 59 95.54 CHANGED Vsl-hP-.ssNlIlGQoHF.IKTVEDLaEsLlsoVPul+FGlAFsEASG.pLlRhsGND-cLhclAhcNAhtIuAGHsFllhl+su.aPINVLNslKsVsEVspIFsATANPlpVIVAcTc.pGRGllGVlDGhsPhGVEs-cDhpcR+chLRc.IGYKh .....Vsl-pP-.ssNlIlGQoHF.IKTVEDLaEALVsssPul+FGlAFCEASG.RLVRaoGNDt-LlcLAscNAhsIGAGHsFlIaL+cG.aPI.NVLNslKsVPEVCpIaCATANPlpVlVApTs.pGRGllGVVDGhoPLGVEo-cDltpR+-LLRp.I.GYK......................... 
0 45 78 104 +276 PF04013 Methyltrn_RNA_2 DUF358; Putative SAM-dependent RNA methyltransferase Bateman A, Eberhardt R anon COG1901 Family This family is likely to be an S-adenosyl-L-methionine (SAM)-dependent RNA methyltransferase [1]. 20.50 20.50 20.60 20.50 20.40 20.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.12 0.71 -4.77 7 215 2012-10-01 22:53:19 2003-04-07 12:59:11 7 2 203 7 89 165 66 185.90 39 94.48 CHANGED MR.FllhuscAhTssshsLcDLPGsuGRlDllCRshssAhalSHuhRcsVplallLhGsPsPP+olphcusclc...hpPDEtslAhhlp+ALpuht....tttcph.pspPGlaVsphuFEsllptlhc..hslhhLcEcGtDIpssphs....NPsFllGDHlshsccphphL-ch...sh+lSlGPhsLhssHslsllpthLD+hth ...................................MRsFll+A+sAsTsup.h..Lc-lsG.pu+h-lLs+shhsulFhupuhRcDVhlaLVLpus.....D.s.+TIphcus.-lpp..sh.-pt.lAhll+th.tust......hsc-ph+.spPGl..pVcs.hoFEtLlt-lucc..t.......pLYhhcccGcsIR-hchss......NPsFlLoDHIshsccsts.lc+lG..scKI.SLGPchLaASpClsllHNElDpt...................... 0 20 51 70 +277 PF04020 DUF360 Membrane protein of unknown function Bateman A anon COG1950 Family These proteins a predicted transmembrane proteins with probably four transmembrane spans. The function of these bacterial proteins is unknown. The sequences do not appear to contain any conserved polar residues that could form an active site. 21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.72 0.72 -3.70 107 1355 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1191 0 468 1052 200 109.60 31 76.40 CHANGED h...hphllphll.sAluLhlsuhlh..s........G...lplsu.....................hhsslluAlllGllNsll+PlLh.llolPlhl....lThG........LFsh.........VINAlhlhLs.uhll.s....G.....Fpl.pu.Fh.sAlluulllollshll ..............hphllphll....sulslhlluh..lh.s.........G.....lplsu.....................hhsAlluAlllullNsll+PlLh..ll....oLPlsl....lTL....G........LFt.h.........VlNAl.hlhls.uhlls...............uFpl....su.....Fh.sAlluulllSllshll.......... 
1 163 349 428 +278 PF04037 DUF382 Domain of unknown function (DUF382) Wood V, Finn RD, Moxon SJ, Sammut SJ anon Pfam-B_10232 (release 7.3); Family This domain is specific to the human splicing factor 3b subunit 2 and it's orthologues. Splicing factor 3b subunit 2 or SAP145 is a suppressor of U2 snRNA mutations. Pre-mRNA splicing is catalysed by a large ribonucleoprotein complex called the spliceosome. Spliceosomes are multi-component enzymes that catalyse pre-mRNA splicing and form step-wise by the ordered interaction of UsnRNPs and non-snRNP proteins with short conserved regions of the pre-mRNA at the 5' and 3' splice sites and branch site [1]. 21.90 21.90 22.30 24.60 21.60 21.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.57 0.71 -4.32 24 357 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 295 0 253 336 8 123.00 59 20.84 CHANGED Vp+P-lVEhaDssupDPhhLltlKup+NsVPVPpHWspKRcYLpuKRGlEK.PFpLPcaIcpTGIschRssl.-c....-scpoLKQKpRERVpPKhG+lDIDYp+L+DAFF+aQo..KPc.LopaGDl......YYEGKEh ........Vt+P-VVEhaDsoApDP+LLVpLKuhRNoVPVPpHWshKRcYLp..............GKRG.IEKPPFpLPcFIpcTGItEMRp.........Al.EK............--ppoLKpK.RE+VpPKMGKlDIDYQKLHDAFFKaQT..KPc.LThaGDl......YYEGKE........................... 0 88 143 209 +279 PF04075 DUF385 Domain of unknown function (DUF385) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of Mycobacterium tuberculosis proteins. 
20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.68 0.71 -4.19 10 1560 2012-10-02 11:35:36 2003-04-07 12:59:11 9 13 225 27 512 1351 399 123.90 24 82.54 CHANGED s.hh.Rt.sthaRcssG.tlGpph...G.hP.hlLLpTsGR+oGpsR+TPL..htsccssp..ahlVAShGGss++PsWYhNl+AsPcVclplGscphssTA.R.ls..s--+Achathsstshsu.YssYQstTsRp.lP...VhVLps .....................................................h.............h.t..s....h.u.t...h.............s...h.......s...hhl..LpssGR.KS.GpsRpoP..L.............hh.h..c.c.s.sp.........hhl.Vuo.t...Gu.....p...p....ss....WhpNlp.A.s..P.p...s..pl....p....h.....s.s.....c.....p.....h.s...s...s.A...c.ls..ss.-.+....s.phh..h..h.t.......s...s...h...h...t........s.t.....t..ls...hh.......................................... 1 139 367 466 +280 PF04087 DUF389 Domain of unknown function (DUF389) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of hypothetical bacterial proteins with an undetermined function. 22.60 22.60 22.90 22.70 21.90 22.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.89 0.71 -4.23 127 1004 2009-09-11 12:27:41 2003-04-07 12:59:11 9 4 851 0 318 852 168 140.40 38 35.44 CHANGED MlIuPLhuPlhuhuhuhshsDhpLhp+ulhs.....lhlGhhlulhhuhlhu.hlhs.........l..p..hssElhuR..opPs....lhDlhlAlsuGsAGshuh.spp............h.usulsGVAIAsALlPPlsssGluluhu...................................p.hshuhGuhhLahsNl.luIslu ....................MLISPLMuPIlGluhululhDhcLl++Shps.....Lhltlhlullsuslah.hlsP......................ls....ssuElluR.........TsPs........lhDllIAlsuGlAGhluhppc.........ptssslsGVAIAsALhPPlsssGhulAtu....................................................s.hphhhGuhhLahlNsshIsls...................................... 2 94 200 279 +281 PF04155 Ground-like DUF398; Ground-like domain Bateman A, Moxon SJ, Burglin TR anon Pfam-B_3296 (release 7.3) Domain This family consists of the ground-like domain and is specific to C.elegans. 
It has been proposed that the ground-like domain containing proteins may bind and modulate the activity of Patched-like membrane molecules, reminiscent of the modulating activities of neuropeptides [1]. 35.40 35.40 35.70 36.60 31.30 35.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.44 0.72 -3.67 41 278 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 8 0 267 225 0 77.80 24 27.83 CHANGED spsspCsstcLcplhpcshpt.........pshstspctlppsspppass..papVICupscFuasspss..paCph.ppsshsChsap .................................ps.pCsstcLcplhppshp...........................sssppupctlppsscpphss...pFsVICupscFuahspss..taCph.ppsshsChsat........... 0 89 124 267 +282 PF04241 DUF423 Protein of unknown function (DUF423) Kerrison ND, Finn RD, Pollington J anon COG2363 Family This family of proteins with unknown function is a possible integral membrane protein from Caenorhabditis elegans. This family of proteins has GO references indicating the protein is involved in nematode larval development and is a positive regulator of growth rate. 25.00 25.00 25.90 25.60 23.20 22.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.16 0.72 -4.06 133 2074 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 1956 0 507 1237 289 88.50 40 68.64 CHANGED ulGAFuAH...uL......c..sh.lsschhph.a.pTuspYphhHulALl.slu....hhht.......ssphhthuuhhhhhGllLFSGSLYhluLouh.....+hlGh.lTPlGGl ........................hLGAFGAH...sL..................p.....pp..lust.....t.h.sh...h.pTGlpYQhaHuLAlLslu....lhhtp...........huhhh.thu..uhhhhhGhlLFSGSLYsLuLotl.........+hhuh.lTPlGGl............................. 
0 160 296 414 +284 PF04255 DUF433 Protein of unknown function (DUF433) Kerrison ND anon COG2442 Family \N 21.40 21.40 21.40 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.31 0.72 -4.55 140 905 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 315 2 319 945 159 53.80 31 47.88 CHANGED Ish...sPslhuG+PsI+GTRlsVpsllphl.usGhoh-EILps..YPp.LstcDlhAALtYA ................IshsPphhtGpPsIcGpRlsV.tsllphl.ssGh.o.h--llpc..aPt.LshcslhsALtYh............ 0 106 251 305 +285 PF01862 PvlArgDC DUF44; Pyruvoyl-dependent arginine decarboxylase (PvlArgDC) Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family Methanococcus jannaschii contains homologues of most genes required for spermidine polyamine biosynthesis. Yet genomes from neither this organism nor any other euryarchaeon have orthologues of the pyridoxal 5'-phosphate- dependent ornithine or arginine decarboxylase genes, required to produce putrescine. Instead,these organisms have a new class of arginine decarboxylase (PvlArgDC) formed by the self-cleavage of a proenzyme into a 5-kDa subunit and a 12-kDa subunit that contains a reactive pyruvoyl group. Although this extremely thermostable enzyme has no significant sequence similarity to previously characterised proteins, conserved active site residues are similar to those of the pyruvoyl-dependent histidine decarboxylase enzyme, and its subunits form a similar (alpha-beta)(3) complex. Homologues of PvlArgDC are found in several bacterial genomes, including those of Chlamydia spp., which have no agmatine ureohydrolase enzyme to convert agmatine (decarboxylated arginine) into putrescine. In these intracellular pathogens, PvlArgDC may function analogously to pyruvoyl-dependent histidine decarboxylase; the cells are proposed to import arginine and export agmatine, increasing the pH and affecting the host cell's metabolism. 
Phylogenetic analysis of Pvl- ArgDC proteins suggests that this gene has been recruited from the euryarchaeal polyamine biosynthetic pathway to function as a degradative enzyme in bacteria [1]. 21.00 21.00 21.70 23.30 20.80 20.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.93 0.71 -4.64 33 246 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 222 50 144 240 51 163.40 30 93.55 CHANGED hlP++hFhTpGsGcpc.spLs.......uF-hALt-AGItphNLVpVSSIlPPps..chlscppuhthLsPGpllasVhu+..tsoscsschIuAulGhAhsp........pspaGhlsEacuhsppcc.utchucchApphhpsp....................utclhcsttlsppscl..p.ucasoslAAsValh .........hPpphFhTtGsGcuc..stLs........uF-hALhcAsItshNLVplSSllPsps...pllshpps...c.hl..Gpll.slhAp..ssos.csschIuAulGluhsp.c.......ppthGhlsEapuhsp.s.pc..stchscchspthhppp....................shclt..-hphhspptpl..p.sthsTslussshh............................................... 0 48 93 125 +286 PF04332 DUF475 Protein of unknown function (DUF475) Kerrison ND anon COG2899 Family Predicted to be an integral membrane protein with multiple membrane spans. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.00 0.70 -4.84 7 451 2012-10-03 02:02:08 2003-04-07 12:59:11 10 2 409 0 130 677 107 296.80 50 83.95 CHANGED FDNAllNAuIL+pMS.hWQKhFLTlGILIAVFGMRLlFPllIVuhoAtlsPlcshcLALps...........sspYpcllpsAHPpIAAFGGhFLLMlFLsFhhc-cc.h+WLphlE+PLs+lG+lshlssllshlhLllhuthhstsuc.ht........tVLhAGLhGllsYhlVsulsphFc.....................ss....shthuG+AGlAhFLYLElLDASFSFDGVIGAFAITsD.VlIALGLG.IGAMFVRSlTlYLV+pGTL-cYVYLEHGAHaAIGsLAlIhLlolp..aclsEllTGLlGlsaIuhuhhtSlhtNRpp .......................................................................................................................................FDNAlVNAslL+cMoshWQ+hFLTlGILIAVFGMRLVFPllIVulsA..tlsPlcAlcLAlps...........PspYppllscuHstIAAFGGhF...LL..MlFLsFhF-ccc.l+WlphlE..p...h.....u......+.lu......p......l......s..slplhlAll.....hll.lhuthlst.spp..t...........olllAGl.hG....l.lT...a.lsVpu..luph.h-st............................................t.ts.......shsss..s....+..u.G...h..u.....t.F....L....Y....L...EV...LDAS...FSFDGVIGAFAITssl..llIAlGLG.IGAMaVRSlT..laLVc.+GTLscYlYLEHGAH...Y..AIG..s....L.A.l..Ihll.shh........h....cl..P.E..l.l.T.G.L..l..Gl.s...h.Iuhuhh....oSlphN+t.t............................................................ 2 35 82 106 +287 PF04720 DUF506 Protein of unknown function (DUF506) Waterfield DI, Finn RD anon Pfam-B_4111 (release 7.5) Family Family of uncharacterised plant proteins. 
25.00 25.00 26.30 25.50 24.80 19.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.48 0.70 -4.71 10 299 2012-10-11 20:44:42 2003-04-07 12:59:11 7 7 31 0 209 295 2 182.20 30 59.25 CHANGED Ls-lVpuFlE-sspt.pp.p.........pusus-uSs--s-psssu.p.tcsp-clcpLL.spshc-pp...l.scltpsscps.t..................stosh+cclsshLRp.hGYDAulCKS+WcpSsc....lPAG-YEYlDVlhsss.....t.RlllDlDFpSEFEIARsTcpYpplLppLPtVFVGps-RL+pll+llCcAAKpShKc+GlalPPWR+scYMQuKWhGsYcR ................................................................................................................................................................................................................h.....p....p.t......hh...t......pt.t.......h.tph.thhpt...........................ttsshhphlsptLpt...hGasuu.l.CcS.+W..p....p..o.sp........hP.u.G.................caEYlD...V.l.h...........t...s..p................Rlll-lcFRupF..ElA.R..soppYpt.ll.p.tLP....t.lFVGps-+LpplVplhspAu+pSh+........ppuhplPPWR+.pYMpuKWhus..R......................... 0 62 148 177 +289 PF04784 DUF547 Protein of unknown function, DUF547 Mifsud W anon Pfam-B_3926 (release 7.5) Family Family of uncharacterised proteins from C. elegans and A. thaliana. 
20.20 20.20 20.30 20.20 20.00 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.49 0.71 -4.22 101 769 2009-01-15 18:05:59 2003-04-07 12:59:11 9 30 360 0 388 743 150 124.20 29 25.55 CHANGED pls.....pph.scpEpLAFWINlYNAhslchlLpph...........lp.Slp...h.................s.a.pc.................................thhsluGpthoLssIE+tILRtp...................................hs-.PRlHFAlsCuohuCPsLc..saoupplcppL-puscc.Fl ...................................................t.ls..pph.scp-plAFWINlYNAhsh.............+sh.Lphh......lsts.tch...h........................shh.pc.................................sshsl....u.......G...p..ph.ohssIEptILRs........................................................................hs-.Ph.l+FALsCuuhos.Pslc....sYoupp...lcppLcpAtccal.................................................. 0 122 263 342 +290 PF04484 DUF566 Family of unknown function (DUF566) Finn RD anon Pfam-B_3992 (release 7.5) Family Family of related proteins that is plant specific. 
25.00 25.00 27.60 27.40 23.60 23.60 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.95 0.70 -4.91 18 186 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 20 0 122 170 0 289.00 27 56.20 CHANGED t.....................phuosssstspssspssutcthtst...............s.ss+spshsssupt.sossspsssSpSs........................s.hspthhuPspstss.....s..h....................sPs+tppsss..........................tsssssslh...shhsshh.+uKpstschE.csHpLRLLt..NRhLQWRFsNA+s-sshhspphsAE.ptLassWhplspLps.Vsh+RltlQhh+QclKLhtlLptQhsaL-cWstlEpcHuoSLsuAhcuLcAuTLRLPlssGApuDhpulKpAlsSAVDVMpshuSSlhpLLu.KV-thsslloELuplsspEphhLp .........................................................................................................................................................................................................................................................................................................................ttsts......................................................tsp.pth.s.pssp..ss..ss.p...h.s.s......................................................t..ss.h.s.pthh.oPsp.....tt.ss.........ss..st...h...........................sPsp.ptsss....................................................................................t.stssslh........shhs.shp..+s+t..stsp....hE.-sHpLRlLt..NRhLQWRFs.....NA+A-sshtspp.hsAE.ppLassWhphspLpcsVshKRhplQhh+pchK...Lhsl..LpsQ.......hs........hL-pWu.lEpcausuLssshcuLpAuoLRLPlssGA..........psD.hts.lppulsuAlclMpshtusltt.hhs.+spthsslls-LutlstpEphhh......................... 0 19 72 95 +291 PF04526 DUF568 Protein of unknown function (DUF568) Waterfield DI, Finn RD anon Pfam-B_4977 (release 7.5) Family Family of uncharacterised plant proteins. 
25.00 25.00 25.80 25.00 21.60 20.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.40 0.72 -3.71 19 208 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 26 0 118 209 0 101.80 32 29.55 CHANGED MsGoQullAap...sus..GshpshT.slsSYss..Ltpus...LuFsVspluAp...ss............uchtIFATlpLP...sNssslNpVWQsGshst.GsshshHshSGsNlpShusLDl .......................MsGoQALlAap.......sss...Guht..lh..Ths..lsuhss.....l..ptus......l.uasls.s.luAph..ss.............uphpIaATlpLs.....sstoplNpVWQsGshs...ussPthHs.h..su.sNlpShuslDh.................................. 0 14 75 98 +292 PF04502 DUF572 Family of unknown function (DUF572) Finn RD anon Pfam-B_3967 (release 7.5) Family Family of eukaryotic proteins with undetermined function. 24.00 24.00 24.00 24.00 23.40 23.80 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.22 0.70 -5.21 18 653 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 303 0 474 629 5 286.40 27 91.35 CHANGED MuERKslNKYYPPDFD.............PuKlP+h+psK....spQhsVRhMhPFNhRCsoCspY...IhcGpKFNARKEcVtsE.YLGl.IaRFYhKCspChsEIshKTDPcNsDYshEsGAoR.sapsc..........cttp-phcphcccccp........................EptsssMctLEpRTtDu+pEh-shtsL-EL+chppR..csslshsuhLcph..tcpcccptpcp-E-Dpphhcpltht......pppcp+Rhhs-..........p-tpc-.................t.sstspsspsutspsoshhspsstppspssstpphptss...................t.+sthssllhtKKptssssppst...................................tpsttpsspsssssuLstLst....hssS--s 
...................................................uERKshsKYYPPDaD....................................................sph...sht.cp.s+..................pt...hh.l.R..h.hPFsh.hCssCs..pa..........IhpGh+FNAcKcpVss...........Yh.u.h.Ia+FhhKCs.tCss.pIph+TDPcN....s....D....Yshpp.G.ApR..shc.........................................ttp...ppp.h...t....t..h.....p..c..c...c....cc.........................c.ttscsMt.tLE....pcttD..pc.pthc.........h.h.ttLp..cl.pp.hp.pc......pssh.s...hs...p...h...Lcph.............hcp.pc..c....t......h.pp.....pp...c.pcp..t..h...hpph..h......................pp.tp..p.chht................................tp................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 176 266 389 +293 PF04504 DUF573 Protein of unknown function, DUF573 Mifsud W anon Pfam-B_2087 (release 7.5) Family \N 29.50 29.50 29.90 29.50 29.40 29.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.20 0.72 -3.72 21 208 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 21 0 126 194 0 93.60 31 25.96 CHANGED thFQRl.Wo--DElslLQGhlDa.cscpGt..sshp.DhsuFa-hlKppIshcsSKsQhssKlRpLK+Kapsphp+......tsp-.sFspsHDpcsFcLu+hIWGs ...........................t..hpRl.WoccDElhlL......pGhlsa..psppGh.........ssht..Dhssha.-tl+pp.l.p.h.c.h.op.sQLh-KlRRLK+KYpsthp+........tu....tt..shppsH-pphacLu+plWs............... 0 23 58 87 +294 PF04535 DUF588 Domain of unknown function (DUF588) Bateman A anon Pfam-B_1439 (release 7.5) Domain This family of plant proteins contains a domain that may have a catalytic activity. 
It has a conserved arginine and aspartate that could form an active site. These proteins are predicted to contain 3 or 4 transmembrane helices. 21.10 21.10 22.30 21.40 20.80 20.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.80 0.71 -4.64 84 771 2012-10-03 17:26:12 2003-04-07 12:59:11 7 12 74 0 461 799 0 135.20 22 69.99 CHANGED ppshphsplsLRlsshshslsuhslMuospps..h...............h.hpspasshsuapahVsusulsssYsllphshshhthhh.......tsh.hhhhhhhhDtlhshLhhuAuuAAuuls.ls......ppGstpht.h....hCpthspF.Cpps.suulshuFluhlh ............................t..hthhslhLRlsshshslsuhslMusspp........................................h.tspa.sshsu...a...pa...hlsusulsssYslhphhhshhthhh................tsthhhhh..h.....hhhD................t.......l..hshLhhuuuuAAsuls..lh..............tpu.t.t.............hC..........t....h..........s..pF..Cpph.ssulshuFhuhh................................................ 1 61 279 376 +295 PF01883 DUF59 Domain of unknown function DUF59 Enright A, Ouzounis C, Bateman A, Eberhardt R anon Enright A Family This family has an alpha/beta topology, with 13 conserved hydrophobic residues at its core and a putative active site containing a highly conserved cysteine [1]. Members of this family are involved in a range of physiological functions. The family includes PaaJ (PhaH) Swiss:O84984 from Pseudomonas putida. PaaJ forms a complex with PaaG (PhaF) Swiss:O84982, PaaI (PhaG) Swiss:O84983 and PaaK (PhaI) Swiss:O84985, which hydroxylates phenylacetic acid to 2-hydroxyphenylacetic acid [2]. It also includes PaaD Swiss:P76080 from Escherichia coli, a member of a multicomponent oxygenase involved in phenylacetyl-CoA hydroxylation [3]. It is found near the N-terminus of the chloroplast scaffold protein HCF101 Swiss:Q8LD16, involved in the assembly of [4Fe-4S] clusters and their transfer to apoproteins [4]. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -9.16 0.72 -3.99 74 6019 2012-10-01 19:25:19 2003-04-07 12:59:11 14 30 3431 23 1809 4321 2549 73.70 27 34.66 CHANGED cpplhsALppltsP.sG.tsllstshlcs....lslps.......spVphslpls.Psps..thpsl+ppscpslt.slsG..sspVpl ......................pplhpsL.c.pVhDP.....E....ls.hslssLGl..l..h..s.........l.p.l.cs...................spl.p......l.....s.hTl..T..s.suCP....hsshltpplppslp..tlst......ps......................................... 0 581 1153 1528 +296 PF04547 Anoctamin DUF590; Calcium-activated chloride channel Mifsud W anon Pfam-B_2735 (release 7.5) Family The family carries eight putative transmembrane domains, and, although it has no similarity to other known channel proteins, it is clearly a calcium-activated ionic channel. It is expressed in various secretory epithelia, the retina and sensory neurons, and mediates receptor-activated chloride currents in diverse physiological processes [1]. 
28.00 28.00 28.60 28.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.64 0.70 -5.84 102 1393 2012-10-02 00:51:22 2003-04-07 12:59:11 7 39 257 0 866 1250 37 444.10 27 58.75 CHANGED I+sYF.....GEc...luhYFuaLsaYophLlhsAllGlhsalhthh...................................................................................................................t..hp..hsshFulhhslWuolFlEhWKR+.psplshcWshhshp.............p.p..R....spFputhh............................ths.lTs......................c..hhsthc...phh+h.lhshsllhhhlshhhshhluhhh............................hchhltphht...................hph.h.shlssll.hlhl.llshlYpplAphLTchENa+...........opspY-suhhhKhal..............................hpFlssasslFalAFhps........................................h....shpcLphplhhlhlspQlh...p.lhEhllPhlhph.......................................................................................................................hpph..............phppptpppp.......................................................................................................................pt.hhpphcp-h.phpsas..................................shh....s-YhEhhlQFGalsLFusuFPLuslhuLlsNllEl+sDthKhhp...h+RPhsp.+spsIGsW.pllphluhlulloNshllshsp................................tphpshhst...............................................................hlhhllhhE.................alhhhl+all....phhls-h.....Pptl......ptphcRpca 
....................................................................................................................l+pYaGp...+luhYFuaLsh..YTthLh.sulhGlhhah.h.shh.....................................................................................................................................p......hslhFul....h.h.sl..W.....u...s..hFlE..h...WK.Rp.ps........tls.hpWshhshp.............t.cp.R...spaps.hh..............................................................hhs.hst.........................................................p.....c...h.hs.hp...ch..hhh...hhsh.shh.hhhl..hh..h......hshhhshhh......................................................hc....hhhhthht..........................................ph.hhshhsshl....hlhl........l.h..s............lY.......p.p.l.A.h.hLTph....E..+................Tp.s...pa-pphhh...Khal.......................................hpFlN...as.shFY..l.A.Fhht........................................................................h...........shhc..Lp.plhhl.h......l.spQ.hh...s.shhE...hh.hPh.h..h..ph.........................................................................................................................................................................................h.pph.........................hppt.tt.t..................................................................................................................................................................htphc.-h...pL.ts.ht...............................................shh.-..YhEh.......h....lQFGal.olF....ss.uaPL.As.lh...ALlN.NllElRhD......AhKh.sp..............h+R..........P............h................s..t.....+......s...p.....s.........I............G.........h......W...t.hhphluhlullsN.s..hl...luhsp.....................................................................tphpthhst....................................................................................................................................................h.tstlhhlllhE.................pl.hhhlphhlthhlPch....Pt.l.t.pht
+.............................................................................................................................................................. 0 302 410 651 +297 PF04576 Zein-binding DUF593; Zein-binding Mifsud W, Eberhardt R anon Pfam-B_2802 (release 7.5) Family This domain binds to zein proteins, Pfam:PF01559 [1]. Zein proteins are seed storage proteins. 21.00 21.00 21.20 22.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.14 0.72 -3.86 26 261 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 22 0 164 232 0 91.70 43 16.67 CHANGED lptL+ctlctp+cshpsLhpELEcERsAuAsAAsEAMuMIhRLQcEKAslpMEApQapRhhEEptpaDpEtlphLpcllhcREc-hptLctcl- ......................h.ttL+ctlptp+cshpsLhtELEcERsAuAsAAsEA.MAMIhRLQcEKAuhpMEApQapRhhEEptpaDp-slptLp-llhcREp-hppLctcl..................... 0 23 93 133 +298 PF04642 DUF601 Protein of unknown function, DUF601 Mifsud W anon Pfam-B_5475 (release 7.5) Family This family represents a conserved region found in several uncharacterised plant proteins. 21.70 21.70 24.00 23.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.95 0.70 -5.06 4 14 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 3 0 8 16 0 185.00 38 47.09 CHANGED MuGTLEDSKLRAh-KAKp.ps-scutoRQcs...Ss.AuKsscsPTS-utossKsssuKDssKRtADKKRKpsEcDupSPsRSSRsRpEEKssu.sppKtcsKcu.sQsLV..VLSSp.SEscpSphRoo.PlPAPPhsFADhhRTLVtPGusIsPhcEhKtsN+ENYLRFAtKLGchl.EFN.sFhSHEDQL.DKDpEIESFKpsE-ENA+hV-RANpVLsRM+sAEs+VQpLElsNhDLsAKLcuGKNAY.ssI-pEspuRA-LhsCEEKh+KLEEtQAshlssARpEERRKVRAQF+DFSSKYGsFatpSEEV ...................................................................................................................................................................................................................h.th-.Elpsa+.sE--Nu+hVp+AspVhsRM+tsE.plppLElsNhDL.tKLp..........................hpth.EtQt.hl..tR............................................ 
0 6 6 6 +299 PF04641 Rtf2 DUF602; Rtf2 RING-finger Mifsud W, Pollington J anon Pfam-B_5482 (release 7.5) Family It is vital for effective cell-replication that replication is not stalled at any point by, for instance, damaged bases. Replication termination factor 2 (Rtf2) stabilizes the replication fork stalled at the site-specific replication barrier RTS1 by preventing replication restart until completion of DNA synthesis by a converging replication fork initiated at a flanking origin. The RTS1 element terminates replication forks that are moving in the cen2-distal direction while allowing forks moving in the cen2-proximal direction to pass through the region. Rtf2 contains a C2HC2 motif related to the C3HC4 RING-finger motif, and would appear to fold up, creating a RING finger-like structure but forming only one functional Zn2+ ion-binding site [1]. This domain is also found at the N-terminus of peptidyl-prolyl cis-trans isomerase 4, a divergent cyclophilin family [2]. 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.05 0.70 -5.19 23 575 2012-10-03 15:03:13 2003-04-07 12:59:11 7 9 286 0 402 606 6 231.20 20 66.07 CHANGED MGsDGGoI..PpRcELV+h.........+++scpl...............D.ppt.ptt+WphCuLopc.Lpc...PIVsspLGpLYNK-sllchLL-Kp....thscsss......HI+uLKDlhpLplosNPs.pttt............psspps.alCPloul-h.sGph+FhhLhsCGCVFSE+AL+-l.....K...sch......................C.hCspsas.pc............Dl...ls........lNsocE.-h-hh+tch.cccpuc......pKpcKcpK+pKpstssusstpsstsstsst.................................tt..tt..t..phtptpshsppsppscsap.........SLFsoppppK 
......................................................................................................................................................................................................................aphCsL.S.p.hp................P....l..ss....GplashpsllphLh..ptt.........s..ttht.....................................+lpsl....K-lh.cL.p.hsts.................................................pcspa.h.CPls..t...pth.....sspp...+hl.hl...t..ssG....s....V..ao....tcAlc..cl......c.........sph..................................................C...hh...sspsap...cp................D.l...Is.......................lp.....s.........sp.........p.......-h..........th......h.p.p..ph...pthp..p...............tp.t...pp....t.ct..t...t..................t......t......................................................................................................................t................................................................................................................................................................. 0 152 233 342 +300 PF04652 DUF605 Vta1 like Mifsud W, Mistry J, Wood V anon Pfam-B_5537 (release 7.5) Family Vta1 (VPS20-associated protein 1) is a positive regulator of Vps4. Vps4 is an ATPase that is required in the multivesicular body (MVB) sorting pathway to dissociate the endosomal sorting complex required for transport (ESCRT). Vta1 promotes correct assembly of Vps4 and stimulates its ATPase activity through its conserved Vta1/SBP1/LIP5 region [1]. 
24.70 24.70 24.70 24.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.38 0.70 -5.06 39 629 2009-09-11 15:36:18 2003-04-07 12:59:11 11 22 297 9 412 611 7 210.40 18 49.99 CHANGED pslp.alppApEh-ptc.PllAYaC+haslpphlchst..pssEsppah....hsLhDpLE...................phKpphs......sp.......cslh.scssupualEpFALclFspADpp.Ru.sp..hs+.sss+sFasAuhhh-llphF...Gp...................l....sp-stp+hKYAKa+AscItKul+pG........csPssss..hp-pcpt.............................................................................................................................................................t.sstssssssstsssp..s...........t.s.t..s....sss.PssP...........................................Pss......s.ss...sst.....st...............................................tstPsspp.s................................................sht.sscplspAQKaA+aAlSAL....sa-DlpTAhcpLppALclLs ..................................................................................................................................................................................................................................h.hu.pht.............................hhsahh.......thahhp.....h...ht.............ts.t..thh........lhp.....lE...................t.p.tt......................................................t.....hh.tah.thh..t.pt..p........t...................t.....hhp.h.ss..hhphh.t.h.......t........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 142 230 338 +301 PF04678 DUF607 Protein of unknown function, DUF607 Mifsud W anon Pfam-B_5620 (release 7.5) Family This family represents a conserved region found in several uncharacterised eukaryotic proteins. 23.80 23.80 23.90 24.20 23.40 23.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.14 0.71 -4.35 41 400 2009-09-11 03:12:19 2003-04-07 12:59:11 8 11 197 0 257 399 0 179.00 27 53.31 CHANGED ht.........hpstscutchssslcsuushl.slpssV..hlp.................Ppt...............hhthlt.hhsh...t..t.pp..............hlcphppclpslpchKpphcptAc......ppsp+lhWuGluhlu.sQhullh+LTFa.-huWDlMEPVTYalohushhsuYsaFLhppc-hoYpuhhcpphst+.c+hhptpsFDlpc.......YpcLp ....................................................................................................................s........tt.tpuhphsthhptsushl..hpshh........hL.....................................Ppp.................................hhphltthhsh.............tpt...h.............plcphcppL.psLp.phKtclcptAc......ppsptlh.WuG.Luhhs..sQhGllh+LTaW..-aSW...DlMEPlTYFlThushh.s..s..YuYFlhTp.p.-hsYpshhppphhtt.c+hhppppFDlp+YppL.................................. 0 78 135 199 +302 PF04685 DUF608 Protein of unknown function, DUF608 Mifsud W anon Pfam-B_5657 (release 7.5) Family This family represents a conserved region with a pankaryotic distribution in a number of uncharacterised proteins. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.21 0.70 -5.74 46 661 2012-10-03 02:33:51 2003-04-07 12:59:11 8 24 450 3 246 857 87 284.20 22 30.49 CHANGED GcFshhEuh-h.thhsohcsphY..uShuLhhLaPcLEcolhpshspsh.pps.........................GslPHDlGhs............cpPh.tss...hp....ssttWpDLsssaVLpVYRDa..hhoG-...................................................................ps...aLcphaPsltpul-ahh..p......aDpD.sDGl.-ssst.D....................................pTYDsh...hhGsouYsuuLalAALpAuhphuchlsc.........................ttptppapphlpcu+phhcch.................LWNGc..Yat..hD....................................ptphupulhuDQLhGpa...auphhGL.....ssllsc-phcoALpslachNhh.............hhhsuchGssNuhhP.......cGp............hc.ssspspE.VWo.GlsaulAAhhl.cGhh-c.uhclscsshpphh...phGh.appsEsh...t.......hhuspYhRshuhWuhhh ...............................................................................................................................................................................................................................................................................................-........hh..hh..h.............t....................................................................................................................hh.t..ha................h.ht.....hht........t......hs.p...t..sG..h.....p..s...t.h......................................................................................................................................................................p.h....h....D..s.......h.........h.....t..G......S...u....a.........s.....u..tL...a...h.s..AL.....p...u...hhthA........chhs.c...............................p.t.t.sp.pa.pthhpps+psh-pp........LW..s...Gc......aat..h...............................................................................................s.p.ptsc.h...hs-QltGph...ahp...s.uL...........s.h..hspc.+sppAhpslhch..h............................h.s..sphGhssthh........
.......sst.................s...hp...................................................................................................h......................................................................................................................... 0 82 143 207 +303 PF04727 ELMO_CED12 DUF609; ELMO/CED-12 family Mifsud W anon Pfam-B_3095 (release 7.5) Family This family represents a conserved domain which is found in a number of eukaryotic proteins including CED-12, ELMO I and ELMO II. ELMO1 is a component of signalling pathways that regulate phagocytosis and cell migration and is the mammalian orthologue of the C. elegans gene, ced-12. CED-12 is required for the engulfment of dying cells and cell migration. In mammalian cells, ELMO1 interacts with Dock180 as part of the CrkII/Dock180/Rac pathway responsible for phagocytosis and cell migration. ELMO1 is ubiquitously expressed, although its expression is highest in the spleen, an organ rich in immune cells [1]. ELMO1 has a PH domain and a polyproline sequence motif at its C terminus which are not present in this alignment. 
20.60 20.60 21.80 21.90 20.40 20.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.03 0.71 -4.71 60 873 2009-01-15 18:05:59 2003-04-07 12:59:11 8 24 242 0 533 801 13 171.00 26 34.37 CHANGED pphphlcphtphshDspp...............................spHpch.........LppLaphhhss........................................tphssphsp................pWcclGFQ.upsPsTDFRus..GhLGLhsLlaascpaps..thpclLppspptt...........................................................................................paPaAlsuIslTt................hlhphl..ch.sph...........stcthpsh.............................htsthpsFpcLashshhtFschWh...................pptsslh-Fsplhp ................................................................................................h...t.lcphhph.hDsps................................p..H...pch.........LhpLaphhhss.................................................p.hpshhsc..................pWc.clGFp.s....s..sP......ssDFcs.s..GhLuLtsLlYFuc..p...ats..thpc.llhcspppp........................................................................................................................................h....phPFAhsuIslTp................hlhphL.ch..sph.......................pscthpth...................................................hhsp.psFppha.shsh.hh.s.+hWh...................phts.oh.-FspVh......................................................................................................................................... 0 181 274 395 +304 PF04747 DUF612 Protein of unknown function, DUF612 Mifsud W anon Pfam-B_3614 (release 7.5) Family This family includes several uncharacterised proteins from Caenorhabditis elegans. 
18.30 18.30 18.80 27.70 18.20 18.20 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -13.08 0.70 -5.60 2 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 4 0 25 45 0 198.90 36 72.46 CHANGED MRSPKSVRRPHIRQQLTNRRKNLGRVAKSQRNQFRQWLLTAVLPNSINDQRKEAFASLELTEQPQQVEKVKKSEKKKAQKQIAKDHEAEQKVNAKKAAEKEARRAEAEAKKRAAQEEEHKQWKAEQERIQKEQEKKEADLKKLQAEKKKEKAVKAEKAEKAEKTKKASTPAPVEEEIVVKKVANDRSAAPAPEPKTPTNTPAEPAEQVQEITGKKNKKNKKKSESEATAAPASVEQVVEQPKVVTEEPHQQAAPQEKKNKKNKRKSESENVPAASETPVEPVVETTPPASENQKKNKKDKKKSESEKVVEEPVQAEAPKSKKPTADDNMDFLDFVTAKEEPKDEPAETPAAPVEEVVENVVENVVEKSTTPPATENKKKNKKDKKKSESEKVTEQPVESAPAPPQVEQVVEp.......................VEcPV..APsSKKPTADDsMDFLDFVTAKP-+oEss.......h.VEss+s-ppTAssuts+KKNKKsKpKppSEp...Ess ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tts.t..ss....ss.s.....SpKPTAD.sMDFLDFVTAKs-cs-.s............h....p.....................................t.......................................................................... 0 3 9 25 +305 PF04749 PLAC8 DUF614; PLAC8 family Mifsud W, Bateman A anon Pfam-B_3635 (release 7.5) Family This family includes Swiss:Q9NZF1, the Placenta-specific gene 8 protein. 
21.40 21.40 21.40 21.40 21.30 21.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.97 0.72 -3.48 114 1148 2009-01-15 18:05:59 2003-04-07 12:59:11 12 23 216 0 794 1125 22 103.20 24 44.26 CHANGED WssGLh...cCh........s.Dhs...sChhshh.sPChhhuph......uphhsttt................ssCshhshhhhhh............................h.....shhRs.plRpcasl............pu..s.sssDshhph.hC.sCuls........Qpt...RE...l ............................................................................................Wpsul.h.s..Ch..............s..D..h.s.........hC.hhshh..CP...C..h.hhuphtptlt.t....................................spChhhshhhshh...........................................................h......hh..hs.....shhRtplRp+a..sl...............................ps.....s...ss...sD..hhs.th.hC..sCulsQthRE................................................. 0 276 496 660 +306 PF04759 DUF617 Protein of unknown function, DUF617 Mifsud W anon Pfam-B_3842 (release 7.5) Family This family represents a conserved region in a number of uncharacterised plant proteins. 20.90 20.90 21.40 22.00 19.20 20.60 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.07 0.71 -4.60 23 195 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 19 0 135 184 0 163.50 47 61.51 CHANGED plTGTlF.....GaRRG+VshulQ-sspus..Psl.LlELAh.sTusLh+EM..uuGhVRIALEs-+p..sspsstt...........................LL.-EshWshYCNGRKsGYAlRR..Eso-sDhtVLchLcsVShGAGVLPu............ttsssst-G-lhYMRA+FERVVGS+DSEoFYMlNP-.......Gsu......G......PELSIFhlRs .................................lTGTlF.....GaR+G.+VphulQccs.+us.......Ps.l.LlELAh.sTusLs+EM..uuGlVRIALEC-+ppspp.s.........t..............................Ll-EslWshYCNGRKsGYAlRR...-so-sDhpVLchlcsVShGAGVLPs.................ssssus-GElhYMRA+FERVVGS+DS..EuFYMhNP-.......usu..............G....PELSIFhlRh.................................... 0 15 75 104 +307 PF04818 CTD_bind DUF618; RNA polymerase II-binding domain. 
Mifsud W, Eberhardt R anon Pfam-B_3687 (release 7.6) Family This domain binds to the phosphorylated C-terminal domain (CTD) of RNA polymerase II [1,2]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.19 0.72 -3.73 41 1005 2009-01-15 18:05:59 2003-04-07 12:59:11 8 25 269 19 650 939 0 63.20 28 8.67 CHANGED KLshlYLsNDVlQpu.K++...ph.pFh....psFssllssshpplhpph..spchcpplpRllslWp-Rslas ...+LshlYLhN-llp..pu...++p.............pt.pFh.............tpFppslssshpp...l...h...pph.......spcs+ppltR...l...lsl...Wpcpslat................ 0 167 294 487 +308 PF04789 DUF621 Protein of unknown function (DUF621) Waterfield DI, Finn RD, Fenech M anon Pfam-B_6219 (release 7.5) Family Family of uncharacterised proteins. Some (such as Swiss:O01625) are annotated as having possible G-protein-coupled receptor-like activity. 20.80 20.80 20.80 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.99 0.70 -5.42 14 52 2012-10-03 04:04:29 2003-04-07 12:59:11 10 4 6 0 48 64 0 200.60 40 89.57 CHANGED huElPsp-KsslYYhlloLFVlSTlsoTLLTuuFLllullLWu+FKs........MKFFWFLoQLTlSVFIlSsLNLlINVPATLFSLlTK-FlpSclFhhlSYlIDFCHYoILFSNLVIAIQRhhVFFaRpl.TsphF-S.lIYhWLl.VWlluhhlhhhhhhsNC+Ypapp........ps..p+Y.LpCpst...sslVshssPphIQl................lEhllQhGIPlhILslYlAllhKIhhMKpooLsKsEhplLKQAIFlFllFQsSSsVFLhsQTlphsssTAFLIKRhINT..............hEIhAGAATPCFFFFTSKEIRKllSoKlSAsSSQGsS 
............................................................................................pt.haa.hl.hFlh..hhsolLohsFlh.h.hhhW..h+...............h+aFWFL.pLThuhFlhS..NhhlslPAsL.ulho.phhpo...............t.a.hh....ph.h..hlhsNhhhu......................................................h.h................h.h.C.........h.........h................h-.h.hQhhlPhhI.h.hhYhulhh.......+lh.h.h..Ktss.p...p.......EhhlLhQuhhlFhhFQhss..lhhhs..h....t....AFhlK+hlpT...............h.-............................................................................................ 0 14 17 48 +309 PF04802 SMK-1 DUF625; SMK-1_Ce; Component of IIS longevity pathway SMK-1 Waterfield DI, Finn RD, Wood V, Mistry J, Pollington J anon Pfam-B_6319 (release 7.5) Family SMK-1 is a component of the IIs longevity pathway which regulates aging in C.elegans. Specifically, SMK-1 influences DAF-16-dependant regulation of the aging process by regulating the transcriptional specificity of DAF-16 activity [1]. SMK-1 plays a role in longevity by modulating the transcriptional specificity of DAF-16 [1]. 
21.40 21.40 21.60 21.90 19.90 20.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.26 0.71 -4.78 25 450 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 272 0 288 477 3 185.10 41 22.89 CHANGED l.scsYlp+L..lslFctsE-l.-shcsLHhLpsIlK..........sllLLNcsslhEhllu.D-hlhslVGsLEY.....DP-hPp.sK.pHRpaLpppu.+FKEVIPIpssplppKIHQTaRlQYLKDVVLs..RlLDDsshusLsohIahNps-IlshLQcDp.pFLpELFuhh..................pssssss-+++-hlhFL+-hCshu+sLQs.....s+ppha+sL ......................scsYIcKL..lpLFchCE-L.Esh-sLHpLapIlK..........uIl...h..L...N.c..ssl..hEhh.hS.D.-sIhcVlGsLEY..............DP.sh.s.p....s+....p....H.R.caLpcpu....+FKE.....................Vl..PIp...-s...p..l...h...pKIHQTYRlQYlpDllLs...pl..h--shhSsLsShIFF.N.pl-I.Vs..hL..Q..cDp...pF...Ls-LFuth........................................................................s.sp...ss.s.p..c++c-hV.Fl+-hCshup.sLQs.....s+pshacsL........................................... 0 91 148 218 +310 PF04783 DUF630 Protein of unknown function (DUF630) Kerrison ND, Eberhardt R anon Pfam-B_2481 (release 7.6) Family This region is sometimes found at the N-terminus of putative plant bZIP proteins. Its function is not known. Structural modelling suggests this domain may bind nucleic acids [1]. 21.30 21.30 21.30 21.70 20.80 19.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.85 0.72 -4.07 33 295 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 25 0 203 284 0 60.00 42 8.62 CHANGED MG.CstS+.l-.......s-........-sVthC+-R++hlKpAlctRpsLAuAHsAYhpSLRssGsALpcFspsEs ..................................MG.CstSK.l-.......p-..............-uVshC+-R++hl+pAlctRtsLAuAHsuYhpSL+ssGsALppFsps-................. 0 21 111 159 +311 PF01683 EB EB module Hutter H, Bateman A anon Hutter H Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 8 conserved cysteines that probably form four disulphide bridges. 
This domain is found associated with kunitz domains Pfam:PF00014. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.12 0.72 -3.90 63 858 2009-09-13 14:03:54 2003-04-07 12:59:11 13 76 43 0 729 809 1 57.00 25 22.16 CHANGED C.s...........sh....hhhsspChsp.................................................st.Gps.Cp.hspQCt....ss..otC..hs....u.....pCpCssu.........hpthsst..C ............................................................................................................................tstCh.t..................................................s.t..Gps.Cp..ts...ppCt..............ss..ShC...hs......u.........hCpCss.u.........hh....s.shC.................... 0 278 372 715 +312 PF04782 DUF632 Protein of unknown function (DUF632) Kerrison ND anon Pfam-B_2481 (release 7.6) Family This plant protein may be a leucine zipper, but there is no experimental evidence for this. 19.50 19.50 20.20 19.60 19.00 19.00 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.95 0.70 -5.33 33 381 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 30 0 243 367 1 266.10 28 43.51 CHANGED slh-llc-I-chFh+Aupuup-VochLEs....s+......h.appp.sphpt.............................uuplhpslohs...hs.ts....................htptsssttss.ts.tstoauoTL-+La.AWEKKLYcEVK.....................upEpl+hpY-KKhppLpph-t+Gscttpl-+T+sslpcLpo+lhlshpslsshSppIp+LRDcELhPQLl-LlpG.............................Lh+MWcsMhcsHchQhpllpps+..lst.....stsp..............so-tappsThpLEsplppWpssFssalssQ+sYlpuLssWLphsl........tpppt..s.s..p..sPslashCppWppsl-.cls-ctstcAlcuFssslpsl 
...................................................hhphhp-lcp.F.+AupuupcVuthLEs..s+.......ht..hptp...t..t.th...........................................................ttphh.pslsht......hs.ts.............................t..s...s.h.tt...s....stshuoTL-+LhsWEKKLYcEVK....................................................stEth+hta-+Kht.Lpp.-.+G.tp....pl-+T+s.lccLpo+.lhlshpshpshoppIpclRDpELhPQLh-LlpG.............................hhpMWcsMhcpHphQhphlpphp..hs......t.....................psc.pt..ptshpLc.tlppWtssFsphlptQ+sYlpuL.suWLph..sl..........t...t......s......p.h.ssslhshsppWtttlc..plspp.......s.pulcshhtslp............................................................. 0 26 142 192 +313 PF04826 Arm_2 DUF634; Armadillo-like Kerrison ND, Eberhardt R anon Pfam-B_2700 (release 7.6) Family This domain contains armadillo-like repeats [1]. Proteins containing this domain interact with numerous other proteins, through these interactions they are involved in a wide variety of processes including carcinogenesis [2], control of cellular ageing and survival [1], regulation of circadian rhythm [3] and lysosomal sorting of G protein-coupled receptors [4]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.57 0.70 -5.27 11 392 2012-10-11 20:00:58 2003-04-07 12:59:11 8 14 78 0 216 672 4 217.60 29 44.07 CHANGED sssphcsh.Lssp-LcKLLslLcpTcDPhIpEhALlsLGNsAAashNQ-IIR-lGGlslIushlssssspl+.pKALNALNNLSsNsENQpclKsYlsQVC-DlhssPLNSsVQlAGL+LLsNhTVTs-YQHllss.lssFhpLLstGNscTKhplLKlLlNhScNPuMT+cLlsspVsSSLluLFspp.s+ElLLplLplFpNIscplKtcuplssppcFocuoLF.hlhp-sp.hspKlpuLssH.pDs-V+tKVl ...........................................................hsstph.pc...llt.llchopDPhIpphuhhsh.up.s.s..s......a.s.h......s.p...........sh.I+c.h..Gsl.sl.ltsh.l.s.p.s.s......s....pl+....ppA....Lpsl.s...sh.oss....h-N..pt.p..l...c...h......a..l.s.p.Vsc....-h...h.s...t..s.l.......s..S...s...l.Q..h...u...G..L+hLsph..T..........l...s..sp..h..p..p...h..l...s...s.......h...s...s.h.h...p.....L.Lst.G.st..p.....s+.hplLK.lLhN.loc.NPsh....sc..c.Lls.s..p..s...ushh.sLas.....pp.spp.llphlshhppltpth+.t......s....p.......as.tsLh..hhtp.t.....hspph..t........................................................................................................ 0 33 46 80 +314 PF04859 DUF641 Plant protein of unknown function (DUF641) Kerrison ND anon Pfam-B_6069 (release 7.6) Family Plant protein of unknown function. 23.10 23.10 23.10 24.10 22.90 22.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.60 0.71 -4.40 13 143 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 20 0 98 139 2 124.70 33 29.11 CHANGED spstspsspsh-ulluclFuslSSlKuAYhpLQpAHsPYDs-pIQuADpsVVuELcpLS-LK+tat+ppsssu.....spssthhAclpEpppLl+TYElhl++..LctElcsKDsEIcpL+pplppthtssppLEK+l ..........................t....pphtsh-ulluclFsslSulKuAYhpLQtApsPYDs-pIpuADphlVuELcpLS-LKchahcpphs.ss...............................st.sh.hhuph.pEppsll+o.YEhhsc+..LpuplptKDuElttL+pplpctt.ptspplpc+l................................ 
0 14 62 80 +315 PF04884 DUF647 Vitamin B6 photo-protection and homoeostasis Mifsud W, Eberhardt R anon Pfam-B_6523 (release 7.6) Family In plants, this domain plays a role in auxin-transport, plant growth and development [1,2] and appears to be expressed by all cells in the plant as well as in plastids. The family has been shown to play a role in vitamin B6 photo-protection and homoeostasis in plants [3]. 28.30 28.30 31.30 29.20 28.20 28.20 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.45 0.70 -5.20 27 424 2012-10-10 14:40:21 2003-04-07 12:59:11 9 9 206 0 293 410 13 219.60 31 52.45 CHANGED sptshph.pthpphhshlhphFLPpGYPpSVosDYlsYtha........cslQshsSslsusLuTpAlLp.u...................................lGVGs.......ssAsssuAslpWlL+DuhGtlupILFAthh............................GotlDs-sKpaRlhADllsDhAhhl-l.loPhaPp......hhlhlhssusls+ulsuVAuGu.o+AuLotHF.AhpsNLA-lsAKssuQpTls..sLlGhhlGh.......hlsshls......shtsshhs.hhhlsslHlhsNYpAVRuVphcTLNcpRsslllppalps.upl. ..............................h.............hthhhthhlPtGaPpSVos-Yh..Ytha.....................cslQ.shsoshtusLuTpulLt.u................................................................................lGV.Gp........tsus..stuAs..lpWll+.Du.hGhlupllFuhh..............................................................G..sphDscsKp......aRh...hA...........DllpDhuhhlEl.hoP.....h.h.Pt..............hhl.lhshuslh+ul..........suVuu................uu.T+uulptHa...A.................h....p.....s...Nhu-lsAKs...............tuQpsls..slhGhh...lGh...................hlhphls........................s..hhhhs..hhhlshlHlhssapul+ul.hpTLNttRhpllhppahpp.s........................................ 0 105 187 246 +316 PF04900 Fcf1 DUF652; Fcf1 Mifsud W, Wood V, Mistry J anon Pfam-B_6634 (release 7.6) Family Fcf1 is a nucleolar protein involved in pre-rRNA processing [1]. Depletion of yeast Fcf1 and Fcf2 leads to a decrease in synthesis of the 18S rRNA and results in a deficit in 40S ribosomal subunits [1]. 
21.40 21.40 21.50 21.50 20.80 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.51 0.72 -3.92 60 807 2012-10-03 20:43:45 2003-04-07 12:59:11 7 10 439 0 568 803 47 95.30 35 44.72 CHANGED hcsLhu...cspshlTpCVltELcclu..phpsulplsps.p.hpRhpCsHpss.......Ac-CIhphV..s........pc+.........YlVATpDp-L+++lR.+lPG.VPlhalp.psphhlEph ................................chLhu.csh.hlTpCVhtELEcLG....t.c....hp.hAh.....p...........lA..+...c...............t...hpc.h..p....CsHpss.....................h.A--Cllphl..p......................................ppc...........YlVAT......pDp...........cL+++lR.clPG.VPlhalp..ppphhlEt.................. 0 188 312 453 +317 PF04949 Transcrip_act DUF662; Transcriptional activator Finn RD, Eberhardt R anon Pfam-B_6952 (release 7.6) Family This family of proteins may act as a transcriptional activator. It plays a role in stress response in plants [1]. 27.20 27.20 27.20 29.60 27.10 27.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.09 0.71 -4.60 6 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 19 0 44 72 0 147.50 58 88.85 CHANGED Qh.plhsppoGulShsG.usthc-c-EEhSRSALSsFKAKEEEIER+KMEVRERVQAQLGRVEEEoKRLAhIREELEuhuDPhRKEVsslRK+IDsVNRELKPLGpTCQKKE+EYKEsLEAaNEKNKEKApLloKLhELVuESE+hRMKKLEELSKolEol ....................h.t.su.uhuh....us.thsc--EEhSRSALSsF+AKEEEIE+KKMEVREKVpsQLGRVEEETKRLAhIRE.............ELE.......uhADPhRKEVuhVRK+IDsVN+ELK.PLGpoCQKKE+EYKEsLEAFNEKN+EKspLlo+LME...LVuESE+lRhKKLEELsKsl-oh................. 0 11 27 34 +318 PF04950 DUF663 Protein of unknown function (DUF663) Moxon SJ anon Pfam-B_5147 (release 7.6) Family This family contains several uncharacterised eukaryotic proteins. 
20.20 20.20 20.50 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.82 0.70 -5.45 11 954 2009-01-15 18:05:59 2003-04-07 12:59:11 7 33 507 12 524 951 15 256.70 33 35.98 CHANGED El-hs.c.sAcpchpKYRGL+uhhsSsWDssE........s.Pp-apclaphpNhcpoKpphlpp..............pt.hpsshsGtaVplhlcpVPhphhpsass....tp...sl.lsauLL.aEpKhullshpl++apthccPl+o..p-pLllphGaRRFhs................pPlaSp....sssss+pKhpRahpscpsssAThYuPlsassssllsh+p.psssp.............hplsAoGsllsssssphlhK+hlLsGHPaKIaK+suslRYMFassEDVpaFK..............................slplhT+sGR+GhIKEsl.GTH.GthKsoF-s+lpspDsVhhpLYKRV ...........................................................................................................................................................ht+hp.tl+thhsu.a-stE.........................st........c....a..pclh.p..h..t....ch....c.t.....p..+.....p.p.hlpc..........................t.h.tGhpsGh..hVcl..plcs..V.P..sphhp.php.............................Pl..hlhuLLtpEp+.hshlphplp...t..p..........p.h....c..c.s.lKo..+-.LIhphGhRRh.s....................tPlaSt.............pssssh...p+h.+ah....t.h....p.shAo.ahG.............Plsh..s...s...s...shls.a..pphp.sps...................................................thcl....huTGsllss..D.......t....sphl...sK+l...hL.sG..........h..PhK.Ih+..p..hshl.+.MF.s....-lthFc..............................shtlh.Th.t.G.hpG.IKcs............l....t...................s..............c....Ghh............+s.Fpspl..p...Dhlhhphah.h................................ 0 197 301 442 +319 PF05057 DUF676 Putative serine esterase (DUF676) Moxon SJ anon Pfam-B_5941 (release 7.7) Family This family of proteins are probably serine esterase type enzymes with an alpha/beta hydrolase fold. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.53 0.70 -5.05 14 1193 2012-10-03 11:45:05 2003-04-07 12:59:11 9 58 403 0 795 2452 363 177.00 20 25.94 CHANGED spptsHLVVhlHGl.us..........ssDh........phltpplhp......phspc..hlhhhsppsstspThsGlch.hGcRLspEllphlpctpst..........KISFluHSLGGLltRhAluhlh.t......................httlcshsFlolAoPaLGshhspss.........ptlptshuhthlspoGppL.hh..............Ds.cspsshLh+l.tptst..hhps.......LthF.............KpRllauNs.pDthsshho.....sph .......................................................................................................................t....pl..hlhhHG....l.us.................................tc..h............................................h.ht..p......h................................t......tt.......h....h....h..........................p....t......s.................p.....T..........t....s........hc.......h.....s..p...+...l.......h..p.....p......lh.p..h.l.p.t..h..t.t....................................p+.l..S...a..luHSLG.G.L.l.h.R.h.u.lshhhtp...........................................................p.....h..s..F.....l........o....l..uo.....P.H.......lGs............hh.tp.........h.........................................................................h...h................................h...shp..........................................................................t...........hh.h...............................................h..F......................p.h.hhs....D..s...........s.h.............................................................................................................................................. 1 242 449 666 +320 PF05097 DUF688 Protein of unknown function (DUF688) Moxon SJ anon Pfam-B_6331 (release 7.7) Family This family contains several uncharacterised proteins found in Arabidopsis thaliana. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.90 0.70 -5.54 7 218 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 22 0 135 189 0 203.30 14 61.22 CHANGED ME-..KpLshstPLhSlRRhsph...s...pp....p..s.hp.....hP.hcs-h......pp..s.VppPuoVPFhWEQtPG+PK............st.ts...........................................................................................tcss--p-sth.-AhDTlSpstSF.hNCS.sSGlSthttss......ss.ss-..hpspDhMMuRFLPAAKAhslcp.spasu++ps.....sh.pp.hpQltchh...........sucp+ss.s.Yc.phsss....p...hcD-Ep--ps--Dt........h..ophhup+sCGhL.PplChKsSlshLNPVsuht.t....lpssSpctsK..u+hs....t..o.pK........uhs.h.c+KLpthhp.sht.s.spphpspSpp.p.h...Sps.psSp.hht...ttth.P.acsss..S..........................hpphpc..tELhhs+sshptst..Sshh-+Tlassp.p......................................hlE ..........................................................................................................................................................h.p...su.u......VPFhWEppPGpP.K........................stt..p..s..................sP................................................................sP...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.ht.tt...............................................................................................................................................................................................
... 0 23 78 104 +321 PF05108 DUF690 Protein of unknown function (DUF690) Moxon SJ anon Pfam-B_6322 (release 7.7) Family This family contains several uncharacterised bacterial membrane proteins. 19.20 19.20 22.70 20.60 18.00 18.40 hmmbuild -o /dev/null HMM SEED 483 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.67 0.70 -6.12 7 613 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 233 0 136 440 0 410.80 30 94.49 CHANGED hshshoT+hQVoGaRFlhRRlttAlshtDsRMhsDPLRtpopulslGhllslshhhGshlhuhl+PpGtlGsssllsDRsouALYVRluspLaPVLNLTSARLlsGpssNPtsVKSSELs+hPpGsLlGIPGAPtths..sssssSsWslCDolutstSp....sstsTVIAGPh-.su.ptssLpsspulL..Vsh....susTWllhsG+RutIDLsD+AVT.uLGlss....sstPpslupuLFNAlP-ss.lpsPhIPsAGuPsShGlsss......IGuVlph.......ps.ssusQYYlVLsDGlQplssssAslLRsssSaGhstPPslsPshls+hP.sppLssphaP-pPlphVD...tstsPshChhWp+sAusssuplslluGpsLPVssu....sVpLsps.t.s...sAspVhlssGtutFVsspu..ssutsotSLaaVossGVRaGlssscs............scALGLs..ss.sAPWslLpLhssGPsLSRssALLtHDTLssDs .........................................................................................ss+.QVoGapFlhRRhttulshtcs.c.Mht-....Ph+ppstulhhGhll...sllhhhGsh...lhu.hh.....p...P...s..u.....t...h.....u.....s.s...s..l..lss+pouuLYV..h........l..........s.........s.........p........L+PVhNLsSARLl.s..................G.......p.s..s..s.P..t..h..Vp...ss....pL..s..p....h..P.pGshlGIPG..APtth.s........ps.s..s..s.u..............s..Wsl.C....D......s..s.s.ts......ut................st.s.osl........ss.....s..h.p.ss......th..........t..Lt..s..sp.ul..L..lph..................sus.s.a.l.l........h...........s...........u.....p...........R.....t....l.....-....................s....s....p.......A....l.h......u.LGls...........stsp..lupulhsulPtss...s...lt....sP....t...l...s.......t..u...G..s..s..s..s...h...s.h..ss........lGsVhth....................................s...ss.s...paYllL.DG.lttl.s.h.hAtl.L.......p.....t....s......s.......u.............G............s...t....s.......t...ls..s..s......l...s....t.........h.....P.....s.....p............l......s...
.s..p......hP.s..t.....ph...ls.........htt.t.s.....s.hCh.Wp........s.........s...s..............t...t.......tsp...htl.....h.s.Gt.th.P.ls..s.........htls.ts.................s........sstVhlssG..t..u..t.aVt....stu................s...sts.s....t..o.h..a.h.lo-sGlRault.s..s..ps.............................hcsLG...Ls..............ss..........sPWsllp....hlssGPsLS+ssAhhthss.............................................................................................................................. 0 31 85 123 +322 PF05127 Helicase_RecD DUF699; Helicase Bateman A, Eberhardt R anon Pfam-B_884 (release 7.7) Domain This domain contains a P-loop (Walker A) motif, suggesting that it has ATPase activity, and a Walker B motif. In tRNA(Met) cytidine acetyltransferase (TmcA) it may function as an RNA helicase motor (driven by ATP hydrolysis) which delivers the wobble base to the active centre of the GCN5-related N-acetyltransferase (GNAT) domain [1]. It is found in the bacterial exodeoxyribonuclease V alpha chain (RecD), which has 5'-3' helicase activity. It is structurally similar to the motor domain 1A in other SF1 helicases [2]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.22 0.71 -4.64 106 1317 2012-10-05 12:31:07 2003-04-07 12:59:11 9 23 1199 2 421 2768 389 161.80 45 21.72 CHANGED llTAsRGRGKSuulGluhutlhtpth.......plhlTAPshpslpslapashpshp..........................ttt....htht.......ttplpahsP-shhptt.ts............-l.......LllDEAAAIPlPlLcpl...ltta...+llh...........uoTlpG....YEGoGRGFsl+Fhpt.Lcpp.......................................ts.phpplpLppPIRautsDPlEpW.l.chLlLc ............................................................................t.lTAsRGRGK.SA....h....h....G..h.h...l..u..p.h....s.up.............................hhVTAP...u...hsusc...s.L.h.p.F.u........................................................................................................................................tpph.p...F...l...u.PDsLltp.....s.....pA....................................................Dh.......LllDEA.......A.A.I.Ph.P.L..LcpL......lt.t...a.....+s.lh.....................o..oTlpG.....................YE..GTGRGF.LKFhsph......................................................hs.pl+chpLppPI.RaAtsDPLEph.lschLlhD...................................................................................................................................................................... 0 132 223 339 +323 PF05129 Elf1 DUF701; Transcription elongation factor Elf1 like Bateman A, Wood V, Mistry J anon Pfam-B_8884 (release 7.7) Domain This family of short proteins contains a putative zinc binding domain with four conserved cysteines. Swiss:P36053 has been identified as a transcription elongation factor in Saccharomyces cerevisiae [1]. 
27.40 27.40 27.40 27.50 27.30 27.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.27 0.72 -4.29 37 383 2012-10-03 10:42:43 2003-04-07 12:59:11 8 4 317 1 269 345 1 79.40 41 60.55 CHANGED G+RKppp+...s.tp....+th.pLspt....FsCsFCsacpSlplpl....c+p......pthuplpCplC..stpa.psp.lstLspslDVYucWlDtscpss ..............................G+RKppp+......s.stp....KhttpLs..opFsCPFCNH-pSlpV+h......D+p..........pshGplsCplC...........tppF.pTs..Is.h.......LopPl...DVYS-WlDAC-th................ 0 89 147 218 +325 PF05197 TRIC DUF714; TRIC channel Bateman A anon Pfam-B_9855 (release 7.7) Family TRIC (trimeric intracellular cation) channels are differentially expressed in intracellular stores in animal cell types. TRIC subtypes contain three proposed transmembrane segments, and form homo-trimers with a bullet-like structure. Electrophysiological measurements with purified TRIC preparations identify a monovalent cation-selective channel [1]. 25.40 25.40 25.70 27.50 24.70 25.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.23 0.71 -4.89 13 194 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 104 0 124 182 0 184.70 41 61.19 CHANGED l+c...pPGAlshS++sPlAsWLSuMLaCFGGulLushLLGEPPltsLsNsssILLAoslWYLVFYCPhDlhY+hssaLPl+LllsuMKEVsRshKIsuGVsHAtchYpcualVMlllGasKGAGuullpshEQLlRGVW+PEoNEhL+MSFPTKsoLlGullFTLQ+sphLslupHsLhahaTl.FhVhhKlsMhhhpsp ......................tsGu.thuR+pPlusWLssMLhs....FuuslLushLLG.EP.......ls.hpNssslLLAosl.WYllFasPhDlhY+sssa.....LP.V+llhsuM.KElhR.spKIhs.GVpHA....t+.hY....puall....MI.hlGhl+...GuGuul.hpshEpLlRGs.W..p.P.p..s..s.EhL+.SasTKsoLhuu.l.l.FsLpppp...h...Ls.huppsLhahhhl.FhVhhKl..hhht................................................................. 
0 36 46 84 +326 PF01031 Dynamin_M dynamin_2; Dynamin central region Finn RD, Bateman A anon Pfam-B_220 (release 3.0) Family This region lies between the GTPase domain, see Pfam:PF00350, and the pleckstrin homology (PH) domain, see Pfam:PF00169. 24.60 24.60 24.60 24.80 24.40 24.50 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.70 0.70 -5.64 93 2249 2009-09-16 15:19:57 2003-04-07 12:59:11 15 54 534 28 1262 2222 40 229.30 26 35.58 CHANGED slDlLpsclhsL+hGYlsVhsR.uQpDIpspholpcAlcpEcpFFpsHstYps.hsctpGT.shLAcpLsp.LhpHI+csLPplcspIppplpphppELppY.Gssss...tstsc+sthllphlspFspsapsh.lcGpps..........ssp-LpGGu+IphlFpchhsphhps.hps..hpphhcc-lcphlcptpGhchshFlstpuFEhlVKpQIcpLc-PulcslchVh-tlpclhpphssp....phs+FPsLppplpshlpsllpcphpsscphlpshlchEtsalsTpc.sahsshp..thpp........ppppp ............................................hplltsc..h.h.......L+.h.G..........ahsVhN.........R.......uQ.t.....-...........l.........p.....t.......p.......h.......s...hp...t.........u.h.p.t.EptFF..t.p...c........................a.......p....p....h......s......p......p...hGo...hLtcpLs..........p.L.hp+Ic.pp.LP.t.l...csplpptht.phpp-lp.ph.sp.hs.........t...tpptthl..lph.....hppF...spt..hpph..l.cGptt............................pht.h.h...s....+l....................F....pp.h....h...........h...t....h......................t...............p...l..h...p.p.G............p.............hhhs...sac.ll+tpl.thlctPshptlphV..........h.p.l.phhpp.htt...............phtp.........a...P..t....Lppt..h..phh.phhp..pt...sp.p....l...........h.hthp.....h.h.......s....h..................pt..................................................................................................................................................................................... 0 336 647 994 +327 PF03028 Dynein_heavy Dynein heavy chain and region D6 of dynein motor Griffiths-Jones SR anon Pfam-B_928 (release 6.4) Domain This family represents the C-terminal region of dynein heavy chain. 
The chain also contains ATPase activity and microtubule binding ability and acts as a motor for the movement of organelles and vesicles along microtubules. Dynein is also involved in cilia and flagella movement. The dynein subunit consists of at least two heavy chains and a number of intermediate and light chains [1]. The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This C-terminal domain carries the D6 region of the dynein motor where the P-loop has been lost in evolution but the general structure of a potential ATP binding site appears to be retained [2]. 21.70 21.70 21.70 22.00 21.50 21.60 hmmbuild -o /dev/null HMM SEED 708 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.38 0.70 -13.22 0.70 -6.50 18 2690 2009-01-15 18:05:59 2003-04-07 12:59:11 10 239 285 14 1923 2507 199 542.40 27 19.03 CHANGED hpsshssPh.-aLospsWsulptLsphpp...........FpsLscclcpss....ppW+pah-p-sP.EcpclPppWc..................ppsshpKLhllRslRPDRhshAlpsFlpptL..GpcaV-s.psl-hucsac.....-osssTPlhFlLSPGsDPhpcVEsLu+chG......ppapslSLGQGQth.lA-ptl-pAsKpG+WVlLQ........NlHLsspWLs.pLEKhL-phs....ssHscFRLFLoAEPsss.....lPhulLcsSIKlTNEPPsGl+ANL++uhss...hsp-hlEhss+p.sEa+sllFuLCaFHAVVtER++FGP.........hGWNhsYsFNpuDLphSlpsLpsaL-ts.........sKlP...W-sLRYlhGEIhYGG+IoD-aDRRLhpoaL-cahpsphh-s..-hhLsP......t.uatlPss.scacpYhpaI-p.LPsps.PhhaGLHsNAEIsahTppscplhsslLchQP+puus..ttGuu..hop.............................E-hlpsllc-ll-+ls...chFsht-lhtKhs....s.+sPhhhVhhQEh-RhNhLlp-l+pSLp-LshGl+GclshosphEsLhsuLhhspVPspWsphuYPShhsLsuWhsDLhpRlcpLppWspc........shPpslWLuGFFNPQoFLTAlhQshAR+N..pWsLDchsLps-VTK+pp-phss..s..............sc-G...saVaGlalEGARWDhp.pshlh-uc.K-LhsshPVlahKslsh..pctcpcshYcCP.VY+TcsRus.........saVashpL+o+c..psu+WlluGVALlhp 
..............................................................................................................................tal.t..h.........t.h..h.........h....t.t...h.....t...................a.tt.....l.....p.pht..tt.............................ttW.p..t.h.h.pp..t.t..s........p....t........h..P.t.....hp.................................................ths.h.p......+....l....hll..+sh....RsD+..hh..sh........p.pa...l.............p................t..h..........G.............t.p....a....hps..........h.....s....h.tt....hp.....-osst....s.Pl.lhlL...............S.....s.......G......s......D........P.h..t..t..l.......t........hu.....pp.t................................tp....hpt..lu.h.G..Q.G..Q.t...hA....t..p...hlptu..h.p..p.G.p.WlhLp...........NsH.....L.s...p.a....h.....p.L..........-.c.h...h.p.p.ht............................c......sF...Rl.al..o..s..s..p..sp.............................hP...slLpt......u.....l.K.hs..E.....................sPt..........G...l.+tslhpsh.t...............................s....p.......h....p.......................s...t.....p..........t.....hptll..as..ls......ahHull.ER....++aus...........hGWsh.....Y..t..FspuDhph.........uh....p......l..p.....a.lpt....................................................t.p.ls...a.pslp.hhh.uph.......YGG+lsDsh.Dp+hh..shhpt....hhs....t...h...h..p.............t.h.h.........................................h.......h..P...............t...............p.........h..p...t.............a....h...p...a.......l...pp...L..............P.....................p...P.t.haG..hp...NA.chsh......t.p...s.p.t.....hh......pp..lh.h......p....st...st...........ttt...........s............................................................................pph...h...th...ht...p...hhpphs.............................h..s..........h..................p...h.............................p...s.h...............hhh.h..........pEh............php.Lht.lp..psL.p..........l.....h............s............l.p...G.....h......h...h.o..t.......t.h...c........t....h..pshh.s........
..p.l..P.......t........W.............t......h.............s....h.s...s..............s.....ls.................aht....chh.R...h.t......hp.tWh.t..............................h.s.....hWlsuha..sp.u.aLT.....uhhQth..........sRt..........th..s.l.Dp...h.......hth.phh.....................................................................................................st.p..u.....hhl.G..Lhh-....................G.....Ata..................s.....t.......t........................l....................p.t......t......h..h......h.s...hh...h.h.....s....................................................................t..................h.......sP..lYh.......Rts.........................................hlh...h......l..............st.............th.ah.tGsshhh..................................................................................................................................................... 0 816 1017 1584 +328 PF04261 Dyp_perox Dyp_perox_fam; Dyp-type peroxidase family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family of dye-decolourising peroxidases lack a typical heme-binding region. 
20.40 20.40 20.40 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.00 0.70 -5.69 15 2768 2012-10-02 00:20:33 2003-04-07 12:59:11 7 7 1901 38 529 1917 88 307.60 30 85.21 CHANGED QsGIloPhsssulalshslsutp.t......thh+phsutlstLptut................Dth.shtttt.sucLolslGhGsshac.....Rhuhs.pcPpcL+saschsssphpAssTsGDLhlaIpApct-lsFpsh+slhcphssslpVptchcGFph.........hssRsLhGFlDGTcNPpuscstct...sll......sscssshsGGSYVsVQ+ahHsl-sW-+lslp-QEslIGRcK.sssELstp....................stPssSHlsLss.c..-u.suhpILR+uhPauc.....sGph-tGLhFluYs+shcstct.lppMh.G...GssDtLh.-aopsloGuhFFsPsssph .............................................................................................................................................................................QsGIhs......t....ptssha..lshslt..st............................thht.....hstthstL..ptut.h.............................-ph..hthhh.ssspLohsluhGsshac..............ph.uhs...t..t..........P....p....c.L.c..c....hs..t..a.ss.s..p.h.ps.........s.h.o.puDlhlpIpus.cpslsapshpslh..c..t..h.....s.s...h..l...pVc..chc.GFth...............................hssR..s..LhG..FhDG.......TtNP.....pup......pstph..........shl.....................t-t.........h.htGGSYhhVp....+hpap...lct.W....cR..............h........s....l..........c-.......Q..........E..............p.....h...hGR.sKp....s.st...lstp...........................................................hhPtsSHlphss........c...t..........p............s....p......s..........h.....p.........I......lR+uhsYus................................sut..h.-t..GLhFluYppshcph.h.lpshh................u.....p.tDtL.....c....a.sp.sl.s..GuhFFsPshs..h.................................... 0 143 307 438 +329 PF02221 E1_DerP2_DerF2 ML domain Bateman A, Yeats C anon Yeats C Domain ML domain - MD-2-related lipid recognition domain. This family consists of proteins from plants, animals and fungi, including dust mite allergen Der P 2 (Swiss:P49278). 
It has been implicate in lipid recognition, particularly in the recognition of pathogen related products. A mutation in Npc2 (Swiss:Q15668) causes a rare form of Niemann-Pick type C2 disease. This domain has a similar topology to immunoglobulin domains. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.33 0.71 -3.91 137 1034 2012-10-01 19:31:57 2003-04-07 12:59:11 10 11 326 68 586 1089 2 125.90 20 73.86 CHANGED shshpsC...s.t.....sphpplsls........ss..hslhpGpsh.slshsh.ss.pp.spshps...plpsphss.......lplsh............csCc.........................pCPl.ptG.phhshphsh.s.........ls.p.hPss.....phplphplhsp.s.sp..tl..sChphsspl .................................................s...hp.C.....st......hp.lpplsls..........ss....hslhcGpsh.s.....lphsh.ss......ppssps.h.ps..plpsphss.........lpl.sh.................shCc..............................shpC.....P.....l.ptG...ph...........s.hphsh.s........................l..p.hPss...........ph..plphplhsp.s...tp...pl....sChphsht................................ 0 239 316 476 +330 PF03271 EB1 EB1-like C-terminal motif Mifsud W anon Pfam-B_1529 (release 6.5) Family This motif is found at the C-terminus of proteins that are related to the EB1 protein. The EB1 proteins contain an N-terminal CH domain Pfam:PF00307. The human EB1 protein was originally discovered as a protein interacting with the C-terminus of the APC protein. This interaction is often disrupted in colon cancer, due to deletions affecting the APC C-terminus. Several EB1 orthologues are also included in this family. The interaction between EB1 and APC has been shown to have a potent synergistic effect on microtubule polymerisation. Neither of EB1 or APC alone has this effect. It is thought that EB1 targets APC to the + ends of microtubules, where APC promotes microtubule polymerisation. This process is regulated by APC phosphorylation by Cdc2, which disrupts APC-EB1 binding. 
Human EB1 protein can functionally substitute for the yeast EB1 homologue Mal3. In addition, Mal3 can substitute for human EB1 in promoting microtubule polymerisation with APC. 21.10 21.10 21.10 21.50 20.10 20.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.18 0.72 -3.91 61 559 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 308 17 346 525 3 42.60 52 14.59 CHANGED slEcERDFYFsKLR-IEllsQp..............ctppps..lh......................pcIpcILYu .....LEKERD.FYFuK...LR-IEllCQc.p............-t-sss....ll......................pcIhcILYA............. 0 102 172 257 +331 PF05009 EBV-NA3 EBNA-3; Epstein-Barr virus nuclear antigen 3 (EBNA-3) Moxon SJ anon Pfam-B_4674 (release 7.6) Family This family contains EBNA-3A, -3B, and -3C which are latent infection nuclear proteins important for Epstein-Barr virus (EBV)-induced B-cell immortalisation and the immune response to EBV infection [1]. 24.40 24.40 274.20 273.20 22.60 22.40 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.64 0.70 -5.13 11 121 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 6 5 0 56 0 235.00 84 38.12 CHANGED MAlRQRlpDlRRsPhshcs...QR+W+LsSPupoW.MGYRTtolhhhoassstssss.lhLsAThGCpsGtRstsTFSAGsapPP+sust-QchhsspupVtplRppsscRY+hFFD.hlsltpSLptIWpslLps-.pRlsFhcFlGaLs+s-pshl+hWaccslGsh+sppPWhpssPst.sac..cslsscshstAah+GpshGlshLtssu.E.tcssssETssEpE......DsEs-u-D-..........plPpIlPp.t..shppRPslFlpR ..................................WPMGYRTATLRTLTPVPNRVGADSIMLTATFGCQNAARTLNTFSATVWTPPHAGPREQERYAREAEVRFLRGKWQRRYRRIYD.LIELCGSLHHIWQNLLQTEENLLDFVRFMGVMSSCNNPAVNYWFHKTIGNFKPYYPWN.APPNENPYHARRGIKEHVIQNAFRKAQlQGLSMLATGG.EPRGDATSETSSDEDTGRQGSDVELESSDD..........ELPYIDPNME..PVQQRPVMFVSR. 
0 0 0 0 +332 PF00679 EFG_C Elongation factor G C-terminus Bateman A, Griffiths-Jones SR anon Pfam-B_40 (release 2.1) Domain This domain includes the carboxyl terminal regions of Elongation factor G, elongation factor 2 and some tetracycline resistance proteins and adopt a ferredoxin-like fold. 24.00 24.00 24.00 24.00 23.60 23.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.83 0.72 -4.12 172 18629 2012-10-02 20:07:24 2003-04-07 12:59:11 19 79 5330 69 5120 13346 6308 88.10 29 13.46 CHANGED hlhEPlhplplpsP.....p-hhGslhsplsp.RRG.plhshpsp.........ss......tshlpsplPhsphh.satscL+ohTpGputhshpa..spYp.sssphhpph ......................lhEPhhplp..l...sP...................c-a.hGs.Vhst.ls....p.RRG..ph.hsMp.p...................ss........................................tstlp..hpl.PlsEhh..Ga...tscL+ShT...pGpu.s.h.s.hp.F..scYp.sshsh.t..h............................ 0 1815 3287 4370 +333 PF03764 EFG_IV Elongation factor G, domain IV Bateman A, Griffiths-Jones SR anon Pfam-B_40 (release 2.1) Domain This domain is found in elongation factor G, elongation factor 2 and some tetracycline resistance proteins and adopts a ribosomal protein S5 domain 2-like fold. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.12 0.71 -4.54 81 9390 2012-10-03 01:04:38 2003-04-07 12:59:11 13 49 5414 57 2657 7106 2810 117.60 36 17.30 CHANGED sp.Vua+E.hhpssc.tptphtcp.sGhcspau+hhlphcP..........ssuh.Fhstsp....GstlscEahsulppGhppuhppGsLusaPlhsl+lsLhDushHs..lcSoshsap.Auphuh+puhhpAp ....................................................................PpVuYREThp.pssc.hc....tpat+Q...o.GG....c.G.Q....a.u..clhlp..h...p.Php........................spGh....p.F.......s..plh...................GG....s...l.P...c......E...............a..lsuVpcG.l.p.p.uh.c......p.GlL....A..G....aP...........lh..Dl..+ssL.hDGuaHs............VDS..o..p...hAF.+hAuphAh+puhpcAt..................................... 0 986 1715 2277 +334 PF00036 EF-hand_1 efhand; EF_hand_1; EF hand Eddy SR anon Prosite Domain The EF-hands can be divided into two classes: signaling proteins and buffering/transport proteins. The first group is the largest and includes the most well-known members of the family such as calmodulin, troponin C and S100B. These proteins typically undergo a calcium-dependent conformational change which opens a target binding site. The latter group is represented by calbindin D9k and do not undergo calcium dependent conformational changes. 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.44 0.74 -7.46 0.74 -4.24 893 3781 2012-10-02 16:17:27 2003-04-07 12:59:11 27 216 892 544 1838 18263 638 28.50 33 12.77 CHANGED plcphapthDp-ssGtIshpEhtphhpph ...........lccsFphaDpDssGhIshpEhhphhp............ 0 649 934 1356 +335 PF04189 Gcd10p eIF3_gamma; Gcd10p family Wood V, Finn RD, Bateman A anon Pfam-B_8933 (release 7.3); Family eIF-3 is a multi-subunit complex that stimulates translation initiation in vitro at several different steps. This family corresponds to the gamma subunit if eIF3 [1,2]. 
The Yeast protein Gcd10p has also been shown to be part of a complex with the methyltransferase Gcd14p that is involved in modifying tRNA [3]. 21.00 21.00 22.70 22.80 20.60 19.80 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.77 0.70 -5.28 34 336 2009-09-13 16:20:58 2003-04-07 12:59:11 8 6 291 0 241 322 10 288.60 26 61.49 CHANGED pshIp.spaVllc.hsu-thKlVclpsss....hlpLGKhGsF.lssllGhsaGpoFEIh.sp.................ph.h.l.psph.tpph.sp......................t.ttt......t.tpsscsN+sll.......Dsup.sQK..Lot--IppLKcp.stu...Gp-IIppllpsppoFcpKTtFSQ-KYl+RKccKahcha...Tl.h.ssshLsphahcc..-s.......t+lhclRt-oLuhlLshuNl+s..............sG+YLVhDc.TuGLlluAhhERMuu..........................................pGpllthapspt.sshs...hLchhs.....t..ts.p..pttlpslshhthhcs........ttpppppp.sshspc-hsshcss ...................................................................hIp.spaVll...h.sphh+.hh.p.lp..st....................plp.lGK.t.h.hspllGpsastoFE..lhspt............................................p.h.t.t...........................................................t.....................ptpp...s.spsNR.sll.......Dssp...sQp..Lot--IcpLKcp..uhs....................Gc-IIppLlpspsoFcpK.TtFSQpKYlp+KpKKYhp...hh..............plh..sssp.lsphahtc..-s..............s+lh.plR.h-oLu.lLshuNl+s....................................................................su+hLlh-s.suGLlsuAhhERhGu...................................................................................................pGsllphh....ss...s...sshs...hlphhs...hs.p...........p...t.......h....th.l.plp.h...p.......................................s........................................................................................................................ 
0 83 133 201 +336 PF03610 EIIA-man PTS system fructose IIA component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.00 21.00 21.00 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.41 0.71 -4.02 36 8406 2012-10-02 12:41:15 2003-04-07 12:59:11 11 38 2718 39 912 3993 258 113.60 21 41.09 CHANGED ulllsoHu.chApGlhchhphlhGc....sltslshhss..tshsshtppltpAl.pphsts-tllllsDlhuuss.stsst.hhcc.t..ht...hlsshslPhlhpuhsst...sphshsphhsplh ........................tlllsoHG.phApulhpssphl...h...Gp....t..p.lt.s.l...s..h.h..ss..........p.ssssh...hpcl.pptl.....p..p..h....s....s....sc......s.lllLsDlhu.u.o.shst...ss..p...h...h...h..c...t.p......hp.............llsGhNlPhll...pshhtp.....tths.h.tt.ht...h....................................................... 0 255 550 740 +337 PF01448 ELM2 ELM2 domain Bateman A anon Bateman A Family The ELM2 (Egl-27 and MTA1 homology 2) domain is a small domain of unknown function. It is found in the MTA1 protein that is part of the NuRD complex [1]. The domain is usually found to the N terminus of a myb-like DNA binding domain Pfam:PF00249. ELM2 is also found associated with an ARID DNA binding domain Pfam:PF01388 in Swiss:O82364. This suggests that ELM2 may also be involved in DNA binding, or perhaps is a protein-protein interaction domain. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.86 0.72 -3.32 53 960 2009-01-15 18:05:59 2003-04-07 12:59:11 19 46 123 0 518 897 2 55.30 31 7.90 CHANGED IpVGsc.aQ.....AcIPphtspspppppppt.........................ccpl...lWsP.........s....tlsc...ppl-p.......alp.hupspt .............IpVGsp.aQ...........A.-..IP-.hhsps...ptspcp...pp...............................................................................tcpL......VWsP.........p...sls-.......pcl-p...aLs.hApp..s...................................................... 
0 109 160 306 +338 PF02990 EMP70 Endomembrane protein 70 Griffiths-Jones SR anon Pfam-B_1312 (release 6.4) Family \N 22.80 22.80 22.90 23.10 21.20 22.70 hmmbuild -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.91 0.70 -6.04 15 1526 2009-01-15 18:05:59 2003-04-07 12:59:11 11 27 425 0 910 1437 39 423.80 34 82.92 CHANGED hsYcYYs..LsFCpPpp..lpcpsEsLG..EVLpGDRlhsSPaclchhccppCpslCpp.......plspccschlpctIppsYahpallDs.LPlsspltphcs...........h.h..Gah.s.p.......................................pspKhYlaNHlchhlpYH....cc.........RllslpVpshSlc...............p...........ppP.hlcEsp-.......lhFTYSVcWpcoc.lcWuo...RWDtYLc......hpshpIHWFSIlNShllVlFLoGlVuhIlhRTL+pDlu+YNp.-ptp.............DspEEsGWKLVHGDVFRsPp....pshLLSsllGoGsQlhhMlhsslhFAhlGhLuPusRGuLhTshlllYslhGhluGYsSuplY+phcGp..................pWKpshlLTuhLhPGslFshhhlLNhllWupsSSuAlPFuThlhllslWhhlSlPLohlGullGh......Rppsh..csPsRTNpIPRpIPp..QsWYhpslhuhLhuGlLPFGuIaIELaFIaoSlWtpc..hYYhFGFLFlVFlILllssuploIllTYFpL..CuED....Y+WWWRSFlsuG.uoulYlFlYSlaYahsKhp.lsGhh 
...........................................................................................................................sYpYat..hs.a.Cts....t..........................t................................t..........sLG..p............lL...Gccl....o..a.p.....hp..h....p......p....p..t.....hh..Cp....................t.h..s.t.t.p.hp.ht.phlppt.Y.hphh.....................l.Ds.LPhhh..h....h....tt................................................................................................t.tt.p.h.h.l.sHhph.......l..ap..................t.p..........................................................pllt.hpl.s.......t........................................................................t.......l.t....t..tt...........l.aoYsVt.a.p..sp....hta..ts..............R.aD..h..ah.t.......................p.plHWh.....................SlhNSh..h....h.VhhLs..........u.h.l..shI.h.hRs.L.+.pDhs..pYs.p..pt.t...............................................t..t-....-.GW.K.llHGDV.FRs.Pp.........t.shlLsshlGsGsQlh...hhshhsl...h.h.......Ahl............G...........h.........ls......PspRGu.l.hT...shlhlashhu.hh.uGYsuu..p......h...Y+ph....t...Gp..........................................pWh..pshh....hT.uh..lh...Pu..hlFsh.hh.hl...Nhhhhs.......t...S...ouAl..PFsThl.s.llhlW.hhlslPLshlGuhhG..........................+p.sh...p.Ps......+..s.....s.pI.PR...p......I......P............p........a....ah........p.....s.........h..h....l..hu..GlLPF..u..sl....alE...L....a.FI.h...............s.S...l..W..pp.....hYa...ha..G................FLh.ls..h..h.........l....L....h.lssu.plo.l.lhs.Y.h.L........s.sE.s..................a.pW.Wpuahsuu.ssu.halhh..YuhhY...hh..c.p.h.t..h......................................... 
1 327 520 748 +339 PF01223 Endonuclease_NS Endonuclease; DNA/RNA non-specific endonuclease Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.68 0.70 -4.70 59 2367 2012-10-05 18:28:12 2003-04-07 12:59:11 18 35 1457 25 866 2278 269 199.70 19 53.76 CHANGED hhptaslsaspp..pchshasttplsssshttspsp.....................................spat.-.slsst.hpst.ssatsth.......aDRGHlss.......sspthspss.psTFhh.sNhsPQ.htshNp..ss.........WttlEshsRphsp...............ptpslhlhoGshhh.s.........................................................ttlslPphhaKllh..ts.t.................htuhlhspts..pth...............tt.............h.hsl.tl.t.......sG..lsahsslsst .........................................................................................................................................h....as.sastp.....pchs.asu.p...l.pt..ps.htttptp................................................................................ttspa....t....-..s...pl..s....t......t.....h....p..s.t...ss...Ypt.su......................hDRG.Hhss...............sshph...u..p.ps.psoF.hh....oNhsPQ..t....t......shNp.....sh.................W......s.t.l.E.p.h.l.R.p...h...s.p....................................ph.p.slhVhoGslhh.p.........................................................................h.h.t.tspl....lP...phaaKllh.s.pttt....................................h.uahh.pt.....t.........................................................................h.httlpt.......................ss....hphh......t.......................................................................................................................... 
0 233 407 665 +340 PF00812 Ephrin Ephrin Bateman A anon Pfam-B_1390 (release 2.1) Family \N 25.00 25.00 25.20 25.70 24.80 23.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.06 0.71 -4.70 14 554 2012-10-02 17:41:00 2003-04-07 12:59:11 12 4 91 31 288 430 1 131.80 39 52.16 CHANGED tsuuscasVYWNSoNs+Fhp.pshsltlpIsDhLDIlCP+hEss..........spshEhYhLYMVsh-thcsCph.tpcsphhhpCs+Pps.......................sPl+FopKFQcFoPhshGhEF+tG+-YYYISosssshtsp.......ChphphpVhh+sup ..............................h.......h.Ws.s..t..N..+...............Fh......t...t............sh.sl...tlp.l.s......DhLDIhCP+hcss.........sttphE..hYh.LY...............hV.s.t-tacsCph...p...pp..s.h.hhhp.Cs+Ppt........................slKFo.KFQcFoP.s..hGhEF+sG+-YYaISosssst...tth.........C...hphp.hhlhhps................................. 0 47 74 164 +341 PF05139 Erythro_esteras Erythromycin esterase Bateman A anon COG2312 Family This family includes erythromycin esterase enzymes [1,2] that confer resistance to the erythromycin antibiotic. 
26.30 26.30 26.80 26.80 25.40 26.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.09 0.70 -5.25 73 695 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 516 4 246 666 50 310.70 22 67.58 CHANGED slAlEuDWPDAhpls+YVpst.ss..................sspt............tsapRFPtWMW.RNpEhhchlcWLRsaNsshs.....tpc+...lsFaGlDlY....ShtsShpsVlp...YLcpsDPptA.......ptA+ppYushs.h.sccsttYGhtsh....t..hpssccpllppLpcLhppthphttp...ts....-chh.ApQNAplltsAEpYY.....+sMhtstsps.......WNlRDpHMs-oLptLhcph..stp.....uKslVWAHNoHlGcApuTphu.........ts.hslGQll+-paGcc.shhlGhsohpGsVsAuspW......ssshphhplpPuhssSaEplhcpsuhspallch...........ssthtphL..tp...RhpRuIG.VlYpPco..Eh.SaYhpssLscQFDullah-cT .....................................................................................................................luhEssassuh.t.l.scYlpst.ts..............................s.p....................hppa...th..ha..psp-hhshl.cWhRpaNtp..............ttpp....lthhG.hDl...........s.tp.s..h.p.t.lhp.......Yl.c.p.h.s...sphh...............ttscpt.h..tth....p....h...tppsthh.sh................htppppphhpthpclhphhtp..ttt....s.................cphhhs.ppsAphltphtphh.................pshht.s..s.pp.....................hsh.....RD.ptM.h-slthlhcph......t.......................u+hlVWAHNuH..l.ucspss.hh.................................t.hshGphlpc.p.hGcp....hhslGhsshpGphsshpt.........st.hthhtlts.s..ssShE.t.h..h.t.p...s...s....h......s......t.hhlsh................pt.ht.thl..tt...t..hhh.....G....shhhst..........hhh....ht...ptaDshlahpp........................................................................ 0 83 170 220 +342 PF04800 ETC_C1_NDUFA4 ETC_CI_21; ETC complex I subunit conserved region Waterfield DI, Finn RD anon Pfam-B_6275 (release 7.5) Family Family of pankaryotic NADH-ubiquinone oxidoreductase subunits (EC:1.6.5.3) (EC:1.6.99.3) from complex I of the electron transport chain initially identified in Neurospora crassa as a 21 kDa protein [1]. 
25.00 25.00 25.10 26.80 20.80 24.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.35 0.72 -4.15 6 625 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 542 2 330 566 1035 98.40 40 65.94 CHANGED RpsRIYpPA+sshQSGpu+spcWpl-F-.spt...RWENPLMGWooouDPlup...hcLoFsT+E-AluFAE+pGapYpVcEPps.+hKsKuYusN.......Fuas++pp..h ...............psRIYpP.A+.sAMQ..SG..p.s....pT.......c......p.......Whl-a-....ppt....R.h..sPLMGWsSS......uDshsp.......lpL..pFso+E-AluaAc+pGhsY.pVp.E..Pp.............p...............p....h..+.......+...u...Yus..N.......Ftas+.t.h..p....................... 0 107 195 261 +343 PF04716 ETC_C1_NDUFA5 ETC_CI_29_9; ETC complex I subunit conserved region Waterfield DI, Finn RD anon Pfam-B_4159 (release 7.5) Family Family of eukaryotic NADH-ubiquinone oxidoreductase subunits (EC:1.6.5.3) (EC:1.6.99.3) from complex I of the electron transport chain initially identified in Neurospora crassa as a 29.9 kDa protein. The conserved region is found at the N-terminus of the member proteins [1]. 20.10 20.10 20.20 21.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.69 0.72 -4.44 32 283 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 238 0 190 282 2 58.80 37 34.55 CHANGED ssP+tsLhsLYschLcpLp.phPcsusYRptTEtlsppRLplVcp......ppshcclEpp...lss ....sP+psLhhLYschLctLp..phPcsu.sYRptTEtlTpcRLslVcp..............-tslpclEcpls...................................... 0 66 104 156 +344 PF04621 ETS_PEA3_N PEA3 subfamily ETS-domain transcription factor N terminal domain Kerrison ND anon DOMO:DM04577; Family The N terminus of the PEA3 transcription factors is implicated in transactivation and in inhibition of DNA binding [1]. Transactivation is potentiated by activation of the Ras/MAP kinase and protein kinase A signalling cascades. The N terminal region contains conserved MAP kinase phosphorylation sites [2]. 
30.00 30.00 30.10 30.10 28.10 29.90 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.48 0.70 -5.21 10 264 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 49 0 96 260 0 269.80 47 68.50 CHANGED MDGFYD...QQVPFhVss.sppu...ppstsRsss-RKRKF.lso-LAp.......DoEELFQDLSQLQEtWLAE......AQVPD.DEQFVPDFQS-N.LhFHGPP.sKIK+E.pSPup-h.SsCSpcps.shsaGEKCLYsh..........................SAaDpKPssuhKPsoPsoTPsSPhss.sth....uotPhpcsosss...........................hthPh.sps.P.hls.Ps.s..sssass-pR.......FpRQLSEPChsFPs.suth................s+-sRPsYaRQMSEPhlPh...PPQGFKQEhhDPlYscpGhPs............stsap.....tshsIKQEPRDFsaDSEVPsCpSsYh+ssuF...sstp-..Gata-+-s+hFaDDoCVVPERhEGclKQE.suhaREGPPYQRRG ........................................uhhDQp.V....Pah.st..pp.s.....tt.ttt....h.sp.t+hh...sp.......s...............DSE........-LFQDLSQhQEsWLAE...................A.Q.VPD..DEQFVPD.apu-s.......L..................sFHu.P.hKIK+E.pSPso-..puCSpcps.h.hsaGEpCLYs......................................sAY-p.p..suh+s.sPssss......oP...................h..hpttu..s...................................................................................Ph.....spt.................h..s....c.pp.......................................a.p.Q.sEs.CpsFss..s................................sttsts.YpRQhSEP....hPh...P.QuFKQEYhDPlYEp..t.shsu...............................s.stp........sshhIKQEPpDasaD...S..-...VPsC.t.......S.Yh+tt.sF.......st.pt.........Gh.a-Ks.R..a.DDsCVVP.E+h.-.G.clKQE.ssha..REGPsYQRRG................................................................................................... 0 7 15 38 +345 PF04777 Evr1_Alr Erv1 / Alr family Finn RD anon Pfam-B_5005 (release 7.6) Family Biogenesis of Fe/S clusters involves a number of essential mitochondrial proteins. Erv1p of Saccharomyces cerevisiae mitochondria is required for the maturation of Fe/S proteins in the cytosol. The ALR (augmenter of liver regeneration) represents a mammalian orthologue of yeast Erv1p. 
Both Erv1p and full-length ALR are located in the mitochondrial intermembrane an d it thought to operate downstream of the mitochondrial ABC transporter [1]. 25.60 25.60 25.90 25.80 25.40 25.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.63 0.72 -4.09 59 881 2009-09-10 22:39:07 2003-04-07 12:59:11 8 18 378 43 544 854 401 96.80 29 29.08 CHANGED puhWshLHshuupaP..pp...Potpp..........pp...shpsahphhsphaPC.ppCupcFpphht....p..tssp.............lso+cshstWLhphHN..pVNp+LuKs...................s..sph.pppats ..................................puhWp.lhHohssp.as....cp.......Po..tpp....................pp...shpshhphh.sph.aP.C..p.............cCupc..Fp.phhp............c......p.sp.............................lp.o.ppshshWlhphHN..pVNp+LuKs...................ps.sph.p.pa..s............................. 0 190 296 442 +346 PF03372 Exo_endo_phos Endonuclease/Exonuclease/phosphatase family Dlakic M anon Dlakic M Domain This large family of proteins includes magnesium dependent endonucleases and a large number of phosphatases involved in intracellular signalling [1]. This family includes: AP endonuclease proteins EC:4.2.99.18 e.g Swiss:P27695, DNase I proteins EC:3.1.21.1 e.g. Swiss:P24855, Synaptojanin an inositol-1,4,5-trisphosphate phosphatase EC:3.1.3.56 Swiss:O43426, Sphingomyelinase EC:3.1.4.12 Swiss:P11889 and Nocturnin Swiss:O35710. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.30 0.70 -4.44 326 20400 2012-10-02 01:25:08 2003-04-07 12:59:11 18 388 4759 145 7593 17817 4636 258.00 13 59.21 CHANGED h..oa..Nlpt....................................................................................ltp.........hlppts.................sD..lls.....................lp....Esp............................................t.h...tthhtt.......hs...tt.h........................................shslhs+h................s..httthhthh............................................sttththhhthtttth...............................................................lh...ssH..............................................hss................tstttp...t.phhpth........tthhtt................................................hllhG.DhN...........................................................................................tshh....stth......pshpthh...h.................................................................................tl........Dalhs.............htththtththht...........................................SD..H 
................................................................................................................................................................................oaNlts....................................................................................................lhp.....................hl......p.p.s................................sD......lls.....................LQ..........Esp..............................................................................................................................................................................................p.h........tthht.h.......................h..................h.............................................................................................................G.lu.l...h.s...+.h.................................................................h..t..h..h.t.thh............................................................................................................................................................................stt.t.h.h..............h..h.h..h.t..t.........................................................................tt.tlh........lh......ssH........................................................................................hss......................................ttpttp............ht......thh.phh......................p.phh.ptt.............................................................................................................llls.G..Dh.N.........................................................................................................................................................................................................................................phh................p.t.h......................p.h.h.p.t.h...h..t................................................t..h...shtht..................t..h...............................................................pl...............D.alhh......................................................thh
.h.t.t...........................................................SDH............................................................................................................................................................................................................................................................................................................................. 0 2415 4456 6161 +347 PF02609 Exonuc_VII_S Exonuclease VII small subunit Bashton M, Bateman A anon COG1722 Family This family consist of exonuclease VII, small subunit EC:3.1.11.6 This enzyme catalyses exonucleolytic cleavage in either 5'->3' or 3'->5' direction to yield 5'-phosphomononucleotides. This exonuclease VII enzyme is composed of one large subunit and 4 small ones [1]. 20.80 20.80 20.80 21.30 20.70 20.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.35 0.72 -4.19 148 4033 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 3983 6 847 2207 1003 52.90 37 66.86 CHANGED FEculpcLEpIVpcLEsGclsLE-ulchaccGhpLhcpCpptLppAEp+lphl ..................FEculp-LEpIVpcL.E.s.G.-ls.LE-ulshac+GhpLs+pspppLppAEp+lphl........... 
0 283 566 723 +348 PF00929 RNase_T Exonuclease; Exonuc_X-T; Exonuclease Bateman A anon Pfam-B_1153 (release 3.0) Family This family includes a variety of exonuclease proteins, such as ribonuclease T and the epsilon subunit of DNA polymerase III.; 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.07 0.71 -3.96 57 17634 2012-10-03 01:22:09 2003-04-07 12:59:11 19 169 4800 65 4662 12914 5053 160.30 21 36.06 CHANGED lslDsEsTGhss..............hspllEluslplsstt...........htt.hphalpPpp...lsshsschsGIosth..lpstsph.p..shpthpphhp.t........phhlsps..hsh.hshhhpp.hchhhh.h......ht.hhcththspthhtthtt...........sLstlspphthpt.pp.......HpAlpDsptshplh ............................................................................lhlDhE.T.T.Ghss...........................ttsc.ll...-..l.u.s....l..pl..pssp.............................hssphp.h..h...l...p..P..p......p..........................l.....s....s..h...sh...p....l....p.....G................Io....s...ph.............l..p..s...t..s...s.......htp.......shp.p....h.h.p.....h...l....p.....s...........................................th..hVu.....+......N.....s......s.....FD.....hs.hh..t.t.p.h..t..c..h.h.h.h...hs.........................p.s.h...h..c.s...h..p....l...s..p....t.....h......h....thtp................................hpL.s.p.l.sp...t...h....s..l...p.h.ppt.....................H.p.AltDsptospl............................................................................... 0 1427 2706 3817 +349 PF03016 Exostosin Exostosin family Mifsud W anon Pfam-B_2031 (release 6.4) Family The EXT family is a family of tumour suppressor genes. Mutations of EXT1 Swiss:Q16394 on 8q24.1, EXT2 Swiss:Q93063 on 11p11-13, and EXT3 on 19p have been associated with the autosomal dominant disorder known as hereditary multiple exostoses (HME). This is the most common known skeletal dysplasia. 
The chromosomal locations of other EXT genes suggest association with other forms of neoplasia. EXT1 and EXT2 have both been shown to encode a heparan sulphate polymerase with both D-glucuronyl (GlcA) and N-acetyl-D-glucosaminoglycan (GlcNAC) transferase activities [1]. The nature of the defect in heparan sulphate biosynthesis in HME is unclear. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.97 0.70 -5.38 33 1543 2010-01-08 15:44:36 2003-04-07 12:59:11 10 45 197 0 1043 1489 205 267.00 21 51.06 CHANGED tpsshpVYlYcls......................................................pshh.sppash-hhlhpplppsp........hhstcPscAslFalPhhsshp.......................pphh.t.l.phhpph..shW....pc.tGcD...Hllsssps......................ssthhh...thtspshhsl....h..s...................ptphh.spD...lslPshhpssshssthst.........tpRphLhhFsGshtht..t.................ttslRstllcph..........ts..sst.t.......................tptppts..p..........hhchhpsupFCLh.PtGt.p......sptplh-ulhuGClPVll.uc.......shp.....LPFtshl.DapchoVhlscpcl......splhphLcsl 
...............................................................................................................................................................h....hhlalY.........................................................................................................................pt......ht.......t..hh.hhttl......pst..................................hhstssppAshahl..s...h..sh.........................................................t.t........h.......t.l.....t....h....h....t....ph............sh.W......................pc......Gts......Hh..hhshts.............................................................t..........thh..shhsh.................t..........................................................................ptthh.thD.......lshP..h.....h..........ts....t....h........................p.......R..ph.L.h..h.F..tG.t....h...................................................................ttth+.h...lhp.p..h.....................ps....tt...p.......................................ht.t.t..t...p...............................hhphh..tpS.....pFCLh...Pt.Gt...p..................ss.Rlh-ulh.sGC..lP....Vll..uc......................ph......LPa....t...p.hl....sapphu...lhlsppcl..............pl.phLpt................................................................................ 0 337 682 871 +351 PF03124 EXS EXS family Mifsud W anon Pfam-B_605 (release 6.5) Family We have named this region the EXS family after (ERD1, XPR1, and SYG1). This family includes C-terminus portions from the SYG1 G-protein associated signal transduction protein from Saccharomyces cerevisiae, and sequences that are thought to be murine leukaemia virus (MLV) receptors (XPR1). N-terminus portions from these proteins are aligned in the SPX Pfam:PF03105 family. The previously noted similarity between SYG1 and MLV receptors over their whole sequences [1] is thus borne out in Pfam:PF03105 and this family. 
While the N-termini aligned in Pfam:PF03105 are thought to be involved in signal transduction, the role of the C-terminus sequences aligned in this family is not known. This region of similarity contains several predicted transmembrane helices. This family also includes the ERD1 (ERD: ER retention defective) yeast proteins Swiss:P16151. ERD1 proteins are involved in the localisation of endogenous endoplasmic reticulum (ER) proteins. erd1 null mutants secrete such proteins even though they possess the C-terminal HDEL ER lumen localisation label sequence. In addition, null mutants also exhibit defects in the Golgi-dependent processing of several glycoproteins, which led to the suggestion that the sorting of luminal ER proteins actually occurs in the Golgi, with subsequent return of these proteins to the ER via `salvage' vesicles [2]. 25.00 25.00 26.30 25.10 23.00 23.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.26 0.70 -5.42 68 815 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 326 0 551 823 12 287.00 27 50.36 CHANGED hhh.lapshhLlllhhahaulslahWpptplNa..s....hIhchs..........................ppplshpphhp.............lushhs.................................hhhslshhhhhhh.h.t...p.hth.................PhhhlhlhhhlllhPh....phh........htpuRhahlpslhRllhush....h.VpFsD..FaLuDtLsShshsltDlthhhChah..h.h.............ptt..............stCtpsphhh............sllsslPshhRhhQClRR.ah-spct...............sHLhNAhKYssshhshhhtsh..hchppspt.......................hhhhalhsuhlsShYshhWDlhhDWuL.........................hpps.t.s....................................cshhLR..ccLha.............sp.......hhYYhAhlhsllLRhsWhhp..hh.................pht.hh..pp....plhshl...luhlElhRRhlWsFFRlENEHls 
..................................................................sh..hath.hhhh.hhhhhshsh..h.hap..t..tl...sa..s......hlhphp...............................................tpthp.tphh...............................................hsshhh..........................................................hhhh.h.hhh.hh...............................................Phhhh..hhhhh.....hlh.Ph....thh.....................................hpsRhhhhhhh................hRlh.hush............h.ltFtD..haluD.lsSh......s......h..........sh...hsh..thhhChahh..........................................................t.C....t.t.hh..............................hl.slP...hhRhh.QClRc...ahc.s.tph..........................aLhNuhKYssshhhh....hhtsh....hph........t.t..t............................hhhhalhhshlsohYshhWDlhhDWuL..........................hp......s..................................................................................................................pthhLR......cplhh.................................p........h.hYYhuh...l.sllLRhsWhhp.h.h...................ph..h.....ps....phhs.hh...huhlElhR.RhhWsaaRlEsEal............................................. 0 185 336 476 +352 PF04554 Extensin_2 Extensin-like region Bateman A anon Pfam-B_1707 (release 7.5) Family \N 20.80 20.80 21.10 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -11.10 0.72 -4.10 14 490 2009-01-15 18:05:59 2003-04-07 12:59:11 8 22 25 0 352 530 3 49.70 73 84.32 CHANGED YcYpS.PPPP.hp........PPPP...Ya.......Y+SPPPP...s....YhYpS.PPP..Pha..........SPPPP.........YhYpSPPPP ................YlYsS.P.PPPhYp......Po..P+..spYKSPPPP.........Y...VYuS.PPP.......PhY.........SPSPp.........spYKSPPPP...................... 0 227 309 310 +353 PF00646 F-box F-box domain Bateman A anon Prosite Domain This domain is approximately 50 amino acids long, and is usually found in the N-terminal half of a variety of proteins. 
Two motifs that are commonly found associated with the F-box domain are the leucine rich repeats (LRRs; Pfam:PF00560 and Pfam:PF07723) and the WD repeat (Pfam:PF00400). The F-box domain has a role in mediating protein-protein interactions in a variety of contexts, such as polyubiquitination, transcription elongation, centromere binding and translational repression [3]. 20.50 15.60 20.50 15.60 20.40 15.50 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.28 0.72 -4.22 463 9939 2012-10-02 00:56:31 2003-04-07 12:59:11 28 388 416 10 7006 15392 101 45.20 20 10.27 CHANGED htl.pLPtc........................llppllpplshts.........hhphphls+phcphlpphphhtptht ...........................................................................h.pLPt-..........................................................lltcIl.p..p.L..s.hps..............................lh.p.hp..tls+pac.plhpp.........hh........................ 0 1706 3015 5562 +354 PF00754 F5_F8_type_C F5/8 type C domain Bateman A, Finn RD anon Pfam-B_478 (release 2.1) Domain This domain is also known as the discoidin (DS) domain family [1]. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.67 0.71 -4.31 185 10300 2012-10-03 19:46:52 2003-04-07 12:59:11 20 1197 1614 169 3934 9293 615 125.60 16 18.56 CHANGED phsuSo..phssth............stttlD...ss............t.stWps....t.......sssspalpl..DL.spstplsslhhpst..psttt...........hspsaplphS..sDG.....p.........sWpphtt................ss.ssss.hh..........hh.sshpu.....RalRlhsh...........h.stsphsthhEl .....................................................................s.....t.................tttl.D....us............................s.s..t..W.ps......t.........................sstspa.l..pl.................D.........L....s...p.......p.....p....l.s..t.l..t...h...p...sp....tsttt........................hspsa..p.l.t..hS........s..cu.........p..................sW.pphtp.........................................ss..s..s..s.s..hh........................................hh....t.s.h.p.u..............+a..l..Rlh.h................tt....h............................................................................................. 
0 2052 2661 3306 +355 PF00487 FA_desaturase Fatty acid desaturase Finn RD, Bateman A anon Bateman A Domain \N 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.16 0.70 -4.94 127 9036 2009-01-15 18:05:59 2003-04-07 12:59:11 19 46 2567 0 3138 8639 3621 228.30 15 66.62 CHANGED hs.hhhhhhhhhhhshhhthhhhltH-ssHt.....thhtpphhsphhuthhshh....hhhshht.a...p.......Hhh.HH......phhss.t........Dssthhh..............................hhhhtphhhhhhhthhhhhhhhhhthtttthhttthh........................................hhhhhhhshhhhhhhh.hhhhhhht..................shhhhhhshhhhhhhhhhhs...........h.t..Hhhh...........tttttstshhtpp....hh............hhshhhhs..hs.aHh..................tHHh...aP...sls....ahpLsphttthtphhtttthshhtht ...................................................................................................hh.h.h.h.h..h.h..h..h.hh.t...hhh..hh.tHphuHt......................th....t..p....t.....h..h..p..p..h.h..u.h.h..hhhh.........................h.sshht..a...thp...........................Hpt.HH......................phsss.t...............Dss.shh............................................................................t...t..h....h..h.....t..h..h.h...h....h......h...h..h....h...h..h..h..h......h.....t...h..t..h....t.p....h..t..phtt................................................................h...h.hh.hhh.hh..h...h....s.h..h...hh........hh...h...h.........h...h...t........................................................hh...h..h....h..h....l.......t..h..h.....h...h.t.h.h.h.hhs.........................ht.......Hhh.............................h.tpttpthhp.pt................h...........................t..h.h.h....s..h..h.hh..s........hs..hHs...................H...Hh..........aP............th........h..h........................t........................................................................................................... 
0 1025 1826 2610 +356 PF02913 FAD-oxidase_C FAD linked oxidases, C-terminal domain Griffiths-Jones SR anon Structural domain Domain This domain has a ferredoxin-like fold. 21.20 13.90 21.20 13.90 21.10 13.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.30 0.70 -5.06 62 9445 2012-10-02 00:48:38 2003-04-07 12:59:11 14 65 3272 53 2996 8280 3740 227.50 22 39.33 CHANGED hPpttssshshasshpsAs..psstplhpp.thhssshEhhDptshphshshhsh.........................tt.shlllchpusp...th.tpphp..tltphhptsssss................hhsps.tphp..plWp...hR+thhs.............hhhpttshsht.DsslPhsclsshlpphcphhsphs..........hhhshhuHsG.......DGNlHhhlhhs.ts......tthcphpphhtcl....hthshphGGolouEHGlGht+pt....ahttthuttslshh+plKpshDPpsILNPG+l .......................................................................................................................................................................Pt.ttshhhta..s..s.......hpsAh.....psl...t.h....h....t..........t.....t.............h..s....s..u..hE......hhDph....s.l.p..hs.pph.hth......................................................h.p.t..t.s..h.ll.lE...h.s...G.s.s.............t..t.h.....p.........p.......p........hp................hltph.hp..p.t.s..s.t.p............................................h.......hsps.tptp.........plWt.........hRcpshs..........................................h...t...p...h...t...s..hhh.t..p...Ds..slPh..spl.sphlpp..h.pthhp.phs...............................................ht.h.s..hhuHsG.............DGN..l.Hh..h..l..h....h...s..h..s..p......................tp..h..p..p.h....c..t..h.h.p.cl...........hp.h.s.h.c.h.G.Go.l....o...u.EHG..lGht+.t.........hh.h........h..u..ttt.......h.thh.ctlKtshDPpslhNPGK......................................................................... 
0 916 1869 2506 +357 PF01565 FAD_binding_4 FAD binding domain Bashton M, Bateman A anon Pfam-B_352 (release 4.0) Domain This family consists of various enzymes that use FAD as a co-factor, most of the enzymes are similar to oxygen oxidoreductase. One of the enzymes Vanillyl-alcohol oxidase (VAO) has a solved structure, the alignment includes the FAD binding site, called the PP-loop, between residues 99-110 [1]. The FAD molecule is covalently bound in the known structure, however the residue that links to the FAD is not in the alignment. VAO catalyses the oxidation of a wide variety of substrates, ranging form aromatic amines to 4-alkylphenols. Other members of this family include D-lactate dehydrogenase, this enzyme catalyses the conversion of D-lactate to pyruvate using FAD as a co-factor; mitomycin radical oxidase, this enzyme oxidises the reduced form of mitomycins and is involved in mitomycin resistance. This family includes MurB an UDP-N-acetylenolpyruvoylglucosamine reductase enzyme EC:1.1.1.158. This enzyme is involved in the biosynthesis of peptidoglycan [2]. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.52 0.71 -4.70 142 20637 2012-10-02 01:00:47 2003-04-07 12:59:11 18 164 4917 139 7390 17693 6472 133.60 21 26.34 CHANGED P.thllhPpotp-ltphlchup..pp...shs..lhshGuGsshs.uts..........psu...lllsh.t..p.hsp..lhpl-s....t...stpsplpuGshhtpltptlttp..uhhhs.p.sush.ss..olGGhlsssusGhtstta.G.hh..-hlhulcllh.ssGpllphu .........................................................hllhPpsh.p-ltth..........l....ph.st.............pp..................p..h.s.........l..h...s...h.G.u.G........o.s...hh..utsh.................tsu.................ll..l.sh..p...........p..hsp...........l..h.p...l..c...............................t...........s..t..h..l..p...l..p..u.....G.s.h.h.t..p............l.t.p..t.........h......t........t....p...........u...h..h..h......s................p....s............u....s...t....................s............s........o........l.G...G.s...l..s.s...........s...u...u.......G...h..t..........s...h.........t..........h...G.....hh......-h..lhs.l....cl..lh..ss..Gplhph................................................................. 
0 2058 4492 6188 +358 PF00970 FAD_binding_6 Cyt_reductase; Oxidoreductase FAD-binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_143 (release 3.0) Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.35 0.72 -3.87 115 14779 2012-10-03 00:38:56 2003-04-07 12:59:11 19 159 4278 144 4286 11998 1730 96.10 21 26.84 CHANGED tspLls+pplScDs+hF+F.tL..s.sssph...luL..PsGpalhlpss...ls.sc.hhRuYoPsSss..s..-hGh..h-LllKlY.........sGGtMSp.aL.-s.L.plGsh.lcl+GPhGp.htY ...............................................................................lhphp...s..t..s..h.h..h.....hp..h..th...........s....t..........t....t..........h..t.a......p.s.G....Q...a......l..t.........l...t...l...........t.................p.......s............p.........................h........h............R.s....Y.........S........h......s.................s.......s................s........s.............pp.s...t...............lc......l..tl...+t.h............................s..G.h....h..S..s....h...L....p.....p.....l.....p.......s...G.......D.....p......l.p....l.t.s....PhGp.h..h................................................... 
1 1115 2425 3487 +359 PF03441 FAD_binding_7 FAD binding domain of DNA photolyase Griffiths-Jones SR anon SCOP Domain \N 20.10 20.10 20.10 20.10 19.40 19.90 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.86 0.70 -5.19 163 4493 2009-09-11 23:21:46 2003-04-07 12:59:11 9 22 2644 55 1440 3950 7444 245.00 30 51.07 CHANGED sGEps..AhppL....psFl..p..p..plpsYpps..Rsh.P......ut.....s.........................................uoSpLSPaLpaGplSsRplhptspptt...........................shpsalpcL.hWR-a.hhphhhptPpht.....................t.shppta.ps.h...Wt..................ts.p...tthpsWppGcTGaPllDAuMRpLhpTGa..hHNRhRhhsuSFLsK.cLhl-WctGtc.....................aF...........hppLlDh-.usN.ssWQa.uu.sGhss...hRlaNPhpQuc+aDPsGpal+pWlPELtslssc.hl..Hp...........................Paphst......................s.......YPp.P..ll-hppsRctslctatphp ......................................................................................ucptAhppL.....ppFh.p............ptl.t.pY.ppt..Rsh...s.......sh....s..........................................uo....Sp....LSsaLt............h...G..tlS.sR..phhpplhpttt.......t................................................tsspsa.lpcL..hWR.EF.ah.tl.h.h.t.h.P.p.h.t............................................................................p.ts...h.ps..t..h..pp..h......W........................................s..tthpt..W...p.p...GcTGaPll.D.Au.....MRp.LhpTG.a..hHNRhR.h...h........s.A.oFLsK..cLh..l...cW.c...G..t..c.....................aF.......................hppLlD.h....Dh...us...N......ssW....Q......ahuu....s...G..............s..D....s.........h..............h....R.h.aNP..............h..t.Q.........u.c.+a.....DspGp..a...l..+.p.....al.PpL.tt...lss......p......l.ap.............................................sht...........................................................t..............YP..s..hl.phtttp...hthh....t................................................................................................... 
0 488 923 1231 +360 PF03101 FAR1 FAR1 DNA-binding domain Mifsud W, Bateman A anon Pfam-B_2535 (release 6.4) Domain This domain contains a WRKY like fold and is therefore most likely a zinc binding DNA-binding domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.27 0.72 -3.46 43 1076 2012-10-02 23:28:20 2003-04-07 12:59:11 10 54 116 0 699 1040 0 87.40 24 15.96 CHANGED pFYspYAtpsGFulRhsppp+o...ptss.hhpppFsCs+pGhppppppp.............................ptpp.....sps.oRsGCcAtltl+hpp...sucWhVsphs.....h-HNHtLss .....................................................aYptYAtphGFsl+hpp.s..p.ps........ts.t..t..hh..ppp..a.s...C..s..+pGhppppptp.....................................................t..pp............pspsR.suC..pA.h.h.t.l..p...hpt.........ss......p..W...hl....p.p.h...........hcHNH.l............................................. 0 82 399 567 +361 PF04300 FBA F-box associated region Bateman A anon Bateman A Family Members of this family are associated with F-box domains, hence the name FBA. This domain is probably involved in binding other proteins that will be targeted for ubiquitination. Swiss:Q9UK22 is involved in binding to N-glycosylated proteins. 
25.00 25.00 27.50 25.90 21.50 23.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.36 0.71 -4.32 6 396 2012-10-03 19:46:52 2003-04-07 12:59:11 8 16 84 7 192 352 2 158.30 36 59.10 CHANGED WKshYhhpshcRNLLRNPCu.............................EtshpuWp.lp.pGGDtWpVEpLPhssusphP.sstVpphFloSFcWCpKpQhVDLcAEGhW-ELLDoFpP-IVVcDWausRtDsGChYpLcVpLLuAD.psVLspFossPssh.Qhss.ssWpcVSHsFScYsPGVRalpFpHtGpDTQFWsGaaGsRVTNSSVlVcs .................................................................p.hh...h...hp.htRNLl+NP...su...........................................................................-.p.sh..phWp...lp..pG..G..s.tWtlEp....t.......p....t....t.hs...s...........tl..........p....ph.Fl.o.S.athChKp.Qll....DL.tEG.h....htclh.DphpPcIh...lpDW.as.s...Rt..ssus.h.Yp.lpVpLL...........stp...p.....ps.............l..............sp..........F...............p......ss..........................h...h.ph.ss.......tt....WpploHsFpsYs.sGVRalhFpHtGp..Dsp.aWtGaaGs+lTsSSl.l..h.......................... 0 32 57 98 +362 PF00611 FCH Fes/CIP4, and EFC/F-BAR homology domain Ponting C, Schultz J, Bork P anon SMART Family Alignment extended from [1]. Highly alpha-helical. The cytosolic endocytic adaptor proteins in fungi carry this domain at the N-terminus; several of these have been referred to as muniscin proteins [2]. These N-terminal BAR, N-BAR, and EFC/F-BAR domains are found in proteins that regulate membrane trafficking events by inducing membrane tubulation. The domain dimerises into a curved structure that binds to liposomes and either senses or induces the curvature of the membrane bilayer to cause biophysical changes to the shape of the bilayer; it also thereby recruits other trafficking factors, such as the GTPase dynamin. Most EFC/F-BAR domain-family members localise to actin-rich structures [3]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.07 0.72 -3.80 157 2299 2009-11-12 14:53:54 2003-04-07 12:59:11 18 60 270 55 1342 2138 1 91.60 21 13.78 CHANGED psFhsph.........tuhcslhp+hcpuhphh.c-ltpah.+cRuplEccYuppLp.clspchhptht.........ttpp......uol.................ppuapplhspscphuptHtphuppl ..............................hhtth.............tthcsL.pphppuhp.hh.c......-l...tpah.+c.Ru..plEcpYup....p.....Lp...cLucchtttht...................sppp.............soh......................................................tpuapslh.sp.scphuptHtplupph........................................................................................................ 0 361 566 953 +363 PF00111 Fer2 fer2; 2Fe-2S iron-sulfur cluster binding domain Sonnhammer ELL anon Prosite Domain \N 20.70 15.00 20.70 15.40 20.60 14.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.55 0.72 -4.23 206 17474 2012-10-02 17:47:23 2003-04-07 12:59:11 22 166 3695 266 5439 17310 7619 75.20 21 24.22 CHANGED hphpup..thphpsssspp.lLcshc...p.t.slslshuCps.......Gs....CusCtsplhtu.................hpspphttt.h.....................LuCtshsp .......................................h....sp...th.pl...p..s..s..s..s..p.s.....lLcshc.............p...t..........s....l...s......l......t......h.......uCpt................................Gs..C.Gs..Cp..lh.lh..p.Gps.........p...................ptp......t........................................................LsC.sh......................................................................................... 
0 1464 3188 4405 +364 PF01799 Fer2_2 fer2_2; [2Fe-2S] binding domain Bateman A anon Bateman A Domain \N 20.90 20.90 21.20 21.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.77 0.72 -4.08 157 5947 2012-10-02 17:47:23 2003-04-07 12:59:11 15 71 2180 114 2017 5159 1915 75.40 40 17.35 CHANGED TlE..GL..............s.p..ssplcslQpAFl-ppuhQCGaCTPGhlhuutuLLc...p..................ps.ps......occ-.lccu.l.sG..NLCRCTGYppIlcAlpp .............................................TlEGL.....................s.p.ssp.l..cslQpAahcp......pusQ..CGa.C.TPGhlMoshu.LLcp............................................ss...pP.............ocp-..I.c.cu.l..u.G.....N..LCRCTGYptIlpAlp.t......................... 0 620 1185 1606 +365 PF04324 Fer2_BFD fer2_BFD; BFD-like [2Fe-2S] binding domain Kerrison ND, Finn RD anon COG2906 Domain The two Fe ions are each coordinated by two conserved cysteine residues. This domain occurs alone in small proteins such as Bacterioferritin-associated ferredoxin (BFD, Swiss:P13655). The function of BFD is not known, but it may may be a general redox and/or regulatory component involved in the iron storage or mobilisation functions of bacterioferritin in bacteria [1]. This domain is also found in nitrate reductase proteins in association with Nitrite and sulphite reductase 4Fe-4S domain (Pfam:PF01077), Nitrite/Sulfite reductase ferredoxin-like half domain (Pfam:PF03460) and Pyridine nucleotide-disulphide oxidoreductase (Pfam:PF00070). It is also found in NifU nitrogen fixation proteins, in association with NifU-like N terminal domain (Pfam:PF01592) and NifU-like domain (Pfam:PF01106). 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.09 0.72 -3.91 190 6775 2009-01-15 18:05:59 2003-04-07 12:59:11 10 90 2902 2 1800 5000 364 54.70 27 11.81 CHANGED thl..CtCpsVoctplppsl.....................tp.............hp...s.....hpplpptsts.............GssCG.sChs....ht......pllpptt ...................hlCpCpsVocupItpAl.....................tp..............tuspo.......lppl+ppTcs.........................Gss.CG...sChs..........ht....plltt..t............................... 0 527 1115 1483 +366 PF00037 Fer4 fer4; 4Fe-4S binding domain Bateman A, Eddy SR anon Prosite Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.61 0.73 -7.60 0.73 -4.16 533 8206 2012-10-03 08:56:42 2003-04-07 12:59:11 22 662 2969 164 3310 39684 11065 23.50 39 7.69 CHANGED hhlstcpChsCGtChpsCPssAlp ......h..ls.ccClsCGhChpsCPhsAI........ 0 1442 2410 2918 +367 PF00210 Ferritin ferritin; Ferritin-like domain Finn RD, Bateman A anon Prosite Domain This family contains ferritins and other ferritin-like proteins such as members of the DPS family and bacterioferritins. 
24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.60 0.71 -4.43 243 10640 2012-10-01 21:25:29 2003-04-07 12:59:11 19 13 4559 1610 2459 7325 1350 137.80 20 82.92 CHANGED hptLNctlstE...htushtYhthuhhhc..shshtshsphhcppupEEh.pHApclh-hlhthG........G.....ph.......phhp...........h.................................s.ssh-hlctslptEpplspplpclhphup....pp......p..DhsotshL.pthlp-.p.cchthlpshlppl..cth ...........................................phLNctl...spp...hhuhh.hYhph........phhhc......shsh..slt..phh..cc.........ps.p-ph...p.....Hs.......-cl.h.-+lh..p..lG............u.....s...hphh...........thhp..........................h.....p..........................................................hs.ssp-h.lc...p...s...lp...t...p..p.t.l.s...........p.pl.p...c.h.h.p.h.up......pt................p..D......sotshl.pth.l.p-.pccphhhlpshlp................................................................................................ 0 681 1367 1912 +368 PF01839 FG-GAP FG-GAP repeat Bateman A anon [1] Repeat This family contains the extracellular repeat that is found in up to seven copies in alpha integrins. This repeat has been predicted to fold into a beta propeller structure [1]. The repeat is called the FG-GAP repeat after two conserved motifs in the repeat [1]. The FG-GAP repeats are found in the N terminus of integrin alpha chains, a region that has been shown to be important for ligand binding [2]. A putative Ca2+ binding motif is found in some of the repeats. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.94 0.72 -4.16 470 2456 2012-10-05 17:30:42 2003-04-07 12:59:11 18 220 351 67 1119 4198 1515 38.40 33 6.96 CHANGED hstslssu..DlsGDGhsDlll..........................ssts....stlhlh ............Ghulusu..DlNGDGhsDlllG.....................................stptp......GtlYl..................................... 
0 347 596 821 +369 PF02181 FH2 Formin Homology 2 Domain SMART anon Alignment kindly provided by SMART Family \N 25.20 25.20 25.20 25.20 25.00 25.10 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.25 0.70 -5.57 48 2361 2009-01-15 18:05:59 2003-04-07 12:59:11 18 102 317 21 1446 2150 30 329.40 23 32.57 CHANGED tppt.psct+LKsLpWsKl..............................p.ssp.ps...........lWsclpppp..............ph-ht-h-thFsststpthtpt............................sstpppspclplLDs++upNluIhLppl..phshc-lhpulhph-p.phLs.............h-hl-pLh.chhPsc-Ehptl..p........phps-......hppLucsE...pFlhplsp.lsthppRlpshhFptsFpsplpplptplpslppAscpL+pScphpplLchlLthGNaMNsGst..GsAhGF+LsoLhKLsDsKus.-s+pT..LLHalsphltc.............................................phsclhshs.s-LpslccAuplsh-slppslppLpcslpplcpplptttp.......t.tptFhphhppFlppucpclcplpsthppshpthcchscYa...spssp....phs.pphFshlppFlphac ..................................................................................................................................h.....tsphth.h.h.W..pp..l....................................................................tp.h..pt.......................lW..sp......hp...ptt..........................................t.ph..t..c..hpthFttpttt...t...........................................................................t.tppp.pplpl..............L.-s.++upN.............ls.I..hL.p........p......h......p........h.s..........cc........l.t.....p...s...l..h..p..h..-p....ph..ls.............................................h-hl..p..p..Lh..phhP.p.p..cEh.phl.......p..........................ph...ptp..............hppLsps-..pFh.ht...h................p..ls.....php...t.....R.l.pshha.p......hpFpp...........p..h.p.p.lp....p..........lp..s.............ltt.Asp...p.......l.....p...pS....p.....p....hpp..lLp......h....lLthGNah..N....s..u..........p..................up....A.......hGF.c.L.s........L.+........L......t-.....s....Kus....c........p.p...........h...o......LLHal..s.ph.h.tp...............
.....................................................................................................p.h.s..p..h..h..p.....h.........s-l..........ttl....p..p..A.u...p............l..s.hc.p.ltpp.............lppl.ppt.hpt.hpp.plph.tp.........................................h.....hp.h.h...........p......pFlpp...sp.tphp...tlpt..hp...p............h............p....hhpp..hhtaa..............s.ssp..........ph.p...p.phFthhtpFht.a....................................................................................................... 0 487 743 1102 +370 PF02661 Fic Fic/DOC family Bashton M, Bateman A anon COG2184 & COG3654 Family This family consists of the Fic (filamentation induced by cAMP) protein and doc (death on curing). The Fic protein is involved in cell division and is suggested to be involved in the synthesis of PAB or folate, indicating that the Fic protein and cAMP are involved in a regulatory mechanism of cell division via folate metabolism [1]. This family contains a central conserved motif HPFXXGNG in most members. The exact molecular function of these proteins is uncertain. P1 lysogens of Escherichia coli carry the prophage as a stable low copy number plasmid. The frequency with which viable cells cured of prophage are produced is about 10(-5) per cell per generation [1]. A significant part of this remarkable stability can be attributed to a plasmid-encoded mechanism that causes death of cells that have lost P1 [2]. In other words, the lysogenic cells appear to be addicted to the presence of the prophage. The plasmid withdrawal response depends on a gene named doc (death on curing) that is represented by this family [2]. Doc induces a reversible growth arrest of E. coli cells by targetting the protein synthesis machinery. 
Doc hosts the C-terminal domain of its antitoxin partner Phd (prevents host death) through fold complementation, a domain that is intrinsically disordered in solution but that folds into an alpha-helix on binding to Doc [3].This domain forms complexes with Phd antitoxins containing Pfam:PF02604. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.65 0.72 -3.29 221 6829 2009-01-15 18:05:59 2003-04-07 12:59:11 13 61 3371 61 1631 5456 731 97.70 21 34.91 CHANGED hls...hptlhp.lHp.tl......hppht............................Gth......Rs..h...................shhptshspsp.ph.thh..........thhs....................hhptuAth.thtlsphHPFhDGNtRsuthhhthhL ......................................................................................................................................p.t.lhplHp..hl..........hp.t.hht........................s................................Gph.........Rp.th...........................................................t.thp.h...h.h.s.t.st...pl..t.t.h............pphhphhp...................t..p...................htphu..hh.ahph.ttlHPFtDGNGRsuRhhhthhL.................................. 
0 529 1058 1363 +371 PF00254 FKBP_C FKBP; FKBP-type peptidyl-prolyl cis-trans isomerase Finn RD anon Prosite Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.95 0.72 -3.97 174 16739 2009-01-15 18:05:59 2003-04-07 12:59:11 23 143 5125 202 5251 12394 3280 99.80 27 37.70 CHANGED ssp.sppGD.pV......plcYpGpl.........s...Gph..........FDuo..........h........+spshpaplGp...sp...VIp......................Ga-pulhs...........MpsG-ctplhlss.phuYGp......................................puh.s.......IPss.........................usLhF-V-Ll ..........................................................................................................................................................tt....sppsc..tV.......plcYp.G.pl....................D.........Gph..................................F.Duo..........................................................................pups.h..s...a....h.l.....G..t.............sp....lIs...........................................................G.a...-cul..hu.........................h.p....sG...-.c..h.p.l.h.ls...s...p..........u..YGt...........................................................................psh....s.................st.................................................pslhFclcl..................................................................................................................................................................... 0 1593 3021 4223 +372 PF01003 Flavi_capsid Flavivirus capsid protein C Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family Flaviviruses are small enveloped viruses with virions comprised of 3 proteins called C, M and E. Multiple copies of the C protein form the nucleocapsid, which contains the ssRNA molecule. 
21.00 21.00 21.60 24.90 20.90 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.64 0.71 -4.55 27 4730 2009-01-15 18:05:59 2003-04-07 12:59:11 14 17 108 12 0 4102 0 108.70 56 4.51 CHANGED ts.ups+slNMLKRshscs...httsKRhhhsLhsG.GPhRhVLAhlsFh+...FsulsPTsuLhpRW+sVsKppAh+tLpsFKK-lGshlssl.s+Rt++.tth.s.sshlLhl....hhlshshA ..................................KKstpsshNMLKRsRNRVSTspQLsKRFSh.GLLsGpGPh.KLVMAhlAFLR...FLAIPPTAGlLsRWGohKKssAIKVL+GFKKEIusMLsIlNRR++o.........shhLlMl....hsss..hhhh.............. 1 0 0 0 +373 PF02832 Flavi_glycop_C Flavivirus glycoprotein, immunoglobulin-like domain Bateman A, Griffiths-Jones SR anon Pfam-B_146 (release 3.0) Domain \N 20.80 20.80 21.00 20.80 20.70 20.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.32 0.72 -4.10 12 9726 2009-01-15 18:05:59 2003-04-07 12:59:11 11 26 131 56 0 6580 0 95.50 58 6.56 CHANGED oYsMCss.KFpapKsPuDTGHGTVVlcVpYoGssuPC+IPlpsssphpshssVGRLlTsNPhlsss..putVhIEhEPPFGDSYIlVGhGpppLshpWa+ ..SYuMCTs.pFKlhKEsAETQHGTlVlclpYcGsDuPCKIPhsshD.hpshsslGRLITsNPlVosp..-ssVNIEsEPPFGDSYIVlGhG-ptLplpWaK............. 
0 0 0 0 +374 PF00869 Flavi_glycoprot Flavivirus glycoprotein, central and dimerisation domains Bateman A, Griffiths-Jones SR anon Pfam-B_146 (release 3.0) Domain \N 20.00 20.00 20.60 20.20 18.90 18.90 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.83 0.70 -5.43 35 10895 2012-10-01 19:42:26 2003-04-07 12:59:11 15 32 141 36 1 7165 0 269.90 59 20.34 CHANGED h+ClulpNRDFlcGsoGsTWVslVLEpGuCVTlhAcsKPolDlhLsshptps.AtsRcYChcAploshpssuRCPThGEApLsEEpsssaVCKRsaoDRGWGNGCGLFGKGSIVsCAKFoC...scphpGhhlptpplpYsVtlpVHsupttsstt......ssspttphphTspusppshsLu-Y..GplolsCcspSGlDhsphhlhpls......s+sWhVHR-WFpDLsLPWp.....tsusss...Wcsp-pLV-FcpPHAsK.sVhsLGsQEGulhpALuGAh.lphsusp...hpLpu.............GHlpC+lph-KL+lKG .......................................+ClGluNRDFVEGlSGuTWVDlVLEtGSCVTsMAKsKPTLDlcLhphEApp.AplRchClcAploshoTsuRCPTpGEApLsEEpDtsaVC++shVDRGWGNGCGLFGKGS.llTCAKFpC...tpphpG+llQhENlcYoVhlpVHoG-pptsss.......ssppusphpITPpusosphpLs-Y..GslTl-CpPRoGLDhNphllLphc......sKuWLVHRpWFhDLsLPWo.....uuusTp...Wpp+EhLVpFcssHApKQ-VVsLGSQEGAhHoALsGAs.lph..Suss.....plhu.............GHLKCRl+MDKLpLKG....................................................................................... 1 1 1 1 +375 PF00949 Peptidase_S7 Flavi_helicase; Peptidase S7, Flavivirus NS3 serine protease Finn RD anon Pfam-B_199 (release 3.0) Family The viral genome is a positive strand RNA that encodes a single polyprotein precursor. Processing of the polyprotein precursor into mature proteins is carried out by the host signal peptidase and by NS3 serine protease, which requires NS2B (Pfam:PF01002) as a cofactor [4]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.50 0.71 -4.75 50 3378 2012-10-02 13:45:52 2003-04-07 12:59:11 16 23 150 15 17 3915 116 149.10 68 4.59 CHANGED hc.lpsGVY..RIhptulh.GppQ.......hGVGhhppGVhHTMWHVTRGAsLph...sstthsPtWus....V+cDllsYGGsWcLpt+WcG.c-VQlhAh.Pst.hp.hQspPGhhph.tG.plGAlslDaPsGTSGSPIl...NppGcllGLYGNGlhhs......ssYlSuIuQu ..............................................................................................................................................................................u.-L--GlYRIhQRGlL.GpoQ.......lGsGVhpEGVFHTMWHVTRGAVLMa...pGKRL.....EPsWAS....VKKDLISY...G...G.G.W+....Lp...up...Wpp.GEEVQVIAVEPGKNPKNV..............................................................................QTtPGsFKTspG.....E.....l...G.....A.l..u..L.D..F.pPGTSGSPIl...sR.cGKVVG.L.YGNGVVTp.....sGsYVSAIAQs...................... 0 13 14 14 +376 PF01004 Flavi_M Flavivirus envelope glycoprotein M Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family Flaviviruses are small enveloped viruses with virions comprised of 3 proteins called C, M and E. The envelope glycoprotein M is made as a precursor, called prM. The precursor portion of the protein is the signal peptide for the proteins entry into the membrane. prM is cleaved to form M in a late-stage cleavage event. Associated with this cleavage is a change in the infectivity and fusion activity of the virus. 21.20 10.00 21.30 20.40 20.60 9.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.50 0.72 -4.55 30 4840 2009-01-15 18:05:59 2003-04-07 12:59:11 14 26 115 0 0 4095 1 71.00 55 2.92 CHANGED SVslssHsppsLssRsp.sWhcucpspcaLs+VEsWllRNPsaAlssssluWhlGsohsQRlllhlhlLLlAPAYu .......SVALsPHsGhGL-TRsp.TWMSSEGAW+plp+lEoWhLRpPGFsllAhhLAahIGoohhQR.slIFILLhLVuPSh.... 
0 0 0 0 +377 PF00948 Flavi_NS1 Flavivirus non-structural Protein NS1 Finn RD, Bateman A anon Pfam-B_157 (release 3.0) Family The NS1 protein is well conserved amongst the flaviviruses. It contains 12 cysteines, and undergoes glycosylation in a similar manner to other NS proteins. Mutational analysis has strongly implied a role for NS1 in the early stages of RNA replication. 19.90 19.90 19.90 20.40 19.80 19.20 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.14 0.70 -6.04 11 4700 2009-09-11 01:25:55 2003-04-07 12:59:11 16 26 120 0 1 3925 0 270.30 70 11.15 CHANGED hGCAIDhpR+EL+CGuGIFVas-VcsWh-pYKYhPEoPppLApslpcAac-Gl...CGlRSssRLEptMW+ultsELNhlLpENplcLoVVVscscshhppu..s+sLphpsc-LchuWKoWGKuhlaus-huNsoFllDGPpTpE..........CPsppRAWN.....sacVEDFGFGlhoT+laLclREcsTpECDotlhGsAVKss+AVHoDhoYWIES.phNsTWplE+A.htEVKsCoWPcoHTLWu.sGVlES-hlIPhoLAGPhSpHN+RsGY+TQspGPWcpsclclDF-aCPGTTVslsEcCspRGsSlRTTTsSGKLIs-WCCRSCTLPPLRFpspsGCWYGMEIRPl+ccEssLV+ShVsAssGp ................oGCVIsWKs+ELKCGSGIFVTN-VHTWTEQYKFQs.-SP.p+.L.As.uItpAa..c..-Gl...CGlRSsoRLEphMWcplpsELNhlL.EN.thchol..h..s..sc...G.hhttu...+.lpsps.chchuWKsWGKuh......hh......ss-.t..NpoFllDGPpT.E...............CPstpRAWN................hEVEDaGFGlhoTplaLKlR-t.s..CDp+lhusAlKsphAVHuDhuYWIES..th..Nto.WKlt+AshhEVKoChWPcoHTLWu.sGVLES-hIIPt.huGPhSpHNhRPGYhTQstGPWc..G+lEhDFshC.GTTVslsEpCGpRGPSlRTTTsoGKlIp-WCCRSC...TLPPLRahs-sGCWYGMEIRPhpccE.cshVpS.VsAhpu.p................ 0 0 0 1 +378 PF01005 Flavi_NS2A Flavivirus non-structural protein NS2A Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family NS2A is a hydrophobic protein about 25 kDa is size. NS2A is cleaved from NS1 by a membrane bound host protease [2]. NS2A has been found to associate with the dsRNA within the vesicle packages. It has also been found that NS2A associates with the known replicase components and so NS2A has been postulated to be part of this replicase complex [1]. 
25.10 25.10 25.20 40.20 24.80 25.00 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.28 0.70 -5.05 37 3339 2009-01-15 18:05:59 2003-04-07 12:59:11 14 19 108 0 0 3344 0 208.40 46 6.38 CHANGED Gllslhlshp.llR+Rh..Tu+thlhuulhlLslhlhGhlThpDLhRYllhVGhsFst.psGs-lhaLsLl.AsFclRsuhLsuahLRppaTsREslllhluhshlphuhst....h.hslhphhculAluhhll+Ahsptppssluh.llulhs.tthhhlhtuhphhlhllsssulhp.t+ssst+.Kshshllslu.....lhus.Ghh..hlshhtlhhtps.t+.R ................GlLslulhhEEVhRsRa..upKhhhsullssFllLlhGplTapDLhRhsIMVGAshoD.chGMGlTYLALh.ATFKlpPhFAlGhhLR.+LTS+EslLLslGluhhspsp......LPpsl.-lsDuLAlGhMlLKllTphpsaQLhssLluLossssthhLpsAW+shshlLulVSLhPLph.oootQ.Ks.s.WlP.lsLu.....hhGs.....sPlshFlhshs+s.p+R............... 0 0 0 0 +379 PF01002 Flavi_NS2B Flavivirus non-structural protein NS2B Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. All, but two, are cleaved by the NS2B-NS3 protease complex. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.29 0.71 -4.55 30 3263 2009-01-15 18:05:59 2003-04-07 12:59:11 14 18 115 15 2 3330 1 127.00 55 3.81 CHANGED hoEslTAVGlhhsLsuulh+.sssphhsPhssuGllllsYlloG+sssLhlE+su-lpW-p-AphoGuSscLcVchDspGsh+Lh--ctsshphhlhhsshlshuAhaPhuIhhshsuWahhp...pss+R ......LNEulMAVGlVSILsSSLL+.NDlPMAGPLlAG..GLLhACYVIoGpSADLpLE+AADVoWE--AEhoGuScpl.VplsDDGoMpIKs-Ec-shLTlLL+ssLLslSGlaPhSIPsTlhsWahWp...pppQR............................... 0 0 0 2 +380 PF01613 Flavin_Reduct Flavin reductase like domain Bashton M, Bateman A anon Pfam-B_710 (release 4.1) Domain This is a flavin reductase family consisting of enzymes known to be flavin reductases as well as various oxidoreductase and monooxygenase components. 
VlmR is a flavin reductase that functions in a two-component enzyme system to provide isobutylamine N-hydroxylase with reduced flavin and may be involved in the synthesis of valanimycin [1]. SnaC is a flavin reductase that provides reduced flavin for the oxidation of pristinamycin IIB to pristinamycin IIA as catalysed by SnaA, SnaB heterodimer [2]. This flavin reductase region characterised by enzymes of the family is present in the C-terminus of potential FMN proteins from Synechocystis sp. suggesting it is a flavin reductase domain [1]. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.19 0.71 -4.37 141 7912 2012-10-02 11:35:36 2003-04-07 12:59:11 13 38 3556 70 2331 6180 2131 151.60 19 73.61 CHANGED htp..hsssVsllos.t........sspsh............G....hss.oshsslShc.P.Phlh.hsls.......pp...............................................................................spohshlpp...ssp............Fslsl....Lsp.sp.tp.lu....ptFus.......................tttc+..hs.shphpts.......................sus.......Phl..psuh.uthcC...clhp..p......hss.G.s............HslhlucVhsh...thtps..t.................Llahp.tpapshs .................................................h.....hstslslloo..st.............suths..............................s.........hss.oh......h...s.s...l....sh..s....P..Ph....lh..l....s.ls...........pp................................................................................ppohth.lpp...ssp..........................Fslsl........lst...sp...t.....p..hs.............pthus..s................................................tt-+......h..s..slshttu.........................................sus..........................P.hl........pp.uh..sth-C..clhp....h..................hph...G.s.................................................................as.lhl..ucl..h.s.h........thssp......................................................h...................................................... 
0 675 1440 1926 +381 PF00258 Flavodoxin_1 flavodoxin; Flavodoxin Finn RD anon Prosite Domain \N 24.40 23.20 24.40 23.20 24.30 23.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.84 0.71 -4.19 92 10556 2012-10-03 05:08:30 2003-04-07 12:59:11 20 121 3998 199 2717 8477 791 138.50 22 32.80 CHANGED llYuSpoGsscphActlscthpt.tsh.spshshtphs.........tpl.ptthllhsssThstGpss.s.......h.phhthh.............................................................tt..htshphulhGhGspsatt.......attssptlcpplpp.hGuppls......................shsts-pps...shcpthptW .....................................................................lhauS.p.o.Gssc.t....l....Ac.t...........l.t....ct.l............t..........t..................s............h...........p........s..........p........l....h...s..h.s...c.hs...........................tpl..t.p....t......c.....h...l..lls...s..u..T.a...G...p.G.-...hP.pp................h.h.c.hhp...h...........................................................................................tt...pL..s.s..h..p...hulh...G.h....G...D..psatt..................FstuscplcptLpp.hG.u.p.h.ls............................ttlth.D....p....t.......ct.......................................................................................................................... 0 813 1530 2194 +382 PF04500 FLYWCH FLYWCH zinc finger domain Krauss V, Dorn R anon Krauss V Domain Mutations in the mod(mdg4) gene have effects on variegation (PEV), the properties of insulator sequences, correct path-finding of growing nerve cells, meiotic pairing of chromosomes, and apoptosis. The occurrence of FLYWCH motifs in mod(mdg4) gene product and other proteins is discussed in [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.26 0.72 -4.08 128 1130 2012-10-02 23:28:20 2003-04-07 12:59:11 11 45 72 1 775 1222 0 60.70 23 26.35 CHANGED ahpop+.Gpth...LlhsGahatpsppp.....s.sptaWcC.sphpp......hpC+ARlhTp.....sspplhhhp.s....HNH ..............hhstp.Gt.h....L.l.hp.u.ahYptpppt.............s.sphhWpC.spppp...........pC+u+lhTp.............stplhhhp..t......HsH.................. 0 187 296 645 +383 PF03358 FMN_red NADPH-dependent FMN reductase Mifsud W anon Pfam-B_2010 (release 6.6) Family \N 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.81 0.71 -4.71 276 11795 2012-10-03 05:08:30 2003-04-07 12:59:11 10 50 3935 96 3325 13002 2408 145.70 18 69.81 CHANGED h+llsl.sGSs........R.......ps......uhsptlsphstp......hh.....t.........t.h-s...............cll.-....lsch..hP.....hhstch.............ssts.....pphtppltpAD..ulllsoPEYssuhsusLKshlDhhs........................tpthp.sKss...............uhl..ususu.tGuhpshtpL+thhs....hpshslss.thslstshp .........................................................................................plhhl...G..S....p...................tt.......u..h.sptl.s..p..h..htc...................th......tt..................................s..h-s..........................phh...p...........lsch......hs.............................hh.s.tsh.................................tssh...................tt.h....h....p..p......l.t.p.AD...ullhuoPpYht...uhs..........usl..Kshl.D.h.hs...................................h.tpsht...sK.ss........................................s.lh........ss.u..s....s..........t..u.......s...........p...s.........h...p...h.t..h.h...h...hth.hhh.s............t............................................................................................ 
0 1024 2182 2831 +384 PF02434 Fringe Fringe-like Mian N, Bateman A anon Pfam-B_1900 (release 5.4) Family The drosophila protein fringe (FNG) is a glucosaminyltransferase that controls the response of the Notch receptor to specific ligands [2]. FNG is localised to the Golgi apparatus [1] (not secreted as previously thought). Modification of Notch occurs through glycosylation by FNG. The xenopus homologue, lunatic fringe, has been implicated in a variety of functions. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.81 0.70 -5.17 27 1045 2012-10-03 05:28:31 2003-04-07 12:59:11 11 26 186 2 715 1675 8 183.50 22 50.40 CHANGED p.hphcD..lFIAVKTT+KaHcsRlsllhcTWhspA+cQTahFTDs-DppLppp.....hs.pllsTNCSssHsRps.........LsCKhuspaDpFlpSsp....+WaCHVDDDNYlNlspLlcLLssYshopDlYlG+PSls+Plpshct.............................p.spFWFA.......TGGA..........GFCl..................SRuLALKM..........P..aAStuphhssuctlphPDDCTlGaIlpshLslplh+osLFHSHLEsLtplsspplpcQV............................olSYuth....sphNhlphtts...Fs.ppDPoR 
............................................................................................................h..........................ph.......l....t..TW.t.h..................................h....................o.....s.........................p..t....h...................................h.........t.....tt....................t..............................h.t.h....h........h......h...t..h...h...h..p..s..pt................cWahhsDDDT.Y.l.....hpsLh.p...hL.....p......s......a....s.....s..s....p...s...l.Y.lG...p.............h......s.h...h.pt.........................................................a.h.........sGGA.........................Gasl..................S+t.hhp+.h........................hh.p.t.t..p....h....p....s..t....................-....Dh......h.Ghhhtt...hth.................................................................................................................................................................................................................................................... 0 208 292 533 +385 PF01534 Frizzled Frizzled/Smoothened family membrane region Bateman A anon Pfam-B_949 (release 4.0) Family This family contains the membrane spanning region of frizzled and smoothened receptors. This membrane region is predicted to contain seven transmembrane alpha helices. Proteins related to Drosophila frizzled (Swiss:P18537) are receptors for Wnt (mediating the beta-catenin signalling pathway) [1], but also the planar cell polarity (PCP) pathway and the Wnt/calcium pathway. The predominantly alpha-helical Cys-rich ligand-binding region (CRD) of Frizzled is both necessary and sufficient for Wnt binding [2]. The smoothened receptor mediates hedgehog signalling [3]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.19 0.70 -5.65 11 1043 2012-10-03 04:04:29 2003-04-07 12:59:11 12 9 159 0 557 939 0 269.20 38 53.08 CHANGED saFop-E+pFschWIGlWSslChhSTLFTVhTFLIDhc.RF+YPERPIIFLSsCYhhVSluallphhst.c-cluCst.....................................tttttpplp.sohpspuCTllFlLlYFFsMAuSlWWVILolTWFLAAGhKWG.pEAI-t+upYFHLuAWulPulpTIslLAlupVDGDsloGlCaVG.hshcuLpGFVLuPLslYLllGshFLLAGhVSLF+IRolhpppG.....spo-KLEKLMlRIGlFSlLYhVPAslVluCYhYEttptspWthshhsp.Ctphp...s.s.t....s...t+Pp.hsVhMlKYhMsLlVGITSusWlWSuKTlpoW+pahpRhps+ ............................................................happp-hphs.phaluhhuhl.C.h.huThFT..l.hTF.Ll...Dhp...R....Fp....Y.P..ERPIlaluhCY.h.hSlua...lhthhh.....tcplsCst................................................................................t.t.t.....p...h...l..h.....t......s.....p..stuCsllFhllYaFsMAuolWWVlL...olTWFL..AAu.h..K....W....u..pE...........AI...........p..t.......p......upY...F..........HhsAWu..lPul.hTlslLsh.sp......V...-....G.....D.l.o.Gl..CaVG...s..hp....s.LpsF....VLsPLhlhLhlGs..FLlsGh.l.u.L......h+lRp.hpptt...................ppscK.L.c...+hM...lR....IGlFohLYhlPsh..hlluCahYE.h.ht.W.....t...h.....h....t.......h.................t.................................................................................................Pph.lhhl+hhh..h.llG.IssuhWlh.o.t.KTh...pWtphhpt....p........................................................... 0 125 171 367 +386 PF01827 FTH DUF38; FTH domain Bateman A anon Pfam-B_67 (release 4.2) Domain This presumed domain is likely to be a protein-protein interaction module [1]. It is found in many proteins from C. elegans. The domain is found associated with the F-box Pfam:PF00646. This domain is named FTH after FOG-2 homology domain [1]. 
21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.74 0.71 -4.59 87 1268 2009-09-12 02:26:25 2003-04-07 12:59:11 22 47 6 0 1226 1298 1 138.50 17 38.99 CHANGED pphhp..thtphLcs....tpslpl+plp.lpthsh...........s.clhslLsh...FpsphLcpIpl.........................ptpphpphccls.pL-QWKpAKplp..hpsthh.......sI-plhHFppFpl.phsp..hoh.pDslclRD.......L.hpsss....Fppspl..............ph.phsshcls+lFp.Pp ...........................................................................................t....t.h.phlps...pp.l..p..lcplp....l..p.s...h..s.....................p...p.l..h...p.l.Lsh...hcs.p.hLc.plpl............................................tppt.p..h....h.p.h....p....c.l....h..ph....-...QWK....p.Ac.p.lp.........lp.shhh...t.............slc..p..h..h..Hhp....p.h.p..l...ph......p..........p........h..oh..pc........lh.......t....l+ch..................h..hpp.s.p.......h.p.h.h................................................................................................... 1 257 261 1226 +387 PF01913 FTR Formylmethanofuran-tetrahydromethanopterin formyltransferase Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain This enzyme EC:2.3.1.101 is involved in archaebacteria in the formation of methane from carbon dioxide. N-terminal distal lobe of alpha+beta ferredoxin-like fold. SCOP reports fold duplication with C-terminal proximal lobe. 
19.60 19.60 19.80 21.90 19.50 18.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.88 0.71 -4.47 36 245 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 155 24 95 260 64 134.40 49 47.64 CHANGED MclNGVpI-DTFAEAFshpssRllITAtoccWAhpAApcsTGFuTSVIuCssEAGIEp.hlsPsETPDGRPGssIhlh....shstcpLccQlhcRlGQCVLTsPTTAlFsul...........tpphslGtpL+aFGDGaphpc..clsG.....R....+hW+lPl .............l.-TFAEAFshtssRlllTAts.cWAhhAApshTGFATSVIuCssEAGIE+..LsPs.....ETPDGRPGlul.LlF.....uhutctLt..+Ql.pRlG.QCVLTsPTTAsFsGl..........................p...sscplslGtpLRaFGDGaQhuK..pl.sG.........+RaWRlPV.. 2 27 67 82 +388 PF02741 FTR_C FTR, proximal lobe Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain The FTR (Formylmethanofuran--tetrahydromethanopterin formyltransferase) enzyme EC:2.3.1.101 is involved in archaebacteria in the formation of methane from carbon dioxide. C-terminal proximal lobe of alpha+beta ferredoxin-like fold. SCOP reports fold duplication with N-terminal distal lobe. 25.00 25.00 37.30 36.40 19.60 18.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.72 0.71 -4.43 38 245 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 155 24 95 256 56 142.80 52 50.61 CHANGED -GEFlsE-phGhh.cGVuGGNFhlhucsp.uALtAAEAAV-AlppVpGsIsPFPGGlVuSGSKVGop...Y.+h.lsASTN-taCPTL+sp.s.cSclPssVsuVhEIVIDGlsccuVtcAM+sGIcAAs.....pssGVl+ISAGNYGGKLG.a+F+L+-L .-GEFlsE-shGhs.cuVGGGNhLlLAcspssALtAAEAAVsAhc.plssVIhPFPGGlVRSGSKVGS+...Y.+t.ltASTN-AaCPTL+uh.s.cScLs.....s....-lpuVhEIVIDGLsptsVttAM+sGIpAus.....thsGlhcIoAGNYGGpLG.h.h.L.t............. 0 27 67 82 +389 PF01728 FtsJ FtsJ-like methyltransferase Bashton M, Bateman A anon Pfam-B_1791 (release 4.1) Family This family consists of FtsJ from various bacterial and archaeal sources FtsJ is a methyltransferase, but actually has no effect on cell division. FtsJ's substrate is the 23S rRNA. 
The 1.5 A crystal structure of FtsJ in complex with its cofactor S-adenosylmethionine revealed that FtsJ has a methyltransferase fold. This family also includes the N terminus of flaviviral NS5 protein. It has been hypothesised that the N-terminal domain of NS5 is a methyltransferase involved in viral RNA capping [2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.27 0.71 -4.29 239 10200 2012-10-10 17:06:42 2003-04-07 12:59:11 14 62 4426 60 2263 11482 3102 173.80 25 13.42 CHANGED ahS..RuuaKLhph.cpa...........p........l..............................................p....ph.slDlGuuPGGaopshhpps.......................................................sspVhulDlt.h............................phshtl..........puDh...............tsh....hshp.plhph.....................................................................................................sDlllsDh.......................................p.phhuh....pLshsslt.hs................................p...................ssG...................hlsKlhp.....................stph.p.pllpplpp.......tFph.....................................................lthh+ss..........so+s.pu.sE.....a...llshth 
................................................................................................................hSRuuhKLpth....hcph........h........l.........................................................................................ps.......tp.l..lDL..G...susGG.Wo...hhsptt.................................................................................................................stcVh.u..lDhG...s.s..........................................................t...............hs....p.h..................pt.sh...........................hch.........pshp...sl..h....h....s...t........................................................................................................................................................................................................................................tpsD.h.l.l.s..Dh...........................................................u.....................s......shl..p.h...............t.h.s...l...h.s....Lp...h..s......h..........................................................................................................L..p..............................ss..s.......................................h.s...lK....lhp............................................................hh......pllc.ph..cp...............h..p.p......................................................s..t..lh.+.ss..........hSRs..ps..pEh....ahls...h................................................................................................................................................................................................. 0 771 1367 1878 +390 PF02687 FtsX DUF214; FtsX-like permease family Bashton M, Bateman A anon COG0577 Family This is a family of predicted permeases and hypothetical transmembrane proteins. Swiss:P57382 has been shown to transport lipids targeted to the outer membrane across the inner membrane. Both Swiss:P57382 and Swiss:O54500 have been shown to require ATP. This region contains three transmembrane helices. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.51 0.71 -4.35 133 37091 2012-10-03 05:18:07 2003-04-07 12:59:11 16 39 4654 0 8576 29605 7637 121.80 17 28.36 CHANGED hhhshlhllluhlslh..shhthtlt.....p+ppchulh+slGhsppplhthhhhcshll....shluhllG.hlluhhhshhhtphh................................................hhhshhhshhshhhshh...hshhlshlsshhsshphtp.hss .......................................t.hhshlhlllusls....lh......shhh.htlt.........pRp.cElul.h+s..l.G...ssp.pplht.hhhhE....shll......ullu..sllG...lh....l..u....h.h...h..s..h...h...l..t.t.hh.................................................................................................hths.h...t...h...s...h...h...s...h....l....h....s..hh.........hsh.l.l..s.l..l.s..u.h.h.s.shphhph................................................................................................................................. 0 3457 6175 7555 +391 PF04082 Fungal_trans Fungal specific transcription factor domain Wood V, Finn RD anon Pfam-B_306 (release 7.3); Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.79 0.70 -5.52 102 10823 2012-10-01 23:57:08 2003-04-07 12:59:11 13 234 216 0 8645 11592 1 237.30 12 31.77 CHANGED lphaaph....hp.sha............sll+c...ssahpph.............................spshLl...hsllslGsthtpptttpp.........................................................................phphhh.............pht.sspplhhlQ...uL....lllphathths...s..pphtht.....................apGhslp.hspshuLptpsst.p........................................hshp.tEh...p+.......RlaassahhDphhuhhhGpss......h...htspplphs....LPssss.h..stp.tt.t.............................................hhhhhpLp....plhsp...............................................lhs.lhshptphpppptp..........................hpplppplptWpp 
................................................................................................................................................................................aht..........h....h..h.......................h....lp................ph.tth...............................................................................................h..lh................hsl..h..s...h..u...s........h.....h......t...t......t........t................................................................................................................................t.t.thhh...................................................t...h.t..t..s..p..l.t...t..lQ.............uh.................ll..h....s......h...h.h...h....tts....................p....p....t..s.ah................................hh..u.h..u..hc....h..u...h......p....l..G...L....+..p..p.s.s.t..t........................................................................................hs........t...-h........+..+...................R..l.....aW.s.lah...h...D..........p.............h...h...u.....h...t...h..G..p..s............................h...............hp.p..p..p..h..s.h........................h..P.....t.............t..............................................................................................................................................................................................................................................................................................................................................h............................................................................................................................................................................................................................................... 0 1444 4077 7109 +392 PF01363 FYVE FYVE zinc finger Bateman A, Armstrong J anon Pfam-B_655 (release 3.0) Domain The FYVE zinc finger is named after four proteins that it has been found in: Fab1, YOTB/ZK632.12, Vac1, and EEA1. 
The FYVE finger has been shown to bind two Zn++ ions [1]. The FYVE finger has eight potential zinc coordinating cysteine positions. Many members of this family also include two histidines in a motif R+HHC+XCG, where + represents a charged residue and X any residue. We have included members which do not conserve these histidine residues but are clearly related. 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.60 0.72 -4.08 174 4289 2012-10-03 17:27:21 2003-04-07 12:59:11 16 325 331 13 2818 4121 81 71.10 32 8.02 CHANGED stWhsD.pps.spCht..Cpp.pF.s..............h..........h....pR+..HHCRpCGp....................lhC.......................................ssCS.........................spph.hh..............................................................t.ths.....psh..................................................RVCssCaptlpp ............................Wh.D.ppsspC..ht.......C.p..p.....p.F...s.............................h.........................................h.....pR.+......HH.C..R.........t.........CG.p........................lhC.......................................spCS................................spph.ls........................................................................................................h.t.t.pss...........................................................................................RVC.st.Capth..t.......................................................................................................... 0 898 1408 2169 +393 PF01392 Fz Fz domain Bateman A anon Bateman A Family Also known as the CRD (cysteine rich domain), the C6 box in MuSK receptor. This domain of unknown function has been independently identified by several groups [1,2,3,4]. The domain contains 10 conserved cysteines. 
22.60 22.60 22.70 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.53 0.71 -3.73 26 1926 2009-01-15 18:05:59 2003-04-07 12:59:11 17 130 149 11 1122 1701 0 112.50 29 19.98 CHANGED Cpsls..hshCpslsYstshhPNhLsHps..psEsthp.....hspahsLlph......pCpsshphFLCuhasPhC.hspht.....l.PCRshCEts+p.tCtslhpt...........ashtWP-hLcCschPh.pc.......hCh ....................................................Cpslp.....h.hC..p..s....l..s.....Y.s.h.T.h..h.P....N.....h.l..s..a.ps....pp-s.thp............................hp.t.a.h....sL.lph..........pC....p..s....p...l..ph..FLCuha.sPhC...p.p...h.......t.........................l.PC...R...slC...cps+p...tCp.slhpt.....................hs.htW.Pc..hhpCs.ph.P.tst.......C........................................................... 0 341 427 729 +394 PF01585 G-patch G7; G-patch domain Bateman A anon Pfam-B_585 (release 4.1) Family This domain is found in a number of RNA binding proteins, and is also found in proteins that contain RNA binding domains. This suggests that this domain may have an RNA binding function. This domain has seven highly conserved glycines. 20.30 20.30 20.30 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.14 0.72 -4.17 55 3420 2012-10-01 21:03:39 2003-04-07 12:59:11 18 129 386 0 2214 3446 25 44.30 34 7.29 CHANGED ssshGhchhp+MGact...GpGLG.....csppGltpPlpsphpppp......tGlGtp .............pshGt+hL.p.K.MGWpt.................G.p..GLG...............pp....t.p.......G.h...t..p..Plp..sphpp..pp.......tGlGh.............................. 
0 704 1102 1668 +395 PF01019 G_glu_transpept Gamma-glutamyltranspeptidase Bateman A anon Pfam-B_878 (release 3.0) Family \N 19.10 19.10 19.30 19.30 18.90 19.00 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.63 0.70 -6.02 52 5126 2012-10-03 21:14:07 2003-04-07 12:59:11 16 27 2591 76 1814 4680 4706 454.30 29 87.76 CHANGED hclLcp..GGNAlDAAlAsshsLuVlpPpusGlGGGuFhllhsssstps..sl..suREpAPtssot.....cha...........tsttphsthGsh.uluVPGtlsGhtphac+aG..plshtpLlpPAIcLAccGaslstthutshtpttth...hpppssht.......phF.....hs..sGps.hcsG-hhp.pPsLAcTLctlAp..pG..............scuFYpG.c..lAcpllcp..........hpp...t..GGllTtcDLssYc...s.chtpP.lpssa..s.........thtlhtsPPsouGhslh.hLslL-pas....tt............spthHhllEAh+hAaucRsp.lG........DPsass....s....hppLlsppaspphtptIssppshs..........................................................h.......t..sssToHhullDppGNsVShTpolshhFGSslhsstoGllLNNchsDFo...........ls.sstsN............tltPGKRPhSohsPsllhc...sGpshh..slGssGGspIhsshhpsllph.....lphshs.............lppAlsuPRhap.....p..s........plphEt.........shstphhptLp..ptG+plphhts.sshstshhhh......................tsushhuuuDsRpsG 
...................................................h.clLc.p..GGNAlDA.........A........lAsshs.LuVs.............p.P.p..us..G.l.G.G..suFhl.....l.......h......s.................s................p................s........t......p...........h.........................sl...................s.h.......p...thA....P...t....t..s.s..........pha.........................ttts.....t...t...h..s..h...h....u.h.......h..u.......l.u.l.PG..s.l....tuh....t........h.....h.....p.....+.....a...G.............ph.shppllpPAIp.lA..c.p...G.a....................ls........t....hs....p....h...h........t.........ptttt..........htp...........s..h.t.........................th.F........................h..........pG...ps...h......p...............G.......c..h.....hh..ps.....p.......LApTLchlAp..pG................................scu.F..Y..p..G..p.....lAc..plstt....................................hpp...........t.....GGh......lohpDL.ts..Y..p..................s..t...p.............p.P....lss.sa....t..............................................hhhl.h......P.P.s.u.s.G..........l.s.h.ht.h.L.s.lL-.ths.......htth.s.ts.......................................................spthHhhhEAh.+hA.au...........D.R.spa..l..u...............Ds.s..ahp....................hptLl.s.p..s.Y.h.tph..t.p......Is.p....p.shs...............................................................................t.....p.....tss...TsahsllDppG...s.sVS.......hTpolt.h..........hFGSul..h..s.........st.....s.....GhlLNNc.h.s.sFo..............................hs.ss..tsN......................tl.t..P....s.KRP.h.oohs.Psl...lhc.............s...............u.................p...s......hh...............sh.G.s.s...G.G..sth.....pshhpsllsh...............................ls.a..s.h.s....................hppAlst.PRhtt...............p........................plp.hEp........................sh.s.........p...h.h..p.tLp....thG.a.p....l....t...h....h...t........h........t....s...h...t...t..h.hhh............................ss..hh.uuuDsRpt................................................
.................................................... 0 561 1043 1459 +396 PF04114 Gaa1 Gaa1-like, GPI transamidase component Wood V, Finn RD anon Pfam-B_12685 (release 7.3); Family GPI (glycosyl phosphatidyl inositol) transamidase is a multi-protein complex. Gpi16, Gpi8 and Gaa1 for a sub-complex of the GPI transamidase. GPI transamidase that adds glycosylphosphatidylinositols (GPIs) to newly synthesised proteins. 19.50 19.50 23.20 20.20 18.90 19.20 hmmbuild -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.80 0.70 -5.80 4 376 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 269 0 274 373 2 347.30 24 66.68 CHANGED hpGpNlYGlLRAPRusuTEulVLsVPapsusu.phN.puuVuLhluLAcaFpthshWuKDIIhlloEpshhGhpuWLEAYHD.....shshh..pP..LphRuGSIQAAlsLElsosEssp..l-Vth-GLNGpLPNLDLhNlhppIht+cG.hsthph+hpspDhpopss..ps..L+pLhhhlhsQAsusspusHG..LF.pYRI-uLTL....R.h+uptphuaDhsshG..+AlEuhFRSLNNLLE+hHQSFFFYlllu.p+FlSIGsYMPullhLshshhLpAhptWhs.ttsshsL.cshu.....t..s.L.s...s.hhtl.asolsu.hLlophhthssalh.hLtpphht..hssh.hpssshhhLSl..huhhh................h.hhhlhsLlhhuhtlsslulhNFSLuhlsAhhhVP..lth.sKccspR.....olhhAsLshps.hlhhlslLhl..hphs.sPht.lhhcshphhhshls.uVhthlshsshla.VlshhahPhWllhhshohpK 
..................................................................................................G.slaulhp.A.PRusu....sEuhVlss.s...........................s.........................p..............s..............s.....tulslhlslhpa..hp..p....hW.u.KDllhl..h...s...................p..........p.........h.................u....h....p....uWlcsYHs............................................................l...h..puGs.l.puAlsl-..h.s....t..t..........t............lplhh-GhNGpLPNLDLh...N.hh...............th....s.t..t..t......................t.h.....t...............t................................................................t..t...t.................hpt....hh..hh.hh..p...tu.....u.h.s.p.ss..Hu.......Fh.aplpulTl...............................t.s..h...t..p.....................s.....h..t..hG........+hlEuhhRslNNLLE+hHQSaFhYll.t.ppFlSIu.Y.h...shhh...lhhs.hhl............u..h....h..t.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 92 160 233 +397 PF01590 GAF GAF domain SMART, Hughes J anon [1] Domain This domain is present in cGMP-specific phosphodiesterases, adenylyl and guanylyl cyclases, phytochromes, FhlA and NifA. 
Adenylyl and guanylyl cyclases catalyse ATP and GTP to the second messengers cAMP and cGMP, respectively, these products up-regulating catalytic activity by binding to the regulatory GAF domain(s). The opposite hydrolysis reaction is catalysed by phosphodiesterase. cGMP-dependent 3',5'-cyclic phosphodiesterase catalyses the conversion of guanosine 3',5'-cyclic phosphate to guanosine 5'-phosphate. Here too, cGMP regulates catalytic activity by GAF-domain binding. Phytochromes are regulatory photoreceptors in plants and bacteria which exist in two thermally-stable states that are reversibly inter-convertible by light: the Pr state absorbs maximally in the red region of the spectrum, while the Pfr state absorbs maximally in the far-red region. This domain is also found in FhlA (formate hydrogen lyase transcriptional activator) and NifA, a transcriptional activator which is required for activation of most Nif operons which are directly involved in nitrogen fixation. NifA interacts with sigma-54. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.70 0.71 -4.09 450 14959 2012-10-02 14:34:25 2003-04-07 12:59:11 21 1267 4851 93 4355 20618 2072 147.20 15 24.89 CHANGED shpplhpphhpplt.phhssspshl......................................................h..hptpthhh.hhhth.hsp............................................tthth.httsh............................hspshpsspslh...................h.........shhhtt...............ththpuh..hssP....h...hsp.......tp.........................lhGllsltppps....................ctasppc....hp....llpt.luppluhsl .............................................................................................................................................................................................................................hhpth.hptht.ph.h.s.h.ct...shl........................................................................................h...h..p..t.....s.....t....h.....................h....h...t.h...s..h.sp....................................................................................................t...t.t....h....h......h...t..p.sl............................................................h.t..p....h.h....p....s......s...p..s.h.s...............................................l.t.....s....h...t.........p....p.....h.........t...........s..h..s......t...p..t..................................................th.t.h.p.u..h...lshP..................l..ht.p..................sp...................................................lh..G.ll..s..h..pppps.....................................+hh..s...t...t.p..ht...........lhps.hutthsh........................................................................................ 0 1335 2627 3635 +398 PF01140 Gag_MA gag_MA; Matrix protein (MA), p15 Finn RD, Bateman A anon Pfam-B_229 (release 3.0) Family The matrix protein, p15, is encoded by the gag gene. MA is involved in pathogenicity [1]. 
20.80 20.80 21.00 22.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.88 0.71 -4.28 9 296 2012-10-01 20:35:47 2003-04-07 12:59:11 14 20 82 4 9 313 0 123.90 61 20.54 CHANGED GQolT....TPLSLTL-HWcDV+cRA+NQSVEl+Kt+W.ThCsuEWPTFsVGWPt-GTFNhslIhQVKphVFp.sPaGHPDQVPYIVTWcuLAhsPPPWVcPFlsPs......hsPos.stPsuPs.PSsP.pss..LY .............GQTVT....TPLSLTLpHWsDVp+hApN.QSVDVKK.RRWlTFCSAEWPTFsV....GWPpDGTFNlsII.QVKu+VFsPGPHGHPDQVPYIVTWE.ALAhDPPPWVKPFVsPK.........s..Po.AP.hPssPs..spsPspSs................................................................. 0 4 4 9 +399 PF01141 Gag_p12 gag_p12; Gag polyprotein, inner coat protein p12 Finn RD, Bateman A anon Pfam-B_821 (release 3.0) Family The retroviral p12 is a virion structural protein. p12 is proline rich. The function carried out by p12 in assembly and replication is unknown. p12 is associated with pathogenicity of the virus [1]. 25.00 25.00 33.60 33.60 24.50 21.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.06 0.72 -3.85 10 138 2009-09-12 05:17:05 2003-04-07 12:59:11 13 8 48 0 0 153 0 81.50 71 10.13 CHANGED PALTPolpsK..P.KPQVLP.DsGGPLIDLLTEDPPPYtsPtPsPPst-ssctEAssssEsP........sPSPMA.....SRLRGRREPPsADSTTSQAF ..........PALTPSIKsK..PsKPQVLP...DsGGPLIDLLTEDPPPYGsQ..PSSSst-sscEEAssTuEls........sPSPMV.....SRLRGRRDPPAADSTTSQAF..................... 
0 0 0 0 +400 PF02140 Gal_Lectin Galactose binding lectin domain Mian N, Bateman A anon IPR000922 Family \N 21.30 21.30 21.60 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.31 0.72 -3.67 126 1519 2009-01-15 18:05:59 2003-04-07 12:59:11 13 122 181 22 813 1468 19 79.30 33 12.15 CHANGED LpCs.s..phlplp.hAsYGRss..s.sC......s...hppsp................Cpus....s.ohshlpp.....pCpG+psCs.lsss.sssF...uD.P.C.sushKYLpV.....papC ..........................................................LpCsss....pll.tIp.sAsYG.......Rss.....sth..C............st.....t..p..h.p..p.h..p...................................Chss.......suhphl...pp........p.C.p....s+..p.....p.....Cs.lsss..sssF....sD...P....C..s.G.o.h.K..YLpV..pYpC...................... 0 314 482 674 +402 PF03127 GAT GAT domain Bateman A anon [1] Domain The GAT domain is responsible for binding of GGA proteins to several members of the ARF family including ARF1 [1] and ARF3. The GAT domain stabilises membrane bound ARF1 in its GTP bound state, by interfering with GAP proteins [2]. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.10 0.72 -4.04 33 1104 2009-01-15 18:05:59 2003-04-07 12:59:11 9 17 265 13 629 997 2 98.80 23 18.93 CHANGED tps-phuKht.scl-pVpsssclLsEMLsphssupppss-.-....Llp.......................-LhppCcphppplhcLh.........scspD--..sls-lLplNDsLspslp+Ycphscspps.tp....sts ...................................................t...t.htphh.splp.l.p.s.ss.clLsEMLpp.hs..s......s.....p..p..p...s..s.c..-......llp.................................-.LhppCcphpppl..hcLl........................................sp..sp-.--...h..l..tcl...LpsNDpLspsltpYcp.htpsp.......ssst......................................... 0 138 279 452 +403 PF00320 GATA GATA zinc finger Finn RD anon Prosite Domain This domain uses four cysteine residues to coordinate a zinc ion. This domain binds to DNA. 
Two GATA zinc fingers are found in the GATA transcription factors. However there are several proteins which only contains a single copy of the domain. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.18 0.72 -4.60 69 3798 2012-10-03 10:42:43 2003-04-07 12:59:11 22 92 384 40 2299 3654 15 35.30 45 8.26 CHANGED Cs..sCsTop..TPhWR+us..sGph...LCNACGLaa+hpthh .......Cs..s..C..t..osp.....TP.hW..R+..ss............sGph.........LCNACGLa.aKhps.......... 0 638 1149 1789 +404 PF00117 GATase Glutamine amidotransferase class-I Sonnhammer ELL anon Prosite Domain \N 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.44 0.71 -4.85 134 29637 2012-10-03 00:28:14 2003-04-07 12:59:11 23 151 6931 113 7451 26546 18089 186.40 22 46.33 CHANGED lllDshtuhptslh+tltphs......hplplh..s......pth........................psculllSsGPGsst........httthphlpphhp.....plPllGICLGhQhlshshGu......................plh..........................ptt.hshpGtsp.ltp..t............hhhshspshhstphHuhtls.....t.lsps..hplsshstss.......tlhulhcppt.....hhulQFHPEshhsspstphLhshhlph 
...............................................................................................llDhh...tht.tslh...ct..h.t....p.hs..................s.p.s...p...l...h....h..s...t.....t.tph......................................................................hpscu..l....l.ls...s....G......P.G.s.stt.......................hpt.t...h.p.h..l....p..t..h.hp..............p...l...P...l.....l..GICL.Gh.Q..h...h.sh.t.h.Gs.....................................................................................plh...............................................ptt....h.s.p..t.G.t....s..h.....l.t..p...t.......................................................hh..h..s...h..........p.....s....h...h...s....h.....p.......H...u....ht.lst.................................hs.ss......hp....l....s.....u....p.s......p.s........................thhu.....h.......h.......c......p..................s.....p..........hhulQFHPE..h....t..t..s..s..t..s..h..p.h.Lh.shh...h.................................................................................................................................... 0 2366 4699 6278 +406 PF02934 GatB_N PET112_N; GatB/GatE catalytic domain Finn RD, Bateman A anon Prosite Domain This domain is found in the GatB and GatE proteins [1]. 
23.40 23.40 23.80 23.90 22.10 23.20 hmmbuild -o /dev/null --hand HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.94 0.70 -5.55 147 4153 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 3865 37 1215 3100 3120 280.70 48 60.26 CHANGED Es..lIGLElHsQLsT.coKlFsssssp.aus.....p..P.NopssslslGh..PGsL...........................Pl.lNccAlchAl+huLALssc.I...sph..spFcRKsYFYPDLPKGYQIoQaphP....lspsG..hlc..lp......................pup.......+c...............................................................lt..IpRlHlEEDAGK.lHpts.................................thohlDhNRuGlPLlEIVo-P....Dh+oscEAtt....alpcL+pllpalulsDssM-cGolRsDsNlS.................l+......................Gp.....t...........hGsRsElKNlNSh+tlpcAlcaEhpRQhclLc.............................................................................................................................................pG..t....p.l.QETRtaDsspspThsMRsKEsApDYRYFP-PDLsPlhlscchl-cl ...............................EsVIGLEVHspLsT.poKlFss.....sssp...FGs............c......PNopssslshuh..PGsL....................................PV...lNcpsVchAl+huLALssc..I..s.p.p..stFsRKNYFYP.....D.PKuYQISQ......a....-....t....P....IstsGhl-.lp...............sGpp.........Kc...................................................................ltIpRhH.lEEDAGKshHts..............................................................shShlDhNRsGsPLlEIVoc....P....Dh....R....S....scEAhA....YlcpL+pllpalGlSDsp...M.......-EGShRs..DsNlS..lR..P...........................hGp..........pp............aGTRsElKNlNSh+hlp...cAl.c...YEhpRQhcl.L..c.......................................................................................................................................................s.G...G......p.l....h.....Q.ETRha.Dps........p...s....pThsMRs.KEsA.pDYRYFPEPDLsPlhlsc-al-p.h.............................................................................................................................................. 
0 421 796 1040 +407 PF02637 GatB_Yqey DUF186; GatB domain Bateman A anon Bateman A Domain This domain is found in GatB. It is about 140 amino acid residues long. This domain is found at the C terminus of GatB Swiss:O30509 which transamidates Glu-tRNA to Gln-tRNA. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.59 0.71 -4.49 103 3872 2012-10-02 13:42:24 2003-04-07 12:59:11 13 14 3640 37 1195 3139 2254 147.40 34 29.94 CHANGED -aFEpslpts..ss....s+tsuNWlhs-lhuhLscpshslpp...................lss...ppLupllph....IpcGpISsKhA.Kclh.pphh..ps.stss.........cpllccc.GLtplo..Dpstlppllc-llspN.spt.lcca+sGKp....+shualVGQVMKt.o+G+A.s..PphVsclLpccL ..................................................................................................caFEts.lttu....ss......sKhsuNW..l..hs-lhth...L.N.p......p.s....h.s.lpc........h..........................sl.oP...ppLupl.l.pl.........I.c.-G..sI.......SuKl......A.K.c.Vh.p.t.lh........ps...usss........................c.p.ll-c...c.G..L....h.Q.lo.............D......s.....u...s........l....psh.....lc-l..ls....s..N....spt...Vc....c...a...+........s......GKp........+u.....hGalVGQlMKs....o+.....G...p.A.NPphVscLLppcL............................ 1 410 790 1026 +408 PF03615 GCM GCM motif protein Griffiths-Jones SR anon PROSITE Family \N 25.00 25.00 31.80 34.40 22.30 21.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.60 6 146 2012-10-02 23:28:20 2003-04-07 12:59:11 10 1 79 1 94 148 0 139.50 68 28.86 CHANGED caDcFsEWsDGaVRaIYuscDccA+KHlSGWAMRNTNNHNspILKKSCLGVllCSpcCpLPsGuplpLRPAICDKAR+KQQsKpCPNptCc.G+LElhPCRGHsGYPVTHFWR+sGpuIFFQAKGsHDHPRPE.sKsooEARRuh .....................aDtFpEWsDGaVRaI.You.c.-+pAQRHlSGWAMRNTNNHNsp...ILKKSCLGV.VVCu.ps.CsLPsGs+l..pLRPAICDKARpKQQcKtCPN..Cs.GsLELlPCRGHuGYPVTNFWRp-GpuIFFQAKGsHDHPRPE.oKspsEARRu.h.......... 
0 21 27 55 +409 PF03074 GCS Glutamate-cysteine ligase Griffiths-Jones SR anon Pfam-B_541 (release 6.4) Family This family represents the catalytic subunit of glutamate-cysteine ligase (E.C. 6.3.2.2), also known as gamma-glutamylcysteine synthetase (GCS).\ This enzyme catalyses the rate limiting step in the biosynthesis of glutathione. The eukaryotic enzyme is a dimer of a heavy chain and a light chain with all the catalytic activity exhibited by the heavy chain (this family). 25.00 25.00 27.20 25.20 19.30 22.80 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.37 0.70 -5.40 10 455 2012-10-02 17:21:26 2003-04-07 12:59:11 11 6 292 4 308 460 7 297.10 42 57.32 CHANGED IYMDsMGFGMGCCCLQVTFQAsNIsEARaLYDQLusICPlhLALSAATPFaRGhLuDhDsRWsVISASVDDRTtEERGlsPL................psp.hcI.KSRYDSlDsYlSsss...EcYNDIsLsIscclY-pLl-sG...IDchLApHlAHLFIRDPLslF-EpIclDDssco-HFENIQSTNWQTMRFKPP.....PPcS.-IGWRVEFRPMEVQLTDFENAAYsVFVVLLTRsIlSa..+lsFhhPlShVDENMKhApcRDAlLppKFhFRKDI..Cps.s....hssKsssso--suE....MSIDEIING..KcGsFPGLIP........llRpYLEstclDsDTRC.lpsYL+hISKRAoGEl.TsAcWhRpFlssHPDYKcDShlTDcIsYDLlp+scpIAs 
...........................................................................laMDuMuFGMGssC.LQl.TaQuts.ls-uRhlYDpLsslsPlhhALoAAoPha+G.als-.DsRWshIusuVDsRT.cE..h...u......Ph..............................................tp.t.................hI.KSRY..sShs.Yl....u..t.............p....................................p.Y...sD.s.lshs..pplhp.p.LhptG........hDchLAp..HhAHLFIRDPlslFpEplp....s..pp.....s-.HFE....................NlQSTNWQphRFKPP...........P.s....s.s...s...lGWRVEFRshElQlTDFENAAassFlsLlo+.sI.....L.s.a..........plsh.hlPloplpENMphApt.psAshp..t.hFaFR.c.s.....................................................................p............p..........................holspIh.pG......p.............t........s.FsGLls..............l.l...pp.a..l......p..p.h......p....hs......p...pt.................l.tYLphIppRAs..Gpl......TsApahRpFlhpHPtY+pDShlspplsaDLl.th.tl..t.................................... 0 114 166 260 +410 PF03009 GDPD Glycerophosphoryl diester phosphodiesterase family Griffiths-Jones SR anon Pfam-B_4008 (release 6.4) Family E. coli has two sequence related isozymes of glycerophosphoryl diester phosphodiesterase (GDPD) - periplasmic and cytosolic. This family also includes agrocinopine synthase, the similarity to GDPD has been noted [1]. This family appears to have weak but not significant matches to mammalian phospholipase C Pfam:PF00388, which suggests that this family may adopt a TIM barrel fold. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.62 0.70 -4.88 34 9993 2012-10-01 22:17:21 2003-04-07 12:59:11 12 139 4031 48 2657 7515 2527 235.00 21 65.67 CHANGED HRGsusphP...........ENTltuappAhptGADhlEhDVplTKDGhsVlhHD..........tplscssssss.....................hlpchohp-lpphp................................................tst....h.tpth........................sTLp-hlph.........hssshphtlclp.hsphhthpt....hstthsthh.phh..........tstplhhpoFphcthphhpphts.....phshhhLhptss.ths...........tthphhts.shhsththhsts.......................hlpts+ppG.ltVhsaTlsst..................tphthhhchGVDGllTDpsst ..............................................................................................HRG..s....s...s.....h..hP.............................................ENT........l........t...A.hp.t.A.........h............p........h......G...s......c.....hl..E..h.Dlp..............hT+...Ds...p.............lVlhH.....D.............psl...c....R...so.ssps..................................................................................hlp-.hTh....p.-.l.pplc..................................................................................................................................................................h.t..............h.....s..p..t.h.................................hsoLc-hlch...................thp..h..t.h....t..l....c....l..c..............h....s....t.h.h.t................................t.h.h.......t.........h.h.....t.....hh................................................................................................................t....p...........h.h....lp..S..F..s...h.p...l.pth.pphts.......................ph..h...s..h...l.h...t........t.............................................h.t......h...t..t...........t..h..h..h.....t..h.t.......h.s...h..............................................................................................h...l.pt..h...+.p....t..s..h..l..h........s..a...Tlsp..
....................................t.ht.h.h....h.....p...h...s.......l......c....u.lh.TD.st.h.................................................................................................................................................................................................................................................................. 0 842 1578 2169 +411 PF00626 Gelsolin Gelsolin repeat Bateman A anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.46 0.72 -4.29 54 5387 2012-10-01 21:06:05 2003-04-07 12:59:11 17 133 440 183 3048 5007 35 78.00 19 21.63 CHANGED ph..s.ss.hstsplpsscsallDssh...........plahWhGpp..ss.tcpshus.hstplp............pph.shs.hhp.spu...pcsspFh ................................h....s..s.hsts.pl.ps..s....D.sa..ll-.sst...............................................................plahWhGpp..........ustp....E......c......t......t.........u..h..h...hs..tplp..................................pphts..h.....s...hhh....pG...tEs.tF............................................................. 0 854 1348 2192 +412 PF01408 GFO_IDH_MocA Oxidoreductase family, NAD-binding Rossmann fold Bateman A, Griffiths-Jones SR anon Pfam-B_342 (release 3.0) Family This family of enzymes utilise NADP or NAD. This family is called the GFO/IDH/MOCA family in swiss-prot. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.62 0.71 -3.59 52 20348 2012-10-10 17:06:42 2003-04-07 12:59:11 17 96 4230 256 6149 17689 10023 120.50 24 33.45 CHANGED h+lullGs.Gth...upha...htshhps.pth.......clsulhs.sttpucth....upphshs.....sass..........hp.plhsps..c...lDslhlssPs......th....HhphshthLptGh..aVlhEKPlsh......ohp-spclhphsccps..........hh.ltlsa ....................................................................................................h+lullGs....G...th.............up.t.a.........h.t..s...h..h..p..t......ssh................................cl.s..u..l..s..s....h....s.........p....p....s..p.t.h................................up.p.h....s..ht..............hh..s.s...................................................................hc...cllsps.........p.............lD...sVh.l..s.oPs.............sh.......Hhp....hs..h...tA..lp........u..GK........HVh.....s.....E...K...Ph..uh..........shp-..sc.c....l...h....ph...Ac...c...p.s....hh.lhh............................................................................... 0 2220 4074 5250 +413 PF02894 GFO_IDH_MocA_C Oxidoreductase family, C-terminal alpha/beta domain Bateman A, Griffiths-Jones SR anon Pfam-B_342 (release 3.0) Domain This family of enzymes utilise NADP or NAD. This family is called the GFO/IDH/MOCA family in swiss-prot. 
20.80 12.60 20.80 12.60 20.70 12.50 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.30 0.71 -10.30 0.71 -4.29 37 12324 2012-10-02 13:21:44 2003-04-07 12:59:11 12 35 3630 193 3449 9524 4058 106.70 15 29.99 CHANGED +chlppst.lGplhhhp..thhtspttt.spht.p............suGshh-huhH.lDhhphLhGt..spss...shh..hphsptsphtsst.......................hshthssht....ssthssshspshshphhhh .................................................phlppGt.lG...c...l...h....h....h.....p.................h...........t...h....h...............t.......s...............s......t...h....t....hp......................tuuG...s...l...hD.hu.sHhlDh......s...h.a...l....h....G.........p.............s.....p...h...ss..............sh..............h.......h...........t..t....t............s.........................................................................h.h..........h..............................hh.......................................................................................................... 0 1200 2256 2931 +414 PF00990 GGDEF DUF9; GGDEF domain Bateman A anon Pfam-B_112 (release 3.0) Domain This domain is found linked to a wide range of non-homologous domains in a variety of bacteria. It has been shown to be homologous to the adenylyl cyclase catalytic domain [1] and has diguanylate cyclase activity [4]. This observation correlates with the functional information available on two GGDEF-containing proteins, namely diguanylate cyclase and phosphodiesterase A of Acetobacter xylinum, both of which regulate the turnover of cyclic diguanosine monophosphate. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.97 0.71 -4.65 49 42746 2012-10-01 23:51:22 2003-04-07 12:59:11 16 1972 2995 26 12809 36762 3058 153.10 28 27.32 CHANGED AthDsLTsLsNRphhpppLppthpps.tp....ppthullhlDlDpFKplNDpaGHpsGDplLpplAptLppslRp....sDlluRhGG-EFsllLspss.p....stphtpthcphlpphphsh.thsshth.lohSlGluth..........tpspshppllcpADpALYpAKppG+Np .......................................................pDs.LTG.LhNR....p....h...h.p.....p...p.......l........p.......p.......h...h.........p.......p.....s......tt....................t..p.......t.....h.....u....l....l....h....l..Dl...D.p.F..K..p....l....N....D.....s..a.G....H..t..sG...D.....p.....lL.p.pl.A....p.t.l...p....p.......t......l......c...........p................s-..............h............l.............u...............R..............h......G.........G....-........E..F...s.....l...l..........l.........s.....s...s......s...t.p........................p.........h.....t....p.....h.........h.p....p...h...l...............p...................t.............h.........p...................h..............................h.................................................t............t...........h..............t...................h........t.................l.....o...s......S..l..Gl....u..h.h...................................p...s.....p......s.....h.....p....p....l.....l....pp...A.D...t.Ah.Y.p.A.KppG+s...................................................................................................... 
0 3883 8047 10652 +415 PF03321 GH3 GH3 auxin-responsive promoter Mifsud W anon Pfam-B_3652 (release 6.5) Family \N 19.60 19.60 19.60 19.60 19.50 19.10 hmmbuild -o /dev/null HMM SEED 529 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.79 0.70 -5.99 64 945 2012-10-02 15:58:18 2003-04-07 12:59:11 8 9 449 8 439 940 720 439.50 24 91.84 CHANGED ctlc......p..h-pht....................psst...plQcclLpclL.ppsssT-ah++ashp......uh..hs........c..........FpppVP.......lhsY--l.cPaIcRltsG-........ps.llhsps.lphFstSSGTouupsKhIPhoc-.lcphphhtuhthhh..hppphssh.hpGK..hLths...sphppp..GhhhGsloshhhc..s.h..hp............hhpsPsptlht.-sapshhppllpthh.cpclttluul.uhhllhhhchLEpphpch...........................................................ltclWPshc.lhshsssuht.Yhtphcthhsu.h.hh.psYsuSE.Gahulpsp....spcsuhhlh.shu.aaEFlPhcc.t............................sphlsLs-VclGppYtlllTThuG...................LaRYplGDsl+h.Tuhpst..plphssRpphhlslhu-+hstcclppAlppu....h.ptt.....shpls-aT.st.sp.h.stsu+ashhW.lpsc......st.........................shp..phsptlDpuL...NssYctpRptshplsslcl+llptGsFpchhc.thu+.Gu...QaKsPRhs..sspph..t...l 
.........................................................t.....hp.hh..ppst...thQpplLtpll.ppstpopahp.pa.tht...............t.h.shp.................p..........FppplPlhsY-ch.pshl.pR...h.hp.Gc................ps..ll.h.st.....lhhahh.o.S...G..........To.ss...t...KhlPhspc....hpphp..hshth...hh...........htp...s......p.G+..hh.h.....th.p...ss....h...ht.shts.....h..h..p...s.h......hp..............p.Pst........h...h.....sthp.p.h..httlhpthh.ppplthlu...uh.s.hhhhhhchlp.t.....phpp.h..........................................................................................................htcl..WPphp.l.hhssssht..Yhtph.c.t.hhs....t..h.hh.phYsuSE.u.ah.uhp.p.............................pp..s..s..h..hlh.shu..aaEFlPhcp.t......................psphlsLt-VchGppYtlllTT...uG...........................LaRYplGDslch.suh....t.s.....t............phphssRpph..hlsh.u.-c.h.tpphppAlppu.........h..t..........shplh..-ao....sh.....s........t...............................st...........s..c..........hhh.W.lpht......sp.........................shp...phsthl-ptL...sssYcttRht.hsltslclphl..p.ts..h..Fpphhc..hsp.Gu...QhKhPRh....p.......t......................................... 0 135 318 377 +416 PF00288 GHMP_kinases_N GHMP_kinases; GHMP kinases N terminal domain Finn RD, Mistry J anon Prosite Family This family includes homoserine kinases, galactokinases and mevalonate kinases. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -9.32 0.72 -3.91 455 15529 2012-10-03 01:04:38 2003-04-07 12:59:11 21 62 4900 114 3877 10927 3091 64.70 24 18.95 CHANGED clpl.pssl....Phu.uGLGSSuusssuslhu....lsphh.....s....h.s.............l........sp............pl...hphuhtsc..........tGsss....ssshhGs ....................................lplpssl.....P.h.u.uGLuSSuushsusl..hu.....hsphh........s...........h..s............................................L........sp.................................pcL.........hphu.t...ts..E............ts..ss..ssshhG.............................................................................. 0 1260 2384 3266 +417 PF00594 Gla gla; Vitamin K-dependent carboxylation/gamma-carboxyglutamic (GLA) domain Bateman A anon Prosite. Domain This domain is responsible for the high-affinity binding of calcium ions. This domain contains post-translational modifications of many glutamate residues by Vitamin K-dependent carboxylation to form gamma-carboxyglutamate (Gla). 20.40 20.40 21.20 20.40 19.90 18.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.92 0.72 -4.27 29 998 2009-01-15 18:05:59 2003-04-07 12:59:11 15 54 109 47 394 801 0 41.10 45 11.04 CHANGED lEEhctGslEREChEEhCshEEApEhapss.tTptaap+Yhs ..........hEEhppGsLEREChEEh.CsaEEARElFE.s.sp.cT...pt.FWppYh................ 
0 28 60 159 +418 PF00208 ELFV_dehydrog E_L_F_V_dh; GLFV_dehydrog; Glutamate/Leucine/Phenylalanine/Valine dehydrogenase Finn RD anon Prosite Domain \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.48 0.70 -4.80 124 6466 2012-10-10 17:06:42 2003-04-07 12:59:11 16 23 3999 221 1773 5816 1828 223.60 33 52.44 CHANGED GGS.hsRscATGhGlhahscchhcp..ttp...plcGppVslpG..GNVutasschhhch.GA+lVulSDupGslh....c.sGl......clptlhch.+pppts..lssas.............c.hh.ss..p....................ht.......l.......csDlhlPs.AhpNplstcsAc.hl.....+.......s+hlsEGANhPsT..-Ahc.lhpcpG..........lhhsPshhANAGGVssSuhEhsQN.tth.WopEcVsp+Lcpl...........Mpshacsshptupc..hshs........hhtuAslhuhp+VAcAhtspG .........................................................GGS..hsRscATG......hGlhhhsc......phh..cp....shs...................h.cG..t....p.V...s...l...p.G...GNVutas.s..c..h..h.h..........p.........h...G..A..........+..V....l..s..h......S.....D......u.....s.......G...h...l.h......s....s...Gl........cl.stLh..c....h....+pppts........lspas......................thp.hh...p.st....ph..............................................................ap........h.......psD.lhlPs......A.h.pN.plst...c.s.A.p.hl................................p.....................s...phVsEGAN..hPo.T.....-.A.....h.........c...lh.p..c..p.G...............ll.hsPshsuNA.G...........G.......Vs.s.S.h..h.EhsQN..........t.h.W..s..t..-cl..pt+Lcpl..........................................Mtshapt...s.h.ph.u.pp...ths.................hhhuA.hhuh.+lspAhh.pG...................................................................................................................... 
1 635 1175 1539 +419 PF00042 Globin globin; Globin Bateman A, Chothia C anon Structure_superposition Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.33 0.72 -3.71 73 6000 2012-10-01 21:46:00 2003-04-07 12:59:11 17 34 2886 1971 1261 5331 34 99.80 25 43.59 CHANGED ppthlpssWsp.lhs......ph.thGschhtclFpsaPps+shFsph.ts.p.....tsssphcsHut+Vlsulspslspl.....s.slpstlppLuspHtpct....lssspFphhtpslh ........................................hshlpsshsh.ltt................pssch.ssch........h..t....R....hF..t..saPp.s.p.p.hF..s....ph..........s.t...................ss..t.phpspu..ppl.hs.ulsphlppl..................sl...t...sslppL...u.phH...sshh........V.cPppapllupsL................................. 0 348 594 949 +420 PF04898 Glu_syn_central Glutamate synthase central domain Bateman A anon Pfam-B_455 (release 7.6) Domain The central domain of glutamate synthase connects the amino terminal amidotransferase domain with the FMN-binding domain and has an alpha / beta overall topology [1]. This domain appears to be a rudimentary form of the FMN-binding TIM barrel according to SCOP. 
21.50 21.50 21.60 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.48 0.70 -5.15 37 3372 2012-10-03 05:58:16 2003-04-07 12:59:11 9 34 3044 15 950 2992 3369 283.90 42 18.60 CHANGED lhppQpAFGYThE-lchlltPMAcsGpEslGSMGsDoPlAVLSc+s+hLYcYF+QhFAQVTNPPIDPlREp..........lVMSLpohlGscuNlLc.sspp..s+plpLcoPlLsps-............................hpp.l+sh..........pthpstslshsashp....pu............................hpu.LcsulcplsppAcpAlcsGtslllLSDRp........hstp.+ssIPu...LLAluAVH+HLlcpslRscsullVEou-sR-sHHFAsLlGYGAsAlsPYLAhEolpp..htccshh........phshppshpNYp+AlspGllKlhSKMGISTlsSY+GAQIFEAlGLup .................................................h.phQptFGYotE-lchlltsMupsG...pEslGSMGsDoPlA..VL.S.pc.s..+hLacYF+Q...hF...AQVTNPPIDs.lREp...........hVhSLtohlG.t.-.h.......N.l.L.s...........st..tp.........s+...+l.plpsPlLsp.s.-...........................................................................................................hpp....lpshpp................ppac..sppl..sh.sasss....................................................................................tps...Lctulccl..ssc.A.ppA.V.c.....s.G.s.s.lll.L.SDRs...................l.s.p.s..+hsIPu...LLA.luAVHp+Ll.......c.......p.......s......LRs.psullVEoGpsR-sHHaAsLlG.aGAsA.l.s....PYLAaE.oltc.........hhcpttl...........................sh.s.h.cpshpNYtculsKGlhKlMSKMGISTltSYpGAQlFEAlGLs.p........................................................................... 1 289 615 816 +422 PF04960 Glutaminase Glutaminase Bateman A anon COG2066 Family This family of enzymes deaminates glutamine to glutamate EC:3.5.1.2. 
19.80 19.80 19.80 19.80 19.70 19.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.78 0.70 -5.43 17 2528 2012-10-02 21:13:33 2003-04-07 12:59:11 10 25 1760 57 544 1601 173 277.00 43 80.87 CHANGED GpVAsYIPtLA+ssssphGlulsssDGphhpsGDschsFolQSISKVhshslAhtchGtc.lap+VGpEPSGpsFNSlstLEhcp.uhP+NPhINAGAlsVosllpucss.t-thphllphlcplsGspplshsptVhpSEhpTu.RNtAlAaah+phGshpp-l-...ssLchYF+tCulchoCpsLAhhushLAssGhsPhosEpVlstchs+phtAlMhTCGhY-sSGpFAa+VGLPuKSGVuGGIluVVPs..........hhululaSPsLDchGNSltGsthLcpLuphhshSlF .......................................GclADYIPtLApV.s.ssphGlAlsT.s.D.G.p.h.a.p.A....G....Du.....chp.Fol..QSISKVhoLslAhpc..h..Gtct..lap+VGt-PoGp.sFNSlltLE..h..cp.GhPpNPhINAGAlslss.h..l.......p.......u...........c..h.......s........p........th.....pclLchhcpLuG........p..p....l..shsptV....spS.Eh.p.p.u.t.RNtAlAah.h+..oh..G.h..p..s..Dsp..........psLcsYh.+tCulphssh-LAphushLAstGh.s.Ph..s....s......c.pV.lss..pp.s+plpAlMhTs..G..hYstuG..-F.Aa...............+....V...G.LPuK.SGVGGGIlAlVPs...........phuIulaSPtLDpt.GNS.ltGhthlc.p....Lspph.Ghsla.............................................. 1 146 287 427 +425 PF04488 Gly_transf_sug Glycosyltransferase sugar-binding region containing DXD motif Kerrison ND anon DOMO:DM04307; Family The DXD motif is a short conserved motif found in many families of glycosyltransferases, which add a range of different sugars to other sugars, phosphates and proteins. DXD-containing glycosyltransferases all use nucleoside diphosphate sugars as donors and require divalent cations, usually manganese. The DXD motif is expected to play a carbohydrate binding role in sugar-nucleoside diphosphate and manganese dependent glycosyltransferases [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.52 0.72 -3.63 22 1796 2012-10-03 05:28:31 2003-04-07 12:59:11 10 51 807 0 967 2109 434 94.40 21 24.68 CHANGED spchhtshcohh.phpP-...hphhlhscpht..............................shphlhpp.ss.......hh.phhs......hh.h.hthuDhhRhhlLa+YGGlYhDhDshslpsl.......sslhsppthh .................................................................................................................................................................................hpphh..phpPs.....aph.h.h.hscp........................................................................................................................................h.thh..hpp..hs.............hhpsap............p.h..h....h.h....tuDhhRhhlLa....phG.GlYhDhD.sh.sl+sl.......s.hh......t................................. 0 321 557 794 +426 PF00722 Glyco_hydro_16 glycosyl_hydro9; Glycosyl hydrolases family 16 Bateman A anon Pfam-B_759 (release 2.1) Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.48 0.71 -5.00 120 4255 2012-10-02 19:29:29 2003-04-07 12:59:11 16 172 1239 83 2173 4494 761 179.30 18 46.71 CHANGED hsppass.sasssps.h........s.......tslsLslc+......................to.GushpS...pptahaG...phpsplK..supusGlVouFal..s...o.ts...sspDEIDh.EFLGs.sssp......lpTNha....spGpus......p....E.phhh...haDso.psFHsYulhWssspIhahVDGhslRphpppps...s....s.a...Pp.pPMp.lhso..lWsus.......sausptG..........hhphDWst 
..............................................................................................................................t......h......ts..........h.l..thtp............................................hs...uutlpo...............ph.p....h....h.....a...G..............p.h...c.s..p..hK...........hst.....u.s...G.h.....hs.A.hah......p...................s............s..s..ps....E...IDl..E..h.......l.......Gs..p.ssp............................hp.s..s.....l..a..................sp.u.tss......................................p.........ptp.h.h..............h.....s....ss....s.saH.....s.....Y.....s.....l.........................W.......s.........s.......p........p........l..h...a....h.l.....D....s.........t.........h.t.....p....hpttt................................a...........P............ht....l....h...h....s........a.s.ut..................t..........................hta............................................................................................... 0 645 1357 1879 +427 PF00704 Glyco_hydro_18 glycosyl_hydro8; Glycosyl hydrolases family 18 Bateman A anon Pfam-B_574 (release 2.1) Domain \N 29.60 29.60 29.60 29.60 29.50 29.50 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.36 0.70 -5.00 165 8946 2012-10-03 05:44:19 2003-04-07 12:59:11 23 253 2235 288 3234 8695 370 279.40 18 59.57 CHANGED 
tpllsYas....pau.htts..................h.hppls.......pthoHlhauFsslssssth..............................................................tt.ststpsthpphspl+..ppss..slKlllSl...GGht.us.............sasthstss......pt+pt.FlpSshphlppat.....................hDGl.DlDWEassst...............................ctpsastllpcL+pthpp..........................phhLosA..........hssss....hhpth...shsphtph.lDalslhsaDahu......htt......suhpssLasss..............................................................hssp.slphahpts.ss.spKl.hGhshY.upuaphsss..t...............................................s.ttuhhsapplsphh.........................tht.thsss.spss.ahhps...............h..lsa-sspohttKspahpptslGGlhhWsl.s.hD ........................................................................................................................................................................................................................................h.sh.h......h.sh.h......................................................................................................................h..t.....h....t...ht.....t.......th..p...h.h....h...u.....l............G..Gh....tt.........................h...t.h..s.t.s.t...............tt.tpt..a.hp..shh.p.h.h..p..p..hs......................................................h.D..G..l...D............l...D....aE..hssst....................................................................stps.as.hL.lp.....pl+.pthsp.................................phhL.o.h..A...........................h..ss.s.t...............hhpth......ph......t.....t....h........s....p.......h.....l...............D......a.ls.l...M..sY....D...hhu.......htt................ss..ts..s..lht.........................................................................t...h.p.......s.l...p...h...h........h...t........t...s..hs...sp..K..lhlGh...s.ha..s..h..sat................................................................................................................................h....................................................
........................................h.....h...................................h.....h...s......h..p...........................t.h...G...h.............................................................................................................................................................................................................................. 0 1050 1817 2735 +428 PF02055 Glyco_hydro_30 O-Glycosyl hydrolase family 30 Mian N, Bateman A anon IPR001139 Family \N 20.20 20.20 20.50 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.55 0.70 -6.30 3 1151 2012-10-03 05:44:19 2003-04-07 12:59:11 11 52 589 84 517 1057 187 328.70 21 77.79 CHANGED AsDCspKTF.KTGlVCVCNlTYCDEI.PPlslshGQAApYTTS+SGARLHRDVlYATso-PhToLHhTIDSSKKYQTIQGFGSTFSDASGANLKSLPDphuDLILKQYFSDoGLNLQFGRVPIASsDFSuRVYTYsDss-DYsMpNFSLs+EDaQWKIPYI+pAQKYNpc.LKLFAuPWoAPGWLKTTsussGhGuLpGpsGDsYHQoYA+YFV+FLEEYuKsGIpFWGLSTQNEPTuGSDKKsKhQShGFTAEaQRDFIKpDLGPALAuSouGKNVKLLILDDNRGNLPKWADTVLNDhDAAKYVuGIAVHuYQD..uEoDsHLsETH+pHPNsFIFGTEASEGSKSKDppVDYGSWDRAtDYuSDILDNhNNWVTGWTERNLlLDApGGPSWVSsFsDAPVIAFPAhAQFYKQPMFYAIAHFSHFIKPGAVRIDHSLNhhN.ElEsoAFLNPDGSKVVVlLNKuSLsss.aoLoIKDsAcSpsHYphTLSP+sIlTLYIQ 
..........................................................................................................................t..........................................................................................................................................................................................................l....l..........t...p..h.Qp.h...GhG..ss...hsttsh........h.......t...h.......t.....p..t...........hh.......p.........ha....s..........................p.................t.......h.thshhR......h........sh.......su....s.....D.h..s................t..............a..s.h..........s....c.................t..........D......t............h.....t....p.....h...s......h...................................p......t..........................l...s.hl.p....t..h.......................t......h..........s............t....h.p..lh.u.oPWosP....saMK......s.......s...t............t.........h...........................................................s...................h...........................................................................................h......t..........s..........aApYh.hcal.pt.h.t.t.p.Gl..h.slohpNEP....................................h...s...hhhsst.....t..tta...hht....LtP.t......ht...tt................t.htl..........h............h..........h.....-.........p..........p..................................................t...............h..................................h......h..t.p......t...........s.....p...h..h.tGhuhHhY....................................h............h....t..h.P..p.h.......hh.oEt.......................................................................................................................................................h.......h..h...t.....h.....h..........t...hh..Wshhh....p....................s..........t...........................t.....................t........h.......h..............t............t.........h.....h.....p...........a.Y.hhuphu.+al.....G...u.p..lt.............................................................h........
........suh......p..s......-.s.p.h.s.h.l.h.h.N.t.t..........................................hh...................................................................................................................... 0 224 348 456 +429 PF01055 Glyco_hydro_31 Glycosyl_hydr15; Glycosyl hydrolases family 31 Finn RD, Bateman A anon Pfam-B_369 (release 3.0) Family Glycosyl hydrolases are key enzymes of carbohydrate metabolism. Family 31 comprises of enzymes that are, or similar to, alpha- galactosidases. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null --hand HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.43 0.70 -5.66 107 5543 2012-10-03 05:44:19 2003-04-07 12:59:11 21 109 2144 97 2001 5559 314 398.50 24 54.47 CHANGED YhhsGs...................s..s.pcllcpY.sploG..........+ssh.PhWuh.Gaap..sRasY..........ps....ps................................................cltplsppaccpsIPl-shahDh-..............Yhc.shc..sF...oactppF..........P......................pscphlccL+.ppGh+hlshlcP.........slptsps.......h.sapcuhppsh.al.......+.pssG.p..................hhuts.Ws.....................s.........ssasDF.....hsP.c.sppWW.tsphpp....hhsp.........ul-uh.WhDhsEsus..........h...........................................................................................................................................................................................................sh.shsshphsss............pphchHNlYuhhhspusacu..lhp..h.psscRsalloRSsasGup+auut.WoG......Dst.usW............pslph...olsthLshu.lsGlshhGsDluGFtus...........sst.....ELhsRW.hQhGuFhPahRsH.sshs....sptp..................EPahasp....spshh....+phlplRYpLLPYlYohhhcupps.GhPlh.RPLhhcaPp.D....tpshs.lccQahhGs.s...lLVuPVl.................pps.ssphpsY.LP......pG..p...Wa-hh......stcth....puG..p...hlp..l.suPl..splPlalR..sGsIl 
...................................................................................................................................................................................................................................................hh.G...p.....ttlhppa.tt.lsG.............ps..s.h...P.h.a.......u........h...Gh.h........sp.hth.................s.....pp...........................................................................p.l...p.hh.ct..h....p..p..t...s..l.P......h............s...s.h....h.hDh.......................................................ahp...shp..........sa................p.a..s.....t..pa..............................P..............s...p.thl.p.p.l..+..p..p.G..h..+.h.h.h.h..l...p.P........................hlttpps......................a.pc..h.h...p...p...s.h.hl.................p..p.pu...t...........................................................................................h....h...th...a.........................................................................................s........ssh...D..h.....hs..P....p...stp.a.a..t..p..h..h..p..t.......hhtt...................Ghs.s.h.h..h.................Dh.s.E...........................................................................................................................................................................................................................s.ssh.h.h.ss...............p..p...h..+N.h..........a.s..h....h................h..s..ps..shps................htp........................tp.....csh....l.hs......R.........u........u.h...s.Gsp+.a.......s.sh....W..sG...............Dsh....ss....W.....................ps..ht.pl.ts.Lshu.hsG.h.s...............h.h...........u...tD.l..G..GF..ss.....................sst................................ELh.h.RW.h.phG....s.a....P...h.h.Rh..H....s..s.s........t.........................................-PW..a.spt.............stsh..h..................+ph....h....p....l.R.h.p.L..h.PYlYshhh.p.....s......p.......p..p........GhP......l.h..RP..h...hh.c...asp..
..D.....................t..s.h..p.....l.........p.....p.............p.a........hh.Gs..s..................l.LVu.PVh.....................................pts...t......t.....p......h...ph.....Y..LP........pu...p...............W....hchh.......................ss.p.th...................pGu...p..............hhp...........h.tssh........t............p........l.P.l.ah+sssh.................................................................................................................................................................................................................. 1 765 1179 1663 +430 PF01532 Glyco_hydro_47 Glycosyl hydrolase family 47 Bateman A anon Pfam-B_958 (release 4.0) Domain Members of this family are alpha-mannosidases that catalyse the hydrolysis of the terminal 1,2-linked alpha-D-mannose residues in the oligo-mannose oligosaccharide Man(9)(GlcNAc)(2). 20.40 20.40 20.40 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.47 0.70 -5.83 139 1956 2012-10-03 02:33:51 2003-04-07 12:59:11 15 40 313 21 1367 1890 72 398.90 31 70.19 CHANGED 
hFh+uWsuYtc................aAaspDELpPlSpss.............pss..........................h....s...shGhTlVDuLDTLhlM...........shp..................cEapcuhcalt......ppl..s...............as.................st.t...l...........slFE...........................TsIR............hLGGLLSAatL..o...............................................tcsh..................lLc+Ah-L.u-pLh..sAF.pT..s.oGl.......PhsplN........................l.........tp...st.t..thsst.........................................................ts..s.....h.AchGoltLEFsh............LSpLTG-spYhchsp+shphlhp.........ttp...GLhPhhl...sspsGpas.t..............................................................................................................................................................phslGutuDS......aY........EYLlK.........talhh..u...t...p..................................p............................ahchap...........................puhpulpca.hhp.......t.............thhalup.....h..............t...th.hps...ph-HLsCFhuGhhuLuuh.h..........................sclchA.pclscsChphYpph..oGlhPEhhphs....................................................................s.......thhhp........................................................................................................................................................................tstpYhLRP..............EslESlaahYRhTsD.pYpchGWchapulpc.ts..+s....ps.............................GauulpDVp........................sp............hpD..........................pMESFa..LAETLKYhYLL.Fs...........-ssh..ls......................LccaVFNTEAHPl.h 
..............................................................................h.puapsYhp..................a.Aa.shDEL.pPlo.p.ps................................ps..........................h......ssh.uhTl...lDuL..D.TLhlh............s..p...................p-Fpcuhph..lt..........pl...s....................................as............s...t...l........slFE...........................ss.I.R..........................hlGG..LLoA.a.ls...............................................................................................................tpt.hLphAh-l.u.p.t.Lh..sAF..p...T.s.ou...l...........P...h.s.hls...........h.......tp.t.h.........sst.....................................................................................ts......h.Aphuol.lEFt.h..........................LSpl.TGssha..p..hs..p..p.....hhp.hl.p................G.Lh.s..hl......ssp..sG.pah...t...........................................................................................................................................................................................................................................................................................................................................................................................................................................tphslGu...hsDS....................aY.....................EYLlK............talhhs............c............................................................tt...........................hhpha...........................................pu.h.p....ul.pahhht......s...............hhhlst.hp...............................th.hp.h.hp.p.Lss..F.hsGhhsLu.....................................................tp...h.p..hAhpltps..hhth.a............pph...shhPEhhpht.................................................................hp............................................................................................................................................................................................................
.............................tptha.L....R....P........E..h.lEShahh.a..R.h....T..t....D..........h..Yhc.............hGh..p......hh..pu.lpp..hs..+s..ts.....................................GauulpsVh..........................s..sp......................hpD..................................................p..h-......SFaL.......uETlKYhYLLFs....pss........ls.............................................................hspaVFsT.....EAHsh........................................................................................................... 0 462 748 1108 +431 PF03200 Glyco_hydro_63 Mannosyl oligosaccharide glucosidase Mifsud W anon Pfam-B_2589 (release 6.5) Family This is a family of eukaryotic enzymes belonging to glycosyl hydrolase family 63. They catalyse the specific cleavage of the non-reducing terminal glucose residue from Glc(3)Man(9)GlcNAc(2). Mannosyl oligosaccharide glucosidase EC:3.2.1.106 is the first enzyme in the N-linked oligosaccharide processing pathway. 19.00 19.00 19.00 19.00 18.90 18.90 hmmbuild -o /dev/null HMM SEED 801 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.88 0.70 -13.48 0.70 -6.70 15 722 2012-10-03 02:33:51 2003-04-07 12:59:11 11 9 421 0 454 910 161 431.20 23 58.05 CHANGED 
lsllhlu..htshhhhthhhst..............+slssasss.h.p.s..hpsttssshaWGoYRPplYFGlRsRoPcSLlsGLMWhs...s.tsGtsslRHh..CEQGD.sLssYGWhcHDGRsFGpQcIpD.ps.hsLpT-FVKp.t.t..aGGDWusRIpupspsusps.s......psSLhaYsusEGps.....slsscl.stpspls........................hloGpop-LGpFplpl............ps.sss......hhcspahuhpsPs...lhplpD.......................lVhpsLppphpp....hss.t..tphlslss.h.........pspppuNllhh......QlohpsshphDIlFpSusstcpsp.............LTGpslsspLcc+pppF-cKFpppFsLppKh...............hsssphpFA+sALSNhLGGIGYFYGsSlVp.......................puhpptcslhYhPus..........LaTAVPSRPFFPRGFLWDEGFHpLLlt+WDscloh-lluHWhsLlNs-GWIPREQILGsEARSKVP-EFllQpspsANPPTLhLslccLl-shcsst..t..................................thhsaL++laPRLcsWFpWFpsTQ.pG.l.s..........oYRWRGRshs....LNPKTLsSGLDDYPRASHPossEhHVDLpCWhuluopsMsslAchLGpscshtc.......ptts.lssN..LschHWu-cppsYsDaGsHTc...................................tVtL....s....t.............h.R.s..hptPphphV.sphGYVSLFPFLL+lls.sDSs+LcplLclI+DscpLWosYGLRSLS+oushYhp+NoEHDsPYWRGPIWINhNYLhLpuL+HYu.....phsGPapspApplYpELRsNLlsNlh+QYppTGalWEQYDDp.oGcGcGs+sFTGWoSLVlLIMuEpY 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.hh............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................s..........t......h............h.h....h..............t.h.h...............s.........p................+L..........t.plL..p.hhhD.p.c.h...hos.....aGlR..SLS...+..............p...............s.....................h...............a....h.............p........t.............p..........s.......................................................................Y...................WRGP.lWhshNaLhlcuL.t.c.at.......................h.s..................................................................................................................................................................................................................................................................................................................................................................... 0 176 289 401 +432 PF03648 Glyco_hydro_67N Glyco_hydro_67; Glycosyl hydrolase family 67 N-terminus Finn RD, Moxon SJ anon CAZY Domain Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the N-terminal region of alpha-glucuronidase. The N-terminal domain forms a two-layer sandwich, each layer being formed by a beta sheet of five strands. A further two helices form part of the interface with the central, catalytic, module (Pfam:PF07488) [1]. 
22.00 22.00 22.10 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.72 0.71 -4.08 38 324 2012-10-01 20:56:08 2003-04-07 12:59:11 9 8 239 17 133 354 23 111.20 22 14.45 CHANGED sWLRYt..lsssth..phpshsspIVslsso..sslpsAtpELppGlpullGpsh..p.hssphsppss.......lllGThcshph.....ttshsp....Lss-Gahlpo....tpspllIsG.ps-pGsLYGsFca ...............................................................t.lh.h..tps.......................sslphAhpELppslptlhGpps...t...lh.s.p.ssppss............lllGohss..h......tth..t..h....s..t....t...ht....p...l....t..p....EGallps....................t.............s...pptllIsG.sscpGsLYGsach.............. 0 49 88 115 +433 PF03663 Glyco_hydro_76 Glycosyl hydrolase family 76 Finn RD anon CAZY Family Family of alpha-1,6-mannanases. 20.60 20.00 20.60 20.00 20.50 19.90 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.64 0.70 -5.08 90 1340 2012-10-03 02:33:51 2003-04-07 12:59:11 9 29 493 1 758 1302 51 320.60 20 73.21 CHANGED Sl....csAAsplAtshh.saYpGsp...G...shsGhhsss.......haW......WpuGuhassll-YWhh....TG..s..........soYNcllppulhaQsGps.........c.ahPsNtops...GND..DQuFWGlAsMsA.AEtsFsss..sst.p...WLsLAQuVFNs.s.sRWDsp....s.CsGGLRWQIasas.sGYsYKNolSNGshFpLAARLARYT..sNp..T.Ys-WA-KlWD.Whts.ssLl.......s....sp..apVaDG.s.....ssssN...C.os..hsph...pWoYNtGlaLsGAAaMYNh........T........pu...s.......tWpsRscslLsu..sh.....sh..F..Fsss...........lhhEh.uC......Es........t.p....CssDQtsFKuhhuRaLuhTstls.P..Th...-...pIhshLpsSApAAA.ppCoG...................us..ssp...hCGhpWh...ts.a.....DGhh.G...lGpQhuALpslpuhll......t.ssshoss 
.......................................................................................................................................................................................s...........................h...................W.tuth.hs....s......hlchh.hh......tt...s............................sphp.p...h..htp...thh.hth..stt............................h.s.h......s......s......t..............h......D....Dtsa.......hu.l.u......hhp....u......hc..........hshsp............p..........a.L.p.hAp.tl..asphh...s....tWssp..................s.......G..Glh.W.............p.................................p.......s.......h.....saKNs......h.........uNush...h.h.u.u....+....L..h.p........h.................s......tsp...................p..YhchApch.ac.Wh..tp......sll.......ts......pp......hh.l..hDGh........................phps.......s.tp.....hsp.h.........paoYNpGsh.lsus.s.l.aph.....................T................ts..s.....pah..p..csptlhps......sh.......................ph......h...h.ss.............................lh.p...................................................sssD..t.t.....F+uhhhRhhs.hh......phh...s...t....p............................thhsh.lttsupssh........tt...........................st.....hhuh.W.t.................................................................s............................................................................................................................................................................................ 0 203 429 641 +434 PF03644 Glyco_hydro_85 Glycosyl hydrolase family 85 Finn RD anon CAZY Family Family of endo-beta-N-acetylglucosaminidases. These enzymes work on a broad spectrum of substrates. 
36.00 36.00 36.50 40.30 31.30 35.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.04 0.70 -5.30 36 655 2010-01-08 16:09:57 2003-04-07 12:59:11 8 45 559 13 183 522 4 304.40 35 29.23 CHANGED hhusYt..ssph.......spGs.sph...sYs.FtaWphlDhFVYFS....H.....................thlslPsssalssAH+pGV.VLGT.....lhh...EhssthpphcphLp.sppus..hhh...........A-+Ll-luchYGFDG.WhlN.Essh.s.......................stspphhsFlphLpcphpp...........t...............hlhWYDu.hT.cGplpWQNtLsppNthF................hpssDuhFhNYtWst.....................................pp......lctStphApshshs.hc............lasGlDVauRu..............htuthpsptslcph.......t..psphSlulFAPuWsac.........................spFhpp-spaW.......................................shlsp...................................lsF.hTsFspGpGtpaah ...............................tsuhs..psph.............s.Gs.pp........uYs.FsaWQYlD.hVa.Wt....c..........................GlV..Psssh.l....sAu.HRNGVs..V........hG.T...........laF.............sas..s....u..h...t.....htph...L...c.....p...c.scus...asl.......................AcKLV-hA+aaGaDG.aFINpEssss.............................shspphppFhhYhpchutp......h........................................ph.WYDu.hT.ssth.aQs..u.Ls.-hNh...F................h..ss.DshFhNasWsp.................................................sp............cho.lttApth.G.ts.sa.p..............lasGl-lptsG..............hpsph..c.h...sshLcp...............t.phch....SluLF...AP...s.....hhhp.........s..................htpsatc.pEchaasGhpssPptp..................ssspsWh.Ghus.hlss+oshsu................................................sF.hTsFNsGpGhcaa.......................................................... 0 71 108 148 +435 PF04101 Glyco_tran_28_C Glycosyltransferase family 28 C-terminal domain Bateman A anon Pfam-B_1105 (release 6.4) Domain The glycosyltransferase family 28 includes monogalactosyldiacylglycerol synthase (Swiss:P93115, EC 2.4.1.46) and UDP-N-acetylglucosamine transferase (Swiss:P74657, EC 2.4.1.-). 
Structural analysis suggests the C-terminal domain contains the UDP-GlcNAc binding site. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.96 0.71 -4.59 34 6284 2012-10-03 16:42:29 2003-04-07 12:59:11 11 38 4635 6 1557 7997 2525 160.10 23 45.21 CHANGED hlhVhsG.Spstp.lNphlhphtthhtpt...............................hplhhtsGts.shpthptp..htph................hsahcphsphhppA-.llIo+uGAuTlhEhhthGtPsIllPh.ptht.tH.pppthplsp.thushhhhttphs....cpLtptltchhppppp....hppspt........hpphtshhcpl ....................................................................................................................................lLl.h..GG....S...G.Ap...h..lN....p..h.l.....p...s.h..s..t.l.t.pp...........................................................................h.p.ll.a.sG..cs...php.p.h.p.pt.....htpht.....................................thph..h.s.a.....h..c.....c.........h.......s.p.h.h.........s.........t.........A.........D...l..V..lsR.u.GAsTl.sElh..s.......h.....G.....h..P...s.l...l...l...P.h.s..tt....p..t.cQhtNA.t...l.tc...tG.u..u....h..h....l.....t....p.s....p...hs..........sp..tl...h..p.t.l.t.....p...h.p...ptt.......h.th.ttt.....................hh........................................................................... 1 542 1040 1326 +436 PF00852 Glyco_transf_10 Fucosyl_transf; Glycosyltransferase family 10 (fucosyltransferase) Bateman A anon Pfam-B_1677 (release 2.1) Family This family of Fucosyltransferases are the enzymes transferring fucose from GDP-Fucose to GlcNAc in an alpha1,3 linkage [1]. This family is know as glycosyltransferase family 10 [2]. 
21.00 21.00 21.10 21.60 20.80 20.80 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.29 0.70 -5.88 49 1458 2010-01-08 14:05:26 2003-04-07 12:59:11 14 27 340 9 858 1374 288 287.80 23 74.46 CHANGED pphhhshhhhshhhshhhhhhhh.htsssssh.t..t.............................ppphhl.LlWhaPFstphs.............hssCtphh.shss..ChlTssRshhscA...cAVlhHH+-lp.t...hssLPpt....................................RP..tQcWlWashESPops.....phsslps.lFNhThoYRpDSDIhhPYGhLhstpsttp....................hslP....pKs.......+..lVsWlVSN...astppt...RspYYpcLpcH.lpVDlaG+s............sp.lstsp...hhpslupYKFYLuFENS.lHpDYITEKLa.NsLtsGsVPVVLGPsRtNYEpF..lPs.-uFIHV-DFsosccLApYLhtLDcN-ptYhcYFpWRcphpl.....phht..htp...................hh....CpsCptlppt....pphps..hpsl.tsWah ...............................................................................................................................................................................hh....................................................................................................................................................................C..h.......p..pt..........t.......psllh...................h................h.s..............................................+....p...a.hh.ht..E..s...P......t..................h.......h....t..ha..N....h..Th.o.a......+.h.c....S....Dl.....h..s...h...t..h..h.....h...t....................................................................p.Ks..................t...hss.ah.sSs..................sp.......s.t..pt...............R.....p......a..h....p....p...L....t....ph.....l.....p......lDs.a...Gps...................................................................tt.h...s..t..tp..............hhp.h.l.s.pYKFhL....uF.EN.o.h......s...pD.YlTEKh.a...s....ul...hss.....s.....VPlhhG....p.....ssh..p..p..h.........hP..s...pS..a..I..plp..D.F.p.ssppLApalphLs..pspphY...p.Y.h...t.....W.+...tp.h....................................................................C.hC.hh.t..............................
................................................................. 0 422 516 726 +437 PF00982 Glyco_transf_20 TrehaloseP_syn; Glycosyltransferase family 20 Finn RD, Bateman A anon Pfam-B_1035 (release 3.0) Family Members of this family belong to glycosyl transferase family 20 [1]. OtsA (Trehalose-6-phosphate synthase) is homologous to regions in the subunits of yeast trehalose-6-phosphate synthase/phosphate complex, [2]. 19.40 19.40 19.40 19.40 19.30 19.20 hmmbuild -o /dev/null HMM SEED 474 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.53 0.70 -6.02 23 2717 2012-10-03 16:42:29 2003-04-07 12:59:11 16 20 1706 12 1143 2357 379 432.00 33 74.10 CHANGED s+hllVSN+lPls...hpRps...suc..aphshSuuG..Llsshpulppp......tph..hWhGhsGlpssppc.....pspls.......ppLppcas..........shPlaLs......-chhcpaYpG......aupslLWPlFHYhhs.......ptpa-cstWpuYhcsN+hFA-plsphhp...csDhlWlHDYHLMLLPphLRcchsc......hKIGFFLHsPFPSSEIaRsLP..sRpEILcGlLusDLlGFHTh-YARHFLSsCsRlLs..................lcspss....tlcatGRplsVtuhPIGIDssphtcslppssVtp+lppL+pca.pthKlIlGVDRLDhlKGlspKLhAa.EpFLppaPEapGKVVLlQlusPoRscsc-.hpplcpplp-lVsRINucaGsls..apPlhalcc.slsap-hlALashuDlslVoulRDGMNLVuaEYlssppc...............+cGsLlLSEFsGuApsLs.GullVNPWshcclAcAlppALsMoc-c+pt+ac+ha+hlspassshWucsFlp-Lpc 
.......................................................................................................................................p............................................ht...u...sG..G...Lss.u.l.shh.pt............tts.....hWhG.....W.s.........G..ss.tsp............tlt.....................t.s..p..h.s.............................................................................hhsl..L.s.............ppph...cpaYps..............................Fu..N..ssLWPlaH........Yh.................................hspa...p...c.p...tW.puYhcVNphFAcpl....hthhp.............cs..D.........h.....l.WlHDYH..Lhhlsp.hL.Rc.t..t.sp................................s+IGFFLHl..PF..PssElapt..L..P...................h+..ccl.Lc.....ullshD..............LlGFpT.s.t....ppF.lsss....p.phh.t............................................................hp.spss.........thph.....G....+....th......p....lts...aPI...GI....-s..pp..h.t.p.hs.........p...t.....s.....t.........l....tt..c......h....t.....pl..+..p...p.....h..........t......s.................h......p.l.l..luVDRLDYoKGlsp+lhAaEthL.....cp.hP..pa.p.............s..+..l.shlQ......lA..s.s....S.....R.......t...c.....l.p.........t.....Y............p....p....l+pplpphsGRING.c.a.........G...pls.........a........s.P..l.h.a........l.p....p...ph.s....h.pp...Lhulaph........uDlsLVTslRDGMNLVA+EYlAsp...ss.............................................................................psGsLlLScFAG.A.A.p.p..L...s...t..u............l............l.V.NP.............a.......Dhcth....As....Al...p...p...A...L...s....M...s.....t...E+.....tp.....Rhpphhc.hlhppsl.shWtpsFlpcLt.t................................................................................. 0 336 691 979 +438 PF01755 Glyco_transf_25 LPS_glycoyl_T; Glycosyltransferase family 25 (LPS biosynthesis protein) Bashton M, Bateman A anon Pfam-B_1857 (release 4.1) Family Members of this family belong to Glycosyltransferase family 25 [1] This is a family of glycosyltransferases involved in lipopolysaccharide (LPS) biosynthesis. 
These enzymes catalyse the transfer of various sugars onto the growing LPS chain during its biosynthesis. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.38 0.71 -4.67 22 1865 2012-10-03 05:28:31 2003-04-07 12:59:11 12 20 892 0 399 1567 1071 173.10 22 55.71 CHANGED h.thalISLcpu.cR+phhppphtp..........plsFpFFDAlsscphpp...........................p..tt.h..hhpphhhtp.LohGElGChhSHhhLWpcslcps....hphhhIhEDDlhlspph...ht..........t.tal.h.thh...a....h.h.ph.sah..shphcpp.h..............................................h..h.pthhGTsGYllo.puAcchlphh.pphh....hslD.hh ............................................................................................................h...halIs.L.tpt...p.Rhtth..ttp.htp............th.t.h...p...hh-Alsspthst......................................................t.t...h..h.......h..h.h.p........h........t.........l.o.....G.E.........lGCahSHhtlWcphl.....c...p...s..................phh..ll.hE.DDl..h..h.p.p..s.....h.....t...h.t..h..t...................t..th...lh.................................................hh.h.......................................................................................................................................................................................h.............htshuYhlo.puAphhlph.......h......hshD......................................................................................................................................................... 0 99 205 301 +439 PF00777 Glyco_transf_29 Sialyltransf; Glycosyltransferase family 29 (sialyltransferase) Bateman A anon Pfam-B_1020 (release 2.1) Family Members of this family belong to glycosyltransferase family 29 [1]. 
20.90 20.90 21.10 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.87 0.70 -5.25 105 2130 2010-01-08 14:04:21 2003-04-07 12:59:11 13 26 197 3 993 1851 142 224.50 21 69.43 CHANGED ss.tph..hhtppssthsp.....phhhs.hs.ppps..plspsltplhs.......t.Pht.pp...........h+pCAVVGNuGlLpsSphGpcIDsaDhVlRhNhAPlps.accDVGsKTohphhsPpsh.pp..............hspsshhhhls..hhstshhWlsuhhhpp.......sshhthhh.hhhphhhspppl.........................................hlhpP........pal+tltphahppsh........csphPS..........TGhlhlhhAlphC-c.VplYGFhs.....tsp.sh.pHYY..-phh.phht.......HshshEhthlpcL.tppG..hlchhhsp ................................................................................................................................................h...h..........................t.......................hppCA....VVGNuGhLhsSphGp.c.............I.Dp..tDhVh........R....h..NtAP.sp.s....ap.pDV.Gs+Ts..hhh......hsspsh....t.......................................ttp..h.h..h....h........hs..thh.ah.shh.pt...............................t....h..hh....h.......h...h.........h...t....p.h................................................h..h.h.p.P................thh.t...h...t.t.....hhhp..h.........................phph.o..........oGhh..h.hhhAh...ph.C-p....lplaGFhs...........pt.t...h..HYa.....-.p............t..h...................Hph.hEh.hh..ph.p.....th.......t.......................................................................................... 
0 297 410 626 +440 PF03360 Glyco_transf_43 Glyco_tranf_43; Glycosyltransferase family 43 Mifsud W anon Pfam-B_1447 (release 6.6) Family \N 20.20 20.20 20.80 20.60 20.10 19.60 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.28 0.70 -4.77 15 602 2012-10-03 05:28:31 2003-04-07 12:59:11 11 9 129 16 335 571 6 196.50 36 58.65 CHANGED LupTLppVs.sLHWIVVEDussssP.hVuslLcRoGLs..aTalst+TPpsac....t.p.hhsRGshQRNhALcalR.............ppppcGVVYFADDDNoYsLcLFp-.hRslc..+lGlWPVGLlG...........................GhhVEuPhlpss..KVluap....ssWcsc..RsFslD...........MAGFAlNlcllLp.....+ssAsFshcs.p...GhpEoshLcpLsh.chs-lEPh.ussCo...cILVWHTRTcp ...................................LupTLhhVs...sLhWlVVEDutpps......sspl...Lpp..oG.......l....asHLss.....p.s....tthp............p.h.h.sRusp..Q.R..NhALp.alR............................ppphpGV.....VaFAD..D.DNo.........YslcL.F-E......h................Rp...sc....plusWPVu.....h.lu................................shhhEuP.h.s...psu.....+Vl..Gap......................ssap.sp...R..s..FslD............................................MA.GFAlshp.llhp................................pspsh.hphps.p.....GhpE.o..sh..L..pp.ls....s.pc...l.Esh....u..ssCo.........clLVWHs+oc........................... 0 90 147 256 +441 PF01501 Glyco_transf_8 Glycosyl transferase family 8 Bateman A anon Pfam-B_730 (release 4.0) & Pfam-B_5903 (Release 7.5) Family This family includes enzymes that transfer sugar residues to donor molecules. Members of this family are involved in lipopolysaccharide biosynthesis and glycogen synthesis. This family includes Lipopolysaccharide galactosyltransferase Swiss:P27128, lipopolysaccharide glucosyltransferase 1 Swiss:P27129, and glycogenin glucosyltransferase Swiss:P46976. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.73 0.70 -5.05 67 5748 2012-10-03 05:28:31 2003-04-07 12:59:11 15 52 1943 49 1770 4515 1043 233.00 18 55.72 CHANGED slshshspsY..................................lhushsslpSllpssst...............................hthphhhhssslsscthpplpthhpth..h.......................................httphhshhsasRhhlsc.........................lh..sphcKllaLDuDllV.hpslspLaslshs.s.hhusstp...............................p....tth...t...sshFNsGhhlhshstWpppslppphhph..hphptp......h.hs-QshlshhFt................sphp.LshtaNhhshhh..................t..hp.h.ppstllHa....sG...ttKPWpphs ...................................................................................................................................................................................l.hshsts..a.......................................h.tshsh...htSlh..hp..sp....................................................hththh.l...h...s.s...t....l..s........t...p.........h....p..h...l...p....p....h..h...p...p....h....t....t....h..........h..h......s....................................................................................................ttph..h..s..h.h..s..a..h.R.h.h.l....p................................hh...sp....h.....c....+.llYLDu.D.lls....h........s..........s....l.s..c.L.......a....s....h...sht....sth...h.us....s.s.....................................................................................t...................h....t............ht..................ss.h.....F..NuG.l....hll.shp.t..h...p.....p......p....s...l...p.p....p.h...hph............hptptt....................h.hh.D.......Qs....h..L..N...h...h...ht......................................sp.h...h.....L...s.h.p..a....Nh.hthh.................................................t.......t.....h.....p..p..s...hl..lH.a........hu.........KPW.........................................................................................................
........................................ 0 480 946 1364 +442 PF00535 Glycos_transf_2 glycosyl_transf_2; Glycosyl transferase family 2 Bateman A anon MRC-LMB Genome group Family Diverse family, transferring sugar from UDP-glucose, UDP-N-acetyl- galactosamine, GDP-mannose or CDP-abequose, to a range of substrates including cellulose, dolichol phosphate and teichoic acids. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.86 0.71 -4.57 253 43678 2012-10-03 05:28:31 2003-04-07 12:59:11 21 600 5370 41 12098 45842 18220 163.70 16 45.40 CHANGED SlllPsaNc.......tphltpslpolhpp..........................h.phEllllDDuS.sDso..hplhcphht...........plpllppsps...Ghusuhstulpt.upGc....hlhhlDuDshhtss.hlppllphhpp.ssthshsshhthttpttththh..................................hhhttttttthththhhhhsstthhppphhtphh ......................................................................................olllPs.a..Np........tph...l....t...p...s....l...p...o...l...hpp......................................................h.shE.....l....l..l.....l.....D....D......u.........S....s.......D........s....o...........hp.....l...h.pphtt..................................p.l..p...h....l.....p........p.......p.........t.......s...........h......G.......h..........u......s.......u.......h......s......t....G....l.........p......t......A.......p...uc.....................hl.h.h.l.D...u.D......s..h.......h......s.........s....s....h....l.....p....p........h....l...p....h....h......p........p.................s........h.....t.....h.....s..h...s....s....h..h.h.h...t..t.........................................................................................................................................................hh........................................................................................................................................................................... 
0 4072 7912 10203 +443 PF00953 Glycos_transf_4 Glycosyl transferase family 4 Finn RD, Bateman A anon Pfam-B_534 (release 3.0) Family \N 21.40 21.40 21.40 21.70 21.20 21.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.31 0.71 -4.43 116 8422 2009-01-15 18:05:59 2003-04-07 12:59:11 16 18 4730 0 2028 5693 4534 172.80 31 48.36 CHANGED hhhhh....hssh...llshlGhlDD...h.........h......slsshh+lhhphlsuhlhhh..................................shttlshhhs.................................................h.hs.hh.h.....hh....slhhlsuhh.....NAhNhhDGlDGLu.......uuhulls.hlshshls..........................ssh.hhhh.............slhu.ul.lG....FLhaN.h.P.....A...+lFhGDsGShhlGhh..lu....hlulhht ................................................................................................h..hlLhshhshshlGhlDDh............................................h.u.Lss+..hK.hhh....Qhlsullhh.hhhh............................................thstls.h.hh...........................................................................................h.h.sl.u.......h.h...hl......hl....slhhl....luhs....NAl.NL.sD........GLDGLAuusssls.hsuhullu..............................................................hht.............sphpl.sl.h...............sh..............ullG....Ah..LG....FLhaN...hhP............A....clFMGDsGSLhLGhh.luslulhh.p.................................................................. 
1 688 1325 1728 +444 PF00903 Glyoxalase Glyoxalase/Bleomycin resistance protein/Dioxygenase superfamily Bateman A anon Pfam-B_1207 (release 3.0) & Pfam-B_5495 (Release 8.0) Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.44 0.71 -4.16 82 14717 2012-10-02 15:00:03 2003-04-07 12:59:11 20 46 3790 237 3666 25008 5991 128.70 16 62.42 CHANGED tlsHhs........lhlsDhpcuhpFYpcs...LGhplhpphs.............................hhhhhhsssttlplhhttsssstttth.....................................................................................................................................ththhshsscDlttthcclpppG..................sphhtt.scthhsthhhh.......................hpDP.sGhhlEl ...............................................................................................................................................................ltHhs..............lhs..s..-...h..p...c..s...h..p...FY...p...cl......L....G...h.....c...l......h.cp.hs....................................................................h.h.h....h..h..h....t......s...s.....s.....s.....t......l.....p....l....s.......h.....t...........s....s...s..t.t.t.t.t...................................................................................................................................................................................t..h.u....a..h..s.........s....h...c...-.....l...t.......t...h.....h......p....p....l....p.....p...t..G..................................h..p...h....p...t........h.............s.....p....h....h....h...t....t....h...h..........................................hpDP...sG.hhEh.......................................................................................................................................................................................................................................................................................................... 
0 972 2122 3010 +445 PF04464 Glyphos_transf glyphos_transf; CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase Waterfield DI, Finn RD anon COG1887 Family Wall-associated teichoic acids are a heterogeneous class of phosphate-rich polymers that are covalently linked to the cell wall peptidoglycan of gram-positive bacteria. They consist of a main chain of phosphodiester-linked polyols and/or sugar moieties attached to peptidoglycan via a linkage unit. CDP-glycerol:poly(glycerophosphate) glycerophosphotransferase is responsible for the polymerisation of the main chain of the teichoic acid by sequential transfer of glycerol-phosphate units from CDP-glycerol to the linkage unit lipid [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 369 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.14 0.70 -5.70 22 3039 2012-10-03 16:42:29 2003-04-07 12:59:11 9 50 1148 20 433 2103 385 317.90 20 61.59 CHANGED hhhhhthhhP..hcsshllFtuhttcshusNsptlac.hhp..c.tsshchlWshcp...........tth.tssphlthsShchhhhhhcuchllss........s.hss..hhh++psphalQsWHGs.sLKphGh-..t.....h.p.....phht....psppaD...ahlossphpsphap....csFs..hstt.phlphGhPRsDsLhpspttp..httlppphslshscp..llLYAPTaRcsttt.......................hthtlDhcplh....pthtpchhllh+hH.hhsstht.........hppsshllslsstp......clp-LhhsuDlLITDYSSlhFDaulLc+PllhYs.Dh-pY...ppp.RGhYh-.a.ptsPGslscs.p-Llchlpph.ppspt.tcphphhpccasp.apDG+uScRlhphlht 
...........................................................................................................................................h..h..............tp.lh..h..s.......t....t..hsss.t.hlhp...hh.......p...t..p..h...p..h..hh.hhp.........................t..th.....t..hl.....................p......h.....c....h.......h.....hh.....tp....A.c.h.hh.s....................s.h.........h..h....h..p..........p......p....h....h...lQ.........hWHus.sh.K.p.h.shs..t...............................h.h...thpph.s..........hhl.s.....s...s.p..h..t...p...h.at.........puat.......hpp...t....p....l..l...h.G......h.PR...Dhlh.....pp....p.tp.............tth...p....p..p.....h......s......l..........t..tcp..........ll.LYA..P.T..a..Rssttt..................................h.h...s..h.....p.tlt........tht.p.....sh....h..l..l.h+hH.......h...h...t..p..p.h.........................thtp.h..h..h.s.h.s.s..h..............................ch.p-..l..hhh..sDl..l.ITDY.SS.lha.-..a.h..h..h..c..+..P...h.l...a...a....s.............Dh....p...p....Y...............pp....R.s.h..h..s...h....p...........h..s.............u.....h........s..p.......s......pt.Lh.p.t....l..p..p......................................t....h............h.....hp.....t..h....hpst...p.s.spphhp....t........................................................................................................................ 0 162 296 399 +446 PF00958 GMP_synt_C GMP synthase C terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1137 (release 3.0) Domain GMP synthetase is a glutamine amidotransferase from the de novo purine biosynthetic pathway. This family is the C-terminal domain specific to the GMP synthases Swiss:P49915 EC:6.3.5.2. In prokaryotes this domain mediates dimerisation. Eukaryotic GMP synthases are monomers. This domain in eukaryotes includes several large insertions that may form globular domains. 
21.10 21.10 21.10 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.23 0.72 -4.27 161 4898 2009-09-11 08:43:03 2003-04-07 12:59:11 17 31 4651 24 1280 3535 2105 95.50 57 18.75 CHANGED h-Elccs.......GLYcclWQuFAV.Llsl+SVGVhGDpRoYsas...................................................................................................................luLRAVpSpDuMTA-as.....c.lP.........a-hLpclSsRIlNEVpuVNRVVYDITSKPPuTIEW ...........................................h.EElppAGL.cclaQsFsV.l.h.s.V+S..VGVMGDsRT.Y-..a.s...........................................................................................................................luLRAVsohDhMTAcaA.........+l.P..........................a-lLt+lSsRIlNEVc.slNRVVYDIoSK..........PPATIEW................... 0 423 820 1086 +447 PF01825 GPS Latrophilin/CL-1-like GPS domain Bateman A anon SMART Family Domain present in latrophilin/CL-1, sea urchin REJ and polycystin. 21.50 21.50 21.50 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.64 0.72 -3.85 139 2565 2009-01-15 18:05:59 2003-04-07 12:59:11 16 386 110 4 1472 2317 28 46.70 35 3.56 CHANGED tpstCsaWs..........pspusWu.ocG..Cps.hp.sst............................sp...s.pCpCsH...LT.sFulLh .............................h.pstCsaWs......................sttGtWo..opG..Cph..hp...ssp..................................sp.......o.pCpC...sH....L.T...sFAlLM............................ 0 524 628 925 +448 PF02893 GRAM GRAM domain SMART anon Alignment kindly provided by SMART Family The GRAM domain is found in in glucosyltransferases, myotubularins and other putative membrane-associated proteins. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.24 0.72 -4.45 33 2555 2012-10-04 00:02:24 2003-04-07 12:59:11 15 89 326 5 1481 2393 6 70.70 24 10.41 CHANGED pch+.phFp......lstsEpllss......asCtlhp................ph.hpGplYlospclsFpS.hhss.t...........shslPhsslpplcKhs ...........................................................................................ap.phFt......lst.sEpLhps...........................as.CsL.p.................................ph..h..pGplal.ospp...l.sF.t..S..h.h..sp.s..............................plslPhtplpplcK..t............................... 0 338 634 1024 +449 PF03514 GRAS GRAS domain family Bateman A anon Pfam-B_393 (release 7.0) Family Proteins in the GRAS (GAI, RGA, SCR) family are known as major players in gibberellin (GA) signaling, which regulates various aspects of plant growth and development [1]. Mutation of the SCARECROW (SCR) gene results in a radial pattern defect, loss of a ground tissue layer, in the root. The PAT1 protein is involved in phytochrome A signal transduction [2]. A sequence, structure and evolutionary analysis showed that the GRAS family emerged in bacteria and belongs to the Rossmann-fold, AdoMET (SAM)-dependent methyltransferase superfamily [3]. All bacterial, and a subset of plant GRAS proteins, are predicted to be active and function as small-molecule methylases. Several plant GRAS proteins lack one or more AdoMet (SAM)-binding residues while preserving their substrate-binding residues. Although GRAS proteins are implicated to function as transcriptional factors, the above analysis suggests that they instead might either modify or bind small molecules [3]. 
19.90 19.90 20.10 20.50 19.80 19.00 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.25 0.70 -5.74 31 1831 2012-10-10 17:06:42 2003-04-07 12:59:11 9 14 343 0 790 1894 0 305.00 31 67.57 CHANGED LhphLlpCAcAlss..sshp..hAptlLtclsph..uSstG.s.....shpRlAsYFsEALssRlss.....h.p.h.ssptss......sschhtuh..................ph...a.cssPhh+FuahTANpAILEAhcsp...ppl..HIIDFsIspGhQWPuLhpuLA.pRs....su......sP.plRlTGlus.t........stttLppsGpRLspFAcplslsFcFps....hsp.phspl...phptL....plps...sEslAVNhshtlH+hh.......................................................spssshtp.....thLphl+slsPclVTllEpE..ssp.Nss......sFhsRFhEuLcaYoulFDSL.css............hs.ts..ppRthlEchhlu+cIhsllAsEGsc.RhcRpEshtpWRp+htp.uGFpslsluspshpQAchLLphas.scG..aplc...-c......susLh.LuWps+sLlssSAW+ ................................................hphLhtCApAltt....ts.hp......hApt.lltp..l.t.h......us.stu...s.................s.hp...+.lAtaF.scuLtt.Rlht..............................h.....stt.........................ph.h.h.....................................h........hhphs...Pah+huahsANpAIl-Ah.t.sp...................ppl.....Hl.lDa.s.........h....t......G...h....QWssLhptLu..........Rs.......sG......................PP..thRlT.uls.s...................stttlppsG.+LsphAc....p..h.t..l...F..c.aps.......................hst....ph..t..s.l.............p.....p....L.........plp..............sE.sls...VN..h.hth....+plh.......................................................................................................................sp....ss.s.htp.............hL..........phl+.th........pPc..........lhs..lsE...p-.......ush....s...s.s...................FhsRFh-u...Lp.aYushFDuL.c..ss.........................h.s...ts........tpc.hhhpp.hhuppIhsllu..s..........-..G.p.....Rh...........E.....R....Et...........h.t..........pWptph..tt..uGF.t...h.ls........sht.p.s..p..hl.t..........h....................t..........a....h.......tt............t.t...hh...tW.t...hh..osW..................................................
...................................................... 0 78 520 682 +450 PF01465 GRIP GRIP domain Bateman A anon Sean Munro Family The GRIP (golgin-97, RanBP2alpha,Imh1p and p230/golgin-245) domain is found in many large coiled-coil proteins. It has been shown to be sufficient for targeting to the Golgi. The GRIP domain contains a completely conserved tyrosine residue. At least some of these domains have been shown to bind to GTPase Arl1, see structures in [4,5]. 20.50 20.50 20.50 20.80 19.80 19.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.90 0.72 -4.44 44 627 2009-01-15 18:05:59 2003-04-07 12:59:11 15 21 256 8 415 639 5 44.60 31 4.30 CHANGED pshshpYLKNVllpFLppp-..t.+ppll..sVlsolLcFopp-pppl. ............tsshEYLKNVllpFlpt+..-.....tpp..p..Ll..sllsslLpFoscEpptl........ 1 146 206 324 +451 PF02946 GTF2I GTF2I-like repeat Bateman A anon Pfam-B_101 (release 6.4) Family This region of sequence similarity is found up to six times in a variety of proteins including GTF2I. It has been suggested that this may be a DNA binding domain [1,2]. 20.60 20.60 20.60 21.00 18.90 17.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.42 0.72 -4.14 18 742 2009-09-10 22:54:23 2003-04-07 12:59:11 9 8 42 10 300 939 0 75.00 47 38.66 CHANGED LRcpVc-LFspKYucALGhspsV.VPYpphhppPpslhVpGLP-GlsFR+PssYslspLc+ILcspcpIpFslK+P ............LRcpVc-LFspKa...u..EALGh..spsVtVPYp+hppsPpslhVpGLP-GlsFRcPssaslspLc+ILcspppI+FlIKRP...... 0 17 38 98 +452 PF01018 GTP1_OBG GTP1/OBG Bateman A, Studholme DJ anon Pfam-B_875 (release 3.0) Domain The N-terminal domain of Swiss:P20964 has the OBG fold, which is formed by three glycine-rich regions inserted into a small 8-stranded beta-sandwich these regions form six left-handed collagen-like helices packed and H-bonded together. 
21.50 21.50 21.50 21.70 20.70 21.30 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.19 0.71 -4.58 8 5081 2009-01-15 18:05:59 2003-04-07 12:59:11 17 23 4657 3 1432 3621 2408 149.90 46 37.77 CHANGED FVDpscIhlpuGsGGsGhVSFRREKalPtGGPDGGsGG+GGDVlhEsDpslsTLlDaRap+HaKApsGppGtucNpsG+sGcDlllKVPsGThVhD.-ssplluDLVcsGpRhllApGG+GGLGNu+FtSssp+APpaApsGpcG-pRplpLELKl .................................................FlDpspIhlpAGcGGsG.sl.............u...F....R......R..E...K...al..P...h.....G....G........P..s.GG....DGGcGGsVhh........s..Dc..s..l..p...TL...lD......a..R......a...p...+.+....a....+A..pp.Gps....G...t.....u......p..s......p.p..G.+..........s.......G....c..D..lhl+VPsGT....h.......V........h...............D........t.............c..................T........s........c........l.................l........uD...Ls..c..p...G.p.c.hl.lA+GGpGG....h.G..Nt.+.F.....t.....o....s..s.........NpA...P.....ch.u.....p................GpsG.-c+plhLELKl.................................................. 0 493 898 1211 +453 PF00009 GTP_EFTU Elongation factor Tu GTP binding domain Bateman A anon Prosite Domain This domain contains a P-loop motif, also found in several other families such as Pfam:PF00071, Pfam:PF00025 and Pfam:PF00063. Elongation factor Tu consists of three structural domains, this plus two C-terminal beta barrel domains. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.24 0.71 -4.86 216 69868 2012-10-05 12:31:07 2003-04-07 12:59:11 22 241 24054 243 11959 72575 19001 186.10 31 41.29 CHANGED cphhNlullGHVDcGKTTLssplhhhsuh....t..t...t..t..............hDphppE+cRG.....ITIpsutsphp.....................................spptphsllDsPGHtD.FspphhpG..hu.hDuAlLVlsAs........cG.......stsQTccthhhApphsls......hll..hlNKhDps...........p.tchtclhcpltpph...........................................................................................lpth..hss...........................thsl.l.sSAhpuh..................slp......................................................................................................s.Ll-slsphlPs ..........................................................................................................................................................................p..st.....GKooh...h........t...t..l..h.............................................................................................................hD......h..t......E....c...p...R..G.......I..T...I...p..h..u..h..h.php.....................................................................................................htphhls.l...l..D.s...........P..........G............H.............t............D..........F............h...........t...........p...........h...........h...........p....u................hs.........s......D........s.........A..........l........L...l.....V...s..A.s..............................s.G.........................hp..s..Q......T.....+......c........t......h.........h........h.........A.........h........p.........h........G..............l................ll..V..s.l...NK.....h.Dps..........................ps.c..h..p....c..l....h...c...p...l.p......ph......................................................................................................................................................................
.................................................lp.th...shsst...................................tss.h.l...h.S.....u......hp.u.....................................s.h.......................................................................................................................................p.Ll-sl.t................................................................................................................................................................................................................................................................................. 1 4102 7392 10000 +454 PF03144 GTP_EFTU_D2 Elongation factor Tu domain 2 Bateman A anon Prosite Domain Elongation factor Tu consists of three structural domains, this is the second domain. This domain adopts a beta barrel structure. This the second domain is involved in binding to charged tRNA [1]. This domain is also found in other proteins such as elongation factor G and translation initiation factor IF-2. This domain is structurally related to Pfam:PF03143, and in fact has weak sequence matches to this domain. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.28 0.72 -3.86 305 58968 2012-10-05 12:31:07 2003-04-07 12:59:11 20 172 19933 232 11011 44749 11474 68.00 28 14.18 CHANGED GslssuRl.sGpl+hGsplhlh..t....t..............sclpsl.hhpt.thpps.....................hsusssGhhltshshcc...lptGpsls ...............GslsssRVpsGsl.+.s.G.ph.V..h..hh...s.s.....................................scV...p..ul..p.h...a...+.p....t...h.p.c.s.................................................ls.G.s..s..s..G....h.....l..p.s..h.s..hc-......lppGpsh........................................................... 1 3673 6761 9154 +455 PF03143 GTP_EFTU_D3 Elongation factor Tu C-terminal domain Bateman A anon PF00009 Domain Elongation factor Tu consists of three structural domains, this is the third domain. 
This domain adopts a beta barrel structure. This the third domain is involved in binding to both charged tRNA [1] and binding to EF-Ts Pfam:PF00889 [2]. 21.30 7.10 21.30 8.40 21.20 -999999.99 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.29 0.72 -3.83 82 23082 2009-01-15 18:05:59 2003-04-07 12:59:11 12 49 16442 102 2870 18678 2585 86.00 47 25.05 CHANGED slpsppcFcApVhlLs+.....poshhsGYpP.hah+Tsclss.hhpL.........stts......chlhsGDsshlplchhp.PlslEptt......RFAlR-GG+TVusGllpclh ..................................s+ssscFsAQ...VI.V...LNH............PGp.I.ss.G........Y...sP.V.Lc.s.H.Tu.clss.........+h..sEl....pt.h-.............sucs........................c.ltsGD..ss.h...l.p....l...........+...P.h....s...l.Eth..........................cF.................................................................................... 1 948 1617 2282 +456 PF01493 GXGXG DUF14; GXGXG motif Bashton M, Bateman A anon Pfam-B_428 (release 4.0) Family This domain is found in glutamate synthase, tungsten formylmethanofuran dehydrogenase subunit c (FwdC) and molybdenum formylmethanofuran dehydrogenase subunit c (FmdC). A repeated G-XX-G-XXX-G motif is seen in the alignment. 
19.80 19.80 20.60 20.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.42 0.71 -5.32 29 4251 2009-09-11 14:28:49 2003-04-07 12:59:11 14 50 3290 15 1375 3769 3934 186.10 39 14.86 CHANGED hphslhsscRslssclsstls+thGpps.h...............sslplphpGoAGpShGuahssGhp.............lplpG-AsD..alGpuhsGGcIllpsssss...........shtsspsshhGssshaGATGGplahpGpAG-Rhul+N...........SuuphVlEGs.GsassEYMsGGhllVLGcsG..........cNhGAGMoGGhsYlhsc.s..............shspphshchVchp...plps...........tstppL+tl .............................................................h....ltNssRsl.Gshlustl...sc..p.aGppG..L..........................s.t.s.slplph..p..G.o..A..G.Q..S.F.G...u..a.h..s..t...G.l.p.........................................lplpGDA.ND..YVG.K.G.h.uGG..pll.l.p..Pshss.............th.s.pcshIhGNs.sh.......YG........A..TuG.c.la.hp.Gp.A..G..E..RFuVRN...........................SGApsVVEG................s.G..D......a..uCE.YM........T........GGhVllLGp..sG....................pNFu...AGMoG.GhAYVh-pss..............ph.t.t..p.s.N...chlph............lpt..................h..................................................... 1 407 886 1167 +457 PF02756 GYR GYR motif Ponting CP anon Ponting CP Motif The GYR motif is found in several drosophila proteins. Its function is unknown, however the presence of completely conserved tyrosine residues may suggest it could be a substrate for tyrosine kinases. 20.60 20.60 20.70 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.19 0.73 -6.39 0.73 -4.01 4 377 2009-01-15 18:05:59 2003-04-07 12:59:11 9 37 13 0 227 370 0 17.80 73 19.15 CHANGED -DGY+YKTVRRL+hRhRH ...DGYcYKTVRRLKaRpR+.. 0 34 34 134 +458 PF03457 HA Helicase associated domain Yeats C anon Yeats C Domain This short domain is found in multiple copies in bacterial helicase proteins. The domain is predicted to contain 3 alpha helices. The function of this domain may be to bind nucleic acid. 
28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.41 0.72 -3.87 186 906 2009-09-13 12:23:34 2003-04-07 12:59:11 9 52 116 1 382 984 370 69.10 24 38.11 CHANGED hss...hpppWppta.ptLppatppcG...p..tp.lPpsh....................shpLGpWlppQR....pth+p..................sp......Lst...-+hptLspl..shh .............................pttWpp.ta.ptLppatppcG.................p...hp..lPp.sh..t.....................shtLGpWlspQR.............pphcp....................................................up............Lss......-RhptLcplGh.................... 0 258 341 371 +459 PF04408 HA2 Helicase associated domain (HA2) Bateman A anon Bateman A Domain This presumed domain is about 90 amino acid residues in length. It is found is a diverse set of RNA helicases. Its function is unknown, however it seems likely to be involved in nucleic acid binding. 21.30 21.30 21.30 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.36 0.72 -4.00 140 7005 2009-01-15 18:05:59 2003-04-07 12:59:11 18 172 2406 5 3299 6442 376 102.00 28 9.96 CHANGED pulphLhtLGAL..........st....ps.................pLT.tlG+phuphP.l-PpluKhL..l.husphsC.........hcchlsIsuhLos.pssFhp..............................s.t.tttsp.........................................................t..t+tph.............ps.........DalshL 
...............................................................................t.AhphLppL.GAL.........................ss......ps.......................................................pLT..slG...+ph...Apl...P....l.....-P+L..u.+.Ml.......l..t.u...t....p..h......s..C.................................hpp..sls...IsAhL..oh....ps..sh.p.................................................................................................s.t.t.pt.tsp............................................................................t..h+.th....................................uDhh............................................................................................................................................... 0 1063 1774 2655 +460 PF04774 HABP4_PAI-RBP1 Hyaluronan / mRNA binding family Kerrison ND anon Pfam-B_2044 (release 7.6) Family This family includes the HABP4 family of hyaluronan-binding proteins, and the PAI-1 mRNA-binding protein, PAI-RBP1. HABP4 has been observed to bind hyaluronan (a glucosaminoglycan), but it is not known whether this is its primary role in vivo. It has also been observed to bind RNA, but with a lower affinity than that for hyaluronan [1]. PAI-1 mRNA-binding protein specifically binds the mRNA of type-1 plasminogen activator inhibitor (PAI-1), and is thought to be involved in regulation of mRNA stability [2]. However, in both cases, the sequence motifs predicted to be important for ligand binding are not conserved throughout the family, so it is not known whether members of this family share a common function. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.94 0.72 -3.54 39 511 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 219 0 289 525 1 99.40 26 29.83 CHANGED s+RpFDR+SGosRu..............s-..KRpGuGptNWGosp--.h................sph.-.s.ss.t-ps.ssEp.s.....t-cs........scEssscptt...Epc..sKEMTL-Ea.KsLp-Kp+uh.s...hph.cctcss .......................................................pRta-Rp.S..G.os+s................tp.K+tG......uGttNW.GsspD-..l..........................sc.h.....-ps....s..tp.cs..s.....t....tEpts..........tppt............sppptspt.t.....Ept......spEhTL-Ea.Khhpp.pp+s.h.t...hphcc.t............................................................. 0 102 154 221 +461 PF02183 HALZ Homeobox associated leucine zipper SMART anon Alignment kindly provided by SMART & Pfam-B_1492 (Release 7.5) Family \N 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.13 0.72 -4.24 69 597 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 81 0 280 618 0 43.70 38 16.66 CHANGED KQLE+Da-hLKpsa-sLppc.-sLppEppc.L+uplhsLppphttp ....KQhEhDh-hLKcsa.-sLpp-sc.......pLp+E.......hpc.L+A.hht.p.h....t.......... 
0 43 169 230 +462 PF00672 HAMP DUF5; HAMP domain Bateman A anon Pfam-B_113 (release 2.1) Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.25 0.72 -3.79 222 60876 2009-09-13 11:30:41 2003-04-07 12:59:11 20 935 4667 65 16414 49005 5371 70.40 19 12.75 CHANGED hh.hhhhhslllshhhshhlspplhpPlppltpssppl.u.p...........G...cl...s.............s.h......ttsElupLupuhNpMtppLpp ...........................................h..hhhhhs.l.l.l.s..h.....h....h..s...h....h....l....s.....p........p.........l.......h....p........P.....l....p....p....l.t....ps....s....p....p.l..u..p..........................G............cl.......s..........................................................ppl.......shp...sp..c..E...l.up...Lu....pu...h....Np.MhppLp.t....................................................................... 0 5121 10085 13473 +463 PF04849 HAP1_N HAP1 N-terminal conserved region Mifsud W anon Pfam-B_4571 (release 7.6) Family This family represents an N-terminal conserved region found in several huntingtin-associated protein 1 (HAP1) homologues. HAP1 binds to huntingtin in a polyglutamine repeat-length-dependent manner. However, its possible role in the pathogenesis of Huntington's disease is unclear [1-3]. This family also includes a similar N-terminal conserved region from hypothetical protein products of ALS2CR3 genes found in the human juvenile amyotrophic lateral sclerosis critical region 2q33-2q34 [4]. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.93 0.70 -5.48 8 271 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 86 0 153 239 0 249.20 44 35.16 CHANGED hppp...............lssaulRADsltsa-.............ps-Wt.osthusstp.slos.phppsL+ah..hLCssRssQMTK.TasDlDulhpLLEE................KERDLELAARIGQSLLKpN+sLp......................EcN-tLEEpL..............................ppsh-plsQLRHELshKDELLQhYos..ssEEuEsESssossh..........................+.pcupstspph.pL-sLQcKLKpLEEENppLRpEAs+L+pETh..ThE-KEQpLlpDCV+pLc-ustQluuLoEELupKoE-hsRQQEEIo+LluQIVDLQ+KsKpaulEsEELpppLsuuK-uQcQLpAE.......LpELp-KYtEChtMLtEsQEElKsLRs+s .........................................h........................................................................p....tt.h..h....lLsspRVtQMTK.T.YsDI-sVT+LLEE........................KERDLELAARIGQuLLc+NpsL.p..........................................................EpN-hLEEpl........................................................................................tphh-plsQLpHELshK-ELLphhopss.-..-u-s-usssp.h..........................................h..t...s....pthhpL-.LQcKL+pLEE...ENh.LRs.EAspLcp-T...shEEc.EQpL.ltD.C..VcpL...p..-uN.QhsploEELutKs--hhRQQEEIopLluQll-LQp+h+ths..hE....pEELhtpLtus.K-sQ.pp.L..shE................................LpELp-+.tEshthL+EuQEElKpLRp+.......................................................... 0 29 43 97 +464 PF02184 HAT HAT (Half-A-TPR) repeat SMART anon Alignment kindly provided by SMART Family The HAT (Half A TPR) repeat is found in several RNA processing proteins [1]. 21.00 21.00 21.20 21.00 20.60 20.80 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.16 0.72 -7.40 0.72 -4.20 3 422 2012-10-11 20:00:58 2003-04-07 12:59:11 11 23 237 0 304 452 7 32.20 46 6.81 CHANGED KEIDRARuIYERFVaVH.P-VpNWIKaARFEEc .......Eh-RARsIYE.....R......a....l..h....V.H.Pc.lcsWIKYA+FE...... 
0 108 161 234 +465 PF02518 HATPase_c Histidine kinase-, DNA gyrase B-, and HSP90-like ATPase SMART, Griffiths-Jones SR anon SMART Domain This family represents the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.30 0.72 -4.35 659 129386 2012-10-11 19:05:54 2003-04-07 12:59:11 21 5116 7141 397 36085 119054 19749 113.50 24 18.95 CHANGED sstttLtplltsllsNAlchs.tt................splplplptpt.................................................................tphplpV...pD..................s..Gh...Gls..............................p.htplhp.hhphp......................t.tthsG..pGlGLslspp.lsct....hs.Gp.......lphpsp.......................................spGop.hpl.....plsht ................................................................................................................................h...ttlpplltNL...l..s...NA...l....c....astst..............................................spl..p..l...p....h....p...p..p.t............................................................................................................................................................................tp..l..t.l..p......V....p.D...........................s...Gh....G.l..st.....................................................................................................c.t.h..t...p...l....F...c......a.h...pspt.........................................th...s.G.........sG....l....G......L.u.....l....s.....c..p....l.s..c.t.........h...s.....G.p..........l.p.l...p.u.p.........................................................................sp.G.op...hplplP..t........................................................................................................... 
0 12473 24121 30935 +466 PF00955 HCO3_cotransp Anion_Exchanger; HCO3- transporter family Croning MDR, Finn RD, Bateman A anon Pfam-B_1004 (release 3.0) Family This family contains Band 3 anion exchange proteins that exchange CL-/HCO3- such as Swiss:P48751. This family also includes cotransporters of Na+/HCO3- such as Swiss:O15153. 19.70 19.70 20.00 20.10 17.80 19.20 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.61 0.70 -5.84 18 2233 2012-10-03 01:44:59 2003-04-07 12:59:11 16 22 295 7 1138 1987 18 300.50 27 52.48 CHANGED sLpRTGRlFGGLIpDlKR+hPaYlSDhpDALssQCLAAllFIYFAsLSPAITFGGLLG-tTcshhGV.E.llSTAlpGllFsLluuQPLlIlG.TGPlLVFEchhFsFCcspsl-YLshRlWIGhWlshlsLllVAhEuShLVRaITRFTpEhFusLISlIFIYEohtKLlclacsaPlptsa.............................................t.hsht.hshtps..hsuhhhsssssh..tstPNsALhSllLhhGTFhlAhhL+cFKNS+FFPuplR+lIuDFuVsIuIllMlhlDahls.shTpKLpVPsshpsTsss.RGWaIsPh...ss.PhWhhhAuslPALLlhILIFM-pQITslIVs+KE+KLpKGuGaHLDLLllulhhulCulhGLPWhsAATVhSloHlNuLpl.ScssAPGEpPchhtV+EQRVTGllVhlLlGLSllhsslL+hIPMsVLFGlFLYMGVoSLsGIQl................a-RlhLlhMPsKapPDhhYlR+VpshRhHLFThI 
...........................................................................................................................................................................................................................................................................................................................................t...h...................s................................h...............................h..h...h......h................a..h.hhh.sh.t.....h.h..h.p....tc.hs.h.sh.hh..s........................................................................................................................................................................................................................................hh.....................h...h.ths...................................................................................................................................................................................................................................................................................h.hlhhh-...........t.ls........hh.t...t.........h.......ps..........sh.th..sh.hhhs............h.shhu.hP.......................................t..............................................................................................................................................................................h...............EpRhs.....hh...........h..h.........................h..............l...t..h....P.s..........hh.GhFhhhu.....h.s.....h........................h........h......................................................................................................................... 0 277 517 835 +467 PF01966 HD HD domain Enright A, Ouzounis C, Bateman A anon Enright A Family HD domains are metal dependent phosphohydrolases. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.83 0.71 -4.10 154 24448 2012-10-01 20:28:14 2003-04-07 12:59:11 17 317 4881 70 6755 26042 7473 120.90 18 28.53 CHANGED hhpHslpVstlsttlutphs..........................p.thhhhuuLLHDlu+sh..p............................Hshhutphlpphtt...................lhphltpHppth........................................................hshtstllphADtlpsh .................................................................................hpHolp..V..h.p.h.u..t.t...l.u.p.phs.......................................................................s.h...p.l...l...t...h..A...u....L...lHDl.G..+.....s....h....hs.c...................................................................h..t...H.s...h..u...t....p....h...h..p.p.h.t..t..h.............................l...h.....p..hl..t..t.H.p.tth.t....................................................................................................h..........t.h....l.lthsD.ht..h............................................................................................................................................................................ 0 2723 4803 5965 +468 PF00271 Helicase_C helicase_C; Helicase conserved C-terminal domain Sonnhammer ELL anon Published_alignment Family The Prosite family is restricted to DEAD/H helicases, whereas this domain family is found in a wide variety of helicases and helicase related proteins. It may be that this is not an autonomously folding unit, but an integral part of the helicase. 
20.90 11.60 20.90 11.60 20.80 11.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.48 0.72 -4.12 480 89228 2012-10-05 12:31:07 2003-04-07 12:59:11 26 1286 6190 172 32236 78491 16395 81.30 23 8.77 CHANGED phhpttshpshhlaG...................shspppRpphlppFpsspt...................................plLluTslsupGlclsslshVl.h-h....................shs.pp...........hhQphGRsuRhu ................................................................................................................................t......th..t.h.h.h..l..cu..................................................p.h..s..p...p....p......R....p.......p.......h...h....p...p....F....p....p..s.ph...................................................................................................................pl.L..l..u.......T....s...........l....h.....s....p...........G.....l.........c..l.........s.........s..........l.......s........h.........V..l.......h..ch.....................................................................sh.s..hps.....................h.h.Q..p.......h.....G...RsGRs.......................................................... 0 11050 19238 26835 +469 PF02602 HEM4 Uroporphyrinogen-III synthase HemD Bashton M, Bateman A anon COG1587 Family This family consists of uroporphyrinogen-III synthase HemD EC:4.2.1.75 also known as Hydroxymethylbilane hydrolyase (cyclizing) from eukaryotes, bacteria and archaea. This enzyme catalyses the reaction: Hydroxymethylbilane <=> uroporphyrinogen-III + H(2)O. Some members of this family are multi-functional proteins possessing other enzyme activities related to porphyrin biosynthesis, such as Swiss:Q59294 with Pfam:PF00590, however the aligned region corresponds with the uroporphyrinogen-III synthase EC:4.2.1.75 activity only. Uroporphyrinogen-III synthase is the fourth enzyme in the heme pathway [2]. 
Mutant forms of the Uroporphyrinogen-III synthase gene cause congenital erythropoietic porphyria in humans a recessive inborn error of metabolism also known as Gunther disease [1]. 27.50 27.50 27.80 27.50 27.20 27.40 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.59 0.70 -5.12 137 4602 2009-01-15 18:05:59 2003-04-07 12:59:11 10 22 3824 15 1293 3545 2287 221.40 21 70.07 CHANGED sphspt...l..pphG.hpslhhPhlc.hts..........t.ltthh.ptht...phchllhTSssuVch.hhp.h...........................t..hphht....shplh.uVGppTupslcp...h.G......hps.h.....hss.phsu.csL..hp.hltt..........thts.pp...lLh.....hpuphs..pstlh..ptL.p.....ptG..hplpplh...sYcs......t......................httphhphlpp..tph..........cslshsSspsscp.hhphhtt...........hhpshplsu.IGssTucshp.chGhps.....hhu..cpsshcul..lp ...............................................t..h.thl.pt...h.G..hts...hth...Phlc.hps.......................t..t.l.p.ttl.....t.....t.......l.............................phchllhoSts...AVchhhphhp............................................................t...th.ph..t........sh.p.h.h..ul.GpsT.Ap..t.L..cp.........h..G.........hps.th......................hPp....p.tsu..-sL....lp..tltt................................phss...p+....................lLl...................hcustu............pph.ls.......psL.p.....................ptG.....h.p.Vpp.h....sYpp........t...............................htpht.p.t.hpp.....tph......................................ssl.shoSupslps....h.hphhsp.................................hhppspl..hs..lG.psActh....p...p....hG......h..p.s.........h.h..s..pp.s.tulh.t................................................................................................... 0 418 849 1111 +470 PF00173 Cyt-b5 heme_1; Cytochrome b5-like Heme/Steroid binding domain Bateman A anon Bateman A Domain This family includes heme binding domains from a diverse range of proteins. This family also includes proteins that bind to steroids. 
The family includes progesterone receptors such as Swiss:O00264 [1,2]. Many members of this subfamily are membrane anchored by an N-terminal transmembrane alpha helix. This family also includes a domain in some chitin synthases. There is no known ligand for this domain in the chitin synthases. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.61 0.72 -4.18 210 5879 2009-01-15 18:05:59 2003-04-07 12:59:11 23 159 836 98 3763 5749 252 82.90 25 17.81 CHANGED haohpElpcHs...........spcssWlllcG.+VYDlTp.FlccHPGG..............tcslhttAGpDsT-tF...................................................ssh...Hu..spA+phLcp...ahlGplt ............................hohpEltp+s............pppsh.al.s.l....p..G.......p......VYDlTp...ah..p....p.....HPGG.......................................tphl..h..p...h..A.......G...c...Du.Tc...t.F..........................................................................................................t.th.....Hs.......ph.h...p.p..hhpp.........hhlGpl.h................................................................................................................................................... 0 1318 2157 3127 +471 PF01814 Hemerythrin Hemerythrin HHE cation binding domain Bateman A, Yeats C anon Yeats C Domain Iteration of the HHE family ([2]) found it to be related to Hemerythrin. It also demonstrated that what has been described as a single domain ([1]) in fact consists of two cation binding domains. Members of this family occur all across nature and are involved in a variety of processes. For instance, in Nereis diversicolor Swiss:P80255 binds Cadmium so as to protect the organism from toxicity ([3]). However Hemerythrin is classically described as Oxygen-binding through two attached Fe2+ ions. And the bacterial Swiss:Q7WX96 is a regulator of response to NO, which suggests yet another set-up for its metal ligands ([4]). 
In Staphylococcus aureus P72360 has been noted to be important when the organism switches to living in environments with low oxygen concentrations ([4]); perhaps this protein acts as an oxygen store or scavenger. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.79 0.71 -4.00 189 6352 2009-01-15 18:05:59 2003-04-07 12:59:11 18 141 3149 37 2104 4919 320 127.90 17 51.44 CHANGED shhphlpppHcpltphht.....plpptssph.....................htthtphhpplhp....hptHhptEEph......l.....aPhhp..................thtt.h.pthpp..-H.....cphtph.lpplpphhppht............................tthhphhpthhphltpHlppE-..phlhshh ...........................................................................................................phlpppH.p.tl.t.c.hhs...........pltphhsph.......................................hptl.t.p.h.h.p...t...lhp...........h.p....t.H.h.p.p.EEph......h......aPhlp............................h..tt...h...pthpp......cH...................................cphtph.....lp.pl..p.phhp.shs..............................................p.th.hp.t..h....p....t.h...h..p.h.l.h.pHlthE-phhh.......................................................... 0 719 1432 1824 +472 PF00353 HemolysinCabind hemolysinCabind; Hemolysin-type calcium-binding repeat (2 copies) Finn RD, Bateman A anon Prosite Repeat \N 20.30 5.80 20.30 5.80 20.20 5.70 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.88 0.74 -7.19 0.74 -3.06 305 35811 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1002 944 173 11803 38527 14276 17.60 43 19.13 CHANGED hGusGsDplhGusGsDhl ..............GusGsDt.LhGGsGsDhl.......... 
0 2356 7679 9410 +473 PF00132 Hexapep hexapep; Bacterial transferase hexapeptide (six repeats) Sonnhammer ELL anon Prosite Repeat \N 20.60 16.00 20.60 16.00 20.50 15.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.13 0.73 -8.23 0.73 -4.23 216 70883 2012-10-02 11:29:45 2003-04-07 12:59:11 19 311 5065 687 17837 54606 32792 35.10 23 25.26 CHANGED sshIGpsshlsssshlhssspIGcsshlusssslst .....................shlGpsshl..ss..s..s..h.....l....h.....s...s....s.....p....I.....Gcs...s.hlusssslt..................... 2 5663 11374 14981 +474 PF03129 HGTP_anticodon Anticodon binding domain Bateman A anon [1] Domain This domain is found in histidyl, glycyl, threonyl and prolyl tRNA synthetases [1] it is probably the anticodon binding domain [2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.87 0.72 -4.04 116 17881 2012-10-02 17:25:11 2003-04-07 12:59:11 15 90 4984 142 5154 13645 6909 92.50 22 17.01 CHANGED pVhllslsp...........thh.....phstcltppLppsG.............lpsphD..pspplG..t+hccs-hhGlPahlllGpc-l..p.........p..splsl+pR....sst.cp.....h..plshsclhph.lpph ..................................................plhllslsp..............pht......phA.pc...l.....h...p.p.L...p.p..t..G...............................lc.l...p...h....D.........c...s......c....ph..G.....h+..h+.c.s-.h.h.shP..at..l....l.lGcc-l..p....................................s..spVsl+..pp..................pst...-p..................t....pl.sl..s.c..lhphl...t................................................. 0 1742 3245 4321 +475 PF03578 HGWP HGWP repeat Bateman A anon Pfam-B_220 (release 7.0) Repeat This short (30 amino acids) repeat is found in a number of plant proteins. It contains a conserved HGWP motif, hence its name. The function of these proteins is unknown. 
20.40 20.40 20.40 20.60 19.40 20.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -7.27 0.72 -4.63 91 1454 2009-01-15 18:05:59 2003-04-07 12:59:11 10 26 5 0 1162 1453 0 27.60 65 31.14 CHANGED AhsA.ADWChRLHGW.lhPPLL....GV..as.FTs ..........A.sA.ADWChRLHGWPIM.PPLL....GlYs.FTs............... 1 0 0 1 +476 PF00633 HHH Helix-hairpin-helix motif Bateman A anon Prosite Motif The helix-hairpin-helix DNA-binding motif is found to be duplicated in the central domain of RuvA [2]. The HhH domain of DisA, a bacterial checkpoint control protein, is a DNA-binding domain [5]. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.31 0.73 -7.19 0.73 -3.96 104 8740 2012-10-03 02:11:09 2003-04-07 12:59:11 18 51 4519 41 1840 8454 3232 29.70 42 10.45 CHANGED phpshhssoh--LtslsGlGttpApslhph ...........t.asGplPpsh-pLhsLPGVGcpTAsslhs....... 0 624 1195 1556 +477 PF01079 Hint Hint module Finn RD, Bateman A anon Pfam-B_766 (release 3.0) Family This is an alignment of the Hint module in the Hedgehog proteins. It does not include any Inteins which also possess the Hint module. 
29.70 29.70 29.70 29.80 29.50 29.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.45 0.70 -5.14 46 484 2012-10-03 10:25:13 2003-04-07 12:59:11 15 23 195 12 241 449 25 193.60 31 44.69 CHANGED uEs.......SsA.A+oGGCFPGsupVplpsGtpK.lp-LpsGD+VLA...sD.ssGpllaS-VlhFlDR-s......pppcpFhsIcT-ss.cplsLTssHLlFVucsp................ssss........................pssFAucVpsGphlhl........psssptlpsscVhclst.ppppGsYAPLTspGTlVV-sVlASCYAllcscsLAHhuFuPl.Rlhpt.........................ltshhhs..................................st.pppG.......lHWYuplLYpluphlLs .........................................................pts.GC..FP...usup.VplpsG.tp+hlc-L..psGD+VLu...hs...tpG..p.h.h..aScll...h.Fl.DR-.s...............p...t..p.ptFhsl..c...T......c...........s..s....cpLtLTs.sHLlasspps.......................sts.........................................pshFAsclpsGpplhs.......................sssttlhsspVtp...........l....sh.....t...pt....hG.hYAPLTtpG.ollVssV.lASCaA......hlp.......ppthuphsahPh.Rhh..t.........................h............................................................................................................................................... 
0 86 134 190 +478 PF01634 HisG ATP phosphoribosyltransferase Bateman A anon Pfam-B_1142 (release 4.1) Family \N 25.50 25.50 26.00 25.50 25.10 25.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.92 0.71 -4.66 18 3683 2012-10-03 15:33:52 2003-04-07 12:59:11 13 11 3572 24 1021 2521 1995 161.50 36 61.55 CHANGED l+stDIPtaVtpGtsDLGIsGhDllpEpp.....ts....VppLh-LsFGpC+LslA.lPps.t..h..pssc-ls..........thRIATpaspLsccYhccps.......................hssclhhlsGSlElu..PtlGlADuIlDLVpoGpTL+tNGL+.I..-slhc.SoAtLIss+ssh..cppcs..hlppllsRlpuVl ...................lRssDlPsaVtpGssDlGIsGcDlLhEpt.............ts....................hhp..lh..cL.....sF.....G.....t.....C.......+.....h.....sl.A.sPps......ts...h......ps..tslt............ttRIATpYPpls+cah.s.p.cG.......................ls.s.c.llp.LsGS.VElA.............P.h...h...GLADuIsDlV...............sT..GsTLcANGLc.l..-sIhc.SpApLIss...cssh.......pp..cpp...hlcpll.p+lpsh.............................. 0 333 672 875 +479 PF00512 HisKA signal; His Kinase A (phospho-acceptor) domain SMART anon SMART Domain Dimerisation and phospho-acceptor domain of histidine kinases. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.08 0.72 -3.98 265 79834 2012-10-11 19:05:54 2003-04-07 12:59:11 20 3878 5148 22 22919 67684 10793 67.20 24 10.76 CHANGED tppphlusluHElRoPLsulhshschLpp.................tphstpp..................pphlptltppspc.ltpllsclLphu+hcss ......................................hpphlusloHE......LRTP....L...s...s....l....h....u...h....s..c....hltp........................................tthsspp..........................................pch...l...p...t...l.t...p...p...s...p...c..lt.p.l.lsclLshu+hpt...................................................................................... 
0 7780 15044 19599 +480 PF00850 Hist_deacetyl Histone deacetylase domain Bateman A anon Pfam-B_343 (release 3.0) Domain Histones can be reversibly acetylated on several lysine residues. Regulation of transcription is caused in part by this mechanism. Histone deacetylases catalyse the removal of the acetyl group. Histone deacetylases are related to other proteins [1]. 20.40 20.40 20.60 20.70 20.30 20.30 hmmbuild -o /dev/null --hand HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.46 0.70 -5.13 148 5457 2012-10-01 22:40:15 2003-04-07 12:59:11 14 64 2207 138 2589 5145 3452 289.40 27 65.04 CHANGED H....................st......s.HP.Ess.....pRlpt....lhptLtp......s.s.........lh.th......................................................................sp.ss.pppl.t....tlH............................................................stpY.lppl......................................................pptt.pt..tt..................................................st.s.h................sstshpsu.....hhusGuslpAsc.tlh.....ps..........................................................t...cpu.....asls......RPPGHHA.p............................................t................................................................................................................sp............ut.....GFClaNslAlAAphh....................hp...hs.hp...RVhIlDhDVHHGsGTpchFh...............................................s................c.................spVhhhSlHp....s.....t..aP.....oGhh.pch.G..................................................................ttutu...tslNl.PLss........s..........ssDpsahphh...........pp.hlh.sh...hpp...............................F.pP..-llllsAGhDuttsD............sluph.slo.................spuatph...schlhphuptht...l.lhllEGGYsh.pslupsstthlpslh 
.............................................................................................................................................................................................................hhh..............t.HP..pstRlp.........hhp..t..Lht...........t..s...lhtph....................................................................htsp.As...tpcl....t...hh..H............................................................s.tc.Ylphl................................................................................................................................pphp....tt.tt...........................................................................stss.h.........thhc.t.s........thusG.usl.tus..c.tlh........p.s........................................................................................................................................................................................................................psu.....h.uls..tsshHHA..p..........................................................t............................................................................................................................................................................sp..............us............G.F.Chh...N.s.lulAhphh....................................................hp..........t..hp....RVhll.Dh.............D...lH...HGs..GspphFh........................................................................................................s......................................c.................scVhhhSh...Hp................................................tthaP........ssG.th.p-h..G......................................................................................t.u.s...............hslNl..P.Lss.....G.....................hsD...t.sa.h..p.hh..........................cphl....l....hp..t....................................................F...pP...-hllls..........sGh.....Du.htsD................................Lu.p...h..s..lo..............................................p
.s...as.p.h........sphl.hphu.........................................llhl...ht..GGYsh....pslspshth.ht...h.................................................................................................. 0 894 1512 2100 +481 PF00010 HLH Helix-loop-helix DNA-binding domain Eddy SR anon Unknown Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.60 0.72 -4.27 164 13830 2009-01-15 18:05:59 2003-04-07 12:59:11 21 125 1653 43 7025 12625 9 53.50 29 14.63 CHANGED pRpttsthE+pRppplNpsh.ppL+phlPp.s......................................tspK.lsKsplLchAlcYIppLp .................................+.t+s.h...hE.R.......p..R.R......p.c........lNpt.......h.p.....p.L.+pl..lPp..s......................................................................................................................spK..h.....s....K..sslLct.AlpYlppL......................................................................... 0 1550 2941 4943 +482 PF00403 HMA Heavy-metal-associated domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.90 0.72 -3.82 73 19664 2009-01-15 18:05:59 2003-04-07 12:59:11 21 116 4700 120 6028 15739 1631 61.20 26 16.34 CHANGED phtl.shsCssCspplppslppl.sGV...pslplshts.pplslph....sss............pp..lhptlp........p...huap .................hplpGM.oC.u..u.Csppl-c.sLp.p........l...s....G..V..............pp.s..p...V....s..l...s...s...p..p.....s..pV.p..h..............sssh...........................pp.....lhpslp..........p....hGa.......................................................... 1 1678 3706 5056 +483 PF00368 HMG-CoA_red HMG-CoA_red1; Hydroxymethylglutaryl-coenzyme A reductase Finn RD anon Prosite Family The HMG-CoA reductases catalyse the conversion of HMG-CoA to mevalonate, which is the rate-limiting step in the synthesis of isoprenoids like cholesterol. 
Probably because of the critical role of this enzyme in cholesterol homeostasis, mammalian HMG-CoA reductase is heavily regulated at the transcriptional, translational, and post-translational levels [2]. 17.10 17.10 17.80 17.30 16.70 16.40 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.14 0.70 -5.80 116 2199 2009-01-15 18:05:59 2003-04-07 12:59:11 13 21 1868 100 595 1829 689 348.70 33 68.69 CHANGED pculphRRphlp...phsstp..........................................lppls.tshDhp.....................................shspshENhIGhlplPlGlAGP.lhlsGct........ahlPhATTEGuLVASssRGs+slstuG.Gspstlhp-uMoRuPshth.....sltcAtphtpWlp..cshpplcphup.....sTopau+Lpclcs.hl.......sGphlalRFthsTGDAMGhNMlopusEtshphl....pcph..ss....hphlulSGNhssDKKsuslNhlpGRGKpVsAEshlst-llpp.hL...+ssscslsc.lsht+NhlGSAhuGuh...GaNAHhANhlsAlalATGQDsAplsESSpshThh-sp.ss...............-LhhSlThPoltVGTVGGGTtL.ssQptsLclLGlp.ss......sAppLAc.....IlAAsVLAGELSLhuA......Luuscls+uHhpLs..R ..........................................................p......Rhthlt........t..t.p..........................................hp.h.......pths..p.....................................lhs.phhENslGhhtlPlGls.ss.lh..ls.Gpp.........................YhV.PhsT.pEsul...VAusspGuKhlp.t...........u..G....G.hps...............hlhp.c.tM.htps...............slt.....slsc....s.t.p.h.p.thlp......pphpp..ltp..h.ss................hspRtGthpclps..ph................................ts.p..h..l.h..lcht..scTtDAMGtNMlsphhEslhsh.l....pp...h..ts.............tl..huI.uN.aso-....................phVssp.stI.shchl.pc.hh......c.s...scpls..c.lsh.spphstss.htA..h...saNtthhNslsAlhlATGpDhtsl.tu.upuh.st.h.-uphpu.............................tpLhsslolP.l..luTVGGuotl..PhtptsL.clLGst..............sAcpLAp.....lluussLAtphuhhpA......Lsu...stltpuHMpLp................................................... 
1 177 339 495 +484 PF02301 HORMA MAD2; HORMA domain Bateman A, Mian N anon [2] Domain The HORMA (for Hop1p, Rev7p and MAD2) domain has been suggested to recognise chromatin states that result from DNA adducts, double stranded breaks or non-attachment to the spindle and acts as an adaptor that recruits other proteins. MAD2 is a spindle checkpoint protein which prevents progression of the cell cycle upon detection of a defect in mitotic spindle integrity. 20.70 20.70 20.90 20.80 20.50 20.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.31 0.70 -4.85 95 975 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 319 45 638 916 11 183.40 20 60.33 CHANGED o.ptStpllpchlphul..............ss...........IhYhRG..........laPscsF..pphchh...............................................s......sspss...plhp.alpp...tlh-hLppp..........hlpplslsIh................ttc......slEpapFslph.............sp................................................................hstpphppplps..........llRpls.shsphL..s.L................ct...shshclhhas...........................s...........-...hpsPh.chsps.................................p...hhss..pphpltshsTshH 
............................................................tshthlpchhphul..............ps...........ILY.Ru.................lYPt...psF..pph..ph.h..........................................................................................................hspssplhp..alpp.......tltch.lppt.....................hlppl.slsIh.....................ptp...................slEpapFchph..............tt......................................t...........................................................................t.t.hs..p..plppplps...........ll+p..ls.sts.p.h.L..s.L......................ct.......h..shplhhas............................................s...........s.hpssh...p.tpp.............................hphtthts....................................................................................................................................................................... 1 196 321 493 +485 PF00104 Hormone_recep hormone_rec; Ligand-binding domain of nuclear hormone receptor Sonnhammer ELL, Griffiths-Jones SR, Bateman A anon Prosite Domain This all helical domain is involved in binding the hormone in these receptors. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.13 0.71 -5.20 86 7507 2009-11-03 19:52:29 2003-04-07 12:59:11 25 75 533 1034 3461 7141 2 187.90 19 41.59 CHANGED phhp.ptpppph....phhpthshpphh.phhppp...hhtsscahpp.hstFp.pL.shp-ph.tlLcshahhhhhlphhtpss...........htppphhhssstth.hhphpph......................th..c.lp...........hhpphhp.phtpL.ph....sphEhshhhshlhaph.....stpphp.......chpc..h.scphpptlsssLccYh............pthphsp....................stRlscl..lpllstlp............ph...............hphhpp..................hplscla ....................................................................................................hh.....................h.tphs..phh.sp.t...lht.hlcaAKp.lP.s....Fp...pL...sh..........pD...............Q............l...tL....L.c...s..sh.h.El....h.hl....t.hshp.h...............................................tpst.lh.h...s.s.s.hh...h.s...p.p.phpph.............................................h.hhp.hhp..........................................................................hhh.ph..s....p...php.pL..pl......sppEhs....h.l.p.AllLhss........................s.t..slp...............................pspp.........lcp...lp.cp..h.hps.Lppah....................ptt.t...t..p................................................................t+h..s..cL..lhhls...tl+........ph..........s..h.t...................h...................................................................................... 0 821 1092 2529 +486 PF03241 HpaB 4-hydroxyphenylacetate 3-hydroxylase C terminal Bateman A anon Pfam-B_3148 (release 6.5) Family HpaB Swiss:Q57160 encodes part of the 4-hydroxyphenylacetate 3-hydroxylase from Escherichia coli [2]. HpaB is part of a heterodimeric enzyme that also requires HpaC. The enzyme is NADH-dependent and uses FAD as the redox chromophore. 
This family also includes PvcC Swiss:O30372 may play a role in one of the proposed hydroxylation steps of pyoverdine chromophore biosynthesis [1]. 25.00 25.00 59.60 31.30 22.50 20.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.80 0.71 -5.48 87 994 2012-10-01 23:33:27 2003-04-07 12:59:11 8 3 749 14 219 738 251 197.20 33 41.46 CHANGED chssthh.ps.uhhphhsapu.sRhul+h-hlhGhutths-ssGs..schptVpp+luEllthtEhhhuhshAsttpuptt...sGshhPsh.hhpss+hhhsphhs+lhcllpcluuuullh..lPSps.Dhpss........-ltshl-+Ylpus.shsuc-Rl+lh+LhhDhssopauuRpphaEhahuGss.th+.htlhpths..hpsttchscphhsh ..................t.hspthh.cs.uhh+hhshQuhsRhuVKhDFlhGlhtpsh-ssGs..scaptVQscLuEllshpphhaAhu..uhsscup.h....hs...........G..salP-hthhpshRhhssphY.s+Ipcllcchsuuullh..LPSps.Dhp.sP........plsthLsKYl+Gstuhst.cRlKlh+LhWDhhuSpFGuRa.plYEhpauGs.pph+.hph.hpphp...tssphchhhshsp........................ 0 82 138 182 +487 PF02185 HR1 Hr1 repeat SMART anon Alignment kindly provided by SMART Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.42 0.72 -4.23 106 1359 2009-01-15 18:05:59 2003-04-07 12:59:11 11 60 288 5 738 1202 2 69.40 23 12.45 CHANGED ptlpplpcclphEhpl+pGuEshh+h......sssppp...........hppspsplpcopp+lphL+ppLcchptpt.stpsss .............h.hhpplp+clphEhpl+pGAENhhchh......ss-++t......................htpsptplpcSpp+..lphL+tpLpc.hptph...p...t................... 0 142 264 477 +488 PF00570 HRDC HRDC domain Bateman A anon Medline:98060076 Domain The HRDC (Helicase and RNase D C-terminal) domain has a putative role in nucleic acid binding. Mutations in the HRDC domain cause human disease. It is interesting to note that the RecQ helicase in Deinococcus radiodurans has three tandem HRDC domains [4]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.84 0.72 -4.26 147 6087 2012-10-03 03:05:55 2003-04-07 12:59:11 18 73 3520 22 1677 4778 961 67.30 27 11.64 CHANGED phslhcpLpphRcphA.ccp..s.lsshhlhs-psLhplApt.hPpshpcLtpl...pGluptplc.caGpp..hlphl ......................tlhptLtphRcph.A.ccp.......s..lsshh.....lhsDpsLhphA..c......t..h....Pp....o....hs-lhpl....pGlu.tp.+lc..+aGpt..hltll................................ 0 541 1053 1412 +489 PF03878 YIF1 Hrf1; YIF1 Wood V, Bateman A anon Wood V Family YIF1 (Yip1 interacting factor) is an integral membrane protein that is required for membrane fusion of ER derived vesicles [1]. It also plays a role in the biogenesis of ER derived COPII transport vesicles [2]. 25.00 25.00 27.70 27.00 21.90 24.90 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.54 0.70 -5.22 6 451 2012-10-01 22:34:14 2003-04-07 12:59:11 10 7 302 0 283 437 3 221.80 34 73.12 CHANGED hltsshss.huhtYGpslsspGpEhVcpshs+aluss+L+YYFsVsssYVspKLtLllFPahHps...............Wphphc.......p-s.lPPR.DlNAPDLYIPhMAFlTYlLlAGhtLGlQc+FoPEhLGlpASoALAalhlEllhlpLuLYLlslp..oshsslDLLAasGYKaVGlILusLssllah...uYYllhsassluhuhFllRoL+hslL...usssstshsshs...........tppp++hYhLhhlAAsQ.llhaWLo ......................................h...t.h...huhtaG.pshhspup-hhpppl.s..+al..s.s.....s.....t..L+aYFsVsstYVhpKLtL.llFPa.hHc.s.................................................Wphphp..........................ptt.hsP.R.D..lN.......u........P....DLYIP...................hMuhlTYlLluu....hhhGh.......p.......s............p....Fs.P.EhLuhtsopuls...hhhh.EllhlpluhYll.s..lp......ssh.shlD..LlA.asGYKaV...u.h.lls.l.ls.s.h.l......h......s.....t......hsa.....a.....h..s.hhashhuhshFl.........l.........RoL+hhll..........s.sss.t.t......t....................ttpph+hhhlhhhu.hh.Q.hhhhWL.......................................................................... 
0 99 159 232 +490 PF02793 HRM Hormone receptor domain Bateman A anon Bateman A Family This extracellular domain contains four conserved cysteines that probably for disulphide bridges. The domain is found in a variety of hormone receptors. It may be a ligand binding domain. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.80 0.72 -3.96 131 2035 2009-01-15 18:05:59 2003-04-07 12:59:11 17 119 140 37 924 1697 0 64.90 28 8.33 CHANGED s.hCstsaDsh.hCWPpsssGphsshsCPphh...t.pt.........................pu.....pspRpCsts....GtWsph...........sshspCtt. .............................t..hCstpaD.th..hCWP.potsGph.s.thsCPp.hh.......t.......................................................pG......pshRpC.sts......G....Wsth...........s.sshspCt..................................................................... 0 149 213 503 +492 PF01381 HTH_3 Helix-turn-helix Bateman A anon SCOP Domain This large family of DNA binding helix-turn helix proteins includes Cro Swiss:P03036 and CI Swiss:P03034. Within the protein Swiss:Q5F9C2, the full protein fold incorporates a helix-turn-helix motif, but the function of this member is unlikely to be that of a DNA-binding regulator, the function of most other members, so is not necessarily characteristic of the whole family [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.58 0.72 -4.12 175 36941 2012-10-04 14:01:11 2003-04-07 12:59:11 17 294 5111 176 7651 48830 5885 53.20 22 32.31 CHANGED l+phRp..ptshop..pplActhG..lspstlsphEpG....pppsshptlhpluchls.lshshl .............................lpthRp....pp..s..l.oQ........pcL..A.c.t.hG............lo.p..s.s....l....u....p..h....E......pG........pp...p.P...s....h....p..h....l...h...p.l...uphhs.hsht.................................... 
0 2481 4888 6400 +493 PF01402 RHH_1 Ribbon-helix-helix protein, copG family Bateman A anon Bateman A Domain The structure of this protein repressor, which is the shortest reported to date and the first isolated from a plasmid, has a homodimeric ribbon-helix-helix arrangement [2]. The helix-turn-helix-like structure is involved in dimerisation and not DNA binding as might have been expected [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.46 0.73 -7.61 0.73 -3.86 49 5913 2012-10-02 18:44:02 2003-04-07 12:59:11 16 35 2497 74 1378 3973 439 38.40 24 37.71 CHANGED plolplscchhcpLcchuccts.hS+Sphl+tAlpphhpp .............lslplsp-hhcpLcphu.......p..p...p...s..ho+.S.cll+chlpphl............. 1 404 897 1169 +494 PF02954 HTH_8 Bacterial regulatory protein, Fis family Griffiths-Jones SR anon Structural domain Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.63 0.72 -4.41 86 19848 2012-10-04 14:01:11 2003-04-07 12:59:11 14 176 2830 54 5558 15626 3713 41.30 31 9.21 CHANGED tlcc..lEcphltpALcpsssshscAAch.LGloRpoLth+l+ch ........................tp.hE+phIppsL..p...p...sp...G...s......h...s...c....A....Ach.LGl.s...RsTLhcKlcch................ 0 1884 3516 4654 +495 PF04005 Hus1 Hus1-like protein Wood V, Bateman A anon Pfam-B_12502 (release 7.3) Family Hus1, Rad1, and Rad9 are three evolutionarily conserved proteins required for checkpoint control in fission yeast. These proteins are known to form a stable complex in vivo [1]. Hus1-Rad1-Rad9 complex may form a PCNA-like ring structure, and could function as a sliding clamp during checkpoint control. 
20.60 20.60 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.81 0.70 -5.47 34 342 2012-10-02 11:47:48 2003-04-07 12:59:11 7 8 284 3 231 353 3 286.70 24 90.95 CHANGED M+F+splss...hpphpcllsslupluKhChl+LsssplhFll.t........sssGsQlWsplphss...lF-..pYp.lpSs.sp..spIsLEl.slssLhRAL+oups..............us..s.........................lpl+Ls++............tphPhLslphpssoh.........................................................tppslsp..-lPVclL.sppphpplpEPph.-sDlp...IhLP.sLhpL+sll-+h+pl................................ushlplpAs...........hpGpLp..........................Lslpo-.hlsloopa.psLtsstht.t..............................t.t.tttssspt.sptthsplpVDhKchsphLp.stplssspslhslsccpslhlhhhh.......pcsslpaalPuhs ..........................................M+F+s.l.s.........hphhpphhsslspL....t......K..h.s........h......l+lsspplphhht...............sssusplWs..pl.t..h.pt...............lFp..pap.hp..us..st...................NpIhlEl.shpsLtRAL+oups.......................................up......s...........................................hpl+Ls++..........................tphP.h.Lslshp.h.sh..................................................................tp.pphlsp...-lPVcll..p...pth...p.plp......p.Ph..s.s..p........s...-lp.....l.h.LP....s........Lt........p........l+..sll-+hcpl.........................................us.hl........lpAs...............hpG..pLp........................................Ltl.po-....h....hplsoha.ps.Lts....h........................................................................t..ttt....p..p.....t.tthspl..plch+ch.phLt....stph.p....spslhs....................lhppp...hhhh..hhh..................pss.lphhls...s.................................................................................................................................... 
0 77 128 192 +496 PF03810 IBN_N IBN_NT; Importin-beta N-terminal domain Griffiths-Jones SR anon PROSITE Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.70 0.72 -4.06 52 2930 2012-10-11 20:00:58 2003-04-07 12:59:11 14 77 330 59 2037 2826 22 72.40 20 7.63 CHANGED AEptLpphppp..Psah.tlhpll.p..ps.s.psR...hhAulhL+shlpp+..Wp..................plstpp+ptI+spllphlhp ..........................................ApptLp.phpp....p...s......s..hh..hLh......pl..........l............s.......p.............s................s......................t..........s.R.............hh...A...ul.hL+sh..l....p..pp....Wp...................................t.lsppp+t.tI+ptllphlh.p......................................................... 1 655 1084 1648 +497 PF01485 IBR IBR domain Bateman A, Mistry J anon [1] Family The IBR (In Between Ring fingers) domain is often found to occur between pairs of ring fingers (Pfam:PF00097). This domain has also been called the C6HC domain and DRIL (for double RING finger linked) domain [2]. Proteins that contain two Ring fingers and an IBR domain (these proteins are also termed RBR family proteins) are thought to exist in all eukaryotic organisms. RBR family members play roles in protein quality control and can indirectly regulate transcription [3]. Evidence suggests that RBR proteins are often parts of cullin-containing ubiquitin ligase complexes. The ubiquitin ligase Parkin is an RBR family protein whose mutations are involved in forms of familial Parkinson's disease [3][4]. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.68 0.72 -4.10 166 4521 2012-10-06 20:19:25 2003-04-07 12:59:11 16 133 321 4 3180 4384 29 66.80 23 18.68 CHANGED p+...a.ph...................tppph.........p..aCPs.ssCpthlthpptspp..................................................hpCs......Ct.tpaChpCppth....Ht..........sh..sC ..............................................................................................t.ph...........................+....hC.Pp........Cp.tsl..pt..s.s.sssp..............................................................................................................hpCs..t...........Ct..tpF.Ca..h..Ct..t.t.h......Ht..................................................... 0 984 1588 2459 +498 PF01614 IclR Bacterial transcriptional regulator Bashton M, Bateman A anon Pfam-B_755 (release 4.1) Family This family of bacterial transcriptional regulators includes the glycerol operon regulatory protein and acetate operon repressor both of which are members of the iclR family. These proteins have a Helix-Turn-Helix motif at the N-terminus. However this family covers the C-terminal region that may bind to the regulatory substrate (unpublished observation, Bateman A.). 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.35 0.71 -4.45 23 10733 2012-10-02 14:34:25 2003-04-07 12:59:11 13 33 2457 47 2964 8004 908 124.00 23 46.91 CHANGED tphpss.tlGp+lPlauouhGKsLLAah.sccthpphlcp..hphpphTptTlos.psLhcpLsplRpp.GauhssEEpptGltulAAPlast...puplluAlSlossssRhscpphpp...hhshlppsApplopp .............................................................................h....h..tlGp+.hP.h..a.s.o.uhG+slL...Ahh...sp....p.p...h.p.p.h...l.pp..............t.h...p....t...h......o.......t....p..........o.....l...s......s...............t.......t...L....h.....p.......p......L....t......p.......lR.......c....p......Ga.u......h...........s...........p..........p......E..h..p............G.....l.p.....slAs....P.l...h..st..........pG.p...sl...u...Al......olu............u....s...s..t...R..h...sp..p..p.hpp..........hhs.hlhpsAppls..t...................................................... 0 689 1722 2433 +499 PF04760 IF2_N Translation initiation factor IF-2, N-terminal region Kerrison ND, Laursen BS, Studholme DJ anon DOMO:DM04974; Family This conserved feature at the N-terminus of bacterial translation initiation factor IF2 has recently had its structure solved. It shows structural similarity to the tRNA anticodon Stem Contact Fold domains of the methionyl-tRNA and glutaminyl-tRNA synthetases, and a similar fold is also found in the B5 domain of the phenylalanine-tRNA synthetase. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.33 0.72 -4.31 141 7695 2012-10-04 14:01:11 2003-04-07 12:59:11 10 48 4486 1 1683 5556 2986 52.00 26 10.04 CHANGED .splpVp-LAccls.....h.ssp-llcpL.p.h....Glt..tshsssl-p-psphltpphshp .........pplsVtELAcchs.......h.pss-llcpLhp.h........Ghh...sohsps.l-p-.s.hphlspchsh........................... 
0 547 1098 1438 +500 PF05198 IF3_N Translation initiation factor IF-3, N-terminal domain Bateman A, Finn RD anon Pfam-B_629 (release 2.1) Domain \N 21.30 21.30 21.40 21.30 21.20 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.11 0.72 -4.05 181 4308 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 4162 1 1055 2633 2165 67.60 52 37.87 CHANGED ppp...plN-cI+.s..pcVRLl.spcGcplG..................llshp-ALc.hAc-tsLDLVclu.........PsAcPPVCKIMDYGKa+Y-ppK....Kp+Eu+ ......................hNpt.Ip.......plRll....s..p.Gcp.lG..................................lhshp-ALp.hAcct.sLDLVclu...........Ps..Ac.P.PVC+IM...D...YGKF..+YEppK....Kp+Et+....................... 1 369 701 898 +501 PF05004 IFRD Interferon-related developmental regulator (IFRD) Moxon SJ anon Pfam-B_4730 (release 7.6) Family Interferon-related developmental regulator (IFRD1) is the human homologue of the rat early response protein PC4 and its murine homologue TIS7 [1]. The exact function of IFRD1 is unknown but it has been shown that PC4 is necessary to muscle differentiation and that it might have a role in signal transduction. This family also contains IFRD2 and its murine equivalent SKMc15 which are highly expressed soon after gastrulation and in the hepatic primordium, suggesting an involvement in early hematopoiesis [2]. 
25.00 25.00 25.20 25.60 24.40 24.50 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.98 0.70 -5.34 7 396 2012-10-11 20:00:58 2003-04-07 12:59:11 8 7 219 0 250 334 1 275.60 27 66.11 CHANGED SD--us.pohS....s.s..ssuu.s-.hss.ht-thtppclE-pLcptlDtsp-KS...ApsR.psLctlphhl.s+hh.-Fl.-chhTLh-hlp+sh+KG+u.cEphLusplhuLlslQhGsG.cs-Elhcphts.LpsllpDu.otuhpsRtpCsoslulhsalussDspplht..tshc.lFthsa.h+sssss.Vlsst...slhusuL.AWuLLL......Tlsss.ph..phhppphstLsp......LLpusslslRlAAGEslAllaEhup............sh-p-.........F..hh-...shEpL..........sphLRpLAoDS.sKh+AK+D++tQRssFRDVlchlE .............................................................................................s.......t......t.p...............t...h...ptst...p...pph.p........t...Lpthl-th..h-Kp......sp.sRpsuLpulhph....ht.s+..h.h..........-...hl...c...+hhT.Lhcsht+sl..........K+.Gp......u...c.....E........p..t.h.A.hthhsllslph.s.s.....p.................sc.clhc..p........hts.hL.................pp..hlp.Ds....sss.hp.....s...........ss..hp..sLulssal..us.s..-...p-h....pshc.ha.h.h..hp..tss.p.........h..sst....tsslhsuA.lpuW.ulLl.........................Thh......s.s....plp....p......h....h........cpt........h....t.t..Lss..........lL.p.u.s..-hs....lRlAAGEsl.....ALl..aEhup..........................................s.pp-........................................h..hh.p......shptL....................................hphlppLAs....-......u..sKtt.uKpD++p.Q.RusFRsllphlE...................................................... 0 64 125 193 +502 PF00817 IMS impB/mucB/samB family Bateman A anon Pfam-B_1349 (release 2.1) Family These proteins are involved in UV protection (Swiss). 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.70 0.71 -4.41 113 9024 2009-01-15 18:05:59 2003-04-07 12:59:11 15 52 4250 224 2364 7309 2792 150.20 30 34.04 CHANGED lDhDsFaAosE...thhp.......P..pL..cscPlsVssttt.................................................uhlhsssYtARp.hGl+su.MslhpAtc..l...............C...P.p.l..........lll..................................ssshshYpchSpplh.pllpch.s............lchhS.lDEsal........Dlos..h.phhss...............................................................................................................pplupplccplhcph.t..lssSlGlusschlAKlAoc ...............................................................................................lDhcsFaAulE..........hhpp............P......pL...pspPl..sVuus.t.p...........................................................................................tGllh.sss.h........p.A.+.p..hGl.+.su.hs.h.h.p...Ahc...h....................................s........P...p..l..................................hhl............................................................................................................................sss.h.phYtc..hS.p.plh.plh..pc.a..o......sh........................lEs.h.S..l....DE.ual..............DlTs........shp..hh.s.s.....................................................................................................................................................................................................................................................................tp.l.u.ppl+ppl.h..p.c.h..t.....lssos..G.l.u.s..s..KhLAKlAs................................................................................................................................................................................... 
0 760 1436 1969 +503 PF04836 IFRD_C Interferon-related protein conserved region Waterfield DI, Finn RD anon Pfam-B_4453 (release 7.6) Family Family of proteins thought to be involved in regulating gene activity in the proliferative and/or differentiative pathways induced by NGF [1]. 20.30 20.30 21.30 20.30 19.20 20.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.71 0.72 -3.79 13 196 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 99 0 108 166 1 54.50 53 13.18 CHANGED NEhLR-lF-LGPslh...sssh+shKlo+hE+HhhNuAAFKARTpsRuKhRDKRusV ........NEhLR-lFpLGPslh...sssshKshKlSRhE+HLaN......uAAFKARTKuRuKsRDKRuDl.... 1 24 40 71 +504 PF04762 IKI3 IKI3 family Wood V, Bateman A anon Wood V Family Members of this family are components of the elongator multi-subunit component of a novel RNA polymerase II holoenzyme for transcriptional elongation [1]. This region contains WD40 like repeats. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 928 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.64 0.70 -13.77 0.70 -6.74 34 404 2012-10-05 17:30:42 2003-04-07 12:59:11 7 9 257 0 291 526 10 666.90 27 69.81 CHANGED 
MRNLpslspsphp.tsps.........hslhso..saDsso-slhhshu...ssps.sslElpphtpss........th.thlssa....................sclluhpahsD.......spplshshssGDllhsp.t.............ssspshlEIVGol-sGIpAusWSPDEEllAlsTtp.............psllhM.o+pFEslsEhsLs..s-Dlph..SpaVoVGWGKKETQF+G+t..uKA..................................h+DPThs.pl-pGtlot.Ds.tpssISWRGDGpYFAVool..css...........RRslRVasRE.Gp.L-SsSE.sVDGL...........EpsLuW+PpGsLIAusQ..R.pscp............h-lVFFERNGLRHGEFsL+h..st-Etl.....................hpLpWNuDSslLAl..hh.........p-c.............lQLWThsNYHWYLKQEl.h..t...t.............hpWcsE..+shplhssss..spl.hh-asapsspussh.stD.Gh......ssVIDGpplhlTPhphusVPPPMuhtclphs..........sslh-lAhutsspp............hAslspcs.lhhht..shpth..............................stpPhltsphsh.........t.tst.tph+plsalscs....plhllhsss.........................hsclhlhph.t-spp............hhhpphsphppllhhhspsshpt....hhhpsh.sGplh...........plssptphp.h.............phPphs.phclsthpsst................hhhhGL.ossG+LaAssphlu........sssTShhl..........T.................................psaLL.................aTTspch......................................LpFlcLs..............phpthtls.ssst.......DER....sRpIERGS+LVslhPoc.huVVLQhPRGNLETIaPRhhVLuslRph..lc....pccY+cAFhsCRopRlDlNIlaD.asP-hFhpNlth....FlcQlpclsalsLFlS..sLp-EDVTp..ThY+-sh...................................hp..ttttthhtsspsoKVNplC-AlLpsL........sphhspalpsIlTuasppsPPsLcsuLplltpLpt.......................sstps-pAlcalsFLsDVNpLYcsALGlYDlcLsLhVAQpSQ+...DP+EYLPFLppLpphsphc.R+FpIDcaLp+apKALpaLtphs 
....................................................................................................................................................................................................h...........................................................................................lhsh..h.t..........hhhh.ttGplhhh................................tt..h-hhs..htsGl.shtWSsDt-hlhhhTt.t....................tpllhh.s.pta-sl.....-..ht...pt.t..........tt.lshsa...Gpp.pTQFpGp..hp.............................................................ttspl.oWR.GD..utahul.s...h....s...................................................hRhh+la..s.....R......-.....h.........L...pss.....u...E...h.....ssh..........................................t....sl..sW+Pp..G..s..hIAshp...p..h.pp...............plsFaE.+.N.GL.h.Hup.Fs..l.............tp...h......................t.l.Ws..ssSplLAl...h.............tsh...........................................lplWs.h..t.NY.Ha....YLKppl...h.................................hhWc......pshphhhh.....st...............................th............hphhathst..u.....................p.u......................shVlDG...pplhlTs....hp.s.............lPPPhs.hph.h................t.s.h..tlsh..p.........................hshh.tpt.l.hh..............................................................................h............................h..hhh.ttp.....hhhh....t................................h.....................................................t.....h..s....t............hhhp.........tuphh.................................t.........................t..s...h...hth.h.httt......................hhhul.s.ppttha.h.....s..t............ls....................pshoSh.h..........h............................................tall.....hTs.....p.p.................................................................h.hh.lt................................h........t.........................t....................hRt.lERGuhllsshspp.htllLQhsRGNLEslh.RhhVLs.lt.p...lt....th.....atpAh.hhRp.+lshsllh..D.a.tsp...Fht.p..
ht......Flpp........l........tp.......p..hls.Fhs...................pL.........pppshst..shY...h...............................................................tpKlshlCpthht.hh.........................tp.h.sllTsasppt.s..t.lp.sLthltpht..........................................................t...t-pu...lpal.hhL..-sptlap.uLuhY-hpLshhlAppuQh.............DP+EYLPaLpp...Lpt...................................h.p.....pa.ID.hLtpatpAltpL.t.............................................................................................................................................. 0 106 165 244 +505 PF00478 IMPDH IMP dehydrogenase / GMP reductase domain Finn RD, Bateman A anon Prosite Domain This family is involved in biosynthesis of guanosine nucleotide. Members of this family contain a TIM barrel structure. In the inosine monophosphate dehydrogenases 2 CBS domains Pfam:PF00571 are inserted in the TIM barrel [2]. This family is a member of the common phosphate binding site TIM barrel family. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null --hand HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.10 0.70 -5.56 84 7826 2012-10-03 05:58:16 2003-04-07 12:59:11 20 15 4695 86 1800 6840 6242 406.30 38 94.26 CHANGED cuLTF-DVLLlPstotlh.scsclshp.....lo....+plpLslPllS.AsMDTVTEt.cMAIuhAptGGlGl.IH+.Nhohc.pQsptVccVK+.h-sshlpsshhhtPptpltps.thhtphthsuhslspttt.hhhlht....h.................................h.ppphlss..shshtps.phh.pp+htcl.lVscpspLhullohpDh.+sppaPpAs+....DppspLhluAAlGststshcRhpt..Lsc..A..Gl..DslllDoAHGaSpthlchl+tlKppaP..plplIAGNVsTs-uscs.LIpAG.ADulKVGlGPGSICTTRhVsGVGhP..QlTAlhcsAcsAcph......G....lPlIADGGIchsGDIsKAlAuGAssVMlGShhAGTpEuPGEhhh.hsG+paKpYRGMGSlsAMpc............GSscRYhpts......tpchVspGlpGtVPaKGslpchlhplhuGL+.uuhsYsGupslp-L+cps.......alRlosAth.EupsH 
............................................................................................................................................................................................................................................................................p.uLTF-DVLLlPu.c.......Sphlssps-lsop..........................ls....cs..l.pls.l...P.l.l.o.As.MD......T.V.T....-s..chA...IAhAp..p.G.GlGl.....l......H......+...s......h......o...h...p......pQ.A...p....p....V..ccVKc..csshh.ss.hh..p..h.th..h.tt.thsshsl.tt.....hh....................................................................hhh..s.t.......h.t.ch.ph..hstt..h.shlhhpch.p..phPp.us+.....D.p.pu.+..L..h..V...u..A...A..l.G..s.........s...s........s.....s.........h...-..R....s....c..t.........L.l.c....A....G.....l........D....h.....l.....l.............l............D...........o.....A.......H...........G...H...........S.......p..t...........V.......l.......p.........h...........l.....p.......c......l...+..p...p..a..P...........p......l.p...l...I...A.GN...Vu...Ts.-u.scs....L...h.c.A.............G..A.Du......V.K.V.G.I.G...P....G....S....I.C.T..T..R......l...........l......s.G..........V.....G.s.P....Ql.o.A...lh.-.s...A....p....A.....A..+.t..h...............s........h.PlI..ADGGI+huGD.l..uKA..l...A..u.GAs....s.V.MlG..S....h....h.....AGs.-Eo.P.G.....-h..hh....h...................pG.........+.p...a.....K...p.......Y.hGMGShsA.Mpp................................GutsRYFpts........................tpKh..VsEGlcu.p....Vs.a....+Gslpshl..hp..hhGGLR.S.uhuYsGutslp-Lppps........pFlclss.uthtEut......................................................................................................... 0 567 1087 1490 +506 PF03941 INCENP_ARK-bind Inner centromere protein, ARK binding region Wood V, Griffiths-Jones SR anon Pfam-B_67765 (release 7.2) Family This region of the inner centromere protein has been found to be necessary and sufficient for binding to aurora-related kinase. 
This interaction has been implicated in the coordination of chromosome segregation with cell division in yeast. 20.60 20.60 20.60 21.00 20.20 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.75 0.72 -4.12 20 272 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 207 13 206 269 2 57.30 33 6.18 CHANGED -IsSDs-s--sst.....hslsuWAcuspL.cptlhcQtth...DssplFGsIsPLclEEIFps ..........................-ltSDsps-D-sp.............p.p..slP..sWAc..uspL.pptlhpQtth.....-spplF.G.s...l.....s..hphE-lFt...................... 0 68 112 169 +507 PF04179 Init_tRNA_PT Initiator tRNA phosphoribosyl transferase Wood V, Finn RD anon Pfam-B_16986 (release 7.3); Family This enzyme (EC:2.4.2.-) modifies exclusively the initiator tRNA in position 64 using 5'-phosphoribosyl-1'-pyrophosphate as the modification donor. As the initiator tRNA participates both in the initiation and elongation of translation, the 2'-O-ribosyl phosphate modification discriminates the initiator tRNAs from the elongator tRNAs [1]. 
30.00 30.00 41.80 33.60 28.10 28.90 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.82 0.70 -5.62 20 202 2012-10-02 20:12:17 2003-04-07 12:59:11 7 6 165 0 148 203 4 381.70 35 89.56 CHANGED pL++ssLSlpNRLpSIhpDupFVcc.Vssta.pLPLVANERCGtWYlsPchcusSuYFKSTDGHTuQWsFShRRLNLHLLsllscpsGhlIVDSTRRGKhMPDALSKTlPIWCAVlNpsla.spsp.............................phLhhPPphVusSEcspItp+.lstFVppLppL.tlDlppLpppl......pKPlRPhWh...............sscshhpssh.p.ptpsahsllLCTuS++sp..............suphpc.......sG..YlQGAuDDcEtWAp......GLTPslFW........sspppL..lsp.........uE--LsshlspLlppppppssspsst...lts............................ssslhlGthss...................tspashVlshssp.h.t.......psppppsppLphshtuuKhGS+pLRptLsplpsFlptphus................spplllsC-oG..+DlSsGlhLsllChaaspchp.........................hpt.......t.splsKphl+ppLshlhp.h...plNPSRuTLpSVNuaLM ........................................l++psholhNRLpSI.tDutFlpp.lttha...t.................h......PLluN.RCG.WYhsPpt..h..t..s.o....sYFKSTDGHhspWsFShRRLNlpll.h......h....tpps.Gh.....llVDSTR+.G.KthPDALSKTlPIWssVlNpsl.h.pt...t......................................................p.lhh..Ps.hlstoEcspIpp+.lstastplp.......ph...ths...h.tpLtttl......tKPlRPhWh.....................tsps..h....s...p........t.....tt....sahsllhsoASp..psp...............ss.t...pc.........hu..YlQGAuDDpE.Wuh......GLoPslFW........tptp.L..hss.........scppl.sp.hl..tplltpppttts.ttt......lt..............................................................sppl..lsthts....................................t......hs.h.ll.hstp......................s....pt.thlph.h..s.ssKhupp.....t.Lcp.Lsph.thhtt.htt.................................t.tlllh..C....p..sG..p.Dh..SlushLsllshh...as.p.ph..........................h.t........t...h.s....K....l..+p+Lshlhp.h....pspPSRssLppVpsaL................................................................................................................................................... 
0 49 89 127 +508 PF02022 Integrase_Zn Integrase Zinc binding domain Bateman A anon PSI-BLAST 1wjb Domain Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. This domain is the amino-terminal domain zinc binding domain. The central domain is the catalytic domain Pfam:PF00665. The carboxyl terminal domain is a DNA binding domain Pfam:PF00552. 21.00 21.00 21.50 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.85 0.72 -4.51 92 14892 2009-01-15 18:05:59 2003-04-07 12:59:11 14 72 180 53 30 13552 0 39.60 77 6.93 CHANGED ApE-Hc+aHsNh+sLppcFslPthVA+cIVppCspCphpG ........AQEEHEKYHSNWRAMASDFNLPPlVAKEIVASCDKCQLKG..... 0 10 11 12 +509 PF00520 Ion_trans ion_trans; Ion transport protein Finn RD anon Pfam-B_33 (release 1.0) Family This family contains Sodium, Potassium, Calcium ion channels. This family is 6 transmembrane helices in which the last two helices flank a loop which determines ion selectivity. In some sub-families (e.g. Na channels) the domain is repeated four times, whereas in others (e.g. K channels) the protein forms as a tetramer in the membrane. 
A bacterial structure of the protein is known for the last two helices but is not the Pfam family due to it lacking the first four helices 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.53 0.71 -5.07 285 20399 2012-10-03 11:11:44 2003-04-07 12:59:11 26 434 1652 37 9781 20841 2116 196.60 17 31.06 CHANGED hhshlhsslFshEhll+hhuhsh......................hpYhpssaNhhDhlsllsshlshhht.ht.........................................................hthlchhRhh...R...hl+lhphhps....lptl.....ltslh.pshtslhplhllhhhhhhlaulhuhplatstht.t.................................................................................................................ptpssFcshstuhhhlapshTstuasslhhsh.............................shhshlahhhhhhlsshhllNlhlull ..........................................................................................................................................................................................hphhhhhhFs.h.Eh..h..l.....+.h..hshs..................................................................................ht.ah...p...s.....h....a.s....h..h....D...h..l....h...l...h...h...s....h.l..s.h..h.h.t.ht.............................................................................................................hp.hl.R.h.h.R.hh....R...............hl.....+..l....h..p..h...h....p...t.........................h..p..hl.................l......s..l..h....p......s...............h.......t...s.........l...h.....p.....l..h........l..l.........h....h...h....h...h.h........l...a........u....l..h..u..h...t....l....a...t...t...p...h....t.................................................................................................................................................................................................................................t..t..t....p.F...s...s...h......h....t..uh...........h.h...l.........h.........p.............h...............h.T..s...t....u....a.s..s..l.h..t.
............................................................................s.h.h.s.h...l..ah....hsh....hh..l..s.s...h....h..hl.sl.hlul.......................................................................................................................................................................... 2 2792 3895 6681 +510 PF03770 IPK Inositol polyphosphate kinase Finn RD anon Pfam-B_1382 (release 7.0) Family ArgRIII has has been demonstrated to be an inositol polyphosphate kinase [1]. 22.20 22.20 23.50 22.30 22.10 22.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.31 0.71 -4.67 109 1104 2009-01-15 18:05:59 2003-04-07 12:59:11 11 19 318 15 725 1054 13 210.70 27 40.30 CHANGED allLEsLstsappP...CVhDlKhGsRpas.-....As................................pKtpp.pp+sppoTStpLGhRlsGhpla.p......................................ppst..hh.....phsKhaGR.slstp.phpculppFltss............thppttthht.....thlp+Lpplpshlcppct....aRhYuSSLLhlYDuc...................................................................................................................................................................t..spscl+hIDFA+.ssh................hsssshDcGaLhGLcsLlphhp 
.....................................................................................................................................alhLEsls..t..t.......ap..pP...ClhDlKhGsRpasp-...ss.................................................................................................................................pKhpp..t.+pptooossLGhRlsGhpl...ht................................................................................................tpst.....h.......hhs.K........a.G+..s....h.s.hp..ph.p.p.s.lpp.Fht...................................tp.l..ht.........................hlp+Lpplpp...hlcppp....................achhuSSLL..hlYDup..................................................................................................................................................................................................................................................................................................................................................................................................................................................ttsplphIDFA+sh...................................................h.tts.Dc...GalhGLcsLlplh................................................................................................................................................................................................................................................................................................................................................. 0 250 366 558 +511 PF00612 IQ IQ calmodulin-binding motif Ponting C, Schultz J, Bork P anon SMART Motif Calmodulin-binding motif. 20.30 11.10 20.30 11.10 20.20 11.00 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.00 0.74 -6.85 0.74 -3.79 345 16654 2009-01-15 18:05:59 2003-04-07 12:59:11 22 705 467 44 8271 15224 358 20.70 32 5.40 CHANGED ppuslhlQshhRuahsR+pap .........pAAlhIQuhaRGahsR+ph........... 
0 2330 3800 5778 +512 PF01007 IRK Inward rectifier potassium channel Finn RD, Bateman A anon Pfam-B_18 (release 3.0) Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.12 0.70 -5.65 13 1452 2012-10-03 11:11:44 2003-04-07 12:59:11 15 11 294 52 764 1256 32 284.90 37 78.63 CHANGED VpKsGpCNVpasNlptc.ptRYLsDlFTTlVDlKWRapLllFshuFlsSWLhFGhlaWLIAahHGDLpt..................ssstTP......CVtsVpuFsuAFLFSlETQTTIGYGaRslTEECPlAIhlllhQuIlGsIIsuFMlGshhAKhApPKKRAETLhFScpAVIuhRDGKLCLMaRVGsLRKSHlVpAplRupLl+s+pTpEGEhlPLcQhDlsVshDsGs.DplFLVoPlTIsH.IDcsSPLa-luppsL.pppDFEIVVILEGhVEoTuhTsQARoSYlscEILWGHRFpPVloh..EcGhYcVDYSpFcpThEVs....TPtCSA+-LsEpK......Lhpshp..........sh-sph ......................................................................................................htKsGpsNl..h.t..p...h...t...p...................h.a..ltDl..aTThlD...h.p.W.R.a.hh.lFshsahho..W.hh.Fu...hh....aahlA...hh+G..D....l......................................s..h..ps.......C.l...p.l...p..u...FsuAFLFSlETpsTIGY.Gh....R...h.lT.-.cCP.ulhll.l.h..QsllGh.llsuFh............h..GshhsKhupP+...+..RA.p..Tlh.....FScpAV...............I...u...hR......c........G...+.L..CL....MhR..V..u..s......hR.......p..Sh..ll..p..upl..+s.p.L.l...+...s.p.......T.....E.G..E.....h..l....l.pQ.h-.l.s................l..sh.s.......t...u................c.....p..lFLl.P..l.....slh.H..ID.ppSP......lash.......s.t.p.....s.........l.....t.p.p..c..FE...........ll...............VhL-GhsE..uT....u.........h.o.s.QsRoSY.l........sp..E..IhW..............GaRF.s.ll....................c......p..s..hY..p...l.Dasp..Fp..psh.ps.......sP.......h...s.hh.............................t............................................... 
1 170 277 492 +513 PF02174 IRS PTB domain (IRS-1 type) Bateman A, Mian N anon IPR002404 Domain \N 21.00 21.00 21.10 21.10 20.60 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.14 0.72 -4.40 32 923 2009-01-15 18:05:59 2003-04-07 12:59:11 12 28 105 45 552 834 1 96.60 28 10.89 CHANGED ptstF.V.lp..........hptsupcssl.pGsh.hLplopcsLhL............hpsppshlpWPhptLRRa........Gp.s..pshFoFEuGR+CsoGtG.asFpspc...upp..laphlppshps..pp ...................................t....a.V...............h.....s.....pt.sh..hG.s.h...hLplTp..c.......slhl.................sptpsc.psh..hpWsLpslRR..a.............Gt..s..sshFohEsG.Rp..sssG...Gh.a.thpssc......upp...I..aphltphhpt...t......................... 0 117 160 311 +514 PF02922 CBM_48 isoamylase_N; Isoamylase_N; Carbohydrate-binding module 48 (Isoamylase N-terminal domain) Griffiths-Jones SR anon Structural domain Domain This domain is found in a range of enzymes that act on branched substrates - isoamylase, pullulanase and branching enzyme. This family also contains the beta subunit of 5' AMP activated kinase. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.28 0.72 -3.94 59 8560 2012-10-02 20:10:03 2003-04-07 12:59:11 13 125 3303 51 2144 6979 701 85.90 27 11.95 CHANGED sLGuph...........sspFplWuPsAppVplhh.hss.....h.tpphshp.....tpsGlWplhls.......sht.......t.hYtaclpsss......................G.....hhhhhDPYA ...........................hGuphpst............GspFsl.WA...P.s...A.p.pV..pls...s.hss..................ht..t.p..p...hs.hp............tpssGlWc.h.hls...........slt.G......................t.hYpYclpsst.......................G..........hhhhhDPaA............................................................................... 0 684 1346 1804 +515 PF00857 Isochorismatase Isochorismatase family Bateman A, Griffiths-Jones SR anon Pfam-B_566 (release 3.0) Family This family are hydrolase enzymes. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.87 0.71 -4.33 64 13319 2009-09-13 17:43:14 2003-04-07 12:59:11 15 81 4229 124 3488 9497 1254 172.10 20 81.06 CHANGED sALl..ll..DhQ.pshh............tstphpphltshpcLlcsu..+pts.....hs....llhspphhp.................................hhh.ss.ssplhsplps......s-hhlp...Kp......phsuF.......hsosLpphLcp...psl..c...plllsGhtTphCV...tsTshsAhpp.Ga.....plhllsDu.....suuhs......spt.pptulppht......hh.uhlhsspp .........................................................................................................................ALl.llDhQ..pshh...............................hsh..ss...t..sp..p....ll..s.s..l....s..pL...h.p..t..s......+.t.t.s..............hs...............llhspphtt............................................................................................................s....h...h..h......s..s.........s..t....p...........l..h.......s...........p...L.......ss.............schl..lp....................Ks................phuuF.......................................tsTs.Lp.p......h.L.+p........p.ul.....c...............pll..lsGltTchCV.................tpTs..h......s......A......h.......ph...Ga...............................pl.hllpD.A.....su.shs................tpt..pphul.t.tht...........thh.s...................................................... 1 1002 1990 2805 +516 PF02373 JmjC jmjC; JmjC domain, hydroxylase Bateman A, Wood V, Mistry J anon [1] Family The JmjC domain belongs to the Cupin superfamily [3]. JmjC-domain proteins may be protein hydroxylases that catalyse a novel histone modification [4]. This is confirmed to be a hydroxylase: the human JmjC protein named Tyw5p unexpectedly acts in the biosynthesis of a hypermodified nucleoside, hydroxy-wybutosine, in tRNA-Phe by catalysing hydroxylation [5]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.21 0.71 -3.84 32 2778 2012-10-10 13:59:34 2003-04-07 12:59:11 17 174 310 95 1692 3393 72 108.20 25 10.42 CHANGED hlYhG.hhophsaHhEspshhS...lNahchsus+hWaslPsppspphc...phhpcp.......h.tp.th.lpphsshhsPph..LhptslsshchsQcsGEhVhs.stsaHsshNhGashu.uhNF ..........................................................................hhhuh.hot..hs.h.Hh-.stt.hh.u.............ls..a....h............h.........h..........s....us......K.......t...W......a...hl.P.s.p...p..t...p...p..hc........chhpph..........................tp..h....lp.hh...hs...h..h....sph.........lhp.h...s..l......s......h.c.............h..hQps...G-hlhh.st..sh.H.t.......s..hNh.G.h.s.hs.uhNF............................................................. 0 478 771 1239 +517 PF02099 Josephin Josephin Mian N, Bateman A anon IPR002950 Family \N 19.90 19.90 20.20 21.00 19.40 19.60 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.19 0.71 -4.60 10 651 2009-01-15 18:05:59 2003-04-07 12:59:11 12 14 168 8 262 634 1 135.20 47 52.38 CHANGED +Q-us..LCAlHCLNsLLQ............................GsaFocs-LusIApcLDppEcsphsp.....sspsstoaht..csSpNhspsG.FSIpVLppALclWsLpllsa..psschpstph...cP-spsuFIhN..........hscHWFsIR+l.........supWaNLNSlLuA.PcaIucpYLusFLcplcupGaSlFlVps.s ........................+Qctp..LCA.HsLNNlLQ..............................................................................................u.p.hFo..pLspIsppL.s.pph.h..............................th....psp.tN.h...ss........G.aslp..........VI.......sALp.s.hu.hchl.a....s.c..h..phhtl................s...p.huFIhN.......................................hccHWhslRcl.........................................s.tpWaN...LsShLsu..P-..hI.u.-..s.h..L..t.hFL...s.p.l.p..p..p.G.h.plFlV..s........................................... 
1 94 133 200 +518 PF02214 BTB_2 K_tetra; BTB/POZ domain Bateman A, Eberhardt R anon Pfam-B_27 (Release 5.2) Domain In voltage-gated K+ channels this domain is responsible for subfamily-specific assembly of alpha-subunits into functional tetrameric channels [1]. In KCTD1 (Swiss:Q719H9) this domain functions as a transcriptional repressor [2]. It also mediates homomultimerisation of KCTD1 and interaction of KCTD1 with the transcription factor AP-2-alpha [2-3]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.34 0.72 -3.78 46 3855 2012-10-02 01:20:04 2003-04-07 12:59:11 17 108 285 59 2405 3406 33 92.00 29 21.51 CHANGED lhlNVGGh+FpTppsTLsph.PcohLuphhc....................hp.assspsEaFFDRsPptFcsILsaYRo...GcL+ts.phslptahcElpaatlsph...lcpC ............................................................lhlNVG.Gt.h.apTp.....h.p.T.L......sph...Pc.o..h.L.uph.hptp.....................................................ht.hc..s.p.ps..c.a.Fh..D..............R..c..st...h............FphILsa...hRs.............G.......cL.....h........h.....s.......t..p.....h.......s.....h.t....t.h........hcEhcaatlsth......................................... 0 702 944 1575 +519 PF02705 K_trans K+_trans; K+ potassium transporter Bashton M, Bateman A anon Pfam-B_677 (release 5.5) Family This is a family of K+ potassium transporters that are conserved across phyla, having both bacterial (KUP) Swiss:P30016 [3], yeast (HAK) Swiss:P50505 [2], and plant (AtKT) Swiss:O22397 [1] sequences as members. 
24.80 24.80 24.80 24.90 24.70 24.60 hmmbuild -o /dev/null HMM SEED 534 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.90 0.70 -6.15 84 2676 2012-10-03 01:44:59 2003-04-07 12:59:11 11 6 1808 0 887 2162 250 506.20 40 80.78 CHANGED slsALGlVaGDIGTSPLYshpssh...............thshssstpsllGlLSLIhWoLhlllolKYlhllhRADNcGE.GGIhALhuLlp.................................................ptsptttal.......................hhhlullGuuhhhGDGlITPAISVLSAlEGLp...lhsPslp.......shllslolsILlsLFhlQphGTsplGphFGPlMllWFhsluslGlhs..lhpp.PtlLpAlNPhYAlpFlhp..psh......huahlLGuVhLslT.GuEALYADMGHFG+psIphuWhhhVhPuLlLNYhGQGAhllppP...tsh........pNPFa...thh.Pp....hhhhPhl.lLAThAslIASQAlIoGuFSlspQAlpLshhP+l+lhaTSppptGQIYlPhlNWlLhluslhlllsFpsSssLuuAYGlAVohoMllTTlLhhhhhthha.chshhhshhhsshFhhlDhsFhsANlh.KlhcGGWhPlllusllhslMhoWppG...pphlhpt.....................................htpptl.............slsphhptltp..ps.s...............................+lsGs.....AlFh...oss........pslP.slh+h.lcp.pslaccslhlolhph.shPpls.scRhplp.....pls.......ssha+lhlpaGFh- 
............................................................................h.lsAlGVVYGDIGTSPLYsh+psh..................tsthshshspsslhGhlSLIFWsL..hll...solKYlhhlhR..AD.N.p...........G...........E..G..........GIhuLhuLst.................................................................................t.t.t..tptphh..........................hllhullGuuhhhG.....Dul....lTPAI.S..................VhSAlEGLc.......lssP..thp..................shl.ls.lol.l.lLshLFhlQ+aGTshVGph.Fu...P...lMllW...F...lhL...u..s...l...Glhs.....Ihtp....PpVLpA.l....NPh.aAlpFhhp....pst.....huah....s....L....G..uVh......L..........ulT.G...........uEALY..A..DhGHF.....G.....+....hsIp......l............A.Wh......h......lV...h.PsL...l.......L.NY....h....GQG..AhlLpps.....psh............pNP..FF..hhs..Ps..............hh.hhP.hl.llAs..l.A.s..l..IASQAlISGsFS..lspQAl.pLGh.lP+....h.+IhHT...Sc....pp.GQIYIPhlNWhL..hluslhl..llsFc...................s.....Ss.......pLuuAY...........G................lAVTsTMllTol.Lhs..hl...hhth......W..chshh.hs.hhhhlh.F..h.sl.-..h.saF.sAslh.K...lhpGGWlPlhluhlhhhlMhsWcpGpthhh..cp.............................................................................t..c..p..t.h...........sl..pthl.t.hpp....ps.s.....................................................RVsG.s.....uVah.....ops.........ss..lPhshhH..lpp.cslHc+slhlslhs..ssPhV...scR.hp.lc..........plu................tha+lhhpaGap-..................................................................................................................... 0 173 506 707 +520 PF01920 Prefoldin_2 KE2; Prefoldin subunit Enright A, Ouzounis C, Bateman A, Finn RD anon Enright A Family This family includes prefoldin subunits that are not detected by Pfam:PF02996. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.18 0.72 -4.28 56 1643 2012-10-02 17:27:01 2003-04-07 12:59:11 15 22 498 7 1100 1563 97 102.30 18 68.87 CHANGED QphlspapplppplppltpphpphctplpEhcpshcELphls--...ppla+hlGslhlcp.shpcspspLccct-tlptclcplcpphcplppchpchcpplpp.hhtst .................................................t..tphpphppphptlt...pp.h.p..p...lcpph........p.c.t..c.....h......s..hc........EL...p.....l...s..s-................pp.la.chl....G.s.l.........hlcp.shp.cshpplppchctlpp.c....l.........c.plc.pphpphppphpc....hcppl.thht..t.............................. 2 356 593 877 +521 PF01344 Kelch_1 Kelch motif Bateman A anon [1] Repeat The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase [1] for which a structure has been solved [2]. The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415. 20.10 20.00 20.10 20.00 20.00 19.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.11 0.72 -4.41 207 19043 2012-10-05 17:30:42 2003-04-07 12:59:11 20 753 1386 88 10669 22556 644 46.00 27 22.72 CHANGED tRsttussshssplYllGGhss..................tphhsslphacsp.....sspWpthssht ..........................Rtthusssl..s..s..p..l.Y..s.....l..G.Ghss.....................................tphh..ss..V...E.p...YDsp...............sspWp.hssh.............................................. 0 3018 4230 6832 +522 PF00013 KH_1 KH-domain; KH; KH domain Bateman A, Eddy SR, Finn RD anon Published_alignment Domain KH motifs bind RNA in vitro. Autoantibodies to Nova, a KH domain protein, cause paraneoplastic opsoclonus ataxia. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.88 0.72 -4.32 396 19558 2012-10-02 00:34:43 2003-04-07 12:59:11 24 384 4888 106 9071 18086 3141 61.70 24 16.12 CHANGED thplhls.spt..hutlIG+sGp.sIcplpcpo.usp.Ipl.......pp....................p.hlpl.p..G.s..pslppAtp.......hl .....................pl.ls..sph......hu..plI..G+..t......Gp..s.......I+p.....lpp...p...o..G....s....p...Ipl...............................spt........................................pt..h..lpl..s........G..s..pphptAtt........................................................... 0 2547 4378 6873 +523 PF00109 ketoacyl-synt Beta-ketoacyl synthase, N-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Dotter Domain The structure of beta-ketoacyl synthase is similar to that of the thiolase family (Pfam:PF00108) and also chalcone synthase. The active site of beta-ketoacyl synthase is located between the N and C-terminal domains. The N-terminal domain contains most of the structures involved in dimer formation and also the active site cysteine [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.80 0.70 -4.90 167 24208 2012-10-02 12:25:54 2003-04-07 12:59:11 21 1647 5964 191 6576 27004 6262 223.60 26 22.85 CHANGED scslAlsGhusphPsGss.........s-phWchltpGpsuls.........phs......hsth......ssphsGpshs..t...........tFDsthFshss+ps.tMDPppRl.hLpsuhEAlEcAGl.sstph...psst...........sGVhhGssts...........sh.......tt..hhttts.......htshs..hsss......ssushuu+luat...hGhcGPuhsVsTACoSu.hsuh+hAhpslRpGcs-hAlAGGssshhsPtsh.ssasptt..hhusp...s.....s+Aasstu.........-GalhuEGhGslllpcht-A ................................................................................................................................................................lslhGh........uhh.s.h.u.s.............................ptha.p..lhtGt.s.slt.............................hs......................t.t.....h....................t...s...t......h................s.................u...............p............h..........s...........................................................th.......-.......s.............h....................t............h...........s........c................................t..h....D...s.t.....p.........ph...hl.t.s.u...h..c...A..........l.c.c................A...G.l...ss.pph.........................pssp.............sGVh.h.Gsshs..........sh....................................tt.h..hht.ttt..................hp.s.s.....hsst..........................hs.sh..h...u..u..c.l.u.hh..........hs.........l...c...G....P..s.....h...s...l.s.T....A....Co...S..S..hs...ulchA.h........p.....sl..................p...t.........G......c.....s............-........h......s..........l...u..G....G....s.p....h....h....h....s.....s.....h.......s..h.....h.....s.F.......s.....th......t....hh....ots........s..................................s+shcsts..................DG..a.sh...GE...G.sGhllLccLpcA............................................................................ 
1 1853 4027 5613 +524 PF02801 Ketoacyl-synt_C ketoacyl-synt_C; Beta-ketoacyl synthase, C-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Dotter Domain The structure of beta-ketoacyl synthase is similar to that of the thiolase family (Pfam:PF00108) and also chalcone synthase. The active site of beta-ketoacyl synthase is located between the N and C-terminal domains. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.34 0.71 -4.26 165 23531 2012-10-02 12:25:54 2003-04-07 12:59:11 17 1627 5851 190 6174 21515 3921 108.60 35 10.85 CHANGED hAhltGsAsspsG.tssuhssPsusuQ..tcs.....lcpALssAsls.Ps-l-hVEAHGTGT.lGDshEspAltssaGptt...........p..lhluSlKSslGHspuAAGssullKslLAlccuhlPPolphc ......................................................................hA.lhGhussp..DG.......h...p...........s........hs.....sPs.utut.......tps..........lcp..A...L....p..p.....A.....u........l....s....s.....p................p.............l.....s....a....l....p..........AHG.T......u.T.....hG...Ds..........hEspA.l.tp.s.a.Gptt...............................h..hl.u.S....s........Ko.h.GHh...u.AAGss....th.l.....tslh..u.l....c....c.s..hlPsolph.............................. 
0 1709 3766 5251 +525 PF00225 Kinesin kinesin; Kinesin motor domain Bateman A, Finn RD anon Prosite Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.08 0.70 -5.74 84 9740 2012-10-05 12:31:07 2003-04-07 12:59:11 18 241 436 149 6035 9371 509 290.90 32 34.34 CHANGED RlRPhsspEpppsppthsph.........................................................tpttpshtssptpppsFsFDpVFss..sso........ptplapph.h.sllpssl.cGaNsolFAYG.QTGSGKTaTMtG.................................tp..GllPRslcplFpplpp.t..........................................................................................................taplp.lSalEIYNEplhDLLssp..............................ppp......Lpl+-.csp.t....shVpsLoph.Vpshp-lhpllphGppsRpsusTphNppSSRSHulaplplpppshstt.t.......................u+LsLVDLAGSERsscos.........stsp.h+EussIN+SL.sLGpVIsALsps................................psHlPYR-SKLTclLpDSLGG..su+ThMlsslSPusp......shpETlsTL+aAsRA+pl ......................................................................................................................................................................................................................................................................t....pta...t....F....D.t.l..a.s.......pss..................................Qpp.l.a.p...th........s.p.s.l....l.p...........s...s.h......p.......G.......a..N...............s...............s...l.FA.............Y.G...Q.......T..............G..................o............GK......T...a.T.M...Gs.......................................................................................pp.Gl.....l..s....p....s...........h.p...p.l.Fp...t...l...p.p...t...p...t..t....................................................................................................................................................................................................................................................p.a.plp..s...Sah.E...I.....Y......s.......E
.....p.....l....h....D.L.....Lssp.......................................................................tt..........tlp.lc.-...c...s...p.....s..........hhl.t......s...l....p....p.....h..................l...p.............s......h..p..............-...hh..........p......l....h...............p.......h.....G.............p...p.....p...............R.......p...............s.............u........s...T....t...h....N..p....p................S.......S.........R.....S............Hu...l....hpl.p...lpppph..t..t..........................................................hhuclpL...V.DL.A......GS........E......Rh............t..........p.......ots...................................Gp.........p...........h.....................c.........E..............ut..........p.....I.N.p..............S.L...s.L...G....p.V..Ip..u...Lspt..............................................................................pt..a..l..P....Y.R..............sS........K....LT.............p......l...L...............p................-........S..L..............G................G.......s.............u.............+.....T.............h...........hl.ss..l......o........P.......st.........................shpET.l.sTL..c.aAp.Rs+p.............................................................................................................................................................................................................................................. 0 2295 3451 4869 +526 PF00467 KOW L24;Ribosomal_L24; KOW motif Bateman A, Finn RD anon Prosite Family This family has been extended to coincide with ref [1]. The KOW (Kyprides, Ouzounis, Woese) motif is found in a variety of ribosomal proteins and NusG. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.06 0.73 -7.32 0.73 -4.05 235 10200 2012-10-01 20:16:17 2003-04-07 12:59:11 24 51 5026 253 2995 5949 3093 34.10 32 20.34 CHANGED hGchVhlhsGt.pG.ctGplhclpppp............p.Vhlc .......G-pVpVlsG..t.......pG..pp.Gp.Vhcl..cc...............sp.Vhl........................................ 0 1014 1819 2444 +527 PF01352 KRAB KRAB box Bateman A anon Bateman A Family The KRAB domain (or Kruppel-associated box) is present in about a third of zinc finger proteins containing C2H2 fingers. The KRAB domain is found to be involved in protein-protein interactions [2,3]. The KRAB domain is generally encoded by two exons. The regions coded by the two exons are known as KRAB-A and KRAB-B. The A box plays an important role in repression by binding to corepressors, while the B box is thought to enhance this repression brought about by the A box. KRAB-containing proteins are thought to have critical functions in cell proliferation and differentiation, apoptosis and neoplastic transformation [4]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -8.05 0.72 -4.51 69 10165 2009-01-15 18:05:59 2003-04-07 12:59:11 22 1426 46 1 4775 8709 1 40.40 54 7.64 CHANGED VoFcDVAVsFopEEWphL-suQ+sLY+-VMLENapNLlSlG ..............lTF.cDVAVsF..opE.EWph..L-..su..Q....+sL..Y.R-.VM.L.EN.YpNLlSlG............. 0 382 388 643 +528 PF05178 Kri1 Krr1; KRI1-like family Wood V, Bateman A anon Pfam-B_8372 (release 7.7) Family The yeast member of this family (Kri1p) is found to be required for 40S ribosome biogenesis in the nucleolus [1]. 
25.20 25.20 25.40 25.40 24.70 25.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.66 0.72 -3.74 75 304 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 266 0 220 306 7 112.60 31 16.75 CHANGED +cRKccE+cp........+cpElp+LKsLKhpElp-Klp+I+csuG.....tt..........................hsh...s-c-l.....-....s-FDspcaDppMpc.hFs.....-cYYsp...........................tc.cKPpa.--D.-l..t-hh......sstc..............c- ...................................+cRKcpEKpp...........+pcElc+LKsLKhcE.lp-K..LcKl+cssG......ps..............................hsl...s-c-l.........-....sDFDsscaDcpMpc.hFs..--.YYst.............................tctcKPpa..--D.-l..t-h.s.....t.......................................................................................... 0 76 123 184 +529 PF02735 Ku ku; Ku70/Ku80 beta-barrel domain Bateman A anon Bateman A Domain The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the central DNA-binding beta-barrel domain. This domain is found in both the Ku70 Swiss:P12956 and Ku80 Swiss:P13010 proteins that form a DNA binding heterodimer [1]. 
25.00 25.00 26.10 26.00 24.30 23.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.93 0.71 -4.91 29 1701 2009-01-15 18:05:59 2003-04-07 12:59:11 11 32 1062 4 808 1704 49 191.60 20 42.31 CHANGED lplG.....tlslsVpl..ashspppthhhhhhhtcc.........p.spscpcplstpssp.............................tlp...ts-hh+ua......hhsschlslsc--lcplp.hss.......tslclluFhshs..lhchhahcsuhalhPccpts....tpsassLh..cshhcppphAls+ash+s...p.plssLhPpt....................................................puhhlhpLsas--lR.shshhthtsstt.......sppplchh ...................................................................................................................phu.....hlslsVph....asss.p.p.p.p.h.......h.h..hhtpp.........................hshhc..hp..ph...s.p.ps.sp................................................................................pVp...tc-.lsKuY......hhs.s.c.....h..Vh......lsc--l..p.p.lt..tss...............................pslclluFls..ts..t...........l......h.h.hh....c..c.s.h..a.l...h.Psp..p...ss..........................................................c.s.as..hLh..cultcps+.l........Als+h..sh+p.......+.p.lssLhPtt............................................................................pslhlhpL.as--lR...sh.s...h.h..t.......................h..................................................................... 0 264 480 665 +530 PF03730 Ku_C Ku70/Ku80 C-terminal arm Bateman A anon Bateman A Family The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the C terminal arm. This alpha helical region embraces the beta-barrel domain Pfam:PF02735 of the opposite subunit [1]. 
21.20 21.20 21.60 21.40 20.90 20.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.21 0.72 -3.46 18 467 2009-09-10 20:09:42 2003-04-07 12:59:11 9 16 255 7 249 454 0 93.20 21 14.94 CHANGED lc.saposphtNP.LQpaapsLpslALchpEsh.slDtplschpphpc..+hssh..lsch+phhh.s-h....p.-spsutcpppcs..Eus.....suKKsKh-h ..................................tapssph.NPsLQpaa.p.....sLpshAL.....c.....p.....t...P......ts.....D....slsc..hpphs.c....chssh.........lp.......ch.+ph..h.....pph........p.ctt.htt.cp..t..t...tp...t...........hp........................................................... 0 75 127 191 +531 PF03731 Ku_N Ku70/Ku80 N-terminal alpha/beta domain Bateman A anon Bateman A Domain The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the amino terminal alpha/beta domain. This domain only makes a small contribution to the dimer interface. The domain comprises a six stranded beta sheet of the Rossman fold [1]. 
20.60 20.60 20.70 21.00 20.30 20.40 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.24 0.70 -4.92 17 702 2012-10-10 16:07:06 2003-04-07 12:59:11 10 19 300 4 399 710 4 208.10 18 32.63 CHANGED ullallDsu.sMhps.pspps.......hp.shpshpphhtp+lhsps+.DhlullhauT-pscN.....sssapNlhl.........lp.....clshsshcplpclpphhps............tsstpsshhsuLhsshsl...hpc..spt+hs+++lhlhTs.csPh.......stsphchhhtc.................upDhpppthphs..hhhL......stt...phhap-.hp.utcp.pthhhs.pt........phpchhpplpshpphcRth ........................................................hlhhlDsu.sM....hp..........sttp................h...p.sh.ps..ltph.hppplhss....s+...Dh..lul..lhas.T......c...........p.......o......c.............s.............................sts..........a.p.plh.l.......................................hp.......pls.s.s.hctlpclpph..hps...................................s.....s..p..p..s.s.....hh.ss.L....h.ss.h.p.h.......h.p.p.........t........t...t..p....h.tp++.l.hl.hTstcsPh..........spsphp.t.hhpc............................................st-lpp.s..h.thp..hh.l...................t..p.p.h.hp.....-........p.......s...t...........t.................................................................................................................................................................................................. 1 128 216 324 +532 PF00014 Kunitz_BPTI Kunitz/Bovine pancreatic trypsin inhibitor domain Fenech M anon Prosite Domain Indicative of a protease inhibitor, usually a serine protease inhibitor. Structure is a disulfide rich alpha+beta fold. BPTI (bovine pancreatic trypsin inhibitor) is an extensively studied model structure. Certain family members are similar to the tick anticoagulant peptide (TAP, Swiss:P17726). This is a highly selective inhibitor of factor Xa in the blood coagulation pathways [1]. TAP molecules are highly dipolar [2], and are arranged to form a twisted two- stranded antiparallel beta-sheet followed by an alpha helix [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.26 0.72 -3.76 147 4915 2012-10-02 12:37:03 2003-04-07 12:59:11 18 353 273 213 2635 4772 22 53.40 35 18.48 CHANGED hCphsh.ssG..s.Cp...u......th....+aaas....spsppCp.pFhYuGCtGN..t.....NsFpohc-CpphCt ...................................C.hsh...p....tG.....s...Cp.....s..................hh........Ra..a...as............s.ps...ppCp...pF..h.Y...u.................G.C...t....G...N...t.......NsFtopcp....CpptC................ 1 878 1112 2046 +533 PF03521 Kv2channel Kv21channel; Kv2 voltage-gated K+ channel Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 42.50 29.90 18.40 19.70 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.92 0.70 -4.93 4 108 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 39 0 63 94 0 204.20 45 32.14 CHANGED KosEous+KDKspDNHLSPSRWKWs+RsLSETSSNKSF-sK.QtsspK.......spSSSSPQHLssQpLE-lYNchsKTQSpP.lNopp.sQsu+P...tEElEMpplssPps.LsssppEullDMRShSSIDSFhSCATDFsEopR.shoPhuu..hphphss..ss.cta.tupst.hLs.pttpusAs+-uhpEhtststshp.-sts...h..s..pus.hlESP+oSlKssNPL+.RSLKVNFh-ucsso..ssssshps.Plp.tsashtusttlsThhL.-p .................Kss-s.spK-psp.DNHLSPs+WKW.s.++.shSETSSsKSa-sK.QtsspK.................sp.oSS.....S.......P.......QHLssQpLE.hYNchsKT.Qsps...Nscp.pt.t.t+P...cEElEM-plssP.t.Ls..s.psEsl.lDM+Sh..SSIDSFhSCATDFsEspR.shsP.su.................................s.pttt..ssp..sh.th.s..s.sh..ps..ss............s.h.-SP+SShKssNPLK.RuLKVNFh-s.c.ss...ss...hh.....s.....hs....s........................................................................................................................... 0 2 8 24 +535 PF02828 L27 L27 domain SMART anon Alignment kindly provided by SMART Family The L27 domain is found in receptor targeting proteins Lin-2 and Lin-7. 
20.20 20.20 20.40 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.41 0.72 -4.28 20 1179 2009-01-15 18:05:59 2003-04-07 12:59:11 11 37 100 20 527 978 0 53.10 24 14.32 CHANGED slppsh-lL-cLps.ssss.....pchppLpplLpstahpullcla-pltppshssssss ........................hphsh-lLccLps..spst.....p-hptLpplLppsahp..u......L......l.csH-pltppphtss............ 0 84 132 273 +536 PF02448 L71 L71 family Bateman A anon Pfam-B_1976 (release 5.4) Family This family of insect proteins are each about 100 amino acids long and have 6 conserved cysteine residues. They all have a predicted signal peptide and are probably excreted. The function of the proteins is unknown [1]. 25.00 25.00 32.90 29.30 18.80 18.20 hmmbuild --amino -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.18 0.72 -4.14 10 105 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 12 0 35 105 0 71.40 36 65.23 CHANGED p.CccltcpCpcshpRLssssD.slchFNcpCRccsc..hpWRsVoRCELp+lsClh...cspshsCcNlA........chssh ...CccltcpCppshpR...LssssD...slshhNppCRccst..hpWRsloRCELpphsChh...cscphsCpslAchh............ 
0 10 10 23 +537 PF00753 Lactamase_B lactamase_B; Metallo-beta-lactamase superfamily Ponting CP, Bateman A anon [1] Domain \N 22.70 21.90 22.70 21.90 22.60 21.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.20 0.71 -4.65 305 34807 2012-10-02 15:46:01 2003-04-07 12:59:11 22 191 5226 350 10222 34846 12115 193.00 14 50.57 CHANGED hsthsssshllt........ss.spsl..llDsGhststtthh..............................hthpstplctll..lTHtHhDHhGuhttltpthshshhhttttttthhtthh.............................................................hhhttttththtththhhhts.stsssshhhhh.....ttppllhsGDhhh......stsththth...........................................................h.....................h.....hh..uH ...................................................................................h....ttsshllt...........ts..pph.l.....l..lDsGhshsthpth...........................................................hthp.s..t.p..l...p....t....l....l.......lT.........H.t.H...........h.........DH.....h.........G......u.........l........s......t......l.....h.......p........p......h......s.....h......s......h....h...h...s...t....t.t...t..t.h..h..ht...t.h...........................................................................................................................h..ht.p.t.t..t.h..t.h..t..t...h....t..h..h....h....h...s.....t...s.........s..........s.......s........s...t..........h..h.hhh..............................stt....p.....h....l..h..s.GD..hhh..............ttshhhh................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
1 3453 6690 8675 +538 PF02652 Lactate_perm L-lactate permease Mian N, Bateman A anon COG1620 Family L-lactate permease is an integral membrane protein probably involved in L-lactate transport [1]. 19.70 19.70 19.70 19.80 19.60 19.20 hmmbuild -o /dev/null HMM SEED 522 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.72 0.70 -6.01 8 3338 2012-10-02 15:12:49 2003-04-07 12:59:11 9 5 2259 0 540 2191 157 490.80 36 95.93 CHANGED LSALlALsPIlLFhhuLslhKhKGYhAuhlolAlolhIAlhha+MPlphshuSslhGhlhGLWPIuaIIluAlFLYKlSVKoGtF-II+pSlhuIosD+RlQllLIGFsFGuFLEGAAGFGsPlAIoAAlLVGLGFpPLaAAuLCLIsNTAPVAFGAVGIPIhusuusssls......................shEISphlGhhLshhollIPFalVhlhsG.a+GIK-saPAllluGhSFAlsQaLoSshLGPELPsIluuLlSLsshslFLKh.WpPKpla+.ssptpotsspst..........+hscllcAWoPFlLLossIllWs.PhFKAlap...psuhlhhssh.h.h..lsp....hlhphsPlss..puhshssVaKhcLlhssGTuIllAsllS.hhlh+lsspDshslFspTLKEhtlPIloIshVluFAhlsNaSG........MSsoLAluLAc.TGpsFTFFSPhlGWlGVFlTGSDTSSNlLFGuLQhhsApplGlsssLlLAANTsGGssGKMISPQSIAIACAAVGLsGKES-LF+hTltaSLIaslIsullshll 
............................................................................hSuLlAh.lPI..lhhhlsLs...........h........h........+...........hK...........u...h...h.A.u.h.h.o.ls..lsl.l..l..Alh....h..........a........c.....M...........P.......s.........s.....h...u.......h...u.us.s.Ghh....hG........la...PIuaIllsAlalY+lolcoGpF-lI+so.l.h.u.I..o..s..D.p.R.lQhLlIuF.sFGu..FLEGAAGFG..sPlAIsAslL.luLG.FpPLhA.......Ahl.sLIuNo.APVAFGAlGlPl..hss...u..p...s....s...u...ls.................................s.h.p..luth...s...u..h.p...L...s....hhsl.l.l.PF..h...l.l....h....l....h.......s..........G.........a............+G...........l............+.......E............s.......a.......P.sh...l.....luGh...oFuls.Qal...su..s..a....l...G...PEL...ssIl...uu..L.lol..h...sl.s.l..a...hKh....a...p....P...c....p....h....h..c...h......t.....s...t.....t....t...t..t...s...p.hsh.....................................................shtpl..l...pA....WsPal.lLss..h..l.h.l.hs.h.......h...K..s..lhs........................s.hl..........h..h...t...........lpt.....h.hphs.sl.su.................h..s.....h.shphshl.u.....ssGTsIl.l.u.s...l...lo....hh.........h.........h.........+...........h.........p.........hp..p........s........hp.s........a..spT............l+pl.thsllo....IshllAhA..hlhs..a.SG............................h.ossl.u.h.u.lAp..s.G..s.hFshh.S.....PhL.GWlGs.FlTGSsTuSNsLFusLQt.s.sA.p..p....l......G......l............u.............s........s........L........ll.AA.N.osGGs.sGKMISPQSIAlA.sA.A.l.....uh.s...G+..E.....u.....c.....lh+h..T..lp..a..S...l...h...ah.hllullshl............................................................ 
1 182 344 454 +539 PF03798 TRAM_LAG1_CLN8 LAG1; TLC domain Bateman A, Finn RD anon Pfam-B_1398 (release 7.0) Domain \N 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.84 0.71 -11.57 0.71 -4.76 168 2246 2009-09-11 23:05:38 2003-04-07 12:59:11 11 31 362 0 1453 2051 32 191.70 17 58.71 CHANGED casppshshlahhhsshhuh....hlhhpps...........................thht.h..........phhh....s....thh.hhhhphuYalhs..lhhhhh.pttpt...........................................................................h.hlhHHl.hslhlhshuhh..................................hshtph..shslhllh-hoshhLphs.......phhphh.th....................pt.............h.hhh...hhahls.FhhsRlh.....hhshh.....hhhshhthh...h.................................................................hhhhhhhhshhhs.L...pllplaWhhhllc ....................................................................................patpphhphhahhh.shhhuh.....hlh.hpps...............................................hh..h.............st..h...t...........h....p.hhhhhphuaa..htp....lhhh.h.h...chtcp..............-h...................................................................................................h.hlhHHl..hslhh.ls.huhh.....................................hsh.hph...s.sh.l.hh.l.t...-.ho.....shhLp.hs.......c.hhphh.th................................pt...............t.hhh......hl.hhhs.ahh.sRlh...........hhshh..........hhhs.h.h..h............h................................................................h.h.h...hh.h.hshh.hh...L.....hlplhWhhhlh.......................................................................... 0 465 733 1095 +540 PF03161 LAGLIDADG_2 LAGLIDADG DNA endonuclease family Mifsud W anon Pfam-B_3225 (release 6.5) Family This is a family of site-specific DNA endonucleases encoded by DNA mobile elements. Similar to Pfam:PF00961, the members of this family are also LAGLIDADG endonucleases. 
25.00 25.00 25.30 25.10 24.40 24.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.90 0.71 -4.36 18 250 2012-10-03 01:41:40 2003-04-07 12:59:11 8 26 160 6 57 270 38 167.00 22 52.22 CHANGED .hpllhGhLLGDualcpcs..pspp.......hphphphpp......tah.+hhhhhp..tasssss..hppphs..............ppGchhhshphpThsh.ssFshltphFYh...sspK....hhlPshlpp..aLTPpuLAaWhMDDG......p........sspulhlsTpuFohc-lphLhcsLpp+asLcsol+psp.......spahIal.spohthah ..............................................................hpllhG.LLGDup..lptps....ps..pp.......................hthp.phpp.........htah...a.h.h.hhp....thsp.p.....ht.ptht.....................s.p.htshpF..pThsh.spFshh.tchFY..........sstK...................hlP..p..p..l..tp..hLT.PhuLAhWhMDDG.........................ttsts.....lhlsTpsFo.hp-.h.hLhp...hLpppas....l.p.spl...pppt............tthhlhl.tpsh..h................................. 0 23 45 52 +541 PF04916 Phospholip_B Laminin_A; Phospholipase B Finn RD, Mistry J, Wood V anon Pfam-B_5721 (release 7.6) Family Phospholipase B (PLB) catalyses the hydrolytic cleavage of both acylester bonds of glycerophospholipids. This family of PLB enzymes has been identified in mammals, flies and nematodes but not in yeast [1]. In Drosophila this protein was named LAMA for laminin ancestor since it is expressed in the neuronal and glial precursors that surround the lamina [2]. 
21.30 21.30 25.10 23.50 18.50 20.60 hmmbuild -o /dev/null HMM SEED 553 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.76 0.70 -6.39 6 390 2012-10-03 21:14:07 2003-04-07 12:59:11 8 9 163 6 279 416 25 411.50 27 85.60 CHANGED p.hph..ullhhl.http..pshsss.....ppspsslshst.shp.................sth.stst......lAhupapssVNpTGWuhLEl-stp...shssplQuYuAGhLEGhLTtthlhhHhp.NohpshCcN...tspaCscLt-ahspN.+Whcppl.....pps.sD.aWpQlshslsQLsGLhsGYppRs.pscIshc....hasIhhhNhsGDlhDLt.........pphs+TcsPs........Fchs..G+CSA..LlKllPssc.......laFuHsThSSausMLRlhKpYcFs.......c.phsPGphloFSSYPGlLtSoDDFh.lpoutLsllETTlu..saNtpLh+phsP.spVhsWlRuhlANhlApsuppWsphFSRaN.SGTYNNQWhVLDh.Kp.hcspcpL..........sc..ssll...................allEQhPG.....hlsppDhTt..hLp+.oYWsSYNlPaaKplhclSGh....phscchG.aasastsPRA+IFcRDcusVTDlsShptLMRYNsYpc...-.hu+Cc..............CsP.PYoAchuIusRuDLNssuG............ThEh..u..GhssHuu.......lDsKs......sshcLh.......pphphhAh....uGP...shcslPsFcWsc.sshc-tssHhGpPDhWNFshVshK...........Wph .................................................................................................................................h.................................................................................................h....t.h......GWs.lplts....................hs...s....hh..suGhhEu.....ho....h...h...N..h.h.....t.....h.t...........................th...t.l.pahttp.....tahppph......................t.....ps...aWtphthh.h...Q.hpGl.tuhtt......................t............................h.....hhp..uDl..D...l............................................h.t.....t........................h......sp..CSu..ll...+lh.ss.p..............plhhuHs.oWtsY.ts.hhRlh.Kpa.p..h.th.........................h...upphsaSoYP.Ghl.ShDDFY...l....h...s........s.........sLhhh.pTT..s...shN.t.L..h..p....l....p..s....t.slhtahRsh.............hAshhAps...u.tpWscha.t.phN..S..GTYNNQWh......llDhptht.......t...t.......................................thl.......................................................................................
...hlhEQhPs.......hh...tD.Tt.....L....p..sYasSaNhP.a.a.pl.hp..huGh.....................t.ph.u................h...pap....ssRu.IFtRsts..tl.......ts.hpshhthhRhNs.a.p...D.hu....tp.................s........IssR.....DL..ts....t.........................................................s..su...........hDsKh........ss.hthh................................tt.ht.hhsh....sGP....s...t...t.....ssFpW.....t..h.......+.G.Pp.apF..h......h................................................................................ 0 154 189 239 +542 PF04031 Las1 Las1-like Wood V, Finn RD anon Pfam-B_10636 (release 7.3); Family Las1 is an essential nuclear protein involved in cell morphogenesis and cell surface growth [1]. 22.70 22.70 25.60 25.40 19.40 20.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.97 0.71 -4.56 26 316 2009-09-11 09:56:21 2003-04-07 12:59:11 8 8 274 0 222 321 1 147.90 32 29.95 CHANGED slsW..tshpEhpplhphla..............sp........................................................s.sppccAlp+lp.sWpsRu.....plPtsl-uTuhLlpshLtD..............................................ssthsstslc...htYuhAllR.....FVNsllDstQp..uphshshpplApplGlPphhV-LRHtsTHcp.LPsLshLRhusppsLpWLacpYWppp ..................................ssWhshsEh.pVpphha..............sp.......................................................................................s.ptpppAlpplp..sWctRs........plP..hsV-uTA.LlpshLtD...................................................sst.hsp....tlp...hhYuhAl.sR.........FVNtls-t..hpt.....p...t...ht...h.shtplApp..lGl..Ps..hhV-LRHpsTHcp.LPuLshLRpusphsLpWLhcp...YWpp............................................................. 
0 80 125 183 +543 PF00057 Ldl_recept_a ldl_recept_a; Low-density lipoprotein receptor domain class A Sonnhammer ELL anon Swissprot_feature_table Repeat \N 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.59 0.72 -4.05 53 19728 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1237 198 54 11586 17417 67 38.20 41 15.75 CHANGED ssCp.....sscFpCssup.....CIst.pahC...DGpsDCp.....DGSDE....psC .............................h.tCt.......ssp.F.p..C......s...s.up.............C.....I...st...p..hhC....D..G..p..s.DCs........D..s..S..DE......tsC............... 0 3071 3990 7792 +544 PF02987 LEA_4 LEA; Late embryogenesis abundant protein Griffiths-Jones SR anon Pfam-B_106 (release 6.4) Family Different types of LEA proteins are expressed at different stages of late embryogenesis in higher plant seed embryos and under conditions of dehydration stress. The function of these proteins is unknown. 36.00 10.00 36.00 10.00 35.90 9.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.72 0.72 -4.09 26 1180 2010-01-13 15:33:07 2003-04-07 12:59:11 11 27 134 0 431 1127 2 40.60 29 59.19 CHANGED cDhsu-KAtEAKDsThcKsGEhKDass-KAtEuKDpss-KstEh ........................stpKAt-..s..t.......-ss...t...c....Ku.......u-.......s.......pDts....t-K....As-..sK-...tst...pth............................ 0 117 228 375 +546 PF04004 Leo1 Leo1-like protein Wood V, Bateman A anon Pfam-B_11226 (release 7.3) Family Members of this family are part of the Paf1/RNA polymerase II complex [1,2]. The Paf1 complex probably functions during the elongation phase of transcription [1]. The Leo1 subunit of the yeast Paf1-complex binds RNA and contributes to complex recruitment. The subunit acts by co-ordinating co-transcriptional chromain modifications and helping recruitment of mRNA 3prime-end processing factors [3]. 
20.10 20.10 20.70 23.30 18.60 18.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.34 0.71 -4.55 86 331 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 272 0 230 324 0 165.30 31 32.43 CHANGED hahh+lPsF......ls.l-s..............cPFcPcoa.ts.p....................pt......................hpphh.php...NolRWRhsp..s........psGp...........tpSNA+lV+WSDGShoLplGs...Eh.aDl.......................................tppsh.tp.t........saLhsppst.........................................sllp..sputlspphshpPsoh....sStsH+..............phstulsp+ptpcstt....hhhtsspDPEhc+cctp+ .........................................ahl+lPNF......Lu.l-s..............cPFDPpsa.ts.t.p..............p..hscp.......................................................................ttp+hhhcsp...NolRWRhsp....s..........tpGp.h.........pcSNARlV+WSDGS..hoLplGs...E.h.aDl..................................................hptsh..p.sp............saLhlpps..........................................sh.Lp....s..p..uhhppplshp.Pp.os........sStsH+.......pho.hulssRstKppt.....thhh.sspDPEhp+pph..................................... 2 73 122 188 +547 PF00060 Lig_chan lig_chan; Ligand-gated ion channel Bateman A, Sonnhammer ELL anon Blastp NMZ1_HUMAN Family This family includes the four transmembrane regions of the ionotropic glutamate receptors and NMDA receptors. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null --hand HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.03 0.71 -4.49 44 3260 2012-10-03 11:11:44 2003-04-07 12:59:11 21 78 277 465 1868 3242 57 253.80 21 33.83 CHANGED oh-lWlslhhsalhluhslallt+.hoP.h-apt.........................................................................spaolhsuhWFshushht.pGsc...pP+uhouRllsslWahFsLllluSYTANLAAFLTlpchpss.IpuhcDLtpps.plpassht.uoohthappop.........hpphaphhpstps..........................ph.spssp-Glpclcpu..th.hAalhEsshl-ahsscp.......CchhplGp..shsspGaGIAhstsSPhpsplohAILpLpEsGclpplcsKWappp......pCsspsst..........sssspLslpshuGlFllLshGh ................................................................................................................................t..plW...h.h..l.hhs.h...l.h...l......u.h...l...la.l...l...t..........+....hss.hpapt.................................................................................................................tsph....s.l...h....s..u..h..W....a....s..h.u...s....h.h...............p.....p.....u.sp........hs..P.....+..u.....h.o..s.R.lls..............s.............l.Whh.F.s.......l.I...l..........luSY.TANLAA.a.LT...l...p....c.h...t...s.s...I..p..u..h.cDLtpps.hthhshtshsph.hhpppp.thhphhh.hh.ttt.t.hhtttttsh.thh...hhhh....................................................................................................................pp.pp.hh.hsth..tthhhhh...ts.ht.h.hhhhh..ptt.h.phhpchhhtts..tC.tptt...........tsstLslps.htGl..Fhllhhu............................................................................................................................................................................................................................................................................................ 
0 597 867 1417 +548 PF02900 LigB Catalytic LigB subunit of aromatic ring-opening dioxygenase Griffiths-Jones SR anon Structural domain Domain \N 19.80 19.80 19.90 19.80 19.60 19.70 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.68 0.70 -5.41 47 3175 2012-10-01 19:17:44 2003-04-07 12:59:11 13 7 1832 5 918 2499 472 260.20 23 90.78 CHANGED sshhhSHssshhhh.s.....sttph..t...ph......................ht.htppltp..h.............cP-slllhuscahs....phhss.hlssss................psltD........ah.......h.phphshsGss-LApplschlhtssls............spphslDHGshsPLphhhs..........shPll.lslss.....hhsspcphplGcultphht.....scpVlllGSGuloHplts.phs.............................................hstcaDchhlchlps....s...chptlhshpppt.ttttups.stchhshhhshGAhs...............phhsphhtatslsths..sshs ..............................................................................................shhluHu.s.sh..shps.........st.pt...........th..........................................h.th.t.pplst...................cPcsIll..h..........osH...Whs........phh.s..ls.s...ss.p...................................ps.laD.........as......th..P.t..h...hch..p..Ys.hsGsPpLAp..pls.ch...l.tt..s...s.lshth..............................p.shsl.D..HGs...h..sP.....Lhhh.hs...........................chc.lPl.l...p..lu......lss...........hhsstpphc...lGcslttht...........-csVhlluSG.s.ls.H..s.l...t....s......p.hstts.............................................................hs.tpFD.phlhphlpp....................s......ch.c...t.L...s...sh...h.p.......p..t..s..s.t...h..u...p...s...s...cchhshlhshGAhss.....................hphhs..chh.htshs.h.h.h...................................................................... 0 265 556 770 +549 PF03893 Lipase3_N Lipase 3 N-terminal region Birney E anon Birney E Domain N terminal region to Pfam:PF01764, found on a subset of Lipase 3 containing proteins. 
21.10 21.10 21.10 21.50 20.90 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.44 0.72 -4.16 10 181 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 56 33 114 168 0 71.60 22 15.93 CHANGED hlsslhslusupWuhph..............pcssaots-shspWspusssp...apshspss+shtsVasss....L+sPcltsshsshth ...............................h.hpth.shusApWs.t................plpaohtcshsp..WshussAth..a.s.hsphsphshslhpss....hpsPthts..t....t.......................... 0 14 65 97 +550 PF01764 Lipase_3 Lipase (class 3) Bashton M, Bateman A anon Pfam-B_893 (release 4.2) Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.84 0.71 -4.57 61 3914 2012-10-03 11:45:05 2003-04-07 12:59:11 20 76 1209 69 2118 4488 506 139.30 18 28.90 CHANGED lluaRGTp..shtpahssh.phshsshp.h..............................ssplcpGFhcsap...................................thhpplhsplpc.Llpphs......s..hplhlTGHSLGGulAsLsAh.........lhppt............tplplhoaGsPRl...Gshsauphhs.................t...........hhRllpppDlVP+lPshhh ...............................................................................................................lsh+Go............s..ph....h.....s.h......h............h.........................................................................................tshsc.t.Gh.h..p..h.hp......................................................................................................h...h...p...p..l...h..p..t...l...pp.....hh.....p.phs.....................p..hplh..l..o..G.......HSL.....GGul...As..lhuh........................................h.h.t.ph...........................................hp.s.hs..F..u.s..P.t.l........s..s.....t.h.t..p.hh.p......................................................................hh.p.h.s...t........t..D.lshh....hh............................................................................................................. 
0 783 1383 1855 +551 PF04571 Lipin_N lipin_N; lipin, N-terminal conserved region Waterfield DI, Finn RD anon Pfam-B_4929 (release 7.5) Family Mutations in the lipin gene lead to fatty liver dystrophy in mice. The protein has been shown to be phosphorylated by the TOR Ser/Thr protein kinases in response to insulin stimulation. The conserved region is found at the N-terminus of the member proteins [1,2]. 25.00 25.00 25.90 25.20 23.70 24.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.55 0.72 -4.41 10 549 2009-09-11 22:04:36 2003-04-07 12:59:11 9 10 268 0 306 483 2 107.30 51 13.02 CHANGED MsYV...uuplasoVpclYsuINPATLSGAIDVIVVEQpDGoapCSPFHVRFGKh.GVL+ss-KhV-IplNGp.sDlpMKLuDoGEAFFVcEs.--p.pclPshLsTSPlsssssu ....................MsYV....tplhsoVpc.h.ap...ulNsAT..L.SGsIDVIVVc.......Q.............D..G......o.......hpCSPFHVRFG..Kh..GVLRucE..K...l......V..-IclNG.....ps..l..D.l...pMKLG.-sGEAFFVpE.s....-.s.p........p....lPt.LtTSPl.s...s..................................................... 0 91 150 231 +552 PF03180 Lipoprotein_9 NLPA lipoprotein Mifsud W anon Pfam-B_1418 (release 6.5) Family This family of bacterial lipoproteins contains several antigenic members, that may be involved in bacterial virulence. Their precise function is unknown. However they are probably distantly related to Pfam:PF00497 which are solute binding proteins. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.38 0.70 -5.46 35 6507 2012-10-03 15:33:52 2003-04-07 12:59:11 9 8 3244 23 955 4106 60 229.70 38 86.36 CHANGED lplGshssscu-lhc..hlpphhKccs.lclclhpFoDYspPNpALscG-lDANhFQHhPYLcphs+s..pttpLVslussalpPlulYS........+KhKsls-L...cGusIAlPNDsoNpuRALhLLppsGLI+LKss.tshhAThpDIs-N.PKsLcl+..El-AutLPRsL-s..VDhAlINssYA.lpAsLsPpcDulhhE.s+.s..uPYsNllVsRp...ssccsstlKcllcAhp.oc-V+phlpcp...a.sGuslPua ............................lplGss..s.s..s..c..s..p..lhc.....hsp..p..h.h.c.c...c..G....lclclhpF.s.D.YshPNpALscG-lDANhF..Q.H...h...P.aL..-p..scc.....+.....Ghc......L..s.s..l..u.s..s.al......P...hu.l.Y...S..................+K...h..K..slc-l......c..Gu..p......l..AlPN.D.soNtuRAL..h.L..Lp.psGLI+L....K....ss....s....s......h....h......u..T....s..t...DI...s-....N....PK....pLKhh...El-.AuQlsR....sLsD....VDhA..l.I..N.s.saA...hp...u...G.L.s..P..t..c............Du.lhh.E.......sp...s..........s..P..Y....s.Nl.l.....ssRp.......sscc.s.pt....lKc.llcs.ap.Sc-Vpchlpcp...a.pGuslss................................................................................................................... 1 211 490 731 +553 PF02190 LON ATP-dependent protease La (LON) domain SMART anon Alignment kindly provided by SMART Family This domain has been shown to be part of the PUA superfamily [1]. 
21.90 21.90 21.90 22.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.53 0.71 -4.65 158 5691 2012-10-02 17:37:24 2003-04-07 12:59:11 11 68 3513 13 2086 4772 4482 199.10 23 30.32 CHANGED plPllPLtshl.l.aPshhhslp.lt..csp............hp......hlcpsh.......................pp.st...h...lhlh.....................................................ptp........s.......t.ssl..................................................aplGsl.....upIhp.h.....................phs..c..Gp..................................................hplllp..Ghp.Rh+l....pph................................................................................ppsa....h...............huclp.h..........................................ptptpptt.......ptltph...hpp..................................hhp.h................................................phhpth.s.phh.t.hpshppssp.Lsshl...ush.ls....hshpc+Qp.lLcthsspcRlptlhphLpp .......................................................................................lPllPL...cshl...l...aPthh...lPLh..ls....ct+s..........................lp.h.lcpsh...............................................pp..s..pt......lhls...............................................................spp.ps...t.p..ts.........s.h.s..cl...............................................................................................................................a.plGsl...upIhp.hh................cls......D...Gs..........................................................................lplllp...Ghp.R..h+l.........tph........................................................................................................................................................................t.pcsa..h.................................................tAcl..p.hl............................................................................tttppt.t..........ptl.tth...h.t.p.....................................................................................................
...............................................hhpth.............................................................................ph..p.ph.....s........c....h....h.t..s....h.p....s..h..s.c..ss....p..L...u...Dhl.......Aut..hs.........lph....p.c.+...Qp.lL....Eh.sl.....pcRLchlhthh..t.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 700 1277 1725 +554 PF00560 LRR_1 LRR; Leucine Rich Repeat Bateman A anon Reference 1 Repeat CAUTION: This Pfam may not find all Leucine Rich Repeats in a protein. Leucine Rich Repeats are short sequence motifs present in a number of proteins with diverse functions and cellular locations. These repeats are usually involved in protein-protein interactions. Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains. 20.60 9.30 20.60 9.30 20.50 9.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.84 0.76 -7.83 0.76 -3.07 2414 25597 2012-10-02 21:32:02 2003-04-07 12:59:11 28 4912 1389 82 13545 96601 2712 23.30 33 5.68 CHANGED pLppLslss..N..plp.p................lst...htph .....................pLphLcLSs.N..pLs.p................................lPs.p..ht...................................................... 0 2975 8467 11049 +555 PF01463 LRRCT Leucine rich repeat C-terminal domain Bateman A anon SMART Family Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. 
This domain is often found at the C-terminus of tandem leucine rich repeats. 20.00 4.20 20.00 4.20 19.90 4.10 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.26 0.74 -7.54 0.74 -3.10 59 1112 2009-09-10 23:15:08 2003-04-07 12:59:11 19 248 79 18 530 955 2 25.80 34 6.10 CHANGED shhCssPsthps..lhphs......phsCs ....sRCsuPtpL+shpltplp.....pcFpCp..................... 0 82 113 266 +556 PF04180 LTV Low temperature viability protein Wood V, Finn RD, Mistry J anon Pfam-B_15065 (release 7.3); Family The low-temperature viability protein LTV1 is involved in ribosome biogenesis 40S subunit production [1]. 25.00 25.00 26.50 26.60 24.90 24.60 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.55 0.70 -4.97 4 274 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 231 0 202 269 2 368.80 26 86.00 CHANGED sKKpAppFpLVHRsQcDP.haDEsAsp+VLlPspp.N..Kou...shpspDtp.s...shRspcGEAApYGlaFDDoEYDYhQHL+slGpssusulhlsupshspp.cpK......ctpc.h.hpspspcL..p-shhsphphph....shtsQQs......sPDtIuGFpPDMDPcL.......REVLEALEcpuhshND-E.s.--...........DucEhc-hDhp.........tthDEh-D.ut..s.p..cs.cc..a..s-E.sch..h-hspsupshshps-hpht.cca...chhpK+psDst.usus.u-hpsS.ppDs......l.-h.po..pKu+s+pKtuAhoshSshSMSSSALsRoEsholLDspa-clcEc.........Ysshh--hp.lc....................Qs....sVhsc....tppF-sMhD-FLssatsp......sRphucp+hcpp+hKpul-ElcchR+s.t+ARh 
.............................................................................pKKpAhpapLVHRuQpDPLht.DpsAsp+VLh......s..s.....................................................................t.................p...t.p.....................t...t.pcuEttpYGlaa.....DD..c.YDYhQHL+...........t....p.sss....s.hlt.sts.....ttp....ctc...............................ph.c..............t......t..........p......l.............ss...s......h.h.Sp.hppth...........shts...s.........sPD.lAuhps.D..h....D...c..............c..lLpA..cspsh....t.t....-..p.t.....pp..........................csp.Ehp..-...-.....t.............................t........t....p.-..p.p.t..........p.t................tt.....t..p.....a............c.-...s..p.........t........hp..........s.s..ss.s..h...p.h....s.tp.hh..h..pcF................c+.....c......p.h...s.............t...h.........u..sh.s.....s.....c....h.p.s.............p.........t...........s...s..................h.......ph..t...............pttc.....pp...h....+....h......ssh.ps............p.....s..........h.......s.hs..sst.h..cs...Et...ph.....l.ssph-..php.pt.........................Y.sp............p...c..p..p.....t.................................ps..s.sh.st.......tp...p...hpp.h.......pt.....ss.....p......................R.hsc..p.p..t.cp.tt.h.ppuhcp...pc.+t..t.sp............................................................................................................................................................................. 0 68 104 163 +557 PF02123 RdRP_4 Viral RNA-directed RNA-polymerase Mian N, Bateman A anon IPR001795 & Pfam-B_6212 (release 8.0) & Pfam-B_9867 (release 8.0) Family This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.79 0.70 -5.78 28 1146 2012-10-02 12:54:00 2003-04-07 12:59:11 11 8 405 10 1 4591 0 427.20 31 60.01 CHANGED sphhstspphhtphhshpttstsst+sslp.tplhtsthpphsps.........sth..shhphhh..............................................................ps.h.shh..............hh.......................................h.......ht...............................................tss.p.sshhsphpsl.G+hssshc.......hptEhpp+sssslsh.thsptspctl.......hshltplhppcl.................................p.h.tp.....ttt..............pthhpsc..hh..tsschs++sls-s......lchh.sptcscst........h..tpKl.Ep................................G.+sRhIh....usshlsalshchlhtstcc.........th.supshlshssttphhshtpcl.............hpttt.hhhhDhosFs.SpHshcshptlh....p.hpppls.shshttt..........h.h..hhpshtshhlhlsss..htp..........chhGslhSGp+sTohhNSllNhshhphshuthshsh.........ps.GDDslhuhpss..............hhcphpphsh+spshKpshu..tuE........FhRht..............hpptshtu..a....................lh..RssushloGsW 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...h.h.s....ph...............php.c.s.shs..th.t..tp............h..l.thh.p.l..............................................................................p.h.psp........h........s.phscphLt-s..................lph...sttpsc.p...................tK...c..............................................................................................G..+hRlIh......ph.lsp...phl....h.chhh...tpc.............h...t.p...hhs.........s..s.......t..p..h..........t.l......................h..hDhotas.S...p....sh....cch.....................lps.hh...s........s..ps..psh.........................p.Lh.t.t.h..hs....l....h.......sS...h.l....p..lsD.Gs..lltp....................................h....GshtSGphpTpusNSh.h.....p.....l...h...h....h...h.........t.....s.....h..........u.....p......h.....u...th........................................GDDshtshp...s..................hthYtp.h.s.hK..scs.............................................................t...thh....a.........................p..................................................................................................... 0 1 1 1 +558 PF01476 LysM PG_binding_2; LysM domain Bateman A anon Bateman A Domain The LysM (lysin motif) domain is about 40 residues long. It is found in a variety of enzymes involved in bacterial cell wall degradation [1]. This domain may have a general peptidoglycan binding function. The structure of this domain is known [2]. 
20.90 11.80 20.90 11.80 20.80 11.70 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.96 0.72 -4.06 156 27194 2012-10-01 23:00:54 2003-04-07 12:59:11 15 602 4537 7 7042 21185 3954 44.30 31 16.15 CHANGED apV+pGD..TLhpIAppa.....................shshpplhphN..........slsssp.....lhsGQpLpls ....................YsV.pp.GD...o.Lhs....I.A.p.ca.........................ss..s...hp...p....l...h..p....h..N....................s..l...s....ssp..........lhsGQpLpl.................................................. 1 2348 4523 5983 +559 PF03466 LysR_substrate LysR substrate binding domain Bateman A anon Bateman A Family The structure of this domain is known and is similar to the periplasmic binding proteins [1]. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.20 0.70 -10.87 0.70 -5.23 415 85941 2012-10-03 15:33:52 2003-04-07 12:59:11 15 39 4494 158 20277 62785 6990 202.80 15 67.81 CHANGED pst.pG..pL+lus..sshu...tthlsshlspFpp.p.aPt.....lplplptss..tplhchltpGplDlulhht.............tsssltsp.Lh.ppphsllss...ssa...............Lsp.......ttsl.shpcLtp..pshlhh..p..sss.......hpphhpphhppt......shp.........phthpssshpshhphltsGhGlullPphhht..p.h...tpsplh.hslss.........sht.tshhllhtpsp..h.ss......thpshhchltp.ths 
......................................................................................t....pG..plplu.ss...s.s.hs.....tth...l.s...s..h...l.t.p..a....t..p.......p.....a.....P..p...................l..p...l....p.......l.....t...........t.......s...........s........................p..........h.....h......c..........h............l..........t.....p.....s.............p....h..D..l..u...l..t..h..tsh.................................tss.s.l...t....s...p....L....h....p........p.........p..h..h.hl...s..s........ss..a...........................................................l...sp..............ttsl...s....h.....p...c....L.....t.........p.......t..s....h....l....h.h.....p...t..sss.....................hpp..hh.p....p...h....h..p.pt...................sh..p.........................pht...h...p...s...s...s...h....p...s....h...h....p..h.l......t...s......G......h......G..........l......u..h...l..........P....p...........h........h.......h....t...........p..h..........t.p.....s.......p..........L.......h........h.s.l..st.........................tht...hs..h..h...l....h...h...p.....p.....p....p..........h....s.s.......thp.thhphlht...t....................................................................................................................................... 
0 3804 9358 15032 +560 PF03816 LytR_cpsA_psr Cell envelope-related transcriptional attenuator domain TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.81 0.71 -4.30 37 6551 2009-09-11 05:59:27 2003-04-07 12:59:11 9 20 2068 19 1112 4741 444 152.40 32 38.41 CHANGED RoDohhllplssppcpsphlSIPRDohVpl............ss........p........hpKlssAhshG..................usphshcTlcphhu.lslDaYstlsapuhtpllDslG..GVcVssspshp.........sst.........................................hphpsGpppls.GcpALsasRhR+sss.G.....DhuRhcRQppllpulhpchts .....................................................................RoDohhlhslss..p.s..c..p..s..pllSlPRD.o.h.lp.Is...........................sh.............p.............tsKlNtAa.shG......................................Gsph.shcTl.cp.h........h...s.......l...s...I...-....aYsplshpuFtcllDslG...GVc.Vss.......sh.......s..hs...........pss...............................................hp..h.p.t.G.....p....p.....p.Ls..Gcp.ALs...asR...hR..........p............ss....s.....................G............Dh.........s....R.tp..RQppllpulhp+hh.s.......................................... 0 408 799 997 +561 PF02847 MA3 MA3 domain SMART anon Alignment kindly provided by SMART Family Domain in DAP-5, eIF4G, MA-3 and other proteins. Highly alpha-helical. May contain repeats and/or regions similar to MIF4G domains [1]. 
20.70 20.70 20.70 20.70 20.60 19.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.14 0.71 -4.37 29 1720 2012-10-11 20:00:58 2003-04-07 12:59:11 12 37 326 24 1096 1714 25 110.00 21 15.93 CHANGED h++chttllp.EYh..tDhpEAspsl.pcLths..phctclV+hhlshsh-c....pptpchhuhLLpchhptshlsspphppGF.chhs.sh-DlslD...............lPpshphlupFlu+hltsshLs ....................................c+plhhhlp.phh........D.hpEAhppl....pc.L....ph......s.....p......hp.....t......c..............l................lph.hlptsh-c...........pphpchhuhL.lppL.s.....p..t......s..h....htppp..h.ppuFh.chh....c...ph...c..-...l.tlD....................................................hs.thp.l.uphhuchltsshl.......................................................... 1 344 582 853 +562 PF01454 MAGE MAGE family Bateman A, Wood V, Finn RD anon Prodom_3141 (release 99.1) Family The MAGE (melanoma antigen-encoding gene) family are expressed in a wide variety of tumours but not in normal cells, with the exception of the male germ cells, placenta, and, possibly, cells of the developing embryo. The cellular function of this family is unknown. This family also contains the yeast protein, Nse3. The Nse3 protein is part of the Smc5-6 complex [2-3]. Nse3 has been demonstrated to be important for meiosis[3]. 
25.00 25.00 25.80 25.30 20.80 22.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.39 0.71 -4.97 47 1361 2009-01-15 18:05:59 2003-04-07 12:59:11 14 13 248 3 734 1407 0 168.80 36 45.07 CHANGED LVpalLhK.ptKp..PIp+u-hlctVl+ca.......+cpF..scIhpcAucpLchlFGhcLpEl-sp.............................................................s+sYlLlspLshs..................ssh..lsss...pshP..............................+sGLLhllLulIahpGNpssEpplWchLphhGlhss.c....cH.laG.-s+KLl.........op-hVppcYL.cY+.plssocPscaEFh.W..GPRAhsETSKhKV ....................................................................................LVpaLLhK.ptKp...PIp+u-ML..c..lhcca........cchF...PpIhp+AspplchlFGlcl+ElDsp..................................................................s+sYlLlspLshs..........................................ssh...lsss..pshP..........................................+sGLLhhlLulIFhp......G.......N......pusE...........pp.....lWchLp.t...h.G..lhsu..p...........cH.laG.-s+...+Ll........................................Tp-hV.ppp................YL.c...Y...............+....p.............V.............P..s....Sc..............P...........s..c..YEFl.W..GPRAhtEToKh+V.................................... 
0 103 147 224 +563 PF00390 malic Malic enzyme, N-terminal domain Finn RD anon Prosite Domain \N 19.20 19.20 19.20 19.20 19.00 19.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.02 0.71 -4.70 48 7633 2009-01-15 18:05:59 2003-04-07 12:59:11 14 36 4079 97 1959 5725 2415 136.20 38 27.44 CHANGED ps+pEhlah+hlpsp...-.hPllYTPsVupsCppauc.hcpsp..hahohssht...h.t.h.th.......plhVlTDGstILGLGDlG.huGhslslGKhsLasshAGlcPt.slPlhLDsGssp.t.hts..YhG....R..s....thlDcFlcslpphasspshIpaEDhusspAFplLc+a+ .................................tc.phh.hp.ltsp.....c.LslsYo.PsVutsCpphsct........pp.s.............h.....hahsh.s..sh............................................................VhV.los..Gp..u...lLGLGslG..huuhs..............l........GKhs.....La.............p.....thuGl.c...s.....hsltlDst..............................s-chlpslp..th..sh............uhlph..EDhtsspsF.l.p+h+............................................................. 0 602 1170 1596 +564 PF03949 Malic_M malic_N; Malic enzyme, NAD binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 23.50 23.50 23.50 23.50 23.30 23.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.45 0.70 -4.83 66 6923 2012-10-10 17:06:42 2003-04-07 12:59:11 10 36 4103 83 1803 5218 2322 236.30 38 43.02 CHANGED .QGTAsVslAullsAl+lsucs...l......p-h+lVhhGAG..uAGlulschlhpt.....Gls....t...cplhhlDpcGllpcsR.ss..h......s.....shptt....aA+ppsphp.............hsLt-slcss..slllGsSt.ssGshsc-hlcpMup.........+PIIFALuNPss..EhpP--Ahphssutsl.ATG...........+o.hPsQsNNslsFP..GlhhGslsspAppIs-cMhhAAAcAlAshsspc..............phst.shllPshtchR.luhplAhAVuctAh 
......................pGTAlVshAullsAL+....l.s....u+p..............l..............p-...h+lVh.GAG..uAGlu...hschllsh.........G..lp............hcplhhsDp..................p.....Gllhcs..R....s.s...L........s......thppt...........aApp.s.st.t.................................toLt-slcsu..Dlhl........Gl..........S....s....ss...s......hopEhl+pMup..................cPIIFALuNPs.....s..EhhP.cp.Ahphss...sts.hA....TG..............................................R..o.........t....a..PsQsN..N....sLsFP....GIhhGsLss.tA.pp.Is...--Mhh.AA..u.c...........AlA.phsp...t..p..........................ph.u.....s...hllPpshD......R..l.hplA.AVAcsA..................................................... 0 555 1086 1472 +565 PF00629 MAM MAM domain Bateman A anon Prosite Domain An extracellular domain found in many receptors. 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.29 0.71 -4.46 92 3415 2012-10-03 19:46:52 2003-04-07 12:59:11 18 308 158 2 1950 3145 308 149.40 23 28.95 CHANGED CsF...Eps.....hC.uappsps...ssh..cWpp.hps.ss.......suPttD+ohss........Ga.ahhlpssts....ttGp..........pA.pLhS.hh........stss..........stC.lp.FaYah.hGpsh....Gs.Lplhlcpp...........tsttph.....lWph.sGspu......spWppsplslss....sppapllFEu.htus..stt.GslAlDDlplp....ps....tCsp .........................................................................CsF.....-ps......hC..s...ap....p.......ttt...............ssh.....pWph...ps...s......................sP..hDpohts.........................Gh.a..h..h..lpssts......t.up......................pA..pL.h.Sshh.....stss.......................stC...lp..Fa.Y.ah.....h.....Gtsh......................Gs...L..p..l.hlptp..............................tth.tp.h.....................lW....ph.....s..u....s.p.u...................stW..p..p.s.p.ls.....lss.......sp.....ta............p..l..lF...c..u...h.hst................s.t...us..lA.lDDlplt..t.....C..t........................................ 
0 938 1052 1428 +566 PF03999 MAP65_ASE1 Microtubule associated protein (MAP65/ASE1 family) Bateman A, Wood V anon Pfam-B_12512 (release 7.3) Family \N 21.80 21.80 22.50 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 619 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -13.08 0.70 -5.92 6 605 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 273 3 390 587 4 449.20 20 74.33 CHANGED hpssplsclcpcspsshpctccEs..sptppullppIustcsElssL...........sphLc.......................................tthshhpp.cshshh....+Lccpl-pL....................RcphspRhsElpELhcQ.ppLCppLGp.sLsh.........h-sssLs.....Ech-phRp+lspLc-p+spRLc-hssl+psIpphhchLspp.t....sshEpclhs............ss+slo.EshspLpphhcphpsp+pcpt-+lcslpsplppLWsRLphosE.tp...............hp-Aoshoppohcl...lccElpRLptlKppplKphIEphRlEIpEhWDphhhSpEpR+p..Foshap-hhs........EpLLEtaEsElccLKphhsspKcIh-LlpcatpLhcchctLEtpusDsNRas.sRG....Gp..L.LKEEKpRKplsp+LPKlpppLptclpsaEscpspsFLhcGpslLEhhu.......spWEc+RppKppupspKKsssppss.hcss.ts..sssPpTPssp+sstslsssTsuht+s..hppsppptpsossstpoGsh+p+ssspRh.ssssp.suAppuptssh.RsttssssuptssSssptp.pos.sshhhspshshshcpssspsphpssspsspptcslppspsusssps-soostpsu 
......................................................................................................................................................................................................................t....................................................h..............................................................................................................................l....hp..h...............................................................tt.ttpRhpph...tt...l...tt...p....p....h......pt..............................................................h.t...t.s.ls............pplpphptplppl....ppchp.pRhpph..phh...pplhph...ht...Lsh.s.............p.ht...t......h..t.............................................................tt.pths.l.sp....pslt.pLpphh...........ppLpt...p+ppp.pp.....h...........pplttpl.pLW.phh..p...h..s...tp-pp...F........................................h.tt.s.t....s....hs....h...phlpt...........hp.ElpRLppLKtp..ph+ph...l.p.......h+..-lpphhcth.a......h....s...-...p...ttt.............h.....h...h....h.....-.s.hs....................................p...p...L...L.p.h-.tpl.tclcp.hts++plhchlp+a.phhpc.p.LEp....hs.pDss...........Rat...sRG........up........L..L+ct.cpt+tlh...pc.lPt..l.ppLt.tp.lptWE....p....cp.......s.p...s.......F.h.h.pG.....phlphlp.......................h...p..t.p...c............p.+.......p....p........+...p............p.+...p........h....t....t.t..............t...t...t.p.............t..t......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tt.................................................................................
................................................................................. 0 110 216 309 +567 PF00917 MATH MATH domain Bateman A anon Pfam-B_1602 (release 3.0) Domain This motif has been called the Meprin And TRAF-Homology (MATH) domain. This domain is hugely expanded in the nematode C. elegans [3]. 21.20 10.00 21.20 10.00 21.10 9.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.59 0.71 -3.86 52 3261 2012-10-02 00:06:50 2003-04-07 12:59:11 21 124 446 92 2107 3079 7 124.00 17 25.90 CHANGED lcNhSphp.......pspphhs.shpp+ashsWp................lplhppss.......ahulaLpCtpt..ps..........hpWslpschplpllsssGpp.........hppphp.....psFsps.................................hshGhsp.....alpacplppch...lhcDolhlcupVcI ................................................................................ht.h...............thh..s..s.h...h.t..h..tG...h.p...Wp..........................................lpla.pGss................pssalS.l.alpl..h.ps.t.ps.........................tWsh.p...s..p...h...p..lpl..ls..p..p..spp.................ptp.p...........ppF.s.pt........................................................................................psh.G.atp...........Fhp..hp.pl....p.ptt.......a..l....h..s..D.s.lhlpspl.......................................................................................... 0 726 1040 1659 +568 PF01429 MBD Methyl-CpG binding domain Bateman A anon Bateman A Domain The Methyl-CpG binding domain (MBD) binds to DNA that contains one or more symmetrically methylated CpGs [1]. DNA methylation in animals is associated with alterations in chromatin structure and silencing of gene expression. MBD has negligible non-specific affinity for DNA. In vitro foot-printing with MeCP2 showed the MBD can protect a 12 nucleotide region surrounding a methyl CpG pair [1]. MBDs are found in several Methyl-CpG binding proteins and also DNA demethylase [2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.56 0.72 -4.45 16 1074 2012-10-03 08:51:45 2003-04-07 12:59:11 14 66 135 6 573 1046 14 75.90 25 10.19 CHANGED hpppp.tcssL.tGW+Rchh.RpsGpph....................s+hDlhYhu...........P.sG++h+ohs-lhpYLppss...............................................hphppFsFssthhhsp ............................................................................................t.......p.sL..GW.pRchh.....R...p..t...Gt.ph.......................................tch.-lhYhu..................................P...sG+.chRoh..s-l....hcY...Ltp.ss...............................................................................................h..ppFsFpsh.....hht........................................................................................ 0 130 240 380 +569 PF03062 MBOAT MBOAT, membrane-bound O-acyltransferase family Bateman A anon Pfam-B_2359 (release 6.4) Family The MBOAT (membrane bound O-acyl transferase) family of membrane proteins contains a variety of acyltransferase enzymes. A conserved histidine has been suggested to be the active site residue [1]. 
20.90 20.90 20.90 21.00 20.60 20.80 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.26 0.70 -5.31 41 4677 2012-10-01 20:09:06 2003-04-07 12:59:11 14 27 2309 0 1785 4135 489 287.90 18 62.65 CHANGED hhthhhhthphhslhhhhslshhshphluhhhphtcs..................................................................................hhpthshhcahsYlhahs...................shhsGPhhpapcahpthc.t..hphthh..................t.hthulthlhhshhhhhhhthhlshhhhthlphhthhp......hshhhhhhhhhhhthhhha...th.hhshuhshhhGlt....................................ts.sh.psh.tspshpcahcpWphslspWLtcYlYhphh...........hhhtphhuhhhsahlsulWHGhphtallashhpslhlhspphhtphhphhthtttttththhthhhthhhhhhhshhhshhhh ..............................................................................................................................................................................................................................hhh.............thhh.hulsh....h....o...h......p.l.shh...h..-hhc..t.............................................................................................................................................................................................................................................hhtphsh.hpaht.al.haaP...................................................................plhu..GPlhcapc.hh.pp.h..p....p....h.p...ph..........................................h.t.t..ul.t..h.l....h..h..G.h..h..h.c.hl..l.......u.........h......l.s....h...h..h......h..t...h...h...p..t..........t...........................................................................................h.......h....h....h.h....sh...h...ta.s..h........l.....a..h.cFsu....a..s...h...A.l.uhuh..hhGhp..................................................................................s..NF......psa.h.u.psl..p-F...W.p.+WHholspWh.+..cY.l.Yh.hsh................................................pp.thh.t...p..h....h....s.....h.......hl.sah..lsul.WH.....G..h..........s.h.t..a....l............l...
.......a.....G..h...........h....p..u.h.hh..h....h...tp..h....h...h....p........h....h....t......h................h...................t....t...h....h...h.....h.....h..........h...h..h..h...h.......h.h.hh....h.......h...................................................................... 0 630 1009 1447 +570 PF02820 MBT mbt; mbt repeat Bateman A anon Pfam-B_526 (Release 6.2) Family The function of this repeat is unknown, but is found in a number of nuclear proteins such as drosophila sex comb on midleg protein Swiss:Q9VHA0. The repeat is found in up to four copies as in Swiss:Q9UHJ3. The repeat contains a completely conserved glutamate at its amino terminus that may be important for function. 20.20 20.20 20.30 20.60 20.10 19.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.19 0.72 -4.06 18 2104 2012-10-02 16:56:36 2003-04-07 12:59:11 13 50 103 159 1180 1975 0 72.60 33 29.03 CHANGED MKLEAlD.pssphlClATVspVhGpc.Lpl+aDGacsph....DaWsph-.SsDIaPlGWC-tsuctLpsP.shtppth ..............................................................MKLEul..D......p...s..s....p...h.h.p..lAT.V..s.cl.......h....upc...lhl.....+.....a..........D.G....h..c...ssh....................DaW...s.phs.S.scIaPVGWCpps..G..p....Lp.PP.sh.....h................... 0 218 298 671 +571 PF02470 MCE mce; mce related protein Bateman A anon Pfam-B_475 (release 5.4) Family This family of proteins contains the mce (mammalian cell entry) proteins from Mycobacterium tuberculosis. The archetype (Rv0169), was isolated as being necessary for colonisation of, and survival within, the macrophage [1]. This family contains proteins of unknown function from other bacteria. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.65 0.72 -3.98 125 12284 2009-01-15 18:05:59 2003-04-07 12:59:11 15 19 2490 0 2643 8745 850 86.00 22 29.09 CHANGED sshplpsthssu.sGLpsG.ssVphpGlpVGpVps..l.............p.sspp......lplphplp...............................................psh............................hlspsspuplpst.sll.Gt........................................palslpss .................t...plphphs.su..sGLp.sG..ssVphp..GlpVGpVpsl........t.............h........ssss.pp....................lplph.plp......................................................sph.................................hls.p.ss.p...u.slps...sll..Gt..........taltlp.......................................................................................................................................................................... 0 540 1469 2139 +572 PF05053 Menin Menin Moxon SJ anon Pfam-B_5848 (release 7.7) Family MEN1, the gene responsible for multiple endocrine neoplasia type 1, is a tumour suppressor gene that encodes a protein called Menin which may be an atypical GTPase stimulated by nm23[1]. 
19.20 19.20 19.70 19.20 18.70 18.40 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.08 0.70 -6.35 3 147 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 84 8 78 156 0 425.50 46 94.18 CHANGED MGl+usQKoLFPLRSIDDVVpLFcuELsSs.EPDLsLLSLVLGFVEHFL.........AVNRVlPVNVP-loFpPosusDPsuGNSs.FPVV-LsLIAALYcRFpAQIRGAVDlSpYPKPuGsSSRELVKKVSDVIWNSLSRSYFKDRAHIQSLFSaIT.....GTKLDSSGVAFAVVAACQlLGLKDVHLALSEDHAWVVFGpsGEETAEVTWHGKGNEDRRGQTVsAGIsERSWLYLKGSYM+C-RsMEVAFMVCAINPSLDLHTDSlELlQLQQ+LLWLLYDLGHLERYPMALGTLADLEElEPTsG+.SPLoLY+KAIESAKTYYRNEHIYPYMYLAGYHsRpRcVREALuAWA-AAoVIQDYNY.CREDEEIYKEFFDIANDVIPsLLKEsASutEAc..-Et.tEpp.tptuthSALQDPECFAHLLRFYDGICKWEEGSPTPVLHVGWATaLVQSLoRFDuQIRQKVsIlo+E..................sEAsEu-EsuGEEAREG.RRRGPRRESKs......................................-EPuGutSPNPcLPApNpNssospsuss.GucRKsuAoTsGsAssscNGSoosVPlPouSpP..................PphptG..................PVlTFaSEKMKGMKELLlAsKlNSSAIKLQLTAQSQVQMK...+QKsSAouDYTLSFlKRsRKsL ..........................................................................................ttthFPlpshssllplFtttLt......p...EPDLsLLSlllGhlE..L..................s.stshsss.....sth....t....................sh...FPshphphltuLYt+F.s....hls..h....s.h...t....tt.uoREllKKVSDVIWNSL...RS.aKDR.AHlQsLaSalo..........GsKLDs.GVAhAVVuuCQhLGh+........DV+LAlSEDHAWVlFG...p.t.pThEV.TWHG..KGsED+RGQslt.........sGl.tpt.SWLYltG..hhCsR.tMEVAhhVsAlNsSl.......shpo....DshElhpLQQpLLWlLYDhGaLc+YPMALGsLu-LE-...ht..........o...............s.....................p.ss...platcultuuppaYp.spHlYPYhY.usa.h.R......h.hp-....Ah......tsWAps...upVhp.YsY.sR-DtEIYKEhh-lAN-lIP.hh+.ts....p........................................phLppspsau.lLpFYDGIC.WEEsu.TPlLHl.sWAp.Lltsls+Fp.plRtpl.l....p..........................t..t...t.................t.........ps....pp..............................................................................................................................................................................................................
...................................................h...........................................sh..l.h.StKMtsh.thl..s.t.+lNstA.htL.lTAQS.sp................................h........................................................................................................ 0 32 35 60 +573 PF00149 Metallophos STphosphatase; Calcineurin-like phosphoesterase Dlakic M anon Dlakic M Domain This family includes a diverse range of phosphoesterases [1], including protein phosphoserine phosphatases, nucleotidases, sphingomyelin phosphodiesterases and 2'-3' cAMP phosphodiesterases as well as nucleases such as bacterial SbcD Swiss:P13457 or yeast MRE11 Swiss:P32829. The most conserved regions in this superfamily centre around the metal chelating residues. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.44 0.71 -4.92 324 31320 2012-10-02 19:15:56 2003-04-07 12:59:11 23 436 5141 215 11462 33578 5742 205.20 13 48.44 CHANGED hplhhhuDlHsphtph.............................................hthhphttptpsshl.lh.sGDhsspsthshtthhhh.....................................................................hhthphst.hhhlhGNH-htthsphhhhh.th............................................................................................................................hhht.thhthhhphhshh.htthtshpllhsHssh.stht...........................................................................................thhtthhtptphchlltGHpHt. 
..................................................................................................................................................lhhhuDl.H....h.p..htth.......................................................................................................thh.h...p...h....t...t.....p.........t...s.......s.....h........l....l.........h....s..........G.D....h......h......s........p....s...s.........s....t.s.h.thh...........................................................................................................................................................................hhhh..p..h......t............h..h.....h.....l.h...G..NH-....h.t....h.......h....h.....t....h....h.....t..........h.......t.....t...h...........................................................................................................................................................................................................................................t....h.....h.....t......h......t..................t.............h......h............h......h................h......h...............h......t.........t........................p......h........h......l...........h.....h............s.......H......t...s...h......s.t....................................................................................................................................................................................................................h..........t....................t.....h.......p..h....h...h.h.uH.h................................................................................................................................................................................................................................................................................ 0 4014 6855 9602 +574 PF01420 Methylase_S Type I restriction modification DNA specificity domain Bateman A anon Bateman A Domain This domain is also known as the target recognition domain (TRD). 
Restriction-modification (R-M) systems protect a bacterial cell against invasion of foreign DNA by endonucleolytic cleavage of DNA that lacks a site specific modification. The host genome is protected from cleavage by methylation of specific nucleotides in the target sites. In type I systems, both restriction and modification activities are present in one heteromeric enzyme complex composed of one DNA specificity subunit (this family), two modification (M) subunits and two restriction (R) subunits [2]. 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.11 0.71 -4.27 65 11631 2012-10-02 00:09:28 2003-04-07 12:59:11 14 41 3019 14 2039 9932 1120 165.50 14 72.39 CHANGED s.cWc.phpLs-lhcl....tpGpt.tpp..phtpsGph...........Palsss.thps...............thhsttpphhhp.tsslhlstp........u....shGpshhtst....t.....h..hssp..phtslpsh..ph.h....chhhhalt....thhp......phpp........thussh.plstsplp..phcl.lP.......sh.pp......QptIschlcphtp...plpp.phhpplcp .................................................................................................................................h...hpl..t...p....l.h..p.h...........t...u..t...t.......p.............t..t...t.s..t..h.....................................shl.phs.....t.............................th.....t............p.....p.....h..........h...h....p.....t.....ss....l.l..hstt.......................u..sh.G..p....s...t..h...h..pt.............p...............h.....h.h.s.p....t.h.hhl...psh.....ph...............p.a.l..hhh.lp.......th.hp............ph.p.ph......................sp.u..s..s..h.....p..l...s..t...s..p...lp.....p.h..l.s..l.P..............sh..pE.................Q....pc.Is....p....h....l....p....p....h....tp........th.t..........t.................................................. 
3 744 1450 1786 +575 PF01795 Methyltransf_5 DUF36; UPF0117; MraW methylase family Ayala JA, Bashton M, Bateman A anon Pfam-B_1376 (release 4.2) Family Members of this family are probably SAM dependent methyltransferases based on Swiss:P18595 [1]. This family appears to be related to Pfam:PF01596. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.69 0.70 -5.15 16 4779 2012-10-10 17:06:42 2003-04-07 12:59:11 14 11 4534 7 1122 4036 3223 300.00 42 95.42 CHANGED phtHhoVLLcEslchLp...........l+ssG...lYlDsTlGtGGHoctlLppLsp.t+LhulDpD.pAlttAppcLp.a..ct+hsll+ss..Fsplhphhtct...............hltclDGILhDLGVSS.QLDps-RGFSFpcD.uPLDMRMDpossl.TAtEllNshs.p-LscIhhcYGEE+auKpIA+uIhctRc......ppPhpTTt-Ls-llppshPsht.....++t.......ttHPAs+sFQAlRIhVNcELppLccuLppAhchLsssG.RluVIoFHSLEDRIVKphF+-hsph..p.P.uLPhh.pthp.......hlT+KPIhPSpcElcpNsRSRSAKLRlsEKh ....................................t..pHhsVLLpEsl-.s.Ls...........................................................l+.ss.G..........lYlDuThGtG.GHSchl...L.p.p..L.sp...tG......+......L..............luhDp..D....pA...lttu.....p....p....p......l.p..h...........ssRhshl..+us....Fs.p.l.....t....ph..lpch.......................................................tl..tclDGILhD..L..GVSS.Q......LDp......sER..GFSa..p..p.D..uP.L.DMRM.............D...............t.................s...........p........u.....h....oAt-llNphstp-LsclhhpYG..EE..+...a..u+pIA+t.Il.p.t.R.p......p...p....P.l..po....T.t-Ls-llp.p...u.....h...Pt...tt...................+.c..................t.t.HPA.p.RsF.QAlRItVNsELptlcpu.L.p....p.A.h.c.h.L...............ts........G.G....RluVIoFHSLEDRlVKphh........+........c...t..........u.........p.s............................p.s...P........t......s.......l.......P...hh..........p.p.h...p.sh..........................hphl.s.+...K......s...l.......hPoccElppNsRuRSA+LRlAE+.h.................................................................................. 
0 409 743 952 +576 PF05060 MGAT2 N-acetylglucosaminyltransferase II (MGAT2) Moxon SJ anon Pfam-B_6001 (release 7.7) Family UDP-N-acetyl-D-glucosamine:alpha-6-D-mannoside beta-1,2-N- acetylglucosaminyltransferase II (EC 2.4.1.143) (GnT II/MGAT2) is a Golgi resident enzyme that catalyses an essential step in the biosynthetic pathway leading from high mannose to complex N-linked oligosaccharides [1]. Mutations in the MGAT2 gene lead to congenital disorder of glycosylation (CDG IIa). CDG IIa patients have an increased bleeding tendency, unrelated to coagulation factors [2]. 27.00 27.00 27.40 27.20 23.10 22.60 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.42 0.70 -5.54 10 200 2012-10-03 05:28:31 2003-04-07 12:59:11 9 4 105 0 131 210 0 274.60 35 75.24 CHANGED sloLptRshl.shN..p....l.N.sDlaspLsp.........scllIVlQVHNRP...pYL+lLl-SLu+s..+GIscsLLIhSHDhYssElNcllpuIcFCpVhQIFaPYSlplaPssFPGssPsDC.pphcK-cAhcppC.Nuc.PDpYGpYRpu+hsplKHHWWWphNhVWDtL.ccs+sasGalLFlEEDHYlhPDhY+sLphlhshK.phCPsCtslsLGshsh.ops.uh-sthsEVhsahuscHNhGhAhsRslWcKI+sCucpFCoaDDYNWDWTL.tlshsChsp.l+sL..s+uPR..lhHh......G-C.GlHp....sssCpss.ssstplccls+..slpsphaPpslploc+ps.uhpush+sNGGWGDhRDRcLC .............................................h............hN....thhs..........................phhlVl.VHsR....pYL+hLl.SLtps..psIppsLllhSHDha..-hspll.t.s.lsFC.....V.hQlFaPaShphaPppFPG.sPpDC....t+ptAh.htC.su...PD.aGpaRps.ch.sphKHHWWWKhp........hVapt.....l...c.hp.asGhllFlEEDHalh.Dh.hhhhphh.php......p.CspCt.....hlsLGsh........h..sh.hs.tpps-hhsahSs........a..NhGhAhsRssapplh.t..psppFCpaDDYNWDhoL.hls..t.s...h..............p.....................hh......sh...h.psPR..lhHh........Gc.C....GhHp......ppsCtsp..t..tp.l....pph.p..p.pt.hh..........tl...hl...t.....t.......t........pt.GGWGDh.RD+pLC.......................................................................................................... 
0 44 65 105 +577 PF02142 MGS Methylglyoxal_synth; MGS-like domain Bateman A, Mian N anon Pfam-B_220 (Release 4.4) Domain This domain composes the whole protein of methylglyoxal synthetase and the domain is also found in Carbamoyl phosphate synthetase (CPS) where it forms a regulatory domain that binds to the allosteric effector ornithine. This family also includes inosicase. The known structures in this family show a common phosphate binding site [1]. 22.90 22.90 23.00 23.10 22.80 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.10 0.72 -4.05 164 10875 2009-01-15 18:05:59 2003-04-07 12:59:11 17 65 4646 116 2605 7914 4421 100.00 27 14.06 CHANGED sllshA+tL.tph.G..acllATuGTu.chLp.cs..Gls.hplsphstps.h.s......................hhchlpptpIp...lVlssh.shptsht.........-shs...lccss.phcIshsT ............................llphAptL.t.ph..G....aclhuTu.GTu....p.hL.....p....c.....s...........Gl.............s..........hpls..p...h..s.t.t..s......h.hsuc................................................................................thhshlpp...t.....c...Is.........lVlssh..hshptsst................-stsl.c.css.phcIsssT............................... 0 852 1659 2184 +578 PF01769 MgtE Divalent cation transporter Bateman A anon Bateman A Family This region is the integral membrane part of the eubacterial MgtE family of magnesium transporters. Related regions are found also in archaebacterial and eukaryotic proteins. All the archaebacterial and eukaryotic examples have two copies of the region. This suggests that the eubacterial examples may act as dimers. Members of this family probably transport Mg2+ or other divalent cations into the cell. The alignment contains two highly conserved aspartates that may be involved in cation binding (Bateman A unpubl.) 
21.50 21.50 22.00 22.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.48 0.71 -3.88 17 3706 2009-01-15 18:05:59 2003-04-07 12:59:11 11 18 2452 6 1251 2988 1805 127.60 28 31.59 CHANGED hlPhlhGhsGNhGsphuohlsptLslGplp........tph.plhhcphthshh....luhlhu..shshlhsshh.....sssshlshsVssslhhshhlus...husllslhhc+htlDPs.huuPllTolsDlholhlh.hhlu .......................................................hhPllsuhGGNs.....GsQsholllR....u.lA.....h....s..p.ls.......................ps.h.h.p.l.l.h+..E.l.s.....l.u......hl........................hGll.hu...........hh..h...h..l..h..s...h..h....h..........................................................h....t.....s.............h..h....l....u.h..s.....l....u.....h....u.h...h...s.s.l....l.husl......hGshl...P.hl..hc.+h.slDP.AlsouPhl.TTlsDlh..Ghhla.hsl.................................... 2 418 721 996 +579 PF00993 MHC_II_alpha Class II histocompatibility antigen, alpha domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1288 (release 3.0) Domain \N 20.60 20.60 20.80 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.69 0.72 -4.41 51 3084 2012-10-03 22:02:01 2003-04-07 12:59:11 15 5 277 142 138 2606 0 76.50 45 55.24 CHANGED DHlutY.hthhQshsssGpaha-FDG-EhFYVDLc+KEsVWpLPpFuchhoF-...sQsuLpsIussKtNLslhhKtsNsTssss ..........................hthhts..s.s....s....G...pasa-FDGDEhFYVDlpKKE.......TV.WpLP.F.u.catoF-.....sQGALsNlAssKtNLslhhKpSspss................................. 
0 7 18 38 +580 PF00969 MHC_II_beta Class II histocompatibility antigen, beta domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_331 (release 3.0) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.49 0.72 -3.99 121 17595 2012-10-03 22:02:01 2003-04-07 12:59:11 14 5 639 123 215 16212 0 69.80 52 67.66 CHANGED u-CaF.pNGTc+VRalsRahYN+EEa...lRFDSDVGcahAlTELG.....c.sAEhWNSpc-hLEppRAtVDThCRHNYtlhp ..................cChF...N..G..T-RVRalcRYhYNpEEa....lRFDS..D..V..GcahA.s.TELG......cssAEa........WNSQp.-lLEppRAtVDs..hCRHNYtl......................... 0 23 41 74 +581 PF02816 Alpha_kinase MHCK_EF2_kinase; Alpha-kinase family Egelhoff T, Ryazanov A, Bateman A anon Egelhoff T Family This family is a novel family of eukaryotic protein kinase catalytic domains, which have no detectable similarity to conventional kinases. The family contains myosin heavy chain kinases [1,2] and Elongation Factor-2 kinase and a bifunctional ion channel [3]. This family is known as the alpha-kinase family [4]. The structure of the kinase domain [5] revealed unexpected similarity to eukaryotic protein kinases in the catalytic core as well as to metabolic enzymes with ATP-grasp domains. 
21.30 21.30 21.50 21.50 20.90 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.47 0.71 -4.63 63 765 2009-01-15 18:05:59 2003-04-07 12:59:11 13 29 138 16 532 775 15 177.80 24 20.05 CHANGED FupGsh+tsathh.h....................................ttsphhVsKhhpp.....t.........................p..............................c.hhp-spsphhupphscpFNpph....tts..pp...lp..............ahshh.lhchpsps........................hhhsEta......lpG....capKYNsNt..Gah..............tppsthtphhpAFSHFTYphSptphllsDlQGls...s.......hLTDPtIpot..s.sph....h..u..sshGpcGltpFht.pH.pCsphC ......................................................................................................................................tGthR.shtsh.h....................................p..sp.hlhK.hh...........................................h....................................p.hhhp-..h.p.ph.......hu.phhsptFsph..............t.s.t......p....................................hl..hh..lhp.h.psp..........................hhhlEph......l..pG........ca.h.Kas..sNs..uhh...................................ttssp..h..tphhpAFoHaoYphops....phlVsDlQ......GVs....p........................hlTDPplhshp..tph................h......G..sNhG..pulptFht...pH..pCNphC........................................................ 0 305 370 431 +582 PF02854 MIF4G MIF4G domain SMART anon Alignment kindly provided by SMART Family MIF4G is named after Middle domain of eukaryotic initiation factor 4G (eIF4G). Also occurs in NMD2p and CBP80. The domain is rich in alpha-helices and may contain multiple alpha-helical repeats. In eIF4G, this domain binds eIF4A, eIF3, RNA and DNA [1]. 
21.00 21.00 21.10 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.33 0.70 -4.92 41 2682 2012-10-11 20:00:58 2003-04-07 12:59:11 14 57 339 22 1754 2678 51 203.50 17 26.69 CHANGED t+plpulls+ls....pphpphhpplhphhhpp........phhppllphl...hppshtps......shhshhApLsstlspphs....................pFsphllsphhppFpp...................................ppppptsppcthshl+FluELhphph...lspt.hlhp......hltpllpphsc..................pplchlhplLpssGphLp.....................................................ppsppthcphhpphpphhtp................................tphs.clcahlpslhpl+psp ......................................................h...hpshls+ls.....pphpplhpp.l.hph..hhpp..........................thhptllphl....hpts...ht..ps............phs...haApl.ss.t.ltpths..................................................................pht.p..hl...ls.ph..c.pFcp....................................................................t.p.t.c..t..t..s....p.....p.......c.t.hs..l+Flu-L.h..ph.ph.........ls.p........t...lhhp..........hl.t.p.....L.l..pp.hpc.............................................psl.-hlspl...Lpss.....Gt.Lp...............................................................................pp.s.p...t..t.......hc...phhp......phpphhpp...........................................tphs.c...hc.....ahlpshhplpt......................................................................................................................... 1 585 916 1394 +583 PF02815 MIR MIR domain Bateman A anon Ponting CP (EMBL archive) Domain The MIR (protein mannosyltransferase, IP3R and RyR) domain is a domain that may have a ligand transferase function [1]. 
24.70 24.70 24.90 24.80 24.40 24.60 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.31 0.71 -4.76 30 1582 2012-10-02 19:42:32 2003-04-07 12:59:11 14 76 302 11 968 1488 5 172.80 24 10.35 CHANGED GaL+utcslh........htucQpphhsh..........sch-pssc........................WclEs.pps.h...........puutscW..........GshhRl+HlsTG+YLtuc-cp..............................sslscp.chpp-sosYshpsh.ts............................sDt....lplhcpcsssshusspl+stcohhRLpHhtTusaLpupsscl..sthG.tppEpsshpp.....tssh.hhEpc-shs.Ls ......................................................hL+utshhh..........htu.p.Qpp.s.h.sh.......................................s.pt.D.p.ssp....................................Wh.l.c.s.....p..p..s.s.h..............pust.h.ph........................GshhRLpHlsTu.+hLtuccst..........................................sslspp....ptE.so..sa..s.h..psh...ts..............................................................................pDh......hpl.h......phc..s.....p.....p............t..........t...s....p...l..ps.hso.hhRlpHh..s.os...saLpspshpl....sphu.tptEhs...shtp........sph.................................................................................. 0 275 441 727 +584 PF04212 MIT MIT (microtubule interacting and transport) domain Bateman A anon Crosby A, Patel H Domain The MIT domain forms an asymmetric three-helix bundle [3] and binds ESCRT-III (endosomal sorting complexes required for transport) substrates [2]. 28.30 28.30 28.30 28.30 28.20 28.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -8.98 0.72 -4.09 82 1334 2009-01-15 18:05:59 2003-04-07 12:59:11 13 33 342 20 840 1237 38 69.40 27 14.39 CHANGED hlppAhchlppAlctDp...tu.......papp.AlphYppulchhhpulphc......p.sspp+ph..l+pKhppYlsRAEpl+phl ........................hpcAhphlppAl.ctDp.......ts.......pacc.AhphY.ppul-hhlp.....ulph-................................p.sspp+ct.....l+pKhppYLsRAEpl+ph............................ 
0 236 365 592 +585 PF00153 Mito_carr mito_carr; Mitochondrial carrier protein Sonnhammer ELL anon Prosite Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.00 0.72 -4.28 188 37736 2009-01-15 18:05:59 2003-04-07 12:59:11 22 243 698 9 25106 35824 767 94.40 21 75.41 CHANGED pphshhtphhuGuhAGsh.utssstPlDhlKsRlQ................................tts.tpa.puhhcs...........hpplh.+p-G........h.tuLa+GhsssllthsstsulpFs....sY-thKphhhptt .......................................................................t....h.phh.uG...u.h.u.....G....s....h.....u.......s.........h...........h..........s..........h..P....h........-..........s...l..K..s.....R.hQ.....................................................................................t.tt..tph....p....u......h...h....s.s......................................................h..p.p..l.h...+..p....-......G........................................h...tu....l....a.....+..........G...h.s........s..s.......l..h...p......h......s..s..h........s.......u.....l....has....sY-hh+phh....t............................................................................ 0 8054 13500 20193 +586 PF03637 Mob1_phocein Mob1/phocein family Bateman A anon Pfam-B_1830 (release 7.0) Family Mob1 is an essential Saccharomyces cerevisiae protein, identified from a two-hybrid screen, that binds Mps1p, a protein kinase essential for spindle pole body duplication and mitotic checkpoint regulation. Mob1 contains no known structural motifs; however MOB1 is a member of a conserved gene family and shares sequence similarity with a nonessential yeast gene, MOB2. Mob1 is a phosphoprotein in vivo and a substrate for the Mps1p kinase in vitro. Conditional alleles of MOB1 cause a late nuclear division arrest at restrictive temperature [1]. This family also includes phocein Swiss:Q9QYW3, a rat protein that by yeast two hybrid interacts with striatin [2]. 
21.90 21.90 23.80 22.00 21.10 21.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -10.99 0.71 -4.60 12 1280 2009-09-10 23:30:30 2003-04-07 12:59:11 12 11 310 3 872 1169 11 157.60 35 62.79 CHANGED pphspsoLsuu..slpphVpLPpGpDhs-WlAhpsh-FFsplNhlYGolsEaCT.poCPpMousspa-YlWtD..phtK.PsphsAspYlchlhsWhpsplsscplFPo+sul.FPpsFht.lsppIhpRlFRlauHlYh+HFcplhplphEsHlNspFpHFhhFspEFsLlcp+-htsLp .............................................hhptsltps..slcphVpLPp.G..Dhs-WlAhp...................s.....h........-....Fapp...lN..h.l.a.u..sl.......s........-........h........C....o.p..oC..PhMs...As...s.................p.a................pYhWt-...................pht.+....Ph.ph.........sAs......cYhchlhsWlps.lssc...pl.FP........o.+.......h........u.......h...........s..F......P.....p...s........Fhp......h....sp...pIh..+....RLFRlauHlYhpHa.p...hhp..lt.EsH.....LNopFpHF.hhFhpcasLlstc-htsh..................................................................... 0 273 432 669 +587 PF00994 MoCF_biosynth Probable molybdopterin binding domain Bateman A anon Pfam-B_1258 (release 3.0) Domain This domain is found a variety of proteins involved in biosynthesis of molybdopterin cofactor. The domain is presumed to bind molybdopterin. The structure of this domain is known, and it forms an alpha/beta structure. In the known structure of Gephyrin this domain mediates trimerisation [1]. 
25.10 25.10 25.10 25.20 24.90 25.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.80 0.71 -4.79 149 13076 2009-01-15 18:05:59 2003-04-07 12:59:11 19 66 4239 129 3598 9529 4365 147.50 23 44.05 CHANGED ullosGsElh...............tGpl..........hDsNu.hlsshlp..p.....hGh.plhths.llsD-.ptlppsl......htt.pp...DlllsoGGsusupcDhohc.............sl.tphhs........hhctlshp.st.................................................shh...hush...stt........s....lh.sLPGsPs...ushsshphhl..............PhLtp .................................................llosGcElh............................tGph...............hDssu....hl...tphlp...p.............hG...h...p....l...h.....t..hs....l...ls...D.c........p.tl.p..psl..............pps.h.pps.............Dll.lo....oGG.s.....u...s.s...p.......c.......D.hThc........................................sl..t....ph.h...s..p......................h...hp.pluhp...tt.........................................................................hush..h..h.spt........................t.s...lh..sLP.G..sPs.ust.s.s.hpthl....Phl..t...................................................................................................................... 0 1154 2293 3059 +588 PF03454 MoeA_C MoeA C-terminal region (domain IV) Bateman A anon Bateman A Domain This domain is found in proteins involved in biosynthesis of molybdopterin cofactor however the exact molecular function of this domain is uncertain. The structure of this domain is known [1] and forms an incomplete beta barrel. 
20.40 20.40 20.40 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.06 0.72 -4.02 47 4409 2009-01-15 18:05:59 2003-04-07 12:59:11 10 31 3053 40 1367 3598 1140 71.70 25 16.06 CHANGED puhlspshtsstsRpcalRshlp.....t...uphhspPht...tpsSuhlpolspAsuhlhlspsspt...lpsG-pVpVhhh ..................................uhhsps.h..p.p.s.s..s..RpcFhRutlp................tts.................Gp..h.h..lp.s...s.u........tps..S..u...hls.ohupANshlhls..tspss...........lpsG-hVplh.h.................. 0 406 862 1156 +589 PF03453 MoeA_N MoeA N-terminal region (domain I and II) Bateman A anon Bateman A Family This family contains two structural domains. One of these contains the conserved DGXA motif. This region is found in proteins involved in biosynthesis of molybdopterin cofactor however the exact molecular function of this region is uncertain. 22.50 22.50 22.80 22.90 21.40 21.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.74 0.71 -4.89 52 5148 2009-09-11 13:37:57 2003-04-07 12:59:11 12 42 3304 40 1547 4152 1544 162.50 33 36.85 CHANGED hhsl-EAhchlhshhtth.......sEpVsltcuh.GRlLA-DlhushslPsFcpSuhDGYAVRupDshsust...................tLpVluclhAGpssp.hplssGpsl+IhTGA.lPpGADAVVhhEpspttss........plplh..ptspsGpNlphtGpDlppGssllppGpplsP...tclulLAuhG .................................................hslccAlp...h...l...h.s....t..h.p.shs..........sEplsL......t....c......u.....h...G.......RlLA..............c.Dl.hushs.lPsFcsSAMDGYAlR..s.s..D..ht.s.sp.......................................l..V.h.u..c..h..h.A..Gps.................p.....t.......pl...t........s........G...........p........sl.R.I.hTG..A..sl..P......p.........G........u.......D...u.V.Vh.Epspttss..........................tlp.lt.....ps..s...p....s.G.....ps..lR....h............tGEDlp.tGsll.lttGptlss..splulLAohG.................. 
1 471 975 1318 +591 PF02493 MORN MORN repeat Bateman A anon Bateman A Repeat The MORN (Membrane Occupation and Recognition Nexus) repeat is found in multiple copies in several proteins including junctophilins (See Takeshima et al. Mol. Cell 2000;6:11-22).\ \ A MORN-repeat protein has been identified in the parasite Toxoplasma gondiis a dynamic component of cell division apparatus in Toxoplasma gondii [1]. It has been hypothesised to functions as a linker protein between certain membrane regions and the parasite's cytoskeleton [1]. 22.50 5.00 22.50 5.00 22.40 4.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.84 0.73 -6.99 0.73 -3.92 271 23538 2012-10-01 20:24:03 2003-04-07 12:59:11 15 292 1025 32 14186 22201 11255 21.80 32 29.18 CHANGED YpGpa.ppGhtcGpGhhpassGsp ..............YpGpa..tss..p...t..p...G.p..G.hhhassG............... 0 8471 10144 12192 +592 PF03476 MOSC_N MOSC N-terminal beta barrel domain Aravind L, Anantharaman V anon Aravind L, Anantharaman V Domain This domain is found to the N-terminus of Pfam:PF03473. The function of this domain is unknown, however it is predicted to adopt a beta barrel fold. 
21.00 21.00 21.20 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.28 0.71 -10.57 0.71 -4.40 16 1954 2009-01-15 18:05:59 2003-04-07 12:59:11 11 18 1430 1 823 1627 289 117.60 26 29.64 CHANGED huplssLalaPIKSscuhulpcAp...lsstGhh....tDRtahlls.sc.G............phlTtRpcPpLshlpssh............psshLplsAP.....Ghssltl.lt.....s.ththtssplathshsuhcsG-..........tsuc.WhosaLups ......................plspLalaPlKShp.Glsls.....puh............h.s.s.p.GL............aD...Rt...aM....lsc....ss......G.............................ph...lT...t...R..ph...Pph...s...h..lpssh..................................................ps...sL..t.ls...us....................s.t.s...s.h...hl.t.hs............s.ttp.t..s...s.p..V..a.t.......s....p..h......p..u....h...sss.............................................th.sp.WhSthhu.............................................................................................................................................. 0 218 430 658 +593 PF04643 Motilin_assoc motilin_assoc; Motilin/ghrelin-associated peptide Mifsud W anon Pfam-B_5485 (release 7.5) Family This family represents a peptide sequence that lies C-terminal to motilin/ghrelin on the respective precursor peptide. Its function is unknown. 19.30 19.30 19.80 19.30 18.30 17.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.78 0.72 -4.48 16 149 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 81 3 37 152 0 54.60 38 51.89 CHANGED hlDPs...pspt-E-phcI+hsAPh-IGl+loucQhpcattsLE+hLp-lLhpps.pt..scc ........................psttt--p.pIphsAPF-lGl+loutQhppautsLt+hLQ-lLh-ps................... 2 2 4 8 +594 PF04644 Motilin_ghrelin motilin_ghrelin; Motilin/ghrelin Mifsud W anon Pfam-B_5485 (release 7.5) Family Motilin is a gastrointestinal regulatory polypeptide produced by motilin cells in the duodenal epithelium. 
It is released into the general circulation at about 100-min intervals during the inter-digestive state and is the most important factor in controlling the inter-digestive migrating contractions. Motilin also stimulates endogenous release of the endocrine pancreas [1]. This family also includes ghrelin, a growth hormone secretagogue synthesised by endocrine cells in the stomach. Ghrelin stimulates growth hormone secretagogue receptors in the pituitary. These receptors are distinct from the growth hormone-releasing hormone receptors, and thus provide a means of controlling pituitary growth hormone release by the gastrointestinal system [2]. 21.90 21.90 22.40 23.10 21.20 21.80 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.82 0.72 -7.00 0.72 -4.14 6 118 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 53 1 32 126 0 27.20 59 26.77 CHANGED VPIFTauElQRh.QEKEppKsp+KSLplQ .GSSFLSPEHQ+h.QRKEsKK.PsuK.LpPR.... 0 2 2 5 +595 PF01398 JAB Mov34; JAMM; JAB1/Mov34/MPN/PAD-1 ubiquitin protease Finn RD, Bateman A, Iyer LM, Burroughs AM, Aravind L anon Pfam-B_738 (release 3.0) Family Members of this family are found in proteasome regulatory subunits, eukaryotic initiation factor 3 (eIF3) subunits and regulators of transcription factors. This family is also known as the MPN domain [3] and PAD-1-like domain [4], JABP1 domain [5] or JAMM domain [7]. These are metalloenzymes that function as the ubiquitin isopeptidase/ deubiquitinase in the ubiquitin-based signaling and protein turnover pathways in eukaryotes [7]. Versions of the domain in prokaryotic cognates of the ubiquitin-modification pathway are predicted to have a similar role [8]. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.47 0.71 -4.34 30 2882 2012-10-10 14:49:21 2003-04-07 12:59:11 16 61 451 19 1880 3177 141 110.70 23 23.48 CHANGED pssppVhlpslllhphlcHhp+sspt.........................clhGlLlGphps-.........................slcltssFulPhpsspcsspsht.shp...ph.chhcph...............................sc.EplVGWYHopPsh.s..ossDlpspp.apphtss .........................................................................pVhlpshslhphlc.....H..s.p.p....t..sth..............................cVhG.hL.lGph.s.sc....................................................slclpss..F....s.........h....P...p...........p.....ss..p...........s.s.....s.p.h...s.thp................phhchhcps..........................................sc...pllGW.Y....H....opP......sh...s....os........Dl.pspt.hpth...p.......................................... 0 655 1015 1500 +596 PF01853 MOZ_SAS MOZ/SAS family Bateman A anon Pfam-B_3994 (Release 4.3) Family This region of these proteins has been suggested to be homologous to acetyltransferases [1]. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.31 0.71 -5.06 15 1290 2012-10-02 22:59:21 2003-04-07 12:59:11 13 26 337 14 784 1296 19 175.50 49 25.71 CHANGED h++PPGsEIYRcssISlFEVDG+cp.................plYCQNLCLLAKLFLDHKTLYYDV-PFLFYlLTEpDcpG.........sHlVGYFSKEKcSspsYNlACILTLPsYQR+GYG+hLI-FSYELS++EuplGoPEKPLSDLGhLSYRoYWspsllclLhchcs......plTIc-lSphTulsppDIlsTLppLs.hl+YhKspalIsls ..............................................................pHPPGs.EIY......R.......c.............s............s.......l..SlF...E.VDGc..cp.......................+..h..YCQNLCLLAKLFLDHKTLY..YD.......V.-P.....FLFYlhT..c...h...D..p...p...G.................................tH..lVGYFS.K..........E....Kp....S.........p.s.........a.NVuC...ILTLP...........Y.....Q.R+..G..........YG+h.LIcF..........S................Y..L....S...+....h...E.....s..............p..h..........G.oPE...........KPLSDLGhl..S.Y+uYWppsllch.Lh......p..h.p..s................................pl.o.....I.p....clSphTuhp.pDllpTLptLp.hlphhpsp.....h...................................................... 0 270 410 629 +597 PF04117 Mpv17_PMP22 Mpv17 / PMP22 family Wood V, Finn RD, anon Pfam-B_8493 (release 7.3); Family The 22-kDa peroxisomal membrane protein (PMP22) is a major component of peroxisomal membranes. PMP22 seems to be involved in pore forming activity and may contribute to the unspecific permeability of the organelle membrane. PMP22 is synthesised on free cytosolic ribosomes and then directed to the peroxisome membrane by specific targeting information [1]. Mpv17 is a closely related peroxisomal protein. In mouse, the Mpv17 protein is involved in the development of early-onset glomerulosclerosis [2]. More recently a homolog of Mpv17 in S. cerevisiae has been been found to be an integral membrane protein of the inner mitochondrial membrane where it has been proposed to have a role in ethanol metabolism and tolerance during heat-shock [3]. 
Defects in MPV17 is associated with mitochondrial DNA depletion syndrome (MDDS) and Navajo neurohepatopathy (NNH) [4][5]. MDDS is a clinically heterogeneous group of disorders characterised by a reduction in mitochondrial DNA (mtDNA) copy number. Primary mtDNA depletion is inherited as an autosomal recessive trait and may affect single organs, typically muscle or liver, or multiple tissues. Individuals with the hepatocerebral form of mitochondrial DNA depletion syndrome have early progressive liver failure and neurologic abnormalities, hypoglycemia, and increased lactate in body fluids. NNH is an autosomal recessive disease that is prevalent among Navajo children in the South Western states of America. The major clinical features are hepatopathy, peripheral neuropathy, corneal anesthesia and scarring, acral mutilation, cerebral leukoencephalopathy, failure to thrive, and recurrent metabolic acidosis with intercurrent infections. Infantile, childhood, and classic forms of NNH have been described. Mitochondrial DNA depletion was detected in the livers of patients, suggesting a primary defect in mtDNA maintenance [5]. 20.30 20.30 20.40 21.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.11 0.72 -4.34 93 1477 2009-01-15 18:05:59 2003-04-07 12:59:11 7 21 327 0 1058 1437 41 66.50 24 28.41 CHANGED hu.hl....-ups..hppshpclccpahssht.....ssahlWPhsQhlNFthl.PsphRllasshlulh.WssaLShhppp ................................s.hh....pups..h.pphhpcl+ppa.hsshh...............ssa...hl...W...PhsphlNFt.hl...P.h.p...h.Rllasshl....ulh.WssaLShhtt........... 1 361 595 889 +598 PF03587 EMG1 Mra1; Nep1; EMG1/NEP1 methyltransferase Finn RD, Mistry J, Wood V anon Pfam-B_3290 (release 7.0) Family Members of this family are essential for 40S ribosomal biogenesis. The structure of EMG1 has revealed that it is a novel member of the superfamily of alpha/beta knot fold methyltransferases [2]. 
28.20 28.20 28.20 36.20 27.80 28.10 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.25 0.71 -5.03 60 459 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 385 15 316 463 33 199.60 41 81.56 CHANGED llLtcusLEhl.hp..................csphtLLssscHtshh..c+h...tc...........................s.ccuRPDIlHpsLLtlhDSPLN+.tGhLp.lYIH..TtcshlIpVsPpsRlPRsapRFtGLMtpLLc....+hplhss..utptLlclhc.sslsc.hlss...ssphlh..lSpcGphs...............................................phpchsp...........................................................................................................................pshs.hhlGuhs+Gc.thp.thsp....cp......hSIu.shsLoAtsssu+lssuhE ...................................................lVLppAsLEsh+h.....................t.p.pa.pLLNsDcHtshh...+Kh...s+............................-.ucsRPDIsHQsLLsLLDSPLN+..AG.hLQ.VYI+..Tt+sVLIEVsPpsRIPRTFcRFsGLM.............VQLL+....+hs..I+us.s.u.p.c+LL+V.IK.NPlo.-.aLPs....ss+Kls..lShs.u..thl...............................cs.p.-.hlp.........................................................................................................................tpcsls.lhlGAhA+Gc..hs.scas-.....cp........luIS.sYsLSAuhsCu+hspuhE................................................................................. 0 115 179 261 +599 PF03022 MRJP Major royal jelly protein Griffiths-Jones SR anon Pfam-B_1099 (release 6.4) Family Royal jelly is the food of queen bee larvae, and is responsible for the high reproductive ability of the queen. Major royal jelly proteins make up around 90% of larval jelly proteins. This family also the sequence-related yellow protein of drosophila which controls pigmentation of the adult cuticle and larval mouth parts. 
19.80 19.80 19.80 20.10 19.60 19.70 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.62 0.70 -5.20 20 978 2012-10-05 17:30:42 2003-04-07 12:59:11 11 6 294 8 455 1055 14 247.90 22 65.31 CHANGED sSsa+lslDcCsRLWVLDoGllshsps......hCsPpllsaDLpTcc..ll+phclPpslsp....ssotlsslsVDhhcs....sCttsasYluDttuc...uLIVYshsscpuWRlpsp..thpsDPthsphslsGpsFphpDG......laGhuLushp.s....LYFpPLuSpp.asVsschLpspshtpss....phpphpslGs+u..oQusucuhsp.sGlLFauhlspsuluCWspppshsppNhshls..psscsLpassslKIpc.............pphlalLS....s+hpphhtsclshschNFRIhtsslsclhcsohC ...................................................................................................................ss.phhhD.t.ps+LWllD.sG..h...h.......sh....t..............hs..sspllsh.DLp.o.s.p...........ll+ph.hstshhh.....soh.hssl...s.lDht.s........s.........tsasYlsDs...s...s...............ullVashtssp..u..aR..l...pp.......hhss..s....h......s..s.....h.s.....l.t.........s..........h....p...h....sG..........................h.u.hshs..s...................t...........s........phLYapshuopp.atls.TphLp........stsh..sp...pt..........tthp...l.G.p.+s....sp.ss..u.hhhDt...sGslahs.hppsulhphs..s...p.t........tp.h.th.lh...pssp....h.assshthst..........................pt.lahhs........sph................t...................................................................................................................................................................................... 0 144 221 388 +600 PF04707 PRELI MSF1; PRELI-like family Mifsud W anon Pfam-B_5792 (release 7.5) Family This family includes a conserved region found in the PRELI protein and yeast YLR168C gene MSF1 product. The function of this protein is unknown, though it is thought to be involved in intra-mitochondrial protein sorting. This region is also found in a number of other eukaryotic proteins. 
20.40 20.40 21.70 20.40 20.20 19.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.85 0.71 -4.66 16 922 2009-01-15 18:05:59 2003-04-07 12:59:11 9 17 286 0 571 815 0 145.80 29 50.50 CHANGED -pVssAahp+YPN.PhsspllusDVlcRc.lss.pGp.LhT.cRllphph.....shPcalc+..llGs..sp..sahhEpSslD.pc+ohslcopNloasshlpVcEpspYpsHs-Ns.shThhcQcApls..lhs.hhuhsstlEchuhppaspN.ssKG+cuhEallpp.lptp ..............-.VhtAhhp+.Y....P....N....Ph..s........spVlusDl..lpRp.lss...sGt.....Lto.cRllp.pp............thP....phlcp................l.huh.............sp..s.alhEcShl....Dsp.......p+....shp.hpo......pN..l.oa.ss.....h...l......p......VpEpssYp...s....ps.....c....s......s...s.....hTh.....hp.Q...pA.ls.......hh.u...h...hs.h.p.ptlEphhhppappN...spKGppshEhhlpp.....p................................................. 1 155 247 414 +601 PF00635 Motile_Sperm MSP_domain; MSP (Major sperm protein) domain Bateman A, Griffiths-Jones SR anon Prosite Domain Major sperm proteins are involved in sperm motility. These proteins oligomerise to form filaments. This family contains many other proteins. 22.80 22.80 22.80 22.80 22.60 22.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.16 0.72 -4.41 63 1788 2009-01-15 18:05:59 2003-04-07 12:59:11 21 69 335 35 1207 1567 9 102.30 22 37.32 CHANGED hlplpP.sptlhhss.h.........ssspl..plpNsosp.pluFKlKoTssppYpVcPshGllpPspshplpl..hhps.p...ss..p.......pDKhhlphshs.tstssstpshpphhppsps ..................................h.l.lpP.t.t.lh.Ftt.h.........tp...hpptl..pl..p.....N.......s....o..sp...t.lsF..K........l........K..TT...sP..c....c..Y..p.V....c....Ps.s.G.l.l...........cP.tps.s.p.l.s...........l.........hh.p.s.t.....st.p................pD+Fhlpss.s..tsts.t......ht........s.............................................................. 
0 442 671 1055 +602 PF03820 Mtc Tricarboxylate carrier TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 19.10 19.10 19.50 19.40 18.30 17.70 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -11.91 0.70 -5.77 9 716 2009-09-11 12:47:20 2003-04-07 12:59:11 12 12 283 0 439 638 7 268.20 38 91.85 CHANGED aD.STahGRs+HFhphTsPhsLlsopppLpcu+pIVpcY+pGphsssh.Ts--lW+AKplhDSsaHPDTGEphhhhGRMSuQVPhNhllTGGML..shYpsssuVVFWQWhNQSFNAlVNYoNRSuss.hospQLhsuYsuATouAhssALGLNphl...KphsP....LluRLVPFAAVusANslNIPhMRppElp...pGIsVhD-sGpplGpSptAAhhuIupVslSRlhMAhPsMslsPllhppLpKpsahpppPthts....PlQslLsGhsLhhusPluCAlFPQ+ssIpls...pLEPclppplc+..pssPscpVYaNKGL ........................................................aD.sTahGRhpHahplsDP............psl...hho...p...ppLcpA+pl.l..ppa.....+.......t........G........h......s......h......pppp...LWpA+hlh-SshH.P....DTGEhhhh..hRMSs.........lPhs.h.h.ls....u.s...ML..t.....h......................ps....ss.usl.FW.QWhNQSaNAhVNYsN.....Rsu...s...s....P.....h....o..........spp.......l.htu...Y..hsAss.....u.u...susA.....l.G...L....s.ths..........+phsP..........l..lsRhV...P..F.sAV...AuAsslNlshMRtp..Elp................pGIs.V.................h...................D.........c......s...G...........p..............p..............l....G...p.SppA.A.thulspsslSR.lhhshPsM..hl.sPllhph.L...p.+.....t.t.....h...h....p..p...h...s...h.h.t.h...................P.l..phh...ls.....hhs..hh.....hshPhuhu..l..F..PQ..p.......splpss.............pL...E...s..-hptth.t.t.............t.hlhaN+Gl.................................... 1 137 197 330 +603 PF00249 Myb_DNA-binding myb_DNA-binding; Myb-like DNA-binding domain Bateman A, Finn RD anon Prosite Domain This family contains the DNA binding domains from Myb proteins, as well as the SANT domain family [1]. 
24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.23 0.72 -3.90 194 12500 2012-10-04 14:01:11 2003-04-07 12:59:11 26 179 730 34 6476 17506 196 47.00 26 14.31 CHANGED +ssWTt-E-phlhphlpphGsp....................WptIupphs.........sRospps+p+Wpshh ............................................+ttWTt-.Ecphll.p..h..l.....p..p...hGpp...........................................................Wp.pI.u...p..p.hs...................sRT.s..pps+p+.apph................................ 0 1753 4075 5391 +604 PF02736 Myosin_N Myosin N-terminal SH3-like domain Bateman A anon Pfam-B_110 (Release 5.5) Domain This domain has an SH3-like fold. It is found at the N-terminus of many but not all myosins. The function of this domain is unknown. 20.70 17.00 20.70 17.30 20.60 16.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.82 0.72 -4.28 41 1520 2009-09-14 13:17:30 2003-04-07 12:59:11 14 67 230 150 754 1364 7 42.00 35 2.67 CHANGED KptsWV.sDscE..ualpucIp..sppG-pVoVpsts...Gc..pholKcDc ..........KptVWV.sDs+c..ualcu.plp...pc..cGsc.Vs.Vcsts...G+............plsVccDp.................. 0 141 253 484 +605 PF00784 MyTH4 MyTH4 domain SMART anon Alignment kindly provided by SMART Family Domain in myosin and kinesin tails, present twice in myosin-VIIa, and also present in 3 other myosins. 
19.20 19.20 20.20 19.20 18.90 18.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.18 0.71 -4.15 11 1375 2009-09-11 15:02:25 2003-04-07 12:59:11 12 160 222 5 806 1210 7 107.20 31 9.90 CHANGED ppsh-llppIlppulpp.sLRDElYCQlhKQhscNPs..ppStt+GWcLlhhssusFsPScplhtaLp.pFlpsp.t.......htshAhhs.cpL++ohpsG..sR+hsPuhhElpAhp .................................p...thhp.ll..phsh.pp....t.L...R..DElYsQlhKQho........p.N.ss.................ppS..t..tR..GWpLLsl.ssu.hFs.P.S.p.p..hht..........aLp.pFlppp..h...t......................................................................t....t....h........Ahh.C..c.pLp...+.o.hp..pG........sR..p.......sPohhElpAh.................................................... 1 262 339 583 +606 PF03485 Arg_tRNA_synt_N N-Arg; Arginyl tRNA synthetase N terminal domain Bateman A anon Bateman A Domain This domain is found at the amino terminus of Arginyl tRNA synthetase, also called additional domain 1 (Add-1). It is about 140 residues long and it has been suggested that this domain will be involved in tRNA recognition [1]. 21.30 21.30 21.30 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.78 0.72 -3.44 174 5094 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 4796 10 1267 3829 1849 86.10 28 15.38 CHANGED pplpptlppulpph................th...hlc.h..s.....pp.paGDausNhAhtLAKth+p.......sPpplApplsp...plpts....th......lp..cl-luGP....GFINhhL .....................................hpphltpslpph................hs.tts.ph..hlc..p....s.......cpspaG.D.....ausNhA..hsLAKhh+p.............sP+plApplsp......pLsts.................sh.......hp.....c.....lEl.AGP...G.FINhhL.................. 0 428 821 1065 +607 PF01699 Na_Ca_ex Na_Ca_Ex; Sodium/calcium exchanger protein Bashton M, Bateman A anon Pfam-B_1680 (release 4.1) Family This is a family of sodium/calcium exchanger integral membrane proteins. This family covers the integral membrane regions of the proteins. 
Sodium/calcium exchangers regulate intracellular Ca2+ concentrations in many cells; cardiac myocytes, epithelial cells, neurons retinal rod photoreceptors and smooth muscle cells [2]. Ca2+ is moved into or out of the cytosol depending on Na+ concentration [2]. In humans and rats there are 3 isoforms; NCX1 NCX2 and NCX3 [1] see Swiss:Q01728, Swiss:P48768 and Swiss:P70549 respectively. 24.90 24.90 25.00 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.57 0.71 -4.58 111 11392 2009-01-15 18:05:59 2003-04-07 12:59:11 19 59 3066 4 4750 9872 2859 136.70 20 50.60 CHANGED lhlhhhuchlssshs.slu.p..................hhs....lsstllGlsllAhusuhPEhhsulhushp...............s.......psslulusllGSslhslhlslGlsslhssh...................................htstshthsh.shhhhshhhlhhhhh......................ttplshhpullhlhhYhhalhhh .................................................h.hlhhhuph.hstshp.tlu.p...................................hhs..............l.sphlh.....Glsll..Ah..G.oS...hP........E.l.h....s.......u.lh..u.s.hp..................................................s..............psclu...lus.......ll....GS....s.l.h......N...l...h.l....l...l.u..ls..s.llsshh....................................................hps.t.s..h..p.h..s...h......sh..h.h..h.s..h.l.hh..h.lhhh.............................................tst.p...l...s.h....h....p....G..h.l...h.l.h..hahhalhh.h......................................................................... 0 1381 2625 3903 +608 PF00999 Na_H_Exchanger Sodium/hydrogen exchanger family Finn RD, Bateman A anon Pfam-B_312 (release 3.0) Family Na/H antiporters are key transporters in maintaining the pH of actively metabolising cells. The molecular mechanisms of antiport are unclear. These antiporters contain 10-12 transmembrane regions (M) at the amino-terminus and a large cytoplasmic region at the carboxyl terminus.\ The transmembrane regions M3-M12 share identity with other members of the family. 
The M6 and M7 regions are highly conserved. Thus, this is thought to be the region that is involved in the transport of sodium and hydrogen ions. The cytoplasmic region has little similarity throughout the family. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.36 0.70 -5.93 80 17250 2012-10-02 17:06:44 2003-04-07 12:59:11 16 72 4700 5 5319 13718 2497 371.20 18 66.67 CHANGED hllllh.uhlssh..h.hp+lp...lPs........lluhllsGlllGshshs...........h...............................hhphluplGlshllFhsGlchshpp.lppshts..............slhhulluhlhshhh............Ghhht.h...........h...hshhtulhhGshl.osoussllhtll......-pttlspphuplllutullsDhssllllslhhshh.........shhhshh...............hhhhhhhhhhhshlhhthlhshlhphhpch.......tstplhhhhslhhshhs..shhuchl.GlssllGuahsGlslu...........ptthppp..lppp....lpsht....shhhslFFlhlGlpls.................h...........hthhhhllhhllhlhl.s+hlsshhhtthh...............t.hshppsh...hlu.hshh.+Gthslslsthuhpt......tlhsp..............ph.hshhlhssllosl.....lssh.......lht 
........................................................................................................................h..hhhhh.s.h.lhsh.........l..hp+lt........lP...........h..ls...hl......l..s..G...l....l.......l......G....s...h..s..hs................h..ht.s...........................................................................hp.h.h..s...p...lu.l..s.hl...l...F...h............s.G................h.c.h.....s...h......p..p....l..h..p..h..ht...........................................hh..hhu...h.h..t.lhl.sshh.....................................huhhhhh.........................................h..hs.h.s.h.h.t..u..l.h...l.G...s.....hl...u.s......o.....s.s.......ss.l.ht....ll.......................cpt....t....h....s.p.p...h...s...p....h...lh...........utul.....h.s.........Dhsu.ll.h.....h..sh.hhshh.....................tstth.s.h.h..............................................................t.h.h.h..t.h....h.....h..h....h.h.h.....h....l....l....h..t..h...l....h...s.....h....l.....h.t..h.......hhph...............................tsp...ph.h...h...h.h..s..l.h....h..shhs........hh...............h..s........p......h.......l........s............h............S...........s.......h..l..u..s...........h...hu.G.lh..lu.....................ptth.pp..p.......hppp..........................................hp..shh..............hlh.slh.F...l.hl..Gh.tls.......................thl.....................hth.h..h.h....l.l.h....h....h.h........h.l...h..l....s+h...l...ss..h...h..h.h.hhh..................................................................t..hs.h.p..p.t.h.........hhu...hs...hs....pG....t...hu.h..s......l....s....h....h..s...tt.......................th..h.s..t.....................................ph.....h...s...h....h....h....h....s...l....l..l.ohh.h....s..h..h...hh............................................................................... 
0 1593 3099 4353 +609 PF02690 Na_Pi_cotrans Na+/Pi-cotransporter Bashton M, Bateman A, Haft D anon Pfam-B_509 (release 5.5) Family This is a family of mainly mammalian type II renal Na+/Pi-cotransporters with other related sequences from lower eukaryotes and bacteria some of which are also Na+/Pi-cotransporters. In the kidney the type II renal Na+/Pi-cotransporters protein allows re-absorption of filtered Pi in the proximal tubule [1]. 28.60 28.60 29.20 28.70 28.50 28.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.63 0.71 -4.47 184 5785 2009-09-10 16:47:48 2003-04-07 12:59:11 10 11 2359 0 1241 3956 693 134.10 26 49.88 CHANGED LhLhG.hphhssGlppssG.sp..l+plLt..phTs...s........htull....sGhhsTullQSSSssol...lsluFVuuGllsltpulullhGANlGTTlTuhllu.............hcl.uthu.hhlhlGsllhhh..pppphpthGthlhGlullFhulphhppu ..............................hhhhu.hphhspuh.p...hts..sp...h.c.p.l.ht....phss.....s.............hhull.......sGhhlTsl.........lQSSoAsss..............lssuhs..u.......s.......G...............l.l.......s.......lptAlsllhGANlGTslTAhlhu..................................................hcl...u.hh...ph.h.h..hl..Gsl.l.hhh....................................h.......................................................................................... 0 429 745 997 +610 PF02445 NadA Quinolinate synthetase A protein Bateman A anon Pfam-B_1915 (release 5.4) Family Quinolinate synthetase catalyses the second step of the de novo biosynthetic pathway of pyridine nucleotide formation. In particular, quinolinate synthetase is involved in the condensation of dihydroxyacetone phosphate and iminoaspartate to form quinolinic acid [2]. This synthesis requires two enzymes, a FAD-containing "B protein" and an "A protein". 
19.00 19.00 20.70 20.60 18.60 17.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.89 0.70 -5.61 144 2787 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 2689 2 795 2280 2221 307.90 41 89.62 CHANGED hpcIpcLKc-..+sAlILAH.YQpsEl.Q-lADhsG....DSLpLuctAsp.scA-sIVFCGV+FMAEoAKIL.s.P-Kp.........VLlP-h.pAGCshA-hhss-pl....pph+cpa...Ps......tsVVsYlNooAclKA....tuDlsCTSSNAlclVpp.l....sp..cc.IlFhPDcpLGpal..............................tcps.s+cl................................llWs...GhC.VHppassc.clpph+ppa.PsApllsHPEC...s.cVlchAD........hlGSTutllcasppsssp.c.alluTEhGllpcLpc..cs....Ps.Kpahshs.........s.pshCspM+hhTLcclhpsLc......shp.................sc.lpls..c-ltpcAppul-RMLcl ............t.pcIccLh+c..+sAVllAHa.YpcsEI.QplA-hsG..................DSLphA+h.u....sc....psA...ssllhsGV+FMu.ET.A.K.I.L.....o....P-...Kp......................VL...hP.....s.l.p.A.....sCSLs.usss.-ph........................ptap-ta.............P-...............tsVVsYsNTSAAVKA.....cuD.....hssTSSsAl.c.llcp....l.........sp......s...ccIlasPDcaLGpal...................................................................................pcpT..stch...................................lhWp...GtChVH-c.F.psp.plpch+.pp.a....P-.At.llVHPEs.............stsVl.phAD...........hlGSTotlIcts............p....sh...........ss...........p....c.......hIVuT-tGlha+hpp....ts....P.-...Kp..h..lt.ss.......................................................s..sshCPaMthssLpplhps...Lc.....p..tt.......................pElp.VD..ppltppAhhsLpRML-.h................................................................................................ 
0 274 536 683 +611 PF03822 NAF NAF domain Griffiths-Jones SR anon PROSITE Domain \N 25.00 25.00 26.00 26.00 21.80 24.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.19 0.72 -4.26 71 764 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 57 2 323 696 0 60.30 43 14.83 CHANGED pPss..lNAF-lI.ShSpGhDLSuLFtccpct....c..sRFsSppsAsslluKlEplAc..shs.apV..+K ..................PtshNAF-lI.S.hSpGhsLSuLF-ccp.p...................pcc......sRFsS.pp...PAspIluKlE-sAc...shu.apVpK............. 0 37 177 261 +612 PF05089 NAGLU Alpha-N-acetylglucosaminidase (NAGLU) tim-barrel domain Moxon SJ, Bateman A anon Pfam-B_6295 (release 7.7) Domain Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate [1]. Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations [2]. The structure shows that the enzyme is composed of three domains. This central domain has a tim barrel fold [3]. 23.40 23.40 23.70 23.80 20.50 22.50 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.06 0.70 -5.69 30 411 2012-10-03 05:44:19 2003-04-07 12:59:11 7 31 267 5 179 405 16 304.40 38 38.80 CHANGED RYahNsCTaSYohsaWsWpcWE+cIDWMALpGINhsLAhsGQEulWpcVapchGlocpElcsahoGPAahAWpRMuNlcG.WGGPLspsWhcppttLQ++IlsRMRpLGMpPVLPuFuGaVPpshpchh.PpAplschusWsu......assshaL-P.....pDPlFscluptFlcc.pchY.G.sschYssDsFNEhsPsss...ss............................phLussupulacuhppsDPcAVWl.hQGWhF..p..hWpssthcAhLsuVP.....ps+.hlVLDLauEphP.hWpp..........ocuahGpPaIWChLpNFGGshslaGslptlsssh.pAttps..ssslhGsGhoPEGlcpNslhYELhhEhuWpps. 
.......................RYahNhCThuYohsaWsWpcWE+cIDWMALpGlNhsLAhsGpEslWpclhp..c.hGh.opp-l.ppahsGPAahsWtpMuNlpu..W......u..G.....P.Lsp.....sWhppphtL.Q++IlpRM+phGMpPVLPuFuGhVPpsh...............p......c...hh..P..p......splhp..s..pWss........as...ps...h....hLpP..........pD.s...hFtplu.phFhcc.pcha.G....ssphYshDsFpEhtssss......ts.................................lsphupslhpuh.pts.cscAlWl.hQuW.a........W.ps.s.hpuhLpu.V.s.........p.sc.hllLDLa..uEttP.h......app...........ppsahGpPaIaChLpNFGGshshhGphptlspt.......hpAh......tps....sssh........hGhGhs.EGlppN.lhY-Lhh-huWppt.s................................................... 0 64 108 149 +613 PF02365 NAM No apical meristem (NAM) protein Bashton M, Bateman A anon Pfam-B_530 (release 5.2) Family This is a family of no apical meristem (NAM) proteins these are plant development proteins. Mutations in NAM result in the failure to develop a shoot apical meristem in petunia embryos [2]. NAM is indicated as having a role in determining positions of meristems and primordial [2]. One member of this family NAP (NAC-like, activated by AP3/PI) is encoded by the target genes of the AP3/PI transcriptional activators and functions in the transition between growth by cell division and cell expansion in stamens and petals [1]. 
21.20 21.20 21.20 21.50 20.80 21.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.25 0.71 -3.95 133 2410 2009-01-15 18:05:59 2003-04-07 12:59:11 10 26 114 15 1319 2332 1 125.10 42 37.77 CHANGED LPPGFRF+PTDEELlsaYLcpK.ltup............hsh..p................s.Is-l.D....lhch.-P.W-L..P...h.........................hh.s.......tpp..EW..Y....FFs.+-+KYs....s.Gt.........RsNR.u......................Tt.....uGY..WK.ATGp..D+tlhs.............ssth............lGhKKTLVFY....p.......G.+uPcGp....+TsWlMHE.YRL.s ..................................lPPGFR..F.+PT.DEELlhaYLpcK..ltspt..........hsh...p.................h..I.s....-l..D.....lh...+.h...-....P.W-L..P..t...................................thth....scp....EW......YF.Fs..+..-..+...K..Ys.........s...Gs........R.s..NR...u.........................................ss........uGa...W.K...A..T.Gp....D+s..lhs................ssph...................lGhK.K.sLVFY..............p...G..+....u..P...+......G..p........KTsWlMHEYRL.s.................................................................... 0 160 722 1039 +614 PF04095 NAPRTase Nicotinate phosphoribosyltransferase (NAPRTase) family Wood V, Finn RD, Bateman A anon Pfam-B_5038 (release 7.3) & Pfam-B_5422 (Release 7.5) Family Nicotinate phosphoribosyltransferase (EC:2.4.2.11) is the rate limiting enzyme that catalyses the first reaction in the NAD salvage synthesis. This family also includes Pre-B cell enhancing factor that is a cytokine Swiss:P43490. This family is related to Quinolinate phosphoribosyltransferase Pfam:PF01729. 
19.90 19.90 20.10 20.00 19.80 19.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.66 0.70 -5.09 19 4673 2012-10-03 05:58:16 2003-04-07 12:59:11 11 6 3929 55 1191 3401 285 219.70 27 50.59 CHANGED aplpDFGsRtt.Shcstthssp...........sthstFhGTuslhh.............stthul.shtppshphhtscpplss...hlpphpp.........suh-hWhttht...hG.sLpDhlshcuhhtths...........hslR.DSGDPhp.................hh-clht+atsh.shcsh.....phtllauDGlshcphhplhcthcs.th...sluFGlGosLhpclsp.h.t....ssslsIshKhhpsptpPlsclS.csstKuh.................hts.....hthl+psFpsst .........................................................................................lh-FGoRRt.phcstthssp...t..........................uh.h..u.u.hsuTSNlhs......................ucta...u..lsshGTtAHsa........h.......t.....s.....a.....t.....p.h..s.....s....................h.p.s.htp.......................................tsh..sh..hhs...pa.......slt....ulssslps...st.h.t.ph...............................................hhGlRhDS.....GD.sh........................................................................hucc....stphh...-c...h....t.......sh...............................ptpllhSssL.D...pp.l..h..pLhtphsp...............................ssaGlGT.....pLhs.....s.........h.......s..........p..............................h.ss..l.................p..h..s..h......p.h....h.................................s......h.hch.o..pp.tp........................................................................p...................................................................................... 0 381 694 967 +615 PF04970 LRAT NC; Lecithin retinol acyltransferase Finn RD anon Pfam-B_3758 (release 7.0) Domain The full-length members of this family, eg Swiss:P53816, are representatives of a novel class II tumour-suppressor family, designated as H-REV107-like. This domain is the catalytic N-terminal proline-rich region of the protein. 
The downstream region is a putative C-terminal transmembrane domain which is found to be crucial for cellular localisation, but not necessary for the enzyme activity [1]. H-REV107-like proteins are homologous to lecithin retinol acyltransferase (LRAT), an enzyme that catalyses the transfer of the sn-1 acyl group of phosphatidylcholine to all-trans-retinol and forming a retinyl ester [2]. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.81 0.71 -4.17 32 808 2012-10-10 12:56:15 2003-04-07 12:59:11 8 10 297 3 418 739 44 120.20 26 54.98 CHANGED h.......p.GDllpl.R.......hYpHaGIYlGDspVlHhs.......Ps..............t..ssstt...........hhushtptuhVchs.sL-shstGsshhlssh........t.......tptpshss-cllpRAct.LlG....ph.sYsLlhsNCEHFVsaC+aGhsh ......................................................hh.....t.p.GDhl....h...R...........hY.pHaulY..l......G.....c....s.....h......Vl..Hhs...................................................................................hhu..hh..t...p.p.u...hVcts..pLpsh....s..t.u...s.p...h...plssh................................t.................phps...hsscpllp..R.App..hlG..........ph...p.....YsLhts....NCEHFsshs+hGh..t........................................ 0 78 152 260 +616 PF04904 NCD1 NAB conserved region 1 (NCD1) Kerrison ND anon Pfam-B_6188 (release 7.6) Family Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors [1]. This region consists of the N-terminal NAB conserved region 1, which interacts with the EGR1 inhibitory domain (R1) [1]. It may also mediate multimerisation. 
25.00 25.00 40.20 39.70 21.60 20.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.57 0.72 -4.24 4 175 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 92 0 89 144 0 79.10 79 17.49 CHANGED hspPoTLuELQLY+VLQRANLLsYYDsFIppGGDDlQQLCEAsEEEFLEIMALVGMAoKPLHVRRhQKALpEWsTsPshFpp ........uLPRTLGELQLYRlLQRANLLSYY-sFIQQ...GGDDVQQLCEAGE..EEFLEIMALVGMAoKPLHVRRLQKALR-WsTNPGLFsQ..... 1 21 29 56 +617 PF04905 NCD2 NAB conserved region 2 (NCD2) Kerrison ND anon Pfam-B_6188 (release 7.6) Family Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors [1]. This family consists of NAB conserved region 2, near the C-terminus of the protein. It is necessary for transcriptional repression by the Nab proteins [1]. It is also required for transcription activation by Nab proteins at Nab-activated promoters [2]. 25.00 25.00 29.80 25.60 21.90 20.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.02 0.71 -4.45 5 176 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 91 1 93 153 0 152.80 49 30.99 CHANGED scppsSppshS.PusLGSPtSs...tlusp-ouuss....ssLssstltplsEsuERhu+oLP+uDhuEVscLLKsNKKLAKplGHIlEMS-sDP++EEEIRKYSAIYGRFDSKRR-GKpLTLHE........................LTVNEAAAQLCh+DsALLTRRDELFuLARQVuREsTYpYoh+oSRL+ ..........................................................s..tptup.shS..Pus.hGsPtus...............ttsu.........-tL-sthst.VsE.sVERhhtohP+.uDhsElppLLKhNKKLA+..plGHIFEMsDsDspKEEEIRKYSuIYGRFDSKR+-GKpLTLHE........................LTlNEAAAQlCh+Dss..LLTR.RcELFuLARQ.......luREsoYhhoh+so+............... 0 26 34 60 +618 PF03096 Ndr Ndr family Mifsud W anon Pfam-B_2481 (release 6.4) Family This family consists of proteins from different gene families: Ndr1/RTP/Drg1, Ndr2, and Ndr3. Their similarity was previously noted [1]. 
The precise molecular and cellular function of members of this family is still unknown. Yet, they are known to be involved in cellular differentiation events. The Ndr1 group was the first to be discovered. Their expression is repressed by the proto-oncogenes N-myc and c-myc, and in line with this observation, Ndr1 protein expression is down-regulated in neoplastic cells, and is reactivated when differentiation is induced by chemicals such as retinoic acid. Ndr2 and Ndr3 expression is not under the control of N-myc or c-myc. Ndr1 expression is also activated by several chemicals: tunicamycin and homocysteine induce Ndr1 in human umbilical endothelial cells; nickel induces Ndr1 in several cell types. Members of this family are found in wide variety of multicellular eukaryotes, including an Ndr1 type protein in Helianthus annuus (sunflower), known as Sf21 Swiss:O23969. Interestingly, the highest scoring matches in the noise are all alpha/beta hydrolases Pfam:PF00561, suggesting that this family may have an enzymatic function (Bateman A pers. obs.). 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.65 0.70 -5.93 10 725 2012-10-03 11:45:05 2003-04-07 12:59:11 9 5 129 8 272 2897 1291 225.90 42 76.06 CHANGED EHplcTsaGulpVoVpGs.ct..p+PsllTYHDlGLNHcSCFpsLFsp.EsMsEltc+FslhHVssPGpEpGAsshspsa.aPSl-sLA-plssVLsaFplcoVIGhGsGAGAYILsRFAlpaP-RVpGLVLlNssssAsGWh-WstsKlss..h...GhosslhDhllsHhFu+Etpps....ss-...lVppYRphlscshN.sNLphalpAassR+DLshctssht..........CsslLlVGDpSPah-sVlcsso+LDsppoolLKlscsGGhVpt-QPsKlscuhchFLpGhGahs ...............................................................................p.lcTsaG...lp.V.slh..Gs.c............+PsllTYH.DlG.LN.apoCFp.shF....p.........h..-sM.p.-....lh.pp...F...slhHV....-...A...P..G..p.....p.....G....A.s..s....h..P.....s......a........a...P...o.h..-p..L..A-..l.ssV.Lpa.h...s...h...p...s..l...I.G.h.Gl.GA.G..A.Yl.....L.s.+.F..A...........l..t.a....P.c....h....V.GL..lL.......l.......N............h.....s.......s.........s.....u........t........u......W........h...-......W.........h.............h.....p.........K....l.............................................................G......h...s.....p..s....l........-...h..l........h....t....H....h......F.u...p.p...htt..............p.-.......ll..p..t...a...R...t.....l...t..p....t.......s................N..l....t..h..a....h...p.....u...a..........p........p.....R........p....D....L..p.....p..h...h.h......................s..shll..s...G..s...p....u...P...h.......p..s..s...............l.....c....h....s..s..+...L......s...........p...p.........o......s.h.....l.c..h.t.c.....s.su.....s...pQPtp...lspuhchFlp.GhGhh...................................................................................................................................................................... 0 74 119 189 +619 PF03102 NeuB NeuB family Mifsud W anon Pfam-B_2572 (release 6.4) Family NeuB is the prokaryotic N-acetylneuraminic acid (Neu5Ac) synthase. 
It catalyses the direct formation of Neu5Ac (the most common sialic acid) by condensation of phosphoenolpyruvate (PEP) and N-acetylmannosamine (ManNAc). This reaction has only been observed in prokaryotes; eukaryotes synthesise the 9-phosphate form, Neu5Ac-9-P, and utilise ManNAc-6-P instead of ManNAc. Such eukaryotic enzymes are not present in this family [1]. This family also contains SpsE spore coat polysaccharide biosynthesis proteins. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -5.23 158 1576 2012-10-03 05:58:16 2003-04-07 12:59:11 9 19 1121 8 447 2406 3908 236.80 36 67.28 CHANGED lI-sAtc.AGADAlKFQTaps-slsspt..sppsp.....athpp....hssp.o.h-hhcchc.hsh-.appLhcascptGItahSTPFDhpul-hL.pplssstaKIuSuEloshPLlctlAp...psKP.....lIlSTGMuolsEIcpAlssl...........................tpssspplsLLHCsosYPsPh--...lNLpslpsLpptF...s..l.l.GhSDHTh...GhtsslAAlALGAsl..........IEKHFTLD+sh....tGPDHthS..L-PpEhcphVpslRplcpAL.Gsshc ................................................................hlcsAtc.uGADulKaQTaps-shhspt.......s.ps...........a.hpss.......hssp..ohh-h.hcch..c.hsh-.atpL..hcas.c.chGlhhhSoPF.Dhpul-hL..pph......s..sshaKIuSsE...ls.....shP.hlctlAp.....psKP..........lIlSTG.M.u.o.h.p.EI.cpAlphh.....................................pp.t.t..s.psls.l....L....H.......C..s....osY.P.s.s...h..-.-........s.N......LpsltsLpc....tF...s........l.........l........Gh..S....D..Hoh.....G.....h.......t.....s.....sl.....u.....A.....V.A.LG...Ap..l...........I..EKHFTLD+sh....tG.sDp.thS.l-PpEhcphlp.slRpsppAL.Gss........................................................................................................ 0 154 297 373 +620 PF02931 Neur_chan_LBD Neurotransmitter-gated ion-channel ligand binding domain Bateman A, Sonnhammer ELL anon Prosite Family This family is the extracellular ligand binding domain of these ion channels [1]. This domain forms a pentameric arrangement in the known structure. 
25.80 25.80 25.90 25.80 25.60 25.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.64 0.70 -5.19 92 6005 2009-12-16 13:50:00 2003-04-07 12:59:11 18 90 267 651 3245 5129 71 186.70 25 43.27 CHANGED pp+LlccLhps....YsptlRPs.....pssp.....s.lsVplsltlpplhslsEhsp.hosslalpppWpD.+LpWsstp....asslpplplssc......plWhPDlhLhN.....sssspapso.hpsplhlp.........sGpVhahsss.hhcusC.lclphFPFDpQsCslpFuS............asYss.......pclslphtp............................pphclssFh.supWslhph...shpttp.hs.hsp.........l.aplhl+R+s ......................................................................hhppLhtt......Y.s...t...h...hRPs......................st.........................sh...V....phsl.........lt..p..lhs..l................s.-hs........................hsh..............s......l.ahp...............p....p..............W.......p..........D.......+...Lp..a.s............p................s.s.h...p.t..l...p...l..ssp......................plWhP..Dhhhh..N............p..........t...........s.........t....a.....p....h.....s.............p...p.............h....lhl..h............................sG..p....l.....h......a.....s..............................h.....hh..p...............u.sC....h.c.........l.ph..............FPh.D..t...Q..s......CslphtS........................................a.s..Y.st............pc...lth.h.htp.....................................................pt.h..pl.s..p..a.h......p.......sp.hth....ht....................h...........hs..thst...............h..thhlcRp.............................................................................................. 0 1010 1272 2385 +621 PF01436 NHL NHL repeat Bateman A anon [1] Repeat The NHL (NCL-1, HT2A and LIN-41) repeat is found in multiple tandem copies. It is about 40 residues long and resembles the WD repeat Pfam:PF00400. 
The repeats have a catalytic activity in Swiss:P10731, proteolysis has shown that the Peptidyl-alpha-hydroxyglycine alpha-amidating lyase (PAL) activity is localised to the repeats [2]. Swiss:Q13049 interacts with the activation domain of Tat. This interaction is me diated by the NHL repeats [3]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.93 0.73 -7.24 0.73 -3.73 108 7067 2012-10-05 17:30:42 2003-04-07 12:59:11 16 402 688 32 4354 6356 1348 27.90 34 11.21 CHANGED hstPpulsls..sssplhVsD..ppspplhha ........hptPpGlAls...ssGp..laVuD........psNpRlphh.............. 0 2548 3068 3732 +622 PF03031 NIF NLI interacting factor-like phosphatase Griffiths-Jones SR anon Pfam-B_1405 (release 6.4) Family This family contains a number of NLI interacting factor isoforms (eg. Swiss:Q9PTJ8) and also an N-terminal regions of RNA polymerase II CTC phosphatase (Swiss:Q9Y5BO) and FCP1 serine phosphatase (Swiss:Q9PT70). This region has been identified as the minimal phosphatase domain [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.36 0.71 -4.57 120 2877 2012-10-03 04:19:28 2003-04-07 12:59:11 13 63 441 30 1934 3360 347 157.00 26 34.52 CHANGED hsLVLDLDET...LlHsp.t...................hh..............................hphhlth......RPhlccF...Lp.pl.uph..aElllaTuut..ppYAptllchlD.......s.....pphh...pppl....aR........c...pCh.......hps..h.l..K...DLshl.....................sR.s..........LspllllDssspsahhp.spNul.Ips......ahss.......ps.Dp.........pLhp...LhshL.ctlt..ph.pDVR ................................................................................................sLVLDLDcT..LlHsphp...........................................................................................hth.hlhh.............R..Pt..l..c...pF....Lp..ph...uph..........aE.ll..l.aTu.u.h..............p.t...Y.Ap.......slh.c.h.L.D.....................................s..........pphh.........ptp.l.............hR...................................................c...psh...............................hpu...ph..h.........K...................-L.s.hl.....................................................................sp..s.............h..sps.ll...lDss.s.t.s.ah.hp....s......p...N..u.l..lps.................ahss........................p.Dp.........tLhp......lh.hl.p.l................................................................................................ 0 769 1211 1633 +623 PF04923 Ninjurin Ninjurin Finn RD anon Pfam-B_5824 (release 7.6) Family Ninjurin (nerve injury-induced protein) is involved in nerve regeneration and in the formation and function in some tissues [1]. 
20.70 20.70 20.80 20.70 20.20 20.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.29 0.72 -4.30 50 257 2009-11-15 17:29:03 2003-04-07 12:59:11 7 5 73 0 176 264 1 98.90 39 59.48 CHANGED shNpYss+KolApuhhDlALLoA..NAsQ.....L+hllphspp...tthYhhh.lsLlslSllLQlllullllhhup...hslppt..................................ppppp.hpphNshsshh.............................lhllsllNllIouFs ............shNpYusKKolApuMlDlALLhA..NAsQ...........L+hllp.Gsp...htaYhsh.lsLIolSllLQlhlGlLLlhlup......hslpp...................................tcpt+.hshlNNhsshhlFllsllNlhIouFs........................................ 0 55 71 132 +624 PF02613 Nitrate_red_del Nitrate reductase delta subunit Bashton M, Bateman A anon COG2180 Family This family is the delta subunit of the nitrate reductase enzyme, The delta subunit is not part of the nitrate reductase enzyme but is most likely needed for assembly of the multi-subunit enzyme complex [1]. In the absence of the delta subunit the core alpha beta enzyme complex is unstable [1]. The delta subunit is essential for enzyme activity in vivo and in vitro [1]. The nitrate reductase enzyme, EC:1.7.99.4 catalyse the conversion of nitrite to nitrate via the reduction of an acceptor.\ \ The nitrate reductase enzyme is composed of three subunits [1].\ Nitrate is the most widely used alternative electron acceptor after oxygen [1]. This family also now contains the family TorD, a family of cytoplasmic chaperone proteins; like many prokaryotic molybdoenzymes, the TMAO reductase (TorA) of Escherichia coli requires the insertion of a bis(molybdopterin guanine dinucleotide) molybdenum (bis(MGD)Mo) cofactor in its catalytic site to be active and translocated to the periplasm. The TorD chaperone increases apoTorA activation up to four-fold, allowing maturation of most of the apoprotein. Therefore TorD is involved in the first step of TorA maturation to make it competent to receive the cofactor [2]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.73 0.71 -4.34 270 4923 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 2034 17 709 2437 138 149.30 21 69.62 CHANGED shhshht.h.shst..........................tpt....ltthhp..thtppshtplpt-YspLF........hsPasShYLs.......sc.......hhGpshhclpphhpptGlphss....pEssDHlulhLEhhuhL..................sphttthphlpp+Lt......sWhsth..htpl ..............................................................................hhh.........t..s.t............................................tttLt.shhp.......phts..t.sh..h....p..lp....scaspLF.............................t.s.hsS..halp...........................pc........hG.p...s....h...h.....cl+t.....hhp.ptGl.ph..s..s..........pE.........s....DHlslhL-hhuhL..t......................tht..................pphps.htphlstpLh...................sWsshF...h...h.................................................... 0 194 410 579 +625 PF00877 NLPC_P60 NlpC/P60 family Bateman A anon Pfam-B_292 (release 3.0) & Pfam-B_9022 (Release 8.0) Family The function of this domain is unknown. It is found in several lipoproteins. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.67 0.72 -4.22 28 11783 2012-10-10 12:56:15 2003-04-07 12:59:11 14 163 3617 17 2263 9076 1767 106.80 30 35.28 CHANGED Gp.PYtaGGsssp.............GFDCSGhsphsatph.GlpLPRsuspp.................hthupp..lstsphpsGDLlFFps....tssssHVGlYl................Gssphl+us..usslp..........hsslpssaWppphhtstR ......................................................................GpPY.h.aG....Gs.s.sp...................................GhDCSGhs......p...h.s........ap.......p....t.....G......l...p.....L...P..R.s.stpQ......................................................h.p.h..Gp...t.............l...s....h....s....p....h...psG....D...L.lF.Fps..................ttsssHVGIY..l........................G..s.s...p.h...l..Hus.......upslp.............hs.s..h..t..p.s.h.att.h............................................................................ 0 699 1509 1928 +626 PF04981 NMD3 NMD3 family Bateman A anon COG1499 Family The NMD3 protein is involved in nonsense mediated mRNA decay. This amino terminal region contains four conserved CXXC motifs that could be metal binding. NMD3 is involved in export of the 60S ribosomal subunit is mediated by the adapter protein Nmd3p in a Crm1p-dependent pathway [1]. 
22.80 22.80 22.80 23.10 22.20 22.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.82 0.70 -5.25 73 560 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 451 0 390 543 91 215.30 34 49.77 CHANGED CspCGt...shcss.s................s...hCscChhpph.clsc.lscph...plthCppCsphhps.spWhps..tuc-lhslh.p.l.cs.l..........pcsh..hh-sphlhsEs...........pupclcl+lslps..plhs...s.lppshtlphhlptptCscCp+ht..sshacAlVQlRpp..........h.pchshhhl-phhh+tsttt.lsplpctc-.GlDhahuspstAp+hschlpsth.ssphpcotcLlup-tp.upp.Y+hTauV+l ............................CspCGs...sh..s.s.s..su...................s...hChsCl+ppl..DIoc..sls+p.s...........slphC+pCp+a......hps....sp..........Wlpst....ES+ELLslCL++l.csL...........scs+......llDAtFlWTEP..............................HS+Rl+l+lolpt.........Elhs....ssllpQsh..V-ahlptp.CscCp+h........stshWcAsVQlRQ+...........s.cK+ThhaLEQLll.K.tstpppslpIp.c.hc-.GlDFaaus+...spAp+hl-Flpshh..Ps+h.pp.SpcLlSpDhcssphsY+hTaSVcl..................... 0 138 235 327 +627 PF01234 NNMT_PNMT_TEMT NNMT/PNMT/TEMT family Finn RD, Bateman A anon Prosite Family \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.75 0.70 -5.42 7 264 2012-10-10 17:06:42 2003-04-07 12:59:11 12 4 72 91 177 297 5 229.70 31 93.18 CHANGED h-u..shhuspsahp+FsPcsYLpsaYph.Stsss..tp..llhahL.s....lhpphs.sthts-sLlDIGuGPTlYphLuhp-shc-IhloDasspNhpELt+WlccE.usaDWosslpahsplEG.....stsphp-hEpKhRttV+p..VLcsDVppsssl..su........s.lP..sDsVlohhslEsuCssLssYppAl+shsuLL+PGGaLlhhssLctohY.hGt+c.Fosl.LpcEhl.cAlhcuGhplpphp.....t..ttshhstcGlhhlsA+K 
............................................................t..........t.h..pcFpPpsYLpp.Yt.....spt.........tt...............hh...a.hLpp....lhphF..s....s...tl....p.G.c..pL..lDlGuGP.TlY.p.lLSAschFc-IhhoDas-pNhpELp+W.L.............cc-..suua..D..Woshhpa.l.s.p.lEG......ptpp.hpE.+cccLR.ptlKp.....ll.sDV.ppspPl..ss............lP.....sDsl.l....oshCLEsss.s..hs.s..YppALcpl......ssLL+PGGaLlh...hs.s.....Lp.t...o........hY..hs...Gt.......p.c...asslslsc-.lcpAlt..cu..Ga.pl.php...........p.....t.sh.s.pu.hhhhhupK................................................................... 0 60 72 135 +628 PF04147 Nop14 Nop14-like family Wood V, Finn RD anon Pfam-B_8521 (release 7.3); Family Emg1 and Nop14 are novel proteins whose interaction is required for the maturation of the 18S rRNA and for 40S ribosome production [1]. 34.90 34.90 44.20 35.60 34.40 34.30 hmmbuild -o /dev/null HMM SEED 840 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.42 0.70 -6.62 37 404 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 278 0 286 416 7 609.40 24 92.83 CHANGED 
s+tp+ppsltt..........IccphNPF-h+ss..+sKa-l......hG+p.scssp...ucPGlo+uhup-pR+pTLhtEhpp+sKsGuhlD+RFGEp.DsshosEEKhhpRFs+E+Q+p...+K...cshFNLp-DD-p...................LTHhGpoLu..........pD-.p-tcht...t..................pt.chtt........pp....tpspPp..R+KSKpEVMcElIAKSKhaKtERQpt+-csp-hp-cLDcs..hp-lhshL..psspp..t..........................ht...........spcph.c-YDptl+pLs..a-+RupPsDRTKT-EEhAcEEtc+L+cLEp-RL+RMpG...tsps-cppcpt......................ts-Dl-Ds.........................tahhs.s.pppspcp.........................................................uhps---tppD-D.p....t.pp.p.........pt..ptts-.Esptppcpt.......................................tspspsplsaTassPpoa--hhphlp..shshp-hssllpRItts..a+PpLttsNKpKLspFhul....Llp+lhaLusps.............shpllpslhphl+sLucp.....................aPpshupshRshlcch.pphct....slps.........s-LlhlslluslFsTSD+aH.VlTPuhlhluchLupsthp...olpDlupGhalsslhLpYpphSKRalPEllsFltssLhhh.............hPpcspp..........................s.sh..shpt.sstLp.......lsss...................pt..thpspp..Lphhclh........spppssph+lslLsshlsllppsss.lapsp...suahElhpPhhslLpphsp..................tsh.splpphhpc.lsphhststht..R+PLsLppH+PluI+ohtPKF.E-sFsP.c+Kp.YDss+ERsEhsKLKtphK+E+KGAhRElRKDspFlARp+lcEppccDpEYccKh++llsplpspEGcp 
...............................................................................................t..............t.NsF-hphs.....t...Khph..............hst......t............t.ss...hs+t.u.ppRppsh...cht....p.t+..s.hhhD+R..h........G..E..s.p..h........s.E-+hhtRathEp.pt.......p+.....tshasLp-p-...........................................................LTHhGpsLt..............................t...-p.tpt.............................................t...htt...................................ttt.t....p.+o+pElhpElIsKSK...Khc+ptt+cp..t.p.htpLDpt..htpl.t.h...tt..t.................................................................................................................................................tt...ptYD.hh+..phh..h..-..h.+u.tss-RhKotEE.ht......tcp..tp+LppLE.............tpRhpRM.u......ppptp..t.t...................................-s.tct.............................................................h..t.........................................................................................t.t.tp.pp...tps...........................pt.t....................................tp...pt.........................................................................................................................ttthshsh..hPp......sh.pp.....h.thht...th........tp..........hhlp+l..th...pspLt.tN+t+ht.h..h....Llpah....hhpt..........................hth.h....p.lh..lhphsp.................................................s...hstthp.hltph.tp..t......................................t.LhhhphhuhlassSDhh.H.VhTPshlhh.......sphLtp..............h.h......shtphs.uhhlstlhhp.h.t...upRhhPEhh.ah.thlhhh......................h.t..........................................t......h......................l..t.................................................h.h..h...............t.phthl..hhthlpth....hhtt.....uh...hh...s.h..hlpth.....................................hpthhpt....htt...h.p.t.........ht...L....h..........p.p...p+...............shslt.......h.P+h.....p..s...hp..t..........c.......s.sp......p..ct....-h.t+hh..tphKcEhKus
..h+ElR+DstFhtp.phpp.htpptthppKhtplhs.lttpptt................................................................................................................................ 0 105 165 243 +629 PF04153 NOT2_3_5 NOT; NOT2 / NOT3 / NOT5 family Bateman A anon Pfam-B_2131 (release 7.3) Family NOT1, NOT2, NOT3, NOT4 and NOT5 form a nuclear complex that negatively regulates the basal and activated transcription of many genes. This family includes NOT2, NOT3 and NOT5. 20.50 20.50 21.10 21.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.04 0.71 -4.28 66 799 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 308 0 526 782 7 125.90 31 23.73 CHANGED lpsuhsss.pst...ss.pp..pap........Ppshh.......spps.........................phhp..phst-.....TLFalFYhh.s..shtQhhAApELppRsWRaHKchpsWhpR....t.....c.hs........sph...Ep...GsYhaFDhps...........Wpphc..c....sFphcYphL- ...................................................................sshts..tst...ss.....t..t...pap......Pps.h........h.spps.sh.............................................thhp..+h.sp-.....TLFalFYhh.u..............shhQhlAApEL.t...p........+sWRaH+chphWhpR..pt..p....pshs................................sph..Ep...GoY.haFDhpp............Wpphp..p.pFphcYphLp................................................................ 
0 187 299 442 +630 PF04065 Not3 Not1 N-terminal domain, CCR4-Not complex component Wood V, Finn RD anon Pfam-B_8081 (release 7.3); Family \N 25.80 25.80 25.80 31.30 25.50 25.70 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.69 0.70 -5.09 24 389 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 268 0 251 374 2 217.20 45 32.76 CHANGED Mup.RKLQQElD+shKKVsEGlptF-sIY-Klps..ssNsoQ+.....EKLEuDLK+EIKKLQRhRDQIKoWhuss-IK.DKss..Lh-pR+hIEs......tME+FKulEKthKTKuaSpEGLp..sss......pl.DPc-pc+p-sspalssplDELpcQlEphEuEh-pl.sthKKt.ptst.spppchp-hcpthERacaHls+LEhlLRhLpNspl-s-pVp-Ic-DIcYYVEsNp-..sDFh.Es....-slYD-.Lsl-pp .............spRKLQ...tEID+shKKVsEGlptF-sIapKlps.......ssNssQK.........EKhEs...........DLK+EIKKLQ.....RhRDQIKoWh...uus-IK...D........K..p...............Ll-....pR+.hIEs...........pME+FKsVE+EsKTKAaSKEG..Lu....tup.................................+l..D....P...tp.....+cKpEsspaLsssl-pLptQ.l-phEuElEsLps......p.......t.......+..Kt.....pts......p.....s..........c.....ppRlpclcphl-RH+aHlppLEhlL.RhLcNs.p.l.ps..-pl.p.c.lK-slcYYl-.....s......s...p-.....sD..F..Es.............-tlYD-LsLp.......................................................................... 0 87 147 217 +631 PF03060 NMO NPD; Nitronate monooxygenase Griffiths-Jones SR, Bateman A anon Pfam-B_2634 (release 6.4) Domain Nitronate monooxygenase (NMO), formerly referred to as 2-nitropropane dioxygenase (NPD) (EC:1.13.11.32), is an FMN-dependent enzyme that uses molecular oxygen to oxidize (anionic) alkyl nitronates and, in the case of the enzyme from Neurospora crassa, (neutral) nitroalkanes to the corresponding carbonyl compounds and nitrite. Previously classified as 2-nitropropane dioxygenase [1,2,3], but it is now recognized that this was the result of the slow ionization of nitroalkanes to their nitronate (anionic) forms [4]. The enzymes from the fungus Neurospora crassa and the yeast Williopsis saturnus var. 
mrakii (formerly classified as Hansenula mrakii) contain non-covalently bound FMN as the cofactor. Active towards linear alkyl nitronates of lengths between 2 and 6 carbon atoms and, with lower activity, towards propyl-2-nitronate. The enzyme from N. crassa can also utilize neutral nitroalkanes, but with lower activity. One atom of oxygen is incorporated into the carbonyl group of the aldehyde product. The reaction appears to involve the formation of an enzyme-bound nitronate radical and an a-peroxynitroethane species, which then decomposes, either in the active site of the enzyme or after release, to acetaldehyde and nitrite. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.86 0.70 -5.29 12 5886 2012-10-03 05:58:16 2003-04-07 12:59:11 10 43 3071 11 1913 12559 6372 309.60 26 84.17 CHANGED psthschh.hp.slhtshhuh.hssscLAuAVSpAGGLGllu..uuhhos........DtLtpplptlcphTsc.PaGlNlhlsp.ttsc....................h...h.phthchuls...................................lluhuhGsP.tphlccl+puGshlhshsuosppActstpsG.......sDsllsQGhEAGGHpG......csu....thhhLlsplscsls.......lPVlAAGGItDs+slAAALsLGApGVthGTtaLsopEussssht+pthlpustccThhopshs.........G+stRsLpsshhcch-.....p..shshs.t.................................hsts....lptusspu...shctu.hhsGQstthlsclhsscpllpplsp- 
..........................................................................................................................................................................................................................................................thh.thchP.l.l.p.u.....sM.uh.....l...u...ss.......cL.su.A..Vup.A.G....GL.Ghl.u.....u...u.t.h.ss...........................c..l.....c.....p.....p.l...p......t........h............+...........p......h.........s.......s.........p.....P............a.....u...l...N...l..h..h......t.......htt......................................................................................hp...h...hh.p.tsht..........................................................................................................................................................l..l...s...h.......u.......h.......G...............P................t.......p.......h..........h......p........t......h......+........p..........s......G........l.......h..........l......l.....s.......h.......l.....s....o.....s.....c..t....A...p....t....h..t....c....h.G..................................s.D.u....l....l.....s.......p..............G.....h.............E.......A...G..G.HhG.......................phs........................sh..s..L..l...s..p...l......s..s..s.hs....................................lP..Vl..A.AGG.I..s...s........G........c.u......l....s......A...A......h...........s...........L.......G.....A........s........uV....p....h............GT....t..F..l..so..p...E........u....s...s........p.t.......sa....K..p..t..l....l....p................u........p.........t.....p......D.........s......s..l.o..t.t.hs................G....h.s..s.......R.sl..p...s..p.....h..h.p..p.h.t...........t......t..h.t....h.........t.t........................................................................................hh.tt.........lpt.s..hhps........c..h..c....h.s........h.h.sG.pss...t..h.....l.....p...c..h...sstcllpplh..t..............................................................................
................................................................................................................................... 1 550 1189 1608 +632 PF05021 NPL4 NPL4 family Wood V, Bateman A anon Pfam-B_13681 (release 7.6) Family The HRD4 gene was identical to NPL4, a gene previously implicated in nuclear transport. Using a diverse set of substrates and direct ubiquitination assays, analysis revealed that HRD4/NPL4 is required for a poorly characterised step in ER-associated degradation after ubiquitination of target proteins but before their recognition by the 26S proteasome [1]. Npl4p physically associates with Cdc48p via Ufd1p to form a Cdc48p-Ufd1p-Npl4p complex. The Cdc48-Ufd1-Npl4 complex functions in the recognition of several polyubiquitin-tagged proteins and facilitates their presentation to the 26S proteasome for processive degradation or even more specific processing. 21.10 21.10 21.30 21.50 19.60 20.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.92 0.70 -5.26 36 376 2012-10-10 14:49:21 2003-04-07 12:59:11 10 22 298 0 260 392 4 274.70 36 51.89 CHANGED RhGaLYGpYccaspsPLGIKAVV-AIYEPPQ.psE.DGlsl.hs...pppppVDplApth..GLp+VGhIFTDLhsss.tpGoshhpRchcoaaLSuhEllhAA+hQhpaPp.s+aS...psGpFuSpFVTCllSG..s..suplshpuYQVSspAhuLV+ucllpsosp....Pshhhlpc........sspp..cYlP-VaYpchspYGtpVpc.A+P.FPl-aLlVslocuaPpsPp..........shF........psssFPlENRphhGp.........QshpsltcaLpsp...............sshhpplSsFHLLlaltph.thLs..pp-hthLscssp....pp..t.h.th.tpstsatsLlpIlp 
...............................................RhGaLYGpYppap...p.h...PLGl+..AhVtAIYEPPQ......sp.-..u.lpl...hp........tptptVDplA..pth............GLp+...VGhIFTDLhs....ts.......t......t......GpVhhpR......p.h-oaa..LSu.EslhAuchQspaPp.s..+hu...........sGp.FuSpFVTsl..loG...s..t.s.s.plphpuYQlSs..pshtlV+sshltsstc.....Pplhhlpc................................sspt...pYlP...-........V....aYpc.h.s....c.a.G..t.p..lpp.A+P...hPV-YLlVs.....lstuh.PpsPh...........hF........ppssFPl.ENRphhGp..........sQshpsLtphLpptt...........................tp.hhctlSDFHL.LlaLh.p..t..h..hs..p...........cchshLhcssp........pcs...t.....h.ph.hts.tWtpl..l........................................... 0 96 146 219 +633 PF01909 NTP_transf_2 DUF76; Nucleotidyltransferase domain Bateman A anon [1] Family Members of this family belong to a large family of nucleotidyltransferases [1]. This family includes kanamycin nucleotidyltransferase (KNTase) which is a plasmid-coded enzyme responsible for some types of bacterial resistance to aminoglycosides. KNTase in-activates antibiotics by catalysing the addition of a nucleotidyl group onto the drug. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.40 0.72 -3.66 117 8895 2012-10-02 22:47:23 2003-04-07 12:59:11 18 121 3228 80 3285 7676 1203 100.00 15 25.51 CHANGED lppltctlpphh......stplhlaGShsc.....Gphp.t.....SDlDlllhhspt.....................................hhhthtthhpphhshthDlh..............hhtthp.........hhhtphhppthhh ..........................................h.......h..t.h..........hhtl.h.laGShuc............G.p.h.pst........SDlDlllhhspt...........................................................................................h..th..t.....h...t.....h........h.....h..shh...................................................hh........................................................................................................................................................................................... 0 1095 2088 2757 +634 PF01759 NTR UNC-6/NTR/C345C module Bateman A anon [1] Family Sequence similarity between netrin UNC-6 and C345C complement protein family members, and hence the existence of the UNC-6 module, was first reported in [1]. Subsequently, many additional members of the family were identified on the basis of sequence similarity between the C-terminal domains of netrins, complement proteins C3, C4, C5, secreted frizzled-related proteins, and type I pro-collagen C-proteinase enhancer proteins (PCOLCEs), which are homologous with the N-terminal domains of tissue inhibitors of metalloproteinases (TIMPs). The TIMPs are classified as a separate family in Pfam (Pfam:PF00965) [2]. This expanded domain family has been named as the NTR module [2]. 
21.90 21.90 21.90 21.90 21.80 21.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.74 0.72 -4.50 103 1259 2012-10-01 21:39:20 2003-04-07 12:59:11 16 55 160 51 605 1076 0 105.20 21 14.37 CHANGED pc..hCps......-Ysh+spVhs.....hppps......shsthshpltpVhK....pu......th.ppsppthahtpt.....pC.phht...Gp.pYLlMGpt.........pptphphllsppohlchW.sptppphpchpp ...........................t.phCts......-a....s....l...ps+lhp..................h.ppps............shhph....s.s.p.l.p..plhK...............................pu..........ph.pcspp.p...hhh.pt........pCsplps..........sppYLlMG......................pptpsphlls.pohlthW.sphtpphpp.................................................................... 0 82 136 315 +635 PF04142 Nuc_sug_transp Nucleotide-sugar transporter Bateman A anon Pfam-B_2311 (release 7.3) Family This family of membrane proteins transport nucleotide sugars from the cytoplasm into golgi vesicles. Swiss:P78382 transports CMP-sialic acid, Swiss:P78381 transports UDP-galactose and Swiss:Q9Y2D2 transports UDP-GlcNAc. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.56 0.70 -5.27 10 1158 2012-10-02 19:55:49 2003-04-07 12:59:11 10 22 276 0 767 1530 427 213.10 26 58.73 CHANGED +hscpL+ctlhsps...tDoLKluVPShlYslQNNLhYVALSNL-AATYQVTYQLKILTTAlFoVlhLsR+LuphQWhSLlLLhsGVAlVQhssssu.p.ssspss...........................................spN...hlGhsAVLsAChsSGFAGVYFEKILKsos...sSlWlRNlQLuhhGlhhuLls..salpDtspIs-pG.........FFhGYshhVWhlVlLpAhGGLllAlVlKYADNILKuFAoSloIILSoluShh.LF.DFplohhFhLGAhlV ................................................................................................................hh..................p....hh.h....u.l.P.u..hl....Y.sl.p.N.sLh.a.l....u.......l.s...l..s..su.s.....a.Q.l.hhp.h.......K......Il.......sT.A..lhshh....hLp+p..Ls.h..........h.Q..W..h.uLh.l.L.h.h.G..lsl..l.p....hs.....s.st..s........................t.....t...........h.......................................................................................................................................tp.......hh....G...h......hhl...l....h...us....hhS..uh...A..u..............VY.............hEt..l...L.K..p..p.t.....................s.l....a...h..p....N.......h......l.....h..huhhhs.h..hh...............hh...h.......t....h....t........h.......p..tG..............................................h..h..G....a....s....h.....s.h..h.hl.h................p.u.h..s....Glh.huh....h..h..+ass.sl..h..Ksassshuh...lhss.hh.S.hh.hh...s...h...s........hhhs.................................................................................................................................................. 
0 334 453 632 +637 PF04096 Nucleoporin2 Nucleoporin autopeptidase Wood V, Finn RD, Rawlings N anon Pfam-B_5132 (release 7.3); Family \N 21.90 21.90 23.80 24.90 21.80 21.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.80 0.71 -4.37 57 473 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 294 19 304 507 1 137.20 35 10.65 CHANGED sYahpPolppLpphohpcLpp...VpsFslGR.csaGpIpF...tsVDLssls.L-..............plVphps.+pltV.Ys-s...p..KP.lGpGLNlsApITL.sshPh...s+sspt.h.pspp.....hpcplcplpp.tpsscFlSY-s.sGsWsFcVpHF ...................sYas.PSh--Ltphstpc....hpp......V..s.sFslGR.cuYGplpF...tsVDLs.s.l.s..LD.................p.....IV.phpp..+.....plhV.YsD-..................sp..KPPl..GpGLNh.AplTLcssaPh..........s+ss+p.hpp.spc.h.....acp+lc+lpc.p.psscFl.s.Ycsp....o.....GsWsFcVpHF................................ 2 113 182 263 +638 PF01733 Nucleoside_tran Nucleoside transporter Bashton M, Bateman A anon Pfam-B_2135 (release 4.1) Family This is a family of nucleoside transporters. In mammalian cells nucleoside transporters transport nucleoside across the plasma membrane and are essential for nucleotide synthesis via the salvage pathways for cells that lack their own de novo synthesis pathways [2]. Also in this family is mouse and human nucleolar protein HNP36 Swiss:Q14542 a protein of unknown function; although it has been hypothesised to be a plasma membrane nucleoside transporter [2]. 
22.90 22.90 23.10 23.10 22.20 22.60 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.89 0.70 -5.26 7 1065 2012-10-03 03:33:39 2003-04-07 12:59:11 13 12 303 1 710 1041 14 251.30 21 65.30 CHANGED hhtlslINu.hsAlhQsSlaGlAushPtpYosslhsGQuluGhhsolshl.lshAsssc....hptuAhhYF...hsuhlllllChlhh.hlphhcaY+hatphp.....................p.pt-hhpsccp...sp.s...stsshpp.............s..t.....h..hhsllppl.shshslshhaplslshFPuh.o..h.os....s....pp.aa..lssFLsFNlFDhlG+slsuhhhaPs...ssRhlshhshhRhlFlPLFhhCshtsp...+....hPshFcp-hhFhhhhhhFuhoNGYLsSLsMhhuP+pV..sccpEsAGtlhshFLslGLuhGulhSalhch .............................................................................................................................................h....hh.h.sh....ssuh...hpsu.h.huhsu...hs.....p.a........hpshh.Gp.uhuG...................hh.......s.u.hh..l.....h.s.h...h......hsp.................................pt.s....shhaF...hhuh.hh....h...hs...h....hh.h...h.h.h......h............h....ht....h.h...t.............................................................................................................................................................................................................h.h.lh.......pphh.....h...h....hsl.hhhahlT...h.lFPu.....h..............h.....t......s...........................s................................h....h...........h............h...h....hhhaNhhDhhG........+.....h..s.t.........h..........h....................p..p..h.l.h..h.h...sh.hR.hl.h..l.Phhhhs..h..t....................................hh.t..ps.....hhh.....h.h.h...hhhuhoNGahsshshhh...u......P............p.....h...s........tpt.........phu..G.hhsh....hl....h..hGlhhGuhhuhh...h................................................................... 1 294 433 600 +639 PF04880 NUDE_C NUDE protein, C-terminal conserved region Mifsud W anon Pfam-B_6501 (release 7.6) Family This family represents the C-terminal conserved region of the NUDE proteins. 
NUDE proteins are involved in nuclear migration [1]. 27.20 27.20 28.20 28.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.15 0.71 -3.89 3 274 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 162 6 163 251 0 178.10 37 42.72 CHANGED SLEDFEp+LNQAlERNALLEhEl....DEKESLplclQRL+DEsRDLKQEL.hVpER.ppsNRKSRPoP...........V.susSlPo...TPss...psShsSP....+SlPNGhVoSPL..TPss+lSL................pLAu..ssA+DsAsspStTSuSVN.shshsSshsh.ptSussSFssRu....h.ss.PphsQuHSRspS ...........SLEDFEp+LNQAIERNAhLEsEL....DE.KEsLhlplQRLKDEsR.............DL+QEL.tVp............c+.........p....c....h....s....p.p.....s...tsos......................................l.p.ssh..olPu.............T.Pss.........ps..sh.sos..............................puh..s...suhu..u....oPL....TP..s..s.+...l.Su........................................plAu...shsps..us...ts..h...sss.hp..........h....t..........s............s..............s............................................................................................................................................ 
0 39 64 109 +640 PF00293 NUDIX mutT; NUDIX domain Bateman A, Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.73 0.71 -4.52 197 43912 2012-10-02 00:00:35 2003-04-07 12:59:11 23 282 5206 325 11753 31604 9218 130.90 17 65.32 CHANGED hthusssllhspps..................clLlhcctps.................................................shaphPuGtl-..GEs.tp..................................uAhREltEEsGlph.......t.hthhthhthttsstt.....................................thhhhahsphtsst.thp.t.t.........Ehtphpahshpclhphhhthp.......hhtthhtt .................................................................................................................................................h....shh.h.l.hptpt..................clL.l..h....c..ctp.t.................................................................................................s.h.a.p.h.P.u....G....t.l.....-.......u...E..o.......tp...........................................................us.hR.El..t.E..E..s..Glph.........................p.hph..h..s.....h.....h...t....h.....h...stt...................................................................................h.h..h...h....a..h....s....p....h..........t.....s...t..............h..t..t.t.......................Eh.t.p....h..t....a..h....s..hp.ph..p.h........................thh.......................................................................................................................................... 0 3648 7193 9734 +641 PF03826 OAR OAR domain Griffiths-Jones SR anon PROSITE Domain \N 20.10 20.10 20.10 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.38 0.72 -6.56 0.72 -4.31 45 968 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 131 0 530 872 1 20.70 51 6.33 CHANGED s-.+soSIAsLRhK...AKEHsss ......sp+soSIAuLRhK...AKcHus...... 
0 82 133 288 +642 PF03137 OATP OATP_C; Organic Anion Transporter Polypeptide (OATP) family Mifsud W, Bateman A anon Pfam-B_626 (release 6.5) Family This family consists of several eukaryotic Organic-Anion-Transporting Polypeptides (OATPs). Several have been identified mostly in human and rat. Different OATPs vary in tissue distribution and substrate specificity. Since the numbering of different OATPs in particular species was based originally on the order of discovery, similarly numbered OATPs in humans and rats did not necessarily correspond in function, tissue distribution and substrate specificity (in spite of the name, some OATPs also transport organic cations and neutral molecules). Thus, Tamai et al. [1] initiated the current scheme of using digits for rat OATPs and letters for human ones. Prostaglandin transporter (PGT) proteins (e.g. Swiss:Q92959) are also considered to be OATP family members. In addition, the methotrexate transporter OATK (Swiss:P70502) is closely related to OATPs. This family also includes several predicted proteins from Caenorhabditis elegans and Drosophila melanogaster. This similarity was not previously noted. Note: Members of this family are described (in the Swiss-Prot database) as belonging to the SLC21 family of transporters. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null --hand HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.72 0.70 -6.46 16 1462 2012-10-03 03:33:39 2003-04-07 12:59:11 15 17 98 1 852 1674 81 439.80 25 82.67 CHANGED hKhFllshslshhsQ.shssuahsSslToIE+RFplsSspoGlIsusa-IushllllhVSYFGu+hHRPRhIGhGsllhulGullhuLPHFhhs...................Ypaspss.sssspstsspLC.........t....p.stsstsspcptpshhallhhhuQhlpGIGsoPlhslGlSYlDD.scppp.SPlYlulhhshshhGPAlGalLuShhhplYlDhspss.t....lplsssDPRWlGAWWLGFLlsuulslloulPhFhFP+pLPcs.......................th....thpt.pspcpppppsppscstt........................................tpl+cF.pslhplLpNslahhhllupshpssshsGhhoFlPKaLEpQauhouupAshLhGslslPssulGhhlGGhll++a+lsspuhsthshhssllshhhhlshhhlsCssssluGlsssh............ss.tt.s.hssCspsCsCspstasPVCussGhtahu...........................shsusssssstsstp.spshssssssssspss..........tGhCsss..CspphhhalhlhshsshhsshutssshhllLRsVp.--KohAlGlphhhhRlLGhIPuPIhFGhlIDssClhW.uppC.Gp+GuChhYDssshp .....................................................................................................................................hhhhhshh.h.......p....hh...s.hh.ssl..o.p..l.E+Ra...tl.S.t.sG.......hlsus...-l..u...shhhhhhlo.Ya..G.s..+..h..p.+P.phluh.G..........shlhuhushlhslPcFh....t............................................................................................................hp.ht.................................t.....t.......ht.....................................................................t.t...t......t............................h.h.hhhhhuphlhGhGtssl.slGhsYlD-...spp...pp.us.hY.....l.u..hh.s..hthhGPhhGah.l.uu.hh.hp...ha...h.......-.................t.l.s..p.c....s..pWlGA...W.Wh.....Ga..ll.suhhh...h.h...uhs.hhhh..P.+p...hstt...............................................................................................................t.............t....................................t..........................................................................................................
.................................h..p.th....s..hh..p...lhp.N....hahhhhh..sth....h....h...h.....h..uh.........hsF..........h.sK...al..E.........p.Q....a.........t...............s...........su............us.........hh.......h...G...h.....h.ls...sh..s.......h.GhhhGG.h...lh.p+...h....p.h..t..h..h....t...hh..t..h...shh.ht.h.h.......shhh.....h..hhhhtC........s.........h.u....G.ls.sh................................t.....s.Cs.ts.s..t..tshsh.ssh......................................................................................................................................................................................................................G..C.ts........t..t....h....a.hh.h...h..h.....h.hh.h.h.....s.ths........h....h..h....hl...R.........sl..p..+shulGh.thhhhR.h..h....u................hlPuPlhaGhhlDpsChhW...t..........p.......s....t...p.u....sChhYs...h...................................................................................................................... 0 328 378 620 +644 PF05005 Ocnus Janus/Ocnus family (Ocnus) Moxon SJ anon Pfam-B_4799 (release 7.6) Family This family is comprised of the Ocnus, Janus-A and Janus-B proteins. These proteins have been found to be testes specific in Drosophila melanogaster [1]. 20.20 20.20 20.20 23.60 19.90 20.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.43 0.72 -4.17 22 220 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 116 7 113 230 6 104.80 40 67.16 CHANGED Lst.lPpV-lD.cGh.FKYlLlpltspt.....scts+.lVRGhstspaHs...DIa-clptphcphGl..ssc......................CLGGGRIc+csppKp..I+VYGaSpuaG+A...cHphopclLp..spYscY.pI ..................................................................Lht.lPpVcI-.pGh.aKYlLlplpsts..........spts+.lVRGht...s.c....aHs...DIa-clp....tchc....p.h.Gh..ssc.......................CLGGGRIpHpspcKp..I+VYGhSpua...G+A...cHshop-lLp..spYs.-Ypl............................................. 
1 44 56 89 +645 PF00215 OMPdecase Orotidine 5'-phosphate decarboxylase / HUMPS family Finn RD, Bateman A anon Prosite Domain This family includes Orotidine 5'-phosphate decarboxylase enzymes EC:4.1.1.23 that are involved in the final step of pyrimidine biosynthesis. The family also includes enzymes such as hexulose-6-phosphate synthase. This family appears to be distantly related to Pfam:PF00834. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.61 0.70 -4.95 87 7126 2012-10-03 05:58:16 2003-04-07 12:59:11 19 18 4753 429 1546 4780 3008 216.50 23 86.65 CHANGED spLpluLDhtst..............cchl...plscclss..lshlKsshslhpshG........hpllptl+pp.s..hhlhhDhKhsDIGsTstpthp.....hhphsAchlslpshsG..sslpuhhcsupphs.....................................................................................................tllhls.hoshsthshtp........hspthlcpttctp.t.............hh.Ghlsssp.s............................chhhlsPGlph............ttusstutphtts.thht.ttuchllVGRuIhtus.sPttsucph 
..........................................................................................................................lhlAL..D..hpsh.......................................pps.h.............p.h.l.c..p....ls........s...ts....s....h.l.K..lGhthhhs.G.........................phl.c.tL.+.pts.............hhlhhDh.K............ht.DI.s.sTssptst.......................................................s......p........h.........u.......s........Dhlslpssu......G....hph....hcuuhcs..h.pphs.................................................................................................................................................................t.l.l.t.ls..hh..oshstp.phpp..hs..............tss..p...th.s.pp.h.tchttp.......................................t...Ghlsuspps......................................shhhls..P..Glp...................................psu.ss.s.s....pt..h...hhs..t.......s.t..............ss.......h.lllGRsIspAs...sPht.shct.h..................................................................................................................... 0 478 961 1295 +647 PF04084 ORC2 Origin recognition complex subunit 2 Wood V, Finn RD anon Pfam-B_7065 (release 7.3); Family All DNA replication initiation is driven by a single conserved eukaryotic initiator complex termed he origin recognition complex (ORC). The ORC is a six protein complex. The function of ORC is reviewed in [1]. 
19.60 19.60 20.10 20.00 19.40 18.80 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.09 0.70 -5.60 31 329 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 276 0 239 313 5 306.80 30 59.16 CHANGED SspoLuplt......shlspccahphhpph.pth...pppphptL.chapph..............FtpWhh-LppGFsllhYGlGSKcpLLppFspphLss......................hshlVlNGa.Pslsh+sllpsIsphlh.t.............thhsppstcplphlhchhpst...................psclhlllHNlDGsh..LRppcsQshLupLushspIallAShDHlsuPLlWD........................ptct.ppaNFlaa-sTTatPYstEh..saps.sl..hh..............................................s.....+osp.....hutpuhpaVLpSLTtNu+sla+lLlphQLp.....................sptss.....pthGl-acsLappCpcpFlsSsEhshRohLsEFhDHKhlppp+sssGhE...............hLhlPhspspl .......................................................Stpshtph....hlspp.phhphh.pp...h..t.....p.pchptL.p..a..p.p...............FspWhhpLpp.GFslllYGhGSK+pLLp...cFtpphhpp..................................................shlVlNG.ah.P.s...lsl+s...lLssIsptlhst...............................................tthhpps.ptlphlhphhppps...............................................................shplhllIHNlDush........LRp....s.ps.QphLupL....u..s.......hsp......IpllAShDHlN.sPL.lWD............................pspt...spaNalaa-sTTatPY............s.tEh....sats..pl..hl..............................................t........pssp.........hshpuhtaVLpSLT.Nu+sla+lLhphQLp.................................................sttss.....phhGlpapslYppspEpFlsoS-hslRs.LpEFhDHpllpp++.s.s.sGs.E................hLhlPhstt..h.................................................. 
1 83 134 198 +648 PF03392 OS-D Insect pheromone-binding family, A10/OS-D Mifsud W anon Pfam-B_3032 (release 6.6) Family \N 25.00 25.00 25.40 26.20 24.60 24.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.33 0.72 -4.01 91 701 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 96 9 258 733 0 92.40 40 73.06 CHANGED YTsKYDNlDlDEILps-RLlpsYhpCLl-cG..tC.TP-GcELKchLPDALcTsCuKCo-KQKpuscKVlpaLhcp+P-.WcpLpsKYDPcspYpc+Y ........................YosKaD..s.l.sl--ILpscRLlpsYhcCLLcc..G...C.TP.-............G+-LK.....ch.l.....P-ALc.s.pCsKCo-+Q+psucKllcaLhpp+P-.W.ppLtsKYDPpspYtp+a.................... 0 61 101 246 +649 PF04756 OST3_OST6 OST3 / OST6 family Wood V, Bateman A anon Wood V Family The proteins in this family are part of a complex of eight ER proteins that transfers core oligosaccharide from dolichol carrier to Asn-X-Ser/Thr motifs [1]. This family includes both OST3 and OST6, each of which contains four predicted transmembrane helices. Disruption of OST3 and OST6 leads to a defect in the assembly of the complex. Hence, the function of these genes seems to be essential for recruiting a fully active complex necessary for efficient N-glycosylation [2]. 
21.30 21.30 22.50 22.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.01 0.71 -4.67 19 672 2012-10-03 14:45:55 2003-04-07 12:59:11 8 15 309 3 385 597 4 142.80 26 50.64 CHANGED hhh.Fh..hhclsph+lhhPs.lshP.hhlshhllllo............................aFlhsuGhhashIpssPhlspshcs.....psVsFh.tpsptQahhEuhhsuhlashsul....Ghlhlsps...stshs+p................chhhhu..Ghshllh.FhshhhF .................................................h.ah..hhp..h........clh+Ps.h.shs.hh..h...sh.hl..s..l...l.u.........h..h.h..t............h....hh........p..+.hWhhhslhh.hhhhsSGhhashI+....tPPhstpsscs.....t.lsahts.pspsQ..ashE..shhsuh...la.shhuh....uhl.lL.s...p.......ss.........t...s..hsct...................c.hhhhh.....Gh.shlhh...Fh.h...h.......................................................................... 0 117 190 292 +650 PF01010 Oxidored_q1_C oxidored_q1_C; NADH-Ubiquinone oxidoreductase (complex I) subunit C-terminus Bateman A anon Pfam-B_41 (release 3.0) Family This sub-family represents a carboxyl terminal extension of Pfam:PF00361. It includes subunit 5 from chloroplasts, and bacterial subunit L. This sub-family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane. 
25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.63 0.70 -4.86 113 14159 2009-09-12 05:40:05 2003-04-07 12:59:11 14 14 12076 0 124 13554 281 225.10 56 42.75 CHANGED RIYLLTFEGHLs......laFpNYSG...p..+sssh..Y....SISLWG..K.cs.....Kt..ls+N...hhL.t............psscpsSF...Fs...K..phY..pl.........s.pN.l+phh..p...s.Fhsh.s..pF..ss..Kp..sh......YPaESDNTMLFPLLlLlLFTLFlGhIGI.P....Fs..Qtth.....slDILSKWLsP..SIN..................LL..HpNsss..S......h..DWYEFlp...NAlFSVSIAhFGIFIA.hLYpPlYSShpNhsLINSF..lK..hss..K.R..hhh..DKIlNsIYsW..SYNRGYIDsFYsphhotGIRtLAcLTpFFD ........................................RIYLLTFEGHLN.VHF..QNYSG......p...Kssuh..Y....SIS..L..WG........K..ct..sKt........lN+s....hhLhs..................ps..s..p......p......sS...F...FS..........p.......ch.Y.....p..I..............s..pN...s....R.....s.hh.....p........s..F..h..s......l...s......pF.....s.s.....Kp....sa....................sYPa..ES..DNTMLFPlLlLlLFTLFl......G.sI......GI..P...........F...s...Q...t..s..h.......-l..D..IL.SKWL..T.P..SI.N..........................LL........Hp..s...S...N..s....S......h....DW..Y.....E.F.....l..p.s..A..l..F..S..V..S.....I..AhF..G.IFIA...hLYpPl.Y..S..S.h....QNL..s...........L...l..NSF.........lK......t.uPK...R.........hhh...D+.I..h...NhI.Y.sW..SYNRGYIDs.FYsp.hhh..Gl.RtLuc.hh.pFFD......................................................... 0 38 87 111 +651 PF01483 P_proprotein P; Proprotein convertase P-domain Bateman A anon [1] Family A unique feature of the eukaryotic subtilisin-like proprotein convertases is the presence of an additional highly conserved sequence of approximately 150 residues (P domain) located immediately downstream of the catalytic domain. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.02 0.72 -4.20 75 1577 2012-10-03 19:46:52 2003-04-07 12:59:11 15 112 646 15 726 1507 250 87.90 31 11.11 CHANGED lEaVplplslsHs.pRGDLplpLsSPsGspShLhs.pRsp.........D.tpsGahsWsFh......osptWGEsspGsWpLcl..pD.......................sstpppG......plpsWpLtl .......................lEcVplpl.s.lsHs..pRGDLplpLhSP.s..GT.p.oh..Lhs..pcst...................................D...s.ps..G..h....h.sa.sFh........................................osphaGE....s.s...p.G...s...WpLcl...p.D..................................ps.stptG......plppWpLhh.................................. 0 252 371 555 +652 PF04062 P21-Arc ARP2/3 complex ARPC3 (21 kDa) subunit Wood V, Finn RD anon Pfam-B_6413 (release 7.3); Family The seven component ARP2/3 actin-organising complex is involved in actin assembly and function. 25.00 25.00 40.00 40.00 18.30 16.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.01 0.71 -4.49 29 364 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 276 15 231 331 3 170.20 48 94.42 CHANGED MPAYHSsFhs-st........phl..G..NhulLPL+TpaRGPA.................ssspshDIlDEsLsLFRANsFF+NFEIKusADRlLIYhhLaIo-CLpKL.....ptshstp-ApKtLhsLAl-.sFsIPG-sGFP..LNuhYphP.ps+s-uElLRsYLpQlRQELuhRLlc+lYs..sp.p................spPSKWWLsFsKRKFMsKSL ....................................MPAYHSth.s.t............phl..G..NhulLPl+..Tp.h....+....GPA.h................................ss.pchDIlDEslhhF+ANlFF+NaEIK.u.ADRsLIYlhLaIo-CLpKL.....pt.ssopspupKthhsLuls.pFsI.PG-s..GFP..LNuhYthP..ts+p.-...s...............-hhRpYLp.....QlRQEhuh.RLhc..+.Va..s..sp..s..................spPSKWWhsFsKR+FMsKSL............................................ 0 73 121 183 +653 PF02331 P35 Apoptosis preventing protein Mian N, Bateman A anon Pfam-B_13247 (release 5.2) Domain This viral protein functions to block the host apoptotic response caused by infection by the virus. 
The apoptosis preventing protein (or early 35kD protein, P35) acts by blocking caspase protease activity. 20.10 20.10 21.20 27.20 19.80 17.80 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -12.01 0.70 -5.30 7 25 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 14 10 0 29 0 301.10 58 92.61 CHANGED MCVlhPs.hcssQTlIhDst...spphR-LlYlNplh....ss.lsKsVLMhFNISGPl+sVsR.hsspht-hhKSKlD......cpFsphp+sh.....SsphsGhc...+YFcs-cYoVsC.stsshKsKatKhLpscshs-ccsIEsacK.CL.P.......Lhsc..........psshYV..sVCsLKPuhtNs.uppsLSFpYpP.ssKVIVPhtHEIs-sG..hYcYDVh...AhVcuVp.....phcc.lQsLhh.pta.cs.c.lhascsshNcphhhhs.-FpTcshasKs....hpI.CNu.I.DccpchLhVKL+NVTspLscsl................ILsplc ...........MCVIFPVEIDVSQTVIRDCp..VDc.QTRELVYINKIM....NTQLTKPVLMMFNISGPIRSV.TR.KNN-LRDRIKSKVD......EQFDQLER-Y.....SDchDGFHDsIpYFKDEHY..SVSCQ......NGSVLKSKFAKILKSHDYTDKK.SIEsYEKYCL.Pp......LVDc..........+sDsYV..AVCVLKPGFENG.SNQVLSFEYNPIGNKVIVPFAHEIN.DTG..LYEYDVl...AYVDsVpFD.GpQFEEFVQpLILPSoFpcSEKVLYYNEASKNKNMIYKALEFTTESsWsK.SpKaNWKIFCNGFIYDKKSKsLYVKLHNVTSsLNKNV................ILshIK....................................... 0 0 0 0 +654 PF02225 PA PA domain Bateman A, Mahon P anon Pfam-B_259 (release 5.2) Family The PA (Protease associated) domain is found as an insert domain in diverse proteases. The PA domain is also found in a plant vacuolar sorting receptor Swiss:O22925 and members of the RZF family Swiss:O43567. It has been suggested that this domain forms a lid-like structure that covers the active site in active proteases, and is involved in protein recognition in vacuolar sorting receptors [1]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.23 0.72 -4.32 69 5065 2009-01-15 18:05:59 2003-04-07 12:59:11 17 204 1352 75 2693 4930 531 99.80 17 13.82 CHANGED sstsstptsll................................tsstssssshtsssspGpl..lls.....pputs...............sh....hp+s....ttAp..ps.GAtulllhs.....................shststhtshslPsshl.spssGptlhphh ..................................................................h........................................................t.........t..t....tp..h..t.....s.h...s..l....p.G..K...I......sll...............pcG...s.s...................................sa..............spKs...............ppAp....ps...GA..hull.lhs.......................................t....st.............s...p.....s.h...t...l...P.shhl...s.ttuttlht.................................................................. 0 704 1487 2130 +656 PF00291 PALP S_T_dehydratase; Pyridoxal-phosphate dependent enzyme Bateman A, Finn RD anon Bateman A Family Members of this family are all pyridoxal-phosphate dependent enzymes. This family includes: serine dehydratase EC:4.2.1.13 P20132, threonine dehydratase EC:4.2.1.16 Swiss:P04968, tryptophan synthase beta chain EC:4.2.1.20 Swiss:P00932, threonine synthase EC:4.2.99.2 Swiss:P04990, cysteine synthase EC:4.2.99.8 P11096, cystathionine beta-synthase EC:4.2.1.22 Swiss:P35520, 1-aminocyclopropane-1-carboxylate deaminase EC:4.1.99.4 Swiss:P76316. 
26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.14 0.70 -5.19 148 31175 2009-09-12 08:45:30 2003-04-07 12:59:11 20 102 5380 356 8492 23404 13882 295.50 21 77.75 CHANGED lp.thstTPLhph...................pla.hKhEshp..s.sG..ShKsRsu...hhhltp..........s.p............................ptl...lsuouGN......................................puhulAhsus..th.G..lp.s....hlhl.....Pps.sst..............t+hthhcthGA...pllhhstt.t.t.stst....phsp...t......h..h.hpt.................................ssstshtGhtohuh..El...............h....pph.........................p.h............llsss.GsG.GhhsGlupshpp..........ts.c..l.luVps..pss.....sshhpthtttt...................................................hht.ulshsh........................ssth.hthhcph...............h.....sVs-p-shp.uhptlsppp............G.lhs..tsuuussluus.ht......h.........t.tp..ll.hl.lsu ......................................................h....ht.TPLhh..........................h.tt.h.h.ss....p.la..h...........KhEs.......hp....s...sG.........S.......h.K.sRsu.........hthltt...................A.tpt......................................................................................st....psl..........lp.so...u......GN.............................................pG.h.u.l...A..h.s.us......th..G.........hc.s............hlhM.................Pps...hst.......................+htt...h.c.t..h.G.A.........c.l....l..h...s..s.........s...s......s.........t..h..h..s..t.st........ph.s....p....ph.......h.t...hp...................................................................................ts.s..s.h.t....tp...t..s...hu........El...............................................h...........ppht..............................tt.D.h..................................llssl...G....s.G..G..shs..G....l...u.t.hhpp.....................................sl..cl..lul-s........pss..........................s.sh.....h...t.s..h.t..s.s.p...................................................
......................p......tht....Glshsh.......................................................................................sssh..s.hp.h..h..c.c.h..t............................hh....sl.s-p-....shp..shct....l....t....p....pc.....................G....l.ls.....s.s..u..u.us...u...l...sus...h.....p.h.t.t.p............................tspp...ll.hl.hs............................................................................................................... 0 2689 5306 7156 +657 PF00024 PAN_1 apple; Apple;PAN; PAN domain Bateman A anon Patthy L Domain The PAN domain [1] contains a conserved core of three disulphide bridges. In some members of the family there is an additional fourth disulphide bridge the links the N and C termini of the domain. The domain is found in diverse proteins, in some they mediate protein-protein interactions, in others they mediate protein-carbohydrate interactions. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.64 0.72 -4.22 89 2217 2012-10-02 11:41:37 2003-04-07 12:59:11 21 217 255 64 1525 2546 76 80.20 16 20.08 CHANGED C..........atthtstthts......tshpth.ts....sshppCtptCt.....pptp.........Cpuasapts.....................pppChLpspspts.......................spltt.....stshshapp.pC ............................................th.h.s.......shpth...ts..........ohppCtptCt.....pptp.............C.puas..apsp.......................pppChLpspspts........................................th...............h..h........................................................... 0 624 791 1265 +658 PF01569 PAP2 PAP2 superfamily Bashton M, Bateman A anon Pfam-B_486 (release 4.0) Family This family includes the enzyme type 2 phosphatidic acid phosphatase (PAP2), Glucose-6-phosphatase EC:3.1.3.9, Phosphatidylglycerophosphatase B EC:3.1.3.27 and bacterial acid phosphatase EC:3.1.3.2. 
The family also includes a variety of haloperoxidases [1,2] that function by oxidising halides in the presence of hydrogen peroxide to form the corresponding hypohalous acids. 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.46 142 14398 2012-10-02 00:53:37 2003-04-07 12:59:11 16 88 4426 44 4448 11648 1667 132.00 19 47.04 CHANGED thhhhhhshhh.............shhl....s.h........h....Khhhs..p..sR...Phhhhtth..h.....................................................uF...PSGHs...shuhshhhhlhhhhtphhhh...................................................hhhhhhshhluhuRlhhuhHahsDllsG....hhlGhhhsh.....hhhhhhtthth ...........................................................................................................................................................................................................................hhh....hhhshhh........shhl...........s.h..............h..........Kth.ht....c.......sR......Ph....h.h.h..h..h..h....hh...........................................................................................................................................SF.......PS..GHs...........s.h.u.h..s....h....s..h....h...h....h....h..h...h..t.th.h.th.......................................................................................................................................hhh..h..h.u....h.h.luh..S.R.l.....hh.G.h.Ha.sDl..lsG.....hhlGhh.hshhhhh.......hh...................................................................................................................... 0 1276 2479 3559 +659 PF03828 PAP_assoc Cid1 family poly A polymerase Griffiths-Jones SR, Wood V, Mistry J anon PROSITE Family This domain is found in poly(A) polymerases and has been shown to have polynucleotide adenylyltransferase activity [1][2][3][4]. Proteins in this family have been located to both the nucleus and the cytoplasm. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.09 0.72 -4.02 112 1402 2009-01-15 18:05:59 2003-04-07 12:59:11 14 49 312 22 919 1373 27 60.60 28 8.69 CHANGED sLGpLLhpFFcaYu.p............pFsapphsIul....ps.uth....hsK.pphthh.......................t.thlsIpDPhsssp ...........sLGpLL.hpFFcaYu..................cFsapcpsISl....+p..uth........ls+.pp.t.t.ht..............................................................phlsIE.....DPhp.s................................................................. 0 290 439 704 +660 PF04928 PAP_central Poly(A) polymerase central domain Wood V, Bateman A anon Pfam-B_1341 (release 7.6) Domain The central domain of Poly(A) polymerase shares structural similarity with the allosteric activity domain of ribonucleotide reductase R1, which comprises a four-helix bundle and a three-stranded mixed beta- sheet. Even though the two enzymes bind ATP, the ATP-recognition motifs are different. 
20.50 20.50 20.60 20.90 20.10 20.40 hmmbuild -o /dev/null --hand HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.59 0.70 -5.57 36 745 2009-01-15 18:05:59 2003-04-07 12:59:11 12 32 320 11 465 707 14 304.90 37 49.06 CHANGED pppa.....GlT.PlSts.Psst-hphoppLhctLpptssaEop-.-sp+RppVLppLppllp-aV.ppluhp+shspphspssuuplasaGSY+LGVhusuuDIDslslsPpplp+........pcFFssFhchLpppsplscltsVt-AaVPlIKhpasGIplDLlFApLslsplPc..sl.sltDcslL....+slD-pslRSLNGsRVsDpILcLVPsh....csF+hsLR.slKLWAp+RulYuNlhGF.GGVuWAlLVARlCQLYPNAssus...llp+FFplaspWpWPp....P.........VlLpphpcs.......hthp..VWsP+h...spD+hHlMPIITPAYPshsuT+NVopSThpllhpEhpRuhcIspclh.........hsp..tsWpcLaEth .........................................................................................................................................................hGho.slShs.Pp..-h.hsppL.p.Lp..sha.Espp.....Ehp+..............R..........lLt.pLp.p...llpcal.pplutp+shstthhttssutlhshGohplulhs.suDlsslslssphh.......................................................................................................................................................................................ppaFt.hhphht.t.plpclpsV.cAaVPlhchpasGlplDllaAplsl.plP..p...sl..clp...sc..s..lL.....csL...D..ps.lR.SL..N.GsRV..sDpI.Lc..LV.Psh.............psFRhsLR.slKhWA+.+.............Rul..YSNhhGFhGGVsWAhLVA.RsC...Q.............L.Y....P.N....A.....s.sus....................lVp+FFhla.sp..WpW....Pp..................P........................................VhLpt..-css..........Lth...VWsP+h.....................pD+.......hHhMPIITPAY.Pp.NuoaNVo...hSThplh............hcEhpp..G..htlspcl...............sp....tpWspLFp.............................................................................. 
0 142 239 368 +661 PF05028 PARG_cat PARG; Poly (ADP-ribose) glycohydrolase (PARG) Moxon SJ anon Pfam-B_5996 (release 7.6) Family Poly(ADP-ribose) glycohydrolase (PARG), is a ubiquitously expressed exo- and endoglycohydrolase which mediates oxidative and excitotoxic neuronal death [1]. 20.80 20.80 21.40 21.10 20.40 20.60 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.14 0.70 -5.20 23 357 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 168 8 240 364 7 279.40 30 52.28 CHANGED asc.h...p..ct..Fhp.h............LP..ths+lsLplsslh.t..............sl.lLtpppspplhlopcplusLLAsuFFshhspp.................ttt.sphPsh.NFspLap....s.ptpsphpKlKClhpYFcpls.......pphssGhVoFpRhth................t.pphs.WppsstsLp....plclhscutIEDp..tshLpVDFANKalGGGVLspGsVQEEIRFhIsPELlluhLFscshccsEAlhIhGApRaSsYTGYusoFpa......................pGpa.Dpp.shDphpR........+pTpIVAIDAlpa.......pshhp..QacpstlhRElsKAasGFh...ptptpspphs...................................................................sluTGNWGCGuFuGDscLKslIQhlAsS...................tspRshl.YhTFGDppLpshhp 
.......................................................................................................................................................................................................h.............ht...h............hs.hhphh...hph..h.h.................................................h.hl.....t...s..t.l...hop...husLlupsFhshh..t.............................ph...h.sF.sph..ht......t......tt.........pKlpslhpYFppht............tph.psh..lshp..Rp.h......................................................p.....ht..ppt.h.ht.....hph.............ps.....h......IE..pp.........th.lpVDFANch...lGG.Gsh.s.t.G.hl.Q............EEIhFhhsPELl.luh.Lh..hp.......hppsE.slhlhG..spp...a...SpapGYu.p.oapa..............................tt.p..h..ctt.....h....c..t....htc...........htspllAlDAhph...........t.t...p....Qat.ptl.RElpK..A.....h.s.G.Fh.............t..t..hs...........................................................................................................................................slu..TGpWGCGs..F.s.Gc.pLKhllQhhAuu....................s..t.+s.hh.Yhsatp..th.....hh................................................................................................... 0 114 147 209 +662 PF01734 Patatin Patatin-like phospholipase Bashton M, Bateman A, Dlakic M anon Pfam-B_2206 (release 4.1) Family This family consists of various patatin glycoproteins from plants. The patatin protein accounts for up to 40% of the total soluble protein in potato tubers [2]. Patatin is a storage protein but it also has the enzymatic activity of lipid acyl hydrolase, catalysing the cleavage of fatty acids from membrane lipids [2]. Members of this family have been found also in vertebrates. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.36 0.71 -4.40 116 10726 2012-10-02 11:19:24 2003-04-07 12:59:11 17 127 3394 5 3812 8861 1395 181.80 20 39.60 CHANGED LslsGGGs+.G....hhphGslpt....L..................................tththphchlsGoSsGulsu...................................................hhhshs...hs.pphhpthtphhtpthhshh..................................................................................hhhthhtttshhssphhtphlp......ph.......................lsptthpphttththh.............................................................................h.ht.tttthhhhttptts......stplhp..............................slh...ASs..uhPshh....................................tshph........ssp.......ha..........hDGG......lh.ss....hPh.phsh .............................................................................................................LsLsG.G.Gs+...G.....hhp.hGllcs................L.........................................................................................tp...tsh.h...c..h..l..sGoSs..Gul.su...........................................................................................................................................................u.hh..s.ss.............s..s..t....t.....h...h...th.h.t.....p.h...ht.p.hh.ht.h....................................................................................................................................................h.........h..h......t....t....s....h....h....p.....s...p...h....h.t..ph.lp..........................ph.....................................................h.s..t..t..t.h.p.p.h.h.t.....hhhh..................................................................................................................................................................................................t....t..s.....h..h....t..h.t...h..h.h.s...............................p.t.p.l.h.p................................................
...........................................................s.lt.............ASs...............ulPsha............................................................sh..ph........................................sup..................................................hh.............................................................lDGG.........lh..ss...hPht...h....................................................................................................................................................................................................................................... 0 1192 2259 3131 +663 PF02460 Patched Patched family Bateman A anon Pfam-B_2400 (release 5.4) Family The transmembrane protein Patched Swiss:P18502 is a receptor for the morphogene Sonic Hedgehog. This protein associates with the smoothened protein to transduce hedgehog signals. 19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 800 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.61 0.70 -13.13 0.70 -6.63 9 2194 2012-10-02 18:57:54 2003-04-07 12:59:11 13 37 648 0 1317 3841 1223 372.80 15 51.82 CHANGED 
hhs.chhahhs.sDhp.shtphtshspp.shs.pcahsucuhststa..hhlsupspss.......lLp.shLs-lhplschlhpsh.......................tl.pss....h.hsacclC.pa.phspsspphh..................lhpp.ps...pthslTYPhhphhsptlYlusphuGVphhs.......................ssplp.s+shhLhahschsscpscphuppaEppLtpalcpp.ssp..hlphshhpsphls-Elp+suhshhPhhslohhlLhsFohlssh..h.p...........lppKPhlAhhGllsshhAhlouhGhLhhhGh.assIssVhPFLlL.uIGVDDhFlhlsAWc+Tstpps...........hccRhucslsEuGsuIoITShTslloFulGshTshPulplFChhsulAlhFsalYQlTFauAlhulsschEhptppsh..h.......hs....p................pphpspsuh.....................................................t.upphpp..hhs.....chhhshYssFlhssps+lhslhlallYlsluhYGshshcpsLsPspLlhs-S.Llchhp.h-chlaptGttlplhVpNPPslsh.spshcchpphhscFEshsashGtpuTpaWLp-Ypp.h.p......php.hpspc..................h..hsthcpalthsttsh.Wtpshhhs......cssstlppFhFplu..hcshsshss+s+hhpphRslAcpas...FNVolFcp.thasDQh.plhssslpshlhsllsMhlVshlFIsp..sshslshulsSIslGVhGhhShWGlsLDPloMlsllMSIGFSVDaoAHIuYtahpsttp....sspcRlhsALpslGWPlhpuuhSTlLslhsLhhVsoYhlhlFhKTlhLVlslGhlHGLhhLPllLshhss 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..................................h.......t....h.....................a................................a..h..-..t..h.............h...............h..........h....h.s.h....h....h...h.h.h...l..s....h..h.h..h....................p...h.....s.......sh.h.........h..h..h.s..l...h....h...........h.....t...h...............h...G...h.......h..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......................................................................................................................................................
................................... 0 483 664 1096 +664 PF02170 PAZ ZAP; PAZ domain Bateman A, Song JJ anon Bateman A Family This domain is named PAZ after the proteins Piwi Argonaut and Zwille. This domain is found in two families of proteins that are involved in post-transcriptional gene silencing. These are the Piwi family and the Dicer family, that includes the Carpel factory protein. The function of the domains is unknown but has been suggested to mediate complex formation between proteins of the Piwi and Dicer families by hetero-dimerisation. The three-dimensional structure of this domain has been solved [2-4]. The PAZ domain is composed of two subdomains. One subdomain is similar to the OB fold, albeit with a different topology. The OB-fold is well known as a single-stranded nucleic acid binding fold. The second subdomain is composed of a beta-hairpin followed by an alpha-helix. The PAZ domains shows low-affinity nucleic acid binding and appears to interact with the 3' ends of single-stranded regions of RNA in the cleft between the two subdomains. PAZ can bind the characteristic two-base 3' overhangs of siRNAs, indicating that although PAZ may not be a primary nucleic acid binding site in Dicer or RISC, it may contribute to the specific and productive incorporation of siRNAs and miRNAs into the RNAi pathway. 
23.30 23.30 23.40 23.40 23.20 23.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.61 0.71 -4.86 55 1915 2009-09-11 14:14:06 2003-04-07 12:59:11 17 66 319 37 1147 1880 5 132.50 21 13.63 CHANGED tsll-..hhtphhpppptpt.p.ppp........hpcsltGlhVhspaps........+pa+lsslshcssspppFphp...........spphohs-Ya+ppYslplphs.p.Phlhsppppp................................................saLP.ELCplsshpchhppphsh..pshhh+tpsps .....................................................................................................................................hpctlp...G..hh.V.s.p...aps................+pa..p.lssl....sh..p.................s........s..p.p.....pF.hp...............................spphohh..cYapp+..Y..s..l...pl...p...hs..p.PhL..p.sspppc...........................................................................................saLP..E...l..C..p..l..s....s.......hp.chhpchtph...hshhh+hp................................................. 0 337 545 909 +665 PF00564 PB1 OPR; PB1 domain SMART anon Alignment kindly provided by SMART Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.68 0.72 -4.32 53 2276 2012-10-03 10:59:06 2003-04-07 12:59:11 19 126 339 38 1408 2199 10 83.30 18 13.97 CHANGED shplKhpatssht+........hphspsh.sap-Lhptltpththt........tshplpY.Dc-t.-hlslssDcDLppslcphct............t.t.pl+lhlhss ........................................................thKhpats.phht.........hths...t...s....h..sap....cLhpplpphhsh......................tsh...plpY...........h...D...-..-...s...-........h.ls.lss-pD...Lppul.phhpt.............................lplhl...t.......................................... 0 348 710 1054 +666 PF00786 PBD P21-Rho-binding domain SMART anon Alignment kindly provided by SMART Domain Small domains that bind Cdc42p- and/or Rho-like small GTPases. Also known as the Cdc42/Rac interactive binding (CRIB). 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.09 0.72 -3.55 44 1824 2009-01-15 18:05:59 2003-04-07 12:59:11 23 43 302 22 1078 1667 2 56.30 28 11.27 CHANGED tISsP.ssacHhsHVGaDspsG.hh.....GhPppWppllpss..........hsppc.tppspsshpshtahs ............tISsP..os.F....c.Hs.sHV..Ga.D.st.s.Gths........................GhPtpatp.llpps..................................hpp.p...p..t..h.......t...................................................................................................................... 0 294 490 775 +667 PF01161 PBP Phosphatidylethanolamine-binding protein Finn RD, Bateman A anon Prosite & Pfam-B_5394 (Release 7.5) Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.37 0.71 -4.35 128 4705 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 2779 40 1522 3605 238 145.50 26 74.89 CHANGED sshlshpa..............hp.Gssl..sPsh...hpsh..Pp..........................ssp..s.asllhh..............................DPDA.....................htsah.HWl......lsNIP....................................s..pshssh...................up.............................hsYhGPsPP.Gss.hHRYhFhlauhss................sh.ptssst...sp..htpsh.................ppa...Lsps.lsus..ahp .................................................................................................t...h..pa..........t.htsG.ssl.......oPph...hssh..Pt.................................ssc....t.as.lshh..............................DPDA..Ps...........................tpsahHW..l..........Vss.IP....t..t...lsts........................................uhh..ps..t.s.sh............Gp..............................ssYtGssPP...........G.............pt..hHR..YhFh..lauhss..............ph..sl...stssst....s....hthth...........................ptpt..Lups.lsuha............................................................................. 
1 420 841 1251 +668 PF01399 PCI PCI domain Bateman A anon [1] Family This domain has also been called the PINT motif (Proteasome, Int-6, Nip-1 and TRIP-15) [1]. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.03 0.72 -3.72 77 4727 2012-10-04 14:01:11 2003-04-07 12:59:11 22 54 409 7 3184 4579 40 109.70 16 22.45 CHANGED .shtpllpshppss.hppatphlpph.................................tthhtp.hhtt...........hhppLhpplhcpslhplhp.......applshppluptlplss.....................splEphlsphIhsstlp.upIDph....sthlhhpc ....................................................................................................................................................hhplhps.ht.psp.hpt.h.tp..h.l.p..ph........................................................tt.h.h.t..p...h..hhp...........................hh.p.p.Lh.p...plhpps.lt.p.hhp............................sappl.s.lsplupt...l.p.lss....................................pclEph.ls.phI.tsst....l.p..u..plD.ph.sthlhht.t................................. 1 1114 1760 2604 +669 PF03462 PCRF PCRF domain Dlakic M anon Dlakic M Domain This domain is found in peptide chain release factors. 20.80 20.80 21.00 20.80 19.30 20.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.45 0.71 -4.39 100 9136 2012-10-03 10:28:09 2003-04-07 12:59:11 13 15 4731 15 2219 6114 4472 113.30 38 32.37 CHANGED c...htppsDt-...ht..p.hppElpplp.pplpplcpc......hsh..sth..Dp.pssllEI+uG..sGGsEAs....aAp.LhRMYh+aA-.p.+........uacscllc..hstu-h.u.......GlKpsslplp.........Gc..........tAYuhLKhE ..............................................th....ttsDt-.ht..c...s..pp..El.pp.l........c.p.c.lpp.lE..c........L..l.p...........cst..Ds.....pssh.lEI+u........G...........uG..GsEAs....aAu.LhRMYpRaA....E....p.+..................Ga+sEll-.......hs.tu-h..u....................GhKplshplp.............Gc...........sAYGhLKhE.................................. 
0 757 1443 1875 +670 PF02153 PDH Prephenate dehydrogenase Bateman A anon PSI-BLAST P20692/1-290 Family Members of this family are prephenate dehydrogenases EC:1.3.1.12 involved in tyrosine biosynthesis. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.53 0.70 -5.50 10 4258 2012-10-10 17:06:42 2003-04-07 12:59:11 12 22 4077 29 1083 3244 2767 243.70 25 69.28 CHANGED lAhuL++pG.tsplhGhDhsstpsttAhcLGlhDssss..lptsp-A..DlllLAVPlcsstclLcclus.tlccusllTDluSVKscllcshcphls.phtpaluGHPMAGoctsGstuucssLF-spshlLTPspc.Tssptlppl+cllcthGA+lllhsPccHDpssulVSHLPHllAhuLsstht..chpsshc.sh+hAuuGFRDhT....RIAuusPhhWpcIhhpNscslh-tl-catpclsclpphlc....spDt-...sLhchh+p ..............................................................................pl..h...shs...p........t.t..h....t......u......t.......s...h..h..p...t.......h.......t...t......h............p.....h....l.....t....p.A..............-.llllus.P..l.p.t....s...p.h....l...p...c...l.......ss...........l......t.......t.....s.s.........ll.....sD.l.u.SsKs.s.l.l.p.t..h....t....p...h...h......s............................s............phl..u.u..HP....M.s.Gsp........s..G.....s..u.ps.sL.a.p.st..h.hl.ls............ss.........c............t.......s.........s.................p....s.......h....p....h....l...t.p...h.h...p...s.h.G.A.+.lh.p.h.s.s.p.-HDpshAhl...S...HLPH..ll....u...hulstt.ht..........pp.s....t....p..h..p.......h.....hp....h.....u....u..s....u....F....R..-....ho.....R....lA....u....s....s.P...p.h...ap...-Ihhs..Npp.llphlcpa..tppl.sphhp.hlp..........psD.t.p...thhphht.p.................................................................................................................................................... 
0 337 710 930 +671 PF04166 PdxA Pyridoxal phosphate biosynthetic protein PdxA TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family In Escherichia coli the coenzyme pyridoxal 5'-phosphate is synthesised de novo by a pathway that is thought to involve the condensation of 4-(phosphohydroxy)-L-threonine and 1-deoxy-D-xylulose, catalysed by the enzymes PdxA and PdxJ, to form either pyridoxine (vitamin B6) or pyridoxine 5'-phosphate [1]. 19.70 19.70 19.70 19.70 19.00 18.40 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.99 0.70 -5.21 16 2867 2012-10-02 21:08:39 2003-04-07 12:59:11 7 6 2310 19 692 2198 2820 293.60 39 88.25 CHANGED h..tpc..hpp.s.phVllu-tshL..ppttthhshp.l.........slcchp.....s..p..shh.hlt.hsL.th......s.sh........h.GcssstsGthslchLscAsphshsGphsullTuPlsKsslppA....Gh.asG+TEaLA-......hss......sc.....pslMML.ssp......cLRlsLlTsHlPL+-VssslTtctltphlcllpcsLppcaGItpP+IuVsGLNPHAGEsGhhGpEEh-pIh.....................Pul-phR..tpGlslh.....G.....PlPADTlFpts........tttth...DAVLuMYHDQGLhslKhhuF.spuVNlTLGLPalRTSsDHGTAaDlAGpG.hAcssShhsAlchAs ......................................................................ttt.......h.phllhu-hplL....p.p.t.s....th.l....s..ls....l..................................p.lps.hp......st.....s..p..........t.........s.s.hh....sl....h...s.h...tt..............s..lp........sGplsstsGphslcsl.pcAschsh.....sG.....ch.....sAllTuPl..pKt.s..l.pp.A...........G....h...s......FsGHTEal...uc.....hst...........................s.p........c.sl.M.ML..u..s.c.................................pLRVuLsTTH.lPL+clscslT.ptlppslpl....hppsL......+p..........cF....G.l..spP....R.IhVsGLNPHAGEsGhhG.p.EEh...-.h.Ih..........................................................Pulc..p..h..+......tp..G..hp.lh......G..........PlPADTlFp.t.........................hh.s.ph.........DuVlAMYHDQGL.sLKhhuF.....s...........c.........uVNlTLGL..PhIRTSVDHGTAhD.......lA....G.....pG..pAcssShhsAlphA............................ 
0 212 441 582 +672 PF00934 PE PE family Bateman A anon Pfam-B_253 (release 3.0) Family This family named after a PE motif near to the amino terminus of the domain. The PE family of proteins all contain an amino-terminal region of about 110 amino acids. The carboxyl terminus of this family are variable and fall into several classes. The largest class of PE proteins is the highly repetitive PGRS class which have a high glycine content. The function of these proteins is uncertain but it has been suggested that they may be related to antigenic variation of Mycobacterium tuberculosis [1]. 20.60 20.60 20.60 21.10 20.50 20.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.30 0.72 -3.90 135 4012 2012-10-01 21:44:22 2003-04-07 12:59:11 15 13 103 2 365 2559 0 90.80 46 26.20 CHANGED MSaV.hssPEhluAAAsDLuuIGSsluAANAAAAuP.TTuVlAAuADEVSAAlAALFuuHuptYQslSAQAAA.FHppFVpsLsuuAuuYAuAE.AAN ..................MSaV.hssPEhluAAAs-LusIG.Ss.luAAN.A.AAAus....TT.ull.A.A.uADE.VSuAlA.A.LFuuHuptYQulSAQAAA.FHppFVpsLssuuuuYAsAEAu...................... 
1 142 177 350 +673 PF01095 Pectinesterase Pectinesterase Finn RD, Bateman A anon Prosite Family \N 21.20 21.20 21.30 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.76 0.70 -5.85 35 2674 2012-10-02 14:50:22 2003-04-07 12:59:11 14 76 936 20 1177 2391 56 228.80 26 55.97 CHANGED slVVApDGSGpa+TIsEAlsssP.c+uspR...aVIYlKtGlY.cE.NVcVsKcKtNlhhlGDGhsKTlITG.phshhcGs.TTFcoAThAlsGcGFlA+DIsFpNTAGPpKHQAVALRVsuDhulFY+Csh-GYQDTLYsHSpRQFYR-CsIsGTVDFIFGNuAsVFQsCpIhs....R+PhssQ.pNhlTAQGRpDPNQsTGlsIQsCpIsuss-Lh....ss.sshtTYLGRPWKpYSRTVlMpShIsshIsPtGWhtWsG..sFALcTLaYuEYpNsGPGussupRVKWsGa+slhosp-AppFTlupFI.tGs ...........................................................................t..t...a.plttAl.t..h....................................................................hl....l..G...Y...................E.......l.......l..................l...h.......G...................s.................................s..........................h.....................................t...........................................................................t.....................s.....s...............h..............p.............o....A.......o..........h.......h.........s........t........u......s.....s.hh...h....p..s........lsh........p............N.............o..............h...............G............s..........t.......t............p..............p....A....V.........A..l..p..s..s..u..D..........p.s........hh.....ps..ph........h.Gh.QD.Tl........a...........s............p...........s............s............R.....p...........ha.p...........s..shIpGslDFlaG..p.u.s....s....l.F....p.ss.plhs.........h....t.....s.............t......t...........t.....s......h....l....s..A...u.p................s...t...............h..........Gh........hh.pspltus.....t........................................h.LGR..s....W..........t................h......u................p..............s..........h.......h..s.hs..........t...l......G........a......h.......
.........................................................h...............ph.s........G.s.................................................................t..................................................................................................................................................................... 0 261 757 995 +674 PF04710 Pellino Pellino Mifsud W anon Pfam-B_5882 (release 7.5) Family Pellino is involved in Toll-like signalling pathways, and associates with the kinase domain of the Pelle Ser/Thr kinase [1,2,3]. 20.00 20.00 20.40 34.90 19.30 19.90 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.47 0.70 -5.83 8 257 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 97 3 152 252 0 347.90 58 92.85 CHANGED up-ptsu.scpsl....+YGELIVLGYNGsLPsGDRGRR+.S+FsLpRRs+ANGVKPSshHhl.poPpsSKAlssKsQHSISYTLSRspoVVVEYsHDscTDMFQIGRSTEsPIDFVVhDT...........lPGu....t-sp-stspQSTISRFACRIlC-RsPPYTARIYAAGFDoSpNIFLGEKAsKW...psscGcMDGLTTNGVLlMHP+sGFo.E-SpPG...lWRElSVCGsVYoLREoRSAQp+GphV.p-sNlLQDGoLIDLCGATLLWRousGLp+sPT.+cLEtllpclNAuRPQCPVGLsTLAFPphp+.......tss.s-+QPWVYLpCGHVHGaH-WGpccEpt.p.cRcCPhCRsVGsY...VPLWhGsEPAFYlDsGsPoHsFsPCGHVCSEKTstYWuphPLPHGTpAFpAACPFCAs.LsGppGal+LIFQs.PLD 
...............................................................at.LllLG....YN..Gs......Lss.G...D.+....G.RR.........+.S+hsLh+RscANGVKssshHhh.soP.......suKA...lp.s.+...sQHSIS..YTL............SRspoVlVEYscDssTDMF..QIGRSTEs.IDFVVoDT.............................hsGu.........s.ss-s.tssQSTISRFACRIlC-Rps.P.aTARIaAAGFDSS+NIFLG..............EK..AsKW.....................+ss.D.Gp.MDGLTTNGVLVMH.Pp...sGFs...p-S.....tPG......................lWREISVCGsV.aoL..R..E......oRS...A..QQRGKhV............EsEoNhL.....QDGoLIDLCGATLLWRTu...........p..G.........L.+...........sPTh...+pLEuhRpElNAuRPQCPVGhsTL.....uF..Pohtp...............................pps.sc.cpQPWVYlpCGHVHGaHsW.........Gp.cp....-........p...t................s...........p.......p..........RcCPhCRtlGPY.VPLa.LGpEs.....uh.....a...lDsGP.PoHAFs.PCGHVCSE.KTstY.WuplPLP.HG...T...c.sF+AACPFCut.Ls.....G-pshl+LIFQs.slD..................................... 0 35 49 95 +675 PF02452 PemK PemK-like protein Mian N, Bateman A anon Pfam-B_2134 (release 5.4) Family PemK is a growth inhibitor in E. coli known to bind to the promoter region of the Pem operon, auto-regulating synthesis. This Pfam family consists of the PemK protein in addition to ChpA, ChpB and other PemK-like proteins. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.78 0.72 -3.85 306 4707 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 2273 13 846 2702 289 106.10 23 83.07 CHANGED c+G-lhhlsh.....ts..sh.GsE.......tt.hRPs........lllsss..hhs.ph..t......slllsPlT.o....pht.t...............shcltlp.s...pt..pt....................ohlhs-plpolc.+p...Rl....t.c........plGp...............ls..spphppl.ppslthhl ......................................................+G-lh.h.ssh......tP.......s...GsE.................st..t......RPs........lVlsss...hhN.ph...s..........slllsslT.o...........php.t.t......................................................................shcl.ls..sp.....th..t.......................uhlhh-plpols...+p.Rl..........t.c......................plsp.......................ls....p.p.h.h.p.p.l.pptlth..h.............................................................................................. 0 269 586 752 +676 PF01804 Penicil_amidase Penicillin amidase Bashton M, Bateman A anon Pfam-B_1410 (release 4.2) Family Penicillin amidase or penicillin acylase EC:3.5.1.11 catalyses the hydrolysis of benzylpenicillin to phenylacetic acid and 6-aminopenicillanic acid (6-APA) a key intermediate in the the synthesis of penicillins [1]. Also in the family is cephalosporin acylase Swiss:P07662 and Swiss:P29958 aculeacin A acylase which are involved in the synthesis of related peptide antibiotics. 
21.30 21.30 21.80 21.70 17.40 20.80 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.03 0.70 -5.50 21 1530 2012-10-03 21:14:07 2003-04-07 12:59:11 13 8 899 125 501 1655 2079 651.20 21 92.28 CHANGED tsVcIh+Du.aGlPHIhAssptslhauhGYspApDRLhpl-hh+tpApGphuchhGs...........stlssDhhh+phthsp......ssctphpshssp........tshlcuYusGlNtYlp....t.tp.hPh-ashhshp........Pc.asshDshtlt.hhhhphsus.............h......t.hhshasshh.t..................................h.tshs.hts................h.thstthsspsts.............hGSNsWsluup+TssGpslLhssPHhsatsP.shahphpLps.suaclhGsohsGhPhl.hhGaNsplAWuhTsstschhDhYt.pls.p.ssspYhas.Gpapshcp+ppsltV+s..sp.....shphplhcThcGPVlspsss.............sshshthsshtpsspsh.sahphscAcslpphpp.s.tpthtssshNhhaADtpGsIuahssuhhPh....pss.ps........hhPssG........uph-WpGhhs.hpt...hPphhsPs.......pGalhsuNppsh.........ssshsht.hssthhts.....hRspphhphlstpss.........hshcshhplphs.stshhschhh.................llcshp.........ssssssppAhstLtuWstphsssopuAs.........lathahcthsppshhsthst.hhsshhs.h.......................s.h.hssssshh.tttp.........sthlspuhspuhs..................hpsphG.................shshpWGchpp.........hh.hpshshpu.........sssshs.hhGshsshsssths.sstsssshtsshs.....hphss.hspsttlhssupSusPtSPHYsDQh.haspspahslhhs.pplptphpt 
.........................................................................................t..splhcDt.hGlPHI...hA.p...s...tt.c...hha...u...GYspApDRhaph-hhR+hut...Gpluchh..G.................ttlt....Dt.hhRt.h.thtp......ssptt..h.s.t.h.ssps...........pphl..p...uYssGlNsalt............t...t...hhPh.-a.thhsht........s.t.W.ps.Dsl.hh..h.....hhh.hh..ph...tss.......p.................h..............h.................t.....h.....s...h...h.s.t..........................................................................................................................t...........................................t.hh........h..........................................................hGSNsWslusp+o.tsGps.llss.sPHh.sh.t.h.P....shaa..........t.hp....L........p.......h....s..........t................h.......s......lhGsshsGh.Psl.hhGaNt...........c.lAWuhTs.sh.s.D..Dlah.phs...............t....p......s.......t..t.....Yh......h.s...s......t........h..hp.tp.pp.sl..tV+s...st...............shp.hslh....T..t.a..G.Pl.lpts.t.................................................thshuht.h.sh..t...s...s..t.s.h....shhthsp..A..psh..pphhp..s.h........pt......h.t..h..s.s.Nhlh...AD.......p...G..s..I..uahssG.thPhR.....tthps.....................................................hhPssG.........psp.hp..W.p........G.h.hs..hpt.........hPp.......hs.Pt..................pGalhsuNsp.h...........................s.shsh......hs.....thh.s..........Rs...pcl....t....p...h....l...t...t...t.tt...............hs.hpsh..t.hphD..p...hs.......h.u...p...hh......................................................h.hp..hhp..............sts..hp.ph....hp....hL..t.t..W...st.ph....sh.....sshush...........................................lhthah.pt.....h.h...p...t...h..h.t...s.t.h.s...........h...ht.....h..s.........................................................h.........s.....s......h...h.t..t.......................hsthh.tts.h.ttsht..................httt.h.G.............................pht.W..Gphpp...............hh.hts.h.s..h..s............................
........h.hs...s..G......s..t.t.s.h..t...s..........s..t...h...t.......................t......t....h............h...h..usu...ht...........hlhs...h..sc....s....tuh.h.h..hs.hGp.S.G.ps.hS.....saatDthp.ha.h..p.s.p.hhsh.hp.tth.....t.......................................................................................................................................................... 0 178 337 452 +677 PF01469 Pentapeptide_2 Pentapeptide repeats (8 copies) Bateman A anon Bateman A Repeat These repeats are found in many mycobacterial proteins. These repeats are most common in the Pfam:PF00823 family of proteins, where they are found in the MPTR subfamily of PPE proteins. The function of these repeats is unknown. The repeat can be approximately described as XNXGX, where X can be any amino acid. These repeats are similar to Pfam:PF00805 [1], however it is not clear if these two families are structurally related. 20.30 20.30 20.50 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -9.90 0.72 -4.32 126 12308 2009-01-15 18:05:59 2003-04-07 12:59:11 13 89 95 0 878 9127 90 40.00 46 47.97 CHANGED uNsGshNsGsuNhG.hNhGsuNhGshNhGsuNsGstNsGs ......................NhGhhN.sGs.sNhGh...hN.s...GssNsGhhNsGssN.sGhh.NsG............... 0 480 495 874 +678 PF00391 PEP-utilizers PEP-utilising enzyme, mobile domain Finn RD, Griffiths-Jones SR anon Prosite Family This domain is a "swivelling" beta/beta/alpha domain which is thought to be mobile in all proteins known to contain it. 
23.00 23.00 23.10 23.20 22.70 22.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.57 0.72 -4.41 179 11592 2009-01-15 18:05:59 2003-04-07 12:59:11 18 59 4567 47 2573 8381 2287 81.70 31 11.56 CHANGED pspp..t.hh.ps...ILVscc.lsPuphs..hshppltGlloppG.Ghs...SHsAIlARshGIP.sllGs.t.s.............shpt......lps..Gphlhl...DGppG .............................ts.....t.hsps..sILVsc.c.hsPsphs......s.h.p...ps..tGllTsp..G.GtT...SHuAIlARshGIPsVlGs..s..s.....................sspt..........l.ps......Gc..hlhl...DutpG................................... 0 893 1669 2174 +679 PF05131 Pep3_Vps18 Pep3/Vps18/deep orange family Bateman A anon Pfam-B_6057 (release 7.7) Family This region is found in a number of protein identified as involved in golgi function and vacuolar sorting. The molecular function of this region is unknown. The members of this family contain a C-terminal ring finger domain. 20.50 20.50 20.80 20.50 20.20 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.82 0.71 -4.77 31 318 2009-09-11 06:43:15 2003-04-07 12:59:11 9 7 276 0 229 324 2 145.80 26 15.40 CHANGED GlhaGplshsssss.........pplhppspl...........h.psplssspss............hulsLTpaHlLlLhts+lhAlNpL.stplVa-psl.........hpstpphlGlssDs..tpsThWlaospslFElslpcEsRclW+lalcppcF-tALpas+s.....ssp+DtVhstpu-ahhpc ...........................................................GlhhGpl..t..ts...........hhsptph...........h...st.hststss.................................huhsLTpaHhllLh..s..s..+..lhslspL..st...p..lVhcpth.........hpthtphhG...lspDs...............tp..s..shWlaTsp.slFclhlpcEs...................RslW+laLch............pc............F-tAhpas+s..........stphDtVhttpu-hhhp.p................................. 
0 82 129 192 +680 PF03051 Peptidase_C1_2 Pept_C1-like; Peptidase C1-like family Mifsud W anon Pfam-B_2136 (release 6.4) Family This family is closely related to the Peptidase_C1 family Pfam:PF00112, containing several prokaryotic and eukaryotic aminopeptidases and bleomycin hydrolases. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.53 0.70 -5.96 13 1894 2012-10-10 12:56:15 2003-04-07 12:59:11 10 5 1149 26 416 2017 382 362.80 34 93.53 CHANGED pLos-plppFspcasucPphtlspsAsp+sGlh-Ashsc.psptchspVFSh-lsT-..sVTNQKpSGRCWlFuALNshRHshhKcaclc-FEhSQuYsFFWDKlE+uNaFh-pllsTA..DcslDuRhVpaLLssPppDGGQWDMhluLlEKYGlVPKpsaPEo.asossSptLNshLsc+LRccAltLR.pLhppGss.cplput+-chLsEIa+lluhsLG.PP....csFsaEYRDKDKNYHp.+slTPh-Fac+YVs..hDLcsaVsLlNAPpsD+PYsKlYoV-aLGNVsGGpplhalNlsh-hLKchslsQlKsGEuVWFGsDVu+ph-RKsGlhDsclYph-plFslchph..oKA-RLcaGEShMTHAMVlTGVDl.ss..Gpsp+WKVENSWG-csGpKGYFVMSDcWFcEYsYplVVcKKaLPc-llcsh-p...pPIsLsPWDPMG .....................................................................................................................................................................................s.lp.p.htph.tph....ts.t.thhtpuh..p.s.sl.ts...p...ps..h.........p.....s....s.Fo..h..c.ls..pt.....tls..sQc.tS.GRCWhFuuLNshRh.thhpph.p..l.....c.....p..F.EhSQ...sahhFaDKhE+uNhFh...ppl.......lt..ou..........sp.hssR...hV...palhps.P......pDGG.Q.Ws.M.hsslhc.KYGlVPpps..h...P..Eo...h....s.o....p..so....pt.hs...thLs.ph....LRp.....u..h.LR...phh.t........p.............s..................t............s..........................t.................t.........l..........p..tt+pphL...ppla.phlshsLG....PP....cpFsa.t............a.......p......D.c...-.......p............p....a..p...............t.t......shTP.pFacc.als..........hs..........l.p.-....Y....V.s.lhN.s.....P...c.+sas+.asl-h.sNlhs..u....p....hpa.lNlsh-th+ch.shsp.l.p..sGc.s.V..........WFGsDV.u.p.....s.p.c....t...G........l..............h.........................s...h......sh
....a......ch........p.......t.....h........h........s........h.....p..........h.p...............sK..u..............p..........R......................L..........p...........ht..................-.......S............hh.o...HAMslsG.....Vc.......l.....ts.......G..............p.......s..p.+..W+V.E.....NS..W.....G.....c......c...s...G...p..........c.......G.aa.sh....o..-sW..hccas...aplVVcKchlst.....-...hth....hpt....ps.h...L.sWDPh................................................................. 1 131 242 329 +681 PF00112 Peptidase_C1 Cys-protease; Papain family cysteine protease Sonnhammer ELL anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.92 0.70 -4.47 157 7017 2012-10-10 12:56:15 2003-04-07 12:59:11 18 153 1160 373 3137 7739 473 199.90 29 53.16 CHANGED lPpshDWRp..ptss..VoslKsQG...pCGSCWAFSssuulEuthtlts.....tphlsLSEQpLlDCst....sp..........GCsG.GhhppAapalhp....sGls.oEssY.......PYp.....upc........................spCchp.ppp.h.....sphssass..lsts.......sEpsltptlsp..GPl.uVulsusp....sFphYpuG..........lapt...pCss......tlsHuVhlVGYGsps.....................................GpsYWll+NSWG.ssWGE.pGYh+htR...shs.....spCG..Isspsshsh 
..................................................................................................................................................................PtthDhRp..............h....l.s......l....+..s.Q..G..........p.C..........G....S...........C......W....A.....F........u......s.s.......u.........s...lE.......u...tht...h......t......s..........................t......p.......h.......h..............s..........L......S...t.....Q....p......l.....l.....s....Cst.......tst.............................G.C...s...G....G..h..........t...t.......A.a.....p...alhp............t.G..l...s...s.-...p..sY.............................P.Yp..........utp...........................................................................spCphp....tpt..................hphp.s.a..hp....ls..s.......................pptltp.t...l.h..p........G.......P...............l...............s...su...............l........p....s....t......p...............a..................h.......Y......p...s...G...........................lapt................ssp................th..sHuVhlV.G...a..G.s.p.t..............................................................................................sh...Y..Wll+..........N.......SW............G.......p.p.........W.....G.-...pG..Y.hph..tt...s.t.........Cs.lt.......h............................................................................................................................................ 0 1477 2033 2663 +682 PF01650 Peptidase_C13 Peptidase C13 family Bateman A, Caffrey C anon Pfam-B_1302 (release 4.1) Family Members of this family are asparaginyl peptidases [1]. The blood fluke parasite Schistosoma mansoni has at least five Clan CA cysteine peptidases in its digestive tract including cathepsins B (2 isoforms), C, F and L. All have been recombinantly expressed as active enzymes, albeit in various stages of activation [2]. In addition, a Clan CD peptidase, termed asparaginyl endopeptidase or 'legumain' has been identified. 
This has formerly been characterised as a 'haemoglobinase', but this term is probably incorrect [2]. Two cDNAs have been described for Schistosoma mansoni legumain; one encodes an active enzyme whereas the active site cysteine residue encoded by the second cDNA is substituted by an asparagine residue. Both forms have been recombinantly expressed [3]. 20.40 20.40 20.40 20.50 20.10 20.00 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.83 0.70 -5.09 34 952 2012-10-03 02:24:44 2003-04-07 12:59:11 13 35 545 0 478 982 50 226.40 30 53.62 CHANGED pWAVLVAGSsGa.NYRHQADlCHAYQlLc+.tGlcsEpIlVhMYDD.IAps.pNPhsGhlhNpPp.....u.....pDVYpG.VshDYsGpsVsscNFhuVLhGscstltt.usGKVlpSsssDplFlYaoDHGuPG...lluhP....ts.LaAp-LhcsLpphHspspYpchVhYlEACESGShFps..LPpslslYAsTAuNucESSausYC.............sssphsTCLGDhaSVsWhEDo-t.pslpt..cTLppQachVKpcT.......ptSHVhpaGDhslsp.hlsp.a....................................................hGs .......................................................................................pWuVllu.s.S.p...hahN.Y.RH..AsshphYphl+c.hGl.-ppIllhhhD..D.hAss..............cNPhsu.lh..sp...p.............t....slY..t.s..l...........hDYpu..cVssc................sFht..lLh...G.ph............spt+....hl....o..s.s.ps.p...l....h.............lYho.sHGuss...............hLth........sspplts.......t-Ls....cs.l.p...phat....cp+.................Ypph....lh..hl-sCpuuoh.............hpt...............t.s.......s......lh......s...hsuo.p.ssEs..Sa.up...h...............s..phtshlsD...h...a...o..h.......hhc...-p..............p.p....sltp.ap....h.....................................................t........................................................................................ 0 181 292 392 +683 PF00863 Peptidase_C4 Peptidase family C4 Bateman A anon Pfam-B_232 (release 3.0) Family This peptidase is present in the nuclear inclusion protein of potyviruses. 
20.80 20.80 20.90 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.49 0.70 -5.40 39 1088 2012-10-02 13:45:52 2003-04-07 12:59:11 14 30 161 9 0 1336 7 226.80 49 9.57 CHANGED aEupShhpGlRDYNsIus.......slCpLpNpSsutppslaGIGaGshIIoNtHL...F++NNGp....LplpopHGpF..plcNosplplp.lpG+DlllI+hPKDaPPFsp+l+FRsPpps-+lChVussFQppphsspVSEoShhhs...ssusFW+HWIoTp-GpCGhPlVuspDGtIlGlHS.....LsstssspN...aFsshscsF.pphLpshcphpWsptWpaNsspluWuulplhsspPpt.FphsKhls ............................................HESpShaRGLRDYNPIusslC+LsN.sS-utssshaGlGa..G..sh..IlTNpHL...FcRNN.Gp....Lpl+...SpHG-.F...slKNTspL+l.h.P.It.....s...RDlllIRhPK......D....hP...P.F......Pp+.LtFRp.PccsERlC.hVG..o.....N...F.....QpKSl.o....S..hVSETSshhP............lts..SpF..WKHW...IoT....cD....GpCGhPhV...STp...DG..p...IlGlHS.....LAN.p.sopN...aFuAFs--Ft.pcYLpsh-sp......cWl......KpWpYNsstlsWGoLplpsup.Pp..u...FKloKLlp................................... 0 0 0 0 +684 PF02902 Peptidase_C48 Ulp1_C; Ulp1 protease family, C-terminal catalytic domain Griffiths-Jones SR anon Structural domain Domain This domain contains the catalytic triad Cys-His-Asn. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.61 0.70 -4.72 27 3178 2012-10-10 12:56:15 2003-04-07 12:59:11 14 120 647 37 1892 3126 158 164.70 15 27.92 CHANGED palsstlhthhhchh........pstpppcsthhsohFhshLp.....h................pphhsuspp.hpt......hhchDhlahPlp.h........stHWshl....hlsltcp.......................................pIhhhDSlhshpp..........htshsthh.aLhpp..hhptp..t..............p.ht.hhth.plPQp.sstDCGsashpal-hhspshs.p.........l...ppchtth+pchuh..hchhtstl .....................................................................................................................................................................................................................................................................................................................................t...t.p.h..l.h.lPl.p..................ttHWhLh.....llsh.ttp.............................................................................................................................................................................................................pl.h..hhD..S.h..t.ttpp.........................thh.p.th.h....p..h.l.....tt.....h.....t.....................................................tth.t....h....h....h...............t....h..s..p..Q..t..s...s.......c..CGh.alh....t.hh.ch....h....hp...t.........................................................t.h..................................................................................................... 
1 468 899 1346 +685 PF03416 Peptidase_C54 Peptidase family C54 Bateman A anon MEROPS Family \N 25.00 25.00 25.50 25.40 20.40 23.30 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.91 0.70 -5.27 14 700 2012-10-10 12:56:15 2003-04-07 12:59:11 14 10 298 10 424 693 4 260.10 32 62.57 CHANGED pshhpsFhpDhtSRlWhTYRcsFssI.............................ssoshToDsGWGCMlRouQMLLApALlhp+LGRsWphstp.....................................................pppttapcIlphFsDp...susaSIHphlph..............................Gtpt.sKp......sG-WaGPsssupsl+pLscts.....shssl.plaVuhDsslhh--.hppss.....................................................................................................tstapPlLlllPlRLGlsclNshYlctLppshphspSlGIlGG+Ps.puhYFlGaQsDcllYLDPHhsQpslshspcshs......................................................poaHs.phhp+lphpphDPShslGFhC+scc-F-shppplp ..................................................................................................................t...tthhpDh.S+.l.Wh.TYRptFs.sl....................................................................................................................................s..s..s...s..hToDsGWGCMlRsG...QMlLAp........uLlhth.....LGR..s.Wp.h...tt....................................................................................................................................................t.t..thhpll.p.....hF..hDp.............p.....usaSlHph....sph................................................G.ht....GKp.......sGpWa...GP.ss....suphlcphs..t.........................th.ssl...tl..a...l.s...Ds.sl.h.h...p-hhphht...............................................................................................................................................................................................................tth..pslllLlPlRLG.h.p.p...l.N....h.Yhpslc.....thhp.h........tslGlhGG+P.p..puhYF.lGht........s..s..p.l.lYLDPH.h.s.Qsh.l...s....h.p.t..t.s..h........................................
...............................................................................................................poaHs...pp...p+hthtphDPS.....hsl........GFh..sp..s.pp-apphppth.t.............................................................................. 0 158 222 329 +686 PF01433 Peptidase_M1 Peptidase family M1 Bateman A anon Swiss-Prot Family Members of this family are aminopeptidases. The members differ widely in specificity, hydrolysing acidic, basic or neutral N-terminal residues. This family includes leukotriene-A4 hydrolase Swiss:P09960, this enzyme also has an aminopeptidase activity [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.50 0.70 -5.38 33 7280 2012-10-03 04:41:15 2003-04-07 12:59:11 15 83 3187 81 3135 6965 1743 332.40 23 43.69 CHANGED RLPssltPh+Yclplp...sphpsh.......sFpGpssIplps....pp.sssplslcspc.lsIp.slplps..psss............hphptppph....lplphspshtttp........hpLplpasu.lss....s..htGhYtohYhp....sGpp+hhssTQhp.sstARpuFPCaDEPshKATFsloltpssph..sulSNhs.hpsp.hp.ss..hthspFppT.shMSoYLlAahlu-hphl..pspsps..tl..lclaucsuttppup..aAL-hst+hlpahEcaashsYslP.....KhDhlAlP.-Fuu..GAMENWGLlTYREssLLh-.t..ostppcppluplluHELAHQWFGNLVThcWWssLWLNEGFAs.ahEahssctlts..phphhphalhsph..ppshttDuls....ss+Plp....plssPsplsshFs.slsYsKG 
................................................................................................................................................................................................................................................................................h.................................................h...u.........h.h..................................t........h......l....p.........t......hp..l........t....h.....h.................................................................................................................hh.l.............t.........h..............................h.l...l..t..ht..s.........tt.......................t........htG...h..a............h.........................t...p...t..........h..h..........h...TQ.....h.E.....s..p...........ss.R...p.......hhs....s....h....D....c..P.........s.h..+.........A..s.........a.s.h.p...l....ts..s...........psh..............hsl.....SN..t...s.....h......h.......p..........p...........t...............t.........h....t.....s.....................hthh....pa......p.p....s.......h....s.o..Y...L..hA....h...s.s.G....c....hphh.................php..sps....................t.h...lp.......l....a....s...p............s....t.........h....p..p.st...........as....h..p.......h...t..p..s.h.p.a...h.c....c......h...a...s...h.....Y......s....hs....................p.h..p.hl..sl.s.....p..F.....sh.......G...AME..N..h..G...L.......h.a..p...p..p...h....l....L....h......s......t........t....s..............s...................t.....t........h........t..............pl..tt.....l...........l.u..H...........E.h.u.Hp............W.h.............G..N..h..V..T.h.c...........W..p..p.L...W.L.pEGass.ah.p.h....h.......h..s.s...p.h......ts.........................h.ph....................p.....h...h.h..p.th.................t.h.t........Ds..s...........s.cslp.................t..p........p.h..p............ph...as..hhsYtKG......................................................................................... 
0 1008 1673 2600 +687 PF01431 Peptidase_M13 Peptidase family M13 Bateman A, Dudgeon T anon Swiss-Prot Family Mammalian enzymes are typically type-II membrane anchored enzymes which are known, or believed to activate or inactivate oligopeptide (pro)-hormones such as opioid peptides. The family also contains a bacterial member believed to be involved with milk protein cleavage. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.42 0.70 -5.15 21 3320 2012-10-03 04:41:15 2003-04-07 12:59:11 16 34 1562 10 1433 3043 301 191.10 36 29.63 CHANGED NAaYssshNplshstslh.sPaassphs..pshNaushGsVluHElsHsFsspGspas..h............tah.tpspssapstspCslcpasthsssstst.....shsGspThtEslADluGlphAhpshpp......psss-ppl.s..hpt.s..phhahshAtsaCp.pppspstlh....psHuPsphRlNsslpshPtFsssFsCp.Gs+MascPcpps ...............................................................................................NAaYsPtpNp..I.sFP......A...........u.I.LQs.P.F.a.....s...h.p.ts..........puh.NY.GuIG.s.VIG.HEl.oH.uFDs........p.G....t..p...a...D...tpG.s.Lp............................sWW...op.p...s.hp..tF.p..p...+sps...hlc.Qa.s.....s.h.......ps.........h...............s............s..................................................p.........l...........N...G....p..hT...lu..EN..IA......Dh.GG.lphA....hc..Aap........................th.p..p...t...p...h..ss.........h.p........h....s..tQ.........h.FF..........l................sa..A................p............l.Wp................t........p....h......p.......s.....c.t..........h......t.............h....l.t..s...D........s.......H.u.Ps.......c.h..R....sss.s..lpNhsp.Fh..csF.sspt.u.ss.Mh.s.cpcl.................................................................................................................. 0 539 753 1171 +688 PF05193 Peptidase_M16_C Peptidase M16 inactive domain Yeats C anon Yeats C Domain Peptidase M16 consists of two structurally related domains. 
One is the active peptidase, whereas the other is inactive. The two domains hold the substrate like a clamp [1]. 20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.01 0.71 -4.61 181 16090 2012-10-02 15:41:56 2003-04-07 12:59:11 16 64 4451 253 5211 13286 2764 179.20 14 37.15 CHANGED slstpplppahpphYpss.phslhlsGslshp..plhphhp....chhuplst.........................................................................tsp.hthsht..thttt.hhh.tt.........s...pspltlua.sssh...........................................sp.cthshpllspllust.........................htutLhpplp...............cpsh...shsstuh.hptht......ttuh.hslhhssssp......phpc............hhphhhptlppltpp.t...hsppc....lpcs+pth .......................................................................................................................................................................................................................................................................................................................................hshppl.hp.a.a.p.p..a.Y..p.ss..p.h..s..........lhl...s...G...s........h..s..........p....p...h.h...p.h...hp.......ch....h.u..p.httt...........................................................................................................................p..ht.h....s..h...p.............t.......t..t....t..t.h.h.h.tpt..........................s....p..p.spl....t..l..u.....a..p.....ss..sh..........................................................................................................................s.p...c.t....h..s...h...p......l...h....s....p.l.Lust.................................tu.t.L..hp.p.lp..........................cp.sl.......sh..s....s..t..u..t...hs.sht..................t.uh...h.....t.....l..t..s.s..s..s.sp...........phpp............................hhp.h.lhp.t.............l...p.pl.....t..pp..s........hspp-...lpphpt..h......................................................................
...................................................................................................... 0 1802 3187 4395 +689 PF02789 Peptidase_M17_N Cytosol aminopeptidase family, N-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_990 (release 3.0) Domain \N 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.42 0.71 -4.38 103 3629 2012-10-02 00:07:53 2003-04-07 12:59:11 12 10 3099 51 1002 2638 1020 122.30 21 24.73 CHANGED lls..lh..........cssthssss..........lc.pthsu.hlpphhpptshsGKhGpshh..lhshs.sh....hppllllGLGcppp.........hstcsh+pssusssctlps.hpsppsslhls...................ttspshspGhhLusYca.sca+opp ...................................................................hs..s..........tthc..th.su..hlpt.l.lcp...s...p.....h.p.G.K..Gpshh.....lhtss...s..................sp+llllGhG....cppp...............................h.s.t.c.ph+ps...hu..pshpsL...ps...t...........ts.tps.shhls.........ttts.................ttsptssc..usthutYpF.cpaKop............................................ 
0 310 592 816 +690 PF02127 Peptidase_M18 Aminopeptidase I zinc metalloprotease (M18) Mian N, Bateman A anon IPR001948 Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.40 0.70 -5.98 9 1876 2012-10-02 19:46:12 2003-04-07 12:59:11 10 13 1229 44 661 1854 254 401.10 32 91.82 CHANGED FlspusTPaHsVppltc+Lh.ptGFppLpEppsWt..l.pPts+YFls+NtosllAFulGsphcstsGhslluuHTDSPsL+lKPpuppp.sptah.lulpsYGG.Ih.poWhDRDLuluGhVhlKss..tpph.phllclccPlhpIPsLAIHLs+sh.pshphspcschhslhuh.s......p..h.sptc..........pth+hslLpllscchG...lps-D...llsh-LhlhDspsuplsGhcccFlhusRLDNphssasuhpuLhpstps......csphplsshaDpEElGSsoupGAcSsaLptVLcRlshthppss.....hhpptl....sKShhlSADhsHAhHPNYsspH-psatPhhstGsVlKlssNtR..YuTsusstshlcplAptss...V.lQhhhl+sssPsGoTIGPhhuucpGlcTlDlGss.LuMHShREhsustDlh.shphapuFF .....................................................FlstusosaH...........sltphtphLp..ps..GFp.pLpE.........p..c.p.Wt..........h..ps.....u..s+.aahs+ssusllAat...lsp...p......................t......p...G..................hpll.GuHsDSPsl+lKss....s..php.......ppG...ah..........l....sschYGG.lh.tsWhDRsLuluG+lhh+.................ss.......................t...............h..........st.ll.c.h.scPlhtIPsL.AIHLs+..p.h.p.p..s...h..ph.s..ppp...ch...Plhuhhs................................t..............................................hc.tl.hpl.l.sc..phu..........lp.tc-......llsh-L.lhssp.uphhGhs....p-....hltusRlDshsssasulp.Alls....stt........................................................................t...................p.......st..lh.s.haD+EElGS.totpGA....pSshlps....slpRls....t..s....h..s.tsp............t.h.pthh.........................spShhl.SuDsuHul.H....PNY...s.c...p.a-............s...p.ps.hlstGsVlKhss..spR......Yuosuss...s...s...h...h...p...p...l...s...p...p.ss........................lshQ.pahhpss...sGoT.lG.s.hh.s.u.p.h.Ghp....slDlGss.LuMH.ShRE..hsuttDlhhhhchhpuFh........................................................... 
1 270 456 592 +691 PF01546 Peptidase_M20 CO_pept_M20; Peptidase family M20/M25/M40 Bateman A anon Pfam-B_253 (release 4.0) Family This family includes a range of zinc metallopeptidases belonging to several families in the peptidase classification [1]. Family M20 are Glutamate carboxypeptidases. Peptidase family M25 contains X-His dipeptidases. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null --hand HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.37 0.71 -4.79 366 29469 2012-10-02 19:46:12 2003-04-07 12:59:11 23 76 4930 91 7433 26828 10363 326.10 13 78.41 CHANGED hltuHhDslP.......th......p.s...................................GhhaupGpsDtpuhlhuhhpslpph........ttphps.slpllhpssEEsu.....u........................Gsphhhcs....................................hth+....shsh..sc.s.hts...............shpushshhhhhhsspuptussshssssshhhsshhhhhhpthhspttsshpssshshs.......................................................................................................................thsssssssshsttshhtsthpshppthpthhpthhtthtpthstthttttthphhtshssshsss.h....hphlpps..spp.h...................t.ph.sssuu...pDhshhtt........s.sh.hlshusts...........thhHs.ssEpls..sslhtusphhtphltp 
.......................................................................................................................................................................................................................................htuHhDs.lP..........t...tth.........................t.................................................................................ps...s....h..h.....a....u....p....G....t....s....D.t...p.us....l.h...u.h...h....t....s.......l..pt.h..................t.t.p.h.p...s....s..lp.l..l..hp..s....s..E.Esu...t.........................................................................Gsp..t.h...hpp.........................................................................hthc.........shsh.........sc...s...t.th.......sttsssttththttttsttsstttsstshhtsshhhhthhthhtttttt.tttthhhhh.....................................................................................................................................................................................................................................................................................................................tsssstssh.thtthttthtthttphtthhttthtthttthstthththphphh.sh.hs.sst......h...hp.h.h..t..ps....hpp.hhh....................................ts.p.h.t.s.h.t..u.u...........pD..s..s...h.ht..................................t.s.h.s...h.l..h.h.usss.................tthHs....s...s...E...t.l....s....hps.l..tssphhhphh..t...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................... 0 2236 4510 6175 +692 PF00814 Peptidase_M22 Glycoprotease; Glycoprotease family Bateman A anon Pfam-B_1670 (release 2.1) & Pfam-B_4550 (Release 7.5) Family The Peptidase M22 proteins are part of the HSP70-actin superfamily ([1]). The region represented here is an insert into the fold and is not found in the rest of the family (beyond the Peptidase M22 family). Included in this family are the Rhizobial NodU proteins and the HypF regulator. This region also contains the histidine dyad believed to coordinate the metal ion and hence provide catalytic activity. Interestingly the histidines are not well conserved, and there is a lack of experimental evidence to support peptidase activity as a general property of this family. There also appear to be instances of this domain outside of the HSP70-actin superfamily (e.g. Swiss:Q9ZM49). 
24.80 24.80 24.80 24.80 24.70 24.60 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.90 0.70 -4.90 113 9668 2012-10-02 23:34:14 2003-04-07 12:59:11 20 26 4927 28 2530 7185 5233 230.60 26 77.47 CHANGED plluphhhspht.h.....G.GlhPp.suRpHtcplhshlcpsLpcu...slshp..-lDsIAsopGPGhhsuLhVGsshA+uLAhuhshP.lluVsHlpuHhhsshht..tt........P.........lsLllSGG+oplhhspt..tpYchhGpol........DsAsG-shD+su+hlshsh.......s..uu...tlcphu..tu.......hp..hs.s.h............hshSFS....Glcosshpth............................slshuhpcsshshLspts.cshth..........hssppl..llsGG..VuuNptL+p.......tht.hstp.....t.h.hhhPs.t.assDNuAMIuh ...................................................................................................t..ss...RpHs.cplh.shlpps...Lp.pu......................sl..s...hp.......-l...DulAho..tGP.....G.........h.....s.uL.h.l...G.lssA...+u.LA..........h.u....h....s....lP..lluVsplt....uhh.h...ss.hhp.....ps....................t.P........lslll....s....u....t....+......s....p........lh...h...s.........................p.....a.....p....h.....h....u..p.s..h...........css..h.u.-..s....h..s..phuchh.th..............................................tt........t............................................................................................thsas....G..h.s.h...h...................................................................................................h...t....h.............................................................................h...h.......................................................................................................................................................................................................................................... 
0 882 1633 2143 +693 PF04389 Peptidase_M28 Peptidase family M28 Bateman A anon Bateman A Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.54 0.71 -4.54 65 5682 2012-10-02 19:46:12 2003-04-07 12:59:11 12 79 2007 144 2682 7975 2221 184.50 20 35.77 CHANGED phllluAHaDo.hs.................htsGAsDsuoGs...AslLElA+hls...p..t.p....pp..................slhFlhhsuEE..............pGLhG..Sctast......tph................................hppltshlNhDh...h...Gs......ss.shhhtsss.........t........................h....ltshhpps.tshh.shs.psh..sth.spoDahsFh..ttulPul...phtsst..................hsttY.HospDshsp..lshsuh.p ..................................................hllluuHhDohs.........................................................hssG.A..s....D............s..uoGs..............uslL.E....lA.+....h....lt......p.........t..hp.........pp..................................................................................................slhFhh...as...u.EE............................................................................tG.L..h.G.....Sp..tasp.......t..tpt................................................................................................................t.p.p.h..h...h.hlNh....D.h.....h.........us.............sst.t.h..h..h.t.sss.....................t..t.....................................................................hpth..h.p..p.h...h.................h.....h......p.........t.....h.......s......h.......p......h...............................t...t......h......h....s.....p....o......D..a...h...sFh..............ptG.l...P..s..h....th.htss...............................................................s...ha...H...s...t...Dsh.pt.ls.....h................................................................................................................................................................................................................................ 
0 912 1571 2227 +694 PF01551 Peptidase_M23 Peptidase_M37; Peptidase family M23 Bateman A anon Pfam-B_291 (release 4.0) Family Members of this family are zinc metallopeptidases with a range of specificities. The peptidase family M23 is included in this family, these are Gly-Gly endopeptidases. Peptidase family M23 are also endopeptidases. This family also includes some bacterial lipoproteins such as Swiss:P33648 for which no proteolytic activity has been demonstrated. This family also includes leukocyte cell-derived chemotaxin 2 (LECT2) proteins. LECT2 is a liver-specific protein which is thought to be linked to hepatocyte growth although the exact function of this protein is unknown. 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.32 0.72 -3.94 276 16518 2012-10-02 20:27:15 2003-04-07 12:59:11 17 180 4065 22 3908 13013 6175 97.80 32 25.05 CHANGED phHp...GlDl....uus.....h..Go........s..VhAsssGpV..hhuuh.....t.....uh.GphlhlcH...ssu..hhohYuH..hsp.hh..Vc..tGppVppGphIGtsG.sTG..............uo...GP............HLHF.El..p.h............su.pslsP ..........................................................................t..HpGlDh...........us.s..............t...G.s................s....l..h..A..s.s.s....GpV.........sh.ss........tt.......................................ua..Gp.h.l..hlcH...................sss....hhohYuH........h..sp..hh.................Vp.................G.p.p...V....p.t.Gph..I..uhsG..soG.....................uo..........us...........................HLHF.El..p..h.............ps..p.lsP................................................... 
0 1317 2602 3339 +695 PF01434 Peptidase_M41 Peptidase family M41 Bateman A anon Swiss-Prot Family \N 20.60 20.60 20.60 20.60 20.30 20.50 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.37 0.70 -4.86 31 7039 2012-10-03 04:41:15 2003-04-07 12:59:11 13 33 4736 42 2254 5669 3966 203.00 39 31.13 CHANGED ohp-l--AlDRlluGhE++.shllopcpK+llAYHEuGHAllGhhhcssDPVpKVTIlPRGp.AhGhThhlPc-Dph....ho+ppLhsplssshGGRsAEEllFG..clTTGAusDlppsTplARpMVTcaGMScclGslshtppss.t..hh.tht.tptaScpTuphIDpEV+pllccuYc+ApplLpcp+ctl-tlActLL-+ETlcu--hppl .........................................................p..p-h-cAhD.+l.l....h..G.sE.+.+..o.t..l........h..o......ccE+..c...............hsAYHEuGHAll....uh.....h............l.....s.....p.....s............-......P.....V.........p..K.......V.....T......I.....l....P....R....Gp.....A.......L.......Gh......s...h..h...L..P.........c.....c..D..phh........ho+............p..p...........L.hs...............p.ls.shhGGRsAEEllaG.................cloT.....G....A....S....N....DlcpATp....l.AR.sMV.....Tp........a...G..M.....S...-..c..l.G..sl...ta...s.p.p.......ps...........................a.....h...h...t.....s.......h......t.................t.........p.......p.................s.hS-...p...TA..p...h.ID.pEV....+pll..ccsYpcApplLp.....c..pc................-t.lc...............tluctLlc..hETlsucplppl.................................................................. 
0 790 1454 1932 +696 PF02163 Peptidase_M50 Peptidase family M50 Bateman A anon IPR001193 Family \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null --hand HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.09 0.71 -5.34 68 8451 2012-10-03 04:41:15 2003-04-07 12:59:11 17 49 4572 15 2487 6748 4385 283.10 20 78.45 CHANGED hhllulhlhls.lHEhGHhhsA..+ptGl.plpphthhhG.hlhthh.ht.h.h............................................................................................................hssshphphtptshhp+hhlshAGPhsNhllA.llhhhlhhhhss...............................................................................................................................................................................................................................................................................................................................................................................................................h.............htsshhthhhhhuhlslsLulhNLlPlssLDGG+llh.hh.............hh.tt.hs.phtthhhhhshslhhhhhhhshhps 
.......................................................................................................................................................................................................................................................................................hhlslhlhlh....lHE.hGHhh.sA......+h.h.Gh..pst.ph..t......h..t..h..........u..................l.......ht..h............h............................h.............h........h......t.h.h............h.....h....h.h.....h............h....h.s................................................................................................................................................................t...........t..h..t....h..tp..ps.hh.p.+.h..hl.hhA..GPhh.N..hl...l.u.l...l...h...h...h...h....h....h....h...h...t.s...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.t...s...h...h........h.l..h......h....h....u....h....l....s.l.s..L....u.l.hN....L....l.P.......l.....P.....s....L....D.GG+llhhh.h.........................t.h.h.h.....t.p..........h.............p.....h...t..t..h..h....h...h..h.s.h.hhh..hhh.hhh............................................................................................................................................................................................................................................................................................................................................................................................................................ 
1 901 1712 2148 +697 PF02897 Peptidase_S9_N Prolyl oligopeptidase, N-terminal beta-propeller domain Griffiths-Jones SR anon Structural domain Domain This unusual 7-stranded beta-propeller domain protects the catalytic triad of prolyl oligopeptidase (see Pfam:PF00326), excluding larger peptides and proteins from proteolysis in the cytosol. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.28 0.70 -5.90 30 3044 2012-10-05 17:30:42 2003-04-07 12:59:11 10 23 1955 38 1041 2696 2777 371.10 22 55.84 CHANGED Psst+spttspph+Gpp......lsDPYtWLEDsc.....us-spAaVcApNphTpshLsph.sh+stltcclpphhshs+hosPa+cGpaaYYh+NsshpspsVlhppsss......pups.............cVhLDPNsLSc-G...TsulpshuhSpDGchlAYulstuGoDhhsI+hhclcsu..........................c.lsDsLcpVKaos.lsWssDscGhFYssaspspcsp.......pths.ppKlaaHtLGTsQSpDh.LlaEts-psp.hhhusplScDG+Ylllustpuss..NclahtDlppp..........s.hhphhhllsphcuphthVspc.......GsphhhhTN......csuPNh+LlssshssPs..pWcs.......lls-+ccclhh.shsltusaLllsYhc-spsplphachs............sthhhcphhhslGolsuhuuppcssclaapFoSahoPsslYch-hssschchhphp 
.........................................................................................................................................................................thaG.t......h.Ds.YhW.........Lc....D......s............ssc.shsa....l.p.tcNths.p...t.h.h......s..p.........................s....hp........cp.lhpch....t...s...........h...h..p...t...p..c....h...u.s.....P....h..........h....c....s...s........a..h.Y...h.th.h.p.s.....s...p..p...sl.h..hRpssh.............tt.t......................................cllL.....D...s.....Np.....h....u.......t...s...p..........t.....ah.h.....h...t...uh....s....l..........o.......s.....D....s.......p.......h..h......A.........hu.........hs...h.t.Gs.-hh.s.....l.+hhclp.o.s..........................................................ph.h..s...-.....h.....l....p.....t....s.p....s..s..........hsW....s...s.D.....s....p....s......h..a.Ysp...hct..t................................................sh.pl.a.h..H..pl.......G.....o....s...ts...p......D........h...ll....ac.c.....p.....-.....s.....s.....h...............hh........u......h.......t......t........o..p.....s......p...c.......a...........l..h.......l..p..h.s...s...tss.........s.c.l...hh..lchpt...................................tsp..hhl..h..s.c.pc....s.h...th...l..sph...........tp.p...h..a..l.....hoN.............pp.u...s..h...t...l...h....p...s.....t.....h....p....s.......t............p.hp.......................lh..cpt.h.....h.....l.....p......s..h.......s.h......h........p..s.......a.l.....l.lp...h.pp.s.hs.pl.ph..hshp..............................................tth...hh.p.........h.......s...h......h......................h....s.....h....s.......s.....c...........c.....s.....sp...lh.hsa..oShssPs..plachsht.s.t.p.phh...t................................................................................................................................... 
0 356 649 859 +698 PF04080 Per1 Per1-like Wood V, Finn RD anon Pfam-B_12918 (release 7.3); Family PER1 is required for GPI-phospholipase A2 activity and is involved in lipid remodelling of GPI-anchored proteins [2]. 21.50 21.50 22.40 28.50 20.40 21.30 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.05 0.70 -4.97 34 346 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 246 0 231 344 0 246.10 35 77.58 CHANGED pp.P..lhh+hhhWsC.u-CcYp.Cp..phhTppRhpp.s.....tls............................................QFHGKWPFhRl.....hGhQE.hSslFSlhNhhspap.Gh.tphpc.pl.pp.shpph...........alhhuhluhsAWhaSslFHsRDhslTE+LDYFsAuusVLhuhahshhRhhpL......pp.thtthasssslsh.assHlhhL.hhcaDYsYNMtsNlshGllQhllWhhauhhphpphtp.......................................................aslhPhllllhshhuhuLElaDFPPhhh.hlDAHuLWHhsTIs.shh.WYsFhlpDhp ..............................................s...PlhhphhhWsC.s-CcYp.C...hhhspthhpp..s.......ls............................................QFHG.KWPFhRh....................hGhQEPhSslFShhNhhsphh.sh.hphhp....l..tp...sht.h............................................ahhauhluhsuWhaSslFHoR.DhslTE..+LDYasAs.uhllau...lahshlRhhpl................................pp.thhhhhs..sh....hlhh.hssHlh.YL......p..h.h.caDYsYNMtsNlshGllphlhWhhash....h..ph....pth............................................................hphh..hllhhhhhuh.LElhDFPPh....hh...hlDAHulWHhsTls.shl.aa.sFlhcD........................................... 0 71 128 191 +699 PF04695 Pex14_N Peroxisomal membrane anchor protein (Pex14p) conserved region Waterfield DI, Finn RD anon Pfam-B_4121 (release 7.5) Family Family of peroxisomal membrane anchor proteins which bind the PTS1 (peroxisomal targeting signal) receptor and are required for the import of PTS1-containing proteins into peroxisomes. Loss of functional Pex14p results in defects in both the PTS1 and PTS2-dependent import pathways. 
Deletion analysis of this conserved region implicates it in selective peroxisome degradation. In the majority of members this region is situated at the N-terminus of the protein [1,2]. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.94 0.71 -4.12 53 413 2009-09-11 00:50:26 2003-04-07 12:59:11 8 13 263 4 278 398 0 121.40 28 31.63 CHANGED sRc-LlssAlpFLpDssVpsoP.lp+KlpFLcSKGLop-EIctALpcussss.....................sspshsssssssshtssssss............hp............................sss.....................................sW+DhhlhssshuGlsauhYthsc+YlhPhl ............................R-pllpsAlp........FLp.......csp........VpsuP.ltp+hsFLcsK.....GLTp-EI-tAhpc....sssss.........................................st...ss.....s......ts..s..s......................h..............................................ss.......................................sacchhhhs.hhhuGluhuha.hh+phlhPhl................................................................................................................... 0 88 153 229 +700 PF00294 PfkB pfkB; pfkB family carbohydrate kinase Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family This family includes a variety of carbohydrate and pyrimidine kinases. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.75 0.70 -5.29 64 26513 2012-10-03 06:25:16 2003-04-07 12:59:11 19 81 4887 268 6104 23264 9854 285.90 18 86.51 CHANGED hstlsslGts...h-hhhhs.t..........thh.pstphphtsGGsutNsAhslupLG....hpsshlutlGsDphGphhhptlpppslssshhhhssp.pposhshhhhsts...ppslhhhhssssphpht...........hltpsphlhluu.....hhsts....tshhpthtphtcpts....s.shhsshht......hhpthpchh.shssllhsNccEhphhhstp...........shpphhphhtp....th...phlllThGs....cGshhhp.................pstththssh...thplVDTs..GAGDuFsuuhlsslhp..........stslccslphusssuuhslpptGsts.s.h..ph ...........................................................................................................................................................................................h......hG.h....hp..h.........t.................t....tstp.h.p.h..t.s.GGsuhNhA.hs.h.u.p.LG...............................tpst...h...l..u.h...l...G.............s..............D........t...h..G.p........h..l.h..p.t....l..p.....p........t..........u....l.........s.....s............s..h.....l.....h...h.........t.....s......p.....t..........t.......o.........u........h...t......h...h..h....h......s.....tp....t...........p......p........p......h.......h....h.............t....s.........s....s..s...p..h..p....t..................................t..h.l...t...p......s...c........h....l..h...hsu.........................h.......t.h...s...........p...h..h...p...t...h.........h.....p....h..t..p.pt.s..................s.s..h.h.s.....s..hht.........................t.h.....h...t.....p...h....l....tt.s....s...........l........l...h...s.........N..c......p..........E..h....p..h..l..h.u.hp......................................................s.pp..h..h..p..t..h...p...t......................ss...........ptl..l.....l.....o.....h........Gs....................cG......s...h...hhp...........................................................tst...h.h..p..h..ss.............t.....
...s..p...l....l......D.......o...s...........GA...G......Duah.uu.h.l.t..u.l.hp.................................s.t..s..h..t..c.u...h...p...hA....ss...s....u......uhslpphGs.........t.......................................................................................................................................................... 0 1886 3711 5053 +701 PF00169 PH PH domain SMART anon SMART Domain PH stands for pleckstrin homology. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.45 0.72 -3.82 133 16347 2012-10-04 00:02:24 2003-04-07 12:59:11 24 818 416 205 9216 15741 151 107.30 17 13.94 CHANGED hhtpGhLhpps.........................ttppacc+ahhLps.......stLhhacspp....................................................................ttp.pttlsLpsh.plppssst.............................................................................ppptsFplttspp........................tphhlpssop.p-tppWlpslppshp ...................................................................................................................................h..hpGaLh+ps...................................hhps..W..c.c......Ra.hl..L..ps.........................st.Lh..a.acsp.p..................................................................................................................................................pp.p.s..p.s...t...l....s.L....p.....s.h....p....lptsspt....................................................................................................................................................................................................ppphs......F.p...lhstpp.................................................sh.hl...p.....A.....p..o...p..p-....h..p..pWlpslpp...t................................................................................................................................................................ 
0 2389 3501 5912 +702 PF05065 Phage_capsid Phage capsid family Finn RD, Bateman A anon Pfam-B_3186 (release 7.7) & Pfam-B_9481 (release 10.0) Family Family of bacteriophage hypothetical proteins and capsid proteins. 24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.98 0.70 -5.21 189 3200 2012-10-03 06:22:39 2003-04-07 12:59:11 8 11 2039 63 462 2535 684 261.30 16 68.16 CHANGED GGhllPpph.tsplhpthpptss.lpplssshs..ss.ssphph.hspss..............tushhu.....E...............tt.....spsss..phs.plsh...sscclsshh.tloppl..LpDushs..lp.sh...lssc....lspthutpcstshltG......ss...pstGhh......................hthssh...sssst..............h..............-slh....phh..slpsshp...t.ss....alhspssh.stlc..phK...D.ssGp....hlht...ssh...tssts......plhG....hPlhhspsh.............t.....................lhhG.Da....pshhlsc.....cts.lpl..hps......................s..ppshsthhshpRhssslhc..s.pAhth.hph ..................................................................................................................................hlstph..ppl.hp.hpppss..lt..p..hsphhp....ss..tt..phph...httts......................sushss.........E................................sp.....spsps....phs.pl..sh...sh.t.c.l......s.s.hh..tlo.p.c.l....lcsu..s....hs.............lt.sh........ltst..lsps...hstt..cp...sh....lpG.......................ss.....p.....shGhh...................................................thssh.....ssss.................s...sh..................................................................................cslh................shh.t....tlps.shp..............t.su........hlhssssh...ttlp...phc........s..ssGp......hlh...............s.h............ttsts.....plhG...........hslhhspth.........................................................................thhhG..sh....pthhl.sp.........pts..hphth.pp..........................tps.hh.hhstth..hshthhp...pAhhhh.........................................................................................
............................................... 0 140 299 389 +703 PF02899 Phage_int_SAM_1 Phage_integr_N; Phage integrase, N-terminal SAM-like domain Griffiths-Jones SR anon Structural domain Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.78 0.72 -3.83 37 10422 2012-10-02 14:21:04 2003-04-07 12:59:11 12 29 4035 13 2334 8796 3027 83.20 22 26.81 CHANGED lcp..ahpaLthc+shStpTlpuYppsLpthhpahpp....thtshppl...stpcl...p.talschtpts........hsssolsp..tluulRsaapa..hhpcs ...........................................pt.ah.ph.L..h..h.c...+..s..h..S..tpTlp.uYppDL.........pp.hh..p..a..........lpp.................th.....h.....s.......h...p.....p..l...................stpsl.....................p.sa.l.s.p....h.tpps................................hsssohsR......ploulRs.Fa.p.a.Lhpp................................................................ 0 747 1479 1975 +704 PF04860 Phage_portal Phage portal protein Kerrison ND anon Pfam-B_6050 (release 7.6) Family Bacteriophage portal proteins form a dodecamer and is located at a five-fold vertex of the viral capsid. The portal complex forms a channel through which the viral DNA is packaged into the capsid, and exits during infection. The portal protein is though to rotate during DNA packaging [1]. Portal proteins from different phage show little sequence homology, so this family does not represent all portal proteins. 
27.90 27.90 27.90 28.20 27.70 27.80 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.93 0.70 -5.78 127 3192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 1991 3 469 2728 646 315.20 19 80.95 CHANGED ht.st..ssh..tls.tpsshp.tssla..uslph.lupsluphPltlhcp....t.ttp.h.t....pslhpllp...pPN........ouhpahpthhhphlltG.Nua.shhhh..s......s.up..hh...p...Lh.lpspp.h.................ht..ss....phh.Yphp...........ss......tt...t.phstscllHl+.h..hshs.....shhGluslp.sutpslslshusppastphapNGspsuullphs..sth....sp..-phcp....hccphpp.thpGsssss...pshlLps...............G.hcapsl.....shssp-sphlp.s+phshc-IuphatVPsthluthtp.soh..usl-ptshsahphsLhPhlpphcpplstpLh..t.tthttthhhcFshpsllcs-.tsphphhtphhpsu....hho.N.EsRth.s ..................................................................................................................................................ht..........t..pshht..ssl...sslph..lupsluphshhhhcp..................................p.lhthhp....cPs...............suh.phh.pthhhp.hll..hG..Nua..l.hh...p....s..h..............t.Gp......sh.p.......Lh.h.sph.s................htstst.......................phh..Ythp............................................ts...................pt.......thpht.p....cl.lH.lp.s......h.p......tl.h.G.....hsshh.sshpsht.hstusppaptphapNG.u.p.suh.ll.h........stl....................sp...cshpp.........l+pphpp..t...p.G.....t..stt....+shhlps.....................G.h..chpsl.......s...ss..tc.sph.hp...hcphs...tc...c...ls....t....saplPst.hl.u...hh..s..p...ss...........uslEptst.sahpt.sl.hP.hhp.phpptlsp.hLh..................t.......hhcFs....sh..h..ths....t.ht.....h....hht..s.......hhs.s-hR....................................................................................... 
0 123 284 381 +705 PF05119 Terminase_4 Phage_sml_term; Phage terminase, small subunit TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 24.20 24.20 24.20 24.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.00 0.72 -3.85 76 1248 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 969 0 185 864 92 98.70 21 66.43 CHANGED A+ppW++lss.LpchtlLsphDhssLttYCpsYupahpspcplpc........pGhh.........hpstsG................thppsPhl.shhpcshpphpphusphGLoPuuRs+lsh....spspcscs .............................tt.apchh..L.p.p..h..sh.lsthDh.thlptY...spsashapps...pcplpp.........pGhh................hpspsG...........................shtps.Phl.shtpcshpphpphssphGLossuRt+lhh.....tttt...s........................................................................... 0 56 117 162 +706 PF03354 Terminase_1 Phage_terminase; Phage Terminase Finn RD, Mifsud W anon Pfam-B_3931 (release 6.5) Family The majority of the members of this family are bacteriophage proteins, several of which are thought to be terminase large subunit proteins. There are also a number of bacterial proteins of unknown function. 19.70 19.70 19.70 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.45 0.70 -6.08 7 2382 2012-10-05 12:31:07 2003-04-07 12:59:11 10 9 1576 0 301 2092 481 410.10 22 80.19 CHANGED PaQcFhhs.haGW....cptsss+pFscshhpluRtpGKohhhAhhslhphhlt..GpsstphhlsuhshcQAtclF...............p.stp.h+t....p..hphh.c.cl.tsppptl.hhh.pshh+hhospssphDGhp.phsIhDEhtphpscphspplhpG.ht..tN..hlhI.oTussshsu.hc-chchhpphh....phpsDsaashlap.Ds.--shDsppWhKuNPhLshs.phcslhcshptstsssh....phscFhsKphNLWh.pcossahshpsh-p......hs.hshpG+csalGlDhShhsDsTAlshla.htG.....+hahcshoFhP.tsttt...t..pptt....pYpphhppG...hphpssuhIs.pplhshlhpahppp.hplpthuYD...shtsp.hhsplppp.hsh...shhclpQshtshusshKhlpphhhpp+lp+sss.lhphtlhNsshKhDshs.lphsKcts...stKIDsssAlI.AhhpAhhst. 
...................................................................................................................................................................................................................ap..h.hhst...la.G...h...................t.p.t.....s..t.h....+h.a..p..c..shl.l.sR+sGKo.t.lh.u.sl.h.h.h.hh..h.hp..................sp.....s.t...c...l...h...h.s...AsspcQAphs.F....................p.s....pp..h.l.pt.................s...h.p.........h................p....h......................t.....p..p...l.........h.........h........t...s....t..u.....h..hp..h.....h..s....s....s.s.p.s.h.cGh.p.s.t..h...slhDE......h....+....t..h......t.........s.........p..........p.......hh...p....s....h....ps....Ghut...p...ps......h..hhh.I...oT..u..G.....s.....h.p..u....h.h.p...h.h.p.h.s.t.......p.......l...l......p..........t...................................p..........h..............ps..............s.....p.ha.s...h.l......h.....p..........h......-...........c.................t..........c................-.............h..p.......c.......p.s......W....h.....+.AN.....P....t.l.....s......h...s...h....t........h......c.t.l...h....pp.hpp...s.p...p...s.st................th.tt...a...hsKphN..h..h..h......t..t..p....t....s.ah.sh.....pph...pt................h....h...s..............p......h.......p....s.....p............s..h....h.........G.l.Dh....up.hpDhs..u..h..s..h...h..h...........p.s...................ph..h.h..h.s.h.....a.h.s..pt...h.p.................pp.p.............apt...a....c.......t.G.......h...lph.s.t...s...s...h...l.c......ht...lh.p......hhh.....p...hhpp.........................h....l..p..t..l.shD....h.ts.t.t.h.h.pt..l.p..p...s.h.............................hh.th.Qs...........h........p...................h......sss........h...c...p...hcphh...............h.............pt..p..........l..ha.s..s...........sPlhpWhhs..Ns..hh.c......c......s.s.s......h...h...sK...p.p.p....p.pK.IDshhAhl.Ah........th..........................................................................................
............. 2 82 188 254 +707 PF00628 PHD PHD-finger Pascual J, Bateman A anon Prosite Domain PHD folds into an interleaved type of Zn-finger chelating 2 Zn ions in a similar manner to that of the RING and FYVE domains [2]. Several PHD fingers have been identified as binding modules of methylated histone H3 [3]. 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.96 0.72 -4.27 92 10006 2012-10-03 17:27:21 2003-04-07 12:59:11 24 581 416 151 6059 10034 80 49.80 31 6.20 CHANGED hC.lCpp.............ssphltC..-..s.Cs.psaHhtCls.shp..ph.pt.........tWhCspCpsp ...........................hC.lCpp................tt....ssp..hltC....D............p..C...s...phaHh..tC...ls..s...hp.........p..hsps...................pW.hC.s.p.Cp..t.............. 0 1693 2722 4372 +708 PF01384 PHO4 Phosphate transporter family Bateman A anon Pfam-B_923 (release 3.0) Family This family includes PHO-4 from Neurospora crassa which is a is a Na(+)-phosphate symporter [1]. This family also contains the leukaemia virus receptor Swiss:Q08344. 
24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.01 0.70 -5.78 174 5075 2009-01-15 18:05:59 2003-04-07 12:59:11 15 11 3412 0 1413 3524 1377 368.90 29 86.21 CHANGED hGuNDsANuhuTuVuo+slohpp.AlllAulhphlG....Alhh.....GspVspTIsp.sllssshh....................................................shhuuLluuslWhhlushhGlPlSooHullGullGsulsss.....G...h........................ss...lpWs.......................................t.lhp...IlhuWllSPlluh.hluhllh...hhlphhhh..........t.h......................................hhh.........................................................................................................................................................................................................................................................................................................................................................hpph.......aphhtlhoushhuFuHGuNDsupulGslsshhh............ts.h...................................................................................................................................................................P.................h..Wl......llhuuhululGhhhhGh+llcTlGpcloc..lssspGassphuuAhslhhAoh.........hGl......PVSoT+slsGulhGlGhsc..............................................................................................t.hp..slph...phltpIlhu..............................................WllTlPh.....uullu 
.............................................................................................................................................................................................................................sGhpDsANuhATslu...............o+ulp.s.ph.Al.lhAulhshhG...........shlh..............G..s...p.VutT.Itp.sl...s.s.s...hhs.................................................................h.hhhh.usLlu..AhlW..lhshhhGlPsSoo..Hu.....lIGullGsulssu.............u....h..............................................su.lshs..............................................................t.lhp.........I.hh.........uhllSPll.uh.l...hu.hll.h....hllcthh.h.................th..............p.h..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pspph.........ap.hh..l.hoA.shh...uFuHG...uNDsppuhGhlh.hllh...............ht......ssh..........t...........................
.......................................................................................................................................t..sP............h..Wl..h..hh.sululu.lGshhh..G..h+lhpTl.GpcIsc..ls.s...p..G.huAphsuAhs.l.h.h.Ao.h...............h.G....h...............PVSTTHsls.uulhGsGhsc..............................................................................................t....t....ulph.....tssppllhuWllTlPsuullu........................................................................................................................................................................................................................................... 0 437 853 1191 +709 PF01663 Phosphodiest Type I phosphodiesterase / nucleotide pyrophosphatase Bashton M, Bateman A anon Pfam-B_994 (release 4.1) & Pfam-B_6150 (Release 8.0) Family This family consists of phosphodiesterases, including human plasma-cell membrane glycoprotein PC-1 / alkaline phosphodiesterase i / nucleotide pyrophosphatase (nppase). These enzymes catalyse the cleavage of phosphodiester and phosphosulfate bonds in NAD, deoxynucleotides and nucleotide sugars [1]. Also in this family is ATX an autotaxin, tumour cell motility-stimulating protein which exhibits type I phosphodiesterases activity [4]. The alignment encompasses the active site [3,4]. Also present with in this family is 60-kDa Ca2+-ATPase form F. odoratum [2]. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.38 0.70 -5.13 70 4764 2012-10-03 20:55:17 2003-04-07 12:59:11 17 83 2021 34 2144 12839 5667 283.30 14 54.15 CHANGED llllulDGhpschlpc.........tthPslppL.........hppGspst..hpssaPohThPsahollTGhhPspHGIluNphhD...........................................sppptp..athp.....shpsh............PlWhssp.cpshp..........sushhaPuspsshss......................Pphh..phhstp...hsh......................................................ptchsphhp.hhch..pscc........sslhhhah....ppsDpsG.H......................paGs..s..usc........hpp......slpclDphlupLhpsLccpth.tpssl.lllS.........DHGhsss....................................p+hlh...........lsph...........hs.sth...............hspsslstlhsp.............................ttthsclhptLpsh...........tthpshh...cp...phs.tchpht..........................spRhsslhlhscsGhhhhtpt................ttph..thtGsHGac .............................................................................................................................................................................................................................................................................lhh.hD..u....h..t......t....h.....................h.s.h..t.t..h...............................h.p.p..u.h....h...............h..............s...............h..........P..........o.h..o...h..s.s...h..s.lh....T..G......h..h...........s...t.........p....G......l...h..t..........h.h.......................................................................................................................................h...............................................................................shh....t...t...................................ss........h..............................h.......t....................................................................................................................................................................s........h.......
.................pt............h...................................................................................................................................................................................t.h..s...p....h..h............h.h.t...t.......h.....t...p.pp...................................s..h.h..h...h........a.h...........................t.h.......D....p....h.G...H.......................................................................................................................th.Gs.......p..........u.p....................................................htp.............................tlp.p.h...D.p...h...l...t....p.l....h......p........t.l.....p........p...t.............................t......h.......l.ll.su................D.....H...G..hssh...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 2 703 1239 1744 +710 PF02811 PHP PHP_C; PHP domain L Aravind, Bateman A anon L Aravind Family The PHP (Polymerase and Histidinol Phosphatase) domain is a putative phosphoesterase domain. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null --hand HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.25 0.71 -4.48 69 14520 2012-10-03 00:45:34 2003-04-07 12:59:11 14 119 4828 48 3478 11535 3589 213.50 16 29.40 CHANGED h-LHsHopaS.......DGts.slc-hl...cpAtchGhp....slulTDH..sshhs...shphhpttpt..............................hs....lp....hlh..GhEhpls.t.....................................................................................................................................................................................................tpt....htt.hchhlhuh.pt................................plsphtshthh.................................phlpph......ppshlslhuahpsththh...............hppscphlpth...................................ht.pshhhElpspth.t.......................phpp ................................................................................................................................cLHsH.o.....s.h.S....................DG....h.......h....s...h.......p.....c.....hl..............pp.At.p.h..Gh.p....slAlTDH.................ssh...h.u.................h.s..c.h...h..pt.spt........................................................................................ts......lc........hlh....GhEhthh.................................................................................................................................................................................................................................................................................................................................................................................................................p...t.tt...h...c..h.h.l....h.u..h...p...p......s.ht..................................................................................slhp.hhs.t.t.hh.........................................................................h............phlt.th..................ppG.h.l.h.hh...u...p........t...t.......h...h...h.................................
............................h...t.hht.h..............................................hhpl..................tthtth............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 1312 2409 3037 +712 PF02972 Phycoerythr_ab phycoerythr_ab; Phycoerythrin, alpha/beta chain Griffiths-Jones SR anon Structural domain Family This family represents the non-globular alpha and beta chain components of phycoerythrin. The structure is a long beta-hairpin and a single alpha-helix. 22.50 22.50 23.60 53.00 20.80 17.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.77 0.72 -4.37 5 25 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 4 6 0 31 0 56.60 54 46.87 CHANGED +APVITIFDHRGC.SRAPKEYTGsKAGGpDDEMCVKVuMsKlpV...SEusAstlLpEhLu .+APlITIFDHRGC..uRAsKEYTGuKAGspDDEMhVKVtusKlpV...SpusAsthLpphlu............... 0 0 0 0 +713 PF05023 Phytochelatin Phytochelatin synthase Wood V, Rawlings ND anon Pfam-B_9299 (release 7.6) Domain Phytochelatin synthase is the enzyme responsible for the synthesis of heavy-metal-binding peptides (phytochelatins) from glutathione and related thiols [2]. The crystal structure of a member of this family shows it to possess a papain fold [3]. The enzyme catalyses the deglycination of a GSH donor molecule [3]. The enzyme contains a catalytic triad of cysteine, histidine and aspartate residues. 
23.80 23.80 23.80 24.00 23.40 23.70 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.33 0.70 -5.37 20 280 2012-10-10 12:56:15 2003-04-07 12:59:11 9 7 183 4 123 295 16 197.80 39 55.01 CHANGED shsuhhppsLPs.shlsFuSsEGccLhtcu...ushcsaasLtspF.TQspsAaCGlASLsMVLNALu...............lD....PscthcG.PaRaFopssl.......pspshtc........lpcpGlTLsclusLu+sp..usslcsa...........................+us.chSlspFRpplhpshsssspalIlsYpRpslsQTGsGHFSPlGGYcttpDhsLILDVARaKYPPaWVshphLapAMpolDs.soucsRGahLIs ..................................................................................................................h......h.lst..hlthsS.tGpphh.pu.......sthpsaapLhs.tFpTQspsuaCGlAoL..........shVLNALt................................l-.....Ps..cta.c..u...PW..RaFspshLt..............sspshpt............lcp.pGlohsc....lssLApss........Gspspsh...................................+ss..ps.o...l.-pFRphlhp......s..s................s.......s..s.........s..........p.alllsYpR.ps.....l.t.......Qs...G..............s....G...H....F............SPlGuYcttp.D............hsLILDVARaK.YPPaW.Vsh.phLacAMso.hD.t..so.s.ptRGahll......................................................... 0 64 91 107 +714 PF02567 PhzC-PhzF Phenazine biosynthesis-like protein Mian N, Bateman A anon COGs Family PhzC/PhzF is involved in dimerisation of two 2,3-dihydro-3-oxo-anthranilic acid molecules to create PCA by P. fluorescens [1]. This family also contains Swiss:P28176, though there is no significant sequence similarity to Pfam:PF00303 members. This family appears to be distantly related to Pfam:PF01678, including containing a weak internal duplication. However members of this family do not contain the conserved cysteines that are hypothesised to be active site residues (Bateman A pers obs). 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.87 0.70 -5.19 14 3486 2012-10-03 03:02:41 2003-04-07 12:59:11 11 19 2256 24 972 3693 843 247.90 26 93.31 CHANGED lDAFTspPhp.GNPsuVsh.s-.....pls-stMQplAsEhshSETsFlhhss........pssch+lRhFTPssElshsGHuTluouhsLhppsh......ssspl.hpThuGhlss.............tppssstphhlphphPhhshhshsptt.s.h.hhhths................sh.hthtssGhtplhlsLpShculsslpPch........p.htchsstuhhshsstsssustcapuRhFu.PthGlsEDPsTGSAsssLutYLspchph.......phplhQGpu.uRsGplphph.....c.t...p+VplsGpAVslhcG ......................................................................................................................................................lDsF..o..s.p..s...ht..GNs.su.Vlh...sp...................L..s...c.t.p.MQtI...A.....p.-...h.....s....h......S........ET......s......F..lh..t.s..................................t.ss....sh......c.....l..Rh..F.T.P.p.t.E.lsh.sGH.sTlusu.h.s.lh..phht......................tssp.l...hh.ps.t.u.G..h..ltl.....................................................t....t..t.th..h..h.p......s....h....P...t...h.......t...s................h.......s.....t....t....t.....h....t...h....h.......t..h.h.h..p...................................h..s.t.h.h.s..s.G.......t...t..l..h..l.......l.p.....s....t...t....s...l..t......s..l.....p....P.sh..................................th.h..t...p...h.....s....t....t......s.....h.....h...s........h......h..........h........t.......s......s........t.......s.......t....t....c...........h..hu..Rh.F..s...P..t..h.G..l......s....EDPsTG.oAsus..l..u.s.a..hh.p...p..h.hh...................................h.p.h..pQ.G.p..t.h.s..Rsu..hlt.splt.............................tp........l...h...lu.....Gpuhhh....................................................................................................................................................................... 
0 306 597 824 +715 PF00454 PI3_PI4_kinase Phosphatidylinositol 3- and 4-kinase Sonnhammer ELL, Finn RD anon Prosite & Pfam-B_6771 (Rlease 7.6) Family Some members of this family probably do not have lipid kinase activity and are protein kinases, e.g. Swiss:P42345 [1]. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.34 0.70 -4.90 43 4451 2009-09-16 22:35:53 2003-04-07 12:59:11 22 163 790 95 2750 4248 185 225.50 21 15.05 CHANGED th..hhhKsG-DLR..QDthhhQhl...........................plhsplhpp-shshc.......lpsYtllshusptGhlp..................hlsss.ohtpl..tpthhptth................................................................sslhphFhcpssss.......tpahp................................................................stppaspShAshsllsYlLGluDRHssNlhl......................................ptsGclhHID.................FGhhhs.....t..p.-psPFcLop........shspsh.t.....ssstphthF+phshpshphlRcstshlhsll.........phhhpsslhphpp 
.....................................................................................................................................................................................t...hhhK..s..t.-..DhR.....pD.hhhQhh.........................................................phhp.pl..h....t.....p.p..s.h.s.hp............................htsY..t...llsh...us...t............G.......hlp........................................hls.ss.....sl..t........t....l............p..p..h.....h............................................................................................................................................................ssh.hph..h...h..p...p.....s..ss.............pt..a..h..p..............................................................................................................................................................t.pp..p..a...spS.........h....Ashsl.............lsYl.....Ls....ls...D.....R....H.ss..Nlhl.......................................................................................................ptsGc....l....hH....ID.................................................FGh.h..hs..............t...........t.....-ps.PF+LTp........................phhpsh.......................t.st......th..a..........p............p.......h..........s................p..........sh.......h...hR..p...p...t..p..h..lhsl.h.................phhh.ssh.t...t......................................................................... 0 1017 1545 2244 +716 PF00792 PI3K_C2 Phosphoinositide 3-kinase C2 SMART, Griffiths-Jones SR anon Alignment kindly provided by SMART Domain Phosphoinositide 3-kinase region postulated to contain a C2 domain. Outlier of Pfam:PF00168 family. 
20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.78 0.71 -4.41 24 929 2012-10-10 12:23:49 2003-04-07 12:59:11 19 41 276 79 559 820 7 139.70 21 12.87 CHANGED sp.plhlpssla...tssc.Ls.....sl.poshsshs..........phsWs.chlsas.lphpsLPtsAhLslsla.....shptsppt....................lGhssl.LFsp.+shL+p.G.ppLpl.Wsstpsst.h.h.......................thppGphpp...sphlDthsh ..............................................................s..clhVpstla..pss.c..Ls........sl...pTphhshs.......................tphtWs..c.....h..lpa.s..lplscLP+pupLslsl.a...........s.l..stspst.......................................................hsluh......sslsLFs........h...cshL+p.G.phtLpl.Wssstsptt..........................................................................t.................................................................................................... 0 172 248 399 +717 PF00640 PID Phosphotyrosine interaction domain (PTB/PID) Bateman A anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.75 0.71 -4.20 25 2909 2012-10-04 00:02:25 2003-04-07 12:59:11 18 102 112 50 1457 2821 2 128.10 21 22.94 CHANGED Fts+YLGsl-Vsps......t..h..sp-Alpplptt.................ht....h..puptpph.pplhLplSscslplhssco.........ppllhsaslcpISahus..Dss.p.ctFua...............htctsspphtCHlFhspc....ApslupulGpAFpltapphLct 
......................................................................................a.spaLG.hpl.p...............s.ppulpplptt.............................................st.t.p...ph...hpl.....h...L...p...l...o...h.p...s...lp...l..l...s..sps.......................pp..l.l...h..p..p..sl.p.p..I.Sa.h.ut........D...............p..p...............chFua..............................................................hs.p...c...t....s...s....p...p....hhCH..lFp..spp.............Aptl..s.s.ltpAF..p.hthp.h...t............................................. 0 345 463 898 +718 PF04987 PigN Phosphatidylinositolglycan class N (PIG-N) Moxon SJ anon Pfam-B_5307 (release 7.6) Family Phosphatidylinositolglycan class N (PIG-N) is a mammalian homologue of the yeast protein MCD4P and is expressed in the endoplasmic reticulum [1]. PIG-N is essential for glycosylphosphatidylinositol anchor synthesis. Glycosylphosphatidylinositol (GPI)-anchored proteins are cell surface-localised proteins that serve many important cellular functions [2]. 
20.80 20.80 20.90 20.90 20.20 20.70 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.54 0.70 -5.83 39 370 2009-01-15 18:05:59 2003-04-07 12:59:11 9 19 235 0 259 348 1 374.50 26 46.08 CHANGED cGLcYLQTYsWhaLpsllThGalGWIsashstllchash.......ptp.......p.....................pohhshhhFss.lhlsLhhlhhhQ+SPhpYYhYhhFPlhFWpp............lhscppsltsuhp.hhtt.p......hhphlhthlhh.lulhEsl..VhuaFcRplholhahhhuh.as..hhhshthht..ppthhshhWhlsslhhosFoLL....sss+hcshphI.......hhuuhLhhlsuhhhlh......................tsphoppl......hshQlhhlllshhsTptoshSLps..+pGLPhhsQlsuWhhLlsSll.lPhh....ss.schhtRLhlIFLshuPsFllLoISYEuLFYhsFshpLhtWlplEptlhh.stt.t.................................................h+slslsDhRlulhahhhlpsAFFuTGNlASlS............SFsL-SVhRllPlFsPFs.GALLl.lKLlIPahllossLGllscpLplsspslFhlllslSDlMoLpFFalV+scGS ..................................................................pGLpYhpsYshhhLhshlshualGWhhhhhhhllc.hs...............p..........p.....................................tthhhhhhhhs.hhhhlhhhhhhppsPhtaahYhhhPlh.....h.W.t.........................lhtp.ht....hh.tt....h..h......h.t...............................h.phhh.hhhh.hhh..hphl.....lhuaa.R..hhohhhhhhuh.hs...hhsh..h......tthh.............hhWhh.shhhusFsh....h....ss.h..sh.....h.l.............hhuuhlhhh.hshhhh.................................................h.thphhl.............hhhphhhlhh..shh...........sh.thpt...p.tslPhh.plhuW..hhlh.uhh...hPhh.............................s.pthhtRLh..lhhshhssallLohuYEu..LFhhhhshhhhhWlp.........lEpthh..pt.................................................................pthhhpshRhuhhhh......hhh.suFFuTGNlASls............SFshsolhphlslF.sPFhhuuLlh.hKlhlPahll....sshthl...........shh.hphs...............tsl..................a..........hhlh.......hhoDhhsLpFFahV+spGS.................................................. 
1 74 129 213 +719 PF01850 PIN PIN domain Bateman A anon [1] Domain \N 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.43 0.71 -3.78 170 8807 2012-10-03 20:43:45 2003-04-07 12:59:11 16 13 2111 52 2682 7029 1235 121.10 16 80.11 CHANGED hhlDTslllthh............pscsttthhtthhpt........tplhhsslsh..hElhhshpp..............................tttphhphhhh.htthplls...hstphhtpstclhtpht...........lshhDshhhAtA.......httsht.....ll.T.tDpc.......htchsth ..........................................................................llDTsll.lthh...................psp...s.t.t.t.t...h....tt.h.lpt..................tplhlssh..sh...hE.lh.hshpp.............................................t.ttp.h.hthhhh.....h....t..t....h..p....l.ls..........h.s.t.p....h.h....t..t....s.t.p...l...h.tpht...............................hshhDshlh.AhA.........ht...tshs...........ll.T.tD.p.c....htt....h........................................................................................................................... 0 858 1797 2312 +720 PF04696 Pinin_SDK_memA pinin_SDK_memA; pinin/SDK/memA/ protein conserved region Waterfield DI, Finn RD anon Pfam-B_4141 (release 7.5) Family Members of this family have very varied localisations within the eukaryotic cell. pinin is known to localise at the desmosomes and is implicated in anchoring intermediate filaments to the desmosomal plaque [1]. SDK2/3 is a dynamically localised nuclear protein thought to be involved in modulation of alternative pre-mRNA splicing [2]. memA is a tumour marker preferentially expressed in human melanoma cell lines. A common feature of the members of this family is that they may all participate in regulating protein-protein interactions [3]. 
25.00 25.00 25.70 25.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.86 0.71 -4.55 11 266 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 204 0 185 268 0 123.00 32 28.27 CHANGED pDpcp+.tRNRRhFGsLLGTLQKFpQEpshh..spppc+Rs-IEpKl......EcQtctERcplccc+cpLap-R+ccQhElRtLEpKhtchc..cpWppp.thltpa.I+T.KTcP+laahPtphsspppKhLcEs.pc-sp ...........spct+.pRN+R.....hFG.t.L.h.G.TLp+Fpp..E..p..p.h.............op....ppc..RR....pEIEp+h......-t...ptcp.E....c.cp.l...c....p....c++....p..ht....-.+R....t.c....ph.plR..th.-p..ch...t.....t..p....h.h...c....php.pp.tthtpa.l........+.....T..+scP.l.........aYhPhchs.tspphlppp..pp.................................. 0 68 102 150 +721 PF01504 PIP5K Phosphatidylinositol-4-phosphate 5-Kinase Bashton M, Bateman A anon Pfam-B_571 (release 4.0) Family This family contains a region from the common kinase core found in the type I phosphatidylinositol-4-phosphate 5-kinase (PIP5K) family as described in [1]. The family consists of various type I, II and III PIP5K enzymes. PIP5K catalyses the formation of phosphoinositol-4,5-bisphosphate via the phosphorylation of phosphatidylinositol-4-phosphate a precursor in the phosphinositide signaling pathway. 
20.20 20.20 20.40 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.86 0.70 -5.27 117 1752 2009-01-15 18:05:59 2003-04-07 12:59:11 13 60 323 8 1132 1665 41 260.10 28 29.93 CHANGED hSL......ssp.....h.phsosGK..........SGS.hFahopDp+FlIKTl.p+...pEhc......................hhhc..hLspYap.alppssp.........................TLLs+h....aGlapl.........................p.....ss...pcht...........................hlVMpNlF....st..hplc.......cpaD........LKG.Sphs...........................Rpsp.............................t....p........tt.............sLK......Dhshhp.......phlhlstpp+phlhcplppDspFLpphplMDYSLLlGl....................+........................................................................................................................................thh...t..........................ptlhalGIIDhLppYs..hpK+lEphhKsh.hhs.t.....pslSslsPppYucRFh.cFlpphh .......................................................................................................................h...sssG+SGu.hFaho.t....Dc+alIKol....p+.....pEhp..............................hhhc..hLs.tYap...a....l.pp.s..p.......................................TLLs+hhGlapl.............................t.h..su...pphp..................................................................h...l.VMpNlF............s........hph.p............................ppaD........L..K.G.Sphs...................................................Rpsp..............p.....pt.ptpt................................................tsshK..D.shhp...................phlhls.t.p.tp.phlh.cplpp.Dsp.....aLp........ph.plMDYSLLlGl...........c.................................................................................................................................................................................................................................................................................................................................................................................
.s......................pc.hha.hG.II..D..hLpp.Ys............h.pK+lEphh.K...sh.hhs..s........pphS.s.lsPp.YtcRFh.c.hhpph....................................................................................................................................... 2 374 629 900 +722 PF02171 Piwi Piwi domain Bateman A, Hammonds G anon Bateman A Family This domain is found in the protein Piwi and its relatives. The function of this domain is the dsRNA guided hydrolysis of ssRNA. Determination of the crystal structure of Argonaute reveals that PIWI is an RNase H domain, and identifies Argonaute as Slicer, the enzyme that cleaves mRNA in the RNAi RISC complex [2]. In addition, Mg+2 dependence and production of 3'-OH and 5' phosphate products are shared characteristics of RNaseH and RISC. The PIWI domain core has a tertiary structure belonging to the RNase H family of enzymes. RNase H fold proteins all have a five-stranded mixed beta-sheet surrounded by helices. By analogy to RNase H enzymes which cleave single-stranded RNA guided by the DNA strand in an RNA/DNA hybrid, the PIWI domain can be inferred to cleave single-stranded RNA, for example mRNA, guided by double stranded siRNA. 
19.90 19.90 19.90 20.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.82 0.70 -5.45 18 2067 2012-10-03 01:22:09 2003-04-07 12:59:11 12 30 421 62 1282 2033 12 263.30 32 35.31 CHANGED hllsllsc.pspsh.YtslKKhhps-hsl.oQslphcshhph..........tphhsNlhlchNsKlGGhN..hhlsshsh.......pshh.........llGhDlsHsssup..........tssoluuhluohs.ppsspahussphpppsp-hlss.lpp............hltchLpsapcsspp.pPp+IllYRDGsuEGph.plhp.Elspl+cshc.......plspsap..PplshIllpKpppsRhFspstsp........................tt.Ns.PGTlV...DstlspPpth-FaLsupsshpGTs+Ps+YsVlhD-hthss-cl.QpL......oYpLsahahpsh....pslulPsPlaYAcplAttstpphc ......................................................................................................................................................h.hhhll..st...pt....h..Yst.lK+h..s.p...h....h..sh..........oQsl...p..p.h.p....................ph...hss.l.sh.......plNsK..........l..........GGh..........hhh..p.........................tssh..................................hl...GhDVsHssssp................................................ts..Slsu.......h...Vu.Shs.......p....s.....c....a......h.....u...p.sp...h.........Q...........p......t......p.....p.......E.........h.l......ps...ltt...................................................h.hpph.L.h...ta........h............p........t...........s....t........t..................h........Pp...+I...lhY....R...............DG.Vo-GQh.t.pl.h..p..El....t.lc...pu.ht........................p.h...t...........s..hp............Ptlohl.lVp.K..R..H..poR.......h.......F....s...t..stpp.........................................t.t.Nh..s......GT.l..V.......................Dp.t..l..s......p..P..p.......t......a....D..F.a.L.s......S..+..t..........u..h................p......G..T...u.+.Ps+Y..pV..l...h...D....-..........s.............p..........h..s...........s...D.....pl....Qp....L..................Tap.L.Caha.h.pss.................p.slsls..sPshYAchhuhhst............................................................
.......................................... 0 440 645 1059 +723 PF00801 PKD PKD domain Bateman A anon [1] Domain This domain was first identified in the Polycystic kidney disease protein PKD1. This domain has been predicted to contain an Ig-like fold [1]. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.26 0.72 -4.23 93 5437 2012-10-03 16:25:20 2003-04-07 12:59:11 15 708 991 15 2785 5152 1955 70.40 22 13.50 CHANGED hsss.........shhstslpFssps....s................Gssssap....WsFGD.......................stsssptsssHsYsp......................sGsYsVsLssssssussss .........................................................s...........h..s.slpFssps.......s..........................us.s..s.s....ap.............WsFGD........................sssu.s.t..t...s...s...s..H.s...Y.sp.....................................................................sG...s.Ys.VsLol.ssshut...t......................... 0 1060 1676 2066 +724 PF01477 PLAT PLAT/LH2 domain Bateman A anon Bateman A Domain This domain is found in a variety of membrane or lipid associated proteins. It is called the PLAT (Polycystin-1, Lipoxygenase, Alpha-Toxin) domain or LH2 (Lipoxygenase homology) domain. The known structure of pancreatic lipase shows this domain binds to procolipase Pfam:PF01114, which mediates membrane association. So it appears possible that this domain mediates membrane attachment via other protein binding partners.\ The structure of this domain is known for many members of the family and is composed of a beta sandwich. 
20.80 20.80 20.80 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.52 0.71 -3.70 84 3359 2012-10-02 11:40:54 2003-04-07 12:59:11 18 189 280 91 1790 3171 14 109.70 19 17.37 CHANGED hplplh....utphtussst...h.h.lhs.spGcssphthhps.h............ssshphphch..t....slG.lttltlppt......................hps.paalppl.hltt.....hsst.sph.pFss......pp.Wltss.........phhhh .........................................Yplhlh.T...ut...t.u.u.Tssp.......lhltlhG...pp..G...c...o..t...p.h.....l.h.psttt...................................tsstppF.p...l..ps...t........slG..p.l.t.plp.lt+c.......................st.shps..sW........alc..pl..hlps...................s.t....p.p..h...hFss.......pp.Wlstpp............................................................................ 0 578 838 1202 +725 PF00614 PLDc Phospholipase D Active site motif Ponting C, Schultz J, Bork P anon Alignment kindly provided by SMART Family Phosphatidylcholine-hydrolysing phospholipase D (PLD) isoforms are activated by ADP-ribosylation factors (ARFs). PLD produces phosphatidic acid from phosphatidylcholine, which may be essential for the formation of certain types of transport vesicles or may be constitutive vesicular transport to signal transduction pathways. PC-hydrolysing PLD is a homologue of cardiolipin synthase, phosphatidylserine synthase, bacterial PLDs, and viral proteins. Each of these appears to possess a domain duplication which is apparent by the presence of two motifs containing well-conserved histidine, lysine, and/or asparagine residues which may contribute to the active site. aspartic acid. An E. coli endonuclease (nuc) and similar proteins appear to be PLD homologues but possess only one of these motifs. The profile contained here represents only the putative active site regions, since an accurate multiple alignment of the repeat units has not been achieved. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.70 0.72 -6.96 0.72 -4.02 48 1567 2012-10-02 13:01:53 2003-04-07 12:59:11 17 33 652 3 833 6708 176 29.50 40 4.30 CHANGED hshhhHpKhlllDcc............hsalGuushsssp ........hhsHHpKhlllDsp.................luFlGGlDLs.tp...... 0 219 475 677 +726 PF01690 PLRV_ORF5 Potato leaf roll virus readthrough protein Bashton M, Bateman A anon Pfam-B_1335 (release 4.1) Family This family consists mainly of the potato leaf roll virus readthrough protein. This is generated via a readthrough of open reading frame 3 a coat protein allowing transcription of open reading frame 5 to give an extended coat protein with a large c-terminal addition or read through domain [1]. The readthrough protein is thought to play a role in the circulative aphid transmission of potato leaf roll virus [1]. Also in the family is open reading frame 6 from beet western yellows virus and potato leaf roll virus both luteovirus and an unknown protein from cucurbit aphid-borne yellows virus a closterovirus. 
27.40 27.40 27.70 27.70 27.30 27.30 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.73 0.70 -5.34 14 317 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 35 0 0 332 1 377.50 41 71.26 CHANGED VDup.PsPsPuPsPtP...PsPoPEPsPs.p+cRFhs.YsGsPpshIpsRpsoDuIsltslssQphpYIEDEshshpslsutWhosNslpA.PhFlaPVPcGpaSVpISCEGaQussshusspcGphpGhIAYsssss.csWNVGshssssITNh+usNoa+.GHPDLclNuC+FspsQlVE+DuhlSFHlpsssp-usFaLsAPslpKouKYNYsVSYGsaT-+cMEFGhISVslDEp..-utpsupcs+pshRsGHh.hhspshc...sh.P.ssp............tpps.pTPssshspst..........................p....s.pcslspsp..ss.s.............pp.shth.thph.sstlsssspphh.sptt..................................ssc.lcs.sss.hs.shspssphhtsh.t...h..s.s.h.......ts.sPs.................slhsspp.pu............ShtuppLpGssh+tpst..pspshp.phosppptpYpRl+pohGhTsAcpahppht .......................st.s.sPpPsPsPp....PtPsPpPsP....pRFht.Y.GsPpshIpsRpNo-tIsltplts.sMhhhEDEshshpplsutahsNNphpA.shhlaPVscGpa.SV.IpCEGaQuspphusspcGph.GhIAYsssp..csWslGsYsGssIoNhhsssoa+.GH.D...hclNuspFsptQlVERDhshSFHlcss.ppupFhLhAPslpKos+YNasVSYGsaT-+hMEaG.ISVslDEp..tu.p.....sphscps.h+ss+h..hsps.p.......s...p...............................................t.ts.ppP.t.........................................................................ph..............................p.s.h............tst.....l......ts....h.tt........................................ssp.hc....t.hs...s.p...hss.............s...h..........s.ssp...........................ph..ttt....t.........................................................s..st.lpusp..tt....p.phh..phoppphtpYpplpps.s.ssAptah.t..t....................................... 0 0 0 0 +727 PF03126 Plus-3 Plus-3 domain Bateman A anon Bateman A Domain This domain is about 90 residues in length and is often found associated with the Pfam:PF02213 domain. The function of this domain is uncertain. 
It is possible that this domain is involved in DNA binding as it has three conserved positively charged residues, hence this domain has been named the plus-3 domain. It is found in yeast Rtf1 which may be a transcription elongation factor [1]. 21.20 21.20 21.20 21.60 21.10 20.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.23 0.72 -4.00 39 464 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 274 4 319 438 6 106.20 28 16.55 CHANGED c-lpplplsRohltchhhpP.tF-pslhGCFlRlslGsscpptt.....YRlspIpulppst...sYph.......tphtTshhlplppup..pp+.aphshlSspshscpEapcah.pphtps .....................plpclpLsRspltchhthP..hFc.csltGCFVR.lslGs..pppp............YRlspItslscss...sYpl...................sshpTsphLpl......ppGp......sp+.aphphlSs.ptFo-sEapcahpsh...h................................... 0 102 178 263 +728 PF04043 PMEI Plant invertase/pectin methylesterase inhibitor Yeats C anon Yeats C, McQueen A Domain This domain inhibits pectin methylesterases (PMEs) and invertases through formation of a non-covalent 1:1 complex [1]. It has been implicated in the regulation of fruit development, carbohydrate metabolism and cell wall extension (see [2]). It may also be involved in inhibiting microbial pathogen PMEs. It has been observed that it is often expressed as a large inactive preprotein [1]. It is also found at the N-termini of PMEs predicted from DNA sequences (personal obs:C Yeats), suggesting that both PMEs and their inhibitor are expressed as a single polyprotein and subsequently processed. It has two disulphide bridges and is mainly alpha-helical [2]. 
26.70 26.70 26.70 26.70 26.60 26.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.24 0.71 -4.31 142 1441 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 79 25 905 1470 0 147.00 18 43.47 CHANGED sss....tp.hlp...shCpp..Ts.......st.hChpsLss.......tttsss.p...s....lhphsl.phshspsppshshl.p....plhtp....................spstpt..ps......slp....sChchhssul.c.pLp.puhpsl.............................pt...sc..........spsh.lSuAh....sstpoCt..-uh.................pts..h.......p.h...pt......pslpcl.......sssuLul ..............................................................s....t.lpthCpt.....o...............st.hChpsLts............tstsss.p....p............lhphul..phshs..psppshshh.p......plhpt....................sts.pt....ps....ulp.......sC..h.c.h.hst...ul..s.pLp...puhp.sl.................................pt...ss..........spsh.lS.u..A.l....sstpTCh.....-ua...................tpsthp.....s.lt.....tt......pphtphsssuLuh.................................................................... 0 137 531 706 +729 PF04721 DUF750 PNGase; Domain of unknown function (DUF750) Waterfield DI, Finn RD, Pollington J anon Pfam-B_4045 (release 7.5) Family This family of proteins with unknown function shows similarity to PNG-1, a enzyme responsible for de-N-glycosylation of misfolded glycoproteins in the cytosol [1]. However, unlike PNG-1, this protein does not contain a catalytic triad in its transglutaminase domain [1]. 21.30 21.30 21.30 22.10 21.00 21.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.14 0.72 -3.98 23 98 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 42 4 71 108 0 62.70 37 13.79 CHANGED pNYlKFTYDllssoYS+sscDGSslp.PahlpNlcRl.-........pp.stsYlcp.pstc.u.........................ItWpFs ......h.......YsllcDpY.....s+sss.....s.....spslp.....sac.....hcsIhRKVE........pDWphVYLsR.cEuss.uh........................ISWKF-...................... 
0 42 46 55 +730 PF03726 PNPase Polyribonucleotide nucleotidyltransferase, RNA binding domain Griffiths-Jones SR anon Bateman A Domain This family contains the RNA binding domain of Polyribonucleotide nucleotidyltransferase (PNPase) PNPase is involved in mRNA degradation in a 3'-5' direction. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.72 0.72 -3.71 21 4259 2009-01-15 18:05:59 2003-04-07 12:59:11 9 35 4196 34 982 3077 2312 81.70 30 11.36 CHANGED sLhpcltslApsclupAapI..ssKp-RhsplDpIKspVhtthh...tppt......phsppclpslapsLcpclVRppIlsspsRIDGRc ............................................L.splpshs.p.s.c.l..pc.Ah..pl.......p-KppRpstlc.tlK....pclhs....phh.............t...p..p..p..................................................ph..s..t..p.-...ls...p...h.hc...plc+clVRptIlssch......RlDGRc..... 0 334 637 830 +731 PF01357 Pollen_allerg_1 Pollen_allergen; Pollen allergen Bateman A anon SCOP Domain This family contains allergens lol PI, PII and PIII from Lolium perenne. 20.70 20.70 20.90 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.82 0.72 -4.18 136 1558 2009-01-15 18:05:59 2003-04-07 12:59:11 16 11 171 13 632 1490 0 77.50 40 32.72 CHANGED lpFplsut...saa.hVLlpsluGsG-lssVplK.......................G............o....s....WhsMs+sWGssWp..hss...th.hst.sLSFRl..Tot-..Gpslsu.psVlP ..................l+FslsGt....s.aa.hVLlpNVuG.sGDltuVslKt..............................................st............o......s.Wh.s.Ms.R..N...........W...G.....t..s..........Wpssu...hh...st.....sLSF+l..Ts.s-...G..+..slsu...sVhP.............................. 0 78 341 494 +732 PF01522 Polysacc_deac_1 Polysac_deacet;Polysacc_deacet; Polysaccharide deacetylase Bashton M, Bateman A anon Pfam-B_502 (release 4.0) Family This domain is found in polysaccharide deacetylase. 
This family of polysaccharide deacetylases includes NodB (nodulation protein B from Rhizobium) which is a chitooligosaccharide deacetylase [1]. It also includes chitin deacetylase from yeast [2], and endoxylanases which hydrolyses glucosidic bonds in xylan [3]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.34 0.71 -4.49 104 13288 2012-10-03 16:37:10 2003-04-07 12:59:11 16 176 4258 46 3692 10795 3031 133.50 20 38.42 CHANGED ttsssp+slhLTFDDGs................................sshssplLslLpchplpATa.FllGp.h...sppt...............................................................................sphl+chhppG.aplusHo...................hsHs.......ph..........sshssp.phpp-lppspphlpph..sGp.psph..hRsPaGt......hssss....hphspphGhphs ..........................................................................................................t....tp.lhlT..F.D.D.G.................................ts.h.s..s...p.l.l.s.....l.......L.......c.......c.......h.........s........h......p.......A..T..a..Fl..hup..h..........hppt.............................................................................................................s.p...h.l...+..c.h.h..p......p..G...ac......l..u.s...Ho.........................................................................a..s..H.................ph............................................................sp.h.stp..ph....p.p....-.l...t....c...s....p......p....h...lp...ph................h...G...p....p.....s.ph................hp.......h...P..h..Gt..........hs.p..p..s.......h.p.hh.tt.th...h.......................................................................................................................... 0 1356 2490 3163 +733 PF04831 Popeye Popeye protein conserved region Mifsud W anon Pfam-B_3905 (release 7.6) Family The function of Popeye proteins is not well understood. They are predominantly expressed in cardiac and skeletal muscle. 
This family represents a conserved region which includes three potential transmembrane domains [1]. 28.10 28.10 28.10 28.70 26.40 27.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.83 0.71 -4.47 10 249 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 76 0 142 229 2 145.50 45 44.99 CHANGED EhcuLYpslapPLpVPlclF+cIstshtscVpoLpp-psYAlEGKTPI.DRLSlLLSGRl+VSh-GpFLHYIaPaQFLDSPEW-SLRPSEEGtFQVTLTA-s-C+YloWpR+RLYLLLsp-RYIuRLFSlLLGpDIu-KLYuLND..KlauKpGhRh ..............................Ehp.lYpslFpPLpVshphF+clstsh.pplhoLcptpsYAhEscTsl.D.+LSlLLSG..........+.........l+VS.c.GpFLHhI.aPhQFlDSPEWcShp......o..c-.shFQVTlsA-ssC+alsWpRc+LphhLtp-taltclFshLlGpDIscKLYuLN-..phhsphth..h................... 0 36 47 88 +734 PF01558 POR Pyruvate ferredoxin/flavodoxin oxidoreductase Bashton M, Bateman A anon Pfam-B_350 (release 4.0) Family This family includes a region of the large protein pyruvate-flavodoxin oxidoreductase and the whole pyruvate ferredoxin oxidoreductase gamma subunit protein. It is not known whether the gamma subunit has a catalytic or regulatory role. Pyruvate oxidoreductase (POR) catalyses the final step in the fermentation of carbohydrates in anaerobic microorganisms [1]. This involves the oxidative decarboxylation of pyruvate with the participation of thiamine followed by the transfer of an acetyl moiety to coenzyme A for the synthesis of acetyl-CoA [1]. The family also includes pyruvate flavodoxin oxidoreductase as encoded by the nifJ gene in cyanobacterium which is required for growth on molecular nitrogen when iron is limited [2]. 
20.70 20.70 20.90 21.30 20.60 20.30 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.99 0.71 -4.36 119 5899 2009-01-15 18:05:59 2003-04-07 12:59:11 13 69 3035 27 1854 4841 1225 177.10 23 24.80 CHANGED G..GpGlhosuplLupus.hppG...ht.lhshppausphR......GGss.hsplR.lus.c..h......sth..t.psDhlluh................ct.psh..t..phhst..lpss......Ghllh..ssshhsst.ht.th.........................................slsh...pclAt.........chhs.......shhhNhlhlGsh.....sthhs..lsh..-s.....lppslcpphs.tt......................thlctNhcAhctGhp ..............................GGpGslsuuchhsphh..tp.ps...................hh...spsh.sas..ucp+......G.G.so.huclR..huc..p.hh...................sshhh.s..psDhllsh..................s..thl.p.......phhpt...l+pG........Ghhll....No....hh...ss...p..th.tpth..................................................................phhhlsu.splAp.............................-h.sh...............stthNhlhhuuh........h..plss...lsh..........-s...........hhctlcpp.....hs.pp.......................pll-hNhp.AlctGh............................................................ 0 715 1316 1619 +735 PF01855 POR_N Pyruvate flavodoxin/ferredoxin oxidoreductase, thiamine diP-bdg Bateman A, Griffiths-Jones SR anon Pfam-B_323 (release 4.2) Family This family includes the N terminal structural domain of the pyruvate ferredoxin oxidoreductase. This domain binds thiamine diphosphate, and along with domains II and IV, is involved in inter subunit contacts [3]. The family also includes pyruvate flavodoxin oxidoreductase as encoded by the nifJ gene in cyanobacterium which is required for growth on molecular nitrogen when iron is limited [2]. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.34 0.70 -4.85 32 5386 2012-10-02 16:07:47 2003-04-07 12:59:11 14 71 2734 21 1633 4863 884 224.70 28 29.55 CHANGED puuhtss.cshuuYPITPuos..hsEthsphhspuph..........phlphEuEhuAhuslhGAuhuGu+shTuTSu.GlhLMhEslhhhuupchPhVlhlssRuhsusulslhs-psDhhtsR......-sshhhLsssslQEuaDhslhAatlA.c..splPshhhhDGahsoHphpplpl.s.-.....hhcp.h.shcchp.........hhsP.cpPlstsstts.sshhptcctst.shptstthhpphhc ...........................................h.suhtss.c.h.h.uhYPITPSSp............hsEhhs..p.h....t.spstt.......................phlp..h.p.s..E.h.uAhuss..h.G.As..h...u.G.uh..s..h.T.s.Tu.u.GL...L.Mh-..s.l.h.h..h...u...s.p..tl..P..h.Vlhs..ssRus.....supuL.sh.hs-...p.uDl.h.ts.p.......ps.s.h.s.hLss..uos.QEsh.Dhsh.hAaphu.c.......hpsPhlhhhD.......G.....h..h..s.......H...t....h....p....p.....l...p..h.......c..c..........h...........t.....h....h.....t.p.........h....h.t.t....p...p.......................thss...pt...s.........h..h.s.s...............s.h.h.h.p..tptth......p.......................................................................................................... 0 680 1192 1447 +736 PF04151 PPC Bacterial pre-peptidase C-terminal domain Yeats C anon Yeats C Domain This domain is normally found at the C-terminus of secreted bacterial peptidases. They are not present in the active peptidase. It is possible that they fulfill a similar role to the PKD (Pfam:PF00801) domain, which also are found in this context. Visual analysis suggests that PKD and PPC are distantly related (personal obs:Bateman A, Yeats C). 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.72 0.72 -3.01 151 2040 2012-10-03 16:25:20 2003-04-07 12:59:11 10 196 870 21 476 1650 202 70.40 24 12.12 CHANGED spchaphsls..suss.ls..ls...l.......suss............................ss-..Lh..lhtss......s.....shss......hssts..............tss.....................ssppss..hss.....spsGs.Y.altltu .......................................t..phashpls....sups..ls...ls.....l............ssts............................ssD.........la...lhtss.........s....................shss.......hssps.........hpsu.................................ssppss...hss.....stsG.p..Y.altV..h.......................................... 0 158 302 417 +737 PF01577 Peptidase_S30 Poty_P1; Potyvirus P1 protease Bashton M, Bateman A anon Pfam-B_364 (release 4.1) Family The potyviridae family positive stand RNA viruses with genome encoding a polyprotein. members include zucchini yellow mosaic virus, and turnip mosaic viruses which cause considerable losses of crops worldwide. This family consists of a C terminus region from various plant potyvirus P1 proteins (found at the N terminus of the polyprotein). The C terminus of P1 is a serine-type protease responsible for autocatalytic cleavage between P1 and the helper component protease Pfam:PF00851 [1,2]. The entire P1 protein may be involved in virus-host interactions [1]. 
19.70 19.70 20.00 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.39 0.70 -4.98 26 1446 2012-10-02 13:45:52 2003-04-07 12:59:11 11 18 127 0 1 1522 0 212.30 29 12.60 CHANGED tpppsphttchhppp.....huphchpcpuphhh+thstptltcpppptpppccpppp.....hphthsshssshshttttthscstssphppt.hhppospch+pptspcphttssp.lstlhcplhpIspccshsVElIuK++..tspspa++hpushhhplpltHhpGphp+hDlshsphtpplhthhutpttppppspshph..GsSGllhpspphhs.hthsptshFlVRG+psGp....Ll-ARs+lspt.hpplcH ..............................................................................t...t.p.pthhhc.c...........uhhh.tpp.p.tslhhp...h.tspp...l.+.ppcthcp+ctcEtp.......h.h.thp.s..h.stho.h.s.ss..sss.t..hp....s.ph.ppt.......hppo....ph.......+c.p...h.s.h..p.p.s.+.....h.spt..hp....hlh+plhpIhp..tp.s.ts.lEh.Iu++p....hclpahc.h.t..t.s.phstlpssHh...G.h.....+.....tp.....h...D..h.....hs......p.t...hhlphhuth..st.p....p..hpshslp.G.SGlllpst.l...ht...hs..+s........p.p.........hFllRGc..c.c.G+....LhDARt+lop...pcl.p...................... 0 0 0 1 +738 PF03291 Pox_MCEL mRNA capping enzyme Mifsud W anon Pfam-B_4078 (release 6.5) & Pfam-B_3482 (Release 7.5) Family This family of enzymes are related to Pfam:PF03919. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.17 0.70 -5.57 11 647 2012-10-10 17:06:42 2003-04-07 12:59:11 11 21 389 17 358 751 441 284.20 25 61.34 CHANGED ch..ppspshsshVssH...........Ysch............pcss..hchRppSsIhp.............................................................................................................................................................................L+pFNNaIKolLIuhasp+s..c....sshpVL-lsCGKGGDL.KathutIuthlusDIucsSlppspcRY...pphppt.cpchh+hsa.schlsuDsahsplpEhahss......hpF..DllShQFAhHYSFcoccpAchhLpNlschLssGGhhIuThPDushl......++Lpt..t.stpcphGNslYploF-cp..........s.aGphYsapLcsslpssPEYlV.FssLhclsccYshpLl.phsFp-hhcp.h.p.cp+hhlpphsul-spsupp........................p...tshpusEhEAsh.hYllasFcKp .................................................................................................................................................................................................................................................................................................................................................................................................................................................t............l.ta...........Ysth....................t...........+.tts.l............................................................................................................................................................................ht.hsN.a...l.KohLIp....a..................h..........p.............................................................................tt..............tV.LDlsCG..+G...u...DL.K.a.....h.....u..t...............l..th...............hl.shD...........ut.ul....p.p....spp....RY.................................t............................p.........p.............................a.......t....p...h.h.ht..D..............sh............p.....ltph..h..............................F.....cllsh.............F.shHYuFcspp
pu..........p.hhpNhs.thLp.GGhhluo..hssu.p.l...................................tpl....pt....................................t.........t.......ph...s......s....ph....h.tl..p..att..................................................hG.h.....ha...a....h........p.p...h.s......................t...........hsEalV.h..h..h.t.lhpcashp..Lh.....p.......sF.p.hhp.............................ht...................................................................................................................................................................................... 1 117 205 299 +739 PF00481 PP2C Protein phosphatase 2C Bateman A, Finn RD anon Prosite Family Protein phosphatase 2C is a Mn++ or Mg++ dependent protein serine/threonine phosphatase. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.72 0.70 -4.99 31 7190 2012-10-03 01:39:20 2003-04-07 12:59:11 16 293 1352 43 4244 8910 707 218.80 20 49.91 CHANGED hshshsphpuh+pphpDsphthtsh.t.......ssp.shshhslaDGpuGppsuchsspplt.....phlppppsh.ps.................thppsltpuahp..tc.thtp.....t..........s.puGsoAssshlpspc........LalANlGDS+slLspssssh...........tLopcHcPssts.EppRIppsGGhlp......sRlsGs.....................LulSRAlGDhphKs..............................hVsupP-ls..ptplsts.D-FllLAsDGlWDhlsspcsl-ll+pphst............spcsupcLhchuhshuop-sh 
..................................................................................................................................................................................h.........................................................................h....hs.l.h.DG.....h.....s......G...........t.......s.....up.....h.ht....p...h..................t..h....t............t...........tt.................................................htp.h..l...t...p..s...hht........h.s.tthhp.........................................................t..tt.s.G..o..T..s.......s...s...s..l..l.....s.pp...................................lhl...AN..s.GD.........S...R..s......l........L...s.....p....p..st.h..............................................L.o..t.DH.p................s.....t.........p......................t......E.t....t....R...l..p..t.....t..t...u......h.....................+..lt....u..................................................................................................l.sh....o...R......u......h.........G...D.h.....hc.................................................................................................hl.st.P-.lt...............hp.l..........p...........p.....c..............p........a.......lll......us...............DG..l......a.....c..............h........h.......s.s....p......p........h.s..p.h.l..t..p......t...............................................hs.p.lht.s............................................................................................................ 0 1476 2539 3487 +740 PF00823 PPE PPE family Bateman A anon Pfam-B_297 (release 3.0) Family This family named after a PPE motif near to the amino terminus of the domain. The PPE family of proteins all contain an amino-terminal region of about 180 amino acids. The carboxyl terminus of this family are variable, and on the basis of this region fall into at least three groups. The MPTR subgroup has tandem copies of a motif NXGXGNXG. 
The second subgroup contains a conserved motif at about position 350. The third group are only related in the amino terminal region. The function of these proteins is uncertain but it has been suggested that they may be related to antigenic variation of Mycobacterium tuberculosis [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.65 0.71 -4.40 146 3533 2012-10-01 21:44:22 2003-04-07 12:59:11 14 54 122 2 395 2191 1 151.60 45 28.01 CHANGED slP..PElNSuhhauGPGuushlAAAuAWsuLAuELssuAsshsullusLs...sss.WpGPuusuMsAA.ssPYluWLsssAupAppsAuQAcuAAuAYEsAhAuhVsPshlAANRsthss..LlATNhFGtNosu.......IAssEA.pY.scMWuQDAsAMhuYtusuuuAss ...................................hlPPElNSuhha.uGsGsuPhlAA..AuAWcuLAsELssu.Asuas.u.llusLs..........sts.W.pG.PuusuMsAA.ssP........Ylu.WLsssAupAp.tuAsQAcAuAu..AaEsAhAusVs.P.s....h.lA.AN...R.sthhs...L..lu..o.N....h....h..GQNosA.........IAs.sEA...pY...tcMWAQ....DssAMhuYtuuuuuAs.t.................................................. 0 97 190 347 +741 PF01535 PPR DUF17; PPR repeat Bateman A, Birney E anon Pfam-B_874 (release 4.0) Family This repeat has no known function. It is about 35 amino acids long and found in up to 18 copies in some proteins. This family appears to be greatly expanded in plants. This repeat occurs in PET309 Swiss:P32522 that may be involved in RNA stabilisation [1]. This domain occurs in crp1 that is involved in RNA processing [2]. This repeat is associated with a predicted plant protein Swiss:O49549 that has a domain organisation similar to the human BRCA1 protein. The repeat has been called PPR [3]. 
25.00 9.30 25.00 9.30 24.90 9.20 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.38 0.73 -7.57 0.73 -3.61 518 31410 2012-10-11 20:00:58 2003-04-07 12:59:11 15 3797 468 0 23088 82367 965 29.30 21 15.97 CHANGED hoassllsuascpGphccAhplhpcMpppGh ....................asslls.s..a..u......+......s....G....p.....h.......c.....c.....A..h...p..lacpM.t....................... 0 2628 15898 19987 +742 PF04193 PQ-loop PQ loop repeat TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Repeat Members of this family are all membrane bound proteins possessing a pair of repeats each spanning two transmembrane helices connected by a loop [1]. The PQ motif found on loop 2 is critical for the localisation of cystinosin to lysosomes [2]. However, the PQ motif appears not to be a general lysosome-targeting motif. It is thought likely to possess a more general function. Most probably this involves a glutamine residue [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.69 0.72 -4.42 255 3702 2012-10-03 12:15:12 2003-04-07 12:59:11 9 33 711 0 2429 3643 533 59.40 21 35.09 CHANGED huphlGhlshshhhlshlPQlhpNa+p+Ss.pGlShhhhhhhhhGshhthhhhlhpthsh.h .........................h...hhGhhshhhh.h..h...s.hlP.Q...lhpsa..+.......p..+Ss.pGlShhhhhh..hhGsh..hhhhhhhhtt.....h.................. 1 810 1368 2026 +743 PF05033 Pre-SET Pre-SET motif Bateman A anon Bateman A Motif This protein motif is a zinc binding motif [1]. It contains 9 conserved cysteines that coordinate three zinc ions. It is thought that this region plays a structural role in stabilising SET domains. 
21.80 21.80 21.80 21.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.44 0.72 -11.70 0.72 -3.75 86 1039 2009-01-15 18:05:59 2003-04-07 12:59:11 11 69 226 37 626 1020 2 109.60 28 14.91 CHANGED chsput-s....hslslh........NplD............t-ts..P..tsFpYlsphhhsp...............................h....hstphhh.............GC.sC..p.......................ss....C...sspsC.Chpts.....st...h.................s.Y...sp.ptpLt................p...........................................sssIYECsspCpCs..t.sCtNR .........................................................hs.uhEphslshh.........Np....lD......................sp.s........ssFp..Ylsp..hhsp................................sh.....hs.phh.........................uC..sC....p........................ss......C.......ssspC.Csphs..........tt...th.........................................s.Y..sp...pspLh.........................hpt...................................................sshIaECsptCpCs.......sCtNR............................................. 0 144 274 462 +744 PF00156 Pribosyltran Phosphoribosyl transferase domain Bateman A, Sonnhammer ELL, Finn RD anon Bateman A Domain This family includes a range of diverse phosphoribosyl transferase enzymes. This family includes: Adenine phosphoribosyl-transferase EC:2.4.2.7, Swiss:P07672. Hypoxanthine-guanine-xanthine phosphoribosyl-transferase Swiss:P51900. Hypoxanthine phosphoribosyl-transferase EC:2.4.2.8 Swiss:P36766. Ribose-phosphate pyrophosphokinase i EC:2.7.6.1 Swiss:P09329. Amidophosphoribosyltransferase EC:2.4.2.14 Swiss:P00496. Orotate phosphoribosyl-transferase EC:2.4.2.10 Swiss:P11172. Uracil phosphoribosyl-transferase EC:2.4.2.9 Swiss:P25532. Xanthine-guanine phosphoribosyl-transferase EC:2.4.2.22 Swiss:P00501. In Arabidopsis, At the very N-terminus of this domain is the P-Loop NTPase domain [1]. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.41 0.71 -4.40 71 30853 2012-10-10 14:25:38 2003-04-07 12:59:11 22 68 5141 299 7483 25977 11727 121.40 18 48.82 CHANGED pslh...hs.ctlpphsct......lupp....lpppths.........llultpuGlshustluptLsh...shhhhhh.h...........................................tthtppsspththhhthst.hcucclllV.DDllsoGtTlptshc.hLc.pts.sp........lthsslh ..............................................................................................................................................................t........t..htthhpt......lupt..............h..h..p..p........h....p....h...............llu.......l...............p.......t.......G.....h.............hu..t.......t...l...u...pt.l.sh...........s.h..h...h..s..h.h.ttp............................................................................................................................ttt..t.ppp.s.....s..t....t..h...h...h...t...h.......p....s.........h....p...G.......c.....c.....V.ll.V..DDl.lsoG.sT.hp.t.h.hc...hlc...ptG...up......hl.tlssh.h........................................ 0 2412 4803 6327 +745 PF00377 Prion prion; Prion/Doppel alpha-helical domain Bateman A, Finn RD anon Prosite Domain The prion protein is thought to be the infectious agent that causes transmissible spongiform encephalopathies, such as scrapie and BSE. It is thought that the prion protein can exist in two different forms: one is the normal cellular protein, and the other is the infectious form which can change the normal prion protein into the infectious form. It has been found that the prion alpha-helical domain is also found in the Doppel protein. 
25.00 25.00 36.90 36.70 19.70 19.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.62 0.71 -4.38 7 730 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 240 88 41 750 0 107.50 71 50.15 CHANGED hp+hshcFGs-.tsRYYptN.hpaPstlaYcshsphsVspptFVpsClNhT.stpphp............u..cs.schc.+Vhh+llcEhCstpapcahLt....Ruuulplhss.PhhLhlLshlhFlht .......MSRPLIHFGNDYEDRYYRENMYRYPNQ.VY...YRPV.D.QYS.NQNNFVHDCVNITVKQHTVTT..........TTKGEN...FTETDlKhMERVVEQMCITQYp+EupAa...pRGu.ShlL.FSuPPVlLL.lshlhhll.s..... 0 2 2 9 +746 PF00227 Proteasome proteasome; Proteasome subunit Finn RD, Bateman A, Valas RE anon Prosite Domain The proteasome is a multisubunit structure that degrades proteins. Protein degradation is an essential component of regulation because proteins can become misfolded, damaged, or unnecessary. Proteasomes and their homologues vary greatly in complexity: from HslV (heat shock locus v), which is encoded by 1 gene in bacteria, to the eukaryotic 20S proteasome, which is encoded by more than 14 genes [1]. Recently evidence of two novel groups of bacterial proteasomes was proposed. The first is Anbu, which is sparsely distributed among cyanobacteria and proteobacteria [1]. The second is call beta-proteobacteria proteasome homologue (BPH) [1]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.99 0.71 -5.02 174 10806 2012-10-03 21:14:07 2003-04-07 12:59:11 21 55 3712 1970 5186 9052 2184 173.50 21 77.89 CHANGED hppGs.TslGlc.sccuVllu...u.-p.+so..ts.hlhsppp..h..cKlhpl..scclhhshuGhsuDsphlhchhct.psp.hac.hp.hs.c.hslp........hushhpthh.t..pt...th.psh...s.s.shlluGhD.p.p.s...t.spLaph..-ssGshhph....hsuhG.suuphuhs.hl-ppa....pts..hoh--u...hclshcult.t..uh..c..pst...hss..ssl..plshl ........................................................................................................pus.Thlu......lp.....t.......p.......s.......t.....lllu.............u..Dp.p.....s.......o.......t..s...hl..........t........p.s..p....h..........cK..l..h..pl....s..............s........p........l....h..........h.u....h..............u............G...........hs.A...D..s...t..sl...............h....c.hh........c..........t........p.......h........p.............a........p........h...........................t......t.....h...s..lp.....................................h.........s.........t...h...h....t.....s.........h.....t..........pp...............ht.....p......s.h..................h.l....phlh...u...Gh....s...........t....s..........................t...s...p.la.h..l.sss..Gshh......p............p............h.......h......A........hG..uGu...phAhu.hh.......c......phh...................t...s........h.........s.......p..c..A..........hc..ls.h.culp.....uh....c....hs.h......hos..p.phpl........................................................................ 0 1761 2940 4236 +747 PF03371 PRP38 PRP38 family Bateman A, Winge P anon Winge P Family Members of this family are related to the pre mRNA splicing factor PRP38 from yeast [1]. Therefore all the members of this family could be involved in splicing. This conserved region could be involved in RNA binding. The putative domain is about 180 amino acids in length. 
PRP38 is a unique component of the U4/U6.U5 tri-small nuclear ribonucleoprotein (snRNP) particle and is necessary for an essential step late in spliceosome maturation [2]. 22.40 22.40 22.80 23.00 22.10 22.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.81 0.71 -4.65 41 560 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 309 0 382 526 4 168.00 32 51.79 CHANGED tsts..tlpspssshllppllRp+IhsShYaKpphaslss..........pollDchhp.lcalGuhhsust+...................PosFlCLLhKLLplpPsc-Ilhphlppp..................................-aKYlRALuhhYlRLshss...................t-laphhE....PhhpDaRKl+hpstsu...........plhahD-alDpLLsc-chhslhLPRl.tR..hhL-ppstL ..........................................tt......lhGsssphllppll+spIhsS.YaK.phatLss.........cpllDchhp...lcalushh.s.G.sp+...................................PoshhCLlhKhh.plp.s+c.lht.hlppp...............................................-.p..YlRALGhh....Yl.....Rhstss........................h-lapahE....Phhp...D.cclchpstsG..............................plhphschlcpLL.......pcpchhs..hhLPRl.hp..h.hp..t..h................................................ 0 142 214 311 +748 PF01789 PsbP PsbP Bashton M, Bateman A anon Pfam-B_1303 (release 4.2) Family This family consists of the 23 kDa subunit of oxygen evolving system of photosystem II or PsbP from various plants (where it is encoded by the nuclear genome) and Cyanobacteria. The 23 KDa PsbP protein is required for PSII to be fully operational in vivo, it increases the affinity of the water oxidation site for Cl- and provides the conditions required for high affinity binding of Ca2+ [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.09 0.71 -4.82 30 539 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 153 5 301 551 149 160.70 22 65.89 CHANGED hssshsshhshshsustutssuGhpsa.sssDGYpFLYPsGW...pcVpl....sGs-llF+Dll-ssEslSVslossscc...olc-LGoPpc........VGcpLhcphluspGus.....RpAcLlcAspRcss.G+sYYslEatl+hss.....t............sRHpLuolsVscG+LYTlssuosEcRW.K.scchhcpllsSFslh ..................................................................................................hs................................h...h....t....t.....aph..hPt.t.W.............................phth........G....................hh..acs..............t.....t.p.s......ls.V..hlss....s........spp...........sl.p..ch.Gs..scp...........lu...ppl...h..t..t.hhs.s.tt.....................tpupllps..pp......pp...h....s..G.....c.tY..Yp.hEhtsp.tss........t......................t+Htlssss..l....s..s.G.....+.......LYhhss...p...s...s.-...p...p...W...t....c...hcphlcpshsSFpl......................................................... 1 93 210 266 +749 PF01416 PseudoU_synth_1 PseudoU_synt; tRNA pseudouridine synthase Howe K, Griffiths-Jones SR anon swissprot Domain Involved in the formation of pseudouridine at the anticodon stem and loop of transfer-RNAs Pseudouridine is an isomer of uridine (5-(beta-D-ribofuranosyl) uracil, and id the most abundant modified nucleoside found in all cellular RNAs. The TruA-like proteins also exhibit a conserved sequence with a strictly conserved aspartic acid, likely involved in catalysis. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.52 0.72 -3.60 118 11373 2009-01-15 18:05:59 2003-04-07 12:59:11 15 22 4733 26 3331 8399 4001 104.70 20 69.27 CHANGED hthtahGp.sFpuap.........pp....................pshp........pshh+sltphph................................t.t.........................thhht.h+hhh.......sshhcs.spGhpuhsplt....ph.............................................Lspph.s...........tssshs.hhhcspas ......................................................................................hta.Gp..cFpuap.....................................pp....................................................pohp.........psh...+slpch.ph................................................................................................................................................................h+.h..hl........................sshscs....st...Gs.c.uhsp.ls..ch............................................................................................................................Lssp...s...s..........ss..sss..hhcsca................................................................................................................................. 0 1157 2111 2810 +750 PF01437 PSI Plexin_repeat; Plexin repeat Bateman A anon Bateman A Family A cysteine rich repeat found in several different extracellular receptors. The function of the repeat is unknown. Three copies of the repeat are found Plexin (Swiss:P70206) [1]. Two copies of the repeat are found in mahogany protein. A related C. elegans protein (Swiss:Q19981) contains four copies of the repeat. The Met receptor contains a single copy of the repeat. The Pfam alignment shows 6 conserved cysteine residues that may form three conserved disulphide bridges, whereas [1] shows 8 conserved cysteines. The pattern of conservation suggests that cysteines 5 and 7 (that are not absolutely conserved) form a disulphide bridge (Personal observation. 
A Bateman). 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.03 0.72 -3.81 95 2947 2009-01-15 18:05:59 2003-04-07 12:59:11 20 228 154 27 1472 2447 7 54.60 25 6.46 CHANGED sCsp......a..poC.s.sClsup.t..tCuWCst.....tppC....spts.........................pCtptp....................t.tpp..Cs ............pCst......a..poC.ssC.....l.....tup.....cs..hCuW...Cst........ttpC....spts..................................................pC.t.t.............................................................................................. 0 251 365 805 +751 PF04046 PSP PSP Wood V, Finn RD anon Pfam-B_PSP (release 7.3); Family Proline rich domain found in numerous spliceosome associated proteins. 18.30 18.30 18.80 19.00 18.10 18.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.35 0.72 -4.57 40 488 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 300 0 335 464 3 49.30 49 8.24 CHANGED +PGhlSpcLRcALGhssss.....PPPWLhpMpch....GhPPuY..PsL+IPGlshsl .....+PGpLS..-EL+pALGhs.sss............PPPWLhpMQ+h...................G.PPSY..PsL+IPGLNss........... 5 114 181 270 +752 PF04468 PSP1 PSP1 C-terminal conserved region Waterfield DI, Finn RD anon COG1774 Family This region is present in both eukaryotes and eubacteria. The yeast PSP1 protein is involved in suppressing mutations in the DNA polymerase alpha subunit in yeast [1]. 
21.10 21.10 22.60 21.90 19.80 19.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.78 0.72 -4.00 138 1828 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 1651 0 508 1317 149 88.50 40 24.33 CHANGED slppllRhA...o..pcDhpphppscpcp.ccAhphCpp+lpccs.............................L..sMKllcsEa....pFDpsKlhFYao......A-sRVDFRcLVK-LAphF+sRIEhRQI ............................................................................lKpllRhA...sppDlpphp.cscpct.ccAhplCpcplpcps........................................................................................................................L.......cM+LlcsEY..................shD.....p......sKll.FYFT.............A.-.sR...lDFRcLVK-LAphF.+.T.RIELRQI..................... 0 218 376 469 +753 PF04024 PspC PspC domain Bateman A anon COG1983 Domain This family includes Phage shock protein C (PspC) that is thought to be a transcriptional regulator. The presumed domain is 60 amino acid residues in length. 21.80 21.80 21.90 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -8.93 0.72 -4.50 182 3398 2009-01-15 18:05:59 2003-04-07 12:59:11 7 17 2305 0 732 2173 337 60.70 32 37.58 CHANGED pp+LhRsp....ps+hluGVCuGlAcYhslD...ssllRllhll...h.hl...........................h..sss...sl..l.........hYllhh..lllP..ppss .....................p+LhR.s..psphluGVCuGlAcaas.l.-...ss.lVRllhllhsh............................hsss.....ull.........hYl.llh..hllPtt..s.............................. 0 264 511 639 +754 PF04886 PT PT repeat Bateman A anon Pfam-B_517 (release 7.6) Repeat This short repeat is composed on the tetrapeptide XPTX. This repeat is found in a variety of proteins, however it is not clear if these repeats are homologous to each other. The alignment represents nine copies of this repeat. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.01 0.72 -4.63 15 489 2009-01-15 18:05:59 2003-04-07 12:59:11 7 108 180 0 258 482 734 37.20 41 7.69 CHANGED pPTspPospPTspPTupPospPTspPTspPostPos .............................PotpPTspPTsp.PT..spPTs.pPT.spPTspP.ottP..................................... 0 131 163 221 +755 PF01329 Pterin_4a Pterin 4 alpha carbinolamine dehydratase Finn RD, Bateman A anon Sarah Teichmann Domain Pterin 4 alpha carbinolamine dehydratase is also known as DCoH (dimerisation cofactor of hepatocyte nuclear factor 1-alpha). 20.80 20.80 21.20 21.00 20.40 19.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.29 0.72 -4.23 136 2229 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 1838 50 880 1778 2287 90.20 32 78.95 CHANGED s.s..s.hLoppchpph..Lp.pl..s.....sW..plt...s...........tplp+p.apFcsFtpAh...uFhspVuhlAEptsHHP-lts.sascVplphhTHcl....s..G.LocpDhhhAs+h-plhp ....................................................ht....lsttphtth..Lt..tl.....s..........sW...p..h.......s..............stlp+.p.a..pFp.s.......Fp.pAh..................uFh.s.c.lAhh.AE.......ph.s.......HHP.-.hhs....s.........as..p.........V.plsl..h..TH..sh............s...G...locpDhthAtph-tlh.t........................... 0 282 549 745 +756 PF03095 PTPA Phosphotyrosyl phosphate activator (PTPA) protein Mifsud W anon Pfam-B_2456 (release 6.4) Family Phosphotyrosyl phosphatase activator (PTPA) proteins stimulate the phosphotyrosyl phosphatase (PTPase) activity of the dimeric form of protein phosphatase 2A (PP2A). PTPase activity in PP2A (in vitro) is relatively low when compared to the better recognised phosphoserine/ threonine protein phosphorylase activity. The specific biological role of PTPA is unknown, Basal expression of PTPA depends on the activity of a ubiquitous transcription factor, Yin Yang 1 (YY1). 
The tumour suppressor protein p53 can inhibit PTPA expression through an unknown mechanism that negatively controls YY1 [1]. 20.10 20.10 21.30 20.50 20.00 19.70 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.06 0.70 -5.25 53 620 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 316 20 404 615 5 265.50 37 78.11 CHANGED tPsKRIhss.pDlphFtpStsapclhsFlpslsculpGpphs..........pshthSs...........................................slppllp.lLcclppllccsPP....hct.s.sRF......G...NhuFRsaa-clp...........ppsssllpphlss.................ttpsslsELssYlhpS..FGsppRlDYGoGHELsF.lsaLhsLhclu................llp.pp.........D.............hssllLtlFscYlclhRcLphsYhLEPAGSHGVWGLDDapFLPFlaGuuQhhscsh.hp........................Ppulpsppll.........-.papcpahYhssIsFI.splKou........shp.HSPhL.DISust.sWsKlpp................GhlKMYpsEVLuKhPlhQHFhFGs.Lhshscshss ..................................................h.P.+cIhs..tDhthahpS.......satphhsFlhp.lscultspphs................t...st..................................................................................slpplht.lLsplpphl......cchPP.........pt..s.....pRF..................G...N.huaRpahpc.lp...........pps....plht..ph..l.st.........................................................................t.tts..h.ElssYhhpS..aGstpRlDYGoGHEhsF.hhaLhsLhpls............................hhp..tp-...........................................sllhtlF..pYlp..lhRpLphsY..hLEP.AGSH....GVWGLDDapFLPal.aG..uuQ..ht.p..h...hp...............................................Ppphhptphl..........p.p..t.pphha.hpslt..al......pp.hKpu............shttHSs.L.sISus...sWsKlpp................GhlKMYp.sE..VLtKhPlhQHFhFGs..Lhsh.....s........................................ 
0 157 237 341 +757 PF02302 PTS_IIB PTS system, Lactose/Cellobiose specific IIB subunit Mian N, Bateman A anon Pfam-B_9339 (release 5.2) Domain The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The lactose/cellobiose-specific family are one of four structurally and functionally distinct group IIB PTS system cytoplasmic enzymes. The fold of IIB cellobiose shows similar structure to mammalian tyrosine phosphatases. This family also contains the fructose specific IIB subunit. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -10.36 0.72 -3.50 245 18949 2009-01-15 18:05:59 2003-04-07 12:59:11 12 57 2638 16 1606 8004 181 89.10 20 29.08 CHANGED pllssCssGhuoShhstpplcctscptG........l-s....pstsss...hptpphts.s.Dllllus...plp......phpphss............hhhlshps......hh..tsscpllpp ................llssCssGhuoShhstptlcctspchG................................l-s....psssss.......hp.tps.h..tp.s..Dll..l.hus.plp........phpphss........h.........h.hlshts....hh....phtthh.................................................... 0 438 856 1245 +758 PF01472 PUA PUA domain Bateman A anon Medline:99193178 Family The PUA domain named after Pseudouridine synthase and Archaeosine transglycosylase, was detected in archaeal and eukaryotic pseudouridine synthases, archaeal archaeosine synthases, a family of predicted ATPases that may be involved in RNA modification, a family of predicted archaeal and bacterial rRNA methylases. Additionally, the PUA domain was detected in a family of eukaryotic proteins that also contain a domain homologous to the translation initiation factor eIF1/SUI1; these proteins may comprise a novel type of translation factors. 
Unexpectedly, the PUA domain was detected also in bacterial and yeast glutamate kinases; this is compatible with the demonstrated role of these enzymes in the regulation of the expression of other genes [1]. It is predicted that the PUA domain is an RNA binding domain. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.05 0.72 -4.25 108 4603 2012-10-02 17:37:24 2003-04-07 12:59:11 15 42 3369 67 1812 3731 1805 74.30 29 20.31 CHANGED splhlDcuAscultp..Gu.sLhssGlhpscssh......ctG-hVhlhsp....p.G........chlAhGhuthsupEhtchp..tttusclcphl ....................GplhlDcGA.spAlhp.....Gs...SLLssGlh......p.....l......pGsF............................ptG-sVt..lh..st...................p.G..........................................ctlA.pG..lspasus.cl.p..p..ht...tth.utcltt............................................................. 0 564 1073 1503 +759 PF01480 PWI PWI domain Bateman A anon [1] Family \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.59 0.72 -3.88 34 907 2009-01-15 18:05:59 2003-04-07 12:59:11 12 44 271 2 602 867 4 72.20 28 10.85 CHANGED l-llKsWIsc+ls-lLGhEDDlVl-ashshLp......................tpc....DsKplplpLsGFLs.+suttFsp-LWcLLluApps..pG ........................-hlKsWls++lp-..l.lG.h.-...-......s..s.l...l-a.lhsh.lctt...............................s..c.p.h.h.p.pLss..F..Ls..ccs...pt..FlpcLWchll.tppt...t............................. 0 199 312 474 +760 PF00787 PX PX domain SMART anon Alignment kindly provided by SMART & iterated Domain PX domains bind to phosphoinositides. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.35 0.71 -4.28 71 7076 2009-01-15 18:05:59 2003-04-07 12:59:11 19 282 785 46 4186 6459 70 114.90 20 19.35 CHANGED sthhp..hthss.t.......spttthhhhplphpss..............tphpltRRYp-FhpL+ppLpcphs.....................thhlP.hPsKthh......................................................hppphlccR+.ptLppaLpplhppPhhtp.....schltpFLpsp ........................................................................ht............................tththhh..h.h..plp.hpss..............................pht..V...hR.RY..p-..F..ttL....cp..p..Lp...cpas.........................................................th.h.lP......lP.t..Kp.h.h.st...........................................................................................................................................................hppp...h..l...c...c...R+...p.tL.p...paL....p..............p...l..h....p....p..s..h..ltp......sphht.pFLp..t.................................................................................................................................................................................................... 1 1382 2076 3184 +761 PF02194 PXA PXA domain SMART anon Alignment kindly provided by SMART Family This domain is associated with PX domains Pfam:PF00787. 
21.10 21.10 21.30 21.10 20.50 20.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.17 0.71 -4.72 69 830 2009-01-15 18:05:59 2003-04-07 12:59:11 10 30 257 0 547 794 2 167.90 23 19.45 CHANGED sstlsptlppll..shll+-FVps.WY.ppIossp..pFss.plcpslppsltplppRlp...............phDhssllspclhsllspHlphappAppth.............................ht...spspthplhhshphpss.....csA.htsptpphp.........................................alRtlspslLshlLsppphpstslpsLl+ElLussVLhPllp.hlu-P-alN...phIlthhpss ............................................lsttlppllphllc-aV.s.WY....pp.........l.o.scp....pF.p.pl.cpslpthhtp.lpp.R....h.p...........................cl..D.h...sl..l.............sp.......cllshhspHlphhp.pAppth...........................................................................ttt...p...hp.h.ht.p.t.t............hc.u..h....psp....pppht....................................................................................................................aLRpl.sp.hll...hl.LP.......pt.............h..........p.......sp.......shphllpEllu....ssl.lhPh.ls.hlucP-hlN...phlhhhhp..s............................................................................................................................................................................... 0 173 277 432 +762 PF00070 Pyr_redox pyr_redox; Pyridine nucleotide-disulphide oxidoreductase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.49 0.72 -3.57 130 40648 2012-10-10 17:06:42 2003-04-07 12:59:11 22 455 5442 395 10595 41559 13431 81.90 22 17.40 CHANGED +llllGGGhIGlEhAshh....pp.hG...t..cV.ollcttsplh....hhstp.huphl..pcph.ccp....Glplhhssplpplp......tsss.t....lhhp......ss ..................plsVlGG.Gh.l....Gl....Eh.A.thh.............................pp..h..G.................s.......c..V..o.....l..l...c.t....t..s..........p...l.........h..............................h...........s.......................p......h...........s...........p.......h..........h............pc..p....h...p..pp...............G.l...pl..h..h..s..s....p.l..p..p.lp........tss.t..t.......l.....tttt............................................................. 0 3391 6626 8921 +763 PF02852 Pyr_redox_dim pyr_redox_dim; Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain Griffiths-Jones SR anon Structural domain Domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.34 0.72 -4.03 102 18533 2009-01-15 18:05:59 2003-04-07 12:59:11 17 94 4904 322 4408 13761 3778 109.20 27 23.24 CHANGED lPsslaopPElAsVGl...TE.ppApppsht......lcltphsas..........sss+Ahshtp......scuhlKllsct.cspclLGsHhlG.spuuE..hIpthulAlchuh...Thp.-.hspsltsHPThuEthtpss .....................................lPtshaopP.pl.AsVGl....T....E....pp....A....+....p....p..s.hp................hcstp..h..sas.......................s...s.+..A...h.shsp...............spGhh.Kl.l.......h......c.............t.....c......o.......pc..llGsph...l..G........s..u.sE....hIpt...hul...Al.....c..h.s.h...ohp...p..h..sp..sla.sHPThuEshh...h......................................... 
0 1486 2777 3700 +764 PF01729 QRPTase_C QRPTase; Quinolinate phosphoribosyl transferase, C-terminal domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_2063 (release 4.1) Domain Quinolinate phosphoribosyl transferase (QPRTase) or nicotinate-nucleotide pyrophosphorylase EC:2.4.2.19 is involved in the de novo synthesis of NAD in both prokaryotes and eukaryotes. It catalyses the reaction of quinolinic acid with 5-phosphoribosyl-1-pyrophosphate (PRPP) in the presence of Mg2+ to give rise to nicotinic acid mononucleotide (NaMN), pyrophosphate and carbon dioxide [1,2]. The QA substrate is bound between the C-terminal domain of one subunit, and the N-terminal domain of the other. The C-terminal domain has a 7 beta-stranded TIM barrel-like fold. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.98 0.71 -4.73 18 3824 2012-10-03 05:58:16 2003-04-07 12:59:11 14 9 3313 84 1170 3336 1907 167.10 40 57.32 CHANGED IATtTpchlctscus..ps+lhsTRKTtPG.LRhh-KYAVhlGGGs.......sHRhGLsDslhIKDNHIsssGu...lpcAl+psRphs.shs....lEVEl-sL--hccAlp.........uG....ADlIMLDNh.........ss..-pl+cAlc.h.ctpshc....hhlEsSGGloh-sltpaApsGVDhIShGsLT+uspslDlSLc ..................................................................................lAThTpphVct.lpus................pspl.hDTRKT.h.P...G...L.R..h.h.p.K..YAVhsGG.Gh................NHRhGL.....sDul.......LlK-....N....HI....t....s........s......G..u........................l.pp....A...l.ppA...+t...ht....st...h...s.........lEV...E.V...-...o....L....-.....p.....l.c.....-..Alp.....................................AG...........AD..lIhLDNh........................ss......-ph.+.....c.A.....V.p.........h..h..ss+...........shlEs.SGsl..oh.cs..............lpp..h..A...p..o...G...V....DhISlGu...L.T...+...ss.pslDlShc........................ 
0 388 754 980 +765 PF02749 QRPTase_N Quinolinate phosphoribosyl transferase, N-terminal domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_2063 (release 4.1) Domain Quinolinate phosphoribosyl transferase (QPRTase) or nicotinate-nucleotide pyrophosphorylase EC:2.4.2.19 is involved in the de novo synthesis of NAD in both prokaryotes and eukaryotes. It catalyses the reaction of quinolinic acid with 5-phosphoribosyl-1-pyrophosphate (PRPP) in the presence of Mg2+ to give rise to nicotinic acid mononucleotide (NaMN), pyrophosphate and carbon dioxide [1,2]. The QA substrate is bound between the C-terminal domain of one subunit, and the N-terminal domain of the other. The N-terminal domain has an alpha/beta hammerhead fold. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.54 0.72 -4.16 161 3866 2012-10-02 20:27:15 2003-04-07 12:59:11 11 8 3303 80 1158 2967 1621 88.60 32 30.01 CHANGED hs...tG.DlTo..shlsssppup...uplhu+ps.ullu.Ghphspp.lachl....................sl.plp..hhhpDGcplpsGs........hlhplpGsucslLpuERsuLNhLp+hS ......................................sDlTs.....tll...s.s..s.p.p...up....upll..s.+..-.s.GV.l.u...Ghp.h.sp.p.lFptls........................................ssl.plp.......hthp...DG-tl...ps..sp..........................sl..hp..lpGss+sLLsuERsALNhlpphS............................................ 0 387 749 973 +766 PF01424 R3H R3H domain Bateman A anon Medline:99003905 Domain The name of the R3H domain comes from the characteristic spacing of the most conserved arginine and histidine residues. The function of the domain is predicted to be binding ssDNA. 
20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.15 0.72 -4.27 402 3275 2009-01-15 18:05:59 2003-04-07 12:59:11 17 89 2043 7 1332 2719 355 63.80 24 13.76 CHANGED cppplpphspphhpps....h........pss...p.........shphp.P..h.ssh-R+.llH.phsp....th.....s...lpopS..t.......G.pt.sp..Rplllthp ...........................................................l.phspchhppl....h.....................css.........c..........shphp.P.....M..sshER+.l.........lH.phsp...hh.....s....lpopS.....p.........G...cc.sp...Rtl.ll...................... 0 444 749 1050 +767 PF03834 Rad10 Binding domain of DNA repair protein Ercc1 (rad10/Swi10) TIGRFAMs, Griffiths-Jones SR, Coggill PC anon TIGRFAMs Family Ercc1 and XPF (xeroderma pigmentosum group F-complementing protein) are two structure-specific endonucleases of a class of seven containing an ERCC4 domain. Together they form an obligate complex that functions primarily in nucleotide excision repair (NER), a versatile pathway able to detect and remove a variety of DNA lesions induced by UV light and environmental carcinogens, and secondarily in DNA interstrand cross-link repair and telomere maintenance. This domain in fact binds simultaneously to both XPF and single-stranded DNA; this ternary complex explains the important role of Ercc1 in targeting its catalytic XPF partner to the NER pre-incision complex [3]. 22.10 22.10 23.90 23.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.48 0.72 -4.28 26 301 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 268 3 216 287 4 67.50 46 21.96 CHANGED psILVsspQ+GNPlLpp..l+slsWca.s.-....IlsDYhlGpspslLFLSLKYH+L+PEYIapRlcpLpp.pash ...............sIlVu.sR.....Q+GN.PlLpa..l+.s..ls.WEa.u..D.....I.s.sDYll.....Gpo.oCsLFL.SL+YHpLH.P-YIapRlcpLsppat.h........ 
1 73 120 178 +768 PF04423 Rad50_zn_hook Rad50 zinc hook motif Bateman A anon Bateman A Motif The Mre11 complex (Mre11 Rad50 Nbs1) is central to chromosomal maintenance and functions in homologous recombination, telomere maintenance and sister chromatid association. The Rad50 coiled-coil region contains a dimer interface at the apex of the coiled coils in which pairs of conserved Cys-X-X-Cys motifs form interlocking hooks that bind one Zn ion. This alignment includes the zinc hook motif and a short stretch of coiled-coil on either side. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.55 0.72 -4.50 32 262 2009-01-15 18:05:59 2003-04-07 12:59:11 9 18 230 2 157 265 13 53.50 26 4.97 CHANGED phcspttphpctlpplppspt..sCPlCtRslss-cc.pclhpchppclpph.cchp .............htttht.hpphlppLsc..ppt..sCPlCpRshss-pc..pcllpchpsclcth.cp.................. 0 38 69 121 +769 PF04055 Radical_SAM Radical SAM superfamily Bateman A anon Bateman A Domain Radical SAM proteins catalyse diverse reactions, including unusual methylations, isomerisation, sulphur insertion, ring formation, anaerobic oxidation and protein radical formation. 
29.40 29.40 29.40 29.40 29.30 29.30 hmmbuild -o /dev/null --hand HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.13 0.71 -4.08 628 65493 2009-09-17 13:13:13 2003-04-07 12:59:11 16 316 5136 37 18526 49841 14153 169.40 14 43.97 CHANGED h.hsssCshp.....CtaCthtt...............................................................htttttthshcplhchhcph...ph...hp.tlhhs.....................uupshhh................................hshhthhhthttt.............................................................thplslposshhh............................tchlctltchs...........hstlslslcshspp.........................htchh...pttsh........cphhcslcthpcts......hp...........hhshhhhh.sps.p..pchtphhchl ............................................................................................................h..hhttCs.hp...........C.s.....a.......C.thst.......................................................................................................................ttt.pths...h.....c........c....l......h.......p........p........h.........p.........p........h..................h.........p.....................s.........h.........p........t........l..h..h.s..................................................G.G.p..s..t.h..h.........................................................................................................h..s..h..h..t..h....h.h.thtpt.............................................................................................t..thc..l..s..l...p..o..s..sh.h.h...............................tchl.p.t...l.tp.ts................hs.h.l..p..l...s..l...p...s...hspp..............................................hhphh......ptt..sh....................pp....h.h....c.s...l...c....t...h....p..p.t..s...........................h......................hh.hh.hh.h........s.....s........p..ph.......h............................................................................................................................................................. 
0 6768 12613 16003 +770 PF02145 Rap_GAP Rap/ran-GAP Mian N, Bateman A anon IPR000331 Family \N 21.50 21.50 21.60 21.50 21.40 20.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.25 0.71 -4.92 54 1221 2009-01-15 18:05:59 2003-04-07 12:59:11 10 45 216 7 719 1143 3 181.80 35 14.83 CHANGED lhsNppu..S..sapcFLshLGpp.................................VpLps..ap.tapGGLDspss..psGpao....lYhphc.........spElMFHVoThhPh....sts...............DtQplp+KRHIGNDhVsIlF....sE.ssps.as..................................shItSpFs...alhlllpsh....................ps...................................................hY+Vplhp+...sslP..FGPhhs.sthl.p..c..shtsalhspsINAppsshph.....spasphh..pcph.phlc.......sL ................................hhsNppu...S....sap-FLs.hLGpp.................................lcLcs.....ap....ta.p..G..GLDs.p.ss..pTG...ppo......h.Y..hs..h.p.........shElh..FHVSThh..Ph....p.ps......................cp.pp...lp.........+KRHIGNDhVsIVF.........pE....ssp...s....Fs.................................................................sh.I..t.SpFp.........alall..l.ps..ht...........................s...................................................................................................................hYpVsl.p.p+.......p-..VP.....hFGP..s.....hs.....s...s...hl..t.+..........thtpalhsphINAppAshps..........tpatphh..p..cph.thlcp........................................................................... 0 262 341 531 +771 PF04078 Rcd1 Cell differentiation family, Rcd1-like Wood V, Finn RD anon Pfam-B_5278 (release 7.3); Family Two of the members in this family have been characterised as being involved in regulation of Ste11 regulated sex genes [1,2]. Mammalian Rcd1 is a novel transcriptional cofactor that mediates retinoic acid-induced cell differentiation [3]. 
27.60 27.60 27.80 28.90 24.60 27.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.59 0.70 -5.56 27 453 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 304 4 291 421 6 238.70 58 77.46 CHANGED phltcLt.pPcp..REpALlELSK+REpascLAshLWpShGslusLLQEIlulYPhLs.........PPsLos..ttSNRVCNsLuLLQCVAS.Hs-TRthFLpA+IPLaLYPFLpTsu+sRs..FEYLRLTSLGVIGALVK.sDcsE....VIsFLLpTEIlPLCLRIMEhGoELSKTVATFIlQKILhDDtGLsYlCtTsERFhuVupVLspMV...tpL...spps.os........RLLKHllRCYLRLS-Ns....................RA+cALpphLPptL+..DsoFsshlc-..Dsss++hLtQLLhNlsst ........................................hl.-Lh.sspp...REsALLELS....KKR..E.p....hs-.LA.hLWaSF...G............shsuLLQEIlslYPh.ls.........Psp..Lou...ttSNRVCNALALLQCVAS.Hs-TRshFLp.................A...............HIP.LaLYPFL.pTsSKoRP..FEYLRLTSLGVIGALV.K..sDsp-.....................VI..sFLL...............sT.EIIPLCLRIM.E..s.G.SELSKTVAhFIlQKILLDD............s..........GLsYICtTh.ERFh.tVutlLupMV...hpL............scps....os..............................RLLKHllRCYLRLSDNs....................RARc.AL+.p.CLP-....L+.......DsTFs..psLc-....DssTK+hLtQLLhNlp.s..................... 0 108 166 239 +772 PF05177 RCSD RCSD region Guo JH anon Guo JH Family Proteins contain this region include C.elegans UNC-89. This region is found repeated in UNC-89 and shows conservation in prolines, lysines and glutamic acids. Proteins with RCSD are involved in muscle M-line assembly, but the function of this region RCSD is not clear. 
25.00 25.00 27.70 25.50 19.50 23.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.14 0.72 -10.92 0.72 -3.52 4 51 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 16 0 36 64 0 87.60 41 11.71 CHANGED cV+SPsKKEKSP..............EKoEpp.so......EEsKos..KEKSPEcsDtp.tSPTKK-KSPppSusE...-lKSPsKKEKSPE...KsEccPuSPTKKE............psppcEKSPE ....................................................tKp.pp....tss......EcsK.sspK.cc+S..sEc.s.-c...pshSPs.KK-K.SPppS.usE......cs+SPsccctusE...csE.c..uo.sKcE.......................p.................. 0 7 13 33 +773 PF01030 Recep_L_domain Receptor L domain Finn RD, Bateman A anon Pfam-B_244 (release 3.0) Domain The L domains from these receptors make up the bilobal ligand binding site. Each L domain consists of a single-stranded right hand beta-helix [1]. This Pfam entry is missing the first 50 amino acid residues of the domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.71 0.71 -4.10 68 2555 2009-01-15 18:05:59 2003-04-07 12:59:11 19 86 174 80 1269 2434 30 112.50 25 20.58 CHANGED sCohlpG....sLpIphtstp......................t..thlsslcclsGhlhItps..thpshshhpsLchIcGpphhp......paulhlhpN.p...............LpcLth.s..LppIpp.........GslhlppNs.......cLChtpp.h.hphlh .........................................................sCohl..p.G.....sLplh.hstp........................................phphh..p..slcclo.....Ga..lhIpps......thss.h.s.hhp.N.Lph.....I.....+..G...p.p..l.hp.........................pau..Lhlh.pstp.............................................LppL.t.h..s......L.p.p....l.p..............................Gtlh.hp.p...N............pL.Chhp.p.h.h....h................................................................. 1 289 375 1023 +774 PF02010 REJ REJ domain Bateman A anon Bateman A Family The REJ (Receptor for Egg Jelly) domain is found in PKD1 Swiss:P98161, and the sperm receptor for egg jelly Swiss:Q26627. 
The function of this domain is unknown. The domain is 600 amino acids long so is probably composed of multiple structural domains. There are six completely conserved cysteine residues that may form disulphide bridges. This region contains tandem PKD-like domains. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.36 0.70 -6.17 25 1075 2012-10-03 16:25:20 2003-04-07 12:59:11 10 184 317 2 592 1109 109 274.00 15 21.55 CHANGED Aplpl..pC....sstpspapWplhsssssssh.phsp.....................t..plsIPphsL....hGsYshsholoh.....sssslsspptsplplts.....osLhAhIcGGopcshuhp..pslh.........lDu.SpSh...DP...........Dhsstp...sslsatWhCpspsss........................................tsC..........t.t.h.hssssuslolsuspLpussp.YpFplslsKsu.....R.oupoppslhlhpGpsPplplpChssss.t...lssssclsLpusss.sss...sppspapWsl...........ps.tt..hh.....................................................................................................................................................................................................upssTshssstLsl+tsshpsstpYthslhltssshpt......hAshshps..NtsPpsGsCsl............sPspG...hsLpTpFslpCpsapDp-t.......PLsYphhh.............hpsps.hphlhpGspspp.....thhLPsG.ssp.taslslhVpVhDshGuss.ssshslpVpsssssss.........hh.hh.tssstlsshltpGDstputphhss 
.............................................................................................................................................................................................................................................s.........h..htl...........................t...h..spt.h.......l.....l.........................t..s.............s.h..h.....s.s...s....t.hh.hshs.......psl.s...........................................Ls..G..otS.......Ds...........st............lp..apWppss.ssts..................................................................................................................................................t..tp.s.sh....hph...sp.h.....t......st.......hpF.p...lplpcsp.................t.sstsph...sl..h...s.............t.......................h........................................h...t.......h..h.h.....t.....................h.....h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s........................................................................................................................
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 318 363 454 +775 PF03432 Relaxase Relaxase/Mobilisation nuclease domain Finn RD, Mifsud W, Bateman A anon Pfam-B_4002 (release 6.6) Family Relaxases/mobilisation proteins are required for the horizontal transfer of genetic information contained on plasmids that occurs during bacterial conjugation. The relaxase, in conjunction with several auxiliary proteins, forms the relaxation complex or relaxosome. Relaxases nick duplex DNA in a specific manner by catalysing trans-esterification[1]. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.43 0.70 -4.98 17 2990 2012-10-02 18:54:05 2003-04-07 12:59:11 9 15 1395 0 379 2707 170 226.00 16 49.85 CHANGED spsShsphlsYhpp...........................+shppsshp...hsschstpphhsspthas........cs........psspsacll.SFpssE..ss......cphppluhchhpplu..saQhhlhsHsD....p-plHsHIllNplchpo...G+phpsp............h..shcplccsscplspccGhphspt........st.+.s.s-tshhp.......................scsphctph+pslcptpt....spsap-h+ctLpcpGlclc..thsspplshphpspp..ppl+usplucc..ascsplcpchtcp .......................................................................................................t.....................................................................................t..h..............p.....h..................t.....t..........t...............................................pt.......................pt..sh..H.hhhSap...s....t.-...hss....................................cph....p....p....l....u.p....c......h....h....p...c..h.........t.....h.........s......p......a......p........a....l.lsp.H...p.D..............ps....+.......hHhHIlh.s.pls..pt...............G..+ph..p.sp............................................h....shcp.h...p..p.h...s...c.c..l.p.p....c...h.Gl...phspp......................t...p..p......p....h....t...pht.hhp..............................................................................................ttt.p...h...c..t..t...l...p.p.....t...lp.thht..........t..psh...p.p...hhp...h.h.t.p.....s.h..plp............tt....t...............h..............h................t..............................h....p...p..t..h.ttt....h.............t....................................................................................................................................................................................................... 
0 137 269 341 +776 PF03090 Replicase Replicase family Mifsud W anon Pfam-B_2424 (release 6.4) Family This is a family of bacterial plasmid DNA replication initiator proteins. Pfam: PF01051 is a similar family. These RepA proteins exist as monomers and dimers in equilibrium: monomers bind directly to repeated DNA sequences and thus activate replication; dimers repress repA transcription by binding an inversely repeated DNA operator. Dimer dissociation can occur spontaneously or be mediated by Hsp70 chaperones. 20.10 20.10 20.10 21.20 19.90 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.56 0.71 -4.31 15 361 2012-10-02 15:26:12 2003-04-07 12:59:11 12 7 242 0 25 357 7 124.40 36 38.95 CHANGED sulhhps+psALcp+YIQsNtsphsshLVhDlD+s..sA..............ththtctst.sPNhhstNssNGHuHLlasLs.PVpss-t.ARpKPlpYhAAlppuLpptLsuDhuY.SGLlsKNPtHs.......pWpspphpsc....YsLccLuchL- ..........................h.thhhts+thAlt.h.YhQhNp.sthhtaLVaDlD+t...su.................lsWp.-tshPsPshhsp.......N.p.s...G......Hu..pLhYuls.s..V.p.....ss.....ps.....ucsKslp.....YhtAlppuhst.+L...s.A.....DssY.uGhlsKsPhHs.........................tWpshchppcs...YpLs-LAshl-.......................................... 0 6 14 24 +777 PF03248 Rer1 Rer1 family Bateman A anon Pfam-B_3358 (release 6.5) Family RER1 family protein are involved in involved in the retrieval of some endoplasmic reticulum membrane proteins from the early golgi compartment. The C terminus of yeast Rer1p interacts with a coatomer complex [1]. 
21.60 21.60 22.10 22.20 19.90 21.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.21 0.71 -4.69 34 427 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 311 0 286 395 2 163.90 50 84.44 CHANGED sslsphhp+hpppYQphLDcosPasthRWhshshLlhlFhlRlhhhp.GWYlVsYuLGIYLLNLFLuFLTPKhDPuLpppp...t.-tG................LPt................cps-...EFRPFIRRLPEFKFWasss+AhhluhhhoaFshFDlPVFWPILlhYFIlLFslTM++QIp..........................................HMIKY+YlPFshG.Kt+Ystp ...............ss...thhscltphYQtaLD+.........oTPasthRWlsshslhhl...ahlR..lh........hhQ.GaYI.........VsYuLGIYlLNLFlAFLoPKh..DPslptpp.....ptGs..................sLPo...............................+ps-...EF.RPFIRRLPEFKFWausT+.AhhluhhhTaFph....FslP.VFWP...ILlhYalhLFslTM+RQIp..........................................HMIKY+YlPFshG.Kt+Ytt.p............................. 1 98 166 240 +778 PF02453 Reticulon Reticulon Mian N, Bateman A anon Pfam-B_2196 (release 5.4) Family Reticulon, also know as neuroendocrine-specific protein (NSP), is a protein of unknown function which associates with the endoplasmic reticulum. This family represents the C-terminal domain of the three reticulon isoforms and their homologues. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.94 0.71 -4.65 90 1346 2009-01-15 18:05:59 2003-04-07 12:59:11 12 20 297 3 619 1288 1 158.90 25 44.60 CHANGED ssDllh.WRch+touslhuushs..hLhp.l..tphsllolhspl.hlhsLssshshphhtpllps..............h...clslsc-thpphssslhthlNpslppl+clh...hsc-lhpolp.hsshhallohlGuhhshhTLlhluhlhhFolPllYc+apcpID...phlt....hspsplpc .........................h-llh.W+ch+..t..ouhlh..uushh..hLh..p...l.....hthol....lolhuhl...hlhhLs.sshs....h...p..l..h..p...p..llps..t............................ht....pls.l.sp......-.....t......hp.......phsss.l..htpl...Np...slttl+.c..lh...hsc...D...h...h.p............sl............p...h.s..l.h.h.a...llohl.G.u..hhs.sl.TLh.h.luhlhhao..lP.h.lYcc..hpspIDphlt.......hsppph..p........................................ 0 125 283 450 +779 PF04527 Retinin_C Drosophila Retinin like protein Waterfield DI anon Pfam-B_4914 (release 7.5) Family Family of Drosophila proteins related to the C-terminal region of the Drosophila Retinin protein. Conserved region is found towards the C-terminus of the member proteins. 22.20 22.20 22.50 22.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.51 0.72 -3.98 18 168 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 19 0 91 157 0 73.70 44 54.73 CHANGED lhpEPsVA+VGslV+slPoAVSHQSpT.VHs.pt.lhpPVl......APsVKsT.lhp....sPl..s.........................tAAPll+s ................llpEPslA+VGs..V..V+olPo...AVSHQStT.VHs.pt.llp.PVV.......AP..lVKoT..hhs......uPllts......................tuAPll..................................................................................... 0 17 20 58 +780 PF03732 Retrotrans_gag Retrotransposon gag protein Finn RD anon Pfam-B_3194 (release 7.0) Family Gag or Capsid-like proteins from LTR retrotransposons. There is a central motif QGXXEXXXXXFXXLXXH that is common to Retroviridae gag-proteins, but is poorly conserved [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.19 0.72 -3.92 57 4914 2012-10-02 13:37:57 2003-04-07 12:59:11 12 294 205 0 2568 5331 10 92.40 19 10.14 CHANGED phhshtLputAtpWapslhsspht...............oWpphpptFhppahs.tphsphppclhslpQ.sscolpEYhpcF.cplhppss......hsc...................pshlptahpGLpt ...........................................................h.htLp.s..A.t.tW.h.p..p..h..ppht..................................oW...p...c..h.p..pt.F...hpp.F......h..s...s....t...tt.s.t..t..c..p.p...l.p...s...l....p..Q..s...sc..ol.p-Yh.pcF..p.p..lt..p.ths..........hsp..........................t.hh...th..ah.tGh...................................................................... 1 853 1171 1588 +781 PF01694 Rhomboid Rhomboid family Sohrmann M, Bateman A anon Pfam-B_1399 (release 4.1) Family This family contains integral membrane proteins that are related to Drosophila rhomboid protein Swiss:P20350. Members of this family are found in bacteria and eukaryotes.\ \ Rhomboid promotes the cleavage of the membrane-anchored TGF-alpha-like growth factor Spitz, allowing it to activate the Drosophila EGF receptor. Analysis has shown that Rhomboid-1 is an intramembrane serine protease [2][3][4] (EC:3.4.21.105). Parasite-encoded rhomboid enzymes are also important for invasion of host cells by Toxoplasma and the malaria parasite [5]. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.89 0.71 -4.17 65 7854 2012-10-01 23:21:32 2003-04-07 12:59:11 17 73 4018 16 2797 6356 2105 148.00 23 48.95 CHANGED shtpsphWR.....llTshalHs..hhHlhhNhhsLhhhGh.lEphh.....GphRhhhlYlluGlhu...ulhshlhssts......................tlGASGulaGllGuhhshhhhshhhhh................................hhlhhhhhhlslshshsh...hstlshhu...........................HluGhlsGhhhuhhlhtthp ....................................................h....phaR......llo...sh.F..l...H.........h........u...........h.......h.........Hl..hhNhlh....L....h.............h.....h.Gth....l...E....p...h..h......................G...p.....h....+h......h.h....l...a.l.....l...uu...l..hu..........ul.h....p....h....h...hss.s............................................................hlGASG..u.....l...a....Gl.....h.us.h.h..h...h..h.h...t.s.t.t.h.hh..........................................................hhhhh.h.h.h.h.h.....l..h.h..sh.h.hsh.............hs...s..l..s..hhu............................................................Hl.u..Ghl...s..G..hhhuhhh.....hh............................................................................. 3 991 1753 2345 +782 PF05104 Rib_recp_KP_reg Ribosome receptor lysine/proline rich region Finn RD anon Pfam-B_3249 (release 7.7) Family This highly conserved region is found towards the C-terminus of the transmembrane domain [1]. The function is unclear. 
25.00 25.00 25.80 25.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.36 0.71 -3.69 6 77 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 30 0 24 79 0 127.50 54 14.53 CHANGED MKETSYEEALAKQRKEpuKTQ.ptKsDKKKK-KlsEKKsKuKKKEEKPNGKIPEpEsstEsocp........llltppP..sPsVsssPspVsls...PsVAshPcsotPs.opcpsuuP...tKuss..sPpspppKppKsA....KstPAPsptussP..hsuKuAPlsAp ...MKETSYEEALApQRKEhuKTp.ppKs-KKKK-K.ssEKKsKsKKKEEKPNGKIP-p-ssspso..........................lh.ppP...sPsVslsPssVt.........s.lh.tPhsssPs..s.pcchsus...........Pp.cpppKp.pKs.A........KscPAsuphsss..h.ssKuA.h...s............................................................. 0 2 6 11 +783 PF00636 Ribonuclease_3 Ribonuclease III domain Bateman A, Finn RD anon Prosite Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.55 0.71 -3.63 239 3186 2012-10-03 08:45:47 2003-04-07 12:59:11 21 88 1836 63 1255 5519 1999 113.60 26 26.36 CHANGED ppLtaLGDullchhlpphlh...........................tspLsp........h...psthV....sspshu..phsp............p.......................................L...tp....hlp......................................................................................................................................................................t...................phhusshEullGslaLsss 
...........................................................................................................pLtal.G....DAl..hchhlpp.aLht........................p........sph..c.......................sspLpp.........h....ps.phV.......ss.cs.Apl.h.t..........................p........................................................................hh.......................h..pp........hl.p.p...pp......phhcptcs..................................................................................................................................................................................................................................................................................tpt.tt..p.......pssht.sh.phu..sshEAllGhlalst.t................................................................................................................................................................ 0 428 728 1043 +784 PF04597 Ribophorin_I Ribophorin I Kerrison ND anon DOMO:DM04452; Family Ribophorin I is an essential subunit of oligosaccharyltransferase (OST), which is also known as Dolichyl-diphosphooligosaccharide--protein glycosyltransferase, (EC:2.4.1.119). OST catalyses the transfer of an oligosaccharide from dolichol pyrophosphate to selected asparagine residues of nascent polypeptides as they are translocated into the lumen of the rough endoplasmic reticulum. Ribophorin I and OST48 are though to be responsible for OST catalytic activity [1]. Both yeast and mammalian proteins are glycosylated but the sites are not conserved. Glycosylation may contribute towards general solubility but is unlikely to be involved in a specific biochemical function [2] Most family members are predicted to have a transmembrane helix at the C terminus of this region. 
25.00 25.00 27.80 25.30 24.50 24.50 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.41 0.70 -5.97 40 386 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 288 0 244 366 1 397.90 32 80.96 CHANGED sacNssltRsl-Lspsal+pohplslc.....Nhu...spP.tscYhhshss..c.hspluhlssp.ps.spttsph........slclppsp..t...........aplpLPpP.ltPusplslplp.ashspslpPhPspIsQs-pQhlhapssta.hhSsYtTcp.Qphpl+hs.sspl.saTpst.............sspppssslsYGPa..pslsu......aohpP.ltl+YEpspPlsplspLpRsIEV..SHW.GNlshEEpYpLpNsGAcLputFSRl...-appsphp................................................sssulpplchhLPs..supDhYYpDplGNlSTS+hpsspt.h.....................Lcl+PRaPlFGGWpasFslGashshppaL+psust..pYhLplPhlsuhp...-shh-plplcllLPEGApslclps.........P.hshtptphshchoYLD.ThGRsslslphpNls...--tp.pplhVpYca.......sthshhpKPlhIsuhhahlFluhhllsp .............................................................................h.s.sl.RplcLpp.ph.s+.shplhlp........shs...sps...spah.hsh.s........p..hspluhlpst.pttpptt................hphtt.tp..th.......................................................aplpLsps.ltsutphslhlp.hhhspslpPhPtpIsQ.s.-..p.Qhlhapssta....hhS.sY.Tpp...Qphpl+hs...ospl..s.aTphs..................................sspppssplpYGPa...cslss........ho.ps..hpl+aEpspPhhslsplp.R.sIEl.....SHW.GNlulE...Ephp.LppsGApL.cusFSRh...-appp...t................................................shsulpphchhLPs..uup-sYYpDpIGNlSTS+hh.tp.cpsp....................l-l+PRaPLFGGW+hpahlGYshP..........pa.Lhptss..........pahLph.hlstlh...sh.sh-plpl+llLP.............EGAcslclps...................P.hslpptt....p.h.chTYLD..ohGRsslshptpNll....-pah..ppl..hVpYsa.......st.hhhLp..cPLh.lsu.shahlFhsshlhh.............................. 0 87 140 202 +785 PF01775 Ribosomal_L18ae Ribosomal L18ae/LX protein domain Bateman A anon PSI-BLAST Q02543 Domain This family includes eukaryotic L18ae as well as archaebacterial specific LX. Ribosomal protein L18ae forms part of the 60S ribosomal subunit. 
22.90 22.90 23.50 22.90 22.50 22.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.37 0.71 -4.23 63 714 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 512 7 414 682 35 104.30 44 66.40 CHANGED l+pYpVhGchhss...........................................................................................................................p..................hsshcshhKEhRshsppsAl-phYs-hGu+H+s+tppIpIhcVp ............................................................................ppYpVlGRtlPo....tt.t..s.la+M+lFAsNplhAKS.....................................................................+FWYFlppL+KhKKusGEllthp..l..............................hE+pPhplKNa...G.I....W....l.R.Yc...SR.oG.p.HNMY+EYR-hohsuAVpphY..........p-MuuRHRsRhp..sIpIl+V.......................................... 0 140 231 342 +786 PF00828 Ribosomal_L18e Ribosomal protein L18e/L15 Bateman A, Finn RD anon Pfam-B_1295 (release 2.1) Family This family includes eukaryotic L18 as well as prokaryotic L15. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.86 0.71 -3.77 104 6291 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 5075 302 2015 4096 2465 118.70 35 75.86 CHANGED Ghhtppct+sp+ut+sp........ppc.ha.th.h....la+hlu+.+....sFpp.hh...hhsphphssl...........................Nls+L.p..thhtts.........ssc.hllshuhl.......ts.hKlLGtGtl..phs.lsVpAhphScsAcp+IcpAGGpshhh ........................................................................s...GKTuGR..GpKGQ+uR.uGs.......ts+h......GFEGGQhP....LaRRLP.K.h...........GFss.hh...........phs.V....................................NLscL..s...p........t..ps................ls.p..sLhtsullt.......................tshlKl.L...u.........s...G............c......l...s........p.t....ls.V.c........u........t.phS.cuAcpt.Ic.uAGGplp..h................................................................................... 
0 703 1254 1677 +787 PF01907 Ribosomal_L37e Ribosomal protein L37e Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes ribosomal protein L37 from eukaryotes and archaebacteria. The family contains many conserved cysteines and histidines suggesting that this protein may bind to zinc. 24.50 24.50 24.70 25.00 23.00 24.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.92 0.72 -4.17 46 667 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 505 75 372 579 23 50.80 56 57.37 CHANGED sKGTsSFGK+pp+.oHhhCRRCG+pSaHlpKcpCAuCGassu+ph.pYs.WspKshc ...................sKGTsSFGKRpsK.oHsLCRR.C.GR+SaHlQKpsCuuCGYPuu+pR.cYN.Wu.KAh................ 0 138 223 305 +788 PF00347 Ribosomal_L6 L6; Ribosomal protein L6 Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.30 21.30 21.40 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.42 0.72 -3.48 132 10746 2009-01-15 18:05:59 2003-04-07 12:59:11 18 5 5013 462 3006 6705 3987 74.40 26 80.83 CHANGED lshG.hclphps.....plhhshG.pt....lphphsps.lplph......phshhhh.puhc+pp.........ltphRshhs.hs+Glp.G ..................................lssG.hc.s.phpG...........pllhshG.p.ts..........lphp.h.s.ps...lpl.....p.h.......................................p........h....s........hhh....h...p.........G...t...c...+.phs...................thtshRs..s.h.scGVp.......................... 0 982 1857 2473 +789 PF02482 Ribosomal_S30AE Ribosomal_S30; Sigma 54 modulation protein / S30EA ribosomal protein Mian N, Bateman A anon Pfam-B_869 (release 5.4) Family This Pfam family contains the sigma-54 modulation protein family and the S30AE family of ribosomal proteins which includes the light- repressed protein (lrtA) (Swiss:P47908) [2]. 
23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.14 0.72 -3.55 144 5238 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 4060 21 1172 2968 717 96.10 30 63.58 CHANGED plploG..+p.l-lo-ul+palpc+ls+.lp.+ah.s....ph...hp.sc..Vhlsht.........t....ptpsElol.hh..sGh..hlpAps.pspDhYuAIDhss-KL-+Ql++aKc+..hp.s+ ..................................hsIpG..+p.l-.lTcA.lRpaVppKl..s..K..LE..+ah..s...pl......hp..sc...VhLps..............pp.p.stp...sEs..Tl...h...sus..........hlp......A......p..........s......p.........s......p....D.......hY....u......AID....hlh-K....L-RQL....pKhKcKhpp........................................ 0 368 735 983 +790 PF01201 Ribosomal_S8e Ribosomal protein S8e Finn RD, Bateman A anon Prosite Family \N 21.30 21.30 21.60 21.80 20.80 21.20 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.67 0.71 -4.01 93 1149 2009-01-15 18:05:59 2003-04-07 12:59:11 17 11 591 7 655 1075 110 194.20 35 92.35 CHANGED Muhscs..cp+R+tTGG+hcht+K.KRKaEhGRs.....................................................ss.T+l........u....tcl+plR.........................sRGGNp.KhR..............................uLRhcpuNhsasppssoc+s+IlsVhhNsuNschlRpsslsKGsIlplsss............-LG...............................................................................hAploSRPGQ...............sGhssuhll 
.......................................Ms.sc.t.pc+R+tsGt.+hchhcK...KRKhEh.G.cs...................................................................sssTpl............up...ppl+phR...................................................................sRGGspKh+.................................................ALRhcpuNho..asptssTt+scIlsVhhNsSNsphsRpssL....sKusIlplsss............pLGhhttst...............................................................................h..hthhAploocPGQ...............sGpssuhlL.............................................................................................................................................................................. 1 231 377 536 +791 PF00652 Ricin_B_lectin Ricin-type beta-trefoil lectin domain Bateman A anon Prosite Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.45 0.71 -4.09 97 3906 2012-10-02 19:42:32 2003-04-07 12:59:11 17 272 770 168 1967 4763 232 120.80 19 22.78 CHANGED ssshlpths..sshCLDst....tttpsssltlhsCss......sss..QhW...phsts......splpstst..........CLssssss..............s.ltlhsCps.ssst......QtWphps........ssplhstp.ss..hCL-stt.......sssplhlh.pC....sss....ss.QpW ...................................................................h.....t.ht...sshC..l-st............ttsst......l.t...lh.....s.Cps..............sss.......Q...h.a............th..s.t..s....................t....p....l...p...t..t.s...t...........................C...Lssssts..................s......l...tl..h.s.C....s......s...sss................Qp..W.phps.....................ss..p..l..h...ph....t..ss........hCLsstt.................sstt.l.hl....t...C....sts......s..QtW............................................................................. 0 706 1030 1517 +792 PF00355 Rieske Rieske [2Fe-2S] domain Finn RD, Griffiths-Jones SR, Eberhardt R anon Prosite & Pfam-B_31 (release 4.1) Domain The rieske domain has a [2Fe-2S] centre. 
Two conserved cysteines coordinate one Fe ion, while the other Fe ion is coordinated by two conserved histidines. In hyperthermophilic archaea there is a SKTPCX(2-3)C motif at the C-terminus. The cysteines in this motif form a disulphide bridge, which stabilises the protein [4]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.08 0.72 -10.67 0.72 -4.42 146 13057 2012-10-02 12:49:59 2003-04-07 12:59:11 21 95 3458 318 4393 11916 6165 94.20 19 31.33 CHANGED sahhlsppsclsp....sshhhhph.spp.llht.stcGphhuhtshCsHtGsh................lsts....tp......t......hpCshHGhpashp....GplhpsPs................hpth.sthphs .................................................................h...tpl.t............t.h..h.h...h........h....s....t.....t.........l..h...h...h....s........t.........s.....u...p.........l..h..uh.ts.hCsH..t..G.s........................................................lst..s.....ttp..................tt.............th.hCP.hH...G.h....pash.....s......Gph....h.p....s.Ps..t............................tt..................................... 0 1183 2635 3671 +793 PF00866 Ring_hydroxyl_B Ring hydroxylating beta subunit Bateman A anon Pfam-B_771 (release 3.0) Domain This subunit has a similar structure to NTF-2 and scytalone dehydratase. 
20.50 20.50 20.50 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.64 0.71 -4.51 44 1346 2012-10-03 02:27:23 2003-04-07 12:59:11 13 2 779 109 317 1106 162 141.90 28 83.48 CHANGED +EAcLLDs.....pcacpWhs.lhs-DlpYahPhppsc.ptsps............ptshhas-s+ttLcsRVtRlposhuWupsPsoRTpHhloNl.ltts.psssplcVpSsahlaRsRhc...tpschasGpppchLRpsssu.....hclspRpllLcpssl.sp ...........................................................pEApLLDc.....tcaccWLs.hhs--...h.pYhhPspss....ttphspc.th................hshla.p-s+stLccRlhRl.c.o.shua.u.p.p.PssRT.pHhl.oNlp...l...h....pt.....t.......p.....s...s....t...........hp.VRsNahlh.c.sRtc...........p.ps.h.a.s.Gpp.h.cplccss-s.........h+lhc+pllLcpshltt................................... 0 51 148 245 +794 PF04068 RLI Possible Fer4-like domain in RNase L inhibitor, RLI Kerrison ND, Finn RD anon COG2042 Family Possible metal-binding domain in endoribonuclease RNase L inhibitor. Found at the N-terminal end of RNase L inhibitor proteins, adjacent to the 4Fe-4S binding domain, fer4, Pfam:PF00037. Also often found adjacent to the DUF367 domain Pfam:PF04034 in uncharacterised proteins. The RNase L system plays a major role in the anti-viral and anti-proliferative activities of interferons [1], and could possibly play a more general role in the regulation of RNA stability in mammalian cells. Inhibitory activity requires concentration-dependent association of RLI with RNase L [2]. 20.60 20.60 21.20 21.20 20.10 20.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.63 0.72 -4.36 80 889 2012-10-03 08:56:42 2003-04-07 12:59:11 10 13 472 2 602 859 65 33.10 36 7.40 CHANGED h+lAllch-....cCc.PKKCst.cht+hssl......s+sGcp ..........+lAlhsh-................+Cc.PK+Cst.cht+hs.l......lRhGp............ 
0 200 348 505 +795 PF04437 RINT1_TIP1 RINT-1 / TIP-1 family Bateman A, Wood V anon Bateman A, Wood V Family This family includes RINT-1, a Rad50 interacting protein which participates in radiation induced checkpoint control [1], as well as the TIP-1 protein from yeast that seems to be involved in a complex with Sec20p that is required for golgi transport [2]. 24.80 24.80 25.20 25.10 24.70 24.70 hmmbuild -o /dev/null HMM SEED 494 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.68 0.70 -5.95 19 318 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 250 4 239 330 2 430.90 22 62.61 CHANGED hsl-hhlpPlclRFpYHFoup+.Tspl-K.PEaahshlhchlss.ssFhspplQPlhDc........hthshhss+ppFIsull.hlpcKlsspl..................plppc.phhsHLlcEllsFDpcl+psasY.s...............h..slplLs.cpsha-+WlplEcchAls+hcthlps.cshphp.pt.............l.sshssh+sscsAtph.cLLpslh-RhpsLsshs.clpFLhslQlplhcpFhppLppth.th.h................tphpssssL.+hspllsuspYlpphlc-WuscVaFl......p.t...............phscluscsou..................lFD-shsshc+.lchchpshIssslhcshcsth+sYh+.hspWsohssp......stt.shssSu.-lssshphLpsplshLppt.Lshsshtplh+plhtslppalassllhts.pFSpuGusQhthDh.cpLhslhsh..........stpspthhp+LsEulhLLsLphsts...tphh.tt.tc........................spsspshLsEl..ulppLop.........s-spslLpRRs 
...............................................................................hshphhhpPhthRFp...aHF.p.u.p.+...Tshhs......K.....P.....E.....aahs.lhphhpph.thhtphlpshhsp..........................t.....h.ss.h.pa..hpull....hlhpKltt..l.....................................................................p.h.tps..thhsHhlpphhtF-pp.lp.......ph....asYss.......................................................s.shplL.....ptp..h....h..ppWlphEpc....hAhp+hp.t.h.h.ps....ssh....p...p...........................t..s.ph+..ss.sA.phhpLLpslp..........c+.apsL.phs.clpFl.plQhtllcpah.pLhph................................................................tt.pt.sth.phstlhsuspaltphl...p-Wup..p.hhFl............................p..............................thtphss.ptss.....................................................................lF-phhs.hpp.htpph.p.lhp...tlhpphptth..+sYhp...ppW..sh.tt................................hs.Ss.phs.hlt..hLppplt.Lppt.Ls.hs..........hhth.hptlhptlsphlhp.pll....h......ts...pFst.........sGstQhthDh.ps..lhslhp....................ht...p..s...p...thh.pl.p.-uhh.lLs.h..ht.t..................t....t...................................................................................hpthhtph....t.l.t..ls..........tph..lL.hh.................................................................................................................................................................................................................................................................................................................... 0 83 134 198 +796 PF01163 RIO1 RIO1 family Finn RD, Bateman A, Wood V, Mistry J anon Prosite Family This is a family of atypical serine kinases which are found in archaea, bacteria and eukaryotes.\ Activity of Rio1 is vital in Saccharomyces cerevisiae for the processing of ribosomal RNA, as well as for proper cell cycle progression and chromosome maintenance. The structure of RIO1 has been determined [4]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.31 0.71 -4.95 28 1707 2012-10-02 22:05:25 2003-04-07 12:59:11 17 19 847 16 1056 4738 997 172.30 27 45.74 CHANGED sVYpuhs.....tsup.....................phAlKla+sutssF+c.hccYlss-hRap..h.+tsh+pllchWAcKEa+NLpRltptG.l.VPcPlshpcplLVM-alG.tpGhsAPpL+-sp.......tcscplatcllp......hcthYpcspLVHuDLSEYNlLlpcs.clhlIDhuQuVphs.HPpAhpaLcRDlpNltpFFc++uss....hhshcpl .............................................................................................lahs.s........pt.......................hshKla+.......h.......s.h............h.......t.......F+......p.......h.......cph.......h.......s....h..........p.......h.......+.....................h.p...p...hs.......h...h.h......c....h....h.s........p...+...E.hp...sL.t............p..........l.....t.....p...........s..................G.......l...............s......P...c.........P........l........t.........h..........p..........c......p........l........l......l...M......-...h....ls........................G..........h.....s.....h....s....p....L......p.-.s.p.......................tp.h....t...p........h...a...t..p.llp................hh...t...h.....h......t..ps.....s.......LlHu...D..L..S.E...a...N.lL..l...............p.............p............s.....p..............h.....h..lI...D..h...s.......Q..s...V..........ph.....s....p.......s.............p..............A..............t.........h........h..pR..Dlpslt..p.aFt+hh..........h.................................................................. 
1 337 599 872 +797 PF01000 RNA_pol_A_bac RNA polymerase Rpb3/RpoA insert domain Finn RD anon Pfam-B_172 (release 3.0) Domain Members of this family include: alpha subunit from eubacteria alpha subunits from chloroplasts Rpb3 subunits from eukaryotes RpoD subunits from archaeal 21.30 21.30 21.40 21.70 21.10 20.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.56 0.71 -3.91 59 7300 2009-01-15 18:05:59 2003-04-07 12:59:11 21 25 6097 183 1610 4907 2300 120.20 33 38.59 CHANGED VpI..su.......ltHchuhlshlpEDlhc....hlh.hKth...............psp-ps..hlpLpspGsup..................VhAuDlph............sssVcll....................ss-hhIspLscstc.lchchpsccGhGhs.Ac....................ao .........................hph..............VhHEa..SolsGV..pE.D..V.hc....IlLNl.Ktls....h.......................psppcp......hlplshp.Gsu.................................VTAuDIp.h....................................................sucVEIl....................................................N..P-.hhIssL...s..c............s.s..p..lphclplp+GRGYlsAp..tppptt.....h........................................................................................................................... 0 566 1019 1357 +798 PF04997 RNA_pol_Rpb1_1 RNA polymerase Rpb1, domain 1 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 1, represents the clamp domain, which a mobile domain involved in positioning the DNA, maintenance of the transcription bubble and positioning of the nascent RNA strand [1,2]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.06 0.70 -5.18 24 12239 2009-01-15 18:05:59 2003-04-07 12:59:11 7 127 9183 179 1981 10137 4485 254.70 33 31.57 CHANGED h+clcplpFulhSP-pI+.phShsclpps-ohp.sp...pPcpsGLhD.+lGs.........hcpch.............hCpoCthphtc...C.GHaGHI-LspPVaHIGahctlhplLcplChhCuplhhsppt......hhhp.t.......thsphp+hphlsphstppshpcts...............................................................thtp.pshhtc.G.hslhthhc........tcphpcpht......hps.pshplhc+Isccchhlhuhsspts.+P-hhILoslPVPPPslRPuV.h-utp...hu-DDLThpltcIlppNppL++hppp.uAPptllpcphphLQ.plsshhDN..thsuhs.st.pssRPlKSlspRLKGKpGRhRsNL ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p..phGaIcL.s...P.l..Hl.......h.ah..pt.l.p...ch....s..h.h..........s.h...........t...hp.........s.........-......p..............h..........h..h..h..ht...........................ss.h.p.p.h.ph......l.h....p......p.....h...h...p.....h...h...p.c...hs......................................................................................................................................................................p...............t....t......G...............t.l..h....t..h...hct......................t..pph....p...hp..................hp.s.tt..s...p....h....hp..+....h.........cc.............h.......h..........h.............u.......h...................t.........s..........h............s.........+..........P.........-.........W........M.l.........l.s.l.lPV.PPslR....P.........l..l..D....Gut......hu..p....sDL...s..hh
tc......l....Ip.......t.......N....s...............p......l..............p..+...h........................p.........................s....u.........P.................t..............h................l............l..p..................p...........................cp.........h............L...Q..........t.Vss.h.hDN......s.h...t.................s.....s.............h.........................t......t.....+.sh.Kuht..ph.lcGKp.....G.........RhRtsL..................................................................................................................................................................................................................................................................................................................................... 2 707 1252 1678 +799 PF00623 RNA_pol_Rpb1_2 RNA polymerase Rpb1, domain 2 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 2, contains the active site. The invariant motif -NADFDGD- binds the active site magnesium ion [1,2]. 
23.40 23.40 24.50 23.60 22.90 23.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.15 0.71 -4.29 33 13383 2009-01-15 18:05:59 2003-04-07 12:59:11 15 145 9364 137 1887 9901 3260 133.80 57 17.84 CHANGED GKRVDaSuRoVIss-PsLclcplGlPhphAhpLphPphlsphNhcclpphh.pt.p.a.th..h..p.pus+phlptt.........s.plh.................tlhR+lhcuDlVLhNRQPoLH+hSIhuHcs+llpt...+ohRLp.sVCssYNADFDGDEMNlHlPpo.pA+uEuhpLhhs .............................................GKRVDYSGRSVIlV.GPp.LpLaQCG..LP+c.hAlELFps.Fllp.tL....t.pp...lAs...................................................................................s....l........t..s..........A.......K.....ph......lccp................cs..hVW............................................................-lLpEVh.p..sHPVLL.....NRAPTLHRLGIQA..........FpPh....LlEG......+AIpLH.P.LVC....p.uaNAD...F...........D........G.DQ........MA.V.H.V.PLShEAQuEARhLMh.s............................................... 0 671 1188 1598 +800 PF04983 RNA_pol_Rpb1_3 RNA polymerase Rpb1, domain 3 Bateman A anon Pfam-B_288 (release 4.2) Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 3, represents the pore domain. The 3' end of RNA is positioned close to this domain. The pore delimited by this domain is thought to act as a channel through which nucleotides enter the active site and/or where the 3' end of the RNA may be extruded during back-tracking [1,2]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.91 0.71 -4.50 112 9996 2009-01-15 18:05:59 2003-04-07 12:59:11 13 133 7655 137 1909 7790 3140 147.50 24 14.34 CHANGED pIloPpsGcPlhussQDhlhGsYhlTtcc.........................sFhspp-shphhtts............................................................................................................pssIhhs.pt....................................haou+phhuhll.t..............pl.hpph..............................s.hhlppu.lhts.lsct.hu........tpshusllphlhcchG.ptssphlsplpplshtahtptG.holGlsDh ..................................................................................................................NllsPtsGcPlhsssQDhl...lG.hYhhThcp.........................................................................................hFhshpcsh.hh..h.................................................................................................................................................................................h..pst..l.hh....t..............................................................................................................hhhhc..p..hh...uhh.l.s..........................pl...th.........................................................................................................h.hlpps...h.................................s...........................pshupllphh.hp..c...........h....Ghp..ssthhsplppl....s.htahhh...tG.holGlsDh........................................................................................................................................................................ 0 675 1204 1625 +801 PF05000 RNA_pol_Rpb1_4 RNA polymerase Rpb1, domain 4 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). 
This domain, domain 4, represents the funnel domain. The funnel contain the binding site for some elongation factors [1,2]. 24.80 24.80 24.80 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.59 0.72 -4.31 106 8924 2009-01-15 18:05:59 2003-04-07 12:59:11 12 128 7522 137 1812 6872 2412 93.40 29 8.38 CHANGED lppscc.scl...ph.upshccshc.....thhpph.hs..pc............tp.................NslhhMstoGu+GShhNlsQlsuhhG..ssps.p.........p.h.............php.h.ptPhspGFh ........................................................h....cchpcl.....sh.spstcpshc......shhspl...s.....p.pp..................t..t....t.t.....................................NslhhMus.SGA.+GS.............sh.....plpQlsuhhG.hu.........h.s.t.......................................h.......................................................... 0 647 1141 1537 +802 PF04998 RNA_pol_Rpb1_5 RNA polymerase Rpb1, domain 5 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 5, represents the discontinuous cleft domain that is required to from the central cleft or channel where the DNA is bound [1,2]. 
20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null --hand HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.68 0.70 -5.14 27 9187 2009-01-15 18:05:59 2003-04-07 12:59:11 12 161 6820 151 2086 7646 5482 395.80 25 36.28 CHANGED GLsspEFFFHsMuGREGLlDTAlKTAcoGYlpR+LlKshEDlhlpYDsolRsustpllQhhYG-DulDshphcthshhhh.hhsh.hptphhhshtp.hhhttt.........................................................................................................................................hthh.csthtphplhp.hphsppthpphhshhpspappsllpsGEuVGhlAAQSIGEPuTQMT.LpTFHhAGsuupssT.GlPRl+Ellplspstppsshsshhhhshspcptphhthphttpphthtthhhtt.hhhs.s.tpshtpptt.hhhphhhh.pp.tpt.hh.......hh.......h.h......................................................................................................................................................h......t....t...p.hhtttttthhtpththpptppthshpthtthtphtth.hphhhhhtsspthhhthhhsptthsspshh.htstchllph.pGsslptlhp......hps...lpss+phoNp..lhphhchlGIEAuRpsllpElpplhttcGhtlshRHlsllsDhMThcGhlhul...oRtGlspp.pShh.h 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................GLsshEaFhpshGuRc.............G.L.s.DTAl+T.ApoG......YLpR..RLVcsh.p-lhlp.cs..s....s....p....p....p....s....h.........h.....h........h..h...G..t..-hhtsht.ch.thhhhp.hhts.ptthhh..tt.h..t................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.h.ttthpthhlpsshsppsp.hhshhshspshhptphlp...GEA....V.GhlAA...QSIGEPGTQhT.h+TFH....
.h....u..Gs..u...s...t....s....................h.s....s..........p.......hp.p.......hp.h......p.................h...t..............h..t....................................................p....t....t..t......h....h.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...h.h.....h......t.............t.....................t.......h.........................t.................h....h.....ht....p.....l.p.pV..ap.........pG..h..t....s+php.h..lp..ph..h..p.p..ht....l...t.s.t..p.o............h.h........h.p.....h...................h............s......ch...................h......t...................ht..hp.....lhu..l...T+.tu.ltpp..ohh..s.........................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................ 0 752 1323 1780 +803 PF04992 RNA_pol_Rpb1_6 RNA polymerase Rpb1, domain 6 Finn RD anon Pfam-B_288 (release 4.2) Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 6, represents a mobile module of the RNA polymerase. Domain 6 forms part of the shelf module [1,2]. This family appears to be specific to the largest subunit of RNA polymerase II. 21.10 21.10 21.90 22.40 20.60 19.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.11 0.71 -4.55 153 1575 2009-01-15 18:05:59 2003-04-07 12:59:11 9 78 1215 90 258 1468 151 157.90 39 18.62 CHANGED hDGshlE.pQpl-slphSsp...pF-............+ca+lDl......s.spshh...........tshlptsh..p-l.tussc...h........QphL-cEacQLppD.....................RchLR......plhssu-.sphs..LPlNlpRlI.NApphF+...I....cppps.SDLpPhcllpsVcpLhc.+.LllVp..............................G....cD............................tLS.........pEAQpNATLLFphhLRSpLAsKRVlp.Ea+LsppAF-WllGEIEsRFppu ............................hDuhhlE..QplshlphSsp...tF-...................+ca+hDh.............s.spphh.......................pch..hp.s......p-l..hsshp..s......................p.L-pEa-pLhpD.....................RchLR.........p.l..h..sp....u-...pph................LPhNltRhl.sApphFp....l.......ptp.t...osLp.Ph.........c.....Vl.s...VppL...c+..hhlVt.................................G.....pD.............................lS.........hpApp.NATlLFphhLRSpLs.Kcl.sp.Ea+Lsp.AF-allGElEsRFtpu................................................................. 
0 98 149 223 +804 PF05001 RNA_pol_Rpb1_R RNA polymerase Rpb1 C-terminal repeat Finn RD anon Manual Repeat The repetitive C-terminal domain (CTD) of Rpb1 (RNA polymerase Pol II) plays a critical role in the regulation of gene expression. The activity of the CTD is dependent on its state of phosphorylation [1]. 20.30 6.20 20.50 6.20 20.20 6.10 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.46 0.74 -6.69 0.74 -3.36 99 4609 2009-01-15 18:05:59 2003-04-07 12:59:11 8 80 222 10 2717 4635 45 14.20 79 15.78 CHANGED SP..s.SP.....s..Y..SP...o.SPsa ............SP..T.SP.......u......Y..SP.....T..SPuY.............. 0 913 1434 2266 +805 PF04565 RNA_pol_Rpb2_3 RNA polymerase Rpb2, domain 3 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Domain 3, s also known as the fork domain and is proximal to catalytic site [1]. 21.00 10.30 21.10 10.40 20.90 10.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -8.99 0.72 -4.22 31 16867 2009-09-13 23:49:08 2003-04-07 12:59:11 11 131 12522 133 1834 13597 2855 63.90 55 9.45 CHANGED Qhh-phN.loploHhR+lsh....Guls+cptshcsRclHsopaGhlCPl-TPE.GtssGLlssLuhhscls .........................Qhhsph.s.lSp.loHhR..Rls............uuL..s.R-p..t.thc....sRclHsTHa....GhlCPhETPE.G..shGL.lpsLuhhupls.................... 0 652 1156 1555 +806 PF04567 RNA_pol_Rpb2_5 RNA polymerase Rpb2, domain 5 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Domain 5, is also known as the external 2 domain [1]. 
20.40 18.00 20.40 19.00 20.30 17.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.28 0.72 -3.99 64 7512 2009-01-15 18:05:59 2003-04-07 12:59:11 12 60 5291 96 702 6482 353 64.00 49 13.61 CHANGED apsLlppG..llEYlDsEEEEsshIuhs..th.......................................stp.sppaTHhEIaP .............WcsLlcsG.....llEYlDAEEEETsMIsMTPEDL-...R..ptsh...............................................................ttt..p.hht.hsss..sHhaT..HCEIHP................................................................... 0 239 399 587 +807 PF00562 RNA_pol_Rpb2_6 RNA polymerase Rpb2, domain 6 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain represents the hybrid binding domain and the wall domain [1]. The hybrid binding domain binds the nascent RNA strand / template DNA strand in the Pol II transcription elongation complex. This domain contains the important structural motifs, switch 3 and the flap loop and binds an active site metal ion[1]. This domain is also involved in binding to Rpb1 and Rpb3 [1]. Many of the bacterial members contain large insertions within this domain, as region known as dispensable region 2 (DRII). 
23.40 23.40 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.31 0.70 -5.72 117 16703 2012-10-01 19:23:01 2003-04-07 12:59:11 23 139 12411 138 1943 12966 6003 307.10 35 46.19 CHANGED slhAphhshspaspusRh.......................hhps...scps.t...................................................................hhshs.phpRssps.s...phsQ+P..lVpst.tlctsp..............hs-lshGpNslVAhhsasG.YNhEDulllscphlccshasSlahcp.....hps..........ccpc..hG..-chhpphsss...........scpshppL..Dcs....Gll.............................pl...................Gst......VpsGDlLlGKh.....s............hpthhsppspp.h+DsSlphtts..pp.GhV.cVtl.............................................................................................................................................................................................................................................p.sss.s.....hphl+VhltppRp.plGDKhuuRHGpKGlluhlhPpEDMPahp.cGh..ssDlllNPhGlPSRMsl...................GQllEshhGhAush...........................................................................pG.hhhssssFs...........................................................................................p..................ppl...............................tchLp.....................c.....t.....................................................................................Gpp..............hlasGpTGc.h.......cs.lhlGhhYh.KLpHhVs..DKhHAR.upGPhs..hl.....TpQPltG+up 
.............................................................................................................................................................................................................................................s.hhuph.shl...sp......us+h.......hhts.....ttt.t.....................................................................................................................h.h..h.a.p.RoNpss.....shsQ+P..hVp..hs........l...ctsp............hs..........t......hs.ELshG.....pNhlVAhhsa.....pG..YN.EDul.........lhspp.l.cshapShahcp...h.............................p.pp...........t....p.c...hh.t.c..p...............................ttthhppL.D.c.p......Glh...................................................Gs.......l......t.t..c..lllu+h.................................................t.h...t..p......h+-...sh...h.....s.......t..Ghl....tl.h.......................................................................................................................................................................................................................................................................................................t....s.......hl+lh..hh.p+h.p...GDKhuuRHGp.K...Ghlu.........hhh.....pDMP..a..........pG.........h....s..........s..DlhlNPhulPSRMsl...................uplhEh.hhuh..sut...........................................................................................................................................................................s......h..t..sss.Fst................................................................hp.........................................t.l.t.l.....................................................................................................................................................................u...hh.s..Gh.oGp...h............h...................................................................................................................................................................................
.................................. 0 704 1234 1655 +808 PF04560 RNA_pol_Rpb2_7 RNA polymerase Rpb2, domain 7 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Rpb2 is the second largest subunit of the RNA polymerase. This domain comprised of the structural domains anchor and clamp [1]. The clamp region (C-terminal) contains a zinc-binding motif [1]. The clamp region is named due to its interaction with the clamp domain found in Rpb1. The domain also contains a region termed "switch 4". The switches within the polymerase are thought to signal different stages of transcription [1]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.99 0.72 -3.72 134 7219 2009-01-15 18:05:59 2003-04-07 12:59:11 15 94 6073 138 1845 5823 2697 79.50 52 6.84 CHANGED GGQRFGEMEVWALEAaGAAasLpEhLTlKSD....DlsGRsphhps...................IscGcsh.css.lPESF+VLl+ELpSLulslclhtp ........GGQRFG.EME.......VWALEAYGAAa.sLQEhLT.l.K.SD......D.V....sGR..s+hYcs.........................................................................Ilc..G...c...s....h.....cs...G..hPESF+..VLl+ELpSLulslcl..p................................................................................................... 0 651 1163 1560 +809 PF03874 RNA_pol_Rpb4 RNA polymerase Rpb4 Finn RD, Bateman A anon Finn RD & COG1460 Family This family includes the Rpb4 protein. This family also includes C17 (aka CGRP-RCP) is an essential subunit of RNA polymerase III. C17 forms a subcomplex with C25 [5] which is likely to be the counterpart of subcomplex Rpb4/7 in Pol II [4]. 
21.60 21.60 21.90 22.40 21.40 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.49 0.71 -4.09 88 790 2012-10-03 03:05:55 2003-04-07 12:59:11 11 11 467 61 545 755 50 113.00 23 70.54 CHANGED hLo.sEshplLp.phpppppt...................................................................phsplhp..cslc.Ylp..p......................huchppt-s.........................spplhptLp......php........L.pchEthplsslhPpshsElpsllsphcp....................................ch..........s-...-plpplLchlpchh ..............................................................................................................L...Esh.lLp.phpppppt................................................................................................ttptphs.phhh..colp.Ylp.p......................hu+hp.spEs....................................................................lppl.h.phLp..............php..............L.p..chEh..h......pl.......s...NlpP..po..spEhpsl...ls.ph.ct....................................+..h..........s.-.....-plpplLc.lpp..h...................................... 
0 176 309 446 +810 PF01351 RNase_HII Ribonuclease HII Bateman A anon Bateman A Family \N 20.60 20.60 21.20 21.00 19.90 20.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.10 0.71 -4.67 14 6064 2012-10-03 01:22:09 2003-04-07 12:59:11 13 19 4789 32 1433 4292 2775 182.20 35 75.83 CHANGED hGlDEsGRGslhGPlVsAushls.cp.....h.thGlcDSKKLocp+RptLtchIp...................pthhuhtlsh..hpssphsthsltpsshhshhc.tlpph.slpsc..tlhlDu.ps.....Pt.hththps.lht.............uDuths.luAASllAKVpRD.hhh-hhpchsth.shspssGYsoc.+tptlhchssss.......htRhoFtss ......................................................sGlDEsGRGsLsGPVVs..AAV.....l......L........s....s..p..........................................h............G...l....s..DSK+.L..o.c.p.+.R.p..p.LhptIp....................................................................pp....sl...sh..sluh......spsp.c.I.....D..p.......l.....s..I..hp...A.s....h....hA...M....t..........c....A....l.......t.......t.......L........s......h......p......P..c.....................hl..L..l...Du.ph...........................st...h.....s...h......s...t...p...s...l....lc..................................................GDupsh.sIAAASIlAKVsRD..th.......M...h...............c.................h............s..........p.....p...............a..........P............t..........Y...............s...............a.............s...........p......ptGY.s.T.ptHh.pslt..chGsss..........h.HRpSFts................................................................................................................................................. 0 496 923 1217 +811 PF01138 RNase_PH 3' exoribonuclease family, domain 1 Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family includes 3'-5' exoribonucleases. Ribonuclease PH contains a single copy of this domain, and removes nucleotide residues following the -CCA terminus of tRNA. Polyribonucleotide nucleotidyltransferase (PNPase) contains two tandem copies of the domain. 
PNPase is involved in mRNA degradation in a 3'-5' direction. The exosome is a 3'-5' exoribonuclease complex that is required for 3' processing of the 5.8S rRNA. Three of its five protein components, Swiss:P46948 Swiss:Q12277 and Swiss:P25359 contain a copy of this domain [1]. Swiss:Q10205, a hypothetical protein from S. pombe appears to belong to an uncharacterised subfamily. This subfamily is found in both eukaryotes and archaebacteria. 21.00 21.00 21.10 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.70 0.71 -3.84 192 13598 2012-10-03 01:04:38 2003-04-07 12:59:11 16 86 4722 247 4120 10174 6384 132.80 29 40.48 CHANGED phRslplch..G...hhppAs...............GSshlphG..................s..TpVlsslpsst...t..............................tt..s..............lslphpht...shussph.......................pp..st....ssccph..........hup....l.......l............ccslcssh..........h...........phh.......hplplshpllss................DG............................................shhssul....suushALhsuslP ........................................................................................hRslslcs..G......hh.t.p..u.c.................GSs.l.h...phG......................-....Tp..l.L.s..s.so..hsp...ts.........................................................................hh.....-.hs........................t...ltscYph.........sausGch...................................................tpt...Gp......................suc+ph..................thuR.......L......l......................................................cR.u.....l..+..slhs....................p..................................................................tth.....hsl.p.ls....s...pl.lp..u.....sG....................................................us.p..h..A.u.l....sG.uolALhsuslP............................................................................................................................. 
0 1408 2573 3461 +812 PF03725 RNase_PH_C 3' exoribonuclease family, domain 2 Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family includes 3'-5' exoribonucleases. Ribonuclease PH contains a single copy of this domain, and removes nucleotide residues following the -CCA terminus of tRNA. Polyribonucleotide nucleotidyltransferase (PNPase) contains two tandem copies of the domain. PNPase is involved in mRNA degradation in a 3'-5' direction. The exosome is a 3'-5' exoribonuclease complex that is required for 3' processing of the 5.8S rRNA. Three of its five protein components, Swiss:P46948 Swiss:Q12277 and Swiss:P25359 contain a copy of this domain [1]. Swiss:Q10205, a hypothetical protein from S. pombe appears to belong to an uncharacterised subfamily. This subfamily is found in both eukaryotes and archaebacteria. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.01 0.72 -4.04 107 12290 2009-01-15 18:05:59 2003-04-07 12:59:11 10 60 4627 243 3309 8916 5491 69.10 29 20.56 CHANGED shssuloluhl..s............sp.......hllDPsh.pE-.................htpu...slslsh........ssst..........ph.slhpt........ss....st....lspcplhcslchutps .................................................sslAuluhGl.l...s.....................................sp.............hlh..Dhph...pE-..............................hu.ts..........Dh-hhV.........AGop................cs.h...sshtt................................stth.......tloc-.hhpALthA+p.................................................. 0 1115 2087 2755 +813 PF02755 RPEL RPEL repeat Ponting CP anon Ponting CP Family The RPEL repeat is named after four conserved amino acids it contains. The function of the RPEL repeat is unknown however it might be a DNA binding repeat based on the observation that Swiss:Q9VZY2 contains a Pfam:PF02037 domain that is also implicated in DNA binding. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.67 0.72 -6.75 0.72 -4.50 54 1765 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 175 8 892 1495 0 25.80 41 10.30 CHANGED cpLp++lspRPsh-ELlc+sILppps ........pLp+KLupRPshcELhc+sIL.tp..... 0 145 243 514 +814 PF04059 RRM_2 rrm_2; RNA recognition motif 2 Wood V, Finn RD anon Pfam-B_4981 (release 7.3); Family \N 21.00 21.00 21.00 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.47 0.72 -3.98 4 387 2012-10-02 20:46:34 2003-04-07 12:59:11 7 16 140 0 313 408 9 92.10 44 16.81 CHANGED RTTLMIKNIPNKYTppMLlutIDE+sKGTYDFLYLPIDFKNKCNVGYAFINhlpPppIlsFhcAFNGKpW-KFNSEKVAoLuYAcIQGKsALIu+FQ ...........................RTTlMl+N..IPNKaop.p.hLhshl..D.c.........p.....p..............p............G........p.......YDF..h.YLP.ID........F............p..........N.c......C.....NlGYA..FINhhsstthh............tFhp......sFp..........sp+W........p.t..F.s.........S.......c..........K..lsplsYA+lQGp.psLlt+Fp................................ 1 154 233 283 +815 PF01137 RTC RCT; RNA 3'-terminal phosphate cyclase Finn RD, Bateman A anon Prosite Domain RNA cyclases are a family of RNA-modifying enzymes that are conserved in all cellular organisms. They catalyse the ATP-dependent conversion of the 3'-phosphate to the 2',3'-cyclic phosphodiester at the end of RNA, in a reaction involving formation of the covalent AMP-cyclase intermediate [1]. The structure of RTC demonstrates that RTCs are comprised two domain. The larger domain contains an insert domain of approximately 100 amino acids [1]. 
20.60 20.60 20.70 20.80 19.90 20.40 hmmbuild -o /dev/null --hand HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.31 0.70 -5.57 29 1194 2012-10-02 15:27:11 2003-04-07 12:59:11 16 17 881 19 569 985 32 309.70 31 89.51 CHANGED lDGShtEGGGQlLRouluLSsloGcPl+I.hsIRAsRsp.PGLppQHLsulcslpclssAplpGhplGSppLhFpP...........uplcGG.shphDl......GTAGSlsLlLQslLPhhhFucpssclplpGGTcsthuPslDal+pVhLslLc+hGhpsc...lcll+RGaYPcGGGcVhhplp........Psp.hs.lphhchupltplpGhuhssplssphupcptcssttthsphh.tshhhttt........................ssusshhhshpppsshhuusulGc+GhsAEtVGccAAppLlcplpsuusVDcahuDpllhahALuss...phpsuclos..HhhTsltllcpF.hshca.clct .......................................................................pGs.tcGGGQllRpuLsLShloGpPhpI.ppIR........u..s..............R...s...............p...P..............GL.............h..............ppHlosl+hhscl.........s.s.....u..s..l..........u.....s.............phuup.plhFpP....................................Gt.l.p..G......G....php.....ash................so.A..u.ShsLlLpsl.LshhhF..........u................c...................s.......s.................p.....lpl..p.G......G.T....s..s..s.....uP....s.....hDalcpl.hhP.l....L....t.+.....h...G.l...p.tp.................hp.l...h++...G....a............h...P.....t....G....G....G.Vthpls....s.st.t.h.p.slpLh-pGplhphpG.shhssls.phspRthtshts.hthh..shh.hspp................................................................................................................................................................................................tssssss.hthpspshttth.shGpptsssEslutpsspplhc.ltpsusVsc.ahtDplll.MAL........u....s........s..............................u........chp...l........u.......p.........o..........HhhosltllcpF.hsspFt...h........................................................................................................................................................................................................ 
0 191 322 470 +816 PF05189 RTC_insert RNA 3'-terminal phosphate cyclase (RTC), insert domain Finn RD anon manual Domain RNA cyclases are a family of RNA-modifying enzymes that are conserved in all cellular organisms. They catalyse the ATP-dependent conversion of the 3'-phosphate to the 2',3'-cyclic phosphodiester at the end of RNA, in a reaction involving formation of the covalent AMP-cyclase intermediate [1]. The structure of RTC demonstrates that RTCs are comprised two domain. The larger domain contains an insert domain of approximately 100 amino acids [1]. 20.60 20.60 20.80 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.14 0.72 -4.01 99 1028 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 852 19 465 818 15 100.70 29 28.85 CHANGED h-pGplp..c....lpGlu..asspl.ssplApR.hcuAcp.hL.....th.h.-lplps..................tpsuh.usGsGlsLhAcspp..shhuusul..................GpcGhsAEcVGccAAppLlcpl.psu .......................h.-pGplhp..hRG.s............h..s..s.s.V..shpl..AcR.lssstuhh............sh.l.t-hpIps............................hspsp..u..PGsuloLhsE.....ops............h.hhthpsl...................................................Gcctss..AEsVutpsucplhc.ltp............................. 0 153 268 382 +817 PF00301 Rubredoxin rubredoxin; Rubredoxin Finn RD anon Prosite Domain \N 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.78 0.72 -4.09 43 2943 2012-10-03 19:45:42 2003-04-07 12:59:11 15 40 1967 86 844 2104 307 46.10 48 23.38 CHANGED +ahCpl..CGYlYDsspGDstpslsPGT.Fc-LP--WsCP.C..Gss.KcpF ........pa.Cpl..C.G.alYD..s..stG-P..........p...p......s..l..s.PG.T.......a...p....-l.P.-.c.WhCP.C..usu.KssF........ 0 288 568 732 +818 PF02759 RUN RUN domain Bateman A anon [1] Family This domain is present in several proteins that are linked to the functions of GTPases in the Rap and Rab families. 
They could hence play important roles in multiple Ras-like GTPase signalling pathways. The domain is comprises six conserved regions, which in some proteins have considerable insertions between them. The domain core is thought to take up a predominantly alpha fold, with basic amino acids in regions A and D possibly playing a functional role in interactions with Ras GTPases [1]. 20.70 20.70 21.00 21.20 20.40 20.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.78 0.71 -4.40 50 1686 2009-01-15 18:05:59 2003-04-07 12:59:11 14 59 103 6 970 1562 5 138.00 21 18.38 CHANGED sLssslEtlLpHGL+t...........................................................................................................p..thhhtpppshashhpph.................tph.hsssppl...hpplp.plpplpss..........................................................pu+......................t+AWl+hAL.c+hLspalphLhpspp...hl.............s........paYpstAllhsstts......hlhslLsuLss.lsFslshps ..........................................................................................................................................................................................hssslEthltHGL+t...................................................................................................................................................................................................p....hhhh..t.p.pp..shas..hh.pth.................................................tph...hsp.tp.ph............hpp.l......p....sl...pp..l..p..ss....................................................................................................................................................................................hu+.......................................t+Aalp..hu...L...........c..+h....Lspa.l....ph.Lhp..spt.....hh..................................p.........c.aY.c.t.t.Ah....l..h...sppts..........lhshL.hsL.ss.lpashsh...............................................................................
............................................................................... 1 257 332 591 +819 PF00853 Runt Runt domain Bateman A anon [1] Domain \N 20.50 20.50 21.10 26.40 19.70 19.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.74 0.71 -4.71 6 488 2012-10-03 00:25:27 2003-04-07 12:59:11 14 7 118 24 206 469 0 119.90 74 31.17 CHANGED ERols-hlsEaPGELV+TuSPsFlCSsLPuHWRSNKTLPlAFKVlALGEVsDGThVTI+AGNDENaCuELRNsTAVMKNQVAKFNDLRFVGRSGRGKSFTLTIoIuTsPPQlATYs+AIKVTVDGPREPRp+ppp ............................Rshs-hlu-Hsu.ELVRTsSPsFLCSlLPoHWRsNKTLPl.AF.K.........VVAL..G..-......VPDGT...l............VTVhAGNDENYsAELRNAoAlMKNQVARFNDLRFVG....RSGR.....GK.S.FTL.TITVh.TsPP.Q.VATYpRAIKVTVDGPREPRp+pp......................... 0 47 63 141 +820 PF00665 rve Integrase core domain Bateman A anon Pfam-B_10 (release 2.1) Domain Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site [1]. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.26 0.71 -4.00 230 47519 2012-10-03 01:22:09 2003-04-07 12:59:11 21 846 4253 244 8847 47451 3435 111.40 27 21.34 CHANGED psppshphhphDhs...hhp..s.t................................talhshlDshS+hhhshhhppc........ssptshphhpthhtthtsh...............hhlpoDpGspassp...............phpphhpphG..lphphspstsPpssuhsEphpppl+pph ...........................................................................................s..tts.thap.hDhs......hhp.......t......................................th...lh.sh.l.....c...s..h.S...t.h...l....u.......s..l.s.sc...................................sp.p...s..s..h..t.h...L.......c.............l...t...s.p.h.s...............................................sphl.+oD.s.G..s.p.Fsot.........................................th.p.t.h.....t...h.h..t.G......l...p..p......p.h....u........h.s.......h.......s.......P.p...s......p...G.h...sEsh..sppLKp..h............................................................... 0 3423 5434 6849 +821 PF00077 RVP rvp; Retroviral aspartyl protease Eddy SR anon Eddy SR Domain Single domain aspartyl proteases from retroviruses, retrotransposons, and badnaviruses (plant dsDNA viruses). These proteases are generally part of a larger polyprotein; usually pol, more rarely gag. Retroviral proteases appear to be homologous to a single domain of the two-domain eukaryotic aspartyl proteases such as pepsins, cathepsins, and renins (Pfam:PF00026). 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.29 0.72 -4.09 50 124336 2012-10-02 15:32:34 2003-04-07 12:59:11 15 164 892 1164 420 112085 121 94.30 86 28.21 CHANGED sh.pcPhlplplsGp.............hpsLLDTGADcollpptphshp......hpsphltGlGGth.pscphpphhlplttcphps.....shllhP..sPls......llGRslLsphsspLs ...............................................T.LWQRP.LVT.IKIGGQ...............LK.E.ALLDTGA.DD.TV.L.....E....E...h......N...L....P....G....+.....................W.K...P....K........M.I....G....G....I.......G...GFI.......K..V....R....Q.......Y.......D....QI........I.E.....I...CG.HKAIG.............TVLV.G..P...TPVN..............I.IGR.NLLTQIGCTLN..................................................................... 0 190 263 349 +822 PF00078 RVT_1 rvt; RVT; Reverse transcriptase (RNA-dependent DNA polymerase) Eddy SR anon Published_alignment and HMM_iterative_training Family A reverse transcriptase gene is usually indicative of a mobile element such as a retrotransposon or retrovirus. Reverse transcriptases occur in a variety of mobile elements, including retrotransposons, retroviruses, group II introns, bacterial msDNAs, hepadnaviruses, and caulimoviruses. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.44 0.70 -4.98 152 172360 2012-10-02 12:54:00 2003-04-07 12:59:11 22 1060 4989 405 8257 157991 1195 171.90 64 42.06 CHANGED l.Kps...........uphR................ph+hlschhtp.............................hp.sphuh.shtsh.................htps.phhhplDlpsuFpplshs.hhp.hhpshshst..........................................................ttphphpslPQGhhhSPhlaphhhpplhp.lpp.........................................................................thhhhtYsDDlllhsps.tpp..........hpphhptltphlpp...hGlpls.cKsphh.......................................tpphcaLGhpl ........................................................................................................................................................................................IKKKD...........STKWRK..L.........................V.D..F.R.E..L.N..K.R.T.Q.D.........................................................................F...W.E...V...Q..L.G..I..PHP.A.GL....................................................................K.KK....KSV...T..V...L.D......V..G..D..A...Y........F.....S........V.....P.....L....D...c.....-....F.........R....K..Y....T...A...........F....T........I...P.........S......h....................................................................................................................................................................................................................................................N....NE.....T....PG....I.......R...YQY.N.....V....LP...Q..G...............W..K...G...S...P...A..I.F....Q....u..S....M.T..K.....I....L..E....P..FR+pN...............................................................................................................................................................................................................................P-I...V..I..Y...Q.YM...DD..L....Y.....V...G....S..DL...EIGQ...................HR.s.K....I..E
....E..L.R.p......HL....L+.....W......G....F..o.....T.....P...D.....K....K..HQKE..................................................PPFlWMGYEL.............................................................................................................................................. 0 3604 5373 6662 +823 PF03501 S10_plectin Plectin/S10 domain Barker W, Wu C, Bateman A anon Pfam-B_2138 (release 7.0) Domain This presumed domain is found at the N-terminus of some isoforms of the cytoskeletal muscle protein plectin as well as the ribosomal S10 protein. This domain may be involved in RNA binding. 22.20 22.20 22.20 23.00 21.90 22.10 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.25 0.72 -4.13 39 605 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 384 4 306 562 4 93.60 56 26.87 CHANGED lPKpsRptIYcaLFcEGVlVAKKDhphs.pHs-........l...slsNLpVl+shpSLcSRGaV+EpFuWpaaYahLTsEGIcYLRcaLaLPs-.lVPuTh++pspst ...........hPKpsRhtIYch.LF.+.E.GVhVAKKDhphs...KHP-..........l...slsNLpVlKAhQSL+S+G.Y.VKEpFuWpHaYWaLTNEGI-YLRpYLHLPsE.IVPATL++pt+............... 0 106 165 242 +824 PF01479 S4 S4 domain Bateman A anon Medline:99193178 Domain The S4 domain is a small domain consisting of 60-65 amino acid residues that was detected in the bacterial ribosomal protein S4, eukaryotic ribosomal S9, two families of pseudouridine synthases, a novel family of predicted RNA methylases, a yeast protein containing a pseudouridine synthetase and a deaminase domain, bacterial tyrosyl-tRNA synthetases, and a number of uncharacterized, small proteins that may be involved in translation regulation [1]. The S4 domain probably mediates binding to RNA. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.81 0.72 -4.53 155 36010 2012-10-01 23:15:27 2003-04-07 12:59:11 20 39 10093 225 6709 24712 7637 47.10 28 18.58 CHANGED hRLDphl..hchshspopppAcplIppGcVtVNGchls.suhtlp..ss-hl ..................RLDphl.......hch...u..h..s..s....oRspAc.pllpp..G..c.l.t..V..NG....c....h.lp..suhplp..stDhl...................... 0 2224 4271 5635 +825 PF04382 SAB SAB domain Bateman A anon Bateman A Domain This presumed domain is found in proteins containing FERM domains Pfam:PF00373. This domain is found to bind to both spectrin and actin, hence the name SAB (Spectrin and Actin Binding) domain. 20.90 20.90 20.90 20.90 20.70 19.40 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.27 0.72 -4.27 6 436 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 42 0 117 282 0 48.50 54 6.12 CHANGED Khc-LDKoQD-llKHQASISELKRoFhEo..sspsRssEWE..KRLST.SPhR ..................hc-LDKsQ--lhKHQASISELKRsFhEo..sspsRssEWE...KRLoT.SPhp....... 0 4 12 42 +826 PF03399 SAC3_GANP SAC3/GANP/Nin1/mts3/eIF-3 p25 family Mifsud W, Moxon SJ, Waterfield DI, Finn RD, Bateman A anon Pfam-B_2845 (release 6.6) & Pfam-B_4388 (release 7.5) Family This large family includes diverse proteins involved in large complexes. The alignment contains one highly conserved negatively charged residue and one highly conserved positively charged residue that are probably important for the function of these proteins. The family includes the yeast nuclear export factor Sac3 Swiss:P46674, and mammalian GANP/MCM3-associated proteins, which facilitate the nuclear localisation of MCM3, a protein that associates with chromatin in the G1 phase of the cell-cycle. The 26S protease (or 26S proteasome) is responsible for degrading ubiquitin conjugates. It consists of 19S regulatory complexes associated with the ends of 20S proteasomes. 
The 19S regulatory complex is composed of about 20 different polypeptides and confers ATP-dependence and substrate specificity to the 26S enzyme. The conserved region occurs at the C-terminal of the Nin1-like regulatory subunit [4,5,6]. This family includes several eukaryotic translation initiation factor 3 subunit 11 (eIF-3 p25) proteins. Eukaryotic initiation factor 3 (eIF3) is a multisubunit complex that is required for binding of mRNA to 40 S ribosomal subunits, stabilisation of ternary complex binding to 40 S subunits, and dissociation of 40 and 60 S subunits [7]. 26.10 26.10 26.20 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.39 0.71 -4.76 105 1100 2012-10-04 14:01:11 2003-04-07 12:59:11 11 24 325 2 789 1485 13 199.20 22 28.32 CHANGED ppssPppl+shcllhhphphlhtph....................salhsph+uh+pD..lpl...........slplhEhtshhtl.p..............tc...........ltpasp.sht...pLh.hYtp.............................................p.pps-hhuhhLL.hhL.ps.....................s-hppplph.L.............................................tt.h.psthlphslplpphltpus.........................Yp+Faplh......p.................pss.hhstlhc.h.ahsplRhpshpsls+uYpp...........lslphlpphLsh 
..............................................................................................................spplR.....s..hL.tshphlht.h..............................t..a.tal.h-ph+ulRpDlshQt...h.s........slplhEtts+htl.p...................t-....................................hpphs.p...s.p.........pL..pt.hYpp..h......................................................................psppsEFtuY...h...lL.h.h.l....p.s.......................s-ht.p..p.ltt.l..............................................stp..hpps..........tlp....h....Alp......lppu.ht..pus.............................at..+aaclh...........p..........................................................................pss.....h....hshlhc..h...ahsph...R..tpulpshp.+uYp.................lslp.ltphLh............................................................................. 0 272 441 657 +827 PF03435 Saccharop_dh Saccharopine dehydrogenase Finn RD anon Pfam-B_4166 (release 6.6) & Pfam-B_6325 (Release 7.5) Family This family comprised of three structural domains that can not be separated in the linear sequence. In some organisms this enzyme is found as a bifunctional polypeptide with lysine ketoglutarate reductase. The saccharopine dehydrogenase can also function as a saccharopine reductase. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.07 0.70 -5.49 82 3742 2012-10-10 17:06:42 2003-04-07 12:59:11 13 32 2288 17 1319 4625 2619 321.10 19 76.28 CHANGED llllGs.GslGpsshthltcphch........plslsspstpphpphhtt.........tshphpshslsssshps.Lssh.lpps...chllslussh..tsh.slhchChcpGstYlD....Tuh.....................hppthhphcpptt.....tuGsTslsssGhsPGlsshhstpulpclt.............................sphtphhulc.hasutt............s.tassoWSscGhlpE.hps........t...sElshtspppph...shhhtsGsshhhah.pcsushspshshhst.....shhhs...slphssahshhp..htshshhc.ssh.hh..................................................................hss.csltulhc.hssht.phpsh.hplh.hhc....Ghtc.Ghhhhtphpps..hspths.hppsppthshssusshtlsutllssshhs....ptGllps-ph.hp.sh...hhshlt...Gl.hs ..................................................................llllGu..G.hlu.p..h.s.s...p.h.....lsp.psph....................cl.s.l.A.....u..R...s.....t...p.+.h..ptl.ht.................................tth.p.h.p...s...h.t..l.....D....s....s....s.....s.....p....s....l...t.....th...lppt...........clV...l...s..ss......s.....Pa.............hsh...slh.cA..C..l..p..s.G.s....c.YlD.........ssh............................................................................h.ph.h..t..h....c..cp.hc......p.uGh.s..hl.us.G....h..D..P........G.hs...sh...hs.t...hhhp.....phh................................................................t..........l..t.....h.......s...uth........................ht.h.sa..s..s.t....h....h.t.........................h.......t....t....t..........t.h..........h..t............h.......................h...........h.h.........p.t....h...........................h..h...........h.........t.h.t.hht...........hht.................................................................................................................................phh..thh............................t............h..h.....h.......................h......s.................................
................h.h.s.s....h....hh.......................................th.................................t......................................................................................................................................................................................................................................................... 0 431 806 1113 +828 PF00536 SAM_1 SAM_1; SAM domain (Sterile alpha motif) Bateman A anon [1],[2] Domain It has been suggested that SAM is an evolutionarily conserved protein binding domain that is involved in the regulation of numerous developmental processes in diverse eukaryotes. The SAM domain can potentially function as a protein interaction module through its ability to homo- and heterooligomerise with other SAM domains. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.98 0.72 -3.80 65 6213 2012-10-02 20:42:54 2003-04-07 12:59:11 25 406 310 71 3366 7526 101 63.10 23 9.07 CHANGED shhshpsVs-WLcu...l.th....spYtcpF.psshhshctlhplot-D..Lhp.lGlshhGHp++Ihpulptl+ .........................h...t.tpVspW.Lp.s...........l...sh.........sp.Yt..p.tF..p........p.....p......h......h...s.......s..c......t.....l..h.....p...l.o..p..p-.................Lp...p..lu.....l.p.t.....Gcpt+lhtulpth............................. 
1 782 1136 2102 +829 PF02198 SAM_PNT Sterile alpha motif (SAM)/Pointed domain SMART anon Alignment kindly provided by SMART Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.87 0.72 -4.22 15 1002 2012-10-02 20:42:54 2003-04-07 12:59:11 11 15 115 23 513 1149 0 82.10 32 19.96 CHANGED hsuappppp+ltlst.....cPphWocscVhpWLpWuhcE..FsLsslshspF.pMsG+pLCsLs+E-FhptsP...hsGDlLapHLphLpcps .....................................phtppphhlst......-P....ph....WoppcVtpWL.p.Ws.hcE....as..L.t..s.l.s..h..p..p..F...shsGctLCtho+--Fhph.s..P.......hsG..-..l..L..appLphlppt.s............................ 0 109 148 306 +830 PF01342 SAND SAND domain Christensen J, Bateman A anon [1] Family The DNA binding activity of two proteins has been mapped to the SAND domain. The conserved KDWK motif is necessary for DNA binding, and it appears to be important for dimerisation [2]. This region is also found in the putative transcription factor RegA from the multicellular green alga Volvox cateri. This region of RegA is known as the VARL domain [3]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.68 0.72 -4.27 27 491 2009-09-11 01:11:04 2003-04-07 12:59:11 16 30 96 4 249 486 2 78.20 33 13.91 CHANGED scs.sh....sstlPVsCGpspGhLhhc+h.ptGhpt+CI............................phc...spahTPpEFphhu.G+upuKcWKpuIRh......sGhsL+pLh-cthLs .........................t....sh...p..hPlsCGps.puhLhhp+h..ps..GhpsKCI............................php......spahoPpEFEt.hu.G+.ussKsWKpuIRh.....................sGhsL..p.plhcps.L..................... 0 51 98 148 +831 PF02037 SAP SAP domain Bateman A anon [1] Family The SAP (after SAF-A/B, Acinus and PIAS) motif is a putative DNA/RNA binding domain found in diverse nuclear and cytoplasmic proteins. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.04 0.72 -7.33 0.72 -4.37 182 2887 2012-10-03 03:04:30 2003-04-07 12:59:11 22 173 629 14 1622 2724 591 34.30 34 5.81 CHANGED hsphpVs-L+phLcppuLsssGp..KspLlpRLpphh ...........pphpVs-L+ptLcp+uL.s..ss..Gp..KspLlcRLpph............. 0 558 853 1248 +832 PF05184 SapB_1 Saposin-like type B, region 1 Finn RD anon Manual Domain \N 20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.81 0.72 -4.05 161 1396 2009-01-15 18:05:59 2003-04-07 12:59:11 10 105 174 31 654 1402 4 38.80 27 19.67 CHANGED sshCshCphhlphlpphl.p.sspopppIhphlcp.hC.shlP ..s.shCphCchlVshlpphL.c.sspTcpcIhphlcc.hC.shLP..... 0 247 355 514 +833 PF03489 SapB_2 Surfactant_B; Saposin-like type B, region 2 Finn RD anon Manual Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.44 0.72 -3.89 103 1536 2009-01-15 18:05:59 2003-04-07 12:59:11 12 112 171 39 716 1502 4 34.80 28 17.26 CHANGED sppCpplVppYtshllphlhpphs..PpplCp.tlslC ..........ppCcphVspYtshllphlhpp.....hs...PpplCs.tlslC...... 0 242 367 546 +834 PF04499 SAPS SIT4 phosphatase-associated protein Mifsud W anon Pfam-B_2011 (release 7.5) Family This family includes a conserved region from a group of yeast proteins that associate with the SIT4 phosphatase. This association is required for SIT4's role in G1 cyclin transcription and for bud formation. This family also includes homologous regions from other eukaryotes. 
24.90 24.90 24.90 24.90 24.50 24.80 hmmbuild -o /dev/null HMM SEED 475 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.59 0.70 -5.85 51 896 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 267 0 509 895 0 267.70 20 48.43 CHANGED .ppLlspKspphlsF.l+ppcslVcphLpHI-ssslMDhLL+lIS...s-+....s-sspGll-hLpp.....QcLIs+LlshL...........................................s.schsts.hQosAuDhLKAlIolSu.Nss.p...pssIGP.................NpLoRpLsStphlcpLl.shML..............................................................................................................ps.usuLssuVuIlIElI......RKN............NSDYD...................................hshtspsP.os+D...............................................PlYLGp........lL+hFup+lscFhpLlhps..................ptt.lposhGs.hcPLGFcRFKlsELlAELLHCSNMsLhNp.t..thht.RD..R.t...t...ht...tp........t.t....p.......................t.........................................................................................................................................t........t.t..............t...t............................................................................................................................pPs.................lGDhlKlpLh-spllssILchFF+aPWNNFLHNVVaDllQQlhNGshc....................................huaNphLshcLF..................................cpsplsptIlcutcpspch.ppp..............thRhGYMGHLTLIAEEVVKFsphhss..phlo....hlhc..tlpspcWptalppsLs-.TRc.p.ss 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................h............................................................................................................................................................................... 0 148 248 376 +835 PF04000 Sas10_Utp3 Sas10/Utp3/C1D family Bateman A, Wood V anon Pfam-B_6555 (release 7.3) Family This family contains Utp3 and LCP5 which are components of the U3 ribonucleoprotein complex [2][4]. It also includes the human C1D protein and Saccharomyces cerevisiae YHR081W (rrp47), an exosome-associated protein required for the 3' processing of stable RNAs [3], and Sas10 which has been identified as a regulator of chromatin silencing [1]. This family also includes the human protein Neuroguidin an initiation factor 4E (eIF4E) binding protein [6]. 20.50 20.50 20.60 20.60 20.10 19.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.78 0.72 -3.79 122 865 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 313 0 604 854 4 85.50 20 23.39 CHANGED hlpslspslsplpspl..ptlhpth..................httths.................lhphKhphhhuYhtslsha........hhL+hp.shsspp............................HslhpcLhcl+phhc+ ..........................................................................................lpphppplsplpstl...pslhpts...............................ttthss.......................h.hcsKhpllluYhhslsaa..........hhL+sp..uhssps............................HPlh.p.cLhclRphhc+........................................... 1 186 316 487 +836 PF01547 SBP_bac_1 SBP_bacterial_1; Bacterial extracellular solute-binding protein Bateman A, Griffiths-Jones SR anon Pfam-B_269 (release 4.0) Family This family also includes the bacterial extracellular solute-binding protein family POTD/POTF. 
20.50 15.00 20.50 15.00 20.40 14.90 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.87 0.70 -4.70 129 9759 2012-10-03 15:33:52 2003-04-07 12:59:11 20 37 2534 267 2818 28939 7759 292.70 15 65.96 CHANGED ssshpp...shpthh.pp....a....pc..pss..lcVp..hp.s...sssshtp.....h.hthtsus...ss.Dlhhs........h.....hhpt..shhtslsshhtst.hh.h...........hh...h.............................shha.........ppphht............................sWs.-hhphstphttpstsh........................hhh.h.tthhhhh..h....s............................................shthh.thh.......thtshtshstshstshshht...........pGpsshhhsh.ht..........h......................thhtsttpst.phshh.....................sshs.hslspsupp....tct....AppFl........pahhss........................pspt ..........................................................................................................................................................................................................................tttt..........htphh..pt.............F.........pc....p..s.s......lc.lp..........hp.hh..........s....s...s..htp..................cl..p.s.th..s.u.Gs....................sP...D.lhhh...........sssths.............phspp...........Gh.l..h..s...l...s...s....h...h..sp.........t.....t........................................................................h......t...h.hp.....pht.........................tpha..ul....P............h..........t....s......s....t...........s........l..a..Y................................s.+c.lhc.................................chG.lp.................................................Pp...Ta......-......-....h....h...p....s....s.....c.....p....l....p....p...ts.hss......................................hhh...t.t...t...h.....t.....t..t....h....h.....h...h.............h...h...t...s.hG.........s....t.h..h..s.t.pht.....................................................sh.sss.thhp..............shph....htp..h..h........................................t.........t.....h....h......s...
...........s....s.....t....s......h......s...h...s....p....s....h....s..h.ht.......................................sGc..s..u...h...h..h.......s..s...h.....h........................................................................................................................h.t...t..t..t.t..t..........t...t....p....h..s..h.....h..sh...P.t....h.t.sst.t...........................hss.s..h..s..l....s....p....s...o..c..p...............ct............Ah.c.al................pahhs.t.............................................................................................................................................................................................................. 0 1094 1940 2361 +837 PF00497 SBP_bac_3 Bacterial extracellular solute-binding proteins, family 3 Finn RD anon Prosite Domain \N 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null --hand HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.47 0.70 -5.10 483 31454 2012-10-03 15:33:52 2003-04-07 12:59:11 15 351 4322 113 7607 27422 5780 223.60 19 63.80 CHANGED lpl.usss.shs.Pasahct............sGphsGa-lD....lhpt....luc.ch.Gh....c.hchh......h...sasshl.s.uLpsG.c.h.Dh.lhuuhsh......Ts-R..p..c.p...........ls.F.op.P..Yhts.stsllsp..p.....s.....................................................................................................................ph...p...t.........-L...pG....................+..p.....lu.ltpG.os....tpth...htp.......h................thpl....................h.....h.sshsps.ht........s.LpsG...clDAhls-tsshthhhpppsttthhh......................ttshtspthuhshpcssspLhptlspultpl.ppsGphp.....c.l.hp...Ka.hst 
....................................................................................................................................................................................................................................................lhlu.h.ps..s..h.s..P.a...p...h...hst.................ss..p...h..h...G.h..-.lD................ls....p.t........l....sc....ph...sh.....................c..hp...hh....................h..sa.s....s...hl.....s....s.L.....p.....s........s.....c........h......D...h....l.....h......u.....s......h....o....h................T....s....c...R.......p...c...p....................................hs....F....os......s......Y....h......p......s......s.....h.......t......l.l.spp...s....................................................................................................................................................................................................................................................................................................t..sl...p.....sh.t............-..L........pG...........................c....p.......l.u.....s..t...p...G...os.................tp.p.h........lpp...........h.........................shp.l........................................................h.....p...h....s...s....h...s...ps...hp...................s....L....p....s...G......+...l...D.....u.h......h...s.....D....p....s....s....h....t...t....h...h..pp.p.s.ththhh................................ttthtsps....h...u...h......s......h.....p.......+.....s...........s.......s.......p.......L....h...p....t...l...sp...sl.t.p.....h...p.....p.....s.....G......p......h..p......p...l...p+a.h..t.......................................................................................................................................................................................................................................................................................................................... 
0 1937 3962 5901 +838 PF04144 SCAMP SCAMP family Bateman A anon Pfam-B_1298 (release 7.3) Family In vertebrates, secretory carrier membrane proteins (SCAMPs) 1-3 constitute a family of putative membrane-trafficking proteins composed of cytoplasmic N-terminal sequences with NPF repeats, four central transmembrane regions (TMRs), and a cytoplasmic tail. SCAMPs probably function in endocytosis by recruiting EH-domain proteins to the N-terminal NPF repeats but may have additional functions mediated by their other sequences [1]. 23.20 23.20 23.30 24.00 23.10 22.60 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.13 0.71 -4.39 39 634 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 162 0 330 557 0 161.50 37 57.61 CHANGED cpsNWP.Ph.t.h.htPhhYpDIssEIPs.caQ+hsphhahlWhhh............shsLhhNlluslshahts..ss...ussFuLullahllhsPsualsWYRPlY+AaR......................oDSuFsFhhFFhhahhplshslhtulGhPsh......GhsGaIsulshhps.shs..lulhhhlsshhFslpuslulhhlp+VaphaRtoG .......................psNWP.Ph............htPhhap.DhssEIP....ch..Q..+hsphhYhhWhhh..........................slsLhhNll.usl.uha.hts.............su.....sssF......sLulla....h......l........lh..s.Psoal.sWYRPlY+AFR.......................................oDSuhpFhhFFh...hahhphshplltAlG.hs.....sh........................G.h..sGhlsu.l...sh..............hsp.....shs.....lulhh.h.lsshhFshtul.hu.......hhhlp....cVathaRtsG..................................................... 0 89 145 226 +839 PF02023 SCAN SCAN domain Bateman A anon Pfam-B_1614 (Release 5.0) Family The SCAN domain [1] (named after SRE-ZBP, CTfin51, AW-1 and Number 18 cDNA) is found in several Pfam:PF00096 proteins. The domain has been shown to be able to mediate homo- and hetero-oligomerisation [2]. 
20.20 20.20 20.30 20.40 20.00 20.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.10 0.72 -4.51 62 2273 2009-01-15 18:05:59 2003-04-07 12:59:11 12 322 50 7 1114 1819 0 93.00 48 19.04 CHANGED .ssEshRp+FRpFpYp-...ssGP+EALupLpELCppWLRP.EhHoKEQILELLVLEQFLoILPtElQsWVppppPcSuEEAVsLlE-Lpcphpcs.t..pts ....................st.EshRp+FRpF.pYpE.....ssGP+E.ALup.....LpELCppW.LR..P..Eh..+.TK.EQIL.E.LLV.....LEQFL..oILPtElQsWVpp.pp..Pc.SuEEAVsLlEcLppphpp.....h............................... 0 119 159 371 +840 PF02404 SCF Stem cell factor Mian N, Bateman A anon Pfam-B_2598 (release 5.4) Family Stem cell factor (SCF) is a homodimer involved in hematopoiesis. SCF binds to and activates the SCF receptor (SCFR), a receptor tyrosine kinase. The crystal structure of human SCF has been resolved and a potential receptor-binding site identified [1]. 20.00 20.00 20.50 20.90 19.70 19.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.77 0.70 -5.20 4 137 2012-10-02 01:28:15 2003-04-07 12:59:11 10 3 70 16 36 126 3 171.70 49 93.19 CHANGED MKKsQTWIITChhLQLLLhNPLVKTQu.CtNPVTDDVpDIsKLVuNLPNDYhITLKYVPtMDsLPsHCWL+.MVschShSLpsLLpKFSsIS-h...LSNYSIIspLs+IlsDLhACht.cpsKs.lKEsu+h.EpcpFhPEpFFplFNRoI-saK-.Fhsu.DpsDClh.Sos.TPEpDSRVuVTKsh.hPPVAASSLRNDS......SsSN+cAhs.IpsSSLQhhulALsuLlSLlIGFhhGAlYWKKppP.ShscosEsIQhp..pE-NEISMLQQKE+Ea.pV .........................h....hhh.......tt..hN.lTsslpclshLhtNlPpDYhIslpYlst...lsshCWl...V.php.SLpsLhpKFsphSp......NhsIh.ph...h...............h.t..t.....t.h.stpaFthh..p.....hpt...s...s.cpu-Clh...sos.sPEp-scs.shopsh.hs..h.................................................................................................................................................................................................................................. 0 2 5 15 +841 PF00188 CAP SCP; Cysteine-rich secretory protein family Finn RD. 
Yeats C anon Yeats C Domain This is a large family of cysteine-rich secretory proteins, antigen 5, and pathogenesis-related 1 proteins (CAP) that are found in a wide range of organisms, including prokaryotes [2] and non-vertebrate eukaryotes [3], The nine subfamilies of the mammalian CAP 'super'family include: the human glioma pathogenesis-related 1 (GLIPR1), Golgi associated pathogenesis related-1 (GAPR1) proteins, peptidase inhibitor 15 (PI15), peptidase inhibitor 16 (PI16), cysteine-rich secretory proteins (CRISPs), CRISP LCCL domain containing 1 (CRISPLD1), CRISP LCCL domain containing 2 (CRISPLD2), mannose receptor like and the R3H domain containing like proteins. Members are most often secreted and have an extracellular endocrine or paracrine function and are involved in processes including the regulation of extracellular matrix and branching morphogenesis, potentially as either proteases or protease inhibitors; in ion channel regulation in fertility; as tumour suppressor or pro-oncogenic genes in tissues including the prostate; and in cell-cell adhesion during fertilisation. The overall protein structural conservation within the CAP 'super'family results in fundamentally similar functions for the CAP domain in all members, yet the diversity outside of this core region dramatically alters the target specificity and, thus, the biological consequences [4]. The Ca++-chelating function [3] would fit with the various signalling processes (e.g. the CRISP proteins) that members of this family are involved in, and also the sequence and structural evidence of a conserved pocket containing two histidines and a glutamate. It also may explain how Swiss:Q91055 blocks the Ca++ transporting ryanodine receptors. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.57 0.71 -3.63 98 6534 2009-01-15 18:05:59 2003-04-07 12:59:11 21 194 2233 38 2946 5991 336 123.50 18 43.36 CHANGED lshhNphR.....t......................stshhs.Ls...asss.LsphApppupphsp...........................tt.htpshspshthhthsst......t....................spshhtttpphptt..............htshh........sss..........sHhpplltssssplGsuhspssstt...............hhhlsta ......................................................................................................................................phhNthR....................................................stpt...hs.lp..........assp.LsphAp..t..a.u.p....ph.st............................................ps..hsp...s...t....s...p..s....h.t.....h.htt.hpt.....hsh..h........................................spshh.s.pt.pshshs....................................h.p.s.hh................ss.s....................GHh..s...p..ll.............s......s......h....s..plGsu....h....shsspt....................hhhsp.a...................................................... 0 1119 1733 2458 +842 PF03803 Scramblase Scramblase Finn RD anon Pfam-B_3893 (release 7.0) Family Scramblase is palmitoylated and contains a potential protein kinase C phosphorylation site. Scramblase exhibits Ca2+-activated phospholipid scrambling activity in vitro. There are also possible SH3 and WW binding motifs. Scramblase is involved in the redistribution of phospholipids after cell activation or injury [1]. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.44 0.70 -5.26 10 783 2012-10-02 20:44:47 2003-04-07 12:59:11 10 15 285 0 512 771 22 198.70 28 69.98 CHANGED Mousht.hsssPsuL-hLsphDsllVpQplEhlElhTGFETsNRYsl+sstGpplhhshEc.....SsshsRQhhGscRPFshclhDshGpEVhplpRPFpCss...p.....hssshppt-l....ps.sGssIGhVtQpWchhcspaclhsucp.psshplpuPssshs.sssspsF.V+oh-s.pllGpIsRpWsGlhREhFTDADsasl+FPh...........................DLslchKAVlluusFLIDFsYFEc ...............................................................................................................thp.hL.t.h.stlhlp.pp..h.E....hhp....................h....h.....h.....s..h..E..psN+Ytl.........h.......s.........s.............G.......p........t...l...h.h.....s.....hEc...................sshhsRp.h.h..t.....s.t.R......s...F......p..h..cl...............h......D.....s........h...........s....p...........c.........l......h.p..hp.R....P....hp..h.s......p....................................h.s.h..hpph.pl...................ps.s.s.Gph.......l..G..hlt...Q.....p..a....p.............h.....h................p..a....s.l.s......tpt...pp.............h........h............p...............l.......................G..........P..........h........h..h.........h..........s.....s...............h....ts....h.............s....F............pl.....hs...h.........s......t..p.............l......GpI.s....+....p....W.s..G.......h...h...p..-.h......h....T-..ss....pas..lp.F.sh...................................................................................................................sL..s.l..c.....+AlhluushhIDh.aFp............................................ 0 214 284 400 +843 PF01390 SEA SEA domain Ponting C anon [1] Family Domain found in Sea urchin sperm protein, Enterokinase, Agrin (SEA). Proposed function of regulating or binding carbohydrate side chains. Recently a proteolytic activity has been shown for a SEA domain [4]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -10.06 0.72 -4.36 94 1758 2009-01-15 18:05:59 2003-04-07 12:59:11 15 244 103 4 1017 1679 0 104.30 16 14.60 CHANGED sstthhphshpl.sshp.....asschpsssStpapslspplpptl...pphapps......ttahpsplhsh.........................pp........Go...llsphhlha.ptssststtshtptltpthpp....tthhslthssp ................................h.....hthsh.pl.sshp........aspchtsssStpa...ps...h...ppp...lp.phl....pp.hapss.........stat..ssplhsh................................ps.....................uu.....lhVphhhha..p.s.s.s.t.s...h..tt..h.p..h..p...t................h....................................................................... 0 253 329 554 +844 PF04091 Sec15 Exocyst complex subunit Sec15-like Wood V, Finn RD anon Pfam-B_7871 (release 7.3); Family \N 22.40 22.40 22.40 22.50 21.80 22.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -5.10 31 418 2012-10-03 17:31:52 2003-04-07 12:59:11 7 15 255 1 266 408 1 281.90 32 39.47 CHANGED -Y-p.Vlphpahp........sptts.s...FPhhhPFSphhP.sCh.l+..........palsphhtFhsphhp.p.spls-.hl++shDpLLsch....lscslhphlpss....shpQlsQIllNLpaaEhAspplpphlsptptssps.ss...sph....tLpupcpapss++tAEpplhphlsoKID-hl.-hs-YD.Whs.spsss.-........sstalp-lspaLcshhsSshhsLPtplpphlhhcuhcHloppllsl.lLsssl+plsspultshshDlpaLEpFsspl......................................p......sssLppsFsEL+QplsLlhossh-.-a..........h-sshRt++YsRl.cspsuhhlL-Khp 
....................................................Ych.lht...hp...........p.ph...pp.s.......FPthhPFSphhPthhh..p.l+.....................pFl.thhpFsps..hhh....p.splc-.hl+KuhshLLocp....lspsL.phlppt.....hsLspllQIllNhsahEpACp.L-phlsphp.sh.tp.ss..........tth..............................pLhut.ptF.+.ss...+.c.sAEppIh...phl.spKID-hl..ph...s-Y.D.Whs...scs..ss...c.................sSsYl...-lltaLcshh.ss.hptLP.t..........cltptshhsAhpHlusplh.....................ph.l..L.ss.....-.............l+plshsAltphslDlh.h..EtFsss.............................................p.......tspLp.shh-LRQhlsLhhs....shp.pa..........h-..u..p..p..+Y.+l.ss.pshhlLEKh................................... 0 77 136 208 +845 PF04815 Sec23_helical Sec23/Sec24 helical domain Bateman A anon Pfam-B_3055 (release 7.3) Domain COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is composed of five alpha helices. 27.80 27.80 28.00 27.80 27.60 27.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.03 0.72 -4.59 121 1420 2009-01-15 18:05:59 2003-04-07 12:59:11 10 28 326 24 936 1370 10 102.20 26 11.83 CHANGED Q-Ahsshhu+pulp+shs....ss.....hp-........s+chLsppllclhspY+..phhtssssu.............lhLsp..shchhPhahhsLhKsthl.ps...tssssDcRsahhphlhshslpphlhhl .....................................-Ahsslhu+h..Al.t+s.s.........ss......h.t-........................spc.hLsppllclhspat.......cht...tssssu.............................hhLsp....shplhPh...ahhsLh+o.phl.ps......h.......s.......s.......ssD-puahhphltp.slsp.hhhl....................................... 0 285 492 755 +846 PF04811 Sec23_trunk Sec23/Sec24 trunk domain Bateman A anon Bateman A Domain COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. 
This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is known as the trunk domain and has an alpha/beta vWA fold and forms the dimer interface. 21.20 21.20 21.20 22.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.23 0.70 -5.01 22 1546 2012-10-10 16:07:06 2003-04-07 12:59:11 10 35 327 24 1024 1513 23 236.50 26 28.15 CHANGED P.PslFlFllDlohs...su.LpslppulhpsL.shLP........spshVGhITasshl+hapls.sh.......ptsp.hsssclp-hh.......................hPhs.sphLlslpcschslpsLLcplsp.hasss..+csppshGsALpsAhtlLpss...sGG+lhlFtuu.sohGs.uhlpsc..cp...shhshcK-ttphhppsstaYcsLApcssspGhslDlFhhs.s.sslAplpslsphTGGplhhhsuFs.......sshFppshpRhh ............................................................................................................PshalFllDs.s..hp..........ps.lp..........slpps.lhts.L.....shLP.....................................sps.....hlGhlTa.s....ph..l.......ph.a...p..lttsh........................................................ptsp..h....s...s..s.p...l...p.-.h.h.............................................................................hPhs...sp.h.L..l..s.l..p..c..s..c.......................................l.ps....l.Lp...p...L.................p....ha..s..s..s.............................p........c..s........t........p..s......hG...sA.......LpsAhtl.lp..............................sG.G..+lh.....l............Ft..uu..............soh.Gs......s...h..lpsc..cp........................shhsh.s..+..-....................t.................t......h..h.......p..p..s.s...........t...aYc.pl.Atcssp...............p...............sh.......slD.lF.....hh.....s..........p...........ss..........l..................u..p..h................ts..............l.sphTGGphhhhssFp.............s..phhtpshp+................................................................ 
0 340 560 837 +847 PF04136 Sec34 Sec34-like family Wood V, Finn RD anon Pfam-B_16464 (release 7.3); Family Sec34 and Sec35 form a sub-complex, in a seven protein complex that includes Dor1 (Pfam:PF04124). This complex is thought to be important for tether vesicles to the Golgi [1]. 25.00 25.00 25.10 25.70 24.80 24.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.88 0.71 -4.53 3 293 2012-10-03 12:17:00 2003-04-07 12:59:11 10 9 255 0 211 309 1 151.10 29 19.38 CHANGED cscch+cYLQsFpp...pCDpILsQlNuAhp+LtSLp-cY-FVSpKTSsLsEACEQLlcEQpRLsELA-sIQ++LoYFupLEpLNp+LpSPTLSVA.S-uF+EpLsKLD-CIsYIEENPcFKDuPtYLlKYKQCLSKAhcLhKsYslslIpQsT-QlLKcc ................................................h....h.p.Lpth...pp...ph-...t...l..Lsp.....sssslptLpsLpppaptVsscTsshpctC-pLLp-Q...pcLtclu-pIpppLpYFspL-slsp+L.s........u.........P..s...h..........u....V..s...scs.F..h.s.h.Lp+LD-Clsalp..s..H..........P..p......aK-ussYhh+a+phLo+AlpLl+sahsssLpph.spth....t..................... 0 71 115 174 +848 PF02889 Sec63 Sec63 Brl domain Bateman A anon Ponting CP Family This domain (also known as the Brl domain) is required for assembly of functional endoplasmic reticulum translocons [2]. 
33.60 33.60 33.60 34.00 32.60 33.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.13 0.70 -5.21 133 1940 2009-09-17 06:06:13 2003-04-07 12:59:11 11 53 348 4 1383 1943 35 302.80 21 32.16 CHANGED sppLGcI....ASpYY..........lpapohphasp.....lps..pt.s.tc...........ll......pllotusEFc.....pl.lRppEcppLpcLh...pp.....hs....h.h.................p.tshpssps.....KsslLLQAalSR......h.plp...hsLhs................Dhthlhpsus.RlhcAhh-l.h.....hpcs........ahpsshtslpLsphlpp...............phW.s..pp........sPL.+Qhs.........................thsppl....lc....plcp...ps.h..ohpclh....ch........ssp-ls.......pllp...........p.upplhphlp...pa.Pplcl.psplpshscs..l.............................lplplplpsph.hp...................................th.st............t-saalhlt..D.scspplhthcphtl.....pp.............................tt.h....................lphslPhs.ss.......ph.hlhllSDp.alss.-pphslsh .................................................................................hthGhl....suhYY..............lpapT.h...pha.p..........lps......ph...s.tc..................................llpllu.tus.EFc..............................p.l....l....R...p..p.E..c.....l.p.pLh.......pp......l.......h.h.....................................................ptph.p.s.s.ps........KsplL.LQ..........AaloR........h..pls.........hsLh..s..............................Dtt.hl...hp.puh.Rll..........p...Ahh-l..s..........................hpps..................ahpss..htshpLsphltp...................thW..s....pp..................ssL...pQls...................................................................................p.h.s.t.p..h..........lp..........phpp......................ts...h.......sltclh........ch..................psp.cht......................pllp..s...............p.hpplhphhp.......ph..P.p.lpl..ph..p...h.p..s.h.spp...h..............................................lplplplp.ph....pp...........................................................................................................
............................................hh.tt..............t.csaa..l.hlt.............-.sps..ppl......h.t....h....chh.h............p........................................tth..............................................................................................................................hph....hP....t....................ph..hlhhhsDp.ahuh.-t.h.h.................................................................................................................................................................................................... 0 481 779 1155 +849 PF04048 Sec8_exocyst Sec8 exocyst complex component specific domain Wood V, Finn RD anon Pfam-B_9576 (release 7.3); Domain \N 25.50 25.50 25.50 26.20 25.30 25.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.55 0.71 -4.55 22 297 2012-10-03 17:31:52 2003-04-07 12:59:11 9 8 250 0 211 305 0 126.60 26 13.09 CHANGED pLppllsplphpWsthhpcsssPlplALphhD-oSlGhu+cht-FpphpcphppsLppVVs-HapsFNsuIuoYpplhsslpsSppcltplKptLppupptl.pscpspLpcLspsShcapchIplLstIc-lpplPs+l-ph .......................................p..........................................D..p..s...s...s.ptpchtc....hpcthcphppsLcplVspHapshssuItoappIpppls................sSpp+lcpl...KpsLtpsKthL..ps........++.............s.............-L+cLhhcuhp......accllplLcpIE....plpplPp+lEt............................... 0 73 119 178 +850 PF00856 SET SET domain Bateman A, Huang S anon [1] Family SET domains are protein lysine methyltransferase enzymes. SET domains appear to be protein-protein interaction domains. It has been demonstrated that SET domains mediate interactions with a family of proteins that display similarity with dual-specificity phosphatases (dsPTPases) [2]. A subset of SET domains have been called PR domains. These domains are divergent in sequence from other SET domains, but also appear to mediate protein-protein interaction [3]. 
The SET domain consists of two regions known as SET-N and SET-C. SET-C forms an unusual and conserved knot-like structure of probably functional importance. Additionally to SET-N and SET-C, an insert region (SET-I) and flanking regions of high structural variability form part of the overall structure [5]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null --hand HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.85 0.71 -3.93 267 8450 2009-01-15 18:05:59 2003-04-07 12:59:11 23 382 879 153 5532 8290 1539 133.10 21 17.69 CHANGED GhGlhApcsItpGp..hlh....phtupl..lstppspppttt.............................................................................................................................................................................................................................................tthh................................................................................................................hhhthtpththsstthsshucalNH...............SC.....................Nsthphh.................................tttplhlhAh+sIpsGEElshsYs 
...............................................................................................................................................................................................................................................................................................................................................................hGlh.s.h...p......l...t...sp....hlh..........hs...h......ht.t.h.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................t.............p....t......h......h........h.....c......u....t........t....h....s.....s....h...u....c.......a..l....N..H.........................SC....pP............................N.st.hphh................................................................................tthpl.hl..h..A....h.....+...s.........I.....p..s.GE.ElshsY....................................................................................................................................................................... 0 1814 2953 4471 +851 PF03749 SfsA Sugar fermentation stimulation protein Bateman A anon COG1489 Family This family contains Sugar fermentation stimulation proteins. Which is probably a regulatory factor involved in maltose metabolism. 
SfsA has been shown to bind DNA [2] and it contains a helix-turn-helix motif that probably binds DNA at its C-terminus. 25.00 25.00 29.70 26.60 23.80 24.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.26 0.70 -4.88 13 1762 2012-10-11 20:44:43 2003-04-07 12:59:11 8 9 1701 0 410 1213 1569 211.30 41 87.65 CHANGED l+RhpRFlu-lpl-.GchhpsHhsNTGphptlhpsGspValp+u-sspRKhsashthspts..sphVslsTthsNcLstcAl..sttlspLs..astlctEVcaGppp...lDFLLspst..thaVEVKusTLscsslA.....hFPDAsTsRGpKHL+ELtpls+cG.aRullLFllh+sshcsFpPspclDPcauchlpcAhpsG.Vcllsapsphshp..lchsphlt ..................................lpRYKRFL..ADV....h.h.......s......s.......G...............c.....t...lThHCsNTGuMpGsh.p..PG.ss..Va.hS...p...S..-...s....s.K.....R...Khs.aohEls....ps......p...................G....t.....hlslN..Tt.hsNpLscEAlt...s..............t..p..............I..s........p........Ls..uY..s.......pl+pEV+..Y..G.....p...p+.......RID..F.h.....L.............p.....s...s.........s...............p......c....s....Y.lEVK......u......VT..L........t...c.....p.....t..h.u..................hFP........DA........lTpRGpKHL+ELh...shst..p........G.........p............RAlll..FsV.+ss.lpp.FsPuc.clDspY...AphLpcAt.ppG.VElLAYpsclospt.htltp............................... 0 139 263 348 +852 PF05002 SGS SGS domain Finn RD anon Manual Domain This domain was thought to be unique to the SGT1-like proteins [1], but is also found in calcyclin binding proteins. 
21.90 21.90 24.50 22.70 21.20 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.97 0.72 -4.23 48 564 2009-01-15 18:05:59 2003-04-07 12:59:11 10 29 330 3 345 531 2 76.50 36 24.80 CHANGED spsWspLs..............h...pscc--cssp.t....................lsshF+plYpsuD-Ds+RAMhKSahESsGTsLoTsWt-lttt.h.....spsPcGhEhKca .................................................t.psWDpLst...................hpc....p.p.pc-.ct-s..ssu................................LsphF+clYpcuD--h+RAMsKSahESsGTsLSTsWp-Vtptphp.....t..sPp.................... 0 113 182 275 +853 PF03983 SHD1 SLA1 homology domain 1, SHD1 Finn RD, Wood V anon Pfam-B_ (release 7.2) Domain NPFXD peptides specifically interact with the SHD1 domain. NPFXD is a clathrin-facilitated endocytic targeting signal. NPFXD was originally discovered in the cytoplasmic domain of the furin-like protease Kex2p [1]. Sla1 is thought to function as an endocytic adaptor [1]. 21.20 21.20 21.70 21.90 21.00 21.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.21 0.72 -4.32 18 166 2009-01-15 18:05:59 2003-04-07 12:59:11 7 29 137 1 116 180 15 68.70 47 6.66 CHANGED ossKScP-PpKlRoWoDRSGoFKVEApFLGhtDGKIHLHKsNGVKIAVsssKMSh-DLEYVE+lTGpSL- ........t..tcshPsst+sRpWoDRoGoFpVEApFlG.l.p.-GKl+LHK.h.NG.VKI.AVPlsKhSh-DL-YVE+lTG.SL-.............. 0 44 72 103 +854 PF04925 SHQ1 SHQ1 protein Wood V, Bateman A anon Pfam-B_11411 (release 7.6) Family S. cerevisiae SHQ1 protein is required for SnoRNAs of the box H/ACA Quantitative accumulation (unpublished). 
20.80 20.80 21.00 21.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.08 0.71 -4.85 27 208 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 176 5 136 213 1 179.00 31 36.24 CHANGED Lp...h.ppptp...............lpFocEEpcpht.plsp+paL.ls..p....tplhhsllslLaAYsYD.pps-G-pssE.SuWTIuKLoPplSaLDs.....................hssl+ssllsshRRuLsYPLaRsasLsp+sap-shhhLpu..GKphll+sLLcl+clF.p.hp-hhYlhsclalsDhhsWl..pss.s-phlpsLApclcpth..lscpp.......lp ...................................................................hpFoc-.Epp.ht.plsp+paL..ls..............pt..tppplhhsLl-lLhAYsY-hR.sspG-.pssE...SuWols.KLSssluah-p.......................hsslccslluhhRRsLsY.P.L..aRp...a...pLshpsh.pDshplLph...G..Kphll+sLL-l+clF..p..pp-........stYlhN...clalsDhshWl..pps.ppphlttLupplcph...lpKtt..s................... 1 53 77 115 +855 PF01549 ShK DUF18;ShTK; ShK domain-like Bashton M, Bateman A anon Pfam-B_662 (release 4.0) Domain This domain of is found in several C. elegans proteins. The domain is 30 amino acids long and rich in cysteine residues. There are 6 conserved cysteine positions in the domain that form three disulphide bridges. The domain is found in the potassium channel inhibitor ShK in sea anemone [1][2]. 20.90 4.80 20.90 4.80 20.80 4.70 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.66 0.72 -3.59 157 2278 2012-10-02 17:51:16 2003-04-07 12:59:11 19 119 113 2 2009 2028 124 36.40 29 22.98 CHANGED sCh....D.p......ssCsthssh....Cp...ss.....shhppp.CspTCs.hC ..................................Ch.......Dtt.......ssCst.h...sp.........Cp..............ss.............shh......p......p......p...CspoCs.hC............... 0 920 1083 1978 +856 PF04542 Sigma70_r2 sigma70_r2; Sigma-70 region 2 Finn RD anon manual Domain Region 2 of sigma-70 is the most conserved region of the entire protein. All members of this class of sigma-factor contain region 2. 
The high conservation is due to region 2 containing both the -10 promoter recognition helix and the primary core RNA polymerase binding determinant. The core binding helix, interacts with the clamp domain of the largest polymerase subunit, beta prime [1,2]. The aromatic residues of the recognition helix, found at the C-terminus of this domain are though to mediate strand separation, thereby allowing transcription initiation [1,2]. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.24 0.72 -4.37 259 39743 2012-10-04 14:01:11 2003-04-07 12:59:11 9 171 5036 49 11330 31962 8852 69.50 22 26.33 CHANGED lhpph...hthl...hphutch....hsss....h.ss-D..LlQ-uhlthhcu.hcpac.ppu.p..hpsahhthhc....pthhc.....tlRc.pptt .............................................hpth.hphl.....hp.h.u.t.+h.............hsss...............h..p...h..c..D....ll.Q-.shltl....h....c.......u....h.........c.......p.....a........c...............p.....p.....u................p........hpsah...hthhc....sthhc......tl+c.pt................................... 0 4421 8178 10016 +857 PF04545 Sigma70_r4 sigma70_r4; Sigma-70, region 4 Finn RD anon Manual Domain Region 4 of sigma-70 like sigma-factors are involved in binding to the -35 promoter element via a helix-turn-helix motif [1]. Due to the way Pfam works, the threshold has been set artificially high to prevent overlaps with other helix-turn-helix families. Therefore there are many false negatives. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.05 0.72 -4.65 130 17028 2012-10-04 14:01:11 2003-04-07 12:59:11 11 79 4776 54 4045 30085 7549 51.60 33 15.50 CHANGED sLspLspRE+pllphRa....tpshTLpElGpthulocpRV+QlcpcAlp+LRp .............................hLppLscR.Ec.pll...p...hRah.......................tpstT.LcEluc....p.h..s......l.oc.pRl+Qlcpc..Alc+LRp..................... 
0 1461 2782 3486 +858 PF03145 Sina Seven in absentia protein family Mifsud W anon Pfam-B_1854 (release 6.5) Family The seven in absentia (sina) gene was first identified in Drosophila. The Drosophila Sina protein is essential for the determination of the R7 pathway in photoreceptor cell development: the loss of functional Sina results in the transformation of the R7 precursor cell to a non- neuronal cell type. The Sina protein contains an N-terminal RING finger domain Pfam:PF00097. Through this domain, Sina binds E2 ubiquitin-conjugating enzymes (UbcD1) Sina also interacts with Tramtrack (TTK88) via PHYL. Tramtrack is a transcriptional repressor that blocks photoreceptor determination, while PHYL down-regulates the activity of TTK88. In turn, the activity of PHYL requires the activation of the Sevenless receptor tyrosine kinase, a process essential for R7 determination. It is thought that thus Sina targets TTK88 for degradation, therefore promoting the R7 pathway. Murine and human homologues of Sina have also been identified. The human homologue Siah-1 [1] also binds E2 enzymes (UbcH5) and through a series of physical interactions, targets beta-catenin for ubiquitin degradation. Siah-1 expression is enhanced by p53, itself promoted by DNA damage. Thus this pathway links DNA damage to beta-catenin degradation [2,3]. Sina proteins, therefore, physically interact with a variety of proteins. The N-terminal RING finger domain that binds ubiquitin conjugating enzymes is described in Pfam:PF00097, and does not form part of the alignment for this family. The remainder C-terminal part is involved in interactions with other proteins, and is included in this alignment. In addition to the Drosophila protein and mammalian homologues, whose similarity was noted previously, this family also includes putative homologues from Caenorhabditis elegans, Arabidopsis thaliana. 
28.70 28.70 28.70 29.00 28.60 28.50 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.27 0.71 -4.65 10 1309 2012-10-02 00:06:50 2003-04-07 12:59:11 11 16 738 7 396 752 2 133.90 53 60.55 CHANGED IRsLAlE.+lAsplpFPC+aushGCslp.LPhpphscHEEpCca+PYpCPhss..ucCpWpGshcslhsHLhscHpshhp.pus.-lhaltsshshstussWhhsp............................pCaGcpFpLhhEth-ts..ssphaashlphhGspcpAcpFuYcLELtu..spR+LpWQuhPpSI+-shcpshcup.....DsLlhpscsuphFucsss.....Ltl+Vo ........................................IRNLAME.KVAsS....V.hFPCKYA.........SsGCclo...LP...aT....EKs-..H.....E.E.l.CEF....RP.Y.S.C...PCPG.......ASC.KW.QG.S.L..-.uVM.sH.Lhc...pH..........KS.I.T..T.L...Q...G....E....DI....V.FL.A.T.D.I..N....L.P..G..A..V..D..WV................................................................................................................................................................................................................................................................................. 0 90 187 279 +859 PF04938 SIP1 Survival motor neuron (SMN) interacting protein 1 (SIP1) Moxon SJ anon Pfam-B_5071 (release 7.6) Family Survival motor neuron (SMN) interacting protein 1 (SIP1) interacts with SMN protein and plays a crucial role in the biogenesis of spliceosomes. There is evidence that the protein is linked to spinal muscular atrophy (SMA) and amyotrophic lateral sclerosis(ALS) in humans[1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.68 0.70 -5.01 12 252 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 189 1 164 243 1 206.00 24 72.25 CHANGED LhPl-tsD.s.t-aD.osPPcsstEYLR+V...........phEAppCPsVV.lAplssp+.hc+cQoV.hsl.s......ss.ssPcshsPohcWp....ppQlspFspsRpslsph+pchpsp.lDssss.P..pDp-tWc+FCLscp.................................c.uhsPhLohlp+hsQ.sslsplLEhLstW.hpEcshssp......luRWlYALLACL-pPL.s-spSslRpLAR+CuplRst...Lcpc.......c.-plsshNLlIslluRaFsQpD ...........................................................................................................................................thp......Ppss.EYLppV............................................ph.EAtphP...pVh...lu.p.h.....s.......p...p...h..ppp....ps.s.......s....s..............................sh.....ss.tt....h......P.s....pWp....pt.ltpFpphRp.pl..tp............t............p..h...t.t...t...hs...t...p.....h....................t..spptWp.p.hhhspp...................................................................................................................................Phh......p..h.l...p..hs..p.tpl.t...l.l.p.hh.p.a...htppt..h..s..p............................................hspWlauLLspl-..p..P..L..scspuhlRpLsRp.C..p.lR........hptp..............................th......h.....s..h..hlslh.sph.atQ............................................................. 
0 65 96 134 +860 PF03530 SK_channel Calcium-activated SK potassium channel Griffiths-Jones SR anon PRINTS Family \N 22.90 22.90 23.40 23.40 21.80 20.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.68 0.71 -4.27 10 342 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 90 0 179 301 0 110.00 53 20.18 CHANGED RLt+R+tLaE+RK+LuDaALshAhhGIllMVlEsEL.ohtlYs+..........................................uShYShsLKslISlSTllLLuLIlsYHApEI..........QLFhlDNGA-DWRIAMThcRlhhIsLELllCulHPlP..Gsa.hasas ............................................................+Lt+R+tL.FEKRKR.LSDaALlhuhFGIllMVlEsEL...ohs...s....Y..sK.............................................tShaShsLKCL.ISlSTllL.LGLIlhYHA+El.................................Q.LF..hlDNGAD.DWR.IAMTh-RlhhI.sLEllVCAIHPlP..GpahF.W........................................................ 0 35 51 113 +861 PF02437 Ski_Sno SKI/SNO/DAC family Mian N, Bateman A anon Pfam-B_2013 (release 5.4) Family This family contains a presumed domain that is about 100 amino acids long. All members of this family contain a conserved CLPQ motif. The c-ski proto-oncogene has been shown to influence proliferation, morphological transformation and myogenic differentiation [1]. Sno, a Ski proto-oncogene homologue, is expressed in two isoforms and plays a role in the response to proliferation stimuli. Dachshund also contains this domain. It is involved in various aspects of development [2,3]. 
20.00 20.00 21.50 20.40 19.60 18.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.50 0.71 -4.52 10 565 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 112 15 283 519 0 107.10 39 17.76 CHANGED pP..P.hh...............SsPshssuDsssNEs+hlcLcGt+luuFlVsG-chLCLPQlFshhLKch..uLcplaT+lccLcIsslsCTs-Ql+ILRuLGAI.suVsRCtLIT++DhEpLhsshhsu ..........................................................s.....s..s...pss.pht.s.LcG.plusFh.l...sGpchlCLPQlhss.lLKch...ulpplas+hccLtIhs.spCss-QLcIL+s.hG.uls.uss+Cs.LIT+cDsEpLhpuhht...................... 0 66 89 178 +862 PF01466 Skp1 Skp1 family, dimerisation domain Bateman A anon Bateman A Domain \N 21.20 21.20 21.30 21.20 21.10 20.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.51 0.72 -3.95 62 1119 2009-01-15 18:05:59 2003-04-07 12:59:11 14 21 383 62 707 1038 25 74.70 46 40.61 CHANGED lppWDt-Fl..pl..Dpp.....hLF-lIhAANYLsIcsLLDLsCppV.....AshIK.GKTPEEIRchFsIpNDFTP.EEE...pp.lRcENp..Wsh- ...............lppWDt.cFl...cl....Dps.....pLF-LI........l...........AA..NYLsIKsL.........LD.l.sCpsV.....AsMI.K.......G..K...o.......P.E.E...I.RchF.s.I.p.s.D.aTs..EEE..tp..l+cEstWs................................ 0 222 328 583 +863 PF03931 Skp1_POZ Skp1 family, tetramerisation domain Bateman A, Griffiths-Jones SR anon Bateman A Domain \N 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.74 0.72 -3.87 17 1315 2012-10-02 01:20:04 2003-04-07 12:59:11 10 19 380 90 843 1197 12 61.60 32 39.07 CHANGED phlhLpSSDscsFEV-ctlAhpStsIcpMlEDssssst......lPLsNVsucILsKVlEaCp+aph .....................hlpLhS.oD.Gc..pFpVc.c.p.h..A.p.p.St.T.I....+.s...M..l....c........s.......s...tpsp..................lP.ls.N.Vsup....lLp.K.V.lpa.CpaHt.t................................ 
0 254 396 691 +864 PF00395 SLH S-layer homology domain Finn RD anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.04 0.72 -4.21 175 10758 2009-09-11 16:01:23 2003-04-07 12:59:11 15 704 543 2 2552 9714 348 43.60 29 13.80 CHANGED sFsDlsstt..h....pslpthhptG.lhpGhs...ss........p....FpPspslTRu-hu ...........................FsDlssp..t.....ah.......ptl....ptl.s..p......t......G..llpGhs...........sG...............p..............FpPspslTRschA............................................................. 0 1376 2129 2319 +865 PF01423 LSM Sm; LSM domain Bateman A anon Psiblast SMD1_HUMAN Domain The LSM domain contains Sm proteins as well as other related LSM (Like Sm) proteins. The U1, U2, U4/U6, and U5 small nuclear ribonucleoprotein particles (snRNPs) involved in pre-mRNA splicing contain seven Sm proteins (B/B', D1, D2, D3, E, F and G) in common, which assemble around the Sm site present in four of the major spliceosomal small nuclear RNAs. The U6 snRNP binds to the LSM (Like Sm) proteins [3]. Sm proteins are also found in archaebacteria, which do not have any splicing apparatus suggesting a more general role for Sm proteins. All Sm proteins contain a common sequence motif in two segments, Sm1 and Sm2, separated by a short variable linker. This family also includes the bacterial Hfq (host factor Q) proteins. Hfq are also RNA-binding proteins, that form hexameric rings. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.91 0.72 -4.42 129 8102 2012-10-01 22:42:21 2003-04-07 12:59:11 17 62 2734 482 4356 6190 344 67.00 23 58.52 CHANGED phLpphh.......sppVhlpLps..G.pplpGpLpuaDpahNllLs-spEhhttt.....................................ptt.l....s..hllRGsslhhls.. 
.........................h.....hh.......pp...V..pl..h..Lps........G..h.p.lp.G......p....l..p....ua..D..p.a..h....N..ll.Lp..s.spph..................................................................................................................................h...........t..hh..l.pGpslhhl........................................................................................... 0 1475 2432 3549 +866 PF02463 SMC_N RecF/RecN/SMC N terminal domain Bateman A anon [1] Family This domain is found at the N terminus of SMC proteins. The SMC (structural maintenance of chromosomes) superfamily proteins have ATP-binding domains at the N- and C-termini, and two extended coiled-coil domains separated by a hinge in the middle. The eukaryotic SMC proteins form two kind of heterodimers: the SMC1/SMC3 and the SMC2/SMC4 types. These heterodimers constitute an essential part of higher order complexes, which are involved in chromatin and DNA dynamics [1].\ \ This family also includes the RecF and RecN proteins that are involved in DNA metabolism and recombination. 
40.00 40.00 40.00 40.00 39.90 39.90 hmmbuild -o /dev/null --hand HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.52 0.70 -5.11 29 12849 2012-10-05 12:31:08 2003-04-07 12:59:11 14 52 4778 48 3915 12683 5334 665.40 15 93.51 CHANGED hlcclhlpsFKSat.cplhhsFstsFssIlGPNGSGKSNllDAIhFVLG.p.supplRupphscLIat...............pstsssppApVplhF........sppspt.............p..hslpRphhpsGs...ScYhI........NGcsssh.p-ltcLLtptsIshpshphlh.tsthctlthp.spc+p.hc-tsuhsphcpttpcphpplhpppppttph.hphpphptphpphtpppppt.ch.t.ttt...ht..hhhhph..h.tp..p..pp..t.ptpp.t...t..ptptph.tp.pp..t.ppp..p..pt..t.htpt.tt.tpph.php.tp...ppthp.tppp.tphppphpttpt.h.p.pt..p.hthtp.t.ttthpphtp.tttt.tttpp..hthttttpt.ttt.p.ht.tht.h....pphpt..phtpp....hp.hhpt..t..tt.tp..ph..t..t.tptt....t.h..t.t...pt.tt.h.thp.h....t.....tt....t.....t....................h..................h............hh......................h............................................................................................................................................h....h.........t..................................................................p.........t.tt.......t.......h.p...t..t...t..ttp.t.h.tp..t...pt..ttt.h.th....pt....t.tp.tpt.ttp.pp.hpt.tpp.tt..tt.p.p....pp.t...pt.tthtpt.hpt.tht.....ptt..t..p.p...t..t.p...t.pt.p.h..t..t.htt.ttt.tt.pptpp....htp.t.t..pp.pt.t.p.t..h....t.tp..hc....t..p.t..t.p..t.p....t..ttht..tthp...h.th.p.t.p..t.t.phtph.tpcpchhphhhctpppphpphhtthsthspshpthhphhthuuputhphps.psshssGlphsspsssKphpplp.LSGGEKoLsALALlFA.....lpphcPsPhYlhDElDAALD.pNsptlAphl+..ppscp.uQFIVlol+pshhptA....-plhGlhh.csuhsp 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lpplplps.F.t..........sh..............c.p.......h.....p.....l........p......h.......p.....s.............u.........h.....s.....s.l...........s..G..........NGu...........GKSs...l...........l...-......A.......l.....t.......h.......s......L.......G................p........s...s......p.......p.................h........R......s.......s.....p.....h.......p.....s.....l...Itt.............................t.....s.h.......s....t......u.....p.......l...p....h...h..h.......................................pp.p.stt..................................................t.p......l.h.........l..p...R...p.....l.....t......p.....s..Gc...........o..p....h......h.....l........................N...s.....p..........t.......s.....p.....h......p....p.....l...t.....p....h...h......t.....p........h...h......l......s...h...cs.phhttt.p.chhhhp.tthpttht.ttthtphpphhcpppttlpptppptpphtthhp.hthphpthtttt.tttt..t...t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.tt.httthtthtthhpttpphhtthhtthhth.pth.h.hthhhhhsptt.tths.pshhtsshphssp.stpth.hhphh.S.G...G..E..h.........s..hh.s....L......A...L..h..h..u................................h.t...p......h........p...s......t......P....h.....h...l..h.DEl.-usL.....Ds..t...p.t.p.t....l...u.......p...h.l....p.................p.....h...s.............t.............t........s............Q..h...l...s...l..o.....p...........t....h..h.t...u......p...hhl.h.......t...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................... 0 1352 2437 3292 +867 PF04158 Sof1 Sof1-like domain Wood V, Finn RD anon Pfam-B_9404 (release 7.3); Family Sof1 is essential for cell growth and is a component of the nucleolar rRNA processing machinery [1]. 25.00 25.00 26.10 29.90 22.10 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.98 0.72 -3.96 28 344 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 289 0 240 346 7 85.70 41 18.60 CHANGED AsASE+lGllosRE+puhpYsptL+E+apahPEI+RIuRHRHlP+slhptpch+pthhcAc+R+EcNc+pHSKsup...shhsE+cK..pll ......ApASE+lGlhssRE+pthpYscsLK-+apahPEI+RIuRHRHlPKsIhpAtchc+hhhc.up+RKEtNcR+HoK.sp..hshhsE+cKhll............... 0 88 137 202 +868 PF01033 Somatomedin_B Somatomedin B domain Bateman A anon Bateman A Family \N 23.10 23.10 23.30 23.20 23.00 23.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.47 0.72 -3.99 73 1147 2009-01-15 18:05:59 2003-04-07 12:59:11 12 76 110 24 540 1024 13 45.40 33 10.31 CHANGED s.pSCpuR...Cspshppst..........tCpCcstChp.hssCCtDYcphChtphs ........................tSCp.sR.....Cscthttsp.......................sCpC-.spChp..hssCC.DYpphCttp.t....... 0 152 189 320 +869 PF03700 Sorting_nexin Sorting nexin, N-terminal domain Finn RD anon Pfam-B_29150 (release 7.0) Family These proteins bins to the cytoplasmic domain of plasma membrane receptors. and are involved in endocytic protein trafficking. The N-terminal domain appears to be specific to sorting nexins 1 and 2. 
21.00 21.00 22.00 29.10 20.10 20.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.82 0.71 -4.04 5 151 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 39 0 51 126 1 120.90 46 24.42 CHANGED AuEREPPPlsDscsp........EsE.........-pEEGEDLFTSssSs.cossSSP-suslPuE-uS.TsSNGPKssplLLDDDcEDLFAEAT-EVSLDSPE...RcsILSopsSPAlTPVTPoSlIsPRlE.hu......aDRShEElEEE .............AuER.PPPhsshcsp...........-...E.............p.p-GEDlFT.u.....ss.....s..s........c.......sp..ss.p...psu.LPhps.s.....S......ppN..G....+tpp...Dp-.pDLFA.....-..AT.ElSLDSsp.......+cs.....h.po.....S..ssp.s...T...s.o.oh.t.P................ohEElEEE.............................. 0 2 7 21 +870 PF04130 Spc97_Spc98 Spc97 / Spc98 family Bateman A anon Pfam-B_3531 (release 7.3) Family The spindle pole body (SPB) functions as the microtubule-organising centre in yeast. Members of this family are spindle pole body (SBP) components such as Spc97 and Spc98 that form a complex with gamma-tubulin. This family of proteins includes the grip motif 1 and grip moti 2 [1]. Members of this family all form components of the gamma-tubulin complex, GCP [2]. 
27.00 27.00 27.30 27.20 26.30 26.80 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.85 0.70 -6.19 120 1599 2012-10-02 13:20:28 2003-04-07 12:59:11 8 21 316 1 1118 1585 20 536.90 16 64.78 CHANGED -l......LhsL.hGhs.s...hhphpt.........................tpaplspsh....................shps.............llpplhchuphapplp..pahp..tptt...................................uhhhpuhssulpphLp.sahp.hlspl-pphhp...............................................h.oLtplhthlp..phhphlphLtplspp.......h.....................................................t.tthpGuplLshlhpph....t.p.Gs......php.p....hhpplhppstpsa...hc.................................hlppWl..hcG......p....lp....D.....................t...E................FFlpps........................................tttpspphWpp.........p..........apl......pps......................................hl....P..................................sFl..psh...uppIlhsGKslshl+phstptthtpstt............................................................................................................................................................................................t.hp.hpssphpph....................................................................................................................................................................................................................................................................................................................................................................................................lppthphss..pplhpl............lhpph.cLh...pc.Lpsl+pa.......hLl....spG-Fhsphhpphtsplpcsspphpstphpshhptshttsssp......................................................................................................................................................................................................................................................................................................tptpthpttsstthtptpshpuhsshsLpYpls..hPLslll..otp....
........................slppYpplF............paLhpl++sp........htLs.phWtp.p...........t............................t..tth...hpph.......hhl+pchhpFlpslptYhh............h-......VlpssWpp.hppplp....................................................................................................................................pspsl-p.l......hptH.ppaLsslhppsh ......................................................................................................................................................................................................................................................................................................................................................................................................................hlhhh..Gh..u...hh..hp.....................................................th..h..t.......................h..s.pp...................................lhp.p.lhthu.hhh...tlp...pahp....t.........................................................................................................................u.hhpuh.ssul.p.p.hLp..paht...hlh.p.lcpp.h................................................................................................................h.oL..plhhhhp.....hhh.hh..ph..Lhtl..spph.........................................................................................t.hpus..tllshlhpth.........tp.Gs........................ht.p.................hh.plh.t.tst...psa...hp.............................................hlppWl....hpG...............................l.......D..........................s.................hp.......E................Fh...lppp..........................................................................t..tpphapp.............p......................................................apl..........ptp..............................................................................hl.......P...............................sa..l....tph......upplhhs.GKt.l...p.h...l......+ph......st....t....t..h..tt.....t............
.......................................................................................................................................................................................................................................................................................h......th.th.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lp.pthth..s....s.ptlh.ph..................hh.....p...ph...p.....Lh........tp.Lp.sl.+p.a.......hLh.....tpG...-hhtt..........hhp................h.....p........tl...p.p..s......p..t...h....t....h.....t...hp.s...h...h..p....s..ht..t..t....p..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.....h...p...t...t...h..t..............................s.......t...shs.shpL.p.....Y.p.l.....hPlshll...s.p..............................shppYp
hlFpaLhpl+chph.Lp..ph....aht...........t.......................................................................................hpth..........................hhlptchhh.Fl.pplt.Yhh................hp............................llpspapp..h.pplp...........................................................................................................................................p.s.p.sh-p.l...hthHtpaLsphhtt..................................................................................................................................................................................................................................................................................................................................................................................................................... 0 419 621 910 +871 PF04435 SPK DUF545; Domain of unknown function (DUF545) Finn RD anon Pfam-B_429 (release 7.5) Family Family of uncharacterised C. elegans proteins. The region represented by this family can is found to be repeated up to four time in some proteins. 22.10 22.10 22.20 22.10 21.80 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.20 0.72 -4.09 37 379 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 5 0 372 368 0 102.60 21 29.69 CHANGED hs+hMsFLl-pTKDs.sE...Pls.spplFp-Fsch-ssshs...tpsYhp+F+ppLAPpMsphssYSIEpRlRlMFuhuucVp--.FLpplcppGs.VpLD-ppRIs+YsSpDGplpL .........................................h...hhpalhppspph..p.....Ph.....htplhcca.tpp.p.t.s..tts.............hpsh.hp....+hp..p..tl.t.....plpt...h.p.p....ashcs+l+hhFshus.sVs.p.....p...FL....p................c....L............c.p...p.u.p..V..p...l...D.c..p.pR...I..h.cYpu.pctth................................................ 0 69 86 372 +872 PF04014 Antitoxin-MazE SpoVT_AbrB; Antidote-toxin recognition MazE Kerrison ND, Bateman A, Finn RD anon COG2002 Domain MazE is the antidote to the toxin MazF of E. coli. MazE-MazF in E. 
coli is a regulated prokaryotic chromosomal addiction module. MazE antidote is degraded by the ClpPA protease of the bacterial proteasome. MazE-MazF is thought to play a role in programmed cell death when cells suffer nutrient deprivation [2], and MazE-MazF modules have also been implicated in the bacteriostatic effects of other addiction modules [3]. MazF toxin functions as an mRNA interferase, cleaving mRNAs at ACA sequences to inhibit protein synthesis leading to cell growth arrest [4]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.13 0.72 -4.18 145 4979 2012-10-01 20:57:08 2003-04-07 12:59:11 13 18 1997 32 1417 3448 534 45.50 23 42.49 CHANGED hpsssshtlslPpchtcph.....slct...Gcpltlhhp..ss..tlllp.hppttp ..............+hsppstlsIPpclc...cpl........slpt......G-plplhsp...ss..plllp.ht....st.......................... 0 482 933 1183 +873 PF00622 SPRY SPRY domain SMART anon Alignment kindly provided by SMART Family SPRY Domain is named from SPla and the RYanodine Receptor. Domain of unknown function. Distant homologues are domains in butyrophilin/marenostrin/pyrin homologues. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.61 0.71 -4.04 68 8383 2009-01-15 18:05:59 2003-04-07 12:59:11 23 306 478 24 4618 6876 377 121.50 19 17.99 CHANGED sG+aYaElcltstst........hplGhsppss..................................................................sttp...hsucc.tuauapstt.pphtssps........tthspthpp.........schlGshlDhpss....plsFt.tNGptht......hsFp.psphs...thlaPhhsht.....spthphphstht .........................................................................................................................................G+aYWEVclsspst..............................htl.Gl...s..ppsh....................................................................................................................................................t...s...t..t..p.....h..hu.p...s....t...s..a.s...h.....p.h...p....s.......p..t...a...t.s.tps....................tph..s....thpt............................................scplGlh...l.....D.....h..csG.................p..ls.Fh...p.s.s.pt...t.............hhsFp......h...phs.............ts.l..aPhhsh....................................................................................... 0 767 1604 2913 +874 PF03105 SPX SPX domain Mifsud W, Bateman A anon Pfam-B_502 (release 6.5) Domain We have named this region the SPX domain after (SYG1, Pho81 and XPR1). This 180 residue length domain is found at the amino terminus of a variety of proteins. In the yeast protein SYG1, the N-terminus directly binds to the G- protein beta subunit and inhibits transduction of the mating pheromone signal [3]. This finding suggests that all the members of this family are involved in G-protein associated signal transduction. 
The N-termini of several proteins involved in the regulation of phosphate transport, including the putative phosphate level sensors PHO81 Swiss:P17442 from Saccharomyces cerevisiae and NUC-2 Swiss:Q01317 from Neurospora crassa, are also members of this family [see 4,5]. The SPX domain of S. cerevisiae low-affinity phosphate transporters Pho87 and Pho90 auto-regulates uptake and prevents efflux. This SPX dependent inhibition is mediated by the physical interaction with Spl2 [6] NUC-2 contains several ankyrin repeats Pfam:PF00023. Several members of this family are annotated as XPR1 proteins: the xenotropic and polytropic retrovirus receptor confers susceptibility to infection with murine leukaemia viruses (MLV) [1]. The similarity between SYG1, phosphate regulators and XPR1 sequences has been previously noted, as has the additional similarity to several predicted proteins, of unknown function, from Drosophila melanogaster, Arabidopsis thaliana, Caenorhabditis elegans, Schizosaccharomyces pombe, and Saccharomyces cerevisiae [1,2]. In addition, given the similarities between XPR1 and SYG1 and phosphate regulatory proteins, it has been proposed that XPR1 might be involved in G-protein associated signal transduction and may itself function as a phosphate sensor [1]. 
26.90 26.90 26.90 26.90 26.70 26.80 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -4.62 168 2197 2009-01-15 18:05:59 2003-04-07 12:59:11 14 120 337 0 1617 2267 25 161.90 18 28.52 CHANGED MKFu+pLppphl.....PEWpsp.....YlsYcpLKKhl.+plpptttpt....................................................................................................................................................................................................................................................................................................................................................................................ttpthpp........cspFhptL.-pELpKlssF....................................ap.......p+tp-hhc............+hp..........tLppplpphttptstttttt......................................................................................................................................................................................................................h.pthtpLcptlhchhtplptLcsatpLNhsuFpKIlKKaDKhh...spp..hptpahpphh 
............................................................................................................................................................................................................................................................................................................h+Fup.h..t..................................ta....t........ahtYp..................hKp...l.h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.Fh....h.p.phtc.......l..a................................................................................................................p.tth...................................pht................h.......t.......................................................................................................................................................................................................................................................................................................................................................................................................................................h..th..thh.th..L.pa..............t.lNhhu.h.KlhK........KaDKhh..t..............t....................................................................................................
.................................................................................................................................................................................................................................................................................................................................. 0 468 969 1406 +875 PF03125 Sre C. elegans Sre G protein-coupled chemoreceptor Mifsud W anon Pfam-B_352 (release 6.5) Family Caenorhabditis elegans Sre proteins are candidate chemosensory receptors. There are four main recognised groups of such receptors: Odr-10, Sra, Sro, and Srg. Sre (this family), Sra Pfam:PF02117 and Srb Pfam:PF02175 comprise the Sra group. All of the above receptors are thought to be G protein-coupled seven transmembrane domain proteins [1,2]. The existence of several different chemosensory receptors underlies the fact that in spite of having only 20-30 chemosensory neurones, C. elegans detects hundreds of different chemicals, with the ability to discern individual chemicals among combinations [1]. 
20.00 20.00 20.00 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.17 0.70 -5.53 19 368 2012-10-03 04:04:29 2003-04-07 12:59:11 13 8 32 0 339 323 2 246.30 15 87.61 CHANGED Mllphtss.....hhalPlahhspsh....hphlhslh-llhallssYlhhhslhlhh+lphFHpNhhhlslshhshWaEhllGKhlshsYchtlh....sht........hphhthWTs-.schl.VpshsslphlhhuuFLphHah.hohlaulhslslERshAShhlcsYEpps+haIs.lhLlhltQhluIshu..hhhlhphls.hlhhhhsahlssslushhahhlcphNpphppchcs.p.+...+..haTLSpRFQlKENl+sl+lhp+llhsllshlslsshslhhLhacl.....hP...s.hhsahhEshlhl.PhhIshshhholstW+pcapphhs.hhh...hc...hhptphthp.h.h.psppcphphETchYFpQLscSWp ....................................................................................................................................................................................................................................................................................................................h.....h......h...............hh...h.h......hh.hhhhERhhAohhhtsYE...p..p............p.............h........l...h.h...l......h.....h.h.......................hh........s..h...........s................h.h.h.h.........................h........h..............h...............h....h....h............h.....h.......hh.......h...........hh...............shh.............h.....h......h..h.h.hh.....hN.phh.p..p...h....p..t............t.............Yo.Lu.t+aQlpENl+.shp................hh..pt.h....h..h.s.h..h..h.h.h.h............h.h.hh.h....h....h..h.........h.p.h.............................hhhh.h...h.p.....h....hh..h..sh...hh.....s.h...hhh...............tap.p..h......h.....................................................................................................p......p.....aF..h............................................................................ 
0 115 150 339 +876 PF04086 SRP-alpha_N Signal recognition particle, alpha subunit, N-terminal Wood V, Finn RD anon Pfam-B_7342 (release 7.3); Family SRP is a complex of six distinct polypeptides and a 7S RNA that is essential for transferring nascent polypeptide chains that are destined for export from the cell to the translocation apparatus of the endoplasmic reticulum (ER) membrane [1]. SRP binds hydrophobic signal sequences as they emerge from the ribosome, and arrests translation. 25.40 25.40 25.70 25.40 24.80 25.30 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.96 0.70 -5.02 27 323 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 265 2 237 320 1 243.40 23 40.50 CHANGED ssslNuLIpsVhlEE+s.............usss.ap+-....paTLKaphspEhsLlFVslYQplLpLsalDKLLssl+phFh-hY..................cspLp..p.psph.th....pFcch.Fcphlc-hEpsupt.tpssphhpphcpppcs.hospshh...s...t.........tpp.tttptt.pspsssssssp.t..s.........lhstc.h......ppRt+hhtphtst...........sssscptos+pstts+...pshKchRhWDtsG.....spc-stsLDaS....t.ss.tt..ssssss.ptlstpshtptTtKGp.hh.tDl......s-Esc .......................................................................s.tslNsLIpsshlp.ERs................usst.a..pp-.....thsL+aphsN-htLlFVssaQplLp..L...salDcLlssl+ph.FhchY........................tsplt...t......h.t............pFsp....Fpphl.....pph.Eppup....pts....t.h...pphpp....s.p.c.stt..s.stshh..t.st..t............................tttttttt...tt....t.....s...psp.s....s.ptt.....t......................h.ptchh...............ppRtchhppttt................................t.pts..p....t.s.....ppt.t..p...pttKp.Rh.Ws.su......pppstt.LDaS.........ssttt.....t....t....t.ht..ttphh.tp..p.G.p.h..h-h.........tp...t......................................................................................... 0 80 131 196 +877 PF00448 SRP54 SRP54-type protein, GTPase domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family This family includes relatives of the G-domain of the SRP54 family of proteins. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -10.97 0.71 -4.87 55 11362 2012-10-05 12:31:08 2003-04-07 12:59:11 17 39 5002 94 3068 9263 5175 195.50 41 44.83 CHANGED ssVIhhVGlpG..uGKTTThuKLAphhpcc...Gp+lhllAuDTFRuuAh-QLcphupphsl.hhs...tttts...DPsslshculppuKppt..hDllllDTAGRlppcpsLhcELpclpcll........pPs-slLVlDushGQsuhs.QA+sFp-sl.s.lsGlIlTKlDGsA+GGusLSlstthphPItFlGsGE+l.-DLcsFcscpals+LLu ...............................tVlhhlG.lp..G..sGKTTolGKLA...p...h.h.......p.....p........p...........s.............++....VhLsA.u..D..s....aR..s...A.A...l....-..Q..Lp.s.h.u...cp.....s..........s.........lslhs........................ttpss....cPss......l....sh.........c..Al.p.p..A..+....t..c.t.............hD..lll.....lD.T...AG.RL.p...h..c.......ps.LMpE.L.c.c..lp..csh...............................s.P..p.Esl.LVl.DAhT.G.Q.s.A....l..............s..p.Act...F..........s.....-........s............l...............s................l..o.....G..ll.L.TKLDG.s.A.+.G.Gs.slol..t...........p.....h.s.......h...P..I..+a.lGsGE+l..-DLcsFp..s-chsstll................................................................................................... 0 1055 2004 2604 +878 PF02881 SRP54_N SRP54-type protein, helical bundle domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.47 0.72 -3.87 189 9665 2009-01-15 18:05:59 2003-04-07 12:59:11 14 28 4872 82 2570 7341 3204 77.20 29 17.42 CHANGED L.ppsLp+shpplssp...hhh.sccpl..cchlc-lEptLlpADVuhpsspcllcpl+c.t....ht.............tptlsstpplpphlpcpL ...........................LpcpLpcohps..ltup...........shl..sccsl...c-hhcElctsLLpADVulplscchlppl+c.tlu................hcslssspplhcllp-EL................................. 
0 854 1649 2165 +879 PF02978 SRP_SPB Signal peptide binding domain Griffiths-Jones SR anon Structural domain Domain \N 20.10 20.10 20.10 20.10 19.40 19.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.52 0.72 -3.77 124 4974 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 4793 40 1356 3784 2145 100.10 43 21.33 CHANGED pFsLcDhhpQlpplpKMGslsplhpMlP.Ghsth......p.tp.ths.-+thc+hcuIIsSMT.cER..psPcll.................................ss...SR+pRIApGSGsslp-VscLl...KpapphpchM ...........................................pFsLsDFl-QlpQh+pMGshsslh....sMlP..G.huph.........t.t.p.phs..-+plp+hcAIIsSM.T.pER.....psP...-.ll.............................................ss.......SR++RIAtGSGsslp-..VN+Ll...KQFpphp+MM.............................. 0 465 879 1145 +880 PF00436 SSB Single-strand binding protein family Finn RD, Bateman A anon Prosite & COG2965 Domain This family includes single stranded binding proteins and also the primosomal replication protein N (PriB). PriB forms a complex with PriA, PriC and ssDNA. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.88 0.72 -4.16 70 10092 2012-10-03 20:18:02 2003-04-07 12:59:11 20 7 4898 110 2030 6702 2778 102.00 30 65.61 CHANGED lN+VhL..lGpLspD..P-lRhst.sGsslssFolAssc...phps.......tsptcpps-ahplslasc.hAEhhspYlpKGs.hlhVpGcLps.cpapspp.G....pc+hss-lhsc....plphL ...........................................hN+VhL..lGpLspD......P.E..l......R.hss....s....G......s.....s.....V..u..shslAssc.......paps...................tsp.tc..cp.s-..a..hsllh..a...s....+....h..........A...E....s.........s.s.........p.....a.....l........p.K.G..s...hlhlpG.cl......p.......o..Rp..ap..spp..G............pc.c..h.s...sElhsp.....pht.............................................. 
0 640 1330 1739 +881 PF04503 SSDP Single-stranded DNA binding protein, SSDP Mifsud W anon Pfam-B_2031 (release 7.5) Family This is a family of eukaryotic single-stranded DNA binding proteins with specificity to a pyrimidine-rich element found in the promoter region of the alpha2(I) collagen gene. 21.70 21.70 22.10 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.22 0.70 -4.67 5 325 2009-09-11 22:41:17 2003-04-07 12:59:11 8 5 59 0 134 249 0 206.60 62 77.49 CHANGED EHSSEAKAF.............................HDYSAAAAPSPVLGNhPPGDGMPsGPlPPGFF..........................................................................QPFM....................SPRYPGGP.....RPPLRhPNQPsGGVPGSQPL.....LPuGMDPT.RQQGHP...............NMGGPMQRMTPPRGMsPL..GPQ................NYGGuMRPPsNuL....uGPGMPGMNMGPGuG...................................RPWPNPsoANSI.......PYSSuSPGsYVGPP.................................GGGGPPGTP..IMPSPADSTNSuDNMYTlMNsVPPGssRuNFPMGPGSDGPMGGhGGMEPHHMNG..............................SLGSGDMDSlSKNSPNNhSGLSNs.....PGTPRD.........DGEM..GGN 
..........................................................................S.............AA.u.........A....PSPV......hG.shs.Ps.-u..M...ssGPh.s.P.GFF.....................................................................................................................................QPFM....................SPRas.GGP.....R..PslRhs.sQ....s.u..ul.PGoQPL.....LPsuMDPT.Rt..Q.G.HP............................sMGG.sMQR...M.sPPR.G..M..ss......h..GPQ.......................sYGu..uM..R..PP.Nu.L.............GP.u.MPuhNM.GPG.u..t.......................................................t.PWs..sP.su.NSI.......sYSSuSPGsY.s.GPP.................................GGGGP..P..GTP..IMPSPu.DSTNS......u.-NhYThhNs.lsP......GssR..sN......F....PMGPGs-.G.PM...uuh.u.uM..EsHHhNG....................................................SLGSGDhDul...sKsSPs.shu.hsN......PGTPR-.........DuEh..s...................................................... 1 13 25 69 +882 PF05030 SSXT SSXT protein (N-terminal region) Moxon SJ anon Pfam-B_4900 (release 7.6) Family The SSXT or SS18 protein is involved in synovial sarcoma in humans. A SYT-SSX fusion gene resulting from the chromosomal translocation t(X;18) (p11;q11) is characteristic of synovial sarcomas. This translocation fuses the SSXT (SYT) gene from chromosome 18 to either of two homologous genes at Xp11, SSX1 or SSX2 [1]. 21.00 21.00 21.20 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.11 0.72 -4.39 9 329 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 133 0 185 312 0 63.70 52 20.29 CHANGED RG............tsplsppslQ+LLDENspLIpsI.-YQNKG+AsECsQYQplLHRNLlYLATlADusps.hpspts ..................tls.ttIQ+hLDENppLIpsIh-hQN.....KGKss.....ECs.Q....YQphLH+NLlYLAoIADus.s.t...s............. 
0 45 78 128 +884 PF01852 START START domain SMART anon Alignment kindly provided by SMART Domain \N 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.20 0.70 -11.09 0.70 -5.00 18 2507 2012-10-02 19:24:03 2003-04-07 12:59:11 14 50 457 40 1330 2394 50 186.50 16 33.31 CHANGED plttpshpphhphspsspssWhp.s......t.pstspshpphs.s.........pscshRtpshV.hsss..lspplhcsh.....pWsppht....pspslpslssu.......sslphhhsphhs.sPlsP.R-ahhlRht+p.......ssssaslsshSlsssptss....ptshlRscphsSGhllpssssGh...........oplsalp+s-lcuphs..p.lh+slhpsuhshss+p..hhusLpptspp .................................................................................................................t....................t....W............................h.t...h................................................h.h.h.+.h..........s....h...l.....hs.st.........h.ht....plhpp..........pWspphh.........................ph.p.ll.p.p.l.s.ss...........................stl..h....h......h..h..h..p..h.......h..................s...h..s..........s..RDh.l...h..l.R..hhpp.......................tss.s.h...h.ls.....s..h.....Sl..p.............ts........p........h.........s............ttshl..R...up....h....h.s..u.s..hl...l....ps....h...s..ss............................s.p....lsal..h..p..s-.........p...u...h.h.s.....t.l.hp.hhttth........hhp..h.............................................................................. 0 408 676 997 +885 PF01740 STAS SpoIIAA; STAS domain Bateman A anon [2] Domain The STAS (after Sulphate Transporter and AntiSigma factor antagonist) domain is found in the C terminal region of Sulphate transporters and bacterial antisigma factor antagonists. It has been suggested that this domain may have a general NTP binding function [2]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.28 0.71 -10.12 0.71 -4.67 68 10190 2012-10-02 18:52:36 2003-04-07 12:59:11 16 126 3448 44 3587 10260 1791 111.40 16 25.99 CHANGED thphpttssl....hllclsGs.lshssustlpcpltphhtpt...................................................................................phlllDhssl..shlDosGltslhphhcphp.ppusplhlssspspltch.........lptsuhh.chh..........plhtolppA ..............................................................................................................................................................................h........sh.......hlh.p.l.p.G.s..l...as..s.u.p.t.h.pp.p.l....t..p...hhptt........................................................................................................................................................................................ptlll..D.........hs.s.l..shlD.sou.l.p.s...L....tp..h.h......c....p.....hp........p..........p....u....h..p...........l.h.l..s...s......l......p..s..p...lt..ph.........lp.t..s.s.h...t............................................................................... 1 1204 2247 3019 +886 PF03015 Sterile Male sterility protein Griffiths-Jones SR anon Pfam-B_1115 (release 6.4) Family This family represents the C-terminal region of the male sterility protein in a number of arabidopsis and drosophila. A sequence-related jojoba acyl CoA reductase is also included. 
21.40 21.40 21.40 21.60 21.10 21.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.03 0.72 -3.72 13 894 2009-09-15 11:21:16 2003-04-07 12:59:11 14 22 172 0 591 1002 19 92.10 26 18.78 CHANGED haHhLPAhhhDlhhhLh..GcKPhhl+.la+KI+...pslslLphFshspWpF-scNhpcLppphs.pD+c...hasF.DhsslsWc-YFpps.lhGhRpYLhKE ...............................................hahlPAhhhDhhhhlh.......Gp...c.....s..h....hh+...lh..p..+lp.......cshph...hpaFsh.p...p.....W......pF.ss.sN...h.pp.L.h..p..p.....h....s...s..p.Dc.c.............hFsa.Dhpp...lcW.p..c.Yhpsh.lhGh+p.Ylh+-............................. 0 181 263 487 +887 PF02544 Steroid_dh 3-oxo-5-alpha-steroid 4-dehydrogenase Bashton M, Bateman A anon Pfam-B_1713 (release 5.4) Family This family consists of 3-oxo-5-alpha-steroid 4-dehydrogenases, EC:1.3.99.5 Also known as Steroid 5-alpha-reductase, the reaction catalysed by this enzyme is: 3-oxo-5-alpha-steroid + acceptor <=> 3-oxo-delta(4)-steroid + reduced acceptor. The Steroid 5-alpha-reductase enzyme is responsible for the formation of dihydrotestosterone, this hormone promotes the differentiation of male external genitalia and the prostate during fetal development [2]. In humans mutations in this enzyme can cause a form of male pseudohermaphorditism in which the external genitalia and prostate fail to develop normally [2]. A related enzyme is also found in plants is Swiss:Q38944 (DET2) a steroid reductase from Arabidopsis. Mutations in this enzyme cause defects in light-regulated development [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -11.02 0.71 -4.44 10 1178 2012-10-01 22:51:20 2003-04-07 12:59:11 11 15 411 0 741 1677 1096 142.80 25 50.78 CHANGED sshPlhhlstAsh.FsshNGhlQuhahuaats........s-sahs..+hllGhhLalhGMhINh+SDpILR....pLRKsGpsuY+IPp.GGlFEYVSsPNYFGElhEWlGaALssWShsuluFAh...FohssLssRAhpHHpWYp+cF.ccYP+oRKhLIPFla ..........................................................................................s....h..hh.s....a.h.hs.h.............h..h...............................................h.....................t...h...h..h......u.h.hlF.h..h..u...hhNhpsc..hh.Lt..........................sL...R.....p......s......u......p..........p.......t...........a.....p......I.P......p......u......t....h.....F....p....h....VosPNYhhEl.l.t...W...l.u..............a...s......l.........h........s.........t.......s..........h........s.........s....h......h.....F....h...h.................h.s...h.....s.....p..hs..........h.....A.......h......t.p.....+....c...............h....Y...h...c..c.......F......p......c....Y....s.....p.p..R...t......s....llPalh.................................................................... 0 231 405 592 +888 PF02910 Succ_DH_flav_C succ_DH_flav_C; Fumarate reductase flavoprotein C-term Griffiths-Jones SR anon Structural domain Domain This family contains fumarate reductases, succinate dehydrogenases and L-aspartate oxidases. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.29 142 8079 2009-01-15 18:05:59 2003-04-07 12:59:11 15 23 3914 101 2187 5856 3967 117.20 28 20.41 CHANGED +pcLpchMpppsGlhRsppsLpcul........pclppL.pcch.p.p.ltl.t-puthhNssLhcslELtshlphAphhshuAlpRpESRGAHhRpD..aP.......................pRDD.................psah+HTlhhhttps..............l..phcsVphp.................PptRsY ..................................................pclpphMt.c.t.s....u.....l....h.....R.....s.....s.....c..tL.....pcu.l................ccl.p.t.l...p..pc.........h.........p.....p.........h.............t.............h....................c.............p.......s.............h.....h......N.............p.....s............h...h.p.sl...EL.t...shlplAphhshuAhtRcES..R..G...u..Hh..R.......D.....aP......................................cRs.D...................pah..ppsh.h........t.tt.....................h.th.p.l............................................................................. 
0 688 1387 1840 +889 PF00884 Sulfatase Sulfatase Bateman A anon Pfam-B_784 (release 3.0) & Pfam-B_7393 (Release 8.0) Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.00 0.70 -5.24 59 18763 2012-10-03 20:55:17 2003-04-07 12:59:11 18 130 3819 26 4719 15548 10237 301.40 17 53.82 CHANGED .sllllluEohtts....shshhuhsh...................ts...sPtl.........tths..ppGlhassha..ussshTssoh.shlouhs.tphs....................shhpp.sslsphhppt.GYpo.hht....shhspps.............................shp..hhsh.s.tsthtptt..shh................................................................................s.Dptlhsphhph.....hppt.scshalhlthhusH.................hhsscaspphtth.s.h.t.........................................................................phhssYssslthsDphlspllptL..pthtcsThllah.uDHG.tl......................................................th.htsttt.tt.ptthplPhllahssthtp...........ttscthhu....p.Dlhsollshs.uh ......................................................................................................................................................................................................................................................................Nllhl.h.....s-.shtht........................th.s.h...h..u.....th.............................................................TPpl.........stht................ppu...h...h...F.s.......s........h...a..........ss...s.........s......h....s........s..s.......o...h......s...s...h.h...o..u....h...h...sh.p....h...s...............................................................ths..tp...t..s...l.....s.p.h......L....p.....p...t....G..Y...p...o...........h....h.....t..t..................s....h.....h...s..pp.t........................................................................t.sa....p..........h...h......s.....................h.....t.....s....t...h.......t....t......t...........t...........................................................
...............................................................................................t..h..h.s...p...h....h.....h...p...p...s..h.phl...................ppp...ppPa.h.l....h.......lt.h.....hs.s..Hh.....................................................h.s...p..c.....a......p..p.....h.....t....t....h............................................................................................................................................................phhs.s..Y..h.s..s..l..p.....h..h....D.p...t.l...u...pl....l............p..t...........L.....c...p........p......s........h...........h........c...s...T.....ll..lah..uD..H.G..t.....h.tt...................................................................................................................................th.h.t..t..t..p..t.............h..h.....p..t..t..h....+...l...P..h.......h......lh..h..s..s..t..htt....................................th.pt.h.h.s..............thDl.hsTlhphh.s........................................................................................................................................................................... 
0 1780 2902 3885 +890 PF00685 Sulfotransfer_1 Sulfotransfer; Sulfotransferase domain Bateman A, Griffiths-Jones SR anon Pfam-B_87 (release 2.1) & Pfam-B_1885 (Release 7.5) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.73 0.70 -5.14 45 4587 2012-10-05 12:31:08 2003-04-07 12:59:11 22 172 680 97 2665 5498 3822 230.10 16 64.36 CHANGED ssDlllsoaPKSGTTWlpcllsh...............lhspsch....phhppshl....hsphPhlEh............................................................................shtphpph.uPR.lhp..oHhshthhs.shhpspsKlIYlsRNP+DshVShYaatphht.............hh.tsss..ap..........phh-hFhp..............GplshGsaacHlcsWhp..hppppplLalpYE-h+p-Pppplp+ltcFLGtshs......t.lppllcpsoFpshKss.ssshs...tt..........p.ssahRKGhsG-W...+sahTs.p.s-chDchac.......cphpsss .................................................................................................................................................................................................................phhlhsh..+..s.......G...T..s...h.h..p.hlt...............................................................................h.p.ht....................p.p...........h........................p....h..h........h......l.p...h.............................................................................................................................................................t.......t.....h....t.....t...h.......................h.........................................................................................................................t...............p............s...K....l....l....h..l.h..R.s.P..t.-...h...h...hS...h...a..p..h..h..t.t.ht...................................................................................................t......hp...........................................ph.h.....p.....h.h......t.................................................................................s..t...h.......h.....u...........
a...h...p....a....l...p...t...a...hp.................hh........t...p..p....l.......h..h....l..p..Y..E-........h........h........p......c........s....t..t...p...l...pc.l.h.c...F.L..Gh..s........................h...t....h...h..p..t..h..h.p.....h...p....t...........................................................t...h.h.tp..s......tth.........h..hs...t..t.hpt.h................................................................................................................ 0 952 1358 1975 +891 PF03567 Sulfotransfer_2 Sulfotransfer2; Sulfotransferase family Finn RD, Bateman A anon Pfam-B_3050(7.0),Pfam-B_5394(7.7),Pfam-B_7836(10.0),Pfam-B_5040(7.5) Family This family includes a variety of sulfotransferase enzymes. Chondroitin 6-sulfotransferase catalyses the transfer of sulfate to position 6 of the N-acetylgalactosamine residue of chondroitin. This family also includes Heparan sulfate 2-O-sulfotransferase (HS2ST) and Heparan sulfate 6-sulfotransferase (HS6ST). Heparan sulfate (HS) is a co-receptor for a number of growth factors, morphogens, and adhesion proteins. HS biosynthetic modifications may determine the strength and outcome of HS-ligand interactions. Mice that lack HS2ST undergo developmental failure only after midgestation,the most dramatic effect being the complete failure of kidney development [1]. Heparan sulphate 6- O -sulfotransferase (HS6ST) catalyses the transfer of sulphate from adenosine 3'-phosphate, 5'-phosphosulphate to the 6th position of the N -sulphoglucosamine residue in heparan sulphate [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.78 0.70 -4.70 66 2144 2012-10-05 12:31:08 2003-04-07 12:59:11 9 38 506 1 1196 2133 483 218.30 15 67.30 CHANGED pchhlssca.pllaChlsKsusoshpplhhhLts.................hpps.phs.....pshsp.p..h...ptthtphsphp.pph.tpthp.t........................................hhpFhhVR............cPhcRllSuapsKhh.........htphh.upphhphhc................t.....t.spapcFhphlhstt.......pht......hsp...H..............ats.....htp.hshsshh..cachluphEshp....cctshlhphhsh....shth....phsp.................pppppsssphttphhhphs.thhctlhclYph.DatlF.sYs ..........................................................................................................................h...........ll.ahhl.Ksusosht.phhh.h.h.t...........................................................ht.......th....................tth.h.......................................p............hh..ph....s.ph.....tt.......ht..p...............................................................................................................................hhhhhhlR.............cP...hp...R..h...lS..t...a.h..t.h.h..........................t..h.........t.t..h.hpthp..............................................................t....hshp..p...F.h.p...h..l.h.s.t....................t...........s.............p......................................................h.....................ht..h.p.....h..h......pa.s.h.l.Ghh...E.php............pp..h.t....h.l...p.h..h.sh...............h...........ph.t..............................................p.......t.......h..t.h......h..s.........t.hphh...phh.h.Dh.ha.ta............................................................................................................................................................................. 
1 489 622 928 +892 PF04935 SURF6 Surfeit locus protein 6 Moxon SJ anon Pfam-B_5497 (release 7.6) Family The surfeit locus protein SURF-6 is shown to be a component of the nucleolar matrix and has a strong binding capacity for nucleic acids [1]. 22.00 22.00 22.10 22.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.72 0.70 -5.04 29 306 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 262 0 222 303 4 204.60 26 49.88 CHANGED hppRptllcp+R++cctc+p+++.pcRpct+ppcp...spcsctpsspppttts................................pssstpstsslsFsplpFs-sptsspshphhcpptpph.......t..shKphLp+lEs++p+LpphcEp..............Ktt-lc-KppWppAhh+AcGhKl+DDp+LL+KALKRKEppKcKSppcWpERpppVpcphppRQKKRc-NLpcR+csKtpK+hppt.+pKG..+hh ...........................................................................................................s....thhpt++p+c.ctc+t++.+.pp+p.ct+tccp...........tpc.tp.t.t.spttpt.t..t.........................................................................tpsttp..tssslsF.uplt..hs-spths...pphpphp..p...t.t.t...............p....shcp.hL.pclct+cp+Lpplccp..............Kttclcc..KptWpsAht+AcG.K.....V+..D...D........cLL+KulKRK-ppKcKSpcc..Wc-RpctVpcph....pp....RQcKR...c...c.Nlc.pR+cpK....t..p++hppt.ppKt................................. 0 74 123 186 +893 PF01805 Surp Surp module Bateman A anon Bateman A Family This domain is also known as the SWAP domain. SWAP stands for Suppressor-of-White-APricot. It has been suggested that these domains may be RNA binding [1]. 21.10 21.10 21.10 21.10 20.90 21.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.50 0.72 -4.31 64 1689 2009-01-15 18:05:59 2003-04-07 12:59:11 15 52 309 8 1115 1679 12 53.40 30 10.94 CHANGED .hplIcpoApaVscpG.pFEt....hlhp+p...ps.spFsFLp..ss.sshasYYph+ltphp ................hplIc+hAp.F.VA..+..sG......p..FEp......hlh.p+p.....tpN..sp.........F..s.FLp......sp.ss....hasYYph+ltph.............. 
0 351 557 826 +894 PF02201 SWIB SWIB/MDM2 domain Bateman A anon Bennett-Lovsey R Domain This family includes the SWIB domain and the MDM2 domain [1]. The p53-associated protein (MDM2) is an inhibitor of the p53 tumour suppressor gene binding the transactivation domain and down regulating the ability of p53 to activate transcription. This family contains the p53 binding domain of MDM2 [2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.54 0.72 -4.30 41 1464 2009-09-11 13:54:42 2003-04-07 12:59:11 13 39 457 79 836 1368 324 74.50 28 17.21 CHANGED hs+.h.lo.pLtphlGssp...h.oRs-llctlWpYIKp+sLQDPpsK+hIhCDppLppl..Fts.cp.lshhplschLspHhh ..................................t.htlsspLtp..hl....Gtpp.......h.oRspllptl.WpYIK.........ppp....L.............QD.sps+.......+.h.....Ih..C.D.cp.Lpp.l......Ftt...pp......l...phh.pl.schl.stah....................... 0 228 451 658 +895 PF04434 SWIM SWIM zinc finger Aravind L anon Aravind L Domain This domain is found in bacterial, archaeal and eukaryotic proteins. It is predicted to be organised into two N-terminal beta-strands and a C-terminal alpha helix, thus possibly adopting a fold similar to that of the C2H2 zinc finger (Pfam:PF00096). SWIM is thought to be a versatile domain that can interact with DNA or proteins in different contexts [1]. 19.60 5.00 19.60 5.00 19.50 4.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.58 0.72 -4.45 88 3643 2009-01-15 18:05:59 2003-04-07 12:59:11 12 91 1791 0 1241 3096 130 38.50 23 6.69 CHANGED apVplt......................ptsCoCthaph......pthsCpHhlAlhhtht .............................................................tspCo.Cs........................tsh.hC+Hhlulhh...h........... 
2 294 711 981 +896 PF04433 SWIRM SWIRM domain Aravind L anon Aravind L Domain This SWIRM domain is a small alpha-helical domain of about 85 amino acid residues found in chromosomal proteins. It contains a helix-turn helix motif and binds to DNA [1]. 26.10 26.10 26.10 26.20 26.00 26.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.82 0.72 -3.86 79 1274 2009-01-15 18:05:59 2003-04-07 12:59:11 12 43 286 28 857 1242 5 86.30 27 11.90 CHANGED hhhsshu...shhshs.plpphE..pphhschhh..................................ssp...sYlphRshllppa..phpssphLshoss..........+csht......Dsstlt+lapFLpcaGhINa .....................h....shsshhsh..s...pl.pshE..pphhschhtt....................................................sPp....hYLshRshhl..pp..a.....phs..st.th..Lshsss..........+pslp.........DlstltRl.asFL-phGhINa........... 0 243 449 684 +897 PF00804 Syntaxin Syntaxin Bateman A anon Pfam-B_1158 (release 2.1) Domain Syntaxins are the prototype family of SNARE proteins. They usually consist of three main regions - a C-terminal transmembrane region, a central SNARE domain which is characteristic of and conserved in all syntaxins (Pfam:PF05739), and an N-terminal domain that is featured in this entry. This domain varies between syntaxin isoforms; in syntaxin 1A (Swiss:O35526) it is found as three alpha-helices with a left-handed twist. It may fold back on the SNARE domain to allow the molecule to adopt a 'closed' configuration that prevents formation of the core fusion complex - it thus has an auto-inhibitory role. The function of syntaxins is determined by their localisation. They are involved in neuronal exocytosis, ER-Golgi transport and Golgi-endosome transport, for example. They also interact with other proteins as well as those involved in SNARE complexes. These include vesicle coat proteins, Rab GTPases, and tethering factors [6]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.23 0.72 -3.86 171 1633 2012-10-03 05:55:03 2003-04-07 12:59:11 20 15 425 9 971 1702 19 101.60 21 33.55 CHANGED thspF.....hppspclppplpplppplp.clpphp....pphhshss..tp..................................phppclcplspplpp...hh.......pplppplc.........p......lppt..............................ttsssstphppsph.pslspcFpchh ........................................spFh.pp.spcIppplpp.lppplpclpphp...................pphlssss...sp......................................pphcpcl-pLstpIpp.ph................pplc..pplc.........................s...........................lppp......................p....................tppt.tspssssh+htpsp....hs.....sLupcFh-lh................................................................................................................................................... 0 281 496 740 +898 PF00907 T-box T-box Bateman A anon Pfam-B_363 (release 3.0) Domain The T-box encodes a 180 amino acid domain that binds to DNA. Genes encoding T-box proteins are found in a wide range of animals, but not in other kingdoms such as plants. Family members are all thought to bind to the DNA consensus sequence TCACACCT. they are found exclusively in the nucleus, and perform DNA-binding and transcriptional activation/repression roles. They are generally required for development of the specific tissues they are expressed in, and mutations in T-box genes are implicated in human conditions such as DiGeorge syndrome and X-linked cleft palate, which feature malformations [2]. 
20.50 20.50 20.60 20.60 20.10 20.40 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.08 0.71 -4.81 22 2143 2012-10-03 00:25:27 2003-04-07 12:59:11 17 23 269 9 1083 1755 0 155.30 47 35.04 CHANGED plpL-spcLWpcF+plsTEMIlTKoGRRMFPsh+lploG....LDPpuhYhlhlDhVPsDspRaK..assucWhsuGKAEPpsP.sRhYlHPDSPssGu+WM+pslSFpKlKLTNst...sssGp.l...lLpSMHKYpPRlHlVcs..ss..s..........ph.hpTasFPETpFIAVTAYQNpcITpLKIcpNPFAKGFRDs ...........................h.LptttLWtpFp........p..h..s..sEMIl..T.K.t.G...R.....R..........M....FPshchp..l..sG....Lc..s........pspYhl.hhD.h..lss..D...s.....p...R.......a+.............a..............p..s..u....p........W..h...s.....u...G...K.....A........-......st....h.......s...............s.R.....h...Y.............l....H..P....D...S...P...s...s...GtpW..........M+....p....l.SF.....pK..l..KL..........T...........N..............N....h.............s.....p....p......G........p....l.....................lLpS.h..HK.Y..pPR.l...Hllcs..sp..t..................................p.t.hpoasFs.ETpFhAVT..AYQNp..p................ITpLKI.cpNPFAKGFR-............................................... 0 218 311 758 +899 PF04719 TAFII28 hTAFII28-like protein conserved region Waterfield DI, Finn RD anon Pfam-B_4085 (release 7.5) Family The general transcription factor, TFIID, consists of the TATA-binding protein (TBP) associated with a series of TBP-associated factors (TAFs) that together participate in the assembly of the transcription preinitiation complex. The conserved region is found at the C-terminal of most member proteins. The crystal structure of hTAFII28 with hTAFII18 shows that this region is involved in the binding of these two subunits. The conserved region contains four alpha helices and three loops arranged as in histone H3 [1,2]. 
18.90 18.90 18.90 19.70 18.50 18.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.85 0.72 -4.19 10 332 2012-10-10 12:36:46 2003-04-07 12:59:11 9 8 271 2 230 363 2 87.60 40 33.70 CHANGED hQhLlosFocEQhsRYEsaRRSuhpKuslK+Llsp...lTG.polupsVlIulsGlAKVFVGElVEpAhcVpc..Epp-os.....PlpPcHlREuh+RL ............................tlLlssFoc-Qh.sRYEhaRRou..hsKu..slK+Llps.........ls.u.....p...SVspNVs..IshuGluKVFVGElVEc...........Ah.sVpc......cht..-ss.............................slpPp+h+cuhh...................................................... 0 73 122 187 +900 PF04177 TAP42 TAP42-like family Wood V, Finn RD anon Pfam-B_5735 (release 7.3); Family The TOR signalling pathway activates a cell-growth program in response to nutrients [1]. TIP41 (Pfam:PF04176) interacts with TAP42 and negatively regulates the TOR signaling pathway [2]. 26.00 26.00 27.00 26.70 24.90 25.60 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.09 0.70 -5.49 39 389 2009-09-11 05:02:27 2003-04-07 12:59:11 7 7 295 3 263 396 3 308.00 25 90.82 CHANGED sLpphFppuhphhs.plcp....ts..psosphQsplp...psIpphpcspphl.spLuLFSsNEslE-luTssLpYLhlsYaLupLhp+..................ssspRhphLptupphatpFLsphcsYc.llstp.tchhcph.............st.hs...shsssstpRptKIppaKpcKELcp+Lphlcpthpp.............................psD-E.............hRchaLspLphhhhcohppL-tlspElpl.Lpthtphttpst..........................................pphhc..............t.h...lsppspsLpPF.......hl..........hs.pRpphpppVFGsGas.LPTMol-Eah-pEhcpGthhcst......tpp.................ps.ppct.....c-cc.EpsDcEsh...+.sRpWD-aK-sNP 
...................................................l.thattu.phht.tht.........t.s.u.thpp.t.lp...pslthhppstphl....splu.LFSsNEsl--luTssLpaLh.lsahlupLh.+..............................hs.spRhphLptup.......thhh......pF...LphhcpYp.l..ls.p....phhpph.............................t.......shsshstpRptKItpa+pp.Kc....l...cp+Lp.........tlcpthcp...............................................................psD.-E.....................hhRch..alhplphhhhp......uhppl-.lppEhph.Lpth.t....ttt.stpt.....................................................................................t.h.....tttstsl.p.sF...........hl......................pRpph..ptpVFt...sGas.LPTMol-Eah-pchcp....Gth.ptt........ttp......................tt.ppp..........p-pp.......-.......psDpcph.....+..sR...tWD-....aK-sp............................... 0 91 141 208 +901 PF03943 TAP_C TAP C-terminal domain Bateman A anon Bateman A Domain The vertebrate Tap protein is a member of the NXF family of shuttling transport receptors for nuclear export of mRNA. Tap has a modular structure, and its most C-terminal domain is important for binding to FG repeat-containing nuclear pore proteins (FG-nucleoporins) and is sufficient to mediate nuclear shuttling [1]. The structure of the C-terminal domain is composed of four helices [1]. The structure is related to the UBA domain. 21.30 21.30 21.40 22.60 21.20 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.40 0.72 -4.54 8 365 2012-10-01 23:03:33 2003-04-07 12:59:11 8 19 218 4 263 372 0 49.80 36 8.36 CHANGED QEMlpAhSsQSGMpl-WSpKCLpDNsWDYs+AupuFspLpscscIPtEAFt ......phltthoppouMshcaSppC.L.p.p.s.sW...saspA..hpsFp.plp....spspIPt-AF....... 0 74 111 185 +902 PF03134 TB2_DP1_HVA22 TB2/DP1, HVA22 family Mifsud W anon Pfam-B_837 (release 6.5) Family This family includes members from a wide variety of eukaryotes. It includes the TB2/DP1 (deleted in polyposis) protein (e.g. 
Swiss:Q00765), which in humans is deleted in severe forms of familial adenomatous polyposis, an autosomal dominant oncological inherited disease. The family also includes the plant protein of known similarity to TB2/DP1, the HVA22 abscisic acid-induced protein (e.g. Swiss:Q07764), which is thought to be a regulatory protein. 27.20 27.20 27.40 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.26 0.72 -4.24 93 1350 2009-01-15 18:05:59 2003-04-07 12:59:11 14 26 310 0 844 1293 9 90.30 32 38.56 CHANGED hshhupllss.lhGhlYPuYtShKAlcos...stp......-sppWLsYWllauhholhEhh.sshhlt.WlPhYahhKhhFllW......LhhP.pspGAphlYcphl+Phh ........................................................h.....phls.lhGhhYPu..YtShKAlcop.....shp........................-.ppW.lhYWllauh.h.ol.h..Ehh..s.......chh....l.......s..W.........h..P....hYa.hK..h.........hFllW......LhhP...tsp...G...uphlYcphl+Ph................................. 0 265 450 669 +903 PF00352 TBP Transcription factor TFIID (or TATA-binding protein, TBP) Finn RD anon Prosite Domain \N 22.90 22.90 23.10 23.00 22.70 22.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.62 0.72 -4.62 135 1985 2012-10-02 11:58:57 2003-04-07 12:59:11 16 8 584 132 1141 1872 373 83.40 38 62.81 CHANGED tss.hpIpNlVAosslst.plcLcpl.uhph...css...-YcPE.pFPGLlaR....hp..cP...KsshL.IFsSGKlVlTGuKs.c-sppA.hcplhphLpchs ................................................................h..phplpNlV.uossl..........th..plc.Lcsl.Ahpt......ppu..........pYcP.......-....hF.su.l..I.hR.........hp....cP..................+....sshLIFsSGKlVhTG.AK.......s..cc-.sthA.hcphhtllpch................. 0 351 608 909 +904 PF03148 Tektin Tektin family Mifsud W anon Pfam-B_3069 (release 6.5) Family Tektins are cytoskeletal proteins. They have been demonstrated in such cellular sites as centrioles, basal bodies, and along ciliary and flagellar doublet microtubules. 
Tektins form unique protofilaments, organised as longitudinal polymers of tektin heterodimers with axial periodicity matching tubulin. Tektin polypeptides consist of several alpha-helical regions that are predicted to form coiled coils. Indeed, tektins share considerable structural similarities with intermediate filament proteins. Possible functional roles for tektins are: stabilisation of tubulin protofilaments; attachment of A and B-tubules in ciliary/flagellar microtubule doublets and C-tubules in centrioles; binding of axonemal components [1]. 25.10 25.10 25.20 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.31 0.70 -5.92 37 1118 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 349 0 335 991 0 261.70 40 82.81 CHANGED WppsNhtphppupspRspuc+lpp-oppLhpEsptpTppsQpDssc+LspRlpDIphWKpELpcplcchtsEhstLtp.KpRlEcALpshp.hPlslsp-CLphR-pRhuh.DLV+DpVEpcLhKEl-lIcsspplLpcslppshcQlph.Rsu+ppLEhDhsDKhpAhpIDspChpL.sNsSsslpapssss+hs.......ssss..o.csWtcFoppNlp+Acp-ctsStpLRphl-plLppsusDlcsQtsssshAFppRlpEhpcA+scLchpLtcshpEIsptEcsIptLccAlp-KpuPlKVApTRL-sRot.RPNVELCRDpsQhtLlsEVpplpsolptLpp+LtpAcssLpsLhcpphpLEc-lthKpNSLhIDc.+ChphRpphss ............................................W+cNN..pYphospupccActLch.po++.hpcshsshshp.cDSs+K.LspR.............IcDlshWKcclp+slpslscEIspLcc...........sRs+LcsAhchLh.hP.uI.ucECLcL...........Rs..sRhcs...DLV.+D.-uEpEL.l..KE.Vslltpl++lhhsTL.s+s-EQh...h....hN....................+uAKpslEhDaSD..KhsuLcl.....Dc....cstsL......ssp..S.shl...a.+s...G.ssRhs.....................ps...so....o......E.......Wtcas....pt......................................................................................................................................................................................................................................... 
0 101 127 224 +905 PF01397 Terpene_synth Terpene synthase, N-terminal domain Finn RD, Bateman A anon Pfam-B_728 (release 3.0) Domain It has been suggested that this gene family be designated tps (for terpene synthase) [1]. It has been split into six subgroups on the basis of phylogeny, called tpsa-tpsf. tpsa includes vetispiridiene synthase Swiss:Q39979, 5-epi- aristolochene synthase, Swiss:Q40577 and (+)-delta-cadinene synthase Swiss:P93665. tpsb includes (-)-limonene synthase, Swiss:Q40322. tpsc includes kaurene synthase A, Swiss:O04408. tpsd includes taxadiene synthase, Swiss:Q41594, pinene synthase, Swiss:O24475 and myrcene synthase, Swiss:O24474. tpse includes kaurene synthase B. tpsf includes linalool synthase. 20.20 20.20 20.60 20.30 19.90 20.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.24 0.71 -4.91 141 1399 2009-09-17 01:05:38 2003-04-07 12:59:11 16 21 224 56 399 1601 0 164.00 29 30.94 CHANGED WscpF.l..............sasst........cphhpplcpL.+pcV.+p.hl..s.sts.....phhppLpLIDslpRLGluaHFcpEIc.phLcplapp.htppp...tt...........DLassALtFRLLRpaGapVSsD.lFpcF.+-cpGp..Fpp.shh..sDs.+GlLsLYEAo.aLthcGE..s.lL-EAhs.FoppaLcphh...ttps..s.............L..spplppAL ................................................Wt..hh............tht............pphhphh.p.p.L..hpch.....pp.......hl..s..sh............shhppLphlDsl.....pRLGlshaFcpEIc.p.hLpp...la.pphtpps.........................DLh.ss.ALtFRLLRppG..a.p...V.........S..s..-......lFp.pF..pcppGp...Fp.........t....sh.........t.......pD.......s..cuhLsLYcAo.pl....t.h.p.uE..p..lL--Aht.FopphLcphht...p.p................LtpplphuL.............................................. 
1 37 241 326 +906 PF00440 TetR_N tetR; Bacterial regulatory proteins, tetR family Finn RD anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.07 0.72 -4.32 109 63222 2012-10-04 14:01:11 2003-04-07 12:59:11 18 89 4528 525 17576 49863 3840 46.40 26 22.51 CHANGED IlpuAhclhs.ccGapssohpcIAcpAGlupuslYhaFs.uKcpLhtsl ....................llpuA.h.p.l.ht..p....p....G.....h....p....s...s..o.lpc....lAc..pA...G...l...s..p..u..s..l.Y.p.aF.....s..sKcsLh.t..................... 0 5211 11622 15040 +907 PF03850 Tfb4 Transcription factor Tfb4 TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Domain This family appears to be distantly related to the VWA domain. 21.00 21.00 22.10 22.10 19.50 19.30 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.08 0.70 -5.14 26 353 2012-10-10 16:07:06 2003-04-07 12:59:11 9 8 279 0 253 344 4 246.40 33 81.47 CHANGED sSLLsllLDTNPptWsphppp.......hslopslsu........llVFlNAHLuhNpsNpVAVlAupsps.scaLYPssp...............tsspsshshst.st.............shY+tF+hV-Etlscclpplhppsstss.psp.....ohl.uGALohALsYINRh.p.spu..........................................pphpSRILl....lsuspDsshQYIPlMNsIFuAQ+hplPIDlspLsG..DusaLQQAuDsTsGlYl+.lsp.....scGLlQYLhhuFhsD.ptRshllLPspssVDFRAuCFC..H++llDlGaVCSVCLSIFCph..P........CsTCs 
...................................................................sLLslllDssPhhW.t....ht..............................hslsphlts................lhsFhNuHLhhstsNplAl.lA.u.p...pt....spaLYPs.s............................................p...p.t....t................................shap.hp.hpphlhpplpplhp.p.sp.t.........t.t.....s.t...........ohl.uGuLuhALsaIsRh.p...h....t...............................................................thpuRILl........lps..up-ts.QYhsh.MN.s.lFuAQ+.....plsIDsshL.....ss.....sushLQQAschTsGlYlp..s.t.p.....................tu...LlQYLh....h...sF.h...s.......-.......p.....Rp....l.......h.h.........P..s........s.p......VDa...R..A..u.....CFC.Hcpll-lGaVCSlCLSIaCp.....s..........C.TCt............................... 1 84 138 210 +908 PF02269 TFIID-18kDa TFIID-18; Transcription initiation factor IID, 18kD subunit Bateman A, Mian N anon Pfam-B_3681 (release 5.2) Family This family includes the Spt3 yeast transcription factors and the 18kD subunit from human transcription initiation factor IID (TFIID-18). Determination of the crystal structure reveals an atypical histone fold [1] 20.90 20.90 20.90 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.04 0.72 -4.17 6 572 2012-10-10 12:36:46 2003-04-07 12:59:11 11 6 281 2 396 577 1 87.00 33 36.04 CHANGED phappElpsMMYuhGDspcPhsETssllE-lVps.lhElhppAhclup.cttpplplEDlhFLIR+D.tKlsRl+chLohp-hhpKstKphD- ....................h..att-lpphMauhGD.......s.......p.......p.......PhsETssllE-lVhp.lh...............-hhp.....p.A....tpl.....uppp......t.t.p.......p...l.psEDllFLlR..+..D...tKluRlcchLphp.-.hp+stKs.-t.................... 
0 122 205 319 +909 PF03847 TFIID_20kDa TFIID_A; Transcription initiation factor TFIID subunit A Griffiths-Jones SR anon PRODOM Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.07 0.72 -3.69 8 348 2012-10-10 12:36:46 2003-04-07 12:59:11 8 10 262 2 243 431 8 68.60 45 16.48 CHANGED oK+KLp-LVppl.....DssspLDs-VE-lLL-IADDFV-SloshuCpLAKHRKScpLEs+DlpLHLERsaNh ...................sK++Lp-LV+pl................DssppLDs-.VEEh..LLplADDFl-sVsstAC.pLAKHR+Ss.....sLEl+DlQLaL....ERsaNh................. 1 81 131 200 +910 PF04494 TFIID_90kDa TFIID_WDA; WD40 associated region in TFIID subunit Bateman A, Wood V anon Pfam-B_9152 (release 7.5) Family This region, possibly a domain is found in subunits of transcription factor TFIID. The function of this region is unknown. 20.60 20.60 21.20 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.59 0.71 -4.13 35 427 2012-10-02 14:46:49 2003-04-07 12:59:11 10 17 260 14 300 411 3 132.80 27 19.44 CHANGED sp.hstttp-s..cPppYtpuYshL+sWl-soLDlYKsELp+.lLYPlFlasaL...-LlspshsppA+..pFasca+s-ap.hHsp-..lppLpslspspHlc-NphAptapssKYplplocsuasLLlpFLpcppss...llltllspals ...........................s........pssPtpYpptastL.+pal.cs.s...L.Dh.a+h.ELpp.l.LYPlFVahaL...cLVtsshtpcA+........pFap+..apspa.......tt....app-......lcpL.p.s..........lpp.p.c..........l..ppNph.h..p..tacssKahlplocsua.................phLhpaLpppppt....hlhtllppal............................................... 0 91 144 231 +911 PF04253 TFR_dimer Transferrin receptor-like dimerisation domain Bateman A anon Bateman A Domain This domain is involved in dimerisation of the transferrin receptor as shown in its crystal structure. 
21.00 21.00 21.40 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.45 0.71 -4.55 36 850 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 269 58 560 820 42 119.80 23 16.83 CHANGED slohp.lhpAhppapcsupphpphhcphc..........psshltl+h....h..NcplhplE+sFlss...hGlPs.+sha+HVlauPs.hs..ts........toFPultDultptp................WspspcQlsllshslpsAAstLtts .................................................t.lshp.LhpAhtpappsupphpp.hhp.php.........................................pp.s.h.th+h......h.......Nc+.....lhh..lE+sFls........................tGl.P.s.Rsaa....+Hl.lau......Put.hsthss.................psFPulh-ulpptc......................hspspcplshhshslpsAAthL...s...................... 0 150 267 427 +912 PF02824 TGS TGS domain Aravind L anon Aravind L Family The TGS domain is named after ThrRS, GTPase, and SpoT [1]. Interestingly, TGS domain was detected also at the amino terminus of the uridine kinase from the spirochaete Treponema pallidum (but not any other organism, including the related spirochaete Borrelia burgdorferi). TGS is a small domain that consists of ~50 amino acid residues and is predicted to possess a predominantly beta-sheet structure. There is no direct information on the functions of the TGS domain, but its presence in two types of regulatory proteins (the GTPases and guanosine polyphosphate phosphohydrolases/synthetases) suggests a ligand (most likely nucleotide)-binding, regulatory role [1]. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.68 0.72 -4.13 61 10557 2012-10-03 10:59:06 2003-04-07 12:59:11 16 41 4800 19 2820 7605 3711 61.10 35 9.24 CHANGED ltlah.PcGphhc........ltpGuT.shDhAhtIHpslucphhhAhVsG.......p.hlslsphLcssDllcIls .......................lhlho..PcGslhc................lspG.uT..slD...hA.h.sIH.s.s.lucpsluA+..VNG.......................................c.hVsLsp.Lp..sssplEIlT....................................................... 0 893 1705 2346 +913 PF00763 THF_DHG_CYH Tetrahydrofolate dehydrogenase/cyclohydrolase, catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_882 (release 2.1) Family \N 21.60 21.60 21.70 21.70 21.40 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.28 0.71 -4.06 105 5546 2009-01-15 18:05:59 2003-04-07 12:59:11 18 17 4681 28 1648 4097 2479 116.10 41 35.35 CHANGED pllDG+tlAppl...ppcl+pclpplptp..sh.PpLulllVG-csASphYVcsKp+tspclGlcsphhplspshopp-LlshlpcLNpDsslcGIllQLPL..Ppplspp..pllpsIsPcKDVD ...........................................llDGKtlAp...pl...cpcl...tp...cV............pt..l..........pt...........p........s....hsPu...LA..VlLVGs...c.........P....A.SpsYVpsKp+us..cclGh.pS............phh..cL..........P...p...s.....s....o.............p........p.......EL.LshI.ccLNpD.s.slc.GILVQLPL.......P.pp......I..D...pp...pllpsIsPcKDVD.......................... 
0 523 1025 1369 +914 PF02882 THF_DHG_CYH_C Tetrahydrofolate dehydrogenase/cyclohydrolase, NAD(P)-binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_882 (release 2.1) Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.02 0.71 -5.16 91 5651 2012-10-10 17:06:42 2003-04-07 12:59:11 14 24 4719 28 1679 4648 3203 157.80 45 48.31 CHANGED HPhNlG+Lsts..ps....shhPCTPtGlhcLLcp.hsls...lpGKpsVVlGRSsIVG+PluhLLhp.........p........sATVTlsHSpT...csLsphs+pADIllsAsGpPphlpu-hl...KsGAlVIDVGINRl..........tss+.LsGDV-F-sltch.AuhITPVPGGVGPMTlAhLhpNTlpuAcpph ..................................................................................HPhNlG+Lhhs........ps....shhPC.TPtGlh.p.L...L..c.........c......h....s.l......c..............lpG+.p..AVVlGRSs...I.......VG..+PhuhLLLp........................t.....................s.A.TVTl..sH...op......T...................+s...L..tp..h........s...c.......p.......A.D.....Il.l...sAl.....G.+.......P.p..h........l........s..u...-.h...l..............K............G..A...l..VIDV.GlNRh.................................tsu+..LsGD..V..c.a.......-.s..s..tch..A..u.h.I..TPVP.GGVG.PMTlAhLlpNTlpAscp.h.................................................................................... 0 539 1040 1395 +915 PF00899 ThiF ThiF_family; ThiF family Finn RD, Bateman A anon Pfam-B_59 (release 3.0) Family This family contains a repeated domain in ubiquitin activating enzyme E1 and members of the bacterial ThiF/MoeB/HesA family. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.59 0.71 -4.34 90 12207 2012-10-10 17:06:42 2003-04-07 12:59:11 16 111 4208 117 4344 10198 2664 134.50 27 37.94 CHANGED ppu+VlllGsGGlGuhssphLAts.Gl.....GpltllDtDpl-hSNLsRQhlaspsc..l...Gp..sKspsAppplpplNP.plp..lps....hsptl...................stpshppll.......t..sh..D.....lllDu........hDs..hssRhhlsp..hshpt.............th.P...hlpuush.shtups.hhh ............................pu+Vlll..Gh.G.G..lGu......ssphLA...t...u...G..l.........................Gpl..s..l............l..............Dh......D....s..V...c....ho.N.....L.pR....Q..h..l.h...........p.....p....s..s.....l....................Gp...........sK...s.........p.s.s..tc..p........l.......p............p..l......N.....P......p..l.p.......l.ps....hpp.tl............................................s.t..p..s..h...p..p...hh....................p...ph......D...............lV...l.D..s.................hD.s......h.p..s...+.....h...h...l..sp...hsht.t.......................p.h...P.....l.lpuush..uh.......hh............................................................................................................................... 2 1492 2600 3620 +916 PF02597 ThiS DUF170; ThiS family Morett E, Mian N, Bateman A anon COG2104 Family ThiS (thiaminS) is a 66 aa protein involved in sulphur transfer Swiss:O32583. ThiS is coded in the thiCEFSGH operon in E. coli. This family of proteins have two conserved Glycines at the COOH terminus. Thiocarboxylate is formed at the last G in the activation process. Sulphur is transferred from ThiI to ThiS in a reaction catalysed by IscS [1]. MoaD, Swiss:P30748 a protein involved sulphur transfer in molybdopterin synthesis, is about the same length and shows limited sequence similarity to ThiS. Both have the conserved GG at the COOH end. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.42 0.72 -3.55 145 6781 2012-10-03 10:59:06 2003-04-07 12:59:11 15 34 3470 44 1951 4749 1939 72.90 24 84.19 CHANGED lhh.uphpchss............tstTltpLlppL....s.hph.....................ptltltlNtchlpt.....sp.l..pcGDplsllPsVuGG ...................................................................hh.ut.tphhs...........................t.s...t..Tl.....t.pLlppL.....t.tpt..........................................ptl..t.lul.Np...p..hl..s.......p.................s.......p.......hl................p-GD......plsl..lssVuGG....................................... 0 625 1287 1677 +917 PF01833 TIG IPT/TIG domain Bateman A anon [1] Domain This family consists of a domain that has an immunoglobulin like fold. These domains are found in cell surface receptors such as Met and Ron as well as in intracellular transcription factors where it is involved in DNA binding. CAUTION: This family does not currently recognise a significant number of members. 20.90 19.70 20.90 19.70 20.80 19.60 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.09 0.72 -3.99 131 8379 2012-10-03 16:25:20 2003-04-07 12:59:11 19 516 691 126 5044 7498 871 86.60 18 16.26 CHANGED Pt.lssls..P...ttsssts.GstlolpG..psF........................tssssplhhusttsssh............hhssspl.................hspsPsts............su.shslplthss............tssshsapa .................................P.lpslp.....P.......ppu...sh....t.....G...G....sp.......loI....pG......p.s...h..............................................................t.s.ss..h..p....V.....h.l......u..st...t...sssh...................................hhss.spl..........................................................hCps..Psts..........................su..th..p.l.plthss............................................................................................................................. 
0 2483 3116 3961 +918 PF04280 Tim44 Tim44-like domain TIGRFAMs, Finn RD, Bateman A anon TIGRFAMs (release 2.0); Family Tim44 is an essential component of the machinery that mediates the translocation of nuclear-encoded proteins across the mitochondrial inner membrane [1]. Tim44 is thought to bind phospholipids of the mitochondrial inner membrane both by electrostatic interactions and by penetrating the polar head group region [1]. This family includes the C-terminal region of Tim44 that has been shown to form a stable proteolytic fragment in yeast. This region is also found in a set of smaller bacterial proteins. The molecular function of the bacterial members of this family is unknown but transport seems likely. The crystal structure of the C terminal of Tim44 has revealed a large hydrophobic pocket which might play an important role in interacting with the acyl chains of lipid molecules in the mitochondrial membrane [3]. 29.60 29.60 29.60 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.54 0.71 -4.22 140 1637 2012-10-03 02:27:23 2003-04-07 12:59:11 10 10 1251 4 675 1440 1309 139.80 19 44.40 CHANGED sutul.p.thpph-ssFssppFlpuA+stath.lhtAaspGDhcsL+shlop-hapthppslsp+.tptG..............shcsphlslpcsclhpuphc..sspshloVcFhsp.hth.hcpts.Gpll-G....s.spspchp-lWsFsRs......hsss.sssWpLsuhpp ..........................................................t.....t.hhths..ssFs....ptF..lptscphah..l.tA...a.s..p.s.-.........h....cs....l....+.p.hhotphas.thpt....tl...ppt..t.G.......................t.psp...l.tl.hps.c.l..spsphp.............sst..shlolp....h..p...h...h...h.t.t..s.....G.plhcG..........sts.ps....p.p......hp-lW..shsRs..........tst...tssWhlsshp...................................................................... 0 200 377 531 +919 PF04176 TIP41 TIP41-like family Wood V, Finn RD anon Pfam-B_12821 (release 7.3); Family The TOR signalling pathway activates a cell-growth program in response to nutrients [1]. 
TIP41 interacts with TAP42 and negatively regulates the TOR signaling pathway [2]. 25.00 25.00 27.20 26.60 23.00 22.20 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.01 0.71 -5.03 29 320 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 281 0 224 312 3 175.30 40 57.53 CHANGED MlFGcNhlpIpH.poGhsIcFNAhDALctVcppsp..sl+Vuhuc-WpcSRppsp.tp...........................-hlKPaDWTYTTsYpGTlhs...............ttthhhpsosccIslc+LpptDPILFaD-llLaEDELuDNGIShLSlKlRVMssphhLLhRFFLRVDsVLhRlpDTRlYh-Fsp......shllREapp+Essacplpp .........................................................MlFGsNhlpl....pH...........tsGht..lpFNAhDALctV.......p...p....pt.......t..............l.+V..uhupcWp...poRppsp....t.t...............................cll.+PaD.WTYTTs.YpGTlts..................................................t.h...ph...p.sosp..pIsh-tL..p.pp.-.s....ILF.a--lhLaEDELsDNGlShLSVKlRVMP.s.s..hhLLhRFFLRlDsVLlRlpDTRlYh-Fsp...........shllREapt+Esphppl..p........................................... 0 72 123 186 +920 PF01582 TIR TIR domain Bateman A anon Pfam-B_571 (release 4.1) Family The Toll/interleukin-1 receptor (TIR) homology domain is an intracellular signalling domain found in MyD88, interleukin 1 receptor and the Toll receptor. It contains three highly-conserved regions, and mediates protein-protein interactions between the Toll-like receptors (TLRs) and signal-transduction components. TIR-like motifs are also found in plant proteins thought to be involved in resistance to disease. When activated, TIR domains recruit cytoplasmic adaptor proteins MyD88 (Swiss:Q99836) and TOLLIP (Toll interacting protein, Swiss:Q9H0E2). In turn, these associate with various kinases to set off signalling cascades [3]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.52 0.71 -4.74 45 4612 2012-10-02 18:56:14 2003-04-07 12:59:11 15 676 330 18 1819 5111 86 129.70 23 17.89 CHANGED sFlsaps...........cscctFlsclhppLccp.....uhplalcp+.hhtGtphhss....lhpsIccS+hslllhScsaspSp..WC.LpELhphhcpthp.s..plllslFhpVcspclp.....pppscatpshtpthph.tsc.............tphthW+pshts ..................................................sFlsapt......................csp..ph.h.....h...s...cLh.....p....L...c...p.p.................s.h..p..l...h.l.c....c.+......-......h.........s..G....p.p..l...h.ss...............lh.p.u.I..c.........p.........S+...p.s.lh.VlS....ps....a.....sp.......Sp.............W..C...h..E..hh..h........h..h...p......p.............h..h...p...p......t.............t...p..l.l....l.h..l..h.....p.l..s.........p.plh...............tp.h.p.hh.h...thp...p.h....h....p...h.pp.....................tp....hWpphh..t................................................................................ 0 512 992 1303 +921 PF03920 TLE_N TLE_N-terminal; Groucho/TLE N-terminal Q-rich domain Finn RD anon DOMO:DM01627; Family The N-terminal domain of the Grouch/TLE co-repressor proteins are involved in oligomerisation. 24.00 24.00 25.20 24.00 23.60 21.00 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.83 0.71 -4.34 15 565 2009-09-11 00:33:27 2003-04-07 12:59:11 10 14 92 0 240 496 0 119.50 69 20.92 CHANGED MaP..tsRhsuPtp..........suQP..FKFTluEoCDRIKEEFQFLQAQYHSLKLECEKLASEKTEMQRHYVMYYEMSYGLNIEMHKQuEIsKRLNAICAQllPaLSQEHQQQVsQAVERAKQVTMsELNulIG..........pQ.phQpLppt ......................................s.s..p..........suQs..hK.FTl.sEohDRIK-EFQF..LQAQ.YHSLKl..EC-KLAoEKTEMQRHYVMYYEM.S.YGLNIEM.HKQ.sEIsKRLNs.IhA....Q.llPFLSQ.E.HQQ.QV.sQAVE.RAK.QVTMsELNAIIG..................QQ..hQ.hQpLSp.t................................. 
0 38 56 155 +922 PF05154 TM2 TM2 domain Bateman A anon COG2314 Domain This family is composed of a pair of transmembrane alpha helices connected by a short linker. The function of this domain is unknown, however it occurs in a wide range or protein contexts. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.55 0.72 -3.75 142 3044 2009-01-15 18:05:59 2003-04-07 12:59:11 11 50 1798 0 887 2110 350 53.80 28 26.67 CHANGED pt+shhhAhlL.sl..h..LG.hhGlHRFYlG..phtp.GllhLlhh........Gh.hsl........h.hlsh.l .......................pshhh.uhlL..ul...h..........lG..h...h....Glc+FYhG...phtp.Glla.Llhhh.................Gh.hsl...................hhllp..h.................................................. 0 311 541 742 +923 PF03348 Serinc TMS_TDE; Serine incorporator (Serinc) Mifsud W, Mistry J anon Pfam-B_3473 (release 6.5) Family This is a family of eukaryotic membrane proteins which incorporate serine into membranes and facilitate the synthesis of the serine-derived lipids phosphatidylserine and sphingolipid [3]. Members of this family contain 11 transmembrane domains and form intracellular complexes with key enzymes involved in serine and sphingolipid biosynthesis [3]. 
20.30 20.30 20.40 20.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.65 0.70 -5.77 45 847 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 289 0 501 781 10 343.80 29 92.34 CHANGED usCCGu...usCshhCosCsus...psShsTRlhYAhlLLlsollShIMlsshh.ppLp+.h.tas....................thsC....sp......hhGahAVaRlsFuluhFahlhullhlsV+SS+DsRAslQNGFWhhKhlhhlulhVsuFa.IPs.shFhhhhhaluhhGuhlFILlQLlLLVDFAHsWuEshlp+hE........-ssS....+hWhshLlssThhhYhsSlshslLhYlaas...ssuCshNphhIolNLllslllSllSlpPpVQEhpP+SGLLQuShlolYssYLTaSAloscP....D+pCNPh.............................hpspsshssshllGhllhhls.....lh.YoohRuussop....................................sshltLssc..............p.sh.p.cspshptssc-Gs.spps.......................hDsEcsuspYsYShFHhlFhLAohYlhhhLT.....................sWapP..tp.h...lsps.asusWVKIsSSWlChhLYhWTLVAPllhP-R.F ...............................................................................h.t...h..........psh...sRhh..Yshhhh.hsshlshlhht...h..h.t.l.....c.....h....h...................................................................................sC........t.........hhGh.uVaRls...huhsh.....Fah.........lhslh.h..ltlp..ss.......p.ss.R..u..t.lpN.G.aW.hhKhhhhhshhhhsFh.l........Pp...t.a..........h.........hh.hhluhhGu.hhFlllQLlLll-FAHpW..scthhtphc................pt..............phWhhhLhhsThhhY...h..h..uhsh.hs....lhaha..a......s..........................ts...Ct....Nphhls...hshhls.hhhohlul.P.......t...lp........t....P.p...........uGLh.Q..uuhlshYshYL..sa.SAhsspP.....pptCss...............................................................................t...tt....sh...llGhhlhhhs......lh.as..s.hps.uspst.............................................................................................tt........ts...............................tsE.pps.s.YsYuh.FH.h...hhhLA.ohalhhhLT..........................sW.....hp....................sp.....ht...ht..ts....h.shWVKl..s..ssWhshhlYh.WoLlAPh.h.h..pR......................................................... 
0 149 252 373 +924 PF03459 TOBE TOBE domain Yeats C anon Yeats C Domain The TOBE domain [1] (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulfate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.78 0.72 -3.86 81 5980 2012-10-03 20:18:02 2003-04-07 12:59:11 12 16 2667 79 1494 5716 620 62.60 25 27.64 CHANGED suspNhltupVtslcttu...sps.Vplplusst....lsuplot...pussp..ht.Gpplhshl+sspltl ...................SsRNtltG...pl...s..s...l..pp....ss.........spscV..p..l..p..ls..ssp..............l.h.At.l..os........puspcL..tL...p.sG..pplhAhlKuspVh......................... 1 339 843 1182 +925 PF04265 TPK_B1_binding Thiamin pyrophosphokinase, vitamin B1 binding domain TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain Family of thiamin pyrophosphokinase (EC:2.7.6.2). Thiamin pyrophosphokinase (TPK) catalyses the transfer of a pyrophosphate group from ATP to vitamin B1 (thiamin) to form the coenzyme thiamin pyrophosphate (TPP). Thus, TPK is important for the formation of a coenzyme required for central metabolic functions. The structure of thiamin pyrophosphokinase suggest that the enzyme may operate by a mechanism of pyrophosphoryl transfer similar to those described for pyrophosphokinases functioning in nucleotide biosynthesis [1]. 
20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.24 0.72 -4.26 86 1995 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1908 31 487 1399 315 67.10 24 29.30 CHANGED Gp.ahlphppsh..................paluhlP..lssssp.lohpGhKYsL.......s.spshph..Gsh.hssSNch......tpp..splolpsuhsl ...................................t....lp..tsh..........................pYluhlP..ls.s.sp..lolpGhKYsL..............s.stshth..sss..hssSNEhh.....spp...splohpsGh.................... 0 183 311 409 +926 PF04263 TPK_catalytic Thiamin pyrophosphokinase, catalytic domain TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain Family of thiamin pyrophosphokinase (EC:2.7.6.2). Thiamin pyrophosphokinase (TPK) catalyses the transfer of a pyrophosphate group from ATP to vitamin B1 (thiamin) to form the coenzyme thiamin pyrophosphate (TPP). Thus, TPK is important for the formation of a coenzyme required for central metabolic functions. The structure of thiamin pyrophosphokinase suggest that the enzyme may operate by a mechanism of pyrophosphoryl transfer similar to those described for pyrophosphokinases functioning in nucleotide biosynthesis [1]. 
30.70 30.70 30.80 30.70 30.60 30.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.54 16 2336 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 2211 33 581 1714 474 120.30 26 51.60 CHANGED N.p..thPt..hhps...ppschhl.usDuGusph...hth.......................slhPchhlGDFDSlsc.Ehhshhtpt...sphlhhs..-KDpTDh-hAlphs.......hphsts.clslhGuhGG.RhDHhh.......uslthLh..+h........s..hpshplhlhsp ........................................................................................ht......................pp....s..h.hl.usDtGu.hl...hch..................................................................slh...Ps..hhlG.D.....FDS.lsp..-..h.ph.h......tpp..........ht.p..h.h..h...s....-K...D..pTDhplAlphs.......................................hp.h.s...s...p..lh...lhG...A..hG...G..RlDHhl.......uNl.lhh..p........................................................................... 0 229 386 496 +927 PF00515 TPR_1 TPR; Tetratricopeptide repeat SMART anon Alignment kindly provided by SMART Repeat \N 22.90 13.00 22.90 13.00 22.80 12.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.34 0.73 -7.61 0.73 -4.14 555 18473 2012-10-11 20:00:58 2003-04-07 12:59:11 23 4566 3183 236 9338 112617 30616 31.90 20 6.62 CHANGED spshhshGhsahphscaccAlpsapcAlplsPsp .......................hhhphG.h.s.a..h..p....h....s.....c.......a.p...pA.l..p.sap.cAlphp....................... 0 3602 5574 7522 +928 PF01938 TRAM DUF90; TRAM domain L Aravind, Bateman A anon L Aravind Domain This small domain has no known function. However it may perform a nucleic acid binding role (Bateman A. unpublished observation). 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.89 0.72 -4.18 30 8506 2012-10-03 20:18:02 2003-04-07 12:59:11 15 36 4435 5 2131 6354 2315 60.00 24 13.59 CHANGED pphlGpphcVllcshu...ppGpshu+spt............sphlhlpss....ts...G-hVcl+lpcspp..phLpGcll .............................sphhp.lh.l.cshu..............ptGp....h..l...G+..sc.....................................spsV..hlpus.........................hs....GchV....cVcIsc..spp..p.hhucl................... 0 721 1386 1802 +929 PF00486 Trans_reg_C trans_reg_C; Transcriptional regulatory protein, C terminal Finn RD anon Pfam-B_94 (release 1.0) Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.37 0.72 -4.06 362 52522 2012-10-04 14:01:11 2003-04-07 12:59:11 23 271 4715 43 11355 34159 7239 75.80 30 29.75 CHANGED spp........lpL.ospEapLLphLhppssc.lloRcpLhc..plWstsh........ssspsl-lalppLRcKlcs....s.ts................phIpTl+GhGYph ..................................p...lpL.T.s.p.Eap.LL.t...hL.h....p.....p..s.......s....c......lloRcpLh...c..........pl....W.s.hsh...............sssp..s..l.-VaI..p+LRcKlpp.......s.ts................................ph.Ip.Tl.+.G.hGYph.......................................... 0 3440 7162 9460 +930 PF02458 Transferase Transferase family Bateman A anon Pfam-B_1540 (release 5.4) Family This family includes a number of transferase enzymes. These include anthranilate N-hydroxycinnamoyl/benzoyltransferase that catalyses the first committed reaction of phytoalexin biosynthesis [1]. Deacetylvindoline 4-O-acetyltransferase EC:2.3.1.107 catalyses the last step in vindoline biosynthesis is also a member of this family [2]. The motif HXXXD is probably part of the active site. The family also includes trichothecene 3-O-acetyltransferase. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.32 0.70 -6.04 11 3130 2012-10-02 12:01:53 2003-04-07 12:59:11 10 37 323 17 1766 3103 5 327.00 17 84.89 CHANGED hpVphpppplIpPupPTPpp...pL.LShlD..hltssh.hs.shaFYcpssphspph........shcpL+puLScTLstYYPhAGRL.....sschplsCNs-G..s.FVEApucsp.Lp-hhch...sssshphllsphssspps.s...hPLhhlQlTpFcCG.GlslGhshsHplsDuhShspFhpsWAchu+ut...t.sssPsas+phlhsp........tsPps....hs.h......hssspptsscchlpKphsat....pssplccLpp+ussppsss...........................TphEsloAhlWRshsputptssppp.......sslhhslshRsRls.P.LssuYhGNshhsssstpss..u-l.pp.lshssctlpctlpp.lsc-hhpsshshsts.........hhhpthscsshh..ssalloSWs.+hPhh-lDFGWGpPlassssssshtsts....hhhPsp....tsssGltlhlsLspppMstFccchphh .......................................................................................................................................................................................................................................h...........................................................................ht.tu.ht.h.L......a.....h..sGph...............................................................................l................................s...t.......u..............h......h.h.............s.......................................h....t........h.................................................................................................................h...............................................................................s....lh.h.......hQ.l..o.....h........t.s.........G.....u..........h.hluhthp..H.......hh.h.Duh..uhht....Fh..psh.u....p.h.s....ps........................................s............h....p....p...t........h..................................................................................................................................................................t......................................t.....................h.....h.h.........................p.......t.l.
t.....t........h..t....t...t.....h.......t....t...................................................................................oshps.l...sAh.l....W..p....s...h......s.....t...u...h....t....h...........t..t.............................sthh.h...shs.....h....R.....t....p.....h........t......s...............l.....s.........t......s......a.h.........G.....N........h.h......h........h....s........h...s....h.ts..................tp.l.......h......t.........t..........s..........l..t...............h.......s....t....h.l..p.....p...t.....h..t.......t..........h.....p.....t......t...h..h..p..t..h...h..p.hht..............................h..........h.........................h....h...h..s.s.........p..h..s......h...h..p..h..D...F..G...........h...........G.............p.............s......h.h...t...........................................hh.......................tt..t......h...h...h...h.....................h..................................................................................................................................................... 0 148 963 1430 +931 PF01336 tRNA_anti-codon Aspartyl_tRNA_N; tRNA_anti; OB-fold nucleic acid binding domain Bateman A, Mian N, Finn RD anon [4] Domain This family contains OB-fold domains that bind to nucleic acids [4]. The family includes the anti-codon binding domain of lysyl, aspartyl, and asparaginyl -tRNA synthetases (See Pfam:PF00152). Aminoacyl-tRNA synthetases catalyse the addition of an amino acid to the appropriate tRNA molecule EC:6.1.1.-. This family also includes part of RecG helicase involved in DNA repair. Replication factor A is a heterotrimeric complex, that contains a subunit in this family [2,3]. This domain is also found at the C-terminus of bacterial DNA polymerase III alpha chain. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.29 0.72 -4.12 640 22969 2012-10-03 20:18:02 2003-04-07 12:59:11 20 145 5024 111 6345 20485 6498 80.20 20 12.26 CHANGED lplt.Ghlpsh.................+st.sthhalp.....lpDts...G......plplhhhp......................tthhphtpplp......sshltlp..Gplptp..........tts..........................hplhspp......lphl .............................................hpltGhlhpp................+st.up..h..h...F..ls...............l.p..Dts.....G........................tlQlhhhs.............................................tthhp.h.t.p..p..lp..........ts..s.h.lt......lp...Gpl.ppp.............pts...............................lplhspp...lp..................................................... 0 2125 3961 5336 +932 PF01841 Transglut_core Transglutaminase-like superfamily Bateman A anon [1] Family This family includes animal transglutaminases and other bacterial proteins of unknown function. Sequence conservation in this superfamily primarily involves three motifs that centre around conserved cysteine, histidine, and aspartate residues that form the catalytic triad in the structurally characterised transglutaminase, the human blood clotting factor XIIIa' [1]. On the basis of the experimentally demonstrated activity of the Methanobacterium phage pseudomurein endoisopeptidase [2], it is proposed that many, if not all, microbial homologues of the transglutaminases are proteases and that the eukaryotic transglutaminases have evolved from an ancestral protease. 
[3] 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.81 0.71 -3.86 87 6914 2012-10-10 12:56:15 2003-04-07 12:59:11 14 194 2577 45 2597 6853 1021 111.30 20 20.05 CHANGED pphuppl......tstpssh..ptsptlhpalp.......pphpYs............hssssts........stphLt......spp.....G.pCtpaAslhsshhRuhGI.PARhlsGhhh.......s........................................sts.ss....................HuWsE....sah................shu.....WlshDs ....................................................................h...................s.h.....pth.h.t.lhphlt...............pp.h.pYs.........................t.s...sps......................stpsLt........ppp...........G...sC.psaAtlh.hs.hh.R.s.h.Gl...P...A...R..h.V.s.Gahh.............s...............................................................t....sh.....................................................................HuWs-l.ah................................s.ts......WhhhDs................................................................................................................... 0 827 1630 2120 +933 PF00927 Transglut_C 1005; Transglutamin_C; Transglutaminase family, C-terminal ig like domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1005 (release 3.0) Domain \N 26.60 26.60 26.60 26.70 26.50 26.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.28 0.72 -4.01 41 1042 2012-10-03 16:25:20 2003-04-07 12:59:11 17 15 108 79 564 1010 0 100.00 19 25.81 CHANGED sphplclhssss.hupchsltlphpNshspshpsh....sshslphsGlhh..ptppcphthsltPtpptphplphh.pchG..pphlsp...hppspltcVpuhtplhlpp ...........................thplcl..s.s.sh.lGpchslplphpN............shsps.ps.....sshslphs.........Glhh..phhp...........pthhssltPtpptphph...plhspch...pphlsp...hp.sth.t.plpshtplhh............................ 
0 94 147 303 +934 PF02779 Transket_pyr transketolaseD2; transket_pyr; Transketolase, pyrimidine binding domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain This family includes transketolase enzymes, pyruvate dehydrogenases, and branched chain alpha-keto acid decarboxylases. 20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.96 0.71 -4.78 93 20295 2012-10-02 16:07:47 2003-04-07 12:59:11 19 58 5038 173 5571 15647 10577 175.70 23 29.45 CHANGED pthshppu.spAlsplhpcs.pllshutDls................sushsttpuhhp.p.......................RhhcsslsEpuhsuhssGhAhpG...hhsatupFhsFss......hspstlp.htuhtphsss.lss+sshGhGtcGPsHpuhc.huhhpslPs........................hpVhtPssss-stthlptAlp....tt.tPshlhhs+p.ht.t .....................................................t..hshppu..spu.l......sph......h......p......p..s......s........p......l......hs..hutDlu...................su..s.h..t...t...p...p...u...h.tt...p........................................Rhh..cs.sluEtu....h..s.u...h..us.GhA..hp........G..............hhsht.upF.hs.F..hp.............hsps.t.l.t......h....t........u.h...h....p..h....s..ss....lhs+...s.s.h.u.h...G..t...c....G..s...s.H...pu.h..........p...huh..l..pt.h..P..s.......................................................................................hpVh..tPu...sss..-stthlpt...........ul.....p.....................p..s......P..s.hlhhs+p.h..................................................................................... 0 1847 3551 4709 +935 PF02780 Transketolase_C transketolaseD3; transketolase_C; Transketolase, C-terminal domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain The C-terminal domain of transketolase has been proposed as a regulatory molecule binding site [2]. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.45 0.71 -4.19 80 17209 2009-09-13 15:57:48 2003-04-07 12:59:11 15 48 4995 153 4660 13131 6413 117.50 22 22.65 CHANGED Gcuplh...+pGp-lslluhGshlphulpAAcpLtpp.....GlsscVlDhRolpPlDhcsl...hptsc..+ss+ll..lVcEuhh.huGhuutlsshlsc.psht.h.sslhplss...-sshshs...thththhshs..ppl ........................s.h.....ppu..s.-..lsllu.hG.s.h...l.ph.A.lp.AA.cpLptp.................Gh.ps.pll..ch.Rslp..P..h...D..t...-.h...l...hp.t.c...cs.t..+.h.l.........llp.-....s....s...h...p..s....u.....l...u.....u.....tl.st.hh.........t....c..........sh....t.............h.........h...s...........l..t..p...h....uh.....s.s..hs.........hht.hshs.tt................................................................... 0 1573 3034 3963 +936 PF00335 Tetraspannin transmembrane4; Tetraspanin family Bateman A, Finn RD anon Bateman A & Pfam-B_3109 (Release 7.5) Family \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.90 0.70 -5.07 212 4340 2012-10-02 01:14:40 2003-04-07 12:59:11 15 37 310 4 2488 3981 12 207.50 17 82.23 CHANGED hlKhh....lh.hhN..hlhhlsG.hsllu......hGlhh...........................................tthhhs..s........hhllh.lGsll.hll.uhhGs.hG.uh.......+csp..........shLhhahh.hlhllhlhplssulhshshpsphpsthtp.h........................................................................tthppssshppthstlp...pphp.C.CGhpshp-atp.......................................................................................................sstssssshhppGChptlt...phlp.pp..hhhl......sslulsl....s.hlplluhlhuhh.Ltpph 
....................................................................................phh.....hh.hhN..hlh..h........lhG...hhllu...hGlah....h.......................................................................t................h..h.h..s.s..h...lllh..h.Gslh.hll..uhhGs..hG..uh....................pc.sp......................shL...hh...........ahh....hlh..l..l.h......l..h.....p.l.s..s.u.lh.s..h...h...h..p...s..p.hppthtp.h...................................................................................tht...p..p..t.t..h..p.ph...hshlQ...............pphp..C....CG..h....p....s.....p..Da......t..........................................................................................................................................................................................................................................................................t..t.......t..t..t.t.....h....h..p..p...G.Chtt.l...........phhp...pp....hhhl..............shhshsl...h.h..h..p....l..hshhhuhh.h....h...................................................................................................................................................... 0 761 1047 1771 +937 PF00905 Transpeptidase Penicillin binding protein transpeptidase domain Bateman A, Finn RD anon Bateman A & Pfam-B_726 (Release 8.0) Domain The active site serine (residue 337 in Swiss:P14677) is conserved in all members of this family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.70 0.70 -5.40 42 25088 2012-10-02 21:13:33 2003-04-07 12:59:11 17 55 4539 218 4622 19829 10300 286.30 21 43.58 CHANGED ussVlh-s+oGplLuhsspsphsspp...........................hhtt.apPGSThKshss.hhul-sGhhpsppthpt.st.h..sstshpsapps........hshppuhppSsNhhh.pl.shclGtc..................p..htphhpphGhGspss.sh.tp..t...............................................hstsssuaGpu.lsloPlp.sphhsslsssG.h....hht........................t.....chshstpshpplpphhttsspss..................shpluuKTGTAphht.ts.h.............huhalGa.s.tsssphshslhls.......tssttsutssstlhpplhp ........................................................................................shlsh-spoGpllAhsu.s...s.a.ssst......................................................................................h..N.p.s.......h.......p......s.......h.......h........p....PGSohKshsh...ss..A..........l..........-..........p.........G............h................h...........s..............s.......s..........s..............h.............h............s.............s.........s..........s..............h.........h....................h............s.........s..........t.......p......h.......p.......s....astp...........................htG......l.s....hpp.......A....lt....p..S..sN....s....s....hs...p.l....s....t....p..l....G.h..s.......................................p......h.t.p.h.h....p.c.......h.G...h...s........p.......t....s...t..................................................................................................................................................................................................phhs.h.u.h....G....t.........G........h...sso......P..l.phup.sauslAN........s.Ghh.h..........p.......P..p...h.l.p....p...l....t.....p.sp.h...........................................................p................s...p....p...s....h....s...p....p....s..s..t..h.....l.p.p.....h.........h....pt.
.Vspps............................st...th..t....h.........s...h......p......l....u...G....KTG.Tu....p........h...................................................shh..sGa...................................s...........P.....p....h.s.h.s.lhht....................t..s....t........h.................................................................................................................................................... 0 1494 3005 3893 +938 PF01609 DDE_Tnp_1 Transposase_11; Transposase DDE domain Bateman A anon Pfam-B_1013 (release 4.1) Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction [3]. This family contains transposases for IS4 Swiss:P03835 [1], IS421 Swiss:P11901 [2], IS5377 Swiss:Q45620, IS427 [4], IS402 [5], IS1355 Swiss:O69604, IS5, which was original isolated in bacteriophage lambda [6]. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.43 0.70 -4.79 91 12598 2012-10-03 01:22:09 2003-04-07 12:59:11 16 50 2656 6 2783 18569 2527 188.80 13 58.95 CHANGED ttttphh......hlDuohlpss..........................................tth..thpstpthtuh.Khclhs.....sssshhhshtls.sush.pDtphhtp..lhct..............phthlhuDtGYsstp....hhpplppp.shph..hht....h+tstth..................................................................................................hhtpptpthth..............................hhptRhtlEpsFphlKph.hthsc..hptpshsphpshlhht.hlshhl ..................................................................................................................................................................................................................................................tt........hD...h...........................................................................................................................................................th...t......h.phhh....................tp....t.h....h..tht.h........h.....ss....ph...p..-..h..p....hh.t........llpt.................................p.t.t.h..l.h.s..D.........tu..at.s.tt.........hhp..t.l...p....p......t....t...hph..........hh......................h+.t.p.tth..................................................................................................................................................................................................................................................................................................................h....h...t.....t.....t....t.....t....................h.......................................................................................................hh.p.t.R.h.t.l..E.t..h...ap.h...+...p.......ht.h.pp......h..t.p....t..t.thp...hhhh.hhh....................................................................................................................................
................... 0 850 1859 2290 +939 PF02371 Transposase_20 Transposase_19; Transposase IS116/IS110/IS902 family Bateman A anon Pfam-B_280 (release 5.2) Family Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases for IS116, IS110 and IS902. This region is often found with Pfam:PF01548. The exact function of this region is uncertain. This family contains a HHH motif suggesting a DNA-binding function. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.69 0.72 -3.88 139 6999 2012-10-03 02:11:09 2003-04-07 12:59:11 11 25 2087 0 1815 6255 787 83.60 27 26.97 CHANGED sphLtolPGlG.lsAssllupl.s-h.pp.FpsscplsuasGLsPttppSGsptp.ps+lo+.tGsptlRphLhhuA.hssh.ph.......s..shhpthap ............................hltolPGl..G....h.sA.ssllu..........pl..u-h..pp..Fp.sscp..lsua.s.........GL.sPtp.h.pSG..sppt...ttphoK..tG.sptLRphLh..sA..hssh...ph.............p.................................................... 0 542 1135 1449 +940 PF01526 DDE_Tnp_Tn3 Transposase_7; Tn3 transposase DDE domain Bateman A anon Pfam-B_885 (release 4.0) Domain This family includes transposases of Tn3, Tn21, Tn1721, Tn2501, Tn3926 transposons from E-coli. The specific binding of the Tn3 transposase to DNA has been demonstrated. Sequence analysis has suggested that the invariant triad of Asp689, Asp765, Glu895 (numbering as in Tn3) may correspond to the D-D-35-E motif previously implicated in the catalysis of numerous transposases [2]. 
20.70 20.70 20.70 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.91 0.70 -5.86 50 1929 2009-09-11 04:42:20 2003-04-07 12:59:11 12 14 851 0 316 1636 192 297.90 33 44.98 CHANGED DlLh-VsphTuFsptFpclpst.....chpsttptllAsllA.GsNlGlpchAct.ssslohcpLshspppalct-slppApspllstppplsluphW..G...s.Gp...s..u...SSDGp+assstpsltu.phss+Yt...stGlshYsalu-passhauplIssstpEAhallDGLLppp..os.l...pscp..hh..sDTtGho-hlFulhcLLGapFsPRl+slp...c..p...+Laph...c..s..ss.pYsplssl.h..tp....pIshclIcppW--llRlssSl+tGplsuotll++Luuh.s+pssLtpAlpElGRl.+TlFlLcYls..-.slRRplpttLN+uEuhpsLsRAla.aGptGclps+sh-pQphpssuLsLlsNuIlhWNThalpcslpplcpp.G..pls.s...chltclSPlsapHINhhGcY .........................-llhtlst.s.tF.pthhthptt........t.t....lhsslh.u.uhNhGhp.hsp......ssh.shtpLt.htt.hhp..cshptA.s...lhph..ph....hsthW..G...t.up...hu...ouDG..ph.s.....ps...s.t.s.+Yh.....t........GhhhYphlscp.assh....s...................hlsss.p-uhall-Gll.p..p.....os...l...p.pp..hhsDTtG..................hochsFuhhtLLGaphsPRltshtp...p...phah......p..t........st..th..t.lt.h..h....tt....plsh.phIttpas-hhRlssSlp.Gpsssshhl++Lss..sptstlh.AlhEhGRl.+TlahLcalp...s.phRRplpttLN+GEuhpultRslh.as.ph.Gc.lpp+thcpQp.phssLsLlssullh.WNThhhppshpthp.....tp.G...hs.t.phlttlSPltapHI.NhhGcY.............................. 0 61 182 248 +941 PF01548 DEDD_Tnp_IS110 Transposase_9; Transposase Bashton M, Bateman A anon Pfam-B_646 (release 4.0) Family Transposase proteins are necessary for efficient DNA transposition. This family includes an amino-terminal region of the pilin gene inverting protein (PIVML) and members of the IS111A/IS1328/IS1533 family of transposases. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.69 0.71 -4.53 127 7132 2009-09-14 00:11:16 2003-04-07 12:59:11 12 28 2092 0 1856 6395 788 129.20 22 42.24 CHANGED hlGlDluKpphpls....hhssss............htptphsss.tuh.pplhshlpph...t..............hhlshEusu.htt.hltphLpp.tGhpVhllsstps+shtp....ptsKoDthDAphlAchspp..t...php....h.......htstptppLptLsptRcpLhpppsphpNc .............................................lGlDlu.K.p.phpss....hhstps..........hh.h.p.t.p.h.s.s.s....ts...h....pplh.phltph..t...................hhls.hEusuhht.....h.......h......h......p......h......L......p.p....h........G......h......p......l..hl..l.s.Pthh+tht............ptsKoDthD...Ath.l..A..chhpp.........t.....php........h.......h.st.t..ptlpt.Lh....p....h....+p.plhpphst.hs.................................................. 0 549 1162 1475 +942 PF00579 tRNA-synt_1b tRNA synthetases class I (W and Y) Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.88 0.70 -5.29 40 11971 2012-10-02 18:00:56 2003-04-07 12:59:11 20 46 5006 203 3414 8921 6461 279.10 24 74.98 CHANGED pp.tt.hplhsGhcPTus.lHlG.ahsslpphhphQ......tGacshhlluDhpAhhscss.....cplhpppphhtss......L.uph.lDsp+...............sp...lhhpSchhpph..phthhlpplushhpls+Mhphcs......hppchpp..............slshuthsYPlLQuhDhhhhpsc....l..sGsDQptplphuc-lsc+hspp...................hshslhss....llstssGp..KMSKStss...sIaLscptpsshchhpphhssscp..tltthhthhshlss..............lphhpth...ttpss......pthcclhsphhsthhpsschtptstpshpphhp 
.....................................................................hp......tlhsG..hcP.T.us..LHlG.alssl.hp.h.....h.p.h.Q....................ss...a..c..s..h.......h..h..l...u...s.....h...puh.hsc.s.......cp.l...p..p.p.p.t...htss................l...uth....l..D......pc.........................................................................................sp.....lh....p..u.c...h....t.ph.......phh.h.h..l..p..s.hsp..h..hpls+M.hphcs..................h..tpchpp.........................tsl.s.h.s.F..sYPl....LQ..uh.....D.hh.....h..h..pss........................l..sGpDQh.tplphu..p-.....l.s.c...+.h.stt..............................................................................s...t.sl..hss.................ll.st...h......D.........G...............s......................KMS.K.St..ss................slhL.........s.p.........p..sshc.h.h.p..t.hhsss-t.........lh.h......hp.h..h...s......hs.............................................l.p...h...h.p.th......stps............hpc.lhs..p.h..s..t....h.....htstchtptstchhpthh...................................................................................................................... 0 1164 2149 2869 +943 PF01409 tRNA-synt_2d tRNA synthetases class II core domain (F) Howe K anon swissprot Domain Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only phenylalanyl-tRNA synthetases. This is the core catalytic domain. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.61 0.70 -5.18 46 6642 2012-10-02 14:22:40 2003-04-07 12:59:11 15 27 5187 59 1844 6318 4237 222.30 45 68.66 CHANGED slDlolP...upthtsGshHPlspsh-clpclFhthGFphhp.GsplEs-aaNF-ALNlPppHPAR-hpDTFal....p....................................s.phlLRTHTosVphRth.....pppc...sPl+llu.G+VaRpDs..DATHps.FHQlEGlllDc..slohucLKGsLcpFhpphFGtc.hclRFRPoaFPFTEPSsElDl.t.................tcpstWlElhGsGMl+PpVLcss.......Gls...pchsGhAaGlGlERlAML+YGlsDlRphaps..Dl+FLcpa .......................................................................................................................................................................t.lDlohs.....sp..h.t...GthHPlsth.hcclpp...hF...h...t........h........G........a........plhp....G....s-lE..s............-aaNF-.sLN...hP.t.p.H.PA...Rs.h..p...D.TFYl............................................................................spphLLR..T..aT..S.s....V...Q...h...Rsh..................................ct...pp.......sP...l.+....h.IuP.G+V.YR...p.Ds...D...u...TH...os...FH........Q..l..EG....L...l...l.............D..c...................s..l........o...h.............u...c...L....K....G..s..L..c..t....hh+ph....F.........G.......p..........c....hp............lRh........R.P...SY.F...P....F.T.E.PSsElDlts..........................................t.ctssW.lE...l.LG.s.GMV+P.p.VL.c...s...........................GlD..sp.asGFA...FG.....hGh-RhsMl............+aslsDlR.haps..DlRFhpp................................................................................................. 
2 623 1139 1548 +944 PF01588 tRNA_bind Putative tRNA binding domain Bashton M, Bateman A anon Pfam-B_482 (release 4.1) Domain This domain is found in prokaryotic methionyl-tRNA synthetases, prokaryotic phenylalanyl tRNA synthetases the yeast GU4 nucleic-binding protein (G4p1 or p42, ARC1) [2], human tyrosyl-tRNA synthetase [1], and endothelial-monocyte activating polypeptide II. G4p1 binds specifically to tRNA form a complex with methionyl-tRNA synthetases [2]. In human tyrosyl-tRNA synthetase this domain may direct tRNA to the active site of the enzyme [2]. This domain may perform a common function in tRNA aminoacylation [1]. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.02 0.72 -4.15 43 10729 2012-10-03 20:18:02 2003-04-07 12:59:11 15 48 4863 53 2411 7871 3721 99.10 31 16.79 CHANGED l+VGcllcsEcaPsADK.LhhhplDlGpcp.....RplVuGlsphhs..-th.s+hlllls.NLcPt..............KhRGlpSpGMlluAp.......sssp..htllssstss.sGsc ..............................................lhVGcll..ps..c..p..h.P..s.......u..DK......Lhhhpl.......D....l............G.......s..........c.........p...............hpI.Vs.Ghs.shhs..........cs....l...luphss.hls....N....hp.p..........................Kl.R....G..h....SpG.Mlhuspp...............sspp....s..l.l.ph.s..p.s.h..sGt.................................. 0 822 1535 2024 +945 PF03250 Tropomodulin Tropomodulin Bateman A anon Pfam-B_3359 (release 6.5) Family Tropomodulin is a novel tropomyosin regulatory protein that binds to the end of erythrocyte tropomyosin and blocks head-to-tail association of tropomyosin along actin filaments [1]. Limited proteolysis shows this protein is composed of two domains [2]. The amino terminal domain contains the tropomyosin binding function [2]. 
21.80 21.80 21.80 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.67 0.71 -4.68 12 463 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 101 0 230 395 0 130.10 40 31.99 CHANGED Mu.s...hp+-LtcY+DlDEDElLtsLStEELcQL-htLpElDPENshLPAGhRQ+DQTpKsPTGPaDR-pLLcaLEKpAlEhKDR-DhVPF.TGEK+GKlFVPKp+stch..tcEploL-.P.ELEEALssATDsElCDlAAILGMaTLhssp .....................ap+cLpc.Yc.D.lD.EDElLusLSsEELcpL-pEL--lDP-s..shLPsGhRQ+sQTpKsPTGsFsR-tLlpal..E.Kp.A....hchh.-+E-h..VPh....st.h.+.G.+....a..h.......................................t..................t.......h..tl-.....s.-.hcp..uLtpAspt-hh-lA.tlls................................................................................ 0 29 46 105 +946 PF00992 Troponin Troponin Finn RD, Bateman A anon Pfam-B_62 (release 3.0) Family Troponin (Tn) contains three subunits, Ca2+ binding (TnC), inhibitory (TnI), and tropomyosin binding (TnT). this Pfam contains members of the TnT subunit. Troponin is a complex of three proteins, Ca2+ binding (TnC), inhibitory (TnI), and tropomyosin binding (TnT). The troponin complex regulates Ca++ induced muscle contraction. This family includes troponin T and troponin I. Troponin I binds to actin and troponin T binds to tropomyosin. 
23.20 23.20 23.30 24.10 23.10 23.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -11.04 0.71 -4.09 56 1116 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 175 22 337 1073 1 125.00 33 53.51 CHANGED K+pLKsLhhtpAtpcLcpEpcc+tpE+pphLp-+s.shp.hsu..sps-LQcl.s+ch+p+lspl-EERYDhEt+lsKpctEI--LphKl................hDL+GKFKKPsL++V+hossuhh+uhLGsKHpsshDLRusLKpVK ...................Kp..K.sLh.hpAh.-hchEpccKEEEc............hhhLp-.Rltthc......h...........ppu-...................Q..........cl.scc.+c+.s+l-..EE+h.chE.tc.t+pctE....cthppKs..........................t-h+t...hK+..sh+.ch+hph.u.h+t.Ls....c...+.h..s.p.pLRt...phKplh........................................................................ 1 71 116 229 +947 PF00234 Tryp_alpha_amyl tryp_alpha_amyl; Protease inhibitor/seed storage/LTP family Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family This family is composed of trypsin-alpha amylase inhibitors, seed storage proteins and lipid transfer proteins from plants. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.26 0.72 -11.45 0.72 -3.77 187 1632 2012-10-01 19:46:35 2003-04-07 12:59:11 17 6 243 52 214 4676 6 103.10 21 68.27 CHANGED Cstshhp..............................lssChshhps..........spppCCstlpsl...................ttttChCtslpshhhs................................................................................tht..tsttLPshCsl....shP.........tC ..................................................................................hht........................................h....s..hshhp..t.t.hs................sl.ps.CCppLtsl....................sphCpC..tsltshhpuh..............................................................................................................................tt.ls..hh.sAs..slPuhCs.l..sls.......................................................................... 
0 29 95 159 +948 PF00089 Trypsin trypsin; Trypsin Lutfiyya LL, Sonnhammer ELL anon SCOP and Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.76 0.70 -4.76 71 22248 2012-10-02 13:45:52 2003-04-07 12:59:11 21 596 2517 2044 9786 28849 4196 206.20 23 59.84 CHANGED IlGGppsphtsh....Pa.sslphps...........hCGGsLlspp..all.TAuHChps..................phpl.....hhGt..thppppss......phhplpp..hhhp.tassps..............tDlALl+Lpps........hphssslpslsLss.....ss...tssspshlsGaGp...stpsu........sphLppsplsllspp.pCpp....h..................lssshlCsss........tt..ssCpGDSGGPllstst........lhGlsS....aG..hsCupsph....sula.spls....thhsWI ..........................................................................................................................................u.....s.....t.ph....Pa...s..t..l.......htt...................................hhC..G...G...s...l.......l........s..........p..........p.......a......l...L..T........A..A...H.C.hts......................................tthpl................hhG....t.............t....h....p....t......t......p.s.......................t...h..h..t..lpp.............hhh..H.....p....a.s..t...ts..h............................................tpD..l......A..L.l...c...L...pp.s..................................h.t....h...s...p....t....l...p.......s.....l..sLsp.....................t.t...........s...s.....p....t........s.......h......l....s...G..W..Gp.................s.t.tss..........................sp.hL...p..p...s..............p.l.......l.....l....s....p...p....t...Cpp............h.....................................................lp.p.s....h..lCuuh.............t................t...ts....s...C.................p..............G....D........S...........G....G.P...L....h...sptt...............................lhG.l..s..S.............aG........s....C.....s....t....h.................ula......splt......hh.Wl........................................................
........................................................................................................................................... 1 2699 3841 6981 +949 PF02210 Laminin_G_2 TSPN; TSP_N; Laminin G domain Finn RD anon Pfam-B_4211 (release 12.0) Domain This family includes the Thrombospondin N-terminal-like domain, a Laminin G subfamily. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.64 0.71 -4.10 208 7915 2012-10-02 19:29:29 2003-04-07 12:59:11 19 1063 156 84 4277 8885 448 128.10 19 16.04 CHANGED F+T...p.pssGlLlassstss...................taltlplp.sGp.lthphshu................tss.hhhhsspp..........l.sDGp.WHp..Vplppptpp................hplt.....VDsptstttttsttt...................................thphsss.lalGGh.spt.t................ppsFpGClpslplssp ......................................................F+T..tpss...G.lLlhssstps...............................talt.lp.l..........p..s......Gp....lt...hp.h.shu..................................ssshh.h.h.h.s...s.t.t......................l...s..D..G.p..WH..p.....Vp.l..p.....p...p..s.pp...........................................spLp..........V.D..s..pt...s...t..t...thts..ttt....................................................................tl.s.hpss...lalG............Gh..sp.t.........................................tpsFt.GClpslhhss..................................................................................... 0 870 1227 2700 +950 PF03133 TTL Tubulin-tyrosine ligase family Bateman A anon Pfam-B_682 (release 6.5) Family Tubulins and microtubules are subjected to several post-translational modifications of which the reversible detyrosination/tyrosination of the carboxy-terminal end of most alpha-tubulins has been extensively analysed. This modification cycle involves a specific carboxypeptidase and the activity of the tubulin-tyrosine ligase (TTL) [2]. The true physiological function of TTL has so far not been established. 
Tubulin-tyrosine ligase (TTL) catalyses the ATP-dependent post-translational addition of a tyrosine to the carboxy terminal end of detyrosinated alpha-tubulin. In normally cycling cells, the tyrosinated form of tubulin predominates. However, in breast cancer cells, the detyrosinated form frequently predominates, with a correlation to tumour aggressiveness [3]. On the other hand, 3-nitrotyrosine has been shown to be incorporated, by TTL, into the carboxy terminal end of detyrosinated alpha-tubulin. This reaction is not reversible by the carboxypeptidase enzyme. Cells cultured in 3-nitrotyrosine rich medium showed evidence of altered microtubule structure and function, including altered cell morphology, epithelial barrier dysfunction, and apoptosis [4]. Bacterial homologs of TTL are predicted to form peptide tags. Some of these are fused to a 2-oxoglutarate Fe(II)-dependent dioxygenase domain [6]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.88 0.70 -5.49 21 2286 2012-10-10 13:17:02 2003-04-07 12:59:11 10 53 310 4 1558 2259 173 263.90 24 44.99 CHANGED hphpp......tptlNaaPsshplspKD.hhhpslpc.tpphthc.......................hthhspoahlst-hsphhphapcppt............shWIlKPsupu+GpGIhlhschspl.............................................pppshllQ+YIc+PLLlss......pKFDlRhYVLlosh.pPLplYlYc-u.lhRFuopcYs........hsshpshhhHLTNhulpKc.t.....tpc.pthpspcaoltshhthhpp..hstcplhpplhshlhcshlsu.......hpsst.shppthssFElaGhDlhl........DpslcPWLlElNhSPsh.psssthssplpstllpslls.....lsssptpsh 
................................................................................................hht............hsph..t...........l.spKt...hhhp..h.tp.h..t..h..p......................................................thhP..o.a....h.......s.....t.....c...h..t...............h..h...p........h.t..pttt......................................................................thWIh...KPs.s.....t...sp.G...p.G.........I..h..l..h...p.p.h..p.p.l..................................................................................................................ttpth.....ll....Q....cYI....pp.P.hL.lts.............................hKFD.l.RhY.lL.l..........s.....u.........h......p..P....L.p............la.h.a.........ccu.hsR........Fusp......Ys.......................ps..hp.s...hhh....HLT....N.hul.p+p.............................t..........t...................t............s.........p...p...h.s.....h.....p....p......h..........t......h.hpp.............h..p.....h.....p.....p.....h..h..p.p.....l....h....p..hhhp...hhlts...............................hpss.........h..............t...........p...s..F.ElhG......hDh.hl......................Dp.p.....h...c.....P.aL.lE..............l.N..hs.....Ps.h...t.t...s...s.....h...p.....h.p.l.h..thh.tshhp..............th................................................................ 
0 771 931 1276 +951 PF01167 Tub Tub family Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.70 21.00 20.20 20.50 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -5.39 11 847 2012-10-02 20:44:47 2003-04-07 12:59:11 13 23 166 7 509 787 14 217.80 36 43.04 CHANGED PAPpsshlpChIpRDKpGhD+GLaPsYahaL-p...EsG..hFLLAuRKRK+SpToNYlIShDPsclSRsussYlGKlRSNhLGTKFTlaDsGsssp+..sp................ospssshR.-LAsVsYEsNVLGa+GPR+MolIhPGhssssp.....RVssp...........................................Phs.sp-slLschpppst-slllL+NKsPpWs--sQsYsLNF+GRVTpASVKNFQllcs....................................scPDaIVLQFGRVucDhFTMDaRYPLsAhQAFAIsLSSFD ...........................................................................................................................pt....hpChlpR..s+p..s.........s.hh....h..a......h........h............cps.........hFLLuu.++h.++........s.t.p....spYlI.....Sh....s....s..s...s..h........S+....tu.........p...s..........a.......lGKl.R........S.N.h..hGTcFhlaDst.s.tt..t..........................................................p....s..th.p..plutl.Y.c.h..Nl.L...s.......+.G...P..R+.Mp..shh....s.hs.pt............h..hp....................................................................................................s....tt......t......s....h...t.....p..h.p..p.....p....s..hp.......phl....LpNKsPhWs-phQ..sasLNF.+........G..R...V......T.....ASVKNFQl.lts..........................................................................................................................................................s-..-.h....l.lh...QFG+l..u.............c..D.hFThDa....p.YP.lsAhQAFAIsLoSF-............................................................................................. 0 172 275 391 +952 PF03953 Tubulin_C tubulin_C; Tubulin C-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes the tubulin alpha, beta and gamma chains. 
Members of this family are involved in polymer formation. Tubulins are GTPases. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. Tubulin is the major component of microtubules. (The FtsZ GTPases have been split into their won family). 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.47 0.71 -4.09 66 8994 2012-10-03 12:11:42 2003-04-07 12:59:11 12 31 3147 105 2212 7432 55 113.00 52 32.23 CHANGED PRlHFhhsuaAPLsups.....ptsacphoV.-lTpphF-spNhMs...ssDP+p........G+Ylosss.laRGcVss+-Vccsltplps+ps..spFV-WhPsulKsulsshsPhshptu........ushluNoTuIpclFpRlsc ...............................................PRLHFhhsGaAPLoScu....................uppa.Ru.loVs.E..LTp.Qh..................F.-...........s+.N.MMs.....us...D..PR.+..........G+Y....losss....ha....RG.......c...hs............K.-V.cc..p.h.hs..lps.........K.p.o.......s.F.V-.Wh.P.s.s..hK..........suls.....PPp...sl.hu..................sshluNo..TuItE..hapRls.c................................... 
0 760 1134 1715 +953 PF00567 TUDOR Tudor domain SMART anon Alignment kindly provided by SMART Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.37 0.71 -4.28 48 3295 2012-10-02 16:56:36 2003-04-07 12:59:11 19 92 277 31 2089 3372 6 118.30 19 26.42 CHANGED hhhspthslhloplt.ssspFal.......tpstpplppltpplpphhpphtt.......ht.sthspsssshhstDs..pWaRApl.......tthsspphcVhalDYGspph.lshsclptls.pphtt........Ph.uhcspLss ..........................................................................h......pshlst.l...s...s..s...p..hah....................h.pp.t.pp....l....p.....p..........l.....p.p.l.p..p....hh.ppttt...................ht...p..h.sp.h.s.h....s...t.......h.....p....t......-.s.............pW.a.Ruhl....................t.hss.....p..p............s..pV....halD.a.Gspp......h...l...s...h......s...p......l......p..........l....s...p...p.hhp.....l...Ph.AhpspLt..................................................... 0 629 828 1457 +954 PF04906 Tweety Tweety Finn RD anon Pfam-B_5713 (release 7.6) Family The tweety (tty) gene has not been characterised at the protein level. However, it is thought to form a membrane protein with five potential membrane-spanning regions. A number of potential functions have been suggested in [1]. 
29.70 29.70 30.00 32.60 29.50 29.60 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.42 0.70 -6.06 14 332 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 97 0 157 301 0 341.60 35 69.90 CHANGED applsusFpPcDt.sYQQSLlhLusluussLuLsLLhLhhYLlphCCsR+.pcspssp....csCClsWssllssLlC...........CAuIGlGFYGNuETsDGlhploYSLcpAN+TluGIcshVpsosssLppslcppLtpL--laus+s-.......alpslphhQp.sssllpphsulP..hWpts....slslsplAppsshhEaYRWLuYLhLLlLDLlICLlsllGLARpS+slLlshsshGlLsLllSWuShGL-hAsAVGsSDFCssPDsallp.sppphs..sDllpYYLhCu.utsNPFQQpLohup+uLssMQpplpsLL+.Als.FPsuc..csLlulQtlLNooEhsLHQLTALlDCRuLHhDYlpALpGlCYDGlEGLlaLsLFShluAlhFoshVCusP+sWphhtsR-cDh--hp-psPh ....................................................................................................................................................................................................................................t.p...h.puLh.hlus.l..uhh.sLsl.s.Llhlh...hYh...hhhC...Cp+p..p...ttpt....................tssChshs.h..l..hssLl.s...........s.sul...u..l..GFYGNuEss.DGl.plh.ulh....psNpTl.....s.slpp.......hl.ts.sttLp...ps.lc...tpLtpLpc...h..h...stp..s-............hltshphhpt.hpslhttht..sls.....hhpts......shs.h.tpl.....up.psshh-.......Y.R....Wl..ua.lhLL.l.ltll.....lCLh.s.llGl...........s........+...........pS+hhll...hhsh..hu...lL..sLl.lSWsuhGlc.hAsuV...uhSDFCssPDsalhphsppphs.......s-llp.YYhh.Cs.....s..hs.....N...P.F.Q.Q.p..Lo..sp.+...uLsph...ptplt.tL....p...uh........ass.sp.......csLhtlpthL..N....soEh......shpp.LsAl...l....cC...RuLHtDYh.pAlp.GlC.DulpGL.laL....h..L.aS...hlsAhhhsshlCshs..+sWt.h......p.p.p.tp........................................................................ 0 39 56 101 +955 PF04564 U-box U-box domain Bateman A anon Pfam-B_2801 (release 7.5) Domain This domain is related to the Ring finger Pfam:PF00097 but lacks the zinc binding residues [1]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.36 0.72 -4.01 16 2544 2012-10-03 15:03:13 2003-04-07 12:59:11 10 170 360 21 1743 2959 196 70.60 32 11.07 CHANGED lPDEFhDPIhhpLMpDPVlLPSG.hshDRusIp+HLhs......spoDPFs.RpsLTpcpLhPNhpLKpcIspalpp+cp ...........................hPpt.F......h..C....PIo.hcl......MpDP.V.l.......h...s...o.....G....hTY-..Rps.I.p.c.a.l.p.p................................s.p.osP.h....T...p.......p...........s..L...s..p.....p.........p..L....h.PN.hsL...+phIppahtpp..h................................ 0 505 1047 1434 +956 PF00627 UBA UBA/TS-N domain Bateman A anon Bateman A Domain This small domain is composed of three alpha helices. This family includes the previously defined UBA and TS-N domains. The UBA-domain (ubiquitin associated domain) is a novel sequence motif found in several proteins having connections to ubiquitin and the ubiquitination pathway. The structure of the UBA domain consists of a compact three helix bundle [1]. This domain is found at the N terminus of EF-TS hence the name TS-N. The structure of EF-TS is known and this domain is implicated in its interaction with EF-TU [2]. The domain has been found in non EF-TS proteins such as alpha-NAC Swiss:P70670 and MJ0280 Swiss:Q57728 [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.47 0.72 -4.00 141 9566 2012-10-01 23:03:33 2003-04-07 12:59:11 26 206 4854 104 4154 7764 1831 37.30 34 8.50 CHANGED hspptlppLhph..G...aspp.pspcAL.ptsss..sh-pAhphL ...............stphlcpLp-h...G....hshh.cs++AL..p.....p..ssG...cl-tAl-hL............. 
0 1308 2242 3222 +957 PF01040 UbiA CytC_assmbly_fac; COX10_ctaB_cyoE; UbiA prenyltransferase family Finn RD, Bateman A anon Pfam-B_1357 (release 3.0) Family \N 24.60 24.60 24.60 24.60 24.40 24.50 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.17 0.70 -5.17 138 10787 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 4280 0 3435 8227 6621 264.00 17 84.83 CHANGED hhhsh...hssh..hu..hhhshtshhsh.........hhhhhhhshl............hhts.us.hh...N-..hhDhchDt.................tt..tp..........ls.sGtls....p.pshh.hshshhhlu..lh.....hu.............hhhs.hhhhhhshh..shhh..s.hhYo....................hh+phshhsplhhuls...hu......hhhh..hushh......hsth...............................sh.hhlhshh.hhhshslhht........thtDhcsD.ppsGhpo...........lshhh........t.thhh.......hhhhhsshhh...............hhhhhhhhhhhhhshhhhshslhh..h.h...........h.........................hhthhhhhhh ........................................................................h....hhssh.hu.......hhhu...t.s...h..sh...............hhh..hhhhlush.............lhtu.uussh...............Nc....hhD.t.chDtpht................................Rp..tpR..............s........ls..sGtlo...........p...puh...h...hshh.h.hl..lu....hh.....lh.................................hhhs...h...h...s...h....h......l......u.hh.....ulhh.....h..hhYo...........................hhK.+.hs.....h.....splh.suhs..........hu..........hssl..hGhss.........sssph......................................................shhhhl.hhhh.h.h..h....h....s....huhhhh....................................sht-.hcs.D.....tp....s...G.hto................lslhh...........Gtptshh...........................hhhhhhhhhhh...........................................................hh..h.h.hhh..h..s..h...h..h.h..h..h..s..h..h.h.....h.sh.h...h..h...h.......h...h...h...h..........p.....................................hhhhhhhhh.......................................................................................................... 
0 1089 2178 2911 +958 PF00240 ubiquitin Ubiquitin family Finn RD, Griffiths-Jones SR anon Prosite Domain This family contains a number of ubiquitin-like proteins: SUMO (smt3 homologue) (see Swiss:Q02724), Nedd8 (see Swiss:P29595), Elongin B (see Swiss:Q15370), Rub1 (see Swiss:Q9SHE7), and Parkin (see Swiss:O60260). A number of them are thought to carry a distinctive five-residue motif termed the proteasome-interacting motif (PIM), which may have a biologically significant role in protein delivery to proteasomes and recruitment of proteasomes to transcription sites [5]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.96 0.72 -4.58 76 11560 2012-10-03 10:59:06 2003-04-07 12:59:11 18 265 901 508 6203 11456 369 67.10 44 28.05 CHANGED +shssp........phslclp.ssolpplKpclpppps...lssspQ+LlasG+.L..cDppslt-aslppss..slplshc.p .........................................+olsGK......slsl..-.V.-..s.o.D........T..l....c.sl.K..s....K...I........p..-..+cG.............IP.......P..-.......Q.Q...RL...I........F.....u........G......K......p.....L......-D..........s...........+......T..L.....u.....D....Y.....s...Ip.....cpo......TLHLVl+................................... 0 2304 3483 4906 +959 PF00789 UBX UBX domain SMART, Mistry J, Wood V anon Alignment kindly provided by SMART Domain This domain is present in ubiquitin-regulatory proteins and is a general Cdc48-interacting module [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.56 0.72 -3.92 22 2190 2012-10-03 10:59:06 2003-04-07 12:59:11 15 58 327 23 1473 2158 19 82.40 21 19.37 CHANGED spstsssplplRhPDGp+htc+F..pscslpplhpalpspt.sts....p.........................FpLhsshP..R+.hspps..poLp-s.thhssuslllph ................................................p....stsplplRh.s...s...G...p...p..l.p.p..c.Fp..sscsl..p....s.lh.p.al...p...s...p...t....s...st.....t.............................................................................F...p...L....h.s..s.a.P.....p+.......h........s....p.p..s............t......oLp..-h...hh.ss.u.slh...h................................................................. 0 463 739 1131 +960 PF00443 UCH UCH-2; Ubiquitin carboxyl-terminal hydrolase Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null --hand HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -12.07 0.70 -5.28 53 8903 2012-10-10 12:56:15 2003-04-07 12:59:11 24 248 429 43 5798 8963 637 380.20 19 45.37 CHANGED 
hsGLtNhGNTCYhNSlLQsLht..ssthp.chlh..............................................p...............stpst.ttts..........................................................................................................................................................lspthpplhpphhps......................pppsltPpt.............................................................................................hhptlsthtpp..hp.......shpQpDApEahthLL-pLccshpt..t.....................................................................................tt.shl..pclFpGphpsplpChpCsppspshcshhslplslttppph....................................................................................................................................................................................................................................................................................................................................................................................................................phhphpttpthptppphhsspspppppuhKphplpcLPp.lLhlpLcRF......aph.ptth...ppKlspplpaP.....h.pLDls....shhttstt.................................................................................................................................................................................................................................................................................................................................................................hpYcLhuVlsHtG.s.......hpsGHYhuahhp.........ps..............................ptt.......WhpacDspVsphs................ppl.hp.............................psAYlLFY 
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hGL.N..h..G..N.T........C.ahNuh......l..Q..............s........Lh.t...h....hp...phhh...............................................................................................................................................................p.....................................tt......t....t..t.t............................................................................................................................................................................................................................................................................................................................................lhpth.t.p.l...h.tt.h.ps.....................................................................t.t.p.s.h..s..P...p..t.........................................................................................................................................................................................................................................................................h...h...p...t....l.....t...t.h.t..p.......ht.........................shp..Q.p.......D.......u........p........E.......h.h.............t.......h..l......ls..t..l...c..p.p..hpt................................................................................................................................................................................................................................................................................................................t...t.t..p.
..s.hl..........pp..h.F.....t...G.....p.....h...p.......s........p.......l...p.....C.......p...Ctphstp.pshh.lpl.h..ttt...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.httttthhstpspthtpsp+phpl.t.ph....P.......l.L................h.l.......p.......L.......p......R..F..........................hp.....t............................htK....l.sp.h..l.p.aP.....................t.Lc.hs...............hh..t...p...t......t......................................................................................................................................................................................................................................................................................................................................................................................
...................................................................................................................................................................................................hhY.p..L.h..u.V......l....s...H................G....s..................h.p...s.G...H.....Y..h......sa..hpp.......................tt...................................................................................................................................................ptt..............Whh...as..D....pp..V..p...h.p..............................ppl..p...............................................tpuY...hLhY................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 2079 3020 4452 +961 PF03456 uDENN uDENN domain Callebaut I anon Callebaut I Domain This region is always found associated with Pfam:PF02141. It is predicted to form an all beta domain [1]. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.27 0.72 -4.00 45 1381 2009-01-15 18:05:59 2003-04-07 12:59:11 13 72 202 2 776 1312 4 65.10 30 5.71 CHANGED pPcllppaPp....p.........pcp.h.pslshFCFPpGlshhspp........psphFsFlLTctDGs+h.aGhChphh ................splltpaPpps........................ps....h...pslshFChPpG.h.ph...ts.s......................tsphasFVLTs.t..........D.u..s.+p.a.GhChphh..................................... 
0 186 286 501 +962 PF03167 UDG Uracil DNA glycosylase superfamily Aravind L anon Aravind L Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.75 164 10115 2009-01-15 18:05:59 2003-04-07 12:59:11 14 32 4976 114 2700 7557 3938 160.00 22 68.49 CHANGED .shsssssc.llllGpsPGt.pp......st..sGtsFsst.........sGphLpp.hl.tph............................ulscpp...................lhlsssltp......................t..ttpssps-hpts.ps...hLhpplph.hp..Pclllhl.GptAhpthh.h...........h............................htth.lhshhHPSsh.tt.....................hp.t.h.phhp.pLp ..............................................................................h.....tss+llIlGpsPht..ss............pt........pG...hsFss..................sss.p.Lhs...hh...ppl..................................................................................uhs.cps...............................................lhlhNsl.......................................................hss.pp.s.s.p..sch..sts..pt....................tlhp..tl.sp...hp......t.........l.....l...hhL..Gph.Atpphhh..h..................................................................................sttphhll...ssHPSslstt.hth.................hsst.h.tps.t.L......................................................................................................... 0 865 1704 2249 +963 PF02809 UIM Ubiquitin interaction motif Aravind L anon Aravind L Motif This motif is called the ubiquitin interaction motif. One of the proteins containing this motif is a receptor for poly-ubiquitination chains for the proteasome [1]. This motif has a pattern of conservation characteristic of an alpha helix. 
20.40 2.90 20.40 4.00 20.30 -999999.99 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.07 0.74 -6.49 0.74 -3.89 46 2346 2009-09-16 13:18:06 2003-04-07 12:59:11 15 72 279 15 978 2205 32 17.30 40 7.58 CHANGED tpE-pcLphAlthShp-t .........pE-tc.LptAlphShp-....... 0 264 430 695 +964 PF01027 Bax1-I UPF0005; Inhibitor of apoptosis-promoting Bax1 Bateman A anon Pfam-B_1376 (release 3.0) & Pfam-B_5704 (release 7.5) Family Programmed cell-death involves a set of Bcl-2 family proteins, some of which inhibit apoptosis (Bcl-2 and Bcl-XL) and some of which promote it (Bax and Bak). Human Bax inhibitor, BI-1, is an evolutionarily conserved integral membrane protein containing multiple membrane-spanning segments predominantly localised to intracellular membranes. It has 6-7 membrane-spanning domains. The C termini of the mammalian BI-1 proteins are comprised of basic amino acids resembling some nuclear targeting sequences, but otherwise the predicted proteins lack motifs that suggest a function. As plant BI-1 appears to localise predominantly to the ER, we hypothesized that plant BI-1 could also regulate cell death triggered by ER stress [2]. BI-1 appears to exert its effect through an interaction with calmodulin [3]. The budding yeast member of this family has been found unexpectedly to encode a BH3 domain-containing protein (Ybh3p) that regulates the mitochondrial pathway of apoptosis in a phylogenetically conserved manner [4]. 
26.10 26.10 26.20 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -12.10 0.71 -4.72 511 5798 2012-10-01 20:22:31 2003-04-07 12:59:11 15 12 3474 0 1486 4003 2434 202.60 25 84.39 CHANGED pphl.ppsYshh......uhu....lh.lo.uhsuhhsh..................................h.....hhhhh.......hhhs.....luhl.h.h.s...h.tht............p.....sss........hs.hh......hhh...sasslhGhslus................lhhh.......ht................sshlspA..hh.....hTushFsuhohh...uhpo...++........D.......ho.th..GshL...h........hull..sl....lluulls.h.F......h...........ssshphsl.u....hlu....lllFs.Gh..h...haDT...p.pl....h..........p.....ththtsh.................................lhuAlsLYL.....DhlNLF...l.h...lLp...l.h .......................................thl.ppsYhhhulsLh...ho...uhsAhhs.h.h..................................hh....hh.h.hh............hhl.st.....lshh..h.h..h.h.h...h....thp.............................................p...h.sss..........hh.......hhh...........hao...shhGhslus...................lhs.h.Yh................ssslhtA.....hs.....hTus.hFh.shohh.....uhpo...++.........................D........ho....th....GshL...h........hull....sl.......llus...llN....h...F............................lt.................ssslthsl.o.............slulllFs.Gh....l...ha.DT......p..pl.....h............p.....tht.ss..................................lhuAlsLYlDh...lNlFl....lLplh................................... 0 437 786 1139 +965 PF03684 UPF0179 Uncharacterised protein family (UPF0179) Bateman A anon SWISS-PROT Family The function of this family is unknown, however the proteins contain two cysteine clusters that may be iron sulphur redox centres. 
25.00 25.00 69.70 69.30 20.10 19.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.06 0.71 -4.62 24 104 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 103 0 79 109 6 141.80 37 95.34 CHANGED hITLlGpcLA+sGsEFlahG.sscCcsC+h+psCh..NLp.G+RY+llpVRsst..pcChlH.-ssVpsVEVtcs.slhsllpo+pAhcGuplohpss.Cs..h-CpsachCpP-Glhpu-+hpIhcllGcht...C.tG.+plplVclthh ....hITLlGpcLAcsGtEFlahG.ss.cCcsC+h+phCh..NLc.G++Y+ls.sVRsst....ppCslH.-ssVpsVEVtcs.slhshlpu+tAhpGuplshpss.Cs..h-Cptac..hC..pP.-.GlhpG-+h+IhcllG-h.t..C.pG.csLphVclh.h...... 1 16 46 63 +966 PF03699 UPF0182 Uncharacterised protein family (UPF0182) Bateman A anon SWISS-PROT Family This family contains uncharacterised integral membrane proteins. 19.40 19.40 22.50 23.80 19.30 18.80 hmmbuild -o /dev/null HMM SEED 774 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.59 0.70 -13.39 0.70 -6.60 63 654 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 582 0 224 642 800 783.20 35 82.82 CHANGED p+s+hhhhlhhlllhlllhsh....hsshhs-hhWFpplG....atsVFhTplhsphhlhlhs.................................................................................hllhhlhlhlslhh.sp+st..........ht..h.t......ht.....................t.hhhhhhphhhhslshllullsuhhsuup.WtplLha....lssssFGpsDPlFstDluFYlFpLPhhchlhshlhshllluhlhsllsa................hlhusht.......hsstth...tlsptsptpLulLhulhhLlhAhuaaLcpapLLao..s..pGs.hhGAuYTDlpstLPshhlLshlullsAlh..h..................hhsh.................hppph+h................shhuhslhllssllhsslhPtllQpFhVpPNEhphEpPYIccNIshTRpAa.sL-p..lc.hpsasspss..Lsst.s.....ltpspsTlsNIRLhDspPLhpTapQLQQlRsYYpFsc.lDlDRYpl.sGp........................hpQVhlusRELs.ssLstpupoWlNcHllYTHGYGhlhuPlNpl.............os-GhPpa..hlpDIPsssph..................sltl...ppPRIYaGEh.......ossYsIVs.s..pst....EaDYP...............................................................pus.pN.shspYsGsGG....l.lsshhpRllaAhphp-hplLlSsslss-S+ILapRslt-RVcclAPFLphDuDPYhVl.sc........G+................lhWIlDAYTTSspYPYSp..Ptp......................pslNYIRNSVK
slVDAYsGoVsaYlh.DtpDPllpsap+lFPslF+PhuphPssLpsHlRYPpDLFplQsphlspYHh.....TDPpsFYsp-DhWplPp.....-..........................hsss.....pts...............htPYYllhpLPs........ppptEFlLh.sasPts......RsNhsAaLuARu......Du........psYG+lllaphP..+pchlhGPtQlpspIsQDspISpploLW ...................................................................................................................................................................h....hhhhh.hh.h.lhl.llhhh...hhsshasDaLWFsplG.......apuVFhThlhs+lslhlss...................................................................................sllhu.shlhhs.hhl.AhRs+.s.sh..s.s...s.........psltth........................................................................................................................ctsh.tp..h+hhhhslsl.llu.lls.G.hhspup..Wt.plhha....hpussFGhpDP.FGhDluFYsFpL.Phhchllshl...hshllluhlssllsa............................................................................alhGul+............hss.t.t.s.t.l.ops.A+hpLullsulhhLlhAsuYaLcRYpLLhs.......ppss...hsGAuYTDlpAsLPuphlLhs..lul.l..sAlh.....h..................h..ss.l...................hh..+..sh+l................................Ps.l..u.hslhl.lsul..l.lG..s..h..a..PhllppFpVpPNttphEpsYIp+NIpATRpAY.GL..ss...lp.hp..s....a....s...u..s..ss......h.s..sp.p......................l.tsstsTlsNIRLhDPpllssoapQhQQ..h+saYpFs-.LslDRYpl...sGp...................................hpshllAsRELs.ssLstp.ppsWlNcHhVYTH.G..Ghlsu.uNpV.................s.ssGhPta..hlp...slsspuph..................slsl...ppPRIYaGph.......ss-YuIVGssts.....EaDYs................................................................psp...ts...sphoY...s.G...sGG....lslushhsRhlaAh+at-hphLhSstlsspS+.ILapRssp-RVppVAPaLTh.....D.....u....csYPsl..lsGR..............................lhWIlD...uY..TTsssY..PYSp..hsshtt....................tpplNYIRNSVKAsVDAYDGoVsLYth.Dpp..DPll+sWt+lFP.G.hh+shu-hs..s..-LtsHlRYPcDLFcVQpplLs+YHV.....sDPpsFasspDhWplPp.....-.............................s.s..s..sp.....spt.......................psPYYlhhp..h..Ps........pspsp...Ft
..Lhosasshp......R.p.LsAa.luucS...Ds...........ssYG.....clplhp.l.P..p...s....s.l....GPtQspsphspssplSpplsLh................ 1 84 173 210 +967 PF03676 UPF0183 Uncharacterised protein family (UPF0183) Bateman A anon SWISS-PROT Family This family of proteins includes Lin-10 from C. elegans. 19.80 19.80 21.20 20.40 19.20 19.70 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.37 0.70 -5.88 8 356 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 220 0 262 349 4 251.20 23 75.90 CHANGED GsctWEFlLGMPIuQAluIIppQsplI+sVpVhYSDpsPLshDllIslPsDGl+LhFDPhsQRLKlIEVaslpplpL+YsushFNSPsllPTl-plpp.FGuTHPGlYDsu+plasLpaRGLSFtFPlsS.....+apssaucGLu...SLcFssGuoPlloRMSIYsGus.........lsEu+sP.sLPhuCahGslYhEpVcVl+puts.shGlcLpLsspG...shhhE.chpshpRplhFGDSsQDVhSsLGuPs+VFaKoEDKMKIHSsSsHR.spo+suDYFFNYFoLGlDILFDupTHcVKKFVLHTNaPGHasFNhYpRCpFpI.l.............sDpssssssspps........ITshoKWDplpchLusst...+PVVLpRuSospp.NPFGSTFCYGYQclIFEVM.Nsa.IASVTLY.sus ................................................h.........G....h.....phht.lp.t..t..h..hp..lha........t...tPht...s.lhl.hsttuhpLhF-s....QpLphI.........El........hs.......h..p........h.............hhat.............t...........................h.................t........................t............................s........s........h.tl....t...F.G.s.oa.......P..G..............t...................t......................ahL.a.GlsF.F..h....................................................ttst..........htp.h.la.utp.............h.ps........h.............................................................................................l.h.s.....t.o.s.p-lh..hG.......Pttha.Kt.pph.lH........................................................................................................................................Khlhhs.s...tp..h.......................................................................................................................................................................................................
......................................................ts...................................................................................... 0 89 144 210 +968 PF03671 Ufm1 UPF0185; Ubiquitin fold modifier 1 protein Bateman A, Coggill P anon SWISS-PROT Family This is a family of short ubiquitin-like proteins, that is like neither type-1 or type-2. It is a ubiquitin-fold modifier 1 (Ufm1) that is synthesised in a precursor form of 85 amino-acid residues. In humans the enzyme for Ufm1 is Uba5 and the conjugating enzyme is Ufc1. Prior to activation by Uba5 the extra two amino acids at the C-terminal region of the human pro-Ufm1 protein are removed to expose Gly whose residue is necessary for conjugation to target molecule(s). The mature Ufm1 is conjugated to yet unidentified endogenous proteins,[1]. While Ubiquitin and many Ubls possess the conserved C-terminal di-glycine that is adenylated by each specific E1 or E1-like enzyme, respectively, in an ATP-dependent manner, Ufm1(1-83) possesses a single glycine at its C-terminus, which is followed by a Ser-Cys dipeptide in the precursor form of Ufm1. The C-terminally processed Ufm1(1-83) is specifically activated by Uba5, an E1-like enzyme, and then transferred to its cognate Ufc1, an E2-like enzyme [2]. 21.70 21.70 22.30 25.90 18.70 21.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.53 0.72 -4.22 2 177 2012-10-03 10:59:06 2003-04-07 12:59:11 9 5 139 3 104 152 2 72.40 75 67.89 CHANGED uKVSFKlTLTSDP+LPaKVhSVPEusPFTAVLKFAAEEFKVPstTSAIITNDGIGINPtQoAGNVFLKHGSELRlI ............uKVoFKITLTSDP+LPaKVlSVPEu.TPFTAVLKFAAEE...FKVPs.sTS.AIITN.DGlGINPsQTAGNVFLKHGSELRLI............ 0 41 60 83 +969 PF00179 UQ_con Ubiquitin-conjugating enzyme Ponting CP, Schultz J, Bork P, Finn RD anon Prosite Domain Proteins destined for proteasome-mediated degradation may be ubiquitinated. Ubiquitination follows conjugation of ubiquitin to a conserved cysteine residue of UBC homologues. 
TSG101 is one of several UBC homologues that lacks this active site cysteine [4, 5]. 21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -4.75 71 9194 2012-10-02 15:28:41 2003-04-07 12:59:11 21 127 571 270 5880 8415 316 134.10 27 49.81 CHANGED Rlt+Ehp......ph.cpsssuhpst.hs....c.....slhpWpshIhGP.psTsY-uGhFplplpFPpcYPh.pPPpl+Fh..o......................claHPNlp.tsGpl......CL.sILp............pp.......WoP.shslpslLl.ol.uLL......spP.NspsPhss-sA...phapcs.pp...paccpsp.th ..........................................................................................................................l.p-ht........ph.....p...p......s.......s..h..p.s.t..s......p...................slh..p.W.ps..h.I..h..........G...P.......s.......s....o....s............Y...........c...G...........G..........h...........F.......p...........l..........p...........l.............ph..P.....p.............c.Y..P.........h..p......P...P....p........l......p...Fh...o..................................................p.l..a.....H....P.......N.............l......t....s...........s.....G..p....l....................C.L...s...l..Lp........................................p.p...................WsP...shs.l...p.......s......lLh...ul......p..u........LL.............................spP.....s....s...p....s...P.....h...s.....phu...p....h....h...p...p....t....ta.t.h....h................................................................. 0 1995 3131 4649 +970 PF02814 UreE_N UreE; UreE urease accessory protein, N-terminal domain Bateman A, Finn RD anon Pfam-B_6279 (release 6.1) Domain UreE is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid. 
20.80 20.80 20.90 21.40 20.70 20.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.02 0.72 -4.46 70 1294 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 1157 35 261 891 364 64.20 27 37.70 CHANGED hls.shpthtt.tsss..ttplsLsh--Rp+pRhRhpsssGp.-lulpLs+s.sh.LpsGDlLht--Gph.l .................h.........t...tttp.....s-plhLshp-ttKpRhRhsos...p.....Gp..-lulpL.tcs..hh...LpsGDlLht--sphl...... 1 57 144 203 +971 PF04192 Utp21 Utp21 specific WD40 associated putative domain Wood V, Finn RD anon Pfam-B_16350 (release 7.3); Domain Utp21 is a subunit of U3 snoRNP, which is essential for synthesis of 18S rRNA. 22.10 22.10 23.10 23.10 20.20 19.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.40 0.70 -5.26 28 330 2009-01-15 18:05:59 2003-04-07 12:59:11 7 21 284 0 242 331 2 226.30 30 24.85 CHANGED hlEu..Ah--pppc........pppth.......YpSh-QlscsLlTLSlhPcS+apsLLcLDlIKpRNKPKEsPKtPEpAPFFLP.ossslsspt....................t.sspp........scstsctsclsclp.sspht..tpSpFopLLcp............usps..s-YsphlcaLpshuPuslDLEIRSLs.......s..hsshsElhsFlculsptLcop+sFELspAahulFLK.........lHuDllhp.......ssp.........................LtcsLppWpstpccphp+Lc-LVtastuVluFl+o ..........................................................ttt.ht...h.o..cQl......sp.pLlTLS.h.lPcS+WpsLLpLDlIK........pRNKP..........p........Es.PKtP..c...p.....APFFLP..ohsslssph.........................................................................................ptpp........................pppp..spt...sph.sph.p......t........popF...sp.hLpp......................................................................ut..ps....ss......asshlphL+sL.u.P.S.s.lDhEl.RoLs.....................s...tssh..pt.hht....F....l....c....hlsthLp.s+csFELsQAahulFL+.........lHschl.h.p....psp.................................Lhptlpphpp.p..ppphp+lppLht.shsllsalp......................................................... 
0 84 137 203 +972 PF02151 UVR UvrB/uvrC motif Mian N, Bateman A anon IPR001943 Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.60 0.72 -4.32 72 12483 2009-01-15 18:05:59 2003-04-07 12:59:11 14 46 4610 17 2727 9003 4562 35.60 34 5.61 CHANGED pphlp.pLppchppAscppcaEcAuclRDplpplcpt ..............phlp.cLcpcM.ppAu.csh-FEcAAplRDplppLcp.h.......... 0 966 1868 2351 +973 PF05008 V-SNARE Vesicle transport v-SNARE protein N-terminus Moxon SJ anon Pfam-B_5492 (release 7.6) Family V-SNARE proteins are required for protein traffic between eukaryotic organelles. The v-SNAREs on transport vesicles interact with t-SNAREs on target membranes in order to facilitate this [1]. This domain is the N-terminal half of the V-Snare proteins. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.81 0.72 -3.82 89 491 2012-10-01 21:14:52 2003-04-07 12:59:11 10 8 290 5 319 473 11 77.00 26 28.67 CHANGED aptlpsplppclsphs..s...s..-..p++ptlpclcppl-EAppLlcpM-lEsp.sl..P...s.+sphps+lRpY+s-lsp.l+cchcp ..........................ap.lpsp.lppphsp.h.......phs.u..-.......p++phlpph-ptl-EAp-LlcpM-lEsc.sl....Ps.......stR...sthps+lRsY+p-lsp.l+pch+..................... 0 111 177 256 +974 PF00790 VHS VHS domain SMART anon Alignment kindly provided by SMART Family Domain present in VPS-27, Hrs and STAM. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.53 0.71 -4.55 15 1706 2012-10-02 18:21:09 2003-04-07 12:59:11 14 40 287 48 1025 1655 5 130.70 28 24.38 CHANGED ss.sp.lpthlp+ATcpsls-s..Dhuhhl-lsDhIsps....sssP+-AspsIpK+lss..psspsulhALslL-sCVKNCGppFHhclus+-Fhs-Llphlspcs.sc.....V+pcllcllppWspsh....cpcschphlpDhachLKhcG ............................s.....htthl...p..+ATs...t....t..h......pt...DW.s.h.h..c.lC...D...h...lspp............tsus...+....-Ah+u.lt++lpp..........pss...p...ht.........h.h.........A.........Ls.............lL-sslc...N..CGppFH..hp.l....u.........sc....c.F.l.s......c.l.l..c...ll.ps.ph.t.....st...................VppKllpll..psWs.p.s.h.....p.ps.ph..s..hltphap.LptpG..................................................................................... 1 272 484 768 +975 PF00654 Voltage_CLC voltage_CLC; Voltage gated chloride channel Bateman A anon wublastp P37020/1-588 Family This family of ion channels contains 10 or 12 transmembrane helices. Each protein forms a single pore. It has been shown that some members of this family form homodimers. In terms of primary structure, they are unrelated to known cation channels or other types of anion channels. Three ClC subfamilies are found in animals. ClC-1 (Swiss:P35523) is involved in setting and restoring the resting membrane potential of skeletal muscle, while other channels play important parts in solute concentration mechanisms in the kidney [3]. These proteins contain two Pfam:PF00571 domains. 
23.80 23.80 24.10 23.80 23.20 23.00 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.28 0.70 -5.43 139 7727 2009-01-15 18:05:59 2003-04-07 12:59:11 15 38 3515 57 2231 6126 1048 337.70 23 63.73 CHANGED lsuhls.......uhlhthh..sts...t...u.sGlsplhttl...pttpt.h.....lsh+shhs+hlus..llsluuGtSlG+EGPslplGAslushl..u...........ph......hth..ttsp.p+h.LlssGuAAGluAsFssPluGslFulE...............lh.tph..sh.......ps....hhsshluulsushlsphl........hu.t..sh...........as.....lss................hshhtlhhhlllGllsGl.hushFsphhhtsp.p.....hhpph.....h..............hh.pshlsuh...ll....uhlu....hhh.......................P.t..hhGsGh.......sh.........................................................................................................lpthh...sst...................................................hhhhhhhhhhlhKhlhTslohuuGhsGGlFsPsLslGAslGt.hh.....u.hlhthh........................h..................sshullGMuAhhuusspuPlouhlllhEhTu.shphllPlhlushluhhluph ................................................................................h..hhhs...shl...ht..hh.........stt...t...u......p...Gls.p.l...ht..tl......ps....h............................h.hps.hhhK.................h.lus..lls....lu..u...G..h..slG.............+...........E.............GPhlpluuslu.phlu..................................ch...............hp.h.......p.sc...p+h..llusGuA...AGluAsFsAPl..u....G.......slFsl.....E..............................lh..tph..ph...........................tshhss..hhu....ul.sushshphh..........hs.t..sh......................ht.............ls.h..........................................shs.h.t.p.h.h.hhll.l.Gl.l..s.....G..l.....hGhla.p...hhhthp....p.........................hhp..p....h........t.h.............hh..hshls..uh.lh.....ullu.....hh.h..........................................s..t..hhG...s.Gh...sh.......................................................................................................................ltthh.......tst......................................
..............................hshh.hlhhhhlhKhhhoh..lo.h...uuGhsGGlFhPsl......hl..G...uhhG.thh.....u..hhhshh................................................................................s..sh.hul....l...G.h.uuhhu..uss+s...................P.loshlllhEhT.......s..s.......h.p.......h...........l.hsh.hls.sh.huhhlst.h............................................................ 0 710 1253 1782 +976 PF04840 Vps16_C Vps16, C-terminal region Kerrison ND anon Pfam-B_6003 (release 7.6) Family This protein forms part of the Class C vacuolar protein sorting (Vps) complex. Vps16 is essential for vacuolar protein sorting, which is essential for viability in plants, but not yeast [1]. The Class C Vps complex is required for SNARE-mediated membrane fusion at the lysosome-like yeast vacuole. It is thought to play essential roles in membrane docking and fusion at the Golgi-to-endosome and endosome-to-vacuole stages of transport [2]. The role of VPS16 in this complex is not known. 36.00 36.00 36.20 36.20 35.90 35.60 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.73 0.70 -5.48 5 345 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 273 0 252 368 2 283.70 30 37.70 CHANGED lSascIA++AYpsGRs-LApKLL-hEscuup+VsLLLpMccsctALpKAIcStDssLIhpVLLcLKpchspSsaahsLpcpPhAlsLYpcasRcp-+cT.......LaDlYpQ-DcapclApaHlcsuhts.ccs-uRlouLppAuDAaupu+s.slEscss-Dph....+Ll+lQcoLpccasssFssLSl+-TVscLI.sGcsK+AccL+p-F+IPDKRlaWLKlcuLuct+KWEELEcaApS.KKSPIGYtPFVchCl+pcNhcEA+KYlsR...lss.p-KVchalpsssas-Au-lAh-cRDtssLp-lhp+hssss-ushss+Vpsslcp 
......................................................................................................................lSaspIAttAhppGRtcLAhp....LLphEspu.scQ..................VPLLLp........h.p.c...........pchA..LpKAlcSGDsDLlhhVLh.p.Lcp.c.h.s........h..u......p.....Fh.h.hl...pp....ps.h.........A...........s....L...ht.t.......hs.+..p....p.phph.................................Lc-ha......p.....sD...ph.....-hu...hh.lppuht............p.........p........................ps.........p..ht.......t..L....p.......At..c..h.h......t.p.s..+...p.......t..h...t...t...p...h..hp-ph..................pLLc.hQ.cpL..-....pc.....h...s.............t.............p.............F.h........s.h....Slp-T.lhpL...lh.....h.......G.p...t............+.........p.....Apcl.....tp-F.+l.s..-K+a.a.al+lpuLs.pt.cc.W...p..-L-....c.h........u........+......p...K.......K.S.......P.........IGapPFhphhh.p.t.s..p.t........t.p.At.pals+......lss...pp+hcha...hths.hhpAup.Ahct+s.t.Lt.lht.h....................................................................................... 0 95 149 215 +977 PF04841 Vps16_N Vps16, N-terminal region Kerrison ND anon Pfam-B_6003 (release 7.6) Family This protein forms part of the Class C vacuolar protein sorting (Vps) complex. Vps16 is essential for vacuolar protein sorting, which is essential for viability in plants, but not yeast [1]. The Class C Vps complex is required for SNARE-mediated membrane fusion at the lysosome-like yeast vacuole. It is thought to play essential roles in membrane docking and fusion at the Golgi-to-endosome and endosome-to-vacuole stages of transport [2]. The role of VPS16 in this complex is not known. 
22.00 22.00 22.60 22.40 21.50 21.20 hmmbuild -o /dev/null HMM SEED 410 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.35 0.70 -5.94 5 393 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 278 0 293 403 5 326.60 26 46.86 CHANGED Pou-WphLcDs.YYRKptLuos-WsLch-l.chhlulAPaGGPIAVsRsP.phpss..hs.hPh..IpIashSGplLu+.lsWsp..sslVuMGWocsEELIlVsKsGpVhVYuhhG-h...paSlGcsl...pssclpEs+lFpoptGcoGVAlLssucclhllsusucslhh+phP...-lPsspophpstsssspI.....LssDcshpIhlssGssLt.Is-pu..ps.............lsS.p+alKloVS.s+p+LALYTsoGplhllSsDhuccLCEacls...t+usPKQMs..WCGN......DAVVlua.EshLhlVG.....csG-pVsFhYchT.sh..LssElDGVRIlTpoopEFLp+VPAsoENIF+IuSpsP...GAhLlEAtpchEc...+SsKA-EhLpplp-..pLccAVs-CIpAAscEFpPEhQKsLL+AASFGKuaLcpasPD..cal+ ...............................................................................................................................................................................tW..h......hap....t.hh.t......a..............t...............h.......p...hhl.ss.us.uGsl....Alh.ps...................................t...........lplas.s.G......l.tp...h.a..pt.............s..lh.thsW.st....p.-pLl.sl..psGh.h.h.h.a........s.h.....uph.................h...s.h.sp.th....p.tl..p..s..............................h.sG.hsh.....l.h..t.s....p......h..h.h..tsh....t...............t..h.................t...st........................................................h.........p...p...h......l...hh..s.....s.....plh.l.....ttt.......................................h........................hhphslS.stp.hl.Ahh......s.....t.s....G.....h...l.....h.h..h........s.s.shp..p.h.h...-hss.p.....................pt..P.p..ph...WCup.................c..ulh.l........ta...p............p.......lhllG...............................stsp...h..pa..h.s....t......h........................lhsEhDG.lRlh..opsph-hlpc...VPt...s...s...........tplFplu.S.h..sP...........uuhLh-uhcphpp.................pu.....+A.--...lp.l..pt.........pL.pAVppClpAAst.E.a...psp.hQ+p...LL+AAsaG+sh.hp..h..p...................................................
......................... 0 115 177 250 +978 PF03635 Vps35 Vacuolar protein sorting-associated protein 35 Finn RD anon Pfam-B_3569 (release 7.0) Family Vacuolar protein sorting-associated protein (Vps) 35 is one of around 50 proteins involved in protein trafficking. In particular, Vps35 assembles into a retromer complex with at least four other proteins Vps5, Vps17, Vps26 and Vps29. Vps35 contains a central region of weaker sequence similarity, thought to indicate the presence of at least three domains[1]. 21.20 21.20 21.30 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 762 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -13.43 0.70 -6.39 23 430 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 295 2 300 460 7 646.70 38 90.35 CHANGED +hLpEAlssV+pQuhhM++sL-p.scLhDALKauSsMLsELRTusLSPKpYYELYMtlFDpLphLssaLt-p+spt.++..........................LsDLYElVQaAGNIlPRLYLhITVGosYlcsp-uPsKEILKDhlEMCRGVQHPlRGLFLRaYLsQto+-hL.Ps.s....................ppsssGslpDol-FllsNFhEMNKLWVRlQH.....................................QG.s..............+-+-+Rp+ERcELplLVGoNLVRLSQL........-GlshchYpcsILPplLEQlVpCRDslAQpYLhElIlQVFPDEFHLtTL-.LLsus.spLpPsVsl+pIlhsLl-RLusYsspps-sp.p..t..........................................................................................................................................................................................................................slclFplFhsplspl........lc.s+schs..........................................................................................................................................lpshlsLhsSLlsLsLpsYP..-pl-hl-plhshshp.hlpph........tsphpsspsppplh...................pLLhh.................Pl.pp........ahshhslLplpsa.sLlshhshps.pKsluhsllssll.....................cssoh...................................Issh-pl-plhpllpsLI...................p-psDp..t.....................................................-s--ht-EQphlu+llHll....p.s-........Ds-pphclLtssRKthhpGG.pRl+aThPsLlhshl.........+Lsp+hptpp..............................
...............................................................papspspplF+alHp..slssLhshss....s-LsL+LaLpsAtsADphs.............lp-...luYEFasQAFslYEE......................sluDS+sQhpAlthlluoL.p+h+shs..cENY-sLhsKsshauSKLLKKsDQCRuVhhCSHLaWssch.........................................................h+-uKRVLECLQ+uL+lAcsshp.......sssslpLFVElLN+alYYa...-pssspVoscalssLI-LIpssh ................................................................................................................................................................................hLt-ultsl+.psh.M.....++s...L-p..spLh-ALKpuophlsEL...........R.....T.....s.LuPKpYYEL..................Y.....MtlhDt.Lp.hLphaLh......-ph.t.....pp......................................................................ls..DLYE.lVQauGNIlPRL.YLhlTVGssYhp......t..t....sh+-lhKDhhEMsRGVQHPlRGLFLR.YL.phs...+.shL.Ps.s........................................ptstGslpDuh.pFlL.NFhEMNKLWVR.h..Q.H.......................................................QG.u..............+-+-pRppERpELplLVGoNLVRLSQL....................t.lsl-hYpphlLstlLEQlV.pC+DslAQpYLh-slhQ....V....FPDEaHLpTLs..hLpus.spLps.....pVslK..............pllhsLh-RLutastppsts..................................................................................................................................................................................................................................................tplclFphF.pplspl...........lp...sp.phs.............................................................................................................................................pshlsL.suLhslsh...p.haP...........-cl.-a.l.DplLthshp.hhpph..............tt.....psstsp.pplh...........................tLL.h........................Pl...pp........a.slhshLpL..pa..lh.ph.s.hts.p+thuh.llpsll................cp.p.s.h...................................lss..-p.....l-tlh.pllpsLl...................p-t....pt.s.t........................................................-s--hh-EQshluRhlHh.l........p..sp........
.....-s-p..phtlLphs++thh....tG.s...pRlpaTh..Psllhssh.........pLsh.phpt.p.........................................................................................................................................tpphptpspplapahpp..slssLhpt...................s-LsL+L...aLpsu.sAs.p.hs.....................hEp...luYEFhsQAFslYE-........................pI.u.DS+sQhtAlphIhush...pp..........hpshs....pE.N..a-sLhspsshhuuKLLKKsDQsRuVh..hsuHLaWssp..........................................................................h+sucRVh-CLp+uL+lAstshc..................sshplpLalElLs+Ylaaa.....-pt.stt....l.ohphlstLIphIppp.h............................................................................................................................................................................................... 0 117 178 255 +980 PF04129 Vps52 Vps52 / Sac2 family Wood V, Finn RD anon Pfam-B_10164 (release 7.3); Family Vps52 complexes with Vps53 and Vps54 to form a multi- subunit complex involved in regulating membrane trafficking events [1]. 
22.60 22.60 22.90 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 508 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.63 0.70 -5.98 5 461 2012-10-03 17:31:52 2003-04-07 12:59:11 7 15 279 0 310 506 9 381.30 24 70.45 CHANGED lEpcSIcDhIp...............................ESpNluoLHNQIsuCDuVLscMEshLsuFQs-LusISoDIcpLQEKSscMslpLcNRpuVcu+LuphV-DllVPP-LIcsIl-GcVsE......spasopLplLs+htusscDQu.....s+uopAsKDVc...slLDKLRsKAlsKIRcaIlpKIhsFRKPh.TNhQIlQs.sLLKaKaaapFLhcNsRclAhElRsAYI-TMsKlYhuYF+uYlppLsKLQa-cusop.DL.hGVE-sus.......uLFFS..............KssoL+p+sslFolGcRssIls.tpl-sPllVPHIApssp.+YshEsLFRShchALlDNuosEYhFlsEFFslsGsptc-l...FppIFu+TLohspKalpslIusCaDsIGlhLsIRllp+aQLhup+RsVP.sLDsYa-ulllhLWPRFchVhDhplpSLRcssloshst............h-o+PHYlTRRYAEFouSllsLulsassup...lspLLscLpp-V-sFlL+lAKpFsc+KcQllFLINNYDhlluVLpEtus-suKEscsFpEhLNusospFl ..............................................................................................h...t.h.tttl.t.t.spthl.t...p.hc.......phLttFpscLsslos-lppLQppS.ths.pLpNRpthpt.Ls.hlptl..lssthlp.Ihp..s.....lsc......................h...t....p..............p..ph.h...h...........tp..t.....................................pt...tu.....ht-lt...s.lppLp.+...........Aht+l+............cal....l........t....p...l.hthR..........ps..................hN.......hQ.....h.Qp...th.......l.+.h.+............hh.tFLhtpt.thstElpptYhpTh.ph.......h.ah....sha.p.pY..ttL....p....lphp...hspt...sl...s..tt...t...........................t..h.......................................................t..ttha....t.lspR.h.pllp.....p.......p.t..........s....h......h..........................s.......h........s..........pt..............s.......t.............h.....................h...EhhFRs.phhLhDssstEa...........Fh.pFF.........................tph.........................h.tlh..t.sht.hh.p..hp.........p..h.l..t..............t..s..h.D....shulhl..hlpl..p.h.t.........h.t.p.R.....p..l..s..sh-.....ta.hpth...lWP+hp.lhphphpS.lp...p..h.s......t.......................................t.ts.+....ls.ppa
uph.tuhl.l....s..............t..h....pt.........................h......h.pl......t.hpthh.th......s....t.........................p........p......t.t....haL.hsNY.hhhsh......l......p.............................................................................................................................................................. 1 127 191 270 +981 PF04100 Vps53_N Vps53-like, N-terminal Wood V, Finn RD anon Pfam-B_5601 (release 7.3); Family Vps53 complexes with Vps52 and Vps54 to form a multi- subunit complex involved in regulating membrane trafficking events [1]. 24.00 24.00 24.20 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.37 0.70 -5.68 7 406 2012-10-03 17:31:52 2003-04-07 12:59:11 7 9 287 0 282 410 14 328.10 31 47.32 CHANGED tschssl-hINplFPs........................-tSLssl-plh.......p+lpt-Ip+lDssltshVcspsNsGpcup.sLp-AppulppL.pcIp-l+s+AEpoEthVp-lTcDIKpLDhAK+NLTsolTsLp+LpMLssuh-pLpthhppppYuchhs.hpuhhplh.phFptYpsl.pItpLppplsphptphhpplhpsapphF....ustsppc.u..hphLsDuChVhssL-ssl+cpllchFhpppLp-YhplFt-spEhuhLDcl-+RYsWhK+hLpsa-pt.htslFPtcWchshRLshpFC+.TRppLtpIhhp..+ccc.slclLlhAlppThpFEphLs+RF.............................p...E.c.ht.......................FcshlSSCFEPHLslYIppp-ppLtchh-phsp-.ph 
..............................................................t..casslpalNthFsp..........................po...L.s.s.lsphh...........pplp.php..pl-ppIt....ph.lpt..Qs...............ss..ppu.ptlppA.p.tsl..................t.pLht+Ipcl+p+AtpoEphl...pphTpDIK.pLDhAK+pLTtohT...sLp+LpMLs...............suh-pLpt..hsp.p.....+.p....Yt.-sup.L.................p..ulhplh.paF.............p...pY.p.s......ls.p..ItpLppplpthpppLhpplhtD.Fc...h.a........................st.t......t..p..........p.Lt.-uCh.VhssLss..p..h+..pcllphahp.pp.LppYt.lF.p..................ts.p......-......su.L.....DplsRRYsWh++hLhsa-pp...h.....sthFP.pWphsp.tlshtFCchT+pcl.......t....tlhtp................ptp....ph..-...VplLL.slpcThpFEp.Lsc+F.................................................................................t........................................................................................Fp.thlSpsF-PaL.tlalctp-+pLtphl.phht....t................................................................. 0 109 172 243 +982 PF02204 VPS9 Vacuolar sorting protein 9 (VPS9) domain SMART, Eberhardt R anon Alignment kindly provided by SMART Family This domain acts as a GDP-GTP exchange factor (GEF). It activates Rab GTPases by stimulating the release of GDP and allowing GTP to bind [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.18 0.72 -4.00 49 1496 2009-09-12 00:55:57 2003-04-07 12:59:11 13 121 301 10 1005 1449 9 105.40 25 12.44 CHANGED hpputpcLppl.sph..poPp-Klphllpspchlhpsl................................tttstshuAD-hLPlLlalll+ups..pL..huslpalpp.........F........................h.psshhpu........EtuYhLTshpuAlpalcshsh ............................................h..tAhpcltpl...sph....psPpc.Klthlh.....pss...chIhpsh................................................................................................t..t.ppsp.shuAD-hlP.......lLl.Ylll.......+u............s...............pL..........huplpalpc.........F................................................................................h...tssh.hp.G...........E.tuY..hlTsl......p.uAlpalpph..t..................................................... 0 361 525 776 +983 PF03302 VSP Giardia variant-specific surface protein Mifsud W anon Pfam-B_4536 (release 6.5) Family \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.42 0.70 -13.07 0.70 -5.83 6 1081 2009-01-15 18:05:59 2003-04-07 12:59:11 8 16 13 0 430 1062 0 178.50 20 59.27 CHANGED CstCpsGYclSsDKTpCsuou..sCps-NCKsCSs-c+..psCp-CsSssYLTP.TpQCIDsCtKIGNYYssTsupsKplCKECssANCKTC-spGpCpsCsDGFYKsG-sCuPCcpsCKTCuuGTuSDCTcC.oGKsL+YGsDGTKGTCGtGCsTGsGuG.ACKTCGLTIDGsSYCSECAspTEYPQNGVCoSssuRAssTCpsuslAsGhCuoCssGah+MNGGCYETTKaPGKSVCpsAsuuG.TCQKtAsGY+LssssLssCS.GCKpCoSsTsCTsCh-GYVKo..osuCsKCDuoCcTCT.GusTsCcsCuTGYYKouospsuCTus-SD.pslTGVpsClsCAPPssspGSVLCYLIKDu...G.STNKSGLSTGAIAGISVAVllVVGGLVGFLCWWFICRGKA 
...........................................................................................................s...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 430 430 430 +984 PF00092 VWA vwa; von Willebrand factor type A domain Sonnhammer ELL, Bateman A anon Prodom Domain \N 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.17 0.71 -4.55 189 10632 2012-10-10 16:07:06 2003-04-07 12:59:11 23 753 2209 191 4154 16265 2130 166.70 19 31.03 CHANGED DllhllDuSsSlst.....tp.....app.s+palppllp.th.t.......hstpssplullpauspsp............h.hslssh..pstpphhptl...........tphhhhsusTp.hup.ALphshpphhpt..........ttssRt...sss+lllllTD.....Gpssss...tthstthppt................slhshulGh.......................psstppLpplu..s.tt..pphhhhpshpt................phhppl ............................................................................DlsFllDs...S..sS...lst.................................tp..............................a.pt.....h+...p...a.l..t.....p.llp..ph..p...........................hu..t...pp.s.+.......l.....u....l.l..p..a...u...s..p.sp....................................................s.h..p...ls..s.h.........p.s.t.p.....p.lhptl.................................................pph.h.h.h.u..u..s...T...t....s..u..t.......A..L...p...h...s..h...p.p.h.hpp.........................................tsR..........ps.s....p....l.......l.l.l.l...T.D.........................G...p......s.....s.......s...p.................h........t............h..t....thppt................................................sl.h.h.hulGh..................................................pss...p.p.L.ptls.........................h......................h........................................................................................................................................................................... 0 1087 1482 2603 +985 PF00094 VWD vwd; von Willebrand factor type D domain Bateman A, Sonnhammer ELL anon Dotter Family Swiss:P17554 contains a vwd domain. Its function is unrelated but the similarity is very strong by several methods. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.27 0.71 -4.06 62 4125 2009-12-18 15:39:34 2003-04-07 12:59:11 20 537 352 0 2288 3757 7 145.50 21 18.65 CHANGED Csl.Gs..chpTFDGtpYsaps....ss.....sah..lspssssp..........hphhlt.ppsstsssth...h.pplplhht.............shplphtts.......htlhlsspplsh....shttssh..tlphhs........tshhhlth....thshpl..hphcs...tlhlplstthpspssGLCGsassctpsDhhpssGp ................................................................................................................................CtshGss.HahTFDG..t....has.F.G............sC....................pYh.......Lsps.s.ts.p................................phtlp..h...p.p...p..s.t....s..p.pt.h..............h.hpp.lpltlt............................................shplpltps................pl.h.l.ss.p.t.hth...............Pht.tsh.....tlphht................................t.t...hhlth........thshtl....h....p.h...s.......s..............pl......h.l.p..l...s.s............p....a....t......s....p.....s..s...GLC..G.sa.ss..p..p.sDhhh.ss................................................... 0 497 686 1444 +986 PF00095 WAP wap; WAP-type (Whey Acidic Protein) 'four-disulfide core' Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.55 0.72 -3.99 72 1589 2012-10-02 12:37:03 2003-04-07 12:59:11 16 122 164 5 826 1565 1 44.60 36 19.27 CHANGED KsGpC.P............tttt.Ch......ppC.psDpcCss.spKCC.........s.u.C.Gpp.ChtP ..............KsGtC.P.........................tttstCh.............ppC.....p......sDpcC..ss.....tp.....K...CC..........s..u.C..Gpp..ChtP........... 
0 219 260 462 +987 PF00400 WD40 G-beta; WD domain, G-beta repeat Finn RD anon Pfam-B_2 (release 1.0) Repeat \N 21.00 12.10 21.00 12.10 20.90 12.00 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.75 0.73 -7.90 0.73 -3.89 1804 193252 2012-10-05 17:30:42 2003-04-07 12:59:11 27 2206 1701 1549 125805 189768 5414 38.20 23 20.36 CHANGED tphhtsh.puHpsslpslsasss........................sphlsouu..tDssl+lWc ............................................................................h......h.puH.p...s...s..V..p....s.l.s.asss.............................................................................sphlso..uu..............tDtsl+lWs........................................................... 0 44330 69386 100531 +988 PF00568 WH1 WH1 domain SMART anon Alignment kindly provided by SMART Domain WASp Homology domain 1 (WH1) domain. WASP is the protein that is defective in Wiskott-Aldrich syndrome (WAS). The majority of point mutations occur within the amino- terminal WH1 domain. The metabotropic glutamate receptors mGluR1alpha and mGluR5 bind a protein called homer, which is a WH1 domain homologue [2]. A subset of WH1 domains has been termed a "EVH1" domain [3] and appear to bind a polyproline motif. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.44 0.72 -4.34 8 1177 2012-10-04 00:02:25 2003-04-07 12:59:11 18 30 236 25 600 1088 23 104.80 28 24.22 CHANGED huh......psIsouhAplahh-sss+c.Whhs....ppsusVshh+DsspNoYhlhuhclp.cscllhspplhsshpYspsoshFHpapsscs..hhGLNFuSE-EAspFtcthpcsl ...................................................tt...........slhp.s.hApVh....hh......-.ss.s....pc.....Whsh.......pthusVs..h.....h.....p......c......s......s.....p...s.........s...........a....hlhuhc.h...........p...sp...p............lllsp......p..l.h.......p..s.........h..p...Y...s....p.....s.....s........sp.....F....H..p..W...........p.s.....s.c.p...........haG...L..sFu..Scp-Ap.....p...Ftcthpc..h............................................... 0 159 234 397 +989 PF02205 WH2 WH2 motif SMART anon Alignment kindly provided by SMART Family The WH2 motif (for Wiskott Aldrich syndrome homology region 2) has been shown in WASP Swiss:P42768 and Scar1 (mammalian homologue) to be the region that interacts with actin. 23.10 13.00 23.10 13.00 23.00 12.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.37 0.72 -4.21 57 1560 2009-01-15 18:05:59 2003-04-07 12:59:11 15 69 271 11 837 1462 8 26.60 38 5.24 CHANGED ssssRuALLusIppG..t..LKKs.posD+S .........sssRsuLLssI+pG....hpLKKs.pptp.......... 0 213 324 544 +990 PF02467 Whib Transcription factor WhiB Mian N, Bateman A anon Pfam-B_2249 (release 5.4) Family WhiB is a putative transcription factor in Actinobacteria, required for differentiation and sporulation. 
22.80 22.80 22.80 23.00 22.40 22.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.62 0.72 -3.67 8 2386 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 569 0 664 1645 791 66.10 42 61.73 CHANGED WphpAlCRssDP-hFF.P..-RG..pusR-..AKclChR.CPVhpE..CtAaALcscctaGVWGGlSE.....cERRtL .....................................WptpAhC...t.....p..t.....D....P.-....hFF...P.......ppG.........t.s..pcc.........AK...tlCtp.CP..Vppp....CLpa.....A......L........p......s....s........p......c......a........GVW...G...GhoE.......cERRtl........................ 0 218 497 614 +991 PF02019 WIF WIF domain Bateman A anon [1] Family The WIF domain is found in the RYK tyrosine kinase receptors Swiss:P34925 and WIF the Wnt-inhibitory- factor. The domain is extracellular and contains two conserved cysteines that may form a disulphide bridge. This domain is Wnt binding in WIF, and it has been suggested that RYK may also bind to Wnt [1]. The WIF domain is a member of the immunoglobulin superfamily, and it comprises nine beta-strands and two alpha-helices, with two of the beta-strands (6 and 9) interrupted by four and six residues of irregular secondary structure, respectively. Considering that the activity of Wnts depends on the presence of a palmitoylated cysteine residue in their amino-terminal polypeptide segment, Wnt proteins are lipid-modified and can act as stem cell growth factors, it is likely that the WIF domain recognises and binds to Wnts that have been activated by palmitoylation and that the recognition of palmitoylated Wnts by WIF-1 is effected by its WIF domain rather than by its EGF domains. A strong binding affinity for palmitoylated cysteine residues would further explain the remarkably high affinity of human WIF-1 not only for mammalian Wnts, but also for Wnts from Xenopus and Drosophila [2]. 
25.00 25.00 26.30 25.90 23.50 18.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.79 0.71 -4.71 10 223 2009-01-15 18:05:59 2003-04-07 12:59:11 13 21 84 5 144 241 0 122.70 35 26.76 CHANGED laIsccEV++Ll......GlsA-lYYl+sGtlNsYuhc.....F......hlsVPS-VsslsFTWpS.uppcYhYuFsl.hosDpplLstPslsIsppGcVPpshpsFulsL.CoGspuGpsshslsLplpsscsh..ssTsLph+p+KhC ...................als..ph.hhh......G..l.p.t-..lahVcpGhlspashs......F........hhslPuplpplpFT.Wp.uh.u....ph.phhYshps................s.c.............tshhstPplNIshpGpVPp....p....hp.sapV....tLsCo.Gphsupsshplpl.lpsspt....ssThLph+ppKhC.................. 0 30 41 95 +992 PF03106 WRKY WRKY DNA -binding domain Bateman A anon Pfam-B_85 (release 6.5) Domain \N 21.00 21.00 21.00 21.10 20.60 20.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.07 0.72 -4.13 34 3051 2012-10-02 23:28:20 2003-04-07 12:59:11 10 33 220 3 1178 2878 5 54.90 54 20.61 CHANGED hcDGYpWRKYGQK.VKGu.aPRSYY+CTps...sCssKKpVERustDsphs.hsYcGpHNHsh ...........sDGYp.WRKYG.QK.sl.KGsP......h.P....R....u..........Y...Y+...Cop......sCss+...K....pVp.......R...s.t.....p..........D......s....p......h......h.........hsTYcG.pHsH.............................. 0 150 687 953 +993 PF02206 WSN Domain of unknown function SMART anon Alignment kindly provided by SMART Family \N 19.50 19.50 19.50 20.10 19.40 19.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.99 0.72 -4.23 36 345 2009-01-15 18:05:59 2003-04-07 12:59:11 13 36 5 0 343 301 1 68.00 24 6.39 CHANGED ssLpphhcphphluRlsNuIsLQtulhssoIshc-lluELLphsss.shsplhslcs...spltptlpplpch ...........hpphhcchphlARlsNuIsLQsulhs.sol.shcclIuELLslssh..phsplhshc........pplpphlppl...h................................ 
2 57 110 343 +994 PF00397 WW WW_rsp5_WWP; WW domain Finn RD anon Prosite Domain The WW domain is a protein module with two highly conserved tryptophans that binds proline-rich peptide motifs in vitro. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.62 0.72 -4.06 547 8927 2009-09-12 21:03:02 2003-04-07 12:59:11 21 403 357 119 5200 8316 391 30.00 36 6.13 CHANGED hsss.W.p.thh...ss.....GchY.YaNppTppopW-cP ..........LPsG.W.c.pphs..sp....GchY.YhscpT+.pTpWccP..... 0 1588 2359 3759 +995 PF02825 WWE WWE domain Aravind L anon Aravind L Family The WWE domain is named after three of its conserved residues and is predicted to mediate specific protein- protein interactions in ubiquitin and ADP ribose conjugation systems [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.29 0.72 -4.55 14 1021 2009-01-15 18:05:59 2003-04-07 12:59:11 15 76 133 6 601 935 23 72.80 24 8.55 CHANGED hststtshhW.ap....sssspWpsYs.ps..pppIEsuaptpcp.....hlp.l..shhGtsYhlshpsMpQhpptss.....thRtV+R .........................s.......hhWpap..........scpup...WpsY......s.........tp.s...........spp.........lE.p.uap.pspp...............sls...h............ss..t....t...h...t.Yhlc.....hps..M..p...Qhsppos............ptRtl+R...................... 0 158 223 410 +996 PF02706 Wzz wzz; Chain length determinant protein Bateman A, Mian N anon Pfam-B_1977 (release 5.5) Family This family includes proteins involved in lipopolysaccharide (lps) biosynthesis. This family comprises the whole length of chain length determinant protein (or wzz protein) that confers a modal distribution of chain length on the O-antigen component of lps [1]. This region is also found as part of bacterial tyrosine kinases such as Swiss:P38134. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.63 0.71 -4.39 38 7377 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 3167 41 1494 5501 1022 184.60 16 39.45 CHANGED scIDLhcLlttLh+p+hlIlhlsllhshhuhhYs..hhssPpYousshlhVspppt..........h...........sl.ssh......p.hs.shhcllpSppllpclhpclplt........................................................................................................................t.lpp.lsl..s.tp.potllslshp.spssppspplssplsps .........................................................pl-lhpl....ht.h..l.......hc.......t....ph.h.l.l.hh.s.h.l.h...s..h...l..u.h....h.h...s....h..........h.......h..........s.....s.p...Y..pus..u..h....lh...l.tppstt...................t............t...........s.h....s.st...............................p..h.hs....s..h..h...p..ll.p....S....p.........pll....p....cs..h.p.p..h.slth.............................................................................................................................................................................................................................................................................................................................................................................pt.l..p.p...t..l.pl.....p......................t....po...t...l..l....s..l..s.hp....sps..p..s.t.plhsth...h............................................................................................................................................................................................................................................................................................................................................................................. 0 472 948 1240 +997 PF03254 XG_FTase Xyloglucan fucosyltransferase Bateman A anon Pfam-B_3419 (release 6.5) Family Plant cell walls are crucial for development, signal transduction, and disease resistance in plants. 
Cell walls are made of cellulose, hemicelluloses, and pectins. Xyloglucan (XG), the principal load-bearing hemicellulose of dicotyledonous plants, has a terminal fucosyl residue. This fucosyltransferase adds this residue [1]. 20.40 20.40 20.70 20.50 20.20 19.50 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.64 0.70 -6.00 29 257 2010-01-08 15:56:59 2003-04-07 12:59:11 8 7 32 0 171 248 4 376.80 37 79.45 CHANGED lllu..............ptthsshhspttsssss.hspssptspD....+LLGGLLusGFDEpSClSRYpSu.LYRK.sSsa+PSsYLlSKLRpYEsLHKRCGPGTcuYp+AlcpL...puu....pssuss.....-.CpYlVWlshpG..LGNRhLolASsFLYALLTcRVLLVD...up-hu-LFCEPFPsTSWLLPh.DFPlps...hpuasppsscsYGsML+scsIsss...............t...PsalYlHLsHshpc..tDKh.FFC-c-QshLc+VPWLll+oDsYFlPuLFLlPuFppELs+LFPpK-oVFHHLuRYLFHPoNpVWGllTRaYpuYLA+ADERlGIQlRVFsppsssappVhDQIluCTpcE+LLPcl..sspps..........ssssupss+.KAVLlTSLh.stYaEpl+sMYWc+PTsTGElluVaQPSHEchQppspphHstKAhAEhYLLSLoDslVTSuhSTFGYVApGLGGL+PWlLacPps..tpsP..sPPChRuhSMEPCFHsPPhYsC+u ...........................................hh.s....................................-....tLhsGLL..s.s..s..h.s.ct..oChSRapt..h.a...h+.................h.....sSsaLlp+LRpaEthp+.+CGPtTt.YppshppL.pps..........t.t..sss......t..C........pYlVh.h......s...h..p.G..LGNRhLshsSsFLYAlLTsRVLLV.....c........st.c.h.ssLFCEPFPs......o.oWhL..P.................c..F..Phtt...........httht...tt.csa.sshlpp.phht.ss.................PsahalpLtts...t............-.p.h.F...aC-csQ..th..L.p..plsW.ll.hp.oD..YasPuLF........hhPtappE.Lp.phFP.p..+-.sVFHaLuRYLhHPsNplWt.hlp+.a....apuY.Lut.u.s.cp..lGlQ.lRh...a..t......t.....s.......h..p.h....hc.Qlhs.Cs.pc.p.lLP..pl....tttt....................sst.tstp.puVLlsSL..s.Yh-pl+shYhp.............pssh..sG.....-..h.ltVaQ....PSH.....EthQ.p.tpp.Hs..KAhAEhaLLShsDhl.lTSu.hSTFGYVA.pGLuGlpPW.lhh.p..t........ttss.....p.ssChpshShEPCha.PP..htCp.................................................... 
0 33 90 137 +998 PF05181 XPA_C XPA; XPA protein C-terminus Studholme, DJ anon PF02186; Domain \N 25.00 25.00 25.20 25.40 24.20 24.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.47 0.72 -4.37 16 261 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 237 2 182 251 2 51.70 54 16.42 CHANGED -KYSLLTKTEsKpDYLLTDsELcDcE.LLs+LtKsNPHpusaucMpLaLRhQV ......-KauLlTKTEAKp-YLLpD.......s-LcccE..lL.altK.NPHpupWucMpLaL+hQV..... 0 58 94 146 +999 PF01286 XPA_N XPA; XPA protein N-terminal Finn RD, Bateman A, Studholme, DJ anon Prosite Domain \N 27.10 27.10 27.10 27.40 27.00 27.00 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.95 0.72 -4.27 10 119 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 95 2 72 115 0 33.60 52 11.78 CHANGED .-Y.hC--CsK.FMDSYLhspFDhsVCDsCRDs- ....sashCcECu+pFhDSYLhspFDlslCDsC.RDs-.... 0 23 31 52 +1000 PF00102 Y_phosphatase Protein-tyrosine phosphatase Sonnhammer ELL, Griffiths-Jones SR anon Swissprot_feature_table Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.81 0.70 -5.02 114 7626 2012-10-02 20:12:17 2003-04-07 12:59:11 22 291 589 328 3916 9772 240 210.40 29 34.46 CHANGED NhpKNRatslhshDp..oRVtL.................................t..s.sD..Y..INAsal....................uht.t...tppaIsTQuP.......l..................pTh...pDFW+Mlapp..p.spsIVMLsphhEt.........sp......KCspYWP.........pp..shpaGs.hpVphh.......ppp.tptshhl+phplp.....p....tspp...............................ppl.ppapap....sWP..Dps.......lP..ps.....spshl....phlcplcptp...................tsPllVHCS.....AGlGRTGsalslchhhpplcp.......pth...lclhp........hlpplR.ppRsthVQo......pQYhFlapslhc 
.................................................................................................................................N..KN.Rh......s.l...h.........s..h......-..p..sRVhL.............................................................................................................t.....s-.......Y.....I..N...Asal..........................................................pshp..p.........ptaI..so...Q......u..P...............l...................................................pTh.......tDF..WpMlacp....................p......s..t......h................I.VMlsph..Eh......................................sp...............KC.tpYWP.................................pt..........shp.a.....s....t......h.p.Vphh...................ppp....hsp.a..hhR..p.h...pl.p.....p.......tppt........................................................................................+pl....ppa.pah........s.W.....P....-pu................................................lP......pp...............spshl............phl.p.p.lpptp.............................................................................ttsP..l...l.VHC..S............................AG.sG.R........T.G.....s...a...h...s......l.s..h.h.h..p..t..hcp...........pth.........................lslh.p.......................................................hl..p.p..hR...p.p...R.s..t.h.lp..o.........................pQ..Y.Fhapsl....................................................................................................... 1 1277 1669 2915 +1001 PF04893 Yip1 DUF649; Yip1 domain Finn RD, Bateman A anon Pfam-B_5598 (release 7.6) Domain The Yip1 integral membrane domain contains four transmembrane alpha helices. The domain is characterised by the motifs DLYGP and GY. The Yip1 protein is a golgi protein involved in vesicular transport that interacts with GTPases [1]. 
30.90 30.90 30.90 30.90 30.80 30.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.47 0.71 -4.76 208 1661 2012-10-01 22:34:14 2003-04-07 12:59:11 12 30 827 0 929 2057 257 174.80 14 66.95 CHANGED lhhpPsph.appltppt..........................hhhhhhlhshlshlhshhhs.........h...h.........................................hthhshhts......hhshhlhh.......hlshhlhuhlhth.......hsphhs.......upss....htp....shslhuas...hhPhhlstlhshhhhhhh.........................................hh.hhlhshWsh..hlhhhulttshphsth..puhhhshh.......shllhlh .......................................................................hp.tth.hpchht.hh.........................................................tp.cLh.uslh..hs.l..shshhlhhs...................................................................................th.h.shhuh................lhhshh...........hlshhlhshlhhh.....................h......hs...................ushs.....ahp..............shs.l.h.GYs....hlP.h.s.lssll.sh.hh..hhh.h........................................hhh..shlhhhWsh....hhhshhh.tthh..t..h........t.hh....................hhhhh........................................ 0 325 549 754 +1002 PF03226 Yippee-Mis18 Yippee; Yippee zinc-binding/DNA-binding /Mis18, centromere assembly Mifsud W anon Pfam-B_2930 (release 6.5) Family This family includes both Yippee-type proteins and Mis18 kinetochore proteins.\ Yippee are putative zinc-binding/DNA-binding proteins. Mis18 are proteins involved in the priming of centromeres for recruiting CENP-A. Mis18-alpha and beta form part of a small complex with Mis18-binding protein. Mis18-alpha is found to interact with DNA de-methylases through a Leu-rich region located at its carboxyl terminus [5]. 
24.80 24.80 24.80 24.80 24.50 24.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.82 0.72 -3.94 54 862 2012-10-03 11:08:31 2003-04-07 12:59:11 9 10 301 0 600 712 1 97.00 39 59.44 CHANGED hVFpCtpCpshLuDShshls.......pspp........psllh......................p...plsssVhlscphhsup.p.t......sshpslhCptCstslGhhYhsospp.hshhcshFslphcplpsYp .........................................................p.hasCtpC+scLusps-lIS+.....................shpGtps.......+AYLF...................................................................sp......llNls.pu....s...p.-RhhhTGh.............HsVsDIhCpsCpstLGWKY-.....pAhEpoQKYKEGKaIlEhtth....h............................ 0 172 307 445 +1003 PF02757 YLP YLP motif Ponting CP anon Ponting CP Motif The YLP motif is found in several drosophila proteins. Its function is unknown, however the presence of completely conserved tyrosine residues and its presence in Swiss:Q15303 may suggest it could be a substrate for tyrosine kinases. 25.00 5.00 25.00 5.10 24.60 4.90 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.31 0.76 -5.60 0.76 -3.38 26 674 2009-01-15 18:05:59 2003-04-07 12:59:11 12 40 12 0 397 674 0 8.90 65 15.78 CHANGED usEYLPPsp ..usEYLPPsp.. 0 60 60 245 +1004 PF04146 YTH YT521-B; YT521-B-like domain Bateman A anon Pfam-B_1386 (release 7.3) Domain A protein of the YTH family has been shown to selectively remove transcripts of meiosis-specific genes expressed in mitotic cells [3]. It has been speculated that in higher eukaryotic YTH-family members may be involved in similar mechanisms to suppress gene regulation during gametogenesis or general silencing. The rat protein Swiss:Q9QY02 YT521-B is a tyrosine-phosphorylated nuclear protein, that interacts with the nuclear transcriptosomal component scaffold attachment factor B, and the 68-kDa Src substrate associated during mitosis, Sam68. 
In vivo splicing assays demonstrated that YT521-B modulates alternative splice site selection in a concentration-dependent manner [1]. The YTH domain has been identified as part of the PUA superfamily [4]. 21.40 21.40 21.40 23.30 20.90 21.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.53 0.71 -4.58 59 968 2012-10-02 17:37:24 2003-04-07 12:59:11 10 23 252 2 553 870 5 142.90 42 23.91 CHANGED su+aFlIKSho.t-slctSlchslWuoospss........c+LspAacpsp..........sVaLh.FSVNtSGpFtGhAcMhos.lshs...................................................................................................pss.hW..................................................................tp....casGs.FplcWlhlc-lP.pph++l.......hss...-s+sVphuRDspElp.phGhpllplFcph ........................................................................................s.sRaFlIKSho..c-cl+cSl+as.lWsST..psN........++LssAa.+psp...................sVaLl.......FSVNsSGcFsGhAcMtos.lcas.....................................................................................................................................................................................................................................ps.sshW....................................................................................................................................................................................................tt.......t+WsGh.FcVcWl..hl.+DlPssph+Hl.......p.p..N-N.....KPVstSRDsQEl..hc.pGtpllplht..h......................................................................................... 
1 150 268 403 +1005 PF00643 zf-B_box B-box zinc finger Bateman A anon Prosite Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.88 0.72 -4.06 106 7772 2009-09-12 06:38:22 2003-04-07 12:59:11 19 359 385 16 4488 6434 9 43.10 26 9.16 CHANGED pptthC..spHp.....p.p...hphaCpsCpp.hlCppChhtt.......H..ps......Hph...hsl ........................t....hC...spHp................c..p.....hphaCp..s..s..pp..hl.....C.h.h..Cphst.............H..ps...................Hphh............................... 0 1713 2306 3263 +1006 PF02892 zf-BED BED zinc finger Bateman A anon [1] Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.41 0.72 -4.20 41 1403 2012-10-03 11:22:52 2003-04-07 12:59:11 10 85 169 2 872 1516 7 47.30 26 9.97 CHANGED utsWcaFphhp.......tcppspCpaCtKthstt.........GTosLpcHL........hptpp .......................s.lWpaFphht.............tpppspC.......ph..C..p...ptlstt..........uTo.s.LppHL..........ptp....................... 1 185 394 653 +1007 PF01530 zf-C2HC Zinc finger, C2HC type Bateman A anon Swiss-Prot Family This is a DNA binding zinc finger domain. 25.00 25.00 26.10 27.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -7.84 0.72 -4.22 33 1621 2009-01-15 18:05:59 2003-04-07 12:59:11 13 44 89 6 769 1357 1 30.90 62 11.87 CHANGED thpCPTPGCcGpGHlsG.pas....sHRSluGCPhu ............h+CPT.P.GCDGpGHloG.pas....oHR...SL..SGCPhA.. 0 116 177 407 +1008 PF00097 zf-C3HC4 Zinc finger, C3HC4 type (RING finger) Sonnhammer ELL, Vella Briffa B anon Swissprot_feature_table Domain The C3HC4 type zinc-finger (RING finger) is a cysteine-rich domain of 40 to 60 residues that coordinates two zinc ions, and has the consensus sequence: C-X2-C-X(9-39)-C-X(1-3)-H-X(2-3)-C-X2-C-X(4-48)-C-X2-C where X is any amino acid [1]. 
Many proteins containing a RING finger play a key role in the ubiquitination pathway [2]. 21.00 20.70 21.00 20.70 20.90 20.60 hmmbuild --amino -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.37 0.72 -4.30 35 9094 2012-10-03 15:03:13 2003-04-07 12:59:11 20 329 3245 29 3178 27732 1125 40.40 44 7.09 CHANGED CslChchhppss................hhtCtHs.FCpsClpphhp........tptht..CPhC ...............................................CpIC...p...c.l..l...s..-Pl.......................................................pos.C......p........Hh....FC.+....s.....CIh.ctlc.............................hhssh.....C.PtC................................... 0 1053 1488 2303 +1009 PF00642 zf-CCCH Zinc finger C-x8-C-x5-C-x3-H type (and similar) Bateman A anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.40 0.72 -4.21 88 7686 2012-10-01 21:35:20 2003-04-07 12:59:11 19 244 456 18 4636 7265 193 26.00 35 7.49 CHANGED hcpthCphatcsG..tCpaGspCpFtHsh ..........p.hCpha..h.c.G.....tC.paG.s.p.Cp..FtHs...... 0 1486 2495 3614 +1010 PF00098 zf-CCHC Zinc knuckle Bateman A, Eddy SR anon Overington and HMM_iterative_training Domain The zinc knuckle is a zinc binding motif composed of the the following CX2CX4HX4C where X can be any amino acid. The motifs are mostly from retroviral gag proteins (nucleocapsid). Prototype structure is from HIV. Also contains members involved in eukaryotic gene regulation, such as C. elegans GLH-1. Structure is an 18-residue zinc finger. 20.80 16.70 20.80 16.70 20.70 16.60 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.41 0.73 -6.48 0.73 -3.78 184 50986 2012-10-03 11:39:54 2003-04-07 12:59:11 18 692 722 56 6654 45194 253 17.90 58 7.29 CHANGED tpCapCGp.GHhu+-Csp ....htCapCG.KEGHht+sCpt.... 
0 2202 3537 5115 +1011 PF02008 zf-CXXC CXXC zinc finger domain Iyer LM, Aravind L, Bateman A anon Bateman A Domain This domain contains eight conserved cysteine residues that bind to two zinc ions. The CXXC domain is found in a variety of chromatin-associated proteins. This domain binds to nonmethyl-CpG dinucleotides. The domain is characterised by two repeats [3], and shows a peculiar internal duplication in which the second unit is inserted into the first one [4]. Each of these units is characterised by four conserved cysteines, displaying a CXXCXXCX(n)C motif that chelate a Zn+2 ion. The DNA binding interface has been identified by NMR [3]. In eukaryotes, the CXXC domain is found in stramenopiles, plants and metazoans. Plants possess a mono-CXXC domain that is present in distinct chromatin proteins [4]. Structural comparisons show that the mono-CXXC is homologous to the structural-zinc binding domain of medium chain dehydrogenases [4]. 21.10 21.10 21.80 21.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.87 0.72 -4.15 31 1021 2012-10-08 21:21:50 2003-04-07 12:59:11 15 77 106 16 505 970 22 46.30 45 4.85 CHANGED pshp+pp+.CGhCcuCpps-.CGpCssCpD...hhKaGGsspp+Q.pChhR+C .........t.t++ppR.CGpCp.uCpps-sCGpCshCpD...hh.KF.G..Gssth.+Q.pCh.hRpC......... 0 110 148 279 +1012 PF01529 zf-DHHC DHHC palmitoyltransferase Bateman A anon Pfam-B_945 (release 4.0) Family This family includes the well known DHHC zinc binding domain as well as three of the four conserved transmembrane regions found in this family of palmitoyltransferase enzymes. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.00 0.71 -11.65 0.71 -4.87 97 4432 2012-10-03 10:42:43 2003-04-07 12:59:11 15 126 347 0 2951 4268 82 169.10 23 40.72 CHANGED hhsahhshhssPGhhspt.................................................................................................................................................................................................................................hhtss..tttptpaC.p...........Cphh.+PsRs+HCphCspClhchDHHCsWlssClGtpNa.+a.F....h.......hF...........lhhhslhhhhhhh.....hthhhhhphhtptphh.hhhh...........................................................................................................lhhhhhs........hhhhlhhshlhhh.........phh.hhh.ps...hT....ohEhhptt ...................................................................................................................................................................................................................................................................................................................................................................h...........Pu.......................................................................................................................................................................................................................................................................................................................................................tt..............thp...a.Cpp...............C..p........h.......h..+P.........s....R.u.+HC...p.....h..C..s......pCV..h.+h.D...H..H.....C.s..W.....l.s.NCl...G.............tp....Na..+a..F..h..........hF..................................lhh...h.s..l.h....s.h....h..h....h.h........h.s.h....h......h....h...h....t..h....h....t.....t....t.t...........t..h....h..h...............................................................................................................................
.....................................................................................................................hhhhh.hs................hhh.hh.h...l.s...h....l..hhh...............phh.hlh..ps............T....ohE.h...t....................................................................................................................................................................................... 1 1087 1605 2335 +1013 PF04438 zf-HIT HIT zinc finger Bateman A anon Bateman A Domain This presumed zinc finger contains up to 6 cysteine residues that could coordinate zinc. The domain is named after the HIT protein Swiss:P46973. This domain is also found in the Thyroid receptor interacting protein 3 (TRIP-3) Swiss:Q15649 that specifically interact with the ligand binding domain of the thyroid receptor. 20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.11 0.72 -4.30 100 1073 2012-10-03 05:12:49 2003-04-07 12:59:11 11 21 308 3 734 1067 14 30.60 38 9.10 CHANGED tpphCsl..Ctp.....u+YpCPpCsht........hCSLpCh+ ..................hCsl...Csp.....u+Y.pCs+C.sht........hCSLpCh+..... 0 221 385 573 +1014 PF02891 zf-MIZ MIZ/SP-RING zinc finger Bateman A, Mistry J, Wood V anon Psi-blast Q9UN16 Domain This domain has SUMO (small ubiquitin-like modifier) ligase activity and is involved in DNA repair and chromosome organisation [2][3]. 22.20 22.20 22.30 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.07 0.72 -4.45 36 975 2012-10-03 15:03:13 2003-04-07 12:59:11 15 16 294 1 586 1031 4 50.20 45 7.21 CHANGED lSLpCPlohpRlpl..PsRuptCpHlQCFDhtuaLphspptss....WpCPlCsps ............lSLhCP..ls...t.h.Rl.pl..PsRutsC.pHl.Q..C.FDh.phaLphNp..cp.so...........................WpC..PlCsK.t................ 
0 192 294 443 +1015 PF01753 zf-MYND MYND finger Bateman A anon Bateman A Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -9.03 0.72 -4.08 177 3934 2012-10-03 05:12:49 2003-04-07 12:59:11 13 252 370 26 2780 4039 204 41.10 35 7.26 CHANGED C..shCtp.....................tshp..pC.spC...........p...ts....hYCSppCQptcW.t......t....H+p.C ...................................................Ct.Ctp...................................tshp.......pC..stC..............p......ts........hYC...u...ppCQ.p.tcW..t............t.......H.+p.C................... 0 1218 1661 2317 +1016 PF05020 zf-NPL4 NPL4 family, putative zinc binding region Wood V anon Pfam-B_13681 (release 7.6) Family The HRD4 gene was identical to NPL4, a gene previously implicated in nuclear transport. Using a diverse set of substrates and direct ubiquitination assays, analysis revealed that HRD4/NPL4 is required for a poorly characterised step in ER-associated degradation after ubiquitination of target proteins but before their recognition by the 26S proteasome [1]. This region of the protein contains possibly two zinc binding motifs (Bateman A pers. obs.). Npl4p physically associates with Cdc48p via Ufd1p to form a Cdc48p-Ufd1p-Npl4p complex. The Cdc48-Ufd1-Npl4 complex functions in the recognition of several polyubiquitin-tagged proteins and facilitates their presentation to the 26S proteasome for processive degradation or even more specific processing. 
20.40 20.40 20.80 20.60 18.60 19.80 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.90 0.71 -4.55 22 277 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 241 0 198 293 0 135.90 48 23.39 CHANGED pVpp.slDshL-KpDGhIpRp+.ophC..+HGsKGMC-YCsPL.PaDcpYhpEpp..IKHlSFHAYL+KlspspNK.t.GoSalsPLppPsaplphsCssG.H.PWPcGICoKCQPSAITLQpQpFRMVDHVEFtsssllspFlshWRpTGs ................................................................s.V.pstlDphLsKpDGpI.Rp+Dsp.h.C..+HGspGhCs.aCh.PL..-Pa.DtpYLp-pc..IKHhSFHuYlRKlsuus..sK.............Gp..hh....sLpp...sh+l+psC.s.......H.PW..P.cG.ICoKCQ....PSAITLppQpaRhVDplpFpsssllspFLsaWRpTG..................... 0 67 103 162 +1017 PF04810 zf-Sec23_Sec24 Sec23/Sec24 zinc finger Bateman A anon Bateman A Domain COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is found to be zinc binding domain. 23.10 23.10 23.10 23.70 23.00 23.00 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.35 0.72 -4.39 133 1393 2009-01-15 18:05:59 2003-04-07 12:59:11 10 29 324 26 939 1354 14 40.00 38 4.58 CHANGED sPlRCp..pC+ualNPasphs...suppWtCshCtt.pNplPspY ...............PlRCp......pC+uYlNPa.sph.t.......pu+p..WtCshCtp.tN.plPspY...... 
0 313 517 760 +1018 PF02148 zf-UBP Zn-finger in ubiquitin-hydrolases and other protein Mian N, Bateman A anon IPR001607 Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.77 0.72 -3.86 179 2273 2012-10-03 15:03:13 2003-04-07 12:59:11 14 45 510 19 1416 2222 31 68.20 29 11.39 CHANGED C.....spCs............................s..ps..................lWl....CLp..CGpluCG.....R.p...........s..sHAhpHa.p.pss.....................................Hslslslsshp........l.aCYss..-shVp..s.ph ..................................................................................C.ptCt...................................sp....ts.......................lWh....CLp..CGpluCG.............R..........................p.sHAhpHh.p...psp...........................................................Hslslslpshp...............................l..aCYss..cp.Vhp..h.................................................................. 0 429 703 1082 +1019 PF04704 Zfx_Zfy_act Zfx / Zfy transcription activation region Kerrison ND anon DOMO:DM04798; Family Zfx and Zfy are transcription factors implicated in mammalian sex determination. This region is found N terminal to multiple copies of a C2H2 Zinc finger (Pfam:PF00096). This region has been shown to activate transcription when fused to a GAL4 DNA binding domain [1]. 
25.00 25.00 25.30 25.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.01 0.70 -5.58 5 252 2009-01-15 18:05:59 2003-04-07 12:59:11 8 42 68 0 75 216 0 215.60 54 43.94 CHANGED EsVlIEDVcCSDIL-EsDV.SEuVIIPEQVL-uDVA........EE-SLuphslPDplLTS-LlosololPEpVhsuEuVosss.GHlEpllpssVlttEIlsDPLstDlVSEEVLVADssSEAVIDA......SGlPl+ppDDD....KuNCEDYLMISLDD.uuKlDH-GuoslTMuA-uEpDs..SKlDGsuPEVIKVYIFKADu-ED.DlGGTlslVESEs-NcHtssVhDspsSuRlPREKMVYMsVsDSQQEEDDLssuEhsDpVYMEVIVGEEDAu.......stHEpQM-DSElpKTFhPlAWAAAYGNNo.DuIEsRNGTASAhLHIDESsGLsRlsKQKsKKRRRGEoRQYQ ..............tl...-sVps.-IhtEs.l..o-.....sVIhP-tlLt.s-Vs.........-.sL...t........c.lLsu-lhtt....s.........thP-pVhss-.hp...................................................hsp...-slhp...............................tlslp..p..p.................cssspDYLMI.ShD-.s.Kh.......s.hphtst.......t....p.pt.s.E.VIK.VYIFKA-st-D..-lGGT..lsES-.pssH....tlhs..s.p.Rh..REKMVYMsVpDSp.E-..-.-........................htpcpt...............................p.hpts........s....hhh.hs................u................Ns....ssl.Es+sssAuthLpIs-uhshs+lhKQKsKK..++Rs-s+QhQ................................................ 0 3 9 28 +1020 PF02535 Zip ZIP Zinc transporter Bashton M, Bateman A anon Pfam-B_1189 (release 5.4) & Pfam-B_1903 (Release 7.5) Family The ZIP family consists of zinc transport proteins and many putative metal transporters. The main contribution to this family is from the Arabidopsis thaliana ZIP protein family these proteins are responsible for zinc uptake in the plant [1]. Also found within this family are C. elegans proteins of unknown function which are annotated as being similar to human growth arrest inducible gene product, although this protein in not found within this family. 
28.30 28.30 28.30 28.30 28.20 28.20 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.02 0.70 -5.42 40 6416 2012-10-02 19:55:49 2003-04-07 12:59:11 17 54 2513 0 3060 5517 747 223.60 18 74.42 CHANGED h.thhlhulhslhlsohhushhshhh.hhtt............................t..phhlphlhsFuuGllLusualHlLPcuhcshtpt.pht.............................................shsshhshhGhhhhhhl-phhphhhtt............................................................................t..t.....t..........t.t..............................................................................................................................t..htptptcpptpspppsppphtpthp.t....................uhllh...............huhslHslh-GLAlGsotsssp...shslhlAlhhHcl.cslGlsuhLlpushsptpshl...hshlhulssslGhslGlsl.........tustuthspull.uhoAGhhlYlull-llspthtpsp...........................h..lhphlsllhGhulMslls .........................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................t................................................................sh.h.hh......................................................................hu.l.s..lHs..h....-G................l..A..l...u.....s........s.......h.t.....ss..................uhslhlu..l..h..h......Hp...hscuh..u.............ls......h...l...h..............t....s...........s....h...........s....t.....h............p.....s..hh.......hshh...u..l......s...................p.sl.G..sh........l.u.hhh.................................s......................h....s..............h...h....h.............s.................h..............h..........h..........u...h...sAGh..hl...al.s...h.-...l.l...s....p.th.t..tp......................................................hh....t..h.h..s.h....h....h.Ghhlhhh..s............................................................................................................................................................................................. 
0 1027 1736 2521 +1021 PF00246 Peptidase_M14 Zn_carbOpept; Zinc carboxypeptidase Finn RD, Bateman A anon Prosite & Pfam-B_4832 (Release 7.5) Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.07 0.70 -4.80 114 5755 2012-10-02 19:46:12 2003-04-07 12:59:11 19 114 1951 160 2797 5760 1277 247.20 18 49.68 CHANGED lhsalcpls..tphPsls..phhsl.Gp.Sh-GRslhslclopt...................................tss+sslhlpushHupE..hluptsshhhl.....ppLlpt..............pstthppLlcphchallPshNPDGathst..p.......................................ttphWths........Rssst..............................GsDLNR.........N.....a........sht.h.........s.sspssssp.atu.ts.t..............pPEspulh.sa.l...pp...........tphthalslHuhuphhh.........aPas.............................hsh..........................shsssppp........hpplupthspshtp.ht.............t....a..phG.ssss............hY.ss.GuspD.....asa.phsh.......hsaThEltsps.......................hhhss.....spl..tsp-shtu 
......................................................................................................s..t...h.h......p...h.....l..G.p.....o........h...........p........s......p.................l..h.lplst.....................................................................................................ttsc...t.l.h.l.u..shH.u.p.E..hhus..t..h.hh.hhl.................p.p.Lhpt...............................ts....th...p....t.ll..c..p..h.phh...llPsh..NP...DG.ht.hst...........................................................................................................R.ss.ss.....................................................................................Gh.DL.N.R.............................s....................a..........................s.............................................................................s......................................................psEspulh..ph...h....pp.........................................hphthhlshH..u...h.....s.thh...............h.a..s............................................................................................................................................t........................................s...t...........................h...h.t.......h....h.......................................................t......................................................t...p...............h.h.....th.................hthth................................................s......................................................................................................................................................................................................................................................................................................... 
0 956 1446 2182 +1022 PF00100 Zona_pellucida zona_pellucida; Zona pellucida-like domain Sonnhammer ELL, Bateman A anon Swissprot_feature_table Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -12.48 0.70 -4.95 143 2880 2009-09-12 10:41:20 2003-04-07 12:59:11 18 163 237 17 1681 2596 0 221.40 16 41.22 CHANGED pCsp.sph.........pltlpps.h.ts..............th.phsplpL.......pssp...........Cps.............hpsssshhhaphslss.........CGop.ppp...............hhYp..Nplhhphps..........................t.lhp.....hphtl.pChY........................ppshts..sh...................................h..thss...hs......s.sshp.........hphplhs..........................s..htp......................phss.h.lspsla.lc.....sphhs...........h.l........hlcsChA..Tss...s...s.spasll..ppGCs.pscshhsh.h...............htsphtcFp.hpsFpFs.........................spsplalcCplplCtssp......Cpt......Cstt ...................................................................................................................................................Ct..sth..l.lthp.....................th........phhh.............tt.t..................C.s...............t.st..t..h.h.h.h.ph.shpt............CGsh.thp.........................................t.........hhap....stlhh...t....ts....................................h.h........hthth.pCta........................................th...h.........................................................................................................s......................t.s.ht..............hphph..hp..............................st.................................t.sh.htlsp.lh...hp....h...hp..p..............s.h.l......................hlppChA.........sss............st.................phtll......tpG........Cs....tp.h.th........................................tsphhphp..hpsFpFs...........................pts.lahpCpltl.C.ttt.......C..........C...t.................................................................
...................................... 0 525 708 1302 +1023 PF01262 AlaDh_PNT_C AlaDh_PNT; Alanine dehydrogenase/PNT, C-terminal domain Finn RD, Bateman A anon Pfam-B_4166 (release 6.6) Domain This family now also contains the lysine 2-oxoglutarate reductases. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.66 0.71 -4.69 96 5257 2012-10-10 17:06:42 2003-04-07 12:59:11 16 38 3489 97 1531 6780 5369 153.50 35 34.75 CHANGED hpchttthshLhsussuVtP...A+VlllGuGVsGhsAhphAtsLGA.VsshDhpssphcpl-slhuphlps..................................hhsstttlpctltpsDllIsssLlPGpcAP+LlocchlppM+...................PGSVlVDlAl-pG.......GshEsocsss..................sshshpGVhhhu ....................................................................................h..p.tthhshhl.su...ss..s.V....s....P.....u+.V.h.l.l..G.u.G..VAGh....s..AhthAtu.L.G.A...........V....p.s.h......D..h..p......s.....p...t.h.c.p........l..c..s.........h..........h.........u........p..h.lps..................................................................................hh......u.....p......t......h.l...t..ptlp.cs..DllIsssLI.P...G..t.tAP+...L.....l.....Tc-M..Vps.MK.....................................sGSVlVDl.Ah.-..p..G...........GshE.h.o..c..ss.p....................................ss.h.h.t.pG.Vhhhu.................................................................................................................... 2 518 968 1302 +1024 PF05222 AlaDh_PNT_N Alanine dehydrogenase/PNT, N-terminal domain Finn RD anon Manual Domain This family now also contains the lysine 2-oxoglutarate reductases. 
29.50 29.50 29.80 29.50 29.30 29.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.44 0.71 -4.16 363 5191 2012-10-02 14:31:05 2003-04-07 12:59:11 10 33 3449 95 1522 3903 4047 132.80 37 29.97 CHANGED GlP+E.hps...t...EpRVAlTPssVppL.....hpt.G.a..cVhVEsGAGh...uuuasDpsYptA...GAplss.ss.tp........lastu-lllKV+pP.........ssEhshl+pGphLhoaLahu...s.s.phsctLhppsloulAhEslsc.h..cu.slslLusMuplA ...........................GlP+E..hps..s...Ep.RVAhTPssVcpL...........lpt...G...a...pVhVEsu....A..G.....suuFsDp.sYtp.A.....GApIls...ss...c.......sas.u-lllKV+pP..................ssE.....hshl..........+p.....G.......thLho..aLa.A.......s.s.plhc.........tLh....p....p.........t........l..........o......s.lAhEsVsc...p........cup..sLshLusMuplA........................... 0 510 963 1296 +1025 PF05218 DUF713 Protein of unknown function (DUF713) Moxon SJ, Pollington J anon Pfam-B_6651 (release 7.7) Family This family contains several proteins of unknown function from C.elegans. The GO annotation suggests that this protein is involved in nematode development and has a positive regulation on growth rate. 21.20 21.20 21.20 21.30 20.70 20.80 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.55 6 53 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 5 0 50 59 0 174.90 23 27.54 CHANGED EELcchRpppKp+hhAFasClcLKhRFEEKEp-WuDWIpss.+pPIhRlhphFppF.ppht.....a++hsp-p.s-..lppEIhhlppclhshhsoLpphFppLppLppcap-sLFIKVLpKsIs-hAs+.......LhplhNsLsc...ssS+-.appLcshFppIssssIPoTopLR+ICKpsshSD.YpslcFPKl ...................................-hpph+pp.ptphth....hhpClhL+h+FEE+EccWucWlpsh.RpslsplhppahpFppphp..........hpc.hsp.c..cppp.....l.p.Eht.htpplhsshstlppsappLcpLpppasDtlFl+lL......Kslsphusp..................Lhplhptltph.....thspp....hp..pLcphhtplcs.ppIPoTspL+phspss.p.p-.Ypslp.s..h..... 
1 9 9 50 +1026 PF05210 Sprouty Sprouty protein (Spry) Moxon SJ anon Pfam-B_6527 (release 7.7) Family This family consists of eukaryotic Sprouty protein homologues. Sprouty proteins have been revealed as inhibitors of the Ras/mitogen-activated protein kinase (MAPK) cascade, a pathway crucial for developmental processes initiated by activation of various receptor tyrosine kinases [1]. The sprouty gene has found to be expressed in the the brain, cochlea, nasal organs, teeth, salivary gland, lungs, digestive tract, kidneys and limb buds in mice [2]. 25.00 25.00 25.40 25.40 20.90 19.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.29 0.72 -11.13 0.72 -4.04 18 429 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 86 0 238 353 0 107.50 48 29.34 CHANGED +sRCt.C...RsL.Sphhss+.shCs.....ucsslcpsoChsCsculhYHChsDsEG-........huDpPCSCs.......cspsCsRWhuLuuLSLhlPCLhCYhPhRuCh+huptC.......GC..Ct .................+s+Ch.C...........Rsh.sp..ss+.shCp......scsslchsoCMhCscuhhYHCh.s.DsEG-............................huD.PCSCs........................ps+hC......hRWhuhuuLSlhlPCLhCYhPh+uCh+hsptC.......GC..Ct.................... 0 32 51 123 +1027 PF05216 UNC-50 UNC-50 family Moxon SJ, Studholme DJ, Wood V anon Pfam-B_6607 (release 7.7) Family Gmh1p (Swiss:P36125) from S. cerevisiae is located in the Golgi membrane and interacts with ARF exchange factors [1]. 
21.10 21.10 35.20 28.40 20.30 20.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.58 0.70 -5.16 24 272 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 239 0 184 268 3 215.90 38 82.36 CHANGED ssph.chhRRlFK.hppMDFEhAhWpMhpLhhsP++VYRshhY+KQ.....TKspaARDDPuFllLhshhLslouluauls.Ys..uhhp............hlphhlhhVhlDFlhsGlllATlhWhlsN+.aLp..........................hspsscpplEWuYsFDVHCNuFhPhallLYllQahLlPllh...ppsaluhhlGNoLahhulsYYhYlTFlGYssLPFL..................cssphlLh.Plsllhllallul.......hGaNlsthhhphYht ...................t.ph.pahRRLh+...hpQMDFEhAhWpMhaLh.h.sPp+VY+shhY+....KQ...........................TKspWARDDPAFllLlshhLslooluauhs.hs...uhhp............h..lphllhhlhlDhlhsGlllATlhWhlsN+.aht.................................hppstctcVEWu.......YsFDVHhNuF...aPhhlhLahlQ.hhhh.s....hlh.....tp.........s.........aluhhlGNoLahhAluYYhYlTFLGYss.LPFL...................cpT.p.hlLh..Phhh......lhll.ahl.s.l......hhG.aNhst.hh.hY................... 0 69 102 153 +1028 PF00702 Hydrolase haloacid dehalogenase-like hydrolase Bateman A anon Pfam-B_566 (release 2.1) Domain This family is structurally different from the alpha/beta hydrolase family (Pfam:PF00561). This family includes L-2-haloacid dehalogenase, epoxide hydrolases and phosphatases. The structure of the family consists of two domains. One is an inserted four helix bundle, which is the least well conserved region of the alignment, between residues 16 and 96 of Swiss:P24069. The rest of the fold is composed of the core alpha/beta domain [1]. Those members with the characteristic DxD triad at the N-terminus are probably phosphatidylglycerolphosphate (PGP) phosphatases involved in cardiolipin biosynthesis in the mitochondria [2]. 
26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null --hand HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.66 0.70 -3.93 239 27368 2012-10-03 04:19:28 2003-04-07 12:59:11 21 179 5134 203 8521 48772 5819 255.00 26 31.25 CHANGED hcsllFDh.sGTL..hpsp.hl.................................................................................ttthhphh..tph........shthh.http.............................................................................................................sttphhpphhtt...h.tt.hhttt..................................httttthhhhh...................................................................................................................................................................................tthhthhthtstlhs......s.........stpslpp..........L+pt........uh....plsllo.s.....s....hpshtpths..l............................s.hhhuph...................pP.psc..h...tthhcp......lt................p.t........p....shhlGDuh.sDh.uhttAs ...............................................................................................................................................................................................................................................................................................................................................................................................sslshDKT..G....TL.....T.p.s..c...hplt.....................................................................................................ht...........................................ths.pp...p...h..h...p..hs...........ssh........p.....t......h.......s......p.....p....P........h.....u...p..u......l....h...t..h....s..p..t.......t...........................................................hp.t.h..t..t..h..........s...h...s..h..t....s.t.h.pst.......................................................................................hh..........us....p.h.h...t..p.t.s...hp......................t........t...........................
..................ttt.s.t..s.s.h.h.hs..................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................s...h.p.h..l..G..l.l...u...l..t......D....s....l...+..s................s..............................s..t..p..u....l...p.p...................L.+.p.t...........................................Gl..................c.s.h.M..l.T...G.................D.s..t...t...T...A...p...u...I..A..p...p...l..G..l..........................................................................................................................................................sc.....l.hAcl..........................................................................................................hP..cc..K...........hpl.lcp.............L.p.t.....................................................psp.....h..................VAM.......sG..D.Gl..ND...A.PALtpAD.......................................................................................................................... 0 2722 5262 7124 +1029 PF04227 Indigoidine_A Indigoidine synthase A like protein Kerrison ND, Finn RD anon COG2313 Family Indigoidine is a blue pigment synthesised by Erwinia chrysanthemi implicated in pathogenicity and protection from oxidative stress. IdgA is involved in indigoidine biosynthesis, but its specific function is unknown [1]. The recommended name for this protein is now pseudouridine-5'-phosphate glycosidase. 
25.00 25.00 30.30 25.00 20.20 21.30 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.78 0.70 -5.19 90 1192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 1117 6 398 842 476 280.60 54 79.09 CHANGED sEVppALssu.c.PVVALESTIIoHGMPYPpNl-sAppVEphlRcsGAVPATIAllsGcl+VGLsp--LEtLAp.....s.ps..lhKlSRRDlshslApsts.....GATTVAATMlhAphAGIcVFATGGIGGVHR.....G..A....EpoaDISADLpELu+TsVsVVsAGsKSILDlstTLEhLETpGVPVluY.....to-......chPAFaoRpSGhpsshcl-osp-lAphhcs+hp...LuLpu.GhllANPlPc-ttlstphlsshIppAlt-AcppGI..sGKslTPFLLs+ltElTpGcSLpuNIALVpNNAclAAcIAhsl .............................EVppAl..p..sp..pPVVALESTIISHGMPaPpNspsAhpVEphlRp...............pGA...lPATIA.II.sGhh...+...lGLspE-lEhLup...............p...cs....VsKVSRR....DLs.hllA.....t.....tp...s.................GATTVAuTMIhA.shA...........GIcVFuTGGIGG..VHR....................G..AEp.....ThD.ISADLpE.LupTs.VsVVCAGAKSILDlshThEYL.............ETh......GVPVlGY.......pTc..........phPAFasRp...S..u.....hclsh..pl-osp-lAchh...ts+hp...h.....s...L.p.G.G.lll.A.NPIPc.paAhsc.phIsssIspAltEA-........pp.Gl..p..GK-sTPFLLu+lsElTsGcSLpu..............NIpLVhNNAhLAucIAht................................................................................................... 0 144 246 340 +1030 PF04613 LpxD UDP-3-O-[3-hydroxymyristoyl] glucosamine N-acyltransferase, LpxD Kerrison ND anon DOMO:DM04521; Family UDP-3-O-[3-hydroxymyristoyl] glucosamine N-acyltransferase (EC 2.3.1.-) catalyses an early step in lipid A biosynthesis: UDP-3-O-(3-hydroxytetradecanoyl)glucosamine + (R)-3-hydroxytetradecanoyl- [acyl carrier protein] -> UDP-2,3-bis(3-hydroxytetradecanoyl)glucosamine + [acyl carrier protein] [1]. Members of this family also contain a hexapeptide repeat (Pfam:PF00132). This family constitutes the non-repeating region of LPXD proteins. 
22.40 22.40 22.70 22.40 22.00 22.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.20 0.72 -4.44 90 2398 2009-01-15 18:05:59 2003-04-07 12:59:11 9 36 2248 13 574 1707 1557 71.20 31 20.84 CHANGED sshtlpulAsLppAsssploFls.st..KYt.ptLpsopAuAlllstc......tp........stshshLlspsPYluaAplsphh .............shhlsulAslpp..A..psuploFls..Ns.....KYp.cp.....Ls.p.opAuA.Vllspc.......s.............hs.s.ps.shLll.c.sPYlsaA+luphh............. 0 177 363 480 +1031 PF00244 14-3-3 14-3-3 protein Finn RD anon Prosite Domain \N 21.40 21.40 21.40 22.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.50 0.70 -5.28 85 2097 2009-01-15 18:05:59 2003-04-07 12:59:11 15 15 551 104 918 1919 23 196.20 57 83.35 CHANGED R-chV..ahAKLAEQ.AER.Y....--MlptMKplsph............tsc.LosEERNLLSVAYKNVlGuR.RuoWRll..SSIEQK-..csc...........................ppphphl+pYRpKlEpELsplCs-lLsLLDppLlPs..........u.........s...ssE.S+VFYhKMKGDYYRYlAEas.su........-c+c.csscpuhpAYppAp-lAp...pp..LssTHPIRLGLALNFSVFYYEIhNsPccACpLAKpAFD-AIu-LDs..LsE.-SYKDSTL.IMQLLRDNLTLWTS-tpscp ..................................................................pp.l.hA+LAEQ...A.ER.Y.................--Mst.MctVsp.............s.EL...osE..ERNLLSVAYKNV....lGuR.RuSWRll..SSIEQKpcsp.....................................-pphphh+..........pYRpKlEpELpplCp.s.lL..p.lLDpaLIss...................u.................s.....ssE..SKV....F.YhKM..........KGDYa.RYLAEht.....sG.......................................s.cR...p........ps.s-po.pA..Y..pp.....A.-lAp....sc.....h...PTHPIRLGLALNF.S.V.F.YYEIL..NuP.-+..ACp.LAK.p...A...FD-AIA.ELDo..L...s.E....-...S..Y...K.DS.TL.IMQLLRDNLT.LWTS-.tt..t............................................................. 
0 308 472 682 +1032 PF02826 2-Hacid_dh_C 2-Hacid_DH_C; D-isomer specific 2-hydroxyacid dehydrogenase, NAD binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain is inserted into the catalytic domain, the large dehydrogenase and D-lactate dehydrogenase families in SCOP. N-terminal portion of which is represented by family Pfam:PF00389. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.91 0.71 -5.12 161 20266 2012-10-10 17:06:42 2003-04-07 12:59:11 14 74 4565 208 5776 22821 13988 171.60 30 48.83 CHANGED luhhLuhsRpl...hphppth....+pG....pW..........t...thhuh....clpG+TlGIlGhGpIGptlAchhp.uFGh.cllsasht.....t...........pthsh.phs.sl-cllppuDhlolpsP.loscTcpllstctlstMKssu.....hlINsuRGsllDcpALlcALcsGpluuAuLDVappE...........Phs....ts........p...sL....hp..hsN.....lllTPHlu ..................................................................................uhhlshhRph.....t...h...t.t.th............c.p.s.....pW............................t......t.hhuh..........cl...p......G.+...T...l.Gll.G.h.G.p.....I...Gp..t..............l.A....p..h.hp...ua..G.h....p.....l...l..h..a..s..h...................hp...............t......................tp..s.......h.......p......h......s.......s.....L.......-......c.....L..............l.....p....p.......u.....D...l...l.o...l...p.s..P....h........s.......s........p...................T.c..............p....l..l.....s...t....p..................t....l.s...t.M...K.s.....s..u...................hll.N...s..u..R...G....s.l.l.D.p.p.AL.l....pAL.....c....s.....G........c....l.t..u....A.u.l.DVa.p.p..E......................................P.hs.......t.s......................p.......s.L................hp.....h.s.N.............l.ll.TPHlu............................................................................. 
0 1662 3388 4776 +1033 PF02834 LigT_PEase 2_5_ligase; 2_5_RNA_ligase; LigT like Phosphoesterase Bateman A, Mazumder R, Anantharaman V anon Bateman A Family Members of this family are bacterial and archaeal RNA ligases that are able to ligate tRNA half molecules containing 2',3'-cyclic phosphate and 5' hydroxyl termini to products containing the 2',5' phosphodiester linkage. Each member of this family contains an internal duplication, each of which contains an HXTX motif that defines the family. The structure of a related protein is known [1]. They belong to the 2H phosphoesterase superfamily [2]. They share a common active site, characterised by two conserved histidines, with vertebrate myelin-associated 2',3' phosphodiesterases, plant Arabidopsis thaliana CPDases and several several bacteria and virus proteins. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.03 0.72 -4.00 107 1248 2012-10-03 21:31:47 2003-04-07 12:59:11 11 3 1026 8 425 2631 248 80.60 31 52.39 CHANGED ls..spph.ppltpltpplc....thsh+h....ct.s.HlTLthlt.phs....pppls...plhpthtph..t...h.lplpthuhass...tspsla .....................................tlPsphppplhp.hp..ssh.h............s.....tu..s...+.V......tspNhHlTLu.FLG.-ls...........scc.c..........sltp.h.....h..u...c.....l................c...p..ss..F..s...l.pLc.shGtah....csRVlW............................................ 0 167 289 360 +1034 PF03475 3-alpha 3-alpha domain Aravind L, Anantharaman V anon Aravind L, Anantharaman V Domain This small triple helical domain has been predicted to assume a topology similar to helix-turn-helix domains. These domains are found at the C-terminus of proteins related to Swiss:P32157. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.87 0.72 -4.24 49 1443 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 1316 6 193 768 20 46.50 31 19.76 CHANGED cloVtchhplhascphs..hptlccllsh.ssLutoW+pphp+Rlpptps .................cloVpcssp.lhap......hs......p-tlccl...Lss.ssL.SsoWpcphpKRhtpt.......... 0 52 97 146 +1035 PF01612 DNA_pol_A_exo1 3_5_exonuclease; 3_5_exonuc; 3'-5' exonuclease Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_659 (release 4.1) Domain This domain is responsible for the 3'-5' exonuclease proofreading activity of E. coli DNA polymerase I (polI) and other enzymes, it catalyses the hydrolysis of unpaired or mismatched nucleotides. This domain consists of the amino-terminal half of the Klenow fragment in E. coli polI it is also found in the Werner syndrome helicase (WRN), focus forming activity 1 protein (FFA-1) and ribonuclease D (RNase D). Werner syndrome is a human genetic disorder causing premature aging; the WRN protein has helicase activity in the 3'-5' direction [4,5]. The FFA-1 protein is required for formation of a replication foci and also has helicase activity; it is a homologue of the WRN protein [3]. RNase D is a 3'-5' exonuclease involved in tRNA processing. Also found in this family is the autoantigen PM/Scl thought to be involved in polymyositis-scleroderma overlap syndrome. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.15 0.71 -4.79 27 6914 2012-10-03 01:22:09 2003-04-07 12:59:11 15 92 3529 32 2350 6043 5677 173.20 23 27.78 CHANGED hphlpsppthpchhppltstt......slAlDTEppshph..h.p.hstllQlpst.....ptsalhsshthts..........hphLptllpssphhKlhasschDhthltp........aulhhpsla-s..lAstllshtpt..........auLssLspphls...hthspppttucap.....tcslhcpth........tYAutDshhhlp..lhppLpp.lp .................................................................................................................l.s.tth.tt.hh.t.t.l..pt.ts............hlAlD...T.....E....t...........s.......s.............h.....p..s........................h....p.............h....h....h.......hl.pltss...................pss.h.l.......s..s....h..s..h...t.s..................................................hp..Lp..s.lL..p..s...s.s..l.h.....K......lh..as.h.c......D.hth.ltp............................................h.G..l...t...h..p.....s.....l...h...........Ds...........l...t...u...h..l..h..s.h..stt....................................................huh.s.....s...L..s...p..c.....h.ls.........hs...l.............s...h..p....p......t.....u..cht...............................tcsh...c.pth..............................pY.AAt..Ds.hhhp...lhptlh.pl............................................................................................. 0 797 1441 1966 +1036 PF00803 3A 3A/RNA2 movement protein family Bateman A anon Pfam-B_1054 (release 2.1) & Pfam-B_6332 (release 7.5) Family This family includes movement proteins from various viruses. The 3A protein is found in bromoviruses and Cucumoviruses. The genome of these viruses contain 3 RNA segments. The third segment (RNA 3) contains two proteins, the coat protein and the 3A protein. The function of the 3A protein is uncertain but has been shown to be involved in cell-to- cell movement of the virus [1]. The family also includes movement proteins from Dianthoviruses. 
25.00 25.00 35.30 33.40 22.10 17.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.29 0.70 -5.30 27 894 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 89 0 0 507 0 209.50 48 84.13 CHANGED sppstusspppLlptlhu.ps.hpthtpcsshuphp.hpussphshh......sLlPssstsplsshhp......opacssphsosG...................hhpls+lllhlsPTls.sssGpVpltLhDsshss....hpslst.pphplphsss...PtlhsFhssYshPhpp.s...........pRCFtlshphsuhh..spusSshSlhhhWptphsspspsYp.pssthh.lp..hpRt.thpshsthpp...alcush...spssstthlhspshsh ....................s..plutcsuss.lsp.aSsss.chsl.spthDsu+t+hhhsspusSsh...........SlsEucuaDl.ARhlV-pspHlSNWKNDhhVGNGpppsshsIpIhPTWsSp+pYMhISRlVIWVsPTIP.sssGcLsVuLlDPNhPo.........-.cpVIL+sQGol.hDPhCFlFYLNWSIPKhsNTP............csChQL..pLhCs..pcY...thssohuSlh..WptpFsspsts.pp.ss......................................................................... 0 0 0 0 +1037 PF02829 3H 3H domain Bateman A anon [1] Family This domain is predicted to be a small molecule binding domain, based on its occurrence with other domains [1]. The domain is named after its three conserved histidine residues. 25.00 25.00 30.00 44.50 21.30 19.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.17 0.72 -3.97 57 945 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 913 1 147 544 7 98.00 42 55.62 CHANGED lssp..H.ss-chc-ELphIVc.pGGplhDVhl-HP.lYG.clpupl.tlpscp-VppFlcplpps.cup.....LppLTs.GlHhHTlpAssccsl-pIpctLcctGaLl ...............htsp.H..psEchcpELphI.VD.pGGpVp.sVhl-HPlYG..-lpu.L.plpsRcDVpcFlcplpps.pup......LSpLTc.G.lHhHplcA-oppsLctIccALcctGaLl................... 0 62 104 124 +1038 PF00725 3HCDH 3-hydroxyacyl-CoA dehydrogenase, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_743 (release 2.1) Domain This family also includes lambda crystallin. Some proteins include two copies of this domain. 
21.00 21.00 21.00 21.00 20.60 20.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.19 0.72 -3.59 77 10901 2012-10-02 19:36:46 2003-04-07 12:59:11 17 35 3006 55 3264 9099 4385 93.80 27 23.85 CHANGED GFlsNRllhshhsEAhpllpcG.Assc-lDpshchthGaP.MGPhpLsDhlGlDsshplhp.......hhtpths-p...thp.s.lhcphscsGchGcKoGcGFYpY .............................................GFlsNRl.l.h..s.h.l.s.EA.h.p.h.l.p...........-......G.......h..u.s...s.p.c......l......Dtsh.p.hs..h...Ga...P...h...G...Php..h.h...D...hl...G...l.Dsshp.l.hp...........................hhtpt.hspp............hp..s..s.....hl.pph.h....p.t.sh.h...GpKs.s..t..Gaa.............................................................. 0 922 1912 2683 +1039 PF02737 3HCDH_N 3-hydroxyacyl-CoA dehydrogenase, NAD binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_743 (release 2.1) Domain This family also includes lambda crystallin. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.95 0.71 -4.63 74 8383 2012-10-10 17:06:42 2003-04-07 12:59:11 13 46 3107 48 2683 14660 8387 177.00 33 34.30 CHANGED pVuVlGA.GsMGsGIAtlhAps.GhpVslhDh......spptlpcuhptlpp...thtphhpcGphs........ttphsthhsplp..hsschsshs.su.DlVlEAVsEslclKpclFtcl-tls.tscsIlAoNTSulsloclupshp.cPc+..hlGhHFF.N.Ps.lM.LVElltutpTuscslspshshscpl.GKtPVhV.pDs 
....................................luVl..G.u.G.h..M........GuG..I.......A..h.......h..h........A....t..........t.....G....h..s......Vh.lhDh.................st...p...s....l...s.......p...u........h...s...t...l.tp...........L.p..c..t....l..p..+...u...+..ls.................................t.t.p.hs..t..h..l..u....p..l.s..........s.......s...s.......c..........h.....s....s......h......s......c......s..........D.......l.V...............I........E.........A......V........s......E......s.......h......c...l.K......p...pla.t..c..l-.p..h.s.....t..........s.c......s..l..lASN.TSolsIs.pl..A.............s....s.......s.............p.......+........P.......cc..............hlGhHF..F..N...Ps...hM.LVEl.lt.........ut......p.Tusps.l.s.......pshsh.s.p.p.h..GKsslhl.pD......................................................................................................................... 0 802 1633 2233 +1040 PF02446 Glyco_hydro_77 4A_glucanotrans; 4a_glucanotrans; 4-alpha-glucanotransferase Bateman A anon Pfam-B_1924 (release 5.4) Family These enzymes EC:2.4.1.25 transfer a segment of a (1,4)-alpha-D-glucan to a new 4-position in an acceptor, which may be glucose or (1,4)-alpha-D-glucan [1]. 
25.00 25.00 26.80 25.60 23.20 24.80 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.45 0.70 -5.93 14 3053 2012-10-03 05:44:19 2003-04-07 12:59:11 12 17 2557 11 694 2467 761 483.60 30 78.89 CHANGED lplhSL.oc...GIG-hGppAh.hlDahpcsG...hplhQlLPlssou.ssu.......PYsShSuhAhNslalclpsLs-tthlc............hpchpphpstspVcYsplhthKhshL+csacthpppt...pppsFppFtccp...phWLcsaAhF..hAlK-phss......hsWspW.....pshptpctpslpchpcphp-clpFataLQalhapQhppl+saApp..psltIhGDlPIhluccSuDVWs..chFphs.......tsuGsPPDhFsppGQtWu.PlYsactLpccsapWWhcRlctshphasllRIDHhhGFhphW.lPt.................ucppAtsGcahhsPG...................................................................cclhshlhpths................................................................ph.lIuEDLGslss-VcthhcphulsGh+llpFshs.-spsshlPpsh...ptpsVshsuTHDssTltuWacst.....................pccpphhtpaLs......ph.pcthptsll+hshposuhhtIh.hpD....hLuLss-........tRhNhPuTssp..NWsaRlpss.......-.hssppphhphltplh ....................................................................................................................hplhoL.op...t.....G.I.GD.Fup.th..hl-.h.ht.c.sG.....htl..lhslhss.s..t.su..........................P.Yp.s...SthhhNshaIc.l..p..t....l...-.h..t.hhptt................................t....p..t.....h.....p.....t..h.....p.....s.....s...p......h.lD.Ys.tl..hthKh.s.hL..c.hsa.pta...........t....p....p..........s.............p...........t........s........F...ppF..ht..pp....................tth..Lts.aAha.............sl.+.....c..p.htt.............................h..s..W.....t.Ws........p.......th.......pp....hc.............s............s....sl.....pph......p.p..p..h.........t.-.clpaahalQahhhpQhtth..pp..hApp..hsItlhsDlslhVutsuu-sWsp.c.haths....................................sssG.sPP......D.ha.......u.s.tGQpWG..Psa..s.c....h....hp...t..pu.............Yp.alchl.Rtshphhs...........s..........lRIDHhhGhhchWh.IPt...............................up..s.AtpGt.a.lphPs.......................................................
......................................................................................................................pclhsh.lthE..t.........................................................................................................................p.h.lIuEDLGs..l..s..sp....lhthhcptGl.uh+lL..Ft..h........................p.......p......p.........s...........h......h....h....P..p.ta.............stpulshsuTHD.sTl...tG.aapst..c.................................................................................pp..ppph..h......t.t......h...l..s............................t..h.............t....p.......s.....h....t....p....s..h...l.....p...s.h...h....s....o......h.....sp.h..h...h.h...h..tD......................lLulp.p.........................t.hNhPGTsst...NWph+hsts.......p.h.t.tth........t.................................................................................................................................................................................... 0 231 453 586 +1041 PF01812 5-FTHF_cyc-lig 5-formyltetrahydrofolate cyclo-ligase family Bashton M, Bateman A anon Pfam-B_1555 (release 4.2) Family 5-formyltetrahydrofolate cyclo-ligase or methenyl-THF synthetase EC:6.3.3.2 catalyses the interchange of 5-formyltetrahydrofolate (5-FTHF) to 5-10-methenyltetrahydrofolate, this requires ATP and Mg2+ [1]. 5-FTHF is used in chemotherapy where it is clinically known as Leucovorin [2]. 
20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.10 0.71 -4.52 24 4810 2012-10-04 00:26:15 2003-04-07 12:59:11 15 19 4472 16 1259 3577 1998 177.40 26 87.98 CHANGED KppLRcphht.thctlspcphtptsptltp+lhshhthppu.ppl........uhYlsh.ssEhshp..tlhpphhptschlhhPhhp.ps..................................s.h.ps.ht........lhpPhpthth..s..........plDllllPulAFD.ppGhRLGhGtGaYDRhLuphpt.....p.htlulshcpQhhs..plPt-....aDhslchllss ...................................................................................KpplRpthhp..phps....l.....s.....p.....p..p...p...p....t.....s..p.....t....l.t.p...p....l....h...s....h...........t...h.......t....p....A.....p......s...l.........................................................uhals.h..s..s..El...s...Tp...........sl..l..c..p...h..h........p...........p........G.........K.....p.....l..h..l.Phh.ps.ts..................................................................................h...h...h...t...h.p..s....t.s....t...l....h.p.s..t.a.s................................lhEP.t.ts.h.t.h...................................plD.llllPslA.....F....D...c.......p...G...h.....RlGhGGGaY..D....RhL..s..phpt..........................hs.........l....u.ls.a...s..t..........Qhl........c........p..........l....P....s...Es.........a.Dl..slstllT..................................................... 
1 390 782 1045 +1042 PF02739 5_3_exonuc_N 5'-3' exonuclease, N-terminal resolvase-like domain Bateman A, Griffiths-Jones SR anon Pfam-B_716 (release 3.0) Domain \N 22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.04 0.71 -4.88 25 6210 2012-10-03 20:43:45 2003-04-07 12:59:11 11 20 4577 17 1278 4680 3819 164.00 35 23.04 CHANGED ppLlLlDGpulsaRAaFAhs......LpsspG.sTs.AlaGFhphLhpllccppPsalllsFDutsp..TFRcchYtcYKusR............spsP--LhsQlshl+EllctlGIshlEhsGaEADDlIGTLAcpAppc..GhcVhIlouD+DllQLls-...pVtlhps........tchphhss-pVhEK.a ...................................................................................LlLlDGpuhhaRAaaAhs...................h..p..s.s..p.G.........T.....s......Alh.G........h........hp....hLt....p...l...l.....p.....c.........................p.......P.....o.........H....hs.........VsF.D.....................s.t................t...............p......TFR..............p-h.as.cYKusR........................................s.s.hP..-....-........L......p......t......Q....h......s....h..........l.......+........ch..l....c....u......h........G....l..........s.h......h.p.h.....s.GhEADDlIGTL.Ap..p..u...p...p.p.......G.....h.......p.V.h.I.loGDKDhhQLlsc......p....l.t..lh.pp...........hp...phh.s.ptlhc+..................................... 
0 413 828 1081 +1043 PF01367 5_3_exonuc 5_3_exonuclease; 5'-3' exonuclease, C-terminal SAM fold Bateman A, Griffiths-Jones SR anon Pfam-B_716 (release 3.0) Domain \N 40.00 40.00 40.30 40.00 38.60 39.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.20 0.72 -3.78 27 6018 2012-10-01 19:52:02 2003-04-07 12:59:11 15 19 4488 11 1254 4486 2458 100.70 36 13.83 CHANGED lsPpQ....hhDhtALhGDsSDNIPGVtGIG-KTAhpLLpcaGSl-slhp..Nl-plps.ph+......ctLpspc-sshhScpLuslcsDlslplsh-sh.......thps.stpplhp .........lsPpQ....llDhhuLhGDoSDNlPG....V....sGl....GpKTAhpLLppa.G.ol-slhp.....pl..-....p....lp...s.......+.h+...........................cpL..p..p..s..p..-.............t..AhL..S+..pLAo.lps.Dl...s...l...s..h...s.h...cpl.......thp..t......t.................................................. 0 404 813 1059 +1044 PF03491 5HT_transporter Serotonin (5-HT) neurotransmitter transporter, N-terminus Griffiths-Jones SR anon PRINTS Family \N 20.30 20.30 20.90 22.20 20.20 19.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.17 0.72 -3.84 3 54 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 34 0 20 41 0 39.10 70 7.22 CHANGED ENGVLQKuVPTPADKVESGQISNGYSAVPSTGAGDDAcHSlP .............ENGVL..QKsVPsPGDKsESGQISNGYS....AV....PSPGAGDDspHSIP............. 
0 1 2 4 +1045 PF02096 60KD_IMP 60Kd inner membrane protein Mian N, Bateman A anon IPR001708 Family \N 25.40 25.40 25.50 25.50 25.10 25.30 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.46 0.71 -4.65 147 6198 2012-10-01 21:53:17 2003-04-07 12:59:11 15 26 4682 0 1609 4484 3134 199.40 31 48.09 CHANGED s.saG..luIIllTlll+.lllhPLshpph+Sh....t+Mpp....lp..Pclp....clpc..+a...............ps.ctpc.....hppEhhcLYccpslNPh...uGCL.PhLlQhPlhhuLatslpp...............................................................hp.....l...........pp.............ssFh.........................W......lpcL...........ussD............................................hhlLPllhulshhlpppl........s....................pst......pt+hhhh...hh.Plhhh.....hhhhphP..uGLsLYWhssslhslhQphhl.pphht ............................................................................................................................................saGhuIIl.lTl.ll..R...hll.h.P..L....sh.tp....hpoh..................tK.M.pt......l..p......Pclp.................tlp.c+a...........................................t-..-pp+...................hp..p....E...h...hp...LYK......c..p......s.l..N..Ph......uG..C....L.....PlL.l..Qh..Pl...ah.ALY.hslhp............................................................................................................................................shp...............l.............pp..................usFh.................................W.............l..cL............ussD.......................................................................................s.hh.lLPl.lh..uls.h.alpp.t.l......................................s..................................s.ps..htphhhh.................hh.Pl..hhh.....hhh.....h........hP....................uGLsLYWlluN.......lhsllQphllpp...t............................................................................................ 
1 548 1016 1354 +1046 PF00428 Ribosomal_60s 60s_ribosomal; 60s Acidic ribosomal protein Finn RD anon Pfam-B_151 (release 1.0) Family This family includes archaebacterial L12, eukaryotic P0, P1 and P2. 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.32 0.72 -3.59 107 2118 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 646 27 1183 2032 138 88.10 32 53.35 CHANGED psossslppllpuush.plcshthphhhptlp..uc.slc-llsssttt.....................uuuuusAAuuuuuuuusAspcpccEEcc...........EpD-..DMG.hu...LF ......................................................................sotsslppll.......puush.pl-s.hthp..h..h.......hptlp..uc.slc-llsssssths............................................uuuuAs.Au.u...u...u...u.u.u...u.A......s.s.sp...E.cccEEcc...........E..EoD-......DM..G..FG...LF....................... 0 390 647 956 +1047 PF01591 6PF2K 6-phosphofructo-2-kinase Bateman A anon Pfam-B_717 (release 4.1) Domain This enzyme occurs as a bifunctional enzyme with fructose-2,6-bisphosphatase. The bifunctional enzyme catalyses both the synthesis and degradation of fructose-2,6-bisphosphate, a potent regulator of glycolysis [1]. This enzyme contains a P-loop motif. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.39 0.70 -5.28 11 1160 2012-10-05 12:31:08 2003-04-07 12:59:11 13 22 311 13 696 1195 96 194.90 37 39.97 CHANGED tcsssssp.pss.hhIVMVGLPARGKTaISpKLTRYLs....WlGhsTKVFNVGpYRRpssps........pshcFFcs-NpEuhclRcpsAhsALcDlhsaLscEsGpVAlFDATNTTRERRchIhphsccps........hKshFlESlCsD.plItpNIpplphuSPDYtspss-c.AhcDFh+RIcsYctsYEPLD.-ppDcsLSaIKlls.lGpphllNpVpsalpSRlVYYLMNlHlpP ...................t........pss.hhllMV.GLPARG......Koal......u..pK...LsR..YLs.................WlG..h.o.+..l.....FN..lG.p..Y.R.R.pts.tt..............................................ps..h...p...F.F.p.ss..Nt..c.uh...p.lR...cphAhtslc..........Dlhp...al............p....p............p............s.....G..........p...........lu..l.................a......DATNoT.+cRRphlhp.h.h.p.p..p.s........................................h..c..shFlESlCsD....pllttNI.h.ph.p.h.s.s.........PD....................Yhs......h.s...cp...AhpDFhpRIppYct.....s.....Ypsl...s..................-.....p.....p..l.............s.....aIKhhs...sGpp.h.hl.s.p.l.p..salpo+lVaYLhNl+lp........................................................................ 0 187 351 557 +1048 PF00393 6PGD 6-phosphogluconate dehydrogenase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This family represents the C-terminal all-alpha domain of 6-phosphogluconate dehydrogenase.\ \ The domain contains two structural repeats of 5 helices each. 
20.40 20.40 20.80 20.60 19.70 20.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.91 0.70 -5.20 58 5458 2012-10-02 19:36:47 2003-04-07 12:59:11 14 19 3935 36 1173 4118 977 231.40 44 59.81 CHANGED GHaVKMVHNGIEYGDMQLIuEAYplLKpsLGLoscEluclFpcWN.pG-LcSYLIEITucILpt+D.....ps.G..p......sLVDhILDpAGQKGTG+WTs.sAL-lGlPlshIsEAVhARhLSuhKcERltAS+.hLsG...sp.....sthss-+ppal-sl+pALYsSKIsSYAQGFtllptASc-asWsLshupIApIWRGGCIIRutF.LscIpcAappsssLsNLLlssaFpptlpptpsuaRclVu.hAsptGlPlPuhSoALuYaDuYRsspLP.ANLlQAQRDaFGAHTYcRh..........D+pGs..FHTpWs ................................GHaVKMVHNGI.EYGDMQLIuE..uYplh..+..p..s....h..s..h......s.....sc.......-.......huphFpcWN...pG.-....L.s.S.aLl-IT...tclh.ptcD.......pp..u..p.....................ll.-h.I......h.....D.....p.....A.....u......p.....KGTGKWTs.sAL-lGhPlslIs.EuVFARhlSu.h.K.p.p.Rht..A.sp..h.h.t.....s................scctthl.cpl+pALahuKlhuYAQGF..hptAu.pp...tWs.lshuplA.laR.sGCIIRu.......F.Ltp.I.pp.Aa.tp....s....s....t.l.................s.Llhs.hFtth..h..tp.h........tuhR.....plls.h............u............hp.G...........lP...........hPshu.uu.l.s.a.aD.uhp.st.L.P..AsLlQ.......AQRDaFGuHsaphh.........s....t.........hH...W............................................................... 0 355 715 977 +1049 PF02495 7kD_coat 7kD viral coat protein Mian N, Bateman A anon Pfam-B_2886 (release 5.4) Family This family consists of a 7kD coat protein from carlavirus and potexvirus [1]. 21.10 21.10 21.40 23.50 21.00 20.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.89 0.72 -4.52 61 349 2009-09-10 16:31:06 2003-04-07 12:59:11 12 1 114 0 1 324 0 58.80 27 70.57 CHANGED h....hhhhlullssllslhhl.....psssssChllITGESlplpu..C..hsschlchl.psL+shst .......h..lhhhlslls.sllslhhl........ssppspCpllIoGculhIss..C.t.os-hlchl..shpPhp.s........ 
0 1 1 1 +1050 PF02294 7kD_DNA_binding 7kD DNA-binding domain Mian N, Bateman A anon Pfam-B_8148 (release 5.2) Domain This family contains members of the hyper-thermophilic archaebacterium 7kD DNA-binding/endoribonuclease P2 family. There are five 7kD DNA-binding proteins, 7a-7e, found as monomers in the cell. Protein 7e shows the tightest DNA-binding ability. 25.00 25.00 119.50 119.30 20.90 19.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.11 0.72 -4.29 2 33 2012-10-02 16:56:36 2003-04-07 12:59:11 13 1 18 30 10 32 0 61.50 90 96.62 CHANGED ApV+FKYKGEEKpVDhSKIKKVWRVGKMlSFTYD-.sGKTGRGAVSEKDAPKELhpMLt+tc s.TVKFKYKGEEKEVDhSKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEKQK. 0 2 2 9 +1051 PF00001 7tm_1 7 transmembrane receptor (rhodopsin family) Sonnhammer ELL anon Prosite Family This family contains, amongst other G-protein-coupled receptors (GCPRs), members of the opsin family, which have been considered to be typical members of the rhodopsin superfamily. They share several motifs, mainly the seven transmembrane helices, GCPRs of the rhodopsin superfamily. All opsins bind a chromophore, such as 11-cis-retinal. The function of most opsins other than the photoisomerases is split into two steps: light absorption and G-protein activation. Photoisomerases, on the other hand, are not coupled to G-proteins - they are thought to generate and supply the chromophore that is used by visual opsins [1]. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.81 0.70 -5.25 64 42866 2012-10-03 04:04:29 2003-04-07 12:59:11 16 290 7772 141 17156 63287 11 225.60 18 72.53 CHANGED GNslVlhslhtp+ch+....sssshalhsLAluDLlhslsls.hshhhhhh.........pWshGp...hhCclhshhhhhshhuSlhhLsslolDRYlAIscPhphtthps...tpuhhhhhhlWlhuhllulP.hhhhttpt.pps.............hhChhphsp.............hhlhhtlhsFhl.PlhlhhhsYhhIhppltpptt.................................................................................................................................................ppcp+ss+hlhsllslFhlCWhPatlhhhltshpphs............hthhh.lshhluhsssslNPllY ...................................................................................................................................................................................................................................................................................................................Nh.h.s..l....h....h.......h...h...t........p.....p......p..h+...................ss......h...p...h....h...l......h...s.....L....A.h.....u..D.....h.......h......h...............s...h............h.....h..................h........h.....h........h........hh................................a.........h......u.............h.h...C......p.....h.........h.......h..............h.......h............h.......h........h........................h...........h.......s......o.....l..........h......................l..s.h..l....u......h......-................R.............a...............h.........s................l......s......p.......s................h........p..........h................h.........t.............h.....h..............p...........................p..............t...............s.......h......h.......h......h.......h......h........h........W..............h......h.........u......h.........h..........h......s......h......s.........h........h.........h.....................h...........t............
...................................C.h........h.................................................hh.h..h..h...h...h.....h.....h.....a.....h.....l.......P.....h......h....l.......h......h......h......s.....Y...h...h...l...h...h...t....l...t.....p...t..t...t....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p..t...c....t....c......h...h....p....h........h....h............h....h....l...............h....h.......a....h...l....s........a....h...P.....h....h.....h......h........h.....h...........h.h..........................................................h..h......h.........h....h....h...h........s...s...hhsPhlY............................................................................................................................................................................................................................................................................................................................................................................................ 0 5181 6677 11247 +1052 PF00002 7tm_2 7 transmembrane receptor (Secretin family) Sonnhammer ELL anon Prosite Family This family is known as Family B, the secretin-receptor family or family 2 of the G-protein-coupled receptors (GCPRs).They have been described in many animal species, but not in plants, fungi or prokaryotes. Three distinct sub-families are recognised. Subfamily B1 contains classical hormone receptors, such as receptors for secretin and glucagon, that are all involved in cAMP-mediated signalling pathways. 
Subfamily B2 contains receptors with long extracellular N-termini, such as the leukocyte cell-surface antigen CD97 (Swiss:P48960); calcium-independent receptors for latrotoxin (such as Swiss:O94910), and brain-specific angiogenesis inhibitors (such as Swiss:O14514) amongst others. Subfamily B3 includes Methuselah and other Drosophila proteins (e.g. Swiss:P83119). Other than the typical seven-transmembrane region, characteristic structural features include an amino-terminal extracellular domain involved in ligand binding, and an intracellular loop (IC3) required for specific G-protein coupling [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.78 0.70 -5.29 31 4718 2012-10-03 04:04:29 2003-04-07 12:59:11 19 348 246 3 2544 4248 5 225.10 22 28.73 CHANGED hhslpllhhlGaulS.LsuLhlAlhlhshF.RpL+CsR.NhIHhNLhhoalLpshshhlt.tthlhspht....p...................................h....C+lssshhpYhhhsNFhWhLlEGlYLasLlshs.....ahs-.+thhhhahllGWGhPslhlssWshs+...............hhats............spC.Whsp..ptshh....WlhpGPlhhslllNhllFlpllplLhpKlp......ssphucpcptph.................................hs+ooLlLlPLLGlpall..hhhhsss......phlhhahphhlsSF....QGFhV 
.......................................................................h..hthl.hlG.h.sl...S...lh...s....Lh...h..sl.h...h......h......h......h..............h.......+................p...............l.................p............s.....p.....c......s......h.....l....p........h....s.L....h.....h.......u.......h....h..l.......t....t..h....h..h......l...lt........h....h..tt......................................................................................................................C.p...hhs.hh.h.aa.h.h.....lu.s..ah.....W..h...h..l..Eu...l..h.L...a.....h.....h...l.shs...................h..s.p....p.....t.h.h..h.....h.....a....h....h..l.....G.........W....G.............h..Ph.l..lls..l...h.sshp.......................tha..t..s.......................................p..t.....C....W..hp........pt...h..hh.............a....h...h...h..u.P...h...h....h..h.l.h...........l.N....h.l..h...h.l.h..h.....l.h..h..l....h...p.php..........tt.p.h.t...p.t.p.p.hh..............................................................................hhh.s.s..hh....L..h.....L...h..G.....l.....pahh..........h.h.....h....h.......tp..........t...hh....hh.hh.hh...hsuh....Q..Ghhl.............................................................................................................................. 0 852 1057 1672 +1054 PF02949 7tm_6 7tm Odorant receptor Bateman A anon Pfam-B_436 (release 6.4) Family This family is composed of 7 transmembrane receptors, that are probably drosophila odorant receptors. 
25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -11.67 0.70 -5.57 36 3836 2012-10-01 21:54:26 2003-04-07 12:59:11 15 33 114 0 2186 4221 0 248.70 14 75.32 CHANGED shsphhpslphhssshsshhKhhhhhhpppchtclh......ptLppl.tcsh.ppp-phhhtphhp...ptp....hlhhhhhhshshhshhsh.....lshlhttpt..............sh.hhh.......h.hsap.....sph.aah.thhhphhshshsshttlssDshhhhhhhhlshphclLpt+lcpltts................................................................................ptsppp.....ppLtpslppHppll..................chsptlpshhshshhspFhssuhllshshhplhhhss...shphlhh...lhahhshhhQlFhhCahuspltppupplspAla.pss.Whs......tshch++hllhhht+sQ..+shplpAss.hhslsLssahslhphua .....................................................................................................................h..........................p.h....hh................h......h................t...h.............................................................................hh................hhhh......h.h...h..h..h...h...h....h.h.......h..h.h.h...................h....h..h...............................h.....h..h................................h...h...............t........hhh....hhhh.p.h.h...h.h.....h...hshh.......h...........sh.s...s.h........hh.h...h...h.....h....h.t...h..p...hph..l..t....p.l.pp.h.t..........................................................................................t..t..................ttl...h.t....h...lphHttlh................................................................ph.hp..h....p....t..h.h.t....h....h..hp.h.h...h....h..h.h.l...s..hh...h.............h...h..h...h........................h...h....h...................h...h..h...h...h...h...h....h...h...p.h..h....h.hsh..h...up....l.................t............st....pl.hts..h........a...p........W....h.t..............hs......p.....pp.......lh.hh.h.psp....p..sh...hpsh..hh....h..s..h..hhth......................................................... 
0 582 712 1980 +1055 PF00207 A2M Alpha-2-macroglobulin family Finn RD, Sammut SJ anon Prosite Family This family includes the C-terminal region of the alpha-2-macroglobulin family. 20.40 20.40 20.40 20.40 20.10 20.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.07 0.72 -4.41 131 2819 2012-10-03 02:52:13 2003-04-07 12:59:11 17 148 1230 58 881 2534 98 88.80 26 5.98 CHANGED oWlWpsh.ls........t.s...GptslshplPDoIT.oWphpAhulosst.........................GlGlups...plpsh+sFFlslpLPYSlhRGEplplpssla.NY.hsps..lp.lpVpl ............................................hha...hhs.............tp....Gpspl.shh........lPDo...lT...pWc.lh.A.....h.u.h.s.s.s.......................................................s.hGhscs........plpshp...shhlphshPh..ltpG-p.hplthslh.Nh...h.sp.s...p.hpl............................................ 0 218 372 622 +1056 PF01835 A2M_N MG2 domain Bateman A anon Pubmed:16177781 Domain This is the MG2 (macroglobulin) domain of alpha-2-macroglobulin [1]. 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.41 0.72 -3.75 175 2926 2012-10-03 16:25:20 2003-04-07 12:59:11 14 140 1451 60 921 2616 150 95.90 22 6.25 CHANGED psalhTDRslY+PGcsVph+sl..shs.................t............slp...lplhcP.sGppl.pphhh............Ghhphs.aslspss.hGpaplpsph.........................tstthsstpFpV ..............p.halhTD+slY+PG-s.V.phpsl..hhchc.....................................hpsh.ss....pslp...lp.l.....h.cP...sGp.h.l.pph.s....................t..pp...Ghh..phs..at..L..sps..s....s..tG....taplpsph...............................t.sphhphpFpV............................................ 
0 235 407 669 +1057 PF01356 A_amylase_inhib Alpha amylase inhibitor Bateman A anon SCOP Domain \N 25.00 25.00 25.00 58.00 23.50 23.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.41 0.72 -4.18 6 15 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 13 7 1 19 0 65.90 51 70.74 CHANGED upPAPACVchapSWRYTpVsNsCupsVSVTVsYpDGspuPCtslsPGslsTFu.GYGTpuNaVpulshC .t.pPAPACVchapSWRYTsVsNsCussVoVTVsYpDGppuPCRslsPGshsTFu.GYGTpGNashulthC 0 0 1 1 +1058 PF02137 A_deamin Adenosine-deaminase (editase) domain Mian N, Bateman A, Iyer LM, Zhang D, Aravind L anon IPR002466 Family Adenosine deaminases acting on RNA (ADARs) can deaminate adenosine to form inosine. In long double-stranded RNA, this process is non-specific; it occurs site-specifically in RNA transcripts. The former is important in defence against viruses, whereas the latter may affect splicing or untranslated regions. They are primarily nuclear proteins, but a longer isoform of ADAR1 is found predominantly in the cytoplasm. ADARs are derived from the Tad1-like tRNA deaminases that are present across eukaryotes. These in turn belong to the nucleotide/nucleic acid deaminase superfamily and are characterized by a distinct insert between the two conserved cysteines that are involved in binding zinc [2]. 
19.80 19.80 21.20 19.90 18.90 18.60 hmmbuild -o /dev/null --hand HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.43 0.70 -5.12 59 781 2012-10-02 00:10:39 2003-04-07 12:59:11 13 25 275 2 446 770 16 308.00 27 55.17 CHANGED oluTGsKslss..pphuppGph....lpDsHAEllARRuhhRaLhpplthhhpt......t.tpsl....a..............h.p..tssth........acL+sslphaLYlSpsP.CGDAsl...tp.............................................................................................................................................sstthlsscsss....tsh.shpGll.htsspu...-phhoh..SCSDKls+...........WsllGlQGuLLuphl..cPlYlsolllu..t.........hpts......phpRAht..pRh.....................tth.tsaphpp.Phht.......t.......................................................................tspsppptpsss.............................hSlsWsh...............sshplplsss....ppGp.........sshtstSclsKtshaptatpl......ht............ppphts............sYt-hKpts...ppYppsKpplhpt......................tshssW.lpKs.- ...........................................................uluTGs+Clst.......phh.s....pG......h..............lpDsHAEllARRuh...h.R.aL....hpplthhh...st................ppol..a...........................................................t.....t..t..s..............................................................apL+.pslphahYh.S..psP.C...GDAphhs.hp...............................................................................................................................................................................................................................................ssttslss.p.s.s...s..............sh..ph.Gll.............p.u.....-.....p..hhoh..SC..SDKls+...........WsV...lGl...QG...uLL..ophl...pP.......lYlpol..llG........................hptt.......................................thpRA..h..h....pRh...........................................t.htth...athpp...shht...............................................................................................................tsptpt.hpsss..................
...........hS.ls.Wsh...................................ss.pl.pl..hss..ppGp............................tshtt.SplsKtthhthahp.l..................ht.h.....................ttth.t................................sYt...phKths..............ttYpt.s..pp.hhp........................shssW.htps.......................................................................................................................................... 1 123 188 317 +1059 PF00324 AA_permease aa_permeases; Amino acid permease Finn RD, Bateman A anon Prosite Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 478 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.47 0.70 -6.16 26 17705 2012-10-03 01:44:59 2003-04-07 12:59:11 16 47 2920 0 4452 25995 1856 417.10 27 85.04 CHANGED clhhhuluuslG..sGLalusuhsltpuGssus..llua.llsuhhhhhhhhuluEhushhP.....puGuahsaus+hlus.phGhssGhh......ahh.hhhslsh-lsssshhlpaW...hscls.hhsh..................sslhhsllhhlshhul+hauchEah..hshlKllsllsFlIlu...hhhhtsstssps.............t.hhtsus.....hsss.........htshhushshshFuas.GhEhluhsAuEscsPpKslP+uhppslhplslhYl....................huhhhlshhlshsssshhst...s...........................ushshhhphtslsshssllshslLsusLSuusuulasuoRhlhuLuccshhP.....phhpths+pusPhtulhlohhhuhlsh........lhsthsss..hlhsahlshsulsshhsahhhhhshh..taRpshthpstthstlshchhhhshhshhslhhlhhlhlhhshhhh......hs.......hssts.htsalshhlhllhhhshthhh+phhsphhh 
................................................................................................................................HlphIA..lG.G...s..I.......G...sG.L....F....l......G..u..u...t.........s.........l...p..............A......G................P...us............llu.Y..hl.s...G...h.........h............h......a......h...l..M...p.u.L..G............E..h..s......h.....t.P............ssG........S........F......s................s....a.......A................p......c......h..............l...........u......s......h..............h....G..........a....h..s.G.Ws.................................Y.a.h....t....a............h......l.....s.........s....h.........s........-....l......s....A......s.......u.....h.......h...h.......p......a.....W.............h.....P....s....l....P......t.W.l..h..................................................................s.h.h.h..l...h...l....l....h.....s....l....N.........l....h....u....V....+...h....a...G....E....h........E.....F..W........F...u...h....l....K...V....l....s....I..l....s.....h...I.lhG............h...h..h.h...h.s.s..tssst......................................................ht.h..h.p..p..Gu....................................h....s.....s..G.......................................................................h......h......u....h......h....h....s....h.....h...h...s....h..F........u....a.........t......G.....h....E...l..l............u.........l....s....A..........u.E....s..c...............s.........P....p......+................s...........lP...+..A...........l...p...p...l...h...h...R....I....h...l.....F..Y.l.........................................h..u...l....h...l...........l.......h...h...l..h........P....a....s....p...h....s....h....t..s................................................................................................S..P..F..l...h...h....h....p....t.......h.......G.............l...........s.....h....s.......u......s......l........h.....N....h....V.....l.......L....o.....u....s....l.........S..........u....s...
.N.....S.........u....l....a.....u.......s....u....R...h...L....a.u.L..u....p...p....G.........A..P...............................ph..h...s.....+..h.....s.....+.................p.........u....l..........P.....h.....h.....u.....l....l....h.....o........s....h..h....s..h...l.u.h..........................................................................l...h..s.h.h.sst...........ps..a..h...h.....l....h....s.......h...u....s..........h.....s.......h...l....h.....s......W...........h.......h..........I...h...l...u..p.h..............p.a...............R............+.............t...................................................p......s.............t..................h.........t.................p....h..................a........+.........h...............h.......h........s..........h.......s..........s..........h.................h.......s.......l.......h.........h......h...h.....h...l........l...l......h...h......h...h..t.............................................ht...........................................................................h............h....h...........h..h...h..h...h..h.......h....................hh...................................................................................................................................................... 
0 1009 2148 3513 +1060 PF03306 AAL_decarboxy Alpha-acetolactate decarboxylase Mifsud W, Sammut SJ anon Pfam-B_3661 (release 6.5) Family \N 25.00 25.00 25.70 25.60 21.30 21.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.12 0.70 -5.30 71 1480 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1246 4 233 763 52 214.90 42 90.92 CHANGED laQhSTluALhsGlY-Gsholu-LLcHGDaGlGTFspLDGEhIhlDGpsYQhcuDG...s.sp.ls.ssp.psPFAslThFpsc.hphphtpshshpplpphlpphhs.upNlFhAl+lcGpFppl+sRoVsp.QpcPYsshs-sscp.QshaphpslpGTllGFaoPpahp.GlsVsGaHlHFlo-D+phGGHlLDapl.psuplplshhsphplcLP.pstsF....hpuclshps ..........................LaQauTLuuLhuGLhcGTholsELL.cHGD.h.GlGThsulDGE.lIhL.DGcsY..p..sp..u..cG...p.hh.c.l....p....s.-............p...hsPaAslT.Fps-...hpa...p..................p..p.p.h.o.pcplpt....pIcph....h.....uc....NL....FtAlKI.pGpFp+..h+lRhhPp...QptP.Ys.phh-succ..QPEaptps.lpGolVGF.aTP-hac.Gl..us..AG..aHlHFlsD....D+saGGHVhDFtl...ccshl-..ltshsphc.+hPhpspsFhpAclshcs................................ 0 52 122 182 +1061 PF04611 AalphaY_MDB Mating type protein A alpha Y mating type dependent binding region Kerrison ND anon DOMO:DM04516; Family This region is important for the mating type dependent binding of Y protein to the A alpha Z protein of another mating type in Schizophyllum commune [1]. 
25.00 25.00 57.40 234.00 20.90 19.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.31 5 6 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 2 0 1 6 0 146.80 51 15.97 CHANGED MsDpLusL+uISAsAKuMsulAtSRGApssPsPhssTsV+..FDPLPoPsLDuLRoRLp-A+LPPKulKSALuAYEcACuRWRp-L-EuFcpTA+SVSP+NLHLLssLRaRLYTcQVEKWtsQVLQVPE+W+AEMEKQRAHIsATMGPu Ms-hLusLpuISAsAKsMhulAtSRGApss.pPhssTss+..FDsLPsPsLDhlRoRLp-A+LPPKulKuALuAYEpACuRW+p-L-EuFcpTA+SlSP+NhHLLspLRhRLYscQVpKWhhQVLQVPE+W+AEMEKQRAHIsATMGP.s 0 1 1 1 +1062 PF03417 AAT Peptidase_C45; Acyl-coenzyme A:6-aminopenicillanic acid acyl-transferase Bateman A anon MEROPS Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.28 0.70 -4.90 18 964 2012-10-03 21:14:07 2003-04-07 12:59:11 11 16 794 14 316 1078 61 218.40 21 58.87 CHANGED hpscss.....hlu+NhD..htPthhssphhlhts..............sGasslsh...supl..Gps.GhNcpGLshsh........Nhhph+ph.ssGhsphhltRhlL-.ssols-AlchLp-h....PctuuhsalhlDpstphshlElsssstsh......sl+css..thhhpTNHh..........................ppshppts......pphhcsShpRhp+hpphhspttss...ppshchhsDtpst.hs.....pphsshtsTla...oulaphtstphphslupsst 
..........................................................................hp....t.....hhs+N..aD........hps..t...h...h..s...t...t.h...hh.hh..................................suh.s..t..l..s.................su.tl....Gp...h...t.GhNcpGLsh..sh...................N.h...h..p.....t....c..p...s..........s..s.....G.......h......s..t..h...h....l...h..Rh.....lL-.spslsEA.lp....lL...cch..............sttu..u.......h.sh.hl..h...D...p.s.....t...s...t.sh..l.E...hssp.sh..........................slt.ss.......t.hhp...T.NH.a......................................................t....h....h...p...p....s......................p.h...hp..p...S..h...p..Rh.......t.+h...tp...hhs.pp.sh.............ttsh.......p.......h.h.p.s.t...t.....hs............................h...p.....p.....h.............s....T..lt.............oshap.p..phph.hh.st...h................................................................................................................................................ 0 115 199 275 +1063 PF02496 ABA_WDS ABA/WDS induced protein Mian N, Bateman A anon Pfam-B_2496 (release 5.4) Family This is a family of plant proteins induced by water deficit stress (WDS) [1], or abscisic acid (ABA) stress and ripening [2]. 25.00 25.00 28.10 28.10 23.00 19.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.93 0.72 -3.81 17 393 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 64 0 39 374 0 70.70 70 52.27 CHANGED .cY+KEEKHHKHhE+lGcLGAsAAGAaALaEKHcAKKDPEHAH+HKIEEElAAAAAlGAGGaAFHEHHEKKEAKcEpcEu ..........-YcKE.KHHKHLEclGcLGAVAAGAaALHEKHcAKK.DPEHAH+HKIEEEIAAsAAVGuGGaAFHEHHpKK-A+cctct............... 
0 1 18 30 +1064 PF00950 ABC-3 ABC 3 transport family Bateman A anon Pfam-B_1591 (release 2.1) Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.50 0.70 -5.09 16 7614 2012-10-02 17:14:55 2003-04-07 12:59:11 12 12 3931 0 1534 7023 2698 251.50 28 90.95 CHANGED pacahppAllsulllulssulLGsallL++hSLhGDulSHusLsGVuluahLuls.....hhhGAhhhulluAlshsalcppo+l+pDsslGIlhushhulGllllolhpt....spssLtpaLFGslLulsppDlh.htllssllLlllllha+chlhhoFD.shApshGlslphhphhLhhLlulslVsulpsVGsILVhAhLlsPuusAhhhs+shcphhllAsllGhloshsGlhlSahhs.suoGssIVlltshlFlluhhht .................................................................................................hpFh.pAh.ls.ulhl..u..l.s.s.u.h.l.G.s..a..l..l..lRp...h...ohh....G.D....ul...S.H..u....sL........s....G.....l....A...l...u..h...h......l...u.hs..........................hl..u....u.....h...l.....h...u...l.....l...s...A....l....h.......l....t....h......l.......p.......p.....p....s.......p....h...p.....p....D.......s.....s.....l......G.....l....l...h.....s.....s...h...h.....u...l......G.....l.....l...l.....l.o.h.hs...................sss.s.L.......s...h...L...F...G..........s..l...................L............u............l............s.........................p.............D............l............h...........h............l...........h...l....lu...s.............l......l.........l...h.....l..l.h.lh...a...+...p....Lh.hhoF...D.ph...Ap..s.t.G.l.s....s....p..hl.ch.l...hhhllulslssuhpsVG...slLlsuLL...l...hP....u.A....s.A.h.h.l....s..c..s...h..p.....p....hhh.l..us.hl.u....h....l....u.s....h....h......G........l....h........l...S.............a....h........h.............s......h...........s.....sG.....ss.I.Vl.h..ts...h.lFllshh.t................................................................. 
0 472 958 1282 +1065 PF01061 ABC2_membrane ABC-2 type transporter Finn RD, Bateman A anon Pfam-B_865 (release 3.0) & Pfam-B_31 (release 15.0) Family \N 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.52 0.70 -5.17 298 15925 2012-10-03 10:13:34 2003-04-07 12:59:11 19 140 4416 0 7264 20686 4522 204.80 15 46.78 CHANGED pphtslhtRphh..phh+ss.ht.h........hphhps.llhsllhGhlaht.............t....thh.th.................huhlhhslhhhsh.shssh.shhhpppshh.h+phstshhphhshhluph...lsp.lshshlhshlhhh.lsah....h...h....uhpht..........h.....hhhhhhhhlhhhhhsu.huhhluuhs.shp.sshls.shlhhshhhhuGhhhshspl....s.....ahp..a.hhhlsPhsashpu.hhhs.h ..............................................................................................................................h..hhhlhh+phh....thh+.s......h...h..h...................ht.hlt..s...h..l....h....h.l..l...h.....u.h..l..a.h.t.............h..........s..t.thh...h............................................................................................................................s.uh.lhh.h.h..h..h...h....s....h........s....h...h........s.......h........................h........h..h.....h.....t...p....p.......shh....h.......+............h...s....t...........s...h.........s....h.....h.s....h.....h.....h..u.ph..................l.st....h...s.....h.........s...........l..l...h...s...hl.h...h...h...l...s..hh...........................h.....h...........................u.h.pht.............................th...........hhh.hh..h.h....h...l...h.s.h....h.......hss....hu.......h.h...l....u.s..........h...s........s..h.....p....s...s..s..h........l..s....s....h.l....h..h....h..........h...h..........h...h.....u...G.....h..h.h...s......h...s.........th......P....................h...h.p.........h..l.h.....h.h.sP.htahhpu.ht.s................................................................ 
0 2247 4566 6342 +1066 PF00664 ABC_membrane ABC transporter transmembrane region Bateman A anon Pfam-B_2 (release 2.1) Family This family represents a unit of six transmembrane helices. Many members of the ABC transporter family (Pfam:PF00005) have two such regions. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.40 0.70 -4.99 70 51056 2012-10-02 13:23:42 2003-04-07 12:59:11 18 190 5230 28 16541 42384 7093 265.40 13 43.23 CHANGED lhhshlhthlsshhthh...hshhhuhhhsh.............hsssstp....................psshhhhhhh......................hlhhhhh..hshhhhsh.has..htthst+lptplhpp.lhc.phhp..h..s......pppssGplssRlosDssplpssl.stplhhhh.tslhhhlsshllh...hhh..uhpLsLlhl.hhslhhhhshhh.sphhpphpppppps.........hsp.hss....hhpEslsulcTVpuastppthhpcappthpp.hpptshptshhsshhhs.htphlhhh.h.shshhaGs..hl...shtsths..ssplh.sshthhthhttsl .................................................................................................................................h....hhhhh.h....s.h.hthh.........s.h...h....h..t.h..h.ls.......................h.tt.t..................................................................t..h.h..h..h..hhh....................................................................h.h..h..h...h....h..h.......t.s...h.......h...h.....h...h...h...h..hh...................h.t.t....h....u....h....p.....h....t..........t......p....l....+..............p.p.....l....h...p....+..l..hp.........hs.h....s.hh..........................pp.p.s....s......G...........p....l....h.....s..........+.......h....s.................s..............D..h....p....p....l.....p.....p................h....h...s.........p..h....l................t....h.................h......t....s..h....h..............h..h..l.....s.......s...l..h..h.h................h.h.h........s..h..t...l....s....l...............l..h...l...h...h...h..s....l....h...h...........h...h....h..........h.........h....h.....t......p..........t...........h.......p...p...h.
..t.......p.......p.h....p...p.t.....................................................hup...hs..s...............h.h.p...E..s....l....p....G...h..p.....s......l.......+...s...........a.....s......t.........p........p........p...........h....p.......p....h.....p....p..........t................p.......p....h.......h....p...t...........t......h............p.......h........t........t.....h....t.............s.....h..h.....t..s.....h....h..t.........h...l......h........h.......s.....h....s...h.....h.h..h..h....uu..........hh..............h.h.t.s......p.....h...s........hG..t..h...h....s.h..h...h.h..h....h...................................................................................................................... 1 5333 9845 13729 +1067 PF00005 ABC_tran ABC transporter Sonnhammer ELL, Bateman A anon Prosite Domain ABC transporters for a large family of proteins responsible for translocation of a variety of compounds across biological membranes. ABC transporters are the largest family of proteins in many completely sequenced bacteria. ABC transporters are composed of two copies of this domain and two copies of a transmembrane domain Pfam:PF00664. These four domains may belong to a single polypeptide as in Swiss:P13569, or belong in different polypeptide chains. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null --hand HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -3.87 55 363409 2012-10-05 12:31:08 2003-04-07 12:59:11 22 730 6447 300 93265 270413 81790 147.80 27 41.67 CHANGED LpslshslptGchlullGtsGuGKSTLLphlsGhhpP.....spGpl.....................hhp.spsht...........hpthcppluhl.Qp..splhspho.lt-slh.s..........................................hhthtppttpsc........hpphlpplshht..........................hhcphl..............tpLSGGp+p..RlslA+slhppsplLlLDEPTs .....................................................................................................................................................................................................................................................................................................................................................................................................cslsh.p.l...p...p..G..c.h....l...u....l.....l...G....s..s............G.......u....G..K........S.....T....L.....l.........c.....h.....l....s......G....h...h....p..s........................s.s.....G.....p...l....................................................................................................................................................................................h..l...s....G....p....s...ltp....................................................h.t....t..h.....p.....p.....p.....l.......u....h.....l......h...Qp.........hs.L..h...s....p...h...s....lt-slthsh......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..............thtttptppc....................h.p...p..h..l.....p...t......l....s..l.t.t....................................................................h....p....p...h......h........................ppL..S....G...G........p.......+......Q.............R........l..........s..l....A.....R.......A........L...........h............p............p...........P..........c..........l......l.......l.l.....D..EPT............................................................................................................................................................................................................................................................................................................................... 0 28903 57358 77150 +1068 PF00561 Abhydrolase_1 abhydrolase; alpha/beta hydrolase fold Bateman A anon MRC-LMB Genome group Domain This catalytic domain is found in a very wide range of enzymes. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.07 0.70 -4.80 48 9019 2012-10-03 11:45:05 2003-04-07 12:59:11 15 58 3408 175 3041 47284 15466 242.40 13 59.97 CHANGED acllshDhhGhGtSs..................................................hsthshpshsc.lptlhp.phshpp......hhllGaShGGhluhthstphsp....plpsll.hhss......................................................................................................................hhhtpshhtt............hh.thhhshhhshhhsshpthhstth................htshpp..hsphhptht...hpshhp....shthsshht.................htthhttshhthhpp......hpsPslllhuppDp...hhs.pst.t.htphhsp.......s..phhhhss..uHhs........hhppssph..sphlhs 
..........................................................................................................................................................................................lhhh.s...t.G..s.sh..us....................................................................................................................tht..h....t....p....h....s.......h......p....c.......h......s.....t.........s........h........c......t.......l........h...........p......t........h......G.......h....cp................hsh....l.G.t.S.h..G.G....h.....h.....s....h....t....h....h.....s......p...h.Pp.................pl....p....s...h..l...h.h.us.h..............................................................................................................................................................................................................................h...h...t..t..t.....ht...............................h...t..h...........h...t...........h......h....t................h.....h......t...............t................h....s.............h......................................................................................................h............t..................................t..................ht...................hh...................................h..h....h.......................................................h................t...s.......................h.tt.....................................ht...h....h....h....h..h.....t..h.....t.....p..D.................h.h....s................t..........................t......................t......h.........t...............................h........h.........t.......tH................................tt........................................................................................................................................................................................................................................................................................................................................... 
0 897 1658 2474 +1069 PF03806 ABG_transport AbgT putative transporter family TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 19.60 19.60 19.60 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.73 0.70 -6.50 9 1493 2012-10-02 15:12:49 2003-04-07 12:59:11 8 3 1103 0 203 1520 191 425.00 40 96.44 CHANGED RFLshlE+lGNtLPcPlhLFhhhhllLhVsSuIhSshuhSsssPhs.Gst...........hlplhsLLos-GLthlhsshlcNFouFsPLGlVLVsMLGlGlAE+SGLluALh+shlppsP++lloshllhlGllSpsAuDAuYVVL.PLuAhlFhulGRHPLAGLAAAaAGVSGGFoANllluthDsLLuGhTppAAphIDPsa..ssNPhsNWYFhsASshVls.luhaVT-KlVEPRLGsapss.tppptspht...lTstE+KGL+aAGluhllhlALh.shsllPtpuhLRs.cssslt.soPFhpulVshIhllFllPGlVYGhlstol+sp+DVsshMu-uMuoMGsYIVlsFFAAQFVAhFsWSphG.llAVtGAchLcshsLsG.sLlluhIlluuhlNLhIGSASAKWulhAPIFVPMhMLlGauPEhoQAAYRlGDSlTNlITPhMsYFsLlLshsp+Yc.chGlGTLlShMLPYSlsFhlsWhlhhhlWhh.LGlPlGPGushtY .....................................................................................................................................................................................................................................................................................hLshlEhhGN.tlPcPhhLFhhhhlhlhlhohlh..uh..hs.h.s..sh.pP.............t..t......................................l...hl...h...s..L..L..ss..-....G....l....p..h....hlss.hlpNFs.sFs..P.LG.hl.......LshhlG..l.G...lA..EcoGhlssh...hhthl.t...t..s....s....t...p....h..lo....hl...l...h..h.ulh....u.p....h..A...u.D..u.u.....h....V.....l.....l...PluAhlFhuhGRHPlAGlhsAaAuluuGas...ANl.l.lss.hDsLLsGhop......t......A...A.......p...h..l...s..s...s...h......pl..s..s...h.sNaaFhhsSshllshlshhl.T...-Kl....l...c....P+..L............u.p........h.....p......s..........s...................................t......p........p.........h................p.................................lT.t.Ep+uL+h.A.s.l.sh.ll.h.l......sl.l.hhh.h.....lP..................p...s.uh.LR..s...st..s...........t.............h..l............h...suPhh.p.uIl.slIhlhFhlsGlsYGhsstph
+....sp....p....Dlh.p...................hMsc....s....h....p.s..M..u.s.a.I.V....hsF..hsAQFlA.h.F......s.a.S..N..h.G..t..hhA.l.....tG..Ac..hL...c.s.......s..hsG......s..hhlu......h.l.ll.s.u.h.lNhhluSuSApWulhAPIFVPMhMhl...G..hpPthsQhhaRlGDSsoN.l....oP....h..sah..sLll.s.a....h..p.+Y.....p..............c.............p....ht..lG.T.lh..Sh.hlP.Yol.hh.hl.sWhlhl.l..hWah..lGlPlGPGs....h............................................................................................. 1 57 123 167 +1070 PF02230 Abhydrolase_2 abhydrolase_2; Phospholipase/Carboxylesterase Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_1382 (release 5.2) Domain This family consists of both phospholipases [1] and carboxylesterases with broad substrate specificity, and is structurally related to alpha/beta hydrolases Pfam:PF00561 [2]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.31 0.70 -4.90 12 2930 2012-10-03 11:45:05 2003-04-07 12:59:11 11 32 1915 13 1262 4011 3116 197.40 21 84.53 CHANGED hs..tlhsstp.tpsslIaLHGLGDsu.....cuhu.sht.thsh.pspaIhPpusp.hPlshstGht..uWF-lhshs.st..p.spssltputpplppLl-t-hcpG............lssscIllGGFSQGuhluLassLop.p...luGlluhSuhhshttph.pt.stss...phPlhpsHGppD.llPhthGttstchLpshht..psphpsapthuHu.sspphpsltpalpphl 
....................................................................................................t............t....t..p..t.h..l.l.h.lHG....h....Gsss.................ph.h......s.........h..................p...........................h.......................h...............t............h......p.h....l..h..s..p..u.....s.......h........................s..........s...............s............t.....G.......h....................................t....W..........a..............s......l.........s.......h........s...........ps........................c.....t.........t............tl...p..t.......s.....h.t...t.l..t.p.....h..l..c.p....t....h...p.p.s..........................l.s.s.p..c.l....h....l.sGFS.Q..................Gu.......s.h.u.............lt..s...s...........h...........p...........t............s.........t..........................h....u..........u...........l.....l...u....h....S...........u..............h..................h...................s.................................................p...............h................t.........t......................................p..........s.......................t.............t......h.........s.....l......h.h.h.HG.p..t.Ds.l.l...s.h.t..h.u.t...t.s.t..p....h.Lpphuh.............plph...c...h..........h...sH......t....l..s.......p..php..hhpalt...h................................................................................................ 0 362 705 1015 +1071 PF02517 Abi CAAX protease self-immunity Bateman A, Bashton M anon Pfam-B_1073 (release 5.4) Family Members of this family are probably proteases (after a isoprenyl group is attached to the Cys residue in the C-terminal CAAX motif of a protein to attach it to the membrane, the AAX tripeptide being removed by one of the CAAX prenyl proteases). The family contains the Swiss:Q03530 CAAX prenyl protease. The proteins contain a highly conserved Glu-Glu motif at the amino end of the alignment. 
The alignment also contains two histidine residues that may be involved in zinc binding [1]. While they are involved in membrane anchoring of proteins in eukaryotes, little is known about their function in prokaryotes. In some known bacteriocin loci, Abi genes have been found downstream of bacteriocin structural genes where they are probably involved in self-immunity. Investigation of the bacteriocin-like loci in the Gram positive bacteria locus from Lactobacillus sakei 23K confirmed that the bacteriocin-like genes (sak23Kalphabeta) exhibited antimicrobial activity when expressed in a heterologous host and that the associated Abi gene (sak23Ki) conferred immunity against the cognate bacteriocin. Interestingly, the immunity genes from three similar systems conferred a high degree of cross-immunity against each other's bacteriocins, suggesting the recognition of a common receptor. Site-directed mutagenesis demonstrated that the conserved motifs constituting the putative proteolytic active site of the Abi proteins are essential for the immunity function of Sak23Ki - thus a new concept in self-immunity [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.29 0.72 -3.85 1333 12845 2012-10-01 21:07:14 2003-04-07 12:59:11 11 37 3734 0 2636 9172 1289 110.90 21 43.17 CHANGED hhhh.......hhh......hlh.......................................................hslh...ss.l..........sEEllFRG....h...l........................................................................................................................hs...t............................................................................ltph............hh..........................hhulllou.llFul.hH..............h...........................hh.....h..................................................................................................lhh..hhh....G...l.....hh......u.....h....lh...........h........po..t..s...............lh.sslhhHhhhNhh .................................................................................................................................................................................................................................................................................................................................................hh...hhhhlh......................hslh...ssl........sEEl...laRG....h....l.........................................................................................................................................................................................................................................................................h..s..t.............................................................................ltpt.........ht..........................hhulllou..llFu.l.hH...hst..............................h.h.....h..................................................................................................lhh...hh..h....G...l.....lh......u........h....la...........................h......po...p..s...............lh....ss.l.hhHhhhNh...................
...................................... 0 995 1783 2287 +1072 PF03992 ABM Antibiotic biosynthesis monooxygenase Yeats C anon Yeats C Domain This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. It's occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue. 21.90 20.90 21.90 20.90 21.80 20.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.39 0.72 -3.94 114 8140 2012-10-02 00:20:33 2003-04-07 12:59:11 11 50 3128 85 2223 5804 1847 77.50 17 63.08 CHANGED hhhlhsphp.lpsspt.ppahph.hpphsp.......hhtspsGhlphplh.........p....s..hpsssp.ahlhphWcs...psu.hpsatpo.........spapphtpt .................................................hhhhsphp..lp.s.spt...pphhpt..h..p...p..h..ht........th..ppps...G.hlshplh.....................p...............s...hps.s..s.p..h..h...h...hp..t.....Wc.s.......ppu...h.p.s.a.t.p.o..........spapth...t.......................................... 0 615 1296 1802 +1073 PF00887 ACBP Acyl CoA binding protein Bateman A anon Pfam-B_864 (release 3.0) Domain \N 21.90 21.90 22.20 22.30 21.50 21.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.06 0.72 -4.32 101 1677 2009-01-15 18:05:59 2003-04-07 12:59:11 14 62 583 29 963 1596 101 85.20 32 33.64 CHANGED hpppF-pAsphl+pLsp........pPos-phLcLYuLaKQAT.G.css..ss+P..Ghh..............Dhhu+sKW-AWppl..cGh........S+--AhppYlchlpp....lhp......phu .................................ptpFptAs.ph.l.c.p..Lsp...................pPss.-phLcl..Y..uL..YKQ..ATh.G.sss.............ss+P.....Ghh.............................D.hh.u+s..KW.....-.....A.Wppl....puh.......................o.p--AhppYlphlpplht...t........................................ 
0 305 482 749 +1074 PF03255 ACCA Acetyl co-enzyme A carboxylase carboxyltransferase alpha subunit Mifsud W anon Pfam-B_1935 (release 6.5) Family Acetyl co-enzyme A carboxylase carboxyltransferase is composed of an alpha and beta subunit. 21.30 21.30 21.40 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.66 0.71 -4.64 15 3540 2012-10-02 13:07:06 2003-04-07 12:59:11 9 7 3419 3 719 2292 2130 133.30 51 41.80 CHANGED hhL-FEKPlhELEpKIspLcclup....cschslssclppLcc+hpcLp+cIausLoPaQRlQlARHPcRPoTLDYIptlh--ahELHGDRshuDD.AlVGGlG+lcG+sVslIGHQKGRDTK-pltRNFGMssPcGYRKALRLMchA- ........................................................p..L-FEpPlh-lct+IppL.p...h.tp............p.........t....l....s........h..s...p.ElptLcp.+.........p..htc.....c...l........a......s..sL..ssWphsQl....ARHPpRPhTLDYlptl..F..s..-..Fh..E.....LHGDRs.au....DD.t......AIVGGl..A........+.l.s........G........p........P........VhVIGpQ.K.G............+cT..K...........-plpR........NFGMPp...PEGYRKALRLMchAE.............................. 1 237 478 613 +1075 PF00871 Acetate_kinase Acetokinase family Bateman A anon Pfam-B_1595 (release 2.1) Family This family includes acetate kinase, butyrate kinase and 2-methylpropanoate kinase. 
22.00 22.00 22.00 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.10 0.70 -5.93 11 6003 2012-10-02 23:34:14 2003-04-07 12:59:11 12 14 3886 38 1171 4042 326 366.90 38 95.86 CHANGED KlLVlNsGSSSlKFpLh-s........ptpsLhpGLsEpIhlssuhhhhp..stcp.tthsshssHptAlphllssLpp...thhpphs-IsulGHRVVHGGphFosSslls--llcsI+DhhpLAPLHNPApl.GIctstclhPss...pVAVFDTuFHpThPptAYLYulPhshhpcaGlRRYGFHGTSHKYVup+suchLsKPh-pLslIsCHLGNGuSlsAl+sGKSlDTSMGhTPLEGLhMGTRSGsIDPAIlsalt-ppshSss-lsshLNKKSGlLGloGloSDhRsl.-shpc.ucccAphAlchasaRlt+aIGpYhAsLt.splDulVFTuGIGENuuhlR-hhlpsLchlGlclD.EpN.h..phGccslISossS+hplhllPTNEElhIAp ...........................................................................................................h.lLllNsGSSSlKatlh...chs......................s.t...p....h...l...h...p.Gl...s....E.....p....l...s...h...p..s...u...h....h...p...................h..p....................................s..............s.......c.......p..........t....t.............h.....s....h..s......s.....H....p....tA...l....p.h.l..l.p.tLhp..........t..h..h...p..t...h.....s......pl.s..ulGHRlVHGGch.F.s.posll.....s..-.....cV...lp.p.I.c.c..hs..s.L.......A..PLH.NPAslh..........GIcss....p...cl..h......P.s.ls.........pVAV.FDTuFHpTMPctAa.hYulPh.ch.Ycc.....hslRR..Y.GFHGTSHpYVu...pc.u.A...c.h...L.....s..+..............s.......h..................c...clplIssHLGNG.u.SlsAlc.....sG+SlD.TSMGhTPL-GlhM....GTR.SGDlD.P.ull.a....l....h..........p.........p.........t.............s...........h...o........h.c.............c.l..............p....c...lLNKcSGLLGl.o.G.ho.o.Dh...Rs.....l...c...p.............sh...............t...........p.....G.........c............c......c...........ApL.A.h-hasaRltKaIGuYs.A.sh................t.........lD..AllFTuGIGEN.ushlRph..lh.p.t.L.t....h..h......G.l.p...lD.c.tN..t.......hhG..p..p...t..h....I...s....p...t.s..o.p.....l.h.s.hVI.PTsEEhhIA............................................... 
0 390 748 994 +1077 PF02550 AcetylCoA_hydro Acetyl-CoA_hydro; Acetyl-CoA hydrolase/transferase N-terminal domain Mian N, Bateman A anon COGs Domain This family contains several enzymes which take part in pathways involving acetyl-CoA. Acetyl-CoA hydrolase EC:3.1.2.1 (Swiss:P32316) catalyses the formation of acetate from acetyl-CoA, CoA transferase (CAT1) EC:2.8.3.- (Swiss:P38946) produces succinyl-CoA, and acetate-CoA transferase EC:2.8.3.8 (Swiss:Q59323) utilises acyl-CoA and acetate to form acetyl-CoA. 20.70 20.70 20.70 21.40 20.60 20.60 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.20 0.71 -4.55 5 2652 2012-10-04 00:26:15 2003-04-07 12:59:11 10 14 1717 21 626 2072 95 179.70 26 42.46 CHANGED hpcpYp+Klts.PEEAAsLlcsGpHIchGGhhuAuTApulPcYLA+R+sEhspl+ssohIDLtshphloAuPps-hhcpcsuhlaR.ss+pouchsssslNpGllcasshaLSElut..hhspGFss........IDlAlIpTTshDcHG..alNhG..Vo......ssthKuIlEl........AElVllVlssssPalNG.YDthIsl-+V..DYIltDsEhsVshlP ..............................................................................................................................t........hhhs..st.cA.s.th.l..pps...........hlshut....su.sts.ls.....A..ls......p..h..............t...h.t...........h.....h..p......l..h.h.h.t.............s....s....t..t.h..h..t........t.hh.t.h+........sa....s.s.......h.....R.c......t....Is.....p......G..p......s....as.s.h....+LSc.lsp......hh.c.p.s...hhs.....................lDVAllp.........s..ush.sccG.....ah..s.....G..lu............sp.tpp.s.ll.Eh................uth.h...l....l.s.t.s.hP+.p.hu.hs...h..h....h..t..h..s+l...s.h.lh...ss........................................................... 1 243 430 562 +1078 PF00797 Acetyltransf_2 Acetyltransf2; N-acetyltransferase Bateman A anon Pfam-B_575 (release 2.1) Family Arylamine N-acetyltransferase (NAT) is a cytosolic enzyme of approximately 30kDa. 
It facilitates the transfer of an acetyl group from Acetyl Coenzyme A on to a wide range of arylamine, N-hydroxyarylamines and hydrazines. Acetylation of these compounds generally results in inactivation. NAT is found in many species from Mycobacteria (M. tuberculosis, M. smegmatis etc) to man. It was the first enzyme to be observed to have polymorphic activity amongst human individuals. NAT is responsible for the inactivation of Isoniazid (a drug used to treat Tuberculosis) in humans. The NAT protein has also been shown to be involved in the breakdown of folic acid. 21.00 21.00 21.10 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.49 0.70 -4.79 33 2001 2012-10-10 12:56:15 2003-04-07 12:59:11 12 6 1363 31 435 1384 26 229.20 28 85.40 CHANGED sL-oLpplhttHhpulPFENLslhhG.....csl.sL-lpslacKlVpp+RGGaCaEhNtLhthsLpplGFclshLuupVhhstsst..ssshoHhlLhV.sl-Gc..salsDVGFGus..phhtPlcLtsstsQspshu.hFRl.scpss....tahLpphppppWh.................slYpFslpPpshpDapsts.hahpopPsS.hFspp..hlsuhtss-.GphsLhupphohp....psst..thphphlsssEltcsLpphFslsls...tthl 
..............................l-sLptlhhtahtslPF..EN.L.s...lh.h.s...........................p.l..pl...s........psL..hcKl...l........h......p......p..R....G..G.aCaEhNslFthhLpp..lGF.sVphlhu.....pV.........h.....h.........s............s....s.............s.............t..............h........s........s...t.............s.......HhhLlV......s...........l.........c.sp...........palsDVGFGut...h.htPl..L.....t.s......s....h.....s................s....s....t....s.....paRl..h........ppss.................ha.hl..p....t.......t......p......p..p....p....Wp........................................shY..pF..s....h..p.......p.....p.D..a....t.........sp...hast..p..p..P..pS....hF.tpp.........hl..h.......s....p.....h..........h........s.........s.......G.............+..............h.....s...L.s.....s..t..........p......hshh..................tss....hpppp.h.s....tphhphLpp.F.ultl........................................................................................................................................................................... 0 133 234 337 +1079 PF00328 His_Phos_2 acid_phosphat; Acid_phosphat_A; Histidine phosphatase superfamily (branch 2) Finn RD, Griffiths-Jones SR, Rigden DJ anon Prosite Family The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue. Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches.The smaller branch 2 contains predominantly eukaryotic proteins. 
The catalytic functions in members include phytase, glucose-1-phosphatase and multiple inositol polyphosphate phosphatase. The in vivo roles of the mammalian acid phosphatases in branch 2 are not fully understood, although activity against lysophosphatidic acid and tyrosine-phosphorylated proteins has been demonstrated. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.16 0.70 -5.12 93 4055 2012-10-02 11:42:54 2003-04-07 12:59:11 17 54 1206 64 1934 3581 41 323.30 15 63.62 CHANGED pLchVpllhRHGsRsPppshstp..t................................................................................t..h.t.hta...................................................................................pashshGt...........LTst.GttpthphGchhR.p+Ys.................tLhsst........pplhlhoostsRsltSApshhtGhh.sps.............................pshshthlsc...t.sshhhhst.........tsCsthpp......................ppsspthpphppthtpshs........tclsph...................................su.....shshtcshshhshshhppt.................................phpshssl.hsp.....tchhphph..............................................................................................................................hpslpp......hath.............uhspt....htphhGsshhsplhpplppshsppp..........................................................................phphhhhhuHDoslhslhs.sL....Glhpt...........................st.sPauuplhhEha.p.......................ssp...t....hhV+lhhs 
................................................................................................................................................................................................lp.l.hl.RHG.Rs.Ph.tt...................................................................................................................................................................................................................................................................................................................................ta.....................................................................................................................................................................th.s..h.thGt.........LT....s....t....Gt......p....hht.hG.phhR.phas...................................................thhp..p.t..h........pplh.hh..u.s......s....pR...s.h.....to............A......ps.....h...h..tGhh.sps.....................................................................................h.sh....t.tht..p...................t..s.th......h.........................tt.ss.th..pp.................................................t.s...t..h..t......t......t....t..t..h...p.ht.....................phpp.........................................................................................tt...............h..t.h...t...h..ht..h...h......h.thhp...................................t.....h..s..p...h...hpt........tp.....tp.hp...............................................................................................................................................................h.t.ph.pt....hat...................................th.s.t.....ht...t.....h...u...t...sh.h...p.....l...hp.tl........tphtp.tt..............................................................................................................................................................................................................phh..hh..hu..........HDss.....l.hs.lhs..sL...........sh............................
......................................................................p.ss.h.uuplh..hphap..............p.....................stt.......t......hlph.................................................................................................................................................................................................................................................................. 0 702 1050 1602 +1080 PF03767 Acid_phosphat_B acid_phosphat_B; HAD superfamily, subfamily IIIB (Acid phosphatase) Finn RD, Selengut, J anon Pfam-B_2784 (release 7.0) Family This family proteins includes acid phosphatases and a number of vegetative storage proteins. 27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.47 0.70 -4.99 31 1773 2012-10-03 04:19:28 2003-04-07 12:59:11 9 10 1346 54 323 1170 350 209.70 26 84.65 CHANGED thpshhshtts.sthtCso....a+.hu...sEspNlhsapshstpshphstpYh..thppaps-.pslsppAhhhAcpht.ps.........sthsshlFDID-TlL.oNhPYhthps.h....Gsctass........pp........as.cWlppGpA.sulst..sl..chhptlhphGhcIhalosRpcs..p+ssThp.....NLpptG.................ap....shc+LlL+s.ps.ps...hpY...KsscRpplhcc..GYpI.............lt......hGDphsDlhG........sspu..................pRsh+LPNPhYhs. 
...........................................................s...........................p..................t.tt.s....l......phsu..........hh.ph...p.httt...........................t.hAlshDlD-TlL..ss.ss..a....h..h...........st.c..sa.s.s..........................p.s.......................ah..ch.hp...p..u..t...s...hulPt..sl..phlch.t.p+GspIaalos.Rs...........p..........s.........t..........s.........t..........Thp..........slhp.t.s........................................................ht....sh..s.tl.l.h.t..s...pc....s...........p..s..............................Kts+pphlpcc......hpl................................................................hhh.....hGDs.s.Dhsu..........s+pssscs.......................h+hlhhsNshYts........................................... 0 69 187 257 +1081 PF00330 Aconitase aconitase; Aconitase family (aconitate hydratase) Finn RD anon Prosite Family \N 22.30 22.30 22.40 22.40 21.60 22.20 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.53 0.70 -6.02 11 14106 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 4231 25 4215 11379 9019 338.30 28 64.53 CHANGED TLhEKlhcuHlst.p..........cpusphLhl..DRhlhp-sTus.Ah.sLtsuGpslppssh..........ohhshDHsIsspsttp......Dlphptshs+cphphLppssKcaulthas.su...GIlHpls.EphhshPGh............TlVssDSHTsstGuhGuLAaGlGsuEsEcVhAsQslp.tpPKshtlclsGKLssGlTuKDlILplhGhlsscGGTGplVEahG-ulssLShpuRhTICNMuhEhGApsGhhs.DEsThcYLcups+A.cutph-cAhshhctLpsDcsA....paDpllcl-hsslpPplshspsPs.shslschssssp.p......t.t.cc..htahshhPt..h.tlcVchuhIGSCTNSphEDlppAAullKpt.....ttlpshshhhVsPGSc.V+sphE+-GLscIFp-sGhphhssGCosClG.s...sDhlps.......tspssoouNRNFEGRpssss+TH..LsSPshssAhAluG 
............................................................................................................................................................................................t..h.p-..........h..h......h.......................t..............................................................h...DH..h.......................................................s...........h......h......................................tt..h...t.........................h.........ss.......Glh..H.h.........E..hhhshh............................................................shlssDSHTshhsu.hGhluh............G..........hGs......-.sth.shh.s.tsh.......hs.c.............s.....h..thphp....G.p.h.........thssp...............Dll............Ltl............h......t.......h....h.t......h......t.....t............s............h............G...h...hhE.....a.h..G........u..lt..t.Lsh.tt+hols.NMu.....................Eh.G.A.p.su.h.h..............D.p.............................s....h.......p.................Yl................p.................................p.......................h................t..........................t........................................................h.....................t.............................................................p..................h...........t.....c........ss...........as.t.h..l.p.lchsp...l.t.P.lshspp....Pp.t........hh...ls...th.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................t......ht..l......s.shIsSC.ossp...phhhuAtl...ltt..p.........................t..h..t........h..p..h.lsPuSt......l.....t.t......h........t.....t........t.....G.....h...........
...........thh..p.h...........Gh.t...................h.h.......................u.Cs.......ChG.t.................t...ht.....................................h.hs.h.ossRNF.sR....t.........s...ph......LsSs.hssshAlhG..................................................... 0 1274 2607 3544 +1082 PF00694 Aconitase_C Aconitase C-terminal domain Bateman A anon Pfam-B_224 (release 2.1) Domain Members of this family usually also match to Pfam:PF00330. This domain undergoes conformational change in the enzyme mechanism [1]. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.54 0.71 -4.04 18 9126 2012-10-01 19:37:30 2003-04-07 12:59:11 14 28 4295 33 2735 7034 4256 118.60 33 21.91 CHANGED hshalphcGhssstthSsssschth.tphpsshsphtlssthhtshchspspph.st-ps.......shhtss.tY+.tsscllVluscNaGsGSSREHAAhu.pthGh+ulIucSFucIacsNLhppGlLPLphsps ....................................................................................h...ah..pGh.h....t.....s.sspct.h..h.hh.tp.h..sp...hth..t..s....t....h..h...............s......h..........c........h......................t.....t.............................................................................shh.s..t.pY...p........s.s.s.l.........llhu.csaG....sGSSREaAAhuhp.hh.G...........l+sVIA....p..........S.....F.u.cI.ap...sNhls..G.l.L.Plph.t.s.................................. 0 835 1673 2276 +1083 PF01756 ACOX Acyl-CoA oxidase Bashton M, Bateman A anon Pfam-B_598 (release 4.2) Family This is a family of Acyl-CoA oxidases EC:1.3.3.6. Acyl-coA oxidase converts acyl-CoA into trans-2- enoyl-CoA [1]. 
21.40 21.40 21.50 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.98 0.71 -4.92 37 1066 2012-10-01 23:33:27 2003-04-07 12:59:11 14 21 433 8 692 1082 31 165.80 23 25.84 CHANGED chpss.tshlcAachhstchlpcuspphpp..t.t.s.tpuaspsu.hphhpsu+hHs+hhllpsFhc+ls.....thsstsl+thLppLspLa.uhahlpcpuutFLptuhh..ospplshlpp.tl..cLhsplRsssluLsDuFshsDhhLNSslGpaDGclYcshachsppssssp...tscssaacph..LcPhLp+. ..................................................t.hhpsaphhtt.thltpssp.plp.p...........t..t.t....s.t.hpu.aN.p.st...hphh.....ph.u.pAHsch.hllct.Fhctlp...................ptsss.shpplLppLspLa.uLt.tl..pp.....p..............hu..tal.....pt........s........hl.......os.pp.......h.p.....t.lpp....tl..pLh.spl.....RPp....Al.sLVDu..FshsDhh.LsS........s...............lGt.D.Gph...Ytthht..h.t...............................t.................................... 0 256 392 581 +1084 PF00873 ACR_tran AcrB/AcrD/AcrF family Bateman A anon Pfam-B_578 (release 3.0) Family Members of this family are integral membrane proteins. Some are involved in drug resistance. AcrB cooperates with a membrane fusion protein, AcrA, and an outer membrane channel TolC. The structure shows the AcrB forms a homotrimer [1]. 
28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 1021 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.68 0.70 -13.50 0.70 -7.15 16 19835 2012-10-02 18:57:54 2003-04-07 12:59:11 14 33 3142 84 5307 18191 12148 888.80 26 97.07 CHANGED hspFFIcRPlFshllAllIhlsGslulhpLPVstaPpIssPsVpVsssaPGASscslpssVTpsIEpshsGlcGlphhoSpS.usGpsslTlsF-pGsDhDhApspVQs+lptApspLPpsV.pp.ulsshcsups.lhshulsSs.......ssshsth-LpsasssslpstLuplsGVG-VplhG.sphuhRIhlDPs+LsphpLThsDVhsAlpspNspluuGpL......sspphphslhspsphpos--accIll+......stsGu.VRL+DVAcVElGu-phshhuphNG.cPAsslslphtsGANsl-sucuV+pclscLpth..hPpGlclshsYDsTsalcsSIcsVscTLlEAllLVhLVhaLFLpNhRATLIPslAlPluLlGTFulhthhGaSlNsLThhuLsLAIGllVDDAIVVVENlpRhltp.puhsP.........hcAshcuhsplptAlluhuhlLsAVFlPhhhhuGssGtla+pFulTllhAhslShlVAlTLoPAlCAhlL+stppscct...........thathFNphFctssptYscslstlL+psthhlllhllllsu.slhLasplPpsFlPppDpGhhhstlQhPsGsShppopplhpplpc.hlp.cpspVcslh.shsGhs..uG..pu.NsuhsFIpLKPhcER.t.pp.ostullp+hptphspl.suslhh.hss.slpt..htsssGhchplts.hhGsuh-sLppstpplhthhtphPs.L....scV+sstpsstPphplclDc-+ApplGlsls-IspslpsAhGu.phlssFhcpuRhhcVhVpsssphR.sP-slsplhVpsspGp.....hlPlouhAohc.shGssplpRhNuh.shpItups..ssGhSsGpAhpshpplhpp..LPsGhshpaoGhuhppptussptshlhslulllVFllLAAhYESashPlslhlslPLullGALLAhhlpGhs.slhstVGllsLlGlusKNAILlV-FAp-hhcpcGhslhEAllpAs+hRLRPILMToLAhlLGhlPLAluoGsGutsppslGhuVhGGhlsuTlLsLahlPlhYlllc 
..........................................................................................................................................................................phhlpp..hh.sh.l.ls.l.h.l.h.l.h.G...........h...........h......u...........h...........h.p..........L.....s..........l...p.t.h.Pp.ls..s.l.l.p.s.s.a.PG.A..........ss.pplpppVstslEp.t.h.s...s.l.s..s..l..p.....h..p..S..pS..ptG....u..ls..l....p....F...p.........s.....s...s.........s...h...A...t..........p...V.....pp....tl....p...t.....s...........t.....s...p.....LP....p....s...l...tps.s..l...t.......................p.....s.......s....s....s.......l.h..h.h.sl.hup...........................................................t.h...s........t...p......l...p...s...h......h...p....lp..pl.p.p.ls..G.Vu.p..l.p.h...h....G..t.t.p.hthpl.ls.s..ppLtth.s......l..o..........s.c.l.h....s.u........lps..p.N.h....p..h..s..s..G..tl...........................................pttt.h..p.h....l.p.s.......p.s...........p.....h.......p....s...........h.c....c.....h..tpl..hlt...........................tt.s...G.......s.......................l.p.....LtD.............l...A.p.l.................p......h.............G....................s..............p.............p...............h.............p................................h................s................t.........................h.......................s....................G........p..................................u............s..s.......l.s.l..h.h.t................s.u.u.N.s.lp...s..s..ptlc....t....t...l.t....p....l....p..tt...............h....P.........t.............u.........l.........p............h...t....h.....h...h....D...p.o......h..l...p.....t..u...l...p...p.l.h..p....s.....L....h......u.h....h....l.....V...h..l....V.h....h.l.F..L...t....................s....h...R...u....s....l..l.ss.l....u.l..Pl..sl..l...s.o...h..h..h..h....h..h......h......G......h......o..l.N..h....l...o....L...h..u..l.s.lA..lGh....lVD.DA.IVl.l.E....N....l......+.h...hpp.....s.hss............................
.......................................hp.A.s.hpuhpplshsl.lu..hsl.s.lhs........VFlP.l.h.h..h..s...G...h...............s.G..t....h.a...p..t.hu....holsh..uh.hh....Shl..lul.s..l.s.P..h........h......s.....u.......h.........h......L...+........t....t..t.tpp................................................................................s.......t.....h...........h....p........t........h.....t.....p...........t.....Y......t.......t....h...l.....t......h...h......l......p.......p.......p....h.................h.......h.....h...h.....h....h........h..............h.........h.....h......sh........s.....h.....h................h............h........h........h...............l......s.....p...................p........F..h...P...p..pD....p..u............h....h.....h..h.ph.s............us...........o.......h..pp.s...........t.p....h.h.......p.p.......h..................p.......p.......h.........l..............t.........p....................................s.........t.............l.......p..............p............h............h.....s.........h........s........G..........h.........s.......................s..........s..................................t..........s................s.................u...............h......h.......l.........t.........L....c....s....h.....p......................p..................R..........................................................................t...............................s.............h...............p......t...........l..........h...t................c................h..................p.t.........t......h.................t..................p..........h.....................s..........s.t............h................h.........h.......h.................t.h..h.............................s..s.u...h.......p....h....l........t...........h....h.h.u.........s...h...p.t.Ltp...h.s..p..p....lh...t......h...h........t......p........s..t...l..............ss...l...p.....s..s.....h......t.....t.s......tsp.hplplD..p.ppAtth......Gl..sh.ss.l.s.p....s....l......
...............p..s..s......................h...u.u....t..........l...........s...........p.....h....h.....p........t....s..........p...............h...p...l....h....l..........p..................h.........s.............t........p...........................R................s...................p.........s...l..p....p...h......h.....l.....t.....s....s...s...G.t....................................................................h..l...P...L...ush...s...p...h.p.....t...u.s.s.t.l.p....+..h...s.th..s..h..p....l.tu..sh.....................s.s...s...............h............s.....h........u.............p.......s....h................p........h......h............p........p..........h............h............p.................p......................................................L..............P......................s.....................G.....................h................t..................h................p................a..........s..................G.......t...s...........p.......p..p....u....h..............s.............p....h..........h...........h................h................h..sh.u....l.lhla.ll.Ls.................h..............a.c.S...hh..P.hh...lh.h...s.lP.hu..ll..G.........u.........l...........h..............u..l......h............l...............h..................s...............h......................s......................h...........s.............l............h....s........l..G......h.lh.L....h...Glss.+NuI.ll.......l........-.....a............h........p..........p.........h........h...........p..............................p..............G.............h..............s.............h...............h.............c............A.....h....h.....p.A.shhR.hRPIlMT.sh.shlhGhlP.l.............h............h............u..................t..................G...............s...............G.........u.t...h..p.............p.......s.......luhslhGGhlsuTlLsLh.h.lPs.h.ahhh..................................................................................................
.............. 0 1569 3236 4342 +1085 PF05058 ActA ActA Protein Moxon SJ anon Pfam-B_5981 (release 7.7) Family The ActA family is found in Listeria and is associated with motility. ActA protein acts as a scaffold to assemble and activate host cell actin cytoskeletal factors at the bacterial surface, resulting in directional actin polymerisation and propulsion of the bacterium through the cytoplasm of the host cell [1,2]. 23.80 23.80 31.70 23.80 21.10 22.90 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.74 0.70 -13.05 0.70 -6.52 2 1025 2009-12-16 10:43:18 2003-04-07 12:59:11 7 3 37 0 1 762 1 335.50 65 99.41 CHANGED MRAMMVVFITANCITIsPDIIFAsTDSEDSSLNTDEWEEEKTEEQPSElNTGPRYETsREVSSRDIKELEKSNKV+ssNKADLIAMLKtKAEKGPN.NNNN...SEQotNsAINEEASGADRPslQVERRHPGLsSDSAAEIKKRRKAIASSDSELESLTYPDKsTKsNKKKVAKESlsDASESDLDSSMQSADESoPQPLKANQQPFFPKVFKKIKDAGKWVRDKIDENPEVKKAIVDKuAGLIDQLLTKKKSEEVNASDFPPPPTDEELRLALPETPMLLGFNAsATSEPSSFEFPPPPTD...................................sELEIhRETASSLDSSFTRGDLASLRsAINRHSQNFSDFPPIPTEEELNGRGsRPTSEEFSSLsSGDFTDDENSETTEEEIDRLADLRDRGTGKHSRNAGFLPLNPFsSSPVPSLSPKVsKISAPuLlSDITKKsPFKNPsQPLpVFNKKTTTpTVhKK.TPVphAPKLApLPsTKPQETsltENpsPhhEKQAETNpQsIsMPSLPVIQKEsTEpsKEEMKPQTEEKMVtESEsANssNGKpRSAGhEEGKLIAKSAEDEKsKEEPuNHTTLILAMLAIGVFSLGAFIKIIQLRKNs 
.....................................................................................................................................................................D.LIAML......KtKAEKGPN.NNNN.....uEQotNsAI.N..EEASGsDR........Psl.QVERRHPGLsSDSAAEIKKRRKAIA.SSD.........SELE......SLTY..........DKPTKssK+.K.VAKtSVsDsSESDh.....tSphpSsD.ps....KtsppPFFsKsFcKIKtAGpWshDKlscNPt....................cpEEVNAS.DF..P..PPPTDEE....L......R...LAL.PE.TPMLL....GFNA..........P.ssSEP.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1 1 1 +1086 PF00976 ACTH_domain Corticotropin ACTH domain Finn RD, Bateman A anon Pfam-B_1057 (release 3.0) Family \N 20.70 20.70 21.10 21.00 19.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.14 0.72 -4.11 13 1273 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 737 0 33 1256 0 38.60 83 21.34 CHANGED SYSMEHFRWGKPsGRKRRPIKVa.sNuh..E..-ESpEsaPhEh ..SYSMEHFRWGKP...V.G.KKRR..PIKVF.PoDA...E..EESSEhYPhE.h.............. 
0 1 4 13 +1087 PF00022 Actin actin; Actin Sonnhammer ELL anon Prosite Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 393 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.26 0.70 -6.05 28 15168 2012-10-02 23:34:14 2003-04-07 12:59:11 14 83 3849 392 3857 12329 1523 241.30 43 92.30 CHANGED u--lsulVlDsGotss+AGaAG-DsP+..........slhPohlG.....+spsst......................................chhlGsp...thp.....csthplppPh.ccGllpsW-shcclWcashhpc.Lpss........P......................................p-+PlLlTEsshNspppREKhsElhFEpapsPAhalupssVLotaAsG+s..................TuLVlDsGsupTslsPVa-GasLp+ult+.sluGchLoppl.ppllpp.......h.h...........h...........................s.SapshtcppllpchKEslChVs.s....................phptus...........s.ssps......Y....cLP.DG...............................................p..phhhGs-RFplsEhL.....FsPshht.ptt................................GlscllhsulttsD..sDlRtsLhuslVloGGooLhsGhs-RLppElpph................s.ssh.....+l+lhAss...ER+ausWlGGSILASLu.oFp.phWlSKpEY-EpGss...lVc++Ch 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.....................h...t.h.......................................................................................................................................t..p....p...........h....l...l.........o...E.s.............s.........h.........s.........s...........................t...p...R...c......p...............h....s..........p........lh......F..E.....p.a.s......h...s.u.....ha.l.......sh..pu..l.L.....u..l.....a...u...s..G.p.s..........................................................................................o.G..l.Vh..DsGcu..so...p.s..lP.l..h...-......G........h........s..l...s...p..........u...l......h......+..l...s......h.........u.G.c..c.lTp.hh.hplL.tp.......................p................................................................................................................................................................t...h.....t.....t.p.h..cl..l...c...p...h...K.....E...p..h..s..a.l.u.shpt.....................t.t..ts.........................................................................s..t..p..p.........a..........p..l....P...D...G..............................................................................................................................
............................................................................................................................................p.......hl..p..l..u...t...E...R.a..p....s..s...E....hl............FpP.p...hs....p...........................................................................................................ul........h....h.h..p.....s..l..............p.......s...s................h.-....h....+....t.....t...l.h............t.s..h.lh....s.G.G...s...o.....hh...........s....h.........tRh.......tch......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1405 2078 3077 +1088 PF01643 Acyl-ACP_TE Acyl-ACP thioesterase Bashton M, Bateman A anon Pfam-B_928 (release 4.1) Family This family consists of various acyl-acyl carrier protein (ACP) thioesterases (TE) these terminate fatty acyl group extension via hydrolysing an acyl group on a fatty acid [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.67 0.70 -5.45 27 1578 2012-10-02 20:54:35 2003-04-07 12:59:11 12 8 1264 3 365 2638 846 228.20 24 89.64 CHANGED GLsa+psFslRsYElGhs+TAolETlhNhLQEsuhNHspslGh.sDGFutT.pMp+hsLIWVVs+hplplpRYPsWGDsVcl-TWstupGKhGh+R-WhlpDhpsGEllsRAoShWVMMNpcTRRLs+ls-EVRsEh.shh.pp..l...--sscKLpKl......-ssu-hhctGLsPRasDLDhNQHVNNVKYlGWlLES.hP.pll-o+ELpslTL-YRRECtpDsllcSlTsht......................sstppsthphpHlLplss......GsEIs+u+T-WR. 
.........................................................................................ap.phpl.h...csD.hs..t....p..hpl....sslhphh......p.sut...p.s.t.p.h.G.hs..ph..........hpchsh..sWl.lschplcl......p.R.h.P.p..h..........s.-.p.lp.lpTh.s.h.u.h.s..+...h..a.s..h..R.c.a..t..l...h....s......p.s......G.........p...............l.s.cspoh...alhhshco...R.....+.h...t..p...l..s...s....-.lh.s.a.....................tsp.....p.+lh.+h.h.........hpt..p.p.....h.t.ps.a..p...l..Ra...D...lDhNtHVNN...........sc...Y........l........p........W.......l.........h........-........s..........h....s.........h.......c.......h...........h........p.........p............t..........t.....................p....p...........l.........p....l.........c..Yh+E.s.t....Gs......lp....................................................tt......h...l............t.............h...a.......................................................................................... 0 116 276 331 +1089 PF02770 Acyl-CoA_dh_M Acyl-CoA dehydrogenase, middle domain Finn RD, Griffiths-Jones SR, Mistry J anon Prosite Domain Central domain of Acyl-CoA dehydrogenase has a beta-barrel fold. 20.60 15.00 20.60 15.70 20.50 14.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.35 0.72 -4.45 69 26220 2009-01-15 18:05:59 2003-04-07 12:59:11 14 120 3313 214 9308 22756 9383 53.90 34 11.57 CHANGED uhuhTEss.uGoDh........tshpTpAptssss.......ahls.....GpKtalos.u..shA....shhllhu+s.s ............................................shuhTE..P....s...uG..S..Ds.................s.u.l.p.....T..p..Ap..p.......su-t...................alls..................G.p.Kh...aIos...u......shu......s.h.hlVhA+s...................................... 0 2632 5550 7773 +1090 PF02771 Acyl-CoA_dh_N Acyl-CoA dehydrogenase, N-terminal domain Finn RD, Griffiths-Jones SR, Sammut SJ anon Prosite Domain The N-terminal domain of Acyl-CoA dehydrogenase is an all-alpha domain. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.79 0.71 -3.50 988 26175 2012-10-02 12:47:07 2003-04-07 12:59:11 11 97 3435 215 9054 22885 10305 112.70 20 25.73 CHANGED o.-.......-pc..hl+cs...sRc.Fspc..cl.h..P.....h........stch.....c..c....pp.p.........hP...tc..l..hcch.u.-.h.GlhGlslP......E-YG.G...s.G..h.............s...hhshsllhEElu+..ss...s....uhsh.hhss..pssls.s........l....hpaGo.--QKp...+..aLPtlssG- .............................................................p.p.hhp.t....scp..ahpp......pl...t.....s.........h...............................h.tph.............-.......c.............ps.....p.........................hP..........pc....h..........h.p.ph...s....c....h....G.h.....h.....u...l....t..l....P...............................cc.a......G.....G...........t..G.......h...................................s...hhp.hs.l.l.h.-El.up.....ss.................s.....u.hsh.......hhss.....t.s.s.ls.h..........sl..............ht...aG.o...c.c......Q..+p...c.a.LstlhpG................................................... 0 2413 5396 7525 +1091 PF02551 Acyl_CoA_thio Acyl-CoA thioesterase Bashton M, Bateman A, Griffiths-Jones SR anon SCOP Domain This family represents the thioesterase II domain. Two copies of this domain are found in a number of acyl-CoA thioesterases. 
20.70 20.70 21.20 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.75 0.71 -4.53 9 1320 2012-10-02 20:54:35 2003-04-07 12:59:11 10 4 888 10 170 2384 900 124.10 41 59.06 CHANGED hssthFhs-h..p.css.hppsasGphhu...Qshhtu.p.ssP.cDhhlppsh................................HShaFh+sscsschllYslpo........LtpGchF.......sQsGplluo......sspcGh .........................................................................l.pttFhs-pPhchcP.s.h+.p.s.h...p...GcV.st....pQ..lWl...+...AsG...sl.P....D....D..h....hlH....p...ah.LuYuSDh..sh.Ls.s...A...l.......p...s...Hsl..........uhhp...t.h...p....l...AT..l..DH..S..h..WFH.....R.........P.........F......s........h........s.-W..LLY.............u.VESssA..u..s.uRGhsR..G..cha.......sQsGt.LlAo.......ssQEGl......................... 0 28 69 120 +1092 PF00698 Acyl_transf_1 Acyl_transf; Acyl transferase domain Bateman A anon Pfam-B_250 (release 2.1) Domain \N 24.30 24.30 24.40 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.84 0.70 -5.10 12 13987 2012-10-02 11:19:24 2003-04-07 12:59:11 16 1271 5013 68 4402 12759 2619 292.50 26 23.26 CHANGED VaVFsGQGuQWtGMGhpLlpspslFtsultcs-cshps.hGaplh-lLp..sss.......................................................thcpl-hlQPsLhulpluLsplh.puhG..lpPsuVlGHShGElAAuhVsGALo.t-ushslshRuphhtpls.stGuMsuVsh....s..c.h.t.h.splslAssNuPpollluGsp-tlp-hlpphptcs.htsphlsVshAsHSsphsslt-sLtttLup.lsshtsplshauospss.t.....sssphsApYWhpNh+psVpFppAlpuh...h-suatsFlElSsHPlLttulpcshc.........tshssssllsphpRcpss...hppFhtshsphHssGsss 
...............................................................................................................h.alFs.G......QGu.Q.hsG....MG.p.p......Lh....p........p...........s....s....ht....p.....s....h....s....p........s....s...p...h........h.......t.....................h........s...........h.........s.......l...h...p....l..lh......s.s..s..t............................................................................t.t.L..s..p....o...p...h...s....Q.P....A.....l...h....s...h....p....l....A......l..h..c....l....h.....p....s.......h.......G.............lp.P...s...h...l....s....GH....S........l......G......Eh........u..Ah.h.....s.AG..........s..........l..o.h...p.....D.....A.......h....p........lV.......t.....h.....R.......u....p.....h........h...............p................p.............h.............s........s...........s...........G..........u.......M...s..A...lhs................................stt..t.....h....h...............t.......h........t...........s......t....................l.......s.......l.A....s.....h.....N...u......P...........s..p...........s............V..lu..G....s.t....p...u..........l....p.....p....h....h.......t...t.............h........p.........t........p............G.......h.............p.s..h...h........L....s..V.......s.h..A....h.H.o.s.......h.h....c.s........h....t............p...p......h.........t.....p......h.....l......t.......p.........l..............p.........h.............p.........s............s.......p...........l........s..........l......h....o....s...l..s...u.p.hh..............................ss.s.t.h.......t....p.h..h...h...c...p..lt...p...s.V...p....a....t...p....u.............l...........p..................th...................h..............t..............t..............G...................h...............p................h.................h..........l.................E...l..uP.t......s...L...t...t..hh.p...p......................................................................................................................h
......................................................................................................................................................................... 0 1131 2658 3767 +1093 PF02273 Acyl_transf_2 Acyl transferase Mian N, Bateman A anon Pfam-B_5787 (release 5.2) Domain This bacterial family of Acyl transferases (or myristoyl-acp-specific thioesterases) catalyse the first step in the bioluminescent fatty acid reductase system. 20.30 20.30 20.30 20.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.94 0.70 -5.40 4 59 2012-10-03 11:45:05 2003-04-07 12:59:11 10 4 39 2 8 132 21 269.20 60 83.59 CHANGED hsIDHVIcVsssRcI+VWEThPKppssKRNNTIlIASGFARRMDHFAGLAEYLSpNGFHVIRYDSLpHVGLSSGpIDpFoMSlGKpSLLTVlDWLp.p+sIsNlGLIASSLSARIAY-llu-lsLSFLITAVGVVNLRsTLE+ALtaDYLph.IDElP-DLDFEGHpLGScVFVpDCFEssWDoLDSTINKhtpLslPFIAFTANsDDWVpQcEVhcLlSsI+Sc+sKIYSLlGSSHDLGENLVVLRNFYQSlTKAAIAhDsshl-lss-IIEPsFEpLTIATVNERRLKscIEs ...........................................sI-HVlplsssppI+VWET.PK...pp.sp+psT.IlI.A.S..G....FAR..R....M.D.....H......FAG......LAEYLSsNGF..HVlRYD..SL.pH..VGLS.S...Gs....IspFoM.S.l...G.K...p..SLhsVl-W.Lp...s+...G...l..p..p..lGLIAuS....L.S.A..R...I..A.Y.-...l.su.-.l..s.LSFL...IT..AV.GV.V.NL..RsTLE+ALtaDY...Lp.h.Is-lPpD.LDFE.GHpLGScV...FVpDCFcppW.DoL.-ST.ls...chppL.slPFIAFTANsDsWVcQcEVh-hlusIpSs+sKlYSLlGSSHDLGENLVVLRNFYQSVTKAAIALDss.l-lss-h......l..EPpFEpLTlsTVNERRLKscIEs........................................................................................................................... 
0 2 4 5 +1094 PF00708 Acylphosphatase Acylphosphatase Bateman A anon Pfam-B_686 (release 2.1) Domain \N 20.80 20.80 20.80 21.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.99 0.72 -3.88 75 4748 2009-01-15 18:05:59 2003-04-07 12:59:11 13 31 3398 34 1333 3407 353 90.30 31 29.81 CHANGED M...............hphplhlpG.pVQ..GVGFRhastphApphG.lpGa.VpNhs-G...pVclhspG.spps.lcphlptlpp..ussh.....uplpplph.pphshpsp........ssFpIt .........................phphhlpG..+VQ..GV.G....F..R.h..h..shphApplu.....LsG.h....VpN...t.sDG.........pV-lh....s.p......G.sp.pp....lcph....l.p.t....Lcp....usss.......A+.V.splph...p...h.p.h..p..tt......ssFpI....................................... 0 404 800 1089 +1095 PF01553 Acyltransferase Acyltransferase Bateman A anon Pfam-B_128 (release 4.0) & Pfam-B_5069 (Release 7.5) Family This family contains acyltransferases involved in phospholipid biosynthesis and other proteins of unknown function [1]. This family also includes tafazzin Swiss:Q16635, the Barth syndrome gene [2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.90 0.71 -4.71 57 17338 2012-10-02 00:16:30 2003-04-07 12:59:11 16 145 4819 2 5587 13699 5618 134.80 20 34.96 CHANGED plplps....Epl.p..................ttssllluNHpSh...lDshhls.hhh...........pshhhlupcplhth.shhshhhp....hhsslhlcR.............................tp.pttttshpthhc...hlppG................ph.lhlFPEG.....Tcsps...................tph..h.aKpGshphshps.......sssllPlslp ..........................................................................................................h...h.s.....pp.l..p..................tts.h.l..l.l..u.N..H.pSh.............hD.h..l.h....l......s...hhh.......................................t..h.ph.h..u...c..p...p...l.....h......p......h...s.....h....h.....u.....h....h.hp.................hh.s..s.l.......l..c..R.................................................................................................................tt...t.t..s..t...t..s.h.cphhc.........hl.pp..u.....................................................................................ph...lhlF...P..E...G................o.+.sps.............................................................tph.......h.h.+.s.G..h.h..th..uhps......................sssllPlhh...................................................................................... 0 1763 3288 4613 +1096 PF02805 Ada_Zn_binding Metal binding domain of Ada Bateman A anon Bateman A Family The Escherichia coli Ada protein repairs O6-methylguanine residues and methyl phosphotriesters in DNA by direct transfer of the methyl group to a cysteine residue. This domain contains four conserved cysteines that form a zinc binding site [1,2]. One of these cysteines is a methyl group acceptor. The methylated domain can then specifically bind to the ada box on a DNA duplex [2]. 
20.90 20.90 20.90 21.10 20.70 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.24 0.72 -4.38 150 1999 2009-01-15 18:05:59 2003-04-07 12:59:11 11 24 1681 5 495 1548 76 64.90 47 18.48 CHANGED -ppap.AlhsRD..spaDGpFahuVpTTGIYCRPsCsuRhP+ccNVpFasouttApsAGaRPCpRC+P- .....................................ppap.AlhsRD..sphDGpFhhAV+TTGIaCRPSC.s......u......R..t.......P..p...c......cNV....p...FassAspAhsAGFRPCKRCpP-......... 0 126 281 399 +1097 PF01602 Adaptin_N Adaptin N terminal region Bashton M, Bateman A anon Pfam-B_491 (release 4.0) Family This family consists of the N terminal region of various alpha, beta and gamma subunits of the AP-1, AP-2 and AP-3 adaptor protein complexes. The adaptor protein (AP) complexes are involved in the formation of clathrin-coated pits and vesicles [1]. The N-terminal region of the various adaptor proteins (APs) is constant by comparison to the C-terminal which is variable within members of the AP-2 family[2]; and it has been proposed that this constant region interacts with another uniform component of the coated vesicles [2]. 
25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.50 0.70 -6.18 32 3862 2012-10-11 20:00:58 2003-04-07 12:59:11 15 87 445 26 2500 4196 107 460.30 19 58.89 CHANGED cpphhpp-ltphhsph..............s.p....p+ppslpKllhlhhhGp...chs.....hhhhsll+hht.opchshK+lsYLhlh.hscppsD.....lhhL...sssslp+D.lp...ssNphhpuhALpsLuslt..sscls..csltsslpphls.spss..aVR+sAslshh+laph.....ss-hlpp....hsclpchl..sDps..........................hVhtsAlshltcl...................pssc.hh....cllthhhppltph....................s.sssahplhllchL.p....phstpsspts...........................................................................t.hhcplhshlp....................ssssuVlhEslpslh........plsspss........hhhhssshLhphLs.spcpsl+YluLps.lsplspp..........c.....pslh..cpshhlhhlp.csshsl+hcsl-lLhtlsscpNlpp...llpcLhpa......l....-ht-.p................a+pthlpsItplu.+h....ssshphhlssLlcllp.....ps..shh..sp-hl.slhpllpphsshpths..lppL....hchlps............hppsp.......h..htshlWllGEaushlss.................ssphlcplhpphh.pss.l+tt.hLsshhKht...........httst.pht.............plhphlhphsp..s.ch-lp-RAh..........hhpllptsp 
................................................................................................................p.tt.hpp.....................p.t.....hptp.h.lt+.ll.................h.hh............h.h.G........s..hs..................h.h.phlp..hh.....t...s...pphp...K............+.l.......s...............YLhh..........hh..p.......p.p.-..............................hhhL..................ssssl.p.p......D..hp.........................s....s.....h....hp.uhA..lp..sl.s..p.lt...................ssp..hh............p..l....h...l..t.p.h..lt....s....t...s....s.......hV++p.A...sls..........h..h+l...hph.................................ss.p...h...h.......t.........hs.p....l..h..p.........hl......s-.ps..................................................................................hV.h.hs..u...l.sh...l...hpl........................p..sp...............phh....t...h...h.p.t.....l.th....................................................................................s.h.s...a.h.plh.l.l.c....h.l.t.............t.h.s...tp..ttt........................................................................................................................................................................................t.hhp..lhshlp.................................sts...tuV.l.h...pshps.lh......................................................ph..s.t..s.............................hhth.hs.s...Lhphl........p....sp..............p...p.............l.p....al...u...L..ps..lttl..h.tp............................p..............sl.........p..t...p...h........l......h...l....p...........ss..s.h.....l..+......h.t.....t..l....c.lhh.t....h...s........s...p.........p..N..l.pt..............l......l.p.....c..lh..p..a...................h............p.h...-.p.............................................htt..thl.pt..l.t...t..h..s..ph..............................s..sh.phh.......l.s.h.l.hp..l..l.p..............................ts.......s.hh.......hpph..h...l...h.p..l..h.........p.......p......................s........p.......h...
...p........t............h...............l.t...p....l..................h.p..h.l.pp.................................hp....p..............................h..htshh..a.llG.E..a.u....phh.p.................................................................................s.ph.h.p......h.....h....p..t.....h...h........p.....s................s..p..h.....hlsshhKhh.........................................................................................................................tlh....h.l..th.t........p.s.-.........lppRuh....h.tlh....t............................................................................................................................................................................................................................................................................................................................................................. 0 926 1396 2042 +1098 PF03352 Adenine_glyco Methyladenine glycosylase Finn RD anon Pfam-B_3953 (release 6.5) Family The DNA-3-methyladenine glycosylase I is constitutively expressed and is specific for the alkylated 3-methyladenine DNA. 
25.00 25.00 29.60 25.10 22.80 24.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.14 0.71 -4.75 190 3641 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 2986 18 794 2600 430 166.00 41 85.63 CHANGED Ws....ss......slYhpYHDpEWGhP.lp.DDppLFEhLsLEGhQAGLSWhTIL+KR-saRpAFssFDsppV...Apa.....s.-pclccL.htssGIlRNRtKIcAsIsNA+shlplpc..ch.....GSFusalWsFV.sspP.hhsp..hpshtclP.....u..pTshScslSKcLKKRGF+FVGPThsYAFMQAsGhVN.DHlssChpptp ....................Ws..tps..slYhtYHDpEWGh...P..p...D.-.ptLFEhLsLEuhQAGLSWhTlL+K.....Rcsa....RpAFts.FD.p+V.......Ath........s.-p.-......l....-c.L..h.p.s.s..uIIRpRtKIpAslsNAp..uhlpl.....pp..........p.h...................G.SFssalWuF........l....st.ps...hsp...ht..s..hp..p..hP......................s..p..o..shS.ctlSKsLKK+GF+FVGsThsYuFhQA.....sGhVs.DHh.sC.hh..t............................................................ 0 218 478 659 +1099 PF02438 Adeno_100 adeno_100; Late 100kD protein Mian N, Bateman A anon Pfam-B_1583 (release 5.4) Family The late 100kD protein is a non-structural viral protein involved in the transport of hexon from the cytoplasm to the nucleus. 
25.00 25.00 131.30 131.10 19.40 19.30 hmmbuild -o /dev/null HMM SEED 583 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.87 0.70 -6.30 19 136 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 100 0 0 144 0 588.50 65 76.54 CHANGED ssLhKHlpRQutIl+puLp........-p.psPhoVsplSpthEptLFs......P+sPsc+pcNuos-PsPRLNFYPsFhlPEsLATYHIFFhNppIPlSC.+ANRotuDphhpLppusplsslsshccssKlh.-GLG.pEspusst.Lp........psutLVpLcsDssRLsslK.RshploaFAYPAlsLPPKl.pslh-pLlh+psp...........pps-sscPVVSDEpLs+Wls.ss.......-st..tLp......c+..RKthhuAlhhTlpLEChp+hFoc.phl+KlpEsLHYTF+HGaV+.spcIosVpLSNhVoYhGlhHENRLspssLHsoLcuEs+RDYlpDolYLFLlaTWQTAMGlWQQsL--cNlcplpclLs+p++sLasthspcslApcLAcllFPtc.LlpThppuLPDFhoQS.lpNFRoFILERSGILPuhssALPSDFVPlsa+EsPP.LWsHsYLLpLAsFLhhHuDhh......EDsuspslhcsaCcCNLCoPHRslspNsuLhNElpsIGTFElQsPsspsst......LKLTPuLWssAYL+KFsspDYasapItaYcspsc.s.pspLTACVITpscILApLppIpcuRE-FLL+KG+GVYLDPQTGEpLN ..DVLLKHLpRQShIl+DALt.......DRocsPlSVcELScAYEhsLFS......PRVPP...KRQsNGTCEPNPRLNFYPsFAVPEALATYHIFFKNQ+IPlSC.RANRoRADtlLsLtsG.uRlPDIsSLEEVPKIF.EGLGpDEsRAANA.Lppp...........ctppSsLVELEGDNARLAVLK..RolElTHFAYPAlNLPPKVMpslMDpLlhp+spsLsc......pptpps--ucPVVSDEpLuRWLG.Tp.......-Pp..sLE..........cR..RKLMhAsVLVTlELECh+RFFoDP-TLRKlEEoLHYTFRHGaVRQACKISNVELoNLVSYhGILHENRLGQsVLHoTL+GEARRDYlRDCVaLFLsaTWQTAMGVWQQCLE-pNLKELcKLLpRshKsLWTGFDERTVAuDLA-IlFPER.LppTL+sGLPDFhSQSMLpNFRoFILERSGILPATCsALPSDFVPLoYRECPPPLWSHCYLLpLANYLuYHSDlh......EDVSGEGLLECHCRCNLCoPHRSLsCNPpLLSETQlIGTFELQGPpssst..us.........LKLTPGLWTSAYLRKFlPEDYHsHEI+FYEDQ...Sc.P..+A-LTACVITQusILAQLpAIQKuRpEFLLKKG+GVYLDPQTGEpLN...... 0 0 0 0 +1100 PF03052 Adeno_52K Adenoviral protein L1 52/55-kDa Mifsud W anon Pfam-B_2151 (release 6.4) Family The adenoviral protein L1 52/55-kDa is expressed in both the early and late stages of infection which suggests that it could play multiple roles in the viral life cycle. The L1 52/55 kDa protein interacts with the viral IVa2 protein and is required for DNA packaging [2][3]. 
L1 53/55-kDa is required to mediate stable association between the viral DNA and empty capsid [2]. 25.00 25.00 47.20 46.80 21.70 17.90 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.11 0.71 -5.09 18 154 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 99 0 0 150 0 187.10 52 54.87 CHANGED lsp.ssuhAsct.......h.+.Qhcc-shcutlPppNlF...R-tpsp.....-t.RchhapuGptlphsh..cRsLpscDF....s-.s.....ulSsApsHhpAAcLtpsactTtphEsshpcoFsscl+sLlhR.ElslGLhaLhDFlpshhppPs.shsLssQLhLlspHscscsshRctlhslucsc......ucWLhDLlshltsIl.pccphsls-pVuAIs ...........htph.....GA.uPER.......HPRVQLp+DsRtAYVPtQNLF...RDcSGE...EsEEhRctRFcAGREL...R..L..DR..pRlLRsEDF..EssE.o......GlSPARAHluAAsLVoAYEQTV+pEpNFQKSFNNHVRTLlAREEVslGLMHLWDhhEAhlpNPs.SKsLTAQLFLlVQHSRDNEsFREALLNIuEPE......GRWLhDLINILQSIVVQERuLoLu-KVAAIN......... 0 0 0 0 +1101 PF02703 Adeno_E1A Early E1A protein Bashton M, Bateman A anon Pfam-B_1193 (release 5.5) Family This is a family of adenovirus early E1A proteins. The E1A protein is 32 kDa it can however be cleaved to yield the 28 kDa protein. The E1A protein is responsible for the transcriptional activation of the early genes with in the viral genome at the start of the infection process as well as some cellular genes [1]. 
24.90 24.90 24.90 25.50 20.90 24.80 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.11 0.70 -4.95 15 313 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 91 4 0 292 0 158.20 37 97.87 CHANGED MRplhhhssh.h....-hus-lLE...plVsshhss-hPp.ssshhpsPSLHDLYDLE..V-ssE.DsNEEAVsshFsDuhLLAAcEu......................sssh.ssppssGspslP-L.ps--hDLpCYE-GhPPSDsEDEpppp..shpphuspushshppt.......FhLDsPplPGHGC+SC-aHRpsTGssshhCuLCYhRspspFlYSPVSDst..--cosss.........................-pps.........SPPclssssP.slh.+PVPVRsostRRsAV-slED.......LLpE....ssEPLDLSl.KRPRs ....................................................................t.h....p....hlpthh..p..................s.oLp-LYDl-..Vps.p..DsNEcAVs.hFs-uhhLtsp.u........................s......s...s...hPtL.....hDLhCaEtshPsSDsEspp.....................................................................................hSt........................................................................................................................................... 0 0 0 0 +1102 PF01691 Adeno_E1B_19K Adenovirus E1B 19K protein / small t-antigen Bashton M, Bateman A anon Pfam-B_1569 (release 4.1) Family This family consists of adenovirus E1B 19K protein or small t-antigen. The E1B 19K protein inhibits E1A induced apoptosis and hence prolongs the viability of the host cell [2]. It can also inhibit apoptosis mediated by tumour necrosis factor alpha and Fas antigen [2]. E1B 19K blocks apoptosis by interacting with and inhibiting the p53-inducible and death- promoting Bax protein [1]. The E1B region of adenovirus encodes two proteins E1B 19K the small t-antigen as found in this family and E1B 55K the large t-antigen which is not found in this family; both of these proteins inhibit E1A induced apoptosis [2]. 
25.00 25.00 40.80 40.30 18.20 17.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.31 0.71 -4.87 17 117 2009-09-10 16:39:30 2003-04-07 12:59:11 11 2 86 0 0 102 0 134.60 47 75.49 CHANGED M-....lhphLpsapphRpllctuSspsShhhRahFGupLuclVacsKp-acppFtclLs-s.GlasuLsLGapshappcllppLDFSoPGRssAulAFlsallDcWs.pcop...lSpsahLDhlshsLWcth...h.p...phhhh ........M-..lWslLpDFppTRpLlEsu..SsusShaWRah..FGos....Lu+LVaplKcDYppEFEclLs-CsGL.a-uLNLGHpuhFpE+ll+sLDFSoP..GRTsAuVAFlsFllDKWs.ppTp...lScGYhLDalAhtLWRsW..h+ppt...hph..... 0 0 0 0 +1103 PF01696 Adeno_E1B_55K Adenovirus EB1 55K protein / large t-antigen Bashton M, Bateman A anon Pfam-B_1728 (release 4.1) Family This family consists of adenovirus E1B 55K protein or large t-antigen. E1B 55K binds p53 the tumour suppressor protein converting it from a transcriptional activator which responds to damaged DNA in to an unregulated repressor of genes with a p53 binding site [1]. This protects the virus against p53 induced host antiviral responses and prevents apoptosis as induced by the adenovirus E1A protein [1]. The E1B region of adenovirus encodes two proteins E1B 55K the large t-antigen as found in this family and E1B 19K Pfam:PF01691 the small t-antigen which is not found in this family; both of these proteins inhibit E1A induced apoptosis. This family shows distant similarities to the pectate lyase superfamily. 
21.80 21.80 21.90 31.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.22 0.70 -6.00 15 139 2012-10-02 14:50:22 2003-04-07 12:59:11 12 2 88 0 0 132 1 334.00 51 78.03 CHANGED sssshhsp....Lshuhhs+pR.EpVpap-lps-a..pct..hht-+YsFEQlKTahLpPt-.DhEtsI+paAKlALRPsspYpIp+slsI+ssCYIlGNGApVclsspcpsA.Fcsthh.uhsPGVsGMpuVTFhNlRFps-s..........asGslFhusTplllHGCsFhGFssoCl-uhussplRGCpFhuCa+ul.su+s+ucloVK+ClFE+CslGlhsEGpu+lR+Nsuo-ssCFlll+GsuplcHNhlsussch.pps.hpMlTCu..sGpspsLpolHIsSHsR+pWPsFc+NllhRCslHLGsRRGsFpPhQCNhoaopllLEscuhs+VsLsGlFDhshplaKlLRa.--s+sRsR.....sC-CGupHhph.ssshpVTE-lRsD+hhhSCsssEFsSS-E ......................s.pt.hscLshSLMsRpRPEplhatElpp-h..pct..hhppKYuhEQlKTaWLpPt-.DhE.AIcpaAKlALRPDppYplo+plsIRpsCYI.GNGApV.lss.D+sA..FcCsMh.sMhP...G..VhsMpuhsFhNh+Fpuct...............asGslFhusophhLHGCsFaGFNshCl............EsW.utsplRGCpF...........huCWhul.suRsKSphSV.KpClFE+ChLGl..s.EGpuRlR...Hsuu.-ss.CFhLlKGsAslKHNMlpGss-..pp..hphLTCs..sGhCphLtslHlsSHsR+tWPhFEpNllh+CphHLGuRRGhF.PaQCNhspsplLLEs-AhS.RVsLsGlFDMslplaKILRY..DEo..+sRsR.....sCECGG+HhRhpPVsl-VTE-LR.PDHLlhuCsssEFuSSsE............... 0 0 0 0 +1104 PF04623 Adeno_E1B_55K_N Adenovirus E1B protein N-terminus Kerrison ND, Bashton M, Bateman A anon DOMO:DM04583; Family This family constitutes the amino termini of E1B 55 kDa (Pfam:PF01696). E1B 55K binds p53 the tumour suppressor protein converting it from a transcriptional activator which responds to damaged DNA in to an unregulated repressor of genes with a p53 binding site [1]. This protects the virus against p53 induced host antiviral responses and prevents apoptosis as induced by the by the adenovirus E1A protein [1]. The role of the N terminus in the function of E1B is not known. 
25.00 25.00 34.80 33.50 21.90 20.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.50 0.72 -3.49 5 123 2009-09-11 22:02:39 2003-04-07 12:59:11 7 3 65 0 0 110 0 68.70 40 17.64 CHANGED MERsNPoEpGl+uGLHusAsVEuhptuAEEEsL+LLAuAASs.pssussssstAthtuGGus.uuuGGE..Es ...MEstsPspQGl+sGh+upu.VEshttuAsp-NLcLLAusAuh.ts...ss....puss.s.....t.uth...tuGuts..uuuGtp....t...................... 0 0 0 0 +1105 PF04834 Adeno_E3_14_5 Early E3 14.5 kDa protein Waterfield DI, Finn RD anon Pfam-B_4148 (release 7.6) Family The E3B 14.5 kDa was first identified in Human adenovirus type 5. It is an integral membrane protein oriented with its C terminus in the cytoplasm. It functions to down-regulate the epidermal growth factor receptor and prevent tumour necrosis factor cytolysis. It achieves this through the interaction with E3 10.4 kDa protein [1,2]. 23.60 23.60 23.60 25.50 22.10 23.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.47 0.72 -3.57 8 110 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 68 0 0 79 0 104.20 50 79.38 CHANGED pC+FpcPWsFLcCYpccoDhsssWlhhlshlhlhssThhultIYPphchGWNuPsAhshPphPs..tthPLQ........PhPpP............-P.PpsPosISYFpLTGGDD ......KCKFpc.WsFLcCYccKsDhPshalhIlGI.lhlhuCT.hFu.lhIY.PpFchGWNuspAhsaP.-Ps..tphP..........Phs.Pht..........pY.pEP.PphPoslSYFpLTGGDD. 
0 0 0 0 +1106 PF03307 Adeno_E3_15_3 Adenovirus 15.3kD protein in E3 region Mifsud W anon Pfam-B_3512 (release 6.5) Family \N 25.00 25.00 45.80 45.60 20.40 16.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.76 0.71 -4.35 8 100 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 71 0 0 67 0 116.40 64 88.98 CHANGED DGphoEQRll..phR.tRppp-RpspELtsLhslHQCKKGlFCLVKQAKLoYE.lsu..psHcLuYpLstQRQoFssMVGssPIpVTQQuG-scGsI+CsCcsPEClYTLlKTLCGLR-LLP ..............DGpsSEQRll..QLR.lRQQQERssKELtDslsIHQCKKG.IFCLVKQAKIoYE..lsu..psHRLoYELPpQRQKFTCMVGlNPIVITQQSG-TcGCIHCSC-SPEClYoLlKT.LCGLRDLLP.......... 0 0 0 0 +1107 PF02440 Adeno_E3_CR1 Adenovirus E3 region protein CR1 Bateman A anon Pfam-B_1854 (release 5.4) Family Early region 3 (E3) of human adenoviruses (Ads) codes for proteins that appear to control viral interactions with the host [1]. This region called CR1 (conserved region 1) [1] is found three times in Adenovirus type 19 (a subgroup D virus) 49 Kd protein in the E3 region. CR1 is also found in the 20.1 Kd protein of subgroup B adenoviruses. The function of this 80 amino acid region is unknown. This region is probably a divergent immunoglobulin domain (A. Bateman pers. observation). 20.00 20.00 20.20 20.30 19.80 19.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.17 0.72 -4.48 20 217 2012-10-03 02:52:13 2003-04-07 12:59:11 10 5 50 0 0 184 0 58.70 42 36.33 CHANGED oVshGsNhTLlGP.ssspVoWY.....ssshpchCptsphc......hpaoCstQNLTLlNVopsasG .....VshGsNhTLlG...P...sss...VT.Wa......ssphp.chCstsphc..........hpaoC..N.tQN..LTLlNVspsapG... 0 0 0 0 +1108 PF02439 Adeno_E3_CR2 Adenovirus E3 region protein CR2 Bateman A anon Pfam-B_1854 (release 5.4) Family Early region 3 (E3) of human adenoviruses (Ads) codes for proteins that appear to control viral interactions with the host [1]. 
This region called CR2 (conserved region 1) [1] is found in Adenovirus type 19 (a subgroup D virus) 49 Kd protein in the E3 region. CR2 is also found in the 20.1 Kd protein of subgroup B adenoviruses. The function of this 50 amino acid region is unknown. 26.20 26.20 26.20 26.70 26.10 26.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.01 0.72 -4.41 9 114 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 48 0 1 96 0 37.90 49 13.63 CHANGED IPsShIuIIsAVllGhslIIlChhhYACCY+Kh+.p.Kh .IPsoTlAIlsuVlsGhhllIIshhhYhCChK+.+.a.+.... 0 1 1 1 +1109 PF03376 Adeno_E3B Adenovirus E3B protein Mifsud W anon Pfam-B_3736 (release 6.6) Family \N 25.00 25.00 27.60 27.60 20.60 18.50 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.33 0.72 -4.06 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 67 0 0 48 0 66.40 59 73.42 CHANGED VuclsPDCLsPFssYLlFsFVTClslCSIlCllIsFhQhlDalaVRIAYhRHHPpYRNppVAsLLpL ...VuHAoPDCLGPFsoYLLFAhlTChCVCSIVClVITFhQhlDWhhVRhsYL+HpPcYRspsVAtLLRL....... 0 0 0 0 +1110 PF04528 Adeno_E4_34 Adenovirus early E4 34 kDa protein conserved region Waterfield DI, Finn RD anon Pfam-B_4904 (release 7.5) Family Conserved region found in the Adenovirus E4 34 kDa protein. 25.00 25.00 49.30 48.70 21.60 21.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.97 0.71 -4.61 17 117 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 86 0 0 105 1 147.40 57 51.52 CHANGED Ltshh+phlhGshhNpcahWYRchVNpthPcElhYVGSVahRGtHLIYl+l.....haDuchhsllcphs...aGhshhshGlhsshlVLsCppCs.shoEhphRsCA+RTRplhh+slpllsppsh...............ppStsEccRQ+hL+tLhpap+slhh ..........................LAsWFR+lIhGsMhNQRhPWYRplVNh.pMPKEIMYhGSVFhRGRHLIYl+I.....WYDGHsGuIlssMS.....FGWSshN...YGLLNNhVIhCCTYCp.sLSEIRMRCCA+RTR+LML+AltIlsc-TssscP..........lsSSRTE.RRQRLLRuLMc+pRPI..h................................ 
0 0 0 0 +1111 PF00541 Adeno_knob adeno_fiber; Adenoviral fibre protein (knob domain) Bateman A anon SCOP Domain Specific attachment of adenovirus is achieved through interactions between host-cell receptors and the adenovirus fibre protein and is mediated by the globular carboxy-terminal domain of the adenovirus fibre protein, termed the carboxy-terminal knob domain. 25.00 25.00 65.80 65.70 17.40 16.80 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.40 0.71 -4.61 46 374 2012-10-01 20:11:45 2003-04-07 12:59:11 12 13 129 144 0 334 1 176.00 40 50.73 CHANGED LWTTPDP.SPNCpl...pp-pDuKLTLsLTK.CGSQlLuoVSLlsV.pGp.hpslssshp......shslpLhFDssGsLhs........sSslspsYWNa.................................RsssSss..ussapNAls...........................FMPNhsAYP+...........sppspu+spIhuplYLpGp...shpPlslploaNppsss.......sYSloFsashsps..Yt...s..sFsooShTFSY ....LWTTPDP.SPNCpI......ppspDuKLTLlLTK.sGuplhusVSLlsV..sGphphlsssps.....hshslcLhFDssGsLLs........sssh..KshWsa.................................Rss..sohs..ssshpsAlu...........................FMPshsAY.Pp...........spppps.cshlhussYhtup...stpPlslplshNpcsss.......sYSIoFsauhsps..ht.....s.hsF.sTosaTFSY... 0 0 0 0 +1112 PF00608 Adeno_shaft adeno_fiber2; Adenoviral fibre protein (repeat/shaft region) Bateman A anon Bateman A Repeat There is no separation between signal and noise. Specific attachment of adenovirus is achieved through interactions between host-cell receptors and the adenovirus fibre protein and is mediated by the globular carboxy-terminal domain of the adenovirus fibre protein, rather than the 'shaft' region represented by this family. The alignment of this family contains two copies of a fifteen residue repeat found in the 'shaft' region of adenoviral fibre proteins. 
20.50 20.50 20.60 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.35 0.73 -7.21 0.73 -4.10 63 1159 2009-01-15 18:05:59 2003-04-07 12:59:11 12 20 137 15 1 857 6 30.00 30 24.56 CHANGED sssLslshussLslss.ssLslplussLshs ...sssLoLshusPLslss.ssLsLpluusLsl............... 0 0 0 0 +1113 PF04881 Adeno_GP19K Adenovirus GP19K Kerrison ND anon Pfam-B_6142 (release 7.6) Family This 19 kDa glycoprotein binds the major histocompatibility (MHC) class I antigens in the endoplasmic reticulum (ER). The ER retention signal at the C-terminus of GP19K causes retention of the complex in the ER, preventing lysis of the cell by cytotoxic T lymphocytes [1]. 20.00 20.00 22.00 83.50 19.70 18.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.00 0.71 -4.43 6 78 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 49 0 0 58 1 133.40 56 81.67 CHANGED CsLTFAs-ssc.CsVLIKCua-Ccsl.cIpapNKThspol.sssWpPGs.sp.YTVoV.............pGsDGo.hhsNsT....FIF.spMC..........DlsMaMS+QYsLWPP.oKENIVsFSlAaslsACllouLlslslthhlph+PRpuN.EKEKhP ...CslThus-pSc.CsVlIKCta-C.h..pITaKNKThsNsh.susWcPGD..p.YTVoV.............+GsDG.....NpT....FIF.phMC..........Dlshahu+.auLWPP.oK-NhVsFSlAaslhAClhouLLssslhhhlpp+PR.uN.EKEKh.......... 2 0 0 0 +1114 PF01065 Adeno_hexon Hexon, adenovirus major coat protein, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_885 (release 3.0) Domain Hexon is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. The penton complex, formed by the peripentonal hexons and base hexon (holding in place a fibre), lie at each of the 12 vertices [1]. The N and C-terminal domains adopt the same PNGase F-like fold although they are significantly different in length. 
18.60 18.60 20.00 20.00 17.90 17.80 hmmbuild -o /dev/null HMM SEED 495 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -12.75 0.70 -5.64 36 2918 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 287 43 0 1583 0 236.80 39 78.17 CHANGED FRNPTVAPTH-VTT-R.SQRLQLRFVPVppEDspYoY.KsRFoLuVGDNRVLDMuSTYFDIRGsLDRGPSFKPYSGTAYNsLAPKuAsNNs.ap.................ssss.................spshAQAshhs...................sspsst.ttspthhsssshpPpPQlG..tss...Ws...tspsstptuuGRlL.....csssshhP......CYGSYApPTN.pGuQu................s.slpphaassssssss.........ssslhhsEsVs..LpsPDTHlVhh..ssss..sups.hs..uhPNRPNYIGFRDNFIGLMYYNSsGNhGVLAGQuSQLNuVVD.LQDRNTELSYQhhLssLsDRsRYFShWNQAVDSYDs-VRlI-NcGhED-hPsYsFPlsGlss..tsasslptsss.....tsapsssssssss.....hluhGNlsuMEINLsANLaRsFLYSNVAhYLPDchKaT...PsNls........LP.sNsNTYsYMNuRlPsssllDoalNIGARWSlDsMDNVNPFNHHRNsGL+YRSQLLGNGRYspFHIQVPQKFFAIKNLLLLPG .................................................................................................................................................................................................u.hs.........................................................t..t.h.t.....Ppsp.G.....tt.....................huRhh.....c..t............................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +1115 PF03678 Adeno_hexon_C Hexon, adenovirus major coat protein, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_885 (release 3.0) Domain Hexon is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. 
The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. The penton complex, formed by the peripentonal hexons and base hexon (holding in place a fibre), lie at each of the 12 vertices [1]. The N and C-terminal domains adopt the same PNGase F-like fold although they are significantly different in length. 21.40 21.40 21.60 22.10 21.30 21.30 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.60 0.70 -4.90 23 736 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 213 43 0 552 0 193.00 81 34.68 CHANGED NsTNDQoFsDYLuAsNhLYsIPAssTslsIsIPuRsWuAFRGWSFTRlKspETPslGusaDP.FpYSGoIPYLDGTFYLoHTF++VSIpFDSSVsWPGNDRLLoPN.FEIKRs..lD...uEGYshuQSNhTKDWFLVQMhANYNhGYQGY+lPssh+.hpYuFl+NFpPMoRQlPshsp.st.achlshs.p...............aNNSGahuhps....shtt+pGHsYPANWPYPLIGpsAlts..lTp ........NDTNDQSFNDYLSAANMLYPIPANATNVPISIPSRNWAAFRGWSFTRLKTKETPSLGSGFDPYFsYSGSIPYLDGTFYLNHTFKKVSIMF.DSSVSWPGNDRLL..TPNEFEIKRo..VD...GEGYNVAQCNMTKDWFLVQMLupYNI..GYQGFal..PEuYKDRMYSFFRNFQPMSRQVVDphpYpc.YptVslsaQ...............HNNSGahu.hu......s.ppGpsYPANaPYPLIGtpAl.s.....hp....... 0 0 0 0 +1116 PF02456 Adeno_IVa2 Adenovirus IVa2 protein Bateman A anon Pfam-B_1982 (release 5.4) Family IVa2 protein can interact with the adenoviral packaging signal and that this interaction involves DNA sequences that have previously been demonstrated to be required for packaging [1]. During the course of lytic infection, the adenovirus major late promoter (MLP) is induced to high levels after replication of viral DNA has started. IVa2 is a transcriptional activator of the major late promoter [2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.18 0.70 -5.81 11 128 2012-10-05 12:31:08 2003-04-07 12:59:11 10 1 98 0 0 181 10 359.20 69 84.98 CHANGED lDpp.ht.lp-hac+ltshppslpshshu.h.....tPh....ppFsSa-chhuhuG..llpcLpcsppplccthstsphaLpssGphsSLNhshQPlIull....YGPTGsGKSQLLRNLlSspLIsPsPETVhFIsPphsMIPPpEhsAWchQlsEGNYssss-GTlsPpouTh+PcFlcMoY--hTss-NhDlscPpNlFspAAppGPlAIIMDECM-cLssp+uIShhFHAhPSKLasRaspCTGaoVFVVLHNMNPRpshuGNIssLKIQAKlHIlSs+hpP.QlsRFlpoYo+uhspsl.slLLKsIFsahpppspYsWllYNssP.pEuhpWs.hLcsppulhPhhLNlQshlacslc+Ip+hh....p-RpRappth+pKhp ..........................................LDRDAlE+lTELWDRLpLLpQTLscMPMADGL....KPL.KNFuSLpELLSLGG-....RLLs-LVRENhpVRcMhNEVAPLLR-.DGS.C..SL...N...Yp.L.Q.P.V.IGVI....YGPTGCGKSQLLRNLLS.uQLIoPAPETVFFIAPQVDMIPPSEhKAWEMQI..CEGNYAPGPEGTllPQSGTLRP+FlKMuYDDLTpEHNYDVSDPRNVFApAAA+GPIAIIMDECMENLGGHKGVSKFFHAFPSKLHDKFPKCTGYTVLVVLHNMNPRRDLGGNIANLKIQAKhHIISPRMHPSQLNRFlNTYTKGLPlAI.SLLLKDIhpHHAQ+PCYDWIIYNTTPEHEAhQWs.YLHPRDGLMPMYLNIQoHLYRVLEKIHRsL....NDR-RWoRAYRARps......................................................................................... 0 0 0 0 +1117 PF01686 Adeno_Penton_B Adenovirus penton base protein Bashton M, Bateman A anon Pfam-B_1180 (release 4.1) Family This family consists of various adenovirus penton base proteins, from both the Mastadenoviradae having mammalian hosts and the Aviadenoviradae having avian hosts. The penton base is a major structural protein forming part of the penton which consists of a base and a fibre, the pentons hold a morphologically prominent position at the vertex capsomer in the adenovirus particle [1]. In mammalian adenovirus there is only one tail on each base where as in avian adenovirus there are two [1]. 
25.00 25.00 94.70 94.50 15.40 15.00 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.49 0.70 -6.25 19 217 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 130 39 0 205 0 436.00 67 95.37 CHANGED h-..hsPPRhhAPTEGRNSI+YopLsPL.DTTKlYhlDNKouDIsoLNaQNDHSNFhToVlQNsDhoPtEASTQoIpLD-RSRWGG-LKTlL+TNhPNVscFh.SNoh+s+lMss................ctssssspYEWh-lolPEGNao.stlIDLhNNAIl-pYLtlGRQpGVhcSDIGVKFDTRNFpLGhDPlTsLVhPGhYT.cAFHPDIVLLPGCuVDFTaSRLsNlLGIRKRpPYpcGFhIhYEDLpGGNIPALLDlpsYpts....................................................................................IpP.lhpDSpuRSYpVhpssos.....TtYRSWhLAY.N........spsuspspTLLTsPDlTsGltQlYWSLPDhhpsPlTF+ss.ppssshPVVGhcLhPlhu+shYNspAVYoQllcptTs.tTpVFNRFPENpILhpPPhsTlTslSENVPuloDHGTLPL+NSlsGVQRVTlTDsRRRsCPYVYKSLusVsP+VLSSpTh ............h.-s.hVPPRYhuPT-GRNSIRYS-LsP.aDTT+lYLVDNKSuDIsoLNYQNDHSNFLTTVlQNNDFTPsEASTQTINFDERSRWGGDLKTILHTNMPNVNEaMFTsKFKARVMVuR+.s..........tsDtupchLcY-WaEFTLPEGNFSETMTIDLMNNAIl-NYLpVGRQNGVLESDIGVKFDoRNF+LGWDPVTKLVMPGVYTYEAFHPDlVLLPGCGVDFTcSRLSNLLGIRK+QPFQEGF+IMYEDLEGGNIPALLDVcpY.cScpchtptttts...t.......................................................t.httt..tsstcclsIpP.lpcDspsRSYNVlpssp.......sThYRSWYLuYsYG..................DPEKGVpSWTLLTTPDVTCGuEQVYWSLPDhhpsPVTF+so.ppsssaPVVGsELhPh.huKSFYNt.AVYSQhlcp.Ts.hT+VFNRFP-NpILhRPPssTlToVSENVPAloDHGTLPL+sSltGVQ...RVTlTDsRRRTCPYVYKuLuhVsP+VLSS+Th. 0 0 0 0 +1118 PF03955 Adeno_PIX Adenovirus hexon-associated protein (IX) Finn RD anon DOMO:DM01967; Family Hexon (PF01065) is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. 
21.80 21.80 23.30 42.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.58 0.72 -3.53 12 121 2009-09-10 22:51:00 2003-04-07 12:59:11 9 1 96 4 0 85 0 109.30 59 79.56 CHANGED -GtlhosaLTsRLPsWAGVRQNVhGSslsGtPV.hPuNStshphtphs..ushcssAAAsu..ptutssspshstsht.hstlt.ph.u........pctLthllspLcpLpppLtth .EGGlFSPYLToRLPuWAGVRQNVMGSTVDGRPV.hPANSST.hTYATVG.uSoLDosAAAAA..uAAA.TAptlAushh..us.ussssSS.....ltE-KLhsLLAcLEALoppLus.L..................... 0 0 0 0 +1119 PF03910 Adeno_PV Adenovirus minor core protein PV Finn RD anon DOMO:DM04810; Family \N 20.30 20.30 30.70 24.40 17.50 16.80 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.56 0.70 -5.37 12 112 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 88 0 0 111 0 325.30 60 96.48 CHANGED MSKRKhKEEhLpslAPEIYGP..s.hpsch..KPRplK+V........KKcc+..tc-..........p......-stVEFVRpFAPRRRVQW+GR+VpRVLRPGTsVVFTPGERSshp..KRsYDEVYuD-DILEQAApphGEFAYGKRuR..............-.lulsLDpuNPTPSLKPVThQQVLPs.s......+RGlKRp.ttcl.PThQlhVPKRp+lE-VL-phKs............................................................-PslpPEVKV..RPIKpVAPGLGVQTVDIpIPs............-s............M-sp....sc.P.s.s.................................hsshpl....QTDPWh.hs..........ssppp+ppR+YGsAstlhPpYsLHPSIh........PT.............PGYRGpp.apsphptssRRRpssspRp+p........hsPstlhRshsRRG+..pls 
.......................................MSKRKhKEEMLQVlAPEIYGP...s..ts-p...KPRKlKRV................KK+c..c...tt................s........-stVEFVRpFAPRRRVQWKGR+VpRVLRPGTsVVFTPGERSusp..KRsYDEVYGD-DIL-QAA-RhGEFAYGKRuR.............p-hlulPLDcuNPTPSLKPVTLQQVLPsss....pRGlKRE..upslhPTMQlMVPKRpRlEDVL.-p.hKh.......................................................D.tlpPE.VKV..RPIKQVAPGLGVQTVDIQIPs...................s.....................M-sppc.P...................................................................ooohEV....QTDPWhsssss............sssp.pRRR+.aGsA.StL....hPsYsLHPSIl...............PT.............PGYRGop.Yt...sRp.psup+RRppsspRRR.........lsPu.tVpRVshRcGR..hlh................ 0 0 0 0 +1120 PF01310 Adeno_PVIII Adenovirus hexon associated protein, protein VIII Finn RD, Bateman A anon Pfam-B_1405 (release 3.0) Family See Pfam:PF01065. This family represents Hexon. 25.00 25.00 49.30 40.90 20.60 20.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.58 0.70 -4.53 17 163 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 109 4 0 124 0 206.60 56 98.48 CHANGED cphPTPYlWpYQPptGhsAGAuQDYoo+hNWl........sAGPpMhs+lpslpspRNpllhppAthspsPhss.N.ssWPAs.lhp.sss...Ppslhhs+spsh-pth.........TsuGhQLAGGut................................hthpG.hQLss........P.sttth..RPsthh..QLuGuuh......hs..ppshhhhppusS..lPRoGGluspQFlcEFsPsVY.pPF.SGsPssaPcpF.s.Ycssssshcuas .............................................K-IPTPYMWSYQPQhGlAAGAuQDYSo+MNWL........SAGPpM..ISRVNslRsp.RNpILLcQAAlTsTPRspLNP.sWPusLVYQEhPs...PTTVlLPRDAtAEVpM..........TNuGsQLAGGuphs.hts.......................................ttuIKpLhIRGRG.hQLNDEh.............VSSuhGL..RPDGlF..QLuGuGR....SSFTP.pQAhLTLpoSSS..pPRSGGIGTlQFVEEFlPSV.YFNPF.SGSPGpYPDpFIPNFDAls-uVDGYD..................... 0 0 0 0 +1121 PF02459 Adeno_terminal Adenoviral DNA terminal protein Bateman A anon Pfam-B_1602 (release 5.4) Family This protein is covalently attached to the terminii of replicating DNA in vivo [1]. 
20.60 20.60 21.20 243.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 548 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -12.75 0.70 -6.34 17 138 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 99 0 0 157 3 552.20 65 87.77 CHANGED MLhsLuPpsPsTtphPhhthPPPHLLlGYQYlhRshNDYlFDsRsYSpLpYpphhp.t...pplsWoshssCSYoINTGuYpRFl.Dh-....NFp-TlsplQpAlLh-RllADLulht...hRGhGhs.h......tttpls.......ptl.pcphpplsthpspuhGhupchRlp..........pAup+DhslLssIR+L+sAhhpFLlspth..........tstcshLsLP.....s-s.sWLsuFlccFuc....php............tphsspphh+sllosLoLsps..........tusshsGGsFp...........LRPRE.sGRAVTEo.MRRpRGchlcRFlDRLPlppRRRRh..ssP.P..s.ptt.tttth....-Ep.p.............tFt-EVhsolsEsIchLp-ELTssARpppFFsFAscFYcll.chcsh..uclsEthLRRWlhYFFlsEHIAoTL.YLappLphpt.FtRaVplphsQVlhRuRDtsGp.laoRVWsEpupsA..FppLhpRIhpDlhshlERAGct-..hpp-Eh-pFhs-Isap-sSGDlpEIL+QlslN-s-IDSl-lSFRhKhoG.VsaSspcpIps.sRRVlptA..LRppc.ps ..................MLEDLAPGAPATpRWPLYRpPPPHFLVGYQYLVRTCNDYlFDoRAYSRL+YoEllpPG..hQTVNWSlMANCoYTINTGAYHRFV..DhD....DFQsTLTQlQQAILAERVVADLALlQP..hRGaGlTRM......tt-ppls.........................lERLMpDYYKsLuRCQspAWGMA-RLRIQ..........QAGPKDlVLLATIRRLKsAYFNaIlSshsu......................ppst.tpTsLSLP...............CDC.DWLDAFlERFSDP..VDLpsl.........................htssPTtQLI+CIVSALSLPNG.s................s...phpchpGGVFp...........LRPRE.sGRAVTET.MRRRRGEhIERFlDRLPVRRRRRRs...PPP....Ps......sP.E..pt...........t.E..............hh.......E.......EEEEEt......................................ssFEREVRsTlAELIRLLE-ELTVSAR..NuQFFNFAVDFYEAMERLEAl..GDloEhsLRRWIMYFFVsEHIATTLNYLFQRLRNYAVFsRHVELNLAQVVMRARDs-GsVVYSRVWNEsGhsA..FSQLMuRISNDLAATVERAGRGD..LQEEEIEQFMuEIAYQDNSGDVQEILRQAAVNDsEIDSVELSFRFKlTGPVsFTQRRQIQDlNRRVVAHAStLRAQ+p......... 0 0 0 0 +1122 PF03228 Adeno_VII Adenoviral core protein VII Bateman A anon Pfam-B_3049 (release 6.5) Family The function of this protein is unknown. It has a conserved amino terminus of 50 residues followed by a positively charged tail, suggesting it may interact with nucleic acid. 
The major core protein of the adenovirus, protein VII, was found to be associated with viral DNA throughout infection. The precursor to protein VII were shown to be in vivo and in vitro acceptors of ADP-ribose. The ADP-ribosylated core proteins were assembled into mature virus particles. ADP-ribosylation of adenovirus core proteins may have a role in virus decapsidation. 21.20 21.20 23.80 23.80 20.80 18.30 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.78 0.71 -3.51 14 129 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 102 0 0 107 0 116.10 57 66.84 CHANGED MuILISPSNNTGWG.LGs.ppMaGGA++hScpHPV+VRsHaRAsWGuhp....GRssh..h..th....t.hp...ssthssTsDsVhcs...lsAsuRtht+p+RRhc....h.sRR+thttsotAhRt........ARu ..MSILISPSNNTGWG..Lss.....Sp.....MY.....GGA++RSsQHPVRVRGHaRAP..WGAhKtt..........ttsRTT..VDDVIDpV..VADARNYT...s..ssSTVDuVIDS...VVADARsYARRKpRpR....RhARR+ps...TsAMRAAR.Alh................................................................... 0 0 0 0 +1123 PF04439 Adenyl_transf Streptomycin adenylyltransferase Kerrison ND anon DOMO:DM04121; Family Also known as Aminoglycoside 6- adenylyltransferase (EC:2.7.7.-), this protein confers resistance to aminoglycoside antibiotics. 
20.60 20.60 20.60 20.90 19.60 20.30 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.90 0.70 -5.40 6 625 2012-10-02 22:47:23 2003-04-07 12:59:11 7 5 514 1 75 442 9 256.10 36 96.18 CHANGED MRoEpEMhDllLchA.pcEclRlVsLpGSRTNpNlsKDcFQDYDIsYhVoDl-sFIp-csWLppFGchIhhQcPEDh-h...Fss-hsptauYlMLF-DGNKlDLTLhshc-hpcah.......-sDsLhKlLlDKDshl.p.lsPsDppYalK+Pop+EFpcCCNEFWhVosYVsKGlsRcEIlaAhDHhppIlRsE.LL+MluWaIu.p+GaplShGKNhKahc+YLsschWpch.uTashsuYcchWpSLFhsppLF+phupcVupthsYsYP.-h-.csIpcYhcshhpp ......................................MRoEpEhhplllphA.p-c+IRsVhhpGSRsNs.......ps.+DpFQDY..DIsYhV..p..-..hp...s.a...hpst........s........WlppF.Gp.h.lhhQpPcc.hph.......hs...s..p....p...p...t..hsYL..MhF.p.D.G.s+IDLTLhPlcplcpah..................................pt......Ds.....LhplLlDKDshh.....t....s.s..o..-.psY.hl.pp...P..op.p-Fp.csCNEF.WasosYVsKGLhRcElhaAhshhpths+pp.Ll+hls.Wpl.uh.cpsa.p.l.s.lGKphKalppY.l.st-h..hcphhpoashssh-phWpuLhhhtpLF+phupclA.pp..htapYs.phs.cpltpYhpph...h......................................................... 2 29 51 60 +1124 PF01928 CYTH Adenylate_cyc_2; CYTH domain Aravind L anon Aravind L Domain These sequences are functionally identified as members of the adenylate cyclase family, which catalyses the conversion of ATP to 3',5'-cyclic AMP and pyrophosphate. Six distinct non-homologous classes of AC have been identified. The structure of three classes of adenylyl cyclases have been solved [3]. 
23.10 23.10 23.10 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.36 0.71 -4.61 51 3690 2012-10-01 23:11:28 2003-04-07 12:59:11 16 10 3229 34 930 2667 248 182.60 21 64.24 CHANGED hhElEhKhhl....t..t......htphpspthshhpppshYasss.....shphttpp.tsLRlR............................sptth.lThKsst..t......phpsths..........................h.t..hhtlhsths.....................hp.h..sslpppRptapl....tt........stlslDtschl............hs.hElEhpspstpph.phhcp...........t..t.hth.thsthtRsYhphhtt ...............................................hEIEhKhhls.........tthpt.l...............htph.....t.....h.p....t.t...s.....h.....p......t.sh....Y.a.-Ts........c....L...t.t.p...c...huL...RlR...........................h.....s..st....th...hTlKs..ss.......................hsshhp..t....Eh.ph.ls...................................................hst..hht.h.htths.........................lp.s..l...hs..hshh.R....pphhl...............st...................hplslD....p....schh..........................sh.s...-hEL..El....p..s...s..s....h....p...t....h..h..p.hhpp..........h.......t....h......ts.....schtRhh..h...st..................................................................... 
0 244 546 742 +1125 PF01295 Adenylate_cycl Adenylate_cycla; Adenylate cyclase, class-I Finn RD, Bateman A anon Prosite Family \N 21.80 21.80 25.30 25.30 21.30 20.70 hmmbuild -o /dev/null HMM SEED 605 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.99 0.70 -6.25 18 1028 2009-10-20 15:17:26 2003-04-07 12:59:11 13 6 915 0 113 599 28 520.90 64 68.95 CHANGED -WlDhGGhuplsApEYFGASLWQLYKGIDoPYKuVLKhLLLEAYStEYPNTcLluhphKp+hh........sschs.paphDsYhhMLE+VTpYLpplsDhpRLDhlRRCFYLKssEslu...p..tsssWRpphLscLlspWsWocpplppLDpRpsWKIcpVcps+spLl-tLMtSYRNLIpFAR+pplssSIsPpDIuILoRKLYuAFEsLPGKVsLlNPpIS.sLSEssLTFIpVpp.s+t...psGWYLhNQuPcstthsspphlEasc.LsKLVAWuYFNGLlTspTpLHlhspsscl..spLppFlsDL+toFPlp.sspsospsLppPCEIRpLslhlNLppDPTp+h..pslchchpsoDlFSFGpppcsLVGSIDllYRNsWNElRTLHFcGspAlLcALKsl.sKhHpsussPcSlcVFCYSp+hRuplcshVtsLlpcCIplpLus..ppptpphpsL+luGcsatlFFEc+GlSlQcLpsshsh..............aspIoppKht..sh...ttppppphPt.IDuFASEGhlQFFFEssc..puFNVYILDEsNclElYppCsGsK--hl+clN+hYsssp.cptpsstphl..NFNhPQFYpllps......sstlpllPFpuptptp ........................................EWLDLGGLSoLSAEEYFGASLWQLYKSIDSPYKAVLKT.LLLEAY........SW....E.YPNs.+LLAcDlKQRLa........sGEls...saGLDPYshMLERVTcYLstIp..D....TRL.DLVRRCFYLKVsEKLS....p.p.p.u.ssuWRRtlL.spLVpEWsWsc.sp.LshLDNRusW.KI-QV+csHscLLDAhMQSYRNLI+.FA...R.RN..slos.S.hSP...QDIGlLoRKLYA.AFEsLPGKVTLlNPQISPDLSEssLTF..I..VPs...G..Rs.....N+sG...WYLY..N..puP...........shc..sI..l.u.a..pPLEYNRYLsKLVAWAaFNGLLT.upT+Lal+ussh.s...sKLpchVuDl.pppF.P.LR....ls.sPT..sc...A.....LhoPCEI.RcLAllVNLEhDP.Tutapsp.sl+hDh+clDlFS.FGppQpCLVGSlDLlYRNSWNEVRTLHF.sGEpuhl-ALK.TlL.....G.KMHQDA...s...P.P..-.SV-VFCYSp+LRGlIRsRVpQLVuECI-LRLus..pppcssRFKALRVuGQsWGLFF.ERhs.VSVQKLENul-F.........Ysu.ISp.NKL+.Ghslpl....cssps+.....L.P..sVVDuFA.SE.......GlIQFFFE....-o....t..p.p.GFNIYILDEuNRlEVYHcC-GoK-ELV+-VsRFYoSu+..Dc.s.hu..ushI....NFN.LP.QFYQIVps......-G..ctpVlPFRsp...p.h.......................................................................................................................... 
0 16 40 81 +1126 PF00709 Adenylsucc_synt Adenylosuccinate synthetase Bateman A, Griffiths-Jones SR anon Pfam-B_690 (release 2.1) Domain \N 19.90 19.90 21.30 20.20 19.00 19.70 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.50 0.70 -5.82 92 5395 2012-10-05 12:31:08 2003-04-07 12:59:11 16 13 4698 55 1514 3933 4438 387.40 46 96.96 CHANGED sslllGsQWGDEGKGKlVDhLu.......pcsDhVVRapGGsNAGHTlVss....s.c+atLHLlP..SGllpsss.hslIG.NGVVlcP.sLlc.Elcp.Lp.ppG.lsh..scLhISccAHllhPaHptlDthpE....t..pupt....pIGTTt+GIGPsYpDKsuRp.G.lRlsDL..h.c......chhpc+Lcph..........lp.p.Nthh.ppha.........t....p.h.shcplhpchhphsc............................................plp....sh..ls.Dsshhlpculcp.sKplLaEGAQGshL.DlDaGTYPaVTSSsssuuGsssGsGluPppls..pVlGVsKAY.oTRVGsGPFPTE..lh........s-......hG-plp.............ct..............................................G...p....EaGsTTGRsRRsGWhDhVhl+Yus.plNG.hsslslTKLDVL....sshcclKlCsuYch....sGc....................hlc.thPss.httlpcscPlYEphsGW.p....pchssscp..a--LPtsAppYlctlEchl.uls...............lshlSsGPc.RcphIh 
....................................................................................................................................................................................s.lVVlGsQW.GDEGKGKIsDh..Ls................ppuchVsRa.Q...GGpNA..GHT..lVl................s............G..pcahLHL.lP..S.GIh.pts......h....hslIG........NGV..VlsPtsLhc..Elpt...Lc..p..p.......G.......lss..........ppLhI...ScpA...p...............l...IhPYHhtlDphpE................pt..+Gsp.........pIGTTt+GI.......GPu.Y.....t.....DKs.....u.............Rh...G.....lRluDL................h..-t.....-thtc+Lcps...............lph+..Nhhh...pha.......................................tt.......psl...sh.cclh..c...c.hh..t...hu.c............................................tlp.....sh...ls..DsshhLs.......p.Ah..........c.p....G...c.plLFEGAQG.............shL..DID.a.........GT.Y.P....aVTSSNssAG.Gs...ss.G.u.G.lGPptls....pVlGlsKAY.oTRV......G.........s......G..PF..PTE..Lh...DE...........................hG-.tlp..............ch..............................................................G....p....E.........a.Gs.TTGR.R..RsGWhD...sVslRhuh.p.......l......sG.l......os..lsLTKLDVL........s..Gl.c...plKlCluYch........-Gc.......................pls...hP..hs...hpth..ppscPlYEphP.GW...p....Eshtus+s.....hc-.........LP.psAppYlc..................RlEEls.Gl............................lshlSsGPcRppTh........................................................... 0 480 920 1248 +1127 PF00106 adh_short short chain dehydrogenase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family contains a wide variety of dehydrogenases. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.81 0.71 -4.33 230 80936 2012-10-10 17:06:42 2003-04-07 12:59:11 20 600 6093 916 30667 110825 42278 163.30 25 56.70 CHANGED GTlLlTGGoGuLGttlARaLspp.Gsc+LlLsSRp.............u.tusuuscL.....hs-Lp...thG..ApVplsu.sD...luDc.sslssllssls...t.tPlsuVlHsAGl...lc..Duslssh.....os-chs..pVhpsK....ssuAhpLcclstc....hs......LstFVlaSSsAulhGusGQusYAAANuaLDuLAcpR+upGL ...................................................................................ssll...T...G......u.........u.....p.........G.....I......G....t........u.......h.............A...........p.....t.......h....u........p........p.....u............u......................h.........l.........l....h...s....s.ps..............................................pt..h.pph....................hp.p.lp.............t....G......s........p.........s.........h..........s................h..........t......h..........D.................l....s......s........t.......p...........s.......l.........p..............p.........h.......h........p......p............h...........h...........t............p..........h..........G..........p............l.................D.............l.....L..........l...........N.....N.......A.........G....l.....................tt...............s........s.......h......t......c.h..........................s....c....p.......a....c............p...h.....h..........s..l..N.................l.h...u....s....h....h....h..s....p....t.hhshh.................................htps........tG.p..I..l..s..l........u...........S............h............s............u............h............h.............s..............................s............t..............................s............s............Y..........s.......A....o..K....tu....l..h.u.hscslsh-..t...................................................................................................... 
2 8711 17860 25336 +1128 PF04619 Adhesin_Dr Dr-family adhesin Kerrison ND anon DOMO:DM04566; Family This family of adhesins bind to the Dr blood group antigen component of decay-accelerating factor. This mediates adherence of uropathogenic Escherichia coli to the urinary tract. This family contains both fimbriated and afimbriated adherence structures [1]. This protein also confers the phenotype of mannose-resistant hemagglutination, which can be inhibited by chloramphenicol. The N terminal portion of the protein is though to be responsible for chloramphenicol sensitivity [2]. 20.60 20.60 20.70 22.00 20.50 19.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.76 0.71 -4.46 6 61 2012-10-02 17:35:21 2003-04-07 12:59:11 7 2 5 49 0 67 0 136.70 49 86.71 CHANGED uFTsSGoTGTscLTVTEECpVpVsspssoKpRu-LsDuAhlGslolsApGCsTcp.sAL+AsusNYcssp.hhLhp-stps+lsV....sltAsDGs.sWTsDsushYRsssGsWsGolhllVcGDQsspPsGsYTLNL-GGYWsp ..............FTsSGoTGTscLTVTEECpVpVssho..soKsRupLs.-ustIGslsVpApGCsscQ.lAL+AssDNa-psp.hahhp-NspDKLhV....slpusDGS.uWTsDsGVaY+s........csGsWGGplsl.hVcGDQTspPsGsYTLsLsGGYWsc..... 0 0 0 0 +1129 PF03257 Adhesin_P1 Mycoplasma adhesin P1 Mifsud W anon Pfam-B_4117 (release 6.5) Family This family corresponds to a short 100 residue region found in adhesins from Mycoplasmas. 25.00 25.00 42.90 42.90 19.10 18.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.06 0.72 -3.67 7 75 2009-09-11 06:42:52 2003-04-07 12:59:11 8 2 7 0 10 73 0 87.40 58 10.15 CHANGED pGSspptGS.oGo..SAGNPDSLcpDKlspSGpshTs.p......ouspssTNYTNLPP.s..........lTPTuDWPNALSFTNKN..NAQRsQLFLRGLLGS...IPVLVN+ ..................................ptttS...uo..SAGNPsSLcpDKlop..SGps.os.p.....shsppcsTsYTNLPs.s...........loPTuDWsNALoFTNKN..NsQRsQLhLRGLLGo...IPVLlNK........... 
0 9 9 9 +1130 PF00406 ADK adenylatekinase; Adenylate kinase Finn RD anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null --hand HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.92 0.71 -4.33 23 9734 2012-10-05 12:31:08 2003-04-07 12:59:11 17 83 7181 94 2694 6272 2767 160.60 42 74.26 CHANGED llGsPGuGKGTQuppIsccaulsHlSTGDhLRApl.puGTplGcpAKphMDpGcLVPDElslsll+-cltps..ssppGFLLDGFPRTlsQA-uL-c....tshplDhVlpl-Vs--lllcRlouRhlp.ssGpsYah.apPP.c..hhsDhsup.LhpRuDDstEol++RLpsYpppTpPlI-aYppcGh ..............................................hhG.PGuGKGTp.........sth.lh...p....p....h..........t..h......s..p..l..S................sG....DhL.....R....u.........s....l......c....s..............s..........T......-.......L..........G............p..............p..................A...........K............sh...h..D......t...................G..p......L..V.......s....D.E.....l...l...........l...u...l.......l.+....-.R.....l...........s...................p...............s...........D..............s................p........p..........G.........F.......L.L.D.....G.F...PR.......T...ls.......QA.........-...u...........L.............c............c........................................................h..........u..............h.........s..............l..............D..........h..........V....l....ph..-.V.s.-..-.lll.c....R..hsGRthp.ssGtsYH.hapss....................................................................h.tshsDhsuttLh.RsDD.p.pE..T..l.+.p...Rlt..a...h.....p.........s...t...PlltaYtt...h............................................................................................................................................................................ 1 926 1587 2197 +1131 PF05191 ADK_lid Adenylate kinase, active site lid Finn RD anon Manual Domain Comparisons of adenylate kinases have revealed a particular divergence in the active site lid. 
In some organisms, particularly the Gram-positive bacteria, residues in the lid domain have been mutated to cysteines and these cysteine residues are responsible for the binding of a zinc ion. The bound zinc ion in the lid domain, is clearly structurally homologous to Zinc-finger domains. However, it is unclear whether the adenylate kinase lid is a novel zinc-finger DNA/RNA binding domain, or that the lid bound zinc serves a purely structural function [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.93 0.72 -4.29 141 6209 2009-01-15 18:05:59 2003-04-07 12:59:11 9 17 5432 61 1176 2972 552 36.00 57 18.92 CHANGED RRlCts..CGpsYH........lhasP..PKhpGlCDhCGs..pLhpR.sD ....................RRsHhs..SGR.oYH........shaNP.........PK..........hEG.........K............D.Dl.TGE...-Ll.RcD.............. 0 385 706 964 +1132 PF05221 AdoHcyase S-adenosyl-L-homocysteine hydrolase Finn RD anon Pfam-B_157 (release 2.1) Domain \N 25.00 25.00 25.20 27.90 24.40 24.20 hmmbuild -o /dev/null --hand HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.67 0.70 -5.45 9 2525 2012-10-02 14:31:05 2003-04-07 12:59:11 12 10 2027 95 1084 2299 4318 421.00 48 95.54 CHANGED tcYKVtDIuLAsFGRKEl-lAEsEMPGLMshRccYGsSQPLKGA+IuGCLHMTlQTAVLIETLsALGA-VRWuSCNIFSTQDaAAAAIA.........tuGlPVFAWKGET.EEYhWCl-pslp.asDG..tssNhILDDGGDhThLlpc.........................................KasphhtsIhGlSEETTTGVH+LYch.ppGpLhhPAINVNDSVTKSKFDNlYGCR-SLlDGl+RATDVMlAGKlAVVsGYGDVGKGCAuuL+GhGARVlVTElDPIsALQAuMEGapVsslE-lspcucl..hlsTsssssIlsscch.hhppsslssshtah-h-.psthhphsupcphsIKPQVDcahh.sGp+IILLAcGRLlNLGCATGHPSFVMSsSFTNQVLAQlELWspt..............ucY....cssVYlLPKpLDEcVAtLHLupLss+LTKLoccQAcYlGlPlpGPFKsDHYR 
...........................................................................................................................capVtDl.u.LA..saGR+EIclAEpEMPGLM.u.lRccaus..pp.PL+GARIsG.s.LHMTlQTA...................VLIETLhALGA.-.V...R...WuSCNIa.STQDcA...AAA.l.A.............t.s.G.l.P.V..FAWK...GEo.-EYahshcpsl..a...........s.s...................ht.s.NhILDDGGDh.Thhl+..p.................................................ca..sch..hp.p...l............+G..loEETT......TGV.+.RLYphtcp....G...pL.hhPAI.NVNDSVTKSKFDNhYGCRcSLlDGIpRuTDlhlAGKhsVVsGYGDVGKGsAtuL+uhGApVhVTElDPIsALQAsM-GacVssh--ssppuD................................................................hhssssssphhhtphhthhptsshssssthtp.p..hthhthpshphhph+splcchhhssG+plIlLuEGRLlNLGsAoGHPSF.VMSsS.FsNQ...s..LAQI.ELapp.t.........................................s.pY......c...p.VY..h.LPKcL...DEcVApLHLpplGscLTpLop-QAcYlGlshpGPaKsphYR........................................................................................................................................................................................................................ 
0 369 670 896 +1133 PF00670 AdoHcyase_NAD AdoHcyase; S-adenosyl-L-homocysteine hydrolase, NAD binding domain Bateman A, Griffiths-Jones SR, Finn RD anon Pfam-B_157 (release 2.1) Domain \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.03 0.71 -4.40 16 2557 2012-10-10 17:06:42 2003-04-07 12:59:11 16 15 2063 95 1090 4107 3855 158.70 52 35.69 CHANGED NLYGCRESLlDGI+RATDVMIuGKlAVVsGYGDVGKGCAtuL+uhGARVlVTElDPIsALQAuMEGapVsTlE-ssccucIFVTTTGN+DIIps-HhppMKcsAIVCNIGHFDsEIDVshLpss.uhcp.slKPQVD+aphss.G++IILLAEGRLlNLGCATG ..............................NhYGCRcSLl..DGI...p....R..u..T.D.l...h...lAG...K.....s.....sV.Vs.G.Y.G.D.............VGKGsAt.uL+.uhG.A...p...V.h.....V..T.....E...l..D.......P....I...s........A......L....Q....A.........s...M....-......G.......a....c.....V.....s.............s.............h.......-............-.........s............s...p...............p........u.....D...I.....a.....V....T....s.....T....G....N....p...c....lI...st.-.Hh.c.t..MKc.pA..IV..s.N.....I...G..H...FD..sEI.D.....l..s.s.L.cph....th..........p.h.pp.l.K.P......Q......VD...c.........h..........h........h........s......s........G.............+....p........llLLAE..GRLVNLGsATG........................................................................... 0 375 679 903 +1134 PF03747 ADP_ribosyl_GH ADP-ribosyl_GH; ADP-ribosylglycohydrolase Bateman A anon COG1397 Family This family includes enzymes that ADP-ribosylations, for example ADP-ribosylarginine hydrolase EC:3.2.2.19 cleaves ADP-ribose-L-arginine [1]. The family also includes dinitrogenase reductase activating glycohydrolase [2]. Most surprisingly the family also includes jellyfish crystallins [2], these proteins appear to have lost the presumed active site residues. 
26.10 26.10 26.20 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.93 0.70 -4.63 176 3139 2009-01-15 18:05:59 2003-04-07 12:59:11 9 53 1913 22 1016 2670 767 273.20 22 79.26 CHANGED GullGtAlGDALG....h.shEh...hshs..plp.t.ht............................................................hh.ht..htsGph.....T....DDTphslslsculhp..........................................th.....s..ps.........hhpc.hht...........Whp...............................t.shshG.....ssstpul...............pph...ht..ut......t.t........................................................................ssus.GuhMRssPl.ul..hh...........s.pph...hp.hutpsutlTHssspuhtuuhhhAhhltth.lpGp.....s...htphhttt..................................th..tppphtptlpthhphtppst.............................................htthtssshshcslshulhshhp................sss........apculhtsls....hGGDoDosuAlsGulhGAha.G.h....ps...lPpp..W .................................................................................................................................................................................................................uslhGtslGDA.hG..........h.shEh..........hshp.........plp..t.hs.h..............................................................................h.h.....h.ttu...ph.......................T.......................DDTt..hslslscuLhpt............................................tth........s...ps.................hhpp.h.t..................Whp..................................h.ttshG..................ss..sptul....................................pth.......pp.....st.............t.......................................................................tuhsN....GusM..RlsPl..uh.......hhst..............shpph.................hp.hsths.......u....ph..THsp.....s..h.u.ltuuhsh...Ahhlths....lp..Gt.........s...hpphhpth..................................................................................................................tt..pht.t.plphhhphhpptt................
.............................................................hht.h..t.sss....t.....sh.-.sl.s.sAlhhhht.......................................................sss..........................hpcsl..hh.sss....h.G.G.........DoDThuA.........hsGu.....lsGAha.G.h.....ps........ls................................................................................................. 0 390 677 884 +1135 PF04587 ADP_PFK_GK ADP-specific Phosphofructokinase/Glucokinase conserved region Waterfield DI, Finn RD anon Pfam-B_4731 (release 7.5) Family In archaea a novel type of glycolytic pathway exists that is deviant from the classical Embden-Meyerhof pathway. This pathway utilises two novel proteins: an ADP-dependent Glucokinase and an ADP-dependent Phosphofructokinase. This conserved region is present at the C-terminal of both these proteins. Interestingly this family contains sequences from higher eukaryotes. [1,2,3]. 20.90 20.90 24.50 23.80 20.30 18.30 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.51 0.70 -6.27 9 234 2012-10-03 06:25:16 2003-04-07 12:59:11 10 6 153 7 154 223 1 386.20 27 87.69 CHANGED slh+shulhhAYNsNlDAIh.hLpsE.....slp+hIpchGtcclh+t.EEhP+cIpp..-hlupllaul+pGKsAEl.lhs-c.l+pahtcpat..aDp.R....MGGQAGIhANlLuultshpVIsasPhLuKh.ApLF...sslhhPshEpGcLhLh..+E.acts-...sscIphIaEFp+G..FKlh..phpAP+usRFIsuuc.ps.s+l.h+--hpchLpEIuc.ps-hAIlSGhQulp.....ucstph.l+ps+EclplLpp.spsI+sHLEFAShs-cclRccll..plLshspSVGhsEsElApllplLGhc-LAccIhshsh.l.-ss.lhtuhlhL.c..s..............lchlphHTlhYhhhlT+tcs....EclccuLtFuslhAAs+AphGsIps.--l+.GLcVPhscpuphlcth.chcap.tcsh..h..........chc-YplshlPT+lVpcPhSTVGlGDTISouA.Flo.h 
.....................................................................................................................................tlhluhssslDhlh.hl...........................ph.lpt...hs..h..p.h..........t.t..pph.....lps.t-l.htshhahhppGtuAEhhh.s...pp.hpphht...tht.......p..p........hGGs...Aulhupthu...h..t.s....hpV...lltsshhs+...tphh.....tt.l.hhP......tp...............................schHhIhEYptG....tphu....phpuPpuNRaIhspD.pN.sthp.....hh-.a.hp.........tLpc.......hp......sDlsllSGlp.hhc........s..p.s....tchhhccl...tc.l......p.lpp...spsl..l.HhEhAShsspclhpplhp.plh.PhVsSlGhNEpELh.lh...pshstsc.s.....h...thst..l..pss...l.chh...h...hlhcphs..................................LsR.lHhHTlsYphhhshpt.........hc.hppshthuuh.stssts.pt...........ss.hts...h-.s....c...h.s..L.chs..phhs.p.h..ct........c...h.h...p....tpsh.....h...........................pppshphhhsP.shlscpPh.p..TlGlGDsISutu.hh........................................................................................................................... 0 42 64 112 +1137 PF01117 Aerolysin Aerolysin toxin Finn RD, Bateman A, Griffiths-Jones SR anon SCOP Domain This family represents the pore forming lobe of aerolysin. 
24.30 24.30 24.60 24.70 23.60 24.20 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.28 0.70 -5.51 14 252 2012-10-01 20:43:00 2003-04-07 12:59:11 15 9 61 14 43 279 4 219.70 45 72.35 CHANGED ss.h-VptcllspcspF.l+PluhL.AHhLGYAWsGGspupaVG.EDhslpRs....uD.uW.lpusssGs...CsGYR.CsE+hphplsNFpaslsspshpaGssp.pc+chlpTlsuhAhNpoDpspp.hllshphspoTsW.....SKTssashu-plthcspFcaP.hhG....cT-lslphpAsQsaoso.Nusopops.shpspspVPs+Sph.lplcla+oplshPYchph.hsYDl..phsGFLRWGGNAhh.sHPs.........NRPshsaTFs..hupspptupslhYQasH+hIsGps+aWDWsWslsc..GhushpaAhu....ssl.R.atuhloGpFpAtupasusI-hupshslsup.....phsphstpspttssshpl.hsshshcpLst ......................................................................................................................................................................t.t....h..h.hhN.sp..tp....hpht..h..ptsp...p..h.....shossht..hupp..lth..p.pa.phP..LsG...........coElulEluA..s..QSWAop..pG..GSTT.pol..S...ps...R..s...TVPs+SplPV+ltLYKusISYPY-FKAclp................................................................................................................................................................................................................... 0 15 29 36 +1138 PF05110 AF-4 AF-4 proto-oncoprotein Moxon SJ anon Pfam-B_6407 (release 7.7) Family This family consists of AF4 (Proto-oncogene AF4) and FMR2 (Fragile X E mental retardation syndrome) nuclear proteins. These proteins have been linked to human diseases such as acute lymphoblastic leukaemia and mental retardation [1]. The family also contains a Drosophila AF4 protein homologue Lilliputian which contains an AT-hook domain. Lilliputian represents a novel pair-rule gene that acts in cytoskeleton regulation, segmentation and morphogenesis in Drosophila [2]. 
29.90 29.90 30.20 30.70 28.90 29.80 hmmbuild -o /dev/null HMM SEED 1191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.23 0.70 -14.19 0.70 -6.69 6 483 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 78 0 208 431 0 610.70 28 90.08 CHANGED Yc-DRNlLRh+E+ERRNQEsQQ-c-AFPsshPLFuEPYK....TuKGDcLSSRIQsMLGNYDEMK-FLos+Sp.s+LsuhPpssss.SsscKscPpah.s-pRuputsuS.......pQSs+sossGPsPs.....ushSpSpKtops.....sS.+s+uussPuo..uoQc+spp........ptspp+tpupptu...-pu.p+p.SPhhuplspssss............pppuSS+..s..puusssuKpphpuKSP+-h-sshps.ppP.suhs.......uSuQhssQsF.PPSLhSKousMQQKPTAYVRPMDGQDQAPsESPELKsS...tEsYusQSapss.s-hKssA..KAKLoKL+IPSQslEsshSs-ssCVEEILKEMTHSWPPPLTAIHTPu+sEPSKFPFPTK-SQalosuoppQKphDsss+opsssppsp.SMLEDDLKLSSSEDSDsEQss..............-KsssRssP...sssspspsEsussu+uuSu.ScSsSESSSuSDSESESSSSDS...EsNEP.csuoPEPEPPoTNKWQLDNWLsKVssppss........PsusssopsPppps.cscG+spususp..s-s+-s..pSos+sttcsRsspKuPcsG+s+.QKSPspu....................-..sstRRolGKKQPKKsEKssus-t.psuh+sEpEsu..shtpusphssc+sKspTKGsp+sus+KEPKsulsss................uEK+KaKuso+sssKS+EhlE..TDSSoScSspc.......cuLP.......ssopSsGsscSspt.........RTsss.pushspssht+pp.....hhlPhc-sELLSPL+Ds-shps........LhVKIDLsLLSRIPt+.P.tcus.s+sscccs.stsp+csScstSEKuSsKuKRK+Ks-p-sctsspKKs+lEccsp........osuss.uspcsSps+sSpppStsKcc-hLPsP.u...Plsspo.Kst+su.KRshsppsosusssPpusspspSussssossKp++sEuKGusopctt+ssSsssssp...................................................................hPlPsLosusoKspRsKLsFDD.spsADaaMQEAKKLKHKADAhsD+FGKAlpYL-AVLSFIECGNAhEpsu.EuKSPYsMYSETV-LI+YsM+LKsauuPsAostDKpLAVLChRCpSLLYhRMF+LKKDpAlKYS+TLsEHFK.NSSKsAQAPSPsh...........u+usGsPSPlS.PpsSPuoSVGspSuuuSu.uusu..uoVoIPQ+IHpMAASYVsITSslLpAa-lW-QAEpLo+ENKEFFu-LDplMGPLshNSS.Ms-LV+.......YoRQGLphLRpsA+ 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 20 34 93 +1139 PF03969 AFG1_ATPase AFG1-like ATPase Bateman A anon COG1485 Family This family of proteins contains a P-loop motif and are predicted to be ATPases. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.27 0.70 -5.65 8 2702 2012-10-05 12:31:08 2003-04-07 12:59:11 11 19 1983 0 969 3282 2551 309.70 33 88.96 CHANGED sTshpRY.pppLppsshhtDsAQusAlssL-cLtp...cltsushs+.t.....thhttLau+K...sttss+GLYlWGGVGRGKTaLMDsFFcuLPsppKtRsHFHcFMhcVH--LppLp..............ttsDPls.suDchss-splLCFDEFpVoDIuDAMlLupLhcALFsRGVoLVATSNhsPcsLY+sGLpRpcFLPtIshlcs+hpllsVDushDYRLRsLppA.hahhP.sttsptthsthapthh...ut.pstssL-lsuRtlps.ssssslstauFssLCpsspuspDYlsLuctFpTVhLssV.sMs.sccstA+RFIsLVDclYDpcl+LllS...AEs.hp-LYpuGch..tFEFpRshSRLhEMpS....t-alsRt 
.................................................................................................t.a..t.lt..th.....D.s..hQtthh.t.t.hp.hhp....................th......t.......t.........t.........................hht.t.h........h...t................t.......................t......................ts...........s.........+...............G...LYhaG.sV.G.pGKTh....L.M....D....h....F....ap..s.l......P..................h.............p..........p..........K.......h..R...h...H...F.....H...pF.M..h....c.lH.p..clpt.lp.........................................................................s..p..t..sP..l...t...l.A.c....p.h.......t....tc.......s..p..l.LCFD...E...F.......V...o....D.I...s....D...A......M....l..Lu..p.......L...h...p...t.L....F...t..p...G..l..s...lVAT.SNh..sP...-...p...L......Y.......c...s....GL.pR..t..p.F.L..P.t.....I....sh.lpp..phplhp.l..D...u......s.....h.DYR....h.........c...............s...h...p....t........s.......t......h.......ah.........P..........h..........s......t..............p..........s..........p.........t..........t...........h.p.......p.......h...........a.......t..lss.....................t..t....t.........t.....s......................s.............l...p.......l...t......t.......R...l.....ts....................t...h..s..s..........t......s...h..hhsFtpL..C............t...........s..hutt.DYltLuchac...s.lhltsVPhhs.................................c..stA...RRFlsLlDthY..-pcl+Llho..............................Atssh.cl....a............t....tth...................thcatRshSRL.EMpst............................................................................................................................ 2 276 558 798 +1140 PF01314 AFOR_C AFOR; Aldehyde ferredoxin oxidoreductase, domains 2 & 3 Finn RD, Bateman A anon Sarah Teichmann Domain Aldehyde ferredoxin oxidoreductase (AOR) catalyses the reversible oxidation of aldehydes to their corresponding carboxylic acids with their accompanying reduction of the redox protein ferredoxin. 
This family is composed of two structural domains that bind the tungsten cofactor via DXXGL(C/D) motifs. In addition to maintaining specific binding interactions with the cofactor, another role for domains 2 and 3 may be to regulate substrate access to AOR [1]. 19.60 19.60 19.90 21.70 19.00 19.50 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.39 0.70 -5.78 112 1171 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 639 10 535 982 251 383.30 27 62.78 CHANGED htchhpchhctlp...ssssht....shtpaGTssh.lphhs......phGshPs+Nap....puth..c............tAccluG-t.hpc.......phh.hppt....u......ChuCslsCu+.hschp.................................u.h..............................stsPEY..EshhuhGuss.....................GlsDlculhphNcLsschGlDoIShGssluhsMEhhE+Gh...ltct.h........shtlp.aGss-ullchlcclAhRcG.lGchLApGstchucch..G.s.-...hu..h.pV......K....Gh.EhPuaDPRuhp...G...huLuYATusRGusHl..cu........a.shs.-hh.......s.................thD.h.............th...........csKs.phlthhpchpul.....hDShslCtFssh.................................hshpth...............................schlsAsTGhc.hos--lhps.GERIasLcRhaNl.+t......G....hst.pcDsLPpRh..hcc.......Ph.pGs..scGp.....lsp..lcchLscYYchRGWc.ppGhPot-pLccLGL 
...............................................................................................................hhtp..chl....pss.sh.....s.tphGTshh..l.hhs......ctGtlPstNhp.............psta....p..............htpthshts....htc...........phh....s+....hp....u...........C......a.uCPltCtp..hp.h...........................................................ps.Ea..-slhuhGuss....................................................s...sc.ltss..h..hs.pl....s--hGlDs..hshG.p.hhu...hhh-h...hp+...Glhpc..........................h.tl..c...h...GD..sph..hh..c.hh...c.lApR......pG...hhshLA.c.G....sht....h....Acp..a........s..sp-..........hh......h..hs................................K.....sh.th.P.....sac.s.pthp.....G.....h.l.s.hhh.sp.c.s.t.sHh.ch...............................h.hlth-lh.................G.................tthD.h.....p....................-uKh.chsth.t.h.s.sl.....hsulslCt.ash......s.............................hsh.th.h............................................schhpA...h.TG.c..hot-c.L...hh.u-RIhsLcRtash+h..h...........h.pp.pcD..hlP..phh..hcc.............................sh.s...p..Gs..tp........hsct.hct.Ls.aYp.hGWs.p.G.hPstppLpcLsl....................................................................................................................................................... 0 220 395 470 +1141 PF02730 AFOR_N Aldehyde ferredoxin oxidoreductase, N-terminal domain Finn RD, Bateman A, Marshall M anon Sarah Teichmann Domain Aldehyde ferredoxin oxidoreductase (AOR) catalyses the reversible oxidation of aldehydes to their corresponding carboxylic acids with their accompanying reduction of the redox protein ferredoxin. This domain interacts with the tungsten cofactor [1]. 
20.80 20.80 29.70 34.50 20.70 20.60 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.13 0.71 -4.98 111 1162 2009-09-12 00:12:13 2003-04-07 12:59:11 10 5 646 10 533 979 183 199.10 39 32.36 CHANGED atG...+lL+VsLostphph-plstc.hh+palGG+GLus+ll.hccl.ss.........psDPLuP-.NhLlassGPLTGo..sPsuuRhslss.KS....PLTG..........slssSssGGtauspLKhAGaD..ulllcG+uccPV...YLhIc.cs........plElc-AscLW....G.pssh-TpctLpcch.......t.................spVhsIGPAGEphVtaAsIhs....-t..tR.......uAGRu.GhGAV.MGSKpLKAIsV+....Gsp ........................ahsplLRVsLos...s....slphE....chsc...hcpalGGhGhuh+lha-cl..ss.........sscP.h.s.p..NtLlFusGPLoGos.sPs.ouRhslss..hS..........PhTs..........hls-uphGG.hausphKhA..GaD..slllcG..+..u..c..pPV...a..LhIc..--...........c..Vpl....c..c.....A..s...t..LW.........G......csshtTsctlpcth.s...c......................ssVssIG.AGEN..hV...h.uslhs........sp......p+................uG.RuGsGAV..M.G.S.KNLKAIsVcGs................................ 0 222 394 469 +1142 PF02420 AFP Insect antifreeze protein repeat Bateman A anon [1] Repeat This family of extracellular proteins is involved in stopping the formation of ice crystals at low temperatures. The proteins are composed of a 12 residue repeat that forms a structural repeat. The structure of the repeats is a beta helix [1]. Each repeat contains two cys residues that form a disulphide bridge. 36.50 2.00 36.60 2.50 35.20 1.90 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.25 0.75 -6.12 0.75 -3.10 9 502 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 7 21 7 518 0 11.70 45 80.61 CHANGED TCTsSpsCspAp ..sCTsSssCspA.... 0 0 7 7 +1143 PF03756 AfsA AfsA_repeat; A-factor biosynthesis hotdog domain Yeats C, Bateman A anon Yeats C Domain The AfsA family are key enzymes in A-factor biosynthesis, which is essential for streptomycin production and resistance. 
This domain is distantly related to the thioester dehydratase FabZ family and therefore has a HotDog domain [2]. 21.30 21.30 21.30 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.60 0.71 -4.43 34 276 2012-10-02 20:54:35 2003-04-07 12:59:11 8 5 116 0 114 312 5 127.30 20 65.85 CHANGED hVHRtsss-VlLossppt.....sththphphspsHshhas+...ssDHhsuhLlhEuhRQuuhhlsHsth.tlshshthlhhshshsatchschss.....Pshlpspssssssp.......t...sphphplslhQsGpllusushssss .................................VtRt.pstpVllsshtph.........sth........ph.th.p.h........s..psHshhh-+.....ssc...H..hsshl.LhEusRQsuhh..hs...ptth.....hs...............t....h..lhp.s.h...s.hshpp.hs.phss........Psplpspss.ssshp..............tthph..phphhpsuphsspsphph..h................................................ 0 16 76 106 +1144 PF04671 Ag332 Erythrocyte membrane-associated giant protein antigen 332 Waterfield DI, Finn RD anon Pfam-B_4587 (release 7.5) Repeat To date many different Plasmodium antigens recognised by the hyperimmune system human sera have been cloned, sequenced and characterised. The majority contain tandemly repeated amino acid sequences which make up a considerable portion of the protein sequence. It has been suggested that these repeat-containing antigens may provide an immunological 'smokescreen' to the parasite in order to evade the human immune system. This repeat is found exclusively in the Plasmodium falciparum Ag332 protein and occupies most of its length [1]. 20.70 20.70 61.00 20.70 18.40 20.60 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.49 0.73 -6.65 0.73 -4.03 41 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 2 0 29 71 0 21.20 52 11.32 CHANGED sSlsEEllE.-GSsTE-lVppc .sSloEEIVE.-GSsTE-lVcpp. 
0 29 29 29 +1145 PF01453 B_lectin Agglutinin; MMBL2; MMBL; Agglutinin; D-mannose binding lectin Bateman A, Griffiths-Jones SR, Studholme DJ anon Prodom_2511 (release 99.1) Domain These proteins include mannose-specific lectins from plants[1] as well as bacteriocins from bacteria [2]. 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.15 0.71 -4.19 128 2897 2009-09-13 13:36:34 2003-04-07 12:59:11 19 158 299 65 1397 2960 33 103.90 30 18.28 CHANGED spshlW.....ANpt.....pPl..................s.pLplpt.DGs..LVLh.s..ss..........................t....lWuo..........ssssss..sssh....spL.c.sGNLVl.h....t.............sspslWpS.......F..cpP.scohLsstph....t.hs.t.p....hsuWposs...........ssu ...........................................thlW....hANpp...............pPl.............s........upLplst...s...Gs.........LVLh...s..ts..........................sps...lWoo.......................sos..sts.....tssh..........upLh...c.sGNLVlt...s..s................ssphlW...Q.S.......F...DaP..oDTlLPt.pls.h..s.hh...t.t........hhshts..................................................... 0 123 873 1179 +1146 PF05039 Agouti agouti; Agouti protein Moxon SJ anon Pfam-B_5381 (release 7.7) Family The agouti protein regulates pigmentation in the mouse hair follicle producing a black hair with a subapical yellow band. A highly homologous protein agouti signal protein (ASIP)is present in humans and is expressed at highest levels in adipose tissue where it may play a role in energy homeostasis and possibly human pigmentation [1] [2]. 
21.80 21.80 21.90 21.80 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.33 0.72 -10.99 0.72 -3.64 9 278 2012-10-01 22:06:18 2003-04-07 12:59:11 7 1 126 6 49 243 1 92.60 45 71.89 CHANGED tcu-pth.............plsshuLs.slcphstpptcpc....p+hshhc.phLs.......R.s..sPpRCltLt-SChu.tsPCCDPCAoCaCRhFpshCYCRph .................................................s....p.h..s..........pluhl..uLppc.Kphohp.EAEtp......p+.ph.pc..s..p..thphA.......tsRss....sPpsCVsp+-SChs.ssssCCDPCAoCpCRFFpohC.CRh.h..... 0 2 4 16 +1147 PF04647 AgrB Accessory gene regulator B Finn RD anon Pfam-B_5200 (release 7.5) Family The arg locus consists of two transcripts: RNAII and RNAIII. RNAII encodes four genes (agrA, B, C, and D) whose gene products assemble a quorum sensing system. AgrB and AgrD are essential for the production of the autoinducing peptide which functions as a signal for quorum sensing. AgrB is a transmembrane protein [1]. 23.00 23.00 28.30 28.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.36 0.71 -5.22 56 789 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 539 0 119 491 7 179.40 24 93.29 CHANGED pl.scphsphl....tpptslscpchthlpYGhpllltslhKhlhlllluhlhshhh.sllhhlshhhlRhauGGhHupo.hpChlholhhhlshshls.thhthshh......hhlhh.....hlhuhlllhhaAPssotp+Pltspc....hc+phKhtulhhhhlhhllslhlh.............hhphlhhulhhpsls..lhslsh..phhp .....................hhcphuphl....ppcpsl-.c...ph.hlcaGhpllhtslhphllhhhluhlh.shhh.ollshlsahhlRpautGhHApophhChl.olllalh....h....shlh.hp...h.p.lshh........hhlhl....hllulhllhha....APssocpp.Pl.stc....hh+p..h...Kh..h...ullh...h..h..lhh...l.ls.Lhlh.............hsphlhh.Glll.psls..lhPlhh.h..p............................... 0 69 107 116 +1148 PF04730 Agro_virD5 Agrobacterium VirD5 protein Mifsud W anon Pfam-B_3261 (release 7.5) Family The virD operon in Agrobacterium encodes a site-specific endonuclease, and a number of other poorly characterised products. 
This family represents the VirD5 protein. 25.00 25.00 628.50 219.50 17.30 20.70 hmmbuild -o /dev/null HMM SEED 774 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.53 0.70 -13.41 0.70 -6.48 5 19 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 5 0 4 20 0 432.90 50 93.94 CHANGED KLGAAERuAYEsWssusRPTWKDLIL+ARLDAIDSSAWLsDl.......................................................................................................................................uEsTSSTFcYEGlPLGEGER+AYEcWQEDAQPTWE-LVVNARlsELs+suuIssEasuLpEshEhRSDAsKRKRccsIDQ+so.SuSFpYDGMRLGuPERtAhtsWucspPPo.p-LllcuRlpuIssSsWh.sp.ssoS.FEYpGhPLGpsER.AhcpW.tsAQPsWEDLVVsARh...AELh.uu.h.sp+...p.tt-.s.ptp.Rh..hPI.tcp.sts.FlYDGhtLGAsERAAY-RWSKP-RPTWEDLILDARQAAIESDuVSsstIGKoSSSVFLYEGMSLGDAERQAYGRWRQPAQPRWQNLVVNARLAELDPSAWIPDEHDPFE-GEAsuhsSQASsANKSssuLGsQS-S+RPuhA+EssQEsTHlQsPoCSQLETRRALaFGSSGRDAspTESIAcsNRlDGVGKVKRLGoKSRRuVsATsHuVsSSscRLLS-EsGppAssSsPEKsVRSRoDNIGTYGSRKNERARLATETGKYESEHIFGFKVVHDTLRATKEGRRLERPMPAYLECKELHRQHVGTGRGRTRLVGRGWPDDASYRSDQRATLSDPVASuEGATASNGYQLNQLGYAHQLANDGLQSETPDGVsMPIQVATTSYNYTVSRDPVLSPPSKEQAPQLLHLGPRGQTEAVLARETALTGKWPTLEREQQVYREFLALYDVKKDLEAKsLGVRQKKpALVSALNRTAuLIGASPhKAQSssAEVEpusDEhDERRVYDPRDRuRDKAFsR 
......................ERtAY-sWstst.PTWKDLllpARlssIDussW..phtEsosSsFhYEGlPLGEGERpAYcpWtEsAQPoWEcLVlsARhtpLstst.IssEhssLpps.phRu-u.K+phpp.hDpcps.stuF.aDGM+LGpPE+tAYt............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 3 4 4 +1149 PF00578 AhpC-TSA AhpC/TSA family Bateman A anon MRC-LMB Genome group Domain This family contains proteins related to alkyl hydroperoxide reductase (AhpC) and thiol specific antioxidant (TSA). 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.35 0.71 -4.44 124 16547 2012-10-03 14:45:55 2003-04-07 12:59:11 16 104 4711 453 5334 26489 14345 127.90 25 62.99 CHANGED lGppA.P..-..Fplt.......st..p...s...pp.lsL.s-h...pG..phllLhF...ast.sasssCs.pEhsshpchh.pcap.pt.ss.pllulSs.Dsht.........stppahcphp.................lsaPlluD....sppluctYGlh............................hshtssalIDtc....Ghlphh ...................................................up.s.P..s...F.p.ht...............st....p.........h......pp...l.s..l....s-h................pG.......+h.s.l...l...h....F..................a.....Pt...s.....aT.......s....s...............Cs.....s....E..h..s...s....h...p......c....h...h......p.....c...a......p......p..........t......s.......s......p.....l......l..u...l......S......s.....D...s..ht.............................sp..p...s.a...t.p.ptp..................................................ls..a..s...l....l......u...D.............s...p.....p......l....s....c...t...a..s.l.h..t.................................................................h.s.hR...s....o....F.l...l.Ds.c...Ghlp..h.......................................................................................................... 1 1910 3591 4647 +1150 PF02626 AHS2 DUF183; Allophanate hydrolase subunit 2 Mian N, Bateman A anon COG1984 Family This domain forms the second subunit of allophanate hydrolase. In yeast urea amidolyase (Swiss:P32528) this domain is found between Pfam:PF00289 and Pfam:PF00364. 
25.00 25.00 25.10 25.90 23.60 24.70 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.44 0.70 -5.10 160 3169 2009-01-15 18:05:59 2003-04-07 12:59:11 10 21 2343 11 754 2303 1607 265.10 35 59.05 CHANGED chGlssuGAhDthAhphANtLlGNs..sssAslElshs..Gsplch.ppssh.lAlTGAshsspl..........sG...ps.....lsha.pshtlpsGshLpluts.p..pGsRu.YLAltGGl.sls.hlG.SpSThhtuslGGh........tGc.sLpsGDhLslsssssttstht.........................h..thhsthsp..pht.l.RVl.GPph..chFoppshppahsspaplosposRMGhRL..p....Gs.slp............spstslhS-ulsh.GulQlPssGpPIlLhsDpQTsGGYP+IusVlssDLs+LAQhpPG.spl+FphlshppA ............................hGls.uGAhDphAhchANhLVGNs..tssuuLEl.Tl.........t....sssl....cF....p....s....s.sh.lAlTG.As.h.p.spL.............................-s.....ps........l.....hh..pshhlctGphLpl.........sts...p......pG....hRu.YLAV.uG.G.l....s.........V.ss.h..h.G.SpSTphpuulGGh........pGR...hLpsGDhL.slspsttthttht..........................................h..h....p...h.st........sst..l..+ll.GPph..chFscpuhpsFh.p.s.s.........aplospSsRhGhRL....p.........Gt..hhp.....................tpsp-lhS.cu.hsh.GslQVPssGpPIlLhsDt.TsGGYPpIusl.hpsDL.+LAQ.htsG.spl+Fh.hshpp..................................... 0 197 420 621 +1151 PF01808 AICARFT_IMPCHas AICARFT/IMPCHase bienzyme Bashton M, Bateman A, Iyer LM, Zhang D, Aravind L anon Pfam-B_1613 (release 4.2) Family This is a family of bifunctional enzymes catalysing the last two steps in de novo purine biosynthesis. The bifunctional enzyme is found in both prokaryotes and eukaryotes. The second last step is catalysed by 5-aminoimidazole-4-carboxamide ribonucleotide formyltransferase EC:2.1.2.3 (AICARFT), this enzyme catalyses the formylation of AICAR with 10-formyl-tetrahydrofolate to yield FAICAR and tetrahydrofolate [1]. This is catalysed by a pair of C-terminal deaminase fold domains in the protein [3], where the active site is formed by the dimeric interface of two monomeric units [3]. 
The last step is catalysed by the N-terminal IMP (Inosine monophosphate) cyclohydrolase domain EC:3.5.4.10 (IMPCHase), cyclizing FAICAR (5-formylaminoimidazole-4-carboxamide ribonucleotide) to IMP [1]. 25.00 25.00 25.00 25.90 24.70 24.50 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.91 0.70 -5.33 9 4880 2012-10-02 00:10:39 2003-04-07 12:59:11 13 11 4399 36 1208 3788 3760 297.20 42 60.59 CHANGED AAKNHtcVsIlsDPtDYssllpchcp.pGp.lohppRpcLAhKAFpHTAtYDusIusaFpp.hu..............tt.s...shshhcp.sLRYGENPHQpu.hhhphs.tch....lssAp.LpGKthSYNNlhDADuAhphV+Eap.....PAsshlKHsNPCGlAlG.....................sslt-AYsRAhpAD.hSuFGGlIAhNccVDstTAchIpp...........FsEllIAPuYstEALclLs+K.KNlRllhhthssph...ph-h+pVtGGhLhQcpD...tshhstsphp.lVoKppss..EppLpDLlhAWpslKasKSNAlVhAKNshslGlGAGQ.SRVcus+lAh.KA .........................................AAKNats....VsllscssDYstllpclp........t.....t.....G..s.....hoh..pp.....RhpLAhKAFpHTAsYDuhIusYhsp......hs................................................t.t.h..P..p.t...h...sh.sh.....p....p.p.shRYGENPHQpAuaYhp..st..s.t.p................u.l..A.s.......A.....c...QL........p..G.Kt.LSYNNltDsDAAhch....V+-F..s............................pPAsshlKH.....sN....PC.GlAlu.......................................s.s.lhpAYp...+Aa......tsDshSuFGGIlAhNRpl.D.ttTAct...ltp.......................FhEllIAPuasp-A.LclL...st....K....K....NlRlLt......h..........s..............................s.....p...............t.................s.......t........h-.....h......+.p.V.s..G.G.lLlQ.........sp..D.................h.h.h...s.......t...p.h.........p...........lV.T..........c........+...p.................P.............o....-p................ch.....pDLh.FAW+ss.KaVKSNAI..lh...u.........+.....s...s.....h....TlG...lGAGQhsRV....tSs.+.lAht+............................................. 
0 404 789 1035 +1152 PF04548 AIG1 AIG1 family Bateman A anon Pfam-B_1846 (release 7.5) Family Arabidopsis protein AIG1 appears to be involved in plant resistance to bacteria. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.37 0.70 -5.23 32 1778 2012-10-05 12:31:08 2003-04-07 12:59:11 11 41 154 28 1094 11277 2366 173.00 24 40.10 CHANGED hsllLVG+TGsGKSATGNSI.....LGccs.....FpS+hpupuVTppCpttss.p.cG...pplsVIDTPG...L.F-hp.s..s...s-.hs+EIh+ChhLupsG.HAlL....LV..hslt.RhTpE-ppslpplptlFGsplhcahIllFTpt--Ltsss..L-DYlppssschLppllp.CssRhshFsN+..ssttc+tpQlppLLshVpslhppNs..G.t.aosc.hacphpppt....p.thpppp .............................................pllllG+.oGsGKS....uh....sN....ol...........................L..G..p....p.h.......................F...p.......s....p....h......s.........s....p......s.....s........T......p.....p....s......p.......t....t...........p.......t.......p........h......p..G...................p.p..l........s.......l.l..DTPG..........h....h....cs...p...h......p......p.p...p....h..h..p...p...l.....h...p.....t...h............h........p...t...s.....u.........c.....s....h.l......lV.....h.p..hs.....+h.o...p..p.....-.p.....p..h.....l.p....h.....l.p...p...h....F......G.........p...p......h..h..c...h...s..l.l..l..h..T..p......t..c...p.....h.....t..s...tt........hp..p....h...ht......................h......t.l.....h.t....h...t.............................................................................................................................ttt................................................................................................................................................. 1 345 637 860 +1153 PF00731 AIRC AIR carboxylase Bateman A, Griffiths-Jones SR anon Pfam-B_462 (release 2.1) Domain Members of this family catalyse the decarboxylation of 1-(5-phosphoribosyl)-5-amino-4-imidazole-carboxylate (AIR). 
This family catalyse the sixth step of de novo purine biosynthesis. Some members of this family contain two copies of this domain. 29.30 29.30 29.80 29.70 28.80 29.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.83 0.71 -4.93 224 5292 2009-01-15 18:05:59 2003-04-07 12:59:11 15 19 4504 106 1483 3728 2423 147.40 46 73.54 CHANGED shVuIlMGScSDhssM.......ccAsclLcc.hG..............lsaEscVlSAHRTP-chhcaAcsAcp.+GlcVIIAGAGG.AAHLPGMlAuhT.sLPVIGVPVp........op.sLsGhDSLLSIVQMPsGlPVATlAIG..GApNAuLLAspILuhp....Dsp.ltp+LppaRpphpppV .................................................hVullMGSpSDas.sM.......pcAu.p.h.L.-p..hu....................ls....a....E....s....c.V...lSAHRTP-hhhpaAcpAcp...+G...h..c..V.IIAGAGG.AAHL.PGMlAuhT.slPVIGV..P.Vp..............op...s...L...sGhDSLhSIVQMPs...Gl..P..V..A..T..l..AIG.....sGAtNAuLLAspILuh..p....c.p.....lt....p+lttacpt.pp................................................... 0 517 988 1277 +1154 PF00586 AIRS AIR synthase related protein, N-terminal domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome Group Domain This family includes Hydrogen expression/formation protein HypE Swiss:P24193, AIR synthases Swiss:P08178 EC:6.3.3.1, FGAM synthase Swiss:P35852 EC:6.3.5.3 and selenide, water dikinase Swiss:P16456 EC:2.7.9.3. The N-terminal domain of AIR synthase forms the dimer interface of the protein, and is suggested as a putative ATP binding domain [1]. 
21.00 11.40 21.10 11.40 20.60 11.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.29 0.72 -3.75 856 17593 2009-01-15 18:05:59 2003-04-07 12:59:11 19 63 4701 78 4854 13670 7415 103.20 23 21.07 CHANGED G.-DuAllchs....................................sttlshss.Dshsstsp...........h.sshs...hGtpulusslsDls..uhGA.cPl.shhsslsh.......t.........t.........................................l.pphlpGhscssp..phGs...sllGGco ....................................................................................tsuulhp.ls...................................tptt.ls..hssDth.s..s....psc.......................t.ps.hs....hGtpulusslsDlh................shG..A..c.P.........l...hh.hshlsh............sp.h...ssp.h...........................................h.ptllp.....Ghuptsp...ph..Gh.......sllGGcs....................................................... 0 1572 3133 4132 +1155 PF02769 AIRS_C AIR synthase related protein, C-terminal domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome Group Domain This family includes Hydrogen expression/formation protein HypE Swiss:P24193, AIR synthases Swiss:P08178 EC:6.3.3.1, FGAM synthase Swiss:P35852 EC:6.3.5.3 and selenide, water dikinase Swiss:P16456 EC:2.7.9.3. The function of the C-terminal domain of AIR synthase is unclear, but the cleft formed between N and C domains is postulated as a sulphate binding site [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.95 0.71 -4.21 169 19206 2009-01-15 18:05:59 2003-04-07 12:59:11 17 69 4747 77 5269 15059 7896 157.80 18 34.84 CHANGED psGDhllhlus........sGltusuhhtttphhttss.................hs-hhhp.phlhhhhhhhth....thlpuhpDlosGGLssslsEhs.t.sthGhplshs........................shhp.h............h-hhhs..................................tspGthhlslssppt....pphhphhp..pht......shhlGplssptththhht ...................................................................................sGshllhhus......................Ght..usu....h..s...h..h...p..p.h.h.t.tss.h................................................hscsh.h..p.......p.h..hp.....s...h..tt....hhth...............shl.....p..uhpDlo.sGGL...h.....s............slsc.....hs...t..s.....u......t......h........usp...l.chs..................................h.shh..p.h..............................t.h-...h.h.hs......................................................................................................tsp....u...t....h.....l.l.s...l...ssppt.........p..thh.t.h.hp........p..t.sh.........t.....shhIGplspptt......ht.................................................................................................................. 0 1677 3348 4466 +1156 PF04988 AKAP95 A-kinase anchoring protein 95 (AKAP95) Moxon SJ anon Pfam-B_5366 (release 7.6) Family A-kinase (or PKA)-anchoring protein AKAP95 is implicated in mitotic chromosome condensation by acting as a targeting molecule for the condensin complex. The protein contains two zinc fingers which are thought to mediate the binding of AKAP95 to DNA [1]. 
24.40 24.40 24.40 26.90 24.30 24.30 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.47 9 156 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 41 0 77 134 1 157.10 53 28.51 CHANGED FsCShCKFRTFp-c-IppHhpSshH+Epl+al..ps+LsK.sh-FLpEYlsNKhKKTtpc+pphpsppthhpph...tDlhcGlu.-cFMpKVEAsHCuACDlaIPshapslQcHL+S.sHspNR+thhEQhK+sSlhsA+SILNN+hltp+LE+YlKGENPFssssc- .....................FsCSlCKFRoFp-cEIppHL-S+FHKEph+aI..uTKLPKpTs-FLQEYlsNKsKKT-pR+pphpsppsshpph..-pDlhculu..hEHFh+KVEAA...HCuACDlaIPhpasllQ+HLKSsDHN+NR+hhhEQ.K+sSLhVA+SIL...NN+hIsc+LE+YLKGENPFssps..t................ 0 3 8 28 +1157 PF02983 Pro_Al_protease AL_protease; Alpha-lytic protease prodomain Griffiths-Jones SR anon Structural domain Domain \N 20.50 20.50 21.10 21.10 19.90 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.07 0.72 -4.01 35 302 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 88 14 99 313 0 59.80 28 15.82 CHANGED polucLcuspssL.s........sush...susuWaVDspoNcVVVsucsss...uusshschuussuutss ........................ohspLcuupspL.s.............sAsh......ssou..WhVDspoNpVVVs.sssss.....susphsphstssss.s.t...................... 
0 32 73 96 +1159 PF00490 ALAD Delta-aminolevulinic acid dehydratase Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.90 21.90 22.50 23.40 21.70 21.80 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.83 0.70 -5.54 165 3912 2012-10-03 05:58:16 2003-04-07 12:59:11 16 9 3698 72 1117 2899 2727 314.40 48 95.89 CHANGED hshpR.RRhRpstslRclVpE.TpLsssDLIhPlFVp-Gps...pp.IsSMPGlhRhS.l.DpLhcpsccshplGIsulhLFGl...s.pt..KDttGopA..as.ssGllpcAlRtlKcth.P-.lhllsDVsLstYTsHGHsGll......................t..s..pl.NDtTlchLs+tAlupAcAGADllAPSDMMDGRVuAIRpALDps.GapclsIMSYuAKYASuFYGPFR-AssSu..sphG.............D++oYQMDP.A.NscEAlREltLDlpEGADhlMVKPuhsYLDll+cl+cpa.s...lPlsAYpVSGEYAMlKAAAp..pGWlD.cccllhEoLhuhKRAGADhIlTYaAh-sA..c.hL .......................h..hpR.RRlRpsshhRphl.p..E..s....p.L.s.........s..sDLIhPlFVh..........-G..p.s......p..ptl..sSMPGVh.Rh......o......l..-.pLh.ccscchsp....hGItu.lhLFGl......s.pp....KDtt...GopA..as..-.Gllp+AlR.t.lKpp..h..P-.l.........hlhsDlCLstYTsHGHCGll....................csu..tV.NDtTlchLs+pAlu.....pAcAGADllAPSsMMDGplsAIRpALDp......s...G.a.p.s..ssIMuYosK..YASu...FYGPF.R.........DAssSu..sp.hG.............DRKoYQMDPu.N.p.cEAlREsthDltEGADhlMVKPuhsYLDllRcl+-...ph....p....lPlsAYQVSGEYAMlKAAAt...sGW.lD.cccllhE..oLhuhKRA..GADhIlTYaAh-sAch.......................................... 0 362 710 948 +1160 PF00171 Aldedh aldedh; Aldehyde dehydrogenase family Bateman A, Sonnhammer ELL anon Prosite Family This family of dehydrogenases act on aldehyde substrates. Members use NADP as a cofactor. The family includes the following members: The prototypical members are the aldehyde dehydrogenases Swiss:P00352 EC:1.2.1.3. Succinate-semialdehyde dehydrogenase Swiss:P25526 EC:1.2.1.16. Lactaldehyde dehydrogenase Swiss:P25553 EC:1.2.1.22. Benzaldehyde dehydrogenase Swiss:P43503 EC:1.2.1.28. Methylmalonate-semialdehyde dehydrogenase Swiss:Q02252 EC:1.2.1.27. Glyceraldehyde-3-phosphate dehydrogenase Swiss:P81406 EC:1.2.1.9. 
Delta-1-pyrroline-5-carboxylate dehydrogenase Swiss:P30038 EC: 1.5.1.12. Acetaldehyde dehydrogenase Swiss:P17547 EC:1.2.1.10. Glutamate-5-semialdehyde dehydrogenase Swiss:P07004 EC:1.2.1.41. This family also includes omega crystallin Swiss:P30842 an eye lens protein from squid and octopus that has little aldehyde dehydrogenase activity. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.37 0.70 -6.16 119 43773 2012-10-02 17:28:28 2003-04-07 12:59:11 17 120 5325 607 12917 36315 23835 407.20 25 80.35 CHANGED ahsups.......pp.hpshsPu.....sschlupls.tustp.Dl-pAlpuAcp.Ahp.......t...W.pphsstcRuplLt+hAchlc.p+t--lApl.sh-sGKslt...pupt.-ls.ts.....hshhcaaAsh..scc.hpG.ph......hs..s..........sthshsh+p.Pl.GVlusIsPWNaPlhhs.shKluPALAsGNolVlK.Pup.TPlou.hhluclhp-.A....GlPsGVlNll...sG..GppsGpsls.pcsplctloFTGSotsGcplhpt.Au..pp...l....K+lsLELGGKs.....shIVhsDA.D.l.-tAlpsslhut.ahssGQsChusoRllVpcu.la-c..Flcclspts.ppl.clGs...P.h-..........ssplGPllsppphc+lpphlppuhpcGAclls.GG..............tpG.halpPTlh...ss.VsschplhpEElFGPVlslhpac..s.......-EA.lphANsopYGLuuulaTpDls+uhchucpl..csGtValNsh...tsss.ts..PFGGhKpSG.hG..R-sG.ttulcpasphKsl 
.......................................................................................................................................................................................................h.sP.............st.p....h..t..t...h..........s...........s......t...c.l..p.t.A.l....t..s...A..pt...Aht.......................t.W...t.p..h..s...s..t.p...........R........s........p....h...L........t..........c.h........u........p............h..l..........p........p........p...........t..........c......p.....l..........u.......t........h................s........h.........-..........t........G............K..s..ht..............p.uts........-....lt....t.s.......................h.p.h.h.....c.a..h..u...th.......sc...p.....h......t.....s....ph..............................h....t...............tshh..s.h.h.h.p...p.....P..l....G....V........l....us..I............s..P...a..N....a....Ph......h...h...s...s..h.ph..u....s.A............L..s..s..G.N.s..l..l.l....K....P.u.....p....p....s.....s.....h....s....s.....h.....h....l.....s.....c.....l....h....p....c....A......................G.l.P...t...G...l....l..s.l.l............t.G.......u.....t....p.....s.....u...p...t...L........h....p..c.......s...t...l....sh..............l..............s......F.T..G....S.s..t.....s........G..c....t..l.....h........p.....t...uu........tp........h..................p....l..hhE......h........G.Gp..s.....s.h.l.....V......h.....s........-......A.......D......l.......-.....t.....A...l...p.......s.h...h..t..........u...t.....h.......h..s....s...GQ.......h.C.......s...u..s.p...p.........l.l.V..p.........c.........s.........l.........h......-.p.........hhpt...l.........h.......t.t.......h.....p......p......h.......p....h......G.s......s...hp........................ts...t...h..G.s.....l...l......s.t.t........t.h..p....p..l.............t.h...l...p...t...u...h....t......p.....G......u...p.....l...h.h....uG.............................tt........G....h.a...ht.P....T.....lh...................t...t....s.t............p....
..h.....t...l....h.....p............c.....E..........h..F.....G.....P.....lhs..........l.h.t.hp.....s...............................c...-..s.....l....t....h....s.....N....s....s....t...............a....G..L.....s.........u...s.........l.......a.....o......p......s........h......t......p......s..........h......p.....................h...t.......p...t.l..........cs....G.........h.....l..............h........l.......Nt..........................s................h................s........FG.................G....h........t..t..S...........G....h.G....pt.u....s.ht.a.p.p................................................................................................................................................................ 0 3478 7376 10497 +1161 PF01081 Aldolase KDPG and KHG aldolase Finn RD, Bateman A anon Prosite Family This family includes the following members: 4-hydroxy-2-oxoglutarate aldolase (KHG-aldolase) Phospho-2-dehydro-3-deoxygluconate aldolase (KDPG-aldolase) 22.20 22.20 22.30 23.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.02 0.71 -5.14 5 4204 2012-10-03 05:58:16 2003-04-07 12:59:11 14 12 2642 49 825 2763 739 192.20 33 90.12 CHANGED I-ulL+cA+llPVIslcct-DAlPlA-ALlAGGlRVLEVTLRTpsAlDAI+hlt+pRP-hlVGAGTVLsApQlupApcAGAQFlVSPGLTsDLlKtus-schPLlPGVuTPSEIMlGL-hGlccFKFFPAEssGGVsAlKAlAGPFuQVRFCPTGGIsPuNlRDYLAlPullClGGSWlVsuuhlpssDas+IspL ..........................................h....hppt.sllsVlts.ps.h.-cA.lshucAL.lpGG.lp.s.lElTh......p......o......s.....s......A....h......c........u....I....c....t...l....t.................c........p.....h.....s....c.........s.l.l..GAGTVLssp..psctsh.pAGA...pFlVSP.u.h.st...pllctsp.p.t....slP......hlPGssTsoElhtAh.-hGh.chlKhF..PA..p..sh..G.....ss..hl+Alt..u..P..h.s..p..l..p.h.h.P.TGGlo..s..s..N..htcalshs.s.lss.s.hG.S.hL.ss.ts....t.t.tpattlt.................................. 
0 228 513 683 +1162 PF00596 Aldolase_II Aldolase_class_II; Class II Aldolase and Adducin N-terminal domain Bateman A anon MRC-LMB Genome Group Domain This family includes class II aldolases and adducins which have not been ascribed any enzymatic function. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.46 0.71 -4.68 502 9699 2009-01-15 18:05:59 2003-04-07 12:59:11 16 32 3654 97 2299 6508 3435 184.40 24 69.68 CHANGED pc.l....stss+hhs.phGh.sp.ussGslSs..+..............................s...............tsthl.lsPo..Ghphsplp...s.scllhl.....c...h........c.G..phl.....................t.................t...pso............u-hshHttlapt.....ps.-spullHsHoshusuhu.h...................tp.l....s.....hstptst.h...........splsh.............h..hp.......G..tl....................u..pplspsht.......t...........................s.p...ulllpsH..GllshG......p.oltcAhhhhhhl....Epsschtl ............................................................................plhpsstths..phGh..s........p..hstGNl..Ss...p........................................t............psthl.I....pPo...Gh...t.ap...pls.......s..schl..hl...c...h........s.G.....chl....................................pt.......................thpPS......u-.hsh.HhtlYpt...............hs.ch.....tullHsHos..tusuhu.h.................................spsl...s...............hshptst..hhh......................sslsh..................h..h.hs........u.p..pcl..........................ups.ls..csht.........................................ptt..ulLlpsH....Ghhsh.G........c.s.hpcAht.hh.hlEpsAph..h...................................................... 0 668 1344 1864 +1163 PF03752 ALF DUF312; Short repeats of unknown function Yeats C anon Yeats C Repeat This set of repeats is found in a small family of secreted proteins of no known function, though they are possibly involved in signal transduction. ALF stands for Alanine-rich (AL) - conserved Phenylalanine (F). 
20.80 20.80 21.00 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.82 0.72 -4.10 42 301 2009-01-15 18:05:59 2003-04-07 12:59:11 8 24 42 0 140 347 0 41.90 30 20.81 CHANGED Rsslsplhssu..GssVppAAptAL....sGsspslcsFLpsG.htAt ...............Rhtssplhssu....GssV+pAAptAL....sGo.spslcpFLpsG.h.A........ 0 39 110 133 +1164 PF05208 ALG3 ALG3 protein Wood V, Bateman A anon Pfam-B_3416 (release 7.7) Family The formation of N-glycosidic linkages of glycoproteins involves the ordered assembly of the common Glc3Man9GlcNAc2 core-oligosaccharide on the lipid carrier dolichyl pyrophosphate. Whereas early mannosylation steps occur on the cytoplasmic side of the endoplasmic reticulum with GDP-Man as donor, the final reactions from Man5GlcNAc2-PP-Dol to Man9GlcNAc2-PP-Dol on the lumenal side use Dol-P-Man [3]. ALG3 gene encodes the Dol-P-Man:Man5GlcNAc2-PP-Dol mannosyltransferase. 21.50 21.50 21.70 22.00 20.30 20.90 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.58 0.70 -5.23 7 352 2012-10-03 03:08:05 2003-04-07 12:59:11 8 7 268 0 241 353 16 311.30 39 79.10 CHANGED lsshLlLsEshlshllIp+VPYTEIDWtAYMpplpuhLs.GppDYopLpGsTGPLVYPAuaVYIYouLYaLTstGsslhhAQhlFAslYllsLulVhhlY.ppt+lPPalhsLhsho.hRlHSIaVLRLFNDshAhLhLahul......hhhcp+.......WthGuhhaSlAVuVKMNlLLasPAl.LlLhLtshu.hltTl.pLsshullQlllGlPFLhpaPspYLppuFDLGRhF.a+WTVNaRFlscclF.s+pFplsLLhhHLhLLlsFsps+Ws..t......+h..plls.lhptt......hchpht........p..sssphhos...............................................hhsuNhIGlhhuRSLHYQFYsWYaaoLPYLlWpss...hshhlthllhslhEasWNsYPSTstSS 
.......................h...hlhhh-hhlsh.hll.pVsY.......TEIDWpsYMpp.lphhls.G...phDYsplc..GsTGPLVYPAGaVYlaphLY.alT...s.p........G................p.............s...lhhAQhlFuhlYlhsLhl..Vh.hhY....p...p.............tp.............l...PP.a...h....hh.hhs.ho.hRlHSIaVLRLFNDshAh.hhha.hul...............hh.h...p.pp...........................Wh..hushha.....Shu..VulKMNlLLahPul..hhlh.lh.t......hu..hhtsl.hl.h.l.h................shl....Q.llluhPFL...h.........p.p...P...h...........s........YlppuF-huR.FhacWTVNW+FlsEch...............Fhs+tFtlsLLh.hHlhhLhhFhhhpWh.........................p..t..t.h.ht..lh.p.........................................tst.hh...h........................................................................hhsuNhIGlhhuRSLHYQFasWahaslPaLLats....................hs..hh..lt.hhlhhh.EhsWNsaPSTshSS..................................... 0 90 136 201 +1165 PF03561 Allantoicase Allantoicase repeat Finn RD, Yeats C anon Yeats C Family This family is found in pairs in Allantoicases, forming the majority of the protein. These proteins allow the use of purines as secondary nitrogen sources in nitrogen-limiting conditions through the reaction: allantoate + H(2)0 = (-)-ureidoglycolate + urea. 
21.60 21.60 30.60 23.80 19.10 20.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.10 0.71 -4.75 13 1158 2012-10-03 19:46:52 2003-04-07 12:59:11 10 13 467 6 534 1098 1048 143.30 39 75.20 CHANGED GGpVlusSDpaFuss-NLlpPucsh..........chhDGWETRR+Rp................GH.DWslI+LGss.GhIcuh-lDTsaFpGNasstlSl-Ashhpsps..........................scsspWsplLstpcht...PcpcHhat...lsspp.aTHlRLsIaPDGGluRlRlaG .......................................................................GutslssoDpaF.u.s.spsl.LtPstuh..........phhD..GWETRR+Rp.....................sGa..DWsll+Luts...G......h.....Ict.l-lDTuaFpGNaPstsSlpAshhss...s..s..............................................................................................................sps.ttWpp.lls.p.sp.Lp......ss...p...c..Hha........t..............l......s......s......t.....p.......s....h......THlRlsla.....P.DGGluRLRlaG.............................. 0 146 279 433 +1166 PF04864 Alliinase_C Allinase Mifsud W anon Pfam-B_4527 (release 7.6) Domain Allicin is a thiosulphinate that gives rise to dithiines, allyl sulphides and ajoenes, the three groups of active compounds in Allium species. Allicin is synthesised from sulfoxide cysteine derivatives by alliinase (EC:4.4.1.4), whose C-S lyase activity cleaves C(beta)-S(gamma) bonds. It is thought that this enzyme forms part of a primitive plant defence system. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.28 0.70 -5.80 10 180 2012-10-02 18:26:03 2003-04-07 12:59:11 8 7 33 19 77 718 304 318.40 46 79.58 CHANGED sDlssGDPhFLEcYWp++t-soAVllSGWHRMSYF..tsss.pFlSsEL-+pI+cLHclVGNAsT--RaIVFGTGoTQLlpAslaALSPs..ssssusPspVVAssPYYusa+cQTsaFcuptY+WcGsAusah....cssssssaIELVTSPNNPDGtLRcuVl...cGopu..IaDhAYYWPHYTPITtsuDEDIMLFThSKhTGHuGSRFGWALVKDccVAcKhlcYlptNohGVS+EoQLRsLKILK.llssh+sppGo....clFpFGacph+pRWcpLsclVspSs.RFSLQch.ss-YCNYFp+h+sPSPuYAWlKCEh-c-pDCtphL+.su+IhTpuGshFtussRYVRLSlI+opDDFD.hlpRLsshl ...........................................................................................................................shspGDshhhE.aWh.p.ptttuslllssWppMSYh...ss...............Flp.EL-+pI+c.LH..c.hV.GN.A.s.s.cs.+.alVh.GsGsTQLhpuh..lhu.LSPs...........ssu..sp.VVu.t..sPa..Y...s.......s.a...p.......p..........Q......T........c.......a........h........c.........p........t........h........Y.......p........W.....s....G.......s.....A....s...s.a..........s.s...s..s..s.ppa...I.E......h....V..T..SPNNP.....-G......h......l......R.....c........u.....V...l........cG...s..p....u..........Ia.....D.....hs..Y.......Y........W..........P......H.....Y........T.....P........I....s......t.........t.......A........D.........c....D.......I.....M..L.F...T...hS.Kh.TGHuGS...............RhG.WAL....l.KD...c..pVh..p..+...hh...p.Yhp..h..NT..G.sS.....+-oQLRuhKlL...+...tVsshhc.......s.............p......p..st..........cl.pFGacphccRWhplp.sllspS..c.RFSlpch.ps......pYCsaFp+hpsssP................ua..AWlKCEhE..-...s.cDChphhp.sst.I.spsG...at......s.ssc.aVRlShlcpp-sFs.hhthLt.h........................................................................................................................................ 
0 14 57 67 +1167 PF04030 ALO D-arabinono-1,4-lactone oxidase Wood V, Finn RD anon Pfam-B_33547 (release 7.3); Family This domain is specific to D-arabinono-1,4-lactone oxidase EC:1.1.3.- , which is involved in the final step of the D-erythroascorbic acid biosynthesis pathway [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.81 0.70 -4.80 34 1395 2012-10-02 00:48:38 2003-04-07 12:59:11 9 15 826 5 608 1305 206 200.00 21 43.75 CHANGED lTlpslPuaslctppps.......hshppllcsh-phh..pps-ahchaWFPaocpshlhpsscssts...........sp.tpphhsphlsphhapsLhhlu.chhPslsPhlp+hshphhhu..........pptlssShcsFshsphh..p.h..EaulPhcpu.-sLpcLcshlspps.................hhlHaPlEVRhsssD......................................csaL..............SsshscsosYIssphYRshs.......hppaapthEslhpph.GGRPHWuKhaphsspp.............................lpphY.schccFhplRcchDPpshFls..sahc.+h .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................E.......a.......l..Ph.....p.......t....h.............t..............h..............l...p...pltthhptpt.........................h...hhs...l.phRhsts..s.......................................p...hL.................u..s.........t....p............s.....h.......hl.s.h...h..h.....t..s..t.................htp.hh..p....t.....h....-....p....h.........h....h.....c....a...u..G.......RP...HWu....K..........p..p..h..s.tpp...........................................................................................h..p...ph..Y....P...c.....h..pcFhtlRcc..h.DP..p..shFhs..sahcp......
............................................................................................... 0 177 387 524 +1168 PF00128 Alpha-amylase alpha-amylase; Alpha amylase, catalytic domain Sonnhammer ELL, Griffiths-Jones SR anon Swissprot Domain Alpha amylase is classified as family 13 of the glycosyl hydrolases. The structure is an 8 stranded alpha/beta barrel containing the active site, interrupted by a ~70 a.a. calcium-binding domain protruding between beta strand 3 and alpha helix 3, and a carboxyl-terminal Greek key beta-barrel domain. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.03 0.70 -5.03 31 28372 2012-10-03 05:44:19 2003-04-07 12:59:11 19 300 4623 473 6746 23482 3056 261.10 18 44.76 CHANGED GDLpGIhpKL-..YLKc........LGlouIWloPlac.........Ss......u.HGYDhpDYpplsspaGTh-DhcpLlscsHp..+GlKlIlDhVsNHoSsppth...........Fpcuhpptss...hpcahh..h.h..........t.psssshht......h.stosaph......ppps.....h.hhhshsDLNhcNss.........lhpthhc....shcaWlcpG......lDGaRlDsspclsc............................sah+patpphsph.........thhhlGEhatss...sp.hhhhstsshhshs...hhsahphthtpssp.....hp.ss.s....htchpphlschhpthsp...sthtssalsNHDpsR.......hhothusst................ph+hshshlhshp.GsPhIYhGpEhGhoGsp.........DP 
...............................................................................................................l.s....al.t.p........lGh.....stl.......l.....P..hhp......................................ss...........ts.h.G..Y..s........s..........h.......s.......a............h.........s.........l.........s.........s.........p.........a..............G..............s.........................p...........-...........h........c...........p...........h....l....pth.+p..........tG.......l...............p...l.l......hDh......Vh....N......H.h...u......t...p...p........h...................................................................................t........t.................t...h.......................................................................................................................h..t..............................................................................p.l..s....h....t....p............................................................hh.p...h..........h...p.......ht.a.h.h.p...h..................lc.Gh...R.....h.Dhh.t..h..h...........................................................................................................h..h.t....h...h..t....h.t...t.......................................h.hhh.u.E...h......t.............................................................................................................t.........................................t.....................................................................................................................h...........................................................................................................h.......t....s..H.D.................................................................................................................................................h.......h...h.......t.................................h................................................................................................................................................................
........................................................................... 1 2088 4020 5641 +1169 PF02903 Alpha-amylase_N alpha-amylase_N; Alpha amylase, N-terminal ig-like domain Griffiths-Jones SR anon Structural domain Domain \N 21.20 21.20 21.50 21.30 21.10 21.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.77 0.71 -4.14 38 1507 2009-09-11 01:10:01 2003-04-07 12:59:11 9 20 1336 56 147 988 29 118.10 30 20.02 CHANGED Mh.htAIaH+PpssaAYsYsppplHIRLRTcKsDlp.cVtLlauDsYph..................th.hpphsMcKhuos...plaDYW.splsssh+.R..l+YsFpLps.sscphhas-cG...hapctshs...hthaFpaPalp ......................................shaHhP.s...as...h...h...h...s...c...c.....p...LpIpLps.....p.p.p.D.l.s....c......lh.L.hhtcshp................................tshs...McKht..S.s..........sh..a..-..a..W....p..ssl..s..h..s...t..R...hpYsFcLhs..c....scphaas.spG.............hs...p..h.s.hc................................................. 1 45 79 112 +1170 PF02296 Alpha_adaptin_C Alpha adaptin AP2, C-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_8859 (release 5.2) Family Alpha adaptin is a hetero tetramer which regulates clathrin-bud formation. The carboxyl-terminal appendage of the alpha subunit regulates translocation of endocytic accessory proteins to the bud site. 24.20 24.20 24.30 24.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.39 0.71 -4.12 21 349 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 230 11 231 333 4 109.80 43 12.10 CHANGED FhcPspl.oupDFFpRWKQluusspEAQ+IFt..........uspshstshs+phl.pGFthulLpsVDPNscNhVuAullpop.uGp...lGCLLRLEPN.hpspMaRLTlRuoc-sVsptLhcl ..................FhpPsch.su.p-FFpRWKQLu.....ss...t..p.E..sQcIFp.............us+sh-....tp..h.s+s.tl....hGFuhslL...cs.V..DPNPp...N...hV.G.AGllpT...c......ssp........lGCLLRLEP....N...hpupMaRLTlRooc-.sVsptLsc............... 
0 80 124 184 +1171 PF02883 Alpha_adaptinC2 Adaptin C-terminal domain Bateman A, Griffiths-Jones SR, Mian N anon Pfam-B_8859 (release 5.2) Domain Alpha adaptin is a heterotetramer which regulates clathrin-bud formation. The carboxyl-terminal appendage of the alpha subunit regulates translocation of endocytic accessory proteins to the bud site. This ig-fold domain is found in alpha, beta and gamma adaptins. 20.60 20.60 20.60 20.90 20.50 20.20 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.62 0.71 -3.77 102 1431 2012-10-03 16:25:20 2003-04-07 12:59:11 15 26 301 45 862 1378 12 109.40 19 13.69 CHANGED ssshlapssslplthphph..........tpsshhplphhhsN...p.sspslsshshp..hulP.........pshplplpss.sssslss.......ssplpQhlhlts...........hph+l+lsaphs...up.th...p.t.h...hP .....................................hhlapps.slp.lthphph............tpsshhhlhlphsN.......p..sspslsshshQ....hs..ss.............................cshplp.Lpss...sssplss..........ssplpQhhhlt.s.......................t...p..hplclsap.hs.st.......p.h........................................ 1 253 405 648 +1172 PF00944 Peptidase_S3 Alpha_core; Alphavirus core protein Finn RD, Bateman A anon Pfam-B_266 (release 3.0) Domain Also known as coat protein C and capsid protein C. This makes the literature very confusing. Alphaviruses consist of a nucleoprotein core, a lipid membrane which envelopes the core, and glycoprotein spikes protruding from the lipid membrane. 
20.70 20.70 20.80 20.80 20.30 20.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.86 0.71 -4.81 5 862 2012-10-02 13:45:52 2003-04-07 12:59:11 14 3 54 46 0 511 4 104.80 71 12.60 CHANGED sMKLEuD+TFPlhL.DGKVNGYAClVGGKVhKPLHVKGTIDNPsLAKLKFKKSSpYDLEaAcVPssMKSDAFpYToEKPEGFYNWHHGAVQYSNGRFTVP+GVGGKGDSGRPILDNoGKVVAIVLGGANEGuRTALSVVTWNcKGVTlKTTPEuoEpWS ...............................................................................................................GRFoVP+GVGuKGDSGRPILDNKGRVVAIVLGGsNEGSRTALSVVTWNpKGVTlKhTPEGoE.WS.......... 0 0 0 0 +1173 PF01589 Alpha_E1_glycop Alphavirus E1 glycoprotein Bateman A anon Pfam-B_587 (release 4.1) Family E1 forms a heterodimer with E2 Pfam:PF00943. The virus spikes are made up of 80 trimers of these heterodimers (sindbis virus) [2]. 25.00 25.00 25.30 27.00 18.20 24.60 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -12.88 0.70 -6.18 17 1920 2012-10-01 19:42:26 2003-04-07 12:59:11 11 5 59 57 0 941 0 247.60 48 50.15 CHANGED .tu+Ats.hsEohsYLWspNQshFWhQhshPlAulllls.hlRsh.CCh...sFLllhu........ls.ssspAYEHssslPNhVGhP.YKAlV-RsGYuPLsLplpVlpopL.PolsLEYITCcYKTVVPSPhVKCCGssECss..........ppcsDYpC+VaTGVYPFMWGGAYCFCDoENTQhSEAYV-+S-sCptD..HAtAY+sHTASLKApl+loYGsss.psl-saVNG-ossphsshKhIhGPlSoAWoPFDsKlVVYKs-VYNhDFP.YGuGpPGpFGDIQuRTss...SsDLYANTsL+LtRPuuGslHVPYTQsPSGFchWhp-+GpsLspsAPFGCpIpsNPlRA.NCAVGsIPlSlDIPDAAFTRls-uPslopLpCpVssCTaSoDFGGlAslpYpo-+sGcCuVHSHSssAsLpEuslcVhp........sGphTlHFSTASspssFhVplCus+sTCpAcCcPPKDHIVsaPspHssthhsslSsTAhoWlptlhGGsusllslullllllVshlshpR 
..........................................................................................................................................................................................................FMWG.................................................tt.s...h.sasNG.pssp.tt.phhhGPhSouaoPFDp.......................................................................................................................................................................................................................................................................................... 0 0 0 0 +1174 PF00943 Alpha_E2_glycop Alphavirus E2 glycoprotein Finn RD, Bateman A anon Pfam-B_308 (release 3.0) Family E2 forms a heterodimer with E1. The virus spikes are made up of 80 trimers of these heterodimers (sindbis virus) [3]. 19.50 19.50 21.10 21.00 19.30 19.10 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.51 0.70 -5.84 23 2166 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 59 31 0 967 0 320.80 49 61.18 CHANGED PYlAhCscCutGtsCaSPlAIEpVpu-AsDGhl+IQsSuQhGls.puGstctsKhRYhts...pclcEushsslpVpTSuPCpllushGaFlLA+CPPGDolTVuFp.sssss+uCplsa+hc.p.VGRE+ashsPpHGp-lP.....CssYspssppost..l-MH..hPsshsDpoLLSpsuGsVpls.PuG+slpYcCpC.GstpsGTs.oocpplssCsts.QC+AahsspsKW.aNSshlPR.usspshKGKlHVPFsLssuTCpVPLA.pPsVpauh+ploL+LHPc+PTLLThRpLGs-PphocEWIotsssRshsVsccGlEYsWGNpsPlRlWAQ.cousGsPHGaPHEIlpaYYphYPssTlsslsusuhllhluluuSshhhspARpcCLTPYsLTPsAtlP......hsLulLCC .......PYlu+CssCtcu.sCaSPltIEcVhs-ucDGhlRIQTSu.aG.D.posptctp+hRYMss...hpltEu.hcplpVpTSuPCpllshpGYFlLs+CPPGDolTVuhh.pssshHSCTVth+Vphp.VGREpYpaPP.HGtplP.....CssYsphppsousY.lpMH..h.Gshs.oThLppppGpVhlssPuGpTVpY-CpC.uspp.GTT..upchplhhCccscQChAYlsspsKWVaNSscL.R.u.sscohKGKlHlPFsLssssChVshA.tPhVp.hh+tlhL+LpscpPTLLohRpLGt-sp.ptcWlpt.sphshoVstEGlEasWGN.sPhRhWuQ..os.GsPHGaPHEll.aYYphaPhhTlhslsshshhhhlshssohhhhspsRp.ClTPYtLsPsApVP......hhlulhCC..... 
0 0 0 0 +1175 PF01563 Alpha_E3_glycop Alphavirus E3 glycoprotein Bateman A anon Pfam-B_285 (release 4.0) Family This protein is found in some alphaviruses as a virion associated spike protein [1]. 25.00 25.00 67.70 67.70 21.40 20.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.83 0.72 -4.16 25 939 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 57 13 0 592 0 56.50 63 6.95 CHANGED MClLANlTFPCspP...PsCY-+pPppTLchLEsNVDpPGY.pLLcAsl+Css..pRp+R .MClLANlTFPCsQP...PlCYsRpPcpsLshLE-NsDsPuYDELLpusV+Csu..+RtKR.... 0 0 0 0 +1176 PF03229 Alpha_GJ Alphavirus glycoprotein J Bateman A anon Pfam-B_3350 (release 6.5) Family \N 21.00 21.00 21.00 21.30 20.90 20.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.96 0.71 -3.79 5 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 8 0 3 47 0 92.70 53 76.17 CHANGED MRSLLFVVGAWVAAlVTsLoPcAALATGsTsTsGsANu....sssssAoGANATSAsAuuThAAssssPPscST.sstTPGPaPPTDFALPLVIGGLCALTLAAMGAGALLHRCCRR.AARRRQRVSYVYA ...........................hu.ls...........................................................uslAATphuPAuso....TpAPsoPaPSPhus...FAl....PLVlGGLCAlVLuAhshhtLLRRsCRt.huR...........hs............ 
0 0 0 3 +1177 PF01120 Alpha_L_fucos Alpha-L-fucosidase Finn RD, Bateman A anon Prosite Family \N 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.20 0.70 -5.55 48 2021 2012-10-03 05:44:19 2003-04-07 12:59:11 12 80 870 58 664 1863 422 324.50 24 62.37 CHANGED hlllh.........hhhts...........spspapPsh-sls............ppt.pWFc-sKFGlFlHWGlYSVPuhs.....................sEW.......................hhpp..pthsstpYt........chs..........spFssp......pFDscpWsphhKpuGAKYhlhToKHH-GFshWcS.....chot.aNssc......ssPKRDlltELspAs+cp.Gl+hGh..YaS..hh.-Waps.a.htcppstht................phppa.............................................hp.hhhsQlpELlspY.ps-llWhDGsWtt.............................ssptWpt...schhshlhphpP...slllssRhtpss.tth..........sapss.EpthPsp..........l.ppsWEsChTl.ssoWGY...phpspsapospplIcpLlcsVSpGGNLLLNlGPps-GslsshtpcpLpphGpWhchNGEuIYu .............................................................................hhhh.h.............................................................ap....p.....chGhhhHaG..sh..s.............................pW.......................................t.................ta.t...................................p...........p.tF.s..s..p........paDs..cpWschh+puGhKalllTsKHH..D.GFs....La..s..o..........p.ho.......s...asshs..........pss...+...p...Dllt..ElspAs+c.......t..GlchGl....YhS.......h...D.......h....p..p....s......a....hh..st.pt.........................................h...........................................................hp.hhhsQlpE.Ll...s.......p....Y...sp...h.s....hlWhDGs.hst.................................................stpth.ph..........pchh.phl..c.p.h.pP....................ssll......s......s......+hss.s......h....................................th.p..-..p........s.t.t...................................h...h..a......E..s..sh...o.....l....p.......suWhY.......................tpspph..K..ohppllc.h.hh...c.sVu.+s....us.l.LLNls.Pst.sGhlsstthptLtphGp.aht..................
............................................................................................................................................ 0 276 488 583 +1178 PF02232 Alpha_TIF Alpha trans-inducing protein (Alpha-TIF) Bateman A, Mian N anon Pfam-B_1799 (release 5.2) Family Alpha-TIF, a virion protein (VP16), is involved in transcriptional activation of viral immediate early (IE) promoters (alpha genes). Specificity of Swiss:P23990 for IE genes is conferred by the 400 residue N-terminal, the 80 residue C-terminal is responsible for transcriptional activation [1]. 25.00 25.00 191.50 191.30 18.40 17.60 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.14 0.70 -5.63 13 66 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 37 1 0 62 0 333.00 47 75.87 CHANGED sssss+luhPP....PpsuuPssLYpRLlpELsFsEGPuLLutLEphNEDLFSClPtNtDLYpcstlLSssss-Vlcsspsssst....ssslsLssHGstshPpsPusc--LPsYVpulQpFFhuELRAREcsYs+LLhsYC+ALlcYL+usup+shRGht.ps.pstthpp+hRphltsRYYREsA+LARLLYLHLYLoloRElSWRLaApQstpQsVFsuL+acWtQtRQFpCLFpPlLFNHGlVhLEGcPLsutcLRclNYhRccLGLPLlRsuLlEEpsuPLsp.PsFoushPRosGaLsppIRuKh-uYSpt+sssPt.pspp-HsYu+thsss.NYGoTlEAhL.cPssP .......s.pttphh.sP....PhsssPsuLapRLlc-LsFs-GPuLLotLEpWNEDLFSslPtNtDLYp-sthLSssss-Vlptsps.sst....tstlDLpsHGshshPpsPusc-sLPsYVpulpcFFhuELRAREEuYsplLssYC+ALh+YL+uou+cthRththps.pstshtcphRphIusRYYREsARLARlLaLHLYLsloRElSWRLaApQhhppslFssLph-WpptRQhsCLFpPllFNHGlVhlcGtPlpAtcLRclNYhRccLGLPLlRsuLlEEssuPLsp.PsFpushPRuuGaLhphIRuKh-uYSphtsspPp.tshpEHsYu+thsts.NYGSTlEuhL.sPss.s.................... 0 0 0 0 +1179 PF02430 AMA-1 Apical membrane antigen 1 Mian N, Bateman A anon Pfam-B_2016 (release 5.4) Family Apical membrane antigen 1 (AMA-1) is a Plasmodium asexual blood-stage antigen. It has been suggested that positive selection operates on the AMA-1 gene in regions coding for antigenic sites [1]. 
20.10 20.10 38.70 21.90 17.70 18.30 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.96 0.70 -5.61 7 1565 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 33 38 37 1845 0 343.80 69 86.34 CHANGED WpcaMtKaDIt+sHGSGIhVDLGEDApVtspsYRhPsGKCPVhGKuIhlpNus.sF.LoPluotstpl+ptGLAFPp.......................s-splSPlThssL+ph.YKcp......sDlu.CucaAushVPusspNopYRaPhVYDcpschCYlLYsuhQ.N.GsRYCssD..tsp.csuhhChpPtKscp.ppLsYhopslcpDW-psCPh+sl+sAhFGhWsD..GpC.sht.hhpcpucsh.-CspIlFptSASD.p.cphpcchsDhpclppGhpptNhphltpA.FhPluuhpucphKS+GhGhNWANYDscspKChIhsspPoCLI.spsalAhTALSSP.E.sthsaPCsIhKschhh.............................E.+tpspsht.ss.s........hP.clahS.cK-oLcCss.spphssuoCs.aalCssVEtct.lppsp.l.l...............hsp+phllIhlhhssuslllslhhhaha+.......+pcsu-.............cYD+ht ...............................................................+CPVFGKGIII....E....N.......S......s...TTF.LTPV..ATtsQcLK....D...GGFAFPP.........................TpPhh..SPMTLssM.Rc.h...YKDNc.V.....KNLDELTLCSRHA...GN...M.sP....DsD.......c...NSN...YK.YPAVYD-K-KKCHILYIAAQENNGP.RYCNKD..pSK..RNSMFCFRPAKD.cS.F.p.NYTYLSKNVV...c....N....WE...KsCP.RKNLpNAKFGL..WVD..GNCE-.I.Pa...Vp.E...h...pApDLhECN+lVF..ph..S..ASD.QPpQYEpchTDYpKIppGF+ppNtpMIKSA.FLPsGAFpuDpaKS+G+GaNWuNa..sp.....p..pKC.IFNsKPTCLINspsaIATTALSHP.E.V-.pFPCSlYKsEI.+.............................EIc+pS+php.L.ssDs...c+IlhP.RIFISsDh-SlKCPCtPEhlSNSTCp.FaVCpCVE+RAElppNNpVVl................p........................................................................................................................................................................... 
0 13 17 33 +1180 PF03913 Amb_V_allergen Amb V Allergen Finn RD anon DOMO:DM04509; Family \N 25.00 25.00 66.90 66.60 21.60 20.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.72 0.72 -4.23 3 4 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 3 3 0 8 0 42.80 61 62.87 CHANGED lssCYhAGolCGEKRuYCCSDPGRYCPWQVVCYES+cICuKNCu ..hsChhAGslCGEKRuYCCSDPGRYCPWQVVCYESpcICuKpCu 0 0 0 0 +1181 PF02948 Amelogenin Amelogenin Bateman A anon Pfam-B_402 (release 6.4) Family Amelogenins play a role in biomineralisation. They seem to regulate the formation of crystallites during the secretory stage of tooth enamel development. thought to play a major role in the structural organisation and mineralisation of developing enamel. They are found in the extracellular matrix. Mutations in X-chromosomal amelogenin can cause Amelogenesis imperfecta [1]. 21.00 21.00 21.00 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.51 0.71 -3.64 6 451 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 281 0 28 407 0 113.20 52 97.18 CHANGED lPLPPHPpHPGYVNFSYE..............VLTPLKWYQShhpHQYPsYGYEPhGGWL+p.hlP.hsPQhPpQp....phhPpLsPHHplhhl.......PQpPhh.hPuHHPhhPh.tp...pPph......PsQcPlpPpssp.P....QPQQPspopPPhp.p.PhsPpss..........pPMaPlQPLPPhlPDhPLEPW.uADKTKQEElD ..........................................................................h+p..phhP..h........s.pa............hh.shHph..............P..Q.pPh.h.....PG..pps...hsPhptp....ps....hs.....PsQpPh.Q...Ppssp...P.........QPpQP.lp...P...p.Ph...HPhQPhsPp................P.haPhQ...PLPPhlPDhPLEuWP.................................... 1 2 2 6 +1182 PF04709 AMH_N Anti-Mullerian hormone, N terminal region Kerrison ND anon DOMO:DM04835; Family Anti-Mullerian hormone, AMH is a signalling molecule involved in male and female sexual differentiation [1]. 
Defects in synthesis or action of AMH cause persistent Mullerian duct syndrome (PMDS), a rare form of male pseudohermaphroditism [2]. This family represents the N terminal part of the protein, which is not thought to be essential for activity [2].\ AMH contains a TGF-beta domain (Pfam:PF00019), at the C terminus. 19.60 19.60 20.10 20.00 19.40 17.50 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.36 0.70 -5.51 13 109 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 66 0 28 87 0 261.20 30 66.38 CHANGED lpch.sAFlEul.pcuchsppDLspFGlCssuDssushs.Lp.Luphlscsttp..tLhVLHhpEVhW.......EsslpLpFphP...hsu..uss.s....sLLlhasGshts....ss.cVohousuL.PtsQSlClScDTpYLlLsscts..........puuhpsphhshs.p+ps-.GspLshs-LpthLhGscs+shTphTPlLLh............sus.hsucGplcssPhPps...........sPssT...............sFLspLpRFlRslLsP..pupsss.u.....LshsuLpoLPph.LsLSsotu.LtpLlsSppPolhlF.......sshsusLps.ppuphshpsuLlpcLtt+Lppstscl....Rshsuhsssss.hLpRLhsLssh....ssssuuuss....phRALLLLKALQoVpspWctcR ............................pth.tshhpsl..putht.ps.thFGhCss..tttsh...hp..ltt.ht....t..th.shH.tt.s.hh.......p....Lhhp.s....................hhLhh..s.........st.tl..htt..l....ttQslC.otsTpalhLsst.s..........tut....h...h..s.t...psp...tt..................h........ph...Lhstt.ts.hphpPhLLh................................ssph.hphhs.hs...................................ss.ps..........................FLppLpthlttlhs.......ptt............l..t.htshP...lshSpsts.Lttllpupt..lh.h........t..........s.ht...slhp.lh.cLptshhph....pt..s...sh...hht+Lhths............t...tssts..........hpuhLLLKALQslhttap.p................................. 
2 1 3 9 +1183 PF01425 Amidase Amidase Bateman A anon Pfam-B_191 (release 2.1) Family \N 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.52 0.70 -5.63 44 12240 2009-01-15 18:05:59 2003-04-07 12:59:11 16 79 4062 116 4833 11144 9300 393.40 26 83.65 CHANGED -llcthls+hpthppplsshhphhhppAhptApphcppht.......tssLtGlPlulKDslslp.GhsoTsGothhp..shhssh..DuslVcpL+puGAlllGKTNhsEhuhussspssha.........GsspNPashsc...ssGGSSGGuAAsVAushsshulGoDsGGSIRhPAuhCGlsGhK......PThGRluptGlh.....shssohDpsGshu+sVcDsulhhpsltGt-.tDssshs.............htt.httt.ptl+lGlhpp.h.......................................h.shsstlpcshppshttLpphG.tplhchphs.ht.hhs.hhhhhhssps.t......................h.chhsphp..s..phhsstlptpl.hsshshsth......hhpspph+t.hhpcphtphapp...............lDlllsPosstsshphsth..............t.sthhth.shs..ssssshsGhPulo......lPsuhs.pp.................GLPlGlQlhupthp-ptlL .................................................................................................................hhpthhtph...t.....t...h......p............t.........l.p...s.....h...h..t........h....................p.....t...A..h.t...t....A.......p..t.h......-.......t..............t...........h...................................t.............t.........................................s.............s......L.t......GlP.lulKD...h.....s...s...............t.....G.....h..To.s.G..o......t...h.h..t.................s..h.....h...s.........s.h...........D...A....s..l..........V..pp.L........p....p........u........G............ul................h.l...........GKT.NhsEa.....u....h.....u..s.....s.....s..p...s..s..h..a....................................G.....s.p...N...P......a...........s.........h............s.......t....................ssG.G..SSu..Go..AA......A..V...............A..u.....s.............h..............s............s.h.....ulGoD...s.G..............G..S.....lR....PA.u...hsGl.........sG.hK.........Po......h..G......t..l....s....p...h...Ghh.............
.....sh.s.s.s...h..-.....p.....h..G..P...hs..+...o......l...p...Dsu.hl...h.........p...s.l.............s.......u.......-.....s......t.....c.s.t..sh....................................h...........t................h.........t.....t.......s.........h..........p.........s...........h....+...l...u.l...p.p.hh................................................h....t..h...s...s...p...l.t...t.s..h.pp...s..h.p..h...L...p.p...h......G....t....p.............l........p.........h.....s.....h.......s..........h...p...........s......h.....t......hh...h.h....h..h.......s..-..st.t.t....................................................................h..p.h..h..h..t..t..p....s.........p.t...h....s..........t....h........p.......t....t...h.....h........h..u..t.....h..h...h.....s..s...t........................................h...hpu...t..ph...cp......hh...t..p..p..h.t.p..hhpp........................................h.D..hl..ls.Ps.s.s..t....s..s...th..s..t.....................................s.h...h....h...h.................h....h......s.......................h......h...s..h..s.......G...h.Pu.lo..............lP.s....uhs....p................................................................................Gh.P.l.G.lQlhu.t.hhp-ttl............................................................................................................................................................................................................................................................................................. 0 1337 2800 4004 +1184 PF01832 Glucosaminidase Amidase_4; Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase Bateman A anon Pfam-B_888 (release 4.0) Family This family includes Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase EC:3.2.1.96. As well as the flageller protein J Swiss:P75942 that has been shown to hydrolyse peptidoglycan [1]. 
21.70 21.70 21.70 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.93 0.71 -4.05 162 6435 2012-10-03 00:09:25 2003-04-07 12:59:11 15 195 2899 3 744 3961 1635 133.40 25 32.41 CHANGED htstApphtpc.hsl.s.slhlA..QAhLESuW......GpSt.....huppsp.NLFGIKut..........tst...htht.....TtEh.p.sth.phpspFRsYsohp-Shp-asp.hlpssst......Y...p..sshp..t..tpshpth....sptlpts.G....YA....T......D.PsYupKlhsll...pp.hsL ...............................h....h.phtpc.hs...l..s.shhlA..QAhLESua...........GpSp...........................hup.p.sp...NlFGlKu......................h.h...........................t.ch.s..spt.h.shtspa...+...t......a....sS.....hp.-u.lp.sasp..hlpps.t...................Y.........p..shhp....t.........ttshpps..........sttlppt.t...........YA.....T.................D..spYupc.lhsllppht............................................................................ 0 222 434 613 +1185 PF02274 Amidinotransf Amidinotransferase Mian N, Bateman A anon Pfam-B_5784 (release 5.2) and Pfam-B_1850 (release 5.5) Family This family contains glycine (EC:2.1.4.1) and inosamine (EC:2.1.4.2) amidinotransferases, enzymes involved in creatine and streptomycin biosynthesis respectively. This family also includes arginine deiminases, EC:3.5.3.6. These enzymes catalyse the reaction: arginine + H2O <=> citrulline + NH3. Also found in this family is the Streptococcus anti tumour glycoprotein [4] (Swiss:P16962). 
22.00 22.00 22.00 22.00 21.90 21.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.74 0.70 -5.32 55 3546 2012-10-01 20:45:11 2003-04-07 12:59:11 12 9 2331 60 857 2617 1976 321.90 24 89.76 CHANGED hhR.PsphthN.spostsshh.............sst.h.ppAhpEacshlptL+pp.GlcVhhlcch.t..................................................................................sshP-ulFspshhus....ts..uslslhPMhs.sRRpEpphshhhhhpp.....hth..hhchs.thc.psthLEG.Gshlhspppplshusts.Rosppslcphspphshp..shhhps...hssptpshhHhsshhslusphsllshsslhsst............................cphlhpt.......Lptss.hcl.Iplu.......tcphh.shuuNhLpl.....s......h..s.hAass.s.sppphhcp............tscllthslsplp.tGGGusRCMhstlah ......................................................................................................................................................hpPt..h.phh.s.hpphh...................ssh..lptA.p.c.EH-taspsL......+pp...G...l..c..Vhhlc.s.l.h.t.............................................................................................s.shPsh....h..F....sRDstsh..............ls....ss...l....slspM......h.t..sRppE....shhhp...hl...hphp.................................th.hhh...shp...t................ss............plEG.GDhlh..l...u...c.............s..h.....l.s.......h........G..........h........u.........p...R.Tst.pul-...............pl...sp..plhpp........h....p.......l............l...s.h.....th...s......p.......s...c......s........h............hHLDT..sh.o.hl..........-........h......c......p.........hs.h.a.s.t.lh.st...............................................hph.ltpt...................L.t...h...c..p...hpl...Ipsu...................................tcEta....s...u..sNsLsl................ssGsV...................................lsas..p..s.s..h....s..s.p..h.L.c..ct..............................................Gl..c..l..lt....ls......ss.ELs..+GtG...Gs+CMohsl.R.............................................................................................. 
0 307 557 719 +1186 PF01979 Amidohydro_1 Adenine_deam; Amidohydrolase family Bateman A anon Bateman A Domain This family of enzymes are a a large metal dependent hydrolase superfamily [1]. The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source [2]. This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit [3]. Dihydroorotases (EC:3.5.2.3) are also included [4-5]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.00 0.70 -4.79 58 18418 2012-10-03 00:45:34 2003-04-07 12:59:11 15 103 4730 152 4606 25313 10133 289.10 14 66.61 CHANGED hlhPGhI...DsHlHhhps..t..................................h.shtpshpphlpuGsTsstsh...tssss................................htchlts..s.ph...hthh..spsuhts.htphhhsts.......................................lchhpshtth.....thhsshstpsstthtpsthtstsphscc.....t.slhhshHhtpssss.....................................h..h.....................hh.h......sthphhhhshhls....p..th.......tsstlshpshusct............t...t...lspshhphhphhpp.tht....ht.t...s....................................hsstchlphsThssAcshGl.pphGslpsGchADlllhc 
...............................................................................................................................................................................................................................................................................................................................................................................lhPGhl......Ds..H...h.H..h...h...t.s....h.......................................................................................................................................................t...s......h....t.........h...h....p..t...h...h.....t....s.G...s.....T.s....h.h.s.h......ts.............................................................................................................h...p.....h.h....t........t...t........................................h...................................h.....s...........h.........h.....................................................................................................................h.c.....h.h.p....t.h....t....t...h......................................................t....h.....h............s......h......s...........p.............s.........s......t..........s..........h.........h..........p.........s..........t..........h.........h......s................t......h......t.......h....s.......p......t...............................................t..............t....h......h....h...p.....h...c...h...t..p...s...t..tt..........................................................................................h..h.............................................................................................t...hhh...h................................t....t....h.....t.....h......h.h....h...s..h...hlp.........................p.........ph...t.h..........h................t.......t...........t......s..t......h...t......t...t.............h........s...........s.....t....p.....................................................................................................
...................................................................h........t......p....s.....h.....h..........h....h....p.......h...h.p...t.........th..............................................................................................................................................................................................................hsht..p..h...l.t..h...s....o.h....s.s....A....p.......h...h......s.........h....................t.........p........h.......G......p....l.tsG.t.ADhllh....................................................................................................................................................................................................................................................................... 0 1418 2812 3769 +1187 PF04909 Amidohydro_2 Amidohydrolase Bateman A anon Pfam-B_4687 (release 7.6) Domain These proteins are amidohydrolases that are related to Pfam:PF01979 [1]. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.72 0.70 -4.84 132 6287 2012-10-03 00:45:34 2003-04-07 12:59:11 9 30 2257 66 2486 6470 2429 284.10 13 85.76 CHANGED lDsHtH...................................................................t.th...h.hthtsh..thh.hts.hsh.tts....shh....ttt.s..shspt............h..h.....tthh.p..t.s..sthhshsshs........t..p......shtphtcth......................................................tphshtG....................lp..ht..thtss...stth..............................................hhp..........thtch.s.l.....slt.la......................................................................hthtp.hhht.slh....ccaPp.......lplllsHhGhs..................htth..hthhtptsslah...chst...h........................h.ssh.t.....h.....htph...hphhs.s...-+llaGoD.....................aPhhsh.tts.hsthh.th.h.......................stptpcplhtsNAt+lath 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..................h.....................h.......t..h....s....p..............................h..h.......t..hh..t.......t...s.........s..t.h..h..s.h...s......s......h......s.....................t......t...................sh..tc.h.t..c....th..................................................................................t....p..h....u.....h.p..G...........................lp........l...........s.....t..h.......t.....s..h............t...s..tthh...............................................s.hhp..................................t.htc.h......s...h...........s.lh.lH.................................s......................................................................................................h.t.t..t..t....h..h...h..t...slh.........pc..a.Pp........lpl..l.......l....sHhGts....................................................ht.th..h..t.......h...h...t.....p.....h.....s.....s..l..a.h......c.hut..h....................................................................shh...h...t.....h...............h.t..th......h.p..t..h.u.............-.+...l.l....aGSD..........................................a.P...h....s..p.......h....tt....httht...th.h................................................stpt...tp..plhhpNAtclat........................................................................................................................................ 
0 680 1536 2082 +1188 PF00155 Aminotran_1_2 aminotran_1; aminotran_1_2; Aminotransferase class I and II Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.94 0.70 -5.51 48 52439 2012-10-02 18:26:03 2003-04-07 12:59:11 16 160 5588 558 14604 63947 26038 337.30 17 82.76 CHANGED sphlsLussphths.....hhtshhcst.c..........htttpshptYhshpuhsphcptlA.....chht........t.......h..ptp.slhhsuGssushtshhhhh......h.suct.lllssPsassatpshchsssp.lhhhth.............pshplDhpslppslpp...............phllhssspNPsGsshshcplpplhphsppp.shhllsDEsYtshshut...t..s....................hhhlpst.p......hhlstohSKsh.GlsG.RlGhlhu.ss...................pllstlpphspshhs...sststthssshLsssthh........plpphtpcltpp+phlhstLtph.....shshhtsp.uuhashhshsst.................th.ph........................Lhpc.hslhlhshp..th........s.hhRlsh.sshospplcthhptl ..............................................................................................................................................................................t.................................................h.h.tt.h...........................................t..t...s...h...h...h...Y........s.....s.........t.....u........h............t.......L......c...p....s...l..u...........phht......................................................h.......h....p...s.p..........p..l...h...h.....s...s...G.....s..p......t....s.....l..t.....h....h....h....t...h..h.............................h..t....s.......u......-........t........l.....l.....l....s.......s......P.......s.....a.............s.......s......h..........h.....t...........s.........h............p.....h.......s.........u............s...........p..........h...........h....t...h..s....h.........................................t...h....t......h....c......h....p...t......l....p....p....t....l....p..p..................................................................p..h.l.......l.
....l.s....s...s....p....N...P........o.............G........s.....s...........h....s.....h....p.......p....h..p...p....l......h....p.......h...........s.........p.........c.............h......s..........h..............h...........l..l................D......-....s...Y...s....p...h..s....h..s.tt......t..s.....................................................htt.h..s..t.t..p..................h.lh.h.s...o...h.......S......K.s......h...........u........h..........s....G.h...R..l......G...a..h..s...u...st.................................................pl.h...p.......t....l.......p.......p.....h....t.....p.......h....hh...............ss.h..s...t...h....s.......s....h..s...s.....l...p....s...s..t.......................................th.p.p..h..p....p.....t......h....t....p....p....p....p...h...h.......h..p...t....L.pph..........................................sh..p..h...h.....t....s......p......u.......u........h....h....h...h..h..t..h..s.tt.............................................................t.s...p..h..h..pt.......................................L..h.....p.....c...........s.....l.....h.......l...h..s..s....t....h.sh...............................tt..h.lR.lsh..........h.h...s...p...p.h.pthht........................................................................................................................................................................................................................................... 
0 4349 8681 12025 +1189 PF00202 Aminotran_3 aminotran_3; Aminotransferase class-III Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.97 0.70 -5.59 18 23135 2012-10-02 18:26:03 2003-04-07 12:59:11 16 134 4515 255 6601 18830 12804 322.10 28 72.23 CHANGED sls+ucGsalh.Dh-GpcYlDhhSuhussshGHsHPcllpAlppQtsplsass.tshspcshhpluccLsphh....st.sh-+lhhssoGoEAsEsAlKhA+...h....shsp+s....................cIluFpsuaHGRThuulSl.ssssth+phhtPhhssh........hhlPaschts...............t.......h..hhpphpsssl..AAlIlEP.lQGEGGhhhsssuaLptl+clCccaslLlIhDEVQTGhGRTGchFAt-a..hGlsP...Dlhshu.KuluGG.hPluAslsssclhpsh..psss.......HGsTasGNPlAsAluhusLcllpc-pllpsspchushLpptLp.pLpct..shltclRGt..GhhhulEhsc .........................................................................................................................................h..t+ucG.shlh....D.h.-...G......p....c.....Y...l..Dh.h.u..u.h.u................s.h.s.h.G.H..s....+...P...p.l....s...p.....A..l.p....c.........Q..h....p........p..h..h..p..h....................s.h...........s...........s.......p...s......t....h....c..LA....cpLsphs.....................................sh....c.....c....l....h..h.s..s..S....G...oEA....sEsAlKlAR.......................................s......h...s...t.....+s.................................................................................p.l.l.sh..p.s.u.aHG....co...h.......u....s...l.......u........h.......s......s.......t.....s......s.......h.......p.......p.......h......h......t...s.....h...h..s..s.h..............................ht..s..s..hs.s..hht.....................................................................................................t..............h..h......t......h......t.......s....p...p...l.........AA..l.ll.EP..l.Q..G.......p..u.......G..h..h.....s....s..s.....s..aLp.........tl.......Rc.......l.................C...c................c..............a..............
..s....hL.................L.....................IhDE...V.t....o......G..h....G...R....T....G.........p.....h.....a..........A..........h............p...p..................h...u.....l......p...P.................D....l...h....s..h...u...K.u.l........u..G.G...hP.lu.A.h.l..s..p....p........c..l..h..p..s...h....t.sss........................au.s.Ta..u.G...N....Pl.A..s.A....s...u...h..A...s...L.c..l.....l.........p....p........p......s.....l..h....p...p.......s.p......p....h.u....p....h....l...t.....p....t.....Lp.....pl.....t..p..........p.................s.......h.......l..t..c...l....R..uh......G..hhhulph..p................................................................... 0 1940 3976 5469 +1190 PF01063 Aminotran_4 AA_transferase_C4;aminotran_4; Aminotransferase class IV Finn RD, Bateman A anon Pfam-B_607 (release 3.0) Domain The D-amino acid transferases (D-AAT) are required by bacteria to catalyse the synthesis of D-glutamic acid and D-alanine, which are essential constituents of bacterial cell wall and are the building block for other D-amino acids. Despite the difference in the structure of the substrates, D-AATs and L-ATTs have strong similarity. 
20.50 20.50 20.50 20.80 20.40 20.40 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.44 0.70 -4.89 78 10610 2009-01-15 18:05:59 2003-04-07 12:59:11 14 29 4578 127 2921 7879 6007 228.80 22 69.44 CHANGED hhlcpHhpRLtposptl...........thshshpp..........lpphlpphl........cths...tpss.hl+lh..lsps.t.h........................thhhhtttts.phhhspphphs.tsh........................hsphKosshhtthh.ttctt.....t....uh-c..s.lhhsppGt...lsEustuNlh......................................hhcs...............sp....lhTPshsp...s.hLsGlsRptll.phstt..sh.....lp-pslshp-lt.puc.....phalssohtslhP...lpplsst.h...........t..ht.l....hpth ................................................................................................................................................h.cpHhpRLppS.uctl..............h.hs.h.s.h.c.p..........................................l.h.p..t..lpphl......................ptst...........hsss..hlR.h........l.h..t.s...s...s....t..h...u..ht.sstt....h................................sthhhh.h.th..t.s.h..p.h..h..h...s..p..t...h.p..h...s..s...ts.h......................................................hsthK.s.s..s...ph...s....ssl.hspptA......................tpp.......Ghs-........s..lhls....t..p.Gh...............lsEsussNl.F......................................hh....c..s.................................s.p..........lhT.Psh...ss...........s..lLsGI.TRp..sll..pl..Acp...h.....u..hp....................lp.E...c.p...l.s.....h.c....-Lh...pA..c................Esahsuos..s.....lsP.........ltp..l..p..ththt...............u.hh.t.h............................................................................ 1 919 1864 2466 +1191 PF00266 Aminotran_5 aminotran_5; Aminotransferase class-V Finn RD anon Prosite Domain This domain is found in amino transferases, and other enzymes including cysteine desulphurase EC:4.4.1.-. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.96 0.70 -5.72 45 23746 2012-10-02 18:26:03 2003-04-07 12:59:11 14 88 5192 153 7010 41460 18473 338.30 19 85.74 CHANGED lYLDuAAToppPpsVl-uhpcaYpphtuNlHR..uhHshuppuopthEpsRcpVApalsAps.cEIlFTpGoTc.ulNLlAtuhttt.....lpsGDcIllophEHHANllPWpplucppGsplc..hl.slsspGpl-..lcpl.pphl...s.scT+lVulsalSNlhGs.lpPlpclsphs+p.t.GAlllVDuAQulsHhslDlpplssDFhuFSGHK.hhGPsGlGlLYs+cchLpph...Ph.hGGpMlpplsh.psts....apphPh+FE.AGTPsluuhlGLusAl-al.ppluhstlpp+pppLtphshppLts.lsslpl..hG.s...pppsulluFsl.pslcs....pDluphLcpp.GIAlRu..G++CA.....Phhthhsls.......ushRsS.hshYNTp--l-pl .........................................................................................................................................ah...sus.s...h.s...t....l.h.p.t....h....t.......h..........t..................u..s..s.....p...........th.p...t.h..u....t........p....h...p.....p.....h..h......c.....p..u...R.p.....p.l..t.....p......h......l......s......s........p........s.............p........c......l.....l...a..s..p.....G.........u.T....p....u.h.........s.............h........s..............h..............t....s...h......................................................h...p.............t...........s........c..................c...............l..............l...........h.............s....t.........h....-.............+......h......u......s.....h.......................s......h..........p.....t......l..........t.........................p......t.....G........h........p.......l...p....................h.l......s.......s.....s.............t........s......u.......h.....l..s....................h.p.p..l.....p.t..t.l............................s...s......c.......s..........t.......l......l.....s.......l....s.....t..s....s.....s..................h...G....s....l....p.....s.....l..............t..........p.......l.........s..............p........h........s...........+................p......t.......s.......
....s...............h........h........h........l...D...........u............s..............p...........u............h...........u...........p...........h.............s...........l...........D..........l.........p...........p.........h..........s..........l.............D............h............h......s.......h......o..u........a.....K...h.....h......G....P.....t.....G.....l..........G.....h.l........h.s.....+........c....p......h.....h..........t..........p.....h.................t...s.........h........h......h......u..............u....t........................................................p.t.ts.........................................................h....c.....s.....G......T................s...h.....h.....s.......l........h......u........h..........s.....t.....A.....h......c.......hh......t..................p...........h................s.................h...................p................t.................h.............t...................t......+...t...p.............p..........l...t..............p..............h....h......h.......p........t...........l........p.....p......h.........s............t............h.....t...l...............................s.....................t........................t..................t.......p.......t..................s.......s.......h......h....s....h.....s......h......t......t......h...c.s...............................p..t....l...h....h....h....L...p...p.....t....s....l..t...lps........G....ptss..................................................................................sshRhS.h....h...s...t.plp.................................................................................................................................................................................................................. 0 2396 4497 5949 +1192 PF02293 AmiS_UreI AmiS/UreI family transporter Bateman A anon Bateman A Family This family includes UreI and proton gated urea channel as well as putative amide transporters [1]. 
25.00 25.00 28.00 27.70 24.90 21.80 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.30 0.71 -4.60 24 489 2009-09-10 14:59:15 2003-04-07 12:59:11 10 1 261 0 51 381 6 180.40 56 94.65 CHANGED Mh.GlsLLYVGAVLhlNGLhlLG+lss+ssulhNlFsGsLpllsshhhlhsu................s.ucstslhuuAsshLFuFTYLaVulNplhsh.Du+GLGWaSLFVAlsAlshuhhu.hsss..........tchhhulhWlhWulLWhhFFLlLuLt+.plpphsualsllpGlhTuhlPuaLhLsGha ....ML.GLlLLYVGhVLlsNGlstLsKVDsKSsAVMNhFVGGLSIlsNlllIsaS................hu..pchsoaYusATGLLFGFTYLYsAINahFGL.D....hRsYuWYSLFVAINslPuAILS..aoshh.......hscusWaAIIWLAWGVLWLTuFIENhLK.....h.sLG.KFTsaLAIlEGIlTAWIPAaLLFhp+W............................. 0 15 38 47 +1193 PF02461 AMO Ammonia monooxygenase Bateman A anon Pfam-B_2301 (release 5.4) Family Ammonia monooxygenase plays a key role in the nitrogen cycle and degrades a wide range of hydrocarbons and halogenated hydrocarbons. 24.40 24.40 24.40 26.10 24.10 24.30 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.88 0.70 -4.65 8 15067 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 915 12 14 15207 1 149.70 61 93.12 CHANGED uAutSsh+S+AEAstssRThDalhLshLFhllLGuYHIHhMLThGDWDFWlDWKDRRhWPTVsPIVuVTFPAAAQuaFWE+FRLPFGAThsVLGLLlGEWlNRYhNFWGWTYFPINhVFPouLlPsALaLDlVLhLSpSallTAlVGuhGWGLLFYPuNWPIlAshH.PsEhpGhLMSLADlhGFpYVRTGTPEYIRMVE+GTLRTFGKDVVPVAAFFSGFVShLlYFlWWalGKWFSTs.+alpp ..........................................................................................................................................................FahWo....aYPINF.VhPS.oM..IPG.ALh.hDslLLLT.pNWhlTALl.GGuuF.GLL...F.Y.PGNWP.IFGPTHLPl..Vs.EGs..LLSlADYpGahYVRT..GTPEY.V.RlIEQ.GSLRTFGGHTTVIAAFFuAFhSMLhahlW.WYhGthasss..a...h.................... 
0 6 11 13 +1194 PF00909 Ammonium_transp Ammonium Transporter Family Finn RD, Bateman A anon Pfam-B_596 (release 3.0) Family \N 19.20 19.20 19.30 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 399 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.45 0.70 -6.00 25 6224 2009-01-15 18:05:59 2003-04-07 12:59:11 16 40 3496 34 2233 5420 6440 367.80 30 87.31 CHANGED sahllsuuLVhaM.psGhuhhpuGhs+sKNslN..hhhhshhshsls..slhahhaGauluFGps....hsuhhGs..........hthshhshsssshsphh.............hhFQhsFAs.sssuIlSGulAERh+hsuallauslhssllYsshuHWlWs...........sGahsphssh..........DFAGusl.VHhsuGhuGLssAhllG.Rhs+h......pstshpsHslshshLGshlLWaGWaGFNuGSuhshsshsts.............sslsTshAuAsGulshhhhshlpp..GK...shltstsGslAGLVAI.TsusuhVs.PauAlllGllAGllshhuht....hLpp+l+lDDslsshslH...GhuGlhGslssGlFut.th................sshhtGss....t.LhhQlluhhsshsauhssshllhhllshhhs..LRlot-pEhhGlDhspH.u-suY ...............................................ahhh.sshLVhh..M...h.....s.......G.......hAh...h...uG.h.l.R..p.K.N.s..l..s.......hh..hp....shhshuls..sl........ha...h..h.h...G..aulu......F..ut......t.....................h.s......s..h.hGs................................................h.h..t......s...h............t.......s.......s...h......p..h.h................................h.h.hF....Q..h.hF.Ah.....l.ss...........u...l...l...s........Gu....hA...E..R...h.+..asshl.lFssl.hssl.lYsslua..h.lWu............................................s.G..a..l....s....p..h.Gsl..........................................................................DFAG....usV.VHhsu......G..hu......u....Lss..uh......h.....lG......t.Rh.shh.........................tpsh.....s...H.........s..l.sh.shl.Gs.h........l.L...W.hG...Wa.G.F.Nu...G..Ss.h.s.s...ssh.ush.........................shlsThlAs.Auus..l................s...h....h....h....h....p.....h...h...ht..................s.K......s.......s....hhs.hhsG.slAGLVu..........I.....T................su..s.u..............h.....ls....shuAlllGhl..u.G.hl.shhuhp..............................hlp...p.t....l....
.p....h....DDsl.s.shulH......GlsGlh...Gs....l...h...sGlFustsh.......................................................h..h.h..h....st..........pl..h...h...Qlhu..h.h.hsh...s.ashls....shll....h.hllchh..............hs.......LR..l........s..p-p..E..h...G.lDhs.H.ut.s................................................................... 1 776 1422 1922 +1195 PF05145 AmoA Putative ammonia monooxygenase Bateman A anon COG3180 Family This family are annotated by COGS as putative ammonia monooxygenase enzymes. 23.30 23.30 24.20 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.86 0.70 -5.38 8 1520 2012-10-02 17:14:55 2003-04-07 12:59:11 7 4 1297 0 296 1116 1205 282.70 30 90.48 CHANGED aMlGsllAuIlsspht.hslphshhhh.......tsuQsIlGltIGtplosslltslhspaslllsssllTlLuuslhualh+RhupsshsTAhauohPGGuotMlslu.pchGAsptlVuhsQshRllaVshssPhlsphhlss.......ussshshhhs....lslthlslLhhsuhlsuhsuphL+hPuPaLLuslLluAlVphG......hslphtLPsahhuhuQhllGhsIGsphs+shhtptsRhhhtsllsslhhlhhushhAhllohLssl-hhohhLuhsPGGlsphulsAtuLp.h-suhVsAhQshRlLhlLhlssslh+ahp+hts ....................................hhGshlss.llhs.h.hh.....hpl....p....h....Ph.hh...........hu.sllGs.lutsho..sl.ltsltt........h......l.lhlh.l.hsll..u.....h.l.hu.a.l....h.h....+.h..........u.............t..l.s....h............t.......Tuhhus.PGu...hu.tMl.shA..p-h.s.A.sh..thVuhhQhlRllhVshh.ssh.ls..th.h..hss..................................t..ss.s...t..h..h..hhs......................hsh.t.......l....llhh..l..h..u....h..l.u.u.h.h.....up...hl+.....lPusthLsPhl.s.su.l.h.phs.......thl.s.h.p..L..P...p...a.L.l.shA.hhlGhpIG.l..ta.s+th.hht..th.Rhlsth.llshh.hL..l.hh.sshhuhhlshhh.p..l.shhssaLusuPGGlsplsllA..hsss..ADhuhlhuhQslRlhhlLhh...s...P...sl.h+hh.p...s..................... 
0 74 166 237 +1196 PF04896 AmoC Ammonia monooxygenase/methane monooxygenase, subunit C Mifsud W anon Pfam-B_6611 (release 7.6) Family Ammonia monooxygenase plays a key role in the nitrogen cycle and degrades a wide range of hydrocarbons and halogenated hydrocarbons. This family represents the AmoC subunit. It also includes the particulate methane monooxygenase subunit PmoC from methanotrophic bacteria [1,2]. 25.00 25.00 34.70 34.70 22.70 21.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.61 0.70 -5.35 9 127 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 57 12 24 127 22 191.30 46 94.37 CHANGED sspssususA.hsps.laDh+hhhlGlhshhsFYlhlRhYEpsaGaptGLDSFuPEFppYWMslhahEhslEhlssLslhGaLWKTRD..cslsslsPREElRRhFhhlhWLsVYuhAlYWGASaFTEQDGoWHQTlIRDTDFTPSHIlEFYhSYPlYIIhGVuuFhYA+TRLPhFu.KGhSlsaLhhhsGPFMIhPNVGLNEWGHTFWFMEELFsAPLHWGFVhFGWhuLuVhGVslQllsRhpcLh.sh-hsts....c .................................h.s...hhhshhhh.hhhhhh.hYpthauht..tGhD.htPtapphWhshhhhph.l..h.h.hhhualhho.Rc.....cph.slsPp..El+Rhh..hhhalshYhhu.laauuSaFsE.QDuoWHQsllRDTsFTPSHll.FYhSaPhalhhGhushhYAhTRlPh.at...cuhshshhhhlhGPhMlhPNVGlNEWGHsFWFMEELFsAPLHWGFVhaGWsu.LuhhGlhhQhlschspLh.ch......h............ 0 12 19 23 +1197 PF03782 AMOP AMOP domain Bateman A anon [1] Domain This domain may have a role in cell adhesion. It is called the AMOP domain after Adhesion associated domain in MUC4 and Other Proteins. This domain is extracellular and contains a number of cysteines that probably form disulphide bridges [1]. 
25.00 25.00 25.60 26.20 24.20 24.90 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.45 0.71 -4.21 15 277 2009-01-15 18:05:59 2003-04-07 12:59:11 12 32 81 0 184 274 0 146.50 32 15.41 CHANGED hspspC.cWlpsc.chL.sah....p-LPo...CPCohsQuhhDp.........uRFhs.hcsst..+pphshapPGAhaClRS..sspuSst.Gu.................QpCCYDssGpLh.....................sRG+shGsPshhps.hsPtL.paphDlhPahhC........Chas-p.uh.Ctha.hcR ......................................shspCppWhppc.....chL.p.ah.....p-Lss...C..PC....ohspshhDp..........uRhh..hssst...........ppphs.h...apsuAh.a.ClRS.....hpuosh..uu...................QpCCYcssGpLl.....................sRG+s.hGsPshhps......h....sPtL.Ha.hhDllPahhC...............shhs.....s..pp...Cp....c................................... 0 57 70 120 +1198 PF04739 AMPKBI 5'-AMP-activated protein kinase beta subunit, interation domain Kerrison ND anon DOMO:DM04946; Family This region is found in the beta subunit of the 5'-AMP-activated protein kinase complex, and its yeast homologues Sip1, Sip2 and Gal83, which are found in the SNF1 kinase complex [1]. This region is sufficient for interaction of this subunit with the kinase complex, but is not solely responsible for the interaction, and the interaction partner is not known [2]. The isoamylase N-terminal domain (Pfam:PF02922) is sometimes found in proteins belonging to this family. 
21.50 21.50 22.20 21.50 19.70 18.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.39 0.72 -4.15 56 516 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 289 36 306 507 2 114.60 35 32.87 CHANGED ........tsP....tpas...splPsh..p.............................pph.pPPtLPPaLppsl........LNp.ss................................................................................ppc-.........sshL.....shPsHVlLNHLhspu..I+ss..VlslusTpRY+pKaVTpl......lYpPh ....................................................................................................ts...............................................................spcphpsPPhLPPaL.hpsl........LNpsss......................................................................................................................................................................hps-..............sulL.P.PNHVhLNHLastS...IKcu..VhslusTpRY+pKYVTslLYKP............................................ 0 85 165 252 +1199 PF02166 Androgen_recep Androgen receptor Mian N, Bateman A anon IPR001103 Family \N 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.48 0.70 -5.61 3 248 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 130 3 28 232 0 263.10 50 56.51 CHANGED GLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAASAAPPGAsL.......QQQQE.............TSPRQQQQQQpGEDGSPQAHpRGPTGYLALDEEQQPSQQQSALECHPESGCVPEPGAAsAASKGLPQQPPAPPDEDDSAAPSTLSLLGPTFPGLSSCSADLKDILSEAGTMQLLQQQQQE.................AVSEGSSSGRAREAoGAPTSSKDSYLGGTSTISDSAKELCKAVSVSMGLGVEALEHLSPGEQLRGDCMYAPLLGGPPAVRPTPCAPLAECKGSLLDDSAGKGTEETAEYSPFKGGYTKGLEGESLGCSGSSEAGSSGTLELPSTLSLYKSGALDEAAAYQSRDYYNFPLALAGPPPPPPPPHPHARIKLENPLDYGSAWAAAAAQCRYGDLASLHGGGAAGPGSGSPSAAASSSWHTLFTAEEGQLYGP 
...........................................................................................................pEslQsPusppsps.s.shAPPuspL.................Qp.....tp....................................pp.....pp....p.............QQ........s-ssSsps..Ru.suYLsL-ccp..p..su.....................................................................................................................p....................................................s......t......s....s.RsREssuss.S...SKDsalG......us..oTISDoAKELCKAVSVShGLuhE..........u........E......t.t...........s..s..c.t.........pt-pha.s................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 2 3 7 +1200 PF03139 AnfG_VnfG Vanadium/alternative nitrogenase delta subunit Mifsud W anon Pfam-B_1227 (release 6.5) Family The nitrogenase complex EC:1.18.6.1 catalyses the conversion of molecular nitrogen to ammonia (nitrogen fixation) as follows: 8 reduced ferredoxin + 8 H(+) + N(2) + 16 ATP <=> 8 oxidised ferredoxin + 2 NH(3) + 16 ADP + 16 phosphate. The complex is hexameric, consisting of 2 alpha, 2 beta, and 2 delta subunits. This family represents the delta subunit of a group of nitrogenases that do not utilise molybdenum (Mo) as a cofactor, but instead use either vanadium (V nitrogenases), or iron (alternative nitrogenases). V nitrogenases are encoded by vnf operons, and alternative nitrogenases by anf operons. The delta subunits are VnfG and AnfG, respectively. 
20.50 20.50 21.40 115.70 19.40 18.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.35 0.71 -4.28 20 108 2009-09-11 14:59:01 2003-04-07 12:59:11 10 2 80 0 25 83 0 111.80 49 82.15 CHANGED hcs+l-pLhDYIhKpCLWQFaSRuWDREcQNEGILsKTtplLsGEsscpsTP.tDRsYasDAlsLAcsaKp+aPWlsshsK-EI+pLhpuLKpRlDalTITGSLNpELTcppY ...ps+lDpLhDYI.cpCLWQFaSRoWDREcp.EGlLspsscLLsGE.s..huTP.pDRhaasDAlslAsDh+c+aPWhsphsK-EIptLhpGLKsRlshlTITuSLNcELscchY. 1 9 18 20 +1201 PF00212 ANP Atrial natriuretic peptide Finn RD anon Prosite Family \N 21.20 21.20 21.90 24.30 21.00 20.20 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.78 0.72 -3.87 39 340 2009-09-10 22:47:03 2003-04-07 12:59:11 13 3 149 7 90 346 0 32.00 49 25.16 CHANGED phhpstcps+..h....sGCFGt+lDRIGShSGLGC ................s......tts+.phhs....sGCFGh+lDRIGohSGLGC..... 0 3 11 36 +1202 PF03452 Anp1 Anp1 Finn RD anon Pfam-B_4441 (release 6.6) Family The members of this family (Anp1, Van1 and Mnn9) are membrane proteins required for proper Golgi function. These proteins co-localise within the cis Golgi, and that they are physically associated in two distinct complexes[1]. 
28.80 28.80 28.80 29.00 28.50 28.60 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.77 0.70 -5.25 49 428 2012-10-03 05:28:31 2003-04-07 12:59:11 9 10 142 0 314 409 3 260.40 42 61.82 CHANGED phstsslpa..YDLsphpuos..cuhtpcE+VLIloPL...upa.....lshaacsLhpL.TYP..HcLI-LuFllscop..DsshptLtptlpclQ.................................................p.ts..tpppFtplpIlcpDFsphl..uQshp-RHuhpsQt.RRchMA+ARNhLlhssLpPtpSWVhWhDuDI...csPsolIpDLhpHs+DVlVPNVap.h.s..s.....s.pPYDhNSWh......ES-puLcL.AssLs-D-lI...VEGYA.EasTaRshhAahtD..spGsscpphpLDGVGGsulLsKAcVaRs..GuhFPuFs ................................slphaDLsphpuos..puhp....p.cE+lLlloPL+s.uupa........LshaFs...pLhpL...TYP..HcLI-LuFLVuDop..DsThshLpptlpclQ..................................................p...ts...pptFtplo.Il....c+.DFsth.......l......uQshp-RHu..htsQs.RR+hMA+ARN.aLL.ssLc.....P.tcSWVhWhDsDl...psPso.llpDL...hp.....Hs..KD....llVPN.....lap.h..s..s.....stpPYDhNSWh......ES-pulpL.ApsL.sc..Dsll...VE.G.Ys.Eas...T.a........Rsh.hAah..t...D...spussctEh-LDGVGGsulLsKAcVaRs..GshFPsFs........................... 0 61 156 266 +1203 PF03374 ANT Phage antirepressor protein KilAC domain Mifsud W anon Pfam-B_3485 (release 6.6) Family This domain was called the KilAC domain by Iyer and colleagues [2]. 24.80 24.80 24.80 24.90 24.60 24.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -9.94 0.71 -4.02 66 1631 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1146 0 193 1289 13 109.40 28 45.26 CHANGED Lc...pp.....ltttpPKspahDtlssus.ssltlsplAKhl.......slu..tpcLhpaLp-.pshlh+...sssphhshQct.hctGhhphKpsshtpss.Gp.cht...hss+hTsKGp....talhphlhppshht ...........................ppp..ltt.tPKstasDtlhpu.....p...sslhlsphAKhl...................slu.tpcLhpaL+c.....pshlh+.....ssp............p.......hPhp.ch...hspGhF.pl..K..ps..shs....p...ss...Gt.hp.hs...........hs.s+lTsKGp....talhphhhptt..h........................................................... 
0 53 133 161 +1204 PF04715 Anth_synt_I_N Anthranilate synthase component I, N terminal region Kerrison ND anon DOMO:DM04829; Family Anthranilate synthase (EC:4.1.3.27) catalyses the first step in the biosynthesis of tryptophan. Component I catalyses the formation of anthranilate using ammonia and chorismate. The catalytic site lies in the adjacent region, described in the chorismate binding enzyme family (Pfam:PF00425). This region is involved in feedback inhibition by tryptophan [1]. This family also contains a region of Para-aminobenzoate synthase component I (EC 4.1.3.-). 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -11.08 0.71 -4.17 111 5751 2009-01-15 18:05:59 2003-04-07 12:59:11 8 30 3830 10 1512 4547 2202 145.90 22 28.89 CHANGED shhsPlshatplts...pt.......shlLESs.....t..ssp......uR.aShluhsP.........................ht....hpssththtt..........................tt.....p......................shph......Lcphhsphp.h.............t..P..FtGGhlGYhuYDh...hctlc.ph...........sshs.hP-.hthhhh..-phllaDHhppphhl ........................................................................................................sshthatp.Lpt.............tt.............hshLLESs...................t.........stp................uR.aS.llsh...ss......hhp.....................lp...........stspp.splpt............................................................tptph.ppts......................shph...............Lc.p.hh.p.p.hp.....................................t..s....F.sG.GhlGhhuYDh..................lpthE..pLs....p..........htsc.t..hP..D..hthhlh...-plllhDHhcpphh........................... 
0 477 968 1295 +1205 PF03497 Anthrax_toxA Anthrax toxin LF subunit Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 28.20 27.20 20.80 20.00 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.92 0.71 -4.44 7 96 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 91 65 11 94 0 173.00 35 16.76 CHANGED t+sh.....s.tshctsGlssE..ascuhtplAcEpNshlhhRsVN.huToLIcpGh.uTKGhsl+uKSSDWGPp..AGaIPhDtphSK.......htsssttlp+hs.tsppul........tutuhsplsLplscpRlsELppsGshshstcsht....hshphssupphEFclppssss.atl.hhth.ss....lpVhGsshsht ..............ps.t.hctsGlssE..aupshpplApchNslIGlRsVs.lupoLIcpGh.soKGhplKuKSSsWGPp..AGFIshDQphSK.......hsssthplp+hNhpstKul........pttuhsplsLpIo+pRlsELhcs.s.lshhtcph..........hssptsss.pp...hEF...c..lp..p...p..sp...s.h..l.hhsppsp.....lpVhsss.p..s.................................................................. 0 4 8 11 +1207 PF02522 Antibiotic_NAT Aminoglycoside 3-N-acetyltransferase Bashton M, Bateman A anon Pfam-B_1432 (release 5.4) Family This family consists of bacterial aminoglycoside 3-N-acetyltransferases EC:2.3.1.81, these catalyse the reaction: Acetyl-Co + a 2-deoxystreptamine antibiotic <=> CoA + N3'-acetyl-2-deoxystreptamine antibiotic.\ The enzyme can use a range of antibiotics with 2-deoxystreptamine rings as acceptor for its acetyltransferase activity, this inactivates and confers resistance to gentamicin, kanamycin, tobramycin, neomycin and apramycin amongst others. 
25.00 25.00 26.70 26.60 24.80 24.70 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.32 0.70 -5.11 52 757 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 561 30 138 557 210 200.70 28 82.97 CHANGED hVHuSlpslGhlsGGsp......sllpALh-slu.pGTllhPstoss.ssst............t..t.h+tphPsaDPsho.so.cshGhlsEhhRphPushRSsHPthShuAhGtpAptl.hsspshs.saGtpSPLu+lh..chsGplLhLGssh-ssThlHhAEthushstcphtphp.....slh.tsGpph.Wpphpchshss.........pt......Ftplupsh.pppsh..hppuplGsAp.spLhst+-hlchuhp.alppc .........................................hVHsslpphG...h.l..sus.......sllpulhcsls.pG.TllhPs.o.p.spst.............................saD.tho..o...shGhlsEhhRp.hs.s..shRSsHPhhShuAhGtpuc..l.h..c...th.s..........shGcpSPhtplh..c.hsupllhlG.s.s..h.p.ss.T.h.lHhuEtp..hs......hchhptht.......slh..ppGphh..hpph..pphs.hst.............-t.............h.plst.....h...pppt..............hppsplusut.hpl..h.ptpchlchshphhpt................................................................................ 0 48 95 117 +1208 PF03230 Antirestrict Antirestriction protein Bateman A anon Pfam-B_3190 (release 6.5) Family This family includes various protein that are involved in antirestriction. The ArdB protein Swiss:Q47057 efficiently inhibits restriction by members of the three known families of type I systems of E. coli [1]. 25.00 25.00 25.00 26.50 24.50 24.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.15 0.72 -4.00 27 1097 2009-09-10 21:28:14 2003-04-07 12:59:11 8 3 438 3 52 624 10 95.40 57 64.17 CHANGED Lss.cYsGGaWpFYpLusGGhaM.APss....ppchplhssh.NGapGplSA-AAGIlsoLashu+h...utcsp....sDthhcpYapLR-aA..tpHsEuusIapAID ......................LC--YsGGhWshYTLsN..G................G.AFM.AP-s....s-sahLFNuh.NGN.cAEhSsEAAGIsACLhsYS...H+.....As+TE..................saAMos....HYY+LRDYA..LpH.....P..ECuAIhRlID................... 
0 2 13 38 +1209 PF03589 Antiterm Antitermination protein Griffiths-Jones SR anon PRODOM Domain \N 21.60 21.60 21.90 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -10.19 0.72 -3.90 21 1401 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 423 0 52 693 1 94.20 30 76.57 CHANGED ss..losuspsssucu.......sAlshtpoctp.Ghsla.....suttGhschssppAhctlhphs.slspps.hh+phcshhcshVhpshtp.uaA-hshut ...................p.s.lossspsspucu.......sAhshspoctt.Ghsla.....CuthGhSp.ssptAhptltphs.slsshs.hh+plps.hcshVhphhtp.AaADhphsA......................... 0 2 8 26 +1210 PF01786 AOX Alternative oxidase Bashton M, Bateman A anon Pfam-B_1154 (release 4.2) Family The alternative oxidase is used as a second terminal oxidase in the mitochondria, electrons are transfered directly from reduced ubiquinol to oxygen forming water [2]. This is not coupled to ATP synthesis and is not inhibited by cyanide, this pathway is a single step process [1]. In rice the transcript levels of the alternative oxidase are increased by low temperature [1]. 
22.50 22.50 27.10 26.80 22.30 22.10 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.23 0.70 -4.92 60 637 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 343 0 287 637 966 195.30 41 67.21 CHANGED ssaspcphp.slphtH+pspshuD+lAhhhl+hlRhshDhhohh....................................................................................................hocc+ahsRhlhLEolAGVPGMVuGhlRHL+S......LRthcRDt...............................................................GWIcoLLEEA.NERMHLLTFlcls.cPuhahRhhlhsuQGVFaNhaFlsYLlSPRhsHRFVGYLEEEAVpTYT+hlc-l-s.G+.l..h...shsAPpIAlcYWphsc.p......................................................................................................soh+DllhhlRADEA+H..R.-VNHThus ......................................................................................................................t.H.p.stph.DphAhhhl.c....hlRh.h.Dh........................................................................................................h.h.p....p+a.h.pRhhhLETVAuVPGMVu......G...hlhHL+S......LRphc+..s.s.....................................................................G.W..I..+.sL.L-EA.NERMHLhTFh.c.lu....pPt......WapRhllhssQG..VFaNsa..FlhYLlSP+h.uHR.hVGYLEEEAlpoYTchlc-.l-p...Gp.l........shsAPtIAl.........pYWphsp.p......................................................................................................ssl+DllhslRADEAcH...R...clNHhhus....................... 0 104 194 247 +1211 PF01261 AP_endonuc_2 AP_endonulease2; Xylose isomerase-like TIM barrel Finn RD, Bateman A, Studholme DJ anon Prosite & Pfam-B_3980 (Release 7.5) Domain This TIM alpha/beta barrel structure is found in xylose isomerase (Swiss:P19148) and in endonuclease IV (Swiss:P12638, EC:3.1.21.2). This domain is also found in the N termini of bacterial myo-inositol catabolism proteins. These are involved in the myo-inositol catabolism pathway, and is required for growth on myo-inositol in Rhizobium leguminosarum bv. viciae [1]. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.09 0.70 -11.13 0.70 -5.05 311 17202 2012-10-03 05:58:16 2003-04-07 12:59:11 19 48 4117 343 4623 13647 4257 209.50 15 68.04 CHANGED lpts...tphG..asslElhht..tth.........................hpplpphhcctulp......................h..hth.......................................ttt........hpthpptlchu...ppl..............G..sph.......lshhsu.............................................................h.ttstp....pshpphhc..slpplschst..ptG....lp.lslEshstpth..h.......................pchhpllcplsptt...ltlthDssHhhht.................ttshhphlcphssp...............lttlHl..pDs..................................ttspch..GpG.pl ..................................................................................................................................................................................................t.stphG.hps.lc.h..h...h...........th...................................................................hp.ph.t.phh.p.pt....s..lp.............................................l.....s..h..s...h...h.t...hhh..................................................t..........t.t..hp.t.h....p.p..s..l...ch.u.......p.tL............................G...s..ph.............l..s..hh.su...........................................................................................................h.ht.t.s.p...................psh.pp.h.h..c....sl.p..t.h.s.c.h....h....t...ptG....................lp...lslE.s.....h.ss..t.h.h..tth..........................pph...h..p..l...l...c.....t......l....s.........p............t.......p........lt...l...t...h.....Ds....sHh.hht........................................tts...h..t...p....h...l......p.....p..h.t..st..................l.ttlHl.....pDs........................................t....t...h...GpG................................................................................................. 
0 1580 3025 3904 +1212 PF01636 APH Phosphotransferase enzyme family Bashton M, Bateman A anon Pfam-B_840 (release 4.1) Family This family consists of bacterial antibiotic resistance proteins, which confer resistance to various aminoglycosides they include: aminoglycoside 3'-phosphotransferase or kanamycin kinase / neomycin-kanamycin phosphotransferase and streptomycin 3''-kinase or streptomycin 3''-phosphotransferase. The aminoglycoside phosphotransferases inactivate aminoglycoside antibiotics via phosphorylation [2]. This family also includes homoserine kinase. This family is related to fructosamine kinase Pfam:PF03881. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.76 0.70 -4.64 244 16611 2012-10-02 22:05:25 2003-04-07 12:59:11 18 143 4228 100 5858 21791 6494 227.30 12 63.11 CHANGED phphhs.sG.hsst.sahht........ssp...t........phl.......l.+h........................hpss......htph.t.....tph.p............hh..phLs.ppth....s..........sPps...l........s....t...tthtth......................hhthh..phlsGpthtp.................t.tthhp..thuphLs....................................plHph.....................t...........................ttshs..................................................tht..t..........hht......h.h......................pth.hph.............th........ht.phhp.........phhp...............................hhsthtpths...................hslh.HuD.hpssNllhs.............tsspl...ullDa....ppushus.hhDl......uh..h..h.......t.hs.ths...................sphhtth....hptht.............hshtphphh 
...........................................................................................................................................................................................................................................................................................................t....................h...h.....................................t..............ph.h.......h.+h...............................................th..................t.h..t.....................................hh.......th....l.t...t.t.th.......................................................s..P.p.h....h...............................t..................t...hh...........................................................hhlh....p..h..l..t....G.p...htt..................................t......t.h...h.p........th...s...p..hl.t...........................................................................................................p.l.+p....h.....................................................................t............................................................................h.t...........................................................................................................................................................................tht........t........................h...............................................................................t.h....ph...............................th.................ht..th.t...................t...hhp......................................................................................h..h..p.t.h..t..p..t..h.....................................................ths.h.h...H...u..D..h.p............s.Nl.l.hs...........................tss..p....l....s...l....l..D....a.........p.t.s..s....h....u....s......h....h......D.l.................uh...hh...................................................................................................hh............................................................................................
................................................................................................................................ 0 1538 3487 4810 +1213 PF02558 ApbA Ketopantoate reductase PanE/ApbA Bashton M, Bateman A anon COGs Family This is a family of 2-dehydropantoate 2-reductases also known as ketopantoate reductases, EC:1.1.1.169. The reaction catalysed by this enzyme is: (R)-pantoate + NADP(+) <=> 2-dehydropantoate + NADPH. AbpA catalyses the NADPH reduction of ketopantoic acid to pantoic acid in the alternative pyrimidine biosynthetic (APB) pathway [2]. ApbA and PanE are allelic [2]. ApbA, the ketopantoate reductase enzyme is required for the synthesis of thiamine via the APB biosynthetic pathway [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.78 0.71 -4.77 98 4828 2012-10-10 17:06:42 2003-04-07 12:59:11 11 38 2913 28 1377 9328 3628 147.80 22 45.49 CHANGED ItllGuGAlGuhhustL..tp.uGpc.....VphlsRsp.phpt.lpppGlplpssp........tphhh.s.hthsss.............htthDllllssKuhpsps.slpt.ltshlsss...s......hllhlQNG...lGpt.-tlpphh.....st..p.pllhGlsh.huutpps.......supl....pps...u..tuphhlGthss ....................................................................lsll.G.s.G.A.l.G.s.hh.u.s....tL.....tp....u....Gp-..........................Vsh......l..s..R..t....s....p...hps....l..p....p..p.....G..l.p.lptts..........tphh.h........h.thsst............................................hsthD..ll.l.l.......s.......s.......K....u........h..........p........l.....s.....s.....s.lpp...l...t..s..h..l..sss...o.................hl...l..h.....l....pN..G.....................h..up...........-t.lt..phh........................................st....p..pllhGs.sh...hs.uthps.......................ss.tl....hhh....u...tsth.ls....s....................................................................................................................................... 
0 390 813 1149 +1214 PF03256 APC10 Anaphase-promoting complex, subunit 10 (APC10) Mifsud W anon Pfam-B_4273 (release 6.5) Family \N 19.90 19.90 20.10 20.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.18 0.71 -4.72 7 646 2012-10-03 19:46:52 2003-04-07 12:59:11 11 56 294 3 429 618 4 165.50 26 13.97 CHANGED .ptsssssu.t..pphphtGh........hh-luppAhWolSSsK.G.GVc.hLRD-sh-TaWQSDGu.PHhlpIpFpK+sslphlslYhsaphDESYoPSplplcuGsshpDhp.lphh-lsp..PsGWVplslp...................Dspt..p.l+shhlQltlhsNHpsG+DoHhRtI+lYuP..p..t.shp.hht.....oohth.phsol .......................................tt..tt............................................h...t..So...p......t.slt.....pL........pDssscoYWQS...DG...s................p...............s...H.hls.lphp.+.t..s...h.l....pplt..lhls.ptDcSY....hPs+.lslhuGsshs.s.....L.p................E........l..ps.....lpl.p...............ssGah.pl.l.................................................................................p.s...s....p.hph.hlQltlhps..pps....GhDT+lRtlclhu....................................hh.................................................. 0 128 193 305 +1215 PF04110 APG12 Ubiquitin-like autophagy protein Apg12 Wood V, Finn RD, Bateman A anon Pfam-B_9471 (release 7.3); Family In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells [1]. The Apg12 system is one of the ubiquitin-like protein conjugation systems conserved in eukaryotes. It was first discovered in yeast during systematic analyses of the apg mutants defective in autophagy. Covalent attachment of Apg12-Apg5 is essential for autophagy [2,3,4]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.89 0.72 -3.79 6 282 2012-10-03 10:59:06 2003-04-07 12:59:11 8 8 246 2 197 973 70 83.60 40 53.46 CHANGED KIsl+L+AlGssPlLKpppasVssocohutlIpFL+KhL+l.hs-pLFlYVNsoFAPSPDppltsLYcsFu...oDu+Ll.lpYCto.AaG ...........................Klslhh..+slG.s.sPll..K....p.p..haplsso.pphp..sllcFL.+.KtL.....+.........h...........s.......-........p.............lFlYl...Np.......s.F.A.PuP.Dp.tlusLa.c.s.Ft..........ssscLl.lpYsto.AaG.................. 0 65 107 165 +1216 PF04108 APG17 Autophagy protein Apg17 Wood V, Finn RD anon Pfam-B_71163 (release 7.3); Family Apg17 is required for activating Apg1 protein kinases [1]. 25.00 25.00 25.00 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.35 0.70 -5.67 19 172 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 138 \N 130 174 0 344.20 21 54.31 CHANGED hpahhtA+poLspspplCp.Apphlsss+ptLppplh.pt+.hpFLhpuLppQhchLhp.....shsphhhtpptspp.hsslhppLpsups+LcpplphLcpThVph.h..............sp-s+sLtDFlspcsl-.Lptslpphlccssth..ptpl-s.htta-s.lpplppth....tpph+phpp....pspht.t.pst......s.hsslsptlpuLE...pEhAslLpSLopHaDhChpuhchh.s.............................................pshstsEhcEhlpVLpsDAtEl.-VltELpsthsphcpthcs....lpsphsphpphastspslhpplpphtp.phstYlthhpshsphhpcppt.........plpptlsplppLspaY-pFlpuYpu.LlhElcRRpts..cp+hccllcpsppcLppl.-pDhcpRppFhhc.GDYLPp-l.WPGhhcpssh 
...............................................................h.tah.tuppsLtphp.lhp.Ap..h.thpt.hpp......p.htalhttltpQhthl.t.....hhtth....t.spt.h..thhpphp.h.tplpphht.Lcth.s...h..........................................tpt.ppLhDFlsppslp.l....p..lpph..h..ptth-t....thpp.hptlppth..............h.........................s..t.h.p..tphp...pthsphLpuLspHaD.shhh.c...........................................................t.hs.tpht.p.hpVl.pDstEh.slhhclpphhtphpt.htp....l..p.p....hsthpphhsshpphh.pplsphtp..p....h....spalt....thpshpphh.pptpp..................................plpsthpphp....pLpphYpsFh..p..uYss.Lll..EltRR+ps..ppphcslhcphpcpLsplh-....p-hcpRcpFhtchG-aLPp-l..hssh......s..................... 0 30 65 105 +1217 PF04111 APG6 Autophagy protein Apg6 Wood V, Finn RD anon Pfam-B_11747 (release 7.3); Family In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells [1]. Apg6/Vps30p has two distinct functions in the autophagic process, either associated with the membrane or in a retrieval step of the carboxypeptidase Y sorting pathway [2]. 
30.00 30.00 30.00 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.09 0.70 -5.38 99 445 2009-09-10 16:48:33 2003-04-07 12:59:11 7 12 305 4 274 429 5 294.70 35 65.03 CHANGED PlCp-Cs-hLlcpLcpplctspcEpcsYppaL......cplp.........................................tppstpp....h..pcclpp..lctEEpphhpELpclEpccpplsp-lpp.......hcpEtppLc..ppEc.paacchsphphphhchpc-hpSlpsphphspppL-+Lc+oNlaNssFcItH.....-.G.tFGT..INGhRLG+......LssssVsWsEINAAWGQslLLLtslAc+lshp...Fp.p.Y+LhP..hGSpSpIpchs..p.......................................phptLsL......auo.G......sh...phFh.....cp+FDpAMlAFL-slpQh....tppl...p..............ptss......t.....................h.........p....LP.Y..cI..p..cc+Itch.......SI+l..thspc-.pWT+AhKahLTNhKWhLAasSsp...tp .................................................PlC.-Csc.hLl-thcpplp.sppEppsYtphLcpLp......................................................p.t.p.tpp...pthpp......clpp..lpt-EpplhpcLpplE.ppcttlspp.ltp..........lct.......ctpp.Lc..ppEt.pah+-hsphphp..hhphpc-.......hculpsphchsptpL-+Lc+TNVaNss...F....p..I.H................s.G..tFGT..INshRLGR......Ls.sss..V.-WsEINAAWGQssLLLtslAp.+..hs.hp...Fp.p.Y+LhP.hGs.aShlpphp....................................................ptpplsL..ass.s...sh....phFh.....pp+FD.pAMsuFL-ClpQhtc.l...p.............ptst.......................................thpLP..Y.ch....c..psKItsh...........................SI+h..thspp-..pWT+AhKahLoslKahLsasss....p........................ 0 102 152 217 +1218 PF04109 APG9 Autophagy protein Apg9 Wood V, Finn RD anon Pfam-B_12479 (release 7.3); Family In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells [2]. Apg9 plays a direct role in the formation of the cytoplasm to vacuole targeting and autophagic vesicles, possibly serving as a marker for a specialised compartment essential for these vesicle-mediated alternative targeting pathways [1]. 
21.70 21.70 25.70 23.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.15 0.70 -5.55 36 425 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 279 0 277 410 20 322.60 36 42.59 CHANGED Ds-ltTlsWppVlp+lht.Lcc..tsshosp..............t.ps.....Kp+LsApDIANRIMR+ENYhIALh.NKslLslsl.......................slP.......hlpsp...........hLT+oL-WNlphslh...salFs.ppGpl+ppal+sppRptLupcL++RF.........hhsGllNllLuPFlllYhlLhhFF+Yap-a+psPusl.GsRpaoPhAcW+hREFNELsHhFp+RLshShhhAscYlsQFP..pshhsl.lh+hluFluGulsAlLllholh...Ds-sFhsF-lo.s+olLFYlulhGslhuls+uhlsc.......................-shV.aDPEtsLcpllpaTHYhPscW...cs+hHopcV+pEFspLaph+lhllLcElhSlllTPFlLhFsLspsu.spIlDFFR-aoVcVDGlGaVCpFAhF-hp.cssts ......................................................................t-lpshsWt.cV.p+lht.lpc.pp..thsht.......................................ppcL......sthDIhpRlhRh............cNYh..lAhh..NK........slLshph..........................................lP.............hht.p.h...........................hho+sLcaNlphhlh....t.hFp..pphplp.tpahp.......stp.......c......L.......uptLpp+h.........hhhulhNll.lsPhlll...a.llahFapYhp.h+..p.pPusl.GsRpao.huchhhRcFNE.L.H.hppRLshua...........AscYlspFs..pshhsh.lA+.lsFhuGulhulL..lhh...slh.......Dpclhh......scplLhhlslhGslhsls.Ruhls-....................................................................-phV......asPEth.hptlltahHYhPpcW...puphHs.p.....pl+pEFs.pLaph.....+hl...hlLcElhS.llTPhlLhF.l....pu..cIlDFFRpFTVcVsGlG.lCSFA.hDh.cpts...p...................................... 0 94 148 221 +1219 PF04655 APH_6_hur Aminoglycoside/hydroxyurea antibiotic resistance kinase Waterfield DI, Finn RD anon Pfam-B_4369 (release 7.5) Family The aminoglycoside phosphotransferases achieve inactivation of their antibiotic substrates by phosphorylation utilising ATP. Likewise hydroxyurea is inactivated by phosphorylation of the hydroxy group in the hydroxylamine moiety [1,2,3]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.58 0.70 -5.23 9 681 2012-10-02 22:05:25 2003-04-07 12:59:11 9 5 456 0 126 779 46 215.90 33 83.38 CHANGED RWcLptcGtshsscoShllPVh...psDGssAhLKlth..tc..pEptGthl.LsaWsGcGAVRlLsp-..sushLLERhsGsRsLspls.cttDcpAstllAthhsRLauspstPhP..LpPLp-hhsuLhpts.spt.st......cttL.tssAusstpLlusPp-.RsLHGDLHasNVLsusc.....csWLAIDPcsLhG-sGFDhAshhsss.tc......shcstclc+ph-llstslslD.tRlhsWslAhss.SusWthEDG ............................................................................................................................Wtl.........s.........s.suhlh.V.....h..s..G.t...............A.hlKh..........pE.h.....h..Lh.....h..h.s...G.p...G..A.s.c.lls.........tc.....................p.......s..hhLLEhh..utR....L.u......p.....l.....s..................s...D...p.....A...s...pl..h....A........p...l..hu...+........La....u..s..s..............s.h.....P..s........s...L....h...s.......lp...-...h.....h...s.s......hh..pps.....tp.s.st...................sppphh..hpsAt.h..s..c...p...l...h.u......s...s..u......-.........p......h...L.....HGDLHa-N..ll...t.......us........................cs.W.....L....s....ID....P.p.s.L.s.G-.sGF-h....ssh..hh.s....h-c......shs.s..p...p..l.tph.h-.hhs.c.slsl..D.cR.hhsashuhss.sAhW.ht.................................................................................. 0 40 79 103 +1220 PF00807 Apidaecin Apidaecin Bateman A anon Pfam-B_1489 (release 2.1) Family These antibacterial peptides are found in bees. These heat-stable, non-helical peptides are active against a wide range of plant-associated bacteria and some human pathogens [1]. The Pfam alignment includes the propeptide and apidaecin sequence. 
25.00 25.00 31.70 30.20 23.50 17.60 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.47 0.72 -4.26 2 104 2009-09-10 22:49:59 2003-04-07 12:59:11 12 9 4 1 40 92 0 27.70 92 82.49 CHANGED hctcPEAEPGNNRPlYIPQPRPPHPRl. .REAEPEAEPGNNRPVYIPQPRPPHPRLR. 0 40 40 40 +1221 PF04711 ApoA-II Apolipoprotein A-II (ApoA-II) Kerrison ND anon DOMO:DM04862; Family Apolipoprotein A-II (ApoA-II) is the second major apolipoprotein of high density lipoprotein in human plasma.\ Mature ApoA-II is present as a dimer of two 77-amino acid chains joined by a disulphide bridge [1]. ApoA-II regulates many steps in HDL metabolism, and its role in coronary heart disease is unclear [1]. In bovine serum, the ApoA-II homologue is present in almost free form.\ Bovine ApoA-II shows antimicrobial activity against Escherichia coli and yeasts in phosphate buffered saline (PBS) [2]. 24.30 24.30 24.70 24.70 23.80 24.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.41 0.72 -4.09 5 38 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 26 44 15 41 0 73.20 61 73.18 CHANGED QAEEoslQSLsSQYFQTVTDYGKDLMEKAKuSELQuQAKAYFEKTQEQLTPLVKKAGTDLlNFLSpFl-L+cQPAT .............QA-EsslpSLhSQYFQTlTDYGKDLhE...KVKoPELQAQAKAYFEKo+EQLTPLVKKAGT-LlNFLS.Fh-Lc.p.pPA........ 0 1 1 1 +1222 PF04691 ApoC-I Apolipoprotein C-I (ApoC-1) Kerrison ND anon DOMO:DM04729; Family Apolipoprotein C-I (ApoC-1) is a water-soluble protein component of plasma lipoprotein. It solubalises lipids and regulates lipid metabolism. ApoC-1 transfers among HDL (high density lipoprotein), VLDL (very low-density lipoprotein) and chylomicrons. ApoC-1 activates lecithin:choline acetyltransferase (LCAT), inhibits cholesteryl ester transfer protein, can inhibit hepatic lipase and phospholipase 2 and can stimulate cell growth. ApoC-1 delays the clearance of beta-VLDL by inhibiting its uptake via the LDL receptor-related pathway [1]. 
ApoC-1 has been implicated in hypertriglyceridemia [2], and Alzheimer's disease [3]. 27.60 27.60 28.40 30.60 26.80 27.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.86 0.72 -4.28 6 41 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 29 5 13 45 0 55.00 47 71.03 CHANGED ss-hSSshpplsDKLKEFGNTLEDKA+sAI-cIKpS-lssKTRsWFoEsFpKlK-KlKss. ......tP-lussh....D+LKEFGsTLEDKA+psIp+I+QSEhssKTRsWFoEsapKVK-Klp.s..... 0 2 3 4 +1223 PF01333 Apocytochr_F_C Apocytochrome_F; Apocytochrome F, C-terminal Finn RD, Bateman A anon Pfam-B_1294 (release 3.0) Domain This is a sub-family of cytochrome C. See Pfam:PF00034. 28.50 28.50 28.50 28.70 28.40 28.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.48 0.71 -4.26 49 697 2012-10-02 20:27:15 2003-04-07 12:59:11 14 5 642 30 66 553 134 114.80 74 37.15 CHANGED NNslasAsssGpIopIpt..p-..............c....GGaploI.possGppls-plPsGP-Ll..VscGpsVpADQsLTsNPNVGGFGQs-sEIVLQsPsRlpGLlsFhhsVhLAQlhLVLKKKQFEKVQhA.EMNF ................NNTVYNAousGhVoKIlR..KE....................K....GGYEITI..scs.SD.GcQVVDIIPPGPELL..VSEGEsIKlDQPLTs...NPNVGGFGQGD.AEIVLQDP.LRVQGLLhFhASVILAQIFLV...LKKKQFEKVQLuEMNF...... 0 16 40 56 +1224 PF01442 Apolipoprotein Apolipoprotein A1/A4/E domain Bateman A anon Prodom_1521 (release 99.1) Domain These proteins contain several 22 residue repeats which form a pair of alpha helices. This family includes: Swiss:P02647 Apolipoprotein A-I. Swiss:P06727 Apolipoprotein A-IV. Swiss:P02649 Apolipoprotein E. 
90.00 5.00 90.00 5.90 89.90 -999999.99 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.09 0.71 -11.79 0.71 -4.81 46 1816 2009-01-15 18:05:59 2003-04-07 12:59:11 13 29 249 39 715 1765 2 196.40 12 86.35 CHANGED thlp-shcplssYtpcLpppLsPhs...p-htspLsc-sptl+pcl......ppDlE-l+s+lpPahsElpphlppph-chRp+lsPhspcL+c......phppchccLpppLsshsp-..................h+sph......cpsl-sl+spLtPhs-ph+p+ls..................p+Lcpl+ppssspsp-hpsplpppl...ppl+c+lpstsc-l+ppL...............................................................................pPhsEshcpplhphhE ...................................................................................................................................................................................................................................................................................................................t.......................................................h.......ttl....................ttt...h.tt.htt.......t.......l...t....thh....p....p...h...p...p......t.l....t.p...p.......h.cp....l..pp....p....l...............s...s.......hhpphp.p......................pl....s........pp...s......ppl.....pp.p....ls.......sh...s...pp........................................................................................hpppl..................pp...ps....p...ph...p...splssh.......s....cpl...p......pphs............................................................................................pplpphp...p...p.l...s..p...ts.....p....p....ht...s.p...l..p...p...ph...........pphp...ppl....st.t....hp.......slpppl....................................................................................................................................................................................................................................................................t..t.th...........................tt.................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................... 0 146 285 400 +1225 PF01583 APS_kinase Adenylylsulphate kinase Bateman A anon Pfam-B_578 (release 4.1) Domain Enzyme that catalyses the phosphorylation of adenylylsulphate to 3'-phosphoadenylylsulfate. This domain contains an ATP binding P-loop motif. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.78 0.71 -4.57 20 3100 2012-10-05 12:31:08 2003-04-07 12:59:11 15 31 2402 54 1024 3172 2394 154.70 50 43.89 CHANGED +GsslWhTGLSGuGKSTIAtALEcpLhppG.hpsYhLDGDNlR+GLN+-LGFSccDRpENIRRluEVAKLhu-uGllslsuFISPacp-RctARplhpp.........cFlEVFVDsPL-VCEpRDPKGLYKKARsGcIKsFTGIDuPYEsPpsPElhlcssppsl .......................usllW.h.TGLSGSG..K..S..TlAsALEc.......tL....h....p................p.G.....hp.....s....Yl....L.D....G....D......N..l....R.....+.....G.....L....s........p.................D..........L.......G.....F.......S..c.....t..D.R........p..EN........I....R..R.....l..u....E..V....A.....+.....L.h.....s....-.u....G.l..l.s.l..su...F..IS.P.a...+.t..-R..ph..sRchlsps.............pF..l.E.......V.a..Vcs.P.Lp..lC..E....p..R..D........P........K.................G.....LY.+KA.....R.....u....G...E........I+.............s........FT......GI....D....u..s............YEs..P..p..sP..-..lplcsp...ph........................................................................................................ 0 317 597 837 +1227 PF03440 APT Aerolysin/Pertussis toxin (APT) domain Griffiths-Jones SR anon SCOP Domain This family represents the N-terminal domain of aerolysin and pertussis toxin and has a type-C lectin like fold. 
25.00 25.00 36.70 41.30 24.80 24.80 hmmbuild --amino -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.83 0.72 -4.28 5 77 2012-10-02 16:37:33 2003-04-07 12:59:11 9 3 33 26 10 90 2 81.00 49 19.25 CHANGED sEPVYPDQl+lsuLG..pGVCuscYRPLTR-EApS...l+sNLVuhMGQWQIoGLADtWVILGPGYNGEIKsGoAGuTWCYPToPsou ..................lYPDQlhhhsLG..ptlCsstYRPlTRpEApu...lKusllshMGp..WQIoGLAssWVIMGPGYNGEIK...G.o.A.u.sTWCYPssPs..s.... 0 3 5 5 +1228 PF02610 Arabinose_Isome L-arabinose isomerase Bashton M, Bateman A anon COG2160 Family This is a family of L-arabinose isomerases, AraA, EC:5.3.1.4. These enzymes catalyse the reaction: L-arabinose <=> L-ribulose. This reaction is the first step in the pathway of L-arabinose utilisation as a carbon source after entering the cell L-arabinose is converted into L-ribulose by the L-arabinose isomerases enzyme [1]. 27.00 27.00 27.00 27.50 26.80 26.90 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.15 0.70 -5.49 38 1076 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 1019 9 200 697 49 349.80 58 71.66 CHANGED M.hph.cphEhWFlTGSQHLYGpEsLcpVupcoppIssuLNsoupLP.hclVaKslsoTs-pIpplhp-ANss-sCsGlIsWMHTFSPAKMWIpGLptLp..KPLLHLHTQaNp-IPWsoIDMDFMNLNQSAHGDREFGFIsuRhphpRKVVsGHWpDtcVpccIusWhRsAsuas-upplKVARFGD.NMRpVAVTEGDKVEAQIpFGaoVssaGlGDLVphlssVo-p-lcsLlpEYpspYslssshpp.ssppcpulpptA+lELGl+pFLc-GGasAFTTsFEDLtGh+QLPGLAsQRLMA-.GYGFGuEGDWKTAALlRhhKlMupGh...sGTSFMEDYTYchssGschlLGuHMLEVCPSIA 
..............................................................................................................................MhhhcsYEVWFVlGSQHLYGsEs..L+QVs.pHAcclVsu.L.NspucL..P.sKlVhKPlsTosDEIosls+-ANYD-cCAGllsWhHTFSPAKMWIsGLshLp..KPLLphHTQFNsslPWDoI....D.......M...D......FMN....L.NQoAHG....s....R....EFGFIsARMRpp..+.tVVsG.H.W.p..D.cp.s.pc+IusWMRtA...luhp-o+pLK...VsRFGD...NMR.cVAVT-GDKVuAQI+FGaSVNsauVGDLV.p.lVsu..lSDuD.l.sALl-E.Ycs.pYshssusp.....p.G.-.++psVh-AARIELGh+RFLEpGGa.cAFT.TsF......E.....D....L.........a..........G.....L.....K..Q...L..P.G.....L..A...VQ.R.LMpp.GYGF..uGEGDWKTAALlRlMKVMusGL....p....GGTSFMEDYTYcFc.pG..N-hlLGSHMLEVCPSIA................................................................. 0 70 136 171 +1229 PF02311 AraC_binding AraC-like ligand binding domain Mian N, Bateman A anon Pfam-B_12588 (release 5.2) Domain This family represents the arabinose-binding and dimerisation domain of the bacterial gene regulatory protein AraC. The domain is found in conjunction with the helix-turn-helix (HTH) DNA-binding motif Pfam:PF00165. This domain is distantly related to the Cupin domain Pfam:PF00190. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.58 0.71 -4.48 58 10833 2012-10-10 13:59:34 2003-04-07 12:59:11 14 20 2515 10 2189 13862 1372 132.60 12 46.69 CHANGED hsshtthpsph.tpta.s..HhHs..tatlthltpGssph.phsspp.....aplssGclhllsPsp.Hphtsss.........pt..saphphlhhpsshltphhtphthht...................hhpsspltphlpplhptl............ppttsshhtpshlhpll .....................................................ht..................h........HhH.s....th.p..l.h.h..s..h....pGs.s...p..h..t.h...s...s..pt......................ah..l.psGslh.h..l...s..s..s..p..h...H...p....h.t..sss............................................sh.p..h...h..h..l....h.h......p.......s...t..h.......h..............t...h...h...t...t...................................................................h.............h....hh........................................................................................................................................................................................... 0 712 1314 1755 +1230 PF03869 Arc Arc-like DNA binding domain Finn RD anon DOMO:DM07094; Family Arc repressor act by he cooperative binding of two Arc repressor dimers to a 21-base-pair operator site. Each Arc dimer uses an antiparallel beta-sheet to recognise bases in the major groove [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.28 0.72 -4.34 3 753 2012-10-02 18:44:02 2003-04-07 12:59:11 9 3 491 41 123 550 31 46.50 30 53.16 CHANGED uRcsP+FNlRhPcEVREpl+cVAEuNGRSMNSEIlQRVp-SLpKEGslsu ..............ph....lR.lPp..pl+-plcphAcpssRShNuE...l....lthLcpult.p......t................... 0 21 58 85 +1231 PF04659 Arch_fla_DE Archaeal flagella protein Waterfield DI, Finn RD anon Pfam-B_4437 (release 7.5) Family Family of archaeal flaD and flaE proteins. Conserved region found at N-terminus of flaE but towards the C-terminus of flaD [1]. 
20.80 20.80 20.80 21.50 20.50 20.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.47 0.72 -4.20 22 97 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 55 0 68 99 1 97.30 34 28.55 CHANGED .cspcps+LcslP.--shu.hlshcWLEFLhp+sGhsshs-sL-YYhslGWlS-cshscLhcas+Ghph.p-p.....pssscLohsDHllSLLaIE+LsGcp .........tppshLpplP.c-shuphlshcWLEFLlc+sGhpsh.csL-YYhslGWIS-cVhspLhcas+Ghch.t.c...............psstcLs....hpDHlhSLlaIp+LsG.................... 0 8 36 56 +1232 PF01917 Arch_flagellin Archaebacterial flagellin Enright A, Ouzounis C, Bateman A anon Enright A & COG3354 & COG3353 Family Members of this family are the proteins that form the flagella in archaebacteria. 29.30 29.30 29.40 29.40 29.10 29.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.30 0.71 -4.68 114 362 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 94 0 241 376 91 185.90 22 88.87 CHANGED GtsG.luslIlFIAhVlVAAlsuuVllsoushlpppupssucpsspplsoslplhssss...........ssssssshs...........lslhlp.NsGus.slclspspls..lh.ss............................................................................................................................................................................................................................................................tlhshpssstshsss....................pssls.G-hstls..ls...............................................hssssphph.plhschGu.ssslph 
............................u.sG.luolIlFIAhVlVAAlsA.uVllsoushlpppupssGcpusppluoslplhsssu...............................ssssss..s..lsp......................lslhl..p..ssG..us..sl...-lsps.plt....lssss..t.....ht.h..st..............................................................................................................................................................................................................................................................................................................t..thshhsl.pstssshtss.................shlp...u-...hhtlh.lsht....................................................................................................h.s.tp..thph.plhsp.Gssh.h.................................................................................................................... 0 43 130 195 +1233 PF01637 Arch_ATPase Archaeal_ATPase; Archaeal ATPase Bateman A anon Pfam-B_1507 (release 4.1) Family This family contain a conserved P-loop motif that is involved in binding ATP. This family is almost exclusively found in archaebacteria and particularly in Methanococcus jannaschii that encodes sixteen members of this family. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.46 0.70 -4.66 56 1765 2012-10-05 12:31:08 2003-04-07 12:59:11 13 92 762 3 830 5495 1741 203.60 17 44.02 CHANGED FhDREcELcclcchhccs.t.lhllYGPcssGKTuLL+phlpph..thsh....tslYhpshcphhttthcth........................tcpltcth...........................tpsl.psth...........phs.hsls.hhchlpccscc....lsllhDEl.phh..t.p................tstthlptLhphh-h..stphchhhl..lsuSS....-Glhhc..........lhGRppa.hplcsh.....hphhccsFcpl.....t....sc.ch.-clachsGGpPthLtpl .......................................................................................................hhsRppE...l....p.........l.........p....h.....h.......p........p........t.........................p....h.....h...l...l...h.G..RchGKTs....L.l.....p..p..h...h....p...ph....................................hsla....h.........s...h......p....p....t..t....t....p.....p.....h.p..p..h............................................tp..h....p..hh...............................................................................................................t......h....t.........h..........................t.......h...t.s..h..p..p...h.h...p..h...l....t.p....p..tpt................hll....h...l.....D.Eh...p.lh........p.......................................................................t..h.......p...h.ltp....hh....pp.................p......p.....p......h...h...hl........h...s..G...S..t.........s..hh.p............h.t.....p.t....s......h.h...s......p..h....p....h...h.l.p...h....................h.phh.........t...................................t....h...........hht...hhsGhs......h............................................................................................................................................................................................................................................... 
0 292 489 679 +1234 PF00798 Arena_glycoprot Arenavirus glycoprotein Bateman A anon Pfam-B_1047 (release 2.1) Family \N 25.00 25.00 28.70 28.60 18.10 18.10 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.84 0.70 -6.09 20 431 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 50 30 0 349 0 309.30 46 98.66 CHANGED MGQlloFFQEIPpllpEAlNIALIsVSLlAIlKGllNlaKsGLhQLlhFLlLAGRSCS...h.......pIGc+pphpolplshoplh....tphPtsCplNNoHaYl+sspsop.Gl-lolTssolls.......ph..tthsNlosChcssptt.....asLpWllsslHash.pssphl.stspspssuthpIQhNLoc....tpcstpaspplhsultclFGshpts.........................C......stsshpaLI.IQNoTWp...........scCphs...Hhsol+lLhpsstpphl.oR+LhuFFoWoLSDSsGNDhPGGYCLE+WMLluucLKCFGNTAlAKCNhsHDSEFCDMLRLFDFNKNAIcoLpspocpplNLls+oINuLISDsLLMKN+L+ELMsIPYCNYTKFWYlNHTtoGcHSLP+CWLVpNGSYLNEocFRN-WlhESDpLIoEMLsKEYp-RQG+TPLuLVDlCFWSTlFYosolFLHLlGIPTHRHIlG-uCPKPHRLsppGlCuCGhYpp.sK ..MGQlloFFQElPphlpEshNIsLlAlSllAllKGlhNlhpsGlhtLlsFLhLsGRSCo.......h.......hhtthhEhQolphsMspL......shP..CphNsSHhYlphGpp....hplohos.oll......................pp.sslosshppp........aslh.hhsshthsh...s.h..shspshstG.h.lQhNloh....s..shp.stphtsthhphahthh.s...........................C.........s.sh.hlh.hpspsW.spC.hs....hsh.thl.tptt.....tRplhuhFoWsLoDstG.p................................................................................................................................................................................................................................. 
0 0 0 0 +1235 PF00843 Arena_nucleocap Arenavirus nucleocapsid protein Bateman A anon Pfam-B_1333 (release 2.1) Family \N 22.70 22.70 22.80 33.30 21.70 22.60 hmmbuild -o /dev/null HMM SEED 534 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.76 0.70 -6.62 17 468 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 52 24 0 392 0 313.20 54 96.85 CHANGED ScpVPSFRWTQSLRRGLSsaTpsVKuDVl+DA+ullsuLDFspVupVQRlMRK-KRsDsDLs+LRDLNKEVDsLMpM+SsQ+sslLKVGsLo+DELM-LAoDL-KLKpKVhRsEt.ssssGVYhGNLTsoQLspRuclLchlGh....pt.pssssGVVRlWDVK.....Dso.lLsNQFGSMPALTIACMoc..QGuEshNDVVQuLT.sLGLlYTVKYPNLsDL-KLoppHssLplITp-cSuINISGYNhSLSAAVKAGAChlDGGNM.LETI+VpPssFoolIKslLpVKp+EuMFVu-sPGpRNPYENLLYKlCLSG-GWPYIuSRSQIpGRAWDNToVDLsscs.......ssspsPh+sGus.pLssLo.sQEt.l+cuhtpLDPssTTWlDIEGsssDPVElAlYQPsoGpYIHCYRcPpDtKuFKspSKaSHGlLlKDLtsAQP.GLlSslIctLPpsMVlTsQGSDDI+KLh-hHGR+DlKllDVchou-pARhFE-tVW-+FshLCcpHsGlVlpKKKKGss.s.o....pPHCALLDCIMFpuslsGt ......................................................................................................................................................................................................................................................................................................................................YIuSRopIhGRuW-NTsVDLss.c.s.......sssptP......pt.su.tshp........usLo.tQphhlK-.uhtpLDPssshWlDIEG.PsDPVElAlaQPsuupYIHhaRcPpD.KtFKpsS+aSHGI.lpDl.sApP.GLhShVIthLPpsMVlTsQGSDDIRKLhD.pGR+DlKLlDVcLop-puRpaEptVW-+atcLC+hHsGlVlpKKK+ttt....hp................................... 
0 0 0 0 +1236 PF00025 Arf arf; ADP-ribosylation factor family Sonnhammer ELL anon Swissprot Domain Pfam combines a number of different Prosite families together 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.12 0.71 -5.03 20 5460 2012-10-05 12:31:08 2003-04-07 12:59:11 16 68 556 115 3415 26010 2506 161.80 36 79.30 CHANGED hsslhupLhu....sKEhRILhLGLDsAGKTTILa+Lphs-lso...shPTlGFNlEolpa.....+Nl+FslWDlGGQcplRPhWRsYassTsulIaVVDSuD+-..RlsEu+pELpslLsE-ELp...sAslLlhANKQDLPsAhSssElpctLuLpp.......l+.....sRsWpl.sssAlpG-GLhEGh-WLussl ...........................................................................................h.......h.h......pc.c..h..+....l..l.h......l............G.LD..s.......A....G...K.......T.........T.....l.....L.......h.......p........L.....p.......h..........s........c.......h....s..p...................h.h....P.............T........l................G..........F.....N.....l......c......p.....l....p..h............................p...s......l........p.........h.......p...l..........W..............D....l.......G............G...........Q.........p.........p..........l............R.............s.........h..........W.............+...........p........Y........a.........p..........s.........s............p...........u...........l....l..........a..........V..........V................D..............S..........s..........D..........c.......c...................R......l............t........-............u.......+.......p........E.....L.....p.........p.......h.........L.............p...........c........c.........c.....L...p............................s..s.....s...l....L.....l....h..A..N.....K............Q.............D..l....s.....s.........A.............h.....s.....s......s......E........l.....p....c....t......L..s..L..p..p.................................lp...............p.R...t...h....h...l............s..s...s..A........h........p......G......p........G......l..........-.uh.cWL.
p..h............................................................................................................. 0 1187 1801 2657 +1237 PF01316 Arg_repressor Arginine repressor, DNA binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.06 0.72 -4.42 10 4229 2012-10-04 14:01:11 2003-04-07 12:59:11 16 5 2719 38 530 1777 256 69.40 34 44.70 CHANGED hsKspRpptI+cIIppcclsoQsELlctLpcpGls.VTQATVSRDL+ElshVKVt..ssGphhYsLss-sph ..........................p+ppRh.p....hI+.pl.lpccc..lp...oQpEllptLpc.p.Ghp.lTQuTlS...R...D...l...+-.....l....s...llK.lt.stps.p.....hhY.sLssp...h................... 0 156 308 440 +1238 PF02863 Arg_repressor_C Arginine repressor, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 25.00 25.00 30.40 29.50 21.60 18.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.18 0.72 -4.51 146 4296 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 2715 74 531 1750 261 69.90 29 45.54 CHANGED hlpphllslctspshlVl+Th..PGsAphlAshlDph.phsc..IlGTlAGDDTlhlls+stpssppltpplpph .................h.lpchllsl-hsst.hlVl+Th..PGs..Aphl.usllDp.........h..phcp.........IlGTlAG.D.DTlhlls.csspsupplhcplhp................ 
0 156 310 442 +1239 PF00491 Arginase arginase; Arginase family Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.00 22.00 22.20 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -12.03 0.70 -5.11 347 5883 2012-10-01 22:40:15 2003-04-07 12:59:11 16 18 2993 272 1841 4658 4174 269.20 24 85.75 CHANGED tclsll.GlPhDt...s.ssh.RsGs.chGPp......ulRpu.h.ht............hs...s................hplh....................................D......hGD.l........shs.............................p.tp.s...hcplpptspp.l.....lp..p.................uths......................lsLGGDHolohshlpuh...scth...................st..lullahDAHsDh.ps...............tsst..........hs.HGsshp...p..................................shp..hhlp.....tphhplGlRshsps....-...h.........t.h...cthGhp..hhshcclpphuh.t..th...hcphhptl.....t....t....slYlShDlDslD.PuhAPGoGTPts.G.Glohc-shtl.lct.l...ts.....hpl..lGhDlVEls.P....shD..s..................thTuhhAupllh..p.hhs .....................................................................................................................................................................phsllGhPhDh.....h..sph..Rs.Gs.....c......h.GPp........ulRps.h.ht.................p...s....................................hplh..........................................D.......hGD.l..........shs....................................................h...s.t.p..h.........hpplpptspp.l..lp..p.....................sths.......................................lslG.GD..H.plshshlpuh....scph.....................................s..lullahD.AHs..Dhps...................................ss.t...........hs..HGoshtp................................shpp...shlps........pphlpl.GlRshsss.........ph.................................phh.....cctG..hp.......h.hs..h..p.c...l...p...c.h.......uh.....t...........tl...........hpplh.phl.....t.........sp.....slYlohDlDsLD.P.u.hA........P.G..s...........G.....T...P....s.......s........G...G.......l.o...c.c.s.h.pl.lct.l.....tp..........
..hpl.....suhDlV.E....ls..P....shDt.s...................hTuhhuuplhhp...h............................................................... 0 557 1073 1504 +1240 PF00764 Arginosuc_synth Arginosuccinate synthase Bateman A anon Pfam-B_888 (release 2.1) Family This family contains a PP-loop motif [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.30 0.70 -5.42 12 4339 2012-10-02 18:00:56 2003-04-07 12:59:11 14 19 4054 37 1169 3498 3319 374.20 40 92.38 CHANGED VLAYSGGLDTSVsIshLp-chs..EVlulslDlGQst--lctscp+AhchGshcphslDAK-EFsc-Yha.uIpANAhYEs.YPLuTALARPLIAK+lVEsAcccGApAVAHGCTGKGNDQlRFEsshtshsPplclIAPhR-hshT.....RcctIpYAcp+GIPlssshcpPaSIDcNlWGRulEuGhLEDPhstPPc-lataTpsPtcssppP-hVcIsFcpGVPVuls......Gcphss...lplIpphNElAGtpGVGRlDhlEDRllGlKSREhYEsPuAhlLlpAH.csLEslTLsR-.h+FKchl-pp.auELlYpGLWasPL+csLcAFIs+oQE+VTGhVRV+La+GshhlhGRcSshu.LYstcLsoa-.scshDQptupGFlphaGLpu+lap ...........................................lLAYSGGLDTSlhlhWLpcch....t-VlAh.su.........D..l....G..Q..s............-....-..h...-.s..l.c.c....KA.hphG...A...p..c....s..hl...lDs+cEF...sc.-a.lhssl.pus.A........h...Y....E....s....p....YhL.sTul.u....R.P...l...I...u......+tLVc.....hA+..cpGussluHGsTGKGN.D....................Q..................VRFEhshh.u.ls.PpLcllAPWR-hs.h.h.....uRc-hlp....aspppslsl.t...h...........s........h...c........p....sYShDpNlht.t..ohE.......u.t.LE..ss..h.....s.....p.....s....s....c.s....h.a......th.o.h.ss.-.p.s.ss...........p.s-.lplpF-p..GhPVul..N......................Gcp.h.ss......spllhcLNclu..G+HGlG.R.lDhlENRl..lGhKSRGl.......YEs..................PGuslLhtAH.ctLEs.lsls+-.shc.h+.c.t.l.tt.c.hucLl.Yp.........GhWFsP..p...p.hLp.s.hl.pc.....s.ppt.V.oGpVplc.......La+G.s.hh.lh.u.pcS.s...p.s...L.Y...stc..hsTa.p..c..s...c.....s..a.stpcAhGFIplhuL..p...t.......................................................................................... 
0 359 740 981 +1241 PF01960 ArgJ ArgJ family Enright A, Ouzounis C, Bateman A anon Enright A Family Members of the ArgJ family catalyse the first EC:2.3.1.1 and fifth steps EC:2.3.1.35 in arginine biosynthesis. 25.00 25.00 34.00 28.60 21.50 16.80 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.42 0.70 -6.06 139 2527 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 2367 44 817 2155 2927 371.30 40 94.80 CHANGED FpsuuspuGlK..................ps.+hDLullh..sp...tsussAGVFTpNphpAAPVhlsccpl.......pss.p....h+AlllNSGNANAsTGppGhpsAtphsphsAptLslss............p..........pVlluSTGVIGp.L.Ph-pltsulsp..hst.tL.........s........ts..shpsA...AcAIM.TTDThsKtsuh....phpls.Gp...plpluGlAK..GuGMIcPNM.......ATMLuFlsTDAslsssh.LpphLcpulcpSFNpIoVDGDTSTNDolllhAsG..tu.ss............s.lsp...ts.t...hptFppuLptVstcLActIs+DGEGATKhlplpVpGAtscp-AcplA+ulusSsLVKTAlaGpDsNWGRIlsAlG.hu.....Gs...sh-ssclslhl.........s....s.............lhlhpsGthss.as.E..ppupphhpp.........p.-lpIpl-L.....st.Gs..upussWsCDLoa-YV+INA-Y..RT ...................................apsuuhpAGlK..................ss...+tDlullhss....ss..u.s.s.A.uVFT.pNphpAAPVhhs+ppl.......ssu..p.........hp...AlllNSGsANAs.T.........G........tp..Ghpcupphspts...Aptl...s..ht..s.............p...............pVhlsST....GVIG..p...LPh..-.+lhs.ul.sp..hh.s.sh...............tts........shtsAAcAIh...TTDThsKtssh.....p.h.p.....l.....s.....Gp.................slsluGhAK..GuGMIpPNM...................ATMLuF............lsTDAsls.....s.sh.LpphLppsscpoFNpIoVD...GDTSTNDslllhAs.Gtu...st............................................lpt.......tp.p......httFppALptVspp.....LAptls+DGEGATKhlpVpVpGAt.opp-AcplA+slspSsLVKTAla.GpDsN.W..GR.IlsAlGhu................us...........shD.s.splslhl......................ss......l.lscpGt.ss.....as...E....ptspthhpp...........................................c.-lpIplcL..........sh...G...s........u.p.ussWsCDLoa-YVcINusYRo............................... 
0 273 556 708 +1242 PF03308 ArgK ArgK protein Mifsud W anon Pfam-B_3540 (release 6.5) Family The ArgK protein acts as an ATPase enzyme and as a kinase, and phosphorylates periplasmic binding proteins involved in the LAO (lysine, arginine, ornithine)/AO transport systems. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.69 0.70 -5.62 9 1738 2012-10-05 12:31:08 2003-04-07 12:59:11 11 13 1333 12 625 2137 1076 266.50 40 60.95 CHANGED LARAITLVEsccs....pAppLLcclhPhsGpAphlGlTGsPGuGKSTLl-tLhtpLtccGh+VAVlAVDPoSPhTGGuILGD+hRMpchu......scsGsFIRshsopGsLGGluptTp-sltLh-AhGaDVlllETVGVGQSEV-lsphsDshlllplPGsGD-lQuIKtGlMElADlhVVNKAD...htsActstp-LphALcL.p......................c..ctsWpPPllcssAspGcGlcELW-tIc-H+chlptoGhhtp+RRppttc.hhpllpstlhs+lpuu.s ........................................................................................LA+AITLlES..pp.stc....................hu.p..pl..L...p...t.....l.......h........P...........t..........s..........G....p.........u......h.........R..l...G.I.T.Gs.PGAGKSThl-ulGh.t..L..........h...c..c...G...t...+..VAVLA.....VD......P..S......S...s......hoG.....G......SI...L......G....D.+T...R...Ms.cLu...................................p.c......s..uF.....IR..P...s.s...o...pG.....p.....L....G.....G...l.up.....t....s.....+E.....shh.lh.E.A.A.......G.....aDll....lVET.VG...VG.Q...SEs...s.V.sphsD...shl.hl......h......lsusG......DpL.Q................u...I.K....+....G.l..h....El...AD......l...ll.lNKuDh.....pps.Apt.u..t..p.-....h.p.s.u.L...+...lhpt..................................................................................p....tstWp.P...VlssS..A..h....p.....t........c..Gls-lW...pt.l..c..apph.h..pt....s...G...thp..t..pR....p..pQ.t.htahhp.hpptlhpph....s................................................................................................................... 
0 245 446 556 +1244 PF02374 ArsA_ATPase Anion-transporting ATPase Mian N, Bateman A anon Pfam-B_1201 (release 5.2) Family This Pfam family represents a conserved domain, which is sometimes repeated, in an anion-transporting ATPase. The ATPase is involved in the removal of arsenate, antimonite, and arsenate from the cell. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.70 0.70 -5.38 11 2686 2012-10-05 12:31:08 2003-04-07 12:59:11 10 13 1221 72 1090 14632 4072 239.70 25 76.97 CHANGED h+alFhuGKGGVGKTTsSsATAlp..Luc.G++sLllSTDPAHsLSDuFsp......chG+pPTKlp..-NLhuhEIDPp........hplpchhtpshc.hsssh...thphlpshlp-thsu.PGh-EhhuFsphh+ahc......sscaDlllFDTAPTGHTLRLLphPsshsphhc+hhchpp.....ltshhp.h.t...hGusshs....chhppl-pphEplcthp-hloDPspToFhhVshsEchSlhEocRshptLtpaGlcscslIVNp.llPEsspp....spahpu++plQpKaLcpI--hFpshslsclPLhppEltGlcsLpchuphLhs ...........................................................hl.hh.s.G..KGGVGKT..Th............us..u.h.Alt...........h...A...p...........t...G.......c.......c..ll.l..l.............o.o..D.............P.....A....p..s.......L..........u.........p...s..h...s......................................p..h..s..t..p...........h.........l..........t....................................s...........s........L..........s...........h....p...l..D.sp......................tt.h..p.c...h..h....t...p...h.....h.....t............h.....t...t.....h.........................t.....h........s....h.....h.....p....c.......t..h.....s.....u...............P...s.........h....p........E.....h....h.....s....h..t.........p....h..t..c.hhp.........................ptpa...-...h...llhDoAP...T.G..........c..TLc...h..Ls......h..P...p.th.........p...t..h..l.......t.........t...............t...............................................................................................................h..t.t..h.t.t.....................t....h..........t........t...........h............t..........hts...........t.
..........othhh.V..h.sp..s..l.Es.ph.t..Lt........t..h..t.ll..lNt...h..h............................................................................................................................t..................................................................................................................................................................................................................................... 0 393 721 952 +1245 PF02040 ArsB Arsenical pump membrane protein Mian N, Bateman A anon IPR000802 Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.53 0.70 -5.52 4 1841 2012-10-02 15:12:49 2003-04-07 12:59:11 10 4 1464 0 343 2231 187 367.80 44 95.11 CHANGED LAhsIFLLTLVLVIWQPKGLuIGWSAslGAVLALIsGVVshuDl.sVhsIVWNAThTFlAVIlISLlLDEhGFFEWAALHhu+.hupGpGh+hFsalsLLGAhVAALFANDGAALILTPIVlAMlhALtFsctsshsFlhAuGFIADTsSLPLlVSNLVNIVSADYFslGFhEYAShMlssslhSllASllVLaLaFRKsIP.sYsLppLKsPtpAIKD.shF+luWhlLhlLLlGaFs.E.luIPVSslAushAlIhlhlAp+u+AlpTt+Vl+tAPWpIVlFSlGMYLVVYGL+NAGLT-lLuslLsshu-pGLaAushGhGFluAhLSSIMNNMPTVLlsALuIsuSsssGhl+EAMIYANVIGsDLGPKITPIGSLATLLWLHVLopKGM+IoWGhYF+TGIllTlPVLhlTLsuLhLpL 
...........................................................................................hAhhlFlhT.l.l....h..l....l...W..p....P......+......G.....L.s...l.........u...h..s..A..h..h.G...A....h...l......s.l....l....h...G.l....V...p..h..s...D...l................V......h....s..........l........V.......W...s.A....o.h..s...F...l.u.l.I.lI.Sl.lLD...c....G.F.F.-....W..u..A...l+.h.s.+.......h...up.....G...p.....G..h.tLF.sal.h...LLG.AhVuAhFANDGAAL.ILTPIVlAhl...h..sLs.a..........s.p......t.s.h.......h...s.......Flh.A.u...GF..I..A.D.......o....u....S....L....P...Ll..V.S..N..L.V...N.I..l..o....A.....c....a.....F....s....l..u.F..h..c.Ys......u.....h......M.h........s..s.l.h.....u...l....l...A..o...l..l.....h...L...a...l.....a......F.........R....+.......s.....l....P....p...p...........a.....-.....h......t...............l..........p.........p.....P..t........p.........A......l...p.............D...................t.......h.....F......+.....h..u.....h.h.l..L...h......l.L.......L......h....G......a......h............l...h....E.......l........s........l...........P................l.........S..h......l....A...s..l....s....A....h.......l....h....h......h.......l.......A..t......+......u......p.....s......l.............s........s....t..........p..........ll.+..s..A...PWpIVl..FSlGMYlV.VaG.L+N..s.G..l....T....p....hLu....t.....l....L....p....h.....l.....u......p.....t......G.......l....h.s...uh....h....G..hG...h.l.....s...A.h.LSul..h.NNh..P.....s.....V...............L............l........s........s.....l.........u.........l...s.......t........s.....s..........s.....s....s.....h....h..p.....c.......u...........h......l....Y..A.N.l....I...GsDLGP.K.....lT.P.I.G.SLATLLWL+..V...L.sp....K....s........h...p..I.o..Wu........Y...h...+s.G.l.l...h.T.l.P.l.Lhh.T.LhuLhh.................................................................................................... 0 93 207 283 +1246 PF03960 ArsC ArsC family Bateman A anon COG1393 Family This family is related to glutaredoxins Pfam:PF00462. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.33 0.72 -3.99 36 7591 2012-10-03 14:45:55 2003-04-07 12:59:11 10 18 3508 24 1253 4303 1099 107.40 26 86.65 CHANGED YtsssCsos+KAhpaLcppslpaphhshhpsshscccLppllpphs.s.hcpllsp+upsa+pLs.....ls.pplot.scllcllhcpPpLl+RPIlhss......c+lplGass-plpth ......................Yt.PsCsos++A.h....p....h...L....c....p...........p....s....l..p.h.p.h.hph.h..p..ps...o..tcpLppllpph.......u.....s....hcpll..ss+u.p.ha+cLs.......hp...ss.l.ot..pp..h..l.plhhppP.tL.l+RP.Il.lss...........pp.h.p.l.Gasp-phtt......................................... 1 315 718 1014 +1247 PF01129 ART NAD:arginine ADP-ribosyltransferase Finn RD, Bateman A anon Prosite Family \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.53 0.70 -5.25 9 454 2012-10-01 23:25:29 2003-04-07 12:59:11 13 17 114 8 209 503 14 201.40 31 63.26 CHANGED LDMAssoFDDQYtGChccMct+.hPpL.+pEFstspthsssWcpAppcWp..........cR......psphs...........hPhsF+DpHGlALlAYTus....s.la+.FNpAlRpuGtS+pcYhtpFpFKslHFhLTcALQLLps....tsCpp.....VaRGl+.....GhRFp.st.GtoVRhGpFsSoSLp+psAp.....FGpsThFslcTChGssI+saShaPpE-EVLIPPaEsFpVsshpp.spu.spI.LcShs+pso ..................................................................................................shs.ssFDD...pY.sCtpp.h.tt..hs.p..h..hppEhttpp.htpsWppA..p.tWt.....................pp...........ttth............hP...s.h.pspau.l...AlhsYTss.....sslappFNpAl+....psutohp.pY.h.p.p...F...p..FKslHahLTpAlQlLpp.............thChp........V.a.RG..sp.............s....h.p..F....p......s..t...hG....t.p.lRhG....pFs...SoS...hpct.Ap................pF....G....p......p..........T......h...F......sl...h......T....C....h........G.......ss.......l...pta.S.hh...t.E..cEVLIPPhEhFpVhpht....ptt.ts.....l..L.t.tp............................. 
0 25 49 112 +1248 PF02497 Arteri_GP4 Art_glycop; Arterivirus glycoprotein Mian N, Bateman A anon Pfam-B_787 (release 5.4) Family This is a family of structural glycoproteins from arterivirus that corresponds to open reading frame 4 (ORF4) of the virus. 20.70 20.70 24.20 24.10 20.50 20.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.28 0.71 -4.35 3 241 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 9 0 0 248 0 176.90 80 99.36 CHANGED MAuoLLFlLVGFcClLVSQAFACKPCFSouLSDIcTNTTAAAuFhVLQDIuCaR.HRsSAu.EtIp..KsPQCRTAIGTPpYVTITANVTDEuYLHsADLLMLSACLFYASEMSEKGFKVlFGNVSGlVAVCVNFT-YVuHVpp+TQ+.pLVlDHlRLLHFLTPSsMRWATVIACLFAILLAI ............................................................................MAAuhLFLLVGFcChlVSQAFACKPCFSSSLSDIKTNTTAAuGFsVLQDISCLR..HGsS...uS.......sIR...KsSQCRTAIGT..PVYITITANVTDENYLHSSDLLMLSSCLFYASEMSEKGFKVVFGNVSGIVAVCVNFTSYVQHVKEFTQR.SLVVDHV.RLLHFMTPETMRWATVLACL.FAILLAI. 0 0 0 0 +1249 PF01481 Arteri_nucleo Arte_nucleocap; Arterivirus nucleocapsid protein Bateman A anon Prodom_2418 (release 99.1) Family \N 20.40 20.40 20.60 22.90 19.50 17.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.37 0.71 -4.00 4 700 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 11 6 0 662 0 110.20 70 93.69 CHANGED SQpK+Kp....GNGpssN.QLsQhLuthl.tpttQs+....Gptp+KKpPtKP.HFPlAs.sDlRHchT.sERphChSSlQThFNQGuGosoLuDSGtlsaTVpF.LPTppTVRLIpsoAss ...................pQpK+Kp....GsGQPVN.QLCQhLG+hI.uQppQsR.....G....p....psKKKp......PEKP.HFPLAsEDDVRHHhT.oERpLCLpSIQTAFNQGAGTsoLSsSG+lSapVEF.LPstHTVRLIRVTuos............... 0 0 0 0 +1250 PF01606 Arteri_env Arterivirus envelope protein Bashton M, Bateman A anon Pfam-B_664 (release 4.1) Family This family consists of viral envelope proteins from the arterivirus genus; this includes porcine reproductive and respiratory virus (PRRSV) envelope protein GP3 and lactate dehydrogenase elevating virus (LDV) structural glycoprotein. 
Arteriviruses consists of positive ssRNA and do not have a DNA stage. 21.20 21.20 21.70 256.50 20.60 21.10 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.67 0.70 -4.68 3 333 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 8 0 0 354 1 211.30 78 82.56 CHANGED MAcpCshFHhFL.CuFICYSsCCAVsANSoA..ThCFWFPLs+.GNTSFELolNYTlChhCsTsQAAtphLEPG+shWCRIGHDRCEEsDHDELsFslPsGhp.Lp.EGa.TSlYAWLAFLSFSYAAQFHPElFGIGNVSpVaVDh+HQFICA.HsG.NSTLscH.+NISAlaAlYYpHQlDGGNWFHLEWLRPLFSSWLVLNVSWFLRRSPAS+VSRRl ...MAsSCsaLHIFLpCuFL.YoFCsAVVANSNu..TaCFWFPLVR.GNFSFELsVNYTVCPPC.TRQAAuphLEPGRShWCRIGHDRCpEsDHDELGFhlPPG...LSSEGHLTSVYAWLAFLSFSYTAQFHPEIFGIGNVScVYVDIKHQFICA.HDGpNuTLPRH.DNISAVFQTYYQHQVDGGNWFHLEWLRPFFSSWLVLNVSWFLRRSPAS+VSVRV. 0 0 0 0 +1251 PF00951 Arteri_Gl Arteri_glycop; Arterivirus GL envelope glycoprotein Finn RD, Bateman A anon Pfam-B_425 (release 3.0) Family Arteriviruses encode 4 envelope proteins, Gl, Gs, M and N. Gl envelope protein, is encoded in ORF5, and is 30- 45 kDa in size. Gl is heterogenously glycosylated with N-acetyllactosamine in a cell-type-specific manner. The Gl glycoprotein expresses the neutralisation determinants. 21.80 21.80 22.90 24.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.97 0.71 -4.68 11 8906 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 12 0 0 8965 0 171.80 75 86.54 CHANGED .u.uhss.sss..p..p.h.ap.hhpphssTthhpphhsaulpohslh.hsp+lhphthLshsphl.hlshusl.uhhsahthchshossYuhsAlsALla....Flhp....ss..+.ChstRhhhst.ssFl.sscG+..lapscosl.lVc+sGpstVsGcLV.slKplVLsGpKAs.hstssAcpWp .............hss...hsNAsss.S.SSHhQ.LIYNLTLCELNGTDWLs.s.+.FDWAVETFVIFPVLTHIVSYGALTTSHFLDTVGLlTV.STAGFhHGRYVLSSIYAVCALAALlC....FVIR....hA..KNCMSWRYSCTRYTNFLLDTKGR..LYRWRSPV.IIEKGGKVEVEGHLI.DL.KRVVLDGSsATPlTRVSAEQW.G................................ 
0 0 0 0 +1252 PF01097 Defensin_2 Defensin; Arthro_defensin; Arthropod defensin Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.12 0.72 -4.23 15 314 2012-10-01 23:31:40 2003-04-07 12:59:11 13 1 158 14 58 362 0 33.90 42 40.60 CHANGED u.hh.lNcoACAtHCls+.G.p+GGYCsu..KsVClCR ............hhsh..spusCsuHChuh..G.p+GGYCss...ptlCsCh............. 0 23 27 51 +1253 PF04161 Arv1 Arv1-like family Wood V, Finn RD anon Pfam-B_9351 (release 7.3); Family Arv1 is a transmembrane protein with potential zinc-binding motifs. ARV1 is a novel mediator of eukaryotic sterol homeostasis [1]. 20.00 20.00 20.30 21.50 18.20 19.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.82 0.70 -4.42 21 319 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 253 \N 217 304 3 176.80 27 62.27 CHANGED hlCIpCspt.lpsLYppY...Ssshl+LTpCspCp.cssDKYlEaDsVllaIDll.LLKspAYRHLlaN.h......................................scacphh+lhllhlLhEsY...........LhWtppccp.pps..............................h.h..hhp.tshhhQhlhFhhthll....phhhhhhhhthhhhthhphs......................ht.thphlhhslLlSthuKlF.ILhlIWsas.shhuhtllpllshhphlpuL ........................hClpCsp..sppLappY...............s.t.......t.s.......l+lo.CtpCp.phs......DKYlEaD.lllhIDll.LhKspsYRHlLaN.......................................................t...p..hh+.lhlh.hl.L...h-.sY............lpWh.p.ppp.t.s..................................................hhp.....h.thhhh.h.hh..shh....ts.hhh....h.h.hht...hhhh...hh.h............................................t....l.hslhlu.......hKlh..l..h...h.lW.hs........hthhh............................................................................................................................................................................... 
1 62 112 171 +1254 PF01731 Arylesterase Arylesterase Bashton M, Bateman A, Fenech M anon Pfam-B_2101 (release 4.1) Family This family consists of arylesterases (Also known as serum paraoxonase) EC:3.1.1.2. These enzymes hydrolyse organophosphorus esters such as paraoxon and are found in the liver and blood. They confer resistance to organophosphate toxicity [1]. Human arylesterase (PON1) Swiss:P27169 is associated with HDL and may protect against LDL oxidation [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.71 0.72 -4.00 6 216 2012-10-05 17:30:42 2003-04-07 12:59:11 15 7 94 0 130 450 101 84.20 42 23.51 CHANGED NDIsAVGP-pFYuTNDHYFscsaLhhLEhhLshtWosVVYYSPsEV+sVApGFsuANGIslSsDcKalYVA-lls+sIHlhcKHss ...................................NDIlAVGPcpFYATND..H.YF...ss...s...h...L..p..hl.E.h.a.L.s..L..t.......Wus.VVY...Y...S.......P...p....E......V...+...l...V...Ac...G...a...s.ANGIs......lSsD...p....K...alYVA-..lhs+pl+Vhc+p.......................................................... 
0 37 50 79 +1255 PF00858 ASC Amiloride-sensitive sodium channel Bateman A anon Pfam-B_415 (release 3.0) Family \N 23.00 23.00 23.10 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.65 0.70 -5.83 114 2052 2009-01-15 18:05:59 2003-04-07 12:59:11 19 39 116 15 1489 1878 5 339.30 17 78.79 CHANGED FspsoolHGhpal..h.sspt..........hhc......................R.....................hhWhlhhlsulshhhh.hhphhppah.spPslssls..hht..pls.FPAVTlCshN.h.......chst..lpt....h.........tt.hthht.hh...t.................................................................................................................................................................................................................................................t.............................phshhphhpph........uhsh.c-hl....hpCpapup..........h..spp......ahp.......................................................................................................................................................................................h..h....hTp.hGh.CYoFNsttt...............................................................hps....ttsGhpsGLplhLph......p..pp-hh..sttss...............................................................................h....Gh+lhlHsts-hPhhsp........shtlssuh..p.shlulp.pphp.......pls.............sshpp.Ch.ss.....p..........h.h.....ttY.ohssChhcChsphhlcpCsCs.hhh................................tphsh..........Csh..pp...h.pC..........hpphhtphh......................tC..p.C.ssCsphpYs...hphohsphsspt...h......................t.p.s.t..ppshthlplaa...cphsa.pphccp.sashssLL...............................uslGG.hGLFhGsSlLollEllhah 
.......................................................................................................................................................................................................................................................................................thtu...hh....t.....................................p...............................hhWhhhhhh....hhhh....hh....h.......h...............h................a....p..................h.................hthp..........................................ph...F.Pu.lo....lC.s..s.....h.......p.p.......ht..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ph.ph.h...th..........................t...h..t.p..h.h.....h.C........a...t..t..............h.......pp..........h................................................................................................................................................................................................s....G....C.hshN.................................................................................................................h..........su..t...............G...l..phhl.t.h...........t....p.p....h........................................................................................................Ghp.l.l...at.........tp..s......t..t.............hh.......h.......s........t.s..h......t.hp....h............................h...............h.tp....Ch.tt..............................................Y....o..t....sC..hpC..p...h..h..p.....C.tCh....h......................................t...............C.s......tp..............h..C..........h.........h..........
.........................................................C..........C...sCp...................a.t.......h.t.h.t.......h......................................................................................t..t.t..h..hlplhhtt......h....h..pt..hth.thh.......................................................................................sshGu.hulhhGhShlshhEhh.................................................................................................................................................................................................................... 0 637 750 1219 +1256 PF01671 ASFV_360 African swine fever virus multigene family 360 protein Bashton M, Bateman A anon Pfam-B_1174 (release 4.1) Family The multigene family 360 protein are found within the African swine fever virus (ASF) genome which consist of dsDNA and has similar structural features to the poxyviruses [3]. The biological function of this family is not known [3]. Although Swiss:Q65137 is a major structural protein [1]. 22.50 22.50 26.90 26.20 22.40 22.40 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.46 0.70 -4.95 14 126 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 11 0 0 100 0 205.90 43 60.99 CHANGED EpT+cLCccLGAKptLpcpclhphFhc.lpcpKTuuNlILCHElFssN.PlLcsls.tcl+hhIahcLctl...h..hhsphohsphLsKYWYuhAlcasLpcAIpYFYQ+asHlppWRLhCALhaNNlFDLH-lYpp-+lcMDhsEMMplAC.h+DpNahTIYY.CalLGAsINpAMlsSlp.aN.................hsNlFFCIDLGAssFEEuhtlAtpcshphlhplLs ............cpT+cLCccLGAKptL.pcpclhphFhp.lpcpKTSsslILCHElFssN.PlLpsls.hch+hhIhhpLcth...h..hhsphohsphLsKaWYuhAlcasLpcAIpYFhp+atHhppWRLhCALhaNNlaDLH-hYppcclcMDhs-MMplAC.hpDtNahTIYY.CF.hLGAsINpAMhsSlp.as.....................hsNlFFCIDLGAssF-EuhtlAtpp.s.phlhphL............. 0 0 0 0 +1257 PF05170 AsmA AsmA family Bateman A anon COG2982 Family The AsmA gene, whose product is involved in the assembly of outer membrane proteins in Escherichia coli [2]. 
AsmA mutations were isolated as extragenic suppressors of an OmpF assembly mutant [1]. AsmA may have a role in LPS biogenesis [1]. 29.80 29.80 29.90 29.90 29.70 29.70 hmmbuild -o /dev/null HMM SEED 604 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.67 0.70 -5.90 12 3636 2012-10-03 05:41:17 2003-04-07 12:59:11 9 19 1503 0 623 2962 466 400.20 19 70.17 CHANGED M+phh+hlhhlLlsLllll....hulhhhhssp.hpsplpp.Vuttsshplshcus.t............chhhP..plpLpslplussst.t.......hsplcplclsLushsLhspplplsplhLcssslpLsh.spsphshst.lp.pp.shss.............................ss.ph.luplplssuplhhpscstpphh..phslphpssslht...................................hshspstlsGphcpctpsh.s.lsssh.spsshst.thpst.t.......................hhtthuss.hphsshclhssplth.sshhhpsh..phsh.shpLphssht...s..thpt..th..htt.hh.sthsh..sPtslsltphstpaptupspsshth.p.th.h.............................................cptulsGht.ss.ssh.......h.pshPphp.pLphppLslschh.lpts.........pps.hptsshsstsssh.Lspcph.hshWps..............slp..hssspssa.spl.lscsthphpspsGhlpLsplpuslttGhlpusuplshsphstphplphsucsl.lp.Lhpth...hslpG.hslshshpGtutssshlhtslsGplchshssuthp...............................tt..tst.h.hph.phshshppGhlohs.h 
............................................................................................................................................................................................................................................................................................hhhphhh.h...h...h...hsh..l.h.hl.........hhlh....h.h.h...p........p....h.p.sh.h.sp.........l...ptp..s.h..ht...h...p.u....t..................hth...h..P.......plhh..t.ph.....tl.....s.t........s.......t..........t.................hlphpphch.sls...hs.L.h....s....p....p....l.l.plhL.....ptsshp..L........p..t....s.....t........s...............h....t........p..p......t...ss..........................................................................t.................p.......h.....p.......l...s..t.h.h.s.....tuhl..h.p......p......s..p.........................p.h.p..h.......s........................................................................................................................................................................t.h...h.....t.........t.......h..p.........................................................h.......h.........th.........................................................................................................................................................................................................................................................................................................................................t............h.............................................................................................................................................................................................................t......t.ht.u......hh...t..............................................s...h.....pl.......hphtt............................................t..............................................................h....t..................ht.........................................ph...hphtt.h..h......t.h....h..pp....hth..p.......t.s.
....htlp..hph..h...tG.ht......h..t...h.......p...................p...........................h..t..h..p.th..p..th........h.....t..................l..........................................................................G.....h.sh...h.phpu.....s.....hh...p.tGphth..ttt........................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 138 316 463 +1258 PF00733 Asn_synthase Asparagine synthase Bateman A, Griffiths-Jones SR anon Pfam-B_443 (release 2.1) Domain This family is always found associated with Pfam:PF00310. Members of this family catalyse the conversion of aspartate to asparagine. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.71 0.70 -4.89 104 5876 2012-10-02 18:00:56 2003-04-07 12:59:11 16 41 3139 38 2130 12423 8100 248.10 24 44.92 CHANGED plcchLpcuVppphh............uDlslGshLSGGlDSShlsulspc...........................................ts.lpoFoluapsss.................................................-hthActsApplusc..Hpphhlstp..-hhsh..l.cllhth-pPhs.....sshshshahhu+ts+...................ptspVsLSGcGuDElF..uGY....thhpps.thttht..hht.hpthhhh.h......................................................................................t......hh..hthh.st.L.t+sD+hsMutulEsRsPFLDccll-hshp.lPsphKh.psstt................................KhlLRcsh........pphlPcplhp.........RtK 
...........................................................................................t.hpphl.ppuV...p..p..p.hh.....................uD..l..s..l.G.....s.h.L..S.G.GlDSS..l..l.s.u.l.s.pc...................................................................ht.......p..l....c..o.....F..s...l....u..h...p.s..s..s.........................................................................................-h.t.h..A..c..p.l....A..p.t..lGsp........Hpph....h......h..s..........p.................-.h...hpt.......l.p.h...l..h.t..h...-...p..s.....................tssh..s.h..a.h....h..s.c..hhp.....................................................................................tth.ph...s...L.SG-G...u....D....El..F...........u...GY...............................h.....h.t....p..t..........h...t......h.....t............h.....t...h...t.h....h.............................................................................................................................................................................h+s..s.+..h..........hu.h...u....l..........E....sR..s....PFL....Dp.chh.chs.hp....l.s.p..K..h...t.t.stt...............................................KhlLR.psh........................pt...h.l.P.t.p.lh.R.K...................................................................................................................................................................................................................................................................... 
0 735 1406 1834 +1259 PF03590 AsnA Aspartate-ammonia ligase TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.80 21.80 21.90 24.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.47 0.70 -4.94 6 1603 2012-10-02 14:22:40 2003-04-07 12:59:11 10 4 1542 4 211 863 13 236.20 56 72.62 CHANGED KoFIhQ.QppISFlKNTFTppLtE+LGllEVQGPILSpVGsGhQDNLSGtEKAVpVpVKpIPDAsFEVVHSLAKWKRHTLARasFupsEGLFVHMpALRPDEDsLDtsHSVYVDQWDWEKVIssGcRNluYLK-TVcpIYuAIRhTElAVsp+F.sltshLPcpIsFVHSEELVcRYPcLssKcREsAICKEaGAVFLIGIGGcLSDGKPHDlRAPDYDDWTT.SEstYKGLNGDILVWNslLscA .....................l.p.Qpt..IsaVKspFpppLpccLsLlcVpAPlhscsGsGhpDNLs..GsE+sVphcVKs...l.P..D.u...p...hE..VVHSLAKWKRhTLucasFstGEGLYTcMpAlR.DEDp....LsslHSlYVDQWDWE+Vls.c.s..cRs..hshLKpTVcpIYtuI+tTE..........t........tV.ptca...s...l........t....s...h.....LP...-.pIpFlHSpELlsRYPDL.s.sKpREpAIsKE.hGAVFLlGIGGcL.s.DG+..HDsRAPDYDDWoo..................st...........u..........h.................GLNGDILVWNslLppA....................... 0 102 148 179 +1260 PF00026 Asp asp; Eukaryotic aspartyl protease Eddy SR, Griffiths-Jones SR, Finn RD anon Overington enriched Family Aspartyl (acid) proteases include pepsins, cathepsins, and renins. Two-domain structure, probably arising from ancestral duplication. This family does not include the retroviral nor retrotransposon proteases (Pfam:PF00077), which are much smaller and appear to be homologous to a single domain of the eukaryotic asp proteases. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null --hand HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.08 0.70 -5.19 23 4762 2012-10-02 15:32:34 2003-04-07 12:59:11 18 56 583 656 3018 6175 72 282.30 24 71.72 CHANGED pYhupIsIGTPsQpFsVlFDTGSSsLWVP...SspCsss........................uCpp+spacPSpSSTapps.Gps...a.sIpYGsGS.hsGhlupDTVslGGlslssQtFu.usp...su..h.....htuhhDGILGluasulssst...........hssVasslhsQullspslFSlYLspss....ttsGpllFGGlDsu+YoGslpalsVo..........ppsaWplslculsluss......shsssssp..............AllDTGToLlhhPsshlspltpslGAp.sp.................................................................................................t.....GtYslsCsshsphPslsFhls.Gtpaslssssalhpsssss..........ChhGhps......sssshaILGDlFlRphYsVFD.hsNsclGhAts ...................................................................................................................................Yhstltl....G...o......P..........s.....Q......p.h...p.lhh..D.T..GS.u...s.hW....Vs..........s......s..........................................................................................................s...............p.....t.....h.....a....s......s...p........t...S....s........o..h......p...........................s.....p.t............................h...pl...p......Y......u....s..........G..........s.......h......p........G........h......h..........s.......p.......D.............s..........l.............p...............l...............u..................s....................h..........s.........l...........p..........p............t.......t..........h.........u.........s......p..p.................s..........h.........................s.....h...s...G....l....l.Gl..u......a..s...p..h....s.sst..................................................h.shh..t..s.h....h..p..........p.....t.....h...l....s....p....s....h....F....u.....h.....h.........L.........s....p....ts.....................s.....G.....t...l..h...h.....G.....u......h...............D.....t....s......h..........a......p...
.......G..........p......l....t...a...h...s...l............................pt..s.h..a...p......l...p.....h.s...s.......l.....t..l...s..sp...........hh.s...t...t.....s....h.t...........................................................................sl....l..D....oG......T..o..h..........h.h..........h..P....p....p....h....h....p....t....l.........p...t....l....s..u.t........................................................................................................................................................................s..h.h..l..s..C.....s......t.......h......................h........s.......s....l...........s.....h..........t........h..........s...........s..........h...........p........h..s.....l........s.....s.......p......p.....h.....h.......h.....p.............t...t....t...................................C..h............s.h.s.........................................................t.......s........s.....h.........h.....l.......LG.s.s.F....lpp.h.as..l.a..D....h...t....p....t....p..luhA............................................................................................................................................................................................................................. 0 1115 1670 2449 +1261 PF02261 Asp_decarbox Aspartate decarboxylase Mian N, Bateman A anon Pfam-B_3879 (release 5.2) Domain Decarboxylation of aspartate is the major route of beta-alanine production in bacteria, and is catalysed by the enzyme aspartate decarboxylase EC:4.1.1.11 which requires a pyruvoyl group for its activity. It is synthesised initially as a proenzyme which is then proteolytically cleaved to an alpha (C-terminal) and beta (N-terminal) subunit and a pyruvoyl group. This family contains both chains of aspartate decarboxylase. 
25.00 25.00 29.00 30.30 22.50 19.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.33 0.71 -4.51 163 2477 2012-10-02 17:45:13 2003-04-07 12:59:11 11 4 2416 51 550 1458 373 115.10 52 89.57 CHANGED MhhphLKuKIHRAoVTpA-LpY.GSlTIDp-Ll-AAsIl.hEpVpIhNlsNGpRFpTYsItG-+GSGhIslNGAAARhspsGDhlIIhuYuthscpEh.cs.acPplVhlD.cpNplhc ...........MhpTMLpuKIHRApVTcA-LpY.GSloIDpDLL-AAsIlpsEpVpIhNlsNGpRFpTYsIsGE...RGSGlIslNGAAARhspsGDlVIIhoYuphs.-.-Es..+s..acPcVlah-.t.s.Nchh.c......... 0 193 390 489 +1262 PF00710 Asparaginase Asparaginase Bateman A anon Pfam-B_652 (release 2.1) Domain \N 22.50 22.50 22.60 22.80 22.20 22.40 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.03 0.70 -5.56 157 5341 2009-09-16 23:30:57 2003-04-07 12:59:11 15 27 3806 135 1165 3548 565 305.20 30 90.15 CHANGED clh.......llsTGGTI...........uh.ttsspstthss.......ssptll...ptls....lpph...s.plps...............................................................................................................................................................................cthhs.lsSsshssppWhplupplppth...cs...cGhVlsHGTDThp.TAhhLshhl.ps.s....+PVVlTGu.+sh..........sphsuDu.tNLhsAltlAss............htGVhlshssc.......lhpupcspKsposshsA.F.pSsshsslu.hl...ss..plphhtp.hh.....................................t......................th...h.tthtsp.VsllthasGh.ss....phlcshl..ss.hcGlVlpuhGsGs......sssshhsslpcss.pp.G.......lsVVhsopshpGtVs...hshhusutplt.p...h.GslsusshssppAhltLhhhLupt..hshpcl.p 
..........................................................................lhlltTGGTI........................uh..tt.s.s...p.s.....hhs.................tspsll......ptl.s......lp.ph.....s...pl.cs...............................................................................................................................................................................c..p.h...h.p...l.s.SscMss.ppWhplucp..I..pp..th...cp.........hDG....hVIoHGTDThp.TAhhLs..h....hl.....ps.........s..........KP..VV.lTGu.pP............s.p.hsuD.G...hNL.hsA....l...hlAus...........................t.......spG....Vhls.hNs.c............lhpupcs...........o.K.sp..os.sh.s.s.F.pSs..s.hs.sLu.hl........st...clth.h.pssht.........................................................tt..t..........................h.....s..p.......p.h...s.p...Vsllt..hY..sGh.ss................phlcsh........l....pt.......s....h...........cGl....lltuh..GsGN..............hstshhp....t....l.p....p....hh...p.p...G.......................lsV.Vhsop..........shsG.tss........th.....s..s..u..h.tlt..c.................h.Ghl.su...t..s..hssppAhhhLhhhLsp..ths.pth..................................................................................................... 
0 339 683 964 +1263 PF01112 Asparaginase_2 Asparaginase Bateman A anon Sarah Teichmann Domain \N 19.40 19.40 19.90 19.50 19.10 19.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.99 0.70 -5.71 7 2653 2012-10-03 21:14:07 2003-04-07 12:59:11 13 21 1641 72 1019 2127 867 259.80 32 88.17 CHANGED sshhIs.lpG..utsslsps......hPh.hphht..hh+pssptuhcsLpuGsssLDsVE.ulchlEsp..chssGhGus.spsGpsph-AslMDGsshcsGAVuslp+l+NPIplARhVM-pTsHlhLsh-uAppFApspGhcp.s.ophtopp.hpchhtsppts....pl..D.osh.................l.ptpp...........hsTlGhVAlDspGplAuuTSTuGhs.KhhGRVGDSPl.GAGsYAschsu.soATGpGEslIRthsuhplsthMchGt...phsptshphlhcthPc.......hssGlIAlstpGcluhshss.uha..h.sAspss.h..Ysp.s...p .......................................................................................t.............................................th.h.......hp...thh.p.........t..u.............p..h..L............p......t......G.........t.................s............A.l................D....s...Vttulphl...E.....sss....hhNuGhG.ushst.............sG.p.h.EhDA......s.l..M.............D..........G........p...........s..........h.......p..s......G............A.V......uu..l..p.pl..+N.Pl.....hA..........+.........h..........l..........h..............c......p...........s...........................H..............s.........h.........h.........sGp.......GA....p.p.a.A.h....t.t.....G..h...t...h....h....s............t.....h...................o...........................thp.ph.t....t....h.ttt................................t....t...............................................................ttp..............................................................hsTV.G.sVA..l.D.t.p.G.pl.A...A....uTSTGGhs.Kh.G.R.VGD............o..P.ll.Gu..G.s.............a........A........s.............s........t............s........s.....s.....o.....s..T...GhGE..hhh+h.....h..h.............u...hp..l...s.t...h...h......c.tt............th.t...ps.s..th.l....p..t..h.t........................stsull..s.l..st.pGp...h...s..hs
h.ss......th................................tt....................................................................... 0 291 539 822 +1264 PF04958 AstA Arginine N-succinyltransferase beta subunit Bateman A anon COG3138 Family Arginine N-succinyltransferase EC:2.3.1.109 catalyses the transfer of succinyl-CoA to arginine to produce succinylarginine. This is the first step in arginine catabolism by the arginine succinyltransferase pathway. 29.40 29.40 31.90 36.50 25.30 29.30 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.86 0.70 -6.02 71 1133 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 909 1 204 701 163 330.90 51 97.92 CHANGED MhllRPlptsDlsuLhplAppo...GtGh.TSLPsscchLpp+Is+SppoFtspphspsupps...YlFVLEDocoGcllGssuIcAsVGhsp.PFYsY+lupllHsS......ccLslppphcsLsLssDhTGsoElCoLFLcssaRps..tsGcLLS+uRaLFhApapcRFu-pVlAEhRGlsD..-pG..pSPFW-uLuc+FFsh-FscADhLoGlssKsFIAELMP+aPIYVsLLsp-AQsVIGpVHssTcPAhclLEpEGFphpuYVDIFDuGPTlEAclcsl+oV+pS+thpVp...............tsu....pp..hLluNsphps.........FRAshsphths...s.splhlsscsAcsLplps.GDpVRhlsl ..........MhVlRPlcpuDlsALhpLAs...co...G.sGL.TSLPsNcspLusRIcRuhcoapuch...stuEpG...YlFVLEDoE...TGpVsGlsAIEsAVGLs-.PaYN.YRVuTlVHAS......+EL.sVaNslsTLhLSNDaTGsSELCTLFLcP-aRpp.....sNGhLLSKuRFhFhAuFR-+Fs-+VlAEMRGV.D..EcG..hSPFWpuLG++FFuhDFocADaLs.GsGpKuFIAELMPK.aPIYsphLSpEAQsVIG.pVHPpTtPAh.............s....l..LE+EGFcYcsYlDIFDGGPTLEC-l-clRulRcScllpVs.u.......ps..stsshss...hLVuNp.sacc.........FRssLscscss........sppllLostph-ALcsps.GDpVRlVt.L.......................................................... 0 43 78 145 +1265 PF04996 AstB Succinylarginine dihydrolase Bateman A anon COG3724 Family This enzyme transforms N(2)-succinylglutamate into succinate and glutamate. This is the fifth and last step in arginine catabolism by the arginine succinyltransferase pathway. 
20.10 20.10 20.10 38.90 19.60 20.00 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.46 0.70 -5.68 4 825 2012-10-01 20:45:11 2003-04-07 12:59:11 7 3 787 14 131 513 157 429.90 67 99.25 CHANGED M.sAaElNFDGLVGPTHsYuGLShGNlASppNuttVSNPRtAAhQGLsKM+sLhDhGhsQuVLPPptRPssslLRpLGFSGSDEtVLppsA+-A.sLLuAsSSASsMWsANAATVsPSADTADGRVHhTsANLssphHRSLEussTtpsL+tlFsDEp+FAVHusLPtsuphGDEGAANHsRLsucaGssGVplFVaGR...-u..stsuRaPARQThEAspAlARLpGl..spslFAQQssssI-tGsFHNDVluVuNtpVLFtHppAFtcpsplLAEL+A+lsG.....hhlpVspsplulpDsVuSYLFNSQLLShs.DGpMhLllP.EsR-NstsWshlppLhuuDsPIscVKVhDLRQSMpNGGGPACLRLRVsLsEtEhsAlNPAhhhsssLhDsLscWV-RaYRDRLosuDLADPpLLpEuRpALDtLoQlLsLG.shYsFQ.....ss ..............MsAaEVNFDGLVG.THHYAGL..SFGNcAST+HchpVSNPRhAAKQGLlKMK.ALADhGFsQuVlPPHERPhlssLRpLGFoG.SDEQVLcKsARQAPchLSulSSASsMWsANAATVuPSADThDGKVHhTVANLNNKFHRSlEAssTculL+AIFsDEc+FuVHsALPpVAlLGDEGAANHNRLG.....GcYGcsGlQLFVYGR.....c-s..s...c...s...c..PsR.YPARQTREASEAVARLpQlsPppVlFAQQNP-VIDQGVFHNDVIAVSNRpVLFCHppAFs..cQppllspLR....s+lsG......FhuIEVPsspVSVuDAVuTYLFNSQLLSRs..DGSMh...LVlPpECR-csuVWsYLN.c.L.l.A.u.D.sPIs-l+VFDLRESMtNGGGPACLRLRVVLoEcERpAV..NPuVhMND.sLFssLNsWVDRaYRDRLTsADLADPpLLcEuRpALD.LTQlLsLG.SVYPFQ.c.................... 0 24 49 91 +1266 PF04952 AstE_AspA Aste_AspA; Succinylglutamate desuccinylase / Aspartoacylase family Bateman A anon COG2988 & Pfam-B_15640 (release 10.0) Family This family includes Succinylglutamate desuccinylase EC:3.1.-.- that catalyses the fifth and last step in arginine catabolism by the arginine succinyltransferase pathway. The family also include aspartoacylase EC:3.5.1.15 which cleaves acylaspartate into a fatty acid and aspartate. Mutations in Swiss:P45381 lead to Canavan disease disease [1]. This family is probably structurally related to Pfam:PF00246 (Bateman A pers. obs.). 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.67 0.70 -5.53 33 2822 2012-10-02 19:46:12 2003-04-07 12:59:11 9 14 1568 43 782 2774 810 264.10 21 80.56 CHANGED sG.slllouGsHGsEhsGs.llcclhpplsttp....hsuphhllshsNs.Ahpssp......Rhhs........psLN.....................RsFsGctpu............spsc+lAphhhtthh.sps-hslDLHouspuhshh.hshsshtss.h.....hphlpsh..shshsh.hpptsss............sh.phsspphut.ulolElGts..tshstptlsphttsltslLphl.uhhpsp......p.hthhphstpssptpp..u..sGhhchphplsspV-ttshhshshlhsphsscph.hpu...spsshllhhp.tshsttssslhhltpt ...................................................................................................tlhlsuu.lHGsE.hsu....h....t....hlpp.L.l..........p....pl..t.ttp............hh.u......t...h........l.......ll.slsNs.u.hp.ptp...............................................R.a.h.....................................pDhN..............................................R.h.F..s..G.p.ps...........................................s...hpph...s..p.th.......h..p.t......s.........t............p..s.....ch..h.lD...L...Hou.......h.....p..s.......t...h..h....h.....h.s.h.....h.s.h.p..sh.....h...............hth.ht.sh......sh.s..hl....h...ppt..s.ss.......................................s.h.hp.h.....s.....s.....p..p........h.......u......t............u..........h.o.lEhGts.........h.hstpthpt....h...ttsh.ht...hhthh...u.hhpst..................t.....h.h..t.hh.t....h...t...t.......p............................p..h..t..h.......t..hupp...stthp.h..s..l..h..s.....s..t......h.....hhs..........psthllh.p..shht.tsslhh....t......................................................................................................................................................................... 0 179 407 606 +1267 PF03115 Astro_capsid Astrovirus capsid protein precursor Mifsud W anon Pfam-B_2957 (release 6.5) Family This product is encoded by astrovirus ORF2, one of the three astrovirus ORFs (1a, 1b, 2). 
The 87kD precursor protein undergoes an intracellular cleavage to form a 79kD protein. Subsequently, extracellular trypsin cleavage yields the three proteins forming the infectious virion [1]. 25.00 25.00 28.80 27.90 20.90 20.60 hmmbuild -o /dev/null HMM SEED 787 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.35 0.70 -6.28 11 1664 2009-09-13 08:26:15 2003-04-07 12:59:11 9 6 167 5 0 782 0 200.70 66 92.28 CHANGED MASKSsKQVTVEV..NNNGRsRSKSRsRSQSRGRs+uVKITVNS+....s+uR.RQsGRsKhQSNQRVRpIVNKQLRKQGVTGPKPAICQ+ATATLGTIGSNToGsTEIEACILLNPVLVKDATGSTQFGPVQALGAQYSMWKLKYLNVKLTSMVGASAVNGTVVRlSLNPTSTPSSTSWSGLGARKHLDVTVGKNAlFKLKPSDLGGPRDGWWLTNTNDNASDTLGPSIEIHTLG+TMSSYpNpQFTGGLFLVELuSEWCFTGYAANPNLVNLsK........STDKpVsVTFEGSsGsPLlMsVPEsSHFARhsspRSohsToLuRAGtpooSDTVWQVLNTAVSAAELVTPPPFNWLVKGGWWFVKLIAGRsRoGs....+pFYVYPSYQDALSNKPALCTGuss.uuhRspssVtTTLQFTQMNQPShG+GEoPAThGRulPpP.........G-phpVlhTlussh...uPNssspQsWltKshsuPps........hslKIGpssp+YhshpGFT..........lsuV-WYTs-aQsupcP...........sPIsGlt....VhsssoKKADVYAhpQatsspTNs+pQlTolaLV+V...Tp...............sapVssahshhaptussss......ssslplRssTtssslpFppGpWYLlTsTsl+s.ussssGWlW.shELtsspsYhhDpshsHlIsP.Pssoplha-hhTulPpspsth.shppcss......................ss.pss-s.......hss-ETDs-sEosEDEsD..ElD....RFDL+soSsSEsED...ENsRVTLLsTLlNQGholpRAT+IopRAaPT.....s-+h+RuVahDLLssGlsPuuAWSHACcpARhhuh....pp..phS.GSRGHAE 
............................................................................................................................................................................................................................................................................................................................................QRATATLGTl.GSNTSGoTEIEACILLNPVLVKDATGSTQFGPVQALGAQYSMWKLKYLNVKLTSMVGASAVNGTVlRVSLNP.TS.T.P.S.S.T.SWSGLGARKHLDVTVGKNAsFKLKPSD............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 0 0 0 +1268 PF04377 ATE_C Arginine-tRNA-protein transferase, C terminus Kerrison ND anon COG2935 Family This family represents the C terminal region of the enzyme arginine-tRNA-protein transferase (EC 2.3.2.8), which catalyses the post-translational conjugation of arginine to the N terminus of a protein. In eukaryotes, this functions as part of the N-end rule pathway of protein degradation by conjugating a destabilising amino acid to the amino terminal aspartate or glutamate of a protein, targeting the protein for ubiquitin-dependent proteolysis. N terminal cysteine is sometimes modified [1]. 25.00 25.00 25.30 25.40 24.50 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.53 0.71 -4.08 166 1296 2012-10-02 22:59:21 2003-04-07 12:59:11 10 12 1200 0 497 1090 425 134.00 35 43.75 CHANGED pEpasLYp+YhpsR..HscGsMtsss...h...........ppappF...lpso.l.............................................................................................sohhlEap.............tsscLluVulsD...lLscGl.SuVYoF..aDP-..h.pptS.LGoaslLppIphucc............hsLsYlYLGYaIcsstKMsYKspapPhEl....LssspW ....................................................................................................................................................EpasLYp+Y.ptRH.tcushp..s.....t..............ppa.p.p.F...lps.s.h..l.............................................................................................s.o.hhlcah....................hss..cLlAV.ulhD...lLs.....c......u.....lSuVYoF.............a..D..P..D....h.pp..h.S.LGsaulL.p.pIphu+p................hsLtalYLGYaIcsstKMsYK.spapPtEhLsspt............. 0 143 288 400 +1269 PF04376 ATE_N Arginine-tRNA-protein transferase, N terminus Kerrison ND anon COG2935 Family This family represents the N terminal region of the enzyme arginine-tRNA-protein transferase (EC 2.3.2.8), which catalyses the post-translational conjugation of arginine to the N terminus of a protein. 
In eukaryotes, this functions as part of the N-end rule pathway of protein degradation by conjugating a de-stabilising amino acid to the amino terminal aspartate or glutamate of a protein, targeting the protein for ubiquitin-dependent proteolysis. N terminal cysteine is sometimes modified [1]. In S cerevisiae, Cys20, 23, 94 and/or 95 are thought to be important for activity [2]. Of these, only Cys 94 appears to be completely conserved in this family. 21.10 21.10 21.10 21.50 20.80 21.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.91 0.72 -4.17 128 1240 2012-10-02 22:59:21 2003-04-07 12:59:11 8 8 1160 0 466 1031 317 77.30 35 25.38 CHANGED pahhTsshsCuYLss................................+pppphhst...stthssphYspLhptGFRRSGshhY+PpC...psCsAChslRlssspFpso+oQ.+R ........................hhTsspsCuYLts...........................................cp.tcp.hst.........spt.......hs........sphYspLhptGF.R.RSGshhYRPtC....csCp.........uChslRlssscFpso+oQ+R.............. 1 131 266 373 +1270 PF03078 ATHILA ATHILA ORF-1 family Mifsud W anon Pfam-B_2240 (release 6.4) Family ATHILA is a group of Arabidopsis thaliana retrotransposons [1] belonging to the Ty3/gypsy family of the long terminal repeat (LTR) class of eukaryotic retrotransposons[2,3]. The central region of ATHILA retrotransposons contains two or three open reading frames (ORFs). This family represents the ORF1 product. The function of ORF1 is unknown. 
19.50 19.50 20.20 20.50 19.30 19.00 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.52 0.70 -5.90 11 113 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 9 0 38 110 0 201.90 16 53.65 CHANGED p++tElARGKRshps...........c.pl.pE-h-...-..stsst+tpph.p+tcc.TspEYh+hhp..cFhuTRYPp.pThtpLGlhcDVpalhppspLpshhuh.asuaccEolpFLSTLclcha....s.th-.cG.lGalpFhVhshpYplsIppLpphaGFPotpuhp..casp-ELpsLWhpIuushPhs.u+SKSs.IRsPVIRYhppslANshasRcpTGTVoNs-hEMID.ALptlL....RcTKsGpslcGDh.NDsshohlLL.HLhuY+paAhs.ppp.htGuLChGGllT.ILhussVslts.sh.P+hhDhcaLptpc.hphthssDhhha+Fpcs..t.u.hhLPs.chTpIhptpNI-FpPphphL...............s...ppp..hhptttpppl...-tpscpcph-...h-Ts.aHFpEah.Psp.pu+sluEsH+p.uhLp+WsphQcKlltphhKtl+hhpptlpshussp ..........................................................................................................................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................................................................h............................................................................. 
0 3 3 6 +1271 PF03477 ATP-cone ATP cone domain Aravind L anon Aravind L Domain \N 21.50 21.50 21.50 21.50 21.40 21.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.95 0.72 -3.55 133 9433 2009-01-15 18:05:59 2003-04-07 12:59:11 11 53 4693 82 2010 5803 3748 89.30 26 18.76 CHANGED hhVlKRDG.pttsFchpKItpulpcAs.................t.h.....stp.p.spplspplhpp..lps...t....tl.ssccIp-hVpppLh.................cts.....thshs+thlh..YRph+ct ..........................hVlKRDG.p.p.psFst-KItpulh+As...........................................tt...ps.l..........sps.p...lp....p.l..sp.clp.pp....lps......t...............pl.sspcItphVhcpLh......................chs.....ph.s..hl+ht.sYRph+c.h............................................... 1 650 1237 1685 +1272 PF02222 ATP-grasp ATP-grasp domain Bateman A anon Pfam-B_157 (release 5.2) Family This family does not contain all known ATP-grasp domain members. This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.12 0.71 -4.91 23 5314 2012-10-10 13:17:02 2003-04-07 12:59:11 17 15 3695 55 1212 22317 9135 171.00 35 44.36 CHANGED l.pphGlssscahhlcsts...plppssppl....GhPsVlKtpphuYDG+Gphll+sps-lppuhcth......tstslllEcalsFctElollslR..shcGt..shhaPslcslpccGlh+hshsPAthstt...htppApplAc.clscpLsssGlauVEhFlh...sGc......lllNElAPRPHNSGHaTlsus ..........................................................................................................................h.cchuls.s..u...s...a...t..h.....l......s...o....t.p.........-.l..p....p.....s.....h....t......p....l...............G..h....P..s.llK.t.s.h.h.G.Y.D.G..+....G......Q....h...h...l.......+........s...t..p..-....l.p..t..u..h.phh.............................ssspsl.l.E.p.h..l.............s...............F...c.........h..................E............l..o..l.......l........s......s...R.................sss...G......p..............s......t........h.......a.......P.................s..............s......c............s........h.............p...........c...........s............G.............l.............h................p..........p..................S...........h.............s.................P......s....p.....h......s....s.t...................ht.p...p.A.p.p...h.Ap....pl.....s......p.....t.....L................s.........h.....s...G......l....h....u....V..E.h.Fls........p.G.s....................lllNElAPRPHNSGHaTlpu...................................................................................................... 0 320 712 992 +1273 PF00217 ATP-gua_Ptrans ATP:guanido phosphotransferase, C-terminal catalytic domain Finn RD, Griffiths-Jones SR anon Prosite Domain The substrate binding site is located in the cleft between N and C-terminal domains, but most of the catalytic residues are found in the larger C-terminal domain. 
21.90 21.90 23.10 23.00 21.80 21.40 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.44 0.70 -5.05 115 3578 2012-10-02 17:21:26 2003-04-07 12:59:11 14 21 2750 104 545 2666 91 185.20 48 68.74 CHANGED s.-s.chVlSoRlRsuRslcGhshPPph.ocp-pcplEphlpsALssl.....suchp.GpYasLss.....MostppppLl-cH......aLFpc..ss.hh.suthscsWPtuRGIahNcs+sFllWlNEEDHlRlISMppG...uslppVacRhspulptlE........cp.lcFuas-+LGYLToCPoNLGTGLRASVHl+LPtLuts....sc.hpphhpplpLphRGhtGEsoc.uhG...slaDISNppRLGhoEh-lVppl.hsGVpplIchE+ph ..............................................................................................sscallSoRVRsGRSlcGasFsPsh.T-t.p.h+E.hEpcV.sssLsuL.............cG-LK...Gsa..Y..P..Lou...........Moccs.Q..QpL.IDDH..............FLF...cE.......ss.hLp...s...A...sh.sRhWPsuRGIa..HN......-..s+....ohh.VhhNEEDHlRl.uMp.G....................hshtp...sapphsph...ptl-.........cth.a..a.pc....plGalh.....hCPoNlGTshRAuVhl...+l...PtLshp......tch.p.hh..t.ph...t......h..thR...Ghh......G....ts..........t................................................................................................................. 0 243 305 418 +1274 PF02807 ATP-gua_PtransN ATP:guanido phosphotransferase, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The N-terminal domain has an all-alpha fold. 25.70 25.70 25.70 26.10 25.60 25.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.48 0.72 -3.83 91 1908 2009-01-15 18:05:59 2003-04-07 12:59:11 10 20 1186 104 380 1509 23 67.60 62 24.46 CHANGED schsp...ppslhpKaLT.-lapcL+sKcT..shGsTLscsIpoGl-Nss.....psVGlhAuDtEuYp..lFu-LFDslIc-hHsGa .................................ppshhtphL..T...-l..acpL+s.+.pT..shGsTLhDVIQoGl-N.s........................psVGlYAs.DAEuYo..VFA-LFDPII-DYHuGF.................. 
0 158 187 276 +1275 PF01747 ATP-sulfurylase ATP-sulfurylase Bashton M, Bateman A anon Pfam-B_494 (release 4.2) Domain This domain is the catalytic domain of ATP-sulfurylase or sulfate adenylyltransferase EC:2.7.7.4 some of which are part of a bifunctional polypeptide chain associated with adenosyl phosphosulphate (APS) kinase Pfam:PF01583. Both enzymes are required for PAPS (phosphoadenosine-phosphosulfate) synthesis from inorganic sulphate [2]. ATP sulfurylase catalyses the synthesis of adenosine-phosphosulfate APS from ATP and inorganic sulphate [1]. 23.40 23.40 24.40 23.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.33 0.70 -5.03 113 1308 2012-10-02 18:00:56 2003-04-07 12:59:11 12 15 1032 36 520 1170 549 214.80 37 43.51 CHANGED asphpho....PtEhRphF.pc.pGWc..pVlAFQT.RNPhHRu.HEhLt+tAhct.h..s........ulLlpP.LlGtpKssDlssclRhcsacsllcp.Yh................PpspshluhhPhsM..RYAGPREAlhHAlhRpNaGsTHaIVGRDHAGlG..........s..aYGsa-AQclhcp..............ts-L..sIp.l.hcpshY.spcssphssh..........cssscspcp+.lplSGT...........clR.chLppGttsPshFsRPEVsclLhch.h .........................sphphsPtEhRppF....cc....hshc....sVsAFQo.RNPlHRu.HchLhppAhct..h...s.........sLllpP.LlG.TKssDlPtcsRhcsacs.ll..cp.Yh................stspshlulhPssM..+....YAGP+EulaHAllR+Nh.GsTHaI.VGRDHAGsG...........DaYssacApclhsp.............hss-l..sIphl.FcpshY.sppspthhs...........cpsscs...pcpc..lhlSGT...........clR.chLRsGttPPshF.pPEVsclLhc.................................. 
0 179 315 435 +1276 PF00231 ATP-synt ATP synthase Finn RD anon Prosite Domain \N 22.10 22.10 22.40 22.40 21.70 22.00 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.76 0.70 -5.06 161 5197 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 4653 59 1266 3418 3462 281.00 34 97.64 CHANGED AsL+-l+pRIpSlpsTpKIT+AMchV.AuuKl++Apcphpss+PYsp.plppllspl...htt.................................t.t..pshhttc.....pspcs.......hllllooD+GLCGuaNssll+tstphlpphpspt....ph...plhslGcKutsahp..p.h.............shp.lht.........phssls.cssshppsp.tlsppllpta...hpt..p.................................hDc..lhllYscF.hsslsQpspspplLP...............................................ttpttttttpsph.aEP..sscplLssLlspYlpsplapullEuhASEpuARMsAMcsATcNAs-hlpcLslpYN+sRQAuITpElhEIl.uGApAL ................................................................sh+-I+s+ItSlpsTpKIT+AMcMV.uuuKl++upcphpsu+PYup.phppllspl..sps..t.............................ph.p..pP.hl.ppR............psK+s.....uhlVloo..DRGLCGGaNsNlhKp...sh.ph..hp..p.hp...s.ps...............hp.h.pl.hslGpKGh..saap....p..t.............................uhp...lht..........phsslu....-pP..sh.p.cst..sl..s..pp.hlpta.....pps...p....................................hD....c...lallYs+F..l..s..s.h..oQpsphpplLPl....................................................t..t.s..p..t.tp..p.tph.sY...aE.P....s..s..cp...lLcsLLspYlpu.lYtull-uhASEpuARMsAMc........sATDNA......s...clIpcLsl.YN+uRQAuITpEloEIVuGAsAl......................................................................... 1 435 822 1071 +1277 PF05176 ATP-synt_10 ATP10 protein Wood V anon Wood V Family ATP 10 is essential for the assembly of a functional mitochondrial ATPase complex [1]. 
21.90 21.90 23.80 24.70 21.70 21.10 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.42 0.70 -5.11 23 186 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 169 0 137 174 20 237.20 27 80.37 CHANGED puh...........p..p....sspstptch...htLs+PlGh..PPp.s..pphs-spoLpp..htpDhhsh-K+tcRpKELst-hu+uhFt-hpsh+pppGKhFhuPsphFKpDKALYFPsht.....G+TL..............sus.....pssp.......shLcGKlSlVplFSoshuEppspoahp.....sshp-hhpps.t.........psQll-INl.-shl......KuhLl+lh.hsph+ppl.PtppapcY..Flsp.cs.hs.sl+Eplth.NshsGYlYLlDcps+IRWAuoGsAspcEhcsLh+sV+tL ..................................................hht...................t......l.+PlGh..sP..............s...pph..ptpohpt...........-hhs.-+....p..p+pccL..........t.....t...c.hs+.s...hat-hpsh.ph.p.pGKhFhuss.pha+t-puLaFPslh..............GpoL.............................ssp.......tshs...........shl..p...G.K.sSlVslao..o.hh.u-p.spoahp..........shhpphhpps......................tsQ.hlclNh.-shl......KuhLl+hh.hssh+..ppl..Ptppasp...Y..alsp.....ps.h...s.plR-plthhNshsGYlaLlDpps+IRWAusG.Aps..pEhcsLhpssptL.................... 0 41 82 117 +1278 PF00895 ATP-synt_8 ATP synthase protein 8 Bateman A anon Pfam-B_446 (release 3.0) Family \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.02 0.72 -3.76 66 6876 2012-10-02 21:03:42 2003-04-07 12:59:11 15 1 4964 0 41 6086 109 52.90 39 95.77 CHANGED MPQLssss......Wahhllhhhlslhllh.hKlhsah...ssss......sppspppppssPWpa.W ....MPQLNPsP.......WFhIhlho..W...Ls.ah.hl.l..sKlhsas.....sNsP...........ssp...s.....s.pp....p.s.ps..W..sWPW................................. 
0 8 12 21 +1279 PF00119 ATP-synt_A ATP synthase A chain Sonnhammer ELL anon Prosite Domain \N 21.70 21.70 21.70 21.70 21.40 21.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.44 0.70 -4.84 30 17567 2009-01-15 18:05:59 2003-04-07 12:59:11 15 12 11908 3 1063 15216 2921 204.40 33 91.02 CHANGED hslshpslhhhlhshllhhhhhhh..p......hPsphpshhphlhchlpshlpsphstps...httahslhholFlalhhsNhhG.LlPasFs................sTuclslThuLAlslaluhhlhGlppcshsha..phhls.hs...hlPhhl.hlEllS.hs+PloLulRLhuNlhAGcllltLlushh...............hhullshhlh.........lhhhhh-lhluhIQAalFshLshlYls- .......................................................h........h...h.h.l.h.h.h.l..l...h.h........h....h....h....h...................................t......h..........lss...R....h.p.s..l.p.hhl.phh.s.p.p....l.h.t.s.l.s.tpu...........c.p.au.hh.......l..hoLhlFll...hhNhLG...L.l..P.Y....s...F.T................................................................................PT.opLu.hshuL.Als.l.W.lus.ll.hG.h+.p...p.s.....tts.h.......u+h..l....P.....p.......G......o........P..............h....................L....l.....P.......h....L....l...l......IE.sl.S.h.h.h.RPluLulRLhANlhAGHL.L.lp.Llushshs........................................h..h.h.u.l.l..s.h.hlh...............hhl.shLEl.hVuhIQAY.V.FslLhslYlp................................... 0 343 689 893 +1280 PF00006 ATP-synt_ab ATP synthase alpha/beta family, nucleotide-binding domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family This family includes the ATP synthase alpha and beta subunits, the ATP synthase associated with flagella and the termination factor Rho. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null --hand HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.34 0.70 -4.84 157 33595 2012-10-05 12:31:08 2003-04-07 12:59:11 20 61 14564 479 5203 24829 10876 206.20 38 47.96 CHANGED TGl+sIDshhPlu+GQRhsIhussGsGKosLhtplspss....................p...ssss....lhsh..........................................................................................................................................................................................................................................................................................................................................................................................................................lGERscElp-hhcph..............hsp........tsh.......................p+.olllsususcss...ht+htushsu.hslAEaapp.pGp.cVLl.....lhDslTRaApAhREluhthuc..sPupcGYPssla...shlsclhERAGph...................................ttGSlTul.s...lhstusDh.s-PlsssshulhDGplhL..s+cLsppuha.PAIDl....htShS 
.....................................................................................TGlKslD.h.l.s..P..h.u+G.t.+.hs.lh.G..ss.GsGKTs.lhh.-.lIpNh.................................s...psts..........sVas.u..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lGE.Rsc..Es.s....-lh.pch..................................ccs.......ss.h............................pcsslVhu....p..hs-P.s.......stRh.psu.hou............loh.A..EYFR............D......p............u..........p............D............V.Ll.....h.....hDsl....hRaspAhpElS....hl...Ls.....R...Pu.thGY.sslh.....tp.h.upL.ERhsps..........................................................ptGSlTul.s...lh.s.....us.Dh..ocsh.P..ssshuhh......D..u..phhL..s+pLt..tpGlaPAl.sshtSsS.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 1807 3311 4366 +1281 PF00430 ATP-synt_B ATP synthase B/B' CF(0) Finn RD anon Pfam-B_137 (release 1.0) Family Part of the CF(0) (base unit) of the ATP synthase. The base unit is thought to translocate protons through membrane (inner membrane in mitochondria, thylakoid membrane in plants, cytoplasmic membrane in bacteria). 
The B subunits are thought to interact with the stalk of the CF(1) subunits. This domain should not be confused with the ab CF(1) proteins (in the head of the ATP synthase) which are found in Pfam:PF00006 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.33 0.71 -4.35 27 5963 2012-10-02 21:03:42 2003-04-07 12:59:11 13 9 4775 3 1260 3817 3365 130.10 24 75.06 CHANGED sl.htllshhlllhlLhhhhhpsLtplLcpRcptItsslppAEcphpputphhtctcppLppA+tcApcllppApppupphtpphlspuppchcplhpsAtsclpppcppuhppL+pplssLulphspphlp ................................hhhphlsFl...l.ll.h.l.l.h..+..a..s....a....s.....s.........l.h.sh...l-cRppcIt.sslssA-cs.+.pc.u.p...th.t.pctpppLppA+tE.Ap....pIl......pp....Appcup....ph....hcp...hh....s....p....A...pp....-.tpc....l....h....ppA....ptc....Ip.tp....+pcAhppl+pp....V.upLulthAp+ll............................... 0 418 853 1083 +1282 PF00137 ATP-synt_C ATP synthase subunit C Sonnhammer ELL anon Prosite Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.05 0.72 -4.07 167 10204 2009-09-13 08:00:21 2003-04-07 12:59:11 16 24 5907 292 3091 5928 2123 66.90 28 77.67 CHANGED hshl.GAGluhuluslGuulGhGhsuuushpuhu+pPc....lhsphllshuhsEshulauLllulll..lhs .................thluA.uluhu.luulG.uulGhGhsuu.s..hlpu.....sARpP-.........................lhsphllshuhsEulul.a......ulllulllh...................... 2 1106 1902 2580 +1283 PF01813 ATP-synt_D ATP synthase subunit D Bashton M, Bateman A anon Pfam-B_1304 (release 4.2) Family This is a family of subunit D form various ATP synthases including V-type H+ transporting and Na+ dependent. Subunit D is suggested to be an integral part of the catalytic sector of the V-ATPase [2]. 
22.30 22.30 22.30 22.40 22.00 22.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.17 0.71 -4.75 114 1524 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 1253 6 536 1171 151 187.10 30 85.70 CHANGED hpLhphKp+LphAp+GapLLKcKc-sLhhchhpllcchpch+pplpcth.pcAhtslshAph....utthhptlt.sstp..phplchppcNl.hGVplPhhchtp....................................ht.tsauhhssuspl-cuhcpapcslchllclAplEpshttLsp-lcpTpRRVNALEpllIPchcssl+aIphpL-EpEREphhRLKh.lKpp ....................pLtthKp+LpsAp+G+pLLKcKp-tLh.....hc......Fhpll.cchpch+pphpphh.ppuhtsh.shAph.....hutthhpplht.spp...phplchppcNl..hGVplPhhchhp.............................................................tts.h.au.h.hpsspplcc.shpphpchlphllcLAplc.....pshhhLscpIcpTpRRVNAL..............Ep..lh.IPphcpTlpaIphcL-EtERpphhRLK+lKp........................ 0 199 334 452 +1284 PF00401 ATP-synt_DE ATP synthase, Delta/Epsilon chain, long alpha-helix domain Finn RD, Griffiths-Jones SR, Kerrison ND anon Pfam-B_114 (release 1.0) Domain Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. This subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (Pfam:PF00213). 24.60 24.60 24.70 24.70 24.50 24.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.35 0.72 -4.08 179 3444 2009-12-15 10:59:44 2003-04-07 12:59:11 15 6 3304 16 491 1953 627 48.40 37 35.06 CHANGED c-IDhpcAppAhccA-ppLtpt..ps.ct....-hhcAphsLp+AhARLcshpth ........pDID.p+ApcAKc+AEpplppu....cu.ch...........-htcAphA.LpRAhsRlcshph.t................ 0 154 303 405 +1285 PF02823 ATP-synt_DE_N ATP synthase, Delta/Epsilon chain, beta-sandwich domain Finn RD, Griffiths-Jones SR, Kerrison ND anon Pfam-B_114 (release 1.0) Domain Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. 
The subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (Pfam:PF00213). 20.10 20.10 20.60 20.30 19.50 18.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.29 0.72 -4.22 327 5637 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 5213 39 1228 3611 2337 79.90 32 59.02 CHANGED lplclVoP-chlassp.lc.lhssuspG-hGlLssHsPllosLc.s.Ghlclp..ts...u......pp........phh.hlsGGhh-.Vp..s.splolLu-sA ............................hplplVoP.-.p.h.lasuc..Vc.lhls.....s..s.p.G.-lGlLPsHsPlloslc.s....G.h.l+lp....ts.....s...................pp.......................chl.hls.G.Ghl-lp..s.sp.l.o.lLA-sA...................................................... 0 402 786 1040 +1286 PF04627 ATP-synt_Eps ATP-synt_E; Mitochondrial ATP synthase epsilon chain Kerrison ND anon DOMO:DM04624; Family This family constitutes the mitochondrial ATP synthase epsilon subunit. This is not to be confused with the bacterial epsilon subunit, which is homologous to the mitochondrial delta subunit (Pfam:PF00401 and Pfam:PF02823) The epsilon subunit is located in the extrinsic membrane section F1, which is the catalytic site of ATP synthesis. The epsilon subunit was not well ordered in the crystal structure of bovine F1 [1], but it is known to be located in the stalk region of F1 [2]. E subunit is thought to be involved in the regulation of ATP synthase, since a null mutation increased oligomycin sensitivity and decreased inhibition by inhibitor protein IF1 [2]. 
20.50 20.50 21.80 20.90 18.40 18.40 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.35 0.72 -4.41 30 309 2009-09-10 16:25:20 2003-04-07 12:59:11 8 6 257 30 199 292 1 49.00 38 53.74 CHANGED huuWRtAGloY.pYssIAA+sVRcuLKp-h+ssAt+Rsp.scl...............KascW.psG ................shWR.tAGloYh+YsslsAcsVRpuLKpph+spAt+Rst.spl...............+hscWpsG.............................. 0 63 113 163 +1287 PF01990 ATP-synt_F ATP synthase (F/14-kDa) subunit Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes 14-kDa subunit from vATPases [1], which is in the peripheral catalytic part of the complex [2]. The family also includes archaebacterial ATP synthase subunit F [3]. 23.50 23.50 23.50 23.50 23.40 23.20 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.29 0.72 -3.60 100 1179 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 1012 17 454 861 109 94.90 28 86.32 CHANGED luVIGDc.Do.lsGFtLuGlsph.sh...........................ssc-lccslcchh.pcp-luIlllspclu......cc.lcctlcph..pp.....shPsll..pIPspptshshttss..lpt.lc+ ..................................................luVIGDc......Do.lhGFpLhGlsthsst...........................stp-hccslcchs..............c............p-h.....u.........lIhlopplA..............pt.lpcplcpa..p.p.....shPs.ll...IPo+pt..sh.shuhsp..l.p.lc.................................... 0 171 280 387 +1288 PF04911 ATP-synt_J ATP synthase j chain Wood V, Bateman A anon Wood V Family \N 20.70 20.70 23.30 22.70 20.20 19.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.74 0.72 -4.51 18 106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 102 0 80 91 0 52.30 45 78.02 CHANGED +FPTPVlKPhhPFFsuGsllhYGlsphAss.hsocEFhNDPRNPpht..s.pttH ...KaPsP...lhKPhhPFFsAGhllhYGlsphtsAhhsocEapNDPRNPpht.tt.t...t................ 
0 23 47 70 +1289 PF02038 ATP1G1_PLM_MAT8 ATP1G1/PLM/MAT8 family Mian N, Bateman A anon IPR000272 Family \N 19.80 19.80 20.80 20.10 19.60 18.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.47 0.72 -4.95 23 266 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 47 12 108 287 0 49.20 42 44.01 CHANGED scsp..-PFaYDYcoLRlGGLlhAulLFllGIlIlLS..t+C+C.phsQcp+phs ..........-c..-PFhY....DYpoLRlGGLlhAulLFllGI.lllLS......t.+.C....+C.phspp.pt..s................. 1 9 11 27 +1290 PF03899 ATP_synt_I ATP synthase I chain Finn RD anon DOMO:DM04622; Family \N 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.35 0.72 -3.84 154 2035 2012-10-01 19:47:47 2003-04-07 12:59:11 10 1 1796 0 377 1164 184 94.90 20 74.16 CHANGED +lhhhphhlhh.lhsls...hhlh...........shp...huhu.....hlhGuhlullshhhhshps...httthst...pphht..........hhhG.thh+hhlshhhhhlshth...ttlph.lslhlGhh ..........................hhhphhlhh..lhslh...hhlh..............tht...hslu.....hllGshsuhlsshlhthhs....hhtt...thst....tpshh..........hhhG.phh+..h.h.hsllhhl.lshth...hphsh.lslhlGh.................... 0 107 223 309 +1291 PF03154 Atrophin-1 Atrophin-1 family Mifsud W anon Pfam-B_3427 (release 6.5) Family Atrophin-1 is the protein product of the dentatorubral-pallidoluysian atrophy (DRPLA) gene. DRPLA OMIM:125370 is a progressive neurodegenerative disorder. It is caused by the expansion of a CAG repeat in the DRPLA gene on chromosome 12p. This results in an extended polyglutamine region in atrophin-1, that is thought to confer toxicity to the protein, possibly through altering its interactions with other proteins [1,2]. The expansion of a CAG repeat is also the underlying defect in six other neurodegenerative disorders, including Huntington's disease. 
One interaction of expanded polyglutamine repeats that is thought to be pathogenic is that with the short glutamine repeat in the transcriptional coactivator CREB binding protein, CBP. This interaction draws CBP away from its usual nuclear location to the expanded polyglutamine repeat protein aggregates that are characteristic of the polyglutamine neurodegenerative disorders. This interferes with CBP-mediated transcription and causes cytotoxicity [2]. 26.00 26.00 26.20 26.60 25.30 25.80 hmmbuild -o /dev/null HMM SEED 982 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.13 0.70 -13.94 0.70 -6.41 4 230 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 80 0 118 188 2 495.60 34 60.53 CHANGED GKHSMRTRRsRGSMSTLRSGRKKQTsSPDGRsSPsNEDlRSSGRsSPSAAST....SSsDSKsEShKKssK...KIKEEAsSPhKosKR.REKsASDsEEs-RsouKKoKTQ....ElS+PsSPS..EGEuEGEGE.SSDSRSlN-EGSSDPKDIDQDNRSSSPSIPSPQD.NESDSDSSA.......QQQh.QsQtsPul.sPPusususus..........Ps.sPSusslPPQsSPsuupPss.s....tssshoLl.pAPoLHPpRLPSPHPPlpP.su..utssspPssPs..tPpusHHGPhPPhPHsLQs....uPl.L.aP..sPPQP.uL....upt.sP...sPupA..+o..t..suQssht.PpQPP...REQPLPPAP.uMPHIKPPPTTPIPQ.ss.QSHKHPsHlpuPoPF.pMsSNLPPPPALKPLSSLPTHHPPSAHPPPLQLMPQuQ.LpossAQPPVLTQSQSLPspuSspPpo............uh.phPsQssFspHPFhsushPuIsPPsssssShsussPsuSu..pPssS...shsuSsss......usssssLPsIQIKEEsLDEsEEPESPPPPPRSPSPEPTVVssPSHASQSARFYKHLDRGYNSCARTDhYFhPLuuSKLAKKREEAhEKAKREAEQKsREEREREKE+EK..EREREREREAERAA.............................KASSSuHEuRhu-sQLuussHhRsSFEsPPTTIAAVPPYIGPDTPALRTLSEYARPHVMSPTNRNHPFaVsLNPsDPLLAYHMPGLYNsDPolRERELREREhREREIRERELRERMKPGFEVKPPELDsLHPSsNPMEHFARHGAlTlPshAG.PHPFAuFHPGLNPLERERLALAGPQLRPEMSYPERLAAERlHAERMASlusDPlARLQMFNVTPHHHQHSHIHSHLHLHQQDPLHQ........................GSuuP.HPL.VDPLsAGPHLARFPYPsGsIPNPLLGQ.PHEHEMLRHPVFGssYPR-L.suI.P.MSAAHQLQAMHAQSsELQRhAMEQQWLHGH.HhHGG.LPSQEDYYphhhppus+pL 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hS....P.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................ 0 12 22 62 +1292 PF03769 Attacin_C Attacin, C-terminal region Finn RD anon Pfam-B_2791 (release 7.0) Family This family includes attacin, sarcotoxin and diptericin. All members of this family are insect antibacterial proteins which are induced by the fat body and subsequently released into secreted into the hemolymph where they act synergistically to kill the invading microorganism [1]. 28.80 28.80 29.70 28.90 26.90 28.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.28 0.71 -3.94 24 176 2009-09-10 15:59:06 2003-04-07 12:59:11 10 3 48 0 58 196 0 106.40 31 59.05 CHANGED hssa..GsplsssupsNlFpsssHcLsAsAFto+shss...ps.pFsphGGGlsY.atsthGAohusu+pshhs.sshulsG+hNlapSss..oSLDhsuGas+ahussacsSphp.shGhsho+pF ......................................thts.hptthpsslhps.t.cslssps.a..topsh.t.......app.GuGlsh.hh.sthuAuhusuphsshs.pplslsG+sNLWpSssttopLDhsGuhu+ahuGPassptssausGhshoapF.......... 0 12 17 40 +1293 PF03472 Autoind_bind Autoinducer binding domain Bateman A, Holden M anon Bateman A Domain This domain is found a a large family of transcriptional regulators. This domain specifically binds to autoinducer molecules. 
21.00 21.00 21.00 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.69 0.71 -4.91 188 2219 2012-10-02 14:34:25 2003-04-07 12:59:11 10 9 1181 48 438 1549 74 144.40 20 58.87 CHANGED shpp.lhphlpphhpthGacp.hsas.......s.ttstsshhlhs.saPs.sWhp...pYhppsahthDPllptsh...psht.P.hhWsch.ht...........stttp..phhppAppaGl.psGholPl+sssuph.uh..l.ol..stsppshs.hp.....pttplphluhhhapph .................................................t..pplhp.lpthspphuachhshs...............hhhshshstschh..hhs..sa.P.t..sWhp.......tYppp.sahtlDPllphst....psps..s..hhWscthhptt......................tlhp.t.A.p.p.a.....G..l..ppGl...ohsl...h...s...ss..ss....h..uh..L.Sh....upsstp.ssht......hphphphls..h....h................................................................. 0 65 185 288 +1294 PF00765 Autoind_synth Autoinducer synthetase Bateman A anon Pfam-B_881 (release 2.1) Family \N 20.10 20.10 20.20 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -5.04 13 807 2012-10-02 22:59:21 2003-04-07 12:59:11 12 2 478 5 184 757 67 174.20 26 85.33 CHANGED sashhscs+..-ElFsLRKcTFKDRLcWsVpCpsGMEFDpYDNpsTTYllGlp-.spVlCSlRFI-s+hPNMIT.GTFtsaFsclslPc.GsalESSRFFVD+u.Rs+sllG.sptPlothLFLuMINYA+spGYcGIhTIVS+sMhpILKRSG.WpIsllppGhupcpEplYLlaLPsDc-spctLh 
.......................................pphhph...Rpcl....F..h-....+Ls.W...c....l..s...s.....t.....s...u...h..E.hDpaD.s.s.s.sh..Yl.l...u.h.s..c.....up.lhGs.sRLL.......P..T.......s.....t......P.....h.....h.....L.......p....ss........F........s...p.......L...h.......t.......s........h........s.......h.....P....p.......s..s.s...lWEhSR.F.s...l..stt...h........s.......tt.............h..................s...s....t.........t...................................hs...h..h....lhhu.hlpauhppGhppllsVs.s.hs.h.t+lh+RhG.hthphlG..s...........t..t..p..hhhhhh.hs........h............................................................................................................................ 1 22 84 121 +1295 PF03987 Autophagy_act_C Autophagy_C; Autophagocytosis associated protein, active-site domain Finn RD, Wood V, Coggill PC anon Pfam-B_10019 (release 7.3) Domain Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the vacuole. The cysteine residue within the HPC motif is the putative active-site residue for recognition of the Apg5 subunit of the autophagosome complex [2]. 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.04 0.72 -3.79 57 558 2009-09-11 22:59:56 2003-04-07 12:59:11 10 8 307 1 391 549 6 65.50 35 23.68 CHANGED hIsYsphYpsPplaltGas.........p.st.psLs.ppha...-Dlsscatp+T....................lTh-paPh...............hsh...hhslHPC+ .............................aIsYsp..hYpsP+LalhGas........................p..st...pPLsscp...h...h......ED......l..s...t-atp+T............................................lT.h-pHPh.....................hshs....hholHPC+................................. 0 124 202 314 +1296 PF03986 Autophagy_N Autophagocytosis associated protein (Atg3), N-terminal domain Finn RD anon Pfam-B_10019 (release 7.3) Domain Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the lysosome/vacuole. 
Atg3 is a ubiquitin like modifier that is topologically similar to the canonical E2 enzyme [3]. It catalyses the conjugation of Atg8 and phosphatidylethanolamine [4]. 20.30 20.30 22.40 25.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.05 0.71 -4.43 29 337 2009-09-11 23:00:31 2003-04-07 12:59:11 8 5 293 3 233 333 4 156.20 35 49.50 CHANGED tl+ush.sltEaLTPVh+pSpF+pTG.ITPEEFVtAGDaLVa+hPTWpWu.susps+h+saLPtDKQFLlTRsVPCa+Rspphtts......ttt-cllc-p-t...........D..-GWVcTpthssttt.............psschcsls-hsstspp....................ptp-p----hs ...................h.l+uphhsht-hlTPlh.+pSpF+cTGhlTPEE.FVtAGDaLVa+hPTWpWs..s.....us..s......+t+sYLP.tsKQFLlTRs.......VPCh+Rspphths...........tpt-t.llc..-.s.ps...........................-.sG....WlpTtt.ss.ttt...................t.tt..c.sl....s-hs..spp.................................t.....t......ttt.............................................................................................. 0 78 126 188 +1297 PF02309 AUX_IAA AUX/IAA family Bateman A, Mian N anon Pfam-B_801 (release 5.2) Family Transcription of the AUX/IAA family of genes is rapidly induced by the plant hormone auxin. Some members of this family are longer and contain an N terminal DNA binding domain, such as Swiss:O64965. The function of this region is uncertain. 
22.70 22.70 22.80 23.00 22.30 22.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.72 0.70 -4.59 56 1709 2009-01-15 18:05:59 2003-04-07 12:59:11 11 17 106 4 612 1704 1 179.60 27 48.17 CHANGED LsLctT.E.........LR..LGLPGstt.t................................ptsstsssKRu..Fu-..................................................sstttpssssspppp........psspsP.............................sK.......AQVVGWPPVRSaRK.Nsht....................................ptpst..............haVKVSMDGAPYLRKlDL+hYpuYpELssAL-cMF.usF.olG..............E.phh-hhsGS.EYVPTYEDKDGDWMLVGDVPW-MFlcSCKRLRIMKuSE....AhGLusps ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.h.s..sh.....ssl.p..s.hp..p..s.ht.........................................................t.....t...............thaV.K.VpM-GsshhRKlDLph...a......suYp-LtpsLp.cMF...........sh..t................................................t....hc.......hp.ts...cahlsYc.D.c.....-GDhMLVGD.sPWp...........F........hpss...++l.+Ihptp-.h.................................................... 
0 67 360 485 +1298 PF02041 Auxin_BP Auxin binding protein Mian N, Bateman A anon IPR000526 Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.07 0.71 -4.81 4 80 2012-10-10 13:59:34 2003-04-07 12:59:11 11 1 41 8 18 138 46 144.30 65 86.11 CHANGED AupCslpGLPlVRNIS-LPQ-NYGRsGLSHhTVAGSlLHGMKEVEVWLQTFAPGScTPIHRHSCEEVFVVLKGpGTLYLu.SSHuKaPGpPQEFsIFsNSTFHIPVNDsHQVhNTsEHEDLQVLVlISRPPlKlFhY-DW.MPHTAA+LKFPYYWDE-Chpssp...DEL ........................................t.ss.ps.slVRsISch.QssYGt.GLSHhT..lAGu.lh..HGhKEV.EVWLQTFuPG.s.pTPIHRH.S.C.E..EVFVVLKG....sGTLYL....u....sSH.t....Ka.P.GcP.pE....a.sIFsNSTFaIPVNDsHQVWNTsE+EDLQVLVlISRPPlKlF.hY-DW.MPHTAA+LKFPaaWDEpCh.ts.....E................................................... 0 5 12 16 +1299 PF02519 Auxin_inducible Auxin responsive protein Bashton M, Bateman A anon Pfam-B_1263 (release 5.4) Family This family consists of the protein products of the ARG7 auxin responsive genes family none of which have any identified functional role. 20.90 20.90 21.10 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.50 0.72 -3.95 112 1334 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 37 \N 887 1284 1 94.40 31 75.65 CHANGED hstphpth.tttp.......thh.tts.tth...........................hP.+GahsVYV........Gc............tpp.RFlVPlsaLs+PhFppLLcpAcEE.F.Gasp...tGs..LsIPCcts.hFcp.lhshlp .........................................................................tt.......................................p..........................sslP...KGah.sVYV...........G-.............ptc..RFllPl.sYLspPhFppLLppA......EE..E...F.G.asp....tGs.....L.slP.C.cts.hFpplhphl.p...................... 
0 98 600 754 +1300 PF03708 Avian_gp85 Avian retrovirus envelope protein, gp85 Finn RD anon Pfam-B_3651 (release 7.0) Family Family of a vain specific viral glycoproteins that forms a receptor-binding gp85 polypeptide that is linked through disulfide to a membrane-spanning gp37 spike. Gp85 confers a high degree of subgroup specificity for interaction with distinct cell receptors [1]. 21.10 21.10 24.50 23.70 18.30 17.60 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.85 0.70 -5.14 5 283 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 22 0 0 304 0 223.70 64 47.01 CHANGED LNVSLWDEPpELQLLGSQSLPNlTNITplou..V......sGGClGFsP..pusGlY.GWsRctlo+aLLccPh.pPaFspsSNSo-PFTVVTADRHNLFMGSEYCGAYGYRFWcMYNCSQoRps........Ys.C..Gcssu..sG.PEsWCsu+GGcWVNQSpEhNESEPFSFTlNCTGSsLGNsSGCCGcssTILP.GAW.lDSTQGSFTK..........PKALPPAIFLICGDRAWQGIPSRPVGGPCYLGKLTMLAPNHTDI...LKlLuNSSRTGIRR.KRS ....................LNTTLPWDPQELDILGSQMIKNGTsRTCVTFGSVCYp...tNtSpVCHsFDGNhNGTGGAEAELRDFIsKWKucD.LIRPYVNQSWTMVSPINs.....ESFSISSRYCG.........FTSNETR.Y.........Y+....GshSs........WCsSKGGcW.........SAGYSNGTpCSu....NToGCGGNCT.......uEWNYYAYGFTFGKpsElL......WNNGTAKALPPGIFLICGDRAWQGIPpNALGGPCYLGQLTMLSPNFTTW.....hTYGPNI..TGHRR.+R.... 0 0 0 0 +1301 PF01382 Avidin Avidin family Bateman A anon SCOP Domain \N 20.10 20.10 21.90 21.20 19.70 19.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.64 0.71 -3.80 15 142 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 81 454 71 247 4 107.70 29 48.71 CHANGED s..slpGsWhNchGSshtIss..susGtloGTYhossusssstspsp..slsGah.......s..t..ssoshuFoVsW....spssSsTsWoGQshls..supstlpThWhLsssssssp..WpuspsGtDsFTp ...............................shpGpWhNchGS..phpIp.....pssGtlsGsYhoslsps.t....s...ss.....lsGhh.................stssl..uFoVpW..........sphpShT..sWsGQshhs..supphlpThWhhspts.sstp...p..WtuhpsGtDhFs................................... 
0 39 42 61 +1302 PF03377 Avirulence Xanthomonas avirulence protein, Avr/PthA Mifsud W anon Pfam-B_3936 (release 6.6) Family \N 23.10 23.10 23.10 23.20 22.30 23.00 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.43 0.70 -14.25 0.70 -5.74 2 801 2012-10-11 20:00:59 2003-04-07 12:59:11 8 12 45 7 116 807 9 173.70 53 73.42 CHANGED LsssQlltIAppGGhpAlEsVpthhssLsts..sLT.tQVVAIASNIGGKQALETVQtLLPVLCQsHGLTPDQVVAIASN.................................................................................................u.....GGKQALETVQRLLPVLCQsHGLT.-QVVAIASN.GuKQALETVQRLLPVLCQsHGLTP-QVVAIASp.GGKQALETVQRLLPVLCQsHGLTPpQVVAIASNhGGKQALETVQRLLPVLCQAHGLTP....................................................................pQVVAIASpsGGKQALETVQRLLPVLCQsHGLTP-QVVAIASNGGGKQALETVQRLLPVLCQsHGLTPpQVVAIASN .....................................................................................................................................................................................................................................................h.hlp.+.LhP..V...Lsps.HuLT.tQVVAIASp.GG...KQALETV.Q.R.LLP..VL.CQ.s.HGLTPcQVVAIASpsGGKQALETVQ.R.LLP..VL.CQ.s.HGLTPsQVVAIAS..N.GGKQALETV.Q.R.LL.P..V.L.CQ.s.HGLTP-QVVAIASpsGGKQALETV.Q.R.LL.PV.L.C...Q.s.HGLTPsQVVAIAs..psGGKQALEoV.p+hL.Ps.................................................................... 
0 5 22 22 +1303 PF03591 AzlC AzlC protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.80 22.80 22.80 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.64 0.71 -4.25 162 3558 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 2883 0 690 2484 1687 141.30 28 60.12 CHANGED lPl.hluhhshGlsaGllu.spsGlohhpshhhShllaAGuuQFlslsllss....uu...s...hhslllsshllNhRHllauholsshhp..t..hshht+hhhuahLTDEsaAlshs.phtp.ttttt.............hahhGhs..lh.hahsWhluohlGslhGshls ................hPlslualslGluaGl..h.u.spt.Ghohhpsh.hhShllaAGuuQFlhlu.llsu....uu....s............lhsls.loshhlNhRahLhuhultshhp....t..........hshhpp.hhhuahlTDEsaulshs.ph.sp..ppttt.................hahhGlsl.ssahsWshuollGuhhGshl............................ 0 197 409 559 +1304 PF02216 B B domain Bateman A anon Pfam-B_1782 (release 5.2) Domain This family contains the B domain of Staphylococcal protein A, which specifically binds to the Fc portion of immunoglobulin G. 22.40 22.40 22.90 22.40 21.50 22.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.29 0.72 -4.20 6 1484 2009-01-15 18:05:59 2003-04-07 12:59:11 11 69 165 21 12 1155 0 53.40 62 43.70 CHANGED ppspasc-QQNAFYplL+hPNLsEEQRNGFIQSLKDDPSpSppVLuEAKKLNDu ....s.sNpasc-QQNAFYEILphPNLNEEQRNGFIQSLKDDPSpSsNl.LuEAKKLN-u.......... 0 8 8 12 +1305 PF04182 B-block_TFIIIC B-block binding subunit of TFIIIC Wood V, Finn RD anon Pfam-B_68239 (release 7.3); Family Yeast transcription factor IIIC (TFIIIC) is a multi-subunit protein complex that interacts with two control elements of class III promoters called the A and B blocks. This family represents the subunit within TFIIIC involved in B-block binding [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.10 0.72 -4.13 35 324 2012-10-04 14:01:11 2003-04-07 12:59:11 7 4 262 0 222 703 46 74.90 27 4.73 CHANGED sshpaplLptIApsRhpGlhph-Ls.phss..pDsRslhh+hcpLpctsLIs+psl...................................ptpstposllhLp+Fhpp .....................s..hpaplLptlu..p.u.Rh.pG..h..h.Qp...-Lt..phhp.....hDs+plhhph+tLpcpGLIs+psh....................................................................................................ptpt......tpo.t.l.lhLp+Fh..t........................................................................................... 1 65 124 181 +1307 PF02246 B1 Protein L b1 domain Bateman A, Mian N anon Pfam-B_3438 (release 5.2) Domain Protein L is a bacterial protein with immunoglobulin (Ig) light chain-binding properties. It contains a number of homologous b1 repeats towards the N-terminus. These repeats have been found to be responsible for the interaction of protein L with Ig light chains [1]. 25.00 25.00 26.40 43.00 24.60 23.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.31 0.72 -4.15 3 19 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 2 23 0 33 0 69.50 78 44.52 CHANGED TPE.PKEEVTIKsNLIFADGoTQTAEFKGTFAEATAEAYRYADLLuK-NGEYTADLEDGGYTINIKFAGK TPE.EPKEEVTIKANLIaADGKTQTAEFKGTFEEATAEAYRYADhLtK-NGcYTsDltDtGYTlNIKFAGK............................ 0 0 0 0 +1308 PF03483 B3_4 S3_4; B3/4 domain Bateman A anon Pfam-B_1005 (release 7.0) Domain This domain is found in tRNA synthetase beta subunits as well as in some non tRNA synthetase proteins. 
23.30 23.30 23.40 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.76 0.71 -4.80 308 5864 2012-10-01 21:04:40 2003-04-07 12:59:11 12 45 4697 27 1517 4739 2917 167.50 30 24.80 CHANGED ssspahu+.llcslp.hssSP.WhppR..LhtsGlRs.INslVDlTNYlhl-hGp.....PlHAFDhc+l....p.........s.plh.VRh.Ap.su....E.plhsL..Dsp.chpL.ss...........................stlV.Is...D......s......p.p.sl.ulAGlMGGtpSpls.p.........sTpslhlEuAhFsshtlttsu+phs..lpo-uShRFERG.lDsphsthAhccAspLlh-ls.G ...............................................................................................................sstahs+.h.lc.slp.hts.o....P.W.hpp+....L.hps....G.h.....Rs...IsslVDloNYlhLchGp.....PhHuaDh-cl.p..........................................G.slh..VRh.Ac..pG............................E.phs.sL..D.sp...-tpL...ss.......................................splV.Is....D........s.......................pp..sl..uluGlhGG..p.po.t.ls.p........................pTps.lllEsAhFss..h.s.lttp..u+.pht....lpo-.uu...RaE+G.l-..t.h....uhphAstLl.phs............................................................. 0 509 955 1269 +1309 PF03484 B5 tRNA synthetase B5 domain Bateman A anon Pfam-B_1005 (release 7.0) Domain This domain is found in phenylalanine-tRNA synthetase beta subunits. 24.10 24.10 24.10 24.10 23.20 24.00 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.33 0.72 -4.15 85 4895 2009-01-15 18:05:59 2003-04-07 12:59:11 10 18 4795 27 1299 3944 2191 69.60 33 9.04 CHANGED tptlplshpplscllG...........................h..plstppltp...hLpplshpsph....................spsthpVpsPsaR..hDlppphDllEElsRhhGa ......................................t...lslphscls+ll.G.................................................h.....cl.s.tp....p.ltc...lL.p+LGhp.lpt.....................................................sssthp.V.ssPoaR..hDl....p.h.ct...DLlEEVARlYGY.............. 
0 443 837 1100 +1310 PF01603 B56 Protein phosphatase 2A regulatory B subunit (B56 family) Bateman A anon Pfam-B_984 (release 4.1) Family Protein phosphatase 2A (PP2A) is a major intracellular protein phosphatase that regulates multiple aspects of cell growth and metabolism. The ability of this widely distributed heterotrimeric enzyme to act on a diverse array of substrates is largely controlled by the nature of its regulatory B subunit. There are multiple families of B subunits (See also Pfam:PF01240), this family is called the B56 family [1]. 25.00 25.00 25.40 25.10 23.40 24.40 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.30 0.70 -5.87 54 1081 2009-01-15 18:05:59 2003-04-07 12:59:11 15 15 292 10 639 990 15 337.90 49 73.42 CHANGED .L.P.hp.csssscppsLFlpKlp.CshlhDF.sD.....ssp-hppKch.KppsLp-ll-hlsss.p........stls-shhstlhpMlusNlFRsl.......Pshsp.t........hss---ps...hh-suWP...H...LplVY-lhl+alpssshssp.hptalspsFlhpLLsLFcSpDsRER-hLKshLH+lYuKFhshRshIRpsIsslhh.................calh.....csppasGluELLElhuSIIsGFulPLKcEHphFhh+sLlPLHpscslshYatpLshslspFlcKDssLsp.llctLL+aWPhosopKElhFLsElccll-hhpsspFpphhhsLFpplucC.lsSspFpVAE+ALhhWsN-tlhsLl.....p..pNsphlhPllhsuL.csucpHWNpsl+shshssh+hhh-hssplFpc.http......hpppppccpppp...pp+cppWpplt.chAt 
.......................................................................s.hc.-s.sst-p.pLFhpKlp.CshlFDF..D.........................shtc............h+.KEl.KRtsLtEhl-al.......sps..p............shlsEshhschl.pM..h..........u..hNlF.Rsl...............................PPssssp..........................aDsEEDEP............slEsuWP...H..................L.Q.....lVYEh.....FLRFlpS..-hpsp......lAK+YIDp.pFV..L.....p.......LL-L.FDS.EDPRERD...aLKThLHRIYGK.FhshRuaIR+pINNlFh.................c.FlY.....ETE+aNGlAELLEILG...........S...II..NG..............FALPLKpEHK.FLh+lLlPLH..........KsK........s..........Lu..hYHs....QLu.Y.CllQFl.EKDspLs-................Vl.h..GLL+aWP...t...TpS.K..EVM..FLs.ElEEI.L-...l.....hc.P.........s.p....F........Kl..PLF+QlA+C.lsSsH.F.........Q........V.AERALah.WNNEalh.s...Ll.......................p..cNt...th...IL..PI.hFsuLh.cso+p.HWN..p.s.l.....huLhhNshKhFhE.hs.pLF--.hstp......aptcctp........tp....p...........tpRpphWtplp.p.................................. 0 209 349 494 +1311 PF02043 Bac_chlorC Bacteriochlorophyll C binding protein Mian N, Bateman A anon IPR001470 Family \N 25.00 25.00 28.60 28.40 24.90 17.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.79 0.72 -4.19 12 34 2009-09-11 07:33:55 2003-04-07 12:59:11 12 1 21 1 24 32 0 77.10 52 97.47 CHANGED shuGAFopuAtAYGRhlEVFlDGHWWVVGDhLENlGKoTKRLssNAYPalYGG..uuuuul+GSSPphuGYApPoKclcpRFpc .s..uGsFTchhuAhGRIhEVhl-GHW.sVG.hh-ulGKuThRlspNAYsphhGG....usuLRGSSPpsSGYAhPoKtlcS+Fs........... 
0 4 5 18 +1312 PF00216 Bac_DNA_binding Bacterial DNA-binding protein Finn RD anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.70 0.72 -3.96 158 11816 2012-10-02 15:10:05 2003-04-07 12:59:11 16 10 4455 45 2522 6533 4975 89.30 37 83.62 CHANGED hsKs-Llppl.........u.....pcs......s.l..o+ppspphlsshhctlppsL..ppucc...l....plsG.FGsFplpp...+tuR.pGRNPpT.G-.tlpl...ss+pslpF+suKpL+ctls ....................................................................MsKs-Llc..t.l.........A.........pps.......s..l.....oKp......cup....p.s.l-shhcs......lppuL....ppG-p........V................pL.h...G.FGoFpl..+c..............RssR...p....G..R......N...P....p...T.Gc..plpI....sAp..pVs..tF+sGKtLK-tV............................................... 1 797 1619 2112 +1313 PF00308 Bac_DnaA bac_dnaA; Bacterial dnaA protein Finn RD anon Prosite Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.18 0.70 -4.70 14 6279 2012-10-05 12:31:08 2003-04-07 12:59:11 13 14 4628 18 1289 7863 4434 203.50 39 51.24 CHANGED Lsp+YoF-sFVhGsSNchAaAAAhplAcs.Puc.tYNPLFIYGssGLGKTHLL+AIGNhshp.hP.shRVhYlsuEcFsp-hlculpc..schppFKcpYR.slDlLLIDDIQFlutKEpoQEEFFHTFNsLh-ssKQlVloSDRsPccLssh--RL+SRFphGLssslpPP-hETRlAILcKKhc....tcshs.....lPpElhpaIApplssNlRELEGAlpRlhsaushs 
................................................................................p.paTF-.s.F..V.........G..p.......u.N.......p...h......A.....h..A..........A..u....h....t.....V....u................c............s......s.......u..t.....s........Y.......N...P......L......F..l.Y.G.u.s.GLGKTH.L...........hp....Al.G.......st.l........h..................p....t.t...........s.....s......s......+.V....h......Y......l....s..u....E.....p...F...s...p.......-...h.....l.p.....u...l....pp...........pt...h...p...p.....F.....+.......c....t.....Y...R.....s.......l...D....lL......L.I...D........D.........I........Q....F........l..........u.....s.......K.........-..........p..........o........Q.........E........E........F.F....a......T..F.N.....s......L......h.........c.....s......s........+.....Q......I..........l......l........o....S.......D......R..s.........P........+.................c...l.........s....s......l....-.....-.....R.......L....p..S.....R.F...s...W..G.L...s..s.sl...pP.P...-.h.Eo.....Rl...A....I.L.p...pKAc.......................t.p.slp...............lP.s.-V...h.pa.l...Ap.p..l....c...s...N.l...R-LEGALs+lhA.up..s................................................................................................ 0 415 826 1072 +1314 PF01311 Bac_export_1 Bacterial export proteins, family 1 Finn RD, Bateman A anon Pfam-B_1442 (release 3.0) Family This family includes the following members; FliR, MopE, SsaT, YopT, Hrp, HrcT and SpaR All of these members export proteins, that do not possess signal peptides, through the membrane. Although the proteins that these exporters move may be different, the exporters are thought to function in similar ways [1]. 
27.80 27.80 27.90 28.60 27.70 27.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.37 0.70 -5.10 16 3358 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 2177 0 654 2099 515 239.20 26 92.63 CHANGED pshhshlsthhLs.hhRlhshhhhlPhhspphlsuhlR..hulshhluhhlhssl.hsssshhsst...hhhLllpElllGlhlGhhhshsFhshpsAGplIDsQtGhsh.ushhsPssuspso.luphLshhulllalsssGhhhlls.slhcSaphhPlsphhs..t..thhthlhpthsplatpulhlAuPllhhLLLh-lulGlluRhAPQlslhsluhPlKshlullhlhlhhsslhshhppthphhhshhs ..........................................th.lhshhhs.hhRl......huhhhhhPhh.u.pps.l.P.s.t.l+....hsluhh.lsh..h.l.h.P....t.l........s..s..t....s..h....s..h...h.........s..........hh........hhhlshpElllGlhlGhhhphsFhsh.phAGplIshQhGhuh..uohh.DPs..s.u.s..s..s.s..lluplh...shhu.hllFl.s...hsG.........Hh.hllp.hLhcSaphlPlu.s.....h....h..h....s..s.............h........h.....hlhphh..shhahhulhlAhPllshhLlsslulGllsRhAPQLNlFsluhPlphhlGl..h.l...l...hh...h.h..shlhshhtphht.......t.................... 0 202 393 515 +1315 PF01312 Bac_export_2 FlhB HrpN YscU SpaS Family Finn RD, Bateman A anon Pfam-B_1200 (release 3.0) Family This family includes the following members: FlhB, HrpN, YscU, SpaS, HrcU SsaU and YopU. All of these proteins export peptides using the type III secretion system. The peptides exported are quite diverse. 
22.10 22.10 22.20 22.80 21.30 22.00 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.85 0.70 -5.55 16 4381 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 2177 42 948 3009 756 267.10 32 91.14 CHANGED .u.-KTEcPTsKKLcDARcKGQlsKSpDlsshshllushthlhhhushhhpcLtuhlth.hhhtspsashuhp.thhthhhhlhhshshhl...hhsshlsulhushlQsG.hlhoscul+Pchp+lNP..lpu...hK+.hFSsculhEllKSlLKllhluhlhahhlhsphsplhpLshss.tshhshshpLhhplhlhshhhhllluhhDahaQ+hpahKcl+MoKpElKcEaKcpEGDPclKu+RRphtpElt.ppphtssl.....cAsllVsNPTHaAVAltYc.pchssPhllsKGsDttALpl+phAcctslPllcshsLARuLYppschsphIPtphacsVAclLthlh ...........................................t...pKTEtsT.++hpcu+c.cGpls+Sp-...ls.hh.hhs.hhhh.h.h....h.h...h.............h....h.............t....h.....t...h....h.t...............h.........................................h......................................h..h.................h...h.............h...........h...h.h.h....h.h.h..h.h.hhs.hh.s.shh...s.G..hhh.sscs.l..php+l.sP..hts.........hKp..hF.uh.pshh-hhKslh+hhhlhhhhhhhh...h..thht.h..........s............t....h..h.hth.hh.hhhhhhhhhhhhshhDh.hpha.ahcph+Mo+p-l+cEhKptEGsPplKu+hRphtpth...........t...t....p...h.h.ttl.......pushllsNPT.Hh..AV.ALpYc....p....p.....s...A...PhVlAKGts.lAhpI+plAcc..pslPllcs.sLARu.Lapps.c.ls.ptIP..t..p..L..a..pAVAclLsal........................................... 0 309 593 767 +1316 PF05088 Bac_GDH Bacterial NAD-glutamate dehydrogenase Moxon SJ anon Pfam-B_6291 (release 7.7) Family This family consists of several bacterial proteins which are closely related to NAD-glutamate dehydrogenase found in Streptomyces clavuligerus. Glutamate dehydrogenases (GDHs) are a broadly distributed group of enzymes that catalyse the reversible oxidative deamination of glutamate to ketoglutarate and ammonia [1]. 
23.00 23.00 23.40 23.00 22.60 22.80 hmmbuild -o /dev/null HMM SEED 1528 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.37 0.69 -14.31 0.69 -7.18 107 1221 2012-10-10 17:06:42 2003-04-07 12:59:11 7 6 1073 0 449 1248 467 1239.90 36 88.15 CHANGED l+lhs....htp.tshpus.pollplls-DMPFLVDSlphtlscpulsl+hlhH.PllplcRs.tsGplhpltstst........................ssttEShlhlEl-.+hs...sspphpt..LpppLppVLsDV+tsVpDWpsMpschpph.hpplp...pp...................chpEstsFLcWLtscpFoFLGa+cYpl....................tss.thtlt......ssouLGlL+..........t.httspphsshssttttthhp.sph.LlloK.ustpSpVHRsuYhDYlGlKc.a...cppG...pVlGEpRFlGLaTSsAYspssppIPllRcKlppllppuGasssoHsuKsLhplL-saPRDELFQhst-pLhchuhullp.Lp-Rt+lRLFlRpD.asRFlSsLValPR-+YsTplRh+lpplLtcshsu.pts-asshh.sES.sLARl+all+h........sssph.......h-hpclEppltchsRsWpDclpssLhpp...........hup..............................tputtlhp+YssuFPtuY+-passppAltDlpplcpL.........ss..ssslshplY......+s..pss....sp..l+LKlap.tspslsLScllPlLENhGl+VlsEp..Papl....ph..t.....st.....pshWlaDFsLp..hssst.......hclsp.............hpptF...p-AastlWpGcsEsDuFN+LVLtAuLsWRplslLRAYu+YL+QsGhsaSQsYltpsLsppPsluptLlpLFcsRF..-P.........................................ttppcspptptlpsplppsL-pVssLD-DRILRpahslIpATLRTNaaQt.......st........pGps...+salSFKlcsptlsslPtP+PhhEIFVYSPRVEGVHLRsGpVARGGLRWSDRpEDFRTEVLGLVKAQtVKNAVIVPVGAKGGFlsKphPss.s.....sR-uhhsEGhtCY+hFIpuLLDlTDN..l.lsGc.........lVPPs..sVVR+DsDDPYLVVAADKGTATFSDlANulut-.YsFWLGDAFASGGSsGYDHKtMGITARGAWESVKRHFRE.hGlDsQo.psFTVVGlGDMSGDVFGNGMLLSccI+LlAAFsHpHIFlDPsP.DsApSasERcRLFsLPRSSWsDYDpsLISpGGGVFsRouKoIsLSsch+thLGl..cps......phoPsELIpAIL+APVDLLWNGGIGTYVKAosEocu-VGD+ANDulRVsGp-LRsKVlGEGGNLGhTQhGRIEaAhpG.....G...........RlNTDhIDNSAGVDCSD+EVNIKILLsplVpsGcL.ThcpRNpLLtpMTD-VupLVLcs.NYtQopAl..SlsptpuspplstptRhhptLEppGpLcRtlEhLPs-cplscRtstGp.GLTRPELAVLlAYuKhsLp-pLlsS.-ls--s......ahsphLhsYFPp.Lpc+assp.ltpH.LRREIIATtlsNplVNchGh.....sFlhRlt-pTGsss.sclscAashucclFslsslWppI-uLD.splsuslQhphhhplp+hlc+sspWhLRs...ppt....h.s.lsstlspapsslppLp....splsshLsspp..tpthppptpphsp.tG
lPpsLApplAslphhhssl......DllclAppssts........l..tplApsYFtluppLslshlhptlsp.lsssspWpsLARt.....uhp--LtttpRpLstslLpt............t......spttlstW..hppppstlpRhpphls-lps...sssh-hAhloVAhRpLpsL ....................................................................................................................................................................................................................................................t..hshlpll..shPaLlDSlhh.ltp..shthph.hhp.s..htltRt...pu....tl.tl...................................t...pohhhlpl.........s...t...........sttthtt...lpptl.phLtplt.sspDh..hhtth.th...httlt....tt................t.....................pht-shthLpWlt.sppFhhhGhpchth.......................t....t..h...........tstLGlhp.................................................th.......h..p.......l.hlsK.ss..Shl+R..ssY.Dalul+.....hstpG............t.hhuEpRhhGLasussattss.pIPhl+p+httlhthu...uht...tuash+tLhpllpphPR--Lhphs.ppLhthshsllt.htp.+.ph+lhlRh......D.hs.+FhssllalP+-casoth+.thtthlhp.htu.t.h.-..hshh..s-.uslsphphhlch......t.tt...............t..hs.....ttlptplhphspsWpDph.thh.tt...................................................s......tphttsFs.sY+pthsstpAlhDlthlttL................................st.....p...lth.hh.p....ts..t..............tt.........hphKl....ap.tttsh.LSplhPhLpshGlcVl.sEp..sapl.h.................ps.........t.h.alh-Ftht....t...............hp.h..t.p......................ht.tthppAhtth.atG.p.h.EsDsFN.pL..lhtAuLsh+pl.slLRuhu+Y....l.....+Q.h.....u.h.....s.....aSpsYl..tp.sLp..p..aPp..l..sctL.lpLFpt+F.....sP................................................ttptpppptp..t.l....t..p.p...ltpt.L..-...p.V......s......s......L.D....-.....D.R....I..L.Rpah...sll.pA.oLRTNaaps....................t.................t....s....+shluFKhcP........ph.....l........s........p.l.....P......t.......P....p.......P.haE.....I.F.V.Y.u.PclEGVHLRhGtVARGGLR..........W.......S......D......R.p...ED.........F.......R........T..........El.......L.GL.......V....KAQtVKNsVIVPlGAKGGFls+p..P.s..s..............
.............sR-thht.cGh.tsY+h...F..I.puLLDl..TDN......l..h....p....up..............l..VsP.t..p..V.VRa..D.tDDsYLVVAAD.KG.T..A..T.FSDhA..Nslut.p..as..F.WLGDAFASG...G.S...s...GY....DHKtMGITA+GAWEuVpRHF+E.hG.....h......DhQ.s.psFTslGlGDMuGDVFGNGMLLScpI+LlAAFsHhHIFlDPsP.DsttSatERpRLFpL.PR.SSWtDYstpLIStGGGlasRstKsIslosphpthLslpts..........................phsPs-LlpAlLpu.VDLLWNGGIGTYlKuspEocspVGD+ANDslRl.sup-lRs+l..lGEGGNLGhTQhGRlEauhpG.....G...........+lNTDhlDNuuGVDsSDpEVNIKIhLs..th..l.t...........sGcl...s.p....pR...spLL.pMoD-VuplVLts.NhtQs.s..l..uhstt..puhthht..tchhp.LEpp.s.h.L.sRtlEhLPssppltcR....t......t..G..sLTpPELulLhuasKhsLtctllt.o.pls-...-s......hh.sp.L.tYFPt.....lp.pp...aspt.h.pH.L++EIlsT.lsNphlsphGh.....sah.Rlt-psGts..splhpsahhscplatlstlhpplp.t..L..s......lss.................thp.phhhthtchhpcsspWhLpp.....t.t.........s.ltt.ltpat.thttlt.....phsthl.....t.tp..htthptthtthhp..tGl.sttlAthluth..hhs.hh....sI.hpl......u.p.......ts..hs.........................h.ttsAchYatlupthththh.....hpthpt...s.hsshWpshAttshp--l....ptlshtsLt....................p.............tt.ht.W....t.t.pt..ltph.thltphtt.....tt.......h...phshhsls.t.h.......................................................................................................................................................................................... 0 153 284 373 +1317 PF01152 Bac_globin Globin; Bacterial-like globin Finn RD, Bateman A anon Prosite Family This family of heme binding proteins are found mainly in bacteria. However they can also be found in some protozoa and plants as well. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.47 0.71 -4.10 12 2140 2012-10-01 21:46:00 2003-04-07 12:59:11 16 28 1468 69 809 1739 270 116.80 24 78.59 CHANGED ola-tlGGpss.lcslsscFYspl.sDsp..htphhp.sschsspcp+.htFLsthLGGPshYht+pG+P.L+tpHtsh..slsstch-thlcphtpAlpshs.tshs.-phhchhphhttshhsh ...................a-hlGG....pps....lpplV-pFYs.t.l.tpDsp.................lt.h...F..t......s......s......-...h...s..........p....t...pp+...h...t...p...........Fl...st...h...h.....G..G..P.......s..h.Y....s.t..p..p...........G...p........P...........l....+....t....p.H..h...sh........sIst.tc.h-tWLpp...hpp.A.l.s.c.h...s.h..s..t.t...h...tp.....l...h.thht.htt.h...t........................................................ 0 348 595 732 +1318 PF00296 Bac_luciferase bac_luciferase; Luciferase-like monooxygenase Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.70 0.70 -5.30 155 15488 2009-09-11 11:56:34 2003-04-07 12:59:11 15 68 3031 48 4888 13559 5534 299.40 18 82.83 CHANGED h..........hhh...phth.h.st.................shpphhchAptA..Eph.GF-shhhs-phts........................h-shshhuslA.stTp+lclssultssh..p....pPshlAppsATl-.plSs......GRh.tLGlGs.Gttthp...hthh.....Gh.........shscphsthcEhlcll+tlh....tt......................pps.sacGca..hph...........................tpshhhPps......................P..lhluutu.spshchAuchuDshhshs..............ts.sthtphhsplcp.tstp...........tGRs.....shphsh.thhlhhs....cspppAtpth......hthh.........................................................t..thtthhthh..tthhtshsh...lG.osp.pl...tcpltp..htpsGssp 
...................................................................................................................................................shpthhplApt..A..E.......c.........h..G.a.cthal.s..-pht.h.........................................ss...hsh..lut.lA.s..t..T..p.......+..l....c..l......u.s..ush..hhs...p........sP.h.hlAcp.hu.oL.D.plos...........G......R.h..tl.s..l.....u....s....G.........t..t..t....p.........hpt.h........Gh.........................sh.s.p.p.h.p...t.h...p..E...h....l..p...llp.p..lh.....ps............................................t.h...sa....p...Gca....hph.......................................tssth.h.P.t.shpt...............................hP...lah.u.u........s..u..s.............t..u....h..p.h..AA.c..h....u.p.shhhss....................s.p.t..h.t.p.h...h.p.t......h.+p....th..t.t...............................tG+..t.........phph...h.....h...th....h......s...hs.s...............c..s...p..p..p..A.hthh.............tthh............................................................................................................................................h.......t.....h.........................................h..t.t.hhh..................lG..ssp..pl...tptltt....h.t.....t................................................................................................................................................................................................... 0 1231 3130 4170 +1319 PF01036 Bac_rhodopsin Bacteriorhodopsin-like protein Finn RD, Bateman A anon Pfam-B_1412 (release 3.0) Domain The bacterial opsins are retinal-binding proteins that provide light- dependent ion transport and sensory functions to a family of halophilic bacteria [2,3]. They are integral membrane proteins believed to contain seven transmembrane (TM) domains, the last of which contains the attachment point for retinal (a conserved lysine). This family also includes distantly related proteins that do not contain the retinal binding lysine and so cannot function as opsins. 
Some fungal examples are: Swiss:O74870, Swiss:P25619, Swiss:P38079, Swiss:Q12117. 21.90 21.90 21.90 21.90 21.30 21.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.64 0.70 -5.04 17 3174 2012-10-03 04:04:29 2003-04-07 12:59:11 13 10 618 161 242 3093 2566 141.90 35 89.93 CHANGED hshaslhulhhllushlFhhhshp.sss.p+phhssslhlsultuhuYhshA.slGhohsts..................cpla..hs+YlsWhlohPl.....lLhLs.lAsss...................tpslhshlhsshlhllshLlGulh..sush.....+asaashussshLhlhYhlh.sshtpsstshspp...shahhLp.hhlll...WhhYPlsWhlustGssl.sssspslhashLDllshlsashlLlhttsshsstt .......................................................................................................................................................................................................................................................................RYlDW....LlTV.PL.........h.hh....l....l...Ls...t...l....s...s.ss.................................................sul...h.a.+....L..l.h.uS.ll.MLls.G.Yh.G...E.......s........shh.s..............sh.h.h.a..l.l.u.h.h.u...alYI..l.Y.p...l...........a..G.....Es.u....c.s..u.s...p..u..u.ss.s....tsAasshph...lVhlG......W.u.lYP................................................................................................................................................................. 0 69 151 216 +1320 PF01103 Bac_surface_Ag Bac_suface_Ag; Surface antigen Finn RD, Bateman A anon Pfam-B_1201 (release 3.0) Family This entry includes the following surface antigens; D15 antigen from H.influenzae, OMA87 from P.multocida, OMP85 from N.meningitidis and N.gonorrhoeae. The family also includes a number of eukaryotic proteins that are members of the UPF0140 family. There also appears to be a relationship to Pfam:PF03865 (personal obs: C Yeats). In eukaryotes, it appears that these proteins are not surface antigens; S. 
cerevisiae YNL026W (SAM50, Swiss:P53969) is an essential component of the Sorting and Assembly Machinery (SAM) of the mitochondrial outer membrane. The protein was localised to the mitochondria [3]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.90 0.70 -4.97 70 6186 2012-10-03 17:14:36 2003-04-07 12:59:11 18 36 2849 0 1843 5261 3202 320.30 19 47.88 CHANGED NhhGpGpplshshs..............hu.............p.hppshshsaspP...ahhsp...............thshuhslhhp......................................t.st.pshshpshuhslshuh.....ls.......pphphs.....huhshppsphpstssps................................................................tttpstththshuhshsshssth.hPopGhh............hphshchs.....................h..huussp..ah+hpsps.phahsltp..pt.hhh.......hs+hphGhht.ths..................stc.lPhhcpFasGG..........ssSlRGaphsslGPps...............................ptt.lGGpthhsuShEhphPl.h.....t..t.hpsuhFhDsGs..............sassptpss.......................................psusGlGlpWtoP.hGPl+hDaAh.Pl....pc.ss.sp.................pp.papFulG.ppF 
...........................................................................................................................................................................................................................h.utGp.phshs.hp.........................hs.................t...ppth.phsh.p.P............hh.h.s.p......................................t.h..ph.s...h.shhhp..................................................................................................p.....sh..s...t..p......s...h.u..h..s..h.shsh....................hs.................................p..t..h.p...hp.......................................hs.h..s..h...p..p....s..p..h.p..p.h..p.ps..............................................................................................................ps.h.h..h..s..h..s..h..s..hs..h......s....p...t...c......s........s....h...h.....P....s...p...G.h.............................................................hph.s.hchs........................................................................h....h.u...u-.sp.......ah+.h..ph..ps...p.h.ah.slsp...........tpthsh.............................................hh+s..phG..h...ht..shs................................................st.p....l.P....h....p.p.....F.a.s..........GG...................s.p..o....l..R.G..........a.p...p.s.l.uPpt..............................................................................ttt..h...G.G.s..t..hh.su..ohE.....hp..h.....s..l...................tt..p....hp.....s.uh.F...h....D......sGs.....................................l.h.s.....s..t.t..p......s........................................................................................................th+h.usGl.G...l..pa.......h.......o......P....h....G......P.....l.p.....hs..hAh..sl......pp...pt..t..............................................tt..th.hslG.t............................................................................................................... 
1 578 1140 1548 +1321 PF01338 Bac_thur_toxin Bacillus thuringiensis toxin Finn RD, Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 26.00 26.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.51 0.70 -5.12 7 74 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 26 16 17 68 0 187.60 43 89.80 CHANGED pps..ppshRhIhLpV.ss.-.ssh.pIh.lp.Ppal.QAltLAsuFQsALsP........sLpFshpKuLplAssl.Ppuul.VshlsQolpQosspVSVMls+VlElLpsVLGlsLsuosh.pplpsuITsTFTNLssQps-AWIFWt+poAppTsYpYNIlFuIQNtpTGuhMhslPluFEIsVslpKcpVLFhTI+DpAsYpVplpuLplsQsL.pppthslhsl.pshs.sh ...............................................................h....................ph.pl..lp.spYl.QAlhlsssFQsAlsP.........sLpFsh.KuLpIANsl.PtuuV.luhlsQsVhppsspVSVMIsplhE.ll+..oVL.......Glslsu.ush..splsuAITNTFTNLssQpsEsWIaW..tpp..o.A..sQ.TsY.pYslLFuIQ.NppTGthMhhlPluh-ls.V.sht+pplLh.hsh.psptpatVphpth.......................................................... 0 1 6 12 +1322 PF01654 Bac_Ubq_Cox Bacterial Cytochrome Ubiquinol Oxidase Birney E, Bateman A anon Pfam-B_1148 (release 4.1) Family This family are the alternative oxidases found in many bacteria which oxidise ubiquinol and reduce oxygen as part of the electron transport chain. This family is the subunit I of the oxidase E. coli has two copies of the oxidase, bo and bd', both of which are represented here In some nitrogen fixing bacteria, e.g. Klebsiella pneumoniae this oxidase is responsible for removing oxygen in microaerobic conditions, making the oxidase required for nitrogen fixation. This subunit binds a single b-haem, through ligands at His186 and Met393 (using SW:P11026 numbering). 
In addition His19 is a ligand for the haem b found in subunit II 26.20 26.20 26.50 26.80 26.10 26.10 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.47 0.70 -6.07 178 4711 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 3032 0 940 3110 321 447.50 39 93.87 CHANGED LuRhQFAhTssaHhlFsslTlGLuhhlslhE..shaltTs.........cph....Yhchs+FWsKlFulsFAlGVVTGlshpFQFGsNWutaSchlGslhGssLAhEsLhAFFLEusFlGlhlFGWs..Rl.upthHhhushlVAlGoslSAhWILsANSWMQ.sPsGa..ph...............ps..G+hph...ssahsl.............................lhNPsh.h+asHslhAualouuhhVhGluAaaLL+.............................tc.....ch.....p.hh+....+uhphuhhhullsuhhtlh.sGDhpGhpshcpQPhKlAAhEuhW.-...T............ps......s..A....shslhulPsp...csp...c.......spat.lpIPhhhSlls...................s..+shsu.pVt.GLp-....................................................hst.........................................................................p.hP.s.VshlFauFRlMVGlGhhh.lhluhhuh.....ahth.+..................................p+ahLthhlhshPh.salAspuG.WhssEhGRQPWsVaG................l.........LpTspul.Ss...lsssplhhSLhsFhllYs.hLhs.stlhlhh+hh+....pGPps 
.....................LuRhQFAhTshaHalF.lPlTlGLuhhlAlhEslal....hTs................cph.........Y+....c....hs....+....FWu.K.lF.u.I.NFAl.GVsTGlsMEFQFG..T..NWShYSca....VGDIFGs...P.LAhEuLhAFFL.EuTF.lGl.ahFG.....Ws..R...l.....s+hh.HhhsTahVAlG..oslSAhWILsA.NuWMQ.sP..sGh...ch..............................................c..s...h..R..h..c..h....ss.ahtl.........................................................................................................lh.NPsh.s+Fs.H.slsuualouAhFlhuluAa..aLL+........................................................s+.........c.h........s...ht++.uht.l.uhhhuhhuslss.hl.hGD.tpGhphtch.QPhKlAAhEuhWc.T.....ps............s..A...shslh......u.h......Psp.......cpp......c....................spau..l.pIP..h.hL.ullu.....s..+.o..h..ss....V...GLc-........................hst.......................................................................................................................c.thP.s..VsslFauFRlMVuh.Ghlh..lhlh....hh.uh..h.hhh...+...pcl...p......................................p+WhL+hslhshP.h.salAlpuGWh.ssEh..GRQPWslhs..................lL.TtsAs..Ss....los.upl.hh..Shlhas...slYs.llhhs.t.la.lhh+hh+tGPt.s........................................................................................ 0 271 564 761 +1323 PF00936 BMC Bact_microcomp; Bac_microcomp; BMC domain Finn RD, Bateman A anon Pfam-B_1071 (release 3.0) Domain Bacterial microcompartments are primitive organelles composed entirely of protein subunits. The prototypical bacterial microcompartment is the carboxysome, a protein shell for sequestering carbon fixation reactions. These proteins for hexameric structure [1]. 
20.60 20.60 20.60 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.50 0.72 -4.30 128 7283 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 957 198 954 3181 404 77.20 31 65.78 CHANGED pAlGllEs.puhssulhAADthlKuAsVcllthctss..u.G..........hhhl.hlsG.DVuuVpsAlcuuhpsspc.h.........llsphl....l....s..ps .............pAlGllps.puh.ssul.AADshlKuAsVpl.ls.hcphs.....u..G..........hshl.hlpG.DluAV+uAlcAusssspc.................lhs.hh.h............................. 0 418 700 840 +1324 PF02397 Bac_transf Bact_transf; Bacterial sugar transferase Mian N, Bateman A anon Pfam-B_1538 (release 5.4) Family This Pfam family represents a conserved region from a number of different bacterial sugar transferases, involved in diverse biosynthesis pathways. 20.40 20.40 21.10 20.40 19.50 20.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.31 0.71 -5.18 461 7019 2009-01-15 18:05:59 2003-04-07 12:59:11 11 45 3624 0 1678 5749 1907 186.40 39 54.00 CHANGED KRhhDllhuhls.LllhuPlhl.l.lAlhl+l..ss...GP.lhFpQpRhGhsG+.....FphaKFRoM.hs...ss..-pt.............hptth........................c.sDs......R.lTp.lG+FLR+TS..LDELPQLaNVL.+G-MSlVGPRP.h.....tpsc...ph..h.s.....thttR.hpV.+PGlTGhAQVs....tpssh.shpp...............+lchDlhYlcshS.....lhhDlpIlhpTlhsll...h.c..GAh ...................................KRhhDllhu.h.lh..L.l.l.h.u.P.lh.l.l.l.u.lhl+l........s.t.......G.PlhFp.Qp.RhGh......s.G+...............Fp.laKF...RSMts..ss.....-pt........htp.hh..........................................................................p..s.Ds..R..lT.+........lGcFlR+TSLDELPQl.hNV.L.+G-MSlVG.PRP.h.s.........pt...h...c.....p...a.....p..................................phht.R.hpV+PGlTG...hAQls...........spssl..s.hp.c..............................+lc.hDlhY....l...c...p.h.S......lhhDlcIl.hpTlhhlh..t..p...t........................................................... 
1 576 1136 1441 +1325 PF01721 Bacteriocin_II Class II bacteriocin Bashton M, Bateman A anon Pfam-B_1954 (release 4.1) Family The bacteriocins are small peptides that inhibit the growth of various bacteria. Bacteriocins of lactic acid bacteria may inhibit their target cells by permeabilising the cell membrane [1]. 21.10 21.10 22.20 25.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.91 0.72 -4.43 19 138 2009-09-13 12:44:13 2003-04-07 12:59:11 13 3 75 11 5 112 0 35.30 47 58.83 CHANGED KYYGNGVaCsKcpChVsWGpAhssIsstsssuassG .+YYGNGVYC.s.Kpp..ChVsWucAhssIushshsuahsG............... 0 2 4 4 +1326 PF04798 Baculo_19 Baculovirus 19 kDa protein conserved region Waterfield DI, Finn RD anon Pfam-B_6291 (release 7.5) Family Family of Baculovirus proteins of approximate mass 19 kDa. 25.00 25.00 28.20 34.00 24.70 23.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.93 0.71 -4.81 23 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 60 0 0 60 0 142.40 44 76.49 CHANGED hhNPapphsp+LlcsatsoLpaGsaIcVaDhp....cs-RLFlIcPENlllYNssGsLYYYLE..uSuhhCP.sEFulVRFopsDIpsINEoGhashsCTslsSLsLlEHFho..ssls-tplhLs...p.phpaoIlDlINhLIpsGYlplp ...............h.hNPappphppLlcDatsTLpaGs.YIclYDLSsss.csERLFlIcPENVlLYNhsGsLaYYLE..SusVhCP.sEFulVRFops-IpslN-oGlasssCTsVsSLsllEHFhoLKNslsDppllLsh.....c..pIpaoIlDlINaLIhpGYVpl...... 0 0 0 0 +1327 PF04631 Baculo_44 Baculovirus hypothetical protein Mifsud W anon Pfam-B_5343 (release 7.5) Family This family includes several hypothetical baculoviral proteins, with predicted molecular weights of approximately 44 kD. 
25.00 25.00 29.90 29.20 17.30 17.20 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.35 0.70 -5.84 23 108 2009-09-10 23:58:53 2003-04-07 12:59:11 7 1 82 0 0 99 0 265.80 44 92.09 CHANGED LlllllllFLallY.pPlapAatpI+psQtpYspplD-ph-hhppsLpRRRYVPLcsLPslcasoshsTlst.Gph+ChSVPlhVosscTssFDCoplCDNssAsYFFVspaD+FVVNGphLspGGYCTTNSlPRNCNRETSllLaSlNQWTCIAEDPRYFAGpsNMlQlAGRQHuscIhPuplc+NVLaD+LLstpVslopNTFRpsWDElhpDGo.RRFEl+Cs.ALD.+pNpMFVNPlNPIECLPNVCTNVpaVHpsVRPsFEsG.C-CGDhslTRVpHlsssD+TShCASIVDphcpsstSa.......pFRV-ClshDoPlocasts..+LLCPsclFspNTDsAYsFsLsGs.hPlSuNGIcEPTaRhahDTRsR.lsass ......................................................................................................h..LPslphssp.hsphst.....st.cshp........sshhl.u..p....h....s..shD..CstlCssssu..hhF...alsp.sp..hVlNsphLt.GGaChssSlsR.CN.hpTS.hhlholspapCluEDPRYaAGstNh.QlAGRQH.spIhsup.s+NlLaDpLLstpVssspNTFRp.pWDEhh.DGo.RRFEh+Cs.AhD.ppN.MFlNPhNslECLPNVCT.s.VphlH.sV+P..sF..-...s...G......C-CG..t.hch.p......tohCssh....hp.................h.....Chth...h..h............................................................................................................................... 0 0 0 0 +1328 PF04786 Baculo_DNA_bind ssDNA binding protein Waterfield DI, Finn RD anon Pfam-B_6251 (release 7.5) Family Family of Baculovirus ssDNA binding proteins. 
25.00 25.00 44.80 43.80 19.10 19.10 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.59 0.70 -5.23 21 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 55 0 0 62 0 244.20 27 81.91 CHANGED Wh-phlaNL..pp.tNhollpCsssh..NpLtcsLshlppphsl..pahcchhP....pht..cplslhcPp..ss+lsYplGhpV+GGhhs.FYaFDhspl+Rscu.saGcFlolpWsshhhHNplauplhtpah....thp.-.thpLpssshlslPpc.........p.sp+pshlRKFFslppcpNpplYsTGc.....L..hctlpscPaoh-cFcplFphp.scs....sScEVpMlhuulI-GhKpuKp-hphco.lss+clpEKsYSLAl+PhlFhplE ............................W.cphhhNL...pp...tN..hollp.Csss...ppLpcpls.lpphhsl..phhcc.hhP........p.s...pplslhcsp..ss+ssYplGh+lcGssps.FaaaDhsplK+scu.sa..G.cFhslpasshttaNplauplhtpah.....t.ppc.slplpsslhlslPcc...........sp+phFlR+FaslpppsNtplasTGc.....l..scs..lpspshol-cFcclFphptspp....sSpEVcMlhsuhI-GlKpuKt.-hphps....lss+chp..EKsYSLulKPhlFhhlE.............. 0 0 0 0 +1329 PF04639 Baculo_E56 Baculoviral E56 protein, specific to ODV envelope Mifsud W anon Pfam-B_5446 (release 7.5) Family This family represents the E56 protein, which is localises to the occlusion derived virus (ODV) envelope, but not to the budded virus (BV) envelope [1]. 
20.60 20.60 20.70 28.60 20.20 19.80 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.66 0.70 -5.76 25 71 2009-09-10 14:49:23 2003-04-07 12:59:11 7 1 62 0 0 66 0 286.80 48 84.53 CHANGED ssPoohsl.ussphhPGYslsNspFVSsu-...lNplhRNNDlsulRplFss.sossQlNGLspLRRuDNlPDAslHuhphR+suVKssaPc.TssRT.pGVpsuLspNPRLssYL...KsAGhssLlGsGVYLlhsuAsLV.pDIlcALNRTGGSaYhpGpNGG-...sl-uClLpaRoCshshssl...ssslC........shDPLLss..spLpslCpGYNaEsEpoVCRuSDPNA-PsS.QYlDIS-LsssQTIpClEPYDhGDLIGDLGLDaLLGEsGllspSSNuSp....SlSssLhPlIllIGullhllhIGahIaKhlhppps ........................................sPoohsl.uNsphhPGYslsNNpFlSsu-...lNplhRNNDlsslRplFss.ssssQlsuLspLRRsDNlPDAslHutphR+suVKpsaPp.TssRo.pGVpssLppNPRLssYL.shKsAGhssLlGsGlYLlhsuAoLV.tDIlpAlNRTGGSYYhpGpNGG-...sh-sClLhhRTCths..ss...tslslC........shDPLlss..spLpslCpGaNa-sEpo...VCRuSDPsADPsS.QYVDIS-LsssQTI.ClEPYshGDLIGDLGLDtLLG-.cGLlspSSNsSp....SlSspLhPlIllIGullhllhIha..hlaKhlhp..s.... 0 0 0 0 +1330 PF04850 Baculo_E66 Baculovirus E66 occlusion-derived virus envelope protein Mifsud W anon Pfam-B_4624 (release 7.6) Family \N 25.00 25.00 113.00 112.00 19.10 17.80 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.41 0.70 -5.47 28 75 2012-10-02 15:11:41 2003-04-07 12:59:11 7 2 51 0 0 75 0 381.70 37 56.26 CHANGED cssVNhpslppuIphVuSscGllNPAlhSRNGopaS.sVIGpFl-Ys.uVaSADaSKVLThhocpYaGSVVGsTsclAY.....YEADssNshaAPLWAMsRRIWscs...utllsY...p............spolshESGVlL.pshsGl............hslPo.....TssSTpSFpPslupT.AlspTcssG.AMlsau+FsELN.LEFcShTLYacpGMaQLY.pltshpsshss.....suRsVVLsRDh....s.pTs.-.sas.......suSs.stoaNGVss++hsIsNh...slsshslR.s.sslp..............hlEQlIuh-slpsGsGssCYpLsVps.....sDsspshplssssh...................hhhssssslcslFcFPal.hlK-spptplo.sstsst...........lshsslpplLshlshsstsl..hss..ptssssFhhpss..s.QFhFch 
...ssVNhpNlppuIphVGSscGhlsPAlhSRNGopaS.sVlGpFl...-YssuVaSADaSKlLTlRocpYaGSVVGtosclAY.....YEAD.sNshHAPLWsMTR+IWsps...utllsY.p.................................sss..lshESGllL.pshNGl............hplPo......TssSTpSFpPsluhT.AlssTc.suG.shh.au+FsELN.LEFhSYTLYacpGMFQLYspI+slpshsss......uRCVVLsRDh.......s.sss.-.sap............ssSN.htsaNGVss+HaNIs.N.......slssFslR.shssls..............hlEQIIuhcslNsGsGsuCaoLhsps.....sDsspsh+lssssh..................hlhhsssssl.cslhsFPal.llK-spspplohssssst....st..H..lshspIpphLshlshsstsl..hss.hpc.psssFhhpss.tu.QFpFph............................................................................................... 0 0 0 0 +1331 PF03258 Baculo_FP Baculovirus FP protein Mifsud W anon Pfam-B_4275 (release 6.5) Family The FP protein is missing in baculovirus (Few Polyhedra) mutants [2]. 25.00 25.00 26.10 26.10 24.40 23.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.20 0.70 -5.22 12 67 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 56 0 1 62 0 186.50 49 94.67 CHANGED M-...ppLINVslLKsLIKsEIDcsVo-NIphlssKLK+LEc-pLNDoVEIYGIHDsR.LhsKKIRs.YlKKICuLLsL-aKtVl-SsacKNHIhV+LsDAsTA+EWQo+SREhRLKNaDLsI-aDGPVKIFVAAosEHK.LLKKTRDALLPaYKYVSLCKpGVMVR+s-+S+laIVKNEpDI.pLlsp.hpuhcsts......ts.t..t......pplI ..............................ppLINVslLKsLIKspIDcsVssslphhstKLK+LEpcpLscoVEIYGl+Dp.R.L.hsK.KlR.s.YlKKI.CsLLsLsaKtVl-osa...cKNHIhlKLsDAssA+EWQs+SREhRLKNaDL...........s..I..-a.D..GPVKIFVAA..osEaK.LLKKTRD.ALLPhYKYlSlCKpGVMVR+s-+S+laIlKNEpDIp.Lhsp...th........................h................. 0 1 1 1 +1332 PF04700 Baculo_gp41 Structural glycoprotein p40/gp41 conserved region Waterfield DI, Finn RD anon Pfam-B_4278 (release 7.5) Family Family of viral structural glycoproteins [1]. 
23.10 23.10 23.90 188.30 22.90 23.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.25 0.71 -4.86 21 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 57 0 0 60 0 190.70 50 57.94 CHANGED sKRF-SD-pLIcaYtpLpKchsssshsp......sIFpsSFVhS.lh.uYApKFYs+........plsEAAcpLS.ulpYQlApAVTpNpPlPLshspplsN-YlpLLhp+AsIPsNlppuls.....sps.spLN....hhssllNsll-DlFsG.tssYYh.hsLNpcsRu+VhshK-NIuaLs...P.Lot..SssIFpaluphATpsG+ ...sKRF-SD-sLIcaYtRLpKELGssslsc......sIFpsSFVhs.lLPuYAQKFYN+GuttlutsulsEAA+pLuhAlQYQlApAVTsNpPIPLPFspQLuNsYlTLLLp+AslPsNlQpulp.....SRphs+lN....hIN-LINsVIDDlFsG.uusYYh.YVLNEcNRARlloLKENluFLA...P.LSu..SsNIFpaIApLAT+uGK.. 0 0 0 0 +1333 PF03273 Baculo_gp64 Baculovirus gp64 envelope glycoprotein family Mifsud W anon Pfam-B_4223 (release 6.5) Family This family includes the gp64 glycoprotein from baculovirus as well as other viruses e.g. Swiss:P28970. 19.70 19.70 20.70 19.80 18.10 16.90 hmmbuild --amino -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.87 0.70 -6.33 6 42 2009-09-11 04:57:11 2003-04-07 12:59:11 8 2 26 1 1 43 0 366.50 45 89.09 CHANGED EHCNAQMKoGPY+IKsLsIsPPKEoLpKDlpIclsETDhsENVlIGYKGYYQAYAYNGGSLDsNTplpEsLhTlsVuK-DLLMWGlRQpCEVGE-LIDpWGSDSpsCaR-.sGRGhWV.....sGKELVKRpNNNHFAaHTCN+SWRCGVSTuKMYTRLpCss-oD-CpVpILDINGssINVotscVLH..RDGVSMILK.pKSchoRRopKlACL.........lKDDKsDPcoVTREH.........CLl-sDIFDLScNpWhCKFN+CIK....R+sEpVVKpRPpTWR+Dt.sKacEGs..oATKGDLMHlQEELhYENDhLRMNlELLHuHINpLNNhhHDLlsSlAKlDERLIGNLMsNSVSSTFLSDDTFLLMPCTsPPsHTSNCYNNSIY+EGRWVuNoDooQCIDFsNYcELAID..DDlEFWIPTIGNTSaH-SWKDASGWSFIAQQKSNLIoTME.TKFGG+sTSLpDlsshupGpLsupLtShhhGphhsalllhsVIlFLhCMlRstsR+ 
.....................................................................................Noth.t.h..hs.ttt-LLhWu.pppCphG-phh...DpWGSDS.sCht-..stt.Ws.tKELV+RpsN.+.FAaHhCNhoWRCGloTochas+Lpss.....sD-s..p..VhhL..sGssIslotspsLa..ccshShllK.ppophppcphKluCh.........htss.sDPc.lst.-.+..........Ch.sssha-lopspa.C+.stCh+.............cc.t..s.p.+P.pa+hs...c...h...sAohtDlhpl.ppLhYE..s-hL+hslp.hct+hsplsshhpsLl.SlAKlD-RLIGpLhspssuSpalsscpFhLhPChps.s..tsSNChssslYp-GRWsts.sDsspChshtt.p...lc.....hphhhs.ltsssh+tshpDtpGWSFlAppKpsLIpTMp.T+.GG.sTSLpslhths.G.h.tph.u........hhlhhhhhhhhhhhhp.....t............... 1 1 1 1 +1334 PF04735 Baculo_helicase Baculovirus DNA helicase Mifsud W anon Pfam-B_3393 (release 7.5) Family \N 25.00 25.00 116.10 48.10 18.20 17.80 hmmbuild -o /dev/null HMM SEED 1173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.23 0.70 -14.16 0.70 -7.20 25 73 2009-09-11 13:37:16 2003-04-07 12:59:11 7 2 60 0 0 73 0 1095.40 37 98.75 CHANGED tIhssIhpshp...pp-phsssshsss-pllh+Nssot..p++hlcsh-sFpKLlsshssptsts...........................t............ptt..h..........................tsHsWshpsNhF.shhV+PFlhtccY-p...lpcplshpcFhsopssthuNcCspuG-YhYWPN....hulsahGWplYLphpasIclsspIPLlHN+cLGsVNLFs.sschFLslEhsl.ssps..ppLFVNG+othscp.p........-cLFplshs....ssssusCKhsscLVaS.NKshFcYl+D-INLppChssscY+pllc.lDLpsLRhFsstt..sssslsst-ct..........pspppITPSSEptcpIpppIc-ClphIpEsMhcshspp-pucs..lLppYhppSsFhNFcYLIlslW+hlp+spch...pascTDI+LFlELLC...........Eplaus............ct..thcpshpcCcPYhphosplap+FCsphshFsssss.....h.uLutYaAIHahIYtKpss..........WsaThcsshp.Cslss-V.....lstGFFKKI..psspsshlFNGK.HYphVK..KDDDLhKlh-cssslslSslKFNNWKYhYhTEcGVaNVhppsY+ssCPFllGsTLhpsahc+s-..pYLscslhsaMhssscpEhsIaKhYHsAKlsR-l+hlKsNhptshhhs.sCtsCphptppcLN-LFRElWshccs-LllLulYLNp.KhsDllpNl.pC..Ct.t.t.....pCpClp.plc.....lDlpuhKlsLhhcLFhsss.plhcLhWuLla.osphYsphhtshh........sssthlcphsthaapN+p+IlchLapplc+l-alcphh.chssscphlp..plps....................................sscsDshp.cpp....................phlpsFatpYspshplLp+a.NVWWDKLIltRp.sDDLsoWLTRFYhRlhh
oK..lslpshsptalpplVpGYLYFRhFTNFNhsNShslhHFsASLuIPoDYEKhClYLsGcPssGKSShFELL-p.lllVHKpDs-pYshsK.+ETsEhEscKh.SQLYVINEhKhCs-oFFKopADSTKSsSssRKYpGppKYEANYKlLIlNNc..PLaIsD.YDKGVpNRFullYhDHpFp.-sh.FsGSVYcHhcsK+YP.E+shh-pLtsPVRlFLuHlLhY+RsP+DGYV.YKslLpsDssapHNLtCLslNNoslpALlYVLpV+...c.us.........thlsEsKlpchIchAssaVcshlH.ph+.....Kpss..........tphLhs-FK+Kap...KaYsscsKhahNLsMAhscpDFNhshPpFKs ......................................................s.Ihpplhpsh....p.c.p.shpshtsssplll+NstTt..p++hhcphp.Fppllsshpsp.stst................................t........t....t.t..h.................................................sHsWshpsNhF.shhV+PFlhp..cpY-t...lp.c..tlshpcFhtSppstasNcsspuGDYhYWPN....huloahGWplaLhhpFsIslsshIPlhHp+pLGsVsLFshsPc......h....FLslEhsl.sssc...ppLFVNG+ohFscp.p........-sLF.lphs....ssssusCKhtscLVhS..sKshFcYI+DsI.NLppClTsscYppllp.lsLppLRhFspps.hsstshsttpph..........phh..IosSSEphc.lpppIspslttIp-tMlcshsppp....tsss.plL......ppYhppSsahNFcaLlhllW+hlp+ppch..papcTDIKLalELLC...........-pl...ass..............ct..shppsht+CcPYhphshplFp+hCsphphFps.s......h.sLuhYauIHahIYhchss............WsaTacsshs...CtlsspVlstGFF+Kl..hsspsshVFNGK.HY..hVK...KDDDLaKl.hppss.shplsslKFNNWKYhYhTcpGVaNlhpspa+ssCPFllGsTLhpshpchs-..pYLPcsshsaMLssucpEhpIh+hYHhAKlCRDl+hl+sNhthh.hhs..sCtpCp.ptp.pLNplFR-lWshscppLlhlulYlNc.KhpDllpNl.+C..Cptt...........pCpClp.plclshhAhKlsLhh-LFssss.pl.pLhW.Lla.ssphYsphhh............sssphlpphsphhhpp+hcIlphLapplc+l-al-.hh.phss.phhlp.lpp....................................................ttpst.t.pps...................phlppFatpYspshplLpKa.NVWWDKLIltRp.sDsLsoWLTRFYMRlhho+..hs..lpsYs..alp..plVpGYLYF+haTNFNhsNShhhhHFsASLuIPoDYEKhslYLsGcPsSGKSSFFELL-p.lllhHKpDs-pa.shs.p.+-Ts-hEssKh.SQLYlINEhKhCs-SFFKspADSoKssSssRKYpG.pKYEANYKhLIlNNc..PLa..lsD.YD+uVpNRFsllYhDHpF..-sh.FsGSlYpHhhsKpYP.E+shh-p.LtpsVRlFLuHlLhY+RsPpsGaV.YKslLpsDshHpHNLhCLslNNoslpALlYlLpl+...csus.........thlsEpKlpchIphAs..alcphL.Hsthp.....Kphs..............tphLhspF+cKac...+hYpt.csphahNLsMuhscpDhshs.hPpF+s............................
........ 0 0 0 0 +1335 PF04838 Baculo_LEF5 Baculoviridae late expression factor 5 Finn RD anon Pfam-B_5141 (release 7.6) Family \N 25.00 25.00 27.80 50.60 20.60 19.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.19 0.71 -4.47 20 57 2009-09-11 05:38:18 2003-04-07 12:59:11 7 1 56 0 0 56 0 153.60 56 58.32 CHANGED shsLFplFpcFRcscsYpcLIpaLlpNYPsNVKNKTFNFsNTGHLFHSLYAYlPulos..KERKQIRLp.-CIcKLFpNTpNDFKLYsElFchlp...spth.ppCPCpLlppRhpphpsYVcslpsKpFDsKPsKLKKEsIDsILaKYSlNWKslLhKKKh ....hsLFhlFpcFRpppsYppLIcFLlpNYPsNVKNKTFNFsNTGHLFHSLYAYlPulosh.KERKQIRLp.-ClcKLFsNThNDFKLYsEla-hIp...ppth.ppCPCpLlhp+hpphpsYVcslpsKpFDsKPPKLKKEsIDsILaKYSlNWKslLhKKK..... 0 0 0 0 +1336 PF05073 Baculo_p24 Baculovirus P24 capsid protein Moxon SJ anon Pfam-B_6005 (release 7.7) Family Baculovirus P24 is associated with nucleocapsids of budded and polyhedra-derived virions [1,2]. 21.30 21.30 21.40 38.90 20.40 20.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.32 0.71 -4.72 26 59 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 53 0 0 55 1 180.10 39 86.02 CHANGED FpYssc.slEVhIIpN.....s-sDpDGYlElsAAA+LLuPhsp.+shsss.LWsNstsSaKLp+NNKNYlHsFuLsKYLSsYshssp..p.spYhsLKpLlsDLlhGsQsp.....shDP.......Ls-IKsQLCslQEslsps..........................................................................tsppsshhuslsuhL-hlKo.lpsDlssKlsFu ...FpYssc.slEVhIIpN.....s-sD+DGYlELoAAu+LLuPhlp...hstusLWsNstsSaKLp+NsKNYlHsFuLsKYLSsYshssp..p.s.pYhsLKpLlsDLLhGsQup.......lhDP..............Ls-IKsQLCslQEsLsps.sths...............................t...........s............p..p.htphlpsc.ts.p.hhs.shsshL-plKs.h................................................ 1 0 0 0 +1337 PF04766 Baculo_p26 Nucleopolyhedrovirus p26 protein Waterfield DI, Finn RD anon Pfam-B_6066 (release 7.5) Family Family of Baculovirus p26 proteins. 
19.10 19.10 21.80 21.80 16.90 19.00 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.53 0.70 -5.12 25 100 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 55 0 0 98 0 209.60 31 74.22 CHANGED hsVpY..sVsasp+plslhphcscsVpl+VhsPttps.....tD-s..Lstl.HpFPGVATsllFPpl.ppsstlpVhLss....GsLhcspss..+lahNaHsap+RhVYGQLsulslss...plts+lYlGAPIFpss.+.LVSVVTspa.sDhtcshslhPVTGhRtsuhlSGphphDst...VhVpchcsshSVYGppQLPY..........tlKtaAhst.sst...tshscslhlFas..cc-VpIslscGpFEIs+lRlsGPhlss ......................................h.lp...p.h......t....h.hh............p.t...................h-pl.HpFPGluSs.slhsp.l..pptoslpVh.sc.....h..hchhhs....+hha.saHphsKRalaG.LPshtss-......hh.hLhIGuP....................Iacpc..p..hlSlVTtRa..cs.t...tt.h....haPloGl...GhhSGplsl-ss...lphcpLcsGhuVYG+hQhsY............slKphAhphs...........shhlhhp..pp.l.lsh....s.thph.+hRh.s.hh.......................................... 2 0 0 0 +1338 PF05214 Baculo_p33 Baculo_P33; Baculovirus P33 Moxon SJ anon Pfam-B_6583 (release 7.7) Family This family consists of a series of Baculovirus P33 protein homologues of unknown function. 
25.00 25.00 63.60 63.60 22.70 21.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.75 0.70 -5.10 18 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 57 3 0 62 0 241.80 45 97.07 CHANGED Is.TPLhtRYKsSFhLasFRhLDhhRsuPSpcLpplLspElpYLYplsClIsY+-sQps-l-pLhpWshsLss-.hcL-phKhhah-KhppLNL+shpPpcYsaoFoTIWDoIHaLsLlsDDMVtNR..cphsh-hlptpl+phKhlFYNlFhhL.CshCpcHYLTVcuF.haplERIElALaRE+.hGEslhhVD.......E.lstspsscNlLh+athLYsSMlFHNHlNsYR.IQ+Nhcs.s..NapRM-WshYKpLLsl .................................IP.TPLhs+YKDSFhL...asFRhlDhlR.su.SpcLsplLusElTYLYclACLItYKDlQppElppLhpWuhslstc.hcL-Qh+lhFh-KhpELNLRuhQPKsasYoFoTIWDoIHFLuLllDDMVtsR..cKhsh-hltppL+phKslaYNlFFhLpCshCRcHYhsVKGalIhpIERIEluL...RE+......aGEsIhhVD.......................-..hstsssscNlLMKpthLYsSMlFHNHINsYRaIQ.............RNhcsPs..sap+Mc.........WspYKphLpl............................. 0 0 0 0 +1339 PF02961 BAF Barrier to autointegration factor Griffiths-Jones SR anon Structural domain Domain The BAF protein has a SAM-domain-like bundle of orthogonally packed alpha-hairpins - one classic and one pseudo helix-hairpin-helix motif. The protein is involved in the prevention of retroviral DNA integration. 25.00 25.00 25.10 33.00 24.40 20.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.86 0.72 -3.68 9 197 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 94 13 127 159 0 83.60 53 87.26 CHANGED MsoTSQKHRsFVuEPMG-KsVssLuGIG-sLGt+LcspGFDKAYVVLGQFLlL+KD.E-LF+-WLK-oCGANu+QAs-CasCL+EWCssF ..........M.sTSpKHRsFVuEPMG-KsVspLAGIGcsLGp+LpcpGFD...K.............AYlVLGQFLlLKK-.E-lFp-WLK-.sC.GAssKQupDCatCLp-WC-tF........... 0 50 58 88 +1340 PF05112 Baculo_p47 Baculo_P47; Baculovirus P47 protein Moxon SJ anon Pfam-B_6441 (release 7.7) Family This family consists of several Baculovirus P47 proteins which is one of the primary components of Baculovirus encoded RNA polymerase, which initiates transcription from late and very late promoters [1]. 
25.00 25.00 65.80 65.20 19.90 18.40 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.09 0.70 -5.03 21 58 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 57 0 0 59 0 307.50 47 78.87 CHANGED hFsphhphsSQhLPQCCKYLscsLshYhLa...hp....ths.stshahlccslpVctEGFVphppphVhFclspl.sp...uTPcDL-hYlcsoR..pl.osHctplhcLlh+DRWaKGDFsRL++hLs.pDsssLlsFsCNsLWERGYEsaYTLGQQLSIRITTKLIQSGLDFKH.Qsssss.....ts...............RGWssptFEKhluSIoSlSDlIKRHKhSpKYIlLElssspssphlchLhcppFslIpNsphsNVChIpl.D-DKNShpYLpKLupLIpp+llNVLFVTDlEaYl+pssahFYLYNSLKFYYYCLKNKFVF-hpDYEhlFL ....hFsphhphpoQ.LPQCCKYLs-sLshYhLY....hp....shss.ststhhlscslpVcs-GFVphppshVhFclspl..sp....uTPcDl-pYlchTR..sL.osHDtpllKLlh+.DRWaKGD.hsRL++lLppp-Vs..sL...lpFsCNVlWERGYEsHYTLGQQLSIRITTKLIQSGLDFKHQ.sssss..s.ss................RGWsstsFEKhlsSIoSlSDlIKRH+hupKYIsLElsssphsphlctLhpppFsllpNsphsNlChIpl.D-DKNSh.YLpKLspLlpsKllNlLFVTDVEaYh+pspahFYLYNSLKhYYYCLpNKFVF-tpDYEhlFL... 0 0 0 0 +1341 PF04878 Baculo_p48 Baculo_P48; Baculovirus P48 protein Mifsud W anon Pfam-B_6510 (release 7.6) Family \N 25.00 25.00 56.10 55.70 19.70 19.60 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.47 0.70 -5.52 23 56 2009-09-10 15:38:45 2003-04-07 12:59:11 8 1 54 0 0 53 0 376.70 46 98.32 CHANGED ppaplpYsLRFpK.....tsshppVsFpspLopsEIDSLsFLhucYFDQpphlsl.KGLTFFsEFNKCI-sIKpsFEs+p...-ss.-VKpIFslFL+cEFhsQVPsFphIMpYLppYYKPlssP.slst.l...Cs.pCs.hs+lpCLpCKssYlSsulohhDsulQ-GWDIFLRPMhGlPLhhalLlKT-as..cs-V.FNsDslITNsFsQFFYNLLCDKAsshYhsaKtCpPLlc-C++sshuLpsc-lEhLLshL.Nssohso.......KLasPFKpFM.chshpTKlK..KlNKlAuslFhGFYLRhYLEAtssK.......................shsssELElRNVCRhIh+cYs-cpFEphlpKLpsIKtDLhh.lhpphIVPEphIR+LhsKYsLDpDluhLLppsV 
.....s.hplpYsLRFsK.....hcshpsVsFpspLopsEIDSLsFLhS+YFsQpphVsl.+GLTFFsEFNKCl-sIKpsFEs+p....-Ns.....-lKpIFSlFL+.cEFhsQVPpF+pIMpYLppYY+PhssPslstl....Cs.pCs..........hs+...........l..pClpCKssYLSsulSshDsulQcGWDIFLRPMFGlPLhlalLl+T-as..csslFNsDs....LITNsFsQFFYNLLCDKAsstassa..KtCpPLlc-C++sssuLpsp-hEhLLshL.Nssohso.......KLasPFKpFMhchuppTKlK..KlNKlAusVFhGFYLRhYLEutssK....................................shosuELElR.NVCRaIhp.cYs-cphEphlpKLppIKtDLh..lhpphIVsEpaIR+LhsKYpLDp-luhLLppNh.... 0 0 0 0 +1342 PF04583 Baculo_p74 Baculoviridae p74 conserved region Waterfield DI, Finn RD anon Pfam-B_4744 (release 7.5) Family Baculoviruses are distinct from other virus families in that there are two viral phenotypes: budded virus (BV) and occlusion-derived virus (ODV). BVs disseminate viral infection throughout the tissues of the host and ODVs transmit baculovirus between insect hosts. GFP tagging experiments implicate p74 as an ODV envelope protein [1,2]. 19.40 19.40 19.50 20.40 18.30 19.30 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.58 0.70 -5.18 25 93 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 75 0 0 91 0 236.70 46 37.84 CHANGED s..psss-s-.LEsIIspFLEDauLlhGIhTslGF-hL.sslcsMLK+INosLIPtLK+hLLsoS+RlTsRLLGETYKAAllHshNRhAIKTlSsVAKAhsRhuhpAuSVlGIlLIhlTIuDLVLhlWDPFGYsNMFPR-a.DDLSpoFLoAYa-Sl.sssoRDlIEFhPca...Fs-lV.....-..pD-.......hhh-ohhall-YluuLEVNSNGQhLphscG-sIsD..FDEtoLVGuAL.ASouhYTph-FhtYTpRHNclLh...ssp .........................................st...s-pcL-sIIspFLED...aullhGIhsshGF-hLhsshKsMLK+INosLIPhL+phLlssop+VTsRlLGE.TYKAAll+shN+....lAIKTlossAKAhTRluIpAu...SVlGIlLIlhTluDLlLslWDPFGYNNMFPR-FPDDL....SpoFLoA...Ya-Sh.s.psoR-lIEFhPEa.Fs-hl........-..s--...p....hhhpohhalh-YluuLEVNSsGQhLphpcu-sIpD...FDEhoLVGs..AL.ASSuhYT+h-FhpYTtRpNpll...p.................................... 
1 0 0 0 +1343 PF04513 Baculo_PEP_C Baculovirus polyhedron envelope protein, PEP, C terminus Kerrison ND anon DOMO:DM04337; Family Polyhedra are large crystalline occlusion bodies containing nucleopolyhedrovirus virions, and surrounded by an electron-dense structure called the polyhedron envelope or polyhedron calyx. The polyhedron envelope (associated) protein PEP is thought to be an integral part of the polyhedron envelope. PEP is concentrated at the surface of polyhedra, and is thought to be important for the proper formation of the periphery of polyhedra. It is thought that PEP may stabilise polyhedra and protect them from fusion or aggregation [1]. 38.00 38.00 38.00 38.40 37.80 37.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -10.97 0.71 -4.35 23 57 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 54 0 0 54 0 137.80 34 43.32 CHANGED lsslsQL..slssuNQalELoNhLsAI+hQssplhupLssll-slpspLsslss-lppLlsplss......clsshsssLssAlNpLp...-slRN-LTslNSlLsNLsSSlTNINuTLNNLLpAlsul....shGslsphhsshhss.........lppllsh ...........hsslNQl.....slNsoNpalELoNhLsul+hQNsplhutlsphl-slpspLsslss-lpplls-lss......pLsshsssLssAlNplp...cslRNELsslNSlLsNLsSSlTNINuTLNNLLpAlssl..........shG.slss...thpshlsp.........l.pllp...................... 0 0 0 0 +1344 PF04512 Baculo_PEP_N Baculovirus polyhedron envelope protein, PEP, N terminus Kerrison ND anon DOMO:DM04337; Family Polyhedra are large crystalline occlusion bodies containing nucleopolyhedrovirus virions, and surrounded by an electron-dense structure called the polyhedron envelope or polyhedron calyx. The polyhedron envelope (associated) protein PEP is thought to be an integral part of the polyhedron envelope. PEP is concentrated at the surface of polyhedra, and is thought to be important for the proper formation of the periphery of polyhedra. It is thought that PEP may stabilise polyhedra and protect them from fusion or aggregation [1]. 
23.90 23.90 25.30 68.90 20.80 23.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.10 0.72 -4.00 16 82 2009-09-11 22:40:04 2003-04-07 12:59:11 7 2 53 0 0 79 0 113.00 28 41.17 CHANGED -sssVPlha..ssh.slWVGA-ElLpIL+ls.sstlp....slPpscKphhpcLps............ss-usKhFlTtlGlulLsuRs.s...........................................ctushhsshFlsDshp-hp.......ssp.hC ....psssVslhap.ssh..shWlusDEllplL+hs.tsshp....slPppc++hWpDhps...........sshsusKhFlshhGlulLssRs.s........................................................................phschhhshFls-shhphhtp................................................................................................................ 0 0 0 0 +1345 PF04501 Baculo_VP39 Baculovirus major capsid protein VP39 Kerrison ND anon DOMO:DM04345; Family This family constitutes the 39 kDa major capsid protein of the Baculoviridae [1]. 21.60 21.60 22.20 49.60 20.00 21.50 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.01 0.70 -5.42 16 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 0 63 0 287.40 41 92.57 CHANGED MALVssGssssRhp.NaCIFuuV.pP..FDsCtsYpSPCSsDAosDDGaFICpYHLShcFKMEKMsLPIPDu-ss..tYhRTlG+SLVsHpspuscRILIPTpsNYpoVLNlsuhsluEQLIhHhIYsNpsp.....hscICppL+hsE.pFps-hhplVEplYssTtplLuhTsPstYCopVss.sssRhassss..............ts.....ssu-psasshPsFl+NLIs+sVAPEshpIs....scsLhLRNCsTCtIs.ssGLVAss.......cLYNPVcP+YhhttN-NhLpIcNVLKFcG......NusALQ+.sLuRYEpYPlhVPLhLGpQllsops .........MALhsss.tspchp.NhCIFtul.ps....FssCtsYsSPCSsDA..sp..c..DGaFICsYHLuphFKhpKhslsIPDu..csN..papholG+SLVspp.s..pu....pRILIP.opsNYpsVLNlsshs.sEpLlaHhIYsNpsp...........................pplCptLphsE.pFpssh.tllcplassTpslLuhssPst.aCupVsp.sssRhassss.................sp.....s.sppsasshPsFLpNLIp+sVAPchhpIs....spsLplRNssTCtIs.ssGLVAss.......pLYNPlpPphh...p.hp.p..NpLpIcsVLcFcG......su.ptLQ+.sLspYEpYsl.lPLhLGpphlsp..s............................ 
0 0 0 0 +1346 PF04913 Baculo_Y142 Baculovirus Y142 protein Mifsud W anon Pfam-B_6688 (release 7.6) Family \N 25.00 25.00 30.60 30.00 19.10 18.20 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.57 0.70 -6.13 17 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 57 0 0 58 0 445.90 43 96.79 CHANGED hsL-cc.pLKYLFLuSYFcL.ts.phlss-scsFIt.-YlcsNFp.ls-ssLlpYlsYLpshpLKpll.sDposshFKYlKPQF+FlCsRc.slDIlcFD.s+lYlpPsTslYATNhFVpsPppFphhlY..ptFs+Vas.cRpFVsssppasl.hsGspGalF-cuYlDWsGl+hCpsspl...psspaPYRLYLlGEtMAp+FlcpNI.hs...........pschlLKNFaKGLPL...h+ssaclINSK+FsTcKPNclF-EhppELsspssalKFIQRDYIYDA.sFP-DLL-LL.N-YhTpTSlaKaIpKFh-sp...phts..hsEIVlDRYuVs+YRKh.l+h-ssshaP.shphspsuYIFlpsDhlQI+GTLNAFYlP+ppll.ILAsNsLFGusclL............cFD..p.LlsYppsssPh+lst-hYhlstppKlYLs+ahFus.slPAYLlIRGDYE..oSp.....hKsLc-LpNsWVpNTLLpLhl ................................................................sL-pc.phKYLFlsoYFcL.ps.s...h...lss-spsFIp...pY...lpsNFp.ls-tsLhpYlsYLsphpL+pll.s-p.ssshFKYlKPQF+FlCsRs.slDIlcFD.s+hYI+PsTsVYATNhFVssPpchhhh..lY..scFs+Vhp..p+hFl...s.sssphsl.lsGssGalF-sAYlDWsGV+MCsss+l.....psspaPYRLYLlGEtMApHFlc..sNIhhs..........sssshhLKNFYKGLPL...h+spapllNSKKFsTcKPNcl.F.sElcpELssp....ssalKFIQRDYIYDA.pFPsDLL-lL.N-Y.hTpTSlaKhIsKFhppp.......p.sshhpEIVlDRYuVs+YRKL.IKh-.sshhP.shtts-suYIFlpsDl.lQI+GTLNAFYsPp.plh.ILAsNpLFGuTclL............cF-..p..LlsYppspsPh+lss-sYhVstppKlaLs+ahFus.sVPAYLLIRGDYE..oSc.....hKoLc-LpNsWVpNTLLpLhl................. 0 0 0 0 +1347 PF04684 BAF1_ABF1 BAF1 / ABF1 chromatin reorganising factor Kerrison ND anon DOMO:DM04689; Family ABF1 is a sequence-specific DNA binding protein involved in transcription activation, gene silencing and initiation of DNA replication. ABF1 is known to remodel chromatin, and it is proposed that it mediates its effects on transcription and gene expression by modifying local chromatin architecture [1]. 
These functions require a conserved stretch of 20 amino acids in the C-terminal region of ABF1 (amino acids 639 to 662 S. cerevisiae (Swiss:P14164)) [1]. The N-terminal two thirds of the protein are necessary for DNA binding, and the N-terminus (amino acids 9 to 91 in S. cerevisiae) is thought to contain a novel zinc-finger motif which may stabilise the protein structure [2]. 24.90 24.90 25.20 25.10 24.80 24.80 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.98 0.70 -5.65 2 57 2009-09-13 16:19:24 2003-04-07 12:59:11 8 6 27 0 35 55 0 198.70 24 64.86 CHANGED MSLYEYpcPIINKDLAtsDPV.uQpRoFPTLEAWYDVINDYEFQSRCPIILKNSHKsKHFTFACHLKSCPFKILLSaQGs.sSspstDGSPtshsGDttuppptpN........HpNGHTN....utDshuEpE..pDDEDDDAAVTAAIAAAVAAVADSQETIKGPFsVTKIEPYHNHPLESNLSLQRFVLoKIPKILQVDLKFDAILESLCND-DNTVAKFRVAQYVEESGIlDIIKQRYGLT-AEMDKKMLSNIARRVTT.KARFVLKRKK-GVYhhPsuHQloGuDpHQhQhpH......pHQHQ....pQ+phQHpHQpQpQp.QHQpQHQpHVsssppVYQsRItS.SDpD-uulHNLDDsNVRV.AAAAAAAAAALQSR-sHsoE-LKhshtpsQD--ul-sss.sSKRQLHRp-RDRVAEALKMATRDILSNQsVDSDVNVDVDLVTGHKQLSPHDDMAEQLRLLSSHLKEVEAEENVSDsNLKKDDl.DENIQPELRGQ 
............................................................................ss....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pt...t............................................................................................................................... 0 3 17 32 +1348 PF02179 BAG BAG domain SMART anon Alignment kindly provided by SMART Family Domain present in Hsp70 regulators. 23.40 23.40 23.40 23.40 23.10 23.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.53 0.72 -3.87 63 795 2009-01-15 18:05:59 2003-04-07 12:59:11 11 16 274 24 498 779 0 77.20 24 25.51 CHANGED plpplhsclpp.ltsplpph.........tsp...ppcpchhhls.EhLhpt.LlcLDul.p..spGp......slRptRKphl+clQshlcpLD....thp .......................................................lpplhpclpp.ltspltph....................................ts.p...pppcphhtlp..EhLhp..LlcL.Dul.c......spGpt.......slRptRKphl+clQsllcpLDth.t................. 
0 102 206 335 +1349 PF02923 BamHI Restriction endonuclease BamHI Griffiths-Jones SR anon Structural domain Domain \N 22.80 22.80 27.00 27.00 22.70 22.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.01 0.71 -4.30 3 18 2012-10-11 20:44:43 2003-04-07 12:59:11 10 2 15 9 8 16 2 136.40 41 74.24 CHANGED M+VcNcEILl-sGclssc...IpSIhsEVcsSIcsslWPAsScsFsINsT+Ku.NGVKPIK-pCMpHLc.paGWaLEK+LDIhKs-pKP.GPIDAVK.l....usKtFAlEWETGNISSSHRAINKMlLGMLcGcIIGGILILPSRpMYsYLTDRVGNFcELcPYF-l ........................hcl.pp.hh.pttp..h...pp...htphhs-lhpuIps.haPssspsFhlNs..scKs...NGVhPIKp.ChphLc.phsWhhEp.lcl.hhptp..GPlDshK.h....p.+hhuhEaETGNISSuHRuhNKhlLGlhcG.l.hulllhP.+phh...Y...LTDRluNFcELEPYFp.......................... 0 4 6 7 +1350 PF00373 FERM_M Band_41; FERM central domain Bateman A anon Bateman A Domain This domain is the central structural domain of the FERM domain. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null --hand HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.53 0.71 -3.95 56 4264 2009-09-17 09:40:39 2003-04-07 12:59:11 13 246 174 84 2246 3702 4 127.30 24 12.59 CHANGED -.t...hp-shphphhYhQs+psllp...sch...........ssp.-pshhLAuLthQhphG.chs.tpptsts..htt..th...................................................................................................................................................................................ls....................................hhpphppcp.hppclhptap.phps.........h..ottcAch....palphspp......L.stYGsphF 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hp-phs..hhhha.l..Ql.+pcllp.......Gcl...........Cs....css..hhLAu..htl.Q..s..chG...Das...pt.p.tt..s..hh.tpt.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hhP.................................................................t...h...p...s..c.........p.......h........cc.c.lh....phap....ph.p.u..............................h.....o.tc.Ach.........paLp.h.sp.p......L.thYGlpha.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................. 0 564 750 1407 +1351 PF03114 BAR BAR domain Bateman A, McMahon HT anon Psi-blast P25343 Domain BAR domains are dimerisation, lipid binding and curvature sensing modules found in many different protein families. A BAR domain with an additional N-terminal amphipathic helix (an N-BAR) can drive membrane curvature. These N-BAR domains are found in amphiphysin, endophilin, BRAP and Nadrin. BAR domains are also frequently found alongside domains that determine lipid specificity, like Pfam:PF00169 and Pfam:PF00787 domains in beta centaurins and sorting nexins respectively. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.52 0.70 -4.85 28 2173 2012-10-03 12:17:00 2003-04-07 12:59:11 13 82 313 19 1287 2310 1 214.10 18 40.80 CHANGED hpKp....hsRssQhhppKhGtuEpTc..DccFcphEc+hcthpptspclhccsptYlps..shtscph......................hLupshhchucphupcss...u...........sh.phupshcpl.uphhcshptplcpshlcPlpphh.sphpslp+plcKhps+hLDaDsp+p+hp+hp...t.................--EhctAppca-chs-.lpppl.slhstcsphl.splpshlptphcaappshphLpplptpltt 
....................................................................................................................................................h..p.h...t...p..h..s.................s...p.t...Tp......-.......t.......p.a.pp.h...p..p..hp..hpp.........spcl.cph....p....t....h...lp...s..........s.h..ptpph.................................................................................hlup.sh..c..h...h..pth..s.......ttpt.......s.................................................................hh....ph.spshc..pl....sp....h........h......p..p.h...p........tp...............l...p...pphl...p.PLp.phh..spht....sl.......p..c...............th+.......K....hpp...+...h.......l.......D.aDt....t+p.+.h..p........p.hpt..tp..t...........................................................-cElc..pA..p.ppa-...phs.......ptl.....p.p.......cl........p........l...h...s..t.c.h.phh.sp.hps.h.h.....p...hp....h...p....aappsh....phhppl.....t........................................ 0 333 558 919 +1352 PF01337 Barstar Barstar (barnase inhibitor) Finn RD, Bateman A anon Sarah Teichmann Domain \N 21.00 21.00 21.30 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.93 0.72 -4.14 52 1251 2009-09-10 16:56:04 2003-04-07 12:59:11 13 10 1143 47 253 726 26 85.60 29 72.31 CHANGED ltlDhsphpscpslhptlupph.sFP.saaGpNhDALaDsLosh............sphshhlhhpthsphpph..cphtt...lhplhc-spcph...ss.t..hp .......................hhDhsclpspp-hacphupsh..uhs..pth..G....c.NLDuLaDsLts..................hhshPlpI.asphsp.t..p+............c.c..a.ss...Ll.lhc.-.Ap-ch...tt......thh......................................... 
0 59 152 209 +1353 PF00967 Barwin Barwin family Bateman A anon Sarah Teichmann Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.95 0.71 -4.43 8 153 2012-10-01 21:39:58 2003-04-07 12:59:11 12 6 58 2 47 226 1 108.50 60 69.81 CHANGED SAoNVRATYHlYNPtp.NWDLs..AsSAYCAT..WDAsKPhuWRpKYGWTAFCGPuGPRGQuSCGKCLRVTNTuTGApsTVRIVDQ..CSNG....GLDLD.ssVFpplDT.sGhGYQpGHLsVNYQFVsC ...........................................AsNVRATYHhYsPtp.......sWDLs..ssSAYCuT..WDAs...KPLu.WR.pKYGWTA..F.C.G...P...s..G......PpGpsuCGK...CLp......VT.....N.....T....u...TG..Aps.........TsRIVDQ...C....SNG.......G...LDLD.ssVFpplDT.sGtGhppGHLhVsYpFVsC............... 0 6 28 38 +1354 PF04865 Baseplate_J Baseplate J-like protein Mifsud W anon Pfam-B_4777 (release 7.6) Family The P2 bacteriophage J protein lies at the edge of the baseplate. This family also includes a number of bacterial homologues, which are thought to have been horizontally transferred. 
20.50 20.50 20.60 20.50 20.10 19.80 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.59 0.70 -5.03 136 2290 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 1421 0 441 2037 229 212.60 22 56.27 CHANGED pustL.Dphuth.hs..ltR..hsss.A.ssshh.sss...................................................................................................................usshEsD-s.......hRpRlhhshcshss....uGspssYphaAh...........................ss.ss..............Vscutshss...............u..............................sGs..Vplslls.p...................su......hssspllst.....................Vpshl...........pt-sl..........RPlsc.pVpVtusphhshslssphhhh..sss.spsht.sspps...lpsahtstpph..........Gts...lhhSt.l..hus ..................................................................................t....Lt.hst..hs..h..Rh.ht.u...p.s...h...h.........................................................................................................................................................................t............................................................Gss.....hEo.....Dps.......h.RtRh.t..s..hc.s..hss....uG.s...p..s.sYpha.Ah............................ss..st..........................Vs.cstshss............u...............................................................................................sus.....Vp....l.hllsp..........................su..........hsspsllst..........................................................................Vpphl.........................-sh....................+Pl....s.....s...pl..pV.tu.sthhshplpspl..hhh...sss.p.......sp..sht.....shp...tt........lpta.ht.t...th...........stt...l..stl..................................................................................................................................................... 
0 128 281 374 +1355 PF01586 Basic Myogenic Basic domain Bashton M, Bateman A anon Pfam-B_427 (release 4.1) Family This basic domain is found in the MyoD family of muscle specific proteins that control muscle development. The bHLH region of the MyoD family includes the basic domain and the Helix-loop-helix (HLH) motif. The bHLH region mediates specific DNA binding [1]. With 12 residues of the basic domain involved in DNA binding [2]. The basic domain forms an extended alpha helix in the structure. 25.00 25.00 27.10 26.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.47 0.72 -3.26 7 456 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 147 4 145 408 0 84.90 40 34.53 CHANGED MELh.s....hs.......hF...s.pp....FYDupsh.sschp..hhEsht...sp.ssLp.........P-s.ptppE-EHltAPs....cHpsG..pCLhWACKsCKRKosssD ...................................................................................................................................................................F.............ah.-...s...sh..sc.hp...hh-s..s....h.p....s..sL.p.........................s-.t....p...s.s....p...--..E...HVtAPs.............s.pHpsG....pCLhWACKsCK.RKosssD... 
0 21 30 72 +1356 PF02028 BCCT BCCT family transporter Mian N, Bateman A anon IPR000060 Family \N 20.10 20.10 20.30 20.30 19.80 20.00 hmmbuild -o /dev/null HMM SEED 485 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.54 0.70 -6.25 145 4820 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 2128 17 801 3301 1081 459.80 34 85.74 CHANGED VFhhSshllhhhllhsl.....hh.scthpshhsshhsalsspFGWaYllssshhllhslhlAhS+.aGpl+L.GsscscPEaSthoWhuMLFuAGhGlGLlFaulAEPlhHatss................P...ts......psto...pAA.p.AhthoaFHWGlpuWAlY.ulluLulAYata+.+shP................hplposh.hPllG.c..+.hpGs....lGcslDllAlluTlhGlATSLGhGshQlsuGLstlh.G.......l...s.s......shss.plhllsllsslhslSshoGls+..GI+hLSplNlhLAhhLllFlllhGP.ThallsshspslGsYlpsh............hthohhh...ts...hs......sss.................................................WhssWTlFYWAWWIuWuPFVGhFIARISRGRTIREFllGVlllPslhshlWaulFGuoAl.........thph.....puss.slss.............sttss.ssulFshlpph.............Phutlh....uhlsllLlhlFalTSuDSuohVluhloosG...st..sPsthh+lhWulhhuslAhs.LLhs..GG.......LsuLQsssllsALPFsllhllhhhuL.....................h+sLpp- 
.............................VFhhoh.h.ll.h.h.h.s.h.h.s.h.....h.h.s.c.....t.stthhss..................h.hsalspshGWhallhsslhlhhsl.hl.s.....hSc.aGpl+.L.G..t.p.pcPEaShhSWhuMLFuAGhGluLlFaGsAEPhhaahs.......................................P......sh.........ps.to.tpAh...p.AhthohFHWGlpuWAlY.ulsuLslAY.Ftap.+s.h.P................h.p...lpusl.hPllG..c....+....h....p.G........l.G.chlDlhullATlhGlAToLGlGs.QlssGLphLa..u.......................l....ss..............shth.phhlIslhshlhs..lSshoGlsK..Gl+hLS.plNhhLAhlLll.hl.l.....l.....h.....G.....P.....T..hal...hs..sh...ssslGs.Ylp.s.h..............hph.oh.t..ts...hs.....pss.................................................................W..h.p.sWTlFYWuW.WluWuPFVGhFlARISRGRTIREalhGslllPshhshlWaulFGs.sAl......................hh..h..............ps..hh....slsp...................................ttss..ptu.la.thhpph...................................P.h..u..p.lh....shlshllhhlFalTouDSuohlluhhosps......................s..cs...s.hhhRlhWulhhu...............lluhs.LLhs......GG...........LpuLQsssllsulPFshlhlhhhhuhhKslpt................................................................................................... 0 247 506 695 +1357 PF02327 BChl_A Bacteriochlorophyll A protein Mian N, Bateman A anon Pfam-B_38317 (release 5.2) Domain Bacteriochlorophyll A protein is involved in the energy transfer system of green photosynthetic bacteria. The protein forms a homotrimer, with each monomer unit containing seven molecules of bacteriochlorophyll A. 
25.00 25.00 234.40 234.10 19.20 18.50 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.33 0.70 -5.83 2 70 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 28 4 12 70 0 290.90 78 98.43 CHANGED TTAHSDYEIILEGGSSSWGpVKGRAKVNVPsA.PLLPsDCNl+IssKPLDstKGhVRFoshIESlVDSsKNpLsVEsDIANETK-RRIsVGEGSloVGDFSHSFSFEGpVVNhaYYRSDAVRRNlPNPIYMQGRQFHDIlMKVPLDNNDllDTWEGh.pulpusGu.FsDWIREFWFIGPAFsAlNEGGQRIS.I.VNS.sspuGEKGPVGVoRW+FSHuGSGlVDSISRWsELFPs-pLNKPASlEuGFRSDSQGIEVKVDGphPGVShDAGGGLRRILNHPLIPLVHHGMVGKFNDFTVDTQLKIVLPKGYKlRYAAPQFRSQNLEEYRWSGGAYARWVEHVCKGGTGQFEVLYA .........TTAHSDYEIlLEGGSSSWGpVKuRAKV.NVPsA.PLLPADCNlKIssKPLDstKGhVRFousIESIVDSTKNKLsVEsDIANETK-RRIuVGEGpVoVGDFSHoFSFEGSVVNMYYYRSDAVRRNVPN..PIYMQGRQFHDIlMKVPLDNNDLIDTWEGh.pulpusGA.FsDWIREFWFIGPAFTAlNEGGQRIS.IpVNu.hssESG..-KGPVGVSRW+FSHuGSGlVDSISRWAELFPsDpLN+.P.......ASlEuGFRSDSQGIEVKVDGshPGVShDAGGGLRRILNHPLIPLV.................................................................................. 0 2 3 9 +1358 PF00452 Bcl-2 Apoptosis regulator proteins, Bcl-2 family Finn RD anon Prosite Family \N 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.42 0.72 -3.89 72 878 2012-10-03 11:38:54 2003-04-07 12:59:11 14 11 175 146 362 880 0 97.20 28 43.78 CHANGED Lpplu-clppphpph..apshhpplpl.s.tp..shph....FppVupclFps...........slNWGRlVulhsFuutlsh................chhpptt...sth...lpplsphhspal.ppphssWIpppG.GW ..................................................LpphG--hppp.hpp...apshhppLp....h.......sstp......shpt.......FtpVsp..-....lFpc.................................slNWGR.lVuha..sFuutlsh..................cshppph......tsh.................lp.plspahs.p.al.pcplt.s.W..ItppG.GW.............................. 
0 81 111 190 +1359 PF04538 BEX Brain expressed X-linked like family Finn RD anon Pfam-B_3086 (release 7.5) Family This is a family of transcription elongation factors which includes those referred to as Bex proteins as well as those named TCEAL7. Bex1 was shown to be a novel link between neurotrophin signalling, the cell cycle, and neuronal differentiation, suggesting it might function by coordinating internal cellular states with the ability of cells to respond to external signals [2]. TCEAL7 has been shown negatively to regulate the NF-kappaB pathway, hence being important in ovarian cancer as it one of the genes frequently downregulated in this cancer. A closely related protein, TFIIS/TCEA, found in Pfam:PF07500 is involved in transcription elongation and transcript fidelity. TFIIS/TCEA promotes 3' endoribonuclease activity of RNA polymerase II (pol II) and allows pol II to bypass transcript pause or 'arrest' during elongation process. It is thus possible that BEX is also acting in this way [2]. 26.10 26.10 30.10 31.20 23.40 25.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.38 0.72 -3.79 20 315 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 27 0 152 306 0 126.00 29 85.30 CHANGED M-.......KsppENEtcsp.sps+sE-t.............................................................................ppPhtss-tpcscGshRc.+ltcsh.pF+tDI.sRplss-Ehhct.sD-hpRhhEEhRclRpKht...............shHacp+cspsh. ..........................................................................Mp.psppENEsc.p..stspsEct..............................................................................................pcPhtss.spcsc...ssh+c.thtpsh.pa+.EDl.sRplssE-MhRt.s--hpRh.EEhRcl+pKht...............shHapp+cphsh....................... 0 14 14 19 +1360 PF04714 BCL_N BCL7, N-terminal conserver region Mifsud W anon Pfam-B_5900 (release 7.5) Family Members of the BCL family have significant sequence similarity at their N-terminus, represented in this family. 
The function of BCL7 proteins is unknown. They may be involved in early development. In addition, BCL7B is commonly hemizygously deleted in patients with Williams syndrome [1]. 25.00 25.00 29.00 28.30 19.50 17.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.48 0.72 -4.39 2 196 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 90 0 108 180 0 45.70 73 23.78 CHANGED hsRShRAETRsRuKD-l++Vhpul-KVR+WEKKhVhIpDTsh+IYKWVPluu ........Ro..lRAETRSRAKDDIK+VMsslEKV..R+W.EKKWVTVG..DTSLRIaKWVPVo.p...... 0 23 32 61 +1361 PF01869 BcrAD_BadFG BadF/BadG/BcrA/BcrD ATPase family Enright A, Ouzounis C, Bateman A anon Enright A & Pfam-B_5854 (Release 7.5) Family This family includes the BadF Swiss:O07462 and BadG Swiss:O07463 proteins that are two subunits of Benzoyl-CoA reductase, that may be involved in ATP hydrolysis. The family also includes an activase subunit from the enzyme 2-hydroxyglutaryl-CoA dehydratase Swiss:P11568. The protein Swiss:O66634 contains two copies of this region suggesting that the family may structurally dimerise. This family appears to be related to Pfam:PF00370. 
27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.60 0.70 -5.10 19 4290 2012-10-02 23:34:14 2003-04-07 12:59:11 15 24 2197 20 1108 3723 293 259.00 21 58.92 CHANGED lGlDuGuTso+sllhs........cpup.lhspshssu..................................................................sshptss.p.s.t.h.culppshcpuGhshp-...............................................................................................lthh..sssGhGhsshs..hstchhht...........chsspscGshhhhsssp....uVlsIuGpsopslthc.sGpstsashtuhshuspGphhtlsuctlshslcphsulstpus............................ps.tthsutssshutp.hhsthuus.tutcIlsshspslutplhshhpphushtt.......lhhsGGlups.shh.tl...l.pphhph..h.h.s.sthsuAlGAuLhA ........................................................................................................................................................................lGlDsGSTss+s..l...l..h.s...........p.s...t...p...l...l......h...p.h..h...t...h.s..................................................................s.sh.......................................h.......p...sl.p.p.h.h...p...p...h....t....h..t.....t...............................................................................................................................................................h..t..h..h............s..s..s.G....h....G......t.t.t...h...t..............ht...ph.hh...................................Elhsp..sc...u..s.t...t.....hh......s..sss......................sll.-..IG.G..pDu...K.sl..t..l.........c..........s.......G......t.....h...........t......p.............h......t.h..............ss..............tCu.u.u.s.Gu..a..lc.h..h..u..p.t.....L.p..h...s.l..p.....p..h......s....p..hu..psp.........................................................................................................................ps...hsl..s..u..c....C...s.V...F...A...p....o.p..l...p...s...h.....p.t.ss......s.t.pcI.h...suls.p...u.lsp....p....s....h..s..h...l...t....p...h...p.......t..
.......................l.shtG...G.....s.....h..t..s.............h.....h....p.t.....h.............p....t..h.......t..............h..........s...t..h.......sshGAsh........................................................................................................................................................................................................................................................................................................................................................................................ 0 477 821 983 +1362 PF03170 BcsB Bacterial cellulose synthase subunit Mifsud W anon Pfam-B_3954 (release 6.5) Family This family includes bacterial proteins involved in cellulose synthesis. Cellulose synthesis has been identified in several bacteria. In Agrobacterium tumefaciens, for instance, cellulose has a pathogenic role: it allows the bacteria to bind tightly to their host plant cells. While several enzymatic steps are involved in cellulose synthesis, potentially the only step unique to this pathway is that catalysed by cellulose synthase. This enzyme is a multi subunit complex. This family encodes a subunit that is thought to bind the positive effector cyclic di-GMP. This subunit is found in several different bacterial cellulose synthase enzymes. The first recognised sequence for this subunit is BcsB. In the AcsII cellulose synthase, this subunit and the subunit corresponding to BcsA are found in the same protein. Indeed, this alignment only includes the C-terminal half of the AcsAII synthase (Swiss:Q59167), which corresponds to BcsB. 
20.10 20.10 20.40 20.20 19.80 20.00 hmmbuild -o /dev/null HMM SEED 605 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.74 0.70 -6.50 55 1096 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 931 0 189 915 40 545.50 33 75.33 CHANGED sss...sspspshsFtplGh..sslsLpGspsptslsFslpsD.clVosApLpLsao..SPuLlsshSp....lpVhlNsplluslslsppps...phsshplslsPt.hhs..saNplplchhu+asts.CpssssssLWssluss.SpLsLshpslsl..tscLuhLPsPFFD..............tpstptlslPhVhsssss.splcAAuhlASahGshAs.aRstsFPshhs.......slPsps.....ulVhuossphsshl....shPs..h...sGP.......slthlssPssshs+lLlVhGRssp-LppAupuLshus..sshsGssstlsp......sRpPYDAPsal.o-RslphuELst....spsLpssGhtssslplshpl......PPDLahhpspslslsLcYRYoss...stssSpLslslNsphlpohsLss.t........h...hhspsshssppplplP...shhlsupN.......pLphcFshss.psu....tCpsstss..s..+usIss-SoIDhSsh.HahthPsLssFAsuG.aPFoRhADLS..cTslllPsp.....PsssslsshLslhGphGttTGhPshslplsss.....ss.ssh.pcc.DlLllGshss..shhhph.........sspl.shhhssspth.ht.s.....h.tth.h...t...ssttssssthsls...usushuslhGhpSPhsspRo........lVulh 
.................................................................................................................................sts....ssppsploFsplu..ssshsLpG..hss.s.u.slpFshcuD..clVopAhLsLpYo.sSPuLl.ssp.Sp.........LpVh.lNsph..hus...l..s...lsccph.....p..........pshtphsIssh.hls..caNpl...pl..c...hl...u....+apc.....s...C.Esssos.............sLW.lclsps.SsLsLsapt.lsl...ps-LucFPsPFaD..............s.c.c.s.p..s...slsh.V.hs...s....s..Psss.......pAuu.llAS..WFGutus...aRuppFPVh.hs....................pLPsps............................ul..Vhuos-c..hPshLp.........stPs.....l......puP...............................slph...lsp..Pp.........s........s...h.sK.L.LVVh.G+s..-c-LhpAu+ulApus.........hhhpGpsssVsc..p.h.sRpPYDAP.sWlpoDRs.l.......shu...ELtp..........ppLp.ssGht.......ssslslslsL...PPDLahhcussls..hclpYRYo.s..........sp-sSphslslNsphlpohsLss.t...............hhpshh.ssctslsIP...slpluupN............pLchcFphh....s.....hsu..........sChshtss...ts.+ssIs.s-SoIDhSp.hhHah.hPcLt..hFuNuG.FPFoRh.ADL.S...pTh...sVhPcp.....PspsphpsLLshhGhlGs..tTGhsshsls.lscs.....ust....ts..cct.DlhllGshss....hhp...........................scpl..shhhpss.psh...htps............h...............tttttspst.thp....sssshsslhuhpSPhpstRsllsl.......................................................................................................................................................................... 
0 37 85 137 +1363 PF02138 Beach Beige/BEACH domain Mian N, Bateman A anon IPR000409 Family \N 21.00 21.00 21.20 21.80 20.60 20.80 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.94 0.70 -4.95 145 1432 2009-01-15 18:05:59 2003-04-07 12:59:11 13 80 283 6 960 1325 26 252.20 44 12.87 CHANGED T..pcWtptclSNF-YLhhLNhhuGRSaNDloQYPlFPWllsDYpSp..pl....Dl...s..sspsaRDLoK.PhG....uh.spcRhppht.pphpp.htp.t.....................................a.hYuoaYS..sshsVhhaLlR..l-PF.oshtlplQuGcFDhssRlF....pSlppsaps.s...ps.sDh.+EL...IPEFF.......hhPEhLhN.NphshG.h.....psp.lsDVhL...PsW..u..ps.st.....cFlhhpRcALEScaVSppLppWIDLIF....Gh+Q+GptAh.ps.NlFp.hsYpstht......thps...ppp...thpshlpshGQ....sPt.QLF.ppsHPpR .....................................................................pcW.p.tc..ISNF-YLhhLNslA..GRoaNDL.sQYPVF...PWllsD...Y..sSc.....pL..................DL.s.......sPpsa...RDLSK..PhGs...s.cR.hptht.c+Ycp..hcp....................................................................s.a.HYGoHYS.ou.t.hVhhaLlR.h...pPF.ophhl...pLQu.....G.p.FD...h...s...DRhF..................pSlt.psWps....s.......pshsDV..+EL......IPEFa......................a.h.P....E.hL.hNtst.hs.hGhh......................pssphlsDV.LPsW......A.......ps....Pc.................cFlt.hpR.pAL.......ES-aVSpp.....LHp.....WIDLIF....GY.KQ......p.G...tAl....cAhN.VFa.hoYpGsssh...............................ptlpD...h.+puh.s.IpsFGQ.sPp.QLh.ppPHP.R.......................................................................................... 1 439 568 771 +1364 PF00407 Bet_v_1 Bet_v_I; Pathogenesis-related protein Bet v I family Finn RD, Radauer C anon Prosite Domain This family is named after Bet v 1, the major birch pollen allergen. This protein belongs to family 10 of plant pathogenesis-related proteins (PR-10), cytoplasmic proteins of 15-17 kd that are wide-spread among dicotyledonous plants [1]. In recent years, a number of diverse plant proteins with low sequence similarity to Bet v 1 was identified. 
A classification by sequence similarity yielded several subfamilies related to PR-10 [2]: - Pathogenesis-related proteins PR-10: These proteins were identified as major tree pollen allergens in birch and related species (hazel, alder), as plant food allergens expressed in high levels in fruits, vegetables and seeds (apple, celery, hazelnut), and as pathogenesis-related proteins whose expression is induced by pathogen infection, wounding, or abiotic stress. Hyp-1 (Swiss:Q8H1L1), an enzyme involved in the synthesis of the bioactive naphthodianthrone hypericin in St. John's wort (Hypericum perforatum) also belongs to this family. Most of these proteins were found in dicotyledonous plants. In addition, related sequences were identified in monocots and conifers. - Cytokinin-specific binding proteins: These legume proteins bind cytokinin plant hormones [3]. - (S)-Norcoclaurine synthases are enzymes catalysing the condensation of dopamine and 4-hydroxyphenylacetaldehyde to (S)-norcoclaurine, the first committed step in the biosynthesis of benzylisoquinoline alkaloids such as morphine [4]. -Major latex proteins and ripening-related proteins are proteins of unknown biological function that were first discovered in the latex of opium poppy (Papaver somniferum) and later found to be upregulated during ripening of fruits such as strawberry and cucumber [5]. The occurrence of Bet v 1-related proteins is confined to seed plants with the exception of a cytokinin-binding protein from the moss Physcomitrella patens (Swiss:Q9AXI3). 
20.80 20.80 20.80 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.78 0.71 -4.60 34 1250 2012-10-02 19:24:03 2003-04-07 12:59:11 14 8 180 62 290 1394 4 142.70 32 95.24 CHANGED uhsGph.sElplpssAc+aa+hapt.cschlPcshsc.tIpulclhEG-hsss..GoI+pWsash.-Gc.cshKE+lEhsD-p.pslsapslEGclhpcaKpatsshphh.PKspGs.slsKhohcYEKhs-css.Ppchlch....stplsc-l-saLlus ...............................................shptEhss.s.lsss+la+u.hhh.-s....ssllPKl.hPp.slpolEh.l.E....Gs..G.GsG...........TI..Kcls.F.....s.............c....u..........s...h.pa.s..Kc+.l-tlDcs....NhthsYoll..E...G.s..s.l.s..c.t...lc.p.l.s.hc..hKl...s...sss......G...G...S.lhKhosc..Yc...s...ps..stt...............pc..hch........stslh+slEsYLlt........................................... 0 47 183 242 +1365 PF03494 Beta-APP Beta-amyloid peptide (beta-APP) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 40.80 39.70 19.20 18.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -7.73 0.72 -4.48 2 171 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 48 133 33 165 0 38.90 89 6.25 CHANGED .tc-pt.EVaHQKLVFFAEDVuSNKGAIIGLMVGGVVIA ...FRHDSGYEVHHQKL..........VFFAEDVGSNKGAIIGLMVGGVVIA. 0 1 4 11 +1366 PF00144 Beta-lactamase beta-lactamase; Beta-lactamase Sonnhammer ELL, Bateman A anon Prosite and Pfam-B_106 (Release 7.5) Domain This family appears to be distantly related to Pfam:PF00905 and PF00768 D-alanyl-D-alanine carboxypeptidase. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.82 0.70 -5.42 127 13942 2012-10-02 21:13:33 2003-04-07 12:59:11 19 160 3791 302 3910 15232 5643 321.50 17 75.55 CHANGED lpphhpphht.............................hsGhs............lslhpsGchhh.th...............u....................shss......cohFtluSsoKshsussl.hthscpG................t.........lsL-csl.......................pcals....................t.stptlTlpcLhsaouGh........h.tt........................phhthhtth...h............sGpth..Ysssuhs.Llutlltpss..G.ps.....htphlpcplhps.................h..shpcsthssststptphstsh..t..............................................................shhusspDlt+ah.....................h....tthhsttthpph..........hshhtsststssthttsh....uhGahlsstst............................hhtpsG..........shushhhhsspp..........slslsh...lsNptt..ttttttsthhhthst .........................................................................h.........................hsG.h.s................l.h.l...h..p....s...s..p..h.....h.h.p.ph.h...................................................Gh...................ts.h..t.t.t.............s.h.ss.................s.o..h..apl.u...SloK..s.h....s..u..s.hl..h......p....L.h..pc.G..............................p..............l.s..L..c..c...s..l...................................................................................................sca.lP.......................................t....t....s...t..p....t....lT.l...pp.....LL...s.H..o.....o..G.....l..s.........................t...t.h.h.ptt.................................................................tp.hhp..h...h..t..p...h.......h..t..ht................................................PG..s...p..ht...Y.u...s.........s..shh..lLuh.....l...........l..c.pso...........G..ps....................................hp..p.h...h..p...c..p..l.....hpP.............................................L.....u..h...p....p.....o........t.....h.......t.....s......s.....t.....s....t
.............t....t......h.....s...h......s...h.............................................................................................................................................................................................ssuulh.oos.pDh.....t+ah.................................................h...t.t.t..h....t.t..p..h..h.p..th........................................h...h...h...t....h....h..........s....s...t........t..t..s.h.........................uh..G.a..h.ht.t.tt............................................................................................thh...t.+s..G..........thus.....h..h...h...h.....Ppp....................................ph.sls...h....h.s.Nt...................thh........................................................................................................................................................................................................ 1 1339 2525 3318 +1367 PF01212 Beta_elim_lyase Beta-eliminating lyase Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.74 0.70 -5.27 53 4088 2012-10-02 18:26:03 2003-04-07 12:59:11 16 26 2725 70 1143 27402 8234 301.90 26 73.88 CHANGED DLtSDoht.s.TsuMpcsh.stA..hGD.-.s.YusD........sstcLccpss...........-..............................lhGh-.ss..l..F..............ss.o.GTtANplul....hshs.p....pphlstcs.....uHhahcEs.uuhs.l..uGspshsl...ssp...s.............Gshslsclcp.tlct..s.........a.spssllslpsTps...Gpllsh.cplcplttls+cpG.......l.lHhDGARhhs..Au............sslsssl+Ehs..uh.sDulshshoKshssslG.ullshscc...........................................................alppthch..p+....hLGuth....R..psG....llsA......................................uGlhul-ps......hhphtcDpcp..AcpLup...tlpt........lsh......s.hsp..hs....spsphshltts 
............................................................................................................................................................phhoDs.....ss.t.h...h.puh....h...t.....s......hG..D......-....s....Y....G....s....D..............s..s..t....p....L..p....c....t....h.t................c.............................................................................................l...h...G....h....c......su.......l....F........................................hs....s...G...T......t......A...N........l...s.l.......................s..h....h....c..........p....t...p....p.....h...l...s.sps............................u.H.h..h......h.....p......E..s......Gu..h....t..h........s........G......h....p....s...h....s....l.s.....s..pp.....................................................G..p...l...s...h...c..p...l.c...t...tlct.s.....................H.h.s.p..s..t.h.l.s...l...p..p..T.ps.........G..p..l.......h.....s.....h......p......p......l...c..p....l......t.p..h..s...+.....c...p...s...............ls...l..a........hD...........G.....A......R....l..h..p.......A.s..............................................ss..h..s...h....t..h...p...-...h.s..........p.h....s..D......s....h...s...........h........s...h...o....K....s...h...s....s...s....h..G....u......l.l.s...h.s.cc.......................................................................................................................................................h.l.t..p...t....h..p...h......t+.............h.h....u....u....t....h...........+.........p...t.....G........llsu........................................................................................................u..u..h...h....u..l...c.s.........h..h..h..t...c....s...p..p..p....A.p.p.l..sp....t..l.pt........h..s.h.....h..h...p.....s.....opthhs.....h...............................................................................................................................................................................................................
.................................... 0 371 694 965 +1369 PF02929 Bgal_small_N Beta galactosidase small chain Bateman A anon Pfam-B_592 (Release 6.3) Domain This domain comprises the small chain of dimeric beta-galactosidases EC:3.2.1.23. This domain is also found in single chain beta-galactosidase. 19.80 19.80 19.90 20.00 19.10 19.20 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.69 0.70 -5.38 21 2500 2012-10-02 23:57:29 2003-04-07 12:59:11 12 42 1440 273 453 2175 266 257.00 27 27.63 CHANGED slp.utshphhFs+psGtlpshh.hpGpchLhp..sspssFaRAPTDNDhGss.s.ch......tp............WpsAGh.phppcl..h.s.ptt..............s-shlplphsathsshh...htsphsYplpssGplplslshpssts....lP..lPRlGlphtlsps.hsslpaaGhGPtEsYsDRppuuphGpapsslsp.hssYlhPQEsG.+s-s+hlslpttt.........stlhlsup.....FsFus...YotppLpp.ssHhp-L.tpctshLplDttphGVGG.DSWussVpspYpLts.psapasaslp .......................................................................................................h.lpstshph.Fs+.t.s.G.hlspht...h........ss....p.......p.....h..lh..p........s..p........sF.aRA.s..hD.....ND....hs.....t.tsh...........................t........................W.p.s..s.s.ht.t..h.p.t...ph..t.h.ts.pp..s......................................................sts.....lh..s..p..h...s.h.....h..........h.s....s.....h..h...hpsphpYp...l....s..s.........s...G...p.....l...pl..s..h..p..h..p..h.ts.s...............hP....p...lP+lGhph....tl.....s......tp....h....sp...lpaaGhGPtENYsD+ppuuhhuhap...s.s...ls....p..h..a...p.....sY..lhPQEsGt+pssRahsl.tstt......................................sul.hlpup...............t.h.p.Fus............Y.......ot........p....p....L................p.................p......s........p......Hp...p..-L..p........t.....p..c..t...s..hL.slDttph.GlGu.sSWG..s..p..l..h..s..p..a..pl.h...pp..apaphhh.h......................................................... 
0 131 310 401 +1370 PF02180 BH4 Bcl-2 homology region 4 SMART anon Alignment kindly provided by SMART Family \N 20.50 20.50 20.50 20.90 20.10 20.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.60 0.72 -6.77 0.72 -4.42 11 223 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 58 92 80 246 0 25.40 54 12.65 CHANGED huhssRpLVhcFlsYKLuQ+Gashtpt .....u.sNRELVhcalsYKLSQ+GYsWst.h..... 0 5 12 29 +1371 PF02368 Big_2 Bacterial Ig-like domain (group 2) Bateman A anon Bateman A Family This family consists of bacterial domains with an Ig-like fold. Members of this family are found in bacterial and phage surface proteins such as intimins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.64 0.72 -4.09 46 5992 2012-10-03 16:25:20 2003-04-07 12:59:11 13 651 1569 19 1313 5130 392 78.80 23 17.92 CHANGED sssplslsss......ssuhh.utshphsssth.ssuss......tphoWpS.sNsplAoVs..........ssGhV...ouhs..pGsssIsssssc..spssshTl ............................................................................tlpls.s.........phsl..h..h.G.....t.s.....h.p.l.s..s..s..h.....t...sssuss.............ppl.s....W....s....S.....u.....s.....s.s.l.A...oVs...............tsG.tV...........s.uls.....tG...s.....s.s..Is..s...s.s.ss...s..sh.......................................... 0 641 1057 1180 +1372 PF04775 Bile_Hydr_Trans Acyl-CoA thioester hydrolase/BAAT N-terminal region Kerrison ND anon Pfam-B_2191 (release 7.6) Family This family consists of the amino termini of acyl-CoA thioester hydrolase and bile acid-CoA:amino acid N-acetyltransferase (BAAT) [1]. This region is not thought to contain the active site of either enzyme. Thioesterase isoforms have been identified in peroxisomes, cytoplasm and mitochondria, where they are thought to have distinct functions in lipid metabolism [2]. For example, in peroxisomes, the hydrolase acts on bile-CoA esters [1]. 
21.30 21.30 22.50 22.40 19.60 20.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.52 0.71 -4.42 37 429 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 155 4 254 436 2 126.40 34 30.24 CHANGED hD-.lpItlsGLsPsp.VTlpuphp..c...........-.....pGthapShAtapAsppGpVDLscsssl.uGoYpGl-sMGLhWShcP...p+hshthhppsl......h.pshhlplpshssp............lupsphcRhahusGVpRh.VcEs .......hDEslpItlpGLsPtp..VTlcuphp..c................-..pG.s.hapupAtYpADppG.plD..Ls+ssul...GGoYsGl-PMGLhWohcP........p+sh.h...+..hh+psV..........sPh.hVplplhsup................thlupsshcRha.huPGVpRhsV+-.................................. 0 37 72 171 +1373 PF03496 ADPrib_exo_Tox Binary_toxA; ADP-ribosyltransferase exoenzyme Griffiths-Jones SR anon PRINTS Family This is a family of bacterial and viral bi-glutamic acid ADP-ribosyltransferases, where, in Swiss:Q93Q17, E403 is the catalytic residue and E401 contributes to the transfer of ADP-ribose to the target protein. In clostridial species it is actin that is being ADP-ribosylated; this result is lethal and dermonecrotic in infected mammals. 
23.40 21.90 23.50 22.10 23.30 21.80 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.33 0.71 -5.09 26 558 2012-10-01 23:25:29 2003-04-07 12:59:11 9 24 360 74 64 557 16 184.60 17 40.09 CHANGED AcpWGpcthcpatpp........................ossE+pAlptYTptsYpcINshL..Rpspsph.th...splpcp...........lcpl-uAhpKs.slPcsIhVYRtss.t.ht...t......................ppthpthpshhhu+hhp-tuYhSTSLsps.........uuFutp...l.h+lplsKGopuuYlss..l...............................Ssa.ssEhElLls+uspacl..........schsh.......sssppclll-Ahhltp ...............................................................................................................................................................sttch.tu....lt....Y.s........s......t..s..............h........t...tlN......phL......Rt..sp...............th......ppl.pp........................l...p.p.l...ssuhp..+....h.....h....p......s..h....h.l....Y.Ru........................................................p.thp.hh.p.th.h.....G.p.hhp.p.pu.ahS..TS..hstt......................................ts..hstp..........hh.h...clp.l.s.p.G.p...p.......u....h.h.lss........l............................................................o...th..ssEtElLls+ssph+l...............ppht.........................ht.............................................................. 0 25 39 48 +1374 PF03495 Binary_toxB Clostridial binary toxin B/anthrax toxin PA Griffiths-Jones SR anon PRINTS Family The N-terminal region of this family contains a calcium-binding motif that may be an EF-hand. 
19.70 19.70 19.80 19.90 17.80 19.60 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.50 0.70 -5.85 8 100 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 61 59 4 126 0 353.70 41 51.40 CHANGED s.sDTDsDuIPDsWEhNGYTl..pNKlAV..tWDDshAp.pGYpKYhSsPhcupTsGDPYoDaEKsuGcIDpuluhEARcPLVAAaPsVsVsMEKlILSKNEclSsppst........olSsuTSoSpTasNT.GAslsAuhuhhs.....hShuVSANYSHo.psTVus-aosSp........oaupohulNTA-uAYLNANVRYhNsGTAPIYcVpPTTohVL.csp...TlATIKAK-NphApsIsPsppYPpKupsuIALNTMDDFNS+PIslNYsQLcp.LpssKslpL-TsQssGsYuphs.osGplhlss..sWusllspIpupTASIIlssustss.E+RVAA+-.sNPEDKT.PcLTLKEALKlAa..uhpE.csGlLaY...Ns......KsIsEs.slphhlD-pTuppl+cQLpshss.....KslY....clKLpsKMNIhI+ssohhhs ..................DpDsDuIPDshEhNGYTl......psphAh.....tW-sphtc.pG.......hpKYlSsPhcusTsuDPYTDaEK.soG.phDpsss.pshsPlVAAaP.ltVshE+lllSpscshosppst........olS+s..oosS..po..p..os......ssGssspssh..p.h.s........huhuVSssaSpo.ppTss.pcS.up........sWupshslNoucuAhlNsNlRYhNsGTAshYpVpPTTslVL....ssp...oluTIKA..ppsp.h.up.LuPsptYPp+shsslulsohDpFuSp.IslNhsQLcp.l-ps.c.lpL-TsQspG.hshhs.psG.plhsts..pWuthhspIpthoAplhhsht.s.sh..chRlsAhs.psP.-pT.PphTlt-Alphua..thpc..ss.h.a...ps......h.I.c.....h.hDppTspphcppLtphts.....pplh....phhlp.tMNhhl+.............................................................................................................. 
2 1 2 2 +1375 PF02084 Bindin Bindin Mian N, Bateman A anon IPR000775 Family \N 21.70 21.70 24.20 21.70 20.30 21.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.88 0.70 -4.31 10 599 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 43 0 2 613 0 138.30 52 94.71 CHANGED YGNt...NYPQhhsPphGGsNYs.......GQ.sQQGYuspGM........GGPVGGG...s.uss.sssGt.uGslsGGG....huP.....h.spu.tss.uphc-YSSsshp-s...........-TTISAcVM-cIKAVLGATKIDLPVDINDPYDLGLLLRHLRHHSNLLANIGDPEVREQVLSAMQEEEEEEEpDAANGVR-NVLNNlN..NuPGsGGaGGstu...............uGtsGGh...sshG..sQGtGGshth.Ghssp...usuYN...QGYRQG ...............................s.......thsQ.hs..MGGuNY..............GQ.sQQ.....GYussGM...............uGPVGGG.......AMA.t.Ph...........G.G.GGAMAt...P..VGGGu............................uGPst..........h.G.....t.h....................ts.p-YSSs.....t...........tthIss....................................................................................................................................................................................................................................... 0 2 2 2 +1376 PF00351 Biopterin_H biopterin_H; Biopterin-dependent aromatic amino acid hydroxylase Finn RD anon Prosite Domain This family includes phenylalanine-4-hydroxylase, the phenylketonuria disease protein. 
22.00 22.00 24.60 24.60 19.30 19.30 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.06 0.70 -5.68 6 1616 2009-01-15 18:05:59 2003-04-07 12:59:11 16 13 829 37 525 1403 197 213.20 33 70.19 CHANGED PWFPR+IsELD+CsphlhpYss-LDhDHPGFsDpVYRpRRK.hA-IAasYKHGDPIP+VEYTcEEhcTWtpVFpTLpsLYPTHAC+EYhcsFsLLp+aCGa+EDNIPQLEDVSpFL+-pTGFpLRPVAGLLSuRDFLAuLAFRVFpCTQYIRHuSsPMaTPEPDsCHELLGHVPLLAD.oFAQFSQEIGLASLGAoDEtIEKLuTlYWFTVEFGLCKQsGplKAYGAGLLSSYGELhHuLS-+Pcl+sF-P-sTAVQsYpsppaQPlYaVuESFpDAK-KhRpaAuoIpRPFuV+YsPYTpuIEVLDSsppIpphh-slpsElphLssALsK ................................................................................................................................pYo.t-.tsWt..lhp.p.................thh.thAs.ttahpsh..l.....h..cplPplt.-lsphLt..............tto...G..a.plhs..ls.uhls.ttFht..hLA...+.hF......sspal.Rp.pp...Yh.E.....P..DhhH-lh....GHsPhl.....h..s.....s.........a..A...p.....a.p........t...h..G............hu.....h.t.s.........s...........t................t...h.t...........h...........L.u...p.......h.......aWa..TVEFGL.h.........p....p............t.......t.........h+hY..GuGlLSS..uE.....................a.s...h.......o.......s..t........s..ph.h.s.ap...sh.h.p.a.hs.hQ.hYal.ps....h....t............................................................................................................................................... 0 167 254 398 +1377 PF00364 Biotin_lipoyl biotin_req_enzy; biotin_lipoyl; Biotin-requiring enzyme Finn RD anon Prosite Domain This family covers two Prosite entries, the conserved lysine residue binds biotin in one group and lipoic acid in the other. Note that the HMM does not currently recognise the Glycine cleavage system H proteins. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.22 0.72 -4.34 49 25099 2012-10-02 20:27:15 2003-04-07 12:59:11 17 167 4936 74 7239 20327 9244 71.80 30 15.82 CHANGED tplpsP.hGphhp......thhVcsG-pVptspslshlEu.Khph-lsust.sGslpplhsptGss.VpsGphlhpl ....................................h..ltsP.l..Gps..h.p.........s....l....hphhVpsGDpV.........pt.sps.L......h.hl...E.u.......................K.....h.....p...h.....-......l.......A..s...........t.....s...G...........s.........l.p...c...l.h.....l..p.....p..G.ss..Vp.sGphlhh.............................. 0 2248 4430 6064 +1378 PF03744 BioW 6-carboxyhexanoate--CoA ligase Bateman A anon COG1424 Family This family contains the enzyme 6-carboxyhexanoate--CoA ligase EC:6.2.1.14. This enzyme is involved in the first step of biotin synthesis, where it converts pimelate into pimeloyl-CoA [1]. The enzyme requires magnesium as a cofactor and forms a homodimer [1]. 25.00 25.00 30.50 30.40 19.90 19.50 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.48 0.70 -4.94 17 405 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 395 0 39 195 3 228.90 45 69.10 CHANGED aSl+MRAS......psspHISGAEcls.stpclcphlsphlp+uhsHpp...G.psDFlslplEclp.csIphlp.sLslcohpstshEcu+phApplLppt............GlscphhcpAh.chlscs......shRGAhllshcoGpRL-s.ctpRGVRVophDhs-pp....p.tlhpps...spRsh-AlAlAoKVhst.GVlAELChSDDssYTTGYVAscphGYhRIsslKptGs..GGRlFFlcsshcl.p..shIphLEppP..VlIp ......YSl+MRuS......................spshHISGAEols...p.hccIEQTVpphhp+uhhHpp...G.psDFlsl.+lpclh.psIppI..AL.l...................hc-s+t.hpcLh....p.c.s............GVo.cpAlppuh.phlpst.....sshpGAllLsAhoG+RLD....u..spRGlRsT+Fu..hpshss...........ps....hs...cRhp-ALslASplsAtPhVhuELClSDD.sYTTGYhAssclGYpRlhslKssso.hGGRlIFVD.s.s.h.....sl..s...phIoaLEspP......t..... 
0 13 23 34 +1379 PF02632 BioY BioY family Mian N, Bateman A anon COG1268 Family A number of bacterial genes are involved in bioconversion of pimelate into dethiobiotin [1]. BioY is a component of the BioMNY transport system involved in biotin uptake in prokaryotes [3]. 23.10 23.10 23.10 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.91 0.71 -4.56 153 2666 2012-10-03 02:46:00 2003-04-07 12:59:11 9 7 2228 0 579 1839 1577 149.40 30 79.42 CHANGED l.Ph....ss..VPlThQshuVhLuGslLGs+hGsluhllYLllGslGLPVFuGG..p.G.G.luhhhGPTuGYLlGahhuAhlhGhls.......c+hthp.................hhhhhhshlsGhlll.YshGhhaL..s.....hht..........t...lshsp....Alh.....huh.hsFlsGDllKsslAshlsht....l.pchh .........................sh.ss..VP..lTLQshulh..LsGhlLGs+hGslohhlYlllG.sl.....G.L...PVFuGG.......p...u...G.l.....usl.h..G.P.TuGYLlualls.Ahl.sGhlt.....cphtpt......................................h.hhhh.h.s.h.l.hG.hhll.al.h.Ghhhl..t...........hht....................s......hshtp......Alhhuh.hsF.l.ssDl.l.K.slluuhluhtlh..h....................................... 0 205 400 503 +1380 PF00653 BIR Inhibitor of Apoptosis domain Bateman A anon Prosite Domain BIR stands for 'Baculovirus Inhibitor of apoptosis protein Repeat'. It is found repeated in inhibitor of apoptosis proteins (IAPs), and in fact it is also known as IAP repeat. These domains characteristically have a number of invariant residues, including 3 conserved cysteines and one conserved histidine that coordinate a zinc ion. They are usually made up of 4-5 alpha helices and a three-stranded beta-sheet. BIR is also found in other proteins known as BIR-domain-containing proteins (BIRPs), such as Survivin (Swiss:O15392) [2]. 
22.90 22.90 23.00 22.90 22.70 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.81 0.72 -3.55 184 1889 2012-10-01 20:49:39 2003-04-07 12:59:11 16 35 348 189 901 1890 5 67.90 37 17.22 CHANGED RlpoF.p.t.......................WP...hth...psp...p.LAcAGFaYsGt................sDpVpCahCphtl....psW.p.sD......sPhpcHt+atP....p.Ct...alp ...............................RlpTF.p..........................WP.......h..........s.s.s.cpLAcAGFaYsGt...................sDpVpCFhCsstL....p.sW.c.sD.......................cPhp-Ht.+ahP.....p..CtFl.......................... 0 236 332 623 +1381 PF04197 Birna_RdRp Birnavirus RNA dependent RNA polymerase (VP1) Bateman A anon Pfam-B_2204 (release 7.3) Family Birnaviruses are dsRNA viruses. This family corresponds to the RNA dependent RNA polymerase. This protein is also known as VP1. All of the birnavirus VP1 proteins contain conserved RdRp motifs that reside in the catalytic "palm" domain of all classes of polymerases. However, the birnavirus RdRps lack the highly conserved Gly-Asp-Asp (GDD) sequence, a component of the proposed catalytic site of this enzyme family that exists in the conserved motif VI of the palm domain of other RdRps [1]. 
25.00 25.00 33.30 30.80 17.20 17.10 hmmbuild -o /dev/null HMM SEED 860 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.67 0.70 -6.51 5 338 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 26 27 0 228 0 401.50 68 97.83 CHANGED MSDVFNSPQsRuoIosALGhKssuupDlc-lLlP+pasPPcDPlsus...pcAApaL+-NpY+lL+PRuIPE..Np.l-TDsh.hP+Lsph..l-...sGcLtDT..VSlPtGoocaIPKYYPsHKPo+pcsssa.PPDlTLLKQhoYpLLpss-so-Npt......-Tl+pLpcAIsTppYGSGShpGQlsRLlAMKEVATGR...NPNKoPKclGY.ThEclAchLDpTLPIsPPtsDDc.hlsLossLSaLl.hTsDsuss...cDYLP+IThKSSAGLPalGKTKGETssp.ALsluDpFLR-VSphLK-GAsTuuss.........................cpcLc+lLsDYWYLSCGLLFPKuERY-pscWLTKTRNIWSAPaPTHLLLSsISsPlMcsShNNlhNs.cTPSLYKFNPF+GGM-sIVshI...Lus.sEslhLVYADNIYIhh-N.....TWYSIDLEKGEANCTP-HAQAsuYYLLTRGWTs--GoPtFNsTWAThAMplAPuMVVDSSCLLMNLQlKTYGQGSGNAWTFLNNHLLSTIVVsKWscsGpP...................NPsSKEFtcLEutTGINFKIERsIcsLRpKLpEAscpAspsGYL.S-GoE.PPcpPuPTVELDLLGWSATYS+ah-hFVPVLDKERLasSAAYPKGlENKsLcuKsG...AEQAYKlVRYEALRhVGGWNYPLIspAscssAps+Rs+....LcsKGhsLDchlu-..Wsc..h.SEFGEslEslol..ccsVTspsLt-LNtsscshcPsVs+shsRs...sL+cVoNALppGsYKsspossGchLsstA+SRI.....ps.h...+scslt-pls+LKPscscuDsWs-RsEptustlctLh+AsslhcppLcEsucALEsVQ......SscllsuKpPpEKssppAoNPVVGY+..uc+h...........sShP...p..sLoPsu+KttKRRpKQ+cp 
..................................................................................Pp....EhEsDQI.LPD.....LAWMR..QIE....GAVLKPT.....LSLPIGDQEYFPKYYPTHRPSKEKPNAY.PPDIALLKQMIYLFLQVPEAs-sLK...................DEVTLLTQNIRDKAYGSGTYMGQATRLVAMKEVATGR...NPNKDPLKLGY.TFESIAQLLDITLPVGPPGEDDKPWVPLTRVPSRMLshTGDsss-........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +1382 PF01766 Birna_VP2 Birnavirus VP2 protein Bateman A anon Pfam-B_946 (release 4.2) Family VP2 is the major structural protein of birnaviruses [2]. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) [1]. 
19.70 19.70 21.60 21.10 18.70 18.70 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.32 0.70 -5.56 4 2317 2012-10-04 01:49:40 2003-04-07 12:59:11 12 5 39 48 0 1558 7 234.70 75 85.90 CHANGED ppspspYL+SLLhPpsGsuSIPDDhhtRHsL+sEToTYNLpsusoGSGLIVhFPssPuSllGAHYphsuss.shhFDQhlhTuQ-LKcuYNYuRLlSRplsV+SSTLPAGVYALNGThNAVTFpGSLSElpDhSYNuLhShTuN.pDKVGNVLVG-GVsVLSLPsGaDhPYVRLGDcsPth.pSs.thsspCssuspPRtYpIssss.......V.ssGspsphauhNlDulsssslss-hphphpsps.hshshpsh.luhsGhssssRslohossshusTspa......hstlhspspITpPlsulKlphph....u.tsusshu.sssSSluloltGGNhPGsLRPlTlVAYEpVAsGSllTluGlSNYELIPNPELtKNlsTpYG+hDPtshsYsKhILScR-cLGlRoVWsht-Yp-hppYFpEloDhsSsL+hAuAFGatDll+uIR ...........................................................................................................................................................................................................................apIshusph......s.YQsGGV...TITLF.SANIDAITSLSlG.GELV.FQT....SV.Qu...LlLGATIYLIGFDG..TAVITRA....VAAsNGLTAGTDNL....MPFNlVI....PTsEITQPITSIKLEIVTS...........KSGGQAGDQMSWSAS.GSLAVTIH..GGNYP.GALRPVTLVAY................................................................................................................. 0 0 0 0 +1383 PF01767 Birna_VP3 Birnavirus VP3 protein Bateman A anon Pfam-B_946 (release 4.2) Family VP3 is a minor structural component of the virus. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) [1]. 
20.80 20.80 29.10 28.50 20.30 20.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.65 0.70 -4.75 5 161 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 30 2 0 176 0 219.30 65 23.46 CHANGED lcYLs.......cLtMsosuSutcPELEcslcAhMAcA+clPsup..................KlLsLhSWsRcNuLlDcMacWAp.........pDPcAlRhcRhLuNsP+cGpKspctKhhsts....uKGPTpcs.......AQ-AKAsRISpDAscsGt-FATPEWVA..pNsaRGPoPGQhKYYhtTGhsP-PG--YpDYl+psloRPss-sKItRLAsSlYGhPsQEPAP--FhDtVAcVas-NsGRGPsQ-QM+DLRctARcMK+R .....................L.PYLPPsAGRQ..acLAMAASEFKETPELESAVRAMEAAAsVDPLFQ..................SALsVhhWLcc...N..GllscMssFuh.........oDPsAcRhcshLussPptupK........ts....s+GPT.cc.......AQ+tKsTRIStch.shGh.FATPEWVA..LNGaRGPSPGQhKYapsTtclPDPNE-Y.DYV+s.ppoRhss--pIhRhAsSlYGsPsQs.sPpsFlDcVAcVY-.NcGRGPsQEQM+DLh.tAhcMK+R....................... 0 0 0 0 +1384 PF01768 Birna_VP4 Birnavirus VP4 protein Bateman A anon Pfam-B_946 (release 4.2) Family VP4 is a viral protease [1]. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) [1]. 21.20 21.20 21.20 22.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.62 0.70 -5.33 4 354 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 42 21 0 303 3 175.60 57 29.75 CHANGED VAsPVlSTLFP.AAPLIGAADphlssLhtssAuuGRhsu+AAuG+h+ss.cphtsho.suphuhplcspL........EssNapcs-l.+s.........ppuslFPVVhTs...cssPu-s.uphhsVI.Gth.-L.sPNQpshshaphsstpVaGhupD..lPhEssc.sYTsLPlc-l.hsGsIslpK..hsPlhGsSuQLAI.hhssslcpGV....Ph.hsFTGplsts..olh.IpGVsl...KhhsAHcLGLPLlGspPGls-hsssTSLAs+lh .............lAsPVlSTLFP.AAPLhtAhsphls.LhtspAtuuphpu+AAuG+h+ss.sphtpho....suchuhplhspL.........ss...htpstl..s.................lhPVlh............................................................................................................................................................................................................. 
0 0 0 0 +1385 PF03042 Birna_VP5 Birnavirus VP5 protein Bateman A anon Pfam-B_1772 (release 6.4) Family Birnaviruses are ds RNA viruses. Non structural protein VP5 is found in RNA segment A. The function of this small viral protein is unknown. The proteins are about 150 amino acids long and contain several conserved histidines and cysteines that might form a zinc binding site (Bateman A pers. obs.). 25.00 25.00 33.40 39.90 19.50 18.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.73 0.71 -4.66 7 113 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 16 0 0 102 0 123.50 64 94.44 CHANGED hpDEHppuNRNLhElHYASRDWss+...HSGRHNtEsHsKTRDLVlQh..RGhRlRKhsSCLhPWtohlpstCoLQsEsEPDGstlRPVAsDlsGPcEulQLhEAslpEIR+scLHsstWsLCochD.cRpcLRRpSl .sDc.s+uN.s.spVHstspDANsRTGVHSGRHPtEAHoQVRDLDLQhDCtGaRVR..AsCLFPWhPWLsCtCSLHs.AEQWEhpVRssAPDssEPstpLQLLQASEoEs+ppVKHTsWWp.LCTKhc+KRRDLPRKP.. 0 0 0 0 +1386 PF03493 BK_channel_a Calcium-activated BK potassium channel alpha subunit Griffiths-Jones SR anon PRINTS Family \N 26.80 26.80 27.50 26.90 26.00 26.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.24 0.72 -4.06 35 665 2009-09-11 05:04:26 2003-04-07 12:59:11 13 15 154 10 289 608 6 99.50 40 9.24 CHANGED shsphhhphs.-pllslpElKhsllApsslsPGhsTllsNLlpopp......phss.t..........ppWhspYhpGhtp-Iaphtl.sspF....hGhoFsplsthhacphsllLlGlE ..................................................s..spaphK.u..DcslChsEhKhuh..............lAps.C.l.sPGhSThlssLhphpp..........h.ch.pp..............................................-pW.p+hYhcusu.NEhYs.hL..sSsF.................hGhSFstss.hsatKh..t..lhLIulc......................................... 0 105 144 218 +1387 PF04940 BLUF Sensors of blue-light using FAD Yeats C anon Gomelsky M, Klug G Domain The BLUF domain has been shown to bind FAD in the AppA protein (Swiss:Q53119). AppA is involved in the repression of photosynthesis genes in response to blue-light. 
25.00 25.00 27.00 26.20 23.40 23.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.76 0.72 -4.26 154 831 2009-01-15 18:05:59 2003-04-07 12:59:11 7 19 584 55 218 692 223 91.60 38 33.13 CHANGED LhpLlY..hS.........ps.p.shs......tppltsIlppupppNtptslTGhLla......s..sshFhQhLEGscpsVppla.p+IppDsRHpslthlttp.l.spRtFss.WsMsh .............................LhpLlY..tS.........ch..pss.hs.......spclppllshApppNhpsslTGlLla........s..sspFhQlLEGs.c-pVptla.cpIppDsRHtsls.Lh.p.c...h...stRpFuc.huMt.h.......... 0 55 135 179 +1388 PF02608 Bmp Basic membrane protein Bashton M, Bateman A anon COG1744 Family This is a family of basic membrane lipoproteins form Borrelia and various putative lipoproteins form other bacteria. All of these proteins are outer membrane proteins and are thus antigenic in nature when possessed by the pathogenic members of the family. One protein Swiss:032436 is a transcriptional activator [2]. 20.80 20.80 20.80 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.58 0.70 -5.68 13 3407 2012-10-02 13:57:41 2003-04-07 12:59:11 9 10 1918 5 933 2720 1656 294.70 25 81.15 CHANGED hhhlulhssGslsDKuFsppua-Glp+hpcch..slchhhtsust...............sshhsshpplpcpshDLIhssGaths-slptlusch..........PchpFhllDuhhpp-...............NlsolsFRspEuuFLsGhhAAhho+psphualu....................GhphshhpsFhhG.FctGs+YsN.....clclhsphssoasDsstupshApphhp.cGlcVIas..huGhsshGVhpsA+-hG.p......hsIGhDpDQ.....uahuscsllsSslpslschhhphhpphhpss..h.sGpshphGL+cusVGhs................c..chh.schhcchlphtpKhl.ttl.lsspc 
.............................................................................................................h...shlhssG.slsDpSFNpus.h.p.G...hp..th...tcc....h........s......l...c....h..t....h..h.p.shs.....................................................t.s.-.h..t.s..s.l.p.p.h.....s.p....s.....u.h.....slIh..u.s.Gatht......s.s.l......t......p...s......Apca.............Pc..h.p.F..s..l.l...D....s...h...hp.sp............................Nlsohsa.pppE.uuYL.uG....hhAu..........h.....h.o.....K........o........s.......p.....l.....G.....a..lG...................................................G....h..p..h..s..h.l..pcF....sG..F....t....t.Gs....c....ss....s.........................ssl..p........l........t......s.......p.......a.............s......s...............s.......a...............s.....D......s....s....K....u..cphA.p..u.hhs...pGs......D..llap.........suG.ss.G.sGl...h.ps.A.cp.t.sth.......................................sIG.VDp.DQ..................s.t.h...s.....s.....p....s......l.....l.s..S.s.l.c....p.h.ss.shhphs.pp....hh....c...u....p...................t.......u.......p....h......h....h...h...GL.c-.s..u...V.s.ls.........................................p...h.s..t...p..h...h...p.t...h.......p.......t..............t.................................................................................................................................. 0 342 623 769 +1389 PF01722 BolA BolA-like protein Bashton M, Bateman A anon Pfam-B_1996 (release 4.1) Family This family consist of the morphoprotein BolA from E. coli and its various homologues. In E. coli over expression of this protein causes round morphology and may be involved in switching the cell between elongation and septation systems during cell division [1]. The expression of BolA is growth rate regulated and is induced during the transition into the the stationary phase [1]. BolA is also induced by stress during early stages of growth [1] and may have a general role in stress response. 
It has also been suggested that BolA can induce the transcription of penicillin binding proteins 6 and 5 [2,1]. 25.70 25.70 25.70 25.80 25.60 25.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.54 0.72 -4.02 238 4233 2009-01-15 18:05:59 2003-04-07 12:59:11 13 22 2088 10 1342 2705 2605 73.30 33 73.74 CHANGED p.Lppsl.ssp.p.lpVps.S...............................tsHFclhlVSstF.pGhshlpRH+hVassLt-.cl..sss....lHALul.+.shT.PpEa ................................................h.lppuh.sst.p.lcV...ps.u..........................tG..toHFplhlVSctF.pG.h.shlp+HphVYssLt-pl..sss....lHAL.u.l.+.shTPpEa.............. 0 378 743 1064 +1390 PF02044 Bombesin Bombesin-like peptide Mian N, Bateman A anon IPR000874 Family \N 18.10 18.10 19.60 18.40 16.80 16.60 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.87 0.72 -5.80 0.72 -4.41 12 105 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 53 2 39 125 0 13.20 79 12.60 CHANGED GspWAVGHhMGKKS .GNpWAlGHhMGKKS 0 2 5 9 +1391 PF02414 Borrelia_orfA Borrelia ORF-A Bateman A anon Pfam-B_1805 (release 5.4) Family This protein is encoded by an open reading frame in plasmid borne DNA repeats of Borrelia species. This protein is known as ORF-A [1]. The function of this putative protein is unknown. 
19.60 19.60 20.70 20.50 19.00 18.60 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.19 0.70 -5.30 25 681 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 33 0 37 578 2 235.80 31 82.34 CHANGED KhQaK...LIsLISTLpYlNpphKK.....................YoQpsILYaFNpNLKRNGQ+slplKTLQpYLY+L-K.hpVTp...NYa+HLG.lNh...GTEIYYKLpasKcc.CaphIN.pYFc-+Kcp+apsRl..........sshh..pcchsKpssVpht....................................................EChsNps.s..NK...cEc..pKptc..IEchplpKYhpKCNFhsp.........llpLplpK-tpIclhKhhK+hE.tlhK..................................p.hph.c...pphKsKpppLKpILpNh+..hphcpcsYspcQLcpphpchYc.pYKsKPHFIIEppKYs..DLspI.htKlKcshcpp.Kpsspcshp.pIKsNI.........aN ................ph.hK...ll.lhusl.alNpph.....cc.....................YsQpsILhhhNpNLp+ss.p.ssl+Thpp.LhhLp+hhtlp....sahpphG.hsp....Go..h.aYclph.h........p.s......aphIs.paFpppctphhpphh.........................pt.h......ppph....p.t.s.lp...........................................................................................................................................................................................................psh.spp......sh..hpp...+.....h.p....p.p...........p...h..tp.sh...................ppt.h..hphhp..p...c.tlh+.............................................h.....phpp.pppph+phL.shp..hph.pptYp.cpl...h.p.hp..YK.K.ahhhcptchp..Dh..l.htchccpht.p...Kpp..pps.p..php..h......................... 1 29 29 29 +1392 PF03183 Borrelia_rep Borrelia repeat protein Mifsud W anon Pfam-B_2029 (release 6.5) Repeat \N 20.60 20.60 23.00 20.90 20.00 20.20 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.02 0.73 -6.24 0.73 -3.84 14 96 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 5 0 0 96 0 18.00 64 43.37 CHANGED TlSILlS+SLhoDhp.h. TlSILLSRSLhSDFsShh.... 
0 0 0 0 +1393 PF00228 Bowman-Birk_leg Bowman-Birk serine protease inhibitor family Finn RD anon Prosite Domain \N 20.40 20.40 22.20 20.80 19.40 19.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -7.35 0.72 -4.08 55 570 2009-01-15 18:05:59 2003-04-07 12:59:11 15 4 80 50 85 586 0 25.90 43 37.76 CHANGED sCcsChCoc.....ShPPpCpCtDhhpt.Ch .sCcpC.h.CTc.....ShPPpCpCsDht.t.Ca.... 0 0 20 52 +1394 PF02653 BPD_transp_2 Branched-chain amino acid transport system / permease component Bashton M, Bateman A anon COG0559 & Pfam-B_654 (Release 7.5) Family This is a large family mainly comprising high-affinity branched-chain amino acid transporter proteins such as E. coli LivH Swiss:P08340 and LivM Swiss:P22729 both of which are form the LIV-I transport system [3]. Also found with in this family are proteins from the galactose transport system permease [2] and a ribose transport system [1]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.59 0.70 -5.43 51 33622 2012-10-02 17:14:55 2003-04-07 12:59:11 11 38 3614 0 9346 25680 15397 275.60 20 80.62 CHANGED tlhshlthus.lslhuluhslsh.hsGhhsluhuuhhshu.uhssshhhthhs.......................................hhlullhuhlsusshGhlsuhlhhthplspl.........lsolh..................hhhhhhulshhhhtthhts.tsss.....h.sshhsh.sshhh.hshthhh..........................................................................lhhllhsllhhhllt+TphGhtlpAlGps......puAchsGlslp+hphhsasloGhhAuluGhlhshhhsss........shuhs....hthtslsssllGGs........ssshGslluullluhlps.shshhth.....................sthttllhGhlllhsl 
.....................................................................................................h...shltt.ssh.h.sll..A...l..G..h..s..l..l......h..h.......s.......G.......h.......l.....s.......luhu.u.hh.slu...u...h..s....s..s...h.h...h..t..t.h.s.....................................................................................hhl.u.l..l.....h.u..h....l....s..u....s........l..h.....G.....h.....l....h.....G.....h.....h.....h.....h.....+.......h......p.......h.....s...........h.................l..s...o..l..h.............................................................h..h..h...h...h....h.....s...l....s.....h..................h......h.........t......t......h...h....s....s....s.....t....s.....s...................h.h.t...s...h....h....t.....h......h....h...h.....h.....h....s.....h..h.......h.hh....................................................................................................................................hl..h..l....l....l...s...h...h..h...h...h.....h....l....p....+....T...p...hG..pt...lhA..lu....ps......pu.A.chhG.lss.p+hphhsasluuh.lAul...A..G.s..l.h..u.t....t..h.s.s.sss..........shu..h.s.........h.t.l..p..u..lss...sl.......lGGh..................Gsl.h.G.s....l..l..G.u....l....l....l..u....h.....l....p.s......s...h...s..h.hsh........................................s...................s.hp.....l...l.h.uhlllhh......................................................................................... 0 2466 5546 7467 +1395 PF00634 BRCA2 BRCA2_repeat; BRCA2 repeat Bateman A anon Prosite Family The alignment covers only the most conserved region of the repeat. 20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.08 0.72 -4.65 25 1602 2009-01-15 18:05:59 2003-04-07 12:59:11 13 37 218 1 293 1604 4 33.50 36 11.38 CHANGED hcsshsuFpTASGKplsVScpoLpKu+plhs-hct ...........s.hhuFpTASG.K.plp.VScpuLpKu+pl..Fp-hp............. 
0 99 128 183 +1396 PF02498 Bro-N BRO; BRO family, N-terminal domain Mian N, Bateman A anon Pfam-B_1235 (release 5.4) Family This family includes the N-terminus of baculovirus BRO and ALI motif proteins. The function of BRO proteins is unknown. It has been suggested that BRO-A and BRO-C are DNA binding proteins that influence host DNA replication and/or transcription [1]. This Pfam domain does not include the characteristic invariant alanine, leucine, isoleucine motif of the ALI proteins [2]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.92 0.72 -3.70 257 2580 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 1532 0 314 2002 73 91.30 24 37.17 CHANGED pl+slh......psschWasup-ls.psL..sa........sss.p........csh......hpcl.cc............ptp..h..................................st........................h............hhlscsGlapLlhpS......c........hs.....p..AcpFppWlh...p-llP ......................................lRhlh....ssc..a.FsupDls.phL....sa....................sss..t...............csl......tppl.cp...............cpp.tth.....tp...........p..............s.ut....t.........................................................pp..h...............hhlsEsGlYpLl.h.pu..c...............hs...............p..AcpFppWlh....ppllP............................................................ 0 99 222 272 +1397 PF03032 Brevenin Brevenin/esculentin/gaegurin/rugosin family Griffiths-Jones SR anon Pfam-B_1232 (release 6.4) Family This family contains a number of defence peptides secreted from the skin of amphibians, including the opiate-like dermorphins and deltorphins, and the antimicrobial dermoseptins and temporins. The alignment for this family includes the signal peptide. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.63 0.72 -4.12 19 1689 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 76 0 1 1661 2 43.60 54 62.46 CHANGED shLKKSLhLlLFLGhVSLSlCEEEKc.-sE...sc-cpct-cpp...Eh+R ...........FThKKSLLLLFFLGT.ISLS.LC.E..c...ERs.AD.EE-..tc-tpp.c..........c........................................ 0 0 0 1 +1398 PF01318 Bromo_coat Bromo_CP; Bromovirus coat protein Finn RD, Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 25.50 245.80 20.40 24.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.01 0.71 -4.59 3 16 2012-10-04 01:49:40 2003-04-07 12:59:11 13 1 6 39 0 19 0 188.60 71 99.28 CHANGED STSGTGK.LTRAQRRAAAR+N.RhT+cVQPVIVEPLASGQGKAIKAhTGYSVSKWsASssAlcAKsTsAlSIoLPcELSSE+NKpLKVGRVLLWLGLLPSVAGTVKuCVTEKQsoAAASFQVALAVADSSKEVVAAMYsDAFKGlTLGDLpsDLoIYLYSSAALsAsuVlVHLEVEHVRPTFDDpFTPVY .STsGTGK.hTRAQRRAAAR+N...R..pTphV...QPVIVEPlASGQGKAIKAhsGYSlSKWpASssAhp.AKsTsAloIoLPsELSSE+NKpLKVGRVLLWLGLLPSVuGpVKuCVTEpQssAuAuFQVALAVADsSK-VVAAMYs-AFKGlTLtpLhscLpIYLYSStAlsAtsVlVHLEVEHVRPTFDDhFTPVY.. 
0 0 0 0 +1399 PF01573 Bromo_MP Bromovirus movement protein Bateman A anon Pfam-B_508 (release 4.1) Family \N 25.00 25.00 26.20 26.60 17.80 17.30 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.50 0.70 -5.42 16 150 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 28 0 0 151 0 263.30 43 94.53 CHANGED ssppulol...otc-.ppL.cplscshpsshtpshsh+tCsshthp.Nssuhpsh-LsoK-s+uhlo..+hssKl+pplhVcHshIaLlYlPhILpoTsussslKLhNhATG-chslGsphsLNcAFIlthsWPRSlhscss.pt+GLaLshpss.Assl.ssutlGphhPhW--sho.tK.hYpcsss.hshsht-s.ss+.lhocKhhpSLlpSphptshsuppcts.hlps.plp..cph....cFTlpphp.ssss...........tspstppstlsstpshh-pshsss ..................P.SoSsFSV...Sh.D-hspluc-lcclh.usphpslsTKtCahLpLl.NhstsssLcLsSKEpKuFLo..R.uDKVKp+lYhshutlaLlYlPlI.s..TTSGllTLKLpNssTGEhsDVsTDV-AN+AFllhsRWsRSLhtsA.....-LsLlholS..sscV+ssA+VGchhsFWDE+hS.+pQhY.-cuNslhFPItETcss+hlsscKlLhShVRSRlhsGspupc.hs.spslpscRlussc+s.....hTlp.tsshsc-............hcstspshsusscshhE-thh................................... 0 0 0 0 +1400 PF04450 BSP Peptidase of plants and bacteria Finn RD anon Pfam-B_5066 (release 7.5) Family These basic secretory proteins (BSPs) are believed to be part of the plants defence mechanism against pathogens [1]. 
22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.54 0.71 -4.86 24 266 2012-10-03 04:41:15 2003-04-07 12:59:11 7 11 162 0 155 275 4 193.70 30 72.57 CHANGED tsslphclps..s.sosGupp.......................Ftphl...s.uppsLssAsphlhphh.ps..ss-+......................ss..csVTl......hlcDh-GVAhooG.....................ppI+hSspYltshss..scs+....tElsGVLhHElsHsaQass..............ps.pssuGLIEGIADaVRL+AGhsss+WtpPusG.........scWD.pGYphTAhFL-ah-s..hs.GFVuclNcch.+ssY...........s-saahplh.Gcs..VppLWp-Y .........................................................h...h.hp.ps....pp.u.uthF.p.l......stphltpu.hthlhphh.ps..stsp...........................ss..ppVsh..............hl.c.s....h..sG.VAassG.....................ppIahSspaltphs.......sphp........tEltGVLhHEhs.Hs...aQass................................ps..psPuuLIEGIADaVRL.c...u.u..h..s..s..s..p.....Wt.p..ss..tu..................ppW-.pGYphTAhFLsalcs.....ht.uhVtplNcph....csta....................................pcshahplh.Gps.....lcpLWp-Y.................................................................................. 0 33 107 133 +1402 PF00779 BTK BTK motif SMART anon Alignment kindly provided by SMART Motif Zinc-binding motif containing conserved cysteines and a histidine. Always found C-terminal to PH domains. The crystal structure [1] shows this motif packs against the PH domain. The PH+Btk module pair has been called the Tec homology (TH) region [3]. 21.80 21.80 21.90 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.86 0.72 -4.33 26 515 2009-01-15 18:05:59 2003-04-07 12:59:11 14 28 86 12 259 461 0 31.70 38 4.63 CHANGED ppYHPshah..sGcWhCCppss+sAsGCshssst ...............pYHPuhah..sG+WhCCpQs....s+sA.GCphhps.t....... 
0 41 60 128 +1403 PF04514 BTV_NS2 Bluetongue virus non-structural protein NS2 Kerrison ND anon DOMO:DM04350; Family This family includes NS2 proteins from other members of the Orbivirus genus. NS2 is a non-specific single-stranded RNA-binding protein that forms large homomultimers and accumulates in viral inclusion bodies of infected cells. Three RNA binding regions have been identified in Bluetongue virus serotype 17 (Swiss:P33473) at residues 2-11, 153-166 and 274-286 [1]. NS2 multimers also possess nucleotidyl phosphatase activity [2]. The precise function of NS2 is not known, but it may be involved in the transport and condensation of viral mRNAs [1]. 25.00 25.00 83.80 37.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.37 0.70 -5.57 5 118 2009-09-11 00:56:28 2003-04-07 12:59:11 7 2 50 2 0 126 0 347.00 51 97.69 CHANGED MEQ..KQRRFTKNVFVLDtppKTlCGpIAstsupPYCQIKIGRshAl+sVsTPEPKGYVLEIs-sGuYRIQDGsDIISLMISs-GVEuTTERWEEWKFEuloslPMAssVslNG.......spsDAEIKYsKGMGlVPPYTRNDFDRREhP-LPGVp+ScYDVRELRQKIREEREKusccpsppsuhKsE....RhhttscsD-DpsshsptAs-hsPcTp+p-pccERRcsltp+lt-t.p....hphp.-c+cc-h++cpcppcsscscSD-s.......D-cGEDS-...--EscscoYITpsYIERIu+l+KhKDERLSuLAStMPQsuGcassMIFoKKpKW-NVPLYsIDEsuK+YELQSVGuC-RVAFVSKGhSLIILPVuu .................................................MEQ..KQR+FTKNlFVLDhstKTlCGtIA+tsSpPYCQIKIGRslAh+sVpsPEPKGYVLplsssGAYRIQDGpDlISLMlTspGVEuTpERWEEWKFEulSssPMATtVphNGshVDAEIKYsKGMGlV.PYhRN.D.F.DR.pEMP-LPGVh+S..sY...D...VRELRQKIKpEREuusch...p.pul....us+pE....sRWh......D-DEsplDEtup-hhP.tst+l-p.cEtRsslh+clttt.........hphs.pE+pct.+sEp-...cp-phps.SDc..........DEpsEDup...--E.cPcoaITcEYIE+luK.hKhK..DERh.SLuStMPQsuGsas+hIhoKKhKWpNVPLYsaDEuoK+YELQsVGuC-RVAFVSKDhSLIILPVG.V................................................................................................................................................................................................................................. 
0 0 0 0 +1404 PF04426 Bul1_C Bul1 C terminus Kerrison ND anon DOMO:DM04045; Family This family contains the C terminus of Saccharomyces cerevisiae Bul1. Bul1 binds the ubiquitin ligase Rsp5, via an N terminal PPSY motif (157-160 in Swiss:P48524) [1]. The complex containing Bul1 and Rsp5 is involved in intracellular trafficking of the general amino acid permease Gap1 [2], degradation of Rog1 in cooperation with Bul2 and GSK-3 [3], and mitochondrial inheritance [4]. Bul1 may contain HEAT repeats. 24.20 24.20 24.20 24.70 24.10 24.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.71 0.70 -5.20 17 182 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 62 0 117 183 0 186.50 20 30.29 CHANGED HDQENWhpLhphls-pE+psLpcLslpLoCIQSNNShsH.sPPcIpSlTTELlCITu+SDN.SIPIKLsuchLh.sc-Klssl+psFpsappplp-Ypp+FpcNhcKLN-LYNhs+sh..ssREL+FoDFIosQlhNDlESLuNLcVplpsLpplFKKQhpohcspsp......stssossshtsp..ttsp.......................s.tthhppplhppWhppss.pYcRpls..VNLphNp.cl+ET.........LVPoFESCLCCRFYslRVsIKF-pHlGo.splDlPVsV+ph ..........................................................................................................................s.t.....hptlpsphhshohpo.p..shPh.hp.thhh...tp............................h.............ht......................h.....h..ph..slpshtthp.p.........................................................................................................................................p..t....phpppht......lsl.p.h.tt.....p.s.................llPsFpoChhsRhYhl+lplch.t.p............s........s.......hplclPlplt.............................................. 0 11 58 112 +1405 PF04425 Bul1_N Bul1 N terminus Kerrison ND anon DOMO:DM04045; Family This family contains the N terminus of Saccharomyces cerevisiae Bul1. Bul1 binds the ubiquitin ligase Rsp5, via an N terminal PPSY motif (157-160 in Swiss:P48524) [1]. 
The complex containing Bul1 and Rsp5 is involved in intracellular trafficking of the general amino acid permease Gap1 [2], degradation of Rog1 in cooperation with Bul2 and GSK-3 [3], and mitochondrial inheritance [4]. Bul1 may contain HEAT repeats. 28.70 28.70 29.00 28.70 28.40 28.40 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.50 0.70 -5.92 27 164 2012-10-02 22:29:00 2003-04-07 12:59:11 7 7 69 0 105 170 0 361.50 28 49.70 CHANGED ssspshphss.spss....sps.lhDlLPSFcMY.solp++lspushs.Dh+shPPoYp-spsppss................sstsssshs.tp.sh.ssstp..sps.p............................p....p.sl.t.t..................................sht....Dshs..p.pshhlDplapLPKLo......o.Pl-lpI+lTKcsspP.pt.ps-.-ohLKEYosGDlIpGYslIpN+SspslpF-MFYVoLEGhhsll-+..........p+sKpplKRFL+MhDloASWoYsslshusGhp.....hhssphD.hDsshlGLsssRlLpPss+YKKFFhFKlPppLLDssCc.cphhsHsLlPPShGlD+hppts+hptIphNpsLGhGalsh+GoPlL...................TpDhu.-slSIsYol-ARllG+s..................pcsschsIhKEppY.LRlIPhsht........sshhsppsshp........hcslhchlpc+l..pthcclhpclcppcs.lss....-l+spsloush ........................................ttp.............t.ttp.l.slLPSapMa.pslhpph..s...sp...shp......PPsYt....pp.s.......................t.s.t....................t.t.........................................................................................................psht..t.pphhl-plhpLspls......s.slplpIhhT+phs..t......-s.lpEYppGDhlpGahhlpNpSspsl.F-MFhVshEuhhphhsp..............tt.hph++FLcMhDhsASW.s.spls..ss.p........hh...hD.hDsshhulsssRlLpPshpYK+FFsFKlPppLLDssCp.p..p......h......h......s......Hs.hLPPohGls+.hp..sphtthths..p...........................s.h.............................hpDhu.tshSlsYslpA+hlG+t...........................psschhlhcEtph.lRllPhsh.........t....t.ps.........hpsh.p.lpp+h......thhppl.pthp.t...................h.............................................................................................. 
0 10 51 98 +1406 PF03557 Bunya_G1 Bunyavirus glycoprotein G1 Bateman A anon Pfam-B_653 (release 7.0) Family Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase. This family contains the G1 glycoprotein which is the viral attachment protein [2]. 25.00 25.00 31.90 31.90 18.10 17.80 hmmbuild -o /dev/null HMM SEED 871 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.79 0.70 -13.63 0.70 -6.74 15 361 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 74 0 0 338 0 609.20 35 56.09 CHANGED YsslAppLtuhstIS.LDhshlshlP--IuuAL+hIEsp+TYHp.lhlEhshLo+YCDYYopFscNSGYSQTsWRhaL+oHcF-lChhaPNpHFC.RClpcusKCssusWDFAsEMpsaYouKps+Fs+DLNLhLpsh+pAFRGTopualsphlpcKcssslhphhsKl+pKaPsNsLLhullcahpYLhuLschosacLsp.Wc-hlapspspppsphs+spcS..YpasNAtssssoKsCcNhKsVsCLSPRuusshsslIACG-sss.plYchPs.plYpSNscpspaClsDoHCLp-FEslspEhLsulKKopCWts-hsshs.cppSsGl+SC+hKDpGsCsVsss+.WsIIpC-sspaYYS-t+csaDpspDIG.HaCLSs+CsT.RYPINPcpIpsCsWphppsphcpIsshsLE-lEpYKKAIopKLpsoLoIa+Yp..TcNLPHIKPlYKYITlpGTETuEGIEuAYIpSpIPALuGTSIGFKIsSKDGppLhDlIsYVKSAsYposYsKlYsTGPTIGINTKHDE+CTGsCPspIsHKsGWLTFu+ERTSoWGCEEFGCLAIssGCVFGSCQDII+sEhsVYRKssEEssslElClTFscKTYCT-lNAlpPIITDchElQFKTV-ohoLPpIlAVpNHclhsGQINDLGsaSpGCGNVQKsNuTshGsGsPKFDYlCHhASRK-VIlRKCFsNsYQuCKhLpp.sShhh--cpsTlTVhchK+lLGslphKhILGDl+YKsFAEslDlpsEGpCsGClsCFEsIpCpFsIcoolEsoCslcusCThFHDRIlIoPsc+cYAlKlhCpcKPssolpFKICNpKl-sshTlVDtcshIELuslDQTuYI+EKD-RCKTWhCRVRDEG 
...................................................................................p........hst...pht..t.hp.h.l.ppp....cshcsthhhEahhhthhs..shappa.psuG.sphpWRhhh+spphchC..assphhC.RCl.ptpcCsssphDhupphpphYptppphaptDlplhh..ht..h.t.s..hh....tptt......h..th..t......h..hh.hh.hh.............................................................................................................................................................................................................................................................................................................hsGhu.sa.lps+pupph.h-hlhYlKSsthphshschYsTuPh.uhsspa.phCTGsCs..s..pl.t..t..ssahsFshp.TS.WGCEEhGCLAIspGslhGpCpsl..hc...-..h..t.lYp..p...hppth.s-lClo...tt...shCs....plsshpPh.ss.....hphphp...s...spshslspllul.p.s.pclh.GpIsclGshs.p...h.GpsQh.s.p..s.....s..l.....h.....s...........us...s....c.FD...a....Cp...hhu+Kclhl+pCh.ssYpupphLpp.ssh.....hp.ppts.hhhphph.LGplphhh.Lsc.haK.h.s.p.p..pl.p.s.c.u.pCsGChtChpslpCphphhoshths.ssl...s.s.Cs.ap.s.pl..hlp.sppchslKhhCppp.t.ph.h.lsttp.pht.ph.h.tp.hl-...s..Dp............................................................... 0 0 0 0 +1407 PF03563 Bunya_G2 Bunyavirus glycoprotein G2 Bateman A anon Pfam-B_1048 (release 7.0) Family Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase. This family contains the G2 glycoprotein which interacts with the Pfam:PF03557 G1 glycoprotein [2]. 
25.00 25.00 30.20 25.60 23.70 18.80 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.98 0.70 -5.52 9 412 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 74 0 0 340 0 233.50 57 29.30 CHANGED ssshsRCFtGGpLltphpSssuhoElClKDDIShlKSpsha..tKNcssIhussKhaRpahVpDWppCNPl.ssuGoh.Vl-VscshpLhsKsYsCpssCsIol-+-sApIlhposcLNHFElsGTTlpoGWFKsKsoloLDpTCEHl+VTCG+KolpFHACF+pHhSClRFh+poILPt.MhpShCQNIELIIlshhslhhhIhhlILTKTYIsYLLlPlFhPhsalYGhlYN+sCKpCpsCGLAhHPFTsCsopClCGh+apoo-ph+lHRputhCpGYKSLptAR .....sslhpRCFpsGsllKppsSppuloElCLKDDVShIKopupY..hKNsoGlausNhshRpWlVpDW+-CpPhcssGGpINVIEVscDLoLpTcoYlCoADCoIslDKETAQlhLQT-shNHFEluGTTlKSGWFKoTshITLDpTCEHl+VoCG.KolpFHACFpQHhSClRah+tohLPt.hhpShCpNhElIllh.h.hh.hhhh.lloKTYlsYlhhPlFhPhshhYuh.h.+.hK.C..ChLshHPFo.Cs..ClCG..atso-th+hHR.sh.C.GaKsL.hsR..... 0 0 0 0 +1408 PF01104 Bunya_NS-S Bunyavirus non-structural protein NS-s Finn RD, Bateman A anon Pfam-B_880 (release 3.0) Family The NS-s protein is encoded by the S RNA. This segment also encodes for the N protein. These two proteins are encoded by overlapping reading frames. 25.00 25.00 33.20 33.20 22.10 21.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.88 0.72 -4.51 24 277 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 72 0 0 221 0 84.00 44 97.52 CHANGED M.hpshpl+Lhp+psMh+Lplshsps.hhl.LESSoShcRRPKlsshtcpsthhhLhLtuuphpaLIpIFhpTushpC.TosLPSTus.sT ....M.h.t.phcLh.h.thhcL.lshs.p.hLl.LtSSSSh.pRP+LhShhppsshLhLpLtuuphph.IhIFhpTushQhhTTlLPSTss.sh.. 0 0 0 0 +1409 PF03231 Bunya_NS-S_2 Bunyavirus non-structural protein NS-S Bateman A anon Pfam-B_3018 (release 6.5) Family This family represents the Bunyavirus NS-S family. Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). 
The L segment codes for an RNA polymerase. 25.00 25.00 95.60 95.50 18.40 18.20 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.33 0.70 -5.91 9 245 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 30 0 0 244 0 410.40 68 94.52 CHANGED MSosphs..................spphhpsYGop.Ds+AlsDsYslas.GcGpphlphhhaoNuthKouFuhsphG+stDlch...........cEtEllsopc.ashF-c...hsLsIshssc.hpllVp+PslpspGsKaphHspIhsPspshLphsss.....hscc-Fhcp.plpppchhPsshhl-tspKsshhlssssphslcYua.sVMGKs...ssa.tpshs+EhllosKpcsh.ssstssNRhLsspsVKuIpIu..S-ltss..opshLps+pshshclpsQh.RlShsulhcEsuhsRhFhls.-spsRhlhh.ucslsspsNtcTTLlIKllsKsh..sphssshsp...shpsChcs......ltsphGlVc.hhssDPs.YNphIs+sLLuVHTphAhslScsLpKPlIVFplhD.pELpscps-lsG+plsYpcDupGshYFLSpTL-.....hhPps.oolsYLsSht.s.WK.shstpchhlp ..................MSSSVYE................SIIQTRASVWGSTASGKAVVDSYWIHELGTGSpLVQ...TQLYSDSRSKSSFGY..TAKVGDLPC...........EEEEILSQHVYIPIFDD...IDFSINIDDSVLALSVCSNTVNsNGVKHQGHLKVLSP..AQLHSIGS....hMNRSDITDRFQLQEKDIIPNDRYIEAANKGSLSCVKEHTYKIEMCYNQALGKV...NVLSPNRNVHEWLYSFKPsF..NQVESNNRTVNSLAVKSLLMSAENNIMPN..SQAFVKASTDSHFKLSLWL.RV..PKVLKQlSIQKLFKVAGDETNKTFYLSIACIPNHNSVETA..LNIoVICKHQL..PIRKsKAPF...ELSMMFSD......LKEPYNIVH.....DPS.YPQRIVHAL.LETHTSFAQVLCNNLQEDVIIYTLNN.+ELTPGKLDLGERTLNYSEDuhKRKYFLSKTLE.....CLPSNTQTMSYLDSIQIPSWKIDFARGEIKIS..... 0 0 0 0 +1411 PF00952 Bunya_nucleocap Bunyavirus nucleocapsid (N) protein Finn RD, Bateman A anon Pfam-B_587 (release 3.0) Family The bunyaviruses are enveloped viruses with a genome consisting of 3 ssRNA segments (called L, M and S). The nucleocapsid protein is encode on the small (S) genomic RNA. The N protein is the major component of the nucleocapsids. This protein is thought to interact with the L protein, virus RNA and/or other N proteins. 
25.00 25.00 27.60 35.70 20.50 24.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.52 0.70 -4.35 17 354 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 96 0 1 246 0 197.20 55 97.72 CHANGED FpDVPppssuTFDPEAuYluFpspastsLshsslRIFFLNt+KAKssLu+puc.pVsLsFGGhphsVVNsHFPt.psNPVsDsuLTLHRlSGYLARWll-pht.ss.pscpshl+opIlsPlAEspGhTWsDGsphYLuFhPGuEMFLpTFcFYPLsIshaRVh+ctMDspahcKslRQRYuslsA-pWhpp+hsslpuAhpsVspLsWu+ouhSsAARcFLupFGIpl ........F.DVsppssssFDP-suYlsFptpaspsLshssVRlFFLNttKAKssLp+psc.plslsFGshphslVNNHFPt.psNPlssssLTlHRLSGYLARWlh-php.ss.t.cpt.h+ssllsPlAEspGsTWs-.G..s....phYLuFhPGsEMFLpTFcFYPLsIshaRVh+shMDspahcKshRQR.Yu.thsAppWhppchstltsuhpsVtpLtWt+sshSsuARpFLtpFGItl........ 0 0 1 1 +1412 PF04196 Bunya_RdRp Bunyavirus RNA dependent RNA polymerase Bateman A anon Pfam-B_2559 (release 7.3) Family The bunyaviruses are enveloped viruses with a genome consisting of 3 ssRNA segments (called L, M and S). The nucleocapsid protein is encode on the small (S) genomic RNA. The L segment codes for an RNA polymerase. This family contains the RNA dependent RNA polymerase on the L segment. 
31.30 31.30 31.40 31.40 29.90 30.40 hmmbuild -o /dev/null HMM SEED 743 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.13 0.70 -6.52 12 767 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 282 0 0 649 0 428.70 23 35.15 CHANGED lshsRh.slhhu.p....hFh....ptshhhh.tps......s.hsh..shpphhp...hphLlslh.cK.phhtlhs.hR.Ylhhtshs.hSsh.chI.chF-c..psshshhhshhIKphlhshtp.sppshh..lplsshshp.pslts.tlhs.shh.tshhs.hcsLlsphYhshYhhsKshcschss.splhpchlEaEpchpppp..........phhth.............h+.hscuhhtphpppsthc..lc.ppsFppslsplushphsphshp.....p.pslcu..o.hscch.chps.ppht..p.........cthphphtccs........ahhupsls.....................hlpshhhsphsppp.shVlchhcpshphhchtp.........hp.hhhKtppptu...RthFltsl.p+lhhth.lEchscslh+hsspEhhopsts+c.hhlppthphth+atstcohhplupuDsu.................................hhsphhalotsho+hostsss..ahahhhlhhsslhhpKchhhslphlp.h...cp.hhhs-hhhphlcpthphpp..sh..ahstshs..hhtsctsahQG.LsasSSLhHush..ha+chht....cth.pl-..........shhchhppSDDuth.luh.t.spsttsphthl................phspthFphtcthhhhhsIh.hs.KpThso..s.hEFhSpF...FpGsshs..hh+hlhsulscstt.shhsDltphpsphsphlctGssspLs.ls.sspp.hshphYu.sssu.hss.tphlph.ps.....phPh.hhhhhstPhtththhGhphss 
..............................................................................................................................................................................................................................................................p....h.s..h..................th.....................................................h.....h...h...............tp.h.t..........t................................................................ht.pt...............h.t.h...................................................................h.p..t.......hl.p......t.h..................h.......h...p.t...+.hal..h...h...h...th..thh....pch.t....t..........h....t....h..t.........p.....................................hht.h..hst.h.hhs....s..hhhhhhhhh.uhhpp+hh.hslphlh.h........cp.h..pc.hh.hh.t..thp...ph..hhp.h.....p.....h.hptshhQG.hphsSShhtshh..hhpph.......phh..h...............hhhp.h.tSsss...h...................................................................................................................................................................................................................................... 0 0 0 0 +1413 PF03181 BURP BURP domain Mifsud W anon Pfam-B_1432 (release 6.5) Family The BURP domain is found at the C-terminus of several different plant proteins. It was named after the proteins in which it was first identified: the BNM2 clone-derived protein from Brassica napus Swiss:O65009; USPs and USP-like proteins Swiss:P21746 Swiss:P21747 Swiss:Q06765 Swiss:O24482; RD22 from Arabidopsis thaliana Swiss:Q08298; and PG1beta from Lycopersicon esculentum Swiss:Q40161. This domain is around 230 amino acid residues long. It possesses the following conserved features: two phenylalanine residues at its N-terminus; two cysteine residues; and four repeated cysteine-histidine motifs, arranged as: CH-X(10)-CH-X(25-27)-CH-X(25-26)-CH, where X can be any amino acid [1]. The function of this domain is unknown. 
20.70 20.70 21.00 21.00 20.30 20.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.91 0.70 -5.17 28 407 2009-09-10 21:17:04 2003-04-07 12:59:11 10 6 56 0 178 416 0 183.20 34 55.05 CHANGED ulFFhEccL+sGsphs....ltFssssst...s..sFLPRphAcslPFSopcls-ILphFultssSspActhpsTlpcCE.ssu..hcGEcK.....hCATSLESMVDFusSpLGsp.slpAhS.Tts.s......tsstppYsl..suV+tls.u..t...sssVuCHp.sYPYsVFYCHpstp.o+AYtVsLhus-Gs.........tscAVAVCHpDTSsWsPp.HsAFplLsl+PGslPVCHFlspscllWs.p ...........................................s.haFh.p.lh..Gphh........hst.t...............hLs+t.....uppl.P.Fstpphstlhth.hsh..so..u.phhppslppC-...tss.......htGE..pK.....hCsoSlEshl-FshuhL...G...p........plp.......sho..oph.t........................hppYtl..tsV.pt..l.....s..s...............sp.VsCHp.sa....P....Ys.VaYCHp.h.........tt..s+sY.Vs.lhus.cus..............tspulAlCHhDTSt.Wssp.HhuFphLtscPG..p.s.s.....VCHalspsp.hlWs........................... 0 15 102 151 +1414 PF03309 Pan_kinase Bvg_acc_factor; Type III pantothenate kinase Mifsud W anon Pfam-B_3452 (release 6.5) Family Type III pantothenate kinase catalyses the phosphorylation of pantothenate (Pan), the first step in the universal pathway of CoA biosynthesis. 
23.30 23.30 24.50 24.40 23.10 23.00 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.49 0.70 -4.81 194 2298 2012-10-02 23:34:14 2003-04-07 12:59:11 9 5 2191 43 659 1901 2016 197.40 29 75.38 CHANGED lLslDlGNTplhhulaptt..........thh.....ph+h..sosttps..........t-phshhltshhphtsh..............h..tllluSVVPshhtshtphhtch...ht......hpshh.......ls.sshph.GlshpY.c...sP..pplGuDRllsslAAhp.ha....s.s..hlllDh.GTAsThDsls..scG.p..alGGhIsPGlslthcALtppsApLP..plt...............htps.t..psl...GcsTssulpuGlhaGhsGhl.-tl...lp.......c ......................................................................LhlDlGNTphhhulhptt.................phht......ph+h.....ts.s.h.tpo...................t-ch..s.hh..l...p..phhp.h.tshp..........................tlhluSVV.Ps.h..h.t.s.h.p.p.hh.h.c.h.ht..........hpshh..............ls......s.shps...Gl.s.hta.c..s.P..p.p..lGuD.......RlsssluAhpha.........sts.hlVlDh.GTAsTh-h....ls.........tcG..p......alGGsIsPGltlphcALtppsAp..LP.p.lp...............lsp.s...t......ssl....GpsTssuhpuGlhaGhsGhl.-tllp.p......................................... 0 270 498 593 +1415 PF04681 Bys1 Blastomyces yeast-phase-specific protein Mifsud W anon Pfam-B_5640 (release 7.5) Family The molecular function of this protein is not known. Its expression is specific to the high temperature, unicellular yeast morphology (as opposed to the lower temperature, multicellular mycelium form) [1]. 
20.90 20.90 21.20 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.02 0.71 -4.50 2 111 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 60 0 75 103 0 144.40 33 78.72 CHANGED ESVlAKRADLGsAFVhlYCNFoVhLDlpAGtsSTR+hLsGRsaDYapE.YtsGSGDGVSLTLpHT-G.DSSNScTTFRYKLuDsNSTVtYSLGNSGGNPFtGHKlTLKsSsDtCPpIEWP-GIPTGVSSGSCGSutNLILThCP.tpshp-FEDE ................................................h..shs..........lGpAhVhN..pCshsVaL.a..Sluusl.us.tp.hl.s.stshphhpp.ha.+..s.s.s.u.GGluLKl.o.p.s.-.G.h..hs.u.ss.pThFsYsLsts......tspVaYDLSslFGs.P.F..sGpp.ls...l...p.......s.....o..ss......sC.....P.sIhW.s.sG.........l.........P.s.........G.s........p..s..p.sCt.ussclhLTLC............t...................................... 0 7 27 58 +1416 PF00170 bZIP_1 bZIP; bZIP transcription factor Sonnhammer ELL anon Prosite Family The Pfam entry includes the basic region and the leucine zipper region. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.15 0.72 -4.01 22 5966 2012-10-02 13:17:30 2003-04-07 12:59:11 16 81 523 288 3305 7374 40 61.00 27 17.17 CHANGED -cplK+p+R+.pNRpuA++sRtRKptchcpLpp+VcsLpsENttLtpclppLccpstpLptpsp .............................++p+R.hhp.N.R.........t....u..A.....p+sRpRK.c............p...h.......lp.......p.......L.......E...p.......c...l..........p..........p....L....p....p.......p.......N.....p....p....L...p..........p...p.h..p...t...L....pp.p.ht.h.....h............................................... 0 830 1626 2510 +1417 PF00386 C1q C1q domain Finn RD anon Prosite Domain C1q is a subunit of the C1 enzyme complex that activates the serum complement system. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.82 0.71 -4.13 46 2356 2012-10-01 20:41:10 2003-04-07 12:59:11 16 38 315 47 1129 1977 431 122.60 27 35.37 CHANGED AFSsshspthsss.t.........slhFcc....llhN.tssYsssTGpFsCslsGlYaFsaplp...shppsltht..Lh+N...spphhthhsptpssp....psuSsuslLpLptGDcVWlplhst...suhhsspt.sposFSGFLl ................................AFpsshsps...h...s........p.........slhF-.p....lh..h.N....h........s...s.....p..Y...s...s.po...G.hFs.ss..l.sGl..Ya....F....saplt..................hpsppl..t.ls.....L.h+N...............s.p.s..h....h...t.s..h.sp.tssp................phuS....s.....u.....s....l....L..p.LptGDpValpl.tt..........puh.h.s.s.t...........th...oo..Fo.GaLl................................................................ 0 266 375 622 +1418 PF01413 C4 C-terminal tandem repeated domain in type 4 procollagen Ponting CP, Schultz J, Bork P anon SMART Domain Duplicated domain in C-terminus of type 4 collagens. Mutations in alpha-5 collagen IV are associated with X-linked Alport syndrome. 19.70 19.70 32.60 20.30 16.50 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.79 0.71 -4.17 26 1172 2012-10-02 16:37:33 2003-04-07 12:59:11 14 49 94 40 532 965 0 108.20 49 15.42 CHANGED uhhlshHSQopslPpCPtGhspLWpGYShLhhsu.sscusGQsLuoPGSCLpcFpohPFl.Css.pssCpYhu.NchSaWLoTsp.......tPhs.s.hsG.plcsaISRCpVCpts .......................hllshHSQo..hp.l..P..p..CP.....t.....GhppLWhGY....S.hLhhpu..spcupGQsLu....osGSCLccFpohPFl.Css..pssCpYhu.NcYS.aWLoTs.......p...........................tP...hs....s.h....s....G.....plcshI.S..RCpVC.t.................................. 0 125 159 312 +1419 PF03595 SLAC1 C4dic_mal_tran; Voltage-dependent anion channel TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Family This family of transporters has ten alpha helical transmembrane segments [1]. 
The structure of a bacterial homologue of SLAC1 shows it to have a trimeric arrangement. The pore is composed of five helices with a conserved Phe residue involved in gating. One homologue, Mae1 from the yeast Schizosaccharomyces pombe, functions as a malate uptake transporter; another, Ssu1 from Saccharomyces cerevisiae and other fungi including Aspergillus fumigatus, is characterised as a sulfite efflux pump; and TehA from Escherichia coli is identified as a tellurite resistance protein by virtue of its association in the tehA/tehB operon. In plants, this family is found in the stomatal guard cells functioning as an anion-transporting pore [2]. Many homologues are incorrectly annotated as tellurite resistance or dicarboxylate transporter (TDT) proteins. 26.20 26.20 26.90 26.40 26.10 26.10 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.30 0.70 -5.79 159 2801 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 2089 12 789 2007 101 296.00 22 86.74 CHANGED pphsssaFuhsMGhuulu...hh........ht.................h......h.......shhthluthlhhlslhlall..hhhh.hhh...+hlha..p..thttp.........h....pcPlpusFhsshshuh.hhlsshhhhh.....................................hhhluhsLWhhuss.lplhhshhhhh..................thh..t..pt....hphpp..hsPuW...hlPhVushlsussu..shhs.....................hhhhuhhhauhGhhhhlhlhsll.hhRlhhpphhsps...........hhPohhIhluPhuhuhhuhhtls..............................................h....shsshhuhhLhshuhhhhhlhlhthhph..................hph............................sF.shuaWAhoFPlushs...hushph.........sthhs...hthhphluh...................hhhhhhshhhhhlhhtslt 
....................................h..hPhshhuhslGhsuhu.h...ht...............................................h............th...t.h.lut..hhhhl.uhllahh.........hlh......+hlha...c.....phhtc.............l....pcPlhushhsshshuh..hll.ss.hhh.h.....................................................h..luh.s.......lWhhuhl..lplshhhhhsh..............................thht..t..t...............h.phcp....hsPuW..hlshVuh.h.lsu.ssu...sshu.........................hh.huhhhhuhGhhhh.hhlhsll...htRL.h.........h..ps.......h.sps...........hpsshhIhhAPhulsssuhhtlst.................................................................s.sshhshh..Lhshuh..hhh.hhhlh.h.h...p.h......hpt............................sF...ssua..h..uaoFPhshhA.suhhph...........uphhp....................s.h.h..chLuh....................h.hlhss.hllhhlhhthl.h.............................. 0 203 426 638 +1420 PF01681 C6 C6 domain Hutter H, Bateman A anon Hutter H Family This domain of unknown function is found in the C. elegans protein Swiss:Q19522. It is presumed to be an extracellular domain. The C6 domain contains six conserved cysteine residues in most copies of the domain. However some copies of the domain are missing cysteine residues 1 and 3 suggesting that these form a disulphide bridge. 25.00 25.00 25.10 25.20 24.90 24.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.69 0.72 -3.93 35 172 2009-09-12 03:01:09 2003-04-07 12:59:11 12 10 7 0 169 145 0 94.00 21 48.65 CHANGED Csssshshuhusspsst.....shsshhssshsso..tsssoohploCsuhss..hpsshhlss........ssssspsus.s.....o.lslshsC..sssuhWhY............sss..pslsol.sC .........................CpsCs.h.hh....hsts........hsshhshstsss...ssCpsh.sloC.pusss......sps..shhhss..........ssssspsus..u......s.sshslsC...sssu.p.Whh.............hu..pslsslsC............... 
0 72 92 169 +1421 PF03596 Cad Cadmium resistance transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.00 20.00 20.50 20.00 19.70 19.50 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.28 0.71 -4.60 6 922 2012-10-03 02:02:08 2003-04-07 12:59:11 8 3 734 0 141 604 8 185.00 42 92.57 CHANGED YhuTulDhLlILhlhFuchKppKphhcIalGQYlGoshLlllSLlhAalls.hlPEcWllGLLGLIPIYLGIKlhIhG-..---EpplhctLpppKhspLhhTVshIThAS.GADNIGlalPYFsTLohspLllslllFlIhIhlLsalup+LAslPpluETlEKYuRWIlslVaIuLGIYIllENsThsslLoh ........................................YhuTulDhLlILhlhF..Ap.hp.p..............p..K...p.............h..h.c.IYhGQ..YLGo.sh.Llh.sS.Llh.AaV.ls.al.Pp.cWl...lGLLGLI.PIaLG....I.....+....h...h........l...........h....s-.............s-..s.E...c...c.....h....hc...p........L........p........t........p..t..........h.............s.............p....L...........l...........hs........VuhlTlAS.GuDNlGlalPYF.soLs.hsplllsLllFllhIhlhshhuphLuslPhl..uEsl.EKapRhlhslVaIuLGlaIlhENsTlphhh.h.......................................... 
1 36 78 122 +1422 PF00028 Cadherin cadherin; Cadherin domain Sonnhammer ELL anon Swissprot_feature_table Family \N 28.80 28.80 28.80 28.80 28.70 28.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.07 0.72 -3.73 57 41902 2012-10-03 16:25:20 2003-04-07 12:59:11 12 548 328 141 21785 37651 1579 93.10 24 44.86 CHANGED hphplsEst....ushlhplsupDtD...tsssspltYplhpss.....sphFplsspsGp........lpstps..LDcEph.........spYpLplhApDp.........h..shssssplplplt ....................................................hpl.E.s.s..s.......s..G...s.....h......l........h........p.......l........p....A..p...D....t.D..................tu..t..N......u........p........l....p..Y...p...l.....h......s...s.s....................................tshF..p....l..s.......s...p.o.Gt........................................lps.......t....p.......s......L.....D...p..Epp.......................................sp..a.p..l...p....l.p.A...pDt....................ut.s..s...h...s...u....s..s.p.lplpl................................................... 0 4217 6276 12981 +1423 PF01049 Cadherin_C Cadherin_C_term; Cadherin cytoplasmic region Finn RD, Bateman A anon Pfam-B_257 (release 3.0) Family Cadherins are vital in cell-cell adhesion during tissue differentiation. Cadherins are linked to the cytoskeleton by catenins. Catenins bind to the cytoplasmic tail of the cadherin. Cadherins cluster to form foci of homophilic binding units. A key determinant to the strength of the binding that it is mediated by cadherins is the juxtamembrane region of the cadherin. This region induces clustering and also binds to the protein p120ctn [1]. 
21.70 21.70 21.90 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.84 0.71 -4.17 64 1652 2009-01-15 18:05:59 2003-04-07 12:59:11 12 99 108 11 822 1256 0 136.60 35 14.31 CHANGED R++ppp....h...........t.-c...DlR.ENllpY-DEGGGE-...D...ppuaDlssLppshss....................ptshhP.hthhs.................................ssssltpFIpc+lccsDsDPs.uPPaDoLpsYs...YEGsG........SsAGSLSSLs.Sso.o-u-p..caDYL.ssWGPRF+KLA-hYGsp ...................................................................................................................................................p................h...c-.....DlR.-NllpYs-EGGGE-......D.........................pp.taDlu..tLpps.tht.................................................p.s.hP....h.............................................................pssph.tp..FIpp+.l.p.t.A.DpDss...sPPaDoLh......sYs...YEGsG.......................................................SsAuS.LSSLt..S..........so....ss........s.-p.........-.............a..-a.L.s-W.G.PRF+pLA-hYut.t.................................................................................... 
0 71 138 400 +1424 PF03507 CagA CagA exotoxin Bateman A anon Pfam-B_918 (release 7.0) Family \N 21.60 21.60 21.90 21.60 20.20 21.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.43 0.71 -4.44 4 2702 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 41 4 1 2704 1 133.20 62 54.67 CHANGED NFSDIKKELNtKL.GNFNNNNN.GLKNSsEPIYApVNKKKsGQsASPEEPIYsQVAKKVsAKIDRLNQIASGLGsVGQAAuF.LK+HcKVDDLSKVGLSASPEPIYATID..DLG.GPFPLKRHDKVDDLSKVGLSREQcLsQKIDNLNQAVSEAKAGaFsNLEQpIDKLKDSTKKNslNLaVEuAKKVPsSLSA .........................................................ph..tN...N.N.N.N.N.GLKN............EPIYApVNKKK.s.GQss..SPE.EPIYAQVAKKVsAKIDpLNph.sSu..lsthh......p.thsh.hptt..ctVsshSt..s.GpSs....S....P.EPIYATID..-hs....usF.PL+RpstVs.........D.L.S.KVG.LSR............p.Qc.L.s.p+.......lssLs.QA................................................................................................ 0 1 1 1 +1425 PF03524 CagX cagX; Conjugal transfer protein Bateman A anon PRINTS & Pfam-B_5812 (Release 7.5) Family This family includes type IV secretion system CagX conjugation protein. Other members of this family are involved in conjugal transfer to plant cells of T-DNA. 
25.20 25.20 25.50 26.70 24.80 25.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.32 0.70 -5.06 150 1580 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 797 15 335 1390 79 205.60 22 75.21 CHANGED Ds+l..........phhsYssspl.hplhsssGhsosIthpssEpl....ts...luhGD................ossWplss.............ptsplhlK..............................Ppp..ss.hsTNlslsTs+.....RoYthcLpup.spshh............................htVpFpY..Pp...............t......ttttttttsssshtss....hphss.................hNap.Yshp...Gsps..hpPhp.laDDGphTalpFsss.hp....hPslFhlss...pG.....p.cplVNh+lps........shllVcplhpp.hhLRhG....pp.sVsIhp..s ..........................................................................................................h.Ys.sthhplhst.shho.l.ht.sEp.l.............s.....lshGs.......sht..Wplts........................tsstlhlK..............................P..hp....s..hp.TN.lhl.hTs+...........RsYphpLtstptp..h.......................................htlpap...Y..Pp.............................t...tt.tt.t.t...t.p....t..sp..p..tt....h.p...tp....................................................hsap..Y.hp................ustt....htP..p.......saDD..G..phTahpFstt.ht.....hPslahlss......u............p.pphlN.h.phps...........shh.llcplhtp.hhL+hG.....pp.hltlhp.s............................................................................................................................... 
0 62 186 252 +1426 PF03185 CaKB Calcium-activated potassium channel, beta subunit Mifsud W anon Pfam-B_2176 (release 6.5) Family \N 25.00 25.00 25.10 26.30 23.70 24.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.46 0.71 -4.94 10 225 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 48 1 119 207 0 174.90 38 79.42 CHANGED tK.+lsou.psGEDRAlhLGLuMlssSVlMaFlLGhTlLpsYhpSlhs-EusCsVLpssIt--.hpCsaoCGs-CcssSpYPCLQVaVNLosSGppuLLaasEEslcpNsK......C.YlP+CpcDppchpsplpslp-pF+cp...QsFsCaasPspppssVLLpRhYsppsLhHsLhWPolhLsGGsLIVlhVKLTQaLSlLsEch ....................................+hs.t.ptGEsRAlhLG..lsMh.ss....ulhhhFll.GhTlLpsa..h..p....S..l...s.pEupCol.lpsplh-p.......hpCsasCGs-Cpthu........pYPCLQV...aV......N.l..o....p...SG......p...p..s..lLa....asE.-sh......p......h.N..c..................CS..YlP.t...........Ct..c.shpcsh...spV.slpp..accp...psFsCahsspt..p..csVlLp+hYst.slhHslhWPsh.hh.h..sGslIlshshhsp.Lulhstt....................................................................... 0 26 31 56 +1427 PF00214 Calc_CGRP_IAPP Calcitonin / CGRP / IAPP family Finn RD anon Prosite Family \N 21.60 21.60 21.60 21.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.80 0.71 -4.11 13 313 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 68 21 149 310 0 106.10 29 83.67 CHANGED MsllphSshLss.lhsL..............pphsthpAsPhRpsL-shss..ss....cst.h.thlh.p.hp.php..ptppppt.....ttsts.......ucKRuCshuTCssp+LAchLsphss.hpsshuPss.luspuaG.RRRRul ...................................................................................................................h.........hh...................................t..h...................t....t......h.thh..t.........p..t.........tt..t...........t.ppp............hpKRsCshuTCssp+LuphLt+....s.s...t...pshsPTs.lGspua.G.++Rp........................ 
0 9 21 58 +1428 PF04847 Calcipressin Calcipressin Mifsud W anon Pfam-B_4547 (release 7.6) Family Calcipressin is also known as calcineurin-binding protein, since it inhibits calcineurin-mediated transcriptional modulation by binding to calcineurin's catalytic domain [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.31 0.71 -4.71 9 405 2012-10-02 20:46:34 2003-04-07 12:59:11 7 4 237 1 233 376 0 174.90 37 76.73 CHANGED .pVFsssps+.........pphcsLapphscslpaphl+SF+RlhlsFss.psAtsA+hphc............t.phpG+p......l+haFu..Qs.ss..sssspaLtPPpssKpFLISPPuSPPsGW-..tp-ssP...ll...saDLhtALupLs....................................................tEchpl+sss.-ssPulll.....+sspst.pttt..............tscschspTpRPsh .....................................p..h.t.p.h.........pphcs..Lappa.s...c....h...oF.....phh.....KSF...+...R...lhlsFssspuAscARh....pL..+.................pp..ph.Gcc...............h+lYFu............Qs.s......................hs....p..pa......LtPP..p..s..sKQFLISPPuSPPlGWc.......tp-ssP....Vl...saDLlt.Al..u..+Lus.......................................................................................................uEp.hE....L+sss....-ssPollV........+sspsp..p.tpt.........................................htts.+..pl.h.pTtRP..h............................................................................................ 
0 60 98 163 +1429 PF02029 Caldesmon Caldesmon Mian N, Bateman A anon IPR000075 Family \N 50.00 50.00 50.80 50.80 48.70 49.70 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -12.95 0.70 -6.07 10 234 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 39 0 77 177 0 296.00 32 83.25 CHANGED DDDEEEAARERRRRARQERLRQKpEE-shGQVo-psEssuQNSVssE-sKsoo.po...t.tsDDEAALLERLARREERRQKRLQEALERQKEFDPTlTDuSlShsS.RRhtND.su-NppsEKEE+pEuRppRpElEETEsVoKScQ+NsacDsE-ccp--+.+EccEEEc.c.tohtpN...............................................................................................................................................................................................................................................................p.hc-phcK-KtsK.-hKphhDtK+Ghs-.KuQNG..EhhT.KLKpsENsFS.s..tt....st.scsA.psEAG++LEEL+RRRsEhEsEEFEKLKQKQQEAAlE..LEELKKKREERRKVLEEEEQ++KQEEA-RKsREEEEKRRhKEEIERRRAEAAEKRQKhPEDGlSE-KKPFKCFoPKGSSLKIEERAEFLNKSlQK.SGVKsoHpsAVVSKIDSRLEQYTsAIE.GTKuuKPsKPAASDLPVPAEGVRNIKSMWEKGNVFSoPuusGTPNKETAGLKVGVSSRINEWLTKTP-GsKSPAPKPS.DLRPGDVSuKRNLWEK 
.......................................................................................................................................................................................................................................t.t...ttp.............t......p.c.p...p..tt........p.tp.p..t.............................p...t......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...........t...................................................................ttt...t..........t..............................................so...h+l.-RsE.LN+ShpK....suh+.sp.s..lucIDphL-QYTpAht...ts+ts+..+.ss.-Lss...s..ltshKohaEtGps.....tsss.ss..K-............................................................................................ 0 3 9 28 +1430 PF05042 Caleosin Caleosin related protein Moxon SJ anon Pfam-B_5163 (release 7.7) Family This family contains plant proteins related to caleosin. Caleosins contain calcium-binding domains and have an oleosin-like association with lipid bodies. Caleosins are present at relatively low levels and are mainly bound to microsomal membrane fractions at the early stages of seed development. As the seeds mature, overall levels of caleosins increased dramatically and they were associated almost exclusively with storage lipid bodies [1]. This family is probably related to EF hands Pfam:PF00036. 
20.60 20.60 20.60 21.10 20.50 20.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.04 0.71 -4.71 9 275 2012-10-02 16:17:27 2003-04-07 12:59:11 8 3 97 0 153 262 3 155.30 43 68.07 CHANGED +-pcphSVLQQHVAFFDpscDGIlYPWETYpGhRulGhshlsShhhulhINluLSYsThPuhhPS.hFPIaI+NIH+AKHGSDSusYDsEGRFhPsNFEpIFSKaA+TtPDtLThtElapMhcuNRsshDhhGWluupsEWhLLYhLA+Dc-GhLpKEuVRtsFDGSLFEpltK ...........................tthosLQpHluFFDpspDGlIaPh-TapG.hRsl.......GhshhhShhsshhIphsh..Sh.T..h..s.......u.......h..h..............Ps.Ph.F..sIYlcNIH...+uKHGSDous.YD.sEGRFlPtpFEpIFuKa..A.+s.t.s.......-tLThtEl.hphhpu...pRp...shD.hG.W....h.....u...uhhEWthhYhL.s...pcc..-G.h..l..pK-slRthYDGSlF.hlt.................................... 0 38 83 123 +1431 PF00915 Calici_coat Calicivirus coat protein Bateman A anon Pfam-B_202 (release 3.0) Family \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.92 0.70 -5.50 11 9404 2012-10-04 01:49:40 2003-04-07 12:59:11 15 15 8145 108 0 7359 0 130.20 53 60.76 CHANGED hhASsD........AssssDGsuuss.lsPEsss.ssshsh-PssuutsAsAsuGpss.lD...sWhts.aVpsshu..TlsPp...GclLaslpLGPcLNPYLuHLSpMYsGWuGuh-VRlhlAGsuhhAGKllhuslPP..ul-slossphshaPHVlhDsRpLEPV.hslPDVRNshaH.ss.sssTh+LVhMlYsPLhsssuus..sshsluspV.T+PSsDFsFhhLhPP..plEppopP...holPplohpphuN.Rasu.IsuhhlsPs..hshQ.pNt+hshDGphhG...hSssplsslcusls 
.................................................................................................t................s.s.Du.s.uu.ss..L.V.PElN.........NEsMALEPVsGAulAAPlu....G.Qp....NlID....PWIpN..N.FVQAP..sG.E.FTV..SPRN......uPGEl.L.hsh.L.....GPc.....L.....N...........P.....Y....L.....u.....H.....L....up....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +1432 PF01067 Calpain_III Calpain large subunit, domain III Finn RD, Bateman A anon Pfam-B_852 (release 3.0) Domain The function of the domain III and I are currently unknown. Domain II is a cysteine protease and domain IV is a calcium binding domain. Calpains are believed to participate in intracellular signaling pathways mediated by calcium ions. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.03 0.71 -4.56 91 1670 2009-01-15 18:05:59 2003-04-07 12:59:11 17 64 274 9 868 1478 9 140.30 28 20.07 CHANGED pW...phshhpGpW.h.....pGs...TAGGCpNa.t.........TFhpNP.............Qatl...............plp-s--p.t..........................sslll..........uLhQ..............KscRpp+p...........t.c.hs........IGFslacl..........t.plsppahhpp...tshspopsahNtREVspR.hpLsPGc..YlllPoTFcPspcu-F.hLRlFocptsptp ............................................................W.p.thhpGpW.h..........pGs.....oAG..G....C.p.N.h...t.................................TF...hpN.P.................................Qa..hl...............plpcssct.........................................slll..........uLh.Q................+s.p.+.p..tpp............................tt.phhs................IG.Ftlacs...................................t.ph..p.pp.h.hhtp.................tshspo.t.s.a..h...s...t..R....-..Vsp..c..hpL.....s...P.Gp......YllVPST....a.....cPpp.pu.cF.h..LRlaoct.....t................................................................................... 0 247 356 615 +1433 PF00748 Calpain_inhib Calpain inhibitor Bateman A anon Pfam-B_543 (release 2.1) Family This region is found multiple times in calpain inhibitor proteins. 
20.80 20.80 21.00 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.84 0.71 -3.82 24 632 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 49 7 111 594 0 125.40 35 73.52 CHANGED lDuLSpDFosut...........st.ssuptcppcschssssu-slpts..ssssspouuPs.sp..psp....h.-DAL-sLusSLGpRpsDP-..-sKshtDcVKEKu.KcE+p-KLGE+--TIPP-YR.Lh-scDK-GKPhhPcs..cc .......................................................................................................................lDtLSpDFst........................t..sstthp..pp..t.....t...tshs.Esl.sts.......ssss.sp...ou.sP...sp..t..tsp................h..D.s.AL-sLusSL.G.p.+p.s.-.P-...-sps.ht-cVc..-Ku...KpEch-KLGE+--TIPP-YR.Lhssp.c..ccG+PhhP.....p................ 0 4 12 40 +1434 PF00402 Calponin calponin; Calponin family repeat Finn RD anon Prosite Repeat \N 21.10 21.10 21.30 21.20 20.70 21.00 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.67 0.72 -6.68 0.72 -4.59 5 1168 2009-01-15 18:05:59 2003-04-07 12:59:11 13 17 129 0 595 1070 0 25.20 59 20.32 CHANGED luLQMGTNKaASQpGMTuaGtsRall .....IuLQ..MGT..N..K.sASQuGMTu...aGssRpl......... 
0 145 202 400 +1435 PF00262 Calreticulin calreticulin; Calreticulin family Finn RD anon Prosite Family \N 19.60 19.60 20.60 20.10 18.70 19.00 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.66 0.70 -5.82 68 1064 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 414 14 509 1004 21 294.80 45 68.78 CHANGED saFhEpF.ts......thpp+Wl.Sptptpt.......t....hGcaplpsu.thhsshptD+GLhhppcA+aaAISs+..hspPhssc.s..................................KsLVlQYpVKhp.psl-CGGuYlKLhssshctp.....p..hs....scT..sYpIMFGPDhCG.ssK.lHhIh.........shhtcpalhptsh.p.....ssph.THLYTLIl+PcN.oaElhIDscpspsG.sLhcD.....ac..h.PPKcIcDPp-cKPcDW-DcthIsDP.cspKPEDWD......................c.PchIPDP-ApKP-DWD....--hDG-WcsPhIsNP.....................cYKGcWps.hIcNPsYKGhWtPpcIsNP-YhpD..p.h.h...s.lsulGh-LWpspushlFDNlll ..............................................hF.EpFpps........hp.scWl.Sptcpp.................Gpaphpts...h..ss..httD..........+..GL.hppcA+aaAlSuc..h..s.sassc..s..................................+sLVlQapV+ap..psl-CGGuYlKLhssshs.p......p..hp....scoP.YtIM.....F........GPDhCG.ssK..lHhIh...............shhtcpHh..h..p...shtt.....hsDph.THLYTL..IlpPDs.oaplhlDspthpsG..s.L.pD.....as...h..PP+...cIcDP...pspKP-..DWD.-.+tpIsDP.pshKP-...DWD.................................p..P..chIsDP-ApK.......P-DWD....--.DG-WcsPhIsNP......................pYK.GpWcs..IcNPsY+GhWh..cIsNP-ahtD...p.h..hh..pshsslGh-lWp......h...pushlFDNhlI......................................... 
0 167 255 377 +1436 PF01216 Calsequestrin Calsequestrin Finn RD, Bateman A anon Prosite Family \N 19.70 19.70 19.70 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.37 0.70 -5.69 3 208 2012-10-03 14:45:55 2003-04-07 12:59:11 12 5 67 13 84 623 10 299.80 55 93.70 CHANGED KsPaLhLAuLhLLlu...p.GsRGEEGLDFPEYDGcDRVIslotKNYKsVLKKYEVLALLYHEPluDDKASQRQFEMEELILELAAQVLEDKGVGFGLVDSEKDAAVAKKLGLDEEDSlYVFKGDEsIEYDGEFSADTLVEFLLDVLEDPVElI-Gc+ELQAFENIEDEIKLIGYFKSEDSEHYKAFEDAAEEFHPYIPFFATFDuKVAKKLTLKLNEIDFYEPFMDEPITIPDKPNSEEEIV-FV+EH+RPTLRKLRPESMYETWEDDLNGIHIVAFAEEuDPDGYEFLEILKpVAcDNTDNPDLSIIWIDPDDFPLLVPYWEKTFcIDLS+PQIGVVNVTDADSVWMEMDDEEDLPTAEELEDWIEDVLEGEINTEDDDDDDDD ......................................................................h...........hh.........s.u-c...GLp.....a..P...pYDG..hDRVhs.l.stKNaKp.sh.K.+.a.c.h.h.sl.hY.Hp.s.....ss.+s.Q+Qap......h.pEhhL.EL...........s.AQVh......E.....c.....+.....s..lG....Fsh..V....Ds..p..K.-..s..t..lAKK..LG.hpE..sSlYl..hK..t...-..c...h...I...E.aD.Gp.....h..uADsLVEFLhDl.l.E.D.P.V..El.I.p..sphElp.uF.-p..h..E..-..c...I...+..L.l.G.aF...K.sc-..S..-.a...Y..+.....A.Fc-A.AEc..FpPa...I...FF.A.TF.-.pt.VA....K.+L..sL...K..h.N.E............l.....D..F......Y......E.P......F....M.......-...E...P....l..s...I.P...s.K..P......o..E.c.El.V.pFl..ccHp.RsTL...R+L....c.s..-..s..MaEsW...ED.D...h.s.GhHI.V...A..FA.E.ctDP..DG.a.E.F.L.EhLKpVApDNTc...N...P.-.LSIlW.IDPDDFPL..........LlsYWEKTFpIDL..t.PQIGVVNVoD.ADSlWh..-hs.....s.......--.DLP......os-ELE-WlEDVL.pGclss-D......DDpcp..p................................................................................................................................................................................................. 
1 20 26 47 +1437 PF03160 Calx-beta Calx-beta domain Bateman A anon Bateman A Domain \N 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.19 0.72 -4.05 32 5074 2009-01-15 18:05:59 2003-04-07 12:59:11 9 429 918 46 2189 4984 1811 97.30 27 17.36 CHANGED sslslh...D.sDts.hlsF-pspapl...EssGt.splpVsphu.GslppsVhVsapTtD.GT....Apu..GsDY..psspupLsFsss.pst.pplpls.l....lDD-lhEps.EpFhltLs .........................................................................................t...p-.......s....hh..........a....p.t.....s...p........h.....p..s........tE.....s......s......G.....s.....l....p....l.s.V....h....R.......ps....G..........s....................s....s....s..........l..........h....V..s..a..........p...T....t......s......G..o......................A..p.s...........G.........s.........D..Y...............ps.s.........p...u..........s....l........s.....F...t.s......G....co................p.....p.......plpls...I.....................l-.D...s...h...h...Ets..Es.FhlpL...................................... 
1 821 1173 1655 +1438 PF01213 CAP_N CAP; Adenylate cyclase associated (CAP) N terminal Finn RD, Bateman A anon Prosite Family \N 27.00 27.00 27.10 27.80 25.90 26.90 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.97 0.70 -5.14 6 489 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 282 4 291 479 4 263.70 35 60.01 CHANGED hppLlcRLEpATuRLEuVs.......sshhRss..............GssssPou.....slAu........................sssslpAFcshhuphlutalclS+cluG-VtppuEhVcpAFpsp+slLpsAupsp+Psps..cLt-LLKPIsEpItcltshREcNRuSKhFNaLSAlSEuIshLGWVuVsPpPssaVpEMcDuApFYTNRlLKEaKcsD.cpV-WV+uYLslhs-LpAYIK-aHTTGLoWsKcG.susstoAhsussouusssP..........PPP....PPPPPP.ssshhppssEuspu.s.psuhuAlFApLNpGEuITpGLKKVocD.KTHKNPsLRspss.sso...PKs.hpsPsP ..........................................................................tllpRLEtsssRLEshs.................t....................................................s.........................ts...............................s...lp.sF.D.p.h..l.st.ls.palph...Spplu.u...V.......tp......p...........uphltpua.p.p+thLhhus.ptppPs.s.....thtplLp..Plsctlppltsh+.Ep.s.Rs..St.h.FN.H....LouluEulsuLuWls.......h...s......s....p......PssaVpEh.suApFYsNR..........VLK-aKc..p..D......pHV..-..Ws+uahpl.h.p.pLpsY....lK..paass.GlsWst.p.G...........h........t.........h...........s..............t...........s..s...s..s.....s...s..P.......................PPP.............Ps.P.PPP...s.ss...t..................t......s.t....t.........tsshu..AlFupl.N.p.G.ps.l.T.p.u.L++Vscs.hTH.KNP.sLRstss.s.stt....st................................................................ 0 91 146 225 +1439 PF04451 Capsid_NCLDV Capsid_Iridovir; Large eukaryotic DNA virus major capsid protein Kerrison ND, Coggill P anon DOMO:DM04206; Iyer L Family This family includes the major capsid protein of iridoviruses, chlorella virus and Spodoptera ascovirus, which are all dsDNA viruses with no RNA stage. 
This is the most abundant structural protein and can account for up to 45% of virion protein [1]. In Chlorella virus PBCV-1 the major capsid protein is a glycoprotein [2]. The four families of large eukaryotic DNA viruses, Poxviridae, Asfarviridae, Iridoviridae, and Phycodnaviridae, are referred to collectively as nucleocytoplasmic large DNA viruses or NCLDV. The virions of different NCLDV have dramatically different structures. The major capsid proteins of iridoviruses and phycodnaviruses, both of which have icosahedral capsids surrounding an inner lipid membrane, showed a high level of sequence conservation. A more limited, but statistically significant sequence similarity was observed between these proteins and the major capsid protein (p72) of ASFV, which also has an icosahedral capsid. It was surprising, however, to find that all of these proteins shared a conserved domain with the poxvirus protein D13L, which is an integral virion component thought to form a scaffold for the formation of viral crescents and immature virion [3]. 
25.00 25.00 26.50 25.50 19.20 23.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.29 0.70 -5.33 37 809 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 193 9 0 474 1501 166.00 33 55.42 CHANGED ssshsssp..las-YlaLDstERpthupss+-YLIEQlQhtsppsh.....sssssspplcls...FNHPsKhLhashp....................................s..stsshsshhss....................s.ss.ssshh............................................................................................................sslpsupLhhNupcRFsppsup.YFshVQP.apphsps.Pss.......................GlahYSFuL.sssshpPsGohNaSRlcsspLplshps......................tsshssspshp.lhlaAhNaNlLRltsGhuGl .........................................................................................s..............................................................................................................................................................s...........................................u.Pshssshsshs.h...................................................................................................................shs.l.pslolhhcsh..s..h.lsp.h.ssc..ahoth.P.aaa..ussh.....ps......................sGhhhhoaAL..shpc.hpPSGphNhuRh..pp..hh..lshcss................................hs..shpp.hc.LlV.AsshNhh..................................................................................................... 0 0 0 0 +1440 PF05159 Capsule_synth Capsule polysaccharide biosynthesis protein Bateman A anon COG3562 Family This family includes export proteins involved in capsule polysaccharide biosynthesis, such as KpsS Swiss:P42218 and LipB Swiss:P57038. 
20.00 20.00 20.00 20.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.64 0.70 -4.92 23 1766 2012-10-03 16:42:29 2003-04-07 12:59:11 9 20 670 0 306 1577 451 256.20 23 64.77 CHANGED -sllhaGsctshcthtth..........pppshshhplE-GFlR...ths.th..PholslDchGlhaDsspPsch-plLpp.................shhststhspspphhshlhppploKYs..................................hpht...p...sps+chlLlssQVhsDtulp.huss...shpshtplLpsstccsP.suplllKsHPtshutp+h............uhhsph.t.tctsplhscDsshhsLlcpsctVhTloSpsGhEALLpsKsVhs...hGtsaYusaGLTpDtphp.......ppptph........slt.Lhstshlpa...............shY .......................................................................................................................................hhha.s..p...t...h............sp.t.h...th...hh.hhE-GalR......................P.....hsl....sh-...chG.l....hsus..h.....P...p....ch.c...hlpp.............................................h.st..t...h.t...h...shth..h.p.h.l..hp..tp.h.s+Yp..............................................................................................t...........t........th.ppchl.Lls..hQ........l......sDssl...p..htss...............thpth...hchl..p..s.....h.t...p...c..s..P....puh..............l.l.hK...H..P..t.s..h..ut.pch.................shh.s..p..........t...hptt...h.h.h..s.c.s..s..sh....s.L.lcp.sctVhTls.S.p.sGh-A.LlpG+sVhs...hGhsaYs..thG..LT...p....ct...h........t.......tpp.ph.....................ht..Lhtss..l.hshY...................................................................... 
0 70 165 239 +1441 PF00194 Carb_anhydrase carb_anhydrase; Eukaryotic-type carbonic anhydrase Finn RD anon Prosite Domain \N 20.10 20.10 20.10 20.20 19.70 19.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.50 0.70 -5.41 14 3112 2009-01-15 18:05:59 2003-04-07 12:59:11 16 24 954 506 1486 2849 68 215.10 26 65.40 CHANGED WGYscpsuspp........WsphaPlAs.....GpRQSPIsIpopcspaDPSLKPLslSYssuoup..plhNsGHohpVpF-Dops+oVlpGGPLs.us.YRLhQFHFHWG..uscppGSEHTVDGh+YsuELHLVHWN..sKYssasEAhspsDGLAVlGlFlKlG..stpstlQ+llDsLssI+pKGppsshssFDPssLLPs...shDYWTY.GSLTTPPLpEsVTWlVhKEPIslSspQltpFRsLLhsscs-pth......hhsNaRPsQPL+sRsV+ASF ...............................................................................................................................................W.........t.............s.........s.t.......G.pp...QSPI...s...l......p...p.p..t...h.............h.......s.....s.........p........h.................t..s..l...p..h.....p..h.............t....t.....pl..h..N.s..G.+..o......l........p......l...p...h.................p......s.................................................h............l......pG..s........h......t..p.....a..............p.............htp.....hH.a.Hh................................sS.EHpl.....s.G....p...p..a..shE...h..H...l....V..Hh................s........................................................p...s...s....s....l......u...V.lu.l......h...h......p......h.u..........p....p.....s.....s...................h....p....t..l...h....p..t....l......t....t....l......t................t.............s.............p......p...s.............h.....t....s.......h.....s....h...p.......p.L..l..Pp.........ptp..Y.....ap.YpGSLTTP.......P...Cs..E.s.VtWh...lh.p.p.s.lp......l.op...........p................Q..............l........t..t.h................p.p......hh.........................................................t.N.Rs...Q..httR.l................................................................................................ 
1 360 612 1041 +1442 PF02977 CarbpepA_inh Carboxypeptidase A inhibitor Griffiths-Jones SR anon Structural domain Domain \N 21.50 21.50 43.10 42.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.80 0.72 -4.38 7 15 2012-10-01 19:46:11 2003-04-07 12:59:11 10 1 5 4 0 18 0 45.30 45 51.05 CHANGED spshlssCNc.CsopuDC.GhThC.........ta...C.+hppossGhshpthulhs ...oNsLGTCNcYCsTNuDChGlTLC.........sW...C.Kh+KosuGhshupCulhP... 0 0 0 0 +1443 PF00619 CARD Caspase recruitment domain Ponting C, Schultz J, Bork P anon SMART Domain Motif contained in proteins involved in apoptotic signaling. Predicted to possess a DEATH (Pfam:PF00531) domain-like fold . 23.90 23.90 23.90 23.90 23.70 23.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.56 0.72 -4.15 60 1660 2012-10-01 21:41:45 2003-04-07 12:59:11 16 127 129 33 789 1599 1 84.60 21 14.74 CHANGED pphlcppRhtLlpplt...lst...lLDtLhpcpVLspc-h-plpp...pso...pts+.sRp..Llch.l.p+GsputphFlpsLpc..tpstLsphlptp ........................................phlcppRhtLlpplp.......lpt...ll..D.tLhp.pp......lls.pp-h-plps.....pso....ptp+.scp........Ll.-h.l..p.+G...p....p.u....hphF.lpsLpc....tpstLhp.l...t.................. 0 218 269 437 +1444 PF01623 Carla_C4 Carlavirus putative nucleic acid binding protein Bashton M, Bateman A anon Pfam-B_808 (release 4.1) Family This family of carlavirus nucleic acid binding proteins includes a motif for a potential C-4 type zinc finger this has four highly conserved cysteine residues and is a conserved feature of the carlaviruses 3' terminal ORF [1]. These proteins may function as viral transcriptional regulators. The carlavirus family includes garlic latent virus and potato virus S and M, these viruses are positive strand, ssRNA with no DNA stage. 
20.70 20.70 21.00 20.80 20.50 20.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.42 0.72 -4.24 10 194 2012-10-01 20:03:16 2003-04-07 12:59:11 12 2 43 0 0 207 0 89.70 45 82.40 CHANGED h+scslhplhcslacp.usshsh-lshsIlshsss+.lut..GRSpYARRRRA+SIGRChRCYRVaPP.hsFoo+CDN+TCsPGISaNh+VssFI ..........................................hhhhllhthh.ch.sshhshclC.VsIhp.hsucsVut...GRSoYAR+RRA.tsIGRChRC.YRVaPP..hpso+CDN+TChPGIShNh+VtsaI......................... 0 0 0 0 +1446 PF00997 Casein_kappa casein_kappa; Kappa casein Bateman A anon Pfam-B_1298 (release 3.0) Family Kappa-casein is a mammalian milk protein involved in a number of important physiological processes. In the gut, the ingested protein is split into an insoluble peptide (para kappa-casein) and a soluble hydrophilic glycopeptide (caseinomacropeptide). Caseinomacropeptide is responsible for increased efficiency of digestion, prevention of neonate hypersensitivity to ingested proteins, and inhibition of gastric pathogens. 25.00 25.00 32.80 32.70 19.00 17.90 hmmbuild --amino -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.93 0.71 -4.53 9 256 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 142 0 12 240 0 137.40 73 97.48 CHANGED ElQNQEQssCpEp-ERlFcpcpVhYlPlhaVLNpaPpYtssYYQpRsuls.hNN.ahsaPYYs+PlllRspAQIPpWQshPNh..........sTssR+PtPHPSFlAIPPKKhQDKTsIPsINTIAslEPTP..lPTs......EPsVNoVssPEASSE.I..STPETTTVsVTSsss .........................h.p+hs.YlPh.YVLspYPpYGLNYYQQRPVAL.INNQFLPYPYYAKP.lAVRSPAQhLQWQVLPNTVPAKSCQsQPTTMARHPHPHLSFMAIPPKKsQDKTEIPsINTIASsEPTs..TPTT......EAlVNT..Vss.....EASSEsI.tSsPETNTsQVTSTtV..... 
0 1 1 1 +1447 PF00363 Casein caseins; Casein Finn RD anon Prosite Family \N 21.80 21.80 22.60 23.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.74 0.72 -3.35 15 200 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 83 0 28 249 0 76.40 34 45.23 CHANGED QlspLpphpp.phtls.spt....tp....hp...to..h..cpshholsQ...pphlphhpplhpa.p....hshhlpslhQYpcshh.Phsphts ...............................................................Q..pLsshps.pLsLs....hhps.......hcQ.....psh..o..hhs.pshholsQ.....+hLsh.ppls.a.Q....tsh.lQsh..h......................... 0 1 1 1 +1448 PF00302 CAT Chloramphenicol acetyltransferase Finn RD anon Prosite Domain \N 23.50 23.50 24.20 24.30 22.90 23.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.40 0.71 -4.70 12 768 2012-10-02 12:01:53 2003-04-07 12:59:11 13 6 590 61 135 572 473 191.50 33 93.28 CHANGED MsFshIDlssWsRKEaF-HYhs.spCTYShTsplDITshhtplKcpuhKhYPshIYhluplVNpapEFRhuhsss.-LuhWDphpPsYTlFHc-TETFSulWo.acsDFspFhpsY.sDltpYucshphFPKsshPENtFslSulPWloFouFNLNltssssYLhPIFThGKYhpcssKlllPlulQVHHAVCDGaHsuRFlNElQ .......................phlDhpsWpR+caFpaahp..ps.tauhTsplDlTt...hh...pt...hKpp.....shpFasshla.hls+shNp........htEFRhth.psp.plshaDplpP..sa.T.lh.p.pc.s.c..oFSslas...p.aps..DappF...hp...th.p...p-h.p.p.......htp.s...p...s.h......h.s.c..t...h....p.....N....h.h..l...SslPWlsFouhs...h.sh.s.s..sss..hh.h.Pl.hThGKah.p.c.s.s+lh.hPlulQlHHulsDGaHlupFhpclQ........................ 1 52 92 122 +1449 PF03123 CAT_RBD CAT RNA binding domain Bateman A, Declerck N anon P39805/1-60 Psi-blast Domain This RNA binding domain is found at the amino terminus of transcriptional antitermination proteins such as BglG, SacY and LicT. These proteins control the expression of sugar metabolising operons in Gram+ and Gram- bacteria. This domain has been called the CAT (Co-AntiTerminator) domain. 
It binds as a dimer [1] to short Ribonucleotidic Anti-Terminator (RAT) hairpin, each monomer interacting symmetrically with both strands of the RAT hairpin [4]. In the full-length protein, CAT is followed by two phosphorylatable PTS regulation domains (Pfam:PF00874) that modulate the RNA binding activity of CAT. Upon activation, the dimeric proteins bind to RAT targets in the nascent mRNA, thereby preventing abortive dissociation of the RNA polymerase from the DNA template [2]. 25.00 25.00 25.60 25.30 24.10 24.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.90 0.72 -4.28 15 2777 2009-09-11 00:54:21 2003-04-07 12:59:11 10 4 1579 7 291 1469 8 57.80 36 21.19 CHANGED M+IcKlLNNNslllp.pcpppEtllhGpGIuFpKKtGDhlss.stIEKhFhLcscc.cpp+a ............hhIpKlLNNNVVl.sp....s..........cp.spE.h.llhG+..GIuF..p..KKhG-h.l..sp...p..p..I..EK.hFh.lcscp..tt...................... 0 79 152 209 +1450 PF00199 Catalase catalase; Catalase Finn RD anon Prosite Domain \N 19.60 19.60 19.80 19.60 19.20 19.40 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -11.94 0.70 -5.78 121 5600 2009-01-15 18:05:59 2003-04-07 12:59:11 14 27 3265 302 1509 4794 143 346.50 44 69.89 CHANGED TsstGssls.sspsohss....Gs...p.G.PhLLpDhpLl-cls+FsR.ERIPERsVHAKGsGAaGpFpso.s-.....hophTpAshhppsGp....cTPlhsRFSTVuGppGSsDss..RDsRGFAlKFYTc-GN.aDlVGNNhPV.FFlRDshKFPchl..HutK.pPposh....pcssth........................aDFhutpPEuh.Htlthlhoc.+GhP..toaRphsGaGsHTaphlN.spGctt...aVKaHacsppGhcslsscEAtp.htupssDatpcDLapsI.cpGsaPpWplhlQlhs.p-spph....p.as.hDsTKlWPcpchPl.......hplGphsLN.+NPsN.aFu-sEQsAFsPusl.VP.......GIphSsD.hLQuR.hFuYsDop+aRLG.sNa.plPlNt.P.......hs...h.s......pDG.hth............t.......ss..tssY.Psshssst 
..............................................................................................................................................................................TpstGh.ls..s.s..psohps..........G....cG..PhLlpDhhhhEc...........ls+F.......s+..E+IPE..R..hVH.A+G.u.G.A.a..GhFpsp..ps.........l..o.chTp.....Aphhpp..Gc..............pTPlhlRF.S.TV...............sGpp.....G.....os..Dsh...RDs.RGFA.......lKF..............Y...........T............-..E....G...........N...a...........DLVGNNh.................P.l.FFl+Dsh...KF................P...Dhl..................Hu....K..pPcspl......pststh..............................WD.....F...ho..h..p...PEuh....Ht.....lhh...lhS...D....RGlP........toa....R.p.M.pGa.G..s..HT..aphl.....N.....s........p................G.cth......aV.KF.Ha............+..................s.......p.p........G....l..........c...s.....L...s..............-..........EAt.........c..........l.............t...........u......p.D...-.....atp+DLapAI....c...p.....G.s.aP.c.WplhlQlhs..c..-t..p..ph......p..as..s.hD..hT..K..lWP....cc...phP.l....................h.VGchs.....L......N....RN....P....c....N...aFu....E....s....E....Q....s....AF.......sP...upl...VP.......G.lp.hSsD....hL.......Q.uR.lFuYsDspt.hRLG..sNa..plPlNp.P.....................tss...hts.p...+DG.hph.......t........ts.....tssY.Psphs...t............................................................................................................................................................. 0 417 840 1230 +1451 PF00666 Cathelicidins Cathelicidin Bateman A anon Pfam-B_276 (release 2.1) Family A novel protein family, showing a conserved proregion and a variable carboxyl-terminal antimicrobial domain. This region shows similarity to cystatins. 
20.70 20.70 20.80 20.70 20.60 20.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.15 0.72 -3.82 8 193 2012-10-01 19:28:07 2003-04-07 12:59:11 12 2 56 5 87 189 1 65.00 50 41.10 CHANGED sLSYREAVLRAVDphNEpSSEANLYRLLELDPs.PpsDpDPsTPKPVSFRVKETVCPRTopQsPEQCD ............LSYcEAVlR.....AVDphNp+Ss-.sNLYRLL..-L.D.......P...P.............ptD.tcssos.KPVSFTVK....ETVCP+o...s.p..ps..sE..pCD................ 0 1 1 18 +1452 PF04731 Caudal_act Caudal like protein activation region Kerrison ND anon DOMO:DM04892; Family This family consists of the amino termini of proteins belonging to the caudal-related homeobox protein family. This region is thought to mediate transcription activation. The level of activation caused by mouse Cdx2 (Swiss:P43241) is affected by phosphorylation at serine 60 via the mitogen-activated protein kinase pathway [1]. Caudal family proteins are involved in the transcriptional regulation of multiple genes expressed in the intestinal epithelium, and are important in differentiation and maintenance of the intestinal epithelial lining. Caudal proteins always have a homeobox DNA binding domain (Pfam:PF00046). 22.40 22.40 22.40 25.60 21.60 22.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.21 0.71 -3.88 14 144 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 47 0 59 120 0 127.30 37 51.59 CHANGED MY.PuslRps.GlN.............LssQNF.....Vuu.PQYsDYsuYHH..VPsh....Dsps.pPsuuWussYusP.REDW.ssYususussush.s.......SP.uphua.sss-Ysshps..............susGlL.........ssssuustthSPuup...R+ssYEWMRKolts....susup .......................MY.Pus...lRps.uls.....................Lss.QN.a......sus.PQYsDasGYHH..hssh......-stt...tss....suWsusYusP.....REDW.ssYu.usssss.s..............os..u.hua.sss-asshts...............susGl.L..........ss.ssss.tt.SPss.p...R+s.apWMR+sstss.ss.t......................................................................... 
0 3 8 22 +1453 PF00689 Cation_ATPase_C Na_K_ATPase_C; Cation transporting ATPase, C-terminus Bateman A, Griffiths-Jones SR anon Pfam-B_137 (release 2.1) Family Members of this families are involved in Na+/K+, H+/K+, Ca++ and Mg++ transport. This family represents 5 transmembrane helices. 25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.20 0.71 -4.75 141 8688 2009-09-12 06:26:15 2003-04-07 12:59:11 16 115 3188 71 3253 7683 303 180.00 22 19.21 CHANGED shPlsslQlLalNLlsDshs.uluLuh-ss.-...pslMp+..Pc.t.ppslhstthhhpl.hhhGhhhuhhslh...sahhshh.............................................h.......shs...........................................................upThsFssllhsplhp.shssRo.....tt..............shht.hshh..pN..hh.....lhhuhhhsl.hltlhlhah..P.........lpph.F.pss....sl.......................shtpW.........hhs.....lshuhhhhhh..sElh....Khl .............................................................................................................PLtslQlLal........NLlhDs.hsulALuh.....-..s...s.....-.................pc.lM..p..+....P....R...t.p.p.......s....l.......h......st.t....h.....htph...lhh..G..hh..u......hhs.hh...........s.a.h...hhhh......................................................................................................................................hh............shs...............................................................................................................spThhFss...h.lhs.Q...hhp..sh.ssRo..........pp...........................................shh.........hshh..............pN...hh..............lhhulh.hs.....h..hlt............h.hlhah.....P..........................hs.ph..F..pht.....sL............................s.h.tpW.........hhs..lshuhhhhhl...splhKh........................................................................................................................................................... 
0 1051 1898 2639 +1454 PF03310 Cauli_DNA-bind Caulimovirus DNA-binding protein Mifsud W anon Pfam-B_3746 (release 6.5) Family \N 21.00 21.00 21.00 22.90 20.90 20.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.59 0.71 -4.26 3 30 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 18 8 1 30 0 102.50 43 89.24 CHANGED LAsItSEIcEllosQKohcu-IKAILE+lGSssP.pouLEoVAAKIIsDLTKcIc-CcCNKEIlEhLspp...DcQIIPoPcEc..tKtLuLsKYSYPNasVGNEELGSSGNPNALKWP.hctPps ..........hsph.KElu.cllos.Kshcs-IKAI...L-hluSps.s......p-sLEslAAKIlpD...lschIscC.CsKcll-tLtst......cppl...p.pct.......tpthshtK....YSaPNasVGNtpLGSStsPpALpWP.......s..................... 0 0 1 1 +1455 PF03233 Cauli_AT Caulimo_AT; Aphid transmission protein Bateman A anon Pfam-B_3118 (release 6.5) Family This protein is found in various caulimoviruses. It codes for an 18 kDa protein (PII), which is dispensable for infection but which is required for aphid transmission of the virus [2]. This protein interacts with the PIII protein [1]. 21.10 21.10 21.20 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.14 0.71 -4.53 6 35 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 22 0 3 37 2 136.50 49 73.75 CHANGED MS.shos.PHIYKK.-pllRLKPLs.lsSNsRpYhFSS..pKusIpuIhsHCNNLNpIVuRsaLtlsKl.SYFGLpKDsSEthSKsKsP..shFsshppIF+cGGsspcppsp.lcoLpEhpN......RItclpppsKcLs-p.l.scsLhKc.VKDhcEpLscIc-ulKsIIG .......................MS..lTupPHlYKK.-pIl+LKPLs.lsSNsRpYhFuS..sKusIpsIlsHhNNLNpIlGRshLtlhKlsSYFGLp........KDsSE............SKSKsP..SVF.ssucsIFKsGGsDh.ssphc.plcsLhEhpN........+IcpL-pthppLspc.I.scs.hcp.lK-hccplppIc-tlKsIIG............ 1 0 0 2 +1457 PF01693 Cauli_VI Caulimo_VI; Caulimovirus viroplasmin Bashton M, Bateman A anon Pfam-B_1373 (release 4.1) Family This family consists of various caulimovirus viroplasmin proteins. The viroplasmin protein is encoded by gene VI and is the main component of viral inclusion bodies or viroplasms [2]. 
Inclusions are the site of viral assembly, DNA synthesis and accumulation [2]. Two domains exist within gene VI corresponding approximately to the 5' third and middle third of gene VI, these influence systemic infection in a light-dependent manner [1]. 19.90 19.90 19.90 20.00 19.70 19.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -7.98 0.72 -3.93 212 1222 2009-01-15 18:05:59 2003-04-07 12:59:11 11 36 808 7 520 1128 69 43.80 36 16.86 CHANGED Ka..YuVtpG+p..s.........GlY.soW...s-scpp......VsGasuAp..aKuF.sohpEApta .....................a..YuVt..pG+p..s............Gla..soW....s-Cppp........VpGasuAp......aKsF.sohpEApta...................... 0 188 333 458 +1458 PF04771 CAV_VP3 Chicken anaemia virus VP-3 protein Kerrison ND anon Pfam-B_2147 (release 7.6) Family This protein is found in the nucleus of infected cells and may act as a transcriptional regulator. It induces apoptosis, and is also known as apoptin [SwissProt annotation for Swiss:P54094]. 25.00 25.00 25.10 41.20 24.70 18.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.63 0.71 -4.14 7 93 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 0 0 63 0 108.30 94 99.84 CHANGED MNALQEDTPPGPSTVFRPPTSSRPLETPHCREIRIGIAGITITLSLCGCANARAPTLRSATADNSESTGFKNVPDLRTDQPKPPSKKRSCDPSEYRVSELKESLITTTPSRPRTARRpIRL .............MNALQEDTPPGPSTVFRPPTSSR.PLETPHCREIRIGIAGITITLSLCGCANARAPTLRSATADNSESTGFKNVPDLRTDQPKPP.SKKRSCDPSEYRVSELKESLITTTPSRPRTARRCIRL............. 0 0 0 0 +1459 PF01146 Caveolin Caveolin Finn RD, Bateman A anon Prosite Family All three known Caveolin forms have the FEDVIAEP caveolin 'signature motif' within their hydrophilic N-terminal domain. Caveolin 2 (Cav-2) is co-localised and co-expressed with Cav-1/VIP21, forms heterodimers with it and needs Cav-1 for proper membrane localisation. Cav-3 has greater protein sequence similarity to Cav-1 than to Cav-2. 
Cellular processes caveolins are involved in include vesicular transport, cholesterol homeostasis, signal transduction, and tumour suppression [1]. 25.00 25.00 25.60 25.60 21.60 21.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.88 0.71 -4.62 21 305 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 100 0 153 266 0 143.40 45 85.68 CHANGED sc-ph...........................sctps...h..........p-.cptthshssRDP+plNsc.lKlsFEDVIAEP.suoHSFDtVWhsSassFploKYhhYRlLosLLulPlAhlhGllFAlLSslHIWhVsPhl+ohLh.lsslpplWs.shcshhsPlFpuhG+hhSulplplpcp ...................................................s......p.pt...h.hDscpt.thshssRDP+plNsc.lKlsFEDVIAEP.puTHSFDslWpsSassFploKYahYRlLoslhuIPhAllaGIhFAhLSFlHIW......hVsPslKohLI.t...lpslpplashhlcsh..ssPLFpulG+hFSslplphp+.-...... 0 40 53 98 +1460 PF02275 CBAH Linear amide C-N hydrolases, choloylglycine hydrolase family Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_5806 (release 5.2) Domain This family includes several hydrolases which cleave carbon-nitrogen bonds, other than peptide bonds, in linear amides. These include choloylglycine hydrolase (conjugated bile acid hydrolase, CBAH) EC:3.5.1.24, penicillin acylase EC:3.5.1.11 and acid ceramidase EC:3.5.1.23. This domain forms the alpha-subunit for members from vertebral species, see family NAAA-beta, Pfam:PF15508. 
20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.94 0.70 -5.00 8 2070 2012-10-03 21:14:07 2003-04-07 12:59:11 13 9 1418 42 386 1606 83 285.70 24 86.92 CHANGED CTulshcspcsphlaGRNMDas......hshsppVI...lhPRsaslsh.cp.ss.hss+h..............AhlG.MGshhtshP.lasDulNE+GLuhAGLYa..ssaspap+ssccspssIssh.lhpaVLsNsoSV-EVKctLpphslVs.....sl..slh.hs.sLHahlsDtSGEulVI.EssK-.uLcVa-s.phGVhTNsPsa.cWal..TNLspYpulp.p..pshhhschclssaupGhGtlGLPGDhTPusRFlRsuahKtsh.cspsEstulsshFcILuossh.+GsVls.psp.chTlYToshssspGpYYachY-s.plptlsL.ccsL..............Dss-.hoa ..................................................................................CTuls.h..s..t.......p..s....p.hhhGRohD.at...........h.s..h.ss..p.lh.........lh.P..+...s.....h...p....h....st........p....t......s......s......p..........h......p..t..ph..........................uh.lG...h..u..h..s...........h...t.........s.h.......h.........h....s..D.G....hNEcGLu..h..ut..hah...........ss..h..up.a..t..p...t............p...t.....s.....p......s.....ls.....shph.l....palLsphsoVpEsc.pt.l..p.....p...h..p..l..ls...............................th.......st..h......s..........h..s...s.l.....H.ahlsD.tsGcolll.E..h..ps....t..........l.p.l........a............-s...........ht..Vh..TN...sPs.a..s.hph..tNL..p.pYh.hp.........................t..s.........h......s.s...........h......p.........h.................s.....t..t...Gh.h...s....L.P.G.s.hosssRFlRss.......a.h............p.......ts.............h...s.............p.............s................s............s............p............p............pu.l....sphapllpsssh...P.......h.......G....h......s.........h.........s.....t...........p........s..........p.......................p.......h.......Thaposhshppth.h.Yhcshts.p.l.htlphpp..........................h.......................................................................................................... 
0 137 230 310 +1461 PF03914 CBF CBF/Mak21 family Wood V, Griffiths-Jones SR anon Pfam-B_3822 (release 7.2) Family \N 20.50 20.50 23.40 22.80 19.50 19.30 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.16 0.71 -4.79 111 959 2009-01-15 18:05:59 2003-04-07 12:59:11 12 15 309 0 670 933 15 173.80 25 23.17 CHANGED hsl.uL.hlh.t.lhpp....p.....slc........h.......scFYppLYphL..ssp...........................lhpss.+.pp.................................................hlpLLpphL..pssphsh..pplsAFlKRLhpl.ul.ptsssshsuhlhhltpLhpp...aP.shpsll.pspp.................t..................t...........t......................................................................................sYss......cc.....c-Pphs.s.A.po...sLWElphL.ppHaHPsVsphAp .......................................................................................sl.uL.hlh.....lhpp....p...................sl-h.......scFYppLYphL..ssp...................................hhpsp.+.tp........................................................................................hhlplLpthL.............pssp....hsh.........tRlsAFlKRLhpl.uL.......pt....s...s.ss.......hhu.hLhhlppLhpp......aP...phpsl..l..cppp.pt..........t......................................................t................................................................................................................................sY..ss.................ppc-Pphs...s..A.po.......sLWElp........hL.p....p.H.aHPsVsphA...................................................................................................................................................................... 0 233 374 558 +1462 PF02312 CBF_beta Core binding factor beta subunit Mian N, Bateman A anon Pfam-B_12381 (release 5.2) Family Core binding factor (CBF) is a heterodimeric transcription factor essential for genetic regulation of hematopoiesis and osteogenesis. 
The beta subunit enhances DNA-binding ability of the alpha subunit in vitro, and has been show to have a structure related to the OB fold [1]. 25.00 25.00 27.50 27.40 24.10 23.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.99 0.71 -4.64 8 133 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 83 10 77 145 0 147.10 52 84.21 CHANGED MPRVVPDQ+SKFEoDELFR+LSR-SEl+YTGYRDRutEERpsRFpNuCR-G+uEloFVAoGTNLpLpF.ssps.htp..............cc.lDFD+EhGKVHlKS.FIhNGVCVpa+GWlDLcRLDGhGslEaDEcRAppEDulh+csl-ptppRhpEFE-cpRta+csppsphpt......u.sl .............................MPRVVPDQ+uKFEs-EhFR+LSREsEl+YTGaR-RshEER.phR.F.NsCR-...Gcs...El.uFVAo..GTNLpL.F.ssp..htp..............cc.sDF-+EtG.KVaL+ushIhNGVCVha+GWlDLpRLDGhGCLEaDEcRAtpEDAhh..ppth...-p.hppRhREFE-ppRsa+pp.ps..pt....t.sss............................... 0 20 25 56 +1463 PF02045 CBFB_NFYA CCAAT-binding transcription factor (CBF-B/NF-YA) subunit B Mian N, Bateman A anon IPR001289 Family \N 20.10 20.10 20.10 20.40 18.70 19.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.13 0.72 -3.43 25 558 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 271 0 319 542 1 56.50 67 18.95 CHANGED --PlYVNAKQY+uIlRRRpuRAKhEt...pp+...................l.s+sRKPYLHESRHpHAh+RsRGsGGRF .......EpPlYVNAKQ........YHtIL+RRpuRAKLEt...ptK.............................................l..s.KsRK......PYLHESRHpHAM+RsRGsGGRF..................................... 1 91 179 258 +1464 PF00808 CBFD_NFYB_HMF Archael_histone; Arch_histone; Histone-like transcription factor (CBF/NF-Y) and archaeal histone Bateman A anon Pfam-B_1351 (Rel 2.1) & Pfam-B_3673 (Rel 7.5) & Pfam-B_2078 (Rel 8.0) Domain This family includes archaebacterial histones and histone like transcription factors from eukaryotes. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.82 0.72 -3.91 28 2710 2012-10-10 12:36:46 2003-04-07 12:59:11 18 35 501 23 1800 5141 81 64.60 30 31.69 CHANGED tpLPlAslp+IhKps..sst+..lup-ApchlscslpEFlphlsscAs.-hspcppRKTlps-DlthAl ................................h..LPhAp.lp+....I....h....K.....p.....s..........s....ssp............l..up....-Ah.h...l...............s.c.....ss..p.FIphlo...pc.........At....p...hs..p......p..p..p....R....K.......T.l..pspDlhhAh..................... 0 538 1012 1477 +1465 PF01656 CbiA CBIA; CobQ/CobB/MinD/ParA nucleotide binding domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_782 (release 4.1) Domain This family consists of various cobyrinic acid a,c-diamide synthases. These include CbiA Swiss:P29946 and CbiP Swiss:Q05597 from S.typhimurium [4], and CobQ Swiss:Q52686 from R. capsulatus [3]. These amidases catalyse amidations to various side chains of hydrogenobyrinic acid or cobyrinic acid a,c-diamide in the biosynthesis of cobalamin (vitamin B12) from uroporphyrinogen III. Vitamin B12 is an important cofactor and an essential nutrient for many plants and animals and is primarily produced by bacteria [4]. The family also contains dethiobiotin synthetases as well as the plasmid partitioning proteins of the MinD/ParA family [6]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null --hand HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.18 0.71 -4.90 79 20629 2012-10-05 12:31:08 2003-04-07 12:59:11 18 135 4514 62 5745 39422 11955 208.60 17 65.21 CHANGED lslsutKGGsGKTThussLsthLup..tGh+VhshDh.Dspssssthhh...............................t.............h...htpshtsh.....................................thhsh.hhhph...tthpt..h......................................hpthhpthhptt....aDhlllDsssulsph................hspslsssshlllshpsph..hu..ltuhtphhphhpph..............lhGlllN+ht.s..pthtp.hh.thh.h.hhh....hh.........thhscshtlsp ..............................................................................................................................................lslsstKGGV..GKTT....s.......s..........s.s....L....u.......t.......s.L...........u..............p........p............G..........h...........+............V........l...l.......l........D.....h.....D....s.....p....s....s..s...s...s..h.ht.h............................................................hh...................................................................................................................................................................................h.h.thp...................................................................h...l.p.p...h...l..p..p...hhptt.............aD..h....l..l..lD....s...s....s...u....l.s.th......................................................................................s.h.s.u....l...t.....s.....u......c..........h..........l.......l........l...s...h...p....sph.........hu............lp.u....h.....t..p....l.....h.....p....h...l...p...p.ht..................................h.h.h.s..l..l..h..s.h...h..t....t........pth..t..h......t.......h.......................................................................................................................................................................................................................................
..................................................... 0 1917 3836 4923 +1466 PF02570 CbiC Precorrin-8X methylmutase Bashton M, Bateman A anon COGs Family This is a family Precorrin-8X methylmutases also known as Precorrin isomerase, CbiC/CobH, EC:5.4.1.2. This enzyme catalyses the reaction: Precorrin-8X <=> hydrogenobyrinate. This enzyme is part of the Cobalamin (vitamin B12) biosynthetic pathway and catalyses a methyl rearrangement [1,2]. 21.30 21.30 23.90 29.00 20.60 20.20 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.14 0.71 -4.89 155 1640 2009-09-11 02:55:36 2003-04-07 12:59:11 10 16 1546 15 459 1277 315 196.50 41 85.28 CHANGED sIh.pcSFthIcpEhs.....p..thsstp..tpllpRhIHuouDh-hsc..hlpFos....s..............ulpuuhsAL.psGu..sIlsDspMVtsGIspttL.t......ssplhChls-sclsphApptGtTRosAulchhtp..c.........hsuullsIGNAPTALhcLl-hl..cp..sts+PALlIGhPVGFVuAsESKctLtt........slPhIslcGR+GGSslAuAslNALlphsp ...................tI.ccSFtlIcsEhs........p.....th..s.sp.p..ptllhRhIHssuDh-.hs..c..pltFos....s..............slpsutpAL.....ps.Gu..........sI.lsDspMltsGIs+ptLs.t.......s.....sclhChlsDscVschAc.......p.tGhTRSsAAl.-lhtp...c...........hssu.lh.sIGNAPTALa+Ll-hl..c..p....s..s.s..c.PAhllGhPVGFVGAAESKct.Ltps.................slPalsspGR+GGSslAAAhlNALhh..................... 0 144 309 396 +1467 PF01888 CbiD CbiD Enright A, Ouzounis C, Bateman A anon Enright A Family CbiD is essential for cobalamin biosynthesis in both S. typhimurium and B. megaterium, no functional role has been ascribed to the protein. The CbiD protein has a putative S-AdoMet binding site. It is possible that CbiD might have the same role as CobF in undertaking the C-1 methylation and deacylation reactions required during the ring contraction process [1]. 
25.00 25.00 35.50 27.20 19.40 21.50 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.51 0.70 -5.42 6 1191 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 1139 1 338 1049 276 250.80 40 67.94 CHANGED scph+pGYTTGosAoAAAhAAlhsLh.ucphcpVplpsPs.GhplplslEpsch.puppAhAslhK-uGs.DhDlTpGh.IhuEVphpsGp.-lhIpGGEGVGhVT+.Gl.V.hGEAAINssPR+hIccslpchls-...scGs.VsISlPcGEclApKThNs+LGIlGGISILGTTGIVpPhSscuacsSLs.plslAhApsac+llhssGspGp+aARchh.slsp-thlphuNFhGahl-cAtcpGsccIlLlGasGKLIKlAuG ..................................s...LRpGaTTGoCAsAAu+AA...hhhL....l....p.......t...p...............h..........c...p..Vp.ls..hP......p...G...........p.......lphs...l...........pp...........hph......p........s...............s..........t..........A......t...........A..ulhKDuGD.D.PDlT+GhhI..hu.....pV............p...............h...............................s.............s.........t..................s...........t.............l..............h................lpu..GpGVGpVT+...GLsls.l....G.csAINPsPRcMIp...p...s...l...p...c...h...h...s..t..........................stu....hpl..pIslPpGEclAp+ThNsRLGIlGGISILGTTGIVpP.h.Spp....Aa.hpSlpht...l..c..l..t.....p.A.p.G..h.....p.p..llhssGstuEchsp....p...........h.........h......s....l...........sp.psllphusFlGhhLc...t...st......c........h......s............hp......c......lhlsG.th.GKlsKlAuG............................. 0 118 232 295 +1468 PF01890 CbiG_C CbiG; Cobalamin synthesis G C-terminus Enright A, Ouzounis C, Bateman A anon Enright A Domain Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. 
In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process [1]. Within the cobalamin synthesis pathway CbiG catalyses the both the opening of the lactone ring and the extrusion of the two-carbon fragment of cobalt-precorrin-5A from C-20 and its associated methyl group (deacylation) to give cobalt-precorrin-5B [2]. This family is the C-terminal region, and the mid- and N-termival parts are conserved independently in other families. 21.20 21.20 21.20 23.40 20.90 20.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.28 0.71 -3.86 204 1400 2009-01-15 18:05:59 2003-04-07 12:59:11 11 24 1369 5 399 1196 300 120.40 30 34.00 CHANGED slslGlGCc+ss..shppltpslppsLtp.......tslshpslsslAol-hKt-Es...ulhphApphs..hPlphassppLpt.t....hsss.SphVhpph.Gss..uVAEu.AALhuu............ssup.........LlltKpthss..............sTlAlAp ............................................................t.lslGlGC++s.s..shppltphlppsLpp............t.sl...s.tu.lps...luolclKtsEs...ullplApphs..........lP.hphass-pLpphp..........................h.ssSsh.Vt.cpl.Gss..uV.uEsAAlhss.............................s.sup..............................................LlspKhttss...............sThAlu................................................................ 0 127 262 337 +1469 PF02571 CbiJ Precorrin-6x reductase CbiJ/CobK Bashton M, Bateman A anon COGs Family This family consists of Precorrin-6x reductase EC:1.3.1.54. This enzyme catalyses the reaction: precorrin-6Y + NADP(+) <=> precorrin-6X + NADPH. CbiJ and CobK both catalyse the reduction of macocycle in the colbalmin biosynthesis pathway [1,2]. 
28.60 28.60 29.00 31.70 27.50 28.50 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.62 0.70 -5.05 90 1417 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 1368 0 341 1152 320 240.60 29 86.82 CHANGED h+lLlLGGTsEAptLAptLssts..........lssshShAGRstp..Ps..sh.slRlGGFG.....GssGLssaL...cpptlshllDATHPFAuplSpNAspAspptGlPhltlcRPsWp.ts.......sDp.WhpVsshspAsphL..........h...s.pR.VFLslGRppLstF......tthsppphllRsl-ssps.....slsh.ssspllhsRGPFshp........sEpuLhcpapI-slVoKNSGGs.ustsKlpAARpLGlsVlhlcRP.slP.sstt....hsssscslsalt.ph ..........................plLlLGGTs-uptluptLstts.............lsssoshuuchtt.........st.....ss...tl.+sGsh.......shcsltpal.....cc.p...pl..c..hllDATHPaAsplSpsA.h........pA.................s...c...........ps.....s...........lPhlRhcRs...sh..t....t.............................scs..hhhVsshppAsphh..................ph.......s.pp...lhLTsGppsLsta.........thsppphlsRslssscs......plsh..stt..cllsh+GPF..oh-..............hptslhcp..h....phc...........slVoKsSGus.uhptKlpAAtchGl.sVlhlpRP...sls......s..t.ph.....hpshsphhphl..t.................... 0 105 223 288 +1470 PF01891 CbiM Cobalt uptake substrate-specific transmembrane region Enright A, Ouzounis C, Bateman A anon Enright A Family This family of proteins forms part of the cobalt-transport complex in prokaryotes, CbiMNQO. CbiMNQO and NikMNQO are the most widespread groups of microbial transporters for cobalt and nickel ions and are unusual uptake systems as they consist of eg two transmembrane components (CbiM and CbiQ), a small membrane-bound component (CbiN) and an ATP-binding protein (CbiO) but no extracytoplasmic solute-binding protein. Similar components constitute the nickel transporters with some variability in the small membrane-bound component, either NikN or NikL, which are not similar to CbiN at the sequence level. CbiM is the substrate-specific component of the complex and is a seven-transmembrane protein [2]. 
The CbiMNQO and NikMNQO systems form part of the coenzyme B12 biosynthesis pathway [3]. The NikM protein is Pfam:PF10670. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.60 0.71 -4.72 154 1775 2012-10-03 02:46:00 2003-04-07 12:59:11 11 5 1495 0 550 1442 303 202.70 27 84.38 CHANGED HIsDGhL.ssshssshhslusshlshuh.....++lcpp......t...lPh.........................lul.huAhhFssphlslPl..su..oosHhlGsuLhulllG.sasuhlshslsLllQAllFucGGlssLGsNshsMulsus.hsua.hhhp..................l.......hph.........................thhluuhl....u...uhlulhhuulhsultLuls.s..............................................shthshshhh.......hsalsl.ull.EGllTshllshlt..+hcPch ......................................HI.-GhLss.hshshh..s..h.s..hshlsh.ul.......h+lppp....hp....ppp.........hsh.........................hul......s..uA......hh...Flh..p..h...l..pl...Pss..sG.....ossHh....h..G....s..uLsull....hG..shssslh.hs...l..l.Llh.QA....L....L.h..u..c.G....G..l..s...s...LGs.Nshs..Mul.h.ushlua..hla+..h......lpch....................thphhlusFl......u....uhl..us.h.h.s.h.h....s....s....u...l...p.L.ulu..hs...t.........................................hshhthhsh.hh....hsplsl..uls.EGllTshlhshlt.pht................................................................... 0 185 372 476 +1471 PF02553 CbiN Cobalt transport protein component CbiN Bashton M, Bateman A anon COGs Family CbiN is part of the active cobalt transport system involved in uptake of cobalt in to the cell involved with cobalamin biosynthesis (vitamin B12). It has been suggested that CbiN may function as the periplasmic binding protein component of the active cobalt transport system [1]. 
25.00 25.00 32.70 32.30 21.50 20.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.61 0.72 -3.98 12 603 2009-09-12 00:29:08 2003-04-07 12:59:11 10 1 593 0 137 328 12 71.60 44 67.62 CHANGED hpplllLlh.hhlhllsLll....h.hpcts.hG.uDspAEcsIpc.lsstYcPWFpPlaEPPSGElESLLFuLQAA ........h..phllllh..sl.h.lhllsLll.......h.....hh....s...c.sut..aGGoDspAEptIpp.l.s.spYcP...WFpP...l...a......E.....P.....s.....SGEIESLLFsLQu.. 0 47 99 122 +1472 PF02361 CbiQ Cobalt transport protein Bashton M, Bateman A anon Pfam-B_673 (release 5.2) Family This family consists of various cobalt transport proteins Most of which are found in Cobalamin (Vitamin B12) biosynthesis operons. In Salmonella the cbiN cbiQ (product CbiQ in this family) and cbiO are likely to form an active cobalt transport system [1]. 21.40 21.40 21.50 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.53 0.70 -4.79 21 6688 2009-09-20 21:39:57 2003-04-07 12:59:11 11 12 2806 0 1371 4611 488 211.40 20 79.22 CHANGED tsshlHclsschKLlhhhhhllhssls.shhsh.llhsllhhshhlsths......phhtphhhh.hlhhlhshlhlhh.................................th.sthlhssss.......................................................h.lhp.G..............................hhtulhlshRhhshlhshhh....LshTTsh.ElssulcclthPh.........lshhhhLuhRal.hlhcEhpplhpAppsRshph...p.shtpthpplshLlsshhlp.uhc+uEclshAMpuRGYss...tpsph 
...............................................................................................t....hpplss.t.sK.l.h.h.h..h..h..h..h..h..h...h..h..h....s..........s...h...........h..h...h..h..h..h..h...h..h.h..h..h...h..h..h.h..s.th....................h..h.h...h..h.h..h..h....h...h..h...h..h.l..h..h..hh.hhhh...............................................................h....t..t..t...t..h..l...h..t..h.hh...........................................................................................................h....l..t.t.t.u........................................lhh.u......h..h.l...h.h.....R.h.h.s..h.....lh.hs.hl................hsh...TTss.sp.l.h.s....u.......l.p.p.l..h..hPh.............................................................cluhhlslslRalPhlhc-hppIt.pApcsR...Gh..ph..........t...t......s....h......h..........p....p......h.....+.......s...h...h..........l....l....h.PL...h.hp....uhc+u-plutAM-sRG..aps.........h................................................................................ 2 510 954 1206 +1473 PF01903 CbiX CbiX Enright A, Ouzounis C, Bateman A anon Enright A Family The function of CbiX is uncertain, however it is found in cobalamin biosynthesis operons and so may have a related function. Some CbiX proteins contain a striking histidine-rich region at their C-terminus, which suggests that it might be involved in metal chelation [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.11 0.72 -3.92 76 2846 2012-10-01 23:23:09 2003-04-07 12:59:11 12 22 1311 18 915 2680 601 103.40 22 64.24 CHANGED HGS.+c.scuspshtphs.phltcths..h.V.phuFl-hsp.Pslppul....p.plhtt.....Gsccl.......lllPhhL.hsG.hHscpcI......sp.......lp.....phptphs........................thp..l..hhupsLGscstlhplltp .................................................+Go.pc....sp....us...t..th....p....ph....s..p....tlt....p..........p.........h..........s.....h...................V..........c....h..........uF...l...........p........hsp....P...s...lppsl................p.ph.htp...............Gsp.c.l.......................lll..Ph.h.....L.hsG.hHh..c..c..cI.........st.......lp.........php.t.phs.............................................................h.p.h.thu...s.sL.....Gs..cstlhphl..t............................................................... 0 286 665 834 +1474 PF02262 Cbl_N CBL proto-oncogene N-terminal domain 1 Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3949 (release 5.2) Domain Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. Cbl_N is comprised of 3 structural domains of which this is the first - a four helix bundle. 
25.00 25.00 25.10 25.80 20.10 24.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.68 0.71 -4.43 3 228 2009-09-10 23:26:51 2003-04-07 12:59:11 11 12 88 35 117 222 0 122.10 60 17.48 CHANGED .suosDKKhLEKsWKLMDKVVKLCQsPKLNLKNSPPFILDILPDTYQHLRLIaS+NEDpMclLp-NEYF+VFlENLM+KCKQsIKLFKEGKE+MY-ENSc.RRNLTKLSLlFSHMLuELKAIFPsGlFtG ..............ssD++sl-KsWKLMDKVV+LCQsPKLsLKNSPPYILDlLPDTYQHLRhI......h.S.+Y........-.....s......+....h.....t.....t.....L....u....-.....NEYF+lal-NLh+KoKpsIpLF.....KEGK.E+MY..-EpSp.RRNLTKLSLIFSHMLAELKAIFPs..GhFpG.................... 0 24 34 66 +1475 PF02761 Cbl_N2 CBL proto-oncogene N-terminus, EF hand-like domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3949 (release 5.2) Domain Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. The so called N-terminal domain is actually 3 structural domains, of which this is the central EF hand domain. 20.00 20.00 20.30 20.80 19.70 19.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.00 0.72 -3.91 5 227 2009-01-15 18:05:59 2003-04-07 12:59:11 9 15 92 35 122 226 0 81.50 68 11.39 CHANGED pFRlTKA-AcpFWRcpFGsRslVPWupFcotLspsHPlosG.hEAhALKoTIDLTCNDaISlFEFDVFTRLFQPWsTLLRNWQlLA ......................FRITKADAA-FWRctFG...c+....TI.VPWKhFRQsL+cVHsISSG...LEA.MALKSTIDLTCNDYISlFEFDIFTRLFQPWuoLLRNWphLA.............. 0 28 39 74 +1476 PF02762 Cbl_N3 CBL proto-oncogene N-terminus, SH2-like domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3949 (release 5.2) Domain Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. 
The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. The so called N-terminal domain is actually 3 structural domains, of which this is the C-terminal SH2 domain. 25.00 25.00 39.70 38.00 20.10 18.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.87 0.72 -4.35 4 215 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 85 35 115 217 0 84.00 81 11.55 CHANGED sHPGYhAFLTYDEVKtRLQ+hhcKPGSYIFRLSCTRLGQWAIGYVouDGpILQTIPpNKsLhQALl-Ga+EGFYLYPDG+spNPDL ........THPGYMAFLTYDEVKARLQK.a.hpKP..GSYIFRLSCTRLGQWAIGYVTuDGsILQTIPHNKPLFQALIDG.REGFYLaPDGRshNPDL.......... 0 24 34 64 +1477 PF00734 CBM_1 CBD_fungal; CBD_1; Fungal cellulose binding domain Bateman A anon Pfam-B_444 (release 2.1) Domain \N 20.60 20.60 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.57 0.72 -4.28 133 1334 2010-01-08 14:00:54 2003-04-07 12:59:11 13 104 175 6 936 1332 9 28.80 51 7.32 CHANGED tau.QCGG...G..a.oGsTs..CsuGhsC..pthNsaY ......haG.QCGG..tG.......a..oGsTs...Cs.....u..G..h....o..C..ph.Ns.aY......... 0 387 592 830 +1478 PF02013 CBM_10 CBD_5; Cellulose or protein binding domain Bateman A anon PSI-BLAST P10476/668-713 Domain This domain is found in two distinct sets of proteins with different functions. Those found in aerobic bacteria bind cellulose (or other carbohydrates); but in anaerobic fungi they are protein binding domains, referred to as dockerin domains or docking domains. They are believed to be responsible for the assembly of a multiprotein cellulase/hemicellulase complex, similar to the cellulosome found in certain anaerobic bacteria. 
21.00 21.00 22.00 23.20 19.50 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.07 0.72 -3.69 92 276 2010-01-08 15:14:06 2003-04-07 12:59:11 11 56 49 10 58 278 0 34.80 41 10.69 CHANGED s..Chspthu..YsCCss...s.tlhasDssGsWGlENs.pWCul .........C.sttpu..YPCCss...s..s.hsDssGsWGh..ENs..p.Csh.. 0 16 54 58 +1479 PF03425 CBM_11 Carbohydrate binding domain (family 11) Bateman A anon CAZY Family \N 20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.16 0.71 -4.58 15 149 2012-10-03 19:46:52 2003-04-07 12:59:11 8 40 89 1 61 228 302 177.60 16 25.80 CHANGED suhshhl-DFEss.s.s..hh.hWto.ssssspsuoplss.....stsscuhplphs..sstuuashpVsasl-cu.......Das......................pauGlsF.h.......K...Guu+plc....lEIsDss......cs-lalsslssspo.WpplpIsFsshsp......sGhstcssh...DLccltu.lsFpspussu....ssapIDslcLh....stspspssps ...........................................................................................s......lDc...F-.....sh.........s............s........h..h....ahs...s.s.pssthphp.ts................stssps.....hp.hphs....sstsshh...ssh...ths..hs.tt................................DWS..................................................shs.ul.pF.hl.........+ss..Guuppls.........l.....pl..p.sss..................t..c.hah......t..p..hss....sts..Wp.plpIPFsshst.....s..ss...st...st...s...h.....clspltt...hsh...h.hssstt...........sph..h..lDslphh..............s..................................................................................... 
0 38 47 54 +1480 PF03426 CBM_15 Carbohydrate binding domain (family 15) Bateman A anon CAZY Domain \N 20.30 20.30 20.30 292.50 20.20 17.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.03 0.71 -4.87 2 3 2012-10-03 19:46:52 2003-04-07 12:59:11 9 1 3 3 1 7 0 160.30 69 26.24 CHANGED lplDMssGWRGNuoG...pSGlThsuDGVoFsA.GDslGAVhDhh+PhpLEDAlIsMVVNVSuEFKAStAsLQ.hsQlKts...GEWsChAusp.hTAspDhTloCTlsEsDcKFNQTthDVQVGlQAKGTPsGslTIKSVTlTLA.tA.............YSAN IEVDMANGWRGNASGSTSHSGITYSADGVTFAALGDGVGAVFDIARPTTLEDAVIAMVVNVSAEFKASEANLQIFAQLKEDWSKGEWDCLAASSELTADTDLTLTCTIDEDDDKFNQTARDVQVGIQAKGTPAGTITIKSVTITLAQEAYSAN.............VDHLRD 0 1 1 1 +1481 PF03424 CBM_17_28 CBM_28; Carbohydrate binding domain (family 17/28) Bateman A anon CAZY Domain \N 25.00 25.00 37.00 31.20 22.00 21.60 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.32 0.71 -4.67 7 48 2012-10-03 19:46:52 2003-04-07 12:59:11 9 6 27 11 12 57 0 188.40 35 39.07 CHANGED VWu.EELSlSGEYVRARIKGhpYpP......I-RT...caocslWD..FNDGTpQGFslNuDSP.hpslslENsN..sAL+IoGLNs..SNDlo..EGNaWANVR..lSADt...WupshsIhGApcLTMDVIscpPsTVuIAAIPQSsstsWANPsRulpVp..ssFhppcDtpYKAhLTITstDuPslpsIApcscssshsNIILFVGo-s...uDVI.LDNIpVo ................Ws.ppLohSGpYsRuRIhG..Yts............p......hsphlhs..FpDGT+QGashsu-Ss.spss..lTIcssN...u.phhuhcs..spshs...sshWAs...A.....l.t-h....pups..lhhs.hLs...-hhscushslslAh.P.o.huh..WApsscshpls..schsp...tp..c..sp.chhhhhsh.D.splpshth.ssDohLpNlllhlussp...ustlalDNl+h............................................................ 
0 5 11 11 +1482 PF03427 CBM_19 Carbohydrate binding domain (family 19) Bateman A anon CAZY Domain \N 20.10 20.10 20.10 20.10 19.60 19.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.14 0.72 -4.19 2 39 2012-10-01 20:20:38 2003-04-07 12:59:11 8 5 31 0 26 42 0 63.00 34 12.39 CHANGED hTTTuhsTAoSA.hsh.hAspSCSoQsphuCTusGpYslCsaGKWVsusCPsGslClsosp .........................................t.......................................hsGp..ossopGphACou.sGp...aAlCs.aG.sW.VhtpCsu.GTsCh....s... 0 8 15 24 +1483 PF00553 CBM_2 CBD_1; CBD_2; Cellulose binding domain Bateman A anon SCOP Domain Two tryptophan residues are involved in cellulose binding. Cellulose binding domain found in bacteria. 21.10 21.10 21.10 21.10 21.00 20.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.07 0.72 -10.63 0.72 -3.96 17 1472 2012-10-01 21:34:18 2003-04-07 12:59:11 14 210 411 18 667 1442 50 96.90 30 17.38 CHANGED spssYslsspWssGFsAslslpNsuosslssWolsash.s.GpplTpuWNAslosoGsshososhuWNuolss.....GuossFGFpGotsGus..sss.slsGss.C .........................pssasssssW....s..u.....G......a.s.u....p....lslsNsGs.s.sl....s.u.W.slsash...sss..pp....lo..ss.....W......s..u..s.....h.......o.....p....o.......G.....s...p.h...o.ss..s.....s..u..a.....Nu....s.lus.....G.u.o.s...o..h...GFp.u.s.t..s.u..s.........ss............................................. 
1 261 536 651 +1484 PF00686 CBM_20 CBD_2; CBD_4; Starch binding domain Bateman A anon Pfam-B_111 (release 2.1) Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.41 0.72 -4.38 101 1301 2012-10-02 20:10:03 2003-04-07 12:59:11 14 99 591 106 739 1353 50 93.70 23 15.09 CHANGED plsV.sFps.ps.....sTshGpslallGslspLGsWss.spAlsLshst....ssshPhWps..sl.sLPs.Gs....shEYKal+.....t.ssss...sV.tWEu..u.sNRshosPsssssssss ........................l.pFpl..ph.....psthG......p.plhl.....sGs..hspLG....s...W.s..s.......p.p.AltLpht..................t..s.hWps.....sl...sl......ss...sp.................slc..YKalh.................h..sss.s......sh...hW.Es...s..sN...R.hthst.......sh.................................................. 0 308 491 631 +1485 PF03370 CBM_21 PRS; Putative phosphatase regulatory subunit Mifsud W anon Pfam-B_2433 (release 6.6) Family This family consists of several eukaryotic proteins that are thought to be involved in the regulation of glycogen metabolism. For instance, the mouse PTG protein Swiss:O08541 has been shown to interact with glycogen synthase, phosphorylase kinase, phosphorylase a: these three enzymes have key roles in the regulation of glycogen metabolism. PTG also binds the catalytic subunit of protein phosphatase 1 (PP1C) and localises it to glycogen. Subsets of similar interactions have been observed with several other members of this family, such as the yeast PIG1, PIG2, GAC1 and GIP2 proteins. While the precise function of these proteins is not known, they may serve a scaffold function, bringing together the key enzymes in glycogen metabolism. This family is a carbohydrate binding domain. 
21.10 21.10 21.20 21.30 20.50 20.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.67 0.71 -4.10 75 714 2010-01-08 15:58:43 2003-04-07 12:59:11 8 10 243 8 460 671 6 108.30 31 22.53 CHANGED phppp.VpL...Eshtls.......ppslhGsltVpNluF-KpVtlRaT..hDsWcohp-lsupYhs...shtthsh....D..........................pFpFplsLsshht..........................................tpplpFClpY..pss.u....ppaWDNN.supNYplp ................................................t..ttp.VpLEph.hp.........ppslhGslpVpN.luF-K....pVtlRhT..h.DsWcohp.-ls..u.pYhp............sss..shD...........................................pFsFplslssh.h...............................................................ttplcFslpY.......ps.s..u........ppaWDNNsGpNYpl............................... 0 122 198 331 +1486 PF03423 CBM_25 Carbohydrate binding domain (family 25) Bateman A anon CAZY Domain \N 26.80 26.80 27.00 27.20 26.70 26.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.00 0.72 -3.90 30 440 2010-01-08 16:02:45 2003-04-07 12:59:11 8 47 133 10 176 429 14 84.90 26 18.88 CHANGED usslclaYNhssssLstpsclah+GuaNsWsps.uhs.chsc...t......usahpssl.plPppAhhlDaVFssG....sssYDNNsspDaphsls ...........................................sslslaYs......t....p..ps.h..s.t.psp...l..........ah+s.u...a...s....s...W...s....ps.......u..hs.p....ttp.................ssahpss.l.plPtp.Ah.t.l-alFssG.....ussa...DN...Ns...sp...Daphsl.s............. 0 61 121 159 +1487 PF02839 CBM_5_12 CBD_7; CBM_5; Carbohydrate binding domain Bateman A anon Bateman A Family This short domain is found in many different glycosyl hydrolase enzymes and is presumed to have a carbohydrate binding function. The domain has six aromatic groups that may be important for binding. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -7.97 0.72 -4.37 135 2927 2012-10-03 03:17:01 2003-04-07 12:59:11 9 197 1085 40 615 2508 403 42.20 30 7.84 CHANGED hssWsss.ps..Yss.G....-hVs.asG..phYpA.phasp.st...tsssst..........sW ...................ssWsus.ps...Yss.G......DpVs..a..p..G..psYpA..paWTp..us.........pPstss..........sW............................ 0 203 349 497 +1488 PF04942 CC CC domain Bateman A anon Pfam-B_4563 (release 7.5) Domain This short domain contains four conserved cysteines that probably for two disulphide bonds. The domain is named after the characteristic CC motif. 20.60 20.60 20.70 20.60 20.20 20.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.04 0.72 -4.07 29 101 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 6 0 95 112 0 35.30 31 20.03 CHANGED ssssshsCc..uss.....sPshs.GtCPsGhsllpushCCssc ..........tshsC+.......uss.....sPuls.GhCPsGhslltus.t.CCsp.t..... 1 41 43 95 +1489 PF01845 CcdB CcdB protein Bateman A anon [1] Domain \N 23.40 23.40 23.70 26.10 23.30 23.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.22 0.72 -4.09 12 537 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 413 26 75 272 31 98.20 43 98.60 CHANGED hQFpVYpshucut...YPhllDVQSDlIsplsoRlVIPLhsscph.t.+ssp+LpPllp.l--psallhTppMASlPsslL.Gp.VsDLospRppI+sAlDFlhsGI ............MQFpVYps+tcsp...a.hhlDVQSDIIcshspRhVIPLsshchhsp...+ss..ccLhPllp.lsscsahlMTp-hAoVPlssl..Gcc..VsDlS.p.+cspIKsAlDhhhpGI.............. 0 14 34 51 +1490 PF04995 CcmD Heme exporter protein D (CcmD) Bateman A anon COG3114 Family The CcmD protein is part of a C-type cytochrome biogenesis operon [1]. The exact function of this protein is uncertain. It has been proposed that CcmC, CcmD and CcmE interact directly with each other, establishing a cytoplasm to periplasm haem delivery pathway for cytochrome c maturation [2]. 
This protein is found fused to CcmE in Swiss:P52224. These proteins contain a predicted transmembrane helix. 23.90 23.90 24.10 24.60 23.50 23.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.06 0.72 -4.23 96 1201 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1174 0 214 605 209 48.40 37 73.35 CHANGED GsYuhYVWsuYulohlslshLlhholhpp+plhpclpctptRpt....ch .....GGYAhaVWhAhuhTllsLslLllpolhp+RtlLptltp.ppAREs.R.h.................. 0 45 106 160 +1491 PF03100 CcmE CcmE Mifsud W anon Pfam-B_2583 (release 6.4) Family CcmE is the product of one of a cluster of Ccm genes that are necessary for cytochrome c biosynthesis in eubacteria. Expression of these proteins is induced when the organisms are grown under anaerobic conditions with nitrate or nitrite as the final electron acceptor. 19.90 19.90 21.00 22.00 19.00 19.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.50 0.71 -4.58 135 1616 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 1542 4 402 1031 1646 131.90 46 83.52 CHANGED h..st.+++RLhllshslsuluhAsuLlLhAhppslsaFaoPo-lhpscs........s.sspplRlGGhVppGSlpRs..suhpVp.FpV.TDs...s.........ss.lsVpYpGILPDLFREGQGlVApG................ph.tssG....sFtAsEVLAKHDEsYMPsE ...............................................................RR+pR..Lh.llhullsululssuLlLaAL.p.p.NIsh..F...YTPuEl.l.tGct..........t.sGpRlR.l.GG.M..Vh.....G.S......VpRss........soLc..Vs.Fsl.......t....Ds....t.........................s.s..V..sVsYcG..I..L.P.D.LFRE....GQGVVspG........................pL.tpss.....phhAcEVLAKHDE.sYhPPE.............. 0 113 247 324 +1492 PF03918 CcmH Cytochrome C biogenesis protein Finn RD anon DOMO_DM01577 Family Members of this family include NrfF, CcmH, CycL, Ccl2. 
25.00 25.00 25.10 27.10 22.30 24.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.83 0.71 -5.21 177 2053 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1387 2 373 1270 1631 139.80 38 68.57 CHANGED hlhhl..lhl..hh...................sh...sss.sDph.h...ss.....s.EpRhppLscpLRCsh.CQNpsIsDSsAslApDLRtpVh-hltpGcScppll-ahVsRYG-FVlacPshpstThlLWhuPhllLlh.GhhhlhhhhR++pttt..........................sLosp-p.....tcLppl.Lp ........................................................hhh...hh.lhL.hhs..................sut....tss.lDshpF..tss......tQcpphppLsppLRCPp..CQ.N..p..slsDS...N...A.l.AsD...hRppVYc.h.l.p.......EGKSc..pEIlcaMlsRYG.cFVpYsPP.....ls...st.TllLWhhPl......lhlllGuhll.h.tht.++pht...........................................t........................................................................... 0 93 206 291 +1493 PF03597 CcoS Cytochrome oxidase maturation protein cbb3-type TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 23.00 23.00 23.40 23.40 22.50 22.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.99 0.72 -4.50 127 978 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 954 0 277 677 88 44.60 39 69.38 CHANGED sllhlLIPlullluhlulhsFhWul+oGQaDDl-ssutRILhD.D- ................phlhhLIPlulllshlulssFlWul+oGQFDDl-t.uppI.LhD.D-....... 0 74 175 227 +1494 PF03150 CCP_MauG Di-haem cytochrome c peroxidase Mifsud W anon Pfam-B_3135 (release 6.5) Family This is a family of distinct cytochrome c peroxidases (CCPs) that contain two haem groups. Similar to other cytochrome c peroxidases, they reduce hydrogen peroxide to water using c-type haem as an oxidisable substrate. However, since they possess two, instead of one, haem prosthetic groups, bacterial CCPs reduce hydrogen peroxide without the need to generate semi-stable free radicals. The two haem groups have significantly different redox potentials. 
The high potential (+320 mV) haem feeds electrons from electron shuttle proteins to the low potential (-330 mV) haem, where peroxide is reduced (indeed, the low potential site is known as the peroxidatic site) [1]. The CCP protein itself is structured into two domains, each containing one c-type haem group, with a calcium-binding site at the domain interface. This family also includes MauG proteins, whose similarity to di-haem CCP was previously recognised [2]. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.45 0.71 -3.93 173 2309 2012-10-03 10:02:11 2003-04-07 12:59:11 9 22 1444 47 631 1895 669 159.60 36 39.95 CHANGED sttp.spLG+tLFaDspLSts.......sslSCAoCHs.ss..........t..uhsD..stsh.......u.hG.....hs.s.................ph.......ssR..NuPolhN.suap...............................................h.FW.DGRs...............................s..s..L.cpQA......htPl..tsshEM......ut...s........................hppll.p+Lps.............st......YtphFppsFs....................................sps........lshpplspAlAsF.p+ol...hossStFDpal.....pG.-tsAl ...................................s..p+stLG+tLaaD...sR.LSts.......ss.lSCuoCHs..hs.............t..uhsD......shps........................o..hG.......hs.s........................................................................................ph........GshNuPTlhN.usash.......................................................................s.FW.DG.RA...............................................................s.sL.p-Q.A......tGPl..hsPhEM.............us.......s................................................hctll.t+lpp................................st.......YtptFppsas..................................................ps....hshc.s.l.scAIApFE+TL...lossS.FDcaL.cG.-ppAh.......................................................................................................................... 
0 223 417 541 +1495 PF04505 Dispanin CD225; Interferon-induced transmembrane protein Mifsud W anon Pfam-B_2070 (release 7.5) Family This family includes the human leukocyte antigen CD225, which is an interferon inducible transmembrane protein, and is associated with interferon induced cell growth suppression [1]. 27.10 27.10 27.30 27.40 26.70 27.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.79 0.72 -4.44 68 837 2009-09-11 14:57:37 2003-04-07 12:59:11 7 11 210 0 406 837 11 81.30 26 49.85 CHANGED ss.usssh...............spsaLshuIhsol....hCsh....P...lGllAllaSspspsthttGDhsGAppsuppA+thslhuhlh....Gllhh..llh.l ...............................................................spD.a.ls..hulhssl...........hChh.......P.......lGllAhhaS.h..........cspsthhtGDhpuA.pphuppA+hhs....lhulhl.Glhhhlhh.h.......................... 0 97 146 220 +1496 PF01130 CD36 CD36 family Finn RD, Bateman A anon Pfam-B_1229 (release 3.0) Family The CD36 family is thought to be a novel class of scavenger receptors. There is also evidence suggesting a possible role in signal transduction. CD36 is involved in cell adhesion. 
19.80 19.80 20.30 20.00 19.50 19.60 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.55 0.70 -6.19 52 1012 2009-01-15 18:05:59 2003-04-07 12:59:11 16 15 151 0 630 1015 58 355.90 22 82.78 CHANGED hhsluhllhlhuhlhhhh.asshhpphlccplsLps...sop.......sac.Wtps........Phslhh.p.........lYlFNhTNs--hhs..ut+PplpElGPYsY+.EhtpKhslpassss...Tloa..ppp+sahFpP-...hSsG.....stsDhlsssNlshlussthhpp..........hs.hhp...............................................hhlshhlpt.....stphF.hspolt-hLa..........GacDsl..lshhpp.........h...............spFGhhhs..cNso...sshaslhTGtc.shpphGh..........lppasGpsph..sh...............Wps.........C.s.....plpGo.-GohFsP..h.lpp.pcslhh..Fts-l.CRslp...ltappsss.hpGIpsa+atss.spshsssptpspstCa................C.tstt...........................Ch.sGlhslosCh....G..uP....lhhShPHFa..tAD.phhpslp.Glp.Pst-cHp.halslcPhoGhPlp.sptRlQlNlhlppsps..h.shhps.hs.shlhPlhWhcpss.tlscphhshl.ptlhhh.plhthhthshlslGllhlhhhlhhhht .................................................................................................................................................h...............................h.p.th.lt...ss.............hp..W..p.............sh.hhh..p..............hahashpNspchht....s..tp..lpphGPYsap....chh.+.sl.p......t..tp...s......plsa.....p.h.F..p..........hS.s.......p-.lhh.Nh...hhsh..h..htp.......................hhp..................................................................................................................................................................................hhhs.h.hp..........ttt.h..hp.....hss...t-hha..............Gaps..h......hphh........................................................th.u.hh.......hNso.......ss.hphhsGhp..sh....t.ph..s...........................................l..pa.p.s.p..ph......sh..............................ats................C.s................l.pG.o..suph.a.P........h...hp.....pp...l.h..a...........-h..C.....R...th......h.atpp........hp.G.l.s...h+ahhs..p.hs.s.s..t...........
.st.sa................................................................C.......................................C...tGhhsls.C...........s......tP..................lhhShPHFh....us..........h..........h..p..t.l..p......G..........h....p....P.s.................cpHp...hhh.lpP...............h.....sGhshp.sth+hQhNhhlp..tt....................h....h.t....p.....h............t................hh...hPhhah.p...................th....h.s.tthhp.h.......h..h.......h......h.h...h...hh.h....hh.uhhhhhh...h....h........................................................................................................................ 0 248 319 521 +1497 PF04549 CD47 CD47 transmembrane region Mifsud W, Bateman A anon Pfam-B_2739 (release 7.5) Family This family represents the transmembrane region of CD47 leukocyte antigen [1-2]. 25.00 25.00 26.70 27.80 21.20 23.00 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.25 0.71 -4.22 13 120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 61 0 25 104 0 151.00 44 53.06 CHANGED hlIllhhhlhllLhWupls..hlohKh..sphc+hIhlhhssulllTlIslVGtalh.lssthshhphpGlsLI.lshhh.IhLphhla.htltho.phhIsllllQlluYlLollslsLslhsC.pslaG.LLI.sLhlIslhELhuLlhhhh...hsssp+shh ............hLlhIFshlsllLhWhphu..hhol+h.Ssshspph.hhLhVhGhllolIhlsGAhLF.hP.u.aol+phhGLhhhslsohhLIllphhsFhh..hhtho.shlhhIlhhQllGYILsVlGLuLslptC...VcGsLLlSGLuhIsluELhuLlahhh...hsSsQ+sh............. 0 1 1 4 +1498 PF03234 CDC37_N Cdc37;CDC37; Cdc37 N terminal kinase binding Bateman A, Wood V, Mistry J anon Pfam-B_3345 (release 6.5) Domain Cdc37 is a molecular chaperone required for the activity of numerous eukaryotic protein kinases. This domain corresponds to the N terminal domain which binds predominantly to protein kinases [2] and is found N terminal to the Hsp (Heat shocked protein) 90-binding domain Pfam:PF08565. Expression of a construct consisting of only the N-terminal domain of Saccharomyces pombe Cdc37 results in cellular viability. 
This indicates that interactions with the cochaperone Hsp90 may not be essential for Cdc37 function [2]. 26.10 26.10 26.60 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.52 0.71 -3.95 21 361 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 238 0 221 333 1 125.20 28 34.41 CHANGED slDYSKWD+IElSDDSDlEVHPNlDppSFIRW+QpsIHE+RppR+p-hcsLchphphsscLhpRlc+hLspLppp.t..s...........l.....t................................p........ppMh.sLh..l.....p.t.t..........sshhpplppHppKlcshpc-hppKLcELpKccpppIsS-Dl..HsGFspS ..............lDYSKWDtlElSDDpD.-sHPN...lDptShhRh+ppthhcchtp..hppchcplchpht...pthhtchpph.lpphp.p....t.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 68 108 175 +1499 PF02724 CDC45 CDC45-like protein Mian N, Bateman A anon Pfam-B_1919 (release 5.5) Family CDC45 is an essential gene required for initiation of DNA replication in S. cerevisiae (Swiss:Q08032), forming a complex with MCM5/CDC46. Homologues of CDC45 have been identified in human [1], mouse and smut fungus (Swiss:Q99107) among others. 
34.00 34.00 36.20 34.90 33.60 31.70 hmmbuild -o /dev/null HMM SEED 622 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -12.88 0.70 -6.49 33 405 2009-09-10 22:41:12 2003-04-07 12:59:11 9 4 282 0 267 409 17 488.50 28 92.86 CHANGED .VlllVuh.-lDALCAs+ILssLhKpDhl.apllPVtGYs-L.............cpthpch..spshpp...........llhlsCGuslDLtshLphs............................tshplaVhDuHRPasLsNlau....................................ssplhlhsDssh--phpt....cc...hapth........................cs-s-s-..s-s-s-sssp......pc.........t.ts.ppttcpc.hphcRp.p...................................................++pp++.hccpcphlpthYppushaupSsuhhlYsLA.plu+s..st.-hLWhAIlGlosh.l...................tthstppYsph.hthLpcEVpRLssps.....................tssspos............................sshpIsh.p.-h+LhLhRHWSLY-ShhaSsYluu.+LcLW.o-pGcK+L+cLLA.+MGlsLsps+QsataMDhplK+pLtphlc+.uspaGLpclsht...............uFs+saGa+.splSAuDsVhulsALL........Ess........................................................................................t.....tt.tppcpcphhssFapAhDALs................ppsh-lLppGlphAptLppAIhpsssoll.-p.c.lpshpsF+hsllp-.u...PDlclFspPhsLscLupaLlcuhspp...pcppttt..............hPLVlAu.LcpspsshlVl.Gl..........................................................................s.tpptp...................tNpFuhAFppsuppo.sA+lphDsF-sulIEl+p-DLssFL-sLohtss 
............................................lhlh.ss..-sDulCAs+lL.p.tLhpp-.l...aplhPltuht-l.............pp.hh.ph...........p.hp..............hlhlssGu.lDl...p...h...L.t..t...................................tth.halhDs+RP.h.slsNlas......................................pttlhlh.c.tp.c.p.......cp...ha.t......................................................ttpptpp..pptt..t..t.tt...................t..............................................t.pc..................................................................................................................................................................................................ppp....hp.hpt...ct.l...Yp.hp.aups.u.hh..........ap.L..u..hs+p.......p..-...h...LWhullGlos..l............................thh.t.th.t...h..lppcVp...phs..p.....................................tp.hs.............................pp.p.lth..p.-..................phhL.hpHWoLa-Shh.osahss.phphW..s........p.................G...........tc+Lpphl....A.cMG.lsLtp..spQpat......Mshpl+cplhphh.c..h..ash..pphh.................sF.hpaGap.tph.AtDhshshhulh........Es...................................................................................................................ttttthhtpFhtAhDuLs.......................s.p.L.tul..hAp...pul..ppstshl..pp.p...lhp.t.....ahhs.l.....-..u............s-.ht.hFsp.P.hsL.hLu.alhcshh.p........tttt..................................hPllhus..h.......s..p.pshhhll.Gh..............................................................................................t........................hN.Fu.AFppsstps.ss...p.hhhs.F-.shlplptp-hs.FhptL.....h................................................................................................................................................................................................................................................................................................................................... 
0 106 156 224 +1500 PF02933 CDC48_2 cdc48_2; Cell division protein 48 (CDC48), domain 2 Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_799 (release 5.2) Domain This domain has a double psi-beta barrel fold and includes VCP-like ATPase and N-ethylmaleimide sensitive fusion protein N-terminal domains. Both the VAT and NSF N-terminal functional domains consist of two structural domains of which this is at the C-terminus. The VAT-N domain found in AAA ATPases Pfam:PF00004 is a substrate 185-residue recognition domain [1]. 22.40 22.40 22.40 22.60 22.30 22.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.88 0.72 -4.33 103 1049 2012-10-01 20:15:13 2003-04-07 12:59:11 12 16 512 49 687 1072 104 68.70 25 9.07 CHANGED thsss.ht.hltpphts..pslstGphl..............hh...sh......h..................sptlphtVhsspPss.............sl.hlscsTplpl.t......pcssph ..............................................hsss.hshhlp..h.Fhp...+PlppG-hh..............................hl....ph.t...........................hpsl.pFp.....VspscPss..................hs.hls..sT.lphp.....tcs...t...................................... 0 197 383 563 +1501 PF02359 CDC48_N VAT-Nn; cdc48_N; Cell division protein 48 (CDC48), N-terminal domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_799 (release 5.2) Domain This domain has a double psi-beta barrel fold and includes VCP-like ATPase and N-ethylmaleimide sensitive fusion protein N-terminal domains. Both the VAT and NSF N-terminal functional domains consist of two structural domains of which this is at the N-terminus. The VAT-N domain found in AAA ATPases Pfam:PF00004 is a substrate 185-residue recognition domain [1]. 
25.10 25.10 25.10 25.10 24.90 25.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.64 0.72 -4.04 120 1252 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 680 48 754 1224 114 82.90 27 11.30 CHANGED pLpVscAh.pp....csspulsplsspshppLslptGDhlplpG.c+p.Ts.shVhsstsp....sp....tshIRhsshhRpNsslulGDpVpVcpsp.s ................LhVscs....p.......spul..ltlsssshppL.....t.l........h...pGD.s....l.hlpG..c+.......p..Ts.shVhsscsp.........st.....uplphst.....s.hRp.NhtV.p.lG..DhVsVp.ss..h............... 0 221 435 635 +1502 PF03381 CDC50 DUF284; LEM3 (ligand-effect modulator 3) family / CDC50 family Mifsud W, Kerrison ND anon Pfam-B_2846 (release 6.6) Family Members of this family have been predicted to contain transmembrane helices. The family member LEM3 (Swiss:P42838) is a ligand-effect modulator, mutation of which increases glucocorticoid receptor activity in response to dexamethasone and also confers increased activity on other intracellular receptors including the progesterone, oestrogen and mineralocorticoid receptors. LEM3 is thought to affect a downstream step in the glucocorticoid receptor pathway. Factors that modulate ligand responsiveness are likely to contribute to the context-specific actions of the glucocorticoid receptor in mammalian cells [1]. The products of genes YNR048w (Swiss:P53740), YNL323w (Swiss:P42838) and YCR094w (Swiss:P25656) (CDC50) show redundancy of function and are involved in regulation of transcription via CDC39 [2]. CDC39 (also known as NOT1) is normally a negative regulator of transcription either by affecting the general RNA polymerase II machinery or by altering chromatin structure [4]. One function of CDC39 is to block activation of the mating response pathway in the absence of pheromone, and mutation causes arrest in G1 by activation of the pathway [3]. It may be that the cold-sensitive arrest in G1 noticed in CDC50 mutants [2] may be due to inactivation of CDC39. 
The effects of LEM3 on glucocorticoid receptor activity may also be due to effects on transcription via CDC39. 26.70 26.70 30.50 28.00 18.90 26.60 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.00 0.70 -5.23 76 747 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 312 0 511 720 9 276.90 32 77.38 CHANGED l.....hhsupplp-lpl.cYsssstsss..................................................................sCplsFsls....pphcu.PlahYYpLsNFYQNHRRYVpShsppQLpGct.hs.sp.......tCcPhph........................pssc.hh...hPCGLIAsShFNDTash.....................sss..sssh....................shs.....ccG..IuWpoD..+pp+a+p..sph.p.t...........s.P.sW.t................................shp.ss.hPssp........pE-FhVWMRoAALPsFpKLat+h.......sssL.sGp.YplpIp.NY.....PVppFs.Gp..KpllloTsShhGG+N.FLGlsYlllGulshllulhh.llhahh.ps+.....phu-ts ...........................................................................h.hhsuppl.El.h.cYspsp..ss..........................p..............................................hCplpapl.s....p.ph.p....s.slahYYpLsN..FYQNHRR.Y..VpS.hs.spQLpGps..hshps..............psCpPhphs..................................................tssc...slhPCGhIAsShFN..D..Tash..................sssspth............................................shs.......ppG.IuW.o.D.+.t+Fpp.....sth.t.t...................................ths..Ps..sW..p.......................................................................................shp..ss..P.s.p............sE-ahVWMR...TAALPsF+KLYt+lt...................pssL.sGp.Ypls.I.p....s.a.............sVtpFs.Gp..KpllloTt.o.hh.GG..+N..FL...GluYlllGulshlhulhh.hlhhhh.h.p......p............................................... 0 183 288 415 +1503 PF02611 CDH CDP-diacylglycerol pyrophosphatase Bashton M, Bateman A anon COG2134 Family This is a family of CDP-diacylglycerol pyrophosphatases, EC:3.6.1.26. This enzyme catalyses the reaction CDP-diacylglycerol + H2O <=> CMP + phosphatidate. 
25.00 25.00 26.70 26.60 24.00 23.90 hmmbuild --amino -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.54 0.70 -5.10 24 718 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 658 2 75 359 4 208.40 62 86.80 CHANGED ssDALW+IVpppClst.ppssssuPCspV..c.....ptGhVlhKDtsGshQYLLhPTt+loGlEuPtLhpsssPNaahtAWpARsahupchGpslP-ssluLAINSphGRoQsQLHIHIuClpPslpptLspts....sshsspWp.LPh.L.....pGHpYhA+pVsss-hspt..sPFphLsccl..tupscMuchululsshssss....FlLLsophchhshs.uSAEElQDHsC.plhc ..............................................................................sDsLRKIVhEpCLPs.Qpps.QNPuPCAEV.....KP....sAGYVVhK...D.hpG.P.LQYLLMPThRIsGhESPLLh-PuTPNFFaLAWQARsaMScKYG..pP......lPDpAVSLAINSRp..GRTQNHhHIHISCIRPDVRcQLDssL.....ssIoo..RWhPLPGGL...........tG..H-YL..ARRVTESELsQR..SPFhMLAEEVP.-AR-+MGpYGLAhVRQSDsS.....FVLLATpRNLL.T..LN........RASAEEIQDHpC.pIL.p.................... 0 9 33 55 +1504 PF03598 CdhC CO dehydrogenase/acetyl-CoA synthase complex beta subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.60 20.60 20.60 22.80 18.10 16.30 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.37 0.70 -6.01 22 260 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 146 27 85 256 20 281.80 45 67.47 CHANGED tl-IPVshuPsaEGERIR+sDMaVEhGGs+.o...uhELVpshph-E.lEDGKlpVlGPDIc-h-.EGs.phPluIll-luG+chpcDaEsVLERRlHcahNYspGlMHlsQRDhsWlRlSK-AhsKGhp.LcHlGclLhshh+pEF.sll-+lpVTlhTDtpcV......pchh.ppA+thYcpRDcRh+sLoDEsVDsFYuCsLCQSFAPoHVCllTP-RsuLCGAlsWhDu+AuhclsPsGPp.PItKGcslDsptGpapuVN-hltcpSpGsl-+lsLYSlhppPhTSCGCFEsIshhlPEssGlhlVsR-asGhTPsGhsFSTLAG.sGGGtQsPGFhGluhpYhpSpKFlpADGGhpRlVWMPKpLK-plp-pls.........c-hhDKIAsEpsuoo.EElhtFLccpsHPslsh.......-sh. 
...........................h.h.hush.tGE.lRt.ph.hEhsG.p.....shEhl.h.....tp....htDtplplhG.-lt..p.h.......u.........s........hu..hhhpl..G.cth.p.D...hEs....lh..ERphH.hhshh...pGhh.+..h..sQ..R.....hhRlsKtshttGht.hpphuphl..hhhtch.shl-phplhhhTp..tth.......p.h...s...at.RDt+htt..h.--ts-.aYsC.hCQsFuPsplChlsP-R.uhCGuhsaLDAKAstclcPsGPsQslsKtpslDEphGtapsVNEhVpchSpGslEcVsLYSlhcpP.TsCGChEslhhh.P..sGhhhspRta..s.sP.Gh.FsthAu.huGG.p..GahGhsh..h.p.+hh.u-.GG..RhlWhst.hKp.l...h............phhthlssEp.s.t.ttl..al........................................................... 0 37 65 75 +1505 PF03599 CdhD CO dehydrogenase/acetyl-CoA synthase delta subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.24 0.70 -5.97 9 289 2012-10-03 05:58:16 2003-04-07 12:59:11 11 5 140 16 156 518 200 303.70 26 74.61 CHANGED EVslGs.......pcpsVhlGG-csLaR....a-hsh.N.sslshDV.D.h.st..Ksltcchpclhpp...at+hpVtc.hshDhlsI+thSsDP.......cchscslEcVhpulshPlllsu......DP-VLctAhEVscsc+sLLhuAsh-.sacchschAhcYspsVls.au.tDlsthKsLs+plhp.sGl..ccIVhDPsT.ts.G.GlchohsshhpIRhsAlcG.Dc-lsaPI.uhsssAahsc............u.hVs.............................................pP.ts-ssh...Gss-.cuPla.hTs.............................GlshulAGspl.......................hhhl.PshsAsL+thhEshTGhpshVGstDsutlhshl 
......................................................ltlGs.......tppshplGG-s..s..L..ap...a..-tph....s.shluhcl.Dh.hs.p...t......pshhpth...pc....l..p..........plt..p..hts..-hl.sl.+..h.....h...u..s..c.s....................cchspslc.p.V.h.p.s.s.c..hPlllhus.....Dsplhctsh....csst...sc..p..sLl..h..u....Astp.....N....a..ct..hst.h.A.ht.as...t..s.....lhs.............u.................h......-..........l.......s....h..........h....c..........p..........L..........s..h..........h...........l...........t......p....hGl.............cclVhDs...ss.ts.......u......s....l..c.sh....s.sh..p.lRhu.A.lpt.Dc.lshP.h.h.s.h.s.h.p.sh....s.p..................................................................................................................................................................................................................................h................................................................................................... 0 73 128 142 +1506 PF02234 CDI Cyclin-dependent kinase inhibitor Bateman A, Mian N anon Pfam-B_1698 (release 5.2) & Pfam-B_5787 (Release 8.0) Family Cell cycle progression is negatively controlled by cyclin-dependent kinases inhibitors (CDIs). CDIs are involved in cell cycle arrest at the G1 phase. 20.70 20.70 21.20 22.00 20.00 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.62 0.72 -4.17 43 388 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 133 2 195 364 1 50.30 34 25.13 CHANGED Rshhh..Ps.cpElcc.aptt.cc.pcchpc...KaNFDFhs-pP.L..s..GR...Y-W.c..ls ......................thFs...Ps.p..pElp.c.hptthp..p....h..pcptpc...KWNFDFhs-pP.L....p..G..+....acWpcl.............. 
0 39 84 138 +1507 PF03261 CDK5_activator Cyclin-dependent kinase 5 activator protein Mifsud W anon Pfam-B_4160 (release 6.5) Family \N 25.00 25.00 42.40 27.30 21.40 21.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.29 0.70 -5.11 5 198 2012-10-03 00:42:12 2003-04-07 12:59:11 10 3 77 10 122 170 0 210.00 37 88.33 CHANGED MGTVLS........LSPuS.....RpusLa--.tcs...GussLusYTusssuKuuKu..EKsL.......KRHShlIsALoWKRLVAutSsKKKsSKKuosN...............sSSuYpspltpLN+ENlcKS...........hPhs..h.LsssNhuoacp............................sPupssAPs....supLuuKss.s...............lus.csAPs.t.susGtoP+RVIVQASTSELLRCLG-FLCRRCYRLKcLSPuDsVhWLRSVDRSLLLQGWQDQAFITPANVVFVYLLCRDVlsG.E...luo-cELQAslLTCLYLSYSYMGNEISYPLKPFLVEssKEsFWDRCLslIsphSsKML+INADPHFFTQVFoDLKNEGsp- ....................................................................................................................................................................................................................................................................................................h.......K+.....h.s.hsh+.hs.....tt...t..t............................................................................................................................................................................................................................................................t+hll.QASTsELL+sLG.Flpp..............+C.........tl.pp.....hpssc.lhWhRsVDRuLLLQGWQD.uFls....PANlVFlYhLsR-.ltt.p....................tp.t-Lpu.hLTCLYluYSYMGNEISYPLKPFLl..-ss+-tFWpRCl.llp.hSspMLplNspstaFTplFt-LKt.....t.................................... 0 27 46 88 +1508 PF05174 CDRN Cysteine-rich D. radiodurans N terminus Yeats C anon Yeats C Domain This domain is found individually and at the N terminus of a few multi-domain proteins. 
25.00 25.00 102.00 101.10 19.60 19.40 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.58 0.72 -4.50 5 6 2009-09-10 20:04:58 2003-04-07 12:59:11 7 5 1 0 6 7 0 55.30 73 18.45 CHANGED phastsLEQFSELRVRRNSTATRSIL.......RPAhCFALAPLA......KKLCHLFVKCSRLs ....astsLEQFSELRVRRNSTATRSIL.......RPAhCFsLAPLA......KKLCHLFVKCS+Ls. 0 6 6 6 +1509 PF03498 CDtoxinA Cytolethal distending toxin A/C family Griffiths-Jones SR, Bateman A anon PRINTS Family \N 21.60 21.60 21.70 21.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.12 0.71 -4.83 10 430 2012-10-02 19:42:32 2003-04-07 12:59:11 9 1 170 6 23 258 3 145.30 32 69.60 CHANGED hlSlhu.toGulLosathss+sal...Wshs.lcos-auch+s.phhsh.shGhlpF+Nsssus.Clss...htsGhhtst..Csss....shpplFsLlPosoGAVQIKS..lusGpClpsshssph....hhhslplscC..shup...psshsphWhIsPP...spsspP .........................................loIhu..oGhsLosathsstshl...Wshp.lsup.shtc.tRs.plh....s.saVphpNs.+sss.CLss......hts.Gh....hth...........Cpps......shtphF.....plhP.h.os..GAl....QI+s.....lsss..pClps...hsssh.....h.hpht.lscC.....s.th...ptshsp.ahlosP....htA..................... 1 3 15 19 +1511 PF00272 Cecropin cecropin; Cecropin family Finn RD, Bateman A anon Prosite Family \N 29.40 29.40 30.00 29.70 28.90 29.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -7.00 0.72 -4.39 28 182 2009-09-11 13:43:56 2003-04-07 12:59:11 14 1 57 10 56 210 0 30.50 49 49.92 CHANGED chhKKlE+sGpplRDAlIp....AuPAlsVlupA .KlhKKIE+lGp+lRDAhIp....Ap.AlsVlusA... 
0 15 26 47 +1512 PF02927 CelD_N celD_N; N-terminal ig-like domain of cellulase Griffiths-Jones SR anon Structural domain Domain \N 21.30 21.30 21.30 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.04 0.72 -3.73 42 561 2009-01-15 18:05:59 2003-04-07 12:59:11 9 36 362 16 169 519 19 88.60 25 12.13 CHANGED sstsstsss...tl+VNQlGYhPsusKhAslsssuss.....s.taplhsusGss.VhsGpspst.uspss.....................SGppV+hlDFSshp.ssGs.YpLpl..sGt..pSt .................hs...........lhlNQlGYhspu.s.KhAslhss.psp......................s.pap..l......h...s...s.sspp...Vh.p.Gphp.t...ss.st............................st.pp..ht.h.lDFSshp.ssGp.Yhlph..ssh.....p..................... 0 100 148 161 +1513 PF03500 Cellsynth_D Cellulose synthase subunit D Griffiths-Jones SR anon PRINTS Family \N 21.30 21.30 21.70 21.80 21.00 20.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.94 0.71 -4.51 2 86 2009-09-10 23:58:10 2003-04-07 12:59:11 8 1 83 16 36 84 2 124.90 30 81.81 CHANGED FoLFLQsLSWEIDDQsGIEVRN-LLREVGRGMusRl.PP.CpTlcpLQIELNALLuhIsWGhVpLELLuE-QuhRIVHEsLPQVGSAGEPSGTWLAPVLEGLYGRWlTSQsGAFGDYVVTRDVDAEDLNuVPpQTIIhYMRsRS ..................hth.....................pshL+phGpplAppaPLPsucTlu-LEpslNtlLschsWGaVplc..sp-suLpltHpAhPh...s..stucst.t..pW...h...s...A.lLEGlYupWLpuQuG.u...............................thsh.t.t................ 1 1 9 23 +1514 PF03552 Cellulose_synt Cellulose synthase Bateman A anon Pfam-B_1346 (release 7.0) Family Cellulose, an aggregate of unbranched polymers of beta-1,4-linked glucose residues, is the major component of wood and thus paper, and is synthesised by plants, most algae, some bacteria and fungi, and even some animals. The genes that synthesise cellulose in higher plants differ greatly from the well-characterised genes found in Acetobacter and Agrobacterium sp. 
More correctly designated as 'cellulose synthase catalytic subunits', plant cellulose synthase (CesA) proteins are integral membrane proteins, approximately 1,000 amino acids in length. There are a number of highly conserved residues, including several motifs shown to be necessary for processive glycosyltransferase activity [1]. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 722 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.18 0.70 -6.24 15 1407 2012-10-03 05:28:31 2003-04-07 12:59:11 9 33 149 0 613 2401 43 403.60 33 73.03 CHANGED VDlFVSTVDPLKEPPLVTANTVLSILAVDYPV-KVSCYVSDDGuAMLTFEuLuETA-FA++WVPFCKKasIEPRAPEaYFutKIDYLKDKVpssFVKERRAMKREYEEFKVRINALVAKApK.....................................lP-EGWsMQDGTsWPG........NNsRDHPGMIQVaLG.sGucDl-GNE...............LPRLVYVSREKRPGYsHHKKAGAMNALVRVSAVLTNuPFILNLDCDHYlNNSKAlREuMCFMMDPslG++lCYVQFPQRFDGIDhsDRYANRNTVFFDINM+GLDGIQGPVYVGTGClF+RpALYGY-PPpspch.c.s.........ssCChGp+Kpspstppsp........................................p..pcp-pptshashp-l--s....h..s-.E+t.lhoQpslEK+FGpSslFlsSThhpp.....................GGlPc..sssPAsLlKEAIHVISCGYEDKTEWGKEIGWIYGSVTEDILTGFKMHsRGWRSlYChPKRsAFKGSAPINLSDRLHQVLRWALGSVEIFFSRHCPlWYGat.tRLKaLpRlAYlNohlYPFTSIPLlsYChLPAlCLlTGKFIlPoLoNhAulaFLsLFlSIhsTulLElRWSGVoIE-WWRNEQFWVIGGsSAHLFAVFQGLLKVLAGlDTNFTVTSKuu.s...tD--FuELYlFKWTTLLIPPTTllIlNlVGlVAGlScAINsGYtuWGPLFGKlFFuFWVIlHLYPFLKGLMGRQNRTPTIVlVWSlLLASIFSLLWVRIsPFlscssusshp.p 
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hp.......t.......t.......G.......W.Shhh...........................................h....G.s.s.sh.t.....h.Q.hRW..G.h...............ph.hh.s.p.........s.....sh.....hh............s.......................t.h................th...a.......................s..h.........hhY.h.h.s......h...h........h.......t...........h................................................................................................................................................................................h....h.............................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................... 0 59 399 517 +1515 PF03040 CemA CemA family Bateman A anon Pfam-B_1775 (release 6.4) Family Members of this family are probable integral membrane proteins. Their molecular function is unknown. CemA proteins are found in the inner envelope membrane of chloroplasts but not in the thylakoid membrane [1]. A cyanobacterial member of this family has been implicated in CO2 transport, but is probably not a CO2 transporter itself [1]. They are predicted to be haem-binding however this has not been proven experimentally [2]. 20.80 20.80 21.00 21.00 20.50 20.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -4.75 53 734 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 614 0 83 652 29 213.40 50 80.88 CHANGED hu+h.pshsSlpYLhhL...lhlPhhlshhh+phhlpPhlspaW..NspQsplFLNs.QEcpALc+hpchEEhlhhDphl.p.....tsphp.pclphpI+ccslpLschasp-ulpsIhplhoslluhshhsshhlhG+cc.LtlLpSalpEhlYuLSDThKAFhIlLhTDlhlGFHSPHGWEllIsulhcHaGhscNcp..hIshFVuTFPVILDTlFKYWIFRYLNRlSPShVsoYHsMNE ..................................................t+hKshsslhYLss..l...VhLPWhlshs..hpKs.LEsWl.s.N...WW.....NTtp...Sc...hh...lstlQEc...s...hLc+F.hcl..EELhlL-cMl.c................-....h.s..pTH...........Q...c..l..c..ItI..HKETIQLlchaNE-p.lchIhHh.TNlIsFshlSuah..............IL...Gpcc.LsILNSWlQEFhYsLSDThKAF.ILLlTDLhIGFHSsHGWELhIsulhcc.aG..hs..cN-p..IISsLVSTFPVILDTIhKYWIFRaLNRlSPSLVVIYHSMN-..................................................................... 0 16 53 72 +1516 PF03879 Cgr1 Cgr1 family Wood V, Bateman A anon Wood V Family Members of this family are coiled-coil proteins that are involved in pre-rRNA processing [1]. 
22.50 22.50 22.60 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.22 0.72 -10.74 0.72 -3.88 18 165 2009-09-10 18:08:07 2003-04-07 12:59:11 9 5 161 0 121 151 0 105.60 41 66.03 CHANGED pst..scGhRlNGKsW+spKcsFRssu.......phTSaEpRtpcRhpppthKt+EKELK-EKEspRp....p+IptlK-RRttKEEKERYE+MAtKMHtK+VERh+RREKRNKhL+p ...................s..scGhphsGKsW+s.s.Kp..s.FRsps.....................t.hTSaccRhpcRtptpthKp+pKElK-EKEs-R.p.....p+Ipt.l+-RRttK.EE.+....E.R.Y.E+hAt.K......M.H.t.K+...VERhKR+EKRNKhLpp.......................... 0 35 66 103 +1517 PF04752 ChaC ChaC-like protein Mifsud W anon Pfam-B_3722 (release 7.5) Family The ChaC protein is thought to be associated with the putative ChaA Ca2+/H+ cation transport protein in Escherichia coli. Its function is not known. This family also includes homologues regions from several other bacterial and eukaryotic proteins. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.26 0.71 -4.32 15 1371 2012-10-02 16:39:48 2003-04-07 12:59:11 7 12 1051 0 511 1064 243 169.70 37 77.62 CHANGED lWVFGYGSLlW+PuFsas-phhual+Ga+RpFh.ssscHRGTscpPGRVsTLhcs.t.........................utshGsAY+lsutph.ttslpaLp.REh..sG.ptppl.hh...................psssspssh.psllaluos...pNptYhGss.sl-chAp..............pIssAsGsSGsNt-YLFpLtctLcpls............................hcDcaLhcLtptVcct 
.................................................lWlFGYGSLh........W...p.....Psh.....t.......as..E......ph........s.up.ltGa+RsFh.h.............t....sp....s.tRG..Ts..cpP..GRsluLcc..u...............................................................GpshGl..AYRlst..p..p.h..cp..t.lphLhcREh...hs.....t.phlshah.............................................................p..ss.st..h......pA......lsalhs................cpspYt...ust...sh...pslAt..............hIusAsGs.GsNt-YLhpLtptLpphG..............................hpDctLppLhttVt..h.................................................................................................................................................. 0 128 251 387 +1518 PF00195 Chal_sti_synt_N Chal_stil_synt; Chalcone and stilbene synthases, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The C-terminal domain of Chalcone synthase is reported to be structurally similar to domains in thiolase and beta-ketoacyl synthase.\ The differences in activity are accounted for by differences in this N-terminal domain. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.48 0.70 -5.11 21 3069 2012-10-02 12:25:54 2003-04-07 12:59:11 14 18 1289 101 485 4736 327 174.40 49 54.42 CHANGED sslEph++AQRA-GsATlLAIGTAsPsNsVsQusYPDYYFRlTpSEHhs-LK-KFcRhC-+StIKKRYhaLTEElL+cNPslCsahuP.SLcsRQ-IslsEVP+LGKEAAtKAIKEWGQPKS+ITHLVFCTTSGVDMPGADYQLo+LLGLpPSVKRlMhYQQGCaAGGTsLRLAKDLAENN+GARVLVVCSEhTslsFRGPS-s+...hDsLVGQALFGDGAAAlIlGoDP ................................................................................................................................................................................................................t.....h....h..........p........h.h...t..t..............s...lppRh..hh.....h..................................................t.......................h...............................................s...s....h.....p.Rp.c...h...h..h.....c...s..s..c.L.up.-.AAhc.Al...c.....E.....W......G.....t.....s..t.....o....cI.TH.l.l.h......s.T..o..o..G...l.s.......h....P..G..s..D.h..p.Ls.p.hLG..L.+..s.oVc..Rlhh..a..p....G..CaAG.us..........s..L.R.h.A.K.D...L....A....E....N....N....t....G....A..R.V.LV..VC...SE....l.T....u..l..o.....F...R......u.........P....s...-...s...+..................L.-u.L....V.......G..pA.....L...FGDG.A.A.A.l.I.V.GuDP............................................................. 0 109 314 416 +1519 PF02797 Chal_sti_synt_C Chal_stil_syntC; Chalcone and stilbene synthases, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain of chalcone synthase is reported to be structurally similar to domains in thiolase and beta-ketoacyl synthase. The differences in activity are accounted for by differences in the N-terminal domain. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.90 0.71 -4.29 21 3003 2012-10-02 12:25:54 2003-04-07 12:59:11 10 16 1211 100 498 3983 257 132.70 50 39.80 CHANGED ELVSAuQTlLP-ScGuIsG+LpEhGLTFHLh+DVPtlIScNIEcsLhcAFsPlGIs......DWNSlFWlsHPGGPAILDQVEtKLsLcPEKLcAoR+VLSEYGNMSSACVlFlLDEMRKpShccGtsTTGEGL-WGVLFGFGPGLTlETlVL+SVs .........................................................................phhtuuQTllPDS-...uA.IsG+.L.R..E.s..G.....L..T.F.H.L..h.+.-.VPsLISc...NI.c...c..s..L.......sc.A..F....p......s...l......G..I.o.................D....W...N...s..l....F..W..I.u.HPGGPAILD...pV..Et+L.......sL..c..c.....K.lcuoRcV...L.uEYGNMS.....SAs..V.LF.l.L.D.-.h..R.c.p..........s..........h.....p....p.......t.....................t.......p..........................h.....c....hG............l...hhu.aGPGhshEhh...................................... 0 132 331 436 +1520 PF02431 Chalcone Chalcone-flavanone isomerase Mian N, Bateman A anon Pfam-B_2073 (release 5.4) Domain Chalcone-flavanone isomerase is a plant enzyme responsible for the isomerisation of chalcone to naringenin, a key step in the biosynthesis of flavonoids. 
23.00 23.00 23.10 23.10 22.80 22.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.18 0.71 -4.92 60 585 2009-09-11 05:35:35 2003-04-07 12:59:11 10 8 320 25 257 589 20 182.60 24 69.08 CHANGED FPss.l.p.ssss.........................pshhLlGt...................GlRslshh...tlKhhulGlYlpss.........slspltsphpucssp....................................-hpcu.phacsllsu.sh..cphhRlshl+s.lshpphp-tlscslhuphpths....................hs......-ppspAlccFpph...F..psc.shPtGsslhhph..sssG.s......Ls.luasppsp...............tpthuslcsch........lucslhppYl.GcpslSPss+cSluppls.sl ............................................................................................pth.LhGh........................................G..hRshph.....lphhulG.............lYlptp...............sl.t..h..tt..hhtp.st....................................-.tps.thapslls....s....s...h...cphh+lshl.hs.lshpphp-thtcshhsphpphsh............................p...-sttpul...cc..Ft.ph.....F....psc.shs.Gsslhhph.sssG..s..............Lp..lshs.t..................tp.thuslpsph...........lscslhp.al.Gcp.ssS.ts+pslspth.t......................................................................................................................... 
0 78 167 221 +1521 PF03502 Channel_Tsx Nucleoside-specific channel-forming protein, Tsx Griffiths-Jones SR anon PRINTS Family \N 20.50 20.50 21.40 20.80 20.10 19.30 hmmbuild --amino -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.70 0.70 -4.57 44 1502 2012-10-03 17:14:36 2003-04-07 12:59:11 8 2 831 6 162 633 25 237.30 33 88.51 CHANGED tsshpWhphshhhu.t.phs.....pps-....csYLEhEaGG+suhhDLYGYlDl.slt..sspsscp...ttt.sphFhchtPRhSlDthoGcsLuaGPhpElYh.......us.hsast.............................s.ss....hhhGlGsDl.sshhsphshNlaupYshpsh..............pasGaphp.ssWhtPahhhtssshloapuah-apFutcpthts.........psssGhshhhulaW+.o-ca.......ssGhsh+hacshh.htst...............huhcooGhuaYhslsYcF ..........................................................................................................................h.p.h.apShslhss.p.chu.......phss......psY.LEhEsuuphsWh-hYGahDh.shhs...tpspst..........s.oshahchcPRhSIctlsssp.hsFGPap-hYh.......As.s...h.as..............................cpss....ahhGlGhDl.oGhshhhphNlYt+Yp.psY...............pasG...aphp..ltahsshs.hhhsupphohpuas-a-asp...Dtu.sssp...........tspsul.s.uuhh..Lths..hs+a.......phulshRYacsththt..s.............hsh.cs.hG.uhhhslsYpF.............................. 1 24 50 109 +1522 PF03924 CHASE CHASE domain Yeats C anon [1] Domain This domain is found in the extracellular portion of receptor-like proteins - such as serine/threonine kinases and adenylyl cyclases [1,2]. Predicted to be a ligand binding domain [1]. 
20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.08 0.71 -5.08 191 1214 2009-01-15 18:05:59 2003-04-07 12:59:11 8 223 668 14 443 1200 120 181.20 18 21.82 CHANGED sstsl.spscacsasps...lhpphsuhpuhs..at.plstpc..h..statpphppp......thst..apl...........t..................sps.hhslhalpPhs.tNppAlGhDhtopst++tAhpc....Atpoups..slouslp..LlQss......ptGhl....lhhPVa.tt.........................t.....pphhGhlhushchsslhpshhtpp.........tplslplhD.................spssssplh.ass ........................................................................................tphtth...htt...h.tt...hssh..u.ht....h......p..lt...tp....h..tthtt...phttp...........h.......hpl........................................tts.hhslta..l...........Phs....tNppsl..GhDh..ts......p..sp..p..p..ps.ltc....Atpssps..slouP.......lp...L......lQss............t.G.hl..lhhPVa...p.t.....................................tp.................pphhG..........h.lsushchsslhpshhh.tt............plslplhD..................ts.tt...hh.............................................................................. 0 131 283 371 +1523 PF03173 CHB_HEX Putative carbohydrate binding domain Bateman A anon Pfam-B_8666 (release 6.5) Domain This domain represents the N terminal domain in chitobiases and beta-hexosaminidases EC:3.2.1.52. It is composed of a beta sandwich structure that is similar in structure to the cellulose binding domain of cellulase from Cellulomonas fimi [1]. This suggests that this may be a carbohydrate binding domain. 
25.00 25.00 26.20 25.30 20.30 19.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.97 0.71 -4.79 6 353 2012-10-01 21:34:18 2003-04-07 12:59:11 8 7 306 4 75 309 10 155.80 34 18.70 CHANGED sNLclphpllcshsuppshsCssht......s.huuCh+lphohsspspsscuc.DasIYFpSl+hshps-u-pFtIsHlsGDLHKLpPTstFuGhsuGcohslplsuphWQlhcoDhhPphal...ousstcPcsltsTs.......TE.........-lstFlsshs...ssQhKpTscDpsshtsuss .........................................................LtlphplhsNpuuptGhsCttht.........A-hAuCsps........phsLs........NpG.pss......uc.DWsIYFp.S.I.Rh.l..........L........p.l.s.s.s.......p..............F...........p.ls+loGDLa+lpPTcpFsGhusG-slplPhluEYWplhpoDhhPphal........sussAcP.clltshs.......T-..........DhstaVpshp.......sp...ph..+R..o.....s...Dps.hhps..t..................................... 0 18 32 55 +1524 PF03174 CHB_HEX_C Chitobiase/beta-hexosaminidase C-terminal domain Bateman A anon Pfam-B_8666 (release 6.5) Domain This short domain represents the C terminal domain in chitobiases and beta-hexosaminidases EC:3.2.1.52. It is composed of a beta sandwich structure [1]. The function of this domain is unknown. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.92 0.72 -4.14 264 1239 2012-10-03 16:25:20 2003-04-07 12:59:11 8 244 639 4 431 1707 370 74.60 23 9.83 CHANGED ssshhssssG.shts...s.pls.....lss.tpsss.....sIaYThD.GopP........ots..S.phY..................s.s..P.lsl..ss.s...........l+shuh..sss.tpsSpltotp ..............................h....h.s.sG...h.t.....stpls.......l..ss..ttsss......pIhY.Th..D.G.o.p.P..............................s.t........S..th..Y.................................s..s..P..l.slspss.s............................l+shuh.....sss.tp.Sphhp..t............................................................................................... 
0 199 339 396 +1525 PF01339 CheB_methylest CheB methylesterase Finn RD, Bateman A anon Sarah Teichmann Domain \N 20.40 20.40 20.60 20.60 19.90 18.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.97 0.71 -4.85 180 3655 2009-01-15 18:05:59 2003-04-07 12:59:11 12 120 1959 4 1223 3093 363 180.30 37 42.15 CHANGED llsIG.uSsG.GspALppllsp...LPs.shs..h.sllllQHhsss.assthschLsptsslsVpp.Ap-Gp.hl..ps...GplYlAPsst.clt...........l..........p...ssp..........htlpht.....sthstt....+..Pol.DshFpSlApthusps.lullLoGhGsDGupGlttl+ptGGh.sl..uQct.pouhhhGMPcuAlp.sG.ssDtllshpcluptl.hph ..........lluIGuSTG.GspA..LpplLps.LPs..shP....sl.l.l.....s.Q.H.MPss..FT.p..uhA...c+L.s.ph..s.p.l.s.V+E.A.p-.G-..hl...ts...GpsY..lAP.G.sp.Hhtl.........tp....sup...................................lplptt.......ssssta....+..PSVDlLFcSsA.c..t..h.G.p.ps.lGVlLTGMGsDGAtGhhtl+.p.s.G.u.h..s.l..AQ...cE..so..s..l..VaGMP+t.Alp.h..G.ssspllsLsplupplh..h................................................................................ 0 399 770 1013 +1526 PF04509 CheC CheC-like family Waterfield DI, Finn RD anon COG1406 Domain The restoration of pre-stimulus levels of the chemotactic response regulator, CheY-P, is important for allowing bacteria to respond to new environmental stimuli. The members of this family, CheC, CheX, CheA and FliY are CheY-P phosphatase [1,2]. CheC appears to be primarily involved in restoring normal CheY-P levels, whereas FliY seems to act on CheY-P constitutively. CheD enhances the activity of CheC 5-fold, which is normally relatively low [1,2]. In some cases, the region represented by this entry is present as multiple copies. 
20.20 20.20 20.20 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.22 0.72 -4.22 33 1993 2012-10-01 19:50:22 2003-04-07 12:59:11 7 14 662 13 597 2015 46 37.70 36 24.19 CHANGED httuAlpEluNIhsGsssouLuphht.tpl-hosPslth ......phsulpEluNIhhGsAuTuLSphls.pplslosPpl..h.............. 0 261 447 522 +1527 PF03975 CheD CheD chemotactic sensory transduction Bateman A anon COG1871 Family This chemotaxis protein stimulates methylation of MCP proteins [1]. The chemotaxis machinery of Bacillus subtilis is similar to that of the well characterised system of Escherichia coli. However, B. subtilis contains several chemotaxis genes not found in the E. coli genome, such as CheC and CheD, indicating that the B. subtilis chemotactic system is more complex. CheD plays an important role in chemotactic sensory transduction for many organisms. CheD deamidates other B. subtilis chemoreceptors including McpB and McpC. Deamidation by CheD is required for B. subtilis chemoreceptors to effectively transduce signals to the CheA kinase [2]. The structure of a complex between the signal-terminating phosphatase, CheC, and the receptor-modifying deamidase, CheD, reveals how CheC mimics receptor substrates to inhibit CheD and how CheD stimulates CheC phosphatase activity. CheD resembles other cysteine deamidases from bacterial pathogens that inactivate host Rho-GTPases. Phospho-CheY, the intracellular signal and CheC target, stabilises the CheC-CheD complex and reduces availability of CheD [3]. A model is proposed whereby CheC acts as a CheY-P-induced regulator of CheD; CheY-P would cause CheC to sequester CheD from the chemoreceptors, inducing adaptation of the chemotaxis system [4]. 
25.00 25.00 34.30 27.90 21.80 21.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.33 0.71 -4.31 150 1074 2009-11-17 14:42:28 2003-04-07 12:59:11 8 8 935 2 412 920 66 114.90 30 61.85 CHANGED MLPsssttptt.........ss.s+YushAh-hLlscll+hG.Ap+ppLpAKlhGGAphhs........shhsIGpRNschs+chLpppsIsllucDlGGstuRplhF.s.poGclhl+pl..........tpt.p.....h ................MLPpsstspst..........sss+YushAhchLlschl.c.h.G..A.p.....+..p..cLpAKlhGGAphhs..................shhslGpRNschscchLpppsIsllAcDlGG.s.tuRplhF.s.poGclhl+pl........t......t........................... 0 151 288 351 +1528 PF01739 CheR CheR methyltransferase, SAM binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_694 (release 4.2) Domain CheR proteins are part of the chemotaxis signaling mechanism in bacteria. CheR methylates the chemotaxis receptor at specific glutamate residues. CheR is an S-adenosylmethionine- dependent methyltransferase - the C-terminal domain (this one) binds SAM. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.07 0.71 -5.02 22 3912 2012-10-10 17:06:42 2003-04-07 12:59:11 13 179 2109 2 1263 3557 543 191.20 32 48.37 CHANGED NhTpFFR-stpFchLpppll.s.l.phptt...pp.l+lWSAuCSoGcEPYSlAhsltc...hh.sshs..thpIhATDIshpsLppA+tGhYspptl.psls.thhp+YF.ctsstsap....lpscl+phVpFchhNLhs.....shhsphDlIFCRNVlIYFcppsppcllp+htstLpssGhLhlGpSEsls.thsshFphhpsshthh 
.......................................................NhTtFFR-sppFphLpcpll...h..t.ptt..................tp..lRlWSAuCSoGEEPYS.lAhhLt-............hh...p.h....h...s.......phpIhATDIss..ps..L..pc..A..+....pGlY...s.......tpp.......l........c........s......l.s.................p...h........h........p.........+........a...F..........c.......t.....p........s......s...p...ap.................lppp.l.+p..hV..p.F..pphNLlp..............h....s.......h......h....s.........p....a.....D...lI.h.C....R.N.V.h...I..YF.c....p.p...s....p.pc...l.lppFt.t.t.Lp.ss.Gh..La..l...Gp...oEsls...th..sphap.h.......h................................... 0 424 817 1056 +1529 PF03705 CheR_N CheR methyltransferase, all-alpha domain Bateman A, Griffiths-Jones SR anon Pfam-B_694 (release 4.2) Domain CheR proteins are part of the chemotaxis signaling mechanism in bacteria. CheR methylates the chemotaxis receptor at specific glutamate residues. CheR is an S-adenosylmethionine- dependent methyltransferase. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.39 0.72 -4.60 77 3379 2009-01-15 18:05:59 2003-04-07 12:59:11 10 136 2012 2 1043 2654 197 56.40 24 14.23 CHANGED scpchptltphlhppsGlslss.hKpshlppRLt.pRhcthslpshscYhphL...pssppp ..................ptcappltphlhppsGIsLss..tKcshlhpRLs.c+.l..c.t...h..s..l..s..sascYhphL...pps..................... 0 348 678 867 +1530 PF01584 CheW CheW-like domain Bateman A anon Pfam-B_579 (release 4.1) Domain CheW proteins are part of the chemotaxis signaling mechanism in bacteria. CheW interacts with the methyl accepting chemotaxis proteins (MCPs) and relays signals to CheY, which affects flageller rotation. This family includes CheW and other related proteins that are involved in chemotaxis. The CheW-like regulatory domain in CheA [1] binds to CheW, suggesting that these domains can interact with each other. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.71 247 10977 2009-01-15 18:05:59 2003-04-07 12:59:11 14 74 2190 12 3275 8780 671 135.50 21 33.57 CHANGED phlhhpluscp........aulsltpVpEllph..sp.hsplspss.shh.......Gl.lslRG....pllPllcLpphhshsstp................tppshllllc..hs.................sphhGllVDplhshhplshcslpss......hs...thsshlpGsshh.......cu......p...............llhlLDlppllpt.t ............................................hlhhp.lu.scp.........aulslt.pVpE..llph......sp..hs...pls...p...ss...shh..................Gl.l.sl..RG.........pllPll..c.L.p..p.h.h.s...hsssp....................................................tppshll.llp....hs.................pphh.Gll.VD.plhs......h.h.p..l....sh.c.s..lpss.................ht...t..ss.h..ls.Ghshh.............cs.........p.....................lhhllDlppllt...h............................................................................................. 0 1032 2073 2737 +1531 PF01111 CKS Cyclin-dependent kinase regulatory subunit Bateman A anon Sarah Teichmann Domain \N 20.90 20.90 21.90 22.20 18.10 20.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.52 0.72 -4.14 33 488 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 312 20 323 442 1 71.50 58 58.47 CHANGED pIhYS-KY.DDp.aEYRHVlLPK-lsKhlP+s..........+LhoEsE.WRsLG.......lQQS.GWhHYhlHcPEPHILLFRRPhs .........................IaYSs.KY.D-p.aEYRH.....VhLPKcl...sKhlPKs.........................+LhoEpE.WR.s.LG.......lQQ........ShGWhHYh.lH..cP.EPHILLFRRPl............. 0 119 178 263 +1532 PF04344 CheZ Chemotaxis phosphatase, CheZ Mifsud W anon COG3143 Family This family represents the bacterial chemotaxis phosphatase, CheZ. This protein forms a dimer characterised by a long four-helix bundle, composed of two helices from each monomer. 
CheZ dephosphorylates CheY in a reaction that is essential to maintain a continuous chemotactic response to environmental changes. It is thought that CheZ's conserved residue Gln 147 orientates a water molecule for nucleophilic attack at the CheY active site. 26.70 26.70 26.90 26.70 26.60 26.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.47 0.70 -4.68 60 1206 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1062 4 279 728 51 184.90 44 83.43 CHANGED ssplapclGpLTRpLH-uLpshtlDp+l.......ctAsscIPDA+-RLsYVlchTEpAAs+ThsAVEtuhPls-pLpspspplpspWpchhppplphs-....F+pLscchcpaLpp.sppsosplpspLsEIlMAQDFQDLTGQVI+RVlsLVp-lEppLlplLh.hu.scpp.tht......................pGP.lss-p+sDVVsuQD-VDDLLuSLGF .............................................s..clhtcIGpLTR.L+-SL+-L...u..lDptl.......tpAs-sIPDARDRLtYVlpMTtQAApRsLsuV.......EtupPhp-phcpp...Ap.tLp..tcWsc......hht..s......I...-ls-................h...R...pLspcsc.paLtp.Vstc.ouh...ssupLhEIhM..AQDFQDLTGQVIKRhhcllpElEcpLlhl.....Llpsh...sp...ppt.cstc..ps................................psL.hNG.PQl....s..s.....p..+..s..s....V..V.....u..SQDpVDDLLsSLGF.................................................................................... 0 63 151 211 +1533 PF00187 Chitin_bind_1 chitin_binding; Chitin recognition protein Finn RD anon Prosite Domain \N 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -8.90 0.72 -3.97 18 1066 2010-01-08 13:37:10 2003-04-07 12:59:11 14 39 234 154 390 1128 1 38.80 51 11.02 CHANGED ApQCGpQuGGthCPssLCCSpaGaCGoTs-YCGs..GCQ.SpC .................ppCG.........s....s....t....hC....s...s......s......l...CCSpaGaCGoTs-Y..C......Gs...........GCQ.u.C................... 0 83 225 335 +1534 PF01644 Chitin_synth_1 Chitin_synth; Chitin synthase Bashton M, Bateman A anon Pfam-B_892 (release 4.1) Family This region is found commonly in chitin synthases classes I, II and III. 
Chitin a linear homopolymer of GlcNAc residues, it is an important component of the cell wall of fungi and is synthesised on the cytoplasmic surface of the cell membrane by membrane bound chitin synthases [2]. 25.00 25.00 25.10 29.30 24.60 24.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.82 0.71 -4.46 14 884 2012-10-03 05:28:31 2003-04-07 12:59:11 12 12 402 0 349 874 2 143.70 53 23.85 CHANGED MYNED-lLFsRThHGVMKNIuHLCoRp+SpsW..GtDuWKKVVVCIVSDGRsKlp.RsLshLAulGVYQ-GlAKs.VssK.VpAHlYEYTTQlSIDsshpacGsc....+GlsPVQllFCLKEcNpKKINSHRWFFpAFu.lLpPNlslLLDVGT+PuspSIYpLWKuF ................................................hYNEsch..hhsRThtulhpNIt.hsp.....h....p......p......SphW...........G...t......s..uWpKIVVClVuDGR..sKls.RThslLAuh..GVYQ.-.........G.lA..Kp.......p.VN..GK.-V.sA.HIY..E..............YTTQ..lu..l.s.....p.tp..h....h.t.hp.............pshsPVQhlFCL.KEKNpKK.INSHRWhFpAFuphL.......pPp.........lClL...............lDsGT+Pu.polYpLWcuF................................. 0 111 202 304 +1535 PF03142 Chitin_synth_2 Chitin synthase Mifsud W anon Pfam-B_1787 (release 6.5) Family Members of this family are fungal chitin synthase EC:2.4.1.16 enzymes. They catalyse chitin synthesis as follows: UDP-N-acetyl-D-glucosamine + {(1,4)-(N-acetyl-beta-D-glucosaminyl)}(N) <=> UDP + {(1,4)-(N-acetyl-beta-D-glucosaminyl)}(N+1). 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 527 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.68 0.70 -6.12 10 1072 2012-10-03 05:28:31 2003-04-07 12:59:11 10 35 361 0 640 3978 47 331.20 28 34.17 CHANGED ssshlacslssQPhs-apsaGasLtHTIClVTCYSEuEEGLRsTLDSlAsTDYPsSHKLLlVICDGlIpGuGND+oTP-IVLsMMcDhlsPs--lEPhSYVAVAsGsKRHNMAKVYAGaY-Yss.......pls.ppQQRVPhlVVVKCGTPsEsottKPGNRGKRDSQlILMsFLpKVhFDERMTsLEaEha+sIWplTGlsP-FYEhVLMVDADTKVaPDSLT+MVAsMl+DPpIMGLCGETKIANKcsSWVThIQVFEYYISHHhoKAFESlFGGVTCLPGCFoMYRIKAsKGspshWVPILssPDIVE+YSENVVDTLH+KNLLLLGEDRYLTTLMLKTFPKRKphFVPpAtCKTlsPDoFpVLLSQRRRWINSTVHNLhELVLl+DLCGTFCFSMQFVVFIELlGTlVLPAAIsFTlYlIlhulls...pPsPhIsLlLLAhILGLPAILIllTsR+WsYluWMllYLLALPlWNFVLPuYAaW+FDDFSWGsTRpVpGEct..KcstscsEGcFDsSpIsMKRWcEaERE ........................................................................................................................................................s...................................................................................................................................................................................................................................................................................................................................hllhlDuDs.h...p....u....lphhlt.....h...........p..s.....p..l......huhCG.....p.........h.....t......l.............t..........s.........t.............t......p.......u....h........h.......s.......h....h...............Q....a.....EYh.lua..hh.t..K.uhEu.hh.G.s.VhCh.P.G.CFshaRhps..................t..............................................................t...t.l.h..........p...p...Y...s..p.................s.....-......s.....h..a....h............p.h.........h.........................h..G.....E...DRa..L....o.o....L....hl..p....t..........t....hp....hp.......a....sspAhshThsPc....sap..............hhsQRRRW....h.s........S...T....l....p..N.....h.....h......-........L.......h....h........h........t...p........h.......p.....h.......h....
....h....p....h.s..h..............h...l...l........h..h.....p....l...h....u.....s....h...lh.......P...s......s...h..h......h.......h...h.....h......h........................h..........h........h.ht.............t.............h..............h.......h........s...h..........h.........h....l...s..h..h.......h..s.....l......h....h.......h..h....h....h.......h...................t...p.h.....h...h...h.....h......h..h......h....h.h................................................................................................................................................................................................................................................................................................... 0 272 404 566 +1536 PF03503 Chlam_OMP3 Chlamydia cysteine-rich outer membrane protein 3 PRINTS, Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 62.80 62.80 17.80 16.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.44 0.72 -3.75 3 38 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 37 0 4 13 0 53.30 81 60.81 CHANGED CGVVSLSSCCRIVDCCFEDPCAPpsCNPCEs.+KKD+ssGCNuCGoYVPSCSKPCG .CuVVSLSSCCRIVDCCFEDPCA.PhpCsPCEu.+KKDVssGCNSCsuYVPuC.KPCG 0 1 1 3 +1537 PF03504 Chlam_OMP6 Chlamydia cysteine-rich outer membrane protein 6 PRINTS, Griffiths-Jones SR anon PRINTS Family \N 27.20 27.20 27.50 31.00 26.80 27.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.32 0.72 -3.73 2 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 44 0 5 50 0 91.30 82 20.13 CHANGED CaG+MYsV+VNDDpNVEIoQAVPEYATVGSPYPIEIhAsGK+DCVsVlITQQLPCEsEFVpSDPATTPTuDuKLlWpIDpLGQGEKsKITVWVKP .........CaG+MYoV+VNDDpNVEIoQuVPEYATVGSPYPIEIhAsGK+DCVsVlITQQLPCEAEFVpSDPATTPTuDGKLlWKIDRLGQGEKsKITVWVKP. 
0 2 2 4 +1538 PF01308 Chlam_OMP Chlamydia_OMP; Chlamydia major outer membrane protein Finn RD, Bateman A anon Pfam-B_1429 (release 3.0) Family The major outer membrane protein of Chlamydia contains four symmetrically spaced variable domains (VDs I to IV). This protein is believed to be an integral part to the pathogenesis, possibly adhesion. Along with the lipopolysaccharide, the major out membrane protein (MOMP) makes up the surface of the elementary body cell. The MOMP is the protein used to determine the different serotypes. 19.60 19.60 19.70 20.10 18.50 19.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.33 0.70 -5.86 4 1235 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 64 0 7 951 0 269.70 66 99.06 CHANGED MKKLLKSALL.AA.huSsuSLQALPVGNPAEPSLLIDGTlWEGhuGDPCDPCuTWCDAISlRsGaYGDYVFDRVLKsDVsKpFp.MGAsPTusssA....s.oTss-RPNPAYGKHhpDAEWFTNAualALNIWDRFDVFCTLGAosGYhKGNSuuFNLVGLhGlKGoSl...sAsplPNsulopGVVELYTDTTFSWSVGARGALWECGCATLGAEFQYAQSKPKVEELNVlsNsAQFoVpKP+GYhGssFPLPloAGT-sATu..TKsATIsYHEWQVGhALSYRLNMLVPYIGVpWSRATFDADsIRIAQPKLAsAlLNLTTWNPTLLGpuTslsoo.NcFADhhQIVSlQINKhKSRKACGVuVGATLlDADKWulTuEsRLINERAAHlsAQFRF ....................................................................................RhGaYGDaVFDRVLcs.D.VsppFp.MGtt.P..o..s.s..s..sus...s.oo..h..ssR.NPAYG+HMQDAEMFTNAAhMALNIWDRFDVFCTLGAooGYLK.GN.SASFNLVGLFG..sst..p..s.s.....tss....plPNhulsQ...uVVELYTDToFuWSVGARAALWECGCATLGApFQYAQSKPKVEELNVLsNsApFTIpKPKGYVG..t..p..FPLsloAGT-s..ATs.....TKsAoIcYHEWQsuLALSYRLNMhsPYIGVpWSRASFDADTIRIAQPK.Ap..s............lhshTThNPTlhGt..us.s..h..s......s.....s.....s.hsD........................................................................... 
0 4 4 6 +1539 PF00504 Chloroa_b-bind chloroa_b-bind; Chlorophyll A-B binding protein Finn RD, Bateman A anon Pfam-B_54 (release 1.0) & Pfam-B_5772 (Release 7.5) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.63 0.71 -3.97 157 2672 2009-01-15 18:05:59 2003-04-07 12:59:11 16 22 422 33 911 2617 772 131.40 25 66.18 CHANGED hPstL.sG....ph...sG-hG........FDPLGLu...t.-s.......................................................thchhRpuElhHGRhAMLuslGhlssEhh.t.........sh.....t.Ps....ahp.ss..............................................h.s......h...hh...lhhhuhhEhhp.tshhs..t.t.........................hhPG..s...............h.FDPl....G.hs.........ts.pphtph+hpElpNGRLAMlAhhGhhsQth ......................................................................................t.......aDsh.t.h.....t...................................................................thth...ptsElhpuR...hAMLGh.h.GhlhsEhhst................s.........t.............hht...s.................................................h...hh......hh.hhuh.h..E..h..h..t..t.h...................................................hPG..s..........................hssh........s.hs..........p.....t....tht.hphtElpNGRLA.Mhuhhuhhhp..h.................................................... 
0 418 719 840 +1540 PF02962 CHMI 5-carboxymethyl-2-hydroxymuconate isomerase Griffiths-Jones SR anon Structural domain Domain \N 21.30 21.30 21.40 21.40 20.70 21.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.57 0.71 -4.00 3 831 2012-10-01 20:38:22 2003-04-07 12:59:11 10 4 769 15 152 460 70 119.90 39 95.22 CHANGED PHFhlECTDNIREpuDLP-LFuKVNssLAAo.GIFPlGGIRSRAHWLDTWQMADGQH.DYAFVHMTLKIGAGRSLESRQ-sGDMLFsLIKsHFAALMESRhLALSFEI-ELHPTLNaKQNNVHALF .........................................PHhlhEhosNlc-ps.cls.s.Lh.pplspsLh......s......o.....G........lFP.l.u.G..IRSRAhh.h-..sap..h...A..DG.........pp....-hAFlHhoL+IGuGRShEs+pplu-t.LFsllps.+.h.As.l.htp+.h.lALShEltEhc...s.h.sa..Kp.NslH...................................... 0 18 59 108 +1541 PF04428 Choline_kin_N Choline kinase N terminus Kerrison ND anon DOMO:DM04048; Family Found N terminal to choline/ethanolamine kinase regions (Pfam:PF01633) in some plant and fungal choline kinase enzymes (EC:2.7.1.32). This region is only found in some members of the choline kinase family, and is therefore unlikely to contribute to catalysis. 20.40 20.40 20.40 21.00 20.10 20.30 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.53 0.72 -4.50 21 146 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 123 0 97 147 1 62.40 31 9.87 CHANGED sshtsut.h..s...sp..LD.shs..h............................h+p-lhpllpoLplssWtcls.-tss ................t....ss.ph.lPs..scshLDNShshsY.................................FKp-IlcLh+oLclpuW++ls.p...t.... 0 16 47 78 +1542 PF01633 Choline_kinase Choline/ethanolamine kinase Bateman A anon Pfam-B_1165 (release 4.1) Family Choline kinase catalyses the committed step in the synthesis of phosphatidylcholine by the CDP-choline pathway [1]. This alignment covers the protein kinase portion of the protein. 
The divergence of this family makes it very difficult to create a model that specifically predicts choline/ethanolamine kinases only. However if [add Pfam ID here for Choline_kinase_C] is also present then it is definitely a member of this family. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.44 0.70 -4.76 38 1798 2012-10-02 22:05:25 2003-04-07 12:59:11 15 27 951 30 795 4525 1654 199.50 21 51.55 CHANGED pspplLlRlYGps..s.................chhhsRppEhhshphlucpulGPcLhGhFssGRlE..cFl.sucsLssp-lpssclpttIApphtchHslsh.........................................................hpcpstlapphcca.......................h..pphspppthpthshppLpcElstlcchl.p...............thcps...lVFCHNDLt.uNllh......................................................................psspplhlIDaEYuuaN.tuaDlANHFsEhshshp..t...ahh..chshaPspcp .............................................................................................................................................................................................t..pthllR.h..h.Gtt....s......................................phh...l.sR..p.p.....Ehh.sh...thl.t.p........hs...h...s......s.........p..h...h..........h..........h....................p........u...t......hp...........ca.....l...p............u............p...s....L..s..s.......p....p...l.......p....p.............t......h....h........p.....t.....l...A....p.t....h......tph.H....s....h.th..........................................................................................................h.p.t...t......t.h.a..p..p......h...p..c..a...............................................................t..t......t.....t...p...t...t..h..........h......h.......p......h....p...p.....l....p.....p...p.l......t.....p...l...c..p....hl...p......................................p..h.t..t.s............hs...hC...H..N..D..........lh....t.Nllh....................................................................................
...............................................p..p..p.....s.....p....l....h......l...I...D.a...EY..uu..hN.......saD..l..u...s..a........h...h...Eh.t...hs...t..............h................................................................................................................................. 1 255 439 638 +1543 PF04345 Chor_lyase Chorismate lyase Mifsud W anon COG3161 Family Chorismate lyase catalyses the first step in ubiquinone synthesis, i.e. the removal of pyruvate from chorismate, to yield 4-hydroxybenzoate. 21.00 21.00 21.30 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.93 0.71 -4.82 3 1188 2012-10-01 19:33:20 2003-04-07 12:59:11 8 2 1158 11 198 690 245 159.80 37 92.75 CHANGED hLsNATWQpuDDlp.luPulpsWLh-pGSLTRRLssts-.clsV-lLuEuWh...TLpsDEsQtLsscpusssWlREVIL+GcDpPWVFARTLIPRSSLcsQsFDLsQLGsRuLGEhLFSsSshcRssLEVs+scss.......htuLaARRSRhShGAcsMLVAELFL.TPpIao+ssl .............................................hhh.......................h........lt.s.t...l..h.-WLh..psShT+Rhcpp.s.p...pl.oVp.......hlpEsas...............p......sp..s.Ep.h.L...hs.p.....ps..............h..W.l.REllLpu..D.u..p.P..Wl..huRTll.P.hooL...............pG.s.t...h.s.L.p...cLGppPLGc.h.LF......os.s..s..ls...R...Dhl..-..l...u+ss...............................tL.W.u.RRS+h.ch.s....G....c....P...LL.lTElFL.tssla.................................................. 0 34 86 143 +1544 PF01723 Chorion_1 Chorion; Chorion protein Bashton M, Bateman A anon Pfam-B_1914 (release 4.1) Family This family consists of the chorion superfamily proteins classes A, B, CA, CB and high-cysteine HCB from silk, gypsy and polyphemus moths. The chorion proteins make up the moths egg shell a complex extracellular structure [2]. 
20.90 20.90 20.90 21.80 20.80 20.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.54 0.71 -4.39 14 91 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 6 0 62 96 0 136.40 30 89.51 CHANGED huphlLhlsspALhsQoshSQChuR....luu.t.....Gh.GhsuLG.YpGhGh.sh....GhG.Ghts.h.usus...LsAshGGsLsVso.ousAPTGLul..sSENsY....EGsVuVsGpLPFLGTssVAGphPTuGhGulpYu...CGDGAVuIouEsuh......uhs.s.shs.s.h.us.s.uhu......hGYtshs.GCGC.........Gs .................................................................s.......h.h.shhhp.hhuph.........h..........................s..s...s.......hs.......Ghuh............uhu...................................htus.G..s........l.s..sshuPsGlsh..tups.th.................-GsluVsGpLPhhGossltGphPs.GhGsVpau...CusGsVuIsscss...............shs..h.......................................................h............................. 1 39 62 62 +1545 PF03964 Chorion_2 Chorion family 2 Finn RD anon DOMO_DM03001 Family The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary[1]. 21.90 21.90 26.90 26.90 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.80 0.72 -3.46 14 51 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 21 0 18 55 1 106.00 35 72.01 CHANGED GYGusshG...s.u...psh.p................Ytt...........................s+G.ttuYGpt........................t.hhu.sssspAtutAt.u.AussuGs.p.hthPs.cltsu.hGPph.Ga.........G......hGtst .................GYGusshG....GYuh.spsshp.hsh................uYGs...........................upG...suYGps..........................t.htu.sssupAhAhAphAhAAsssGs.h.at.Pshclpsu..hGPp.ua........h.G...h....t.... 
0 4 4 13 +1546 PF00425 Chorismate_bind chorismate_bind; chorismate binding enzyme Finn RD, Kerrison ND anon Pfam-B_164 (release 1.0) Family This family includes the catalytic regions of the chorismate binding enzymes anthranilate synthase, isochorismate synthase, aminodeoxychorismate synthase and para-aminobenzoate synthase. 20.50 20.50 20.50 20.60 19.70 19.60 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.53 0.70 -5.06 163 11419 2009-01-15 18:05:59 2003-04-07 12:59:11 13 52 4438 63 2457 8400 4662 249.70 32 53.38 CHANGED spppatptlppshpt.I..psG-...hhpls...Lopphphp..t..ths......s.........hslhppLpp......p.ssssashhh.............ph...................................h..luuSPEhhl.................p..hc................plpopPluGT...t.....R....uts.t.........................p......DtthtppLhpspK-psEphhlVDhlRscLuplt....s....so...........lplsphhplpp....hsslhHLsoplpu.pL.pts.hsh..h-llpulaPsuulsGsPKtpAhph....IpclEs.h.......sRGh.YsGslGals..su.....s......s-hslsIRoshh....ps...........hplhuGuGIVtcSssppEapEsttK .......................................................................s.pppatptlpps.p.ch.I..p..p..G.-........hapVl.lop..phphp....h........hs....................s...........................................hsh.a.pp.....Lpp......t..Nsu.sYhaah............phss...............................................htl..lGuSPEhhl................................c....hc...st...............plpopPlAGT...t...sR...........u.t.s..s..p.............................p.....Dppht.pcL..hsstK...............-puEphMlVDLhRNDl.u+ls.......suo................lcV..s..chhplcp.....a.sp....VhH.......LsS.plpG....p...............L......pt....s....hss..................hc.hl.p.u.ha.PsG...oloGAPK.hcA.h.......pl.I.p.c.l.Es...t............pRGh..YuGulGalsh.....sG.......................s..............h-hslsIRohhl...cs....................................spshltAG.u.GIVhD.SsPpsEapEottK............................. 
0 741 1558 2097 +1547 PF01817 CM_2 Chorismate_mut; Chorismate mutase type II Bateman A, Griffiths-Jones SR anon Bateman A Domain Chorismate mutase EC:5.4.99.5 catalyses the conversion of chorismate to prephenate in the pathway of tyrosine and phenylalanine biosynthesis. This enzyme is negatively regulated by tyrosine, tryptophan and phenylalanine [2,3]. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.53 0.72 -3.89 101 5974 2009-01-15 18:05:59 2003-04-07 12:59:11 16 36 4103 55 1304 3816 1745 78.70 27 28.68 CHANGED RppIDplDcpllpLLscRhplstpluchKppp.....shs.lhcs.pREppllcplhptsc....tshs..sphlpplappIhctuhthppsh ........RppIDplDppllpLLscRhclspcVuchKtpp............shs..lh.cs...pREpplL..pcltptuc........ttsls...schlppl...applhctShthpp..h............................. 0 373 798 1089 +1548 PF01264 Chorismate_synt Chorismate_synth; Chorismate synthase Finn RD, Bateman A anon Prosite Family \N 21.00 21.00 24.20 24.00 20.70 20.40 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.01 0.70 -5.96 158 4715 2009-01-15 18:05:59 2003-04-07 12:59:11 16 5 4374 29 1174 3448 3602 336.60 44 93.20 CHANGED hRhoTaGESHG.ulGsllDGhPAGltlspccIpt-LsRR+..PGt...u+.hso.RpEsDpVcILSGVh.cG......hTsGoPIullIpN....pDh+stcY..........................schtst.RPGHADhshthKY.G..h+DhRGG.GRuSARETAuRVAAGAlA+plLp...phG.....IclhualhplGsh........................phshp.h.......cp......ss..lhssDspstpchtphIcp.s+cpGDSlGGll-llspG......lPsGLGpPl.aD.KLDAcLApAlMSIsAlKGVEIG..sGFpsuphhGSpspDph......hhpst.............hh.pp....oNcuGGl.GGISsGpsIllRsAhKPssolhp.stpTVs.pstc..psplphtGRHDsCllPRAssVsEAMlAllLADthLc 
..................................hRhoThGESHG.uLssll-GsPsGltl.stt-..Ipp-.LcRR+sG..t......uR...h.so.RtEs..DpVcILSGVh..cG..........h.T....T.GoPIulllpN...pDp+s...t-Y..................................................................................sp.h..h.st..R.PGHADhshthKY.G..h.R..DhRGu..GRSSARETAhRVAAGAlAKphLt...p......h.G.............lcltualsplGsl.....php........................................................hhs.hp.pl......cp......ss.hhss...D.....ssttpphcphlcp.h+.+.pGDSlGuhlpllspG......lPsG.LGpsV..aD..+LDAclApAlMSINAlKGVElG..sGFpss.phpGSpspDEh.................hh.s.st...............................htoN+uGGl.GGlS............sGp......sIls+hAhKPhsolhp..P.hpols.pssE......pspl..hpGRaDPCls.RAsslsEAMlAlVLhDtlLp...................................... 0 394 771 1014 +1549 PF02417 Chromate_transp Chromate transporter Bateman A anon Pfam-B_1872 (release 5.4) Family Members of this family probably act as chromate transporters [1,2]. Members of this family are found in both bacteria and archaebacteria. The proteins are composed of one or two copies of this region. The alignment contains two conserved motifs, FGG and PGP. 
28.60 28.60 28.70 28.60 28.50 28.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.96 0.71 -4.66 103 4595 2009-09-10 17:38:28 2003-04-07 12:59:11 10 9 1631 0 1528 4268 786 171.40 25 85.64 CHANGED shhpLFh..hFh.plGhhuFGGuhuhlshhccphV.cppcWlopcpFhchlAluQhhPGPhslphushlGa+...h......sGhhG........ulsAsluh...hlPuhllllhluhhhtp..hpp..tshhpshhpGlpssslullhssu.....hph.sps.............hhtsh..hshhlh.shs..hlhlhhhp....h.....sslh.lllhushhGhh ...................h.hhplFh.hFh.+lGhhsFGGuhuhlshhp..p..phV....p..............p............h...p..........W....lo.......p...ppFh.chlAluQhhPGPh..sh...phu...sa....lGat.......h................sGhh.G.........................................................ullAsluh...........hLPuhllllhl.uhhhpp.........hpp......ps..h.....l.p.uhhpGlpsu.llullhsus......................hp.h.spp................................shtsh....hsh.s....ls....shs.....h..l.h...l......h.h..h.p..........l..........s.hh...lllhuulhGh............................................................................. 0 510 999 1289 +1550 PF00385 Chromo chromo; Chromo (CHRromatin Organisation MOdifier) domain Finn RD anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.93 0.72 -4.29 173 5794 2012-10-02 16:56:36 2003-04-07 12:59:11 19 330 405 83 3614 5547 121 53.30 24 6.52 CHANGED htl-cIl..cp+..hpcptt..................paLV+Wpu.hs....pcs.TWEs..tc.slppp.....p...lptatpp ..........................................l-cll...spc........htctpt.......................................pYhVKWcu..as.............pcs..TWEs...cc..plptt.......ph...lpta.......................... 1 1222 1720 2571 +1551 PF01393 Chromo_shadow Chromo shadow domain Bateman A anon Medline:95396576 Domain This domain is distantly related to Pfam:PF00385. This domain is always found in association with a chromo domain. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.74 0.72 -4.13 8 489 2012-10-02 16:56:36 2003-04-07 12:59:11 14 9 177 24 286 474 0 56.80 49 28.14 CHANGED RGLEPE+IIGATDSs.....G-LMFLMKWKsoDEADLV.AKEANhKCPQlVIuFYEERLTWHS .......................RGL-PE+IlG.A.T.Do.s..............G-..L..h..FLhKW.KssD....c...AD..LVsA+pANhKCPQl.VIpFYEE.RLoW+........ 0 69 99 194 +1552 PF00878 CIMR CIMR_repeat; Cation-independent mannose-6-phosphate receptor repeat Bateman A anon Pfam-B_764 (release 3.0) Family The cation-independent mannose-6-phosphate receptor contains 15 copies of a repeat. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.17 0.71 -4.43 12 1464 2012-10-02 14:19:21 2003-04-07 12:59:11 13 33 108 57 767 1357 0 136.00 23 77.56 CHANGED shpcsosIpFhCNcssh.s...sPVFhtEs......sCoYaFEWcTthACs...hpslpCplpDstu.pcYDLSuLo+tsc...sWcAVDsots.t.++pFaINVC+....hstspuCP.ssuusChVsc.spShNLGhhtpuPpht.sGslpLpYssGDhCs ....................................th...hsohIpFhCsps..s....u.............pP.h.h.h...p..ps..........sCpahFpWpTshA.C...............s...................p..........sh..............s................Cp...l.........p.......c....s.p.......t....p.p.aDLo...s.Ls...ptpt.............s.a.s..h.....ss....t......tt........................t...ahlNl.Ct...............................s......t.....s.....t.....s....C.....s....ts...u.....usC........h......p..........t.......s..........p..........s..........h..........s......l......G.h.....h...p.....p....t...............ph.....ss............l....l.Y..sGp.C........................................................................................ 
0 238 267 463 +1553 PF02464 CinA Competence-damaged protein Mian N, Bateman A anon Pfam-B_2197 (release 5.4) Family CinA is the first gene in the competence-inducible (cin) operon, and is thought to be specifically required at some stage in the process of transformation [1]. This Pfam family consists of putative competence-damaged proteins from the cin operon. 20.90 20.90 20.90 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.55 0.71 -4.77 102 4443 2009-09-13 17:09:44 2003-04-07 12:59:11 12 14 3786 2 995 3093 2194 153.50 36 58.87 CHANGED tpsLsptlsphL.tppsholusAESCTGGhluutLTslsGuSshFpGGhVsYoscsKpplLGVspphLppaGAVScpsAptMApGs.pptht.....uDhulu.lTGlAGPs.GuotpKPVGpValuluttst.......stsp.chpFsG.sRpp..lRppusptALphLhchL ................h.pLsptlsphL..ppcshslusAESCTGGhlAusloslsGu.SshFcsGhVT.Y.Ss.cs.KtphLuVs...t...p...sLp...p....a...G..A..VSc.tsAtpMAp.GA..h.pt.s.p....................uD..hulu.lTGl..AG..P...s......G.............u............s....c...t+.......PV......G.T.Vah.uhu.....tpss.................shs....p..p.h..........p..a.......s..........G.....s......Rpp..lRppusthALphLhp................................................... 1 323 652 857 +1554 PF04162 Gyro_capsid Circo_coat; Gyrovirus capsid protein (VP1) Bateman A anon Pfam-B_1772 (release 7.3) Family Gyroviruses are small circular single stranded viruses. This family includes the VP1 protein from the chicken anaemia virus which is the viral capsid protein. 
25.00 25.00 95.40 95.30 21.30 17.40 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.74 0.70 -6.24 22 315 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 11 0 0 241 0 348.00 94 99.88 CHANGED MARRARRPRGRFYAFRRGRWHHLKRLRRRYKFRHRRRQRYRRRAFRKAFHNPRPGTYSVRLPNPQSTMTIRFQGVIFLTEGLILPKNSTAGGYADHMYGARVAKISVNLKEFLLASMNLTYVSKlGGPIAGELIADGSKSQAAENWPNCWLPLDNNVPSATPSAWWRWALMMMQPTDSCRFFNHPKQMTLQDMGRMFGGWHLFRHIETRFQLLATKNEGSFSPVASLLSQGEYLTRRDDVKYSSDHQNRWRKGpQPMTGGIAYATGKMRPDEQQYPAMPPDPPIITuTTAQGTQVRCMNSTQAWWSWDTYMSFATLTALGAQWSFPPGQRSVSRRSFNHHKARGAGDPKGQRWHTLVPLGTETITDSYMuAPASELDTNFFTLYVAQGTNKSQQYKFGTATYALKEPVMKSDAWAVVRVQSVWQLGNRQRPYPWDVNWANSTMYWGoQP .MAR.RARRPRGRFYAFRRGRWHHLKRLRRR.Y....KFRHRRRQRYRR.....RAFRKAFHNPRPGTYSVRLPNPQSTMTIRFQGlIFLTEGLILPKNSTAGGYADHhYGARVAKISVNLKEFLLASMNLTYVSKIGGPIAGELIADGSp...SQAAp....NWPNCWLPLDNNVPSATPSAWWRWALMMMQPTDSCRFFNHPKQMTLQDMGRMFGGWHLFRHIETRFQLLATKNEGSFSPV..ASLLSQGEYLTRRDDV.KYSSDHQNRWRKGcQPMTGGIAYATGKMRPDEQQYPAMPPDPPIITsTTsQGTQV.....R.........CMNSTQAWWSWDTYMSFATLTALGAQWSFPPGQRSVSRRSFNHHKARGAGDPKGQRWHTLVPLGTETITDSYMsAPASElDTNFFTLYVAQGTNKSQQYKFGTATYALKEPVMKSDuWAVVRVQSVWQLGNRQRP................................ 0 0 0 0 +1555 PF02443 Circo_capsid Circo_ORF2; Circovirus capsid protein Bateman A anon Pfam-B_1890 (release 5.4) Family Circoviruses are small circular single stranded viruses. This family is the capsid protein from viruses such as porcine circovirus [1] and beak and feather disease virus Swiss:Q9YUC8. These proteins are about 220 amino acids long. 
25.00 25.00 25.00 30.20 24.40 24.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -4.99 12 1556 2012-10-04 01:49:40 2003-04-07 12:59:11 10 1 51 1 0 1258 0 196.30 62 95.87 CHANGED phphRRRhtRs......hhRRRahRt......RhR.RRh.hpRRtapsNRlYshRlsRpaphplp.ptoss.ushpWssDhlshsLsDFL.ss........................sssshpLPFEcYRI+LAKhEh+Pths.hs.p.cGhGpTslIpDu+lpcFpppsshs.DPLAsaDGA+pW..p+GF.KRLh+PKPQl..oIsDhssu.NpoAALWLsss+shWIPl.....p.hs.suts+VcHYGlAaSa.pP.tss.....hsYpsplTlYVpFRQFsh ..............................R.R.RhtR...........hhRRR.hhh......RY..R.hR..R+.................N.tIashRLp.RTFGYTlK...pTTVpTPSWAVDMMRFNIsDFLPPG.......................GGoNPhSlPFEYYRIRKVKVEFWPCSPITQ.....GDRGVGS.oAV..ILDDNFV..TKATAL....TYDPYVNYSSRHTIsQPF.S.YHSRYFTPKPV.LDpTIDYFQPN.NKRNQL.WLRLQTs........................sNVDHVG.LGT....AFENS..hc......pYshclThYVQFRpFs............................................... 0 0 0 0 +1558 PF04487 CITED CITED Finn RD anon Pfam-B_3987 (release 7.5) Family CITED, CBP/p300-interacting transactivator with ED-rich tail, are characterised by a conserved 32-amino acid sequence at the C-terminus. CITED proteins do not bind DNA directly and are thought to function as transcriptional co-activators [1]. 
21.10 21.10 24.90 21.80 18.30 18.30 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.69 0.70 -4.11 11 168 2009-09-11 10:42:53 2003-04-07 12:59:11 7 2 46 2 72 157 0 163.30 35 97.93 CHANGED MA-H.MhAhsauhhssshtt......hphsMs.....u.stps.+s......hps....uslhHYuuu...uhcuuhtsRsu...........sG.shuhupPsus..s.hhas........................ss.p.p.Fh.....ssuspLhASMpLQKLNopY.G+............tsts......sGtPhssus.Q...aR........sGsu.ssuhpp..ss..hs.ulhDsDhIDEEVLhSLVlELGLDRlpELPELaLGQNEFDFhoDFsst.tsupVSC ........................................Msph.h.shsht.hsts.tt......hp.t.Ms...th.s.sp.ht.pt........hps....sthhHYsus...shpu.s.t.p.ts...................u.s.sh.s....P.t.t.............................................s......a......t.s.pL.ASMpLQKLNspY.sh................................................hts.....sup.htsss.p.................t................................................u.......s.sssh.t...................shhDsD.lDEEVLhSLVlEhGLDR....hpELPELaLGQNEF.DFhsDhss............................ 0 6 14 28 +1559 PF04223 CitF Citrate lyase, alpha subunit (CitF) Mifsud W anon COG3051 Family In citrate-utilising prokaryotes, citrate lyase EC:4.1.3.6 cleaves intracellular citrate into acetate and oxaloacetate, and is organised as a functional complex consisting of alpha, beta, and gamma subunits. The gamma subunit serves as an acyl carrier protein (ACP), and has a 2'-(5''-phosphoribosyl)-3'-dephospho-CoA prosthetic group. The citrate lyase is active only if this prosthetic group is acetylated; this acetylation is catalysed by an acetate:SH-citrate lyase ligase. The alpha subunit substitutes citryl for the acetyl group to form citryl-S-ACP. The beta subunit completes the reaction by cleaving the citryl to yield oxaloacetate and (regenerated) acetyl-S-ACP. This family represents the alpha subunit EC:2.8.3.10. 
19.50 19.50 19.50 19.60 19.40 19.40 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.52 0.70 -5.82 5 1131 2012-10-04 00:26:15 2003-04-07 12:59:11 7 4 925 4 116 1121 28 448.90 64 91.35 CHANGED DRKLsuSLEEAIc+oGLKDGMTISFHHAFRGGDalVNMVM-lIA-MGFKNLTLASSSLossH.sPLVEHIKNGVVT+IYSSGLRGsLA-sIS+GLLcEPVpIHSHGGRVHLIpSGEL+IDVAFLGVPsCDEFGNANGacGKAsCGSLGYAhVDA-YADpVVhLTEsLVsYPNsPASIsQDQVDhVVpVDAVGDPsKIGuGATRMTTNPRELLIA+pAA-VIsNSGYFK-GFShQTGTGGASLAVTRFL+EKMl+cNI+AoFALGGITAoMVDLHEcGLI-KLLDVQsFDSsAApSLARNPNHlEISANpYANPuSKGAuVDRLDVVILSALEIDTpFNVNVlTGSDGVIRGASGGHCDTAuAApLSIIVAPLVRGRIPTVV-cVsTVITPGoSVDVLVTDpGIAVNPsRPDLl-+Lpcss.lPVaoIEpLpERActlTGKPcPIEFTDKVVAlV+YRDGSlIDVV+QV ............................................................................................................................................................................................+KlsuSLEEAI++SGLcDGMTlSFHHAFRuGDhslNhVM-.hIA.cMGaKNLTLASSSLsssH.sP.LlEHI+sGVVopIYTSGLR.G.LuEpIS..c..G.LLt.pPVpIHSHGGRV+LlpSGELpIDVAFLGVPoCDEFGN..AN..G......h....s....G....K....u....sCG.SLGYAhVDAc.AcpVVhLTEpLlPYPp.sP.ASIsQDQVDhlVpVDc.VGD..ssKIuuGATRhToNPRELLIA+pAA-VIspSGYFK-GFShQTGTGGASLAVTRFLc-KM...cp+sIpAsFALGGITuoMVDLHEcGLI.......+.........KL...L.D..V......Qs.........FDp.sAAp..SLAcN.P.s...H...l...E...I..SAN.....p.Y........A........N.........uSKGAuV-..+LD.V.V.lL.S.A.L.EIDT..pF..NV.N..V..l.TGSDGV.lR.GASGGHsDTAhu.ut.....L.....S.......I....I.l..A..P..L..V..R..G.R.IPTlV-...pV...h...T.slT.PGuSVDlLVTD+GI.....A....VNPs...R...s...-LhEpLp.p.AG..l.lho..IE.L+-RAphLT.G.cPp..P..IEF.T.D...R...VVAVV+YRDGSVIDVV+QV.............................................................. 2 32 62 87 +1560 PF01874 CitG ATP:dephospho-CoA triphosphoribosyl transferase Bott M, Enright A, Ouzounis C, Bateman A anon Enright A Family The citG gene is found in a gene cluster with citrate lyase subunits [1]. The function of the CitG protein was elucidated as ATP:dephospho-CoA triphosphoribosyl transferase [2-3]. 
20.80 20.80 22.20 21.10 20.40 20.70 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.47 0.70 -5.20 135 1741 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 1304 3 316 1196 72 248.10 35 85.49 CHANGED pl....uphAhpAhhhElsh.PKPGLVchtssGuH.DMshtpFhp.Suhultshhtphspsuht...t..........ltplGhpAEpsMhtsTsGlNTH+GulahlGlLssAsut................................h.tttsstsstthhtslsthssshh.....t..............t........tpshohGpththpauhsGsptEstsGFstlhphulPthppsh..t................hs.ppAhlpshltlhuph.sDTsll+RuGhpsl.phlpppAppl....Lstu.............t.tt..hpplpph-pthh..pcslSPGGoADLLusolFlst ..............................................................................h....huhpAhlhElploPKPGLVDthssGAH.p.DMslt.sFhp.Suhult.sah.phhphGhptsth.........................hhttlR.lG..htsEtsMhpATsGlN..T...H+GuIFulGLLsuAhut..............................................hht..t.p.shs...pplspth.sp.hs.pslst................................h.tp.....ttphTtGpphhpphGls.....Gs....RGEApuGashVhptALPhhpphh..pp...............................ths.chAllpsLLh.LMuh..sDTNlhpR.u.G...t..uL..pa.lQp.pAppl....LtpG.................Gh.hs.ss.....hptLpphDcthl..c+s..lSPGGoADLLhhThFls.p............................................ 
0 62 165 244 +1561 PF03600 CitMHS Citrate transporter TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Family \N 29.60 29.60 29.60 29.60 29.50 29.50 hmmbuild -o /dev/null --hand HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.34 0.70 -5.87 32 7280 2012-10-02 15:12:49 2003-04-07 12:59:11 11 35 3249 0 1836 11155 2469 359.10 15 84.92 CHANGED Luhhhslshhh..llhhpthuslhuhhllsllhsLh..............................hs.hh.phlshls..slhLLFuhha.hslM.-oGlFDslsphlhchspGc.hclhhsssllsullShsGsusTsshlss.shhl.Lacphsls.hhlssslsluuslhs.hsPhGsPss........ps.hsslsh...sshhsshlPuhlsulhhhh...hhhhhhth+cppthtthph.htt..............................................................................................................................................................................tpshtptpsptttht+shlhhhshhLshslhshl.............................lhuh............h.s.lhhhhuhslsllhsa.slcpphp...th.ttstsshsslhhhhAhhlFsullstsuhhctluptlhsl............lstslu.hlsllsuls.ShPhshhhu.ps....hhslhhpsusthGls.h.hshsuslGs 
.......................................................................................................................h..h.h.h.......h..h.......................h.....s....h....h...h...h..h....h..h..h..s..h.hh.shh..........................................................h...h..t..p....h...h....s...t...h....s...............s.............l..........h...h..l........h...u..h..h....h...st.h...h.p.p...o.Gh...hp...hls....p...h...l...l....p....h..s.....t......s......p......................p.........l...h..h...h...l......h......l.........h...s...u..l..l..o.s.h.h......s...s.s...s.....s...s...h........l.....h....l.....P..l...s............l.......s....l...........t.............p..............p...........h...............p...........h.............s..........s.................t..............h...............h...h..h....s....l....s.....h......u..u........s..h....u.u.h.....h....T.......l...G...s...P..s.....s...h..h.l........................................ts...hts.h.s...h.................s...s....h.....h......t...h.....h....h....s...h........h...l...h...s...h..hh...hh.........hhh.h.hh.h.pp.tt.t...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...h.t.h.h..h....h..........h...h..l...h....h...s.h..h.hhh.....................................................hht.h.........................................h.....h...h....h....h...u.....h...h....h.....h.......h........h............h............t........h.................t......
..h.......p....p...ht.............................................t.....ht.h............hl...h...h.h...h....u.h.....h.h.h...s..t....s....l...t.......t.....h...s........h..h.t.....h..h...s.p.....h.h..th......................h.......h...h..h.................h..h....s..t....h...h..........s.......s..hh.h..................................................................................................................................................................................................................................................................................................................................................................................................... 0 639 1190 1535 +1562 PF00285 Citrate_synt citrate_synt; Citrate synthase Finn RD anon Prosite Domain \N 20.10 20.10 20.30 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.20 0.70 -5.72 121 9392 2009-01-15 18:05:59 2003-04-07 12:59:11 16 26 5081 103 2244 7278 4274 289.30 36 82.11 CHANGED GL.....psshsspopIohlDG...........ppGh..LhYRGYsIp-Ls.p.pss.............FEEss.aLLlhGc..L.PspppLppFppplttpp.t..........lsp...plh...phlps.hPp.sscPMshLpsulusLus.hp.t..........tstph...p..hcpuhcL.lAphPslsAhh..a+hp......pGtphltPcss..Lua...utNFLhMl..pspt............s.s.....t.hscsh-hhLlLaADH.thNuSTFssRlluSThuDhYuulsAuluuLpGPLHGGANptVhchlpcI....ss...........scpscpalcctlsp.pc..+...lMGFGHRVY+.shDPRApll+chscplstph.........sp....sphhclupplEchshp...h.p+.t...lhPNVDFYSullapth.GlPt..-hFTslFAluRssGWhAHhhEp....h.t.....ss+l.hRP 
.......................................................................h.sh.sh.otls.lst...........pts.....LhaRG.hsl.pLs.p..tsp..............................a.-sh.aLLh....G.c...........h.P.s.t.t.p.h...tt.hpttltt....t..............l....t...t.h..........thhtt...h......t....HPMs.h.h.sshsh.lssh......................................................t...tt.....shcl.h....up..hsslh..shh...a.+ht.........................s....h..h......s...p..ts......hshstp.F.lh..hh....h..stt................s..h.cshchhhh.La....A-H..t..............N.s............S.T.T..s...Rl...............s...u.S...............o.t.u.s..a.u.slsuuluu.L...hGPh.HG.G.A.N.Essl.c.MLpcl............ss.....................scpl.ca....l.t.+.t.h.cc....p-........+...................lM..GFGH.RV.....Y...+...shDP..RApl..h+chsc.clhpch..........ts....s..s..lhc.lAhc..lEclshp.......hhc+.+.......LaPN.........VD..........FaSGl....h....h.............ct.h..........G..............h.Pp....p........h..a..............T..............s..............l.FshuRhsGahAphhE.....ht.....t.tl..RP..................................................................................................... 0 694 1359 1866 +1563 PF03802 CitX Apo-citrate lyase phosphoribosyl-dephospho-CoA transferase Bateman A anon Bateman A Family \N 25.00 25.00 26.40 25.60 19.40 18.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.94 0.71 -4.82 29 1055 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 904 0 101 514 3 165.80 43 81.99 CHANGED pslsL.plLsuREtRsshQpphLpca.pt..sLlSlTlshPGslKsssthcplFstulpslpphhtptthhhhtp....sttTGPEuhhslsts.AtplKphhltlEcsaPLGRLaDlDVl...sspsptlSRpsluh..ssR+CLlCscsA+hCuRuR+HulpElhsplpchlpph..spc ...............................p.sVolsElLsuR-cRtARQ+th....L..p+a..ss...........PLlShTlssPGslKsStlh++lFspulsslpsLstcps.W.phpt.p...............th....lspsoG...PEuhl.uls.uP..Aps..LKtthlpLEcsHPLGRLWDIDVl.............ss-G...chLSRc....-huL..P.sR+CLlCpp.s.AtlCARu+pHsLs-Lls+hctllpshpt.s.......................... 
0 23 50 75 +1564 PF01214 CK_II_beta Casein kinase II regulatory subunit Finn RD, Bateman A anon Prosite Domain \N 20.10 20.10 22.20 21.10 20.00 17.30 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.35 0.71 -4.78 66 783 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 323 22 504 729 15 179.10 47 69.13 CHANGED sWIshFssh.hspcaFscV-c-aIpDpFNLhGLppp..lspappALchILDhpstp..................................pthppsp....tptl-psAptLYGLIHARaIlTspGlpt.....Mh-KYpp.ucFGpCPRlhCpsptlLP..lGlSDtsspssVKlYCPpCpDlYpP..ssctsslDGAaFGsoFPphFlhsa.sc..hhspt........................................................................ppahP....+laGF+ ..............................................pWIsWFCuh.cG.sEaFCE..VD--YIpDcF.NLs..........GLspp...V.s.h.Yc.............pAL-hILDlps..pp.......................................................t..s.....p.p.......pphl.EpuAchLYGLIHARYIl.T.s+Gltt..M.........h-KYpp.u-.....FG.pCPRVaCp....spshLP........lG...hS.DhP.....sps.sVKlYCP+C.....pDlYhP..po.s..+.....p...tp...l............DGAaFGToFPphhhhsaPp....hhspp.........................................................................................stppahP....+laGF+........................................... 1 212 307 426 +1566 PF03805 CLAG Cytoadherence-linked asexual protein Lawson D, Bateman A anon Lawson D Family Clag (cytoadherence linked asexual gene) is a malaria surface protein which has been shown to be involved in the binding of Plasmodium falciparum infected erythrocytes to host endothelial cells, a process termed cytoadherence. The cytoadherence phenomenon is associated with the sequestration of infected erythrocytes in the blood vessels of the brain, cerebral malaria. Clag is a multi-gene family in Plasmodium falciparum with at least 9 members identified to date. Orthologous proteins in the rodent malaria species Plasmodium chabaudi (Lawson D Unpubl. obs.) 
suggest that the gene family is found in other malaria species and may play a more generic role in cytoadherence. 18.30 18.30 19.90 19.80 16.80 16.70 hmmbuild -o /dev/null HMM SEED 1282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.23 0.70 -14.21 0.70 -7.22 10 213 2009-09-11 00:22:49 2003-04-07 12:59:11 8 2 15 0 39 216 0 436.10 45 84.97 CHANGED DNIpELKphI-N-ELYcNLspLE+hlLpoLEpDKLKlPlhps-s+.cYlDhSpFKll...cstssucscsYIlPThcoohcDIIKYEHlLKpQlhpcYss-ISDhIK+KlLlVRTLKllKlMLlPhNsYKcssDlKpALpELNclFpsc..ppppcccsslphpschFcpllpsl+tlKcppph.pth......DsKp-lh-hsDhFFTTssNIEFM-sLDpIoNpYGIGlaNhlGsHhIALGHFlsLKLALKpYcpYFEtGslKFaoWQKILpFshSDRFKlLDLMCDcsusYcsppKRRpoYLKscpouoopECNILEFLIHaFNKYQlELlppspcp-FslahhhEHKclK-cFFsFMCs..csKECsIYcospFKpEps--ssFs-pps.sFs........hhuPaNlYsNYaYFhKpYs-FsscpllYlHlLNLsGlLNs-scAYVoSLYLPGYYNAIQLSFp--ppLs-LapNLlKClcKCau.p++spo..p+.pslpsptcp-sSKCslCKGsFhYIN.Ks--ssSMLQKFYsYlTKlLclpplSoLlcshsIY--YsNFLoHDLNWYTFLFLFRlTSYK-IsppoVuEAMYLNLKDEDsh++ThVTsaWYPSslKKaYTLYlRK+lusNLL-ELEpLl+pcTIEKMKKSIpFllHVNSFLQLDFFHtLNEsPlG..RsaPLShlLEs+FK-Wh.shssGFaFlNYDcPptRtcLacKhcsp+FlsPKhscWschLK+lIcsAY-hYFpQRHVKNLaKaHssaNINNKIMLMRDSYELYhKNacDllFhADIhllRKYLouTPKl++lpDRhaYalHslhGNulNFYKYGlIYGFplNKchLKEVV-ELaoIYphNpcIFoDlSFLQTVYLLFRKIEpSatSHRRNDclSlNNIFFhNVusNYSKLsKEER.pEIHsSMASRFauKThFosFQhMFsopIS+clD+LDKhYGKAshlplsssEcAaLpFAYlY.GSIhDSlTNsLhPhYAKKPIsQLKYGKTFIhANYFhLuSplauhLNLNNLShLCEYQAIsSANaYShKKluQFls+KFlPllsYahhhRlts.hspsp...ttaphhpt.us.sp........................hhhpsshYhohYhusNLah-sstaFPssLssELp-QT-alptpp.ccKPslHuhspshlhullpuhshsFslaslspaYAFF-NllFalhsshRFhDRaaslhspYhssah+phh++hTsDllLKhhp+sYpshKKcGYYcEsIsARLssKpplpphltt-pspslhsslp.h-hcshp+.shshYsD-phhF-DlscNEpFLN-R 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................shhpFAYhYsGSIMD.SlTNsLLPPYAKKPITQLKYG+TFVFSNYFMLASphY-MLNYKNLSLLCEYQAVsSANaYSuKKlsQFlGRKahPlTThaLhhRIptohsahp......thphhs..ss............................................hts-sshaLaFaFFhNLYhDuhKaFPGGFusuL+EQTcH.Vppps.....a.+KP.VHths........................................................................................................................................................................................................................ 
2 14 17 38 +1567 PF01217 Clat_adaptor_s Clathrin_adapt_s; Clathrin adaptor complex small chain Finn RD, Bateman A anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.74 0.71 -4.47 7 3187 2012-10-04 00:47:01 2003-04-07 12:59:11 15 38 357 27 2033 2739 18 135.90 25 49.99 CHANGED hl+hlLlhs+pGchRLsKaY.sshssscc.pKhlcclhtslosRpsch................sNhlEhps..h+llY+pYAsLYFshsl-.s.-sELhhL-lIppFVEsLDphFs.NVCELDllFNFpclahll-Ehl.G.G.lhEoshppllcplt.hspht ..................................................................................hphlhl.h.s.ppGc.h.....hls+hY....t................s...............h...............s......p.....p..........p..........p......p....p....h...h....p...c..h.h....p...h.h.....h......s.+....t..t..p..h.......................................ss.h...l.......p......h......p......s........hph..lY..+.+...............a.s.......sL.a..hsss....s.......s.....ps.s.N.t.L.h...h..........l..-...hlp............p.h.l.-s..l...sc.a..F.........t........s.....l.s.Eh...s.l.h...N.a....h........la...l...........LDEh..l...s...G..h..h................p.Ts.p.lht.h......t.................................................. 0 669 1072 1604 +1568 PF00637 Clathrin Clathrin_repeat; Region in Clathrin and VPS Bateman A anon Prosite Family Each region is about 140 amino acids long. The regions are composed of multiple alpha helical repeats. They occur in the arm region of the Clathrin heavy chain. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.47 0.71 -4.59 150 4251 2012-10-11 20:00:59 2003-04-07 12:59:11 15 79 397 51 2715 4128 33 136.90 16 33.12 CHANGED l.php..shhshspllchhpc...tshhp.lhtaLpphh......s.pt..ppsslpstLhphhhct......sp.pplcphlp..phsp.....aDhtpluphs..ccpc.Lhptthh...lYpc..hsphpchhp...l.hpp.pthppsh..chstctss.....clhppllphhlsptphph ..............................................................h........phppllp...hh.p.......ts..h...h..p.t....Lh..taLpthh....................t...p...........pp.tslpstLhthhhct....................................pp.pplccalp................t.sp.........ach..tp......l.t.phs.......pc....tp...h....hptthh....lapc..........hsphpphlp.......h..hpp......p.....phppsh........ch.ht.c..h...ss....................p...hh.plhth.hp......h............................................. 0 886 1499 2201 +1569 PF01086 Clathrin_lg_ch Clathrin light chain Finn RD, Bateman A anon Prosite Family \N 20.40 20.40 20.50 20.60 20.20 20.10 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.84 0.70 -4.43 28 493 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 297 6 289 500 3 200.20 27 86.99 CHANGED MA..DcFPsl-s.sss........s.h..ssptsssu-FLuREcshLGD...pFto-pD.......hs.s-sD.................p.ssFppsFPslsststt.....sussushsGshht.s.......suhsphsphpsE..PEsl+cWRE++ctcIpc+-ctsccp+pEhhcKApccIDDFYpsYNpKp-KppupsR..................t-tE....pFLucR--h.opsGTpWERlAcLlDhssKus+...su+D+oRFRclLlsLKtptpAPGu 
............................................................................................................................................................s.........tstFL.tp..ppt..lus..................h.ps..................tss...............................................................t....st..ts.t........................t..ts.s...sut....t..s..........................t.....su..h.....s.t.h...s....p..ppE.....sEslR...cWREcpptcL..p...c..+....Dtppc...........cpctEhhccApccl--.aYpphscph-+s+t.sNR....................................tpE-......tFlppp-c....t.sGo...p..WERlAcLs.....D.....h.ss+ss+............................pspDhSRhRplLlpLK.....psPh................................................... 1 94 157 232 +1570 PF01394 Clathrin_propel Clathrin propeller repeat Bateman A anon [1] Repeat Clathrin is the scaffold protein of the basket-like coat that surrounds coated vesicles. The soluble assembly unit, a triskelion, contains three heavy chains and three light chains in an extended three-legged structure. Each leg contains one heavy and one light chain. The N-terminus of the heavy chain is known as the globular domain, and is composed of seven repeats which form a beta propeller [1]. 21.30 21.30 21.90 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.35 0.73 -7.82 0.73 -3.30 71 581 2012-10-11 20:00:59 2003-04-07 12:59:11 15 30 177 39 344 507 4 38.40 30 4.84 CHANGED sshspphsa.ssh..shhhlhslp.........stuplhlhslc .....tsst.IsacssscpphhhlhGls.........ststGphplhsV-.. 
0 90 164 246 +1571 PF03505 Clenterotox Clostridium enterotoxin Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 32.40 35.10 21.90 16.80 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.18 0.71 -4.92 4 72 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 20 25 4 63 0 172.20 44 58.07 CHANGED D+DLIGTLLIEusoSGSIIQPRLRNTTcPlFsoSNsTtFSQpYTEtRLcDAFNIpLFNTSTsLFKFVEEAPsNKNIshKsYNTYEKYELIcYpNGsIsscApYYLPSLGasEVosAPSstu.VVch.lspsuFIQsGPpEcIVlGVIsPSENIp.EIsTsIu-sYTYNIPshlssNPhYlLFsVNoTshYKIoscsNL ....................pllGs.lIpsuooG.lhp.pLRsospslhhs.ssspFspEYhcsplpssF.Nhp.hNp.shhFcFscpAPSNKNlhhhl..TY.+YElIch.pssIs-+A.hYlPSLGYscshphsS.tt......lsps............................................................................................................................... 0 2 4 4 +1572 PF03515 Cloacin Colicin-like bacteriocin tRNase domain Griffiths-Jones SR anon PRINTS Family The C-terminal region of colicin-like bacteriocins is either a pore-forming or an endonuclease-like domain. Cloacin and Pyocins have similar structures and activities to the colicins from E coli and the klebicins from Klebsiella spp. Colicins E5 and D cleave the anticodon loops of distinct tRNAs of Escherichia coli both in vivo and in vitro [1]. The full-length molecule has an N-terminal translocation domain and a middle, double alpha-helical region which is receptor-binding [2]. 
25.00 25.00 37.00 31.60 20.90 20.40 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.86 0.70 -5.19 8 84 2012-10-01 19:36:59 2003-04-07 12:59:11 9 10 43 12 4 97 0 272.40 34 49.83 CHANGED Mu......................................psG.sausp.ssa.shtt.s.............GGsspsG..uuoshu.s...................s.hPtshAh.......GlPuhhsPG.......sG........shulSlsus....slsAAlu-lhtsL+G.....stP.....................aKhshsGlulhulhPupIAcDc.sh.................hshlVooLPsDslT-sPlSsLPhspssVsVshRlsDlVcDspQplAlVsu..hPhsVPVVcA+P........TcpssVaoAul.PG.hPslplSVspssssspuhscuhssc......cstsspPAGaThGuso+..-AlIcFPccSGpcP.lYlSVoclLosspl...KQRQ-EEp+tppcWc ................................................G..........s..spG.sausp.ssasshtt.s.............GGsspsG..GuSuhussh...................hsstshAh........h.ulhhPG............tG........GhuVolpus....tlSAAhssl.suL+G...........s.P..........................huVuhhtl.Puphsc.pc.hh........................lhso..h-plT-hPsS..pLPhstssVsVphRlsDhspstpQhhullsG....hP.M.oVPVVDAhP........TphssVaoAsl.Ps.hPsL.lSVsptsssspsss.h.pps.......pstshh.AGaThuuNs+..DsllRFPcsSGhsP.lYlSss-lLssspl...pQRQ-tENptppca........................................................... 0 2 2 4 +1573 PF03513 Cloacin_immun Cloacin immunity protein Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 27.90 59.60 20.90 19.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.67 0.72 -3.94 6 22 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 17 7 6 26 1 80.50 54 72.32 CHANGED GLKLcLsWFDKKTE-FhGcEYSKDFGDDGSVIESLGlPLKDNINNGsFDVccsWVPlLQPaFKNpI-hDKahYpISFDYRDu ..GLKL+LpWFDK+TE-FpGcEYScDhGDDGSVIEpLGhPlcDN.INNGsFDVpppWlslLQPaFpppI-hs+apY.lSFDYRDt... 0 0 2 5 +1574 PF01785 Closter_coat Closterovirus coat protein Bashton M, Bateman A anon Pfam-B_1309 (release 4.2) & Pfam-B_6985 (release 8.0) Family This family consist of coat proteins from closteroviruses a member of the closteroviridae. 
The viral coat protein encapsulates and protects the viral genome. Both the large cp1 and smaller cp2 coat protein originate from the same primary transcript [1]. Members of the closteroviridae include Sugar beet yellow virus and Grapevine leafroll-associated virus, closteroviruses have a positive strand ssRNA genome with no DNA stage during replication. 21.30 21.30 22.20 21.60 19.90 21.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.18 0.71 -5.25 11 1326 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 50 0 0 1130 0 171.40 35 75.75 CHANGED hshpshs.sssspLssc-h+plhtphppplpsch.........sss--chhhtluhhLhphushSTSsKlsh..psstoh...shs.scchslpcu.lhshlpuph.htspsNsLRsauRoaEctYlshspthhpt.sptptsuK+GlPuthpYLuADFlsGosstho-c-pAshLhApcsALc+p.usts-pplssLppLG+h ......................................................t.....hh.s.s.lshs-h+.l.sp..thlspch.........shss+-ht..ha.luhhlhplsspSoShpscc.sssshoa....spt..Gh..c..hcls-chhhshlhss.hsh.s..s.psNsLRhauRop-shalshs+ps+sl.s.sttshctGlPst.aaYLsADFLsGu..slo..-.h-pu.sh...l.A+cphLc++tu..cps.lhNlpph................................................ 1 0 0 0 +1575 PF00574 CLP_protease Clp protease Bateman A anon Prosite Domain The Clp protease has an active site catalytic triad. In E. coli Clp protease, ser-111, his-136 and asp-185 form the catalytic triad. Swiss:P48254 has lost all of these active site residues and is therefore inactive. Swiss:P42379 contains two large insertions, Swiss:P42380 contains one large insertion. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.82 0.71 -4.62 86 9058 2012-10-02 13:07:06 2003-04-07 12:59:11 18 23 5278 397 2138 7738 4927 176.10 39 75.01 CHANGED spuE...cshD.lYuR.LL+-RIIFLustl.sDphAstllA.QLLaL-u-sssK-IhlYINSPGGsVoAGhAIYDTMpal.+scVsTlCh.GhAASMGuhLLuuGspGK..RhuLP.suRlMIHQP...hGGspG..QAoDIpIpAcElh+l+cplsclhuc+.T.GpshEcIpc......Dh-RDhaMoA..pEAh-YGLlDpVlppp .................................................................t........hD.l.as.R.L.hc.-Rl..l.hL......s....s.......t...l..pD..p..h..A...s.......l.lA.Q.LL..a.L......-....u.p...ssp......KD.Ih.lY...I.N.S....P.......G.............G.........s.........V..........oA..............G.........h..............AI...Y....D...T...M..........p.....a....l.......+......s.............c......V..s....T....l.....s..h.....G....h....A....A..SM...G............uh...LL.s............u.GscG..K..Rhs...LP.....su........c......l......MI...H...Q...........P................h...G...G..h..pG.........QA....oD.......l....p........I..p..A.c....c.l...h.+........h+...c........p.......l.......s.....c.....l.h.uc+...T....G..p.......s.......h...E.......p.......Ipc...........................Dh...-........R.D.....p....a..h.o..A........pEAh-YG...llDplhp..p............................................................................................................... 
1 751 1443 1838 +1576 PF01093 Clusterin Clusterin Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 20.30 20.30 19.90 18.70 hmmbuild --amino -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.51 0.70 -5.68 14 180 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 48 0 59 144 0 339.10 37 92.73 CHANGED ppLpphSptGp+Yl-cElcNAlpGVKpMKolME+opEEHppLhpsLEcsKccKE-ALKhsp-sEp+LcEcpclCNETMhuLWEECKPCL+pTCh+FYo+sC+SGuGLVG+QlEEFLNpoSPFhhhhNG-clcsL..h-psppQshplsplE-sFoplsssV-pLFp-Sh.hatphp..FspshpssFh.....hpPhhhsthptthttsc.....pth..p..Ph..hp.hsh.shFQ.Fh-huhphhpuhssthpp.ht.........t.u.phs.sp.hh....sDRtlC+ElR+NSoGCL+h+-cC-KCp-IL.ul.DCotppPspspL+pElp-uLpLAEchoppYsplLp.hQp+M.NTouLL-phscQFGWVSpLANhTps.scshFploTVso+ss...sspuPuD.TpV.sVplFsSsshTlslPt-lSh-sPcFh-hVApcALQ+aKpch+pt ..................................................pLpphS.ttGpphlscElppAl.GlKphKphh-+spc-+ppLhpsLccs.+ccKp-...Alp.hp-spt+Lpc..plCptohh.s...W-EC+sCLcpsCh+aYs.sCpsu.u.VtpplEp.Fhpp.s..shhh.........hp...t-phcsL.........pp.pppst.ls..thp-tFs.phs.s.hspLFpcp..........aspt..shah.......s.....h..hp.....h..t........p.............t.hs..h..shFp.hh.ph.hph.pshs...hhpp....................s....p..h.s...p..h...t....psRhlC+ElR+NootCh+h+ppCpKCpphL.th..DC....Ps.stL+pchp-ulplsph.sppYsplLp.hQt+h.sTs.LLcphpcQFuWVSp.LAN.Tts...sp.hhpltpVh...s+...p....s..s...........s.ts.T.V.slplhsSssholplP.-.shpsspFhphVstcALppa+pp.................................................................................................... 0 4 8 23 +1577 PF03026 CM1 Influenza C virus M1 protein Griffiths-Jones SR anon Pfam-B_1290 (release 6.4) Family This family represents the matrix 1 protein of influenza C virus. The protein is the product of a spliced mRNA. Small quantities of the unspliced mRNA are found in the cell additionally encoding the M2 protein (see Pfam:PF03021). 
20.20 20.20 22.60 276.40 18.30 18.20 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.48 0.70 -5.26 2 132 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 89 0 0 34 0 218.90 100 86.56 CHANGED MAHEILIAETEAFLKNVAPETRTsIISAITGGKSACKSAAKLIKNEHLPLMSGEATTMHIVMRCLYPEIKPWKKASDMLNKATSSLKKSEGRDIRKQMKAAGDFLGVESMMKMRAFRDDQIMEMVEEVYDHPsDYTPDIRIGTITAWLRCKNKKSERYRSNVSESGRTALKIHEVRKASTAhNEIAGITGLGEEALSLQRQTESLAILCNHTFGSNIMRPHLEKAIKGVEGRVGE MAHEILIAETEAFLKNVAPETRTAIISAITGGKSACKSAAKLIKNEHLPLMSGEATTMHIVMRCLYPEIKPWKKASDMLNKATSSLKKSEGRDIRKQMKAAGDFLGVESMMKMRAFRDDQIMEMVEEVYDHPDDYTPDIRIGTITAWLRCKNKKSERYRSNVSESGRTALKIHEVRKASTAMNEIAGITGLGEEALSLQRQTESLAILCNHTFGSNIMRPHLEKAIKGVEGRVGE 2 0 0 0 +1578 PF03021 CM2 Influenza C virus M2 protein Griffiths-Jones SR anon Pfam-B_1092 (release 6.4) Family Influenza C virus M1 protein is encoded by a spliced mRNA. The unspliced mRNA is also found in small quantities and can encode the protein represented by this family. 20.90 20.90 22.80 308.20 20.00 18.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.77 0.71 -4.63 2 111 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 87 0 0 38 0 139.00 98 70.10 CHANGED MGRMAMKWLVVhIYFSIASpPASACNLKTCLpLFNNTDAVTVHCFNENQGYMLTLASLGLGIITMLYLLVKIIIELVNGFVLGRWERWCGDIKTTIMPEIDSMEKDIALSRERLDLGEDAPDETDNSPIPFSNDGVFEI MGRMAMKWLVVIIYFSITSQPASACNLKTCLNLFNNTDAVTVHCFNENQGYMLTLASLGLGIITMLYLLVKIIIELVNGFVLGRWERWCGDIKTTIMPEIDSMEKDIALSRERLDLGEDAPDETDNSPIPFSNDGIFEI 0 0 0 0 +1579 PF02543 CmcH_NodU Carbamoyltransferase Bashton M, Bateman A anon Pfam-B_1740 (release 5.4) Family This family consists of NodU from Rhizobium and CmcH from Nocardia lactamdurans. NodU a Rhizobium nodulation protein involved in the synthesis of nodulation factors has 6-O-carbamoyltransferase-like activity [1]. 
CmcH is involved in cephamycin (antibiotic) biosynthesis and has 3-hydroxymethylcephem carbamoyltransferase activity [2], EC:2.1.3.7 catalysing the reaction: Carbamoyl phosphate + 3-hydroxymethylceph-3-EM-4-carboxylate <=> phosphate + 3-carbamoyloxymethylcephem. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.06 0.70 -5.33 11 743 2012-10-02 23:34:14 2003-04-07 12:59:11 10 5 526 10 274 1274 3956 344.30 32 61.74 CHANGED sSAashSsFtpu..lslslDusGDhhsphhhluptsthth..hspFPtp.....t.......lGhha..hphsphhGashh.stGKlMuLuuYG.sssphhsslpphhchpttss........Ycthlsthpsslhcshchh....ct....h..ppcpcluAohphhlE+llhchhphhhtchs...hsp.LslAGGsuhNlphNutLhppGhFsclaV.PtssDuGsAlGAAlhshsphst......hPhchs..VY.GPshss-.......p.csssWcuphp.......hhuplAshLAsGclVuahQGRhEhGPRALGNRSILA-PpsscsKD+lNthl+.REtFRPaAPosLEEcss-hF-.spsc...PaMhhshpsREsh....................tcplPAlsHlDGoARlQ ...........................................................................................................uAahsSsFpcs....sllslD.G....h...G.....-.......h.....s........o.s.........hh...h.tc.s.s.p....h....p....h.....h...t...ph.thPpS.............................................LG.hhY.........tthTtalGF....p....h................p.......sEh....K.lM.G.LA.sYG......t...s.......t...h.....h..h..p...h...h.....pp.hh..p...h.p.s.ss........................................ah.t.....h..s.h....h....p....hh.....t....t..h..h.t....hh...................t........t......t.....ptt....hDl........A...suhQt.hh.Ec.ll...l....ch...hcth...hppts......pp..LshAGGVALN...s....sh...Ns....+...l....h....p....p......s......F.c.........c..............l..a..V...p..P...A..u...u...DuGs.AlGAAhhsht.phts......................................t.ht.p......sYL..GP.sas..sp...............tht.h.th.hts.....t.h.t.h............p.....p..tlhp.p.sAphL.Ap.u.p....lVuWaQGR..hEFGPRALGsRSILusPpssphpcplNthlK.REpFRPFAPslLtEcuscaF..c..h.s..p..su.....................Pa......MhhshpVp.tp.h..........................
........+scl.PAVsH.l.DGTARlQ.................................................... 0 93 169 217 +1580 PF04989 CmcI Cephalosporin hydroxylase Bateman A anon COG3510 Family Members of this family are about 220 amino acids long. The CmcI protein Swiss:O85726 is presumed to represent the cephalosporin-7--hydroxylase [1]. However this has not been experimentally verified. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.08 0.70 -5.01 31 285 2012-10-10 17:06:42 2003-04-07 12:59:11 7 8 264 36 73 634 456 186.70 38 79.54 CHANGED tpasasapWhGhPIlphPpDlhshQEllacl+PDlIIEoGlA+GGShlahAuhLpll...............tsct+VluIDI-lRtpsptul-s.+sh.pRIphlpGSSscscllppl+phtpt.hcpllVlLDSsHopcHVLsELch.YusLlosGsYhlVhDTll-chsts....h.s.......psWs.GsNPhsAlppaLppps............c...FplDp...thps+hh...lTsuPsGaL+Rs ......................................h....hphpahG..hshhp.stDhh..h.QE.l.l..ac.l..+...P.cL...IIEhGstaG.G.Ss.lahAs.hLchl.................................sps.+Vls..l..DI........s...h..p..s......s........t..h..p..t......................s..R.......Ip.h.l...p......u.SS.ss..s..p.h....lpplpp.h.tp..sshl..llILDSsHop.pHVhtELph.Ytsll.os.GsYhlVEDT...s...l..s.s...hPh.....................sa..G.s.G..PhpAlcpaLtpps............p...FhlDp....hcpKhh...hohsPpGaL++...................................................................................................... 0 24 52 57 +1581 PF02627 CMD Carboxymuconolactone decarboxylase family Mian N, Bateman A anon COG0599 Family Carboxymuconolactone decarboxylase (CMD) EC:4.1.1.44 is involved in protocatechuate catabolism. In some bacteria a gene fusion event leads to expression of CMD with a hydrolase involved in the same pathway [1]. In these bifunctional proteins (e.g. Swiss:O67982) CMD represents the C-terminal domain, Pfam:PF00561 represents the N-terminal domain. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.77 0.72 -4.14 215 9489 2012-10-01 19:19:04 2003-04-07 12:59:11 15 45 3396 115 3044 7828 1802 82.90 18 51.31 CHANGED Pphhphhtphttshhh...putLsh+p+cLlslusshsps.sshsh...shHhpt..Ah.psGsop...ccltcshth.tshhs....uhstth..suhthspp ...............................h.phh.thhhs.hh.....pu.....Lshpp+cLlsl.ss...u.t.ss.....s...s.shsl........phH...spt....uh...p....t...G....hop............cc..l..t.plh.th...tsh.ht.......th...stth..tuhthh........................................... 0 764 1817 2496 +1582 PF00795 CN_hydrolase Nitrilase; Carbon-nitrogen hydrolase Bateman A anon Pfam-B_1042 (release 2.1) & Pfam-B_5155 (Release 7.5) Family This family contains hydrolases that break carbon-nitrogen bonds [1]. The family includes: Nitrilase EC:3.5.5.1 Swiss:Q42965, Aliphatic amidase EC:3.5.1.4 Swiss:Q01360, Biotidinase EC:3.5.1.12 Swiss:P43251, Beta-ureidopropionase EC:3.5.1.6 Swiss:Q03248. Nitrilase-related proteins generally have a conserved E-K-C catalytic triad, and are multimeric alpha-beta-beta-alpha sandwich proteins [2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.21 0.71 -4.94 58 15146 2009-09-13 06:29:50 2003-04-07 12:59:11 17 49 4520 101 4901 12501 4878 178.50 18 46.69 CHANGED psAssQ........hs....shhshpss.htphhphl..............................tcAsct..usp.llshPE...hhlsua................ht.thtphhthuh................sh......u.thptlt...........................................................................phApppslhllsG.hspp......csst.....hYNshlhlsss.....Gphl..........s.pYRKh+........h.tah.....EpphhstG.........stsh.ssass......s.h....u+lGh..hI.........CaE..hhaPthtph.hst.............pGspllsssss ........................................................................................................................................................................................................................................................................................................thhQ........h...........hh.s....h.t...t....s....h.....tphh.p....hh....................................................ppA.spt.........ssp...lllhP.E........hh.h..s.sa..............................................................................p...h.....t...h.s..t................................................sh....................t.h.h..pt.l..p...................................................................................................................................................................................................................................................................p..h..A.....p...p....h...s.....h...h...l.l...s....G....hs.p...........................pssp..............................ha....N.sh.h..l...ls.ss..............G.plh.......................................s...pYc....K.h.HL....................h.....tah....................Ep...t.h.hs.sG....................stth....s.hpt...............................................h...................h...+....l.uh......hI.................
..............................CaD...........hh.a..P.p.h......h..p..t..h.s.h................tG..spllhh.t..................................................................................................... 0 1561 3031 4091 +1584 PF01110 CNTF Ciliary neurotrophic factor Bateman A anon Sarah Teichmann Domain \N 19.30 19.30 19.80 19.30 18.70 18.20 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.37 0.71 -5.02 2 48 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 34 4 26 265 0 175.60 49 95.14 CHANGED MAhs-.oP.sshR+.DLCSRuIhLARKhRSDlTsLh-.YVc+QGLstsIslsusDGhPsAus-pWuE.TtspRL.-NLtAYRsF+sLLAphLE-Qp.hhssT-u-ht.AltshLLQVuAFsYplEELh.L.phthP.sEtsu.P.ssschuLFEpKLhGL+VLpELuQWsVRSl+DLR.lSpH..G.PupGuthhhsppp ....................................................MAhs-..s.ssp++.-...LCS....RSIhLARKhRSDlTsLh-sYscpQG..........L.....sp.s...l.sl..s..u....hD..GVP..sA...o..s..-..p..WSEhT-u.ERLp-NLpAYRsFcshLspl......LE-Qcsph.sPs-u-a+pAI+...sllLQVuAFAYplEELMh.L.L..ph.plP....s.Ess..uhPh...s.s...G..c..t..GLFEKKLaGL+VLpELuQWTVRSl+DLRllSpp.t.G..hsu.tst.......................... 
2 1 3 9 +1585 PF03450 CO_deh_flav_C CO dehydrogenase flavoprotein C-terminal domain Griffiths-Jones SR anon SCOP Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.07 0.72 -4.08 168 4331 2009-09-11 23:20:01 2003-04-07 12:59:11 12 53 1952 108 1331 3679 1203 98.20 24 19.57 CHANGED pua...Khu+Rts.DluhVsuAhtlplp...s..........st...plpss+luhGGhuss.sh+A.ppsEphLhGpshsp.pslptAhpslt.p-hsPhsc.hcuospYRpplutsLhhRhhhps ............................................................t.ta.KhucRps...shu.lVssAht..lphc.....s.....................................s......hlp.ps.RlAhG.......GsAs.............p..sh.Rs...ppsE...pt..L...........h...........G...........p...........s....h..........s............t....p....s..........l....p...p........A.sp.........t.hh...ps..........h.........t....sh...ss.......t..u..ps...t.......Y.Rhtlstshhh+hh.t.h...................................... 0 410 752 1046 +1586 PF00473 CRF Corticotropin-releasing factor family Finn RD anon Prosite Family \N 22.10 22.10 22.20 26.60 21.50 21.70 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.80 0.72 -3.87 7 187 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 111 8 64 192 0 39.40 47 25.30 CHANGED tPSlSIshshclLRphL.hchscpphpphp.....sNRphLspl ....PPlSlDLTFHLLRphLEhuRAcp..tp.QAc.............pNRplh-.l... 0 11 19 37 +1587 PF02552 CO_dh CO dehydrogenase beta subunit/acetyl-CoA synthase epsilon subunit Bashton M, Bateman A anon COGs Family This family consists of Carbon monoxide dehydrogenase I/II beta subunit EC:1.2.99.2 and acetyl-CoA synthase epsilon subunit. Carbon monoxide beta subunit catalyses the reaction: CO + H2O + acceptor <=> CO2 + reduced acceptor. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.87 0.71 -4.70 6 97 2012-10-03 09:55:27 2003-04-07 12:59:11 11 5 65 5 55 244 219 163.80 27 75.46 CHANGED tphphhP......p.ssl.sscpAshhps.cssspMI++AKRPlLllGPplhpsE.hEtpsKthhc+clshltTu.....h..tsl...cchp..shh-hophlpssp.phh.hpGpaDLslFlGshhYasupsLstLKpFosplpslsIscaaHPsAchSFsNlsc--..ahchLpEhl ................................................sh.....P........tshhtschsphhps.chuuphlppAKpPhlhsGshll..h..s......h.ps.h......t.....p.......tht.h.t..plshstsu.......ht.hsh...c....cs...+Y....shhp.ps.lsssc.phh.hcsp..hDhslFlGlphaaus.sLphl+sao..sshphshhschhH.sAph..Shsshs.t-.c..hht.hltp..h.................................................................................... 2 14 34 44 +1588 PF01121 CoaE UPF0038; Dephospho-CoA kinase Finn RD, Bateman A anon Prosite Family This family catalyses the phosphorylation of the 3'-hydroxyl group of dephosphocoenzyme A to form Coenzyme A EC:2.7.1.24. This enzyme uses ATP in its reaction. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.99 0.71 -4.88 13 4826 2012-10-05 12:31:08 2003-04-07 12:59:11 15 23 4560 31 1266 3845 2571 176.30 33 80.94 CHANGED hllGLTGGIGSGKSTlushFsp.hGlsllDADllARpVVpPGssuLspIsccFG...ssllhs-..GpLcRpALtcclFucsEc+phLNullHPhIppchhcpluphps...saslhslPLLhEs.phpslsspVlVVcss.cpQlcRshpRc.ulocpcspuhlsAQhSccERluhADsVl-Npushs- ...................................h.llGLTGGIuSGKSTV....u....p....h....h........p................p....h.....G...h.sl............l...D.AD..h..l........A..........+.p..l....l....p............s.Gs...........s.hp.t...l....h....p.t..F.G..................s.p.l...l........t.......t......c..........G..........p.............L........sR.....t...t.L.up.h......lF....s....s....s....p..........p....+....p....h...L..........ssl.........l.........H......P..h....l....p.p....c....h...t........c...p.........h...............p.....p....h.......p.p.t..................sh.l..lh.-.l.....PL.L..h.E...........s.....s..........h...p..............p.....h............s.....c...c..V...l.V..Vt.s.s...c.h.............QlpRlh............pR..............c......sh..o...........c....-.cspthls......uQ.hs......hcp.+.h..th....A.....D....hVI-Nsut...p..................................................................................................................... 0 433 796 1062 +1589 PF02035 Coagulin Coagulin Mian N, Bateman A anon IPR000275 Domain \N 25.00 25.00 25.40 341.10 24.90 24.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.33 0.71 -4.59 2 4 2012-10-02 16:54:34 2003-04-07 12:59:11 10 1 4 2 0 10 0 173.00 78 93.51 CHANGED sNsPhCLC-EPslLGRp.IVopEhKDKIEcAVpAlspcstlSGRGFSIFutHPsF+ECGKYECRTVpsEcSRCYNF.PFpHF..ECPVSspsCEPsFGYTsusEhRlIVQAP+AGFRQCVWQHKCR.aGSN.CthsGRCTQQRSVVRLVTYsLEKssFhCEshRTCCGCPCRS. 
TNAPlCLCDEPGlLGRpplVosEsK-KIEKAVEAVAcEuGVSGRGFSlFSHHPVFRECGKYECRTVRPEHSRCYNFPPFhHFpSECPVSTRDCEPVFGYTsAGEFRVIVQAPRAGFRQCVWQHKCR.YGSNsCGasGRCTQQRSVVRLVTYNLEKsGFLCEoFRTCCGCPCRS.. 0 0 0 0 +1590 PF04733 Coatomer_E Coatomer epsilon subunit Mifsud W anon Pfam-B_3343 (release 7.5) Family This family represents the epsilon subunit of the coatomer complex, which is involved in the regulation of intracellular protein trafficking between the endoplasmic reticulum and the Golgi complex [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.62 0.70 -5.22 7 430 2012-10-11 20:00:59 2003-04-07 12:59:11 9 22 320 7 259 625 124 243.90 29 86.34 CHANGED tsDcLFslRN.FYlGuYQssINpup.lsshst-ss.E+DshlaRuYlAlGphtlVlsEIctus.ussLQAV+hlA.ahssstp+-thlupLpEhlucpshssNsshpLlAullah+-psas-AL+th+tu.ssLEhhALsVQIhlKhcRh-hAcppL+hMQpIDEDtTLTQLAsAWlslAsGG.pKlp-AahIFp-hu-Kas.TshlLNGpAVsshthtpa-EAEolLhEALsKDspDsETLhNLlVsshclGKs...ssR.lSQLKhsHPpHshVcchsppEspFDRhspphs 
.................................................................t....Lhsl+staahGtYptsl.php...h...t.........s...s...p.t......t....hptphhhhRuhlA...t...p.....ht....ll.....tpl.....tt..ps.....s...st.......Lt.......Al....p.hh..A........p..a...h...t.....s................p....p...........ct.h......l.t....p....lp.................p...h....sp...p.........................ts.......h....h.......lhsu...l...h.h..t..ptp.p...tA...Lph..l.......p.........p.............t.......s.............s............l.....E.....h..h..A.........h.....h..l.....Q.....l...h.L...p....hsRh......Dh.A....p........+...p......l...pt...h.....p....p..h......s......-................D......s..h......Lsp.....LApuW..l....slthGu.....-p..........h.....p.pAaa.....lap.E.....h.....u.....p...p..h.....s.........s......o....................h.l.....Ls..upAsspht..schp-A...c...s...h...Lp.......p......A.....L.....p....p....c............s...p....p......s.-......s......l..h.N.h.l...lhs..h.h.GKs...ss..ch..h.s................p...L.p..t.....s.p.p....Hs....hlpc...ht.ttpt.F-phs.ph.t......................................................................... 0 98 151 219 +1591 PF04053 Coatomer_WDAD Coatomer WD associated region Wood V, Finn RD anon Pfam-B_1269 (release 7.3); Family This region is composed of WD40 repeats. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.47 0.70 -5.69 60 828 2012-10-05 17:30:42 2003-04-07 12:59:11 9 37 318 6 577 840 14 397.60 33 41.01 CHANGED psplpphshpsp.........ssphhslshKchGssphh......PpoluasP...ss+hlhV......susGcYhlYo.............ulshpspshGpuhshlWsu...pNc..aAlh-p.spslcl..KNh.cpps.hslplshss-plFs.....GsLLslpssu..lshYDhpptpllpclc.ls...s...KhVhWSss.Gp.hVAll...............o..........ccohhlls.hsh-..............................shsslpE...s.plKSutWss.s..VhlYTTts...+lpY..hlsG-sshIppl-pshYllthhs...s+lYhlD.+-hplhuhplsss.lcFppullcpch-p............................t.spll............tpplspaLpppGa.ElALphspDscpRF-LAlphGsLclAh-hAcphs....spp..........pWcpLGctALppGshplAcpsap+sp..caspLhhLYhsoGstppLp+luphApppuchsstFpsthhh.GclcpplclLhpouphs.AhhhApoau. .......................................................................................................................................................................................................l.phshpt............tp.h...lsh.+chG..s..s...h......Ppolpa...sP......stchVlV..............sssGpY.lYo..........................uhshps.pp.h.Gpu.p.hlasu.....psc.....aAlh-p...s.p............p...........lpl...KNh..ppph.......s....l.p..s..s..h.....s.s.-.t.Iah..............G..LLhhp.s.ss...........l..shaDhpp.t.p.............h..ltclc..ls........K.VhWS..ss..up.hVulh....................s........cc.shhlhp.hph-....................................................th.psl..pE...s.pl+oGhWs.......s....s....sFlYoots...+lpY..hss.G........-hshltpL.D....p......s......hY.l....h........thhs.............s...plYhhD.+-hpshsh.s.ls.o.hca.phulhcpch-..p....................................t.spll..............tpplhpaLppp..Ga..EhALphs.p...............D....c.pRF-LALphGpLc..lAhc......hA.pphs...........spp...............................................pWcpLuchALtpsshplsc.sap+sc..saspL.hLahsoGs.t..ptLp+hs.ch.Actpsc...h.................sstF.shhhl.Gcl....cppl.clLhpss..phs.AhhhApo
at....................................................... 0 228 341 487 +1592 PF02572 CobA_CobO_BtuR ATP:corrinoid adenosyltransferase BtuR/CobO/CobP Bashton M, Bateman A anon COGs Family This family consists of the BtuR, CobO, CobP proteins all of which are Cob(I)alamin adenosyltransferase, EC:2.5.1.17, involved in cobalamin (vitamin B12) biosynthesis. These enzymes catalyse the adenosylation reaction: ATP + cob(I)alamin + H2O <=> phosphate + diphosphate + adenosylcobalamin. 23.40 23.40 23.70 26.20 23.20 22.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.93 0.71 -4.63 173 2528 2012-10-05 12:31:08 2003-04-07 12:59:11 10 9 2106 4 629 1797 875 170.90 42 88.83 CHANGED pc+GlllVaTGsGKGKSTAAhGhshRAlG+Gh.......+VuVlQFlKG.s.hps....GEcphh.cph.......ss..........lpacthGc.GFTWc..opccp.pD..htsAppuWptAcphlt.................sspaclVlLDElshsLca......salsl-cVlssLp.sRPttpcVllTGRsA.sptLl-hADhVTEMp.lKHsFcs.Gl+AQpGlEa ........................c+GlllVaTGsG..KGKoT.AA.FGhuhRAlG....+Gh.......+VuVlQFlKG....phps......GEcshL.c.th..u......................Vca.p.s..h.Gp...GF..oWc..sps.cp...pD..............tssupps...W.pc.u..+.chLt..........................s.s..phDhVlLDElshslpa......sa..Lsl-EVlp.s.L.p.....p.R.Pt..p.....pcVllTGRss..ppl.l-hADhVoEhc..lKHsFcs..Gl+..AQtGI-a........................ 
0 192 403 529 +1593 PF01122 Cobalamin_bind Eukaryotic cobalamin-binding protein Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 26.80 26.50 24.60 23.70 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.04 0.70 -5.82 11 162 2012-10-03 02:33:51 2003-04-07 12:59:11 14 7 67 16 90 185 0 281.40 30 67.51 CHANGED hhhLssLLhhuusssthtp.CplspspppLlcsL.ppLLpshsppuu.s.NPSlhluLRLuuhpshstEphhlppLKhphppp.huSsou........p.ohGpLALhlLALpuuCcshu........cs.cLlopLccphEsEpctht.sHputPtTsYYQhSLulLALClcpscsspsVss+Lhpslp+s.hhhsuphSVDTtAMAsLALTClcpphhs.......shcsplshAlcslhEKIlppppssGhhGNlYSTsLAlQtL.sosp..sc.thsCtKshsslLsplppGsFpNPhuhuQlLPuLptKTYLDl.p.ssCsuscss..hh.....Psssosspsp ..............................................................................hhh...hh........p.C.......lst.pt.hlp.l...hh......p.....sPSlhluhpLus....s.ph...pp....h..h.ppL...........hts..s..........................t.shGpL..ALhlLALpusCcsst........hs.cLlupLpcphppttpth....sppstPhosaYphuLulLALChpstcspt.phls+Lhpshpts....tsp...hS..V.DTtAMAsLALsClppsthp........t.tpplstslcplhccIhppp.ps.pGhhG..NlaSTuLAhQ.AL.sss.....t...........sc.thsCtpshssl.Lp.p.l.p.pGtFpssh..huQlLPsL.tKoYlDl.t..pC.s.p.....h......P................................................ 0 14 19 39 +1594 PF03186 CobD_Cbib CobD/Cbib protein Mifsud W, Bateman A anon Pfam-B_2468 (release 6.5) Family This family includes CobD proteins from a number of bacteria, in Salmonella this protein is called Cbib. Salmonella CobD is a different protein [1]. This protein is involved in cobalamin biosynthesis and is probably an enzyme responsible for the conversion of adenosylcobyric acid to adenosylcobinamide or adenosylcobinamide phosphate [1]. 
28.00 28.00 28.00 28.40 27.70 27.80 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.72 0.70 -5.34 139 2167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1937 0 647 1878 476 282.10 30 91.11 CHANGED hhhlhlAllLDhllG-.Ptth......HPVshhG+llshl.-pthppt..........tp+htGhl..hhhhllsssshluh.hlhths.................hlshll.ps...hllhssluh+oLtccuttV.....tpsL.pps.sLstARctluh.l...VuRDTspLscstls+AslESlAENhsDGllAPLFahh........l.....h........GhP......G..uhsY+slNTLDSMlGY+s........c+YppFGasuARhDDllNalPARLTullhs.lsuhhh............tu........h....phhh...+Dutpa.SPNuGas.AAhAGALulpLuGsshY..G.h...cshlG..suhpsssst.cIppAlpLhttss .........................................h..hhhhuhllDh.llG-..P..th.............HP...V.h...h..hGp.hhshl.-...phhpp..........................ttpht.G.s..l.hh.llh.l.s.sshhlsh.hlhhls.................h.t.hhs.hhhpshhlahsLuh+uLtcpsptl......................tpsL.........p.....p.......s.....Dlst........A.....R.ptluh.l...VuR.DTspL.st.stlsRAslE.olAENssDullAPLFahh..............l.u........................G.sP..............u....s..hhY+sl.....NTL........DuMl......GY.+.s...........p.+Yp.....tFG........h..suA......+.lDDlhNalP.ARLou.lhhs.lsu...h.h..............................pu...................................hchhh.....+-st.....pasSP.Nu.G.h.sE..AAhAGALulpLGGsshY..h.....G.ph....+P.hlG..ss.....p.t...s.st...cltpulpLhhh..s................................................... 0 190 410 540 +1595 PF02654 CobS Cobalamin-5-phosphate synthase Bashton M, Bateman A anon COG0368 Family This is family of Colbalmin-5-phosphate synthases, CobS, from bacteria. The CobS enzyme catalyses the synthesis of AdoCbl-5'-p from AdoCbi-GDP and alpha-ribazole-5'-P [1]. This enzyme is involved in the cobalamin (vitamin B12) biosynthesis pathway in particular the nucleotide loop assembly stage in conjunction with CobC, CobU and CobT [1]. 
21.70 21.70 21.80 21.70 21.20 21.00 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.34 0.70 -4.78 8 2349 2009-09-13 21:11:09 2003-04-07 12:59:11 10 4 2268 0 598 1783 441 235.20 29 94.17 CHANGED l+uhluFhTpLPlhu......DhEphpctlhtaPllGhllGslsulsthltshhhs..s.LsuhLslhlhlhhsGhhHlDGLADhuDGlhusss+cRhltsM+DhplGsuGlsslllVhLlphhuLtpls..h.hhhlhluplsAKhshlLshtht+P...ttpGhGuaFhcphcp.pplslGhllhlllslhhu...sh..h.slhAlhsulhsulhluphuc+pFGGloGDVlGAusElocssoLLuL .........................h...slsFhT..R..l..PlPt...........ths.h.....p......p...h...s....+..u......h...h....h......a.P.l.l.Gl.ll...G.s...l...s.....u.hl..h...h.l....h.t..h...h..h.s...............s..l....u.u.l..l.s.l.h.s.h.h....llTGuhHlDGLADosDGl.h.u......s.t........s......+.............-R......h.....LcIM..+DSR....lGsa....Gsl....A...L...l.h...s...l...L.....h......+....h......h....s....l...s......p....L......s..............t.............s...........t.................h...........h...........h..............h.........h......l..........u...s...s.......s....u.....p....h....s...s......h....h...s....h...h..h..h.h.......................s+tpG.h.G.....s...h...h...h.....s.p........h.s...t.......p...p....h.......h...h....s..h....h......h...s...h....h...hs...h...h.hh........................s........h..t.s....l.h.u....h...l......s.....s.....h......l..........s...h..h....h...h....s....p....h....h.t+....cl....G.G.hTGDslGA...s...h...clsElshLls................................................. 1 189 397 511 +1596 PF02283 CobU COBU; Cobinamide kinase / cobinamide phosphate guanyltransferase Mian N, Bateman A anon Pfam-B_7022 (release 5.2) Domain This family is composed of a group of bifunctional cobalamin biosynthesis enzymes which display cobinamide kinase and cobinamide phosphate guanyltransferase activity. The crystal structure of the enzyme reveals the molecule to be a trimer with a propeller-like shape [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.98 0.71 -4.84 192 2330 2012-10-05 12:31:08 2003-04-07 12:59:11 11 13 2134 6 530 1759 319 165.20 37 82.22 CHANGED hLVoGGARSGKSpaAEpLs.....hphs....tps.hYlATupsh....Ds....EMppRItpH+pcR...sspWpTlEp....s..h...cLsp.sL..tp.tst......pslLlDCLTlWloNllht.p.........................p...shptphppLlssl...pp.hssslllVoNEVGhGlVPpssluRtFRDhhGplNQplAstAccVhLlluGlPl....pl .................................................hLlhGGARSGKSpaAEplh.........................tp............tpl..hYlATuphh........Ds......EM..ttRIt.pHppp....R.........ssp...Wp...Tl...Ep......h...................cLsp.hl............pt....ttt.............tpslLlDClThaloNllht.s...............................tp..slp.tcl.pp.L...lsul......pp.....ps.u...p.l.llVoNEVGh.GlVP.tsp.lu.R.pFRDhtGclNQtlAstA-cVhlVVuGlslp.................................. 1 172 358 453 +1597 PF00241 Cofilin_ADF cofilin_ADF; Cofilin/tropomyosin-type actin-binding protein Chris Ponting, Joerg Schultz, Peer Bork, Finn RD anon Prosite; SMART; Domain Severs actin filaments and binds to actin monomers. 
21.10 21.10 21.20 21.10 21.00 20.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.70 0.71 -4.25 16 2431 2012-10-01 21:06:05 2003-04-07 12:59:11 15 42 403 61 1450 2235 7 121.70 20 53.52 CHANGED ssspssap-l+tscp...hphllapIscs..ppplhVc......p........ss...tssssa--........Fhppls.....tp-scYulaDhchp.........suphsKlhFltWs.PDs..A.l+pKMhYAS.oKssl+ptLsu...lphplQuo..Dhs-ls.c-tltp+hp .................................................................t.......h.pht...t.t.....hphl...lhplptp......pppl..hlp............p.............h.........ssshc.c.......................h.h.p.p.Ls............psp.s...p....Y......sl.a.c..h.ch.pt....................ssp.h.s.c.h.l.h.l.h.Ws..P.-.s...........u.....s...l.+pK...MlYAo..s+....ss...l...p.c..t...l.ps.........hph..pl.p.us......s.p-ls....p.l.t...t................................ 1 449 733 1114 +1598 PF00963 Cohesin Cohesin domain Bateman A anon Sarah Teichmann Domain Cohesin domains interact with a complementary domain, termed the dockerin domain. The cohesin-dockerin interaction is the crucial interaction for complex formation in the cellulosome [1]. 
21.40 21.40 21.50 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.32 40 1118 2012-10-01 21:34:18 2003-04-07 12:59:11 13 97 144 45 199 1131 188 151.00 25 40.86 CHANGED lplslssssu.psGssVsVPVslssl.ss...lushshslsYDsslLphssspsGs..............llh..sssssFsss.tsssuhlshh..Fhcso.husphIspD.GlhAslsFKVpssssss.............sslshssshsasssshsplsssh.ssGslsV ..........................lplslucssA.csG..sphsV..sVolucl...Pss....Glpsh.sFslpYDs.slLpl..sslp......sGs.h................................................hh.s.ss...u........sh........a.p...s...s.....p.s..s..cG.h..lshh....aosss.tsuu.h..hl.ps-..Gl..FsTIsh....pVsusss.sGs.........................s.lp.h...................................................h................................................................................................ 0 105 188 196 +1599 PF01410 COLFI Fibrillar collagen C-terminal domain Ponting CP, Schultz J, Bork P anon Pfam-B_464 (release 3.0) Family Found at C-termini of fibrillar collagens: Ephydatia muelleri procollagen EMF1 alpha, vertebrate collagens alpha(1)III, alpha(1)II, alpha(2)V etc. 
26.80 26.80 26.80 27.10 26.70 26.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.73 0.70 -4.92 36 1510 2012-10-01 23:56:02 2003-04-07 12:59:11 13 94 127 0 533 1087 7 167.00 40 15.57 CHANGED l+sPpG.o+csPARTC+DLphsHP-....hpsGpYWIDPNpGsst.DAI+VaC..shps..GETClhPs...spl.ppsWappps....ttsWFupphpGs.................pFsYsss..........slQlsFLRLLSspApQNlTYHC+NSlAahDpts.sshc+AlhLtGuNDhElps-sssphpYpVl.pDGCpp+supauKTVlEa..cTp+spRLPIlDlAPhDlGsscQcFGl-lGPVCF .........................................................ppP.G.opcsPARoCcD.Lphs..a..P....c....h..s....GpYW.lDPN.p.G.Cs..Du.h+VaC..Nhps....G-....T.Clh.sp......tt.h...tta...tt...............ahu........h....t..t......................hpYss.......s....hlQh.sFL+LLSspApQslTYpCpsohu.a..h..-.....t..........ts....s.s.h.c...pu.lhh.G..u........N..-..................E...h.p.h..c..........sp.h.......h.......p........s.h.....DuCp.......p...p..Gt....t.cTlh-h..pT........psppLPllDlt.....Dh.G.ts.s..p.c..aG..h-lGPVCF................................................................. 0 127 171 312 +1600 PF01024 Colicin Colicin pore forming domain Bateman A anon Bateman A Domain \N 23.40 23.40 23.40 29.80 21.20 23.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.98 0.71 -4.71 14 176 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 93 7 11 176 0 170.20 39 38.52 CHANGED pcppcpsEpphlpcAs-hhsuhhcclsEphGcKacplA+-lAsshK...GKpIRSh--AhuShpKhtsN.shKhstsDR-AIsNAhculshp-hAppLsplSKuFKhsshshpstclhpchhcuhcTGNWtPLhlclEohslushAouVshhlFShslG..........sslGllGI.llsulluuhID-shlsplNphl ...............ptppptE.phlpDAlchsssFhcplsEcaGtKhpplAcElAspu+...GKpI+ss..-.....-ALpua-Ka+s.slsK...KhstpDRtAIspAL-ulphpch....upplspFuKuhthsu+shphhDlhs-hhcuhcTsNW+PlFlclEshssusuA....oslsuhh.FSlhhG...........oslGIlGh.ullhusluAhID-phl-plNchl............. 
0 1 3 8 +1601 PF03857 Colicin_im Colicin immunity protein Finn RD anon DOMO:DM07441; Family Colicin immunity proteins are plasmid-encoded proteins necessary for protecting the cell against colicins. Colicins are toxins released by bacteria during times of stress [1]. 25.00 25.00 27.20 29.20 22.20 22.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.73 0.71 -4.43 4 38 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 0 3 37 0 130.20 56 79.78 CHANGED cshlLcpIhspsp.LPSITShaNPlMTclMslYsKTAPhhAllLFIhTaKpRcLIpposRptVL+SChhuslhYAshlYlhhFpNhELTTAG+sh+LhspN.ssTLhlhYhulYhhIhahoYhsLhsPlhshKhhKtRQ ....aSpVLYhLY.NKlu.LPSITSLasPlMTpLMSsYsKTAPVhGILlFLCTYKTREIIKPl....o.RKL...VlQSCF..WGPVFYA.ILI..YITLFYNLELTTAGu.FFKLlS+N.slTLFILYsoIYaoVL.TMTYAl....LLhPlLVhKhFKsRQ.............. 0 0 1 2 +1602 PF01320 Colicin_Pyocin Colicin immunity protein / pyocin immunity protein Finn RD, Bateman A anon Sarah Teichmann Domain \N 20.90 20.90 21.10 21.80 20.50 20.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.82 0.72 -4.12 29 341 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 189 47 44 217 1 84.90 43 95.53 CHANGED Mc.hKp..plpDYTEsEFLpFlpclhps...p..hss--....hspLlcHFpclTEHPsGoDLIaYPps...st-DSPEGllcplKcWRAuNG+PGFKp ...............hKpplpDYTEpEFlEhlpclhss..p.........ps--........hhDsLlcHFp.+.l.TpHPssSDLIaYPpp...pt--pPEsIlchlKEWRtppGhPhFKp.................... 1 3 11 21 +1603 PF02674 Colicin_V Colicin V production protein Mian N, Bateman A anon COG1286 Family Colicin V production protein is required in E. Coli for colicin V production from plasmid pColV-K30 [1]. This protein is coded for in the purF operon. 
25.80 25.80 26.00 25.80 25.60 25.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.05 0.71 -4.35 164 3878 2012-10-03 02:02:08 2003-04-07 12:59:11 11 7 3737 0 790 2303 1930 149.10 22 74.52 CHANGED lDllllhllhhuslhGhhRGhlpplhulhuhlsuhhlAhhats.lushlst.......................................stthtthlua.hl...lFlhshll.stlluhhlsp.....hl.ph..ssluh.hD+lhGslhGhl+uhlllh.lllhlhshhshst.............................hhppShlhs.hlt......shss ........................................................lDhlllhllshu.slhGhhRG....hltp...........h...........luLl..uhls...........uhhlAt.t...a.h.s..l.us.h.l.st...............................................................................t.s...phht.s...sluh...hl...lFl...hs...h...ll....stllshhlsp...........ll..pt.....ssLut....hDR....l....lG.s....l....h.G....slcuh....lllh.llh.h...lh...s.h.h.s.hsp.............p.................hppShlh..hh.....s........................................................... 0 259 520 663 +1604 PF01114 Colipase Colipase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports duplication of common fold with Colipase C-terminal domain. 19.40 19.40 19.60 24.90 19.10 17.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.14 0.72 -4.47 9 50 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 36 7 28 47 0 39.20 63 37.13 CHANGED PRGllINLEsGELClNSAQCKSpCCQHsosLuLARCs.KA ........PRGlIINL-sGELClNSAQCKSp.CCp+souLuLARCssKA.... 0 3 3 7 +1605 PF00325 Crp crp; Bacterial regulatory proteins, crp family Finn RD anon Prosite Domain \N 20.50 14.00 20.50 14.20 20.40 13.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -7.03 0.72 -4.47 12 2310 2012-10-04 14:01:12 2003-04-07 12:59:11 15 6 1382 67 409 4399 727 31.70 52 13.95 CHANGED Lsho..Rp-IAcaLGhThETVSRhls+LpcpuLI ..........lphT..Rp-I.GphlG.h.o.hETVuRlLpphpcpsll............. 
0 79 197 303 +1606 PF02740 Colipase_C Colipase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports duplication of common fold with Colipase N-terminal domain. 23.70 23.70 24.60 23.80 23.60 23.60 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.38 0.72 -4.21 5 53 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 32 7 26 45 0 44.20 69 42.92 CHANGED ENSECSPpTLYGIYYKCPCERGLTCEGDKTIVGSITNTNFGIChD ........ENSECSsc.TLYGlYYKCPCERGLTCEuDK.oIVGoITNTNaGlChD................ 0 2 2 4 +1607 PF03047 ComC COMC family Mifsud W anon Pfam-B_2107 (release 6.4) Family This family consists exclusively of streptococcal competence stimulating peptide precursors, which are generally up to 50 amino acid residues long. In all the members of this family, the leader sequence is cleaved after two conserved glycine residues; thus the leader sequence is of the double- glycine type [2]. Competence stimulating peptides (CSP) are small (less than 25 amino acid residues) cationic peptides. The N-terminal amino acid residue is negatively charged, either glutamate or aspartate. The C-terminal end is positively charged. The third residue is also positively charged: a highly conserved arginine [2]. A few COMC proteins and their precursors (not included in this family) do not fully follow the above description. In particular: the leader sequence in the CSP precursor from Streptococcus sanguis NCTC 7863 Swiss:O33758 is not of the double-glycine type; the CSP from Streptococcus gordonii NCTC 3165 Swiss:O33645 does not have a negatively charged N-terminus residue and has a lysine instead of arginine at the third position. Functionally, CSP act as pheromones, stimulating competence for genetic transformation in streptococci. 
In streptococci, the (CSP mediated) competence response requires exponential cell growth at a critical density, a relatively simple requirement when compared to the stationary-phase requirement of Haemophilus, or the late-logarithmic- phase of Bacillus [1]. All bacteria induced to competence by a particular CSP are said to belong to the same pherotype, because each CSP is recognised by a specific receptor (the signalling domain of a histidine kinase ComD). Pherotypes are not necessarily species-specific. In addition, an organism may change pherotype. There are two possible mechanisms for pherotype switching: horizontal gene transfer, and accumulation of point mutations. The biological significance of pherotypes and pherotype switching is not definitively determined. Pherotype switching occurs frequently enough in naturally competent streptococci to suggest that it may be an important contributor to genetic exchange between different bacterial species [2]. The family Antibacterial16, streptolysins from group A streptococci, has been merged into this family. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -7.19 0.72 -4.51 20 666 2012-10-02 23:56:30 2003-04-07 12:59:11 9 4 281 3 31 212 0 29.20 45 61.52 CHANGED MKp.........ppLppFppLs-c-LppIpGGshhtphh ............Mcp........h.pL.ppFppLoscELQcIpGGthh....h...... 0 6 10 15 +1608 PF02247 Como_LCP Large coat protein Bateman A, Mian N anon Pfam-B_2294 (release 5.2) Domain This family contains the large coat protein (LCP) [1] of the comoviridae viral family. 
27.10 27.10 27.70 33.70 26.10 27.00 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.04 0.70 -6.06 10 129 2012-10-04 01:49:40 2003-04-07 12:59:11 11 4 22 5 0 137 0 314.50 38 47.63 CHANGED pE.sLhpLSLDDTSSl+GohLpTKlApo+llLsKsMlGGTlLpsshLsshLscushRAolsLhRTHVIpGKI+slAolNls-NTGCuLAlsaNSGlcGt.huTDIYThsSQDuhlWNPACcKss-aoFNPNPCuDuWshtFLp+T+sH..hsVpCVoGWTsoPhTDltlslsWaIssphCVP+phslusspssFslNRWMGKLoFPQGsspVl+RMPLuIGGGAGsKsAILMNMPNAhlSLaRYF+GDllFElTKMSSPYIKATloFFIAFGslo-chsN...LESFPHKLVQFuElQc+T.TlsFoQpEFLTAWSTQVhssssspuDGCPaLYAllH.DSsoSTIEG-FslGVKLlsI+sasuhGpNPGhpGoRLLGuhu ..............................h...p.uo.h.phlhpshh.lshshssGshlhsthLs..L.spts.htsh.hphhphh.uplhshhshtls.ssGhuLhhsaspG.c.ut.hso..slhphhu.pphhWNPAhp..hph.hpP.sCsDhWshpaLtpspht..hsl.slotWhssPhsDhphohshahps....p.slPc.....ph.sshp.tsshhhp+.hGpLsF.QG.ppshhph.lshGtstsstptlh.shssAhhuh.pYhpuslhh-lhhhSSPhItuThuhhlshG.sh.cphsN...h-uh...PHh.hpFuchpcps.slpFsp-.Fhshhohphhs.sshptDssshhaslhp.DusuSsl.G-hshtlthp..tshphhGhssGh.sshhhh.................................. 0 0 0 0 +1609 PF02248 Como_SCP Small coat protein Bateman A, Mian N anon Pfam-B_2294 (release 5.2) Domain This family contains the small coat protein (SCP) [1] of the comoviridae viral family. 21.90 21.90 23.00 21.90 19.10 18.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.93 10 94 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 21 5 0 104 0 162.30 36 20.83 CHANGED ssps.sVYsshhhlcTPlss.osppsFuuFThDLlsusISsDuoG.NWshslhsSPIuNLL+TAAWK+GTIHhQLhhpG.AuVKRSDWuuosplsLppuhuscuhsARoWhIocP+uu-lpFslEIsGPNNGFEMhsSsWANQTTWaLEhlIsNP+QhslFElsh+lspNFEVAGNsLhPPlsLS ..................p............................................u.s.shcl.pSPhspLLpssAWh+GTL+aplVhcG.uuschusapspsQlslosspss.pohsup.pa...s...ho..pPtShELpFsh-lsGPssGFc.Mh.s.h.uspp.ahLphtlsNs+p.ssh.l.uthspDhchAGp.........t.......................................... 
1 0 0 0 +1610 PF01257 2Fe-2S_thioredx complex1_24kD; Complex1_24kDa; Thioredoxin-like [2Fe-2S] ferredoxin Finn RD, Bateman A anon Prosite Family \N 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.58 0.71 -4.60 178 4533 2012-10-03 14:45:55 2003-04-07 12:59:11 14 41 2679 24 1588 3500 2480 131.40 29 53.12 CHANGED lcphls+Ys....pppuAllslLphsQcp.......h..G.alstsslphlAchLs..lshhcVhpVATFY.ohFp.....hpPsG+.ahlpVCssosChlp..Gu-clhcshcccLGl..ph.G....cTT....sDGpFoLppVcCLGAC.spAPshhl....sc..........-..hapcL.Ts-plppllcphp ...........................................................h..t.htpa.........t.tushl.hLphhQpt.............G.als.tshthlAchL.t..hs..cl.tVsTFY.s.h...........hp.P.sG.....+.....ahlplC.....su......s........s........C.tl..p...G.u.pp.lh.pt...l.....c...c....c......L.s.l.........p.....s..................p.so................D....G.p....hol.p.ps.p.C.lG.sC.....s....p......u......P..s.hhl......ss......................c....has..p...l.o...s...-.pl.t.cllcph...................................... 
0 643 1141 1390 +1611 PF00346 Complex1_49kDa complex1_49Kd; Respiratory-chain NADH dehydrogenase, 49 Kd subunit Finn RD anon Prosite Family \N 20.40 20.40 22.50 20.50 20.10 20.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.74 0.70 -5.40 7 7353 2009-09-12 08:12:15 2003-04-07 12:59:11 14 22 3508 15 1751 5117 3078 185.50 28 55.80 CHANGED DlGAhTPhhahFcEREclh-haEtsoGhRhHsA.ahhhhGVtpDLPhGhhDclh-asc.F..tlD-h-chlTpNpIahpRlpGlGhlott-AlsaGhoGsMLRuSGltWDlRKspPY-sY-ph-a-lsh.sttGDCasRYLsRltEMRpSl+IlpQslpphPsGP...pl-th+hp.s.+schppphEslIt..hhho.saplPsG-sYstlEuPKGEhGlYLsuDso.hPaRh+lRsPuFsHLphlsth.+GhhLADlhAllGolDlVhG-VDR ..........................................................................................................................................oshh.ha.p...Rp..h.......phhEhhsG.Rhh.s.h.h.GGl..t.....D.l..............................t....................................h.t.h...h.p...h..t.t.l.t.p.h...p.hh....ts..h..tR........sluhhs.p.A.hthu..sGshhRu.o.....G.h...t.hDhR..t.....p...s.a.....hY...........t.t.........h.......h......p......l...........h......t.........tt..................sDshsRhhl+htEhhpShphlp.s...lp.......h.........t......s..................................................................................................................................................................................................................................hEss+G.....h.a..h.s.t..s.s.......h.a.Rh+hRsssa..p.h....h...h....t....luDh.hhhuo.s.hhsthD..................................................................................... 
0 581 1119 1471 +1612 PF01512 Complex1_51K Respiratory-chain NADH dehydrogenase 51 Kd subunit Bateman A anon Pfam-B_780 (release 4.0) Family \N 20.50 20.50 20.80 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.86 0.71 -4.48 123 5590 2012-10-02 20:27:15 2003-04-07 12:59:11 12 89 3181 15 1614 4537 2950 161.60 39 31.64 CHANGED lccuGlsGhGGA..GFPTtlKhssssc...........chlllNusECEPhlpsDctLhpcpscp..llcGhtlhtphl.uupcshIulcsphscAlpslppA......lp....ptpt.................lclthhsstYPsGsEptLlpslpG+..........tlPps....t..hP...hchGl..........lVpNVpThhslhp ....................................l+puGL+GRGG....A....GFPTGlKWphhsc.p..............................cYllsNADEsEPGshpD+tlM..cp.p..P..ap..................llEGhhIuua.Al.t.Ap.puaI..al+.uEa...p..Ahp.tLppA............ltcu..pp.shlt..........................................hthplpl+hG.s.G.tYlCGEEoALlpSLEG+.........tuhP+s..........+..PP.....ushGlh...............tPTllsNVETlssl.......................................................... 0 615 1120 1387 +1613 PF00668 Condensation DUF4; Condensation domain Bateman A anon Pfam-B_130 (release 2.1) Family This domain is found in many multi-domain enzymes which synthesise peptide antibiotics. This domain catalyses a condensation reaction to form peptide bonds in non- ribosomal peptide biosynthesis. It is usually found to the carboxy side of a phosphopantetheine binding domain (Pfam:PF00550). It has been shown that mutations in the HHXXXDG motif abolish activity suggesting this is part of the active site [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.70 0.70 -5.57 42 20609 2012-10-02 12:01:53 2003-04-07 12:59:11 15 1841 2351 9 7271 22090 414 273.20 20 26.78 CHANGED hpshhPloshQcthhahpphpssss.....uashsshlchps..sl-hpplcpAhppllp+H-uLRhhahpppt......hQhlhpptphplhhhphhs.........pph.chhhpc..hppsacLppu.PLh+stlhphtcs.pthlhhs...hHHllhDGhShsIlhc-ltphYpshp.......Lsshs............spapsau.hhpphtppt.hpcptsYWtphhpph.s......h.lstchspsstpshpst....plphslstt...htptLpphspppssolhslLlusaslhLpcasupscl.llGs.hsGRsp.....sslpphlGhFlshlPlclchp .......................................................................................................h....hslo.h.Qp.t.h.h....h....h...t...p...h..t...........tss.....................ta.s.....h........h...h..h.....c......l.....p.................u......t..........l...........c......h.s........t....Lp....p....A..h....p....t.l.l..p..+..H..p..h..L..R..s...t.ah...tpss.....................t..h..Q....h................l....h......t.........t....h.......................h.....t.......h............h..h..s..h..t.t....................................tt.h...p.h..h.t...p.p.......ht..p...s...h..........c..l......t...........p.......s..........s..........l.......h....+....h......t...l......h............p............h...............s..............s................t.......p.............t..............h............lhls..................hHHl........lhD....G...h.S.h....t.l.lh..p.-....l....t....t..h...Y...psht....................hs.s...s......................................h.p.a...t..s...a....s......t...h..............p.....t............h............h..........p........s....................................h.......t.....p......p......h.........s.....a.....W...p....p.t.....L....t....s....h.s.s.....................................l...s.....h....s....h....s.......t....s...s....h....t...s..h...t...st...................ph..p.h.p....l.stp......httt.l..p..p...h....s....p.......p.....t.......t
........s.....o.....h......t..l..l...h.u.u.h.....u......h......h.........L...............t..........c............h.............s.........s............p.............p..........-............l.....s........lG............hs.h......s....u.Rtt.......................s...s...h..p.p...h...lGh..Fhss.lsl+hp..t........................................................................................................................................................................ 2 1552 3788 5729 +1614 PF00029 Connexin connexin; Connexin Sonnhammer ELL anon Prosite Family \N 20.60 20.60 20.60 20.70 19.20 19.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.33 0.72 -3.98 65 1256 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 120 6 656 1028 0 102.60 49 33.51 CHANGED sWuhLtpLL-.pVppHSTslGKlWLoVLFIFRIllluluuEsVWuDEQScFsCNTpQPGCcNVCYDphFPISHlRaWlLQlIhVSTPo..........Llals....Hshaphc..+cc+tcp+ ........................sWshLtplLp.t.Vpp...HSTslG....+.lW..LoVlFIFR..llllssAuEsVWu.....DE.QucFs.C.N.TpQPG.....Cp.NVC.YDphFPIS.HlRaWsLQlIhVSoPo..........Llahs..Hshaphpppc+....t............................... 
0 61 128 329 +1615 PF03508 Connexin43 Gap junction alpha-1 protein (Cx43) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 37.00 37.00 23.60 22.80 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.38 0.72 -6.56 0.72 -4.69 3 90 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 65 1 24 85 0 20.00 92 5.88 CHANGED RsTuSCRNYNKQASEQNWAN ..RNNSSCRNYNKQASEQNWAN 0 1 3 8 +1616 PF03509 Connexin50 Gap junction alpha-8 protein (Cx50) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 48.60 46.60 17.30 16.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.30 0.72 -3.67 4 47 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 33 0 25 40 0 66.60 70 15.69 CHANGED IQKAKGY+LLEEEK..slSHaFPLTEVG.hEsu.Lsu.sFptFEEK...uhuPhcDhS+sYDETLPSYAQs ......IQKAKGYQLLEEEK..IVSHYFPLTEVGhVETSPLsA.PFspFEEKl...uTGPLu...DlSRuYpETLPSYAQV. 0 1 4 10 +1617 PF03601 Cons_hypoth698 Conserved hypothetical protein 698 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 24.50 24.50 24.60 25.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.00 0.70 -5.78 13 3130 2012-10-02 17:06:44 2003-04-07 12:59:11 9 5 2815 0 627 2177 1234 301.90 31 87.76 CHANGED sGLLLshllullAhh.......Luph....hhstluAhslAIllGhlluN..ahphppphpuGltFupphLLRhGIlLhGhpLolspluslGhpullhsslslhuohllshalG.+hltLD+phuhLluuGoSICGAuAlhAspPVl+Acscc...VuhAIuslslFGTluhhlYPslhshhuhs......sctFGlahGuolH-VApVsAAGtthuspsss......sAllsKhhRlhhLuPhllhLuh..hhs+ppptutspst...+hs.....lPaFlluFlllullsohhhls........ssllshlsslsoahllhAMAAlGLssslstlt+sGhKPLlLu 
.....................................................Glhl.shlluhhuhh....................lup.........h...hsh..luuhs..lAIllGhlls....s.....hh..t.........h..........p...................p..h....p.s..G....l.p....Fu....s....cpLL+huIlLhGhpL........shsp.......l......hs..lGh......t......u..l......l......h.......s.ll...s.....lh.....soh.....l......l.....sh.a......lu.+....hh....tl..D..c.c..h.uhLlu..sGouICGuu..AlhAsuPll...+...A...c...s...cc...........suhA.lus...lslaGoluhhlaPhl......h.s..h..h.shs........spsaGlasGsol.H-....lup.VlAAutsh...u...s..put.s...................hA.s....lsKl.h.RVh..h.....L...sP..lsl.ll.uh..........hh.p.....p...p..pp....t..s.psptt..................+hs.........................lPaFllh.Fl...ls..ul.l.so..hh.....h...ls............................ps....lh.s..h...l.....t.....p........l...s.phhlshAMuAlGLssplpsl.t.c..s..G.sKsllh................................................ 0 209 414 530 +1618 PF03602 Cons_hypoth95 Conserved hypothetical protein 95 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.12 0.71 -4.89 27 4548 2012-10-10 17:06:42 2003-04-07 12:59:11 10 7 4379 14 1006 14823 4683 175.00 31 90.57 CHANGED hRIluGph+G.......RpLts.su.pshRPTsD+VREulFNhltsh......lststhLDLFAGSGuLGlEAlSRGAppslhlEpstcuht.hlccNlptLth........thlhpssthth.phstps..FDlVFlDPPYtps....ppslphlsppshLp.....suhlhhEptpc.ttl.p.ssshphh+c+thGpsplphat 
.............................................................hRIIuG.p.a+GRp.L....s......s....p..u....t..s...h...RP..T.o.D+..V.+.E.slF.N...h.Lssh................hp.s.u.p.s.L..D..L..FA...G.S..G....u..L..G...l......E...A...l.....S.....R.....G...A....s.......p.....s...........s.....h......l..Epst..pu..hp.....hlp....c..N......l....p....t....l.c.hps.......................hpll.p....t...s......s.....t............p.........s.........l............h......t......h......................................t.......s.................t.....t........F...D......ll...F.l..D...P.....P........Y...pp.......s........h.......h..............p.p........s.......l.p........h........l...t....p........p........s........h.L.s.......................ps..u..l..l.h..s..E.p..s...p..p..........t...h...s.......p............s....s....s....h....p....h.h.+.c.+thGpsthphh................................................................................... 0 343 672 859 +1619 PF04234 CopC CopC domain Kerrison ND, Finn RD, Bateman A anon COG2372 Domain CopC is a bacterial blue copper protein that binds 1 atom of copper per protein molecule. Along with CopA, CopC mediates copper resistance by sequestration of copper in the periplasm [1]. 22.60 22.60 22.60 22.90 22.50 22.50 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.20 0.72 -3.36 162 1651 2012-10-03 16:25:20 2003-04-07 12:59:11 7 10 1283 12 405 1175 336 96.80 35 42.58 CHANGED HupLhuosPAssuslssuPsplpLsFsEslp.......thuslpls.sssG.pshsssp.......spsss........pshslslss..Lss....GsYpVpW+slS.sDGHshpGsasF.sVp ..................HApLppssPAss.up..l..s..sA.PptlsLsF.....oEslp.s....................sFou..spls......ssps....c..s..lpsts..........................s.phst.....tstpplhlsLsps....Lts................Gs.YsVsW+lVS.s.DGH.spGpaoFoV.................................... 
0 101 242 333 +1620 PF00127 Copper-bind copper-bind; Copper binding proteins, plastocyanin/azurin family Sonnhammer ELL anon Prosite Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.54 0.72 -3.77 31 1467 2012-10-02 17:41:00 2003-04-07 12:59:11 15 49 912 431 532 1917 1988 103.80 25 50.66 CHANGED spstlsscsus.hsFsssslslssG-plhalN.........sshsHNlVhsc.........DtlsuGs-sstlphsc.....cshl.usGEshSVThs...tsGs..YsaaCo.P.HtuhhMhGpVsVp ........................................................................................h.....thttss..htFp.P...s....t...lplps.G..s..s..l..p..a..h..s..................ss.hs..H...Nhshsp......................................sth..s...t.s.h.pt....hh.p..t.....................................t.h.......t......s...G..c..s....h..s..l..T..Fs.....psGs...Ypa.....h.....Cs..P....Hh...s.h....s.MhGplhV................................... 0 126 321 455 +1621 PF00649 Copper-fist Copper fist DNA binding domain Bateman A anon Prosite Domain \N 25.00 25.00 39.10 37.80 24.80 24.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.38 0.72 -4.76 23 240 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 128 1 176 247 0 39.50 57 8.31 CHANGED M.llIsGpKaACtsCIRGHR...uSoCpHs.-R.Lh....cl+pKGRPs ................M.lIsGtKaAC...EsCIRGHR...sSsCpHs.D...RsLh....pl++KGRPs...... 
0 46 97 155 +1622 PF01218 Coprogen_oxidas Coproporphyrinogen III oxidase Finn RD, Bateman A anon Prosite Family \N 20.00 20.00 22.60 22.50 18.90 18.40 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.90 0.70 -5.86 93 2155 2009-09-11 07:50:01 2003-04-07 12:59:11 13 8 1940 24 700 1719 2513 276.40 51 93.10 CHANGED pplcsalhsLQccIspuLE....th.D...G.....ts..pFttDsWpR...t..pG.....GGGto+VLpsGpVFEKuGVNFSpVhGsthPsuAost..Rs-l.....uGt................sFpAhGVSLVlHP+NPalPTsHhNlRaF..lu...ptp...spsssWWFGGGhDLTPaYs.....a-EDshHaHpss+sACssa.GsshYP+aKcWCD-YFaLKHRsEsRGlGGlFFDDLs...p....hs...F-psFuFhpulGcuFLsAYlPIVc+R+stsau-cERpFQlaRRGRYVEFNLVaDRGThFGLQoG.GRsESILMSLPPhscWcYs.apPpsGS.EAcLhc.alts+-Wl .........................p.tscpalhpLQDpICptLptl..D.......G........................upFhcDsWpR...p.s.G.................G.GG.po.....RV.........lp.............s..............G..............s...............VFEpuGVNaSpV.aGp.hhPs.uAsst...R.sch...........uGt..................................sF.AhGlSLVlHP+NPalPTsHhNsRaF....hs.................p...t....................st.ss........sWWFGGGhDLTPa.Ys.....a-....E.....D...shHaHpst+s.......hC.p..........s.a.......u....t....s.............h....YP+..a...KcWCD-YFaL+HR.s...EtRGlGGlFFDDL....s...s.............................s.....a-.p.sF.uF.hpuVGcuahsAYlPIVc+R..+..s..h.....sasEcERpaQLhRRGRYVEFNLVaDRGTlFGLQ.....TG..GRsESILMShPP.hs..+Wc..Ys.............a...........p...P..pss........S.EutL.p.hh...s+-Wl........................................... 1 214 409 570 +1623 PF03232 COQ7 Ubiquinone biosynthesis protein COQ7 Bateman A anon Pfam-B_3545 (release 6.5) Family Members of this family contain two repeats of about 90 amino acids, that contains two conserved motifs. One of these DXEXXH may be part of an enzyme active site. 
22.00 22.00 22.00 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.23 0.71 -4.79 47 840 2012-10-01 21:25:29 2003-04-07 12:59:11 8 7 777 0 370 769 1628 164.50 33 78.42 CHANGED thl-chIRVDpAGEhGAspIYtGQhtVLu....csphushlpcMh-QEptHhppFscLltc++VRPTlLtPlWcluGFuLGuuTALlGccAAMACTsAVEssIscHYssQlcpLt......ps-tc......................LtspIpcFRD-ElEH+Dhulpp.sAcpAssY.lLopsI+hGCRsAIhluc+l .........................................t..httlhRVsHsGElsAptl....YpGQh..h.........shp..........pspl+thhp.chhcpEtcHLshhpchlp.ch..ps..R.P.olLsPlWh.s.uuau..l...G.......ss.su.lhGccsuhuhssusEcplspHhsspLcpLs............tp-tc..........................psllcphRpDEhcHt.cpA.......lct.u.u..t..p..h................h.th....t....................................................... 0 111 206 296 +1624 PF04803 Cor1 Cor1/Xlr/Xmr conserved region Waterfield DI, Finn RD anon Pfam-B_6320 (release 7.5) Family Cor1 is a component of the chromosome core in the meiotic prophase chromosomes [1]. Xlr is a lymphoid cell specific protein [2]. Xlm is abundantly transcribed in testis in a tissue-specific and developmentally regulated manner.\ The protein is located in the nuclei of spermatocytes, early in the prophase of the first meiotic division, and later becomes concentrated in the XY nuclear subregion where it is in particular associated with the axes of sex chromosomes [3]. 22.10 22.10 22.40 23.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.66 0.71 -4.39 12 157 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 50 0 76 193 0 115.40 34 52.16 CHANGED sKsLpsKRKRlEshoKsShKuSppKlEplW+TQpspRQKLsp-aSQQhhslhQQW-hDspKhcEQcEKLsNhF+QQQKlhQQuRlVQsQ+lKsl+pLaEQFlKshE-lEKs+-shhpusQpEL+KEMAhL .......................sL.tK+++hph.spsohps.pp+l...cphh+spp.ppRQKlspcaSpphhslhppa-hDhQKhcE...p...c....EK....ls..s..happQQK..hhQQschlQpQ+Lcsh+plh-palKshpslEpsppphh.shpsEhcc.hs.............................. 
0 12 14 22 +1625 PF01544 CorA CorA-like Mg2+ transporter protein Bateman A anon Pfam-B_944 (release 4.0) & Pfam-B_3206 (release 7.5) Family The CorA transport system is the primary Mg2+ influx system of Salmonella typhimurium and Escherichia coli. CorA is virtually ubiquitous in the Bacteria and Archaea. There are also eukaryotic relatives of this protein. The family includes the MRS2 protein Swiss:Q01926 from yeast that is thought to be an RNA splicing protein [3]. However its membership of this family suggests that its effect on splicing is due to altered magnesium levels in the cell. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.91 0.70 -5.38 79 8370 2009-01-15 18:05:59 2003-04-07 12:59:11 13 47 4035 34 2466 5606 407 264.00 18 74.31 CHANGED tpshhWlclptsspp..phphLtpt.......hslst.h.hp........hhstppps+h-.......hpsshhlhlpshphspsspt...h.......lshhl.ssshllTlppps...hphh.......pplhppht........ttht.pssttllhtllptlscphhphlcplppclcplEcpl............pppspphhpclht..l+...+p.lspl....+chlhsppphl........phhp......pttthhsppp......lpclhsclpp....lhpthphhp-hlptlt-thtshlspp.......hNchh+hL...Tlloslh.lPhTl..lsGhaGMNht......hP......thphta......hhhhhlhlhhlhshlhhhhh......++ 
...............................................................................................................................................................t....hWlpl.t.sptp...pht....ltpt.......ht.l...h..hpt...........hh.p....p.p.ps..+hp..................t.s.h......hh.l..h....h...t...h...h..p.t.ppp..................................l.thhl....t........p.....p........hllo...hp...pp.......hh...............................pthhpph...........................tthh.hss....t..h.lht.l.h.......p.ths......cth....hth.l.cp.lppp...h..p.pl-.ppl.............................ttptp...p..p........h........t..plhp.....lc.........cp....lhhh........................................ppslt.....s.......p.p.hl..................................phhp.................t..h....h...hs..p.p.p.....t..th...........hc.-.l.hp.chpp...................hhph.hch...h..t...ph...ls...t...h...h.......p.......sh...h...u...h...l....stp.............................N.p.......h.......h...c.......h...l..............olh.......oslh...lP....Th...ls..u...h..aGM.N..hp................hP................thp.h.ta..........hh.h.h.s........lh.hh.l.hhsh...hhhh.hh.++........................................... 0 668 1466 2076 +1626 PF03311 Cornichon Cornichon protein Mifsud W anon Pfam-B_3813 (release 6.5) Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.70 0.71 -4.00 31 647 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 286 0 397 527 2 110.50 35 84.83 CHANGED Mu.phh.h.alhulllssshlFhtlaalIhhuDLEsDYlNPI-hCs+LN.hVlPEhhlHuhLslLFLlsGaWhsFL........LNlPllsYNshphhp+.....spl..lDsT.EIF+p..Lstcp+cshlKLuFYLlhFFhYLYp ...............................h.h.ahhullhsss...lhh.laal.....Ih..hs-LcsDYhN.Ph-.Cs......p..........LN.................hllPEhh..l..Huhhsl.ha..L...hstpWhhhh........LN.l.Pl.lhaphh+.hhpp.......tl..hDsTpIhpt.......Lshpp+-uhhKLuFaLl.FFhYLY..................................... 
2 101 172 283 +1627 PF04694 Corona_3 Coronavirus ORF3 protein Mifsud W anon Pfam-B_5763 (release 7.5) Family \N 23.60 23.60 25.00 54.80 23.20 23.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.21 0.72 -4.09 4 89 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 40 0 0 78 0 59.00 82 84.35 CHANGED MDIVKSIshSVDAVLDELDsAYFAVTLKVEFpoG+hLVCIsFGDTh.tA..phhu.Lthc MDhVKSIshSVDAVLDELDshhFAVTLKV.FpoGKLLVCIGFGDTh.EAcpKAYAKLtL....... 0 0 0 0 +1628 PF03262 Corona_6B_7B Coronavirus 6B/7B protein Mifsud W anon Pfam-B_4476 (release 6.5) Family \N 25.00 25.00 208.10 119.30 22.20 21.60 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.46 0.70 -5.08 5 357 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 39 0 0 270 0 194.70 87 100.00 CHANGED M.IVllLVClhLuNuhGIKtssQEcDL...HEHPThTWELL-+FVGNTLYITTsQILSLPLGAcV+C-sVEGFsCoWPGFcssAHDHIDFYFDLSNPFYSFVDoFYIul...G-tspKIsLRlVGATPK-KRLNlGCHTSFu.VcLPFGTQIYHD+DMphhV-GRHLECTHRVYFVKYC.PaasHGYCFcDKLKVYDL+Rl+SpKsF-KlNQac+oEL ....................KATTVQsDL...HEHPVLTWELLQHFVGHTLYITTHQlLALPLGSRVECESVEGFNCTWPGFQNPAHDHIDFaFDLSNPFYSFVDNFYI.l...GEGNQRINLRLVGAVPKQKRLNVGCHTSFA.VDLPFGTQIYHDRDFQHPVNGRHLECTHRVYFVKYC.PYNLHGYCFNEKLKVYNLsQLRSKKVFDKINQHHKTEL.......... 0 0 0 0 +1629 PF02398 Corona_7 Coronavirus protein 7 Mian N, Bateman A anon Pfam-B_1574 (release 5.4) Family This is a family of proteins from coronavirus which may function in viral assembly. 22.30 22.30 23.10 47.60 19.00 22.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.39 0.72 -3.53 2 105 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 47 0 0 81 0 77.30 75 100.47 CHANGED MLVhhpAVhlTVLhLLLIGRlQLLERLLLsH.hNLpTVs.......................DFNILa+sLAETRLLhVlLRlIFLVLLGFsCYpLLshLh ..MLVFLHAVhlTVLILLLIGRlQLLERLLLsH.LNLpTVsNVLGVsDssL+VphhQLLKPDCLDFNILa+sLAETRLLhVVLRVIFLVLLGFsCYpLLssLh...... 
0 0 0 0 +1630 PF03187 Corona_I Corona nucleocapsid I protein Mifsud W anon Pfam-B_2926 (release 6.5) Family \N 25.00 25.00 75.50 75.20 24.60 16.30 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.43 0.70 -5.17 3 82 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 48 0 0 55 0 183.00 55 98.69 CHANGED MESSRRPLGLTKPSADcIhKIEAEGISPSRLQLL.NPIPGVWFPITLGFLALPNSRRERSLSLQhDKECLLPMESQLLSKRDIGIDTTDVLLKHLMASRSNYCPDGIFTILEQuPMLcsSMATsLTESSGSQlsRhhspPLLTLLKGTQVAMRLFLLGLRPVRYCLRVoMLKAQEGLHLLVDLVRGH..NPVGQIIALEAlPTSASLPLL ....tS.pt.l...p.o..ph.h.tsEp.NPspLhLL.NHpEshh..hI.GSLtL.sFK+.coLNhQhsK.h.LhpES.LLKpRDIGhDTTsVLLKQLMus+SsCspDGIFTIhtQs+MPtpsMssshptSSGSLlT+Lh.lPh.hFppGhQlsMRLFhLGhR.sRasL+shhLKAQEGLhLlsDLl+uH..pPlsQ.hshEsh.s.ts.PLl........ 0 0 0 0 +1631 PF01635 Corona_M Coronavirus M matrix/glycoprotein Bashton M, Bateman A anon Pfam-B_845 (release 4.1) Family This family consists of various coronavirus matrix proteins which are transmembrane glycoproteins. The M protein or E1 glycoprotein is The coronavirus M protein is implicated in virus assembly [1]. The E1 viral membrane protein is required for formation of the viral envelope and is transported via the Golgi complex [2]. 19.10 19.10 19.30 19.30 18.60 18.60 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.48 0.70 -5.39 14 976 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 276 4 0 775 0 199.50 48 97.13 CHANGED h.SN...solshp-llphl+sWNFshsl.ILlhlhllLQaGYss+S+hlYllKMhlLWLLWPLslAlolFsAl..aslN.hshhuhSIlhAslohlhWlhYFlsSlRLapRTcSaWSFNPEoNsllslsl.hGpphshPlhpsssulThsllsGpLhh-Ghclup.sssssLPphlTVApPsshhhYch.u+..s.shsssoGaAhYl+hKt.Gsaptssshpsshs-sppLhp ........................sh...tphh.....hhtpaNh.hsh.hh.hh..hhhlLQaGhsphShhlYhlKMllhWlhWPlslAlslhssh...Ytls.h.VhhuhSIhhAllsh.hhWlhYFVpSIpLa+RT+SWWSFNPETNAlLslsh.hGpphshPl-usPpslThTllsGsLYsEGhKlAsGhsl-cLPKaVhVApPoRphlYph.VGK..p.puussoGaAhY...V+uKt.G.sYs.spshpsshopttpLh.................... 
1 0 0 0 +1632 PF04753 Corona_NS2 Coronavirus non-structural protein NS2 Mifsud W anon Pfam-B_3747 (release 7.5) Family \N 25.00 25.00 26.60 179.60 24.00 17.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.68 0.72 -4.11 3 81 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 63 0 0 43 0 108.40 73 99.55 CHANGED MEIWRPShKYLRhTR-FGVT-LEDhCFKFNYCQP+VGYCRVPL+AWCRNQGKFAApFTL+S+-KSa+ppFGVITSFTAYGNTVcEAVSKLVEpAuDFIsWRAppLN+YG ..MDIW+PEhKYLRYTNGFNVSELEDsCFKFNYpFPKVGYCRVPs+AWCRNQGpFCAThTLYGKSK+.YDKYFGlITGFTAFuNTVEEAVNKLVFLAVDFITWRpQpLNVYG. 0 0 0 0 +1633 PF05213 Corona_NS2A Coronavirus NS2A protein Moxon SJ, Bateman A anon Pfam-B_6568 (release 7.7) Family This family contains a number of corona virus non-structural proteins of unknown function. The family also includes a polymerase protein fragment from Berne virus and does not seem to be related to the Pfam:PF04753 Coronavirus NS2 family. This family is part of the 2H phosphoesterase superfamily [1]. 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.74 0.70 -5.06 4 104 2012-10-03 21:31:47 2003-04-07 12:59:11 7 4 66 0 0 95 1 202.80 52 38.20 CHANGED AYADcPsHFlshPlsp.psFltsahcLQ.....EGhssKhQsAPHISLTMLclpsEDhcpVE....-IlD-Mshspu..lshsNPHMhG+phVhDVcGl-pLHD-lVslLRc+GhssDQoRhWhPHhTIuplpDsuh.sKh.......hpFshpppl..........tch-hVKLGAsKtsuhYEhIso...........sWsupR.LCapsss.p.Sc.htYhsLss...EhptG.l..Ncsss...hShcYpsppahh++V+DpSpa..hRTu ............................AaADKPNHFINFPLspFpGFhhpYhtLQ.Qll-..G..lDCK....lQpAPHlSls...hL.D..Ip..s-pY+sV-....hAIQEllDDhthh..EG..pIpF-NPHh.L.G.R......ClVL.......D.V.+G.VEELH-...DlVNhlRc+GCsADQSRpWIsHCTlAQhs-tslpIKt.........hQF.a.Kh.sh.....N..osAclElVKlGupKh.DGFYpo.hS...........hWhG.R.hpYpPPTsKhu.IhGYCCl-hlRt-LE.GDLP.sD--AWhcLSYHYppNoaFFRaVacpS.Y..FRp.............................................. 
0 0 0 0 +1634 PF03053 Corona_NS3b ORF3b coronavirus protein Mifsud W anon Pfam-B_2130 (release 6.4) Family Members of this family are non-structural proteins, approximately 250 amino acid residues long. They are found in transmissible gastroenteritis coronavirus (TGEV) and porcine respiratory coronavirus (PRCV) isolates. These proteins are found on the same mRNA as another product, designated ORF3a. While ORF3a/b has been implicated in TGEV and PRCV pathogenesis, its precise role remains unclear (see [2,3]). 25.00 25.00 25.60 25.30 23.20 23.00 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.42 0.70 -4.81 4 228 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 53 0 0 204 0 211.70 53 99.20 CHANGED MhLGLFphTls.......osVpposppsplSt-sshtlppsVVshRpssplsuFhlsSlFVhFFALFKAoSa+pshhllhh+lLslhlYsPlLhasGAYlDAhllsssLhuRhhalsaasWhYKsacFIlaNoTTLhFlpG+Asaacs+u....aVhL.GGspYlhlGsphVsFVSs.sLYlAIRGpt-uDLpLlRsVELLsGchlYlFSpc.lVGlsNuuFpp..L..........DchhsIS ...MIGGLFLsTLu.......hI.V.s.s.p.sh.l.s.Nhsp.s..slVQQ+pVV.Sup..hpShahEFSIAlLFVhFLALYRSTNFKsCVulLMFKIlSMTLlGPMLIsaGYYIDGIVTT.TVLuLRFlYL.uYFWYlNSRFEFILYNTTTLMFVHsRAAPFhRSSHuSIYVTLYGGINYMaVNDLTLHFV.ssh.l......hlAIRGhscADLpllRsVELLN.....GchIYlFSQEsVVGlhNAAFsph.l..........sc........................ 0 0 0 0 +1635 PF03905 Corona_NS4 Coronavirus_NS4; Coronavirus non-structural protein NS4 Finn RD anon DOMO:DM04795; Family \N 25.00 25.00 42.70 42.70 18.80 16.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.22 0.72 -4.16 3 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 45 0 0 37 0 47.40 70 65.50 CHANGED MPMATTIDsTDYTNIMPoTVSTTVYLGuSIGIDTSTTGFssFSa.............Y MPMATTI-GsDYTNIMPhTVhTTVYLGsSIGIDTSTTG.pshsh....................... 
0 0 0 0 +1636 PF00937 Corona_nucleoca Coronavirus nucleocapsid protein Bateman A anon Pfam-B_267 (release 3.0) Family \N 24.60 24.60 29.90 25.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.18 0.70 -5.50 21 1034 2009-09-11 06:29:42 2003-04-07 12:59:11 13 4 296 36 0 939 0 294.10 37 87.79 CHANGED Mu........................plsatsp...stppsppttt......hPh.......................SaFssLptpscpthh.hhsGsGVPhutG.stspphGYWpRQpR..a+.scGppppLss+WaFYYhGTGPaA-hcatc.......cp-GVhWVApcGAcssssu.lGoRsssp...pslss+Fssu..lPpshhlps......pspSRssSRus......SR...upS..RusSts..........uRssS...psRpsss...............lhsslhttLtslshsppp.........................tsppssploppsst-stp....K.caKRossKu..psVspsFGtRussp...NFGsschlctGscsspaPtlAELlPosuAhhFuSclssccps..Dsl.........plsashshpls+csPshppah....pplsAY...........scPpcp.cK.pppp ...............................................................thsh.......................SaFpslp.pphst...hhtspuVP.s.s.ptspQhGYWpRpsR..a+.scGtpK.lsstWYFYYhGTGPtAchpats........p.-GlhWVAtcGApspsps...GsRsssp...-thsh+Fssu..hPps.hh.s..........pGcSttsot..us......S..t....upS....Rs.sS...............RpsSpsRpssu........................................t.sl.hsthtplh.spp.p..............................ps.pphs.p..tpss.-h.p......+.hhKRT.s.t...pVspsFG.Rs.sp......NFGDsch.ppGhpstphsthhphsPostAhhFGSpls.c.ts.sth...........plpaphshhlscscsphpphl....pplsuh................tcPpp.t.t....t............................................... 0 0 0 0 +1637 PF01600 Corona_S1 Coronavirus S1 glycoprotein Bateman A anon Bateman A Family The coronavirus spike glycoprotein forms the characteristic 'corona' after which the group is named. The Spike glycoprotein is translated as a large polypeptide that is subsequently cleaved to S1 and S2 Pfam:PF01601 [1]. 
25.30 25.30 25.40 26.50 23.40 25.20 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.94 0.70 -6.20 8 2634 2009-09-11 07:53:46 2003-04-07 12:59:11 11 4 115 4 0 2424 0 374.30 50 73.17 CHANGED hhG.us.VahhpSuh+.PcGaphpshhhhssssos.susssSsQ....sClhul.taupsssssShshTAss..sGhShsss....cFshs.......csNhsctslFlTasapo.hshuCs.ouh......l.puah+IshhKsts...salFYNlTls.......luphPphhp..slsphsSVYlNGahhFTostscsVshusspacoGG.....hThtlhphVcALs.hsNsohpcVIhCD.SPhuhLtCQ.sTuNhsDGFYPhoso....phhlVhppsSVsTh.shpsaTFsNlosu.........PssGGlpohslYtopsspouhhNFNhohlouFsh+sSpFhhhsa......h.pspFpspshNsGhWhNSlslu..hs.hslQussc.ssFu...TsCauhShsGsusshuVausphsp.apshhhlYVoho-GShIpTuspsPlhshpsasNlTLDcCs-YNIYGRsGpGhITNsTsohlut.........Ih.TSsShDlhshpsshusshYpVsPC-ss.pQhVVssGclVGllTShNpTu............p.htN.aYlplhNhT+hhpR ...............................................ts.VahhQSuhR.spGWHlpGGAYAVVNsos.ssNAG....o....up.............p...CT....s..Gs........I.......p.....s..h.......s........s.......Au....Sl...A..MTAP...........pGMuWSps..................QFCoA.........HC..NF.o...-...h....o...VFVTHCaps...suCPlTGh.......l.psaIR.ISAM.+..s..........upL....F..Y..NLTVo..............VuKYPpFKShQCVNNhTSVYLNGDL..V.FTSN.p.TpDVsuAGV.a.aKu..G..G....PlsY.plM+phcsLsaFsNGospcVlhCD..oPhshLt.Cp......shshsDGFYshss.......hhhs.h..pShsTh...hpsaoF.N.osu.........ss...u...s.h.p..s.h...hhts..t.hps...sh.h.sFs.s.h..s..Fsh...ptsp.ahh..................s.F...h....ps..s......Ghh...sslsst.....s..h.shpt.hsp..sFp......phC....h.......uhs......ss......hphsl..hth...t..........phh.lahphptGp.h.ots........h...h..h.hs.CstYslYsh.G.GhIh..s....p...sh.........lh.so.uhphhhhps.....hapl.PCt...tQhsh.ttthhs........................................................................ 0 0 0 0 +1638 PF01601 Corona_S2 Coronavirus S2 glycoprotein Bateman A anon Bateman A Family The coronavirus spike glycoprotein forms the characteristic 'corona' after which the group is named. 
The Spike glycoprotein is translated as a large polypeptide that is subsequently cleaved to S1 Pfam:PF01600 and S2 [1]. 30.00 30.00 30.20 30.10 29.90 29.00 hmmbuild -o /dev/null HMM SEED 610 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.17 0.70 -6.42 22 1416 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 312 44 0 1305 0 413.10 39 54.14 CHANGED sCs.Ph.....lsYushulppsGulht.lshp.......s.stlsslhs.....tslpIPoNFTlolpsEYlQhpsp.lolDCupYVCssss+CppLLpQYuohCpsIpsuLptsupl-shplhshhoss..puhpls....shpsFsus..................aNhosl...Lsss..........up.t.RSsIEDLLFsKVhouGlGtVDttYccCos...G..tsltDLlCAQhYNGIhVLPsllssphhuhYTuSLlGuhshG....Gl.TuAAuIPFuhslQuRLNalulppsVLpcNQKllAsuFNpAlusIpp..............GhsosupALsKlQ-VVNppupALspLssQLssNFtAISSSIp-IYsRLDtlpA-AQVDRLITGRLsALNuaVoQpLschsclcsSRpLAtpKlNECVKSQSpRhGFCG.sGsHlhSlsQsAPpGlhFlHhshlPopatsVpAssGlClss.....hhhhhsPthulFh......psso..........ahlTsRshapPchhotu-hVplpoCsVsasslsps.lsshl.P-..hhDhscpLscahpsh..spshPshs..hshaNhTh..LNLosEI..............pcLpplIcsLNsohlDLc.LsphEpYlKWPWYVWLsIshullhhlhlLlahhhsTGCCG.hhuChu.....spCtppCp...thpp...h..hcphHsp .......................................................................................................h.lP.thshs...Ehh.h....h.lss..alss....C...h.pYh.hC.pl.t.l...s...s.....hh.........................................................................................................................................thYTsuhhuuhshu....uh.ouAsul.PFuhplQhRlNhlulppslL.cNQchlAsuFNpAluphp...............uhpohs.ALtplQsVVNpputhLsphhtpLppNFtAISSslt-IhppL-tlpApsQlDRLIsGRLsuLsshsotp..phhclptptpLAhpKlsECV+SQS.RhsFCG.pG.HlhohspsAP.GhhhhHhsh.P.t..ph.s.suhChts................hh...t.shh.........t......................ahho.p.ha.Pp..p.tshl.htsCtssahphs.s.h...h..p....shpcplpchhpph..p..hs-...hs......hN.Th..LslptEI..............pclpthhpslNpohlsLp.lshhc.........................hhhh.Tsps..........ssh........ht....................................... 
0 0 0 0 +1639 PF00115 COX1 Cytochrome C and Quinol oxidase polypeptide I Finn RD anon Pfam-B_23 (release 1.0) and Prosite Family \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.90 0.70 -5.86 94 254351 2009-09-13 13:30:05 2003-04-07 12:59:11 15 23 108179 109 2006 206187 5121 227.90 58 95.26 CHANGED pcp.luhhYllhuhhhhllGslhullhRhpL..shP...................sh...lssp....saNplhohHushMlahhshshhhG.hu.allPhhluscplshP+Ls.shuaWlhshusllshhuh......hhths...............stsWh.Ys.Phs....................s..hhhhulhlstluullsulNhlsT..lhphRs.uhsh..h.slh.sWuhhssullhlhuh.Pslshs.hhh.h...h...hh..stsussllapalFWaauHPtVYllllP.uhGllu.llsphs.t+.lauaphhhhuhh.uluhlu.hhlauHHhas.sGhshhhpshhsstohhlu..........lPsult...........lasalsT......hhtu....p....hphs.sshhasluhlhhF.hhGGloGlhhuhsslshhhHsTaalVAHaHhslhGuhshshhuulaa....hhPphtGp.....hhsppluphpFalhhlGhslhFhshph.hGlh.GhsRRh....hsa......sst.......htsa.....hlpolG..uhlh ........................................................................................................h.lG.suh.ShlIRhEL....upP...........................suh....lssDQ........lY.Nsl.VT.A.HAFl.MIF.FM.VM....P.l..MIGGFGNWLl...PL..M.lG...A..P............DM....A....F..P...R....M.N....NM.SF.W..LLP.PS.lhL.Lls.So.....h.V.....EsGs.......................................GTGW.T....V.YP...PLu.usluHs............GuSVDLuI.FSLH..LA.GlS.SI.L.G.AI..NFI.T.T.........l..I...........N......M........+..............s..........s...h..o....h...c........p......h.......PL.......F....V..........W.........u........V.........h.IT.Al.L.L.....L.L.SL..P..V..LA.G....A.I..TM.LL.T.D..RN......l.NToFFD...............PuGGGDPl.La.phh.......................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................ 0 538 1218 1661 +1640 PF05051 COX17 Cytochrome C oxidase copper chaperone (COX17) Moxon SJ, Mistry J, Wood V anon Pfam-B_5838 (release 7.7) Family Cox17 is essential for the assembly of functional cytochrome c oxidase (CCO) and for delivery of copper ions to the mitochondrion for insertion into the enzyme in yeast [1]. The structure of Cox17 [2] shows the protein to have an unstructured N-terminal region followed by two helices and several unstructured C-terminal residues. The Cu(I) binding site has been modelled as two-coordinate with ligation by conserved residues Cys23 and Cys26. 21.80 21.80 21.90 22.20 21.60 20.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -8.95 0.72 -3.94 43 302 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 243 7 199 288 2 48.60 48 61.89 CHANGED ppKP...KsCCsC.-pKcsRD-Cll...pGp-p.....CpchIEtaKpCM+uhGFpl ..........p.pKP...hKPCCsCP-TKcsRDp.Cll.....pGp-s........CtphIEsHKpCM+uhGFpl............. 
0 65 105 163 +1641 PF00431 CUB CUB domain Bateman A, Finn RD anon Pfam-B_136 (release 1.0) Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.53 0.72 -3.98 29 12961 2012-10-02 11:50:15 2003-04-07 12:59:11 15 883 190 46 7617 11572 76 107.50 26 32.62 CHANGED CGsp....lppssGslpS.PsaPp.sY.ssppClWpIpss.uhp..lpLpFps.F-lEtpt.......ChYDalElhDG.tss.t.hlG+aCG.....pthPssltSsusphhlpFho.DsshsppGFphsa ......................................................................Cut......hp..t.....s....G...h...lp...S...P....s.......a....P.......p........s...........Y......s........s......s.....h...........p.....C..h.......Wt..........I......p....s......s..............s.....p.....p...............l...p...L......p.....F..p......p...F...p.l...E.......................................C....t.......h....D....a......l.......p........l.....h.....-...............G......s............s...............t...............p........s.......................h...........l.............u.......p............a.............C....G.......................s.p..h..P...t....s.....l......h...S......s....u.....s.......p....l..hlpF...p......o......D.....t....s.....h.....s.....t...p..GFphpa.............................................................................................. 
1 2339 2836 5004 +1642 PF00116 COX2 Cytochrome C oxidase subunit II, periplasmic domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.47 0.71 -4.28 32 31269 2012-10-02 17:41:00 2003-04-07 12:59:11 15 29 18166 144 1320 28095 2586 106.00 48 47.64 CHANGED lTlKulGHQWYWSYEYsDa.s.....lpFDSYMlPpppLp.sphR....LL-VDNRhllPhssplRhllTu....sDVlHSWslPShGlKhDAsPGRLNQsohhh...sRsGlaaGQCSEICGsNHSaMPIslEul ................................................................lTlKslG.HQWYWS...YE..Y...o..D..a....s..........................l..p.F..D.S...Y.......M....l.........s..........p......p........-.........h..............................s.p........F.......R.............................LL..-.....VD...N.....R.l.....l..lP..hsspIRlllTu......s.D....V....l...HSWs...l....P.....u........L..G....l....K........h........D..A..s..P.....G.....R........L.N...Q...s...s...ahh....................sR...P......G......l...a.a......G.QCS.ElCG......ss.Hu.h.Ms.hhh........................................ 0 349 789 1076 +1643 PF02790 COX2_TM Cytochrome C oxidase subunit II, transmembrane domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family The N-terminal domain of cytochrome C oxidase contains two transmembrane alpha-helices. 21.00 18.00 21.10 19.70 20.80 17.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.03 32 25391 2009-01-15 18:05:59 2003-04-07 12:59:11 10 24 16352 78 555 22832 1763 78.90 47 36.87 CHANGED MsT.hshs...hQDuuSPhMEplhhFHDashhlLhhIhhhVsalhhshlhs......phsp+ahlcGQh.IEhIWTllPAllLlhIAlPSL+L ............................Msp..phs....h..Qs...usSPlMEp.Lla.F.H.DHs..l..h..I....l...lh...I.o..h.l...V....s..Y.lh..h..h.l.h.hN................................K.hs.s.+...h...ll.-u.Qh...IElI.....WTIlPAllLlhIAlPSLRL................................................ 
0 162 342 451 +1644 PF00510 COX3 Cytochrome c oxidase subunit III Finn RD anon Pfam-B_78 (release 1.0) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.69 0.70 -4.94 21 13171 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 8530 60 1326 11262 3530 206.40 41 93.84 CHANGED HPFHlVssSPWPlssShuhhshsluhlhahHsap..hhllhlullsllhsMhhWaRDllREuTapGpHThhVpcGL+hGhlLFIlSElhFFhuhFWAFFHSuLuPolELGusWPPhGI..pslNPh-lPLLNThILLoSGsTlTaAHHullpG.pRppul.uLhlTllLulhFThhQhhEYhpAsFTIuDuVYGSsFahuTGFHGlHVllGTlFLhVshhRhhpaHhTspHHhGFEsAhaYWHFVDVVWLFLYlolYWWGu .........................................................................................................................................................................................................................................................................................................t.t..............G....Hs..............l.............u..h.+..h..........G...hh.LF.Ih.S.E.l.hF.....Fhu.a.F....WA..........a......a...................p.........u.................u....L........u..........P..............s.....................................p........l.................G.................s............h....................W..P...........P..............t.......G.......l.................p..s.......h...s....P....h...p...l..........P......L....L....N..Thl....LLuS......G...l.o....lT...........a.........A...HH............u...........l...........h.......p.............s.............p.............c................p......p........s..............h.t.u..........L.........h.l.T..l..l...L.GhhFsh..l...Q...s...h...E.........Y........h..........c..............s...........s.........F................o.........l.......u.......D..............u...........l...........Y.........G......S.......sF...F....h....s..TG...FH.GlHV...ll..G.oh.aL...h.......l..sh....h...R..........h...........h.........h......c.........a........o...........s..........p...........p..........+.....
.....h..........u..........h............c...s.....su.........h.......YWHFVDl.VWl.h.lah.lYh................................................... 1 358 800 1075 +1645 PF02284 COX5A Cytochrome c oxidase subunit Va Mian N, Bateman A anon Pfam-B_7466 (release 5.2) Domain Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit Va. 21.30 21.30 22.00 25.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.43 0.72 -4.10 3 318 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 272 51 176 300 0 103.20 52 69.73 CHANGED uHGoEETsEEFDARYEKYFs+EuIDuWElRKGMNsLlGYDLVPuPKIIEAALRAuRRVNDlAoAIRlLEulKDKsGscKt.lYPYlL-EL+PTLQELGIPThEELGhDK ......................H..pp.E.T.EEFsA.R......a.phFs.sshDsaElp+uhNs.hhuYDLVPpPcllpAAL...RAs...RRlNDaAoAVRlhEulKsKst.s+.c.....YshhlpEL+PshpELGIsh.EELh.-.............. 0 51 86 139 +1646 PF01215 COX5B Cytochrome c oxidase subunit Vb Finn RD, Bateman A anon Prosite Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.69 0.71 -4.50 4 403 2012-10-03 19:45:42 2003-04-07 12:59:11 14 3 288 52 248 395 7 118.00 33 69.68 CHANGED S+ll+t.pslsp.suQsLt.s+tPs..oLltshuptGtVPTDh-QtTGLcR..hLuthpGpDsFshcsLcuo.pGThcDPllVpSlsshRhVGCpGususSHsllWhpltcscspRC.-CGoVYKLp.hus.scphtH ................................................................................................u....tls.....s-..-.p....ATGL.E...RhEl..lu.p.h..p.G.h.D.s......a.c....h.c.s..cus..hG...T.h-s...PhlV.s.Sh..hscRlVGCss....t....t.s.s.o..slhWhhl.c.c.s.c.s.p.RCspCGp.haKL............................... 
0 84 137 203 +1647 PF02046 COX6A Cytochrome c oxidase subunit VIa Mian N, Bateman A anon IPR001349 Family \N 25.00 25.00 28.10 27.90 24.60 24.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.92 0.71 -4.17 31 404 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 264 51 247 396 1 102.90 34 78.21 CHANGED Mht.............t..t..........sh+RhhSo......................sstt.......tpta.pphtth.tHutu...soclW++loh.....hlAlPu.lslsulNsa.l..cscHt-HhtH.....-p-phspYsa.NIRoKsF.WGDGsKTLFWNscVNths ........................................thhhts.................................................................t...shtt..c.sts...su.....ch..W+p.loh.....h....lul.Pu.lslshlNsa.l.pcHtcH.....................tp.EhstYsahp..IRoK.sFPWG.DG.s..+.o........LF........aNsclN.h.s.............. 0 67 115 186 +1648 PF02297 COX6B Cytochrome oxidase c subunit VIb Mian N, Bateman A anon Pfam-B_9188 (release 5.2) Domain Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of the potentially heme-binding subunit IVb of the oxidase. 24.50 24.50 24.80 25.40 24.40 24.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -9.88 0.72 -3.96 58 675 2012-10-02 15:44:21 2003-04-07 12:59:11 12 9 292 51 456 646 0 66.70 32 59.76 CHANGED schPs.........pspp+pCapshscaacClcp...pu........................spp................Cphhppsacs.CssuWlchac-p.....pcpshhs ....................RFPs..psQp+pCap.......sas-aa+Clct...psp......................................shss...............Cc.tatcsacohCPts......W....l..ccac-p......tt....t................. 0 125 227 364 +1649 PF02238 COX7a Cytochrome c oxidase subunit VIIa Bateman A, Mian N, Finn RD anon Pfam-B_3023 (release 5.2) Family Cytochrome c oxidase, a 13 sub-unit complex, is the terminal oxidase in the mitochondrial electron transport chain. 
This family is composed of the heart and liver isoforms of cytochrome c oxidase subunit VIIa. 22.20 22.20 22.50 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.73 0.72 -4.24 8 306 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 111 51 138 301 0 53.40 42 59.85 CHANGED +NKVhEKQKLFQpssch.sYLKGGh.DslLaRlTMsLslGGTuYslhuLGhAuhP+s ......pN+Vs-tQKhFQcsss..lPlaLKGGhsDs.lL..Y..RsT.Ms....L.....s...l...u........Gohhslh.thhhhs.s+t.......................... 0 21 42 77 +1650 PF02285 COX8 Cytochrome oxidase c subunit VIII Mian N, Bateman A anon Pfam-B_6423 (release 5.2) Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIII. 20.50 20.50 20.60 20.60 20.00 20.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.98 0.72 -4.43 11 112 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 71 51 45 114 0 42.40 43 59.90 CHANGED VpSKPs+p.hushEpAlGloshFVoFLlPAGWlLSHLEsYKKpu .....lpSpPscp.plushEpAl.GLoshFlshLlPuGWlLuHL-sYK+..... 0 5 7 16 +1651 PF02672 CP12 CP12 domain Bateman A anon Bateman A Family The function of this domain is unknown, it does contain three conserved cysteines and a histidine, that suggests this may be a zinc binding domain (Bateman A pers. observation). This domain is found associated with CBS domains in some proteins Pfam:PF00571. 21.20 21.20 21.20 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.40 0.72 -3.51 39 291 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 151 16 114 305 584 62.40 34 39.64 CHANGED ls-pIpcshpcAcpsCups..sSucCssAWDtVEELpAttuHpcpcpp....ppssLEpYC--NP-AsECR.lYDD ...................................tpplppthppActhsstt..ssspsttAW-t.l-ELpsttsHt.tpt......stssLE.....paCcsNP-ssEC+.lY-p................... 
0 24 78 103 +1652 PF01383 CpcD CpcD/allophycocyanin linker domain Bateman A, Griffiths-Jones SR anon Pfam-B_887 (release 3.0) Domain \N 21.10 21.10 21.30 23.20 19.30 20.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.36 0.72 -3.79 86 448 2009-01-15 18:05:59 2003-04-07 12:59:11 16 9 103 2 124 392 105 55.10 34 24.05 CHANGED u..RhFplcVsu.........tt...h.R+Sspshl.VPaschspphQcIp+hGG+IlSIsss .........RhF+lcVsu.........tppp.pl.R+ospsal.VPYsphspphQRIpRhGG+IlSIpsh.... 0 14 77 114 +1653 PF00166 Cpn10 cpn10; Chaperonin 10 Kd subunit Sonnhammer ELL, Finn RD anon Prosite Domain This family contains GroES and Gp31-like chaperonins. Gp31 is a functional co-chaperonin that is required for the folding and assembly of Gp23, a major capsid protein, during phage morphogenesis [1]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.12 0.72 -4.05 48 6057 2012-10-01 22:45:51 2003-04-07 12:59:11 16 6 4946 114 1577 3685 2838 90.70 44 92.00 CHANGED plcPLtDRVllct..h.pt.-p...potuGIllP..-supc..Ks..ppGpVlAVGsGhh..ppGp.hhshslcs..GDpVlascau.............Gs.clch..-.sccalllcppDIlAll ....................l+PLtDRVll+t....h..Es.Ep...cTs.uGIl.lP.....soApE....Ks...ppGc.VlAVGs.G...............p.h...................c..........s............G..........p..h..........hs.h.s.VKs...GDpVlas.cau....................Go.-lch........-.sc-aL.....lhp.EsDILAll................................ 0 540 1020 1322 +1654 PF05205 COMPASS-Shg1 Cps15; COMPASS (Complex proteins associated with Set1p) component shg1 Wood V, Coggill P anon Wood V Family The Shg1 subunit is one of the eight subunits of the COMPASS complex, complex associated with SET1, conserved in yeasts and in other eukaryotes up to humans. It is associated with the region of the Set1 protein that is N-terminal to the C-terminus, ie Set1-560-900. 
The function of Shg1 seems to be to slightly inhibit histone 3 lysine 4 (H3K4) di- and tri-methylation, and it is a pioneer protein. The COMPASS complex functions to methylate the fourth lysine of Histone 3 and for silencing of genes close to the telomeres of chromosomes [3]. 22.10 22.10 22.10 23.40 21.50 21.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.43 0.72 -3.75 22 260 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 195 0 177 248 0 106.20 31 17.20 CHANGED cpLsctaKKcGtFDphR+clLsch.cpu...........-tcpplpp+ltpllcsclpc-s.plL.+s+G+susLIcGtls+s............................shhpt..........s-pslss..ll-pclpch..tslcphh+ptltc- ................llpphKpcGhFDphR.+-sLu-h.csp..............suhpsLpp+lcshVpscls.cpp......shN+..sphpshl.ctplhcS............................hl..ps............s-ch....lsp....llcs+lsch..hsplcphh+p.l...t..................................................................................... 1 43 76 128 +1655 PF00289 CPSase_L_chain CPSase; Carbamoyl-phosphate synthase L chain, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines [2]. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00988. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. 
25.50 24.50 25.50 24.50 25.40 24.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.17 0.72 -3.96 157 23989 2009-01-15 18:05:59 2003-04-07 12:59:11 17 163 7729 193 5850 19831 8081 108.40 33 18.02 CHANGED hhc+lLlANRGEI...........AlRlhRss+ch.............Gl.coVslaoss..Dpsuh...asphADcshhls..s...s.........tuYLsh-pllpsA..cps........G.....spA.lHPGYGFLSEsscFActst..p.tGl..hFlGPsscsl ...................................................................hc+lLlhs.pG.I...............................ulpsh+uh+-h..................Gh.col.hV.ss.ss.............-..o..sus.............chph......AD.c.....s..Yh.s...............................lsh.-tlhplhchp.........................u...s.s...ha........sG....sh.Ls.s.hph..tp...h..........c..tGl....hhlGssscsI...................................... 0 1856 3641 4921 +1656 PF02786 CPSase_L_D2 CPSase; Carbamoyl-phosphate synthase L chain, ATP binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines [2]. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00988. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. The ATP binding domain (this one) has an ATP-grasp fold. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.13 0.70 -5.05 16 23031 2012-10-10 13:17:03 2003-04-07 12:59:11 12 179 6767 194 5911 27500 13103 197.30 32 31.79 CHANGED D+tthpsthschshPssPusss...coh--AlthAcclGYPVII+uAauhGGpGhtlspsc-Eltclhspuhspu......splLlEK.lcs.KcIEhpVlpDupsNsIplsshEs.s.h..+stcsl.hAPSpTLo-cphphl+psAlpls+clGhhGus.slpahlss..stcahhIEhNsRlphppslupcsTGhsLshhthKlAhGhsLspl .............................................................................................................D+tthpp.h.h..pc..h..s..ls.......h........s.............ut....t...s........................p......s..........h.......-......-...........A............h.............t........h............A.......c.........c..........l.............G....Y...P...l..l......l.......+.......s.u.......h.......s........h........G.....Gp.........G.......h....p........l.........s.........p.......s....c.......c...........-.........L....p................p.h..h.................p.......p.................u.......h.................p.................t.................u...................................................................................s.......s.......t.................l...........h.......l..-.+.h.l....t....s........s....+......c....l......E...l........p......l.......l........s....D.......s.......p........s.........N.........s.........l........h.......l.......s.......p......h..........-......s............pt................+s..t........c.....s.............l......p.............................u.......P.......u.............................s.............L......o........s.................c................h.....................p.........t......l....t....p..s....u..........h....p....l.....s...........+t....l.....s.....h....h...G....s..s.....s...l...p..F.......h.......l.........s...............................s...........s..................c...........h...Y..hl..E........hN.s....R....l...p.......h....p...p....s...l...s.
..p..p...s.........T......G.........h.....s.....lschth+.lA.hG.sLs..h.................................................................................................................................. 0 1878 3684 4980 +1657 PF00650 CRAL_TRIO CRAL/TRIO domain Bateman A anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.70 0.71 -4.74 110 4026 2012-10-02 01:12:42 2003-04-07 12:59:11 15 76 365 19 2809 4545 94 152.30 20 37.53 CHANGED th.hptththlhGh.DppGRPlhhhcht.thssp....sts..t......................phhchhlhhhEpsh..................hhtt........thsshsllhDhpshs.....htphs....hshh.+hllphhp.spYP-pLtplhllssPhhhsshapllp..sal.sspsppKlpht.pstp...........Lppals...cplsp.......phG.Gp ....................................................................................................t......h..h.h...th..D...p.p.Gcslh..hhphs..ph.ssp................phs...p.........................................ch..h+.h..h.hhhhE..thht.....................................................h............................thp...shshlh.Dh..p...uhs..............h.p.p.h...t.............hphh...p.......p....h....hph..h.......p....stYP...p...plt..p..h...hll...........N.......s....P...h......h.a...p.s.ha.p..l...........l+................sa.l..s.........t.ps...t..pK...lhh...h...ts..t.t..t..........tLhp..h..l.s.........p.LPp.......phGGp............................................................. 0 1013 1571 2350 +1658 PF03765 CRAL_TRIO_N CRAL/TRIO, N-terminal domain Bateman A, Griffiths-Jones SR anon Prosite Domain This all-alpha domain is found to the N-terminus of Pfam:PF00650. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.54 0.72 -3.83 148 2100 2009-01-15 18:05:59 2003-04-07 12:59:11 10 27 310 19 1402 1996 17 56.30 25 13.53 CHANGED pptlpp....lcphltp............................thhptph..............sD..thlLRFLRARcaclpcAhpMlpps ...............................................................................................................h.p...hcphltp...................................t.hhttph...................................................sD...thLLRFLRARcFclpc.Ahphltp.............. 0 364 715 1092 +1659 PF02537 CRCB CrcB-like protein Mian N, Bateman A anon COGs Family CRCB is a putative integral membrane protein possibly involved in chromosome condensation. Over expression in E. coli also leads to camphor resistance [1]. 22.70 22.70 23.10 22.90 22.40 22.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.59 0.71 -4.21 106 5746 2012-10-02 19:55:49 2003-04-07 12:59:11 10 8 3698 0 1390 3815 1350 112.50 30 83.00 CHANGED hlhlulGGslGAhhRahlsthh.p.........................uTl.hlNllGsallGhhhs.hh..............htth...hphhlssGFhGuhTTFSoash-shplhpps.phh.tuhhahhholhhGlhsshhGhh ...................................lhlhlGGulGulhRahluhhhst...hhssh.................................PhGTL.hlNllGuFllGhhhshhh......................................tthssthphh.....lsTGFhGGhTTFSTFshEsl...p.L....h.pp.........s.....p.........h........h.....huh...h.al...hhollhul.hhshlGh.h.......................................... 0 430 866 1175 +1660 PF01321 Creatinase_N Creatinase/Prolidase N-terminal domain Finn RD, Bateman A anon Bateman A Domain This family includes the N-terminal non-catalytic domains from creatinase and prolidase. The exact function of this domain is uncertain. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.87 0.71 -3.66 170 7541 2012-10-02 11:23:57 2003-04-07 12:59:11 13 18 4001 40 1991 5872 3626 132.50 18 31.80 CHANGED Rlpclpphhpcps..lDuhllsss............tslhYloGa.........ssstshhl.l......st.cs..t.h...................llss.hchtpttppp...........hcl.h.hpp..................hpslhphltph.........h.t..............ppl.G...hEtsh.....hshst...hptlppth.....t...phhshss....hlpplRhl .....................................+lpplpphhp..p..p..p.......lD..uh..llsss................................................tshtYlo..GF...................sssss.hs.l...l................st...-p...s...h.......................................................lhs..c...hch...httsppp.......................shpl..hthps........................................................hpslt..p.h.ltph..........................................................h.t................ppl..u..........h-..s.ph..............loh..st.....h..p..p.Lppth..........st.......ph.h.shs.......h.t.hR................................................................................................... 0 667 1256 1650 +1661 PF00030 Crystall crystall; Beta/Gamma crystallin Sonnhammer ELL anon Swissprot_feature_table Domain The alignment comprises two Greek key motifs since the similarity between them is very low. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.58 0.72 -4.09 138 3340 2012-10-01 23:14:22 2003-04-07 12:59:11 14 52 213 141 1708 2532 18 81.10 31 53.48 CHANGED +lplaEcpsFp..GcphEhs.sDs..sslpphhh.sc............lpSh+V.s.GsW.lhYEpssapG..cQalLc....pGEYtsapp.W.G.....ssp...........ltShRhl ....................................plhlaEcp.sFp....G.c.phEhs....s....Ds........sslps.h.....h...a.sp...............spSl+.V.s..G...sW....l..hYEpssap....G......pQa.lLc........p.G.-.Y.....p..a..pp...W..G........t.ss.p.........................ltShR.l......................................... 0 133 378 969 +1662 PF02633 Creatininase Creatinine amidohydrolase Mian N, Bateman A anon COG1402 Family Creatinine amidohydrolase (EC:3.5.2.10), or creatininase, catalyses the hydrolysis of creatinine to creatine [1]. 24.40 24.40 24.60 24.50 23.90 24.30 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.52 0.70 -5.28 163 1362 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 981 81 528 1293 626 234.00 26 90.37 CHANGED hsphoh..s-l.pph.....tpt..sssllPlGu.sEQHGPHLPluTDshlupslupcsspphs................shlhPslsh......Gh..SscHh.s.....FP...GTlolsspThhsl.lp-lscSlt.ppGh++llllNuHGG..Nhshlphsscclptc............shhlhshsaaph..s..t........hhsthttptshHAGthETSlhLtl....tP....-hVchs+.............................ts.thhsht..thhhshthpc...hsssGshG...-sst..AoAE....+Gctlhcthscthsphlp 
........................sh.-htth......tps.....sssllPlGu.sEQHGPHLPluTDshlupslu...pp..lspphs................shlhPslsh....................Gh.....u...c.Hh.s........................aP.....G.T.lolss.pshh..sh.lt-lscult.p.pGh...++...........llhlN................uHGG..........Nhss....l.pts...spclptc................................shh.h.h.h..h...s.a.h.ph....s.........................h.t..t.t..t.ts...hHAGthETSlhLtl....tP.-h.Vchsc............................h.t.t.hhp.h.t....sh.hhs..hthpp.....hsss.....G.shG...csst...Aos-......cGctlhpthspthsphl...................................................................................... 0 165 359 459 +1663 PF03858 Crust_neuro_H Crustacean neurohormone H Finn RD anon DOMO:DM02710; Family These proteins are referred to as precursor-related peptides as they are typically co-transcribed and translated with the CHH neurohormone (Pfam:PF01147). However, in some species this neuropeptide is synthesised as a separate protein. Furthermore, neurohormone H can undergo proteolysis to give rise to 5 different neuropeptides [1]. 21.00 21.00 21.10 21.00 20.90 20.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.00 0.72 -3.37 8 51 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 23 0 0 56 0 38.60 54 29.06 CHANGED RSA-GaGRM-RLLASL+Gsu-o.sPLu-LpGA.E.uuuHPLE RSApGaGRM-RLLASL+..u-s.sPlusL..s.E..ussHPLE.............. 0 0 0 0 +1664 PF01147 Crust_neurohorm Crustacean CHH/MIH/GIH neurohormone family Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 26.20 25.00 23.70 24.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.08 0.72 -4.67 45 264 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 93 1 38 277 0 69.90 40 61.43 CHANGED plaDpsC+Gla.sRslFp+L-RVC-DCYNLaRcsplsotCRpsCFsNphFttClcsLhh.c.phccapphlphl.s ........................haD.sC+Gla.DRslFp+L-RVC-DCYNL..a..R..p..stlsstCRpsCF.........sNphFttClcsLhh..-..phcphtthlphl................ 
0 12 15 35 +1665 PF00525 Crystallin crystallin; Alpha crystallin A chain, N terminal Finn RD anon Pfam-B_97 (release 1.0) Family \N 23.20 23.20 23.20 23.40 23.10 23.10 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.29 0.72 -3.76 11 395 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 264 24 81 297 0 51.50 65 39.75 CHANGED MDIsIQHPWhRRPhas..aaPSRlFDQhFGEHl.-uDLFPsh........sslSPaYh+...P.hhRhPS ..........................R.....u...LGP....hhPSRLFDQFFGEGLhEYDLLPhh.............SSTISPYYRQS.....LFR...oV............... 0 5 12 29 +1666 PF03783 CsgG Curli production assembly/transport component CsgG Finn RD anon COG1462 Family CsgG is an outer membrane-located lipoprotein that is highly resistant to protease digestion. During curli assembly, an adhesive surface fibre, CsgG is required to maintain the stability of CsgA and CsgB [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.25 0.70 -5.25 61 1246 2012-10-01 20:48:06 2003-04-07 12:59:11 9 33 1090 0 274 845 586 190.90 30 65.37 CHANGED tshshsth+hsluVhpF..cspou........at.s..........SshsssluptusshLlstLppo...stFsllERpsLpslhpEppl...............ssphssLpuAshllpGulspaspss.t.tsG........hphhGlhu......psphppshspVsLRlVslpTucVlhSspssuchtspshpsu..............................hhthhsu.suhhsscslslAlppAlpp..hVptl.s.th.tuhWp..sp .......................................................................s..h.hsss+h.luVhph..pscou..............a..h.h........Sshsstlsppu.psh.....Llo.t.L..p..p..S....p.h..F.hsL.E..R..p..s..L.pslh.p.Epplhps.s..p............s.pph.l........psLpu.AshhlpGuI........h..s.......ap.spst..s.st.lG..............tphaGIhu...........................................ss.phpls.pht..VsL+lVsVsTu.Ell.S........sp.sutphh..phpss..............................h.th.htu....uhhsscsl.lslhpAlpphV.hl.s.Gh.p......s................................................................................ 
1 75 168 224 +1667 PF02599 CsrA Global regulator protein family Bashton M, Bateman A anon COG1551 Family This is a family of global regulator proteins. This protein is a RNA-binding protein and a global regulator of carbohydrate metabolism genes facilitating mRNA decay [1]. In E. coli CsrA binds the CsrB RNA molecule to form the Csr regulatory system which has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis [1]. In other bacteria such as Erwinia caratovara RmsA has been shown to regulate the production of virulence determinants, such extracellular enzymes [2]. RmsA binds to RmsB regulatory RNA. 20.70 20.70 20.70 21.60 20.60 19.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.60 0.72 -4.24 137 1995 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1669 7 426 998 255 53.20 54 75.78 CHANGED MLlLoRKhGEsIhI...G.D.-IpIsVlplc..Gsp....V+lGIcAP+slslaRcEl......YpcIppc .............MLILTR+lGEolhI...............G.D...-l.sVT.VL..u.V+...GsQ....VRIGlsAPK-VuVHREEI..Yp+IptE................... 1 175 279 361 +1668 PF02554 CstA Carbon starvation protein CstA Bashton M, Bateman A anon COGs Family This family consists of Carbon starvation protein CstA a predicted membrane protein.\ It has been suggested that CstA is involved in peptide utilisation [1]. 
25.00 25.00 28.10 27.70 24.80 24.70 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.31 0.70 -5.81 97 3680 2012-10-03 01:44:59 2003-04-07 12:59:11 9 5 2335 0 644 2537 205 319.50 39 60.88 CHANGED h.sulhlllsulssahluYRaYupa.lup+lht.lDssRtTPAcphpDGhDYVPTs+h.VLFGHHFAuIAGAGPlVGPllA.AtaGaLPuhLWIllGslhuGAVpDahsLahShR+cG+SlGpls+cplGphushhhhlhshhlhllllAlhuhlVspsh..............sp...........oPh.....................ushslhhsIPlAlhhGha...hah+suplhtsSllGllLlhhulhhGhh.ls........................................hh.hstpphsh..llhsYualAolLPVWlLLtPRDYLooahhluslshLslGlll.....stPplphP.AhTp......F.h...sGssPhhsG...slFPaLFITIACGAlSGFHuLluSGTTsK.lspEscs+hlGYGuMLhEuhlAlhAllAAsslt....................uhYauhp......................................................................Ghs.....sstlsphup....slG ............................pshhhllsulshhhluYhhYuha.l..tpplh..l.-..ss.R.hTPAhh.sDGhDYVPss+h.llFGHHFuu..IAGA.GPllGPlLA.A.hGalPuhlWllhGslhAGAVpDahsLhlShR+sGtSluphhpcphG.hsthlhhhhshhlhlllhAVhu.....h....llspsL................s.t............sPh..................................................uhhs.lh.hlslAhhhGhh...hhhts...sp..hh.hohlGhhhh.hhslh.G...l.........................................h.hptsthsh..hlhsYsFlAulLPVWhlLuPRDYLsoahhlssllul.sl.Glll...................htPpl..p...hP...uhop.....a.h................ssssP..h..hsu............slFPhLFITIACGAlSGFHAhluSGTTsKhlt.............pEppuphlGYGuMlhEuhVAlhAhluAs.h.l.........su.hh.th...............................................................ht.s..hh............................................................................. 
0 219 401 536 +1669 PF00859 CTF_NFI CTF/NF-I family transcription modulation region Bateman A anon Pfam-B_362 (release 3.0) Family \N 19.50 19.50 20.10 19.70 19.10 18.60 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.12 0.70 -4.65 4 440 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 51 0 132 328 0 227.10 51 56.35 CHANGED QDSFVTSGVFSVoELVRVSpTPIssGTGPNFSLuDLpSpsYY.shsPGu..hRRoLPSTSSSuS.KRhK..SME--hD.SPGt-saYo....SPuSsopSS.sWHE.hEsuhsSP.phpc.-Ks.FsssSPpppSshhSuFsQ+H+Psl.....os.tsSPHso.SsLHFPTSsIl.QpPuoYFsHsAIRY....pPQ-sLK-aVpLsCssuuQQuGQ.....PNGSuQ..GKV.s.FLsTPMLsPPPP..PshARPVsLshPDTKPsTTSTEGGusSPTSPoYSsPuTSPANR.FVulGPRDPuFl...pQsQSWYLG ...............................................pDsFVpSGVFsVoELVRVSpTPlssGo.....GP.NFSlu-.L...pSp.sY.Y..s....hss......u.....s.....h+RSLsSssSo............ss.K........R...K.........Sl-.-.pM-...SPst-..........FYs....SPu...ususu.S.t.sWp-..h-.............s.................s.............h......su.P....ssh....KKs.tK...FsS.sosppsSshh.sFspHt.hPs..l.....ss.....ssSP+so..sSsLHFPoosIl..QpsusYFoH...PsIRY...hpP.QDsLK-aVphsCs..s.u..u.tQ.s.u.p.........................suusQ...+h..s.h....hLsPs........P.......h.h............................t........................................................................................................................... 2 9 23 64 +1670 PF01467 CTP_transf_2 Cytidylyltransf; Cytidylyltransferase Bateman A anon Bateman A Domain This family includes: Cholinephosphate cytidylyltransferase Swiss:P49585. Glycerol-3-phosphate cytidylyltransferase Swiss:P27623. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.99 0.71 -4.17 65 14199 2012-10-02 18:00:56 2003-04-07 12:59:11 21 74 4889 196 4168 10545 6936 142.60 19 60.12 CHANGED lhsGoFDPlHhGHlpllcputphhs......llhlssspsspp..p....shhstpcRhchlctshtsst.....................................................h.hlhstc.....................hpthpchltchphshhs+s.s..thpt.....................t.....hhhhhhh.htp.......lSootl+pt .......................................................................................................lhsGoFDPlH..h.........GHl.shlppA...t......p.....h......h.......c.........h..........................llh..ls....s.s..t....s..s..p.+...p............................shh.s.h.ccR....hth....l....p....t......s....h......t.....s........................h...........h.......p.......................................................................................................................................................................t.......h..hl..h.ttc.........h...........................th..p..th....p..c..h..h...t......p...h......p.....h...s...h..hs..+.s...t...th.pt.h..........................................................................................h..h.........h.................lSoo.l+p.h........................................................................................................................................................................................................................................ 
0 1392 2560 3483 +1671 PF02348 CTP_transf_3 Cytidylyl_trans; Cytidylyltransferase Bashton M, Bateman A anon Pfam-B_886 (release 5.2) Family This family consists of two main Cytidylyltransferase activities: 1) 3-deoxy-manno-octulosonate cytidylyltransferase, [3], EC:2.7.7.38 catalysing the reaction:- CTP + 3-deoxy-D-manno-octulosonate <=> diphosphate + CMP-3-deoxy-D-manno-octulosonate, 2) acylneuraminate cytidylyltransferase EC:2.7.7.43, [1,2], catalysing the reaction:- CTP + N-acylneuraminate <=> diphosphate + CMP-N-acylneuraminate. NeuAc cytydilyltransferase of Mannheimia haemolytica has been characterised describing kinetics and regulation by substrate charge, energetic charge and amino-sugar demand[4]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.47 0.70 -4.71 20 4289 2012-10-03 05:28:31 2003-04-07 12:59:11 14 33 2780 54 1074 4236 6196 213.60 26 80.94 CHANGED lsIIPARhuSpRL.sKsLtpltGcPllt+slcsAhpSthh-+....llVATDscpltchsppaG..spshhpssshsssscRshcslcchhss........hllslpGDpPhLpspsltphhppltps.t...................hsshstsls.sppshpssslcsshcpp......shthahpcushsahpcp.s.................hhtchulYsaRpt....hhhcaststsos...........hc.hcpl.........cQh+.....hhhtu................pcI 
.......................slIPA.Rh...uSoR..l...P.....s....Ks...Lt....c...l......s...G.....K.....P..hIhash.-...p....A.....t...p.u....s...t....h.c.c.........l.l..V...A...T.....D....c...c...c....l....t....p....s....s......p....t.hG..........scl.h..h.s....p...s.c..p.t....o....u....o.....c.......R....h....s...-..s....l..p.c...hshs.....tp....................llls..l...Q.G....D..p....Phlssshlpp....sh..p...t..l...t..ps...s...........................................................th.s.o...h...s...h.........l....t........t....t....p........c...h...h..s........s...s.s......l.K...s....l....h..ctp..................s.u...h...a...a.o+ss..lPa....+....p.t...t.....................................................hh.t+..l.u...lYsactt...........hl.pas.t.....s....................hE.hE.l...................-ph+.....hh..................................................................................................................................................................... 0 335 680 894 +1672 PF04808 CTV_P23 Citrus tristeza virus (CTV) P23 protein Kerrison ND anon Pfam-B_2595 (release 7.6) Family This family consists of protein P23 from the citrus tristeza virus, which is a member of the Closteroviridae.\ CTV viruses produce more positive than negative RNA strands, and P23 controls this asymmetrical RNA accumulation. Amino acids 42-180 are essential for function and are thought to contain RNA-binding and zinc finger domains [1]. 
20.70 20.70 21.00 20.70 20.20 19.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.44 0.70 -5.01 3 175 2012-10-01 20:03:16 2003-04-07 12:59:11 7 1 1 0 0 178 0 204.80 91 100.00 CHANGED MDNTSGQTFVSVNLSDESNTAST-VEsVSSEADRLEFLRKMNPlIIDALIRKNSYQGARFRARIIGVCVDCGRKHDKALKTERKCKVNNTQSQNEVAHMLMHDPVKYLNKRKARAFSNAEMFAIDLVMYTKERQLAVDLAAEREKTRLARRHPMRSPEETPEHYKFGMTAKAMLP-INAVDVGDNEDTSSEYPVSLSVSGGVLREHHFI ......MDsTSGQTFlSVNLSDESNTASTcV+sVSSEADRLEFLRKMN.PFIIDALlRKTsYQGARFRARIIGVCVDCGRKHDKu.+TERKCKVNNTQSQNEVAHMLMHDPVKYLNKRKARAFSNAEMFAIDLVMYTKERQLAVDLAAEREKTRLARR.HPMRSPEETPEHYKFGMTAKAMLPDINAVDVGDNEDTSSEYPVSLSVSGGVLREHHFI...... 0 0 0 0 +1673 PF01179 Cu_amine_oxid Copper amine oxidase, enzyme domain Bateman A, Finn RD anon Prosite Domain Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). This family corresponds to the catalytic domain of the enzyme. 
19.70 19.70 20.10 21.40 19.50 18.30 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.16 0.70 -5.75 75 1272 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 545 187 699 1240 205 371.60 33 57.94 CHANGED Plpl....sQP-GsSFpl.c.....GptlpWppWpF+lGFssREGlsLaslpapscs.........lhYRlSlsEMhVPYGDPpssahpKpsFDsG-aGhGhhussLplG......CDChGt.IpYhDuhhssssGpPhplcNulClHEpDsG.lLaKHoshcs..s.ps.tspRsRpLVlphIsTluNY-YhahahFhpDGsIchEl+ATGIlsoss........hss.....spp.....ss...aG.shVs.sslhushHQHlFshRlDhslDG...t.p....Nolhth-shshsh.......sstN..Phusuapsccphlcsttpu..thchssppsRhaclhNssppN.h.GpPsuY+lhs......ssssh...llspssShhtpRusFAp+plWVTtYc-s.....EhauuGpassQssus......sGlssaltp...scs.......lcspDlVlWaThGhpHhP+sEDaP..VMPs-hhuhhL+PhsFFspNPuLDlPsssp ...........................................PhplhpPcGssapl..p......Gphlp.WppWpF+lu..hs.RtGhhl.slpapsps...........lhYchSlsEhhVPYu..sPp....sahh+thhDhG-aGhGhhsssLthG.................pDC..u.t.h.....tahDs.hhss.t...sGtPht.h...su.....lClaE...p....s.sG..ha+...Hppht...............t...s...hstcp+pLVlphlsTluNY-Y........h...........asahFtp...sGsI......phcspuTGllssts.......................hss..t..ts.....s....aG.ohls...slhushHpHha......shRlDhsl...D....G...........t...p....N.o.lh.t..-.s...hs...h.s....................sst..s..s.h.....s...ssh...p...sp...pph....lpsE.tpu..thch...sstp.sRhhhlsNss.ppNth.Gps.........suY+lhs......ss.st.h....hhhs.ss.s.h.hh.+R.ssF.sc..................pp....laVTpY..ccs......E.....hasuGtassps.ts..............suls.paht..p......scs..............................l.....c...s..p...Dl......VlW...hT..................hGhpHhP+s..EDaP..lM..P..s..p..hh..sh.hL+PhsFFspsPslss........................................................................ 1 162 358 544 +1674 PF02727 Cu_amine_oxidN2 Copper amine oxidase, N2 domain Bateman A, Finn RD anon Prosite Domain This domain is the first or second structural domain in copper amine oxidases, it is known as the N2 domain. Its function is uncertain. 
The catalytic domain can be found in Pfam:PF01179. Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). 20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.76 0.72 -3.90 14 944 2012-10-02 11:54:05 2003-04-07 12:59:11 11 27 479 119 492 882 81 87.20 27 12.37 CHANGED HPL-sLostEhs+spsIlpsps.hsts.shphp.lsLc-PcKphVhpa.....-ctssh..ssRcApslhhhuups..a.hllDLssuplsusphh ................................HPLssLostEI..pp....sst.llp........sut.................hts........s.....h........pFttlsLtEPsKttlh...sa................stssts........ssR..pA......l....l..h..h...s.....t.p........hcslV-..L..ssstl.p.p..h............................................................ 0 103 248 379 +1675 PF02728 Cu_amine_oxidN3 Copper amine oxidase, N3 domain Bateman A, Finn RD anon Prosite Domain This domain is the second or third structural domain in copper amine oxidases, it is known as the N3 domain. Its function is uncertain. The catalytic domain can be found in Pfam:PF01179. Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). 
20.60 20.60 20.60 20.70 20.30 20.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.13 0.72 -3.96 17 943 2012-10-02 11:54:05 2003-04-07 12:59:11 11 20 497 180 473 884 97 100.70 23 14.32 CHANGED sslhhcEhspsppllh..p.Ptatcslpp+G..tp.hspVhstPhosGaautts....ucRlh+shsahppsss....shas+Pl-slplllDhcshcVlchp-pt.hhsls ....................................lhh--atplppllp..ss.spatts.l..ccp..G...ls..s..hp.pVhssPh.s............s..........G..........h....hstps......spRlh.pslhahcssss........Nhau+Pl-.lsslV..D.hpptcllcl.c.t..hhsh................. 0 93 229 354 +1676 PF02298 Cu_bind_like Plastocyanin-like domain Mian N, Bateman A anon Pfam-B_398 (release 5.2) Domain This family represents a domain found in flowering plants related to the copper binding protein plastocyanin. Some members of this family (eg Swiss:P93328) may not bind copper due to the lack of key residues. 20.80 20.80 20.90 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.14 0.72 -4.19 26 1166 2012-10-02 17:41:00 2003-04-07 12:59:11 12 16 93 17 688 1154 2 82.90 31 43.81 CHANGED Wshsh......YspWAsu+pFplGDsLlFpYssphHsVhcVs.ctsYcsCpssp.shpsas.............sGssplsLsp.GhpYFICuhs..GHCp..tGh ..................................sYssWu.ps..p.p.F.p..lG.DsL............l.F.pY.......s......s.....s...........a....s..Vh.pVs....cssYcsCssss.....sl..t.s.h.s............................sG..s..s..t.l....s...L....s..p...s.G.tt.Y.F.I.suhs.....uHCptG............................ 0 76 388 550 +1677 PF03263 Cucumo_2B Cucumovirus protein 2B Mifsud W anon Pfam-B_4373 (release 6.5) Family This protein may be a viral movement protein. 
20.90 20.90 41.70 40.80 20.60 18.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.55 0.72 -3.74 7 122 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 9 6 0 124 0 104.60 70 96.29 CHANGED tuuhosl-LpLA+lVEtK+pRRRSH+pNRRtRGaKSPSERARSpLR....LFphLPFatsDss-.hhshh+c.shs-LspsEss.....s.p.tshDDTDWFAGNEW.sEGSF ......t.GAhTNVELQLARMVEsKRQRRRSHKpNRRERGHKSPSERARSNLR...........LFRFLPFYQVDGSE.Lh...-hh+HssVsELsESEAsp.h..ut-DHDFDDTDWFAGNEW.AEGuF... 0 0 0 0 +1678 PF00760 Cucumo_coat Cucumovirus coat protein Bateman A anon Pfam-B_867 (release 2.1) Domain \N 25.00 25.00 49.30 43.20 18.60 16.70 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.13 0.70 -5.38 4 531 2012-10-04 01:49:40 2003-04-07 12:59:11 13 2 37 6 0 462 0 188.50 85 95.29 CHANGED sstpRRPRRGpRS....sssutDtsLRALTQQlsRLsphhAuutPTLsHPTFVuSc+C+sGYTaTSlsl+Ps+hEKsp.FGpRL.LPssVoEYsKKhVSplQlRlNP.PKFDSTVWVTlRKlPtoosLostul.thFsDGhSsVLlYQassoGlQsNNKllaDLSshtA-IGDMpKYAllVYSKDDsLEsDElVlHVDlEHQRIPouphL .............sRRRRPRRGSRS....AsSSADAsFRVLSQQLSRLNKTLAAGRPTINHPTFVGSERCKPGYTFTSITLKPP...KID+GSYYG...KRLLLPDSVTEaDKKLVS.RIQIRVNPLPKFDSTVWVTVRKVPASSDLSVAAISAMFADGASPVLVYQYAASGVQANNKLLYDLSuMRADIGDMRKYAVLVYSKDDALETDELVLHVDIEHQRIPTSGV.L..... 0 0 0 0 +1679 PF02376 CUT CUT domain Mian N, Bateman A anon Pfam-B_770 (release 5.2) Domain The CUT domain is a DNA-binding motif which can bind independently or in cooperation with the homeodomain, often found downstream of the CUT domain. Multiple copies of the CUT domain can exist in one protein (eg Swiss:P10180). 
22.50 22.50 22.50 22.80 22.20 22.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.78 0.72 -4.18 26 1069 2009-01-15 18:05:59 2003-04-07 12:59:11 10 15 90 11 655 873 0 84.90 35 19.88 CHANGED sphssspplsTtcIscclcpcL+...................ctsIuQslFAchlLs...+SQGoLS-..LLp............cP.......KPWsphp.pG+psap+MppaLs.s..psp+thhhh...ppp .................t......pplsTtcIspcl+.ppLp...................+tsI..sQtlFuchlL..s...hS.Q.......GolS-..lLp.........................cP..............KPWs+Lp..pG+EsFh+MppaLptsps.+..hh....ph........................................... 0 96 146 428 +1680 PF00888 Cullin Cullin family Bateman A anon Pfam-B_1149 (release 3.0) Family \N 23.00 23.00 23.00 23.20 22.80 22.90 hmmbuild -o /dev/null HMM SEED 588 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.84 0.70 -5.98 47 2251 2009-09-12 05:56:55 2003-04-07 12:59:11 17 46 329 30 1484 2160 44 484.10 20 65.66 CHANGED Wstlp.ss.....lp..plh..............t......h..laptlhsh............................................p.hut...lYpthtp....hh.........ppaltt.h.tph.t.....................t...hl..h...t.W..a...h.hhtt.hh.a..hs..p.a.ltpp.........................h.h..apt.l.ht...htt.hh..hh..h.....R..p.....p.....htthhp.hh.h........................t..hY.p.Ft..hlptottaYp..s.thl.p..sh.pYh..s..thppEpppst.........alp.ps.t...lhthhpp.hltth.t.l......tth..hhttpp.pcl..ha.lhphh.tslp....hhthhpphlp..ptuhthhtp.........................................................................................t.ps.....hlpthlthht....hhppsF.t......c.....hhtslppsht.hls.t.....................spssEhluhahD.hL+put....pthsptp...................lcpplsplhhlhcalpsKDlFpcaYpphLA+RLlptpSss..-sEpphlstLKp.............................ttG.paTp+lptMhpDlplSc-lsppFc.phhtpp.t..........................................................shshslplLssshWPh.ssp...............hplPp.....clppthctFppaYppp+ss........R+LpWhhpLupsclphph.ttt...........................................h.hploshQhslLhhFNp...............pcp
lohc-ltpttplstc..tLpc.tL.pslhpsphhh............p..pspphssssthplNppas .......................................................................................................................................................................................................................................................................................................................................................................h............h....h.................................................................................s......lat..h.............h..........................th.h..t................................................................hl..h.......Wt..pa....................h.h..t......hh.h...h....s...p......h...h.tp.....................................................................................................................................thhh..apt..h..h................h.....t......h.t.............h......h..............lt...p..................R....s........t............p............h..th..h.t.h...................................................................................ha.....ac..hlt.stt.hat..s..................h.t........h.....ah...........s..th.l..pE...hh.................h.h.........t...................l..t.h.p.hl..............t..h.........................tth.....h...h.....t.........t.........t.......p...........t...............p......l...t....h...a......h...h...t........t.............h.............t.......t........h............h..h.t.ht.....thlh..p.u.thh.t...................................................................................................................................................................h...ltt...h..lp...hh.p.p.h....t.h.....h..t........sFtt...........................................p.......h....t.shp.p...s....h.t...h.ls.t............................................................spssEh..l..........u..ha.......h.D...hl+ts....................tp..p.....................................................................
...................................................hcp.h....h.pp...........hhh.l...h..p.hl.....p.s.....KDlF.paYpp.hL.ucR.L....l......................t.....p..........o.....h................s...............-..............t........Etphl..p.h..L+t..................................t.h.G.....p.h...s.p.p.hptM.hp.....Dh.p.h.S..p.ch..t.p.app....h.htpt.........................................................................................................................thphph.lLsts.......h..W..Ph............................................htlP..............phtph.hp.ap.pa..Y..t...ppss...........................R.+L..pW......phupsplp..hp.h...t.t.t........................................................................................................hph..p.loshQ....hhlLh.h....FNp..................................tp.p..ho.hppltp.....t..s.....t..l.....t.....Ltp.tl...t.l...h...p.tthh.........................t....................................................................................................................... 0 530 800 1185 +1681 PF03091 CutA1 CutA1 divalent ion tolerance protein Mifsud W anon Pfam-B_2307 (release 6.4) Family Several gene loci with a possible involvement in cellular tolerance to copper have been identified [1]. One such locus in eubacteria and archaebacteria, cutA, is thought to be involved in cellular tolerance to a wide variety of divalent cations other than copper. The cutA locus consists of two operons, of one and two genes. The CutA1 protein is a cytoplasmic protein, encoded by the single-gene operon and has been linked to divalent cation tolerance. It has no recognised structural motifs [2]. This family also contains putative proteins from eukaryotes (human and Drosophila). 
22.10 22.10 22.20 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -4.35 13 1667 2012-10-01 21:59:08 2003-04-07 12:59:11 10 8 1551 85 550 1191 568 99.10 39 84.17 CHANGED hhllhlTsPs.-pAcclA+pllEc+LAACVNllspIcShYhWEGcIpcDsEhhlIlKTpspphspLpccl+phHPYsVPEIluLPlppGspcYLpWlcpslc ..............h.hlVhsTsPsc.ts.ApclAttllp..c+LAACsslls.shpSl...YhW.....c.....G.....+....l.....cp-.t.ElthllKTss..sphpsLhcpl..+phHPYpsPEllslPlsp..GsssYLsWlstsh.p....................... 0 154 313 430 +1682 PF03932 CutC CutC family Bateman A anon COG3142 Family Copper transport in Escherichia coli is mediated by the products of at least six genes, cutA, cutB, cutC, cutD, cutE, and cutF. A mutation in one or more of these genes results in an increased copper sensitivity. Members of this family are between 200 and 300 amino acids in length are found in both eukaryotes and bacteria. 21.20 21.20 21.30 21.60 20.80 21.10 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.36 0.71 -5.06 8 2302 2012-10-03 05:58:16 2003-04-07 12:59:11 9 10 2140 20 426 1510 277 197.20 39 85.23 CHANGED hllElCl-slsulhsApsuGA-RVELCuuLulGGlTPShGllcsAsc...pusIPlasMIRPRGGDFlYs-pElcsMtpDlcss+chGssGlVlGsLsscGslDhcthcpLltAAp..GLuVTFHRAFDhssD..PtcALEpLIcLGs-RVLTSGtt..ssAl-Gh-pLtsLVsQAusRIpIMuGAGlsApNltcLsptTGlsElHuSuts 
.....................................................h.hlElCsts......hpsshpA.t...psGAcRlELC..s..s..h.s...G.GlTPSh...G....llcp.s...hp.......................ph.sl...P.lhshIR.PR.....u...........G..............D......Fs.......Y..s-tEhth.MhcDlphs+c..h...........G............s........s......GlVhGsLss...-.G.............p...........l........Dh.....s.th.cclh.s.......s.u..t..................sh..s....l.....TF.HR.....A.FDh..s......s..s................h...p..A.....lc...p.......L.h....p....h.G...l.s..RlLTS..........Gtt....sss.....p..u..l.sh....l.....pc.L.....l......t...p.....u....s.........u.........p.............I.............Ihs...........Gu.GlpscNlpphh.c.......sG.lpplHsot.s............................................ 1 136 238 337 +1683 PF01083 Cutinase Cutinase Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.17 0.71 -4.54 23 1537 2012-10-03 11:45:05 2003-04-07 12:59:11 17 11 503 75 597 1612 26 189.00 24 66.18 CHANGED ssCscltllFARGosEsush...ussGsshsssLputhGusslul.uVt..YsAshsp........tsosssGssshtshlspssspCPsTplVluGYSQGuplhssAlst..............lsuustspltuVllFGsPps........spsls.......................slssh.suK..shshCssuDslC..usuhshss....Hh..sYss-.h.sspAssFlts+ls 
........................................................s..C.sh.hhhA.RGot..-...........s.....s..........s.......h..........s.........h.s.......s....h.h...s.sl......p......p...p.h.....s...........p..s.l...ssh..s..Vs........Ys..Asht.........................t..os.stGs...p.s.h........sstl.....pp.h......s......s..p.....C.P.s..T.....clVLuGY.....SQGAslhstshss...................................................sh.s.s.s..s...s..s..+.l...su..l....sLaGs.Ppp..........sts.hs.................................................................shs..s...h..s..s.+...s....hplCs.s....u..D..sl.C.......ss...................s.............H.....Y...........u..hh................................................................................................................. 0 135 324 513 +1684 PF01473 CW_binding_1 Putative cell wall binding repeat Bateman A, Mistry J, Russell R anon Bateman A Repeat These repeats are characterised by conserved aromatic residues and glycines are found in multiple tandem copies in a number of proteins. The CW repeat is 20 amino acid residues long. The exact domain boundaries may not be correct. It has been suggested that these repeats in Swiss:P15057 might be responsible for the specific recognition of choline-containing cell walls [1]. Similar but longer repeats are found in the glucosyltransferases and glucan-binding proteins of oral streptococci and shown to be involved in glucan binding [2] as well as in the related dextransucrases of Leuconostoc mesenteroides. Repeats also occur in toxins of Clostridium difficile and other clostridia, though the ligands are not always known. 20.50 9.80 20.50 9.80 20.40 9.70 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.74 0.73 -6.64 0.73 -3.57 213 28133 2009-09-16 13:19:13 2003-04-07 12:59:11 15 543 547 186 1455 18171 72 18.80 40 20.20 CHANGED suWhp.h..s..ssW.YYh.sssGsM ............sGWhp.......s...upW.YYh..sssGtM........ 
0 419 1058 1118 +1685 PF04122 CW_binding_2 Putative cell wall binding repeat 2 Kerrison ND, Finn, RD anon COG2247 Repeat This repeat is found in multiple tandem copies in proteins including amidase enhancers [1] and adhesins [2]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.12 0.72 -3.78 99 3897 2009-01-15 18:05:59 2003-04-07 12:59:11 7 210 151 0 926 3881 25 92.20 26 35.86 CHANGED pRl.sGss...RY-Tuhp....lupp..........h..........h..sssslhl..s....sG....ps..asD.ALuuuslAup............pssPllLss....ssh...psstshlpsh...........t.pplhll..GGpssls...pp....lhpp ..............................................Rl.uG..ps...Ra-Tuhp....lucp.....................h.t............................t.....sssp.lh.l..s.............sG...........ps......hsD..ALuuuslA.sp..............................t.s.u..PIlLss.......tssls....ssstshl.pph.........................tspplhll..GG..p.sslsppl...t........................................... 0 554 857 897 +1686 PF03638 TCR CXC; Tesmin/TSO1-like CXC domain, cysteine-rich domain Bateman A anon Pfam-B_1144 (release 7.0) Domain This family includes proteins that have two copies of a cysteine rich motif as follows: C-X-C-X4-C-X3-YC-X-C-X6-C-X3-C-X-C-X2-C. The family includes Tesmin Swiss:Q9Y4I5 [1] and TSO1 Swiss:Q9LE32 [2]. This family is called a CXC domain in [2]. 20.30 20.30 21.20 20.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.62 0.72 -4.10 90 900 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 139 0 617 883 19 40.70 48 14.71 CHANGED ppp+sCsCKKSpCLKhYCECFtsGthCsp..CpC.psCpNptpp ...............pp+sC.sCKKStC...LKhYCECFt...........uu..hhCss...CpC..psCpNp........ 0 297 406 520 +1687 PF03128 CXCXC CXCXC repeat Bateman A anon Pfam-B_252 (release 6.5) Repeat This repeat contains the conserved pattern CXCXC where X can be any amino acid. 
The repeat is found in up to five copies in Vascular endothelial growth factor C [2]. In the salivary glands of the dipteran Chironomus tentans, a specific messenger ribonucleoprotein (mRNP) particle, the Balbiani ring (BR) granule, can be visualised during its assembly on the gene and during its nucleocytoplasmic transport. This repeat is found over 70 copies in the balbiani ring protein 3 Swiss:Q03376. It is also found in some silk proteins [1]. 13.20 13.20 13.20 13.20 13.10 13.10 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.42 0.74 -6.50 0.74 -3.42 116 305 2009-01-15 18:05:59 2003-04-07 12:59:11 9 19 71 0 133 258 0 13.90 43 10.39 CHANGED sphWscpoCpCtCs ....sppaDcsoCpCsC.. 13 46 52 85 +1688 PF02560 Cyanate_lyase Cyanate lyase C-terminal domain Mian N, Bateman A anon COGs Domain Cyanate lyase (also known as cyanase) EC:4.2.1.104 is responsible for the hydrolysis of cyanate, allowing organisms that possess the enzyme to overcome the toxicity of environmental cyanate. This enzyme is composed of two domains, an N-terminal helix-turn-helix and this structurally unique C-terminal domain [2]. 25.00 25.00 25.80 32.60 24.80 20.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.45 0.72 -4.44 25 725 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 688 80 229 524 63 73.00 59 47.15 CHANGED shp.slPoDPhIYRhYElVhVYGsolKplIpEcFGDGIMSAIDFsh-l-+.psPc.GD.RVVlThsGKFLPY+pa ........sh...stlPTDPslYRFYEhlQVYGsTlKALlHE+FGDGIhSAIsFclDVcKlsDPc.G-.RsVITLcGKaLPhKs...... 0 59 131 184 +1689 PF04199 Cyclase Putative cyclase Bateman A anon Pfam-B_1440 (release 7.3) Family Proteins in this family are thought to be cyclase enzymes. They are found in proteins involved in antibiotic synthesis. However they are also found in organisms that do not make antibiotics pointing to a wider role for these proteins. The proteins contain a conserved motif HXGTHXDXPXH that is likely to form part of the active site. 
20.80 20.80 20.80 20.90 20.70 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.41 0.71 -4.57 76 2705 2009-09-13 14:05:17 2003-04-07 12:59:11 8 22 1704 5 1053 2458 695 168.90 21 65.07 CHANGED lhDLohslp..............pt.h........shh............h....................tt.hsh...pt....shsspt.lp.h.ssHsGTHlDu.sH........h................s.................sl-plPlp....hhs.pulllDlsp.................hsst-lptshpt.th...plptu-hVll+T......................Gh.................tphh..sstp.ahsp..........PGlsh-uucaLh.cpGl...pslGhDshuh...........-ts. .........................................................................hDlo.shp..................t..h........shas...........htht.h............................................tt........tshtspp..lp.h..ssp..s.G..THhDAPhH.........................................................h.tsst.........................................sl-p.l.slct.............hhu.......ulll..Dhsp.....................................ssthl.shpclt..tthtt.th.........pl.pt.u.c.hVll+T..........................................uh.......................................pp...hh...............s...s..tp...hhps................hs.uh....s.......-........ss..caLh....-..p..sl......pslGhDshuh-...s.............................................. 0 330 674 904 +1690 PF02984 Cyclin_C cyclin_C; Cyclin, C-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Cyclins regulate cyclin dependent kinases (CDKs). Swiss:P22674 is a Uracil-DNA glycosylase that is related to other cyclins [4]. Cyclins contain two domains of similar all-alpha fold, of which this family corresponds with the C-terminal domain. 
21.20 21.20 21.20 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.72 0.71 -4.17 240 3032 2012-10-03 00:42:12 2003-04-07 12:59:11 14 43 449 158 1706 2961 42 116.40 20 28.12 CHANGED PTshsFLcR....h.t+...........s...sph..................s....p........hcplupalhE......Lolh-ap............hlpahPShlAAuA..lhlupph......Lt.....t.............WspsLptao.uY...stspLtsssphlh....phhh...pss.....tsph.p..ulhcKYsppchtp.........lu.hhss. ..............................................................................ssshpFLpp........h..h+................h.........sph...........................s..p..........hcphupahhE........................lol..h-hp..................hlp.a.PS.hlAAuu..lhhAtph.......lp....t...................................Ws.p..p..l..t..t.ho..sh.................s.p...p...l..t......s....s.hp...hlh..phh............p.....................thhp+ht...............h....................................................................... 1 471 819 1258 +1691 PF03784 Cyclotide Cyclotide family Bateman A anon [1] Domain This family contains a set of cyclic peptides with a variety of activities. The structure consists of a distorted triple-stranded beta-sheet and a cysteine-knot arrangement of the disulfide bonds [2]. Cyclotides can be separated into two subfamilies, namely bracelet and moebius. The bracelet cyclotide subfamily tends to contain a larger number of positively charged residues and has a bracelet-like circularisation of the backbone [2]. The moebius cyclotide subfamily contains a backbone twist due to a cis-Pro peptide bond and may conceptually be regarded as a molecular Moebius strip [2]. 
20.30 20.30 20.50 20.50 17.20 19.80 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.22 0.72 -4.07 30 259 2009-09-11 23:07:15 2003-04-07 12:59:11 8 4 30 33 0 274 0 29.80 57 44.52 CHANGED G..lP.CGESCsalP.C...tssG.CSCcs+...VCYhN .......GlP.CGEoCVhhP.C...sshG.CSCpsp...VCY+N.. 0 0 0 0 +1692 PF00548 Peptidase_C3 Cys-protease-3C; 3C cysteine protease (picornain 3C) Bateman A anon SCOP Family Picornaviral proteins are expressed as a single polyprotein which is cleaved by the viral 3C cysteine protease. 21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.45 20 4118 2012-10-02 13:45:52 2003-04-07 12:59:11 15 54 450 68 1 3087 3 135.60 48 11.78 CHANGED GPsh-F....uhuhh+pNlhslsT...........spGcFTtL..GlaDplhV.....lPpH..upsscsIhlsGppsplhD....uhpL.lcppGssLElTllpL..cR...NEKF.RDIRpals...pphpcts-ssLslNosphsphhlsVGpVsthGhl.sLuGssTpRsLhYsaPT+sGpCGGVlhs....sGKllGlHlG.GNG ...........................................................................................................................................................P..-h..thhht.Nh.h.sph...........tp.s...h.h.hh...G..lhsphhl.....lPpH........ut.h...plhh.....p..t...t...hph.-.........thtl...stpsh......h.....-lsllpL..cp...s..p+F.RDIptals............pp.s.t..h..sss..shhl.......s.s....pc.hss..hh....l..s.l.us.....ls......G...h...l......s...hs......G.......p............o..t.+...h...hhYp..a..s.T+....A.GQCGGVl.h.u...........s.GKllGIH.lG.GNG........... 0 0 0 1 +1693 PF00007 Cys_knot Cystine-knot domain Sonnhammer ELL anon Published_alignment enriched with PDOC00234 members. Domain The family comprises glycoprotein hormones and the C-terminal domain of various extracellular proteins. It is believed to be involved in disulfide-linked dimerisation. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.85 0.72 -11.43 0.72 -3.95 24 1416 2012-10-02 16:54:34 2003-04-07 12:59:11 17 22 417 7 416 1343 0 92.30 29 44.70 CHANGED t.hCp.hN...hTlplE+csCshClslpTTICsGaChTh-...sh.ssh..h.QplCsac-hpYcohclPsCPsGssPhhoYPVAhSCcCs..tCstssoDCsh.thpsshC .......................h...C..hp.......hsht.h..ch...p..t...Cs.s..h.shp.sshC.GhChp.........................s....s.....t................p..sC..shtchph.psshl.......sCP...s.....G......s.s...s...h...h..o...ash...sh...oCpCs...tCpts.ss.h........................... 0 44 77 214 +1694 PF01053 Cys_Met_Meta_PP Cys/Met metabolism PLP-dependent enzyme Finn RD, Bateman A anon Pfam-B_366 (release 3.0) Domain This family includes enzymes involved in cysteine and methionine metabolism. The following are members: Cystathionine gamma-lyase, Cystathionine gamma-synthase, Cystathionine beta-lyase, Methionine gamma-lyase, OAH/OAS sulfhydrylase, O-succinylhomoserine sulfhydrylase All of these members participate is slightly different reactions. All these enzymes use PLP (pyridoxal-5'-phosphate) as a cofactor. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.10 0.70 -6.18 30 12248 2012-10-02 18:26:03 2003-04-07 12:59:11 15 41 4308 161 3561 25261 14126 368.30 34 93.53 CHANGED sThtlHuGpp.c...spuAlssPIYtoooatas........tsscpsG.a.YoRpGNPTpssLEcplAtLEuu..stulAhuSGhAAl.sslhsLlcs.GDc.....llsssslYGG.....Taphhcpshs+.hGlpssaVDs....schsslcpAlps..sTKhlalETPoNPhhclsDIttluclA+cp..s..llllVDNTFsoP.hlppPLcLG..ADlVlHSATKYlsGHuDVluGlllsps.tclspchthht........................................sshGusluPaDuaLlhRGl+TLsLRhcppspNAhplAcaLcpc....P....pVcpVhYPGLsoHPpH-lA++QhpGhuu..hlSFclcus............tupchlcsl+LhohAtSLGusESLlptPAsMTHuslstEpRtstGls-sLlRlSVGlE-h-DLlsDLcQALc .............................................................................................................................Thhl+uG..p.c.............tt.su.hssPI..a.o................ooa.h.....as.........................................................................h..........h..........t..........p........t..............t.......G.....h...............Y.o...R...h...s.......N................P...T....p...p...s...l....E.....p....t....l..A.t.......L..........E..........G.....G..............t.....s...........u..........h.........s....h...u........S........G.................h.....A.......A....l.............h......s......l.....h.....s.......l.........h.......p....s.....G..D....c.......................l...l........s........s........s........s.....l.........Y......G...G............................T.h...p...l.......h........s.......p.......h......h........t.......+.......h......G...........l......p.........s.....s.......a...l.....D..s.......................s...s......h......p.......s......l........p.......t.....s.....l....p........s............p......T.............K.............h.....l............a.....l....E.....o....P.......u....N....P.....h....h............p....l...s....D...l...t...t....l....u....p.....l.....A.......+...pt..........u...........h.....
...h.........l........l...........V.........D............N.......T............a.........s..........o...............P......h.......h............p..........p.........P.......l......c........h...........G..........A......D.....l.......V..l..H..S..u....T..K..........Y..l........s.G.....H..u..D.s....l.......u..G....s.l.l...s....s........s...................p...ht....p...p..h..t..h..h..t....................................................................p.s.h.G....s....s....l..u...P...h...s..u...a....L......l..l..+.G...L....c....T....L...s....l...R....hc...p.....p...s....p.N..A.....h.p.l.A....c..a.L...ps..+.....................P.........................tV.p...p..V....hY...P........u......L.........................s.......p..............t.....a......p.....l...t..p............+.......................h...........p..............G................t.........u.......u...........l..lSFplcss.....................ttstphl....ssL....c....l.h.s....hutS.lGsscS.L.l....h..a.....P.....u...s.....h....T..H......s....p....l.....s...t..c.....t.....p.....t...t..t..G.........l..ss.sL..lRlSlG.lEc.s-.DLlsDLcpAL.t......................................................................................................................................................................................................... 0 1060 2191 2996 +1695 PF00839 Cys_rich_FGFR cys_rich_FGFR; Cysteine rich repeat Bateman A anon Pfam-B_297 (release 3.0) Family This cysteine rich repeat contains four cysteines. It is found in multiple copies in a protein that binds to fibroblast growth factors [1]. The repeat is also found in MG160 and E-selectin ligand (ESL-1). 
20.60 20.60 20.60 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.16 0.72 -4.04 85 1785 2009-01-15 18:05:59 2003-04-07 12:59:11 12 15 137 0 1097 1610 20 58.80 24 71.91 CHANGED pcCc.ptlhph..pp..spDh+.....lsstLhpsCcp-l...pc..aC........sp............ttspupllpCLppphpp.tp ...........................cCc.ptlhph...pt....spD.h+..............lsstLhpuCc....s-l...pc.....hC...........ss..................................ttspGpllpCLhpphtp..t...................... 0 339 467 794 +1696 PF00031 Cystatin cystatin; Cystatin domain Bateman A, Sonnhammer ELL anon Prosite Domain Very diverse family. Attempts to define separate sub-families failed. Typically, either the N-terminal or C-terminal end is very divergent. But splitting into two domains would make very short families. All members except Swiss:Q03196 and Swiss:Q10993 are found. Pfam:PF00666 are related to this family but have not been included. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.92 0.72 -3.86 43 2054 2012-10-01 19:28:07 2003-04-07 12:59:11 16 20 367 81 813 2132 5 88.60 17 54.95 CHANGED Gulpssss..sssclpcslchAlscaNpps..tsthhchhclhcsppQVVuGh..pYhlchplscssC........ptpshpsCshh....ppphthsshpshptst .....................................................hps...sssplpph...sphul..p.p....hN.........p.....ps........................s........h..........h....hp.h.h.c...l...h...c......u......p...p.....Q..l..V......u...Gh......pYh..lp..lclt.cs.p.s............ptp.hp....s.p.h...............pth.................................................. 0 158 262 454 +1697 PF01578 Cytochrom_C_asm CytC_asm; Cytochrome C assembly protein Bashton M, Bateman A anon Pfam-B_114 (release 4.1) Pfam-B_8014 (Release 8.0) Family This family consists of various proteins involved in cytochrome c assembly from mitochondria and bacteria; CycK from Rhizobium[3], CcmC from E. 
coli and Paracoccus denitrificans [2,1] and orf240 from wheat mitochondria [4]. The members of this family are probably integral membrane proteins with six predicted transmembrane helices. It has been proposed that members of this family comprise a membrane component of an ABC (ATP binding cassette) transporter complex. It is also proposed that this transporter is necessary for transport of some component needed for cytochrome c assembly. One member CycK contains a putative heme-binding motif [3], orf240 also contains a putative heme-binding motif and is a proposed ABC transporter with c-type heme as its proposed substrate [4]. However it seems unlikely that all members of this family transport heme nor c-type apocytochromes because CcmC in the putative CcmABC transporter transports neither [1]. 21.10 21.10 21.10 21.20 20.80 20.90 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.60 0.70 -4.79 39 8917 2012-10-03 10:28:09 2003-04-07 12:59:11 15 16 4477 0 1718 6398 4666 196.50 21 51.54 CHANGED huslhtulhhhshshshhhhlh.t...p....thhuslshsssh...........................lhhuhhhh.......................l.....tssslssslps.hLhhHlshhhhoYushhlushhulhhL...........................................................................ht.htthcphshphhhlGashLTlullsGuhWA..p.sWGsaWsWDP+pshuLlsWllasuhL+sth..pc.shtuctsulluhlGFhllhlshauVphh..slc 
............................................................................................................................t..sh.h.h.hs.h.h.h..shh.....hhh......................................p.........t..........................th...........h.s.t..h.h....h.h...sh........................................................................................................................h.s.h.uhhhh.................................................................hh.............h..t..s..t.s.l..s.s..h..L.p....s......h..l.h.hHl..sh..hh..hu.Y...s...s...hh...ls.h.h.h.ulhhL..........................................................................................................................................................................................................t...h..........h..tt..ht...p..h..sh.t.h...s...hG...hh.h..ho...l.u.l.lhG.u...h..Wu........p.sW....G...s..a..W.h..W..D.s+.ts.hu..hl....s.ah..las.u..h..la.hhh........pp...sh..t..t..p..h..s...s....h..h..s..l..h.u.h.h.h.hh.l.shahVp......................................................................................................................... 0 525 1095 1442 +1698 PF02224 Cytidylate_kin Cytidylate kinase Bateman A anon Pfam-B_1582 (release 5.2) Family Cytidylate kinase EC:2.7.4.14 catalyses the phosphorylation of cytidine 5'-monophosphate (dCMP) to cytidine 5'-diphosphate (dCDP) in the presence of ATP or GTP. 
20.40 20.40 20.40 20.40 20.00 20.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.69 0.71 -4.79 14 4159 2012-10-05 12:31:08 2003-04-07 12:59:11 13 26 4102 23 894 2951 2326 155.70 40 63.80 CHANGED hphhhphplphh.p.st....shhsGpDlosp.IpopEVuptsStlushPtVRshhschQ+phuc..ssslVhEGRDlGTVVhPcAplKIFLsASsEhRApRRhpphstpu..s.sh-pLlt-ltcRDphDppRssuPLhhAtDAlhlDTSsLolp-Vl-cllp ........................................hs....phplph.....p..t..s..t..p....Vhl....s.G..c..DV.op.t..IRsp-VustsS.tVA.u.hPtVRctLlptQRp....h.....u............p..........t.........s..........G..........l.....V.hDGRDIGTVVh.P-.........A...p....lK....IFLsASsEpRAcRRhpph..t........p.....+.........G......h......s...s.........s...h.........-....p..l...hp-.......I.pcRD..pp.D..p.sR.....p.....lu.....P.L.....c........As.....D......A......l.....h.lDoTsl....oI-pVlpplh.t.................................................... 0 314 596 761 +1699 PF01265 Cyto_heme_lyase Cytochrome c/c1 heme lyase Finn RD, Bateman A anon Prosite Family \N 20.40 20.40 22.40 21.30 20.30 19.20 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.16 0.70 -4.80 46 463 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 263 0 351 449 4 246.90 36 87.58 CHANGED M.................s..t...............................s...spCPlscpspp......................................................stCPspt...............................................tphss..sshhss.spp.ssspshsLsscREhSSIPRs........................................t.usus........WlYPStpQFaNAMlRKGa..t.pp.....tsshcslVslHNtlNEpAWpcIlcWE..tha..................................scpCs.sP+LhpFtGcsc-...hoP+Aphpph.hG........hphPFDRHDWhVsRC.G..........+-VcYVIDaYs...Gssps..........p.....s......haLDVRPA.Lsohpu...sh-Rhh+h 
.......................................................................s.....................................................tCPhptt......................................................................stC.Phtt..............................................p.p.l..sP......ssh.s.s.sp..p....s.ssQs..hs.LsscRphSoIP+u.........................................................s..ssss.......W.YPS.Q.Fa..NAMlRKGa.t.t...tphs....tcshcslltlHNtlNEpAWpEIlcWE..t..ha...........................................................sppss..sP+LhpFtG+sp-...hSP+ARhpph..hG.....................................hphPFDRHDWhVsRC..G...............................ccV.RYVIDaYsGssst.........................p.....s.......shhLDVRPA.lsohpush-Rhhh.h.................................... 1 120 196 293 +1700 PF02322 Cyto_ox_2 Cytochrome oxidase subunit II Bashton M, Bateman A anon Pfam-B_997 (release 5.2) Family This Family consists of cytochrome bd type terminal oxidases that catalyses Quinol dependent, Na+ independent oxygen uptake [2]. Members of this family are integral membrane proteins andi contain a protohaem IX centre B558. One member of the family Swiss:O05192 is implicated in having an important role in micro-aerobic nitrogen fixation in the enteric bacterium Klebsiella pneumoniae [1]. 
28.10 28.10 28.70 28.50 27.50 28.00 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.19 0.70 -5.27 134 4601 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 3012 0 866 2836 242 331.80 32 94.78 CHANGED sLtllWahllulhlhhYslhDGFDhGlGhLh.sh....hsc...........-p-RchhlNolGPhWDGNEsWLlhuGGulFAAFPhsYAslhouhYlslhlhLhuLIhRuVuFEaRtK.st.s.ph+.....phWDhshhhGShlsshhhGlslGs.llpG...lshs.............................................ttas....Gs..............h.hsh...lsP.........FullsGlsslshhshhGusaLhhKTcG....p.lpp+Apphuthhshsh.hshhhhss.............................hhshh.tsthhtpah.shshhhhh.sslsllsslhshhhh...thp+............pth..uFhsoulsh.hhshhslshuhaP....hllPus...........sLTlasAuuuthoLphhhhsshlhlPllLsYssasYhlF+ ......................LphlWalllulllhsahlhDGFDhGlGhLh....h.....lu+..........................s-p..E...R...+...l...hl..N.....ol.....uPhWD.....GNpVWLlhuGGAlFAAFPhsYAs.hFSuhYlshhl.lLhuLhh.RsVuF-aRsK..h...ssc.a+.....phWDhuhh.lGShl.ss....hlhGl...AhGs.llpG....lshs............................h.....................thtasGs......................h.hpL..........LsP...............Fu.l..l.sGls...s...lshh...hhhG....usa..Lt..hKT..pG.....t.lp.pR..u....pphuphhullh....hlh.h.h.lsu.....................................hhssh.....sss.h.hp.p.a.........h..p.h.P......h.h.h.hh...ss.l..ul.ls..hlhs.hhhh....thp+.....................ssh......uFlhsslth.hhshhshuluhaP....hlhPus................sLTlas......AsSot.hTLplMhhss.ll.h.l.PllLsYshWsYahh........................................... 0 238 510 701 +1701 PF00283 Cytochrom_B559 cytochr_b559; Cytochrome b559, alpha (gene psbE) and beta (gene psbF)subunits Finn RD anon Prosite Family \N 29.50 29.50 30.70 30.30 29.40 29.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -6.92 0.72 -4.66 8 2001 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 993 42 112 639 147 28.50 55 47.05 CHANGED sscshoYPIFTVRWLAIHulAIPolFFlG ..........hphshs..IholRahslHulslPolFhhG... 
0 28 71 99 +1702 PF00284 Cytochrom_B559a cytochr_b559a; Lumenal portion of Cytochrome b559, alpha (gene psbE) subunit Finn RD anon Prosite Family This family is the lumenal portion of cytochrome b559 alpha chain, matches to this family should be accompanied by a match to the Pfam:PF00283 family also. The Prosite pattern pattern matches the transmembrane region of the cytochrome b559 alpha and beta subunits. 25.00 25.00 28.20 27.70 22.80 22.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.93 0.72 -4.44 25 1065 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 972 19 62 465 127 39.70 82 49.73 CHANGED LAYDVFGTPRPNEYFTpsRQclPlloDRFsAhpQl-phsc ...........LAYDVFGSPRPNEYF.TESRQGIPLITGRFDuLEQLDEFS.p... 0 14 39 56 +1703 PF02335 Cytochrom_C552 cytochr_c552; Cytochrome c552 Mian N, Bateman A anon Pfam-B_19175 (release 5.2) Family Cytochrome c552 (cytochrome c nitrite reductase) is a crucial enzyme in the nitrogen cycle catalysing the reduction of nitrite to ammonia. The crystal structure of cytochrome c552 reveals it to be a dimer, with with 10 close-packed type c haem groups. 
23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.24 0.70 -12.83 0.70 -5.79 38 1136 2012-10-01 23:37:15 2003-04-07 12:59:11 10 12 966 82 171 735 44 413.10 48 88.71 CHANGED s.tpau........+taPpQYcoW+cTsE..ss.........s................htDtLcccPtLslLWuGYuFu+DYscPRGHhYslpD.hpohRssss...........stsuuChoCKoPssPcLhcchG-........sYFsspascstscIs.......ps.............luCsDCH-sp.shp.......L+loRP.tlt+AlcslGh.......................c.hc...to+p-hRohVCAQCHVEYYFptc.............................................sptVpFPWspGh......olEsh.pYYDphs..........FtDWTHulSts.MLKAQHP-aEhas.pGlHupsGVoCsDCHMPhsp.tsupKhoDH+.lt.SPL.sshp...coCtsCHp.poc-tL+spVtshps+shphth+AtptlspAth-h................ctAh.cAGAspcchc.....puhphhR+AQWRhDashAppusuFHAPpEulRlLusul-cAscAcspLtplLAptGlps........PshsshphAphshshshc+h.hct..pp.lcs..hsp................................t+ .........................................................ts..Epau.paPcQYtSWpsTu-..ps...........................................thctLt..ccPpLlILWAG.YsFS+DYNcPRGHhaAlsDlccTLRTGu.Phss.pcG...................................s.P.hACWoCKSPDVs.RL....I.pc.G-................................cuY.ap..u.KWuchG..sE.Is........Ns.......................................lGCADCHsss.o..c.......LploRP.hshcAhculG+............................s..hc....cAs..R.-.pohVCuQCHVEYYFcuc...........................................................sKsV.pF.PWD..cGh.......................cVEshE...p.YYDclu..................FuDWTp.....sLSKsPML.KAQHP..EYEh.W...o....sG.......l..H..G..c....N....s...VoClDCHMPpVp....tpG.K....hY......TDH.+..Is......sP..h....cs...hs......pTC..t..sCH......s....Q..s..c.st...Lp..p.hVt-+ppplp-hph+s...EcpLV+AHhEA..................................................+tAh.....D.A.G.....A.T.....-.tEMK......slhptIR+AQWRWDhuhASHGht.....hHAPcEuhRhLusAhDcAtcARsKLscl..LApp.Glsc....tl.h..P...D.I.STtpKApthlGl.sh.pp.hpAtK..ppFlcsl.lPp.W.cpAp..ss.............................................. 
0 57 111 146 +1704 PF03188 Cytochrom_B561 Cytochrome_B561; Eukaryotic cytochrome b561 Mifsud W anon Pfam-B_2927 (release 6.5) & Pfam-B_7165 (Release 8.0) Family Cytochrome b561 is a secretory vesicle-specific electron transport protein. It is an integral membrane protein, that binds two heme groups non-covalently. This is a eukaryotic family. Members of the 'prokaryotic cytochrome b561' family can be found in Pfam: PF01292. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.96 0.71 -4.34 48 1285 2012-10-03 10:28:09 2003-04-07 12:59:11 11 38 290 0 837 1341 24 131.30 21 41.09 CHANGED hHPllMslGalhLhG..........EAlLsa+.s...hh.hs+pspKhlHhsLphlAhhhullGlhuhap.+stp..........t...h.sphhSLHSWlGlsslhLaslQhlsGhssahh....PthstthRshhhPh..HshhGlssalLulsoshhGh.pphhFt ...................................................Hshhhhhuh.h.hhh..s...............................u.l.lh...h+.....h......................h..................s....c.........h....hh......h.......lH..hhlp.h..l.uh..l..l.ul..s..G..hsh.s.h...t.h.c.s.t.............t.....h...sp..h.h..........o....h......HuhlGlssh.h.L.h.h.l....Qhl......h.............G.hh...h..h....hh.................P...t...h...s...t...p.......h.....R...t..h..h..h..h..........HhhhGh..shhl.luls.shhhGh.......t............................................... 
0 230 447 659 +1705 PF00032 Cytochrom_B_C cytochrome_b_C; Cytochrome b(C-terminal)/b6/petD Sonnhammer ELL anon Prosite Domain \N 20.70 18.90 20.70 19.30 20.60 18.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.54 0.72 -3.79 114 70433 2009-09-12 14:19:01 2003-04-07 12:59:11 12 20 28713 95 599 68332 2102 89.80 73 27.89 CHANGED .ANPhhTPsHItPEWYFL.hYAILRS..................IP.....sKLsGVluhhhSlllLhhlPh....hp....puphpohp.ap.shh.phhaah.....FlssallLsalGupPs-......Pah..h.luplhol.h...YF ........................PANPL.s.TP.PHI.KPEWYFLFAYAILRS...............................................IP...............NK...L...G.G.VLAL....l....hS.I...L...l.Lh.llPh...LH.............T.S.K...Q.R.....u.h.....t.....F...R....Plo.....QhL.....FWh.....Ll.Ash.hlLT.WlGu..PV.Ep...........Pa.l.hIGQlASh.h...YF............................................ 0 158 359 481 +1706 PF00033 Cytochrom_B_N cytochrome_b_N; Cytochrome b(N-terminal)/b6/petB Sonnhammer ELL anon Prosite Domain \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.39 0.71 -4.94 92 8451 2012-10-03 10:28:09 2003-04-07 12:59:11 14 32 3033 2 1429 91159 3309 164.10 19 83.40 CHANGED httashhh+...........lh.HW..hhulhhlhhh.h...sGhhhh................................................................hhh..hshtthhhtlHhhhGhhhhhlhlh+lhhthhp........................................h.t.t.ssttttshhtphshhhlhhh.hhhhsloGhhh....................................h..hh..stshhphh.....thlH.thhshllh.shlhlHlhhshhtphh....shlptMhsG 
.................................................hash..phhHW......lhul.shl.h.hh...h......oGhhhh....................................................................ths.t...h.s.h..h.h..h...h.p.......l..HhssG.hh....h....h...l.s..hh..h+l.hhthht..................................................................................................................................................h..h......h.p.h.ht..p...h.h.a.h.h.hhhh..hhhh...l.o.Ghhh................................................h...........h...h..................h..h.....thh.............t.h.H...h.h.s.h..h.hh.hhl.hhHhh..h.Al.hhph.........sslttMh.......................................................................................... 0 342 765 1112 +1707 PF00034 Cytochrom_C cytochrome_c; Cytochrome c Bateman A, Sonnhammer ELL anon Prosite Domain The Pfam entry does not include all Prosite members. The cytochrome 556 and cytochrome c' families are not included. All these are now in a new clan together. The C-terminus of DUF989, Pfam:PF06181, has now been merged into this family. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.47 0.72 -3.15 237 12531 2012-10-03 10:02:11 2003-04-07 12:59:11 16 182 3088 283 4329 18590 7463 94.00 18 36.97 CHANGED hspG...pplh.p.t.......pCssCH.utssps...........................................Pslsshstthhhpthtthhttt..h...........thh.tth...shh....................................loc.p-htslssal.php .........................................pG.ttlh..t..t...............sCsu.CH...us...s.s..t.u.st.h................................................................................hPsL..t...s..h...s.....s....p.....h......h...h.....t.....t......h......t..t.........h...h..s...s...s.....t..t.......h..........................................thh.....t..t.....h..............................................................................loc.p-..hpsls....uYlts..t........................................................................................................ 
0 1149 2550 3504 +1708 PF02167 Cytochrom_C1 Cytochrome_C1; Cytochrome C1 family Mian N, Bateman A anon IPR002326 Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.64 0.70 -4.69 124 1691 2012-10-03 10:02:11 2003-04-07 12:59:11 10 8 1421 83 579 1376 2156 190.90 30 70.78 CHANGED a.....sasG.hspaDpsuLQRGhpVYppVCuuCHShphltaRpLsc.............luho.......-sp..........l+shAtp...p.h.s....sGp........Rsup.uDhhssshsscpAtthhssGAhPPDLSLlu+ARt....................sGs-YlaoLLpGY........hc.s........................Ps......................tsstahNshF.......................................su..luMs.sL.h-t....l.....................................................pasD.Go.........ssoh-QhucDVssFLhWsAEPphppRKphGhhVllFL.slhsslhYhhp+ph...Wpsl ...........................................shhDps.ulpRGhplahph.C.uCHSh.phhtapplhs.............lshs..tpp..................................h+t.hh................................pup...........phuDhh.sshssp..pAsphh..GAhPPDLSlls+uR..........................GsDalashL..pua.........p.s......................................ss................t.hh..h.N.hF.......................................ss.....luMstsL...p.....s..........................................................phts..Gs.............stphsph..scD.lssFLtasuEP...tt.tpR.+phGhhlhhaL..s.l.hhh..ls..hhh..K+phWpp........................................... 
2 173 330 465 +1709 PF01322 Cytochrom_C_2 Cytochrome_C_2; Cytochrome C' Finn RD, Bateman A anon Sarah Teichmann Domain \N 21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.82 0.71 -3.57 95 629 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 492 53 234 648 813 114.10 23 73.21 CHANGED pc...s.lchRQuhh.ph.hutshu.sluuM.s+GchsaDsststtsAsslssluphshst..F.stGocsus.....TcApPpIW.pchscFppphsphppsuspLsssA.tsG..Dh.sul+sAhsslGts.CKuCH-saR ............................................................................t...lp.Rpshhp..ht.ph.t.thssh.hc.G..p..sa-stt..htttut..tls..th.u.ph........ph..F...tsspts...........scAhstlW..p..c..hscFpt.ttpphpsssspLss.uA....psu...ch...s....slpsuhsp.huts.CcuCHcsaR............... 0 54 144 186 +1710 PF02085 Cytochrom_CIII Cytochrome_CIII; Class III cytochrome C family Mian N, Bateman A anon IPR002322 Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.92 0.72 -11.89 0.72 -3.92 24 410 2012-10-01 23:37:15 2003-04-07 12:59:11 11 26 147 90 206 698 264 94.70 21 46.57 CHANGED P.....hshcsPssst.........pptsVsFsHpsHt.ps.....sCpsCHHt........tsttphssCss..CHss.t........scps.tsh...hpAhHspps..........oClGCHpphttpt.t.............ps.suC.stCH ................................hht........................tsl.FsH..t..hHs.ph..t..........sCpsCH..+s.......................sstssh.....t..s.....Css........CHsth...........................tptt..h.th.........ht..u..h..Ht..............................pCh.s.CHtp.t................................C....CH................................ 
0 92 172 198 +1711 PF03264 Cytochrom_NNT Cytochrome_NNT; NapC/NirT cytochrome c family, N-terminal region Mifsud W anon Pfam-B_1404 (release 6.5) Family Within the NapC/NirT family of cytochrome c proteins, some members, such as NapC Swiss:P33932 and NirT Swiss:P24038, bind four haem groups, while others, such as TorC Swiss:P33226, bind five haems. This family aligns the common N-terminal region that contains four haem-binding C-X(2)-CH motifs. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.29 0.71 -12.03 0.71 -4.83 111 2644 2012-10-01 23:37:15 2003-04-07 12:59:11 9 24 1304 8 392 1397 89 166.30 41 59.71 CHANGED ps...stphslshll...llGhlsGllhauuhpsshchTsopcFC.lSCHp.Mpss.apEappolHapNpsG.VRAs.CsDCHVP+..chh.sKhhpKhp.Au+-latch.hGplsT.EKF-.t+RhphApctWtch+sssStpCRsCHshp.tMch..spQ....ptpApptHpps..................h....tpsp..TCIDCHKG.....I.AH.....phP.ch ................................h...+htlhsll..llG..h.l..sG..l..h..h..h.ssh..p....su.hc.h.T.....sopc.FC.lS.CH...p..Mps.....s.....YpEY.p.......p.......o.l.....Ha......p...........N.t.......pG....lRAp...CsDCH..l.........P..+............-hs...shhh.pKlp....ASK-lYtph..hs.pIDT........P-K..FE......s.+Rhp.....hAcp.pW....p.chK....sN.sS...t...pCR..s..CHsa-....tMDh...spQ............pspA....s.+..........aptA.......................................h...........ccsp.....oCID..CH.KG..IAHpLP-......................................... 0 120 230 327 +1712 PF01801 Cytomega_gL Cytomegalo_gL; Cytomegalovirus glycoprotein L Bashton M, Bateman A anon Pfam-B_1420 (release 4.2) Family Glycoprotein L from cytomegalovirus serves a chaperone for the correct folding and surface expression of glycoprotein H (gH) [2]. Glycoprotein L is a member of the heterotrimeric gCIII complex of glycoprotein which also includes gH and gO and has an essential role in viral fusion [1]. 
25.00 25.00 144.60 144.50 20.70 19.80 hmmbuild --amino -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.38 0.70 -4.85 7 80 2009-09-12 02:32:10 2003-04-07 12:59:11 11 1 37 0 0 58 0 204.20 59 82.04 CHANGED hpscChcshhpChpu......p..ShhpPlhs....cusl......hSpLIRapp...........ptphhss..lhls--FL-plhLLaNNssQLRsLLTLl+.S-sussWhsahpGYspC.tsssslaTCV--lCppYsLp+LpYspslFsEsVlGFElssP........shulLlhlcNptT+ss+lVRlssso......loLFDulYNhl+pFhhch.ulshsLlpcLcpYpspLPs.a+puc..slhtRsh .ss.sECsELTRRCLhGEla.pusch-SWL+PLVN.VTsR....DGPL.......SQLIRYRPl..........TsE.AAso..VLLDDuFLDTLALLYNNPDQLRALLTLLu.SDTAPRWMTlMRGYSECGDGSPAVYTCVDDLCRGYDLTRLoYspslFTEpVLGh-lsPP........hhsVlVhlRNptT+sp+sVRlPs..oo......loLF.ulYNhl+phhh+h.pLDssLlppLcpYhstlPs.h+poc..pl.tpp.h........ 0 0 0 0 +1713 PF02239 Cytochrom_D1 D1_heme; Cytochrome D1 heme domain Bateman A, Mian N anon Pfam-B_3322 (release 5.2) Domain Cytochrome cd1 (nitrite reductase) catalyses the conversion of nitrite to nitric oxide in the nitrogen cycle. This family represents the d1 heme binding domain of cytochrome cd1, in which His/Tyr side chains ligate the d1 heme iron of the active site in the oxidised state [1]. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 369 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.20 0.70 -6.16 8 7650 2012-10-05 17:30:42 2003-04-07 12:59:11 11 55 672 42 338 8166 222 221.80 52 91.82 CHANGED phshtNLFsVs.RssGplsllDGDspc.luhl-otYAl.HhShh.us.GRYlYVhGRDGtlohlDLaspc..hVAEV+hGhsuRulslS.....-s+allsGsYhPsphslhDucTLE.lpllsTpGhssss....spsRVuAllsu.h+spFVVslK-TGclhllsYosh.cslpTppIsuA+aLaDuuassstRYalsuh.ups...KlslhcsccGthssLlDs.G+ss+sh.sushsH.phG.hWssuphtshshshIGpcshs....hhctpsWKhlppIsstGsG.lFl+THPcSpalWVDshhsP-...scuVtVhDpcsL.........lshsltshsGhs.......VlpsEFs+cGDpVWlSVWsucs...ALVVaDs+TLcLh+sls...hhoPoGKFNVaNs .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................AuIlu..S....pP-.alV.Nl.K...E..T.G.pI.L.L..V...s........YpD............l.....c......N....................L....p....................s......T...............s..I............s....A..A...+......F.L..H.......D..G...Ga.....D....u....o.....+....R.....Y...F...Ls.A.....A.....N.t.Ss..............K.lA..VVD..o.K.....-..p.......K...L..s.......A.....L....l....Ds...sc..h.P.H..P.G.....R...G.AN.a.s....cPc.a.......GP.V..W.....sT..u.....t.........L..G..s-..s....l.o...h...IG...T....D..Ptp..........H..p.p...A...WKVVp..slc..G..p...GG..G..SLFl.KT.HPp.Sp....pla..s..Ds.s..hs.P-.......thup.o.l.s.Vach..tsh......................h.h.....l...t......st.......................................................hhp...cas.tG.p....hhh........................................................................................
............................................................................................................................................ 0 91 229 292 +1714 PF02109 DAD DAD family Bateman A anon Bateman A Family Members of this family are thought to be integral membrane proteins. Some members of this family have been shown to cause apoptosis if mutated [1], these proteins are known as DAD for defender against death. The family also includes the epsilon subunit of the oligosaccharyltransferase that is involved in N-linked glycosylation [2]. 20.20 20.20 20.30 21.10 20.10 20.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.48 0.71 -4.02 30 370 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 306 0 230 340 0 104.90 45 83.17 CHANGED sus....h.s.llsphhppYhspTP..p+LKllDAalhalllsGllQFhYCsLl.GsFPFNSFLSGFIosVGpFVLslsLRlQlN.......tNcspF.hulSs.ERAFADFlhAsllLHhlslNFIG ....................ss...h.sllpphhppYhssTP..p+LKllDsalhalllsGhlQ...FlYshLl.GsFPFNuFLSGFlSsVGpFVLslsLRlQlN......................pNttpF..tslSP.ERAFADFlhusllLHhllhNFls....................................... 3 77 127 189 +1715 PF00130 C1_1 DAG_PE-bind; C1; Phorbol esters/diacylglycerol binding domain (C1 domain) Bateman A anon Prosite Domain This domain is also known as the Protein kinase C conserved region 1 (C1) domain. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.82 0.72 -4.23 46 6123 2012-10-02 13:15:50 2003-04-07 12:59:11 17 344 330 24 3317 5447 1 52.60 31 8.26 CHANGED HpFttp......sa.tpssaCstCsphl...........h.htppGhpCp....hCphpsHc+..CtptVsstCsss ...................HpFhhp............sa..pp..Ps....hCshCpphl.........................hG..hhp.QGh.pCp..................tCp.h.s...sHc+..........Ctp.plsspC...t.................. 
0 793 1126 2107 +1716 PF03982 DAGAT Diacylglycerol acyltransferase Finn RD, Wood V anon Pfam-B_11378 (release 7.2) Family The terminal step of triacylglycerol (TAG) formation is catalysed by the enzyme diacylglycerol acyltransferase (DAGAT) [1,2]. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.82 0.70 -5.72 6 1100 2012-10-02 00:16:30 2003-04-07 12:59:11 8 11 361 0 723 1216 48 230.60 28 64.76 CHANGED ahLFhTspWhllsLYulWhhYDhsoP++GuYpssWsRphtla+aFAsYFPlpL.hKTu-.Lsss+NYlhGYHPHGIlulGAassFuTsuTGlhcpFPGIR.plsTLs..GpFhhPaRREhhlhhGhI-sSRESI-alLs+stp.G+AlVlVlGGApEAL-AHPGpcsLTLtsRKGFV+hALcpGApLVPsYuFGENDlYcQh-NPcGSpLRphQchhK+hhGhoPPlFaGRGlFs.hshGLLPaRKPlsTVVGtPIpVsKs.cPTpEpIDclHuhYh-tLt-LFEEHKsKaGls.sspLllp ..............................................................................................................................h.............................................................................................h......p...................h.hph..h.h.t..Y..F..s.h.pl....h....p......s.....t....h.........s.p..p....p..........Ylh.uh.aPH.Gl.hs..h..u.h.....hhshs.......p.......................t....................h............t..............h....hs.t.h.t..........l.....h..sls.....F...h.....h.P.h...h.R-hh.h......h.....G.hs.ss.scpshth...h...L...........s..........p...........t...........t...........t...............lhll.sGGst..E.u.L.....p.s......p..s....s..p............p..l....hL.p.p.R.+GFl+l.AlppG........s......s.......LVPshsFG..E.......s.......-.......l.......ap...h................t........h....h..............h...t.......h.h....t....p.h........h.t.....h...s.....s..h.h.......h..u.....h........h.........................h............P.h..p...............t.s....l..h........V...l..G..p....Pl..............l..............p................p.P...s........p.......lpt.h.+.t.....ah.t.tL.plapph+..h.s...............h................................................................................
.................. 0 239 386 554 +1717 PF01219 DAGK_prokar Prokaryotic diacylglycerol kinase Finn RD, Bateman A anon Prosite Family \N 21.90 21.90 22.40 25.20 21.50 21.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.08 0.72 -4.44 186 3222 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 3038 3 605 1802 403 102.60 36 78.24 CHANGED htYuhpGl.ttshppEtuFR.clhhullllsluhhl......slothEhhlllhslhlVLhsEllNoAlEslVDhlu..-aH.LuttAKDhuuuAVhluhlhusllhhhllh .............h.tauhpGl.tsAhp.pEtsFRpchhhsllslshu.hh.l......slstlEhllLlhulhLVlhlEllNoAIEslVD.hlu....s-aH..Lu+pAKDhuuuAVLlshlhA..s..lshhllhh....................................... 0 181 367 508 +1718 PF00609 DAGK_acc DAGKa; Diacylglycerol kinase accessory domain Ponting C, Schultz J, Bork P anon SMART Family Diacylglycerol (DAG) is a second messenger that acts as a protein kinase C activator. This domain is assumed to be an accessory domain: its function is unknown. 21.30 21.30 21.40 21.90 21.20 21.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.18 0.71 -4.09 42 1202 2009-01-15 18:05:59 2003-04-07 12:59:11 14 102 195 0 702 1147 5 153.50 36 20.57 CHANGED sMsNYFSIGhDAcluhsFHptRppp..PthapuphtNKh.hYst.hGspc.hh.ppshtsh..cplplc..................s-sppl....plP..ssculllLNIsSauuG.sshWssspppt...................at.tshsDGhLEllulpushclutlpssltps.hRluQu.spl+lphpp............phshQlDGEP 
..................................................................................................hNNYFulGh................DAp..luhcFHptR...E.cp......PpK.....Fs...S..Rh.pNK..............h......hYst.hGspc......hh..ttot.+s...L..ppplp..lp................................................................sDGptl....pls...slpu...Is.hLNIPS..asGG.sshW..Gsspcpp...................................................ats.phsD.thLEVVGlp..ss.....hphu.t..lp.......s...s.h......t....ps...hRl....AQs.pplplphpp................slPhQ..lDGEP..................................................... 0 211 323 501 +1719 PF00781 DAGK_cat DAGKc; Diacylglycerol kinase catalytic domain SMART, Coggill PC anon Alignment kindly provided by SMART Family Diacylglycerol (DAG) is a second messenger that acts as a protein kinase C activator. The catalytic domain is assumed from the finding of bacterial homologues. YegS is the Escherichia coli protein in this family whose crystal structure reveals an active site in the inter-domain cleft formed by four conserved sequence motifs, revealing a novel metal-binding site. The residues of this site are conserved across the family [5]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.93 0.71 -4.71 125 7254 2012-10-02 15:20:27 2003-04-07 12:59:11 19 132 3378 25 2390 5666 830 128.70 23 30.86 CHANGED phhlllNPpu..Gpppsp.......plhtt.h..ppt..h..p......h..pl.hps..p..p..tsss..pt......hth.hts.hsph...............tllss.GGDGTlspVlsul...........h...p......p...sh...................s...lullPsGTuNshA+slth.stshtt............................................tthhthht.pht..sp...ssthsph ............................................................................................hhllhN.P..p.S..Gp.ppst......h.......tplht.h..h...ppt...t....hp....................h...pl...hho.......pp.......tssu...pp....................hst...hts..hssh...................................shllss.GGD....GT....lsc......V.lsul.................................................h...p......p.sh.................................s..lull...P..h..G.T.u.N.DhA+.sL.s.l..s.ts.tt............................................th..hhht.th....hsh......t.......................................................................... 0 772 1353 1927 +1720 PF00793 DAHP_synth_1 DAHP_synthetase; DAHP synthetase I family Bateman A, Griffiths-Jones SR anon Pfam-B_1032 (release 2.1) Domain Members of this family catalyse the first step in aromatic amino acid biosynthesis from chorismate. E-coli has three related synthetases, which are inhibited by different aromatic amino acids. This family also includes KDSA which has very similar catalytic activity but is involved in the first step of liposaccharide biosynthesis. The enzyme is also part of the shikimate pathway, EC:2.5.1.54. 
19.80 19.80 20.50 20.10 19.30 19.60 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.66 0.70 -5.71 28 8958 2012-10-03 05:58:16 2003-04-07 12:59:11 15 18 4112 276 1994 5818 4302 285.30 28 87.41 CHANGED chsls.t.............slhhGpspphlllsGPCSlEs.-tsh-hAp+Lpplus+ht..lhllh+uah-KP.Rooshua+Ghh.-stLphhtclpcsh..............GlslsTEhlDstssphlA-hsslhpIGARsscspshlctAuthspPVslK+Gps....sulpthtsAuchhhhhGs..........ssslhhC-RGlRsGc.tsNppshDlpulshhpcts.hh.......PlhlDsSHusup+s...........hh.p.hVsshspuulAsG...hsGlMlEsH.sPupAhsD.......usptLsht...shtphhh-hsphlhp ...............................................................................................tthhpht...................plhhGp...D...p..chlllsGPCSlc..s.-tsh-hAc+.Lt.s.l...s..p..c..h..t....l.lVh+saa-KP..posh.ua+GL....ht......-......st.......l..c.hhhclpcsh.................................GlP.s.s.T.Ehh-s.ss....p.h.l.u.D.l.l........s........hhtIGARs.scs....pshh....c....hA.....u......t....h.......u..t......P....VshKpGps..............luhsshtsAs..p...h..th.h.G........................spss...hschhl...R...s......G......p..t......sN....h.t.s.hD..l..tsss.....h.h.hpts....tl................sl.llD...soHusup+s...........htp..p....hVs.p.............s......p..s......t...lusG.................lsGlMlEo..H.s..s...uptp.s.-........us..p..tlsh...u.p.hs.hshhhp.sp.h.t.................................................... 0 632 1204 1652 +1721 PF01474 DAHP_synth_2 Class-II DAHP synthetase family Bateman A anon Prodom_1974 (release 99.1) Family Members of this family are aldolase enzymes that catalyse the first step of the shikimate pathway. 
19.40 19.40 22.40 19.50 17.40 19.20 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.44 0.70 -6.12 111 1570 2012-10-03 05:58:16 2003-04-07 12:59:11 11 4 1229 18 510 1307 2300 383.10 49 93.61 CHANGED pWsssS...WRspPhtQpP.sYPDtstLpplpppLpphPPLV.FAuEscpL+ppLApVApGcAFLLQGGDCAEoFs-hsuspIRcph+llLQMAlVLTauuuhPVVKVGRhAGQaAKPRSushEsh.....-G....lsLPSYRGDhlNuh-FotpuRhPDPpRhlpAYppSAuTLNLlRAaspGGhAc.Lc.plcp.......WshsFl.tpushup+Yppl..........AscIscuLpFMp....AsGhs.t......pl..............................ppschaTSHEALLLsYEpALTR......................h.............D..opo.Gp.........................a..YssSAHhLWIG-RTRQlDuAHVEFhRGlpNPIGlKlGPohss--llcLl-tLNPc....sEsGRLTLIsRhGAc+lpctLPtLlcsVcppG+p..VlWsCDPMHGNThpus..sGaKTRpF-cIlsEVpsFFclHcup....GoasGGlHlEhTGp......sVTEClGG.upsls-.pDLssRYcTtCDPRLNupQuLELAFLlA ..........................................puWcsh.s.h.hQtP.pYsDt..t..tltpV.tpLpphPPLVh.AuEscp....LcppLApVupGcAFLLQGGDCAEoFsphss....spI+s.ph....+...llLQMA...lVLTauuph.PVVK.lGRh...AGQaAKPRSsshEth..........s.GlpLPSYRGDhlN.Ghphstpu.RhPDPpRhlpAYtpSuuTLNLlRAhspGGhAsLc..plcp........Ws..hsFl..tp..o.......t.t+Yptl..........ApcIscuLt.FMp....AsGls..........t.tL.........................................pps-haT.S....HEA....LLLsYEpAhTR.................................................................h.................D.....sts...sp.................................h..YssSuHhlWIG-.R....TRQlDsAHV-FhpsltNPlGlKlGPshss--llpLh-h.L..sPp.............s.-sGRLTlIsRMGuc+ltstLPtllcsVptp.G..p..p..VlW.sDPMHGNThpus..........sG.aKTRtF-pl..hsEVpuFFclHpu.....GoasGGlHlEhTGc......sVTEChGG......u......pslop....psLssRYcTtCDPRLNspQuLELAFllA..................... 0 158 324 433 +1722 PF02733 Dak1 Dak1 domain LOAD anon LOAD Family This is the kinase domain of the dihydroxyacetone kinase family EC:2.7.1.29. 
25.00 25.00 26.10 25.40 23.90 23.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.91 0.70 -5.87 9 2939 2012-10-02 12:41:15 2003-04-07 12:59:11 12 12 2046 34 705 2165 413 309.40 40 77.08 CHANGED LtGLspuNPs.Lsllpcs+Vlhpsss.....s.sKVullSGGGSGHEPsHAGFVG.cGhLsuAssGpIFASPSoKQIaoul+AV.puspGsLlIVKNYTGDILHFGLAAE+A+AtGhpsEllsVuDDVSVG+p+suhVGRRGLAGTlLVHKIsGAAAtpG...LsLcpVsplAcplssNhsTIGuSLsHCTlPGpchpsp............LscsEhElGMGIHNEPGht+tuPlPolD-LVuc.hLchLLspssc-Rsa.................VphsssD-VVLLlNNLGGsS.hELhsltppls-pL.pcasIpPsRshsGsasTSLsGsGFSITLlpsocsstchhcthscssshsssh ...............................tGhshu.asp...l..p.h.h....t..s..s..p...l.l.h.+tch.......tpsKVullSGG....GSGHEPsHuGaVG.cGMLsAAssGplFoSPos-plh.p.A.h.....+.................s.s.......c.......s...............G..t...........G...V.LhllK........NYoGDlhNFchA.....s.Eh..A.c.t.......-....G..............l....c.VcpVll..sDDlAlp..s..s..h...............hs...ssRRGVAGTlhl......aKlsGAA.A-pG........tsL-p.lt.p...luc........+ls..s.....p.....s+..olGlALssCTlPusG.+ss..F.p...........L.s...-.sE..hEhGlGIHGEPGhc+pph..ps...ucp.lspp.hhs.......pllpp.....hs........................................h..t..t..u.......c.cVhlLVNGlGuTPl..ELallhscl.t.ph.L..p.p..pGlp.lt+sh.l.G..s..ahTSLDMsGhSlTLl..c.l..D.-..-.h.h.t.hh.ps.st.......t....................... 0 221 420 588 +1723 PF02734 Dak2 DAK2 domain LOAD anon LOAD Family This domain is the predicted phosphatase domain of the dihydroxyacetone kinase family. 
23.70 23.70 23.80 23.70 23.60 23.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.98 0.71 -4.60 175 4573 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 2828 8 1004 3192 390 167.00 29 41.01 CHANGED slGDGDpGtsMspGhpushctlpsh.......ttsshuplhpslupshhpssGGsSGslauphhtuhupsl.....ps.ppp........h..ssps.hupulpsuh.pulpph.u......tAcsG-+ThlDsltPsscshpp.sht.......tsh.phhptuspuA.cpuscsTtshh.sphGRAthluppsh.G......hsDsGAhuhshllcuhtp ...........................................luDGDpGsNMstuhpustcplpst.....................spslu.....plhpshutslltsstGsSGslh..........uphFtuhupsl..............ps..ppp.................l...sspp.lup.u..hpsul.suhpuh.........scss-tThlsVhtss....scuhpp..utp................tts..h.t.............h....h...ptshcsAcpuh.ppT..shl..Ath..GRA..........G.......hlDsGupuhhhlhpuhh.p..................................................................... 1 338 633 857 +1724 PF03045 DAN DAN domain Bateman A anon Pfam-B_1968 (release 6.4) Domain This domain contains 9 conserved cysteines and is extracellular. Therefore the cysteines may form disulphide bridges. This family of proteins has been termed the DAN family [1] after the first member to be reported. This family includes DAN, Cerberus and Gremlin. The gremlin protein is an antagonist of bone morphogenetic protein signaling. It is postulated that all members of this family antagonise different TGF beta Pfam:PF00019 ligands [1]. Recent work shows that the DAN protein is not an efficient antagonist of BMP-2/4 class signals, we found that DAN was able to interact with GDF-5 in a frog embryo assay, suggesting that DAN may regulate signaling by the GDF-5/6/7 class of BMPs in vivo [2]. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.34 0.71 -4.70 20 362 2012-10-02 16:54:34 2003-04-07 12:59:11 10 7 107 0 209 428 0 113.40 30 56.02 CHANGED phhpctlhctsttu.....hshppp.h+pshC+shPhpQpl.sc-GCcshsltNphCaGQCsSaalPpp...t...........hpsCupChPs+hpthplsLpCsups.s......hKcVhhVccCpCpssppc ...............................h.....................h.thh.h+pshC+spPlp.Qsl...pc..p...G...Cpu.p.s.l.N+hChGpCs..Sa....h..l...Ppp......ptts...............................hppCsp.CpPtchphhhVsLp.CPshps.s.p...........hK.pl....tpl..c..pCpC.sh..t........................ 0 42 63 121 +1725 PF01266 DAO FAD dependent oxidoreductase Finn RD, Bateman A anon Prosite Domain This family includes various FAD dependent oxidoreductases: Glycerol-3-phosphate dehydrogenase EC:1.1.99.5, Sarcosine oxidase beta subunit EC:1.5.3.1, D-alanine oxidase EC:1.4.99.1, D-aspartate oxidase EC:1.4.3.1. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.02 0.70 -5.55 530 27418 2012-10-10 17:06:42 2003-04-07 12:59:11 19 195 4663 179 8594 47340 28561 332.00 16 72.99 CHANGED 
-llllG.uGlsGhusAhpLuc.........pGh...pVsllEpp.phut..................................usS...s.ts..uGh..lpsth.....................................................pts..hh.hp.hs.hpuhph....hpp..............................................................................................h.....hpph...................shs....h.....t.h.ptGtlhls.h.......sppp..hp..th.............tt.hp..hhpp.tsh......thc.....hlstp.phtph............................s.hs.....s.........th........hu...............u........lh.hsssu.hl..cstphstult...p....................t..s.t..p.t......G..sp......lh...ppsp.....V............psl......pt..psst.............................................ht.lp...............................s.....t.pu........p.hps.............ctllhAs........Gs....as....sp.l.ht.h.sh............................................l...........hshcu....h..lhhp.......s................htt...h.....................h......h.........................................................t.hhh...........hh............................h.h.............h..........................................................s.tp..sp.......h...............hs............sshsth.............................................t................................................................t.......................tl.......................................................................h..ptht.......phh...Ptl.............................................tpspl.htt......a.sG.........................................................................................hc....sh..s.......................................................................sDth.............................................................hhtt....ht.slhhs..tGhss................Ghth....usssGc.hluph 
.....................................................................................................................................................................................................cllllG....uGlsG..h..u..sAhtLsp..........................tGh......pV.s..l..l-..t...p...t...h.ut.................................................................s..s....o.........s...t...s.........s.u..h..l.tst.......................................................................................................................................ttt.....h....h...h..p.....h......s.........t.....u...h....p..h......h.t.t........................................................................................................................................................................................................................................................................................h................ht.p.h......................................s...h..t.........h..............t........h.....................u.....h......l....h..l....t...h................p.t..t.p.........ht.....h........................................t....h.......t.........h.....t.t.......hsh..................th.p.............................h..l...s.....t...t......p.....h....t..t..h........................................................................s.ht......t.....................h..................h.s..............................u..............h.h.........p..s...s......h.....p.......s........t.....p........h.....s...h.....s....h.h....p...........................................................................t...s..t..p..t......................G....sp....................................l..h....t.t..s...p........V................p.s..l..........tt....p...s..st.............................................hh....l..p.........................................................................................................................s......t.p.s..........................p..lpA........................
..ct.l..l...........A..s...................G....s............h..s..........tp...l...h.t...h...t.h...............................................................l...........hs.....h....c..u.........h...h....h.ht.............................................t.t...h...................................h...............................................................................................................t..........h................h..h.h..........................................h.h.........t....h..h..hu.......................................................t...tt......h...........h.s.............s.s.........................................................................t..................................................................................................................................t.th.......................................................................................................................................................................................................................................................h....p..t..h.......p.h...h........P..t.h.....................................................tt.h...p.h......tt........h..s.G..............................................................................................h.h...........sh....s.......................................................................s-th.....................................................s.h..s..t.............................h............s.....h........h...hs.......u..hs..t...........................G..h....t..h....u.hhup.hhs.............................................................................................................................................................................................................................................................................................................................................................................................................................................
...... 0 2308 4853 6950 +1726 PF01678 DAP_epimerase Diaminopimelate epimerase Bateman A, Griffiths-Jones SR anon Pfam-B_2089 (release 4.1) Domain Diaminopimelate epimerase contains two domains of the same alpha/beta fold, both contained in this family. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.47 0.71 -4.20 28 7321 2012-10-03 03:02:41 2003-04-07 12:59:11 14 6 3504 42 1855 5624 4219 119.90 25 83.62 CHANGED hshspGsspshlhls-hcp...tshhp.......hsttlps+pthssusslhalp......sssctclphR..........lapssuuEsptCGsG.stshuthlhptth......tppslpVcs.uG.lhlplpsssp.....hh.hGssphl .......................................................................sh.hGsspshhh..l....D..s..h..sp...........ts.h.s...........................hu..t.l..p..s..+...p.....h......h.....s........s.t.s..s..lhhlp.........................hssc.s..c..h..chR................................lap.p.s.u.u..EsptCGsG...st...s.s..u...th...l..t...p...t...t..h..................tpp..p..l..p.Vps.....u..G.p....l....t..l..p.h..p..s..sst.............hh.hG.s........................................................ 0 612 1251 1586 +1727 PF05173 DapB_C Dihydrodipicolinate reductase, C-terminus Studholme, DJ anon Prosite Domain Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The C-terminal domain of DapB has been proposed to be the substrate- binding domain. 
20.80 20.80 21.00 21.10 20.20 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.70 0.71 -4.38 189 4343 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 4160 36 1018 3039 2348 134.50 37 51.96 CHANGED GlslhhplscpAAchhss....aDlEIlEhHHppKhDAPSGTAlplu-slspst......................t....pt.thhs.p............sp.IslpulRsGsllGcHpVhFsutsEplplpHcAhsRp..sFAtGAltAAcal....h...s.p.....G...lYshpDlL ..........................................Glslhhclhc....pAA.+.h..hss.....hDlEIlEhHHccKlDAPSGTAlphuEsIAcsh................................spph....tt..thhsccth............ttts.ssIshpolR.u..G..slV.Gc..HpV.......hFus.tGE..p..l..p..IpHcAt.s.Rt..sFAsGAlpAApalt.........s....+..........sG......lYshpclL.................................. 0 327 676 865 +1728 PF03344 Daxx Daxx Family Finn RD anon Pfam-B_3933 (release 6.5) Family The Daxx protein (also known as the Fas-binding protein) is thought to play a role in apoptosis, but precise role played by Daxx remains to be determined. Daxx forms a complex with Axin. 
26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 713 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.35 0.70 -6.04 7 169 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 84 2 75 167 0 470.80 32 71.69 CHANGED hAhsDsIIlLD.DDDtEEtsspPusS.sss..sts......ph..PKlpQPhsscIstusssou++csasLpsENpKLFpEFV-aCpspTpDpPEVlsFL+s+puKASPpFLuSVEF+NhLuRCLoRAQu+RoKsaVYINELCTVLKtHSsK++...lplpPsuups.....tp.t..PT-sh..s.......................Sup.pspc-p....tt-tppp+uSRRQItYLENLL+hYscEI+RLQEKELSLpEL--EDSoYIQEu+LKRK............................Lh+IapKLCELKGCSSLTGRVIEQRIsYpGTRYPElNRR....................IERFINtPEupps.PDYsDlLptlp+ANERHuLsLoRKQLpthAQDAFRElGsRLQERRHLDLVYNFGSHLTDsYKPusDPAL.DPoLAR+LRsNRplAlS+L-pVISKYA.hQD-TEEtERpK.RpE+ctpsspup.....tp.shtsssuspuP.hhtup-s.sppps-p-p-----.pp..p........psptt.--.-EE-pE.psssc.-p-h-sus-upusspEccpstp................................ttp..+..ohsspSPpspPtps.sh-.p..sEppcpphltp-pslSsHhphstluhs..............ppphsPsslshsLppsusshhopopsussoscsspppss.pt+pRpc+..........c.htut.hhsusl......h.csGpchp.LPhshsslsS.ushpspSopsDoPopt.sTso..........pTP..sPh+s...KlsVATQCDP-ElIVLSDS- 
.....................................................................................................................................................................................................................................s...altShtFpshlspshschptp.tp...haValpplss.Lpt...ct....++p..........p...h..sssptt............ss.....shp.............................................................................................t..pp.............ttpt.pppt.pc...c...pIthLpph..LthhstcIp+LpctElshsp.pc.D....SsYlp.t+hK++............................hhplat+lC-Lpspss.ssRhlcp..I..a..puTcYPclN+p.........................lpphlNp................ps.............hPDYtDlLphlp+........sstccsLsLsc..pphp.h..Ap..-AFpclG.hLQcRR+.DhhYshu..sHlTss.....sh..DPAh.Ds.Lhp+LcpNppht.s+lp-llpKYu.hQ-.p...ct...ptph.p..tc......tt...................pt..tt.......tt.......t....t...tt.t....t................................................tt...ptt........t..........t.....................t.................t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 23 28 50 +1729 PF02277 DBI_PRT Phosphoribosyltransferase Mian N, Bateman A, Moxon SJ anon Pfam-B_5739 (release 5.2) Domain This family of proteins represent the nicotinate-nucleotide- dimethylbenzimidazole phosphoribosyltransferase (NN:DBI PRT) enzymes involved in dimethylbenzimidazole synthesis. This function is essential to de novo cobalamin (vitamin B12) production in bacteria. 
Nicotinate mononucleotide (NaMN):5,6-dimethylbenzimidazole (DMB) phosphoribosyltransferase (CobT) from Salmonella enterica plays a central role in the synthesis of alpha-ribazole-5'-phosphate, an intermediate for the lower ligand of cobalamin [2]. 22.60 22.60 23.00 22.90 21.50 22.50 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -12.10 0.70 -5.34 26 2551 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 2248 35 647 2021 520 317.80 35 85.49 CHANGED llsplsssDstshstspp+.spLTKP.GuLG+LEpluhpluuh....pGps.P........sls+s.tlhVhAusHGVstpGVoshPtpVTsphlpNhht......GGAuIshlsppsGssl..c....VlDlulct................csoushsptsAM..occcsttulthGhchhtp.hstGs-LlshGE.hGI.GNTTsAAAlhuuLhGssspphVG.G.oGlsspsh..........p+KhslVccAlt.hppssh...sDPl.-lLsplGGh-lAAhsGhhLuAAhp+hPVllDGalssuAAllutp.lsPtshcahlhuHhSsE.GHthhLptLuhcPlLsL....shRLGEGoGAsLAhslVcuAsthhptMuTatpAsV ......................................................................................................................h....l.s.D.tt.hh.ttspp+......ctLsK.P.G....SLGpLEpluhp.lAuh......tup.t..s.....................pltc...tlllhuuDH.G..Vh.t......c.....G..Vos..h.P....ppV.T.t...t...hstN..h.h.t......Gsu..ulssluppsGs..p..l..p....VlDl.Glsss....................................................hph+ltpG.o.sshs....ptsAM....o..p..cp...s..ttulttGhphs.pp..h.h..........p.p.G.s.s.l.......lsh.GEhGIGNTTsA..uA.lhuslsG....h...............ss..p....p.....s....V.....G...h....G....s.Gls.s.st.l.............t+KhsllccAlp.....hp...p....s.ps.................pD....sl......-......lLu.plGGh-lu....uhsGshLuAA.p.p..p..hP...VllDG.Flss.uA..A.L..s.A..s..p....l....s....P............ss......psahlsuH.hS.sE......Gt....ph.hL..ptL...........u...........h............c...P...h...L.sh.....shRLGEGoGAs..LAh..sllcuAsthh.s.pMushttutl........................................................................ 
0 206 432 555 +1730 PF03880 DbpA YxiN_DEAD; DbpA RNA binding domain Bateman A anon Kossen K Domain This RNA binding domain is found at the C-terminus of a number of DEAD helicase proteins [1]. It is sufficient to confer specificity for hairpin 92 of 23S rRNA, which is part of the ribosomal A-site. However, several members of this family lack specificity for 23S rRNA. These can proteins can generally be distinguished by a basic region that extends beyond this domain [Karl Kossen, unpublished data]. 23.10 23.10 23.20 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.86 0.72 -4.31 173 3567 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 2412 2 811 2655 411 73.50 30 13.56 CHANGED sphplslG+ccplcPtslluslsscu.slss.ppIGpIclh-paoaV-lsps.hscphhpplp..psplp.G+plplchh .........phpIssG+c.cp.lcstcIlGAlss-u..s...lsu...ppIGpIclhs..s..aohVplspu.hspcslpplp..ps.+Ip..G+phplph.h........................ 0 257 512 682 +1731 PF04290 DctQ Tripartite ATP-independent periplasmic transporters, DctQ component Mifsud W anon COG3090 Family The function of the members of this family is unknown, but DctQ homologues are invariably found in the tripartite ATP-independent periplasmic transporters [1]. 
27.10 27.10 27.20 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.66 0.71 -4.39 186 5361 2009-09-13 06:40:50 2003-04-07 12:59:11 7 9 1674 0 1467 4565 5259 134.30 19 65.70 CHANGED hhhlhhhpVlhR..hh....h.....t.shs.h.spE.lsthhhlhhsh.luhuhshpcspHlplcll...hptl.ssphpthhp.hlssl.lshs...hss....hlhatuh...................................hhhhhtttptssshs...lPhhhhh.....hs...lsluh..slhsl...thl.hplhph.hps .................................................shllhhpVlhR....ah......h..............................spshs..h..s....-E.lst.hlhshhshlG.uuhsh.t..p.st.H.l..pl-hl..........hp.tl...ssp.h...pp..h...lp....l...lspl...lhlh....hsh..................hlhh.huh..........................................................hh.s.h.ht.h.s.p...h..o.ss.hs............ls.h.h.h.hh.............hs...lsluh....slhh.l....htl.tplhp.h..t......................................... 0 440 995 1263 +1732 PF03605 DcuA_DcuB Dcu; Anaerobic c4-dicarboxylate membrane transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.70 20.70 20.70 21.30 20.10 20.60 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.29 0.70 -5.62 10 2671 2012-10-02 15:12:49 2003-04-07 12:59:11 9 3 1383 0 245 1239 66 350.30 44 83.13 CHANGED ElllVLlslFlGARhGGIGLGlsGGlGllILsFshtlsPus..hPlDVhLIIhAVluAuAsMQAAGGLDahlQIAEKlLR+NPKplTlLAPlVTYhhTlhuGTGHsVYSlLPVIu-VAhcsGIRPERPhSsAlVuSQlAITASPISAAVVhhlAhLss......pulshLoLLtVsIPoTllGsllhulhsh.hpGK-LccDP-YQcRLscshh+phshspp......clssoAKlSVaIFLsullslVlhAsssslpsuhh..s....psluhshsIphFMLssuuLIllhTKlDsssIspusVF+SGMhAslslaGlAWLuDTahssHlspIKsshupllpsYPWhaAlsLFllShllsSQAATstsLlPlul.ALGlsP 
..........................................................pllllLlslalG.ARh.GGIulGhhGGlGlllL.s.h.s....ht.lpP..Gp.......PhDVhLlIhAVluAhushQsAGGLDhhlplAE+lLR+NPKaITllAPhVTahhTlhsGTGHlsaohLPlIs-VAhcpsIRPpR.PhuhusluuQhuIsASPlSAA.VVhh.s.u..h.Lts.........tsl....s....al..plLulsIPoTllulhlhuhh.s.h......hhGp.-.....LscDs..YQcRLpcs.h.cthshtpp...............cl.p...sA+hSlhlFLhu.llsVV........lhushss................ps...........uhs..............p................slshstsI.hhMLssusLIhllsKss.sspIs.ssslF+uGMsAslsl.hGlAWhu-Tahs..u..phs.IKsshupllp....saPWhhAllLFhsStll.SQAATstAlhPl.uL.ALGlsP.................................................. 1 41 118 193 +1733 PF03606 DcuC C4-dicarboxylate anaerobic carrier TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.46 0.70 -6.26 13 3902 2012-10-02 15:12:49 2003-04-07 12:59:11 10 5 2105 0 339 3372 265 444.50 24 97.14 CHANGED hPthhsllhhlhlhhhIlohahPsGsatpts..................................hl.hhhsshhspspslssshtssttthhs..hhphshh.hpscAsslshFIhhlGGhhulls+oGAh-sslttls....KhhptpcphhIhhlhshFshsssshGhsEto...lshhslLhPlhlslGasthsususlhhuossGhhuuThNPhslVlAupsAGlslt-GhhhRllshslhslluluasaaYsp+lpccP......hpp.cEppphhphcpup.sss....ssthhhhLhhhshlhhlhu.........h.h.hls.....................paahsplsslalhhulllsalth...hsshpsp-hhpuFhcGhuchh.sAhlVuLA+Gltlhhpss.....hhlsTIlshhssuhoshsu..lhhllhhllhhhhuhllsSGSG.AhhoMslhsPLAchlGlstshlllshQhusslsphluPTSGlVhAsLuhu+luhtpWlKhsus.lshhlllshlslllthh 
...............................................................................................................................................................................................hhsllhh.l.hlhhhlhphhh..PsGhh.h.t.........................................................................................h.l..h..h.h..s..s...hh...t.t.......t....sl..s.....s.......t......s..t.......t....hh...s..........................th............h...........hhs...puss...lh.h.h.IhhlG.G...h..h..ul.hp.+oG...A.-sslt.tl.s............+......h...p...........s...h...c....h...h..I......h.l..h..h.h.h.sh..s.s......h.s.h.uhs...p..ts......l.s.h.h.s.h.L.h.P....l....h.ls.lGhcs..........h.su...sus.l..h............hu....s..t..h.u....h....h..u............s....s....h............s.....................s.......s.......s....l.......A..........u...........p.......h......A.......G..........l........s.....h....h....s....s....h.......h.......h........+.......h....l...h...h.......s...l....s...s....l..l..s...l..h...h...s....h...h...a...s....p..+....h....p.cc...............................p.........t...c....p....t.......p...t....h..t..h.....p...t....s....p.........p...p.......p......hp.........t...p...t......h..h..h....l.....l...h..h..s..h..l...h.....h.l..h.u.......................hhhp................................................................................th.ah...s..p.h....s....s....h.....h..h...h..h..u.l...l..l.t.h.lt.........s..h.p...p.shhps.F...h...c.Ghu...s..h...............s.......Ah.l.V..u...l...s.h..u..h..t..l..h..h...p...sh.................hh.h.s...o...I...h...s...h....h...s..t...s...h...s...s...h..s.....s....h......l...hh..l...h..h..h..l..h..h.......h..l..huh.hh.sSuou.uhhhhs.l.hs.P..L..A....c.h..s.......G.....ls...t....t........h..h...l.h...s....hphusshsph.lsPs...o..............u....l....l..hushu....h..u.....+.........l.....s....h.h...p.hl.K..h...s..u..s..h..l....h..l.h..hl.h.sh.lhhhh...h.......................................................................................... 
0 106 193 281 +1734 PF03184 DDE_1 CENP-B; DDE; DDE superfamily endonuclease Mifsud W, Bateman A anon Pfam-B_2254 (release 6.5) Family This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. Interestingly this family also includes the CENP-B protein. This domain in that protein appears to have lost the metal binding residues and is unlikely to have endonuclease activity. Centromere Protein B (CENP-B) is a DNA-binding protein localised to the centromere. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.35 0.70 -5.20 20 3031 2012-10-03 01:22:09 2003-04-07 12:59:11 14 109 208 0 2383 3243 38 158.50 17 38.69 CHANGED hsscslaNhDETGlhhshhsstt....s..ttpth.stpt..pcchTshlsssusuuc+L.PhlIhKups.cshph..................htpshshpacssppuWhosplht-WL.chFssphp........s.sc+hlLlLDuasuH..ssph......hhpNltllalPspsopllQPhDhGlhsshKtpYRpphhphhltstsshps...........................t.shhchlchhtpAWp.ulspcsItsua 
..................................................................................................hhsh.c.p.....h.....t.................................................................................p......p...p.h..h.hhh...s....ts..s..s...hh.........h.h.lh...t...t....t........................................................................h......h..t......p.t...pu.a........h.s..p.......t..hh..t...ah......p...h.....h.....ht......................tp....hhL.lhDs.....h.ts..H....s.th........................t..p..l.....h.h.h.....h........P........s.........p.s..op......h.............h............Q............P.......h.....D......h.....s.....hht.sh.K...th..h.t.t.h.......t.h......................................................................................................................................................................................................................................... 0 825 1401 2161 +1735 PF02862 DDHD DDHD domain Bateman A anon Bateman A Family The DDHD domain is 180 residues long and contains four conserved residues that may form a metal binding site. The domain is named after these four residues. This pattern of conservation of metal binding residues is often seen in phosphoesterase domains. This domain is found in retinal degeneration B proteins, as well as a family of probable phospholipases. It has been shown that this domain is found in a longer C terminal region that binds to PYK2 tyrosine kinase. These proteins have been called N-terminal domain-interacting receptor (Nir1, Nir2 and Nir3) [1]. This suggests that this region is involved in functionally important interactions in other members of this family. 
23.30 23.30 23.70 23.30 23.00 23.20 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.45 0.70 -4.56 51 906 2009-01-15 18:05:59 2003-04-07 12:59:11 12 34 250 0 612 855 0 183.70 25 23.63 CHANGED LcFcscshFhhGSPlGhFhhl+ttph.sp.............................................s.ssphspPsCcplYNIFHPsDPlAYRlEPLlssphup.lcPh.lPptppt.hhtht........................................ctlsslusphptsht.......................ht...h.pt.s.stpspp.tpsss.pph...............................................................................p.pppp.ppp....tttphphttLNsst......RlDYsLp.tu..sl-..spalsslsuHsSYWpspDsstFllpplhp ................................................................................................................................................................hpFcspthFhhGSPluhhLhl.+th.t.......................................................s.......hs...tCp.phaNlaHshDPl.AhRlEPhl..pht.......l............tPh..lPh..hpt.h.h.hp.ht.................................................................................................................................h.pthttt....h..............................................................................................................................................................................................................................................................................................................................................................................................................................h..h.thh...t..s..t...............RlDa.L....t....lp......ph.h.sh....HhsYWpstDsshhllpplh.......................................................................................................... 0 170 259 468 +1736 PF03345 DDOST_48kD Oligosaccharyltransferase 48 kDa subunit beta Mifsud W anon Pfam-B_3520 (release 6.5) Family Members of this family are involved in asparagine-linked protein glycosylation. 
In particular, dolichyl-diphosphooligosaccharide-protein glycosyltransferase (DDOST), also known as oligosaccharyltransferase EC:2.4.1.119, transfers the high-mannose sugar GlcNAc(2)-Man(9)-Glc(3) from a dolichol-linked donor to an asparagine acceptor in a consensus Asn-X-Ser/Thr motif. In most eukaryotes, the DDOST complex is composed of three subunits, which in humans are described as a 48kD subunit, ribophorin I, and ribophorin II. However, the yeast DDOST appears to consist of six subunits (alpha, beta, gamma, delta, epsilon, zeta). The yeast beta subunit is a 45kD polypeptide, previously discovered as the Wbp1 protein, with known sequence similarity to the human 48kD subunit and the other orthologues. This family includes the 48kD-like subunits from several eukaryotes; it also includes the yeast DDOST beta subunit Wbp1. 36.00 36.00 36.20 46.60 35.90 35.40 hmmbuild --amino -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.42 0.70 -5.95 29 360 2009-09-11 12:44:55 2003-04-07 12:59:11 9 6 276 0 243 382 1 379.40 35 87.94 CHANGED sLV........llDstsp+tp.........aShFapsLcs....R.GacLsacssc..s..ssLsLachGEhhYDpLllFssp.s+shGss..lo.scsllcFlssGGNlLlssSup........hsculRplhpEh.slch.s-csshllDHFNaDst.sss.pHslll.....h.tsshlpssspshht......tslha.+Gsuthlss..ssLlhPILpAspTuY.......SYs..scpchpsh..........sausGpQshLVuuhQuRNNARhsasGSh-hhpDcaFsutspt..............posNp-FAcplopWsFpEpGVl+lsplpHa+ts-st............sP.phYpIK-plhYoIplsEa.....sps+WlPFp....s-DlQLEFsMlDPahRhsLp...........tpp..tsushYos.pFplPDpHGlFsFclsY+RsGhTplpcpspVoV..RphsHsEYsR..hIsuuasYhuuhauslsGalhFshlaLappss...........KKpp 
......................................................hLVll-s...s.+pp........aShFapsLps.........................RGa..pl.sacsscs......spLsLhchGphhYDpL.l.lhssp....sc.p...........hGss..ls...spslhpFlcs.G.GN.....lLlA.soss.........hscslRplhsEh.Glch.s-csshVlDHasYcst..........s.s.pHshll................h.sps.h...l.p..ssshst.tt........sslha..+GsGhshss.tNsLlhslLpAs.sTuY............................Shtscpthpph...............shssGp.phhLluuhQ.....AR.NNARlshsGSl-hhoDpaFsutlpt.........ttp..pouN..tphupplopWsFpEpGVL+ls.slpHa+sscst.........................sP.phYplpDpl..................pYoltlpch.....ssscWhP.ap..........sDDlQLEFshl-PahRh..sLp..........................psuhYps.pFplPDhaGVFpF+VcYp.R.GhTpl.ppppVoVR..htH.spY.R..hIsuAaPYhuusaShhhGhhlFshlaLa.p...............p......................................... 0 87 137 203 +1737 PF04625 DEC-1_N DEC-1 protein, N-terminal region Kerrison ND anon DOMO:DM04594; Family The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing [1]. Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). 
27.90 27.90 27.90 58.00 24.90 27.80 hmmbuild -o /dev/null HMM SEED 407 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.41 0.70 -5.79 3 34 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 13 0 12 48 0 338.70 58 37.51 CHANGED MMFPALGSLLRWGSLFPAYSILGAIPDNLQPTAAASKVVLVLADDATAKTRVARQNPPPNPLGQLMNWPALPQDFQLPSMDLGPQVGSFLAQL...PAMPo...MPGLLGAAAPVPAPAPAPAAAPPsAPAPAADoPAAPlPDAsQPAILGQAALQNAFTFLNPSNFDASSLLGQSVPTFAPPNLDFVAQMQRQFFPGMTPAQPAPAGTDAQASDISEVRVRPEDPYSQEAQMKIKSALEMEQERQQQVQVKDQEQVPLLWFRMPTTQNcDATEEKTLEDLRVEAKLRAFERQVIAELKMLQKIELMAKQMRSSAAAQNGDSPYRISYPLSRTPIHKITRADIEQALRDDYVRRLVNKEAQRKARNSGINTQKANALKRQAKSQDQTLSKEDIVQIMAYAYRMANEQMESEKG .....................MMFPALGSLL+WGSLFPA.SlLGAlPDs.L.....QPtAAASKVVLVLADDAs.uKoRVsRQs........sP..PsP...h.Gph...MNhPAL......PQsF.....Q....L...so....M.sL.s...PpVGp.hL...u..QL.Ps.MPs.....hsul..LGtAsPV.Pss..........APAsss...sPs.AssPAs-sPs..sssP..ss..s.Ps....ushtsuhsFhsPuNFDuS.sLLGQu...lss.....................APP.shDFhuth.RQFaPGhsPA.pPusu.soDAQASDISEVRVRPE........s.............s...a...uQ.pAQ...M...KhKSALphEQE+Q........cspEpVPLLWFRMPo.......opsp..-ts-pKolE......DLRVEAKL+AFERQVIuEL+MLQpIEhMAKpMRoSA...suQss..soPY+lsYPLSRTPlHKITRADIEpALRDDYVRRLlpKEAQRKu....tsp......tsuhKRQstpQ.-QshSKE-IVplMAYAYRMAsEQh.Ep................... 0 3 3 7 +1738 PF04624 Dec-1 DEC-1_REPEAT; Dec-1 repeat Kerrison ND anon DOMO:DM04594; Repeat The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing [1]. Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). This repeat is usually found in 12 copies in the central region of the protein. Its function is unknown. Length polymorphisms of Dec-1 have been observed in wild-type strains, and are caused by changes in the numbers of the first five repeats [2]. 
20.90 20.90 21.20 20.90 20.30 20.70 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.71 0.72 -4.31 7 107 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 8 0 43 192 0 26.40 68 12.46 CHANGED QNsMMM.p....RQWoE-QAKhQQp..QQMhQQ .QNPMMMQQ...RQWoE-QAKlQQs....Q.Q.hQQ........ 0 24 24 37 +1739 PF02352 Decorin_bind Decorin binding protein Bashton M, Bateman A anon Pfam-B_800 (release 5.2) Family This family consists of decorin binding proteins from Borrelia. The decorin binding protein of Borrelia burgdorferi the lyme disease spirochetes adheres to the proteoglycan decorin found on collagen fibres [2]. 24.60 24.60 95.30 94.90 21.60 20.90 hmmbuild --amino -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.68 0.71 -4.17 12 159 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 30 0 2 132 0 139.20 56 78.15 CHANGED KhtLESSuK-IpDEIsKI+K-AtscGVNF-AFpsspTGSKVupss.hIhcAKl+shslstKFlpsIEEEAhpLKEsGu.Sup......FpuMYDLML-lutsLEclGlppMppoVoptAcpsPsTTA-GIltIspthcsKLppV+pKQ ..KltLESSuKDIpDEIsKIKK-AsspGVsFcAFT-scTGSKVo......ps......s...hIh-AKlRAhslsEKFlpAIEEEApKLK-oGu..SGc......FSAMYDLMLEVScsLE-lGlpcMppTVo-AAcpsPsTTA-GlLtIAptMcsKLppV+pKp........ 0 2 2 2 +1740 PF01335 DED Death effector domain Bateman A, Finn RD anon Bateman A Domain \N 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.56 0.72 -3.86 37 947 2012-10-01 21:41:45 2003-04-07 12:59:11 16 55 86 20 419 826 0 81.10 24 27.28 CHANGED aphhlhp.lscsLspc-lcsLhFLsp...c.lspsc..hpp.pshh-lhhp.Lccpshls.ssl....shLtclLtplsRhDLLptl.phcppsh .........................ahthLhp.lu-pLspc-lp.sL.p..FLhp......-.h..lsppc.............hpp...pssh-lhhp.Lccps.hlsp..ssl....shLpclL...ppl..p..RhDLlppl.phcpt................. 
0 158 186 257 +1742 PF00711 Defensin_beta Beta defensin Bateman A anon Pfam-B_675 (release 2.1) Domain The beta defensins are antimicrobial peptides implicated in the resistance of epithelial surfaces to microbial colonisation [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.44 0.72 -4.02 54 464 2012-10-01 20:50:19 2003-04-07 12:59:11 14 4 80 69 141 484 1 34.50 32 54.72 CHANGED s.hpChppsGhCh.tpCssthp.pIGo..Ctssth+CC+ ......hpChtptGh.C.t.hhCs.hhhp.plGs..Ct.sshhp.CCh.... 0 4 4 18 +1743 PF00879 Defensin_propep Defensin propeptide Bateman A anon Pfam-B_517 (release 3.0) Family \N 21.00 21.00 21.20 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.53 0.72 -4.01 23 253 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 31 0 76 259 0 49.60 48 52.36 CHANGED MRTLsLLuA.lLLlALQsQA-slQtps-Es.ssQ-QPGtEDQDlulSFssscuo ...M+TLsLLuA.lLLlALQsQA-...sl.......Q.s..ps-.......Es.ssp-QPGt--QslulSFstscs............. 0 9 11 16 +1744 PF00323 Defensin_1 defensins; Mammalian defensin Finn RD anon Prosite Domain \N 21.40 21.40 21.60 21.50 20.10 21.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.88 0.72 -3.58 11 136 2012-10-01 20:50:19 2003-04-07 12:59:11 14 3 22 60 56 185 0 29.00 47 32.72 CHANGED ChCRpt.tCttpERthGsCplpGhhashCC .CaCRth.sCtttE+hsGoCp.ht.Ghhaph.CC 0 5 5 8 +1745 PF01041 DegT_DnrJ_EryC1 DegT_DnrJ_EryC1_fam; DegT/DnrJ/EryC1/StrS aminotransferase family Finn RD, Bateman A, Parkhill J anon Pfam-B_239 (release 3.0) Domain The members of this family are probably all pyridoxal-phosphate-dependent aminotransferase enzymes with a variety of molecular functions. The family includes StsA Swiss:P72454, StsC Swiss:P77952 and StsS [1]. 
The aminotransferase activity was demonstrated for purified StsC protein as the L-glutamine:scyllo-inosose aminotransferase EC:2.6.1.50, which catalyses the first amino transfer in the biosynthesis of the streptidine subunit of streptomycin [1]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.04 0.70 -5.42 38 8204 2012-10-02 18:26:03 2003-04-07 12:59:11 12 29 3491 78 2089 27677 18092 342.00 27 92.75 CHANGED sthsp-.httlppslcus.hhhs......GshlppFEccFAsahG..staululsSGTsALhLALtA...hulu......GDEVIsPuhTFsATssslhtsGApPVFVDlD......ssTaslDPstlEsAITs+T.....KAIlsVHhhGpss.DhssltclAccauLhllEDsAcAhG..upapG...................+hlGshG..chusFSFassKsl.osGEGGhllTsDscLscphc........th+saGhs.........ppahpph............hGaNh+hs-lpAAlGlsQLc+l-phlppR+chsphapptLtslshh..hshsspss............tuaahaslhhptp....s.RcpLhptLppp..slsoplha......s.Ph+tpsh.........appht.tss.thPsu-plspcslsLPhassls...s-clccVssslp ..................................................................................................................p.hthltps.lpos...hl.s.s.............G.s..h.s..p..p...h..E......p....t......h..s....p...a........h........G..........s........p.................a.........s.........l........s.........s........s.........S.........u....T....s....A..L...p.l....A...Lhu....................ls...lt.......................G...D....E..V...I....s......s......u......h......T.....F......s........A........T.....s......s.....s.....l......h........h......h........G........A.....p..........P.....V......F.........l......D.....l.....-.............................c.....T.....h.....s.....l....D....s.....p.....t....l............E...t....s....I.......T........s.......+...T.........................+...A........I......l.......s.......V.......H.......h.......h........G.......t........s.......s.......-.......h.......-.......t.....I..............h.....t.....l...............u....c.....c......a................s.................l..............
h.................l.l..E.D......u...A.....p....A....h.G.....u.....p..a.....c..G...........................................+.t..s..G......s..h..G.......c....h.u.s.a..S......F..a...s......s...K.sl....ssG..E.....G....G.h.l......h......o..s......c......p......p......l.h...c..c..hc...............................................hl.p..p.p...Ghs.................p.p...h.htph....................................................hG..hN...h..+...hs..-l...p...AA..l.s....h....s.....Q....L...c.p.......l.........-..p.h.....h.p.pR.....p..p.l.sp...hY.....pp.t.L........t....s...h...s...........h............h..t.h..s..stsp.........................................psaa..ha.h.lh...l..............p....t.................t................................................p......Rs........t.....l.h.ptLppp.............sI...t.s.....t.h.aa................hsh.a.t..p.sh..............................a.p..p.....h.....t....................................p.....h.s........scp.ht.p.p.hlsL..P..hast.hs...ppp.hphlhpth.h.............................................................................................................................................................................................................................. 0 739 1465 1821 +1746 PF02286 Dehydratase_LU Dehydratase large subunit Mian N, Bateman A anon Pfam-B_7927 (release 5.2) Domain This family contains the large subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances. 
25.00 25.00 41.80 41.80 17.40 17.00 hmmbuild -o /dev/null HMM SEED 554 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.61 0.70 -6.26 11 398 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 338 20 44 238 19 512.60 77 98.83 CHANGED pRpKRFchLscRPVN.DGFltEWsEpGhlAMsSPNDPKPS.lKI-NGpVlEhDGKttpDFDhIDcFIAcYuIslspAEcsMshDSlclARMLVD.NVsR-EIlclTouhTPAKlsEVVupLNsVEMhMAhQKMRARRTPuNQAHVTNh+DNPVQIAADAAEAulRGFcE.ETTVuVsRYAPhNAlALLVGSQsGRsGVLTQCSlEEAsELcLGMRGhTuYAETISVYGTEpVFTDGDDTPWSKAFLASAYASRGLKMRFTSGuGSElhMGaAEGKSMLYLEARCIhlsKuuGVQGLQNGulSCIGlsGAVPuGIRtVLAENLIshhlDLEsASGNDQsFSHSDIRRTARhLhQhLsGTDaIhSGYSulPNYDNMFAGSNhDA-DaDDY.lLQRDLtVDGGL+PVsEE-lIulRpKAA+AlQAVFctLGLPsITDEEVEAATYAcGScDhPpRssVcDlKAApplhsRGlTulDVVKALucsGFsDlAEslLshh+QRloGDYLQTSAIhD+capllSAVNDsNDYtGPGTGYRL..otERW-cIKslstAlcsp-lc .....MRSKRFEsLAKRPVNQDGFVKEWlEEGhIAMESPNDPKPS.IKI.NGtVsELDGKshu-FDLID+FIA+YGINLsRAEEVMuMDSVKLANMLsDPNVpRs-IV.LTTAMTPAKIVEVVSpMNVVEMMMAMQKMRARRTPSpQAHVTNlKDNPVQIAADAAEuAhRGFDEQETTVAVARYAPFNAlALLVGSQVGRPGVLTQCSLEEATELc...LGMhGaTsYAETISVYGTEPVFTDGDDTPWSKGFLASuYASRGLKMRFTSGoGSEVQMGYAEGKSMLYLEARCIaITKAAGVQGLQNGSVSCIGVPuAVPSGIRAVLAENLIso.LDLECASuNDQTFoHSDhRRTARhLMQFLPGTDFISSGYSAVPNYDNMFAGSN.DAEDFDDYNVlQRDLKVDGGLRPVpEE-VIAlRNKAARAlQAVFsshGLPsITDEEVEAATYAHGSKDMP.ERNlVEDIKhApElls+shsGL-VVKALupGGFsDVApshLNh.Kt+loGDYLHTSAII.............stDhpVlSAVNDlNDYAGPuTGYRL..pGERWEEIKNIPsAlDPs-l.s........... 0 14 25 34 +1747 PF02288 Dehydratase_MU Dehydratase medium subunit Mian N, Bateman A anon Pfam-B_7081 (release 5.2) Domain This family contains the medium subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances. 
20.60 20.60 20.60 37.50 20.50 20.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.27 0.71 -4.33 62 717 2009-09-11 12:54:19 2003-04-07 12:59:11 10 2 336 26 77 386 10 113.30 44 63.61 CHANGED stPAhslhhppshss....tll+pllhGIEEEGlshcl.p.shcsuDlshhAhpAAphSs.ulGIGlpuc......ssllH.+sL.s.ssL.hhs...hhstcshRtlGpNAARhsKG...hPh. .............s.uPAhslttssshsu.....pll+EVlhGIEEEGI.scl.c.....phcuuDVshsAhpuu+.Ss.ulGIGlpu+......osVlH.+sLPs.usL.hhs..shhs.csaRplGpNAARhsKG.....P........... 1 27 44 59 +1748 PF02287 Dehydratase_SU Dehydratase small subunit Mian N, Bateman A anon Pfam-B_6588 (release 5.2) Domain This family contains the small subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances. 25.00 25.00 38.30 38.30 21.10 20.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.44 0.71 -4.43 17 370 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 338 20 44 205 7 135.80 66 81.96 CHANGED os+p.hstsDYPLspK+P-hlKTsoGKsLcDITL-sVlsGcVsupDlRIoP-TLchQApIAcssGRsAlucNFpRAAELTslPD-RlL-lYNALRPaRSoKpELLsIAcELEspYpAslsAsalREAA-sYcpRcKL+ .......................ou.csspVsDYPLAsKHPEaVKTuTsKoLDDhTLENVLSscVoApDhRITPETL+hQApIA+DAGR-pLAhNFERAAELTuVPDDRILEIYNALRPYRS.TKpELLAIAD-LEs+YpApICAAFVREAAtLY.cRKKLK.......... 
0 14 25 34 +1749 PF00257 Dehydrin dehydrin; Dehydrin Finn RD, Bateman A anon Prosite & Pfam-B_3306 (Release 7.5) Family \N 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.50 0.71 -3.89 27 1293 2009-09-17 10:00:38 2003-04-07 12:59:11 14 12 182 0 114 1285 0 110.70 26 87.32 CHANGED sshhcchGsslthssthppttttttst.........................................................................s.httptts.....................................................................ttthtthhppst...SuSSuuSE--sp.........sGc+.................................................................KKGltEKIKEKLPGspps...............t....sttssshstt..........................................................tttEKKGhhDKIK.........EKLPu..............Gpc ...............................................................................................................................................................................................................tt...........................................................................................................................................................................................................................................................................................................................................................................................................................................tpp-ptt.............................tt.c.+.................................................................................................................KK.G..lh-K..IK.E..KL.P.Gsppc.............................................................s.t...t.h.t.t.....................................................................................tpp...shhtp.......................................................................................... 
0 23 58 87 +1750 PF02336 Denso_VP4 denso_VP4; Capsid protein VP4 Mian N, Bateman A anon Pfam-B_19701 (release 5.2) Domain Four different translation initiation sites of the densovirus capsid protein mRNA give rise to four viral proteins, VP1 to VP4. This family represents VP4. 25.00 25.00 42.50 40.10 18.80 17.90 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.45 0.70 -5.48 3 29 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 21 1 0 36 0 370.00 36 70.57 CHANGED DVTMAMSLPGTGSGTSSGGGNToGQEVYlIPRPFSNFGKKLSTYTKSHKFMIFGLANNVIGPTGTGTTAVNRLLTTCLAEIPWQKLPLYMNQSEFDLLPPGSRVVECNVKVIFRTNRIAFETSSTATKQATLNQISNLQTAVGLNKLGWGIDRSFTAFQSDQPMIPTAToAPKYEPVTGsTGYRGMIADYYGADSTNDsAFGNAGNYPHHQVGSFTFLQNYYCMYQQTNQGTGGWPCLAEHLQQFDSKTVNNQCLIDVTYKPKMGLIKPPLNYKIIGQPTsKGTISVGDNLVNMRGAVVoNPPEATQsVoESTHNLTRNFPAsLFNIYSDIEKSQlLHKGPWGHENPQIQPSVHIGIQAVPALTTGALLVNSSPLNSWTDSMGYIDVMSSCTVMESQPTHFPFST-ANTNPGNTIYRINLTPNSLTSAFNGLYGNGATLGNV .......................t.....hsLPGTG.u...spu..s..s.Gusspu.tp......lh.I.RPhs.s.h.s..pph.pTYpKsHKFhoFGlAsslls..s.u.T..ssss..s..s.....h.hlTTsLAEIPWcp.hhYMN.SEFcLL...P..s..GSplh-ssV+VlhRssRlAFETsSosTs.ATLNQ.p.lthAhGL..Nph.ua...GhsRpaTsFpu...spPMlPsustsspYtslsstp..tacshlt.-hYGssssss.shstssshP+HplG..hhLpsYashht..p....s.......s..p.ss........u......GWPsLsp+lppaDupsssspsll....shoYcPphu.lpsshp...h..h.hhs...P...st.us..h.....t..tshss.......hp.....t.....ssh..s.....hs.....t..t.p.s.ps..h.........s..p..........ap.....hhs.IEKuQhh.tG.h.t..psphQPSlHlGh.sV.tLTTsth.............................................................................................................................................. 0 0 0 0 +1751 PF01791 DeoC DeoC/LacD family aldolase Bateman A anon Bateman A & Pfam-B_6806 (release 14.0) Domain This family includes diverse aldolase enzymes. This family includes the enzyme deoxyribose-phosphate aldolase EC:4.1.2.4, which is involved in nucleotide metabolism. 
The family also includes a group of related bacterial proteins of unknown function, see examples Swiss:Q57843 and Swiss:P76143. The family also includes tagatose 1,6-diphosphate aldolase (EC:4.1.2.40) is part of the tagatose-6-phosphate pathway of galactose-6-phosphate degradation [2]. 22.50 22.50 22.50 22.50 22.30 22.40 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.50 0.70 -5.07 26 7750 2012-10-03 05:58:16 2003-04-07 12:59:11 4 17 3919 241 1426 4608 1164 223.40 24 84.08 CHANGED phhhhhDpsshpsshpst..............-lcsllpcshph.......tssuVslssshlthupcthsp......clshlluhstusshsssp...........shpsccAlc.GAstlchll.ht..tscp.ppphhpclptltctCp..shslhlllE.shhpspp............p..hltcAsclst-..hGADhlKssss...........t..htsp.pssphh+csstt...................lhsSGGl.........cslcpAh...cAGA..............hGsusGRslapp .....................................................................h..hhhlDpshhps..psstt.................clctlsppA..hp..............................tssuls.l.s.sph.l.h..h.s...t.c..th..s.t.............................pls......h....l..l...s.ast....ussh.ss.s.p..................shpsc.pA..lp...G..A..s...tlchll.ht.........tu...tp....tpphh...ppl....p....t....lhp..t..s..t.............shs...l...h..l..llE.....shl...pcpp......................t...l.hp.A.s..cl.u.tc.....hGADhlKTpss.................t...sssp.....ps....sp.h.hpchss.....................................................l..hsuG.Gl...............cshppAh..hpuGu.ph............hGs.sGhthht.t............................................. 1 479 902 1203 +1752 PF00455 DeoRC deoR; DeoR; DeoR C terminal sensor domain Finn RD, Anantharaman V anon Anantharaman V Domain The sensor domains of the DeoR are catalytically inactive versions of the ISOCOT fold, but retain the substrate binding site [1]. DeorC senses diverse sugar derivatives such as deoxyribose nucleoside (DeoR), tagatose phosphate (LacR), galactosamine (AgaR), myo-inositol (Bacillus IolR) and L-ascorbate (UlaR) [1], [2], [3]. 
31.80 31.80 31.80 31.90 31.70 31.40 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.56 0.71 -4.60 91 14356 2012-10-04 00:26:15 2003-04-07 12:59:11 17 16 3216 0 1875 7048 402 159.00 26 63.33 CHANGED tpphpcKppIActAAphlps.GsslhlDsGTTshtlActLssp........slsVlTNulslAptLtpps........shclllhGGplcppstuhsGshshphlppap.hDhuFlussGls..ctGlts.s.p-stlccthlppAccshllsDpSKau+puhhphsslsplstllTDps .................................................h.phppKptI.Ac.t.A.u.p.hl..p..s..G...p..s.lhlDsG.oTshtl.uctLs...pp...................slsllTN.ultlA.p.h.Ltc..p................php.l.l.lhGGplc.p.....p..st..u..hl..G....s.h.shp.h.l.....p.....ph..p........hDhsFlussGls...............h......p.t...G...l.s.shs...............pc..............stlppthlpp.u..p.c.s.lll..sDpSKa..s..c..p...u...h....hph...s.sls.pl.s.h.llTDp.t................................ 0 508 1030 1467 +1753 PF04511 DER1 Der1-like family Mifsud W, Bateman A anon Pfam-B_1901 (release 7.5) Family The endoplasmic reticulum (ER) of the yeast Saccharomyces cerevisiae contains of proteolytic system able to selectively degrade misfolded lumenal secretory proteins. For examination of the components involved in this degradation process, mutants were isolated. They could be divided into four complementation groups. The mutations led to stabilisation of two different substrates for this process. The mutant classes were called 'der' for 'degradation in the ER'. DER1 was cloned by complementation of the der1-2 mutation. The DER1 gene codes for a novel, hydrophobic protein, that is localised to the ER. Deletion of DER1 abolished degradation of the substrate proteins. The function of the Der1 protein seems to be specifically required for the degradation process associated with the ER [1]. Interestingly this family seems distantly related to the Rhomboid family of membrane peptidases. Suggesting that this family may also mediate degradation of misfolded proteins (Bateman A pers. obs.). 
20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.32 0.71 -4.81 11 816 2012-10-01 23:21:32 2003-04-07 12:59:11 10 13 329 0 563 905 24 176.70 28 69.86 CHANGED lPsVTRhahhusllholls+hpllsPhhLh.asasLlh+KhQlWRLhTshhaa...ushGapalhNhaFhapYsptLEpusFps+.........ssDalahLlFshlhhshhsh......................................................................ph...hhLspshshhllYlWuphNs.psplsFatlhphpApYLPalLhshshllts.ssl.s-lhGlhsGHlYa....Fhp.................hYstp.tGpcLlpTP. ...........................................hP.lTRhahss....slhh...oh...h...s.....p.h...t......l.l....s..s....h...p....lh...hs..................p.....h..lh....p......c.h...........Q...l..W.Rll....Tsh..h....a.a...................u.s.hu..h.sh.lhp..hhal....hpYs......phLE..p...s...t..F.psp...........................................uDa.h.ah..l.l.a..s.....h.hhh.hh.h.u..h..................................................................................................................th.......h..L..u..ps...hhhh.l..l.Y..l..W.....u+.p....Ns...phpls......aahl.h..sh..p.A..a.LP...a...s.hh.s.h.shl....h....ss.....s.....hh....sc.lhGlhsGHlYa.........Flp................phaP.........t..G.h..p.l.sP..................................................................................................... 0 203 316 457 +1754 PF01880 Desulfoferrodox Desulfoferrodoxin Enright A, Ouzounis C, Bateman A anon Enright A Domain Desulfoferrodoxins contains two types of iron: an Fe-S4 site very similar to that found in desulforedoxin from Desulfovibrio gigas and an octahedral coordinated high-spin ferrous site most probably with nitrogen/oxygen-containing ligands. Due to this rather unusual combination of active centres, this novel protein is named desulfoferrodoxin [1]. 
21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.63 0.72 -3.71 58 594 2012-10-01 19:22:21 2003-04-07 12:59:11 13 10 502 39 225 540 29 92.40 38 70.61 CHANGED osDstpEK.....HVPVIE...pspsG....VpVpVGp..lsHPMppcHaIpWI-Lhs.cpsth.......RtthpPss.............c..PcspFslph.t............................splhA.hpYCNlHGLWcsc ...........................s.sDsutEK.....HVPVlc.......htss.....lpV.p......VGp....ltHP.Mp.....pc......HaI.pW.IpLhs...s.p.s.sh..................+tpLpPsp..................c..PpApFtls.s..t...........................tsltAhpYCNlHGLWcs.p............ 0 114 181 211 +1755 PF04598 Gasdermin DFNA5; Gasdermin family Mifsud W anon Pfam-B_5153 (release 7.5) Family The precise function of this protein is unknown. A deletion/insertion mutation is associated with an autosomal dominant non-syndromic hearing impairment form [1]. In addition, this protein has also been found to contribute to acquired etoposide resistance in melanoma cells [2]. 
This family also includes the gasdermin protein [3] 30.00 30.00 30.20 31.00 26.90 29.80 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.42 0.70 -5.74 18 326 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 45 0 165 276 1 322.80 23 90.11 CHANGED hF-+so+sll+Els..pGDLlPVssLpsus+Fp.asLVp+K+cp..hFWp.s+YhslshoLtDlLEPss.......ssPsssposs..hpapsthcsplpGslclshs.hphpluGuu...opSstsolclQplslss.th-sh..cR+lhs.cs..................shLpphp....ppt..-sLaVVTEslpTsp-sslpcsspsctpsph.lst.shslpGpuptp......pspcpsloIPpGosLAY+lhQLhl..cpcspatlslh.ccKptTFtp....................................ph..p..h..................h.....sssptpsstptshpsLcpE...lcpphpsLscLscphppsLhssltplLpDcpsLpsLEphL-th..............tltsh-ssstslLppLht..........ssshh.pphtsslhYLLsALs.LS-sQttLLupuhEhplLstphcLVculh-pshp......hs.............LpsthLusLtscth.sls.tLhtpsGlclp..tssPcssh ...............................................hFttso+shl+pls...t..GcLhPVpsL.sus+hp.hsLVh+K.pp......hhat.scah...hs..hoLhDlLpsst..............s.....p.t..t...hph.s..tsphpup....hth.........ht..hpls..Gps....s.sp.stlphpplpls..hhc........p......+.....+h.p.c....................hlpphp....ppt..psL.hVVhEhlpshpps.lp..tttphpt...t.h.......h.h.h.......h..thpsp...........hspphhltlPt.tsslAathhpLhl....p..h...th..hh..t...p...t.sF..................................................................................................................h.t....h....t.h.....Ls...p..l.t.h.t..hh...pp..h..L......ht........................t.t......h.th.................................................................h...hh.hltA.l...hsc.t..hl....h..p.t...l....p.....h...t................................................................................................................................................ 
0 13 19 43 +1756 PF04127 DFP dfp; DNA / pantothenate metabolism flavoprotein Wood V, Finn RD anon Pfam-B_6559 (release 7.3); Family The DNA/pantothenate metabolism flavoprotein (EC:4.1.1.36) affects synthesis of DNA, and pantothenate metabolism. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.32 0.71 -4.67 702 5121 2012-10-10 17:06:42 2003-04-07 12:59:11 10 12 4486 16 1366 3993 2687 170.90 33 49.56 CHANGED LtG.++..l..llTAGPTpEsIDPVRaIoN+SSGKMGaAlApuAtptGAcVTL.l....u.GP.s.s...ls..s....P......s..l.p.hlp.VpoAp-MhpAlh.pt...h......sps......Dl.hIhuAAVADa..RP....tphus.pKI...K.K....t......sst.hs........l..pLhcsPDILt.....pl.u......ttpp...........tt.....h.lV..GFAAE..T.p........slhcpA...cpKLp.....+KshDhIlANslstts.......tG..Fus.Ds...Nplp.ll .............................................................tshpl.llTAGsTpEsl..D..P....V..R...aloNcSSGKhGhAlApsh.hp.t..G.A....p.VsLl.....s.Gs..s..s..............l...s...............P..........t....l...p..hlp...Vpo.u...t.-.M...h.....ps..l....pt..............h.............tpt......-l.hIhsA.AV.uDap..s....................t.p.h....s..s..p.......K..l..................KK.......p.................tspths................lplh...csPD...Ilttls...t.t.t....................tp..............h.llGFAAE....T..p..........sl...phA.ppKL.t.....+.KshDhll.ANs..lst..........s...Fss.spNtlhh....................................................................... 0 450 859 1151 +1757 PF05035 DGOK 2-keto-3-deoxy-galactonokinase Bateman A anon COG3734 Family 2-keto-3-deoxy-galactonokinase EC:2.7.1.58 catalyses the second step in D-galactonate degradation. 
25.00 25.00 25.70 25.60 24.80 24.60 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.83 0.70 -5.66 51 889 2009-09-12 20:56:58 2003-04-07 12:59:11 7 2 751 6 148 615 193 280.40 43 93.85 CHANGED DWGTosLRuWlhs.tsGp..llsp+puspGhtplssss............Fcss...LtsllusWhss..sshPVlhsGMlGSRpGWhEAPYlssPsshsslstthttls..s.ttsltIlPGls..ptssss.DVMRGEETQlhGsLs......psshssllClPGTHoKWVplpsGplssFpThMTGELFulLsp+SlLs+s....hssss.sss......AFtpGlppu....spssslsspLFulRuttLLttlssssutuhLSGLLIGtElAuspst..........hhspslsLlGussLsshYtpALsttGhssphls..u-pAshsGLtthsc ..........................................................................DWGoTsLRAWLh..ps.s.p...sL-pRp.u..psGlt+L....sspu.................sssLtclss......c..Wtp.......p...........s........s.P........VlhuGMlGSssGWp.APYLslPAthsulucpLssVs.......................csl..aIlPGls..........hpcs-s...sVMRGEETQLlGAht..................thussulhl.hPG.T.H.sKWVps.-......s...............t....pIscFRThMTGELapLLhp...HSllGsu..........ls.s...p...-...s...ss-........AFss.Gl.cpu....hsssul..LspLFplRAutll.G......sL......spp...psu-aLSGLLIGsElAuhpsh..............hstppsIsLlu.usuLs.uRYppAhpsh..Gh..s.s.stls..uDpAhpsGltplA.p............................ 0 23 71 108 +1759 PF00926 DHBP_synthase 3,4-dihydroxy-2-butanone 4-phosphate synthase Bateman A anon Pfam-B_1148 (release 3.0) Family 3,4-Dihydroxy-2-butanone 4-phosphate is biosynthesised from ribulose 5-phosphate and serves as the biosynthetic precursor for the xylene ring of riboflavin. Sometimes found as a bifunctional enzyme with Pfam:PF00925. 
20.90 20.90 20.90 22.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.17 0.71 -5.08 97 4699 2009-01-15 18:05:59 2003-04-07 12:59:11 14 20 3949 37 1250 3399 2646 192.70 49 56.26 CHANGED lccAlpsl+s.GchlllhDD-s....RENEGDLlhuAphsTs-plsFMhppupGllClslstppsccLsLs.Mss..p......Nssth.....................tTuFTlolD.tp...s...ssTGISAtDRAtTI+tLu...........sss..spsp-FhpPGHlaPLhAcpGGVLpRpGHTEAuVDLs+LAGl.pP.suVlCEll..sc.DG.sMt+hs-..lhpaAccasLhlloIp-Llpat .....................................l-cAlpsl+p.G+hVl.V.l.DDE.D....REN.E...GDLIhAAEthTsEpl....sFMh+au..........pGllClsloc-cscpLsLP.MVp......p......Nsssa.....................tTuFT.VSI-...stc...G...sTTGISAtDRupTlcshs.......................sss........upssDhsRPGHlFPLhApsGGVL.pR.sGHTEAuVDLs+LA..Gl.pP......A.GVlCElh............N-...D...........G...o...........MARts-..LhpFAccHsltllTIpDLlpYR................. 0 388 804 1055 +1760 PF00701 DHDPS Dihydrodipicolinate synthetase family Bateman A, Griffiths-Jones SR anon Pfam-B_557 (release 2.1) Domain This family has a TIM barrel structure. 
22.10 22.10 22.10 22.20 21.90 22.00 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.74 0.70 -5.81 13 10473 2012-10-03 05:58:16 2003-04-07 12:59:11 17 24 4601 330 2809 7818 5086 284.20 27 95.11 CHANGED thtGlloAllTPFsspGplDhpuhcpLlcapIspG.s-GLlluGTTGEuhsLSh-E+hpllchsVspssG+lsVIAGsGSNuTpEAlchsppupcsGhcusLtVsPYYsKsSpEGlhpHF+sIh.stsslPhIlYNlPuRTusclsPEslt+Lup.hsNllGlK-ssGsh....thppl+ptsscshslaoG...-DspthshhphGucGsISVouNlssthhpshhctspsGchttA.ttlpc+LhPLhchLFtp..sNPhslKsALphlGlssss.hRhPhsPlsp-tptclsshlcthG .......................................................................................................hpGlhsAhlTPF.....s..p..p.....G......p......l......D..h...p...uh.p...p.l.l.c.a...h...l......p.......p..G..s...s.G.l...h..ss..G.o...T.G..Ess..sLo..t...-......E..+.....t...pl...l....c...h...s...h.c...t...s........s.......u...........+..........l.....P.....l...I...A........G...s...G.......s..s...s...........T.p.c....u.l..p....h....s..c.p..A.p.c..h..G....s..D........ull.s.l...s.P...........a...Y....s.........+....s......o.......p......c..u....l...h....p.......H...a...c...s.......l......u.....p....u.....s.........s...l...P....ll.l..Y.....N..l...P.....u.....p...o..u..s.....s.l..s.s..-..s..l..t.c..Lu........c...h........s..s.l.l..ulK...........-....u....s....G....s...l.....................t.p...h...t....p....l.....h........p......t...........h...........s...........s........c..........h....h..l....h..sG..........sDs.h.h.h..s.h..h....t.....h...........G...u..p.......GsIos.....s.....uN..l.....h.P....c...t...h....t...p...h....h...c...t......h...t.....p.......G...c......h...p...p.....A......t.......pl...p..p...p...l.....h....s....l......h....p...h...l.....h.tt..............ss.s...h.......s........l...K.t...s......h.....p.....h.....h........G...h.......s.s..s....h...R.hP....h.s.s..h.sp..p.p.t.t.tltthht...h...........................................................................................................................
................ 0 799 1639 2295 +1761 PF01368 DHH DHH family Bateman A anon Pfam-B_1245 (release 3.0) Family It is predicted that this family of proteins all perform a phosphoesterase function. It included the single stranded DNA exonuclease RecJ. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null --hand HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.21 0.71 -4.55 150 10761 2009-09-12 04:54:29 2003-04-07 12:59:11 15 57 4504 39 2562 8005 1706 162.10 20 32.36 CHANGED tlppscpl.hlhsch..DsDulsSssslhphlpphsh.pthhhhsspthpt.h...hsttth..............................................................................................................................................................................................................................................t.ttppssLllhlDs.utsshtthpttpth........hpll.llDHH..spp.h................lpsthhs..........hhc.hsuuusuhhlsphl...............................hspphtsllhhGhl ................................................................h...p.ppl..llhGch....D..sDul.sS.s.hs.....h.tp..h..h.....p...p........h........s......t..............p.........s.........h.........h.....h..l..s...p.....p.................s....s......s...................h.sh.h..thtt........................................................................................................................................................................................................................................................................................................................................................................tsslllhVDs...u...t....s.s..p....t...s..s..p...h..t.p..hs...............h.pl.l..llDHH...s.sp.h.....................................ss.hs............hhp..hsuu..usuh.hlh.p....h.l.pt........................t.....p......h.tphhs.llhhGhh.....................................................................................................................................................................
........................... 0 921 1749 2205 +1762 PF02833 DHHA2 DHHA2 domain Bateman A anon Bateman A Family This domain is often found adjacent to the DHH domain Pfam:PF01368 and is called DHHA2 for DHH associated domain. This domain is diagnostic of DHH subfamily 2 members [1]. The domain is about 120 residues long and contains a conserved DXK motif at its amino terminus. 21.20 21.20 21.60 21.60 21.00 20.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.58 0.71 -3.94 126 2132 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 1956 28 508 1459 93 128.40 35 33.02 CHANGED u....h-hhcAtosl.sshosc-llptDhKpFsh.......su....ppluluQlpshshsplhscp..........sclhsthp.phtpcpshDhhllhlT......DIhppsohlLhhu..tptphhppAash.phps.sthhLp..............................GVhSRKKQllP.Lpcsh .....................s.hpMLKAG.o.sl...ss.+.osc-LlshDhKsFph................su.................ppltluQVsslDl.s-lhp+p...................s-lcsshp.th.s..t....p..puh.......ch.hlLhlT..........................DIls.ss....SclLslG.....tstch.....lpp.AFsh.....pl.....p.....s......sp....s.hLs..............................................................GllSRKKQl.VP.Lpcsh.......................................................................................... 
0 175 314 423 +1763 PF01180 DHO_dh DHOdehase; Dihydroorotate dehydrogenase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.95 0.70 -5.46 12 6787 2012-10-03 05:58:16 2003-04-07 12:59:11 16 50 4626 118 1697 6540 4232 282.50 29 80.42 CHANGED hLssplhGlcapNPlh.AuGh.t.GpphsphhsLu.hGhl.ltusThpsp.GNPpPRlhchstu..hlNuhGhsN.GhDhlhpclh.hhpc.....................................lsI.hshstss.....h-DYltsscplu..shschltlNlSsPNs..Ghhshtpss-lst.Lspslpchs..................................................psPlhVKluPsls-.shhslAs.ht.tpsLshls....Tlpt.hlDl+stpslhts...tsGGLSGsslKslAlchl+plhphss..IPIIGsGGIpoucDAlEhhhAGAShlQlhTuh.asGPhlss+IhccL.phL ............................................................................................................................................................................s..tsphhGlphpNPlhh.A..u.G..h.s.p.p.u...p.t.h...t...t....h.....h.......s..h.......u...hGtl...htol.T......c..s....p..........G.........N.......s....p..........P..R......h.......hc.......l........s........p.......s...................................h..l.......N..t.h......G..........h.....s......N.............G......h......-...t.....h..hp...p..l..h.t.h...h...t.p...............................................................................sls.l....l.....u....hstto...............h..-..-...Y..h..ts.h.c.p..lt..............sh..s...s....h...l...plN.l....S.s.P.....N.....s...........u....h.t.....s........l..t..........t...s.....s.....p....l......s....p.h.....l....s.p.h...h....p...p..t.s..................................................phPlhl....K..l..u............P........s..l.s..-.......................s.....h...h.........s......h.....A........s...........t.....h.........h....t..........p......s..........l.s..............s.......l...hh.N.o.T..l......s.t....................s.....l...p.......s....h....t.....s......h......t..................th
G.G.....l..S..G....t......s.....l.....p..s.....h.......u.......l.....c.......h.....l......p......p......l......t......p........p.....h..............p..............................t...........l......P.......I.....IGs..G.GIpos..cDA...h...E........h...l...h...............A.G.A.o...hV.Ql.......hT....u.h.h...a....p...G.P..t.....lspclhptL.th.......................................................................... 0 544 1054 1421 +1764 PF01761 DHQ_synthase 3-dehydroquinate synthase Bashton M, Bateman A anon Pfam-B_1327 (release 4.2) Domain The 3-dehydroquinate synthase EC:4.6.1.3 domain is present in isolation in various bacterial 3-dehydroquinate synthases and also present as a domain in the pentafunctional AROM polypeptide Swiss:P07547 [2]. 3-dehydroquinate (DHQ) synthase catalyses the formation of dehydroquinate (DHQ) and orthophosphate from 3-deoxy-D-arabino heptulosonic 7 phosphate [1]. This reaction is part of the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. 
24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.59 0.70 -5.47 95 4669 2012-10-02 14:41:14 2003-04-07 12:59:11 15 21 4283 45 1160 3985 3068 257.20 42 63.32 CHANGED hlslssGEptKo.hpsht.plhstLhpts...hsRsssllALGGGVlGDlsGFsAAsahRGlsalQlPTTLLApVDuSVGGKTulNpshGKNhIGuFaQPpsVlhDsshLpTLPtRE..htsGhAElIKauhI.t.D...sshaphLcpp.tt........................ptpsLp......phI.......t+SsplKA.plVspDE+E.sGlRtl.....LNhGHThGHAlEshtsY......phh.HGEAVAlGMlh.s.uclutthGhh.s.tthp............clpplLpphGL.P...s.shsph.................ss.-pllph...httDKKsp ...........................................thhlPsGEptKs.hp.s.h.p..p.lhst...L.l.pps..h.s.R.s.s..s.llALGGGVlGDlsGFsAAoahRGlc.FlQlPTT.L....L..A.p.VD...S.....S.VGGK.Tu.l...N..p......s....h.....G.............K..N...h...lG...A..F...a...Q..P...p..uVlhDhshL.c.TLPtRE.ltuGh.AE....V.I....K....a.....G.l....I...t.D.....ss.......h.ap.hLE..pph.p.t.h.ht..............................stpsht......phI.................t+.u...s...p.l..K..A........cl...V..s.......p...D...E+E........s.......G........l........R...tl................L........N..h.....G............H...T.........hG.H..AIEsth.sY.......tphhHGEAVAhGMlh.A.u.cl.u.pp...h..G..hh..sptpsp....................+lh..plL.p.p.h.G.L..P......s.s.hsp.....................shpphhp.tMtpDKKs.t.................................................................................................................... 0 401 765 1004 +1765 PF01487 DHquinase_I Type I 3-dehydroquinase Bashton M, Bateman A anon Pfam-B_2492 (release 4.0) Domain Type I 3-dehydroquinase, (3-dehydroquinate dehydratase or DHQase.) Catalyses the cis-dehydration of 3-dehydroquinate via a covalent imine intermediate giving dehydroshikimate. Dehydroquinase functions in the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. Type II 3-dehydroquinase catalyses the trans-dehydration of 3-dehydroshikimate see Pfam:PF01220. 
20.70 20.70 20.90 21.20 20.60 19.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.32 0.70 -4.70 150 2260 2012-10-03 05:58:16 2003-04-07 12:59:11 10 37 1982 52 564 1429 154 219.00 32 56.47 CHANGED slslsspshtchhtphpphtt..ss.DhlElRl..Dhlp..................p.ltptlptlppps...shPllhTh..R...................sp........tEGGph.p..hsc....cphhpllcpshchs.......s-alDlElp..hppt.hht......hhhtppsts+lIhSaH.sFppTPshpp...lhp....hhpcht.phGsDlsKlsshspshpDslp.lhphppphp.....thPlIshsMGphGplSRlhsshhGSshoau..thspssAPGQlslpclpphhp ..............................................................lslhspshtph.h.p.p.hpthpph....ss....DllEaRl..Dahtt..................................................shcplhphhtt.l+cth...sphPllaTh..R....................ot........pEGGch...s.....hos.......ctYl....pLh....ctsh....phs..............sD.hIDl..........Ehh...s....t....pc..htt................ltttptpsspllhSpH...sFpcTP.ptcc...............lht...........hlpchp..p.............h.....s.s.D........lsK.lAlMs.ps..p.pDVLs...Lh....photphpp...........sspPl..lshSMuchGhlSRlsuplaGSshTFu.........shpcsSAP.GQlslscl+phh.p............................................................. 0 165 339 475 +1766 PF01220 DHquinase_II Dehydroquinase class II Finn RD, Bateman A anon Prosite Domain \N 21.10 21.10 21.30 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.60 0.71 -4.86 191 2882 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 2604 343 840 2257 2156 139.20 48 92.52 CHANGED hpILllNGPNLNlLGpREPplYGs.pTLs-lppplpp..tApph..u..lplphhQSNpEGpLlDtIHpA.....tsp..scuIlINPuAaTHTSlAlRD....Altul...sl.P...hlEVHlSNlHuR.EpFRH......+SaluslA.tGlIsGhGspGYplALcshhp ..........................plLllNGPNLNhLGpREPplYGs.tTL.s-lppplpp..t..A..p..p..h......u....l.c.lchhQSNpEGpLlDtIHpA....................tsp..scsIlINPuAaTHTSl...AlRD....Altul...s..l..P...hlEVHlSNlHsR..EsFRH......+SalSshA.tGlI.sGh.GspGYpLALpshh.p................. 
1 251 526 715 +1767 PF04706 Dickkopf_N dickkopf_N; Dickkopf N-terminal cysteine-rich region Mifsud W anon Pfam-B_5838 (release 7.5) Family Dickkopf proteins are a class of Wnt antagonists. They possess two conserved cysteine-rich regions. This family represents the N-terminal one [1]. The C-terminal region has been found to share significant sequence similarity to the colipase fold, Pfam:PF01114, Pfam:PF02740 [2]. 21.20 21.20 21.20 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.92 0.72 -3.80 13 214 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 54 0 110 172 0 52.00 45 19.55 CHANGED .ChsD--CssspaCptscpp.......thClsCR+p++RCpRDuMCCsGshClNGlChs ....................Ch.Dc-CssscYCptsp.t...........tChsCRc...p+...c+CpRDu.CCsG...phCssGhCh...... 0 11 26 52 +1768 PF05086 Dicty_REP Dict_REP; Dictyostelium (Slime Mold) REP protein Moxon SJ anon Pfam-B_6278 (release 7.7) Family This family consists of REP proteins from Dictyostelium (Slime molds). REP protein is likely involved in transcription regulation and control of DNA replication, specifically amplification of plasmid at low copy numbers. The formation of homomultimers may be required for their regulatory activity [1]. 
20.70 20.70 114.70 114.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 911 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.66 0.70 -13.84 0.70 -6.79 5 10 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 5 0 2 10 0 732.20 36 98.23 CHANGED scLIPWcsFhhFalpILE-FpPsKRsp..h..ss-IMsahYSs+sLlhKElFRolls+pph.....scphtDsLDlcoIFc+TSLhsh....spsscsDLDDllpI+Ks.psGKIlVoDsDQAIFIIDHFSRluDsQVFsKKsIsGFRslEKhVScsGYKIcDaRphGlcWFsFLNKlRTuCspachF....h.puhaKalDFISMLoslHsItVDcQN+-EEcLSslYoLYPFl-LE..........pp.s.t.pulsocNtpRpshpPsspsNsTTTTTTTTTTTTspshT+KR+thsspsls.s.t.....ps.tsssts.......................tsphpcpsSRKSGuLK-VRlDsIS...................s..hssssspSlsPs-sIlSlSN+IKsCLlEllpSKGEIso-lVKuIFcsLQsKsYosDLlDSIFsQNKSEKVITlSS+lFsluuKlDaDEI+Fu-lS-DlF-LoKRLoFEKNTNILIPTp-tENpFGFLWlPIVNGssSTSIaLSPsN.ScVsas+I+SllKFIQLCIllu-INsFlolRSIoFDsFKSIosELLGMScRlLsLEsDV+KLKDllsKs...KKKspl-ss-hpusssscscFAsaVaEF.INNNhYIKLSKp-NuLKLs-sPsSoLsVEYDPsoI-HKVGFlFHCRSEISKFssstNhaSINsLlpSFTPNNIsslSQ-sENDLKRKYSLMSSDhScllKssssFlPSNFcRFlsITITNsuYNhN+VFuFcDISsGlSITNLRsIasccScp.cNRYcEYlGcTRlIRAFFhAPCLIQITNlsFso-pLpsD+slsRQIKSIKI+NLSYIPIDIKVGGslIDTIKuspTcsVpINSSEFoFSISCLcIoFSooLISKsKLsNLsTlls-.KYscpTslLpssDKhscLs+sFlscapchN.sLTcLEcaLlupF...tuhcD-ccsEcEcp--DEDpDEsEDE 
............pcLlsWppahhaahhlLcpatsscR.p.....ss-IhpthaSschllh+ElFRslhsph........t...DhhchpslFsthsh.........sh.hsLcchhphcKs.psuKlhVsDsDQuIhIlDHhSRlsDpplFsKctlsuaRshEKhlopt.YKlpDhRthGlcWashlscl+otCttpphh.......shac.sDhIuhLohlHslhltpQNc-EpplSthYoLaPFhsLE..............pp...t.pshsspptp+ts.pssp.sssTTpoTTTTTTTs.tshpp+pp.ht..p..............................................h..hsoRcoGuLccVRlssIo.........................sssspp.ssppsIhSlSs+IKshllEsh.sKGElshcsVKulFpsLQsKsYshsLl-slFppNKS-+VITlosphaplssplsa-EI+huclop-lhphu+pl.aEKsTNILlPTps.csthuFLWlPIVsshsSTSlaloP.s.opssasplcuhlKFlpLCIslucIstFls.RSIoa-tFKplsp-L.lsMSpRlLsLEtshcpL+phhppp...+p+ht.pps...p..s.ssp-pchhsalpEF.IssshYIKlS+ppN.sLpLs-sPsSoLslEYsPsoI-HKlGFhaHCRSEISKFsssts.hpIsslhpSFTPsNI+NlS.-pEs-LK+KYuLhsS-hopl.cshs.FhPpsFcRahsIohTNssYNhs+VFuFcDISpGhSITNL+sIatcsopp..ppY.-YhGsTRlI+AFFhAPCLIpIsNhtFsssplhsscslsRpIKSlcI+NhSYlsl-IKVssshlsolKuspTpsV.INSsEFsFSISCLchpFSsoLISKs+Lpplpohhsp.+Ysp.ps.h.phhsphscLhcsFlpphp.hN.plopl-phlhp.a....u.tc-cpsEspc.............-... 0 0 2 2 +1769 PF04562 Dicty_spore_N Dict_spore_N; Dictyostelium spore coat protein, N terminus Kerrison ND anon DOMO:DM04424; Family The Dictyostelium spore coat is a polarised extracellular matrix composed of glycoproteins and cellulose. Four of the major coat glycoproteins exist as a multi-protein complex within the prespore vesicles before secretion. Of these, SP96 and SP70 are members of this family. The presence of SP96 and SP70 in the complex is necessary for the cellulose binding activity of the complex, which is in turn necessary for normal spore coat assembly [1]. The function of this region of these proteins is not known. 
22.30 22.30 27.90 27.30 22.20 21.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.53 0.71 -3.82 9 43 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 4 0 43 47 0 117.80 32 20.62 CHANGED h-C-s.hsEspCcus.s.ChhLsasuCCGcpphhCscsstNsChssslSCh+sspospIhEhWSsC.hPpc......sFp.a..PsptoCss...hpCpspGhsCcaspsssChsTSCCPchspChstss .......h.sCEs..hspspCpsshs..Cpals..ahuCCG.cppthCls.ss..........p...s.....sCp......p..s..s..l...sChps..spsspIaEhWS.s...C.pP.pp......Gap.a..P.sstoCsp...LsCpspG.hsCpas.p........sssC...huTS.CC.sthstCss..s... 0 34 43 43 +1770 PF00186 DHFR_1 DiHfolate_red; Dihydrofolate reductase Finn RD, Griffiths-Jones SR anon Prosite Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.07 0.71 -4.65 89 5237 2012-10-03 00:23:32 2003-04-07 12:59:11 14 18 4170 438 1010 3895 1205 156.60 33 84.23 CHANGED hlslIsAhscNtlIGpssplPW+LPsDLpaFKp..hThG+sllMGRKTa-Sl.s....+..PLPsRpNIVlT.Rs.s..hp.....t.s...s...lhpSlppAl......t.ht....ppt......p-la.lIGGuplYpthls..hA-...clhlTclc..tph.p.GDsaFP.pls..spWphspppttt..spp...sshsasFhtapR .............................................lshI.s.A.h.....s.....p.....s.....t....lI....G......t......p......s......p......lP..W.....+......L.....P......s....D.....h...t....a.F...Kp...............h.T.............h.............s.................p...........s.......l.......lM.GR+TaE.....S.ls................+...PL.Ps.R...pNlVlo+sss................hps.................p..sst............lhp......S..l-.-...ul..............................t..ht......tp.......pcla.lI...........GGup.l.Yp.ph..l..s.........h....sc.............clal....Tc..Ic.............sph....-.....GD...s....a....F.........P....p...h......s............s......p.......ac...s...t...p...p....h............................tp.......sshsapa.hh.+...................................................... 
0 312 602 830 +1771 PF02966 DIM1 Mitosis protein DIM1 Griffiths-Jones SR anon Structural domain Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.64 20 463 2012-10-03 14:45:55 2003-04-07 12:59:11 11 4 318 9 303 648 68 123.00 59 88.41 CHANGED hLPHLpoGWcVDQAIloEc-RlVVlRFGR-pDstCMphDElLhpluE+l+NFAsIYLVDlD..cVPDFNpMYELh....DPsTlMFFaRNKHMMsDaGTGNNNKlNaslcDKQEhIDIlEslYRGApKGKGLVlSPhDY ......................................hLPHLpsuWpVDQAIlo.E-..-Rl.VV..............IRFG+...Dh.......D.ss........CM.phDElLhpl.....A-.+.....V.KN.F..AVIY..L..V..DIs..cVPD.FNpMYELY....DPsTVMFFaR.NK.HhM.lD.h.GTG..NN.NK.lNW.shcDKQEhIDIlE........TlYRGA.+KG+GLVlSPKDY..................................... 1 94 161 241 +1772 PF05163 DinB DinB family Bateman A anon COG2318 Family DNA damage-inducible (din) genes in Bacillus subtilis are coordinately regulated and together compose a global regulatory network that has been termed the SOS-like or SOB regulon. This family includes DinB from B. subtilis [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.82 0.71 -4.55 14 1291 2012-10-02 14:44:17 2003-04-07 12:59:11 7 1 629 11 408 2210 324 161.30 18 97.05 CHANGED Msppsh.........phhpa.thsssphhchltsLss-phs...tc.spshholtphLsHlhtschhWlsthph...........ttsph.ppsptcshphhcshppshspthhshhpphs-....ttcphh..s.tsstshphshsplLhHlhsHpsHHRGQlsshlRphGhsss.hshhhh...t................t.ch ..........................................................................................................thhpa..th.s.spp...h.h.p.h.hp.p..L....s..p..c.p..hp............pp.h..s.s.h.h.t.oltcshh...H...lh...s.s.....c.h.h...a..l.s.t.hps.....................................tt....s..h........p...t...h...........p.....p.....s...h....p....p.....l...c...p....h....h..p.....t...h......s.....p...p...hp.....p..a..h.s..p..h..sc.............t..t.h.hhh...........pt.s...s..h....t......s..h...p....h.......s.h...t..p.........h....l...t..H...l.h...s....Hps.HHRGQlss.h...lR.p.hG.h.s.s.s.s.hsh...............tt.................................................................................................... 
0 165 288 348 +1773 PF00775 Dioxygenase_C Dioxygenase; Dioxygenase Bateman A anon Pfam-B_1018 (release 2.1) Domain \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.26 0.71 -5.03 20 2905 2012-10-02 19:08:27 2003-04-07 12:59:11 16 16 1090 387 1080 2726 434 159.70 27 65.12 CHANGED l-GPhYlss...APptss.huchs.t..ss.GpslhlpGpVpDtsGcPlsGApVElWHAsscGhYShasss..ts.paNLR..GpllTDs-GpYchpTlhPusYshPspGsstthlp..hGRHshRPAHlHahVoAPGa.cpLsTQlahpGDsalt.sDhshu.....s+spLlhshpptsss...................................hpchpachsLssp ...............................uPha....t..........................................ts.Gp.l...hlp...GpVh...D......t.....s...G...p...P.l.ss....u..hl-lWpA...su.sG..t....Y.s...t........h...........s..............s........s..................................t.......s........c..................s..........h...c.....................G+h..h..T..D..s...s...G..p..a.p.F..p.....TlhPu..sY.....sh..s........................................t.pshRss....H..lHh...l............u...uh....ptLhT....p....hah..........s..-..hh....ss.h.s......hhttLl.......t...........................................h.........t.................................................................................. 0 252 577 855 +1774 PF04444 Dioxygenase_N Catechol dioxygenase N terminus Kerrison ND anon DOMO:DM04160; Family This family consists of the N termini of catechol, chlorocatechol or hydroxyquinol 1,2-dioxygenase proteins. This region is always found adjacent to the dioxygenase domain (Pfam:PF00775). 
20.90 20.90 20.90 21.60 20.80 19.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.31 0.72 -4.00 19 910 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 500 43 410 899 78 74.30 31 24.82 CHANGED ssRs+pIspcllpcLacsIc-hclT.-EahsulsaLschGp.....ptEhsLLussLGlEH..shthDstptps.uhpG..GT ........ssRh+plhppLlpcL+shlc-hclT.sEahtulpaLscsGp...................+pEhhLLuDsLGlpp.....hlDthpttt................................. 1 78 193 315 +1775 PF01866 Diphthamide_syn Putative diphthamide synthesis protein Enright A, Ouzounis C, Bateman A anon Enright A Family Swiss:Q16439 is a candidate tumour suppressor gene [1]. DPH2 from yeast Swiss:P32461 [2], which confers resistance to diphtheria toxin has been found to be involved in diphthamide synthesis. Diphtheria toxin inhibits eukaryotic protein synthesis by ADP-ribosylating diphthamide, a posttranslationally modified histidine residue present in EF2. The exact function of the members of this family is unknown. 
28.70 28.70 29.50 29.10 27.90 28.10 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.11 0.70 -5.18 57 985 2009-01-15 18:05:59 2003-04-07 12:59:11 12 19 467 4 687 991 178 247.50 26 64.82 CHANGED hPEGLhhhuhpluchL..cp....sst.....sll.GDssYGACsls-hpu.ptlssDhllHaGHosls......ht..h.slal.sph...plDsp.chlcplhpph.......pplullsTlQahptlpplpphLpp.......p...hhlshsps+..htsGpVLGCshssh...........pshlalGsGpFHh.uhhl.u....sph.shthDPa....shshc...p.-tschl+hRhttIp+At..sA..+paGlIluohssQtp.clhcplpphlcp...GhcshllhhsclsPscLtth.s...lDsaVpsACPRlulD.sttFp+Pl.LTPhEhplslstpp.......tpY.hD.h ........................................................hP-tlh..u..lsp.l.....pp..........tt.p.......................hhlhuDs..sY.G.uCClD-hs.A........p........ths.sDhllHa..G+uCL.s........................t.t.l.slalh.sph......t.lc.....ht...thh.p.p.h.pt....t.h......t.t................ppls.lhssh..pa.....t..t...l.pt.h.tt...Ltt...................................t......h.......s......................t..p....l...h...sp...p...............................t.hha.l..us....s..hp......h...............s........hhl.t.....sth.....hh......th....s...Ph...............hp.....h......p....................phtthhthRhth.ltp.........up............p.A..................phhGll.luoLuhps..p.h.hpplp.phl..pt........t...G..h..p............hhlhhucl.s..sKLs.......h.............pl....-saV.luCP..c..h..s....l..........s..............s.....t..F.....+..Pl..loPhEh.lslt......................................................................................................................................... 0 245 397 568 +1776 PF02763 Diphtheria_C Diphtheria toxin, C domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain N-terminal catalytic (C) domain - blocks protein synthesis by transfer of ADP-ribose from NAD to a diphthamide residue of EF-2. 
25.00 25.00 31.70 106.20 24.10 17.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.23 0.71 -4.85 3 23 2012-10-01 23:25:29 2003-04-07 12:59:11 9 1 10 13 1 35 0 169.70 98 32.48 CHANGED GADDVVDSSKSFVMENFSSYHGTKPGYVDSIQKGIQKPKSGTQGNYDDDWKGFYSTDNKYDAAGYSVDNENPLSGKAGGVVKVTYPGLTKVLALKVDNAETIKKELGLSLTEPLMEQVGTEEFIKRFGDGASRVVLSLPFAEGSSSVEYINNWEQAKALSVELEINFETRGKRGQDAMYEYMAQACA .GADDVVDSSKSFVMENFSSYHGTKPGYVDSIQKGIQKPKSGTQGNYDDDWKGFYSTDNKYDAAGYSVDNENPLSGKAGGVVKVTYPGLTKlLALKVDNAETIKKELGLSLTEPLMEQVGTEEFIKRFGDGASRVVLSLPFAEGSSSVEYINNWEQAKALSVELEINFETRGKRGQDAMYEYMAQuCA 0 0 1 1 +1777 PF01324 Diphtheria_R Diphtheria_tox; Diphtheria toxin, R domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain C-terminal receptor binding (R) domain - binds to cell surface receptor, permitting the toxin to enter the cell by receptor mediated endocytosis. 25.00 25.00 27.80 81.20 19.60 18.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.82 0.71 -4.66 3 23 2009-09-10 22:55:25 2003-04-07 12:59:11 14 1 10 12 1 20 0 138.70 95 26.57 CHANGED SPGHKTQPFLHDGYAVSWNTVEDSIIRTGFQGESGHDIKITAENTPLPIAGVLLPTIPGKLDVNKSKTHISVNGRKIRMRCRAIDGDVTFCRPKSPVYVGNGVHANLHVAFHRSSSEKIHSNEISSDSIGVLGYQKTVDHTKVNSKLSLFFEIK SPGHKTQPFLHDGYAVSWNTVEDSII+TGFQGESGHDIKITAENTPLPIAGVLLPTIPGKLDVNKSKTHISVNGRKIRMRCRAIDGsVTFCRPKSPVYVGNGVHANLHVAFHRSSSEKIHSNEIsSDSIGVLGYQKTVDHTKVNSKLSLFhElK 0 0 1 1 +1778 PF02764 Diphtheria_T Diphtheria toxin, T domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain Central domain of diphtheria toxin is the translocation (T) domain. pH induced conformational change in this domain triggers insertion into the endosomal membrane and facilitates the transfer of the catalytic domain into the cytoplasm. 
25.00 25.00 28.40 27.40 24.40 24.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -10.95 0.71 -4.50 3 24 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 11 12 1 32 0 178.30 91 28.97 CHANGED SCINLDWDVIRDKTKTKIESLKEHGPIKNKMSESPNKTVSEEKAKQYLEEFHQTALEHPELSELKTVTGTNPVFAGANYAAWAVNVAQVIDSETADNLEKTTAALSILPGIGSVMGIADGAVHHNTEEIVAQSIALSSLMVAQAIPLVGELVDIGFAAYNFVESIINLFQVVHNSYNRPA ....................................SCINLDWDsIRDKTKTKIESLKEHGPIKNKMSESPNKTVSEEKAKQYLEEFHQTALEHPELSELKTVTGTNsVFAGANYAAWAVNVAQVID.SETADNLEKTTAALSILPGIGSVMGIADGAVHHNTEEIVAQSIALSSLMVAQAIPLVGELVDIGFAAYNFVESIINLFQVVHNSYNRPA................. 0 0 1 1 +1779 PF00200 Disintegrin disintegrin; Disintegrin Finn RD anon Prosite Domain \N 19.60 19.60 19.80 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.80 0.72 -11.29 0.72 -3.76 154 2414 2009-09-12 09:20:59 2003-04-07 12:59:11 18 51 294 43 1025 2300 5 73.50 43 11.29 CHANGED EtGE-CDCGs.cpC..p...ssCC.........sstsC+Lp..sGupC....up.G.CCp..p.Cph..pssGslCRtsp.s-....CDlsEaCsGpSspCPsDhat .........................EtGE-CD.CG..s.....p.p...C..p........ssCC..................ss..sCp.L...c.........su......u.p......C..............upG.C.C..............c..p..C..p.a......ps.s..Gs.hCRtst...s-.....CDlsEaCsG.p..S..s..pCPssha.h......................... 0 211 280 525 +1780 PF05141 DIT1_PvcA Pyoverdine/dityrosine biosynthesis protein Bateman A, Eberhardt R anon COG3207 Family DIT1 is involved in synthesising dityrosine [1]. Dityrosine is a sporulation-specific component of the yeast ascospore wall that is essential for the resistance of the spores to adverse environmental conditions. Pyoverdine biosynthesis protein PvcA is involved in the biosynthesis of pyoverdine, a cyclized isocyano derivative of tyrosine [2,3]. It has a modified Rossmann fold [3]. 
23.00 23.00 23.70 23.90 22.60 22.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.79 0.70 -5.09 33 360 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 254 4 199 317 7 262.50 32 49.15 CHANGED plptFlppscPIchlLPAFPsKSP...Np.pKVlGsLPDhuEclALppLsshspcIpplYsPGAclhIsSDG+VFuDlluVsDcslssYspsL+pLhpplu..........hsp.lpahsLpDl........................tth.th.hssphsthRphLhppasssstsl...cpplps...scsththYRGhs+FLh-D....hhs.t.shS+pth+KsspphAhsMIpRupAausLlttpFPptlRLSIHspsssu.KhGIplh...................Pspcsh...hTPWHusssphs....suphhhhp+schtp..shcllphsG ..................................lptalsptp.lchlLPAFPsKSs...N..pKVLGphPDhuEcluLppLsphspcIp.plYs........PGs........p........lhIhSD.G+VFu........Dhl.ul.sDpcl.sYsctL+phhtpht..........hsp.lphhsLpch..........................cphst.h..phstph-thRchLhst.hss.ss....t..l....pppltt...spctthhYpGhp+Fh.pD......h....tt...s.h.S....ppth+.+pspplAhtMlhRu...............pAaushlp......ppFP.ctlRLSIHsp.........sstu.Khulplh..............................sppcsa.....hTPWHssll..s....cGp.hhhhp+pph.p..shp......h................................................................................... 0 23 103 165 +1781 PF04977 DivIC Septum formation initiator Bateman A anon COG2919 Family DivIC from B. subtilis is necessary for both vegetative and sporulation septum formation [1]. These proteins are mainly composed of an amino terminal coiled-coil. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.42 0.72 -4.49 70 4922 2012-10-02 13:28:50 2003-04-07 12:59:11 10 6 3891 0 944 2787 1568 78.40 20 63.49 CHANGED lhshhhhhhhhhttshhshhphcpclsphppphppLppcpppLcsclcpL.p...sp-hlcchARpcLshscssEhhaplspp ..................................hhhhhhhhhhh...h.p...sh.t...ph....h...p.hp....p....p...l...s...p..hpp....php...plp..p...cpppL.......p.......s-lpcL.s..........s.p-...hl.cc.hARpc.huh..s..p..s.s.Ethapl...p.................... 
0 323 617 794 +1782 PF05103 DivIVA DivIVA protein Bateman A anon COG3599 Family The Bacillus subtilis divIVA1 mutation causes misplacement of the septum during cell division, resulting in the formation of small, circular, anucleate mini-cells [1]. Inactivation of divIVA produces a mini-cell phenotype, whereas overproduction of DivIVA results in a filamentation phenotype [1]. These proteins appear to contain coiled-coils. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.60 0.71 -4.10 99 3249 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1865 6 551 1653 105 105.00 27 54.00 CHANGED lTPhDIppKcF+psh..R.GYcp-EVDcFLDplhcDYEslh...+EspcLccclppLcpclppapphcpslppsllhApphu-chKtsApccuphllppAptpAppllt...........pu.tph.pltpph-.pLK+pspla+ppap .................hosh-IhpKpF.p...p.th....R...G..YctcEVDpFLD.pl.......hp-a.......-phh.....................pchc....p.......Lc.........p....cl.....p...pLc.......p.......c.......l.............pp..........h.p.........p....................................t....................................................................................................hh.........s..t......t.t...h.....pup.......tstt........................................................................................................................... 0 199 374 482 +1783 PF00778 DIX DAX; DIX domain Marcu O, SMART anon Alignment kindly provided by SMART Family The DIX domain is present in Dishevelled and axin [2]. This domain is involved in homo- and hetero-oligomerisation. It is involved in the homo- oligomerisation of mouse axin Swiss:O35625 [3]. The axin DIX domain also interacts with the dishevelled DIX domain [4]. The DIX domain has also been called the DAX domain. 
21.40 21.40 21.80 21.60 21.20 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.79 0.72 -4.26 25 558 2009-09-11 10:36:07 2003-04-07 12:59:11 12 24 114 18 287 508 1 77.40 42 11.79 CHANGED sspTKllYa.lscEpsPYhlplstsst.lTLtcFKshl.s+p..sY+aaFKshDsDFu..sVKEElh-DsshL..PsapG+llua...lhss- .............................s.tphhVhYa..h.....stE.....hPYhspl.ht.s.....lTLucFKphL...s+p..ssY..+aaFKs.....hss-Fs....sVpEE.lh-Ds.shL..PsapG.+lluhl.p..t......... 0 70 98 185 +1784 PF01738 DLH Dienelactone hydrolase family Bateman A anon Pfam-B_757 (release 4.2) Domain \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.57 0.70 -5.06 17 4591 2012-10-03 11:45:05 2003-04-07 12:59:11 13 32 2059 10 1757 12590 4895 210.60 23 80.54 CHANGED hsualutPsss.....hPslllhp-laGls...s.h+thsppLAptG.ahulsPDlYhRpu........................psschspttphhtthhpph..tplhscl........sslsahcups.hpstclGllG.aChGGthuh.hAups..hlcusluaYG........h..sp..lthssplpsPlhhthGppDphlssps...hpplppshttsssshplchYssAuHuFtsstss...........sasstAupcuWp+hhpahpphh 
........................................................................................................................................................................sahs.hPttt......t..hP........u.l.l.....l........l........p............-.......h......a........G.lp..............sth.c...p.....h....u....c....c..l....A......t.......t....G....Y..h..s..l.......s........P..D....h...a.tpps.............................................................t.p..h.s.p.t..t...p..h...h...p...t..h...h..t......p...h.................s...p....h..hs.ch..............................s.s.l...s...h..l...t...p.........p.....s.........................s......s....t.+.....l.uhsG.........a.C...........a.G.........G........p....l.........s.........h.........h.........h............A..............s...............p..................s........s.............p............l..........p..............u..........u.........l.........s.........a....YG......................................................p..t...s...h...t.......s...s..p....l.....p....s........P....l....l.h..h..a.u...t..p...D...s...h...h..s...p.t.............h.t.t....h.c.......p............t..L......p.......t.......s....s.......s....p........h.....p...h..h.....h...Y...s...s....s...s.H......u.F.ss..s..tps.............................................pY.s..tt..u.......u..c...p...u.a.pch.ltahpph.h....................................................................................................................................... 0 494 1027 1453 +1785 PF04914 DltD_C DltD C-terminal region Kerrison ND anon Pfam-B_6216 (release 7.6) Family DltD is and integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. 
It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis [1]. This family consists of the C-terminal region of DltD. 23.80 23.80 23.80 24.10 23.60 23.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.60 0.71 -4.28 36 1136 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 1012 6 85 607 7 128.70 41 31.78 CHANGED pFGIcsp.aYcp+lKp.plp..+LKspp+phsYhp.SPEasDhQLlLsphscppscVlFlIsPVNuKWhcYTGlspchhpphscKI+hQLpsQGFppIsDhSccuscsYFMpDTIHlGW+GWlthDctlpsFhppp .....................paGIcst.aYpp.plKp.plp..+h.K...spp.c..p...s.Yhp.SPEYsDL.QLlLsph..pcspscslFlhsPVNs.+.WhDYsGlsc-hhptshcKI+.QlpspGa..pIsDhSccsh-tY...Fh...pDTI.HLGW+...GW.lthDctlp.Fhpp....................................... 0 18 38 63 +1786 PF04918 DltD_M DltD_central; DltD central region Kerrison ND anon Pfam-B_6216 (release 7.6) Family DltD is and integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis [1]. This family consists of the central region of DltD. 
20.70 20.70 20.90 20.90 20.60 20.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.91 0.71 -4.05 9 1116 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 994 6 79 595 1 162.20 33 39.87 CHANGED lGpsGSsSLsHahshsuhhspLKsKKhlaVlSPQWFstpGlspssFpthhSssQlhpahhpps.ssptcphhAKRLLpFp...scsshpphLcplAsspphsstshh.ltp.tp....hhc+p-ulaSphshs.....s.ppplp+hhKtLPcshSappLsplAscpGp+pTssN .....................LGpuGopSLsphhshpuh.hspLc..sKKsVallSPQWFo.p.pG.hssssFsthaSppQhhpaltpps.ssphc+hhApR...LLph........cs.t...hKph...Lp.plu...p...s....pphs........p...ts....t...thh....th.tp........hLp+p-....s..hhS.Fshc.......s..ptl.p...+h..h..pt....L...s.cphSapp.lcphApppuctpTssN............................................................................................ 0 17 35 57 +1787 PF04915 DltD_N DltD N-terminal region Kerrison ND anon Pfam-B_6216 (release 7.6) Family DltD is and integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis [1]. This family consists of the N-terminal region of DltD. 21.20 21.20 23.00 24.50 20.50 21.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.99 0.72 -4.11 32 1047 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 949 6 72 531 0 62.30 44 15.28 CHANGED cplcpuAsolossshKGphlKpcAl..ppspYlPhaGSSEhsRhDshHPSVhAc+Ypc.sYpPFL .........shccuAsShotpshK..uphlps+Al..pss+Y..lPhaGSSEhtRhDshHPulhhcKasc.sa+PFL... 
0 12 28 50 +1788 PF03474 DMA DMRTA motif Ottolenghi C anon Ottolenghi C Motif This region is found to the C-terminus of the Pfam:PF00751 [1]. DM-domain proteins with this motif are known as DMRTA proteins. The function of this region is unknown. 20.30 20.30 20.30 25.00 20.00 20.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.66 0.72 -4.10 5 284 2012-10-01 23:03:33 2003-04-07 12:59:11 9 4 122 0 123 261 1 38.10 53 10.22 CHANGED sRoPIDlLt+lFPp+KRoVLELlL+sC+GDllpAIEshL ......RsPlDlLs+lFPsp+RosLEllLptCsGDlVpAIEplL..... 0 28 39 77 +1789 PF00885 DMRL_synthase 6,7-dimethyl-8-ribityllumazine synthase Bateman A anon Pfam-B_1503 (release 3.0) Domain This family includes the beta chain of 6,7-dimethyl-8- ribityllumazine synthase EC:2.5.1.9, an enzyme involved in riboflavin biosynthesis. The family also includes a subfamily of distant archaebacterial proteins that may also have the same function for example Swiss:O28856. 20.60 20.60 21.70 21.20 20.30 19.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.59 0.71 -4.53 104 4443 2009-09-10 17:03:43 2003-04-07 12:59:11 14 14 3970 450 1238 2833 2169 140.60 42 89.31 CHANGED sssh+lulVsucaNptIscpLlpGAlptlpptG.s..tplslhpVPGAaElPlsspplu.cs.s....c.......aDAllsLG.sVI+GsT.Ha-aVuspsspGlh..pl.....uLcpslPlshGVLTs.csh-QAlcRuuh..ptt...sKGtEAAtAulchlsl...hcpl .....................u..sh+luIVhuRF..Nph..IsspLLpGAl-sL.p.+.tG...l....p....p....c....sIslshVPGAaElPlsupplA..co..s..........c....................................aDAllsLG.sVIRGsTsHF-YVsspsupGls..pl.....u.l.c.....s.s.l.PV.h.F.G.VLTs.-ohE.Q.A.......lERAGs........KsG.....NKGsEAAhsAlEMhslhct.h.................................... 
0 390 794 1053 +1790 PF04976 DmsC DMSO reductase anchor subunit (DmsC) Bateman A anon COG3302 Family The terminal electron transfer enzyme Me2SO reductase of Escherichia coli is a heterotrimeric enzyme composed of a membrane extrinsic catalytic dimer (DmsAB) and a membrane intrinsic polytopic anchor subunit (DmsC) [1]. 19.60 19.60 19.60 19.60 19.30 19.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.70 0.70 -5.14 6 1995 2012-10-01 19:35:38 2003-04-07 12:59:11 7 11 916 0 169 1024 199 261.40 35 93.70 CHANGED Ms.GhaEhPLVhFTVhuQsslGual...lhullhltucspt..+ph..lltuMFllLsLlGlGFIAShhHLGpPlRAFNuLNRlGuShLSNEIAuGuIFFuluGlaWLlAlltKhssuLuplahllohllGllFsahMspVYp.IcoVPTWaouaTshuFaLThlhuGssLuhuLLpstpls.shshphLshLssLu.................sslsusVslhQGhsLpoIcoSsQpAssLVP-a.........AsLpshRhlLLslshshLlhshhhhc.sslslL.lluhlLlLAGEhIGR...........sLFYGL ....................................................hHEhPLllFTVhuQsslGuhl......V.h..u...l..u..h...h.........t.u..ph.....p.....s.p...................ppt.........lht....s.h.h.h.lh.l..L....h.u..l.....Ghl.AShhHLGpP.......hR..AaNuL..s....+.l.G.uS.hLSNEI....s....u.....u...ol......F......a.A....l.u....Gl....h......a..........L...l..s.........h...........l.......t.....+....h.......s.....s....u....l..t.....p...l.......h.h..l....l.......s.hl...lG...l.l....F...V.a....hMs...pVYp....l.s.TVPTWtss.aTs...hsFa.....lT.....s.h.....l..s...G..s..l...L.u...h.......h......L....l......t.h......u.t.l.......shs....h..p..h...h..s.h.l.s.s.LA..................................lls.ssh..h..........h...h.....p.u....h.p.....l...s...s.....l..c....o....u.l..p...t.u..s.t..L.h..s..s.a.........................................u.tl...sh+...l...l.....L.....l...s..s..uh..s....h...h....l....s..s.......h....t.......t...h...........p.......s....t.......s.s..hL........h...u..h....l.Lllu..GEhlGR...........hlFYs................................................................................................................................
. 2 45 97 139 +1791 PF03989 DNA_gyraseA_C DNA gyrase C-terminal domain, beta-propeller Yeats C anon Yeats C Repeat This repeat is found as 6 tandem copies at the C-termini of GyrA and ParC DNA gyrases. It is predicted to form 4 beta strands and to probably form a beta-propeller structure [1]. This region has been shown to bind DNA non-specifically and may stabilise the DNA-topoisomerase complex [2]. 20.40 4.10 20.40 4.70 20.30 -999999.99 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.96 0.72 -4.66 362 37895 2009-01-15 18:05:59 2003-04-07 12:59:11 8 34 4630 51 7547 27731 14770 47.70 22 27.63 CHANGED cp..llhhTppGhsh+hsh................................................schpt..hsRsspGlpsh........plpcs..-plhshhhss ...................................................................................llhhTppG.h.sh..R.h..sh..............................................................................................................schpt.....hs.R.s.sp.G...l.psh...............plp..cs.....-plhsh....t............................................................. 0 2464 4959 6375 +1792 PF00204 DNA_gyraseB DNA_topoisoII; DNA gyrase B Finn RD, Griffiths-Jones SR anon SCOP Domain This family represents the second domain of DNA gyrase B which has a ribosomal S5 domain 2-like fold. This family is structurally related to PF01119. 
20.70 20.70 20.70 20.70 20.30 20.60 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.15 0.71 -5.05 99 18153 2009-01-15 18:05:59 2003-04-07 12:59:11 20 41 10023 20 2024 14069 4290 160.20 42 30.69 CHANGED Glpcalchhsps.........................h.hpstt..............ps....hpl-lAhtas.s..s.s...c.plhSFs..NsIsThcGGTHlsGhcsuls+slspascpp.phh......ppt.t....plsscDl+cGLssll.sl....+lssP..pF-GQTKp+Lssspsp..shlpph..lp-pht.phlp..css....phscpllppshtttc....sRhs..s+ps+ch ................................................................................GlpsFVcaLNps....K...psl.............hsp..........lhhhps-p..................................cs........lsVElA.hQWN..-....uap.......E..slhsFs...NNIsp+-GGTHlsG.FR..sALT.......Rs.lNs.Yhc...cp...shh...............Kc...t.c......sloG.-DhRE.G.LsAVl.SV....K.ls-P......pFp.uQ.........TK.sKLssoEs+.................shVp.p.h.hs-...tls.paL.....cNP................p..A+hllpKhlpAAp...........AR.A....A++AR-........................................................ 0 674 1283 1691 +1793 PF00986 DNA_gyraseB_C DNA gyrase B subunit, carboxyl terminus Finn RD, Bateman A anon Pfam-B_332 (release 3.0) Family The amino terminus of eukaryotic and prokaryotic DNA topoisomerase II are similar, but they have a different carboxyl terminus. The amino-terminal portion of the DNA gyrase B protein is thought to catalyse the ATP-dependent super-coiling of DNA. See Pfam:PF00204. The carboxyl-terminal end supports the complexation with the DNA gyrase A protein and the ATP-independent relaxation. This family also contains Topoisomerase IV. This is a bacterial enzyme that is closely related to DNA gyrase, [1]. 
21.10 21.10 21.10 22.00 21.00 20.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.08 0.72 -4.22 72 9044 2009-01-15 18:05:59 2003-04-07 12:59:11 16 17 5005 38 1681 6230 3303 62.90 53 9.63 CHANGED ttppphpIQRaKGLGEMsscQLWETTMsPcsRpLlpVplc..D....ttp..scchhshLMGc.cspsR+paIp ................s..ppthslQRYKGLGEMNs-QLWETTM.......cPcsR..pLlpVs..l..-....D....................uhp........A-ph..hshLMG-.cVEsRRpFIp......... 0 548 1095 1410 +1794 PF03603 DNA_III_psi DNA polymerase III psi subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.20 20.20 20.30 20.80 19.50 19.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.70 0.71 -4.28 26 745 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 743 5 76 295 13 127.30 54 93.11 CHANGED M.s+RDhhLQphGIoQWpL++PphLp..G..phslsLsscsRLllVusphPptsps.LlpDlL+SlpLs.sQshplss-plshLstpph.phhWhlGscpsth.htu...........tpL........pSPtLspLpssspsKRsLWQQIssa ............MTSRRDW.LQQLGITQWsLRRP...u.ALQG.............EIAIul.PsHVRLlhVAs-LP.uLs-P..LlpDVLRALsloPD..QVLpLTPE+lAML..Pp..so+..CNSW.RLGsD....pPLs....LcG...........AQl..............sSPAL.s-LcuNssARtALWQQICsa........ 0 6 25 53 +1795 PF01653 DNA_ligase_aden DNA_ligase_N; NAD-dependent DNA ligase adenylation domain Bateman A anon Pfam-B_1334 (release 4.1) Domain DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor [1]. This domain is the catalytic adenylation domain. The NAD+ group is covalently attached to this domain at the lysine in the KXDG motif of this domain. This enzyme- adenylate intermediate is an important feature of the proposed catalytic mechanism [1]. 
19.00 19.00 19.00 19.00 18.90 18.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.85 0.70 -5.48 15 5286 2012-10-02 00:43:09 2003-04-07 12:59:11 13 57 4462 24 1089 4137 3767 299.60 38 46.15 CHANGED ppsppclpcLpchl+pasYcYYVhDsPhlsDu-YDpLhpcLppLEpcaPELlTPDSPTQ+VGuussputFpplpH.sPMLSLDNAFspc-LpsahcR....lpcthsppsp....aslE.KIDGluluLhYcsGlLs+AsTRGDGphGEDVTpNl+TI+sIPlplshs.......tplElRGEVahsKcsFtpLNpph.cpsccsFANPRNAAAGSLRQLDPclTA+RsLphhsYulu..ps.phshpTQhtsLphL+phGFs.Vspphphsc...sh--VlsahcpapccRcsLsa-IDGlVlKlsplshQccLGaTu+uPRWAlAYKFPAc ...........................................................................t...ppchpcLppplppash.tYY..s...D....s....P..plsDuEYD.pL.h.c.cLtpLEppa....P.......-....h................h.s.......s.D.....S............P....T.p...RVGh...s.....s.l...........p.u.........F.......pplpHthPML..SL..s..s.....s......F..........s..........c.............c.........-..l..p..sF.pR......................lp...c........t...l..........s......p......s....p.............ahsE.KlDGLAloL..hY....c.....s......G.......h.............Llpus.TRG.D.G.p.tGEDlT...p..N.l.........+T..I......c..s....I.Ph.pLptt................................PtplEVRGElahs+psFtpl.N..............tpt.t......p...............p......G............p..........p...............h.....F......A.NPRNAAAGSL..RQ.....L..............D...s......p........l.........o.........A..........p...........R.........s........LshFsYu.....l..............u..................................p...................s...........................................t........................s......o..............p...pt....Lp.hL..p.p.......hGF.........s.s....s....s.t....h....p....h....s.p......................s..h..--.....l..h.p.....ah....pch..............t........p......p...........R......s...........s..L.......sa-I..DGlVlK.VsclshQc.pLG..a........s........u.+.....u.PRWAlAaKFPAp..................................................................................... 
0 368 717 925 +1796 PF03120 DNA_ligase_OB NAD-dependent DNA ligase OB-fold domain Bateman A anon Pfam-B_1334 (release 4.1) Domain DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor [1]. This family is a small domain found after the adenylation domain Pfam:PF01653 in NAD dependent ligases [1]. OB-fold domains generally are involved in nucleic acid binding. 25.00 25.00 27.00 25.40 24.30 23.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.55 0.72 -4.32 23 5111 2012-10-03 20:18:02 2003-04-07 12:59:11 11 59 4425 11 1047 3928 2199 82.00 46 12.29 CHANGED s.TpLhDlplpVGRTGs.lTPlApL-PVpluGosVopATLHNtDhIcc+DIRIGDpVllcKAG-VIPcllpVlh..-pRssssps ............t.hTplpslchpV.GRTGs.lTPVApL.............c......P.......Vpl.....uGssV....opATLHNhDp.I...........c............c............h...........Dl.+..l......G.DpVl.lp+AGDlIPcllpVlh..pcRstpt............. 0 351 681 885 +1797 PF03119 DNA_ligase_ZBD NAD-dependent DNA ligase C4 zinc finger domain Bateman A anon Pfam-B_1334 (release 4.1) Domain DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor [1]. This family is a small zinc binding motif that is presumably DNA binding [1]. IT is found only in NAD dependent DNA ligases [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.44 0.72 -4.24 152 3952 2009-01-15 18:05:59 2003-04-07 12:59:11 11 37 3909 5 869 3009 904 27.80 47 4.05 CHANGED pCPsCsotlh.+.ps.....Es.shRCsNs..sCPA ...pCPsCGSclh.+hEG............Es.shRCsss..hCsA.. 
0 296 579 741 +1798 PF00145 DNA_methylase C-5 cytosine-specific DNA methylase Sonnhammer ELL anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.04 0.70 -5.32 31 7421 2012-10-10 17:06:42 2003-04-07 12:59:11 12 102 3000 52 1976 6841 2919 257.40 19 65.39 CHANGED hchl-LFAGlGGhcLGhcpsu...........hpslhusEhDcp.....AhcoYptNa......phshsDIsphshp....plP.....hDlLluGhPCQsFShAGp....ppGhsDsRG...sLFa-hhRllcppc....PKhFlhENV+sLlspcpt...pshpsllpphcchG..YpVp.......hplLsApDaG.lPQsRcRlallGhcpch.........................th.phphsp.....................h.sltDlhpp.........h.pschshssphhpshphht....t..tsas..........hhhpp........................................cphttsthhshshpppsh..................................h.tsshhtpsst.......................................pthRplTsREsA.RlQGFP-s.....Fphhssps........ptY+phGNuVsVsllptlucplhp ...................................................................................................................................................................................................................................hphFsG.h..G....uh.p.h....u....hpt.hs................................hph..h.h..h.-..h...p.................................sh.t.s...ht...h...ph...................................t..D......l...t...ht.t..........................................th.....t.....h-....l.l.h.u..G.PC.Qsa...S....h....h....s...............................t...t....s......h........t.......p....t..p.s.......................tL....h..h...p..h.h....c....h....l.....p...........h...p...................P.t......h....h.h.h.E..NV.....s......h...........h.............p...................................................h..t....h...h...t.....h...............t..........t..............h...s.........Y.t.h....................................h..l..p...s.t.....p..a.......t......hs...ptRpRh...h...hl...uh.....t...................................................................................
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................R...ho..hE...........hh.hh...s...h...t............................a..........h........................................h..hGsuhs..h...hh.....h........................................................................................................................................................................................................................... 0 634 1210 1611 +1799 PF01119 DNA_mis_repair DNA mismatch repair protein, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon SCOP Family This family represents the C-terminal domain of the mutL/hexB/PMS1 family. This domain has a ribosomal S5 domain 2-like fold. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.26 0.71 -4.64 151 4737 2012-10-03 01:04:38 2003-04-07 12:59:11 14 30 3934 16 1407 3880 762 118.40 30 17.88 CHANGED ltsl..hGpshspph.l.lp.tpsps........hplpGalupP.s.....hsRus.psh.hhalNGRhl+sp.....hlpcAlcpAYp.....shLsts+aPhshLhlclsPptVDVNVHPsKpEVRFpcpctlhchlhpulppsLt ..................................................................tslhGhth.h.pph..ltlc...hppts...................hplpGalu.p..P..p.............................hs.Rus....ps..hphhal..N........G...........Rh..l.+.s.p.............hls+....A....l.p...p....uYp.............................sh...L...s....t.s......p..........a..........Phhl..L.hlc......l.D..P.p.t.VDVNVHPs.KpEVRFpppc..tlhphlhpulppsL.t......................................................... 0 473 844 1161 +1800 PF02499 DNA_pack_C Probable DNA packing protein, C-terminus Mian N, Bateman A anon Pfam-B_1283 (release 5.4) Family This family includes proteins that are probably involved in DNA packing in herpesvirus. This domain is found at the C-terminus of the protein. 
19.60 19.60 19.90 19.60 19.40 19.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.31 0.70 -5.71 27 252 2012-10-05 12:31:08 2003-04-07 12:59:11 10 4 127 9 1 214 0 256.60 42 58.45 CHANGED LsQsssKIIFlSSoNoucpSTSFLaNL+sus-chLNVVoYVC--HhpchstpsssTuCsCYhLpKPsFIThDsslRpTAsLFLtsuFhpElhGs...tssst.sssptllo-pAhspF.lhRsSTssppsh..LspsLaVYlDPAaTsNscASGTGlAsluph...psphllhGlEHFFL+sLTGouutpIApCssphltulhsLHP.hhppV+lAVEGNSSQDSAVAIAshlpc.h.............shslhFaHstsp..sssltaPaYLLs+pKohAFEhFIttFNSGplhASQ-lVSpTl+LohDPV-YLlcQl+Nlpplshps....sstoYouK..p.pshSDDlLVAllMAhYlus...ssptsF+sl ......................................h.QtssKlIFlSSsNoucpoTSF..LhpL+sAs.p..c.hLNVV..oY..VC--Hhpchst..+ssu..suCsCYpLaKPsFIoh-ssl+cTANhF.l..tsuFhpEIhGs....sspt...s..........psp.llocsuhpcF............lhR.STsspp..sh..hutsLaVYlDPAaTsNt.pA.......SG....TGluhlsth........p.llhuh-ch.ltth.t.s....t.hh...h......a.....h.....h.h....p................................................................................................................................................................................................................................ 1 1 1 1 +1801 PF02500 DNA_pack_N Probable DNA packing protein, N-terminus Mian N, Bateman A anon Pfam-B_1179 (release 5.4) Family This family includes proteins that are probably involved in DNA packing in herpesvirus. This domain is normally found at the N-terminus of the protein. 
20.30 20.30 22.70 24.70 19.00 18.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.85 0.70 -4.87 26 149 2012-10-05 12:31:08 2003-04-07 12:59:11 10 3 92 0 1 142 0 267.60 39 43.48 CHANGED R.cphshPhlGslHusspaushhpshspphh.........t.t...s.........ptt....sspshlss........pLpsslppLpFhphs.tpstspcstYpushsoapuhhpsssFppLpsFltshuphLsssapst.................................pupLE.FQK.llMHshYFlsSlphs-psc+h.hshl+hhFslsphossslcpFKQ+AoVFLVPRRHGKTWhlVslIulLLuohcsI+IGYsAH.++sopsVFpEIhspLp+WFsscpl-hhKsp.TIsFsasstppSTlhFASsaNTN.SIRG .........................................................cRaQhshPtlGhlHsss-hs..htuhspchh............................s..tppp.hlts........pLppslppLpFt..s..psttp+ssYpsshsshpshhcsstFpQlssFlpcFsphLssuFpst............................tctc............................................................pspLE.FQK.lLhHsha.Fluu.spls-hA.p+l.hpaLptsFslshhS....pssl....c....pFKQ+ATVFLVPRRHGKTWFlVPlIuhlLss..h..pGIpIGYsAH.++sopsVFcElctpl++hFssphVscsKsp.sIohsasstu+SThlFASsaNTN.uIRG..................... 0 1 1 1 +1802 PF00712 DNA_pol3_beta DNA polymerase III beta subunit, N-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_631 (release 2.1) Domain A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.27 0.71 -10.21 0.71 -4.21 15 4817 2012-10-02 11:47:48 2003-04-07 12:59:11 14 8 4456 54 1006 4270 2846 120.10 28 32.57 CHANGED MKFsIpR-pLlcsLppVu+slssRsolPlLuslLLpsp-..spLolTuoDhElphpuplstpps.......ppsGslslsuRhlhDIsRsLPs.pplplphss..s+hhlp.uGp.......S+FsLsTLsu--aP..sLs ...................................M+Fsl.p.+stLlp...sLpplppsl...s.s..+....s.o..l....P.I.Lu.slhlp....s..p..s........sp.lsLouoD.h......-...l.p..l..p..s..p..l..s..h..p..ts........................psG...s.....h..s....l.sA+...hhh-...Il+...p..L....P.....c.......t..p...l.s..l...p..h..c..s........pp..h..h.lp..uG+....................Sc..FsL.ss.l.s..us-aP.pl............................................ 0 348 678 861 +1803 PF02767 DNA_pol3_beta_2 DNA polymerase III beta subunit, central domain Bateman A, Griffiths-Jones SR anon Pfam-B_631 (release 2.1) Domain A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.22 0.71 -4.06 15 4914 2012-10-02 11:47:48 2003-04-07 12:59:11 11 14 4454 54 1035 3844 2795 115.90 30 31.47 CHANGED slssssL+clIppTtFAhupp-sR.hLsGlplchpss....pLhhsATDuHRLAlpchphspstss..hsVIVPsKslhELs+llss...sp.lpltlu.......ssplthcssshhhTS+LlDGcaPD ............................lstphL+chI.p.p.T..t.FA...h..u...p.p...-..s.R......h.LsG.....lhh...c...l....p...ss..........pL.p.sVA..TD......u.....H....RL.A.........h........p.p............h.............p............l.............p.....p............s.........h......s.......................p...............h......slIl.P...t.KultE...lt..+.l.l..ss.......sp..p..s..l.p.l.t.lu..................s.s..p...l.h.h........p..........s...........s....s...........h....h....h.h.S.+.L..l.-.G.p.aPD.................................................................. 0 353 699 887 +1804 PF02768 DNA_pol3_beta_3 DNA polymerase III beta subunit, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_631 (release 2.1) Domain A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold. 
20.30 20.30 20.40 20.30 20.20 20.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.23 0.71 -4.54 16 4819 2012-10-02 11:47:48 2003-04-07 12:59:11 10 13 4457 54 1006 3167 2433 121.10 27 32.84 CHANGED ppllPpchsphlslssp.LtcAlpRsulLup.-+spsl+Lpls..sspLclsusssEhGpucE..tlcsshh..GcslpIuFNspYLLDsLsslcspclplphsssspshllpsssssssp.llhPh+ ......................................+llPp.s....s..p..plp.h.spppLhpAlcR.su.ll.....op....p...+..t......p..s...V...+L..p.l.s........p.s.pl.....p...l....ou.s....s....s.-.......u........c.u.pE.............pl...s.s......shs..........G.c.....s...l.c...I...u..F..Ns..pY..llD...sLps..l.....c...s.-...p.....l.plp.......h.......s.s.......s.........s......ps...s......l......l......p........s......s......s......s......p......s.....h...h...h..l..lhPhp...................................... 0 348 678 861 +1805 PF04364 DNA_pol3_chi DNA polymerase III chi subunit, HolC Kerrison ND anon COG2927 Family The DNA polymerase III holoenzyme (EC:2.7.7.7) is the polymerase responsible for the replication of the Escherichia coli chromosome. The holoenzyme is composed of the DNA polymerase III core, the sliding clamp, and the DnaX clamp loading complex. The DnaX complex contains either either the tau or gamma product of gene dnax, complexed to delta.delta' and to chi psi. Chi forms a 1:1 heterodimer with psi.\ The chi psi complex functions by increasing the affinity of tau and gamma for delta.delta' allowing a functional clamp-loading complex to form at physiological subunit concentrations. Psi is responsible for the interaction with DnaX (gamma/tau), but psi is insoluble unless it is in a complex with chi [1]. 
21.40 21.40 22.40 22.30 20.60 19.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.78 0.71 -4.34 151 1629 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1600 3 343 975 388 138.40 35 93.69 CHANGED M..sclhFYhLspssh..........tphhspLhpKuhppGh+.lhltspsppptptLDchLWsass-uFlPHsh..ssps...sst...p.PlhLsh.stt..sss..ts..lLlsL.sssh..Ps.hspF.pRll-lhsss-s.thptARp+a+ta+.st...Ghslph .........................MppssFYlLssssh.......................pphlCcLst...ctac.p..G.....p.....R.VlltspDc.ppAtpLD-tLWsh.ss-uFlPHsl.....sG..Eu........sp..ss................s....PV.luhspp....t...........sss.+s..........lLI..sL.pssh........ssh....s..ssF..scVl-hVsh--s.h+phA.R-RaKtYR.st....GasLsh............................. 0 79 183 259 +1807 PF00476 DNA_pol_A DNA polymerase family A Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family \N 23.80 23.80 23.90 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.15 0.70 -5.84 12 6834 2009-01-15 18:05:59 2003-04-07 12:59:11 15 51 4984 160 1526 5654 5505 340.20 33 42.62 CHANGED lchchhpthspchshp.htthttphhchuGppFNlsSscQLphlLF-chGl..hthTcp..Gs.oTssssL-pLt...sc.hlphllpa....Rpls+LpsTalctL.phlsstsG+lHTpaspssTtTGRLSSssPNLQsIPh+s......c.GpcIRpuFlAp.G..hslluuDYSQlELRlhAclusDpthlpsFppGtDlHphsAuchaGlshcs....VsuppR+sAKshNFGllYGhSAhtLuppLs.....IopcEApchh-tYFtpaPG..................................lcpahcchhc...........cu+ppGYlpTLhsRRc......lPclsSp....stshcptAERh......AlNhPlQGoAADlhKhAhIplc.hLpc.t.ps......RhhlpVHDEllh-ssp--..httlsthlpplMcpshth........pVPLhs-hshG.tsWhsA 
.......................................................................................................................................................h.....h.t.ttpht.t..h....tth.ttp....h.h.t....h..s...G..p..t.F.N.l.s...SsK.Ql.t....l.....L..F..c..c.......h....t..l......................................h......t......p......p........p...................t....................o......T..s.....t....pl.....L..p......pL....t..........................t..........h.....s......l..........t.hlLpa........................Rtl.s.K...L..p.o....T............a....s....c..t....L.....................t......h......l....p........................p..............s.......u............R...l.H...o.p.....a..............Q............s...........h.........s.t.TGR.L.S.S...s.s....PN.....LQ..N..IP..h+s.................................p.G+.....c....I.....RpuF...l.....s........s.........pG...........hhl..l..u..ADYSQIELR.lhA....H.....l....op..D.p........t.......l.......l...p..........A.......F.......p........p............G...........t.......D...........l...H..p..tT..A.....uc.l..F..s....l.s.h..cp...............................ls..s...p.p....R+.p.A.Ks.lN..FGl.......l...Y..Gh..o..sa......uL......u..p......p..Ls...............................................ls.c..p.-.....A....p....p.....hhc....pYappaPu.......................................................................................lppa..h..cp.h.hp...........................................pA.c..p...p..G..Yl..p........T.....lh..s.RRt.....................................hl.....P.c..l..p.up......................sh..h+t..h..u.E..Rt................................................AlNsPlQ.G.oAA.DIl.......KhA.M...lp...l...p.p..h.l......p..p.....t...t....h..ps....................................+..hlh....Q...VHDELlhEl....p....pc....hpth.....tt..h.l...t..p...hM....p.p.....s....h.p.h.................................pVP..Lh...sc.h.t..G.tsW.p............................................................................................................................ 
0 539 976 1295 +1809 PF03175 DNA_pol_B_2 DNA polymerase type B, organellar and viral Mifsud W anon Pfam-B_236 (release 6.5) Family Like Pfam:PF00136, members of this family are also DNA polymerase type B proteins. Those included here are found in plant and fungal mitochondria, and in viruses. 22.90 22.90 22.90 22.90 22.80 22.70 hmmbuild -o /dev/null HMM SEED 459 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.55 0.70 -5.82 35 975 2012-10-02 01:06:00 2003-04-07 12:59:11 8 18 262 25 572 970 44 307.10 22 53.07 CHANGED chs.....cshhhhstplhh.DhsFhLsphppht..............hhphthhppshhpphchphphhphhltDoatlhh.sSLccsupuasl............hpKGhhPYctlsp.h....h.....ttshPhhcYhcc.phhhhpcplhp.hc...s.taclhccslcYsphDlhlht....plhtpapphlpcphsl.psshphap...luu.ohtIF+phh...............................................................phsstlhsPs.cchaphlRpulhGGRs.sph.shhp.....lahYDlsShYPuAhp.ashPhGpPhtstt.pththpch.htlpp....lphhspphpshl.....hlsh+sssphh.....hsshsshsu.....hhaosE-Lchtlspu......hhcshhlcsh..ccpsslFs-ah...ccahplphsucppuccpp......luKlL.N.uLYG+Fupp.....spchhlhs-ph........-ptphppltssphhlpppthhps.......pshssphh.tsstshshtp..htphspsttst.pphthtsttsh 
...........................................................................................................................................................................................................................................h.hp.h.hh....l..h...h...t.................................h.h........................................................................................................................................ta..ch.c....t.h.....YCt.DVhlht...............chh.p.a..tp..hh..pthp...l....th..h........lsS.u...t..lFcphh....................................................................................................................................................s.t.lht.s...pp.hp.....a...l....p..p...sl.GGR...s..........................h.....h.....p.......p.lh...aDh..s..uh...Ys..SA.ls...h..h....G..Phs.h........................t..........................hh.......................h.h................h...........................................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 439 448 571 +1810 PF04042 DNA_pol_E_B DNA polymerase alpha/epsilon subunit B Wood V, Finn RD, Bateman A anon Pfam-B_12632 (release 7.3) and Pfam-B_5821 (release 7.3) Family This family contains a number of DNA polymerase subunits.\ The B subunit of the DNA polymerase alpha plays an essential role at the initial stage of DNA replication in S. 
cerevisiae and is phosphorylated in a cell cycle-dependent manner. DNA polymerase epsilon is essential for cell viability and chromosomal DNA replication in budding yeast. In addition, DNA polymerase epsilon may be involved in DNA repair and cell-cycle checkpoint control. The enzyme consists of at least four subunits in mammalian cells as well as in yeast. The largest subunit of DNA polymerase epsilon is responsible for polymerase epsilon is responsible for polymerase activity. In mouse, the DNA polymerase epsilon subunit B is the second largest subunit of the DNA polymerase. A part of the N-terminal was found to be responsible for the interaction with SAP18. Experimental evidence suggests that this subunit may recruit histone deacetylase to the replication fork to modify the chromatin structure [1]. 20.50 20.50 20.70 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.41 0.70 -5.08 34 1130 2012-10-02 19:15:56 2003-04-07 12:59:11 11 22 407 8 787 1175 118 215.10 19 39.82 CHANGED llhsSGlhhs...tsshs....hchLpchlptht..........ss.sstlllhGshlstptt.ht.t.......................shstshhhltplpshlppls....tssplsllPuspDssst........hLPQtPh.pt.hhsc.pth............................hphsoNPsphpls..shplsssss-hltclt+tphtpss............................................................................hchlcplLpppplsPhsP..................................shhshsatpppshhl.ps..hPclhlsus...ttpFsp 
.................................................................................hhlhSslhhs......s.shs...............hphLt...chl.ptht........................ssts.s..h.lll.hGs.hlsts..t..h.h.tttth............................................sh.p...p...h....th....h....t......p....l.ts.h...h.tplh............tshp.llllP.G.s.pD....ssst..........shP...Q.s..l.pt....hh.s.ph.pph..................................................hphsoNPsphpls...................sh.pl...hhsp...schlpclt.+.t..p..h.t.t.s.t..................................................................................................................................p...tc.l.h.cplLpppphsPhhs...........................................................................sh.h.sh..sa...p....h...p...s..s...h...hl....s..........hP...clhhhss......F.............................................................. 0 269 448 662 +1811 PF04931 DNA_pol_phi DNA_pol_V; DNA polymerase phi Wood V, Bateman A anon Pfam-B_10566 (release 7.6) Family This family includes the fifth essential DNA polymerase in yeast EC:2.7.7.7. Pol5p is localised exclusively to the nucleolus and binds near or at the enhancer region of rRNA-encoding DNA repeating units. 
22.00 22.00 22.20 22.00 21.30 21.90 hmmbuild -o /dev/null HMM SEED 784 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.21 0.70 -6.76 5 422 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 249 0 305 433 5 450.40 15 60.45 CHANGED huYlLcRLI+GLuSsRpuARssauluLsplLstsp-IsssclL-tLc+csshsusp+uh-cssla.h...usLlsshsullhupll+cKsplsEslFppltlLhlhpuptsaLpusslplIsctLccsshpp.sh.sLhshlsKlLps.hcVs.oS-clLula.Lshs+VhsNccSp..sVthhsp-shcsLhclh.lsusolppcpscsclAusLlpLsKs..Klsusap.lspcthhKpsutcusshssphlslssslhssEphthlcpsspppptlp+sssS+spsshcFhcphppslspthpsEccsspRhhhphhshltsscppssshsshsphhplhppLsopshss.loss+cspplh.pssVcoltchsshppshlpssstplscuolss-ppp+htpsphlhllcpthppKptshlcph..FplapsactsKssTst..AlsstFS...EsssptlclsthsuLhhspSsthpQs.s.tp.GchtLspLtplsphpLp+pLscsstpup.csshscppppsasplcppL+pcsupsu-spspAFcpLlllVsLplhspss-S........s-VLpDlpsChcKuhscssp....tp.+EpsscEPssM-VLTElLLSLLuQsSsLhRclVchVFspFsp-lsp-uLpllL-VLKtc.shscpsth.tGEEE.EDsh..oDsDED-pcsl-DuEsESE-........DsEssEsDEp-DDuEusptlh.hc.ulcppLscsLp.sssl.................cGu-S-DEEsMDDEpMhcLDspLA...plF+E+Kc...+hQuscEcKppsQps+ppllpFKlRVLDLLElYl 
.................................................................................................................................................................................................................t........h..........l..hh...............h...............................................................h.......t.........h....t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..............................................................................................................................t.....t................hhh-hhlthh.p......t.....t.....h+
t....hh...h.......................ht.tth..hht........h.........................t......................................................t................................................t.........p...............t....t............................p.......t..t....t.....t.t.t............................t........t.t....t................t.....t....................................................................................................................t.t......p.............t.......h.s...-tt......h.t..h...s..l....thh..............................................p.+hhthh........................................................................................................................................................................................................................ 0 105 176 258 +1812 PF00336 DNA_pol_viral_C DNA polymerase (viral) C-terminal domain Finn RD anon Pfam-B_107 (release 1.0) Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.66 0.70 -5.11 3 8753 2009-09-12 08:13:51 2003-04-07 12:59:11 13 16 93 0 1 8052 1 158.20 90 27.33 CHANGED GSYGSLPQDHIlcKIucCFRKLPVNRPIDWKVCQRIVGLLGFVAPFTQCGYAALMPLYsCIQS+QAFTFSLlYKoFLpcQYhcLYPVARQRuGlCQVFADATPTGWGLVhGsQRMRGTFuuPLPIHTAELLAACFARCWSGA+LLGTDNSVVLSRKYTSFPWLLGCAANWILRGTSFVYVPSALNPADDPSRGpLGLLRPLPRLhFRPSTGRTSLYAsSPPVPsHRPsRVtFASPLQss-AWRPP .........GSWGTLPQEHIVQKIKpCFRKLPVNRPIDWKVCQRIVGLLGFAAPFTQCGYPALMPLYACIQuKQAFTFSPTYKAFLsKQYLNLYPV.A...RQ................................................................................................................................................................................................. 
0 0 0 1 +1813 PF00242 DNA_pol_viral_N DNA polymerase (viral) N-terminal domain Finn RD anon Pfam-B_107 (release 1.0) Family \N 29.90 29.90 30.00 30.80 29.00 29.80 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.30 0.70 -5.43 15 5612 2009-01-15 18:05:59 2003-04-07 12:59:11 12 21 92 0 0 5444 0 301.90 78 41.87 CHANGED MPL..............SYpHFR+LLL.....L-E..Eu.....GPLE-tLPRLADEsLN+RV.........AEsLNLth.sVsIPWTHKVGsFTGLYSopsssFNPcWpTPsFPcIHL+pDlIs+CppasGPLTlNEKRRLKLlhPARFaPpuTKYhPL-KGIKPaYP-aVlsHYFpTpHYLHTLWcAGILYKREoo+SASFpGoPYSWEQc..LQHGs..h.pt..t.t.cshsuQSutILuRu..........................ssGsshQuphppSRLGhpup...pGplApuppGpShulRuRs+uospRshusEsSuSspssppA.ScSuSshpQSusRcsAaophSTocRpSSS....upul-hpslPssospupspuslh.ChWhpFRsS.+PCSsYCLsHlVsLL-DWGPC .......................MPLSYQHFR+LLL.....LD-EA.....GPLEEELPRLADEsLNRRV.........AEDLNLG.NLNV.SIPWTHKVGNFTGLYSSTVPlFNPcWpTPSFPpIHLppDIIs+CpQaVGPLTVNEKRRLKLIMPARFYPshTKYLPLDKGIKPYYPEpsVNHYFpTRHYLHTLWKAGILYKRETTRSASFCGSPYSWEQE...LQHGp..h.psspR+G-ESFssQSSGILSRu.................................sVGss...lpSph+pSRLGLQsp...QGpLAptppGRSGSIRARlH..PosRRsFGV.EPSGSGHhsNpA.SsSSSCLHQSAV.RKsAYS.pl.ST.SK+p..S..SS....GHAVE.LHslPPsSARSQSpGPVhSCWWLQFRNS.KPCS-YCLoHIVNLLEDWGPC................................ 0 0 0 0 +1814 PF04104 DNA_primase_lrg Eukaryotic and archaeal DNA primase, large subunit Kerrison ND, Finn RD anon COG2219 Family DNA primase is the polymerase that synthesises small RNA primers for the Okazaki fragments made during discontinuous DNA replication. DNA primase is a heterodimer of two subunits, the small subunit Pri1 (48 kDa in yeast), and the large subunit Pri2 (58 kDa in the yeast S. cerevisiae) [1]. The large subunit of DNA primase forms interactions with the small subunit and the structure implicates that it is not directly involved in catalysis, but plays roles in correctly positioning the primase/DNA complex, and in the transfer of RNA to DNA polymerase [4]. 
21.10 21.10 21.60 22.60 21.00 20.80 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.94 0.70 -5.12 48 540 2012-10-01 20:20:07 2003-04-07 12:59:11 9 10 444 11 383 545 79 237.20 27 54.76 CHANGED pha+lsapshh.cLsp..p+........phhlc.pGhsalstpphhsllhppappplpc..slthhtpshs..cl....pcct+..lt.hhppls................................ttht.tp..h.splstppFP.CM+plhppL...ppsp+LcHtuRhplshFL+.......................slGlsh......--slphacpth..........s.scFsc.php..YsI+HthG...........-Gppss.....YsPhs.....Cpplh.............csaG...CPh...........s.-pL..................................................................................sppttl.pHP.pYa ..........................................................................acl.satch...cLsp....pR................phhlppG..hsals....t...pp....hh.....sllhppappplpcslthstpths....pl....ppcp+..ltsl..lppls...h...t.............................tstl.shpp....l..spl.s.pp..FP.CM+plhptL...cpspHL+HtuRhQhslFLK............................uIGlsl..............--sltaa+pt.att.............hs.-cFs+...pap..YslR.HtaG.............cGpcss.....Y.sPho.....Ctplh.....t....s.t..........ctHG...CPa+p..........s.ctL.t.l..............................h.t.................lh....t.p...phuC..hh...p........................s....l...HP..aa........................................................... 0 138 224 321 +1815 PF01896 DNA_primase_S Eukaryotic and archaeal DNA primase small subunit Enright A, Ouzounis C, Bateman A anon Enright A Family DNA primase synthesises the RNA primers for the Okazaki fragments in lagging strand DNA synthesis. DNA primase is a heterodimer of large and small subunits. This family also includes baculovirus late expression factor 1 or LEF-1 proteins. Baculovirus LEF-1 is a DNA primase enzyme [1]. Bacterial DNA primase adopts a different fold to archaeal and eukaryotic primases. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.98 0.71 -4.39 73 1657 2012-10-02 15:26:12 2003-04-07 12:59:11 14 27 1225 21 676 1488 174 154.80 19 29.20 CHANGED FDlDhschsshcs...............h.hhh.hhhhshthh.tsltc-hu.hpp..............hhhhhSG.........p+GhHlal.....s.ppshphsshpph....h.................................................tpthsptlhpppshhhst.................................................................................h.h.hlDhplspphpp......hl+sPhsh.......cstssplsss............................lsh ..............................................................DlD.........t.............................................h...t.s.t..h.hptl..h...c-hGhps...............h.h..sSG..........s+GhHlal.............hhppth.sh....spspch.............................................................................................................................spthsppltp.pt..s...hhspt.ht.........................................................................................................................ph.h.h..lDht.ssphpshlps.....sh........ps...ss.t.lss.lt.............................................................................................. 0 213 410 566 +1816 PF03604 DNA_RNApol_7kD DNA directed RNA polymerase, 7 kDa subunit Griffiths-Jones SR anon PRODOM Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.91 0.72 -4.48 22 397 2012-10-03 10:42:43 2003-04-07 12:59:11 8 6 344 96 272 368 26 32.10 48 44.18 CHANGED YlCu...-Cuscsp.lpts-slRC+pCGaRILYKcR .....YlCu...-Ct...pcsp...lc.t......t.-.s....I..RC+cCGaRIlYKcR... 
0 81 151 225 +1817 PF00521 DNA_topoisoIV DNA gyrase/topoisomerase IV, subunit A Finn RD anon Pfam-B_55 (release 1.0) Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.44 0.70 -5.63 110 11276 2009-01-15 18:05:59 2003-04-07 12:59:11 15 57 5442 61 2091 8937 6799 383.20 48 53.48 CHANGED RAlPcltDGLKPVQRRILauM.hc.h.hp.sppapKsAplVGpV..hupYHPHGDsSla-ulVpMAQ.sash..phsLl-spGNFGSh.sGDss.AAhRYhEs+Loclupp.lht-l-cpslcahssaDspptEPphLPshlPsLLlNGusGIAsGhATsIPPHNhtEll-uhhthl.c.s.ph...pl....hp.h..h.sPDFPouuhlh......st..pthhptYpsG...p..Gplp..lRuchph...........t...tpllITElPapsspssllc.pIscll....psp.....plp.lt..-lcDp.occ..sl+lllc..lcp...shp...sphlhst..LachTpLpsohsss.hslhcs.p...P+hh.slp-lLcpalcaRhcllp+RppapLpchpc+hc........llpthhhhh..........spl-tllplIcss.s.....tpps+p.pLhpc..lsc.....................................................................psctlL.shpLtpLTph-hp+lppEhpclppclpplcphlss...h.phhhc-Lc ...........................................RALPDlRD..GLKPVpRRlLYuM.pc.h.G................s........s.......+.....sa+KSARlV....G-V.....hGKY.HPHG............DoulY-uhV...RMAQ.sFoh..........RY.LV..DG..pGNFG..........Sh.DGDsu.AAM.RYTEuRh.ocl.u.tp.l.L............p.D..lsc....p..........TVDahsNa..Ds..oppEPtVLPu+hPNLLlNGu.o.GI.AVG..M..ATsIPPHNLsEllsushhhlc....ssphs......................hppLhp......h.....l.GPDFPT.uuhIh..........uppsIpcuYcTG.+....Gplh...hRuchchE................................ttt.ppllloElPa..p.................V...s...p...s...p.l......lc...pI.schh....ppK...............................+l......t..hI.s.......clRDE..S.....c..........+.....puhR.lVIc...l+c.....css.............sp...hl.h..Np...Lap......Tp..Lpt..sa.sh.Nh.l..........s..........l.ps.p....................Pphh..sLtphlptalpaphcllhRRptacLpKsppRhH............Ilc..GLhhAh...........tplDplI.tlIRtu.........s.........tts.......+ttLhtp..........thsc.....................................................................................Q..ApAIL.php
L.t+Lst.h.-t.cl..tEhpc.L.tpltpLttlLss..thh.plhtp-........................................................................ 1 693 1331 1755 +1818 PF01556 CTDII DnaJ_C; DnaJ C terminal domain Bashton M, Bateman A anon Pfam-B_342 (release 4.0) Domain This family consists of the C terminal region form the DnaJ protein. It is always found associated with Pfam:PF00226 and Pfam:PF00684. DnaJ is a chaperone associated with the Hsp70 heat-shock system involved in protein folding and renaturation after stress. The two C-terminal domains CTDI and this, CTDII, are necessary for maintaining the J-domains in their specific relative positions [2]. 30.50 30.50 30.50 30.50 30.40 30.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.66 0.72 -4.08 113 11443 2009-09-12 04:15:19 2003-04-07 12:59:11 13 45 5229 30 3594 8830 3406 77.70 31 24.15 CHANGED DLhh.pl.slohhpAllGsp.lpl..slcG..plplp....lssssps.Gphl......plpscGh...tps.........tpGDLhlplp.lp..hP..pp.l...s..s.pp+ph .......................................slhh.pl.slsh....s.p..AsL..G.........s......p....lp......l......s.....T.....l..............c.........G...............p........lplp...............lP..s......G.......o..ps..Gpph...................Rl+G+.Ghsthpss........................tpG....DLhlplp..Vt...hP..p..p..l...s..t.ppp................... 0 1220 2251 3021 +1819 PF00684 DnaJ_CXXCXGXG DnaJ central domain Yamout M, Bateman A anon Pfam-B_89 (release 2.1) Domain The central cysteine-rich (CR) domain of DnaJ proteins contains four repeats of the motif CXXCXGXG where X is any amino acid. The isolated cysteine rich domain folds in zinc dependent fashion. Each set of two repeats binds one unit of zinc. Although this domain has been implicated in substrate binding, no evidence of specific interaction between the isolated DNAJ cysteine rich domain and various hydrophobic peptides has been found [1]. 
32.70 32.70 32.70 32.70 32.60 32.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -11.02 0.72 -3.92 63 7168 2012-10-05 09:33:39 2003-04-07 12:59:11 14 35 5271 3 2059 5364 2289 63.40 43 16.70 CHANGED CspCpGsGs.....ssssht.pCssCsGpGhhhptpp..h.hhhhphptsCspCpGpGph..h..pc.CppCpGpG ...................CcsCpGoGAc...........sGop...sp.....TCspCpGsGplphp......p...............Ghhphpps......CspC.pGpGph....I........c-PCppC+GpG................ 0 693 1292 1727 +1820 PF03265 DNase_II Deoxyribonuclease II Mifsud W anon Pfam-B_4508 (release 6.5) Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild --amino -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.03 0.70 -5.43 37 585 2009-01-15 18:05:59 2003-04-07 12:59:11 10 37 128 0 441 567 4 252.60 21 48.86 CHANGED hoChsppu.................psVDWFhlYKLPp.............GhpahYhDs....ss.ssWphustslsss.p.sslspTlpphh....ssspsshshlhYNDpPP.....ptps...ssphuHsKGVlhhcpp...pGFWllHSlPc.FPshs....hu.YsaPssspphGQphlClohshpphpp.IuptlhhppPhlYsssl........Psths..sph..spLpplspspph...........psshpppstlpohpG....tphptFuKss.passDlYs....shluptLpsslhlppWtpsss...hLsosCs.ssa.h.shshp.lsl..sp.....aspppDHSKWsVs............ssssasClG-hNRptppt+RGGGslChp............stplapsFpphst..php.sC 
..................................................................................................................................................h.phh....hhhKh.st.....................................t.thhhhps..................ts..ttht......t..............h..st....t...tshtt.oht.h.......................tpt.......phhhhhYNstsP...........................t...............tt....h.....upsK..............G........l....lhh....spt...........tu.h.Wl.lHohPt.FPs......................s..a.........h.P.....s......t.....................................................Gp.hlChoh.t...t.p.....hpt....ls........t........lhh.tts.la..ph.................................s..t............t.h.......h.p...lhpt....................s..........hto...ts.......h..auK.t...t............................-ha.......................thl....h...h.t.t.s.h.s.........W.........tttt........s.Cp.................h..h...t..hth...t.t..............................ht..ttDp.up..ahls.....................tshhChss.sh.......tp..psuhhhCh................lht.hp..h.................................................................................................................................................................... 0 336 365 405 +1821 PF01712 dNK Deoxynucleoside kinase Bashton M, Bateman A anon Pfam-B_1744 (release 4.1) Family This family consists of various deoxynucleoside kinases cytidine EC:2.7.1.74, guanosine EC:2.7.1.113, adenosine EC:2.7.1.76 and thymidine kinase EC:2.7.1.21 (which also phosphorylates deoxyuridine and deoxycytosine.) These enzymes catalyse the production of deoxynucleotide 5'-monophosphate from a deoxynucleoside. Using ATP and yielding ADP in the process. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.82 0.71 -4.29 14 2493 2012-10-05 12:31:08 2003-04-07 12:59:11 14 11 1392 149 635 1837 992 145.40 25 62.65 CHANGED hQhhhLspRh+.hhs.........pppshlh-RSlassphlFspshhcpGphspp-aphYppha-thh.phsp........cllIYLcss.-sslcRIc+RuRs.E..........sIsh-......Yh-pLc......thatthhpphshsshhhlcsschDh.ts.pc..phhpplh .........................................................hQhaFLspRacphpc..................ttts.hlh.-R..S..la.pD.th......lF.t.p.t...h.h.cp.....G..ph......s..c...p-.aphYpcl.a...ch.h.h.pph.h.................................PD..llIYLc..s.sh-s.h.h.cR.....Ipc....R..GR..sh.E....................p.l.s...c..........................Yhc.p.Lp...............ptYcp.a...h........p....p......a....s....t....s....s......h...l...h..l...ss...sp..h.Dhhps.psht.......h.................................................. 1 202 359 503 +1823 PF00404 Dockerin_1 celCC; Dockerin type I repeat Finn RD anon Prosite Domain The dockerin repeat is the binding partner of the cohesin domain Pfam:PF00963. The cohesin-dockerin interaction is the crucial interaction for complex formation in the cellulosome [1]. The dockerin repeats, each bearing homology to the EF-hand calcium-binding loop bind calcium [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.28 0.73 -6.42 0.73 -4.01 130 1647 2012-10-02 16:17:27 2003-04-07 12:59:11 13 223 55 24 504 1211 47 21.00 49 5.22 CHANGED DlNsDGpVsuhDhhhl++alh ..DlNuDGplNShDhslLK+YlL...... 0 294 503 504 +1824 PF04118 Dopey_N Dopey, N-terminal Wood V, Finn RD anon Pfam-B_17466 (release 7.3); Family DopA is the founding member of the Dopey family and is required for correct cell morphology and spatiotemporal organisation of multicellular structures in the filamentous fungus Aspergillus nidulans. DopA homologues are found in mammals. S. 
cerevisiae DOP1 is essential for viability and, affects cellular morphogenesis [1]. 25.70 25.70 25.70 25.70 25.50 25.50 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.97 0.70 -5.65 37 385 2009-09-17 06:44:01 2003-04-07 12:59:11 9 9 257 0 258 391 0 285.80 37 15.96 CHANGED pDt+h++asssl-+sLppF-o.lpEWADhIuhLu+L.KuLQ......ttsshshlPp+hhVu+RLuQCLsPsLPSGVHpKALElYshIFphlup-s...Lsc-LslahsGLhPhhuaASloV+shhlplhEpYhl.sLsp.sLcsshpullhuLLPGLE-EsSEhappshpLl-plp..ptlsps..............hFapslalsllsssphRLuuLsalsp+hspht........................................tpptp.hlss-ssLll+ulssuLsDc.......slLVpRshLDLLlpphPLsS....sl........l....stpDpphLlhushtlhh++DM.SLNRRlasWlLGspspspsst........pu ...............................................Dt+aRpYsusl-+ALppF-t..spEWADhIShLu+L.KsLQ...........................ps.ph..t.hlPc...+hhluKRLAQCLpPuLPuGVH.KALEsYphIFphlG..sct.............Lu+DL.lY...s...GLhPlLuhAuh..sV+............PhlLsLhEpahl.sLsp.sL+PuLpuhlhulLPGLE..E.sSEha-..+o.pL.l.pplp..ts.lspp...................................hFapsLWhull...sssuhRlsulsalhp+ls+hh...........................................................................................................hp..ptthh.....lus-...tLhlculssulpDp.......slLVQRshLDlllhthPhps....sh................hppsDh.hllpuuhtsll..RRDM..SLNRRLasWLLG.c.pt.....st.t..................................................... 0 92 133 203 +1825 PF04556 DpnII DpmII; DpnII restriction endonuclease Kerrison ND anon DOMO:DM04410; Family Members of this family are type II restriction enzymes (EC:3.1.21.4). They recognise the double-stranded unmethylated sequence GATC and cleave before G-1 [1]. 
http://rebase.neb.com/rebase/enz/DpnII.html 20.90 20.90 21.30 20.90 20.70 19.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.67 0.70 -5.34 19 239 2012-10-11 20:44:43 2003-04-07 12:59:11 7 3 216 0 35 188 11 259.50 38 88.94 CHANGED cpac.FlpohpsTstshsYaVsapKVpcNlpp..hc...lcLNhLNhLIGcc..slcp-hhpLhpchPpllpslPhLlAlR-pphchhh..............stptsh.........shhphsFpp......hs..pl..-phhcFhcpTGLh.clhppptl+sLsDYVhGVEVGLDoNuRKNRoGchMEtllcphlpchs....shp.ahpQhphstlcch...auh-ls.....shtp++FDFsl..cspcKlYlIEsNaYsuuGSKlppsAppYpplspclp.tpsshcFlWITDGpG.W.pouKspLpEuFcplshlaNLphlcpuhLcclh ..................................t..Fc.alpshp.sostshsaasDapKlhcNlpplc..lpLNhLN.L..lG..pc..slppchhpLhpphPchhpslPlLlAhRppph.hh..........stptsh..............phsF..pp......hs.....h..-phh.FhccoGLh.plhps+hlpsLlDYVhGlEsGLDoNuRKNRoGchMEsllpphl.ppts....shs.........ahcphph...p...c...lcch...ash-ls.............sps.K+FDFVl.....cspphlYhIEsNFYuu.uGSKLNEsAcuYpplupc.hp.shsshcFlWlTDG.p.G.W.hsA+ssLpEsFc.....hl.hlYNlspl.cpshlppl.h.......................... 0 15 27 33 +1826 PF04244 DPRP DUF426; Deoxyribodipyrimidine photo-lyase-related protein Mifsud W anon COG3046 Family This family appears to be related to Pfam:PF00875. 
21.00 21.00 21.20 24.00 20.70 20.40 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.55 0.70 -4.82 107 598 2009-09-14 12:31:16 2003-04-07 12:59:11 8 3 588 3 197 601 2407 216.30 32 43.34 CHANGED LhLlLGDQLststuslpt....stsps.hllhsEstpEss.....YVtHHhQKlshhFuAMR+FAppLcppGapVtYhpL-.-....ststt..slsstLtchhppp.thpplthpcPsEaRLtpplcp.htt..thslsl.phhsss+Flssts-hpsahp..s.+KphpMEhFYRcMR++hslLMtsss.pPhGGcWNFDs-NR+phst.sh...phPpshpFpsD...thspcllphlppchs..sph......Gslcs...Fha ....................lhLlLGDQLs...ptuhhpp.....spspshhlhsElppEss.....YVhHHhQKlshhFuAMRpFAptL.ppp...GapVpYlplDs...stsht....shsstltph...hpph.t.spplphpcPsEaRLhpplpp.h.t..................hsl............sl...phhssc+FLsshs-hspahp.......s..p.....KplhM.EhFYRcMR..++hslLM....s.....ss...pP...hGGpWNaDs-NRcthsps..........phP.pshpass.....D.....spplhphlcpphh..sphGplps...h.a................................................................. 1 60 128 166 +1827 PF05219 DREV DREV methyltransferase Moxon SJ, Bateman A anon Pfam-B_6662 (release 7.7) Family This family contains DREV protein homologues from several eukaryotes. The function of this protein is unknown [1]. However, these proteins appear to be related to other methyltransferases (Bateman A pers obs). 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.74 0.70 -5.52 3 166 2012-10-10 17:06:42 2003-04-07 12:59:11 7 4 121 0 102 1665 951 221.30 40 80.85 CHANGED WYssN+E.Ls-shQulFVsShPDusTQpFL-NSpclSuNlFhQLaHuLusSILShFMS+TDINGhLGRGSMFlFSE-QFpKLLsIst.p.sSs.cs++LLDLGAGDGEITc+MuPaFEE........VYATELSpTMRsRLpKKsYNVLotlE.WQpTshpaDLIoCLNLLDRCasPhKLLcDI+s.ALcP.NGRVIVALVLPapHYVEsNs.G+ap+PcslLElsG..poaEEcVs+hh.EVF+sAGFcVEAWTRLPYLCEGDMaNSaYaLsDAIFVLKP ...................................................................................................................Y..p.p..ls..phtshFlt.....DptTp.FLppohphSs.hhhpha.+shspslh..p.hFhopTslNGh...L....tR.GuMFlhS.pQ.apcLL..p.h....s.................................s................p.s.......p..........p.....L.L......DlGAG.DG....p..VT.t.h.h....u...s...h...F...p...c.........................l....a....s....T.......E.....h..S.........................s.........M.......h.........h..............p........L.........p.........c..........+..........t........a................p............V.......l........s.....h........p.....-........a.......p.........p........p.........s.......h........p..........a.....D........lI....o...C....L....N....l...L.D.....R..C..-......p...P...h..s..L..L...ccl+p.sLpP...pG.h.l.l..L...A....l....V...L...P....a.p.s..Y..V.E.s....s....s.........p....p..pP..p..c.h......L....p..h......pG........tsaE.....-..p..l..s..p....h...p.V.h.t..sGF..l.ta...o...+lPY..LCEGDhhpsaYhLsDslhVLp.......................................................................................................................... 0 41 52 81 +1828 PF02635 DrsE DsrE/DsrF-like family Mian N, Bateman A anon COG1553 Family DsrE is a small soluble protein involved in intracellular sulfur reduction [1]. This family also includes DsrF. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.48 0.71 -4.06 254 4389 2012-10-01 20:53:36 2003-04-07 12:59:11 10 5 1987 31 1183 3383 682 116.50 19 88.08 CHANGED phlhhlsp.......sst.sphshthst...ss..tths.............hclt.lhhhusulthhhp............................h.....tp..thptLtp.t.....lphhlCtsuhpppuls.pp.............l..........phthsuhs.plsph.pp.u...phlph ..........................................................................................hhhlhpts.sh.ust.tstpuhph.s.....uh...tt.s..............tclt..lFhhs-.GVh..thhp....................................................s.tthsh...................hp....thptLst.ts......lplhlCs.ss.h....p....t.R...Glsttp................sL....h.......sh.ph.sshs...t.L.sphh.hp.s-.pllph................................................ 0 373 740 996 +1829 PF01916 DS Deoxyhypusine synthase Enright A, Ouzounis C, Bateman A anon Enright A Domain Eukaryotic initiation factor 5A (eIF-5A) contains an unusual amino acid, hypusine [N epsilon-(4-aminobutyl-2-hydroxy)lysine]. The first step in the post-translational formation of hypusine is catalysed by the enzyme deoxyhypusine synthase (DS) EC:1.1.1.249. The modified version of eIF-5A, and DS, are required for eukaryotic cell proliferation [1]. 
21.00 21.00 21.60 23.00 20.50 20.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.65 0.70 -5.29 7 956 2012-10-03 09:55:27 2003-04-07 12:59:11 12 6 712 6 524 948 1511 277.10 35 85.61 CHANGED hptLlchhtphGFQAo.plGcAscIhcpMhcp+..................p..pchslFhGYTSNlISSGlRchltaLVpcphlsslVTTAGGlEEDhIKsLus.thlG-atlcsppLRccGlNRIGNlhVPN-sYptFE-ahhsIh-phlp.pp....p.s..............hTsSchIaclG....Kcls...cEcSllYWAaKNpIPlFCPuLTDGSlGDMLaFaphcs.stpLplDlspDlp+lNshuhpu+csGhIILGGGlsKHaIhNAsLhRsGsDYAVYIsTup.aDGS.SGA.P-EAVSWGKI+scAchVcVauDAThlFPllVAtshs .......................................h...lhpth...p.h.G.FpA...s.plucAhch.h.pp.Mhp.c.........................................t.spl.FLuhoushlouGl.R.c.h.lt.LlpcphV-slVoTuu.sl.E.E.Dhh.....cslu........p......a......h......G.......s.......a.......s.......h.......s.............s.t.p.LRppGl.NR...IsslhlP.p.-.s.a.p.t.h.E....c.al.h.s.lh-p..hhtcpp...................................................................aospchltc.lG.........ccIs.........sc...s....S.lhhhAhc.........p.......s.lPlasPulsDuSlGs...h.l..a....h...a....s.....h.....c....s...........s....t................l...........h.....l..D.h........l....p............D.h......pplss........h..............s...h...p........u.....p................c........s.......G....hlh....lGGGlsKca.........h........h.......s........s........s........l......h...........+.......s.........G...........t.......-........Y.....u...l........IssupphDGS..SGApPcEAlSWGKl...c...h...s...u...p....s...p............VauDATlshPLlsu.hh....................... 0 196 337 452 +1830 PF01323 DSBA DSBA-like thioredoxin domain Bateman A, Mifsud W anon Bateman A & Pfam-B_2082 (release 6.4) & Pfam-B_5982 (Release 7.5) Domain This family contains a diverse set of proteins with a thioredoxin-like structure Pfam:PF00085. This family also includes 2-hydroxychromene-2-carboxylate (HCCA) isomerase enzymes catalyse one step in prokaryotic polyaromatic hydrocarbon (PAH) catabolic pathways [2,3,4]. 
This family also contains members with functions other than HCCA isomerisation, such as Kappa family GSTs (e.g. Swiss:P24473), whose similarity to HCCA isomerases was not previously recognised. The sequence Swiss:O07298 has been annotated as a dioxygenase but is almost certainly an HCCA isomerase enzyme. Similarly, the sequence Swiss:Q9ZI67 has been annotated as a dehydrogenase, but is most probably also an HCCA isomerase enzyme. In addition, the Rhizobium leguminosarum Swiss:Q52782 protein has been annotated as a putative glycerol-3-phosphate transfer protein, but is also most likely to be an HCCA isomerase enzyme (see [5]). 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.94 0.71 -4.75 30 5572 2012-10-03 14:45:55 2003-04-07 12:59:11 15 31 2655 78 1612 6482 3933 172.90 17 79.27 CHANGED plchaFDahCPaCahuppplpphtpth................s.lclph+shslhss.....s.s..............p......th.......th.h.............sopputplhhhsttps....tphspthapshaspstshscps...ltplspcsGl.-ssp...hpphhsu...ssspstlccspptAtphultGlPshhl...ss......c.....hhhGssph-hltptlt 
.................................................................................................l..ah-hhCPa.C.a.h.h.p.t....h....l.p.t..h.hpth........................................s..h..p..l..p..h..p...s.h....t...lhst...................................................................................................................................................................................................h..sshp...u.h...p.h..h...h.h.s.t...tps........................p..h..h.ts.l...a....c....u....h.....a.....p.....p........s......p......s....l....s.....s.......s......p.s...............l....t....p....l....h....t.....p....s.......G.........l...s.....t...t..p.................h.p...t...s.h...s...s.............s...h..p...s.t....l.....p.p...p....p....p.......t.......A..........t.......p...........h.........s....l..........p..........G.l..Ps.hll.........ss..........c...........hh.......G.t....t.h.....h........................................................................................................................ 0 421 907 1308 +1831 PF02600 DsbB Disulfide bond formation protein DsbB Bashton M, Bateman A anon COG1495 Family This family consists of disulfide bond formation protein DsbB from bacteria.\ The DsbB protein oxidises the periplasmic protein DsbA which in turn oxidises cysteines in other periplasmic proteins in order to make disulfide bonds [1]. DsbB acts as a redox potential transducer across the cytoplasmic membrane and is an integral membrane protein [2]. DsbB posses six cysteines four of which are necessary for it proper function in vivo [1]. 
22.10 22.10 22.90 22.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.26 0.71 -4.43 184 2959 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 2169 13 514 1795 1819 155.50 23 77.26 CHANGED M...............pt+thhhlhhlsuhshhhs.....AL.ahpal....hshtPCsLClhQRhshhslsllslluhht.........hchh.hhhhhlh.uhhGhhluhhHshlptt....t....sCs.........h.t.hshtphh...lhts.sss.Csphsap.h..lG................lohstWsllhFhhlsllhlhhhh .................tht....t+hh.hhlhh.h.ss.hslhhs.....A.h.aaQal....hthtPCsLClhpRhshhshshsu.lluhlt...............hshchh...thh.hhl.h.uhhG....l....t....luh...p....H......sh......lphh..............Ps.s..h.s.....sCs.....hh.t..h...phh...shtphh...h..hhs...sss...C.sth...ht..h..hs.....................................lphsthth.hhFhhhhllhhh.h.h.................................................................. 1 116 272 395 +1832 PF02683 DsbD Cytochrome C biogenesis protein transmembrane region Bashton M, Bateman A, Eberhardt R anon COG0785 Family This family consists of the transmembrane (i.e. non-catalytic) region of Cytochrome C biogenesis proteins also known as disulphide interchange proteins. These proteins posses a protein disulphide isomerase like domain that is not found within the aligned region of this family. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.65 0.70 -4.73 10 5377 2012-10-02 18:22:22 2003-04-07 12:59:11 10 29 3528 0 1358 4319 2426 206.50 25 49.26 CHANGED uFLAGlLShLSPClLPllPlalSaIoshshssh+pt.........hhpulhFlLGhollFluLGhusuhlsslht.tatshlphluulllIlhGLphluhh.................hLh+hphs..............................h...tpsps.hltuhLhGhsFAluWoPChuPlLuullshuusps..hhushhhslYsLGLulPhllsulFhs.hh....hh+tlp+ap+hlchsoGlLlllhGllhlh ...........................................................hluGll...h.h.o..P.C...l.LP.l.l...Ph...h...h.u.h.l...h....u........t.........p.......t....t...t.........h......p..tt.............................shh.t.s...h...h.a.l...h...G...h...u...l...s.......a.......s.h...L..........G.......h.........s...s.........u.......h......h.......G........t.......h......h.......t.............h.......t.....t..........h.......h.....h......h.....l....h...u..h........l.....h...l....l...h.......u.......L.....t...h...h.....u.h.hp......................................h..h.l...t.p....pph...............................................................p.p..p..t.s.s..t.h.h......u...s.a.lhGhhh.ul....s....h....oPC....s....u....P...s...L..u.....ul....L....s.h........s....u......s....s............u....s.......h..h..tG.s..h.h....h.h....s....a...u...L..G.....hu..l..P.h..l....l....l..u....l....h.s...s....h...hh..................h.h.....t..h...t.........t.........a........h........t..h....l.....c......t.......h...hG.h.l.lls.hulhh..h.......................................................... 0 404 850 1135 +1833 PF01984 dsDNA_bind DUF122; Double-stranded DNA-binding domain Enright A, Ouzounis C, Bateman A anon Enright A Family This domain is believed to bind double-stranded DNA [1] of 20 bases length. 
26.00 26.00 26.60 29.70 25.60 25.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.38 0.72 -4.05 62 498 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 448 6 336 484 53 104.50 33 78.93 CHANGED IR+p+h..tELQpp.............t.tttpppppppppt..........ctp+pulLpplLss-AR-RLspl+LV+P-hApsVEs.LIpLAQoGplpp+lo-cpLhplLpplspppcpp.....cIphsR ...................lRtt+htpLptp.........................ttttpptppp.ppppt...............................ppt+pu...lL...pQlLss-ARpRLuplpLVKP-+ActVEshLIphAppGpl.p..tKlo-cpLhplLpplspppcpp.....tplphpR................... 0 107 189 274 +1834 PF04077 DsrH DsrH like protein Kerrison ND, Finn RD anon COG2168 Family DsrH is involved in oxidation of intracellular sulphur in the phototrophic sulphur bacterium Chromatium vinosum D [1]. 24.40 24.40 24.80 24.60 23.80 24.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.01 0.72 -4.32 66 948 2012-10-01 20:53:36 2003-04-07 12:59:11 7 1 944 13 166 473 50 86.00 41 91.11 CHANGED +SPapstslpssL+hl....sssDslLLlpDuVh.Ashsssphhptlpt...pshplasLpsDlpARGl..tplsssl.........ph.lDYss..FVcLshc.Hpps. ........................SPa.s.-hsulLRLL....s-GDs..LLLlQDGVh.AAl......cu.sc......aLcsLps.............uslplYALpEDl..hARG...L....s...u.pl..S..s..s..l.........hh..lD..Yo-..FVcLTl+Hspp................ 0 38 74 123 +1835 PF05160 DSS1_SEM1 DSS1/SEM1 family Wood V, Mistry J anon Pfam-B_22209 (release 7.7) Family This family contains the breast cancer tumour suppressor BRCA2-interacting protein DSS1 and its homologue SEM1, both of which are short acidic proteins. DSS1 has been shown to be a conserved component of the Rae1 mediated mRNA export pathway in Schizosaccharomyces pombe [1]. 
20.80 20.80 22.60 23.20 20.20 20.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.48 0.72 -4.19 28 295 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 233 7 210 264 3 62.30 45 64.59 CHANGED ptspp...chssLE..EDDEFE-FPh--..Wssccsppsss........plWEEsWDDDDs.sDDFSpQL+pELcps ....................................t..tpt.chshLE..EDDEFE-FPs--..Wssp-...p-pss..................plWE-s.....W.....DDD.-l.pDDFSpQL+tELcc................... 1 67 115 171 +1836 PF00908 dTDP_sugar_isom dTDP-4-dehydrorhamnose 3,5-epimerase Bateman A anon Pfam-B_540 (release 3.0) Domain This family catalyse the isomerisation of dTDP-4-dehydro-6-deoxy -D-glucose with dTDP-4-dehydro-6-deoxy-L-mannose. The EC number of this enzyme is 5.1.3.13. 20.30 20.30 20.40 20.80 20.20 20.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.20 0.71 -4.95 19 3552 2012-10-10 13:59:34 2003-04-07 12:59:11 12 9 2817 55 902 2953 1971 172.40 40 87.60 CHANGED hhchslssshllEPclFsD-RGaFhEoFspptFpctsstt.shVQ-NcShSup.GVLRGLHaQh.s.sQuKLVRslcGpVaDVAVDlRpsSPTaGcWsuVpLSs-N+ppLalPtGFAHGFhsLu-c.schhYKsoshYssppEpulpWsDPslGIcWPhtss.........lStKDtsus.hs-h ...........................................h..pstlsslhll-P.p.lFuDpRGaFhEsap..pc..p.....F.p......c......t....s.........h..............p.........t....s..........F.....V....Q.-.N.pS.hSpc..GVLRGLHaQht.............sQ...uKL.Vpll.p.G..p..VhD..V.sV..DlRcuSPTFGpWhu.shL....osc....N..........p....+plalPcGFA.HG.FhsL.o.-....s..sp......h.h..Y...Ks..s...s...a..Y.s...P.p...t...-...t...u...lt..asDPslu.IcW.P.h.t....s....t....................hlS-KDtttshLtcs..................................... 0 286 589 765 +1837 PF03942 DTW DTW domain Bateman A anon COG3148 Domain This presumed domain is found in bacterial and eukaryotic proteins. Its function is unknown. The domain contains multiple conserved motifs including a DTXW motif that this domain has been named after. 
19.70 19.70 19.80 20.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.34 0.71 -5.11 13 1690 2009-01-15 18:05:59 2003-04-07 12:59:11 10 14 1061 0 506 1210 69 187.30 32 78.80 CHANGED RthtCtcCtlshptCLCshl..PslpspsplhllpHssEst+shsTuhll...shsltcs.hhth.ch.t.s.p...........................h..hhttPshpshllFPucputshpcl..................................................hshssschhphIllDuTWscA+cha+psshL............csLP.pVsLtsptsSpYR.lR+usscspLsThEsuthhLshhpsp............cu.tpsLlphhcsht.paphspt ..........................................................................................h.tC...p.Ch.hs...p....h....ClCstl.........sshp....s....p...s..........phhllhassEs.h.+.s..o.N.TGRLl................ut.hl..s..c..s...t.s..a.....a..sRs..psstp...................................................................................................................................L.ht..hl....p..s.s..s.....h...p.shllF.P....u.p.h....u...tp..h..p..p....l.........................................................................................s....s..ss.+......sh....hI.h.L..DGTWpcA+KMa+.+.S.PaL...............................................................psLP...hls..l.s........s......p..h....S......tY..+....lRcsp..s...........psphsTsEsuht....lLphhscp.........................................ps.spsLh.phFpthhppa.h..pt............................................................... 0 139 241 393 +1838 PF01950 FBPase_3 DUF100; Fructose-1,6-bisphosphatase Enright A, Ouzounis C, Bateman A, Mistry J anon Enright A Family This is a family of bacterial and archaeal fructose-1,6-bisphosphatases (FBPases). FBPase catalyses the hydrolysis of D-fructose-1,6-bisphosphate (FBP) to D-fructose-6-phosphate (F6P) and orthophosphate and is an essential regulatory enzyme in the glyconeogenic pathway. 
25.00 25.00 41.40 32.60 22.20 18.10 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.12 0.70 -5.46 33 184 2009-09-12 01:16:20 2003-04-07 12:59:11 11 2 177 8 111 177 25 352.60 55 96.70 CHANGED hKlTlSlIKADVGulsGHopsHscLl-tscchLpcA.cpslLIDaaVopsGDDlpLlMTHp+GsDsc-lH.cLAWcsFhpuT.cVAK-LKLYGAGQDLLsDuFSGNV+GMGPGVAEMEFp.E.....RsSEPlllFtuDKTEPGAFNLPLY+MFADPFNTAGLVIDPsMHpGFpFEVhDVh.....-sKplhLssPEEhYDlLALIGssuRYVIK+Vap+.tsGc....l.....AAVsST-RLshIAG+YVGKDDPVsIVRsQSGFPAlGElLEPFA.hPHLVuGWMRGSHsGPLMPVshccAps..........TRFDGPPRVhALGFQls.cG+LlGP.sDhFcDPAFDhsRcpAtclA-ahRRpGPFpPHRLP.-EMEYT.TLPpVlcKLcsRFpsh ........KlTlSVIKADlGuhsGHshsHPcllctApchLp-A.ppslllDaaVopsGDDlpLIMTHp+GhDsp-lH.tLAWcAFppuT.cVAKcLtLYGAGQDLLpDuFSGNl+GhGPGVAEMEFpE......RsSEPlllFhADKTEPGAFNLPLY+hFADPFNTAGLVIDPsMHpGFpFEVhDlh.....EpKtlhLssPEEhYDlLALIGssuRYlI++Vap+.t.....sscl......AAVsST-RLshIAG+YVGKDDPVsIVRuQuGhPAlGElLEsFu.hPHLVuGWMRGSHpGPLMPVshcsAp..s..........TRFDGPPRVhALGFQlp.cG+LlG.....P.sDlFcDPAFDtsRppAhchA-hhR+pGPFpPHRLP.-EMEYT.TLPpVlcKLcsRFt.h............................ 0 41 74 94 +1841 PF01954 DUF104 Protein of unknown function DUF104 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes short archaebacterial proteins of unknown function.\ Archaeoglobus fulgidus has twelve copies of this protein, with several being clustered together in the genome. 21.60 21.60 21.80 21.70 21.10 21.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.93 0.72 -3.94 41 145 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 53 2 106 172 12 61.20 27 89.62 CHANGED MsclI-AlYEsGVhKPLcKlcLcEGc+lplhlppt.........-lhc+htshlttpp.......t..pchlcE ....M.phIcAlYEsGVhKPL..cpl.c..LtEGpc...VplhIppt...........c.hcchhthht.t........................................................................................ 
0 46 70 92 +1842 PF01955 CbiZ DUF105; Adenosylcobinamide amidohydrolase Enright A, Ouzounis C, Bateman A anon Enright A Family This prokaryotic protein family includes CbiZ which converts adenosylcobinamide (AdoCbi) to adenosylcobyric acid (AdoCby), an intermediate of the de novo coenzyme B12 biosynthetic route [1]. 21.00 21.00 21.00 21.20 20.70 20.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.35 0.71 -4.51 69 333 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 295 0 160 338 10 191.00 28 66.29 CHANGED llchtpshpslSoushsGGhtp.sctlhNppVsp....sass...........sspphh.pchhpph..uh...psssuhhTAssh.cphshs....cshpVpshsTAGl.sNssphu..................................GTINlhlhsstsLscuAhspulhTATEAKstALh-hsl.........tuTGTuTDulsVss........tt..stthpauGstTclGphlucu....VhcAl ......................................h..phstshpslSouhh.sGGhtp.hptlhNtp.Vsp...shspt...............sstphh.pphhpph..uh..................psssuhhTAssh.cphshs..........c..s..h..tVtshsTAGl.sNuscsut..........................................................GTINlllhls.....ssLssuAhspAlhTsTEAKstAL.-hsh....t..................hATGTuTD.ulslss.........stt....stthpauGstTplGphlu+u....Vhpul........................ 0 62 111 139 +1843 PF01956 DUF106 DUF106; Methyltrn_RNA_3; Integral membrane protein DUF106 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. Members are predicted to be integral membrane proteins. 
25.60 25.60 26.00 25.70 25.50 24.80 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.72 63 665 2012-10-01 21:53:17 2003-04-07 12:59:11 11 4 418 0 446 621 77 171.30 25 75.18 CHANGED slllss.lp......hhPhhllllllullpshhupllpthh..pp..sph......phpppphtpctcphpp.........................sss.thp.cphppccpthhpsppph.................................hhphhKs.hhhhlsphslhhWlshhFu....................Ghll.......t+lP.FPlhtphpthhptsl..tshssthso.lsWYFLshh.ulpplhphl....LGhss ............................................................................................sh..llhlhlullpthls.hl..p.........+p...sth.............phpc.pp.h....hcuctLcp...............................ssp.hht.tpp...h.ppccphhts.sppsh..............................................hhs.h.h.Kuthsh....hls.hhllh.ual..Nhh.Fs...........................GhVl.........sKlP...F.P..l..hh..thpshh.p.....pGl............shD...s....shs.......S.......lsWYF.Lssh.ulpphhphl.LG.s.......................................... 0 142 240 363 +1844 PF01957 NfeD DUF107; NfeD-like C-terminal, partner-binding Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family NfeD-like proteins are widely distributed throughout prokaryotes and are frequently associated with genes encoding stomatin-like proteins (slipins). There appear to be three major groups: an ancestral group with only an N-terminal serine protease domain and this C-terminal beta sheet-rich domain which is structurally very similar to the OB-fold domain, associated with its neighbouring slipin cluster; a second major group with an additional middle, membrane-spanning domain, associated in some species with eoslipin and in others with yqfA; a final 'artificial' group which unites truncated forms lacking the protease region and associated with their ancestral gene partner, either yqfA or eoslipin. This NefD, C-terminal, domain appears to be the major one for relating to the associated protein. 
NfeD homologues are clearly reliant on their conserved gene neighbour which is assumed to be necessary for function, either through direct physical interaction or by functioning in the same pathway, possibly involve with lipid-rafts [1]. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.79 0.71 -4.05 29 3477 2012-10-02 01:11:51 2003-04-07 12:59:11 13 7 2863 3 986 2665 624 137.70 20 67.86 CHANGED hhlllGhlLlshEhh..hsshuhluhuGlsuhllGslhlhs...................hshhhshtlhhhhhhshlthhlh+t....+pppss.psh.....csllGcpupl...ts...hhsspGhlplcGphWpsput-....lttGcpVcVluhcGhsLhVpcps ............................................................hWlhlu.hlL...l..hhEh..........hssh...h..h..h.....h.h.s..G..l...u..u...h..l..s....u.h.lh.h.hh.........................................hs.hh.h.h....h..t..s.l.h.....h.h...h..l...s....h.l........t.....h............h.....h.....h.+...p......h..h...t...t..p.t.p.p.pt..t.................................ppl...l...G...p..pu.hl.......ps....................hs..s..s....p......G......p....l......c......l.....s.......u.......p.....t......W...s.....s...p....utss...........lts.G..s..c.V....pVlt.l...-...Gh.pLhVp...h............................ 0 335 648 832 +1845 PF01958 DUF108 Domain of unknown function DUF108 Enright A, Ouzounis C, Bateman A anon Enright A Family This family has no known function. It is found to compose the complete protein in archaebacteria and a single domain in a large C. elegans protein Swiss:Q19527. 
21.00 21.00 33.40 32.40 20.70 19.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.94 0.72 -3.76 65 431 2009-09-10 18:08:58 2003-04-07 12:59:11 13 7 393 4 169 403 114 91.40 37 34.30 CHANGED pslspss..slFcGsApEAsptFPpNsNVAAsluLAu..hG............h-pspVcllADPss.stNhH-IplcG......shGphphplcshP.ssNP+TShlsAhSllp .......s..hsctp.llFcGoA+E.....AsphaPpNsNVAAsluLAu..lG............h-pTpVclhADPsh.spNsHclpscG......shGphplplps.hPhss.NP+TSslsuhSsl................. 0 50 101 138 +1846 PF01959 DHQS DUF109; 3-dehydroquinate synthase (EC 4.6.1.3) Enright A, Ouzounis C, Bateman A anon Enright A Family 3-Dehydroquinate synthase is an enzyme in the common pathway of aromatic amino acid biosynthesis that catalyses the conversion of 3-deoxy-D-arabino-heptulosonic acid 7-phosphate (DAHP) into 3-dehydroquinic acid [1]. This synthesis of aromatic amino acids is an essential metabolic function for most prokaryotic as well as lower eukaryotic cells, including plants. The pathway is absent in humans; therefore, DHQS represents a potential target for the development of novel and selective antimicrobial agents. Owing to the threat posed by the spread of pathogenic bacteria resistant to many currently used antimicrobial drugs, there is clearly a need to develop new anti-infective drugs acting at novel targets. A further potential use for DHQS inhibitors is as herbicides [2]. 
21.20 21.20 33.70 32.40 20.40 20.00 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.01 0.70 -5.76 26 211 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 183 0 126 218 104 339.40 38 94.02 CHANGED KplWl..s.t..ssW-tpKchlTsuLEuGsDsVlVsp.tclc+V+cLGplplhu..................................s...h.........ht.h.......GpssusYlcIpsc-pEphAtchuc...s-alIltupDWplIPLENLIAtl.tpcscllAsVpss-EAcsAhpsLE+GsDGVLLcsc..Dss-l+chsphhcphspEp..lcLp.ApVTclcslGhGDRVCVDTCSlMpcGEGMLVGShSpGhFLVHuEThEsPYVAuRPFRVNAGAVHAYlhsPss+T+YLuELpuGDcVhlVDpcGpsRpAlVGRlKIE+RPLhLlEA..Es.cG.cclpslLQNAETI+LVss-GpslSVs-L+sGDcVLlhh-p...suRHFGhtl.-EoIIEK .............................................................................................al...........p..+thlptulEsGh.ss.llhtt...tp.h.....tt.h.p.pl....up.h..plhs.............................................................................................h.......tt.spchushlp.lps.ppp......chAt.phup...sshsll.s.p...DWp.....lIPLENllAt..h..t......p..ps............plluhs...p...sscEAclshps..........LEpGsDGVllpsc......shs-l+clhp.......hh...........cp............s..p........pp...........lpLs.AsVTplc..l.GhGDRVCVDTsSlhc.GEGhL..VGShSpGhFLVHuEohcss.YluoRPFRVNAGuVHuYlhsPss+TpYLSELcuGccVllVc.pcGpsRpuhVGRVKIE.pRPLlLl-....Acs......sG.....cphsslLQNAETIRLl.......s.s......s......G.....p...slu....VspLKsGDcVLshhpp...suRHhGhtl.pEhIlE+............... 0 43 92 117 +1847 PF01345 DUF11 Domain of unknown function DUF11 Bateman A anon Pfam-B_1553 (release 3.0) Domain A domain of unknown function found in multiple copies in several archaebacterial proteins. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.51 0.72 -3.93 35 6559 2012-10-03 16:25:20 2003-04-07 12:59:11 13 229 570 0 1139 6240 824 72.90 27 22.92 CHANGED sssssssssssp..ssssshssssssshtlspssssssspsG-slsaTloVoNsGsss.uss..VsVpDhl....PsGhsassso .............................................................ssss..................................s.........h....s....p.....u.......s...l..s....s...s...K......o.....s.......s.......p...s....h..........s...s.........l......G.......-..s.l..........TY.Tlsl..s..N..s....G.....s.....s......s.....A.....s...s........VhhsDsl........Ps.G..soFlsso........................ 0 455 877 1040 +1849 PF01969 DUF111 Protein of unknown function DUF111 Enright A, Ouzounis C, Bateman A anon Enright A Family This prokaryotic family has no known function. 19.90 19.90 26.20 23.10 19.40 19.00 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.28 0.70 -5.66 103 749 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 627 1 290 733 394 342.40 33 95.11 CHANGED LahDs.huGluGDMhLuALlDLGl..shch.lpptLppLsl.....st...apLphpcsp+pGlpup+lcVph.............................................................................ptpp..................c................Rp.......hs-I...tplIp.su.....sLsspl+ptuhplFppLAcAEA+VHGh.sl-cVHFHEVGAlDuIlDIVGsshul-hLs....l-c.......lhsoPlsh...GuGhVcsAHGhlPVPAPAslElL..cslP.lhs..ssh.p.....sELsTPTGAAllssluspasshPs.hplpplGYG..uGs+-hs..hPNlLRlhluc.............................tps.thpp-plhllETslDDhssEhlualhEpLhps.GAlDVhhsPlhMKKuRPGhhlsVls..pscptppltpllhpEToTlGlRhpth.pRhhLpRchtplpTshG....plpVKhuh.hsut....hhphpPEa-DspclApcp..slPlpcVhp 
...............................................................................lahDs.hsGluGDMhluALlDh.......Gs.......s................p...h....l...p...tlppLsl.....tt..............hplphpch.+..p..GltutphpV.ht.................................................................................................................................................................p...ppt..........................c...............Rpht-I.hplIp.pu........s.Lsptl+phuhtlFphl.Ap.AEuplHGh.s.h-pVHFHEVGAh.D.uIlDIlGsslsl-.Ls....hcp.......lhsoslsh...GsGh.lpsuHGhhPVPsPAshclh.............psls..lht..t.sh...p.........sELsTPTGAAlltsls......s.p......a..s..........ss.hplpplGhG..s..Gp+-h.....t..hsNlLRshlhc........................................................................................tt...t.ttcplhhlEsslDDhssEhhuh.hh-tLhpt.GAhDVahpPlhMKKsRPuhhlslls.....p.pphpthtpllhpcToolGl.Rhh.h.pRhhhp.Rpht..plps.hG.....................................plplKhsh..h.....tst.................hhphpPEa--htplApptshshpplh.t.............................................. 0 134 237 275 +1850 PF01970 TctA DUF112; Tripartite tricarboxylate transporter TctA family Enright A, Ouzounis C, Bateman A anon Enright A Family This family, formerly known as DUF112, is a family of bacterial and archaeal tripartite tricarboxylate transporters of the extracytoplasmic solute binding receptor-dependent transporter group of families, distinct from the ABC and TRAP-T families [1]. TctA is part of the tripartite TctABC system which, as characterised in S. typhimurium [2], is a secondary carrier that depends for activity on the extracytoplasmic tricarboxylate-binding receptor TctC as well as two integral membrane proteins, TctA and TctB. complete three-component systems are found only in bacteria. TctA is a large transmembrane protein with up to 12 predicted membrane spanning regions in bacteria and up to 11 such in archaea, with the N-terminal within the cytoplasm. 
TctA is thought to be a permease, and in most other bacteria functions without TctB and TctC molecules [1]. 20.40 20.40 21.50 21.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.26 0.70 -6.07 199 2024 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 1089 0 682 1865 3145 407.50 36 83.48 CHANGED sLhhhhhG.shlGhllGslPGLusshulAlLlPlTas..h.....s........PhsulhhL.sulYhGuhaGGulsuILLslPGssuussTslDGasMAppGcuupALulusluShlGGhlu....slhLhhhuPhluplALp.FGssEaFsLhlhuLs.hlusluus.s.hh.............KulhushlGLhluhlGhDshsG.h.Ra.TFG..........pLh.sGlshlslhlGLFuluElltt....h..................t..ptph..tph....tt.h.h.s.....hp-htcthhshl.RuohlGshlGhLPGsGuslAualuYs........................ht++.ho..+psc..p..F..Gc.Gsh-GlsAsEuANNA...ssuuuhlPhLsLGIPGsussAlllGuhhlpG...lpPGPhlhpppsp..llaullsuhhluNlhhlllslhh.ht.h.as+.llplPtphLhPhIllhshlGsYuls....ssh.hDlhlhlshGllGahhc+hshs.........huPll........L ..............................LhhsllGshlGsllGsLPGLGsssul..A..lLLPlTau..h.............sPtu..ullhL.uulYhGuhaGGuhouILlNhPGssuulsTshDGasMA.pp.G.+.AGtALuhusluSFhGGhlu.......sl.hl.hhh...A...P..............h.LAph.......A.Lp..F..Gs...sEYFuLhlhuls.sluuh.su...p..s..hl..............Kulhu.s.h..lGLhlu.....sl..GhDsho....G.s.Ra..TF....s..s.........pLh.sGls.hlslhlG.LFul......uEllhhh...............tppp.p.t.ph.....h.t.ph..........sp...h...hhs...............hp-htp.h.h.h.shl..RuollGhh.lGlLP.G.uGAsl.......A.ualuYs...............................hpK+..hu.......+..p.sc....p..F...Gc..GslcG.lu.AP.EuANNA...u.s.s.G.uh.lPhL..TLGlPGsuss....A.....l....h.lG...Alh.laG......lpPGP...hlFs.....p.....ps.-...............l..saullsu..hhluNlhLllls.lsh....ls.las+......lLplPh.................th..LhP.....hIl...h.h...s..hlGsYu.ls..........ssh...hDlhlhlshGllGahhc+hsaP.........huPllL................................................................................................................. 
0 185 421 574 +1852 PF01972 SDH_sah DUF114; Serine dehydrogenase proteinase Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaebacterial proteins, formerly known as DUF114, has been found to be a serine dehydrogenase proteinase distantly related to ClpP proteinases that belong to the serine proteinase superfamily. The family has a catalytic triad of Ser, Asp, His residues, which shows an altered residue ordering compared with the ClpP proteinases but similar to that of the carboxypeptidase clan [1]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.71 0.70 -5.63 4 404 2012-10-02 13:07:06 2003-04-07 12:59:11 11 4 342 0 216 2161 936 193.70 28 59.54 CHANGED h.hhDPhou.l.uLhWaLLFhaLlhuPphphptLlhARh+slRclppKRsSpVITMIHRQEuIGFLGIPIY+FITIEDSEclLRAIRhTPcDhPIDLIIHTPGGLsLAATQIApAL+cH.AcTpVIVPHYAMSGGTLIALAADEIIMDcNAVLGPVDPQlGpYPAsSILKslE+KsscclDDpTLIhADIucKAIpQhp-hVasLL.KDKhs-EKAKElAKhLTpG+WTHDYPLTVEcLKpLGLcVsTNVPcEVYELMELY.QPMtp+ss.VpalPsPhK...pEpsAK 
...................................................................................hhh......................................................................................................................h.....c.tl.p......h.........s...h...s...I...s...L.l...l...c...TPGG....h....V.....AA....t....pI....u....p....t...l....p....p....p..s.....s.....c.....s....s............l....h...V....P...c...h.....Ah....SuGT.lIA.L.uAD...c..IlMs.sus..........lG.....P.....l.....D.....P.......p........l........t........t...........................................................................................................................................................................................................................................................................................................................................................................................tsh................................................................................................................................... 1 96 153 185 +1853 PF01973 MAF_flag10 DUF115; Protein of unknown function DUF115 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaebacterial proteins has no known function. 
33.30 33.30 33.30 33.30 33.10 33.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.09 0.71 -4.75 151 1412 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 624 0 345 1270 190 143.60 25 34.65 CHANGED pphlpNlsphhp......hppLhs....pt....cs..slllusGPSLccp.lshl.+php....pchlIlusssuhphLhcpGIpPDhlls.lDt....hshc....h..hpph.p........slhllhsptss.chlpthpspph.hhhhsp..........hshh..t..t..........hhsGhoVuphuhplA.hthGtcpIlLlG.Dhuasps .............................................................................hht.................hhlhusGPSLt.pt..l.hl..pt............tp..hhlhss.ss.uh.hL.hptsI.tPD.....hlh...l-t.....hs..c.............h.hpp..tp........................slhhlh.ssh.sp......ps.l...p..hh...ptpph...hhhhtps....................................hthh...p..th.................htsGh.....oVu.phuhtLA.ht.lshc...sIlhlG.Dhuas........................................................ 0 131 239 288 +1854 PF01976 DUF116 Protein of unknown function DUF116 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein has no known function. The protein contains seven conserved cysteines and may also be an integral membrane protein. 
25.00 25.00 26.20 25.40 24.60 24.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.23 0.71 -4.74 48 272 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 245 0 133 254 20 161.60 30 58.26 CHANGED hhshhchlh......................................phhuhcc..phlpphhIplpNphhtspht...................ch.scchllllPHCLppsc...Cst+lTs.cs.pC.pcCG.+Csluclhclu-chGhc.lhllsGuohs+............+llcct+Pc....ullulAC.p-LppGhpc.........hpt...lPs..hGVl.p+ss..C........hsTpV-hpplhchlp .............................................................h...hhhhls.....phhshpc..phlpp.hlcl...pNphh.hppht...................c.h.scchllLlPHClp.sp......C.h+lTp...ch.sC.....+cCG..+Csluslhclucch.G.hc...lhlsTGGTlA+............+hlpct+Pc....ulluVACc+DLtpGhpc.........sps...lPshGVlsp+sstsC...............hsTpVshpcl.phl.t................................................... 0 54 101 121 +1855 PF01978 TrmB DUF118; Sugar-specific transcriptional regulator TrmB Enright A, Ouzounis C, Bateman A, Studholme DJ anon Enright A Family One member of this family, TrmB, has been shown to be a sugar-specific transcriptional regulator of the trehalose/maltose ABC transporter [2] in Thermococcus litoralis. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.86 0.72 -4.27 51 1668 2012-10-04 14:01:12 2003-04-07 12:59:11 14 47 710 5 702 5171 791 67.60 24 27.24 CHANGED Lp.plGlochEucsYhsLl...ppusssup-lucpsslP+s+lYclLpsLhc+GhVphtp.up......Pt.hYpslssc ......................................thGhsph.Esc..s..YhsL.....l.......pp.u....s......h......o.....u.......p.-.l....ucp.s....u.....l....s..+......u....p.l.Y....csLppL....h....ccGh.V.ttpp..sp.....................sh.hY.tshs.................................................. 
0 181 429 610 +1856 PF01982 CTP-dep_RFKase DUF120; Domain of unknown function DUF120 Enright A, Ouzounis C, Bateman A, Coggill P anon Enright A Family This domain is a CTP-dependent riboflavin kinase (RFK), found in archaea, that catalyses the phosphorylation of riboflavin to form flavin mononucleotide in riboflavin biosynthesis EC:2.7.1.26. Its structure resembles a RIFT barrel, structurally similar to but topologically distinct from bacterial and eukaryotic examples. The N-terminal is a winged helix-turn-helix DNA-binding domain, and the C-terminal half is most similar in sequence to a group of cradle-loop barrels. Swiss:O28174 has this domain attached to Pfam:PF00325. 25.00 25.00 66.80 66.10 22.80 20.10 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.50 0.71 -4.49 44 155 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 152 8 103 158 96 120.50 38 62.91 CHANGED VsSGLGEGpaYlol.hYpcpFcctLGFpPaPGTLNl+lssp.t.h.htttlcphpslhI.Gap.pssRsaGuV+sassplss.....lpuA......llhPpRTpHspcllElIAPhpLRcpLsLcDGDcVplpl ..VlSGlGEGpaYlSl.hYpcpFcchLGFcPaPGTLNlclppp...hphhttl.cshpslhI.uap.pssRsaGsV+sa.splss......lpuA......llhPpR..TpHs.psllElIAPhpLR-pLsLcDGDpVplpl.......... 0 26 61 84 +1857 PF01983 CofC DUF121; Guanylyl transferase CofC like Enright A, Ouzounis C, Bateman A anon Enright A Family Coenzyme F420 is a hydride carrier cofactor that functions during methanogenesis. This family of proteins represents CofC, a nucleotidyl transferase that is involved in coenzyme F420 biosynthesis. CofC has been shown to catalyse the formation of lactyl-2-diphospho-5'-guanosine from 2-phospho-L-lactate and GTP [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.46 0.70 -5.38 3 277 2012-10-03 05:28:31 2003-04-07 12:59:11 11 3 269 2 130 665 682 192.50 28 86.28 CHANGED M+lIIPVSshNpsKTRLSshLSsEERKsLL+sMLhDVIcALcs.lD.llllScDE-VL-aAhssLGlEllcEc..+DLNsAlcQA..Fpt.E-ccVIIIPSDIPLIuKcclcclL-puuphDVVIAPuRGGGTNhLlLR.Kcuhcl+YcssSFFKHLEEARKRGL+spIYDSFYlSVDINTsEDLGEIhlHGsGT+o+EYLRKLGFoVcPc+oSchRhcVpRp .........................................................................................hh..lllPVKpl...stAKoRLu.ss.....h.s.......s..pp....Rps..ls...h....A....M....L....t......D.....s......l.......s....A...s...t.....s......l.....s.....l....s...V..l........o....s...D...t.......s.......s....s.s...h.u..t................t..........h.....G...u.....p..l.l....sD..............s........t..........p.....s...........L....N.......s...Al....stu..............h.h...t........t....s....s....s.......s....ll.l..l....uDLPhlpsp-Ls...p.h..l..s..s..u...........t....p.....h....c.....s..h..lu.s...s..p.G.s..GTs...sL...h.h.t....s..s.t....h.....p.........s.......p..a.s.s..s..S...h..t.H...t...........................................Dls...ch.................................................................................tttth................................................................................ 2 39 96 119 +1858 PF01986 DUF123 Domain of unknown function DUF123 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial domain has no known function. It is attached to an endonuclease domain in Swiss:Q58030. The domain contains several conserved cysteines and histidines. This suggests that the domain may be a zinc binding nucleic acid interaction domain (Bateman A unpubl.). 
25.00 25.00 27.50 25.60 21.20 24.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.76 0.72 -3.89 69 232 2012-10-01 19:55:08 2003-04-07 12:59:11 11 4 224 0 149 235 33 93.40 35 50.11 CHANGED phphptGhYsYlGSAht.su.....htpRltRHhp..tst.+..............................h+WHIDYLlt..psplstl.h.hhsppc....hEstluptLsphst....l.tGFGuSDsp..stSHLahhs ..............t.hphptGhYhYlGSAh..su.....ltpRlpRHhpps+...............................ph+WHIDYLhs...psphhts..h..s.stc....hEsplAptlsphht.....ltuFGuSDCp..CtSHLahh.p......................... 0 51 97 124 +1859 PF01987 AIM24 DUF124; Mitochondrial biogenesis AIM24 Enright A, Ouzounis C, Bateman A, Eberhardt R anon Enright A Family In eukaryotes, this domain is involved in mitochondrial biogenesis [1]. Its function in prokaryotes in unknown. 20.80 20.80 21.00 20.80 19.90 20.40 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.54 0.70 -4.88 153 1844 2009-01-15 18:05:59 2003-04-07 12:59:11 12 16 1144 7 748 1709 223 204.60 25 77.57 CHANGED aplh.tsshphlclpL.t.sucslhscsGuMlhhs....uslphps..th............th....lhtulp........chlsGEuhFhs.hp...tspG.pGclhlAss.hsGp.lhhlcL.ss.........p.tlhlppsuaLAs.sssl....phchpht......thttu..........hhu...........Gp..Ghhhh+lp...G.pGhlhlpu...hGslhphplt..scslhVDsuplVAassslpa.......plpps.s................................................thhuth.hu...GEGl..hh.php...Gs.G.pVhlQ.ohs 
......................................................................................................t.shphlplpLt..ssp..s.lh.....sc.....s.GuMhhhp...................sslphps.h.t.............................utlhtthp...........phlo.GEuhhhs.hh........tspG..pGplhhAsshs.........sp..l..h.s.lcL..ss....................tpl.hl.p..p..ssaLAh.ss.s.l.....phshpht.....th.tu......................hhu......................Gp...Ghhh.h.p...lp...........G.....pG...h...lhlpu....tGslhph.p.l...t...sc.plhlDssplVAa...ss..s...l..ph..........slphs.s.....................................................................................shhsth...hu.........GEG.l....hh..ph..p....G.s..G...pVh.lQoh................................................................................................... 1 239 508 668 +1860 PF01988 VIT1 DUF125; VIT family Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the vacuolar Fe2+/Mn2+ uptake transporter Swiss:P47818, Ccc1 [1] and the vacuolar iron transporter VIT1 Swiss:Q9ZUA5. 
28.10 28.10 28.10 28.10 27.90 28.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.37 0.70 -4.85 143 2783 2009-09-13 10:13:07 2003-04-07 12:59:11 14 11 1699 0 1169 2391 416 165.10 25 72.77 CHANGED hlRssV.hGusDGllosh.ullsGluuss...s.......s.st..slllsGlusl.lAuuh.SMuhGpYlSspupp-htptphpc.p..th.t..ptpht.phhthhtpp.Glstphspphspth.t...................t...............................................................ht.hh..thsh................pPhtuAlsosluahlGullPllsahlh....................shhholhhshlsLhllGhhtuths.s........tshhpuslchlhhGhlussloahlGtl ......................................................................................................................hpshlhGh.DGllssh.ullhGluuus.........s.....lhlsGhushluuuh.SMuhG-alSspsp............t-..tt.ht........h.........t...................h...hh.t...s...t.h......hs..h................................t..........................................................................h....tht...............s...............pPhpuAhsohhuahlG.....ulhPhlshhhh................................hthhhshh.hs.h..lsLhhhGhhtuhhs.t................................tshhpuhhc.lhhGhhshshshhlG.......................................................................................... 0 357 761 1000 +1861 PF01989 DUF126 Protein of unknown function DUF126 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. 19.30 19.30 26.90 32.90 18.50 17.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.51 0.72 -4.31 60 282 2012-10-01 19:37:30 2003-04-07 12:59:11 11 2 253 1 142 284 192 84.60 37 34.22 CHANGED slSFhGGVDPpTGhllDhtHslhGpSlsG+lLlhPsu+GSosGShVlh.pLtpsGpAPtAllhp.csEsIlshGAllAs......lPl...lst ...................lSFhGGVDPpoGhll-p.pHsLpGpslsG+lLlhPsu+GSos.GSh.Vlh.pLhpsGpuPsAl.lhp.c.s.-sIlshGAllAp..........lPll.t............ 
1 42 83 110 +1862 PF01994 Trm56 DUF127; tRNA ribose 2'-O-methyltransferase, aTrm56 Enright A, Ouzounis C, Bateman A anon Enright A Family This family is an aTrm56 that catalyses the 2'-O-methylation of the cytidine residue in archaeal tRNA, using S-adenosyl-L-methionine. Biochemical assays showed that aTrm56 forms a dimer and prefers the L-shaped tRNA to the lambda form as its substrate [1] [2]. aTrm56 consists of the SPOUT domain, which contains the characteristic deep trefoil knot for AdoMet binding, and a unique C-terminal beta-hairpin [3]. 25.00 25.00 79.40 95.80 22.90 22.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.59 0.71 -4.37 31 143 2012-10-01 22:53:19 2003-04-07 12:59:11 11 2 140 4 88 146 75 121.00 48 66.03 CHANGED WGGs..FpVchsssa+phl+ca+ptuGhVVHLTMYGhsls-lhscI+...................pscc...lLllVGAEKVPt-lY-hADYNVuVGNQPHSEVAALAlFLDRLhcGcpLpp-FtsA+lpllPpc+GKcVlct ......WGGs..Fp.lchsssa+phl+cW+t.ts.....GhVVHLTMYGhsls-.lhscI+.........................p..p..p.cs....lLlVVGAEKVPt-lY-hADaNVuVGNQPHSEVAALAlFLDRLhcG+pLpp-FpsAcl+llPpppGK+Vlph.. 0 21 53 73 +1863 PF01995 DUF128 Domain of unknown function DUF128 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. The domain is found duplicated in Swiss:O27611. Many of these are attached to an N-terminal winged helix domain suggesting these are transcriptional regulators and that this domain has a ligand binding function. 
25.00 25.00 63.90 27.70 22.10 23.00 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.26 0.70 -5.43 36 122 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 69 2 87 121 4 228.30 32 79.15 CHANGED lsFlhS+lpchhapssaD.tptpGcVllNhohlp..............ccch-pslcllccshcsG..huloshlplhccs.t.......tsplsItTlCSlThDGlLL+sGIPspPtYGGllclcstpPhRFp-lIsYcuTSlDPlclFhspshTsVhphhcsGpGtlLANlRplPhsAc-chcpllccl.t.uhsGll..plGcsspslhGlsVppsplGlshlGGlNPlsshpEtGlslchpshpslh-apphpch ............lshlhS+l.phhapssaD.pptpGpVllNhohlt..............cpphccslclhccshcsG..huloshlplhcc............psplsltTlCSlTlDGlLL+sGIPspPpaGGllclcs..tpPh+Ft-lIsYcuTSlDPlclFhsp.....shTsVhthhpsGpGplLANhRplPhsuc-chpcllccL.t.uhsGll..plGc..sspsl.hGlsVs.psphG.lshh.GGl..NPlushpEpGlslchpshpslhchpphpp.h......... 0 22 58 75 +1864 PF01996 F420_ligase DUF129; F420-0:Gamma-glutamyl ligase Enright A, Ouzounis C, Bateman A, MorningStar A, Mistry J anon Enright A Family F420-0:Gamma-glutamyl ligase (EC:6.3.2.-) is an enzyme involved in F420 biosynthesis pathway. It catalyses the GTP-dependent successive addition of multiple gamma-linked L-glutamates to the L-lactyl phosphodiester of 7,8-didemethyl-8-hydroxy-5-deazariboflavin (F420-0). This reaction produces polyglutamated F420 derivatives. 
GTP + F420-0 + n L-glutamate -> GDP + phosphate + F420-n 25.00 25.00 25.70 25.30 21.70 22.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.48 0.70 -5.29 91 759 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 642 4 296 699 526 231.60 28 69.99 CHANGED slcsh..lcs..GDD..Lspllhpuht..........lp-GDllVlupplVS+AEGRhlshsslpPu....................................Ah.lu..ht........tssph.........hplllcE...............................usclltt........tsshhlsps+tGhl.....hssAGlDp.........................SNs........sss....llLLPcDPcsSAcpl+ptlpphhGh....pVuVlI......sDohGRsaRhG....tsulAlGsuG......ltslt..-htGp.pDh.hGctLp.....................hT..uluDplAuuAsLlhGcusptsPlsllRGh .........................sltsh..lp.GDD..lsplltpss.........................lp-sDllslopplVu+spGphsshspht.s.........................................h..hs............hs.th.......................hpllhpE............................................................................................................sscllht..........tstshlspsppGhh........hssAGlDt....................................................................................................................................SNs.........ssst....lhLhP.p-..P..cssApplpptlpphhu.h....pluVl.l......sDoh..G+.a+hG....tsshulGhsG................ltslt..shtu...hD..hsp.lt...............................sT..slsD.luuhusLshGphst.hPlsllpG............................................. 0 110 228 272 +1866 PF02343 TRA-1_regulated DUF130; R03H10.4; TRA-1 regulated protein R03H10.4 Bashton M, Bateman A anon Pfam-B_814 (release 5.2) Family This family of proteins represents the protein product of the gene R03H10.4 which is located near a sequence that matches the TRA-1 binding consensus. TRA-1 is a transcription factor which controls sexual differentiation in C.elegans. R03H10.4 shows male-enriched reporter gene expression and acts as a direct target of TRA-1 regulation [1]. 
19.40 19.40 19.40 19.70 19.20 19.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.97 0.71 -4.64 23 100 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 4 0 99 115 0 124.20 26 71.88 CHANGED clC.s.psspCPDLtshh............sshpl..s-tDGCss.loCs.suphPhhhupassSEIsss.ss..ssshshFtlhsPhohsphsu..........slhcaaGllCE....sspWphTKYPpGIthh..ss...hhGsDGShsGKKotlttlsC .....................................t........tspCsclhs..hh......................st..h.....p-tsGCsh.loCs...ss.t.h.shlhh..tasp.SEIs.h..P..ss...s.ss....t.hh...thh...s.s.s.tt.hs..sh...............................slhsaFGllC.E....sspWhsTKYPhGlt..............Yhs......ss......hhu....ss.uphsGK..Koplt.hth....................... 0 42 42 99 +1867 PF01998 DUF131 Protein of unknown function DUF131 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. The proteins are predicted to contain two transmembrane helices. 20.80 20.80 21.60 25.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.16 0.72 -4.61 35 123 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 85 0 87 115 3 65.00 29 72.33 CHANGED hlGshltsht......................................pscscschGGllhIGPIPIlFGos.....pphshhuhlLAlllhllhllhhh ....................................h.....h.........................................pppcspscsGGVlhIGPIPIlFGou......pph..shhslllAlllhllhllhh....... 0 26 46 65 +1868 PF02001 DUF134 Protein of unknown function DUF134 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaeal proteins has no known function. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.56 0.72 -4.14 4 568 2012-10-04 14:01:12 2003-04-07 12:59:11 11 5 485 0 227 751 43 94.30 39 66.00 CHANGED M.t....+sRsR+hRhIhhp..PpVRtFhPch..stTG..K..Vhlol-EhEAlRLVDYc-hoQ--AuchMGIS+sTlWRhLTuARKKlApALlEGR..hIlhcGGEhhpc ...........................................................................RPpphR.p..lpth...Pt.h..p..hFt.Ptu.......shpp.............l.h.....L....sh.....-.Eh........E....A.l.R.LhD....h.cG.LsQp-uAppMtVSRpThtpllp..sARp.KlAcuL..lpG+..hlhlpGG.h...ht....................... 0 87 154 191 +1871 PF02006 DUF137 Protein of unknown function DUF137 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaeal proteins has no known function. 20.40 20.40 20.40 33.80 20.30 19.80 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.08 0.71 -4.89 37 143 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 140 0 96 140 121 174.20 52 69.07 CHANGED LLhAcpPVISVNGNsAALsPcElVcLActls..AclEVNLFaRT-ERhcpIs-hL+.........cpGAp.cVLGhtsDs....pIPsLpppRupVspcGIasADVVLVPLEDGDRsEALhcMGKpVIsIDLNPLSRTucsAolTIVDNllRAlPplschsc-h+phscpcLppllppaDNcpsLpculctI ..LLLAcpPVISVNGNsAALsPcElVcLActss..AclEVNLFYRTcERhctIschLc.........c..p.GAp..cVLGltsDu....pIPsLpppRu+VspcGIapADVVLVPLEDGDRsEALs+MGKpVIsIDLNPLSRTA+sAolTIVDNllRAlPplschs+-h+p....hs+c-.LppIlpsaDNccsLpculctI............................ 0 23 56 78 +1872 PF02363 C_tripleX DUF139; Cysteine rich repeat Bashton M, Bateman A, Yeats C anon Pfam-B_602 (release 5.2) Repeat This Cysteine repeat C-X3-C-X3-C is repeated in sequences of this family, 34 times in Swiss:O17970. The function of these repeats is unknown as is the function of the proteins in which they occur. Most of the sequences in this family are from C. elegans. 
22.90 1.00 23.60 2.50 22.70 -999999.99 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.68 0.74 -7.70 0.74 -3.51 166 2125 2009-01-15 18:05:59 2003-04-07 12:59:11 14 31 38 0 1234 2152 0 17.40 42 36.77 CHANGED spCtstCps.sCps.pCsp ...................pCtPhCps..uCpN.GhCst............... 0 331 447 1058 +1873 PF02405 Permease DUF140; Permease Bashton M, Bateman A anon Pfam-B_1126 (release 5.2) Family This domain functions as a permease. In Swiss:Q7DD59 it is involved in L-glutamate import into the cell [1]. In Swiss:Q8L4R0 it is involved in lipid transfer within the cell [2]. 20.20 20.20 20.20 20.40 18.80 20.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.06 0.70 -4.85 267 4359 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 2463 0 1190 3158 944 214.40 33 75.29 CHANGED p.phlp.phhplGhtol..........sllslhuhhhGhllulQshhtLpp.aGApsh....lushluluhlRElGPlloAlllAGRsGSAhsA-lGsM+lsEpIDAlcshGlsPlphLVsPRllAshlshPlLshlsshhGlhGGhllush..hhs...lssusahpphpp...hlsh.tDlhhu............llKuslFGhllulluCapGhp......s.puGspGVGpuoTpuVVtuhlhlllhDhl...lohhh ............................................h..hlp.phhtlGstol.........sIlsls.ulhlGh.V.lulQuhhhLsp...aGApsh.....lGhhl.uluhlRELuP....llsAllhAGRuGS.A.h.TA-lGsM+hoEplDAhcsMulcPlphLlsPRlhAullshPlLshlsshl.Gl..hGGhllush..hhG.........lssGsah.s.thps.....hl....s....h.tD.lhhu............l.lKuslFuhhlshlusapGap.......s..p.s.....ss.p.GlGp..AoT..poV..VtuhlsllslDhllohl................................ 0 329 763 1012 +1874 PF02408 CUB_2 DUF141; CUB-like domain Bashton M, Bateman A anon Pfam-B_1716 (release 5.4) Domain This is a family of hypothetical C. elegans proteins. The aligned region has no known function nor do any of the proteins which possess it. However, this domain is related to the CUB domain. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.59 0.71 -4.56 24 264 2012-10-02 11:50:15 2003-04-07 12:59:11 15 9 5 0 250 312 0 113.70 19 25.57 CHANGED ssshsC.sssl..hstP.ssupPh.haPssasts.sssthssspsCsaplslPpGaaAplhlpsphss....psslpshDshuph...tthtssptpsa..aFssPphplslssssss.....sFuFplpW.shs .........................s....tC...s.th..hs.s.......s.tsh...aPts.tts....s.hsths.ss.....sCsapl..s..lP....p.....G.....h..as..plp..lps.phps.......sshlplhD..sssph..................hh.h.s...s.s....t....p...sa.....ahsss.p...h..p..lpl..p...ssssss....pFthplpa.ph............................................ 0 50 67 250 +1875 PF02410 Oligomerisation DUF143; Oligomerisation domain Bashton M, Bateman A, Eberhardt R anon Pfam-B_1798 (release 5.4) Family In yeasts, this domain is required for the oligomerisation of ATP synthase subunit 9 into a ring structure [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.35 0.72 -3.97 165 4456 2009-01-15 18:05:59 2003-04-07 12:59:11 10 15 4365 5 1086 2727 2020 97.70 36 72.41 CHANGED hhphlspsl--cKA.pDIhllDlpp......ho...slsDahlIsoGsSs+plpAl......u-pltcph..+t...t......s.hpshphE.Gh..............................psu...cWlLlDh.G.DllVHlhpt-sRpaYsLEcLW .....................................................................h..phlhcsl--pK.u.pDI.lslDlps...................ho.....sls..Dah..lIsoG.sS..sRp..VpAl...A-pltcph....+p..........t..............G...hpshp..h...E.Gt...............................................................s..su...cWl....LlDh.....G...D.....llVHlh.p.p-pRpaYsLE+LW................ 0 374 701 924 +1876 PF02413 Caudo_TAP DUF144; Caudovirales tail fibre assembly protein Bateman A, Moxon SJ anon Pfam-B_1800 (release 5.4) Family This family contains bacterial and phage tail fibre assembly proteins [1]. 
E.coli contains several members of this family although the function of these proteins is uncertain. 26.50 26.50 26.60 26.60 26.40 26.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.69 0.71 -3.98 73 2529 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 606 0 168 1625 6 118.40 33 75.29 CHANGED csppsapspshp......h.ps.s.hsp......................ssstls.....t......ssGpahhhss..thh.thhs........................ppphhppA-pp+ppLlppAsptIssLp..sslcLs.hho--EpspLptWpcYtshLs+.lDsosAsD......IpWPp.Pp .......................................................................................................................a..tsht.hh.pt.h.hs..........................ssh.ls................s.u.ta..hhss...ph.h.t.hp..........................ttthh.ptA...Etp+..ppL..lptAsp..tIss..lQ.......stlc.L....s...hhT-EE.......p..spL.......pAWpcYtshLsp..VD.TS.s..APD......lpWPp.Pt............................................. 0 15 60 110 +1877 PF02415 Chlam_PMP DUF145; Chlamydia_PMP; Chlamydia polymorphic membrane protein (Chlamydia_PMP) repeat Bateman A, Yeats C anon Yeats C Repeat This family contains several Chlamydia polymorphic membrane proteins. Chlamydia pneumoniae is an obligate intracellular bacterium and a common human pathogen causing infection of the upper and lower respiratory tract. Common for the Pmps are the tetrapeptide GGA(I/V/L) motif repeated several times in the N-terminal part. The C-terminal half is characterised by conserved tryptophans and a carboxy-terminal phenylalanine. A signal peptide leader sequence is predicted in 20 C. pneumoniae Pmps, which indicates an outer membrane localisation. Pmp10 and Pmp11 contain a signal peptidase II cleavage site suggesting lipid modification. The C. pneumoniae pmp genes represent 17.5% of the chlamydia-specific coding capacity and they are all transcribed during chlamydial growth but the function of Pmps remains unknown [1]. 
This family shows some similarity to Pfam:PF05594 and hence is likely to also form a beta-helical structure (personal obs:C Yeats). 20.60 12.50 20.60 12.50 20.50 12.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.18 0.73 -7.79 0.73 -3.36 528 2166 2012-10-02 14:50:22 2003-04-07 12:59:11 12 61 306 0 261 1592 95 25.80 43 6.13 CHANGED slhFssNpu.....................................................tspGGAIhsp ........................................hhFssNsA........................................................................sspGGAIYs.................... 0 105 137 231 +1879 PF02457 DisA_N DUF147; DisA bacterial checkpoint controller nucleotide-binding Bateman A, Coggill P anon Pfam-B_1846 (release 5.4) Family The DisA protein is a bacterial checkpoint protein that dimerises into an octameric complex. The protein consists of three distinct domains. This domain is the first and is a globular, nucleotide-binding region; the next 146-289 residues constitute the DisA-linker family, Pfam:PF10635, that consists of an elongated bundle of three alpha helices (alpha-6, alpha-10, and alpha-11), one side of which carries an additional three helices (alpha7-9), which thus forms a spine like-linker between domains 1 and 3. The C-terminal residues, of domain 3, are represented by family HHH, Pfam:PF00633, the specific DNA-binding domain. The octameric complex thus has structurally linked nucleotide-binding and DNA-binding HhH domains and the nucleotide-binding domains are bound to a cyclic di-adenosine phosphate such that DisA is a specific di-adenylate cyclase. The di-adenylate cyclase activity is strongly suppressed by binding to branched DNA, but not to duplex or single-stranded DNA, suggesting a role for DisA as a monitor of the presence of stalled replication forks or recombination intermediates via DNA structure-modulated c-di-AMP synthesis [1]. 
20.60 20.60 20.80 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.46 0.71 -4.98 190 2847 2009-01-15 18:05:59 2003-04-07 12:59:11 11 19 2386 11 670 1835 550 121.00 38 40.57 CHANGED lhpuhpthucp+hGALIVl...pcppslp..chhp.sGhh............lcuplosplLhslF...t..ssLHDGAlllps.scltuAushLPLops..ss...lspchGTRHRAAhGloEpo.DAlsllVSEEsGsISlshs..Gplh ...................................................hculphhucp+hGALIll..pc....sp...s.Lp....-hlp..oGh.......................lcu..closp.LLhslFh..s..oPLHDGAlIlps..s+It..s....Aush......LPLops..........st.......lo+-hGTRHRAAlG.l..SE.h...o....D.A.l.sllVS.EETGsISlshsGph.h.................. 0 281 502 603 +1880 PF02520 DUF148 Domain of unknown function DUF148 Bashton M, Bateman A anon Pfam-B_1103 (release 5.4) Family This domain has no known function nor do any of the proteins that possess it. In one member of this family Swiss:Q23614 the aligned region is repeated twice. 23.90 23.90 24.00 24.20 23.80 23.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.33 0.71 -4.29 45 259 2009-09-11 00:48:36 2003-04-07 12:59:11 12 5 28 0 213 215 1 107.00 20 48.61 CHANGED pcsppcahsIlpN.psLThsph-splppWApp.u..lsspappFppphpspppchcpstsplIspLo....slpspLssIhsscs.TtppppptIpsLppphsp-.hsslhaltp....htpttt ...................t.pspppahtlhps..tslohsphppplptaspp.s...............lpsthppappph.psttpphppshspllspLs....ss.spLpsIhs...sps.Thppppptlpplh.pphs.c.h..l..lht.h....t.......................... 
0 69 84 213 +1881 PF02576 DUF150 Uncharacterised BCR, YhbC family COG0779 Mian N, Bateman A anon COG0779 Family \N 20.20 20.20 21.00 20.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.69 0.71 -4.60 40 4207 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 4149 1 904 2603 1054 139.30 32 84.80 CHANGED llpPllcshGhELhclchtpputthhLclhlD......p.-sG.lsl-DCpclS+tlSshLDs...pDPI..sp.tYhLEVSSPGl-RPLpptccFt+ahGchVclphp.shcsc+papGplhps-s......-slsl..phtsp....................plplshssls+ApL ...................................lpshlcshu.aELVclE..ahp...t....G....p....p.........hLRlaID..................................p..-..sG...lsl-DCsplScplSslLDs......................pDPI.......sp..tY.hLEVS.........SPGl.-RPLpptccat..chl..G..c.Vplp.L.....h..................s.........l..........p..........s...........c...+.......pa.pG..h..l..t..uh-s.................-plsl.........plcs..c...........................phplshspItKApl................................................................. 0 295 598 771 +1882 PF02577 DNase-RNase DUF151; Bifunctional nuclease Mian N, Bateman A, Eberhardt R anon COG1259 Family This family is a bifunctional nuclease, with both DNase and RNase activity [1]. It forms a wedge-shaped dimer, with each monomer being triangular in shape. A large groove at the thick end of the wedge contains a possible active site [2]. 
20.50 20.50 22.70 22.40 19.60 20.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.61 0.71 -4.95 112 1079 2009-09-11 06:46:39 2003-04-07 12:59:11 9 6 977 4 436 880 532 135.00 31 73.52 CHANGED hp..ltGlshstss.s.....sslllLp-.p..sc.....+hLPIaIGthEApuIshsl...ps.hpssRPhTHDLhtsllpshshplccVhIsslc-ss.FaAplhl...p.psp.............t.h.................................plDARPSDAIALAlRss..sPIast-cVlp.puul.hp................t-c ....................................................h.plhslthp.sp.s...tssllLc-.s......uc.........RhLPI.aIGt.E.Apu.I.uhth.......ps....hp..s..s.RPLT.HDLht...s...l...lps......h......s......tp.l.pcVh..Iscl....p...-......us..FaAcLhh........p..pst..............................................cl.DAR..PSDAlALAlRss..sPIasp-plls.pu.ul.h..........pttt.................... 0 176 335 404 +1883 PF02578 Cu-oxidase_4 DUF152; Cu_oxidase_4; Multi-copper polyphenol oxidoreductase laccase Mian N, Bateman A anon COG1496 Family Laccases are multi-copper oxidoreductases able to oxidise a wide variety of phenolic and non-phenolic compounds and are widely distributed among both prokaryotes and eukaryotes. There are two main active catalytic sites with conserved histidines that are capable of binding four copper atoms [1]. 
20.40 20.40 20.40 20.80 19.70 20.20 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.59 0.70 -5.29 132 3547 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 3384 10 808 2592 2090 224.90 32 89.31 CHANGED aoTR.........Gho...s.h.......tuhNluhp...s.sDs...ppVtpN....Rphlspt.....hshs............hhhhpQlHusplhhlpt..................................tssh.puDullTsp.ssl........slslhsADClPlLlh..D.psp..hluusHAGW+Gss....ssIstpsl.pth.pphss.......pspcl....huhlGPuIusssYEVu....p-Vh.ptF......tpthstttthhht............................s+hhhDLhthsptpLppsGlt....tpIth.......ssh........CThsp.s-.hFaSaRR.......psps.....GR.hhuhIhl ......................................oTR.....G..GlS........ssa............suhNluhp.....s...sDs...s..ttVtpN.......RppLhpt.........................hshs..........phlahp..QlHuscVhp.lst.............................................................stsh.puDAhhTsp..s.sl.........sls.lhoADClPVLhs....s.tpss.....h.lAA.sHAGWRGhh..............sGllcps.lp..h...t...p....ss...........................sspcl.............hAhlGPuIusp..sa..E..Vu....s-lh.ptF..................hs..t......s..p..s...t..s..h..h.h........................................s+ahhDLhths..ctp..Ltp.hGVp......pIhs.........ssh........................................CThs.......c.....tc......h.........FF..SYRR........ctpo........GR...hsuhIh.................................. 0 252 508 672 +1884 PF02579 Nitro_FeMo-Co DUF153; Dinitrogenase iron-molybdenum cofactor Moxon SJ anon COG1433 Family This family contains several NIF (B, Y and X) proteins which are iron-molybdenum cofactors (FeMo-co) in the dinitrogenase enzyme which catalyses the reduction of dinitrogen to ammonium. Dinitrogenase is a hetero-tetrameric (alpha(2)beta(2)) enzyme which contains the iron-molybdenum cofactor (FeMo-co) at its active site [1]. 
21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -10.08 0.72 -3.91 179 2027 2009-01-15 18:05:59 2003-04-07 12:59:11 12 31 977 14 943 1863 98 94.20 19 46.31 CHANGED tsstls...tHFG...+uptFhlh...-ss....st.shc...llcsp.................ssstt......tsstphsp.hltp...pss..ssllsspl..GssAhttLppt.Glclhp..s....sssslc..-s.lpphhp .............................................tttlstHFG+uptFhla............-lp...................st...php.......hlppp..............................hs.sss......................ssp...sp...hsp.hlpt.........ps....s..sslls.us..l..G.tsshptLtpt..GIclht...s....stsslc..cslpth..t.................................... 0 381 689 831 +1885 PF02582 DUF155 Uncharacterised ACR, YagE family COG1723 Mian N, Bateman A anon COG1723 Family \N 21.10 21.10 21.30 22.40 21.00 20.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.83 0.71 -4.36 92 875 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 488 0 565 818 18 175.80 27 42.95 CHANGED plalF..p.aGslVhWshs................cp....ptpphL...phlp...................................phttp.hsppp....p.E-hpahhss.ph.....................................................................pspl...ts....................DhIh.Lpst...............................................................shhtchshSpuLupSs+LshhEpplsphl....-phpplsppLsp.sG+lsh.sc+......clhchhGclhth+hplslpspll.Dp.P-hhWc...cspL-tlYptlpchh-.......lspRlplLNc 
..............................................................lFlF.p.aGslVhWshs................cp......pp.pphL...ptlp.................................................th..thp.l...sptphp..sEphpahhspph....................................................................................................................................pspl..hs..........D.hIh.L..p.st...............................................................shhhKlulSpuLAQSsKLuhaEptlsphl....pp..spslPppLu...p.sGc..l.sh..s+c............................plhpphGcLathRhplNL.puslL.Ds...P-haWc.......csp.LcslYptlppaL-lspRlplLN.............................. 0 179 331 483 +1886 PF02585 PIG-L DUF158; GlcNAc-PI de-N-acetylase Mian N, Bateman A anon COG2120 Family Members of this family are related to PIG-L an N-acetylglucosaminylphosphatidylinositol de-N-acetylase (EC:3.5.1.89) that catalyses the second step in GPI biosynthesis [1]. 22.00 22.00 22.00 22.40 21.90 21.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -11.03 0.71 -3.63 137 4157 2009-09-13 21:13:25 2003-04-07 12:59:11 12 36 2321 19 1451 3457 1554 136.10 25 45.05 CHANGED LllssHPDD-s.husGusltphtp.....p..Gtpltllsl.........os....G...............................phs........................phsth..Rpp......EttpA.s.phL.Gl........pphhhLs..........hsDsthpt........................................................hpphhpt.....ltpll...pph...p...P.........................cl.......lhsht....stss....................HsDH.psst.....t..hshp...uh 
....................................LslsAHPD..D-s.hus.u.u.o.l....u....p.hsp..............p......G.h..pV..t..l.l.sh............Tp.........G...................................................................................................ph.u.p................................................phsth....Rcp.............EhppA.s..chL..Gl..........................pphthLs.....................h..D.sthtt...................................................................................................hpph.tpt............ltpll........cch........p...P......................................................sl.........l.h..o.ht..stss....................HsDH.htstpsshtA.h...................................................... 0 554 1008 1293 +1887 PF02586 DUF159 Uncharacterised ACR, COG2135 Mian N, Bateman A anon COG2135 Family \N 27.10 27.10 27.20 27.10 27.00 26.80 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.88 0.70 -4.80 164 2412 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 1845 9 843 2126 1620 208.00 26 88.83 CHANGED MCG..R...asht......ts...ppltphhth................................................ttphtspaNls....Psptssllht................................................................tpt..............................h.phhpWGl.....h....P...tasc...........................ththhNARsE...............o.l..tp..+s..sF+....psh...pppR.CllPusGa....YE......Wp......................tttsp.......K......pPah.....l...........php...s......t..ps...............................................................................................................................................hhhAGlap..tapss.tt............................................................htohsllTssu.......sst.....lstl.Hc..................R.MPllL..s.t-phcpWL.........pstssttpt....h......................lsttsspstpsss 
...............................................................................................................MCGR.....as.t......ts......pphht.hh.t.................................................ssp.....sp.aNlu..Psp.s.llhpp..............................................................................ptth..................h..p.hpWGh.h......P.........sWhc..............................t..hhN..ARsE...........................Th....tp......p...hF+.....tsh....ppp....RCllPssGaaE...Wp..........................ttssp.......K..........pP.aa....l........................pht......-.............s...p..................................................................................................................................................................h.hhAGlap......phts...tst.......................................................................htshsllTssA....stt.......................lstl.Hc..................R....hPllL.......s...-..t.h.cpWL.........ssp..hstttt.....h....................................................................................................... 0 251 500 686 +1888 PF01519 DUF16 Protein of unknown function DUF16 Bateman A anon Pfam-B_764 (release 4.0) Family The function of this protein is unknown. It appears to only occur in Mycoplasma pneumoniae. The crystal structure revealed that this domain is composed of two separated homotrimeric coiled-coils [2]. 
25.90 25.90 26.00 31.60 25.80 25.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.55 0.72 -3.80 18 91 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 5 3 30 66 0 93.90 42 69.63 CHANGED sFsGshsc..Kt.p......ocYVT+KQFs....EFK.D.........uscp+LpKlEscls...........tQGEQIpp.................................................QGEQIccLp.p.c....................................t.scsLplllpoLpphs.......cRLD+lEu ...........hsGshsc..+hp.......scYVTpKphs.......EaK.s..........uspQcLhKlEspls..................sQGEQIsplhphVpt..............QGEQI+pLphc.K....................................sQGcoL.p.IhpsLtthscRLDph-...................... 0 30 30 30 +1889 PF02589 DUF162 Uncharacterised ACR, YkgG family COG1556 Mian N, Bateman A anon COG1556 Domain \N 20.40 20.40 20.40 21.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.04 0.71 -4.78 209 4142 2012-10-04 00:26:15 2003-04-07 12:59:11 10 12 1994 1 981 2994 341 198.50 23 58.26 CHANGED lpp.htcplppts.splhhsps...........hp-shphltclltpt...sh.................................sh.shphsl.tchhl.............hshlctsptphhchhstph......................tpthtcphhpuDhu..........lousshulA-oGslslhsspust.chhshhPpthlsls.....sh.s+lV.sshpcAhptlph.....ts.t.........th.sshsshh.................sGP.ucsu-lphhh.hshp...GPp.clpllll ......................................................................................................h..phtpphpp.s.spVhhsps.........................tp-.ssph.l.h.p.lh.pcp...ss..................................................sh.sh-scL....sEhhl......................hssl.c...p.s...p..t.p.l.tchhppph...........................................cp.phc.cp.hh.pA..-.l.G.........lousshulAEoGolsl.ssspGs.......u..RhhshlPcsplslh.....sh.p+ll.sshp-shthlph......suhu...........p.phsshhshl..................oGP..pp.s.u.Dl-..................G.Pp.chpllll.................................................... 
0 354 721 864 +1890 PF02590 SPOUT_MTase DUF163; Predicted SPOUT methyltransferase Mian N, Bateman A anon COG1576 Family This family of proteins are predicted to be SPOUT methyltransferases [1]. 20.90 20.90 20.90 21.20 20.60 20.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.79 0.71 -4.50 30 3767 2012-10-01 22:53:19 2003-04-07 12:59:11 12 4 3656 18 737 2319 968 152.30 38 97.90 CHANGED M+IpllsVG+hppcalcpuhsEYtKRlspasp......lcllEltsp+t....pspshpphhccEupplhtpl..tssshllsL-hcGKthoSEphAchlcchthpGppclsFlIGGutGLssslhpcAshphShSchThPHpLhRllLsEQlYRAasIhpscPYH .................................................M+lpllsVG+h.t..tcalppuhsEYhK.Rhsch.hp......h-llElsstKt......spstchp.p..lh.c+E..G..pp.lLutl..............ss..p..s.h.l.l......sL-lp.GK.thsS.phAp.pLppht.h........p.G.p...clsFlIGGu...G.Lu.s...slpp...p...A.s.........p...lShSphThPH.p........LhRllLsEQlYRAasIhpscPYH............. 0 246 481 623 +1891 PF02591 DUF164 Putative zinc ribbon domain Mian N, Bateman A, Eberhardt R anon COG1579 Domain Structural modelling suggests this domain may bind nucleic acids [1]. 23.70 23.70 23.80 25.90 23.40 23.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.02 0.72 -4.14 93 1089 2009-09-12 00:25:18 2003-04-07 12:59:11 10 5 1080 0 327 812 391 56.40 33 22.61 CHANGED llptY-+lRpp..pGhulstlpps....sCtGCphplssphhscl.pps..cc..llhCspCsRIL .......llphY-+lRcp..pu..huls.lpp.p.......sCsGChhplsspphtcl.ppu......cc...llpC.pCGRIL........ 
0 138 262 312 +1892 PF02592 DUF165 Uncharacterized ACR, YhhQ family COG1738 Mian N, Bateman A anon COG1738 Family \N 22.00 22.00 22.70 22.40 21.90 21.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.71 0.71 -4.02 151 2443 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 2235 0 513 1577 1908 145.40 33 64.21 CHANGED shGslhaPhsFlhoDllsEhYGtptA+cslhhGahsslhh.hlhh...hhhh......hhs.ss.................................tshttlhs.....ssRlslAShh.AalluQhlDlhlashl.....+phtt......tp............tlWh..RshsSThluphlDThlFhs.lA..Fh......u............h...........hs.............hsthhslhhssalhK .......................................................................ThGshsFPhhFLsTDlhsclaGtphAR+llahshhssllh..hll...lhh......hhs..h....h.............................tultth.hh.......lsRIAlAShh.AYllGQhlDlhVFs+l+pt..........+............paWl.tshuSTlhGshlDTllFhs.IA...Fh..t......................s............h..........................sppahplslssYhhK.................................................... 0 152 307 420 +1893 PF02593 dTMP_synthase DUF166; Thymidylate synthase Mian N, Bateman A, Eberhardt R anon COG1810 Family This family catalyses the synthesis of thymidine monophosphate (dTMP) from deoxyuridine monophosphate (dUMP). The physiological co-substrate has not yet been identified [1]. 
24.40 24.40 25.20 24.40 22.00 21.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.46 0.70 -5.00 24 85 2012-10-10 17:06:42 2003-04-07 12:59:11 9 3 45 0 68 91 5 224.40 30 91.77 CHANGED sllhp..GpaGcRhhpslhscsphp...........................lhlh-hs-..pls-hI-pscphL.p......l.-sDlllshsLHPDlshtLschhtp.sshtulIlsutssc....h...pl+cph-phshphhsPc..hCsLc...........sspshlccFsch..FGpPclclplp...sscl..pcVcVlRuAPCGuTaalAccltGhsls-hthps........uhthpp...YPCtAuhth..p.p-shlHpAGhltpcAlpcAlth ......................................llhc..GpaGcRhhpsltppsshs.........................lhhhchsc....tls-hI-p.P.c-h.Lsc......l.cuDlllshslHPDlshtLschh.....pc.ssscul.Ilsutpsc........pl+cphcphshphhsPc..hCsLcc...........stpshlccFlch..FG+Pcl-lplc.....ss..pl.....pcVcVlRuuPCGoThalA+clhGh.....plp-ht.ps........uhthpp...YPChAuhth..p.s..-shhHpAGhlt+cAlccAltp........ 0 16 41 54 +1894 PF02594 DUF167 Uncharacterised ACR, YggU family COG1872 Mian N, Bateman A anon COG1872 Domain \N 22.60 22.60 22.60 25.40 22.50 22.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.22 0.72 -4.05 29 1679 2009-12-08 16:16:35 2003-04-07 12:59:11 11 3 1585 3 582 1119 107 76.30 39 73.13 CHANGED pppslhlplcVpPpupcsplsulpsp.....pLclplpuPPhcGKANpcLlchLu+hhpls+SslplhpGppSRpKhlhlps ................s.sslhLplhlpP+...Au..+..ss..I.s.Glcs-.........plKVslsAPPlDGpANscLl+aLuK.thcVsKSp.Vslt+GchuRc.KplcI..s............ 
0 177 345 468 +1895 PF02596 DUF169 Uncharacterised ArCR, COG2043 Mian N, Bateman A anon COG2043 Family \N 20.20 20.20 20.30 21.30 18.20 17.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.27 0.70 -4.95 76 379 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 254 0 211 370 56 210.60 21 82.60 CHANGED sppLtchLcLctpPVAV+hh.t...............................spc.hst....shp..ttthp..............aCphlth.AR.pG.........cshhh.ss-phsCssGusshGhtch.st...........lpsG........................phhhp.hthhps.csuc+hhcp.hPpl...ttphhullhuPLpc.ssh.....p.P..DllllhssPtQhhplspuhhYtpGs.phpsshsuhpusCu-ssshsh....hpspss.hslGCsGsRthut.hp....ccElshulPhptlpclhcsL .......................................................s...l.phlpLptpPlulphh.........................ppp..st.....th.......tt.hp...............hC..ph..hth..Ap....p.G.........pshhh..st-s...h...s..C.huGthshGhtph.sp.............................hlpsG...................................thhhp....thhpo.-tucchhpp..lshl...tsp.......h..tslhh.u..P.Lpc..hph.....-.P....Dl..llhhssPtphhtLsps...hhappss..hpsshssht.usCusssshsh.....ppsp.s..hslGshuspthst..hp....s--hshulPhpphpchhpt......... 0 81 158 191 +1896 PF02598 Methyltrn_RNA_3 DUF171; Putative RNA methyltransferase Mian N, Bateman A, Eberhardt R anon COG2106 Family This family has a TIM barrel-like fold with a deep C-terminal trefoil knot. The arrangement of its hydrophilic and hydrophobic surfaces are opposite to that of the classic TIM barrel proteins. It is likely to bind RNA [1], and may function as a methyltransferase [2,3]. 
18.60 18.60 24.50 20.90 17.90 17.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.85 0.70 -5.06 23 418 2012-10-01 22:53:19 2003-04-07 12:59:11 12 7 347 2 286 396 27 273.20 32 79.92 CHANGED slSlslPsSlls..sspshc.tTahsupIARAsslFsVsEIlla--.....................................................tp..tstpptps.......shhlsplLpYhtTP.YLRKplF...shpspL+aAGlLPPLssspHh..ppscptcaREGlslchspps................................phlslGhsc.ltlcp....tls.ssRVTVchps................tplVsPscsps.....thYWGYpVR.hspshuclFppssh.tGaDhslhsScpupslspsphphh...........................shtplLlVFGth...........ptttps.ppsc......tpspthFDt.l.NssPsQsspslRsEEAlhlsLuhLp ..............................plSlslPuSlls.......ss..ps.....c.+ThhsGpIARAsslFpV-Ellla--....................................................................................t..t....tt..p..tps..............sh.hlsplLpYh-sP.YLRK.......tlF...........Ph.+p.sL+huGlLsP...Lc.sPaHh.......pps....c.......scaREGlslpt.ssptt.t..................................shVshGh.p..c....lplsp...................tl..ss.hRVTVchsp.......................................spl..Vssppscp...thshYWGY......pVR.hs..s..sL.splhsc.ssh......t..G...YDhsIuTS.c+Gp.sl.s.ps..t.ht................................php+hLlVFGs...................................pshtts.pt.p...............st.psp.hFD..hhl....Nss.P.sQ..GocolRTEEAlhlsLshL........................................ 1 82 155 230 +1897 PF02604 PhdYeFM_antitox DUF172; PhdYeFM; Antitoxin Phd_YefM, type II toxin-antitoxin system Mian N, Bateman A, Eberhardt R anon COG2161 and [1] Domain Members of this family act as antitoxins in type II toxin-antitoxin systems [1]. When bound to their toxin partners, they can bind DNA via the N-terminus and repress the expression of operons containing genes encoding the toxin and the antitoxin [2]. This domain complexes with Txe toxins containing Pfam:PF06769, Fic/DOC toxins containing Pfam:PF02661 and YafO toxins containing Pfam:PF13957. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.24 0.72 -4.39 156 5612 2012-10-03 00:18:00 2003-04-07 12:59:11 14 16 2448 47 1340 3995 616 70.50 18 79.79 CHANGED hptlshs-h.+sphupllcpsppspp..llITccGcs.ssVllsh.....ccacphpc...phthhpt.ttspphtp........tthp..ts .....................hhsho-h..+pphspllcpspps.p.....p....l.hI..o..p..p..s..c....s...s...sll.lsh.....................cpapp.h.p.......t..h.t....tt.............t....................................................... 0 421 912 1136 +1898 PF02616 ScpA_ScpB DUF173; ScpA/B protein Mian N, Bateman A anon COG1354 Family ScpA and ScpB participate in chromosomal partition during cell division. It may act via the formation of a condensin-like complex containing smc that pull DNA away from mid-cell into both cell halves. These proteins are part of the Kleisin superfamily. 23.70 23.70 23.70 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.71 0.70 -4.79 11 3012 2012-10-01 19:44:35 2003-04-07 12:59:11 9 6 2925 0 778 2298 2016 209.20 27 81.05 CHANGED Llpctclssa-VslsclT-pYlphlcph.ppL-Lch.......sschllhAuhLlchKucsLLs....tpp.-p..-.t....phc-thhtcL.....h+tttc.hcchchpth...tphs+c.sshcchlctlcc...th.cl-c......shht.htth.hh.c.thtsthcchp.........p.hclslE-phpELlt.............p.h.tl...s.hhsa.pLh............hshhclVssFlALLhLhpsptVplpQc-.au-lhlphh 
..................................................................................................................................LIp+pclDIhDIslsplT-QYl.....s.....Y.......l......c......p......h.....p.........p..........h.........cL..-l...............................AuEYLVMAApLltIKS+hLLP...........p..t.p....t...p...p.......t....-.......t............................-s.....R.p.-.....L..l...p......c.L.h..-.Yc..p...a....K.th..u.....p.t.L.pp.h..t....ttt..........has.+.t......................p..p..h..h..p..t....t.............t..l.p...........p..h..c...L.h..h..A..h..tp.l...hp.+.t......p.h.pph.......................tl...s...l.cp.p.h.p....p..lh..t.....................................h........t.....t....p......hhpFp.p.L.h.p.p................................tps.t.t.t..l.l..sp..FLAlLELhKpthlpltQ.p.c..sassIhl...t....................................................................................... 0 264 526 667 +1899 PF02617 ClpS DUF174; ATP-dependent Clp protease adaptor protein ClpS Mian N, Bateman A, Moxon SJ anon COG2127 Family In the bacterial cytosol, ATP-dependent protein degradation is performed by several different chaperone-protease pairs, including ClpAP. ClpS directly influences the ClpAP machine by binding to the N-terminal domain of the chaperone ClpA. The degradation of ClpAP substrates, both SsrA-tagged proteins and ClpA itself, is specifically inhibited by ClpS. ClpS modifies ClpA substrate specificity, potentially redirecting degradation by ClpAP toward aggregated proteins [1]. 
20.00 20.00 20.10 20.10 19.90 19.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.71 0.72 -4.26 194 2895 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 2572 47 893 1937 1064 79.80 36 28.96 CHANGED hcps..shY+...VlLhNDDaosM-FVlplLpphF.phsp-pApplMLpVHpcGp.uls.ulhsh-lAEs+spplpphu......pttth.........PLpssh ...........h.p.PshY+VlLhNDDaTsM-FVlpVLpchF...s....hs.........h-cAsplMLpVHppG+..uls.G.l.a.o.t.E.lA.EoKstp.Vpphu........+ttta.........PLhssh....................... 1 268 550 744 +1900 PF02618 YceG DUF175; ADC_lyase; YceG-like family Mian N, Bateman A, Moxon SJ anon COG1559 Family This family of proteins is found in bacteria. Proteins in this family are typically between 332 and 389 amino acids in length. This family was previously incorrectly annotated and names as aminodeoxychorismate lyase. The structure of Swiss:P28306 was solved by X-ray crystallography. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.01 0.70 -5.39 195 4001 2009-09-21 13:22:03 2003-04-07 12:59:11 11 7 3759 2 892 2962 1446 281.50 30 77.07 CHANGED htlplspGsohppluppLpcpullpss........hh....F.phhs+hp..stssp.l+uGpYplpss.hostpllptL..spG........................................................csht......h.plTlsEGhshpp............lhptlsp.........pshh..tp.t......t.tp........htphst.................EGhLaP-TYpa.stss.......ospp.llpphhpphp...phl..tpthppcs...................ttl...sh.osh-hlhlASIlEKEsu.hssERshlAuVFhNRLc.p.......GM.....pLQoDPTVlYu..l...tt.......tt............plppp...DL.c.ts....oPYNTYthpGLPPsPIusPGpsulpAAlpP.spsc.....aLYFVA...c.sc..........Gs..HhFopThpEHppsV...pcY 
.............................................h..lpl.tGsuhpplupp.Lpcpsl.lpss....th.....F...phh.h...+.h......p.......s......h...sp....l+uGp.Yp.l....pss.hosp.cllphL.....pp.G........................................................c.psp...........h..plslsEGh.sh.pp..........................hhptl.tp..........................tshl.......ppp..............hpp....th.h...t.t...h..tt.ph.s.ph..............................EGhhaPsTYp.h..st.....ss............o..s.p..llcphhpphp.....ptl........pph.h.t.tp.t.............t.sh.....sh.s..phlhlASllEKEs....u....h..s.p.-.RshlAuVFhNRL.p.t.......sM...........hLQoDsTVlYu..h........sc.........p............t...plppp.............Dl...c..hsoPYNT.Yh.h....sGLPPsPIssPuhsulcAuhp.....P....spo.....s.......aLY..FVA...c..ss........Gs..phFup.shp-HppsVpc................................................................................................................................................ 0 308 608 762 +1902 PF02620 DUF177 Uncharacterized ACR, COG1399 Mian N, Bateman A anon COG1399 Family \N 21.30 21.30 21.50 22.20 21.00 20.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.80 0.71 -4.06 181 3846 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 3794 0 853 2473 744 115.20 24 64.04 CHANGED lpuplpu..slphsCsRCLpshphslphp.hphh.h..h...sppt..tt...............................--..thhh.............sstp............lDLtphlp-pllLslPhtshp.....p..sCps..hsststs..tt.........................tt.........s....P.....a.usLpsL+ ....................shplpsslsl.CsRCLcP..hp.ht.lphs.hs.h..h...ht...sppp....tpt...............................................-s.psl.l............cpsp.............lDLtshlcDpllLslPhtslpp........-.....cCps.....h.s.s.s..ss..p...hsct...................................p...pp.ps.................s......P.....a.AsLtsL........................................ 
0 270 558 726 +1903 PF02621 VitK2_biosynth DUF178; Menaquinone biosynthesis Mian N, Bateman A, Eberhardt R anon COG1427 Domain This family includes two enzymes which are involved in menaquinone biosynthesis. One which catalyses the conversion of cyclic de-hypoxanthine futalosine to 1,4-dihydroxy-6-naphthoate, and one which may be involved in the conversion of chorismate to futalosine [1]. These enzymes comprise two domains with alpha/beta structures, a large domain and a small domain. A pocket between the two domains may form the active site, a conserved histidine located within this pocket could be the catalytic base [2]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.28 0.70 -5.18 131 1044 2012-10-03 15:33:52 2003-04-07 12:59:11 9 3 556 15 368 1003 282 248.00 24 91.99 CHANGED lplGtsshsNsh.haauL.........................ths.hchhhssspsLNphhh.puclDl..uhlSshtasphtccYtlLs..usuuhGpshuslllu..................p....t..tt.....plAlsupssTushLh+lhh.pt........phh.hs....cl..thhpst......DA.........ullIt-stl..................satppth....hhlhDLGchWpchT.G..LPhshuhhsh++s.hs.....phhtplpc.slppShphuh..pphpphhph...hhcpsthh.......pp.hlphYls.phohslupctppulcphhphutc .......................................................hplup.shhNsh.hahul..............hpht.......ths.hc.h...h.h.ss...s...pp.LNchhh.....p..s...c..l.Dluhl....S....h....h....t..h..sp..h....h....c....c...Y.t.l.L.......ss.s..u....hG..c..sh..usl.lls..................pp....h..t...hp...........................plAls.u.....p.ssT.ushL.h+..lhhtct...................phh.hs......pl....h.hpst......DA............................ull...It...-..ptL.....................pa.t.p.p..h........phhhDLuphWp...-..h.T...u.....LP.......h.s..hu..shsh++s..h......................shhtplp.ctlppuhph.u....pp.p...hlhph....hh.cppt.hp...........pphlph.Yh..s...phsh...sl..u...p..pphtAlcphhchsh.t................................................... 
0 156 292 345 +1904 PF02622 DUF179 Uncharacterized ACR, COG1678 Mian N, Bateman A anon COG1678 Family \N 20.60 20.60 20.60 20.70 19.80 20.50 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.22 0.71 -4.61 181 2362 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 2224 7 628 1535 1045 160.00 35 76.37 CHANGED hP.s..hsDspFp+oVlalC....cHsp.c.GAhGlllN+..Ph..s....lsls-llppls.........h................................ttph..............sVahG....GPV.ptc+....GF...lLHss..t............t....ts.............ol..pls...s.....slhlTsohDlLps...lupu.tu.....PpchllsLGYAGWusGQLEpEl.tpNuWLsssAssc..........llFss.s.pp...+WptAhpplG ......................................................................................................................................hPshpDshFpRSVlYlC....-Hsp..p.G.A....h.GlllN+..Ph..s......l.sl..psl..Lpplph.s..........................................thcp.....................................sVhhG......GP..l..pp-R.......GF...lLHss.tt......................h......su...................................................ol...pls.......s..sl.s.h...T...s..o.tD.lLcs....luss....tt............PpchllsLGYAuWst..GQLEpEl.tc....NuWLsss.A.c...s..........llFs....s....shtc.......+WppAhphlG..................................... 0 200 399 525 +1905 PF02623 FliW DUF180; FliW protein Lima T, Mian N, Bateman A anon COG1699 Family The protein BSU35380 from Bacillus subtilis (renamed FliW) was characterised as being a flagellar assembly factor. Experimental characterisation was also carried out in Treponema pallidum (TP0658). In Campylobacter jejuni, Cj1075 has been shown to be involved in motility and flagellin biosynthesis. The two paralogues in Helicobacter pylori (HP1154 and HP1377) were found to be able to bind to flagellin. FliW proteins are involved in flagellar assembly [4]. 
FliW is part of a three-part feedback loop: in Bacillus subtilis FliW inhibits CsrA (an RNA-binding protein) which inhibits FliC translation; hence FliW is required for FliC (flagellin) production [5]. 25.00 25.00 31.20 29.20 24.60 23.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.40 0.71 -4.55 83 725 2009-09-10 21:46:53 2003-04-07 12:59:11 10 3 652 2 212 534 71 119.40 31 84.43 CHANGED lpF.pGl.GF-.ph+cFhll.....ppsssFhhLQSl-ssclu..FlllsPathh.-YchclscpphptLplpstp......-lhlhsllol.....tss...hcchTsNLtAPlllNhcsphutQllL.psscYsh+a.lh .......thppsIhGFE.ch+cahlh.........ptcpsFhhLpSl-....st...s.lu..FlllsPahhh...-Y..ch...cl...ssthhphLpl.ps..tp................c.lhlhsIlsl........sps........hccsTl..NlhAPlllNhcsphutQllL.ssspYshpa...h................................................... 0 108 182 198 +1906 PF02624 YcaO DUF181; YcaO-like family Mian N, Bateman A anon COG1944 Family \N 25.00 25.00 25.40 25.00 24.10 24.50 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.89 0.70 -4.95 127 1709 2009-01-15 18:05:59 2003-04-07 12:59:11 11 25 1462 0 432 1316 83 324.50 28 58.87 CHANGED ssGKGtottp.ApsSAlhEulERauuth.hst.................hthtpshtph......ttts..lsspshhhhsppph.....................hs.st.lp........Wh................shsltss.....c.plhl..Ptsh...httsh..................hhh..sS...NGhAuGsohcEAllpulhEllERDuhslh.....hhsp...hshspl..........shss....thstlhpthpp...tG..hclhlhDho.......-h.slPshsuhh....................hpssssthhhGhGu+hsschAltRAloE..hsQsthhhh.ttstpp.....................................p.tphtphhshsphttt..hhh...t........sphphsshs.............s.tsslptl.lstlpptGh-.lhsl.Dho......h.clG.lsVV+lllPGhp. 
...............................................................................................sGKGsopcs..Ahs.SALuEhhERhusshhhs-...........................hhhtpslsst...............................shs.pass-pah..t...................................thac.P.-scls.................hs................................shhc....s...cs......p..slal..Phsl....lsshas..........................S....NGhuA....G.NohpEAhlpGL.E.lhERtshsth..................................hhpp......lslPcls.........s.shs.chss.l.hctlpph.p.t.pG..aslhshDso......h..ss..th.P.Vl.ssll....................hsssssssh...suhGAH.sDhtlALcRslTE...l...h....QuRshps...h.sshssssh....................................stcchsc..ht.shpp..p..hhcs.stlh.....t........httt.....schsas-hs............hss.ss.p.c-hssL.hsh.hppt.sh-..lhls.Dhp.........cl...G...lhssRllVPGhp........................................................................................ 0 120 256 346 +1907 PF02636 Methyltransf_28 DUF185; Putative S-adenosyl-L-methionine-dependent methyltransferase Mian N, Bateman A, Moxon SJ, Eberhardt R anon COG1565 Family This family is a putative S-adenosyl-L-methionine (SAM)-dependent methyltransferase [1,2]. In eukaryotes it plays a role in mitochondrial complex I activity [2]. 
23.20 23.20 23.50 24.00 23.00 22.70 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.82 0.70 -4.98 157 1692 2012-10-10 17:06:42 2003-04-07 12:59:11 12 15 1433 3 694 1556 1906 243.10 24 62.15 CHANGED hluhhhhphapph....s...ts.......hpllEhGsGpGpLhsDlLpshp........ph.P.ph.....htt..hphhllEhS.............stL+phQpppL.................tt...................................hsh.....W..hc.pl..t..ph.s...................hsslllANEhhDAlPl+phh.................................hpsss..a.pEthVshs.......................................ss................th...ths..h.ts....................htshltphthth...............................tGhhsElsssstshhppl...upplsp....................................................GssLhlDYGh.stpp...........hssTL..puh................ppHpht.........s.sh...tpP.GptDlTAHVDFssL....tpsu.pt.G...hpshuhs .............................................luhhhhp.hpth......t........t............hplhElGsGpGpLhtDlLptlp.........ph.s..th.....htt...hphhllEhS..........................spL.pphQpp.p..L..........ps........................................................htt.l.t.W....hp..pl..ph......................hsshlluNElhDAhPlchht......................................................................psptt.......h.hEhhVshs............................................................................ts............ph.ths.....h.tsh.t...........................tth.l..pht.th..............................................tsth....h..Els..tttthhppluptltp.....................................................GhhlhlDYGh..tst.............................psoL........puh..............................hp.Hchh......................s.sh.tts....G...ptDlTucVcFstltphs.pt.s..ht.....h............................................................................................................ 
0 221 439 583 +1908 PF02638 DUF187 Glycosyl hydrolase like GH101 Mian N, Bateman A anon COG1649 Family \N 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.14 0.70 -5.57 4 1510 2012-10-03 05:44:19 2003-04-07 12:59:11 10 34 1000 0 288 1415 225 300.00 32 62.31 CHANGED clRGVWlssVsslca..............spsQppphIshLcclp...hNTlaspVhssGpsLYPSthu..Wpshhssp.ttD.GhDsLu.hI-cAH+RshcVhsWF...chuhpss.....shschlcpHPthhh.p+pDhsts.....GsphahsPhhPEVQsaITpllh-lVp+YDlDGVQhDDaFh.spphG.-..s.uhYcQ.st.sshus.cD..................WRpsplsphltplstpIKAsKPslphulSPsssa.N..............huYsshhtDhpcWlcpGllD.lssQlYhs.hut.suthphhuhh.ocplhPssVtlhhGlsshpls.s..p.-ssWss ..............................................................hRuhWlsoV.....p..h..-.aP.................hpt.p....pp...t....h.l....c...h.L...c...c...lpt.....thNsVhhQV.+.P.s.usAl.asS.p.......h.........h......P.......W.............S...........p..h....h....T.......G..........p........G...p.....s..P......G......aD.P..LtFhl-EAHK.R....G.....hc.lH...A...Wh...............hch...s....h..s.sp............p.h.s..p....h..h...s...p...p..Ps......p....h.h......p.....+.....-..h.......l.....h.s..........h...................................................u..s.......p...h...h.ls...P....GlPE.V.pcalsslltEl..Vp+.Y....s....lDGl..p..hD........D......Y...F...Y..............h......p............s.....s......G.........t......p.....h...s.....D......p.....p........s...a.p.p...Y..s.......t.s..h...s.s..h..s.D....................................................W...R...RsNs.pp...l...lpclppsl.K....s...h...K....P....t.........VcFG..l.S..P.h.Gla..s.tpc...............................sptGhtsY-.p..Y..AD..sppWl....p.p....Gh...lDYlsPQlY.W....s.........h.sh.....s........u......t........ashlhpW...W..s.p..s.....t..s....p.....h......tLahG.sh.hph.................................................................................................... 
0 98 206 262 +1909 PF01579 DUF19 Domain of unknown function (DUF19) Vogel B, Bashton M, Bateman A anon Pfam-B_402 (release 4.1) Domain This presumed domain has no known function. It is found in one or two copies in several Caenorhabditis elegans proteins. It is roughly 130 amino acids long. The domain contains 12 conserved cysteines which suggests that the domain is an extracellular domain and that these cysteines form six intradomain disulphide bridges. The GO annotation for this protein indicates that it has a function in nematode larval development and has a positive regulation of growth rate. 22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.95 0.71 -12.01 0.71 -4.25 47 383 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 8 0 378 354 0 150.60 15 67.46 CHANGED sCsststh.........pshpCh....h..htphtpphpthshp....ptsphpphppsCsshhpChpshpC.......tt....hp.h.thCphh.ah.stpa.pChsKlts.......ptspChpsat...........ttptpphtpcsCpthhupc.sCh...........cctl.ppsCGp...pphptatsphh...........thsshhhp.pCshp ..............................................................................pCh.....h..htph.t...tht.t......hs..t.........pthpphpphCp.....p..h.h.pChp.sh..p..C....................t..p.hp.htt...h..C.p.hh.ah.sp.p.a..t.pChp....Klts.........p...pp..sCh..psa.....................................pptt...pppptC.p.thhspp.sCh...........cptl.pphCup...pthptatpph.................................h................................................... 0 97 120 378 +1911 PF02643 DUF192 Uncharacterized ACR, COG1430 Mian N, Bateman A anon COG1430 Family Two structures have been solved for members of this large (>500 members) family of bacterial proteins present mostly in environmental bacteria and metagenomes (distant homologues are also present in several Plasmodium species). 
TOPSAN analysis for pdb:3pjy shows that there is much similarity with the other solved structure, pdb:3m7a, solved for UniProt:Q2GA55 (Saro_0823), a homologue of Thermotoga maritima TM1668, UniProt:Q9X1Z6., The homologue in Caulobacter crescentus (CC1388), UniProt:Q9A8G6, is associated with CspD, a cold shock protein (CC1387), UniProt:Q9A8G7. However, the genomic context of UniProt:Q2GA55 is most conserved with a putative xylose isomerase, suggesting a possible role in extracellular sugar processing. Saro_0821, UniProt:Q2GA57, is annotated as an AMP-dependent synthetase and ligase. PDB:3m7a structure corresponds to the C-terminal (27-165) fragment of the YP_496102 (Saro_0823) protein and it is structurally unique, as the best hits from Dali have a Z-score of 3.8 (1nt0, 2j1t, 3kq4) and it is thus a likely candidate for a new fold. Interestingly, many of the top Dali hits are involved in sugar metabolism. There are no obvious active site-like cavities on the protein surface of 3m7a (http://www.topsan.org/Proteins/JCSG/). 20.30 20.30 20.70 20.50 19.30 18.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.23 0.72 -4.36 180 1078 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 966 4 459 1048 876 105.70 30 67.60 CHANGED shplElAcTstpRtpGLMhRpsl..........ss...sp.GMLFsa..s.......psph.tsFWM+NThlPLDllFlspsGplhslppts......Pts..tss...h.ssh...........s.sph....VLElsuGhhpphulpsGcplph.. ..................h.hplElAso.tpRtpGLM..aRpsl..............st...sp.GMLFla......s...........psph..tsaWM+NT.lPLDllFlcscGp.....lhslpc.t..........Phs.....ps.......h.sst.............................ts...spa......sLElsuGhhtchGlpsGsclp...h............................ 1 129 301 397 +1912 PF02645 DegV DUF194; Uncharacterised protein, DegV family COG1307 Mian N, Bateman A anon COG1307 Family The structure of this protein revealed a bound fatty-acid molecule in a pocket between the two protein domains. 
The structure indicates that this family has the molecular function of fatty-acid binding and may play a role in the cellular functions of fatty acid transport or metabolism [1]. 20.30 20.30 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.61 0.70 -5.33 45 5275 2012-10-02 12:41:15 2003-04-07 12:59:11 11 9 1863 22 831 3515 181 269.70 26 95.33 CHANGED clsIlTDSousL.s..t-hhcchslpllPLslhhss.csYpDsh..-l.s.cphhcchtpptphPp.TSQPssschtchacc.hhppua-tllsltlSStLSGTapsAthusphh.....shc..........lpllDSphsuhu.GhhVhcAuchhcpGt.shc-Ilpplpphppcp..psahsVcsLchLh+GG...........RlupstuhlGsLLslKPllphcc..GplpshsKsRupK...KAlcclhc.....l..ttptsstthhclslhaus..st.-puppltcplps..phst..ch.hsthuslIusHsGsGululshhhc .....................................................ltllTDSos.s..l.s....t..c.h.h..c.........c....h....s....l..p....l....l....PLs.lh.....ls....s.....p...s...Y...h...Dsh..................sl..s.c...ca...h.p..p..h.....p..s...p..p.h.P.p..T.....SQPshupahchacp...htp..s.h..s....p..lls.lpl..SusLSGoaps...Ap......u.s.p.h...............shp...................lpll.DS.phs.uhuhuhhlh.pA.s...c..h...h.pp..Gt....shc..-.l..l.ppl.p.p..h.pp..p.s..phah..hlss.LcpL.....h+GG...........Rlop......ssuhl...G..s...L...LsIKP..l..l.p..h..p..c...GpltshpKs....Ru.....p..K.....+..u..h..pplhc.................................hh..........p.pht.......s...t......t.....h..p...l..h......l..s.aus.....s....-tA...p....p....l....pp..p.lpp...........th.st.........pl....l..t.....h.usllusHsG.Gslulhhh..t..................................................................................................................................... 0 343 598 735 +1913 PF02646 RmuC DUF195; RmuC family Mian N, Bateman A, Moxon SJ anon COG1322 Family This family contains several bacterial RmuC DNA recombination proteins. 
The function of the RMUC protein is unknown but it is suspected that it is either a structural protein that protects DNA against nuclease action, or is itself involved in DNA cleavage at the regions of DNA secondary structures [1] 23.50 23.50 23.50 23.80 23.40 23.40 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -11.80 0.70 -5.58 23 2678 2012-10-11 20:44:43 2003-04-07 12:59:11 11 9 2619 0 607 1935 2922 298.80 31 66.14 CHANGED pppL-thhpslcEpLp.shppplcsoacphucchtpLpcpLt.l.thp...pplup-sssLppsLp..ssKopGsWGElpLEplLEssGL.cshpYppQssl.......tsuphRPDhhl+LPssp........hLlIDuKhsLpsYpchhsAp-sspp...ptshpphhpul+tHl+slupK.Y..........lhsscT.DaslMFlPsEuhascslcpsstlhphutcppVllsoPoTLhshLpolshha+spphpcpApcItchsucLhcchsphsschpcltppLspuspshsphhsphsptptphhpphpthpttusctptphsst ..................................................................ppLpphhpslc...Ep...L...c....sh....ccp....l....p.poap....p....pu....c....p....h.psL.......pc.pl............tt............lt....p....hp..............p.......plu....p-sssLscsLp..ssKspG.sWGEl.LpplLEsu.G.hh.cshc..Yp.pQssh..................st...........sup...hpPDhll+LP.ss+........................pllIDuKhsLsuY...p+hhs......A.....p.....-.s.t.p+....................cpshppahtul+pH....l....+sLupK.tY.t........l..s..c.o.hDaslhFlPsEshattAlcps.....s..pLh..p..uhc.pp..IhlsoPoTLhshLpolssha+ptp.p...csApcIucpsupLhcchstFs-ph.s....c.luppLspAsps...acpshsphsps..t...t...s...h..lp..phEthct.usp.......t................................... 0 190 388 502 +1915 PF02649 GCHY-1 DUF198; Type I GTP cyclohydrolase folE2 Mian N, Bateman A anon COG1469 Family This is a family of prokaryotic proteins with type I GTP cyclohydrolase activity. GTP cyclohydrolase I is the first enzyme of the de novo tetrahydrofolate biosynthetic pathway present in bacteria, fungi, and plants, and encoded in Escherichia coli by the folE gene; it is also the first enzyme of the biopterin (BH4) pathway in Homo sapiens[1]. 
The invariate, highly conserved glutamate residue at position 216 in Swiss:Q5F9K6 is likely to be the substrate ligand and the metal ligand is likely to be the cysteine at position 147. The enzyme is Zinc 2+ dependent [2]. 25.00 25.00 25.80 25.80 24.40 24.20 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.78 0.70 -5.15 121 985 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 926 5 311 797 284 258.70 36 90.12 CHANGED hPDVQ....sptsstplsls+VGlpslchPlplpsp.....t...pshAphshhVsL...P.sp..hKGhHMSRhhclLst.hp..p.t..lsstsl....cplLpchhpph......upsAclchpFsahlc+pu.lSs.hpuhtsYslthpuphp.ts.....t.hphplplpVshoSsCPCStplScpts...................................HsQRShsplplch....sst....l.lp-LIchlEpuhS.s.lhsllKRsDEpthsctuapNPhFVEDssRclstpLppp....sp.hssaplcscshESIHsHsAhA .........hsDlQ....ostcs.hthsIpcVGl+slchP.lplppt...................t..posuphshsVsL...s...tcpKGhHMSRhlchl-th.p....p..th.....lshs....sl....pplLpshhpph.......pupsAplcl.shsaFhc+tu.....Plos...lpuhhsY-.Vshsuphctts..........t.hchplplplsloolCPCSKcIScauA....................................HNQRuhlolpspl........sp..p......lh...lpcll-hh...EssuSs.laslLKRsDEKhVTEcAYcNP+FVEDhlRplAtpLhpp......................stlssaslcscN.ESIHsHsAaA...................... 0 100 210 268 +1916 PF02650 HTH_WhiA DUF199; WhiA C-terminal HTH domain Mian N, Morningstar A, Mistry J, Bateman A anon COG1481 Domain This domain is found at the C-terminus of the sporulation regulator WhiA. It is predicted to form a DNA-binding helix-turn-helix structure [2]. The WhiA protein also contains two N-terminal domains that are distant homologues of LAGLIDADG homing endonucleases [2]. 
21.10 21.10 21.20 22.80 21.00 20.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.53 0.72 -3.59 23 1969 2012-10-04 14:01:12 2003-04-07 12:59:11 9 6 1950 1 338 1114 114 85.00 46 27.44 CHANGED lNRlsNh-sANlpKospAuh+plcpIphIhcplG.h-tLPppLcclApLRlpas-tSLpELGchlcs....sloKSGlNHRlRKlppIAc .....................................sNRLsNsETANLs+TlsAAh+plp.sIphIpcplG..l-sLP...-cLp....-lApLRlp+.-hSLcELG-hlss....Pl..oKSGVNHRlRKlpclAp.............. 0 134 244 300 +1918 PF02655 ATP-grasp_3 DUF201; ATP-grasp domain Bashton M, Bateman A anon COG1821 Family No functional information or experimental verification of function is known in this family. This family appears to be an ATP-grasp domain (Pers. obs. A Bateman). 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.91 0.71 -4.37 18 304 2012-10-10 13:17:03 2003-04-07 12:59:11 9 4 195 1 172 13200 4990 163.10 22 44.85 CHANGED suDKhcsh+tLcst..hssP........hp.tchtt..........scphllKPtsGsGupusphscstpp....................llQ-aIEGcshSVSllussccshsLslN+QhIsh...t..............htYsGshsPh.pph..ppchhphApcllcsl....GLhGhsGVDll.......ls-..ttPYllEVNPRhTso 
.............................................................................................................sKhhh.hp.hL.....pph....sl..s..h.P...................................h..............t........t...............................tt..th.llK.....P.h.......s.....G...s........G.....G.....h.......s...l...t...h....h...s...s...t.t...t......h....t...........t.............................................ll....Qca..l....c.....G.....p.....s............h....S..l....s....h....l...........s...........s...........s.......p......p.....s....h....h.....l...u....h...s..c........Q...h....l..s..h........t...........................................................ht.a....s..G......s.....h.......s......s......h......p......h......sh...........pp....p....l.....h....p.....h....s.....p....p.........l........s.......p......t.....l...........GL....t.......G....h....s...G......l...D.......h.l.......................l..s..s.........tt...a..l.l..ElN..P.Rhsu.............................................................................................................................. 0 52 118 149 +1919 PF02656 DUF202 Domain of unknown function (DUF202) Bashton M, Bateman A anon COG2149 Family This family consists of hypothetical proteins some of which are putative membrane proteins. No functional information or experimental verification of function is known. This domain is around 100 amino acids long. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.60 0.72 -3.76 197 2355 2009-01-15 18:05:59 2003-04-07 12:59:11 10 32 1062 0 920 1576 39 77.60 28 34.40 CHANGED +stlAsERThLAWlRTululhuhuhslhp..hh..h.t............................................hsh.........hhuhshh..............hlulhhhhhu.......hhp..ahptt...pt ....................+stLAsERT...a.L.AWlRTuLuhhuhuls.lh.phs..p..s.........................................................................................................hlth.........lhuhlhs..............lluhhhh.hau.....................hhR.aht....s.......................................................................................................................................................................... 0 298 582 809 +1920 PF02659 DUF204 Domain of unknown function DUF Bashton M, Bateman A anon COG1971 Family This family consists of hypothetical transmembrane proteins non of which have any known function, the aligned region is 180 amino acids long. 21.70 21.70 21.80 21.70 21.40 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.29 0.72 -3.85 162 3883 2012-10-03 02:02:08 2003-04-07 12:59:11 10 3 1582 0 728 2452 78 66.50 31 65.64 CHANGED hS..hDAhAVuluhuhh.............php....hhhsulhhGlhpslhshlGhhlGphhu..p..hlsph..........uchlGullLlhlG ..................hShDAhAVuluh.uhh.............psp......hlhsulhhGhhphlhsh.lGh.hlG.ph.h.u..p...hl.s.ph...............sc.hluullLlhlG...................... 0 276 519 622 +1921 PF02660 G3P_acyltransf DUF205; Glycerol-3-phosphate acyltransferase Bashton M, Bateman A, Eberhardt R anon COG0344 Family This family of enzymes catalyses the transfer of an acyl group from acyl-ACP to glycerol-3-phosphate to form lysophosphatidic acid [1]]. 
21.00 21.00 25.80 25.70 20.70 20.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.15 0.71 -4.71 127 3881 2009-09-13 07:44:17 2003-04-07 12:59:11 10 9 3433 0 833 2498 1770 180.00 37 84.40 CHANGED lluY.LlGSlshuhllu+hhthhDlRphG.SGNsGATNshRsh.......GtphuhlshlhDhhKGhlsl.hl....uphhhhs.hhh.....hh...................suluull.GHhaPlahpF+.....GGKGVATshGl..llslsshhsl.....lshh.lahllhhlo+.hs..........SLuSlh...uslshslhshh..........................................thsh.hhlhsh...hluh..lllh+H+pNIpRLl ...................................lluY.LlGS.Issulllu+l.h..t.h..h..DlRphG.S...........GNsGATNshRlh.......GK.t.uuh.hsllhDhhKGhlsl.hl..........s.hh.hs.h..sshhh..............hh.......................................suluA.lL.GHhaPl..Fht.FK................GGKGVATuhGs.......lls.l......s............hh....s.l...........h.hhs....sall.slhls+.hs...............SLuull.......uul.hsslhshh..............................................h.p..h...h..hh.hsh.....hluh..lllhRH+sNIpRl..................................................................................................................................................................................... 0 284 542 701 +1922 PF02675 AdoMet_dc DUF206; AdoMetDC; S-adenosylmethionine decarboxylase Mian N, Bateman A, Moxon SJ anon COG1586 Family This family contains several S-adenosylmethionine decarboxylase proteins from bacterial and archaebacterial species. S-adenosylmethionine decarboxylase (AdoMetDC), a key enzyme in the biosynthesis of spermidine and spermine, is first synthesised as a proenzyme, which is cleaved post translationally to form alpha and beta subunits. The alpha subunit contains a covalently bound pyruvoyl group derived from serine that is essential for activity [1,2]. 
25.00 25.00 27.60 27.30 22.50 22.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.43 0.72 -4.15 134 1798 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 1523 19 460 1159 1636 119.90 39 60.28 CHANGED Hllh-hass..s.chLsDtchlcphltcAsctuGATllsh.thacF...pP..........pGVoulslluE....................................SHlolHTWPEhs........aAslDVaTCGp..ssPhcA...hphlhcshpscphphpp.hpRG ......................................................................-pLhssppLpclLs-s.sphhGA.s.l..Lsl..uppca.....pP..........QGsS.s.slLlSE................................................SHIslHTaPEpp..........hAsIDV.TCGs...lsPhcA.....lsYLhcpL.cuc.hsshch.hsRG........................ 0 183 309 389 +1923 PF02676 TYW3 DUF207; Methyltransferase TYW3 Mian N, Bateman A, Wood V, Mistry J anon COG1590 Family The methyltransferase TYW3 (tRNA-yW- synthesising protein 3) has been identified in yeast to be involved in wybutosine (yW) biosynthesis [1]. yW is a complexly modified guanosine residue that contains a tricyclic base and is found at the 3' position adjacent the anticodon of phenylalanine tRNA. TYW3 is an N-4 methylase that methylates yW-86 to yield yW-72 in an Ado-Met-dependent manner [1]. 
20.50 20.50 21.00 20.70 19.50 19.60 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.47 0.71 -5.20 42 374 2009-01-15 18:05:59 2003-04-07 12:59:11 9 24 292 11 258 380 11 199.60 29 62.09 CHANGED cppKpphLpclt..........-tp.cGplDcsIhsllchIN...uhpshhTTSSCSGRIoVh.p....................tp.hp+.......................tsupWLahsHcs...................hphpplhcslptshsst.....................lhh+hcP.ILHVhs+sLcpAptlhshAhssGF+cSGItshp...................pphlVtlRss.h+l-sPluhps...............chlV.sc-YLphLlcluNc+hpcspc+lpRLppslcp..hhp .................................tKtphLppl...........DhS.+GplDtslhsllphlN...sh.tshhTTSS....C.uGRlslhtps.............................t...tc.............................................................................tsupWLhhsH.c.............................................................hp.pphhphhph..ttttt..........................................................lhh+hEPh........ILHVhspshppAphLhps.A.h.s.uGF+p....SGltshp................................................tphhVulRss...htL-sPluhps.........................phhV..sc-Ylph....Llp....l...uNp+hpcNpc+hp+hhptlpt...t........................................................ 
1 79 129 198 +1924 PF02677 DUF208 Uncharacterized BCR, COG1636 Mian N, Bateman A anon COG1636 Family \N 21.10 21.10 21.10 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.35 0.71 -4.49 75 1642 2009-09-12 22:47:58 2003-04-07 12:59:11 9 4 1534 0 279 1063 255 165.00 42 73.24 CHANGED lLLHsCCAPCSsaslctLppp..shclosaFYNPNIHPhpEYhhRhp-sc+hspch......slphltu-..Y-...hcpahctsc..GhEp-sEtGt.RCthCachRL-poAphApEtGFchFoooLhhS.aKshpplpchGcchupca.slpahhpDaRKtsGhpcslc.......luKchslYRQpYCGClaShc-p ..............................lLlHsCCAPCSs.shEhLpp....h-hslaFaNPNIHPhpEYhhRtpppt+Fscch...................sl..t..hl-.u-..Y-....c.pahcts+.....Gh.E..pEsE.t....G.hRCphCF-hRh-psAphAh.......E.......tG.......achFoosLslS.hKshp.pINphGhcssph........Y......s..............lpYhs.sa+KssGhpRtlE.......hs+c.phY+QpYCGClauhpp.p............................ 0 102 190 241 +1925 PF02678 Pirin DUF209; Pirin Mian N, Bateman A, Moxon SJ, Yeats C anon COG1741 Family This family consists of Pirin proteins from both eukaryotes and prokaryotes. The function of Pirin is unknown but the gene coding for this protein is known to be expressed in all tissues in the human body although it is expressed most strongly in the liver and heart. Pirin is known to be a nuclear protein, exclusively localised within the nucleoplasma and predominantly concentrated within dot-like subnuclear structures [1]. A tomato homologue of human Pirin has been found to be induced during programmed cell death [2]. Human Pirin interacts with Bcl-3 and NFI [3] and hence is probably involved in the regulation of DNA transcription and replication. It appears to be an Fe(II)-containing member of the Cupin superfamily. 
26.10 26.10 26.50 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.44 0.72 -4.17 52 4965 2012-10-10 13:59:34 2003-04-07 12:59:11 11 18 2388 5 1562 3896 1851 109.40 35 40.71 CHANGED shclpcshst.........shhphhsPFlhhDphsPspht.u............shuh.ssHPHpGhETVTYlh...cG.clpHcDShGscuhlpPG-VphMTAGpGIhHSEhs.st...........stsh+GhQlWl .............................................................s....tp.hut.....shhps.hpsF.Fh..-a.a..sPp..ths.us............................-.h....stGF.ssHPH+shEh.l..Tall........c...G.pl.p...Hc..DS.h...Gs.......p.s..............h.............l...p.sG.-......lQhMoAG....s....GIh....H.SEhsssp........................spslchhQlWl........................................... 0 450 941 1293 +1926 PF01595 DUF21 Domain of unknown function DUF21 Bashton M, Bateman A anon Pfam-B_618 (release 4.1) Family This transmembrane region has no known function. Many of the sequences in this family are annotated as hemolysins, however this is due to a similarity to Swiss:Q54318 that does not contain this domain. This domain is found in the N-terminus of the proteins adjacent to two intracellular CBS domains Pfam:PF00571. 
25.10 25.10 25.20 25.10 24.90 24.70 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.93 0.71 -4.99 126 9166 2009-01-15 18:05:59 2003-04-07 12:59:11 15 26 4552 0 2462 6747 2342 186.30 24 42.42 CHANGED hhllhlhlL.lhluuhFuusEhAlhulspsclcpht....cp..ss.....ptApt.lhplhpp......sphLsslll...Gsslsslhhu.slushh...hhphhs...................huh.hluhllhThlhl..lhGEllPKsluh.p..tsppluhhhuhhlthhhhlhh.Plshllshhs.phlhphhshp.........t.hhop.cElctllp.up.ppG....slc..pcE ..................................h..llllhlLlhlsu.aFu.uuEhAlhulp+s+lc.p.hs.........cp.....Gs.....................ppApt....lh...cl...h...p.p..........sphLosl..l................Gh.slsslhhu..hlupsh.......htp..hhts..................................huh.hluh.sl.lThlh.l........lhuE......lh..PK.slAh..p..ts-pluh...hhuh..slthh.h...hl..ht..P..l...lal.ls.ths.sh.l....h+.h.hshp..............ptth..oc...-E....l.c.tllptut.ptG.lpt.......................................... 0 785 1593 2105 +1927 PF02679 ComA DUF210; (2R)-phospho-3-sulfolactate synthase (ComA) Mian N, Bateman A anon COG1809 Family In methanobacteria (2R)-phospho-3-sulfolactate synthase (ComA) catalyses the first step of the biosynthesis of coenzyme M from phosphoenolpyruvate (P-enolpyruvate). This novel enzyme catalyses the stereospecific Michael addition of sulfite to P-enolpyruvate, forming L-2-phospho-3-sulfolactate (PSL). It is suggested that the ComA-catalysed reaction is analogous to those reactions catalysed by beta-elimination enzymes that proceed through an enolate intermediate [1]. 
25.00 25.00 30.60 39.00 23.60 23.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.55 0.70 -5.36 30 234 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 207 2 133 248 145 238.90 32 79.40 CHANGED hthspRsp..K.PRppGlThllDKG...luhpthcDhLcsuupYIDhlKhGaGTuslhspchl+-Klclh+caslhsasGGTLhElhhh..psph.-cYlcps+clGFsslElSsGolclsp-c+tch....IcpspctGhpVhoEVG.....................pKcsppptp.psschlchhpp-L-AGAphVIlEuRESGps.Glacss...Gpl+sshlpcllpp.ls..hc+llFEAPpK.....ppQthhIpchGssVNLuNIs.p-llsLEsLRtGLRGD .............t..stRstK.PRppGlT.lhD.u.........................huhphhcDl.lc........s......sGpalDhlKhuhGotslhspp.l+-tlclh+paslhlhsGGhhhElhht....ps.p...h...ccYlcts+clGFcslElSsGsl.slsp-cphch....Icphp.ptGhp.shsElG......................pKs.ptths.pstchlchhcppL.-.A.GAphlhlEuc......Glhcss...uphRp-llppll.pp.ls...hc+lhFEAspp........ptptaaIccaGssVNLsNls.scllsLEsLRhGLhus...................... 0 56 91 118 +1928 PF02680 DUF211 Uncharacterized ArCR, COG1888 Mian N, Bateman A anon COG1888 Family \N 20.90 20.90 20.90 50.20 20.70 20.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.03 0.72 -4.18 22 122 2009-09-13 14:06:05 2003-04-07 12:59:11 9 1 121 29 80 118 3 93.30 43 97.60 CHANGED M...s..lRRlVLDVLKP.+pPsll-lAtpLuclcGV-GVNIoVhElDpcTpslplTIEGsslDa-pIcchIEphGusIHSIDEVsuGcpllEp.cssQ- ........slRRlVLDVLKP.+pPsll-lAppLuclcGV-GVNIoVhElDh-TpslplsIEGssl-a-clpcsIEchGusIHSIDEVsuGc+llEt.................. 1 19 41 62 +1929 PF02681 DUF212 Divergent PAP2 family Mian N, Bateman A, Yeats C anon COG1963 Family This family is related to the Pfam:PF01569 family (personal obs: C Yeats). 
21.70 21.70 21.80 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.64 0.71 -4.46 40 730 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 571 0 233 576 175 132.70 41 83.60 CHANGED lhsNtsLhsAllAhhlAQhlKlhlphhhp...++hch.......phlhsTGGMPSSHSAhVoALuTulGlppGasSshFAlAslFAlIVMYDAuGVRRuAGhQAclLNp.Llp-h.p.............t..tpcpLKELLGHTPlEVhsGulLGlhlu ...........................hpN.sLhuulluhhhAQhlKhhhp.h....htp.....p.+.h.ch.......phhhuoGGMPSSHSAsVoALuoulGl....ppGhsSshFAlAslFAhIVMYDAsGVRptuGcQAplLNp..l...h..pph.pth............................tpcpL+El.lGHoPhpVhsGullGlll........................................... 0 86 168 208 +1932 PF02697 DUF217 Uncharacterized ACR, COG1753 Mian N, Bateman A, Eberhardt R anon COG1753 Family Structural modelling suggests this domain may bind nucleic acids [1]. 25.10 25.10 25.20 25.10 24.80 25.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.55 0.72 -3.73 16 123 2009-09-10 16:57:48 2003-04-07 12:59:11 9 1 63 0 61 129 9 67.10 27 87.20 CHANGED sKTIoIsD-VYccLlchK..tscSFS-VIpcLlc.......spcctLhchaGhls---h.cEhc+clpEs.tphcc+h ..........KTIslo--sYccLpchK.pssESFS-lltRLlp.......tpt...cplhch...h..Gh....h...s-c..-h...cchtcphcc............................................ 0 9 40 54 +1933 PF02698 DUF218 DUF218 domain Mian N, Bateman A anon COG1434 Family This large family of proteins contains several highly conserved charged amino acids, suggesting this may be an enzymatic domain (Bateman A pers. obs). The family includes SanA Swiss:P33017 that is involved in Vancomycin resistance [1]. This protein may be involved in murein synthesis [2]. 
24.30 24.30 24.30 24.30 24.10 24.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.02 0.71 -4.73 148 7246 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 3217 2 1345 4642 908 154.40 20 61.35 CHANGED pscs.llVLG.........th..stt..sh..stp.Rl.ptulcLhc....t.shssh......lllS.....Gu.s.t......................hsEAphh......pchhhpt.G..ls.....tpplhh....-sp.ups...ThcNA..thst.plhp.ppsh.........t...ph.l.lVTsshHhhR.......Ah...hhhcptuh.pshshsssh.ssht................................................phhh+E.hhu....h ....................................................................................ph.llVLG...............th...s.t......s.h........htt.Rl.stA.....h....plac.............p..s..s....sh...............lllS.....GGputs...........................shsEA...psh.............tchh.hp..t..G.......ls......................tp.p.Ilh.....-.sp.ut.s....Th-Nhhhuc..plhp...pp...................................ph..l..lVTssaHh.R........Ah..........hhh....p......p..h..Gl..p...s.......s..hs..s.s..........................................................h.................................................................. 0 387 838 1133 +1934 PF01629 DUF22 Domain of unknown function DUF22 Bateman A anon Pfam-B_1137 (release 4.1) Family This domain is found in 1 to 3 copies in archaebacterial proteins. The function of the domain is unknown. This family appears to be expanded in Archaeoglobus fulgidus. 25.00 25.00 73.10 70.50 19.40 18.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.04 0.71 -4.58 23 49 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 29 \N 41 51 0 107.70 28 79.39 CHANGED hp.c....hphshchsGclh+cc..lchcsauYp.pup.lupWEslIAsEcl-VccGEshhl+I+clclPssTllhPhslhRHAhGsllDVspps.PtpVE-c+plscAlFlsscDG ................ht.............htscltchc..lctp.hsap..htp..huphcsllAsEcl-l+pG-hc.I+I+cIplPspollhPsshhpHslGpllslscct.Ph.lEp-RplcpAhFlsstDG.. 
0 20 30 37 +1935 PF02713 DUF220 Domain of unknown function DUF220 Basthon M, Bateman A anon Pfam-B_1412 (release 5.5) Family This is family consists of a region in several Arabidopsis thaliana hypothetical proteins none of which have any known function. The aligned region contains two cysteine residues. 29.40 29.40 30.70 44.10 28.10 27.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.70 0.72 -4.30 17 83 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 13 0 48 82 0 69.60 52 24.65 CHANGED hSGslPIpLllcEN+Ksho..uKYKKcKMMFMKlFEGsWKVEPLYVDs-RLCKppcPKShEEY++CSGGpG+IuSK ........................SGslPlplll-EN+Ksho............sKYKp.pKhhFMKsFEGsWKVEPLYVDpERLC+.....shcPKShEEY+pCSGGpG+IuSK............. 0 21 27 30 +1936 PF02714 DUF221 Domain of unknown function DUF221 Bashton M, Bateman A anon Pfam-B_1596 (release 5.5) Family This family consists of hypothetical transmembrane proteins none of which have any function, the aligned region is at 538 residues at maximum length. 
28.70 28.70 28.70 29.20 28.30 28.30 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.88 0.70 -5.57 82 1418 2012-10-02 00:51:22 2003-04-07 12:59:11 10 55 296 0 1031 1387 24 296.20 22 36.12 CHANGED uFlpFcophsAphssQ....................shtppps...hph..hsthuspPcDllWpNl.slshhp+hh+phhsshhlhhlllhaslPVuhl.uhlsslstLsphhPhLphl...t...hs.hhh.ullsGlLPslhLslhhhllPhlhchhuphpGhhopuphEhpshsp......hFhFhllplFLlsol.uuoshsh...lpplls.............pssslsphLApsL..P......ptusFalsYlllpulshsuhpLLplssLlhhhlhtthhs......pTPRchhpth.s.sshsaGshaPhhhhlhsIslsYo...lluP..lILsFuhlhFhlsahsa+YplhYlas.pph.-ot...Gtha..PhshtplhhGlhlhplhhhGlh ...............................................................uFVpFpsthtAthshp..........................................shtp..p.s......hph....hthAspPpDlhW......pNl....tls.hh..p...h.hh...R....phh..h.......h..h.l.hh.l.h.....h...h....a..s.....hPs.sh.l..ss......l.....s........p........l......s......hl...........pp..h......h......s...h....lp.l...........................hhh...s...h...lpuhLPsl.h.L.hl.h...h..l.l.P......hl..h.hh....hu.t.h.p.G.h.h..otothchtshpp......hah..Fh......l..h.p....lhll.s.sl.sussh.sh.......htthhp.........................................ts.p.p.h......p.hl.u.t.sl....P.........ptusF.a.ls.....Yl..lhpuhhs..h.uh..p.Ll.....pl..s.s....Llhhh...h.....h..t..h..hht...........posc.....c.....h..h.......th.......h.............p.....t....hp....au......t......hhs.h....hhhh...........h.............h.....l................s.....l..sYu...hlsP..lll..sF....shlhhhltahsh+aplha..........sh...tph..-st..........G.t.ha.....shhh..t..ph..hhulhlhplhhhGh.............................................................................................................................. 0 345 611 869 +1937 PF02720 DUF222 Domain of unknown function (DUF222) Mian N, Bateman A anon Pfam-B_1711 (release 5.5) Family This family is often found associated to the N-terminus of the HNH endonuclease domain Pfam:PF01844. 
The function of this domain is uncertain. This family has been called the 13E12 repeat family [1]. 24.40 24.40 24.60 24.40 24.30 24.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.74 0.70 -5.29 29 2037 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 207 0 557 1830 116 259.90 21 64.47 CHANGED thutlt-hpsttcpLsuhtthlhscl.....spcsh.tttGsculushlAsphplStupAspplphAtpLtpR...........LPtsAsAhtpGclshcpVpsItttscplscshslssh-tApsshlppusp..hpscpLtthscphhthh...-...............................PDush.....sct-ttc.......cRtlslus..ts-GMoplpGhLsscsuAsh-shLsphAus.hCss..................DsRossQRptDAlsAll....RhshssGphsp.sGh..........sslllpsshpchpssust............slsGhGslLPhp-lhcLApcAp..Lt.lh.t.hupPls.hs+spRhsSss.RhhLhsRD ............................................................................................................................h...........h.tth.sh...........h.....h.ph.....................ttts.......t....s.h......p.......h..s.t..l.u..t..h.t.h.o...........s.tAt..p...hh.....t......A.........t...s..L.h......................................................hst.stts.......htt...Gtl......s.t.phthlhth.......h.......t...........l...............t............s..............h...........t.....h..........h.......p..t....t...........s....t..........h.....t.t.h..t.......hps..tp...l.....t.thh.p.thhthh.....c...............................................................................................................s..c..u...............pttc.ttp................pRtl.ph.....t..t...t.....-.Gh.stlp........u.h.Ls.st...tt.sthp..shl..sthu......ts...hsss......................................................................................................D.sR.o.s.Q.p.....p.t.D.uhhthh...................................phhht...s.s...t....s....p...t.uh...........................................st.lh..l..p..h.s.....h.p.p..hthtss.................................................hhhs...hs...lsht..h.h.p....h........s.t....p........ut..........ht.........
.....................s......s.....h......h.......c.....thh.s.t.t.hl.hpD............................................................................................................ 1 124 377 513 +1938 PF02721 DUF223 Domain of unknown function DUF223 Mian N, Bateman A anon Pfam-B_1714 (release 5.5) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -10.03 0.72 -3.90 15 441 2012-10-03 20:18:02 2003-04-07 12:59:11 9 27 17 0 209 461 0 88.80 23 26.26 CHANGED sLlLsDcpG......splcATIsp+hushYt.........-plp..EspWcsIooFsVp.sssulR...sTsHca+IhFhcp..........ThVspusshpssh.ahshTPFDhIl--osspslL .......................hlLhDcpG.......spIpAsl.p.p.phh.sp.ap.........shlp..EsphhplpsF.p.Vstss.sth..R...sosHca+ltFhts..........Th.lpt...s...psh.s..s........hh..p..hs.Fs.lhtt..................................... 0 3 13 25 +1940 PF02890 DUF226 Borrelia family of unknown function DUF226 Bateman A anon Pfam-B_1255 (release 6.0) Family This family of proteins are found in Borrelia. The proteins are about 190 amino acids long and have no known function. 25.00 25.00 25.20 25.20 24.50 24.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.84 0.71 -4.41 21 456 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 31 0 21 314 0 133.60 48 76.35 CHANGED lapFt.spsKcp+hhlhF+plaNpp+.hpthpLFPl+E..sDKFLGIaYGY+K.h.Kshhl+Yp...stspKsY.shsKsYYIEFRFKKGSVFCYl+ultpLL+.K-KhsTpY.ptLlcplhcLE+cVYcFYsKKLss..tGlIhKWIpK.NQ ............................hhpFt.sppKcpcFhlohRsLF.N...tc+.hpthpLas.l.KE..sDK.FlGIaYGa+K.h.Ks..h...hlKYp...sssp..KsY.sls.KsYYhEFRFKpGSVFCYl+uLhpLL+.Kc+tsscY.psLhshhpcLEppVYcFYsKKhsc..tG..hlhKWIhK.N............................. 
0 20 20 20 +1941 PF02989 DUF228 Lyme disease proteins of unknown function Griffiths-Jones SR anon Pfam-B_1298 (release 6.4) Family \N 25.00 25.00 41.60 41.40 20.40 19.10 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.23 0.71 -4.72 7 322 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 26 0 26 250 0 188.30 51 91.81 CHANGED Mu-hsplccpa...cKhtclpthh...Kssppssu.ltNSl......cF+DcNl.a.spGGspoSptDKlENa...PhpuasYKRGVKLs...spsspl....plEsGGGsDLYGlClDlDEFS+TATVlPITNNFEGYLlsK...ssolKscDKL.hNpcG.LEKssGu....tssINAlALScAhpls...................pDlall+VtlFGN+ulpc .............................................u-psplccpa...cKlsElcslh...Kssppssu.llpNSl......cF+DKNlhasspuGspoSptDKIENY...PspuaPYKRGVKLl.....sppspl.......pVEs..GGssDLYGIClDlDEFSpTATVlPITN.NFEGYLlsK...ssolKsGDKLshNppGsLEKssGu....tssINAhALScuh.pls....................p-l.ll+VulFGN+ulc................. 0 17 17 17 +1942 PF03003 DUF230 Poxvirus proteins of unknown function Griffiths-Jones SR anon Pfam-B_1300 (release 6.4) Family \N 20.40 20.40 22.30 21.10 18.90 17.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.29 0.71 -4.60 41 207 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 61 0 0 168 0 126.70 31 43.33 CHANGED IhsaCpppps...sppChClaPs...pshlphucchhtP+hCWhc.C...p+sspaLlpspcpphupCplssCsIslssLpl..supsclpssCt....sstshhsshspschlppphphsh......lhs.hhhhl....slhllhhl ...lhsaCsppps...sppCtCshPs...pshlppu.+hh..t..P+hCWhccCs..s+sspaLhtspcpshupCplssCs.....IslssLslt.supsclpssCs....psptsssss.p.tsc.h.lppphp.hsh......hhh..hlhhl....slhllahh................................................ 
1 0 0 0 +1943 PF03008 DUF234 Archaea bacterial proteins of unknown function Griffiths-Jones SR anon Pfam-B_1430 (release 6.4) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.59 0.72 -3.88 91 702 2012-10-11 20:44:43 2003-04-07 12:59:11 9 11 463 0 243 706 46 99.90 25 24.52 CHANGED WFRFlhPshsh.lEhGphc.hh.cplp..pph.spYhGhhFEcls+..-hLh.ch.t.t.h.....hsclG.+WW........c.....+sp..EIDllAlscppt....hlhsEsKWps.............ttcs+pl ..............................WFpFlhPst..sh.l-hsp.hc.lh...phl..c....pph.sp.a.huhsFEpls+..-hlh.ch.t..........hsp.lG.pW..W.......................c..........+pt....EIDlluh..scp...p..........hlhuEsKaps............p.................................................. 0 80 141 188 +1944 PF03057 DUF236 Protein of unknown function Griffiths-Jones SR anon Pfam-B_488 (release 6.4) Family This family represents the C-terminal region of a number of C. elegans proteins of unknown function. 21.20 21.20 21.50 21.80 20.10 20.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.83 0.71 -4.22 8 173 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 9 0 167 116 0 106.80 37 73.74 CHANGED K...............................................suhAuTHDPNYQTLAGLssNVFpcKsG....ssuAGGuuPsAPA.tsuKPGMAATHDPNYQTLAGLsN.slFc..KKD.Gut.......sAuGsuuPtsP.s-psuKAAT+DPNYQTLAGlsNDlF.............................................................s .......................................................................................................t....s.t.DPNYQThsulss.s....hh..ttt.........ss....ss........s...s......Pt..s...ss...............ss.t.......hAuTpDP.....NYQTLAGlss..s....lFt.....cK.s..ttt.......................ssuu.ss.sPtsP..ststhAuTpDP.NYQTLAulss.shF..............................................................t................................ 
0 64 91 167 +1945 PF03072 DUF237 MG032/MG096/MG288 family 1 Mifsud W anon Pfam-B_2298 (release 6.4) Family This family consists entirely of mycoplasmal proteins. Their function is unknown. Another related family, Pfam:PF03086, also consists entirely of mycoplasmal proteins of the MG032/MG096/MG288 family. Some proteins, such as Swiss:P75072, are included in both families, but of course differ in the aligned residues. 25.00 25.00 149.50 147.70 21.90 18.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.75 0.71 -4.27 14 35 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 4 0 14 29 0 135.50 36 26.32 CHANGED LRtpLsphushpLs.lp..plplpsp.sss..........slpWNcphsshchppspPYcFpFElshcYpGsYslphathhhs...huuIPupWpGchplpahlDGclspahss+.DYPuohFpFs-..scLLFs.HlhQcIpVps LRtpLsppuslpLs.lp..plplpsp.sss....h.....sIpWNcchsshchppspPYpFpFElshcYpGsYslphathhhs...hGuIPupW+GchplpahlDGclssWhssK.DYPGohFpFs-..scLLFs.HlhQ+Isspt.. 0 11 11 11 +1947 PF01638 HxlR DUF24; HxlR-like helix-turn-helix Bateman A anon Pfam-B_1509 (release 4.1) Family HxlR, a member of this family, is a DNA-binding protein that acts as a positive regulator of the formaldehyde-inducible hxlAB operon in Bacillus subtilis. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.94 0.72 -4.35 14 8192 2012-10-04 14:01:12 2003-04-07 12:59:11 12 15 2896 25 2341 7312 707 89.20 34 69.10 CHANGED llGuKWphLILhpLhp.Gs+RFsElc+tlPsIop+hLoppLRELEp-GllpRpVYsplPP+VEYSLT-hGcsLpPlltthpcWGppahpt.. ......................................lus+W.p.h.l...Il.h..p..L..............h........p.....G............s........p...R.......F.....s..-..L.p.+.p.l.......s......u.......l.....o.............p+.hLop.....p.....L+cLEpcG..l.l..p..R....p..s....a...s..p....s..P....P..+..V....E..Y.s.L.T.c.h.G.c.s...L.t.s....l.....l.p....slt.p.Wucpah...hh................................. 
0 770 1605 2009 +1948 PF03086 DUF240 MG032/MG096/MG288 family 2 Mifsud W anon Pfam-B_2385 (release 6.4) Family This family consists entirely of mycoplasmal proteins. Their function is unknown. Another related family, Pfam:PF03072, also consists entirely of mycoplasmal proteins of the MG032/MG096/MG288 family. Some proteins, such as Swiss:P75072, are included in both families, but of course differ in the aligned residues. 25.00 25.00 135.40 135.40 22.70 22.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.59 0.71 -4.47 13 35 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 4 0 14 32 0 120.10 40 22.97 CHANGED hsFpssYhsssssl.sLsFsLphpTsNFusLp-Lp-oFsp.sGssLssQLFaKssVsKLsh.ssNDLTplApTslu-shhshplsLscSIl....phsLppscpcF-ccllpPFhpcRpcAKtta- .hsFcssYhsssspl.sLsFuLphpTsNFosLp-Lp-SFsp.sGssLssQLFaKssVsKLsh.ssNDLTplApTAlG-slhshplsLscSIl..cssLpp.scppF-pchlsPFhpcRpcAKtta-. 0 11 11 11 +1949 PF03112 DUF244 Uncharacterized protein family (ORF7) DUF Mifsud W anon Pfam-B_2667 (release 6.5) Family Several members of this family are Borrelia burgdorferi plasmid proteins of uncharacterized function. 20.70 20.70 21.30 20.70 20.10 19.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.91 0.71 -4.69 7 158 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 30 0 10 164 1 148.00 70 39.00 CHANGED hEl.plpppIhps.thphh...NhD.pshhphlc-hV.pSDFYpsGlEFDWhsEFVEYV-ClDLEI+s-psAhNLEpsLhEIpsLpsELNKIQ....NEN+K+....EKPIKDlLKh+IscIhpcasLIsplNY+FccFVFshDPpKRAIoDRFKuLhPhSu+l.a.ss ......................hElhNL+KDIaSNYR--YLMAHNFNpDTFIKLV...E...DLVE..RSDFYSSGVEFDWAREFlEYVDCsDLEIKDsQSAENLAhDLMEIDSLpKELN+IQ....NENKKR......EKPIKDhLKMhIaNITNpYPLIE..plNYKFtEFVFTLDPKKRAISDRLKGLLPTSGsVFFPSN...... 0 8 8 8 +1950 PF03136 Pup_ligase DUF245; Proteosome_20S; Pup-ligase protein Mifsud W, Bateman A anon Pfam-B_3042 (release 6.5) Family Pupylation is a novel protein modification system found in some bacteria [1]. 
This family of proteins are the enzyme that can conjugate proteins of the Pup family to lysine residues in target proteins marking them for degradation. The archetypal protein in this family is PafA (proteasome accessory factor) from Mycobacterium tuberculosis [2]. It has been suggested that these proteins are related to gamma-glutamyl-cysteine synthetases [1]. 25.00 25.00 45.70 34.20 18.90 18.50 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.53 0.70 -5.88 31 731 2012-10-02 17:21:26 2003-04-07 12:59:11 10 3 352 0 214 642 154 443.10 44 90.47 CHANGED +RlhGhETEYGIsssspsssssl..o.p.l.......................hh..at.EsPhcDARs..hclpth.stssup...s................................NlhLsNGuRlYlD....tuH..........PEYSoPEsssst-hVhaD+AG-clhppAspcApph.ut...........sslhLaKNNsDucG.sSYGsHENY.....................................................................................................................LhsRsssF.splsctLhPFhVoRQllsGAGRVG........hs.suppsu...FQlSQRADalcptVuhpT.ThsRPIINTRD..............EPHADs-+YRRLHVIlGDuNMSEhoshLKlGoTuLVLphIE....sGh.....hs-Lsl..........csPVpul+plSHDh.oLptplpLtsG+phTAL-lQppYh-pstpalppc.s..........sspspcVLshWpcsLstlcsssh...........psusclDWlsKhpLl-pa+pR........puLshsc...P+LthlDLpYpDlcsp+GLaptLhp+GphcRLls-pclppAsspPPpsTRAhhRGchlcphs...pcl.......hsAsWsplhl 
............................................pRIhGlETEYGloss.......s...stpsl..............................................................sP..csARt..hhhphs..uts.us....................................................................NlhLsNGARLYlD....tuH..........PEYuoPEssssh-hVhaD+AGEplhcshstcAp.pp...hut...........sslhLaKNNsDu.................t.G.sSYGsHENY.....................................................................................................................Lhs.Rps..........s....F.splscsLlPFLVTRQllsGAG+Vs...................................ps...u...c..tss..........FplSQRADal.ptVu.tT.ThsRPIINTRD..............EPHADu-+YRRLHVIlGDuNhuEsoThLKlGoTsLVLchIE...........sGss............hpDlsL...........ssPlpAl+plS+Dh.o...h.ct.l.pL....s...........s....G........R.p.....h..o..ALplQccYhp+s.tcalpp+t..........................sspsppVl.chWs.csL-sl-.sssh.............phusclDWshKhcLlctappR................pshshss......P+lttl..D...LpYpDl+.s.+G..Las.....tL.t+Gt..hcRl.ss..-ppl.tcAsspPPpsTRAhhRGchlppht....scs.......hsssWsplhh.................................... 0 66 156 198 +1952 PF03158 DUF249 Multigene family 530 protein Mifsud W anon Pfam-B_2304 (release 6.5) Family Members of this family are multigene family 530 proteins from African swine fever viruses. These proteins may be involved in promoting survival of infected macrophages [1]. 
23.10 23.10 23.20 24.90 22.90 23.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.42 0.71 -4.77 9 88 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 11 0 3 75 0 189.60 43 38.18 CHANGED GALEucpYDL...IpKYasQIpDhHcILPLIQDPchFE.....KCH-Lsp.Csh.CLlpHAlKasMLsILQKa+cpLstc...hhsQhLFEhACcpp+a-llpWI..utsLplYp.pslFsIAhs++DloLaoLGYpLlhs+h.sp......p.shs.LLo.pHLcpAutKGLLcFhLETLKYGGsls...hlLopAlpYsHRKILsYFl+p .....................GALcucpYDL...IpKYasQItDhHpILPLIQDPchFE.....KCH-Lsh.CshpCLlpHAlKaNMLsILpKa+-pLp.tp...hhsQhLFElACcpp+h-llpWI..upsLtlhc.cslFsIAhsp+DloL..aoLGYpllhsph.sp......p.shhsLLs.pHLchAutKGLL.FhLETLKaGGslc...hlLotAlpYNHRKILsaFl+p.............................................. 0 3 3 3 +1953 PF03151 TPT DUF250; Triose-phosphate Transporter family Mifsud W, Bateman A anon Pfam-B_3234 (release 6.5) Family This family includes transporters with a specificity for triose phosphate [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.80 0.71 -4.57 62 2906 2012-10-02 19:55:49 2003-04-07 12:59:11 11 51 371 0 1969 5343 564 144.50 19 38.47 CHANGED Ghlhshh.ushshulp.shsphlhpc.............................................tthsshphhhhhu.hushhhlss.hhhh-thp..h.........................h.hhhhhllhsulhhahhshusahlltpsSslohsVsushKpsllllhullhFp...sp.lohhshlGhslulhGhhhYsh 
..............................................................................................................................................................................................................................Ghhhshh.us.hhtuhp.lhs.p.hhh.p.p..........................................................................................................................................hp.h.ssh.s.h..h..h...h....h.u....s.h...s.....h.h.....h.h.........h.sh.....h.....h..h......h...-..t..h..t....hhh.th...........................................................................th..hhhl..h.hs.u.l.h.u..a......h.h.s.h....s.s.a..h....h....l..t...t.s.....S...s.l...Th.s.l...sGsh...Kp..hh............s.lh...hu.hlhat...............s...ho..h.h....s....h..lGhsl.s.lhG.hhhYs............................................. 0 634 1165 1620 +1954 PF03159 XRN_N DUF251; XRN 5'-3' exonuclease N-terminus Mifsud W, Moxon SJ anon Pfam-B_2349 (release 6.5) Family This family aligns residues towards the N-terminus of several proteins with multiple functions. The members of this family all appear to possess 5'-3' exonuclease activity EC:3.1.11.-. Thus, the aligned region may be necessary for 5' to 3' exonuclease function. The family also contains several Xrn1 and Xrn2 proteins. The 5'-3' exoribonucleases Xrn1p and Xrn2p/Rat1p function in the degradation and processing of several classes of RNA in Saccharomyces cerevisiae. Xrn1p is the main enzyme catalysing cytoplasmic mRNA degradation in multiple decay pathways, whereas Xrn2p/Rat1p functions in the processing of rRNAs and small nucleolar RNAs (snoRNAs) in the nucleus [3]. 
22.90 22.90 24.00 23.10 22.80 21.50 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.75 0.70 -4.98 17 896 2012-10-03 20:43:45 2003-04-07 12:59:11 13 24 311 10 593 928 242 207.90 41 22.33 CHANGED MGVPtFFRWLSc+YPpllpslhEcp.......................EFDNLYLDMNGIlHsCoHPpDc..shshsE-E.....hahtlFcYlD+lashlRPRKLLahAlDGVAPRAKMNQQRuRRFRuA+-Apctptctpchtcph.pp.........................................cpFDSNsITPGT.FMspLupsL+YaIpp............................KlssDstWp.slclIlSsssVPGEGEHKIM-aIRpp+u.pPsaDPNT+HClYGLDADLIMLGLuTHEs....HFslLRE-l .......................................................................MGlPthaRWlsp...+YPt..h.........h.....c...pp...............................................................................................-hDN...LYLDMNGIlHs.C......s.Hspsp...............h..h.s--c......................h..h.ht.lFp..Yl-cl.h.p.h.l.+.P+....+l..ha...........hA...........l.................D..GV...............APR.A.KMNQ.QR.uRR..F.R.uup-s.t..pt......t.pt...p..t..p....h....t........................................................................................................................................ptaDSNs....ITP............GT.FMs..pLsptLca.a.lt...............................+.l....s.p......D..s.t......W.p....tl.p.ll.lSstps.........PGEGEHKI...Mc......aI......R....p....p+s......p...........ss.........a....s................P.Ns...cH..........slaGh............D...ADL.....IMLuL.s.....o.....H.-s.....pFplLREp................................................................ 0 247 365 514 +1955 PF03162 Y_phosphatase2 DUF252; Tyrosine phosphatase family Mifsud W, Yeats C anon Pfam-B_3756 (release 6.5) Family This family is closely related to the Pfam:PF00102 and Pfam:PF00782 families. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.93 0.71 -4.88 8 714 2012-10-02 20:12:17 2003-04-07 12:59:11 8 10 280 4 416 1012 121 149.50 27 62.75 CHANGED llPPhNFSsVts...slYRSuaPpstNFsFLcs.L+L+oIlhLssEshsp-sLp..Fh-sppIchaalthsus+c.............hlslhscplpcsLcllLspcNaPlLlHCscG+HRTGlVIGCLR.KLppWslsuIhsEYppFous.ttchh-ppFIEhFssslhhcpss.tsthshtp ..............................................................................................................sP.NFuhVps...slYRS.u.h...P...p.....h..s...a....s...F..Lpp...Lp...L+o.llhLs....s......c...................s...p......p..............hp......Fhp.p.ps....I.p...h...h....p...h....t........h.p.s...p...pp........................................................................l.sp..p.t...l..h...psL.c.l.l...l....s......p.s..a...P..lLlHC.ppGp.cR.T.G.sl.l.u..s.h....R.+.l.Q.tWs....h.sul.hpE....Yp..p...asts.......p....s.palc.ap.p...............hh.................................................... 
0 143 269 372 +1956 PF03190 Thioredox_DsbH DUF255; Protein of unknown function, DUF255 Mifsud W anon Pfam-B_2331 (release 6.5) Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.18 0.71 -4.52 98 1287 2012-10-03 14:45:55 2003-04-07 12:59:11 10 21 1106 1 594 2317 1224 153.40 47 23.88 CHANGED sNRLtpppSPYLLQHAcNPV-WaPWGcEAhpcA+ccs+PIhLSIGYusCHWCHVMt+ESFEDt-lApllNcpFVsIKVDREERPDlDplYMsssQhho.GpG......GWPLolFLTP.DtcPFauGTYFP.....c.....s+h...Gh.................PG......FhplLpplschWcp.c.+pplhpsuppl...hptLp .............................N+LhpppSPYL.hQHA.pNP.........V......cWaPW.u..p.E.....A.h.pcA.+.p.c.s.+.P.I.hL.S.....l.G..Yu.sCH..WCH...VMt+.............E.S.F-Dt...-...l.A.t..l..h.N..c.p.F....V..s.l..K..V..........D......R....E....E.....R.............P....D....l.........D..............p.....l.Y...........M....s.........h.....s...............p.....s..........h....o......G..p....G..................GW.Ph.olFL...TP...-...t.c.......P....Fa..u.G..T..Y..aP.p.........sph........sh.................................su.........F.hpl.L...p.tltphWpp..c..+pclhppupplhptl.p............................................................................................... 
0 217 426 530 +1958 PF03192 DUF257 Pyrococcus protein of unknown function, DUF257 Mifsud W anon Pfam-B_2788 (release 6.5) Family \N 25.00 25.00 25.90 29.10 23.40 22.10 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.13 0.70 -5.03 28 91 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 14 6 77 103 0 204.40 25 94.90 CHANGED lp-hl.cph+hGEol...LlEYsS..shhchlhatl.lphh+-c...shsllIsDlhDoL.lhh........pplclhGlcsslh..csspVIKlG.GphshGNVlt+l..t.-.shah...ppYtptlpclht...pcs..hIsIlLGlE+lh.hh.pshh-hhhllptltpa.lsscc+..puhYFlNpsllcphsss.hl.hLEEluosV..lclspc...t....hplhKuhp.th.Gh...plpl ........h..chl.pph+hGEsV...LlEYsS.hshsclhhhtl.lphhpcc...shsllIsDlhDohhlhh........ppLchhGlcssh...cslpVIKlG..GphphGsl...lt+l.h.t-.shah...ppYpphlpclhp...pcp...hlslllGl-+lh.hht.psht-h.hhhlps.ltph..lGsccR..huhYFlNpsllpph..p.hlshLEEluosV..lclppc......sp.hhhplhKu.p.p..s..ph..h....................... 0 4 5 43 +1959 PF03193 DUF258 Protein of unknown function, DUF258 Mifsud W anon Pfam-B_2832 (release 6.5) Family \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.98 0.71 -5.05 74 4240 2012-10-05 12:31:08 2003-04-07 12:59:11 11 13 3686 8 935 12978 6178 158.30 36 49.24 CHANGED hcphhpthcslGYp.lhhhssp..sspulcpLpshLps.+hslhsGpSGVGKSoLlNtLhs...ph........................................phcTuplSpp.s+G+HTTspscLhtl....ssGGhllDTPGhpphslh..hstcplspsFsEhpphh....tpC+F+sCpH...tsEP..uCAVcpAl-pGplst.RYpsYhcllc 
......................................................h....ht.Ypp..lGYp...lh..h..susp.........ptpu....l....c....t....L..........p....p....h...L....s.......s......+.......l....o......l...h..s.Gp..SG...VGKSo.L...l....NtLhs.....p..h...........................................................................................................................................p..h..p...T..u....-..l....S.....c......s.........u...+......G......+......H........T......T....s......p....s...c...L..h...p..l.....................p..G....G......h.....l....l...DTP..G..h....p..ph...s...lh..c...l.p...s.......-.p....l.spsFs..E..h...pphh..........tp.C+...F..R...s.CpH...t.p.E..P.......u..CAl....+..p..A.l.-....p.........G....p.....I...s...p...p...RacsYhplh.p........................................................................................................................................................................ 0 336 638 814 +1960 PF03196 DUF261 Protein of unknown function, DUF261 Mifsud W anon Pfam-B_2687 (release 6.5) Family \N 26.10 26.10 26.30 26.40 22.40 26.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.68 0.71 -4.19 8 189 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 32 0 15 139 0 137.30 62 92.16 CHANGED M.l.KhpppNt.....Ip+aGCYFLClhahh.lhKphc......Fssh-IslsYp+FluLGYI+.sNCaIlNPCtILuhaGIcocVRaESh....sYlssp.sEFEIoElKlcssshhHFlssss..pcVLYDS.LsLK.pGppaplsS+RlF+hp ..........MhIsKIKQsN+sLh.EIQKWGCYFLCLHYYsSlFKphE......FsAaEINsAYhRFlGLGYIK..SNCFIlNPCMILNYYGIRSSVRY...ESh.....sYLuAA.....NEFEISEVK...IccVN.GYHFIATKN..KEILYDS.LDLKs+GKlFKVTSKRIF+l................. 
0 11 11 11 +1961 PF03235 DUF262 Protein of unknown function DUF262 Bateman A anon Pfam-B_3462 (release 6.5) Family \N 29.30 29.30 29.40 29.40 29.20 29.20 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.80 0.70 -4.38 194 2983 2012-10-01 20:12:50 2003-04-07 12:59:11 9 17 1749 0 669 2580 315 195.70 18 37.50 CHANGED lppl..hsphp............................h...hlP..p............aQRsYs.W....ppcp.....hppLlcsl.....................................psah........lGsllh............................................ttt..tsphtllDGQQRLTolhh..............hhts........................................................................................................................................................t..hhtphhpttt..............................................pphppphhpshphhpp..................................................................................hh.h.h..................ps...shplFp...plNspGh....tLssh-..lh+shhh ...................................................................h.....htt......................h..hlP.t............aQR.sYs.W.......spcp.................hppLlcsl..............................................h.psa....lGsllh.............................................ppp......psph.llDGQQ....RLTolhh...........hlts....................................................................................................................................................................................................h...t.h....h.p...t.t..................................................................................................................p.t.tpphhp.h.phhtp..........................................................................................................................h.h...h.h...........................ts...s.....plFp....phN.stGh.Ls..-..lhpshh.h................................................................................................................................................................
............................................................................................. 0 249 483 593 +1962 PF03237 Terminase_6 DUF264; Terminase-like family Bateman A anon Pfam-B_3575 (release 6.5) Family This family represents a group of terminase proteins. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.95 0.70 -5.70 41 3063 2012-10-05 12:31:08 2003-04-07 12:59:11 10 17 2033 7 507 3449 4695 362.10 15 76.55 CHANGED hhhtuppsGKTahhuhphhtpshtss......................psplhlu.sspsphph..............htthhhphspp.hhphphp........tp.hhh...........ssGup.ltahuhptt....stpuhpG.......t..hlahDEhhhhsc.thhpthtpshssptchh.........hhossssst+.hhshhssth.sp.................................................pth.hshp..................hshtDshptu.s.hh....................cplttphsspt..hpphhhupass.............ssuulFphtphphthstt..............s.tttphh........................huhDsu............tssDssuhhlh..............thssspahhlttpptpshshsthsstlpphhp...phssphlsl-sss.....sGpulhphlppth.................hshphs...........csKtshshthpsll...csu+lphsps..........phtshhpslcphhssss.........p..tt+sDhhsAhthAlhpt 
.................................................................................................................................................................................................................................tuht.G.tT.h....h.h....s.....h........h....h....h....h....h..tpt.............................pthhhs..s.s..tt.p.ht...........................................hh...h.phh.....t...p............h..p.h.thp......................tp.hhh............................sGs.....p...l....hh.h.us...p....t.t...........shp.s.h...p..G.........................................hl...h.h.DE....h...hh.h............p.............t...............p......t......h....t....p...h....h...h...t....h......t...t..hh................................hho.s.s.s....s..............t....p......h...h............h...h.p..t...t.h.sp.........................................................................................h..hp...........................................h.sh.ts.s..h.....t..h.s.tp..hh......................................pp..l..t..t..p..h.s.spt........hpp...h..h..s..p..hhp............................stu..s..l...a.s..h...t..t..h..p...t..t...h..h.tt..............................................................s..s...t...t.hh.........................................huhD.u..................tss.D.ts.s.hhlh........................................h......ss......p..h.....h..h..l....t..t..h.........t........h.......p.........t......h......s.............t....t....t......p.....hl..t..p..hht..................ph.t.s..t..h.l.h.........h..-sps......................hu.tt.hh..p..lpp..h.............................h.h.h.s.................................ps+...t...h..h......t....htshh..........pt..tp.l..hst.............................h.t.h..t..lt.t..h...h.s.t.........................tp....s...Dhh.uhhhsh..........................................................................................................................................................................................................
. 0 161 327 421 +1963 PF03266 NTPase_1 DUF265; NTPase Mifsud W anon Pfam-B_4081 (release 6.5) Family This domain is found across all species from bacteria to human, and the function was determined first in a hyperthermophilic bacterium to be an NTPase [1]. The structure of one member-sequence represents a variation of the RecA fold, and implies that the function might be that of a DNA/RNA modifying enzyme [2]. The sequence carries both a Walker A and Walker B motif which together are characteristic of ATPases or GTPases. The protein exhibits an increased expression profile in human liver cholangiocarcinoma when compared to normal tissue [3]. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.03 0.71 -4.54 38 349 2012-10-05 12:31:08 2003-04-07 12:59:11 10 9 303 2 181 414 99 157.30 29 79.34 CHANGED +lhlTG.PGlGKTTLlpKlhctLppp.shplsGFhTtEVR..csGpRlGFcllslsoGccuhLA+ss.........tsts..+VG+YsVslps...h-plulsslcpt...ppsD...lllIDEIGsMELpopsFhpslcplLs.usps...lluslHcp.......llccl+pp.....sclap....lTpcN.................Rs...tlhpcll .....................................lhlTG.s.G.lGKTTll.p+.....lh.ptLp..tt...sh..t..........ls....GFh.T..p..E....lR........pt..G..pR..h.GFcll..s...l.....s.Gc..c.s..Luphp..................tt.pl.GpY.s.V.p.lps...........a-pl.ulshLpp.h.........pssc.................llllDEIGtMEl..h..upt......Fhp...sl.....pph....Lp....ssps......llusl.ht..........hlppl+p.p.s...splhp....l...s.pN...Rs.l..pl......................................................................................................................... 0 69 108 142 +1965 PF03270 DUF269 Protein of unknown function, DUF269 Mifsud W anon Pfam-B_4172 (release 6.5) Family Members of this family may be involved in nitrogen fixation, since they are found within nitrogen fixation operons. 
25.00 25.00 28.20 30.50 24.10 20.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.36 0.71 -4.11 41 182 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 157 3 85 173 7 121.40 47 78.61 CHANGED a-shoDpplLsPaIloKEpRRpIPlhGDPDPsTlhRlchFYpAlAhsIEccTGhhssshhc..lsHEGFGRsllhsGRLVVls+sLR.DlHRFGF-SlpKLA-cGpKhlssulchIccaP-V.Ach ......W-scoDtc.LL.s.PaIloKEpRRp...IPllGDPDPps.lhRlchFYsAVulsIE+pTGlhssPhhc..hsHEGFGRhllhsGRLlVlsKpLR.DVHRFGF-oLpKLA-cGsKhVssulchIcpaP-VAp............ 0 21 54 68 +1966 PF03189 Otopetrin DUF270; Otopetrin Mifsud W anon Pfam-B_2323 (release 6.5) Family \N 22.80 22.80 22.90 22.90 22.70 22.40 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.45 0.70 -5.48 12 606 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 83 0 443 578 1 212.80 17 64.99 CHANGED YLRlGAlsFGlGshVY.GlEh...F.h.hsssCpslhluls.hhthlFshhQMpFIFhN..uclshtRa+hlARFGLMHhVAsNLshWhphlltEs...phEIhphtp.t...stpsshhpssptsstthsts.tttc........................................hh.th................................................................................................................................................................................hoppsss.......................................................th.hpstRhphhsshltssusaLasshlEYSLIsAslhahhWKplt.................h....tsshs+ps+phplDCsssppGlFhGIllllholIsllhaalLhpcst.phhAs..lsls-hlhaslshhA...llhuhhp..MRsLca......ppptcu.sLDsILLllu.sG.hlYushullushhshhpsp..sshlsllstlhpllQsslQolFILpuu+p+stusppsRspPG+pIlTFLLlsNluhahhpThEpt+u..uFpshhh-aYGhhsWollh+lohPLsIFYRF 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 124 164 306 +1967 PF03407 Nucleotid_trans DUF271; Nucleotide-diphospho-sugar transferase Bateman A anon Pfam-B_4460 (release 6.6) Family Proteins in this family have been been predicted to be nucleotide-diphospho-sugar transferases. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.26 0.70 -4.56 49 713 2012-10-03 05:28:31 2003-04-07 12:59:11 11 28 132 0 514 739 97 195.10 17 45.37 CHANGED thlpphlVlAhDppshpts.pphtss.hhhh.h...........shs.tsphhtstsahphhhh+hplhpplLchGhshlhsDsDshWhcsPhshh.................ssDlhhss.Dthssp.spthpphh....................NsGhhal+uospohphhcphhpphtp..ss..........DQslhshhhpp.hh...........................hshphphLstshF.........shFhp...pp.pthps..............hhlHsshph...shp.....sKhpch ...................................................................................................................t...pphllhshDttshphh....t......th.t........h.hhh....................................tt.hhhst..sahp.hh..............h....h+hp.hl.t.plL...p..........h...G...a...s....hlh.sDsDlh..a..h..c...s..P.hs.hh...............................ssDl..hhss..Dth.t.s..t.......t.tphh.............................................................NsGhhah+s.ss...tsh..p.hhcpWh...p....th..tp.........st........................DQss.hsthht.t.........................................................................................hthphthl.sh.....hh................hh.t...........h.t.....................hp.sh.......t.......tKh........................................................................................................................................................... 
0 210 362 473 +1968 PF03314 DUF273 Protein of unknown function, DUF273 Mifsud W anon Pfam-B_3636 (release 6.5) Family \N 30.00 30.00 30.20 30.20 29.20 29.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.72 0.70 -5.04 12 87 2012-10-03 05:28:31 2003-04-07 12:59:11 9 7 10 0 85 68 5 176.00 35 55.45 CHANGED l+CYsKhHsYchllshDs-....ap..Csp.KDphFRRHCllAKlLssaD..slLFLDADhGVVNPpR+IEEal..ccslDIsFasRFhNWEItuGsYlsRNTpaAlshLpcFAsYE.+LPpSaHGoDNGAlHhaluE+lhPpso.clchC+.Khacp....SpsapDLaTYEuCI+slhGssscFs.KlRIL+K...............GT.GWARDGWLTs.hWpsch.DFMlHGWKsspLh.hPp..lts.ph .......................................hpCY.t..tY.h.hh..ttt.................ht...C..p....p..........................hF+RHChh....uthL......p........p....hlhhlDuDhullNPpp..................hlE-al..............p.th.DlhhYsR..a..........h..........NaE....lhuGSYls+NT.auhphLpc.aAs.a.E...pLPp.u.hH.GoDNGAlH...............hhlhchhhPp...p......h......p..C..phapt....otshtshhsapsCh+.hhGttp.as..cl+IhpK...............Gp..uWsRD.WLTsshWs.p..DFMhHuhKppp.................................................................. 0 32 41 85 +1970 PF03434 DUF276 DUF276 Finn RD anon Pfam-B_4450 (release 6.6) Family This family is specific to Borrelia burgdorferi. The protein is encoded on extra-chromosomal DNA.\ This domain has no known function. 
25.00 25.00 30.20 30.20 20.50 19.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.74 0.70 -5.44 3 157 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 28 0 12 138 0 260.50 73 99.62 CHANGED MSIVFDSDFGILKRTI+DIVRoKREYLRVNYGINIDDNpSSIYNIIASSLALIEEEIINELNLFFSKMKPGGSYWAAIEEHISS.KSTTYSAVRNALLNL-GIEHsNIKSuAGKANIYLILKEDLLsTsKoNINsPEFKAKlWETLYLTTPSGTLLEGDIEIDGLNSTGQ+KSYKISLGKRKYVYMKVKYKLDLKNYLYLNIDSQIRDIYSRIISNNYhDMGISFEYQDFFAPVNEVKGIKFMEIuICIKDTDTESIoKISDSDFppNQDISIsDDTILLFNTT-RLLIDhD ....MSIVFDSDFGILKRTIKDIVRsKREYLRVNYGINIDDNpSSIYNIIASSLALIEEEIIsELNLFFSKMKPGGTYWAAIEEHISS.KSTTYSAVRsALLNL-GlEasNIKSuAGKANIYLILKEsLLDssKoNINssEFKAKLWETLYLTTPSGTLLEGDIEIDGLNSTGQ+KSYKISLGKRKYVYMKVKYKLDLKNYLYLNIDSQIRDIYSRIISNNYSDMGISFEYQDFFAPVNEVKGIKFMEIusCIKDTDTESIoKIsDSDFppNpDIsIsDDThLLFN.TTDRLLIDh................ 0 8 8 8 +1971 PF03353 Lin-8 DUF278; Lin-8_Ec; Lin-8_Ce; Ras-mediated vulval-induction antagonist Finn RD, Pollington J anon Pfam-B_3924 (release 6.5) Family LIN-8 is a nuclear protein, present at the sites of transcriptional repressor complexes, which interacts with LIN-35 Rb.Lin35 Rb is a product of the class B synMuv gene lin-35 which silences genes required for vulval specification through chromatin modification and remodelling [1]. The biological role of the interaction has not yet been determined however predictions have been made. The interaction shows that class A synMuv genes control vulval induction through the transcriptional regulation of gene expression. LIN-8 normally functions as part of a protein complex however when the complex is absent, other family members can partially replace LIN-8 activity [1]. 
25.00 25.00 25.30 25.30 23.00 24.00 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.15 0.70 -5.14 14 108 2009-09-11 05:18:49 2003-04-07 12:59:11 10 17 4 0 106 110 0 233.90 14 66.63 CHANGED lohp-Yhphpppphhphsst...cstlKKVlLshlEcpPphWp..pssphttccWptlG.................................sEVacRTGpl........................lpssplpphappuKssL+p+L+psIhpK+hs+tssEtcLhcWEaYsah+YYRcsLtpaEApLRsc........hptptp.spscD-Ihh-shhp.......p.ppthEpsssss-h.s.Es.ht..sph.........ppsshpcsptsppshs..thsts.p.s............................sps.sushppsSpppps...........htsspp-ss...................sppIs.QspRLhpphPE+s+LlRcsLFcTllAh--t..-apssu-lFpDL.....At.psh++ppR .........................................................................................................................................p..hth.....hlht.lpphsthht..pt.p.h..p..at.lu.................................hphatRTG.h..........................................hpht.lpphappuKptLpp+lp.hl..p.c.p.hs......tphEppL.hp.W.hY..h+aaRp....h.tphEtthptp.......................................................................................................................................................................................................................................................................................................................................................................................pp........................................................................................................................................................................ 0 19 19 106 +1972 PF01709 Transcrip_reg DUF28; Transcriptional regulator Bashton M, Bateman A, Eberhardt R anon Pfam-B_1741 (release 4.1) Family This is a family of transcriptional regulators. In mammals, it activates the transcription of mitochondrially-encoded COX1 [2]. In bacteria, it negatively regulates the quorum-sensing response regulator by binding to its promoter region [3]. 
27.00 27.00 27.80 30.60 26.90 26.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.54 0.70 -5.23 257 5704 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 4795 3 1221 3365 2823 228.30 42 95.40 CHANGED SKWsNIK++KutpDu+RuKlFoKlu+EIhlAAKt.G.G.sDPssNspLRsAlp+AKssNhPpDsI-RAIcKu.sG...t.....sussa-ElpY.....EGYGPuG.lAllV-slTDN+NRTsu-VRpsFoKs.GGsLGpsGSVuahFc++Ghlhhs.............tshsEDplh....-..ssl-....A.GA-Dl.................p.........s..-....-..............st...h..plhosssshtsVppuLcp.t....Gh...php.su-lshlP.psplpl...ss.-.......sspph.+LlDtLED.DDVQsVYsNh- .................................pKWsNIcp+Kss......pDup+uKlasK.................hs+EIhlAAK..........t.G..G..sDPcsNspLRhsIc+AKpss.hP+-sI-RAIc+u.sGs........sspsacplpY.....EGaGPu.G....sA.llV-sLTDNpNRTsu.-VRssFsK..............s..G..G..N...lGss.GSVu.a.h.F-+pG.lIshp......................utDt..D.plh....E.tslE....A..Gs.-.DV..............p.........p.-....-............................ss.....h.pl.ho..ss.p-ht...pVpcALc..s...s........Gl...ch..p..suElphl..P..ps.p.spl..........ss.E...stpph.+LlDtLE.DpD.D.VQpVYpNh..................... 0 405 775 1031 +1974 PF03436 DUF281 Domain of unknown function (DUF281) Finn RD, Bateman A anon Pfam-B_4313 (release 6.6) Family This family of worm domain has no known function. The boundaries of the presumed domain are rather uncertain. 25.00 25.00 25.80 25.20 24.90 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.38 0.72 -3.91 10 42 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 4 0 39 43 0 58.60 30 28.03 CHANGED sToDGCophtVpCshNsuh.Cs.............sstLhA-psuGshsssGossushAEuolTCQcDssW.DS ..........tsG.CsphtVpCphssuh..Cs.............sstLhhptsuGt.shsGss.su.p.u.psolTCssDuha...s... 
0 20 20 39 +1975 PF03383 Serpentine_r_xa DUF286; Caenorhabditis serpentine receptor-like protein, class xa Mifsud W, Fenech M anon Pfam-B_2888 (release 6.6) Family This family contains various Caenorhabditis proteins, some of which are annotated as being serpentine receptors, mainly of the xa class. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.46 7 69 2012-10-03 04:04:29 2003-04-07 12:59:11 10 2 5 0 69 113 0 139.20 32 47.97 CHANGED phPhlYIhsMslsGllsKls..hhlDhh..s.lh.PstsYtpYRphIGppl.TllsThsYhhPhaLshLMThpRh.IlhpPhcp..hFosp+lhlYshhlhIlshllLLIPahSpCslNFNAps..apsACAPc+HPlThhQNpaLIhlPhsshllNs ..................................hPhsYIhhMhhssh.l.phh....hh..hshh.................h.hh.hs..p......ttY................t..............t...............a.h.......p..h..hGp.h.TlhsohsYhhshhlslLMolpRlh.llhpPhsp..hFosp+lah.YC.h..h.ls...............l.....hsh.h......Ll.IPahSsC.lsFshhphsahosCuP.p+H..............P...lT.hapNpYh.....IhlPhsshhlN............................................... 0 16 20 69 +1976 PF03384 DUF287 Drosophila protein of unknown function, DUF287 Mifsud W anon Pfam-B_2926 (release 6.6) Family \N 19.30 19.30 21.30 21.30 19.10 17.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.64 0.72 -3.78 13 34 2009-09-11 13:33:43 2003-04-07 12:59:11 9 3 3 0 2 34 0 49.60 42 8.73 CHANGED IsSIlpPs.-EKhLLccIh-spsscDDlshhDllVDuWccRLltEcKpIaacsla ..............IpSILtPsh-E+.hLpclh-c.pVDDsls.lD.llDSWccRLlsEcKpIaacsla.......... 
0 0 0 0 +1977 PF03385 DUF288 Protein of unknown function, DUF288 Mifsud W anon Pfam-B_3134 (release 6.6) Family \N 19.90 19.90 23.10 20.80 19.80 19.00 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.20 0.70 -6.00 8 118 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 38 0 100 113 35 184.50 29 31.56 CHANGED RTTDIWRSFISQKILHLSGLTVSFVPTNAVQFRNAHsYLKDFKDEKQVYEDSGKMIEFLHNWpCospNSo.lEsCIppLlNDLVKVKLWGc-DApLMEMFLsDLKsMGFEFPpLls.sNah-PYuPSpNEToRDVNCRRMHLEF-Ll-P+Kp..sEsl++ApQKLNYFGDIlsWCNETGYSslospFPSPcQLA+pH-cSYVhQKchNSVLIVVNNYPWKYGMGLIQRLYQPYFATlIFCGSWYPEpFoslDNFTSTlaPINYIHMNPAEIcKGaFAYHCVTLVKELtLsNVpGYFLMuDDsVFNIWQRIDYSRVHHLsG.ShNhpNsWastspaGhpAAK+IlchVKsSTDsKlt-TWpKFDsGLpKauYhN.T-suEspMpSshGKSlSDF .......................sDlhRuahuQ+lLa...G.hhlsFhP.ss.sh.phRssHs.hhhsFp-Ecpla.psGcll......cFL.pWp....st......l........cplhcLshshscpshWsppDhphhthaLpDLhslGap.Pplht........................................................................................................................................................................................................................................................................................................................................................................................................................................ 1 48 63 94 +1978 PF01062 Bestrophin Worm_family_8;DUF289; Bestrophin, RFP-TM, chloride channel Bateman A, Moxon SJ anon [1] Family Bestrophin is a 68-kDa basolateral plasma membrane protein expressed in retinal pigment epithelial cells (RPE). It is encoded by the VMD2 gene, which is mutated in Best macular dystrophy, a disease characterised by a depressed light peak in the electrooculogram [1]. VMD2 encodes a 585-amino acid protein with an approximate mass of 68 kDa which has been designated bestrophin. 
Bestrophin shares homology with the Caenorhabditis elegans RFP gene family, named for the presence of a conserved arginine (R), phenylalanine (F), proline (P), amino acid sequence motif. Bestrophin is a plasma membrane protein, localised to the basolateral surface of RPE cells consistent with a role for bestrophin in the generation or regulation of the EOG light peak. Bestrophin and other RFP family members represent a new class of chloride channels, indicating a direct role for bestrophin in generating the light peak [1]. The VMD2 gene underlying Best disease was shown to represent the first human member of the RFP-TM protein family. More than 97% of the disease-causing mutations are located in the N-terminal RFP-TM domain implying important functional properties [2]. The bestrophins are four-pass transmembrane chloride-channel proteins [3], and the RFP-TM or bestrophin domain extends from the N-terminus through approximately 350 amino acids and contains all of the TM domains as well as nearly all reported disease causing mutations [4]. Interestingly, the RFP motif is not conserved evolutionarily back beyond Metazoa, neither is it in plant members. 
24.20 23.40 24.60 23.60 22.80 23.30 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.74 0.70 -5.47 123 2221 2009-11-19 14:00:48 2003-04-07 12:59:11 16 18 1111 0 1116 1855 146 255.60 24 74.64 CHANGED Mh..........Vpsp................shhclLhph+GSlhptlh........hcllhhhhhshllsh....hh..........................hahhhh..hhphshs.s..lls.sLulhLGFpssssYsRWWEuRphhuthlshsRslsppltsh...........lps.....ssp.............ctchl+ctlu...............asthhptt.LRshsstsc.....hpphl.....ppptp.tlp...st...pts.....shhlhhhhuppl.tps.tcpGp.hssh..........hhttltppLsphpsshuss-RlctTPlPhsYohllphslahahlhlshuh.p.........................slshhs.P.lhoslhsahahGhtplucpL.sPFGp-s.sDlsLstl...I-psltp ...................................................................................................................h.h.hh.h.p...u.o.lh......h..lh..........plhh.hhh.....hhlhh.h...................................................h...hh...hh.p.h..shh....s..........l.ls.....hluh....hL.u..F+ss..ss....as.R............ah-uRphhs.hh.tscslhp.h....hsh..............l.s...ptt..............................................htchh.+h.ls...............as.hhhph...h....L.....R.p....s.t........................................................ht.phl...............tpp.t...h.........tt.....tp.........s.....hlhhhh.t.t........l.....ttt....h...p......p..Gp...hs...sh.........................hh.h.pl..p.........plsphpshhush-+lt.sPlPhsYsh..............hlphslahahhhhPhuhh.............................................slt.hh..s...P.hhosl.hsaha....hu..htt...lupp.ltsPF.G.p........c.s..s..D...l.hstl...I-hsl......................................................................... 0 469 658 954 +1979 PF01724 DUF29 Domain of unknown function DUF29 Bashton M, Bateman A anon Pfam-B_2003 (release 4.1) Family This family consists of various hypothetical proteins from cyanobacteria, none of which are functionally described. 
The aligned region is approximately 120-140 amino acids long corresponding to almost the entire length of the proteins in the family. Swiss:Q2RPE2, PDB:3fcn, is a small protein that has a novel all-alpha fold. The N-terminal helical hairpin is likely to function as a dimerisation module. This protein is a member of PFam family PF01724. The function of this protein is unknown. One protein sequence contains a fusion of this protein and a DnaB domain, suggesting a possible role in DNA helicase activity (hypothetical). Dali hits have low Z and high rmsd, suggesting probably only topological similarities (not functional relevance) (details derived from TOPSAN). The family has several highly conserved sequence motifs, including YD/ExD, DxxNVxEEIE, and CPY/F/W, as well as conserved tryptophans. 21.60 21.60 22.20 22.10 21.50 21.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.81 0.71 -4.34 89 805 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 164 1 289 1041 50 128.00 32 89.81 CHANGED LY-pDahhWhpppsphL+p.....tch.spLDhpNLlEElEslG+p-+.cplpSpLtlLlhHLLKapaQ...s..p+pspSWpsoIpppRpcIpchLccsP.SLcsaLspt..hp....csYpcAhc.AtpEotlsh.........ptFPtpCPa..sh-plLspsahP ................hY-pDa.hWhppphphL+p........tph.splDhc.sLlEElEshG+p-+...cplpShLthLlhHLLKhpa...........pp.pspuWptsIpptRpplpch.Lpc.oP......S.L+.s.h..ltph.....hp.....csYpc.Ahp.utpcotls................t.FP...pp...sPa..sh.-.plLstpah............................. 0 57 198 269 +1980 PF03442 CBM_X2 DUF291; Carbohydrate binding domain X2 Bateman A, Eberhardt R anon Bateman A Domain This domain binds to cellulose and to bacterial cell walls. It is found in glycosyl hydrolases and in scaffolding proteins of cellulosomes (multiprotein glycosyl hydrolase complexes). In the cellulosome it may aid cellulose degradation by anchoring the cellulosome to the bacterial cell wall and by binding it to its substrate [1]. This domain has an Ig-like fold [2]. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.94 0.72 -4.15 41 261 2009-09-13 17:41:48 2003-04-07 12:59:11 9 72 123 1 115 262 11 84.50 29 12.79 CHANGED oIsPoshsFcKsss..sDhslshshN.GNTLsu..lps.ssssLspGoDYol..SG.sslTlppuYLusl......ss...ussoLsFsFusGs...sssLsl ........................hsssshsFc+....ps..s...........s.....Dlslsl..shN..GN.o.lsu............lp...s..sspsLspGsD....YTl.....uG..ss....lTlppuYLusl........ss.....usssLohpFssGs...s.phpl............................ 0 69 95 105 +1981 PF03398 Ist1 DUF292; Regulator of Vps4 activity in the MVB pathway Mifsud W, Coggill P anon Pfam-B_3833 (release 6.6) Family ESCRT-I, -II, and -III are endosomal sorting complexes required for transporting proteins and carry out cargo sorting and vesicle formation in the multivesicular bodies, MVBs, pathway. These complexes are transiently recruited from the cytoplasm to the endosomal membrane where they bind transmembrane proteins previously marked for degradation by mono-ubiquitination. Assembly of ESCRT-III, a complex composed of at least four subunits (Vps2, Vps24, Vps20, Snf7), is intimately linked with MVB vesicle formation, its disassembly being an essential step in the MVB vesicle formation, a reaction that is carried out by Vps4, an AAA-type ATPase. The family Ist1 is a regulator of Vps4 activity; by interacting with Did2 and Vps4, Ist1 appears to regulate the recruitment and oligomerisation of Vps4. Together Ist1, Did2, and Vta1 form a network of interconnected regulatory proteins that modulate Vps4 activity, thereby regulating the flow of cargo through the MVB pathway [1]. 
32.50 32.50 33.40 33.00 32.30 32.00 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.90 0.71 -4.84 44 593 2009-09-13 16:20:19 2003-04-07 12:59:11 9 15 281 8 399 584 6 152.10 33 39.47 CHANGED Kot.L+hulsRL+hlppK+pshs+ptR+-lApLLptG+p.................pp.A+lRVEplI+--hhlEshEllElYCELLlsRlsllpp.................ppsssslcEAlsollaAusRh.sEl.ELppl+shhspKa.G+-Fsttshs.t...sssVsp+llcKLs.spsPspchh.thLpEIAcpaslsa .......................................+s.L+hslsRl+hl.ppK+...........p.....thspptR+-lAph.LpsG..+p............................................................ppA+lR....V.........EplI+--.hlpshEllElaC-LllsRhsllpp.................................................cphssslpE...Aluo..llaAusRh.s-ls...ELptl+...shhstKY.G+-Fstt.....shp.t.....tstVspc.lhcKLu.s........psPstthh.phLtEIAcpasl.a.................................... 0 110 222 321 +1982 PF03444 HrcA_DNA-bdg DUF293; Winged helix-turn-helix transcription repressor, HrcA DNA-binding Bateman A anon Bateman A Family This domain is always found with a pair of CBS domains Pfam:PF00571. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.48 0.72 -4.54 13 1056 2012-10-04 14:01:12 2003-04-07 12:59:11 10 4 1032 0 280 1296 281 76.20 43 22.68 CHANGED pLTslQ+-ILpsLIsLYccp.spslKGcEIA-hlsRNPGTlRNQMQuLKuLGLV-GVPGP+GGYhPTscAY-sLslps.s .................................lTpRQppILphII-h.Y..t.po...tcPV.G.S+sL..t..c..p..l..s......S..s....A....TIRN-MucLEchG.L.l.cp......ps.SuG.....RhPo.tuh........hsssh............................ 0 77 176 236 +1983 PF03445 DUF294 Putative nucleotidyltransferase DUF294 Bateman A anon Bateman A Family This domain is found associated with Pfam:PF00571. This region is uncharacterised, however it seems to be similar to Pfam:PF01909, conserving the DXD motif.\ \ This strongly suggests that members of this family are also nucleotidyltransferases (Bateman A pers. obs.). 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.58 0.71 -4.56 86 1219 2012-10-02 22:47:23 2003-04-07 12:59:11 8 171 696 0 561 1623 276 130.00 27 18.27 CHANGED pplsplhtslhtpG....spsptlspllsplsDtlhcRllpLshtpht......s..sssasalshGSpGRtEQhltTDQDNuLlhsD...........tsttptaatphupclspsLspsGashCsGslMAsNPpWptslspWcpphppWlppPs...sc ...................................p....thh.thhtps.h.p.sch..lsp.h...lutl...sc...t...hh...p+ll.c..L..s....tphus...............s..PssashlshGSpGRpEQhlhoDQDNAlllp-.......................tsptpttaFtpluchl...spsLtph.....GashCsGphMssNspWphshptWpphhtpWlttsp......................................... 0 301 440 518 +1984 PF03479 DUF296 Domain of unknown function (DUF296) Bateman A, Dlakic M anon Pfam-B_796 (release 7.0) & Dlakic M Domain This putative domain is found in proteins that contain AT-hook motifs Pfam:PF02178, which strongly suggests a DNA-binding function for the proteins as a whole. There are three highly conserved histidine residues, eg at 117, 119 and 133 in Swiss:Q46QL5, which should be a structurally conserved metal-binding unit, based on structural comparison with known metal-binding structures. The proteins should work as trimers. 
25.00 25.00 25.80 25.00 24.60 24.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.39 0.71 -4.49 113 1687 2009-11-26 14:38:53 2003-04-07 12:59:11 10 15 1088 11 617 1176 262 116.50 29 57.50 CHANGED hcsallclssGp.................Dlhpslpp...aupccshtsuhl.ouhG.ulsssslph..........s.......tp..h..sshsh.cuphEIlSLsGolh..........s.t.tsshsHLHlslus.spGplhGGplh.Gsl.hssu-lhlhthshtthpp ..................................h.p.aslcltsGp.................Dlhpplts....Fup...p...p.p.l.p.s.shl.uusG.ulssl..sL+h...............s............................sp...tsshpl....pG.p.aEllSLsGolt.....................s.st.....hH.LHlsl.....u...s...s.p.........G...p......s..lGGHl...h.......G..sl..tsssElhlhshs.h....h........................... 0 140 390 516 +1985 PF03537 Glyco_hydro_114 DUF297; Glyco_hydro_114; GHL7; Glycoside-hydrolase family GH114 Griffiths-Jones SR anon PRINTS Family This family is recognised as a glycosyl-hydrolase family, number 114. It is endo-alpha-1,4-polygalactosaminidase, a rare enzyme. It is proposed to be TIM-barrel, the most common structure amongst the catalytic domains of glycosyl-hydrolases [1]. 27.00 27.00 27.10 27.00 26.30 26.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.46 0.72 -4.19 128 403 2012-10-03 05:44:19 2003-04-07 12:59:11 8 12 334 6 208 401 21 78.40 24 25.17 CHANGED ssshhpP..sss......ssWpaQLs.........us..........h.sss...........sss......slhslDhh.c.............ss.....tttltt.L..+sp...G....+tlICYhSuGshEs......aR....s.D.t.....s....p....hsts .................................t.........t.s......tsath.Ls..........st..........h.pss.................ssh...clhslDhh.p........................hs.....tppltt...L..+sp..G..+hVlsYlssGshEs....aRs..h....pp.a...s................ 
0 87 139 182 +1986 PF03618 Kinase-PPPase DUF299; Kinase/pyrophosphorylase Finn RD, Eberhardt R anon Pfam-B_3403 (release 7.0) Family This family of regulatory proteins has ADP-dependent kinase and inorganic phosphate-dependent pyrophosphorylase activity [1-3]. 22.10 22.10 22.40 23.20 21.00 19.90 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.63 0.70 -4.89 121 2713 2009-09-10 22:59:03 2003-04-07 12:59:11 9 2 2465 0 532 1496 1541 253.70 38 92.63 CHANGED lallSDuTGtTA-slu+AsluQF.......shp..h.phpphPalcspcplpcllpplpp...psu......llhaTlVssclpphlpppspthth.tlDlhsshlsslpphLGh.........ps.p....tpsGthHplscs....YacRI-AlpFulpaDDG.psscslpcADllLlGVSRouKTPsSlYLAh.pGlKsANlPLls...-.......splPcpLhph...ppKlhGLTIsP-RLtpIRppRhhsht...................upYushcplppElphAcplac+..uhshI-sTp+SIEEsAstIl .....................lahlSDuoG.TAEhlu+AshuQF................................s..sp.....h..phhph..Palcsppchcp.llpp.lpp........ppu........lVhaTlVps-l+phlt.p.u.p.t.h....shDlhpslls.lppphth........................cP.p.....ps.u.tsH..t..L..sss....YapRIsAI-FultaDDGt.s.s.+.s.ltcADllLlGVSRouKTPhSlYLAh.hGl+sANhPlls...-.......lslPspLh..th.......pcK.lhGLTIsP-+LspIRc.ERhps..................................opYAshcphppEls.s-plhc+..shshIssTspSlEEhAspIl................ 0 164 320 430 +1989 PF03625 DUF302 Domain of unknown function DUF302 Yeats C anon Yeats C Domain Domain is found in an undescribed set of proteins. Normally occurs uniquely within a sequence, but is found as a tandem repeat (Swiss:Q9X8B8). Shows interesting phylogenetic distribution with majority of examples in bacteria and archaea, but also in in D.melanogaster (e.g. Swiss:Q9VA18). 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.08 0.72 -4.40 170 1152 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 862 4 381 987 109 63.90 27 39.52 CHANGED clDhptshcptshp...........hpshpllhhsNPphupphhpts..plGhhLPp.+llVhc......psGpshlshhcP ..............................lDhttshpptshp.....................htshpllhhsNPp.......hus.hhp..tc..sp..lul.LPh.+lllhct...........ssGpshlshhp.................... 0 118 258 338 +1990 PF03629 DUF303 Domain of unknown function (DUF303) Finn RD anon Pfam-B_3622 (release 7.0) Family Distribution of this domain seems limited to prokaryotes and viruses. 21.20 5.70 21.30 5.80 21.10 5.40 hmmbuild -o /dev/null --hand HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -12.04 0.70 -5.16 136 2378 2012-10-02 11:02:24 2003-04-07 12:59:11 13 60 784 11 391 2310 644 279.90 27 56.17 CHANGED G-VWLsoGQSNMp.....h.ht............shts.sp..............pphtps..sp.PplRhh...ph........t..s.p............s.tp.............spWp.sostss....t.t.....h......................oAsuaaFu+pLtpp.l..slPlGLIs.suhGGoslE.....sWhstp..sh.tt..th....t.htt............t.....................................................................................................................................................................................................ssLYNuMlpPlt.sh................sl+GslWYQGEuNs.....t.t..................pYtphhsslIpsWRppa........s.......ps..-hP..FhhlQLu...........sahtt.............................st.ths.lR-sQ.tpshtpl.....sNsuhs...............sshD.....h...up.psI...............HPpsKpplGcRLAhhAhph 
...............................................................................................................................................................................................................................................................................................................VhlhuGQSNh...........................uhup.uh..................s-hhcu......sp..PpI+.l..................pp.shp.....................s...............tt..........spapshhPtst....s.c..................h...........................su..uh.a.hA+...cLhshl.............sss.lhLls.sshGGSuhp.....shspss.........th..ts..s..s......t.................t....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................psLY..ps...hlsts+..uh.................slhull.WhQGE.Dh....sss........................pas...t...h...Fss...hlppaRt.-.h........................................s..........ts.......slP.ahhs.phs............ahppp....................................h...........................................................................................................................................................p.................................................................................... 0 191 307 348 +1991 PF03733 DUF307 Domain of unknown function (DUF307) Yeats C anon Yeats C Domain Domain occurs as one or more copies in a small family of putative membrane proteins. 
21.20 21.20 22.60 21.60 20.70 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.56 0.72 -3.53 122 2967 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1534 0 655 1690 93 52.80 40 43.34 CHANGED hlhNllWhl.huG.halulualluullhslTIlG.......IPaGhtsa+lushsLhPFG+p ....hlhNIlWhl.hu.G.aahsLual.lu.ul.....ltsloIIG........IPh....Ghusa+lu.tluLhPaGpp......... 0 171 375 544 +1992 PF03729 DUF308 RUF1; Short repeat of unknown function (DUF308) Yeats C anon Yeats C Repeat Family of short repeats that occurs in a limited number of membrane proteins. It may divide further in short repeats of around 7-10 residues of the pattern G-#-X(2)-#(2)-X (#=hydrophobic). 25.00 15.00 25.00 15.00 24.90 14.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.70 0.72 -3.98 354 6171 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 1945 0 1223 3772 186 64.10 19 81.07 CHANGED lhGllhlllGllslhhPhhsshsl....s.hllGhhhllsGlhpl.h.tshp.....pph.t....shhhhllhGllhllhG.lhllh ............................ullhllhGll.hl.h.t..Phh..uhh...sl........s.hl....lu...h...h...hllsGlhpl..h..tshp.......pph.................hh.hh.uhh.hhhuhhh..h.................................. 0 391 835 1043 +1993 PF03745 DUF309 Domain of unknown function (DUF309) Bateman A anon COG1547 Family This domain is found in eubacterial and archaebacterial proteins of unknown function. The proteins contain a motif HXXXEXX(W/Y) where X can be any amino acid. This motif is likely to be functionally important and may be involved in metal binding. 21.20 21.20 21.50 23.40 19.00 21.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.08 0.72 -4.52 100 762 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 716 5 231 570 211 62.40 35 37.77 CHANGED sthptulphasptcaacuHEsLEthWh....pssss..c+phhQGllQlAVuhhH.hpcGNhpG...AhtLh .........h..lphhhphas.ctcYFpCH-lLE-.hW+....ppssh.....ccshlhGLIphAsuhaH.hcRsNhpGAh+h................ 
1 68 158 208 +1994 PF01732 DUF31 Putative peptidase (DUF31) Bashton M, Bateman A anon Pfam-B_2152 (release 4.1) Domain This domain has no known function. It is found in various hypothetical proteins and putative lipoproteins from mycoplasmas. It appears to be related to the superfamily of trypsin peptidases and so may have a peptidase function. 21.30 21.30 21.50 21.40 21.10 20.70 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.49 0.70 -5.13 40 325 2012-10-02 13:45:52 2003-04-07 12:59:11 11 4 69 0 65 293 0 366.40 20 54.21 CHANGED s-hY+phhcpTFulpasstpp...............................................................p...............................................pttGTuWllDYptsp.........................sspss......hphYlATNlHVhstLhst.s.......................................................................................................................shpspspshsLs+hspssshpsphspsp..........ppsptphshhsts.........................................ssKplasAsDFlpp..ssp.h.t..........................................pphpphp.shstasDFAVlElchph.p.........................sspphhchlsss...................atthpsptpp......hphhs......pphhpp..htph..........................................hahlGY.....Psspss..............................................shahststth.tp.t.........sththsphptss..hs.p.thpsh.spsulscshlst............................t.....ppYtpaGhshhhpph.shtuGuSGShlhspssp....llGI....aau 
..............................................................................-hYcphh.poaulphhs.ts..........................................................................................................................tt................................................tt..ts.GTsWIlDap..h...sp..........................s.s.c.s......h+aYhATNhHVhs.h......................................................................................................................................shpspstshsls+hpsss.p.hppphphss................ppph.phhhhp.p.....................................................p.......sh+tla..suh.D..F.lpp..pstphht....................................................ppppphp...shstasDFAVlElcaph...........................................ssppltchlsss.......Yt..sppppp......hpahs...........pphhpp...htph................................................hahhGY.....Psspps.............................t....................tpp...........Slahsts..ph.pp.h.............sth..tphptss...hp.t.thpsh..spsulhcshlst...........................................................h...hthGhphh..pph..s.tuGuSGS....lh..........sppsp....llGIaa................................................................... 0 47 61 63 +1995 PF03750 DUF310 Protein of unknown function (DUF310) Bateman A anon COG1421 Family This family contains a number of archaeal proteins that are completely uncharacterised. The proteins are between 130 and 160 amino acids long. Their C-terminus contains several conserved residues. 
24.60 24.60 27.40 28.50 23.90 23.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.56 0.71 -3.91 10 220 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 215 0 73 182 9 122.60 25 86.48 CHANGED hlsshsc.ssptLlcpAE+lG..chLuc.ssLsToQIRclaspV+s.....I-pcscshpp.......lppcLhhLKPKLAYpsGRhp........tulcsLhElLccslcpl.....scchc+..FcpFhcFFEAIVAYHRaYGG+p ....................................hptt................t..h......h.....t..................p..........l.oToQlRplhsh...l.sp.............lt...schppptp.t................pltpc...l...phL+s+hsYpsGRcp................sl+sF.h...c..h..l..p..csl..-.....tl................tcs.tcp......h.tcFscahEAllAY+Kahs............ 0 32 46 62 +1997 PF03759 PRONE DUF315; PRONE (Plant-specific Rop nucleotide exchanger) Finn RD, Wood V, Mistry J anon Pfam-B_3610 (release 7.0) Family This is a functional guanine exchange factor (GEF) of plant Rho GTPase [3]. 19.00 19.00 21.10 20.00 18.90 18.60 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.27 0.70 -5.91 16 263 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 22 6 163 260 0 316.50 52 68.73 CHANGED tt.u-hEhMKERFuKLLLGEDMSGuGKGVsoALAlSNAITNLuAolFGpph+LEPhssE+KshW+REh-WLLSVsDaIVEFsPotQph.sG.sphEVMsoRsRoDLahNLPAL+KLDsMLl-hLDuFc-.TEFWYl-ps.............ttuu.cp..spRp-EKWWLPsP+VPPsGLSEpuR+pLppp+-ssNQlLKAAMAINuplLuEM-IP-SYlEoLPKsG+uuLGDsIYRhl.Ts-pFoP-plLssLDLSSEHcsL-lsNRlEAShalWRRKhpt+..........ssKS..oWustVp.h......h-KpclhtERAEoLLhpLKQRaPGLsQooLDhoKIQYNKDVGpAILESYSRVLESLAaslhuRI-DVLasDchs+pps 
..........................s-hEhMKE+FuKLLLGEDMSGuGKGVsoALAlSNAITNLs................AolFGp..h+LEPhss-+KshW+REh-WLLSVs....DaIVEhsP.ohQph.sG.sphElMss+.RsDlhhNlPAL+KLDsMLl-hLDuFpc..sEFWYlcps........................tsss.ct....s.pRp--KWWLPsPpV.PssGLS-tsR+.Lppp+-sssQlLKAAMAINuplLsEM-lP-sYh-oLPK.........s....G...........+usLGD.lY+hl...Ts-pFsP-plLss.....l-...........LSoEHpsL...-lt...NRlEAuhhlW+++hp.pp....................ps+o.....oW.shVp............h-..Kp.ph.htcR...AEslLhhLKpRFPGLsQosLDhsKIQaN......+.............DVGpuILESYSRVLESLAaslhuRI-DlLhsDphspp.s.............................. 0 21 94 128 +1998 PF03773 DUF318 Predicted permease Bateman A anon COG0701 Family This family of integral membrane proteins are predicted to be permeases of unknown specificity. 24.50 24.50 24.60 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.98 0.70 -5.44 12 3218 2009-09-14 00:13:01 2003-04-07 12:59:11 8 17 2210 0 803 2344 347 274.70 22 88.25 CHANGED shlshh.slhh.hhlcsl.hLlhuhhluuhIpshlscphlhchLsc..s+hhuhhluulhGhlhshCsCuhlPlhtthhcpGsshssAhsFLhuuPhlN.lslhhshhshG..hplshhRllsuhlhulllGllhphhhptp........................hlhshp...............................h.hls.sph.hsshh.+ht.shh.....cshs.hhphh..hLllGshIAuslpsal.Ppshlhs.hGt.....sslhushlhhlluhlh.hsohs-sslstuLhpt.huhGsslAhLhhGshlsl.shhhlhphhtt+hlhhhlshlsl.shlsuhlhsh 
.....................................................................................t...t...thh.h....hhlc.h.h..hll.l.uh....hlu..uhlpshls.......c...h..lh+h.L.sp.....sp...h..h...u..h.hh.us...h....hGhhhPhCpCuslPlhtuhhp.ptsshs.s.sh.s.FhhsuP..hl..N.P.h.slhhh.hh.shG......hp..hshh+....h..lsullhullhG..h...l.lthhhtpt............................................................................................................................h.............................................................................................................................t....h.....t..t...p...h.....t....t..h...t..h.hhh..........psh.p..hh.p.h.h...alll....Gsh...l.uu..........h..........h.......psa..l....Ppphhts...hut.........................ss.h.h....ul.hlh....h..lluhhh..l.sutush.ls....t....u.h..h...tt...hu.h.ushlAhlhhssh.lsl.plh.hh.pphht.h....+..hlhhhh.s.hl..hh.s.shl.huhlh..h........................................................................................................................................ 0 295 561 699 +2001 PF03778 DUF321 Protein of unknown function (DUF321) Finn RD anon Pfam-B_876 (release 7.0) Repeat This family may be related to the FARP (FMRFamide) family, Pfam:PF01581. Currently this repeat was only detectable in Arabidopsis thaliana. 20.60 20.60 24.40 21.10 18.00 20.00 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.13 0.72 -6.26 0.72 -4.48 32 105 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 1 0 2 105 0 19.10 66 55.12 CHANGED LRFWRENHGFoFLAtK.pla LRFWRENaGFTFLAGK.pVY. 1 2 2 2 +2002 PF03780 Asp23 DUF322; Asp23 family Bateman A anon COG1302 Family The alkaline shock protein Asp23 was identified as an alkaline shock protein [1] that was expressed in a sigmaB-dependent manner in Staphylococcus aureus. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -3.78 173 4865 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1784 0 734 2174 33 105.20 28 72.18 CHANGED phGplpIuscVltpIsGlAup-l.GVhuhsus...hhsslschl.sp...cshs+GVpV.chsp.ctplslDlhlllcYGssIsclupplpcpVcpslcpMTGLcVspVNlpVpslp ..........................................................p..GplplsscVltp..I..sGhAsp.cl.Glhuhsut........hhsslt.c..hl..sp..........psh..s...+GV...p.V....ch..........s....cpplsl.DlhlllpYG.s.plsclupplpcpVppslcphT.ulp...l.s.pVNlpVpsl.................. 0 308 520 657 +2003 PF03781 FGE-sulfatase DUF323; Sulfatase-modifying factor enzyme 1 Bateman A, Wood V, Mistry J anon COG1262 Domain This domain is found in eukaryotic proteins [1] required for post-translational sulfatase modification (SUMF1). These proteins are associated with the rare disorder multiple sulfatase deficiency (MSD) [2]. The protein product of the SUMF1 gene is FGE, formylglycine (FGly),-generating enzyme, which is a sulfatase. Sulfatases are enzymes essential for degradation and remodelling of sulfate esters, and formylglycine (FGly), the key catalytic in the active site, is unique to sulfatases [3]. FGE is localised to the endoplasmic reticulum (ER) and interacts with and modifies the unfolded form of newly synthesised sulfatases. FGE is a single-domain monomer with a surprising paucity of secondary structure that adopts a unique fold which is stabilised by two Ca2+ ions. The effect of all mutations found in MSD patients is explained by the FGE structure, providing a molecular basis for MSD. A redox-active disulfide bond is present in the active site of FGE. An oxidised cysteine residue, possibly cysteine sulfenic acid, has been detected that may allow formulation of a structure-based mechanism for FGly formation from cysteine residues in all sulfatases [4]. 
In Mycobacteria and Treponema denticola this enzyme functions as an iron(II)-dependent oxidoreductase [5,6]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.91 0.70 -5.03 22 4674 2009-01-15 18:05:59 2003-04-07 12:59:11 11 147 2003 22 1863 4765 2666 226.90 22 55.13 CHANGED pssshVhlsGGsFth...Gs.....................tspEsPh+phpl.psFhIsch.VTstpatpFlpt...ssh................................................h....ht.hppspt......ttsspPVspVoa.DAhAYApWhuc.......................................RLPTEsEWEhAARuG....tuphasaGsphts.....................t.stttsstsstpt.ssPVssass..NshGLhDMhGNVWEWTsshap...........................t.s....sssh+Vl+GGuahst.....sthhRsuhR.s.t..tssstssplGFRssp .........................................................................................................h.....h.l...s...G...h.h....G..........................................t....-....ts......c..p..lpl..ps...F..hhsph.V..T..stpa...t...tahpt........st............................................................................................................................................................................................................h.......p..........................................................................t............t...........pt..P......V......h..pVoa.-.At.s.....a.spWh.u..p.............................................................................................................R.....LPTEsE......W....E....h....A...A......+.u..s.....................t..t........h.....h..s.s.p.............................................................................................t..................t..t...........h.....s.....t....s...V...s....t.h.s.................s.sh..G.....l.a.-.hhGNVaEWst.s..at............................................................t................................t....s..t........t..l....h.+..G...Gu..ahs..........sth...hRss...hR....t................t.......t...........t....h.
GFRhs........................................................................ 0 719 1285 1627 +2004 PF03787 RAMPs DUF324; RAMP superfamily Bateman A anon COG1332, COG1567, COG1367 Family The molecular function of these proteins is not yet known. However, they have been identified and called the RAMP (Repair Associated Mysterious Proteins) superfamily. The members of this family have no known function they are around 300 amino acids in length and have several conserved motifs. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.47 0.70 -4.49 154 1749 2012-10-01 21:23:39 2003-04-07 12:59:11 10 11 493 0 736 1663 29 234.30 14 68.48 CHANGED plpsloP.la......hGsu........................................................................................................................tssstshl.uool+GslR.....hhhc..................thht.........................t.tt.tttt..t.t..............................................................................................................................................................................................h.splF.....Gss........................................shtupltlpss.................................................................................................................................h..hh.hththptphthspt.t..................ap..hchh.tt.......................................................................tt..ppttphhtphLphh..........................thhslG....uppp.......hG.hGt..hp 
............................................................................................................................................................................hhos..lhlGsu...................................................................................................................................................................................h....hp.t.h.t..s.......h.P...hIPG...SSlKG.tlR................shhp.............................................................................................................phht....................................................................................t......................................................................................................................................................................................................................................................................................................................................................................hhpplF.............Gsp..........................................s...htu...plhhp....D.s..h.h....................................................................................................................................................................................................................................................................................................................h..t...h..h.t..hp.h..p..h..t.lsp...pttst..........................................hp.....hchl.tt........................................................................................................................................................t....t............p....t.........h...t....h.hht.hhphh......................................t....htlG..utts.hGhGhh................................................................................................................................................................................................................................................................
............................................................................................................................................................................ 1 388 536 643 +2005 PF03804 DUF325 Viral domain of unknown function Griffiths-Jones SR anon PRODOM Family \N 21.50 21.50 21.90 127.20 20.10 19.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.31 0.72 -4.26 6 14 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 14 0 0 14 0 72.40 66 48.77 CHANGED l+FSLRLTpEaKENIVAHlcHLsRLRALIDGKVTpADVRRFGFsDRNALVuACMsVNVQsYs.PDuTI..RhQP lKauLRLTpEYKENIlsHhDHLoRLRsLIDGhlpspDVpRFshhsRNsLlSACMplNVpsYh.PsuTIDMRhQP 0 0 0 0 +2006 PF03885 DUF327 Protein of unknown function (DUF327) Bateman A anon COG1728 Family The proteins in this family are around 140-170 residues in length. The proteins contain many conserved residues. with the most conserved motifs found in the central and C-terminal region. The function of these proteins is unknown. 25.80 25.80 26.50 34.10 24.90 25.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.80 0.71 -4.37 51 443 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 442 2 118 290 4 147.00 26 98.51 CHANGED M+Ip............t.ppht.tshtpsptcpst.ss..psFtphh.sptcpcttp...........ccLppllscIcctGp+LscshshpsltpYKchlKcFLcpslppshplcpppuh....s.t..ps+haplVcplDc+LpcLsp..pllspc+cslsllspIsEIpGLllDlah .......................................................t......t..tt..t.ct..sp.sp.....tsFspph..sptcccph.................-plpphlcpIsctGc+LtppholcslhpYKphVKpFLp.hlcsshphccppuh................p.th..psphhslV+pl-pcLp-Lsc..pllsscpsplclLspIsEIpGLllslh... 0 54 92 102 +2007 PF03883 DUF328 Protein of unknown function (DUF328) Bateman A anon COG3022 Family Members of this family are functionally uncharacterised. They are about 250 amino acids in length. 
24.60 24.60 24.70 24.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.57 0.70 -5.08 127 2603 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2522 0 474 1849 1083 236.40 36 94.32 CHANGED M.LlllSPAKsL.chp..ss..ss...tpho....pPphhcpuppLlchh.+plos...pclupLM.plS-cLAtLNhtRapsa....p...t..ps....u+...AlhAFs.GDVYpGL-ApoLspp.shsaAQp+LRILSGLYGlLRPLDhhpPYRLEMGT+Ls..s.pGpsLYpFWGsploptLspthtt........splllNLASsEYaKulc..ptl.pspllossFp-.K..sGp..hKlISaaAK+ARGhMsRallcpplsss-p..L+sF....shsGYtastphS ...................................MhIllSPAKshshp............ss....sh............tph.o..........Pph.h...sp...opp.L..l.cth....cpl.os.............splppL.h.plS......-.clAshNhtRapsap.................t.ps.....up.....AlhuFp.GslYp..uLps.cs......h..ocs...-h......casQpHLRlLSGLYGlLRP.L.Dh.hpPYRLE..MGs+..Lt..........sp....G....+............s...LYpFWssh..l...o.ptL.sctht..................cplllNLASsEYh+slcs......tpl.......ps..........c........llss.h.F...h.-pK.......sGc.....hKllS........aaAK+ARGhMsRall..c..N..p..l..p..p..s-p.Lp.sF.......shtGYhastp.S........................... 0 130 290 396 +2008 PF03884 DUF329 Domain of unknown function (DUF329) Bateman A anon COG3024 Domain The function of this short domain is unknown it contains four conserved cysteines and may therefore be involved in zinc binding. 19.80 19.80 19.80 19.90 19.70 19.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.34 0.72 -4.41 121 1426 2012-10-03 05:12:49 2003-04-07 12:59:11 9 3 1418 1 310 792 215 59.70 48 90.21 CHANGED h..s........VpCPpCt+tl.Wt.ps.ta.RPFCScRCphIDLGcWAsEcapIPup........ss-t.sp-p ......sl....VsCPsCGKsV......sW......s..p.s..sa..RPFCScRCp.LIDLGcWAuEEppIPuss-.....-sDt........................... 0 71 164 240 +2009 PF03886 DUF330 Protein of unknown function (DUF330) Bateman A, Sammut SJ anon COG3009 Family The proteins in this family are uncharacterised. The proteins are 170-190 amino residues in length. 
21.00 21.00 21.10 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -4.85 95 1859 2012-10-01 20:48:06 2003-04-07 12:59:11 8 5 1526 8 438 1301 80 157.30 22 77.37 CHANGED YpL.......ssssssssssssss.....lhlssVslsshLspsplVhpp..ssspl.phscpspWAssLsptlppsLspsLspphssttlss.s.........ssssshplpls..lpcFcut....ssspshlsup..Wplhs...tp.......sp..........shss+s.hphptshsus.u....hsulVsAhspul.spLuppIA ............................................Y.L........stssstsssstst........hhLhlt.pVsls.c.hLs.ss..s..lV.hps......sss.p.h..shh.p..sshWAssLsptlpsp..LsssLspp....hsshslssts............hsssphpLpls....l..pp....F....pup................su..p....s......h...l...s..up....ahLhp.......p....................ut................lhpRs.Fp..l.pts.hs.ps..s........hss..hV.pu.hspuh.sphuspIs................................................................. 0 105 240 342 +2010 PF03889 DUF331 Domain of unknown function Bateman A anon COG3036 Domain Members of this family are uncharacterised proteins from a number of bacterial species. The proteins range in size from 50-70 residues. 25.00 25.00 25.90 25.20 19.10 18.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.80 0.72 -4.67 20 635 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 633 0 89 304 9 39.20 62 53.07 CHANGED GsI+.DNALcALVTSpLF+pRVEKsKKGKGSYpRKs......K+pG ...........GpIKDNAlcALl+DPLFRpRVEKsKKGKGSYpRKu......KH................ 0 9 28 58 +2012 PF03891 DUF333 Domain of unknown function (DUF333) Bateman A anon COG3042 Domain This small domain of about 70 residues is found in a number of bacterial proteins. It is found at the N-terminus the of Swiss:O28332 protein. The proteins containing this domain are uncharacterised. 
25.00 25.00 26.20 25.40 23.10 24.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.61 0.72 -4.16 57 1338 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 848 0 141 468 11 48.40 44 56.10 CHANGED luhA.NPA.......usaChptGGplpltppspG...phGhCpLPsGcthEEWsLaRppp ....................thsNPA.......us.CtphGGoLtsspph.p.G...shuhCtLP.sGcph-EWuLhRtsp......... 0 22 53 100 +2013 PF03904 DUF334 Domain of unknown function (DUF334) Finn RD anon DOMO:DM04800; Family Staphylococcus aureus plasmid proteins with no characterised function. 21.40 21.40 21.70 22.20 21.20 21.30 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.64 0.70 -5.03 2 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 34 0 1 40 1 157.90 31 83.28 CHANGED .hthQ.h.S.cs.KpNcps.NhQhpSK.TGT.SpphQpoLpNEELpELK+QNKLllKYluEhpEpQclREKE.ptIpSpLKcsTcDFps+uhKl+NDFVclLQcpLp+VssE-lc..lt+slYtVREE.cpMLpEVKpSHEHYQpRQK.LFTGIGAMLLVFMLFALIMTIGpDFMuFLHVDsLQNAIAuKlKASEGFhohlWYIAYGLPYlLAIGLFIhLYEWIRA+FHD .........................................................................................................................p...pp.pth....phppTtccFh.p..s.phppDFlchlpcpLpcl..ss-..php.t..hpp-.lhc.p...pE.cphLp-.V+p...uhEchpppppplasuhsuhLhVFhlFsllMhhG.Dhh.............................................................hh.................................... 0 0 1 1 +2015 PF03928 DUF336 Domain of unknown function (DUF336) Bateman A, Moxon SJ anon COG3193 Family This family contains uncharacterised sequences, including several GlcG proteins. The alignment contains many conserved motifs that are suggestive of cofactor binding and enzymatic activity. 
20.70 20.70 21.20 21.30 20.30 20.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.48 0.71 -4.35 36 2508 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 1635 13 741 1913 457 128.80 27 74.31 CHANGED sslshpsAh....phspAAhttApphuht.lslAV.VDt.sGphls.hhRhsGAshtosclAtcKAaTAsshtt.....sTtphsptlps.ssshhtshsth.....sphshhGGGlPlh.hcG.tl.lGulGVSGs.osppDcthAptulpAl .............................................................................................lohptAp..plht....suhpp.....Ap.c.........s...h...lslul..lDs..sG..p.hls..h.h..R..h....-s.u..s.....h.s..sclutcKA.hTAst.hcp...................................sottht..p..h....s..ps....sts.hhshtsth....................sthsshuG.GlPlh...hcG....p.........l....lG.ulGVSGs..sscpDtplApsult..h...................................... 0 202 445 612 +2016 PF03929 PepSY_TM DUF337; PepSY-associated TM helix Bateman A, Yeats C anon Yeats C Family This alignment represents a conserved TM helix found in family of bacterial proteins. The previous DUF337 alignment covered the whole (or most) of the protein. Analysis with dotter (E Sonnhammer) indicated that the same region was represented multiple times within the old alignment. 21.50 19.00 21.50 19.00 21.40 18.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.17 0.74 -7.28 0.74 -3.18 216 157 2012-10-01 23:59:14 2003-04-07 12:59:11 11 13 141 0 57 868 51 35.30 47 13.52 CHANGED hhplHp.................hhshhsshhhllhsloGlhhh .................sDLHKGRss........GssWtWhlDlhAlhhllFslTGlhLL....... 0 14 29 44 +2017 PF03937 Sdh5 DUF339; TPR_div1; Flavinator of succinate dehydrogenase Bateman A, Yeats C, McNeil M, Eberhardt R anon COG2938 Domain This family includes the highly conserved mitochondrial and bacterial proteins Sdh5/SDHAF2/SdhE. 
Both yeast and human Sdh5/SDHAF2 interact with the catalytic subunit of the succinate dehydrogenase (SDH) complex, a component of both the electron transport chain and the tricarboxylic acid cycle.\ Sdh5 is required for SDH-dependent respiration and for Sdh1 flavination (incorporation of the flavin adenine dinucleotide cofactor). Mutational inactivation of Sdh5 confers tumor susceptibility in humans [1]. Bacterial homologues of Sdh5, termed SdhE, are functionally conserved being required for the flavinylation of SdhA and succinate dehydrogenase activity. Like Sdh5, SdhE interacts with SdhA. Furthermore, SdhE was characterised as a FAD co-factor chaperone that directly binds FAD to facilitate the flavinylation of SdhA. Phylogenetic analysis demonstrates that SdhE/Sdh5 proteins evolved only once in an ancestral alpha-proteobacteria prior to the evolution of the mitochondria and now remain in subsequent descendants including eukaryotic mitochondria and the alpha, beta and gamma proteobacteria [2]. This family was previously annotated in Pfam as being a divergent TPR repeat but structural evidence has indicated this is not true. 20.90 20.90 22.30 21.70 20.70 19.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.31 0.72 -4.29 149 1961 2012-09-24 13:32:29 2003-04-07 12:59:11 11 9 1889 6 589 1180 1046 73.20 35 68.49 CHANGED cppRLhacu.RRGhhEhDllLssFhcpph..ssls-pchptac.pLLsts..D.DLapWlhsp........shPp....chph......tllpplp .........p+tRlpaps.RRGMhElDlllhsFh-c..p..h..ssLo-s-hphF..cLL.-..ps.........D..sDLasWlhs+........spPs....chcht...thlchI............................ 0 158 312 458 +2018 PF01784 NIF3 DUF34; NIF3 (NGG1p interacting factor 3) Bashton M, Bateman A anon Pfam-B_1006 (release 4.2) Family This family contains several NIF3 (NGG1p interacting factor 3) protein homologues. 
NIF3 interacts with the yeast transcriptional coactivator NGG1p which is part of the ADA complex, the exact function of this interaction is unknown [1,2]. 23.40 23.40 23.70 23.70 22.10 23.30 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.86 0.70 -4.80 122 4441 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 4063 30 1020 3147 628 264.00 27 92.17 CHANGED clhphlcphhPhphu..p.sa.Dss.............................GL....ls............pp..........................plp+lhlslDso.pllccAlp......ts.sshllsHHP...l.ha................pshcplssp......htphlhpLlc.s...........sIslauhHTshDss.s.GhNshLA.chL.....slp.........shpsht......................................................................................................................thuhGhl.Gplsp.s....hohpphhthlpppl.th.........tt......lphs.........shspplp.....+VA..lsuGuGs..phlt..pAhptss..D..lalTG-l...........paHsshpA..ppt.....G......ls..llssGHatoEphhhpt.....ltchLppp.ht.......................l .................................................................................lhphlpp.hhs.phtp..sa..D..s..s..................................GL.....plt.......thcp..........................plp.+....lhsul-soptll-cAlc......tt..sD.hllsH........Hs.......h.ha..............................+s.h.p...s.lt.ss........ptphlppLlp.p.....................................cIslauhHTs..l.......D..........s.......tst..GsNshLA.chL.............Glp..........shp.lp.........................................................................................................................................t.ulGhl.Gplsp...s.........hohtchspplcptL..sh.............ps.......................lchs....................s..sssp...lp.......+VA....lss...GuGp........shlp..pAh.pt.Gs.....D....salTG-l...........................pa+sspsA....hpp........G...............ls......hlssGHatoEph.hhpt.....lschLppt...tt..................................................................................................... 
0 349 670 874 +2019 PF03956 DUF340 Membrane protein of unknown function (DUF340) Bateman A anon COG2431 Family Members of this family contain a conserved core of four predicted transmembrane segments. Some members have an additional pair of N-terminal transmembrane helices. The functions of the proteins in this family are unknown. 23.80 23.80 24.00 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.34 0.71 -5.01 43 1369 2012-10-02 17:06:44 2003-04-07 12:59:11 8 2 1098 0 227 767 126 173.70 40 69.66 CHANGED llsslllGhllGhhhhss.....hth..sphsphhLhhLlFhlGlpLtpsshsl+plh.ls++slhlslhslluollGGllsuhlL.slslppuLAluuGhGWYSLSGlllschhus........hhGulAFlsslhREllulhhlPllh+..hsp.suluhuGAToMDsTLPlIppsuGhcssslAlspGhlLollVPlLlshhhu ..................................l.husVllGhhlGhhthsh........lphs...spsSpas.L.hlLLFllGIpLtssu.hoL.+plh.LN++uh..l....lulls....lluSLlGGllsuhlL..s.........ls.lppuLAhASGFGWYSLSGIL.lo-t.aGP........hlGohAFhs-LhREllAlhhIPhLh+...tttss..AluhsGAToMDhTLPllp+.s.GGh.-.hVPhAIsaGFlLollVPlLlshFs.s..................... 0 68 135 189 +2020 PF03959 FSH1 DUF341; Serine hydrolase (FSH1) Finn RD, Bateman A, Mistry J, Wood V anon DOMO:DM04042; Family This is a family of serine hydrolases [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.56 0.70 -4.89 52 906 2012-10-03 11:45:05 2003-04-07 12:59:11 8 40 298 2 698 2926 1726 210.90 22 69.79 CHANGED Ms.......t.h+lLhLHGatQouphFptKsuul+KhLpc................sh-hsalsuPh.l..s.schs..........................sttsss.huWahpppt..................tsphhsh--ulchlpchlpc....................................pGP...FDGllGFSQGAsluuhlsshhpphh....................................ppP.h....KFslhhSGFp......ts................................app.........ha...ps.l...phPoLHlhGptDtllstp......+SptLhcthps........splltHsGGHalPsppthhpth ...............................................................................................................................................................h.......+lLsLHGhtps....u...p...hFp.....t...p..h...t..s...l..p..p..tl.pp...........................t.h.chsahsu.P...h.h....s......s.s..h................................................ttsshhs...W..at.tp.tt......................................................tthtsh....p.p.u....l....p.h....l..t...p..h....lp.c............................................................................................p.G.P......a......D....G.......l.......l.......GFSQ.GA.s..l.u..u..h..l.....h...t.h..p.p.h.h...............................................................................tts...h..........+.Fu.l...h.....h...u.G.h.h.s.........hs..t...............................................................................................hpt............hh........pt.l...........ph.P.ol.H.l..h..G...p.........p...D...hls............................t...u.tt....L..h.ph.hps...........stlh.p.H....s.u..G.H.h..l.Ptptt.h...h.......................................................................................................................... 
0 185 395 577 +2021 PF03961 DUF342 Protein of unknown function (DUF342) Bateman A anon COG1315 Family This family of bacterial proteins has no known function. The proteins are in the region of 500-600 amino acid residues in length. 32.40 32.40 32.40 32.40 31.80 32.30 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.29 0.70 -6.07 100 994 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 688 0 301 827 74 432.60 19 78.32 CHANGED lplplusDcMpAtl..........plpss......sGpsl.......................shcclhptLpppsl.shGlpcptlpp...hltp.t.h.........hphllAcGp.PhsGpsuplphhhp.tppph.p.ttccs......plDh+sLsplhs..VptGphLh.c+lPsstGpsGhsVhGchl...sspsG+-htlt..h.G.....pustls.cDsphl.l.AshsGtshht.ps.t.....lpVs...sslplc.sVDlso.GNIp...........................FpG.sVhlpG....sVpsGhpVcAsGcIplt...GhV.-uAplpA.tGsIhlptGllGptp....................uplpA.pGslpspal.ptuplpAtsslhlpctlhpsplpussplhl....ptt+..GpllGGp.hputpplpst..lGu...................ptuspTplpl.uhs.sphpcph..ppLcpplpph...ppphpclppt.......lpth.p.pt.ttp......pthpchtpph....pphppplpplcpchp.plppplpph......tps.plpspc.plasuVplpls...stthphpcp.hsssphhh..psspl 
.............................................................................................................h.ht.pth.A.h.........................lh.s......tth.h...........................hhpplhp.hltpttl..hhGl.p....c.....p...tlpc........hhtp.ttt..................ph.lApGhtPlpupsspl.h....hhc...............p..p.t.................ppt..........plD...h.+................p.h.s.hhhs...........Vc.....cs-hlh.ch.h...s.p..p.G.psGhslpGchl........s.hsu...p...-....s.....lh...h.G.......css....ths.......p..-.s...spl..h.As..hsGhshhp..pst........hsVp....shlpls......sV.shpT.GsIc...................................a.p.G.sl.hl.pG......sVtsGhplpAstslplt....Ghl...ppu.plp.A..pusIplppGlhuttt.......................................................................................upltA..ttslh.s....phh.psup.lpAtp....lhlp.phh....h.psplhs.tpplhl................p..stl.u.sp..hpstp.plhsh....lGs.....................th.ss..t.o.p.lps.shp.......phppph....pplpp.p...hpph.......ppth.plppt.........................ltph..p.t............th.p.hpchhpph....tphptphpphptph....plppphpph................................l.h.p.thh.sspl.ht....t.................................................................................................................................................... 0 134 239 268 +2022 PF03966 Trm112p DUF343; Trm112p-like protein Bateman A anon COG2835 Domain The function of this family is uncertain. The bacterial members are about 60-70 amino acids in length and the eukaryotic examples are about 120 amino acids in length. The C terminus contains the strongest conservation. Trm112p is required for tRNA methylation in S. cerevisiae and is found in complexes with 2 tRNA methylases (TRM9 and TRM11) also with putative methyltransferase YDR140W [1]. The zinc-finger protein Ynr046w is plurifunctional and a component of the eRF1 methyltransferase in yeast [2]. The crystal structure of Ynr046w has been determined to 1.7 A resolution. 
It comprises a zinc-binding domain built from both the N- and C-terminal sequences and an inserted domain, absent from bacterial and archaeal orthologs of the protein, composed of three alpha-helices [2]. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.61 0.72 -3.61 69 2406 2012-10-03 10:42:43 2003-04-07 12:59:11 11 10 2242 11 804 1479 422 52.00 41 65.19 CHANGED chpLLclLsCPhs+G........sL............................................................................p....hchtc.....spLlCpss..phsYPIc-G ....................................ch+LL-ILsC..P..ls+G.............L.................................................................................................................................t....astcp..........pELlC..c..t...s..pLAaPl+DG.............. 0 241 477 650 +2023 PF03976 PPK2 DUF344; Polyphosphate kinase 2 (PPK2) Bateman A, Albrecht M anon COG2326 Domain Inorganic polyphosphate (polyP) plays a role in metabolism and regulation and has been proposed to serve as a energy source in a pre-ATP world. In prokaryotes, the synthesis and utilisation of polyP are catalysed by PPK1, PPK2 and polyphosphatases. Proteins with a single PPK2 domain catalyse polyP-dependent phosphorylation of ADP to ATP, whereas proteins containing 2 fused PPK2 domains phosphorylate AMP to ADP.\ The structure of PPK2 from Pseudomonas aeruginosa has revealed a a 3-layer alpha/beta/alpha sandwich fold with an alpha-helical lid similar to the structures of microbial thymidylate kinases [3]. 
20.40 20.40 20.40 20.80 20.30 20.10 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.38 0.70 -5.07 10 2990 2012-10-05 12:31:08 2003-04-07 12:59:11 9 6 1855 12 886 2487 1001 226.10 37 80.64 CHANGED pls+ccYcccLtcLQhELlKLQpWltpcGc+.lVllFEGRDAAGKGGsIKRlT-pLNPRssRlsALsAPT-cEpuQWYaQRYlpHLPAuGElVlFDRSWYNRAGVERVMGFCssppapcFhRclP-FE+MLscsGIhllKaWhuIScEEQhcRFpsRpcsPlKpWKlSPhDlcuRp+W-sYo+A+--MhsRTcTshAPWhVVcADDKK+ARLNsI++lLpulcYpchccs .................................t..p+ppYcpp.Lp.pLQhc.Lhcl......Q.p..........h.l.......t.....p......p......s......p.....+.....llllFEGhDAAGKGGsIK+lh.p.tLsPRsh+Vsuls............t...P....o-c...Ep.........s............ph.........Y..h....Rah.p+LPs.........sG.....cI..slF..DRSWYsRsh..V..........E.........RV...............h...............G...........a...............s...........o.............p.....p..p.............a.............c....c..........h....hc............plspFEc.hL.s..c.sG.hhl...lKaahplSc-EQpcRFpp..Rhpc..P..h..KpW.K.....l.S.s.hDlcs.pc+W-cYp...pAhccMhtpTsTs.t....APWhllpusDK+.tARlsshchlLpp.l.ch.....h................................................. 0 253 565 757 +2024 PF03978 Borrelia_REV DUF345; Borrelia burgdorferi REV protein Finn RD, Moxon SJ anon Pfam-B_26177 (release 7.2) Family This family consists of several REV proteins from Borrelia burgdorferi (Lyme disease spirochete). The function of REV is unknown although it known that gene is induced during the ingesting of host blood suggesting a role in the metabolic activation of borreliae to adapt to physiological stimuli [1]. 
21.60 21.30 21.60 22.00 21.50 21.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.88 0.71 -4.51 6 40 2009-12-11 17:11:53 2003-04-07 12:59:11 8 1 21 0 4 39 4 144.40 56 92.53 CHANGED MKNKNIhKLFFluML..FVhuCKsY.............VcEKKcIDSLhpuVSsLNNcsDcspFKsYK-KINcLKEsLKDluNA.ELcEKlLsLpsLFQDKLAAKLAALKAAKscIpuh...sDcDps.sKsKIWocAKLVGVTlhh.GSsToGpGs.......cMSK-AVEQ....IDcIIKFLEEGTN .........................M+pKNIhKLFFsSML..FlMACKAY..............................................VEEKKpIDSLhpslssLpNctst.....p..pFpsYKpcINcLKEsLKDlGNA...ELcEKLLsLppLFQDKLAAKLAALKA..AKpcIpsh..............TDpDpshsKpcIWuEAKLVGVTlph.GSsosGpGs.......cMSK-AVEQ....I-KIIKFLEEGTN........................................... 2 4 4 4 +2025 PF03984 DUF346 Repeat of unknown function (DUF346) Yeats C anon Yeats C Repeat This repeat was found as seven tandem copies in one protein. It is predicted to be composed of beta-strands. Thus it is likely that it forms a beta-propeller structure. It is found in association with BNR repeats, which also form a beta-propeller. 21.20 21.20 21.20 21.40 21.00 21.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.06 0.72 -4.50 10 31 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 15 0 18 32 1 39.70 39 21.81 CHANGED sQQHVauRss-GsLtHWaWs.sssulpppsW..sGsluGsPsA ........VFuRGoDspLtH+WWp.sssuh.s.tsWt.lGGslsSsPsh.. 0 8 11 12 +2026 PF03988 DUF347 Repeat of Unknown Function (DUF347) Yeats C anon Yeats C Repeat This repeat is found as four tandem repeats in a family of bacterial membrane proteins. Each repeat contains two transmembrane regions and a conserved tryptophan. 
21.50 21.50 21.70 22.00 21.30 20.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.46 0.72 -4.08 170 1475 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 248 0 562 1404 64 55.80 28 80.30 CHANGED hhaWlshlhopsLGsshGDhLs.c..s...LGlGhssuoslhsullssslshhhtppt...ts .......hhaWlshlhopslGsshuDhlsc.....s...................LG..l.Gh.s..suohlhsullss.hlshh.hhspt...s......... 1 143 313 454 +2027 PF01796 DUF35 DUF35 OB-fold domain Bashton M, Bateman A, Krishna SS anon Pfam-B_1390 (release 4.2) Domain This domain has no known function and is found in conserved hypothetical archaeal and bacterial proteins. The domain is approximately 70 amino acids long. The domain is duplicated in Swiss:O53566. The structure of a DUF35 representative reveals two long N-terminal helices followed by a rubredoxin-like zinc ribbon domain and a C-terminal OB fold domain represented in this entry. OB-folds are frequently found to bind nucleic acids suggesting this domain might bind to DNA or RNA. 24.20 24.20 24.30 24.20 24.00 24.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.43 0.72 -3.75 157 1861 2012-10-03 20:18:02 2003-04-07 12:59:11 12 19 530 0 752 1722 492 65.50 26 35.94 CHANGED hhthu.GpGslhoaTssct..sstshh...........PaslulVcL.-..t.........Gs........clhupls........s.s.......ssp......t......lGttVcss.hp ........................hthu.spGslhoaolspt..s.ss.h............Pasl.AlVcL.-...-................Gs............plhupls.........sh.....................ssp.....t.l.t.........lGhpVcssa........................... 0 153 462 641 +2028 PF03994 DUF350 Domain of Unknown Function (DUF350) Yeats C anon Yeats C Domain This domain occurs in a small set of of bacterial proteins. It has two transmembrane regions, and often occurs as tandem repeats. The are no conserved catalytic residues. 
20.60 20.60 20.80 20.60 20.30 20.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.36 0.72 -4.23 173 2291 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1116 0 490 1389 176 53.60 26 71.47 CHANGED asllulllhhlshhlhch.lsP.hchtptIpp...s.NhAuulhhuuhhlulullluuul ................ahhlullh.hllh....hhlhph..ls....P...h..shhphItp..........s..NsAAulh....huushluhulhluush........... 0 138 280 375 +2029 PF03995 Inhibitor_I36 DUF351; Peptidase inhibitor family I36 Yeats C, Bateman A anon Yeats C, Bateman A Domain This domain is currently only found in a small set of S. coelicolor secreted proteins. There are four conserved cysteines that probably form two disulphide bonds. Proteins 2SCK31.15C (Swiss:Q9ADK5) and SCO3675 (Swiss:Q9X8V7) also have probable beta-propellers at their C-termini. This family includes Swiss:P01077 a known peptidase inhibitor of known structure. This protein has a crystallin like fold Pfam:PF00030 and is distantly related by sequence. It is not known whether other members of this family are peptidase inhibitors. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.52 0.72 -3.62 7 93 2012-10-01 23:14:22 2003-04-07 12:59:11 8 5 47 1 33 113 0 89.90 18 54.95 CHANGED uhtcCPsGthChasussusGphhhs..sssstshGs.hssphtShhNptstssCsat.tsYss.....shhhsthshsRGt...h.sshsuslsuhphlpspptC .......sCssGphClassss..asGshhpht....sss..............h.s.....s...h............s.s..........h....ss.p.sp..Shh.Np.osttsshYpptsats...........................................h.h............................................... 0 10 25 33 +2030 PF04001 Vhr1 DUF352; Transcription factor Vhr1 Finn RD anon DOMO_DM06473 Family Vhr1 is a transcription factor which regulates the biotin-dependent expression of transporters VHT1 and BIO5 [1]. 
25.00 25.00 47.90 46.90 22.50 20.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.08 0.72 -4.07 6 53 2009-09-10 17:29:09 2003-04-07 12:59:11 8 1 38 0 31 51 0 94.00 66 16.65 CHANGED spchothGsTH+IREpLNFsDEKKWKpFSSRRLELIDpFsLSp+KASEQD-NI+QIAshLRsEFsYPspsop-FEKLVTAAVQSVRRNRKRSpKpp ....t.p.su.GsTH+IR-pLNFpDE+KWKpFSSRRLELIDpFsLSp+KASEQDpNI+QIAsILRsEFsYPsohosEFEKLVTAAVQSVRRNRKRSpK+.h. 0 5 19 30 +2031 PF04007 DUF354 Protein of unknown function (DUF354) Bateman A anon COG1817 Family Members of this family are around 350 amino acids in length. They are found in archaebacteria and have no known function. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.11 0.70 -5.57 4 167 2012-10-03 16:42:29 2003-04-07 12:59:11 7 3 126 0 117 350 205 320.30 24 90.78 CHANGED h+VWIDIsNuPHl+FF+slIpchc+cGh-lllTsRchGslscLlchhGFphpsIGKHG.sTLpcKLltsucRshtLochIspp+PcVulhKpshELPRlAFGLpIPSIhllDNEpAlstNKLhhP........LscpllhPcshDt.clhchGuD.N.Ip.apGhsElsslps.....a..-splLccLs..hccpphIVMRPEP.tuoYhsu..+cSILscIl-hLpchss..IllhPRsccQ+EIacta-.lhIPccslDsLoLhahuchhlGAGGTMNREAAlLGsPslSsYPGK.LLuVDKaLI-pGhhaHSsDscEIlp....hVhsNhthR+tl+sh....-c.hchIl-hVpshhc 
..............................................................................................................................................lhlDlspss.al+hF+thlpcLcc.c...G...a.-..lh..l..TsR...c...h...s...pshc...LLc..ha..sl..sa...h...s..lG.p..pu....ss...hh.......tK...lht....h..p....R.....t.h.p.Lh.ch...h.p..p..............h.c...P....Dlhl.u.h....s....u.sp....s...s....+.l....u.hs...ls..h.....Psl.....hh.......hD.........s.........-...tu.....h..h................s+L.shP.............hssh.ll....sPtsh................htp..h...Gtc...p.lh..asG...hp..Ehsa.l...p.s............F.p.P.c....t...s.llc..c.LG.....lc...s.p.t..a.l..l....l...R..p.s.h...p.A.s.Y.sss.........pp..sl...h.........c....l..l...ct....Lp...c.h..sp...l.V..h......l....s...cp......p....p......p.....t..p........h......h........c.........p.......h........p....h......h........l..s.......c..s...l........c...h...h...s...Ll...ha...uclhlusGu.TM.spEAAl..LGsPsl...p...h...h.s.........h..pp.h..h.hp...t.h.h.hh..p......t...h..................................................................h............................................................................ 1 28 75 96 +2032 PF04009 DUF356 Protein of unknown function (DUF356) Bateman A anon COG1844 Family Members of this family are around 120 amino acids in length and are found in some archaebacteria. The function of this family is unknown. However it contains a conserved motif IHPPAH that may be involved in its function. 29.50 29.50 30.40 115.80 25.00 29.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.37 0.72 -3.97 10 49 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 49 0 39 47 1 106.90 43 82.18 CHANGED lllIRADshcKlpsALuDL-RauulpItGKPRIlsPshADcllppllGp.lR++s+sAsLVcl-pssscAIs+lR+IHPPAHllVlSsRa.-sYp-Lhcpas+h.cL.+ llLIRuDsa-KlpsALADlcRautlpItGKP+IIsPphADpllcpIlGp.l+KpsKsAslspl-psAs+AIsRlRKIHPPAHIlVlSsca.-sapcLhcpasphP.LK.. 
0 7 15 28 +2033 PF04010 DUF357 Protein of unknown function (DUF357) Bateman A anon COG1849 Family Members of this family are short (less than 100 amino acid) proteins found in archaebacteria. The function of these proteins is unknown. 24.00 24.00 24.00 35.20 23.80 23.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.36 0.72 -4.28 41 162 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 151 2 105 158 72 71.80 33 50.27 CHANGED c+Yhchlc-ALpplc........ts.httssc-hlchAcsYhcDu+aalc.pGDhlsALuslsYuaGhLDAutclGlhs .............................+Yhchhc-ALpplc........t..htsssp-...h...lphAcsYhcDu+aahc..cGD..hlsAluslsYAaGhLDAstplGlh...... 0 25 64 86 +2034 PF04019 DUF359 Protein of unknown function (DUF359) Bateman A anon COG1909 Family This family of archaebacterial proteins are about 170 amino acids in length. They have no known function. The most conserved portion of the protein contains the sequence GEEDL that may be important for its function. 25.00 25.00 52.30 51.60 22.80 18.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.41 0.71 -4.73 31 147 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 144 0 99 150 82 122.20 34 69.31 CHANGED VTp....sllcsGltPtlsllDsKT+Rp.hpppht......htphlpVcNPsGsIo.-llcslccu...htp.scsshIhVcGEEDLhslPsllhAP.s.ohVlYGQPs.pGlVllpVspchKpcspcll.cchptp ......sTtpllcsGlhPslullDtKT+Rp.spppht...........hhtphlcVpNPsGsIot-lhc.slcpu....htp...spsshlhVcGEEDLhslPsllhAPtG.sh..VlYGQPs.cGlVllpVspchKpcspcll.cphp..h............... 0 24 58 80 +2035 PF04021 Class_IIIsignal DUF361; Type_III_signal; Class III signal peptide Bateman A, Szabo, Z anon COG1991 Motif This family of archaeal proteins contains. an amino terminal motif QXSXEXXXL that has been suggested to be part of a class III signal sequence. With the Q being the +1 residue of the signal peptidase cleavage site [1]. Two members of this family are cleaved by a type IV pilin-like signal peptidase. 
23.50 23.50 23.60 23.60 23.30 23.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.85 0.72 -7.04 0.72 -4.10 49 226 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 58 0 144 226 10 27.60 32 22.13 CHANGED p+GQlSlEahlLlhulllsshlsshhhs ..p+GQlSlEahlLlhullllshlsshhh......... 0 19 44 103 +2036 PF04015 DUF362 Domain of unknown function (DUF362) Kerrison ND, Finn RD anon COG2006 Domain Domain that is sometimes present in iron-sulphur proteins. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild --amino -o /dev/null --hand HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.45 0.70 -4.85 115 993 2012-10-02 00:55:42 2003-04-07 12:59:11 7 22 485 0 449 1069 99 221.70 23 62.70 CHANGED +VllKPNllt....sps.Pp........ussTcPpllc.ullchl.t.ctGu..p..lhluDusuhs.......ssppshcpsGhtphsc....chGs..pllshspst.hhthtt............sthhpphtlspslh-.u..DhlIslPpLKsHshst.hThulKNhh.Ghlss................pKst...hH............................................................................t.pphsphls-lsph.h.....pPsL..sllDulhu....hpusGP.s.sGpsh....ph.sh.llAusDsl.AlDss.uspll ........................................................................................................................VhlK.phsp..........ts....................tshhcPthlcsll.chl..p..ph...G..u...p...hlssssshhs..................pthps..ht.tsGhp..hst..............s.s......tl.l......hc..s...phhp.h.h..................tsphhp.php..l.upslh-.....u.......DhlIsls+hK.s.Ht.h...ss....hsuulKNlh.Gssstt..................sKtt......hH........................................................................................ttpth.t...ctls-hsps.l...............+sph.........s.l..h.....ssl.hs.......h.s..uP...s...ssp.st..............hh..sl.lhAS.tD.sV.AlDtsssch........................................................................................................ 
1 226 385 422 +2037 PF04016 DUF364 Domain of unknown function (DUF364) Kerrison ND, Finn RD, Eberhardt R anon COG2014 Family This domain of unknown function has a PLP-dependent transferase-like fold. Its genomic context suggests that it may have a role in anaerobic vitamin B12 biosynthesis. This domain is often found at the C-terminus of proteins containing DUF4213, Pfam:PF13938. 23.00 23.00 23.40 24.20 22.50 22.80 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.52 0.71 -4.88 60 351 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 267 6 183 356 38 129.70 21 50.59 CHANGED ss.h-h.hthttsccVslVGt.......a.P..hlctlc.p...pstplhllEhsst....tth.h....ssttsptlLspsDslllTGoollN.pTl-pLLphsp..sstllllGPosshhPtshhshGlshluGsh..lhDs-tlhcslscGuustthpttsththhh ...............................tsp+VsllGh..........h.P..hlcplt..t.......tstplhlh-hss...........t.tphsh.................ssptsptllsp..sDhlllTGoollN.sTlsplLphs.c.....s..t.h.h..llhGPos.shhs.hhhthGhphlsu........p...hh.hh...................hh.......................... 0 77 130 159 +2038 PF04033 DUF365 Domain of unknown function (DUF365) Kerrison ND, Finn RD anon COG2028 Family Archaeal domain of unknown function. 22.50 22.50 22.80 89.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.33 0.72 -4.00 5 11 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 11 1 9 15 0 94.70 52 69.01 CHANGED MKlIFYASREDQGFaGEAEIERVEha-sPhcIlEKYGDcLFLTcEELK+Y.E+upcRWsu.....cu+RKRPWMVIcL+NIRKYc+VVKPKRFVPVuGRYV+E .MKllFYASREsQGahGEAElEpVchapss.cIlcKYtDcLFLT.EElRcY.p+s.pcRWss.....tu+R++PWMVlhL+NIRKYs+VVKPKRFlsVuGRYl+-. 0 4 5 5 +2039 PF04017 DUF366 Domain of unknown function (DUF366) Kerrison ND, Finn RD anon COG2029 Family Archaeal domain of unknown function. 
25.30 25.30 25.80 170.30 23.40 25.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.95 0.71 -5.01 15 67 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 66 6 53 69 18 186.10 43 97.22 CHANGED M....hhlll--cl-YDGSQIpShWAapsFGlpGDSIVsFRGsC-Vch-cMlDlEDl+pcp.....IpScDMlHFIlEHFDp...DLchuhtRQRLLlull+EhLpch......GlcspRcGDDLYlss+KLSVSIATsSsVShKIHhGINVpocGsP..VsslGLpD....LGlcshp...plhccluppYscEh-cIc+DhpKsRs ............hlhhccplcYDGSpIpshWAacsFGlpsDSIVsFRGsscVcl-cMlDlcDl+tpp......IpSsDMlHFIlEHFDp...DLchshhRQRLhlsll+ElLpch......Glc.hpRcGDDLYhs....s....+KLSVSIAosSsVStKIHhGINVpscGsP..VpslGLp-.........lGlpshp.....clhccluptYscEhccIc+DhpKsR... 0 14 28 42 +2040 PF04034 DUF367 Domain of unknown function (DUF367) Kerrison ND, Finn RD anon COG2042 Family \N 23.80 23.80 24.20 29.50 22.90 23.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.59 0.71 -4.53 10 466 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 417 0 327 458 69 125.70 45 45.79 CHANGED lVLoPhuctslSPcD+-ll-+pGlsVlDCSWs+l-ch.Fscl+s...cptRtLPaLVAANPVNYG+PtcLSolEAlAAsLYIlGhh-cAptLLS+FpWGcsFL-LNcELL-tYupscsS.EIhclppcaLp ..........lVLoPh.uppslSP.s.D.+pl....lppt.GluVlDCSWs+l..--...s..PFs+lpu.......p.p...R..LLPa.LVAANP....VNYG+Ph+LsCVEAlAAshhIs..G.ap-hApp.lLspFpWGcsFL-lNc-LL-tYutC.p.sspElhpsppcaL.............. 0 111 190 275 +2041 PF04018 DUF368 Domain of unknown function (DUF368) Kerrison ND, Finn RD anon COG2035 Family Predicted transmembrane domain of unknown function. Family members have between 6 and 9 predicted transmembrane segments. 
24.90 24.90 25.50 27.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.75 0.70 -5.19 110 1391 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 1306 \N 249 911 878 250.90 32 87.16 CHANGED M..GuADlVPGVSGGTIAhIsGIY-cLlsuIssls......phhphhhp...........tthtthhpthsh.FLlsLhhGlhhulholupll..saLLppaP...lhsauFFhGLIluSlhhlh+.p......................lpt..hshtp...l....lhhllGh.sluhh........ls...............................h........hsss.p...sssshhh..........................lFhuGhlAlsAMILPGISGSFlLLlLGhYssll.sAlpsh..........................slshlhlFuhGsllGLlsFo+lLpalLc+a+shTluhLsGhhlGSLttlW.....Pap.............t..hshhp...tslhshsa ............................hGhusllPGVSGGTlAhllGIY-chIsuluplh..pthc.hhp............................sh.FLlslhlGhllulhhhSpll.......paLLps........a....l.shhhFhGlIluolPhll+p.................................tpp.....hshtc........l..hhhl..h..uhll.s.hh.hs...................................................................................................h.................hssh.t...t.hssshlh.............................................................................................hhlAGhlAusAMlLPGlSGShlLLllGlYsshl.sulpsh..........................tLshlhsh....uhGslsGllshSKllsahLp.+a+shshthIlGlllGSlhhlh.....Phh..........................sss..h.hh.h.hh.......................... 0 76 167 220 +2042 PF04126 Cyclophil_like DUF369; Cyclophilin-like Kerrison ND, Finn RD, Eberhardt R anon COG2164 Family This domain has a cyclophilin-like fold, consisting of an eight-stranded beta-barrel with an alpha helix located between the beta-2 and beta-3 strands and a 310 helix located between the beta-7 and beta-8 strands. The catalytic site found in human cyclophilin is not conserved in this domain, suggesting a different function for this domain [1,2]. 
20.30 20.30 20.30 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.60 0.71 -4.51 14 235 2012-10-02 15:38:38 2003-04-07 12:59:11 8 5 223 5 81 217 103 113.10 22 69.69 CHANGED h+l+Ihhs.....ptcs.sELs-s...pTsctlhcuLPlcupsspWG-ElYFosslpss.-Esup-sV-hGDluYWsPG+AlClFFGsTPhS.ssc.pPASsVNlIGRlsssh.chlppVpcGspVtlc .......................plclphs.....spphpupLt-s...tsucphhptL...Plsl.p.h.p.p..a..G..s..E...h.h.h..ph...Pt.p.l.s.hp.ss........s..t.s.s.s.p.sGDlsYasstss.....lslFY.tpt..............t.h..hGch.ts.........h................................................ 0 35 56 71 +2043 PF01809 Haemolytic DUF37; Haemolytic domain Bashton M, Bateman A, Eberhardt R anon Pfam-B_1485 (release 4.2) Family This domain has haemolytic activity [1]. It is found in short (73-103 amino acid) proteins and contains three conserved cysteine residues. 21.10 21.10 21.80 21.10 20.30 21.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.35 0.72 -4.36 219 3575 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 3453 \N 886 2340 1979 65.50 47 72.84 CHANGED hppllltlI+hY....QhhISPl.h.s.ss.CRFhPTCSpYul-Alp+aGsh+GsaLulpRllRCHPa....ssGG.aDPV ....................pplhlthl+hYQth..I..SPl....h......s.....ss..C...RF.....h.....P.....TCSpYul-Alp++GshKGsaLultRILRC.HPa....stG..G..hDPV.............. 1 320 605 769 +2044 PF04025 DUF370 Domain of unknown function (DUF370) Kerrison ND, Finn RD anon COG2052 Family Bacterial domain of unknown function. 21.20 21.20 21.70 21.50 20.80 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.12 0.72 -3.93 19 534 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 505 0 164 344 8 72.70 64 81.98 CHANGED LlNIGFGNhVsusRllAIVsPcSAPlKRlhp-A+-pupLlDATpGR+TRAVIlhDSsHVILSAlQPETlupRl .......LlNIGFGNIVSAsRllAIV...SP....E....SA....P....IKR....llQEAR-+GhLIDATYGR+TRAVIlhDSsH..VI.LSAlQPETlApRl........... 
1 85 138 153 +2045 PF04027 DUF371 Domain of unknown function (DUF371) Kerrison ND, Finn RD anon COG2090 Family Archaeal domain of unknown function. 25.00 25.00 44.10 28.20 22.60 22.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.60 0.71 -4.49 19 131 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 127 1 86 127 18 128.60 38 81.94 CHANGED l+A+GHcNVpApH+SThElT+DcaLTPpGDCIIGlsAD+uhsDlsccFKptl+cscth.lplhlcs.....sshpDplhGpGpscLoLscssshVhR+SsYl.ssRTlhIpAsKAAtDlsR-llctL+.cGsplhlplhV ..........lpu+GH.NVpApH+oTlElT+-saLTspGDCIlGlsAD+uhtDhspchKptl.psspt..lhlhlcs............ts.hh-hlhGpGsscLsh.spspclVhR+Ssal.ssRTlhlpAsKAApDlsR-llptL+.sssphplpl..................... 0 23 51 72 +2046 PF04036 DUF372 Domain of unknown function (DUF372) Kerrison ND, Finn RD anon COG2098 Family Domain of unknown function. 20.40 20.40 21.00 40.50 19.90 20.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.65 0.72 -4.49 15 84 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 82 14 60 90 6 38.00 56 31.52 CHANGED ERAhFEAGIKLGALYHQFsGTPVu.coAsSLEpAIE-S ..EcAsFEAGIphGALaHQFsGTPVS...coscSLEpAIE-u.. 0 11 41 52 +2047 PF04123 DUF373 Domain of unknown function (DUF373) Kerrison ND, Finn RD anon COG2237 Family Archaeal domain of unknown function. Predicted to be an integral membrane protein with six transmembrane regions. 
22.10 22.10 22.20 22.20 21.50 21.90 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.98 0.70 -5.42 35 156 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 130 0 101 157 110 341.20 29 93.44 CHANGED M.chLVlsVDRDsDlGcKsGlpTPVlGR-sslcAAlcLulsDPEDSDsNslFuul+lY--Lc...sc.Gc..-VEVAsloGspcs.ulpuDhplucpLDpVLpphsss...ssllVoDGu-DEpllPlIpS+l.lsuVcRVVV+QupslEoTYYll....KcslsDschpphhL.lP.GllLLlaulhtlhsh..............sshslsslshllGhahLh+Ghul-chlpphhcphhtsh..t.clohlohlluhhllllGlltGhhsl.phtshsshhhhs..........alhshl.ahshuslh.shlG+ll-phlpschphhphlst.hhlhuhthllashssah.hs............hhhsslphhhhslssslllulhs .....phLVlsVDhDDDlGcKsGlpoPllGR-sshcAAscLulsDPEDSDsNslasul+lYD-Lcpc...Gc..-VElAsloGspcs.ulpushplucplDplltphsss...tsllVoDGs-DEpllPlIpSRl.lsuVcRVVV+QucslEoTYYll....KchlsD.chpphhL.lP.GllLLlaulhtl.hsh..............sshshsslshllGhahl.h+Ghul-chlpphhcph.......h..G.plohlohlluhhllllGhltuhhtltshtsttshhhhs.................alhshlsahsh.uhlh.h.hG+ll-phlpp..ch.phhphlst.hhl...lulhhllhs.hsthhhsh.....................h.hssh.phhhhhlsshlllul..h....................................... 0 23 61 85 +2048 PF04028 DUF374 Domain of unknown function (DUF374) Kerrison ND, Finn RD anon COG2121 Family Bacterial domain of unknown function. 26.00 26.00 26.80 26.20 24.30 25.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.39 0.72 -4.74 79 649 2012-10-02 00:16:30 2003-04-07 12:59:11 8 4 572 0 167 448 385 78.70 34 33.22 CHANGED hhshha..tp..........tphtsllSppcDG-llupslcthGhpslRGS..........os..+GGspAltphlctlc.pGtsl..ulTPDGP+GPtc .......................hshha...tp...........tchhshlScpcDGEllApllcph.GhpslRGS............os+GGspALpphhctLc..pG..p.sl..slTsDGP+GPh...... 0 71 123 147 +2050 PF04041 DUF377 Domain of unknown function (DUF377) Kerrison ND, Finn RD, Mistry J anon COG2152 Family This family contains many hypothetical proteins, some of which are predicted to be glycosyl hydrolases. 
This family was noted to belong to the Beta fructosidase superfamily in [1]. 20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.06 0.70 -5.71 8 965 2012-10-02 00:26:57 2003-04-07 12:59:11 8 6 499 13 330 923 112 286.00 27 80.25 CHANGED hh+KhPs.PIlc.Po.hpGaDs+h................sFNPuVl.hcpch.....hVhlYRsps.cshsohRIGLuhSpDG......lpastcPEslhhPchphEhhG...lEDPRlsKlsccYhMTYTGhss+hsRlsluso+shloat+hs....shhs.Fshp....phho+sGslhPpKl.....sGcYlMaa+........sushaLAhS..sDtlpW-shhcs....lh..pscpsha-s...lKlGsGsPPlcsp-G..LVLhH...sss...s-phlYRlGsALhDt-sP.+...llhRocp.ILEPEt.aEhaGcVsNVVF.ssu.hV-ts.t+lllYYGuADptlGLAphs..lcclhchscc ................................................................................................................hh......................................................hhNsu...sh..httt..........................hhhhhRs............p....s........t.....t......h........h.................................hl....thu...S...pDG........................l.p.a.p.h......p....c...P....l...h..h....P...p....s.....p.............c....t.....h.....s......................h.DsRl.s..........t..h.....-....-.....t.....Y.....ah....sa.s...........u...........h...........s........t...........p...........s...........s..p..........h....u............l.u.........t.........o....p...D.h...t.....s...apphs..................................hh..s....s....................................+..ss....s..LF..P.c.........+l..........sG.....+.Yh.hhpR..............................tslalu.h.S..........sD........h.h.t.......W......s......t...p.pp.........................lh....ps.p..........t...h.a..cs.............h+l....GsGssPIcT.........p....c.......G.....WLhlYH.G......s....................sss...h.....t..Y...shGsh..L.L..DlccPs+.....lltRspp.llt..Pp....t......a...E..........p.......G...........V.s.N...VVFssus..l.........h.......c........t.......c.....p.lh.lYYGsADoh..hulAhhp..lpcllp....h.............................................
.................... 0 164 266 307 +2051 PF04070 DUF378 Domain of unknown function (DUF378) Kerrison ND, Finn RD anon COG2155 Family Predicted transmembrane domain of unknown function. The majority of the family have two predicted transmembrane regions. 25.00 25.00 32.20 32.10 24.30 22.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.00 0.72 -4.35 26 729 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 606 0 136 385 24 60.70 43 82.79 CHANGED M+slshluLlLlIlGuLNWGLlGl..FpaDLVAulFG.Gps.....oshoRllYlLVGluulaplh.hhp .........hphlphluLlLsIlGulNWGLlGh.....F..pFDLVutlF...Gss.......ohluRIIYhlVGluulYslhhh..t.... 0 54 99 112 +2052 PF04038 DUF381 Domain of unknown function (DUF381) Kerrison ND, Finn RD anon COG2098 Family Archaeal domain of unknown function. Strongly conserved YPLM motif. 25.00 25.00 32.00 31.60 16.90 16.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -8.89 0.72 -3.93 21 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 85 14 63 93 5 60.00 33 50.59 CHANGED lpslsVcIcpshlcsphs..au.YoELsGcMLcVclplcYsusplpsphca..Eh-YPLMhlcc ..............scclpVcIcp-hlcppht..hu.YopLsGcML-Vclplch..psspstsplca..EhsYPLMhlc...... 
0 13 44 55 +2053 PF04063 DUF383 Domain of unknown function (DUF383) Finn RD, Kerrison ND anon DOMO_DM06609; Family \N 23.50 23.50 23.50 23.60 22.10 23.40 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.26 0.71 -4.76 31 308 2009-09-11 22:59:18 2003-04-07 12:59:11 9 8 266 0 219 301 3 182.50 32 50.38 CHANGED hsplsc.pps....ADhh...shLLuNLu+s-shsp.llshppp...............................................pth.tsppsl-pLhDsFsKGhp........................................sshN+..cAsaDYLuhlFAslS+hccGRpaFl...........................scQch-t.l.PlsKLlsFTE+t.SplRRcGVAoTIKNssFDtstH....hLLs.....--t.................lslLPalLLPlA.GsE..EhsE.E-hhsLPs-LQ..hLPsDKcR-sDssIhph ...........................................................................................h...lhp.pp....uc.hshlLuNLs+.t-shtp..llpht.pp..........................................................ps....hslspLhcsFsp......................................................tshNc..push.cYLu.lhuslSphtpuRpahl...........................sppchs...........lp+LlsaTcpt...SplRRt.GVsuTl+NssF-tppH....thLLs.......pp................................lslLPalLLPlu.GsE.................-hs-.E-h..tphPs-LQ..hL.P.-K..pR-sDssIhh................................................................. 0 72 119 184 +2054 PF04064 DUF384 Domain of unknown function (DUF384) Finn RD, Kerrison ND anon DOMO_DM06609; Family \N 20.40 20.40 20.50 21.00 20.00 19.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.90 0.72 -4.40 29 294 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 252 0 203 282 2 57.90 36 15.57 CHANGED oLhLLssT+pGRchhRs+sVYsIlREhHph....p....p-.cclt-sC-+......lVplLhRcEsptsh-p ......slhLLssT.+tGRchlRstslY.IlREhHph.....E...................p-...ppVtpuC-+...............lVplLhp-.E.tts................................ 
0 72 116 170 +2055 PF04074 DUF386 Domain of unknown function (DUF386) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family consists of conserved hypothetical proteins, typically about 150 amino acids in length, with no known function. 20.10 20.10 20.50 20.50 19.70 19.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.79 0.71 -4.70 13 3287 2012-10-10 13:59:34 2003-04-07 12:59:11 7 3 1492 8 258 1385 24 148.30 31 97.21 CHANGED M.Iluclsphshhhuls.tlpcsl-aLpspshssLssG+a-IcGDplahsVhphpTpsssscphEsH++YlDlQlllpGpEpItaussht..shps.psas-pcDltahtsscscptlpLpsGpFslFaPt-sH+Ptshsss.sppl+KlVlKVtls ....................................................................M.Ih.splp.p..h...p....h....t...l..sst.lpcu..l....-..a..l.ps..........p......t..hs....sht.s...G...+..h-..l.....c...........G...c.plFhpl...p...p..Tp.s.st..p..pp.hEhHc+YlDIQllL.p.G.p.E.t.hsaussss...st...ps..ps.a.pc...-cD.l.t...a.h............s........s..p...s.....E...p..hl..t.L..p.....s..GpFs.lFaPt-sH+Ptsh.s..s...p.....s.......t.......t.......l..+KlVlKVth.t......................... 0 59 125 184 +2056 PF04079 DUF387 Putative transcriptional regulators (Ypuh-like) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family of conserved bacterial proteins are thought to possibly be helix-turn-helix type transcriptional regulators. 
27.40 27.40 27.90 27.80 27.00 27.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.71 0.71 -4.86 139 3001 2012-10-04 14:01:12 2003-04-07 12:59:11 11 3 2904 3 769 2172 2160 158.50 36 73.66 CHANGED shlEAlLFsuuc...P.lolppLsplh...........s...tsplpphlpcLppcY..................ttps+.Glplhch.ussaphtT+schuphlpchhpp..t..tcLSpAuLETLAlIAY+Q.PlTRu-I-plR.GVs.usshlppLhc+sLlcptG+pcs.sGRPhL.YsTTcpFLchFGLps..Lc-LP....sl ...........................t.hlEAlLFs...uu-...s.lshcpLscllt....hs...............splpphLpcLtpcY.......................................p.s.p..GlpLhcs.us..saphtT+schuphlpch.hpp......t...ptpLSpAALETLAIIAY+Q.PlTRu-I-p...IR.GVs...ossslppLht+sLIcts....G.....+p.-.s....sG..RPhL...YsTTctFL-hFGLps.Lc-LPs................... 0 268 528 663 +2057 PF04076 BOF DUF388; Bacterial OB fold (BOF) protein TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Proteins in this family form an OB-fold. Analysis of the predicted binding site of BOF family proteins implies that they lack nucleic acid-binding properties [1]. They contain an predicted N-terminal signal peptide which indicates that they localise in the periplasm where they may function to bind proteins, small molecules, or other typical OB-fold ligands [1]. As hypothesised for the distantly related OB-fold containing bacterial enterotoxins, the loss of nucleotide-binding function and the rapid evolution of the BOF ligand-binding site may be associated with the presence of BOF proteins in mobile genetic elements and their potential role in bacterial pathogenicity [1]. 
23.60 23.60 23.70 23.90 23.50 23.50 hmmbuild --amino -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.10 0.72 -4.20 7 1522 2012-10-03 20:18:02 2003-04-07 12:59:11 8 1 890 1 148 625 10 102.00 40 81.34 CHANGED uhs...........tGGFpGss.sstsoVctAhphtDDs.VsLcGsIlcplscDhY.FRDsoG.pIpl-IccphasG.plpscshlpIpGclD.+chsps-lDVctlpK ...............................ttts.......s.s.ps.tsG..ap..G..ss.....u.ph..h..T...Vcp...A...K..s...h...+D-sh.....VoL+GNllc...+l..u...-DpYlF+DsoG.pIsV-IDcc.ha...s...G.h.s.VsPc.spVcIpGclD...Kc.h..s.s.s.cl-VcplpK................... 0 16 45 102 +2058 PF01837 DUF39 Domain of unknown function DUF39 Bateman A anon Pfam-B_7373 (release 4.0) Domain This presumed domain is about is about 360 residues long. The function of this domain is unknown. It is found in some proteins that have two C-terminal CBS Pfam:PF00571 domains. There are also proteins that contain two inserted Fe4S domains near the C-terminal end of the domain. The protein Swiss:O26943 has been misannotated as an inosine monophosphate dehydrogenase based on the similarity to the CBS domains. 
25.00 25.00 42.10 41.80 19.00 18.70 hmmbuild -o /dev/null --hand HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.94 0.70 -5.79 75 219 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 176 0 141 228 13 356.80 43 84.63 CHANGED AlVlTAEEhhshVc-.GhppssccVDVVTTGTFusMsSSGAhlNhGa.o-PsI+h..p+saLNGVsAYuG......luAVDsYlGATphs-pss................pYGGGHVIcDLluGKplcLcApuhsTDCYPR+pl-splTl--lspAhLhNPRNuYQNYssulNsu..-+sl.aTYhG..sLhPphGNssYSsuGpLuPLhND......PthcsIGlGTRIFLGGutGYlshpGTQHss.........Phssh.uTLhlhGDhKpMsscalRGshhpuYGsoLhlGlGlPIPlLsE-lhptsulpDcDIhsPllDau..hsppsRs.slutVoYu-L....................................................................+SGpI....plpG+cV....p.TuslSShthAcclApcLKchIppGcFhL .AVVlTAEEhhphVc-.GhppuAc-VDVVTTGTFusMsSSGAhlNhGH.o-Psl+h..p+saLNsV.AYuG......luAVDhYlGATphs-sss................cYGGGHVI-DLl.uGKplcLcApuhuTDCYPR+pl-spIol--lNpAhLhNPRNsYQNYssulNsu..c+sl.aTYhG..sLpPphGNssYSssGpLSPLhND......PthcoIGlGTRIFLGGu.GYlhhpGTQHsPt...........ssh.uTLhlh.GDhKpMsscalRGshhpuYGsoLhlGlGlPIPlLNE-lhptsulpDcDIhsPllDau..hPpps+s.sluhVoYu-L..........................................................................+SGpI.........plpG+cV......TuslSShhhAcclApcLKpWIppGcFhL.................. 0 48 104 129 +2059 PF04094 DUF390 Protein of unknown function (DUF390) Bateman A anon Pfam-B_1698 (release 7.3) Family This is a family of long proteins currently only found in the rice genome. They have no known function. However they may be some kind of transposable element. 
19.60 19.60 19.60 19.60 19.10 19.40 hmmbuild -o /dev/null HMM SEED 828 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.48 0.70 -13.45 0.70 -6.49 2 813 2009-01-15 18:05:59 2003-04-07 12:59:11 9 20 6 0 486 813 0 267.70 29 75.06 CHANGED MRTaQGhpWDWsPEDF+hlVQRVLNLsSVEASLIPQtlLPLCsDP-RAsILTIM.tVGAStERA.+GHDGAGGSRRGEQSTPGGGRASGsRDtGPGuSRPADARGKRKQtGTPsPSPPRGGGAVRASSRRPEGAsPTSQPEGERKKKRhpKMGtsp.utGs..p..thph...................................h.shSEIPSRPSRHSKSGpSEAE-sAsAEARRREuDRREAADRLREAEEAAQ-AsRsRQAEEsAREEA.RhRpAtEusRE.cAu.RtcpAh............sp.uPsPossttsTTStAstDEAAGs.LGPsPSGDA.DpPusGshP-SGTSIGGPSRAAsoPRRL..hPShAPLsAEPLLQALAAsNTTVLDGLSAQhEsLQAERAELDAAWARVEEGRRSVEAMVEhGRKAHRRHsSELEsR++sLAEIA+EVEEERtsALIATTVhNEspDsLRLQYGShtAELcKKLDAApGVLDAAAAREpRAAEsEAASRpRE.ALEARAhALEERApshERDhADREAAVsIREATLAAHEAACAEEE.ALRLREDsLTERtRuLEtAEuAAQpLADSL.LREAApEEQARRNLEGARAcRAALsQRsAELEARAKELDARA+SGGAAsG-uDLAARLAAAEHoIA-LQsALDSSAGcVEALRLAGEVGPGMLhDAVSRLDpAGRQsGLWtGRssKYAANQGGLAQRLScMAGsLQRLPEELEcTIKSSSRDLApGAVELVLASYQARDPsFSPWhAL-EFPPGTEDusRAQVRDAAspIVcSF-GoAPRLsFA.sSDEEGsssGAsDuDDEAGDPGAS 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 1 1 1 +2061 PF04134 DUF393 Protein of unknown function, DUF393 Mifsud W anon COG3011 Family Members of this family have two highly conserved cysteine residues near their N-terminus. The function of these proteins is unknown. 
25.60 25.60 25.60 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -11.11 0.71 -2.97 156 1577 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 1266 0 502 1346 1808 108.70 23 68.72 CHANGED llaDGsCsLCstts..phltct.....Dp.t........t...plpassl....psststphh...........................t..s.ls..hpphpsh........lh....tsp...........................hh.ps....scAhhpl.hptlsh...hthlthhhh..lPth..ltchhY.phlAcpRaphhup ...............lhaDGsCshCsshsphlhpp......Dt.t.........t..phpassl..........psptstp..h...............................................................tt.s.ls......hpphpsh..........lhh...pssp.............................haht....ssA.hhpl..hp.....t..lst.......hphl.s.....hh.hh....h.....Phh....ltchsY.chhApsRhch.............................. 0 152 315 430 +2063 PF04143 Sulf_transp DUF395; Sulphur transport Bateman A anon Pfam-B_2577 (release 7.3) Family This is an integral membrane protein. It is predicted to have a function in the transport of sulphur-containing molecules [1]. It contains several conserved glycines and an invariant cysteine that is probably an important functional residue. 22.00 22.00 22.10 22.00 21.60 21.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.05 0.72 -3.93 106 5952 2009-09-11 12:44:05 2003-04-07 12:59:11 9 11 2209 0 1539 4246 1254 43.90 30 23.66 CHANGED hhthllG.GhlhGhGhtluuuCssushhssssshsh.uhlshhuh .............tsllG.GllhGhGhsLAGGCssGshhsussshsh..uhlshls....................... 0 487 974 1295 +2064 PF04148 Erv26 DUF396; Transmembrane adaptor Erv26 Wood V, Finn RD anon Pfam-B_22900 (release 7.3); Family Erv26 is an integral membrane protein that is packed into COPII vesicles and cycles between the ER and Golgi compartments. It directs pro-alkaline phosphatase into endoplasmic reticulum-derived COPII transport vesicles [1]. 
22.20 22.20 22.80 22.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.64 0.70 -4.96 19 254 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 225 0 176 237 4 186.10 38 87.32 CHANGED M.lL.LLuYlGsllGFsFLTLuIASGLYYlSELVEEHo..s+RhLpRhIYslIslhlLLhlhDpFPapLolhSIsoahVYhpNL.+cFPhlpLouPhFllSClLVllNHaLWFcaFsp..........s...s.php.....Y.....t....hsoFsEVsSaFulCVWlVPFALFVSLSAuDNVLPotspptsstt.............................s..t...+p+spGLhKsllsslRchl .....................M.hl.llualuhhlthhFlTLul..................A......uGLYYLuELlEEaTshs+RllphhIahhhsl.hlhLhlh-pFPhh.hsh.hulhopllYht.L..ppFPalp.L..ouP..FlhSClLV.llNHaLhFpa.Fsp...............................................h.sFsE.lhuaFslClWllPFAlFVSLSAs..-NVLPohspt.s.................................................+t+t.Ghh.slhs.h+t..h.......................................................... 0 59 97 141 +2065 PF04149 DUF397 Domain of unknown function (DUF397) Bateman A anon Pfam-B_3066 (release 7.3) Domain The function of this family is unknown. It has been suggested that some members of this family are regulators of transcription. In particular, it is thought that this may regulator of antibiotic production in Streptomyces coelicolor [1]. 19.70 19.70 19.80 20.00 18.90 19.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.82 0.72 -4.39 107 1426 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 87 0 637 1558 2 53.40 37 77.67 CHANGED pspW+KSShS...........................sssusCVEV.A.......................ssss......s.lu.VRDSK.......c.P.sGPsLsassstWsuFlsus+s ...........................................t.htWpKSSaS...........ssuusCVEl.A.................t.hss.............s.lslRDSK.......s.s...sGP....sLhhssstWsuFlsusp................ 0 143 532 615 +2066 PF04187 DUF399 Protein of unknown function, DUF399 Mifsud W anon COG3016 Family No function is known for any member of this family. 
27.70 27.70 27.90 27.70 27.40 27.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.51 0.70 -4.84 84 689 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 634 1 222 600 105 207.30 26 61.87 CHANGED slshpplhtpltsscllllGEhH-ss..scHtlQhpllpsL......tp.....ps.phuluhEMhspcpQshl-pahsu..........phsppplhpthta.p.sWs...ashYpPllphAtppplPllAuNls+shs+plsctG...httlstt.ccthlss.ts..h.s...ssshcphh....tphh.....suH.tt..............................................................hsps...ht.................thhpsQ..thhDpsMApslhphh.........sttllllsGshHs...cpshGlPtpltc ................t.hohpplhtclt.s..AcVlllG..EhH..sps..tcHthQ.htllps...Lt........pp...............ph..phsluhEMhspspQshlD....ph.hps............plspp.pLhpth..h.p..s.....Ws........aptYpPllph.Ahppph..slluuN.ls+phl.pplhp.G....htth.........ppp...h...l.ss.hs......h.s...ssshcphh.t.hh.....tu.Hpts......................................................................................stp....hp.............phhtsQ..hhtDcsMAcsllpth..................tphllllAGshHs...ppshGlshplt........................................................................................................ 0 76 146 189 +2069 PF04165 DUF401 Protein of unknown function (DUF401) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Members if this family are predicted to have 10 transmembrane regions. 
24.00 24.00 24.00 24.10 23.70 23.90 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.41 0.70 -5.45 4 109 2012-10-02 15:12:49 2003-04-07 12:59:11 7 2 97 0 54 238 36 372.40 24 97.56 CHANGED slolLLh.FsVllVLIhl.KVNIGluIFlGuhhLAhLotLGlsGhLhoLh..us..pWsplhlIlIIuFlhGhopha..hGhLc+hppuhhclFs..KaShhhlPALIGLMPMPuGALlSAPMlcslspcaplsPEhtTllNYWFRHlWEh.WPhY.AlVlsSAllGlshtclSlphFPlsllhhhlGalFFstshccshpssRN....Lh..lhslYPIllllllSVlltLDhLhG.hluhLSl.l.NhhRlp.L+ElLcRsFp.+.IlhLLhAVMhaKsVIEsSGls-ulsthhlSaplPshllLhlhPhllGlhTGIohAYVuhshPLLpsFFou....hcplsLsYhuGYhGllhSPVHLClVhSAEYatsEltKlY++hLLPullhhllGhlhlhlI ................................................................h...hhhhh.hl.llhhh....h.h....+.+.ltl..u...h.uhh....hu.u.h..ll..shhh......h....s.h....pthh..h.shh....ss....phthlt.hl..l...h...l...hh.l..hshh.ph....Ghlcphhp..ulptlhsp...+hshs..h..hP..AhIGLLP.PGGAlhSAP.hVcp.ssp.ch.tlssc...cpshlNYWFRHlaEhhhPlYsulllsusls.s..lsltph.lhth.hPhs........lhhhlhG..a.l...hh.hp..........h...h.......pp........t.......t.....pt..ps...................................hh..hl..hs.hhPllssl....h........l.....s............l........h...h.............t...........l....s............h.h.........h...u..h.h...h.s.l...h....s....h..h.lht......hh.php.l.h.phl...ppuhp...h+....ll...hhl....h.hlhhFpphl.p....h...oG..sh....ptlsthhht.hs.lPh....hll..hhhls....FlhGlhTG....h....s.....uhl.ulshPl..lhsh..h..s.u.......hshh..s...lsahhuahGhhl.SPsH.l.CLllos-YFcschhplh+...h..l.h.hs.h.llhhh..hshh.....hhh........................................ 0 28 44 48 +2070 PF04167 DUF402 Protein of unknown function (DUF402) Kerrison ND, Finn RD anon COG2306 Family Family member FomD is a predicted protein from a fosfomycin biosynthesis gene cluster in Streptomyces wedmorensis [1]. Its function is unknown. 
21.50 21.50 22.10 21.50 21.30 19.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.26 0.72 -4.15 76 1865 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1388 4 316 897 10 69.90 40 36.02 CHANGED hcttchhlphh..tsphaslthhh.cs.psphhshYlNlssssthppp....slchlDL.LDlh....l..hsss.....phpllDtDEh ..................................sc....csslsaFa..pc..haasl.....hsMh..cc.......s....slpY..YsNluo...Phhh.-cc............sl+alDa-LDlp.............V..hPsG.......chcLL.DtDEa....... 0 95 196 271 +2071 PF04168 Alpha-E DUF403; A predicted alpha-helical domain with a conserved ER motif. Kerrison ND, Finn RD, Iyer LM, Abhiman S, Burroughs AM, Aravind L anon COG2307 Family An uncharacterized alpha helical domain containing a highly conserved ER motif and typically found as a tandem duplication. Contextual analysis suggests that it functions in a distinct peptide synthesis/modification system comprising of a transglutaminase, a peptidase of the NTN-hydrolase superfamily, an active and inactive circularly permuted ATP-grasp domains and a transglutaminase fused N-terminal to a circularly permuted COOH-NH2 ligase domain [1]. 
19.00 19.00 19.30 19.30 18.90 18.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.91 0.70 -5.05 180 1295 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 831 0 486 1208 743 297.00 26 60.07 CHANGED MLSRsA-sLaWhuRYlERAEssARllc.ssh......phshst.....ssss...ppht.slltssGstssattp...........t.thstpslhpalhh..DpssPuS..lhuslpsARpNA+tlRstlos-hWcslNphahplpphttpth..........................sppshh..phhchl.pppsthhpGhhpsoMhRs-uacFhplGphlERADpTuRlL-l+atsh.......................ss.h.-thpWpslL+usuuhpsYRphY.psshpstsls-hLlL-sphPRSltaslpplpppLppLt....tt..st...spspchhsp.lpspLphs.slcclh...................ttuLcpaLsphhpplsplustIsppYh ....................................hhSRsA-sLaWhuRYhERAEsh.ARllcssh.....phthts..........tsss.........ptht....sll....ts.hG..h...sshtpt...............h.......t...hs..tp.s..l...lphlhh......DtssPuSlhuslpsARpNA+slRstlos-hWcslN.s...hh..h..t.....l.pp.h.t.p.p.s.............................................ttt.shh....phhphl.tpps.thhpGhhtsohhRs-uapFhplGphlERu-t...ssRlLssphthh.........................................t..hcthtapslLcsssuhtsY+phY.p.s.t.hp...stsls-hLlLctphPRSlhaslpplpppLpp.L................t.thsstspchhsplt....s....pLphh..shpplh.........................ttsLpphLsph.tphtplustlsppah.................................................................. 1 137 300 400 +2074 PF04175 DUF406 Protein of unknown function (DUF406) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Members of this family appear to be found only in gamma proteobacteria. The function of this protein family is undetermined. Solution of the structures of the two members of this family investigated bear some resemblance to that of the single domain enzyme pterin-4a-carbinolamine dehydratase, PDC. 
Although the residues of PCDs involved in binding of metabolite are not conserved in the two structures under study, they do correspond to a surface-region structurally aligned with residues that are highly conserved, eg Glu 89, suggesting that this region is also involved in binding of a ligand, thereby possibly constituting a catalytic site of a yet uncharacterised enzyme specific for gamma proteobacteria. 25.00 25.00 25.60 32.60 22.50 22.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.10 0.72 -3.52 51 1127 2009-11-21 14:42:31 2003-04-07 12:59:11 7 1 756 3 94 348 5 92.60 50 97.42 CHANGED p.p..pps......-pCssCt..sssDlGslIDssDsshplshshsspttAcsthsphsp+A+ssp.....splpsplsss.-suhpLphsFsFpCpAEphIFQLphR .............Mp...sss...DcsssC...CshDlGTlhDNsDCTupaSRhFAoRtEAEptLstLs.E+A+uVp..oEsspls.phs-..p.sGVcLDhcFTFuCpAEhlIFpLuLR..... 0 8 27 63 +2075 PF04174 CP_ATPgrasp_1 DUF407; A circularly permuted ATPgrasp Kerrison ND, Finn RD,Iyer LM, Abhiman S, Burroughs AM, Aravind L anon COG2308 Family An ATP-grasp family that is present both as catalytically active and inactive versions. Contextual analysis suggests that it functions in a distinct peptide synthesis/modification system that additionally contains a transglutaminase, an NTN-hydrolase, the Alpha-E domain, and a transglutaminase fused N-terminal to a circularly permuted COOH-NH2 ligase. The inactive forms are often fused N-terminal to the Alpha-E domain [1]. 
100.00 100.00 101.20 100.50 99.90 99.40 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.93 0.70 -5.82 28 1353 2012-10-10 13:17:03 2003-04-07 12:59:11 8 8 856 1 522 1302 873 327.40 42 53.87 CHANGED PtlIsusEWstlEpGlhQRscsLsthLsDlYucpcllps....GllPsplVhusssatcthhGlpsPtshalHlhGhDlsRss-GpahVLEDNscsPSGluYsLENRphhpRhhPclapph.....plpslssahptLhcsLpshusss...sPp.lVlLTPGsaNusYF.......EHuaLA+phGl.LVEGcDLhlcDs+Vah+Th.pGhp.pVDVlYRRlDD.DaLDPLthpsDShLGVsGLhpAhRtGsVsls.NAhGoGls-DKulhsal.Pphhchh....LGEc.hLss..VsTahCsps.schcalLspLccLVlKsstssG.....GhuhllGsphop.tptphhpchhtts..salAQ ....................................PRlIsusEWppl-pGltQRlcALNtaLsDlY.s.....c.Q...........cIl+s....GllPscllhusst...a...h....t.....hh..Gl...p.....s...P........s.s..l.......a.....hHlsGhDLlR.s.s....-....G....p....ahVLEDNhRsPSGlS..YhLENRchhtR.haP-L...Fpph.........plps.l.s.s.Y....sptL........h.psL......ps..........h...............uP.................s..................s.....................ts...........Pp..lVlLTPG.haNS.A.YF.......EHuaLAcp.h......G......lpLVEGpDLhV.c.D.s.p.VahR.....Ts.pG..h.c..pVDVlYRRlDD.s..FLDPLsFR.s..DS..hL..GV...s...GLlpAhRuGsVslu.NAhGoGluDDKulYsal..PchlcaY......LG.....E.c....sl....LsN.....VsTahCtcs..sphpaVLspLs....c....LVlKssps..uG.........Ga..Gh..l.lGPp......soptphtpht.t.+.l.hspPt.saIAQ...................................... 0 150 328 435 +2076 PF04181 RPAP2_Rtr1 DUF408; Rtr1/RPAP2 family Wood V, Finn RD, Bateman A anon Pfam-B_22202 (release 7.3); Family This family includes the human RPAP2 (RNAP II associated polypeptide) protein and the yeast Rtr1 protein [1]. It has been suggested that this family of proteins are regulators of core RNA polymerase II function [1]. 
20.20 20.20 20.70 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.14 0.72 -3.89 45 383 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 326 0 228 335 3 76.20 28 20.32 CHANGED hsphlo.spYc-llpER...sls.phCGYs.lCspshtph......pppaplssp............spplaphp..c.t..........................................paCSptChpt.StahpsQLsp ..........h...hhp.spYp.-llpER....sl..phCGYs.LCscshpp.....................+tpa+lstt..................ppplhshs..c..t.....................................................................................................................paCSptChcp.upahthQl....................................................................... 0 65 117 185 +2077 PF04188 Mannosyl_trans2 DUF409; Mannosyltransferase (PIG-V)) Wood V, Finn RD, Mistry J anon Pfam-B_9248 (release 7.3); Family This is a family of eukaryotic ER membrane proteins that are involved in the synthesis of glycosylphosphatidylinositol (GPI), a glycolipid that anchors many proteins to the eukaryotic cell surface. Proteins in this family are involved in transferring the second mannose in the biosynthetic pathway of GPI [1] [2]. 
22.40 22.40 22.50 22.50 22.00 22.30 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.93 0.70 -5.57 4 631 2012-10-03 03:08:05 2003-04-07 12:59:11 8 10 512 0 342 606 30 277.80 20 72.07 CHANGED Rpt..lhhatlhsRhlhLhLshLa..hhhshsputshssss......s.sthlsshhp+hLhs.hlpWDulaFlc..hucsG...aEppaAF.sLaPhhlplhs.phhsslhsLLultushh.shh.ls.hlhahlAshhLaplsp.lhpspchohhsullFChoPAulFhouhYSEuLaAhFoFsGlhph.pup........shsushhFuhush.hRSNGlhsssahshsthttha..uLhpLphshhhhphhsuhhLpshhlhlPFhh.QYYu.YppFC.st.........................s.sWCptplPL........lYsaIQchYWs...VGFLKYaphpplPNFLhAsPslIlllauhhhahp.........G.phsphppp.t.......................................................alVhsuhhlhhusFhMHVQVLsRhhS.uhPlhYWahAchlh.s.........Kpp.hpshu..............hthhhhWhs.............hYhlLtslLausFLP.s ........................................................hhh....................................................................................................................h.t.hhpWDs.aahp............lA..p....p.............G....Y..........................h...........t............t...........p........h...........A...F.......hP.hhP...hh.lp.......hh.......s..............................................h..........h......t.........................h.........h...s..h..h..l.s.....hh.........hhu..s...hhLapl...s.t...h......h.......................t.......t.........s......h.....h.............s.....sh.La.hh....sP.u.s.l.Fh.s.ssYoEu..............h......Fshhshhuhhhh..tsp................................................h.h.h.us.l...h....h....u...l..Ash..hR..ssGl...h.h...s...h...hhh.......................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 123 207 294 +2078 PF04190 DUF410 Protein of unknown function (DUF410) Wood V, Finn RD anon Pfam-B_12495 (release 7.3); Family This family of proteins is from Caenorhabditis elegans and has no known function. The protein has some GO references indicating that the protein has a positive regulation of growth rate and is involved in nematode larval development. 19.50 19.50 20.00 19.80 19.20 19.40 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.73 0.70 -4.84 5 337 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 275 7 236 317 2 239.10 26 79.21 CHANGED sppK+a--AlELlasGAppFFcccQpuSAADLshhlLEsLEK..........AElssuspplssLAcllstLsPuEPE......RcshVsRslcWS.....osucGKaGcPsLHpllAppLlctcplppA++HFLLusDsSupAphhLlEYpps+spcuEs...DhFlucAVLQaLsLcNhsoAhsoFTpYTc+ahc..tPphEp.....hchsaPacpPLLNFLalLlhsl-sKcpusFpsLsppYpspLKRD.uapuYLs+IGpLYFGI+PspspSs..uLGGLhSuLLu 
...........................................................................................................tpppaspAh-llhpGAhhhhpt.s..Q.....t..s....SuuDLu...h.hll.-shpp....................................sph..ps..s..pt.....h.....t.cLhp..lhphh.....s...sppsp............................+ppalppslpWS........................ph.u.p..h..ph.Gc..P..cLHphluphh.......h............c..............-...........p..............p.............h...........h...c........Ac..hHhlhu.........scsps...hs........hl.............ht.p.s...t.s....-s...............shahucAVL.aLhltN...hpsAptshptat..p....p.......t..t..ht..........................t......s...P...L...L....NFl.hLLh.s.lpp.....t....p.......h.........s...h....Fp..LpppYtsp...l...p..c..s.s..app.hLptIGphaFs..h.....sp.p.t..s.....hhshhushh................................................................................................................... 1 77 131 196 +2079 PF04214 DUF411 Protein of unknown function, DUF Mifsud W anon COG3019 Family The function of the members of this bacterial protein family is unknown. Some members may be involved in conferring cation resistance. 21.40 21.40 21.40 24.40 20.30 19.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.50 0.72 -4.47 109 858 2009-09-11 13:47:38 2003-04-07 12:59:11 8 4 685 0 253 667 461 70.00 47 46.02 CHANGED pVpspsss..-hssl...KpchGlP.splsSCHTAll..s..GYslEGHVPAssIp+LLp-+.Pp.shGLAVPGMPhGS..PGM .............t.Vpshpss..shssl...KpchGls.spLtSCHTAll.s...GYllEGHVPAssIc+L.Lpp+.....Pp..shGLAVPGMPhGS..PGM...... 0 45 144 209 +2080 PF04217 DUF412 Protein of unknown function, DUF412 Mifsud W anon COG3092 Family This family consists of bacterial uncharacterised proteins. 
20.70 20.70 20.90 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.85 0.71 -4.38 21 797 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 793 0 92 296 13 139.50 64 94.36 CHANGED shh.phlpcGQpYMKoWPhcKpLushFPEpRVl+AT+aAl+hMPslAllolshQhhh...shphhP.pAlshALFhlSLPlQGLaWLG+RupTPLPsoLhsWap-lppKLtptGhshpslpu+PsYp-LAplLKpAFcpLDcsah-- .....saF.olF+RGQHY.KTWPh...EKRLAPVFsENRVIKhTRaAIRFMPPlAVFTLsWQIAL.............GGQLGP...AVATALFALSLPMQGLWWLGKRSlTPLPPulLsWFYE....VRuKLpEuGQs..........L.A..PVEGK.PcYQuLADsLKRAFKQLDKTFLD.D.......................... 1 8 29 62 +2081 PF04219 DUF413 Protein of unknown function, DUF Mifsud W anon COG3085 Family \N 25.00 25.00 32.30 32.10 24.00 22.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.09 0.72 -4.24 23 807 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 791 0 100 247 13 91.50 65 80.88 CHANGED +FaDspNFPRGFuRSGDFTlpEAplLEpaGpshpuLtsGphpPpsc-EppFltsspGppssso.hE+sWhKYhchspp+p+FaoLsGst+..ss ........RaFDNKHYPRGFSRHGDFTIKEAQLLERHGaAFN-LDLGKREPVTEEEKhFVAVCRGEREPVT-tERVWSKYhTRI+RPKRFHTLSGGKP...Qs-............... 0 12 37 70 +2082 PF04220 YihI DUF414; Der GTPase activator (YihI) Mifsud W anon COG3078 Family YihI activates the GTPase activity of Der, a 50S ribosomal subunit stability factor [1]. The stimulation is specific to Der as YihI does not stimulate the GTPase activity of Era or ObgE. The interaction of YihI with Der requires only the C-terminal 78 amino acids of YihI [1]. A yihI deletion mutant is viable and shows a shorter lag period, but the same post-lag growth rate as a wild-type strain. yihI is expressed during the lag period. Overexpression of yihI inhibits cell growth and biogenesis of the 50S ribosomal subunit [1]. YihI is an unusual, highly hydrophilic protein with an uneven distribution of charged residues, resulting in an N-terminal region with high pI and a C-terminal region with low pI [1]. 
25.00 25.00 51.50 34.20 20.10 19.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -11.16 0.71 -4.92 30 794 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 788 0 94 348 14 164.80 55 94.86 CHANGED RKusp.sssp...hssps++psRtpl-spuRpcK+cKK++GhcoGSRpstsptp..pp.psssppKDPRlGSKKPVsLhVpttstsp.........Php.....pps.............+LosEQELtpLENDtpLNpLLDpL-sGcsLussDQpaVDcpLDRI-pLMpcLGIp----s............ptps-DDLLcpFE .....................tss.......tsK...utuKsR.RKTR-EL-tEAR-RK..R..pKK++GpAsGSRsuuGsss..usucs.Qst.KD..PRIGSKpPIPLs..VsE.psstp+.p.......PKs.......cKP.......................hLSPptEL-hLEsDERLDALL-RLEAGETLSAE-QuWVDsKLDRIDELMpcLGLohDDDE........E--..................E-EKQEDhhRL..h................... 0 10 31 64 +2083 PF04222 DUF416 Protein of unknown function (DUF416) Mifsud W anon COG3068 Family This is a bacterial protein family of unknown function. Proteins in this family adopt an alpha helical structure. Genome context analysis has suggested a high probability of a functional association with histidine kinases, which implicates proteins in this family to play a role in signalling (information from TOPSAN 2Q9R). 22.30 22.30 22.60 22.40 20.20 22.20 hmmbuild --amino -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.98 0.71 -5.16 48 828 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 821 2 104 339 19 190.50 67 96.99 CHANGED sshapRLcpLcsWQplsFhsuLCERMaPNYpLFs-hophu-.sphh+slLsLlWEhLssK.su.KlNF-pQLEKL.EphlPsss-.aDhYG...VYPAhDAChALusLLp........uhlstcsh.-cslplSplShsTVAsalE...spsscEls.--p.......lcppthhppEh-lQhplhchL..p-sppR.ch-lIcsL+p-lpps.GlSNIGIsl .............................NPIHLRLE+LESWQHlTFMACLCERMYPNYAhFCpQTtFGD..upIYRRILDLIWETLTVK.DA.KVNFDSQLEK.h..EEAIPuADD.aDLYG...VYPAIDACVALSELlH........SRLSGETL..EHAl-VSKsSIoTVAMLEM..........TQAG..R.EMoDEE........LK-NPAVEpEWDIQ.WEIFRLL..AE.CEER.DIELIKGLRuDLREA.G.SNIGIt.h................ 
0 16 40 74 +2084 PF04224 DUF417 Protein of unknown function, DUF417 Mifsud W anon COG3059 Family This family of uncharacterised proteins appears to be restricted to proteobacteria. 22.10 22.10 22.40 24.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.14 0.71 -4.90 6 870 2012-10-02 13:32:46 2003-04-07 12:59:11 7 2 767 0 99 439 11 168.40 46 92.16 CHANGED h+hhchlshtschslsllRLulhIlFhWlGshKassaEA-uIpPhVuNSPahSahYch.p......................................ssYssS.hLGllEsIlulhlLlGhhpstsGllGGllshshslVTLSFLhTTP-sahst............P.LuGssphVlKDlLhlAuulhlhthstpchL ....................................hclluptsclGlsLlRl.uIsIVFhWIGhLKFssYEA-uIsPFVANSPhMSFhYca.t...p.h.pc..puE............................c.thtWpptNsTYuhSsuLGllElIlulLlLss...h..s.h..lGLlGGlhAhshslVTLSFLITTPE..sWVssLGss.............paGFPaL.S.G.A.GRLVLKDhlhLAGAlhlhu-uA+cl............... 0 23 57 78 +2085 PF04235 DUF418 Protein of unknown function (DUF418) Kerrison ND, Finn RD anon COG2311 Family Probable integral membrane protein. 20.80 20.80 21.00 21.00 20.60 20.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.94 0.71 -4.57 138 1711 2012-10-02 17:00:17 2003-04-07 12:59:11 7 6 1135 0 378 1373 248 158.90 28 41.74 CHANGED hs+pshhp.tsppptphhpphhhhs..lslulshtlhhs.....................hhtshhhhhhthhuuhh.huhsYluhlhhlhpptth.ht.....h....hpslsssGRMALTNYlhQSllsshlF.huaGLGLhsplshh....thhhlslslahlQlhhSphWL+pF+hGPlEal....WRplTatp ........................................................................hh+pshhp..sp.p..hp...hh+..+hth..lh...lsluls.lsh.th..................h.h...s.ht.hss.hh.h....ph.......h.....p...luus...h.......huluY.suhh..hhhh.phpt..hp..............l.....ltsls.slGRMALTNYlhQ.........ollss..hl.F...................a.t.hu..L...hh...p.......h....s....hh....th.lhhslsl....ah.h.pllaSslWL.+.h.a.+.pGPlEWL....WRplThh.t...................................... 
2 131 275 337 +2086 PF04237 YjbR DUF419; YjbR Anantharaman V, Kerrison ND, Finn RD anon COG2315 Family YjbR has a CyaY-like fold [1] 25.20 25.20 25.40 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.31 0.72 -3.64 196 3031 2012-10-09 12:12:44 2003-04-07 12:59:11 8 8 2138 7 527 1873 137 96.50 28 68.23 CHANGED shuhPsspcs..hsa........st....hsa+V...............u......KhFAh....hsttt............................lslK..sssppt.thLhpp..sshhsu..+hs+..pp.Wlsl.h...l......t.slspsplpchlccSapLl ...................................................shth..tsp.s...h.a.......s...ph...ssh+l..........ss..KhFAhltphptp.........................................hlsLK..s..s..P..-..hs..thLppp......ss.l.h.P.u.a..Hh.NK......pH.WloVh..........l........sssls.c.splhcLlscSaplh.......................... 0 174 335 445 +2087 PF04238 DUF420 Protein of unknown function (DUF420) Kerrison ND, Finn RD anon COG2322 Family Predicted membrane protein with four transmembrane helices. 28.50 28.50 28.60 29.00 28.40 28.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.73 0.71 -4.21 60 636 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 597 0 165 381 289 133.90 42 79.22 CHANGED lshLPhlsAslNuloslhLlhGhhhI..+ptphphH+phMlsAhshoslFLlhYlshphh.sssos......FG..............Gtu.........hl+s.lYahlLIoHIlLuslslPLsLholhhuhpp.................phspH+KluRhThPlWlYVulTGVlVYlMlh .........l.sILPplsssh.sloslhlshGhhhI...........h.++plptH+shML..uAhshuLhFhlhYh..o.hp....hh......hssT.s.........FG...............Gsu..........I+h.hYh.hhLhh.HIhLAslsssLuLhsllhuaps..................phshHRKlu.ashsIWhhsulTGVh.VYLhl................ 
0 65 126 155 +2088 PF04239 DUF421 Protein of unknown function (DUF421) Kerrison ND, Finn RD anon COG2323 Family YDFR family 27.40 27.40 28.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.02 0.72 -4.48 196 3034 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 1816 4 627 2119 40 94.30 26 48.77 CHANGED ccllpGcPhlllcsGclhpcsl..++p+lohs-LhttLR.pp.ul.hsls-VchAlLEssGplSVlh+s.p..tsht.....................................t.t.h.t.h.....................hpcl ..............chlcGcPsllIcsGclhhcpl..pptphots..-lhhpLR.pp..G.l...h.......pl....ppVchA.lLEsNGploVhhtt.c......h...............................................................................hhh.................................................... 0 243 460 537 +2089 PF04240 DUF422 Protein of unknown function (DUF422) Kerrison ND, Finn RD anon COG2324 Family Predicted to be an integral membrane protein. 25.00 25.00 27.00 27.00 20.20 20.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.57 0.70 -4.93 13 245 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 225 0 128 250 252 208.50 23 75.40 CHANGED uFs.hhsLhGlhsMthshlAuhl...phl.stRuhuhllshsuhuhulELLGspTGaPaGcatYsssLGPhluGhVPhslPluaFsLslsuYLLs..hshLu.csppphl+hhts....uhhlsshDlVLDPuhs..ulsFWsW.ssGsFa.GsPhQNaAGWlLoGs.luhslhsluashsulptchp....ssshhLsshVuhhhhhsslsLhhGhhlPstlulhLGluhl 
...............................................................h..........hhhhhhhhhsshh....hph.G..h..tp...s.h.hh.hs..hsh....s.luhs...sEhlGl..p.TG..a........PFG.pYpY.s...s...s.L...G....pl.h.G.VPlhlsluW...hhlshsuahlu.............thhl....t............................t...t......t....h.h.p.hhhs...................uhhhsshDlhlDP.shs......sh.s.aW......h......W....p..s..............s.............G........s............aa....G...lPlpNahGWhlsuh....lh..h.hl..h.p.h..h....ht.h...t....t..h..................h..h................hh....hh.h............................................hh................................................................ 0 43 103 124 +2090 PF04242 DUF424 Protein of unknown function (DUF424) Kerrison ND, Finn RD anon COG2412 Family This is a family of uncharacterised proteins. 25.00 25.00 27.10 34.40 24.90 18.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.59 0.72 -3.94 44 146 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 142 4 72 141 21 91.50 26 84.16 CHANGED apsss..hhluhs-cchhGppapctp..hhlplscuFassphs...t.ppslptLpcs.........slsNlhGpcslshult.Ghhc.ssVhhlsuss+splspl .......pspsp.hlluhCDc-llGcpaccsp..lhlplsct..FYsschs...p.-pshptLpcA.........slsNllGpcsVthAlchGhlc.ssVlhlsuss+AQlhph... 0 19 46 61 +2091 PF04248 DUF427 Domain of unknown function (DUF427) Kerrison ND, Finn RD anon COG2343 Family \N 20.70 20.70 22.80 20.70 20.10 20.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.09 0.72 -4.32 99 1160 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 605 5 492 1041 207 92.70 32 63.77 CHANGED lpVhhsGtllA-opcslhlhE...sshsPsaYlP.pDlph.s....hLp..o.sppohCPaKGpAs.Yas.l.....ssts..tpsAAWsYsp.Phss.sstIpsalAF.as.stl ...............................hplhhsGtllA-..o..p..p...s.lhlhE...sshss.....h.......aY.lP.sD.lp.h.p..........h.Lp.t.o..sp.p..ohCPaKGpAs.Yas.l..hs...sspt..................hpsAAWsY.p.P.hst.....s.....tt.....I....tsalAFass........................................... 
0 132 295 403 +2092 PF04250 DUF429 Protein of unknown function (DUF429) Kerrison ND anon COG2410 Family \N 20.20 20.20 20.30 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.54 0.70 -5.07 41 405 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 389 0 180 381 182 209.30 20 83.95 CHANGED GlDhs....+suhlAlh..........ptsphhphthh..s.scllshhtst.....tllulDhPluL...............tss............tshRssDpthR+h..ht.....plF..Psp.shhtp.................................lotpuhplh..............tphthclhEsHPpsshptl.....ssstt.........t...tsh..RhthLtt.h....................th..shtpcDllDAhssAloAth.h.hpGps......hplsst.st...hspts .......................................................GlDhu.....p.shlush..............tssph.h...thth...ht...s......tpllshhpsh...........shlulDuPlsl............st.................................................sutRss-pthpch......ht.........t.ssa.....sspt..shhsp..........................................................h.s.puhp.lt.........h.t....h...t........h...........t..ptstplh..EsaPc.su.h.h.sl.............ttt.t...............ctp....tps.h...t...t....hh....p...hLhphh................................h.....t..th.tp......htts-D....h.lDAhlsAh.sAhh...h.....s.h........h..............t................................................................................................ 0 61 121 150 +2093 PF01861 DUF43 Protein of unknown function DUF43 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes archaebacterial proteins of unknown function. All the members are 350-400 amino acids long. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.51 0.70 -5.39 6 163 2012-10-10 17:06:42 2003-04-07 12:59:11 11 3 152 1 80 301 87 239.00 33 61.06 CHANGED -lLc+FpEIAK-RPcslppYDQGaVTPEoTluRVtLhasRGDLcGK-llVlG.DDDLTulAhuLTshPK+lsVlDIDERLIcFIc+sAcchGls.lEshsaDLRpPLPEchh++FDsFlTDPPETlhul+sFlGRGIusLKGtGsAGYFGlT++EuSlcKWtEIQRhLl.-hGsVITDlIcsFNhY.NWsYhppTRAhphlPlK+......cPEc.WYpSshaRIEsLc....s..+th-EElscsEclYpD-Euos .........................................hhcchpplscsRP..pshtphD.QuhsTsETsltR.sh..l..h..tp+G.DLpGKcll.sl..G..DD.DLsS.......lA.l..u.......L...........o..........s.......h........s.....p........c....lsV..lDID.-RllcaI.pchAc...c...hsl.....s...lcsh..pa...DlRp.sLP.c...chh..sp..FDsFh...TDPP..........Tl....p...Gl.cLF..lu..R.G.l..p.sL.+.s.p...G...s......s.u...Yh....u.hoc.cc..s...o.hpcah..plQ.+hLl.chGl...llp-Il.sFNpY....shs...h...l.tp......hh.......h..h.p...................p..ahh....hhphh........................................................................................................................................ 0 26 46 66 +2094 PF04254 DUF432 Protein of unknown function (DUF432) Kerrison ND, Finn RD anon COG2430 Family Archaeal protein of unknown function. 20.70 20.70 20.80 20.70 20.40 20.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.52 0.71 -4.20 17 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 63 0 53 71 5 120.30 27 51.26 CHANGED hPlstst.thsphlhl+hcpPlllsPtsshphalchPl-luVhhsst..t..thlDhhshs..+tKYsLYGsspsGllsRYhcoplhsc.Pps......pulh+lhlpNpssphspls+lVFshhshphYY ...........Ph.h.t..hsphlhlchpcPlhlsPtsphphalphPl-luVhl.sst..................hlDhh..sls..+.KYsLYGs.s.....s.....sGsls..RYac..Sphhs.....p.Pcs....h...pulhcltlpNssschhpls+llhshhshplYY..... 
0 19 33 41 +2095 PF04256 DUF434 Protein of unknown function (DUF434) Kerrison ND anon COG2454 Family \N 20.40 20.40 22.00 21.10 20.30 19.00 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.72 0.72 -4.59 19 102 2009-09-10 18:22:19 2003-04-07 12:59:11 7 1 102 0 64 100 2 57.20 35 25.61 CHANGED tpLpcAhcDlpaLLNRGYs+csuLchVus+YpLstcpRhhLtRslho-c....cltt.h+pKh .....hLpcAhpDlpaLLsRGYsc+suLphVuN+YpLspcpRhhLhRslhScp....plpt.h+pK........ 0 27 40 53 +2096 PF04258 Peptidase_A22B DUF435; Signal peptide peptidase Bateman A, Studholme DJ anon Bateman A Family The members of this family are membrane proteins. In some proteins this region is found associated with Pfam:PF02225. This family corresponds with Merops subfamily A22B, the type example of which is signal peptide peptidase. There is a sequence-similarity relationship with Pfam:PF01080. 20.60 20.60 20.90 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.10 0.70 -4.97 14 885 2012-10-02 13:41:03 2003-04-07 12:59:11 8 15 291 2 585 836 37 274.40 29 63.39 CHANGED pspppstshssptAhhaslsuoshLlhLahhachht..............................................................llhshFslsush.....................hahlhsshhph.p...............phsttphph.....................hphphshtplhshhhslslslh.alh+p..+..WlhpsllGluhslsslphl+LsshKsuslLLssLFhYDIFWVF........uosVMVsVApu.h-s........................PhhLhhPph............ssssaShLGlGDIllPGlhlAhshRaDh.t..................ptppps.YFhsohluYslGLllThluhplhc.pAQPALLYLVPssLhshlllAhh+s-L+phWs 
.......................................................................................................................................................................................................................................................................t......phs..pA.hh.hslhuushLhh.L.ahh.hp.h............................................................................................................................................................................l.lhh.ha.hlhuhh................................u.hh.h..s.hh..........................................................................................t...tph.h...........................................h.php..h..sh..hpll..shhh.sh.hh..sl...h.....h....l...hpp...p.........W.l.hpshl....Gluhs..l..sh.........l....p.h..l+.L.......s.sh+s....us.lLLss..LFl.Y.D...l..Fa.VF.............sssV.MVpVApu..hcs...........................P..h..hL..h.hPp..............................sstsa.uh....LGlGDIl..l..P..GlhluhshR.a.Dh.h.................................................................................................tt.p.tps..YF..hssh.............lu..YhlGLlhThhs...h..t..lh...p.tu.Q...PALLYLVPssL.hs.hhhAhh...+s-lpthat.................................................................. 0 194 304 452 +2097 PF04260 DUF436 Protein of unknown function (DUF436) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of bacterial proteins with undetermined function. 
25.00 25.00 30.10 30.00 21.20 20.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.83 0.71 -4.77 34 978 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 969 3 107 502 4 171.00 54 92.89 CHANGED pphpphlpEllctusLcpGplhVlGCSTSEVhGt+IGpsuSh-lupslhpslhphhpcpGlaLAsQuCEHLNRALVVERpsAcphshEhVoVlPsh.+AGGuhustAacphp-PV.VEpI..pAcAGlDIGDThIGMHlKhVtVPlRsuh+pIGpAHVTshpoRPKLIGGsRAhY ....c.phppllc-lh-..pusLppG..slFVlGsSoSEVlGt+IGpsuShEluEhIhpslhplhc......ppGIpLAhQGCEHlNRALVVERplAp.p.hsh..E..lVoVlPsl.HAGGShtstAFctMpDPV.VEaI..pApA..GlDIGDThIGMHlKHVpVPlRssl+plGpAHVThhsSRPKLIGGsRAcY...... 0 37 64 88 +2098 PF04266 ASCH DUF437; ASCH domain Kerrison ND, Finn RD anon COG2411 Domain The ASCH domain adopts a beta-barrel fold similar to the Pfam:PF01472 domain [1]. It is thought to function as an RNA-binding domain during coactivation, RNA-processing and possibly during prokaryotic translation regulation [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.51 0.72 -3.65 85 2885 2012-10-02 17:37:24 2003-04-07 12:59:11 9 30 1955 15 561 1820 99 108.40 21 70.36 CHANGED hp.........htsp.............htshllpGpKssp......hRhtspsh.....h.uphhll..hp................................spsphhshlclpsV...phh....papcl........spptuh.....-s..............sh..hhp.h.ph.....att............phhlss.p..Fchlt .....................................thhpc.............htshll.sG.pKThT.............h.+..ps..-s..h.p.......h..Gph.hl.l..hp...........................................................................tp.sp.hs...hlclp.slphh.....phspl............stp...hAh.....tEs....................oLt...hp.ph.htch........a.t.................p..hhh.cat................................................................................................................ 
0 151 287 429 +2099 PF04282 DUF438 Family of unknown function (DUF438) Kerrison ND anon COG2461 Family \N 25.00 25.00 25.10 36.30 23.60 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.47 0.72 -4.18 27 563 2009-09-11 06:07:37 2003-04-07 12:59:11 8 11 557 0 100 390 1 72.00 48 16.66 CHANGED hLKcllhcLHpGtss--lKccFppllsslsstEIshhEQpLlp.-G.lsscElp+LCDlHAslF+sulpphtp .........lL+-ILhcLHsG..u.osEoVp-cFstpFsGVSAlEIShhE+ELMs...sG.lshEDVhcLCDVHAsLFKsAIcslc.s....................... 0 38 61 76 +2100 PF04283 CheF-arch DUF439; Chemotaxis signal transduction system protein F from archaea Kerrison ND anon COG2469 Family This is a family of proteins that are archaea-specific components of the bacterial-like chemotaxis signal transduction system of archaea. In H. salinarum, the CheF proteins interact with the chemotaxis proteins CheY, CheD and CheC2 as well as the flagella-accessory proteins FlaCE and FlaD, and are essential for any tactic response. CheF probably functions at the interface between the bacterial-like chemotaxis signal transduction system and the archaeal flagellar apparatus. 
25.00 25.00 56.20 55.60 24.60 24.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.18 0.70 -5.14 14 80 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 50 0 56 83 0 221.30 22 74.64 CHANGED ptltph.uphh.hspsshc..pspWpcucllLopcRlVlus.tptph....slslsplpDlssp.s.t........hsthpshsulphppc.....shllsssss.......t.ppFtphlFpsllstptVhlpcsAhhGGsV.p-upWE+Gplplsccslphshsstp..pIsl..............sslusl-tcp+plsGcp+.VLplcHscc.spslsoalh.ssp+p.hplLcthlp.pht..........-......phptth- .................hlsch.Gphh..hsptshp..tscWpss+llLopcRlllss.pst+t....slslsplpDlsschs.pt......hhs.hsshsslphtps.....shllsstss........hppFtphlapslLs..tptlhlpaPAlhGGsV.p-spWEcGplplscpslph......shsssphhslsl..............sslssl-hpc+pl....sG.cc+sVLclc..Hhcs..sp..oV....soalt.sspcp.lplLpphlc.phh................c.............t............................... 0 7 33 47 +2101 PF04269 DUF440 Protein of unknown function, DUF440 Mifsud W anon COG3099 Family This family consists of uncharacterised bacterial proteins. 21.50 21.50 23.30 58.30 20.80 17.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.30 0.72 -4.00 9 735 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 728 1 72 234 7 105.90 69 97.44 CHANGED M...p.ho.D-sl-hAYDIFLEhAs-NL-PsDIhLFsLQFE-RGusEhs-suDcWpccVGhpl-s-taAEVhlGLss.Ep-Eh-DlFARhLlSRch-c+hsHllWKc .MDLN.NRLTEDETLEQAYDIFLELAuDNLDPADllLFNLQFEERGGAELFDPAEDWQEHVDFDLN..PDFFAEVVIGLAD..oEcsEINDVFARlLLCREKDHKLCHIlW+E...... 0 4 20 46 +2102 PF04284 DUF441 Protein of unknown function (DUF441) Kerrison ND anon COG2707 Family Predicted to be an integral membrane protein. 
22.20 22.20 26.70 26.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.79 0.71 -4.30 35 1132 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1100 0 140 468 0 136.80 52 91.76 CHANGED LhLllLlllGllu+NpSlsIAssVLLll+hhs.lsph.hPhlpp+GlshGlhllTlulLsPIAoGcIshcsLhpuhhShtuhlAlssGllVAhLuucGVsLlssp..PplssuLllGTIlGVuhhpGlsVGPLIAAGIshlll ...............LlLLsLhhLGhlu+NsolslulhVLlll+lTP.Ls..........sa............FPalE+pGlslGIlILTIGVhsPIASGpls.ssLlcSFhs..aKullAIulGlhVuWLuG+GVsLMusQ..PpllsGLLlGTlLGVAL..F+GVPVGPLIAAGlluLll...................... 0 41 78 108 +2103 PF04273 DUF442 Putative phosphatase (DUF442) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain Although this domain is uncharacterised it seems likely that it performs a phosphatase function. 24.60 24.60 24.60 24.90 24.40 24.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.29 0.72 -4.20 5 718 2012-10-02 20:12:17 2003-04-07 12:59:11 8 8 547 8 211 636 313 108.40 33 46.58 CHANGED -hRclo-cLSVSPQlss-DlAshAcpGF+olINNRPDGEEPuQPuNAAlpAAAcAAGLuYsalPVhsGsITs-sVcuFpcAlAsA-GPVLAaCRSGTRulsLYALuQAlc .....................................hplscphslu.s.Ql.sssDl.t.p.l.u.p.tGa+olIsN.R.PDsE.t.s.s.QP.s.h.tp.l.pps.AcptGl.s.a.h.alP.V.s..u..s.p.l.o.p.p.sV.......ppF.t...ph.ls..p...h..t..t...P.VLA.aCRoGs.Ru...ss...Lasltp...t....................... 0 62 127 168 +2104 PF04276 DUF443 Protein of unknown function (DUF443) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of uncharacterised proteins. 
26.20 26.20 26.40 26.40 26.10 25.80 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.36 0.71 -4.88 22 1105 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 227 0 32 406 0 179.30 47 93.80 CHANGED Ns+YRIlchssEYhhlDl.ssoahshhFPhlsWllP+chhKIopcEh.EpLphs+ss.........Kspsh.hsshGu...ulLlushlRthhphhslphpphlshhlshlshlhllhhalhls++h+hplas.s....ppscpKlhlhP.ohKphhhhlFsYlhhhuholhslhhhl..pspNlIhalshhhhh.hhhhhlNhhoIss.ppspVhh+ .............................................NPKYRlI+YssEYLMlDl..lS.oWlshFhPhINWhIPK+YsKIS..ccEa..EsL...Nl...VKPs.........KspsF.....WPlsGu......ol.LhulhhR....K..Yhhlh...slpL.-Kp.lVIhlChlshlG...ll..hFalhLN+K.LpLplas.s....+spppKl..l..LlP..ohKsh..shh.lFsY.l..hhGuhShhhl.hLlohs.QNIIlalsW.lhhh.hhF.FhlNhssIhs.Kpl+Vlh................................. 0 13 24 31 +2105 PF04285 DUF444 Protein of unknown function (DUF444) Kerrison ND anon COG2718 Family Bacterial protein of unknown function. One family member (Swiss:Q97LI1) is predicted to contain a von Willebrand factor (vWF) type A domain (Smart:VWA). 
20.30 20.30 20.40 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.35 0.70 -5.84 8 1564 2012-10-10 16:07:06 2003-04-07 12:59:11 7 3 1281 0 389 1028 179 338.70 43 98.30 CHANGED MuhFlDRclNs..+cKohssRQRah++h+.EpIKculsDtVsc.cSIsshsutEslpIPtRulcEsph+.Gc.sGcpc+VtsGsc.....c.psGDhIsRssuGsG.tGsGcGpuuuDsEG.......EDtFphplSp-Ehh-lLFEDLpLPNLp++phsplsp.h+sc+AG.hpssGlsuNIshsRTlpsuluRctuhs+upp..hcu..tpl...t...s.tlhct.......tht.hcs+hcRlP.lcs.DLRa+paccpPcPpSpAVhhClMDVSGSMspscK-lA+RFFhlLphFLpp+YEsVElVFIpHHTpA+EVsEc-FFappEoGGTIlSSAL+hhpElIcERYssAcWNIYuhpASDGDNas-DosRClclLppclhthsphauYsEIs....hpsHps.........hhcYcthpsshDs.FthppI+stsDlaPVh+plFp+Eps ................................................................................t.hIDRR.Ns..+sKShsNRQRFl+Rh+.tpIKpuls-tlsc.cSl....hsh....p....s....s..-p.lsIPh..cslsEP...h...F...+p...Gp.sGhp.c.pV.tP..GNs................cahpsDpltR..PtuGu...............u....GsGpGpu.u.t-.GEG.......pD...pF.FplSp-Eah-hhFEDLtLPNLpppp..pp.lsp.h+.s.cRuG.hpssGs..Pu..NIslhRolpsuluRRhAhsts..t.hpth..........tt.l......h.......t....................t.....t................h..t..l..t..l.ct+htp...lP.aIDshDLRa+sapcpPp.Ps.SpAVMFClMDVSGSMsptpK-hAKRFahLLYlFLpR..pYc.s.V-lVaIRHH.TpA.KEV.s...E.c-.FFau.....pEoGGTlVSSAL+LhpEllc...........pR.YssspW.NIYuAQASDGDNW.s.s.Dosh.CtclLtpcllshspaauYlEls.........ptHps...........WhpYpplp.t.t.h.ss..FuhpplcptpDIaPlFR-LFp+p..u.......................................... 0 133 240 315 +2106 PF04286 DUF445 Protein of unknown function (DUF445) Kerrison ND anon COG2733 Family Predicted to be a membrane protein. 
33.80 33.80 33.90 33.80 33.60 33.70 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.28 0.70 -4.93 106 2067 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1694 0 414 1545 89 326.60 24 87.11 CHANGED sEAAhVGulADWFAVsALFR+P.....lG.l.IP.....HTulIP+p+-clucsLuphVtsphLsscs...ltp+lpphshsptlspaLt..........ps.....spsptl........................sptss.phltthlctlpcpclpphlcptht...............................................pplpph.hushhuplLp.hht-scpptlhDhll......pphtphlpsscsp..ltphlpchhpph.sthh.....................stplsptlhpth.phlcclpt-sp+.hRpphsptltphlscLtps.th..pch-plKpphlsc.thpphhps...lhpplcphlhpthps.s........hlcpplsphhtthsp.........................................pLtp-..splpppls........................chlpptsttllsp.ttplspllp-slcpa-scclpctIEhtlG+DLQaIRlNGollGGllGlll.aslshll ....................................................h.uuhlGulssWhAlphLF..R...P..........h.h....h.lP........p.s..........ulIP+p+-clucsluphVpcchLssps...lhtt....l........cpt..p....sthltphhp............................p...ptsppl.................................................................stphh.phhpt..h..lph.h...t..s...t..p.....lpp.hlpp..t.hp......................................................................................................................................ptlpp..h.hs.t.h.shh.......ppphp..hh-h...lh..............tphh.t.h.ltp.pp.sp..ltp.l.ph.h.pp..sh......................................tthsphh.pt.......phhs.cl.....pt..c........t..+..h.c.....p.t.hs.p.h.h....hl.pp.Lh.p..............tchc.thKphhh.p.c.....t..h.tphhtp....................lhtp.l..pphl.h.s.thsp.p.................hhpp..ltp.hht.h.hp...........................................................................................................................plhtc......stlttplp....................................................................thlpptstplh.p.........h...l..sphlp-pl..psa.Dscchpc.Ip..hs+-LphIch.GsllGuhIGlh.hhls.h..................................................................
................................................................................................................................................................................................................................................................................ 0 158 296 367 +2107 PF04287 DUF446 tRNA pseudouridine synthase C Mifsud W anon COG3098 Family This family is suggested to be the catalytic domain of tRNA pseudouridine synthase C by association. The structure has been solved for one member, as PDB:2HGK, which by inference is designated in this way. 25.00 25.00 25.80 25.70 19.70 19.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.97 0.72 -10.29 0.72 -4.20 56 892 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 888 1 128 441 33 100.00 49 91.89 CHANGED cp.p.ltptLtpLEttL+phsLWpspsPsscAhsSspPFulDThshppWLQalFlPRMptLl-sstPL.PpphulsPhhEcshp..pp...sphptLlshLpplDpLl ..........c.spV+.pLpsLEshLRcpphW..ps...st..Ppsct....FsSspPF.hhDTMcPhEWLQWVhIPRM+sLL-sspPL.PsuFAlAPYaEh..ALs....-+...Pppth..l..L..A.LpcLDsLh...................... 0 20 49 93 +2108 PF04289 DUF447 Protein of unknown function (DUF447) Kerrison ND, Finn RD anon COG2457 Family Archaeal protein of unknown function. 
24.80 24.80 25.20 24.80 24.50 24.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.99 0.71 -4.71 45 190 2012-10-02 11:35:36 2003-04-07 12:59:11 7 1 185 12 120 206 54 176.30 24 88.66 CHANGED hEsllTTtssp....NhAPlGllh...cucs....hhl+lFcsS+ThcNltpsshhslslssDshlaspsshsth..p.tht.sh......h....hLcsuhsahthclpph..........pssspthhhphpslctthtp.....sh...NRupsullEusVhsTRl...ph.....hptcclhpclphhttllcKsGGspE..pcAhchlpc ............................................EsllTThs.p.....phAPlGlhh...................pssp.........lhlphacsopThcNltp..pshsslshs.sDshlFstushs....tthp..h......th...........hLpsuhuahph...clpph..........psssp.h.hhphcslctthpp.....htsa...NRApsAVlEusVhsoRLph........hst-clhpcltahtts.lcKsuGtpE..pc.Ahphl..t............................................. 0 30 74 101 +2109 PF04296 DUF448 Protein of unknown function (DUF448) Kerrison ND anon COG2740 Family \N 21.60 21.60 21.60 22.70 21.20 20.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.83 0.72 -4.35 155 2403 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 2395 1 477 1345 326 77.80 31 69.80 CHANGED .RpClsspc.hts+ppLlRlVhss-....u....p.lshD...pt+hsGRGAYl.ssstp..slcpAh.....++..+thsRuh+..ss.............ls..ss..lhct...lpphl .............RpClsspc.h......tsK..+-LlRlV..t..s.p-...G.......................p..lhhD...soGKtsGRGAYl.shc.....p.slppAt.......++.......+shs+uhchp............lscp....hhcpL.t..h.............................. 0 172 342 420 +2110 PF01863 DUF45 Protein of unknown function DUF45 Enright A, Ouzounis C, Bateman A anon Enright A Family This protein has no known function. Members are found in some archaebacteria, as well as Helicobacter pylori. The proteins are 190-240 amino acids long, with the C terminus being the most conserved region, containing three conserved histidines. This motif is similar to that found in Zinc proteases, suggesting that this family may also be proteases. 
20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.57 0.71 -4.53 82 3481 2012-10-03 04:41:15 2003-04-07 12:59:11 12 7 2755 0 820 2754 698 165.60 22 80.80 CHANGED +plplplps.ssl.plssPhthsppplpphlpc.....+tsWltpphtchpp............tthhsspth.h..hGcpapLphhtsp.....................hhh.thp.pthpptlpcah+cpspt.hlppt....ltthspthsl...ph.tphpl+sh+oR.WGSC..ss...psplplshcLlhhP.pll-YVllHELsHLh.chNHutcFWpllsphhPs.acct+phLcpt ..................................................................................................................................................................................................................................................................................................................................................t.h......th.h..t.h.t......h..h......h.....h...............h....t...h...................ht.....l.p...p...pp.p..WGoC....pu......c....s.....pIpl...s..h.t.l..h..t....s.P...hl-Y.....ll...lHELsHLt...c..h....s.........HsctFaplspphh.P.p..a+p.hct.Lp...................................... 1 265 553 699 +2111 PF04313 HSDR_N DUF450; Type I restriction enzyme R protein N terminus (HSDR_N) Kerrison ND, Finn RD, Yeats C anon COG2810 Family This family consists of a number of N terminal regions found in type I restriction enzyme R (HSDR) proteins. Restriction and modification (R/M) systems are found in a wide variety of prokaryotes and are thought to protect the host bacterium from the uptake of foreign DNA [1]. Type I restriction and modification systems are encoded by three genes: hsdR, hsdM, and hsdS. The three polypeptides, HsdR, HsdM, and HsdS, often assemble to give an enzyme (R2M2S1) that modifies hemimethylated DNA and restricts unmethylated DNA [2]. 
21.10 20.50 21.10 20.50 21.00 20.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.44 0.71 -4.56 77 3819 2012-10-11 20:44:43 2003-04-07 12:59:11 9 43 2584 8 831 3899 642 175.90 19 18.43 CHANGED thsEpthcp...hl..hl...hGaph.................ht................................................................................................................................................................................+sDlslhlN.......................GlPlsllEhKp..................................t.pt......hppLapYs...........................................thhlhoNGpphthhsthsptpp .......................................................................................................................................E..h.t...h...h.....t......s.at................................................tt....h..t....h.h..t.h.t.p................................................tt.......................t......t..h..h....t....h..h.......t.....h.......................t.......h...h.............hs.h...p..p......t..p................................................................................................Np....h....p..l..spphphp................................sp.ppp+.hDlll.h.l..N.......................G.l.Pls...hlElKp..................s...sl.ppAh.p.......Q..hp.p.Yp.pp...................hppl.a.pah....................................................tlhhh.os.shp.shhhst.t....p.............................................................. 
2 293 576 721 +2113 PF04301 DUF452 Protein of unknown function (DUF452) Kerrison ND anon COG2830 Family \N 19.50 19.50 23.70 23.10 18.50 18.10 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.33 0.70 -5.04 2 358 2012-10-03 11:45:05 2003-04-07 12:59:11 8 2 355 0 43 228 1 204.30 37 95.06 CHANGED McTpahspQGspLIlYFAGWGTPPssVpHLILPENaDLhlCYDYpDLphDhDFSAYpHIRlVAWSMGVWsAERshQGh.LhSATAlNGTGLPCDDpaGIPpslFtGTLpsLsEssRhKFERRhCGsKs.hcDYQpashRP.htEIHtELhALashltQDRRTDLIpWopAlVGStDKIF.stNQ+tYWpsRCslpEIsstHhLFs+FTHWpsh .................................p.p.p..u..p.c.LIlhFuGWuoss.shhs...HL..h.......s.....p......s.....+..-l.l.l.s.YDYcsLs.h.......c....F....D........h..s.....u...a...p..c.IpLlAaSMGV.....as.A....sR..l.L..........p..p....l.....p.......h...p..p..t..h....A.....INGTshP.hD.cphGIssuIFctTLcs.hstp...sht+Fc+pht.t-.+p..h.pca.p.ph.s.t.+s.hc-l+pELptL...a.t.h...ht.p..c.....p..p...p..s......h.l.h..W..spsh........luppDcIFPsss..+pha.p..p...h...h......l.....l..-.t.sHahF.+FppWp............................ 0 13 26 37 +2114 PF04303 PrpF DUF453; PrpF protein Kerrison ND, Bateman A anon COG2828 Family PrpF is a protein found in the 2-methylcitrate pathway. It is structurally similar to DAP epimerase and proline racemase. This protein is likely to acts to isomerise trans-aconitate to cis-aconitate [1]. 
20.20 20.20 20.80 20.80 19.30 20.10 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.34 0.70 -5.72 6 1338 2012-10-03 03:02:41 2003-04-07 12:59:11 8 10 1077 9 378 1097 393 341.90 45 96.08 CHANGED .l+IPsThMRGGTSKGVFF+hp-LPtss....stRDtlLLcVhGSPDPh..QIDGhGGGsSsTSKVsIlutSSpPstDVDYLFGQVuI-cthVDaoGNCGNlSAAVGPFAIcAGLV.sA+hP...lstVRIapsNhuKhIlAcVPlssGpVp.sG-hplDGVshsuA.ltLsFhDsAssss..GtlFPTGN.lDsL-sst.G.lpsThIssuhPhlhV-A-ulGhsGTELtEElNuD.phLA+hEpLRshuAh+MGhluclc-ts.p.tTPKlAhVusPppYhsSuGthhtus-IDlhVRhhSMt+hH+AhhsTuAVAIuoAsAl.GTlsshhAGust.hssVphuHPSGsLcVtscscp.pt..sh.pAthsRoARhLMEGaVhlP ..................................h+IPsshMRGGTS+GsFhhhpD...L...Pp.st....stRDtlLhtlh...GS..P.......D..sh......QIDGhGGuss.TSKssIlS............+.....................S.s...............c........s..c.....tDV.............DYLFuQVslc.c.sh.VDhosNCGN.h.uu..VGsFAIcsGLl...sup.P....................lspV....RIhpsNh.GphI.AcV..h.........s.....s...G....t..........V....p.......p.....G-.h...clDGVsh.s.AA.VtLpF.lss...Ausps.......Gp......hFPTGN...hl.D.l.......-......s......................................l..psThIssuhPslhlsAps..L......G..h......o.........G.h........E...L.........s...p.ls..uD.tt.hLu....+....hEsIR..htuuhtM......G.lsc......l......s.p.h.s.........hPKhshlus..s...p.....p.uG.........sls..VRh...h....tphH+AhhhTu..AlAIuo.........A....ssl..GT.ls.p.......h...................s.................s.........u.......s...............s.........h..............s....s..................lphtHPSG..sLcVt..hp...s..c...t..tsu........ssh+A..sh.RoARhlhcG.VhlP........................................... 0 82 190 304 +2115 PF04304 DUF454 Protein of unknown function (DUF454) Kerrison ND anon COG2832 Family Predicted membrane protein. 
21.60 21.60 21.80 21.80 21.40 21.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.41 0.72 -3.95 112 2157 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 2047 0 323 1155 310 69.50 27 54.83 CHANGED hapWLlsH.haGshlcsWpppculsh+uKhhAlhhhhhshshu.hhhssh.hasphhlhshhh.hlthalhph ...FasWLltpphaGs.al...csapc.p.+.uhshpsKhpullhhhlshulS.....la...hs...sh....hhl.+l...hLhllhh..hlhhahap............... 1 79 164 253 +2116 PF04305 DUF455 Protein of unknown function (DUF455) Kerrison ND anon COG2833 Family \N 25.60 25.60 25.70 27.00 25.00 24.70 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.50 0.70 -5.11 81 772 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 692 0 260 668 146 239.50 38 83.29 CHANGED LtssDspcKsthspphtpta..tp.tph........shh..sth.........s......tPuRPtcPpLlsPpc..l.s+R.phso.cGRhslLHAlAH..IEhsAI-LAhDhlhRF....................................tsh.....Pps.......FapDWlcVAs-EAcHFsLLpp+L.pplGtpYGDhPAHsGLWcsuccTspDlhuRhAlVPhsLEARGLDsoPhhhp+lppsGDp....pusplL-lIhcDEIsH...................VuhGs+WF+alCp..pcsh-.........Phps..ap.pLl..ppahtstl+sPFNtcARtpAGhsps.ht. ...............................................................................pss.ppKsthspphht.thtstp.hth................t......th..........t....hPuRP..t..p..sp.Llt.Ppp...l....+R.phso.cup.ssllHAlAH..IEhNA..IsL.ALDsshRF.........................pshPtp.......FatDWlcVAs-Eu+HFpLLps+..L..ppLG............h..cYGDhP...........uH..su......L...........Wph...sp..pTtpDlhsRhAlVPpsLEARGLDssPhlhtKlppsGDp.....tssslL.-...lIhpDElsH...................VuhGs+W.a+alCp........pp.th.-...........................Ph..th....FppLl...pth.h..h..shh..+s...Ph..NhpARtpAGFspp.hp.s........................................................................ 0 81 174 222 +2117 PF04306 DUF456 Protein of unknown function (DUF456) Kerrison ND anon COG2839 Family This family is a putative membrane protein that contains glycine zipper motifs [1]. 
27.60 27.60 27.60 31.00 27.30 27.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.90 0.71 -4.15 85 842 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 833 0 221 624 147 138.90 30 85.95 CHANGED hllPlLPGs.lllasGhllasas.suhs....huhhhlslhsllsl.lshlhDalssshGs++hGuo+huhh....GuslGsllGhFlhs...Ph.......Gll....lGPhlGAhluEl.hppc.........s...h......ppAh+suhGuhlGhlsussh+hslslhhlshFlhsl.h ..............lhPllPGs.hllasGhllatas.....hsht....huhshhlshsllsl.lhhssDalusthts++hGuSKhu.h....uuhlGsllGhFhhP....Ph............Gll....lsPFlus....als....EL...lptp..........sh......ppAh+sulGoll.GhlsuolschhlthhhlhhFhhsh.h..... 0 75 163 208 +2118 PF04307 DUF457 Predicted membrane-bound metal-dependent hydrolase (DUF457) Waterfield DI, Finn RD, Bateman A anon COG1988 Domain Family of predicted membrane-bound metal-dependent hydrolases, based on Swiss:Q97LP7. May act as phospholipases. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.30 0.71 -4.89 181 3256 2012-10-01 21:01:47 2003-04-07 12:59:11 9 8 1920 0 845 2084 420 171.40 23 77.02 CHANGED shoH.h.....hhGhsluhsh.ht................................htshhhhhhusluuhl...PD.lDhh.........thh.sptht.h............................................................................................HR.uh.THSllhhhlhuh......................................................lhhhhhthh........................hhhhhhhhhlGh...hsHllh.Dh..hTs.....h.Gst...lhaPh...................pptphths...hhhhhss........hhth...............................................................................................hhhhhhlshthhhtthh 
............................................................................................................................oH.h.....hhuls.huhhh.h.t..h..................................ht.hthhh.sul.lsu.hL.......PD..lDph...............shh..spththl..t...................................................................................................................................H.R...G.....h.TH..S........l.l..hs.hll..sh.....................................................................................lhhhhhhth...................................h...hph.h.h.h.h.h.luh....loHllh.Dh.....hTs........h..G...lt........hLaPh..................................phph.ths.........hhh..h.hss...........hth...........................................................................................hh...............hhhhh....................................................................................................................................................... 0 248 566 724 +2119 PF04308 DUF458 Protein of unknown function (DUF458) Waterfield DI, Finn RD anon COG1978 Family Family of uncharacterised eubacterial proteins. 25.00 25.00 26.50 67.80 22.10 18.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.64 0.71 -4.47 19 224 2012-10-03 01:22:09 2003-04-07 12:59:11 7 1 219 0 67 155 174 148.90 44 90.53 CHANGED ccshpcIhpaltpsspssY+lhlGTDSQst.spTpFVTAIllHR....hGKGA.taaappphp++lpS....LRQ+IahETshSlElA........scltchltttsh...tshslElHlDIGt..pGcT+-LIpElVGhlhG..GapscIKP-SYuASslAD+aTK ...............p..VhpcIpsFlcc.D.PcshY..+LsIGTDSQs+..pc..sTcFlTAIhIHR....lGKGA.thha+pphpc+.to....LREKIahETphS.ElA.........pplh-lLt.hst....sshhhEIHLDIGs.....cGhTK-hIp-hsuhIpu....MG.hpAKIKPDuYAA.ShANRaTK.. 0 42 56 61 +2120 PF04311 DUF459 Protein of unknown function (DUF459) Kerrison ND anon COG2845 Family Putative periplasmic protein. 
20.20 20.20 22.20 21.30 18.50 19.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.94 0.70 -5.59 2 172 2012-10-02 11:02:24 2003-04-07 12:59:11 8 2 171 0 55 291 5 282.60 32 67.88 CHANGED Psl.Ip+tps..h.ht.h.Phh.st.QhsIsphhP.hR.shl..sG.h.tstht-s..hs.thu.pPAstlstpssus..hlp-shhs.P.tlt.s...sE+.sssss.hhupsshp.hhsGDsp.p.hotshhp.htpp....thtItpspVs..hhshs.ha+hP+hh.shLshps....hAA..huhthsDh.s.FhDtsGuhssstssh.tpshc.+hcsslNl.ht.ph.hsahhthP.h+.htLst.hshh.tst.t...V.A.hssttshhhs..h..llsssthshuotlhssts.hhts.thsG.+hsIEGt...h.ph.PhhhsDhphs .............................................................t.t.tspsssss............................................t..t.sss...+hhll...hGDhhustlu-G...L..ptsaspsPs...lhl..s.p..ssuso....GhVR...cDhh...sWsttl.......hhtt.....pp.ss.....lll....hlG...uN...D.RQ..shhs...s..s.s..p.......hpsp.ospWpp.p.Yp+Rltths.chl...s...cp...+...hP...l....lWVGhP...sF+sp..th.op..D..h.Lsh.N.pl.YRsuA..p.+s.Gu.pasDlWD.GF....VD..EpG.p..F.s.po.GsDhsGQpsRLRusDGls.hTps....G+RK..LA..aYsE+Pl....pchL.u.h.....s...tp.ht....l..sssp..............ph.s...spsh.hs....ss...L..ts.stc..u..s...Lh.G....s.s.st.........t..h..ps......ctp...........hhhtDh........................................................................................ 0 13 27 35 +2121 PF01864 DUF46 Putative integral membrane protein DUF46 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein has no known function. It contains several predicted transmembrane regions, suggesting it is an integral membrane protein. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.22 0.71 -4.39 4 235 2012-10-02 14:06:56 2003-04-07 12:59:11 12 3 221 0 122 719 285 165.20 30 94.67 CHANGED hhsll..LtslWalLPAYhANsSullhGGGTPlDhGKsahDGRRllGDGhTWRGhhuGlhsGsllGllQhhL............Golh.tlhLuFLLuhGAlhGDhsGSFIKRRLsh-RGpPAslLDQLsFlluALhhu..YP..ltslPh-hIlllhlITshlHhuuNIIAY+LGhK-V.W ..................................................hh.............hhPshhsNsss.h.l....h.......u........u.......tpPlDh.G+ph..........h.DG+..R..l....h...Gs...uKTa+Ghhs....ul....hh.Gs.lhu..h....l...s....h...l.....................................................................s...h.h...........l.hhu..hh..luhu.AhlG......DlhuSFlKRRlsl...p........c....G....t..................s................h...l....DQl.D.h.l.l.u.u....l..lhh......hh...........h..h.s..l..s....h.t...........h..lh.l..ll.ls..shl..HhssNhluYhltlKp..a.......................................... 1 34 75 100 +2122 PF04312 DUF460 Protein of unknown function (DUF460) Kerrison ND, Finn RD anon COG2433 Family Archaeal protein of unknown function. 24.30 24.30 24.50 89.00 23.30 24.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.45 0.71 -4.65 30 115 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 115 0 80 116 4 136.10 41 22.10 CHANGED +sh+utDVpVclcshh+c+lcFhsh.......st+.cchlIVGlDPGhoTGlAllsL-GclltlhSpRshspu-llchlhphG+PllVATDVsPsP-sVcKlupshsAsLYsPpccLol-EKtcLscchu.......phcssHERDALAAA ................p..h+utDVpVclcsh.ccplcFhsh.........tt+.+chlIVGlDPGhTsGlAllDLcGclltlhSpRshspu-llchI.chG+PllVAoDVsPs.PcsVcKlupsFsAhlasPccsLsl-EKpclscchu........hcssHpRDALAAA. 0 21 50 65 +2123 PF04314 DUF461 Protein of unknown function (DUF461) Kerrison ND anon COG2847 Family Putative membrane or periplasmic protein. 
22.20 22.20 23.30 22.30 22.00 22.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.19 0.72 -4.40 175 1433 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1205 6 437 1186 538 110.60 30 62.80 CHANGED lpcuasRs...............sss..ss.............................p......suAua.hslpNpuspss.pLluss.os.sAppsElHpphh.c...sGhMpMpplp.slslPAssslpL.....pPG....G.hHlMLhsLpps.l.ptG-plslTLpF..ccu......sp....lplp ..................................................................lpssasRs.....ss.s..st..............................................t......suuua..hslp..N.pu.........s.pss.pLluss..o.......s..susp..sElHphhh..p..........susM.+Mppls...slslPAss.slpL.....cPG.......G..hHlMLhsLcps..l.ptG-plslTL..pF..csu....tplpl....................... 0 102 259 351 +2124 PF04315 DUF462 Protein of unknown function, DUF462 Mifsud W anon COG3101 Family This family consists of bacterial proteins of uncharacterised function. 21.00 21.00 22.20 21.60 20.90 20.00 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.10 0.71 -4.87 44 863 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 858 0 104 403 45 164.50 66 89.42 CHANGED lFNpsFtppaNTpLVpGs-EPlYlPA.......ssp.......sap+IlFAHGaauSALHEIAHWClAGpcRRhL.DaGYWYpPDGRstppQspFEpVElKPQAlEWlhuhAAGhpFpVSsDNLsGs.psD....ppsFpppVtpQlhpal..p....................pG.lPtRAttFhcALppaYpss..ls.ppF.ht .............IFNsCFu-.-FNTRLlKGDDEPIYLPA.......Dsc......VPYpRIVF.AHGFYASAlHEISHWCIAGcsRRcLVDFGYWYCPDGRDAQTQopFEcVEVKPQAl-WLFCVAAGaPFNVSCDNL-GD.hEPD...........RlsFQR+VHAQVhsYL....p....................pG..IPcRPA+FI+ALQsaYcTP.pLsAEpFs.hs........................... 0 22 47 79 +2125 PF04317 DUF463 YcjX-like family, DUF463 Mifsud W anon COG3106 Family These proteins possess a P-loop motif. 
25.00 25.00 26.90 26.90 20.10 19.90 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.57 0.70 -5.91 51 1031 2012-10-05 12:31:08 2003-04-07 12:59:11 7 2 996 0 159 638 61 429.50 59 95.04 CHANGED lRLuVTGLSRuGKTsFITuLVspLLcs............u+LPlapusppGRlhuschtPQPD.sVPRFsYEstlsuLhsss..PpWPsSTRslSELRLul+Ypspsultphhss.uTLaLDIVDYPGEWLLDLPLLcpsatpWSppphshhps.tRtplApsaLsthpslD.sutsD..EtphpplAcsaTsYLpss+tpp.GhphlpPGRFLLPG-.LcGuPsLsFhPLsh.s........ppsscsShhshhc+RY-tY+ppVVKPFa+-HFuRhDRQlVLVDsLsALNtG.pAhpDhcpALsplhpuF+hG+s................ohLscLF.uPRIDKlLFAATKADHlpc-QHspLhuLlppL...lpcutppApFpGscscshAlAulRATppuhVppsGcplsslpGp.hts..........scpsslaPG-lPpch................sssshWppts.FpFhpFpPstl......ssstslPHIRLD+sLpFLLGD+L .......LRLAVTGLSRSGKTAFITuhVNQLLshps.........s.......uRLPLhu...As...REpRLLG.....VKRlPQpDhulPRFsYDEuLspLh.....u.....sP..PsWPs..PTRGVSEIRLALRa+os.cu.LLRHh+.-.TuTLYL-IVDYPGEWLLDLPhLs.QDYhoWScQhtuLLpG.p.RuEhuspWhthsc..s....LD..PhAs..AD..EspLAcIAsuaTDYL+pCK...pp.GLHaIQPGRF..VLPGD.hAG.APALQFFPhPclss.t.....thhupAcKposhuML+cRapaYppKVVKuFYKsHFhRFDRQIVLVDCLQPLNuGPpAFsDMRhALoQLMpSF+YGp.R................oLh+RLF..SPhIDKLLFAATKADHVThDQ..HsNhVSLLQQL...lQ..-AWQpAAFEGIsMDCluLASVpATpoGhlc.hs.Gc+lPAl+GsRLsD..........GtslTlYPGEVPuRL.........................Pu..tsFWp..p.Q.G.FpF-uFRPpsh......clDcPLPHIRLDuALEFLIGDKL................................................................ 
0 25 67 110 +2126 PF04327 DUF464 Protein of unknown function (DUF464) Kerrison ND anon COG2868 Family \N 20.40 20.40 22.10 22.10 18.80 18.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.51 0.72 -3.78 58 1433 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1398 14 197 712 9 103.00 33 94.18 CHANGED MI+lphp+sp.splhuFploGHAs...........hu......chGpDI.....VCAuVSulshsslNul-plt.phcsphph....psGaLplcl.s.....ppppsQllLcshhluLpsltppYs.ca.....lpl ...................MIpsshp..csc...tG....p....ltuhphsGHAs.....................hu...-a..Gp.Dl.....VCAuVSslshssl.Nulppls...sh...c...s...p...lcht....cuGalpl..cl..sss...................pp......cps...QlllcshhluL.ps.lp..c..p..Ys.-alp................. 0 90 142 168 +2127 PF04325 DUF465 Protein of unknown function (DUF465) Kerrison ND anon COG2841 Family Family members are found in small bacterial proteins, and also in the heavy chains of eukaryotic myosin and kinesin, C terminal of the motor domain (Myosin Pfam:PF00063, Kinesin Pfam:PF00225). Members of this family may form coiled coil structures. 21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.43 0.72 -4.26 175 1757 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1278 1 425 1015 380 49.40 33 66.23 CHANGED hscLtpcHpp.LDppI..pphppp...s..s.scshplpcLKKcKLpLK.........Dclhplhpp .......htpLhccHsc.LDccI.pphEss...s...s..ss..sh..cl..pc.LKKcKLpLK.........Dclhplhp........................... 0 96 245 335 +2128 PF04328 DUF466 Protein of unknown function (DUF466) Kerrison ND, Eberhardt R anon COG2879 Family Small bacterial protein of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 
25.00 25.00 30.10 25.70 20.80 23.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.34 0.72 -4.14 41 1692 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 1243 0 200 511 13 64.60 52 94.47 CHANGED hhpclpphh+...hhtpsh+hhVGlP-Y-sYVpHM+ppHP-c.PlMohcEFFR-RQcARYuusuu...RC.C ................MFssLupstKYLGQAA+hhlGlPDYDNYV....E.HM+psH..PDp....s......sMoYEEFFRERQ-.ARYGGcGus...RC.C......... 0 32 93 150 +2129 PF04326 AAA_4 DUF467; AAA_div; Divergent AAA domain Kerrison ND anon COG2865 Domain This family is related to the Pfam:PF00004 family, and presumably has the same function (ATP-binding). 25.30 25.30 25.40 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.83 0.71 -3.96 163 2671 2012-10-05 12:31:08 2003-04-07 12:59:11 9 56 1473 5 706 2349 272 120.30 21 27.02 CHANGED Esppl.EaKpshttstp....................lhcslsAFAN..s....t.....G.GhlllGlp..D...........stp...lhGl.......................tpttpttphhpp.ltptlpPt...........lph.php.......hthpsp...............................................................pllhlp......l.puspts.......................http....sphYhRhuspspt.hs ....................................Esppl.EaKpshpp....p.....................lhcslsA..FAN....s.........p.........G..Ghlll.Glc....D.........ssp...........lhGl............................p.p....p.p.th.p.t.h.t....p........h.t.p..php..s...............lt.h....phph...........h..p..h..p.sp...............................................................pl.l.hlp...l.tutpts............................ht.p.............sthahRhssts....t................................................................................................................................................................... 0 253 490 579 +2130 PF04318 DUF468 Protein of unknown function (DUF468) Finn RD anon DOMO_DM06450 Family These conserved ORFs probably are probably not translated into protein [Personal communication, Val Wood]. 
25.00 25.00 83.00 82.90 20.10 17.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.11 0.72 -3.42 4 21 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 3 0 12 21 0 79.70 78 74.39 CHANGED hHGTCLSGLYPVPFTHpsHcYPHFsIYISFuGPKYCITALNshlIPLLpHI............LTsphIaTYhNIspKSP.KpPKHKNILlFN.sp .MHGTCLSGLYPVPFTHpuHDYPHFNIYISFGGPKYCITALNTYVIPLLH+I............LTTQFIaTYsNITpKSPlKSPKHKNIL.FNpNT... 0 12 12 12 +2131 PF04320 DUF469 Protein with unknown function (DUF469) Finn RD anon DOMO_DM08606 Family Family of bacteria protein with no known function. 21.90 21.90 23.50 23.00 21.40 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.35 0.72 -3.23 37 852 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 825 0 116 317 11 100.30 61 90.89 CHANGED RRLRKKL+lsEFQELGFplshpaccshst.-phDshlDpFI-.hIEspsLsasG.uG...phtaEGhlssp.chG..psTEEcRtsVctWLcu+s.lpslclo-LhDhWa ..............RRLRKKMHIDEFQELGFSVuW+FsEGTS-.EQIDcTVD-FIsEVIEPNcLAFDG..SG....YLsWEGLIChQ...cIG..KCTEEHpAlV+KWLEtRp.Lc-VcsSELFDlWW...................... 0 12 33 77 +2132 PF01865 PhoU_div DUF47; Protein of unknown function DUF47 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes prokaryotic proteins of unknown function, as well as a protein annotated as the pit accessory protein from Sinorhizobium meliloti Swiss:O30498. However, the function of this protein is also unknown (Pit stands for Phosphate transport). It is probably distantly related to Pfam:PF01895 (personal obs:Yeats C). 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.29 0.70 -5.17 9 1815 2012-10-02 11:27:25 2003-04-07 12:59:11 11 7 1654 7 546 1275 457 205.00 24 94.91 CHANGED hhhlFtpss.+plhcHhchlspslhthtchhcuhhcGshcpsEcltc-lsphEccADpl++-lclpltpuhFLPssRsDllcllc..DclhDshEcsAhhlhltc...thPc-hc--hhthhppolcshchltcslctl-p.l-suhp.........llpclcplEccsDtlptclhctlas...psh.sshchhhhhpllcplusluDpuEDsu-clpllhhc ................................................................................h...........t.h.phhpths.p.sh.pth.hp....hh...p.t.h.h...p...t...s......p..c...h...p....ph..t....pplpphEccuDplp+clhp.cLsp......s......F....l...TP.l-...R.....-D....Ih..p.L.ssph......Dclh...Dth....c.c.s...u...t....h.h......h...hp......l.........p....t.h...pp..t....hh..phsp.hlh....c.us.pp.h......p.p.s..l..p.t.L..sp.h.h...p.ss...h.p....................phsh...c..lcplEscsDplhcphh.p.c.LFs..........tp..t..h....-...s..l..pl...l...th.+.cIh.-tlEcls...DpspcVAstlEsllh........................................................................................................ 1 177 348 453 +2136 PF04322 DUF473 Protein of unknown function (DUF473) Waterfield DI, Finn RD anon COG1935 Family Family of uncharacterised Archaeal proteins. 25.00 25.00 103.10 102.80 20.80 18.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.19 0.71 -4.46 17 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 54 0 43 55 1 117.30 33 92.22 CHANGED MchhuLTGIu.psIp-LhpstlRTlEl+SspNlhslpph.psG..DhlFlTssshcDlhsGTpGllApVhphplshp+.h..tps.hhEE+EhhsuRlQLchlGhu+lh.clhppchhpshhV..- .MchluLTGIucpsls-Lh+splRTlEl+SspNlhslppl.psG..DhlFlTssshpDlssGTpGlIuclhplplspp+.h..tps..h-E+EhhsuRlQLchlGhu+lh.cVpppphhpsshV......... 
1 8 22 33 +2138 PF04536 TPM DUF477; Repair_PSII; Phosphatase; TLP18.3, Psb32 and MOLO-1 founding proteins of phosphatase Waterfield DI, Finn RD, Bateman A, Eberhardt R anon COG1512 & Pfam-B_18715 (release 10.0) Domain This family has a Rossmann-like fold. It has phosphatase activity [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.21 0.71 -10.28 0.71 -4.24 162 3085 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 1821 5 879 2585 289 124.00 22 42.17 CHANGED hLo..sspppplpptl........................pph.Epposs................plhllhh.sh..s............................................................................................................pshcphAhchapp.ht...ls..pptpss.....GlLlhl......uht..-+..p....hpl..su..........tGlpsh.lscshhpp.llpsh...hsth+pspaspulhsulptlsphl ................................................................................Lssppppplpp.tl.........................p.ph...cpp.s..ss................plhVlh..l.ss.h..ts...........................................................................................pshcpaApclapp.ht......lG......ppp..pss...............GlLlll..........uhs......-R.p................lpIp.s.G..............hGlcsh..l.sD......s...hhsp.I.lpph......hsth.+.....p.s..c...astGlhtulpsltt.......................................................... 1 272 591 767 +2139 PF04334 DUF478 Protein of unknown function (DUF478) Finn RD anon DOMO:DM06402; Family This family contains uncharacterised protein encoded on Trypanosoma kinetoplast minicircles. 
25.00 25.00 99.60 96.90 19.10 18.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.32 0.72 -4.12 2 2 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1 0 0 4 0 60.50 64 97.58 CHANGED MGVQh.sYTNPVLFWGlFEVRGTSKGVGVILTRFF...............l.Ih.lhlhhGF.pts.a MGVQh.sYTNPVLFWGlFEVRGTSKGVGVILTRFF...............l.Ih.lhlhhGF.pts.a 0 0 0 0 +2140 PF04336 DUF479 Protein of unknown function, DUF479 Mifsud W anon COG3124 Family This family includes several bacterial proteins of uncharacterised function. 21.40 21.40 21.40 23.10 20.80 19.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.51 0.72 -3.93 55 920 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 913 0 158 494 113 106.50 46 55.17 CHANGED aDHaLApcWppap.sp.P.....LspFsppsYptLpst.....tshLPt+htplhsthhppcWLsuYpchsslppsLpphup.Rhs+ss..Lssuht-lpp.pYppL-psFhsFYPpLhsa ..............................WDHFLuRHWsplos-..P.......LppFlsaAppplhsh.....lPcpPsRFlsL.NsYLWuEpWLs+Yc-h-a.I.ppVLsGMA....s.RRP....R....Lcu....L.ps........SahDL-s.HYssLEscFhpFYPchMs.p.................... 1 33 79 125 +2141 PF01867 Cas_Cas1 DUF48; CRISPR associated protein Cas1 Enright A, Ouzounis C, Bateman A anon Enright A Family Clustered regularly interspaced short palindromic repeats (CRISPRs) are a family of DNA direct repeats found in many prokaryotic genomes. This family of proteins corresponds to Cas1, a CRISPR-associated protein. Cas1 may be involved in linking DNA segments to CRISPR [2]. 
20.20 20.20 20.30 20.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.77 0.70 -5.33 140 3143 2009-09-11 00:29:59 2003-04-07 12:59:11 11 9 1958 26 807 2515 107 209.90 20 81.31 CHANGED slalpspGshLphcssslhlpp....................ppc.........................lPlppl..cplhlhGp.lslosthlphhscpsIsltahstp.....G.pahuphhs...........thstsshhph.tQhpthhspptpltlA+phltuKltNttphL.....+htppp...............hpphhpplp.t......h..hpph............p.slsplhGlEGpuuphYapshs.pll................................................p....sapFp....tR......s+RP...P..pD.lNAhLSaGYulLhspshsslhtsGL-PhlGaLH.p...sp.s+.oLuLDlhE.FRPhllDchlhpLl...scphlp.tpcF..t.t...............sshLscpu++phlptapc+ ...................................................h........t.lphtttthhh.p..................................ptt...............hhtlPltpl..t.lhl.st.stlotthhthhsp.sh.lhah.st.t.....s..h..hhth...............ttps.h.hhhp.hph..s.t.t...phtlspphht.ph...............................................................................phtplhsh.Eu.t.hsp.hathh...........................................................tht.ap....t+........p.............ts...h..Nthlshu.s.hlhs.h.ttlhhhGhsshlGhhH.......p.t..uhs.Dlh-.h+s.hss.hs.hthh....t.......................................................h............................................................. 0 307 576 708 +2142 PF04337 DUF480 Protein of unknown function, DUF480 Mifsud W anon COG3132 Family This family consists of several proteins of uncharacterised function. 
25.00 25.00 30.30 27.40 24.00 21.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.80 0.71 -4.35 53 923 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 913 1 173 537 52 150.80 62 67.31 CHANGED LoshEuRVlGsLlEKphTTPDpYPLSLNuLssACNQKSsR-PVMsLoEu-VppuLDpLpp+pLlpp..sshGuRVsKYcH+Fsps....LpLsstphAllslLLLRGPQTsGELRoRopRhapFsDlspVEssLppLtp+p....ssLVscLPRpPGc ...LTAhEARVIGCLLEKQVTTPEQYPLSlNullTACNQKTNREPVMNLSEuEVQ-pLDsLl+R+.hlRs..SGFGsRVoKYEpRFCNoEFGDLKLSuAEVALlosLLLRGAQTPGELRoRuuRM.a-...FuDhuEVEusLEpLAsRE.....sGPhVVRLsREPGK...................... 0 32 81 128 +2143 PF04338 DUF481 Protein of unknown function, DUF481 Mifsud W anon COG3137 Family This family includes several proteins of uncharacterised function. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -10.91 0.70 -4.61 121 1242 2012-10-03 17:14:36 2003-04-07 12:59:11 7 4 1038 0 283 927 368 208.30 31 76.69 CHANGED schGht.hsoGNTcosshsuphp.hphcts.pachph.phph.h..............p.............pss.sp....s........oscpahhshphcaph..scphahaupspacpDcFs.................shch.ctshusGhGaphhs.scpt...pLslcsGsuaphp.........chpss.............pspsph....hspsshsa.paplscshphppphph...........h....sssshphps-suLpspl..sss..luhcluhphcasops.ss.ut....cpsDophshsLsYsF 
.......................................................................................................h.phGah.upoGNTc...os.S.l..suc....ss..hsah.st...pp...tasl..husupp.............s..................sss.sc.........c.............ou-+assuscscapl.......s-hs.......Yl.a..G.pusahs...D+as.....................................................uYc....p.+.ss.l.s.uGhGhQhls..sshp.......shch..E...hGPGhRas........cascs.................................sscsps......luhuussY..ta...p.l...o...-..s.scF..s..pslol..................................h.........u.sp.cT....slsSE.....su....LsssI....scc....huLKluasl.sasopP.Pp..us.........c+oD.ppoolsLsYs......................................................... 0 92 164 226 +2144 PF04339 DUF482 Protein of unknown function, DUF482 Mifsud W anon COG3146 Family This family contains several proteins of uncharacterised function. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.14 0.70 -5.59 99 853 2012-10-02 22:59:21 2003-04-07 12:59:11 7 7 784 0 330 1004 1212 354.00 38 92.18 CHANGED ulspIssspWDul....s...............s...................s.......tsPFlpasFLsALEpSGssss..cTGWtPpHLslp.cs.......s.....pl....................lussPhYlKsHShGEYVFDauWA-AapRsGhcYYPKLlsulPFTPssG.RlLht...st.cpsth.tpsLhpultphspp....pslSShHlhFss..ts.-tth...........hp..ptGh...............hpRhspQFHWpN......pGYpsFDDFLusLsS+KRKsIR+ERcps.tppGlplchLpGs-l..spppW.ctFapFYpsThs++..WG.pPYLo+pFFphlscp.hs-pllLlhA....c............+.sGc..lAuALshh.......u.....u-sLYGRYWGshEc..hshLHFEsCYYQuI-aAItpGLpphEuGAQGEHKluRGahPssTaStHaltcsuhcpAlscaLppE+ttlpthhctls.pth....PF+c 
....................................................................................t.lsplstspWsul....ss........................................t.sPFlp+sFLpALEp..SG..ss....ss..poGWtPpHlslhcs.....s........pL....................................................lussP....h...YlKsH.ShG..EYV.FDauWA-A.apRt.G..h..pY..YP...K.Lhs...ulPF...T.Ps..s..Gs.R.lLst.......t...p...t...t.th...tttLh.tulhp...hspp.....ps.l..SSh...H...l.h.....Fss.....t...s.....-..t...th..........h..p.....p.....t..Gh..................................ht.Rhsh...Q...F.HWp.........N........................pG.......Y.....tsFDDFL.s.s.L..su+.KRK.slR+ER+ps...t............t....t.Gl...p.hchl..p.G.scl...sp.t..pW.chFaphYtsTh..tc+....hu...pP..Y...L......s..c.......pFFphlu..p.p...h.....s.....-.p..llLlhA...c........................c..s..G...c...lAuAl.shh.....u........................sssL....Y....GRYW...G......s...h...c..c......h...s...t.....L..H..F...E...s.C..Y.Y...Q......u.........I-aA.ItcGLpp.h.E....u.G.......A....Q....G....E..H.....KlsR..G..ahPsh.T.aShH..als....csuhppAltcaLppEpttl.pthhptl.p.pt..Pa+.................................................................................. 0 81 191 269 +2145 PF04467 DUF483 Protein of unknown function (DUF483) Waterfield DI, Finn RD anon COG1790 Family Family of uncharacterised prokaryotic proteins. 25.00 25.00 39.40 33.20 24.30 24.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.48 0.71 -4.07 4 23 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 23 0 16 23 3 120.50 47 61.63 CHANGED h-hQI.lsc+hpshlRs..tl-shIs...sELGlhR.h.stp.G+LLsYP-CCl+SasEspR.hsh-tcaLtEsuEhs...h.....hGhhhIhhP....SsFIPCSLcCp-AlccshIuhhh+-EFcchh-Lcc.Lh ..........................LchQIEIVcKYpscVRP..AIDPhVS...oELGIYRRLD.DhElG+LLsYP-CCl+SFsEssR..huIDp-HLKElEchchc.........................s..hYAIlLP...............SGFIPCSLcCccAlcptLIuhlsccpac+lLcLEcEL............. 
0 4 8 12 +2146 PF04340 DUF484 Protein of unknown function, DUF484 Mifsud W anon COG3159 Family This family consists of several proteins of uncharacterised function. 24.20 24.20 25.30 24.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.30 0.70 -4.88 6 1222 2012-10-02 14:34:25 2003-04-07 12:59:11 7 4 1147 2 271 774 583 214.30 37 92.58 CHANGED ts.pssLsspsVs-YLppHP-FFhcascLltcLslP+psussVSLschQLsRtRp+hccLccclstLhs.AtsN-plFhchhtLphsLhcApSLsDslppl-phs+chhhtshspLlLhsDsth..u.u......lupcshp.sthspLsscpshhGhLphs-thhLas-.-ApplGSsAll.Lu....pt.hGllAFuSpDspHFpsuhGT.FL+alAplLschLc..RWss .....................................ttLss.csVs-YLhcpP-F...Fh+pschlcsl....c...l...P...H....s...s...p...u...sV.SLVEhphtRhRp+lc..hLE-phshLMcpAtsN-sLFh+lhpLptpLh.sA..s.S...Lp-hl........hp.h...pchsR-.lhhs.s...s..sL...+.L...a....s...D......p........hp......h........u...........u.......h...........p.....h...s...l..u....c.........p..s..h...c....s.........l.p....h.....p........+........L......u.....t........p............p........t.Y.....LG......s...L.s....s............s...E...h....h....h...l..........h.........s.........p...A......p....tl................G............SlAhshLs................ssu.s...l..GlllhuScDspHapsu.GT.hLpplAh.hLsclLp..Ra..p............................................................................................................................. 0 52 138 206 +2147 PF04341 DUF485 Protein of unknown function, DUF485 Mifsud W anon COG3162 Family This family includes several putative integral membrane proteins. 
28.00 28.00 28.20 28.00 27.80 27.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.91 0.72 -4.27 132 1454 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1255 0 378 871 80 90.20 38 81.72 CHANGED lpssPcFpcLhpcRppFuhsLohhhLlhYauallLlAassshLup.ls.G.slolGlslGluhhlhsallTulYVp+ANppaDtlspplhcc .............lpssscFp-Llc+Rp+FuhhLollhLshYhuFlLL.lAauPshLu.sPlt.G.slThGl.s.lGlGlI.lhoFlLTulYlh...+ANscFDclsppllc................. 0 96 224 312 +2148 PF04342 DUF486 Protein of unknown function, DUF486 Mifsud W anon COG3169 Family This family contains several proteins of uncharacterised function. 25.00 25.00 41.40 41.30 20.00 19.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.55 0.72 -3.99 4 583 2012-10-02 19:55:49 2003-04-07 12:59:11 7 1 574 0 179 409 45 109.10 55 92.52 CHANGED LhosLLLhsSNVFMTFAWYGHLKphss+PhllAsLlSWGIAhFEYLLQVPANRIGassLoVuQLKhhQEVITLhlFVPFSVaYLppPL+LsYLWAuLCllGAVYFhFR ...h.hsllLLlhSNlFMTFAWYGHLK.....h..h.........s......s......p......P.....lh.....hslLlSWGIAhFEYhLQVPANR.I.G.ap....s....a....osu..QLKlhQEVITLsVFssFo.V.a.Yh.p.E.sL+hsaLhAhlhl.luA.VaFlF+........ 0 56 129 152 +2149 PF04343 DUF488 Protein of unknown function, DUF488 Mifsud W anon COG3189 Family This family includes several proteins of uncharacterised function. 
22.20 22.20 22.20 22.60 21.50 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.63 0.71 -3.74 219 2338 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 2035 0 577 1579 120 111.60 33 83.11 CHANGED plcchhc...hhpssGh+......lLVD.RhaPR.....G.hsK-slphsl..............Whpc.lusspclRchat.tt....p...hsphhcpYtpclps...........pslpcLtp.hs.ppt....lsLlhut+-.p+.......sHthlLt-h.L .............................................p.hcRlY-...htps-GhR......lLVD.RlWP..R..........G...lpK.pshth..D.....................WhK-.luPSs-L.....Rc.hap..p-s...c.................aspFtcpYttELtp.p...t....................pthccLts.ls...p...pp.......slsLLauu.+.c..ppp.......NHAhVLt-hL................................... 0 172 365 488 +2150 PF04356 DUF489 Protein of unknown function (DUF489) Kerrison ND anon COG2915 Family Protein of unknown function, cotranscribed with purB in Escherichia coli, but with function unrelated to purine biosynthesis [1]. 22.90 22.90 24.60 38.60 22.80 22.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.09 0.71 -4.71 56 1032 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1015 2 152 528 101 191.80 55 92.10 CHANGED -pslALAGlhQAupLVpplA+pGps.-pss.....hpsslpSllppsPs.....sshsVYGspp....sL+hGlcsLhstLs.......tspspss-lsR.....YhlullsLE+KLs+ssshhspLupRIsplpcQhpHF..sh............tc-slluslAulYsDsISsLusRIQVpGsPphLQpststp+IRALLLAGIRuAVLWRQlGGpRhpLlFuR+pllpp .......DITLALAGICQuA+LVQQLA+pGcC.Ds.DA.....LcsSLsSllshNPu......STLuVFGGs-...tNL+lGLETLlusLs..........ssppuhsuELTR.........YsLSLMsLERKLous+sAlssLGsRIstLpRQL-HF....DL............ps-slhSuhAuIYlDVISPLGPRIQVTGoPulLQsPpVQuKVRAsLLAGIRuAVLW+QVGGuRhQLhFSRp+Lhs.p.... 
0 30 66 113 +2151 PF04357 DUF490 Family of unknown function (DUF490) Kerrison ND anon COG2911 Family \N 20.40 20.40 20.60 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.40 0.70 -5.36 151 2461 2012-10-03 05:41:17 2003-04-07 12:59:11 8 21 2009 0 647 2270 560 348.80 22 27.68 CHANGED pp..htst....sssGplplsG.....slsh..................tthp......hsLsl.ph.pp..htlhpssthp.......sp.l.sus...lplsG..sh......shplsGp..l.plspuplpl.phs..ssts.s...........................shhhhttt...............................t..h.h.h...thslplph.....ss..........plhl...............cu.u...l..csphs.G.sLpl..........sts.....ss.hthsGplplh.c.Gp.hph.....h.upphpl.p.pGplsas.G.sh....pPhLs.lpAhpps...........................ss...........pls.lplsG...ssspPp........lph.....................p.S...pP.................s.h......................sps-lluhLl.hGp....shsshu.............ttsthssuhuslhts...sshtshh.sp.........ltpt.......hGl...s.slsl.......sstt.........t.......................ttshslplGphl.ssclhlsht..hsh.ss.t...........tphp......lcYpls...pphplcups..s................tt......tlslhaphca 
...................................................................................................hht........stpGpl.pl.sG.shsh........................................tthpshp.....hp.l.sh...pu...sp...........hp..l...hs...sshhp..............hp..l..sss...........ls.l.pu......ssp......thslsG.p....l...sl...stup....l.s.l.phs...ssssss..........................-l.shlsps......................................................................................................pt.......ts.hsl...phslp.lp.l....ss..............slpl...........................suhG............l..cupLs..G..s....Lpl..............sts...........ps...hthsGplsl....c..Gp..ach..................h.Gpclhl..c...cG...p..lh.....Fs...G.s.............pPhLs.lpAhpps....................................sl........................hss.l.plsG..........ss..pPp.........hpl................................................................h..S..pP..........................s.h........................spppsLohLl.hGp..........shssss..............p.ssu.hsshh.h.u....s...sphh.sp.....lsps...........hGl...s..slsl.................so.ps......................................sssspls.lut.hl.tspl.l......phs..huhhss.s..............................phs..........lcYpL.....plhlpuhs..s.....................sp....ulsl.hYphca................................................................................................................................................................... 0 213 426 555 +2154 PF04361 DUF494 Protein of unknown function (DUF494) Kerrison ND anon COG2922 Family Members of this family of uncharacterised proteins are often named Smg. 
20.60 20.60 20.60 22.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.91 0.71 -4.55 48 950 2009-09-11 06:38:14 2003-04-07 12:59:11 8 1 940 0 158 376 87 154.50 57 99.23 CHANGED MFDVLhYLFEsYhcs...-.-hpsDp-pLsc-LtcAGFcppEItcALsWL-sLushp......................tshhtsssssoh.RIYospEhp+LsscsRGFlhFLEphsVLsspoREhVIDRlMuL-ssplsL--LKWllLMVLFNhPGpEsAathhE-Lla-pps...thlH ..............MFDVLMYLFETYIHs..........EAELpVD..QDcLpp-Lo-AGF-+EDIYNALhW.LEcLA-hQc..................s.scPht..hso...D.PhSh..RIYTsEEs-RLDssCRGFLLFLEQIpVLshETRE.MVI-RVhAL...Ds...sEF...-L-DLKWVILMVLFNl....PGsENAYpQMEELLF-ss-.GhLH............................... 2 38 88 123 +2155 PF04362 Iron_traffic DUF495; Bacterial Fe(2+) trafficking Kerrison ND, Mistry J anon COG2924 Family This is a family of bacterial Fe(2+) trafficking proteins. 20.90 20.90 24.90 24.90 19.20 18.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.95 0.72 -4.55 65 1349 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 1339 3 260 617 383 87.20 59 96.06 CHANGED Mu..RsVhCtKLp+EuEGLDhsPaPG-LGc+Ia-sVSKcAWppWhc+QTMLINEp+LshhDscsRcaLpppM-+FhFsps.sspspGYsPP ...........................MoRTlFCshLp+EAEG.DFQ.YPGE.LGKRIaspISK-AWupWp++QTMLINEK+LNMMssEcRKhLEpEMssFLFEGc.-..s+lEGYsP................ 0 61 135 203 +2156 PF04363 DUF496 Protein of unknown function (DUF496) Kerrison ND anon COG2926 Family \N 21.50 21.50 22.30 23.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.10 0.72 -4.17 13 780 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 775 0 75 202 1 94.80 77 85.78 CHANGED hpsVLEhVRhhRRKNKLpREIsDN-+KIRDNpKRVhLL-NLh-YI+ssMShEEIpsIIcNM+sDYEDRVDDYIIKsAELSKERRElS+KlKphKc ...................FQDVLEFVRLFRRKNKLQREIpDsEKKIRDNQKRVLL.LDNLSDYIKP..GMSVEAIQGIIASMKuDYEDRVDDYIIKNA.ELSKERRDISKKLKAM..GE.. 
0 6 23 50 +2157 PF04365 DUF497 Protein of unknown function (DUF497) Kerrison ND anon COG2929 Family \N 23.70 23.70 23.70 23.90 23.60 23.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.80 0.72 -4.06 96 916 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 614 1 261 806 130 76.80 33 83.44 CHANGED WDps.KsptNhpKH.GlsFt.-AttlF..cshtlhh.................Dtccs..sEpRa.hslGh...ss+llhVlaThR....ts......tIRIISAR+As++Ec ....................................WDts.KsppNht..K....H.GlsFp.-Attl.F..ss..tlph................pDtpp....sEtRa.hslG.....hh.........st.p..llhV.saT.R........sp........hlR.lISs...R+Asc+E........................................ 0 68 179 219 +2158 PF04430 DUF498 Protein of unknown function (DUF498/DUF598) Waterfield DI, Finn RD anon COG1504 Domain This is a large family of uncharacterised proteins found in all domains of life. The structure shows a novel fold with three beta sheets. A dimeric form is found in the crystal structure. It was suggested that the cleft in between the two monomers might bing nucleic acid [1]. 20.90 20.90 20.90 22.80 20.70 20.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.42 0.72 -4.51 101 971 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 868 12 443 853 223 109.00 28 70.39 CHANGED lpuY..usGthpl.s....shp..........apuulllhPsuh..hsW..s..............stphppls.......hcchshlhsh...ps-llllGTGtc..hthlssplhptl.pptG..lslEsMsTssAsRTYNlLhuEs..RpVAAALl .................................................................lpuYussthpl.s.....shp..............hps..s..lll.hs...suh..hsW..p.................................................sps.ppls........................psh.shlhth......p..s....-llllGT.Gtp...hth..l.s.sp....lhthl....ppt..G..IulEsM.......sTssAs+TYNlLhuEs..RpVuAALl.......................... 0 132 244 338 +2159 PF00674 DUP DUP family Bateman A anon Pfam-B_99 (release 2.1) Family This family consists of several yeast proteins of unknown functions. 
Swiss-prot annotates these as belonging to the DUP family. Several members of this family contain an internal duplication of this region. 21.10 21.10 22.30 22.00 20.20 20.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.64 0.72 -4.09 32 469 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 27 0 183 427 0 103.60 29 52.60 CHANGED hlhhts.....hhhss.h.hhllhshhhhhhhhhsF.sht....hhsp-achphLhElIsc+Pu.tuc-WcpIAhpMNpYLF-cclWpT.YaFassp-CppaFpphltthhstKps ..........................................................hh............h.h.hhhhhh.hhhhs...h..h.h...hh..lhsh.php.......hsp.ph...hphlp-llpppPu..sspsW-tIA...tphNpYLa.-p.+hWpo.t.h.aFasutcCpphFpphlhp.hs.p..p................ 1 36 47 123 +2160 PF04465 DUF499 Protein of unknown function (DUF499) Waterfield DI, Finn RD anon COG1483 Family Family of uncharacterised hypothetical prokaryotic proteins. 21.10 21.10 25.60 24.10 19.70 20.90 hmmbuild -o /dev/null HMM SEED 1036 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.71 0.70 -13.89 0.70 -7.00 5 220 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 188 0 107 235 48 429.10 21 44.17 CHANGED 
PcALcDcEV....L+sYcs-KREplcElu-hIcsl..ulclVlVYGKss..lPpPs+PL-hsst+lKTlWGYIAapLG+YElVEcsD+NLTsPshElL+-LlpGc+VLlLIDEIuDYlDslppSusEE-RsYucNVusFLD+LApALos...SpSlMVITIPMEcEGts..lKspcEYcR..-lVRulh-AVoRVuGtclYoPV+h.GRcsELlEVLKKRIFK+lDcsE+c+sLp+L......REshus.+EIFG-sSpFhcEIccTYPFHPEYIpVLRsIIERls.LQRTRDLIRI...TRIVlRc.LlcuhE....sPsLIMPaHIDLs--cI..+GlLFucpocYuDYtoIa-s-lIs-EKl...Kchu+PELu+IILoYIFL+TYPaDS.....................Ppshs-FPTsccIARuVYEPEhFcpppWlPsDIKDslEEI+uSl+FlaLs...++Dus..aWFWRlANVochVcSKsEELIEos+G-VhspLVKp.lc+Ll+EuKSlcpuRu+tsps......caVsFFcspsVIVoK..-PQElhDos-YKLhVlVpDDV-EDhLc+lIFhhsousRTY+NTVVVlYlusc.uM..........................-oLlEhpAplhAC-cVM-cI+EhY.........uKYGKDVlcIQ+ohVc-I+c+ALED.LENQIlpsFR+VAYPctDG....l-pspAsASSKSVlENVYSALsS+..GKIV.-EFDFEpLtshLc-.Vslcl.+sEGYsVSELhNlIpS..NPcLPMIcpccLh-AIK-AVccL-IG.IERcG+IaFK+IY..KEIPch.--E+GcsPusVR..........cDVILPt.....-EALpRQlpELLc-EK--It-+sGp+Ycl+h..............WYE..IYLPppNhplPL+slV...scEscV+DE..ptlhaGaIVE++EEcpIl+GEF-..LpVucupIcGKPG-sV-lcV+lpPlGDtchsVELsosaG-LsoccVpL.EGpsl+lpW.phplsc.+c.shIcuKS.-s+ttphElllIPKlEp-IlEVcElcEpHKGslLlSI+..Slcs....lDoL-pIsc.aEGs........lsGSLEl.....EcPlWcVpF..cssDhcVFcaIluElc-aLGSpsplc..V-lchSEElhINDLlhEKL+PLsG+VKF+lKKG-.pc 
.........................................hhh.shs.......................................................................................h...pT.lWG-lAhQL.......G...........t.............ap...h....l...tp.t.Dt.p.t.hs...P.G.tphlpc.Lh.....p....t.....p.P..sLILlDElssYhc......phh.tst..s.h.s..sss.htpph.sF.lp....s.L..o.c.A.sps.....s.p.s.h.lV.l.o.ls.........s...s.................p...t..t..............................tt.....t...h....t....p.........................................thl.ppL....pp....hhs.Rl.t....t..h.p...Ps.........stp.p..hacIlR+R...LF...p..p.......l.......s.t....p....t......t.......p...p....sh....pph...................tphh.p.p............p.............h....s.....p.........p...s.....p....a.........tcc.lpp.......uYPhHPc.lh-hL.h.c..h...ssh....s.FQRTRGlLRl...hutll+p.La.pp.tp...............ts.LIhstp....lsl.ss....pl..p.......h.h..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................ttttt.................................................................................................................................................................................................................................................... 0 43 78 94 +2161 PF04366 DUF500 Family of unknown function (DUF500) Kerrison ND anon COG2930 Family Proteins in this family often also contain an SH3 domain (Pfam:PF00018), or a FYVE zinc finger (Pfam:PF01363). 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.40 0.71 -4.69 74 1081 2009-01-15 18:05:59 2003-04-07 12:59:11 7 22 629 0 603 1018 204 114.40 29 34.00 CHANGED hsG.uuhGh.QlGs-hoDhVhllpspculcshhp.uphoLGussSlAAGPlGRsupussssth....u....................................................................slaoYS+o+...............GlFAGloL-Guslhpcp-s..NpcaYG..........plsspcILtGps.s..........s....ss..uptL...hpsLpph ..........................h.susuhGh.QhGsp.tchVhlhpspcAl..c..sF...pp...ssho.lGuss..........o..........lA..s.......G.s.lG.t..sup.s.ss..s.sst.....u...............................................................................slh.s.a.s.p.o.+...............GLauGlol-Gohlh.p..tt...s.thYt...........h.....l.h...t...............................................h................................................................................ 0 172 359 505 +2162 PF04417 DUF501 Protein of unknown function (DUF501) Waterfield DI, Finn RD anon COG1507 Family Family of uncharacterised bacterial proteins. 
22.60 22.60 23.20 23.10 21.70 22.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.73 0.71 -4.33 43 550 2009-09-11 10:23:03 2003-04-07 12:59:11 7 4 523 0 173 424 260 123.50 47 54.26 CHANGED VutRCssGhPsV.lpotP.LsDG........................pPFPTlaaLTsPplsttlS+LEupGhh+chpccLspDt-LtstatpAHcpYltcRpplh........................h.tlssh.usGGhss......+....VKCLHshlActLAs..ssNPlGchslsh......l ..................................................luhRCs.sGpPsV.ltTtP+LssG......................................................oPFP.ThYYLTsPthsuthS+LE..us.G.lM+cMscRL.....up..D....t..E....LAstYcpAHEsYlAcRstlt......................................................................shh.s.sh.SuGGM..Ps...............R..........VKCLHsLlAHu.......LAt...GPGlNPhGDEsls..l......................................... 0 68 127 162 +2163 PF04367 DUF502 Protein of unknown function (DUF502) Kerrison ND anon COG2928 Family Predicted to be an integral membrane protein. 20.80 20.80 21.20 21.00 20.20 20.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.52 0.72 -4.28 140 1039 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 869 0 414 929 1403 106.80 31 47.23 CHANGED lllsllh...l.hllGhlspshlG+tllphhEp.llpRlPll+slYsul....Kplh-slhssppps.Fpp....sVLlcaP..pp.GhasluFlTspstsclttth............chluValPToPNPToG ..................llslhh.l.hllGhhs.p...s.h..l..G..ppll.phh-t.llpR.l.PlV+sl.Ysul....K.Q.........lscslhsspsss...F+c.................sVLlc.a.P......cp.GhasluFlTupss.sphttthtp..........-hluValPToPNPToG........... 0 128 287 356 +2164 PF04456 DUF503 Protein of unknown function (DUF503) Waterfield DI, Finn RD anon COG1550 Family Family of hypothetical bacterial proteins. 
21.20 21.20 23.70 26.50 20.80 20.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.49 0.72 -4.39 79 654 2009-09-13 11:37:40 2003-04-07 12:59:11 7 2 651 1 239 472 24 88.80 34 91.95 CHANGED hlus.lplclhl.ssspSLKEKRtll+slls+l+p+FNVSlAElsppDhapcutlGlusVusspspscphlppl.ch...l.-pps.-h-lhshchE ......hluhlphclhl.s.-s+SLKEKRull+slls+l+p+F.N.VSlAElspp.DlapRstIGlusVSuspspscchlsplhca...l...-uts.EhElhpsch................ 0 109 193 227 +2165 PF04457 DUF504 Protein of unknown function (DUF504) Waterfield DI, Finn RD anon COG1531 Family Family of uncharacterised proteins. 22.20 22.20 22.20 22.50 21.90 21.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.09 0.72 -3.81 36 171 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 169 0 131 171 3 53.70 33 12.31 CHANGED hlsYhcR..s.s......sh.pplshspltphpsthlhh........-shIPhHRIlcI..hpcGcllWcR ................lsY.DR...ht..........sh.cclshsphcptpsshh...............-phIPhHRIlhlh..+p.....cG..cllWcR.... 0 40 72 106 +2166 PF04458 DUF505 Protein of unknown function (DUF505) Waterfield DI, Finn RD anon COG1542 Family Family of uncharacterised prokaryotic proteins. 
28.50 28.50 28.90 101.90 25.40 28.40 hmmbuild -o /dev/null HMM SEED 591 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.88 0.70 -6.39 5 33 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 29 0 22 36 0 543.50 33 98.34 CHANGED MlLpKRHAElL+clKEscppsEhctKlsE-Fcpcsl.....hELEltGLs+LcGsscacLT-AG+phsphLtEhlDKhsLEc.P-pW.tp.RWlGSEIIpMl+hscLs.G+VPEcWtctLcERGLA-E.........ssLTcaGKslLElY+EoHP+LYlTsElAuaIR+MPcGPGshcphlpYKsTlshGDNIlpALEAMRLLlISPPs-sG+sYALTuLGptVcpALchsPsss+sLILcclhEDFhsh.....pcLc-uEcsscLEcuGasscGsLTchGE+lh-sYKhhGKs-ahslPsasLE.DEl+lLKsI-+LhKKaEcNP-llPThKEIcR.h.....................................................EahRsDlD.lsssL+sLES+-LIc+cls+cG....KssYhLTEaGE+VL-s.....lusVoA-GVKAIThocu.F-uPssEWlccA+-EGLV+sGAlT-KGRhYu+LS+cIKRKPaLT+aEhplLtKlP+KuhIcR-cLlEhlpDcVGuEE+cIlcALucLEAKGFV-EL.NssVpLTEAG-clKTAIcsupp.Ellcl+aPVTPssaRlLKAIYDptcpas+tcKl+tpPKsaKt.....lcKcLuLosEphKKsLsLLRplGFlGppuLTEAGlsLLcAa- .MllpKcHh..hLpclhppcp.tchptthhEchpp.csl......ELplhGLlch...tss.tphpLTauG+thh.hLt-.....................hhcch.................lp....p.P-ta.ttaRaluSElIuMlchutcs.uc.l..st..h.hc.tLcERGhAc-..........ssLophucslh-lY+.ps+P.clhlss-lApalRcMP.GPup.......pshh.shssp.hthLEAMRLlsaSlP..pu-sauhTsLGptlppAlphh.shstc.sll.c.chh...tDh.t.......ptlp..ch..cpph.hphhhh.stpGplp.hGcthh-sY+lhpcts.hsh.saslp.pEhclLcsIcclh.c.+.h-psP-.hPT.cp.Ic+.h.....................................................pahcpsh-.lptsLasLEuFsLlcpc.pcp......+.sYhlTpaGccVl-t.t..thpslousuVKulThscp.hssPshEWhppAccEsllusutsTppG+hYtcLuhphcRhPhlT+hEhplLppl.Pp+t.h.ts.hhc...shsht-Eccl.....ALscLEA+GhlchL.sshlhhTEhGchhKcAluus.p....shthPlsPhhhRlLpAltchGshas+tpKlRh.PcshKc.....hh.Kh.uLs.-phccsl.lhR.stalGcpulsEAGlhLLcAh.............. 0 11 13 18 +2167 PF04368 DUF507 Protein of unknown function (DUF507) Kerrison ND anon COG2952 Family Bacterial protein of unknown function. 
23.00 23.00 25.60 25.40 22.60 21.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.17 0.71 -4.65 20 269 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 241 0 73 139 10 164.00 43 99.34 CHANGED MRL+ht+lPYIApKIshDLhssualcl.spsl-sltpphpcILcEslpcEctL-E+s+ElLEcpp-E..h-hhplDc+phFahlK++LAs-hshhLs...........hEDRasploHpIL-tLhcpchIc.asVs-NplKNlIasuIcsalKta-cIEctVhcKIcsYK++llsGo-Ea-llacKLYEEEL+KRGhl ..........MRlKLsHIsaIupKlhhDlhpSshlEl.KsphEpLp.pIhplLEc-lhpEctLDEcs+ELLEpppDE..hE..h..MphDc+phFWhlK++LA.-hslhLs............EDRtNcLuHpIL-cLl-pDhIp.Fh.......VSENpl+..NlIapSI-sYlKh.YEclEsEVaEKIpsYKpK.lsGS-EY-LlFEKLYpEELRKKGh..... 0 28 58 71 +2168 PF04370 DUF508 Domain of unknown function (DUF508) Finn RD anon DOMO_DM06371 Family This is a family of uncharacterised proteins from C. elegans. 25.00 25.00 33.70 33.70 19.40 19.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.95 0.71 -4.60 2 11 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 5 0 11 9 0 141.30 44 57.13 CHANGED pPSthoTcsspps.spssTSuoShssuSpppSlPo....Ta.ssapphst+hVTMVpVKFlLlHpDshpRRsQShFTDEF.SDCRLE-VllNFpQLCsRQLhDtphpPRLSYCIGElshpsSKPV.usDhuKTLsQLAsopslhQhulIVD....Nhc. .....................................psssSss..S..ouS.s...Ssh..hSlPph....Ta..u.s.ph..p.p..ou..KhVhMVaVKFlLLHhDlh+RpsQSsFsDEFsSDChLpDVlhNFpQLCsRQL+s..p..chpPRluYCIGEls.hNSKPVhppDLtKoLAQLAsopolhQFuLIsD........st.. 0 1 4 11 +2170 PF01871 AMMECR1 DUF51; AMMECR1 Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family This family consists of several AMMECR1 as well as several uncharacterised proteins. The contiguous gene deletion syndrome AMME is characterised by Alport syndrome, midface hypoplasia, mental retardation and elliptocytosis and is caused by a deletion in Xq22.3, comprising several genes including COL4A5, FACL4 and AMMECR1 [1]. 
This family contains sequences from several eukaryotic species as well as archaebacteria and it has been suggested that the AMMECR1 protein may have a basic cellular function, potentially in either the transcription, replication, repair or translation machinery [2]. 25.00 25.00 28.30 28.10 24.60 24.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.00 0.71 -5.13 76 937 2009-09-11 09:06:12 2003-04-07 12:59:11 12 8 800 7 555 903 86 170.30 30 62.01 CHANGED llplARpslpptLpstp...............hssthpp.ptusFVTl.cp..........ps...pLRGCIGthps..........tsLhcsltcsAlsAAhcDPRFs..Plptc.ELsp..lplEVolLos.c.ls.......h..cplc..lG+aGlllct......G.hhpGlhLPQVssEpsW-tppFLsphsh.KAGls...sssW.ppssclhpFpupl..ap....E .......................................h.hthshpsl.thl.pttp................t.h.s.t.th......pp...ptulFVTl.cp...................ctpLRGCIGohps..........tsLt.psltchAls.u...........Ah.cDs......RFs..Plptc.E.Lsp..LphsVol.Losh.E.hs..............................................sh.tchc.....lGpaGlhlch...............................s.tpsus.hLPpV.hEp.sas.p.p.p.h.l.s.plhp.KAGht.......s....p.pshclh+apsphh............................................. 0 209 357 469 +2173 PF04459 DUF512 Protein of unknown function (DUF512) Waterfield DI, Finn RD anon COG1625 Family Family of uncharacterised prokaryotic proteins. 
24.60 24.60 24.70 25.60 24.30 24.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.17 0.71 -5.25 60 441 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 416 0 174 431 193 194.80 30 44.54 CHANGED VPVGlT+aRt..sh.tLpPlstcpAtcsIsplcphQcchppchGstFsahuDEaYllAGpslPsht.YpshPQl-NGVGhl+.Fhcchtcthpp.ls.......pppcholVTGphstphlpplhccL.sthpGlplplhsltspaaGpsloVsGLLTGpDllptLpt.....p-lG..-tlllPslML+p.......spthFLDDholc-ltpp...Lssslhllcussc .....................................VPlGlT+aRp......tlhtLpshspcp.A..tcslpplcphQ..........cchhp.ch..GppahahuDEaY.......llA.....s.....p.lPs.phY-s.asQlENGVGhlRhhhpp.hppshpp..l.......................httppphollTGphuhthl.pphhcpl...p.....th...sl...p.lplhslpNcaFG.pslTVuGLlTGpDllppLcs..............................pplG....-tlllPssML+p.................scp..lFLDDhol.c-lppp...L.t.hplhlsp....t...................... 0 71 129 159 +2174 PF04375 HemX DUF513; HemX Kerrison ND, Moxon SJ anon COG2959 Family This family consists of several bacterial HemX proteins. The hemX gene is not essential for haem synthesis in B. subtilis. HemX is a polytopic membrane protein which by an unknown mechanism down-regulates the level of HemA [1]. 
29.30 29.30 29.50 29.30 29.20 29.20 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.09 0.70 -5.87 5 1205 2012-10-02 17:03:51 2003-04-07 12:59:11 9 3 1165 0 207 918 162 356.10 38 87.20 CHANGED uusPp-sAcpssssSoPsAscsss+susussLAulALLlA.LGLGAGGaha............uLQQlschps+s-ALApclpQlssAp-s........spcpplpclp+ssp-thcQLc+pLushs+plsplQppVuslpuu......sssDWLLAEADaLL+LAuRKLhL-pDVsTAsuLLcoADspLAshNDPulpAlR+AlAsDIssL+AlPpVDpsGlhlRLssLpNQIDsLsLlssshc............tuspsusupAlo-ulu..............-WppNLp+Sl+uFhssFIsIRpRDsoshPLLAPsQchYLRENI+L+LpsAthAVhscQ-ElYRQAL-sAuoWlcuYFDT-sssspuFhtplDpLAcQslpV-lPspLQApssLpcllscRlpslph...ulutsAscu ........................................................................ppstpt.s..psss.p.s....s..s.s......p.t...t.......p....p....p.....s....s...u.....h.....h....L....u.....s...l.Al.s....l..A....LA.h.G...s....G..h.ah.a..............................................s.p.Q.Q.s..s.p.......s....s.......p...s.sA...L...ss...QL.s..sh.p..pupp................................sppscL..c.s.h...h...p...p....p...s..s...p...l.c...p....s..pp....p............p....s...h...s...cplscl...Q...pp.l.uslsup..................sscsWLL..ApADaLl+LAuRKLa.-pDVsTAsuLLcoADupLAch..NDPuLlslR+AIssDIusLpul..s..plDhsGllL+LspLpsQlDs..Lt...L..u.s.s.s..s.s.........................................................................tu.s...h..s..s.c....u.pp..l.S.uu..lu..........................-W...p.........N...L...p+...Sh...p...s.......F...hcsF...IsIRRR...Ds.s.s...h.PLLA.P....sQchYLRENIR.RLLsAtpAV.++QpEsY+puL-ssusWl+uYaDsc.cs.sT+uFLccl-..pL.upQsI..s.h...-.l..P.-...pL.p.S.p.shLcclhppRhpph...............stst....................................................... 0 34 94 156 +2176 PF04415 DUF515 Protein of unknown function (DUF515) Waterfield DI, Finn RD anon COG1627 Family Family of hypothetical Archaeal proteins. 
25.30 25.30 25.30 49.70 23.90 25.20 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.27 0.70 -6.23 6 64 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 39 0 46 66 5 243.40 20 93.40 CHANGED ccpsP..................................KspPhKpPpRRss...................Rluo.+p+......................EcRpRlIIGA.VlslIIlhhshulYhahpspthpE...LENsKssKIuElNphFtu.chuNDsp.......KhthlsKIpuApslEElppIsV.t.hpp...........h-ptchhhplpphKspsIppIKstaG.lhphpl.ssELcpKI.....suplpsLhppVssscshcEllphp.-phLhsh.KhhY.............lchstcchhhoKD-AKcllsphs.ls-LhpapVpsV-hVplslVlsRpphsGulhpsGDpVplYsKss..ushh.tllspuYV.L........upIusSESpSpSoossspSSoooppS.pop.Ssu.tsph.sopp.p.p..soQotspp.SSShSYol.NLspIL+AhAAGKlst.scl+spLpsYGh+L.-lEcsTpl.thsEsTphLlIlcVPs-hVPcllphcsulllshsss ...................................................................................................................hh...lh.hhhhh...h.ha.........p.......hp..+..thttlptha....h.sp..............h..h..pl..u.s.pph.tl..............................thtth..p...h.................................................................................................................................................................................................................................................................................................................................................................s......... 0 8 18 34 +2177 PF04414 tRNA_deacylase DUF516; D-aminoacyl-tRNA deacylase Waterfield DI, Finn RD, Mistry J, Wood V anon COG1650 Family Several aminoacyl-tRNA synthetases have the ability to transfer the D-isomer of their amino acid onto their cognate tRNA. D-aminoacyl-tRNA deacylases hydrolyse the ester bond between the polynucleotide and the D-amino acid, thereby preventing the accumulation of such mis-acylated and metabolically inactive tRNA molecules. 
21.30 21.30 22.80 22.70 16.80 16.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.46 0.70 -5.21 34 183 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 168 4 114 186 85 216.70 29 69.66 CHANGED lptDhh-......hhtcs-hllFlSRHpussshPsLTVHssGNhs.....s.sthGGpPtpluhusPthhssllcslpchs.........s....a-lohEATHHGPothshPshFVElGSoEp-WpcpcAscslAcullcslsp.tp..................pshlGhGGsHYAP+hoclslcschslGHllsKa.hs...........p.ls.........ppllppslp+s..ssch.shlDhKulputp+pplhphhcc.hulcllpp ............................h......t......thtss-hllFhS+Hsup.s.shPsLTsHssGNhs.........p.s.hGGcstpluhusPphhpshLptlpchss........p....s.................a-lshEATH.....H.G.......Po..........t..........hs.sP..........shF..lEIGSoEcpWp..spcAucslAc....ulhcshsttt.ttt................psllGhGGsHYu.s+aoclsh....c....s.p..hs..lG..Hllscatls....................t.ls....................tclltpuhpps...........ssph..shlDhcuhputp+ptlhphlcchslplh.h..................................................... 0 27 64 89 +2180 PF04378 RsmJ DUF519; Ribosomal RNA small subunit methyltransferase D, RsmJ Kerrison ND anon COG2961 Domain RsmJ is the tenth to be found of the ten methyltransferases required for full methylation of 16S ribosomal RNA (rRNA). It specifically methylates m(2)G1516. A strain of E.coli lacking RsmJ due to deletion of the rsmJ(yhiQ) gene is missing the methyl group at G1516 and exhibits a cold-sensitive phenotype. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.62 0.70 -5.38 8 1570 2012-10-10 17:06:42 2003-04-07 12:59:11 8 2 1530 1 321 1377 328 241.30 46 87.23 CHANGED -KPFhhLDTHuGsGRYDLuu-cAc+TGEacpGIuRLhpp...sslPt.LpsYLcsI+phNhuss........l+aYPGSPhlA+pLLRsQDRhlhsELHPsDhslL+spFssDtpVpVcpsDGattl+ApLPPpE.....+RGLlLIDPPaEhpc.EYpclVpAlccuhpRassulhAIWYPIKsRpplccFh+uLcsosl+KlLpIELtV+PsospcsMsuSGMllINPPWsLEsphpslLPaLpppLu.s.sGphpl-Wl.s .....................................................................-KPFhYlDTHAGAGtYpLs....u....-cA....p+TGEahpGIu..Rl...W.pp...........sc.L.P......s...t...LpsYl.ss.l+.p.h.....N.p..usp.................LR.aYPGSP.hl.A.c.p.L.....L......R.c......Q..D.......+.......lpLs...ELHPo.......Dasl.L.......+...s...p...F...............p.......c...............D............p......+...s........p...Vp...+...t..DGapt...L...+...A...hLP...P.hp........RRGLlLIDPPYEhK.s...D.Y.pt.VVpultcuaKRF.uTGs.YAlWYPl.....l......R................p.....p...........l....c....+.....h........h.ccL.......c........s.......s.....G.I.......+.....+...lLplEL......sV.....p.......P.........c....o.........-...............p......p.........GM.s...............uSGMlVlNPPWpL.......-pph.pslLPaLpptL..s.s....sG..phplpWl................................................................................. 1 70 169 245 +2181 PF04461 DUF520 Protein of unknown function (DUF520) Waterfield DI, Finn RD anon COG1666 Family Family of uncharacterised proteins. 
22.00 22.00 23.10 22.20 20.70 19.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.09 0.71 -4.37 108 2052 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 2015 2 466 1129 1083 158.50 49 98.52 CHANGED MPSFDIVS-lDhpElcNAV-pup+ElssRaDFKGoc.usl-lpcc..plpltu-s-apLcplhDlLpsKLsKRsls.h+sl-hs.pspptoGpps+Qplpl+pGI-p-hAKKIsKhIKDsKlK.VQAuIQGDplRVTGKKRDDLQtVhsLl+.pp..-ls.....hPLQFsNaR .......MPSFDIVSElDhpElcNAV-pAs+ElsoRFDF+Gsc.A.slELs-c.....slplhu-uDFplpQlhDILpuKLlKRsls.spuL-hs.c...p...htSGcshp.pscLKpGI-s-sAKKIsKhIKDoKlK.VQApIQGDplRVTGKpRDDLQuVhAhlR...ts.....DLs........PlQFpNFR........... 0 133 293 392 +2182 PF04412 DUF521 Protein of unknown function (DUF521) Waterfield DI, Finn RD anon COG1679 Family Family of hypothetical proteins. 27.80 27.80 27.80 35.20 26.90 27.70 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.03 0.70 -5.92 42 289 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 256 0 143 298 337 382.20 33 88.27 CHANGED hhLocp-cshLsG-tG.puhppAMcIllthuchhGAc+LlslspAHls....Gs..........Yp..GsuuL....cFlcchhp.huu+VpV........PTTlNshuh..Dh.p.hcphs...lstphscttt...clhcAahch.......Gs..csohTCsPYh..lts.hPph.....G-plAWuESsAVlaANSVLGARTN+cus..sLsuAlsG+sPhhGlHL-ENRpuslllcVcs..t.....csu.ashLGalsGchsss..plPllpGLps........sspDcLKALGAAhAooGusuhaHltGlTPE..A.shtssh.t...............-plslsts-ltpshppl....sssspplDlVslGsPHhShpEhcp....luphLcu+c...sshslhlssuRtlhppspc.GhlptlEphGspllsDT..ChllsPl.....hstttps.lhTNSGKhApYsPuhsGhslthGolpcClcu 
.........h.Locc-ct..hLsGchG.cuhphAMcIllthuchhGAp+Lls....lsp.AHls.........Gs...............Yp...GpuuL....cFhcc.l.hp...hGu+Vp....V.................PTTlNs.huh..Dh.p.hctht.....lspp....htc....ttp.......clhcAahch..........Gs..psoaTCsPYh..ht.s.hPph.....G-plAWuESsAVsaANSVlGARTN+pus.hsLsuAlsG+sPhhGhHl-ENRpuplllclps..t...........sss.ashLGahlGchsss..plPhlpGltt........ssp-pLKshGAAhAooGusshaHl.GlTPEA....shtssht........h...........cplplstp-lppshcpl.....tssspplDhlslGsPHhSlpEh....pp.....luphlc......u+p.........psslhlssuptlhttsc.p..GhhptlcptGspll..sDT..C..hls.sPl............thps..lhTNSuKhApYhPuhh.GhpshhushtpClc..................... 0 42 84 111 +2184 PF04463 DUF523 Protein of unknown function (DUF523) Waterfield DI, Finn RD anon COG1683 Family Family of uncharacterised bacterial proteins. 22.70 22.70 22.70 23.20 22.40 22.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.99 0.71 -4.42 164 1833 2009-09-12 22:11:40 2003-04-07 12:59:11 7 10 1453 0 463 1420 90 141.80 33 63.83 CHANGED lhlSuCLlGppVRYc..Guppt..sph.....lpphlpcth.hls..lCPEltsGLssPR.shclh..s............ssph.+lhs.ps.u.pDlTcphhphupptlpthp....slsshIlppcSPSCGhppl..Y........psshs...stthtG.pGl.huthlhcpt.hl.sE--h ..........lhlSuCLhGt..s...sRYD......Gspph.........ssh...........lpphl.p.p...t..h.......hls...lCPEl.t.s.GLssPR.sshcllt.....................t.sps.+l.hs..ps..u..pDVT......cphhphupptLp..phpt.......plss.....h.I.lp..s+SPSCGhppl...Y.............cu.phs......s.t.ptsGsGlhsthl.hcph.hl.sE--............................................................. 0 157 291 373 +2185 PF04411 DUF524 Protein of unknown function (DUF524) Waterfield DI, Finn RD anon COG1700 Domain This domain has been identified as a member of the PD-(D/E)XK nuclease superfamily through transitive meta profile searches [1]. 
The domain has two additional beta-strands inserted to the core fold after the first core alpha-helix.\ It has been speculated that it could function as s methylation-dependent restriction [1]. 22.10 22.10 22.40 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.17 0.71 -4.39 28 222 2012-10-11 20:44:43 2003-04-07 12:59:11 7 5 207 0 64 224 13 157.30 25 23.37 CHANGED pLYEhWCalplhplLpph.....Ghphtppslhpspspsh.thslppsps....hhh..p..sshplpLtYpspht.t...........t..shsp+PDhsLplshtstt......hhhlhDAKYRl-sspct............................p-slspMHpYRD...Al..............ppsshuAalLaPGtps ..................................................pLYEhWsalplhplLpph.....................thph.hpp..s.l.h.p...hp.p.psh....hsslp.cups......hhhh.....p...psstplpLhaptph....t...............................t..sssp+PDhhLplpppspph................hphlaDAKY.Rlphs.tpt...............s.....................pcslppMH....p..YRD....Alh......................tt..hp+sshuualLaPhp.c............................................... 0 28 46 57 +2186 PF04379 DUF525 Protein of unknown function (DUF525) Kerrison ND anon COG2967 Family Members of this family include the bacterial protein ApaG and the C termini of some F-box proteins (Pfam:PF00646). F-box proteins contain a carboxyl-terminal domain that interacts with protein substrates [1], so this family may be involved in protein-protein interaction. The function of ApaG proteins is unknown, but mutations in the Salmonella typhimurium ApaG homologue corD gives a phenotype of low-level cobalt resistance and decreased magnesium efflux by effects on the CorA magnesium transport system [2]. 
19.30 19.30 19.60 23.30 18.80 18.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -9.91 0.72 -4.02 147 1678 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 1515 9 519 1070 541 89.30 48 52.06 CHANGED l........s-pSsPpps+..asFuYslsIp........Npupp.s.....sQLl........oR+WhIT.DusG.chpEVcGpGVVGcQPhlpPG-.sapYoSGssLsTPsGsMpGpYpMhs...cs ..........................................................l..tQSsP-.p.p+..YsFAYslTIc..............NhGpt...s...........VQLl........uRaWlIT.....sus.........G....+.p..EV......p...G.cGV.VGpQPlls.........PG-..p....apYTSGss....lp........T.......P.......h.G.s.MpGpYpMlsc.s.................... 0 150 289 406 +2187 PF04380 BMFP DUF526; Membrane fusogenic activity Kerrison ND anon COG2960 Family BMFP consists of two structural domains, a coiled-coil C-terminal domain via which the protein self-associates as a trimer, and an N-terminal domain disordered at neutral pH but adopting an amphipathic alpha-helical structure in the presence of phospholipid vesicles, high ionic strength, acidic pH or SDS. BMFP interacts with phospholipid vesicles though the predicted amphipathic alpha-helix induced in the N-terminal half of the protein and promotes aggregation and fusion of vesicles in vitro. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.77 0.72 -3.84 131 1484 2009-10-26 17:57:44 2003-04-07 12:59:11 8 3 1473 0 318 838 803 78.10 44 82.37 CHANGED M.ss.chl-.......-lucphspshsss...puhtc-...lEpph+shlppths+lDLVoREEFDsQpplLt+sRp..........+lcsLEsRlutLEsp ....................Mhss.+hl-.......plA+plp-uhstu...hpphucD...lEcplR.p.hLQutLs+..LDLVoREEFDVQspVLlRTRp.......KLst.LEpRlucLEst.............. 0 79 171 241 +2188 PF04384 Fe-S_assembly DUF528; Iron-sulphur cluster assembly Kerrison ND, Eberhardt R anon COG2975 Family This family of proteins is likely to be involved in the assembly of iron-sulphur clusters. 
It may function as an adaptor protein. In Escherichia coli Swiss:P0C0L9 forms part of the isc operon, which encodes genes involved in iron-sulphur cluster assembly. Its structure is entirely alpha helical, and it contains a modified wing-helix structure, usually found in DNA-binding proteins. It binds to Fe2+ and Fe3+ ions and to the cysteine desulfurase IscS, the same surface of the protein is involved in both binding to iron and to IscS [1,2]. 25.00 25.00 26.00 25.40 20.60 18.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.90 0.72 -3.61 61 1135 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1120 2 184 434 221 63.60 63 95.87 CHANGED hKWsDsp-IAhtLh-paP-.lDPts...lRFTDL+pWVhsL...ssF-DDPp+ssEKILEAIQhsWl-Eh- ................lKWTDoR-IuEsLYDtaPD..lDPKT...VRFTDLHpWIs-L...-DFDDDPptS.NEKILEAIl.hsWlDEuE.................. 0 40 86 134 +2189 PF04385 FAINT DUF529; Domain of unknown function, DUF529 Mifsud W, Yeats C anon Yeats C Family This family represents a repeated region found in several Theileria parva proteins. The repeat is normally about 70 residues long and contains a conserved aromatic residue in the middle. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.31 0.72 -4.13 64 1072 2009-09-11 15:51:37 2003-04-07 12:59:11 10 26 6 0 961 1070 0 76.20 15 28.74 CHANGED hlcl.....tshppsphhhhp............hhhhhhthp.pshphpplhh...........psphlWptpsspphhhhhhhhhptt......hhlhlhhsptt ....................................................pp...ht.............ttthhpaph+.suhphpKlhh.......................ssp.hlWcp.sc...s.sp.hs.hh.h.h.ht..........hhhh.......h............................. 1 2 558 558 +2190 PF04409 DUF530 Protein of unknown function (DUF530) Waterfield DI, Finn RD anon COG1771 Family Family of hypothetical archaeal proteins. 
20.10 20.10 20.30 104.10 19.00 19.80 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.58 0.70 -6.40 7 60 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 39 0 47 64 5 306.90 25 99.99 CHANGED .ooptLIspsN+hLDsIs.c..hschhcDhDshh............lhhTLcsNLc.Lp-LcEchEhR.G.tuPh.sltch+utts................HhpalhRKtp.+phhLERl+SAIsuHKIALuhLpt.h.hph.pst.p.p..p.p......................chch..p..slGRlEIlPaLshuG-hhclhuphs..shcuaKcIhshh+pEGhsthcShplplchhEp..G+..hK+h+l-.s.DsDhEt.LRK+aGpphRhcllphh+o+ssLINs+YshcsLALAYhshs......................................................................ccGtcL..........................luhDlF+YYhhTS.c-Rpp.ss.FPul+sslDspa....ol.sls.+chhDhh.s.hp...........hl.hKh-lEcpLsKh+hcls.h...hlGGlhLYhhusas.pcss-lhtl-lEElhc..hhhhlpslhpllhsc..-hpKhEKa.hP+o-KAKpFLpLLpG ............................................................ppp.Lltp.pchLptlt...........t.hp.p.................................l...Lppplp.Lpphcpphph..t...P...h.phttt..................a.phhhhhhp.+h..L-Rl+.AlsuH+hAlu.ltt..................................................t.hphpl...LshsGshh.hh.p.s..hhpta+.hhthhp.pt..................hs.hpthph.l.hhpt..s+..h++.pl-.....shEt.l+ppaGpth.hp.hph.hphs.llsspYshp.LuluY..h............................................................................................................h..DlhhaYhhpo.pcRtt..s.aP.lh.s.p.t.......h.th...t..th.....................Kh.hEp.h.thp..hp......hGuh.l......hp..p..th.th..ppl.c....................htphpph.h.ppt+s+pFLthl..t...... 1 6 11 31 +2191 PF04407 DUF531 Protein of unknown function (DUF531) Waterfield DI, Finn RD anon COG1772 Family Family of hypothetical archaeal proteins. 
25.00 25.00 80.50 79.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.87 0.71 -4.38 9 38 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 38 0 33 41 9 170.90 53 89.30 CHANGED MLTlGLYNTYDpp+lHEAHhRAIARAAPlsYAFsF+LALhsFPac...h.sscElsEhVscpTTIG-uG+YLhtLt-pN+halhDhPp+GFPsQFGplVuTTSKP-ccKslsPh-lAchhL+s+ShhllIGLGR+GLPKElachucYHLDIT.s+tlSLETCTAIGuIsAhIhslhcsh ..MlTLuLYNTYD+pKlHEAHhRAIARAAPICYAFsFpLALhsFPhc.........t.p..-ls-hls.pcTTIG-uGcYLtcLh-pN+FallDhP.hGFPsQF.GpsVATTSKP-ccKtIoPh-lAchhL+t+ShhlLlGLGRHGLPK-lhKhu+YHLDIT.GKslSLETCTAIGuIPApItTlhc.h.... 0 6 12 24 +2192 PF04391 DUF533 Protein of unknown function (DUF533) Kerrison ND anon COG2979 Family Some family members may be secreted or integral membrane proteins. 29.60 29.60 29.80 30.40 29.20 28.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.09 0.71 -4.92 61 974 2012-10-03 21:00:09 2003-04-07 12:59:11 7 5 942 0 139 548 37 188.60 44 85.52 CHANGED uuhlss........husG............uLs.....GGlhGhLlGs+pup....phuusuhthGGh.AulGsLAYpAYpsa..psp...ps...........sssssspshstsss...................tt-pputhlL+AMIuAAKADG+IDppE+ptI.tplschshDsctppalpppLppPlDsstlApts.ps.phAsElYhAShlsl-.-shtE+uYLspLAptLsLssslspplEppls ...................................................................................................ts.sth......LssG.......AL..GGLAGlLlusKouR..........KhGssA..LhlG.Gu.As...h.Gslhap........tYpch.......p...s.................................stQspPQhsspsss.....................................................h-p+oppLlhAhlh............AAKSDGHIDscERA...tI-...ppLcEu..Gl...-...cpucshlEptlc....pPLDPp.cL.Apul.psEEpAh.......ElYhhS.phAID.....D.........pFM...ERuY....LstLucAL+lspslp-tIEpsl.p...................... 
0 31 61 102 +2193 PF04392 ABC_sub_bind DUF534; ABC transporter substrate binding protein Kerrison ND, Mistry J anon COG2984 Family This family contains many hypothetical proteins and some ABC transporter substrate binding proteins. 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.49 0.70 -5.46 9 2266 2012-10-02 13:57:41 2003-04-07 12:59:11 7 34 1502 3 510 1839 143 277.50 32 82.31 CHANGED cIulhthlpHsuLstthpGhp-uLp-hGhpp.p..hhphhNApss.spstphu+pLhscpsclllsIuTPsAQtlhuthcs..lPlVhuAVosPlutpLs.shcpsttNlTGVs..Dt.slppplphh+tlhPss+slslhYsPuEssshshhccl+thAcptGlpVlphslssos-lspshpshsp+ssulahshsshlssuhsu..llpsupptKIPlhsussuhVccGuh.AAhulshpphGhpsuphlhclLcGpcst-lssphssp....phhlNcpshppLGIplscshhspusphc .......................................................................................................................................................................................................plulhphlpHsu..LDth.hcGh.....-sL.tct...Gapp...c..plc.l.......c....h....p..N.A..p.....G.....-....p......u......p....htp.hu.....p.p..h..s.s.p....p..s.D..ll.l.u.I...A..TP..u...A.Q......u..l.........s.........s...s........o.......p........s.............l....P.l..lhsA.V.T.D.P.....lu....A......tL...V.p...s...h.....c.p.P....G...s....N.lTG..sS.......D..t..s..P...lp.pp.lcL...l.+c.lh..P.s.s...K.....p...lGl..l...Y..s...s.u..E......s..N..S...h..s..pl..c.p.h.+..p...h...s...p..c..t..G..l..p...l..h.......p....h........s........l.......s..........s..........o.........s.......-.......l....t........p........s.......s.........p........s............h....s...s...c........l.....D.....s...l....a.....l.....P......s...D.N....s.........l...s...S.....u.....hps.........l..sp...s..u.......p...p.....s..Kl.......Pl.h.s......u..s.s.s.h.....V...c...p.G...u...l...A.u..h..u.l..s..h.h.p.l.G.hp...T.Gc.h.ss.c.l.....L.c.G.c.c.P.u.s.h.sl.phh.pp.....chhlNpch.AppLG..lslspthhtt.....h...........................................................
......... 0 178 346 426 +2194 PF04393 DUF535 Protein of unknown function (DUF535) Kerrison ND anon COG2990 Family Family member Shigella flexneri VirK (Swiss:Q99QA5) is a virulence protein required for the expression, or correct membrane localisation of IcsA (VirG) on the bacterial cell surface [1], [2]. This family also includes Pasteurella haemolytica lapB (Swiss:P32181), which is thought to be membrane-associated. 19.10 19.10 19.10 21.40 18.80 18.70 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.84 0.70 -5.50 34 1261 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 911 0 96 664 6 271.70 36 90.85 CHANGED hhstps.....hhpphpp+hKahhRsllhhtphpphhphlsp...shhpslhpppPphst+lh+PYlspshstppRlpsltpHaphhppthstphhp.lhppt.slpLsplp...tppaslhLshssths+EG-hsL.Lpspp.tphlhploFolh.........sppsLhIGulQGsss....-psp-hl+phTKshHGLRPKpLllEshphLApthslcplluVupcsHlapph+a......ppclhsDYDphWpEhGGp..spsaap.LPlphtRKsl--IsSKKRuhYR+RYthLDplppslpptL ..............................................................t........thhpctpaRh+FhlRsll.hsthsh.phhptLsp...PphpsLls.p.p.Pplsh+lpRPYls.........tshs.tpp+lculh.Hathlpp............h............hs...........t..c..h............p.....hls..p...slpLuc.........lp.sK.......ss-pa..sl..pLs.h.hshc.+EG-holhlp..ssc....tsLuploFol..s...h.p......sppshhIGGLQGsph......chspphIppATKsCHGLhPKcLlhEAhphhApthpl-pll.AV.opcsHlapphRY.h.pK..cpph.hAsYssFWpslGG.....p...p......t.p...aac..LPhp.lsRKslt-IsSKKRupYR+RYthLDslptphst...................... 0 11 32 65 +2195 PF04394 DUF536 Protein of unknown function, DUF536 Mifsud W anon Pfam-B_2107 (release 7.3) Family This family aligns the C-terminal region from several bacterial proteins of unknown function that may be involved in a theta-type replication mechanism. 
20.70 20.70 21.60 21.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.53 0.72 -4.22 29 746 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 588 0 44 358 4 47.70 44 25.93 CHANGED hccQlppcc...........pplpch.......ppLLDQQQpLsLpspchlEc........hKs ..............................hccQlchKD...........pQIsEt..............sK.LDQQQpLsLpuhKchEpLc.pL-p.t+....................... 0 6 14 32 +2196 PF04398 DUF538 Protein of unknown function, DUF538 Mifsud W anon Pfam-B_2637 (release 7.3) Family This family consists of several plant proteins of unknown function. 25.00 25.00 25.00 25.10 24.70 24.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.22 0.72 -3.80 58 600 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 46 1 341 552 1 111.40 29 68.11 CHANGED osh-lLcpashPhGLLPpsl.p.sYshspsoGphp.lhhs......s.........s..Cphphps....hp..lpY.........ssplo...........................GhlspG................+lsslpGlpsK.......lhlWlslsclth....susplhFps..Gshs....cshshssFpps .........................h-lLpchGLPtGLL.Phss..p.pashscsoGhhh.lhhp.....p..........s.Cphpacs....ht..lpY.........ssplo...........................GhlppG................+lpplpGlcsKt......lhlW.lslspltl......susplhFps..usls....csashssFp......................................... 0 30 189 274 +2197 PF04400 DUF539 Protein of unknown function (DUF539) Kerrison ND anon COG2991 Family Putative periplasmic protein. 20.80 20.80 21.00 20.80 19.30 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.62 0.72 -4.47 37 588 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 495 0 111 320 178 45.40 45 59.21 CHANGED hIhp+KsIpGSCGGluulGl-KtCD.C...cpsCDpppc+.hpttt..tp ...aIhp+KslpGSCGGLu.u.l....G.l.-K.sCs..C.............scPC.Dscp++hspttt...p......................... 
0 25 53 87 +2198 PF01877 RNA_binding DUF54; RNA binding Enright A, Ouzounis C, Bateman A, Eberhardt R anon Enright A Family PH1010 Swiss:O58738 is composed of five alpha-helices (1-5) and eight beta-strands (1-8) with the following topology: beta-1, alpha-1, beta-2, beta-3, alpha-2, alpha-3, beta-4, beta-5, alpha-4, beta-6, alpha-5, beta-7, beta-8. The first six beta-strands (1-6) form a slightly twisted antiparallel beta-sheet and face five alpha-helices on one side. The last two beta-strands form an antiparallel beta-sheet in the C-terminus. PH1010 forms a characteristic homodimer structure in the crystal.\ Dimerisation of the molecule is crucial for function. The structure resembles that of some ribosomal proteins such as the 50S ribosomal protein L5 [1]. Although the structure resembles that of the RRM-type RNA-binding domain of the ribosomal L5 protein, the residues involved in RNA-binding in the L5 protein are not conserved in this family [2]. Despite this, these proteins bind to double-stranded RNA in a non-sequence specific manner [3]. 22.90 22.90 23.60 27.50 22.20 21.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.43 0.71 -4.10 52 277 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 140 17 189 274 118 115.90 25 77.86 CHANGED pplplpshlasTEDp-KVhcAlpslh............chphphpptcGaaup.....p.hlptt.sspcsLcplpchlcppcl..cth+s.tlpcplp..ssphhh+lsKQsAa.hGplphsp.ss.....ssIplplchc ..........plplpshlasTED.cKVhcAlpslh............shchp.h.ppppGaas.......h.lptt..pspcslcpltchlcppcl..cthps..lpcplp..ssplhh+lcKQsAa.hGplshsc.ss......ssIplplch............................ 0 47 104 149 +2200 PF04402 SIMPL DUF541; Protein of unknown function (DUF541) Kerrison ND anon COG2968 Family Members of this family have so far been found in bacteria and mouse SwissProt or TrEMBL entries. However possible family members have also been identified in translated rat (Genbank:AW144450) and human (Genbank:AI478629) ESTs. 
A mouse family member has been named SIMPL (signalling molecule that associates with mouse pelle-like kinase). SIMPL appears to facilitate and/or regulate complex formation between IRAK/mPLK (IL-1 receptor-associated kinase) and IKK (inhibitor of kappa-B kinase) containing complexes, and thus regulate NF-kappa-B activity [1]. Separate experiments demonstrate that a mouse family member (named LaXp180) binds the Listeria monocytogenes surface protein ActA, which is a virulence factor that induces actin polymerisation. It may also bind stathmin, a protein involved in signal transduction and in the regulation of microtubule dynamics [2]. In bacteria its function is unknown, but it is thought to be located in the periplasm or outer membrane. 24.90 24.90 25.30 25.30 24.30 23.70 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.12 0.70 -11.52 0.70 -4.47 156 2985 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 2441 0 788 2172 443 200.80 20 84.19 CHANGED lsVsGpuphpstPDhstlslslps..pspssspA.......hpp.s..spphppllstl......p...phu.......lt...p...ps.lp.....s......sshsl.p.............Ppa......p...........................................thtG.Y..........psspp.lp..lphp.cl....sphup......llsthhs...tsh................s..pl....sslpaplsctpphp..pcshppAlpsA+p+ApthApsh....G.hplsplhpl...............p.sstss.................hhthth.ttss..........sshpsuphplsssV.....sssapl 
....................................................................................lslsGpuphc...sssDhsplslplps...........pupsss.su.............tpp.s.....spphsphlshL....c......................ptG....................................ls......c...c-........lp.........s...ushsh..p................................PpY..........pa.p.....................................tpptlpG..Y.......................................pusps..lp..lphc...sl.......splsp..............ll...st...hhp........tGh..............................................s...pl......psl..p..a..s..l..sp.....sthc..scshppAlpsApppApplApuh.....s..tpLG.t.l.h.pl.........phustss...............................ht.h..th.ht......................sh.p.sphphpsplsssat............................................................. 1 250 495 659 +2201 PF04418 DUF543 Domain of unknown function (DUF543) Bateman A, Wood V anon Wood V Domain This family of short eukaryotic proteins has no known function. Most of the members of this family are only 80 amino acid residues long. However the Arabidopsis homologue is over 300 residues long. The presumed domain contains a conserved amino terminal cysteine and a conserved motif GXGXGXG in the carboxy terminal half that may be functionally important. 21.30 21.30 23.20 22.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.67 0.72 -4.02 35 301 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 235 0 198 280 0 72.70 36 72.16 CHANGED Moppsp.......................................plscKWDpslussllKsuhGhusGllsSlLhF+R...RsaPlhhGsGhGlGhuYs-scthFp .......................................................................scs...........................lscKWD+ClushllKsuhGhuhGllhSllhF+R......................RsaPl....hlGsGhGlGhAYspCptpFp........ 0 66 107 168 +2202 PF04424 DUF544 Protein of unknown function (DUF544) Kerrison ND anon DOMO:DM04041; Family Eukaryotic protein of unknown function. 
20.80 20.80 23.00 21.90 20.40 19.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.61 0.71 -4.30 8 377 2012-10-02 14:22:40 2003-04-07 12:59:11 8 9 242 0 265 379 5 124.40 30 22.46 CHANGED sNlLlLpssls........LssppphlTt--LhphLsDhhlp.ts...........shppNhs-shslLs+LtsGLsVNl+Fs...............ulpsFEaTPEhuIFcLlslsLYHGWllDPp.s-.hsculushSYspLhctlsss .............................................................................hNhLhLp.php........................L.s....p...tp..lohppLhphlu.....-hl..lphpst...................................sh.p..tsls-hhslLspLpTGL.sVNs+Fs............................GhtsF......E...h....T..s........E...htl.FcLhsIPLhHGWllDPps..sp....hh....pt.l..s..p..h..SYsph.phl................................. 0 79 132 200 +2203 PF04440 Dysbindin DUF546; Dysbindin (Dystrobrevin binding protein 1) Finn RD, Moxon SJ anon Pfam-B_3919 (release 7.5) Family Dysbindin is an evolutionary conserved 40-kDa coiled-coil-containing protein that binds to alpha- and beta-dystrobrevin in muscle and brain. Dystrophin and alpha-dystrobrevin are co-immunoprecipitated with dysbindin, indicating that dysbindin is DPC-associated in muscle. Dysbindin co-localises with alpha-dystrobrevin at the sarcolemma and is up-regulated in dystrophin-deficient muscle. In the brain, dysbindin is found primarily in axon bundles and especially in certain axon terminals, notably mossy fibre synaptic terminals in the cerebellum and hippocampus. Dysbindin may have implications for the molecular pathology of Duchenne muscular dystrophy and may provide an alternative route for anchoring dystrobrevin and the DPC to the muscle membrane [1]. Genetic variation in the human dysbindin gene is also thought to be associated with Schizophrenia [2]. 
25.00 25.00 30.50 25.70 20.90 17.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.03 0.71 -4.45 7 190 2009-09-10 17:06:14 2003-04-07 12:59:11 11 2 41 0 87 168 0 142.30 37 66.55 CHANGED tths....csscsPpstl..ssplplptcpth..ts.ptEtEhsls...suhLplsE.+RpPluSlSShEVphDLLD.sELhDMSDQEhA-VFhsS--E.stspSPs................s.p...h.+husLcSsShsp.opsppspEcps.sDs-......ss..t.....l- ................................s...ctpcs.chth....sQQh+L+ERQKaFE-shQt-hE.hls...suaLQls-.RR..p.............PlGShSS..MEVNVDhLEQh-LhDhSDQEuhDVFLsSs..sEps.ss.SPs........................s.t.....hs.p....s..s.pSssh.s...psops..p.tttp.p..........................t............................................................... 0 3 10 37 +2204 PF04445 SAM_MT DUF548; Putative SAM-dependent methyltransferase Kerrison ND anon DOMO:DM04150; Family This is a family of putative SAM-dependent methyltransferases. 26.00 26.00 26.10 26.00 25.80 25.50 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.55 0.70 -5.21 7 1454 2012-10-10 17:06:42 2003-04-07 12:59:11 8 3 1428 5 251 988 353 223.20 46 88.57 CHANGED tclhtltstasLtpsp-sshALlhscp.....pLELRplDEsp.GulhVDFVuGAhAHRRKFGGG+GEslAKAVGlpsuhhPsVlDATAGLGRDAFVLASlGCcVphlERHPlVAsLLpDGLpRAhtDs-IGthlpp+lpLlhssuhstLp......shspPDVVYLDPMYPH...+pKSALVKKEMRlFppLVGsDhDAcsLLcsAhpLApKRVVVKRPcYA.hLusppPsauhpTKspRFDIYhst 
......................................................................tt...........l...t..st.p.s...h.sLl.hs.p.....pLpLpp..h...s-s.c..G.slhVDFlsG.uhuH.R.R.K..F..G.G.G+.G..-.slAKA.lGlct.s...h..h....P..sVlDATAGLGRDAFV..L..A.S...l....G..........C....+..Vph..l..ERs.PVVAAL.L-DGLp...........R.........u....h.....t....D.......s......E.I.......s.......s....h.....l.....t.....cRl.p.Llau.sSlsh.Ls..................sh.s.sp.P-VVYL.DPM..F..PH.......+...p.K.S.AL...VK...KEMR...lFpsL.......V............G................s........D..h..D...A...D...s..LLps.A.h....tl.A..p..K......RVVVK.RP.chAs.L...u....s.h.......ts.s.sl..s..K.spR..FDlYh.p....................................................... 0 81 135 196 +2205 PF04446 Thg1 DUF549; tRNAHis guanylyltransferase Anantharaman V anon Manual Domain The Thg1 protein from Saccharomyces cerevisiae is responsible for adding a GMP residue to the 5' end of tRNA His [1]. The catalytic domain Thg1 contains a RRM (ferredoxin) fold palm domain, just like the viral RNA-dependent RNA polymerases, reverse transcriptases, family A and B DNA polymerases, adenylyl cyclases, diguanylate cyclases (GGDEF domain) and the predicted polymerase of the CRISPR system [2]. Thg1 possesses an active site with three acidic residues that chelate Mg++ cations [2]. Thg1 catalyzes polymerization similar to the 5'-3' polymerases [2][3]. 
26.60 26.60 32.90 26.60 24.40 23.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -11.01 0.71 -4.54 100 440 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 356 8 291 450 19 126.10 39 45.52 CHANGED +hchl.+paEppsp...llPssalVlRlDG+uF++Focp.apFpKP...pDt+slplMspsApsl.hpphs.clsluYupSDEhShlhcc..sssh..............asRRtpKlsShlsShhoupashhatphh.p........h................................s..PsFDuRslhhP ......................................h.hcYV.+p.FEtts.p...llspsalVlRlDG+sF+.+.....Focp.apFpKP...N.Dp+uLpLM.spsApsl.hpc...........ht....-....lslAYGpSDEa...SFla+c...pssh.............................................apRR...s..sKlhohlsShFousYshhWtpaF.p............p..L.h................................................s..PsFDuRsVhYP................................................ 0 100 174 238 +2206 PF01878 EVE DUF55; EVE domain Enright A, Ouzounis C, Bateman A anon Enright A Domain This domain was formerly known as DUF55. Crystal structures have shown that this domain is part of the PUA superfamily. This domain has been named EVE and is thought to be RNA-binding [3]. 
21.10 20.60 21.10 20.60 20.80 20.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.06 0.71 -4.17 140 1582 2012-10-02 17:37:24 2003-04-07 12:59:11 13 19 1431 13 568 1316 1573 137.80 30 64.70 CHANGED sY...WLhK......oEPp..........auhpph...ppptp..shWcGVRNapARN..hh+.pMchGDhshFYHSs.....tpt...ulsGlscV.s..cpsasDsoth.......s.t..........ps+WhhV-lchh.pphspslsLppl.....+........tp..t............Lpch.sll+pu..R............LSlhPVspccaphIh.ph ..............................................taWLhKoEPtt..............auhpp.....ttptt..t.WsGlR..N...ap..ARN.......hh+.pMchGDtshFYHSs........................ct.......ul.lGlsc..V.s..ppsasDsst..........stt...............tspahh..V-Vpah..cph.s..p.slsL.spl+.....................tp..sp.............................L.psh..llpps..R...........LSltPVop.spaphIhp..................................................... 0 170 336 459 +2207 PF04447 DUF550 Protein of unknown function (DUF550) Kerrison ND anon DOMO:DM04177; Family This family is found in a range of Proteobacteria and a few P-22 dsDNA virus particles. The function is currently not known. 21.40 21.40 21.40 21.40 20.90 21.00 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.38 0.72 -4.40 10 150 2012-10-01 21:36:44 2003-04-07 12:59:11 7 3 138 0 8 97 13 98.30 67 44.14 CHANGED phhc+HA-WS-+TFG.slpssGsLcHLuKEhlEltAsPsDlsEWADlhhLhaDutpRuGlosppIscAlhsKhslNcuRpWP-.+su-PchtI+cpsssss .....................................IR+EHAcWSDuTFG...sVGPIGPLKHLSKEALEAAAE..P...DDL....S....E..WADMQFLLWDAQRRAGIS.DupITsAMc-KLcINhcRQWPE.PKDGEPRLHIKE.sss.PV......................... 1 3 5 7 +2208 PF04448 DUF551 Protein of unknown function (DUF551) Kerrison ND anon DOMO:DM04177; Family This family represents the carboxy terminus of a protein of unknown function, found in dsDNA viruses with no RNA stage, including bacteriophages lambda and P22, and also in some Escherichia coli prophages. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.78 0.72 -3.22 26 815 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 445 0 22 522 9 66.40 34 34.52 CHANGED GWISCoERhP-..psp....VLlh..........t...hssthtsthhshhhssspa...............................thppsTHWMPLPcPPp ....................................................................................................................................................GWISCSERMP-.........psp.......tVLlh.................tt...hshhhss....t.h......h.s.ss.t..ss.s.ph...............................sth.ppsTHWM...PLPEPPQ............................................ 0 2 7 11 +2209 PF04472 DUF552 Protein of unknown function (DUF552) Waterfield DI, Finn RD anon COG1799 Family Family of uncharacterised proteins. 21.10 21.10 23.20 22.60 20.60 18.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.02 0.72 -4.06 120 2218 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 2038 2 449 1368 357 73.10 35 43.08 CHANGED lhlhcPpsa.s-spp.ls...cplcst.psVllNlpphcpcp...ApR.llDFluGssaulsGclp+l...upp.lalloPssVcls .................lslhcP+sY..s...-App.Iu...-p.L+ss.csV.llNl.p.phccsp.........A+R.llDFluGssaulsGslp+V......usp..lFLhoPsNVcl............ 0 149 312 401 +2210 PF04473 DUF553 Transglutaminase-like domain Waterfield DI, Finn RD, Bateman A anon COG1800 Domain This family of uncharacterised archaeal proteins are related to Transglutaminase-like domains. This family has previously been called DUF553 and UPF0252. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.11 0.71 -4.95 14 69 2012-10-10 12:56:15 2003-04-07 12:59:11 7 5 50 0 45 689 23 149.60 21 35.45 CHANGED pclpsluppL....pGsspt-ohWNlLcW.ccNlcYDasKuphh....................................................................................h..Ip.............................................oPpEhlth+cGlCsDYAlLTuulLLs.NhsshYllphphpps..h.....HAAsAlplsGshalLDQ+.P.hchssYl........phhc.cshhIpslshY .............................................................................................................................................................................h...t..t..s...pthhplh.pW.p..pslp..Y...shp+sp............................................................................................................................................................................................................................................hp...........................................................sP.h-sl.phppGlCpDYAlLTuul..Llp.................sh..s.......s..hY..l..........l...t...h...p.....h...p....s.p....t..........Hsssul..p.l.s..u....phal.lD..pp..s.h.....htpah....................................................................... 0 12 22 33 +2211 PF04474 DUF554 Protein of unknown function (DUF554) Waterfield DI, Finn RD anon COG1811 Family Family of uncharacterised prokaryotic proteins. Multiple predicted transmembrane regions suggest that the region is membrane associated. 
21.00 21.00 22.20 22.10 20.30 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.24 0.70 -5.20 64 1327 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1217 0 230 848 239 223.00 39 97.03 CHANGED hhGollNshullhGullGhhltpplP-+h+pslhpulGLsslhlGlphshps............cshlllllSLllGullGEhlplEctlsplGphlc...............................................................pst.psphscGFVouoLLFClGuMuIlGulpsGLsGDhslLhsKolLDGhoullhAuohGlGVhhSAlPlhlhQGulsLhAshlpshlsssh..........ls-hoAsGGlLIluIGlslL....tl..+cl+VuNhLPALllshhlshlh ..............h.lGshlNusAlllGullGsLlsp+lP........ERh+sohhphhGLsslslGI.hslcs...................tNh.shlluhllGulIGEhhtLEctlsphssthp........p.......+..............................................................................tp.p.s..tcpsFhpuaVshhlLFCsuuhGIhGAhppGhoGDsoILhsKShLDhFoAhIFAsoLGluVsh.ulP.lllhQhsl.s.hhAshI.h.s.lhos.sh..........hs-hoAlGGlLllAsGLp.lh......uI.......K.hh.VsNhLPALllshslshh................... 0 97 162 198 +2212 PF04475 DUF555 Protein of unknown function (DUF555) Waterfield DI anon COG1885 Family Family of uncharacterised, hypothetical archaeal proteins. 25.00 25.00 41.50 41.20 22.30 21.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.27 0.72 -4.01 14 106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 80 0 72 99 1 102.40 53 80.82 CHANGED ssYhVsLEAAWlV+DVcolDDAIulAISEAGK+LN..sL-aV-l-lGhphCPtCGE.h-ushlVAsTALVGLlLpMKVFNAEStEHAtRIAKuhIG+AL+cVP ....sYhVslEAAalV+DV-osDDAIslAlScsGKtLN..cL-YVEl-lGhotCPtCGE.h-sAFlsAcTALVGLhL-M+VFNAESpEHApRIAKupIGpAL+clP. 0 10 44 62 +2213 PF04476 DUF556 Protein of unknown function (DUF556) Waterfield DI, Finn RD anon COG1891 Family Family of uncharacterised, hypothetical prokaryotic proteins. 
21.20 21.20 21.20 22.00 21.00 21.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.55 0.70 -5.14 4 172 2012-10-03 05:58:16 2003-04-07 12:59:11 8 5 145 0 104 180 67 193.90 35 90.49 CHANGED MlLLlSPhsVEEAlEAIcGG..ADIlDVKNPsEGSLGANFPWVIRcIREhsPcD.h.VSATlGDVPYKPGTlSLAAlGAsVSGADYIKVGLYGs+Nh-pAlElM+sVVRAVKDhcsstlVVAAGYADAaRlGuVpPLtlPclAAcuGADlAMLDTAIKDGpoLFDahshEhltpFVchA+spGLhsALAGSlsppHltsL+ElGsDIVGVRGAsCcGGDRNsGRIcR-LVtcL+Ehhc ................................................LLhSstshpEAh.shtuu..sDIlDlKpPtcGuLG....A.............s.sl.+.tl....ht...h...s..st......p.....h....lSAslGD.h.s.hc..P..ss.hs..Ashs........ss.s..s.G.....scYlKlGlh..shts.tts..hphh..tth..t.h..........hh.l........lsshauD.h..........s.h.l.........thhtpuG.h...sssMlDTuhKc.G.ps....Lh-ahs.ptLtpFlp.s+phGLhsuLAGSlpht-lshLt..t..lssDhlGhRGAlCtstcRst.tlp.phVtthht.......................... 0 29 70 91 +2215 PF04452 Methyltrans_RNA DUF558; RNA methyltransferase Kerrison ND, Basturea G, Mistry J anon DOMO:DM04256; Family RNA methyltransferases modify nucleotides during ribosomal RNA maturation in a site-specific manner. The Escherichia coli member is specific for U1498 methylation [1][2]. 
21.00 21.00 21.10 23.80 20.90 20.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.30 0.70 -5.23 96 4583 2012-10-01 22:53:19 2003-04-07 12:59:11 9 6 4226 22 1026 3326 2801 217.90 29 90.03 CHANGED ltlss.cps+HlhpVhRlp.G-plhlhs.sputtahupltphsccph..hplh........cthphssc.s.......lsls.ulsKs.c+h-hhlpKusELGssplhPhhscRo..p....pphtp+hpRhpplshpAsEQutRshlPplp.sh.shpphl.pphspt.h........llhapctt.t.tth.thttth................plhlllGPEGGaospElphhpp.tGhpslsLGsR.ILRsETAulsAluhlph ....................................................h..lss.-pspHlhpVhRhpsG.-plhLh........s.....s............s......s.......t........ha.h..up...l..t..p.h..s....c..c....p...l.....lc.lh...........................................cth..p.ts.tc...Php..................lplspuls+u.-+hEhhlQKusEL.Glsplhsl.ucRsh....s....+h......pt....c......+............htK+hp+.ap+I.shtAsEQ.st....Rsh.lPpl..p......hh...shp...p...hh....tp....hs.pt.t......................llha.t.c.s.t.p..p...t.p.hst..hp.....................................................plhlllGPEGGh.o.s.pEl.phh.........p.......p....t.......s.....h.pslsL.G.PR.lL.RoETAslsAluslp......................................... 
0 362 687 881 +2216 PF04480 DUF559 Protein of unknown function (DUF559) Kerrison ND anon DOMO:DM04310; Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.45 0.72 -4.40 8 1749 2012-10-11 20:44:43 2003-04-07 12:59:11 7 59 1173 3 463 1520 202 98.00 26 32.61 CHANGED pchptpARcLRpstThsE+sLWphLRs+RLsGaKFRRQtPlGsYIVDFsCtpsKLIVELDGuQHstp..csYDApRTpaLputGasVLRFhN-EVhps.-sVL-pILptLp ......................................................................................t...................p..hh....h.....tt.....................h.php..p..ph..l........s.....t........a.....h...l.......D...h..h.....h............p....t....+......l....l..l....ElD.....G..s.....p....H...t...ps.............ht.....h...D.....t.....pR.......ss.hL....p..s..t..G..a.p.Vl.Rh.hs.pcl..h..pp..c.tVlptlht...h....................... 0 130 318 395 +2217 PF04575 DUF560 Protein of unknown function (DUF560) Waterfield DI, Finn RD anon Pfam-B_4010 (release 7.5) Family Family of hypothetical bacterial proteins. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.80 0.70 -5.18 22 575 2012-10-03 17:14:36 2003-04-07 12:59:11 8 16 290 0 79 541 30 264.50 25 62.17 CHANGED WpassuhsYhp-cNlNsuscptph..........ttsh..stPpsApGlsaphuhpKcasltssaahphphsstG+hYWcN+c..Ys-hosRhthGhuYpsu+pplslhPFhEppaa...........utcpYspst......GlplphshhlospWphusshEhtpppY..hcc...pchsGshphhSsoLhah.ssppaaahGhDahcpps.ps......cspSapppulRhGWuQ-Wsh..GlooplshuhupRpYcshshhh.............shpR+DcpassslolW+RshaahGlTP+LsapapKscSNhs.hasYsc..pplhlphsKsF .......................................................................................................Wphphuhshh.ppNlN.pustttph......................h.t...p..st..pu.pulsYphshp+p....hsl..t..sp....ahhththsstGp.hYhss+c........as-....h..s.h.phth..G.h.tapss.....p...pp...h.ulhPa.hp..pph..h......................usctastsh.................Gh..phph.shh..ls..p.Wphsssh.......p.htcp.cY...pcp...............pphsupt..h.h.u.s..o.l.h..a.h.h..s.s..p..t.h....hh.s..G.hs..a....+cps.....p-.......p.s.p..uh.pp..tu.l+h.G...hsp...past......G.lss.ph..p..h..u....a..scRp..Ycs...shh..................s.tR+Dcchphsh....u......l.h..+.c...p...l....p.a.h.GlsPpLsap.ap+s..pSNhs.hYpacp..pphhlphp+pF..................................................................... 0 27 48 65 +2218 PF04481 DUF561 Protein of unknown function (DUF561) Kerrison ND anon DOMO:DM04313; Family Protein of unknown function found in a cyanobacterium, and the chloroplasts of algae. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.43 0.70 -5.42 5 102 2012-10-03 05:58:16 2003-04-07 12:59:11 7 3 94 0 40 445 219 236.40 57 91.19 CHANGED MsN...K..lcsslcp+KAIKVISGLNNFNsspVlpIA+AuptusATYlDIAADPcLVcpVK.ulssIPICVSAl-schLhcuVtAGADLlEIGNFDSFYsQGphlsss-IluLVKpTRpLLP+TsLoVTIPHILsLcEQlpLApcLEuLGlDlIQTEGphouIS.....+scplschIcpSss....TLASTYsIS+sVsLPVICASGLSslTlPlAFuhGASGIGIGSuVo+Ls-ptcMVshlsElpsulSuspsh .....................................................Lppuhpp+puLKVISGLpNFDtssVstls+AAptGGATaVDIAsDspLVc.hsp.plo.sLPlCVSuV-PctFssAVpAGAshlEIGNaDoFYspGRhFputEVLpLT+pTRpLL.PplsLSVTV....PHhLsLDcQlpL..A.pLVptGAD...II...QT.E....GGT.S.SpP.......hpsGs..LGLIEKAuP.......TLAAAauIS.....R.....A.....V.....s........lP.....VlCASGL.S.s.V.T.s.PM.A.I.u.A.G..AuG..VGVG..S..A..l....N......+....LNDplAMlAsVRuLsEALt....sh............................ 0 6 26 37 +2219 PF04763 DUF562 Protein of unknown function (DUF562) Waterfield DI, Finn RD anon Pfam-B_6057 (release 7.5) Family Family of uncharacterised proteins. 25.00 25.00 36.60 58.60 19.20 17.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.89 0.71 -4.96 6 19 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 2 0 13 19 0 138.60 41 24.59 CHANGED phpcFL.hVltDEpEKcVlVVsscup.thcuLssculshLhcELpppGYSYLNIhSscscu.spVpERplLssc.pGRSFTVIls-LPlGssDIRsLQLASERIlVScph-AADAhASGC......KlLpa-c.pssWs....pcatsatcpV-cpt ...hpcFhhhlhhscpE+sVVVVsscu..sh.sLsspuhspLlcELppcGYSYLNIhuhcssu.hpVppRplLsss.pG+uFTllhs-.s.upsDhRsLQLASEthhsu+EhpAsDsYASGC......cllth-c.ppsWs....pcHttatccVccp.. 0 0 0 13 +2220 PF04577 DUF563 Protein of unknown function (DUF563) Waterfield DI, Finn RD anon Pfam-B_4026 (release 7.5) Family Family of uncharacterised proteins. 
22.10 22.10 22.10 22.60 22.00 22.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.58 0.70 -4.42 219 1565 2009-09-11 01:27:59 2003-04-07 12:59:11 9 58 566 0 846 1505 506 202.60 16 44.06 CHANGED tha....t.Hahh-h.lspl.hhhppthhpps..............hh...hst..thh...sahp......chl.ph.......hul..ttppl.ht....p...p.hhhp.......................plhhssssh.........ththhshhh...........thtphltpphthpt............................................ts.........+hhalsR........tst......pRplhN..pp..cl...h....phhp...............phu.......ap...hlp....sp.phs....hp-Qlp.las.suchlluspGuultNh.....lFh..ps..t....spllpl..s.....pthsht....ahtlsthhshphhhlhsps .......................................................................................................................................................................................................................................................................................................h.....................t..........h.............t............t.hhh.........................................thhh..............................................................................thtp.hlhpthshsp................................................................tts.........plhhlsR...........sp..........pRtl...hN...ps......-l....h.............phhp..............................................phs................hp..........llp......hc...phs....................h.tptlp...lhs....su.chllusHGAuLssh....lFh...ss...........s.....usllcl...........hthp.t................ahthuph...shph......th....................................... 0 442 611 734 +2222 PF04483 DUF565 Protein of unknown function (DUF565) Kerrison ND anon DOMO:DM04315; Family Predicted transmembrane protein found in plants, chloroplasts and cyanobacteria. This family is also known as YCF20. 
21.10 21.10 21.20 22.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.81 0.72 -3.81 19 173 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 110 0 85 173 161 59.50 33 43.19 CHANGED Qpu-WDllsAulllshsEhlutlhYpph.........ppp.th......hlphlNhhKlGlhYGLFlDAFKLGS ................u.hDllsAsllVhhhEhlstl...hYptsh................................htp.th......hlphlNhaKhGlhYGLFl-AFKLGS........ 0 16 55 75 +2223 PF04525 Tub_2 DUF567; Tubby C 2 Waterfield DI, Finn RD, Eberhardt R anon Pfam-B_4998 (release 7.5) Family The structure of this family has been solved. It comprises a 12-stranded beta barrel with a central C-terminal alpha helix. This helix is thought to be a transmembrane helix. It is structurally similar to the C-terminal domain of the Tubby protein [1]. In plants it plays a role in defense against pathogens [2]. 24.50 24.50 24.50 24.70 24.20 24.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.36 0.71 -4.90 16 987 2012-10-02 20:44:47 2003-04-07 12:59:11 7 6 608 2 363 768 7 155.80 24 85.77 CHANGED thhlss.ahsstthsLslhh+shhhsssuaslhDspGpllF+V-s.....hphscchhLhDssGpsLlol+c.K.....hhoLtspWplapusssp.......p.lFsl++s.....l......psahh..ssh...tppspsapIcGshhpcuhplhc..psspllApl+...s..pshhhGcDsasl.hVpsphDhuhlhuLlll ...........................................ht...................hhl.pp..h.hh.t.........s.c.p.Fsl....p......D.s.p.G......p.....lsapV-G..........ph....h..p.......hs.....c..phtl.......hD.s.s.G.c.......l...h.pI.p+..c...................hh.o.h...h.s.capl.h....ts.sp.p.....................h.as..l+Kp................................sFh....t....................s...h...................t..p....sh.shcl..cG.s.h..h...c....h..s.....aplh................p......s......p...s.....p.....llA.clp.....................p.......p...h.......h.th.......s.....s....s....asl...sV.h....s....h........ss...Lllulslh................................................................. 
0 59 216 293 +2224 PF04601 DUF569 Protein of unknown function (DUF569) Waterfield DI, Finn RD anon Pfam-B_4902 (release 7.5) Family Family of hypothetical proteins. Some family members contain a two copies of the region. 21.20 21.20 21.20 21.90 21.00 21.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.78 0.71 -4.76 7 184 2012-10-02 19:42:32 2003-04-07 12:59:11 8 6 19 0 119 175 8 140.00 37 47.43 CHANGED MElFpcAcuVRLRS..pHDKYLhAD-DEEoVpQsRsGou+pA+WTVE.V..sp.clIRLKSCaGpYLTASNc.FLLGhTG+KVlQotshR..hDpss.-WEPlREGupVKL+oR.G.paLRuNGGlPPWRNSVTHDh.PHhSsTpc.llW ...............MEhFpcuphVRLRS...tp.sp..YLhAD-....Dt.....psV...sp...s...+......s.u..u.....u..h..s....Ah.WsVE..h..l..................s......s....s......s............h.........lh...L...+.S.s.Y.G+YLsA....o...st.......h....h...h..G.h.s..Gp......+....VlQs............t...p..................h-.s.sh......W....c..s.l..........+......c....G...s.......p......l...pL...+........s.t.....G.......pa..LR......ANG.......s.h....sW.csu.VTh.D...st.t.......................................................... 0 12 63 92 +2225 PF01881 Cas_Cas6 DUF57; CRISPR associated protein Cas6 Enright A, Ouzounis C, Bateman A anon Enright A Family This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats [1]. It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. 
After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation [2]. Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers [3]. 23.60 23.60 23.70 23.70 23.40 23.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.12 0.71 -3.99 46 464 2012-10-01 21:23:39 2003-04-07 12:59:11 11 3 348 9 200 474 12 148.10 20 61.04 CHANGED hsuLh....ppsphpls...s....tpaplccl..chhcp.ch.ppphpFpTL..SPlllpshh.........sphpphaltPp-pc.....FhchlppNLhcKYpth.....hucp.p...pchphch.....h+t.....+hlphK.......pth..l+uhhhh.Fclpus..cllchuY-sGhGpKNuh.GFGhlcll ...........................................................................................h.........t...h.ht....t.....ph.hl.ppl....phhp...p.........h..ppph..hhpsl.....S..Plllppt..............ptpshahpsp-..pc..........FtchlppNlhcKa..pth..............hspp.hp....pphphps........h...th+....h.phhph+....................shh.....lpuhh.sh.acl.....pus....c...ll.....phsa...ps...GlGp+sup.GFGhlch.................................. 0 73 128 169 +2226 PF04489 DUF570 Protein of unknown function (DUF570) Kerrison ND anon DOMO:DM04330; Family Protein of unknown function, found in herpesvirus and cytomegalovirus. 
25.00 25.00 32.00 31.60 16.80 16.20 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.45 0.70 -6.07 4 48 2012-10-03 01:18:03 2003-04-07 12:59:11 8 1 25 0 0 44 0 393.40 47 68.12 CHANGED ss.u..hpsshp.pssaG+uI..Aohpp+.ppsh..R+HLshYpuhLhsIlcQYpp.hlPsptphpYcpGtIclushlls.spQlsutp......lYsWoolshPKhhuhtELaFLlsSscppslshpPhITKGGhhSu.hoY...Phs.SsshslshlpsslhMlPFlPa+hPhhhs.hhs...........sh.hLppttst.taGplp.lKp+shhhLullcsLTWppphshs.psphhphhtA.FhGohss..stlP.-shAhRhhsNspaphtsaEFoINl-Nlsls+spcKllGTLATssCcphsspLosENhPphLllpFELloshpcs.cLhFSsNPpLaFoGDlLNss..L.psPNhaELpVaAPYDlHFhsuppHhVpLslRYhphsDRp.hLVSsh.sEshFcTthslWtsssPL+lTLhSao.NLlLPQGT.lAsLhhl ..............ohhEuhR-s..spssYG+sl....p.cs+.l+..Rot...pHLosYcshLhtlscQYNp.sssspt+ApYhpGsIahuhsVIhsstpssshp.......YsWosllsP+s.shsELaFLLCSsptsusVhQPhITKGGhpouhhsa...s...tps....pss....phshl+spLshlPFVPassPcauV.FhT..tcsuh.....................h.hlp.....tAs....FGpho.VpRpGushlshhcpLoWhs+plhs.sppchTpYlA.FcGTh-s..uhhsu.scsWhs.+NVpYEhhsh.FolNV-SlsVsspp.RpLlGTluouhCcps.os.ploscNMPp..shpF.Ll.sotpRt.slhFSpNPoLFFoGDuLN.s..L.ppPssasLTVHAPYDIpFt.psppsVplDlRYsphsDR.CFLVuslPpE..stFaTGLoVWRsspPL+lTLhShT+ollIPQGTPIAsLY.l.. 0 0 0 0 +2228 PF04672 Methyltransf_19 DUF574; S-adenosyl methyltransferase Waterfield DI, Finn RD anon Pfam-B_4601 (release 7.5) Family This family contains a SAM (S-adenosyl methyltransferase) domain, with a central beta sheet with 3 alpha-helices on both sides. Crystal packing analysis of the structure PDB:3giw from Swiss:Q82L35 suggests that a monomer is the solution state oligomeric form. An unidentified ligand (UNL, cyan) was found at the putative active site surrounded by the residues His57, His170, Phe171, Tyr216 and Met22 . The UNL is likely to be a phenylalanine or phenylalanine-like molecule. (details derived from TOPSAN). 
20.00 20.00 20.00 20.10 19.80 19.90 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.57 0.70 -5.13 12 720 2012-10-10 17:06:42 2003-04-07 12:59:11 7 2 101 4 349 788 2 255.20 39 95.90 CHANGED stttsspIDTo+PHsARlYDahLGGKDNYPVDcpAu-shhtshPthhtsAptNRsFhpRAVphLAt-.GIRQFLDIGTGlPTtsNlHQlAQplAP-oRVVYVDNDPlVLsHucALLsusP-GhTsalcADl+cPcpILp..cstcsLDFsRPVALhLlAlhHal...sDs-DshulVcpLh-sLPsGShLsLScsos-hsPthh...ctstshaspu..utshphRohp-lttFFc..GL-LlcPGlVssscWRP-ssts...........................sstthshauGVARKs ............................................s.....tlDsshPpsARlYDaaLG.GKDNassDRpsucphht....hhPph..h..t..hAptNRsFLpRAVRaLAs.c.GlRQFLDlGoG.lP.T.t.................s....N..........lHE.VA.Qpl......s...P..-...uRVVYVD.NDPlVLuHu+ALL.su.s...s.p......Tsh...lpADlR-PcpILst....pspchLDh.sc.PVALh.llulLHa.l.......sD....p.-......c.s.t.slV.pplh-sLssGSaLs.lo..H......s....o.....s......-....h.s.....s....p.t..h..........pt...s....t...p.h....h...p..ps........ss..s.h.....t...hR..opp-ltpaFc...GL-Ll......-......P.G......lV.s......sp....W...RP-s.sss.............................p.s.tt.h.hauG.VuRK............................................... 1 125 277 340 +2229 PF04746 DUF575 Protein of unknown function (DUF575) Waterfield DI, Finn RD anon Pfam-B_6048 (release 7.5) Family Family of uncharacterised proteins. Contains several chlamydial members. 
25.00 25.00 80.70 79.60 19.60 18.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.26 0.72 -3.98 6 20 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 2 0 15 20 0 97.00 55 18.45 CHANGED AFDFoRPlCSRITNFALGVIKAIPIlGHlVhGl-WLlSph.tphls+PsFsSDVssIlKlEKstG+DHluRVEphL+p.RlslssED.sKVHG+hPpsPFs AFDFo+PhCSRITpFALGVIKuIPllGHllhGl-WLlS+hhcphVo+PsFsSDVssIlKlEKlsGRDHIuRlEshLKp.RlsIssED.DKVHG+hPpsPF.u 0 0 0 15 +2230 PF04507 DUF576 Protein of unknown function, DUF576 Mifsud W anon Pfam-B_2120 (release 7.5) Family This family contains several uncharacterised staphylococcal proteins. 20.00 20.00 20.40 20.30 19.90 19.70 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.62 0.70 -5.46 29 2553 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 198 3 28 1021 1 231.00 55 98.31 CHANGED MpahK+lsLhISlllLhlhI.uGCGh.s..........K--SKEspIKKSFsKTLsMYPIKNLEDLYDKEGYRDsEFcKsDKGTWlIpS-MshpsKGcshco+GMVLhlNRNTRTuKGaYhlcclp--pcthspsp-KKYPVKMcNNKIIPhcpIcDcKLK+EIENFKFFsQYGsFK-LcsYKsG-ISYNPNVPsYSAcYpLsNsDhNVKQLRKRYsIPTpKAPKLLLKGsGDLKGSSVGaKclEFTFlcsKcENIaFoDSlpFpPSc ............Mhh.p+lhLhIsh.ll.Lhlhl..sGCsh.p..............ptsuKEtQIKK.SFsKTL.sMYPIKNLEDLYD..KEG.Y..RDs..EFcKsDKGTWhI.Schshps.Kscphcs+GMVLalNR.NTRTsKGaYhVpphhc-pct.h..........p.s.pcKcYPVKMhsNKII.hcplcD-KLKKcIENFKF.FsQY.usFK-lpsYcsGDlSaNspVPSYsAcYpLsNcDhNVKQLR+RYsIPTpKAPcLhLKGsGsLKGS..SVGaKclEFsFscpKc-slhasDSlpapPo.................................................... 0 11 11 27 +2231 PF04510 DUF577 Family of unknown function (DUF577) Finn RD anon Pfam-B_3938 (release 7.5) Family Family of Arabidopsis thaliana proteins. Many of these members contain a repeated region. 
20.90 20.90 20.90 20.90 20.40 19.60 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.18 0.71 -4.57 17 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 9 0 49 54 0 158.90 33 38.91 CHANGED l+EIpPLLIoCLp.pphp.-SphKlLtclVSpVAaplh.hpcssW.ELpDhIhShu.ppE.hKAhalF.sLs.sl.hc-Flh+hhcs.ll.chpphLhsPpc..scspsWsLAhpsshphuIpLl-ssh+.schl+clhph....hlcSV+cLVp+GhE.thlhRuhc-lEphVp+phphYphoc ....................hhplpslLIsCLp.pthp.po.hKlLtplVspVsh.pl..h.hpctsW.-Lt....-hIho.u..ps-...+AhhlF.sLs..l...c-Flh.hhcp.Lh.ch.phL.sPpp...cspsWsLAhpuuhphsIpll-sstp.schlcclhpp....hlcSl+-LVp+thE.thlh+uhcchEphlpcphphYphs.t................. 0 25 29 30 +2233 PF04669 Polysacc_synt_4 DUF579; Polysaccharide biosynthesis Waterfield DI, Finn RD, Eberhardt R anon Pfam-B_4574 (release 7.5) Family This family of proteins plays a role in xylan biosynthesis in plant cell walls. Its precise role in xylan biosynthesis is unknown [1,2]. Its function in other organisms is unknown. 20.40 20.40 20.70 21.70 18.50 20.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.34 0.71 -4.93 29 437 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 248 1 286 418 7 142.80 30 66.25 CHANGED hEhphuh+slc+tps.hNlLshs.ssp.....pLhhstlscp.............hhpphccpaPslc.......scl.h-tccLhust.....................................tcpcW.hhhhss.cshht-........ashGslhRscsstsho..sslFVpcl..........pFhshEhtpsptGhsphhah .............Ehphus+slp+tps..hslLshs....ssp.....pLhho.phs..cp............................hhpph.......c.....cpFP...p..h..c...........sph..hct-cLhutt..................................................hctcW.h.hhpt.c...shhp-...........ashGsLhRscsstths..ss.lFlhch..........pFhshEht.psptGh..hh......................... 0 68 151 223 +2234 PF01882 DUF58 Protein of unknown function DUF58 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of prokaryotic proteins have no known function. 
Swiss:P71138 a protein of unknown function in the family has been misannotated as alpha-dextrin 6-glucanohydrolase. 21.60 21.60 21.60 21.60 21.50 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.81 0.72 -3.84 161 3991 2012-10-10 16:07:06 2003-04-07 12:59:11 13 9 2067 0 1413 3655 957 85.30 25 23.95 CHANGED st-ats.lRcY.psGDsh+pIcW+soAR........t.sp...Lhl+ca.ptpp.p.....tplhlhlDtsts.......................................tphEttlphsss.....lshthhp.....pspthul ................-atp.lRpYps.G.DclRpI-W+so.AR...............p..sc........hhl+..pa.cp-p.p.......ts.lhlhlDhsssh..h.stt.......................................................tch-hul.phsus......lshh.shp.....pssphs............................................................................................................................................. 0 508 1002 1244 +2235 PF04515 Choline_transpo DUF580; Plasma-membrane choline transporter Mifsud W, Pollington J anon Pfam-B_2258 (release 7.5) Family This family represents a high-affinity plasma-membrane choline transporter in C.elegans which is thought to be rate-limiting for ACh synthesis in cholinergic nerve terminals [1]. 
22.10 22.10 22.30 22.10 21.70 22.00 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.83 0.70 -5.70 73 1097 2009-12-17 12:57:17 2003-04-07 12:59:11 7 15 292 0 722 1057 28 295.60 23 51.13 CHANGED hhhllhshh.llhllhh..........................hhhppRI.hussllcpuscshp.ph.plhhhsllshllhhsahshashshlslhsptps....shps.s.....................................................................................tlhhhhhahlFuhhWhsphltslhphslA.GshusWYastpp....shPptsshsuhpRuhp.....YphGSlshGSLllullphlRhllchlppphpttt.....s.hhthlhpshpChhtsl-phlcahN+.AYlhlAlaGcuahpuAKcsapLlpp.sslcslhssslsshhLhluplhluhhsulluahhhph..........psshsashsshhhshlhuhhls.plhholhpsul-TlFlCaspD.ph.ppss ..................................................................................................................................h..hlhhhlh.thlhhh.hh..........................hh....hppR..l...h..sh..tllp.u.s.cslt...ph.thh..h..........h..sh.hsh.hhhhhh...hs..h.Wh.h..sh.l.h..l.hss.s..ps......p..t...............................................................................................tt...............hhhhhh.hahlhs.hhWh.sphlhuhtphslA.Gshusa....Yashpp.............shst..............h........s..l..h.s.u......ht...+sl.p...............aphGSl...shGS.Lll.sllp....h.hRh...........l.l..........p.hh.pp....phpttt.....................s.h.phhh.psht..C...hhhhl-..............p.hlcahNc..AY.........hhhAlhGpsFspuA+................psa.llh.........p..shh.cs..h......h.....hsp.........l......sshll....hh..u..pl.hls..h.hsu...hhuh.....h.hhp..................................t.th.phhh...h.sh.h..hs..h.h................h....uahls.phhhs..l...ht.sl-olalCas.D...h........................................................................... 0 287 424 583 +2236 PF04570 DUF581 Protein of unknown function (DUF581) Waterfield DI, Finn RD anon Pfam-B_4765 (release 7.5) Family Family of uncharacterised proteins. 
23.40 23.40 24.00 23.40 23.20 23.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.43 0.72 -4.68 42 427 2012-10-03 05:12:49 2003-04-07 12:59:11 9 3 27 0 245 398 0 55.50 39 30.63 CHANGED stsstsstsstss.ssFLpsChLC++pLs..scDIaMYRG-pAFCSpECRppQh.hDEtp- ..............hs..........tsspFLpsChLCp+.pLs..scD.....IaM.YR..........G-pu...FCSpECRppQh.h.DEt............. 0 20 132 192 +2237 PF04518 Effector_1 DUF582; Effector from type III secretion system Mifsud W, Eberhardt R anon Pfam-B_2447 (release 7.5) Family This is a family of effector proteins which are secreted by the type III secretion system [1,2]. The precise function of this family is unknown. 21.00 21.00 21.50 22.40 20.80 20.90 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.21 0.70 -5.47 20 164 2009-09-14 12:27:57 2003-04-07 12:59:11 7 1 35 0 22 81 0 380.30 32 51.67 CHANGED plpsphsshstspspFss..sshsshpshplasphssspstls.shhtssspthlsshpps..s.uppspphhppapstAssplpplpppIsphppphtchsstKAohsss.......hphupsshlpopPLtSAauSllLD+YlPpQpphLpsLtpclsaSNhAushhNsllchlssFsouslYYsLuSYlsQ.....ppGtstFssuhtpApstLscE+ppsppDlppspcApshlspllsplpsDs...clTsuQ+pclh-slssYptplsslhssLssLpshLssLpl.........hsssspscssFclps.......-sWhtpLpsLEuhllsG...hssusssGGhhslhsplpuDQQsYsshuQNQQLsLQhchouhQQEWTlVusSLpLLNQIaspLsRp ......................................s.psphpphs.tpppaps..ssts.hpthplh....sphpsspshls.hhhtshspthlsshppsh.ptuppspphhscapstupschpclppplpphppphpplsstpsshssp.......ht.ApsshlcshPLtSAauplLLD+YlPpQpphLpsLtpclpaSNhAushhNsllchlspFssuslYYNLuSYlsQ.........ptupshFssshppspspLscE+pphctDlpp.......sppApphlppllppVpsss...plTspQ+sclp-tlssYtppLsslhspLssLpshLssLsh..........sssspscssFclhu......tcpWhhpLpsLEstllsG...hssusssGGhhshhs.lpoDQQsYsshuQspQLtLQhphouhQQEWTlVusSLplLNQIatpLspp..... 
0 5 5 18 +2238 PF04519 Bactofilin DUF583; Polymer-forming cytoskeletal Mifsud W anon Pfam-B_2455 (release 7.5) Family This is a family of bactofilins, a functionally diverse class of cytoskeletal, polymer-forming, proteins that is widely conserved among bacteria. In the example species C. crescentus, two bactofilins assemble into a membrane-associated laminar structure that shows cell-cycle-dependent polar localisation and acts as a platform for the recruitment of a cell wall biosynthetic enzyme involved in polar morphogenesis. Bactofilins display distinct subcellular distributions and dynamics in different bacterial species, suggesting that they are versatile structural elements that have adopted a range of different cellular functions. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -10.41 0.72 -4.00 160 2175 2010-01-08 16:00:08 2003-04-07 12:59:11 8 16 1383 0 671 1765 1447 96.10 23 54.70 CHANGED tsolIupssplpGslp.hsu.slpl-Gp..lpGslps.ps....plhlupsGplcGp.lpup.plhlsGp..lpGslpupchl.pltssuplpGclpssp.lplcpGuhhpG.phph ............................h..shlstshp.hpG.p...lp.hps....tlpl..c..Gp...l..p..Gs.lps..ps......plhlupsup..l.c...Gs..l.p..s...p....p.l.hl...s...G.p........l..p..G.....s.....lp.u...p.p..l.pltssuplp.G..s..l...p..s...pp.....lplptGuhhpGph..h....................................... 0 268 476 575 +2239 PF04520 Senescence_reg DUF584; Senescence regulator Mifsud W anon Pfam-B_2571 (release 7.5) Family This protein regulates the expression of proteins associated with leaf senescence in plants [1,2]. 
21.50 21.50 21.50 23.40 21.40 20.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.18 0.71 -3.72 22 277 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 33 0 170 265 0 149.00 31 84.22 CHANGED sth....ELsEsDla.ss...................................................pssusssshpsu......sss+th.th..............ssssssthtpSAPVsVPshuth..........pptcs.pptcs--p--t-ss.hlPPHEhlAR......Rcsuuh..SVhEGsGRTLKGRDLR+VRNAlhc+TGFhD ........................................................................................................t...-h.Et-lh.........................................................................................t.t....t....t........t.sthh..t..............................tstsssthhtpSAPVsl..Ps.hsphht...........................tttptt.t..tt.c.s.s.......c....pc..c.....s.......s.........ssp...hlPPHEhlA.R...................pptsuh........SVhE.....Gs...G.RTLK.GRDLp+VRNAVhcp.TGFl-....................... 0 18 95 134 +2240 PF04522 DUF585 Protein of unknown function (DUF585) Kerrison ND anon DOMO:DM04368; Family This region represents the N terminus of bromovirus 2a protein, and is always found N terminal to a predicted RNA-dependent RNA polymerase region (Pfam:PF00978). 
25.00 25.00 233.10 232.40 18.60 17.60 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.67 0.70 -4.98 5 16 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 6 0 0 16 0 246.00 51 30.16 CHANGED M.SKFlssD.EYpVPSFQWLIDpSLEc..s.-sElAshVsc..s.....-PcsElTADGSLASFlLAVKPLVIG.Gpa-PPFDQARWGsCCcsVssls-uFTc++LIPhAEMARMLYLDI-GSFVDESEsDDWhPEDTSDGYsEYhuuDEsch.t...us-LppsLucEssshEh-EscEsoD.....SsPS...EhTLGDRYlsTsEEFtol-.SDYslTLNLhNPVEpRVullEDTascs-sD-.hspuPpYpERVSLcuLEAAGH ...MSKhhs--.sYpVPSFQWllDQoLEs..s.cs-sAthlsp..s.....-susElssDGoLASahhAVKPL.IG.Glh-PPFDQARWGpsCcsVhslspthsshtLIPhAEhARMLYLDI-GSFVDESEsDDWhP.DTSDGas-shusstsch.t...pochpp..hLup-usphE.cpsccToD.............sPs.....EhTLGDRYhshcEEFtol-.oDYDIoLsLhsPlEp.RVuhl.DTasHs-ssD.hsThPhYh-RlSLppLEAAGH 0 0 0 0 +2242 PF04532 DUF587 Protein of unknown function (DUF587) Kerrison ND anon DOMO:DM04375; Family This family consists of the N termini of some human herpesvirus U58 proteins, and some cytomegalovirus UL87 proteins. This region is always found N terminal to the Pfam family UL87 (Pfam:PF03043), which has no known function. 
19.30 19.30 22.20 199.50 17.20 16.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.32 0.70 -5.40 8 45 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 27 0 0 40 0 215.80 55 24.92 CHANGED hp.psup-A..LIVcSsss..p+shsVPVaVNSaNLTpElSssEDuRLspssP..VDsE+lculFcsLapAuPsplcs-pDRsKllLsRLLLGPVAVPCYC-.EW-ss...............-YLscsuhtCpGPlLYVHRtRC+Cs..sstsshpFoVMcsahuTHVFRGLLSLpEWNs+LPslFCsC.....ssspsDRYshslLP+chSlaL-hYPYhLspLsRaLoVsEIDDhsNslI ......ush...phss-ALIVhusoA...hRVhsVPVaVsohNLTpELSssEDARhspstP..VssE+VcuVFtuLYpAsPsal.............cTEp-RsKlVLsRLLLGPVAVPCaCD.EW-sc...............-aLscssphCpGPLLYVHR.RCpCG....ssGpuLsaoVLcsHhATHVFRGLLSLoEWNpcLPslFCsC....ssuspp-RYsMAsLP+-hSlaL-.YPYhhVcluRhLoVsElDDhVsuh.o 0 0 0 0 +2244 PF04569 DUF591 Protein of unknown function Mifsud W anon Pfam-B_2799 (release 7.5) Family This family represents a conserved region in a number of uncharacterised plant proteins. 21.00 21.00 24.30 24.30 18.80 18.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.21 0.72 -3.94 49 201 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 3 0 185 199 0 81.40 47 29.53 CHANGED uGpR.........c...tpssupsspRs..ssspsGuppssp.....uctc+...p.GhhttsRGs-PoARIRppthDG..G..shRRRpPAApcG....Gssctso.R..................s+F.ustASst ................................................G.R........c..hpcss.utsGRRs..sssssGup+usp........Gcuc+....TGhhttsRGsEPsARIRpRtlDG....GthRRRQPAAtct....GNGDEsT..R..................GRFsusRASst................... 0 0 0 0 +2245 PF04574 DUF592 Protein of unknown function (DUF592) Kerrison ND anon DOMO:DM04429; Family This region is found in some SIR2 family proteins (Pfam:PF02146). 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.87 0.71 -4.66 14 90 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 54 2 54 87 0 117.70 34 26.38 CHANGED phhP..ptP.ppsl.ls+s..ssKalhstho+--olNARhaLKhaG.tcFL-sYLPp-lNSLalYaLIKLLGFplKDppLhssl.p.hp..hp.s.t..ht.....................t.pDPL-KKphl+LIKDLQ+AhsKVLuTRlRLsNFhTl-+Fls+L+sAKKILVLT .............................h......................hhh.ths+cpshpsRhaLK..shhpFL-haLP.phso..lhhhIthLGFt.+-.thh.................................................pDsLtccphhcLIK.Lp+AlsKVLspRlRLssF.TlDchlptL+sA++ILVLT............................... 0 8 25 46 +2246 PF04578 DUF594 Protein of unknown function, DUF594 Mifsud W anon Pfam-B_2859 (release 7.5) Family \N 21.00 21.00 21.70 21.30 20.10 19.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.99 0.72 -4.52 66 634 2009-01-15 18:05:59 2003-04-07 12:59:11 8 22 16 \N 390 558 0 57.80 34 8.75 CHANGED pLh.......htsppphWclluclWsEhLlYsA....s...pssupsHucpLupGG..EhlThlWhLhsHsGl .......................hh.........ptpptWcllsclWsEhLhYsA..s......ps.ss.ptHAcpL.upGG..EhlThlWhLhtHhGl........ 0 6 138 264 +2248 PF04591 DUF596 Protein of unknown function, DUF596 Mifsud W anon Pfam-B_5061 (release 7.5) Family This family contains several uncharacterised proteins. 25.00 25.00 26.10 37.00 22.60 24.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.53 0.72 -4.07 19 144 2009-09-11 22:03:20 2003-04-07 12:59:11 7 1 49 2 24 99 0 70.10 52 56.83 CHANGED LD-G+LKLu.+KG-aI.pGoT-ELV-MFRpuFPs..SDEEh-.........tGlWFhs-p.CPhsAVWlaKGtt.....ENGEDYY-Ws .LDEG+LKLu..++Gc..Fl.sGTo-EhVEMFRpuF.Ps...SDEEh-.............tGhWFhs-...pCPuGAVWVhKGt.....tENGEDYY-Ws.... 
0 1 12 21 +2249 PF04640 PLATZ DUF597; PLATZ transcription factor Mifsud W, Riaño-Pachón D, Mistry J anon Pfam-B_5458 (release 7.5) Family Plant AT-rich sequence and zinc-binding proteins (PLATZ) are zinc dependant DNA binding proteins. They bind to AT rich sequences and functions in transcriptional repression [1]. 25.00 25.00 25.10 25.10 21.20 24.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.53 0.72 -3.77 12 304 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 36 0 210 282 0 71.90 52 29.08 CHANGED RR.sYHDVlRVs-lpKllDhSsVQTYlINSAKVlFLNcRPQsRssKG..........ssNsCpsCsRuLh-s.F+FCSLuCKl ................RRtSYHDVlRVs-lpK.l.lDlSsVQTYlINSA+VVFLN..cR..P.....Qs...R.s..sKG................................ss.stCpsCpRuL..h...D.s...a+FCSLuCKl............................. 0 34 136 177 +2251 PF04654 DUF599 Protein of unknown function, DUF599 Mifsud W anon Pfam-B_5550 (release 7.5) Family This family includes several uncharacterised proteins. 21.10 21.10 21.50 26.70 18.20 20.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.25 0.70 -5.19 46 411 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 270 0 210 414 55 205.50 27 85.43 CHANGED hlulshhlhsWhGYshahpppstpps..olsshhsphRctWhpphl...sR-s+.lhDspllusLppusuFFASoslllluGlhslluss.......-phhslhucls..hsstsschhhplKllsLlhlFlhAFFpFsWShR.asasulLlGusP.....tp.........ststtshAppsupl.shAuppaNhGLRuaYFuluhlsWhhuPlshhhsolhlVhlLapR-FpSpshpsl ......................................lhlshhlhshhuYp..hah.hhpstpps...olhsh.sthRctWhpphh....pccpp..lhssphltsh.tusoFhASos.lllhuGlh.slluss................pph..tphhssls....h..ss.p...sphh.h...plKhhsLhhhFlhAFhpFshuhRhasasulLluuss......p..................t.ttt.hhsptsuchhppAuptashGlRuaYauhshlhWh.huPhhhhhsohhhlhlL.ap.h-hhSpsh...h................ 
1 47 122 162 +2252 PF00892 EamA DUF6; EamA-like transporter family Bateman A anon Pfam-B_177 (release 3.0) Family This family includes many hypothetical membrane proteins of unknown function.\ Many of the proteins contain two copies of the aligned region. The family used to be known as DUF6. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.39 0.71 -4.09 98 66028 2012-10-02 19:55:49 2003-04-07 12:59:11 15 83 4851 0 17444 56174 36728 125.00 15 75.68 CHANGED hhauhshlhs+hhhpp.....hsshthsshphlhus..hlhhhhhhhttt.............hhshpthhhllhlulhssslshhhhhhu...lphssuspuuhlht.hhPlhshlhu.hlhLtEplshhphlGhllslhGlhllhh .................................................................................huh.h.h.h..h..h..p..h..h..h.tt...........h.s...s....h.....h....h......s...h....h....p....h...h..h..u......s...........h....h..hh..h...h.h.h.h.tt................................h.h.h.s.h.p...t...h......h....h.....h.........h.....h.....h.........u...l...h...s....h....s.....l..s...h.....h.......h...h....h..hu..........l.p...t......h....s....s..........s...t..s..u..h....l..hh....h...t...P....l....h.....s........s........l........h..........u....h..........l..........h........h........t.........E............p........h.........s..........h.......h......p........h......l......u......h......h.l..s.l.hGlhlh..h..................................... 1 4905 10337 14119 +2253 PF04634 DUF600 Protein of unknown function, DUF600 Mifsud W anon Pfam-B_5411 (release 7.5) Family This conserved region is found in several uncharacterised proteins from Gram positive bacteria. 
21.70 21.70 21.70 21.70 21.20 21.60 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.76 0.71 -4.18 15 1266 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 296 4 43 633 0 133.70 58 88.36 CHANGED LsplYpEIAppIsshIPsEWEKVYhhApls-cuuEVaFaYspPs.S-EhhYspsIsccaNlsccpFhcphh-LachFc-LRctFK-sstEPWTShphshs+sGKhslcFDYpDhh.po-asphtRphhapYK+hGllPEs..E.-hEhlcc ................................LSphYNEIANcISuMIPV.EW..E..KVYThAY..ls..D..p..G.GE..V..hFNYTcPu...S-EL.YYosI.......s+....cYNlScplF....Dhh.......h-LYchFccLRslFK..EEs....hE..PWTSCEFDF.T.c-.G.K.LpVSFDYIDWl.soEFs.hG.RpNYYhYKKFGllPEh..EYEhpclKc............................. 1 15 23 33 +2254 PF04645 DUF603 Protein of unknown function, DUF603 Mifsud W anon Pfam-B_5498 (release 7.5) Family This family includes several uncharacterised proteins from Borrelia species. 22.10 22.10 22.20 33.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.99 0.71 -4.34 3 189 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 29 0 14 166 1 164.70 61 95.50 CHANGED MKRAKRSFDDYVAYF+EGSLSDsEIAK+LGVSRVNVWRMRQKWESGEsS...VNEDSRVTISEDTFEHLLuQTF+SEVpARKVKSELDLERSNLELGFINAFKQYSSlELsSMhoKIEsLRuKIDSLNKpsNKKNtpsVNEE...INSLKlELDELIKECpIREMELYYECMKKLAsAHEVDSKSNY ........MK+sKRSFDDYlsYFpEGsLsDhEIAp+LGVS+VNVWRMRQKWESGEss............VNpDSRVTISEDTFEHLLuQTF+SEVpA+KV+SELDLERuNLEL..GFIpAFKQYSSlELsSM+o.KIEsLRsEIDuLNKuusKKNKpsVNt-...INSLKSELsEhIKECSIREMELYYECMKKLssApEsESKSNY.............. 0 8 8 8 +2255 PF04646 DUF604 Protein of unknown function, DUF604 Mifsud W anon Pfam-B_5503 (release 7.5) Family This family includes a conserved region found in several uncharacterised plant proteins. 
20.80 20.80 20.80 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.72 0.70 -5.16 17 290 2012-10-03 05:28:31 2003-04-07 12:59:11 7 9 78 0 208 320 1 203.90 35 42.72 CHANGED QNlhFSYuMAaGGGGaAISaPLAstLu+h.DsCIcRYstLYGSDcRlpAChuELGVPLT+EsGFHQhDlhGslhGLLuAHPlAPLVSLHHLDhV-PlFP.sh..sRhsAl++Lhp.sscLDsupllQQSlCYD...........+..pppWTlSVSWGYsVQlhpuh..............lssRElEhPtRTFhsWh+pus.hssasFNTRPl.scs....sC.p+PhlFahssspts....sps...tThopYp+phhtt...spC.pashssP.sclcpIhVhp.+PsPphWp..+uPRRpCC ......................p.hauas.M.AaGGGGhAlShPLAptLs..c..h.DpC.....l...........pR.Y...s.....tl.....a.....G..S...DsRlpuCh.u......E.L.........G.Vs....LT..c....-.............GFHQ........h...............DlhGch...hGlLsuH.PlsPllSLHHh.-...h...h..pP.la....P.....th.....sp.t..u.l...phhh...t...shp.hDs.thhQpsh.CYs..............t..t.th...ohuluhGasl.lh...................h....ph.hs.pTF.sW.t..t.....a.hsT+......p......C.tpPhhaahpps................t.......hhs.Y.h............h....................lp.l.V.t................................................................................................................................................................ 0 37 125 168 +2256 PF04657 DUF606 Protein of unknown function, DUF606 Mifsud W anon Pfam-B_5554 (release 7.5) Family This family includes several uncharacterised bacterial proteins. 
21.80 21.80 21.80 22.00 21.60 21.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.66 0.71 -4.09 147 2941 2012-10-02 19:55:49 2003-04-07 12:59:11 8 4 1497 0 499 1852 371 136.10 27 86.54 CHANGED lhhl.lulluGhhlslQsulNupLupt.lG.tshhAuhluahlGhlsl....hllhlhhp...t................shtt..ht......ss....PhWhal.GGllGshalhsshhhh.PclGsshshshllsGQllsulllDpFGhhGhsppslsht+llGlllllsGlhl ..........................hhlhslhuGhhlslQ.sslNupLupt.....hu....s............sh....hus....hlsahlGhlhh..hhlh...hhhp..s...............phtt..ht.........................sh.......sh.ahhl.GG.lLGshhlhsshhhh..sp..lGsuhshsh...hlsGQllsulllDpFGhhGs....tpshshh..+.llGslhlllGlhh............................... 1 148 281 409 +2257 PF01886 DUF61 Protein of unknown function DUF61 Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Protein found in Archaebacteria. These proteins have no known function. 20.40 20.40 20.80 21.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.59 0.71 -4.31 17 84 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 82 0 58 78 1 126.40 28 91.20 CHANGED h-+hlphtltplNpclPtcRKoLpELLpE-+PhlhlpsGscHhhc+cELEhLpphlsp-.hthl+lPIllEhpSohtpGshhlpGct.ElKsIp+lLshchs...tpshlhlh+..l.plRRcLPTsTphhFlh .............-+hlpttlpplNpphPtcR+oLp-LLpE-+Pplhlps.GppHhh++cELEhLpphls...tp.hphl+lPIllchsss.htpGthhlpGct.cs+slsplLGtcht......pshlhl.+..l.plR+tLsTsTphhFh................................................ 1 14 29 46 +2258 PF04748 Polysacc_deac_2 DUF610; div_psaccdeacet; Divergent polysaccharide deacetylase Waterfield DI, Finn RD, Yeats C anon Pfam-B_5949 (release 7.5) Family This family is divergently related to Pfam:PF01522 (personal obs:Yeats C). 
20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.16 0.70 -5.32 134 1399 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1342 3 291 924 146 203.80 39 64.31 CHANGED IlIDDlGh....phtsscslhsLPhslThAlhPhsststphuptA...+ppG+ElllHhPMpPhs.......t.p.sstsLhsshsspclppplptshsplPtshGlNNHMGSthTpsppsMphlh......ptLppp.sLhFlDStTsspShAtphApphGlsshpRclFLD...sp.ps.ssIpppLpphhphA+p.pGtAlu.IGHPtsp....Tlps.LpphlspLppcslpLVslS..p.Llp ....................................................................IVIDDhGh...p.pspppl..lul..P.s...lolAllPssPp...uc-hAppA...+ppG+EVllHlPMtPls........t..p..slt.ssLpsphSs-EIp+hlcpAlspVP.aAl..G..lNNH...M....G......SthTushhuMpplh......psLc.ch..s.LaFLDShThuso.Ah.........+hApt.........h........GV.s.lc.R...c.V.FLD...cs.p........stusIcpQhpcAlclA++..pGssIA.IG..HP+.Ps....Tl.c.s.Lpphl.pL.sc..lsLVhsSsLl.s.................. 1 98 175 231 +2260 PF04764 DUF613 Protein of unknown function (DUF613) Waterfield DI, Finn RD anon Pfam-B_6084 (release 7.5) Family Family of chloroplast proteins of unknown function. Some members have two copies of the conserved region. 25.00 25.00 91.90 91.90 20.20 19.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.72 0.71 -4.19 10 10 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1 \N 0 12 0 125.70 30 66.12 CHANGED cYlYRchKccsYWppa.h..lslYVNsK..........+VG.............hscsIo-lEYhLLDlFLaGPLchclo-.YcplpphlpcRsp+Ycchhcc.cplsG.....INIlFs...phsapchLpscYphcEhlssspILplYlls ...........pYhYRc.+ccsYWppa.h..lslYVNsK.....+VG...................hscsIo-lEYhLLDlFLaGPLchclo-.YcplpphlpcRsp+Ycchhcc.cplsG.....INIlFs...phsapchLpscYphcEhlssspILplYll.. 0 0 0 0 +2261 PF04751 DUF615 Protein of unknown function (DUF615) Kerrison ND anon DOMO:DM04973; Family This family of bacterial proteins has no known function. 
25.00 25.00 39.40 39.00 22.50 21.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.90 0.71 -4.58 90 1331 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1309 1 252 777 138 159.20 49 85.34 CHANGED --...-....hsSKSphKR-hcsLQcLGpcLlsLspspLs+l....PLs-p.LhcAltpAp+l..pppEA+RRQlQaIGKLMRpt....Dh-sIppuL-php..................................spppppstthHplEphRD+Lls.....pss..............sulsphlppaPps.DRQpLRpLlRpApKEttp..........sKs....s..........+uhRcLFphL+c ..............p--.E.haVSKSElKR..DuctLpcLGtELV-LucssLcKl.....PL---..LtsAIchAQRI...cpEuRRRQLQhIG.KhhRpp....DlE...PI...cpALDKL+............................................................N++...s..Qps...shh.....H+LEplRDRLls.....pGD...................sAls-l..lshaPcA..D.....R........QpLRsLIRNAcKE+pt..........................................NKP......P....................KuhRpIFQaL+-............................................................... 0 54 129 196 +2262 PF04765 DUF616 Protein of unknown function (DUF616) Waterfield DI, Finn RD anon Pfam-B_6152 (release 7.5) Family Family of uncharacterised proteins. 
21.40 21.40 21.60 21.40 20.00 20.50 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.98 0.70 -5.27 13 242 2009-01-15 18:05:59 2003-04-07 12:59:11 8 14 63 0 143 231 87 269.50 35 55.71 CHANGED N..........LsYlpt-p..s......t.spFGGp.ShpcRcpSFphppshs.VHCGFh+......psGF-ls-p..................D+thMcpC+.VVVuSuIFusaDpl+pPps..ISchoccsVCFhMFVDEpTLstLcpcssh...hDssthVGlWRlVlV.......+NLPYsDsR+NGKVPKhLsHRLFPsu+YSIWlDuKhcLslDPhLIL-pFLWRpsusaAISpHhcRasVa-EA.AsKRhpKYsc.suIDhQhchYpp-GLpPa..SssKhPlsS...................DVPEGulIlRcHsPhSNLFSCLWFNEV-RFTsRDQLSFAYlh.Kl+s.....saplNMFpsCER+slsh.apH ..........................................................................................t.....................s.FsG..ohtpRppoa..........htts.p.l.CGFhp........tsGac.hsc....................Dh..hppCc...llVuoslFu............s.D.lppP.hs.......hsphoh.p...plC.Fhh..FhD-.o.thhpp.sph.........sttthl.GlW+llll.......+sl.P.a..s.D.R+sGK.....................lPK.hLsHRLFPss+YSIWlDuKlpL..h.s.DP.h.h.lLE.thLW..+..ps..sshAISpHhpRpsla-Euttstph.........pKasp.stl-......QhphYp.p-Gls.a...tsphs.h.s...................tlPEu.ulIl.R..cHs..s..ho..NL.F.sC.L...WFNEVs+.F.T.s..RDQLSFsal..h.+lp.......hth.MF.sCthpthh..hhH....................... 0 25 94 119 +2263 PF04768 DUF619 Protein of unknown function (DUF619) Kerrison ND anon DOMO:DM04985; Family This region of unknown function is found at the C-terminus of Neurospora crassa acetylglutamate synthase (amino-acid acetyltransferase, EC: 2.3.1.1) (Swiss:Q12643). It is also found C-terminal to the amino acid kinase region (Pfam:PF00696) in some fungal acetylglutamate kinase enzymes. 
20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.18 0.71 -4.84 33 416 2012-10-02 22:59:21 2003-04-07 12:59:11 8 16 240 39 279 450 57 166.10 28 26.44 CHANGED sLpKELFTcoGuG.TLlRRGh+lhptsohcph.....................shc+L+sllpcsh....ps+tslspYlcpl.cpp.hpsYss-s..h-ulAIV.....cpss...........................................phshLDKFslocsuhhs.sVuDslFssl.p+-FPp.LhWps+cssss..pWaFs+ScGohp.............csshhlFWYG................lpchsclpphl.......pphsst .............................................................l.hclFotpuuu.Tll++G.....plhhhsshpph..........................Dhs+Lppllpcsh........tt+hslpcYl.ppl.ppp...hphh....h....sts.........hcusAIlh.....s....t.s............................................hsaLDKFslhcsu.ts.uluD.lassh...+.....ccaPp.L.......hWp...u+p...s.s........sp....tWaFc+usGohp.................psthhhFWhG...................htshpp..phht.h...p.............................. 0 85 157 226 +2264 PF01887 SAM_adeno_trans DUF62; S-adenosyl-l-methionine hydroxide adenosyltransferase Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family This is a family of proteins, previously known as DUF62, found in archaebacteria and bacteria. The structure of proteins in this family is similar to that of a bacterial fluorinating enzyme [1]. S-adenosyl-l-methionine hydroxide adenosyltransferases utilises a rigorously conserved amino acid side chain triad (Asp-Arg-His) which may have a role in activating water to hydroxide ion [2]. This family used to be known as DUF62. 
25.00 25.00 26.30 26.20 22.70 23.50 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.59 0.70 -5.33 121 1173 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 1120 112 316 783 493 260.10 40 93.36 CHANGED hIsLhTDFGhpDsaVushKGVIhsl....s.PsspllDlTHplsshslhpuAahLtpshtaFPpsTValuVVDPGVGopR+ulsl....css..s.taaVuPDNGlLoh....lh...........................pth..............tcshclsppp..............................thSsTFHGRDlFAPsA.AtL..ApG.hshpplGptl..............sslsclsh.ps..phps.....s......tlpGpl.lalD+.FGNllTNltsph..........hthuc....phplph..........tttt.............................................hhthscoau...............................clt.Gchl.shhsStGhlElAlsp......GsAuphhulphG..cp.lpl ..................................h.llLQoDFGLpDGAVuAMhGVhhph........s....s...s.l.+IhcLTH-IsPasIacuuYRLhQ.T.ssYW..........Pp..G.T.lFV.SVVDP..G..V.........Go.c.R........+S..llscTt...pspYhVoP..DNGoLoh....lt.c.................phGI..........pplhEIsEhts....h..........................p............cpSaTFHGRDVaAYsG..A+L.....AuG..IsFE..plG..spl..............s.spl.lcLs.h.ps..thpcs.................tlpGsI..Ilss.+.FGslWTsIspc.h........phthphG.c.phpVsIhpts.........................................hatsplsascSFu...............................DV.p...G.pPl..lYlNS.L.h....plulAlNp.......Gu....FuctaplusG..sp...hh............................... 0 118 221 276 +2265 PF04788 DUF620 Protein of unknown function (DUF620) Waterfield DI, Finn RD anon Pfam-B_6213 (release 7.5) Family Family of uncharacterised proteins. 
20.40 20.40 24.20 25.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.56 0.70 -5.21 20 186 2009-09-10 16:40:51 2003-04-07 12:59:11 7 5 27 0 112 174 1 218.50 52 58.66 CHANGED MYAhG+V+MsssEhps....ustthsp..........sssus-tGuFVLWQhsP-hWhlELVVu.GsKVsAGSDGKlsWRpTPWls.sHAu+.GPsRPLRRhLQ...............................GLDP+oTAslFusAt....ClGEKtlss-DCFlLKlpss.usLpupSs..ussElI+HslaGYFSQRoGLLlplEDSpLhRlpo..ss..stulaW.........ETohpShlpDYRsVD...Gl.lAHuG+ossolhRF...........G-sstst.spTpMEEtWpI--VsFNV.GLShDsFlPP .....................hYAhG+V+M....hssEhpt...ustsstt.................sttssE.GsFVLWQhsP-hWhlELsVu.G..s.KlpAGssG+lsW..RpTPWhs.uHAu+.GPsRPLRRhLQ.......................................................GLDPho.TAslFus.Ah....ClGEKplssEDCFlLKLps-stsLcuRSp..ussElIRHslaGYFSQRTGLLVplEDS+LhRIps..su.......s-slaW.........ETThpShlpDYRsVD.....Gl.IAHuG+osVoLhRF.............G-sshsp....s+T+MEEsWsIEEVsFNV.GLShDsFlPP............................... 0 10 64 89 +2266 PF04822 Takusan DUF622; Takusan Mifsud W, Eberhardt R anon Pfam-B_3835 (release 7.6) Family This domain is named takusan, which is a Japanese word meaning 'many'. Members of this family regulate synaptic activity [1]. 25.00 25.00 25.30 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.71 0.72 -4.22 13 539 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 34 0 235 892 0 82.20 37 22.31 CHANGED sR+TSSpssplocpptc.cclEcLph-l+hIspERsELpchLshahpsshshR......lNspaphL+pp+cph...Mhshcchp.pIh-AhE ............tpsSo.ss..l.ocpppphpclE..cLKhcl+plop-p-EhpsILshah.....cslNhR..............hNsEaphlKpQH-cs...MhDhp+MpppIhpuhp....... 0 1 4 20 +2267 PF04844 Ovate DUF623; Transcriptional repressor, ovate Mifsud W, Eberhardt R anon Pfam-B_4487 (release 7.6) Family This is a family of transcriptional repressors. In plants, these proteins are important regulators of growth and development [1,2]. 
25.00 25.00 39.40 38.80 23.70 22.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -8.97 0.72 -4.17 44 411 2009-09-11 05:52:17 2003-04-07 12:59:11 8 6 28 0 276 364 0 60.00 42 21.11 CHANGED SsDPhpDFRcSMhEMlspptlt.........shscLc-LLtpYLsLNupcpHshIlpAFs-...lhhs..Ltus .........SpDPhtDFRcSMhEMlspptlt............shc-Lc-LLtsYLsLNup.caHshIlpAFs-...lhhslh.s............. 0 25 150 222 +2268 PF04854 DUF624 Protein of unknown function, DUF624 Mifsud W anon Pfam-B_4640 (release 7.6) Family This family includes several uncharacterised bacterial proteins. 21.50 21.50 21.60 22.00 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.55 0.72 -4.12 65 1340 2009-09-11 11:23:42 2003-04-07 12:59:11 9 2 753 0 194 770 11 76.80 25 35.89 CHANGED lshLNlLWlhhs.........................LhGlslhGlhPAosAhaslh++ahpsc....css.lh+sFapsaKcpFhcushlullhhhhshllhlsh ...........................................hhhNlLal..................................................lsulsl..lsl..GsApsulhpshhc..htpsp...........c.....s..........sh+pa....ap.sa....KpNFh.pu.h.lhhhhlslshllhhs........... 0 75 133 155 +2269 PF04776 DUF626 Protein of unknown function (DUF626) Kerrison ND anon Pfam-B_2357 (release 7.6) Family Protein of unknown function, currently only identified in Brassicaceae. 25.00 25.00 34.10 33.00 19.00 21.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.85 0.71 -3.76 16 91 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 2 0 85 109 0 106.20 33 38.53 CHANGED slPca.s-ssh...psp.caY..lccS-lpps.-WlpLYhElslhsp.csp.hppt..lssLcIlpVslcT.ccshpss.c..LpApsAlhYIsa+shsps+.......hGpth.-RtAIlRRhhDtp.uphsL .................hPcW.s-ssh.....pcp.+aY..lpcSElpps..-WlpLYhElulhsp.cth...hpt......ht.LcIhKVsl-Tpt..pshcsspc..LcApsAlhYIoa+sh.sts..........s.th.-cpAIVRRshDtpsGphpL......... 
0 23 23 23 +2270 PF04781 DUF627 Protein of unknown function (DUF627) Kerrison ND anon Pfam-B_2475 (release 7.6) Family This family represents the N-terminal region of several plant proteins of unknown function. 20.40 20.40 20.40 20.80 20.30 20.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.12 0.71 -4.45 17 70 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 12 0 49 65 2 113.40 33 10.00 CHANGED chAcchaspGsaIKALcllEDhIph+tccps.h.hlHhhQGplFhcLApcTE.NsDlKhsYLLuSVpCaScshtLsshs.........AhSLapLuppluSshaYKKslpKAKpuLSlpts.cs .......cAhshhtcGs+hKALcllcDhlscHt.......csp.....hlHphQGslahclAtps-...ssssKhtaLhuul-shocsstLus.u.........ApuLapLApcht.spaYc+slpcu+cuLSlpss..s..... 0 18 27 31 +2271 PF04780 DUF629 Protein of unknown function (DUF629) Kerrison ND anon Pfam-B_2475 (release 7.6) Family This family represents a region of several plant proteins of unknown function. A C2H2 zinc finger is predicted in this region in some family members, but the spacing between the cysteine residues is not conserved throughout the family. 
21.50 21.50 21.70 22.30 21.40 21.40 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.77 0.70 -5.97 14 139 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 16 0 82 125 0 300.70 25 30.08 CHANGED cpL+oaWhGLDschKRsFhKVslsKLhoaVpsh....asccsp-sLcpsLs.A+pst+WpFWhC+s.CSc+F.ssE-CKpHl-ppHsucacPspppchsppIscsWActI.SsGsW-PVDssAAlchIKsRltcVKtFs...............YpNGW.................................sc-WPLAsDE..ERuKLLKEI+.lLVthh..-pKlLSCSlRDWlhpaslcaL.tpLcVScpsL.s-spLlcTPQSICFL-sc-LspILcaL+plK.....scRDDGssLVs+uV-uhhcsopVKE+IDhDsphS.hLLLD+RLLpscph......ph.DDEGolssa-ssshYscs.spGDcIlSWLh.DhsplDcp.....FPpsl.....+tHNh-IWlAVLRAVphTsRpLto+YsKKhphlsY-suLstsEslChpEDcRRcsh.c-QhspYASlLs-cCEEhl.c......lssKhFLssV+DVLctAspPsF-hhs.c-C....hshI+phco....lsDD.hVlKSlthL+pllppKV .........t..h+paWsu.h.s.-p+c.s.FLpVslscLpuahpsh.......tp..pp......stcslscsl..sas+c..sppWpFWhCsh....C.....s......p.+.FhsscphhpHlppcHhs.p......h.......phpp...h.sp..plspt.hchl.....WpPh........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 20 35 52 +2272 PF01889 DUF63 Membrane protein of unknown function DUF63 Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Proteins found in Archaebacteria of unknown function. These proteins are probably transmembrane proteins. 
25.00 25.00 37.80 34.50 22.40 22.10 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.79 0.70 -4.32 13 123 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 87 0 88 133 119 269.70 27 93.11 CHANGED tIp-Fla+YYI-PIht.cpGYNhVNTlTYAlILulslahlY+.hL++L+IclD-+FlhuslPallhGuolRVlpD..uGllpsPhoYlhlTPhIaFLlFslshhsLhluhpl.ptt...t.Yh+laushGlshsllsLhhLl...hppp..lhcsallshll.sluoshssllYhlh+h.h.hshhpc+LshhllhuHhlDASoThlGI-h..l.GYhEpHVVPsaLI-hhGoAhsMaPLKlllllsllYIL-p.htc-....pl+phlhlslllLGLAPGlRsshRMlhGl ..................................h...t..GYs.lsplsYullLhlulhhlhp..llc+......L.cIplD.c.chhhAlhPahlhGuslRslpD.........s........G...........l..........l..PhshLhlTPsIYhslhhls...hssllluhtl.ppt.....ta.p.....h..hhshGslhh.hhsl...h..hLh......hptp....h..hp.h..sh..h..hll..sl..u..sh.hss.lh.....a.hlh+h.........hph...h..p..sh.h.shhllauHhlDusuTslGl-h.......h..G....Y.....hE....pH............sl............sphl............l.........-............h.........h............G............s............ua...s.........hhslKlhlhlsllal..hsc.lp--....phtpllhlslhslGLuPGhRshl+hhhtl...................... 0 13 48 72 +2273 PF04816 DUF633 Family of unknown function (DUF633) Finn RD anon Pfam-B_5077 (release 7.6) Family This family of proteins are uncharacterised have no known function. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.37 0.71 -4.98 13 1587 2012-10-10 17:06:42 2003-04-07 12:59:11 7 4 1567 12 231 1139 75 199.30 36 86.45 CHANGED lsDIGSDHAYLPhaLlpsshsptulAGEVscGPapuAhppVpcssLsc+IpVRluDGLuslcpt-.l.DslsIAGMGGtLItcILEpGpc+Lttlc+LILQPNspt.pLRpWLpppsapIhsEpIlpEcsKhYEIlVsE+upp.......h.osp-lhhGPhLhc-+sslFhpKWppElp+lphlhtpls.....scpsctchptlppcI.phlc- ........................................................................lADIGSDHAYLPlhL.lppshhpp.AlAGEVsp...G...Pap...u...A......hpsV.........p.........t..........p......s.....L.......p..-........c..........IpV...R.Lu....s..GLs....s....l.c.p.........-......l...csIsI..A...GM....GG....p....L....IscI...L-..p.G..t....s........+.L..........s.......s....h..p...+...L..IL..QP..N...h...pp....ppLRpaL.pps.........apIlsEpIlcE.su+hYEIlVs......c..........h..upt..........................ho.pt.-l....hF...G.Ph....Lh.p...p.p.s..s..l..Fh..pKWp+E...lpphpphhpplt.........ppp.......p.....c.ht..tltpcl.t.l........................................................................................ 2 89 145 191 +2274 PF04827 Plant_tran DUF635; Plant transposon protein Kerrison ND anon Pfam-B_2859 (release 7.6) Family This family contains plant transposases which are putative members of the PIF / Ping-Pong family [1][2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.30 0.71 -4.94 5 275 2012-10-03 01:22:09 2003-04-07 12:59:11 9 11 33 0 143 891 1 132.70 41 47.04 CHANGED scYL.RuPsAsslcRLLplGc.RGFPtMlGSlDCMHWcWKNCPTAWcGQYTRGcp+.PTIlLEAVASaDLWIWHAFFGsP.GSNNDINVL-pSPlFsDIlpGsAPplpYhVNG+QYNhuYYLADGIYPcWATFVKSIRpPQsEK+KLFApcQEusRKDlERAFGVLQARF+IVtsPu+lWspscLusIMpACIILHNMIVEDERDhs ...........................................................................................................t.................................................................................................................................................tt.Nthp...hlt...s..l..h...............p...G..p...s...s........h..p...a......lN.....tp.........Y..p...h......s.Y.Y.L.s...D..u.I.Y...P..pa..s...s....h......h.....p........o...I.........................P..........s.....t......+......t.p.h.auptQ.Eu....sRKDlEpAFGV.......LQsRatIl.pt....P....s....p.h....a.p.t...tl.t..l...hh...uClIhHNM.IlEDEtt..................................... 0 20 63 102 +2275 PF04828 GFA DUF636; Glutathione-dependent formaldehyde-activating enzyme Kerrison ND anon Pfam-B_2779 (release 7.6) Family \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.87 0.72 -4.04 59 3736 2009-01-15 18:05:59 2003-04-07 12:59:11 9 25 1139 16 1716 3480 2625 89.40 22 58.89 CHANGED hhCHCppCp+toG.usass.shhtpp.slp....hhpG...thppap......susshp+.hFCspCGoslah......tsttss.....hhhlthGsl-ps.......pthpPp.hcl.aspp ............................................................................h.hCpCshC........p........+.........t.sG.s....h..hh.shhs......ts....s.lp..................h..h..p.G...t.....plptap.........................................ssptsp....+.....hFCspCGs...lah...........ptpsss..............hht...l.s..h..u.s..l-ss..........p.h.......t......t.............................................. 
0 364 828 1304 +2276 PF04830 DUF637 Possible hemagglutinin (DUF637) Kerrison ND anon Pfam-B_2732 (release 7.6) Family This family represents a conserved region found in a bacterial protein which may be a hemagglutinin or hemolysin. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.49 0.71 -4.62 30 397 2009-01-15 18:05:59 2003-04-07 12:59:11 8 83 154 0 54 380 8 148.40 36 13.38 CHANGED ussAu.............l......uuhuosAusuhlsspGshttshK.lspusslKuhssuulTAGlssGl......................s.s......s.ssshs.thstuo.sslspthss.sspushsAulsTAI.pGGSht-...sLtsuh....ssshuAtuAstIGs..................hshssstss+lhsHAhlGCAuutAsGu.........cCtsGAlGAus .............................................................htsAA.huSLsopAuVuhINNpG.s.......l.upsLK-LupusslKphssushTAGhhsth................................................hh.sshst.o.tth.....s.splssslssus.uuhls..TAl.sG...GS...LcD......NLtsshlusllsshpu-uAspItp............................hspshls+phAHAlsGC....suutsstu..........cCpsGAIGAuV....... 0 18 31 43 +2277 PF04829 PT-VENN DUF638; Pre-toxin domain with VENN motif Kerrison ND, Zhang D, Iyer LM, Aravind L anon Pfam-B_2732 (release 7.6) Family This family represents a conserved region found in many bacterial porlymorphic toxins which is located before the C-terminal toxin modules [1][2]. 20.30 20.30 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.69 0.72 -4.42 32 725 2009-01-15 18:05:59 2003-04-07 12:59:11 8 157 252 0 136 722 0 52.90 36 4.05 CHANGED sssp...LoEcpKQplosLupLAAGlAuGlsGss........s.sAssGApuucsAVENN.tLu.shps .............sspLo-ppKppls.uh.up.LsAGlsu....ul.sG.s.s...............tsssuApuucsAVENN.tLs...t..... 0 16 63 98 +2278 PF04842 DUF639 Plant protein of unknown function (DUF639) Kerrison ND anon Pfam-B_6010 (release 7.6) Family Plant protein of unknown function. 
24.30 24.30 24.70 24.50 23.70 24.20 hmmbuild -o /dev/null HMM SEED 683 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.16 0.70 -6.37 4 126 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 22 0 82 122 0 514.10 35 90.47 CHANGED pcht-h....s.ps.sshp.hPc..LS.lANsVVpRCS+hLslosc-LpcpF-sph..ulpp..TauRpFlEaCsF+sLSp.s.phhspLpDtpFpRLhFDMMLAWpsPssssppuh.pt.sp.p.......................h.h.V-cctoVGpEAFsRIAPslPhlADhIss+NLFcsLTouTGp+.Lpa.lYDtYl+pLcKhhKth+oppssphhsh.hucsEhlLch-Gs.sstPVL+Hls.ouWPG+LTLTspALYFEuhslhuh-sshRhDLocD.KpslKPthTGPLGspLFDKAlsYpShsh.EslVhEFsEhpGpsRRDaWLsIlpEllhlHpFlR+a..........plpG.l..t+pEhlu+AlLGIhRlpAlpEhhplsssphKsLL.FsLh-plPtGDhVLEsLA.ph......sphtspRssp..uusthh+.....s.ss.chlspl.G..............s.pstsphtspphlVG.....-hsVu-lssLEpAlcpSRpchchlEtAQATlstlchcGIsoNlAVhKELhLPh..hh..h.plhhWp-PhhossFhhhsohhIaRsWlsallsssLlalAhhMhh....pRph.s+sK..ttlpVps.PspNshEpllulQsulppLEphlQcVNVsLLKlRulhhShhPQtospsAluhlVlAohhAlVPhKYllshshV-hFTRpss.hR+tSs-+hpRRlREWW.plPAAPVhllpsps ..................................st....................h....LS.hAs.slpphu.ph.shsspcL..ppF-.th..t.....s.u..RphlEaCshphL.t....ssp.htsh..LtDttFpRLhashMLAWEsPss..psp..............................................p.hVu.-AFs+luPshshhsDhhssc.hFcsLossost.+.Lpa.hacpYlptlp.+.shcthpt...t..s.t.h......h...p.s.EhlLpltus.sppPVlpa.s.osWPG....+LhLTspALYFEuhtl.hshppshth-Lsp.-..pp.lc.thsGPhGspLFDpAl.hpS.shs-shhhEFs-htuphRRDhWhuhlpEllhha+Flpca............pl.s..h..t+tcshspAh.uIhRhpAlpchhph.ss..cthL.FshhpplPtGDhlLpsLs.p...........t...ptst.......tss...t........s.....t..h...h.h................t.....pttt..h..t.....ph.h.uths.lptultpshpp.chlphspATl-tspl-GlssNlslhpELlhPh..hh..h..lhtW-cPhhohshhhh..hhlh+thl.ahhs..hhh.shhMhh....h+...t.tp..ht.l...l...s.ss.s....shppllslppuhpplEt.......hlQss.NlhLLKhRslhluh.s...........p..........................topphhhhhlshAhhhhhlPh+hllhhhhhp.aTpp....Rp..sc.ph.phh+EhW.pIPssPV.l.....p................. 
0 13 46 59 +2280 PF04852 DUF640 Protein of unknown function (DUF640) Kerrison ND anon Pfam-B_6053 (release 7.6) Family This family represents a conserved region found in plant proteins including Resistance protein-like protein (Swiss:O49468). 25.00 25.00 32.40 27.30 24.40 24.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.60 0.71 -3.97 15 226 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 24 0 141 225 0 123.60 63 54.52 CHANGED ssspsutsuspssss...sh.SRYESQKRRDWNTFsQYL+Nc+PPlsLsp..CuusHVL-FL+Y..........LDQFGKTK.V....HtpuCsaFGpPsPPuPCsCPLRQAWGSLDALIGRLRAAaEE.s.G..GpPE.....sNPFuu+AVRlYLREVR-o ..............................................sssss............ss...s..SRYEuQKRRDWNTFsQYL+NHRPPLsLup..CSGAHVLEFLRY..........LDQFGKTK.V....HstuCsFFGp......Ps......PP.........A........PCsCPLRQAWGSLDALIGRLRAAaEE.s.G..GpPE.......................sNPFuARA..VRlYLREVR-.h................... 0 23 87 111 +2281 PF04862 DUF642 Protein of unknown function (DUF642) Mifsud W anon Pfam-B_4723 (release 7.6) Domain This family represents a duplicated conserved region found in a number of uncharacterised plant proteins, potentially in the stem. There is a conserved CGP sequence motif. 
27.00 20.00 27.20 20.50 26.70 19.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.21 0.71 -4.58 56 419 2012-10-03 19:46:52 2003-04-07 12:59:11 7 25 70 0 246 418 17 157.70 32 51.47 CHANGED GLL.NGsFEpuPptuph.puot...lh.GppuIPsWclsGh.VE.YIsuGppp.GsMhLsVPcGsaAVRL...GN-AS..IpQpls.VptGthYolTFuAA..........RTCAQt....EpLsVSVs..s.....p...suslPlQTlYossGWDoYuWuFpA.pp.sslplsh.HNPGhp.-DP......uCGPlIDuVA.IK .........................sll.NGsFEpuPh.....ssoh..........l..sp.sslPsW...hl...p.u.....h...Vc...YIsusphp...................VPp...GspAVcL..........Gp..Euu......IsQplp..ss.GphYsLoFusu........................csC.sts....p..t.l...s.Vplhs.............p.....stplsh.p...oh.h...s.....s.s..Gacshu.hsFpA..ss..splthh....ps.shp..-Ds......hCGPllDsVtl................................................... 0 35 131 191 +2282 PF04867 DUF643 Protein of unknown function (DUF643) Kerrison ND anon Pfam-B_6086 (release 7.6) Family Protein of unknown function found in Borrelia burgdorferi, the Lyme disease spirochete. 25.00 25.00 44.00 67.60 22.90 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.68 0.71 -3.68 3 80 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 22 0 5 64 0 111.60 69 64.24 CHANGED INEISDFYDNLScclKKE....INKLYtTcQlTLKQK+QhYSuacuhQEYsIKTGKSl-EIls.IIDPsKcFIKDVLKcKaLIKKYKNFpNhKVDaSYKKGMLEKCLEKLGEccShtF .......INEISDFYDsLS.sTKKE....IsKLYGscQLTLcQK+chYpuahuIQEYKRKTGKSI-EIls.IlD.PAKpFIKDVLKDK....aIIEKYKNFQNhKhDhSYKKGMLEKCLEKhGEchSscF........ 0 5 5 5 +2283 PF04870 Moulting_cycle DUF644; Moulting cycle Mifsud W, Eberhardt R anon Pfam-B_4889 (release 7.6) Family This family of proteins plays a role in the moulting cycle of nematodes, which involves the synthesis of a new collagen-rich cuticle underneath the existing cuticle and the subsequent removal of the old cuticle [1]. 
22.00 22.00 22.10 22.40 21.30 21.90 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.08 0.70 -5.36 9 114 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 9 0 113 89 0 257.80 25 45.59 CHANGED PhuhIsKpLpphV+shKpK..-..pKWp-llp+Ipccspcl+p+Kptcph.Rp.hth...t.............ht.ph........hps..h-phl-D--ltphhtp........ho-c-chhhhPhclIRpAsKLGhslsG..pNsosF-pKpl+lhSPRFMSllP-cptt..sslsLLSPSlFSLHc-Go-.EpphSlsplL.tuhhsssDppsal-hllEAoGVsEsV--Ac+chhptphccpct.hhs.cGp.h.Fo+pNhocta.sscspKl-lhEpLcKoYStpQhc-MNpTGYolhsscQMphhYGctSPapNsc..hLcsY+Nho+u-hcculassI+slAccch...KFc .............................................................................................................................................................................p.p......p.W.phh.ph.p..t.h..ppp.pp..p....................................................................ht...h-....ht......p.........................................tt.p.h..P....thlcp...............uh.ch.h..h..s..G.....ppssshs...hphlSPRhhulhP-p.......................s.phslLSPSlhuLac...c.sst.-phhsLspll.....phhp...pp-ppshlshlhEhoGlscsl-ch.ph.h......p..p........s........h.p.pp..thh...ptpphchhctLppoho.c...............Q...hpchpppGashhs.pQhphlYGptu.hpssp...................lcphpphoptp..hpptl.psI+tlAptp................................ 0 39 53 113 +2284 PF04875 DUF645 Protein of unknown function, DUF645 Mifsud W, Eberhardt R, Haft D anon Pfam-B_4997 (release 7.6) Family This family includes several uncharacterised proteins from Vibrio cholerae. There is some doubt regarding the existence of these proteins, they are encoded by open reading frames contained within a repeated region in the Vibrio superintegron. 
19.80 19.80 20.30 20.20 17.40 19.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.94 0.72 -4.28 12 607 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 63 0 14 247 1 51.60 66 95.40 CHANGED lLDspHsphuFhKGCIIusIhlSLSRTLNpGQLNLDRFcFWQPTSQLLsLDVCLhDAFA ...........h.sh.ptphuFhKusIIsshhlSLSRTLNRGQLsL-RF-FWQPTSQLLsLDVCLhDAFA........................ 0 13 13 14 +2285 PF04883 HK97-gp10_like DUF646; Bacteriophage HK97-gp10, putative tail-component Kerrison ND anon Pfam-B_6160 (release 7.6) Family This family of proteins is found in the caudovirales. It may be a tail component. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.80 0.72 -3.05 172 1665 2012-10-01 19:49:39 2003-04-07 12:59:11 7 3 1229 0 221 1184 106 85.30 19 61.94 CHANGED ccltcplpp.htp..........hpcth.ccslc.psuppl....tpphpppsP......................p............s.Gp......lpcShph...................................................ttps..st.hplpltsssp.....................Y...AphlEaG ........................................................tl.pplpp.hsp......p.hp.chh.ccslp..psuphl......hpphpppsP................................................hc........o.Gp..........Lccslph.................................t.......................ptps.....st..hs..spl...ts.sst...................................................Y....spalEhG................................................................... 0 74 152 185 +2286 PF04890 DUF648 Family of unknown function (DUF648) Finn RD anon Pfam-B_5530 (release 7.6) Family Family of hypothetical Chlamydia proteins. This family may well comprise of two domains, as some members only match the N-terminus. 
26.60 26.60 27.80 26.80 26.20 26.20 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.04 0.70 -5.26 8 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 15 0 21 58 0 213.60 23 66.45 CHANGED Mpl...YoFSP.slpsSapc+lhAsLDuYFFLGG+RhKVlolsspsshphhuppcshslSTsEKlLKILSa.LLlPIVIIALLlRhhLHppa+tph.sh.hL-st............spsl.................ph-l.tpstcsushhsh.s...........htppPpslctshhhsctpF.sshppcahhchho.....shltYllu.ppphpshsl...............cs..hp.oscshppph-hsh+scp+sl.......pcLcchLshsu..Khop.schlpptlhculpp...........sPpFh.-hscchhspLssss.Ishc.............................................tG.pssphpthtshhlhhp.....phth ....................hthSs...psoh.c+hhutLDsaFahGGcp.spllshss.tthh.hhpp...pshtl...SphEKllKI.lSa..lllPlsl...lALllRhhLH....thhphph..................................................................................................................................................................................................................................................................................................................................................................h......................................................................................................................................................................... 0 1 1 19 +2287 PF04894 DUF650 Archaeal protein of unknown function (DUF650) Kerrison ND anon Pfam-B_6199 (release 7.6) Family This family represents the amino terminal region of an archaeal protein of unknown function. 
25.00 25.00 54.00 31.10 16.90 15.20 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.65 0.70 -4.92 36 192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 147 0 121 190 40 262.30 35 66.52 CHANGED hLCGhshCPILp+hcshpthhpphppt.....l.GuSPPSlFVGchGYP+VplsPhsPP.lpGDsu.h.a-sPppWhs..hslc-llphRsuLlputtphpVc...............pts.sthlpclQplAlSs+PV-sEhthp+h.Pphc...lhhDthhsPhGPuushcphclspNP+lP+plE+lhhD.Dh+Ac-AlhpLYcpGhslhpIp+lLSsGhLG..ccR+LVPTRWSITAVDchluchLhccl+passIsphcVahppahGNpahllLhPupasaEhlEhWhPsSlWs .....................LCGhshCPIlt+hcshh.....ph.....h.ph...httt.....p...l.GSoPPSlFVGchGYP+VplGPhhPP..htGDsu..h..h-sPppWhs..hslc-IlphRhsLlputpp.h.pVc....................ph..sthlpclpplulSs+PVDsEltlp+h.Pp.st...lhh.sthhsPhGPpu.lcchclspNPplP+tl-+lhtD.Dl+Ap-AlhpLYpp.GhDlhpIp+hLSsGhLG..cpR+LVPTRWSITAVDchluctLhc.cl+paspls.chcVahtpahGNhahllLhPs.papaEhlEhWhstohWs................... 0 32 71 99 +2288 PF04895 DUF651 Archaeal protein of unknown function (DUF651) Kerrison ND anon Pfam-B_6199 (release 7.6) Family This family represents the carboxy terminal region of an archaeal protein of unknown function. 20.50 20.50 20.90 21.10 19.90 19.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.37 0.72 -4.43 41 188 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 147 0 119 186 25 106.70 36 26.66 CHANGED EshcG+psY.spluGuYYAARLuVhEaLpcp+RQAssllhREIpssYasPVGVW.VREsVRcAhcscP.tpF-oLc-Alphlspp..lchshppahtpSpllpth...pQpoLssF ...........EshpG+psY..spluGuYYAARLuVLEaLpph+RQAsslllRElpssYhsPlGVWplREsVRpAhc..........s...........ps.tpa-.slcpAlphlspp..Lphs..hppahppSpllpth....Qppltpa............................. 1 31 69 98 +2289 PF04910 Tcf25 DUF654; Transcriptional repressor TCF25 Mifsud W, Eberhardt R anon Pfam-B_6652 (release 7.6) Family Members of this family are transcriptional repressors. They may act by increasing histone deacetylase activity at promoter regions [1]. 
20.10 20.10 20.10 20.30 20.00 18.60 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.07 0.70 -5.51 44 359 2012-10-11 20:00:59 2003-04-07 12:59:11 9 8 293 0 251 376 8 304.20 28 48.74 CHANGED a..FpFp..asppYppspppFhhslphh.Dspslhpll.pphPYHlsoLLQluplhchpu.Dpshus-LlERALash-psh+ssF.....shss.ups+Lsapp.pNRtFaLsLa+alttLspRGsacTAhEasKLLLSLsPsp.DPhuhhhhIDahAL+ucpapaL....lchhp..........p.hh......pp....................................................................................................................................................................................ht..............hP...........................shuaShALAhht.............Lpcp........................................................................ppucthLppAhttaPhslhtLhc..................pl.................shs.hs...thtspss.tsh.......phhsclYltRsptlWcsspshpaLcpsht......................................ph..pt...tpshtt.....ppsht..pshshslhRallLSc......pphhshlP......pplh..tt.h.t.DP..LPP 
.........................................ata...asptYppsptpFhhs.l..pth.Dsptlh....tl...L..pptPYHlsoLLQluclhchpt.-tthus-LlERAL..ashppuhps.F............phsp.GpsRLsapp.p......NR..thalsla+aht.Ltp+GshRTAhEasKLlL.SL-Pp..........DPhshhhhIDahAL+u.cpa...p.al....lchhp.............................phth...........pp.....................................................................................................................................................................................................ht....................................hP........................................Nhsa...ShsLAhhh.............lppp.............................................................................................................ppu.pthLppAhhhaPhlhh.Lhp...................ph........................................sht.hs....h.hsp.s.s....sh..........phls.plYltRst.lW.pp.sphhsaLppsh..................................................................tp.t.....t.hsh.slhRalhLo-.....hpph.h.shlP.....phh...t.....D.lPP.......................................................... 0 90 142 209 +2290 PF04919 DUF655 Protein of unknown function (DUF655) Mifsud W, Bateman A anon Pfam-B_6697 (release 7.6) Family This family includes several uncharacterised archaeal proteins. This protein appears to contain two HHH motifs. 
22.20 22.20 22.20 22.30 22.10 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.05 0.71 -4.57 8 157 2012-10-03 02:11:09 2003-04-07 12:59:11 7 2 154 1 102 214 123 176.80 41 87.79 CHANGED hE-YAaVLDYLPpGaPD-cp..a+ccPlsQulGEcpFpLLEloP+..ssDlh.....................................................................................lh-+VaIGKGp..RcKIsplsR.+lpY-DLTssA+sELPYVlE-IV+ppE-RFVcFFNcApPIosRLHsLELLPGIGKKhMWsILEERc+K.PFESFEDlcpRVculpcPs+lIscRIlcEl......csc.ppKYhlFVt .........................................................................................................................h.EcaAhVLDaLs.....hGh.....s..s-p....+........a..p..cc.P..lsQulG-ctFpLlElssc...ssslp.....................................................................................lt-+lalGpsp..Rc+l.plt+.......+lpY--LTssA+sELshllccIVcppE.c+FVcFFNcAt..PIT...h...RLHsLELLPGIGKKhhhpIL-ERc++.PFcSFcDlccRlt....ulp.cPschlscRIlcEl..........psp..pKYhLFs.h................................. 0 27 61 83 +2291 PF04920 DUF656 Family of unknown function (DUF656) Finn RD anon Pfam-B_5777 (release 7.6) Family A family of hypothetical proteins from Beet necrotic yellow vein virus. 25.00 25.00 27.00 31.60 19.90 17.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.60 0.71 -4.43 2 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 4 0 0 37 0 123.70 89 45.88 CHANGED MADsEICRCQsop.PLlphpsYDhTuRhh.cRI-IGPLGVLhNlshLFHMShlR+hDlaPaLNpIhSIsVSlDVPs.uslsssplhVhlahsp.........Wphl..C.....CasusclhSD.L ......MADGEICRCQVTDPPLIRHEDYDCTARMVQKRIEIGPLGVLLNLNMLFHMSRVRHhDVYPYLNNIMSISVSLDVPVSSGVGVGRVRVLIFTTSRERVGIFHGWQVVPGCFLNAPCYSGVDVLSDEL 0 0 0 0 +2293 PF04936 DUF658 Protein of unknown function (DUF658) Moxon SJ anon Pfam-B_5062 (release 7.6) Family Protein of unknown function found in Lactococcus lactis bacteriophages. 
22.30 22.30 22.30 23.90 22.20 22.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.28 0.71 -4.44 2 26 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 17 0 4 20 4 142.50 57 91.62 CHANGED KlaDsYhpG.KhuTGThcEluchh.lS.pSluhWlKNGhssp.tNsthKhAllNc...KthME.......KhP.........G.h.hsGotpcISp....................................................Ehp-+-RRKHETKEERRLRRNIRAQMAIEsuRKE-.sh ....................KlaDsYhpG.KpuTGThcEluchatlShsSlulWlKNG+ssp+AsstYKHAI.LNcEpoKElhE.pKpEt+K.LPtuVYshYccGphlhTGTAcEISQhhpItppsVa.YlpsGKhsachhKphK+AlhNpsETRKRFPhhSh.pEcE...lEpp-K-RRKHETKEERRLRRNIRAQMAIEsuRK--.sh..... 0 4 4 4 +2294 PF04937 DUF659 Protein of unknown function (DUF 659) Moxon SJ anon Pfam-B_5061 (release 7.6) Family Transposase-like protein with no known function. 22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.77 0.71 -4.73 15 467 2009-01-15 18:05:59 2003-04-07 12:59:11 10 41 39 0 216 514 3 121.70 28 21.63 CHANGED PohcsL+spLLpcthp-lpphl+-hKssWptTGCSIluDsWpDscu+sllsFlVsCPcGshFLKSlDsSs..hhpssphLhcLlsplV-EVGscNVVQVITcssssYtuA.GKhLhp+a..solFWoPCAu+ClclhLE-lu..K.........lcals-slccAppIT ......................................................................................h...h.t.h.....hp.....t.h....G.so...lh.sDsW..ss.....pt+.sl.h.N.hhs..s...t.u.h.h..F.hpu.l.-sos...........css...phlhphhcp.hl....c..c....l....G......p...N..VVQl...l...T...DNsss.h...htu.sph..l.tch...pl.aW..ssCssHslsLhlc-hs..p.................h..htph.ltpu.............................................. 0 24 135 186 +2296 PF04939 RRS1 DUF660; Ribosome biogenesis regulatory protein (RRS1) Finn RD, Moxon SJ anon Pfam-B_6906 (release 7.6) Family This family consists of several eukaryotic ribosome biogenesis regulatory (RRS1) proteins. RRS1 is a nuclear protein that is essential for the maturation of 25 S rRNA and the 60 S ribosomal subunit assembly in Saccharomyces cerevisiae [1]. 
25.00 25.00 30.50 26.30 24.40 24.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.16 0.71 -4.76 33 347 2009-09-10 22:34:59 2003-04-07 12:59:11 7 6 299 0 237 343 3 161.40 38 57.87 CHANGED hshpaDLGpLhuhDsNsl........ssssppEptLpshsRDs...sQlLlNplhs...L.Ptcss.s-u...........sllpLPpPsohL.PREKPlP+PK.s.TKWEpFAccKGI..............pK+++s+hVaDEsos-Wss+WGYKttNc.ct-c.pWllEV............sstctstDshttpctc+Kc+lpKNchpch+Nht ...............................................hphDLGpLhuh...DsNsl...........t.tsshEp.LpshsRDs...sQhLlNplhp...L.Phpps..p-u................hlhpLPt....P....sT..........h..L...PREKPlP.cPK...s.TKWEpFAppKGI..............pKc++s+hVaDEtstcWhs+WGY..Ktt......s...ctcc..pWllEV............................tss..c....h...pD.h..scp..cpp+Kc+ltKNEhpph+Nh.h................................ 0 88 139 200 +2298 PF04978 DUF664 Protein of unknown function (DUF664) Moxon SJ anon Pfam-B_5281 (release 7.6) Family This family is commonly found in Streptomyces coelicolor and is of unknown function. These proteins contain several conserved histidines at their N-terminus that may form a metal binding site. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.85 0.71 -3.97 48 958 2012-10-02 14:44:17 2003-04-07 12:59:11 7 6 365 1 344 2013 90 150.30 24 84.39 CHANGED sLtuaLctpRpslhhplcGLo-cph+ppssssus..oLhGL...........l+Hlu.pVE+sWhppshssp.sss.h..............ssssscat..hsss-ohssllutacpssspucthlu.thsLDsshsh.....tshht.....sol.RalllHhlcEhARHsGHADllREtlDG ...................................................................LhshLsttRt.sl..htpscGLoscphcp....t......s....ss..u.....s....ol.....suL..............l+.Hl...A...t...l...........E...........p....s...........W..h...t.t...s...h...t...sc....ts.h..t..h...ht........................tthst.shp.......h.s...p....s...c.....s..h...s......p...l.....l...s...t.a.pps.s...p.c.s..p..p.hl.t......t..h..s...h.-.ths.ph..s........hshtss........sol.c.hll.l.H..l.l.c.-tupH.sGp....A.D.llREhl........................................................................................................ 0 134 276 327 +2300 PF05006 DUF666 Protein of unknown function (DUF666) Moxon SJ anon Pfam-B_5319 (release 7.6) Family This family contains several uncharacterised viral proteins. 22.20 22.20 22.20 27.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.19 0.71 -4.22 25 69 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 65 0 0 67 0 150.20 43 75.16 CHANGED FpR.sslVDC.s.po+LPCVTDpQChDNChss.hh...sshpCpp.GFCsspss.lsu.ps.s.....CDtshGLlpVasA.u-FVVsphClSsYRDllDDsGplRPYlC.......ssGsLclcLpsp.FoscDChCssGYT+hhFsQsAhuRslPVCIPNphAsLYsRlY ......FpR.sshVDCsps..+LPCVoDpQChDNCshtshh...sthsCps..GFCsspsu.hsutts...sth-CDsshGLlcVasA.u-..F.V.VsphClSTYRDll...DDsGp.RPYlC.......ssGsl....s.lsLtsp.Fo.sscCpCssGasKhlFpQTAhARolPVCIPNphuslYs+lY...................................................... 
1 0 0 0 +2301 PF05018 DUF667 Protein of unknown function (DUF667) Bateman A anon Bateman A Family This family of proteins are highly conserved in eukaryotes. Some proteins in the family are annotated as transcription factors. However, there is currently no support for this in the literature. 19.70 19.70 19.80 20.20 19.20 19.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.14 0.71 -5.07 9 406 2009-01-15 18:05:59 2003-04-07 12:59:11 8 22 162 0 285 362 9 167.50 41 37.28 CHANGED MFRssaQsG.FLolLhSsGSKPLpIWchpsKNGalKRlTDc-I+ShVLEIhGsNVuToaIssPssP.ppoLuIKLPFLVh.llKNh+KYFoFElQIlDD+ph+RRFRsSNaQSsTRVcsFhssMPhpLssG.WNQIQFNLuDFTRRAYGTNYlETlR.lQIHANsRIRRlYFsD+LYs-DElPs-a+LasPh+sp .............................................ptaQts.Flslhhu.hG......p..........p..PlphWcp...psc.s..G.cl+...........+lhDp-lp.S..VlEI.G....sssuosaIp..sP.....t....cs.....pp.........o.........LGI+hPFLlh...llK..s..................h..p.KY...Fo..F.Elplh..DDcsl+RRFphS..Na.....p..pp....s............+l.......p......P...h.hsp.......h................P......h..p...............l........s...............p.............G....WNplQhN....Ls.DaT.......c.c..u.Yu.....o....s..Y...h..coL+....l.plpANCRlRRlYFuDplYs.pp-lP.ch.+h.......p........................................................................... 0 118 162 232 +2302 PF05003 DUF668 Protein of unknown function (DUF668) Moxon SJ anon Pfam-B_4700 (release 7.6) Family Uncharacterised plant protein. 20.90 20.90 21.00 20.90 20.70 20.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.14 0.72 -3.77 18 204 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 22 0 131 194 0 88.20 43 17.08 CHANGED TLGuAuLALHYANlIlhIE+Ll..........ss..sptlstcuRD-LYpMLPsolRsuLRu+L+shs+s............DtsLAspW+sshp+ILcWLuPhA+NhlR .......pLGuAuLALHYANlIl.I-pll...................sp...sphls.ssRDsLYphLPsol+u.uLRu+L+shshs..................-thhssph+sshp+.hLpWLsPlApNTh+.. 
1 11 80 109 +2303 PF05037 DUF669 Protein of unknown function (DUF669) Moxon SJ, Sammut SJ anon Pfam-B_5014 (release 7.7) Family Members of this family are found in various phage proteins. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.85 0.71 -4.28 22 204 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 181 0 29 185 46 130.70 19 77.97 CHANGED hFThDasps....psapsl.sGpYcVhl..psphpsscs.Gsphlplchclhss..........papsphla.c.hh....ppcsschs.ttLpsls+AhG...h.pspphpslcsahpphhs+sl+Vslchc..sE.NGppY...plhth..scs..Ptst ...................s.pps.....pthtsl.sGpY-l..hlp..psphpsocs......G.sph...lslphplcss............ca.p.spplahphhh............pppstchshttl.thstsht...........h..ptpphpshpph.......hp.th....s+.hplhlp.p...p.pup.h...p.................t..................................................................................................... 1 15 25 28 +2305 PF05050 Methyltransf_21 DUF672; Methyltransferase FkbM domain Moxon SJ anon Pfam-B_5811 (release 7.7) Family This family has members from bacteria to human, and appears to be a methyltransferase. 
18.30 18.10 18.30 18.10 18.20 18.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.82 0.71 -4.31 459 2587 2012-10-10 17:06:42 2003-04-07 12:59:11 7 106 1140 1 1157 2771 5093 161.90 17 45.41 CHANGED DlGAsh.....G...................hhh.h......h..sshl...hs.......hEPtst...................thpthttt..........................hu.....ls...............................................t..t.hphhhhsh............t.ht.thhtst......t........ttttt....................t...............................ttphplpshsL................D..sh.hpph..............ths.....h....lKlDlEGtE...hp..lL.......s...spphl.pp...htshh...............lE.....h..................................thpplhp......hl.....pph........GYph ...........................................................................................................................................................DlGAph.......G.............................................hh...........h...tspl....hu.......................hEPtst....................hthlptp...........................hs...........ls...t.............................................................................................t..t....hphh...h..sh.t..............tth.h.....h.htt....t..shtt...........................................................ttspthplpshsL......................................................D.......sh..hpph............tpls........h...lKlDlEG....tE.....hp....lL..........pG......spphl..pp.....hpshh............h.hE......hp.......................................tt.hht...hl..........th..sa................................................................................................. 2 476 817 1040 +2306 PF05054 DUF673 Protein of unknown function (DUF673) Moxon SJ anon Pfam-B_5918 (release 7.7) Family Family of uncharacterised viral proteins. 
25.00 25.00 37.30 36.30 17.40 16.60 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.30 0.70 -5.88 23 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 0 58 0 355.70 43 99.39 CHANGED MsCPhsI+VhISDpFlhFPYchVtsQsshus.....t.lpsLsVYVPTpEDlphls+pph..sp.FcsVhVh+Hc...sph-scsstcsssuslVYWNPIhPIsclG.lGpTpVFSVlLTssLa.Csohhlcpsssh...CPlQh....p.......................h.....phschsslsGEss..Ltphsplhcss.ssFlICFs+ETPphlKI.LNlKRlLhlhu.hRps.A+Ysl.LscpElssIapcLsWEpsRRLhKGs.hssp...C.shlNRsSLpYl+pA.-lLsIsssu.polhshlchFpsLIlsYplVP-lhlcLNslspp...........K+..VRLYC+sDShAITstGsVP.NhPssNsh.sFcasslssssh.pphppclhpcs...s.lhlpuscYNYFh ........................................................MsCP.hsI+VhISD+FhsFPYphVpPQsDlGs.....t.l.psLlVYVPT...--DlpaV-Kpth......sp..FpoVhVh+HE..ssph-ocsscKsssuTlVYWNPIlPIsElG.sGcTcVFSVLLTssLFhCpThllcppsPh.....CPIphp..........................p....hphpchhPIsGEhP..Ltchpclh-ss.ssFLIC..FshETPphlKI....LNlKRlLslhp.aRcsPA+YsIpLscpElsslYpcLsWEpsRRLh.K.......GD..hssp...C.shlNRsSLpYl+pAppLLtIscsS.polVcFVphFpsLIhPYplVP-lllKLNolcpp...........++..VRLYCKNDShAITshGsVPsNhP-pNPh..sFDaoDlsssptlpphtpclhpcs.hss.lhVpAs+YNYFh......................... 0 0 0 0 +2307 PF05056 DUF674 Protein of unknown function (DUF674) Moxon SJ anon Pfam-B_5937 (release 7.7) Family This family is found in Arabidopsis thaliana and contains several uncharacterised proteins. 
23.20 23.20 23.20 23.30 21.50 23.10 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.82 0.70 -5.86 8 313 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 19 0 190 294 0 209.90 21 83.65 CHANGED MAcSoccPKlSLRLlIDEEKNKVVLAEAG+DFVDlLFShLTLPMGTIVRLLEcH.+KSpslsIGCFsNLY+SVV-MulDsFcT-ACKQMLLYP+Sl+cppsRsLKLNIDDTEusKhFhCPtF..pttCpchYSNFsTS+CcCGsh.........MNcEIph.cptts.uphts..DssGVFsps..+oSFlITDDLKVplsSscllLNsLKDLGhuDs.sKLsEhll-VGlcE............................VhTLLpClFTS-sPLTDTFL+K+S..o.sspRhaKt.SPslpcpt-EussDpslTLcsaVRKpD.hcILYVECGEDFVDLLFTFLAlPLESlWpISG....suIslGCIGNLhRSFKDLS...sstsppus..KCslPaYYpCQKQLL..slhTppPPsYhtah...ths...patLopp........pc+lpphhhhDPKocsphpStstp.....GFVK+sTKFhVoDDLIITPhNSsSTlslLK.chQlchDDlElQsIoIuKsE ...............................................................h...........hphhhppp..tpplhhhEuttchl-hlhs.hh.hPhuthhp..hh...................................................................................................................................................................................................................................................................................................................................................................................................................ahl.ssh..h......s........h.......th.t....h.p..h.hs...tc............................hh...h...L............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 15 106 142 +2308 PF05055 DUF677 Protein of unknown function (DUF677) Moxon SJ anon Pfam-B_5920 (release 7.7) Family This family consists of AT14A like proteins from Arabidopsis thaliana. At14a has a small domain that has sequence similarities to integrins from fungi, insects and humans. Transcripts of At14a are found in all Arabidopsis tissues and localises partly to the plasma membrane [1]. 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.87 0.70 -5.77 7 135 2012-10-01 23:20:42 2003-04-07 12:59:11 7 4 17 0 109 163 0 266.10 27 77.09 CHANGED L+SFDosLppRTsplIsSLsspsc.......spSls...hDuLhElsppLlEhNQsVs+lIl-.c-.DlWcNp-LhpLVpsYFcoopKTLDhCpslEpClc+Achup.LlIphAltQFEpEsh-ps.G...cp++YcKTLEEL+pFKshGsPFsG-........Fhs.FcplacpQhhhLEcLc+p+cKLDK+L+Nl.......+shRhlSNllFssAFlsVhlhSlVssuhusssssu.lAuuhssPltuhGhWss.........phWcK.EculK+p+plhpoM-tGh.ss.cshcsIphhVcpLcscIpShLcss-FAl-+cp...us+luhppIcK+l-thTcplcElG-psuppS+hIthuRhllLp+I ...........................................................................................................................................................................................................pl.tLl.pYFp.o.ps.phCttl.pslppsc.p....l..hlt.....h.....tp.................................t.h.t.hhpt.L...tpF..htsP.Fst.................ht.Fptlh.ppp.t.hlc+Lptp+++lc++l+.l.......+thp+s...Sslh..hssshs.s...lhl...sullhAs..h.s.hsslhs..hsuhhu.h.P.htsh.upahs.................phh.p.p.hpp..slp.t.t..p......phls.sh...t...tGTalhhcDh-.....oIphLVs+Lcscl-...shhphschulcp....pc...........s......lc....s.....l.c....cl+K..p.ps....Fhcpl--Lt-psthC.tsIp+ARtlVlpcI.................... 0 17 65 82 +2309 PF05077 DUF678 Protein of unknown function (DUF678) Moxon SJ anon Pfam-B_6127 (release 7.7) Family This family contains several poxvirus proteins of unknown function. 
21.50 21.50 21.70 56.20 21.40 21.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.90 0.72 -3.99 10 55 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 35 0 75.10 63 95.94 CHANGED hDsu...GGtKR.RKRKP+T..oVc-p.......DDCsTCSSCpSKLVplSDlTKlSLspaplsGK...usTLoCuACGSpLphLsDFs ......DST..tuGhKp.RKKKPKT..TVhD-.......DDCMTCSuCQSKLVKISDITKVSLDhlssh.+....GNTLuCuACGSSL+LLNDFA.. 0 0 0 0 +2310 PF05078 DUF679 Protein of unknown function (DUF679) Moxon SJ anon Pfam-B_6129 (release 7.7) Family This family contains several uncharacterised plant proteins. 25.00 25.00 33.00 27.10 21.20 17.60 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.20 0.71 -4.60 24 212 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 23 0 125 191 0 154.50 40 74.00 CHANGED hsusAsLupLLPTGTVLAFQhLuPsFTNp.GpCss..sN+hLTusLlulCuhSChhhSFTDShhs..t.cG.+laYGlAT.......h+G.lhlhs...............................................................uusssspp...........chp+YRLRhlDaVHAhhSslVFhuVAhh.DtslssCaa.Pssupss.....+clLpslPlulGhluShlFhlFPosR+GIGa .......................pusupLspLLPTGTlLsFphL..sP.hT..Np.GpCts...ss+hlousLlslCuhuChhh..oFTDSap.s.....t..cG.plaYGlAT.......h.cG.lhshs................................................................t..th...................phscY+L+hhDFlHAhhSllVFhulAhh.DtslssCaa.Ps..s.tpt.....cpllpslPlslGhlsShlFhlFPopR+GIGa............... 0 12 64 94 +2311 PF05079 DUF680 Protein of unknown function (DUF680) Moxon SJ anon Pfam-B_6131 (release 7.7) Family This family contains several uncharacterised proteins which seem to be found exclusively in Rhizobium loti. 
21.70 21.70 23.10 22.70 20.80 20.10 hmmbuild --amino -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.92 0.72 -3.48 10 44 2009-09-11 14:48:27 2003-04-07 12:59:11 7 1 5 0 29 44 0 52.80 42 70.12 CHANGED GSDsaGSsssNQPAAo.......stVDsotTASlcK....h.t...ossssps...psuQGs.........c.slaGp ......GSDpYGSsssNQPAAo........tlDsotTuSlcK...........ossssps............tssQGs.........chslaGp.. 0 0 10 10 +2312 PF05080 DUF681 Protein of unknown function (DUF681) Moxon SJ anon Pfam-B_6137 (release 7.7) Family This family contains several uncharacterised beak and feather disease virus proteins. 25.00 25.00 72.90 71.80 18.00 17.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.47 0.72 -3.57 2 31 2009-09-11 14:24:40 2003-04-07 12:59:11 7 1 1 0 0 31 0 88.90 84 58.80 CHANGED MGGAChs.+YWLVPs+Vh.Rh+.T.hTu.RGVARsDGPMsSLs.NhIKCAVsGGMDMhGKhSSShTT.MGG.LIARCSA.hTVTHI+CQLRA.LWSL.ARG ..MGGACIsSRYWLsPAHVhSRh+.TSSTsHRGVARVDGPMsSLGLNIIKCAVNGGMDMMGKhSShWTTFMGGYLIARCSASATVTHIKCQLRAhLWSLPARG 0 0 0 0 +2313 PF05081 DUF682 Protein of unknown function (DUF682) Moxon SJ anon Pfam-B_6152 (release 7.7) Family This family consists if several uncharacterised baculovirus proteins. 25.00 25.00 29.80 29.40 19.80 18.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.97 0.71 -4.67 16 57 2009-09-11 12:02:38 2003-04-07 12:59:11 7 1 55 0 0 51 0 147.80 46 94.26 CHANGED p+TI.LYLsc..Pssls....sDKssDcsVlYF-GlIEslsDcSCDKhohFuELKKEcALhMKKshpDLlp+ssGNYsKNHVLlDALlMYKTYVELl..D-SAFGtslLphCh-alTthF+LFpLpS+IlVllPspssacpDNLSsLLKHLhphslIpl ..........................+TlhLYLsc..P.s.slp....sDpssDc..shlYFEuIlEClsDcoCDKhohFuELK+EpALhMKKhhpDLlp+ssGsYsKsHVLlDsLlMYKTYV-Ls..D..-SAFGpsllph.C.palTtlFcLFpLpS+IlVllP.pls.acpDNLSsLLKHLhphslIpI.. 
0 0 0 0 +2314 PF05082 Rop-like DUF683; Rop-like Moxon SJ, Eberhardt R anon Pfam-B_6161 (release 7.7) Family This family contains several uncharacterised bacterial proteins. These proteins are found in nitrogen fixation operons so are likely to play some role in this process. They consist of two alpha helices which are joined by a four residue linker. The helices form an antiparallel bundle and cross towards their termini. They are likely to form a rod-like dimer [1]. They have structural similarity to the regulatory protein Rop, Pfam:PF01815. 21.10 21.10 21.50 27.10 19.80 19.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.25 0.72 -4.50 32 142 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 135 4 75 130 7 64.10 40 88.76 CHANGED hs-.lcpLKtclKKLsu+AsphKMDLHDLuE-.LPssWppIh-lAp+Ta-sascLsph+ppLtshEsu .......p-lcpLKtcl+KLsu+AsphKM-LHDLuE-.LPssWppIhplApcsa-AatpLspt+ppLtthct.t............... 1 17 47 60 +2315 PF05075 DUF684 Protein of unknown function (DUF684) Moxon SJ, Pollington J anon Pfam-B_6081 (release 7.7) Family This family contains several uncharacterised proteins from Caenorhabditis elegans. The GO annotation suggests that the protein is involved in nematode larval development and has a positive regulation on growth rate. 
25.00 25.00 26.20 26.00 23.20 24.30 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.22 0.70 -5.84 5 141 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 6 0 141 123 0 248.80 21 75.10 CHANGED LcKLtELcKcIc+LS-sMsucF-DLKAFIlspEFY...................sslAsTAsTLMKaMQDTsssPsKco+cs....F+-sssKsSPLcLAcpLhphL-sEsTNPLKMAMsADsL+sccTFEKWRcIIDAVlTQhLlLEoFtNGha+D+spYcPc+LsEchcEacEphccW+E-Yps.-oSYW-ctVcshVHcIQDNNEsp..SNEEKAslI+-sL-KIMTNDtFYVlVaD.hsh....supcapasltts.c+paIuSaNRGsCNVhVaRS.....t+s+ohsppsc-L++KaQHsLss+app.....Nspahlpspu+shps+lss.CGaVtlVRsscclA...Vcsoss-.tp+GPGthh-usFctssh...hSs.csFpl ............................h.........th..ltpph..thtphc.hhst..ph.........................phh..h..lhphh.ssht..............s.p.sht....................Ftt.ht..p..Phph...............sh................h.thlp..p.oNPlhhsh.t....p....pttshppWpthhtthhsphhhlEshhtGh...tts..t.p.l.p..t..th.p.htphppta...........................................hW....pthcphlpp..h.pptt.h...sptp+AphlpptLpp.h..hoscs.F..alhVhs............t...h..................ppp.hl.s..h....p.s.t....hshhlYRS.................t.p..t....t.......t.h.tp..ht...................................................t......h......p.....t.hhhhh.t............hp.st..........tPG...............................hhh...................................................................................... 0 20 31 141 +2316 PF05085 DUF685 Protein of unknown function (DUF685) Moxon SJ anon Pfam-B_6261 (release 7.7) Family This family consists of several uncharacterised proteins from Borrelia burgdorferi (Lyme disease spirochete). There is some evidence to suggest that the proteins may be outer surface proteins. 
25.00 25.00 28.80 28.80 23.30 23.30 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.87 0.70 -5.34 4 176 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 29 0 9 140 0 217.00 43 96.09 CHANGED KLLIDEEETVQIKDLNKVTTVNNsDLLLLDDGVASSNAITFKNFL-ToKDKTFKGEGLuYFKEIIKSTIAEELAADc-FVEKIYsKIhDKLINNDSTNLSNLFSKIKSRLTDSISSATLSRSDcLLIM.SSuTIQKTPVPKQLLGlPSsashstphTcuTTlYPSDYcspuIhIsM-sauDVsLlF.KSsDDpPIYLDIDIclKHpcNc..sKuLhlpYuDETphshVYhhpGusuluhRIPhYKGWYlQKRsph.GcPVPhLlKL ......................................................................KLLIDEpETVQIKDLN+..VopVNsoDLLLLDDGsuSSsAITaKsFLcsspcchF.KGEtLsYFKpIIK.....STIApELsusp-FlcpIYsplhsKL.Ip....N....-.S....sp..lusLFpKI+SpLpsuIsp.s.TLo.cs-pLLhh..pssI.QKTslPcplhGlPssath.....t..hs..........sspla..p...Yc.........sKthhIsh......c..p....sssollh.Kp.Dsp.sIYLDI-lc..lch..p..p...st...pK.lhLpYsDEo...ppph.lYt.h.p.us..p.t....shphPhYpGWY.lQ++.t......Gt.hP.h.Kl.................... 0 5 5 5 +2317 PF05092 PIF DUF686; Per os infectivity Moxon SJ anon Pfam-B_6313 (release 7.7) Family This is a family of dsDNA Baculovirus proteins. It is required for the infectivity of the OBs or occlusion bodies. It is a structural protein of the ODV envelope required only in the first steps of per os larva infection, as viruses being produced in cells expressing the gene for this protein but not containing it in their genomes are able to produce successful infections. Baculoviruses are large DNA viruses that infect arthropods, mainly members of the order Lepidoptera. In their life cycle, they produce two kinds of particles, a budded, non-occluded virus (BV), which buds out of the infected cell and is responsible for the cell-to-cell transmission of the virus, and an occluded form, the occlusion body (OB), which is responsible for protecting the virus between encounters with larvae. 
A variable number of virions are included in the para-crystalline structure of the OB, mainly constituted by the virus-encoded polyhedrin protein; these virions are called occlusion body-derived virions or ODVs. 25.00 25.00 37.30 35.50 18.20 17.50 hmmbuild -o /dev/null HMM SEED 522 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.15 0.70 -6.14 26 72 2012-10-02 15:38:38 2003-04-07 12:59:11 7 1 65 0 0 71 0 489.90 37 94.42 CHANGED lLLlll.....lhllhphlsllphtpp..h..htslphFDNosVP........lIcPPs.EIlIEuNshpCH+pL.TPCoocuDCslCREGLAsCQhFcEpsllpls-.....ppphsIcPGESYCLALDccRARSCNPsTGsWlLscocs.GaoLLCoClpPGLVTQLNlY-DCslPVGCpPaGpIssINpsPl+CsC-sGYVuDh.ssTpTPaCRPpplRDVlh-ssFFPRsPCssGalpl-HPuLcshYRpphth..slCVhDPCSlDPIoGpRps..G+Lhhhh.sspch..hshCsC....shpcsLasVa....sssoM....lp......ps.......stplsNACI...pPFslphpplpph-hKhFWGRss.h.pSDsDlVhpVs...spl.cs+Y+thLa.hlpsH.Pphs..pss......hhllKFoluhs.....hhtpsth...tpslaphYhphstpp..sss.CF..hPGh.GcC........Isss.ssCIpR..tsh.V.osEshssshCahoRpsptl.................plWssss..hYspsphPlshhlssh...hh..hh.sspsh...pslhhlsutssssp.tph.......ssLtplLsTYPpYS .......................................llLllllhllhpalsllpht.cp.h..h.slthFDNspVP........lIcPPs.EIlIE...uNshpCH.+pL.TPCs..THtDCsl..CRE.GLANCQhF-Epshlphp-....tppphhIp....s....GES....YCLALDRcRARSCNP.sTGsWlLs................cocs......GauLLCoChpPGLVTQLN.hY-DCsVPVGCtPaGpIssI....Np..s..PlRCsC-sG....YVuDasssTpTPaCRPpplRDVhhDssFFPR.....sPCssGalpl-HPuLsshYppphph..slCVlDPCSlDPIoGpRps..GpLhhh..ss.ct..hshCsC....shtssLasVa...ssssuM..........lpps......stplsNAClpPFssph.tlpph-YKhFWu+s..........-.h.huDsDlVhpls...spl...ptRYchhLashlt.sHPphs..t........hhlhKFSluas.....hh.t..........tslaphahthpttp..sts.CF..hPGh.GcC.........Ists.s.CIp+..t...V.suEshtsphChhoRcsptl.................phWssss..hYspsphPsshhlpsh...hh.....sspph...pslhhltstthh.t..tph........ssLtplLtTYspYS................................................. 
0 0 0 0 +2318 PF05095 DUF687 Protein of unknown function (DUF687) Moxon SJ anon Pfam-B_6321 (release 7.7) Family This family contains several uncharacterised Chlamydia proteins. 25.00 25.00 32.00 30.90 23.20 20.60 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.62 0.70 -6.14 11 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 28 0 17 48 0 453.50 30 80.94 CHANGED csuphssss.ph..s.stslh-s..lChh.ssssppstshpVslsYlNGStpTptEA.tEuhalS-.LRGcPVRslYNsG....puhupuhhhspRsShotppPlCpAlLcsWcpFFS+stNssppallIFaGDGGsaVQpALc...pohausRIhllGIuPohYVpGpspsHaYRVsGDlsohLDppGaspup....VoTLPYSuGu-GlFhPulpsPoFpaALRht..pl.................ltp.sthsussspDpp......uusthulhlshupsssuFoRlpchLsps-osuphEhNshP.sphhDllLoulaslhRloslhQtahlhs.lshps-s.lshlhhsuYssssLphhhLhhTNppstRcpaRslRllApuhpshhhLsslh-h.....lNhhphhppss..shlpAlahsuoshosollhh-...IhchshssLRsRlQphshRhhssss.c.....pccls................RsscuuhttslshlssssuGlhhulhhGlhNthulplPcs.....lscs.ssss............NsTslas.s.h+ss.........puWpoGDshulupTlshllshllllhslhshVp.VRpNt+RR ...........s.................slhps..ls...sspsp.sts..VtVsYlNuS.pshhcu.tEshaLSp.ltGc.VhhlYNpG....phhutshh..tp...pt..pPlspAllcsWctFFup....sppFlhhaaGsuuhaVppAlp...ps.asspIhllGIsPohalp.p..s..psaaYRV.G..shhohLD.tGahtup....VspLPYSsuu.Glhh..shtsssap.Alhht..ph....................hschSs.tsssspst...........upp..ul.ls.spssssFsRl.phLs.spTs.phE.N..hP.pph.shlhpshhslhRloshhp.ahhhs.lsht.ss.l.hh.hh.ashsslp.hhLhhTs..phRcthRshRllhhshpsh..hsslh-h.....ls.hhhhtps....hhpslahhhphhshsllhh-...lhchth.sLRstl.thshRhhts.s.c......ppphp.....................pp.hsshhhhhphlpssshulhhshhhGhhs.hulplsp......hspsss...............sposhhs.................sa.sscshulupshphhhshlhhhh.lh.hlt.lptp............ 
0 3 3 17 +2319 PF05093 CIAPIN1 DUF689; Cytokine-induced anti-apoptosis inhibitor 1, Fe-S biogenesis Moxon SJ, Bateman A, anon Pfam-B_6320 (release 7.7), Wood V Family Anamorsin, subsequently named CIAPIN1 for cytokine-induced anti-apoptosis inhibitor 1, in humans is the homologue of yeast Dre2, a conserved soluble eukaryotic Fe-S cluster protein, that functions in cytosolic Fe-S protein biogenesis. It is found in both the cytoplasm and in the mitochondrial intermembrane space (IMS) [1]. CIAPIN1 is found to be up-regulated in hepatocellular cancer, is considered to be a downstream effector of the receptor tyrosine kinase-Ras signalling pathway, and is essential in mouse definitive haematopoiesis [2]. Dre2 has been found to interact with the yeast reductase Tah18, forming a tight cytosolic complex implicated in the response to high levels of oxidative stress [3]. 21.70 21.70 25.50 22.90 21.60 18.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.47 0.72 -10.97 0.72 -3.92 25 368 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 289 0 251 368 1 85.80 45 31.98 CHANGED sDhcps..hs.tt..Cs.s......tK++KACKsCTCGLAEtpEpEppsppspppp........................................................t.........phps.t..SSCGsChLGDAFRCuuCPYLG...LPAFK..PGEcVpL ...................................................................................................................t...t..Ctss.....tp+++A..CKsCTCGLAEp.ctcppttptttt...................................................................p..uuCGsChLGDAFRCuuCPYl.G...hPAFK..PGEclth............ 0 93 143 211 +2320 PF05148 Methyltransf_8 DUF691; Methyltransf_hyp; Hypothetical methyltransferase Moxon SJ anon Pfam-B_6432 (release 7.7) Family This family consists of several uncharacterised eukaryotic proteins which are related to methyltransferases Pfam:PF01209. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.42 0.70 -4.71 4 358 2012-10-10 17:06:42 2003-04-07 12:59:11 10 15 292 2 259 3274 773 201.90 38 54.17 CHANGED sLpt+hpcRLDGu+FRaLNEpLYospuscAhchFpEsPphFcLYHcGFp.QVcKWP.pPlDhIIccL+........p+.sshVlADlGCGEA+IA...thpc..pV+SFDLsulsc+VhsCDM.uplPh-DpSVDlAVaCLSLMGTNltsFl+EApRlLKsGGlLhlAEVpSRFs..SlchFt+tlsKLGF-hpphclpNshFhLF-FpKss..hhu.Kc.h.slpLpPClYK+R ...........................................................................................................................s.L.tphtt+LtuuRF.RalNEpLYTpsSpp.Ah.phFppsP.p............h.F.p......Y....Hp.Ga..pp...Q..V..p.....p....W.P.....N.P..V....D.h..h....lp...l+................................................................p.p...s..t...p...h...s..l..A.Dh..GC...G............-.......A....p....L.A....................ps.h......p........p...........p..............................V...............a.......S.......F........D..............L..............h..............u................s...............s.................s............h..............l.............s...............s...............s..............D..............h.......u...............p...............l..............P.....L.............t..........s.......pS.......lD....l.....s.....l....F.....C.....L...S......L.....M.......G.......T.....N....a....h.......s....F......l.c...EAh.R...lL.+..st..G...........L..h.....l........A........E.................l...p....S......R.Fs...................s.s.p...t.....F....h...p..h..l..p......h....G....F...h.............t...........p.hFhh...h.h...+................................................................................................................................................................................. 0 97 153 220 +2321 PF05114 DUF692 Protein of unknown function (DUF692) Moxon SJ anon Pfam-B_6476 (release 7.7) Family This family consists of several uncharacterised bacterial proteins. 
25.00 25.00 26.90 26.90 22.80 22.50 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.63 0.70 -5.49 7 833 2012-10-03 05:58:16 2003-04-07 12:59:11 8 2 668 1 283 808 137 271.00 36 91.05 CHANGED hhpuAGLGhRpslhcshlsh.pssslpFlEssPENahshGGhhhtph-pltER.hPlshHGlSLSLGGtsslDhshLcslKthhcpacshhhSEHLuaspscG.pLa........DLLPlPaT--Alc+lstplR.sQDhLtp.lhlENsShYlp.s.pphsEl-FLpAlspcssCtLLLDVNNlaVNAsN+Gh.DPhtaLsslPscRlshlHlAGHspp..u......................spLhIDTHGtsVtssVWsLLttAhuRhus..PTLLERD.NhPPhuELhAEl-pItth.p+uttAtt ....................................................................................h...tsGlGhRtphhpthh.p.......................t...s.........sl.s....ahEltsEN..a......h......s...................G..G...h.......h..t.....ptL..cpl.t-..c..aPlshHG.l...u.L.Sl..Gu....s..s...PLD....tsa....L.p.pl+phhcphpsshhSEH..Lua...s...p....s.....s.....G...a.ha..................DL......LPl...Pa.Tc.Eul.cplsp.+lcpVQ-hL..t.c...lhlENsS.......t..Ylp......h.....s......t....u..phsEs.....-Flstlsccss..CtLLLDVNNlYVsuhN.H..........Gh.....D.........s.....t.........s....a....L....ssl....P.hc+Vs.lHlAGHs...pp......t........................ss....l.lID.THup..sV..ss..sV...WsLhpt...........sht..p.h............G.....s.....hP.TL.l..EhD.tslPs.hstL...hsEh...pphpph.t......tt.................................. 0 73 162 234 +2322 PF05113 DUF693 Protein of unknown function (DUF693) Moxon SJ anon Pfam-B_6473 (release 7.7) Family This family consists of several uncharacterised proteins from Borrelia burgdorferi (Lyme disease spirochete). 
25.00 25.00 29.80 25.00 18.00 17.60 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.08 0.70 -5.50 2 142 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 27 0 14 124 0 282.70 62 98.41 CHANGED hlLLpYDFKIEFYcscpS..KcoosG.sh.EETPKIIINTQHGIHlDIoISN.aSNhphlpSKpsKIVLWNLPLDFsscIchGDIVKIaYKKFAHEKpFDFIMAGhLGTPMSTDYPGGDFSVELDVRLhspSNFFNRKL...EsKsFKGKTVQEAIESVFPNRNIIpMDccDRLKlI-KsIYATTPKEFlDKIKGlYlHsVIADlGss.hss-CNhIFTN.+phtus.pYcALEDYGLEFIPQQEIsltspaKIphlaWNApTFYTHKLplGDKVSFIDGLGKMIKTTIKETSApLSNsGECSLILKLcDDSscpc ...............MLLLQYDFKIEFYss..c..pS..Kco.sG.shsEEhPKIIINTQcGI.H.lDIoISNha.S.shNhlpSKpuKlVLWNLPLDFTccIchGDIVKIYYKKFAHEKs..FDFIMAGhLGsPMSTDYPGGDFSVELDVRLhopSNFFsRKL.........EsK...NFKGKTVQEAIESVFPNRNIlNMDEcD+LKII-KsIYAoTPKEFI-K.I.K.GhYlHsVIsDlG....s........s........shslECpaIFTNhtphtts..pYcALEDYGLEFIPQQEIsltsphK.h.phlaWNApsFaTHKL+lGDKVSFIDGLGKhIKsTIKETSARLSNsGECSLILKLhDDSs........... 0 7 7 7 +2323 PF05107 DUF694 Family of unknown function (DUF694) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of hypothetical bacterial proteins. 
25.10 25.10 25.90 33.40 24.10 24.30 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.76 0.70 -5.17 68 568 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 510 0 201 554 13 266.70 31 92.36 CHANGED lpNRh-FlhlaDVpsuNPNGDP.sG.NhPRh.D.ETGpGlVoDVsLKRKlRsalpt..........p.........................G....p......pIaVpcpshhp.............................................ppptphhsth...........................................ph......pt..t.t.............................tttscphtphhCppahDVRsFGAVhu..........................thssuplRGPVQhs.aApSlcPl..l.hphsIT+hssop.......t......-uc...cpus.....................................tpMG.cKahVP.YuLYthaGhlsspL.A..cc....TGFo--......DlcllhcALhshF-pDp.SuuR..GpMpsRtLlhacH........sst....lGshsuttl.................acp....lplsh..............pp.psscsap..Dh.pl ..........lpp+h-Flhla-VpsuNPNGDPssu.NhPRh.Ds-pGhGhloDVsLKRKlRshlp..t.........................G....h....pIa..lpppthhs.....................................................................pp..p.phhpth...............................................................phttt.h.t......................................ttptcchtphh..sp...p...ahDlRsFGAVhs.......................s.....thssupl+GPVQls.hupS..l.cPl..h..phsIT+ssssp...............p-sc..cpps............................................pMG.pKahVs.YulYthpGhlssph...A..cc..........TGFo--......DlphlhcALhphF.-pDp...SuAR..Gphps+tllhacH...........sst............LGshsuthl......................act.........lplph...................pt.p..p.ht-.............................................................. 0 87 152 181 +2324 PF05117 DUF695 Family of unknown function (DUF695) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of uncharacterised bacterial proteins. 
22.40 22.40 22.40 22.70 22.30 22.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.53 0.71 -4.25 13 259 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 238 0 50 209 9 131.20 24 51.99 CHANGED hppsWpsYhoplsD+.AhhohNlulhcphss..phhspllplplsappssEsGLPss-EatcLtch.s+l.splsAhs.slasG+lhssGptchaFYspps....sslh-sLsph.sashshshQ-DscWDhYFcFLh ..............................................................................ppWt.a.sh..l.s.sp.us.hplNlsh.hph.t.h..pphsphlplplt.apsssEsGhPot-EhppLtplE-plhp.pl....p.shs...slhsGplossGp.tcha.FYsccs....th.phltcsL.....p.p.h....c.h.th..p..h.thpcD.pW...chYhpaLh............................ 0 16 33 43 +2325 PF05128 DUF697 Domain of unknown function (DUF697) TIGRFAMs, Finn RD, Bateman A anon TIGRFAMs (release 2.0); Domain Family of bacterial hypothetical proteins that is sometimes associated with GTPase domains. 20.20 20.20 20.30 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.61 0.71 -4.75 81 1824 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 1554 0 346 1241 405 156.70 29 46.24 CHANGED hpspclhphhcphlhp.p-ppAcpllp+huhtsAsllAloPlsllDllhsshtNhpMlpcluclYGhchuhhuthcLh+plhtsluhhGssclussl....ltshLuh........slsuhluu+hsQGlssGhLTttlGhushchhRshs.......assts...lspllpclhpthcc ........................................stclht.htp.l.....hctpAcp.lp+hAttuuh....h...lA....lSPlsllDhshlshtshclIpcluplYGl-huh..h.u..t..h..c.......Lh.+.h.l..l..h.s.hA..h.sG.....u...s.c..l..sc..cl..........shshh.u......................slsu+l..o...spsuQGluAG...hl...Tt...tlG...h.tu.h....c.hhcshs............a.ttp...tltph.hpphht....p............................................................................. 1 76 196 287 +2326 PF01901 DUF70 Protein of unknown function DUF70 Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Archaebacterial proteins of unknown function. Members of this family may be transmembrane proteins. 
24.20 24.20 25.00 25.00 23.60 24.10 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.18 0.70 -5.39 16 72 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 43 0 46 76 4 303.90 22 86.25 CHANGED .hhhllllhlalsluhl................phhhshllhhhuhhash....shlsuphlht.ht..........h.....................chphptpcphshhlllIGllhhhhsllhlGuIPLhcshlRtpLsshhhhhsaLhhlususllhu...phpchthhlhsh.............hLlsLhGYRTsVlsllluhhlhhYYsp+ls..t+tlllshlslhl.lLuluhhphhs.thts.s......lhhRhthThsVL-hIl...shsGshpGclphssl............GPRslIuphlh.shu..VoITsTlFGshhLDFGlhulhl.hhlLGlhhGlha+lspph.+ulYhhhYSlLluYsllGIETGhLDlslhlhYlhuhIlthh ............................................hhhhhlhhhalhh..h..................................hh.hhhlhhhhshhhhh......hhh.sphhht.h................................................hphhhppphhhhhhhlGll.hhh.lhhhtulPLhs.slRhtlss.....shlhhl.uhuhllhu............phpppthhlh.h..............hllsLhGY.Rosll..sllluhllhhYhp.p+lp......h+..llhhh....hhhhl.hlulshhthhh.t.tt..........lh.RhthTh.lhchll.......sh.uhhhGplhhss................sPRthI...........uphlh.hhs..hslTsTlhGshhhDFGhhGl.l.hhllGhhhthhhchtp.h..ptlYhhhhullhuhhhlulcoGhLsl.lhhhahhuhl........................................... 0 9 15 31 +2327 PF05142 DUF702 Domain of unknown function (DUF702) Finn RD, Sammut SJ anon manual Family Members of this family are found in various putative zinc finger proteins. 
20.50 20.50 20.80 21.40 20.30 20.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.08 0.71 -4.05 16 166 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 27 0 104 156 0 135.60 46 49.80 CHANGED utuGs.hsCQDCGNQAKKDCsHhRCRTCCKSRGFcCsTHVKSTWVPAA+RRER.Qpthuu.ttt.stt.........ssussc+.Rp.........sttoohssophssusss.thsspp....................aPsEVSSsAVFRCVRVoulDD.u-sE.aAYQTsVsIGGHlFKGILYDQGs-pp ...............s..sshsCQDCGN.QAKKDCsahRCRTCCKSRG....Fc....CsTHVKST.WVPAu+RRER.Qt.hsshttt...................ttsss++.+...............t.t.t...sshsp...sphsss...p.....t.h.th.t.p.............................hPtpVpu.AlF+CVRVoul--..u......-sE..hAYQssVpIuGHlFKGhLYDpGsc..t........................................................................................... 0 16 70 85 +2329 PF05152 DUF705 Protein of unknown function (DUF705) Moxon SJ anon Pfam-B_6448 (release 7.7) Family This family contains several uncharacterised Baculovirus proteins. 20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.96 0.70 -5.44 27 75 2012-10-03 04:19:28 2003-04-07 12:59:11 7 1 73 0 1 97 1 256.70 34 92.37 CHANGED hpWhsL+p+pu.hl+cHlLhlscasDLphl....uacplchhEaVlFthssp...........pphssppYthplhps.pDsMp-lRpplKpsaKsshLGHsaVlpc+.shYsh..LcEWhV...shh-V.p.ht.pshhachPH...VlVFDLDSTLIT-EcpVpIRDptVY-SLp-L+ptsCVLlLWSYGsREHVscSLpcscLs.sYFDlIlstGppssphsss............hhsDpphchhahspsFahD......hsstps.....LPKSPRVVLaYLpcpGlNahKoITLVDDLhsNsauYDhFVpVp+CP....PlsDWphYH-pIlcNIpcY-shap 
.........................................................................................................................WhsLc.pts.hh+tHlLhlsphtDlhhl....thpthphhEallhths.p.............hs..pYhhphhps.tDsMp-hRhplKhsa+hshLGHhhllt.pc.shYsh..LpEWhl...sh.pl.p..ht.pphhaphPH...VlVFDLDsTLIT-E.p.pVpIRDstVY-SLp-L+ppGC...lLlLWSYGs+-HVscShccs..c..L..s....sa.FDllls..tGpphtp.ttt.............hhD..p..chhahppsFhhD..........hp.tpt........lPKSP+lVlhYLpchslNahKolTLVDDL.sNsasYD.aVplp+C.....PhpDWphaHp.Ilp.l.pY-t................................................................ 0 1 1 1 +2330 PF05153 DUF706 Family of unknown function (DUF706) Finn RD anon Pfam-B_2804 (release 7.7) Family Family of uncharacterised eukaryotic function. Some members have a described putative function, but a common theme is not evident. 20.60 20.60 20.70 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.86 0.70 -5.05 21 371 2012-10-01 20:28:14 2003-04-07 12:59:11 10 7 259 4 239 388 200 228.40 49 78.66 CHANGED Yssu..p-RV+pFY+ppHp+QTVsashptRtcat..phs+tcMolWEAlEhLNsLVDESDPDs-lsQIpHhLQTAEAIR+D.sc...-WhpLTGLIHDLGKlLhh..F...u-sQWsVVGDTFPVGCpFs-phla.s-oFcsNPDhssPhYsT+hGlYp.sCGL-NVhMSWGHDEYhYpVhK.spSTLPccuLhMIRYHSFYPWH+cGAYpHLhN-cDcchLchV+tFN.YDLYSKScchsDlEpLKPYY.pLI-KYFPs...hlcW .........................................................p-pVcphY+..HppQTlsashp.t+tpat........p.h.s..+.tcMolaEAh-..hLss..Ll..DE...SDPDs...-hspIpHhlQTAEuIR+D.sc...cWhpLsGLIH.....DLGKlLhh.........a.................up..........s..........QW....sVVGDTFPVGCta.sc.....plla...chF.........p.s.NPDhps..shY..sTchGlYp.sCGL-NVhhSWGHDEYhYpV...............hK..s.p.o.oLPppuLhhIRYHSFYPWHp....tG....s.YpcLhs-cDh.ch...LchVptFN..a...DLYSKs.s.p.h....s.....cl..-pL+.PYY.pLI-ca.hPt...hlpW................................................. 
1 72 137 192 +2331 PF05212 DUF707 Protein of unknown function (DUF707) Moxon SJ anon Pfam-B_6598 (release 7.7) Family This family consists of several uncharacterised proteins from Arabidopsis thaliana. 24.10 24.10 24.70 24.70 23.40 24.00 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.10 0.70 -5.30 9 266 2009-01-15 18:05:59 2003-04-07 12:59:11 7 11 48 0 153 263 5 249.30 42 72.25 CHANGED hassopP+GAEhLPPGIVss-SDhaLRRLWGpPp..EDlspps+YLlshTVGasQKsNlsusV+KF.S-sFsIlLFHYDGRsotW.--hEWS+pAIHlSs+KQTKWWaAKRFLHPDIVA.Y-YIFlWDEDLGV-pFsAEEYl+llKKaGLEISQPGL-sspG..lsaphTtRRsct.EVH+....................shFVElMAPVFSR-AWRCVWHMI...............QNDLVHGWGLDFsLt+CV.......-sscEKIGVVDSQallHpulPSLGs...................QGpucputushpuVRsRsptEhphFpsRhusAEKph ........................................................................h.............LP.GIl..posh.h+..L.as.sp..............pph........t..p.LlshsVGhpQ....+t.lsthV....pKF...s...............p......sFslhLFHYDGpsstW.pph-WSpp...AI.Hlustp..QTKW.....WaA.KRFL..HP..DlVu.Y-YIFlWDEDLsV..-pFssc....c.....Ylcll+ccGLEISQPuL-sspu...lpaphT..hRp...t.......s........t..clH+....................tt.....t....th.s.t.t....sphPPCsu..........aVEh.MAPV.FSRsAW.+ClWH.hI...............Q.NDLlHGWGLDatLthCs......................tstpc+lGVVDupallHp.s.l..PoLGs..............................ps..ps.t.....p.s...hs.....h....tlRtpshhEhthFppRhtpA.t........................................... 0 28 91 123 +2332 PF05166 YcgL DUF709; YcgL domain Finn RD, Bateman A anon COG3100 Domain This family of proteins formerly called DUF709 includes the E. coli gene ycgL. Homologues of YcgL are found in gammaproteobacteria. The structure of this protein shows a novel alpha/beta/alpha sandwich structure [1]. 
25.00 25.00 26.10 25.40 22.00 18.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.32 0.72 -3.85 61 977 2009-09-11 13:36:28 2003-04-07 12:59:11 8 2 970 1 147 486 68 71.60 58 75.14 CHANGED CulYKSsKKsshYLYVt+..........+DsFscVPEsLhphFGpPphVMhlsLssc+pLApsDsccVhpslpppGFYLQhPPs ......CsIY+SsK+-pTYLYVcK...........+DDFS+VPEtLhcuFGp.PQhuMh.L.sL-GRK+LssADlEKVKpALsE..QGaYLQLPPP.......... 0 27 60 108 +2333 PF01902 ATP_bind_4 DUF71; ATP_bind4; ATP-binding region Enright A, Ouzounis C, Bateman A, Yeats C anon Enright A Family This family of proteins probably binds ATP. This domain is about 200 amino acids long with a strongly conserved motif SGGKD at the N terminus.In some members of this family e.g. Swiss:Q12429, this domain is associated with Pfam:PF01042. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.19 0.70 -4.98 6 870 2012-10-02 18:00:56 2003-04-07 12:59:11 12 20 725 8 474 928 144 206.10 27 57.01 CHANGED hKsAsLaSGGKDSshALYhAhcc.hEVcaLVslho-Nc-.SaMaHssNlclscl.AEulGIPllchhspG.cEcEV--LtshlcpLc...V-ullsGulhScYQKpRI-pVCRElGlKshsPLWc+sP.chh.EhlctGFcshllAVsAhGLscsWLGRclc+chlD-LpcLsE+YGIc.AGEGGEFETlVLDuPlF+tRlElcph-thWctphGahtIc+tcl 
............................................................................hh..hSGGKDS...shs.h......hp..h.hpt.....sh.p...l...h.....h...L.........h.s..........h.......h........s............c.....t................c......Shhap.....shs.h.p..hlph.A....c.uh.s....l.....P..L............h...........t....h............t...........h..........p............s...............................p..............p..................c....................h..c...s..l....h.p...h....L....p....p...lp........................lc..u...l...shGsl.....h.s.....p.h.....p..+s.hh.-pl.................spc..l..G.Lpsh.hPLW...........p................c...............s......p...........p...p..........Llp...-h......l...........s..........t...............Ghcs.hl..lpVs.u..h..G.L......s.t.p.a......L..........G+......p..l...s..............p.....h.....h.t.p..L.hp..h........................s....l......c........ss........GEsGEacTh..VlDu..P..lF....c.p.p.l...ltt..p...h.....................................h...................................................... 1 171 291 398 +2334 PF05164 ZapA DUF710; Cell division protein ZapA Finn RD, Mistry J anon COG3027 Family ZapA is a cell division protein which interacts with FtsZ. FtsZ is part of a mid-cell cytokinetic structure termed the Z-ring that recruits a hierarchy of fission related proteins early in the bacterial cell cycle. The interaction of FtsZ with ZapA drives its polymerisation and promotes FtsZ filament bundling thereby contributing to the spatio-temporal tuning of the Z-ring [1][2]. 
20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.79 0.72 -4.02 162 2881 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 2807 8 629 1565 1385 96.40 23 86.52 CHANGED plp....l.pIhG+pYplsssssp-ctLppsAphlcp+hp.clppp..ss.thssp+lhlh..uALsl.......uc-hh.......p....tcppt....tphpp....................................................................phppc......lcpLt............................ppl- ...........lslpIhGcsaplsh.s.s.-.........p...cct.L..ppsAchlsp+lp..-l+pp......st...shs...spc.l.....sll..sALNl.......spEhh...........p.......hcpct......pphtp..................................................................phppp.lp.ltt..................................................................................................................................................................................................................................................................................................................................... 0 210 399 517 +2335 PF05167 DUF711 Uncharacterised ACR (DUF711) Bateman A anon COG2848 Family The proteins in this family are functionally uncharacterised. The proteins are around 450 amino acids long. It is likely that this family represents a group of glycerol-3-phosphate dehydrogenases. 
25.00 25.00 28.30 28.20 23.80 22.70 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.23 0.70 -5.76 53 1207 2012-10-01 23:28:04 2003-04-07 12:59:11 7 3 1155 2 247 890 31 418.00 60 95.97 CHANGED M..p.ppIhETlpMlppcpLDIRTlThGIoLhDChssshp.htpplapKIs.phApsLVcsuccl....ppcaGIPIlNKRIoVTPIullut.uhtt.psh......lphAcsLDcAAcclGVsaIG.GaSALVpKGhopuDctLIpSlP-ALssTc+VCuSVNluoT+sGINMDAVpphGclIKchAphost..uhusAKLVVFsNusp.....DNPFMAGAFHGlGEs-sVINVGVSGPGVVcpAlcch..c.ut....s.............hptlu-.pIK+TAFKlTRhGpLVGppsuc+LsV...sFGlVDLSLAPTPshGDSVAcILEphGLppsGs.GTTAALALLNDAVKKGGhMAsopVGGLSGAFIPVSEDssMlcAlppGsLsl-KLEAMTuVCSVGLDMIAlP.GDTsscTIuulIADEsAIGhlNsKTTAVRlIPssG+csGD.V-FGGLLGpu.slh.lsphsuttFlp .........................................................................hphpplhETlpMIccpphDlRTITMGISLLDClc....sDhpcssc+I.YpKIT.opApsLVtsG-cI....tsEh..GIPIVNKRlSVTPIulluu.As.ps...sDa...................VslA+sLD+AAK...clGVsFIG.GaSALVpKGh.ppuDchLIpS.IPcALupT.-hVC.S.S.VNlGST+.....uG.I.NMsA.VppMGcl...IKcsAchos.................hGsAKLVVFs..N.A.V.E.....DNPF.MAGAF........H....G....VGE..uDsl...INVGVSGPGVVKcALEcl.....+Gp...s................a-hlAE..Tl.K.K.T.AFKI.TR.hGQLVGp...AScRLGV......sFGIVDLSLAPT..PAlGD..SVAclLEEMG.L..E......plGoHGTTAALALLNDtVKKGGlMAsspVGGLSGAFIPVSEDpGMIs....AVpsGsLsLEKLEAMTulCSVGLDMIAIP.GDTsAc..........TIuuhIADEAAIGhINpKTTAVRlIPs.GpcsGDhlEFGGLLGhA.PlM.VsthSs.sFl.s....................................... 
0 100 167 212 +2336 PF05168 HEPN DUF712; HEPN domain Bateman A anon COG2250 Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.33 0.71 -4.21 92 1339 2012-10-01 22:14:54 2003-04-07 12:59:11 9 14 572 11 648 1274 106 115.30 18 27.05 CHANGED hcthlcpAccsLcsAch.hc.ps..hYshusFtupQAsEhslKAlLhp..hstp..s+sHsltpLlp..hlpphh........spphhcth.......phLpphhhtuRYsshh...........scc-AccslptAcp....llch.h......cth .......................................................hhppAppplptAc..h.....h....p...ps........ha.p......h..u.s..ap.sppu....sEpslKA...h...l...ht.............hsh....p........s........p.....s....H.s.l.t..t.Lhp......hlpph..................pph.tp.th..............phLpph..h..h...t...s..R..Ysshh......................spppsp.phlphspp....lhph.h...h.................................................................................................................................................................................................................................... 0 263 417 547 +2337 PF05206 TRM13 DUF715; Methyltransferase TRM13 Wood V, Bateman A anon Pfam-B_10143 (release 7.7) Family This is a family of eukaryotic proteins which are responsible for 2'-O-methylation of tRNA at position 4 [1]. TRM13 shows no sequence similarity to other known methyltransferases. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.98 0.70 -5.14 29 265 2012-10-10 17:06:42 2003-04-07 12:59:11 9 15 192 0 183 340 36 237.10 27 56.06 CHANGED hQQuSllGphcchph...L........ssss.....................salEFGAG+GcLSpalsp..sh.pp..............................stspalLlDRsspRh..KhDs+lppcp...................hlpRl+IDIcDLsLstl.p.................t...ptpth.......luluKHLCGsATD..LoL+Cllssshtpt....................................tsp..htGllIAhCCHHhCsacpass.........+ca.ltphGhs.pp.-Fphlp+hsSWAVsGpc.............................................t..tp..ttlshp-RcplGhhsKRlIDtGRlhalcpp.....Ga.pspLhhYsppslohENVsLluh ..........................................................................................QpuS.llt.hpp.thl........t.tp.........................salEFGAG+G.t...Lothls..sh.t....................................................................ttsphlL.l..-R.ts..Rh...K.hDtp....hp.ppt.............................................hpRlphDIpc..Ls.Lstl..........................................................................................t....tt..h..................lsluKHLCGsATD..hsL+Clhps.h.........................................................................................htulslAhCCH...HhC..pappass..........ppa..h..p..t.huhs.....tt.pFthhpphsS.WAssu.t.........................................................................................t....hs......tc+.......ptlGhhs+tllD.GRl.aLppp.............sh..pstLh.Ys..p..lo.ENhhlhs.h............................................................................................... 0 75 106 154 +2338 PF01904 DUF72 Protein of unknown function DUF72 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this family is unknown. 
20.80 20.80 21.30 21.10 20.50 20.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.87 0.70 -4.61 218 2485 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 2050 4 731 1925 168 236.10 27 85.81 CHANGED hYPpshsp.tp.pLp..hYupp..assVElNuoFYs.h.s.p........shtpW.tpp....sP..c.s.FpFslKss+hlT.H...t...ppLp...............................sspsh.ppF.....hp.slpsL...t..s+...LGslLhQhPP..s..hphsspphptlhphlsph...............................s........hphAlEhRHto.ah........s..th.hphLc.c....tslshlhsDsst......hs.hs.................hss............shsYlR..hHGpsth.................h.htYss......ptL...ptaupclt.t..htt...u......................cc.salaFsN.stp.s...pAstsAhpLtchl .............................t..tp.pLptYupp..FssVElNsoa.......Yu.h.p..c.........sht+W..tp.p....oP....-..s...FcFshKhspslT..H.......p.ttlp...............................................pspct.l...ppF.....hp....tltPL...........t......s+..lG.llhQh...Ps.......s.......hths....s.cpl.stLh.phhpphs.....................................s.....h.shuVEhRH.s.aa......p..pph.hphLc.p....ps..ls.tll....sDptt.....sh.s..ht...............................................................ss........................shshlR..hpGpspht....................h..tYsp................ptl.....ptasp.+lt.p....htp....ts........................ppsalhhps.sstu....pAspsAtpLhp................................................................ 1 225 443 602 +2339 PF01905 DevR DUF73; CRISPR-associated negative auto-regulator DevR/Csa2 Enright A, Ouzounis C, Bateman A anon Enright A Family This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats [2]. 
It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation [3]. Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers [4]. This family used to be known as DUF73. DevR appears to be negative auto-regulator within the system [2]. 22.70 18.80 23.30 19.50 22.60 18.70 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.54 0.70 -5.55 28 278 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 233 4 129 276 8 245.00 21 77.26 CHANGED lsuRhhlpscuLNtsEusG.NhscpR+ssll.hppt.......aplthVPslSGpultHtattsLsclutpp...shslschscph...thhhphsstp.h..............t..thEpphlppsslpDltGFLh...........s.tsps.............l+RsSslphoahlPs.cthp.......h-sthHsRas................ttppppQslaphEtuSulYshohslDlstIGhsps.................................................................p.phslpsc-+.pcRlcsulcALh.hLsth.hGA+psRhhPlht..phVsuloc.tPhss.s..sth.scYl-psh 
................................................................................h..hph.hphpuhNh...s-.u.....h.....G.....Nhsph++l.................hpsp.hshlStpul+athhptlt.c.hh.t.......p........h....t...t........tthhpht.......................................................................................................ppsslpphhptDlaGaMp.......................................stpsts..................hpRsSsl+lo.hlulpsht..............h-hpF..tsphs......................................t......ttp..ss.slaphEhp.p.uhYphohslDLcplGh..s.pt.............................................................................t.htl..spc.E+..tcRlpthlculth.hst.s.htu.+.hspht..Phh....hlhsh.p.......................h........................................................................ 1 58 86 103 +2340 PF01906 YbjQ_1 DUF74; Putative heavy-metal-binding Enright A, Ouzounis C, Bateman A anon Enright A Family From comparative structural analysis, this family is likely to be a heavy-metal binding domain. The domain oligomerises as a pentamer. The domain is about 100 amino acids long and is found in prokaryotes. 22.40 22.40 22.40 22.50 22.30 22.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.20 0.72 -3.61 171 2580 2012-10-01 20:55:33 2003-04-07 12:59:11 12 6 1979 20 541 1427 512 98.20 39 85.64 CHANGED MlloTTssl.tG+cIhchhGlVtG..psltup...slh+DlhAul+sllG...GchpsYp.chLpcARcpAlpchtpcApplGAsAVlulch-hssl......sp..uhh..lhAhGTAVpl .............................................................MhhoTTssl.tGppIhchhGl.VtGpslhut......Nlh+DhhAu..l+.sl.lG............Gc...suY...p.c.Lp......cARc.AhpchtppActhGAsAlVGlchDhpsl...........up.....uM.l.....Vss.GTAVp.................... 0 198 364 467 +2342 PF01910 DUF77 Domain of unknown function DUF77 Enright A, Ouzounis C, Cerutti L anon Enright A Domain Domain of unknown function. 
The crystal structure of two of these members shows that this domain has a ferredoxin like fold and is likely to exists as at least homodimers. Sulphate ions are are located at the dimer interfaces, which are thought to confer additional stability. Although the function of this domain remains to be identified, its structure suggests a role in protein-protein interactions possibly regulated by the binding of small-molecule ligands [1]. 24.90 24.90 25.10 24.90 24.80 24.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.81 0.72 -4.31 104 1948 2012-10-01 20:40:01 2003-04-07 12:59:11 12 3 1567 27 505 1121 132 91.30 29 87.80 CHANGED sslsllPlG..s.....ssSlupaVApshcll....ccuG.lpaplssMuTsIEG-.hDclhpllccsa-tshptGusRVhoslKlDpRpD.pp.tsh...ccKlpsVc .....................hslsll..Phu...st.......sssVsphV.scslcll....p.p..uG....lp.a............plsshtTsl........EG-.hc-lhcllppspEhshp.t.tssRVhoslKl..chR.sp...hsh....ppKlpph............................ 0 171 325 444 +2343 PF01918 Alba DUF78; Alba Enright A, Ouzounis C, Bateman A anon Enright A Family Alba is a novel chromosomal protein that coats archaeal DNA without compacting it. 21.40 21.40 21.60 21.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.03 0.72 -4.38 126 745 2012-10-03 06:24:24 2003-04-07 12:59:11 16 6 348 31 496 820 75 66.30 25 38.49 CHANGED splhVss.tpshhshlptshphl....................................................pps...tcpVhlpuhG.pAIs+AlsluEhl+pch.......s.hp.lpplshss .............................................plhlss.tp.ltsalthshphh.........................................................pps...tccVhlpAhG.pAIs+sVslAEllKc+h..........shhp.hpplp...h......................... 
0 148 273 408 +2345 PF01923 Cob_adeno_trans DUF80; Cobalamin adenosyltransferase Moxon SJ anon Enright A Domain Cobalamin adenosyltransferase This family contains the gene products of PduO and EutT which are both cobalamin adenosyltransferases. PduO is a protein with ATP:cob(I)alamin adenosyltransferase activity. The main role of this protein is the conversion of inactive cobalamins to AdoCbl for 1,2-propanediol degradation [1].The EutT enzyme appears to be an adenosyl transferase, converting CNB12 to AdoB12 [2]. 21.30 21.30 21.30 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.92 0.71 -4.45 180 2946 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 2347 41 715 2068 1103 162.40 34 75.35 CHANGED IYT+TGDpGpT...uLhsGp...RVsKsc.RlcuhGslDELNohlGlsh..sth..............................pplpph..LpplQpcLFclGu-LAs..s.....t............hcls.pp.plptL......EppIDphstp..lssLcpFlLPGG.......stsuAtLHlARTlsRRAERthVsL.....tppps......l.ssts....lpYLNRLSDhLFlhARhhs .........................lYT+TGDpGpT...sL...h...sup...RVsKsc..Rlpu...hGslDEhNuhlGhsh.sth...............................tpphpth.......LtpIQppL.Fsl.u.u-Lus....st.................................htls..pp..plptL................................Eptl..D.chstp...hs..s...L..cp..FlLPGG........stsuAhLHlsRTls..........RRAERthspl.....................tppps.............l..pstl..............lpYlNRLSDhlFlhuRhh................................................................................. 1 266 472 598 +2346 PF01925 TauE DUF81; Sulfite exporter TauE/SafE Enright A, Ouzounis C, Bateman A anon Enright A & Pfam-B_3578 (Release 7.5) Family This is a family of integral membrane proteins where the alignment appears to contain two duplicated modules of three transmembrane helices. The proteins are involved in the transport of anions across the cytoplasmic membrane [1] during taurine metabolism as an exporter of sulfoacetate [2]. This family used to be known as DUF81. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.43 0.70 -4.92 912 12351 2012-10-03 02:02:08 2003-04-07 12:59:11 14 23 3983 0 3751 10142 8371 231.90 19 89.56 CHANGED h.h.h..s..uhh..sG.hlsulhGh..GGul...lhlP..hLh......h.h..........h......................ssth.Ah..usshh....shhh..sshs.u...shs..a....h+....p.t.................l....s....h.p.h...........h..h...h....h.h.h......s.ulh.Gu...hl.G........uhlht..hls......sp..h.............h....phhhuh....hl.lhhuhhh...hh...........ptt...............................................ttt...................hhhthhhhhhh............u.hh.hGhh.uGhhGsG........uGshhsshhh..hhh...s.hshpp..ssuos.......th.hh....hhh........slsu.hhs.....a.............hh...........hGhls.....h...hhh.....h...lhhu.....ul...l..Guhl....Gsp.l....spcl....s.s...t..h.....l+hh...hshll..l.hhu....lth ...................................................................................................................................................hh.hhhuhh...uG..hlsul..hGh...GG..Gh.....lhhP.hLhh..h.....h........................s.sth..Ah......uss..hh...........thhh...sshs.u......sht...a........h+...........p...t............................................................l.......s.........h...p...h.....................h....hh.h...h..h....s.shl.G.u.......hl..G...............uhlht.......hls........sp......h..............................................................l..phlhsl.....hl....lh.h.ulhh.........lh.....p.p.h.........................................................................................................................t..t..............hhhphhh.hh.hh....................s..hh.hGhh..sGh...h...Gs.G....sGshhsshhh........hhh......sh.shpp.....ssuss....th..hhhh.s......................slsu.hhs........a..............hh........................................tGt.ls...........h....hhh.........h.........hhhu.sh..l....Guhl....Gsp..l.......s.t..p.h.......s..s......p.h.....l+hh.hh.h.lhh.hhuh...
.............................................................................................................. 0 1230 2477 3207 +2347 PF01927 Mut7-C DUF82; Mut7-C RNAse domain Anantharaman V, Enright A, Ouzounis C, Bateman A anon Enright A Domain RNAse domain of the PIN fold [1] with an inserted Zinc Ribbon at the C terminus [2]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.07 0.71 -4.23 57 605 2012-10-03 20:43:45 2003-04-07 12:59:11 11 11 493 0 310 595 66 132.00 26 50.48 CHANGED .RFllDshL.GpLA+aLRhhGaDTh....ass.ch......................sDpplhplAtpEpRIlLTRD+tLhcRtthptt....hh..........................................l.pspcsppQLtElhp+hslph.phpP.hoRChpCNu......Lptls+pplhtp...............lP.tshphhpp...FhpCssCt+lYWcGoHacchpphl .................................................................+FlsDshL.G.p.L.A.+hLRhh.GaDsh.......ass..sh..........................sDtplh.ph.AtpE.pRll..L..T...R..Dc....t....L....hc...+pthhtt.......hh.............................................................................l...pstp..p.pQhhclhp.ch.tl.t..tht..hsRC..CNs........L.....h.t.t.t.t.h....t........................................................l..h.h.t..t......a..C..CtplaW.GsHhtph.t..h........................................................................... 0 106 212 273 +2348 PF01930 Cas_Cas4 DUF83; Domain of unknown function DUF83 Enright A, Ouzounis C, Bateman A anon Enright A Family This domain has no known function. The domain contains three conserved cysteines at its C terminus. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.20 0.71 -4.32 57 1113 2012-10-11 20:44:43 2003-04-07 12:59:11 12 15 868 0 454 1522 488 163.70 22 56.47 CHANGED sGshlpYYhlCcRclWLhs+.pIsh-pssp.tVph..G+hlHE......psYp+..pp...+-lpl..s....shplDhlc....ttphhl...........tElKKSp+h.......pcAs.chQLhaYLahLc.cpGlpsp.GhLpYPcp++ppc....VpLs-cpccclccslpcIcpllppp.pPP.spcpp.hC.+pCuYh-hCas ..................................................................................................................h..shlp.ahhC...Rphhlhth...t..t.h.t...p....p........pht.......G....phlHc.................................................p...p.....tt..............cpl.l..p...................tuhh.D.hlc.........tt.tphhs..........................lEhK......c....u..ctt..............t...p...s....t....chQLsh..shh......L....c...h...h....G..h...p..l.p............G...........h.l..h..Y........s.....p......p....+....+....p.h.p............V.t.h....s....p....p...h......c....p.p....l....h......p..h..l....p.......p.....l....c...p.l.h........p....p......t.......h.s........P............s....p....t.p......p.....hC...ppCuhh.chCh................................................................... 0 183 320 397 +2349 PF01931 NTPase_I-T DUF84; Protein of unknown function DUF84 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this prokaryotic protein family is unknown. 
20.40 20.40 20.60 21.10 20.20 20.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.59 0.71 -4.52 20 1061 2012-10-01 20:37:09 2003-04-07 12:59:11 13 5 1021 15 237 655 51 158.90 48 84.25 CHANGED uSTNPAKIpAVppAFspsFstt.shclpGVsVsSuVsDQPhus-ETppGAhNRs+pAhphhspA-ahVGlEAGI-....s...thsFuWhVl.stp....ppG.u+SAuh.LP.hllcclh.cGcELG-VMDElsGpcNl+pKtGAIGlLTsspLoRoulYcQALIhALlPFhssph ..............................AosNPAKIpAlhpA...Fpcl...........Fu...pt......sscIsuVuVp.S.GVs.-QPhGsEET+tGApNRV...t....NA+..c.............h...h..P.........p..........A.........D.....aaVulEAGI...-..........s.................shT.F.uW.sV.l-sss........pRG..EuR.SAoLsLPsslLpcl+..p.....G..ctLGsVMsc..hsGh..cpItp..KpGAIGlFTsG+LTRsSVYaQA.VILALsPFhNs..lh................................. 1 59 118 177 +2350 PF01934 DUF86 Protein of unknown function DUF86 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of members of this family is unknown. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.21 0.71 -4.46 147 1619 2012-10-01 22:14:54 2003-04-07 12:59:11 12 7 989 2 590 1431 169 105.20 20 82.04 CHANGED lhcslpplpphh........................................shs.hcpFh.p....sthhppAlh+plpllsEushclspc............................lhspp..thsa+phsuhRNhllHtYhs.lDhphlaphlpp.cl.......................ssLpphlppl .........................................................................h..h...........................................t.t..hppah..p......sth...h...h.Ah.p.lphlhEushc.lspt.......................................lh.sp.ps.t.s.h.pphhshRsh....l.sHcYh.s...lc.h...chlap.hlpp.pl.......................s.lpphht..h..................................... 0 206 423 515 +2351 PF01935 DUF87 Domain of unknown function DUF87 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this prokaryotic domain is unknown. 
It contains several conserved aspartates and histidines that could be metal ligands. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.62 0.70 -4.54 46 1158 2012-10-05 12:31:08 2003-04-07 12:59:11 12 19 801 0 504 4373 796 209.10 16 38.09 CHANGED lp..lGpLh..ssss.............ls.ltlDl.schls+HhAIlupTGuGKSpssulllpcll.........phss..slllhD.HG.EYspht..........................tspsplhsst........................lpIshhplshp-ltplh.h....t.ssss.ppphlppshpphp..................pphtphshpchlpthhppht......................................................thpphptpshttlhpplpphhpphtth.........tshhptl.....cssplsIl..-lut.....lsptttph.lsuhlh+pl .................................h..hGt........ttt...........h....sh..l.sl...pc.hl.......s.c....H.hu.l.h...GsTGoGKS.p.h.l.p...h..L..lc.phh..............................ttts......phll.lD.s..c.G....E..Yssht................................................................................................t..p..h..t...h..hp.t..................................................................................h.h.l....s....h...................h.......p................p......c.....h.............t....h...h..........................................................hh.....h.h....h............................................................................................................................................................................................................................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 154 317 410 +2352 PF01936 NYN DUF88; NYN domain Enright A, Ouzounis C, Bateman A anon Enright A Domain These domains are found in the eukaryotic proteins typified by the Nedd4-binding protein 1 and the bacterial YacP-like proteins (Nedd4-BP1, YacP nucleases; NYN domains). The NYN domain shares a common protein fold with two other previously characterized groups of nucleases, namely the PIN (PilT N-terminal) and FLAP/5' --> 3' exonuclease superfamilies. These proteins share a common set of 4 acidic conserved residues that are predicted to constitute their active site. Based on the conservation of the acidic residues and structural elements Aravind and colleagues suggest that PIN and NYN domains are likely to bind only a single metal ion, unlike the FLAP/5' --> 3' exonuclease superfamily, which binds two metal ions. Based on conserved gene neighborhoods Aravind and colleagues infer that the bacterial members are likely to be components of the processome/degradsome that process tRNAs or ribosomal RNAs. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.13 0.71 -3.91 149 2912 2012-10-03 20:43:45 2003-04-07 12:59:11 13 43 1840 1 1150 2564 397 149.80 21 49.81 CHANGED +lAlalDspNhhhs.p..............h.chcplhpplpp...........tGp.....lhhtpsYss...............tttt..............ppht...shL......pphG..........h.sl.......phhshhp..............sK.sshDltlslDhhchs..sph...DshlLlSGD..uD...Fs.llpplc.ppG.hpl.lhu................t.tssps......Lhsuss.pal........lp .................................................................................................................................................................lulhlDspNh.h..t.........................................h.s.h.ptlhpthtp..............................................hsp......lhhthhYss...................tttt.....................................pshh.....phL.............pphu........................................................h..tl.................ht.h.shht......................................tK.sssDh.tls..lD.hh..c.hhhp...pph................Dsh.lll..SuD.......uD.......Fs...ll.pplp..cp..G..hpV.hshu..h............t.hsspt......lhpsss.pah...t................................................................ 0 388 740 969 +2353 PF01937 DUF89 Protein of unknown function DUF89 Enright A, Ouzounis C, Bateman A anon Enright A Family This family has no known function. 
25.00 25.00 26.10 26.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.29 0.70 -5.66 86 886 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 573 7 557 885 43 317.00 21 73.89 CHANGED shtcphPsll.ppslsslch.........spss...............pptcpllpphtp..lhtchppspshssh..............................................htshlaRclhphhs..........phDsFpchKp.ps.ppuhphl.pltpp..hcphtsp......................hhpphlclulhGNhhDhulhs.........pp.......ttphppthcpsl...llsD.hsphhptL...................ppspplshlsDNuG.ElhhDhl.LhchLhc...huscVhhtsKshP.hlsDsThcDhp..................................................tttlpc....thscllhp...........u.ss...ssshshtch........ss-lhptlp..pu-LlIhKGphNYcpL.ps-tpht.p..................lhhLhssKs-slssh..sshscpl ..........................................................................................................pphsh.hh.pphhtt......................t....................t.hpthhtth.p...hh.p..h.t..p..t.....h........................................................t.shh..h.....cplhph.ht...........hDs.a.t.c.Kp.pp.......p.....t.uh.p..hh....pl.tpp.....hpphtpp.............................................hhphlchslhGNhh.Dhuhhs....................pp......................ttthtpthp.p.sh.................llsc..hsphhptL............................................tttppl....hlhDNuGhElhhDhl.l.......schLlp.......hu.......s.................cVhhts+shP.hlsDsT.hpD..ht......hh.t.....h.......................................................ttthpp.........t...t..s..p.lh.h.p.................................s..ss.....ssshshtph.....................................s.clh.pt.hp.....puc......LllhKGthNaRpLhssh........th..t......................................lhtLthhKsphlsth...t.............................................. 0 199 329 463 +2354 PF01939 DUF91 Protein of unknown function DUF91 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this prokaryotic protein is unknown. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.24 0.70 -5.20 4 795 2012-10-11 20:44:43 2003-04-07 12:59:11 11 12 704 2 245 677 167 188.00 36 75.29 CHANGED +ttlllllApCpV.YEGRA+ScLspG-RlllIKPDGoFhlHpD.+Kh-PVNWQPPsS..h.clptsph.lhSIRRKPcEcLcVpIpElatsosa.scDhc-LslsGsEu-MuchIhcpPpLIEEGF+PltREYtlspGhVDILGpDccGshVllELKRR+A-lpAVpQLKRYV-.h+c-hs.ttVRGlLVAPSlTpsAccLLc-EGLEF+cL-PP+ss....++shcstphchF ......................................................................h....hhlhupCpspY..G.Rh.pupLs..u.RllhhKsDsol.lHss.tuhcPlNW.sss.s..h.............t.......p......................t..................t......................................l.......h...h.t......t.......ss-.L.l.ht.h........cls......tssL.hs...G.lE..scLQcLLs-p....p.h.l.....u...p.G..hpll.c..R..E.......Y...........T.....s.....h.....G...s.....VDlLspD....t.s....G.s.h.Vs.lE..lK.R+...u....p.h......-uVc.QLsR...Y...l-..hL.sc........-...............h.......h...s.........Vp..GlhsA.pp.Ipsp.A.+h.L.h.p-+Glchlhl-..h........................................................................... 2 65 167 218 +2355 PF01940 DUF92 Integral membrane protein DUF92 Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family have several predicted transmembrane helices. The function of these prokaryotic proteins is unknown. 
20.40 20.40 21.70 31.90 19.90 19.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.42 0.70 -5.20 79 583 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 495 0 375 586 324 246.10 29 79.32 CHANGED ulhlsh.lLuhluh+tp..hLshuGhluAhllGshlhshsGhthahsllsFalhGShsT+hthppKpphul..s.ptcs.GpRshtpVhuNuhsuslhAlhhsh........................tstsh....hhhuahuuhAsstuDThASElG.thhspps.hLIT.Th.+.VssGTsGuVSl.GshAulhGuhlluhsuhhls....................hssth.....hhhl...sluGhlGohlDSllGAslQt..............................thshLsN-hVNhlsThhu...ullu .......................s..hhsh..hls.h..h..u..h.p..p+..sLshuGhhsuhllGhh..hh.hhs...u....hthhhhLlsFFls.........uohhTKh..p..tph..K......pphsh..............s....pcs....GpRsh............hp......VhuNussusl.hulhhhh...........................................................hhssh..hhhu.hl.u....uhAsssuDTauS.ElG.hl..t.p.pP.hLIT...Th..+............hVs.sGTsGGVolhGhhAuh.hGuhhluhsshhhh.......................................hsh........hhhl......sluGhlGollDSlLGAslQt.....................................................................................................ths.hLsNpsVNhlsshhsuh.................................................................................... 0 120 231 311 +2356 PF01941 AdoMet_Synthase DUF93; S-adenosylmethionine synthetase (AdoMet synthetase) Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family This family consists of several archaebacterial S-adenosylmethionine synthetase C(AdoMet synthetase or MAT) (EC 2.5.1.6). S-Adenosylmethionine (AdoMet) occupies a central role in the metabolism of all cells. The biological roles of AdoMet include acting as the primary methyl group donor, as a precursor to the polyamines, and as a progenitor of a 5'-deoxyadenosyl radical. S-Adenosylmethionine synthetase catalyses the only known route of AdoMet biosynthesis. 
The synthetic process occurs in a unique reaction in which the complete triphosphate chain is displaced from ATP and a sulfonium ion formed. MATs from various organisms contain ~400-amino acid polypeptide chains [1]. 19.40 19.40 19.50 37.60 19.30 18.50 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.30 0.70 -5.97 48 243 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 220 0 139 229 20 380.20 42 98.33 CHANGED hRN...IsV-phpptsl-cpplElVERKGlGHPDoIsDuluEulSRuLs+tYl-+a.GslLHHNsDcs.lVuGputPcFGGGcVlcPIaIlluGRATpch......ts.p......lPlssIAlcAA+caL+cslthLDs-pc.lll-s+lGpGSsDLp-VFpcspt...lPhANDTSFGVGaAPLSpsEplVhpsE+hLNSp.hKpchPtlGEDlKVMGlRcsccIsLTlAsAhVDRalsshccYhphKppl+ctlt-hApph.....s..pccV.....cVtlNTAD...sh-csulYLTVTGTSAEtGDDGuVGRGNRsNGLITP.RPMShEAsuGKNPVNHVGKIYNlLuspIApclsccV-G.lcEVhVcLLSQIG+PIDpPhlAsspllsccG..hshsclcpclppIl-ctLsslsplocpllcGcl .......................tNIhVp.hptpslpcpplElVERKGlGHPDoIsDulAEtlStsLuphYlc+a.GsILHHNhDcs.lVuGputPcFG.GGcll.cPIhIllsGRATpch......tspp....lPltplslcAA+caL+cslp.LDs-pc.l.ll-s+.........l..u.p........G.......S.........s..........DLh-lFpcttp..........lPhANDTShGVGaAPLopsEclVhpsE+h...LNop.t...h.Kp.....chPtlGEDlKVMGlRpscclsLTlAhAhls+alsshpcYhphK-plpptlpchupch...........s..tppl...........pltlNTuD....t.cpsslYLTVTGTSAEtGDDGpVGRGNRsNGLITPhRPMShEAsAGKNPVsHVGKIYNlLuppIAcclhcpl.cs.l.c-shVhlLSQIG+PIscPplsslpllscpu......h.t...phppclptIscc.LsslsplschllpGc.h.......................... 0 34 81 112 +2357 PF01944 DUF95 Integral membrane protein DUF95 Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family have several predicted transmembrane regions. The function of this family is unknown. 
24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.33 0.71 -4.63 172 1288 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 1099 0 417 1025 82 170.70 20 68.88 CHANGED lshllalhuhlhGh..hhsh......t.sphhpth..lst.h.thh.........................................h...hhtlhhNNh.tlshhshhhG...............lhh...Glsslhll.hhNGhhlGhshu..hhhppsh..........hh.hhthl....lPHGllElsulhluuus..Gltlu.................ht.....lltsth.................h........pphpsht...ptht...ptstlh..l.sshhllhlAuhlE.u..alosh ........................................h..hhhhhhuhlhGhhhs..........t.sph.pt.....hst..htphhtt.....................................t.t....h....hhtlhhNNh.h....lshhhhhhG.....................hhh.G.ls.sl..hhL..hhsGhhlGhhhu...hhhppst...................hhhhhhhl......lPHGllElsulhluuuh...uhp.lu.......................ht.......llpsth.........................h...............tct.p.slt.......pthh.......phhthh....l.hhhsllhlAuhlE.ualos.h....................... 0 141 283 372 +2358 PF01947 DUF98 Protein of unknown function (DUF98) Kerrison ND anon DOMO:DM04314; Family This is a family of uncharacterised proteins. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.94 0.71 -4.46 20 217 2012-10-01 19:33:20 2003-04-07 12:59:11 11 6 206 6 95 274 193 150.60 30 71.93 CHANGED LLGDGSPTRHLpLL.TucpspV-lIsMts.s..sss..........spuPpcVppL.......ssPllRRQVWLps......uupsLAaApSWWNtpcs-caLps....+stPIWpsLspt+sELaRElcGltLspuc.hLEptFs.pcGPFWuRHYpFa+cG+sLTlIhEVFSP.tLEpaL ........................................lhuDGShT+hLpll..ss.c.pltVcllp.phhs..ss....................sth.t.hpth..............ss.llpRpVhL+s........................usphlsaAp..S.h..h.sh...s....cl....s....p.t....Lpp..........pshPIGchltpp.....+hE.....haR...El.t..p...lh.h...s.c.ss.....h...L.....t..pt....ap....p....c..t......h....h.u...RcYpl..h..ps.spPlh.lIpEhFs.............................................................. 0 26 65 87 +2359 PF01949 DUF99 Protein of unknown function DUF99 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this archaebacterial protein family is unknown. 22.80 22.80 22.80 22.80 21.50 22.70 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.20 0.71 -5.19 29 165 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 161 2 106 166 10 177.70 29 89.74 CHANGED KptlRllGlDDuhhtp.....cccshlsGlVh+ss.hl-GlhhsplslDGhDuT-sllshlps.phhtplpllhLsGlshuGFNllDlpclaccTtlPVIsVhc+tP-httlcpAL++hFscsp.c.Rhchlcph.Gphptl..............s.lahpshGls.ppAtcllpthohpu+lPEPLRlAHllApulhptp.+ .........th+llGlDDuhhtp................ptcshlsGslh+usthl.-Gl.shsplplDGhDAT-slhphlps...hth.plphlhLsGlshuGFNllDlppltccsshPVlsVhpctPchttlcpAL++.tFs.ssp....c..Rhphlcph.sphctl..............tslalpshGlshppAppllcthphpu.plPEPLRlA+llAsulh.....t............... 0 27 61 87 +2360 PF01207 Dus UPF0034; Dihydrouridine synthase (Dus) Finn RD, Bateman A, Kerrison ND anon Prosite Family Members of this family catalyse the reduction of the 5,6-double bond of a uridine residue on tRNA. 
Dihydrouridine modification of tRNA is widely observed in prokaryotes and eukaryotes, and also in some archae. Most dihydrouridines are found in the D loop of t-RNAs. The role of dihydrouridine in tRNA is currently unknown, but may increase conformational flexibility of the tRNA. It is likely that different family members have different substrate specificities, which may overlap. Dus 1 (Swiss:Q9HGN6) from Saccharomyces cerevisiae acts on pre-tRNA-Phe, while Dus 2 (Swiss:P53720) acts on pre-tRNA-Tyr and pre-tRNA-Leu. Dus 1 is active as a single subunit, requiring NADPH or NADH, and is stimulated by the presence of FAD [1]. Some family members may be targeted to the mitochondria and even have a role in mitochondria [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.04 0.70 -5.59 17 9141 2012-10-03 05:58:16 2003-04-07 12:59:11 12 33 4628 7 2600 10106 4606 294.60 27 86.25 CHANGED hlA.PMsGloDhhaRpLspchuss..........................LshoEMVsupshlpspphshthhsp.cps........hslQLsGs-PthhAcsAchst-.hGuptIDlNhGCPsc+VppstuGusLhpps-lltphlpshhcAVs...lPVTlKhRlGhD..-spppshchuctspssG.sphlslHGR.......T+tptaps..tAcaptItpl+psls...lPVluNG-Itsh...pcupptLptoGsDuVMlGRuAhspPhlht....tltpstht.pss.ht-h....hphhpp+hptlhphhu.tpulppht+HhtWhhpt...hssstth+pthspstshtch...httl....shhpsht 
........................................................................................................lA.PMtshoDhsaR..........h.l.h....p....p....h.....us.s..................................................................l..h..h..o.E......M......l.........s....s.....p.........s.h....l.....h.....s.........s.....c....p......s......h......t....h.....h...sh.t..ppt...........................................t...lsl.Qlh..........G..s.-..P.pt.hu....c.s....A.c...h.......stp.....h.......G................s.....s............h................I.......D............l........N.h..G..........C.......Ps..p+..V........s.....p....p.....t.............tGu.....s.....Lh...p...c...P....c..l...l...t...pllp.uh..h.p.s.ls.........................lP..V.T.l........K....h....R......l......G..h-...................p.p..t.......p.....h........h...........-....hs...........p.t.........s................p.............p............s............G....s........s...........t..l.........s...........l.......H.u....R........................T.+.p.p..h..ap.......s...........p...s.......c...a......-......t.......I.......t....p............l................+....p...........p.......l...............s..................l....Pl..l.uNG...-...Ihsh...................pc...A...p..p...h......l.....p........t....s........G..........s.......D.........u........V.Ml..GR....u...Ah..s..p.Pa...lh..p..............h....h...p..p....t..........h...h.........s....t...s....tch.............................hp.hl....h....t....p....h....p..h..h......p........h..h..........s......................p........t......t...........t...h..t.....t.....h....p..+..p..h......h..h..h.pt..........h.s...s...t...t...h...+p.ht.t...tphtt...................thh.............................................................................................................. 0 870 1575 2170 +2361 PF00692 dUTPase dUTPase Bateman A anon Pfam-B_127 (release 2.1) Domain dUTPase hydrolyses dUTP to dUMP and pyrophosphate. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.46 0.71 -4.68 29 7405 2012-10-03 01:18:03 2003-04-07 12:59:11 14 34 4788 263 1771 5446 3768 123.20 28 66.70 CHANGED ppssh.sctpstsAGaDLhuspshsl.s..tssthlsTslthslPt.sphuhIhsRSul.utKGlllhs...GlIDpcapG.plplhlhNhspp.shplppGpRIAQLllh.hhp.....hp.spsh.cpopRGstGFGSos .......................................t............suuhDl..h...s.......s................s.......h.......h......ltP.....s........phs....LlsTs.l..t..l..p..........l...........P.........c...sh....h........u.........h...........l....h.sR.Sul...u.p.p...t...h..l...l...h...s...Gh.I.DssY......c.....G...p...l..t..lt..l......h.N......h......u..........p...t...sh.......p...........lp......sG-RIAQh.lhh.h.p.............sth.p...h....p...ph.....s......t.....p...+GptGhs.......................................... 0 563 1093 1455 +2362 PF02670 DXP_reductoisom 1-deoxy-D-xylulose 5-phosphate reductoisomerase Bashton M, Bateman A anon COG0743 Family This is a family of 1-deoxy-D-xylulose 5-phosphate reductoisomerases. This enzyme catalyses the formation of 2-C-methyl-D-erythritol 4-phosphate from 1-deoxy-D-xylulose-5-phosphate in the presence of NADPH [1]. This reaction is part of the terpenoid biosynthesis pathway. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.66 0.71 -3.60 87 3428 2012-10-10 17:06:42 2003-04-07 12:59:11 11 6 3275 79 838 2723 1839 125.40 42 32.41 CHANGED lslLGSTGSIGspTL-V..lp...pp....P-.pFplsuLsA.spNlchlhpQs..pp..F..pPph..l.sls-ppt....hppLpptl........t.......tsplhhGpcult...clAsh....scsDhVlsAIVGsAGLhPTlAAlcuG.KpluLANKEoLVsuG ..................................................lsILGSTGSIGspTL-Vlp...pp...........P-....pFcVsALsA.GpNls.hlscQs..tcF..pPch..s..sls..-..p.ss......spp.L.c..ptL.............s..........................tsclh.sGppuls....-lA.sh..........s-sDhVhsAIVGu.AGLhPTLAAlc.AG.KplsLANKEo.LVsuG......................................... 0 295 570 722 +2363 PF00350 Dynamin_N dynamin; Dynamin family Finn RD anon Prosite Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.96 0.71 -4.43 79 6429 2012-10-05 12:31:08 2003-04-07 12:59:11 18 111 1940 32 2723 11019 2690 176.80 21 26.13 CHANGED lsVlGs.SuGKSSlLpuLlGpch..LP..puss...sTcpslh.lph.........................................scp.p.............sphp..ht-h.....tpphpshsplppthpppppphs.....th..................................................................slhhphhs.ts.sloLlDhPGlsp..ssstcp.................shsppal.ppsslILhl.s.sust...shups..-thplspplDspsp+.........o.lsVlsKs 
.......................................................................................................................................................................lsllGs..SuGKSo...llp.u...l....l....G.........p.......c.....h.........LP....puss.............s..T.p...p.s.h.....h..lph..................................................................................................................................................spptp...........................................p..htp........................tpthps..h....p....p.......l...t....p.t..h..p..t..p..h.p...t...h..t....................................................................................................................................................................................................h....h....h..h.s....h..h............s...l..s..l..lD..h.P......G.ls...p...ssstpp...................................................phhtp...h...l...p..p......s.......s...h..l...l...hl..h..........s.ss.p.......s.hs.....ps.....-.......h.............h.h...h....p......h.c....t.t..h..............h.hhlhst.................................................................................................................................................................................................... 0 828 1542 2205 +2364 PF04912 Dynamitin Dynamitin Finn RD anon Pfam-B_5757 (release 7.6) Family Dynamitin is a subunit of the microtubule-dependent motor complex and in implicated in cell adhesion by binding to macrophage-enriched myristoylated alanine-rice C kinase substrate (MacMARCKS) [1]. 
25.20 25.20 25.20 25.30 25.10 25.10 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.28 0.70 -5.66 8 360 2009-11-18 13:30:50 2003-04-07 12:59:11 9 7 237 0 243 360 0 273.70 22 85.27 CHANGED hIuhsQ.DlYETsDsPEs-ssph..pp-EssscsIEplplssscAapKFps+plsssslDFSDpIuK+p..+pGYphc.puEaEllGp..uppETPhQKhQRLQpEhtELh-Elpshpsssppsc.EEclSssAL.upslssLccpLcsL....+LppllGp.csl.sssp..t.s.sp+LloplEpa...........p.........pp.s.hutsssp............ptschspsu+lAsLE+RLtcLEsslGs.cs-h.s.Lssshp..sssll-u.....lpplpuKsuLLssspLDtlEpRLsslhtKhppIs.E++sus.pDssp-pKIs-LYElspKW-slspp....LPpllpRLpoLcuLHcpAspFuptlupL-ssQpslpspLssspsLLpcVpcs.....htpNLpslp-slsplEuRltsLp ..............................................................s...........................................................................................................................................................................................................................................................................................................................................................-.........s............p+htRl....Ehtph.t...ph.......t..t..........................................tt................t.............h....p.htt...hpp.L.t.......................t.p...........s.....................................tt.....tht....................................................................................................................................................p........t..........t......t....chsph-pRlt.lEthlt...t...................t...............tslh.s.................lp.lptp..lsh.L...p.........t........lpthptcht....l..tchp.......t......l.........t....p....tp..............t...............t......................................t..............t...............t................p..............p......p.............p..........K..............l.......p.t.lathh.phpshts................lP.llpRlhslptlHttAh.phs.......p.lptl-pppt.htpplpp.pphlptlppt............htpshthhtt.httlct+htt..t..
.......................................................................................................... 0 80 126 197 +2365 PF01221 Dynein_light Dynein light chain type 1 Finn RD, Bateman A anon Prosite Domain \N 24.90 24.90 25.00 25.10 24.70 24.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.12 0.72 -3.99 75 1105 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 322 46 723 974 9 86.60 39 73.19 CHANGED h.....s..ptps.hlKssDMs--..Mpp-AlchutpAl..-....+as...h-..+-.lAtaIK+EFDc+YGsoWHCIVG+sFGSaVTHEsppFIYFhls.ph..ulLLFKou .............................................................pshl+ssDMs--.....MQp-Al-.h....A......p..pAh.....-........+as..........h-.........+-..lA....taIK..+.-F.......D++Y.......s.......s.....o.......WH.C.I.V..G..+...s..FG...S.aVTH.....EsppF...l..Y.Fh.lu..pl.ulLLFKo............................. 0 331 455 616 +2366 PF00519 PPV_E1_C E1; Papillomavirus helicase Finn RD anon Pfam-B_48 (release 1.0) Family This protein is a DNA helicase that is required for initiation of viral DNA replication. This protein forms a complex with the E2 protein Pfam:PF00508. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.55 0.70 -5.87 66 1112 2012-10-05 12:31:08 2003-04-07 12:59:11 12 7 265 44 9 1576 23 256.40 49 68.38 CHANGED KFK-saGlSFs-LTR.FKSDKTsCpDWVlssa.GlpcslhEusKpLLppaCtYhalpp..stthGhhsLhLlpFKsuKsR-TVtKLlsslLsVs-pphLhEPPKlRSssuALaWYKpuhussshs..aGphP-WIs+QTllsHp..hpsspF-LSpMVQWAYDN-hs-ES-IAYpYApLA-pDuNAtAFLpSNsQAKaVKDCusMsRaYKRAEM+pMSMSpWIpcRspcl.-..ssGcW+sIVpFLRYQslpFIsFLsAhKpFL+G..hPKKNClllaGPPsTGKShFsMSLl+FLpGpVISasNS+SHFWLQPLsDAKluLLDDATpsCWsYhDsYLRNuLDGNslSlDtKH+uslQlKsPPLLITSNlsltp-c+apYL+SRlpsFpFss.FPhcssGsPlapLsDpsWKSFFpRhWspL-Ls-.pED-s.-..s....GssppsF+C ..............................................................................................................KFKEhYGlSFhELlRsFKSsKosCsDWslusF.Gl.ssolAEuhKsLlp.ashYhHlQsL.ss..shG.hllLhLlRaKCuKNR.TltKhlupLLslspppM.lI-PPKLRSsssALYWY+ouh..S..Nh..SpV..hG-TPEWIpRQTllpHu...h.p-.s.F-LSpMVQWAaDp........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
2 4 8 9 +2367 PF00122 E1-E2_ATPase E1-E2 ATPase Sonnhammer ELL, Bateman A anon Prosite Family \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.33 0.70 -5.32 126 30268 2009-09-12 10:17:11 2003-04-07 12:59:11 15 261 5547 105 10217 25831 1942 229.00 24 26.96 CHANGED hllhhlhlsshlphhpch+upcslppL...tphtsp.p.spVhcs...........................s...........phpp...............l.sspclssGDllhl.csG-plPuDuhll.........................................p...s..........shplDcShLTGEo.......hPlpKp..........................................psshl.....auGohlh...sG..ph......pshVstsGpsopluclsphl...ppspp.tc....................................sslpphls+lsphhshls.lslul.......................lsh...hhthh........................................................................t.shhpslhhu.lulllsusPpuLslssslslshuhtphu+pslll+.phsulEs ..........................................................................................................................lhhhlhls.t.h.l....p.h.h.t.....c...t..+.u.p...ps.l.p..t.L.........h..p...h.t..sp.....p.............s.p..l....l....c..s..............................................................................................G.......................p.h.t.p...................................l...sspclh...s.......G.Dll......hl......c............s..............G...........-.............p........l...P..sDuhll..............................................................................................................p.......u.................pstl..DE...S..h..LT......G..ES..............hPVpKp...................................................................................................................................................tssh..l......h.u..G..o.h.sh............sG..........ph............................hh..h..V..st........s.G.....t.cT..h..l.....u...p.........l........h.ph.l......p..pspp...pc................................................................................
................................ss.l..pp.hh.s....p...l.u.t.h..h..s.h..h..s....l....hlu.h.........................................l.sh...........hh..hhh..................................................................................................ttsh..h..t..sl...h.hu...l...sl....l....l...s...ss...P...puLs..ls....ssh.slhh............u....s...t.....ph.u.c.p.s.hl....l+phtulE........................................................................................................................................................................................... 0 3318 6138 8457 +2368 PF00676 E1_dh E1_dehydrog; Dehydrogenase E1 component Bateman A anon Pfam-B_117 (release 2.1) Family This family uses thiamine pyrophosphate as a cofactor. This family includes pyruvate dehydrogenase, 2-oxoglutarate dehydrogenase and 2-oxoisovalerate dehydrogenase. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.82 0.70 -5.70 16 9023 2012-10-02 16:07:47 2003-04-07 12:59:11 15 32 3995 112 2879 10789 7579 299.30 26 49.43 CHANGED tchhslpRht-ttthhapcpthhGFsthpsGQEAsslGhttAlspp.Dhlhss..YRspuhh.hh+Ghs..lpplhsphhG+t....pspGGuh+..hh..s+p...ahsusuhlusQ...slss..GlAhAtK..........hcpcppVshshaGDGA.osQGphaEuhNhAslachPl...IFlscNNpauhuT.ssc+uuussshhphutGhtIPGlpVDG.DhlAVhpAsKhAt-hsppGpGPhLlEhhTYRhsGHShS.DssooYRspcElpch+tpcDPIpth+ctllspGlso--Ehcshccclcpcl--AhcpAcsss.sssp 
.....................................................................................................................h.....htt.h..p...h.t.h.h.h.t.......t.t..t..........p.h..u..h............t..st...su........p............p.....Ah..p..huh..t...t....u..h...p.....t......s.....D..h..l..h..st..........a..Rs+.s..ph..l.s...p.....s.hs................hppl.....h..s.p.h.h.Gct..........................................psp.u..s..s...h..p.............h......h............................s.......p.........t...................p........h.h..s...s..s.........s........lusp................P....lsh.............Gh..AhAt..p................................................h..p....s....p.......p.....s....l....s.....l...s.h.a.GDuA.su........QGsh.aE....sh.N....h...A.....s.l.....h............p.....h.....s.s.....................l.a.l.l..............N.N...p..h...uh..o.........T.....s......s...........p.........c....s.p.......u......s.p..........h.......h.s........c........h..A........t.u.......h........s....hP.s..h...c.......V......s..G..s...Ds.......A.Vh.....t....s.s....c.hAh-..h....t....p....p...........h..........c...........t....ss.lI-h.h.s.Y..Rh.tG..H..s...pu...D.....c..P...s....t...h............p.....sp...........h.p.p.h...p.p.c.......s....Pl.th.apch..L.h.....p.........p...G.........l.........h..o.p...-...c..h...p...ph...t...p...p...h...c...p...t...lp.ps.hp.spt.......t............................................................................... 
0 974 1810 2435 +2369 PF00524 PPV_E1_N E1_N; E1 Protein, N terminal domain Finn RD anon Pfam-B_98 (release 1.0) Family \N 22.50 22.50 23.20 25.40 21.90 22.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -11.07 0.71 -3.92 76 514 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 242 0 0 513 1 123.50 39 22.24 CHANGED MA.....s.scGTc........ttssss.GWhhl..EA..ssc......spD-..Esh..h-p.....soDlsDhI.Dsssh.....pQssuhtLap.pQpspcsppplpsLKRKYht.....SPtss.........ppLSPRLpuls..lssp.pppuKRRLF.p.....DSGhup...ots- ...........Ms-.spGTss......tttGssGWFhV..EA..spc...s..o-D...-.tEp...t-s.....spDhs-FI.Dsss......pptsuptLap.tQpsppct..pslp.sLKRKahs.....SP.ss........sppplSPRLpuIs..ls...pp....pppuKRRL.Fp.p....DSGYGsops....................................... 0 0 0 0 +2370 PF00511 PPV_E2_C E2_C; E2 (early) protein, C terminal Finn RD anon Pfam-B_87 (release 1.0) Domain \N 25.00 25.00 25.30 25.30 21.00 19.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.69 0.72 -4.02 99 565 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 249 38 0 497 0 76.60 44 22.58 CHANGED PllhL+G-sNsLKChRYRlpp+apsL.FpphSoTWpWss....ssssp.psuhlhlsasoppQRppFLssV+lPpulphthGhhs ...Pllpl+G-uNsLKChRYRl.p.+appL.apthSSTW+Wsu.........sss.pp.puhlTlTasScpQRppFLssVKI.P.solphshGhMo............. 
0 0 0 0 +2371 PF00508 PPV_E2_N E2_N; E2 (early) protein, N terminal Finn RD anon Pfam-B_76 (release 1.0) Family \N 19.10 19.10 20.60 43.80 18.20 18.10 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.35 0.71 -5.14 82 467 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 250 13 0 494 0 197.90 44 51.73 CHANGED MEsLspRLsusQEp...lLslYEpsSpsLp-QIpaWphlRpEpsLhatARcpGlp+lGaQsVPsLsVScsKA+pAIchpLtLpoLppStYupEsWTLp-TShEha.hssPppCFKKtGpsVEVhaD.s-ppNsMpYThWshIYhpssss..WpKspGpVDapGlYY.ptpstK.pYYVpFpc-Ap+YGpoG......pWEV+..hsspslhsP.sosoS ....MEsLspRLsAsQ-c...lLslYEpDSscLpspIpaWphlRhEsslhYtARctGlp+lsa.QsVPsLsVScsKAppAIEhQltLcoLppSpYusEtWTLp-TShEha.hosPppCFKKpGhoV-VhaD.s-psNsMpYTsWstIYhpsps............s...............Wp+VpGpVDapGlYY..hppGh.+..sYYlpFp.c.-Ap+Yupos......hWEV+..hssplIhsPsos.............. 1 0 0 0 +2372 PF02319 E2F_TDP E2F/DP family winged-helix DNA-binding domain Mian N, Bateman A anon Pfam-B_8420 (release 5.2) Domain This family contains the transcription factor E2F and its dimerisation partners TDP1 and TDP2, which stimulate E2F-dependent transcription. E2F binds to DNA as a homodimer or as a heterodimer in association with TDP1/2, the heterodimer having increased binding efficiency. The crystal structure of an E2F4-DP2-DNA complex shows that the DNA-binding domains of the E2F and DP proteins both have a fold related to the winged-helix DNA-binding motif. Recognition of the central c/gGCGCg/c sequence of the consensus DNA-binding site is symmetric, and amino acids that contact these bases are conserved among all known E2F and DP proteins. 
21.20 21.20 21.30 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.61 0.72 -4.41 33 1416 2012-10-04 14:01:12 2003-04-07 12:59:11 15 13 169 2 833 1251 16 71.90 39 20.10 CHANGED RpcpSLslLop+Fltlhpp....spc.shlsLscsAcpL....................ss..p+RRlYDIsNVLculslIpK.......hpKsplcWhG ................................................RhppSLthhopKF.ltlhpp..................sps....usl...sLs..p.....sAcpL...............................................ss..p+RRlYDIsNVLpulslIpK...........tpKsplpWhG............................ 0 297 417 591 +2373 PF02817 E3_binding e3_binding; e3 binding domain Griffiths-Jones SR anon Homstrad Family This family represents a small domain of the E2 subunit of 2-oxo-acid dehydrogenases responsible for the binding of the E3 subunit. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.38 0.72 -4.32 58 9252 2009-01-15 18:05:59 2003-04-07 12:59:11 12 41 3883 49 2483 6857 3964 38.30 38 8.78 CHANGED scshAoPssR+LApEpulDLs...plp.GoGssGRIh+pDlps ...............hsoPssR+lApE.p.G...l...-....ls....pVp..G.oG.h.p.G.RIh+cDVp.............. 0 779 1517 2071 +2375 PF00518 E6 Early Protein (E6) Finn RD anon Pfam-B_57 (release 1.0) Family \N 20.70 20.70 20.70 21.80 20.30 20.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.34 0.72 -10.86 0.72 -3.72 26 915 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 239 6 1 645 5 108.30 48 74.15 CHANGED ClaCpchLspsElhsFsh+-LplVaR-s.hsausCstCLphhuplcph+aaphSlausslEphstpslhclhIRChhCt+.Ls..EK.cpl.ppcpF+clp.spW+GpChpC ........................................CVaCKppLpp..pEVacFAapDLplV..YR.Du...PaAlCctCLchhSK..lpch.RaYp..hSlYGsT.LEphhpKsLs-lhIRChtCQ+PLsPpEKp+HlspppRFHpIp.G.....cWpGpChpC........ 
0 0 0 1 +2376 PF00527 E7 E7 protein, Early protein Finn RD anon Pfam-B_95 (release 1.0) Family \N 21.50 21.50 22.10 22.10 20.00 19.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.56 0.72 -3.83 75 626 2009-09-14 14:24:34 2003-04-07 12:59:11 13 1 232 5 0 438 1 91.80 40 93.18 CHANGED G.........p.psTLpDIVL..ch.pP.........pslD.LhCpEpLs....sop..-.........-Esc.t......................psYpllssC.sp..Cppsl+LsVpuopss.lRsLppLLh.ssLslVCPtCu ...................................................Gp.psTLp-hlL..-L..pP..........pssD.LaCaE.QLs....cSs.-E........-E...chst..t..............p.tpssYtIlThC..sp..CcsslRLsVpSTps-.lRsLQpLLh.GolslVCPtCu.......... 1 0 0 0 +2377 PF00563 EAL DUF2; EAL domain SMART anon Alignment kindly provided by SMART Domain This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function [1]. The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site [1]. 
21.60 21.60 21.80 21.80 21.40 21.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.59 0.70 -4.88 137 26968 2009-09-15 16:29:44 2003-04-07 12:59:11 15 1049 2504 33 6836 21773 1845 227.50 27 36.53 CHANGED tpplppu...h..tpt..ph.thhaQ....P.lh...s.hpssp....lh...uhEslhRhpptptt.hl..sstp......ahshhcp........shhtpls.phllcpshp.phtph.........th.l..slNl....ss.tpltssp...hhptlt...thh.........th.ssp.lslElsEs..................pphp..phlp....p.L+p...hGhplulDcF..Gsshs.shph.ltplshc..hl.KlDpphl..tth.....stpspthlps........lhphucph........shpllAcGVEspp..phphltphGsc.....hhQ..Ghhh...upP .............................................................................................................................................t...ltpA.....l....ppp.....ph.hlhaQ..........P..hl...............s..hpssp..................lh....GhEu...LlR...a..pp..s..p....t...s....hl.....sPsp.....................Fl..s.hAEp.................................t....Ghh.htls..phllc.pshp....p...ht.p.h.....................................shpl.....................ulNl.....................Ss...tp....l....tpss............hhp.plt.....phl.........pph.tlss..p.p....l...t..lElTEs.........h..............hps.tp..h.h.p.hlp............p..L+p....hGhpl.u..l......D..DF...........GoGaS....oL.......sh...L....pp...h...s..h..D.......hl.K..I..D+uFl...............pslt......pst.......p....s.....pt.......l.l.p.u.......................ll..p.l..u.c.pL........s.h....p.l.l.AEGVEopp...php.h..L.p.p.h..G..s.s........hhQGahau+P...................................... 1 1860 4128 5580 +2378 PF04157 EAP30 EAP30/Vps36 family Wood V, Finn RD, Bateman A anon Pfam-B_8830 (release 7.3); Family This family includes EAP30 as well as the Vps36 protein. Vps36 is involved in Golgi to endosome trafficking. EAP30 is a subunit of the ELL complex. The ELL is an 80-kDa RNA polymerase II transcription factor. ELL interacts with three other proteins to form the complex known as ELL complex. 
The ELL complex is capable of increasing that catalytic rate of transcription elongation, but is unable to repress initiation of transcription by RNA polymerase II as is the case of ELL. EAP30 is thought to lead to the derepression of ELL's transcriptional inhibitory activity [1]. 25.00 25.00 25.10 25.00 24.70 24.60 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.28 0.70 -5.19 67 648 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 296 8 450 643 7 216.70 26 60.19 CHANGED sGluulpppptpppphs-hshphshp..clspLhpphpphhphhcphupptpschppssp...................phpphhsslGlss.....hsstc................h..uhspFa.ELuhplsEhsh........phhcpsG.......................G.....llsLp-lashhN+.....sR.tst......................................hlospDlhcAschh.cpLshs.hplhphs..............tuhhhlpsss.s-h.tspsplLphh............................................................................phtsslohtplspphs.......WohshupptLp..p.hhppGhlh..hD ...................................................sGluulpppt..pppp.sctsh..s.uhp..clspLh...........pphpphhshhcphApc..h..p..p.ch..p.p.s.sp.hcs.......................................pFpphstslGlss.........................sopc........................h...uhspaahELuhpl..sEhhh..............sshpc.sG......................................................G.........lls.Lp-lhshhs.+......uR...uhp......................................hlS......s..-DlhcAschL..csL.......shs..hplhphs.....................G.hhllp....shs............s-.........pstsplLphs...................................................................................................pht...uhlosppltppls...................WshthApphLp...t.h.ppGhlhhD............................................ 
0 156 251 373 +2379 PF01309 EAV_GS EAV_env_prot; Equine arteritis virus small envelope glycoprotein Finn RD, Bateman A anon Pfam-B_656 (release 3.0) Family Equine arteritis virus small envelope glycoprotein (Gs) is a class I transmembrane protein which adopts a number of different conformations. 25.00 25.00 440.70 440.50 18.00 17.50 hmmbuild --amino -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.41 0.71 -4.80 7 84 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 2 0 0 84 0 196.00 95 86.34 CHANGED WWRAVHEVRVTDLFKDLQCDNLRAKDAFPSLGYALSIGQSRLSYMLQDWLLAAHRKEVMPSNVMPMPGLTPDCFDHLESSSYAPFINAYRQAILSQYPQELLLEAINCKLLAVVAPALYHNYHLANLTGPATWVVPTVGQLHFYASSSIFASSlEVLAAIILLFACIPLVTRVYISFTRLMSPSRRTSSGTLPpRK WWRAVHEVRVTDLFKDLQCDNLRAKDAFPSLGYALSIGQSRLSYMLQDWLLAAHRKEVMPSNVMPMPGLTPDCFDHLESSSYAPFINAYRQAILSQYPQELLLEAINCKLLAVVAPALaHNYHLANLTGPAsWVVPTVGQLHFYASSSIFuSSVEVLAAIILLFACIPLVTRVYISFTRLMSPSRRTSSGhLPQRK 0 0 0 0 +2380 PF02905 EBV-NA1 EBNA1; Epstein Barr virus nuclear antigen-1, DNA-binding domain Griffiths-Jones SR anon Structural domain Domain This domain has a ferredoxin-like fold. 25.00 25.00 53.10 53.10 18.40 18.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.54 0.71 -4.72 5 148 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 9 4 0 87 0 90.90 90 66.67 CHANGED KGGWFG++RGQGGps.sKFEshA-uL+ALLuRCcuPRTosEGcWssGVFVYsGSKTSCYNLRRuLALCIPECRLTPLuRLPYGaAPGPGPQPGPLRESossYFLVFLQTplFAEClKDAI+DYIpT+PsPTssl+VTVCoFD.DuVML .....KGGWFGKHRGpGGSN.QKFENIAEGLRhLLARCHVERTT--GsWVAGVFVYGGSKTSLYNLRRGIALAIPQCRLTPLSRLPFGMA................................................................ 0 0 0 0 +2381 PF00378 ECH Enoyl-CoA hydratase/isomerase family Finn RD anon Prosite Family This family contains a diverse set of enzymes including: Enoyl-CoA hydratase (Swiss:Q13011). Napthoate synthase (Swiss:P27290). Carnitate racemase (Swiss:P31551). 3-hydoxybutyryl-CoA dehydratase (Swiss:P52046). 
Dodecanoyl-CoA delta-isomerase (Swiss:P42126). 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -5.25 13 26211 2012-10-02 13:07:06 2003-04-07 12:59:11 15 113 4081 502 8836 22457 13530 233.90 23 67.75 CHANGED phphpsulAhlplcpPt.slNulssphlsElspulpphcsDssl+.ulllouscpsFsuGuDlp-hsst.....tp.sshptsstclapclpssshPllAAlNGhAhGGGhplALssDhtlAucsAp..hGhsEsplGlhPuuGGo.hLsRhlGhstAh-hlhsGcphsApEAh+hGlVspVVs............p-p.lhcpAlphspclssp............s.hulshhKphhpt.....hcpslspstptstptasushssccspcuhpu...................ahEc ............................................................................................................h....tssls.hls.l..sR...Pp...t.....hN....A.l...s......t...h.h.p...p..............l.........t...p...s...l.........p....p....h............p............p...........-.............s......s.......l.........c....s....l....l........l............s.......u............s.............s............c............s............F......s.............A.............G............u....D....l.......p.......t.......h.tph.........................t.....t.....h.........t..........t.........h.........h............p............t...............h................t............p................h.........h............p.........t...........l...........p.............p.............h.............s............+....P....l......l.Au...l...s..............Gh.AhG.G.G.h.p.Ls...h.s...s...D....h.t.l.................A............u.c..s..Ap..............h...u..h.s..c..s.......p..l..G....l.....h......P.......s....s.G.....u.s.........................h...L....s....+...h..l.............G......h..........t......t........A...h..........c.h............h...........h............s.....G......c...t..h............s..A.pc..A.h.p..h..........G...L.l....s..p.....l...ls......................................................................tsp....l........p....t....s.........h....p..h...u......p......p..l....t..tt..................
.....................................................s.s..h..u..h..t...h...h..K..t......h..h..pt..................t...........t...t............t.......t.......t.....................h.......t.....p...t.................................................................................................................................................................................... 0 2394 5222 7277 +2382 PF04736 Eclosion Eclosion hormone Kerrison ND anon DOMO:DM04950; Family Eclosion hormone is an insect neuropeptide that triggers the performance of ecdysis behaviour, which causes shedding of the old cuticle at the end of a molt [1], [2]. 25.00 25.00 48.60 47.90 21.70 21.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.34 0.72 -4.53 6 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 31 0 29 44 0 60.10 58 68.49 CHANGED phDlhGGYDhluVCIsNCAQCK+MaGsFFEGpLCAEAClpFKGKhIPDCEDIuSIAPFLNKL ...........s.hsGhDhltVClpNCAQCK+MaGsaFpGphCA-uClKFKGKhIPDCEDluSIuPFLNtL... 0 8 11 26 +2383 PF02963 EcoRI Restriction endonuclease EcoRI Griffiths-Jones SR anon Structural domain Domain \N 19.90 19.90 20.30 20.70 18.60 19.40 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.51 0.70 -5.59 3 52 2012-10-11 20:44:43 2003-04-07 12:59:11 11 1 50 10 6 62 7 187.20 49 86.48 CHANGED SpGVlGIFG-tAKtHDLoVGEVS+tVlsKLoEDYPQLoFRYRsSIcKKEINEAL+KlDPcLGQTLFVENASI+PDGGIlEVKDDaGNWRVVLVuEAKHQGKDIENIRsGlLVGKsKDQDLMAAGNAIERSHKNISEIANFMLuESHFPYVLFLEGSNFLTEoIcVTRPDGRVVsLEYNSGMLNRLDRLTAANYGMPINoNLCcNhFV+HKDKoIMLQAASIYTQGDGusWsuucMFEIML-ISKTSLRILGcDLFcQ ..........................................................................................................h+tplphp.h..N.hLptlcschs.pohFh.sspIKPDGGll..cl..KD..Dpt.h+l.lLloEAKhQGps..pIpt..h.GKtpp....tsGNAIERuaKNlsElANhMLpEt+FPYllFL-GoNFlTpsh.l.RPDGRhl.l.YssGhLNRlD+LTuANY.GMPINpNLC.N+Flphpst.IMLQAhSIYTpGpGttWs.p.Mh.hh..................................... 
0 1 4 5 +2384 PF03974 Ecotin Ecotin Finn RD anon Pfam-B_54504 (release 7.2) Family Ecotin is a broad range serine protease inhibitor, which forms homodimers. The C-terminal region contains the dimerisation motif [2]. Interestingly, the binding sites show a fluidity of protein contacts binding sites show a fluidity of protein contacts derived from ecotin's innate flexibility in fitting itself to proteases while [4,5]. 22.20 22.20 24.00 24.20 21.70 21.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.76 0.71 -4.42 47 725 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 700 31 89 328 19 124.20 65 76.27 CHANGED tcphts.aPtspsGhpRpVIp....LPt.ss...Es...sa+VELlsG+shpl...D.CNpptLu.Gplpp...........colcGW.GYsYYplsths...ss.......oThMACs...spt+pppFVsl..ust..hlpYNS+LPlV.VYlPcssElRYRlWp......up ..............L.EKIAPYPQAEKGMKRQVIp....LsPQcD...ES..........sLKVELLIGQTLcV...D.CN.hH...RLG.GcLEo...........K.T.LEGW.GYDYYVFDcVouPV.........STMMACP...DGKKEpKFVTAaLG-su.MLRYNSKLPIV.VYTPcNVDVKYRlWKA............... 0 13 29 57 +2385 PF00736 EF1_GNE EF1BD; EF-1 guanine nucleotide exchange domain Bateman A anon Pfam-B_488 (release 2.1) Domain This family is the guanine nucleotide exchange domain of EF-1 beta and EF-1 delta chains. 20.80 20.80 21.40 21.00 20.50 18.10 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -10.04 0.72 -4.29 87 874 2009-01-15 18:05:59 2003-04-07 12:59:11 14 17 487 11 461 872 67 86.40 48 37.56 CHANGED hop....lllcVhP.s..sEsDhccLpcpl.cslptcul..s.s.ch.PluFGl+tLplhslltD.ccsusDpl.pEtl...sh.-t..........................VpSs-lsuhs+l ...................................KSs..llLDVK..PWD..DETDhpcLEcsV.Rul.p.h-..GLl......W.....G.uuKLVPVGaG.IKKLQItsVlED..DKV.usDtL.pEpI......pth--a..................................................VQSsDlsAhsKl............................................................................................. 
1 149 255 374 +2386 PF00647 EF1G EF1G_domain; Elongation factor 1 gamma, conserved domain Bateman A anon Prosite Domain \N 20.70 20.70 25.40 25.40 20.60 19.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.25 0.72 -4.34 40 588 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 372 1 292 576 1 100.60 47 27.49 CHANGED Ksc.cPh-..sLPp...uoFs.lD-aKRpYSNpDT..hss..AlPaFW-p.aD.sEsYSlWhscYK..Ys-EL..ph.sFMosNLluGah.QRL..-.phRKauFushslaGcs.ssssIsGlalhR.....Gp- .......................KsKcPh-tLP+.........uoFs.LD-aKRhYSNc-o...hsV..AlsaF.W...........-p....a..D...Es.a.SlWhs-Y+..Ys-EL..oh.sFMSsNL......lsGhF.QRL..-.thRKhuFushhlaGps......ss..ssIsGlalhRGp-................ 0 96 162 239 +2387 PF00889 EF_TS Elongation factor TS Bateman A anon Pfam-B_1408 (release 3.0) Family \N 21.00 21.00 22.50 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.43 0.70 -5.02 168 5433 2009-09-12 05:55:42 2003-04-07 12:59:11 14 21 4607 24 1441 3651 3185 191.80 35 74.06 CHANGED AuEGllsshl..psst........usllElNsETDFVA+s-pFpthspplsphsh.............t.....pt.sshc...........tlht..h.t.....tslpphhtthhuplGEphpl+R..h.thhps.....s.h.ssYlHst.............u+lGVllthpss.t.........plu+clAMHlAAhsP...phlstc-lss-hl-..+E+............................cl.............hptpstp.......................................p....GKP....................................................p...plh....-K.....llpG+lpKah.......pEhsLLcQsFV..p.DschTVpphlcptss.......pltpFsRaclGE 
.................................................................AAEGllt..shhpssh..........usllElNsETDFVA+st.....tF.thspplsphhh........................................t...tp...ssh-......................th.h.t...............phppthhthh..u.p.IGEplplRR.........h.thhpt..............th......hssY..Ht.........................s+.l.GVls...h..pusst............................phu+plAMHlAAh..pP......phls...c.-..lst-h.l...c.+Ep.............................................pl..........................................hhppshp........................................................p..........GKP..........................................................cpIh-K.......hlpG+hpK.ah.......pEh.sLhsQ..s..al.......h.-...s..p...h.TVtphlcptss........plhtFhRaclGE................................................................................................................................. 0 539 975 1245 +2388 PF01132 EFP Elongation factor P (EF-P) OB domain Finn RD, Bateman A anon Prosite Domain \N 20.90 20.90 20.90 21.10 20.50 20.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.58 0.72 -4.29 271 5656 2012-10-03 20:18:02 2003-04-07 12:59:11 15 10 4552 12 1140 2928 2197 55.20 32 29.59 CHANGED c+chpYLYpDG-.t.ahFMDs-.oYEQhpls...tctlu.-shpaLpE..shplplhh..as.spsl ........+shpYLYp.D..G-..t.ahFMDsE.oYEQhpls...tctlt...-.phpaLhE..sh.p..splhh..as.Gpsl.................... 1 382 741 957 +2390 PF04863 EGF_alliinase Alliinase EGF-like domain Mifsud W anon Pfam-B_4527 (release 7.6) Domain Allicin is a thiosulphinate that gives rise to dithiines, allyl sulphides and ajoenes, the three groups of active compounds in Allium species. Allicin is synthesised from sulfoxide cysteine derivatives by alliinase (EC:4.4.1.4), whose C-S lyase activity cleaves C(beta)-S(gamma) bonds. It is thought that this enzyme forms part of a primitive plant defence system. This family represents the N-terminal EGF-like domain [1]. 
20.00 20.00 21.40 21.00 19.00 18.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.98 0.72 -3.98 6 87 2012-10-03 09:47:55 2003-04-07 12:59:11 8 4 21 7 34 96 0 55.30 63 12.67 CHANGED hoWThKAAcEAEAVAAIsCSuHGRAaLDGlh.s-Gs.PhCECNsCYoGsDCSshlsNC ..........hoWoh+AAEEAEAVAsI.sCStHGRAFLD..Gll.s-Gs...PtCECNoCYsGPDCSp+lpsC. 0 6 24 30 +2391 PF01303 Egg_lysin Egg lysin (Sperm-lysin) Finn RD, Bateman A anon Pfam-B_1464 (release 3.0) Domain Egg lysin creates a hole in the envelope of the egg thereby allowing the sperm to pass through the envelope and fuse with the egg. 21.00 21.00 21.10 21.10 20.60 20.60 hmmbuild --amino -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.68 0.71 -4.11 24 77 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 40 11 2 85 0 88.00 52 83.19 CHANGED ls+thEsAlKlpIlsuaD+cLspWlp+HGptl....oshQ+KTLaFVNR.RaMQTaWpsYhhahs++IttLG.RsssssDYsplGAcIG+..Rlshchh.Ysall++N...hlP+apsYMtclhup+suDlPl .......ElAlKspIluGFD+pLspWLppHGptL....oslQ+KsLYFVNR.RYMQTah..a..h.......h........................................h..........h.................... 0 0 0 2 +2393 PF00971 EIAV_GP90 EIAV coat protein, gp90 Finn RD, Bateman A anon Pfam-B_210 (release 3.0) Family Equine infectious anaemia (EIAV). EIAV belongs to the family Retroviridae. EIAV gp90 is hypervariable in the carboxyl-end region and more stable in the amino-end region. This variability is a pathogenicity factor that allows the evasion of the host's immune response. 
20.60 20.60 20.60 20.70 19.90 20.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.45 0.70 -6.00 3 1082 2009-09-11 14:56:58 2003-04-07 12:59:11 13 5 9 0 0 800 0 234.90 62 80.26 CHANGED SKNSMAESKEARDQEMNLKEESKEEKRRNDWWKIGMFLLCLAGTTGGILWWYEGLPQQHYIGLVAIGGRLNGSGQSNAIECWGSFPGCRPFQNYFSYETNRSIHMNNNTATLLEAYHREITFIYKSSCTDSDHCQEYQCKKVN.......Nusph.sVsNTTEYWGFKWLECNQTENFKTILVPENEMVNINDoDTWIPKGCNETWARVKRCPIDILYGIHPIRLCVQPPFFLVQEKGIANTSRIGNCGPTIFLGVLEDNKGVVRGNYTACNVsRLEINRKDYTGIYQVPIFYTCNFTNITSCNNESIISVIMYETNQVQYLLCN.NNNSNNYNCVVQSFGVIGQAHLELPRPNKRIRNQSFNQYNCSINNKTELETWKLVKTSGITPLPIS .........................................................................................................sF.sh.phtsNRsh.hsNpTuTLL-AYpREIT.IY+oSCsDSDHCQEYQCppVs..................s..s...................h.p..ss..p.s......o....s........E.....YWGFKWLECNQTENhKTILVPENEMVNINsssTWIPKGCNETWARVK+CPhDlLYGIp.IRhCVQPPFFLhp..pp.t..s.s.s.u.RIuNCGPTIFLGVLEDNKuslps...ssCplpphp.IpR.DYoGhYQlPIFYhCshTslpS..................................................................................................................................................... 0 0 0 0 +2394 PF01176 eIF-1a Translation initiation factor 1A / IF-1 Finn RD, Bateman A, Mistry J, Wood V anon Prosite Domain This family includes both the eukaryotic translation factor eIF-1A and the bacterial translation initiation factor IF-1. 23.00 23.00 23.00 23.20 22.90 22.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.04 0.72 -4.55 63 5916 2012-10-03 20:18:02 2003-04-07 12:59:11 14 9 5270 9 1627 2668 2074 65.40 52 77.15 CHANGED cphph.GtVhchLuNuhacVphtsGpphLu+IsGK.h++plhIhtGDhVlVch.....psap..hsKucIsaR ..................IEhpGpVlEsLPN.uM..F+V.c...........L..........E....N..G..H.........h....l.L..AHIS.....GKhR....ppaI....R....ILs.G.D+V.pVEl........oPYD.....Lo+GRIsaR....................... 
0 532 1008 1345 +2395 PF05091 eIF-3_zeta Eukaryotic translation initiation factor 3 subunit 7 (eIF-3) Moxon SJ anon Pfam-B_6311 (release 7.7) Family This family is made up of eukaryotic translation initiation factor 3 subunit 7 (eIF-3 zeta/eIF3 p66/eIF3d). Eukaryotic initiation factor 3 is a multi-subunit complex that is required for binding of mRNA to 40 S ribosomal subunits, stabilisation of ternary complex binding to 40 S subunits, and dissociation of 40 and 60 S subunits. These functions and the complex nature of eIF3 suggest multiple interactions with many components of the translational machinery [1]. The gene coding for the protein has been implicated in cancer in mammals [2]. 17.50 17.50 18.80 17.50 17.20 17.20 hmmbuild -o /dev/null HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.83 0.70 -5.88 36 378 2012-10-11 20:44:43 2003-04-07 12:59:11 7 10 262 0 257 371 7 443.00 41 90.26 CHANGED hshhhssl..Nss.uWGPspssp.....th.........slPatPFSKuD+lG+luDW.......sshp............................pts.shu.ssp.ash................ts-DEsoFplVD.....ss+ss....tpt+sp.....thsptsttpc.....................tthhsppttputt+cttthtp..........hst+.stt.p................t...hR-uSVplps-WshlE..ElcFscLsKLs.l.ss.cs-Dl..sshGpl.aYD+saD+ls.sKs.E+.Lpph.c.Rs.hassTTo-DPlIpcLhpcspu.........................................................sVaATDsILusLMsssRSlYSWDIllp+hG.sKlFhDK..R-susl....DhlTVNEs.A..s-sPh-.......s....pssINospuLuhEAThINpNFspQVlhpspp..phpacpsp.PFhscspc....................uShuY+YR+asLssp................lpLlsRsElDulhp...s..sscspals..l+ALNEaDsKhps.....l-WRpK.L-oQRGAllAsEh+NNosKLA+WssQulLAGuD.hKLGaVS.......Rsss+DsppHsILuspsaKPc-hAsQhsLshsNuWGIlRsIlDhsh+..pp-G............KYlLlKDPsKsllRLYpl 
...........................................................................s.......l..s.s.uWGPsthsp.......th.................shPYtPFSKuD..+LG+..huDWos...ts.p..........................pp.s.th.u..ss...p.ash................tt.-DEsoFplVD........ss+sp.t......hh...tpt..ph..............t.ttp.p.t.tpttt...................t...h..tt.tpt.ttptt..thtp..............ttp..hs.ttt...........................t+...RcuSVplps-WphhE..Eh-FspL.Khp..h.........ps......s.....-spDl.................tphG......t.L.aYD.+saD+ls..s......+s.E+....Lp..p.h.p...Rh.hasV..TTo-DPlIpc.L...tcspu.........................................................sVFATDsILuhLMsssRSlYSWDIVl.+hG.sKlFhD..K..RDsuph..........Dh.lTVsEs.u..t-sP.-................ptsshNsspsLuhEAThINpNFupQslhtstp...+hph.psN.PFhpts.-.............................AShuY+YR+acLsts................lpLlsRsEhDuVhp.................ss...sspspals..l+ALNEaDs+hps.........sl-WRpK.L-oQR....GAVlAoEhKNNuhKLA+WTspAlLAsu-....hKLG.aVS.......Rhps+s.stpHlILus.taKPp-FAsQhNLshsNuWGIlRsllDhshc..........t..-G................KYlllKDPNKshlRlYp............................................. 2 108 152 212 +2396 PF01287 eIF-5a Eukaryotic elongation factor 5A hypusine, DNA-binding OB fold Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family eIF5A, previously thought to be an initiation factor, has been shown to be required for peptide chain elongation in yeast [1]. 21.50 21.50 21.60 21.50 21.40 21.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.03 0.72 -4.03 97 832 2012-10-03 20:18:02 2003-04-07 12:59:11 15 11 539 15 474 760 38 66.70 37 40.40 CHANGED hVcRp-aQ.LlsIpsDs.......alsLMs.-sG-T+-DlclP..........pt...plscclcsta....csG.c-h.VsllsA.MGcEp...lhshK .......VpRp-YQ.LlsIp.pDG.......aloLhs..-sG-o+-D..l+lP..............-s......pLspp.....Icsta.......spG..c-...lh.VsVhsA.MGcEthhshK...................... 
0 143 254 374 +2397 PF01873 eIF-5_eIF-2B eIF5_eIF2B; Domain found in IF2B/IF5 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the N terminus of eIF-5 Swiss:P55010, and the C terminus of eIF-2 beta Swiss:P20042. This region corresponds to the whole of the archaebacterial eIF-2 beta homologue. The region contains a putative zinc binding C4 finger. 21.60 21.60 21.80 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.74 0.71 -4.46 13 1012 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 491 15 654 962 101 123.90 36 40.09 CHANGED huh.....pNsctsssDcp..RachPthpstlEGp...tKTllsNhpDIAKsLsRsPp.alhKalhtELGosGslDup.sRhllpG+apspplpslLccaI+cYVlC+sCpsP-Tclh+c..sRhhhLcCcACGucss ........................hhp.....Nsp.sss-ph..RachP..p..s.hhEGp......hKTlhsNhs.-ls+s.................LpRssp......al............hcahhsELGsp.us.h.Dsp....sRhll....pG..pap.s.p.plpslLcpaIpc.YVhCtpCc.sP.-.TpLphc......sc...h..hhlpCcACGtp..s......................... 0 216 369 528 +2398 PF01912 eIF-6 eIF6; eIF-6 family Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes eukaryotic translation initiation factor 6 as well as presumed archaebacterial homologues. 
25.00 25.00 26.20 26.10 24.60 24.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.15 0.71 -5.23 65 564 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 471 8 362 545 94 190.30 51 82.10 CHANGED h+hshtsss.pIGVaspsosshsLl.shssscphhshhcppLs....lsl.lcToIuGopllGplssGNpsGlLVPshspDpElp....pL+pph..slpVphlcp.+hoAlG.NlIhsNDpsALlaP-lsc-stchIsDsLsV.EVh+toIAs.shVGShuVhoN+GsLVHPcsot-ElcpLsslhp...Vs.lssGTVNtGoshlGuGllsNshus .......................s.RspFE.sss-lGVFupLTNsYsLV..ulG.u.SEsFYSsFEuELst.hIP.l...l+soIu.GoRllGRhssG.....N+pGLLVPssTTDpELp....HlRNsLP...Ds................Vplp..Rl-..E.RLSALG.NlIsCN.DaVALV..HPDl-+ETEEl.luDVLsV.EVF.RpTlAspsLVGSYsshoNpGGLVHPcTohp-.-ELSsLLQ...VP.lsAGTVNRGSslluAGhVVNDash..................................................... 0 122 213 298 +2399 PF03608 EII-GUT PTS system enzyme II sorbitol-specific factor TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 26.90 26.70 22.50 22.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.95 0.71 -4.41 17 904 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 809 0 76 276 63 163.20 64 89.76 CHANGED alsphAEsFlsLFptGGcsFluhlTGIlPhLlhLLlAhNulIthlGEERlp+lAphuu+Nsl....hRYhlLPllusFhLsNPMshohGRFLPE+aKPuaYsuAspasHs.sGLFPHlNPGELFVaLGI........AsGlTp..LG......hssssLAlRYhLVGllhshl+GhVT-hhTsalt+p .....h.IT+uAEWFIGLFQcGGEsFsGh.l..........TGIlPLLIsLLVhMNALIsFIGpcRIERhAQ+sAsNPl....SRYLLLPhIGoFhhCNPMolSLGRFhPEKYKPSYYAAAu...CHohNGLFPHINPGE.LFVaLGI........AsGlTT..LG.......LP.h.GsLAlpYLLVGLVsNFhRGWVTDhTTAhhpK+............. 
0 22 38 61 +2400 PF03609 EII-Sor PTS system sorbose-specific iic component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.37 0.70 -5.09 72 5167 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1519 0 403 1910 44 235.10 33 88.43 CHANGED M...phshlQhlLlslluhlsshct....hh.sphthapPllsssllGLlhGDlpsGlllGuoLphhhLGh.sslGuAssPDsslAulluohlslt........ustsh.ssAl.ulAlPlAlhu.hLshls+....olsshhhHtuD+tAccGshpslphhphhuh.lhtulphulP.shl...slhhussslpshlssl......P...p..alhsGLsluGGhlsAVGaAhllphMss+..chhPFallGFllAua..h..pls...llulullG..hslAl.la ................................M...shlQhlLls.l.h.uh.l.u..s......h.ct......hh...sthth..ppPllsss....llGLll.....G....D....lpTGlllGuoLpLlhLGh.sshGu.A..h.sPDsshu...u.lluT.shult.............ss.h...ss....p....sAl...u.l...Al....PlA....s.h.sth....Lshlhp....ol.sshh.h.H.t.AD+...tAcpu.shpu.lp....h....hp.h.h.uh.hh...hu.l.h.h.ulP.shl...slhhG.sshVpshlssl......Pt....hltsGLslAGGhlPAlGaAhll.p.h.Mh.s.+...p.h.hPaFhlGFlhuAa...h.....pls...............llulullG..ss.hAll............................................................................................................. 
0 104 225 318 +2401 PF03612 EIIBC-GUT_N EIIBC-GUT; Sorbitol phosphotransferase enzyme II N-terminus TIGRFAMs, Griffiths-Jones SR, Yeats C anon Yeats C Family \N 20.90 20.90 21.50 21.30 20.60 20.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.17 0.71 -4.67 31 932 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 811 0 85 414 65 167.90 55 52.77 CHANGED pslcIsKGsGGWGGPLhltPstp+cKllYlT.GGscPslsc+IuELTGhEAVDGFKsulP.--ElusslIDCGGTLRCGlYPKK+IPTINlhsTGpSGPLApaIsEDIYVSuVpscsI.............................pls-u.susssspststptstsp.............asssKKlo-Qps......ullu....+lGhGhGpVlusFaQAGR-oID .....................p.pl+IpKGsGGWGGPLpl.sspst+KlVYIT.u.GsRPsIVDKlApLTGhpAlDGFKpu.P.-sEIusAlIDCGGTLRCGIYPK+pIPTINlhsTGKSGPLAQYIsEDIYVSGV+.-.sI..................................slssc..sss.p.ss.s.ss...........................pc..............YDTSKKIT.EQsc..........GllAKlGMGhGpsVAVhaQuGRDoID.................................................................................... 0 27 42 68 +2402 PF03611 EIIC-GAT PTS system sugar-specific permease component TIGRFAMs, Griffiths-Jones SR, Mifsud W, Bateman A anon TIGRFAMs & COG3037 Family This family includes bacterial transmembrane proteins with a putative sugar-specific permease function, including and analogous to the IIC component of the PTS system. It has been suggested that this permease may form part of an L-ascorbate utilisation pathway, with proposed specificity for 3-keto-L-gulonate (formed by hydrolysis of L-ascorbate)[1]. This family includes the IIC component of the galactitol specific GAT family PTS system. 
20.80 20.80 20.90 21.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.46 0.70 -5.70 89 5120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 1917 0 376 2362 24 393.50 26 88.73 CHANGED lpslLs..ssulllullshlGhllh++shscslpGslKshlGFlhlshGsu.llssslsshsphhpcsaulps......sl.ss.....sulsth.........auotsuhlhl.luhllNllls...+hTph+hlaLTGchhhahushlsslhhhhsh.ss...............hhlllhuulllulhhslhsshst.hhpclTs..ssshuluHhs......sluh....hlushlu+h......h...us.p..ch......ssEshplPKpLuhhp-shlsssllhhllallss..............hh.......................t...h..................sspshhh...hlhtulphAAulhll.tGVRhhluEllPuFcGIuc+llPs...............uhsALDsslsasa.uPsAlhlGFl.uhhstllslhlh.......h.........hslIl..P..GhlshFFsuusuulFuNuh...GGh+GslltuhlsGllhhhlshhh.....hshlushts.................................................................................tsshhushDa ................................ptlLs..ssslllsllhhlhhllh+.t.phscslcusl+shlGF.hhlss.shu.ll.s.s.s.lsPhs....p...th....sc....sa.sl.ph......slsDshhs..usls...............au.o....hu.hhhl.luh.l.l.Nllhl....hT...p...hppl.Lsschh.hhthhlss.hl.hh.hsh...ss................hhhhlhuullhulhhhhhushht.hhpclhs......ssuhuluHhp.......sluh....hls.t.hlsph............h.......ss.h..p....c............sh...-...s...hcl...s+plu.....hat...-shhsssllhhllhhlsshs..........................................h.................................stp..hh....hlphulsh.Asslhll.psVRhh......lstlsPshpGlsp+lhsp..................hh.ulDssllhu...sP.sslhhGhl.......llslhll.....................hslIl..P...Gshsh...Fs..shs.s.hsa.s.ssh...uuhRGslhtshltG.ll.hhhlslhh.....sshhushth....................................................................................................................................................................... 
0 98 214 302 +2403 PF03613 EIID-AGA PTS system mannose/fructose/sorbose family IID component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 30.80 30.70 23.30 23.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.44 0.70 -5.46 125 5271 2009-09-11 00:44:58 2003-04-07 12:59:11 9 5 1544 0 416 2104 46 264.50 36 94.84 CHANGED tpLTK+D....lppsahRuhhh.psuaNYERMQshGasauhlPsL+KLY....ts..c--htpAlcRH.lpFFNTpPhh.ushIhGlslAhEEccusss........hsstsIsulKsuLMGPLAGIGDolFasTltPIhuuluuulAhpG.sllGPllahllasll....thhh+ahthphGYchGsphlspl.tuuhl...pplocuAollGlhVlGuLlss.hVplsh...shp.hstsp...................................................................tshslQs.hLDplhPuLlPlhlshhsaaLLpKK...lsshhlllhlhllGllhshlGll ...............................cLTKcD.....lpp.lhhR..S.hhh..QsSa..NYERMQuhGasauhlPslKKlY.....ss....c...--......h...t.pAl++H.L.p.F.FNTpPal.sshlhGlslAMEEpcu....sss..............h.ttuIpulKsuLMGPLAGlGDslFW.hTlhPIh.....uu...luA..uh.A........h..........s.G....sl............lGP...l..l..FhllaNll....phhh+ahhhphGYchGsshl.pch....uuhl....pplocuASILGlhVlGuLlss.a.V.p.lsh.s.l..p...h..s.t.sp..........................................................................................p.s.hslQs.hLDplhPuLlPLhhThhhaaLL+KK...hsshhlIhshhllGIlsphlGlh....................................................................... 0 107 235 331 +2404 PF04873 EIN3 Ethylene insensitive 3 Mifsud W, Moxon S anon Pfam-B_4883 (release 7.6) Family Ethylene insensitive 3 (EIN3) proteins are a family of plant DNA-binding proteins that regulate transcription in response to the gaseous plant hormone ethylene, and are essential for ethylene-mediated responses including the triple response, cell growth inhibition, and accelerated senescence. 
21.40 21.40 22.00 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.44 0.70 -5.19 59 958 2009-01-15 18:05:59 2003-04-07 12:59:11 8 17 628 1 88 945 0 256.50 30 39.71 CHANGED EE.lc++...-Qh.l+...+p+php.hhts+Ess....ssupKss.............+spEQhp++phScA........h.thclsNh.GFh.s.sssssK..phssssLp...+Eclcpsh.Gshtlus........pscsLlhusscs..ps...cpospssolphlsDTshG.QtS.SLLpAth.tptpssPppphsLptultsP.............cELhas....suKDppssstsaKcP...........LpstlpHh.p.s.hchccpthcophLQs..KhSpRpSFAhhus.sp.Ec.ssplhsc..ot....shpc....pSPclslpsspc.-sp......................sccEschp.cspshcsss...........................................shshssp+c.csu-.sphs.p....tlsthCpsoQhpus........-ochhhss+pulsQs.h ............................................-Ehcc.t...pph...l+...+tpp.p...hh...pp....................htspKtp..............................psp-phpcphhscs.....................h..hplsph.uhh.s...ppsc....hsssshp...t-.t.p.ph.tshthsp.............p.pphh.....uttps.........ttu...tshpl..l.-Tsh..s......Slh.s.h.....t.pss.s..p....phs......hh....sP.............pE.h.s.....spptt...........shcp............ht..t.lpp...p.s..chcc..hpophLQs..+hot+pohshhu..tp.p..shphhst......s...........s.pp....tus.psshp.s.t.p.c..-sp......GpcEsp..p...spshps.h....................................................s.hshsp.p.c.p..p.ssc.....shhs.p..th..shhC.t...s.t.h..t.t.p.-hphhh..sp.uhspp................................................................................................................................................................................... 0 10 42 60 +2405 PF03317 ELF ELF protein Mifsud W anon Pfam-B_3282 (release 6.5) Family This is a family of hypothetical proteins from cereal crops. 
25.00 25.00 25.10 147.50 22.10 22.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.85 0.70 -5.11 2 19 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 18 0 3 15 0 252.30 77 96.73 CHANGED MT.shhR.........hhFSo.St....Fpshhhphp.+h.hI.....shIhF..lCl....Fhh...IhhFlsh..IlLPIlphFu....sSFLITLPPElQDPQALAHLtGLNFYLSLYEQDPtWVsFIQpELNHNTPLEDIPGRL+LFLMEE+hSshR.DlIQEFlALYtR.GshLPlEPYLl-tALRSYLDpI+ATDsFolLQAuYQDLR-pEtGShFFRDsVSHNRDhLEApSutRphlEVEpp.hapcI.+upApLERsEapHsL.lFp.EDh+RthE .MpNh..............VRWLFST.SR....FTsFYhahCIKFPhIY....shILFS.lCl....FhF...lsRFI.h..ILLPIhpLF.u.....uSFLITLPPEIQDPQALAHLAGLNFYLSLYEQDPGWVTFIQNELNHNTPLEDIPGRLKLFLMEEKLSSMRQDVIQEFVALYQRlGPYLPIEPYLVDEALRSYLDHIHATDSFTVLQASYQDLRENEGGSVFFRDAVSHNRDLLEAESSARRCLEVEQRIRWEEIPKSKASLERAEHEHALDLFKSEDLRRELE. 0 0 1 2 +2406 PF02323 ELH Egg-laying hormone precursor Bashton M, Bateman A anon Pfam-B_953 (release 5.2) Family This family consists of egg-laying hormone (ELH) precursor and atrial gland peptides form little and California sea hare. The family also includes ovulation prohormone precursor from great pond snail. This family thus represents a conserved gastropoda ovulation and egg production prohormone. Note that many of the proteins present are further cleaved to give individual peptides [2]. Neuropeptidergic bag cells of the marine mollusk Aplysia californica synthesise an egg-laying hormone (ELH) precursor protein which is cleaved to generate several bioactive peptides including ELH, bag cell peptides (BCP) and acidic peptide (AP) [1]. 
19.60 19.60 21.00 20.50 18.90 18.70 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.68 0.70 -5.23 4 25 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 7 0 1 41 0 116.80 44 91.91 CHANGED Mp+Ps.tssssh..llhCLhLSoLCsSSpSsuVHG+sFsspRAVK.pus.lV.SstD......stN.......-t.-csst.h.sss--..psEKpRLphtKRRlRFs+R..c.uth+.h.hpthshSADEN..FDLSN-DGA..QRchRsPRLRFYslRKRAAGs.EpSEspNPETESH...SRRKRSsLT.PSlpSLtpSLESGISKRISINQDLKAIsDMLLsEQhptRcRhLAsLRQRLL-hGKRuSs....VuLhsu-ht.-tRph .........p..............................................................................................................................................................ltK+...t.ptuEt.p.p.cS....Shp.RSh....s.PS......pu.E.th.K.ISIsQDlhs.sp..h.tpht.Rpp..tsLhphhhph............................................................................ 1 1 1 1 +2407 PF00964 Elicitin Elicitin Bateman A anon Sarah Teichmann Domain Elicitins form a novel class of plant necrotic proteins which are secreted by Phytophthora and Pythium fungi, parasites of many economically important crops. These proteins induce leaf necrosis in infected plants and elicit an incompatible hypersensitive-like reaction, leading to the development of a systemic acquired resistance against a range of fungal and bacterial plant pathogens [1]. 25.00 25.00 25.10 26.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.96 0.72 -4.04 87 406 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 40 16 173 325 0 89.40 32 49.01 CHANGED ssCossp.......lsslhsss.h...ssCu.....pss.s.hshhs.s..sossphtshCsuosChsllsslhshs..sD..C.....slshsu..hshpphlsshhstCs ........................sCosop....hsslsslLosssh...spCu.....sDS..G....Yshlss.s.s...h.PTssphphMCuSou.Cpshlsplhuhss..PD..C.....slsh.uGhhhNlhphssshtspCs............. 
0 59 101 173 +2408 PF03789 ELK ELK domain Finn RD anon Pfam-B_3136 (release 7.0) Domain This domain is required for the nuclear localisation of these proteins [1]. All of these proteins are members of the Tale/Knox homeodomain family, a subfamily within homeobox Pfam:PF00046. 21.30 21.30 21.40 22.10 20.50 20.50 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.33 0.73 -6.39 0.73 -4.00 28 1671 2012-10-04 14:01:12 2003-04-07 12:59:11 8 7 111 0 131 525 2 22.00 75 11.86 CHANGED ELKcpLh+KYuGaLusL+pEF. .ELKDpLLRKYSGYLSSLKQEFh.. 0 14 79 110 +2409 PF01151 ELO GNS1_SUR4; GNS1/SUR4 family Finn RD, Bateman A, Kerrison ND anon Prosite Family Members of this family are involved in long chain fatty acid elongation systems that produce the 26-carbon precursors for ceramide and sphingolipid synthesis [1]. Predicted to be integral membrane proteins, in eukaryotes they are probably located on the endoplasmic reticulum. Yeast ELO3 (Swiss:P40319) affects plasma membrane H+-ATPase activity, and may act on a glucose-signaling pathway that controls the expression of several genes that are transcriptionally regulated by glucose such as PMA1 [2]. 
21.00 21.00 21.50 21.10 20.90 20.70 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.99 0.70 -5.08 181 2001 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 384 0 1301 1987 48 227.10 27 79.46 CHANGED hhhss.h..shhlhshYlh...hV.....................hhGs.chMc....sR..c.......Ph............................pL+thhhlaNlhlslhShhhhhthh........t.hshh..h...........................................uhaphhC...............................................s.tthtts..hssaah..a.lSKhhEhlDTlFhlL+..........KK...ploFLHsYHHsshhhhsahshphh.suu..thhhh.hlNshVHslM.......YhYYhls..uhu...........h+.h..ha..KcalTphQllQFllsh.hhshhshhh....................................................................................................................sC........hh.shhhsh.......hhhhoalhLFhsFah.psY..h+pp....ptp...p .............................................................................ts.h.sh.hhhhhYlh...hl......................hhG..c.hMc..............sR.......p......Pa......................................pL+t.hhhl...aN...hhhslh....Shhhhhthh..................hhthh..............................................shaphhC.................................................s.t.tht.t..hhhh.h..h..h........ahhoK..hhEhhDTl..FhlLR.................KK......ploFLHsYHHsshh..h.h.s.W..........hs.h...p...hh...su.u............t...hhh...h.hlN..hVHslM..........YhYYhlu....uhu...........hp...hhW...Kcal..TthQllQFllsh..hhshhshhh.........................................................................................................................sC...........hhhhhhhhhhhshhhLF.hpFahpsY.hpttt...t........................................................ 1 477 666 1080 +2410 PF02488 EMA Merozoite Antigen Mian N, Bateman A anon Pfam-B_924 (release 5.4) Family This family represents the immunodominant surface antigen of Theileria parasites including equi merozoite antigen-1 (EMA-1) and equi merozoite antigen-2 (EMA-2) [1]. 
The protein shows variation at a putative glycosylation site, a potential mechanism for host immune response evasion [2]. 36.00 36.00 43.70 94.20 33.00 35.10 hmmbuild --amino -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.62 0.70 -4.90 10 417 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 56 0 2 413 0 240.60 65 91.16 CHANGED K-cKcDLslDVshoSh-NVTlsss...-AsslVhTA+-GaRFKTLKVGDKTLYsVDTSKaTPspsa+LKHs--hah+LsLcsApPlhFKKKGDKEWsEhcaAsYYD-VLFKGKpt.K-LDsSKFsDsuLFoossFGoGKKaTFpssFK.sSKVsF-cK-VGcscpAKaL-VhVYVGuDsKKVVRLDYFYsGDuRlKEVYFcLtD-KWs+lEQs-ANKsLHAMsooWshDYKPlVDKFSPLAVhuuVLIVuuuslYa .t.EEKKDLsL-VsATpsENhTVsso...suNcVVaTAp-GaRF.KTLKVGDKTLYTVDTSKFTPTsAaRLKHs--LaFKLsLppAKPLlFKKKoDK-WVpFsFupYLDEVLWK-KK-.K-LDASKFsDA.uLFsu-AFGTGKVYsFhGsFK.lpKVhFEcK-VGDssKAKYTuVKVYVGoD-KKVVRLDYFYTGDERFKEVYFKLVDsKWK+lEQSEANKDLHAMNsAWPhDYKPlVDKFSPLAV..................... 0 0 1 1 +2411 PF01105 EMP24_GP25L emp24/gp25L/p24 family/GOLD Finn RD, Bateman A, Wuster A anon Pfam-B_803 (release 3.0) Domain Members of this family are implicated in bringing cargo forward from the ER and binding to coat proteins by their cytoplasmic domains. This domain corresponds closely to the beta-strand rich GOLD domain described in [2].\ The GOLD domain is always found combined with lipid- or membrane-association domains [2]. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.10 0.71 -4.57 232 2288 2012-10-03 07:10:23 2003-04-07 12:59:11 19 33 357 5 1454 2124 14 176.60 21 79.83 CHANGED uhphplsss...ppc..Ca....h-pl..pp.sshlthpapl......sss...............................hslshp.........lp.....t.......ps....p.....hlhpp.ptp..pt..t...........pasFsu.p...................................psG.pYph.CFpsp.sphtt...............................ptlphclph.......s..tt..........................htp......hsp..ppclps..lpp............................plpp.......lppplpplpppppah+tREpph+sts-usspRlhhaSlhplhlllshuhhQlha..L+paFp ...........................................................................................................hhh.lss...t....ppc....Ch....hcpl.........tp..ss..hlh..h...p..apl...........pts....................................................t..hslshp..........................lp..................................ss....tp.....................hlhpp..ptp..st....s.........................pas..Fsu..p..............................................psG...p...a.ph.CFpsphsshst...............................ppVp.hclph................u...pt..p..................................................................htp........htp........ppclps..lpt.......................................................................p.l.pp............lpptlpp..lpcp...ppah+t.R..EtphRsts.......EssssR.Vhh....aSlhphhlllshuhhQlhhL+paF................................................................... 0 470 751 1133 +2412 PF04493 Endonuclease_5 Endonuc_V; Endonuclease V Bateman A, Wood V anon Wood V Family Endonuclease V is specific for single-stranded DNA or for duplex DNA that contains uracil or that is damaged by a variety of agents [1]. 
20.00 20.00 20.30 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.19 0.70 -4.98 14 1175 2012-10-02 11:25:59 2003-04-07 12:59:11 9 7 1072 10 386 811 48 194.60 45 85.13 CHANGED h.p-lpp+lshpsphpp..ph..chlAGVDlua....cpscuhushVlhshsshcllcptsh.hsclshPYlPGFLuFREh.hhlthlcpLtpc.c......................llllDGpGlhHPR+hGlAoHhGllLshPTIGVAKphLps....shhp.pst.........sshp.lh......suphhGtsht.ohpsspPlalSsGsphslcsslclspphhps.h+lPcssR.ADhho .........................................plt.pp.l.hp.s.ph.s.....t........ss..phluGsDVuFc........psG-lspA.A.hVl......L.c.....a.....P.....o............L.........-.l.V.EhplA...cl..sso..hP.YIPGFLSFREh...PuLLsAh...c....t....L....s....p...+...P.D......................LlhVDGp....GIu............H.P.R...R......h.GlAS..Hh..G..lLl..D..l..PTIGVAKp+LsG....capslssc..............................................Gu.hsP.L.hc...............cGEp.lu.....hV.hR.....o.......+.......s............p.........s..p.....P..........LFlusGHRV.Sl-............oAlshVp+Chps..YRLPEPTRh.ADth.u...................................................... 0 138 250 323 +2413 PF02945 Endonuclease_7 endonuclease_7; Recombination endonuclease VII Finn, RD anon Structural domain Domain \N 23.60 23.60 23.80 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.62 0.72 -4.27 16 283 2012-10-05 18:28:12 2003-04-07 12:59:11 10 5 233 14 41 269 148 84.50 31 34.34 CHANGED tp+htthYsl....ohccht.tlh-tQsG..pCtl..Cp....s..tpsh....slDHDH.....csGh.........VRGlLCssCNp.hlG+..h................tDs.chlpp..hhsYLcs ............................t...........pph...tlhptQ.........su...pCsl..Cpt.........ht...t..tp.ph..................slDHDH..........pTGh...........VRG.lL..CssCNt.sl.Gph...............tDs......phhpphhpYLp.t................................... 
0 8 21 40 +2414 PF04231 Endonuclease_1 Endonuclease_I; Endonuclease I Kerrison ND, Finn RD anon COG2356 Family Bacterial periplasmic or secreted endonuclease I (EC:3.1.21.1) E. coli endonuclease I (EndoI) is a sequence independent endonuclease located in the periplasm. It is inhibited by different RNA species. It is thought to normally generate double strand breaks in DNA, except in the presence of high salt concentrations and RNA, when it generates single strand breaks in DNA. Its biological role is unknown [1]. Other family members are known to be extracellular [2]. This family also includes a non-specific, Mg2+ activated ribonuclease precursor (Swiss:Q03091) [3]. 25.00 25.00 26.30 26.00 20.20 20.20 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.59 0.70 -4.51 55 1440 2012-10-05 18:28:12 2003-04-07 12:59:11 8 32 1199 11 288 962 273 210.20 40 70.17 CHANGED pohpphK.pLts...................psY..s...........................sss.hshYsGsshstp...........pu..sppsps..........schphEHV.......hPtpphupppp....................................pthpu.DlHpLhPssGpVNusRuNasFu.hs.................sss......phGpsshtssa.......cscphEP.cp.s+GslARshhYMstRY.s..............lclsctpp...............plhttWs+p.PVssaEppRNptIhp.hQGN+NPFl....spsp ........................................................t..t.hhhhh................................................................ps....css.....ss.hY..CGsphshpsp........thscht...sCGY.p.s.R.KspsR..........usRlEWEHV.......VPA.pFG+pppCWpp......G...............GR+sCtp....s..appM-o.DhHNLpPul.GE.....VNGDRuNa..au.phs.....................sGt......pYGp...Cs...hplcF...............Kp.+tsEPssc.s+GsIARsYFYMpspY..s.....................LpLSc.pps........................pLhpsWs+p.aPVos.WEp..cRs.....p........+Itc.......lQ.G.N.+.NPaVpcsp................ 
0 90 164 237 +2415 PF04667 Endosulfine endosulfine; cAMP-regulated phosphoprotein/endosulfine conserved region Waterfield DI, Finn RD anon Pfam-B_4454 (release 7.5) Family Conserved region found in both cAMP-regulated phosphoprotein 19 (ARPP-19) and Alpha/Beta endosulfine. No function has yet been assigned to ARPP-19. Endosulfine is the endogenous ligand for the ATP-dependent potassium (K ATP) channels which occupy a key position in the control of insulin release from the pancreatic beta cell by coupling cell polarity to metabolism. In both cases the region occupies the majority of the protein [1,2]. 29.80 29.80 31.20 30.00 29.70 29.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.95 0.72 -4.02 12 485 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 254 0 291 440 0 86.40 28 63.05 CHANGED slshuphp.pE..............+lhKhYGtLsspKsh........Lp+Khpc.RKYFDSGDYAhtKuts.pspt....s.tt.t..hssssthccshh++thsuSss .....................................................t.......p..-Et................+..lhthY...G.pL...ssKtsh..............................................Ltp+hpc...+KYFDSGDYshuKAtht...s.pphs..........................................ss.................................................... 0 71 139 223 +2416 PF00322 Endothelin endothelin; Endothelin family Finn RD anon Prosite Repeat \N 20.50 20.50 25.30 20.50 19.70 20.30 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.73 0.72 -4.71 8 186 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 53 9 79 193 0 29.50 68 19.48 CHANGED Rsc.hCSCsshpDKEClYFCHLDIIWhNTss .....Rs+.RCSCsohhDKECVYFCHLDIIWlNTPp.... 0 3 9 26 +2417 PF00555 Endotoxin_M endotoxin; delta endotoxin Bateman A, de Maagd R anon Arne Eloffson Domain This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. 
When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding. 25.00 25.00 28.80 27.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.47 0.71 -4.49 36 401 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 48 8 1 429 0 200.00 30 21.33 CHANGED TpspLTRElYTDPlstss................sshs....oFsslEsshlRsPHLh-aLsplplaTst.pt........hpaWuGpplphphots....shlppshaGsps..stsstsls....ssps....lYRshopsh..h........ssshtulptspFhhssspttthssshhp......sss..shDohspLPspssp.........s.hpsYSHpLSalphhttphsp.t........hPsauWTH+Ssc .........................TsoQLTRElYTsPlhtshst................psh......ohpslEsshl..RsPHLhDhLsp.lsIYT.st.pps............htaWuGHplthp.sss....sthshshaGshss..stssppls....hsps....lYRThSss..hh...th...........sp.ltslstscF..th....sss..thssshYt............tss....shDShsclP.Pp.s.ss.........Ps...h...puaSHRLSHlsh.hpts...tss.............sssF......uWTH+Ss-................................................... 0 0 0 0 +2418 PF03944 Endotoxin_C endotoxin_C; delta endotoxin Bateman A, de Maagd R anon Arne Eloffson Domain This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. 
This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding. 21.00 21.00 21.40 21.40 20.70 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.98 0.71 -4.07 80 512 2012-10-03 19:46:52 2003-04-07 12:59:11 9 13 62 9 5 538 2 134.80 32 15.34 CHANGED ITQIPsVKuhpl.t.s...susVlcGPG.aTGGDll.hpssssh......hphpls......sshsppY.+lRlRYAS.ssshphtlphssssh........shshssThss.....st.....hp..Yp.sFphhshss......shsh.sssp.tplpl......thpshsss..splhlD+IEFI..Pls ................ITQIPh.lKu.tl..s...s.....ssoVlpG.......PG.aTGGDllphpsssst............uplpls.....p.sshup+Y.RlRlR...YA.S...s.ss.....hphplshsspshs........phshssThs..s.....sss........lp....hs.s..Fphhshss........shsh..ssss....hsl.......shpshsus...splhID+IEFlPsp................................................................... 0 2 4 4 +2419 PF03945 Endotoxin_N endotoxin_N; delta endotoxin, N-terminal domain Bateman A, de Maagd R anon Arne Eloffson Domain This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.47 0.70 -4.96 38 632 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 108 9 79 668 0 213.70 28 25.79 CHANGED lpsulslsuhlLush.h..PhuGhlhuhht.LlshlWPstsss....WchFlpplEpLIc.Q+Ischs+spAlucLpGLsssac.....hYhpuLcpWcpsssssps......pptlcppFpsh-sthpsulPtFslp..sh....p...lsLLslYAQAANLHLhLLRDuslaGp.cWGhspssls.......paYsc.hchhpcYosHClpaYNsGLspL+.........sos...hpsWhcYNpFRREhTLsVLDlVALFPsYDsRt ..................................................................................................................h...shslsthlLu.h..h........P.h...sG.h.l...h.s..ht.llshlW.....st......tsss..........Wc.thhp.plEpl...Is....Q+Ipp.hsp....spshucL....p.G.Lpshhp....................Yt.pshcp.W..p....ss.ssss......................tptlpppapshpst.hhst.l...P....Fthp....sa.............p.........l.LL.slYsQAANLHL.lLRDs...laGp..cW...G...hs....sssls.............................shhs.c........hpphhtpY.osas..lp.hYssGLpplt...............................tss....tpWhpa........s...p...aR+phTLtVLDllul.F.s.YD...t.................................................................................... 
0 46 71 76 +2420 PF03272 Enhancin Viral enhancin protein Mifsud W anon Pfam-B_4236 (release 6.5) Family \N 19.50 19.50 19.60 19.60 19.40 19.40 hmmbuild -o /dev/null HMM SEED 775 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.10 0.70 -6.62 14 631 2012-10-03 04:41:15 2003-04-07 12:59:11 8 24 223 0 27 514 0 333.10 25 62.04 CHANGED hs.hsltlPslshPsWlpsss.shhulcHt+pPlshlltAushl+lRpsps........lTlclLNsscpTEcolss..sss.hphssptsSVsFVcpsh.sstssthc..VpaplsuphpsLPhYphGpsp..t.Fhsphs..spsssaual-hchlplLVP.sD+stlp...ttshslspLtsaYssIlpaYssLsGh........sptNh.s++YFsKADt.uGsGuAYYuptahApSssolp.haLpsossNWhsLHEIGHuY-htFs.spthht..EVWsNlhsDhaQYthhs.sE+.ppuhlYpsGp+spl-psIhshlsssh.sassWshhp+Lhhho.lh.ppuGcchhpphppphRph.pshshs.ppaplhshlssh.....sphDls.hhpLh.th..s.........................thp.tshhhtppshYPlppllssa-hht.....l+....hposasLVsssphht.....sslolphpIc-ss.......QIhGphhtlhcGschlhpsslssssphhhs.tlssGlYplhhPpGp.s+RYpls..............spYllVcsss.p.........hplpap.hstSslhsc.phplhGhsD.tlsAshhlshtpcplslplhsssPpstFsNphYaplsIcsssshph..hsh.t.sss..tpshhshch..ssshplhlacpcsspph.....ahssh.ss.psssahloppGlpptss.....ssttpplhs+IsphstaLsscsshLh.hpspl+DsIYLuhphl.sppp..ptLhppascalP....ph.ss.......pshshshhGhsphshlplphshshppsplhhtss.s.....t..hp.....Yhslph 
...................................................................tphs.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................phphpGhuD.hphApls.h.spppc.c.hplslp..s..s..pPHsYFss..pYuoIpVhsps..Gphlap+chpG..s.ppp.ts.p.pp.hshp....sshplp.....laH.....sEs.t....Rlph.......shtsph.ppp.....Kpt..h.ap..lTppG.Lcp.p........................................................................................................................................................tp............................................................................................. 0 0 8 14 +2421 PF03386 ENOD93 Early nodulin 93 ENOD93 protein Mifsud W anon Pfam-B_2931 (release 6.6) Family \N 25.00 25.00 33.50 26.60 19.90 19.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.65 0.72 -4.27 8 99 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 28 0 48 83 0 75.50 65 70.20 CHANGED QKhAhAKpCS+EushAGsKAAAVAsVAoAlPTLASVRMlPWAKANL.N.TAQALIISoAAGhAYFIsADKTILusARKpS .........Q+lAhAKcCS+EuslAGsKAAAVAolASAlPT.........LASVRMLPWAKANl.N.TuQALIIoTsAGhAYFIsADKpILuhAR+pS............ 
0 3 24 36 +2422 PF00113 Enolase_C enolase; Enolase, C-terminal TIM barrel domain Sonnhammer ELL anon Prosite Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.72 0.70 -5.45 11 7258 2012-10-02 01:07:48 2003-04-07 12:59:11 17 14 5866 108 1661 5284 3499 256.00 52 66.97 CHANGED shlLPVPhhNVlNGGSHAGNcLAhQEFMIhPoGAsSFsEAhRhGuEVYHsLKullKtKaG.sAsNVGDEGGFAPNIpoNcEAL-LIs-AIpKAGYpG..KVpIuMDVAuSEFY..c-sKYDLDFKs.psc.S+hlou-pLsDhYcElspcYPIVSIEDPFDpDDW-uWsphTtphGpclQIVGDDLTVTNPKRlpcAIEcKssNuLLLKVNQIGolTESlpAschApcAGWGVMVSHRSGETEDoFIADLsVGLssGQIKTGAPCRSERLAKYNQLLRIEEELGupAhYAGcsFp+sh ...................................................................................................................p.hhLPVP..hhNlING.GsHA.s.N.s.l.s.h.Q.E..FM.Ih.Pl..GA...p.oFpEAlRh..GuElaH.s.L.Kp.lL..........K..........p..............+G................h.........sT..u............VGD...E................GG................FAP................s.l.........t.........s.........s...............c.-...............A..L.phIh.c..AIct...AG..YpsG............pclhl.......uh....DsAu.S.E....F..Y.............c...s.....G.....p.....Y.......s.h.....p.......t....c...s.........................................................t..hh........o................upchscahpp.Ls.p....c.Y....P....I.lS.I.EDsh.......sE....sD..W-GWthhT......pp.l...G.......p...+...lQ................lVGDDLF....V.TN.s.c.hL....t..c..G.I.c..p...t.....hu.Nul...L.....lKlN..............Q...IGoLTETlpAlchA.+c..sG...a.sshlSH.R....SG..E..T..EDo..hIA..Dl.AVuhssG...QI..KT.Gu..uRo-RlAKYNQ.Ll...R.IE-t.L...u..p........A.........a...G.t....h.......................................................................................................................... 
0 546 1007 1367 +2423 PF03952 Enolase_N enolase_N; Enolase, N-terminal domain Sonnhammer ELL anon Prosite Domain \N 21.10 21.10 21.40 21.70 20.90 21.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.46 0.71 -4.22 190 6649 2012-10-02 11:54:41 2003-04-07 12:59:11 11 15 5507 108 1614 4840 2520 125.70 58 30.79 CHANGED pIpplpAREILDSRGNPTVEV-VhhpsG.hG......RAuVPSGASTGppEAlELRDuD.....p+ahGKGVhcA......VpNVNshIussL.l..G.hc....st-QptlDphhl.cLDGTp.....NK..u+LGANAILuVSlAsA+AAAsshs.lPLYcYl .............................................................IhclhuREllDSRGNPTVEs-Vhh...-....s.G....h..hG....................RAsVPSG..AS...T....G...p..........+EA.l...E..LRDGD....K..............sRYhGK.GVhKA..............VpNVNshI...Ass.l..l....G..hD...............sp-Q..ttID.phMI.c.LD...G..Tt........NK.....u+LGANAILGVS.L.....AlA+AAA.sttslPLYcYl....................................... 0 524 978 1328 +2424 PF03735 ENT ENT domain Bateman A, Hughes-Davies L anon Bateman A Family This presumed domain is named after Emsy N Terminus (ENT). Emsy is a protein that is amplified in breast cancer and interacts with BRCA2. The N terminus of this protein is found to be similar to other vertebrate and plant proteins of unknown function. This domain has a completely conserved histidine residue that may be functionally important. 20.70 20.70 20.70 22.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.12 0.72 -4.25 20 289 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 104 5 169 275 23 69.90 40 10.89 CHANGED hcsp..l+pLEp-AYuuVL+AF+AQus.lSh-Kcplls-LR..................KELplSs--HRphlp+lssD-plpplRctppusss ..........ct.l+pLEhEAYsuVlpAh+AQu-..L.ohEKcsLlsELR..................+pLpI.SsEcHRt.lp+sssD-hlppItc.hts...s........... 
1 42 89 127 +2425 PF01375 Enterotoxin_a Enterotoxin_A; Heat-labile enterotoxin alpha chain Bateman A anon SCOP Domain \N 19.60 19.60 19.90 20.10 19.50 19.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.63 0.70 -5.09 6 164 2012-10-01 23:25:29 2003-04-07 12:59:11 12 6 82 29 50 164 0 208.20 43 47.94 CHANGED hsplhhFFI...hlu....ANs..haRADSRoPDEl+cSGGLhPRGpsEsa-RGTslNINLYDHARGTsTG.sRasDGYVSTohoLRpAHLhGQshLuuascYYIYVlAsAPNhFDVNGVLGs.YSPaPsEsEsuALGGIPaSQIhGWYRVs....FGsl-st.hcRNR-YRc-hacsLssAPupDGYpLAGFPssaPAWcEtPWtpa.AP.uCssss+ppssssCsptsspLupttLt-app+lKRplshh..hS..ps-hhusssh+DEL .....................h.......................t...lYRu.D.s.RsP--I+puG..GhhPR.......Gpschh.p.cs.Tph...sh.......sLa-..HspGs..ps..........GhsR...h.s.D.GYVSTo......ho...lc.s.A..+..h.hu.p..s..hlu...s...hs......s......h..YIYsI...A.s.A..PN.MhsV............N...c....s...L......Gs..Y........SP..aPt.EpEhuALGGIPaoQIhGWYRVp...............................aG.sl-p.....t..hp+NctYcs..ch.a.pshs.h.Asuts...tLAG.FPs...pa..AWc.-cPWhpa..us.ss.t.p.t....c.s...sssCsttppphs.h.h.phpphh+.p.................................................................................................. 0 3 37 40 +2426 PF01376 Enterotoxin_b Enterotoxin_B; Heat-labile enterotoxin beta chain Bateman A anon SCOP Domain \N 25.00 25.00 80.60 80.50 21.30 19.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.57 0.72 -4.16 5 107 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 28 276 1 108 0 95.80 92 83.02 CHANGED TPQNITDLCAEYHNTQIYTLNDKILSYTESLAGKREMAIITFKNGATFQVEVPGSQHIDSQKKAIERMKDTLRIAYLTEAKVEKLCVWNNKTPHAIAAISMA .TPQNITDLCAEYHNTQIaTLNDKIFSYTESLAGKREMAIITFKNGATFQVEVPGSQHIDSQKKAIERMKDTLRIAYLTEAKVEKLCVWNNKTPpuIAAISMt. 0 1 1 1 +2427 PF02048 Enterotoxin_ST Enterotoxin_HS; Heat-stable enterotoxin ST Mian N, Bateman A anon IPR001489 Family This family consists of the heat stable enterotoxin ST from Escherichia coli. 
ST is a small peptide of 18 or 19 amino acid residues produced by enterotoxigenic E. coli and is one of the causes of acute diarrhoea in infants and travellers in developing countries. ST triggers a biological response by binding to a membrane-associated guanylyl cyclase C which is located on intestinal epithelial cell membranes [1]. 21.10 21.10 23.10 22.70 19.90 19.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.23 0.72 -3.71 6 34 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 13 3 0 35 0 45.90 57 76.04 CHANGED QETsShphuDu.SssIssEl.cKtCDs.psss....Es.sDW..CCElCCNPACAGC ..................................KtpIsh.s..ccCsh.Kpss....EshNsh..CCElCCNPAChGC 0 0 0 0 +2428 PF01417 ENTH ENTH domain Bateman A anon [1] Domain The ENTH (Epsin N-terminal homology) domain is found in proteins involved in endocytosis and cytoskeletal machinery. The function of the ENTH domain is unknown. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.49 0.71 -4.26 72 1127 2012-10-02 18:21:09 2003-04-07 12:59:11 15 25 307 14 691 1760 5 121.20 40 23.47 CHANGED hophphcVR-ATss-.sW.GP..osshhp-IuchTas..pph.clhshlh+Rl.........................................................s...cps+pWRplhK.........uLpLL-YLl+sGSE.....pslpph.+pplhhlpsL.ppFpa.h-p.pG+DpGhs............lRp+ucplhsLLpDs-pL ................................................................ophphKVR-ATs.sD.PW..GP.ou..s.l.M.s....EI.....A.....c.h...T........a......s.....h.....ts........a.t..E......I....Msh...la...+...RL............................................................p..-puKsWR+lYK..............uLpLL-YLl+sGSE........+V.s.p.ps...+c..s...l...a....h....I..psL....c..........sF....pa.lDc..pG+DQGhN............VRp+uKpllsLLpD--+L........................................................ 
0 203 350 539 +2429 PF00429 TLV_coat ENV_polyprotein; ENV polyprotein (coat polyprotein) Finn RD anon Pfam-B_145 (release 1.0) Family \N 22.50 22.50 22.60 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 561 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.24 0.70 -13.00 0.70 -6.06 16 3200 2009-01-15 18:05:59 2003-04-07 12:59:11 14 13 221 9 70 2514 0 268.00 25 83.52 CHANGED llLhlLhh.........ht.ssP+pshslh.plhs.................phhP.h......shCsLuut..sshshsspss.sss..s.spsuhhss...............p..Csps.ph......stttstshYlCPspspsh..stpphGh.pssYs..sthtCppsG.....psYWpss.ohsh.olppsto.....................ttspssptpstsssLhlpFopsGppu....pshsWuhR..lahS.GtsPhhhhslphhhppl........p.lltpp.tPPsp.pPht.Phss......................................t.ssssphlsLlpusa.sLNhTsPshsp-CWLCLstu..PPaatulus.sshsspTsss....C.ssspatLp.spsos.shhhu.......sl.....PhsatshsN............ps...tttss..hLssssGohasCssshT.ChpTsl.NhooshClLspLhPclohhss-....l.s..psssRh+Rt.slulTLs.LLsGL...GIuuuluTGsTuL.Vu.....sppappLpttlcsDlptLppoIssLccsLsSLuEVVLQNRRGLDLLFhcpG..GLCtALpEcCCFYAs+oGlVRDphtKLpE+LppRp+lhpupthW.pGhhshSPWhpsLlpohhGPLllLLLlLhFGPCILN....+LspFl+ 
...........................................................................................................................................................h......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ph.thtt...tpp...................................................................................................................................................................................................................................................................... 
0 14 16 49 +2430 PF00811 Ependymin Ependymin Bateman A anon Pfam-B_1391 (release 2.1) Family \N 25.00 25.00 26.20 25.30 24.20 24.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.67 0.71 -4.45 20 196 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 88 0 108 182 0 130.50 28 57.16 CHANGED lEcssphNpopph.DlLlhFcE..GVhY-IDt+NcoC+KpsLppph+Ph-lPssATa.sEhhlGussh.tpGLcVchWsGc..lP-p+........GpYohpoTphGClPVops.Yps-ps...s.LhhSFa-lpstlcDP.pVFsPPu.hC ................h.........stph.-hlhhac-..GlhYpIs.pscpCpKpsL..p.p..sa...cPhclPpsAoa.....sphhlGus....tpulhVppWssc...hsstp........utY...s.s.p..s.s..s....ClPVpps...ahs....sps...........shlh..h..p..Fhslph..GIpDP.sVFsPPs.hC................................... 0 61 68 92 +2431 PF01404 Ephrin_lbd EPH_lbd; Ephrin receptor ligand binding domain Bateman A anon [1] Domain The Eph receptors, which bind to ephrins Pfam:PF00812 are a large family of receptor tyrosine kinases. This family represents the amino terminal domain which binds the ephrin ligand [1]. 19.80 19.80 19.80 20.60 18.70 19.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.49 0.71 -4.48 33 1131 2012-10-03 19:46:52 2003-04-07 12:59:11 14 44 89 57 480 825 1 167.20 52 19.47 CHANGED EssLLDopsspu-L.GWhsaP.s.G.........W-EloshD.-ptpslRTYQVC..sV....hcssQNNWLRTsaIpR.psApRlaVEl+FTlRDCsShPusss..oCKETFNLYYhEoDp............sss...sshpcstatKlDTIAADEu.hsphc...........hssps.h+lNTElRslGP.....Lo+.+GFYLAFQDhGAClALlSVRVaYK+C ...............................................sLhDopt.t.u-L......uWhs.P.p...G...................WEElSthD.E.phssIRTYQVC..NV.......h-.ssQNNWLRT...saI.sR....cuA.p......R.......lalEl+FT...l.R.DCsS..lP..ss...h..G.....o....CKE..TFNLYYhEoDp...............s.ts....sthpEs.a...........hK..l....DTIAADES.FophD....................hGsRh.hKlN.TElRslGP................Lo+.+G..FYLAFQ.DhGAClAL.lSVRVaYKKC......................................... 
0 50 89 238 +2432 PF01370 Epimerase NAD dependent epimerase/dehydratase family Bateman A anon Pfam-B_93 (release 3.0) Family This family of proteins utilise NAD as a cofactor. The proteins in this family use nucleotide-sugar substrates for a variety of chemical reactions. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.13 0.70 -11.31 0.70 -4.93 96 38903 2012-10-10 17:06:42 2003-04-07 12:59:11 16 146 5462 329 11522 75709 43322 233.20 20 69.25 CHANGED lLlTGssGhlGutlschLhppuhc.........shshtppts..........................h..sDls.cpsslpchhppt...psDt....VhphAAtst...lttsh.ppstthhcsN....hhsshpll-uhp.........phsh.........h+hlhsoS.uplYGps..tt.....................h..sEsssh...tPhsP....YuhuKhhupthstsh......pcp..a.shpssshhhhNhhGPt........t.tthss+hlsthlpphh..........pup..................................lhhhG.......sGss.tRDalascDhscAhhhhlppsp.............spsaNlG ........................................................................................................................................lLlT.G.u..s..G...a.......l...G......u..p...l....s......p..t....L......h..p......p....G.hp......lh...................shs..t.s..t..p..t..t.......................................................................th.p.h..h.......s...D.....l......p......c.........t.......s.........t......l.......p.....p....h....h...p...pt........................D..s.......................V....h.......H..........h.........A........u......................s............h..........s...........t...........t..........s..........h........p..............p........s..........t.........t.........h.......h.....p.....s....N.......................l...h....u....o....h.....s........l.....l........c.....s....sp............................................................p.t.ss..............t+..h..l...a...s....S.......o.....s........s......l........Y...Gps.........tp.h....................s................................................................h.
....s.E...s.psh............t.P.h.ss.....................................Y..u...h.....o....K..........h....h....s..E.......p....h....s...p...s...h...............................t...c...p......h......s.........h.....p........h.......s....h....h....R....h....h...s...s....h....Gst.......................t......s...p...h...l.....s....h...h...h...p...t..h...h.......................................................t.s.p....................................................................hh.l.h.G............sG.....p....t.....h....R.....s..........a....l......a..l.pD...h....s...pu.h..h....h....h..h..p.p.t....................t.hsh......................................................................................................................................................................... 0 3673 7370 9810 +2433 PF02350 Epimerase_2 UDP-N-acetylglucosamine 2-epimerase Bashton M, Bateman A anon Pfam-B_888 (release 5.2) & Pfam-B_4862 (Release 7.5) Family This family consists of UDP-N-acetylglucosamine 2-epimerases EC:5.1.3.14 this enzyme catalyses the production of UDP-ManNAc from UDP-GlcNAc. Note that some of the enzymes is this family are bifunctional such as Swiss:O35826 and Swiss:Q9Z0P6 in this instance Pfam matches only the N-terminal half of the protein suggesting that the additional C-terminal part (when compared to mono-functional members of this family) is responsible for the UPD-N-acetylmannosamine kinase activity of these enzymes. This hypothesis is further supported by the assumption that the C-terminal part of Swiss:O35826 is the kinase domain [3]. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.11 0.70 -5.68 187 4483 2012-10-03 16:42:30 2003-04-07 12:59:11 14 13 3029 20 922 3343 2891 333.90 34 89.01 CHANGED ptlppp..t.....hchpllsTGQHh.pchspphhc............tapl..ps-hp....Ls..pss.tshup.huphlhthpcllpc...pP.DhVl...VhGDssoslAuAlAAhhhpIP.luHlEAG.Rs.........................-hp.s.hPEEhNRphss+luclHFssTppupppLlpEG....h..........ssp.....plassGsssl...Dsl......hhstppltt.........sl..p.............hppphlLlThHRtpshsts..hpplhpslpsls...pp..ph..llashH.sPpspchl.............phlpph.s.......plplhpPLsahcalpLhppuphl............loDSG.GlpcEAss..ls.....hPslsl..R..sER.EuhttGs...shl.l.u..s.pppIlpulpphlts.....tthpthsp....PYG.-GpA.uc+Ilchlt ...............................................................t..htpp..th-thlssTuQH..ccMh-p...lhp............hFpI....p......sD.hs.Ls.......pss..psLs.ch.......Tup....hlttlc.pllpc............cP.DhVL...VaGDTs..oohAsuLAAh.h.............ppIP..VuHlE...AGLRoh........................-hh..s.h...PEEhNRplsspluc...laFu...PTcpu+.pNLlp.E.uh.....................spp..........pIalT.GN.ss.l.DuL..............hhs.h.pp..h.....tt.....................plhtp.....................tsc+hlL.l..T...s....HRc..E.....N.h..s..p.s.....hc.p...lhpA.l.p.pls...pp....cht..llaPhH.....sP.p.s.pch.l.............pc.h.ltph..p.......plpLl-.P.l..sa.h.c.Fh...tLh.....p..cu.h.hl.............l...T.DS.....G..G..l.Q.E.EA..Ps..LG.....pPsLll.........R....s.s..TE.RPE....u......l...p...s..G.T......stL......lG....sc....tppIh.ptsppLlpc...........ppth.pp.hsp..stNPY...G..D.G..p.A.upRIlchl......................................................................................................... 
0 306 604 780 +2434 PF00758 EPO_TPO Erythropoietin/thrombopoietin Bateman A anon Pfam-B_990 (release 2.1) Domain \N 25.00 25.00 25.90 25.90 23.50 19.90 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.96 0.71 -4.61 6 114 2012-10-02 01:28:15 2003-04-07 12:59:11 13 1 54 9 46 138 0 150.00 39 69.31 CHANGED sRLICDSRVLERYlLEA+EAENsTMGCuEussLuENITVPDTKVNFatWK+.EspppAlEVWQGLALLSEAlLpuQAlLANSSQssEsLQLHsDKAlSGLRSLToLLRsLGsQpEthSsP-sss..sAPLRslssDslsKLFRVYuNFLRGKLpLYTGEsCRRGDR .....t.lCDsRVLp+alhEA+-AEsthhsCsEsssLspslsVPsTclsahtWKphphp...ppA.EVhpGLsLLtEAlhtupu.LusostssthhQLHsD.pu...lpsLtSLpuLL+sLss..ptt...hos.....scs.....ss..hhshp.phhhsshpcLhplhsshLpuKhtL.sst..sspptt................................. 0 2 4 11 +2435 PF00275 EPSP_synthase EPSP_syntase; EPSP synthase (3-phosphoshikimate 1-carboxyvinyltransferase) Finn RD anon Prosite Family \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.42 0.70 -6.05 23 10932 2012-10-02 15:27:11 2003-04-07 12:59:11 15 37 4830 176 2360 7857 8210 390.10 27 90.26 CHANGED lpsssplsGpV.plPGsKShosRslllAALusu.....oplpNlLcucDspphlpsL+pLG..hhphppppsshhspG.htphpss....t.hslhhGNuGTshR.Lsuhhuh.ts........lsLsG-splppRPlspllcuL+phGAcIphppstshsPltlps...hphtslclpssluSthlTuhLhhAshhAcu..sshlps..hsscPplshThphlpphGsplcs.sstp...hhlcGspp...hsu.pahV.uDtSuAuaFlsAAAlssG.pVtlpslshsslp.u...hlt.hLcchG....upls.......hs-stslshtt.........hcshsl..slpshsD.uhslAhhAtFAp.......usoplcshtplRhKEo-RlhuhusELp+lGucscEt............DGh.......psshh.....LpsuclpoatDHRhAMuhuLsu..sps.........tshIccspshs+oaPsah-pL 
......................................................................h..s.p.LpGpl..plsG.tp.ushsll.h.A.u.L..h..s.sp....spl.p.s.l...p..h........pDlt.shhph..lp...tLG..............s........p..l....................t...................................t..............s..............s...........s........h.............h.....h........s...........s...s..........t..........h.......t.....s.........h.p............ss...........ph.............s...h...p....h...tsS...hhshts.Lhuthut..s........................plsl.sG...ss.s........l.u.....p....RP.........lchh..lcuL.c.t.h..G.A.p..l.......p........h......p......p......s.........h...........h.ss.hp...........................tpL.p..usc........lh.h...s...h.....s...S........st..sTtslh.h...Au..hh..AcG..............pT.hlp.....s.....hspcPp..l.sc.htphLpp.hG....u..c....l.........p.....s.................s...s..sp.......................lp.l..........p....G......spp.............Lt.u.......t.p...a..pV.s..Dt.p....Auha..l..s.A...........A.........Al........s....s.............G...c..............lhl....p.s.s.......hh.......p.......php............hlh.hLcch.G............splp.......................h.t.c.........s..h..lt.hptt....................hc.h.t..s.l.......sl.p...s.....hs...cs.uhsh..shtu.hst................................hh.shhc.Gsu..lp.p.h.-.phhh.hss..ELp..+h..G.A.c..l.p...ps.................cs.hh...................................................hh.sst....................L..p..G.A..p.V....t..u.h..DhRhuhuh.slAuL...hu.p.G...........s.plpch.tpls+uYsshhccL............................................................................................................................................ 1 809 1563 2026 +2436 PF03736 EPTP EPTP domain Staub E, Mistry J anon Staub E Repeat Mutations in the LGI/Epitempin gene can result in a special form of epilepsy, autosomal dominant lateral temporal epilepsy. The Epitempin protein contains a large repeat in its C terminal section. 
The architecture and structural features of this repeat make it a likely member 7-bladed beta-propeller fold [2]. 20.60 20.60 20.80 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.87 0.72 -4.21 71 1712 2009-01-15 18:05:59 2003-04-07 12:59:11 12 64 48 0 784 1271 21 45.40 22 22.68 CHANGED ppFhphQsls....psshshcshsl.sschalllup.hu..top......lhcWss ............pFhpaQsls....hpsshshchapl..s......s......c.............alslus.hs.......top.............lacWs........................ 0 61 129 324 +2437 PF01133 ER Enhancer of rudimentary Finn RD, Bateman A anon Prosite Family Enhancer of rudimentary is a protein of unknown function that is highly conserved in plants and animals. This protein is found to be an enhancer of the rudimentary gene Swiss:P05990. 25.00 25.00 25.70 34.10 20.90 24.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.11 0.72 -4.32 13 234 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 144 7 154 192 0 97.70 53 65.02 CHANGED MSHTILLlQso.p+h-oRTasDYESlspshEGlC+lYE-+LKchNPssssITYDISQLFcFIDsLADLSsLVacpsTtoYhPasKpWIKp+lYthL+ppAt..p .................oHTILLlQPs.p+.-sRTYsDYESVs-CME.GVCKhYEE+LK+..h.....N...P......sssoITYDISQLF-FIDsLsDLSCLVYctsTp............o.YtPaNK-WIKEKIYhlL++pAt..s................... 
0 44 65 117 +2438 PF00810 ER_lumen_recept ER lumen protein retaining receptor Bateman A anon Pfam-B_1387 (release 2.1) Family \N 21.50 21.50 21.50 21.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.90 0.71 -3.71 61 726 2012-10-03 12:15:12 2003-04-07 12:59:11 13 9 338 0 498 703 22 138.40 43 59.98 CHANGED SCsGlSh......KoQhLYslVFssRYlDL..ap.sah...ShYN......slhK.....lhalsoohhslYlh..hhpa+t...TY-cph..DoFplp.aLlssshl...........................................LAllhs.....ppa......phh-lhWoFSlaLEuVAILPQLhhlp.+o.scsEslTu+YlhsLGhYRsLYlhNWIaRY ...........................SssG.lShKoQ.LaulVa.......s.s.RY.LDL...ap.sah..........ShYNo..hhK..................lhalssohh.slYhh...hhca+s.....TYDtsp.....DoF+.........l.........t.a.........LllPshl...........................................L.uhlhs.........pca..........................shhEl..lWoFSlaLEuVAILPQLhhlp..+T..u........c....sEslTuHYl.FsLGhYRsLYlhNWlaR...................................... 0 174 285 401 +2439 PF02732 ERCC4 ERCC4 domain LOAD anon LOAD Domain This domain is a family of nucleases. The family includes EME1 which is an essential component of a Holliday junction resolvase [2-3]. EME1 interacts with MUS81 to form a DNA structure-specific endonuclease. 
20.70 20.70 20.80 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -11.07 0.71 -4.43 159 1301 2012-10-11 20:44:43 2003-04-07 12:59:11 10 39 620 18 812 1250 227 143.70 22 21.21 CHANGED lDsRE..hp.st............l.phlpphu..h.phphpsLt..........lGDalh.......ts.......................................phhlERKs.hs..Dhss.....Slh...c..sR..hhpQh......pcLp..p............thpp....s.hlllEsptthhtt.....................hp.............pss.........................sl.....................psslsplplp.h.sltlhhops.hp-ouphltph ............................................................................lD.RE...hp.st..............l.phlpphs......l..phthp.p.L.s..............l.GDalh......ss.................................................................................................phhVERKs.ls.......DLhs.................Slp.........s......uR.hhpQh....................tclp....p.................thpp....................s.hlllE...tpt.st.h.p..............................................................p...................................................sl.......pssLsplplth..thtlhhops.pposphlh........................................................................................................... 1 273 458 660 +2440 PF04404 ERF ERF superfamily Aravind L anon Aravind L Family The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to ERF [1]. 
25.00 25.00 25.00 25.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.21 0.71 -4.70 43 704 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 504 0 89 596 339 157.60 21 65.36 CHANGED hhpshsplpppl.t.pKs......sphhph.....pYt..shc-I....lcsl+sllscpGl.hh.hsp.............................spthlplpshlhc.tsu.t-phps.h.tp.......s.sKs....ssQssGouloYA+RYsLsuhFuIsscpDcDs.....................ttptppppsppppssppppp..ttchhpppsp ......................................tthtplptp.l..s..t...pKs...............sph.hpY................pYt..sh..psI....lcslcsll..p..pps..Lhls.pt.sh............................tthhhplpss..hhs...ssG....pphss.hhsp.....................ptsKs....sssQss.GuuhoYu+RYsLsuhhs.Iss.-..c...DsDs....................ttpptpp.tspp.t.p........................................................................................................ 0 29 53 69 +2441 PF03463 eRF1_1 eRF1 domain 1 Bateman A anon Bateman A Domain The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known [1]. The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site [1]. This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification. 
21.20 21.20 21.30 22.40 21.00 21.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.36 0.71 -4.58 119 1096 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 518 26 713 1080 196 131.10 28 32.75 CHANGED h...hppshc.hchcthlchl.cppschhpLhsLlhssDp.hupsh+pl.pc...sssh.....puppsp.....................ppV.sslshtlp+lca....sstsuh....................................lhhsGtlhpt.....sphsphcolslEss..cslslh+.........phDshhhpchhcsh.p .........................................................pcslc..hch.ct.hlphl.psp.s.....sh..hphhsLlhPscp.h..up..sh+hl.pc................ssshpucssp...............ppVhs.slo.shp+l.ch..............sP.suh........................................................lhhs......Gpll.....sc..............c..s+.tp..hhol-hE..P.....+...s..lshph..................hhhDsthh.schhpth............................................ 1 236 416 593 +2442 PF03464 eRF1_2 eRF1 domain 2 Bateman A anon Bateman A Domain The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known [1]. The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site [1]. This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification. 
24.10 24.10 24.10 24.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.50 0.71 -3.83 35 1105 2012-10-02 16:33:16 2003-04-07 12:59:11 10 12 531 23 724 1082 171 129.50 31 32.44 CHANGED phuhlllDcstAplullpupshcllpchssslPtK+ttGuQSth+at..htcttcpah+cVuEtsspthh.......ppsplculllAGPGhhKsphhppthhctchtp........thlh.llDlShuuctGlpEslcp..uschLsc ............................huhllh-t.stAhhuh.l..pu..s......s.pcllp+hss.slP+K+t..pGG...pSsh+at.......htcthcpahcclu-thsph.h..................shsslpullLAGsu.hKs-hh..pt......hhctchps...........................phlh.lVcsSh..Gscp.Ghppslch..sschLp................................................ 0 236 422 602 +2443 PF03465 eRF1_3 eRF1 domain 3 Bateman A anon Bateman A Domain The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known [1]. The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site [1]. This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification. 
21.20 21.20 21.20 22.20 21.10 20.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.31 0.71 -3.81 34 1128 2012-10-10 14:40:03 2003-04-07 12:59:11 10 12 530 29 741 1099 188 117.00 33 29.44 CHANGED hhpEpcllscFhcplup..DsshssYGhc-lhcAlEhGAl-sLLlh-sLhppc.h..+p............................................h-pLscpscphGuplhllSscpppGpQl.cuFGGIuulLRapl ..................................hpEp+llscaacclsp..Dsu.+ssYG..............hc-sh+AlEh..GAV-....s....Lllh-sL.psc.h.h+pp.............................................................................h-hLsc.......p.h+ph.G.u.plcll....osc..pppGpQh.cuhGGIuulLRYtl................................... 0 245 432 616 +2444 PF03734 YkuD ErfK_YbiS_YhnG; L,D-transpeptidase catalytic domain Bateman A anon COG1376 Domain This family of proteins are found in a range of bacteria. It has been shown that this domain can act as an L,D-transpeptidase that gives rise to an alternative pathway for peptidoglycan cross-linking [1]. This gives bacteria resistance to beta-lactam antibiotics that inhibit PBPs which usually carry out the cross-linking reaction. The conserved region contains a conserved histidine and cysteine, with the cysteine thought to be an active site residue. Several members of this family contain peptidoglycan binding domains. The molecular structure of YkuD protein shows this domain has a novel tertiary fold consisting of a beta-sandwich with two mixed sheets, one containing five strands and the other, six strands. The two beta-sheets form a cradle capped by an alpha-helix. This family was formerly called the ErfK/YbiS/YcfS/YnhG family, but is now named after the first protein of known structure. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.41 0.71 -3.93 173 11311 2012-10-02 23:30:06 2003-04-07 12:59:11 9 81 3160 8 2612 8151 1995 140.70 20 42.83 CHANGED pthlhls.....hsp.......phhhh..hc.ssp....hh.hp..h...........lssG......................p....ttTPhGtaplt............h..t....h..............tt...................................................................h.h.ttts....lhlH..ss......................................t.t..SpGClpl.t.............................pcspplhsh................lt.....................................hu.s..............................Vhl ...........................................................................................s..hlhls....hsp........ph.hhh....hp...ssp.................hl..hs..hs...........lu..hG...............................p.sptT...P...p.G.s....h.p.lp.............................p+t.h.sP.s........p...s..s...h...h..h..t..............h.........h..........................................................................hh.h.t.h.h.s.s.s.s......hhlHs..sss.s...h.................................ps.....SpGClRl..p..s............................pchp.t.L.ash................l..................G.s...........V.................................................................................................................................... 0 765 1615 2089 +2445 PF03694 Erg28 UPF0143; Erg28 like protein Bateman A, Kerrison ND anon SWISS-PROT Family This is a family of integral membrane proteins, which may contain four transmembrane helices. Members of this family are thought to be involved in sterol C-4 demethylation. In S. cerevisiae they may tether Erg26p (sterol dehydrogenase/decarboxylase) and Erg27p (3-ketoreductase) to the endoplasmic reticulum or may facilitate interaction between these proteins [1]. The family contains a conserved arginine and histidine that may be functionally important. 
22.20 22.20 24.60 27.20 21.90 21.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.45 0.72 -4.06 21 246 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 215 0 172 225 0 113.00 34 77.67 CHANGED hGhLshWLlhlSlluhhNolQsYhs..hphoc+lYs...............................scs....sps.osLpuRsFGsWThloullRhauAapl.pspslYpLshhoaslAhsHFhoEhLlF+TspLstshh.....uPLlVuos .................................shLstWLlhVSlluhhNohQsahs..hthsc+lYs.........................................sps...........sts..ssLpuRTFGsWTlloullRhhsAhpl.pspslYplshhoahlALsHFhoEhlla+T.s.phs...hshh.....uPlhVuo.h..................... 0 46 89 137 +2446 PF04622 ERG2_Sigma1R ERG2 and Sigma1 receptor like protein Kerrison ND anon DOMO:DM04578; Family This family consists of the fungal C-8 sterol isomerase and mammalian sigma1 receptor. C-8 sterol isomerase (delta-8--delta-7 sterol isomerase), catalyses a reaction in ergosterol biosynthesis, which results in unsaturation at C-7 in the B ring of sterols [1]. Sigma 1 receptor is a low molecular mass mammalian protein located in the endoplasmic reticulum [2], which interacts with endogenous steroid hormones, such as progesterone and testosterone [3]. It also binds the sigma ligands, which are are a set of chemically unrelated drugs including haloperidol, pentazocine, and ditolylguanidine [2]. Sigma1 effectors are not well understood, but sigma1 agonists have been observed to affect NMDA receptor function, the alpha-adrenergic system and opioid analgesia. 
19.50 19.50 20.00 19.80 19.00 19.10 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.32 0.70 -5.08 7 288 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 227 0 183 275 0 190.10 36 84.41 CHANGED thhtalslhlslluslh.slpphlhpo.YhFD.cplpcluppulu.as.....sscsllpclhDpLpsh.ss.hhh......ssppEWVFNNAGGAMGsMaIlHASloEYLIhFGTslGTEGHoGhHhADDYFsILpGpQhAassGsh-..uEVYssGssH+htpGps+QYpMP..tssaALEhApGWIPsML.FGhhDsLSSTlDh.TLahTshlTuR-MltsLhhsKF ...........................................................h.......hhhshh....l..h.....hl.......hh....p.....alFs.ppltpluppsh.u.........................stpthhpplhsc.Lpp.p.ass..hl.........................tpptpWlF.......sNA.....GG.h.M.GuMhlLHASloEYlllFGTulGTcG.HoGh.a.h....A-sahs.ILpGp.....ht..ah..sG..s..hc...sElahP.GsshhhtpGpspthphs..sssahlEYuRGh..IPshL.FuhsDoh.SThDh.TLahThhhhu+thhhpLh..h............................................ 0 58 101 148 +2447 PF01222 ERG4_ERG24 Ergosterol biosynthesis ERG4/ERG24 family Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.58 0.70 -5.94 5 987 2012-10-01 22:51:20 2003-04-07 12:59:11 12 14 331 0 568 1273 263 356.30 30 87.33 CHANGED +ptcaEFGG.stGAlGIphGhP....lhplahsspucsstsphhhPcohslssLhstI+s.sphlass...h....s+plWTVFhhaaslQAVhYlhLPG+hscGlPLS..sGc+LsY+lNAhao...hllTlAllslLpssplFcLpalhDpFuplhosAIlFuFALoIaLYltSLhssch.hs......c.LAsGGsSGNlIYDFFhG+ELNPRlG......LDlKMFhElRPGhlsWllINLusll+QYcpYGpVoPuLlhVsls..QhLYlsDulhsEEuVLTTMDITaDGFGFMLuFGDluhVPFTYSpQThYLssH.PspluWss.hsluIhllLhsGYYIF+oANuQKNsFR...........T.PtpPpLph..LKaIpTusGopLLsoGWWuhARHINYhGDWlQSLuWuLsTGFs................................olLPYFYslYFhlLLlHRstRD-+KCK+KYGcDWEEYC++VPY.+IIPYla 
.......................................................................................................................................................................................................................................................................php...s..h....h...hhh..hahhh...phh..h..h........h........h......l......P...........Gh...h...spGh.L..........sGp.pL.pYphN...u.h.u.............hhh.s...h...s...h......h.s................h..h....h...h...h.t.......h.h...........h.shlh-.pahtlhsss....hlhuh.h....luhhhYh..p.uhhh.t................................tt.oG..s.hl.YDaah.GtELNPR.l.s............hDhK...hF...h.....p....hR..u..h.h.hhh.l........l.....s......l.................uh..........hhc..p...........h...c..................h.....G............h..l...........o...............s.......s...h.................h.hh.hh...................phhY.lh.s.hhh.Ep...................hhlsoh.................D.lhh-taGFhLhahshshlPahYshpshYLs..............p......P....p....h.s.h......................................hhh.l........h.h.h.h.h..h.u.a.hla.c.s.uN.pQKsh...FR.........................t.P..p.....s..h..h..hh.........ph...h....t...o........t........p......G......s....p......LLs...s...GWW.......GhsR+.p.Y.hu..Dh.h......h..u...h.s......a.s.L..s.s...G...h..s............................................................................p....h...s..a...F.Y.......l..a...h...sh.l.L....h...H...R..t..h..R...D.p.t.+.C.t.pK.YG.p..s..WpcYpphV.a.hhlPhla............................. 0 174 294 458 +2448 PF00769 ERM Ezrin/radixin/moesin family Bateman A anon Pfam-B_851 (release 2.1) Family This family of proteins contain a band 4.1 domain (Pfam:PF00373), at their amino terminus. This family represents the rest of these proteins. 
30.00 30.00 30.00 30.00 29.80 29.70 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.00 0.70 -4.84 10 499 2009-09-10 21:10:39 2003-04-07 12:59:11 14 15 129 5 242 462 0 216.90 35 42.32 CHANGED EchE+cppELcc+hpphEEchpcupccLpcppc+stcL-pctpptccptcpLcpcppchpctpc+LccpstsptcE+ccLttElsEhstclpplcpupp++EpEsschpp-lppupEcc-cs+pchhthhsss.....................s....l.ts.....................sssptct.ss-sst-tSp-L-......s-sphccps...EEcRlTtscKNE+lQcpLpsL+oELussRDEoK.coshDhlHpEN.lRtGcDKYKTLRpIRpGNTKpRVDpFEuM ....................................................................................................EphE+p+pEL.cRLhphcEpsp.pA.pc.tL...tcppcpA..Ltpctphsp-.EA..c...h...........Ltp.ct.....tpA..cpthpclpppuhcp...tcppcp.LttcltEh....p.tpl..ttlp-tpc+....+ccEsp......ph.p.p....chppAp-s.c+s+pcLhthhsts.......................................................s.s............l.ts.........................................st.p..p...........s.....-....s...t....t...t...h....St-hp...............................s-s...hpchs......Ec-Rls.h-Ks.c.+lpc.QL............ptLpoEltth+.cp+...cTt.DhlHsEN...h+.t.G.c..sKYpTL..+........p........l+pGsTKpRls.FEth................................................................................................................ 0 56 75 146 +2449 PF04137 ERO1 Endoplasmic Reticulum Oxidoreductin 1 (ERO1) Wood V, Finn RD anon Pfam-B_4729 (release 7.3); Family Members of this family are required for the formation of disulphide bonds in the ER [1,2]. 
21.60 21.60 36.10 24.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.55 0.70 -5.44 39 459 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 292 6 304 440 6 345.10 38 75.19 CHANGED pplhshLppLsp.scaF+aaKVNLh.ppC.............PFWscs..shC.ss..ps.....................CsVpssscp-.lPt.............sa+h....phps.......ppspp..tt...............hppslss.......tt.pthtthc-...hsp..as..h-Dpss...pusYVDLltNPERaTGYsGts.utplWpuIYcENCF........................spss.p.................................................................s.......ClE++lFYRLISGlHuSIusHLstcY........l......ppcs............................WtsN................lphFhpRl......u.......paP-RlpNLYFsYtlllRAlsKlpsah...pph..ph....ou...sp..........tpDpcs.......+phlpsllsplps....hsth...FDE.....shhFpss..tuhpLKc-F+p+F+N.lSclMDCVsC-KCRLWGKLQTpGhGTALKILFp.ss..........ppt........hp...LpRsElVALlNshsRlSpSlptlc .........................................................h.tlhPhlppLhp.psaF+aa+....l.sLh..+.C.............PF..W....s-p.....uhC.s...ps................................................CuV....p.ssppc.lP.................................................sh+h....sphpt..........tppspp.....tt.............................lppslsp..........tt.hthtthD-...tpp..aC....-Dpps...spspYVsLlhNPERaT.GYpGss.AhplWpu..IYcENC.F..............p.ts...................................................................ts.C..lEKRsFY+LISGLHuSIshHLstcY..............L.pps......................p..........WssN................lp.FhpRh......s......ptPcR.lpNLYFh.Yhl.LRAlsKhts..ah......pp.......ph..h.oG..s.s.................tpDtcs........+phlhp.llpphps....................hs.h...FDE..............sthF..tss...................pu.pLK.c-.F+t+F.+N....lS+IMDCVGC-.KCRLWGKLQ.stGhGTALKILFp.ct...................................pp.........hpLpRpElVALhNs...hs.RlSpSlptl.............................. 
0 109 169 247 +2450 PF03238 ESAG1 ESAG protein Bateman A anon Pfam-B_3037 (release 6.5) Family Expression-site-associated gene (ESAG) proteins are thought to be involved in VSG activation. This family includes ESAG 117A Swiss:P04477 as well as ESAG IM Swiss: Q26705. 20.20 20.20 20.20 23.00 20.10 20.10 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.47 0.70 -5.06 10 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 8 0 13 53 1 220.90 55 71.32 CHANGED sM-scpcKLEKLISYGNpMGDLVAKsGGLFAEVNESVRAVRKEIPuALIKsNKYYTAIAEITRTVWDDVKAlth..s.ucscCp-QcFcGVtEhcspCGDpTCPLucs.VsEuALpKYKsGCLplsVhsGSVScChNLPRsNLY+SGAlsoSs-sLcW+-cp.t.uphFQLpL+VcsIFGPLIAsFAAGQPPSsLhEMMsNITSLpSRFNEVHuNFTSLLlssNlssNVssTcSTI .....MctpcDKLEKLI.SaGNpMGDLVAKsGGLFAEVNESVRuVRKElPsALI+sNKYYTAIAEIsRTVWDDVcult....s..sctcCpspchcuVtEhcspCGspTCPLtcu.VsEuuL.pKYKsGCLplsV...s..Gp.Vsc.ChNLPRssLY+sGAVpsSscsLcWcpst...sthFpLpl+VppIFuPLIusFuuGpsPSsLh-MhsNITSL.S+FNEVHsNFTSLLlssslpssVssTsSTI............... 1 0 13 13 +2451 PF03433 EspA ESPA; EspA-like secreted protein Finn RD anon Pfam-B_4100 (release 6.6) Family EspA is the prototypical member of this family. EspA, together with EspB, EspD and Tir are exported by a type III secretion system. These proteins are essential for attaching and effacing lesion formation. EspA is a structural protein and a major component of a large, transiently expressed, filamentous surface organelle which forms a direct link between the bacterium and the host cell [1,2]. 
22.20 22.20 22.80 45.30 21.80 22.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.20 0.71 -4.86 7 296 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 247 1 18 151 0 172.00 48 96.44 CHANGED hS....ssshtuusp.sssssSpshsss.hups-slp....husuhusLh.hhhhh...o-LupuKaspMpppuccu+suQcMANplDthIAclpcss-K.tp.cLPp-VlcYhpD..NGIpV....DGhp.hs....................................................................................ttLspG-LpsVKuAL-spuNpsoDhVsQuQLplQph.pohNsssohhsuhQoh.uch.pu ......................................................................pss..hs...ss..hutp-slp..N.hu.h.usLLhhhhhh...pslu.sKF.......h-hpcsuccupssQchuN.hDthIAcstpus..s.K.....sKtclPpDVIcYhpD..NsIhl....sGho..s.......................................................................................................GcLstGsLQsVKAAlsscANp.TslhspuQlpIQpMSppLNhlhothosl.Sh.hch.S.h.................... 0 6 8 13 +2452 PF04806 EspF EspF protein repeat Mifsud W anon Pfam-B_3518 (release 7.6) Repeat The enteropathogenic Escherichia coli EspF secreted protein induces host cell apoptosis. Its proline-rich structure suggests that it may act by binding to SH3 domains or EVH1 domains of host cell signalling proteins [1]. 20.20 20.20 21.30 20.20 17.50 20.10 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.40 0.72 -3.77 3 690 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 113 3 0 484 0 43.70 74 73.26 CHANGED IQPARSMAEHIPPAPNWPAPoPPVQNEQSRPLPDVAQRLVQHLAEHG ..............IQPARsMAEHIPPAPNWPAPsPPV..Q...NE...QSRPLPDVAQRLhQHLAEHG......... 0 0 0 0 +2453 PF00756 Esterase Putative esterase Bateman A anon Pfam-B_476 (release 2.1) & Pfam-B_4968 (Release 7.5) Family This family contains Esterase D Swiss:P10768. However it is not clear if all members of the family have the same function. This family is related to the Pfam:PF00135 family. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.57 0.70 -4.96 20 9472 2012-10-03 11:45:05 2003-04-07 12:59:11 15 87 3654 69 2036 9626 1891 233.50 17 73.54 CHANGED Sssh.spchpltlaLP.t......ssspphPllYhLDG.......ssa.pphsstphhpphstctth..sh.lshPhGsps.....t.h...........ssptstsppap..salppELsshlcusasspsps.......pulsGpShGGhuALhhALc..aPcpFspluuhSsh..............................ss.Wu..........tpst..psDshhph.t.ssstsslplhlssGsp-s.ht..ph.sph.........hphhstsppht..hphpthsthsc.a......usHshth..WpspLssthhph ......................................................................................................................h.tpphph..t.lhhPss..............tsspth...Pl.l...a....h.....L....c..G.....................................ssh..........p......p....h....t.....h....t.....t....h....h...p.....p.....h.....t.....t.......p....h...t......h.........h.....h...h....l...s......h............s.......t....s.....s.ps.......................h...................................................h.......t......t..........t......s........h.....s.....t..p....at...........sa..l..t...p...E...L....h....s...h...l...c....p...p...a...s....s..sspt..............psl.s.Gt.ShGG..h..s.A...L.h..h...u.lp......p..P....s....t....F.s.......p...l....s..u..h..S..s.h....................................................................................................................................s..Wh....................................t........h........p.......s.....s..h.............h........................................t.....t...........p....t....h............l......h....l...s..........G.....t.......t.....-.........................................................................................h.....t...t.....t..t.h.............h..t.....h................h.........p.....h..........................................a.tth.......h..........................................................................................................
................................................................................................................... 0 630 1289 1731 +2454 PF01684 ET ET module Hutter H, Bateman A anon Hutter H Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 8-10 conserved cysteines that probably form 4-5 disulphide bridges. By inspection of the conservation of cysteines it looks like cysteines 1,2,3,4,9 and 10 are always present and that sometimes the pair 5 and 8 or the pair 6 and 7 are missing. This suggests that cysteines 5/8 and 6/7 make disulphide bridges. 21.80 21.80 28.50 29.40 21.70 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.89 0.72 -3.95 22 150 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 5 0 150 131 0 83.50 33 82.03 CHANGED CYsGlass.sss...hssuuhphCpG..pCuSloht.shNG..pssolYsCsPsslCpsLslss......sCssl.ss.........louCCCs.sssC.sssss CYsGlass..sss....hssuuhphCp.G..pCuSloh..shsG...pssolYsCsPsslCpsLslss......sCsslpss.........louCCCs.s-sChsss.s....... 0 42 61 150 +2455 PF00766 ETF_alpha Electron transfer flavoprotein FAD-binding domain Bateman A anon Pfam-B_853 (release 2.1) & Pfam-B_1321 (release 3.0) Domain This domain found at the C-terminus of electron transfer flavoprotein alpha chain and binds to FAD [1]. The fold consists of a five-stranded parallel beta sheet as the core of the domain, flanked by alternating helices. A small part of this domain is donated by the beta chain [1]. 
21.00 21.00 22.40 23.80 20.90 20.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -9.80 0.72 -4.36 39 4727 2012-10-03 09:55:27 2003-04-07 12:59:11 14 26 2782 14 1280 3485 2076 85.60 48 26.01 CHANGED ps-.lspAcllVuGGRGltu..tEsFc..llp-LAchL.GusVGuSRssV-..........sGWhss-+QVGQTGKoV+PcLYIAsGISGAIQHhuGMcsSc .............................p.s-LspAclVVSG..G.RGluu..........t-s.ap...llcpLActL......G.A..s.l......GuSRssV.D..........sGa.hsp-.hQVGQ.T...G+h.V.s.PcLYlAlGISGAIQHlAGhpsSc.................. 0 448 834 1086 +2456 PF01012 ETF ETF_beta; Electron transfer flavoprotein domain Bateman A anon Pfam-B_1321 (release 3.0) Domain This family includes the homologous domain shared between the alpha and beta subunits of the electron transfer flavoprotein [1]. 24.20 24.20 24.40 24.40 24.10 24.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.97 0.71 -4.53 174 9173 2012-10-02 18:00:56 2003-04-07 12:59:11 16 30 2828 46 2509 6583 4289 156.00 22 53.86 CHANGED sshlls-.p..s......ssplp.shshpslstAtplupt..........p..lssls.......hG.....s.spsspt.....hhs.hGsccllhlsssthsth...sstshutslsphlpp..........ss..lllhussshucs....lusplAstLshshlosss..tlph..pss.................hh..hpRshh...uGp.thsplphs.....tllTlcs..sshps ..........................................s.........t....phtls..hsh.pslptAh.pLtpt.............................p..ls.sl.s................hG.....s..tps......t.p..s.........shs......h............Gs-cslll..p......s..s..t..h.t.th...........s..shApslsshlcp.......................tt..hs....lllhG...ssuhscs................lustl.AthLs.....h..s..hssso....plph....s.s.sp..................................hs.hpRtl...uGh..thh..p..l...phs......sllTlp.sh.p................................................... 
0 863 1630 2135 +2457 PF05187 ETF_QO ETFD; Electron transfer flavoprotein-ubiquinone oxidoreductase Wood V, Bateman A anon Pfam-B_2305 (release 7.7) Family Electron-transfer flavoprotein-ubiquinone oxidoreductase (ETF-QO) in the inner mitochondrial membrane accepts electrons from electron-transfer flavoprotein which is located in the mitochondrial matrix and reduces ubiquinone in the mitochondrial membrane. The two redox centres in the protein, FAD and a [4Fe4S] cluster, are present in a 64-kDa monomer [1]. 20.40 20.40 20.40 20.40 20.10 19.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.44 0.72 -4.06 73 1342 2012-10-03 08:56:42 2003-04-07 12:59:11 8 18 1195 4 515 1183 1543 109.60 47 19.70 CHANGED GhhhGhshuul-p...lhp....Gps..PW.TL+.cppsDassLcsAscsp..IsYPKPD....GtLoFD+LSSValSsTNHEE-QPsHLpLc....DsslslshNLshYsuPppRYCPAGVYEhVcc- ................................................GhhhGhhhsGl-p......lht....Gph.Pa....T.....L+.c.p..........c..s............DttsLcs.As..p...s...p........I.....sY......P.K..P.......D....G.pLoFD+.LSSValSsTNHEE-QPsHLpLp..............Dss..lPls.h.NLs...hYs..uPppRY.CPAGVYEhVcp.t............................ 1 157 287 409 +2458 PF00178 Ets Ets-domain Finn RD anon Prosite Domain \N 21.50 21.50 21.50 21.50 20.90 20.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -9.92 0.72 -3.84 15 2375 2012-10-04 14:01:12 2003-04-07 12:59:11 17 22 255 41 1162 2070 0 80.40 50 21.09 CHANGED plpLWQFLLcLLsD..psppchIpWss.csGEFKls..DP-cVARLWGp+KN.KPsMNY-KLSRALRYYYc+sIl+KV..pGcRasY+Fsss .........................lpLWpFLL-LLpD....ps.stphIpWps.....c...........pGcFK..lh...Ds-c..............V.ARhW....Gt+Ks......+..P.s..M..N.Y.-KLSRAL......R............Y.Y.Yc....+s..Il..pK..l.....p.G...cRhsY+Fs..p..................... 
1 249 337 689 +2459 PF03318 ETX_MTX2 Clostridium epsilon toxin ETX/Bacillus mosquitocidal toxin MTX2 Mifsud W anon Pfam-B_3569 (release 6.5) Family This family appears to be distantly related to Pfam:PF01117. 22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.49 0.70 -4.80 15 164 2012-10-01 20:43:00 2003-04-07 12:59:11 8 8 81 12 52 305 3 208.90 14 66.88 CHANGED sshsshsshhspshs........................h..t.sphhshsshasussslsNsTsppQphpThSFscshTcTsSsosTpGhp....huspssuphslsh.....ssEsslpho..lo..YNa...ooosTpTsososphhsPSQsVsVPP+spspsshhlt+sshs.sshpLhssl.up....................shho.hshscsshl.shttstssshsphssshs.hsssspl.phpGoGhhc.hstGsphhl+hschshssssutphs .............................................................................................h...............................................p....shhhtpshhpN.s.ost.p.p.p.h.p..o...paspshs.po....s..ohosppGhp....huhpsshp..hslPh.....hsp..sshphs..hp....ash......spo...po...ps...so.pp.......p.ph.h..ss....sp...s...l..tVPspppspsphh.lpcsp...hp.ssh..p..h........sph.............................................................................................................................................................................t................................................................................................... 
0 27 34 46 +2460 PF01459 Porin_3 Euk_porin; Eukaryotic porin Bateman A anon Prodom_3211 (release 99.1) & Pfam-B__3211 (release 7.5) Family \N 26.90 26.90 26.90 27.30 26.50 26.80 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.59 0.70 -5.10 90 1317 2009-01-15 18:05:59 2003-04-07 12:59:11 17 7 369 3 757 1211 9 250.60 22 88.76 CHANGED sPssap-luKcs.+Dlhs+s.a.hssh+hclssh..ssu.stFpsou.............stshtss.......................hsstslpspat.......shshs................................................................hphss.ssslssplplpcthstu..l+sphssphsssp......psupl..chpYppsphshssslsh..t......sshhsushlhuhs.slslGs-ssacsspsphsp.shuluas.sp..................................cahsuhpl.sp......ssshsuSaap+ls..splpsGs-hshshsss.................pssho..........lGspYplcpss..tl+u+lsssGhlushhpccL..psslplsluuplDsh+t........stKlG .......................................................................................Pssap-ls+ps..+..Dlhsps....a....h.s..hh....+hslpp.....s.u..s.tFp.sst........................shphtst..........................................................hssh..slpspat................shshh.......................................................................................................................................................phss...s...sshssp..h.pl.p.pt.l.spu...L+.hph..ss....p...h......s...s.s.p..............................psspl.......chpYpt.pphshs.ssls...h...................ssh.hh..ss...hlh.u..hp...s..........hhhG.hphsa.cttp.sp..hsp........shs...luat..s....t....................................ca.hsh.pl.pp..............tsth.tuSh.a..p.....+.ls......pplpsuschsh..s.h.tsp.................p.s.p.hs...lu..spap..lctss..hh+....u..plsssuhluhhhpppl....t.s.l.pl..slu..u.lDthp...........shKhG.................................................................................... 
0 215 374 573 +2461 PF04346 EutH Ethanolamine utilisation protein, EutH Mifsud W anon COG3192 Family EutH is a bacterial membrane protein whose molecular function is unknown. It has been suggested that it may act as an ethanolamine transporter, responsible for carrying ethanolamine from the periplasm to the cytoplasm [1]. 25.00 25.00 28.10 28.10 23.30 23.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -11.99 0.70 -5.49 27 734 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 686 0 58 324 5 371.20 64 95.93 CHANGED M..s.INcIIlYIhhlFhVlGAlDpl..................lGs+h.GLGccF-EGlhuMGsLAluMsGIhulAPlLuplLpsllsPlaphlGADPuhFAsTlLAsDMGGY.LAppLAp......s.cuhlauGlIlGuMhGsTIVFoIPVuLGlIcKcD+cYhApGlLsGllTIPlGshluGl.ls..............G......hshhhllpNLlPlllhulLlAlGLhhhPstMI+GFhhFGKhlsslIslGLshullptlTG....hslls............GhsPI...........pculpllGpIuIhLAGAFPhVhllT+hhpKPLttlGchLGhsssuAAGhlAoLANsIsMFthhK-MssRGKllNlAFuVsAAFlhGDHLGFoAuhpsshIhPhllGKLlGGlsAlhlAhhls ..........................Mu.INEIIMYIMMhFMLIuAVD+.I..................hGppl.G.GuQFEEGFMAMGALuLAMVGhsALAPVLA+VLGPVIlPVYEhLGAsPSMFAGTLLAsDMGGFhLAcELAu......DsuAWLaSGLILGSMMGPTIVFSIPVALGIIE.sDR+YLALGVLAGIVTIPIGCIAGGL.lAhhsth............sh..lp......FoFuLILhNhIPVlIVAlLlALGLKFI.PEKMIsGFQIFuKalVALITlGLAAAllcaLLG....acLIP.........GlDPIhhs.....t-.hRAIEVIGSIusVLhGAYPMVh.LLTRaFcKPLMsVGKlLsM.NslAAAGMVATLANNIPMFGMMKp.MDsRGKVINsAFAVSAAFsLGDHLGFsAus.h....s.uM.IFPMIVGKLlGGVTAIuVAhhL.s.................. 0 29 41 51 +2462 PF03319 EutN_CcmL Ethanolamine utilisation protein EutN/carboxysome Mifsud W anon Pfam-B_3053 (release 6.5) Family The crystal structure of EutN contains a central five-stranded beta-barrel, with an alpha-helix at the open end of this barrel (PDB:2HD3). The structure also contains three additional beta-strands, which help the formation of a tight hexamer, with a hole in the center. 
this suggests that EutN forms a pore, with an opening of 26 Angstrom in diameter on one face and 14 Angstrom on the other face [2]. EutN is involved in the cobalamin-dependent degradation of ethanolamine [1]. 25.00 25.00 27.80 27.40 21.20 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.57 0.72 -3.60 51 1340 2012-10-03 20:18:02 2003-04-07 12:59:11 8 3 931 33 216 583 325 82.50 46 89.31 CHANGED Mhlu....+VlGslhuTpKsssLsGtKLLlVc.lc...........ptt.tuphhVAsDslGAGhG-hVLlsp.GSuARtshss.p.ssPl...DssIlGIlD ..........MpLAcVsGslVuTp+ppuLsspKLLlVchls...........................p.s.p..ssupstVAlD.slG.AGsGEhVLlss.GSoARpu.hps...p....stPl.....DhsllGIlD................... 0 95 156 191 +2463 PF02472 ExbD Biopolymer transport protein ExbD/TolR Mian N, Bateman A anon Pfam-B_2343 (release 5.4) Family This group of proteins are membrane bound transport proteins essential for ferric ion uptake in bacteria [1]. The Pfam family consists of ExbD, and TolR which are involved in TonB-dependent transport of various receptor bound substrates including colicins [2]. 
25.40 25.40 25.40 25.90 25.30 25.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.75 0.71 -4.15 80 6991 2009-09-11 05:16:59 2003-04-07 12:59:11 11 4 2435 5 1847 4807 3274 131.40 24 90.36 CHANGED cpp.tsplslsPhlDlhalLLlFFhl.oushsptt....h..plsL.Psssssp................hpppptlhlsls....tc.....sp......hh.......................................lssp...h...shppLtspLpphtpppsp..........................lllpuDpsssappllplhsthpps.Ghpplslssptp ..........................................p....hs-INlsPhlDVhLVLLlhFMl..Tushhsps.................l.....plsL...P.pus..sss.........................spspps.l..h.lsls............ss................sp....................lh............................................................................................................................................................lspp..........l............shpp.l.t.s.t.l..p...p...ht.pt..p.sps........................................................................hlhlp...uDcs.ssYpplhplhstlpp..u.Gh..p.plulhst..t.................................................................................... 0 593 1191 1567 +2464 PF01541 GIY-YIG Exci_endo_N; GIY-YIG catalytic domain Bashton M, Bateman A anon Pfam-B_489 (release 4.0) Domain This domain called GIY-YIG is found in the amino terminal region of excinuclease abc subunit c (uvrC), bacteriophage T4 endonucleases segA, segB, segC, segD and segE; it is also found in putative endonucleases encoded by group I introns of fungi and phage. The structure of I-TevI a GIY-YIG endonuclease, reveals a novel alpha/beta-fold with a central three-stranded antiparallel beta-sheet flanked by three helices [4]. The most conserved and putative catalytic residues are located on a shallow, concave surface and include a metal coordination site. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.17 0.72 -3.61 105 9798 2012-10-01 19:55:08 2003-04-07 12:59:11 19 83 4836 20 2329 7330 3586 77.80 26 19.92 CHANGED psslYhlh....st......ps...................phh....YlGpo.p..sl.ppRh.ppH..hps.......tptpht.................hpshphh.hlphhpspptt.........hphEpthlpthps.......thN ........................................................................................t.sulYhhp..........st........ss.....................................phl.....YlGpu...p........s...L...+p...Rl..psa.......hps..............p.s..s..p.t.ppt............................hpsh..phh....h.h..p..h...s..s......s....c.scA........................LhhEt.pll+hhps..................................................................................... 0 771 1529 1995 +2465 PF03081 Exo70 Exo70 exocyst complex subunit Mifsud W anon Pfam-B_2462 (release 6.4) Family The Exo70 protein forms one subunit of the exocyst complex. First discovered in S. cerevisiae [1], Exo70 and other exocyst proteins have been observed in several other eukaryotes, including humans. In S. cerevisiae, the exocyst complex is involved in the late stages of exocytosis, and is localised at the tip of the bud, the major site of exocytosis in yeast [1]. Exo70 interacts with the Rho3 GTPase [4]. This interaction mediates one of the three known functions of Rho3 in cell polarity: vesicle docking and fusion with the plasma membrane (the other two functions are regulation of actin polarity and transport of exocytic vesicles from the mother cell to the bud) [3]. In humans, the functions of Exo70 and the exocyst complex are less well characterised: Exo70 is expressed in several tissues and is thought to also be involved in exocytosis [2]. 
23.70 23.70 24.30 23.80 23.30 23.60 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.07 0.70 -5.67 58 882 2012-10-03 17:31:52 2003-04-07 12:59:11 10 11 263 7 540 852 0 323.30 22 57.29 CHANGED hppWlpshphshpslhtsEc.pLhspl................asst..s.......tppsFscls.ppslt.....pllphsctls...................pstpssp.tlhcll-hhpslpch...hsplpthhps.........t.hh...phpph..........hppLtcsspphht-hhstlpp.................psspphs.ssGulp.lTphlMsalphLs..-apssLspllts.ssss...................................sts.luphlspllssLhssL-t+....u+thp............................c.uL........ptlFLhNNhpalhpp..lcp............S...cLtslLGsphhpchp...pthcpahst...YhcssWspl.hshLps...........................................tths..........spppp.hK-+.............h+pFspuF--hhppQ.ppapl..sDspLRppL+psltphll...PsYppFhs+atstht.....t........al+as.s--..lcshlscL ..................................................................................................h.hh..shthlh.sEh.plhppl................................hst..t..............tttsFsphs..pssh.......tlhp.hscsls.......................hthpsst..plhpll.s.hhpslpph.................hsphpt.hhts......................................................htt...ph.th..................hppltpsst.thh..t...-h.ptlpp.................pstpshs.........s.GslH...lTp.s....hpalphLh..cappslttlh.tp.ttts........................................................................................................spp.luth....lhplltsLtts....L-sK.............u.+.h.Yc............................-.uL................pt.lFLhNNhpalhpp..lcp...............................................................................S......pLttl..l...u..p....p.hh.pphp....phh.cphhpt...Y.cs.Wtpl.hsh..Ltp........................................................t..t.........stpp...p..l.K.c+.................hctFNttF--hhptQ..ptWhl.....sD..pLRppl+.sltphlh...s.sYpt..Fhp+.atth........................t.t+al+.Ys.scplpphltp..................................... 
0 109 289 425 +2466 PF04257 Exonuc_V_gamma Exodeoxyribonuclease V, gamma subunit TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The Exodeoxyribonuclease V enzyme is a multi-subunit enzyme comprised of the proteins RecB, RecC (this family) and RecD. This enzyme plays an important role in homologous genetic recombination, repair of double strand DNA breaks resistance to UV irradiation and chemical DNA-damage. The enzyme (EC:3.1.11.5) catalyses ssDNA or dsDNA-dependent ATP hydrolysis, hydrolysis of ssDNA or dsDNA and unwinding of dsDNA [1]. This family consists of two AAA domains. 19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 805 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.30 0.70 -13.43 0.70 -6.56 84 2140 2012-10-05 12:31:08 2003-04-07 12:59:11 9 6 1971 4 352 2371 649 635.20 26 61.91 CHANGED MlplapSN+hEhLsphLsphlp...pPhsp.........Phps...EhllVpS.GhspWLphpLA............pptG..IuAslcF.shPusalhphhppl.lsp..ls...p....ps.s........FsppslsW+lhplL.....Pp......hh.........pp.st.atsLppYL.............sscsst...........h+haQLAp+lADlFDpYhlYR..............P-WlssWcpG.......................tthstspp.W...........QshLWRtL...hpchtttt.t...p..........hh.....hptlpp..tptt.ptLPc.RlhlFGlSsLsshhLclLpuLuph..t-VpLahhNPsppaWu..-lhs..p+phhp...................................t.tppthhttu......sPLLAuhG+.GR-ahphLtph.........................ttpch-hF...........ssppss...............LLpplQs-ILp..................tttptpht..hstsDpSlplHsCHSshREVElL+DpLLphh....spD....s.s......LpP+DIlVMsPDl-sYAPhIpAVF....u.................stptlPasl.uD...pshpppsslhtuhhpLLsL.spoRhsss-llsLLpsPsltp+Fslspp-lpplcpWlppuGlRWGlDtpp+pp...h..ths....s.pppoWphGLcRhLLGaAhs.................sssh..........hp....slhPa.splpG.puphlG+LhphlcpLpphhppL....pps.pshppWtphlppllsshF...sss.....scpphp...lptlpptlsph.tptsppus...............hpp..........lslsllpphLtppLsppsts..tFhsGsloFCslhPMRuIPF+VlCLLGhN-GsaPRpptssu.FDLhsp..p.....+.hGDRu+R--DRYLFLEALLSARcpLYlSYlGp.sh+DssphsPSlllsELl-alppsh..................stpshpph...h......
hp.HsLpPFs.phF...................tssthhSas..ppahtstpthpptt.................................................tshhst.....................stst.pslsLpp .............................................................................................................................................................................................................................................hhlh.upph-.L.thh...ht.......t..............sht....p...l.lV.u.uhtpaLp.tl..u.....................tt...G..lsAslt.h..hstthhhphh.th..h...........t................ps...............h...s..t.h.W.p..lht...ll............................t.........h.................................tp..........h..........lttaL.............................................t.....tt............................thaplutphAslFspY.hhR...............sphl.tWtts........................................h.....tt....W............Qs.LWp.tl...ht.h.t...t.......................................................h.....httl............t.t..thPt..p...lhlhuhsths...lphlttluph....hplhlhh.sPst..hWtsl.t........t.h.t...............................................................t........psL.LsthG+.sp-...hL.t.............................................tha.........................................................t...ts............hLtt..lQtslhp...................................................................................t.ptth...ls.tD.p.S.....lphasC..Hos.RElE.lLp-pLLthl....pps..............s..s........LpP+DllV..hs..sD...lssYsPhIpAlF..................u.....................spthlP..atl.uD...pp.stp..pp...Pl...lpshhsLLsL.......p......uRa.stp-llsLLcs...ss...l.t.t...+F...slsp.p....slphl......cpalptuGI...R........a..Ghsttphtt...................ths........t.ttpoWp.hu..l.....p...R..hlL....Ghuh.t....................tts...............h.t....s....hh.s........h...s.......t......tu.........hth.h.GpLsp.hl.....tLp........hhp...tl...........tp.......t.sh.t...p.W.h......h...h...p...p...h..l..p...thh.........s..............t.pttht..............ht.lpp....h...tth...htt.
..h.t.st.........................h..tt....................lsh.t.h.l.tp.....l.t..t..t..L......st.t.......thp....t..a.h..sG...lshsohhshRulPa+llslLGhs.-Gs.aP..c..p.................s.......s......h.......D......L....hst...t..................t....G.........D.R.........s.........pR...p...-...-R.hL............hL.-u......l..huApp...Lhlo..Yh.G..t..s..p..ss...p.....h......Pusl...lp...pL..h-.h...ltt.h...................................s..th..........h...Hsh.sFs.p..a.............................s.....sas..tphh.httt..tt............................................................hth...................................................................................................................................................................................................................................................................................... 0 96 194 283 +2467 PF02601 Exonuc_VII_L Exonuclease_VII; Exonuclease VII, large subunit Bashton M, Bateman A anon COG1570 Family This family consist of exonuclease VII, large subunit EC:3.1.11.6 This enzyme catalyses exonucleolytic cleavage in either 5'->3' or 3'->5' direction to yield 5'-phosphomononucleotides. This exonuclease VII enzyme is composed of one large subunit and 4 small ones [1]. 
22.30 22.30 22.40 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.94 0.70 -5.24 27 4810 2009-09-16 18:18:13 2003-04-07 12:59:11 10 9 4139 0 1058 3717 1818 268.20 31 69.15 CHANGED lFchs+KpslPthPp+IullTStouAuhpDhhcshpc..RhshsclhlhsshVQGcsAspplhpAlpph..s....th..hDslllsRGGGShpDLhsFNcpplsRslspss..lPVloGIGHEsDpTlsDhVADhRssTPTtA....Achllsctpcht...ppLpshppclppshppplcpppppLthhppphh..s........................................lpppppplpphtp+ltp......................shpphhpptppplpp...................................................................................................................................................LpppLcshsPpphLpRGaullh.pcG+llpsspcl.cpsctlplpht-Gphtspl .......................................................................LFs.phKpslP..hscplGllTS..o..GAul..+Dllpslpc....RhP..hp.lllaPshVQGppAs...........t..pIspulphh...............s....t.......h......t.....................p.......hDllIluRGGGSlEDLW.sFN-.EhlsRAlhtsp...hP.llSuVGHE.sDh.TlsDaVADhRAsTPouA.............AEhs.s....s.s.t.t.-hh....pplpphpt+l...tpshpph..lp.ppppplpplppph.htp.....................................................................P.hh.......hpt.p.tp.p.lcph.p.p..c.lpt....................................................................shp.p.hlp...ptppphptht....................................h.......................................................h......................ht..................................................................................................................................................................................t.ttpht.h.ptLpsl.s.P..th....lt..RGY..u...ls.p...................p.....p.s......p....h.lppsppl...p.sptlphphtDG.lps............................................................................................................................................................. 
0 371 715 913 +2468 PF02095 Extensin_1 Extensin; Extensin-like protein repeat Mian N, Bateman A anon IPR002966 Repeat \N 15.00 0.20 15.20 0.20 14.60 0.10 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.48 0.74 -5.49 0.74 -3.49 28 430 2009-09-16 13:20:14 2003-04-07 12:59:11 10 19 12 0 56 410 4 9.60 82 76.50 CHANGED PPVYKPPVEK .....PPVYKPPVEK.... 0 0 56 56 +2469 PF01267 F-actin_cap_A F-actin capping protein alpha subunit Finn RD, Bateman A anon Prosite Family \N 22.30 22.30 37.40 26.90 21.60 20.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.93 0.70 -5.34 39 514 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 296 35 304 465 1 246.50 40 88.89 CHANGED pptcl..ssphlpsAPPGEls-VhsDl+sl.hss..........t...lpss...ltsAhppYNpcphsslclss..ppsllop.ascl..........tss+YhDspspptFphDHlppcusshps......h....spph-.h.....p..psl.cpLp....tYspcaY.ssushsV.........................................................................................................................................ashpssst........................ltllIhup+asspNFasGcWRSpaphs......t...plpGplclplHYYEDGNVpLpssKshpp......sh...ss...t.ApsllptIpphEspappplscsaspls-ssF.KuLRRpLPlTRsKlsWs+.lssY+LGp- 
.....................................hcIsspFlhpAPPGEh...s-Vhs.......Dl+hL.ls.s..........-.sllcps...hs..pAFtpYN.hcQh..sslc.l...pG...ppp........V...lIocascL....................................................usu+ahDPcs.phsFpaDHlcpcAo.Dsps...........hp.....stth-th................R.pulppsLp....sYlp-HY..ssGs...s.s..V.......................................................................................................................................................................................a.spp.csp................................plhssIpuppapspNFWNGRWRSp.Wphsh......ssss....plsGhl+lp.VHYYED.GNVpLsopK-lpp................ols...sss......t..psApphl+hlctsEscYQs....uls-sapshS-ssF.Ku.LRRQLPVTRoKl-WsK.lhuY+lGp-......................................... 0 82 132 212 +2470 PF00469 F-protein Negative factor, (F-Protein) or Nef Finn RD anon Pfam-B_128 (release 1.0) Family Nef protein accelerates virulent progression of AIDS by its interaction with cellular proteins involved in signal transduction and host cell activation. Nef has been shown to bind specifically to a subset of the Src kinase family. 
20.60 15.00 20.70 15.00 20.30 14.90 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.64 0.70 -4.97 20 19129 2009-09-11 11:41:56 2003-04-07 12:59:11 15 3 100 35 0 15393 0 175.30 58 97.39 CHANGED GuuhSK+pspsh.slRERLh+s.ttssutp.....t....u.st.spttsS.shEtp.thhpucs................hpppNhD...l-up--.-cVG.hPVpPpVPLRsMTYKhAlDhSHFlKEKGGLEGlaYScRRpcILDLYl.+EpGIhPDWQNYTsGPGlRYPhsFGWhaKLVPV-spptsE....-sEspCLlHPuQppthDD..scGElLhW+FDspLAhcacAhphaPEpatc .................................................................GGKWS.K.p.p.h...h..G.Wss.lRERh+Rs......psu.t...........................G.V.G............AsSRDL.E+.+....GA.l.T.oS.N................Tst.sN.u..-..C...A.W.L...E.......A......Q...E..-....E...E...VG.FPVRPQVPLRPMTYKu..AhDLS.HFLKEKGG.L.EGLla.Sp.+RQ-ILDL.WlY+TQGaFPDWQNYTP..GPGh..........RaPLTF.GWCaKLVP.V.-spclEc...scGEss.sLL.H.Ph......s..H.G.h-D..s..ctEVLhW+FDSpLAh+.H..hA+E.hHPEaap................................ 1 0 0 0 +2471 PF03807 F420_oxidored NADP oxidoreductase coenzyme F420-dependent TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.98 0.72 -3.56 56 6885 2012-10-10 17:06:42 2003-04-07 12:59:11 12 28 4452 62 2074 23061 11556 93.30 25 34.58 CHANGED plull.GsGshGpulupthstsG.......pplhhusuRss-+httht.pphs............htsps.sst-Asp..tu-lllluV.hpth.plhppl........thhps+lllsssss 
..................................................pIuhI...G.s...Gs...M.u....p..A...l...h....p...G..l....l.....p....s..u.................s.ppl.....h......s........s..t......s..........p...p...h........p....t....l...t....p.p.hG......................................h.p..s..s..s...........s...s......t..c.....s......s.p........pu.....D.....l.....l..l.......L...A.......V....K....P..p.....h....h......t...p...l.l...ppl............tth.t...p..s.p....l.llSlsAG.................................................................................................................................... 0 639 1255 1704 +2472 PF01115 F_actin_cap_B F-actin capping protein, beta subunit Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.80 25.50 21.20 22.80 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.57 0.70 -5.17 31 384 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 276 28 239 354 4 220.90 55 84.34 CHANGED phDuAL..DLlRRLsPpplccNLsslhs.LsP............cLs-DLLSSVDpPL+lpp.sppos..+-YLtCDYNRDGDSYRSPWSNpYaPsls.................-ushPScpLRcLElpANcuFDhYR-LYYEG.Gl........SSVYLWDl-ct......................uFAGVVLlKKs............................................spphsGsWDSIHVhElpppspt...........sspY+LTSTVlLpLps............p..t.....suslsLSGsLTRQtEpshslsss.......ssHlsNlGphlE-hEsphRshLpplYFuKsKDIl ...................hDsALDLhRRLsPpplccNLssLlsL.sP.............sLsEDLLSSVDQPLclt+.DctsG........+DYLLCDYNRDGDSYRSPW.......SNcY-PPL-........................DGshPSt+LRKLElcANpAFD.YR-LY..aEG.GV.............SSVYLWDLDc.......................GFA.GVlLlKKs.........................................s.ss.up.p.pGsWDSIHVhE.lp..-+.up............su.c.YKLTSTVhL.hLpT.....................................sp..pu.....sGphs..LuGslTR.QhE.....pD.slsss...........................................ssHluNlG+hVEDMEsKhRshLp-lYFGKsKDlV.......................... 
0 89 133 198 +2473 PF01116 F_bP_aldolase Fructose-bisphosphate aldolase class-II Finn RD, Bateman A anon Prosite Domain \N 25.40 25.40 25.70 25.70 25.00 25.20 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.79 0.70 -5.31 188 7516 2012-10-03 05:58:16 2003-04-07 12:59:11 15 14 4376 71 1389 4343 1369 297.90 34 95.98 CHANGED slsshpclLpcApcstYAlsAFNlsNhEhlpAllcAApctpSPlIlQhSpGsh.pah.................G.hthlsthscshAcph.s.......VP.VsLHLDHups......h-slhpAlct........GF.oSl.......MhDuS.......chs.hEENlphT+cllchA+ths..ls...VEuElGpl..G.....Gp..EDults...ptt...........pthhTsP--AtcFsccTGlDu.....LAsulGssHG..hYps.........pPp........Lshp..h...................................LpcIpptl........................sl.......P.LVLHGu..........SGlsp-.....................plpculphGlsKlNlsT-hphAaspul....................+chltpp.st............aDPRphltsuppuhpchlpcphc..hh.Goss+s ....................................h..sshpphlppApc..psY.Al.s..AFN..s..s..N..h..E..hhp...AllcuApc.h.c.u.P.V.I.l.ph..S.s..Gut..p.ah..................G.ht.h.h...t...p.h..l...psh.u..cp.h..s........lPV.slHhDHu..tp................h-shhpulct..............GF.oSl......MhDuS..................chs.h....-E....Nlphs+c...l....V-h....sHt....hG..........loVEuELGsl........G.......Gp...EDslsspt...............pthh.TsP--s.t.ch.lpcT...GlDu......LAsulGssHG..sYp...............pPp..............Lshp..h...................................Lcclpchl.............................sl....P.LV...lHGu.....................SG..lspc......................c.Ip.c.uI.p.h...G.Vs..KlNlsT-.h.phAhsp.ul...................................+chhtpp...st............................aD.P.Rp.a.l.tsuppAhpphlppt.hp...hGstsp............................................................................................ 
0 471 906 1182 +2474 PF03405 FA_desaturase_2 Fatty acid desaturase Bateman A anon Bateman A Family \N 21.60 21.60 21.60 21.90 21.40 21.50 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.02 0.70 -5.67 11 926 2012-10-01 21:25:29 2003-04-07 12:59:11 9 3 348 37 263 879 84 289.00 38 89.32 CHANGED PPcKhEIF+SLEuWAccN.lLshLKPVEKsWQPpDFLPDPsS..DuF...p-QV+ELRcRs+ELPD-YFVVLVGDMITEEALPTYQTMlNsLDGl+DETGAS.oPWAlWTRAWTAEENRHGDLLNKYLYLSGRVDM+pIEKTIQYLIGSGMDPtTENNPYL.......GFlYTSFQERATFISHGNTARLAK-aGDhpLAQICGoIAuDEKRHETAYTKIVEKLFElDPDsTlLAlADMM+KKIoMPAHLMYDGcDssLFcHFSAVAQRlGVYTA+DYsD.ILEaLVsRWcV-KlT.GLSuEGR+AQD....aVCuLssRIRRLEERAptRA+ptt...slPFSWIFsRcV .............................................................phclhpp.L-..shs-pp.l.pa.Lp.scc.sWpPpDal.P.......spu.............csF........h..tsc-hc.ct...........ppls-shhlshVsshlTE-sLPo....Y....pp.lsp....h.....u.h...........s..G...u...............sWu.tW.sptWTAEENRHG.lLpc.YL.hl.ot.pVD.p.p.lEcsh.hllss...Ghc.s..t.p...p..s.sshh.................uhlYs.oFQEhAThlS.HtN.....Tu.....+h..u.............u.........D.hL.uplhupIAuDEpRHthhYppllcphh-l..sP.stshhAhschhpp.hpMP.u.t.hh.-.................................Fp.+huslhtchGVYssp.pa.h.-.llp.l.lpcWcl.p.ht..s....los-Gp....+Ap....-....alp....tLs...ph...p+h.....pE.ptp.t.hh.t...p...................................................................................................................................................... 0 59 187 239 +2475 PF04116 FA_hydroxylase Fatty_acid_hyrd; Fatty acid hydroxylase superfamily Bateman A, Wood V, Finn RD anon Pfam-B_7847 (release 7.3) & DOMO:DM04600 & Pfam-B_905 (release 4.1); Family This superfamily includes fatty acid and carotene hydroxylases and sterol desaturases. Beta-carotene hydroxylase is involved in zeaxanthin synthesis by hydroxylating beta-carotene, but the enzyme may be involved in other pathways [1]. This family includes C-5 sterol desaturase and C-4 sterol methyl oxidase. 
Members of this family are involved in cholesterol biosynthesis and biosynthesis a plant cuticular wax. These enzymes contain two copies of a HXHH motif. Members of this family are integral membrane proteins. 24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.55 0.71 -3.68 122 4549 2009-01-15 18:05:59 2003-04-07 12:59:11 8 43 1563 0 2248 4426 2057 115.30 21 37.33 CHANGED hllhhlhh-hh.hYhhHR.hhH........hth...la...th..HthH.H...ps.......pt.phh....ssht...hps.hphlhhshhhh.................hhh...hh................hhh.hhhhhhhhhhtshhthhsHssh....................hhhh.....hhhthhhhssph..HphH.Ht ...................................................h.hhhhlhh-.hh.hYahHR.hhH......................hsh........la...ph.....H.p...s.H..H........p..........................pt..tsh.........suhh.........hps...hp...hhhh.shhhh........................hhs.....hhh.....................hs..h..t..s...h..s...h..h.hhhhh.hh.hht..hhsHssh................................hhh..............t.h..h.t.h...h...h.....s...s..sph......HchHH......................................................... 0 659 1293 1875 +2476 PF02504 FA_synthesis Fatty acid synthesis protein Mian N, Bateman A anon Pfam-B_1671 (release 5.4) Family The plsX gene is part of the bacterial fab gene cluster which encodes several key fatty acid biosynthetic enzymes [1]. The exact function of the plsX protein in fatty acid synthesis is unknown. 
19.90 19.90 20.00 20.00 19.80 19.40 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.83 0.70 -5.56 11 3673 2012-10-02 21:08:39 2003-04-07 12:59:11 10 5 3469 4 781 2569 2334 315.60 40 93.65 CHANGED h+lulDsMGGDauPh...pllcGllpAhpshs.lchhLlGsccthpshlpc.....cpsp.lpllpApshlpMc-sPstAlR+Kc.SSMtlulshl+cGcADuhlSAGNTGAlhuluhh+lutlpulsRPAlsshlPThs.G.hsllLDlGANV-scPccLlpFAlMGpsYupplh.shcsP.+lGLLNIGsEEpKGs-.....hh+psachL+sh....shsFlGNlEupDlhsGhsDVlVsDG......FsGNlhLKosEGssphltpll+-chcpshhutLtull...l..slKphtp+hDaspYsGuslhG.lstsVIKsHGsusupulhsAIctAtphlpsslsp+ ....................................................................................plAlDsMGGDauPp...shl.ushp......A..lp..........p....h....s....p....l.c.llLhG..s.p....st.....l.....p...h..Lsp............................t....p.....+.....lp.....l.lps.s...-.h.Is.-.-..c..P..s.p.Al..RpK+.sS..S.......M...........hl...........AhchV...........K............c...........G....c....AcAsl..S..A..G..NTGALMuhuhhlltp.l.......c.G.......I.......-........R..P..A..LssslP.....o......h..............p....G....tslhLDlGAN..s-scsppLhQFAlMGuhaAcplh...slp.....pP..RVuLL...N...l...GpE.-sKGs-.....hh+cuhpLL...pp.t..........sl.NFlG.lEup-.lh.s..G....p...s.....D..VlVsDG......FsGNlsLKohEGsu....p.hl....h.p.hL.Kp.p....h....p.....s....u....h....h...u....K......l....u.u.ll........lp.ss.L..p...p...h...t...p.+hD.s.pYs..GAsL.L.G..LcGsVlKoHGuusscAhtsAI.c.pAhphlcppls..t....................................... 0 273 513 659 +2477 PF01557 FAA_hydrolase Fumarylacetoacetate (FAA) hydrolase family Bashton M, Bateman A anon Pfam-B_641 (release 4.0) & Pfam-B_1228 (release 4.1) Family This family consists of fumarylacetoacetate (FAA) hydrolase, or fumarylacetoacetate hydrolase (FAH) and it also includes HHDD isomerase/OPET decarboxylase from E. coli strain W. FAA is the last enzyme in the tyrosine catabolic pathway, it hydrolyses fumarylacetoacetate into fumarate and acetoacetate which then join the citric acid cycle [1]. 
Mutations in FAA cause type I tyrosinemia in humans this is an inherited disorder mainly affecting the liver leading to liver cirrhosis, hepatocellular carcinoma, renal tubular damages and neurologic crises amongst other symptoms [1]. The enzymatic defect causes the toxic accumulation of phenylalanine/tyrosine catabolites [3]. The E. coli W enzyme HHDD isomerase/OPET decarboxylase contains two copies of this domain and functions in fourth and fifth steps of the homoprotocatechuate pathway; here it decarboxylates OPET to HHDD and isomerises this to OHED. The final products of this pathway are pyruvic acid and succinic semialdehyde. This family also includes various hydratases and 4-oxalocrotonate decarboxylases which are involved in the bacterial meta-cleavage pathways for degradation of aromatic compounds. 2-hydroxypentadienoic acid hydratase encoded by mhpD in E. coli Swiss:P77608 is involved in the phenylpropionic acid pathway of E. coli and catalyses the conversion of 2-hydroxy pentadienoate to 4-hydroxy-2-keto-pentanoate and uses a Mn2+ co-factor [5]. OHED hydratase encoded by hpcG in E. coli Swiss:P42270 is involved in the homoprotocatechuic acid (HPC) catabolism [6]. XylI in P. putida Swiss:P49155 is a 4-Oxalocrotonate decarboxylase [7]. 
23.90 23.90 23.90 23.90 23.80 23.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.56 0.70 -4.89 95 10787 2012-10-02 17:33:27 2003-04-07 12:59:11 13 37 3508 112 3559 8643 4023 213.40 24 76.20 CHANGED p.lhshuhN........at.....p+sppht...........................................hshPh.....tsh....hah+sssulsu.....ss......................s.l..hhPtt.................tt.....lcaE.sELull....lG+..tspph.......ppA..hs.hlhGaslssDlo.s.......................Rch......phtth...........hhh.uKuh-shsslGPh.....lsshsphst....t.............................slplpspl.........................NGch..hpcu..ss.schlasstplluh.lSphh.................sLpsGDl....lhTGTs...t.s.......................................................................tsshhlpsGD...p.lpspl...............tt.........lGslp.s..pls .................................................................................................lhshuhsat........sHstphs..................................s.t..........tsh........hFh+.s..s.s.....s......l....s..s......st.s............................s..l.....hhPpt....................................stp....hcaE.sELull...................lG+............sspph........s.pcA....hc...tlh..Ga.............s..ls.Dlo...t.........................R....ch..........Q.tptt..............shht..uKuhss.sss.l.G.....Ph.......lhs.hs.p.lss.........p.......................................s.lslphpl...........................................NGch..........hppu.....ss.....ss....h....las......h........s....p....l...l....u....a..l....ophh..................sLpsGDl....IhTGTs.......t..G.............t.................................................hlps.GD......p..lplph..............ps......lGp.lps.h.h........................................... 
0 912 2048 2900 +2478 PF00667 FAD_binding_1 FAD_binding; FAD binding domain Bateman A anon Pfam-B_180 (release 2.1) Domain This domain is found in sulfite reductase, NADPH cytochrome P450 reductase, Nitric oxide synthase and methionine synthase reductase. 21.80 21.80 21.80 21.80 21.40 21.70 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.45 0.70 -4.90 15 3726 2012-10-03 00:38:56 2003-04-07 12:59:11 15 73 2009 39 1475 3268 224 195.00 27 27.25 CHANGED sPhctppsFhuslhss+cLpsssusRsshHlElDluso....ulpYpsGDHlGVaPsNspphVccllchlslss...cpslpLcsh-tp..........hc.Ph.sPsTlppALpaal-Iss.sPo+phLptLAsaAs-ts-+pcLchLuss...htpcapchthspshollclhccFPSschPhs.hLlsllPpLpPRYYSISSSschpPspVHlTssVVpacs....spG+h+pGVsSsa ..........................................................................................s.....ttsh.Aplhts.p.pl....s....s...........s...........u.....p...+..s.s.h..H.lEl....c...........l....s....ss..............slpYp.....sGD.t.luVhs.........pN.ss.....t.......hVppll...p..h..l..t...hss........cp.l..p......l..p.sts.............................................................................h..sltpsLphah-lss......s.stt...h.lpt.....hA......ph........s........s.....s...............p..p............................L..t..Lsss...............p.t...p..h.........p..p.a........h..............t...hs.h....l..........-.l.........l................p..............c...a..........s..........s.......s...............p........l............s.h..p...tllphL....Lp.P.RhYSIuS.............S....................t.........h.........t...........s.......s........c.............l.............c....lTVulVca.s........spup.....+..t...GssSsa......................................................... 0 471 808 1178 +2479 PF00890 FAD_binding_2 FAD binding domain Bateman A anon Pfam-B_255 (release 3.0) Family This family includes members that bind FAD. 
This family includes the flavoprotein subunits from succinate and fumarate dehydrogenase, aspartate oxidase and the alpha subunit of adenylylsulphate reductase. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.32 0.70 -5.63 63 15429 2012-10-10 17:06:42 2003-04-07 12:59:11 19 132 4737 137 4045 50736 24820 363.40 25 69.86 CHANGED DVlVlGuGhAGLsAAlphuc.pGh.plsllspstshtu..soshu.pGulshsh...t...t..DshchthpDshpus......sths.cpshsch.hsc.pusctlt.Lcph....Ghsasp........................t.......hhshpth.Gutptp...........................................cshh.sust.......sGpslltshhpps..hph.....lchh.chhstsLlh......p.......su....plpGshlt............psuphh....phtAp.uVllAsGGhut...................sht.ssss..sssGcGhshuhpsGst.lts..hchhtatPsulh....sssh.......hhpcshpu.G..........ulhls.spGcR..Fh.......................s-hsstchlscuhhtpchptsts..........sphaLhhsp.................................................lsscslcpplsthpcpstthhs.......hsshcc.............................P..............lhsss..hashGGltTDhpucs.............tttspslsGLaAsGpss...suuhcGssphuGsuL 
...............................................................................................................................................................................................................DllllGuGh.AG..h....t....AA......l.........p........h.........s.......p.......t.......G..........h....p......l..h..l...l.s..+.....s...................h..tu...............po........h......h.....u......p........G.............G..........h............s...........s.........s....h.................................................................................................t.......D..........s.......h....p..........h...h......t......D......s.l..t.u.u..........................................shh..s.......c....p...p...h.....l....c....h........h.....s......p........p.......u.......s....p.....t....l...p......L..tph..................Gh...asp...................................................................................s...........................h....p...h...G..uh.php..................................................................................................................................R.h...h..h.....s..ss..h.................................s..G...p.......t.......l.....h....p......s......l......h......p.p..s.......h.p.ht.....................lph.h...t..ch..h..s...h.c.Llh....p.................cs...................................ps.h.Gshsh....................................................psu..phh............t.h.p.A..c...ulllAoG.G.hut...........................................................................h...h..t....h....s..o.......s....s..............h......s......o.G.c.G.h.s..h.uh.....cs.Gs...t...ht...s................h....-..h......h...Q....h.....a....P....s...s....h.......................ttth.......................................l.h...s..c..s.h..p..s..p...G...................................................u...h.h.....h......s...t.....p.....G...p..R...ah........................................................t-..h.....s.s..+...c......h....l....u.p..s...
.h..h........p....h.pttts.............................h.h..hL.hsp........................................................................................................................hs......p...h...l...t...p....p....h.....s..t..h........p.h.s.h......t..h......h..s............................h...s.....hpp.......................................................................h.......................................................l..h.P..ss............+a.....s.h.........G.....G.....l........s.s...p.s.ps.....................................................s....l....GLaA....sG.Ess.......suh..H..G..s.s..c..huusul.................................................................................................................................................................................................................................................................................................................................................................. 1 1281 2555 3442 +2480 PF01494 FAD_binding_3 FAD binding domain Bashton M, Bateman A anon Pfam-B_549 (release 4.0) Family This domain is involved in FAD binding in a number of enzymes. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.95 0.70 -5.40 20 17997 2012-10-10 17:06:42 2003-04-07 12:59:11 14 174 3547 120 7349 32963 12148 284.60 17 67.85 CHANGED cssVL.IVGuGPsGLhhuthLuptGlc......shll-+hsssssts..RAtslppRTMElLcphGltcchtspusspphhutthtss.........................................................cttscl-....t.sussthsshsQsclE.lLhccApppG.splpauTE...lhuhppDtsGVsuhlp...sctsGpp....TlcucYllGsDGs+ShVRcslGlp.hcGptth..hshhslhhcu.sls.........hhhlhsscssGh.................hltPhps...t.hhhhsshs.phpst..ssphsp--stptlcshsussthss.chpthotashssphAccaRpG.RlFLAGDAAHhpPPsGGhGhNsulQDAaNLuWKLAtVLpGpAsssLLDoYssERpsluppllctu ..............................................................................................................................................................................................................plh.llGu.G...sGhh..........h.....A.............h.....h...L....t....p......t.........G...h.p...........................s.h.l......l......E............p..........t...................................................s................t........s..................s.........l...........t.................t.......s.....h..............p......h..........h..............p.......t...........h..........G.......l........h...p....t.....h............t........t...................h........h..h..................................................................................................................................................t..ht.............................................h....h.....h......h...................p.........t...............h.......t.......t.....h........L.....h................p.....t.....h............t...........s.......h..p...h..h....h..s......t..p.....................l......t....h................t........p........t.....p..........t...............t.........h.........h..........................t.t.........t..................................h...p........u.....
.c........h...l.l.........uA.D.....G.........h........p......S....h....l.....R.........p.......h........h............s...........h......t.........h..........t..............t.............................................h.....h.......h.....h......t...h....h......h....t............h..........................................................h......h............t.t....h.................................................................hh.h.s..h........t.....................................................h................h.........................................................t...t..............................t.....h.........................t.........t..........h....................t.................h........................t............h..........h..............t.......................................................h.........................h.........t...................................................h............t...............h............h..............................h...........t.........................h.............h.........h...........t..........p..........h...............h.............t...............s......p.......l.......h...L..........h.G....D.............AAH...............s......h......Ph........t....G....p..........G....h....s.......huh......p...D....s...h.....L...s........h.....l...........t.................................................................................................................................................................t......................................................................................................................................................................................................................................................................................................................... 
0 1974 4250 6144 +2481 PF00941 FAD_binding_5 dehydrog_molyb; FAD binding domain in molybdopterin dehydrogenase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1112 (release 3.0) Family \N 20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.84 0.71 -4.82 94 4901 2012-10-02 01:00:47 2003-04-07 12:59:11 16 65 2185 108 1498 4277 1822 176.00 27 36.83 CHANGED tphpahpPsolp-hhplht......ttssA+lluGuTslsshhchphhphshl.lslspl.s-Lptlpps..s....sslplGAssohsclhp..........tthsphhs.sLspthptlAu.QlRNhuTlGGNl.............................................ssusshuDhsssLhAlsAplpltssps..pR....plslp-.Fht.....shhpssL.pssEllhulplPhh .................................................................................................t...pah+Psolpcs....lplhs...............tts..pA..c..l.l..AGG..Tslh...h....h+...h.....p........h....h....p...........s....h....l....lsl.s..p.l....EL...p.t.lp....hs...s....................sulcIGAssshs..cltpt..................................................hpp.t..h.......s...sLs...c..t.h....p..t.h.Au.h.Ql.R.Nhu.....TlGGNl.......................................................ssu.s.s......u....DhssshhAh..sApl......p...l.t....s....sp.....u......pR...........pl.s.lp-..Fat......................s.hcs..sL..ts.s.E...llhulhlP.......................................................................................................... 1 466 867 1191 +2482 PF01687 Flavokinase FAD_Synth; Riboflavin kinase Bashton M, Bateman A, Mistry J, Eddy S anon Pfam-B_1221 (release 4.1) Domain This family represents the C-terminal region of the bifunctional riboflavin biosynthesis protein known as RibC in Bacillus subtilis. The RibC protein from Bacillus subtilis has both flavokinase and flavin adenine dinucleotide synthetase (FAD-synthetase) activities. RibC plays an essential role in the flavin metabolism [1]. This domain is thought to have kinase activity [2]. 
19.60 19.60 20.60 20.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.59 0.71 -4.24 139 4738 2009-01-15 18:05:59 2003-04-07 12:59:11 12 21 4531 30 1203 3417 2167 126.90 37 41.33 CHANGED hLG+saplpGpVl+Gpph.GRplGFPTANlph....pshllPtp...GVYsspspl...................................ssp..........................................................ha..ulsslGhpPT.hss....pph..plEsHlh................DF.st...-....lYGcplplpalphlRsEpKFs.ul-pLhpQIppDhptu+.phhs .....................................................................hLG+saplp.GpVl+Gpph.G.Rp.............l.........G.............FPT..............ANltl...............pc.p.hhP.....tp...GVYAsp.l.p.l...............................................s.s.p................................................................................................................................ha....ulssl....G.....h.pPT..h.ss......tch............pl........E.....salh........................................DF...st......D.....l.Y.Gc.p.lpVtahchlRs.Et.K.Fs.u.......l.-tLhpQlppDtppA+phh.s................................ 1 411 772 1021 +2483 PF04703 FaeA FaeA-like protein Mifsud W anon Pfam-B_5784 (release 7.5) Family This family represents a number of fimbrial protein transcription regulators found in Gram-negative bacteria. These proteins are thought to facilitate binding of the leucine-rich regulatory protein to regulatory elements, possibly by inhibiting deoxyadenosine methylation of these elements by deoxyadenosine methylase [1,2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.86 0.72 -3.81 5 310 2012-10-04 14:01:12 2003-04-07 12:59:11 7 14 259 2 35 393 13 62.30 35 44.62 CHANGED MK-cILoalscl.........+uPC+TsEIA-AhGlSAYQARYYLhsLEKEGKI+RSPlRRGAuTLWclpu ..................................scIlpahpp..................pts..s+Tp.-IA-Ahul.o.h.Y.QARtYLhpLEctG..hV..c+..s.ht+Gh..t.h.W....s................. 0 6 14 21 +2484 PF02106 Fanconi_C Fanconi; Fanconi anaemia group C protein Mian N, Bateman A anon IPR000686 Family \N 25.00 25.00 26.30 26.10 19.80 22.60 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.76 0.70 -6.07 5 88 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 36 0 35 71 0 385.40 44 98.24 CHANGED MAQDSssLsoNaQFWMQKLSsWsQASTLETQ+DTCLHLuQFQEFLRQMYEsLKEMDSsA.lIERFPTIGQLLAKuCWNPFILAYDESQKILIWCLCCLhsKEPQNSsESKLNSWIRuLLSHlLSu.RFDh..KEVuLFspuLGYsshDYYPuLLKNMVLSLVSELRENHLNGhNoQcRMSPERVpSLSclCVPLlTLPDh-PLVEALLpYHG+EPQElLsPEFFEAVNEAFLpKKISLPsSAVlSLWhRHLPSLEsAsLHLLEKLlSScpNsLRRl-CaIKDSLLPQ.AACHPAIFRlVDEIFRsALLETDGAPEVlAsIQVFTpCFVEALcKENKQL+FALKTYFPYsuPuLlhsLLQ+Pp-IPpGHWLQsLKHIS-LL+ElVEDps+G..SausPFESWFLFVHFGGWlDlAAEQLLhu..AAEPP-ALLWLLAFYYuPpsGsQQRuQTMVElKsVlu+LhMLaRSssLSApDLQAuA.pssuuDsRsPssQQLVRHLLLNFLLWoPGGHoIApElIT+MAcTsAlp+EIIGFLDpTLYRhsHLulEusRocKLARELLcELpsQV 
..........................php.WhpKh..Wtps...po.ps..Dss.HLs.hppFlpphhphlpphs......pph..htp..hLtp.C....hhu..tps.phlh...pL.sh.ptcP.sshp.ths.WhpslLsHllos.cht...tEss..hhpsLGYtshDYastLlcNhV.SLspELctsphsths.hp.pRhssp.phhslS.hClPLlTLs-htPLlEsLLhh..putpspEhLt.-Fh-tVs.......-Ahl.c+ls.....L.s.pulhsLWh+aLPSLEpAhLpLh-plhot.p.....pchpphlc-S.LPp.uAppsuIFhlVs-hF.R....sLhEo-Gs.plhshlQlFTpChlpthppps+Q.pasL+saFP.s..sLshsL.hppPp.tlspth..hhp.L.pIophL+phsE-tptt...............u.ts.aEtWFLhlphGtWschssp.Llh......tutss.tsLLWLLsFhatPpsttppRtpphsphp.hhs+Lhhh.t..p.shshtcLptsh......t.t.p.sh..tpLhh+Llhphhlas.tsh.hs.-hlshhh.ptth.pchh.hl.ph...h.h.p...p.t.cs.t...hhppL.tpL............................... 0 5 6 11 +2485 PF03511 Fanconi_A Fanconia; Fanconi anaemia group A protein Griffiths-Jones SR anon PRINTS Family \N 20.90 20.90 21.00 32.30 19.50 19.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.13 0.72 -4.19 2 47 2009-12-01 10:28:13 2003-04-07 12:59:11 9 1 30 0 22 44 0 61.90 56 5.08 CHANGED REELLlhLFFFSLMGLLSSaLTppsTs-h.KAhclCAtlLpCLE+RKlSWLsLFQLTEpDh+LG .....EcLLV.LFFFSLMGLLSS+Lssps...ssD..KAl.clCApILpCLE+RKI.SWLsLFQLTEsDh......... 0 1 2 7 +2486 PF01149 Fapy_DNA_glyco Formamidopyrimidine-DNA glycosylase N-terminal domain Finn RD, Bateman A anon Prosite Domain Formamidopyrimidine-DNA glycosylase (Fpg) is a DNA repair enzyme that excises oxidised purines from damaged DNA. This family is the N-terminal domain contains eight beta-strands, forming a beta-sandwich with two alpha-helices parallel to its edges [1]. 
24.40 24.40 24.60 24.50 24.30 24.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.78 0.71 -3.73 109 5183 2009-01-15 18:05:59 2003-04-07 12:59:11 19 13 3625 70 1224 3585 2246 118.40 30 42.40 CHANGED PELPEVEslpcsL...pp.tlhGppItp.lplt..........psp.lchs.stt......htptlpGpplp.slpRRGKaLlhph......................................................................................ss.........sh.....hLlh..HLuMoGphhh......................................................................................ttt...t+Hs+lh..hph........s.sst.......................pLtasDsR+FGphp ..........................................PELPEVEs....sp+sl......ps...h....lhG...pp..l.p.p.l..t.lp......................s.p..l+h.s.stt..........h.tpLsGpplp.slp...RRGKaLlhch...............................................................................................ps...................st.......slls..HLu..M.sGphpl...............................................................................hstp..s.sp+H.s+..lt...hth............s..ssp.....................................hLh.atD.RpFGth.h......................................................................................... 0 373 766 1025 +2487 PF04750 Far-17a_AIG1 FAR-17a/AIG1-like protein Mifsud W anon Pfam-B_3664 (release 7.5) Family This family includes the hamster androgen-induced FAR-17a protein (Swiss:Q60534) [1], and its human homologue, the AIG1 protein (Swiss:Q9NVV5) [2]. The function of these proteins is unknown. This family also includes homologous regions from a number of other metazoan proteins. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.34 0.71 -5.15 28 524 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 287 0 312 511 38 182.70 21 80.16 CHANGED hpllhHl............................................hsls.ashhhahhpp.sp.............taG..........G+apaLThlsL...................llpslaaslsllsDl...........pppcpLptl.....tDahhsshuFPlulhVslhFWsLah....hDccLlh.......Pchlsph................lP..hW.............LNHshHThshlhlll-hhhpp+paPp+ht...ulshhsshshuYhhW.lphlhthsG.hWlYPlhstluh.t+llhFshusllhhhhYlhuchLss .............................................................................................h..............................................................................................hu...........tpa.paLThlsh....................h.hphhhhhlshltph.......................t..t.hhth........c.hhhshs...hsl....thh.......ls.hh.....aWs.lhh.......hs.phlh.......schhc...h................hP..ha.............l.s.hshHshshlhh.ll-hh..h.........t..h.s.....pht..............uhshh....s...s..hshhYhha...lh....h...h..h.............h...s....u....h........asYPhl.p.....hh.s...h......t..p..hh...h..ashshhhhhhhhhhh..................................................... 0 107 159 243 +2488 PF01581 FARP FMRFamide related peptide family Bateman A anon Pfam-B_666 (release 4.1) Family The neuroactive peptide Phe-Met-Arg-Phe-NH2 (FMRF-amide) has a variety of effects on both mammalian and invertebrate tissues [1]. 
15.00 2.30 15.00 2.30 14.90 2.20 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.70 0.77 -6.28 0.77 -2.59 61 1303 2009-11-03 19:08:40 2003-04-07 12:59:11 11 24 73 0 591 1475 4 10.20 46 34.60 CHANGED ppcptahRFGR ......t..ptpahRFG+ 0 210 278 550 +2489 PF02469 Fasciclin Fasciclin domain Bateman A anon Pfam-B_562 (release 5.4) Family This extracellular domain is found repeated four times in grasshopper fasciclin I as well as in proteins from mammals, sea urchins, plants, yeast and bacteria [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.82 0.71 -4.13 122 4881 2009-09-10 22:24:17 2003-04-07 12:59:11 17 137 999 8 2602 4995 1077 128.40 22 47.97 CHANGED sp...hoshhshlp.....su..sLs.ppL.................ss.s.........shTlFA.PoscAF.........tpl.....stsphptLht....spppLpplLpaHllss.ph..tsssltsh......................tp...............lpo..lpGs......plplshp.........ss.........plplss.......Apls..psDl.tssN.....GllHsIDpVL.lP ..........................................................s..hsphh.th.lp.......ts....sLs....ptL....................................................ssts............................shTl.FA....Po............s.pA.F...............................pp.l........................st..t...t.h.p..tlht...............spptlpp.lL........p....a..H...l..l.....ss...th....hss.s..l.tss..................................................................tp...................................................................................lpT......hp..Gp.............plplshp...............ss..........................................plhlss..................upl.h......p.s..s.l.....s..sN...................G..l...l..Hhl.D.p.VL.hP...................................................................................................................... 
0 870 1506 2063 +2490 PF02259 FAT FAT domain Sonnhammer ELL anon (Keith and Schreiber, Science 270:50) Family The FAT domain is named after FRAP, ATM and TRRAP. 20.00 20.00 20.00 20.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.01 0.70 -5.47 55 1294 2012-10-11 20:00:59 2003-04-07 12:59:11 18 84 321 0 880 1335 63 334.40 17 12.77 CHANGED usLussAAWtlsp.W-p..hcphlsshp.........................pss.........cpp...FapAlLul..++sphc...cApphIcpuRpllssElou...LsscSYscuYshllplQ.........hLsELEEhlpap.phspt........spchctl....hpsWppRL.ss.pcsl-lWpcllphRsllls.............................................................sc-shphhl+FApLsRKssphslAp+sLppLht.t............................Pplhhs.h+h.Wtps.......pp...........pcAlppL.pchsp.phspsht.t.tp.ht.t...............................p..s.pp.htt.spLhA..+sahchGpaptpl..............p.ht.pshspllpuYttAsphcpphhK.AWcsWAhhNhcllph.tpt.t...................................................................hhpasssAlpsah+SluLups.....puhpchhRLLo 
..............................................................................................................hhhtsuWthsp..Wpt......hp.p..h.lp...h..........................................................................................................................pps..........................cht..hhpuhhul........+ppphp.............................phpph..l.ppu..p...phh......hp.......phss.......hs..tp...s..h..p.pth.s..hlh..phQ...............hLsELc....-hhp.h..h.t...................................................pphptl...............hp.tW.pp..R...L............sh..tsshphh.....ppll...thRphhhp.........................................................................................................................................................p..p..h..p.h...h.l...ch...upl...sRKp.s..ph.p...l.uhptLh.p....lhp.................................................................................................spl...ht.hph.at....t....p...................................................pcAh...phL...p......p...h.p..hhppt..............h.......................................................................................................................................................t.......t....ttphh.u....csa...h..hhupahtph.........................................................pp.pthhp.tap.....t..u..s.ph...p.p..hh.............+...u..........at....t.hu..hh....t.p.hhpt.p.........................................................................................................................................................hphhh..ulpsahculth..spp........tshp.h.RlL................................................................................................................................................................... 0 319 487 724 +2491 PF02260 FATC FATC domain Sonnhammer ELL, Wood V, Mistry J anon (Keith and Schreiber, Science 270:50) Family The FATC domain is named after FRAP, ATM, TRRAP C-terminal [1]. 
The solution structure of the FATC domain suggests it plays a role in redox-dependent structural and cellular stability [2]. 25.20 25.20 25.30 25.30 25.10 25.10 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.33 0.72 -4.51 141 1318 2009-01-15 18:05:59 2003-04-07 12:59:11 15 61 309 3 915 1361 13 32.60 39 1.32 CHANGED p.sL.olptQVppLIppATsspNLuphYh.GWsPaa .......h.sL.sVpsQ.VppLIppATs.cNL.....sphYh.GWssah.... 0 330 502 761 +2492 PF00316 FBPase Fructose-1-6-bisphosphatase Finn RD, Griffiths-Jones SR anon Prosite Family \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.09 0.70 -5.61 12 2715 2012-10-02 15:53:20 2003-04-07 12:59:11 15 11 2198 197 881 2125 479 306.40 43 94.95 CHANGED hTLsphhlppt......usu-.....lstllsulshus+hIuptlp+AuLsp.lGtuuusNspGDtQKtLDVluc-lhhsALcuS.sltshsSEEp--hlsh....stpupasVshDPLDGSSNIDsslSVGTIFuIa................sssssp..shhpsGp-.VAAGYslYGspThLVlohGs..GVptFsLD.s.GpFhLscpsh+lPtcuphauINpuNhpaas.slccaIcchhtstcssp.+sashRYlGShVADsHRhLl+G.GlFlYPust+...spGKLRlLYEstPhAFLhEQAGGtAosG+.cpl..LDlsPpplHpRsslhhGStscVt+hpchhtt. 
........................................................................t.TLspal.hp...pptph....ssu-...............Ls.tLl.sslthusKhIsptl...p...+...u..........uL.......s.....s..l..l....G.......t...............u........G...........s........p.....N.l...Q.G...E..s.QpKL..Dlhu...N-...hhhsuLc..sp..s..tlsulA........SEEp..........-..........ch..ls..h.............t..t.....s....p........p.......u...c.YlVhhDPLDGSSNIDl..Nl.S.V.GT.I.FSIa+t.s..........................ssps..spt....ch.L..Q.....PGp.pQVAA.GYslYGsuTh..LVhT..sGs............G....V...p.........s..F..T.....h..........D.....s..s....l.......G............pFh.L..scp.........s..h..+..........l...Pc.c..G..p.........hYulNpGNhhpa.s....s.s.l+cYlc.hpttp..............pust....+sYshRYlG....S.............hVADh.HRsLl+.G.GIahYPustp.........psp..G..KLRL.LYEsNPMAFlhEQAGGt..........Ao...s.....G....p.....pRI...L.Dl...P.p.plHQRsshalGSpppVcclppaht.t......................................................... 0 253 520 722 +2493 PF03320 FBPase_glpX Bacterial fructose-1,6-bisphosphatase, glpX-encoded Mifsud W anon Pfam-B_3515 (release 6.5) Family \N 20.50 20.50 20.60 20.90 19.50 20.30 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.81 0.70 -5.79 111 2306 2012-10-02 15:53:20 2003-04-07 12:59:11 8 3 1790 13 437 1381 1900 308.20 54 95.06 CHANGED -RsL...ulEllRVTEAAAlAuu+ahGRGDKpsADpAAV-AMRphlsplshcGsVV....IGEGEhDEAPM.LYIGEcVG.s..G..pGPp............lDIAVDPlEGTslsApGhsNAlAVlAhuc+GslLcAPDh.YM-KlAVGP........tucGhlD..lstslp-Nl+slAcAhs.+slp-lTVslLDRPRHpclIcclRpsGARl+LIsDGDVAuulssshsc.o..GlDlhhGIGGAPEGVluAAAl+ClGGphQuRLhs.............ps-pEc.pRspch..Gl...Dh..s+lhsh-DLVpupclhFuATGlTcGsLLcGV+a.....psst.spTcSlV.hRucoGTl.RhIcut..H 
.......................RpLAhEhsRVTEuAALAuh+WhGRGDKNsADGAAVsAMRphLNplshcGplV......IGEGE.....hDE..APM.LY.IGEcVGsG......pGs.t................VDIAVDPlEGTphsAhGps.NALAVlAhu-+.....G.....shLc.A.PDM..YM-KLsVGP........tAtG.s..ID........LshPls-NL+sVAcAhu..Ksls-LsVslLc+PRHstlIpEhpphGsRVhhIsDGDVAuuIhTshs-...o.slDlhhGIGGAPEGVl.oAAAl+sLGG-MQuRLls..............h.s-pEhtR.CcpM..Gl......-.ss+VLpl-Dhs+u-..sllFuATGlTsG-LLcGlph.....push.....upTcoLl..hR.u+opTlRhIcohH.......................... 0 146 291 369 +2494 PF02634 FdhD-NarQ FdhD/NarQ family Mian N, Bateman A, Iyer LM, Zhang D, Aravind, L anon COG1526 Family A pan-bacterial lineage of proteins. Nitrate assimilation protein, NarQ, [1] and FdhD (Swiss:P32177) are required for formate dehydrogenase activity. Structurally, they possess a deaminase fold with a characteristic binding pocket, suggesting that they might bind a nucleotide or related molecule allosterically to regulate the formate dehydrogenase catalytic subunit [2]. 25.00 25.00 27.50 25.80 22.70 24.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.49 0.70 -4.91 203 2755 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 2387 4 651 1935 1212 230.20 33 86.35 CHANGED spEtPlslhl.NG........pphsshMsoP......................sch--hAlGFLhsEGllp.ssp-l..pulpls.....................................pp............t.p..........................lt...lp.........spt...t..h...cc+h..h..sssoGC........Gh..Ght..h.ph...hpth.h.........s.thplsssplhphhpplpp.tpslappTGulHuAulh..s.....ss..p..llhhtEDVGRHNAlDKlhGthhh.........psh.................s................hs...st.....hllsoGRlSsEMV.KsAphGlPlllohSAsTshAlclAcchGlTLlGhsRs..............pc..hslYot.scRlh 
...................................................................................spEhPlslh.h....NG..........h.phsshMsoP......................pclE.phslGF.hoEGlIp.s.c-l...h.ulcls.......................................................ts.sss.........................lcls.........spt......thpt..........pc+th..sspouC...Gh..s..Ghpt..lpph.................hpshp.h................s.sh.phs.hs.p.l.hphhppL.pp....t.pt.lh.pp.TGulHuAAhhs.............ss..p....llsh+EDlGRHNALDKLlGtth.p.....p..s.h..............................s.......hpps..........hll.sSuRhShEhVtKuAhhGlslLhuhSAsTsLAlclAcchs.......lTLlGFs.Rs................sp..hslYotspRl.h......................................................... 0 188 402 541 +2495 PF04216 FdhE Protein involved in formate dehydrogenase formation Mifsud W anon COG3058 Family The function of these proteins is unknown. They may possibly be involved in the formation of formate dehydrogenase. 27.20 27.20 28.30 27.90 27.10 27.10 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.25 0.70 -4.69 49 958 2009-09-11 05:40:51 2003-04-07 12:59:11 7 2 907 2 192 561 21 287.00 48 95.45 CHANGED L....t.l........t..t.s.lhhss.tslatpR.....ApRLptLApp.pP.hu-YLpahAplscs.Qppl...lpph.shshs.st.h.t......tpuhPsLstpsh.p-stapt..hLptLlsplpst...s.ssshht..slctLcp...ssssplcthussLLs..........tphstsss..shshFlhAALplhasphAst..lstpshst.spp....shCPlCGohPluSllphssp.p.......Gh.RYLpCuLCpoEW+hlRscCssCspocslphhsl............-stps....sl+AEoCssCpuYlKhlh.p-ccsph-sh.ADDLAoLtLDlhh.p-pGapRsuhNPhLh 
.............................................................................................................chssscpps...ushIPPLLFPphKslYsRR.....AcR.LRcLA-s...N.........P..LuDYL+FAAhIAcA.Qcsl.....Lh-a...PLp.h.-.ls.s.cl.tcs............pspG.p...PPLDhcs.l.s.R.D..p+.W.pc....LLhuLlAEL+Pc.................hssssLA.......lIEsLEK..............ASspELEshAs.ALhA...........................u-FusVuS......-K.APFIWAAL.S.LYWA....Q..hAsh......IPu+.....A....+..s..........E..aG.....Ep.R....................p.aCPVCGShPVuShVplGss...Q.........G..L.RYLHCsLCETEW.HlV.R.l.K..CSNCEQo.tcLcYWS.L.....................-.sEpA........AlKAESCsDCs.TYLKILY...QE..K...-.....P....c..V.........E.uV.....ADDLASLsLDucM.EpEG.aARSSlNPFLF................................. 0 56 112 156 +2496 PF03147 FDX-ACB Ferredoxin-fold anticodon binding domain Bateman A anon [1] Domain This is the anticodon binding domain found in some phenylalanyl tRNA synthetases. The domain has a ferredoxin fold [1,2]. 21.00 21.00 21.20 21.90 20.60 20.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.85 0.72 -3.81 134 4662 2009-09-10 22:44:18 2003-04-07 12:59:11 9 19 4522 20 1190 3729 2172 94.00 33 12.29 CHANGED S+aPslpR.DlAlllsp..................sl.sss....plh..pslcpss......sp.hLc.slplFDlYp.G..........ppls.p..scKSlAhplhapst-+TLs-p-lsshhppllptLppchsupLR ........................S+FPuspR.DlAllVs.c........................................................s.l..sss......-lh........psl..cps.u.....................sp.....hLp.....slpLFDV..Yp..G...............cp.ls..p..........GcKSlAhsls.hp..s....s.-+TLs--E.....lstshpcllpsLpcchsApLR..................... 
0 404 755 1004 +2497 PF00465 Fe-ADH Iron-containing alcohol dehydrogenase Finn RD anon Prosite Family \N 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.24 0.70 -5.82 226 13485 2012-10-02 14:41:14 2003-04-07 12:59:11 14 23 3785 67 2682 11831 4421 355.90 25 79.39 CHANGED P..s.c..lhhG...tG.s.lpplsp..hltph..t........+sLlVo...st.sh..tpt......u.hhcplhp...L..ppsulph.thast.l.pspPshpplpcuhp..hh+......pp..ss.......DsllulGGGSslDsAKuluhhhs.......................................lP............llslPTTuGTGSEso........shuVls..st...ppp..hKh.slts..phhP.......phullDPpLshshPhplsusouhDALsHslEuahs.....................p..ss............s..................hoD...............shuhpulc...hlhptL.ps.hpss.ps.....hpA.Rpph..hhuuhh....A....Ghuh....s...s..uuluh.......sHuluaslu....u.ha.p..............lsHGhssAlh.LPtlh..pash......................tp..htplup...hh.......................sp..............phlptlp....ph..hppl.ulPs.p...Lp-...h.G...ls..p....pp....l....spluptuhps..t..h.s....sPtthstp-.l 
..................................................................................................................PtphhhG.tG.u...l.t.p.ls.p......htph....u..........+sllls...ct..slh..p..h...............G...hhsp.lhp....hL..........p..tts....l..p....h.....tla.....s.....t...l....p.s...s......P......o......h.....p.....s......l...p.....c......uh...p......h...h...p.......pt.......p.s...........D...h...l..l.u...lG...GGSs.hD.sAKu.............luhhhp......s.....t............th..............................hP.........................hls.lPT..T..u..G..T..GSEsT.....shuVIo..sp......................psp....hKh..slhs......ph..hP.......s.hullDs....pl.hh.shPtplsA....ssuh..........D..........A..........L..........s.......H.......s....l.......E.u...Y.so.......................ht......uss..............................h.o.D..................sh.u...h..p...ul.c........hl..h.c......t...l.......p....u........l.....p......ss........p....s..................ht..u.....R.....p.....p.....h.....hh.....u.......u.....h.h.......A........G.....h.u.F.....s.......s.......usl.uh................sHuh.u.H..tlu.............uha...c..................................................................ls..HG..hs.sAll....LP..tlh.paNt..............................................s......t+....htph.Ap....hh....................................th..........t.p.t.ttsp....................thlp.tlp............ph...h.p.pl....Gl...Pt..s........Lp-............h...G......lp....c........................pc....l....st....l.uptAhpc.ts....hhs....sPh...s.t............................................................................................................................................................................................................. 0 880 1660 2200 +2498 PF02742 Fe_dep_repr_C Iron dependent repressor, metal binding and dimerisation domain Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family includes the Diphtheria toxin repressor. 
20.70 20.70 21.00 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.08 0.72 -4.29 141 3389 2012-10-04 14:01:12 2003-04-07 12:59:11 10 16 2647 96 811 2165 650 67.10 34 34.92 CHANGED TcpGcchAhpllR+HRLhEpFLschLuh.sh-clH--AcplEHhlS-chhc+lsphL..shPppsPHGsPI.Pst .......TpcGcclApplhc+H+l.l.EpFLhphLGl.s.-ps+c-AEtlEHtlS-chlc+lpphl...p.....p.sP........t................ 0 307 589 730 +2499 PF01325 Fe_dep_repress Iron dependent repressor, N-terminal DNA binding domain Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family includes the Diphtheria toxin repressor. DNA binding is through a helix-turn-helix motif. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.40 0.72 -4.03 10 3362 2012-10-04 14:01:12 2003-04-07 12:59:11 14 14 2588 94 779 4553 836 59.00 31 32.07 CHANGED cLscstE-YLcsIYsLpE.spshspssclAEcLsVpPuoVocMlp+Lc+pGYVcYpsY+Gl ....................................shEDYLc.s...I.h..c.L........t.....p.....c....t......s....t....sp...s...s..clApcLsV.S.s.PoVoc.M...lp+L..pcpG..llphp.apu.................. 
0 286 561 700 +2500 PF02906 Fe_hyd_lg_C Iron only hydrogenase large subunit, C-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.14 0.70 -5.15 141 3059 2009-01-15 18:05:59 2003-04-07 12:59:11 9 160 948 10 841 2861 59 227.10 30 56.42 CHANGED +hVlsplAPAlRsulG-phuh..st...sG+hhuAL+c.LGFc.pVaDssauADlTlhEEusEhlc.Rl.........pp.st.......h..PhhTS.CCPuWlpasE+haP-.llsplSos+SP.thhGsllKs....hhsp......................th......s.spchhsVuIMPCsAKKhEut.R.-hps.......................cVDhVLTocELuphl+............pt.....sIc.hspLtc...pphDts.h.......s...uouuGtl............F.........GsoGGVhEAslRsshchh.........t..ppht.......hph..hRsh.t.....sh+.sphpl..............................................s........l.+lulspGl..t.ssccllcpl..+s.................G.....................c.....tp.......acFlElMACPGGClsGGG..QP .............................................................................................h..sst....ht..........................................t..hh...t....hh............h...........t...h..........G......ht.............h.h.-.sshst.phsh....tp.Ehhp.+h..................................................pp...tt.................shhoS.sCP...u..alphhEph...........a.P................c...hls.plSs..s+SP.t...................hhGslhKs......hh.s.p...........................................................................................t..h...............s.spclhsVslMPC..hAK.KhEsp...R.s.-.hts.....t....................................................cVDhVlT.o.c....EL.s.p.hlc.............................................pt..........sIs..ht...plts.....pp..h.Dt...h.........................s.......touu...u..hl....................................................................F.........................................u.soGGlhcushp.hht.h.........................................................................................................................
...........h..phshspGh....t.sht.phl.p..th....ct............................s.................................................................................c...........hpal.Eh.sC.....t.GCh.G................................................................... 0 435 630 751 +2501 PF02256 Fe_hyd_SSU Iron hydrogenase small subunit Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3750 (release 5.2) Domain This family represents the small subunit of the Fe-only hydrogenases EC:1.18.99.1. The subunit is comprised of alternating random coil and alpha helical structures that encompasses the large subunit in a novel protein fold [1]. 21.70 21.70 22.00 23.20 21.50 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -8.88 0.72 -3.96 109 999 2009-09-11 10:48:27 2003-04-07 12:59:11 12 61 583 10 362 911 20 58.20 34 10.86 CHANGED tssp-h.hppRtpulYphDpph.s...lR+SaENPtlppLY.ccaLs....cPh.uc+uHcLLHTc.Yps+p .....................t....chhtpRhpuLYptD....pph.....lRpSa-NPtlppLY...c-aLs....cPh.uc+AHcLLHTp.Ypsp......... 0 185 271 316 +2502 PF01032 FecCD FecCD_family; FecCD transport family Finn RD, Bateman A anon Pfam-B_377 (release 3.0) Family This is a sub-family of bacterial binding protein-dependent transport systems family. This Pfam entry contains the inner components of this multicomponent transport system. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -11.87 0.70 -5.60 145 19091 2012-10-02 17:14:55 2003-04-07 12:59:11 13 11 3812 10 3276 15283 1729 306.10 28 92.97 CHANGED ll..ll....hhlhu...L....hhGsh...tlshtplhpslht..................sshtt.........llhplRlPRhlhAllsGuuLuluGslhQslhRNPLAsPsllGlsuGAulusslshhhh............shhsh............hhhshhAh..lGuhlu...shllh.hlu.......httthsshpllLsGlulushhsulsshlh.hh....s.p..............phpslhhWhhGSl..s.stsappl.hhhhshllhshhhhhh.hu+pLslLsLG-chApuLGlslpph+hhhllhssllsusuVuhsGsIuFlGLlsPHlsRh...lh....G.sshpthlshoslhGullllhuDhluRhlht.PhE.l...PlG..lloullGuPhFl....aLlh+ ........................................................................................................h.hhhhhhhu...l........hhGsh....tl...s...h.tp.l.h.p.h.h.ht.......................................ss..tph..................................l.lh.p.h...RlPRhlhullsGu......uLulu.Gs..lh....Q.s...l..h+NPLA.sPsllG..lsuGAu..hu.hsl..slhhh...............................shhsh.................h.h.h..s.....h...h..A.h.....l.G..uhls..............sh.l.l.h..hlu.................t.p.t..t.hs.s..h..p....l....l...L......s......G...l.u...l.u..s.h.h..s...uls..sh..lh...hh.......s...p.................................................php...p..l...h...h...W.h..h.Gol............s...ss......sa...p............p............l..hh...........h.h.....s.hl.l.l....s.lh.h.s....h..h.......h......u.....p.....t.......LslL.s.L.G.-..c..hApuLGls...l...pp...h+...hhhllh..s.slLs......u.....suVuhsG.s.I...u.F.l.G..L.l...s..P.H.l.s.R.h....l.s......G...s.....ct+...hhlP.h.............us..............Lh...G..ull.......Ll.h........u...D....hl...u.....R...h.....l..........h.................Ph.......E.......l....................P.l.G...l.l..s.u.l.l.Gu.P..h.Fl.aLlh+...................................................................... 
0 941 1977 2706 +2503 PF04773 FecR FecR protein Bateman A anon Pfam-B_3234 (release 7.5) Family FecR is involved in regulation of iron dicitrate transport. In the absence of citrate FecR inactivates FecI. FecR is probably a sensor that recognises iron dicitrate in the periplasm. 21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.33 0.72 -3.76 182 4810 2009-01-15 18:05:59 2003-04-07 12:59:11 8 61 1004 0 1571 4630 1229 97.30 25 26.17 CHANGED s................plpTs..s........sp.........ptplt...l.sDG.....op.lpLsssopl...............htttppp..lp....LhpGp....s.hhpssp.ssp.......t....Fplpo.s.sspltstG.T..pF..sl..........t..tpsspsp.....lsVhcGpVp ........................................................................t..h.os..h.Gppt.plp..L..s....D..G.......op..lhLsssSpl...................phshshs.pcp.h...lp.............lpG.E........s.hhc...V.....s+..ssp...................pP.......FhV.......po.........s..p.......s.......p.......l..pV...l.G...T.......pF...p..s..........h......tps.stsp........lslhcGpV............................................ 0 637 1127 1377 +2504 PF02388 FemAB FemAB family Bateman A, Mian N anon Pfam-B_1214 (release 5.2) Family The femAB operon codes for two nearly identical approximately 50-kDa proteins involved in the formation of the Staphylococcal pentaglycine interpeptide bridge in peptidoglycan [1]. These proteins are also considered as a factor influencing the level of methicillin resistance [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.23 0.70 -5.79 46 2929 2012-10-02 22:59:21 2003-04-07 12:59:11 11 8 1044 13 402 2189 375 322.00 26 95.28 CHANGED losp..............................EacpFsppps..ssahQosthtcl+ttpshpschlGlccspspVhuAs.....lLhpthshhuaphhYhsRGP....lhDasspcLlpaahppLpcasKp......ppuLalplDPhlhhphhstsGc.h......s.psptllcphpplGacapGhsp.uass.hQsRaphllcLcshs--plhp.shspps+pslK+upphGVcl+hl.st.-E.........LstFtclhppT.p-R+sap.Rs.sYYpchhcpa.s-p.uh..lhlAplshpchlppLppphpphpppltphppt.tpp.pp.............Kpps+hpphppphps.ppclpcspchhpptup..lsLAuuLalhpspEshYLauGossc.........apcahushhlQachhph.Ahc+GlshYNFhGIsGpF...spss....GVh+FKpsFNupl.EhlG-FhhPlpPhhYplh.phlc.......K ......................................................................................................s.pEaptaspp....sphh.Qs..t.hhp.h+.....p.s..h..p..s.ph..l.u.lt.p..p...t......t...........l..h.sus..................l.l...hp.........h.........h.....h........h.....hhY....scGP.......lh..Da...p....s.t...p...h.l.phhhp....t....Lp.p.a.h.+p......ppslhlhhc..P.h.....hp.....h......t................................s.pthh...p..hp.p.hG.a...pa..pGhpt...th.ss...Q....c.........a.hh.hh.sl.....p.....t.......h.....s.t.c........p........lhp.ph.s.t.p.t+ptl+....p....s....h....p.....h....G....l..plchh...ph....-p.............hp.hFhpl.....hp.....T...tc...+....c...t...at......+...s.....pY..apphhc.ha...t-p...s......hhlA.lshpph.tplp.p.t..ptphtth.tt.........................pt.pphtp....pp..phtt..ppchtp.......hpth......tp.st....lsluuul.hhh..s....pc.shY..h.h.uGossc.........appa......h..us.hhhpaphhph.u.h.p.p.s.hs......hYshh.G.lps.th..................st.p.s......GlhpFKpsa..s...s......l....c...hhG.cF..hshp...hYthh.phh..................................................................................... 
0 151 260 364 +2505 PF04023 FeoA FeoA domain Bateman A anon COG1918 Domain This family includes FeoA a small protein, probably involved in Fe2+ transport [1]. This presumed short domain is also found at the C-terminus of a variety of metal dependent transcriptional regulators. This suggests that this domain may be metal-binding. In most cases this is likely to be either iron or manganese. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.45 0.72 -4.01 284 5362 2012-10-01 19:11:18 2003-04-07 12:59:11 9 18 3214 52 1271 3443 369 72.70 22 50.92 CHANGED hsLsp..lp.G......ppspl......tp.lpt..........p................................tthh............p+..LhshGlhsGsplpllp.ps.h..ssslhlpl....................p.sp.......h..s.LppshAppIhVch ..............................................Ltp..hpsG...ppspl..pp..lpt......t.............................tstht...p+LhshG...lh.Gsplpllp.ts.........sh......ssPlhl.pl...................................c..st...p....l...s..LccptAptIhVp................................ 0 485 947 1153 +2506 PF02421 FeoB_N FeoB; Ferrous iron transport protein B Bateman A, Yeats C anon Yeats C Family Escherichia coli has an iron(II) transport system (feo) which may make an important contribution to the iron supply of the cell under anaerobic conditions [1]. FeoB has been identified as part of this transport system. FeoB is a large 700-800 amino acid integral membrane protein. The N terminus contains a P-loop motif suggesting that iron transport may be ATP dependent [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.71 0.71 -4.95 147 3662 2012-10-05 12:31:08 2003-04-07 12:59:11 13 24 2856 55 802 32060 12341 155.20 43 23.61 CHANGED hplALsGNPNsGKTTLFNsLTGup.Q+VGNWPGVTV..EKKpGphp...h......ps.pph.pllDLPGh.....YSL....os..hS.-EtluRcall.....pcc.sDlllNllDAoNLERNLYLThQLlEhs.hPhllALNMhD.AccpG.hpIDhppLpchLG.lPVlsssApcspGlccLhctl ...........................................................................h.pluLlGNPN..sGKToLFN....t....L...............T...........G........u...........p............Q.......+.............V........G.......N.......W............s......G........V........T..........V........E......+....K....p....G..p....h..p......h........................ts......p..p.......h.....p.l..l..D........L.......PG.s............Y...S....L..................ss....h...S..........-......E.....p......l......s.....p.....c.....a.....l.l...................p..p.....p......s...D............l.....l............l............N.........V.....l............D..............A............o.............N.............L.............E............R.............N........L................a..........L...........T........h.......Q..........L...........l...........E..............l............s............h............P......h......l.....l..........u......L...N.M...h.D...h.............A...........c...........+...........p............G........l......p.........I.........D...........h..........c...........t........L.......u.........p.....p......L............G...........s........P.........V.......l.......s....l...s.A.pc..s..c..G.lcpLhpt.h......................................................................................................................................................... 
0 308 571 698 +2507 PF00142 Fer4_NifH fer4_NifH; 4Fe-4S iron sulfur cluster binding proteins, NifH/frxC family Sonnhammer ELL anon Prosite Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.62 0.70 -5.21 14 20327 2012-10-05 12:31:08 2003-04-07 12:59:11 13 8 3168 72 446 26260 3397 132.50 67 98.47 CHANGED RpIAIYGKGGIGKSTToQNhsAALAchG.K+VhllGCDPKADSTRLlLtGKsQsTll-hht-cGt..E-lpl--VlhpGatslcCVESGGPEPGVGCAGRGVITuIshLEE.GAYs.D.lDaVhaDVLGDVVCGGFAMPIR-sKAQElYIVsSGEMMAlYAANNIsKGlhKYAcpGG.VRLGGlICNuRpsDpEpEll-paAcplGophIHFVPRDNlVQ+AEhp+pTVhEasPpss....QApEYRpLA++Ih-Nc..hlIPpPloMD-LEsllhcaGlhc ...........................................................................................................................................................................S..T...R...L..l..........L...p....u.......K....A....Q.....s......T.............l........L..............c....h........A.u.........E..............t....G..s..........V......E...............D............L........E..........L....E....D..V....h......+.......h........G......Y.........t.....s............l.......+......C.....V....E.....S...G...........G...P.....E.....P.....G.....V.G...C...A.......G......R....G....V....I...T....u...I....N......F.....L...E....E....p....G....A..Y....-.......-.........l..DaV....Y.D...V........L.G...D....V...V...C..G....G..F..A...M..PI.R...E....s......K...A....Q.E.I.YI.V.hS............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................... 0 138 291 373 +2508 PF01794 Ferric_reduct Ferric reductase like transmembrane component Bashton M, Bateman A, Finn RD anon Pfam-B_728 (release 4.2) Family This family includes a common region in the transmembrane proteins mammalian cytochrome B-245 heavy chain (gp91-phox), ferric reductase transmembrane component in yeast and respiratory burst oxidase from mouse-ear cress. This may be a family of flavocytochromes capable of moving electrons across the plasma membrane [1]. The Frp1 protein Swiss:Q04800 from S. pombe is a ferric reductase component and is required for cell surface ferric reductase activity, mutants in frp1 are deficient in ferric iron uptake [1]. Cytochrome B-245 heavy chain Swiss:P04839 is a FAD-dependent dehydrogenase it is also has electron transferase activity which reduces molecular oxygen to superoxide anion, a precursor in the production of microbicidal oxidants [2]. Mutations in the sequence of cytochrome B-245 heavy chain (gp91-phox) lead to the X-linked chronic granulomatous disease. The bacteriocidal ability of phagocytic cells is reduced and is characterised by the absence of a functional plasma membrane associated NADPH oxidase [3]. The chronic granulomatous disease gene codes for the beta chain of cytochrome B-245 and cytochrome B-245 is missing from patients with the disease [4]. 
24.30 24.30 24.30 24.30 24.10 24.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.97 0.71 -4.04 209 4658 2012-10-03 10:28:09 2003-04-07 12:59:11 14 103 2096 0 2249 3954 660 129.60 19 25.54 CHANGED tGhh.uhhhhshhhl........lu...h+....s.h....htthshpp....hhh.hH+hluhhshh.hshlH...........shhahhhhh.......................................tthhtthhtp.hhh.......................hGhluhh.......................................hhhhlhlsS......hthhR.+hs............YchahhhHhl.hhh.............................hhllsh .....................................................................................huhhhh..shshl................................ls...h+.........s.l..........h.tt.h.s.hpp...........hhh..hH+h.........lG......hhsh.h..hu....hlH............................sh.sah.h.h..h.hh...........................................................................................hhtth..h..p..p..s.hh.h............................................................hGh...l...u.hl...........................................................................lh.hh.lhh.sS...........hthh.p...+hs................................................achahhhHpl..hh.lhhlhh.h............................................................................................................. 
0 573 1182 1798 +2509 PF00762 Ferrochelatase Ferrochelatase Bateman A anon Pfam-B_879 (release 2.1) Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.91 0.70 -5.46 217 4061 2012-10-01 23:23:09 2003-04-07 12:59:11 14 15 3675 58 1013 3123 3342 305.40 29 88.92 CHANGED +hulLLlNlGsP-ss..psVc.aLpp...hh..sD.cllc...lP......hh..hLtthIh.hRst.cssctYppl..s..s....sSPLhthTcpQsptLpptLspt..........................p..h.VhhuMRY.spP.l.pcslpplpp.p..Gs.....ccllllPLYPQYSsoTouoshcpltcsh..tp..............ht..sp....lc.hlpsaascPsalpAhspp....lppthpp.h..............tp.........spLlhShHGlPpph.l.ptGD...............................P.....................................YtppsppTs....chl.tct........................................L.....shpp................aplsaQSR.hGs..tcWLpP.TsctlcpLspp.G.h..+plhlhs.uFsuDplETL.ElshEh.+-hhtctGs...ppaphlssLNssstalpsL...scllppph ..............................................hulLlhNhGoP-s....psVctaLpphh..pD.p.+llc......hs......h..hlt..hl......hRst.cl.............sctYppl.................s.....ssSP..Lh..........th.o..c...pQ.sp...tLpptLsp...........................................p.h..Vhhu..h.+Y.usP..l.ps...sl.pphp.t..p...Gl................cc...llll..sL..YPpYS.soTsuush-pltchh....tp...........................h...p.....lphl.csaaccs.taIpshupp....lppphtph..................................tc............shLlhShHulPpph.h..ctG.D...............................P................................................................Y.ppspcTs.......chltpt.............................................L...............shsp........hthsaQ.S+.hG....t.WLpP.sp-t..lcpL......scp.....G...h.............cplhlhs.s....uFsuDplETL.ElshEs..+....c.hh.h.....ch..Gs......paphhsslNssstaIchlsslltt..h................................................................ 
0 298 610 840 +2510 PF04060 FeS Putative Fe-S cluster Bateman A anon Bateman A Domain This family includes a domain with four conserved cysteines that probably form an Fe-S redox cluster. 20.50 20.50 20.50 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.70 0.72 -4.37 19 2487 2012-10-03 08:56:42 2003-04-07 12:59:11 8 151 1933 7 725 1862 213 34.40 47 12.10 CHANGED lLPpoNCGpCGassChsaAptllpscsclscChsh ........lLP.psp.CGpCGY.P.GCcsaAcAl.s.p.s..t.s.p.l.spCsPG................ 0 301 522 632 +2511 PF02941 FeThRed_A FeThRed; FeThRed_beta; Ferredoxin thioredoxin reductase variable alpha chain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 25.20 31.40 24.70 18.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.12 0.72 -4.04 17 112 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 88 8 52 119 22 69.00 49 52.62 CHANGED lGDRVRVpsSVlVYHHPpHRspsFDlp.GhEGEltsllspWpGRsISANLPhhVpFs...........KF+AHL+--E ...lGcRVRVpss.VhVYHhP...+stshDlp.GhEGcVtp.hl...spW+G+...lSANLPhhVpFph..........+Fh.AHL+EDE.... 0 12 35 45 +2512 PF02943 FeThRed_B FeThRed_alpha; Ferredoxin thioredoxin reductase catalytic beta chain Griffiths-Jones SR anon Structural domain Domain \N 19.30 19.30 19.50 23.60 17.80 16.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.48 0.72 -10.95 0.72 -4.34 20 255 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 215 8 133 258 35 99.20 36 71.12 CHANGED csLEsh+KFuEpYAK+oGTaFCsD.uVTulVlEGLA+HK-phGusLCPCRHY..EDKpuE.VtssaWNCPCVPMRERKE.....CHCMLFLT....P-N....FuGppQcIsp-plpcpp ..............h-hh+cFs-paAc.+sGhaFssDtslTssVl.cGLucpK-chGtshCPCRhh..-c+pt-...pt.h.CPCl.hREc..c..........CHChLFlo........-.........t..............t................................ 0 48 107 123 +2513 PF01846 FF FF domain Bedford MT, Bateman A anon [1] Family This domain has been predicted to be involved in protein-protein interaction [1]. 
This domain was recently shown to bind the hyperphosphorylated C-terminal repeat domain of RNA polymerase II, confirming its role in protein-protein interactions [2]. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.44 0.72 -4.01 149 2377 2009-01-15 18:05:59 2003-04-07 12:59:11 14 41 279 21 1493 2278 3 52.50 24 17.77 CHANGED cscpsFp.pLLc-tp....lsspo.........pWp...chhp..hl.ppDsRa..hsl.....pstpc+.cplFpca ...............tcpsFp.plLc-.tp............lsspo......................................pWp.....chhp......hl.t......p.....DsRa......psl......tstp-R..cplFpca..................... 0 421 703 1108 +2514 PF05013 FGase N-formylglutamate amidohydrolase Bateman A anon COG3741 Family Formylglutamate amidohydrolase (FGase) catalyses the terminal reaction in the five-step pathway for histidine utilisation in Pseudomonas putida. By this action, N-formyl-L-glutamate (FG) is hydrolysed to produce L-glutamate plus formate [1]. 
20.40 20.40 20.60 20.50 20.20 20.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.41 0.70 -4.46 188 1531 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 783 10 479 1356 624 215.20 26 74.23 CHANGED sss.lllsssHuGptlP..ssh...thshs.sthth+huh........Dhhscplhpths.th.....sAs..........lltuph....SRhllDhNRss.chsshh.........................stsshshlP....s...sp.lh.....thsts-tppRlpphapPYHsslsphlsphpsp.hshshllssHShsshh...sh.............shplGshas............s...ss....plspthhsthp......ssh.............s...lspNpPYsG.uhhspt...htt.spGhtslplElppsLhhs .........................................p..PlllsssHuGpplP..ssh.....thshs...stht.p.hst.................DhalcpLhpths.th.....sAs..........hlt.upa......uRhllDh.NRss.chs.h...........................stustshlP.....s...s.p.hh..........t.sss-ttpRlpphapPYHpslpthlschpsp.aGhslll-sH.Shssh..h....shh..................hssasl..Gsths..............st..ss....plspthhphhp......sts..............hs.sshNtP.apG..uhhscp........hststpshp..ulplElppshhh................................................................................................ 0 118 271 364 +2515 PF00167 FGF Fibroblast growth factor Bateman A, Sonnhammer ELL anon Prosite Domain Fibroblast growth factors are a family of proteins involved in growth and differentiation in a wide range of contexts. They are found in a wide range of organisms, from nematodes to humans [2]. Most share an internal core region of high similarity, conserved residues in which are involved in binding with their receptors. On binding, they cause dimerisation of their tyrosine kinase receptors leading to intracellular signalling. There are currently four known tyrosine kinase receptors for fibroblast growth factors. These receptors can each bind several different members of this family. Members of this family have a beta trefoil structure. Most have N-terminal signal peptides and are secreted. 
A few lack signal sequences but are secreted anyway; still others also lack the signal peptide but are found on the cell surface and within the extracellular matrix. A third group remain intracellular [2]. They have central roles in development, regulating cell proliferation, migration and differentiation. On the other hand, they are important in tissue repair following injury in adult organisms [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.52 0.71 -4.34 77 1638 2012-10-02 19:42:32 2003-04-07 12:59:11 13 13 202 252 751 1462 0 113.10 32 52.07 CHANGED RhtpLYsps..saaLplhs.....sG....pV.sGoppc..sstaulLplpus....sh.G..hVtI+usposhYLsMsccGcLY.......u...s..........pphs.s.-ChFpEph.-NtYssYtStha.....................tcshaluls+pG+s++Gp..+s...+ptppsspF.....L ................................................................hpLaspp.....uhaLpl..s...............sG...............pl.s..G...s...p..cc.............ts....a...s...h.....l.pl.sl....................sh..u...hVs..I.pGlp.o.shY.lsM.N.p.c.G.cLa.......u....o..............................pp.hs....s..-ChF..p.E...pl.....E..N.t..YssYtStha.....................................ptaaluLs+p.G.pshcGp...+s+.tp..spFh.................................................................... 0 98 159 373 +2516 PF00370 FGGY_N FGGY; FGGY family of carbohydrate kinases, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain adopts a ribonuclease H-like fold and is structurally related to the C-terminal domain. 
26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.34 0.70 -5.06 13 14797 2012-10-02 23:34:14 2003-04-07 12:59:11 16 35 4137 100 3453 10824 3157 231.80 24 48.40 CHANGED hhlGlDpGTousKAllhs.cpGcllustptphshhpPcs..GasEpDPc-lapsstpslppllppt....tho.ppIpuIulouQt+uslllDcps.cPLhsAIlWsDsRTsshscpLpc..phs.pplhphTG.shhshaolsKLpWlp-pcPchhcphc....pahhs+-YLpa+LTG.....pasTDhosASsohhaslpstcWDcplLphlGIs.s...hLPpllpouclhGplpsphAthhGh..slPVsuGuuDssuuslG ..................................................................................................hlulDhGTous+..s..l...l.....h.........s....p.......p.....G......p..............h....l.......u.......t......t.......p...t....t....h......p...............h......h......s......p.s................G.a...s....E.........p.......c........s..p..........p.........l...a...p...s.....h...t...p.s....l.p...p....s...hpps........................th.p....p...p.......l..t....uI...G..........l..........s.........s......p..........t.......c........s..........h..........l..l.........h.............D.........c...........p..............s....p...............s...................l..................h...........s...........s...........I...................s................W.p..D..p..R..s...s..p...h....s....p....p........l............p...............p..............t....h.................h.........t................p..............t...........l................h................p................h........o.........G................h...........................h.......t................s..........h......h................s......h...s......K.........l.h.....Wl....h.c...p......s.t...h..h....p.p.sp................hhh.h.h....s.sa.lh..a..p.L..T.G....................t.h.s..s..D....h.....o.s..A..u...p.....T..h..h........h..........s.....l.....p..........p......h..........p........W........s........p........p........l........L...p.....h..........h.......s.......l...s...pp..................hl......P.....
.p........l.......h......p........s...u.....p..h..h.G.....p.......h............p...t.t.....h.............t.............t....h....h.....u...................................................t..............h...P........l.s.s..s.s.sDptuuhhG............................................................................................... 0 1153 2092 2847 +2517 PF02782 FGGY_C FGGY family of carbohydrate kinases, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain adopts a ribonuclease H-like fold and is structurally related to the N-terminal domain. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.30 0.71 -4.70 105 14710 2012-10-02 23:34:14 2003-04-07 12:59:11 11 34 4144 98 3368 11639 3173 190.40 23 38.30 CHANGED hthshGToshhhh..ts........spPh...tspuhhsshs...........................s..........shhhh-GspsssGslht.alhp........htthtpttt...................p.hst..t..ssss.s................sthhhh.....Psh.sGpts......Phhsspttusl..................hGhs.sps....huphhpuhlEuluhthRthlcth............tp.......................uts...hsslhhsGGts.cssllh.Ql..................hADshG..hPlhhsps.s-usslGuAhlu..uhAh 
...............................................................................................................................h.ohGTushhhh.....ss............pp.sh........s.pu..h..h.s..s.hs..................................................s.tas...h.p.G..s.h.h.s.u.Gs.s..l.p....Wlpct.h................th......h.tpttp.........................p.hs.................p.ts...ss.....................ss.l.h..hl..........Phh..sG.....ts...............P...h.h....s..s.ps+..G......s.h.................................hGl.o...h.s.ss..............ps.clhR...A...s.....lEu....l...u.a...p...h..p.s.....s.l.csh.........ppt........................................................................Ghp........hpplt.l..sGGuu...pssh.h.h.Q.h..................h.ADlhs.......hsl...h............sts...t......E...ssALGAAhhAulu............................................. 0 1123 2048 2789 +2518 PF00498 FHA FHA domain Finn RD, Durocher D anon Prosite Family The FHA (Forkhead-associated) domain is a phosphopeptide binding motif [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.42 0.72 -3.85 221 10726 2012-10-01 21:55:46 2003-04-07 12:59:11 21 374 2041 93 5327 10014 968 70.70 25 13.00 CHANGED hplGRs...........ts.s-ls...ls..............s...tt.lS+pHut...l...phpss.............................phhlpD.....ht....SsNGT.al.....Ns......pplpt.................hpLpsGDh.lplG ...............................................hlGRs.....................ts..s..c.lh.......ls.................................s......t..lS.+.p.Hup...l......phpss.................................................................phhlpD...............................hs..........SsN.G..T...a.l...........Ns.............pplpp............................h...Lp.sGDh.lplG............................................... 
1 1945 3258 4503 +2519 PF00771 FHIPEP FHIPEP family Bateman A anon Pfam-B_983 (release 2.1) Family \N 20.50 20.50 20.80 23.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 658 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.96 0.70 -6.20 180 3878 2009-01-15 18:05:59 2003-04-07 12:59:11 15 8 2246 8 680 2701 953 594.50 38 94.91 CHANGED sIlshhllPLPshlLDlhlshNIslSlllLhlulalp+PL-FSsFPolLLlsTLhRLuLNlASTRlILhpGc......AGcVIpuFGpFVlGG.....NhlVGlVlFlILsllpFlVITKGupRlAEVuARFTLDAMPGKQMAIDADLNAGlIspc-A+pRRpclppEucFYGAMDGASKFV+GDAIAGllIhhINllGGlhIGlhQ+shshucAhpsYolLTIGDGLVuQIPALllSsAAGllVTRsu.spp.....shupplhpQlhspP+sLhlsuullhhhullPGhPplsFlhluuhhuhhuahht+pp................................................ttttttsttpt.................tttttpshpshhps-...sltL-lGasLlsll-tsp..sut.LhpRlpslR+plup-hGhllPsl+l+Ds.hpLtssp....YpIplpGlplupuclhsschLAls..ssts..t................pl.sG..htsp.....-PsFGhsAhWIstsp+cpApttGa.............sVVDssoVluTHLscll+ppus-LlGhpEsppLl-pl........ppphPcLV-El.hPchlsluplpcVLppLLpEpVuIRcltsIlEsLs-hu.sps.cDsthLsEhVRtuLuRtIs....pphtss...p.spLsllsLssplEphltpul.pp.tsts.s.......hlsl-Pshsppl..lpplppth...pp.htt........tGtssV.LlsusslRhhlp+llcphh.s.plsVLSasElsssh.pl 
....................................llshhllPLPshllDhlhshNIslulhlLhluhahpcsL-FusFPolLLlsTLhRLuLNluoTRlILhpup.................AGcllpuFGpFVlG.G......NhslGlVlFhILhllpFlVITKGupRlAEVuARFoLDuMP..GKQMuIDA....DLsAGlIspppA+pRRpclppEupFYGuMDGAuKFV+GDAIAGllIhhINllGGlhlGhhQasMshupAhppYolLTIGDGLVuQIPuLllShAAGllVTRls..ssp.........shupplhpQLhs.pPp.slhlsuu.lhhlhul.lPGhP....p..hsFlhhus.hlhhh..u.ahh.ppct....p.......................................................t..t......t..p...t..t..t..h.p..t.............ttpttp.t..sh.ss.l.........h.-....sl.tlclGh.p.Llshlctt.p...ttp...L....h..p+..I...cslR+phup-hGhl.hP..l+l.........R...-s..hpLpssp....YtlhlpGlplup.uc.lhssphhsls.......su.ps.hs................pl..sG........sh..............-.P.s..a.G..h.....suhWIpt.s.p..ppAph.Ga.............sVl.-.ssoVlsTHLscllpppss-lhGhpEsppLl-pl........pp.phPcLl--l.h................p..hlslspltcVLppLLpEplsIRDhtoIhEs....Ls....-hu...sh..........p....pD..s.hLsthVRhuLtRtIs....p.ph.h...s.s..........t.spl...plltLssplEphlhp.ul..pt..t..t..tu.t.........hhsl-Pshspplhpphppth..pp....t................h.u.t....ssV.LL..s.s..slR.hlp+hlct..h..s..pl.VLSatElssphp............................................................................................. 
1 210 402 531 +2520 PF01269 Fibrillarin Fibrillarin Finn RD, Bateman A anon Prosite Domain \N 19.90 19.90 19.90 19.90 19.80 19.70 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.40 0.70 -5.44 42 616 2012-10-10 17:06:42 2003-04-07 12:59:11 12 7 468 19 394 756 165 215.80 57 78.68 CHANGED lc.H.+atGValsc.sctst..LsT+NLsPGppVYGEKhlphpsp...........EYRlWNPaRSKLAAAIlsGlcph.I+.GuKVLYLGAASGTTsSHVSDIVG.cGhVYuVEFS.RshR-Ll..shuccRsNllPIltDARpPpcYRhLVt.hVDllFsDVAQP-QAcIluhNAcaFLKsGGahlluIKApsIDsTtsPctVFtpElpcL+c...pshcsh-pl.sLEPa-+DHAhVlGha+h ...........................................................cPH.+atGVFls+.G+cDh..LsT+NhsPG.....cuVYGEKRls..l..pss................spKhEYRlWNPF..RS..K...LAAu.Ilu.G.........l-p.la..lcPGuKVLYLGAASGTTVSHVSDlVGP................pGhVYAVEFS+....R...s...G...R..DLl..s.h..A..c..+...R......s..Nl..lPI...l...E...DA.......R..+...P...t.+Y...R.M...L...V...........s..MV..Dl..I.F.u.D..VA..Q...P............DQA.RI.l.ul.N.A.chFLKsGGth.l.I.S....I.......KAsCIDS..Tss..s..E..s.....VF...A..........p.EV..pKL...pp...-phKPhEQl.oLEPaERDHAhVlGhYp..................................................................................................................... 
0 137 227 320 +2521 PF00147 Fibrinogen_C fibrinogen_C; Fibrinogen beta and gamma chains, C-terminal globular domain Sonnhammer ELL anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.82 0.70 -4.76 11 4781 2012-10-01 23:56:02 2003-04-07 12:59:11 13 214 1032 239 2185 3828 41 153.80 35 42.42 CHANGED .s+DCp-lhpp..GucoS...thYhIpPcssp.cshcVYCDMcT-sGGWTVhQpRpDGSlsFtRsWcsY+pGFGNls............sEaWLGNDpIppLTptG..shcLRl-LcDapG-pshAhYssFpVps-ss+YpLpVssYpG.sAGsAh........stscoMThHNGMhFSTaDRDND.....ssptsCupp.uGG...WWYspCHuANLNGhYYaGushshp........sGVsWhsWKGp.........YSh+pspMKlRPh .............................................................................s.......thh...l...........................................h.......shC.-........p............t...s.....G.....G....W.h.....l.h....Q....p...............R...............s......G....p........s.....F.......c.t.......Wt.......p.Y.p...G.F..G.p.t................................sEaW..l....G..ctlh....lT......t..t..t................t...........Lh.lp..h......t....s......h.............s....p............h..............h.A........Y.....t...........tF...........l.....t.....s.....tt.p..Y.t.L.t.l..t..t....a...p....G.............................................................p..ps..h.FST..h.D..p........DND............................t.tp.C.u................t.....G..u.....WW.a.......p....t.....C....h..........s....p.....G.................................................................................................................................................................................................................................................................................... 
0 849 987 1634 +2522 PF03516 Filaggrin Filaggrin Griffiths-Jones SR anon PRINTS Repeat \N 25.00 25.00 27.30 25.00 18.30 24.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.81 0.72 -3.51 10 278 2009-01-15 18:05:59 2003-04-07 12:59:11 8 19 11 0 38 231 0 53.10 57 24.32 CHANGED -uSRpSsu++HthSo+...ADSSRHSpsGQGQuuuu+o..SR+.pGSSsSQDSDSEGH .....-tuRpSsuc+Ht...Sp+....ADSSRHStsGpG..Q.uS.uuhs..St+.pGSSsSQsSDSEGH... 0 23 23 23 +2523 PF00038 Filament filament; Intermediate filament protein Sonnhammer ELL anon Prosite Family \N 40.00 40.00 40.00 40.00 39.90 39.90 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.07 0.70 -4.96 33 4874 2009-01-15 18:05:59 2003-04-07 12:59:11 16 38 268 31 1988 4185 5 271.60 33 60.33 CHANGED sEKcphQsLNDRhASYI-KVRtLEppNptLEs+lpphppp..p.....tspssphtshYppplp-LRppl-shst-+u+lpl-l-Nhptsl--h+pKaEcE............................hshRpssEs-lssLR..............+slD-toLs+lDLEt+lESLpEElsFLKphHEEElp-Lps....plp......splsVEhDs.s.phD....LsphLp-lRuQYEslAp+N+p-AEphYpsKhpcLppssspss-tlpsuKpElsEhRRpl...QuLphElpu....lpupppuLEppltEsEpRastplpshQstlspLEppLpph+p-MtRplpEYQcLLsVKhALDhEIATYRKLLE.GEEsR 
.................................................pEKtphptLNs+h.A.sa.l.-.K..................V..R...LEppNt....L-...schp.h.pp...........t..t...s..t.....sha...t.l...p........p..L...+..pp.............l...phtt-p......u...pl..h.plcs.h...p..th-...........c....h..+..t...+..a.Ep.E..................................................hsh.R.p.psEs-h..ssl+.................................................................................................+..slD.p....h.hhs+s.-..LE....tp....l-uLp......-.El...t.a.......L.+.p....a....-...c............El....p..plps.............plt.......spl.l...c..hD...s......s....p...D..............Lsp..hl.t..-...l.R.......s......Q.Y..E.p.l.s..pp.....s..+..t....-...s..E...p.h.a...p.........s.+..................h...............p...c.....lp......t....p.s....s.p......p....s....-.t.l.........p..ps.+.p...E.....l......p.......E...hp....+p..l.................pp.L..ph.El..ps...................................hp.s..................p...ts..L.Esplt-sEpphp...h.tl.pphps..h....lspLEt........pLpp.h+p-h..t.pphpE.Y....p.p.LhslKhtL-.hEIu.TY...R..+LL.E.GE-t.......................................................................................... 0 179 311 802 +2524 PF04732 Filament_head filament_head; Intermediate filament head (DNA binding) region Kerrison ND anon DOMO:DM04896; Family This family represents the N-terminal head region of intermediate filaments. Intermediate filament heads bind DNA [1]. Vimentin heads are able to alter nuclear architecture and chromatin distribution, and the liberation of heads by HIV-1 protease liberates may play an important role in HIV-1 associated cytopathogenesis and carcinogenesis [2]. Phosphorylation of the head region can affect filament stability [3]. The head has been shown to interaction with the rod domain of the same protein [4]. 
21.80 21.80 22.10 21.90 21.00 21.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.68 0.72 -3.28 45 462 2009-09-11 21:54:48 2003-04-07 12:59:11 9 4 64 0 185 404 0 84.80 31 16.72 CHANGED tssSSYRRhFGs.............sts.p.uhss...st..ssuSshp...spps.psosu...s......pShttps.sssh...h.........-s.lDFSh.usAlNs......EFKtsRT ...................................psSSYRRhFGs.............shs.phuhss.........usuS.uhp.........spph..psSsu....ss.h...u.Sh..ptpsssssht......h.s...............-s.LDF.S..usAlNs......EFKsoRT.. 0 6 27 78 +2525 PF00630 Filamin Filamin/ABP280 repeat Bateman A anon Prosite Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.48 0.72 -3.44 132 7961 2012-10-03 16:25:20 2003-04-07 12:59:11 14 217 181 75 3933 8351 79 92.40 27 39.51 CHANGED sscssc.sps.....G.G.......l..pts.....hss.pss..pFplpsp.....ss.Gt...t........t....lpl......................tsspst................................................................................................................................................................th.h.....t...lp-pp.....................cG..sap.lpY.pPpps..Gp.aplpVph....ss.......p.plssS.PFp 
......................................................................................................................................................................................................................................................s..sspclps............Gs.G...................L....pts..........hss..pss...pFslpsp...............sAs.s.s...................lt.......lpl...............................puP.sst................................................................................................................................................................................................hch....p.....lp.-.s..t...............................-.G.....o.a..s..VpY....h.P.p...p.s.....Gp..a..........pls....Vpa.....ss...................p..cl..P..s..S.PFp............................. 0 1135 1433 2563 +2526 PF01611 Filo_glycop Filovirus glycoprotein Bateman A anon Pfam-B_1023 (release 4.1) Family This family includes an extracellular region from the envelope glycoprotein of Ebola and Marburg viruses. This region is also produced as a separate transcript that gives rise to a non-structural, secreted glycoprotein, which is produced in large amounts and has an unknown function [1]. Processing of this protein may be involved in viral pathogenicity [2]. 
25.00 25.00 176.80 176.70 18.80 18.10 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.08 0.70 -5.66 6 92 2009-09-10 21:45:54 2003-04-07 12:59:11 11 1 25 6 0 98 0 322.40 54 65.93 CHANGED +KTSFFlWVIILFQ+shSlP.LGlloNSTLQso-lDphVC+D+LuSTsQL+SVGLNLEGsGVATDlPoATKRWGFRoGVPPKVVsYEAGEWAENCYNLpIKKPDGSECLPsPP-GlRsFPRCRYVHKlQGTGPCsGslAFHKpGAFFLYDRLASTlIYRGssFAEGVIAFLILsKs++cFhpSpPh+EssNhTpsooShYaToolpYphssFGspposhLFclsspTaVpL-ssaTPQFLspLN-TIppssphSNTTG+LlWTlsPslD........oshGEWAFWETKKssocphpucohLSh.hhps+T.......pNhScpuss.....+pohpPus..ssTst.....ssppsssh.plPhpshpsssoppphpss.p .............++TSFFlWVIILFQ+shShP.LGllpNSTLpso-lDphVC+D+LuSTsQL+SVGLNLEGsGVATDlPoATKRWGFRoGVPPKVVsYEAGEWAENCYNLpIKKPDGSECLPssP-GlRsFPRCRYVHKlpGTGPCsGshAFHKpGAFFLYDRLASTlIYRGssFuEGVlAFLILscs++cFhpS+Ph+EssN..hTpssoStYaToolpYphssFGsppophLFclsshTaVpL-spaTPQFLhQLN-TIh.tssphSNoTG+LlWplsPplD........sslGEWAFWETKKs..pphhspp.lsh...pstt.......ps.stt........php.pstp...sTs......s.p.p..h.p...t.hpt..pp........h........................... 
0 0 0 0 +2527 PF02097 Filo_VP35 Filoviridae VP35 Mian N, Bateman A anon IPR002953 Family \N 22.40 22.40 54.20 53.70 21.90 21.70 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.96 0.70 -5.23 4 107 2009-09-11 13:24:37 2003-04-07 12:59:11 10 2 30 27 1 61 0 190.00 70 95.46 CHANGED h.GPEhoGWlSEQLMTGKIPlo-lFsDl-NpPs.h.hphhspspsss+ss....sppoQTDshhs..hccVcpTLssLloslpRQssAIESLEsRlTT....LEsuL...KPV.DMuKTISSLNRuCAEMVAKYDLLVMTTGRATATAAAs-AYWsEHGQPPPGPuLYE-sAl+uKlcs.sshVPpuVp-AapNLsSTosLoEENFGKP.ISAKDL+pIhYDHLPGFGTAFHQLVQVICKI....GKDsNhLDhIHAEFQASLAEGDSPQCALIQITKRlPhFQDssPPlIHI+SRGDIPRACQKSLRPVPPSPKIDRGWVClFphQDGKTLGLKI ......................................................................................................................................................................................................................................................LALLLFTHLPGNNTPFHILAQVLSKIAYKSGKSGAFLDAFH....QILSEGENAQAALTRLSRTFDAFLGVVPPVIRVKNFQTVPRPCQKSLRAVPPNP.T.IDKG................. 
0 0 0 1 +2528 PF00419 Fimbrial Fimbrial protein Finn RD, Bateman A anon Pfam-B_196 (release 1.0) & Jackhmmer:B2PIN3 Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.27 0.71 -3.98 244 16321 2012-10-02 17:35:21 2003-04-07 12:59:11 15 9 786 91 829 6504 36 150.30 18 66.83 CHANGED lshpu.sl......tsssCsl....s..ssss...................VsFG.slsh..s..plsss.........tht.pshslsls.........Csss.................slpl.thh........us.....ssshs..ssslts....sss..................slGlpl.......................................pps..ss.....h...................t........thsphhshshs.............hpAs.hp..tsssshs.t....GpFsAsuolplsY.p ...................................................................................phpG.pl......hsssCs...l........s..ssst...............sV.sh...G...plss.......s....phpss...........tssst.ts..F...slplp...........tCsss...............................tss.pl...shs.................us.......sss.ss....ss.hlss......sssss.t....................................ulGlpl......................................hss.s..sss.............l.....................thssss.......................s.hshssssss..........hs......................apAphhs..............sssssss...s............GshsAssshslsYp........................................................................................................ 0 59 218 557 +2529 PF04449 Fimbrial_CS1 CS1 type fimbrial major subunit Kerrison ND anon DOMO:DM04212; Family Fimbriae, also known as pili, form filaments radiating from the surface of the bacterium to a length of 0.5-1.5 micrometres. They enable the cell to colonise host epithelia. This family constitutes the major subunits of CS1 like pili, including CS2 and CFA1 from Escherichia coli, and also the Cable type II pilin major subunit from Burkholderia cepacia [1]. The major subunit of CS1 pili is called CooA. Periplasmic CooA is mostly complexed with the assembly protein CooB. 
In addition, a small pool of CooA multimers, and CooA-CooD complexes exists, but the functional significance is unknown [1]. A member of this family has also been identified in Salmonella typhi and Salmonella enterica [2]. 20.80 20.80 21.30 20.90 20.50 20.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.91 0.71 -4.14 13 326 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 291 4 19 128 3 139.00 45 81.62 CHANGED VpKsITVTAsVDPTl-lLQADGSALPoulplsYhPu.pshpstplpT+IaTNDpoKslpl+LsssP.sLsNlhsPo.ppIPlsVohGGcsLoTous...olcuucL.FusuulsssSsshsLsIuts...Tsussss..AGsYQGlVSlllTQu ...................................................................VQKDITVTANlDusL-hhQsDs..o..uLP.p.sl.chpYhPG...pGL..sapL.T+l.a..SND..TK...cVphpL.luss.QL..l.p...sL.Dso.Khl..slsVThGG..cplps.suu...sh.pAspl.Fs..s..s..ths....suShs.tNLhhuQp....stusL..ps....G.YpGlVSlhlSQ.................................. 0 1 9 13 +2530 PF02432 Fimbrial_K88 Fibrimal; Fimbrial, major and minor subunit Mian N, Bateman A anon Pfam-B_2036 (release 5.4) Family Fimbriae (also know as pili) are polar filaments found on the bacterial surface, allowing colonisation of the host. This family consists of the minor and major fimbrial subunits. 
25.00 25.00 29.00 28.90 21.50 21.20 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.57 0.70 -4.94 17 376 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 101 12 10 223 0 219.50 30 89.32 CHANGED huG-lplsGhlTs....cshWsWplGsu.pshsssssch.spssplsIslstst..PlLhG+sh-sh.tG...GsGhsPploa.tuh-Ghsl.......shsssGpuphsLPVp.s-sssphGoLshslstuuslptspspp...................sha.GshhssshshlsGpotssssups.sht.........hhssphsshhp.....shpsuuh.s.shhocuslppltus....Yuuulstsps.chplstssss..hpWpAsLsVolsYp ........................................uGplplsGslTs...pts.WtWpVGsss.ps....h..s..schtcs............tssphslslstss...hPlLhG+sh.t.s..s..sG...tsGhsP.Isa..sspG.sh.......stsssuhhpholPVp..s-sss.ps..Gshshs.h.ptuusls.tshtsp................................a.Gs..h.hs..tsh...s...hl.s.s.p.usshstu.t.o.s.h..........hhssthsshhs.....ss.ss.u.sshs.uh..hs.D..upltplsus.....YAuslststs.-hphcts.sss...hpWpsuLsVoloYp.......................................................................... 0 0 0 3 +2531 PF05182 Fip1 Fip1 motif Wood V, Bateman A anon Pfam-B_4652 (release 7.7) Motif This short motif is about 40 amino acids in length. In the Fip1 protein that is a component of a yeast pre-mRNA polyadenylation factor that directly interacts with poly(A) polymerase [1]. This region of Fip1 is needed for the interaction with the Th1 subunit of the complex and for specific polyadenylation of the cleaved mRNA precursor [2]. 22.70 22.70 23.70 23.80 22.40 22.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.69 0.72 -4.69 36 388 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 282 0 249 374 4 44.70 53 8.76 CHANGED h-hD.l-.shc-..KPWRcPGADloDYFNYGFsEpTWctYCp+QcchRt ................-hDl-..sh..--..KPWR+P.........G...AD..lSDYFNYGFsEpTWcsYCcKQcclR....... 0 86 141 209 +2532 PF02433 FixO Cytochrome C oxidase, mono-heme subunit/FixO Mian N. 
Bateman A anon Pfam-B_2045 (release 5.4) Family The bacterial oxidase complex, fixNOPQ or cytochrome cbb3, is thought to be required for respiration in endosymbiosis. FixO is a membrane bound mono-heme constituent of the fixNOPQ complex. 24.20 24.20 24.30 24.70 23.80 23.20 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -4.98 56 1201 2012-10-03 10:02:11 2003-04-07 12:59:11 10 7 1028 0 361 915 226 199.50 49 74.82 CHANGED HphlE+NshlLhlhshlsloIGGlVEIsPLFal-sTI.EcV......................................................-GlRPYTPLELsGR-IYlREGCYsCHSQMIRPhRDEVERY....GHYSLAAEShYDHPFQWGSKRTGPDLARVGG.KYSDpWHlsHLhsPpuVVPcSIMPuYsaLtcst.LD...........hssltschpshptl...........GVPYo..-p...............IpsApsDhpuQAssps......s.hsshhp+Y.spAthp.......saDGssttloEMDALlAYLQhLGThVD ..............................................................................................HphlE+Nshllhlhh.ll.sl.ulGGL.V.E.I.l.PLFa...cs.s.hcsl..........................................................cGh+..P.YT..s....LpL.....tGRDIYIREGChsCHSQMI..R....P....F...R...u..E.s...E.R......Y.............G.......+.......Y......Sl.A..G.Ess........Y.........D..H.........PFlW.......GSKRTGPDLAR.VG.................u.+.Y...S....D....-....WHhsHLhsP..RsV..VP-ShMPuYsaLh...csp..lD.................scss.t.tc.hpsh+.p.l...........G.V.P.Yo..--................pIspAtpt.h.....cup.........................................................................................p........................................................................................... 0 111 233 301 +2533 PF01346 FKBP_N Domain amino terminal to FKBP-type peptidyl-prolyl isomerase Bateman A anon Pfam-B_402 (release 3.0) Family This family is only found at the amino terminus of Pfam:PF00254. This domain is of unknown function. 
21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.08 121 3385 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 1650 10 537 2180 296 111.50 31 49.55 CHANGED shuhshsssttssss...........plpopppKhSYulG.hslGpphppphhc........lDhctllpGlpDuh..tssp.tlo-E-hpps...Lpphppclptppptpht..p........ApcNtpcGctFLs-NtpK-GVhsTp.SGLQYcV .....................................................................hts..........................shps..cppsuYulG.hp.lGpph...ppphhct...............ls.-tl.htGlpDuh.....tu.p.ps....tl...st..pp..h..pps....L..pp.hp.pchpsttptp................uptst..scGctFLpcN.tc.+......c.GVpsot..SG.LQYcV.................................. 0 144 275 420 +2534 PF04620 FlaA Flagellar filament outer layer protein Flaa Kerrison ND anon DOMO:DM04570; Family Periplasmic flagella are the organelles of spirochete mobility, and are structurally different from the flagella of other motile bacteria. They reside inside the cell within the periplasmic space, and confer mobility in viscous gel-like media such connective tissue [1]. The flagella are composed of an outer sheath of FlaA proteins and a core filament of FlaB proteins. Each species usually has several FlaA protein species [2]. 
25.00 25.00 25.60 25.50 24.70 24.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.02 0.70 -4.93 10 195 2009-09-11 10:09:13 2003-04-07 12:59:11 7 10 102 0 60 164 0 204.90 22 55.90 CHANGED phshsshoslDa...ucsschuasutupth....hccuLshssW.Vthsuu...sp.....tsshhtts.scsputchuus+llGVRlpFsptGpNuhull....pPsatIPhhu.....................................slKoIuVWVaGtuY.asLplLlcDppGch+phhMGpLsFsGWKpLshs.NPsalsss+sR.lp.psshYPsussclshpGF+lccDssccsG-alsYFcDl...........+llhDhhsV-hsc ........................................................................................t....................t....t.tt......h.......pa...hs.s.............................sptt.stt.h..tspplLGV+spFsphu.sshs.hh.........tsshtIshhu..................................................hh.KplsVWVaGtsatasLplhlcDhpGpp....aph.h...G.pLsFpGW+pLp......hs..sPshls.......p.p.s.Ysh.ts.lphhuFtlppsstpthssahhYhc-l...........+llhDhh......p.............................................. 0 29 46 46 +2536 PF03646 FlaG FlaG protein Bateman A anon Pfam-B_2985 (release 7.0) Family Although important for flagella the exact function of this protein is unknown. 21.00 21.00 21.20 21.40 20.80 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.20 0.72 -3.91 139 905 2012-10-01 21:18:35 2003-04-07 12:59:11 10 1 864 1 295 725 111 111.20 22 87.37 CHANGED ststssssstsppsspsststs..................pptppptppp......tt...sppc.lp.cslpplschhps.hpps....LpFpl--cssc.hlV+VlDppos-VIRQIPsEchLclu....cplp-..........ht...........Gl.Lhcpc .........................................................tsts................t.tt.t.................pptt.pttptp..........pp.hspcclp.chl.c.clNchhps...hs..ps.....lpF.pl..c-.c..s..sp.hlV....pVhD.ps..Ts-lIRpIPsEEhLclh....pplp-...............hh.................Gl.lh-p....................... 
1 114 218 264 +2537 PF03614 Flag1_repress Repressor of phase-1 flagellin Griffiths-Jones SR anon PRODOM Family \N 24.00 24.00 25.50 65.20 23.80 23.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.88 0.71 -4.68 3 93 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 77 0 3 55 2 147.20 63 91.53 CHANGED DISYGREAElWPRDYSsLARRlQFLRFNDIPVRLVSsNGQlIIGYIuKFNs+ENhI.LASDcsKGsNRIEVKLEpLAoLE....ELsusDuhstoLVssDhFNlQ.hsPSRRDFFSICNKCaKQGVGIKVYMtDGRlLpGcTTGVNACQVGlppuNGNHMQVMFDWVSRI .DIoYGh.AEhWPR-YShlt+tl.FhRhspIPVRls.psuplhshYltth.spcNhI.LASDc.sKGspRIEVKLEpLAhLE....ELsusDs....hshoLVssD.FNlQ.hsPSRRDaFSICNKCaKQGVGIKlYMt.GplLpGcTTGVNACQVGlppuNGNHMQVMFDWVSRI 0 0 2 3 +2539 PF05149 Flagellar_rod Paraflagellar rod protein Moxon SJ anon Pfam-B_6464 (release 7.7) Family This family consists of several eukaryotic paraflagellar rod component proteins. The eukaryotic flagellum represents one of the most complex macromolecular structures found in any organism and contains more than 250 proteins [1]. In addition to its locomotive role, the flagellum is probably involved in nutrient uptake since receptors for host low-density lipoproteins are localised on the flagellar membrane as well as on the flagellar pocket membrane [2]. 
21.90 21.90 22.10 27.80 21.60 21.80 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.74 0.70 -5.21 13 124 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 28 0 18 115 0 271.00 42 47.55 CHANGED scc.lothl-s+cph+ptscpDLc+lc-phpphssscscppcpapsp+pc.-chlppNp-pQppsaccIp-LtccLpc.......LupcRpc.V-cRlcttccEcpRcssappFlphuspHpptLppolpNsctulcsssplpshVt-uCcplsuh.p-chppsLu-hhhtlpc-+LctFRshYlshGcLhaKKE+RLEpl-+pIRhsclppEhuh-oLDPNAKcaucscc-Lhtt+ppVppplshlcp+hcpu.csFcPTEcuLht....AGlcFVHPh-ElpctslpRps+lL-Y+phhs .........................ppssthh-s++ch+ppscpDLcplp-sIQcsDh-DucshKRausp.+c+S-.chlpcN.-pQ--sWp+Ip-LERpLQc.......Lus-Rh-EV+RRIEcp-REE+R+s-.appFLclsuQHK+hLcholhNCDhAhcssuhlE-hVsEuCsslcu+.aD+sppcLusLplpV+pEaLEtFRtLYhTLGpLlYKKE+RLEElDRpIRssHlQlEhslETFDPNAKcau-tKK-LhchRtpVEcELthL+-K.spAL-.FpsoEcuL.t....AGl-FsHPh-E.pctslsRRSKhlEY+s+l............... 1 7 13 18 +2540 PF00700 Flagellin_C Bacterial flagellin C-terminal helical region Bateman A anon Pfam-B_41 (release 2.1) Family Flagellins polymerise to form bacterial flagella.\ \ \ There is some similarity between this family and Pfam:PF00669, particularly the motif NRFXSXIXXL. It has been suggested that these two regions associate [2] and this is shown to be correct as structurally this family forms an extended helix that interacts with Pfam:PF00700. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.57 0.72 -3.82 33 7466 2009-01-15 18:05:59 2003-04-07 12:59:11 16 27 2537 11 1427 5887 1917 81.80 34 21.70 CHANGED lshlspA...lsplsshpupLGssQsRlcpssssLsspps.lssuhsclccVDhsctsochsphphhhQsuhulhAphspl..slLshL ...........................ttlDsA...lspls.shRusL...GAhQN.R.l.p.ps...l.s...NLsst.......spN.l....suu.p.ScIcDsDhApEsssho+..tplLpQAutuhLu.Q.AN.phs.p...slLpLL....................... 
0 485 932 1156 +2541 PF00669 Flagellin_N Bacterial flagellin N-terminal helical region Bateman A anon Pfam-B_37 (release 2.1) Family Flagellins polymerise to form bacterial flagella. This family includes flagellins and hook associated protein 3. Structurally this family forms an extended helix that interacts with Pfam:PF00700. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.36 0.71 -4.38 34 9093 2009-01-15 18:05:59 2003-04-07 12:59:11 15 29 2631 13 1625 7111 2324 128.50 32 35.14 CHANGED IsTshhu.ssppslspspsplsputpclooGh+hssut-.uushuhusshpsphssLsphscssshuhuhlpsspu..Alsphtsshpp.........lppLsstuussssssh..sppshpsplppLhspls.....TsaNGphlhuGsto .......................................................lsss..s...s.pt...l...s...psp...s...t...hsp...shp+L.S...S.GhRIss.AtDD.uu.uhulupphpsphpuLspu.tcNs.scuhuhhQosEu..u.L.s.p...h.s...s.h.LpR.........h+-..L......u...l...Q...u....u...N..u...o..t.oss......D+....su....l.p.pE....l....p....pLh.sc...l.......spl..uspTp.a.N.G.pplLsGs......................................................................... 1 534 1032 1312 +2542 PF01350 Flavi_NS4A Flavivirus non-structural protein NS4A Bateman A anon Pfam-B_211 (release 3.0) Family Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. The NS4A protein is small and poorly conserved among the Flaviviruses. NS4A contains multiple hydrophobic potential membrane spanning regions [1]. NS4A has only been found in cells infected by Kunjin virus [2]. 
20.60 20.60 21.90 93.70 19.60 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.65 0.71 -4.64 32 3233 2009-09-11 15:45:06 2003-04-07 12:59:11 12 17 110 0 0 3283 0 144.70 60 4.30 CHANGED ssllcslutlPpahtp+stpAhDshYsLhsuEtGoRAa+tAhpElPEulpTllhlshLulhThGlhhhhht++ulu+hslGhlVlssussLhWhuslssspIAGshllhalLhlVLlPEPt+QRS.pDNpLAhhllslhsllGhV .....lsLIpElG+lPpHhsp+spsALDNLshLHTuEpGGRAYRHAlEELP-TlETLhLLuLlullTGGlhLFhhSGKGIGKholGhlClhsuShLLWMApVpspWIAuoIlLEFFLMVLLIPEPEKQRTPQDNQLAYVVIulLTllusV.. 0 0 0 0 +2543 PF01349 Flavi_NS4B Flavivirus non-structural protein NS4B Bateman A anon Pfam-B_211 (release 3.0) Family Flaviviruses encode a single polyprotein.\ This is cleaved into three structural and seven non-structural proteins. The NS4B protein is small and poorly conserved among the Flaviviruses. NS4B contains multiple hydrophobic potential membrane spanning regions [1]. NS4B may form membrane components of the viral replication complex and could be involved in membrane localisation of NS3 and Pfam:PF00972 [1]. 25.00 25.00 81.70 81.20 18.10 17.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.49 0.70 -5.45 35 3305 2009-01-15 18:05:59 2003-04-07 12:59:11 12 19 110 0 0 3287 0 244.40 64 7.42 CHANGED NEMGaLE+TKpDlttlFht.ppp...ptpssshp......slDL+PAouWuhYushsolhTPhlhHhlpophtshuhuuluupAssLhsLspGhPFhslchsVshLslushsphTssoLhsulsLsshHauhllPGhpAphs+pAp+pshuGlhKNssVDG.lssDlsch-stsshhEKKlu.llLlsLslsulllsRsshuhtEAusLsoAAlspLh-ssssshWshssAsGhsslh.RGsahuslsl...sWshhpssc ....NEMGhLEpTK+DLG..hGphtt....pp.s.p.u.....h..LDlDL+PASAWTLYAVATTllTPMLRHoIENoosNlSLTAIANQAslLMGLsKGWPlSKMDlGVPLLAlGCYSQVNPlTLTAAlLLLlsHYAIIGPGLQAKATREAQKRTAAGIMKNPTVDGIssIDL-Pls.YDsKFEKQLGQlMLLlLCssQlLlMRToWALCEAlTLATGPloTLWEGuPG+FWNTTIAVSMANIF.RGSYLAGAGLhFSlMp......p......... 
0 0 0 0 +2544 PF00972 Flavi_NS5 Flavivirus RNA-directed RNA polymerase Finn RD, Bateman A anon Pfam-B_200 (release 3.0) Family Flaviviruses produce a polyprotein from the ssRNA genome. This protein is also known as NS5. This RNA-directed RNA polymerase possesses a number of short regions and motifs homologous to other RNA-directed RNA polymerases [2]. 19.00 19.00 19.00 19.10 18.90 18.90 hmmbuild -o /dev/null HMM SEED 649 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -13.14 0.70 -6.64 7 5252 2012-10-02 12:54:00 2003-04-07 12:59:11 15 19 173 49 1 4283 0 449.30 66 21.01 CHANGED Th.sDlsLssGTRuVsttpsh..shptltcRlp+L+pEapsTWahDp-HPYRTWpYaGSY.scsoGSAuShVNGVVKLLohPWDsltpVTpMAMTDTTPFGQQRVFKEKVDT+s.-P.tGTRclh+ssspWLachLu.+cKpPRlCT+EEFIsKVRSsAAlGAhhpEpptWpoApEAVpDs+FW-LVDcERphHLpG+CcoClYNMMGKREKK.uEFG+AKGSRAIWYMWLGARFLEFEALGFLNEDHWhSRENShuGVEG.GLphLGYlL+-lup+pGGhhYADDTAGWDTRITcsDL-sEphlhphM..sscH+tLApAlhcLTYpNKVVKV.RPsscGt..slMDVISRRDQRGSGQVVTYuLNThTNhcVQLlRhhEuEGVIptpchpp.....hhtlptWLpcpGp-RLpRMAlSGDDCVV+PlD-RFusALpaLNsMuKsRKDIspWcPS+GWssWppVPFCSHHFHElhhKDGRslVVPCRsQDELIGRARlS.GsGWsl+ETACLuKAYAQMWsLhYFHRRDLRLhu.AIsSAVPscWVPTGRTTWSIHuptEWMTTEDMLcVWNRVWIp-NPaMpDKTslpuWcDlPYLsK+pDhhCGSLIGhppRATWAcsIhsulppVRplIGpEc.....YsDYhssMcRYpttt-.t 
..................................................hE.DVsLGuGTRtlsh.....ss.phItpRIppl+pEat..poWHhDpppPY+TWsYHGSY-s+.oGSASShlNGVV+LLoKPW.Dsls.VTphAMTDTTPFGQQRVFKEKVDT+s.cs..Gst.lhp.TspWLWt..Lu.+pKpPRhCo+EEFhpKVpoNAAhGAhF.-pNpWpoA+tAV-D.cFWchVccERphHh.GcCtoClYNMMGKREKK.GEFGKAKGSRAIWaMWLGARaLEFEALGFhNEDHWhuRcN.ShuGVEG.G..Lp....+L.GYILR-..luph....GGthYADDTAGWDT.RIT.tDLpNEth.lh.p.h..csEHt.LApuIhcL.TYppKVV+V.RPs.pG...TVMDlISRcDQRGSGQ.VsTYuLNT.F.TNhtsQLlR.MEuEGlht...ph.p......t.tl.pWL.p.G.ERLpRMAlSGDDC..VV...KPlDD.RFA.s.uLhhLNsMuKlRKDI..pWpPSpGW.sWppVPFCSpHFppLlMKDGR.lVVPCRsQDELlGR..ARlS.GA.GWsl+-TACLuKuYAQMW.LhYFHRRDLRLhusAICSAVPspWlPTuRTT.WSIHuttpWMTTE.DMLpVWNRVWIp-NPWM..EDKT.VpsWp-lPYlGKRED.WCGSLIGhpuRATWApNI.sAIpQVRtlIG.p.E.p.....YhDY.MsSh+Racpp.......................... 1 0 1 1 +2545 PF01570 Flavi_propep Flavivirus polyprotein propeptide Bashton M, Bateman A anon Pfam-B_304 (release 4.1) Family The flaviviruses are small enveloped animal viruses containing a single positive strand genomic RNA [1]. The genome encodes one large ORF a polyprotein which undergos proteolytic processing into mature viral peptide chains. This family consists of a propeptide region of approximately 90 amino acid length. 27.60 27.60 28.20 28.30 25.60 27.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.42 0.72 -4.26 9 5590 2009-09-11 10:42:09 2003-04-07 12:59:11 12 23 116 2 0 4419 0 80.40 58 3.84 CHANGED s+pG-shMllstp-+sculhh+sssG.NhCslhAhDlGchC-DTlTYcCPplsp.sEP-DlDCWCssss.saVpYGpCspsucpRRp+ .........o.RsGEPpMIVsppE+G+uLLFK.TusGlNhCTLhAMDLGEhC-DTlTYcCPhlsp.sEPEDlDCWCstos.saVpYGpCo.p..sucpRRp+........... 0 0 0 0 +2546 PF02525 Flavodoxin_2 NADHdh_2; Flavodoxin-like fold Bashton M, Bateman A anon Pfam-B_1456 (release 5.4) Domain This family consists of a domain with a flavodoxin-like fold. The family includes bacterial and eukaryotic NAD(P)H dehydrogenase (quinone) EC:1.6.99.2. 
These enzymes catalyse the NAD(P)H-dependent two-electron reductions of quinones and protect cells against damage by free radicals and reactive oxygen species [1].\ This enzyme uses a FAD co-factor. The equation for this reaction is:- NAD(P)H + acceptor <=> NAD(P)(+) + reduced acceptor. This enzyme is also involved in the bioactivation of prodrugs used in chemotherapy [1]. The family also includes acyl carrier protein phosphodiesterase EC:3.1.4.14. This enzyme converts holo-ACP to apo-ACP by hydrolytic cleavage of the phosphopantetheine residue from ACP [2]. This family is related to Pfam:PF03358 and Pfam:PF00258. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.50 0.71 -4.81 247 7847 2012-10-03 05:08:30 2003-04-07 12:59:11 12 15 2899 188 1533 11821 2314 183.60 23 92.92 CHANGED h.+lLll.ups.........ph.t.......uhspplspthhpthpptt......p....Vph.pDLhp......s.ht....................................................s.tp...t.hss....tt-p..pplhtADhllhthPha.auhPuhLKsalDpVhptGh.....sa.........t......tsLh.....GK.cshlhsop.Gus.hsht........t.sh-thh...hcshh.tahGhpsl....phhh...hpsht......s.pt.ppthppshp....phtp.l ........................................................................................................................plLllhups..........p.............p.s...s....t.ls....c.t...h....h....c..t....hpptt.............pp....................Vp.h...hD.....Lht...........................................................................t..p...........hsh.......tt-p....pp.lh....t.ADh..llhthPha.h...auhPshLK.t...alD..c...V....h...p..t..Gh.........sa..................t..u......s....t.sh.Lp.....GK...+.h.hl.h.hot....Gu.s...ptht.....................tt.sh..-.h.h..h....hc.t.h.h..t.a.hGhp.....l......tshh........hp.sht.......p...tpthtp.hhtth....h................................................................................................ 
0 410 843 1194 +2547 PF02441 Flavoprotein Flavoprotein Bateman A anon Pfam-B_1622 (release 5.4) Family This family contains diverse flavoprotein enzymes. This family includes epidermin biosynthesis protein, EpiD Swiss:P30197, which has been shown to be a flavoprotein that binds FMN [1]. This enzyme catalyses the removal of two reducing equivalents from the cysteine residue of the C-terminal meso-lanthionine of epidermin to form a --C==C-- double bond. This family also includes the B chain of dipicolinate synthase a small polar molecule that accumulates to high concentrations in bacterial endospores, and is thought to play a role in spore heat resistance, or the maintenance of heat resistance [2]. dipicolinate synthase catalyses the formation of dipicolinic acid from dihydroxydipicolinic acid. This family also includes phenyl-acrylic acid decarboxylase Swiss:P33751 (EC:4.1.1.-) [3]. 22.20 22.20 22.20 22.20 21.90 22.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.76 0.71 -4.47 143 8070 2009-01-15 18:05:59 2003-04-07 12:59:11 14 27 4696 49 2213 5827 2565 130.60 29 43.39 CHANGED t+lllulTGusushh.uhcllctLt...cp...............hc..lp.llhopsA.pphlp.ts...............................thhpp..............................................lhsp............................tpshhspls..up...sDhhllsPsossTluKlAsGluDs.Llsps....................shst.h..........p+s.l..l.lsPshssh.....sshs 
...................................................................................+llluloG.u.hu.Ahc..uscLlptLp...ct.........................................u......h-....V+.llhop...uA...pphlss.oh.................................tshstpt.....................................................................................................................................lhs-..............t..............................ssps...h..s..+.I.........s.......h...up..h.........sDhhllAP.sousolA+lApGhuDs..Llops..................................................shs..s..............................ppP..ll.lsPu.hsshhh.p.h................................... 0 675 1384 1855 +2548 PF00460 Flg_bb_rod flg_bb_rod; Flagella basal body rod protein Finn RD anon Prosite Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.77 0.72 -7.27 0.72 -4.19 16 12885 2009-01-15 18:05:59 2003-04-07 12:59:11 15 19 2264 1 2918 8471 1594 30.40 34 10.36 CHANGED lhsuhouLsspppplcllusNlANss.TsGaK ..............h.huhoGhs..up.ppphsllusNlANs....s.TsGap.......... 0 910 1827 2357 +2549 PF02120 Flg_hook Flagellar hook-length control protein FliK Mian N, Bateman A anon IPR001635 Domain This is the C terminal domain of FliK. FliK controls the length of the flagellar hook by directly measuring the hook length as a molecular ruler [1]. This family also includes YscP of the Yersinia type III secretion system, and equivalent proteins in other pathogenic bacterial type III secretion systems. 
29.80 29.80 29.80 29.90 29.70 29.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.28 0.72 -4.31 344 2733 2012-10-01 19:58:36 2003-04-07 12:59:11 11 7 2004 1 679 2315 298 85.60 22 18.28 CHANGED tpphthtt.spshpphplpLsPscLGplplpl...phpss..p....lplpltsppspstphLcpshspL+ptLpp.......tGlpls.p..hsl..sttsttpt.t ......................tt.......hppstppsplcLcPtcLGplplpl...phsss..p.....h.plphhuppppspptLcpshspL+ppLsp.....pGlpls..p....hsl....stps.t...pt............................. 0 209 424 550 +2551 PF03963 FlgD Flagellar hook capping protein - N-terminal region Bateman A anon COG1843 Family FlgD is known to be absolutely required for hook assembly, yet it has not been detected in the mature flagellum [1]. It appears to act as a hook-capping protein to enable assembly of hook protein subunits [1]. FlgD regulates the assembly of the hook cap structure to prevent leakage of hook monomers into the medium and hook monomer polymerisation and also plays a role in determination of the correct hook length, with the help of the FliK protein [2]. This family represents the N-terminal conserved region of FlgD. A recent crystal structure showed that this region was likely to be flexible and was cleaved off during crystallisation [3]. 21.20 21.20 27.30 27.10 21.10 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.82 0.72 -3.91 135 2310 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 2042 0 554 1527 340 81.20 32 36.84 CHANGED sssssss....ssssssstst.........................s.sssssss.......hspc..s.FLpLLlsQLpNQDPhsPhDsscasuQLAQFSslEthpphNsslpsl ................................................................................sshss..............tst.t.t.t................................stssssss...hsps..s.FLpLLlsQLpNQDPhs..P..h-s.......schhuQhAQhSsVp.thpphNsslps............ 
0 176 346 446 +2552 PF02107 FlgH Flagellar L-ring protein Mian N, Bateman A anon IPR000527 Family \N 20.40 20.40 26.50 21.40 19.60 19.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.87 0.71 -4.71 159 1866 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 1593 0 422 1225 436 186.10 37 80.12 CHANGED ts.stsss...GSla..psup..........ssLa..................pDpRAt.....plGDIlTVhlpEs.spAopsusosts+....sushsh....sh..sshh..................uhstts....................t..hshsu.sss...asGsGss..spsssLsuolossVscVLPNGNLhIpGcKplplNptschlploGlVRPcDIs......ssNolsSs+lA-ARIpYuGpGtls-spp.GWLp+hass.lhP ...........................ss....sssGSlapsup........psLF..................p.D.+Rsp.....plGDllTlllpEs..ss.ASKsussstuRsusssh....uh...ssls................shststs.....................s..hpssu.sss...Fs..GpGuu..spuNohsG..olTVsVspV.Ls.N.GNLhlpGEK.p.lt..lNpG..sEhI.RlSGlVcPcDIu.........ssNTVsSsplAD..ARIpYsGpGhl....s.-.u.Q.p.h.G.WLpRaF.s.l.P................... 0 107 239 330 +2553 PF02119 FlgI Flagellar P-ring protein Mian N, Bateman A anon IPR001782 Family \N 25.30 25.30 25.40 26.90 25.00 24.80 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.01 0.70 -5.80 159 1904 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1613 0 429 1324 595 332.20 48 91.94 CHANGED RIKDluslpGVRsNQLlGYGLVVGLsGTGDp....ps.sFTpQSlpsMLpphG..........l.sl...s.............ss............h.cs..KNVAAVhVTApLPsF.u+sGpplDVTVSSlG-AKSLcGGTLLhTPL+Gu...DGplYAlAQGslllGGhuup..Gts....uplphNhPTsGRIPsGAhVERpl......ss.sh.spt.sslpLsLcpsDFo..TApRlscuIN....pth................Gsss......ApulDusolpl..phPts.p.p..tVsFluplEsLpV.....ps.cssA+VVlNpRTGTlVhGpsV+ls.ssAVoHGsLTVpIs-s..tV...SQPs.sh..........u...............tGpTslsspoplslppp.....ssphhhl..ts.usoLs-lVcALNslGssPpDlluILpAlKsAGALpAEL.ll 
.............................................................RI+DlsslpGVRpNpLlGYGLVVGLs.GTGDp....spo...PFTpQolsNMLpphG..........I..sl..Ps.......tss..................h..phKNVAAVMVTAsLPPF.u+tGQpIDVsVS..S..h...Gs..A..KSLRGGTLLhTPL..+Gs................DGpV.....YAl...AQGsllVGGh..uAp..........usu...SplpsNp.suGRIssGAhlEREl......................Ps.sF..ups...sslsL..pL..p..c..sDFo..sApplscsIN....phh........................G.ss..........ApAlDupolpVpsPpsss....s.............pVpFLAplpslpV.....ss..tstsAKVllNuRTGoVVhsppVplp.ssAVup...GsLoVslscp...p.V...SQPs.PF.....u......................sGpTsVsPpopIs.lppp.....suplh.l...ps.usoLsslV+ALNslGAoPtDLhuILQuh+pAGALcAcLplI...................................... 0 119 250 339 +2554 PF04316 FlgM Anti-sigma-28 factor, FlgM Kerrison ND, Finn RD anon COG2747 Family FlgM binds and inhibits the activity of the transcription factor sigma 28. Inhibition of sigma 28 prevents the expression of genes from flagellar transcriptional class 3, which include genes for the filament and chemotaxis. Correctly assembled basal body-hook structures export FlgM, relieving inhibition of sigma 28 and allowing expression of class 3 genes. NMR studies show that free FlgM is mostly unfolded, which may facilitate its export. The C terminal half of FlgM adopts a tertiary structure when it binds to sigma 28. All mutations in FlgM that prevent sigma 28 inhibition affect the C-terminal domain and is the region thought to constitute the binding domain. A minimal binding domain has been identified between Glu 64 and Arg 88 in Salmonella typhimurium (Swiss:P26477). The N-terminal portion remains unstructured and may be necessary for recognition by the export machinery [1]. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.51 0.72 -4.03 135 1810 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1600 5 419 1076 250 64.00 28 67.14 CHANGED DsVplSspuppl.pphptt.......................h..sstsslcpp.KVpplKpAIssGsYplDscclAcpllpht ....................................................................t.tVplSsttt...p.............................................h...ssss.-lsh-.+V..ptlKpAIps.GphplD..s..p+lAcsllp..h............... 1 135 267 349 +2555 PF05130 FlgN FlgN protein Bateman A anon COG3418 Family This family includes the FlgN protein and export chaperone involved in flagellar synthesis [1]. 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.80 0.71 -3.94 154 1850 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1595 3 413 1145 136 134.40 23 91.77 CHANGED M..................ppLhphLp..pphphhpp....LhpllppEppsLtst..c.hp.tLpplspp...Kppllpplpphc..ppptphhtphsht....................h..........ppspltphhpp....lpphhp...chpphNphNspLlppphptspphlshlpsspst.......tsYsssGptps ...................................ptLhphlpp..shlpp....LtslhcpEpptLpts.....s.ss..pLptlscp...KspLlspLsth-....ppRp...ph.pphs.........................................................tpspltpthpp...............lpphhp....pl+phNppNu...hLlptphchspp.hlphlpstpps.......shYsspGp................................................ 0 137 261 336 +2556 PF02465 FliD_N Flagellar hook-associated protein 2 N-terminus Mian N, Bateman A, Yeats C anon Yeats C Family The flagellar hook-associated protein 2 (HAP2 or FliD) forms the distal end of the flagella, and plays a role in mucin specific adhesion of the bacteria [2]. This alignment covers the N-terminal region of this family of proteins. 
22.70 22.70 22.70 23.80 22.60 22.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.99 0.72 -3.63 191 2117 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 1839 0 481 1599 330 99.40 27 18.77 CHANGED uSGlD..hssllspLhs.uEptsh...splppppsphpsploAauplpotLsshp.......s.shssL......tp.......s..s..sa.........ps..............pss...osS..ss.......shlos........ousus..As..sGoYsl.pVppLApupp ......................SulDhssllspLhp..A-ctsh....s....lspppsshsschoAa.usLcosLsshp........s.uhssL......sp..............s.....s......sa...............ss..............pps.......ooS....ss...........ss.h.os........o..s.s..us..Ah..sG..s.Y..sl.sVsQLApupp..................................................................... 0 160 302 395 +2557 PF02049 FliE Flagellar hook-basal body complex protein FliE Mian N, Bateman A anon IPR001624 Family \N 22.40 22.40 23.10 22.90 22.00 21.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.97 0.72 -3.96 186 2295 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 2048 0 548 1400 351 98.30 29 92.64 CHANGED tsh.............t..tssssssssssssssssssFushLppulspVsptQp...pusphspshtpG.c.ssslp-VMlAhp+AsluhphslpVRNKllpAYpEIMpMtl ..............................thhpshh..........t.sstspsssspsssuFushLpsu...l...s...cl...sppQp...sAcsts.pphthGc..sssLp-VMlshpKAslohphslpVRNKlVpAYpElMpMpl.... 0 175 345 444 +2558 PF01706 FliG_C FliG-C; FliG C-terminal domain Bateman A anon [1] Domain FliG is a component of the flageller rotor, present in about 25 copies per flagellum. This domain functions specifically in motor rotation. 
23.00 23.00 23.30 23.80 22.20 22.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.03 0.72 -4.08 154 2393 2012-10-02 13:19:07 2003-04-07 12:59:11 11 8 2047 8 574 1470 494 109.70 39 32.55 CHANGED lpslc.ppDsclApcI+cpMFsF-cl.hclcscslpplL+pls.scs...LslALKGAspp........lc-plhsNMSpRuuchlc--hcthGsV.+ls-VEpAQppIlphlRcLs-pGcI.ls ..................................pslc.chDs-LApcIp-pMFlFEsL.lc...lDD+uIQclLcEVs.s-s...LhlALKGAspt........L+-KhhpNM..SpRAA-..hlc--l.p.s.h.G..P.V.Rls-VEsAQ+pIltllR+Lu-sGEIhl.u............. 0 199 374 466 +2559 PF02108 FliH Flagellar assembly protein FliH Mian N, Bateman A anon IPR000563 Family \N 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.18 0.71 -4.26 50 2270 2012-10-02 21:03:42 2003-04-07 12:59:11 11 6 1901 0 508 1633 260 133.70 22 54.20 CHANGED ppt.hpplps....lhpplppslpph.......-pp....lpppLlpLslplu+pllt.pclpssPphllshlccsL.tthPhsspplplplpPsDhshlcpths.pthpthta.....plhsDsslspGGCplpossuplD.......uslpsRhcpl .................................................................................................p....tphpt.lhsphppsh.pth...........ppp...........hppcLh.phulp.h....A....+pVlt..p....ph..p...s......s.s.p.t.l......l.p...h.l.p.psL...t...p..sh.....t....s......splpl+V...p....P....-.D....hp....hl...c...c....t...hs....t.....h.p...h...psa...................cl.hsD..ssL.....p........GuChlps-pGp.lD.......uslpsRhpp.......................... 
0 169 313 407 +2560 PF02050 FliJ Flagellar FliJ protein Mian N, Bateman A anon IPR000809 Family \N 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.52 0.71 -4.07 147 1890 2012-10-01 21:16:01 2003-04-07 12:59:11 11 4 1724 1 454 1152 113 121.90 22 82.41 CHANGED -cutppLsp...sppphpptppplptLpp.tppa..tpthpsph.......sphpphppalsplcpsItppppplpthcpplpptpppapptptchcphchLhp+ctpppppt.p+pEQ+hhDEhAhpta .......................................................-pAtptLup...hp..pthpp....t....p....pp....Lp..Lhp....hpp....-a....ppphs....s.......sh...t..G.....h.....ss.....sphhs..hpp..Flp.......sLc.psIsppcp....plpphppcl.......-pspppapctppchpshppLp-+pppptthtps+t-QKthDEhAtpt................. 0 149 289 371 +2561 PF03748 FliL Flagellar basal body-associated protein FliL Bateman A anon COG1580 Family This FliL protein controls the rotational direction of the flagella during chemotaxis [1]. FliL is a cytoplasmic membrane protein associated with the basal body [2]. 23.80 23.80 23.80 23.90 23.50 23.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.16 0.72 -4.00 175 2462 2009-09-10 21:49:05 2003-04-07 12:59:11 9 6 1859 0 634 1614 444 101.40 24 62.20 CHANGED shahsl....s...shslNlsss.....tt..+alp.lplslps....pspps.....hpplcp.....phP..h.lRssllthluspshp.....-lpss.pGpppL+pclhcpls.hl.............hpsp...............................lpsVhFosFllQ .....................................ahsL....c...shslNLsss.....ttp..+hlp.lslsLpl..........pspts.........tsplpp.................thP........lRspllhhhup.....ps.h.p.....pLps..pG+.ppL+pclppplsphL.............stsp............................................lpcVLaTsFll..................................... 
0 199 394 512 +2562 PF02154 FliM Flagellar motor switch protein FliM Mian N, Bateman A anon IPR001689 Family \N 20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.01 0.71 -4.64 7 1999 2012-10-01 19:50:22 2003-04-07 12:59:11 10 2 1927 3 493 1477 440 191.20 37 56.93 CHANGED YDFppss+hS+EplpoLphla-pFARhhoouLpshLRphlclplsuVcphsYtEFlpSlPsPTsLslhphcPLcGsullplsPolsFshlDpLhGGcGpshs...csR-hTcIEppllppllchlLtsh+EAWpslhslcschschEsNPpFspIVs.PsEhllllsLclclGchpGhhNlClPahslEPIhphL .............................................................YDhpp.cRls+-+.lpsL-hIpE+FARphphsLhshlRp.ss-lslsulc.hs.Y.p.EF.h.c......sLP.s.P.T.s.L...N....l....l..c......h.....c.....P.....L...+.....G...o.....u.....L.l.hhsPsLlFhhlDsLaG.G.cG.+......h........st.......-uRE..F..T.p.h.E....pc....l....lp....p....lLclsh....p....s....hp-AW....p.......s.l....h..s.....l...c.s....c...a.h..c.....s....E..h......s.s..p....a.....ss....I..so...Ps-.l.Vlh.ss.aclcl.G.s.h.s.GphsI.ClPashlEPlp-hL................................................................. 1 173 321 407 +2563 PF04347 FliO Flagellar biosynthesis protein, FliO Mifsud W anon COG3190 Family FliO is an essential component of the flagellum-specific protein export apparatus [1]. It is an integral membrane protein. Its precise molecular function is unknown. 
21.10 21.10 21.10 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.88 0.72 -3.94 108 1689 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1647 0 458 1182 140 88.50 26 54.79 CHANGED AWLl+Rh......t....s..sss.tssstl+llushslGs+E+lllVcVssp.....llLGVTspplshLcp....l......................Pssssts.st.s...............................FuptLpphhpp ....................sWll+.Rh...............t...sstt..ssspsL+lluutuLGs+c+lllV..c.Vscp.....pllLG.VTs.s.pIslLcp.....L....................P.ps.tt.t.st............................................Ftphhpphh.......................................................................... 0 155 289 373 +2564 PF00813 FliP FliP family Bateman A anon Pfam-B_1679 (release 2.1) Family \N 23.70 23.70 24.40 24.10 23.60 23.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.27 0.71 -4.60 141 3505 2009-01-15 18:05:59 2003-04-07 12:59:11 15 4 2195 0 652 2050 491 188.20 45 81.00 CHANGED llTlLoLhPulllhhTSFTRIllVLulLRpALGhQQsPPNplLlGLALFLThFlMsPVhpphhpsuh.........pPhhstpl.....................shppAhpputtPh+pFMhcpT....cpp....-LthF.....hclu..........ptt.....sp.....shc-ss......hhlLlP..............AFhlSEL+sAFpIGFllalPFLlIDLVVASlLMuMGMMMlsPshISLPFKLlLFVLlDGWsLlltuLl .................lThLollPslllhhTSFs+lllVhulLRsALGhQpsPPN.lLhGlALhLThFlMsPVhpclhppuh.....pP..h..h..ptpl.....................ohppAhc.c..utpPh+pFMl+pT....ccp.....-lthF.hclu..........................ptt...............p..s.csss...hhlLlP..............AFslSELKoAFpIGFhlalPFLlIDLVVuSlLMAhGMMMlsPshISLPFKLlLFVLlDGWsLlltuL.............................. 0 200 389 513 +2565 PF02561 FliS Flagellar protein FliS Mian N, Bateman A anon COGs Family FliS is coded for by the FliD operon and is transcribed in conjunction with FliD and FliT, however this protein has no known function. 
22.80 22.80 22.90 22.80 22.20 22.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.40 0.71 -4.26 6 2094 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1843 11 478 1234 391 123.60 32 91.09 CHANGED hps.shpAYpQs....pVpTAoPtcLllMLY-cuIppLppAtc.....shpspchc+tscpI.+Ap-IIo.ELpsoLDhEpGtclupNLhulYsahh+pLhpANlcp-ssclspVhshh+sLp-AW+clhps. ............................................hs....uhpsYt....ps.......pl....h....s....A....SPppLlhMLa-GslptltpA+h........thp.pps.....htt+sttlsKAhsIlp.tLpssL-h..E....p..G.....u.....Elu..psL...tuLYsahh.pc..L..h..p..A.N..l....c..s.D...s...p.tl-EV..sllpslt-AWcph..................................... 0 163 309 394 +2566 PF01698 FLO_LFY Floricaula / Leafy protein Bashton M, Bateman A anon Pfam-B_1633 (release 4.1) Family This family consists of various plant development proteins which are homologues of floricaula (FLO) and Leafy (LFY) proteins which are floral meristem identity proteins. Mutations in the sequences of these proteins affect flower and leaf development. 
20.20 20.20 20.20 20.70 19.80 20.10 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.53 0.70 -5.61 38 1844 2009-09-13 17:08:47 2003-04-07 12:59:11 11 5 1128 2 26 1110 0 134.20 55 97.00 CHANGED MDP-s.F.....oAu.......hFK.WD....P+ssh.........s.Ps.....spl..pths.s.......sP...............s.ssht..hR...................uLE-LFpuYGVRYhTsAKIuELGFTssTLlsM+-EELDDMMsoLuclFRWDLLVGERYGIKAAVRAERRRL--...................................-..s+R+t..............ssDsss..sLDALSQE....G.LSEEsstp...h..uuuS..........................GGsu.sshthhshs......pc.........c++t....+p+++ptcc............................t.p..t.tttstts..........uGssGt....ERQREHPFIVTEPGEVARGKKNGLDYLFHLYEQCRcFLlQVQsIAKERGEKCPTKVTNQVFRYA.KKsGASYINKPKMRHYVHCYALHCLDEEuSNsLRRuaK.ERGENVGAWRQACYpPLVslAucpGWDIDulFNuHPRLuIWYVPTKLRQLCHhERussss ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................FcLYEQCtcFL.pV.QplAKE+GEKCPTK.......VTNpVFRaA.KhsGA............................................................................................................................................................... 0 11 21 24 +2567 PF00624 Flocculin Flocculin repeat Bateman A anon Pfam-B_51 (release 2.1) Repeat This short repeat is rich in serine and threonine residues. 
20.80 20.80 21.60 20.80 20.60 20.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.32 0.72 -4.12 173 1092 2009-01-15 18:05:59 2003-04-07 12:59:11 13 66 28 0 906 1281 2 43.60 44 27.58 CHANGED tsso.Toho.sWos.shooTaSThhsThoss-G..pTTcTIYaVtTPh ...........h..to.Toho...sWTG.oh.ToThSTph.TThTGo-G....sT...sETlYhVcTPh..... 0 41 549 884 +2569 PF05202 Flp_C Recombinase Flp protein Studholme DJ, Finn RD anon DOMO:DM01865; Family \N 25.00 25.00 63.30 62.60 24.40 18.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.57 0.70 -5.16 8 18 2012-10-02 14:09:14 2003-04-07 12:59:11 7 2 15 12 3 31 0 194.40 52 58.85 CHANGED uspluccloKllcs-.csIWpllsplhsoI-ppoppsop+AtYpalLlsTFhNCCRtSDLKNsDPpTFEllpN+aLG+llRAhVsETKTRpsRaIYFFPlp.G+sDPLlALa-aLppspPl.K..oRTScpcoc.Q-aQLLRDoLlpsYDRFluKcuspulFuIhpGPKSHLGRHLMuSYLSpsphschsosaGNWSAuccphpSsVARu+YsHs.ppslPscLFAFLSsYYtcsspGch......cLhssp .h...s.th.+hh..p...hathh..hhp.hp..sh.sphps.hphhh.hohhNCsR.sDlKNhDPpoFcll.spaLGhhlpshVs-TKTph.RalYFassp.sthDPllhLcphhp.spPh.K..shossppsp.QcaQLL+-sLltsYs+hltKpss.ulFuIhpGPKSHlGRHLMsSaLSh+sLsELssllGNWS...DctuSuVARosYoHp.hsuIPDHhFAhlScYYshsP.uKphlsh..KDcs.P........ 0 2 2 2 +2570 PF04964 Flp_Fap Flp/Fap pilin component Bateman A anon COG3847 Family \N 23.90 23.90 23.90 23.90 23.60 23.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.16 0.72 -4.36 26 1017 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 521 0 401 913 41 46.60 34 72.55 CHANGED Fh+-E.sGATAIEYGLIAuhIAlsll..ssssslu..ssLpstFsplusul ...........FhcDE.pGsTAIEY.GL.IAul.lAlsl..l......susss.lG....ssl.s.shF.sslsst................ 
0 120 223 315 +2571 PF03930 Flp_N Flp; Recombinase Flp protein N-terminus Finn RD, Studholme, DJ anon DOMO:DM01865; Domain \N 25.00 25.00 25.20 42.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.67 0.72 -4.22 7 12 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 10 12 1 23 0 80.70 32 19.65 CHANGED hss.....cchstlKhoTFhKYpppIppolpaDhsspsVpFcYHLKcsp-LpcsLccshtPhpFpl...pupKKssshhplhuuhc.+hp ............cpsstlKhuTFhKYpphIupoLpaDhsspslpFcYHhpcspcLpcsLcphhtshpFsl...tsp++.ssh.phhuuhphp................ 0 1 1 1 +2572 PF02662 FlpD Methyl-viologen-reducing hydrogenase, delta subunit Bashton M, Bateman A anon COG1908 Family This family consist of methyl-viologen-reducing hydrogenase, delta subunit / heterodisulphide reductase. No specific functions have been assigned to this subunit. The aligned region corresponds to almost the entire delta chain sequence and contains 4 conserved cysteine residues. However, in two Archaeoglobus sequences this region corresponds to only the C-terminus of these proteins Swiss:O29030 and Swiss:029595. 22.30 22.30 22.90 23.90 22.20 22.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.23 0.71 -4.43 66 475 2009-09-12 00:14:48 2003-04-07 12:59:11 11 77 203 0 303 485 67 116.80 37 34.02 CHANGED +IluFsCsasuYuuA..DhAG......ssRhpYPsslRlI+VhCoG+lsstallcAhpcG.ADGVhVsGC+hGD...CHah.pGNhpAccRhphl+chLpplGl-s-RlchtalSuuEup+asphlp-hscclccLG .........................+IluFhCpasuYsu.A..DhAG......ssRhpYPss.lRlIRV.CoG+lsshallcAhpp..G.ADGVhlsGC+..G-...CHah.pGNh.........h...up+RhthlpphLpplGl-s-RlchpalSuuEut+asphlpchscplcpLG................................... 0 150 250 287 +2573 PF02947 Flt3_lig flt3_lig; flt3 ligand Griffiths-Jones SR anon Structural domain Domain The flt3 ligand is a short chain cytokine with a 4 helical bundle fold. 
25.00 25.00 36.60 29.60 17.80 16.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.84 0.71 -4.27 3 56 2012-10-02 01:28:15 2003-04-07 12:59:11 9 3 25 12 16 72 0 110.10 67 52.30 CHANGED TPDCYFSHSPISSNFKVKFRELTDHLLKDYPVTVAVNLQDEKHCKALWSLFLAQRWIEQLKTVAGSKMQTLLEDVNTEIHFVTSCTFQPLPECLRFVQTNISHLLKDTCTQLLALKPCIGKACQNFSRCLEVQCQ ....sC.F.aSPI.SSsFt.phtpL...............SDYLLQD.YPVTVAoNLQD-cLCGAhW+LVLAQRWMtRLKTVAGScMptLLEtVNTEIHFVTpCAFQ.s.PsCLRFVQsNIS+LLQ-TspQLhALKPhIs+.......pNFSpCLELQCQ................ 0 1 1 4 +2574 PF04772 Flu_B_M2 Influenza B matrix protein 2 (BM2) Kerrison ND anon Pfam-B_2165 (release 7.6) Family M2 is synthesised in the late phase of infection and incorporated into the virion. It may be phosphorylated in vivo. The function of BM2 is unknown [1]. 25.00 25.00 147.40 147.30 20.00 19.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.42 0.72 -4.00 4 501 2009-09-11 00:30:47 2003-04-07 12:59:11 7 1 487 16 0 121 0 108.60 95 99.99 CHANGED MLEPFQILSICSFILSALHFMAWTIGHLsQIKRGVNhKIRI+sPNKETINREVSILRHsYQKEIQAKETMKElLSDNMEVLSDHIVIEGLSAEEIIKMGETVLEVEELp MLEPFQILSICSFILSALHFMAWTIGHLNQIKRGVNMKIRIKGPNKETINREVSILRHSYQKEIQAKETMKEVLSDNMEVLSDHIlIEGLSAEEIIKMGETVLElEELH.. 0 0 0 0 +2575 PF02942 Flu_B_NS1 Influenza B non-structural protein (NS1) Bateman A anon Pfam-B_198 (Release 6.4) Family A specific region of the influenza B virus NS1 protein, which includes part of its effector domain, blocks the covalent linkage of ISG15 Swiss:Q64339 to its target proteins both in vitro and in infected cells. Of the several hundred proteins induced by interferon (IFN) alpha/beta, the ubiquitin-like ISG15 protein is one of the most predominant. Influenza A virus employs a different strategy: its NS1 protein does not bind the ISG15 protein, but little or no ISG15 protein is produced during infection [1]. 
25.00 25.00 246.10 245.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.67 0.70 -5.47 5 839 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 785 7 0 395 0 243.30 96 87.75 CHANGED MADNMTTTQIEVGPGATNATINFEAGILECYERLSWQRALDYPGQDRLNRLKRKLESRIKTHNKSEPEuKRMSLEERKAIGVKMMKVLLFMNPSAGIEGFEPYClKNPSNSNCPNCsWADYPPTPGKYLDDIEEEPENVDDPTEIVLRDMNNKDARQKIKEEVNTQKEGKFRLTIKRDIRNVLSLRVLVNGTFLKHPNGYKSLSTLHRLNAYDQSGRLVAKLVATDDLTVEDEEDGHRILNSLFERF ...MAsNMTTTQIEVGPGATNATINFEAGILECYERLSWQRALDYPGQDRLNRLKRKLESRIKTHNKSEPESKRMSLEERKAIGVKMMKVLLFMNPSAGIEGFEPYCMKSSSNSNCsKYNWTDYPSTPGRCLDDIEE.EPEDVDGPTEIVLRDMNNKDARQKIKEEVNTQKEGKFRLTIKRDMRNVLSLRVLVNGTFLKHPNGYKSLSTLHRLNAYDQSGRLVAKLVATDDLTVEDEEDGHRILNSLFERL........ 0 0 0 0 +2576 PF03506 Flu_C_NS1 Influenza C non-structural protein (NS1) Bateman A anon Pfam-B_980 (release 7.0) Family The influenza C virus genome consists of seven single-stranded RNA segments. The shortest RNA segment encodes a 286 amino acid non-structural protein NS1 [2]. This protein contains 6 conserved cysteines that may be functionally important, perhaps binding to a metal ion. 25.00 25.00 114.50 114.10 18.70 18.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.02 0.71 -4.50 2 111 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 80 0 0 66 0 146.10 98 64.31 CHANGED GELLFNGTILQAESsTMT.ASVEMKGKK.PIDFsPSNIAPIGQNPIYLSPCIPNFDGNVWEATMYHHRGATLTKTMNCNCFQRTIWCHPNPSRMRLSYAFVLYCRNTKKICGYLIA+QVAGIETGIRKCFRCIKSGFVMATDEISLhILpSIKSGAQLDPYW GELLFNGTILQAESPTMTPASVEMKGKKhPIDFAPSNIAPIGQNPIYLSPCIPNFDGNVWEATMYHHRGATLTKTMNCNCFQRTIWCHPNPSRMRLSYAFVLYCRNTKKICGYLIARQVAGIETGIRKCFRCIKSGFVMATDEISLTILRSIKSGAQLDPYW 0 0 0 0 +2577 PF03555 Flu_C_NS2 Influenza C non-structural protein (NS2) Bateman A anon Pfam-B_346 (release 7.0) Family The influenza C virus genome consists of seven single-stranded RNA segments. 
The shortest RNA segment encodes a 286 amino acid non-structural protein NS1 Pfam:PF03506 as well as the NS2 protein. The NS2 protein is only about 60 amino acids in length and of unknown function. 25.00 25.00 133.60 133.10 21.30 21.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.00 0.72 -4.05 2 203 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 80 0 0 112 0 56.80 99 27.82 CHANGED VKSTNLMAFVATKMLERQEDLDTCTEMQlEKMKsSTKARL+TESSFAPRTWEDAIKD VKSTNLMAFVATKMLERQEDLDTCTEMQVEKMKTSTKARLRTESSFAPRTWEDAIKD. 0 0 0 0 +2578 PF00598 Flu_M1 Influenza Matrix protein (M1) Bateman A anon Bateman A Domain This protein forms a continuous shell on the inner side of the lipid bilayer, but its function is unclear. 20.90 20.90 21.10 23.00 20.20 18.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.86 0.71 -4.83 2 22622 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 22201 27 0 3071 0 152.20 94 61.75 CHANGED SLhs-s.sYlLSlh.sG.hKAElAp+LcshFuGKphDL-uhhEWlKs+.hLoslpKullGhshshhhPp-p..p+RRFlppsLsG.Gssss.hcthlhh.RKh+RploFHtAhEIA.uapuuALh.ChhlhY.phGshohpVhLGhlCAhCEp.Asp ..........SLLTEVETYVLSIlPSGPLKAEIAQRLEDVFAGKNTDLEALMEWLKTRPILSPLTKGILGFVFTLTVPSERGLQRRRFVQNALNGNGDPNN.MDRAVKLYRKLKREITFHGAKEVALSYSoGALASCMGLIYNRMGTVTTEVAFGLVCATCEQIADS....... 1 0 0 0 +2579 PF00599 Flu_M2 Influenza Matrix protein (M2) Bateman A anon Bateman A Family This protein spans the viral membrane with an extracellular amino-terminus external and a cytoplasmic carboxy-terminus. 
25.00 25.00 25.50 25.50 24.90 24.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.24 0.72 -4.20 2 21616 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 21060 57 0 4083 0 92.50 89 100.00 CHANGED MSLLTEVETPh+stWECRCsDSSD.LVshASIhGILHLILWIhDRLFFKChYRRh+aGLKRGPuTuGlPESMREEYRQEQQSsVsVDsGHFVNIELE ....MSLLTEVET...PhRNEWECRCsDSSDPLVlAAsIIGILHLILWILDRLFFK...CIYRRFK..YGLKRGPSTEGVPESMREEYRQEQQsAVDsDDGHFVsIELE............. 0 0 0 0 +2580 PF00506 Flu_NP flu_virus_nuc; Influenza virus nucleoprotein Finn RD anon Pfam-B_10 (release 1.0) Family \N 25.00 25.00 27.70 26.80 16.80 16.10 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.60 0.70 -6.01 3 18503 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 18183 20 0 6541 0 474.40 89 99.75 CHANGED MAs.SsKR....S.EphpTuuE.......QpRQTPTEIRKSVupMVsclGEFYIQMCsElGLNsDhEtpLIQNuIAIERhlLuAsD-K+sEap+EKsAR-spcucushDscKTGGslYKRsRDGKsIpapslllLasKEEI+pIaK.oshGsD......AsuGLsHlMIaHSNLNDlsYQRoRALsRsGhDPcLhSLhQGSTLPRRSGAsGsAlKGlGTLVAEAIRhI......KRGlsDRshLR...ut+T+oAYERhhpsLKsKspsusQRALsDQVlcSRNPGsA-IEDLslLARSuLlLRPSVAsKssLPhClYhhA+lothDFpsEtYShVGh-AFcLaNhAp......sFSllRsNDDs....cDKSQLlaMACFGAAYEDLRVlSAlsGTclKPRupLKs+GF+VsusEpVETMsSuLLplRhpaWAshTRSGGNpssscuuuGQISsSPVFAVERsIsh-+QsVcchLohNlEGR-uDs+ssLlKMMc-uhu....pKoEssuFlG+uMF-LSDccKTNPI..sF+poscsFFFttDsAEDYDs 
.....................................................................MASQGTKR....SYEQMETGGE.........RQNATEIRASVGRMlGGIGRFYIQMCTELKLS.DYEGRLIQNSITIERMVLSAFDERRNKYLEEHPS.......AGKDPKKTGGPIYRR.hDGKWMRE...LILYDKEEIRRIWRQANNGED......ATA..G.....LTHlMIWHSNLNDATYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTMVMELIRMI......KRGINDRNFWRGENGRRTRlAYER.MCNILKGKFQTAAQR.AMMDQVRESRNPGNAEIEDLIFLARS..ALILRGSVAHKSCLPACVYGLAVASGYDFEREGYSLVGID..PF.+LL.QNSQ......VaSLIRPNENP....AHKS...QLVWMACHSAAFEDLRVSSFIRGT+VlPRGpLSTRGVQIASNENMEsMDSsTLELRSRYWAIRTRSGGNTNQQRASAGQISVQPTFSVQRNLPFERATlMAAFTGNTEGRTSDMRTEIIRMMEuA.......+PEDVSFQGRGVFELSDEKATNPIVPSFDMSNEGSYFFGDNAEEYDs..................................... 0 0 0 0 +2581 PF00600 Flu_NS1 Influenza non-structural protein (NS1) Bateman A anon Bateman A Family NS1 is a homodimeric RNA-binding protein that is required for viral replication. NS1 binds polyA tails of mRNA keeping them in the nucleus. NS1 inhibits pre-mRNA splicing by tightly binding to a specific stem-bulge of U6 snRNA. 25.00 25.00 30.00 30.00 22.90 20.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.19 0.70 -5.20 3 18530 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 18330 77 0 6565 0 214.70 83 95.82 CHANGED MDSNTVSSFQVDCFLWHIRKplADQ-MGDAPFLDRLRRDQKSLKGRGSTLGLDIEsATRsGKQIVERILKEESDEsLKhTIASuPASRYLTDMTIEEMSR-WYMLMP+QKlTGuLhIRMDQAIMDK+ITLKANFSVlFD+LETLlLLRAFT-DGAIVGEISPIPSLPGHTNEDVKNAIGILIGGLEWNDNTVRlSEsLQRFAWRsSDENGGPPLoPK ...MDSN.TVSSFQVDCFLWHlRKRFADQ-LGDAPFLDRLRRDQKSL+GRGsTLGLDIETATpsGKQIVE+ILKEESDEALKMTlASsPASRYLTDMTLEEMSRDWFMLMPKQKVt.GsLClRMDQAIM-KNIlLKANFSVIFDRLETLILLRAFTEEGAIVGEISPLPSLPGHT.EDVKNA.IGVLIGGLEWNDNTVRVSEslQRFAWRSssEsGRPPLPP................................ 0 0 0 0 +2582 PF00601 Flu_NS2 Influenza non-structural protein (NS2) Bateman A anon Bateman A Family NS2 may play a role in promoting normal replication of the genomic RNAs by preventing the replication of short-length RNA species [1]. 
25.90 25.90 26.00 26.70 24.60 25.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.97 0.72 -3.78 4 18368 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 18223 2 0 3232 0 92.00 84 77.49 CHANGED LNGMITQFEpLKLYRDSLGEuVMRhGDLHSLQsRNupWREpLuQKFEEIRWLItEsRphLphTENSFEQITFhQALQLLhEVEpEIRTFSFQLI ....LNGMITQFESLKlYRDSLGEAVMRMGDLH.LQsRNuKWREQLuQKFEEIRWLIEEVRHRLKtTENSFEQITFMQALQLLLEVEQEIRTFSFQLI.... 0 0 0 0 +2583 PF00603 Flu_PA Influenza RNA-dependent RNA polymerase subunit PA Bateman A anon Bateman A Family \N 23.00 23.00 23.00 26.40 22.30 22.90 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -13.03 0.70 -6.50 4 18248 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 17904 29 0 9139 0 661.40 91 97.25 CHANGED hsEhuEDhhhpss....+lhpIChHhpVChhhSDhcalscpGps...............pa-lIEGpcRshAh.l.pplCpppslEhP.+aLsDLaDhccppFlElGlT+chsD.Ya.pKhpKl.ussh+lhlFSasG.-hspss-hsLcEEp+tRIhohLsphtp.lspcNLapplhts-stEptI-..FclGpThpcL..RDpS...lP.sFpshEth+sYh-th..sPR.tlEsplupMpsplph..c.hchsphR.ItL.....-GP.sPapuhhLhtDuhhls.ls-PppcptuI.hh-.....phhasps-..hI+.p-Ks.susahh.....Wpplhuslpshtp......o.shp+sspscaAhG.shs.cKl.....ppsshsspshKQtEsclPEhpSlssWlpsEhshhpp.o-.stWl-LsEhsssss.lEtlApthpchahs.lsts+suphhhKhllhsTuLhspspsshu+hpllPIhsRshsccu........pphspLaGhslKGpSHL+pDTDsssllohEFShpDPcl-..Ka.KYoVFclGphh........VtG+phshaLYsRssuhSKIKhcWh.chRRCLlQohpphEsll.pESuhpppshsccsh.....Nc.pha.IGpppGtl.ttoltcslRslLspphhhslYsssQLEGFsAEpR+LLhslpAh+-p+p...PasFc.EGhh-tIEEClINNPhVlh.AphaNphlh.shc 
.................MKEYGEDPKIETN....KFAAICTHLEVCFMYSDFHFIDERGESIIVESGD.PNALLKHRFEIIEGRDRTMAWTVVNSICNTTGVEKP.KFLPDLYDYKENRFIEIGVTRREVH.IYYLEKANKIKSEKTH.IHIFSFTGEEMATKADYTLDEE.SRARIKTRLFTIRQEMASRGLWDSFRQSER.GEE.TIEE+FEITGTMRRL..ADQS...LPPNFSSLENFRAYVDGF..EPNGCIEGKLSQMSKEVNA+IEPFLKTTPRPLRL....P-GPP..CpQRSKF.LLMDALKLS.IEDPS.HEGEGIPLYDAIKCMKTFFGWKEPNIVKPHEKGINPNYLL....AWKQVLAELQDIENEEK.IP+TKNMKKTSQLKWALGENMAPEKV......DF-DCK...DVuDLKQYDSDEPE.RSLASWIQ....sEFNKACELTD..SSWIELDEIGEDVAPIEHIASMRRNYFTA.EVSHCRATEYIMKGVYINTALLNASC...AAMDDFQLIPMISKCRTKEG........RRKTNLYGFIIKGRSHLRNDTD.VVNFVSMEFSLTDPRLEPHKWEKYCVLEIGDMLLR....TAIGQVSRPMFLYVRTNGTSKIKMKWGMEMRRCLLQSLQQIESMIEAESSVKEKDMTKEFF....ENKSETWPIGESPKGVEEGSIGKVCRTLLAKSVFNSLYASPQLEGFSAESRKLLLIVQALRDNLE...PGTFDLGGLYEAIEECLINDPWVLLNASWFNSFLTHAL.K......................................... 0 0 0 0 +2584 PF00602 Flu_PB1 Influenza RNA-dependent RNA polymerase subunit PB1 Bateman A anon Bateman A Family Two GTP binding sites exist in this protein [1]. 20.70 20.70 20.80 21.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 740 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -13.07 0.70 -6.81 5 18462 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 18111 8 0 8861 0 698.20 94 98.09 CHANGED 
M-l.NPtLLFlc...VssQssISTTYPYTGPPPhSHGTuTtYTL-TVpRTacYSc.KuKpppscVsGss+l.cssNssL-EDNhsEPSGsAclDsVLtLhcph--ca.PGhFc+ustEAhEclhppcas+LTcGRQTaDWTssRNQPAATALpsTI-sh+pN.LsuscGsoMl-alcclhEsLD.+pElcFp..ssKshcR....+hcDppotthlp..cKphsKtcs+Is+cEYlpRALTLNTMsKDuERGKLcRRAIATPGM.lRGFVhlVEslA+sICEpLcpSGLPVGGNEKKAKLuosVcclhsp.ssuplosTlTGDNoKWNEChsP-saLAMlshIT+DuPpWh+-lhSIAPllFSNKhA+LGcGlhhpsKTp+pcslI+A-sluchc.ctFNEcp+stIcclEshl.p-GsspLosGMhMGM....FNMLSTVLGVSsLuhspcclsspthhWDGLQSSDDFlLFssA+Na-shppsl-cFh+lCKLlGINMSpKKS.YlstTGlFEFTSMFaRcGFVuNhAMELPSFs.suGlNESuDhulGholIKNNMINNsLuPuTAphAL+IFIp-YRaTY+sH.hDoclpsRRhKhLKchhcpppuKDGLLluDGGPslaNl+sLHIPElsLKa-..LMDE-Y+sRlhNPpNPFsu+spIE..cpsslFcAHGPl.+shEp-AVuoTHSacT+RNRolLNTcpRshlt-EQpYQKsCNlFEcsFsSuohRsPlG.tShhEAhtcRLc....cu+LctEuGRlpc-Ea-c ............................................MDV.NPTLL.FLK...VPAQNAISTTFPYTGDPPYSHGTGTGYTMDTVNRTHQYSE.KGKWTTNTETGAPQL.NPIDGPLPEDN..EPSGYAQTDCVLEAMAFLEESH.PGIFENSCLETMEVVQQTRVDKLTQGRQTYDWTLNRNQPAATALANTIEVFRSNGLTANESGRLIDFLKDVMESMD.KEEhEIT..THFQRKR.....RVRDNMTKKMVT...QR.T..I...GKKKQRLNKRuYLIR.....ALTLNTMTKDAERGKLKRRAIATPGMQIRGFVYFVETLARSICEK..LE..QSGLPVGGNEKKAKLANVVRKMMTNSQDTELSFTITGDNTKWNEN..QNPRMFLAMIT.....Y.............ITR........N..QPEWFRNlLSIAPIMFSNKMARLGKGYMFESKpMKLRTQIPAEMLAsIDLKYFNEST+KKIEKIR...PLL.IDGTASLSPGMMMGM....FNMLSTVLGVSILNLGQK+YTKTTYWWDGLQSSDDFALIVNAPNHEGIQAGVDRFYRTCKLVGINMSKKKS.YIN+TGTFEFTSFFYRYGFVANFSMELPSFG.VSGINESADMSIGVTVIKNNMINNDLGPATAQMALQLFIKDYRYTYRCHRGDTQIQTRRSFELKKLW-QT.pSKAGLLVSDGGPNLY...NIRNLHIPEVCLKWE.....LMDEDYpGRLCNPLNPFVSHKEIESVNNAVVMPAHGPA.KSMEYDAVATTHSWIPKRNRSILNTSQRGILED.E.QMYQKCCNLFEKFFPS....S....SYRRPVGISSMVEAMVSRAR...ID.A.RIDFESGRIKKEEFu....................... 0 0 0 0 +2585 PF00604 Flu_PB2 Influenza RNA-dependent RNA polymerase subunit PB2 Bateman A anon Bateman A Family PB2 can bind 5' end cap structure of RNA [1]. 
25.00 25.00 37.80 37.70 17.90 16.50 hmmbuild -o /dev/null HMM SEED 759 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -13.23 0.70 -6.68 6 18433 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 18053 21 0 9733 0 710.70 92 99.89 CHANGED MERIKELRsLMSQSRTRpILTpTTVDHMAIIKKYTSGRQEKNPuLRMKWMMAMKYPITADKRIhEhIPERNEQGQT.LWS+TsDAGS.DRVMVSPLAVTWWNRNGPsusThHYPK.VYKTYFEKVERLKcGTFGPVHFRNQlKIRRRVDlNPGHADLSuKEAQDVIMEVVFPNEVGAplLTSESQLpITKEKKEELQ-CKIuPLMVAYMLERELVRKTRFLPVAGGTSSVYIEVLHLTQGsCWEQhYsPGGEVRNDDlDQSLIIAARsIVRRAsVSs.DPLuSLLEMCHST..QIGGs..RMVDIL+QNPTEEQAVDICKAAMGL+ISSSFSFGGFTFKRTSGSSVKREEElLTGNLQTLKIclHEGYEEFThVGK+ATAILRKATRRLlQLIlSG+DEQSIAEAIIVAMVFSQ-DCMIKAVRGDLNFlN...RANQRLNPMHQLLRHFQK...DAKlLFQNWGIEpIDNlMGMhGILPDMTPSTEhSLRGVRlSKhGVDEYSSTERlVVSIDRFLRVRDQpGNVLLSPEEVSETQGTEKLTITYSSSMMWElNGsESlLVNTYQWII+NWET.....VKIQWSQDPThLYNKhEFEPFQSLlPKAtRGQYSGFVRTLFQQMRDVLGTFDTsQIIKLLPFAAAPPKQSRM..QFSSLTVNVRGSGMRIL.VRGNSPVFNYNKsTKRLTlLGKDAGsLscDPDEGTs.GlESAVLRGFLILGKED+RYGPALSIsELusLAKGEKANVLIGQGDVVLVMKRKRsSSILTDSQTATKRIRMAlN ....................MERIKELRsLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPALRMKWMMAMKYPITADKRIhEMIPERNEQGQT.LWSKTNDAGS.DRVMVSPLAVTWW..NRNGPTTSTVHYPK.VYKTYFEKVERLKHGTFGP..VHFRNQVKIRRRVDlNP.GHADLSAKEAQDVIMEVVFPNEVGARIL...TSESQLTITKEKKEELQDCKIA.PLMV..AYMLERELV.RKTRFLPVAGGTSSVYIEVLHLTQGTCWEQMYTPGGEVRNDDVDQSLIIAARNIVRRAsVSA.DPLASLLEMCHST..QIGGl..RMVDILRQNPTEEQAVDICKAAMGLRISSSFS.FGGFTFKRTSGSSVK+EEEVLTGNLQTLKIRVHEGYEEFTMVGRRATAILRKATRRLIQLIVSGRDEQSIAEAIIVAMVFSQEDCMIKAVRGDLNFVN...RANQRLNPMHQLLRHFQK...DAKVLFQNWGIEsIDNVMGMIGILPDMTPSTEMSLRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTEKLTITYSSSMMWEINGPESVLVNTYQWIIRNWEs.....VKIQWSQDPTMLYNKMEFEPFQSLVPKAsRuQYSGFVRTLFQQMRDVLGTFDTVQIIKLLPFAAAPPEQSRM..QFSSLTVNVRGSGMRIL.VRGNSPVFNYNKATKRLTV.LGKDAG..ALTEDPDEGTu.GVESAVLRGFLILG.KEDKRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN........................................................................... 
0 0 0 0 +2586 PF03069 FmdA_AmdA Acetamidase/Formamidase family Bateman A anon Pfam-B_2541 (release 6.4) Family This family includes amidohydrolases of formamide EC:3.5.1.49 and acetamide. Swiss:Q50228 forms a homotrimer suggesting all the members of this family also do. 19.50 19.50 20.10 19.50 18.90 19.40 hmmbuild -o /dev/null HMM SEED 369 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.13 0.70 -5.76 8 1881 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 808 40 759 1832 361 191.80 18 81.87 CHANGED sllslD+sK..hcpss.hHNRWHP-lsssApV+PGEpl+lEshDAhGGQIpss-u...usDlcslDLoplH.LoGPltVcG.....AcPGDlLhV-IhDh.slc.......sct...GasGhFs+tsGuGFLsD+..ascstKslW-acGhassScplPGVRaPGhsasGVIG.sAPSc-lLsphscRE.t.ltpss..s..hs..Ppsp.th...........tlAsEuhRTlPsR.-sGGNhDlKslo+GS+lahPVFVEGAhLShGDlHaoQGDGElshs.AIEMuGplsl+lclIKsG.lcphslcs........Pha..u.l.cPpap..calhhpGluVD-uh.+pthhssshAh+puhLNsIsahc+aGYsstQshlllSsssspu....hVs.ssusssstlP.stIFpps ............................................................................tt..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sG..D...h..Gs.hhh.l...su.h.hGs.+h..u.uc.s....sh-..........h.h..h..................................................................................................................................................................................................................................s.................................................
.................................................................. 0 226 478 623 +2587 PF01070 FMN_dh FMN-dependent dehydrogenase Finn RD, Bateman A anon Pfam-B_829 (release 3.0) Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.68 0.70 -5.61 180 7189 2012-10-03 05:58:16 2003-04-07 12:59:11 13 28 3457 189 2186 12550 8912 259.10 23 86.79 CHANGED A.+cpLPth..sasYlsuGAssEhThpp.NpsAFpchpltPRsL..psls...ph..chs..sslh.....GpphshPlhlAPsGhptLhas.c..GEl....shA.+AAsptGl..shslSohusssl.....E-l......up.s..............ss........ssh..WFQLYh...pDcshsppllcR.Ac..sAGhcuLllTVDsPshGpRc..p-hRssh....th....P.th.......hthh.phh.ts..............................tt................................................................tssth.tthht..h...p.....ss.lsW.cc.lphLRp..t.h..shPlllKGl...hss-DAttAhchG..sD.GIlVSNHGGRQ...........................LDuususl-sLPpl.sp..sl.s...............................sc.....htlhlDGGlRpGsDllKALALGAcuVhlGRshlaGLA.ssGptGVp+slclLpsElctsMsLhGssslscl...st.sh.lpt 
...............................................................................................................................................................................................................................................................................................................................ts.hp.ph.hh.phl........hs.h.p...p.h........s..h..p..sph....h.................s........p..h........t..hPhh..hssh.s.....h......t...t....h......ht...........p....hph...........thAp....s.st..t..h.....s..h....h......hh.u......o.s.....h..h.........p..h..........................................................................h.h...h...................t..................th.h.........tt...h.............t...........h........s.l...h..p.h.s...........................................................................................................................................................................................................................................hsh....pp..lt..ltp................h......t.....h.....P.hll..K...t.....l...................hs.......c...s........s.t............h.....h....p.......h....G.......s....p.....u.l......l.......S....s.....+....G.....G......p....p........................................................................h...D..h.....s.........s....s..............h......p...s........L..........t.l...hp....h...t..........................................................tp........h.l.hh.s.uG.lRpG.Dl.h+....sl.A.L.G.Ac....hshlu.Rshl....h.ul............t....h.t.............G................t............u..l.....p.h...l.p.h...h.t...t...-hchhMthhGspslt..plp......h................................................................................ 0 652 1304 1811 +2588 PF00743 FMO-like Flavin-binding monooxygenase-like Bateman A anon Pfam-B_437 (release 2.1) Family This family includes FMO proteins, cyclohexanone mono-oxygenase and a number of different mono-oxygenases. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 532 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.62 0.70 -6.31 5 2951 2012-10-10 17:06:42 2003-04-07 12:59:11 14 45 679 107 1914 7810 2768 277.00 19 77.52 CHANGED AKKVAVIGAGVSGLuSIKCCLEEGLEPTCFERS-DIGGLWRFoEssEEGRASIYKSVloNoSKEMSCFSDFPFPEDYPNFMHNSKlLEYl+hFAK+FDLLKYIQFKTTVCSVsK+PDFSoSGQWEVVTE+EGKpcSAVFDAVMVCTGHHlNPHLPLESFPGIc+FKGQYFHSR-YKcP-uFpGKRVLVIGlGNSGuDIAVELS+TAcQVFLSTRsGSWVlSRluDsGYPWDMllsTRFsoFL+NlLPoslSsWlhE+QlNcRFNHENYGLpPcc+shtKEPllNDELPuRILsGtVpVKssVKEFTETSAIFEDGTlEEsIDlVIFATGYTFuFPFLEESlVKlccNclSLYKtVFPPpLEKPTLAIIGLIQPLGSIlPTsELQARWAsRVFKGLC+LPSppcMMpEIsc+pEc+hKhFG.spocslQoDYIsYMDELAsaIGAKPNLhSLhLTDP+LAlcVFFGPCoPYQYRLsGPGKW-GARNAILTQWDRoLKPLKTRlVpcSssPsuuF.hLKlFulslLLlAlFLlht ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s................t.....h.........................h.......................................................t..............................................a.....................t.......................h......tY..h.t..h..s.....t.......t.....h.....t.....h......................h.......h....t....t.........l..............................h..................................................................................................................................................................h..............................l..................................................................t...............................................................................h................p
......h........l...........h..........h..s....s......G...........................s..............h........P........................................h..............s.....................t.....a....................s....t.........h..H.................u........................a........p...........t......s........t...................h..........t........s.....p..........p..........lh..ll.G....u..S.u.....-...l....s........p...h.......................s.....t.....t.....................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......h....................t.....t.......................................................D.....lhhsT.....Ga..................................................................................................................................................................................................
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................thh................................................................................................................................................................................................................................................... 0 525 1056 1588 +2589 PF00039 fn1 Fibronectin type I domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.70 20.70 20.80 21.30 20.50 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.42 0.72 -4.15 20 1772 2009-01-15 18:05:59 2003-04-07 12:59:11 13 57 52 70 393 1849 0 38.10 42 17.20 CHANGED ChD.posspYplG.-pWpR.tp..Gthhp.CpChGtGpGchpC ..ChD.s..GspY.pl.G.-pWc+.tp..G.hhhp.CTCh.Gs..G.pGcapC..... 0 20 53 138 +2590 PF00040 fn2 Fibronectin type II domain Sonnhammer ELL anon Prosite Domain \N 22.20 22.20 23.60 22.60 22.10 22.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.60 0.72 -3.95 111 1582 2009-01-15 18:05:59 2003-04-07 12:59:11 14 126 87 45 677 1460 22 41.30 49 6.08 CHANGED ChFPFhapGppYpsCTscGcs.tthW..CuTT.ssYDpDpcWua..C ...ChFPFhapGcpYpsC...Ts...cG.R......p.D.....uhhW..CuTT.tsYD...p...D...p...+...aGFC................. 
0 92 123 281 +2591 PF00041 fn3 Fibronectin type III domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.73 0.72 -3.85 106 58087 2012-10-03 16:25:20 2003-04-07 12:59:11 16 3078 1986 274 27544 50190 2367 84.30 20 23.88 CHANGED ssP.pslphpsh...sssslplsWpss.....suslssYplph....ptsssp...........hpphs...lsss....p...phslpsL.pPsspYphpVt.Ahsst.....t.S ....................................................P.tsl..p..h..p..sh......ss.s.....o...l..p..l......s...W.....p..s.........P................t..................s..........u...........t........l............p...........s...........Y..h.......l...p......h......ppt.sstt...........................................htp.h.s............h.s..s.s..................pp....p..h.p.....l.....s......s.....L...p.........s...s..........s.p.Y.....p....h.p..Vt..A.h.st.tG...u............................................. 0 7000 9544 17225 +2592 PF02986 Fn_bind Fibronectin binding repeat Griffiths-Jones SR, Schwarz-Linek U anon Pfam-B_2661 (release 6.4) Repeat The ability of bacteria to bind fibronectin is thought to enable the colonisation of wound tissue and blood clots. The fibronectin binding repeat is found in bacterial fibronectin binding proteins and serum opacity factor. Bacterial fibronectin binding proteins are surface proteins that covalently link to the bacterial cell wall, mediate adherence of the bacteria to host cells [2] and trigger the fibronectin/integrin-mediated uptake of bacteria by host cells [3]. Each fibronectin binding repeat is an array of short motifs that bind to fibronectin type I domains [4]. Fibronectin binding repeats are natively unfolded in the absence of fibronectin and are thought to adopt a well-defined conformation (tandem beta-zipper) upon binding [5]. 
21.20 21.20 21.40 21.40 19.50 21.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.85 0.72 -4.44 34 1694 2009-01-15 18:05:59 2003-04-07 12:59:11 9 50 190 4 23 1338 0 37.00 43 15.06 CHANGED pslDhsE..DT.....suhS..Gpssss...oh.EDT+...Pp..hh.hGGp ......N.VDh-.Do....lPplp...Gp..Ncss..pshEEDTpt..sc..h...GG........... 0 6 6 14 +2593 PF03274 Foamy_BEL Foamy virus BEL 1/2 protein Mifsud W anon Pfam-B_4337 (release 6.5) Family \N 21.50 21.50 21.70 54.70 20.20 21.40 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.79 0.70 -5.12 6 21 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 8 0 0 35 0 218.60 34 59.84 CHANGED hshsEhE.slu..ssEN....Ptc.+hhpp..pssst-spsVoYH.uYK-pEDpps.cI.KhcDWlPsP-cM.SKplCppLILssLYsupKAtEllp..................hsasVpW..EQScssPshFtl...pYpChhCpslha-PMPlha....DscsclWhKht.LRuslGSlVashc+Hhpp...Cpu.VcP.pp..pGps.......t+PRhRssPshRa.+hhtEasssR.+coK.hlsppspuHp......SsGDshAhts ...................shsEhE.slu..stpN....Ptcs+hhpp......tcspploaH.uY+EhE-pps.pl.KLpDWlPsP-cM.SKpls...........pp.lhshLhps.Kst-.lp..................hPhsssW..pQscssPshFtl....Ypshhspshha-Ph.hha....DPpschahthphLhsslGplshphaKphps...Cpu.lcP.ss..pups.........scPRs+ssPsLpa.+hhhctshsRp+cp+.hlh+hspuHc.ss...SsuDhhAhp........... 0 0 0 0 +2594 PF03408 Foamy_virus_ENV Foamy virus envelope protein Finn RD anon Pfam-B_4411 (release 6.6) Family Expression of the envelope (Env) glycoprotein is essential for viral particle egress. This feature is unique to the Spumavirinae, a subclass of the Retroviridae. 
20.10 20.10 20.40 24.00 20.00 20.00 hmmbuild -o /dev/null HMM SEED 981 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.94 0.70 -13.61 0.70 -7.02 7 89 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 14 0 0 86 1 583.90 52 99.33 CHANGED MTLpQWl.W+thppspphLpshssl..pphp.slh-hpt-EhlPh+h.RhKYhhYsCCATSTRlhsWlhhlClLL.lVhlSshlTlhRlQWppsIpV.GPVlsWNlopptshpshpst+luRphRs.H.t.p.VpVNhTSIPQGVaapPHPcPIIhKERVLGLSQVlMIsS-sIApstNLspEsKsLLs-MINEEhpsLusshlsFElPLGDP+sQ-QYIH++CaQcFAHCYlV+Ytss+sWPocplItDQCPLPsh.ass.tYphQsIWDYYl....htPpsWsscsh..YGp....ARlGuaYlPp..hppshoHVlFCSDQLYucWYshppo.pppEcLhhpKLhN..Lspts.upLKcRALPssWsspGputLFR-lNsLDhCs+PEAVhLLNpoYYsaSLWEGDCshpppsIoph.spC+sa.pp...pphHPYACRFWR....pp.tp-EhKChssEp..+CLYaPpaDosEtt.DFGaLAY.ssFPSPICIcppslp-.cYcV.SLYtEChppucpYGIcsVl.tLcshLs.sGhsls-hPsuRAFssLss.paPsoY.NhTpp.pppuC....s+RpRRSl..sNac+LpohGhuLssAlpTLSpISDlNDEpLtpGlaLLRDHlVTLMEAsLHDISlhEuMhulQHlHTHLspLKshLLtpRIDWoaIpSsWIQpQLQto--.MKlIRRTARSlsYhVpQTpsosTuTuWEIGIYYEllIPK+IYLsNWplhNlGHLl+sAGaLT+VplpHPYEIlNpECppppYLHLE-ChcQDYlIC-.V..VpPCGNsTs.SDCPVhActlKsPYlplpPLKNGSYLVLoSpTDCuIPsYlPslVTVN-TlcCFGhpFK+PLhuEp+s.sapPplPpLcLRLPHLlGIIAKlKulcIEVTST.EsIKDQIcRAKAELLRLDlHEGDaPsWlpQLusATcDVWPAAAsslpuIGNFLussApGIFGTsFSlLuYsKPlLIGlslILLllLlhKIlSWLss..c+K+p 
....MTLQQWLlW+..........KMs-AHsALpNsooLTEEQKpQlIlEIQ...pEEV.l..PT+MDRlKYLsYsCCATuTRVMsWlhLIClLLIIVhVSCFVTVuRIQWN+DIsVhGPVIDWNVT.pp.AsY.QpLpssRloRSLRspHPc.pYlplNMoSIPQGVhYsPHPEPIIlKERVLGlSQVLMINSENIANsANLoQEsKhLLs-MINEELpsLSspMIcFELPLGDPRDQpQYIH+KCYQEFAHCYLVKYKp.P.psW.o-slIsDQCPLPGh.HssshYcYQsIWDYYlphppIRPpsWTocoa..YGs....ARhGSFYlPp.hRpssloHVlFCSDQLYGKWYNlpNslpENEpLL+oKLhN..LTsh..SpLKsRALPppWsspGpucLFRshNsLDlCN+PEAVLLLNoTYaoaSLWEGDCsaTps.Ipph.sEC+p.s+h...chhHPYACRFWR...aKp.spEEVKChssEc.c+CLYYscYsSPEupaDFGFLuYLsAFPu.hCIEspslR-s-YEVYSLYhEChNuAcpaGIDoVLhuLKoFLNaTGsPVNEMssARAFlGLoDPKFPPsYPNlT+E..p+uC..ps.pR++RS...sNlcKL+SM............................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +2595 PF03623 Focal_AT Focal adhesion targeting region Bateman A anon [1] Domain Focal adhesion kinase (FAK) is a tyrosine kinase found in focal adhesions, intracellular signaling complexes that are formed following engagement of the extracellular matrix by integrins. The C-terminal 'focal adhesion targeting' (FAT) region is necessary and sufficient for localising FAK to focal adhesions. The crystal structure of FAT shows it forms a four-helix bundle that resembles those found in two other proteins involved in cell adhesion, alpha-catenin and vinculin [1]. The binding of FAT to the focal adhesion protein, paxillin, requires the integrity of the helical bundle, whereas binding to another focal adhesion protein, talin, does not. 
21.20 21.20 21.70 21.30 20.80 19.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.62 0.71 -4.69 7 286 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 84 30 140 236 0 126.30 54 14.58 CHANGED TtsLDRosDhVYpsshslVKuVhpLpstlpphssp-YlshVKsVGlsLRsLlsoVDclhssLPups+pEIEhspKlLsKDhuELIutMRLAQQ.uhToLcp-h++pMLoAAHsLAhDAKNLLDsVDpARl+hphhh..Pt ................TAsLDRosDpVYpsVsslVKAVlphssclp.us..PEp.YVs.hVKpVGLsLRsLluoVD-hlPhL.P....uSo.++E.I...........EMA..p..KLLN.pDLuELIsKM.+LAQ.Q...ash.TSLp.pE....YKKpMLoAAHsLAlDAKNLLDslDQARl+h.h.....t.................. 0 58 67 101 +2596 PF02980 FokI_C Restriction endonuclease FokI, catalytic domain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 104.30 103.10 19.40 19.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.71 0.71 -4.53 4 21 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 21 3 3 25 0 139.00 41 24.27 CHANGED upEc-lLhpAlLuYPPAspILoLLt-t.GpahTKF-LGcpLGFhGEpGFTShPpsIhlcsLAsup.st-K...pKIKosaEGoSDKYARhIusWLcplGLVpptsKpVhh.ThtpRKap..lupsa.ITu.GlpsLpcspGpoRas ....t.p-pclLpcAlLuYPPAsplLsLLsst.upthTKFpLGcpLGFhGEtGFTShsp-lhlpsLspAp.sp-K...pKI+SshEGTSDKYARhIsuWLhplGLVpppsKclsh.shssccap..shs.psYpITs+GlpAL+pspGpS+a.s....... 
0 1 1 3 +2597 PF02981 FokI_N Restriction endonuclease FokI, recognition domain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 29.00 27.50 22.90 19.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.70 0.71 -4.58 3 21 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 21 3 3 25 0 143.20 34 25.01 CHANGED lhloEsS+lR..TFGWVQDPS.DF+sLs+VVuIFDcsSKlHpELtsp+IPsLVcEpclRpELlullNQ+PLplTYK-LVGTuho.RScAcCNuIVQAsl..pGQ.sRsaIsDWuADNFVRWAHALGFL+YtppuDoFsITElGlAhuKutD ..............chR..TFGWlQssS.shppLKKVVslFsssSchappLhcshlt.pllp-tsh+pcLhscLsss..shphoYhcLsGss.......h.......p...........RocuhssuLlQAsl.....puQ..u.....+tahcDWsADuFLRWAVulsFlcaspcsDTFsITchGhphsp................ 0 1 1 3 +2598 PF01770 Folate_carrier Reduced folate carrier Bashton M, Bateman A anon Pfam-B_1123 (release 4.2) Family The reduced folate carrier (a transmembrane glycoprotein) transports reduced folate into mammalian cells via the carrier mediated mechanism (as opposed to the receptor mediated mechanism) it also transports cytotoxic folate analogues used in chemotherapy [1], such as methotrexate (MTX). Mammalian cells have an absolute requirement for exogenous folates which are needed for growth, and biosynthesis of macromolecules [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.32 0.70 -5.96 24 433 2012-10-03 03:33:39 2003-04-07 12:59:11 13 6 105 0 249 620 26 306.20 34 82.78 CHANGED psWhh.ohlLChaGFhpphRPuEPFlssYLhGs.+NlTp-QlssplaPVhTYSYLAhLlsVFLlTDaLRYKPVIlLpuluhlssahlLlaspulhshQhhEhFYGlspAuElAYaoYIYuhVs......ppYQ+lTuYoRuuhLlGhhhuulLuQlLVo....hsthsahpLNhISLushsluLhhuhhLPtsp+SlaFppp..............................ppscspphppsppps......hpt.t..shpt...psl....tthhpchtssYss.pllhWSLWWAhuouGahQllsYlQlLWcpl...sspssplYNGuV-AsuTLLGAlsuhusGal..phpashautLsLulhSslpuGhlhlhshop...sIWlsYsuYllF+uhYhFlITIAshpIAssLsh-paALVFGlNTFlALhLQTlLThlVlD.ppGL..sLslpsQFhV ....................................................................W.h.shhLCh.aGhhtth+P..u.Esalh.aL.h.t....p...NhTtpp..l..p...........plhPhhoYS..als.hL..h...lF..l...hTDhl.RYK........Plllhpu.hu..hlh.h.ahh..Llhs..ps..l...hthQh...............hphha................u..........hh....h.A....s.c.l....A.Y...u.Y.Ia.u....h..V.p........................th..Yp.+.h..s....u.a.s.R.u.s.h....Lhu.hhhuul....lu.Q.lhls.....ht....hs.hh...L..thlo.....l.s.....h....uh..h.h.u.h.h......L............sppo...h......aa...pt................................................................................................h....hh.........h.hhtc..h..ht.s.h.pp....lhhWSlWWsh.s..ssGa..lh.....................YhphLWp.h..........pt..t.haNG.ss-A.h.shh........uu.h..ss.hhsuh..l..php.h.sh.h.u.....h.hlshho....h..h..u.s..lhh.h.t.....st.......sI....ahsYss...al.lat...Y.h...h.l...sl........A......hp...lA...........sLs......-.huLlFGhNTFhAhhlpollThl.Vss.ttuh...sL.l..Qah............................................................................................................... 
0 72 94 174 +2599 PF03024 Folate_rec Folate receptor family Bateman A anon Pfam-B_1966 (release 6.4) Family This family includes the folate receptor which binds to folate and reduced folic acid derivatives and mediates delivery of 5-methyltetrahydrofolate to the interior of cells. These proteins are attached to the membrane by a GPI-anchor. The proteins contain 16 conserved cysteines that form eight disulphide bridges. 29.50 29.50 30.30 29.50 29.10 28.90 hmmbuild --amino -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.94 0.71 -12.36 0.71 -4.63 34 433 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 99 11 239 412 3 158.80 26 42.97 CHANGED hCh...........su+paKshPusEspLt.p..........Cssapcp............................uCCssspopphpps.s.hl.ph.hp+CGp...hospCccahhp.pChYcCSPplu.W.............................p...ppth.slP.LCp-.CcpWacsC+ssh...TCsssW.t...athspst.ppC..tst..Chsappha.ssss-LC...................cplaupuaphustt..................uspClp ...................................................Chstp..pp.stsc..tpLh............Cs.appp.................................uC....Cstspspphttp.t........hh..............h.......up...........hp.stCpcahhphtC.hcCSPphu.h....p.....................tp..pphhhslP.LCp....-hCppaap...sC+ssh.......ps....tssh....................pps...h...p.C........t.....C..h.p.hh.....sssshC........pph.hu.s.hthss.....................sthCh................................................................ 0 66 87 135 +2600 PF02152 FolB Dihydroneopterin aldolase Bateman A anon PSI-BLAST P31055 Domain This enzyme EC:4.1.2.25 catalyses the conversion of 7,8-dihydroneopterin to 6-hydroxymethyl-7,8-dihydropterin in the biosynthetic pathway of tetrahydrofolate. 
24.10 24.10 24.10 24.40 23.60 23.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.15 0.71 -3.91 129 4525 2012-10-01 20:59:24 2003-04-07 12:59:11 13 22 3833 60 1029 2777 2126 111.20 28 72.22 CHANGED lhlcsLchhuhlGlhstE+thsQphhlDlpl....thchp...tssts.Dclppol....sYsplsctl.tphsp.ppp...apLlEsLAcpluptlhpp...........................as....lpt......lp.lplpKPss..ls.tsss.......lulplpR ................lhlcsLchhshhGlhstEpphtQchllDlpl.........thD...hp.........pAupo...D-.ls.c.s.l........s.Y.upl.scpl.tphlp...s.pp...........hpL..lEplApclAchlhpc................................as.....lpt..........lclclpK.PpA..ls..thcs.VuVplpR................................................. 0 309 627 864 +2601 PF00250 Fork_head FKH; Fork head domain Finn RD anon Prosite Domain \N 20.70 20.70 20.70 21.00 20.50 20.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.24 0.72 -3.94 21 4445 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 545 35 2306 4173 4 92.50 42 20.59 CHANGED KPP.....................YSYhuLIsMAIpp.uPsKhLTLupIYpaIh-pFPYYRpNppuWQNSIRHNLSLNcCFlKVPR......ps-cPGKGuaWpLcPsutshF.s................Gsah+Rc+R ........................................KPP.......aSYss.LIsh.A...Ipp....o.sp+pLTLs.pIYpa..Ihcp..FP..Y..a.....R..........p......s.....p......t.....u......W............p..............NS.IRHNL..SLN.c.C.FlK..Vs..+................t.....s....p.....s...G..........K................G..s..a.Wsl..c...Pss.t....p.h...h.t.......t............................................................... 
1 603 883 1588 +2602 PF01226 Form_Nir_trans Formate/nitrite transporter Finn RD, Bateman A anon Prosite Family \N 28.20 28.20 28.60 28.50 28.00 28.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.70 0.70 -5.48 190 4680 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 2481 80 683 2429 127 247.40 28 87.19 CHANGED stphhpthhps.uhpK...hptshhphhltuhhAGhhIuhushhthhlsssh.................s...........uhspllsuhs.FslGLlllllsGu-LFTuNshhhshuhhpc+..loh.tpll+sWsllalGNhlGulhhuhlhshs...shht...........ts........thstthhplAp......tK..h......sh.s.............hhpsFh+GIhCNaLVsLuVWhsh.uu+sh..huKlhslhhslhsFlssGFEHsVANMhhlshu.......lhh.us..........................s.hhshhhpNllPVslGNllGGslhluhhhahha ..............................hthh.pt.u.tc...hp...ps.h.ph..hlhuhhAGhalul..uhlhhhshssth.................ss................uhsp.Lls.uhs.FslGLllllls.Gu-LFTusshhh....sluhhp..pp.......loh..sp..hh.......p.ahh.........shlGNllGulhhuhlhths..shht...............s.......................................thspt.hh.pl.up.tK.h................pp..s.....................................h.hphhhpGIhsNhhVslAlWhsh..t..s.c.ss..hsKhhshhhslhhFlhsGaEHs........lANhhhhshu...lhh..sp.................................s.hhsh..lhpNllsshlGNllGGulhhGLsYahh................................ 
0 205 396 569 +2603 PF02971 FTCD formiminotr; Formiminotransferase domain Griffiths-Jones SR anon Structural domain Domain \N 24.00 24.00 25.40 26.60 22.80 23.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.74 0.71 -4.56 4 394 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 349 6 124 337 180 124.30 43 33.96 CHANGED FLIAFNINLLuT+..E.AHRIALslREQGRGtDQPGpLKKVQuIGWYL-E+NlAQVSTNLLDFEsTsLHsVaEEsChEApcLuLPVVGSQLVGLVPLKALLDAAuFYhcKEpLFlLp-E++I+LVVsRLGLDSLsPFpP+ERIIEYL ...............LlAaNlNL...s.T.s..l-IAccIA.KtlRtpu.......................................................................GGh+alKAlGl.Lc.......-+.....slsQVShNlsDap+TslaRsFEpl+hEAc.RaGVsVlGSE....llGLlPhcALlDsAcYYL.p....h.E.s.h.......................................................................... 0 62 84 104 +2604 PF02911 Formyl_trans_C formyl_trans_C; Formyl transferase, C-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -3.92 130 5755 2009-01-15 18:05:59 2003-04-07 12:59:11 13 28 4563 24 1384 4263 2359 99.70 28 26.51 CHANGED +lspp-stI-W.spsAppltphlRuh...s.PaP.uAas...hh....ss.......p....pl..........+lhcs.......phh.....spttttt.......................PG...p.....llphspp.slh.lssu.sus......ltl.ppl..Qhtuc+hhsspshhp....Ghp ....................pls+--ucI.D.W...spsApplppplRuh........s..PaP..sAas....hh..............ss..........p...........pl...................................Klapu.....................phh...........ssss.stt.....................................................PG....p.......llp....s...s....cp....slh..lAsu..sGs...............Lpl..tpl..Q...sGK+.t.h.sstsahpGh.t......................................... 0 435 862 1141 +2605 PF00551 Formyl_trans_N formyl_transf; Formyl transferase Bateman A anon SCOP Domain This family includes the following members. 
Glycinamide ribonucleotide transformylase catalyses the third step in de novo purine biosynthesis, the transfer of a formyl group to 5'-phosphoribosylglycinamide. Formyltetrahydrofolate deformylase produces formate from formyl- tetrahydrofolate. Methionyl-tRNA formyltransferase transfers a formyl group onto the amino terminus of the acyl moiety of the methionyl aminoacyl-tRNA. Inclusion of the following members is supported by PSI-blast. HOXX_BRAJA (P31907) contains a related domain of unknown function. PRTH_PORGI (P46071) contains a related domain of unknown function. Y09P_MYCTU (Q50721) contains a related domain of unknown function. 20.80 20.80 20.90 20.80 20.60 20.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.94 0.71 -4.74 15 13471 2009-01-15 18:05:59 2003-04-07 12:59:11 14 85 4844 104 3486 9789 6453 174.10 26 55.92 CHANGED h+lsllhSGsu.oshpsl.lsth+psstp...spllhVlos+spstGhp+utpsslsptlhpcps...................apsc.tthcpcltctlcthpsDllllAuah+lLssshl.pthss+lLNlHPSLLPpa.....cG.ssslppAlhsGscc.....sGsTlaals.-plDsGsIlhQpps.lhs.s-ossslppRlp-tEt.pshscsl .........................................................................................................+lhhh...Gp...s....p.....h..t..sl....l.p...t.hh.ts.th.............ls..s..Vh..o..p....s.c...s...h..s....h...t.t......s..h.p.....h..s......s..t.h.h....h.....t.ps..............................................h.psp...t..h.c.p...p....h...h....p...t...l...........p.....p...........h.......p...s.....D.....l.l.VlA.u.Y.h.p..l..Ls.sp.ll....p.........t..h..............t.....t.............t......h..l.NlHs...S..LLP........pa...................+.G...usP.h......pp...A...l........s....G.s.c.............................sGsTlH.....h.l...........s...p...........t..lDsG.s.Ilt.Q.t........t....l....s...l........pt...sDost...s.Lh.p.+l.t.p.h.tt.pl.lsps.............................................................. 
0 1084 2180 2908 +2607 PF01491 Frataxin_Cyay Frataxin-like domain Gibson Tj, Bateman A anon Gibson TJ Domain This family contains proteins that have a domain related to the globular C-terminus of Frataxin the protein that is mutated in Friedreich's ataxia. This domain is found in a family of bacterial proteins.\ The function of this domain is currently unknown. It has been suggested that this family is involved in iron transport. 21.30 21.30 21.80 21.80 21.10 21.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.65 0.72 -4.26 96 1466 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1409 24 414 878 102 106.10 41 86.45 CHANGED hs-scapplu-phlpplE-tl-th..-ss.....................hDlDs-hs.uGVLTlpF.s.stuphlIN+QsPh+QlWLAo..SGGaHFcap................s.sp..W.lss+.sGp.phhshLscthsppsups.lpas ...................................Ms-oEFHcLADphh.sIE-plDph..-.uc.....................sD.l.DsEh.s....GG.VLTlsF.....-suS......pIlIN+QpPh+.QlWLA.o+.pGG...aHFchp.....................s..sc..Wl..........ssR...sGp.shaclLpptsoppuGcslph.p.......... 0 105 209 318 +2608 PF03197 FRD2 Bacteriophage FRD2 protein Mifsud W anon Pfam-B_2816 (release 6.5) Family \N 21.60 21.60 21.80 50.80 20.90 18.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.32 0.72 -3.47 6 36 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 32 0 0 28 0 99.60 58 79.79 CHANGED sMVclIc-NGGWFEVK-hpss.DGh.csVs+IpCANGcha.ssG.h-pYFEIpEDEF+CFREY...TsEcDElcccV..oGVTKIHCIVDENNVDEIIELLRKTFKK .sMVclIpENGGWFEVK-htsh.DGa.chVp+IpCANGcha.s..G.h-cYFElsE-EFaCFREYKE.TSEcD-lcDcV..SGVTKIHCIVDENNVDEIIELLRKTFKt.. 0 0 0 0 +2609 PF04422 FrhB_FdhB_N Coenzyme F420 hydrogenase/dehydrogenase, beta subunit N-term Kerrison ND anon DOMO:DM04087; Family Coenzyme F420 hydrogenase (EC:1.12.99.1) reduces the low-potential two-electron acceptor coenzyme F420. This family contains the N termini of F420 hydrogenase and dehydrogenase beta subunits [1], [2]. 
The N terminus of Methanobacterium formicicum formate dehydrogenase beta chain (EC:1.2.1.2, Swiss:P06130) is also a member of this family [3]. This region is often found in association with the 4Fe-4S binding domain, fer4 (Pfam:PF00037). 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.47 0.72 -4.12 116 542 2009-01-15 18:05:59 2003-04-07 12:59:11 8 32 327 0 298 541 181 79.70 26 20.16 CHANGED tchhtApusst..hppuQsGGhVTulhthhL-pGhlDuVlsst....psp...................c.acshPhlssss--lhpsu..Go+as.hsshhphlcc ...........t.thhhApsp.p...hp.tupsGGlVTslhthhL-p.G.hlDuVlssp....psp................................c.a.pshshlspss--llp........ss..Go+as.hsshhphlt................................................ 1 80 189 254 +2610 PF03881 Fructosamin_kin Fructosamine kinase Bateman A anon COG3001 Family This family includes eukaryotic fructosamine-3-kinase enzymes [1]. The family also includes bacterial members that have not been characterised but probably have a similar or identical function. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.81 0.70 -5.43 35 2015 2012-10-02 22:05:25 2003-04-07 12:59:11 9 10 1716 3 561 1603 432 272.80 31 95.06 CHANGED MWpuIup.lo-phGpsapIpc+pplsGG-Ispsahls........cuppcaFVKlNp..cshLshFcuEA-uLphLscopolpVPcslshGss.cspu............aLlLEaLshp.hsstcsthphGppLApLHp.......hssptpFGa-hDNhlGsTsQPNsWpccWusFFu-QRIGaQLpLsc-+Gh.p...husl-pllcpltphL................ssHpPpPSLLHGDLWsGNsuhsss.G..........PllaDPAsYaGDREsDLAMoELFGGFPssFYcGYpslaPLs...sGYpcRKslYpLYHlLNHhNLF...GGpYlspApphlcpl.Lpp ......................................................................................................................................................................................................h..ht.ls..G.Gp..lp.t.s...a.t.....lp...............h.stp....p.hFl..K..spp...............tph.....h.s.....h..F.p....u..Es..s.tL....p......h......l....t.....c......s........t.......s.........l..p..........l.Pcl..h......s..s....G.ts...p...s...t.u.....................................a...Ll.h...-........a.....L.........s.......................t.....................s...........s.........ps.........s..........hpL........G.pp.........lA.c...LHp.................................hps.pspF.....G..........a.....c.....hs.....s.....t.....h.G.s.........h.....s........Q....s.....N......s........W........p.....c......c.....W...s................s...FF....u...c...p.R.....l....s.h.........lc..h..s.t...c..+...Gh.h..............hsp.h.c...p....l..h....c....p...l..t.p..t..L...............................t..s...+..p.s.....p...P.SL...L.......HGDLWuGNshhsss..G...............................................Ph...laD....P.A.....s.a..a.GcR............EhD..L.....A......h....h..t..l.....F...........s...s....h.....s...s..p........h...a.c.uY...pp....h.....h.....P....Ls.....ts.a...pc.Rhsl.YpL.Yh...LL.s.+.ht.hF...G..u.t..ahs.tstp.hcplh..t.............................................................................
............................................. 1 143 323 454 +2611 PF04961 FTCD_C Formiminotransferase-cyclodeaminase Bateman A anon COG3404 Family Members of this family are thought to be Formiminotransferase- cyclodeaminase enzymes EC:4.3.1.4. This domain is found in the C-terminus of the bifunctional animal members of the family. 21.80 21.80 23.30 23.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.73 0.71 -4.87 75 717 2009-09-11 00:17:51 2003-04-07 12:59:11 7 8 621 6 232 659 206 179.80 33 69.85 CHANGED holp-FlctlAScsPsPGGGSsuAlsuAlGuALssMVupLTl.GKKpYtph-s....phpplhppscphppcLhshlDcDscAFsplhsAa+LPKpTcEE+ttRptslQcAl+pAspVPlplAcpshchlplhptluphGNtsslSDssVushhscAAlpuAhlNVpINLs....sl...cDppahpchppchpplh .............olpcFlctluScsPsPGG...GosuAlsuAlGsALssMVupLTl...GK..+p.Ytp.hcp...................phpplhpp.s....p.plpp....cL....lphlDcDscAFstlhsAaclPK...pT-....EEK.stRppslQ....p.uhcp.AspVPhplsctshcshpl.hpphsppGNpsulSDsuVushhhcuulpuAhlNVhINLs....ul...+D.cpaspphppchpplh...................... 
0 116 171 202 +2612 PF01268 FTHFS Formate--tetrahydrofolate ligase Finn RD, Bateman A anon Prosite Family \N 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.70 0.70 -6.53 141 4825 2012-10-05 12:31:08 2003-04-07 12:59:11 14 10 2767 19 740 4276 2739 447.70 51 94.46 CHANGED hhSDIEIAppsp..hcPIpclAp.cl.Gltp--..l-.YG+aKAKlshs.hlc..pl...ps+..............sGKLlLVTAIoPTPuGEGKTTTTlGLupAL.s+l......GKcshlsLREPSLGPsFGlKGGAAGGGYuQVlPME-lNLHFTGDhHAITuApNLLAAhIDN+lapsN.p......................................................LsIDscpIsW+RVlDhNDRuLRpIslGL...Gupt..sGhsREsGFDITVASElMAILCLusD......LpDL+cRlG+IlVuas....hcspPVTAcDLtssGAhssLLKDAI+PNLVQTLEsTPAhlHGGPFANIAHGsNSllAT+hALKLuD...............YVVTEAGFGADLGuEKFhsIKCRhuG.LpPcAlVlVATlRALKhHG....Gl.shss...............LspENl-ALccGh.sNLp+HIcNlpp.aGlPVVVAIN+FsoDT-uElphlcchs..p.ph.Gsp.sslspsWuc..GGcGuh-LActVlcs......pppsu.p.........FchLYssch.slc-KIpsIApclYGAssVphospAccplpphcchG.ascLPlChAKTQYSlScDPslhGsPpGFslslR-l+lusGAGFlVslsGclMTMPGLP+pPAA.sIDls.-sGpIsGLF 
...............................................................................................................................................................................................................................................................................thoGDhH...AIsuANNLLAAh.lDNHIaQGN..p.......................................................LsIDs+c...IhW+R............s.lDhNDRtLRplssGL......Guph..............sG..h.sRE................DGFDITVASElMAILCLuss......lpDLKcRLu+IlluYs......h......c.s......p....PVT..spDLpspGAhssLLKDAlKPNLVQTLE..sTPAhlHGGPFANIAHGCNSllAT+hAL+.Lu.D...............YsVTEAG.F.GADLGAEK.FhDI.KCRhuG.lp.PsAVV.......lVATlRAL...KhpG....Gl....s+sp.....................L.sp.ENl-Al.cpGh.sNLt+HlcNl.pp.aGlPsVVAIN.pFs.o....DT-AEl..phlc......chspph....G....sp....sslspsWucG..GcGuhcLAc.pVlchh..........p.p....s....s...................FphlY...c.....c..h....slc-KIctIspc.lYG..A..ssVphss...p......A.p.cpltp....hp....p........G..asphPlshuKT.h.................................................................................................................................. 
0 264 454 610 +2613 PF03239 FTR1 Iron permease FTR1 family Bateman A anon Pfam-B_3227 (release 6.5) Family \N 22.50 22.50 22.70 22.50 22.20 22.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.92 0.70 -5.47 13 2689 2012-10-03 03:33:39 2003-04-07 12:59:11 9 11 1826 0 661 2163 330 227.80 21 69.51 CHANGED hhuhhllshREuLEAulllollluhlKp........................................................................stccppt.................t.lahGhhhulhhululGsuhhthhtultt.......hpc-lhEuhhShlAslllohMhhaMt+...up+l+sclctplspslt..................t.paulhlhsF......lsVlREGlEsVLFluuhsttss........ssshlusssGhhsuslluhhlapsuh+lsLptFhhhsoslLhhluAGLhuhu....hhthhchspt.pGhchhthspussshshsh.ushthshhssthsu.....llhslhGahsp..........hhhlhlllhluhhhht..hshthh ............................................................................................................................................................................................................................................................................................................................................................................................................t..h...h....ht..hh..h...h.t......................ht...t..hpt.........th....h..................................................................................................................................................t....t..h......hu....l..h...h...h...s.F.........................lsl...hREGlEs.llF.lhulhttts.............thhhh.hGs.hh.G...lh...s...u....h....l....l...u...h.l..l.h....h....s....s.....h..c....l....s....h.t....t.....h....h..h.hsuh..hl....hhlussLhutu..........hhthhp.t..s....h.......h.....p..h.............t.s........h..h.....p..t..s.h......hh.......................................................................................hh........................................................................ 
1 181 389 559 +2614 PF02491 SHS2_FTSA FtsA; SHS2 domain inserted in FTSA Anantharaman V anon Manual Domain FtsA is essential for bacterial cell division, and co-localises to the septal ring with FtsZ. The SHS2 domain is inserted in to the RNAseH fold of FtsA [2], and is involved in protein-protein interaction [1]. 25.40 25.40 25.60 25.60 24.90 25.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.50 0.72 -4.09 50 3487 2012-10-02 11:08:51 2003-04-07 12:59:11 15 8 3448 5 697 1941 882 80.40 34 18.91 CHANGED uGsplpshsspGhls..hpscEVsppDlcRVlcsApshshss...-pcILHslPpcasl.DspcGI.+-PlGMsGsRLcscsHllTs ........................Gtplps.sspGhls........hpscElo.ppDlcpV.lcs.A.p.s.h.s.lss.......-ccll+.llPp-ahl.Ds.p..c.G.I..+-P..lGM.G.l.RLcscsallTs............ 0 222 434 570 +2615 PF01580 FtsK_SpoIIIE FtsK/SpoIIIE family Bashton M, Bateman A anon Pfam-B_458 (release 4.1) Family FtsK has extensive sequence similarity to wide variety of proteins from prokaryotes and plasmids [1], termed the FtsK/SpoIIIE family. This domain contains a putative ATP binding P-loop motif. It is found in the FtsK cell division protein from E. coli Swiss:P46889 and the stage III sporulation protein E SpoIIIE Swiss:P21458 which has roles in regulation of prespore specific gene expression in B. subtilis. A mutation in FtsK causes a temperature sensitive block in cell division and it is involved in peptidoglycan synthesis or modification [1]. The SpoIIIE protein is implicated in intercellular chromosomal DNA transfer [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.18 0.71 -4.85 23 9858 2012-10-05 12:31:08 2003-04-07 12:59:11 13 40 4316 14 1937 10212 3173 196.90 29 27.52 CHANGED pltphhsspthctstsphslshGtph.tspslhh-lschP........HhLlsGuoGSGKSsslpsllhSlhhptpPpcVclhhlDsKhs.cLsshpslsHlhs....slsscsccshpsLctllsEMccRh..plhpphslcslst.....hppphsp..ht...................................................hs.lllllDEhs-Lhhssspc........lpshlsRluph......uR.....usGlHLllAs....................QRsus .............................................................................h...............t...t.....l....lsl..Gpsh...t....spslhh.D..L.s..c.hP............................HlLl..AGs.T....G.SGKSs.h..l.p.s.llhS..l..l..h..p...t...p...P..........c...c.........l....+...h..h.h.....l.D......P..K............h........s....-........L.......s..s.......a.....p........s.......l........P.Hl...hs...................sl.s...s..-...s...c..c....u....s....p..s.Lp...h...h....s...s.E.M...c...c...Rh........clh....s.p..h......s..s.+.s.lst.............asp..t...h..t..p.............................................................................................................................................................................................................................hPhlllllD.E...h.u-.L...h........h...s...s...s...pc.........................lpp...h...l..t+....l.uph...........uR.......us...GlHl.llAT......QRPs........................................................................................................................................................................................................................................................................................... 0 624 1301 1668 +2616 PF04999 FtsL Cell division protein FtsL Bateman A anon COG3116 Family In Escherichia coli, nine gene products are known to be essential for assembly of the division septum. 
One of these, FtsL, is a bitopic membrane protein whose precise function is not understood. It has been proposed that FtsL interacts with the DivIC protein Pfam:PF04977 [3], however this interaction may be indirect [4]. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.04 0.72 -4.27 66 1462 2012-10-02 13:28:50 2003-04-07 12:59:11 8 3 1456 0 307 877 315 93.00 37 83.99 CHANGED Ls...thlhtsl.th.plhll....LhlhllhoAluVlassHpsRphhsp....hpphhpc+-pL-hEWtpLlLEpsshup+uRlEplApccLsM...phPssscplllp. .......................................h.........h...s+lslh....Lhlsll..lo..A.ls..VV..h...s..s..H....p..oR..hLh..s.p.......h-ph....h.hE+....-tL....-h.E.WcpL..lLEcsuLu.c..HS.R.VEplApcc.LpM...ppscsuppsllh.t..................... 0 85 172 247 +2617 PF03799 FtsQ Cell division protein FtsQ Finn RD anon Pfam-B_1605 (release 7.0) Family FtsQ is one of several cell division proteins. FtsQ interacts with other Fts proteins, reviewed in [1]. The precise function of FtsQ is unknown. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.26 0.71 -10.49 0.71 -3.91 180 3219 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 3193 6 616 2021 863 117.40 20 39.22 CHANGED lAhWpp..s.t..tllsppGplhts..................phs..t.s.sLPhlhG........ssspp.h.......hphhpph.th.ss.lthplsslphsspps....WplhL...s.....s.Ghp.lhLsps.......s..htp...............+lpphsplhtph............t.......pp.........lth..lDlR .........................................................lAhhps..p....thl.spsGpshss...................shs...phs..shP..h..l..h.G....................................pss.p.p.l...............hphh.pp..h.s....ph..sp....l....p.....hp..lpplshsspcs.............hpLhh.........s.............s...Gh...p..lhlsps................s....htc.................+.lthas.pl..h.pl..............t...tp.........lth.lDh.................................... 
1 173 372 497 +2618 PF01098 FTSW_RODA_SPOVE Cell cycle protein Finn RD, Bateman A anon Prosite Family This entry includes the following members; FtsW, RodA, SpoVE 20.10 20.10 20.40 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.25 0.70 -5.67 15 10114 2012-10-02 17:14:55 2003-04-07 12:59:11 14 11 4374 0 2079 6986 6687 345.20 28 90.11 CHANGED hpLlhhlhhLhs.hGllhlaSASshpuhshhsss.hhhhh+QllahllGhllhhlhhplshp..hhp+hthhhallsllLLhllhl....lGssssGA+RWlslG..slslQPSEhsKlulhlalAphlupt............t..hpsphcshhhsll.hhhlhsslllhQPDLGTulllshhhhsllahuGhsh.phhlsllh......lulsshlhh....lhhcsY....phpRltuaLsPapDPh.....GsGYQlhQSlhAluoGGlhGpGlGpupQ.KhtYLPEupTDFIFAlluEEhGhlGsllllhLahllhh+uhpIAh+...spcpFtpllAsGlslhlhlQshlNIGhssGllPlTGlsLPalSYGGSSllshhsshGlLhsluppppt ............................................................................................................h...hhhhhllh..lG.llh..l...h..S..u...o.....h................s..............t..........h....h.........t..........t................h......h......h...h....+...Q.hha.h...h....l..u..h.l...h.....h...h..l...h...h...h.l....s..hc................hh..p..p..h......s..h....h..h...h...h...........h.....s...l...l..l...L...l........h.......l...h.h..................hG...t.t.....s..p.....G.........A......p.......p............Wls..l.G......sh..s..h.QPSE.h.hKl.sll.l.h....l..A....t.h...lsc.....................................p......h..h..s....p....h.......p....s......h......h....h....h.....hh.........l........h.h....l....s..h.h..L..l.....h..h..Q...P.....D.....L..G..o...s..l.l....l....h...s...h.....hh........s....h....l......a.........l.....u.........G....h.....s.......h.....p........h....h..h.....s.....hhs..................hh.s..h.s..s..h..lhh..................................................hhhp...sY.......................p..hpRlh..s..a......l...s.....P....h....p...c..s............................usG.Y....Q.l.hp.S..hhA..lG.s....G.G..lhG..p...G....h.....s.....p.......ut.....
.....p.....p.....h.....t...a....L..P...E........s..cTDFIFulluEEhG.h.l.Gshhll.hLahhllhR.s.h...h...l...uhc............sps.......F..sp..h.lusGl.......shhh...hhpshlNlGhs..hGl..l.P.l.sGls.LPhlSYGGSSllshhhulGlllslstpt..t............................................................................ 1 736 1429 1800 +2619 PF03867 FTZ Fushi tarazu (FTZ), N-terminal region Finn RD anon DOMO:DM07486; Family This region contains the important motif (LXXLL) necessary for the interaction of FTZ with the nuclear receptor FTZ-F1. FTZ is thought to represents a category of LXXLL motif-dependent co-activators for nuclear receptors. 25.00 25.00 28.60 27.10 23.40 23.40 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.01 0.70 -5.15 3 27 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 17 1 8 31 0 259.90 70 61.39 CHANGED MATTNSQSHY.YAD..NMYNMYH...sHSLPPTYYDNSuSsSuYQsTuptWQ..PASYQSNYtAaYu.QESYSESCYYANspaQ.............PTssTVPQ.PsVPThPEPlltsS...PVKupKRKAEDsAAuIIAAVEERPSTLRALLTNPVKKLKYTPDYFYTTlEpVKKuP...PupoKuouSPAPSYEQEYVAVPTP...........SASEDVDYLDVYSPQSQsp+.KNGDFl..TPPPhATTPsossuI.EGISTPPQSPGEKSuoA..VSpEINHRIVTAANS..AuDFNWSHIEETLA ....................................................MAsTNSpSH.Y.Y..AD..NMYNMYH.....sHSL.PP....T..YYDNSuS..suhYQs....S.........uoYQ.SYtu.hYs.QESYSESCYYhNsQcQ............................p.stT.VP..Ps.Pss.......P.P...............KupKRKAE.....EsAAuIIAAVEERPSTLRALLTNPVKKLKYTPDYFYTTlEpVKKuP...sspoKsAuSPAPSYEQ...EYVsVPTP...........SASEDVDYLDVYSPQSQ......sp+....KNGDFs..TPPP..TTPs........ohssl...EGISTPPQSPGEKSuoA...VSpEINHRIVTAsNu...AuDFNWSHIEETLA........ 
0 2 2 5 +2620 PF02952 Fucose_iso_C fucose_iso_C; L-fucose isomerase, C-terminal domain Griffiths-Jones SR anon Pfam-B_9303 (Release 8.0) Domain \N 28.90 28.90 28.90 29.00 28.80 28.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.78 0.71 -4.68 45 1454 2012-10-02 11:40:13 2003-04-07 12:59:11 12 4 1208 15 185 864 123 155.10 38 29.63 CHANGED ppshlhLhpsGsss......t.................pphptshptsphhstsppahpGu....shuophtphsssslThhRLs..cs.G.l.hhlucGpsl-hspc..spsplshpss.phspsaht..hs.p.........plhsshhuNHsuhsaGchupsLhphs.phLtIshs.h+ .............................................t.pGhlcLhNSGussLsh......tttpttt.t........hW-lppp-spusLtsTc.assA.h+..cY.F.RGG...........GaS.o.pF..hT.pu...ssPhThsRlNhl+GlG...PV.LQIAEGho.l-..LPc.c..sact..L.s.p.R..Ts.s.sW.P..oTaFs.s.R.lsucssF..psVYsVMsNWGANHuslshGHlGADllTLA.uMLRIPVs.MH.................. 0 82 141 159 +2622 PF02300 Fumarate_red_C Fumarate reductase subunit C Mian N, Bateman A anon Pfam-B_11568 (release 5.2) Domain Fumarate reductase is a membrane-bound flavoenzyme consisting of four subunits, A-B. A and B comprise the membrane-extrinsic catalytic domain and C and D link the catalytic centres to the electron-transport chain. This family consists of the 15kD hydrophobic subunit C. 22.50 22.50 22.70 22.60 22.10 22.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.80 0.71 -4.27 23 798 2012-10-03 07:11:12 2003-04-07 12:59:11 12 2 794 18 71 276 17 127.10 62 97.67 CHANGED soKRKsYVRpMpssWWpKhsFY+hYMlREuTulsslWFslVLlaGlhuLsp....GssuassFlsFLQNPlVllLNlIuLhusLhHshTaFphsPKsh.sIhlKsc+lsspsIlpuLWAlTslVSllsLslshl ................hoKRKPYVRsMTuTWWKK.LPFYRFYMLREGTAVPAVWFSI.LIaGLFALKs...........GP...E...u...WtGFV.sFLQNPVlVIlNLITLAAALLHTKTWFELAPKAA.N.I.I.VKsEK...MGPEPIIKu.LWAVTsVsTlV.ILaVAL.h...................... 
0 8 27 50 +2623 PF02313 Fumarate_red_D Fumarate reductase subunit D Mian N, Bateman A anon Pfam-B_12414 (release 5.2) Domain Fumarate reductase is a membrane-bound flavoenzyme consisting of four subunits, A-B. A and B comprise the membrane-extrinsic catalytic domain and C and D link the catalytic centres to the electron-transport chain. This family consists of the 13kD hydrophobic subunit D. 25.00 25.00 28.90 28.90 23.80 23.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.46 0.71 -4.21 21 816 2012-10-03 07:11:12 2003-04-07 12:59:11 12 3 799 18 72 251 10 113.80 66 96.91 CHANGED hspsPKRSsEPlaWuLFGAGGMlsAhlsPVlILllGlLlPLGlls.tshsacplhuFApoalG+lhlLlllhLPhWpuhHRlHHGh.HDLKlHh.ssuphla.YGhAslholls..hhhVhs .....I.NsNPKRSDEPVFWGLFGAGGMWuAI...IA....PVhlLLV...GILLPLGL.h.P.uDAL.SYERVLAFAQSFIGR...lFLhLMIVLPLWCGLHRhHHAM.HD.LKIHV.P.A.GKWVF.YGLAAILTVVTlIGll.T................. 0 8 28 51 +2624 PF03630 Fumble Fumble Finn RD anon Pfam-B_3299 (release 7.0) Family Fumble is required for cell division in Drosophila. Mutants lacking fumble exhibit abnormalities in bipolar spindle organisation, chromosome segregation, and contractile ring formation. Analyses have demonstrated that encodes three protein isoforms, all of which contain a domain with high similarity to the pantothenate kinases of A. nidulans and mouse[1]. A role of fumble in membrane synthesis has been proposed[1]. 
21.20 21.20 21.20 21.60 20.90 21.10 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.90 0.70 -5.59 11 1080 2012-10-02 23:34:14 2003-04-07 12:59:11 9 12 701 15 460 840 178 273.20 33 69.95 CHANGED s+lulDIGGoLsKLVYassps.sspp.pp..t.h.p...............................tGRLpFlpFpTpcIsphlpah+p.............pshsp.phslpATGGGAaKFt-hh+cplsl..plp+.DEh-sLlpGlsFllp...sl.pEsFsap.sp-sE..h.....phs.tsshYPYLLVNIGSGVSIltVpGpspacRVuGoSLGGGTFhGLssLLTsspoF-EhL-hAp+GDsoslDhLVtDIYGsc..Yp+hGLsusslASSFG+sh.....pcsKp....................ppaspEDlu+SLLhsIoNNIGQIAhhpAhppslcRlaFsGsFlRspshoM+TLoYAlcFWSpGphpAhFL+HEGYhGAlGAFLpht .........................................................................................................................................................................................................................................................................................................ts.p..hp..Fhp.....o.p......ph.p.p.....hl.phhpp..............................................................hlphTGGsA...hh.h.....tp....................h...s..h......th.h.sEh-.shh..................pGl.hl.p................E..............................................................................................p..hps.....hh.salllNlG.oG..........sS..llhl..p..u..p..s..p...h...cRVuGo..ulGGGThhGLspL........L..o.t.h.p.....s.a--hlp...hA.p........cG......DpsplDhh..VtD.IYuss................c....s..Lsu.shsA..S..sFG+sh..............pp..t...p.t.......................................................................thoppDlstullhhl................s..psIuplAh.hhAtpp......pl.ccllahG...shh+sp....h.hhchl..s...sahsh...............tthpshFlc..ctua.GAlGAhh...t............................................................................................ 0 148 239 359 +2625 PF04930 FUN14 FUN14 family Wood V, Bateman A anon Pfam-B_8237 (release 7.5) Family This family of short proteins are found in eukaryotes and some archaea. 
Although the function of these proteins is not known they may contain transmembrane helices. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.22 0.72 -3.72 34 431 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 305 0 272 412 7 95.60 28 60.52 CHANGED QlulGulsGassGashpKluKlshhslGssllhLphhpppGhIplsWpplpptlppstppst...................shhpphhshl..ts........................shsuuFsuGh.hlGht ........QlshGulsGhssGahhpKluKlsAhslGsuhllL.QhhsppGal....plsWp+.lpc.p.h.p.p..s..pppht...........................................................h.phhp..ph.hphlt.tsh..........................hsuuFhuGhhlGh............................................. 0 82 127 205 +2626 PF01475 FUR Ferric uptake regulator family Bateman A anon Prodom_2003 (release 99.1) Family This family includes metal ion uptake regulator proteins, that bind to the operator DNA and controls transcription of metal ion-responsive genes. This family is also known as the FUR family. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.39 0.71 -4.18 24 9302 2012-10-04 14:01:12 2003-04-07 12:59:11 14 10 4091 22 2167 5940 4980 119.00 26 80.09 CHANGED csGlKlT.pRhpILpllppscp...HhoAE-lYctlh..ccsspluLuTVYRsLphhcctGllpchphs.sucshaElsp...tcHHcHllChcCGclhEFpsspIEphppclsccpGa+lhsHplplY .......................................t.tGl+h.T.pRhtl.L.c.l.l.hpp....pp.......H..hoA--laptlp...........p..p.........t...s..p..l...u....l....u............TVYRsLphh.t-tG.l.l.c.c...l..p..........hs.....s.......u.......p......s...h..a....-..........h....sp...............tc.p.......H......p.......H...........l.....l......Cpp....C.G......cl..h...-....h....p....s......s............l.....p....p.....h.t......p...p....l...t...t....p...h...uaplppppl.h........................................................... 
0 737 1498 1892 +2627 PF00757 Furin-like Furin-like cysteine rich region Bateman A anon Bork P Domain \N 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.75 0.71 -12.75 0.71 -4.55 40 888 2012-10-02 14:20:19 2003-04-07 12:59:11 15 44 120 49 334 892 2 154.30 37 12.91 CHANGED scpCs.........ssp.sp.......t.s..CWuss........cCQ..................plCsppCst..tCp.....ssscCCH.ppClGGC......ouPp.sosChAC+capp....sGpClspCPs................upYpF.GtpCV..cpCs............................ps.hlh.....csupClhpCPsuhpcst......ssstpCp.Cs.Gh.CPKlCp ...................................t.s...........t..t.Cp.ss...........tts..CWups........pCQ.........................tphCsp.pCst...pChh..............pss-C..CH.ppC.s.G.GC......oGPp...ss-C.l.AC+pF.c.......sG..sCh.....pp....CPs................u.+.Y..p....a...GspCV.....c.pCP...........................................pp..hlh.......csuuC...lpp.CPsshhcs...........sssttCp.sCs..G...C..P.KsC.............................. 0 65 91 210 +2629 PF04632 FUSC Fusaric acid resistance protein family Mifsud W, Bateman A anon Pfam-B_5345 (release 7.5) Family This family includes a conserved region found in two proteins associated with fusaric acid resistance, Swiss:P24128 from Burkholderia cepacia[1] and Swiss:Q48403 from Klebsiella oxytoca. These proteins are likely to be membrane transporter proteins. 
30.90 30.90 30.90 30.90 30.80 30.80 hmmbuild -o /dev/null HMM SEED 650 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.29 0.70 -12.94 0.70 -6.09 92 3210 2012-10-02 19:04:43 2003-04-07 12:59:11 7 8 1176 0 491 3624 82 530.20 21 92.95 CHANGED phhaulKshhAuhLALalAhtlsLspPhWAhhTValV.upPhs........GhshuKuhaRllGTllGsssulsllshhsppPhLhhhulAlWlulCshhus.h......Rs..cuYuFhLAGYTssllulsuls..sP...ps.....lFshAlsRspEIslGIlCuslVusllh.P...ppsstsltsplpphhtshtphs..sssLssptssst.....................tttplhuclssl-shtstssa-ssphptpsttlptLpschhsll...............shhpultphhptlp...stsss..........ltsh..lpplss.............................ttt.tttht..............................tLhtpl.tshpstht............shtthhhtphhphLtphhpshpshpslppshtt............................................................tpsht..hsh+pDhhhAhhsulRssluhhhsuhhWlsouWsuGusslhhsulssslhustssPs...hshphhhGshhusshuhlhhahllPplss.FshLslsL.usshhhsuhhhspPphus.huhuhslhhshhlu.psshs..hshssahNsulAhllGhhhuslshtll.hPsssphttcRLhpshhpcls.phsptphst...tpt.......tatuphhD+ltpLhsth......ttstspcphhp.slssLclGpsllpLRcthtp..........hssshpts.lcphLpsls.......t..tst.ttt.tsssthhptlppshtthssptsttt...............................tphhssLhtlptsLhs.spsh 
..............................................................................................................hhauh+hslAhhL.u....l....h..lu..h......h......h....p........l.....s......p..s......t........Wuhho.s...h..l...V..h.t..P..ts.....................Gslh..+uhhRlh..GTllGsh.s...u..l...l....l.....h...h...h......h.....h........p.....p....P....h........l............h.....l.l...h.h..u.h..h........h....u.h..C..s..a.l..u.t.....................pt....t.u......Ys..h...h.L..A....Gh.T.h.h...l.ls.hs.hhs...pP.....pt.................h.h..p..h.A.l..h.Rss-l..llGllsuhhhshl.h....P...........pps...t....pt...h...p....p...p...L....p.s...h...l.s....p...hh....p....hh.....p.hhpststpt.............................tthttl..htp....h....h...s...h..p.t....h.......t.s....s............h...-...s..........t....p.....h......p.......p...t.......s..........t.......t....l.......p......s......l......p...p....l...ph.h....................................ohh.s...p.h...h..h......p..h.........p....t.th.......................lpph..lppltp................................................stth.ht.p.ht............................................................................pl..hp.l.t..ttttt........................................hp...h..hh..t..p..h..h....hh.h...hh...tph.t.t..p.h....................................................................................................h...hs.thp....h..h.h...hh+shhshhhhhhh..hs.tW..s..h.shhhh.hslss....ths.s.sh....h..hhhshhh.h...huhlhhhhlhs.hpt.h........h.hh.l.s..h.h.h.h...hh.h..t.......p..s...hh.h.t.....hl..hh.h.....s.........hp.h...t..hhs.ps....hu.llGhhhuhhh.hhhl...hs......s.tht...hhtthhtth.....t.h.ph...................t.p.....th.sh.h.........h..h...........................t...h...................t..t....h.h.t...............................ht..h....hhtth............................................................................................................h...................................................................................................................
..................................................... 0 86 207 348 +2630 PF00523 Fusion_gly fusion_gly; Fusion glycoprotein F0 Finn RD anon Pfam-B_102 (release 1.0) Family \N 20.80 20.80 20.80 22.30 20.60 20.70 hmmbuild -o /dev/null HMM SEED 490 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.33 0.70 -6.43 12 7380 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 164 82 0 4285 0 200.40 38 89.01 CHANGED IshstLpplGllssps+phtlhoshpopalVlcLhPsls.......spssCspsplspYpphLscLLpPlt-sLst.pp..ts.......sspRp+RFhGsllGthA...LGVAT..AAQITAulALscAppsttsItplKsSlpsTNcAVpplppuspplshAVpslQDalNsplsPulsphsCchtshplGlpLs....ahoELsolFusplsssu..sslolQALp.Lhuusls.llpshhssphshhpllpothlpGpllsls.pth.hslplslPslsphssshlhch.slShshp...spEhhhplPshlhspus.ltsastss.ChhsspshhCspssu.slssphppC...lpGshosCshohlhushhs+FhhspGsllANCh.uhhCpCsssstsIsQs.spulohIshppCshltlsshphplsp.pspsshhtshslhsu.slslpPlD.lSspLupssppLpsupphlccSsphLsslsstpsotsshlhhlllsslslllhll ..........h.............................hhlcl.s.............th.pt.lt.hpphlppl....tssltt.ptshp....................sstRptRhlGhllGuh............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +2631 PF01621 Fusion_gly_K Cell fusion glycoprotein K Bateman A anon Pfam-B_1083 (release 4.1) Family This protein is probably an integral membrane bound glycoprotein that is involved in viral fusion with the host cell [1]. 
19.40 19.40 20.90 20.40 17.90 17.70 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.22 0.70 -5.40 11 89 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 40 0 1 73 0 275.70 43 99.05 CHANGED hhsspst+lullsllouYhlhllWYss.tsphts..sClYAstshs...sssshsW....................ttaNsoLlYl...shsst..h.sshpshsshCRpsllstsshshhstssslcc+lRhVhtsRNChAYLWpspl+hlsluhhlYhsFlslRppRRMFGshRsss-hlSPssYoLNYAupllSsllL+ssYTKlsRLLsElshpRpuLScsFctDPloFhap+ssshsLlshElhl+luutslshsTlslsasPCuhlaPhal+IhsWlFVuslsslELlull.tscPsptuut........sssssspss..GltulCs...sCCusllSGlhlKslYlshlsssVllhl+YEpclQhpLFG ....h.hhhph.hh.hlhhhosashhhhaYss.hsp..p...ClYAshs.....sst..shhW......................hNpohlah..........sst....sshtshs..Cht.hlttp.hsh.s...shpp+lh.VhtshsChthlW.sphphhhhuhhlYhhFlhh+p.RpMFGshpsttchlsPstYhhNYsspllusshLthsYTKhschLCELSlpRpsLsphFcsDPloFLa++PululllssElhlRhsAhsLlluTshls+ssCAlsYPLalpIhTWsFVuhlulsELhhll...cpssA.csut.........susssscsp.......GhpGVCu...pCCShlLSGIsl+lhYlAllshhVlhhh+YEpplQhtLhs.... 2 0 0 0 +2632 PF02663 FmdE FwdE; FmdE, Molybdenum formylmethanofuran dehydrogenase operon Bashton M, Bateman A anon COG2191 Domain This entry represents the FmdE protein that is encode by the molybdenum formylmethanofuran dehydrogenase operon. FmdE does not co-purify with the molybdenum isozyme that is formed by FmdC and FmdB [1]. The domain is typically found as a single copy, but is repeated in some sequence two to three times. It is also common place to find this domain co-occurs with a zinc-beta ribbon domain, suggesting that is may bind nucleic acid and be involved in transcription regulation. 
21.60 21.60 22.50 23.50 21.40 20.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.76 0.71 -4.25 188 431 2009-09-10 13:21:47 2003-04-07 12:59:11 9 12 244 4 258 430 25 131.40 25 54.33 CHANGED pFHGHhCPGlslGh+hsphAhcpL....sh...t......p.........s....cplhslsEss.....s...........ChsDAlQhlsGCThG+..s...sLhhh............ch....GKhAhoh...hscps..sculRl.......h......hp.pthpth.....htth..p........................cppppth..ptlhphs..cclaphpt ........tFHGHhCPGlslGh+huphAhcpL....sh....s........p.................s......cclhslsEss..............s................ChsDAlQhlsGCThG+....u....sLhhp.............ch......GKhuhoh.........hs..cps......sculRl............h.....hcs..pthpth........ph.th..p....................p..pphh.p.hhphs.pphh....h................................................... 0 119 200 228 +2633 PF04186 FxsA FxsA cytoplasmic membrane protein Mifsud W anon COG3030 Family This is a bacterial family of cytoplasmic membrane proteins. It includes two transmembrane regions. The molecular function of FxsA is unknown, but in Escherichia coli its over-expression has been shown to alleviate the exclusion of phage T7 in those cells with an F plasmid. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.35 0.71 -4.50 136 1830 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1809 0 376 1109 1286 120.20 33 73.64 CHANGED hl.lhlllP..llEIslllpVGshIGhhsTlhLlllTullGshLlRpQGhpsltphppphp...pG.chPuppll.....-.GhhlhluGlLLlhPGFlTDhlG.llLLlPssRphl.tth....ltp+hph...........ts...s.ht ...................................hhhhlhshlE...IslhItVu.p.....hlG.s.h.h...TlhLllhTullGh.Ll.R.s.QGhps..hhphppch.s......tG......c.............P.......u.t.phl............c.ul.hlhlAGl.LLllP.GFhTDhlG.lLL.L.l.Pss.pphl.phh........lh.pht.........tth.tthh...................... 
0 104 230 311 +2634 PF04799 Fzo_mitofusin fzo_mitofusin; fzo-like conserved region Waterfield DI, Finn RD anon Pfam-B_6217 (release 7.5) Family Family of putative transmembrane GTPase. The fzo protein is a mediator of mitochondrial fusion [1]. This conserved region is also found in the human mitofusin protein [2]. 23.70 23.70 23.80 25.60 23.50 22.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.02 0.71 -4.88 11 189 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 95 1 101 193 1 163.20 50 23.86 CHANGED hsshspsshopE-hhlshVsulASlTSRoShGlllVGGVlWKolGWRlIsluhulYGsLYlYERLoWTT+AKERuFKpQFVcaAocKLphIVShTSuNCSHQVQQELuoTFARLCpQVDsTppsLp-EltcLspcIppLEslQspuKlLRNKAshlpsELppFpcsa.Lpts ..................................................hs.sshoQEEhMl..ohlouL.ASL..T..SRTS..MGllVVGG...VlWKsVGWRLIulShulYGhLYlYERLTWTT+AKERAFKcQFVpaAoEKLQhIV.ShTuuNCSHQVQQELusTFA+LCQQVDlTpcpLEpEIsplsccI-hL-slQspuKlLR....NKAshL-sELp.Fscpa.Lp..s............................................... 0 28 36 66 +2635 PF01125 G10 G10 protein Finn RD, Bateman A anon Prosite Domain \N 23.80 23.80 25.10 25.10 19.90 21.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.99 0.71 -4.64 28 378 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 308 0 261 313 4 138.50 57 91.64 CHANGED MP+I+p.pp..+tPP-Ga-cIcsoLpcasp+M..................+-Apsps...tc..sc...............p+sEs.........hW.IaplpHpRSRYlY-haY+++...tIS+ELY-allcppYADssLIAKW+KpGYE+LCCL+CIQss-ssau.............................sTCICRVP+upLcps........phlpCspCGC+GCuS .................MP+l+pt.p..KsPP-Ga-hIEsTLpEhppKM.....................................R-AE......s-s...H-...GK...................+KsEs..........LWPIF+IpHQ+SRYIYDLaY++K...AIS+ELY-ahl+psYADtsLIAKWKK...pGYEp.LCCLRCIQT+-oNFu.............................osCIC.RVPKupLcts.........phl.pC....lpCGCRGCu............................... 
0 89 141 212 +2636 PF00503 G-alpha G-protein alpha subunit Finn RD, Anantharaman V anon Anantharaman V Domain G proteins couple receptors of extracellular signals to intracellular signaling pathways. The G protein alpha subunit binds guanyl nucleotide and is a weak GTPase. A set of residues that are unique to G-alpha as compared to its ancestor the Arf-like family form a ring of residues centered on the nucleotide binding site [3]. A Ggamma is found fused to an inactive Galpha in the Dictyostelium protein gbqA [3]. 48.80 48.80 48.80 48.90 48.70 48.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.31 0.70 -5.78 134 2902 2012-10-05 12:31:08 2003-04-07 12:59:11 15 23 511 117 1599 2617 11 305.60 39 89.11 CHANGED Mushh.......................................................................................ptpppttppps.p.ph.p.c.p.h.p.pc.ppptp.................pplKlLLLGsGESGKSTlhKQh+llatssas........................pp.EpppapsllhpNllpuhptllcuhpphphsht................................................................................................................................t.ppppptphlhphtsthtp.............................................................................................................................hstchspslppLWp.Dsslppsap..+ppchpL...................tDsspY.................Flsslc........Rlspss..YhPop..................pDlL+sR.l............................pTsGIhEhpFph......................pphpa+hhDVGGQ.RoER+K.WlphF-..................................sVsullFlsuLS-YDpsLhEDpps......................................NRhpESlpLFcsIsssphFp.......sssllLFLNKhDlhccKlpps...lpphFscap............................Gs..sshcp.stpalpppFhphsppp.p........................+plYsH............hTsAs..DTpslphl...hsslp-hI 
.............................................................................................................................................................................................................................................................................................t..st..h.p.p.l.p...tp..t...t..........................................................................pth..Kl.L.L.L...........G..s..........GE....S...GKST.......h..lK.........Q..M..+.........Il..H.t..s.G.as...............................pc.-....ttp.ap.hl........hpNhhp..u..h..hsllcAhpplth.ht.....................................................................................................................................p.tpt..psp..l.hth...s.t...t..............................................................................................................................................................................................................................................hs.chht..sl..p...pLW..p.Ds.Gl..p.tsap...R....ppEapL.............................................DsAtY......................................aL..ss..l-........Rl..s.t.s.s..YhP..op......................pDlLRsR..l............................hToGIh..Eh.pFph...........................................................................pplpa.+hhDVG.GQ..RS.....E...R+K..WI..HC.F.-..................................sV.Tu.I.lFhl.......AlS.p..Y.D..s.L..hE...D..p..pt.................................................................................................................................NRhp...EShtLFcoIh..N.s.caFt.......po.SlIL.......FL.NKhDlhpE..Klht..s.....lpth.F..P-Yp........................................Gs..pshpp...Atta.l.....hppF..phsppp.t........................+.lYsH......hTsAs..DTpslphV..FssVpDhI................................................................................................................................................ 
0 558 822 1230 +2637 PF00631 G-gamma G_protein_gamma; GGL domain Bateman A, Ananthraman V anon Ananthraman V Domain G-protein gamma like domains (GGL) are found in the gamma subunit of the heterotrimeric G protein complex and in regulators of G protein signaling (RGS) proteins [1]. It is also found fused to an inactive Galpha in the Dictyostelium protein gbqA [2]. G-gamma likely shares a common origin with the helical N-terminal unit of G-beta [2]. All organisms that posses a G-beta possess a G-gamma [2]. 22.30 22.30 22.60 22.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.37 0.72 -4.23 50 1212 2009-09-12 06:39:52 2003-04-07 12:59:11 17 17 266 24 681 1015 0 65.80 28 29.38 CHANGED sphphphtpEl.ppL+p-Lpc..pRl.plSc..AstsllsYsp..sscDshlsss......s.ttpNPatpp..............tss.Chlh .......................t.....thpcpl.ppL+hclsh..pRl.KlSc...........uut....s....LhsYs-p.....hsppDPllsss.......s.tt.NPahpc....................tts.Ch......................... 0 129 220 399 +2638 PF04466 Terminase_3 G2P; Phage terminase large subunit Waterfield DI, Finn RD anon COG1783 Family Initiation of packaging of double-stranded viral DNA involves the specific interaction of the prohead with viral DNA in a process mediated by a phage-encoded terminase protein. The terminase enzymes are usually hetero-oligomers composed of a small and a large subunit. This region is found on the large subunit and possess an endonuclease and ATPase activity that require Mg2+ and a neutral or slightly basic reaction. This region is also found in bacterial sequences [1,2]. 
22.40 22.40 22.40 22.40 22.10 22.30 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.22 0.70 -5.38 10 1027 2012-10-05 12:31:08 2003-04-07 12:59:11 8 7 859 2 119 1203 485 355.30 23 85.08 CHANGED s+papVhhGGRGSuKShslAhplVl+h....hpp...stssLl.lRcltNTlccSlasplp-slspLslschF+hs+Sslpl.phssGup..FhFpGh.DcPtKlKSlc....slsslWlEEAuphpp-s.acpLlsolR......c.thclahSaN.PhscpsWla+pFh..........Dspth........sDshlc+STYpDN...FLscshlcphE-hK+....pNschYRhhhLGEhsssGstlhs...lcshplpssp.tthphshth...........DhGapscssuhlphulch++phlalhs-ahp......sthlccpsu-hl+-.........tphp+cshpu-usEs+ultshcpp.......................thh+hhsu++upsohhpts+hlcpa+sllt..................pscspphIpEhpphpappDcpus.hs....c.hctD.....sHslDAlhYAlcc ..............................................................................................................t...hhlhhGG.RGSuKSh.s....h.uhhh..lhth......hp................s.h.t...hlh..h...RchtsolccSs.app.l.c..sl.p.p...h...u......l....p..c..........h..a..c.h.sp.o..h.c...h..hhss...Gsp.....h.l.FpG.....h....c.........s...s...p..+l..KShp.........sls..hhWh.EEA.....pp..h....pp.ps....aptlh.olR...................p.h....tp....l...a.h.oa...N.P.h.pp...ts..al.cpa.h.............................................................................psshl.hp...ssY....p..DN............a.lsc....hh.p.p.h.-.p.h.p.p................ps.sph.Y.+..h..hh.Gch.h.s.s..ss.h.s..h.................h..pt......h..pt..t........t..t..h..p....h..h.th......................................Dh....G.as....p...c.ss.uh.h..p..h.h...l................p...p.ptl...a..l.....p.-h.h.t...........................th......p...p..t.h..sp.hlhp...........................................................th..t.pt.......hh.uct..s..p......c.ht.thcp....................................hhph.hsspK.s..s..s.h..s.........p....h......l...p.t....h......chll................................ps.phhpEhpph.paphD.pt.s.p..h.........cp.p.........sHhhDAhhYsh...................................................................
............................................................................................................................................... 0 34 78 95 +2639 PF04309 G3P_antiterm Glycerol-3-phosphate responsive antiterminator Waterfield DI, Finn RD anon COG1954 Family Intracellular glycerol is usually converted to glycerol-3-phosphate in an ATP-requiring phosphorylation reaction catalysed by glycerol kinase (GlpK) glycerol-3-phosphate activates the antiterminator GlpP [1]. 21.20 21.20 21.20 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.11 0.71 -5.23 8 1079 2012-10-03 05:58:16 2003-04-07 12:59:11 7 1 949 12 137 527 4 171.60 41 93.62 CHANGED PllsAl+shcpl-chl-S-hphlhLLsuclssl+pll+hlKs+sKpsFlHlDLlpGLspc.EhuhcFIppthcsDGIISTKuplltpAKKpslhsIQRlFllDSpALcpuhcpltphcPDhlEVLPG.lhP+lIccIsc+oshslIAGGLlcTcEEVcpAL+uGAsAVS.............TSpc..cLWc ..............................................................sllsAl+s.c.sLct.hl.c..o..c.h...ph.lslL..spIsplpsllchl.KpsuKhsFlHlDLlcG.lusc.Ehulpalpp.hcsc.GIISTKushl+pA+phuhh.sIpRlFllDS.uhcpuhc......lpp..sp..PDhlElLPG.sh.P...+.l.lpp...lp..c.ch.p...hP.l..IAGGLIpsc--lppAlpuGAsAloTSppplW..................... 
0 64 97 119 +2640 PF00479 G6PD_N G6PD; Glucose-6-phosphate dehydrogenase, NAD binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.70 21.70 22.50 22.30 21.50 21.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.08 0.71 -4.05 117 4951 2012-10-10 17:06:42 2003-04-07 12:59:11 17 21 3884 29 1192 3729 762 172.30 37 36.68 CHANGED llFGAoGDLA++KLhPALapLhppGhLsps.hpllGhARpcho.c--a+phlcpslpp..........tpphsppthcpFhp+lpYhssshs.sspuappLpptlpch-p....t.ttssplFYLAlPPshFsslsppLppssls..ttp......hsRlllEKPFG+DLpSAcpLscpltphFcEcQIYRIDHYLGKEhVQN .........................................................llFGuoGDLA+RKLhPuLapLh.p.p.G.h.L.......s.p.p...htll..Gh..u..R..p.c..h.s.....c...-.p..app...h.l.cp.u...lpp....................p..t.h.sppthcpFhp+h..pY.hsh.-.h......s..c...s.p.s.....app.......Ltphlsph....pp..................t.......t................splFYLAhsP.shFsslsppLppss..Ls..........ptp.t.............hsRlllEKPFGpDLtSApcLNsplt.phF..cEcQl...........a.R.IDHYLGKEhVQN................................... 
0 344 698 973 +2641 PF02781 G6PD_C Glucose-6-phosphate dehydrogenase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.50 19.50 19.60 19.60 19.00 19.40 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.96 0.70 -5.56 11 5312 2009-09-10 19:48:29 2003-04-07 12:59:11 11 20 3976 29 1208 3992 928 265.50 41 59.39 CHANGED LsLRFuNplFsshWNRcsIssVpITF+EsFGTEGRGGYFDphGIIRDlhQNHLLQlLsLlAME+PsShss-sIRDEKVKVL+ulcslssc-V....VLGQYsuu.sGs....ptGYl-DsTVspsSpTsTFsAhhlcIcNERW-GVPFhlRsGKALsp+KuElRlQFKsVstslFcpp...htcNELVIRlQPsEAlYhKhhsKpPGls...hssppo-LDLTYucRa.pshhlP-AYERLILDshpGspspFVRsDELctAW+hFTPlLctl-.scps+PhsY.hGS+GPptuDchhpcsGasascs ................................slRF.AN.s.lFcslW.NppaIcpVQI.............ThuEplGl.E.sRuG.YYDp..s.Gsl..R.DMlQNHlL.................Q..lLs.llA..ME..Ps...s...h....s....s.-....s....lRsEKl.................K..........VL+u..lc....h....s.....t..p.s.l...........hV...RGQY....t..u...Gh............................tu................t......h.uYh-E.....s....l....s.......s.Ss.TE.TFsAh+h.I-NhRWs...GVPFYlRTGKRLsp+s.oclsl.FK.p...s....s....h....s...l.F....tps................hs.Nh....LsIc.....l.Q..P.sE....Glplp..h.....t.....s...K.......P.G..s..................hp..h..p...sp.L-.......h.....s.......h....s.pp................t..........p..h.....p.........s..-....AYE+LlhDshpGstshFs.+.......tDEl.ctuWchlsslhchWt....t.........s.........t....s........................s..Ytu.....Go..........h.GP.tu.thl.t+p.GhtW..h.............................. 0 350 709 987 +2642 PF01468 GA GA module Bateman A, Finn RD anon Pfam-B_895 (release 4.0) Domain The GA (protein G-related Albumin-binding) module is composed of three alpha helices [1]. This module is found in a range of bacterial cell surface proteins. The GA module from Swiss:Q51911 shows a strong affinity for albumin. 
24.10 24.10 24.10 24.10 23.90 24.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.86 0.72 -3.82 102 9090 2009-01-15 18:05:59 2003-04-07 12:59:11 12 216 256 15 122 9161 0 59.70 37 28.26 CHANGED lssucssLsGsppLtpAKpsAppplssLspLNssQ+sshpspIssAsolssVsslpspAp ................................VssuKsALN..G.s..p.p....LspA..K..psA.p.p.sls.sLspLNsAQKs.sLpsQIspAs..slssVsslppsAp.......................... 0 58 62 119 +2643 PF02938 GAD GAD domain Aravind L anon Aravind L Domain This domain is found in some members of the GatB and aspartyl tRNA synthetases. 21.10 21.10 21.10 21.70 21.00 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -10.04 0.72 -3.83 24 4662 2012-10-01 21:10:01 2003-04-07 12:59:11 9 15 4517 16 1094 3469 2015 96.00 34 16.26 CHANGED Fpsshtp..sGpV+AlplPs..sshoR+pIcthp...chuppaGApGLsal+lps.....tthpuslsKa...lsEpphpplh-pssApsG.DhllhsAsp.tphsspuL ............................Fttshps..tGpVtAlsl...s...Gu......s...phoR.Kp.lDp.hs......ca.s.p..t.a...G..AKGL..A..a.l...KV.s-............ssl...p..u.PlAKF.........l...s.p....-..t......h..ps......lhc......ps..s....A......ps........G..DllhF.sADp.tcllspsh....................... 0 359 705 913 +2644 PF02337 Gag_p10 Retroviral GAG p10 protein Bashton M, Bateman A anon Pfam-B_959 (release 5.2) Family This family consists of various retroviral GAG (core) polyproteins and encompasses the p10 region producing the p10 protein upon proteolytic cleavage of GAG by retroviral protease. The p10 or matrix protein (MA) is associated with the virus envelope glycoproteins in most mammalian retroviruses and may be involved in virus particle assembly, transport and budding [1]. Some of the GAG polyproteins have alternate cleavage sites leading to the production of alternative and longer cleavage products (e.g. 
p19 Swiss:P21411) the alignment of this family only covers the approximately N-terminal (GAG) 100 amino acid region of homology to p10. 19.60 19.60 19.60 21.50 18.50 16.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.96 0.72 -4.24 5 102 2012-10-01 20:35:47 2003-04-07 12:59:11 12 13 32 2 27 135 0 88.40 37 14.18 CHANGED MGSopSc.pLFVotLpcsLKpRGl+V+cscLlsFasFlc+VCPWFPpEGolslcsW+RVG-plccYYspHGPEcIPlsTauaWsLIRDlL- .................GpphSp...alphLphhLp...pRGlpVppssLlpFhphlccssPWFPcEG.T.lsLcsW++VGcpl+pahsh+Gs-pIPlpsashWsll+-hLp.................. 0 11 11 11 +2645 PF00540 Gag_p17 gag_p17; gag gene protein p17 (matrix protein) Bateman A anon SCOP Domain The matrix protein forms an icosahedral shell associated with the inner membrane of the mature immunodeficiency virus. 20.80 10.00 20.80 16.00 20.30 9.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.74 0.71 -4.41 3 37754 2012-10-01 20:35:47 2003-04-07 12:59:11 13 30 106 33 0 27496 2 123.20 74 36.45 CHANGED GARASVLoGGcLD+WEKIRLRPGGKKKYplKHLVWASRELERFAlcPGLLETcEGC+KILpQLpPuLpTGSEGLRSLYNTVAVLaCVHQ-IcVKDTcEALEpl+ccpscscKK..TtssssuuQQuAusGGTusSuGlSQNY ..........................GGc..LD.tWEKI...RL.RPGGK.K+Y+LKHlVWA.....S....R.ELERFAlNPG.L.LETo.EGC+QI...ltQ.L.QPuLQ.TGS..EE.L+SLaNTVATLY..C.V..HQ+I-VKD.TK.EALDKIEEEQ.N....KSKpK......................s...Q....Q...us...u..s............s..u...s..s...u...p...V.S.QNY...................................... 0 0 0 0 +2646 PF02228 Gag_p19 gag_p19; Major core protein p19 Bateman A, Mian N anon Pfam-B_1307 (release 5.2) Family p19 is a component of the inner protein layer of the viral nucleocapsid. 
20.80 20.80 26.00 23.50 18.50 16.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.19 0.72 -3.68 2 75 2012-10-01 20:35:47 2003-04-07 12:59:11 11 10 18 1 0 77 0 83.90 64 15.89 CHANGED MGphaSpsAsPIP+sP+GLAhHHWLNFLQAAYRLpPGPSpaDFHQL+KFLKlAlcTPshlsPINYSlLAuLlPKsYPGRVpEIltILIQpps ...MGphau.ssoPIP+sP+GLusHHWLNFLQAAYRLpPGPSsaDFHQL++FLKlALcTPlWlsPINYSlLASLlPKGYPGRVsEIlpILIQsQ.............................. 1 0 0 0 +2647 PF00607 Gag_p24 gag_p24; gag gene protein p24 (core nucleocapsid protein) Bateman A anon Bateman A Family p24 forms inner protein layer of the nucleocapsid. ELISA tests for p24 is the most commonly used method to demonstrate virus replication both in vivo and in vitro. 20.70 19.70 20.70 19.70 20.60 19.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.36 0.70 -4.78 16 45086 2012-10-02 13:14:50 2003-04-07 12:59:11 15 80 305 153 68 32172 2 179.20 82 55.09 CHANGED .+pLpthhpslcp.uhsustslshhpulupsh..sshDhpphLss..ss...sptus.hpll+st...lsEpstchcpsp..s..sshs........hss-.IhGsGs..spppptphths.....sltpphpphhltAhp+lsphtsss.shhsl+QGPcEPYpDFVsRLhpslcsp.sstcsKphhhcpLuhpNANs-Cpphl+slt....hpuoL.p-hlpACpslGustpKuplh ...................................................................................PRTLNAWVKVlEE.KAFSPEVIP.MFoA.L.SEGA...TPQDLNT.ML.NT..VG...GHQAA.MQMLK-T......INE..E......A.AE..WDR.lHPVH..AGPlAPGQ...MREPRGSD.IAGTTS....TLQEQIuWMTsN.....PPIPVG-IY.K.RWIILG...LNK...I..VRM.YSPs..SILDI+QGPKE.PFRD..YV.D.RFaKTLRA...E..Q.A....oQ-VK.N.W.M.T-TLLV.Q....N.A.NPDCKT.I....L+.A.LG....PuATL..EE.M.......................................................... 0 11 11 17 +2648 PF02093 Gag_p30 Gag P30 core shell protein Bateman A anon SwissProt Family According to Swiss-Prot annotation this protein is the viral core shell protein. P30 is essential for viral assembly [1]. 
20.40 20.40 20.50 21.50 20.30 20.20 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.58 0.70 -5.12 6 261 2012-10-02 13:14:50 2003-04-07 12:59:11 11 22 75 32 46 311 2 181.30 63 26.71 CHANGED QYWPFSuuDLYNWKopNPsFScDPsuLTuLIESlLhTHQPTWDDCQQLLQsLLToEERQRVLLEARKsV.GssGRPTQLPNEID-uFPLTRPcWDYsTAtGRE+L+lYRQhLLAGL+GAuRRPTNLAKV+pVsQGssEoPSAFLERLhEAYRhYTPaDP-sPGQpAsVuMSFIhQSAPDI+pKLQRLEGLQsaoLpDLVKEAEKlYNKRET .........QYWPFSSSDLYNWKsNsP.sFSEDPtcLT.........uLlESlhhTHQPTWDDCQQLLtTL.hTsEE+pRlLLEARK.....sV.GsD.GRP..T..QLs..NE...l...stuFPLp.RP.sWDYs.T.scG.R.ppLhlYRQhLlAGL.psAuRpPTN.LAKV+tlhQGssEsPSsFLERLhEAaRRaTPaDPps.uQcss..Vuh..uFIhQSA.DIt+KLpRLEsLpptpLtDLV+EAE+la.+RET........................................................ 0 4 4 12 +2649 PF03276 Gag_spuma Spumavirus gag protein Mifsud W anon Pfam-B_1878 (release 6.5) Family \N 19.60 19.60 19.90 19.90 19.10 18.60 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.99 0.70 -6.18 7 132 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 16 0 1 105 0 287.50 61 96.65 CHANGED MAt.....p.-LDs.tL.sla.s.Gl.ppP.Ht-lIulRhTuG.WG.u-RaphVplhLQDs.sGQPLQ.Pcac..s.RssNPtsp..hlluuPaspLRpAFpsl-lucGs.RaGPLusGpa.PGDtYSppFpPls.pEhAQhptpcLEp............lshlthtchEIRhLRp.h.chph...tGhssslPsAsts.PP..s..t.....................................................................P.p....PsA..........hPsPshsPhl..sssP.....PhPssp....IPIpHIRAVhGpsPsNPR-lPhWLGRsssAI-GVaPhsssshRsRllNALluup.GLsLps.-ssoWsuAlAsLahRTaGuhslHpLusVLpuIAspEGVssAapLGhMhospsasLVWGIlRshLPGQAlVsshQpRLDt.ssDtsRhtsF.phlpsVYplLGLNs+GQSlp....hsposptps.uputGRGppu.csp..........pppppGRppspssptQpsp.ps.ssQp..Qppsssps.ptp.GGYsLRPps.QPQRYGuGpGpt.Ns.......pPhRpsspspspppps.s...sRG.sQScssussttpGGRtspsRNppuussssop 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................AIDGVFPlTTPDLRCRIINAlLGGNLGLSLTPuDClTWDSAVuTLFlRTHGpaPhHQLGsVlpGIsNQEGVATAYTLGMMLSGQNYsLVSGIIRGaLPGQAVVTAhQQRLDQElDDQARAETFIpHLNAVYEIL.GLNARGQSIR.....ASVT.sQPRP.SRG.RGRGQstsRPSp.........GPAsSGRGRQRPAs.GQ.-RGSNsQNQs..QuNsuQ................................................................................................uGh................................. 0 0 1 1 +2650 PF00337 Gal-bind_lectin Galactoside-binding lectin Finn RD, Griffiths-Jones SR anon Prosite Domain This family contains galactoside binding lectins.\ \ The family also includes enzymes such as human eosinophil lysophospholipase (Swiss:Q05315, EC:3.1.1.5). 
22.10 22.10 22.20 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.79 0.71 -4.81 80 2093 2012-10-02 19:29:29 2003-04-07 12:59:11 17 35 209 254 1005 1892 5 132.10 26 57.70 CHANGED shhtsl.sssh.ps...GpplplpGhst..................sss...................spFplNltss............psslslHhNsRasp........sslVpNo..hhsus..Wu..p.EER...........................................................tshPFppGp..Fplplhsp.pcpaplhlNstphhpFtaR..hs...pplst.lplpG..Dlp.lpslph ............................................................l..su.l.ts...GpplhlpG.ps...................ss..............................................pp.Fplsltss..............ts-lAhHhNP.RFsp..................................stlVpNo...hhssp....WG..p..EER................................................................t.t.t.h.PF..p....p.Gps...F........c.......lp........lhs..p..pp.pap......l...hV.sG...pph...h...patHR.......hs.........pplstl.p....lpG...Dlp.lppl..h................................................................... 0 188 321 655 +2651 PF03902 Gal4_dimer Gal4-like dimerisation domain Finn RD anon DOMO:DM03802; Family \N 21.10 21.10 21.30 21.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.62 0.72 -4.03 4 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 49 6 40 99 0 49.00 39 7.09 CHANGED TRAHLTEVEsRlupLEQlL+.lFPchDlDplLpp+cp.pl+tlLp.htsosslst.u TRAHLTcVEsRLppLEplhtpLFP.s....t....-lDt....lL....p.....p.......................................... 1 6 23 36 +2652 PF01762 Galactosyl_T Galactosyltransferase Bashton M, Bateman A anon Pfam-B_885 (release 4.2) Family This family includes the galactosyltransferases UDP-galactose:2-acetamido-2-deoxy-D-glucose3beta-galactosyltransferas e Swiss:O43825 [1] and UDP-Gal:beta-GlcNAc beta 1,3-galactosyltranferase Swiss:O54904 [2]. 
Specific galactosyltransferases transfer galactose to GlcNAc terminal chains in the synthesis of the lacto-series oligosaccharides types 1 and 2 [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.29 0.71 -4.71 14 2182 2012-10-03 05:28:31 2003-04-07 12:59:11 16 34 186 0 1505 2219 31 177.90 24 47.89 CHANGED sRRsslRpTWhspp...h......sclptlFLlGhss.t-t+hcchlhpEu+.hYGDllhs-hpDsYpcLsaKoLshhhausopsPpschIsKlD-DVhh.sspLhshLtpt...hssp.psthYGhlh.psGhshtpcps+.............Wahs.psasssp..............assYsuG.hYllotcAsptlhpsoc+p.pahplEDshl.GlhApchslshhsh. .......................................................................................pRpslRpTWhttt....t.....................................htl...h..h.hFl..............l....G....t..s....s..............................s............t............p............h..p...p..................l..p....t.Esp.ta.s.D.l..l.....h..s....a...h.D..s.Y.tN.L...o.h...Ks....lh............h...h.............pas....s....p.......h..............s..........s............t.........s..c....a.lhK.s............DDDs.al.sh.spl...lp...hLtph................................p...p...p...........h..........h.....h......u.......th.....h.......p........t....t......s..........h..p..p........t...s.K..............................a.a..h..s...p..a.......s.h..............................................Y.P.sa.s.s.G.............s................uYl.hStsls.p.h.l.....h....t.....s..s...........t.....p........h.....p.h...........h...........h........-D.Vhh.Ghhhtt.h.t.l......t...................................................................... 
0 563 798 1152 +2653 PF02709 Glyco_transf_7C Galactosyl_T_2; Glyco_transf_2C; N-terminal domain of galactosyltransferase Bashton M, Bateman A anon Pfam-B_834 (release 5.5) Family This is the N-terminal domain of a family of galactosyltransferases from a wide range of Metazoa with three related galactosyltransferases activities, all three of which are possessed by one sequence in some cases. EC:2.4.1.90, N-acetyllactosamine synthase; EC:2.4.1.38, Beta-N-acetylglucosaminyl-glycopeptide beta-1,4- galactosyltransferase; and EC:2.4.1.22 Lactose synthase. Note that N-acetyllactosamine synthase is a component of Lactose synthase along with alpha-lactalbumin, in the absence of alpha-lactalbumin EC:2.4.1.90 is the catalysed reaction. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.64 0.72 -4.51 98 1328 2012-10-03 05:28:31 2003-04-07 12:59:11 9 46 442 59 697 3197 601 76.40 28 20.10 CHANGED pph..ssshsphphph.a.pphhGGlhuhppppahplNGas..NpaaGWGuE.DDDhhtRlhtpsh..p...............lpR....shshst.......a+hh....H.......p ...........................................h.h..tt..h.h.GGlhuls+pp.F..h..pl....N.Gas...spa.h...G.W.G.u.E.Ds-h.h.t...Rl.hh..s.G.h..p..........................lpR.....p.h.phst..........a+hh...H.tp.......................................................... 0 242 317 501 +2654 PF01296 Galanin Galanin Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.90 21.20 20.30 20.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -7.15 0.72 -4.11 4 94 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 41 0 35 88 0 24.10 73 25.87 CHANGED GWTLNSAGYLLGPHAlDsHRSFsDKHGLA ..GWTLNSAGYLLGP+AlDsHRSh..tDK.Ghs...... 
0 2 5 10 +2655 PF02052 Gallidermin Gallidermin Mian N, Bateman A anon IPR001049 Family \N 20.20 20.20 20.20 20.60 19.00 20.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.53 0.72 -3.84 3 129 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 89 1 5 51 1 45.90 56 90.42 CHANGED MEAVKEKNDLFsLDVKVNAKESNDSGAEPRIASKFLCTPGCAKTGSFNSYCC .........h-........clhDLDVpVKus..s.sNDSuuDp..R..ITS+SLCTPG.Ct..K..T..GSFNSaCC............ 0 1 2 2 +2656 PF02744 GalP_UDP_tr_C GalP_UDP_trans_C; Galactose-1-phosphate uridyl transferase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports fold duplication with N-terminal domain. Both involved in Zn and Fe binding. 29.20 29.20 29.30 29.40 29.10 28.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.15 0.71 -4.71 9 2558 2012-10-01 23:45:21 2003-04-07 12:59:11 12 9 2265 12 565 1810 220 176.00 32 43.44 CHANGED E.........cpa.cYhttp........GShLhhDYsp.......hEltct-RlVh.s-p.alsVVPaWAtWPapsLlLs.+c+lhpls-Lsst..........pppDLAu........hl+plsp+YDNLFE.........TSFPYSMGlHtushsspts...tth.hHsHaYPPLLRSATVRKFhV.GaElLupsQRDlssEQAAt+L+..........sLs.phHhchsh ...........................................................................th..c...tYFttp............GS.LhtDahQ...............................hE.hA....s....t..pRhlht.s.-...ahsVs.shhs.pWPhpslhLs..pp.c...hpcls-Ls-p.....................ppsslst................slp.lsp.+.h.D..s.h.F.p...............................sShsYshGh............................................................hH..sH..h..s...h.l.+.pt.s.lt..hh.V.G.h.t.h.L.s.tp....+.-.htE.us.hLp..................tls..thH.t...t.............................................................. 0 171 324 461 +2657 PF01087 GalP_UDP_transf Galactose-1-phosphate uridyl transferase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports fold duplication with C-terminal domain. 
Both involved in Zn and Fe binding. 22.10 22.10 22.10 22.30 22.00 21.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.35 0.71 -4.23 135 2821 2012-10-01 23:45:21 2003-04-07 12:59:11 17 13 2385 22 659 2041 370 183.20 28 46.17 CHANGED s.ths.p-p.......................................sphRhssLhuphll..............suscp.................tcR..satspppth.tsshsptD.......hshLsPssscl....ssp.hsspY..psshshsNcFhuLpp.csshhpp..........scs..................hapspsshG.phcVhh.tsc.p...............p-hcslsssh.tphtp.........p......................th.phl.saENcGhthus......o..HP.............................Hu.lasppalP ................................................................................s..h...Dp...sp.RhtsLh.Gphll...............................susct..........................tc+........sht.s...tp...tt....pp.h.s.sp-................shls........Ps......s.....s..........p.....l..............stp.....h..................ssY...........psshhhs...s......c............FhsLtpcs.s.ht.p....................................sps................................haps.pss.hG..phclhh.psc.p................pchcplsts...phsc.....h..p.....................................h.thl.saENcGhthG....s....o.......HP..................................Hu.lhsspalP.......................................... 0 214 398 548 +2658 PF00304 Gamma-thionin Gamma-thionin family Finn RD anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.49 0.72 -3.95 50 545 2012-10-01 23:31:40 2003-04-07 12:59:11 15 2 132 17 121 592 0 46.80 35 59.18 CHANGED +sCcstSppF.+GsChssss..CspsCpp..Esas...uGcCct...hpRR.ChCo+.C ................hCcs..Stpa...cG..s.C...h...s...sss..CsshCps........E...sas...uGpCph......pp+.CaCpt.C............ 
0 27 58 87 +2659 PF04410 Gar1 Gar1/Naf1 RNA binding region Kerrison ND anon DOMO:DM04007; Family Gar1 is a small nucleolar RNP that is required for pre-mRNA processing and pseudouridylation [1]. It is co-immunoprecipitated with the H/ACA families of snoRNAs. This family represents the conserved central region of Gar1. This region is necessary and sufficient for normal cell growth, and specifically binds two snoRNAs snR10 and snR30. This region is also necessary for nucleolar targeting, and it is thought that the protein is co-transported to the nucleolus as part of a nucleoprotein complex [2]. In humans, Gar1 is also component of telomerase in vivo [3]. Naf1 is an essentail protein that plays a role in ribosome biogenesis, modification of spliceosomal small nuclear RNAs and telomere synthesis, and is homologous to Gar1 [4]. 29.30 29.30 29.50 29.50 29.10 29.00 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.62 0.71 -4.77 57 687 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 360 15 496 691 25 146.10 25 42.52 CHANGED sp.hhcthhst...shshs..ssspl..lGph.pss-sp....lllcusss.......pV.phsuhlh.hEs+s.l.GplsElhGslpsshasV+.......sppthpst..shphGsplahsss....hph.hhspshtt....+Gscuusttccchs..tttt.pauccctptth+pttptp+ ...................................................................ttt....tth...h....t.h.p.s..sstp..l..h.lGph.p......hs.-.s.p....llscuhss..........pl..hsusla..h-...s.+.p...l.G+VsElhGslppsaaol+..................s-slpsp.....uh..p..hGsp.hYhssp....................hph.hhsp.hht.....................+Gsctut.ttstpss........tth.t.aus.sttt.tthtttht...t.................................................................................................. 0 175 287 416 +2660 PF01071 GARS_A GARS; Phosphoribosylglycinamide synthetase, ATP-grasp (A) domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_916 (release 3.0) Domain Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. 
The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the ATP-grasp domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF02786). 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.14 0.71 -4.74 19 4769 2012-10-10 13:17:03 2003-04-07 12:59:11 14 45 4503 19 1247 7505 5005 188.90 49 41.23 CHANGED uSKsFsKcFhp+asIPTA.pYcsFo..c.sccApualcctshss.....VVKAsGLAAGKGVlVs....psp-EAhpAlc-lhtpttFG.pAGcpVVlEEhL-G-ElSlhshsDGcolhshssAQDHKRlh-GDp...............................GsNTGGMGAYu....PsPhlopclhcplhcpIlpPTlcuhtcEGhsapGlLYuGlMlo.csG......PKVlEFNsRFGDPET ....................................................GSKsFuK-hM.t.+as..IP...TA...p..............Y....p..s..F...s........-......hc...p...A...h.....s..Y...l.c...p...t...G.sPl............VlK...A...D..G.L..A.A..G..K...........G...........V...l....VA...................o...h....-...E......A....t....p....A...l......c...c...h....h.......t.......s.......p.......t.......F.........G..........s.....A....G...p.........c.....V....V.I.EE.F.L....-...G...-..E..hS..h.hs..h..s...............D.........G........c.....p...h.l....P.h..s.s..u.....Q....DH.K..R..l..hD...GD.p.......................................................G.P..N.T.....G.G.M.GA.Y.o....Ps.P.h.l........o....p...........p..........l...........h.p..c.sh.cpIlpPTlcG.Mt.p..E.....G..p..sap.G.l.L.YAGLMls..psG..........PKVlEFNsRFGDPET................................................................................................................ 0 403 790 1057 +2662 PF02843 GARS_C Phosphoribosylglycinamide synthetase, C domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_916 (release 3.0) Domain Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. 
The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the C-terminal domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF02787). 21.80 21.80 22.80 21.90 21.40 20.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.90 0.72 -3.81 94 4668 2009-01-15 18:05:59 2003-04-07 12:59:11 11 41 4494 19 1219 3584 1998 93.10 39 20.27 CHANGED sulsVVlAutGY......Pts..p+.Gh.lsh....t....ttpsshlFaAGo...................p....................tp..ss...............pllosGGRVLslsuhusolpcApppuYpslppl..phs.shaYRpDIGt...+sh ......................................................ulsVVlAutGY......P..s....s....Yc....K...Gs.IsG......................ttstsstlFHAGT....................................p........................................hp...ss.................pl.lT..sG..GRVLslsu..hG.colp-AppcAYp.tl.s.p..I.pa......c....u......haaRpDIGh+A.h........ 0 384 769 1030 +2663 PF02844 GARS_N Phosphoribosylglycinamide synthetase, N domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_916 (release 3.0) Domain Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the N-terminal domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF00289). 
24.30 24.30 24.30 28.30 24.00 24.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.21 0.72 -3.54 135 4637 2009-01-15 18:05:59 2003-04-07 12:59:11 10 33 4461 19 1204 3558 2108 100.50 48 21.94 CHANGED M+lLllGuGGREHAlAhpL.tpSsplsclasAPG..NsGh...uph..ups....hsl........shsDhssls..paApcpslDLslVGPEsPLssGlsDthpp....tG......ls..lFGPoptAApLE .......................................M+lLVlGuG.GREHALAaKl....spS...s...p........V.....c...p.lalAPG....Nu.Gh.......ut....u..p.s.........lsI.........shsDhsuLl.......sFAp..cp..pl-LslVGPEsPLl..t..GlVDsF..cs......s..G.....lp..lFGPopsAApLE................ 0 383 758 1019 +2664 PF03198 Glyco_hydro_72 GAS1; Glucanosyltransferase Mifsud W anon Pfam-B_2209 (release 6.5) Family This is a family of glycosylphosphatidylinositol-anchored beta(1-3)glucanosyltransferases. The active site residues in the Aspergillus fumigatus example Swiss:B0XT72 are the two glutamate residues at 160 and 261 [3]. 25.90 25.90 26.20 26.30 25.50 25.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.10 0.70 -5.44 12 708 2012-10-03 05:44:19 2003-04-07 12:59:11 9 14 163 3 511 679 10 297.50 41 61.20 CHANGED ssstutsss.sIpIhGN+FF.ops.......uppFaI+GVsYQPssSt...........SshsDPLADsc..sCpRDl.hhppLGlNTlRVYulssotsHDcCMphLpsAGIYlILDLssP..spSIsRtcP..oaNsshhpphhssIDsFpsYsNlLGFFAGNEVsNstssos..ussaVKAslRDhKpYIps+saRpIPVGYSAsDssssRlphAcYFsCGs.s...............s..+uDFaGhNhYEWC.Gt.SoapoSGYcs+sc-FcsasIPlFhSEaGCN....cVp.......sRsFsEVsslYSspMosVaSGGlVYpY.pEsNsYGLVplsssssp.hhsDFpsLKpphtplu 
..........................................................ss.....sslssIplp..Gs..cFa..ps................GppFal+G...lsYQ.ssu...............................sshhDPL..uD..s..p..sCpRDl..shhpcLG..sNsIRVYslDsst..sH-cCMph.Lsc...AG...IYllhDlssP.........stSI.sR............s...s...P..................sass.shhpphhsllDtFupYsNsLG......FFuGNE....V..hNst.ss....os.......uusa...V.KAssRDhKpYIp....s..+..s.....h..Rp.IPVG.YSu......sD....s..........s.........p..h.R...phup.Y........hsCusss..................p...psDFaGhN..s.YpW..C...G.s....S..o...a....p.....s..S......GYsphscpF..p..s..a..s..lPlFFSEYGC....N.......p.sp.............PR.FsEltA.lY.....u.....s.....p.MosVa....SGGlVYpYh...pE...s....N...s....Y.....GLVp...l...s.s...s...s..s.h.....hhtDaptLppphtp..t......................................................... 0 101 260 432 +2665 PF02187 GAS2 Growth-Arrest-Specific Protein 2 Domain SMART anon Alignment kindly provided by SMART Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.59 0.72 -4.21 4 597 2009-01-15 18:05:59 2003-04-07 12:59:11 12 116 140 1 314 529 5 69.70 52 2.76 CHANGED cpl-DtV+Rlltp..CpCsp+FpV.pluEGKYRhGDSphLhlVRlLRSpVMVRVGGGW.sL-caL.KpDPCRspp ................................plc-tVp+....ltp....C........pCsp....+.Fp....l.ppl..u..-..sK............YR..............h.........G...D.o..p..tLhll..Rl..L.R..o..p.VMVRVGGGW.sL-cFL.s.K...pDPCRsp.t...................... 0 92 125 199 +2666 PF00741 Gas_vesicle Gas vesicle protein Bateman A anon Pfam-B_545 (release 2.1) Family \N 25.00 25.00 26.40 30.10 23.30 22.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.58 0.72 -4.08 52 505 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 154 0 197 506 18 38.70 48 35.03 CHANGED uLsDlLDRlLDKGlVIsuDlplSlusl-LlslclRlllu ..uLsDlLDRlLDKGlVIsu.lplSlssIELLsIclRlllA. 
0 71 157 191 +2667 PF01304 Gas_vesicle_C Gas vesicles protein GVPc repeated domain Finn RD, Bateman A anon Prosite Repeat \N 20.50 20.50 21.90 21.90 19.90 18.20 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.90 0.72 -7.35 0.72 -4.32 19 227 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 48 0 5 187 0 32.40 49 53.18 CHANGED hppLpppTpcFLosTsppRhAQAcpQAppLppF ....ppLQppoppFLosTAptRhAQAccQApELhpF. 0 0 5 5 +2668 PF02704 GASA Gibberellin regulated protein Bashton M, Bateman A anon Pfam-B_1221 (release 5.5) Family This is the GASA gibberellin regulated cysteine rich protein family. The expression of these proteins is up-regulated by the plant hormone gibberellin, most of these proteins have some role in plant development. There are 12 cysteine residues conserved within the alignment giving the potential for these proteins to posses 6 disulphide bonds. 21.60 21.60 23.60 23.30 19.70 18.90 hmmbuild --amino -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -9.96 0.72 -4.00 41 346 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 59 0 171 350 0 59.70 52 54.40 CHANGED pCsspCstRCSposh.+chChphCspCCt+Ch.CVPPGTYGNKphC.PCYsshKT+pG...tsKCP .........pCsupCshRCStsuh...pc...hChchCspCCt+Ct.C...VP.PGTa.G.NK......c..tC.PCYsshpT.+pG...psKCP......... 
0 15 111 144 +2669 PF00918 Gastrin Gastrin/cholecystokinin family Bateman A anon Pfam-B_1542 (release 3.0) Family \N 22.20 22.20 22.20 23.80 22.10 22.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.78 0.71 -3.28 11 170 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 67 0 55 185 0 103.10 35 96.97 CHANGED tsslClslllsVLusushscssshsc......sthpp............ssptthpshpptp........csctshuulhp+hLtttphst.huchushps.htpp-stcthtD+DYhGWMDFGRRSuEEh-hsu ......................tlslslLlssLu.h.u.shupss..sp........thtp.......................t..s.tt.psht..t.....................csctshuuhLsRhlpt.ttpst.su.cho.hhps.h.p.-..sat.tp.pDYhGWMDFGRRSAEE.-................. 0 3 7 20 +2670 PF00310 GATase_2 Glutamine amidotransferases class-II Finn RD, Bateman A anon Prosite & Pfam-B_5381 (Release 7.5) & Pfam-B_455 (release 7.6) Domain \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.22 0.70 -5.75 57 7459 2012-10-03 21:14:07 2003-04-07 12:59:11 16 48 3746 79 2152 13504 10212 226.90 25 23.73 CHANGED CGl.Ghlsphpspso+.pllppulpuLpplpHRGuhss-.psGDGAGIhh..phPcphh.........th.h.......usu.hah...............t.hpt.h.t.s..lhuaR..lPht.t.hu..s...tt.P.h.Q.hlt..................................thc.pLahhR+.phpp......phttpt..........hYhsShs..sllYKG.hhs..l..a.a.DLps.phposhulsHpRaSTNThPsW.hApPhR.........hluHNGEINTlpGNpsahp..u+pthh..po..h.......t.h.Ph...t.............................................................................................................................sSDSsshDss.lEhhhtsG.hp...shhhhlP.sh..tp.ths....phpsFaca..hsslhEsWD..GPsulsh....o...D....G+...h..luAslDRNGL.R...PuRasl.T..............cDs...h.llhuSEs 
......................................................................................................................................CGh.uhl................pp...plh.p.hh.phL.phpaRGh..........DusGlhh.....stt.h.................................................................................................................................................................................................................................................................................................................................................................................................h...a....th.t.........t..h.....u.t.....h...sl.hH...Ra.sT.s...s.............s...p..h......A.pPhh.................................................hlsH.N..G.......pls.s..hp.....t...hp.h...h....t..p.t.h...h...ts...........................................................................................................................................................................................t.S..D.o.t.............h......p.h.h.t.h...h...th............................................................h............t..p.sh..aph.....t......h...h..p.....h..c.......G...sh...s.h.s.h......................Gp............h..h..s.....h.D...p..Gl...RPhhhhhp..............tpt.......hhhuSE.................................................................................................................................................................................................................................................................................................................................. 1 614 1255 1761 +2671 PF04572 Gb3_synth Alpha 1,4-glycosyltransferase conserved region Waterfield DI, Finn RD anon Pfam-B_4980 (release 7.5) Family The glycosphingolipids (GSL) form part of eukaryotic cell membranes. They consist of a hydrophilic carbohydrate moiety linked to a hydrophobic ceramide tail embedded within the lipid bilayer of the membrane. 
Lactosylceramide, Gal1,4Glc1Cer (LacCer), is the common synthetic precursor to the majority of GSL found in vertebrates. Alpha 1.4-glycosyltransferases utilise UDP donors and transfer the sugar to a beta-linked acceptor. This region appears to be confined to higher eukaryotes. No function has been yet assigned to this region [1]. 20.40 20.40 20.70 20.50 19.10 20.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.88 0.71 -4.47 17 301 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 96 0 207 294 11 127.30 29 35.50 CHANGED hsF-sp...HplsttClc-hspsaNGspWGpNGPtllTRVl.+hhCpspshtthpp...pCht.holhsPsAFYslsa.pacpaFcs.ptcpshphlp........pShslHLWNKho+..shhlc.GSpshhthLhppaCPpsapustphh ...............................................hFp.p...H.hhthshc-FsppY...sup.hWGp...pGPtLlTRVh.+phCshp..sh.t..t...........tChs...hshlsspsFYPlsap.cacc.aF.cs.psp..p..t.h....p..h...hp......................so.YulHlWN..+hop..tht.hch.s.Sp.lhtpLhppaCPtsht.h....h..................... 0 63 97 152 +2672 PF02263 GBP Guanylate-binding protein, N-terminal domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4308 (release 5.2) & Pfam-B_9065 (release 8.0) Domain Transcription of the anti-viral guanylate-binding protein (GBP) is induced by interferon-gamma during macrophage induction. This family contains GBP1 and GPB2, both GTPases capable of binding GTP, GDP and GMP. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.66 0.70 -5.33 13 1135 2012-10-05 12:31:08 2003-04-07 12:59:11 14 37 152 19 688 1289 39 218.70 30 40.25 CHANGED cppLtlNpEALcIL....psIoQPVVVVAIVGhYRTGKSYLMN+LAG...........................pppGFSLGuTVpS-TKGIWMWClP....HPspPchTLVLLDTEGLtDhEKuDs+sDuWIFALAlLLSSTFVYNShsTIsppAL-QL+hVTELTchI+u+ss...t.sclcsussFVuFFPsFlWTVRDFoLpLEh-GpslTsDEYLEpuL+LppGpstclQs.N.sRcCIRpFFPc+KCFlF-+PutctpL.splEplp---L-s-FppQlppFsSYIao.cuhsKTL ..............................................ph.ls..p.A..lphl.............ph..h..s..p.s...l...s.VV......ulsGhaRpGKSaLhs.h.h..h..t................................................................p..G.F..sh..t.s.......ss....p...s..p..Tp.G.Ih....h.W..s.s...........................ps.p...t.....p...h.s.......llLlDT..pGhhs....p.....p....u.s...........p....s.............s....s..hl..FAL.u...s..h..lSS..h...l.YN........t.s....I...pp.p..s....lp.p.L...........p..h............ho-.........................................................st..h..t......p...t............h.............p............F..........s.hhahl.RDa..s.h....h..c.h..s.h.t...............s..tp.....p.a.L......-p..t...L....p...h...................t.....p..p.......p....h..p...p.........R.tp.......l..pphF..s.phpCFh..hspPs.ph.th.........ph..ct......t..pplp.pFhp...pht.h...h.hlht..t........................................................................................................................................................................ 1 260 347 485 +2673 PF02841 GBP_C Guanylate-binding protein, C-terminal domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4308 (release 5.2) Domain Transcription of the anti-viral guanylate-binding protein (GBP) is induced by interferon-gamma during macrophage induction. This family contains GBP1 and GPB2, both GTPases capable of binding GTP, GDP and GMP. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.72 0.70 -5.23 23 678 2009-01-15 18:05:59 2003-04-07 12:59:11 9 20 96 9 366 663 7 220.50 29 37.66 CHANGED GGIhVoGs+LusLVpTYVsAIsSGslPClENAVlsLAQhENoAAVQKAlsHYpppMuQ+l.phPT-TLQELL-lHpssE+EAIpVFMcpSFKD..csQcFQKcLhspLcsK+c-FhcpNp-ASscpCpslLpcL.psL-cslppGsFStPGGapLalcc+cclcpcYpps...PcKGl+AcEVLQpF..LpS.......................+c.slpcoILQoDpuLTcpEKpIpsE+s+sEuAEtcpchLcpcpccppphMEsp-+SapEpl+QLpEKMEpERc.plhpEpc+hlppKLpEQcchLpEGFcpcucpLppEIpcLp ..................................GhpVsGp.tL.h.plhpsYlcslsu.G.s.l.Pshcssl.ulAphcNpuAVpcAhshYpppMpphh......p........hPh.p.......s....p.-.L.chH.pts...ccc.AlplFh...c...p...u.h..p..D.......s...p...p.......a..p.c.p..Ltpp.l.ccph..p..........s..F..............hp.pNpptu......thCpt...ht.........h.....h...............................h...................h.................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 107 154 219 +2674 PF02425 GBP_PSP Paralytic/GBP/PSP peptide Bateman A anon Pfam-B_2690 (release 5.4) Family This family includes insect peptides that are short (23 amino acids) and contain 1 disulphide bridge. The family includes growth-blocking peptide (GBP) of Pseudaletia separata and the paralytic peptides from Manduca sexta, Heliothis virescens, and Spodoptera exigua [1] as well as plasmatocyte-spreading peptide (PSP1) [2]. These peptides function to halt metamorphosis from larvae to pupae. 
25.00 25.00 31.00 42.20 20.00 17.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.43 0.72 -6.85 0.72 -4.42 3 25 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 15 11 3 34 0 22.90 81 27.38 CHANGED ENFAGGCsPGYhRTADGRCKPTF .ENFAGGCssGYhRTADGRCKPTF. 0 2 3 3 +2675 PF02526 GBP_repeat Glycophorin-binding protein Bashton M, Bateman A anon Pfam-B_1047 (release 5.4) Repeat This family contains glycophorin binding proteins from P. falciparum the malarial parasite [2]. Glycophorin is a cell surface protein of erythrocytes. The Glycophorin binding protein contains a tandem 38 residue repeat. In Swiss:P02895 the repeat occurs 11 times. 25.00 25.00 102.50 43.00 18.50 16.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.67 0.72 -4.56 9 59 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 4 0 24 67 0 37.70 83 63.70 CHANGED TSADPEGQIM+tYAADPEYRKHlpVhYpILTNTDPNDE .TSADPEGQIM+tYAADPEYRKHLpVhYpILTNTDPNDE. 0 24 24 24 +2676 PF04551 GcpE GcpE protein Bateman A anon Pfam-B_1482 (release 7.5) Family In a variety of organisms, including plants and several eubacteria, isoprenoids are synthesised by the mevalonate-independent 2-C-methyl-D-erythritol 4-phosphate (MEP) pathway. Although different enzymes of this pathway have been described, the terminal biosynthetic steps of the MEP pathway have not been fully elucidated. GcpE gene of Escherichia coli is involved in this pathway [2]. 
32.00 32.00 36.10 36.80 30.10 30.70 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.12 0.70 -5.65 45 3640 2009-09-10 23:01:59 2003-04-07 12:59:11 9 7 3355 8 908 2655 3339 344.20 45 91.10 CHANGED TRpVpV..Gs......VtIGGssPIsVQSMTs...TcTpDlcuTl....pQIpcLtcAGC-lVRluVsshcsAcAlppI+cp.....hs.lPLVADIHFs.a+lAltA...h-tul-KlRINPGNl.......................up.c-+hctlVctAK-pslsIRIGVNuGSL....pcclhp+Y.......tsTsc...uMVESALcalclhEchsF..pclllSlKuSclhhhlp.AYRhlupcs-.......YPLHLGVTEAGshhsGplKSulGlGsLLt-GIGDTIRVSLT...s-Psc....El............................................................................................................................................................................................................................................................................................................................................................................................................pluhcILpulsL.Rptus........clISCPoCGRs.thDLhplspclccplp..plc..........sl+lAVMGClVNGPGEuc-ADlGlu.uGpGp...uhlac+Gchl++s.sp-phl-cLlc.Iccht 
.............................................................................................................................................pplhVGs......VslG.u......s......uPlsVQSMTN.......Tc.TpD.lp...ATl....tQIp............pL.pcuGs-....IVRluVPs.-uAcAlttI+pp.....ss.lPLVADIHFs.h+.hALts........s-hGlcplRINPGNI....................................................sp.--+lctllcsA+-pshPIRIGVNuGSL............-.+.clh.p.KY......................stsTs-........AllESA...hcpsc.lh-clsF..cphhlSlKuSDV...hhhlp..uY.RhL.A.p.p.hD......................................aP.L....HL.Gl.T.E.A.G.ssp......p.G.......sl.KSAlulGhLLt-GIGD.T.lRV.....S...Lo.......u.-.P....s.E....E.l.............................................................................................................................................................................................................................................................................................................................................................+....V...uhcILcol......sl.....Rs.p.u...l..........ph..lu.CPoCGRp.ph.D.l.hphspplcptLc...clh..........tshcVulhGCVVNGPGEAcpADlGls.uGssc.....uhlahcGchhcpl.s.psphl-pL.t.lct..th........................................................................................................................................................... 
0 329 616 783 +2677 PF02155 GCR Glucocorticoid receptor Mian N, Bateman A anon IPR001409 Family \N 25.00 25.00 29.60 25.50 18.70 18.30 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.21 0.70 -5.72 5 165 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 79 0 32 162 0 305.40 50 50.18 CHANGED VhDFasshRGGs...olpsSsSssPTSluSQSsS+QQP.......lsuDhsNGLsNNssQPDLSKAVSLSMGLYMGEoEsKVMGNDluF..PQQsQsulSoGETDFuLLEESIAsLN+Sooss-sllGuss......Pp-htsLKTc.DsSsps+sclcuQsGSNGsStlKLslsDQ.....SThDILQDLELPssSP.GucoN.sPW..DhLhDEs...uLLSPLu.sDDoFLhEGN.hsEDsKPlsLsDoss..KlsDsucpsLLosuslslPQVKTEKE...DFIELCTPGVIKQENsGslYCQuulSGuslhGsKsSAISIHGVSTSGGQMYHYDhNTuTl.SQhQQDQKPIFNlhPPLPSuSEuWNRCQGSsD-shAolGslNhsuRosFSNGY.SSPuhRPD .............................................................................shSss.s.husss..sssh.p........h.hDhspG.sss.s............................-LopA.lo.hohGlhht-s-s.KshspDhth...Q..Q...s..Q.....hul.os...G.......Epsh..p...L..LEESIAsLsp.....o..o...oss.-sshuuss......P.ch.....KTc...chS...-....ppp..h...csps.u.osGss.hcL.sssp...............sThD....I....LQDLEhsss.P....Gpcos..ssa...DhlhD-............sLL......SP...Lu.t-Dsh..Lh.-us..........sscDs.KP.llls.csps...............pl.pcsu............c...............h.L..s.............s.......s....s.sl.....shP....p.......VKsEK-...DFIcLCTPGVIKQEK..h...G.s.sYCQ......uuhsu..usl.u....s..+hS...uIS...lp.GVSTSGGQhYHYchNou.......Sh..uQ.Q..Q.DQKP.lFNlhPPlPssSEsW...NRsQGsG-ss...hs...shG..s..h.s........h.s...upos...FSsGa..SSPu.Rs-............................ 0 1 3 11 +2678 PF04107 GCS2 Glutamate-cysteine ligase family 2(GCS2) Kerrison ND, Finn RD anon COG2170 Family Also known as gamma-glutamylcysteine synthetase and gamma-ECS (EC:6.3.2.2). This enzyme catalyses the first and rate limiting step in de novo glutathione biosynthesis. Members of this family are found in archaea, bacteria and plants. 
May and Leaver [1] discuss the possible evolutionary origins of glutamate-cysteine ligase enzymes in different organisms and suggest that it evolved independently in different eukaryotes, from an ancestral bacterial enzyme. They also state that Arabidopsis thaliana gamma-glutamylcysteine synthetase is structurally unrelated to mammalian, yeast and Escherichia coli homologues. In plants, there are separate cytosolic and chloroplast forms of the enzyme. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.67 0.70 -5.30 12 2189 2012-10-02 17:21:26 2003-04-07 12:59:11 8 10 1592 13 681 1830 1695 275.90 24 68.31 CHANGED lGlE.Ehhls-.hsh.ht....pshh.ttshhshs...tsshhp...tstlEluss.PhsslpcshpphtpshptltphssphGlthhulGhpP.hphtph.lhsssRYpthhphhsp....s.upphhhtstplplslcsus-t......sthhphhhshhPlhhALuAsSPahpG+soGatSsRtplapphss...u.lPhshsc...acpahchhhcsshhtshp...............chahchRsss.h.s....hh-lthptssh.....lh.cthhch+hhsutst.chsshsshhhsshhcs..h.pstacAs+.hh 
.......................................................................................GlE.Eh.lh.s....shths.............ps..hh......t...t.s..h...thp...............ss..p.h...hp-hstu.lElu.os..s...t....p....s..lp....pstsplsthhphltpsAs..ph.....s.....lt..l.husGscP....h..t.p....h....p....c...p...t..l...hs.p.t.....RY.......pt..h.h....ch.h................sh................................h..u...........p..............ph.h.......h...............h.......up..pV.......pVsl.ssu..............s-sh.................h.hhcshpthhPh...........hl.....ALu..AsSP.ah.pGp..s....oG.a...tS.....tR.p.la.p.......p.hss..................sGs..h.P.h..h.....h.....s......t...........t....ac....t......hhchhhp.....s.s.hh.shc..............................................ch.ah-h.R..sss..thu..........shE..lcsh.c......os........sh...c..thh.h...thhpu......h.s..h..h.h.l.s....t..h.s.h.h........h.t.....................a.......hh.stapus+.h..................................................................................................................................................... 0 201 465 597 +2679 PF01597 GCV_H Glycine cleavage H-protein Bateman A anon Pfam-B_988 (release 4.1) Domain This is a family of glycine cleavage H-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. A lipoyl group is attached to a completely conserved lysine residue. The H protein shuttles the methylamine group of glycine from the P protein to the T protein. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.40 0.71 -4.43 14 4486 2012-10-02 20:27:15 2003-04-07 12:59:11 14 10 3523 30 1292 3350 2660 118.10 40 88.84 CHANGED LhYspcHE....Wl+scps.ssTVGITcaApcpLGDlVal-LPEsGsclpps-shuslESVKssS-lYuPloGpVlElNccLp-sPsllNccPY.tcGWlhKlKs...ss.c-hcp.LhsucpYtchlcc. 
.........................................................................h+YocpHE..........Wl......+..........h....-.....u.....-................s.....sslGIT...............-aA..Q.-....tL.GD..lVFV-.L.......P..-......l..Gs......p.l.st.....G.-s.h..usl..E.S.V.K.usS..D...l.YuPl..o.GcVlpV...N.-s...L.p.-sP.......c.l.l.Np....-.P....Y....sp......GW..lhKl..ch.....s-.......s....p....l....c...s....Lhs.sptYpthh...t.......................................... 0 431 806 1082 +2680 PF01571 GCV_T Aminomethyltransferase folate-binding domain Bashton M, Bateman A anon Pfam-B_933 (release 4.0) Domain This is a family of glycine cleavage T-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. The T-protein is an aminomethyl transferase. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.15 0.70 -11.08 0.70 -4.96 131 7911 2012-10-01 23:12:28 2003-04-07 12:59:11 16 47 3499 47 2456 6628 15559 195.20 23 43.65 CHANGED lhDlSp..huplplp......Gs-..At.....paLpplhssclstlt...sGpst.hshhLsp.pGtllsDhhlh+hscs.t............ahllssuustppshtalp......chhh.........plplpshosp...huhlultGP.....putpll...ppls....................sth.hhphtth.......h.....h........hlsRsuasGE..GaElhh.ss.............cp....stplactLht.s...thpssGhtAh-sLRlEtGhshhGp..........-h.s.pph..sPhEuuLt.ah 
.....................................................hhDlSp.huhlplp...........G..s...-.....ut.....pa.L....p.tl...........hs.s.-..l...........s.p........ls..................sG.p...s...h...ashh..h.....st..p.....Gt...lhsDh..h..lhch...s-s................................ahl...ls.s...u...us..........t.............p..p...s.....h.s...alp...........................................phh.t...............cVplp....sh.ss.............huh.l.u..l..t....GP...........................put.t..hl.........tplh.........................................pt..h..h.h..tht.h.............h.s...h................................hlhp.s..u...........a.s.G.....E.....G....a..Elhh...st....................................pp......u.tlhptLht.....s..............................................l..p..s..s.Ghts..h..c.sL.R.lE....t...G...hshhup....................-h...s.tph........sPhpssht.......................................................................................................... 0 718 1448 1998 +2681 PF04295 GD_AH_C D-galactarate dehydratase / Altronate hydrolase, C terminus Kerrison ND, Finn RD anon COG2721 Family Family members include the C termini of D-galactarate dehydratase (EC:4.2.1.42) which is thought to catalyse the reaction D-galactarate = 5-keto-4-deoxy-D-glucarate + H2O, [1] and altronate hydrolase (altronic acid hydratase, EC:4.2.1.7), which catalyses D-altronate = 2-keto-2-deoxygluconate + H2O [2]. As purified, both enzymes are catalytically inactive in the absence of added Fe2+, Mn2+, and beta-mercaptoethanol. Synergistic activation of altronate hydrolase activity is seen in the presence of both iron and manganese ions, suggesting that the enzyme may have two ion binding sites. Mn2+ appears to be part of the enzyme active centre, but the function of the single bound Fe2+ ion is unknown. The hydratase has no Fe-S core [3]. 
19.40 19.40 19.40 20.40 18.90 19.30 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.23 0.70 -5.81 114 2289 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 1384 0 458 1521 2260 381.60 41 82.16 CHANGED p.tTFpGY+RsDGp.VGsRNhlhIlsoVsCsssluctlActhptp................................................slD..........Gllsl.sHshGCu.hs...p-hphhp+sLtGhuppPNhuG.VLllGLGCEssphsplhps.....hsttt....ctlc........hhslQ-s..tG.tcslppGlphlcchltp.AsptcRpssshScLslGlpCGGSDuaSGlTANPslGhsuDhLlptGGTslLoEsPElhGAEclLspRAtsp-Vuc+llchlpha.cpYht.ptsssh.ssNPSPGNKtGGLoTl.EKSLGustKuGos.slssVlcYuE...lp..p.pGLshhso...........PGtDssusTu.sAuGsslllFTTGRGoshGsh.sPslKluoNoshhc+hp-.hDlssGsllsGptol-chGcclachllcVASGct.TpuEth..GppEhs.aph.....Gssh ..................p.hsFpGYRR.ss.Gp.VGsRNhlhIlsoVtCssslschlsphh.cc.................................................h...th...slDGV..s.slsH.aGCu.ht......ssth.hsh+sLtshuppPNhGu.VlVl..GL....G.CEp..Qsptlhcs......................hschss..............cpsp.............hhshQcp...G.pstl-sulphhcch.hpt.hs...pppRcssshSELhlGhpCGG..SDuhSGlTANPslGhsSDhLlpsGu.TslhoEssEhhGAEplLtsRAh...sccltc+Llphhsh.......acp.Yht.....t.t......p.p.sh.....ssNPSPGNKtGGLoTl.EKSLGsht.Ku.Gpo.sls-VLphG....-....+.p........p....pG..Lhhhso............PusDh.Vs.sTs.sAuGsplhl.FTTGRGTPa.Gsh..........VPs.........lK.........luTNopLsp+h...t-hhD.lsAG....slhpGt.pol-pl....upclhchIlclAsG+p.TpuEph.......tap-.h.ulapsuss..................................... 
0 130 277 372 +2683 PF01150 GDA1_CD39 GDA1_CD39_NTPase; GDA1/CD39 (nucleoside phosphatase) family Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 21.20 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.52 0.70 -5.99 9 1515 2012-10-02 23:34:14 2003-04-07 12:59:11 12 8 341 38 858 1436 26 351.80 24 78.66 CHANGED .htpp-sh+YullhDAGSoGoRlaVY+a.sp..........c.shhtlsttlcc....hphlsPGLSoaupKs..................cpsuphLpPLL-hAcphIPppppsp..........TPVhltATAGhRLLtt-...tpptlLcsLpshlpshssa.spp..ptlpIIsGppEGhYuWIslNYLLG+hscc..............pppTsGslDlGGASsQIsFs.p..pt...stplcsst..hhpphchhspcY...slYspSaLsYGhspuhpphltpLlpssss..............hpsPCh..Ghppshshsslp................hthpususappCppplhcllp....st.Cs.p..pCtFNGVasPs....tppphtsSshaYhs.shhshsuph.....hs.pchscts+phCs..psWsplpsshspst.p.ls.....phChcssa.holLhsGFshshp..........cphpsspcIpsp-....huWsLGshl.hssshsh.....................pppLtss .....................................................t.......pY.ulhhDAG.......So..GoRlalapa.t......................................................................p.h.....h........h..p......................ht.h.h..t..P.....G.......l.....S..s......asp.ps..........................pt.s.u.p.t.l.p.L.l.p.h.....Ahp...h.....l...P......t.p..h.h..tp..............TPl...hlh.AT.A.GhRl.Ls.p.......ptptll...pt......l.....p...p..........h.h...p.............t...sa..h..................ptspllsG.p.p.......E..Gh.auWlshNaLh.......G.phtp...............................................................tppThGh..lDhGGu...SsQ.....ls..F.sp...............................p..h..........tt.........................h......h.......p...h.phh..s....ppa.................plYspSaLsaGh....tAtpt....h..h..t..thhp.....t.t...t.........................................................................h.........p..PCh.....s.h.....p.....t...p.....h..ph.....t..t.............................................h..t.....u..t.u.....s..h..t..C..ht.h.....hhp.......................s..............
...........s........h....s....u..la............s.............pt..hh...h....p..aa..h...t.....h.....t....h..ht..........................phtphtttspphCt.......h.t.............h....t...................t................................................................hChphsahhslLh.p...G..aths.t...................................................lp.hhpplts..p.....hsWsLGh.h.l.hht............t................................................................................................................................ 0 240 423 643 +2684 PF02347 GDC-P Glycine cleavage system P-protein Bashton M, Bateman A anon Pfam-B_840 (release 5.2) Family This family consists of Glycine cleavage system P-proteins EC:1.4.4.2 from bacterial, mammalian and plant sources. The P protein is part of the glycine decarboxylase multienzyme complex EC:2.1.2.10 (GDC) also annotated as glycine cleavage system or glycine synthase. GDC consists of four proteins P, H, L and T [2]. The reaction catalysed by this protein is:- Glycine + lipoylprotein <=> S-aminomethyldihydrolipoylprotein + CO2 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.34 0.70 -5.79 13 6351 2012-10-02 18:26:03 2003-04-07 12:59:11 11 14 3103 20 1832 6081 5305 362.70 31 75.92 CHANGED cRHIGsspp-ppcMLpslGhsoL--LhtpsVPssI+hpcshphsts............hoEpEhltcLpslAu..+Npl.a+SaIGMGYYsshlPsVIhRNlLENsuWYTpYTPYQsEISQGRLEuLLNaQTMlsDLTGLslANASLLDEGTAAAEAMthshphsKpK.tp+FlVspssHPQTlsVl+TRAcshslcllhsshp-hshos..........s-VsGVllQYPsT-GclpD...as-llcpAHpptshlssAsDLLALolL+PPGEhGADIsVGSuQRFGVPhGYGGPHAuFFAsppc.....htRpMPGRllGVo+DusGKpAhRLALQTREQHIRRDKATSNICTAQALLANhAAhYuVYHGspGL+cIAcRlHstTshLApuLcc..sshplpcshaFDTLplpsss.puspp.lLc+Ahtpt..lNLRhs-ss.slululDETsTccDl-sLlplF 
.......................................................................................................................................................................................................................................................t........lPt.t.l..........t....p.....................h...s..ts......................................toEt-hlchl+pLus..............+.N...h..s.....p...u...h...I...shG..ps.h.p.h.sss..lpt.......h..l..p.Pta.....h..s..s..a...s..P......a.Q.P......E......h......u....Q..G...t.L.ch.lh..p.h.Q...p.h..............l.s-L.......T.Gh-....hs....s...s.S.L...s....p..u.s.....A...t..u.....E.......A....h.......h....l.......s.......h......R.....t.......t......+.......p......c.........p.s.....p.........h.....h..ls.ss...s.H...sps........h.s..........s.......h..........s...p.......A....p.h......h......G......h......c......V......l.....s.....s...s..h..p..p..p.s.s.hD....................................tss.l.u.ul.hlp.Y.P.u.T.p.G.h....h...E.....-.....lp.cl.sp.h.l.H.......p..t.G............u....h.l.h.l....u...............A.s....h.....A.l..s....h..Ls.s.P.....G...c.h..........G.......A...D.ls.ls...pps.F..u...lPhG.....h.GGPtsG.hus+sc........Ltthl....P.G..+.h.l....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 604 1157 1560 +2685 PF00996 GDI GDP dissociation inhibitor Bateman A anon Pfam-B_1220 (release 3.0) Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.44 0.70 -6.17 9 1094 2012-10-10 17:06:42 2003-04-07 12:59:11 13 17 362 23 674 1189 269 337.00 30 82.45 CHANGED MDEE.....YDVIVLGTGLpECILSG.LLSVsGKKVLHlDRNcYYGGESASlo.LppLYp+F+st..........................tppP.pphG+uRDWNVDLlPKFLMANGpLV+lLI+TcVTRYLEFKsV-GSYVa+.........pGKIaKVPus-hEALsSsLMGlFEKRRh+pFl.aVssYcEsDspTaculD.p+toMp-lacKFsLspsT.DFlGHAlALapsDsYLppPAh.TlpRIpLYspSluRYGKS..PYLYPhYGLGELPQGFARLSAIYGGTYMLs+PlD-llatcs.GKslGV+S.ts-lA+sKpVlsDPSYhP...cKl+psG.+VlRsICIhsHPIPN...TssupSsQII.lPQsQlGRKSDIYlshlSasHNVssKGhYlAhlSTslETssPthElcPuLcLLGPlpcKFsplp-laEPhs-GscsplFlSpSYDAToHFETsspDVhsIacchsGpsLshsppptpp ........................................................................................................................paDlllhGTGL..Eslluu....h...hS.hp...G..pcVLHhDpss.a.YGu.p.....u..oh.s....lp....p...l...h.p...hh..p............................................................................................................t......t.h..s.p..s....+caslD.L..h..P.Khlhup...G.tLlphLlp.opV.s+Y.l-Fp.lts....a.lh.p...................................................ts...p..l......p.VP..s..s..c..-sht.o..h......h....u............hhEKRp.h.......h+Fh.a...l..................p....a............p.....t.......p.........p..s.........p.............p.......h..........p.....s........h.....p......t.........p......s....h....pp.h........h..p.p..a.......t...L.........t...t......sh........p.hh.h.a.ulAL..h....s...s.p...........h.p..t........s.......h................p......s...lp.c.hp.ha.....p.Slu+a......G..t.o.....P..alaPhY.G.h.G.ElPQuF.s.RhsA..la.G.GsYhLs...p...s...l...p...p...l..................h...........p..........p.....s....c.........h....h.u.......l...p.....s.......tt.......p...........h........h....p..s.....c.ll.....s......s.s..o....ah..........
............pp...s...p............h....s.....p.l..R....s.lhlh.p.p.sl...t..............st.p....s..t.....s...h....t...ll......hP.............t.....p..h......s..p...............p..t...................h.h.l..h.hu.sp.ss.spG.......hh.lhh.h.os........p...s.....t...p..........t.-.lt...s....h...t................................................................................................................................................t....................................................................................................................................................................... 0 223 348 528 +2686 PF02351 GDNF GDNF/GAS1 domain Bateman A anon Pfam-B_889 (release 5.2) Domain This cysteine rich domain is found in multiple copies in GNDF and GAS1 proteins. GDNF and neurturin (NTN) receptors are potent survival factors for sympathetic, sensory and central nervous system neurons [2]. GDNF and neurturin promote neuronal survival by signaling through similar multicomponent receptors that consist of a common receptor tyrosine kinase and a member of a GPI-linked family of receptors that determines ligand specificity [3]. 21.50 21.50 21.70 21.50 21.00 21.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.16 0.72 -11.45 0.72 -3.47 64 955 2009-09-11 15:54:25 2003-04-07 12:59:11 11 8 83 11 503 812 0 84.70 24 52.80 CHANGED ClpAt.ctCpt-.pCpsthpt......ahppCts.........................tsstpCs...................scChpuhpsh.hsslhs................CpCpts.ppp.ppCtphtpphh.tssC .............................CLcst.ctCpt-p..hC.p.p..thpt......ahppCts................................t.ssppCst.....................pcChpA...hpsh..h...ssshs....t...........CsCpts.........p.ptCtphhpphh..s.C............................... 
0 47 105 259 +2687 PF02212 GED Dynamin GTPase effector domain SMART anon Alignment kindly provided by SMART Family \N 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.88 0.72 -4.09 95 1604 2009-01-15 18:05:59 2003-04-07 12:59:11 13 34 368 21 867 1583 18 89.90 28 13.21 CHANGED ppphpschIpph...lpSYasIVp+pltDplPKsIhah.........................llppsccplpppllppLhcpct...hspLLpEsstlsp+RcphpchlctLppApphlscl ............................phphchIcph...lpSYhp.I...l.p+sl.pD....t....l....P.K.s.Iha.h......................................................................hl...p..ps..K....c..p....l...p...s.c.LlspL..hp..psp.......hspLL...pEssphsp+...RcchtchlctLpcAhphlsp............. 0 252 456 678 +2688 PF04807 Gemini_AC4_5 Geminivirus AC4/5 conserved region Mifsud W anon Pfam-B_3520 (release 7.6) Family \N 18.70 18.70 19.20 20.30 18.50 16.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.82 0.72 -7.28 0.72 -4.47 11 267 2009-09-10 17:27:41 2003-04-07 12:59:11 7 2 169 0 0 239 0 32.80 49 22.87 CHANGED pIVLHpsustLlVcHlKaLoKlhhsu.htoTVos ..MpVLHuspTGLllKHlKahoKILhhh.pRoolTs..... 0 0 0 0 +2689 PF00799 Gemini_AL1 Geminivirus_AL1; Geminivirus Rep catalytic domain Bateman A anon Pfam-B_286 (release 2.1) Domain The AL1 proteins encodes the replication initiator protein (Rep) of geminiviruses, which is a replicon-specific initiator enzyme and is an essential component of the replisome [1]. For geminivirus Rep protein, this N-terminal region is crucial for origin recognition and DNA cleavage and nucleotidyl transfer [1]. 
21.10 21.10 21.20 21.20 20.80 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.43 0.71 -4.23 11 3067 2012-10-02 18:54:05 2003-04-07 12:59:11 15 7 1057 2 10 2756 11 106.30 58 35.36 CHANGED FRlpuKNhFLTYP+ColstEcsLppLtsLppphshp.YItlsREhHpDGpPHLHsLlQhEu+hphossRaFDLspspp..s.FHPNIQuAKSoscV+sYIsKDG....sohEaGpFphcu ..................................FplpuKNaFLTYP.+C.S.Lo.K.E.E.ALsQLpsL.p..o....P...s..sKK...a.....I+...lCRELH.E......DG.p.PHLHVLI..QFE.G.Ka..pC........p...N.sR...FFDLsSPoR...Ssa.............FH..PNIQ................u.A.K...S.S.o....DVKsYl-KDG....Dhl-aGpFQlD.G................................................. 0 4 6 6 +2690 PF01440 Gemini_AL2 Geminivirus AL2 protein Bateman A anon Prodom_1117 (release 99.1) Family Geminiviruses are small, ssDNA-containing plant viruses. Geminiviruses contain three ORFs (designated AL1, AL2, and AL3) that overlap and are specified by multiple polycistronic mRNAs. The AL2 gene product transactivates expression of TGMV coat protein gene [1], and BR1 movement protein. 25.60 25.60 25.80 27.50 25.50 25.50 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.86 0.71 -4.07 116 1883 2009-09-13 07:36:57 2003-04-07 12:59:11 11 4 905 0 0 1605 0 131.50 56 96.44 CHANGED Mps..SSPSps+ST.ssIKspHRhAK+..+.slRRRRlDLsCGCShYlpIsCps..HGFTHRG.sHHCoSucEWRl....YL.GssKSPlFQDs.psptpslppppph.....ppssslQPQPpESsGsoQshspLPsLDslTsSDWu.Lps .............MpsSSPSpsHsTpVPIK.VQHR.hAK+.....+slRRRRVDLsCGCSYYl..pI.sCp........N..HGFTHRG.THHCoS...u+EWRl....YL.GssKSPlFQDp.psppps.lppc.pH....ppsssslQsQPpESsGs....oQhFS.pLPsLDDlT...sSDWuFLK........................ 0 0 0 0 +2691 PF01407 Gemini_AL3 Geminivirus AL3 protein Bateman A anon Pfam-B_1874 (release 3.0) Family Geminiviruses are small, ssDNA-containing plant viruses. Geminiviruses contain three ORFs (designated AL1, AL2, and AL3) that overlap and are specified by multiple polycistronic mRNAs. 
The AL3 protein comprises approximately 0.05% of the cellular proteins and is present in the soluble and organelle fractions [1]. AL3 may form oligomers [2]. Immunoprecipitation of AL3 in a baculovirus expression system extracts expressing both AL1 Pfam:PF00799 and AL3 showed that the two proteins also complex with each other [2]. The AL3 protein is involved in viral replication. 22.70 22.70 32.80 26.60 20.80 20.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.54 0.71 -4.65 97 1723 2009-09-13 21:06:47 2003-04-07 12:59:11 12 3 909 0 0 1578 0 119.30 59 89.59 CHANGED DSRTGE.IT...AsQApNGVaIW-ls......NPLYFKIhpHppcPhhp....ppcIlplQIpFNHNLRKALGlHKCFLsFpIWTshps...tTGp.FL+VF+.......pQVh+YLcsLGVISINNVIRAVsaVLassl.pp...sl ..................................DSRTGEhIT...AsQAc.N..GVaIWEls......NPLYFKIhpHspRP.Fhp....spDIIolQIpFNHNLRKALGl....HKCFL.sF+IWTTLpP...pTGp.FLRVF+.......tQVlKYLssLGVISINNVIRAVcHVLasVlppT.h.................................................... 0 0 0 0 +2692 PF00845 Gemini_BL1 Geminivirus BL1 movement protein Bateman A anon Pfam-B_1535 (release 2.1) Family Geminiviruses encode two movement proteins that are essential for systemic infection of their host but dispensable for replication and encapsidation. 
20.20 20.20 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.79 0.70 -5.03 17 508 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 260 0 0 471 0 250.50 54 95.03 CHANGED sYIcScRsEYpLopDhT-IsLQFPSohpQhsuRLptpCMKIDHsVIEYRpQVPlNAoGoVIVEIHDpRhoDs-ShQAuaTFPItCNlDLHYFSSSFFSlKDPhPW+lhYRVsDoNVhpusHFA+hKGKLKLSoAKHSsDI.F+uPTlcILSKpaopcclDFapVshuKhp..R+Llpssshs...phuh+tP.lpltPGEoWAo+S.sIG............................st..spppspsspaPYRpLp+LssutLDPG-SsS.s......us.shopsplp-llcsTVpcClposspsspsKsL ....sYI-ScRsEYpLopDhoEIhLQFPSshpQhou+LptpCMKIDHsVIEYRpQVPlNAsGoVlVEIHDpRho-s-ohQAsaTFPItCNlDLHYFSSSFFSlKDs.PW+lhYRVpDoNVhptsHFA+hKGKLKLSoAKHS.sDI.F+sPTlcILSKpaTtcslDF.pVshsc.p..R+hlpsssht...phuh+tP.IpltPGEoWAo+S.pIG.............................s...sp.ps.pss.aPY+pLp+LssssLDPG-SsS.s......ts.ohShtplp-llcsTlpcCl.ss.pss.sKt.................... 0 0 0 0 +2694 PF01492 Gemini_C4 Geminivirus C4 protein Bashton M, Bateman A anon Pfam-B_453 (release 4.0) Family This family consists of the N terminal region of geminivirus C4 or AC4 proteins. In Tomato yellow leaf curl geminivirus (TYLCV) the C4 protein is necessary for efficient spreading of the virus in tomato plants [1]. 29.10 29.10 29.70 29.70 28.80 29.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.71 0.72 -4.25 79 1778 2009-09-12 04:38:53 2003-04-07 12:59:11 12 4 816 0 0 1410 0 82.00 49 82.94 CHANGED MGsLIShssssSKsNosA+IsDSSThaPQsGQHISIpTFRELsss.hSsPT.p+TETshsG-sSRSh-DhLEEVuph.TTphPRp ..........MGsLIshsSSsSKuNosA+hsD.SSThaPQsGQHISIpTFRELN.ss.hSpPT.pRTETs.sG-s.RSh-DhhEtssp..hThhP+p.............. 0 0 0 0 +2695 PF00844 Gemini_coat Geminivirus coat protein/nuclear export factor BR1 family Bateman A anon Pfam-B_1430 (release 2.1) Family It has been shown that the 104 N-terminal amino acids of the maize streak virus coat protein bind DNA non- specifically [1]. 
This family also includes various geminivirus movement proteins that are nuclear export factors or shuttles. One member BR1 facilitates the export of both ds and ss DNA form the nucleus [3]. 23.50 23.50 23.80 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.35 0.70 -5.14 41 3840 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 1300 0 0 3087 0 202.70 48 94.35 CHANGED RRphshss.....st...hpcssstp+stsp..htRpsphh+hh+ss........sh+lpshp......tschsls....ssuhsphlossspGps..spRsssthplKpltlpGplhh-cshpphsts.......sshhhaLVhDR+Pptssss....sFsplFsh...hpspsuohslpssl+-RFhVh+paptslssst........ss.h..hssppph.s+RFhphlsh+sha.ppsssGpYsNlpcNALllYhshhpssssshuoh.........hhlhFcsshss .............................................................................Rthshsp...thsss...shstss.+tp.s....W.sp....R....P.M.....hRKP+.hYRhaRSPDVP+.......GCEGPC.K.VQSaE.........p+pDlt.HhGhl..hClSDVTRGsG..lTHRlG.KRF.ClKSl.YlhG...K.l.WMD-NIKppNHT........NsVhFaLVRD.RRPh.s.sP.....cFuplFNM...aDNEPSTA....TVKNshRDRaQVh++apusVsGGt.................h...us.+EQ.A..l.l+.+..Fh+.lNsaVsY..NpQEsu.KY.-NHoENAL.h.LYhshhp.ss.ssshssh...........h.Fh.sh............................................. 0 0 0 0 +2696 PF01708 Gemini_mov Geminivirus putative movement protein Bashton M, Bateman A anon Pfam-B_1771 (release 4.1) Family This family consists of putative movement proteins from Maize streak and wheat dwarf virus. 25.00 25.00 33.50 33.40 23.00 22.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.00 0.72 -4.44 10 288 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 88 0 0 221 0 81.80 47 87.79 CHANGED MDth.hss.h.spss....Pp......VPssAPuuuslPWSRVGElsIhohVAVlslYLLYsWVL+DLILVlKA++GRoTEElsFGssst+s....sussss ...........................pss.Y.pPR......VPo.uAPs.SuslsWS+VGEVuIhsFVAllshYLlYlWVL+DLILVLKA+pGRoTEElhFGspssctsss..h......... 
0 0 0 0 +2697 PF01524 Gemini_V1 Geminivirus V1 protein Bateman A anon Pfam-B_893 (release 4.0) Family Disruption of the V1 gene in Tomato yellow leaf curl virus (TYLCV) stopped its ability to systemically infect tomato plants, suggesting that the V1 gene product is required for successful infection of the host [1]. 21.20 21.20 22.30 23.80 21.10 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.48 0.72 -4.00 45 1677 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 796 0 0 1216 0 75.00 69 68.72 CHANGED MWDPLlN-FPE...TVHG.FRCML..AlKYLQt.lcpTYuPDTlGa-LlRDLIsVlRu+NYsEAopRYscFpuRl..puTspuELRQ ..............MWD..PLLNEFPE...oVHG.FRCML..AlKYLQh.lEpTYsPD.TLGaDLIRD.LIsVlRARsYVEAo..pRYsHFauRl..EGost..uELRQ............................ 0 0 0 0 +2698 PF02053 Gene66 Gene 66 (IR5) protein Mian N, Bateman A anon IPR000714 Family \N 20.10 20.10 21.30 20.50 18.80 20.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.50 0.70 -4.91 6 66 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 29 0 0 57 0 175.70 37 67.47 CHANGED Mpsha..........sDtphcosuthh..sutculYPhh.ssousHssSLPRSV+shApsVhssut-AhsAhRuGtPPPAclW.cVYchasssFpcaptS.....husFHsAsPlR+hVGchLhshssAP.ETHuELusRLLaCuYWCCLGHAusCShsphYE..csChRFF-pshGhGEsP.s-uEpYWpsLhshsuscsphh.+asAtsAahptRsR+hsls................u.sh .........................ttthhcpsssp.........................sssusLYPlp.spshAHhshLPtshRshhcslspsSp.stssl+s..s.PPssthh.hlhcphphsaspahRu.....ppphpsl.PhRpAshsalss....husts.pTH...cchc-hLhh....CAaWCCLuHAu..........T..........CShAtLYs...sp..Ch+LFss.FGCGs..................................................ss.ss.................. 
0 0 0 0 +2699 PF03323 GerA Bacillus/Clostridium GerA spore germination protein Mifsud W anon Pfam-B_3821 (release 6.5) Family \N 20.70 20.70 21.00 20.80 18.50 18.50 hmmbuild -o /dev/null HMM SEED 470 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.23 0.70 -6.36 152 1955 2012-10-02 18:57:54 2003-04-07 12:59:11 8 8 418 0 415 1730 13 442.60 33 90.93 CHANGED Nlphlc.phhs..pssDlhhRch....................tttshc.ssllalcGlsDpphlp..ppllcsL.....................ttttptphhpplh..pph...lshsplpphp.shccllpplLsGpsllhl.-Ghs..pulllsspsh...pRulpEPpsEpllRGs+-GFsEslpsNhuLlRRRl+sspLphcphplGcco+TclslhYIcsIss.cllccl+pRlppIclDsl.hssuhlEphIpDps.hosFPplt.TERPDtsuupLhEG+lsIllDGoPhslIsPsohhphhQssEDYap+ahhuoh.lRllRhhuhhlolhLPulYlAlhoaH.-llPspLhhslsusRpslPFPsllEsLlhElshElLREAGlRLPpslGpslGIVGullIGpAAVpAGllSshhlIlVAloAluSFshPsYphuhulRllRFhhllhuuhhGlhGlhluhhhlll+LssL+SF.GhPYLsPhsPh..phps.h+DsllRhPhhhh....p.pR.......Pphlp ......................................................................phlp.phht....pssDlhh+ph......................htshp.hslhalculsDpp.hlp..p.llpsLh........................................p..t..p......t.hp..p.hh..pph.ls.h.s...plpthp.shpclhptlLpGpsllhl.-.s.s.............p.s...h...lls.spsh...p..RulpEPpsEtslRGP+-GFsEslssNhuLlRR+l+sspLphcphp.lGcp.op.TclslhYlcslss.cll...pclcpRlp.pIchDs.l..hssuh.l.E.ph.I.p..Dps..hosFPphh.TERPDtssssLl-G+luIllDGoPhsLlsPsoahphhQosEDYhpphhh.uo.a.lRllRhh.uhhlulhhPulYlAlhsaH.pl..lPspLhhslhsuRpslPFPsllEs.LhhElshElLREAGlRLPpslG.pslGIVGGllIGpAAVpAGlsSshhlIlVAloAluSFshPs.aphuhuhRllRFshhlhAuhhGlh.....Glh.....lGhhhlhhHLspL+Sh.G.s.PYhtPhh...Ph...p...h...p...s...h+.Dhhl.RhPhhhhp.pRPt...p............................................ 
0 209 341 357 +2700 PF00196 GerE Bacterial regulatory proteins, luxR family Finn RD anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.43 0.72 -4.65 30 38873 2012-10-04 14:01:12 2003-04-07 12:59:11 14 273 4244 74 9895 30161 2583 57.10 31 20.74 CHANGED hssLopREtpVLchlspGhoNcpIAppLslSp+TVpsHhsslhpKLslpucsplsphA .......................h...Lot.RE.t..c..l..L.p.h...l..u.......p........G...h....o....s.p.c......IAppL.....t.......l......S.t.p.T.....VcsH.h...p.......s....l..h.p..K...L...s...lps.+sphshh......................... 0 3079 6345 8321 +2701 PF01353 GFP Green fluorescent protein Bateman A anon SCOP Domain \N 25.00 25.00 36.30 36.10 24.50 24.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.25 0.70 -4.97 27 306 2012-10-01 21:32:51 2003-04-07 12:59:11 17 7 114 621 18 1347 1 212.80 38 91.18 CHANGED hssIK-pM+hKlpMEGsVNGHtFplpGcGcGpPacGsQ..shplpVscGuPLPFAaDILosuFpYGNRsFsKYPccI..PDYFKQSFPE...GaoWERshsaEDGGlsssosDIol...cG-.....sFhacl+FcGlNFPssGPVMQKKTh.tWEP.STEthYs..pDGlLpGclshALh.LcGGuHYpCch+ToYKuKKs.lph..PsaHFlDHRlEhhpccc.DhspVc.aEpAlA+hs ...............phlpttMph+lcM-GsVNGHcFslpGcGcGpPaEGpQ..shcLpV..s..c..GGPLPFuaDILossFpYGs+sFs+YPccl..sD.YFK.pu.h..P-...GYoaERohpF.........E.........DGGlsssosc..loh...cGs........shh.acl+hpGsNFPssGPVMpKK.Th..sW-P.S.s.Ep.h.hs..p-ssLpG.csshsLh.Lc.G...G.u+...apscacosY.....+....s..c....Ks...........l..ph.....P.shHalc+clphtp.psp..chpphp.hEhu.Ap....................................... 0 17 17 17 +2702 PF05165 GGDN GGDN family Bateman A anon COG2429 Family I have named this protein family of unknown function GGDN after the most conserved motif. The proteins are 200-270 amino acids in length. 
20.60 20.60 20.60 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.25 0.70 -5.33 4 106 2012-10-01 23:51:22 2003-04-07 12:59:11 7 2 91 4 64 115 25 210.80 32 93.13 CHANGED M...shlQIDsYGPWTspPsPRREsDLQsLQucLYADlpc.huu+pGhVFhsRFDNMlulosGhsltsHcRlQ-uItNRYPlTVSMulusAcTPhDA.ctAspALpctGuAQsEpRpEsLsh.s.....ss-GYVpIAHIDlNslTtThTDhVushcThhpVpclhttLhchL..KhsAlh.FlGGDNhhAsssthp.t.lLclhsclc-phsl.....-LKAGIGhusTAccAusLAchALEcIR..Gths.sslsshcp ..................................h.sllplssYt.WT.o.sscREhtlQtlQucLatclpp.huthsuhsa.hRaDshlslsNGls.pphttl.pslppph.PlslshsluhucTPh-A.htAsttlpp.t.st.pt...p....................ss..ltluHhDlsshT.phTchhssa-oahplpphhhpLhchh.hp.hsulsh.....alGGDNhhs..h.st...hsppsh.phlpclpcphtl.............tl+sGlGhGpsAcs...AhhhAspuL.-tlR..tth..........t.................................. 1 16 34 51 +2703 PF01134 GIDA Glucose inhibited division protein A Finn RD, Bateman A anon Prosite & Pfam-B_4007 (Release 8.0) Family \N 20.00 19.10 20.00 19.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 392 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.29 0.70 -5.61 122 6402 2012-10-10 17:06:42 2003-04-07 12:59:11 17 49 4313 22 1570 23497 12080 363.60 41 65.94 CHANGED DVlVlGuGHAGsEAAhsuA+hGscshLl..............Thph-pluphsCNPolGG......AKGhLs+ElDALG.GhhucssDpsslph+hLNtp+GPAVtAhRsQsD+phYppthpctlcs...psNLplhpspVsclhspp................tth.G.lhht.s..h.sc........sVllTTGT.........aLtu.lhhG..th.sG...sp.hso.sLupsLc.c.hGhchtRhKTGTPPRlctcSIDasth.hpsu-.p.s.s....FSahsssh.......p...pcQ............hs.CalstTspps......HplI+cslc.pos.hasGp..IcuhGPRYCPSIEc...Kll+Fs...............-+spHplaLEPEuhsssp.hYhsGhSToLPhclQhchl+o.IPGLEsAclhR......GYulEYDalsPtpLh.s.oLEoKplpsLFhAGQINGT.oG..YEEAAA..QGLlAGlNAuh.phpscp 
...............................................................................................................DVlVlG.u.GHAG.sEA.A.h..............Au...A..R.h...........G...h..p...slLl...........................................................Thp..h..-.....p.l....u....t...h.....s.....C.....N..P...u...l.G.G...............................u.K.G......h....L....l......+.....E......l........D........A.......L.....G........G.......h........h........u.....c......s....s........D.....p....s....t..l......Q....h.......+..........h......L..............N......t.....t.........K......G..........P.......A....V.......p........A....h.....R......u.........Q..s...D...R.......t.......h........Y.........p.....p........t....h.........p.....p.....t...l...c...s.................p..s..N..L...p......l....h...p..p........t...l...s.....-...l.h....s.pt......................................................tth..h.G.....s...h.....h.....t...........s.................h..t...u.p............sVllTTGT.................................................h.LtG..lhlG........p.....h....u.G..................t......p............s.S..h....s...........L.uc..pLc..c...hG.h...c.h......h.RhK.TGTPPRlcsco.IDas...hht.QsuD...s.s...............FSah.s..s.s.h.........p.....pQ................................hs...Ca.lohT.spps..................Hpl....lcsshc.....cus......h...asGh.........I-.u.h.G...PRYCPSIEc........Kll+Fs...................-+sp..HplFLEPE....Gh....s....ss..............p....hYhsGhS....T.S.LP..h.........clQ..hp...ll+o.I..................G...L..E.N.....A.......c...hhRs.............................GYAlEYDa.hs.....P.p.p.Lh.........s...TLE.......o.......K.......t...l..ps......LFhAG...Q...l....N...G....T.oG......Y......E......E......AAA....QGLlAGlNAAhthtsc..................................................................................................................................................................................................................................................................................
............................... 0 550 1026 1334 +2704 PF02527 GidB rRNA small subunit methyltransferase G Bashton M, Bateman A anon Pfam-B_1265 (release 5.4) Family This is a family of bacterial glucose inhibited division proteins these are probably involved in the regulation of cell devision [1]. GidB has been shown to be a methyltransferase G specific to the rRNA small subunit [2, 3]. Previously identified as a glucose-inhibited division protein B that appears to be present and in a single copy in all complete eubacterial genomes so far sequenced. GidB specifically methylates the N7 position of a guanosine in 16S rRNA [4]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.85 0.71 -5.15 17 4375 2012-10-10 17:06:42 2003-04-07 12:59:11 10 8 4276 14 957 4510 3558 182.20 33 82.38 CHANGED phpphptYhphLhchNpphNLouhp..chpclhp+HllDSlslhphlcptc...t...lhDlGSGAGhPGlPLulhhPph+....lsLl-uhtK+ssFLcpltpcLsLsplpllpsRsEc.......ttpaDslsuRAluslsclsphshp.Ll+ssGhhlthKGtpst-Eltphcpthphpth.hhplsth.tt....pcplh .............................................................pph.tYhclL...hcW...N.cth..N...L....Tulp..-.p.-..h...hh+H...lhD..Sl.s..l..h.........h...l....p....tpp......................p....llDlGoGuG.h..PG.I.PL.u..I....h....h....P.c....h.c...........hT.LlD..S..l....s......KR..l..p..F...L.p...p..ltp.c....L.....s...L..p...s....l..p...s...l.p...u...R.s....Ep......................ht..c.tpaD.h....V.....s....u....R.....A....l......A.....s......L........s..s....l...s...p.h.....s....h......s...L...l......c..............s.......G...p........h......l........A....h.....K..G...t..p..s..p..-.E..lt...p.h.pp.....s....h.p..h..h..th.....t.............................................................................................................. 
0 336 646 821 +2705 PF03227 GILT Gamma interferon inducible lysosomal thiol reductase (GILT) Mifsud W anon Pfam-B_1477 (release 6.5) Family This family includes the two characterised human gamma-interferon-inducible lysosomal thiol reductase (GILT) sequences: Swiss:P13284 [1] and Swiss:Q9UL08 [2]. It also contains several other eukaryotic putative proteins with similarity to GILT [3]. The aligned region contains three conserved cysteine residues. In addition, the two GILT sequences possess a C-X(2)-C motif that is shared by some of the other sequences in the family. This motif is thought to be associated with disulphide bond reduction. 22.00 22.00 23.70 22.60 21.20 19.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.24 0.72 -10.81 0.72 -4.20 15 522 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 214 0 342 525 3 107.30 27 45.18 CHANGED lplslaYESLCPssppFlpppLh.hh..hptshtshh-LcLlPFG.pAcsscsto.....spCQHGspECcLNtLpACsIcshsspph..........hshlsClpps.pshppuh......Cspchthpp ................................lplslaYESLCPssp..pFlhppLhPhh.....p..p..ltsh..lsl..pLlP...aG..pA..p...hp.s.sss................hpCQHGspEChhNhlpsCslph.h.......p....p.h................hthlhCh.pt...t...p........t.t..........Chp.....h............................................................ 
0 138 199 293 +2706 PF03359 GKAP Guanylate-kinase-associated protein (GKAP) protein Mifsud W anon Pfam-B_1892 (release 6.6) Family \N 19.20 19.20 20.40 20.00 18.90 18.00 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.39 0.70 -4.83 20 511 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 91 0 233 441 0 312.50 34 40.76 CHANGED -s+tt....p.h..pIuhp...........h.p.pths.sp.tsc..t.hpSlGlQVc-chp.ptchp.................tosusousspsD.chps..t.s....ssspspsh..s..sshp+p.pp..........ss...pssthth.p+sVs.................pputhsh.............sstsscspsss.psuthsPsps....................................................t...sp+DGpaFl+LLpuEs-RLcuaCpphE+-.....tc-s-LsE.Ell..........GhIRoAVG................pApLLhoQKFpQFctLC-p........................................sh..........s..Psup.csTspDLsGFWDMLpLsIEDVshKF--LppLKsNsWp................ccssPshsKKPsKs................................+sshs+s+uh-uu........ttpRtcARcRLhAAKRAAphRQsp.......socpu-SlEIalPEAQTRL ........................................................................................................................................................................................................................tt............................................tt.....t.........hpS..lGl..Qsp..cp.h.p..............p.p.................................................s.ps.....h.osthp.s.-.php.t....t.........t.ppps.h..t.......tshtpp.p.....................st..................pslp...................................................................ps.hsh.....................................s..ps.s..s..s..h..psu.h.s.P..s.s..s..hps..p...................................................................................tssphuss...pRD.Gpa....F...l..KLL.puEs.-.RhEuWCppME+-........tcEss.L....sE...-l.L............G+IR.oA.VG................sAQLLMoQK...F.pQFptLCpp........................................sh...............s.....Ps.ut..P..cPTsQDLAGFWD.h.LQLsIEDlshKFc-LppLKsN.sWp.hp...................pp-c.....c.ssPsh.s...KK..
ss.+s....................................................psshs....+p+u.-ss..........p.R.pcA...RcR...L.hAAK+.A.sphRpss.......tpEpu-..ohphhhsp.............................................................................. 2 56 73 140 +2707 PF03275 GLF UDP-galactopyranose mutase Mifsud W anon Pfam-B_4203 (release 6.5) Family \N 25.40 25.40 25.50 25.40 25.30 24.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.56 0.71 -4.81 96 1461 2012-10-10 17:06:42 2003-04-07 12:59:11 8 6 1183 57 272 1189 350 188.00 47 53.74 CHANGED thh+uYTcKQWGpcP.p-Lsu...sllpRlPVRhsaDspYF.sDpYQGlP..hsGYTphhEp..MLt.pts.....IcVpLss..Dahchpc................thttcllaTGsIDpaF-Yp...hGcLtYRSLcFE...........pcsh.spssaQGsuVlNYs-.p-hPaTRIhEaKHFps..........ts+Tlls+EYsp.h.....pcu-.....PYYPlssscsppLap+YpcLAcppp...sVhFsGRLusY+Y ............................thlKuYTtKQWGpss.p-LPu..............lIp..........R.L.PVRh..sa..........DNpYF.sDpYQGlP..hsGYTthlEp........M.....L.t...tps........I-VcLss..Dahp.+c.....................th.s.h...s+llaTG.lDpa...........F.-.Yp...hGcLtYRoLcFE.............pEhl...c........tsa.QGsAV.l...NYs-......t-sPYTRIhEaKHFp.t........................p.psKTlI.s...+EYst.h....ccG.D....EPYYPlNspcs..ptLappYpphAp.p.ps..........p.VlFsGRLGpY+Y................... 
0 85 177 232 +2708 PF02812 ELFV_dehydrog_N E_L_F_V_dh; GLFV_dehydrog_N; Glu/Leu/Phe/Val dehydrogenase, dimerisation domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 25.30 25.30 25.40 25.50 25.20 25.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.44 0.71 -4.57 262 6065 2009-01-15 18:05:59 2003-04-07 12:59:11 13 15 3955 213 1526 4625 1317 125.30 45 30.51 CHANGED sc+......llhsph.h........ct.uphpshpuaRs.HssshG.PsKGGlRa.aP.t..............................ls.s....-lhtLuhhMTa..KsAlssLPhGGuKGul...hhD.......P+..ths.......ppE.hcclsRuasptltp...lusspslsAsDlGTssp-Mualh..cpappls ......................PcRhl.hclsh......h.Dp.Gplps.cGaRlQassAl...G....Ph....KG....G.lRFHP.s..............................Vshs.........lKhL....uht.oa..KNul......s........s......LP.hGGGKGG........chD..............P+..shS.......csE..l.RhspuahpELtc...alGs.spDVPAsDlGsuu+-huahh.spYc+l.p...................................................... 0 538 1003 1309 +2709 PF00120 Gln-synt_C gln-synt; Glutamine synthetase, catalytic domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 22.60 22.60 22.60 22.70 22.30 22.50 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.73 0.70 -5.24 95 12610 2012-10-02 17:21:26 2003-04-07 12:59:11 19 19 7069 255 2691 9179 7704 221.90 30 57.41 CHANGED stsPRs.lh++shpthpp.tGhs..shhGsE.EFaLhct....sc....ssts....sthss..........................hpsGhaslss..h-.pspclhp-hhpsh.tphGl.plEstH+EVusu.QhElshc.hssslcsADplhhhKhll+plAccaGhtATFMPKPhhsss..GSGMHsHhSlht...ttG.pshFhssts....sLSchuhpaluGll+HstulsAhssP......ssNSY+RLsP........uhpAPshluauspNRoAslRlPts......tsspupRlEhRhPDs........suNPYLuhAAlltAG 
.................................................................................................t..ssRt..hpc.h..t...h..t.t........uhs.................shhG......E..Eaalhcp........sc...........s............th..............................................................t...hsu..h..ashss...hs...hpsh.hp-hhthh...t.h..Gl...s..l-.shppEV...............u...............s...............u....QaElsht.h.......s.......sslpsADph.............hhh+hll+plAc......p......a...........G.htsoFhsK..Phhsts.......G..oGh.Hsph..Slh.........t.................s....shh...........................st............t................t.....hs.p...th..hl.u....G.h.h.p.ph.......t......h.......shhss.........sssua+RLss..........th..A.s..s..hsau.........h........p..N.R...ss.........lRlPhs..........................t..ttclE....R.s.ss....................sssPYlshushh........................................................................ 0 856 1657 2250 +2710 PF03951 Gln-synt_N gln-synt_N; Glutamine synthetase, beta-Grasp domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -10.05 0.72 -4.40 188 7416 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 5499 249 1602 5187 2852 76.40 36 19.43 CHANGED slcalchpFsDhhGthp.......plshs.......h..spl.........ccs.........hc.pGh.sFDGSSltGatslpcSDMhLhPD...sT..hhl-P..apt..t.....s.hhlhC-lhss ........................................................lphlchhFsDhhGh.p...........plphs...........pph..............pph.....................................hp...ps...h.hFDG.SSl..tG.a......s.t.l......p.cS...DMl.LhPD...po..hh.lDP...apt..t.......s..hhlhC-lhpP.................................. 0 504 986 1339 +2711 PF03710 GlnE Glutamate-ammonia ligase adenylyltransferase Yeats C anon Yeats C Domain Conserved repeated domain found in GlnE proteins. 
These proteins adenylate and deadenylate glutamine synthases: ATP + {L-Glutamate:ammonia ligase (ADP-forming)} = Diphosphate + Adenylyl-{L-Glutamate:Ammonia ligase (ADP-forming)}. The family is related to the Pfam:PF01909 domain. 20.50 20.50 20.50 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.51 0.70 -5.12 22 4102 2012-10-02 22:47:23 2003-04-07 12:59:11 10 13 2001 3 970 3413 962 243.30 33 50.29 CHANGED ltpLhplhutSshlucplt+p....PhlL.cpLhs...hspspshpthtspLpphLhpsss-.....E............................phhcsLRpF+ppphhRIAhsDlhuhhs............lpclsppLotLA-AlltAulphhhppsssch.shP.t.t.sps..shhllGhGKLGGhELsauSDlDLlFhassssts.....sut+sl-stpaasRluQ+llphLst.TssGhlYclDhRLRPpGsuG.LshohsAapcYapppupsWE+.Ahl+ARsls.Gstplptpa.ph .............................................................................................h..pLhplhutSshlsc.lt.p..p.........Pt..h.l..sp..Lhs...................p.s.....t..h..p..t...h.....t....s..p...L.p...p.h...Lh.....p...sssc..........p...............................................................phhcsLRph+ppphlR...IAhs.D.l....su..h..h.s......................................................................lt...plsppL..otLA-uhlsuul.ph..s..htph..st......c........a...G.......P...................t............p.....t........p......s......p...sh...................sllGMGKLGGtELsauSDlDLIFlacpssss..........................sutc.t.l..ss.t...paa...sR...lup+l....hpl....Ls.p...........T.......s.....s.Gh....l.acVDhRLRPpGsuGsLVhShsAhpcYh....p....p....p......u................h..sW..E+.Ahl+ARslu.G-..plttph..t.................................. 
0 261 591 797 +2712 PF03616 Glt_symporter Sodium/glutamate symporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.20 21.20 21.20 21.50 20.70 21.10 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.18 0.70 -5.96 8 1846 2012-10-02 17:06:44 2003-04-07 12:59:11 9 4 1484 0 231 1070 118 354.70 42 89.10 CHANGED lpLsshpTLllAsLlLLLGpall++lsFLp+asIPEPVlGGllVAlslhll+phtshclsFDtS.LQsshMLsFFoTIGLuAshspLhKGGKsLllaLhssssllllQNsVGluhAulLGlcPLlGLlAGSIoLoGGHGTuAAWussFt-p.aGlsuAsElAhACATFGLVhGGlIGGPVA+aLlp+pp...pspspsDsp-p.lspuaccsptp+pIsupsllETlsllslClslGpalusLlps...puLpLPoFVhsLFlGVIlRNhLohhhhap...V.-csVslLGsVuLSLFLAhALMSL+LWELtsLAlPlLlILsVQslsMlLaulFVTFRhMGKDYDAAVlsAGHCGFGLGATPTAlANMpuVTc+FGP ...................................................................plsshtTLshAs.llLLLG+hlspplshL++asIPpPVsGG.Ll.........s.........A.l....h..........l.......hl.....L............c......p........h....hs....h..................p.................l.....p....F....D....h..s............L...p..s.....s.......hML.sF.Fs..T...I.....G..L.........s..A.............s.h....s...p..L.....+.........t....G..G......+...sl...h.........l....F......l..h....ls.s.s.l..l.l.h.Q.N....s.l..G.....l......u.....h.....A..p.....l.......L......G..l.....c..P.L....h....G...L.....l..A..G.....S...I...oL..oG...GHGTuAAau.p.hat.-t..aG.h.....ssAh.pl..AhAsATFGL.VhGGLIG....G...P.V.A......+a.....Llc+tp......................hps..s..t.....t.......s.......-.........s.....p...........p.........................s...........p..........s...........a.......c..........p....s......p...t........s.....+....h........l.....T...u........s........h.........lc.........sl....s....l.........lslC....ls..lG.......p.......h.......l.u....p...h..l...ss.............ssh..........p....l....P..........s..........F....Vs..s..L..Fl.....GVIl....pNhl.s..h.h.t.h..hc..........l.............p..+.s........l..s..l.lGsVSLS.........LF.L..AMALMoLKLWcL.s.s..L..Al.Phll.IL.sVQ.slhMsL...a.Ah..F.V.T.a.RhM.....GKs..Y..DAAVlu
uGHCGFGLGATPTAlANMpulTc+aGs................................................... 0 69 132 190 +2713 PF01744 GLTT GLTT repeat (6 copies) Bateman A anon Pfam-B_681 (release 4.2) Repeat This short repeat of unknown function is found in multiple copies in several C. elegans proteins. The repeat is five residues long and consists of XGLTT where X can be any amino acid. 21.00 21.00 21.10 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.16 0.72 -4.49 35 266 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 125 0 137 344 218 28.60 43 14.72 CHANGED GLsThGLsThGLsThGLsThGLsohGLsT ........GLss.GLso.GLso.GLso.GLso.GLs............. 0 84 101 125 +2714 PF02686 Glu-tRNAGln Glu-tRNAGln amidotransferase C subunit Bashton M, Bateman A anon COG0721 Family This is a family of Glu-tRNAGln amidotransferase C subunits. The Glu-tRNA Gln amidotransferase enzyme itself is an important translational fidelity mechanism replacing incorrectly charged Glu-tRNAGln with the correct Gln-tRANGln via transmidation of the misacylated Glu-tRNAGln [1]. This activity supplements the lack of glutaminyl-tRNA synthetase activity in gram-positive eubacterteria, cyanobacteria, Archaea, and organelles [1]. 20.70 20.70 20.70 20.70 20.20 20.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.09 0.72 -4.18 191 3483 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 3411 33 945 2164 1701 72.20 30 68.71 CHANGED ls-..cEhpphsppLssILcah-pLsc..lDTs...sVcPhsps........h....s.hp.........sshR-Dts.....ppsh..s...p-phLsNAPpp.....ccs.hFtV .....................lo--EhpphsspLspIlsh.l.-.p.Lsp.l.D.Ts......uV..cPhsps........h............c..hp.................................ss.hR..-.D..hs................pp.uh...s.....p-chLp.NAP.cs.....css.hahV................................................................ 
0 327 621 803 +2715 PF05096 Glu_cyclase_2 Glutamine cyclotransferase Bateman A anon COG3823 Family This family of enzymes EC:2.3.2.5 catalyse the cyclization of free L-glutamine and N-terminal glutaminyl residues in proteins to pyroglutamate (5-oxoproline) and pyroglutamyl residues respectively [1]. This family includes plant and bacterial enzymes and seems unrelated to the mammalian enzymes. 20.50 20.50 21.20 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.63 0.70 -5.37 3 437 2012-10-05 17:30:42 2003-04-07 12:59:11 7 3 394 9 188 420 289 246.90 34 86.35 CHANGED M+....L.LpphuhLsLAA...........shu.AA.VhuA+uPVht.+Vl+oYPHDosAFTQGLpYLssGHILESTGphGcSclRVa-Lcsutlptcpslsssl...FGEGlTslcspVYtLTWpDGVAapYDtcTFcsLGcasYpGEGWGLT..+DsKsLhMSsGTAFLpaRDPKTFAtpcoVQVTDpGVPVopLNELEYVcGpLYANVWQTsRIARIcPsTGKVluWIDlSsLL+Elulcuo+.sp....DDVLNGIAa.s-psRLLlTGKLWPpLFEVKLst........tN .....................................................h..hh.........................................................................h.......t..h..cl..lpp.aPHDsp..uFTQGL....h..ts.........s............p...Lh.......E........STG...............h........h.G..p.....Spl..+.......c...h.......c....l...p...o..........G.p..........s..........h......p.......p.......hp......Lss.ph.....FGEGlTh.h......s...c......c......lapLTW+pthu.ahaDh...s..o...........h.p..............l.t.p...h...s.Y.....p.......u.......E...GWGLs.................p-...s.........p.p..LlhSDG.ospLh.......h.....hD..P..........p......s......ap.......hp.p.......l......p......V...s..t.....p......u..ps..lpp.LNELEa.l.........s..G......p.......laANlW........o.......spIhc..I-PpoGpVhuhlD...hs...sLh.t..th............t.....................sVLNGIAa.....s..p.........ps.........c............halTGKhWsplaElch....h......................................................................................... 
0 80 145 177 +2716 PF04262 Glu_cys_ligase glu_cys_ligase; Glutamate-cysteine ligase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of bacterial f glutamate-cysteine ligases (EC:6.3.2.2) that carry out the first step of the glutathione biosynthesis pathway. 20.10 20.10 20.20 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.30 0.70 -5.96 7 1652 2012-10-02 17:21:26 2003-04-07 12:59:11 9 13 1468 18 291 1175 573 336.30 42 63.50 CHANGED uctlthl..pppsphLppht+GlEREoLRVsssGpLuhTPHPtslGusLTH.hITTDFuEuLLEhITPsspcl-phLs.Lp-lHcaspspLts.EhlWPLSMPstlss-EsI.lAQYGsSp.G+hKplYRcGLthRYGchMQhISGIHYNFSLP.phashlhptps.ptstp-h.SuuYhplIRNahRauWll.YLFGASPAlsSuFLpsp.ppL..c+h........tpthhYhPYATSLRhSDlGYsNssptsLslsFNslppYlpulppAlcTPstpatplGlht-.GphhQLNsNlLQlEsEhYusIRPKRssppGEp.hpALhppGlpYlElRsLDlNPFsPlGIshpps+FLDlFhlaChhtDus.hDtp-htpspsNapcls .........................s....thl...tp.t.phlpthp+GlEREoLRVs.s.-.G.p.LApTs.HPc.uLGusLTH.aITTDFuEuLLEhITPls.ssl.pchLphLpslHchst.c...pl....s.c....EhhWPLSM...Pshl.s.p.p.p.-.I.lAp..YGoSN.GphKslYRcGLtpRYGthMQ.sISGlHYNFSls.shaps.hh...s.....t........p......s.....................t.......t....st....p-...hhSstYhclIRNahRasWllsYLFGASPul..ssuFlps..c.....p...p.L.......pph..............................tt..shha.hPa.ATSLRh.S.D.LGYs.N.c.s.Qs..s..Lt.l..s.aNsLppYlsuLcpAlcss...a.tlGh.......t..s..p...hQ.l.NsslLQIENEhYusIRPKphstsGEpPscALhctGlcYIElRsLDlNPFoPlGlspppl+FL-LFhlashLs.D.us.ph.ss.s.-.htphp.Nhppl............................................................ 0 62 142 226 +2717 PF01645 Glu_synthase Conserved region in glutamate synthase Bashton M, Bateman A anon Pfam-B_719 (release 4.1) Family This family represents a region of the glutamate synthase protein. This region is expressed as a separate subunit in the glutamate synthase alpha subunit from archaebacteria, or part of a large multidomain enzyme in other organisms. 
The aligned region of these proteins contains a putative FMN binding site and Fe-S cluster. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.02 0.70 -5.54 15 5264 2012-10-03 05:58:16 2003-04-07 12:59:11 12 61 3389 15 1586 7292 7907 342.40 41 29.94 CHANGED Hh.ssths+pLppAsps..........tshssacpYpchhscc.hlsslRshLchcsscss...........lslppVEss.plhpRFsouuMSaGALScEAHpuLAhAMNclG.upSNoGEGGEssp+hcsssss.......................tIKQlASGRFGVo.............scYLssActlEIKlAQGAKPGcGGpLPGcKVos.IAclRtssPGVsLISPPPHHDIYSIEDLAQLIaDL+plN..cA.lSVKLVuptGlGsIAuG....VAKAsADhIhIuGa-GGTGASPhoulKauGlPWElGLsEscQsLhpsuLRc+VpLpsDGGL+TGtDVshAshLGA-pashGTuuhIAhGClhsRhCHTNsCPlGlATQDPcLR..t+atGsPc+VlNahhhluEElR-lhAphGap .......................................................................................................................................p......h..ust.................p...a.ptatt.h..p..p..p..........h..t.lR..thh.thp.....t...t............l.s.lppV-.s.s.t.p.lh.p+.FssuAMSaG.u.lS...E.........A.HpsL....AhAh......Np....l.G......utSN......oGE..GGEs.st.+.h.t.s.t.t.ss.................................................................................tIKQ..lASGRFGV.s.......................stYL..s..s....u......c.tlpIKlAQGAKPGE.GG..p..LP.....Gp.....K..V.ssh..I.......A...ch....R..t...u..sP.....G.l...s..LI....SPPPH....H...D..............IY.....SI.....ED.LuQ.............L.Ia.DL.Kp.s.N....p..u...luV..K.....L...V..u......p......s......G......l.......G...s....l..A.s..G..........V.u....K....A....t......A...DhI...s....I.u..G..a.-.G.......G...T...........G........A..........o......P.......h.......s......u........l........c....a........s........G......h...P......a.........E.........l.......G...L........s......E.........s.........p.......Q........s...L....h...h....N....s.......L.........R..........s.......+...........l....p...lp...............s.D.G.tl....+T....G....t..............Dl....s....h.A....s...hLGA...-...p...a...G...huTu.
..s...h...l.u..h......G................C.....l.................h.................h.R..................h................C................H...h...s...s...CPsGVATQ........s.s..........p........LR................t..........c..........apG........psc+V.hN.......ahphlAcE...lRclhAplGh.t......................................................... 1 495 1024 1355 +2718 PF02364 Glucan_synthase 1,3-beta-glucan synthase component Bashton M, Bateman A anon Pfam-B_686 (release 5.2) Family This family consists of various 1,3-beta-glucan synthase components including Gls1, Gls2 and Gls3 from yeast. 1,3-beta-glucan synthase EC:2.4.1.34 also known as callose synthase catalyses the formation of a beta-1,3-glucan polymer that is a major component of the fungal cell wall [1]. The reaction catalysed is:- UDP-glucose + {(1,3)-beta-D-glucosyl}(N) <=> UDP + {(1,3)-beta-D-glucosyl}(N+1). 24.30 24.30 27.70 26.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 818 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.59 0.70 -6.67 9 769 2012-10-03 03:08:05 2003-04-07 12:59:11 10 21 230 0 498 771 22 530.20 35 43.68 CHANGED 
-FFPssSEAERRIoFFAQSLuTPlPEPlPVDsMPTFTVLlPHYSEKILLSL+EIIRE--.hS+VTLLEYLKQLHP.sEW-CFV+DTKlLu-Essh.psstp.pc-p.............................t.hppKhDDLPFYsIGFKsAsPEYTLRTRIWASLRo.....QTLYRTlSGFMNYS+AIKLLYRVENP-lsQhFuGNs-tLEh..EL-cMApRKF+hlVSMQRYuKFs...t-EhENsEFLLRAYP-LpIAYLDE-Ps.pputEsphYSALlDGasEl.-.NspR+PKaRI+LSGNPILGDGKSDNQNHulIFYRGEYIQlIDANQDNYLEECLKIRSlLuEFEEhphssssPYsssht...p.pspPVAIlGAREYIFSENIGlLGDlAAGKEQTFGTLFARTLApIG.GKLHYGHPDFLNuIFMTTRGGlSKAQKGLHLNEDIYAGMNAhhRGGRIKHsEYYQCGKGRDLGFGSILNFTTKIGoGMGEQMLSREYYYLGTQLPlDRFLSFYYAHPGFHlNNlFIMLSVQhFMl..lLlNLGuLp+pshsCpYDps...pc..hP......GCYNL.PllpWlpRsllSIFIVFaIoFlPLhVQELhERGha+AhhRhh+HhhShSPhFEVFsCQlYupulhssLsaGGARYIuTGRGFATsRlsFuhLYSRFAssSIYhGucshLhLLFuoho........lWhstLlaFWlolluLClAPFlFNPHQFuWsDFFlDYR-alRWLoR..GNS+.+tsSWlsasRloRoRlTGaKp+hlu..s-ctshDssRAphpNlhhuElhhPhhlhhhshlsYhFIsSpsGss................ppsossllRlhIlshhPIhhshslhhhhhhhusshGPhhs .....................................................t.-ApRRltFFspSL...h...hs.....s...lptM.s.a.ol....hhPaYtEphhhs...................p...................pl....................................p.............p......ptlohl.YLp.p..........la...............-WtsFhpch......t....................................................................................................................................................h.hh.h..u..c...............................t........t....u.h...............................................................................................................................thpt.sthKFphh.hshQ.ht..p.............pht.....sh..Lh.........t..t.lplual..-..............................................t....t.......................t...............ha.S.h.Llcs......................................................................haplpLsG.s......s.h.lG-GKs-NQNpullFhRG-hlQhIDhN............Q.....................DsY.hEEshKhRslLtE........a....................t................................................................ssIlGhREalFots.u.Lu.hhu.pEpoFsTlhtRhhu.......................h
..s+hHYGHPDhhsthahhTRG.GlSKAp+slplsEDIaA................GhsshhRtGplpHp-.YhQsGKGRDlGhspI..F.psKlusG.GEQhLSR-hahlup.hsh.Rhhohaas.p.GFa..hsshh.h.hsl.h..ahh..hhh.lss.lttt...................................................................h....hh.lhhh.hlPhhht.hhEpGhhpuh.phhh..hphushF.sF.hthhsp.h.psl.hGGAcYhuTGRGFsh.+..Fs...Yphautsphh.hu.c..h....h.h...l..l.hh.h...............................................a....hh..W...h....h....shhhu.PahaNPptFta.chh.Dappa.pWh.......s............tpo.....W.......taht.................................................................................................................................................................................................t.......................................................................................................................... 0 149 330 444 +2719 PF02685 Glucokinase Glucokinase Bashton M, Bateman A anon COG0837 Family This is a family of glucokinases or glucose kinases EC:2.7.1.2. These enzymes phosphorylate glucose using ATP as a donor to give glucose-6-phosphate and ADP. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.86 0.70 -5.65 41 1646 2012-10-02 23:34:14 2003-04-07 12:59:11 11 5 1468 6 393 2092 1039 301.90 40 91.50 CHANGED LsuDIGGTpsRhuLhsssssp.........phpsasss-asolpshlppaLtt....ptshpp.............st....uslAlAG.Plssspsph.TNhsWslShpphppsLGls..slpllNDFsAhAhAlsh.Lsppclhplsuup......sp.....ssushullGsGTGLGlutLlhsssphh.sLsuEGGHs-FuPpsscEhtlhpaLcpch..u+VSsER.lLSGsGLsslYculsphc......shpsth..............tpsusIoptAL......sussshucpsLclFsshhGshAGsLALphsAhGGVYluGGIsP+llphlppSsFtptFpsKGRhps.hlpsIPVhllhpspsGLlGAushh.pph ......................................................................................................................LluDlGGTNARhALtsh...sssp...........................pph..c.s....a....s....s....h...D....a..s.o.......L.........ps....s...l....c...h....YLpp.......php...s.p.s........................................usl.Al.....A...s...P...l........s.....G.........D....h...l...t..h.....T...N.....p...s.....W.....s.....h...S.....h.....t.....p.h.p.p....s.....L.....G...h...s.....clpll..ND.F.sA.hu.h.Al.s.h....L.p.....p.....c.....c.....l.....hp..l.G.G...u.p...............s.h..............ts.t.s.h....uVhG..s....G.TGLGVAtL..l..p.....s....s..c....p.......a..l.....sL..PuEGGHlD......F......A......P......p..o......-......c......E..h..h..l.h.p.h.L......+tch..........GH...V.........S.u.ER..lL.SG.s.G.....Ls.s.LY.c.A.lsptc....................sh.h.spt...........................hpPt-I....T.p....t....AL..............ss.sss...spcsLslFCshhGphuGsL..A.L.....s..hGs.....h.....G.G.VY.luGGIlP.....R..hl.-....h.h...c.....s...S...s....F.R.stF....c.c...K..G...R..ap.s.alpsI.P.Val.l...h.p.s.p...sGLlGuushhp..s........................................................................................................................................................................ 
0 133 258 329 +2720 PF01182 Glucosamine_iso Glucosamine-6-phosphate isomerases/6-phosphogluconolactonase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.70 21.60 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.56 0.71 -4.59 317 6449 2012-10-04 00:26:15 2003-04-07 12:59:11 15 20 4109 64 1532 4440 1070 196.50 27 73.82 CHANGED lspshAptlhpthppul.....tp...c.......up.hslsLoGGooPthlact..Lst...............................l..sW.....s+lplahuDERh.VP.scs-SNhths+ctLLs...ph....t.ls.......s.pla...h...............................................puAttYp.pplpphh....................................................FDllLLGhGsDGHTASLFPs..pst.h...t.....t.p..hlsslpts.Ppsss.......RlTLThsslss.A+plhhllsG.tsKtpslcp.shs .............................................................h....huphhsph.l....t..hh..t.p...p.............................sp..hsL.uLu.sGuoPh.shY.ct..Lsp...h....t................................tl..sa..pp.lpsathDE......h......s...l....s.t..c......c.......s.p.Sh.t.t.h.hpcp.l..hspl...............s..ltt.......p..plp..hhs....u..st............................................shcspsppYc..p.tlpp.hs.t............................................................................................lDlhl...L.GlGs..D.GHl.....u......h......N......Pu.......osh.s.................p.t.......t.p...........tsst.h....h....p......s.....s......s.........csPpp..............t.l.....ThG..l...s.s.l...hp.A+clhll.stG.p...pKApAlpphl.p................................................................................ 
0 481 916 1262 +2721 PF00462 Glutaredoxin glutaredoxin; Glutaredoxin Finn RD, Bateman A anon Prosite & Pfam-B_3081 (Release 8.0) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.03 0.72 -3.98 279 11246 2012-10-03 14:45:55 2003-04-07 12:59:11 19 92 4174 90 3561 10294 5112 62.30 25 45.51 CHANGED Vhlas+.....stCsaCppscplL....cphs..lt....a.phlclsps..t...phpptltphs............sh.TlPpla...l.sGc.hl .......................lhlas+..........ssC.s.a.C....t....ps....+phL.............pp..pG......lp...........a..p.l..s.lpps...s................phc.p.t.lt.phs..................sh..T..l..Pp..la...l..ssphl............................................................ 0 1009 2074 2907 +2722 PF04399 Glutaredoxin2_C Glutaredoxin 2, C terminal domain Kerrison ND anon COG2999 Domain Glutaredoxins are a multifunctional family of glutathione-dependent disulphide oxidoreductases. Unlike other glutaredoxins, glutaredoxin 2 (Grx2) cannot reduce ribonucleotide reductase. Grx2 has significantly higher catalytic activity in the reduction of mixed disulphides with glutathione (GSH) compared with other glutaredoxins. The active site residues (Cys9-Pro10-Tyr11-Cys12, in Escherichia coli Grx2, Swiss:P39811), which are found at the interface between the N- and C-terminal domains are identical to other glutaredoxins, but there is no other similarity between glutaredoxin 2 and other glutaredoxins. Grx2 is structurally similar to glutathione-S-transferases (GST), but there is no obvious sequence similarity. The inter-domain contacts are mainly hydrophobic, suggesting that the two domains are unlikely to be stable on their own. Both domains are needed for correct folding and activity of Grx2. It is thought that the primary function of Grx2 is to catalyse reversible glutathionylation of proteins with GSH in cellular redox regulation including the response to oxidative stress. 
25.40 25.40 27.40 26.30 25.30 25.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.31 0.71 -4.57 23 734 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 698 2 76 308 10 128.90 53 61.06 CHANGED scltsWl+cssshhspLlhPRasphsLsEFuTtsAppaFppKKEstlG.sFsptLscosphlpplsstLppLssLlt.ssptsNt.pLohDDIhLFPhLRsLTlV+GlpaPscVpsYlcphuptopVsLh.sshAl ..............PAIE-WLRKVsGYsNKLLLPRFAKu..u.FcE..FuT....PuARcYFlcKKE...ASsG.sFsshLA+Ss.uLIKpIucDL+tLD+LIl..pPNAVNG..ELSpDDIpLFPLLRNLTLVAGI.pWPo+VADYRDNMAKpTQINLLoohAl.......... 0 19 32 56 +2723 PF03157 Glutenin_hmw High molecular weight glutenin subunit Mifsud W anon Pfam-B_2180 (release 6.5) Family Members of this family include high molecular weight subunits of glutenin. This group of gluten proteins is thought to be largely responsible for the elastic properties of gluten, and hence, doughs. Indeed, glutenin high molecular weight subunits are classified as elastomeric proteins, because the glutenin network can withstand significant deformations without breaking, and return to the original conformation when the stress is removed. Elastomeric proteins differ considerably in amino acid sequence, but they are all polymers whose subunits consist of elastomeric domains, composed of repeated motifs, and non-elastic domains that mediate cross-linking between the subunits. The elastomeric domain motifs are all rich in glycine residues in addition to other hydrophobic residues. High molecular weight glutenin subunits have an extensive central elastomeric domain, flanked by two terminal non-elastic domains that form disulphide cross-links. The central elastomeric domain is characterised by the following three repeated motifs: PGQGQQ, GYYPTS[P/L]QQ, GQQ. It possesses overlapping beta-turns within and between the repeated motifs, and assumes a regular helical secondary structure with a diameter of approx. 1.9 nm and a pitch of approx. 1.5 nm [see 1, fig.2]. 
30.00 30.00 30.20 30.00 29.60 29.90 hmmbuild -o /dev/null HMM SEED 772 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.86 0.70 -15.05 0.70 -6.26 2 1571 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 61 0 1 1570 0 130.40 21 95.07 CHANGED S.VAtQYEQplVsPKGGSFYPGETTP.QQLQQtIFWGhs.thlptYYPSVTSPpQsSYYPGQASPQpPGQGQQPGphQpsGQGQQhYYPTS.QQPGQhQQ.tpGp.GYYPTS......LQQPuQGQQ.GQGQQG.........YYPTSsQhp.GQhQQPsQGQ...Q.GQGQ...QPtQGQQPGQhQQG..P...ppP......QQLGQGQQ...PsphQQSGQGQ.GaYPTSLQQ.GQGQpGaY.sS...................QQ........QPuQGQQGp.PuptQQPGQGQ.GaYPsS.QQsGQGQ.GaYPsS.....Q.QpPGQGQQGQ..........PuptQQPGQGQ.GaYPsS.LQp..GQ.GaY.TS.QQ.GQGQQ.GQ......GQK.QQPGQGQQsGQG...QQP...pQtQQPGQGQ.GYYPTS.QQsGQGQ.....QQ.GQGQ.GYYPTS..QPGQGQ.Ga.PsS.QQPGQGQ..........GQ.GQp..................................QQPGQGQpP...pQGQQPGQGQQG...............YYPTSPQ..................QPGQGQ.........QLGQGQQGYYPTS.QQPGQGQQPG......QGQpGahPhSPQ.o......GQGQp.GQh.QPGQGQQGYYPTS.QQsGQGQQ.GQh.QsGQGpQ...P......GQGQQSGQtpQGY.SsYHVSsEpQAAS.hVAKAQQ.AsQLPshCRhEGGDALSASQ 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 1 1 1 +2724 PF00745 GlutR_dimer GlutR; Glutamyl-tRNAGlu reductase, dimerisation domain Bateman A, Finn RD anon Pfam-B_544 (release 2.1) Domain \N 22.90 22.90 23.40 23.20 22.70 22.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.10 0.72 -4.07 164 2994 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 2895 1 730 2124 424 100.40 27 23.41 CHANGED tpAEtIIp...pcsppFhphhcsh.p....ssPsIpslRppucpl+cpElc+uhpp...ht....hsccs.ccslcphscplssKllHsPstpL+p.su.tpsc..pph.....lphlpplFs..l.....c ....................pAcpIls...pEsppFhpWlcsh.p........ssssI+slRppAcplppcplp+shpp..ht..........hup.cs....cpllpc.hscplsN+llHsPotpL+phu.pc...sc....sph......lphlpplasL..................... 0 231 487 639 +2725 PF05201 GlutR_N Glutamyl-tRNAGlu reductase, N-terminal domain Finn RD anon Manual Domain \N 21.50 21.50 22.70 22.50 20.40 21.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.89 0.71 -4.63 172 3200 2009-09-11 08:11:09 2003-04-07 12:59:11 10 10 3083 1 803 2306 768 149.10 37 35.09 CHANGED lGlsH+sAslclRE+luhss.pphtp..hht.pl.tt..slp....EsllLSTCNRsElY....h..ssp.................h............................phlsp.htt.ls...hp.p.lppthhhhp...spcAlcHLa+VAuGLD....ShVlGEsQILGQlKpAaphAp.....ctsssuthLpplFppAhpsuK+VRo-Ts .............................................lGlsH+TAsVslRE+lu.Fss...splpp........ulp.pL....h.......p......p.......t....lp......................Es.ll.LSTCNRoElY.........s.....ssc..p...................psttp...t.lh.....................................................caLuc.hpt..ls.............hc.-..lptp.l.a.h.hp.s.s-Al.cHLhRVAuGLD........ShVLG.....EsQILGQlKcAathAp..........c.sts..s.u....shLp+LFppuhssAKRVRoETt......................... 0 265 544 705 +2726 PF02595 Gly_kinase DUF168; Glycerate kinase family Mian N, Bateman A anon COG1929 Family This is family of Glycerate kinases. 
20.20 20.20 20.20 20.30 19.90 19.80 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.13 0.70 -5.76 6 4076 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 2802 4 574 2648 422 345.30 42 97.58 CHANGED MKIlIAPDSFKESLSAhEsApAIcpGFcplFP-AsYlslPlADGGEGTV-ullsATpGphhchcVsGPLG-p.VsAhaGhsGDG+TAlIEMAAASGLtLV..Ps-KRNPLlsTShGTGELIttAL-sGAcpIIlGIGGSATNDGGhGMlQALGsphLDusGptlGhGGusLuplAcI-lssLDsRLtpsplcVACDVsNPLsGspGAutVFGPQKGAossMVppLDpsLuHaAcllccssGhslpshsGAGAAGGMGuuLhshLssclKuGIpIVhctlpLtptlpDA-LVITGEGRIDuQSlpGKsPlGVApsAKpashPVIuIAGslscDhsVVapaGIDAlFSIlsthusLc-ALpputssLhpsApslAssLthuts .....................................................................................................................MKlVIAPDSFK-SLoA.psupAIcp.Gh..................p......p......l.h..............P......c....A..........p.................h.......l....p......lPlADGGEGTl-.u.hh....t..A.............s....t......G...p.....h.......h....p...h.p......V.suP....h.Gct....l..............p........A...h.......a..G..h..............................s...............c.....s...........p..........T.......A.....lIEMAsAuGLp..Ll.....s.........p...........p...R.....sP.....L.h....s...o..o.hGs....GELItp..AL.c.p....G.......s.c.c..............I.llGlGGSATNDGGsGMhpAL.Gs....c..h..h...........Dt.pG............p...........l..s..........s...............G.us.L.t.....p...lsp.ID...h.........s.t...h..........c...s.....+....L.......p.......p...s.......pl.pl..As..DVs..NPLsGtp.GA.o.tl...FG..P...QK.........G.........As........p..h.l.tp....LDps..Lppa.uc.ll....p.....p........t...................h..................t.................h................s.......l..............t.....p..........hs..G.uGAAGGhGuuLhu.a..h........s........A........plpsGI-lVhc....h....h....pLcptl...p.....s.......A......D......L..VITGEGR.lDpQohh.G..KsPlGV.A.p.hA..+..p..h.....p.....h..P.V..Iuls..Gs..l..s.c..s...h....t........h......lh..p...t.G........l.s.AsFul...l......t......h......sLpcs..h..p..p..u..t..p..pl....p..s.u.psl
uthh....t............................................. 0 175 336 474 +2727 PF01228 Gly_radical Glycine radical Finn RD, Bateman A anon Prosite Domain \N 22.20 22.20 22.20 22.80 22.00 22.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.25 0.72 -3.68 141 5415 2012-10-01 23:28:04 2003-04-07 12:59:11 16 8 2388 24 579 3783 69 105.20 42 16.10 CHANGED husGssPhpG.....tDtpGshAsl.sS..luKlsa.thtpsGhhhshp..hssssl...p.........p.cpthpslsshlcs........aa......p.......................sGhHl.phNVls+.......csLh-AtccPEc..Y.sLslRVuGYus.pFs ............................................................usGusPhpG.....tDppGsh.A.sL.sS...VuKL............sh..sh..sps..G..l....h.shp...lsPs.sLst..p...........s.-sphppL..ssll-u..........aF...c........................GGpHl.NlNVhsR...........EsLhDAhc.......H..PEc..YspLslRVSGYuVpFs.................................... 0 177 347 474 +2728 PF00232 Glyco_hydro_1 glycosyl_hydro1; Glycosyl hydrolase family 1 Finn RD anon Prosite Domain \N 20.10 20.10 20.10 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 455 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.66 0.70 -6.29 13 11445 2012-10-03 05:44:19 2003-04-07 12:59:11 13 39 2849 314 2628 8499 1126 399.30 33 95.89 CHANGED hptsFPccFlWGsATAAYQIEGAapcDG+G.SlWD.....sFs+p.Ps+shssps.GDlACDpYHRaccDlpLhpclGhpsYRFSluWsRIhPpG..sGtlNptGLcaYc+LlDtLhppGIpPhlTLaHWDLPQsLpD.tGGWts...RsslctFpcYAcssFccFGD+VKhWlTaNEPhssuhhuatsGhauPG..............hpshpssapsuHplLlAHAcul+ha+-hh....sGpIGlslssshthPhosus.-s.cAu-RthpatssWFh-Plh+.GcYPpphhchhtcp........uhhsshs-pDhcll...psshDFlGlNYYoophhpsps.....psh.s.spshthspts................oshuW.hh.P.GL+cLLphlpccYsp..hsIYITENGhuhpDch.p.....uplpDstRIsYlppHLspltcAI.pDGlsl+GYhsWSLhDNFEWspG.YscRFGllaVs.....hsoppRpsKcSuhWYppllpsNGh 
.............................................................................h...thPpsFlWGuAsu..A.Q.........hE..G....u..h..p......t...s...G...+..G.............s.h...h.D..................................h.h.h.......p...................t..............t.......h........h............t....t...........................................sc.................u........D...a.Y.........c..+..Y..c........-...................D.lt.Lh.t.......c..hG...hpsaRhSIu....W.........oRl.....h...P..........p.........G..............p...s.......p......s..............N..p...tG.lpFYcplh-c.....h...h.p.h..................s..Ic.PhlTL..............H.......a.....-.h.P.............................t.............L.......................p.....h.................G...........G.Wh.s...........+.c..h.l........c....hFhca.Ac..............hsFppat....c+.........V.........+aWhTaN.E..........s............h..........h.......s.......a.......h......h.......u.......h....h.......ss...........................................tt.s.....p..t.ha...p.s..h....H....p...hlApAhA.........schh...+ph............................ps.p...lGhhl.s.h..s..........h.......YP......h.s....s....p..s......t....-....h.............A....t..........p...................h....................p.......t......h.......h...........................h...........a....h......D..s..........h.........t..G.p.Y.P..t...h..hphhp.cp................................s.h.....p..h..p..t..tD....h.p.h.l................p.s...s...sDaluhsYYhst.hs.p.st.....................................t.............t...s....t........h.....t..h...............................s.s..h..h......t..o.....p....h......s.W.t.I.....P......G..l...h..hl..p...l.hcc.......Yt......hsl.alsE..N........G...................h...G.s......-..p....t.......................tstl.p.Ds.h.RIp..YlppHl.p.thtc......A................l.....t.....-G.ssl.hGYhsWu.h........h......D.......h................Sh.s.s..G..hp.c.R..YGhl..aVD................sshpRh..K.cSh....h....WYpplltspt.t...........................................................
...................................................................... 2 642 1346 1995 +2729 PF00331 Glyco_hydro_10 glycosyl_hydro3; Glycosyl hydrolase family 10 Finn RD, Griffiths-Jones SR anon Prosite Family \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.41 54 2356 2012-10-03 05:44:19 2003-04-07 12:59:11 15 132 632 164 770 2484 350 243.90 30 61.72 CHANGED oLcphhpsph..hhGsAlsts......hhst....ppttllp..pcFNplTsENpMKh-ulpsscG..........................pFsFssADplVsaAppNshtlRGHTLVWHs..QhP....sWhtp.....sss.ss..............LhptlcsHIpsVhs+YK.......plhuWDVVNEshs-ss..........thRp.......SsahplhG........p-alchAFphARcADP......sAKLahNDYNh-ps.....sK.spslhshV+chp.scGlP.......IDGlGhQsHlsssts...........splppulptauslG...h-ltITELDlps.t.................................tpApcYpplhphhhp........phsulThWGlsDstSWhss.............................stPLLFDssapsKPAY.ullssh .................................................................................................................................thh......t....a.s..hs..tNthK........hts..t..s............................h...s..a.t.......s...-..h..ht....hs.t......tp...s.......h........h.+...uH...sl.....h.....W.ts......p...s.............t..Wh.t............t..t...................................................hht.....hh.p.pal..ps.l.ht+.at................hhs...WDVV..N.Eslssss.....................shR.p.................................o.h..a.hp...hhG...............p-a...lt...hA..FchA+....psDP.......................sApL.......ah......N......D....Y.Nhpps.........sK...ppth..h...p.hl.c.p..lp.......p..p.G.ls....................I.D..G......lG.h..Q.....uH.h..........................t.h....t.t...hpt.h.s........t...........h......s...h.l.hTEh....Dlt........................................................st...htthh.t..hht....................h.slhhWsh.p..................hh.t...................................h...............................h................
............................................................................................................................................................ 0 311 563 699 +2730 PF00457 Glyco_hydro_11 glycosyl_hydro6; Glycosyl hydrolases family 11 Finn RD anon Prosite Domain \N 21.60 21.60 22.70 22.70 19.30 21.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.50 0.71 -4.95 12 815 2012-10-02 19:29:29 2003-04-07 12:59:11 12 49 338 104 256 868 0 165.60 49 64.32 CHANGED GssuGhaYphWsDsGussshhss.GGsaSssWs..NtGNhVsGKGWssGo........sshslsYsusas.PsGNSYLsVYGWTpsPLlEYYIV-saGoYcPsus.spthsoVsusGuTYDIapTTRhNpPSIpG.TsTFpQYWSVRpoKRosG........TlosssHFsAWtptGMshG.phhYplhAsEGYpSSGSAslsl ............................tsshaaphWp.D.ss..u...s...s.p.....h......s...........s...........uG..paospWs.....s.s..uNalsGKGWpsGs..................................tpslsY...s...u...s.ap......ssG...N....u....YLulYGW.......T.p......s........P........L.......lEYYIVEsa......Go.Yp....P......su......s....sp..............h.GoV....ooDG.uTY-IYposRhN.tPS..I..p..G...TsT.FpQYWSVRps...K.Rs..tG.............olohu...sHFsAWtp..hG........hp...lG....sh...a..hlss...E.G..Yp....S.SGsusls.............................................. 1 85 169 226 +2731 PF01109 GM_CSF Granulocyte-macrophage colony-stimulating factor Bateman A anon Sarah Teichmann Domain \N 20.10 20.10 20.10 21.20 20.00 19.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.63 0.71 -4.17 7 64 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 42 5 16 68 0 118.90 55 84.06 CHANGED APTRsPssVTRPWpHVDA.IpEALSLLNpopDssAsMNc.sV-VVS-.F-.QcPTClQTRLpLYKpGLRGsLT+LcGsLTMhAsHYcpHCPPTPETsCtTQhITFcsFK-sLKcFLFsIPFDCW ...............................APT+.PussTpP.WpHV-A.IpEAhpLLN.SpDssA.MNc.sVEVVSEhFD.QEPTCLQTRLcLYcpGL+GSLTcLcGsLTMMAoHY+.........pHCPPTPETSCtTQhITFcSFKEsLKcFLh.IPFDCW........ 
1 1 1 2 +2732 PF01670 Glyco_hydro_12 Glycosyl hydrolase family 12 Bateman A anon Pfam-B_1736 (release 4.1) Family \N 20.80 20.80 24.10 23.40 20.30 19.30 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.44 12 532 2012-10-02 19:29:29 2003-04-07 12:59:11 11 21 280 62 289 492 9 154.30 26 54.07 CHANGED .spsl.hhVuslpShsoslsauh.ssosl.sssAYDlahssss..pssusGchElMIWhsphGslpPhGspluTsol....sGp....oW-lWhG.....hsus.psauaVssoshsshphDV+cFhchhspspuh......stsshYlpshphGTE.ass.....ssshslssaSssVp .............................................htluslpo..hsosh..pa..sh....sss...slh.s.ssAYDlahs.....s.s.s..............t.....s....u..s.s.p....hElMlWlst...hG......s......h...t...P......l...........G......s.................l.....u......s.s.sl...................uGp..............sap..lapG..........s.s.u.sh.pVa.....o.a.l.........s......s..........s...s....h........s...sas.s.........D.....l.........ts.....F...ls........h..ts..s.p.sh............sss..YLs..........slphGsEs.asu.......ssshss..sp..aohtl............................................................................. 0 99 184 259 +2733 PF01373 Glyco_hydro_14 Glycosyl_hydr22; Glycosyl hydrolase family 14 Bateman A anon SCOP Domain This family are beta amylases. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.45 0.70 -5.73 5 548 2012-10-03 05:44:19 2003-04-07 12:59:11 12 11 191 66 172 752 23 308.80 44 78.16 CHANGED aVMGPLcKV...TDWN...oFKsQLpTLKNNGVYAITTDlWWGYVEsAGDNQFDWSYYKTYAsoVKpAGLKWVPIISTHpCGGNVGDDCNIPLPSWVWoKsSsD.-MQFKDESGpsNsEuLS......PlaSGls..KpYsELYuSFApNFuuYK.DlIsKIYLSGGPSGELRYPSYsPSsG.WoYPGRGKFQsYTEsAKSuFRsAMpsKYGSLsKlNuAWGTsLTShS..QIsPPTDGDsFaTNGsYNSsYGKDFLoWYQSVLENHLulIGuuAHssFDsVFGV+IGAKIAGlHWQYNNPTMPHuAEpsAGYYD......Ys+LlcpFKcucLDLTFTCLEMsDS..GouPpYS..hPpTLVcsVuolANsKGlcLNGENALspGsSu.tFp+ltEclTsauauG.....FTLLRlNNlVNsDGSsTuEMusF ...........................................................................................................................................................................................................lhSFHpC.GG.NVGD.s......ssIP.lPpW.Vh-h...s.....tp..........s.sDlhaTs.....+.....t.....Gp...RNhEhLohGs..............Dp.Plh.......p.GRTslphYsDaMtSFRps.h.pp..a.h...su......sIs-IpVGhG..PuGE..hRY..PSY.P..p......u.p....G...W...h....a.................P.G.....l..GE....F........C...YDKYhpush.+sAAt.....................Wth.....s.....s.....s..s..st......YNshP....c..c..T.t....F.F...p..p........s....G....s...a...oph...G..+....FFLsWYSshLlpHGD+lLstA.p..pl.......F............u.....s........t.................Vp..l.uh..Kl.......uGIH.WaYp....s.sHssEhsuGahs.......................Y..lh...thh..tp..........t.......h.h.hss..h..-...hp..s.................s.....PptLl.plh..s...t...h.h....tsEN..u.....h.....h.....s...th.......tph...................................................hsh.ph.................................................................................................................. 1 48 111 143 +2734 PF00723 Glyco_hydro_15 glycosyl_hydr10; Glycosyl hydrolases family 15 Bateman A anon Pfam-B_771 (release 2.1) Family In higher organisms this family is represented by phosphorylase kinase subunits. 
21.70 21.70 21.80 21.70 21.30 21.60 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.47 0.70 -5.93 26 2515 2012-10-03 02:33:51 2003-04-07 12:59:11 16 24 1317 16 1160 2459 281 390.30 19 61.49 CHANGED uhttpthhpslts.thhh.ss.oGhllASsopotP..........DYhasWsRDuuhshtulh...............ctht.thppsltphhcs.hlps.hphputsps.sphpss...sLttstapsstsshsGs..WG+.QhDusuhhhhslhph......htsG..........thpssl..l...lps.lpYlsptWsps..saslWE....pppupp.a..opsstt+AllcuhshActhG..s.................................................ssph..hppptsplhspl.......p....ua..........................Wssttsahthsss....................ppsLDush..............lLhshhsasssss...................s+hlushhhlhss.hpsthhlssu..ts........ltR.YspDsh....................................................................t.ht.sGsPa.................................................aLsohh......................................................................................................................................................................................................................................................................................................sp.hhstlhthsuptt.htlschuhshacchsss.................su...p....pt.h.sh..s.tthss.hhphht..ssssGhLuEphsh..........hpGp..huupshsaSasshlpsuh 
...................................................................................................................................................................................................h......h.p....hh..hh.h.hh....ts.sGsll.A..u.so....tshs....................cYph.sWlRDushshh..ulhthu............................tp..th.h....h.h..........tp....s.....h........h.........t....................p.....t............lp....hht..h.tsp..p.h...tp........................hh...thts.s...t.......s.s......h..h..G..s.......t......hsphQhDs.hu.hh..h.l.hth.................................hpts.......................hth.tp...h.th..........l.pt....h....l....ta....l.......t......p......t......a...p......t......s........D...GlWE.........pts.p.ph...Splhhh.hA...h.c....uhp..h.h.p.h.h.s.....ts........................................................................................................h.ph.....hpt..ht.pp.lhppl.............hpp..........ua...............................................................................................................................................................s.........h..h.a...h.t.h.ss.........................................ptlDush.......................................................................................lhhs....h..t...h.h.ss.s...c......................................................sphhs.sh.h.l.tp.....L.....h..t....t.t...h.................................................hhR..Y.....t.p.............................................................................................h.........t..ttsa.......................................................................................................................................................................................................hh.sshh....................................................................................................................................................................................................................................................
.....................................................................................................................................................................................................................................................................................................................................................................................................................h........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 366 666 954 +2735 PF00332 Glyco_hydro_17 glycosyl_hydro4; Glycosyl hydrolases family 17 Finn RD anon Prosite Domain \N 24.00 24.00 24.10 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.10 0.70 -5.39 16 2142 2012-10-03 05:44:19 2003-04-07 12:59:11 13 28 364 18 1056 2132 103 250.50 30 66.00 CHANGED IGVCYGhhuNNLPuss-VVpLY+opsIpcMRlYpscspsLpAL+GSsIpllLsls.NssLtsLAus.otAssWVpsNV+saass.Vph+YIAVGNEVssusspu.....llPAMcNlpsALsAAGLuspIKVoTulphsllsNoaPPSsGsFpsssh..hlsPllpaLssssuPLLsNlYPYFuausNPt.slpLsYAhFpssssss.Ds..uhsYpNLFDAhlDulYuAl-KsGus.slclVVSESGWPSsGu...huAoh-NApsYspNLIsHVt....tGTP++PG.slETYlFAMFsENpKssc.....lE+HFGLFpPscpPtYslsF ..............................................................................................................................................................................................................GhshG.......s..p....s.ttsht.h.h.pt.....t.h..t..th+la.t.ss....thlpuh.t..s.o..sltlh...lsls.s.t...l..t.hss.............t....At.tWl..p.p..s....l.....a...........................s........p......hp.......hls.V.G.NE....h.......t.s.tt................llsAhp..slpt.u.....l..t.....h...s.l..t......tlp.......loss..h.t.h.shh...t....t...s.......s....P...St.u..Fp.th....hhtshh..pa.Ltps..s..u.P...hhhN.h...YPahuh....h.......s.s.......h.s.l.saAhFp....s.....s......s..........s.....h.....Ds...................tht.YpNhhD.A.lDuhhsAh...p...t..............h.....Gh.........s....s............l.............p................lll................uEoGWPo...tGs...............su..s.......h.t...s......A......t.sasps..Ll..p.+.lt....................pG....T..Ph....p......P.s....sl.....p.....sYl.F.uhFsE....s..K.....s.....ss..........sE+p.aGlF...s.s..t.p..hY.l............................................. 
0 163 606 854 +2736 PF00182 Glyco_hydro_19 chitinase_1; Chitinase class I Finn RD anon Prosite Domain \N 20.40 20.40 20.50 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.61 0.70 -4.69 17 2104 2012-10-03 00:09:25 2003-04-07 12:59:11 14 48 920 28 497 2044 184 174.20 31 58.88 CHANGED IlopuhF-pMLpaRNssuCsuKGFYTYDAFlsAApuFPuFGso.GDsss+KREIAAFhAQTSHEToGthhhus-usauWGYCahpp...........sptpaPCusG+cYaGRGPIQLoaNYNYG.AGp..AIG.DLLssPDLVuoDsslSFKTAlWFWMTsQss.KPSsHsVIsupWpPSssDtuAsRlPGaGsITNIINGGlECspG.psspspsRIGFY+RYCshLGVssGsNLcC ............................................................................................................................................a...shtthh.tAh..t......a.......t......h.s.....t................s...........t......................t.....++pl....As.hhu.phspE.Ts....t...h........h.......h..t.....................s.......h..................t............................s..................hs....................................................h......................p....t....p.....h............s........s......s.......s.....G.....p.....p.....Y..a...GR.G......slQ.loapa.....N....Ys...ss.....Gc............u...l......u....h......D.........L........L........s.s.......P-......l......V..up.........D....sh......l......uhpoAl.WFW......hss..........................htsh......t...............................G.................a..G..hhphl.Nss.pCs......................t..........t....pl....tha..hht..................................................................................... 0 116 292 404 +2737 PF00703 Glyco_hydro_2 glycosyl_hydro7; Glycosyl hydrolases family 2 Bateman A, Griffiths-Jones SR anon Pfam-B_572 (release 2.1) Domain This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.59 0.72 -3.61 136 6352 2010-01-08 13:56:32 2003-04-07 12:59:11 16 146 2285 323 1491 5735 464 105.70 19 11.70 CHANGED spl.p...s........hhlpsphsp....ppuplplpl..............hpstsstshphplpsphhssstpphtt..sstthhhhttt.......................h.h.ps..pLWss....c.p..Pp.LY..slplpl.....tss.....phh-ph..ppphGhR .....................................................................................l..t...s............hh.l.t.s.p.hspt.......ppuplph..pl......................s........t...t...t.....s.....t...p.....h....p....l...p..h...p......l....h.....s.sp.s.....p.hs.ss.........sptths...................................................................ph.pl.p.sP.....cLWss.......-p.P.h.LY..pltlpl........tss...............phl..-th...spphGhR...................................... 0 531 1045 1312 +2738 PF00728 Glyco_hydro_20 glycosyl_hydr11; Glycosyl hydrolase family 20, catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_877 (release 2.1) Domain This domain has a TIM barrel fold. 
20.60 20.60 20.60 20.70 19.70 20.30 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.32 0.70 -5.29 56 4091 2012-10-03 05:44:19 2003-04-07 12:59:11 17 134 1587 80 1175 3510 693 323.90 22 49.19 CHANGED FsaRGhhlDsuR.pFhshcplc+hIDtMAhhKhNphHhHLoDs.......puWplpl.....psaPcLochGuhpspt.........................................................hYTpp-lp-llpYAttRtIcVIPEIDhPGHstusltuhsphttt.........................thphtssp.hlssspspoapFlcslhcElsphas......spalHlGGDEs...........................tspWppsst...hpthhtptt...ts..............htph.th........h.t+htphlpptGpp....hlsWs-hhpss..................................ssspshl.sWps............st.stphs.............ppGaplIhoshs...hhYlDhhtsttst..sh..............h.shpps...................hsasshhpt.s..........stptt...........tlhGspuslWsEhl.......ssspl-hhlaPRhhAlAEtsWoss ....................................................................................................................hRGhhlDsu.R.p.ah.sh.........ptlcc.hl.Dhhuh.hKhN.thHh..HlsDs...........................tu....a.t.hpl..........p.thPt.....hs.ph.....sshps.t............................................................................................................t.hYTpp-hp-llpY....A.pp+................sIpllPEI-hP............GH......hp......u......h..l..tu..h.sc..hhtt.......................................................................tht.h.ts......t.......t..hls.....s...p.p.p.sh.p.Flc.sll......s-l..h....p..h.Fs...................sphhHlGuDEs................................tss.a..tt.s.....t.....hpthht..p...t......s..........................tpl..s.h........a.h...p..cl......t..p.h....l......p....p.....p...Ghp.......hhsW..s-.hhhtt...............................................sps....s..h....l..hWps......................ttshphs....................................................................pp.G.a..ch.....l....ss.p........hhYh.shhtt...................................................................pth.............................
.................................................................ph.ps.h..t..t.......................p.....................................lhGs.ss.hWs-..hh................stp.......h...p....hhaP...p...h.uh.AEh.Wp..t......................................................................................................................... 1 455 732 999 +2739 PF02838 Glyco_hydro_20b glycosyl_hydr11; Glycosyl hydrolase family 20, domain 2 Bateman A, Griffiths-Jones SR anon Pfam-B_877 (release 2.1) Domain This domain has a zincin-like fold. 26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.78 0.71 -3.58 143 2321 2012-10-01 20:56:08 2003-04-07 12:59:11 10 128 932 72 547 2311 319 127.00 21 15.90 CHANGED ph.sllPtPppl..p.h..tsGs....asls..s.s.sslsh..ss....ts....pt..s.....s....p.h.Lpph.l.p.......ttG.h..phs........tss..sss.........p...ssl..............t.......ts...................thhspEuYtLs.lss....s.....slsltussssGhFaGhpTLtQll..ttt...........ttt........pl..s.ss.pIpDa .......................................................................................................tllPtPp..pl..p.h...tpGp.....hs.l...........s....p..s..splsh..ss...........t.......tps...............s......p.hL.tph..lp...........hhGh....pht...............ss.tss..............p.ssl.......................h..........s.............................................................sth.ssE...uYpLs...lss....p.........tlplpus.s.ss.GhFaulQTLhQLl..sst..............................ttt...........pl...P..ss..pIpD............................................. 
0 198 366 481 +2740 PF01183 Glyco_hydro_25 Glycosyl_hydr18; Glycosyl hydrolases family 25 Finn RD, Bateman A anon Prosite Family \N 27.00 27.00 27.00 27.70 26.30 26.70 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.41 0.71 -4.46 28 3628 2012-10-03 05:44:19 2003-04-07 12:59:11 15 150 2015 18 613 2804 137 177.30 24 50.67 CHANGED lDVSpaQusls...aspl.....ppsGl..phshlKATEGssahsshhspphssucssGlhhusYHFhp.s...ssAtppAcaFlssl...thssssh..sLDhE..............tphssstpssssthppFlcpl.cpptGh+sll.YosssFhpsp...hsph.spa..sLWlApYsspssss..........shsshshWQYTusG.hsGl ..............................................lDl.SpaQ....u....p.ls........appl.......................t.psG.h........pashl......+.....s......o......c.......G...........s.........s........h...........h...D....s...t...........app...php..p....upp....t......G...l...h.G.sYaasps.................ss......u.pppA.c..h...Flpps...............shs.ttsh..........s..lDlE.................................t.s.hst.sshpp.tl.p...s...ahspl...p.....p......t...s.....G.....h...p..s.h.l...Yos...t.s...ahpp...p.......................t........s...p.h........sph..............slW.lAp..Y.ssp....s.h...........................ts..hsh.WQaospGphsG.................................................................... 
0 217 409 506 +2741 PF02156 Glyco_hydro_26 Glycosyl hydrolase family 26 Mian N, Bateman A anon IPR000805 Family \N 20.20 20.20 20.20 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.19 0.70 -4.95 8 834 2012-10-03 05:44:19 2003-04-07 12:59:11 10 61 453 23 298 816 38 238.10 19 51.41 CHANGED VsPNAptss+sLhsaLsslhs...++lLSGph....Guhs.sshshp-hpplpssTG+hPAlhuhDahc..hosupltcshsssss.p-hIsaW+..pG.GIlslshHas.......sPup..pts.sFhTtsTo.phcsslsss.osEY+.hlhc.lDpIA-tLpcLQsps...VPVLFRPLHEssGcWFWWGscG........PcsYKpLW+hlY-hhsch+GLsNLIWVYosss..s.tssa....YPGDsYVDIlGhDsYts..Dstshos.asphlsLhsutthstluE....sGolP.sthIsshchpas...aF.sWss.........phshsssptlpclapssaVls+DE ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h....t...l.s...t.h....tp.l.p..s......s........lPlla.R.hHEh.........s........G....s............W..F...WW..G..t.p....................sptahp..ha+h...hhch.hp..pt....p.sl.....s.Nll.a....s.....a......s......s......s....s..............................t........s............s.....s.....a..........YPG..D.....sYVDllGhDsYst.............s.......s........t.....t......................a..tp....h..h...t..................t....................ht.................................................................................................................................................................................................................................. 
0 140 222 272 +2742 PF00295 Glyco_hydro_28 PG; Glycosyl hydrolases family 28 Finn RD anon Prosite Domain Glycosyl hydrolase family 28 includes polygalacturonase EC:3.2.1.15 as well as rhamnogalacturonase A(RGase A), EC:3.2.1.-. These enzymes is important in cell wall metabolism. 20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.09 0.70 -5.48 16 3587 2012-10-02 14:50:22 2003-04-07 12:59:11 12 63 822 30 1506 3608 74 283.30 21 67.90 CHANGED CushshsslhVPp..shtLc.sshs......uuholphpGTsphs..........hcp.ph.....hhhlsssplsshsssG.usIDGpGs........tWWcspsp.....ssKstPphlphppsc.suplsslslpNo..hthslptssslshsclsIssssss.............NTDGhclusSsslpIssssltstDDCluIsoGo......slphTsssCusGHG..ISIGSlGspss.ssVssVsVpssslssosNGVRIKT.hp..uuoGoVoslpapNIphpsls.hsIlI-QsYps.ssPssps.souVploslsacslsGTsso.tsslhlLCucu..Cpshshsslslou.G...psoupCpNls.......ssssC .................................................................................................................................................................................................................................................................................................................l.......h....t.h........p............h................h.........G.............u...h.....l.sG.....pGt..................................a..a.......p....................t...t............................t.................t.......P..........p..h..l.t.h........ps.p..s.hhl.p....s........l.pl..h.NuPh.a....p.h.p...h...h...t...s.p.slp.lp...slplps.s.t.ss..................................NTDGh......Dht...s.o...ps...lhIps.......................s..h.....l..p...s.........u................D..D..........Clulp...u....Gpt...........................slhlp..s..s..h..s....s.....s...G...H..G......l..ol.G...S.........sp..t...t........................................V...........p.s....l..hl.........p.....s...s....p......h....h........s..........o...........p.........s.............G..l.
...R........I....K.......o....h..................uts...G....h..l....ps.....lha...pslphp.........s.....l.........p..............s.......l...h.............l.......s..........t......t...........Y...........t...........s..........t.......t...................s.........t.........t...................................s..........t........h......................l.........p........s.......l.......h..........h....p....s..l...p.....u.........p.......s..t.......t..................s......h........h......s.....t.....t............h.ps.h.hpshtl...................................................sh........................................................................................................................................................................ 0 359 933 1245 +2743 PF02836 Glyco_hydro_2_C glycosyl_hydro7; Glycosyl hydrolases family 2, TIM barrel domain Bateman A, Griffiths-Jones SR anon Pfam-B_572 (release 2.1) Domain This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.88 0.70 -5.52 11 6947 2012-10-03 05:44:19 2003-04-07 12:59:11 12 151 2301 315 1472 6580 701 251.70 22 30.17 CHANGED lcl+c.shhhlNGK.lhh+GVNRHEpssh+G+uhshshhlpDlpLMK.phNhNAVRTSHYPsp.caYpLCDcYGlaVlDEsslEoHGhhphhG........................sssPpW.tAhl-RhpchlpRDKNHPSlIlWSlG.NES.....suGtshcshhcahKplDPoR...sVpYEus.......s.hssphsslhhsh.uphYpc.sh.tp....lpc.lps.....................p.pKPhI.CEYuHsMGNu.GshpcYpphapph.cYQGualW-atDpul.tptss.ss....chhtaGGDFs-p.sDhpFshN..GlhhsDRsP+PuhhphKchhp.hph ....................................................................................t..t.hhl..Nsp..h..h..hp.G..h.s....h.H..p..t...t...s......h.h..G......p.s...........h..........s.....................p..........t...........hh.p.D..l..p..l..h.K...phshNulR......s....u......H......Y......P........t...s.................p.h..h...c.h.CDch.......G.lh...V....h....sE.s........s....h...........s...h...s..h...h.......ht.................................s..p..s..s....p.h.....t.....p.....s..h..h.p.c..h....pc....h..lp.Rs......+NHPSllh.WS.....lu..NEs.....................................stt.t.........h.......p.......t....h......h.......p..h.......h.......+..p....h.......D......s...o....R..............l.p.......h..t.ts.......................................................h.....t..........................s....h.................t......h.....h......t...................t........................htt............................................................................p.....s...h...h......s...E..........as..t......s.....h.........h.....s...................t...................h.....t.................................h............................................................h..............t............t.......h........h..............................t.............................................................................................................................
..............................................................................................h............................................................................................................................................................................................. 2 553 1041 1294 +2744 PF02837 Glyco_hydro_2_N glycosyl_hydro7; Glycosyl hydrolases family 2, sugar binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_572 (release 2.1) Domain This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities and has a jelly-roll fold. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.09 0.71 -4.69 88 7272 2012-10-03 19:46:52 2003-04-07 12:59:11 13 195 2374 329 1801 6677 638 164.50 21 18.74 CHANGED ppphhsLNG.......tWpFphssss.........................t...t..h.................thss......tta.pplsVPssaphp.sh.........................................tt............sssuhYc+pFplssph.....p...scclhlpF-GVpsshhValNGphVGh.ppsuasshEaDlTshlptGc....NplsVpVh......phssusal................................s.DhaphuGIaRsVhLhspP 
...............................................................................................................................................t...h..Lss........W...pFth.tts..........................................................................................t...................................................pt.....th...p.p..l..s..V..P.s..s..aphp....h...............................................................................................................sss.s.h.....Y.p+...p.F....p....l.s.p..s..h.......................p........s.p..p....l..h..L.pF.-.G...V.p...p..t.s.p.V...al.....N......G......p......h......l...........G........h......p........p..........s...u.......a.....t.....s.....h.......c.....h...D.l..T....s....h....l...........p....t...........Gp.............N..hlsVp.V.....................p......s.s.phh...................................................s.h.a............h.....u......G..IaRsVhLhh............................................................................................ 
0 669 1268 1576 +2745 PF00933 Glyco_hydro_3 glycosyl_hydr14; Glycosyl hydrolase family 3 N terminal domain Bateman A anon Pfam-B_1151 (release 3.0) Family \N 20.70 20.70 21.30 20.70 20.50 20.20 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.73 0.70 -5.44 39 9251 2012-10-03 05:44:19 2003-04-07 12:59:11 16 142 3405 47 3184 8362 2951 295.80 24 46.55 CHANGED TL-EKls.hs....................................hhGuhhh.........................shtpssshsRlGlPtlhs.pDustGlptt.hs............TsFPsuhulu..uoastc....LhpchGthhGpEhpu......pGlslhhuPsls.lsRsPtsG.RsaEsFuEDPh.Lsuthusthl+GlQu....t........G....VhuosKHFsu....NppEspR...................pssssslsccsl+ElaLhsFptAV.cusssoVMsu...YNplNGs..............ustsphlLsplL+c-hG.....F..pGhVhSDWh...............sspssstulpAGlDhpMssshh................ppLhpulcsGp..lspppl-csspRIlph ..........................................................................................................................................................................................................................h.u.hh....................................................t.h...t.......p...t.h..h...t...............p....h...t...l.....P.h.h.hs..hD.....t.....t.......G.h.ht.thtt..................................................ThaP.....s....s..h.u........h...u.....u....o..a.....s...c.......................ls.p....p.h....G....p....h.....h.up....E...h.pu..........................hGl.sh.s.h......u.P.s.l.....s...l..........s......+.......s...s......p......h.........s....p...s................c.u.a.u....-.DPh.lsut.hut.s.hl..cGhQs.......t...............................u...l..h.us..s..K................H..F.suh........sss....-...s...p+....................................................................p.h..s...s..s..s.h...s..t..p....p..l..p...p...h........h........PF.cts.l..................c.....u....s.....s.........s.......u.........l..Msu..................a.....s......p.........l.....s..u.p..........................Pu.st.....s
ph.ll..pslLR..p.....c.....h..G..............F...cG.......h...l..h.S...Dhh...s....h..t...t..h.....h.................................htcs.s..h.t.ul..p.....A..G.s.Dh...h.ss...s.th............................................................ptlh...p..t..l........p...p..Gp..........l..s.....t...plspust+lh..h......................................................................................................................... 1 1038 2061 2768 +2746 PF00251 Glyco_hydro_32N glycosyl_hydro2; Glyco_hydro_32; Glycosyl hydrolases family 32 N-terminal domain Finn RD anon Prosite Domain This domain corresponds to the N-terminal domain of glycosyl hydrolase family 32 which forms a five bladed beta propeller structure [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.39 0.70 -5.06 138 4354 2012-10-02 00:26:57 2003-04-07 12:59:11 15 62 2280 59 842 3991 275 289.10 30 57.76 CHANGED HFpP.psWhNDPNG.hY..apGhYHLFYQaNP.huss..W..G...htWGHus.ScDLlpWpcls.hAlhPs.p.aDtpG..saSGSAslhss..p..............lhhlYTGs...sp...........p...tpsQslAhsh.c.u....tpahK.....NPllh....Pss...sspcFRDPpshWhp....-.upWhhllGuppp.....ppGhshlYcS..pD..hhpW.phhsphhpss..t.....stMWECPDha.lss.p........................s.t.t.....hhhVLthS...............pssttshY.hlGp.as..psspa.ss...........s...hh....chDaGp..FYAupoFhD.....ssp.RRllhGWhu.psDtp.tsphs......cGWsu..hholPRplhL.........-..ttspLlQhP 
........................................................................................................HhpP.t.s...h...hN....DPNG............hha...a.....pG.p.Y.Hl.F..Y..Q...........a..................P....h.....u.....sh....................a....G.............h...p.................W.....u..H.s....s.....S....c..D.....L..l..pW.......p..p..............h.....s.......h....A....l........h.......P...s.....p..........h.......D.....p...p.....G.............sa.SGS..Ah.........ss..p.......................................................................................lhh.hYTGpsp..............................................pt...pph...Q..s..l..Ahs....csu....................hpap.K.......................sPllht...........P.s.............tsp...cF........R......D...P........pl..............a...pt................s..sp..ah......h.l............l............G.....u.......p...........p......................p................p.........G............p..l....h....lY.p..S.....pD......l....h..p...W.p.h.h..u.t...h.h..t.sst............................uhMWE...C..PDha..lssp.........................................................................lLhhss....sht.........................pshht.s.hY...h..l..Gp....a..p.....pst..pa.sp...............................tp.ht.plD...hG.....h.-.....a.......Y.A......sp....oa.s................sss...Rpl.hhuWh....s...............-.............hs..........tpsW...t.u.sh.................olsRplpl.......ps.....spLhphP............................................................................................................................................... 
0 225 502 696 +2747 PF01301 Glyco_hydro_35 Glycosyl_hydr17; Glycosyl hydrolases family 35 Finn RD, Bateman A anon Prosite Family \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.19 0.70 -5.08 37 2284 2012-10-03 05:44:19 2003-04-07 12:59:11 14 47 1041 19 826 2224 59 273.10 34 43.63 CHANGED shll-Gp.hhllSGulHYhRhs.PphWtDhlpKsKshGhNslpTYV.WNhHEPppGp.asFpGt.hDlscFl+hs.pctGLaVlLRsGPYICAEWshGGhPsWLhph..shphRoss.s.ahptscpahstlhshlts..LhhspG.GPIIhsQlENEYG......hss-tsYht.thschhhchsss.shhhssssPhhhshps....shss...lhssssasss...t.........h........ts.pPpp....PhhhsEaWsGWFspWGp.......hpcsspc..lAtsltchltps......u.hNhYMaHGGTNFGhhsGushhs..........TSYDYDAPlsEtGphs.sKYttl+clhpth .............................................................................hhlsGp.hhlhSGtlHY.R...........hs....p.............W....c.....hl..ph.........K.shG......hNslpo..Y...l.........WN.......hHEP.p................Gp.....ap.F..........p..G...................hD......l.t..tFlph...A...pch.GL.a.l.l.l.R..............PGP..Y.........lC........A......EW....ph..G.G..h..P............s..WL.....h....p..............................s.......h.............p..........h.....R.................o........s...........s.................................s.............ah.....t....t...s....p...pa...........h...p........t........l........hs.h.hts..........h.h...s.........p.........G.....G.......s..ll.h.h.....Q.............lENE.YG.....................................s.-.....t..sYht....thtp.h.h.h.p.h.u...h......s..........s.....h.....hs..ss.....s....s..........hh.ps.............sh.t...........lhssssahst.....t.....................................................p......sp.t............Ph.........hh..........hE..has.G..........WastWut.................................sh....p.......p.s.s.pp........hs..s...ltch..lttt...................................s.hNhYM..................aaGG.T.N....F..G.hhsGsshh..............................TS......Y...............
...DYDAslsE.Gt.s.........Kahtl+phht..h........................................................................................................................................ 0 254 472 661 +2748 PF01074 Glyco_hydro_38 Glycosyl_hydr16; Glycosyl hydrolases family 38 N-terminal domain Finn RD, Bateman A anon Pfam-B_731 (release 3.0) Domain Glycosyl hydrolases are key enzymes of carbohydrate metabolism. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.56 0.70 -5.15 50 2799 2012-10-03 16:37:10 2003-04-07 12:59:11 17 32 1483 64 926 2418 153 271.20 24 29.16 CHANGED plasluHuHhDsuWLaslc-s.........pppspps..asol...lshhcc.s-hpF..htupsthaphhhcc.psphhpcl.+chVpcGphEhluGhWV.sDpshssh-ollcQhhhGp+ahccpFG..hpscsuWhsDsFGaSushPplhp.puGhctahhp+lsasspsph............ptpFhWcu.cu.........oclhoahhsh.............sh......ssph.............................cspsllpthpphtsptt......sspsLhshGhs.........Dsstsshcchlctlpthps.h.....hhsclphuTs.spYacslcp.........................ss-h.sas 
..................................................lahlsHsHhDh..t.Whhshccp.......................chplhph.....hssl........ls.hh......c........p..p......P......-...........a.p....F.......htu.........p......s......................t...hh..c...a.h....p...........pP........c.t..........hp..cl...+......chl....p...p....G........+...lph.h...........Gshal.sDss.lsuuEuhlRphh..hGp..phh..pp.....paG.........tps......phuahPDsFG.a..suphPplhp.tsGhc..th........h....htR..h........s.....h..s...pt..sph................................p...pp.Fh...Wp.u.DG.......................ocl....hs.thhst........................hssth...........................................................phpt.h..h..p.t....h.p.ph....t.p...t.th.................s.s.p.h.lh.shGts.............................pt.t.hs.s.pp.ph..hch.hphh.pp...h..........................stsphhh.u.s..pcahptlctt....................................................................................................................................................................................................................................................... 
0 336 545 754 +2749 PF01229 Glyco_hydro_39 Glycosyl_hydr19; Glycosyl hydrolases family 39 Finn RD, Bateman A anon Prosite Family \N 19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.63 0.70 -6.11 4 463 2012-10-03 05:44:19 2003-04-07 12:59:11 12 17 292 30 189 512 70 356.40 18 64.27 CHANGED hthscsPclshsDutRshR.l..FWR...aC.....stus+hsLuhcpphphshhtshschGhE.lRhHhLL..DlhssRc.ssGpGLh..YNFTalDuhlDhLhE.tlhP....GF..hu.sSGphTsFc.K......+ph.cWp-LVphLAR+aIuRYGlscVpKWsFEsWNEPsh+DF.psuhppp.ahphYcssucul+tssssL+lGGP..........shChul.tahtshpNFhTtEs.V.lDaIShHppsuspuhhh.cp..tsltpscphhsEFt-s..lhp-EA.P.lsatlsp.......Yss.ss.hss.HpsP..hNuuhhhR..lLSpsssaLuh....FS.hThTshFp.sssp.......RpPhhsuhGLlALh...tl.t.s.+uhplLDu..tp...hlhtspHhsh...uspsshpsslllas....+shstpslPsp.hhlslP...shc.Valp..hlDp.pusPausW.+MGpPsaPotEQhcchRtsp-P.htcsspPhsssGcLTLchcLhhsSllLlclssR ...........................................................................................hh....................................................................................................................................................................................asa.t.lDthhDhh..h.t...thhP.....ua...................t.........s....s.....t.................t...................a.....................p....................................p.p..h.t.tW...t.pllpthsp+h.h...tR.Y...G..h.t....p....V.p.....p..W........aEl..WN.E.................Psh.....t............a...h...........................u........p...t.....p......a...h.p.hY...pt.ssc.sl+.t.s.....s.....P....t....h.p..l..G..GP................................................sh.s...h...s................t......a..h....phh..pa...h....ttp....t...h...........h.Da..loh.H.h.s.................................t......................................................................................................................................h...........................................................................
..............................hs...........................tt...........ss....h.........ht...................t..............h.....h.h....t.ht........................h....ss.......thhs.................th.t.s...h..htt.....tt.............t.t..................................t........h..hhh.t...............................................................................h.......................................................h......ss.....a.t..............s........t...h..h.....t................................................................................................................................................................................................................................................................................... 1 89 140 163 +2750 PF01915 Glyco_hydro_3_C glycosyl_hydr14; Glycosyl hydrolase family 3 C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_1151 (release 3.0) Domain This domain is involved in catalysis and may be involved in binding beta-glucan [1]. This domain is found associated with Pfam:PF00933. 
21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null --hand HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.56 0.70 -4.83 107 5906 2009-12-10 17:01:25 2003-04-07 12:59:11 17 121 2112 34 2239 5621 1277 251.70 26 33.08 CHANGED hVLLKN-s.....tlLPLsppt..............+lAllGssAs......................................t....sh.t.......................................hsoslpulpptssssshhh...t..h............................................................................................................................................................................ssstthspAspsAppu....DssllhlG...........h...................................t....st.slpLs..tspppLlptl.ssss....tsslVVltsGsPlsh..hhp...............pssAllhuahPG........pcuGpAlA-lLaGc......sNPoG+LPhTaspsh.........sphPshh........................th...tt.sshhsau.cGL.hsh ................................................................................................................................................................................................................................................................hVLL.KN....cs.................thLP.Lptst............................plAVlG.s..As........................................................shh.h...su...ss.........ut..s.........st................................................hso..h.pu.lppt...h...s....p.s....s...h..h...h..h........h......h.............................................................................................................................................................................................................................................................................................sp.thhs..p...A..s..p...s.A.p.pu.........D..s..sll.slG..........tspt............................................................t..ts+...s.s..l...s.L..s.....t...s....Q..p...p..L..l.p.sl...t.sss............pss.........V...lV.l.h.sGpPls.l.t..h.tp................pssAlltsa.hsG...................p.p.u.G......pAl.A-l.
La.G.c.................................hN.P...S......G......+...LPhoaPpsh..................................sphPsh.hs...............................................h.hpta...........st...t......sha.sFG...aGLSYo.................................................................................................................... 0 741 1451 1968 +2751 PF02056 Glyco_hydro_4 Family 4 glycosyl hydrolase Mian N, Bateman A anon IPR001088 Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.86 0.71 -4.97 8 3002 2012-10-10 17:06:42 2003-04-07 12:59:11 11 5 1469 33 466 1859 496 179.70 40 41.67 CHANGED KIshIGGGSohhschllu.lp+h-cLsspcltLhDlDt..cRL-tltphscphl-csussl+hptossh+-ALpDADFVhsplpVG....hhssRtlDE+IPh+aGlhu..t-TsGPGGIh+GLRoIPslh-Is+chE-hsPcAWhLNYoNPhuhlTcAhhRhhPthKslGlCchshGhpctlAchLsl ...............................................................KlshIGGGSoaTPcl...l.Ghlt...c......h......cc..Ls..l..p...clhLhDl-s...p+L..-hltt....l..sp...+.........h..l..c.......p........s.....u.....s..s.......h......c....l.p..t....T....h...D...p+cALp.D..ADFVhs.plR...V.....G...........ths...sRt...hDE+IPL+aGllG..QETsG..sG.G.lh+uL..RTIPslh-Isc-hcc.l.s.P.s.A.Whl.NaoNPuuhVTEAhh+..h..t..s.....t..K.hlGlC...sh.P.lGhtptlAchLt.l............................... 0 162 299 370 +2752 PF02449 Glyco_hydro_42 Beta-galactosidase Mian N, Bateman A anon Pfam-B_2131 (release 5.4) Family This group of beta-galactosidase enzymes belong to the glycosyl hydrolase 42 family. The enzyme catalyses the hydrolysis of terminal, non-reducing terminal beta-D-galactosidase residues. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.27 0.70 -5.34 28 1345 2012-10-03 05:44:19 2003-04-07 12:59:11 10 34 760 3 435 3006 388 314.10 25 47.10 CHANGED DY.PEQWsc..-hap-Dh+hM+cAGlshVslGhFuWutlEPpEGpa-FuW.......LDcll-hLtctGlpllLuTP........TAusPtWlsccaPElL.l-tcGphptaGuRppaChoSPsY.RptstcIsctL.....AERYus+PullsWHlsNEYGs....tCYC-pCtpAF...............RpWLcp+Y.........ss.l-pLNcAWGTsFWSppYssa--I.sPp.ts.........phhsPuptLDapRFsoDplhsahct.ctchl+choPchPlTTNFMs........hpshDaachApclDhlSWDsYPs...........stcp......pssthuhtpDLhRul..tpGpPFhl.MEpsPS.sVNW.pshN.t+tPGhhcLhShpulAHGA-sVhaFpW..RpuptusEpaHuullsHsu.sso+sapEVsclGccLcpl ..................................................................................................................................................................Y.P-.ph...........t...phhtcDhthhp.ps.shshsp.lu.F..u....Wstl.E......P....p....c....G...p...a.....c.....F.s.a..............LD......c...h....l...c....h...h...t..........p..t..G..lp...l...h....L.......u....T...s......................o..u.s.............P.t.....Wh...h.......p........p.......a...P.........-...h...h.......h...s................p..........G.t.......h...............h....................h.........G.......s...R..........p....p........h........s....h....s.s..s.sa....+c...hs......t....pls.ptl......................A.c..+....Y.....s....p...c...P.....s...l.hhW.p............l......s.....NE..hus....................sas...p..t...s..t..ttF.....................................ppW..Lc..p+.Y.........ts....lc.tLNpA..W...s.......ss.....a.....W..u......p.......ph..psa...pp......l.........Pt.....hs.............................t.....s.h.......l...Da..pRF.spth.hphhp..phphl+....th.s.s......p.h.......lT....p.N.hh.......................sh-h.h..p.h.u..p.t..h..Dh....h..u..h..D..tY........................t...........................uh....tsh..hRsh............ttp.s..ahl.hE.......p.ps..u....t.h...t.........h........s............
s.G........h.h.h.shtt.hAtGup.l....aapa..+p......u.Ephatuhl..st...s.hht-h.thst.ht..h................................................................................................................................................................................................... 2 150 293 365 +2753 PF04616 Glyco_hydro_43 Glycosyl hydrolases family 43 Bateman A, Finn RD anon Pfam-B_5336 (release 7.5) Family The glycosyl hydrolase family 43 contains members that are arabinanase. Rabinanases hydrolyses the alpha-1,5-linked L-arabinofuranoside backbone of plant cell wall arabinans. The structure of arabinanase Arb43A from Cellvibrio japonicus reveals a five-bladed beta-propeller fold. A long V-shaped groove, partially enclosed at one end, forms a single extended substrate-binding surface across the face of the propeller [1]. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.00 0.70 -5.32 37 5170 2012-10-02 00:26:57 2003-04-07 12:59:11 9 188 1022 90 1859 5327 430 265.80 18 57.35 CHANGED hpNPllpGhhPDPolhRsscs...YYlssooFtha......PGl.la+S+DLssWp.lupslsptsphsh..tsss..........WAPslp....atsG+aalhao...............ps......sallsucssc...GPWo-shhl.........sstulDPolFhDss...GcpYLhhsths.t..............tIhhtchsssttphsups.phlhsus............shhhsEGP+la++sGa..YYLhsu....pGGTshs....ashsh...uRS+slhG.Paphpsssshlppt.....ssppslpssGHuullposcGc.aahsahstcs..............tshpshGRcThlp.lpWp.DGWP 
......................................................................................................h...s...DPt...lh...t...h.........sst......a..Y..h.h....s......o....s.............................................t..h..........lh..p..S...p..D..L.........h......s..W...p........h..s.................s...h............s.........h............t............s.....t............t....h......t.tt.t............................W.....A...P.p...l.h.......................h..t..s..G.....c.....a..a.laas.....................t....t.ttt...................................hh.l.h.....p...u.....s...s..sp..........G.s.a...p...p....hth.......................................sttslD......s...sla.h.......D.c.....s........G.......p.h..Y....h.h......a.u.s.......t...................................t.l...h.....h....s..c..l......s.....s..s...h....h...p....h.....t...s..th..........t...h.hhtst........................................h.h...E...u.P....t..l....h...+......p...s......G..h.......Y.Y..l..hh.u.....................ts.s.s.t.s...........athsh...............................spu..p..s......h.G......Pa...........p..............t....t......hh..................................t.tt..h.........s.s......u.H.s.s..h..h...p...............p...u.....p..........h.h.h.ahsht........................................t...R...h.h..lt....h.ap...ss................................................................................................................................... 
0 663 1303 1655 +2754 PF02015 Glyco_hydro_45 Glycosyl hydrolase family 45 Bateman A anon SwissProt Domain \N 20.30 20.30 21.70 20.80 19.50 19.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.30 0.71 -4.25 6 228 2012-10-01 21:39:58 2003-04-07 12:59:11 11 13 86 7 88 232 2 179.70 43 69.24 CHANGED sGpoTRYWDCCKPSCuWsu..KuPs.tPV.oCstsssslss....ssspSGCp.GGuuYhCosppPa..sVsssLAaGFAAsohsGusEsshCCACYtLTFTs....GsltGK+hlVQuTNTGGDLGsspFDhshPGGGVGIFs.GCssQaG...uhhGsQYGGlpStpECsphP-sLpsGCcWRF.-WhcNsssP.hSFcpVpCPupllucSGCp .......................................GhTTRYWDCCKsSCuWss..............K...A.ss....s.p...P...lpoCsts.s.ps.hss.............ssspSGCs...G...GsuYhCssppPW......uVN..ss....hu.YGFA...Ass...h.....s.....G.s...EsshCCsCapLTFTu..................uslsGKcMlVQsTNTGuD..L.....G.......s....N.....p.FDlthPGGG.VGhas.GCs..p...Q.a.u..s.s.st...aGpp..YG.Gl............s.o.........tspCsp......hP.....t..tLpsGCpaRF..sahtss....sNPshsFppVpCPtplsshosC................................................................. 1 41 60 80 +2755 PF01374 Glyco_hydro_46 Glycosyl_hydr23; Glycosyl hydrolase family 46 Bateman A anon SCOP Domain This family are chitosanase enzymes. 
20.80 20.80 25.50 24.80 19.70 19.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.33 0.70 -4.79 7 127 2012-10-03 00:09:25 2003-04-07 12:59:11 13 7 97 4 42 129 4 207.80 39 69.76 CHANGED hpLluhsENuohpWhppYsYsEclcDGRGYTsGIhGhsoG.TGDhL.llt.hpchsPsshLsKalPuh++stt.....hcppuslhshhsp-p.WpotspDh.hc.A.scstDcoaas.sAhppscssh....poALspushhDsslpHGsssss-SFtullKRstp+sp.......DEtpahpsFhssRhtsLhsstsc.spscssp..sRshhhsslhcpsNhsLK ......................................................................................hpLlSpsENSolcWctQ......YuYlEDls..DGRGY.TsGIhGFsoG....TGDhLplV.EhYspt...pP.sN.s.LupYLPALRclsu.......s-sp....sulssh.......hsts.....W.p..psA.p....D.tFctAQcctpD+lYas.PAhppucpsG....hssLuphsaYDshl.HGs..u..ss....s....s.....o..Ftulpc+uhpcAp..P.t........GsDEpsaLsuFLDsRh.sht...p..c..ts.cp.-sochcp..AphshlpphshphpssL.................................. 0 18 32 40 +2756 PF02011 Glyco_hydro_48 Glycosyl hydrolase family 48 Bateman A anon SwissProt Family Members of this family are endoglucanase EC:3.2.1.4 and exoglucanase EC:3.2.1.91 enzymes that cleave cellulose or related substrate. 
25.00 25.00 40.60 40.50 18.90 18.40 hmmbuild -o /dev/null HMM SEED 619 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.33 0.70 -13.25 0.70 -6.39 7 213 2012-10-03 02:33:51 2003-04-07 12:59:11 10 23 126 21 57 214 0 485.10 49 70.58 CHANGED YtpRFhphYsKI+DPuNGYFSsc.GIPYHuVETLhVEAPDYGH.TTSEAaSYYlWLEAhYG+lTGDaStFppAW-shEcYhIPsppDQPst.MSsYsPNKPATYAsEapcPShYPS.Lchs.ssVGpDPltsELhSsYGos.lYGMHWLhDVDNWYGFGtpsst.......sopsoaINTFQRGsQESsWETlPQPsh-EaKaGGpNGFLDLFTtDs.sYAKQaKYTNAPDADARAlQAsYWAs.WAKEQGK..slSuhVuKAuKMGDYLRYuhFDKYF+KlG..s..pspAu.TGYDuAHYLLSWYYAWGGulsu..sWuW+IGsSHsHFGYQNPhAAWsLusDu-hKPKSsNGtsDWApSLcRQlEFYpWLQSAEGuIAGGATNSWsGRYpphPAGTsTFYGMuYs.pPVYtDPGSNpWFGhQAWSMQRVAEYYYtoGDspAKpLLDKWVcWlhuplplssDG.TFpIPSsLcWoGQPDTW..suoYTGNsNLHVcVssYGsDLGlAuSLANALhYYAstotc......ppA+shAKc....LLDthWt..YpDs.KGlussEsRuDY+RF.-p......pVYlPuGWoGpMPNGDsIcsGlpFlDIRSKY+pDPsWsplpsAhpsGpsPshpYHRFWAQsDlAlA.GsYuhLF ..............................tcFht.atpl...ps....t....sGYas..p........G......lPYH..SlETLhl.EAPDaGH.TTSEAaSYhlWLEAhYG+lTG....D........W.o.....h.....hpsAWcshEpYhIP.....s.p.....p-....QPs...stY....ss.spPATYAsEh.ppPspYPu....lphs.sssGpDPltsELtSsYGost....hYtMHWLhDVDNhYGaGpt.st.......sotsoaINTaQRG.pESsWETlPpPoh-pF+aG....t.....N..GaLsLFst.-s...s..........u+QW+YTsAsDADARAlQAsYWA..WApppGp....plushhsKAuKMGDaLRYshaDKYFpplG...sts.p..s.s.su.sGh-SsHYLhuWYhuWGGuhss.....sWuW+IGsSHsH.GYQNPhAAaALus......s.shtP..KSssutpDWtpSLcRQlEhYpWLQSuEGAIAGGuTNSWpGpYtp.Pu..G..s..s..TFYGMhYs.tPVYtDPsSNpWFGhQsWshpRlhphY.....................p..G............s............tt......s..........t...........tlhc+Wh.Whht.h.ht....t.s...pa....hPupLpW.......sGt...P........................sss..lpsps.s.s.p.DlGhhuuhspsL.aauststt..........................................ttutthAptLLDthat...pst.hGlss.E...ttsY.Ra.p........tlYlP.tsas.Gp.spGs.l...........psu..................pF.slRshhppDs.a..l.thht......s......u..................P......hpaHRFWtps-hAhA.u.ht.h............................ 
0 22 43 49 +2757 PF03718 Glyco_hydro_49 Glycosyl hydrolase family 49 Finn RD anon Pfam-B_8840 Family Family of dextranase (EC 3.2.1.11) and isopullulanase (EC 3.2.1.57). Dextranase hydrolyses alpha-1,6-glycosidic bonds in dextran polymers. 21.30 21.30 21.90 22.60 19.80 20.50 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.97 0.70 -6.44 4 30 2012-10-02 14:50:22 2003-04-07 12:59:11 8 4 25 8 8 54 0 496.90 33 80.09 CHANGED sus.psAhpstpsssTsssupLpTWWHssGEhNspTPsQsGNVRpSuhYsVQVupAsssps.hYDSFsYhSIPRsGpG+I............s.o..DGhpaosuhsLoMuWSSFEYS+DVhVclphpsGpolsSsspVsIRPoShsFthphsDssoVhI+VPYSssGYRFSVEF-spLaThhsD..Gst......suGs.psltsEPcNuhhIFApPhL.ut.tpphlPp.sSsshphPpPG.VpNLsosocpllYFpPGsYaMsscp..........HhsLsuNVpWVYLAPGAYVKGAhcFh.sTpupaKsTG+GVLSGEQYVYpADsssuYh.hSsAspscsosl+MhpassusutQp.hslpGlTlutPPapShsh.GNp....hphpV-sYKQVGuaYaQTDGhElY.GohhpssFaHsNDDsLKhYaSsVTlcNsVIWKscNuPVIQaGWTPRNI-NVslsNssVIHsRhhhp-shhNosIhsuSsaY....tshuSs.oouDospTlpNhphpNhssEGhosshhRIhsLpNh-NIpIKNlsIsuasGLp...hsop.Sal.taoshsstK........sspGlslENaTVGGppI..TssN.psspLGRls.hDsp.WssWphh 
...........................................................................................................................................s.................hsssthpTWWHssu.hs.t.tshtsspVRpSphYpVpVt.As...t.pp.hacSFsY.SIPRsGpG+................s.o.tDGh.phpsphshsMuWopFpYspDl.lcl..psGpslsssspVsIRPsshsathp.ssstslh.I+VPhp.sGh+FSVEFpsplhohhss...Gpp....h.spGs..hlt.EPpNuhhIFApPhl.st....hlPp.pss...........s.s.hhpsG.lsshs.socsllYFtPGlYahs..pct..........+hpLssNspaVYlAPG.AYVKGAh...cah..tsppsapssG+GVLSGEpYVY..pAssspsY...h.........Aspscppsl+Mhhapsssu.sQp.hphtGsTlssPPapohslhu......spt........hpsplssYKQVGuaaaQTDGhplY.psS..h..l..+..csFaHsNDDsl.....KhY......a....S...s.........sslcsssIWKscNsP.l.I...QhG.....Ws...s..Rs.....Is.sVslcslslIHs....RhhhspshhsosIhsuSsaa.......uss.hpsDsppph.shphpNhssEGhss.sh..h+lhsLpN..hcthhlcNhth....s.sht......hsht.sh..h.thps...............shGl.lpNaolGsppl..shpNhtttphGphs.hcst.hspWph.t............................................................................................... 1 3 4 8 +2758 PF03512 Glyco_hydro_52 Glycosyl hydrolase family 52 Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 276.40 276.00 19.60 18.50 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.30 0.70 -5.88 4 35 2010-01-08 16:07:30 2003-04-07 12:59:11 8 1 33 0 14 36 16 418.20 50 59.32 CHANGED FTLGaPGpuGGLDLELG+PPcQNVYIGLpS.c-ttpYcsLPFatsu.pDEusRYDlEpsDss.......pQcPp.llhPFs+-EIhR-FsuuTDTWpAGDLTFpIYSPVcuVPDPpsAsEEEL+hALVPAVLsEhTIDNTcGppsR+AFFGapGsDPYSuMR+I-Dsss.plsGVGQGRlTuIsSsDculpsAlaFohEDlLspthcENhsFGLGpsuALlhDVPAGEK+TYpFAVCFYRGGlVTsGhDsoYaYTRaFpsIEEVGEYALtQFstlttputcu-pLlssutLS-DQpFMhAHAIRSYYGsTQLLEH-G+PLWVVNEGEYRMMNTFDLTVDQLFFELKhNPWTVKN.LDLaVcRYSYcDcVRFPG--TEYPGGISFTHDMGVANoFSRPsYSSYELaGIDsCFSHMTHEQLVNWlLCAoVYl 
..FTLGahGpsGGLsLELu+PscpNVYIGhco.cpsuhYchLPFacss.p-Euc.RYshEps-ss........p+ss.hlhsFucccIpREapsATDTWpAGDLT..FplhSPhcsVPDPts..As--EL+hALsPAVhsElTlDNTpGopsR+uFFGapGsDPYouhRhl.........s-.........ss....s.....pLpGlGQGRphuI.s.o.p.-.csVcsuhtFuhEclLssphpcNh.sFGLGpsGALlh-VPAGEK+TYpFAlCFYRuGhVTsGh-sSYYYTRaFcsIE-VupYAL-phsthtstutptschlc.pu.tLS-DQ+FMlAHAI+SYYGSTQL.L..-.....c.........-Gc.PlWVVNEGEYRMMNTFDLTVDQLFaEL+hNPWTV+NsLDhaVcRYSYcDpV+FPG-.-pp......YPGGISFTHDMGVANsFSRPsYSuYELsGls.uCFSaMTHEQLVNWlLCAsVYl. 0 4 8 10 +2759 PF01630 Glyco_hydro_56 Hyaluronidase Bateman A anon Pfam-B_1150 (release 4.1) Domain \N 18.70 18.70 19.80 19.40 18.00 17.90 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.04 0.70 -5.80 11 533 2012-10-03 05:44:19 2003-04-07 12:59:11 13 6 152 6 235 478 2 274.00 38 74.85 CHANGED hRuP.llsspsFlhsWNsPTphChtchshslDlSh.FullusPpcshpGQslTIFYs-cLGhYPalssps....pthpGGlPQpusLppHLs+spp-IpphlPs.DhsGLAVIDWEcWRPpWtpNWpsKDlY+ppSl-LVQspHPphshscsptcApppFEcAA+tFMp-TL+LGKhLRPptLWGaYhFPDCYN.hcapsss..YsGpC.ssphpRNDpLuWLWppSoALaPSlYLpppLpusp+s.t.h.VRtRVpEAlRVu+lscsspslPVhsYsR.lFpDpsspaLoc-DLlpTlGE.sALGuuGlllWGohuhspotpsC.tL+pYhpopLsPYllNVThAAp ....................................h.....h.tpPFhhhWNsPop.Ch..+ath.l....s........l.ph..Fslhssstpp.h.t.G.p........slTIFYtspLGhYPahs.ps.............hshpGGlPQpss.LptH.Lpphtpcltt.....hlss.p.tGLuVIDW.............Et....WRPhWtpN.Wt.s.KclY+ptS......htllpp..p.....psp....h.s.......p..p.ltt.AptpFEtuA+taM.pTLpLupthRPptLWGaYhaPcCa..N.ashh..p..s..s....YoG..pCsshthtcN.-pL.tWLWtpSsAL.aPSlYl..tltss...ps.....Vp.RlpEAhRluthh.p.t............hPVhsY..s+..h.....h...p..t..............s..p.......h....Lop...tDLhpTlGEosAhG..uuGlllWGs..p..sp...o.....p..pC.tlppYlpp.Ls.alhNVo.us........................ 
2 43 64 130 +2760 PF03065 Glyco_hydro_57 Glycosyl hydrolase family 57 Bateman A, Moxon SJ anon Pfam-B_2506 (release 6.4) Family This family includes alpha-amylase (EC:3.2.1.1), 4--glucanotransferase (EC:2.4.1.-) and amylopullulanase enzymes. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.98 0.70 -5.54 22 1145 2012-10-03 16:37:10 2003-04-07 12:59:11 10 25 737 10 560 1481 541 339.30 17 56.79 CHANGED sFhhHpp..hh.p................................uspsYl..........shhphlhch.p........h+hshsloGshlEQlpchtsph...............................................................................................................................................................lpha+p...LhcsGp..lEllsssaaHslhsllsc.....................p-hhpQlchtcchhcchFG...hcPpuhassEhsass................clhchltchGhchlhs-stth..............hhsspsst.hah.......tspt...lslhhRshpLScpIuFpa.......usccah.................pa.h....hh.hhstspspllhlhhDhEsa.....staphtppshh-hhhth..phppp.................shlphhtsp..Ehhpchtscshlhhs.tt.....................................hsWh...........ctshstWlus.tppthhptl 
...............................................................................................................................................................................................................................................................................................................h.hH.p...hh......................................t.........................ht.psYh................shhthh.ph.p..........hph.shshosslhp.lpp.h.t............................................................................................................................................................................................................................................................................................................................................................ls.h.h+p.........L.c....p..Gp...lElls...ssasH.s.....l.h..P.Lhsp.......................csh.ptpl.....phuhpt..h..cc..h..F..G....ppP.c.G.hWhs...E.tu.ass........................................................sl..hphl.sp.t.G...h..c..a.h..h...s-stth............................................hhs.ht...s..s.p...ha.t..................sspt............................ltlh.hRDhplS.p.plh..ph..........................shccah..................................................................ph.h..............hh..t..........t............s..th.h.....hh..hchc..th...........tht.....tp.hht...hh.....t.....t...ptt.....................................t.....l..t..h..hp.p.....ch.h..p.....t.h......................h.................................................................pWh.................................................h.......................................................................................................................................................................................................... 
1 229 411 492 +2761 PF02057 Glyco_hydro_59 Glycosyl hydrolase family 59 Mian N, Bateman A anon IPR001286 Family \N 27.00 27.00 27.20 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 669 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -13.24 0.70 -6.46 3 168 2012-10-03 05:44:19 2003-04-07 12:59:11 10 10 93 2 88 169 18 493.00 37 83.35 CHANGED MTAAAGSAGRAAVPLLLCALLVPGGAYVLDDSDGLGREFDGIGAVSGGGATSRLLVNYPEPYRSQILDYLFKPNFGASLHILKVEIGGDGQTTDGTEPSHMHYALDENYFRGYEWWLMKEAKKRNPNIILMGLPWSFPGWLGKGFsWPYVNLQLTAYYVVTWIVGAKHYHDLDIDYIGIWNERSFDANYIKlLRKMLNYQGLQRVKIIASDNLWEPISASMLLDuELaKVVDVIGAHYPGTHTVKDAKLTGKKLWSSEDFSTLNSDVGAGCWGRILNQNYINGYMTSTIAWNLVASYYEQLPYGRCGLMTAQEPWSGHYVVESPIWVSAHTTQFTQPGWYYLKTVGHLEKGGSYVALTDGLGNLTIIIETMSHKHStCIRPFLPYFNVS+QFATFVLKGSFSEIPELQVWYTKLGKPSERaLFKQLDSLWLLDSSGSFTLELcEDEIFTLTTLTTGRKGSYPLPPKScPFPosYKDDFNVDYPFFSEAPNFADQTGVFEYFTNIEDPGEHRFTLRQVLNQRPITWAADASNTISIIGDY+WTNLTV+CDVYIETP-oGGVFIAGRVNKGGILIRSARGIFFWIFANGSYRVTGDLAGWIIYALGRVDVTAKKWYTLTLsIKG+FASGMLNGKoLWKNIPVsFPKNGWAAIGTHSFEFAQFDNF+VEATR 
......................................................................................FcGhGslS.ususs.SRLLhsY........c..s.......hs...pILcaLF.tss.aGAulph...LKVEhG..u..........D....spoo.....s.....Go....EPSp......+h.t..-.t.......N.....h...R..GYp...ahLht-AKphNPsIplhsLsWuhPGW..l...s......p.....u...............s.........h................p.....hss...........Y.h.h.pW....l..............uu.tphasLslDYl.u..h.h.....NE+.s..a...s.h.p......a.l.K............h....LR.p.hL....................s..........tp......G..........hppl+IlAuD......sh...a...t...s...lu..ts.hh.DspLhpsl.sllGs..H..Y.....s.....s....s...........ss....t........p........u........s...t...KplW.sSE.....shS.......s..h.......s...........p......h......G.u..........us........h.u......Rh........L...N...ps......Yl.su.....hT....uhIsWs...........lluuaY.tlsaspp.uLhs..AppPWSGtY.lps.slWl...AHhT.QFspsGW.hY.L...p...s....s..Gth....pt...........s.GSa..Vu.Lss.s.h.s.......s.h.ohllE.....Th.s.....tp.t.s...h...........t...s..p.hp..h.l...psth..t.t......tlpVW.op...h...s.t....s..p.t........a.phtsh.......s.tG..taolp..lt.splhTlTTh..sst.t+..u.sh.s..ss.....st....hs...............Yp-s..Fs...t.........hs...t..........s..h.s.D.tGsFEhh.s........tt...thhsh+Qh...sptslsWts.......s..hshlGs.pW.s.htlp.Dhhh-............t...h.l.hths..ts......tt..........sh.....h.h.l...ts.Gtat..l...........t.t..........................h..t........h.....t..Whpltl..h.t.....s.......st..h......................G.hshhs..h...spaDsh.l....t...................................................................................................................................................................................................................................... 
0 35 48 63 +2762 PF01341 Glyco_hydro_6 Glycosyl_hydr21; Glycosyl hydrolases family 6 Finn RD, Bateman A anon Sarah Teichmann Domain \N 17.70 17.70 17.70 17.70 17.20 16.60 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.16 0.70 -4.98 74 628 2010-01-08 14:25:27 2003-04-07 12:59:11 12 33 332 44 282 614 35 289.60 31 63.55 CHANGED sNPhas.uphtstst..t.....ushssttpplA.phPouhWl.sphstsss...........ltstLsshhupttsusp...sshhVlYslPsRDCsAhASsG....ussuhspY+s.YIDsIsstl...................tassh+sllllEPDoLuNhlTs......ssspC....t.ss..pstahpslsYA.lppLtshs.NVthYlD........AGHuuWLG..ss...uAplhuphhpsAGss...plRGhuoNVuNYssh.............................hDEhsYtp.................slsstLsstGh.ssc...................................................FllDTuRNGhss.........................p.thGsWC..NssGs.GhGtpPosss...usshlDAalWlKssGESDGs.......ssssustaDt ............................................t.h....s...........t.....ss.ssthppl.u.shPsuhWl..sph...s.ss...........ltstlsshh.ststssst...hshlVlYslPsRDC...u.ht.S.s.G..........ussshspY+s..aIDslAshl.......................us..hpsllll.....EPDuLsph..................hspC...........t.ss...pst..hhp.tlpYA.lppL.t.shs.sstsYlDAGH.uuWL..............uAph.huphlpssGss........ps+GFusNVuNapsh.........................................-EhsYsp..................s.lsshl.st.th..ssp..........................................................allDTSR.NGhus.............................t..usW..C.........NssGtulGt.Posss....ssshlDAalWlKsPGESDGss....ssttu.h...t......................................................................... 1 93 195 260 +2763 PF03443 Glyco_hydro_61 Glycosyl hydrolase family 61 Bateman A, Eberhardt R anon CAZY Family Although weak endoglucanase activity has been demonstrated in several members of this family [1-3], they lack the clustered conserved catalytic acidic amino acids present in most glycoside hydrolases. 
Many members of this family lack measurable cellulase activity on their own, but enhance the activity of other cellulolytic enzymes. They are therefore unlikely to be true glycoside hydrolases [4]. 27.00 27.00 27.50 27.70 26.30 25.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.53 0.70 -4.52 55 827 2010-01-08 16:05:35 2003-04-07 12:59:11 9 12 88 14 728 841 1 202.70 30 69.52 CHANGED HuhVsslhl..sGspYsuapssth.hts.........s.st..ssssG.l..sshsSsDIh..................Cpp.......su..ssussps.sVsAGsplshp.Ws...............aspSH...+GPl......lsYLApC..sss.Csoss..pouhpaFKIspsG...hssuops..........Wus..........DpLI.sssssaslpIPsslusGpYlLRHElIALHuAu........sts........GAQ.YPpChplpVTGuGoss............Pu.G...VshsuhYpsTDPGIhh..sIYps.s......s.Ysl ........................................................................Hhhhtthhl..sG....tth.......................................ssusl........ps...h.s.os-lt......................Cst.................su.....ss..u..s...s..p.s.....sVtAG.spl.shphs...................................hs...s....H....G.Pl...................hsYhucs.....sss...ssshs....ssuhtWFKItptG..............hss..ss.t.................Wu.s....................t..h.....t.....s.....s..s...p...hshp........IPpslssGpY...LlR.tEh...IA.......LHsAt........p....s..........GAQaY...pCsQlpV.o.G.u.Gsss.............Pu...s................Vp.h.P.G........hYp.s.sD.P.G.lh..h......slYts.......sY....................... 0 249 456 651 +2764 PF03664 Glyco_hydro_62 Glycosyl hydrolase family 62 Finn RD anon CAZY Family Family of alpha -L-arabinofuranosidase (EC 3.2.1.55). This enzyme hydrolysed aryl alpha-L-arabinofuranosides and cleaves arabinosyl side chains from arabinoxylan and arabinan. 
20.20 20.20 20.90 20.40 20.10 19.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.99 0.70 -5.27 5 187 2012-10-02 00:26:57 2003-04-07 12:59:11 8 20 99 0 116 198 2 263.70 55 59.08 CHANGED CALPSoY+WTSTGALApPKu.GWAuLKDFTsVsaNGKHIVYASsaDTuosYGSMuFGPFoDWS-MASASQTAMSto....AVAPTLFYFAPKNIWVLAYQWGuooFSYRTSSDPTNPNGWSAsQsLFoGpI..SGSuTGAIDQTVIGDDTNMYLFFAGDNGKIYRSSMPIuNFPGSFGopYolIMSDoTsN....LFEAVQVYTVDG...QN+YLMIVEAhGSuG.RYFRSFTAoSLsGsWTPQAATESNPFAGKANSGATWTsDISHGDLVRNNPDQTMTVDPCN ...............CsLPSoY+WoS.T..GsLAp..PKs....G..W..suLKDFTsVsYNGpaLVYAost....s....u.u...s...aG.SMsFusFosWSphuSAsQs.....thspu.......sVAPoLFYFAPKslWlLsYQ.WG.s.s.sFuY+TSoDPoNPNGWSusQsLF.o...G...o...I.....ssu..ssG...s...ID.sl.IuDspNMYLFFAG..DN..G.+IYRuoMPluNFPGuF.Gossssl.hSD.o..p.sN....LFEAspV...Y+lpG...ps..p..YLMIVEAIG..u..sG.....RYFRSF.TAoSLsGsWTPpA..uoE..o.NPFAG.....KAN....S....G.....A....o.WTsDISHG-LlRs.ssDQThTlDPCN............................................................................... 0 30 74 107 +2765 PF03633 Glyco_hydro_65C Glyco_hydro_65c; Glycosyl hydrolase family 65, C-terminal domain Finn RD anon Pfam-B_3470 (release 7.0) Family This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. The C-terminal domain forms a two layered jelly roll motif. This domain is situated at the base of the catalytic domain, however its function remains unknown [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.48 0.72 -3.88 18 1485 2010-01-08 16:08:30 2003-04-07 12:59:11 10 16 1123 2 318 1157 33 52.00 27 6.46 CHANGED PhLPctast.luF+.lhaRGph.lplplsppplplplhpGp..sLslclhGcclsLcs ......PtLPcpWpp..luFp.lha+Gph.LpVpls..p..p.p..lslph...pu............sl.slpl.Gp.hpl..s..................... 0 86 198 275 +2766 PF03632 Glyco_hydro_65m Glycosyl hydrolase family 65 central catalytic domain Finn RD anon Pfam-B_3470 (release 7.0) Family This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. The central domain is the catalytic domain, which binds a phosphate ion that is proximal the the highly conserved Glu. The arrangement of the phosphate and the glutamate is thought to cause nucleophilic attack on the anomeric carbon atom [1]. The catalytic domain also forms the majority of the dimerisation interface. 
27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.27 0.70 -5.78 16 1977 2012-10-03 02:33:51 2003-04-07 12:59:11 10 29 1435 2 510 1594 109 362.80 34 46.01 CHANGED Rhsl......FaLhpshss.......p-....scLs...........lus+GLoG-u.....YtGtsFWDTEhaslPhhl.hstPclA+sLLpYRhppLsuA+..........csA+phGhp.GAhaPWpoup...GpEso..........s.cht..ss.ppphHlsusIAaA.lhpYhpsTG..DppahpcpGh-lll-sA+FWsu+schspp.stapIcsVsGPDEYc........sslssNsYTNhhAtasLphAhchhcphspt..........htlppp-hcpWc-l...ucphalPhccp.GV.......h.Qa-Gahcht.hshtt...h..t.h...h..chthspl.ptphlKQADllhhhYh............htttastcphtpN....h-aYts+Ts+-S.oLSsslaullAAchsch...spAh-aa.pcusc.................lDl.cs.sss.Tp-Gl.HlsuhuGshpsllpGFuGh...+hc..........sspL .......................................................RaslapLhtshss........p-.......tchs.............Iu..sKGL....T....GEu.......YtGHsFWDT.E.ha.hlPhal.hsp..P.p.lA+.s.LLpY..RappL.stA.p................cpA.......p.......p...............G.......h..........p..G..AhaPWpoh..p....G...p...Eso.............s.pht...hu..ttphHlsusIAaA.lhp.Yh.psTG..D....psal.t.c.p.....G.h.c.lLlEsA+FWss....t.s.p....h....s.....p....t..............s..c..apI.cs.VhGPDEYp...........ssVsNN.s.YTNhhAtasl...ph...Ah....phhpp..hspp.................thth.s..p.t......-htp..W.pch.........scphhlP.h...s......p......p........Gl........hsQpDuFhsht..h.s.hsp.....h.spph......l..c.h.ph..tpl.chphlKQADVlhhhYh..............hs.cpF..ot..p.p.ttpN....hcaYEs.hT..s..H..-..S...SLShslaullAA.cl.Gh.......pp.Ahpha.pcusp......................lDL....s....s..s.....p.s.....sp.....-.....G..l..H.h.....suhuGsWhulltGFuGh...csp.pst....................................... 
0 155 305 432 +2767 PF03636 Glyco_hydro_65N Glyco_hydro_65n; Glycosyl hydrolase family 65, N-terminal domain Finn RD anon Pfam-B_3470 (release 7.0) Family This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. This domain is believed to be essential for catalytic activity [1] although its precise function remains unknown. 18.80 18.80 18.80 18.90 18.70 18.70 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.62 0.70 -5.07 14 1768 2012-10-02 23:57:29 2003-04-07 12:59:11 10 18 1343 2 416 1391 80 251.00 25 31.06 CHANGED plppsphshpp.p+h.pESlhulGNGYhGhR........Gs.....h-Eshss...............ph.GoY..luGla............phsp..........lu....Ghsp.hpcllN.hsNahtlcl.hlsGcs.hDlst...sclpsappsLDh+pGlLpRphphp..s.ps.lclpscRFlShspppLuulchplp......sl-sps.clpltshlDusVsNcs...p...thhphhsp........chpsss.shlhscT..hsss......htsshsspphs.tsspt.h...sphp.sppcphlspphpsplp.spphplp.KhVslso.ScDh ...................................................l.ppphs.cp...hph..tESLhulGNGYlGhR.................Gs......hEEs.h.s..t............................................................................ph.tGsY...luG...la...................p..t.tp...................hu.........Gh..sc.....hpc.llN...sPshhshcl...tl....s.......s...-.......hsLsp........splpsapppLDh+p..............GhLpRphhaps.s...s.....G............pt...lplsscRFlShsphplsslc.hplp.................slss...s.s....plplpotl.......D...u..s.pNps.......................................hhptlsp...................p.s...t.spp....hhlh..hpT....ppss..............h..s.lsh.....uss.p..........h......t...s.........s.p....................t..............h.p.t.p....p......p.tltpp....hp.spl.p.tGps.h.slp.KhVhlts..Sc-................................................................................................ 
2 125 264 361 +2768 PF02435 Glyco_hydro_68 Levansucrase/Invertase Mian N, Bateman A anon Pfam-B_2011 (release 5.4) Family This Pfam family consists of the glycosyl hydrolase 68 family, including several bacterial levansucrase enzymes, and invertase from zymomonas. 19.90 19.90 19.90 21.30 19.80 19.60 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.46 0.70 -5.77 11 358 2012-10-02 00:26:57 2003-04-07 12:59:11 11 10 223 29 68 411 121 335.70 36 80.16 CHANGED hps.s....sophTRADhLKlspp.p.ss...........Th.sIsushsl.ss.....-l.VWDoWPLp-hc.GsVssasGapllFuLsuD.p.....................Rpscs+IthaYp+sGc.........sWhhuG+lht-uss.........spopEWSGoshlss..cGslcLFYTs..h........sssu..pulsstshshhhsD-psVplcGhpps+sLF.p.uDGhhYQshtQs.h.................asFRDPHsh.D..pcG+pYhlFEuNsusEc.........spsshGts-hhsls..ssh.ss.....ssApassGuIGIsphpDspt..o.hclhsPLlTussVsDpsERPpVlh.sGKaYLFThS+tsphu........DGlsGsDsVhhhhusstLpGPYcPlN.uSGLVLsss...............sp.s..hpTYSHashPs...G.ssshhshhsspst...hctsuThAPoltlplpGscohlscshs.......G.IP .................................................................hohtshhpl.t....ps............s....ls.sh.s.h.sp.............pl.lWDohPLpphc...up.hsh....pG..apllhsLsus.p.....................ppscsplhhhYp+.hG.p.............sWh...uGp..lh.pssp......................................pspEWuGoshl............p..tsplpLaYTs.........................p.....thhst...hsh.h.sss.tl....hpshpp.+.lh.p.sD.....GhhYQs...tQ............................................as..hRDP..h.D.....sG.phYhlF.E..uNsus.p...............s.t.hsts.p.h.h.....th.p.....................ttA.htsuslGlhhhps.............p.hchhsPLloussVsDphERPphhh.s....GKaYLFT.S+t.phs........suls.u.s-sh..hhhhspt.lhusYhPhN.soGLVLsss.................t.s..htoYSahsh..Pt.......................u....s.shhs..............h.......s..t...............................................phtuT.APohhl.lpus.p.o.h............G.h........................................................ 
0 18 35 50 +2769 PF00840 Glyco_hydro_7 glycosyl_hydr13; Glycosyl hydrolase family 7 Bateman A anon Pfam-B_1478 (release 2.1) Domain \N 19.60 19.60 37.50 19.70 17.30 19.10 hmmbuild -o /dev/null HMM SEED 433 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.81 0.70 -6.11 18 5196 2012-10-02 19:29:29 2003-04-07 12:59:11 15 7 239 68 226 5216 4 183.50 64 97.72 CHANGED QsGT.TsEsHPpLTWp+CTsuGs..CsshsupVVlDANWRWhHpssGtT...NCYoGNpWsoolCPDspTCApNCsLDGADY..suTYGlTTSGsoLoLpFVTpss...tpNlGSRlYLMssDoc.YpMFpLLspEFTFDVDlSpLPCGLNGALYFssMDADGGhu+assN+AGAKYGTGYCDuQCPRDlKFINGpANlEGW.sSssssNsGhGshGoCCsEMDIWEANShusAaTPHPCsssu.QptCsGssCG.s.s....RauGhCDsDGCDFNsYRhGspsFYGsG..hTVDTo+pFTVVTQFlssss...GsLsEI+RaYVQNGpVIpNussslsGlsssssITcsFCsupKssFG-ps.Fsc+GGLspMGcALupGMVLVMSlWDDauuNMLWLDSsYPsssss.tPGstRGoCssTSGsPupVEussPsupVsFSNIKFGsIG.STa .................................................................................................................................................Y.T.G..N.s.Ws.sTlC...P..D..s..t..o.CA.pNCA.lD.GA...D...Y....sGT.YGI..T.T........S..G.N.u.Lo.L+FVT......pup......uoNl..GSRl..YLM..p..s....D.op...Yp....MFpL.l.N.p......EFTF...DVDVSpLPCGLNG..AL.YFV.pM...DuDGGhuK...a.s.sN......KA..GAKY..G...T......G...YCDuQ........CP+DlKFI....N....G......p..........A..N..lcGWss.Ss.s.DsNAGsGpaGoC.CsEMDIW.......................................................................................................................................................................................................................................................................................................................................................................................... 
0 86 145 203 +2770 PF02324 Glyco_hydro_70 Gluco_S_transf; Glycosyl hydrolase family 70 Bashton M, Bateman A anon Pfam-B_965 (release 5.2) Family Members of this family belong to glycosyl hydrolase family 70 [1] Glucosyltransferases or sucrose 6-glycosyl transferases (GTF-S) catalyse the transfer of D-glucopyramnosyl units from sucrose onto acceptor molecules [2], EC:2.4.1.5. This family roughly corresponds to the N-terminal catalytic domain of the enzyme. Members of this family also contain the Putative cell wall binding domain Pfam:PF01473, which corresponds with the C-terminal glucan-binding domain. 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null HMM SEED 809 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.63 0.70 -13.54 0.70 -6.77 12 260 2012-10-03 05:44:19 2003-04-07 12:59:11 11 22 109 32 37 910 3 705.80 50 54.62 CHANGED LLMsWWPDKpTQlsYLNYMsptshhs..ss..aospssQtpLstAApplQtpIEpKIupptsTpWLRpshssFVKTQPpWN.pSEs....stcDHLQsGALLasN.SshTsaANScYRLLNRTPTsQTGp.t.+hpt...s....GGaEhLLANDlDNSNPVVQAEQLNWLHYlMNaGoIsusDs-..ANFDGlRVDAVDNVsADLLQIAuDYhKAtYGVccs-tsAhpHLSILEAWScNDs....Ys+DpssspLsMDNhhRLuLlaoLs+.................s.shRushpshIs.............suLssRos-sppspshsNYsFlRAHDSEVQolIAcII+cpINPsssGhoh..ThD-lKpAFcIYNpDhtpuDKKYTpaNlPsAYAlhLoNKDolsRVYYGDhYTDDGQYMApKSPYYDAI-sLLKARIKYVAGGQsMps.h..sss.....s........lLTSVRYGKGA.pAoDs.GstpTRspGhuVlsuNpPsL+LssscplslNMGAAHKNQsYRPLLLoTpDGlssYhsDu-A..tthV+hTDspGpLsFsAs-ItGhpNsQVSGYLAVWVPVGAu-sQDsRssuSspsss-G.placSsAALDSQVIYEGFSNFQsFss..psspYTNtlIApNssLFKsWGlTSFEhAPQYVSSpDGoFLDSlIQNGYAFoDRYDluMSKsNKYGot-DLhcAlKALHupGIpsIADWVPDQIYNLPGcEVVTATRsssaGchpssupIcpoLYVupo+osGpDYQupYGGAFL-ELptpYPplFpppQISTGpshDsS.KIppWuAKYFNGoNI.GRGAtYVLpD.uospYasls...sstsFLPKpLssppups.GFspDGp 
...........................................................LLMsWWPsKpTQlsYLNYMsp.shhs.........ss....aospssQstLstAuptlQhpIEp+Iu.tp.tsTpWL+phh.......ssFVcTQspWNhpoEs..........sspD.HLQGGALlasN.ss.......h.......Ts.ANS.cYRLLNRTPTsQsGp...phh...sp....GGaEaLLANDlDNSNPlVQAEQLNWLaYlMNaGoIstsDss..ANFDGlRVDAVDNVsADLLQIsuDYF+utYtlsps-tpA.pHlSILEuWSpNDs...tYsc-pssspLshDsth+huLlauLs+.................s.s.R...u......s..lpshls.....................ss..Lss.Rs.p-.s.ppspshsNYsFlRAHDSE...VQslIup..I......Ipcpl..s.s.s.s.sGhoh.....Th-plppAFcIYNtD.ppsc..KcYTpYNlPuuYAlhLoNKDolsRVYYGDLYTDDGQYMtpKSPYaDAIssLL+uRlKYVuGGQsMps.....sp....p...............tlLTSVRYGpGshsAsDp....Gs.s.pT.RTpGhullhuNsPsLpLss.cplslsMGsAHtNQtYRslLLoTpsGlstY.sDpss.....h+hTDspG.LhF............s............ss.............-............l............tG........h..pNspVSGYLuVWVPVGAsssQDsRssu.Ss.pts.ssG.psacSsAALDSQlIYE.GFSNFQsass..psspYTNhhIApNs..s..LF+pWGlTSFEhAPQYsSSpDs.....o.FLDSlIpN..GYA....F..sD..RY..Dl...u.......h.......u.p......s.....sK..Y.G.oh--LhsAl+ALHtsGIpshADWVPDQlYsLPGcElVTATRsssaG..p....h..ts.......u.IpppL...Ysspo+usGp.YQupYGGtFL-cLptpYPplFpph..................t.......IS.............oG..p.hsss.KIppWSAKYFNGoNI.G+GuhYVLp.D...uos..pYasls...sst.h...LPptLhsp..s.s..GF..p............................................................. 0 6 14 28 +2771 PF03659 Glyco_hydro_71 Glycosyl hydrolase family 71 Finn RD anon CAZY Family Family of alpha-1,3-glucanases. 
25.30 25.30 25.30 25.30 25.00 25.20 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.56 0.70 -6.00 53 358 2010-01-08 16:12:28 2003-04-07 12:59:11 9 18 107 0 264 391 4 340.70 30 59.90 CHANGED hVFAHFM...lG.stsh.ossDWcsDhptAptusIDAFALNlu.....sDsaosppLshAapuAs.....pss..FKlFlSFDa....uhWst....spVhshlppYuspsAQhp.hs.GKshVSTFtG.tsuh.........DW.s...sl+susG........hFFlPsapshustshssh....hhDGhhsWtA.WPs...susshssss.........DpsYhpsL.............uKPYhh................PVSPWFaTphss.....asKN...WlapuDs.LWasRWpQlLpL..pP......c..aVEIloWNDYGESHYIG.....Plps........sht......scustp.aspsM...PHDGWRphhs.aIss..........YKsGtt...sshlsp...............-pllhWYRssP.usuC.sussTo.u....Nssstt..thpPs..-hhpDcl..FhsuLLpu.sAslpVo..uGsssphsas.........ussGl.phsVPh....s..sGp.phslsR.sGpslhsssG ............................................................................................VhsHah...lu....s.........sh...s.scatpDhphApt.ttIDuFALNh...u.......-.s.hp......s....ppls...Aa.puAt............phs......FKlFhSFDh.......s..hst...........splhphlppY.....sspsu.hp....h..s......u..+.shVSTF.t.G....t.su.........................................s.W.t.......sl+...p...t...s...u....................haFl.....Ps...apshs....s..t.....th.th...........hhDG....hasWt....u...WP......s.sp...s.....h.ssts................................................Dtta.hphL..............su+s..YMh................PlS.PWFa.Tp.hss.................as.KN...Wl..a..tu-s...LahpR..Wppllp.h..........pP........p..hVpIloWN.................DYGESHYIG.....Plts..th.............stus.p.as..t..s..h....s.........HsuWhphh..aIshYKsG........shlpp..................................................-tlh..hWYR.ps..s.ss.ss.ssos.s..............tPt..phhpDpl..ahsshLpp...suslp.lp...uuss.t..thp.........sssG.h.thplsh....s........sGt....hpl.R.sst.lhp..................................................................... 
0 88 143 218 +2772 PF03662 Glyco_hydro_79n Glycosyl hydrolase family 79, N-terminal domain Finn RD anon CAZY Domain Family of endo-beta-N-glucuronidase, or heparanase. Heparan sulfate proteoglycans (HSPGs) play a key role in the self- assembly, insolubility and barrier properties of basement membranes and extracellular matrices. Hence, cleavage of heparan sulfate (HS) affects the integrity and functional state of tissues and thereby fundamental normal and pathological phenomena involving cell migration and response to changes in the extracellular micro-environment. Heparanase degrades HS at specific intra-chain sites. The enzyme is synthesised as a latent approximately 65 kDa protein that is processed at the N-terminus into a highly active approximately 50 kDa form. Experimental evidence suggests that heparanase may facilitate both tumour cell invasion and neovascularization, both critical steps in cancer progression. The enzyme is also involved in cell migration associated with inflammation and autoimmunity [1]. 
20.50 20.50 20.70 20.70 19.70 20.40 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.93 0.70 -5.81 9 293 2012-10-03 05:44:19 2003-04-07 12:59:11 9 9 99 3 160 323 6 232.70 31 49.33 CHANGED sccuolslpGpstIApTDEsFlCATLDWWPP-KCsYspCsWGhASlLNLDLss.ILhNAIKAFsPL+lRlGGSLQDpllY-sGc.cpPC.ssFpKsoutLFGFopGCLshcRWDELNsFFppTG...................................AhlsFGLNALpGRsh...hsc.........u...shsGsWDaoNApshIcYTlSKGYs.IcuWELGNELSGSGVGA+VuAcpYApDsIsL+sllpplYpss.tspPlllAPGGFFDtpWaTEhLpposss.lcVlTHHIYNLGPGsDs+LIcKILDPSYLDt.upT.FpslpphlpptGohAsAWVGEuGGAYNSGtchVSsoFl.SF...WYLDQLG ..................................................................................................................................................s.......................................................................................................................................................h......hsh.....p.p..hD.L.sFhppoG...................................hpllFGLN.ALhtp..........................sstWssoNAtthlc..Ys.ssKtYs....I.uWEL.......G.N..E....s....u....p.t..h..s.........h..t..lsu....pQhucDhhpL+p.ll.p..p..h..a...pp....s........t....P.l..htP....t.s.......h......s...............p....hh........p.p....hlpt...uGt.....l-slT.aHh.Y..........l.ss....t....t...........th.h..pc.....hLsPp....h.LD.p...h..pp...hp....pl...phl.p.p...hss......t.t.....tsWlGEs.....uuAasuGt..lSssFssuF...haLDpLG........................................ 
0 43 82 120 +2773 PF01270 Glyco_hydro_8 Glycosyl_hydr20; Glycosyl hydrolases family 8 Finn RD, Bateman A anon Prosite Domain \N 17.50 17.50 18.70 17.50 17.00 16.00 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.11 0.70 -5.72 7 1235 2012-10-03 02:33:51 2003-04-07 12:59:11 12 23 1064 33 227 966 30 329.70 32 84.54 CHANGED hshshtsshshshhsshssstsutss....tW-paKucalpssG.......RlhcsusuNhsps......EGQGYGMLhAVhhs.....-pshFDsLapascspLpp..hsstLhuW+hssstps.hts...ssATDGDh.IAauLLhAtKpWt.st+hshhp-Ahshhht.hhshhs..sG...sLhPGshGhsps..hthNPSYahhPsht.tFhshTusstWtslh-cshpllspht....upsGLssDWlshs...sssu.t.th-.......as.phShsslRlsh.hhhhs.....spsshlssaht.....hhpshhcshp.shssluostsusas..hssGhhAspssshtp.lss.....thsph.suu......csYausolpLhshlhpst ....................................................................................t.s.tt.ts..s...........tWcpaKpcal.s..ppG..................RVlDs...u..s..u...pp...s.o..S........EGQuYGMhhAlhAs.....D+ssFDplhsW.Tps..p..Lup................hp...p.....p.....L.......AWha.......u..p..........c................p..s..s....phps...........sNoAoDGDlahAauLLcAu..+h.....Wp....p..t..p...Y..s.......hup.Alh.h..........ths.....t.....c.s.............s.......s........l.....s....hG...................s............h..........L.............LP...G..p......h....G........F.....s.....p....s.......s........t....h..c...............h.......N.....P..........SYhs....P....p....lt....p....a.a....s.....t....h.......u....s....s..........W.p.s.ltcssh.c...lLh.csu............s...pGhsP.D..WVthc......tstu.h..p.......t..sp..................hshhu.SYsAl.Rl....h....h.....ahu.....hh..s.....c.......s...s.........pp...u....c...h...ls.pFts..........hssh....s...t.....s..G.h..s.s.p.t...ss.....l...s......o...s..p...s.....t...s...hs..........P....s..G..h...u...A.s...h........s.....h....h....p...s.h.ss.p................ph.s..t...h.sts............csYYs.sL.sLhu.hh.............................................................................................
............................................................................................. 1 80 127 172 +2774 PF03639 Glyco_hydro_81 Glycosyl hydrolase family 81 Finn RD anon CAZY Family Family of eukaryotic beta-1,3-glucanases. Within the Aspergillus fumigatus protein Swiss:Q9UVV0 two perfectly conserved Glu residues (E550 or E554) have been proposed as putative nucleophiles of the active site of the Engl1 endoglucanase, while the proton donor would be D475. The endo-beta-1,3-glucanase activity is essential for efficient spore release [1]. 25.60 25.60 25.90 25.90 25.40 25.50 hmmbuild -o /dev/null HMM SEED 695 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.17 0.70 -6.52 5 477 2010-01-08 16:09:34 2003-04-07 12:59:11 8 36 230 0 313 469 76 526.10 26 69.41 CHANGED sPshsc+ShhPsPsphhusDpsssPLpTNKFYsNhhLsss-QPpasHPYSIp.sssSS.aGLAISHsosSQpsah..ssossspalFNPsGltshVFoA+pFsSusslsl-ppchpphpssLSsossoSsalchPLVpGMGFlTulY+.sLsFsls.SSIsFSTIhu.pSlShupuhsKYsIpLsNNpsWLlYAoSPs...spsFsLTlsusSoIpsSsuFoGLIlQIsVlPs-o..NsY.EslhDtuAGsYsssscLsuspsDupsscYcFsasstGYot.GssLMaALPHHhpSFos.-sQsphluptLsSTlcGlMsGYlTcSWsL.csplspplua-PVSlohsSpt.sYSc-uLpcItuAsspDVNsssSsAESslsS.YFhGKlIA+YAplsLlAcEIlaD-slTKpsLpplshAhshLsuNpQshPLlYDsKWsGlISou..GSpSSpADFGNoYYNDHHFHaGYalYAsAVIuhlDP........oWucc.NRcaVpoLlRDa.....ussucSDoYFPphRsFDWFsGHSWAuGLaEsGDGKNEESTSEDVNuhYAsKLWGLshGDo+LlspAsLhLolh+cAhpsYahhcsssoVpPc-FlG.N+VoGILFsNKlDauTYFGscEa..hhpGIHhlPlTPlSuhlRSs.oFVKQ-WNtKlsPII.-oVu-GWKGILauNpALYDPcsAYcpFus.ssFDsuNpLDNGhShTWaLAh 
.........................................................................................................................................................................................................................................................................................s..h....Tstah....thh......h......t..t...........h..Ph..h..................................................................h.h.................................................h.....................t..h....................h..........................................................................h...lhpG.saho.............................h....h......h...................................................................p.a.h..h...s...t......ahhY...........................................h.............t...........h...s.t.....h..G.h.hpls......h............t........t........................t......h..h.D...t.ss.h.a...s....htsp...h.........p.....h.p.................t............t.........hpa..p.......h.thtt.t.....G................t.....t.....h.l.h.hA.h.PHHhp.h....................s.........t...t.t...t...........h...s............h.sohcG.hhuh..h.......s....s.......p......h.h.p.................................h...........P.h.............................p...............ths.t.....t..t.h.t.....lht.h.h.p-.l....p...t.......tt..ss.s...o...Ya.GKhls+hA.lhhhs...p.............p....l....h...t...........s..p...s.....h.s....p...........h..h..........hh............p.t.h.hthhh.......s..........s......p......t......t.......s.....h.h.YDstWtGl.lop................................................s........t........hDFG.s.s.h.YNDHHFHYGYalhuuAllupl....Ds.......................sW.....h..pp..........p....s....h...lp...LlRDh...................u.N.s..s..p....s.DshFPhhRsFDhatG.HSWAp.G....l.....h.....t..s..D..........G+spESoSEshp........hhYu.htlaG........h......s.......h.......Gs........p......phpshGsh.h.......hsl.hpuhppYa................h.h..p..s...s.........s........p.....h..................P..tpa.ht...N+...V.....s.G.....llatsKhsas
T............a...Fu...s..t.......hltG........Ip.hlPl.hs.hothl..........ts.t....pa.Vppph..........p..........h......h....t....t...h....h....p......t.....h..t...s..s.....Wpul.lhh..uhh.-.tsu...hphhtt......h.t................h.......................................................... 0 91 195 285 +2775 PF00759 Glyco_hydro_9 glycosyl_hydr12; Glycosyl hydrolase family 9 Bateman A anon Pfam-B_843 (release 2.1) Domain \N 20.20 20.20 20.30 20.30 19.20 20.10 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.65 0.70 -5.59 37 1983 2012-10-03 02:33:51 2003-04-07 12:59:11 14 98 574 36 722 2026 39 381.80 25 67.64 CHANGED sYp-ALpKulhFaEsQRSG+L.PsspR..........hsWRsDSGLpDGss...............h.sl.......DLoGGaYDAGDpVKFshPMAaosThLuWushEatsth................tpusplsphhcsI+WuTDYhLKspsu.ss......hasQVGD.....GssD........Hph....WttsEsMshsRssa+lstssPGS-luuEsAAALAAASlVF+ss.DssYuppLLppA+plasF..AcpaRutY...Ssuh.ss.ussaYsShSGapDELhWuAAWLahATs-.....soYLshhps.stphtsts.................sasWDsphsGuplLLA+h.h......p....hppa+sp..s-palsthhss..s.sp...sphTPGGLhah.pWus..LpYssssuFLhhsYuch............s.hssssphpshAcpQlDYlLGsNPhphSYlVGaGsp.PppsHHRs.s.....s+.......suapshhssssPp.phLhGAlVGG..Pss.....................pD.sasDcRssYspsEsAsshNAuhlGsLA 
...............................................................................................hh..shha.a.hp+.....s..................G..............................a.tts.u...sh..Dtt.................................................................t.............DlsGGaaDA..G..D..hs..K.........ashs..huh...........o...........s.s.......h.......L....sash.hEhttth.........................tss..s...p..h......s.....c.....hh..ctl+WshDahl+hpst...t.......................hh.h.p.l.....us................ut....sc..............Hth..................Wt.ts..p.....s........h...........s..........h........t.........c......s....s...h......p.....h.....s........t.........p....................u..........o.......s......h...s.u.p.h...A.A....u....h.At..u........u..h......la....+........s..................h......-....s.....s...a..........u...p.p................hLptAc.........p.....hap.a......A...p.....p......h....t...s...h.....................ts..s..............s........s..........t..s.h..Y........ss....s....s...ht.....D.........E......h............hW........AA.s.La.h.u...T...t..p...................pp....Yhphhtp.....h...tt...ht.h.t.t.......................................hsW.sst...h..s..s..st..........h.h..........h...uph......h...................t.................hptht..pt..............s..p..t........h..........hs.thh.................................tt......h.......hs.............s...........t.......Gh.........h....h....h..........h.us....hphs...s...s..s...h.h..l..h..h.ha..s.............................................................................hhtspphh..p.h.A.p.p..p.......l.....sYlLGp........Ns..h...t.h...Sa...h..s...G...a..........G...t....p.........P.............p.p...H..HRh.......................................th.t.h....t......s................Gs....lsGG....P.s.t...............................................tc..ta......D.....t.....h...t.....sa....s..Essh.hN.Ashhhhh.................................................................................................... 
0 310 549 663 +2776 PF03808 Glyco_tran_WecB Glycosyl transferase WecB/TagA/CpsF family TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.40 25.40 25.40 32.20 25.30 25.30 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.77 0.71 -4.86 167 2497 2010-01-08 16:18:29 2003-04-07 12:59:11 8 24 2143 0 522 1664 434 170.50 35 65.78 CHANGED phlppAcllhsDGhullhuu+hh.u..tsl.........cRlsGsDlh.tLhp.tsspp.sh..plaLlGup.slhcpsspplpppaPslplsGhpsG...ha.........s.t-p.ppllp...pIpp.ups-llhVuhGsP+QEhaltchtpp...lsss.lhhulGusaDhhuGp.hcRAPpahp+huLEWlaRLhpEP...pRlh+c ..............................p.hlppA-hlhsDGhulVhus+hh...s.....psl.............pRVs.GhDLhppLhp..huspc...sh.........pVFLlGuc.s......-...........V......lp.psttpLppp...a.s.slsl....sGtpc.G..YF............p.cccpslhc....cIpt.uts.cllhVuhGs.PKQE.haltcpcpt........hsss...lhhGVGGoaDVhuGp..lKRA.PphhpphsLEWLYRLlppP...pRltRp........................ 0 167 342 432 +2777 PF01531 Glyco_transf_11 Glycosyl transferase family 11 Bateman A anon Pfam-B_935 (release 4.0) Family This family contains several fucosyl transferase enzymes. 
21.50 21.50 21.70 21.60 20.80 21.20 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.12 0.70 -5.43 5 654 2010-01-08 14:30:42 2003-04-07 12:59:11 11 14 365 0 223 638 1259 248.30 22 78.41 CHANGED sVSllh+spptlsLh.AsWAh......Pshss.s.p+hsu.hcGhaTlslNGRLGNQMGpYATLhALAp..hNGRhAFIPsuMHusLAPlFRIoLPVLpscsupRpPWpNaHLsDWMcE-YcclsGcal+hoGaPCSWTFYHH.LRQpIcpEFTLHDHLREEAQ..shLRsLpls.h.GsRPSTFVGVHVRRGDYVcVMPKsWKGVVuDpsYLppAlDcFRARasuslFVVTSDDM-WCKKNIcsSpGDVsFAGcG.puSPuKDFALLoQCNHTIlTlGTFGaWAAYLoGGDTlYLANaThPDSpFhslFKPEAAaLPEWlGIs .....................................................................................................................................................................h............................................................................................................................................................................................................................................................................................................................................................................................................h......................t......h...a...h....t......hhp.p....h.............h..php...s.....pl......p.......pt..hp.................th.h...p.t..h..p..ht.........h.....p..s.....p..s....h...V..u.lHlRRG.D..Y.lpsh.......t...........h..t..s.hss...h.s.......Y...h...p..pA....l...c.h....h.....p...................t.....+.......h....p....s......s........h....F...hlh...S....s...-...hp.W..........s....+....c..........s.....l.......s.....h.....t........p..........s.....p....s.......h....h.sss.......................tss..s.hp...Dhh.L.h...o.p..C...pasIh..s....s..STFuaWu.Aa.L.s......t..s..s.p...l...h.l...h.s..p.h.....t..p.................t.h..tW............................................... 
0 67 103 167 +2778 PF01793 Glyco_transf_15 Glycolipid 2-alpha-mannosyltransferase Bashton M, Bateman A anon Pfam-B_1324 (release 4.2) Family This is a family of alpha-1,2 mannosyl-transferases involved in N-linked and O-linked glycosylation of proteins. Some of the enzymes in this family have been shown to be involved in O- and N-linked glycan modifications in the Golgi [1]. 25.00 25.00 37.10 25.60 23.30 23.10 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.12 0.70 -5.49 107 719 2010-01-08 14:40:35 2003-04-07 12:59:11 11 10 156 6 528 697 49 307.20 39 74.37 CHANGED lhh.shhttttp.......................tttttts.tpt.thpth...tpstst.h..tttt...t.............................s+....NAThlsLsR.Np-...L.sllpol+slEcRFN++..apYsWlFLNDcPFo--FKctspshs.Suc...sc.........auhIPcEcWshPsaI...Dp-+hpcshcphtccpIhYGsspSYRpMCRapSGaFa+cslLpcY-aYWRVEPslchaCDl.sYDsF+aMc-ssKpYGFsIulh.E.h.pTIPTLWspsccFh..pppPp...altp.sNhhsalo-..s.....................................................................................supsYNhCHFWSNFEIusLsFaRSc...............s..YpcaF-aLD+sGGFaYERWGDAPVHSIAsuLhLc+ccIHaFcDIGYhHsPa 
...............................................................................................tt...............................................................................................+.sAshls.LsR..N....p..-....LtsllpSl+pl..............Ec+FNcp..apYsWVFL.....N..D.tsFs-.-FKctsp.shs....su..p....sp..................aGhI.....P..p.....-.c.Ws.hP.s...aI...Dpp.....p.hpcshp.pht...c..pt.........lhY.G..s...p.SY.+.pMCR.F.S....G................aF..a+H..s.lL.p..p..Y-aYWRV.......E..........Pslcaa.CDl...sYDsF.p.aMpcssKpYGFslulhE..h.pTlPo...LWpp.spcFh...cp..p..sp.......hl....t.p....ss..hhpalo-..c.............................................................................................tupsY..N...hCH....F.........WSN.FEIusLsaaR.Sc...............t..YpcaFpaLDc.sGGFaYER.........W................GDAPVHSIAsuLhLs+sc.I..HaFcDlGYhHss................................................................ 0 147 304 462 +2779 PF03076 GP3 Equine arteritis virus GP3 Griffiths-Jones SR anon Pfam-B_687 (release 6.4) Family This protein is encoded by ORF3 of equine arteritis virus. The function is unknown. 25.00 25.00 291.00 290.80 20.50 17.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.12 0.71 -4.40 3 141 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 2 0 0 140 0 161.00 84 98.16 CHANGED MGCTYSGPAAFLCFFLYFLFIsGSVGSNNTTICMHTTSDTSVHLFYAANVTFPSHFQRHFAAAQDFVVHTGYEYAGVTMLVHLFANLVLTFPSLVNCSHPVsVFANASCVQVVCo+lNSosGLGELSFSFIDEDLRLHIRPTLICWFALLLVHFLPMPRC ...MGpAYstPVAhLCFFhaFLFIsGSVGSNNsTICMHTTSDTSVHLFYAANVTFPSHFQRHFAAAQDFVVHTGYEYAGVTMLVHLFANLVLTFPSLVNCo+PVsVaANASCVQlVCs....p....s.N...So....s....G....LGElSFSFIDEDLRLHIRPTLICWFALLLVHFLPMPRC. 0 0 0 0 +2780 PF04724 Glyco_transf_17 Glyco_tranf_17; Glycosyltransferase family 17 Mifsud W anon Pfam-B_5914 (release 7.5) Family This family represents beta-1,4-mannosyl-glycoprotein beta-1,4-N-acetylglucosaminyltransferase (EC:2.4.1.144). 
This enzyme transfers the bisecting GlcNAc to the core mannose of complex N-glycans. The addition of this residue is regulated during development and has functional consequences for receptor signalling, cell adhesion, and tumour progression [1,2]. 21.70 21.70 23.20 22.00 21.10 21.60 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.45 0.70 -5.45 12 306 2010-01-08 16:27:10 2003-04-07 12:59:11 8 5 175 0 202 296 1469 291.40 27 74.31 CHANGED hhshS+h+sh.+th....hh.hhhhh.hslhslhh+tpplohhhRPlW-uPPp.FstIPHYatcNhoM.sLCpLHGWthR.-hPRRVaDAllFSsEhDlLplRa+ELhPYVspFVlLESNsTFTGhsKPLhFtp..p+sp.FcFlcs+lsYshlssh.hcpGc...sPFltEuYpRsAL..ctLlRluGlpsDDllIMuDsDEIPStcTIphL+WCDshPplhHLcL+pYLYSF.....................pa.sDspSWRAohHhapsG+TcYtH.RQosplLsD.........uGWHCSFCF+.lp-FhFKhpuYSHsD+sRascY...Ls.cRIQclICpGsDLFDMlP.EEYoFp-lhtKhGPlP.ohSAVHLPuYLLcNh-cY+aLLPGNChRcu .........................................................................hh.....................................................................................h.............h.....t.......s......R+....lhDshhh.s.s..E.lDlL.-lRhpELhshVDhFVl.lE.Ss..h...Tap.G.h...KsL.hFtp....pp...tp..F..p..h..hc...s.Kl....h..Y.h.......h....l..s.........th......................t..up..............ss..a.........t-.s.....a..p.....R...s.....tl................ptl.h....p.....h..t...s........h.....p....s...c.D..lll.huDs.DEIP.s.....cslthL...+h..tc.....shs....p....h....p.hp.h.+..hY...uF................................................a....h.....ss..p....h...p....t.......t......t..h...h..........s...h.p....p.h.....t...........p....p..s.s...t...l..lhp......................uGWHCSaCFp..p.....ph..h.h...Kht...u..h...sHs-.h....p.h.tta.............hs.p.hI.pphlppGt.haDh...p.h.....................P..h....................................................................................................................................................................................... 
0 73 120 165 +2781 PF03033 Glyco_transf_28 Glycosyltransferase family 28 N-terminal domain Griffiths-Jones SR, Bateman A anon Pfam-B_1105 (release 6.4) & Pfam-B_2764 (release 7.5) Family The glycosyltransferase family 28 includes monogalactosyldiacylglycerol synthase (Swiss:P93115, EC 2.4.1.46) and UDP-N-acetylglucosamine transferase (Swiss:P74657, EC 2.4.1.-). This N-terminal domain contains the acceptor binding site and likely membrane association site. This family also contains a large number of proteins that probably have quite distinct activities. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.42 0.71 -4.41 43 6160 2012-10-03 16:42:30 2003-04-07 12:59:11 15 40 4548 35 1693 4850 1858 138.00 25 33.17 CHANGED lllsssGTtGcl.PhlAluppLpcpGacVp...luspsshpphl..ppsGlshhs..lsss.....................................thht...hhpthtsshpt..th.thhphlt....phshthsshhussshhush.....htthshhltEp.ulPhhssphhshhssphhh ..............................hllsuGGT.u.G...HlhPsl....Alupp.L....p......p.................p......G.........a..c...lp..........hlGop.....s...s....hE..pp..l.....h.p..p.......G.....l...p.......h.ps........lsss..........................................................................................s.hpt.hh............h...p....t.h..t....s....s.hc.h......h......p..u..h....h.p.....u..t..p....l.......l.....p...c.......h.....p.......s...c....s.....l......h...u.........h..G........G.....a........s....u........s......P....s.h..lAAh...........hhtl.P.s.llHEpsuhsGhsN+hhsphspph..h........................................................................................................................................................................................................ 
0 541 1110 1453 +2782 PF04666 Glyco_transf_54 GnT_IV_N; Glyco_transf_55; N-Acetylglucosaminyltransferase-IV (GnT-IV) conserved region Waterfield DI, Finn RD anon Pfam-B_4541 (release 7.5) Family The complex-type of oligosaccharides are synthesised through elongation by glycosyltransferases after trimming of the precursor oligosaccharides transferred to proteins in the endoplasmic reticulum. N-Acetylglucosaminyltransferases (GnTs) take part in the formation of branches in the biosynthesis of complex-type sugar chains. In vertebrates, six GnTs, designated as GnT-I to -VI, which catalyse the transfer of GlcNAc to the core mannose residues of Asn-linked sugar chains, have been identified. GnT-IV (EC:2.4.1.145) catalyses the transfer of GlcNAc from UDP-GlcNAc to the GlcNAc1-2Man1-3 arm of core oligosaccharide [Gn2(22)core oligosaccharide] and forms GlcNAc1-4(GlcNAc1-2)Man1-3 structure on the core oligosaccharide (Gn3(2,4,2)core oligosaccharide). In some members the conserved region occupies all but the very for N-terminal, where there is a signal sequence on all members. For other members the conserved region does not occupy the entire protein but is still to the N-terminus of the protein [1]. 
19.90 19.90 19.90 20.00 19.30 19.60 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.72 0.70 -5.68 18 495 2010-01-08 16:26:41 2003-04-07 12:59:11 8 9 124 0 316 424 6 244.80 33 57.08 CHANGED tths..........hpthpsssppl.pplsp...........phhh.hhsH.....hhcctu......slpPslltGpsRssVu.lVlGlPTV+Rp+poYLh-TLpSLlsphot-Epc-sllVValA-oD..salpplspplppcFspclpSGhl-VIusstpaYPshssL+coasDsp-RV+WRoKQNLDYuFLMtYApscGpYYlQLEDDllsp+sFhsshKpFssppsu......pcWhhLEFSpLGFIGKlF+SpDLspLlcFhhMFYp-pPlDWLLsHahhl+sC..tt.s...C.t.hpphhIRa+PSLFQHlGtpSSLpG+.hQ+LKDc-F .........................................................................t.t...........................................................ph....th.hu...s..h..pt..hp...lslGlso....VpR.....p..t..t....s.YLhpTlpSLhppho.pEpp.....chh..ll..V..hlu-............sD....p........a........h.pths...tp...lp...p.Fspc.l.uGhl....lIpss..paY.P.s..h....s..p.l+....p....shsD..s.p.cRs..paR..o.KQ.N.lDYsaLh...a.s..p.s..p.u......t.YYl..L.EDD.lhsp.s.a..hsph....+phhhp...hps............................ppWhhLE.F..SpL.GaI..G.K.......h..a+u..DLshlspFhhh.F.Y.p-hPhDaLLschhh.lhs..............................tp.hlpacPSLFQHhGhhSShtup..p.hp................................................ 
1 73 104 181 +2783 PF03414 Glyco_transf_6 Glycosyltransferase family 6 Bateman A anon Pfam-B_4383 (release 6.6) Family \N 25.00 25.00 25.80 25.50 20.70 20.30 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.11 0.70 -5.98 6 613 2012-10-03 05:28:31 2003-04-07 12:59:11 8 8 118 121 171 620 297 246.90 53 92.10 CHANGED htPpVhssphthshWhsthhpsshp........s.phppsctc.ccppptscl.cplp.pshh.s.pRs-VLTlTPWhAPIVWEGTaNpAlL-phYthQplTlGLTVFAlGKYlc.aLccFLESA-+aFMVGH+VhaYlhsDDsuchPhVpLGPhRplpVhEl+scKRWQDISMhRMcTIu-HIht+hpHEVDaLFChDVD.VFpD+aGVETLGpLVApLpshaYtAs.psFTYERRc.SsAYIPhsEGDFYYtuAlFGGosscVhplTptCapuILtDKtNsIEAtWHDESHLNKYFLhpKPoKlLSPEYhWD.+l.GhPuslKpl+hoh.sKpashVRN ........................................................................................................................................................................pluhhhhAh.t.....+Ylt...FLc..hFLEo....AE+aFMVG..HRVp..YY.VFTDpsus.V.P..c..VsLGsGRpLsVlcV.pu.h.p.RWQDlSM+RMEhIu-ahccR.hhpEVDYLhChDVDhc.F.pD+lGV.ElL.......s...s......LhusLHPu.a....Ys..ss.RcsFTYERRPpSpAYIP..psEGDF..YYhGuhFGGoVp-V.+LT+sC+puhhhDpsNuIEAhWHDESHLNK.Yhlh.p.KPo..K.lLSPE.Y........hW.........D..ph.s..ss..l+hh+h.h...K................................... 0 7 24 62 +2784 PF01075 Glyco_transf_9 Heptosyltranf; Glycosyltransferase family 9 (heptosyltransferase) Finn RD, Bateman A anon Pfam-B_839 (release 3.0) Family Members of this family belong to glycosyltransferase family 9 [1]. Lipopolysaccharide is a major component of the outer leaflet of the outer membrane in Gram-negative bacteria. It is composed of three domains; lipid A, Core oligosaccharide and the O-antigen. All of these enzymes transfer heptose to the lipopolysaccharide core. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.29 0.70 -5.16 18 6767 2012-10-03 16:42:30 2003-04-07 12:59:11 12 124 2063 9 1547 5234 3651 225.80 19 64.03 CHANGED htpLtcsLppps.aDhllshpshlKSAhlsthhutsh+hGhctpot...husLhhs+phshsh.tthtVpRhttLhsps......hshstspspsphslshtptsttpsths............ttPhlshhPuuot....ssKpWPt-patcLsptLpcpG..hplhLhsuspc...-cppscplssuhc........hsslssKhsLppsstLlutAshlVusDoGLhHlAAALs+PllulYGsTsPthTs....Phucptssls..phtttsshpppshhsphp .........................................................................................................................................................................l.pt.tt.aD..hh.h.......s....h..............s...............h.......p..s....s......h....l.....s...h...h....h....t.............h........h...p..h....u.....h...t..h..t.t..............................h..........t....h...h.....h.....................................................h....l....p....p............h...th...hh...............................hs..h........................................h........h......s.....t......h..t......h.....t..hs........................................tt...h..l.sh......t...s...u.....uph........ssKp......W...s.....t.......p.p....a....s.p.l.h...p....t...l..t...p....p......s.........hp........l...l...L..h..u..uspc............tpp.h...h.p...t..l.s..p..t.h.t...............................shs.l.....s....s..c...h...s....Lt..p....h....s....sllsp.u....c....hhlusD...oG.hH...lAu...Al.s.p........P......s..lu.l.........a......G...s......o.....s.....s...t...h..hs...............sh....s....t..p........h.......h............................................h................................................. 
0 478 953 1279 +2785 PF00274 Glycolytic glycolytic_enzy; Fructose-bisphosphate aldolase class-I Finn RD anon Prosite Domain \N 19.20 19.20 19.30 19.20 18.10 19.00 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.02 0.70 -6.05 58 2226 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 1445 193 546 1888 2485 252.40 41 94.39 CHANGED ELhcsAptIsuPGKGILAA.D..ESsGThGKRhssIGVENTE-NR........RtYRpLLFoo.sslsphISGVILF-ETLYQpss-.GpsFV-lL.+cpGIlPGIKVDKGllsLsGoss.EosTQ........................GLDuLucRCApYhKsGA+FAKWRsVLcI.....s..sssPSpLulpENApsLARYAuICQpsGLVPIVEPEILsDG-HclcpstpVTEcVLAtlaKALsDH+VhLEGoL.LKPNMVTsGpsss.p..+.soPpplAhhTVpsLpRoVPsAVPGlsFLSGGQSEE-AolNLNAhNph.............sh...+PWtLoFSYGRALQsSsLKsWtGKsEN...lpAAQcthlpRA+ANutAslG+Yssss........sssuspoLaltsasY .........................ht..upGhlAh.D...pS..suoh..sK..thtth.s.....l...p.........ps.p.sp..............+thRphlho....s....s....h...............ht.ph....I.G....sILFcpTh....p.t.......p.....G..h...........hsphl.tppGllshlK....VDK........Gh.ss...h.s..ss......st...pshp........................GLDsLh.cR.s.s..p.ah.h..GspFuKWRsVl...pI.................t.PS.thuIt.-sAp......sL.ARYAslsQp..pGLVPIlEPElh...cu...p...H...chctpthlpppl.httl.ph.t-p.pVhLc....hol.hcsshh.tsh...............tp....................................Vst..........lshLSGG.S.....c-cAs..Lst.s.th............................hSauRALtts.lth..tsp....t................................................................................................. 
0 175 303 423 +2786 PF01102 Glycophorin_A Glycophorin A Finn RD, Bateman A anon Prosite Family \N 22.70 22.70 22.80 22.70 22.60 22.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.65 0.71 -4.35 5 139 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 43 6 29 143 1 95.20 40 58.86 CHANGED HT.SsSGSso.t.ISo.......oNDspspsStuTP........TtspEVosthssRsh.P.EpG..-slQLsHDFScsVITLIIFGVMAGlIGTILLISYsIRRLIKKSsuDVpP............hPsP..tDs-VPLSSVEIEsPE- ..................................................usss.t.los.......sssppppsohsss........stsp.cs.S.plo.sps.l.sspp..p....-p..s....Q....lsH.cFots.......I...hhIlhsVhAGl.IG..hI..L....lI.YsI....pRhhK.......................................................................................... 0 10 12 18 +2787 PF00606 Glycoprotein_B Herpesvirus Glycoprotein B Bateman A anon Bateman A Family This family of proteins contains a transmembrane region. 18.80 18.80 18.80 19.50 18.40 18.40 hmmbuild -o /dev/null HMM SEED 714 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.34 0.70 -13.22 0.70 -6.50 40 1279 2009-01-15 18:05:59 2003-04-07 12:59:11 13 17 240 20 36 932 2 309.30 28 74.43 CHANGED 
phsFRVCus.usGu-llRFptspsCPs.hspscsasEGIhllaKcNIsPYpF+VphYhK.lThsosasGh.shs..tlospasc+hPlPhhElst.IDppspChSusphsp.sshhhssac+Dsh.spsh.LhPschtosss+RahTss-hYss.Gsh.hhY+osToVNChVs-spARSsaPYcaFshuoGDsV-hSPFashp..ssp.hs....csss+Fp.lcsYphlDhtsc...hstsssspRsFLppsc.hTluW-htscpsssCshshWcp.hscslRsEts.sSaHFsupslTATFlosh.sphshsps...pthsClpccspctI-clatpcYNsTHlpsGsl.p...hYhTsGGhllsaQPlhspsLschhttphspsssssss.t.........................Rp+Rsssssstt......................................shpopssltaAQLQFsYDpL+salNchLuclAcAWCcpQpRpthlWpELoKINPoulhSAlaG+PVuA+hlGDVluVocClpVsQso.VplppSMR........lsusss....hCYSRPlVoFcahNsop................................hhpGQLGpcNEILLspphlEsCphssc+YFhsGsshhhYc-Ytas+plsls-Issl.sTFlsLNlohLENhDFpsLElYo+sEl+so.sVhDlEplhR-hNhasp+ltslcpslps...sspsshlpGlsshh.pGLG.slGculGsVluuluGAluShVoGlsoFlpNPFGuhslhLlllAsllslalhaR+hppltpsPlchLYPhss...p...shppps ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.hRph.t............................................p..psh..hu.....lQFsYsplpt.lNphhtpl..uWC..Q.+p..hh.th.pl......................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 0 12 36 +2788 PF00802 Glycoprotein_G Pneumovirus attachment glycoprotein G Bateman A anon Pfam-B_1049 (release 2.1) Family This family includes attachment proteins from respiratory synctial virus. Glycoprotein G has not been shown to have any neuraminidase or hemagglutinin activity (Swiss-Prot). The amino terminus is thought to be cytoplasmic, and the carboxyl terminus extracellular. The extracellular region contains four completely conserved cysteine residues. 20.70 20.70 20.80 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.95 0.70 -5.24 4 4325 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 47 1 0 2618 0 121.00 55 83.44 CHANGED hKTLc+sWcs.pahIVh.SCLYKLNLKSlsQhALSsLAMIh.TSLlIsAIIaIuouNpKspsTosss.phTpQhpNpTosahTpps.puspsSpQuTTos..pThssssT.Gs..phtHoTspTpsppT....T.stspKPshpsptspPPcp.pDc.cFplhsaVPCSICpsN.sChSlC+phspptPsKtsThpPpKpPpsKTT.KKsoKT........oTp+.Tp..ThhpsKsNhoTP..slLoosp..............................HsTs .............................................................................................................................................................................................................................K+D.Ks.spp.PK..c...sTTpP.T.cKPThp..TTcpshpT...TlLsosTotp.E+.................T.QppoLHSTo........................................ 1 0 0 0 +2789 PF02885 Glycos_trans_3N glycosyl_transf_3; Glycosyl transferase family, helical bundle domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Domain This family includes anthranilate phosphoribosyltransferase (TrpD), thymidine phosphorylase. All these proteins can transfer a phosphorylated ribose substrate. 
20.90 20.90 21.00 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.75 0.72 -4.38 135 7326 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 4261 62 1772 5253 2054 66.00 26 16.80 CHANGED phpphl.cclhpu....psLoppEspthhptlhsGphs...........cspluAhLhAlph+G...pos-ElsuhscAh..tppup .......................pllp+h....tcG.....psLop-Ehpthhst.lh.....s..G.p.ls.........................-hQl...uAhLhAlph+G....o.pEl.suhspAMhppu............. 0 546 1116 1479 +2790 PF00534 Glycos_transf_1 glycosyl_transf_1; Glycosyl transferases group 1 Bateman A anon MRC-LMB Genome group Family Mutations in this domain of Swiss:P37287 lead to disease (Paroxysmal Nocturnal haemoglobinuria). Members of this family transfer activated sugars to a variety of substrates, including glycogen, Fructose-6-phosphate and lipopolysaccharides. Members of this family transfer UDP, ADP, GDP or CMP linked sugars. The eukaryotic glycogen synthases may be distant members of this family. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.91 0.71 -4.84 63 44775 2012-10-03 16:42:30 2003-04-07 12:59:11 15 435 6658 103 12903 42366 19227 161.60 18 38.27 CHANGED sppp.ptphsh.psphhllhlGRlthp.KGhchllcAhttl.t.........tsshpLllsG..........tsp.ppphpphspphtlpsplhhhuhhs..pclhphhpt....uclhlhsSth....................EuFGhshlEAhusGhPllsos.ssGhs-llpcst........sGhllpss...sscslsctlppllpsp.......phppphsppupph ..............................................................................................ht.................p..t....h..l..h...h...l...u.....R....l....t...........p....K........s.......h....p....h....l......l.....c.......u...h.tp....l.....................................t..s....h........p....l...h....l....h....G................................................pss....t...t....p...h....p......p.....h.....h......t........p...........h...........t..........h................t..............s................p...........l.........p....h...........h........G......h...........h...........s.............................p..........l..t.....p....h......hpt.............u-..l....h.......l.....h...s.S.hh.........................E...s.....h......u.....l....s....h....l...E......A.h....u.t....G....h.....P........l..l....s....s.....s........s.......s..........u........h......t.....-........h..l...t...s.st..................................sG....h..........h....h..........t.........st..........s............................p..............s...........h....s....p....t....l....t....p....h...h....t...p..........................h........................................................................................................ 0 4440 8647 11076 +2791 PF00591 Glycos_transf_3 glycosyl_transf_3; Glycosyl transferase family, a/b domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Family This family includes anthranilate phosphoribosyltransferase (TrpD), thymidine phosphorylase. 
All these proteins can transfer a phosphorylated ribose substrate. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.42 0.70 -4.98 21 7137 2009-09-11 15:42:01 2003-04-07 12:59:11 16 28 4318 62 1734 5172 3200 239.20 30 60.81 CHANGED shshlDhsGTGGDGtsThNlSTsuAhVsAAs.Gs+lAKHGNRulSSKSGouDlLEul.Glsl.phss-psp+slc-sGluFLFAPtaHsuh+assssR+pLsh+TlFNlLGPLhNP.ApsphpllGVast-LspshAcslppl.shp+uhVVHG.sG.....hDElohtupThVsclc.su...clspaslsPpDhGlppsplpsLpusss.cENtchlc.slLpGpsssh.....t-hlshNAushlhluGhsso.......LcpGsptAt-slcoGpAh .......................................s..hlDhpuTGGs.G....ss.o....Shss...A..llAus....G.l......h...........VuKp..u..s.R.u.l.u......s.p..u.G...o..h..D..hL.Eul....G.....h...p..l..ph.ssc....phtc.........h..l........p..c..........s...G..l..u..h..l....h..u..s..s..h.h..ss....h.c.+.h.h..s.c+...-l....s.s........T...l..s..l..PLlss........................................................ulh.st...clst.s.h.stll...h....c...l......s.....s.....t......pu.h....h.h..........+.....u..............................hD.E.sshh..u........po..h..V...s....hp....sG...........................ph.p...p.h..h.l...o.s.......s...hsL..s..p.s...............h.t.........s...............s.l.c..s.................p...Es.h.p.hLp...shh.tG.t...t.s...................................t-h.lh.h.t.u.u..hh..l.h..h.....u......s......h.....s..p..s.......................lpcuh....thstpslpsGpA................................................................................................................................................................. 0 557 1116 1464 +2792 PF04413 Glycos_transf_N 3-Deoxy-D-manno-octulosonic-acid transferase (kdotransferase) Waterfield DI, Finn RD anon COG1519 Domain Members of this family transfer activated sugars to a variety of substrates, including glycogen, fructose-6-phosphate and lipopolysaccharides. Members of the family transfer UDP, ADP, GDP or CMP linked sugars. 
The Glycos_transf_N region is flanked at the N-terminus by a signal peptide and at the C-terminus by Glycos_transf_1 (Pfam:PF00534). The eukaryotic glycogen synthases may be distant members of this bacterial family [1]. 23.90 23.90 24.60 26.30 23.50 23.80 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -5.13 154 2390 2012-10-03 16:42:30 2003-04-07 12:59:11 11 13 2271 8 551 1796 2084 180.60 36 42.32 CHANGED htp....chtE...RhGhh............................hlWlH.AsSVGEshustPLlctLtp...phPshplllTosTsTGtphs..pph.....h....s....ps.pta.hP.hDhshslp+FLpphpP.....chhllhEoElWPNllttspppslPllLlNARlSc+Shptap+h.s.h..hhpthlppl..shlhsQsptDupRhhsLGss.pp...lplsGNlKaD.hs..s ...........................................................................hp+htERaGhhtt.h........................ssIWlH....usS.VGE.shAuhPLlctLcp.............c.hP.sh.slhlToh.TsTGt-ps...pph...h...........sp.....slp.phY..LP..aDh.stslp+FLsph..pP.....clsllhET.........ElWP.........Nlltth++cp.......l..PlllsNARLSs+Sh.tsYt+h..s...t...hh+pl.l.ppl..shlhAQscpDupRahsL.G..sp.........p...lpVsGslKFDl..h....................................... 
0 170 347 455 +2793 PF01153 Glypican Glypican Finn RD, Bateman A anon Prosite Family \N 25.20 25.20 25.60 25.50 25.10 25.10 hmmbuild -o /dev/null HMM SEED 558 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -12.90 0.70 -5.90 15 584 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 88 10 343 500 0 364.70 25 92.27 CHANGED hsl.Lhllhshsu.stu.ssscu+SCuEVRphYt.u+GaulsslPpstIuGEHL+IC.PQGaTCCTscMEE+huppS+h-Fcshlc-uospLpsllsspa+pFD-aFc-LLppuE+oLsphFspsYGcLYsQNuclFp-LFoEL+pYYhGus..l................NLEEhLs-FWu+LLERhF.+LlsPQYp....ho--YlECls+ts-....pL+PFGDsPRcL+lQlTRAFlAARsFlQGLslut-VVs+sspV...shospCsRAlMKhhYCPaCRGl.....sslKPCpsYClNVM+GCLANQAD.LDsEWpsaIDuLltlA-+.lpGsaslEsVltsIcV+ISEAIhshQENusplosKVFQsCGsP+.sssts....husp-t...p+ch+shs.Ec+PTsusss......L-pLVo-l+p+L+ph+pFWsoLPsslCscchsuussssc...CWNGps+u.RYhscVhGsGLsNQlNNP.EVcVDIo+PDhsIRQpIhpL+lhTs+L+sA.................................hsGsDlDFp.Dss.D-sSG.SGSGsusscc....hsssutchshssssspp.t.hsh.................sptssoushspssshhhh..lhsllshhhthh .........................................h............................................................................................................................hp...............phh..llp.upp.htthF.psat..hh.ps...hhtphasclp.ahh.t.us.....h................................................slpchltcFaspLh.hha..ph.hps.th..................s.ch.pC.ltt...hp....plpsF..GphPp.....lhhp.ht+uh.ssR...hahpuLthu.h-Vhpp.s.tpl..............s..tC.ptl.h+h.aCshCpGh................................pPC.saC.sVhpGChu................t..s.p.ls..Wpth..l..sh..ls.......pt.h.s..............s...h.......-.sh.sl.......hl...p-ul...hpps..t.....pl..s.t...p.......l.........p.h.Cu.s........t.................................t.......p.p...s....................................h.t..h..chhtplp........tha..tLs...lCtp.....p..h....t....t.........CWsG.th.....s...pY.......hh.t..s..tp......Eh....h.p............................p...h..p.h.tLp.h.t..h...h..........................................................................u...........
........t..................s......t.....................................................................................................h............................................................................................................ 0 64 95 210 +2794 PF05199 GMC_oxred_C GMC oxidoreductase Studholme DJ, Bateman, A anon Pfam-B_891 (release 2.1) Domain This domain found associated with Pfam:PF00732. 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.74 0.71 -3.90 85 8000 2009-09-10 15:44:39 2003-04-07 12:59:11 8 92 2304 146 3849 7618 4016 132.70 26 23.46 CHANGED PpSc.GplpLss..s-shs.P.....hl.chsahssst...........D.hpthh....pulc.hstclhpss....................................................tthhshs.......thpssp...........h.thhht...h..shhHss.GTspM.Gts.t.......uVVD.sph+VaGlpsLpVsDuSlhPsh.sssssphsshulA...c+s ...............................................................................................................oc.Gplplps..........tcs......h.t....P..............hl.p.sahs...p...t........................D...hph.hh......puhc.hspc..l.hpp....................................................................................................................................tth..st.........................t.h.tssp.............................h...th.h.tt............s....sshH.............ss.GTs+M...Gs...ssp......................uVVD...s.......p....h.+....Va.......G.............lp.......s.L+.V..sDuSlhPph..su.sNss..sssh..hluE+h................................................................................................... 0 1059 2161 3210 +2795 PF00732 GMC_oxred_N GMC_oxred; GMC oxidoreductase Bateman A, Studholme, DJ anon Pfam-B_891 (release 2.1) Domain This family of proteins bind FAD as a cofactor. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.74 0.70 -5.26 20 7804 2012-10-10 17:06:42 2003-04-07 12:59:11 14 78 2279 50 3835 7620 5057 264.90 27 47.78 CHANGED h-tLatuGGshuos..........................................................ssphhhl.sGpslGGuSsVNhusslRsstpshc-WsschGlphauspchhshhcplp........................tpluV..sspshppsstN..pslhcuuccLGas.t.lscNssus..+.sGhCthG...CtpGtKpuospsaLhsAh.p+.supllossps-+llh..............ttpst+AlGVtspsssss.....hpphhhss+pslluuGAltoPtLLtpSGl.pspc..............lG+NLplHPs ........................................................................................................................................................................................................................DhlllGuGsuGs.slAsR.L.......s.....................p............s...................s.....t.........h.....p..V.....ll.lE....u.G.....s...........................................h...t..h....h..h...........h...........t...........t.......h..........s......h.....t...........h................s...........t.........s......p........................t.................h.................s.......s.......p..........t...........h....................h...................p..G.........+....s...........l.G..G..u..Ss..l..N.u........hh........a...h.........R......s.....p..t.........t........D........a........-........t........W........t........p..................G........t........s........W..........s.......a......p......p....h.h........s...a...a...c.+hE.p.......................................................t..h..p..u...h....s.......G............l.....t....l.............p...................................p.........s....hh..........p............s....h..h..p.........u....s....t.......p....h...G.h....................................h.s.t.....-.....h..s.st.........pt.p...G....h....s.......h....h.....t................................t.....t...................p.............G..............t..
......R.......h.......o.......s.s..p.......s..a...L............t............s........s..............h......p..........+.................N..............L........p....l..h..s..puhV.p+l..lh..........................................c.....s....p.....+.....A...s....G.....V..p....h..hp.s..stt............................h.p..h...h....s....t....+....-..VlLuAG.u..........l..so..Pp.LL..h...........h........S.......G.l.G.......stt.L....t.......t..h...........s...........l................................h...........h..............p......hs..tVGpNhtDH..t.................................................................................. 0 1106 2163 3210 +2796 PF00446 GnRH Gonadotropin-releasing hormone Finn RD anon Prosite Family \N 18.50 18.50 18.50 18.50 18.40 18.30 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -4.93 0.73 -5.27 0.73 -3.70 25 259 2009-09-12 07:42:02 2003-04-07 12:59:11 12 2 122 0 37 236 0 10.00 83 11.90 CHANGED QHWSHsWpPG QHWSaGWhPG 0 2 5 16 +2797 PF03071 GNT-I GNT-I family Mifsud W anon Pfam-B_2207 (release 6.4) Family Alpha-1,3-mannosyl-glycoprotein beta-1,2-N-acetylglucosaminyltransferase (GNT-I, GLCNAC-T I) EC:2.4.1.101 transfers N-acetyl-D-glucosamine from UDP to high-mannose glycoprotein N-oligosaccharide. This is an essential step in the synthesis of complex or hybrid-type N-linked oligosaccharides. The enzyme is an integral membrane protein localised to the Golgi apparatus, and is probably distributed in all tissues. The catalytic domain is located at the C-terminus [1]. 
25.40 25.40 25.40 25.40 25.20 25.00 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.76 0.70 -5.94 4 372 2012-10-03 05:28:31 2003-04-07 12:59:11 10 10 147 7 215 355 35 315.40 31 71.55 CHANGED hhAshhhhaIthhLFhh.opYA-phssulcucNpssuphph.l.clu.pQsRlVtLEc...hhpp.s--lpplRuhlps..ht.cuhs+lshssthsVhsVlVhACsRAshlc+slcplL+Yp.PsApKaPlhlSQDsucpsV+ptshSY.splTYhpHL..Dhpslss..Puc..hpAYYKIARHYKWALsQlFhc+pFSpVIIlEDDhEIAPDFF-YFcAstsLL-pDcolhslSuWNDNGppQhVcs..P.sLYRSDFFPGLGWMLpppTWcELpPKWPKAaWDDWhRh.Ep++GRQhIRPElsRT..hsFGc+GuShGQFFsQaLc.IKLNDhhVcapphDLuYLhcsNYsKcFsshV.+pAh.lpssclshpshs...cG.-VRlpYcspl-FcchAcphGIh--aKsGVPRsAY+GIVsFphps.RRVaLVsPco.lpthssc. .................................................................................................................h............................................................................................................................................................................................................................s.hslhV.hAs.sRss.l.+hlcpL...lphp.....s.....s.......s........p.....h...lh.V........D.s........p....p...s.h...p..........h.lt...a............l...p...h.l.pp..........................p.s.l.t.h.....st.................t..............+..lu...p.HY+huLst..hF...p.....h..t..h.......p............t.....slllE.................-DL-l...AsDFFpYFpt.sh..LL...c....p.D.o.la............ClS............AWN....D..............s.G..............h.....pp.hsc...s....................s...p.....h.....L....Y...Rs-hhPGLG.WhLh....+pla.p........E...Lp...PK...W.P....p...s..........h...W..D...WhR.sppR+sRtCIh.P-lsRo...........hp.F.Gh....h............G...h.s.......G..aa....c.t.ahc..h.+h.N................p........l.t....h......p...t..s...ls..L.....c...................-tYc.thht.l..tu..l.p.....h.....pc......................................................................................................................................................................................................
................................. 0 102 128 177 +2798 PF02447 GntP_permease GntP family permease Bateman A anon Pfam-B_1928 (release 5.4) Family This is a family of integral membrane permeases that are involved in gluconate uptake. E. coli contains several members of this family including GntU Swiss:P46858 a low affinity transporter [1] and GntT Swiss:P39835 a high affinity transporter [2]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.44 0.70 -6.00 12 5095 2012-10-02 15:12:49 2003-04-07 12:59:11 11 8 2135 0 706 3333 363 403.30 34 98.00 CHANGED hsLlhlslu.lllLLlLll+hKlpsFlALllVuhhVGlstGMslscllpohpsGhGGTLGplAlllGLGsMLG+lls-oGuAp+IAhThlppFGcc+lpaAlllsuallGlslFaEVGhlLLlPllFslA+puplsllhlulPhsAuLsssHuhlPPHPGPhslAshhsADlGtslLYGlllu.lPssll...AGPLasK...ahpphh..t.p.....st.hstchhpcpchPuFGlSlhshllPllLMhhpTlsplhhscssshhs.....hhpFlGsPssAhhIAlLlAhaThGhtRGhshpplhchhspulsshAhlLLIIGuGGuFKQVLl-SGVGchluphhpshslSPllhAWllAAllRluhGSATVAslTsuGlls.....Pllsths...sss.tLlsLAsGuGSllhSHVNDuuFWlhKcYhslo.lpETh+TWTlhpTIlSlsGLlhsl.Lhull 
................................................................h..llhh.hlu...lhl...ll.l..Ll..h.+....hK......h.....p.s..Fl...u.L..l...l....s...uh.h...s............u............l............h...t............G........h.........s......h...........s.........c.............l............l.........p...........s.........h...p......s.......G....h....G..u...T...L.Gtl...u...l....l....lu.hGshlG+l...ltcoGuAppIApollpph....Gc.......+.......+.......st....h..A.......l.........s.....l..s....u.h..l...l.......G.l....s..........lF...........a..-....V.u.h.V..l....Lh..Pls.h.s....lA.+......p.......s.......t.......h................s.......h.l.hl..ulP.h.s.uu..l.s.s...s..H..s.h.l....PP.p.......Pu..P..h...s..h....A...s..h...........h.......t..A..s.....l.....G...h......s..l....l.....h..G....l......l..lu....lP...ss...l...l...........A...G....la.s+......................h..h..t.p......h.......................................t.....h.s....t....t....h.......h...s...p....p........p.h.............P....u.....h....u.........h.......o...l.......h...s.....h.l..l..P.l....l........L..h.h.......h.....p.s....l...u..p.....h.........h.....h.....s........t.......s.....p..s...h.ht............................hh..p..F.....lG....s.....P.....h..h.......A....h..h.......Ius..l.l.Ah.a.s.hG..h....t........p..s..h........s.h....p.p.....l.............c...............h....hsp.....u....l.t..s.h.u.h....l.l...L....l..l..G..uGGsFtplLh...-....SG...luphl....up....h...h.............p..............t.............h....s.............l..s....s....l...lhuallAs.....llRlA.GS....A....T....V.A...h....h.T.A.u...Gllu.............P.h.l...st..h.s........sls.s...t..lh..s..l.A..h.u....uGS.l.h.hSHV.N.D..u.G.F.W...l..h.p.....c.a..hsho.lt-TlKoW...ohhp....Tlluls.G.hlhsh.llsh.h..................................................................................................................................................................................................................... 
0 172 385 568 +2799 PF00392 GntR gntR; Bacterial regulatory proteins, gntR family Finn RD, Bateman A, Hoskisson PA anon Prosite & Pfam-B_6405 (Release 8.0) Family This family of regulatory proteins consists of the N-terminal HTH region of GntR-like bacterial transcription factors. At the C-terminus there is usually an effector-binding/oligomerisation domain. The GntR-like proteins include the following sub-families: MocR, YtrR, FadR, AraR, HutC and PlmA, DevA, DasR [1-2][4][5]. Many of these proteins have been shown experimentally to be autoregulatory, enabling the prediction of operator sites and the discovery of cis/trans relationships [3]. The DasR regulator has been shown to be a global regulator of primary metabolism and development in Streptomyces coelicolor [5]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.87 0.72 -4.53 26 49014 2012-10-04 14:01:12 2003-04-07 12:59:11 16 87 4089 55 10949 36460 2838 63.00 29 23.05 CHANGED hhpplhppLcppIhpGphpsGspLPsEccLuspauVoRsslREALppLtscGllptppspGshV ..........................h..tplhppl.cp.t.I....h....p..G....p....h....t..s.G...s....p...L....P...s.-cp...L..u.p...p...h......s.V.S...RsolRcAlppLt.pc.G..l..l.pt...p...p..s..p..GshV.................. 0 2975 6426 8851 +2800 PF02188 GoLoco GoLoco motif SMART anon Alignment kindly provided by SMART Motif \N 20.70 20.70 21.00 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.27 0.72 -6.70 0.72 -4.52 46 1105 2009-09-10 20:12:23 2003-04-07 12:59:11 12 99 91 10 542 983 0 22.80 44 8.24 CHANGED s-cFa-Lls+sQ.upRhDDQRspl ......-cFF-Llt+sQ.usRhDDQRss... 0 104 151 308 +2801 PF04178 Got1 Got1/Sft2-like family Wood V, Finn RD, Fenech M anon Pfam-B_7371 (release 7.3) & Pfam-B_8991 (release 14.0) Family Traffic through the yeast Golgi complex depends on a member of the syntaxin family of SNARE proteins, Sed5, present in early Golgi cisternae. 
Got1 is thought to facilitate Sed5-dependent fusion events [1]. This is a family of sequences derived from eukaryotic proteins. They are similar to a region of a SNARE-like protein required for traffic through the Golgi complex, SFT2 protein (Swiss:P38166) [2]. This is a conserved protein with four putative transmembrane helices, thought to be involved in vesicular transport in later Golgi compartments [1]. 21.80 21.80 24.00 23.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.57 0.71 -3.99 103 970 2009-01-15 18:05:59 2003-04-07 12:59:11 7 13 313 0 645 936 31 114.90 25 66.90 CHANGED thGlhhhhlu.hhhhhslhh...pshhFuhhaolGslhhlh............uhshLhGspp..thc.hhhppp.Rhhu..ThsahsulhloLh.....huhhhcshhLsllF...ulhp..........h...........ssllhahl..ShhP.hGt.....sslphhhsh ...................................................hGhhhhhh.u.h.hhhhshhh........hhsp.h.hhholGNllhls.......................uh.s.hlhGstpphp.hh.h...p..pp..+...lhu.....Thhahsulhl.sLh...............huh....h....l...c.....s.....hslsllF.........shh...........................hslhh.ahh..uh.P.hGp.....shlp.h...h.............................................................. 0 220 359 523 +2802 PF00516 GP120 Envelope glycoprotein GP120 Finn RD anon Pfam-B_44 (release 1.0) Family The entry of HIV requires interaction of viral GP120 with Swiss:P01730 and a chemokine receptor on the cell surface. 
19.90 18.00 19.90 18.00 19.80 17.90 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -13.00 0.70 -6.27 24 146453 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 446 117 0 119635 0 228.30 54 78.04 CHANGED LWVTVaYGVPVWK-AossLFCAo-s+.......slWATpsClPosPssQElsLssVTEsFshWc..NshVEQhpEDIhSLaDQSLKPCVKLTPLCVohNCschpt.................sssTsss.......................sttthtctEh+NCoFNhTs.hRDKpcphhshFYphDlVshssss............tophhlIpCNTSVITpACsKs.a-sI.l+YCAPAGFAlLKCNDccasGpuP.CoNVSsVpCT+tIcssVSTtLLLNGShAEpcshIh.scshs......sNsphIllphphslsIsCpRPsNpThh.thhhu...GpsFaohtplhs....lRQAaCphst.pWspslppVtppLtcp.p.p......psIsFs.psu........................GGDPElsha.FNCtGEFFYCNsotlhN........W.psp............ssps.......pphhlPCRI+QIINhWpcVGKshYAPPhcG...pIpCsSslTGLllsh..Dusssss.......pThhssuu-h+-.WRuELh+YKlVcIpPlGlAPT+sKR+slt...REKR .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................E..t..c..l..........h.I.R...Sc.N.h.o.........s..Ns.K.sI.IV..p..L..s..c..s..V..p...Is.C.T..R...P.s.N......N.TR...+......u...l....p.....lG....PG..p..u.F...Y......u....T......G......c...I....IG.....DIR.Q..A.H.C.N...l...St..sp.....W.N.p...T.L.p....pl.s.p..KL.p.cpFtp..............p..sI.....Fp...s.u.........................GGD.....El.h..h..h.....h..................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................... 1 0 0 0 +2804 PF03010 GP4 GP4 Griffiths-Jones SR anon Pfam-B_1094 (release 6.4) Family GP4 is a minor membrane-associated glycoproteins. This family contains envelope protein GP4 from equine arteritis virus. 20.10 20.10 22.70 38.10 17.90 16.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.23 0.71 -4.61 3 72 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 2 0 0 68 0 139.50 93 100.00 CHANGED MKTYGCIFGLLLFAGLPCCWCTFYPCHAAEARNFTYISHGLGHVHGHcGCRNFINVTHSAFLFLNPTTLTAPAITHCLLLVLAAKMEHPNATIWLQLQPFGYHVAGDVTVNLEENKRHPYFKLLRAPALPLGFVAIVYVLLRLVRWAQQCYL MKhYGCILGLLLFVGLPCCWCTFYPCHAAEARNFTYlSHGlGHVHGHcGCRNFINVTHSAFLFLNPToLTAPAITHCLLLVLAAKMEHPNATIWLQLQPFGYHVAGDVoVNLE.NKRHPYFKLLRAPAhPLGFVAIVYVLLRLVRWAQQCYL 2 0 0 0 +2805 PF00517 GP41 Retroviral envelope protein Finn RD, Bateman A anon Pfam-B_44 (release 1.0) Family This family includes envelope protein from a variety of retroviruses. It includes the GP41 subunit of the envelope protein complex from human and simian immunodeficiency viruses (HIV and SIV) which mediate membrane fusion during viral entry. The family also includes bovine immunodeficiency virus, feline immunodeficiency virus and Equine infectious anaemia (EIAV). The family also includes the Gp36 protein from mouse mammary tumour virus (MMTV) and human endogenous retroviruses (HERVs). 
25.10 25.10 25.10 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.54 0.71 -4.76 23 40495 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 166 141 21 37279 0 171.40 74 29.32 CHANGED huAsALshosQs+pllushhppppplhsslpt.pclLphplhslcp.l..LpsRVpulE+alcsptphs.hGCs.+p..hC+TshPh....hN.o.....Ws..NhThpp..Wpcplp.lppplhpllt-spppptpsh.shppLss..ht.hhs...WhshssWl............shIphslhlllhllsLpllhtllps.hh..phh.GYpsl.............ph.lhcspp.spst ................................MGAAS.lT.LTVQARp.LL.SG...I...VQQQ.sN.LLRAI.E.A.QQHLLQLTVWGIKQ....LQ.ARVLAV.ER.YLKDQQLLGIWGCSGKL.....ICT.Ts...VPW..NsS.....W....S.....NKSh.............spIW.s......NMTWMp......W-..+.EIsNYTshIYsL..lE-.SQ.s.QQ..E.KN.Ep..-.LLt.LDK..WAsLWN....WFs.Io.pWL............WYI+IF.IhI..V.uGllu.LRIlhh.llph.ls....+lR...p..GYpPl.................ph.................................................................................. 0 12 12 12 +2806 PF02925 gpD Bacteriophage scaffolding protein D Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 41.10 41.00 18.70 17.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.66 0.71 -4.40 2 78 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 54 10 0 51 2296 139.30 87 92.82 CHANGED .sh.psVpatTulAul+hlQASAVLDlTE-DFDFLTusKlWIATDRsRARRCVEACVYGTLDFVGYPRFPAPVEFIAAVIAYYVHPVNlQTACLlMEGAEFoENIINGVERPVpAAELFAaTLRl+AG.p-slhDAEENsR .....sEpuVpFQTAlASIKLIQASAVLDLTEDDFDFLTusKVWIATDRSRARRCVEACVYGTLDFVGYPRFPAPVEFIAAVIAYYVHPVNIQTACLIMEGAEFTENIINGVERPVKAAELFAFTLRVRAGNpDllscAEENlR 0 0 0 0 +2807 PF00044 Gp_dh_N gpdh; Glyceraldehyde 3-phosphate dehydrogenase, NAD binding domain Eddy SR, Griffiths-Jones SR anon Overington Domain GAPDH is a tetrameric NAD-binding enzyme involved in glycolysis and glyconeogenesis. N-terminal domain is a Rossmann NAD(P) binding fold. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.99 0.71 -4.25 112 14213 2012-10-10 17:06:42 2003-04-07 12:59:11 19 19 8352 412 2317 10425 3516 130.00 46 43.38 CHANGED l+luINGFGRIGRhVhRshh.....tp.scl-l.VAI...NDhs.sscthAaLhK.....YDSsHG+a.sspVp..hp........sst..lhl.sGc...p.Ip.lhsc+-.PtplPW.tphulD..lVlEuTGhF...psp-pup.t.....Hlp..u.GAKKVllSAP..u+st.......ssohVhGVN.ccpassp..pllSNASC ............................plulNGF.GRIG.R.shRth.............tt....tt..lc..l..VAl................N...D.....h..............s.......sp..h.h...Aa...h.l+.....YDo....sH.Gp.F.......p.......s.......p......Vp....hp......................psp......lhV...sG.c..........t.Ip....lht....c........+...s....P.t...p.....lsW...t......................p..hGs-..hVlEsTG.h.F........ssp....-...cAp.t...........Hlc........u...G..AK.....KVl.I...SAP....uts..........sshhVhGVN.c-.ph...c..s...s.....pllS.NASC.................................................................................................. 0 709 1371 1873 +2808 PF02800 Gp_dh_C gpdh_C; Glyceraldehyde 3-phosphate dehydrogenase, C-terminal domain Eddy SR, Griffiths-Jones SR anon Overington Domain GAPDH is a tetrameric NAD-binding enzyme involved in glycolysis and glyconeogenesis. C-terminal domain is a mixed alpha/antiparallel beta fold. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.81 0.71 -4.77 71 15479 2012-10-02 22:00:43 2003-04-07 12:59:11 15 25 9082 412 2355 11397 3548 142.80 53 50.30 CHANGED LAPlsKVlp.-pFGItcGhMTTlHuhTusQphlDss....+DhRcuRuAA...NIIPoSTGAAKAVuhVlPcLp.GKLsGhAhRVPTssVSllDLssplcKs.soh-ElssAlKcAuc.......s.hcGlLuYo--.lVSoDahusst..SSlaDuptohsls.....sphVKlluWY .............................................LAPlAK..V.l.p.Dp...F.G...I.hc...GL...M..TT.......l.H.....uh..T......u.s..Q..ps.lD...u...Pp......K...D.....h.Rt...........u...RuAu............tNI.IP...oSTG..AA...KAVG.c.VlPpL.s....G.........K.L.s.G..........hAh.R.......VPTssVSl.V....D.L..os.p.L.c......K.s........s....o........h....-.....-lpts.l.....K......pAup.................................s.h.+.G.lLuY.T....-....-...l.VS..sDa...usst....oSlh.Du..tt.shshs......tphlKlhuWY............................................. 0 719 1391 1898 +2809 PF05024 Gpi1 N-acetylglucosaminyl transferase component (Gpi1) Moxon SJ anon Pfam-B_4796 (release 7.6) Family Glycosylphosphatidylinositol (GPI) represents an important anchoring molecule for cell surface proteins.The first step in its synthesis is the transfer of N-acetylglucosamine (GlcNAc) from UDP-N-acetylglucosamine to phosphatidylinositol (PI). This chemically simple step is genetically complex because three or four genes are required in both yeast (GPI1, GPI2 and GPI3) and mammals (GPI1, PIG A, PIG H and PIG C), respectively [1]. 
20.80 20.80 22.40 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -4.63 7 347 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 276 0 237 333 2 167.60 35 30.43 CHANGED phhsplh.lhlDlshGlhlh.hLh...Nhphlsshhhphst.ashcpLpohlthLhssPhGlKLNsplsphluphhlahIclW.oahshhpshl.......hlhhhluh.u.hhGhohhluhlhDhlslhohHlhshYhhss+LhshplpslsuLapLFRGKKhNlLRpRlDoh.YshcQlhLGTlLFolLlFLhPTh ................................................t..hs.hh.lh.DlhlGhhlh.hlh............tpht.l..st...h...t..h..h.p......................hhhctLpphlpW.L.h..u.h.PA..GLKLNppLsthLGchFLahIp.h.W.t.s.h...l..t..h..hp..s.hl..................................hll.h.h....l.u.h.u.....hhG.hoh.l.ulh.Dllsl.lT.hHlasaY..hh..su...+laphplphltSLapLFR.GKKhNl..LR.p.R.lD.........o........p........s.........Ys.........lD.........QLhlGTlLFTlLhFLhPTs............................... 1 82 132 199 +2810 PF04113 Gpi16 Gpi16 subunit, GPI transamidase component Wood V, Finn RD anon Pfam-B_7012 (release 7.3); Family GPI (glycosyl phosphatidyl inositol) transamidase is a multi-protein complex. Gpi16, Gpi8 and Gaa1 for a sub-complex of the GPI transamidase. GPI transamidase that adds glycosylphosphatidylinositols (GPIs) to newly synthesised proteins. Gpi16 is an essential N-glycosylated transmembrane glycoprotein. Gpi16 is largely found on the lumenal side of the ER. It has a single C-terminal transmembrane domain and a small C-terminal, cytosolic extension with an ER retrieval motif [1]. 
20.20 20.20 21.40 20.80 17.60 18.40 hmmbuild -o /dev/null HMM SEED 564 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.93 0.70 -6.50 11 458 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 272 0 326 480 1 374.60 25 89.91 CHANGED L.Lhshlhlsssssu.s.st........sYcEsLhL+PLPpspLLASFpFcu............ssstssapppcachFPRuLuQILp+uss+ELHLRFopGRWDsEsWG.spPasGhppGGTGVElWAWl-usscpt....................Acc+WhsLTpuLSGLFCASLNFI.DSocTTcPshoFp.sustss.....tp..sh+LhaGsLPtEsVCTENLTPFLKLLPCKGKAGIuoLLD.GHKlFDusWQSMuIDVpslCsss.sp.Chlpl-QoVDhVlDl-RoKR.+.......s.sspph.C-pSKsYpsc.hCaPhtpssptsWSLs-lFGRslpGsCsLuc...tpssVsLpV........................P.phpVhsp.thht......pssstspsaslpsss......saDlhlPt....p.o....phssh-pPslpApRolsGaGQ-RGGlphhhsNPSsp.............sl-FIYhEpLPWFlRhYlHTlpspls.......tpttsssshlcclaY+PulDRc.+uTpLElphslPst.STlsLTY-FEKulLRYsEYPPDANRGFslssAVIolhs.s.........................pssshplRTouLLlsLPTPDFSMPYNVIIhTSTllALAFGulFNlLsRRalstEcutshptpshhs+Ltt+lht....+l+ ....................................................................................................................................................................................................................h..................................................................................................................................................................s.G.at...au.........................................................Gh.h.......s.h.................................tta..l.t.hsuhhssuht.h.............................h..........................................hhhu.LstE.hC.T.ENLTPhhcLLPst....stsGlssLhp.....pl.a.ps..apo.t.l..h...p.........t..................h...hp......h..h............................................................................h.h.that.....h......t.....s..s..........l.h.....................................................................................................................................h..h.................hph.h....................................s........h.stp...h.G.u
.p.pGth....h.s..t..................................h.hhhhp.lPWahp.ahpoh.ph...t...............................................h.h....s....u.p..+......tst....hch.h...lPst.ps...............h...ht......hphc+..hLphtEasPDss+Ga.l.s.ul....l..h...........................................................................h..hhopslLl.LssPDFSMPYNVIhhssThhulhaG.hhshhh.+.h......................................................htth............................................................. 0 116 207 282 +2811 PF02831 gpW gpW Bateman A anon [1] Family gpW is a 68 residue protein known to be present in phage particles. Extracts of phage-infected cells lacking gpW contain DNA-filled heads, and active tails, but no infectious virions. gpW is required for the addition of gpFII to the head, which is, in turn, required for the attachment of tails. Since gpFII and tails are known to be attached at the connector, gpW is also likely to assemble at this site. The addition of gpW to filled heads increases the DNase resistance of the packaged DNA, suggesting that gpW either forms a plug at the connector to prevent ejection of the DNA, or binds directly to the DNA. The large number of positively charged residues in gpW (its calculated pI is 10.8) is consistent with a role in DNA interaction [1]. 20.90 20.90 20.90 21.30 20.70 18.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.38 0.72 -4.42 5 521 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 332 3 23 146 0 67.40 62 95.68 CHANGED MTcpp...ELQpARsAhHDLhTGKRVVS...VQKDG.RRVEYTAoSluDL++YIs-LEuQLGhot.RRRuPlGVRl .............Msp.t....ELt....AhRtAhhDLhTG.KRV.so.....VQKDG.RRlEaTAsSls-Lp+hI...s-hEs.l..G...h....Tp..RRRtPhGhhl................... 0 3 10 18 +2812 PF04965 GPW_gp25 Gene 25-like lysozyme Bateman A anon COG3628 Domain This family includes the phage protein Gene 25 from T4 which is a structural component of the outer wedge of the baseplate that has acidic lysozyme activity [1]. 
The family also includes relatives from bacteria that are also presumably lysozymes. 25.30 25.30 25.40 25.30 25.10 25.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.18 0.72 -4.46 181 3115 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1505 1 550 1824 1365 100.80 22 74.20 CHANGED sshss...htp.slp.......pslptlLsTphGpph..............................h.saG..ls-lhstsh..ssssttplpptlppultpa.EPRlp.hhpV..plts........................tspl.phplpup..lh.p...t.t..ls .....................phhcplp.......pslpplLsT..usph..................................th.saG....ls.c.hh..sts.....stshttplpptlppAlh.......+........a...EPRlp.hhpVplp........tp.............sspl.shplpuhlh.p........hhh.............................. 0 111 282 426 +2813 PF05084 GRA6 Granule antigen protein (GRA6) Moxon SJ anon Pfam-B_6204 (release 7.7) Family This family contains the granule antigen protein GRA6 which is found in the parasitic protozoa Toxoplasma gondii and Neospora caninum. GRA6 protein plays an important role in the antigenicity and pathogenicity in these organisms [1]. 22.10 22.10 22.10 132.00 20.40 22.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.48 0.70 -4.81 2 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 4 0 3 51 0 183.70 91 94.90 CHANGED MApsthhhRp+RsFsPlTVshlAVshVsFMGV.lsShGusssAssstuVcts.pthsS.Gt...AVGToE-YVNSSthuGupscu.AEs-pputtsEsDVpP.sVh..spEttu.ups.s.pERhEEtsst.+.ssVpps.spssuKRpQtRHRlIGssVlAssVAhLhhhF.RRpu...........GusctG..............GcsEsGuE- .MAHGGIHLRQKRNFCPLTVSTVAVVFVVFMGVLVNSLGGVAVAADSGGVKQTPSETGSSGGQQEAVGTTEDYVNSSAMGGGQGDSLAEDDTT.SDAAEGDVDPFPVL..ANEGKSEARGPSLEERIEEQGTRRRYSSVQEPQAKVPSKRTQKRHRLIGAVVLAVSVAMLTAFFLRRTGRRSP.EPSGssGGNDAGNNAGNGGNEGRG.tGcs-......... 
0 2 2 3 +2814 PF00267 Porin_1 Gram-ve_porins; Gram-negative porin Finn RD anon Prosite Domain \N 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.16 0.70 -5.17 18 5868 2012-10-03 17:14:36 2003-04-07 12:59:11 16 2 765 89 237 5994 230 301.00 39 93.43 CHANGED KsGNK...........LDLYGKssGhHhaos.csus-.....GDpoYuRIGFKGETQIsDpLTGaGQWEYsspusssEup.ssp..ttTRLuFAGLKaG-aGShDYGRNYGVlYDlpuaTDhhPEFGGDoh...stsDsaMpsRusGlAT.YRNsDFFGLVDGLsFALQYQGKNtp...............scshh......+pNGD........GaGhShoY-hu..shuhuuAYssScRos-Qt.t.............p..utG-+A-sassGuKYDANNlYLAshYupTpNhT.hu.............shuhANKsQNhEVsAQYQF.DFG.LRPuluYlpSKGKDlsu.........ths-pDLVKYVsVGATYYFNKNMSsaVDYKINhLDcss.h..t.GlsoDDhsAVGLVYQF ........................................................................KsGsc...........hshh.....s.....ch.s.u......h..+....h.....h..os.....pp...u...p...c.........hs.p.h.h..s+.l.GFKGpp..p..l.s..s.t.........Lpu.h.................h....QhE.......p.......h.......p...........s......s............s.......s........-..........................................t...............s...........t..................ts.....R.u.........FsGLK..u-.aG.ohchGR......s...h......u......l.......l.......h......D.......s.....t......s....h..........s.......D............................P....................u...G.csh......................hts.s...s...a.....h.....p......t..............c....s.....p.....s..l...........u...........s.........Yc....s.....s......-......F...h...G....l....s.......s......u......l..p..a..A....h..p.......p....u.p.st...........................................................t...t..hh..............+.p.N..G.D......................u..a..t...s...u...h..s.Y...c.......s......G..................h...h....sh..u........u..u....Y...s..p..p..s...c...s...s.c...p................................................s....s....s.c..+..t.p.....s.a..p...h..s....h....t...Y.......D...s...N...s...l.....Y......l..A..s..t.......u..p...p...p..s.h..p...hs
...................................shs..s...K..s...p....s.......h.....E...l....s...A......p....h.......p....a........c.....F.......G....l......p....P.......p....l.......u......Y....h...p.....u.h...s...t.s......lps......................hs-.p..s..h.h...c.......l..s..V......G..A.pY...F...sK..p.....h..S.....s..h.....V.....s...h.......t.....h...............h.....s.......p...sp............................h.....s.....s...........s......h.....s.....u..l..GhhapF......................................................................................................................................................................................................................................................... 0 18 57 141 +2815 PF00746 Gram_pos_anchor Gram positive anchor Bateman A anon Pfam-B_457 (release 2.1) Family \N 20.50 17.00 20.50 17.10 20.40 16.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.04 0.72 -4.03 84 9438 2012-10-02 15:23:12 2003-04-07 12:59:11 16 692 1185 11 443 6888 16 38.70 27 4.73 CHANGED spssptppLPpTG..pssshhhshhGh.hhhssuullhht++ ...........t..tpptppLPpTG....ppss.s...hh..s..l..hGh..l.hhhhusllhh++............. 
0 110 196 316 +2816 PF01271 Granin Granin (chromogranin or secretogranin) Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 27.70 26.30 23.80 24.90 hmmbuild --amino -o /dev/null HMM SEED 586 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -13.02 0.70 -5.80 9 290 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 53 2 101 264 0 329.00 22 89.82 CHANGED ts+s-p.Vh+CllEVlSssLsKsSPsPlo.EChEsLcpsccclpspEpppN.ssh.tl+hLpD.A-stt....stpt.su.s.....ph.h.s.....tEsGu+sREpps.........s...tcph...h..c..tpp..ccp.c.hph...h.-ttpccph.ctssElsEsh.s.pspAohccshpElu+h-s.sp.p.RE+scccpKs.p-ss--sh+spshPtEs.ssspstss.............pEuEEspspE.s+c..+.psc+scShsscpp.uGphsh--E.............................s..cptphspcp+shSuh..........hhspG+pppptptccuccp.tcth-cps..p-ls.osshps..p-t.+tlc..sGcp.ttts-sstchps.tsh--cscs-.sHsph.cs...tc-th.s+tPps.......phLshG-ph...G.....th...p......c.ppEEsh..................h.LpppahD.tpWpppcpp++t.hs-phLE..tEEcs-hshsM+spFPEh..s.............uYtKR....sPt.sL+RtactsupEDuLctaht.ts...sct.uEEp+c...ts.pE-soAN+...ht-EDpELEsLuAl-tELpKlA+capshRRG 
...............................................................h.ph...ht.t..p..s.s...cshphlc..tpp....h.....p..p...t.s.............hp...p........t......t..t......................................................................................................................................................................................................t.............t.................................................t...t..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 4 11 33 +2817 PF00396 Granulin granulin; Granulin Finn RD anon Prosite Family \N 20.90 20.90 22.50 21.20 20.70 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.76 0.72 -3.86 94 1181 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 140 9 509 1067 2 43.50 48 31.13 CHANGED TCCphss.Gs....WuCCPhspAVCCsDthHCCPpGa....pCshpsspChp .......................TCCph.s..Gs......WGCCPhs.pAVCCsD+h.HCCPpGa....pCshpt.spCh..... 0 196 261 377 +2818 PF04495 GRASP55_65 GRASP55/65 PDZ-like domain Finn RD, Bateman A anon Pfam-B_3985 (release 7.5) Domain GRASP55 (Golgi re-assembly stacking protein of 55 kDa) and GRASP65 (a 65 kDa) protein are highly homologous. 
GRASP55 is a component of the Golgi stacking machinery. GRASP65, an N-ethylmaleimide- sensitive membrane protein required for the stacking of Golgi cisternae in a cell-free system [1]. This region appears to be related to the PDZ domain. 27.70 27.70 27.70 27.80 27.60 27.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.86 0.71 -4.19 37 638 2012-10-02 11:12:46 2003-04-07 12:59:11 9 5 271 6 397 620 5 118.60 38 48.11 CHANGED hpVassKstphRplpl.sP...............Sshass..................uLLGlolcasshp.ss-plWHVL-V..ssSPAthAG.LhPaoDYIlGss..........sllp..............................spcslhsLlEs+.s+.............................sLpLaVYNo-pDssREVslsPspsWGG-...GuLGCslGYGhLHRIPsh ......................................................................................................................................Ghthp.t....sspp.s..aHVLcV.p.ssSPAthAG.LcP.ahDaI...l...u.h.st......................shLp..............................-.s.-....s.L..hs...Ll..c.s.p..s+..............................Pl+LhV..Y..Ns...c..s...p.ss.R.E..V..plsPs.ptWGGp............G.LGs.l.................................................................................................. 0 138 198 303 +2819 PF04723 GRDA Glycine reductase complex selenoprotein A Kerrison ND anon DOMO:DM04874; Family Found in clostridia, this protein contains one active site selenocysteine and catalyses the reductive deamination of glycine, which is coupled to the esterification of orthophosphate resulting in the formation of ATP [1]. A member of this family may also exist in Treponema denticola [2]. 
21.90 21.90 22.30 23.10 21.50 21.80 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.72 0.71 -4.50 3 223 2012-10-10 17:06:42 2003-04-07 12:59:11 9 1 110 0 49 188 4 84.70 42 95.03 CHANGED SlFsGKKVIIIGDRDGIPGPAIEECLKuIssEVlFSSTECFVUTAAGAMDLENQp+VK-ATEpaGAENLVVLlGAAEAESAGLAAETVTAGDPTFAGPLAGVELGLRVYHAVEPEFKuEVDuAIYDDQlGMMEMVLDVDuIIEEMpSIRu ........................................................................................................................................................................................................... 0 22 40 44 +2820 PF01272 GreA_GreB Transcription elongation factor, GreA/GreB, C-term Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain This domain has an FKBP-like fold. 22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.43 0.72 -4.27 137 7631 2012-10-02 13:30:10 2003-04-07 12:59:11 14 15 4301 28 1688 4424 1946 77.30 33 45.59 CHANGED hss.....spVthGupVsl..ps...ssspppsaplVGs..........pEuch...pps..pIShpSPlupALlG+c.hGDplplpsPsG.php.hcIlplph ...................................t..p.spVhhGupVpl....ts.....ss...s...-...c..p..papI.VGs...................................sE.ucs.........pps.......tIShpSPlu+ALlGKc.hGD....p....l.p.l.ps.PsG..p....hp..hcllplp.................. 0 512 1024 1380 +2821 PF03449 GreA_GreB_N Transcription elongation factor, N-terminal Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain This domain adopts a long alpha-hairpin structure. 
21.80 21.80 21.80 22.00 21.70 21.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.29 0.72 -3.92 134 5904 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 4289 22 1237 3294 1838 74.30 42 42.85 CHANGED hp.....tp.h.lTtcGhc+LcpELcpLtpscRPclsctlspARspGDLuENAEYcAAKccQshl-uRIppLpppLsp.A.pl .....................p..h.hThcGhc+.LcpELcpLcpscRPclsptIstA.RuhG.DLSENAEYcAAK-cQuh..lEu.RIppL-ptLpsApl........ 0 399 789 1035 +2822 PF01184 Grp1_Fun34_YaaH GPR1/FUN34/yaaH family Finn RD, Bateman A, Wood V, Studholme DJ anon Prosite Family The Ady2 protein in (Swiss:P25613) is required for acetate in Saccharomyces cerevisiae, and is probably an acetate transporter. A homologue in Yarrowia lipolytica (GPR1) has a role in acetic acid sensitivity. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.36 0.70 -5.04 58 1687 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 1218 0 658 1121 22 193.00 37 86.80 CHANGED FG...Gshs....PG.........htP.sp+phANPAPLGLsuFulTTFlLuhaNspshGlssPNllVuhAhFYGGlsQhlAGlWEhthGN...TFGuTAhoSYGuFWlSauslhls..sFGIhsAYtst.t...........hssAlGhaLluWsIFThhhhlsTlKSTluFhhlFhhLslsFlLLAsuph.......osstsltpAGGhhGllsAhhAaYsAhAGlAsppNSa..hh..sh.lPhsp...pt ..............................hs.............................p+hA.N..PuPLGLhGF..uh..TTll..L.s.laN..s.....Gh........h..s..l.....s...u.......l..l..l........u...hu...l.FYGG.luQlhAGlhEa++G.N.......TFGhTAFoSY.....Gu...........FWLoh.s..s..l.l...l.....h....phGls...su..............................................s..ph.l.G..hY......Lhh....W.u.....lFTh..h.M.hh...u..T...L+.s............sh.sl...h.lFh.sLslhFhLL..Alush........................................s.u.s...t.s.l.hp...h.AG..ahG..llsuhsAh..YhAhutllN.pphup.........h......tt............................................ 
2 200 379 560 +2823 PF01025 GrpE GrpE Bateman A anon Pfam-B_817 (release 3.0) Family \N 25.80 25.80 26.60 26.00 25.40 25.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.97 0.71 -4.70 111 5353 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 4825 8 1413 3716 2850 180.10 28 89.19 CHANGED pp.............ttttpsphpshppp............lpp............................hcp.......chc.....ch..pcphh..........RthA-h-.Nh++Rh..p+-hp.phpcauhpphhpc...LLsllD...sl-+Alph...........pspphp.......slhcG..lchshcphhphL.pchGlptlss.h.GctFDPphH-..Altph........tssp.hts.ssllplhppGYplp.-R...llRsAhVhVup ..............................................................................................................t.tttptpppt........tttt.tt..ppc........................ltp........................................h.pt................ph.p...............-h....c-chL.....................R..hp..AE.h-..N.......h...+.......+Rs........pc-hp.......psp...c..a..u....hpch.s.p....-.......l.L.PllD...........sL-RA.lps...............tstphp..............................................sl.hcG...lch..shcpl.hpsl.cc..h.Glctlss.......t...G......p..t..FDPshHp...Altp..h...........ss..sc....hts....ssl.spVhQ+GY..p..Lp..-.R.....llRPAhVsVu................................. 
0 485 928 1199 +2824 PF02955 GSH-S_ATP Prokaryotic glutathione synthetase, ATP-grasp domain Griffiths-Jones SR, Bateman A anon Structural domain Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -5.13 37 1779 2012-10-10 13:17:03 2003-04-07 12:59:11 11 6 1678 4 426 2863 2533 171.60 52 54.80 CHANGED EKLa.stpFs....chh.PsTLVopchpcl+pFhpcpsD..lILKPLsGhGGpulFRlpps.DsNlssllEhhTptsp..p.lMsQpalP-lpcGDKRIlllsG-Pls......ulsRlPt.tGEhRuNlAsGGpucsppLoc+-hcIsppluPpL+c+GLhFVGlDVIGshLTEINVTSPTGlpEIcphts .....................................................EKLa.ss.Fs....-l.s.P.T.LV.T..R...s....t.s...p..l..+s....Fhp....c.....H......u..D.....lI...LKPL..D....G...M..G..G..uu..I..FRlcp..........s.....D................s.............N..........L....u..........s.....I........l...E...o....L...T.....p...t......G..p...............c..h..sM...AQpY......L....P....s..I...c..c.....G....DK....Rl..LlV...DGE.P..VP................YsLA.R.....I....P....p.....s..G..E...s...R......G.....N.L...A...A..G...GRG......E...s...p..s..Lo...-....p.Dhc....I.Ac.p.l..G.P...s..L.+.c+.G.LlFVG...LD..l..I.....G.D.....h.....L..T.EIN..VT.SPT..ClREI-t..t.................................................................................................................... 
0 103 240 337 +2825 PF02951 GSH-S_N GTS_N; Prokaryotic glutathione synthetase, N-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.30 21.30 21.60 21.70 20.80 21.00 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.54 0.71 -4.30 155 1672 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1622 4 395 1136 1521 119.20 46 37.80 CHANGED l+luhlMDPlpslslt+DSoaAhhhEAQpRGHplaahpsscLth.c.....sG..........cshApspsl.plppsts..........saa...phuptp.phsLuc.hDllLMRpDPPFDhpYlhATalLEps.....pp....tusLVlNcPpulR ..............lKlullMDPIusINI.cKDSoFAMhLEAQ+RGa-LaYh-.sDLhl..p......sG.................csp.Ac.s.+sl....pVc....pshs...........cWa.....ph.s..s....cp....clsL.u-.lDVILMRKDPPFDh...E......a.I..Y.A.T.YlL..E+.A....Ec.....pGsLlVN+PQSLR.................................... 0 89 217 308 +2826 PF03917 GSH_synth_ATP Eukaryotic glutathione synthase, ATP binding domain Mifsud W, Griffiths-Jones SR, Finn RD anon Pfam-B_2922 (release 6.5) Domain \N 25.00 25.00 25.50 25.20 24.00 24.00 hmmbuild -o /dev/null --hand HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.16 0.70 -5.87 10 535 2009-01-15 18:05:59 2003-04-07 12:59:11 12 14 352 15 323 538 43 395.70 31 92.73 CHANGED h-hcphDpchLpcLshDAlsWAhLpGLlhs-+osp+SGsVsuV.lsauPlsLLPoshPcutacQAsplpPlFNELVDRVS.DusFLppoLS+TpKs.....D-FTuRLl-IHpKh......Lcp..NKKp-lRLGlaRSDYMlD...-pTpu....LhQIEhNTISsSFuGlushlopLHpplLpphsc...thul-upplPsNsulpthA-ALAKAWscY...............ssPcAlllhVVQsEERNhaDQ+hlsupL+c+atlsshR+slAEl-ptuclpsD..........hsLhlsGQtVAVVYFRuGYoPsDYPSEpEWcARLLlEpSpAlKCPsIuhHLsGTKKIQQELA+PGVLERFL-sK.--lA+LRcsFAGLWSLDDo.....-lV+pAlEcPEhFVhKPQREGGGNNlYG--l+psLh+Lpc..oEEcAAYILMp+IhPpss.pshLlR.sGhhcpscsISELGIaGsYLRN..+DcVlhNppSGaLlRTKssoSsEGGVAAGFAVLDSlYL 
..........................................................................................................................................................hh.thh.hsh.pGhhhh......................................................sPh..oLh..Po.hPcthappuh.t.lp.haNpLh.s.t.lu......p..s...t.aL.tp.hl.......p.p...hh...ps......................D.p.Fsu.p.Lhplatph.................................hpp......sh.h..p.t..l...............LG..lhRSDYMhc.......tts...t.................................l+QlEhNTIusS.Fuu.lu.shsstlHph.lhp.....h..h................................p............t.......p.l.s.t..N..p.s.htt.l...utulstA..hp..ass.pss.hhhh....h.h.ppth.t.th......hhhhth......t.................................................................................th...t.thsshhatsshhstph.spttWpsRhhlEpStAlKCPsl.hpLsGoKKl.QQ..L........u......p.......s................s.............h....L..p..pF..l.......................s................t......p....p..h.tp.........lcps..F......ss.has.....L..-...............s.......................s................p......h..................h....t.....Ah......p..p..P..p...paVLKPQR.EGG..GNNlYtpp..l.phLp.p..ltp................pchsu...aILMphI.....P.....ts..hh.l.....R....s.....s..................h............................t.p.h........lSELGlaGs......hlhp................tt...p......l.......lhN.....p.p..sGaLlRTK.t.pssEGGVuuGhuslDo.hL................................................................................................... 
0 114 178 269 +2827 PF03199 GSH_synthase Eukaryotic glutathione synthase Mifsud W anon Pfam-B_2922 (release 6.5) Domain \N 20.90 20.90 23.50 21.80 18.30 17.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.42 0.72 -3.94 51 469 2012-10-01 21:12:38 2003-04-07 12:59:11 10 12 345 15 283 471 21 103.90 38 21.70 CHANGED spslVLhVVQssERNhaDQ+hlEhtLhcct.t.IpolRtThs-lpppsplt...............ss..t.....pL.....hlpu..t-VuVVYaRuGYsPsDY..ss.......-s..pWpARLhlEpSpAIKCPolhtpLu ..............................................s.pssllhlVQtsE.RNhaDQ+hl.....Eh...pLhc.......p........t....t.....lps..l..R......hohs-.lt.pp.s.pls.........................................................ss....p..............pL.....hl.s.s........tEVuVVYaRuGYsPsDY...so.........c.p..pWcARlhlEpSpAlKCPolthpLs............. 0 96 155 234 +2828 PF00255 GSHPx Glutathione peroxidase Finn RD anon Prosite Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.37 0.72 -4.68 12 4817 2012-10-03 14:45:55 2003-04-07 12:59:11 14 11 3035 33 1412 4342 2461 104.50 45 59.73 CHANGED hY-asshslsG.p.hshspa+GKVlLIVNVAShsGhTs.pYppLptL.c+htspGLsILGFPCNQFGcQEPupscE....lKhhpsst...atssFslFpKl-VNGpstcPlYpaLK ................................hashp.sps.hpG.pshsL..p.p.Y..c..G..K..V..lL..l..V..N.s.A........S.c.......C...G.h.T............s....QY..p...t...L..............p......pLaccY.....p...c....p...G.........h........h.l...L..G........F..P.......C..N...Q.F.s.........t...Q...E..P.G.....o.scE.....................Ipp..aCphs...........a.G.V.o..F.P.hF.sK...l.-VN...Gp....s.s..c....P..LapaL........................................................... 0 446 800 1119 +2829 PF03738 GSP_synth Glutathionylspermidine synthase preATP-grasp Bateman A anon COG0754 Family This region contains the Glutathionylspermidine synthase enzymatic activity EC:6.3.1.8. This is the C-terminal region in bienzymes such as Swiss:P43675. 
Glutathionylspermidine (GSP) synthetases of Trypanosomatidae and Escherichia coli couple hydrolysis of ATP (to ADP and Pi) with formation of an amide bond between spermidine and the glycine carboxylate of glutathione (gamma-Glu-Cys-Gly). In the pathogenic trypanosomatids, this reaction is the penultimate step in the biosynthesis of the antioxidant metabolite, trypanothione (N1,N8-bis-(glutathionyl)spermidine), and is a target for drug design [1]. This region, the pre-ATP grasp region, probably carries the substrate-binding site [2]. 22.40 22.40 22.50 22.40 21.40 22.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.42 0.72 -3.73 159 2307 2012-10-01 21:12:38 2003-04-07 12:59:11 9 6 1331 17 262 1223 27 99.70 35 22.57 CHANGED tl.aFssh....ssshEDttTs..taLpcsAppA.............Ghp.sthls.lc-.lshs.............t..pGth.hDh-s...p.lchlFK.LYPWEhhhc-.......................p...hustl..thtp......sthlEPsWphlhS ...............lahsss..c-shEDcsTs..pYL.p-sAppA.............Ghp...ocalhl--.lGhs....................t...tGph..s.......Dh..-s........plIpslFK.LYPWEahhc-..........................c.......hsshLhtp..t.............sthlEPhWpsIlS................................................ 0 56 147 206 +2830 PF00437 T2SE GSPII_E; Type II/IV secretion system protein Finn RD, Bateman A anon Prosite & Pfam-B_2215 (Release 8.0) Family This family contains both type II and type IV Swiss:P54907 pathway secretion proteins from bacteria. Swiss:P07169 VirB11 ATPase is a subunit of the Agrobacterium tumefaciens transfer DNA (T-DNA) transfer system, a type IV secretion pathway required for delivery of T-DNA and effector proteins to plant cells during infection [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.85 0.70 -5.50 23 13216 2012-10-05 12:31:08 2003-04-07 12:59:11 15 70 4434 45 3599 12796 4731 263.40 26 60.45 CHANGED tthhhcsl..ctssoDIhlps.tc.lhl..Rlsu...hlpplhp.stptsttllpRltshuph-IuE++...Ducls.......suhchclsh.Psshu..tp..lslR..cpsstth..sLpcLuhssshp.................................pthtchLcps...+s.ILVoGsTGSGKTThL...AhLstlssp..pcpIlTlEDssElpl.puhsplplps+.......sslThschLRuhLR.cPDhIhlGElRDtEshcl.lpAspTGH.s.loTLHsNSAhuAlpRLpphu........ls.h.lps.ltsl..ls.+LsRph .....................................................................................................................................................hh......h...p....ts...oDl.h..l.....p....s..............t...........p...hhl.....+hst...............h.h..t..h....................h......s.......p...........h..t....t....l.......h.t....p.....l....t.....h...h.....s.....t.....h..............p.....l.....s.....-......p.........p..................-..u.....p..ht..........................pshch......+.l.s..h....h.s....t.ths...........p............hs...l.R............ph..s...t.t.hh.....sl.p.p..L..s..h......s...............................................................................t.t..h...t.p..h...l...p..p...s..................p.....G.....h.l...L.l..o..GsTGSGKT.T.o.L.......t.u.h..l..s.....h...l.....sps...........tp.......+.I.....l....T....l....E.......D....P...l...E.....h.......h......p........t........h.....p.......p.....l...p..lppc........................ssh...o...a...s...s...s...L+..u.s....L.R....p.c....P..D..l.I....llGE.l..R.D.t.......E..T.h.ch.slp.A.u.p...T...G...H..h..l...h..o..TL...Hs.s...s.utp.s....l.s.R..L..h...s..hs.............ht..........l...p.p...t.l..t..t..s........hs...pl...h............................................................................................... 
0 1167 2366 3072 +2831 PF05157 T2SE_Nter GSPII_E_N; Type II secretion system (T2SS), protein E, N-terminal domain Yeats C, Desvaux M anon Yeats C Domain This domain is found at the N-terminus of members of the Type II secretion system protein E. Proteins in this subfamily are typically involved in Type 4 pilus biogenesis (eg Swiss:Q9X4G8), though some are involved in other processes; for instance aggregation in Myxococcus xanthus (Swiss:Q9RF11) [1]. The structure of this domain is now known [2,3]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.07 0.72 -4.01 110 3482 2009-01-15 18:05:59 2003-04-07 12:59:11 10 45 1888 2 1060 2838 803 107.90 18 19.45 CHANGED Llp........Ghhs..........................................p......pp.............................lhpsLucp.............................hslsh........lshpshphs.p..hhth.....lshshhpctthlPlp..hc.s......splhl..AhscP...hs.hphh-plphh.hth.plp..hhlsspsplp.phlpch....hspptst .............................................................................................................thls..........................................tpp..........................................lhphlut.............................................................hshsh..................hs.l.p..t..h...t..h..s..t....hhth.........hstphsp+apslP.lp.....tp..s........................sp..lhl...A...hsDP..hs.htsl.-.s.lpht.sst..plc........hslsscpplp.ptlpchht.....p......................................... 1 361 719 946 +2832 PF00482 T2SF GSPII_F; Type II secretion system (T2SS), protein F Finn RD, Yeats C, Desvaux M anon Prosite Domain The original family covered both the regions found by the current model. The splitting of the family has allowed the related FlaJ_arch (archaeal FlaJ family) to be merged with it. Proteins with this domain in form a platform for the machiney of the Type II secretion system, as well as the Type 4 pili and the archaeal flagella [1]. 
This domain seems to show some similarity to PF00664 but this may just be due to similarities in the TM helices (personal obs: C Yeats). 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.29 0.71 -4.21 340 14010 2009-09-11 10:25:30 2003-04-07 12:59:11 18 9 3954 8 3638 10916 2531 123.70 19 56.05 CHANGED hhcplushlp.uGlslhpulp..hlspptt..........pthhpptlppltpplptGt...sh..s...puhp................phs.....hh.sthhhthlpsu-puGsLsphLpphuphhcpphphp....hchhsshhtPh......hhlhluhhlhhhllshl.ls ..............................................hcpLuhhlp.uGl.slhpulp....hlsppht........................sthhpp.h.l.p.p..l..tp..p.l.....p.p.Gt.......sl.......s......pulp....................................phs......hF..sshh...ht....h.l.t.sG....E....p.....u.G.p.....L..s.....ph...Lp...pluphhcpphphp...........tc...hhts..h..h.Ph...........lllhl.uhhlhh.hllshllP.................................................. 0 1183 2343 3069 +2833 PF00263 Secretin Bac_GSPproteins; GSPII_III; Secretin; Secretin_C; Bacterial type II and III secretion system protein Finn RD anon Prosite Family \N 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.74 0.71 -4.86 155 5833 2009-01-15 18:05:59 2003-04-07 12:59:11 16 82 2114 0 1392 4693 1363 161.40 27 28.57 CHANGED ALppsupsclLusPplhshss......ppAplp.lGpplshh....sspss........ssss.........................hp..hp...........................ssGlpLp.lsP........plss.ssp...lpLplp..phoshss..t..............................................sssPs.....lsp.+plsT.plhlcsGpTlllGGllpcppppspspVP....hLu-IP....llGt.LF+spspppp+sELllhlTPcl...lp 
..........................................................................................ALpppupspllu.sPp.lhshss.......................ppA.lp.sG..p.p.lPh.h..........ps.............................s.ss.ss.............................lp....hp.................................................................................................................ps..GltLc.V.s.P................pltp...ssp............lpL.p.lp...phus.hsp..stp..........................................................................................stsss....lsp..+plsT.....pVt.l.p.sGp.o.lllGGlhpcp.ppp..shsp....l.P.....hL.GD.IP........llGt....LF+pp.sp..p.pp+ppLllhlpPpll....................................... 0 408 827 1132 +2834 PF01203 T2SN T2SP_N; GSPII_N; Type II secretion system (T2SS), protein N Finn RD, Bateman A, Desvaux M anon Prosite Family Members of the T2SN family are involved in the Type II protein secretion system. The precise function of these proteins is unknown. 26.00 26.00 26.10 26.60 25.80 25.80 hmmbuild --amino -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.42 0.70 -4.77 42 400 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 393 0 109 339 61 211.50 25 84.42 CHANGED hslsps.lslsulsGolWpGpspplphp...sh.pls.pVpWslsshuLltGpsphslphup....tt.slpG.pG.l.shuhu.uhtspslphshssshlhphh....hshPlps....sGplpl.....tlppht.....upshCppl.pGplhhp.suslpsshGsl.sLG.sltusluCp.s.uplhsphppsssplslshpsplp..sstpaphsuplc.sssshPsslppsLshlGps.cs...pGtashphpGRl ..................................t.lplssspGolWpGp.A..s.l.thp...................sh...h.....G..plpWchphh....sLlhGphphclchsp.......tlpu.tuhl....shuh.s..uhhspshthslPuu.hlhshh.........hPh.s.lph..pGplpl..........slpphp.......hups.hspph.pGplhhp..tspl....p....usl....u....s....l....sLG....sltusls...sp..s..uslslpl.....sp...p..s.st...lt.hphphslt....sss..paphpuhhp..s...tsp.s....t...s....Ltt.LshLGps.su...pGt.h.h.................................................................... 
0 28 57 83 +2835 PF03958 Secretin_N NolW-like; GSPII_III_N; Bacterial type II/III secretion system short domain Yeats C anon Yeats C Domain This is a short, often repeated, domain found in bacterial type II/III secretory system proteins.\ \ \ \ \ All previous NolW-like domains fall into this family. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.50 0.72 -3.80 231 6199 2009-01-15 18:05:59 2003-04-07 12:59:11 12 49 1721 11 1290 4691 1113 81.90 24 21.96 CHANGED spll.Lphssus-...............lsshLpph.h..................................................................................stph.pltssspsNslllp.ussptlpplpp.llpplDhsttp ..............................................................................................................pll.LpausAp-...................................ls.plLppl.h................................................................................................................................................ss.ps..plssD..pc.oNslllp.ssssshpplpp.lIppLDh..t..................................... 0 370 706 1018 +2836 PF02501 T2SI GSPII_IJ; Type II secretion system (T2SS), protein I Mian N, Bateman A, Desvaux M anon Pfam-B_2607 (release 5.4) Domain The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for the transport of proteins across the outer membrane first exported to the periplasm by the Sec or Tat translocon in Gram-negative (diderm) bacteria. As members of the T2SJ family, members of the T2SI family are pseudopilins containing prepilin signal sequences [1]. 
25.00 25.00 25.60 25.40 22.90 21.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.50 0.72 -4.01 100 934 2012-10-03 10:38:27 2003-04-07 12:59:11 12 4 817 9 179 576 128 81.20 28 65.20 CHANGED tLc-+slAtalA-NplsclpLp.pthPshu...tppupsphuGppWhhphp.ltsTsss.p.......hpplclpVt.........spp.pssslspLssalsp .................Lc-+slAsWlA-Nphspl....pLt...pth.pss.........pppGpsphuGppWaW+pp..shsT...sss.h........lptl-lpVs........ppctsssltphpuhh.................................. 0 34 83 132 +2837 PF03934 T2SK GspK; Type II secretion system (T2SS), protein K Bateman A, Desvaux M anon COG3156 Family Members of this family are involved in the Type II protein secretion system. The T2SK family includes proteins such as ExeK, PulK, OutX and XcpX. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.75 0.70 -5.24 106 1236 2012-10-03 02:11:09 2003-04-07 12:59:11 8 4 993 1 289 1639 405 262.20 26 82.09 CHANGED c.phphcpspshhttpQAhahuhuupshActlLppsh................hspLsph.WA...ts..h.hsl-.....................p.GplsuplpDhpupFNLNsL...............................stsst.................................hpshshptappLLpsLuls......t.tupplscslhDalDsD............................pt..t..GuE-s.Y..tutsssahs.usp.hssluELphl.Ghosphhp+LtPalssLP......s..pthlNlNTh....sA.lLsAlhss.....lshspAppllpp..........R..sts.sap..sls-ahst............lss.......t.....hpstls...lsSpaFtlpup....sphspsphphpollpps...............s.ssshs.l 
......................................................................................t.phtpsps..phtQAhahuhu..u..p.p.hAh.t.h.Lppshpppt.................hspLsp....WA............ts....t....h....ls...................................................................p..up..l..psplpDtpupF.N..LNsL......................................ht..s.....................................pshshphhptLl..t.p....l...s.ls...............thpuctl..spul.h-alDp.D..................................pps...t.hhG.s.EDs.Y.....ut.s.ssahs...usp............htsluE...L+hl.p..Ghs...s..........t.l....h..........p.......+.........L..tPhV....s........s........LP...........sphpl.NlNTh...............pu...lLtAlh.s......lo....s.pA..ptllpp...................R..........stp..sat....s.hsph.htt............................lss............htt............hpshls..l.p.Sp.aF..lpsp....hhh.sp.p.hp.hpoll.t.t.p.....hh............................................................................................................... 0 83 161 229 +2838 PF05134 T2SL GspL; Type II secretion system (T2SS), protein L Moxon SJ, Bateman A, Desvaux M anon Pfam-B_6494 (release 7.7) Family This family consists of Type II secretion system protein L sequences from several Gram-negative (diderm) bacteria. The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for extracellular secretion of a number of different proteins, including proteases and toxins. This pathway supports secretion of proteins across the cell envelope in two distinct steps, in which the second step, involving translocation through the outer membrane, is assisted by at least 13 different gene products. T2SL is predicted to contain a large cytoplasmic domain represented by this family and has been shown to interact with the autophosphorylating cytoplasmic membrane protein T2SE. 
It is thought that the tri-molecular complex of T2SL, T2SE (Pfam:PF00437) and T2SM (Pfam:PF04612) might be involved in regulating the opening and closing of the secretion pore and/or transducing energy to the site of outer membrane translocation [1]. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.33 0.70 -5.18 19 962 2012-10-02 23:34:14 2003-04-07 12:59:11 8 5 829 4 181 775 109 221.90 22 57.66 CHANGED Lhl+Lsssstpshph......hpstttphhtpG....phsstpshspls.hssts.shlllPusslhhpplsLPstst+phhpsLPahLE-plApDl-plHhslh.............stps-.ptpVssVc+phhptWLshhpptGlssppllPDshsLPh..sssuhsshphss..................................................pWLlRpstttGhus-tphhshhhtt...........ltshsssPt.s.s...........htsts.pshhhlhApshht...sshsLhpGtFs.p .....................................................................t...t..h...W...................tt....htpG.............hss..s.....p...p.L..tt....h.........t.....t...ts...lhl..l..lPuptlh.hhplpLP......s.......h....t...t.........+..........p...h...ph.Lsh.l.lE-plsp..D..l-p..l..H..h...slh.........................st..p.tp....ts....pV.s..s.l.c..p..phhcphlphLpts.Gl.sh.spl.........hP.........D....h........h....s.....l.Ph........t.tuhs..s.h.p.h.t.t...........................................................................phlh.Rts...tt...p......Gh...u..h......s..h.......s.hh.sh..h..ht.t...hspt..................ltshs..s.Pt..h.s....................................httt.s......tt...........h.......h.....hh.t....hht....sphsLhpG.ap...................................................................................................................................................................................................................... 
1 37 88 134 +2839 PF04612 T2SM GspM; Type II secretion system (T2SS), protein M Mifsud W, Desvaux M anon Pfam-B_5302 (release 7.5) Family This family of membrane proteins consists of Type II secretion system protein M sequences from several Gram-negative (diderm) bacteria. The precise function of these proteins is unknown, though in Vibrio cholerae, the T2SM (EpsM) protein interacts with the T2SL (EpsL) protein, and also forms homodimers [1]. 25.90 25.90 25.90 26.00 25.80 25.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.74 0.71 -4.44 63 1099 2012-10-02 17:03:51 2003-04-07 12:59:11 7 7 852 2 233 747 121 149.60 22 84.64 CHANGED hpplpthWpshssREppllsshushLhlslhYhslhpPhppttppspppltptppplshlpppusphpthp.ssssssstst.........uLpsllspoupptGl....slpRhpspGc.plpVtlcsssFssllsWLspLppppGlplpphclsct..............ssGhVslphhh..tts ........................h...lppaatshosRE+thlhssushlhsshhah....h..l....hpPh....ppphpp...t...ppplpphpp.hshlpspAs.plt...shp...tts..ss...pst...................tlspllsposp.tts.l............sl.t.p...lt....p.........u.........c................p........lplhlp.sssasslhpWLstl.p..t.p.h.u..l.plpphpls.tst.............tsG.lplpth.....s......................................................................... 0 63 127 185 +2840 PF00043 GST_C gluts; GST; Glutathione S-transferase, C-terminal domain Eddy SR, Griffiths-Jones SR anon Overington Domain GST conjugates reduced glutathione to a variety of targets including S-crystallin from squid, the eukaryotic elongation factor 1-gamma, the HSP26 family of stress-related proteins and auxin-regulated proteins in plants. Stringent starvation proteins in E. coli are also included in the alignment but are not known to have GST activity.\ \ \ \ The glutathione molecule binds in a cleft between N and C-terminal domains. The catalytically important residues are proposed to reside in the N-terminal domain [1]. 
In plants, GSTs are encoded by a large gene family (48 GST genes in Arabidopsis) and can be divided into the phi, tau, theta, zeta, and lambda classes [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.00 0.72 -4.00 58 13485 2012-10-03 01:14:49 2003-04-07 12:59:11 20 103 2532 701 4893 16003 4903 96.70 16 41.41 CHANGED hhshthphhhhshtts................................pppc.thppsppclhchlphhpphLp......sppah.sG-phohADl..hhhshlt...hhhthphshh.........spaPpLpsahp+lhsps ........................................................................................................................hh................................................................t.........h...t...t.....h..h..p....p.....h...p....c....h.....h..p.h....l....-pt.Lt.............ppsa..l....s.......G......-.....p...h.olA....Dl......s...h.....h...sh..lt............hh.....t...t..h.t..h.s.h....................sphP.p.lptahpclttps......................................................... 1 1323 2464 3783 +2841 PF02798 GST_N gluts; Glutathione S-transferase, N-terminal domain Eddy SR, Griffiths-Jones SR anon Overington Domain Function: conjugation of reduced glutathione to a variety of targets. Also included in the alignment, but are not GSTs: * S-crystallins from squid. Similarity to GST previously noted. * Eukaryotic elongation factors 1-gamma. Not known to have GST activity; similarity not previously recognised. * HSP26 family of stress-related proteins. including auxin-regulated proteins in plants and stringent starvation proteins in E. coli. Not known to have GST activity. Similarity not previously recognised. The glutathione molecule binds in a cleft between N and C-terminal domains - the catalytically important residues are proposed to reside in the N-terminal domain [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.47 0.72 -3.80 53 5748 2012-10-03 14:45:55 2003-04-07 12:59:11 15 61 1695 674 2420 16335 5075 72.80 24 29.98 CHANGED hslphashps..tspthRhhLt...htGlpaEt..phhsh....t.....spahphpst...............splPhl.ps......shplspShAIhcYluc ......................................................ha...h.s......ts.t.h.t..h..h.Lp......thG..l...p.....a...-..h.............h..h...l..........s..h..........t...t.....t....p....ht.......sc.a.h..p..h..p.Ph..........................upl..P......sl...p...cs................sht...lh...E.......S.s..A.IhpYLu..................................................................... 0 799 1279 1900 +2842 PF00735 Septin GTP_CDC; Septin Bateman A anon Pfam-B_440 (release 2.1) Family Members of this family include CDC3, CDC10, CDC11 and CDC12/Septin. Members of this family bind GTP. As regards the septins, these are polypeptides of 30-65kDa with three characteristic GTPase motifs (G-1, G-3 and G-4) that are similar to those of the Ras family. The G-4 motif is strictly conserved with a unique septin consensus of AKAD. Most septins are thought to have at least one coiled-coil region, which in some cases is necessary for intermolecular interactions that allow septins to polymerise to form rod-shaped complexes. In turn, these are arranged into tandem arrays to form filaments. They are multifunctional proteins, with roles in cytokinesis, sporulation, germ cell development, exocytosis and apoptosis [2]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.72 0.70 -5.39 14 2609 2012-10-05 12:31:08 2003-04-07 12:59:11 13 25 286 15 1492 3139 269 234.50 37 62.73 CHANGED +GhsFTLMVlGcSGLGKoThlNoLFtosLhss.....................pth.tst-.ctcpTlcIc.tpps.lEEc.Gl+LpLoVlDTPGFGDslsN.spsWcsllcYI-cQa-pYLcpEsplpR.pphhDsRVHsCLYFIsPsGHGL+PLDlthMKpLpp+VNllPVIAKADoLTscElpphKppIhp-IcppsIcIap..Pss-ps-..........-E-.hppscpL+pslPFAllGSsphlEt..cGcpVRGRpYPWGlVEVENssHsDFltLRshLlpTHlpDLp-sTpchhYEsYRocpLpshthtscs .......................................................................pGhpFslh.s.s...G.po.GlGKSTl....lN...o...L....F.t...o....p..h....t..t................................................................................tp.......php..p.....s.l......p.l.p..........t........p...h....p......l......p....E..p.....s..l...+.....l...p..LT.....ll.D..T.s.G....F....G...D...t...l.....s.s...p..............p.......s....a.p.....s....Ih......c.a..I.-....p....Q..a.....-.....p.....Y........Lp....c.........E...........p.....l.....p.....R.........h.....p.....h.....................D.s..Rl.........Hs..CLY........F.I.s....P..........s....G.......H.u.L+.s.LDl.t..h....M.Kp...L...pp..+.......V..N....l.....l.P...lIA.KADo...lo...p.Ehpph..Kp..p........I.h..p-l.t.......p.ps....l..p...........l..Yp......s..t.....p..p.-p..............................................t.....h......p.....stp.....h...p.t....h............P.FAV..lGSp..p....h..p...........h.........t.......s.....+.......h.....l....+....uRpY...s..W.G.h...l.p...V...EN.......p.HsDFhhLRp..h.Ll...p.s.php...DLp-.TpphaYE.taRt.ppLttht.....tst............................................................................................................................................. 
4 422 705 1111 +2843 PF00925 GTP_cyclohydro2 GTP cyclohydrolase II Bateman A anon Pfam-B_1147 (release 3.0) Family GTP cyclohydrolase II catalyses the first committed step in the biosynthesis of riboflavin. 23.20 23.20 23.60 23.20 23.00 23.10 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.07 0.71 -5.01 161 5362 2009-09-11 05:22:16 2003-04-07 12:59:11 15 28 3848 3 1428 3867 2940 160.90 39 47.23 CHANGED lcchspsplsTpa.....G...pFchhsac....sthssppH...lALlpG.......ch.s.sspssLVRlHupshsuDlhuoh...ps-sutpLcpAhctIuc..................................p.Gt.....GVllaL.t...pcucuhuLh...s+lpshthp.cp.uhcshcss..............DhRsY.uluAQILpcLGlp......cl.+LLoNsP.c.KhtuLpuaG..lcVsppl.sh ....................................................................................................phhspsplPTta.....G.....pFphhuac..................s...h..h..s.........s.....p...c.H....................lALlhG.................................................sh....s....s......p....ps..lLlRlHS.EClTGDlhtSh........RC.D...C...G.........QL....c....tAhp....t....I....sc...................................c..Gp....GlllYL.c....QEG..R.G..I..GLh...sKl+...A...Y...t....LQ...-p....GhD....T....l....-....A....N.htLG.h.....................ssDhR.-a.s.luA.pIL+.p.LGlp............cl.RLLT....N...N..P...c..Khpu..Lp..sa.G....lpls-cls........................................ 0 444 916 1213 +2844 PF01227 GTP_cyclohydroI GTP_cyclohydro_I; GTP cyclohydrolase I Finn RD, Bateman A anon Prosite Domain This family includes GTP cyclohydrolase enzymes and a family of related bacterial proteins including Swiss:Q46920. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.06 0.71 -4.99 104 4297 2012-10-01 20:59:24 2003-04-07 12:59:11 17 9 3743 160 1127 2999 2646 175.90 44 84.59 CHANGED hcpul+plL.pslGEDPsREGLh-.TPcRVA+ha.pElasG.hppss....phl.......sphFp.....................hs-h..Vll+cIphhShCEHHhlPFhG.pAHlAYl.P..s.s+VlGLSKlARlV-hau+RhQlQERLTtQIAsslpch.LpPcGVuVll-AcHhCMshR.GVc+ssupThToshpGhFcs-spsRpEFhsllp ........................................h.ptsh+plL.ptl.G..E...Ds..s...R-.GL.h-.T.PpRlA+hY...pElF..u..G.hptss........tplh..........ss.h.Fc..........t.................................h-EMVlV+DIs...haShCEHHhlPFhG......+AHVAYI.P....s..s+V......lG....L..SK..luR.....lV-haA+R.......Q...lQERLTpQIApAlt....ch.L..........p....sp.........G.......VuV.ll.EA.pHh.C.Mph..R..G.Vc.K.sso...tTsTosh..tGh.Fcp.stpsR.pEFLphl.t.......................................... 1 354 686 946 +2845 PF04670 Gtr1_RagA Gtr1/RagA G protein conserved region Waterfield DI, Finn RD anon Pfam-B_4577 (release 7.5) Family GTR1 was first identified in S. cerevisiae as a suppressor of a mutation in RCC1.\ \ \ Biochemical analysis revealed that Gtr1 is in fact a G protein of the Ras family. The RagA/B proteins are the human homologues of Gtr1. Included in this family is the human Rag C, a novel protein that has been shown to interact with RagA/B [1,2,3,4]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.53 0.70 -5.22 32 723 2012-10-05 12:31:08 2003-04-07 12:59:11 7 10 282 7 491 12357 922 216.90 39 64.20 CHANGED KlLLMGhptSGKoSh+pllFsNh.sp-ThpLusTl............................cl-pspl+.hutl..slslWDhsGQpsah-s...hhttpp-pIFpsVt......sLlaVh.Dsp........................p..-...htcsltpappslptlhphsPst+laVLlHKhDhlp...c-hRp-hacchtpclpcpsp.........sht.t.................................lshahTSIaDcSLYcAaSpIVppLI.PphsslEphLpphsptsss-clhLF-psohLhlupsstp.sc....t.............................paEphSshI ................................................................................................................+lLLMGhp.tSGKo.........S.......h........p.....p....l......l.............F.........p............p.....h..........s.....p........-....T........h......h.........L.....t...u........T..............................................................c..l...p....+...s.....c...l...p....................u.........l............sh..p.l.W..............Dh.s..G......Q...........s....F.....h..-s.............h............p......p..-...pI...F..p....s....l.s................s.LI..a.V..h..Dsp...................................................c......-.......h..c..s.l..p.p.h...p......p..s...l.....p..t......h.....h.....p......h.....s.......P........s....h.....p........l........l...h...l....H...KhD.....h.....lp..............-.D.....p....+......c..h.........c....-.......h....p...p...c.....l.p....cp...t........shs...........................................................................................ls.h.ahT..S.I.....a.D........c.S....l.a.c.A..a.......Spl...V...pp..L..I...P..plssLEshLp.hsp.....................ph-c...sh...............LF-hsohl..hIusss.p.s..c........hp..........................................paEhhSshI............................................................................................................................................. 
1 177 261 390 +2846 PF04138 GtrA GtrA-like protein Kerrison ND, Finn RD anon COG2246 Family Members of this family are predicted to be integral membrane proteins with three or four transmembrane spans. They are involved in the synthesis of cell surface polysaccharides. The GtrA family are a subset of this family. GtrA is predicted to be an integral membrane protein with 4 transmembrane spans. It is involved is in O antigen modification by Shigella flexneri bacteriophage X (SfX), but does not determine the specificity of glucosylation. Its function remains unknown, but it may play a role in translocation of undecaprenyl phosphate linked glucose (UndP-Glc) across the cytoplasmic membrane [1]. Another member of this family is a DTDP-glucose-4-keto-6-deoxy-D-glucose reductase, which catalyses the conversion of dTDP-4-keto-6-deoxy-D-glucose to dTDP-D-fucose, which is involved in the biosynthesis of the serotype-specific polysaccharide antigen of Actinobacillus actinomycetemcomitans Y4 (serotype b)[2]. This family also includes the teichoic acid glycosylation protein, GtcA, which is a serotype-specific protein in some Listeria innocua and monocytogenes strains. Its exact function is not known, but it is essential for decoration of cell wall teichoic acids with glucose and galactose [3]. 
21.30 21.30 21.40 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.75 0.71 -4.14 175 4541 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 2742 0 1077 3355 691 115.20 18 65.05 CHANGED +F.sllGshushls....hsl...hhlLh...h...............hthshhlAssluahsuhlhsahhNchaoFcspppt..........htphhpFhhs.shhuhhlshsh...hhlhh.phhth..........h..................................upll...uhhlsh.l...hsalhs+halF .............................ahhhGs.hss.lls...hsl....hhlhh...h...........................hth.p.h.h....lA..s....hl.uhl...luhlhsahhNc...tasF.cspspt.................hpph..h..p..Fhhh..thh....sh.hl.s....hh.l........hh.hhh...ph...h.sh..............hh......................................................................uhlh....u.hlsh.h....hsalhp+hhlF....................................................... 1 357 710 932 +2847 PF00211 Guanylate_cyc guanylate_cyc; Adenylate and Guanylate cyclase catalytic domain Finn RD anon Prosite Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.96 0.71 -4.93 21 9831 2012-10-01 23:51:22 2003-04-07 12:59:11 15 564 1435 89 5200 9161 3221 176.30 23 25.87 CHANGED lhsppacsVolhFuDIsGFTshsspps....shpllphLN-laspFDcLhsppt...lhKlKTIGDsYMssuGls....................cst.tHstphschALshhcth.pshshpp................................pslplRlGlHsGsVluGVlG..h+pscYslaGsTVNlASRMESsGhsscIpl..oppshphLp.......taths.c.uchpl+........G....+............uphpTaalhu 
...............................................................................................................................................................h......ppl..sl..h.F.u.............D.l..h.u.....F...T.p..h.sp..phs........................st...p.ll..p.h.L...sc.h.h.......s........t.h.s...p..l.l.p.php............................shh....l......K..h...l................G..D......u......h..h..ss..h..Ghs....................................................................p...p..s...t..p...ss...p.........h..u...l....s....h....h..ct...h....p...p...h.p...hpp.....................................................................................................................s..l....p..l....R....l....G..l..p......s............G.........................l.l...s.....G..s.l......G................p..........p...........p.....a..s....l.h.Gs.s.V......N..........hAu.....R.h....-..u..h.....u..........t...............s.......s................p......l..h..l.......o...p....p.s.h.p.h.lp..................th..t....h..p...h....s....h...l+...................G....................t..........t..................................................................... 
0 1996 2930 4185 +2848 PF00625 Guanylate_kin Guanylate kinase Bateman A anon Bateman A Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.98 0.71 -4.84 12 8050 2012-10-05 12:31:08 2003-04-07 12:59:11 16 166 4712 73 2497 6019 2540 171.00 31 44.42 CHANGED pt+slllhGPSGsG...+splhpsLlsphscpF.u.hsVs+TTRs.R.sEhpGccYaFl.o+-phppsIppstFlEaup.asushYGTphpslcplhcpu+p...slLDl-.hQusppl+ps.phsPhhlFItsPShpslpch.cthsppstcpltcchsthcpphpph.....FDhllsNssl-cshpcLcchlpst ........................................h...h.lll....s...GPS.Gs.G..............Ksolh.ptL..hp.p.....s.......p........h....t...hSl.S........t................T.TRt........R....u...Eh....c..G......h...c..Y.....a.Fl..oc.-p.F.cp..hl......p......p.s.p..h....l..E.......a...........u....p....h.......h..............u....N...............h........Y..G..T...s.h.p...tl..c..p.h.h.p...p...G..ps...................llL-.l-...hp.........G....s...p.p..l.....+.....p..............p......h...................p........s......h......h....lFl..t..P............P......S.......h............c..............p...........L........c...............p..........R..............l.............t............t............R...............u.............p......-...........s...t....c.....h....l........t...p...........R........h......t......p..A...p.....p..E..h.....p...t..h.....p.........aD..h.l.l....l.N..-..s...l.c....p.A....hpclppllt.......................................................................................................................... 
1 729 1238 1842 +2849 PF02058 Guanylin Guanylin precursor Mian N, Bateman A anon IPR000879 Family \N 25.00 25.00 68.30 68.00 18.60 18.60 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.42 0.72 -3.91 15 87 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 40 5 38 70 0 89.50 45 80.48 CHANGED puVpVp-ssFoFsLESVKKLK-Lp-......spoPRLtpps..........ssslC.spPsLPp-LpPlCpppsAusIlpRLcsIus..DsCEICs.sACTGC .........uVhlQ.tsFphpLESVKKLc-Lp-t.....h.sPRltsps...........hPslC.spPsLPp-LpPlCppppAupIhppLcsIAp..csCElCs.sACTGC............ 0 2 2 7 +2850 PF05120 GvpG Gas vesicle protein G Yeats C anon Yeats C Domain These proteins are involved in the formation of gas vesicles ([1]). 27.30 27.30 27.50 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.57 0.72 -4.17 11 122 2009-09-12 20:43:27 2003-04-07 12:59:11 7 2 104 0 50 138 7 76.20 33 85.04 CHANGED LLplsh.uPlpGllWIu-plpEcA.-pphpD.psLpppLssLphpL-hG-IsEEpF-pcE-ELL.+Lcthtp...htttstttss .......lhhlsh.uPlcGlhWlu-plp-pA.-cEhh.Dsssl+pcLtpLptth-tGEIoEEEa-ppE-cLLpRLpthtt............st............. 0 19 40 49 +2851 PF05121 GvpK Gas vesicle protein K Yeats C anon Yeats C Domain These proteins are involved in the formation of gas vesicles ([1]). 25.00 25.00 36.70 31.80 21.30 19.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.75 0.72 -4.16 19 150 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 127 0 62 157 6 87.20 46 75.27 CHANGED pht......l-s.sscslptGLssLVLTVVELLRQLMEpQAlRRMEpGsLS-splERlGpsL.tLE-pltcLpppaslssp..DLNlDLG.lGsLLsp .....s.plsh-s-slcpsLspLVLTlVELLRQLMERQAlRRh-sGsLo--QlERlGp.oLMtL-cchp-Lp-paGlpsc..DLNlDLGPLGsLL.................. 0 22 49 60 +2852 PF02213 GYF GYF domain SMART anon Alignment kindly provided by SMART Domain The GYF domain is named because of the presence of Gly-Tyr-Phe residues. 
The GYF domain is a proline-binding domain in CD2-binding protein Swiss:O95400. 20.50 20.50 20.60 20.50 20.40 20.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.04 0.72 -4.38 79 823 2009-01-15 18:05:59 2003-04-07 12:59:11 11 22 290 12 564 801 13 55.40 30 6.25 CHANGED hhWhYh....DspuplQGPFsutpMppWhppGYFsssl.l++h........ttsp..pshh.....hpphhtp ...............hWhY+.........DspG.plQ........G.....PFsstpMppWhpsG.YF..s.......s.Lhl++s........pptph.pshh....ph...t................................ 0 187 316 466 +2853 PF02895 H-kinase_dim Signal transducing histidine kinase, homodimeric domain Griffiths-Jones SR anon Structural domain Domain This helical bundle domain is the homodimer interface of the signal transducing histidine kinase family. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.16 0.72 -3.64 148 3074 2009-01-15 18:05:59 2003-04-07 12:59:11 9 46 2055 3 891 2655 208 69.70 34 8.88 CHANGED spol.RVcsp+LDpLhNLVGELVIscspLtp..hspphp..................................................................pp..........lppshppls....+...........l.sp-LQ-slhph ....................pol.RVsl-+lDpLhNLVGELVIsputLsp..hup.p.hs........................................................................................................................ppc......................Ltp.shspLp...........c.................................................lsp-LQ-uVMph........................................................................................................................ 0 298 603 761 +2854 PF03030 H_PPase Inorganic H+ pyrophosphatase Griffiths-Jones SR anon Pfam-B_1050 (release 6.4) Family The H+ pyrophosphatase is an transmembrane proton pump involved in establishing the H+ electrochemical potential difference between the vacuole lumen and the cell cytosol. 
Vacuolar-type H(+)-translocating inorganic pyrophosphatases have long been considered to be restricted to plants and to a few species of photo-trophic bacteria. However, in recent investigations, these pyrophosphatases have been found in organisms as disparate as thermophilic Archaea and parasitic protists [1]. 22.20 22.20 22.40 22.40 22.10 22.10 hmmbuild -o /dev/null HMM SEED 682 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.86 0.70 -6.27 139 1540 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 1217 2 611 1460 5022 626.60 43 93.38 CHANGED ulluLlauhhhhttlhptssGs......................................................................................................................................pcMp-IussIp-GAtAaLp+QY+slulhslll..slll....hhh............................................................uhhsuluFllGAlhSuhAGalGMplus+ANlRTApAA................pptu.............hspALplAF+uGuVhGhhVlGLuLLGluhhahlh..t.......................tthhpslsGauhGAShlAlFARlGGGIaTKAADVGADLVGKVEAGIPEDDPRNPAVIADNVGDNVGDsAGMuADLF.ESYssollAshlL...ushhhsst...................hlhaPLl.luulGIlsSllGsahV+sppss....................................................................................................shhpALppGhhloullshlshhhlsthhh........................................t.st...........h.....tlahsslhGLlsuhlIshlTEYYTussapPV+cIAcu.SpTGsATNIIsGLulGhcSTshPlllIssuIhsuahlu........................GLYGlAlAAsGMLossGhslAlDAYGPloDNAGGIAEMu.tLspcVRphTDtL....................DAVGNTTtAlsKGaAIGSAuLsALsLFuuYsppl............................................................................................................sh..hsl..sltsPhVllGLllGuhlPaLFuuhsMpAVG+AAtphVpEVRRQF+-hPGIh..........................-Gp..s+PDYs+sV-IsTpuAl+EMlhPulLsllsPlllGhl..lG............s.............puLuGhLhGslloGlhhAlhMuNuGGAWDNAKKYIE.........sG..ph.G.G....KGS-A.....HKAAVlGDTVGDPaKDTuGPulN.LIKlhslluLlhs 
..................................................................................................................llul.hauhh.hht.lht...s......Gs..................................................................................................................................tcMp-IuttIp-GA.uaLtppYphlslhhlllhlll....hhh.h......................................................shhssl.uFllGAhhSshuGalGMplushANsRTAtAA..................................p..u.............hstuhplAF+uGulhGhhlsuluLLslshhhhlh.......................................t..h.hpslhGauhG.uS.lAlFuRlGGGIaTKuADVGADLVGKV........E.tGIPE..DDP......RNP..AsIADNVGDN..VG....DsAGMuADLF.EoY..s..sohl...........Ashsl...ush..hhh..st..s.........................hhhaPlllsu.h.ulls.Sl.l.Ghh.h.lp.s.t.t.s.s....................................................................................................sh.tuLppuhhlo...ullshlshhhhshhhh.....................................sth.............tlhhsslhGlls.uhlIshlTE.....YYTusshp.............PVpplApu.u.p.T.GsuTslI.tGLulGhpSshhPslhIssuIhsu.ahhu................................GlaG.lAlAA....hGMLuhsuhhlAlDAYGPlsDNAGGIAEM.u......t..l.s.......p.........c...........V.RchTD.tL....................DAlGNT..TtAlsKGaAIGSAuLsAL.sLFuuYhppl..........................................................................................................................................th.....hsl.........slhsPhVllGLh.lGu.hlPalFuuhsMp.AVG+AAhphVpEVRRQF+-hPGIh..........................c..G...p...s..+PD.Ys.+sVcIsTcuAl.+EMlh..Pu.lL.........sllsP...lll.Gh.h..hG.................................stuluGhLhGsllo.G.........lh.hAI.h..uNuGGAWDNAKKhlE..........t.G......th..s...u............................KGS-sH.KAuVsGDTVGDPaKDTuGPulN.LIKlhslluLlh..................................................................................................................................................................................... 
0 268 465 553 +2855 PF01725 Ham1p_like Ham1 family Bashton M, Bateman A anon Pfam-B_2030 (release 4.1) Domain This family consists of the HAM1 protein Swiss:P47119 and hypothetical archaeal bacterial and C. elegans proteins. HAM1 controls 6-N-hydroxylaminopurine (HAP) sensitivity and mutagenesis in S. cerevisiae Swiss:P47119 [1]. The HAM1 protein protects the cell from HAP, either on the level of deoxynucleoside triphosphate or the DNA level by a yet unidentified set of reactions [1]. 19.60 19.60 19.90 19.70 19.40 19.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.10 0.71 -4.93 110 5049 2009-01-15 18:05:59 2003-04-07 12:59:11 11 23 4640 36 1328 3692 2890 187.30 39 82.95 CHANGED lllATsNptKlcEhpplLsshs....hth............tclt....th.h-l.Es.usoat-NAhhKActhsphh............sts..................sluDDSGLpl-ALsG...hPGlYSARa................u.......................cpt..s.pt....LLch..lpsh.....cs........RsApFhssluhsp..sssp...........hhh................hcGpsc..GpIspps+..Gps.GFGY............DPlFhs.p...sh....spTaAEh...s.cpKN........plSHRu+Alpp.....lhph...L .................................lllATsNtGKlcE..h..pplLsshs.............hcl...hs.............s-hs.......s....-s.tET.........GtTF..tENAhlKActsu.c.h.s............Ghs...................slADDSGLpVD..AL..s.G.....tP..........G.lYSARa.......................................u.................Gpts..........sDpt...N.p+.........LLpp..Lpsh................cc................R.pApF.hsslshsp......ss.sp.............shl..............................................sc.G.ph...p..Gp.......Ih...p...p...s.+...........Gps...GF.........GY............D...Pl...Fhs..s..th..............scThAEL...st-.E..K.s...............tlSHRupAlctLhp..h........................................................................ 0 453 851 1131 +2856 PF04388 Hamartin Hamartin protein Bateman A anon Wood V Family This family includes the hamartin protein which is thought to function as a tumour suppressor. 
The hamartin protein interacts with the tuberin protein Pfam:PF03542. Tuberous sclerosis complex (TSC) is an autosomal dominant disorder and is characterised by the presence of hamartomas in many organs, such as brain, skin, heart, lung, and kidney. It is caused by mutation either TSC1 or TSC2 tumour suppressor gene. TSC1 encodes a protein, hamartin, containing two coiled-coil regions, which have been shown to mediate binding to tuberin. The TSC2 gene codes for tuberin Pfam:PF03542. These two proteins function within the same pathway(s) regulating cell cycle, cell growth, adhesion, and vesicular trafficking [1]. 25.90 25.90 26.00 26.00 22.00 25.80 hmmbuild -o /dev/null HMM SEED 668 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.39 0.70 -5.85 6 205 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 137 0 142 206 0 536.20 25 62.51 CHANGED pppssls-LlshL-SssLcpl--V+shlpEpLSs-+GshLVNsLVDYaLpTsSppslclLoolpEPHDKaLLDKhNEshs+sssRLsoLoLLGHVVR+QPsWlHKIu+hPLLsSLLKCLKpDoDVVVLhoulLVLITLLPMIPQosKQaLhDaFDIFGRLu...............SWsl..............+sPG+ss-VaLl.......HL+AuVYS.LFHRLYGMYPCNFlSYLR..paYSMKENh-T....FpEVVK...............................PMLp+VRlHPpLVTGTpDaEL.DPoRWKchEsHDlVhECA+lS...............LDsp..EuSsE-Gh.ohsc...........................SsusPhsusp..h.s.stShP..oshphoh+SspTph.spsphD.st..ssc-slWSPSslCGhuTPPSSh.shsP...hplshpsuh.suchhussGuGpspsus.STPuTos.....P...Pshuss..hhph..ssspsps.ppcc.psp.uhshhsRppp.........................lpsl-+psc.................tpthcsulspElhpl...sptp.c..ssh.Ghc.sh.h..cpLhssptpctp...............LsSo.D+....................Gsssspuu.shp..W.....FTPh-ssh+cp..s......c.pt...s..s.usCptsst..........s.PY-th...F-lALPK...susLFlp+KTtEslc+sttpc.p.......phcccslssoS....PhEVLD.clIppGp-AHsc..cR.hs.so.o.c.spht...P...-ELcslRuQLLL....LHsQLLYERaKRpQHAlR..........NRRL 
.............................................................................................................................t....................................................................................ppP..p.hhsplsp.hht.p...s...t...pRl.sLpLL.s.phlphpPsah.apl...ptsLh.slL+sL........phDssssslhsullsLlhhLP....hl....P.p....uh..tp....aL...chFslauRLs.........................sWph....................................................................................................................pps.s..ph..sc.hhl..............................HL....ph...u..l.h........t...LFp...hLYGhYPsNFlsaLR........pha....p.h.c..p.....s..h.ps...........hpcs.lc.........................................PhhpphplHPp.l.l...T..s...o..h......-.pEl..cspRW.......pchE..s.......cDlVhECtp.l.s.............................................Lssh.....pss..ppt......t.......................................................................................................................................................s.s.P.h..ts...................p..s...........t.h..p......t.t..................................................................t....h..os...p...............th...ps..s.........s......s.........................s....p........s............p......h...s..t.s..s........p........shsths...................................t............................t..................th..h...tttp..............................................................................................h.......p..................................tt....s.p..tp..th........h............................................t......................................................................................................................................................................................tt......................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 47 66 110 +2857 PF01567 Hanta_G1 Hantavirus glycoprotein G1 Bashton M, Bateman A anon Pfam-B_399 (release 4.0) Family The medium (M) genome segment of hantaviruses (family Bunyaviridae) encodes the two virion glycoproteins. G1 and G2, as a precursor protein in the complementary sense RNA. 25.00 25.00 46.60 46.00 17.00 16.40 hmmbuild --amino -o /dev/null HMM SEED 525 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.86 0.70 -6.25 15 433 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 136 1 0 365 0 303.60 52 49.41 CHANGED +slYEhKlECPHTVsLGQGhVsGpVELs.lPLsclssLhlESSCNFDl..HsossshQpaTpVsWcKKushs-ossAupsoFEspSpEVsLKGTCslsschhEssaKs...RKTVlCYDLSCNQTaCpPTVaLIuPlpTChoh+SChluLuspRIQVlYEKTYCVoGQLlEGpCFsPs+TlshoQPsHTYDlsTlPlhCFhlsKK...GsshKIhsplEp.lhsKssCT-NslQGYYlChlGupSEPlaVPuh-DaRSuElhS+MlhsP+GEDHDhstsupushRIuGPlpuKVPpTpoo-ThpGlA.FuGlPhYSSLSsLlppsDPcYlFSPGllP-tNHSsCDKKTlPLTWoGalslsGphE+lTsCTVFCTLuGPGASCEAYSEsGIFNISSPTCLVNKsQRFRGoEQplNFVCQRVD.DlVVYCNGQKKVILTKTLVIGQCIYTFTSLFSLlPGVAHSLAVELCVPGlHGWATsALLlTFCFGWlLIPulTLllLKlL+llsa.CS+YosESKFKhILEKVKhEYQKTMGS 
.....+sl.-h+hpCPHols..hGps.l.G.sEls.h.lttstphh.ESSCshD...Hpohss.pphTpl.WctKu...pptpuspsoFps.ptplsh+GhChl..phh-psh+.s...++olhCYDLsCNpThCpPTlahIsPl.sC...c.Ch...........................................................................................................................................................................................................................................................................................sl.haCNG.+KhIhT+TLlIGQCIYohTSlFSlhPuVAHSlAlELCVPGhHGWAThhLlhTFCFGWlLIPshThhlLhhLhhhs..hpp.s.-pph+.llp.............. 0 0 0 0 +2858 PF01561 Hanta_G2 Hantavirus glycoprotein G2 Bateman A anon Pfam-B_401 (release 4.0) Family The medium (M) genome segment of hantaviruses (family Bunyaviridae) encodes the two virion glycoproteins. G1 and G2, as a precursor protein in the complementary sense RNA. 19.30 19.30 20.30 19.30 17.70 19.20 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.75 0.70 -6.09 6 1286 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 278 0 1 760 0 186.40 42 58.72 CHANGED shsLpPsWoDsAHGVGllPM+TDLELDFSLPSSSpYTYRRcLpNPsNEppplsFHlpI-cQsItA-IQpLGHWhDupaNLKTuFHCYGACpKYtYPWpTAKCahEKDYEYETuWGCNPsDCPGVGTGCTACGlYLDKLKSVGpsaKIISl+YoR+VClQLGsEppCKpIDsNDCLlTsuVKVClIGTlSKFpsGDTLLFLGPLEtGGlIFKpWCToTCpFGDPGDIMSTpsG.hpCPEasGuFRKKCsFATTPlCEYDGNTlSGYpRhlAT+DSFQSFNsT-PHlTss+LEWhDPDuoLRDHINllls+DlsFpsLuENPC+VsLpsuSIDGAWGSGVGFoLsCpVSLTECsoFLTSIKACDpAMCYGAoossLsRGQNTV+IsGKGGHSGSpFpCCHsp-CSppGLpAuAPHLDRVsGhsplDs-KVaDDGAPECGlpCWFpKSGEWlhGILsGNWlVlsVLlVlLILSIhLhSlhCPsR.p+KKu 
........................................................................h.............................ChGtC.c.th...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 1 1 1 +2859 PF00846 Hanta_nucleocap Hantavirus nucleocapsid protein Bateman A anon Pfam-B_310 (release 3.0) Family \N 21.40 21.40 21.60 54.70 21.00 21.30 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.39 0.70 -5.78 4 1583 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 297 6 0 1051 1 233.20 54 99.96 CHANGED MSsLcElQ-pITtHEQQLVhARQKLKDAEKsVEVDPDDVNKSTLQSRRuAVSuLEsKLuELKRQLADhlusQKhsuKPVDPTGlEPDDHLKE+SuLpYGNVLDVNuIDlEEPSGQTADWhuIGsYIluFslPIlLKALYMLSTRGRQTVKENKGTRIRFKDDSSaE-VNGIRKPKHLYVShPTAQSTMKA-EITPGRFRTIsCGLFPAQIKARNIISPVMGVIGFuaFVKDWs-+I-sFLspcCPFL.....Pcstusutt.hhuT.RAYhhsRQstlspocl.DIssLhptApAuusTLhsDIpoPHSsWVFACAPDRCPPTsLYlAGlsELGAFFuILQDMRNTIMASKoVGTAEEKLKKKSAFYQSYLRRTQSMGIQLDQRIIlhYMlpWGKEsVNHFHLGDDMDPELRQLAQuLIDsKVKEISNQEPLKL 
..............................................................................................................................................Ih.Ylhshs.sIlLKuLYhLoTRGRQT.K-NKGhRIRhKDDo....lNGIR+PKHLalShPsAQSoMKA-ElTPGRFRThlCGLaPsQIpsRNhhSPVMuVlGF.hhsKDWspRIccahtt.C.Fh..........sts..s.h..t....p.p.h..hN+sYhhpRQtsLst.cst-hptlhpaAtsusso..-pIcSPpu.WlFusAPDRCPPTs.......................................................................................................................................... 0 0 0 0 +2860 PF03866 HAP Hydrophobic abundant protein (HAP) Finn RD anon DOMO:DM07503; Family Expression of HAP is thought to be developmentally regulated and possibly involved in spherule cell wall formation [1]. 22.30 22.30 24.60 24.00 22.20 22.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.16 0.71 -4.45 2 5 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 3 0 0 5 0 146.80 39 58.77 CHANGED MMKYlFlALChhAlV.LAoEs-.tcusp+..tLsVtG...LlsClVAllApIVuGLLRVIIGLVlTLSGVLpIVlGsVlhLVsslAuLALDlVtuoVsGIhsshLshshlhphlEEsL+s..uQ.LLsGLVpAlhALPLulLVALssLT-ulApuuCshGhStsGhh ...........................MMKYlFlALChhAlV.LAoEs-.tcusp+..tLsVtG...LlcCIVAllAsIVS.GLLRVVlGLVI.TLS.GVLQIVlGlVlpLVDllAuLALDlVsuTVTGIL.NcLLcFDhlhthlEEsL+s..uQ.LLsGLVpAlhALPLulLVALssLT-ulApuuCshGhStsGhh.................... 
0 0 0 0 +2861 PF01543 HCV_capsid Hepatitis C virus capsid protein Bateman A anon Swiss-Prot Family \N 21.60 21.60 21.70 21.70 21.20 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.72 0.71 -4.12 5 9598 2009-01-15 18:05:59 2003-04-07 12:59:11 12 23 146 9 0 6731 0 106.30 87 18.88 CHANGED STNPKPQRKTKRNTNRRPQDVKFPGGGQIVGGVYLLPRRGPRLGVRATRKTSERSQPRGRRQPIPKARsPEGRoWLSPGTLGPShuhRAlsGpDG.C.....GhAGh..sPtsuRs.Ws.ssPhhR ...............................................................STsPKPQRK.T.K..RN..TN..RRPQDVKFPG..GG..QI..V..G..G...V.YLLPR....RGPR...LG.V..RATR..K.......T...SER.........S..QPR..G..R..R.....QPI.....PKA....RRPEGRoWAQPGYPWP......LYGNEG.h......GWAGWLLSPRGSRPSWGPoDPRRR................ 0 0 0 0 +2862 PF01542 HCV_core Hepatitis C virus core protein Bateman A anon Swiss-Prot Family The viral core protein forms the internal viral coat that encapsidates the genomic RNA and is enveloped in a host cell-derived lipid membrane. The core protein has been shown, by yeast two-hybrid assay to interact with cellular DEAD box helicases [1]. The N terminus of the core protein is involved in transcriptional repression [2]. 21.60 21.60 21.80 21.80 21.10 21.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.38 0.72 -3.59 6 7406 2009-01-15 18:05:59 2003-04-07 12:59:11 13 24 134 0 0 5245 0 59.90 89 8.38 CHANGED SRNLG+VIDTLTCGFADLMGYIPLVGuPVGG.VARALAHGVRlLEDGINYATGNLPGCSFSIFlLALLSCLTsPAS ...SRNLGKVIDTLTCGFADLMGYIPLVGAPLGG.sARALAHGVRsLEDGVNYATGNL.P.GCSFSIFLLALLSCLTlPAS.............. 
0 0 0 0 +2863 PF01539 HCV_env Hepatitis C virus envelope glycoprotein E1 Bateman A anon Swiss-Prot Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.44 0.71 -4.83 4 42235 2009-01-15 18:05:59 2003-04-07 12:59:11 12 27 140 1 0 24488 0 72.70 67 29.80 CHANGED EVRNlSuhYaVTNDCoNsSIsaphsDAlLHsPGCVPC.+-uNsSRCWlsVTPsVAs+psuh.TpsLRpHlDhlVhuATLCSALYVGDLCGuVhLluQhFhhSPp+H.hsQDCNCSIYPGHITGHRMAWDMMMNWSPTsshllA.hhRlPpsllDhIuGAHWGVlhGLAYFSM.GsWAKVlllLLLhAGVD .....................................................................................................................................................................................................................PssuhVluplLRlPQs..lhD.hl.AGAHW...G..VLAGlAYaSMVGNWAKVLlVhLLFAGVD............................................................. 0 0 0 0 +2864 PF01560 HCV_NS1 Hepatitis C virus non-structural protein E2/NS1 Bateman A anon Swiss-Prot Family The hypervariable region of the E2/NS1 region of hepatitis C virus varies greatly between viral isolates. E2 is thought to encode a structurally unconstrained envelope protein [2]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.19 0.70 -5.99 10 44241 2009-01-15 18:05:59 2003-04-07 12:59:11 12 24 122 3 0 25163 0 104.70 61 44.06 CHANGED +VTGGssA+TTpulsShFosGu+QcIQLINTNGSWHINRTALNCNDSLpTGFLAuLFYsH+FNSSGCPERhASCRsIDcFcQGWGPIoYss..ssspDpRPYCWHYPPRPCGIVPApoVCGPVYCFTPSPVVVGTTDRpGAPTYoWGENETDVFLLNNTRPPpGNWFGCTWMNSTGFTKTCGAPPCsIGGsGNsT..LlCPTDCFRKHP-ATYoKCGSGPWLTPRChVDYPYRLWHYPCTVNFTIFKVRMYVGGVEHRLsAACNWTRGERCDLEDRDRSELSPLLLSTTEWQlLPCSFTsLPALSTGLIHLHQNIVDVQYLYGlGSAlsSasIKWEYVlLLFLLLADA .........................................sTGGusA+sstG.l....s.u....L....F....o....s....GspQsIQLINTNGSWHINRT..ALNCNsSLpT.Ga..lAuL.hYh.p...+...F.NuSGCPER.hASC+slspFsQGW.GPIsasp.....st.s.......-pRP.................................................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +2865 PF01538 HCV_NS2 Hepatitis C virus non-structural protein NS2 Bateman A anon Swiss-Prot Family The viral genome is translated into a single polyprotein of about 3000 amino acids. Generation of the mature non-structural proteins relies on the activity of viral proteases. Cleavage at the NS2/NS3 junction is accomplished by a metal-dependent autoprotease encoded within NS2 and the N-terminus of NS3 [1,2]. 
25.00 25.00 25.10 25.00 24.80 23.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.19 0.71 -4.87 29 2278 2009-09-12 04:23:25 2003-04-07 12:59:11 13 38 114 14 0 2355 0 183.60 72 9.16 CHANGED DsppuuslGssVlshlslhTLoPsYKthLs+hlWWhQYhlsRsEAhlplWlPPLpsRGGRDu.lIlLsslha...PplsF-lTKhLLAlLGPL....YlLpsuLl+VPYFVRApsLLRsChLlRplAGGKYsQhsLLclGthTGTYlYDHLuPloDWAusGLRDLAVAVEPVlFSPMEpKlITWGADTAACGDIlpGLPVSAR ........................................DpEhAASCGGsVLVGLhhLTLSPaYKtaluhhlWWLQYFlTRsEApLpVWVPPLNVRGGRDA.lILLhCslH...PsLlFDITKLLLAlhGPL....alLQ...AuLh+VPYFVRsQGLlR.hChLsRKhAGGHYVQMAllKLGALTGTYVYsHL.TP.L+DWAHsGLRDLAVAVEPVVFSpMETKlITWGADTAACGDIIsGLPVSAR..... 0 0 0 0 +2866 PF02907 Peptidase_S29 HCV_NS3; Hepatitis C virus NS3 protease Griffiths-Jones SR, Knutson S anon Structural domain Domain Hepatitis C virus NS3 protein is a serine protease which has a trypsin-like fold. The non-structural (NS) protein NS3 is one of the NS proteins involved in replication of the HCV genome. NS2-3 proteinase, a zinc-dependent enzyme, performs a single proteolytic cut to release the N-terminus of NS3. The action of NS3 proteinase (NS3P), which resides in the N-terminal one-third of the NS3 protein, then yields all remaining non-structural proteins. The C-terminal two-thirds of the NS3 protein contain a helicase. The functional relationship between the proteinase and helicase domains is unknown. NS3 has a structural zinc-binding site and requires cofactor NS4A. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.80 0.71 -4.68 9 8309 2012-10-02 13:45:52 2003-04-07 12:59:11 10 46 119 110 1 6342 8 144.80 87 19.64 CHANGED pGpl.VLuTuTpoahGTClNGVhaTsaHGAuu+TLAGPpGPlsphasssspDlssaPsPsGApSLpPCoCGusclYLlTRcusll.uR++GD.pssLhuPpPlSshKGSSGuPlLCspGHsVGhFpAAlsotGsspulcFl.PhEslsss ......................................EGEVQlVSTATQoFLATClNGVCWTVYHGAGo+TlAuPKGPlhQMYTNVDQDLVGWPAPs.GARSLT.PC.TCG.S..S...D..LYL..V.......T....R...H..A.....DV..IPVR.RRG....D...S.RG.....SL.....L...S..P.....RPlSYL.....KGSSGGPLLCPuGHAVGIFRA..AVCTR..GV..AKAVDFI.PVEshETT................... 0 0 1 1 +2867 PF01006 HCV_NS4a Hepatitis C virus non-structural protein NS4a Finn RD, Bateman A anon Pfam-B_315 (release 3.0) Family NS4a forms an integral part of the NS3 serine protease, as it is required in a number of cases as a cofactor of cleavage [1,3]. It has also been reported that NS4a interacts with NS4b and NS3 to form a multi-subunit replicase complex [3]. 21.00 21.00 21.10 22.60 20.90 20.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.64 0.72 -4.43 15 2766 2009-01-15 18:05:59 2003-04-07 12:59:11 15 42 118 67 1 2514 0 54.10 82 3.25 CHANGED SsWVLVGGlLAAlAAYChosGSlVIlG+lslsGpP..................sllPD+EshappaDEMEEC .......STWVLVGGVLAALAAYCLoTGsVVIVGRIlLSGKP..................AlIPDREVLYppFDEMEEC............................... 0 1 1 1 +2868 PF01001 HCV_NS4b Hepatitis C virus non-structural protein NS4b Finn RD, Bateman A anon Pfam-B_315 (release 3.0) Family No precise function has been assigned to NS4b. However, it is known that NS4b interacts with NS4a and NS3 to form a large replicase complex to direct the viral RNA replication [1]. 
20.30 20.30 20.40 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.12 0.71 -4.91 11 2035 2009-01-15 18:05:59 2003-04-07 12:59:11 14 36 116 2 0 2032 1 181.50 82 8.46 CHANGED hp.pshshlssAstpApshpshlpo.....h.s+hpphWsphhhNhhShIthhsuh.shstNPslushhuFhhulooshssphpLhhslLhGhsuotLusPssuhAhssAhhAGuulso.hLu.lllslLuGatuuVsuA.lsFclhuGchs.s-DhhsLlsslhSPGAuVsGVshuslLhphhs..tGuspWhNRLls ..........................FKQKALGLLQTAo+QAEshsPsVpo.....sWp+LEsFWAKHMWNFISGIQYLAGLSTLPGNPA.IASLMAFTAulTSPLTTppTLLFNILGGWVAAQLAsPu.AAoAFVGAGlAGAAlGSlGLGKVLVDILAGYGAGVAGALVAFKIMSGEhPSTEDLVNLLPAILSPGALVVGVVCAAILRRHVGPGEGAVQWMNRLIA....... 1 0 0 0 +2869 PF01506 HCV_NS5a Hepatitis C virus non-structural 5a protein membrane anchor Paterson M, Bateman A anon Bateman A Family The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. The N-terminal region of the NS5a protein has been used in the construction of the alignment for this family. The C-terminal region has not been included because it is too heterogeneous. 20.50 20.50 20.60 20.70 20.30 20.00 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.48 0.72 -6.96 0.72 -4.20 19 5540 2009-01-15 18:05:59 2003-04-07 12:59:11 14 38 115 5 0 5082 0 22.40 82 2.09 CHANGED usWLhDlWDWVsplLschpshL+ ...GSWLRDlWDWICpVLoDFKTWLp.. 0 0 0 0 +2870 PF04618 HD-ZIP_N HD-ZIP protein N terminus Kerrison ND anon DOMO:DM04570; Domain This family consists of the N termini of plant homeobox-leucine zipper proteins. Its function is unknown. 
21.70 21.70 22.10 22.90 20.30 19.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.77 0.71 -3.54 11 75 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 25 0 39 87 0 93.00 39 35.13 CHANGED Mt.ccD......cLGL..SLSLuhstppts............................tpss.phshtPs.................sop.pptsuppthhsspss-tpoFLRGIDVNRhPuss............-h.EEE.sG...VSSPNSTlSS.lSG.KRSppctp ..................................................................................................................huh..uhtLs...........................................t......................................t......t..t.....tspphp.....hsh.lRGIDVNRhPsss.............ch.-E-..sG...VS.SPNSTlSS.hSG..KRuppp.s................................. 0 5 21 27 +2871 PF02329 HDC Histidine carboxylase PI chain Mian N, Bateman A anon Pfam-B_19599 (release 5.2) Domain Histidine carboxylase catalyses the formation of histamine from histidine. Cleavage of the proenzyme PI chain yields two subunits, alpha and beta, which arrange as a hexamer (alpha beta)6. 
25.00 25.00 28.30 28.00 15.80 15.10 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.96 0.70 -5.78 4 71 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 62 34 9 58 3 262.60 46 94.92 CHANGED csKlpthchD.....ppAISP.+caCpGYhpPGsh.GNGYVoslKlSsGsVDhoD..hLDu...............IVSYDRsEpNDAYlGQINMhTASSFsGlpGtlWGaDlAhp-sltpcK.hPLYpppQ.-Gss.....lPVYsh+PLL-As.cLFGptc..cRRFsshPGuaVlCANKusTA.cPp..tshK.G...hVWSsIuLulhcDRs+susLFlEDAGla..css.....sc--lhtaLEsph+tsTpSls.CGpDpHlha-chaIGatYshhcPGQlGsALoCAPYVoLApsAlPus..hpPuc....LspholSpW.-chshcsL..p..pK ......................................................s...............+hAluPacpascGYh.PGsh.GpGYVosLKVssGsscKT...DD.lLDu...............IVSYDRAEtpsAYlGQINMhTASSFsGhtGplhGaDlhtpspl..cc.pPLap.pQaDGoc.....L.lYDu+PL.-uhlEhFGTpc..pRRa.shPGAalhCANKulTA.RPpp..spshp.GpuYGVWShIAlShupDtscsushFlEDAGlW.tcss.....sE--lhtaLcs+R+uhshSlspCGcDp.....p.....lhacpoaIGFAashMcPGpIGsAlTsuPYlshshsulPut..hhPs.....hpphohspWL-cMsapsL.......t............... 
0 4 7 8 +2872 PF02924 HDPD Bacteriophage lambda head decoration protein D Griffiths-Jones SR anon Structural domain Domain \N 29.00 29.00 29.00 29.00 28.40 28.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.43 0.71 -4.31 74 818 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 475 18 82 442 27 110.10 37 93.51 CHANGED shspshp.hu-hLhhEussph.SR-ssslsuG....sslssGoVLG.....tlsA...o................................................................................................Gc...asshsss.us...DGu.p.sAsulLhtslsAoss.cspsss....lsttu.lspssLsassuhss.sp+ssAhspLssh.uI ..........................................................................p....s.p.thp.hh.usss.s..tTusGshuts.....sssPAhTPLM......LDsu......o......................................................................................................GK...LVsW...........DGp...t....pAVGILs.l.sl-s.T......psslTa...YKSGo..Fsh-slhWP-uss-.pKKtsAFAGoA.....th................................. 0 28 48 67 +2873 PF01517 HDV_ag Hepatitis delta virus delta antigen Bateman A anon Pfam-B_808 (release 4.0) Family The hepatitis delta virus (HDV) encodes a single protein, the hepatitis delta antigen (HDAg). The central region of this protein has been shown to bind RNA [1]. Several interactions are also mediated by a coiled-coil region at the N terminus of the protein [2]. 
25.00 25.00 26.00 25.90 21.90 21.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.26 0.71 -4.62 4 1108 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 61 5 0 1072 0 117.60 80 89.58 CHANGED MSQo-sR+sR+G.REEhLEpWlsuRK+tEELE+DLRKspKpIKKLEE-NPWLGNlhGIIRK.cKDGEGAPPAKRsRpDQMEVDSGPtKRP+tuGFTDpERcDHRRRKALENKKKQLSuGGKpLS+EEEEELRRLT-EDEcRERRlAGPRVGsVNPh-GGPRGAPGGGFVPSMpGVPESPFoRTGEGLDIRGoQtFP ...........................................................................................................................KQLuuGGKsLS+EEEEEL+RLTEEDERRERRVAGPpVGGVNPLEGGsRGAPGGGFVPSMQGVPESPFoRTGEGLDlRGsQGFP....................... 0 0 0 0 +2874 PF02985 HEAT HEAT repeat Griffiths-Jones SR anon Reference [2] Repeat The HEAT repeat family is related to armadillo/beta-catenin-like repeats (see Pfam:PF00514). 23.50 17.60 23.50 17.60 23.40 17.50 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.53 0.74 -7.59 0.74 -3.39 590 1878 2012-10-11 20:00:59 2003-04-07 12:59:11 17 196 359 47 1138 5988 309 30.50 27 3.66 CHANGED llshlh..phlp...Ds..s.pVRtsuspslsplsphh ...........hhshlh....phhp....Ds..sspVRpuAspsLsphsp..h..... 0 419 626 897 +2875 PF03130 HEAT_PBS PBS lyase HEAT-like repeat Mifsud W, Bateman A anon Pfam-B_172 (release 6.5) Repeat This family contains a short bi-helical repeat that is related to Pfam:PF02985. Cyanobacteria and red algae harvest light energy using macromolecular complexes known as phycobilisomes (PBS), peripherally attached to the photosynthetic membrane. The major components of PBS are the phycobiliproteins. These heterodimeric proteins are covalently attached to phycobilins: open-chain tetrapyrrole chromophores, which function as the photosynthetic light-harvesting pigments. Phycobiliproteins differ in sequence and in the nature and number of attached phycobilins to each of their subunits. 
This family includes the lyase enzymes that specifically attach particular phycobilins to apophycobiliprotein subunits. The most comprehensively studied of these is the CpcE/F lyase Swiss:P31967 Swiss:P31968, which attaches phycocyanobilin (PCB) to the alpha subunit of apophycocyanin [1]. Similarly, MpeU/V attaches phycoerythrobilin to phycoerythrin II, while CpeY/Z is thought to be involved in phycoerythrobilin (PEB) attachment to phycoerythrin (PE) I (PEs I and II differ in sequence and in the number of attached molecules of PEB: PE I has five, PE II has six) [2]. All the reactions of the above lyases involve an apoprotein cysteine SH addition to a terminal delta 3,3'-double bond. Such a reaction is not possible in the case of phycoviolobilin (PVB), the phycobilin of alpha-phycoerythrocyanin (alpha-PEC). It is thought that in this case, PCB, not PVB, is first added to apo-alpha-PEC, and is then isomerised to PVB. The addition reaction has been shown to occur in the presence of either of the components of alpha-PEC-PVB lyase PecE or PecF (or both). The isomerisation reaction occurs only when both PecE and PecF components are present, i.e. the PecE/F phycobiliprotein lyase is also a phycobilin isomerase [3]. Another member of this family is the NblB protein Swiss:Q9Z3G5, whose similarity to the phycobiliprotein lyases was previously noted [4]. This constitutively expressed protein is not known to have any lyase activity. It is thought to be involved in the coordination of PBS degradation with environmental nutrient limitation. It has been suggested that the similarity of NblB to the phycobiliprotein lyases is due to the ability to bind tetrapyrrole phycobilins via the common repeated motif [4]. 
20.90 13.00 20.90 13.00 20.80 12.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.44 0.74 -7.55 0.74 -2.93 221 340 2012-10-11 20:00:59 2003-04-07 12:59:11 11 54 166 1 174 4509 751 27.50 31 10.24 CHANGED lRttAstuLuplss..............pulshLhpsLps ....hRhtAAhuLGplts..........................pAl.ssLlpsLp........................... 0 65 142 163 +2876 PF00632 HECT HECT-domain (ubiquitin-transferase) Bateman A anon Prosite Domain The name HECT comes from Homologous to the E6-AP Carboxyl Terminus. 20.40 20.40 20.40 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.28 133 4368 2012-10-03 14:42:41 2003-04-07 12:59:11 20 240 341 23 2797 4212 88 293.50 28 21.16 CHANGED phl.....spchh.s.sp.........shF...............................................................................h.tsspt....hhh.s....................................................................a...phlGtlluhulhs.......sh.h.....lslpFs.hha+............................................hlh.....................ttt......ls.hpDlt.p.....................lDsphhps.lptlh..t.p.t......................................h...slsFs.h...t......................................................................................................................................................................................................................................................................pL..h..s..G...psl.VTp..pNtpcYlphh....hca.hlpptlppphps........hpcGFt..pll.s......tphlp.hFsspELphllsG.....ps.p.......lshpsLcptsp..ap.tsas.....tss......................ttlphFa.cl..............................lpp.hsp-pppphLpFlTGss+lPh..sGhp.th..........php.....Ipphss..............................................................................................ss..t....LPoApTCa.shLcLPp..........Ysop-hhcc+LhhAlpps.p..s.Fshs 
...................................................................................................................................................................................................................................................................................h..l.pphh...s..s.......h...shF...................................................................................t.p.pt........hh....s..............................................................................pphph................................................F...ca.l..G.p.l.lG.h..........Alh.c.........sp..h................l-.h...FshshaK.................................................................................................................tlL...................................................stt.......hs..h.pDlp..p.......................................................................l.D...s..phh...pu.Lhh...lh......pts..t...................................................................................tl......sLs..Fs..ls.pt.....................s.h.......................................................................................................................................................................................................................................................p..........................h-.L....h.ss..G..................psl....V...T...p.............cN.+.p-.Yl.c.hh.............spa..ph....p....p....t....l..p.p...Q.hpA......................Fh.....c..G.Ft...pl..l.s........................p..h.....l..p.....hF.....s.........pE..L...c..h...l.l.s.G..................s..c.............................lDl..p..c....hcpp........opY..........c...tsas.............ts..p..............................hl....p...h.F.W....cs........................................................l....pp....h....s....p....-.......c....+.....t....ch...L..p..F.lT....GosR.....lPh........tG......Ft..tL..................................phs.......I..pphs.t............................................
.............................................................................................................................................ts...p.....LPp.u.p...T...............C..a..N.....h.....LcLP..............Y...p.o.....h......c.....hLcc+LhhAlp.ps.t.u.Ft............................................................................. 0 1023 1454 2163 +2877 PF03451 HELP HELP motif Bateman A, Suprenant KA anon [1] Family The founding member of the EMAP protein family is the 75 kDa Echinoderm Microtubule-Associated Protein, so-named for its abundance in sea urchin, sand dollar and starfish eggs. The Hydrophobic EMAP-Like Protein (HELP) motif was identified initially in the human EMAP-Like Protein 2 (EML2) and subsequently in the entire EMAP Protein family. The HELP motif is approximately 60-70 amino acids in length and is conserved amongst metazoans. Although the HELP motif is hydrophobic, there is no evidence that EMAP-Like Proteins are membrane-associated. All members of the EMAP-Like Protein family, identified to-date, are constructed with an amino terminal HELP motif followed by a WD domain [1]. In C. elegans, EMAP-Like Protein-1 (ELP-1) is required for touch sensation indicating that ELP-1 may play a role in mechanosensation [2]. The localization of ELP-1 to microtubules and adhesion sites implies that ELP-1 may transmit forces between the body surface and the touch receptor neurons. 21.90 21.90 22.20 22.10 21.80 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.51 0.72 -4.46 7 705 2009-01-15 18:05:59 2003-04-07 12:59:11 9 111 107 0 408 642 23 69.00 42 8.56 CHANGED cpG.l+halRG+slshhhPsc.h.shc.sscp-hPsp+LcL-WVYGYRG+DsRsNlalLPTGEllYFhAuVsVLash- ............................................................Ppt........p..tt.c.p.chP.....s..p.cL+L-.a.......V.....YG...YRG.+DC.R.s.NL..a.hL.s.............s......G.......E........l.VYalAuVsVlYNh.......................... 
0 124 162 260 +2878 PF03996 Hema_esterase Hemagglutinin esterase Finn RD, Marshall M anon Pfam-B_505 (release 5.5) Domain \N 25.00 25.00 50.60 50.60 20.70 19.60 hmmbuild -o /dev/null --hand HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.92 0.70 -5.16 6 382 2012-10-02 11:02:24 2003-04-07 12:59:11 10 4 208 19 0 295 0 323.80 45 74.88 CHANGED hspplLlssShuhshGF...............scPhsssSHLN.sDWhhFGDSRSDC...sNsu.Pp.shhshpss.phSuK.Suphh.SlFts.HhoDah..YpGpGpphVFYEGVNFSPatuacChspG.sphWhs.KspFYsplYchhup.RslSlVNl.hshss.shssulCKssspshspsshlhpppshssphhpsctshshsshs.............tsCphhhsshplaNspthusph.shhspastslsshs.hssssTtspssuhDhsCcYLtLpPGsYpuhSsthhLshPoKuhChcphc.hsPVQsVpShWspsRpSDshsu.ACp.sPYChFhNpossYsG..stDsHHGDtchRplLSGLhYsusCISQQGshshsssSoph.hhP..YG+CPpAAclts ....................................................................................................................................s.PpsssShLN.usWhhFGDSRoDp...sNss..p.u.hshpsA.phpuh.uuphh.ShFts.thsDah..YpGpGpphlFYEGVN..aoPatAhcChpps....Whp.KhtF.pplYc.hu..+shohVNs..hh.s.t.stuhstsss.sh.psshlhpptshs...htsctshh..s.................s..hh.shphastchhsshh.h..sp.hhshsshs.hs.s.Tts.sss.shpCchl.L.sGpY.shSs.hhLhhPp+uhChshpc.hsPVpsVpShWspsRpSD.hss.AC..sPhChhhppppsYlG...hD.pHGDtthpplLSGL.Ypu.ChSQpGhhp.sss.o...LhP..aGRCPhAAc.ps............................................................. 
0 0 0 0 +2879 PF02710 Hema_HEFG Hemagglutinin domain of haemagglutinin-esterase-fusion glycoprotein Finn RD, Bashton M, Bateman A anon Pfam-B_505 (release 5.5) Family \N 25.00 25.00 36.90 36.90 19.30 18.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.17 0.71 -3.92 12 364 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 203 19 0 284 0 131.10 47 29.16 CHANGED pFYppLYphhuh.Rslphlsl...p.shshost.psshCp....ssphh.sNPpalshpsshss........spFsLsu......CscaLVPhChh...suthhssst.....hhps.thaYs.Do.hhYGasssss......stthDhsCcYL.lcPGsYpAhossh ..F.pplYchhu..+slohVNs...shshpsost.ssu.Cp......s.hLpNPAhhspEsp.s-.hht.E..A.FsLss......Cc.alVs.ChF...stcshsppt.....hhs..QhhYspsstVlhGLsspho......sss.shpCchl.LpPGpYpshSs..t.. 0 0 0 0 +2880 PF00509 Hemagglutinin Haemagglutinin Finn RD anon Pfam-B_26 (release 1.0) Family Hemagglutinin from influenza virus causes membrane fusion of the viral membrane with the host membrane. Fusion occurs after the host cell internalises the virus by endocytosis. The drop of pH causes release of a hydrophobic fusion peptide and a large conformational change leading to membrane fusion. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 550 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.87 0.70 -6.37 15 50634 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 45867 505 5 30865 0 432.00 51 96.33 CHANGED IClGYHusNSocpVcTlhEpsVsVTpupELlEos+ssclCshp.....stpPlcLtcColtGhlLGNPpCDhhLs.psWSYIlERssu.suhCYPGsl.shEELRplluSstpap+lchhscs.sWssshsss....GsosACppsuss.uFaRslsWLsKp.....psssYPshssoYsNTcsp-hLhlWGIHHPsossEQssLYpsussh.loVuTpohspsasPpIGsRPtVs..G...QuGRhcaYWoll+PG-olsFpoNGNLIAPcauaplsspu.u.sIhposlslssCsocCQTstGuINospPFQN.lsphsIGcCPKYVKssSL+LATGhRNVPplp....pRGLFGAIAGFIEGGWpGhIDGWYGF+HpNupGoGhAADpcSTQcAIDpITsKlNslI-KMNppF-sls+EFsclEcRIpsLsc+l-DshhDlWoYNAELLVLLENp+TLDhHDSNVpNLaE+VRRQLR-NAc-tGNGCFElaHKCDspCMESIRNGTYDHscYcEEu+lNRpcIsGVKLcSss.sYcILsIYSslASSLlLsshlhGhlhWsCpNGshRCpICI .................lClGaHAsssss...hVcTlh-cplpVTpuspLlpsppsG+lCs.......shtsLchtsColhshlLGsPpC-sh.s.ppWshlVE+usu..pusCYPhDhsDYppLRplluSs...tphEhhscs.sW.stssps....GsS.uAC.+susp.SFappLhWLs+......psssYPsLshoasNscpp-hLhlWGlHHPs.o..ss-QpsLYtpusuh.loVuTp+hppphhPpIusRP+VR...s.....QsGRIshYWTllcPGDtlhhpusGNLIAPRhsFtlpps..........tSuIhpSDsPlspCso.pC.TPpG...uIsss...hPFQN.lp.lThGtCP+YVKpspL+LAT.GhRNlP.php...........oR.G.......lFGAIAGFIEsGWpGhlsGW.aGapppstpG.u.AAD.................................................................................................................................................................................................................................................. 
0 2 4 4 +2881 PF01126 Heme_oxygenase Heme_oxygnease; Heme oxygenase Finn RD, Bateman A anon Prosite Domain \N 20.20 20.20 20.30 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.32 0.71 -4.76 18 1215 2012-10-02 21:56:19 2003-04-07 12:59:11 15 7 902 120 452 1089 282 182.30 20 75.77 CHANGED ssLuppLRcuT+csHshAENssFsKsFLpG.lscpsY+pllusLYalYpAlEcEhccp+spshhuslhFP.ELpRpsuLccDLsaaa.G.ssWcptlpsosAsptYVpRl+clusp...pPcLLlAHuYTRYLGDLSGGQlLK+................................................IAppuhsLsss.pGhuFYcFssls...spptFKppYRptLssl.ls-pppppllcEAspAFthNhplFp- ..............................................................................h..hsttl+ttTpt.Hpth.-........p..hhtt...h.....................hs.h.ptahphltt.ah.......hap....s.......lE.t....th................t........................................................................ht.h.h....s...pL...h...R.pt.t.LtpDlt.hh............h..s..............p.h...........t.........t........................h..........t.......s...............................s.......h..t......t...h......s..t..c..l..c....p...............l......spp...........pst.....hhluahYsh.h.u.sLu.GGp.ll.++................................................................................hhtpt..hs..l...s.t.....p..............t..........u..hp..........ahp...a..s...s...................sttth.hptac...pt...l...s...p..l..s...h...s.t.p.p.p...p.phlppAppuFth.hthht.................................................................................................................. 0 109 241 359 +2882 PF00372 Hemocyanin_M hemocyanin; Hemocyanin, copper containing domain Finn RD, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes arthropod hemocyanins and insect larval storage proteins. 
20.80 20.80 21.20 21.10 19.60 20.10 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.72 0.70 -5.11 14 743 2012-10-01 19:27:11 2003-04-07 12:59:11 14 15 199 48 195 807 0 247.20 31 41.35 CHANGED s.aElaPphFssu-sIp+Aaph............chtp.....................................................ph.-cs............hlhcsphosslh..spEp+luYFpEDIGlNsaaaaaHhsYPFaasschaGthK-R+GElaaYhaQQlhARYchERLSNGhscl..FpahcslcpGYhP.hhplssGh.au.RPsshplcshcshc.lp.h-caccpIh-uIppGalps.cGppIsLccscul-hLGsllpu....ss-sssh.Y.atoLcshu+hhluthscPps+aphsPulh-papTSLRDPlFYpha+hIsNlFpc .....................................................................................ElhP.hFhs.sphl.pu.......................................................................................................................................t....................hhl...h..s..ao...s...s...h.....p.Ep.+luYFpE..DlGlNsaa.......aaaHhs.Y.........P....h....h...h..p.....p....h...........s....h.p.K.D......R..+GELF.aY.hHpQllAR...YphERLo.N....sLsclp.h...s.a.h.c..s.l.p..G.YhP..hh.p.h.h.s...G.ht.as..sR......ssh...ph......p......c.....h.............p.......p......h.....p..................l......p....chppacpRlh-AIp.pGa....l....hs.t....sG.....p.p..lsL.p......p..................c........u....I-hLGs.hlcu...........s....Ss.s.pa..Y.....G.s.lashu+hhl.uh.p-.sp..tc...a..................suVhtch....p.TuhRDPh....F.Yphaphlssha........................................................................ 0 61 85 176 +2883 PF03723 Hemocyanin_C hemocyanin_C; Hemocyanin, ig-like domain Finn RD, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes arthropod hemocyanins and insect larval storage proteins. 
26.40 26.40 26.40 26.70 22.10 26.30 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.60 0.70 -5.24 163 687 2009-09-11 10:37:18 2003-04-07 12:59:11 9 13 179 48 209 753 0 235.80 31 36.26 CHANGED ssYTc-......-LpFsGVplssVpV.p..........LhTaa-ph-h-losul.shs.........................schslhs+hp+LNHcPFsY..pIpVps......spssst..sVRIFluPKaDph..Gp.hshs-pRhhhlElD+FhhcLpsGpNsI...sRpSp-SslTlscphoaccLhpph.ts.sttt.h...........php.stCGaPp+.hLLPKGpt.pGhsapLFVhlosaptspl.tpt........s.sshshCGht.sppYsDp+PhGaPFDR.l.........spht.a.h....NMthpDVpIhac ...................................................sYoppcLtasG..VplpslpV.sp.............................LhTaacp.-hDlssulshs..............................phslhu+hp+LsHcPFsaplsVp.s......spstpt.......sVRlFluPKhDph..G.p.....h.s...h.p-pRhh.hlElDcFhhp.LpsG..p..N..sI...pRpSp-ssholspchoap.pLhpph.tsh.p.t...h...........ph.ph...stCG....aPp+.hL..LP+G.p..p.Gh...hpLalhlosaptsps..tp......................p.ssh.h.CGht..sphasDp+shGaPFDR.l..........ppht.h.h...sNhhhp-lhIha.................................... 0 59 86 184 +2884 PF03722 Hemocyanin_N hemocyanin_N; Hemocyanin, all-alpha domain Finn RD, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes arthropod hemocyanins and insect larval storage proteins. 21.70 21.70 22.60 21.70 21.50 21.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.50 0.71 -3.97 111 668 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 164 48 193 736 0 113.70 27 17.52 CHANGED shhpKQpplhpLhp+lpps...h.t.chhplupsap.......tp.t.a...pcsch...spp.hhp.h+pshhLs+schFSlappcphcEAhtLaclLatA..KDa-sFh+sAsasRpplNEGhFlYAlslAllHRsDscGl .............................................................t..lh.lh.ph.t............phhthspph.........t....t.h....tp.ph......hpp..hh..hphs.hLs+sphFSla.tcHtcpAh.tLhclhhtu....pDa-sFhpsAsa..uRp+lNpshFlYAlolAllHRsDscsl................. 
0 49 75 166 +2885 PF00045 Hemopexin hemopexin; Hemopexin Ponting C, Bateman A, Sonnhammer ELL anon SMART Repeat Hemopexin is a heme-binding protein that transports heme to the liver. Hemopexin-like repeats occur in vitronectin and some matrix metallopeptidases family (matrixins). The HX repeats of some matrixins bind tissue inhibitor of metallopeptidases (TIMPs). 21.30 21.30 21.50 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.30 0.72 -4.22 76 5321 2009-01-15 18:05:59 2003-04-07 12:59:11 14 87 208 96 2483 4600 17 44.70 26 26.74 CHANGED lDAshphp.......schaFF+GspaWchssp......php.shPp.lssh..ulss .................hDAshphpt..........uchaFF..K..G.....s..p..aW.+...assp..........php...s...u...aP...+.lsph...uls......................... 0 348 533 1140 +2886 PF05171 HemS Haemin-degrading HemS.ChuX domain Finn RD, Bateman A anon COG3720 Domain The Yersinia enterocolitica O:8 periplasmic binding-protein- dependent transport system consisted of four proteins: the periplasmic haemin-binding protein HemT, the haemin permease protein HemU, the ATP-binding hydrophilic protein HemV and the haemin-degrading protein HemS (this family). The structure for HemS has been solved and consists of a tandem repeat of this domain. 
20.40 20.40 20.50 20.40 20.00 20.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.37 0.71 -4.33 108 1015 2012-10-01 19:49:10 2003-04-07 12:59:11 7 7 499 8 260 1007 60 130.00 33 72.42 CHANGED sloctphltstsu......p...hsppls..ss.........hpplLpthtphG.tlMshstNcuslp.+sGsh..pslp.t.....G........hh.lLss...s..hsL+LhhsphspuasVc+sT......pc.G..hhpSlphFDspGsslhplaspcp....s-hs..uWctLlp..pL .......................................loctphhtshss.........c......h.Ah+lp...ss..........l.tplLps.hppsG...clMshstNc.usVp.psGsh..p.p..l....p...t.......s.....t.......hh....l.lss...p....hsL..+..Lh.sphAps.aslccsT.................sc..G...hhpSlphFDtpGs..t..ltpl..aupcp..........s-hssWcplluph....................... 0 53 127 187 +2887 PF00906 Hepatitis_core Hepatitis core antigen Bateman A anon Pfam-B_8 (release 3.0) Domain The core antigen of hepatitis viruses possesses a carboxyl terminus rich in arginine. On this basis it was predicted that the core antigen would bind DNA [1]. There is some experimental evidence to support this [2]. 22.60 17.00 22.70 22.70 22.50 16.80 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.32 0.71 -4.63 12 7177 2009-01-15 18:05:59 2003-04-07 12:59:11 15 10 91 27 0 5167 0 125.70 89 88.87 CHANGED DIDPYKEFGAoVELLSFLPsDFFPSlRDLl.DTAsAL..Yp-ALpSsEHC..SsHHTAL....RQAlhCWtElhsLuoWVGsNLpDPsuR-LVVsY.........................................VNsshGLKhRQLLWFHlSCLhFGR-TVlEYLVSFGVWIRTPssYRP.NAPILSTLPETsVVRp..Ru.....RoPRRRTPSPR.........RRRSQSPRRR.................RSQS.pSpC .............DIDPYKEFGAoVELLSF.LPSDFFPSlRDLL.DTASAL..YREALESPEHC..SPHHTAL....RQAl.lCWG-LMsLAoWVGsNLpD.huR-hVVsY.........................................VNsphGLKhRQlLWFHlSCLTFG+psVlEYLVShtsh..................................................................................................................... 
0 0 0 0 +2888 PF01771 Herpes_alk_exo Herpesvirus alkaline exonuclease Bashton M, Bateman A anon Pfam-B_822 (release 4.2) Family This family includes various alkaline exonucleases from members of the herpesviridae. Alkaline exonuclease appears to have an important role in the replication of herpes simplex virus [1]. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.50 0.70 -6.03 14 226 2012-10-11 20:44:43 2003-04-07 12:59:11 12 6 154 6 18 307 11 373.60 23 77.26 CHANGED s.hsplsshshsp.FLcss.htphhu......pptsthstlRasYhahlhh......phsshhusst............................ssshhtcthttshsst...............................plsplhtts....................pphsscttpplhhhlEttTRGQu-NsLWclLRcGllouSKhhausp.t.phs.+hFps.shpsspasuu......sltFGh+sEpsl+sLlsphhs.......sc.................................tsspshGhLhsPpsGlhGsSLDhhss..s.tpuptshl.hpsssplaEIKCRaKYLFsKs-..DPhhptYtpLhppPsttsLtcFlhSIs+PuVEahssG+lPots-aLlTpDtsWc.ts.p++tshsst+ph...htcplthNptspSpVhlhs......DPstspupIsh...................................................cuthplslFsNPRHsYFaQlLlQphVl.ppYlphpssP....pLso.tshlsouhFRc.......R.tssshsCplssc...tl..sscIPllLIlTPVhlstp.shpchlpputshWp.psspcpasph.sWsssus 
...............................................................................................................................................................................................................................................................................................................................................................................................................................hs.t.h.t...pth..hh....hlEptTRuQucs.....tLWplLRhshhTASphp.sst...t............s.....s..t..hhp.........h.p...s....t...hp.ss.............ulhFGhppEs..........hs.+.......s......l.ltphhht................................................tsshsCGhhhsspsGhhGAShDh.hhs........p.s..s...h..t..h.h.aElKCRhKYhhp.....p.h.t...tlhtt.s...hhthl.sh..Pslpah..tt.Pt.t-hLhs.p..h........h.............thh..Nt...S.lhlhs.................t...ttt.h.................................................................................h..shFhNs+H..ahQhhlQ.hlh.t.a.t...............s.hhp....t........R.................................t.tlPhhhllTPl.hs...hh..h.tts...ht.t.ht..............h.................................................................................. 0 14 14 18 +2889 PF04793 Herpes_BBRF1 BRRF1-like protein Waterfield DI, Finn RD anon Pfam-B_6247 (release 7.5) Family Family of herpesvirus proteins including Epstein-barr virus protein BBRF1. 
25.00 25.00 86.70 86.40 19.90 19.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.65 0.70 -5.64 10 28 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 21 0 0 22 0 273.10 29 93.11 CHANGED aHYP-lssllpElcslsl.ssshsppppPssp.LERulaLsKlhQlLhQHRppEppIlPplpsNlhYaLspLpchsssctpphlpslLscl..sssDhtLsscLsphlsclLpl+YPsthsl.....sppssshspWCLpahlGlspph+phlsssLsstc.sShPSlpshspLu+pLFhspuhhpEsasDssFNh.hNQlVaWTsslchappClapchls-oIsshpsLLKpEl+uFhpWhcppp......phpshthlcahcahlcplTsssocsclsElhphLpcsK...+sLhs aHYPslspllpphsslsl..ssh.s.tppPhhs.lcRuLaLs+lhQlLhQH+ppEphIlPplpspltYhLptLpchsssch.pphI...tplLshl..sssshsLppcLhphlsclLthcYPpthsl........ssshtpWCLpahl.Glotph+phlpshlsotp.sshPShpshspls+pLFhspsh..EshtcpsFN..hN.slFWTsslphappCIapphlhcpIssh.sLLKtEl+pFhsWhc.pc......phtshshlpahcalhcplTsssoph.hsElhshL+HsK...+sL...... 0 0 0 0 +2891 PF05109 Herpes_BLLF1 Herpes virus major outer envelope glycoprotein (BLLF1) Moxon SJ anon Pfam-B_6348 (release 7.7) Family This family consists of the BLLF1 viral late glycoprotein, also termed gp350/220. It is the most abundantly expressed glycoprotein in the viral envelope of the Herpesviruses and is the major antigen responsible for stimulating the production of neutralising antibodies in vivo [1]. 
25.00 25.00 409.60 409.50 21.30 20.90 hmmbuild -o /dev/null HMM SEED 830 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.69 0.70 -13.61 0.70 -6.58 3 270 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 6 1 0 74 0 437.20 62 100.00 CHANGED MEAALLLCQYTlQSVlpLsuDDPGFFNVElLpFPFYP+CcVCTADVNlTIlFcVGscs++L-LsFGs.LTPpTKuIYQPlHAoGGpENATsLFlLELLGAGpMALTMRSpphPINlSc.-EppVSlEAlhVYFpDsFDlhWCH+VpMp-PVYLIPcplPPll..WNcCNSTNITAVVRAQGMDlTVPISLPTpPp-SsFSl+hEMsGNGIDhpCchEss.ISPVLPGsNsFsITCSGDKPHFASGGILTPsSPssTPsPhsGYsYSLpLsPRPVPRFLGNsSpLYlaYohsGPcu-GGDaCIposIsFSDcLPTsQDMPssTQsloYTGsNATYpLsMAsSEs.NTAPNVTVTAFWAWuNsTpoDFKCKWsLsTssQpPsGCER.........huGtFsSNRTFElTVusssssuKTLIIoRsATNsTsssaKVlFoKAPDoTpos.hl.ThsastPppTTulPoosplP.....PsoouPThpstDPTSsTPTGhTS.................SphP-sTSPTSsoTosTPNATSPTsssT..TPNATSPTosKTSs.......TPNsTSPTslVoGsTosATSPPoGsTSs........PNATSPpVuEpSP.sNTs.........TPsVTpsPSVlTssshsuphGsoSSPTSuhouhPpo.....ssTPRsNSTSsTP.LTSsHPTGGcNITEsTPuuPST....sHVSTuSPuPsPGTTSQsuGPGNSSTSscPGts+VTcGhPspNATSPSAPSuQcTsVPTsTssGGKANsTT..KcToGSshtuSTpPsTstGusuTTs...YNATT..PPosSSpLRPRWT.TuPPsTTsQATVPVPPTp+P-aSNLSMLVLQWASLuVLTLLLLLVluDCAFRRsSSplHTYTa..PPYDDu.ETtV 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +2892 PF04633 Herpes_BMRF2 Herpesvirus BMRF2 protein Mifsud W anon Pfam-B_5353 (release 7.5) Family \N 25.00 25.00 190.20 190.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.20 0.70 -5.48 10 24 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 21 0 0 22 0 348.00 30 98.13 CHANGED pschsLshsuFssGsLAAoPFlWCFIFtoLaohshassWpotlah.WuhsssplshlhaCllphsp..hsRtlchlsslslhsshhsFhhpshshuhhshlPsLFllNhlhLslWlhlsh-sVYLCPsIhp+YaELGFLsAhslY.YlllppplaloslFhhPFhlFlshGlhuLpsl+++shYcpGlpRR+uIF.spsuKYhshShppshchssh-LlllssLlsuhssuhluLslaTclhhGlspYlaLFhsGshsCGGlhl.sSphhshVashluslshhLlalhsstlshhsppshlhslhlhsahpAlGCplphIRpKL++slNuPpllLslCsLsNlllslshhllsK ......schsLshtuFshGhLAAoPFlWCFIFtoLaohslaoshpshlah.WuhsssplhhlhaCllshpp..hsRtlchhhhhslhsshhsFhhpshshuhhhshPsLFllNhhhLhlWshlsh-lVYlCPsIhpRYaELGFLsAhhla.YhlltptlalosVFhhPFhlFlshGhhuLtsh+c+slapsGlpRR+uIF.htps+Yhshohppshchssh-lhslhhLlhshssuhlultlhoclhhGlspYhhLFhsGhhsssGlhl.pSphhshVhhhluslhhhllalhtsthsshhppshlhslhlh.hhpAluC.lphlRpKLp+slNuPphhLtlChlsNhllslshhslsK.. 
0 0 0 0 +2893 PF04682 Herpes_BTRF1 Herpesvirus BTRF1 protein conserved region Waterfield DI, Finn RD anon Pfam-B_4518 (release 7.5) Family Herpesvirus protein. 25.00 25.00 125.00 124.80 19.60 19.40 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.59 0.70 -5.05 12 30 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 25 0 0 27 0 245.40 37 66.63 CHANGED hshhsshK+VpINEutNpthha-spp.l.phPh...................ssh.s+sIlKp......stsspst.....hhAplFaAhut.hP.plGpsPspsasTlhIMsRSpNSLphlP.shplsslQ+LFLKHVLLtchGLENshpsF.tlasp.lsslospQh.phFcpllppsKp+lEDhVFsLNSIspt.FptsVpssssss.thphAhEKYFLMFPPtD+.psAlpFuusllclICpGsshsclltFLp+YlsIpccsstsNhlKlYALLol .........................hhpshK+VplsEusNphhhh..pp.l.ph.....................pht.s+sIlK+...s.ss..p.tsh.t.t.hAplFaslup.hP.plGpuPsssasolhIMsRSpNSLphlP.shclsslQHLFLKHVLLpphGLENhlpsFpslYss.hsslospph.ctFEpllppsKpplEDhVFCLNoIssppFptsVpssssss.slhpAhEKYFlMFPPps+.psAlpFuAsll-lICpGsslsclltaLsKYhsIp+csstssLlKlYALLol. 0 0 0 0 +2894 PF04929 Herpes_DNAp_acc Herpes DNA replication accessory factor Finn RD anon Pfam-B_5837 (release 7.6) Family Replicative DNA polymerases are capable of polymerising tens of thousands of nucleotides without dissociating from their DNA templates. The high processivity of these polymerases is dependent upon accessory proteins that bind to the catalytic subunit of the polymerase or to the substrate. The Epstein-Barr virus (EBV) BMRF1 protein is an essential component of the viral DNA polymerase and is absolutely required for lytic virus replication [2]. BMRF1 is also a transactivator [2]. This family is predicted to have a UL42 like structure [1]. 
25.00 25.00 254.70 254.30 21.80 21.80 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.33 0.70 -5.49 11 24 2009-09-11 06:58:28 2003-04-07 12:59:11 7 1 21 10 0 25 0 391.10 34 98.89 CHANGED shuhplcsssLsptsKlY-HlKuclK.sGlIQ......lsG.sssPsLullSslGssGlLsFclp.sAluphpspp.......phscssolSFRN.puhGsTFlaoRELFGssVcsAsltFY+R....tssupP..pFV+splpYsDslTpTpHTSsl.pstl.Phpcplpsspshu+VlLosKTsshLQKWLRQpKo+t..psV+VslNETLuVll.loVG-soKTlDaKsss..t.sstshsssKstDsGsVpsDssspVul-uLhsALulCKIPGlhlPsh+FauusILEVsGs.lKpup.ssssLSVhLLsssspscssst...sh..........sulsspsppsssssssuPP..............................tsspspopp.sssotsPssshs+s.t...ohpR..KRpsc.........pcc+sKKhKhsFNPLI ..phshplcsssLuphuKlY-HlKup.lK.sGlIQ......lsGss.ssPsLulluslGsuGlLshclp.sAluphhspp.......ptspssuhSFRN.puhGsTFlaoRELFGssVcsssltFY+R....psuupP..pFV+splpYsDsso.pTpHTSsl.pshh.Php-+lcssthhu+VlLssKTushLQKWLRQp+op...psV+VolNEsLuVhs.hTlG-soKTlDaKPhs....ss.tttshsssKstDsGsVpsDssspVul-SLhAALulCKIPGshlPsl+FauusILEVsGs.lpsus.sslcLSVlLhpssspspssus...s.........................sulpppspp..ssssssssPP................................s.tts.psposs.u.sopsPssshp+s..ss..p.p+..KRpucp......pppcc+sKKsKhsFNPLI. 0 0 0 0 +2895 PF01673 Herpes_env Herpesvirus putative major envelope glycoprotein Bashton M, Bateman A anon Pfam-B_1084 (release 4.1) Family This family consists of probable major envelope glycoproteins from members of the herpesviridae including herpes simplex virus, human cytomegalovirus and varicella-zoster virus. Members of the herpesviridae have a dsDNA genome and do not have a RNA stage during there replication. 
25.00 25.00 54.20 54.10 21.50 17.90 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -13.16 0.70 -5.94 32 143 2009-09-11 13:51:45 2003-04-07 12:59:11 13 2 85 0 0 123 0 494.60 31 93.89 CHANGED lh...sEllhtuHhhpl.........shhssshtsssss................................................................................ph..h..s.........hsssLslDpsCplCpllshhh+c.s.s.pWhtDYAhLChKCltAPhsssuohlsAhEFlalhcpHF.stptss...hFspphlolhDlphHFFlppCF+...............................tpssshlphuNhoaltpuhhRuhLhsptssshshhphh...........................................s.t.shtthccpscsssCs..............................tp......................................th.tt................thsassLlhhlhuGossh.tts.....................h..lhts+tptlcphhpppt..................................ht.ssps-hshGPlLloslhthpp+spTso.ClLCpllss+sp.hhsL+pL+pcllsaspNNlpLhDpIp.lLsshts......................................hs.hpDtsthhtll+tsGspulaKHhFCDPhCAhNttpTsPpVL..Fspsss...ccLphhKAplAstNpatspl...CptLasLshhFKuaQlhphp...tTslusFl+-hstlL++HslsLls.taTlshYV .........................................hsELLhtuH.hpl....sh.p..t.s.sss..ss...s........................................................................t....-hs.h.ts.........hsssLulDptChlCphlplhh+p....shs.pWltDYuhLChKChtAPhsAhushlsAhEFlalhcpHF.phptss...hFtppsLTlhDlphHFFlppCFc.....................................ppss..stVphsNhohht.ussRAhlhs.sssshsttpth............................................s.tthltsac-pup.hc................................................................................................................sassLlhhhhuGTssh.pss.....................hpths.ts+tttlcthhpspp...............................t.hh..ssps-hshGPlhhoslhthps+spTss.CLLCpLlhspha.hlsLRcL+pcllsYspNNsplhDpIt.Vlssh.st.....................................ts.htssshhhtll+tsGspuIaKHhFCDPhCAhs.hpssPtVLFscsss.....cclphhKAtlAssNhat.uRl...CttlhhLhahFKsaQlhh.p....TtlusFlR-hstlLcpHslsLls.paTlspYV................................................ 
0 0 0 0 +2896 PF02480 Herpes_gE Alphaherpesvirus glycoprotein E Mian N, Bateman A anon Pfam-B_849 (release 5.4) Family Glycoprotein E (gE) of Alphaherpesvirus forms a complex with glycoprotein I (gI) (Pfam:PF01688), functioning as an immunoglobulin G (IgG) Fc binding protein. gE is involved in virus spread but is not essential for propagation [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.67 0.70 -6.22 19 334 2012-10-03 02:52:13 2003-04-07 12:59:11 11 15 88 4 23 302 1 298.90 33 76.66 CHANGED Phsssshhscs.................WshL.ssss......t.t.lClp.s.ChtDllls...spC...shchsls.......hAphhh..tshppsssshhh..ssslhsphsssp.....ssLpltsAstssuGlYsLhsp...sssstsptss..hhVtVtstttt....................................hsstPh.....h.hsP..pp+Gsphplpsa+uhlassGDoFplslplpsthaD..ssFshplcWhahcs.sspCs.h+IYEsClaHPptPpCLpPsc.ssCuFuSshhupplus+hYtsCstt....hsscC..sssphctsstlpht...sssssLhhpsAssssoGLYV..........hVlhhNGHlcAWsYsllSTssp.ahNslp-hshPth................ssssssssssstss.stshst.hhhhlsulhusAsllhlsslsshsshtppppRchhp........thsshussYsuLPss-.......h.p.sp.Dspss--phs.tppt..................................sspppGSGap 
............................................................................................................................................................................................................................................................................................................................................................ss..th+tsph+h.s.cu.LFuPG-TFsh.splhu.htDp.tsashslsWhhhcs.sspCh.hhlYEsClYHPphPECLpPsD.ssCuhoS..hsphlAhRsYusCS.h......stC....h-AahE.l.................pssssptuGLYl..........hVh...............t.t......tsh.p..hP.h..................s...tst.st.........st.h.hhhhhh.s.hhhhh.hhhh.hhhhhhphtpct..t................................t.hY.h..tl....s..c...................................................................................................................................... 0 15 17 21 +2898 PF01688 Herpes_gI Alphaherpesvirus glycoprotein I Bashton M, Bateman A anon Pfam-B_1222 (release 4.1) Domain This family consists of glycoprotein I form various members of the alphaherpesvirinae these include herpesvirus, varicella-zoster virus and pseudorabies virus. Glycoprotein I (gI) is important during natural infection, mutants lacking gI produce smaller lesions at the site of infection and show reduced neuronal spread [1]. gI forms a heterodimeric complex with gE; this complex displays Fc receptor activity (binds to the Fc region of immunoglobulin) [1]. Glycoproteins are also important in the production of virus-neutralising antibodies and cell mediated immunity [2]. The alphaherpesvirinae have a dsDNA gnome and have no RNA stage during viral replication. 
20.30 20.30 20.30 21.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.86 0.71 -5.11 13 161 2012-10-03 02:52:13 2003-04-07 12:59:11 12 1 49 0 0 151 0 161.00 44 44.88 CHANGED splLhsllLhulhls...upulVaRGspVSLhssossss.ulhP...s-sslslhGcLlFlscQhPsss.YsGTlELl+as.s.spCapllpshpYsuCPRlpssAFpuC+ppcshp.s.stsplpsslcstsLLpIspPpssDuGlYhLRVplss.sssuDlFslslhlh ........p.LtuLllluLals.....usuLVhRGsoVSLsosS.lsA.AlsP.............hl--DLhlhGcLhFlGsQlP+oshYsGsVELhHYshu.p+C.RVVaVlohouC............PRpsAsAFshCRsscstH.oPAYssLphslsppsLLRlppsshc.AGlYVLRVhVss.AssAulFsLuhsl.h.... 0 0 0 0 +2899 PF01528 Herpes_glycop Herpesvirus glycoprotein M Bateman A anon Pfam-B_929 (release 4.0) Family The herpesvirus glycoprotein M (gM) is an integral membrane protein predicted to contain 8 transmembrane segments [2]. Glycoprotein M is not essential for viral replication [1]. 25.00 25.00 31.70 31.50 23.20 22.50 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.21 0.70 -5.76 30 163 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 92 0 0 141 0 329.60 26 92.25 CHANGED MpsS+sDphth+hWhhplssaslhalssslssluAsFPsl.GFPCaasslVDYushNho......stNttptlTP.......sLFL-ssphhsYlhhohllhhssslYYlluultlhpt++.tssshsshps...lshlGossslahulLphWTlQlFlhsLSaKhlhLAAFsYslHFhh.shastshlophss.hthtpthpsl.c.sshLcpllhph+slhsNlhhshlulphllhuLohthulGNSFalhlucslhuulslFhlLsllahllsEhhLs+Yl+h.hGaalGsllusshLhhPll+Y-shF.......suplpsslslslullsllslshhllRllRhhhtp+pcpscYhslspsst......phc..hp.................................hp.s.csshhpp-ss 
..........spsDphsh+hWshplshhslshlshsshhlhAsh.th.GaPCaassllsYtthNho.......shshhthhsP........LaL-s.phhsahhhshllhhhhhhYhllusltlhhp+c.pshphpthtp...h.hhus.solhhuhlphWhhQlhlhhLua+h..LuuhsYhlHFhhhshassthsoph.s...h.pthhtl.c.pstLcthl..h+slhsNlh.hhluhphhhhulshthhhuNsFhhtssphlhsshshFhllslhhhllhEhlls+Yl+h.hGhalGshhuhshlhhsshcYtshh.......ssshppsltsslullsllslhhhllRhlRhahh++p.+pscahshspsst.....ppht.h.t............................................ppps..s......................... 1 0 0 0 +2900 PF01537 Herpes_glycop_D Herpesvirus glycoprotein D/GG/GX domain Bashton M, Bateman A anon Pfam-B_603 (release 4.0) Domain This domain is found in several Herpes viruses glycoproteins. This is a family includes glycoprotein-D (gD or gIV) which is common to herpes simplex virus types 1 and 2, as well as equine herpes, bovine herpes and Marek's disease virus. Glycoprotein-D has been found on the viral envelope and the plasma membrane of infected cells. and gD immunisation can produce an immune response to bovine herpes virus (BHV-1). This response is stronger than that of the other major glycoproteins gB (gI) and gC (gIII) in BHV-1. Glycoprotein G (gG)is one of the seven external glycoproteins of HSV1 and HSV2. This family also contains the glycoprotein GX, (gX), initially identified in Pseudorabies virus. 19.90 19.90 21.60 26.50 18.60 18.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.91 0.71 -4.17 22 286 2012-10-03 02:52:13 2003-04-07 12:59:11 12 2 62 13 0 265 0 111.50 34 26.10 CHANGED lthst.s.cuCplslLts.sshs.thppt.........asAoluWahhhss.......CthPlhhR-YhsCssst..u..sCsthShshhps..shssauhlsts....LlhsPuhhsuGpYhh.lhls.sphhsuclhl ......h........tsCthllLtsssps..hh.sus.............YsApVsWa+lspu.......CtpPIhlhpYssCpsscs.u..sCs.thThshhss..s.spauhlsss....Lhhsss..suGpahh.ltls.s.hhhsphhl.. 
0 0 0 0 +2901 PF02489 Herpes_glycop_H Herpesvirus glycoprotein H Mian N, Bateman A anon Pfam-B_1142 (release 5.4) Family Herpesvirus glycoprotein H (gH) is a virion associated envelope glycoprotein [2]. Complex formation between gH and gL has been demonstrated in both virions and infected cells [1]. 20.00 20.00 20.30 24.80 17.60 19.50 hmmbuild -o /dev/null HMM SEED 657 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.97 0.70 -6.52 30 314 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 106 20 0 262 0 524.70 23 85.33 CHANGED hssphhcpLaspsslsEsLtshhp+hpphapss....sshphsst.s.ssssssspsspsls.spshphssh...h.s.h..lps.......pplhp.ha..pp.shhhps....pslhashcstshthslth....sspsplsstlTpsFhhlolp........sp.pslshlFGpspclsslKu.hshp.shthspsspasLlllsshps.hspt.h.hhp.shtchhhphsphshtthh.pplpshhhth..hpstCp.ts.hs.pphhphhFphslshFhhscsh..ttsphlslspllcphspLphlpchhppCassh...................htshphs.........s..lpphusstltshshpph...tphshtppphhlpthhhs..spph..lssptlptltpllpslYppashsh......L..ssssRchLahhhtllp..................spohpssphthhalltTSMCostElup........hsphhupscshslhchFSPChhSLRhDhop-+Ltp.hspss..................s..phspsssshhshLcshcts.shphhstl.pCls.....scshhl..lPlsshT.YVISsch.lspGhsYsVosshlssshhlosl...pssCphs.ssttssppIPslhNlo...pppC.saCs.SsllpYDEssGlpshhYIsspplpppLhsstsshh.ssN.+sH.....YLlLhsNGTVlElpuhhppc..shshlhlllhhluhhlulalLY+lh 
.......................................................................h..............h...h.................................................................t....h..........thht.hh....t..hhh.pt.............hshhashth.sh.h.hhhh......tpthhhhhhhs.sFhhlslp........sp...h.hhhs.....h.....hps.h..t....s.h..ts.paplhlls.h.p...ph..hhh..shht.h.p....t..h.h....t.hphhhh.....h....tt.st.........hhphhhthshshFhhspt....tst.hslsphlph.ttlh.ltp.hh.hp.shtth....................h.lh..................p....h.ph.th..hht.s.ht..........h..h.thhhhhhh......tp.......hst.hhp.......htphh.thapphhh.h......L.........ss..RptLhhh.thlh.......................t.tpthpthhlh.ouhCs..chst............hhph.tp.p...l.chaoPCh.uhRhDhs.phhh....h.tts.......................hstts.thhshlp...t...h....p.p...hhthh..sth...pC...........sp.hhh..lsls.p.hs.Yllopph.h.pGhsY.lsss.ltpslhlohl...pssCp..t.p..hp.shplshh..Nhs.....tC..hCt.ushhpYspstsl.shhhlps.p....h....h....ss......hsp...hh.s.sscsH........hLhLh.NGTVlclhsh.hct..shphhhlshhhlushlslhhlath........ 0 0 0 0 +2902 PF02689 Herpes_Helicase Helicase Bashton M, Bateman A anon Pfam-B_607 (release 5.5) Family This family consists of Helicases from the Herpes viruses. Helicases are responsible for the unwinding of DNA and are essential for replication and completion of the viral life cycle. 
19.00 19.00 19.00 19.00 18.90 18.90 hmmbuild -o /dev/null HMM SEED 819 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.46 0.70 -6.80 15 968 2012-10-05 12:31:08 2003-04-07 12:59:11 9 38 270 0 294 964 161 251.50 33 46.34 CHANGED schs...........................sphhLNhTStsclc.IVc+I+pLSp...tsspsPp.hsWFcshh-sEsshsh.h.........................................LPFusYLITGTAGAGKSTSIQsLspsLDClITGATsVAAQNLSspLstsh..salsTIFpsFGF+SpHlshp..........Rh.hstsss.soIcclQ+pDLshYWsVlsDIsc+hhpt.................cphtthsshohsuhushscsuhPsLscSNlIVIDEAGlLu+HILTAVVFhaWFaNAhhcTPhYcsGtlPCIVCVGSPTQTDAlpSsFsHppQ+pcV+pu-NlLohLIsNcsLpcYsclscNWAlFINNKRCsDsEFGcLLKsLEYGLPlo-EhhcYVDRFVVPcuhIpNPsphsGWTRLFlSHpEVKuYhupLHspLcls...........tssphslFThPlhshVpscsF--Y...KctlspPuLol-cWLppNhsRLoNYSQFtDQDhusspsEh..........s-pu.....................................hllThclpYV+NSploVTuKsKKslhGFoGTacpFtplLcsDuFl-spuc-psEYAYsFLSsLLauGMYsFYsahhssu.st....phhpcLsplslP.ht.....pshsusss.....................................s-h.D-s...............shLspps...DhFYppYphsPssoohshsElhuhYpslKslFlsRaslhpchFGspFspuPFsTas.sNlsh+sstpFsSs.c.hsGLLuaAsss-oYTLpGYTassVhhhsp-tpp......hhphhhppchPplVl+DppGFlSlLcsNlo+hsEol-s.pplchsossDYGISSKLAMTIsKSQGLSL-KVAlCFuss.pNL+huplYVAhSRsssucaLhMNhNPLRpphE...csshlSpHIlpAL+sssThLVY 
......................................................................................................................................hht............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lt.p....h..AMTIsKSQG.SLc+.Vulshsps...shphuphYVAhS..RsTS.p........c.hL.+...h............................................................................................................................................................. 0 106 204 254 +2903 PF03324 Herpes_HEPA Herpesvirus DNA helicase/primase complex associated protein Mifsud W, Fenech M anon Pfam-B_3676 (release 6.5) & Pfam-B_4951 (release 14.0) Family This family includes HSV UL8, EHV-1 54, VZV 52 AND HCMV 102. 
21.10 21.10 36.20 34.40 20.30 17.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.96 0.72 -3.95 32 154 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 84 0 0 151 0 93.30 30 12.85 CHANGED hslpDlFpsp-hsltlts........t.lpl+llsPh.sFchlhssspsh.shuLhshaptlaspla.u....shpulhPlasYlsPphh.pGss..a..sl.....hFPGFPhl ............s.huDlFplp-sslh.ps.............t.hslRllhPt.hachhlosspsh..hsLhuhapphassla.u......shsulsPlasYLGP-h..pGssh.a.......sl......hFPGaPsl.. 0 0 0 0 +2904 PF03585 Herpes_ICP4_C Herpesvirus ICP4-like protein C-terminal region Bateman A anon Pfam-B_1422 (release 7.0) Family The immediate-early protein ICP4 (infected-cell polypeptide 4) is required for efficient transcription of early and late viral genes and is thus essential for productive infection. ICP4 is a large phosphoprotein that binds DNA in a sequence specific manner as a homodimer. ICP4 represses transcription from LAT, ICP4 and ORF-P that have high-affinity a ICP4 binding site that spans the transcription initiation site. ICP4 proteins have two highly conserved regions, this family contains the C-terminal region that probably acts as an enhancer for the N-terminal region [1]. 
25.00 25.00 34.00 29.80 19.40 19.00 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.49 0.70 -5.76 13 161 2009-09-11 08:11:53 2003-04-07 12:59:11 9 3 37 0 0 135 0 321.00 49 27.44 CHANGED GGFRRhPsGssHTP.........sPucushpAYCsPcslucLsDaPLFPpsWRsALsFDPpALApIAARssussst.....................spthGshsussPLRRRsAWM+.QIsDPEDV+VVVLYsPLPGEcLsssssss...s..ttPpWsspR.GGLShLLAALuNR.LCsP-opAWAGNWoGsPDlSuLsAQGVLLLSTcDLAFAGAVEYLshchuuA+RRLIVlDTVssE-WPpDGPA.lSphHlYlRsslpPsuQCsVRWP...sspcLuRAVLsSuclFGPusFARl.EAAFARLYPsssPLRLCRuuNVRYsVcTRhGscTsVPlsPR-YRptVLPuhDGppchstQstuhshGsPDFl-GpAaSHRAAsRWGLGAPLRPVYLupGR+uutu..uPct..lPtslRsFCtpALLEPDs-ApPlVLpsspuss.............hsssPultWssuhGptsT.l .......uuaRp.P.G..hs..........hPsttshtAYC.sthhhtLh-..hhP..Wp.sL.asPtAhAplAthtss...................................sst.LRthsAWMp.Q..sP-DVpllllYsPLstEcl.s....s..........asst+.GGLShlLAALusR.lChPsotAWAGsWTu...PDlusLst...pGVLlLSTpDLuaAGAVEaLt.hh.usucR+LlVlDsVs.pcW.PtDGPA.lSp...h...HlYlRsshpPsuQss..VRWP...sscsLtRuVhsSucsFGPusFARl.EsAaApLYPst.PLpLCRGGNVtYpVpTRhs..o.VPhsPhtYhthVh.s...ptt............tsDF.tttuhSHRAst+WGLsA.LRPlal..u+.shh...tPt...h...hp.hCttslL.PDstA.PlVl.....st..s......................stl.W..t....................................... 0 0 0 0 +2905 PF03584 Herpes_ICP4_N Herpesvirus ICP4-like protein N-terminal region Bateman A anon Pfam-B_1422 (release 7.0) Family The immediate-early protein ICP4 (infected-cell polypeptide 4) is required for efficient transcription of early and late viral genes and is thus essential for productive infection. ICP4 is a large phosphoprotein that binds DNA in a sequence specific manner as a homodimer. ICP4 represses transcription from LAT, ICP4 and ORF-P that have high-affinity a ICP4 binding site that spans the transcription initiation site. ICP4 proteins have two highly conserved regions, this family contains the N-terminal region that contains sites for DNA binding and homodimerisation [1]. 
25.00 25.00 102.60 102.30 22.20 21.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.13 0.71 -4.76 14 132 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 38 0 0 132 0 174.20 51 12.23 CHANGED sssGEsWPsusPPPtGRVhaGusG-pRpGLWDsP-VcpAttRapAusuPsPVaVPEhGDssKQYpALVchlas..sp-s..huWLQssKLousDptLschC......p+hhsus+upGo.......hlTGoVssslPHlGcAMAsscsLWALPHsAAuVAMSRRYD+sQKsFlLpSLRRAYAsMAaPtsus .hssGEPWPGusPPPsGRVhaGssG-oRpGLW-sPplcpAttRacAustPssValPEhGDsucQYctLlchlYs.Pst-s..huWLQNPKLossDtsLsphC.......+l..hssuRupGo.......hlTGSVApslPHlGcAMAsGcshWALPHAAAAVAMSRRYDRsQKsFlLpSLRRAaAshsaPcsus...... 0 0 0 0 +2906 PF03361 Herpes_IE2_3 Herpes virus intermediate/early protein 2/3 Mifsud W, Fenech M anon Pfam-B_2178 (release 6.6) Family These viral sequences are similar to UL117 protein of human and chimpanzee cytomegalovirus, and to intermediate/early proteins 2 and 3 of certain herpes viruses. UL117 is thought to be a glycoprotein that is expressed at early and late times after infection [1]. This region is close to the C-terminus of the protein and may be a transmembrane region [1]. 25.00 25.00 48.90 44.20 18.90 18.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.74 0.71 -4.91 14 91 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 27 0 0 96 0 159.10 35 28.34 CHANGED Phpshs.+plhpcshclC+phpspspsIhhhaTRoppVtpslcsh+pcLhphsNlolSsPFphpHT.PhhHs.cssppsschpppGsppsW-hpcsp.s....HshssRpSDh+ohhIpAATPhDFlsAs+lClphupcaPKplslRlsol.psuht...LPIYssssc. .....Phphssh+plh-Esh+hC+shpspspsIhhlaTRspplcstlcsl+scLhphsNlslSssFhhpHT.shsHsPcsuppTtchpppusptsWshcc.p.s.....+phssRpSDh+sMhlpAATPhDLlsAlpLClslspKaP+plslRlhsh.psuhhh..LPIYpphsc.h................. 
0 0 0 0 +2908 PF03363 Herpes_LP Herpesvirus leader protein Mifsud W anon Pfam-B_1664 (release 6.6) Family \N 25.00 25.00 29.70 29.60 21.40 20.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.91 0.71 -11.96 0.71 -4.69 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 7 0 0 46 0 111.30 67 134.18 CHANGED RGD.SEuPGPoRPGPPGlGPEGPhGQLLRR+RssSPTtGs.E.P+RVRRRVhlppE-tsVSGsPotPRGD.SEuPGPoRPGPPGlGPEGPhGQLLRR+RssSPTtGs.E.P+RVRRRVhlppE-tsVSGsPsuPptPh.Q................PsApShREWLuRsspRssspPVsTh.RRRVYIEEEEE. .....hGD.SEssGPsRPGPPGlGPEGPhGQLLRR+RssSPTtGs.E.P+RVRRRVLVQQEEEVVSGSPS.PRGDpSEuPGPoRPGPPGlGPEGPhGQLLRR+RssSPTpGGQE...P+RVRRRVLlppE-.psVSGsPs................................................... 0 0 0 0 +2909 PF03122 Herpes_MCP Herpes virus major capsid protein Mifsud W anon Pfam-B_600 (release 6.5) Family This family represents the major capsid protein (MCP) of herpes viruses. The capsid shell consists of 150 MCP hexamers and 12 MCP pentamers. One pentamer is found at each of the 12 apices of the icosahedral shell, and the hexamers form the edges and 20 faces [1]. 
25.00 25.00 40.20 40.10 14.90 14.40 hmmbuild -o /dev/null HMM SEED 1354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.16 0.69 -14.22 0.69 -7.25 32 190 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 98 2 0 182 0 1168.50 38 98.62 CHANGED stls.ssuslLspI+.outctLFcsFchhhG-DspcYslpF-sLLGsYCNslpaV+FLETuLulAClss.cF.DLphMs-G+lQFclphPpIA+uss+sss+.sphhlsKtscK+slossFplustsLplLptshps.T.l-phhplpAlppls+slphslDAhERGhlDphLpVLL+KAPPhhlLpsL.pcshhspRshs+lsRuslluphKp+lhpshFFLs+sp......+phlhphLs-hlsus.ppSVhpstsTaos.ssGc.lsGVllTTssVhppLlshlsp.lsctssssPssYGpaVlsGpNLVTAlshG+sh+sF-phsp+llshtps....tsp.p.........psp.spsplsssllplG-+hVhLEuLc.RVYpsTpssaPLtppl-lTFaFPlGLahP...s+YoT.uuplps.....tssscp..PspsaFhNKDphlhplsapsALtTlCHPphhssssslpuLptt.h....ssstch..............t.phauhthpp..sthslhphhhpFac...s+h.sshsshshcsphos-pFh+PsN..hLthELHPhFDFaht.tssshs............shtAohRlhsGNIP.PLuPssF+-uRGtQlts..shppl..ssuTlphlpsThpDssYPhlhYllEAhIHGsEctFshhhpLlupCIpoYWpsstpLAFlNSFtMltaIssaLGsGtlsc-sauhYRclhuclpsLcpsltchsh.s-slss.pshtthssuLhDssLLPPhlac..hs.ll.csp......Rssplpsssp.hsss...sc.ttlshh.+hpshhsctsshhss.ssss..h.hh.t.........-hhlLpKIaYYsllPAhopG+sCuhGVcacplhhsL...pss......................hss-....hsss-cs.hs............pLhssslpslhpsucls.sssshl.tpLttshhpssppTpslclpsshDsupppussp.shpshpssLaNGhhhhuhscpspthh.psaFYPlPhpthausP.....tlssshssplpshlpchPt.........lPshLuuEYhpahRsPhspYu....spstss.solo.sLhuhahKLSPluhhtQh+ptlHPGFAhTVVRpDcFhsEplLaupRASpShFlGpPpVs++E.+ssulsFcloQshuslDhGLGYoushssAplpslsTDMGspsQsLFtshsstsatss-lssalRptluspp..s.....sshshhshGthpsssssGltHGQhAsCEhIlTPVouDlsYFppPsNPRGRuus..lhusDs.sppsAcphhYDHopsDsAasaRuTsNPWASQ+sSLGDlLYNupa+pp..sssshYSPCtpFFoss-lhppN+sLh+LlsEYss..ussssousT-lQahsssGTcphlEcPCthhQEAaPhLsAScpALLcph..................hsspssp..........uEsHhupYLIc-suPlpthlh 
..........hhsssuslLopIchuutcplF-.FphhhuD-sphYslpF-sLLGsYCNplphV+FLchuLulAshss.+F.-LshhspG+l.Fclp.PhIApssshsssp.hp.hhsKhhc++slshsFplustslshLptph.s.T.lsthhplcAlpplhRslpsshDuhERGhhcphLplLLcKAPPh.hlL.sh.pc.h.st+hhs+ltRuslluphKpphhps.FhLs+st......+phl.thLschssus.p.Slhhsh.Tass.spGc.lsGVlVTTsslhppLlphlhp.lpcppsssPsoYGphVlsupNhVTAlshGpshtsh-phsp+llshppp....p.p.......t...tst.ppspltscllplG-+hVhLEuLc.RlYpsTps.hPL.tph-LTFhhPlGLa.P...cpYushsuclt......psssct..PpplaFhNKDphl.plshpsAltTlCHPshhsspssltshppt......ch..................t.p.hthhhtp...s.hshhphh.pFhp...tch.hspsp.hhcsphoscpFhpssN..pLhhELHPhFDFhht.tssph.............th.sT.RlhsGNlP.sLsPssF+-sRGhplt...thpth..t.uTlthlpsohpD.sYPtlhYll-uhIHGsccsFthhhpLlspCIpsYWpspphlAFlNsathlthIssaLGsGtlPppshshYRclluhlphltphltphsl.stplss.ps.tthsphlhD.tLlPPhlac......h.+st......csstl.sssps.ssht..hp.tths...Rhsshhhct..hhss..sst.................-.slhpKIaYYshlPAhops+sCshGlchcplhssl...psh......................lsP-...hhss--s.hs.......................................pLlsshlpshhpsucls.sssshh.hpL.hhhhphsEpTpsl.spus.Dsutppussp.thphhptsLasGhhhhu..ch......tpaFhPlPhpthauss.....shssshsstl+phhpphP..........lPshlutpYashhRpPhspas....tps.ss.pul...sLhuhaaKlSPluhhtQh+stlHPGFAhTVVRpDpF.s-plLaup+ASpuhhlGp.pVs+c-.pshussFplTQshusVDhGlGYoussssAtlppshTDMGstsQsLahshss.shhsspsstalRptssstp.......ssshsh.sFGshpstp.suh.HGQtAsCEhIhTPVosDlsYF+pPsNPRGRAus..hhusD..pptsspthhYDHspsD.uhsatuTsNPWASQchShGDlLYNsta+.p..tssshYSPChpFFsss-lhsps+sL.+llsEhss..uhsshsuso-hQahps.GopphlEpPCthhQEAaPhhsuos.ALLcsh....................hpspstt..........sEsHhupYlIt-suPlpthh....................... 0 0 0 0 +2910 PF04797 Herpes_ORF11 Herpesvirus dUTPase protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_6280 (release 7.5) Family This family of proteins are found in Herpesvirus proteins. This family includes proteins called ORF10 and ORF11 amongst others. However, these proteins seem to be related to other dUTPases Pfam:PF00692 suggesting that these proteins are also dUTPases (Bateman A pers. obs.). 
20.10 20.10 20.20 21.30 19.90 20.00 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.85 0.70 -6.01 23 85 2012-10-03 01:18:03 2003-04-07 12:59:11 8 1 32 0 1 104 1 307.40 23 75.33 CHANGED hshshWsssltsshlpl.oNppslpltsspsp......LPhsssh.thLtcphsuFuhssolhshp.hspsshhlhus....pssplclhPtslhcsppslslplpsstps.lstGsLphhllslshlphstlhhhhhsspss..h..sssssptusthsspp.splplsGpsspossps.h.hhhsppshsFhc..........................................................................shs+lpstpstsshlpshphucshh+lslp..p.ptsttlphph.lpsshohh.ps.l..........sFpas..hthshptsp...l..lYsssshhlsPspshpVchsspYtsstssttss..hhlsspsssspahlps...s.WhPtsshplslpNsostPlplpsustluhAlFlh.....ptssssshschhhpptosslphsusl ...........................................................................................Wphpl..shhhh..Nh..h.h..tp.t......LPhs.s....lhpthssashs.sh.t......ht.shhhhhst.....ss.h.lhP.hl.p.pt.l.lhlps.....h..tthhhhlh.lp.h..pth.hhh..............sth.p.hp.hss...s.h.ltG..h.s.....p.tp...hh.s.h...hhp............................................................................sphp.......p.h..........hstthh+htht.....t.s..h.h...htsh........l..........sh..s.....s...st....hhslYscpslsIsPtcotpVhasstYhp.usstphsh..hllstpssps..c..hhlcP...slWhPussstlsVhNsSspslsIsssTplApAlFh...........tt...p.hht..sttl.hst..................... 0 1 1 1 +2911 PF02399 Herpes_ori_bp Origin of replication binding protein Mian N, Bateman A anon Pfam-B_1518 (release 5.4) Family This Pfam family represents the herpesvirus origin of replication binding protein, probably involved in DNA replication. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 824 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.50 0.70 -6.59 13 207 2012-10-05 12:31:08 2003-04-07 12:59:11 10 9 101 0 8 215 11 389.30 22 60.09 CHANGED sYsSSsSLARhLYGsDLh-hlh+s..+P.uVolEpQscuPVsFPsPhssssRsVpVVRAPMGSGKTTALlcWLppsLt.usDtSVLVVSCRRSFTpTLtcRhscuGLs....................................GFsTYhsussYlMsst.a+RLlVQlESLHRVspsLLss..YDlLlLDEVMSTluQLYSPTMp.+LscVDulLhpLLRpCP+IlAMDATlNuQLVDhLutLRG-cslHVlls-YAosGFSpRpChlLcsLGs-sLtuslp.....tt.stpst......tsst...stt....psss.....FFucLptRLtuGcNlClFSSTloFSElsARFCtpF..TcuVLlLNSpcshs.DVssWupYRVVIYTTVVTVGLSFDssHFHSMFAYVKPhptGPDMVSVYQSLGRVRoLhcNElhlYhDuSGARu-PlFTPMLLNHVlussusWPspFsplTNhLCpsF+t+Cssua....ttucshhLFsRFKYKHhFERCTLssluDSlNILHsLLssNplpVph-Gsp...slss...csFssFLpsL+tDAhsup+cl+pLR................tsssshshssplh-s-...sVusFhcKY..LRssVss..p-lhpLlpsLus.PlsRtpFlNlslLc...AC.........htlPsAlcSpcVFp+lYs+YAoGslP.........llu..sGtlEhssLssshNssspW.-La+LCsphAcsLslsstpssss.s........lssssllpshsschsp.htphlLElh+CslT-uphhsccsVptstshLuGttstpht..shSptcH...AlulF+lhWcplFGs+lsKSspTFPGssRVKNL+KpEItuLLDuhslDRous+THRpLYsLLMpp+ppFpp.sRY+LRsPsWuchlp.p..........-h.....LEsuLuclsspsWPpspG 
..........................................................................................................h....hh+ushusGKT..h.ht.hhtt...................ph...hhl.......shR.sh.tph.tph.................thh..h.p.........pt......ht.........tplhhplpSl.h+h......t.......................h..-..llllDEl.p.hhtp.h.h.o...hh...p...p.....ht..s.....t.hhh.hltts.pllhhDAshst.hhphh................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 2 5 7 +2912 PF03325 Herpes_PAP Herpesvirus polymerase accessory protein Mifsud W anon Pfam-B_3097 (release 6.5) Family The same proteins are also known as polymerase processivity factors. 
25.00 25.00 331.80 74.80 18.80 18.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.82 0.70 -4.86 6 52 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 27 2 0 48 0 243.10 52 71.12 CHANGED MsAsDasMEFSSsCVHGQ-lVRESGcSAlRVDLDaSVVuELLKWIuPpTRsKRssK+ssssssTsQIlVHuNPPoIKF.LsssSELEFTAosRVuFH-VKNMRlsVQhKNLaQALsNCAVTKLuCoLRllo-H-shLaluSKNuhFoVENFLTEEPF.RuDstF-Rthtppps.............slsNust.pss...........s-.h............s+Kt-R......ssRp.stt.........-H..ttG.upcsKYE.QHKITSYhVsKsuuuuG........-R..osaFNDsKEESDSEDSVoFEasPNoKKQKCu .....MsAPDaNMEFSSsCVHsQDl.lREotsSAs+VDLDauVVu-Ll+WIuPphRsKRNsKKtsssouTVQIhlHusPPslKF.Ls...ssSELEFTAsNRVuFHtVKNhRlsVQhKNhaQsLhNCAVTKLsCTLRlhT-H-ThLaVuS+Nu.FslENFLoEEPF.RuDs.....F-+..htt.................s.sNusshpss..ssG.D.thhs-.h................s+KH-R......ssRKhsttGs..........tDH..ttG.upcpKY-.QHKITSahsoKGusuuG........-R...usYFNDsKEESDS-DSVoFEas..PNoKKQKCs.... 0 0 0 0 +2913 PF04846 Herpes_pp38 Herpesvirus pp38 phosphoprotein Mifsud W anon Pfam-B_4545 (release 7.6) Family This protein represents a conserved region found in most herpesvirus pp38 phosphoproteins. 21.40 21.40 21.50 21.60 21.20 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.88 0.72 -4.66 5 34 2009-09-11 14:05:08 2003-04-07 12:59:11 8 1 9 0 0 24 0 61.50 59 25.88 CHANGED ENsshRSthllsLlhSAKoLVlGSCMuFhAGsLIG+usclcoospssssLhhAFCAGhlssGl .....ENATMRShMllTLIsSAKSLlLGSCMSFFAGhLVGRouc.V+TPlWDTVsLLMAFCAGllVGGV....... 1 0 0 0 +2914 PF04637 Herpes_pp85 Herpesvirus phosphoprotein 85 (HHV6-7 U14/HCMV UL25) Mifsud W anon Pfam-B_5418 (release 7.5) Family This family includes UL25 proteins from HCMV, as well as U14 proteins from HHV 6 and HHV7. These 85 kD phosphoproteins appear to act as structural antigens, but their precise function is otherwise unknown. 
18.40 18.40 19.60 20.60 18.00 17.50 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.56 0.70 -6.20 13 77 2009-09-11 05:49:28 2003-04-07 12:59:11 7 1 26 0 1 71 0 464.20 29 71.87 CHANGED lD-pLNFhPslloscch+alpcsLspu.ssstVuhlNuslPMPsYsLEuLlcshl+pphs+s+slhcslI+lslhlN+YhsupchLcchpsthcshhsssp.hcRLcpuLppLh.pstpssssPhslhhtLscpsloputappsLcpLpclhcshshc.sschcs.pYppLpsaNhLapsPhaTopEAl-lYucNLpcLTpppscs..hcLloshpp..stsh-clLNDlhFLLSltphlhpapcpLptLRsaIhhplssLs-shYhsYsQlP-hRpsahsLsctltshhspsp..s-t.....FpshltsLhphl+plpcAsValsPsYl+ttlhth..hh.p.pps....ts.Dps.-tpp.....................h-.-ssushs.shhhspNshsss-lFRs.Pcss.phhtc..hhcpphopplhhsspssploscphplpphsplhhcuuuppsplsPcphhchh.........t......pst.t..t..husVphpsh.................p..pstp.s....uspssshssspPhusstp.p......tst ................................................................ls-pLsFpsslluspchcalttsLppu.ssstVshlNpslPMPsasL-uLl-shl+pthh+s+slhc.lIphslllNaYhsupchL+chpsthcshhsssp.hcRltptLppl..pstpspsss.slhhhlucts.lTtGtappsLcplcplhcshshptusctps.hYppLpsaNlLFpsPhaToptAlpLYtcNLp..cloppppcs..lcLLoshch..p.s.-csLNDhhFLLolpphlhpapcsLchLRsalhpphpsLs-hlYhsYsQsP.p.hRpsahpLsctlththspsp...D......hpshhtslhpFl+plpptslalCPsYlphslhtl.................h.Dps..........................ht.-.ss..u.s.hhstp.hsssslF+s.Pctspp.htc..hhpp..sh.hhhshpptphsspph.lpphhphhhpusph.stlsPcph.chh..p.hs.s.....................l..hsh.................ps.....ss....tt..t......sshP.p.h......ps................................................................ 2 0 0 1 +2915 PF03326 Herpes_TAF50 Herpesvirus transcription activation factor (transactivator) Mifsud W anon Pfam-B_3658 (release 6.5) Family This family includes EBV BRLF1 and similar ORF 50 proteins from other herpesviruses. 
19.70 19.70 19.80 20.10 18.10 19.50 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.51 0.70 -5.55 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 25 0 1 68 0 284.80 34 89.77 CHANGED LFRN.Ks+pRupsuss.ssCAphuppll+alhERllhsTD+hFlsAsCSGVslP.sLApslaclhp-sRsKChuuWRpLussRRslMslucpllssYN.sLcssGhlSsch+AFhKLsFPshsLpplhpPlhthspGthsspshhshpG..up++RsspsshaSthsstspaslP-sLltshs-....sGLlssstsDlSsLLpNPppILp....hs.lupFpssssps..slPpsho.lssssoussPtp...sFtssasuso.........pssptpsostshustuspS.....sossptsplssS.ssts.sppshtuplshsosstPu......phsh.t..pssthP...........Pst.hsttp.p.........hssstlppshtsussppP..................tspPsusupps.sssush.pshss.ssossshpsssphsPshsp.ptpsssshps..............hsusshsspssp...........hsPuttp...........tDsslhshlppshssph-huuPss....hhhsshsp-l.p.sssPshcsspsstpt.-phsslssP ........................................................................................................................................................................................................................................................................pShLsauss-lShlL.pcsush.c...hsctsuPt..supThpP..ulPpspSchcPSPousssustp.spspstspuT.........ssspscssssPpuPpusop.....sossQcsp.sCS..s.ssusspssh..haspQsusshsu......plstsssssshshP...............................Pspspshppsshs................................................................................................................................................................................................................................................................................................ 
0 0 0 1 +2916 PF04843 Herpes_teg_N Herpesvirus tegument protein, N-terminal conserved region Mifsud W anon Pfam-B_3992 (release 7.6) Family \N 25.00 25.00 25.10 26.60 22.80 21.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.11 0.71 -4.84 23 211 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 85 0 16 203 0 173.50 32 6.75 CHANGED Ss.sQuc.s+aGstAGsQClSNClhaLpssYhsG....ssllspcsLDslLcpGuclDthhcpssh......hs.spastls-lPshlpos...............hGpsshhhSp.h.Ghlpsps.s.pt.hlt.lchlhpsphpphsp.YhlhhssshutAlllp.ssphalFDPHspspt...ssAAVlsspsss-llsYl ....psQhs.schGPsu.GspClpsSlsFLphsahtG...hss.sLos-slDusLpEGuphspthpptsp.........sshsths-LPshlph................aGcssaa.stPhtG.hhp.Tpsss.pt........Fhttsatt+t.s.Y..sll..Tlssh....uhulhpp....scthalFDPHupsph.......stAhhspsphs-lh.al.......................... 0 8 8 16 +2917 PF04523 Herpes_U30 Herpes virus tegument protein U30 Kerrison ND anon DOMO:DM04370; Family This family is named after the human herpesvirus protein, but has been characterised in cytomegalovirus as UL47. Cytomegalovirus UL47 is a component of the tegument, which is a protein layer surrounding the viral capsid. UL47 co-precipitates with UL48 and UL69 tegument proteins, and the major capsid protein UL86. A UL47-containing complex is thought to be involved in the release of viral DNA from the disassembling virus particle [1]. 
25.00 25.00 30.40 70.20 24.40 22.70 hmmbuild -o /dev/null HMM SEED 887 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.43 0.70 -7.09 18 68 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 48 0 0 67 0 893.00 26 92.53 CHANGED LpclhppLpssoss.hspL+hLhclElsslslpsl.ossplppFLNpLsphsG.tahsFIppahVFYLL+tuThsspphsht..stcplhphLpph............pphsssstsssssshLsNpplLsplppalsphp.....................................................................shpshppssssspsltshRsVEEllcthapshaphhhtshhh.pht.sssohL-pWLhhhatpshh.........tsshsslpshAppLlsp.............ctc.lFsshsssstshlshPhAcpputtIaplFsps..spsssstssPlLuhssspLs.plssphhFhYcallEuLhpspsY..........ssspstlEpFl..............................................scshshhssluspl.pssop...ppphohsslpphpttLhphGLoccsCpsatphlhhp....s....tssssshcthsphhphlspLshhuahFatCLtpaSPTulhtpch+.ILcpspuhtssppt.....................phsh.Ws..hhslhphFhspsPppphpphspulo.sthh+shhahshpcpWshhhhshp...........ttssssshsstsssopp-lp+aCctlph.......G-s..sY.-s..sllppPhFsptFlcphllPplptILssphpctRuhhplRWLllauus.cs..PpLhpl++sLshlYhtlh-Ihctst.............................................uuhtslhDhhp-lhstlpphs.........ssh..ohsssLLppLahtpassshpphlss.lpcalscspsllpshhshsplusslspsphphsspstpVplshts....ppshpl..............slssFpsslpslpppsp-hhstlsphspplpshhhcLh.sllpclpplppH.l.....shs..p.shpplpcthhcshp+hpsltsplssosshph.s+pFstLapssLlslthlpclLshs..cppssspshl-uhs.shsptssss.s........LsccslptLpchhsphh....psspsss...sslc+paosshchsthsls 
..........hpclhcpL+tpsss.tttLphl.plEluulsspsl.oustlppFLstLsp.sG.haFsFl+ppsVFYLLppuTlpssppsL...sAcplhppLpch............tpttssssstsp..shLsNtp..sLsthpphlpplppl...................................................................shpplppssstspshsNCRsVsELl-hhYpphhtahhp....h.a.hthppssDopLDplLhhtahashh...........pussssLts.Fpphlcp.....................ptc.hhsshssoshsssptPsucphpshSa+lFstsltsp-ssshhaPllusshohLs..hluPcphFFasGllptLLpppsh..............t.hcph..L...............................................cthschhs.plspplhcsss....pp.hsltclhphhptLhphGLsccoCssYtpMlhhp.............s.......sspps.thls-htpplhpllassahFFhCL.hYSPTF.LFhp+++hILEppcuhlhus+p.....................phptlWsplshNlpphFssphsEc-Fpthscuso..phtRpaLY+clppKWGshhFshp...........ttsssssshsshcslTppDlh+hCthlpl.......u-s..sYsolhshspaPtFsshFhphhllPplppIhshs.sphpuhss.RLLhLhths.clLhPpphpLh+sLl.lYshhh.lhchDt.......................................................usF+slhDhlh-lhstlppls..........tssh..o.sscLLspLhspuhspsltsplsshlpchhpssts.lppalcasplChuLscspsphsp..c...shsVhl.lps.........phshpV..............shspFhpplcplhcpscpltpslptlspcltslhhRlt.plhp-spplspa.........stsp.shpphscshp+shp..+lpslpsplstshpp...sp+.NthhhsulhphhtlpclLspctlcppuhp.sls-Ahuhhppppuhsshs.s......hsp-shph..L+chhcshh.....pss.pss.....shhpphosst-pss.p............................................... 0 0 0 0 +2918 PF04541 Herpes_U34 Herpesvirus virion protein U34 Kerrison ND anon DOMO:DM04381; Family The virion proteins in this family include membrane phosphoprotein-like proteins such as UL34, Epstein-Barr and R50, from dsDNA viruses, no RNA stage, Herpesvirales. The family Herpes_BFRF1, Pfam:PF05900, has been merged in. 
25.00 25.00 31.60 30.90 21.20 21.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.31 0.70 -5.42 29 132 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 81 0 0 115 0 202.70 29 69.62 CHANGED shtcphhschs......sllpRlhhlss.usLcsscssss...hspuss.shCshphp....sspsaslEYVh+h...WA.csps.....sPhhhlpNTGluVhlpsFhstPtss....hu.slsstcsNVhLpsscosuluLsDlpchKt.h..uhDhcshpuhlhhpsalp...spspLpFhhhGPcsssRspplLcpshttt....tsctptp.cpstsshpspsttsssts.sss ...........t........hh....slspRIhhlss.usL+ss-sp.....hspsss.uhCsh.hp....sspsaslEYlLph...Wt.cssp.....sPhhhhpNTGluVhlpsFhptPhss.....s.shs..stcpNV.LpsscolsloLsDl-clKt.h..shchsshpsplhhpsasp...sphpLpFhhhGP-ssu+hppLLcclhttt.....hsptptt..hppt.s.tsp..........hst...................... 0 0 0 0 +2919 PF04533 Herpes_U44 Herpes virus U44 protein Kerrison ND anon DOMO:DM04376; Family This is a family of proteins from dsDNA beta-herpesvirinae and gamma-herpesvirinae viruses. The function is not known, and the proteins are named variously as U44, BSRF1, UL71, and M71. The family BSRF1 has been merged into this. 25.00 25.00 28.90 28.90 23.60 22.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.38 0.70 -5.07 11 59 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 0 0 49 0 205.90 33 77.15 CHANGED Mu..h.tWh.CCGl..h.FG+ssp.pYcpLs-s...tpss-Rh+tEI-hGLPP.GVplGDllpsppsppoL+QsYLLAV.....QoNpIT-aLcRFDusclPpuCpslVpsQIsKL+ulpslIWNsMlSlAlGslolD-uulpsLLsKpAs-ohuLhEMEKlATAlchD..sossWApEIssllsstssssts......sslhspspsphshs.h..s..................cstph..sst ...............h...................utshs.cYhhLpss...-sp.c-lpshl-.sass.GlssuDLhphs+-s-.slc..aLLtLh....QspphssaLc+ahusp..C.sHs+sssclEspK.pplhpsl-lhhLKLsVGEhsh.s.-.-uLchLL-KFusDQsoLsEhpKlhsLlcMD..ppossah.......................................................hptsstts....................................................... 
0 0 0 0 +2920 PF04529 Herpes_U59 Herpesvirus U59 protein Kerrison ND anon DOMO:DM04374; Family The proteins in this family have no known function. Cytomegalovirus UL88 is also a member of this family. 21.10 21.10 71.40 69.30 20.30 19.60 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.39 0.70 -5.62 8 35 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 27 0 0 27 0 390.30 40 96.13 CHANGED PsssWpDuALlhsDGTVtEHchhNusLucLlRRplcs..-s--t....sVauSELulYloGRssRpuSsFSIYWpsHS-llYsLTGlTHCsKIsVECGph.........................................................................usDup..............plY-hP+laLlRspD.ussuPpcVsWstTsshWsp-VsIcsVp..p+sssARshsslh...................P-ltthhhPLLphtsc................................................s-.slFcE+VussYpRL...t.chssss+oppclLp+ClcLAutK+LLLlDGst.LENFFLspVCLYpLGEDs....luEEhlGhLtsRP-c.....uuuuFhLH+cshpsAssLAllLNslac+Q-tLPsl-p........RlDpsD.lssAl+cYYcpalsVphpsLuhApclLstFopphsstsuhshl..sttlslDusVSRpDllpVLRu ........................sstWpDuALlM.sDGTlhEHtFhNssLupLlRR.............hl.s..-t--p..........sVauSELuhasSGRhsRRuSsFSIYWpsHSDLlYALTGITHCsKIsVECGph.........................................................................ssDup..............chY-hPtlYLlRsp..D.ussuPpcVhWstT.sshWspDVsIpsVp..p+hsuARshsssh...................P-lttchhPhlp.hsc................................................s-oshFcp+VuusYpRL.....chsshs+sspclLp+CVpLAuuK+LLLlDssh.L-NaFLspVCLacLsEDp....huEEhlGhLts+s-s....tuuusFhLH+cshpsussLAllLNslacapctLPsl-p........Rl-psD.lssshRcYYcuassVpspslusApclLssao-phsshcuhshl..chhVshDusVoRccLltlLph.. 0 0 0 0 +2921 PF03580 Herpes_UL14 Herpesvirus UL14-like protein Bateman A anon Pfam-B_2982 (release 7.0) Family This is a family of Herpesvirus proteins including UL14. UL14 protein is a minor component of the virion tegument [2] and is expressed late in infection. 
UL14 protein can influence the intracellular localisation patterns of a number of proteins belonging to the capsid or the DNA encapsidation machinery [1]. 25.00 25.00 68.00 67.80 19.60 19.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.73 0.71 -4.41 12 75 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 36 0 0 47 0 142.40 53 74.68 CHANGED pRRcRhhLAEs....+hRtplYKpRTL-LlptGVsspDPtFltAFTuA+pAct-hctpl+SstRlpslcp+sptIpt+V-pQssh+tlLsspRRaLsscFhcpLDpsEDslh-pE-pLp-A....p..huss.st.ct....hsp---uLLspWhLpps ....p.tthRRRLAEs....HlRAclY+-pTLpLhREGVoTQDPcFVuAFMAAKAAHh-LEA+L+S+ARLEhhRQ+AssV+lRVEEQAARR-hLsuHRRYLcPuLuERLDAs-D+LADQEEpLEEA..sssh...st....t.......ht.tppsLLh.W.Ltt.......... 0 0 0 0 +2922 PF04559 Herpes_UL17 Herpesvirus UL17 protein Kerrison ND anon DOMO:DM04385; Family UL17 protein is required for DNA cleavage and packaging in herpes viruses. It has been shown to associate with immature B-type capsids [1], and is required for the the localisation of capsids and capsid proteins to the intranuclear sites where viral DNA is cleaved and packaged [2]. In the virion, UL17 is a component of the tegument, which is a protein layer surrounding the viral capsid [3]. 
25.00 25.00 25.80 25.40 22.10 22.80 hmmbuild -o /dev/null HMM SEED 540 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.81 0.70 -5.92 36 169 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 90 0 0 150 0 507.20 22 95.08 CHANGED M-sHltN-phh........sspsthllHl.llscssLpttslsh.......................spshhspspochpsssts.o..............................Wppl.authh.spsslsshl...........................hGLhlSlPlhs.t.p.....assFshlhLRlsh.....................sst......phh-lhFhYt-Llssttch.tpstph.....................................................................ssss.sssstpsplp..sshp-Ihphhppsst....shst................................................tsshsuhhpssh..................t..t.tpststptsuLEsPuplRGtt..................................t..psts..phhssshsthss.ssshsstWpt..pslslshhpHhhosp.phhVlsYppShshstptph......sslsp.ht.......................tphshhss.h.............shpsEhthlhphtsthhssltpshppp........th..pltQplshpl...........................psppthpplscph.cuhtslhstlsp.......sSullcAsluphstpstt......hhs......................shhLa-htssshGcslc.hspsslshththphh......................ssstthhpphhsGcshta.....lClha.spsLpshlVLPGGFulpuphsh 
...................................M-sHlhs-hth........sspsthhlHl.llsppsLtttthsh..............................................ht.hhhps.sphpssstp.s..............................Wpps.Fsthh..s.tltphL..s..........t.h.........p....hGLhhSLslhh...p.....asshsshtLRlsh.........................sst..........phhsl.Fhht-Ll..thch.tcspph...t.t.hth.........ths.t...................................................t.st.ssss.tstlt..p...Elhp..hhppsst............................................................ttshsuhhpsth..................t...tttptsth....hssLEPPuphRhht..................................t.p.ps..chhsts.tt.ts.tst.sp.Wpt..tslplh.hpchtstt.phhVl.YppShhhstchsh......psl.thls........................t.....................shtschthhhphttsshpslhtslppt.........th..pltQhhthpl...........................tpp.hhttlhchh.thhtshhpthst.......poshhpAhhupLstttst.......hs........................shhhh-hhpss.G-sl+.hsps.lshshphpht.....................sststhhhpphhsGc.hta.....lClha.s.pcLpshlVLPGGFuhphpls.................................. 0 0 0 0 +2923 PF04544 Herpes_UL20 Herpesvirus egress protein UL20 Kerrison ND anon DOMO:DM04384; Family UL20 is predicted to be a transmembrane protein with multiple membrane spans. It is involved in the trans-cellular transport of enveloped virions, and is therefore important for viral egress. However, UL20 operates in different cellular compartments and different stages of egress in pseudorabies virus and herpes simplex virus. This is thought to be due to differences in egress pathways between these two viruses [1]. 
25.00 25.00 43.10 32.70 22.40 22.10 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.04 0.71 -4.45 16 54 2009-09-11 15:08:44 2003-04-07 12:59:11 7 2 36 0 0 45 0 165.10 32 77.71 CHANGED GssDahloSuhsph.s.tpPsFo+pVllahhSAllL+PlCCllFhhYYhhossthhhlsuhshTshaYh.pLslp.hhhlYtNl+pDcLPLsssQphlluhlssupslsFhssuhptlFtssplFhhlhssptps.......h................shusshlshhushlYusDulsDuluFhLPRhWsRull+ ........GssDhhlSSAhsphss.s.pPsFoppslhahhSsllL+PlCClhhhhYYhhTtphhhhhsshshshsaYh.phhlp.shhlYhNl+pDhLPLusstphhluhhssutslhhhhsAhpthFtsstlFhhlhssptp..h.....h................shussshshhAhhlYusDslsDshsFhLPRhWsRslLp.. 0 0 0 0 +2924 PF01646 Herpes_UL24 Herpes virus protein UL24 Bashton M, Bateman A anon Pfam-B_946 (release 4.1) Family This family consists of various herpes virus proteins; the gene 20 product, U49 protein, UL24 protein and BXRF1. The UL24 gene (product of the 24th ORF) is not essential for virus replication, mutants with lesions in UL24 show a reduced ability to replicate in tissue culture and have reduced thymidine kinase activity as the UL24 gene overlaps with thymidine kinase [1]. 18.90 18.90 22.20 22.20 16.70 16.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.01 0.71 -4.82 16 158 2012-10-11 20:44:43 2003-04-07 12:59:11 11 1 93 0 0 143 0 174.40 32 65.99 CHANGED LphLPttRt+AGpRsHhRhY+pLhphh...shsplttaLs.........h.tPsspp..sclpLaFEVsLGpRlsDClhllpss.....tphhCallELKTChhss....shposo+psQRtpGLpQLpDoschLpphsP..sGspthplsPlLlFhuQRuL+slhlcp..hsspplpsssstLtshltsht-hssptpl ............................l.ptRhpsGhRsHhchY+tlhp.h...shsthsthLs.............h..h..shhpt.s+hpLhaEVsLGpRhPDClslhp.s...........ps...........ttshChllElKTChhsu......shpsso+ptQhspGh+QL+-ohphlpphsP....sGsp.hhhlsPhLlFhsQ+uL+s.hspp..hhspplpsshsslhshLtphp-hsl.h..h....... 0 0 0 0 +2925 PF02760 HIN HIN-200/IF120x domain Bateman A anon Bateman A Family This domain has no know function. 
It is found in one or two copies per protein, and is found associated with the PAAD/DAPIN domain Pfam:PF02758. 22.70 22.70 22.90 38.00 20.50 22.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.99 0.71 -4.89 10 158 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 24 18 53 201 0 163.60 54 42.91 CHANGED pccPlpVMVLKATcPFpYEosEpth+pMFHATVATcTpFF+VKVFNhsLKEKFltp+lIsIScYacpsGlLEINEASoVSEAsssQshEVPssII+cApcTPKIspLpcQsSGslVYGlFhlpKKpVppKshhYEIpDcTGsM-VVGsGchaNIsCEcGDKLRLFCF+L+ ..ppsPhpVMVLpATcPFpYEs.Eptp+pMFHATVATcopFF+VKVFNhsLK-KFhsK+IIsISsYhppsGhLElpc.sSoVS-ssssQphEVPpslIcpAspTPKIspLppQ.spGohV.GlFhlpKKsV.p.p..c.sshYEIpD..sTG.pM-VVspGc.hpsIpCEEGDKL+LhCFcL..... 0 5 5 10 +2926 PF03369 Herpes_UL3 Herpesvirus UL3 protein Mifsud W anon Pfam-B_2492 (release 6.6) Family \N 25.00 25.00 64.70 64.60 17.40 16.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.20 15 263 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 36 0 0 58 0 62.50 88 73.24 CHANGED VsFDTLFMVSSIDELGRRQLTDTIRKDLRhSLAKFoIACTKTSSFSus.sspp+tRtt...sp+sspSNKSLQMFlLC+RAHAt+VR-QLpuVIpuRKPRKYYTRSoDG+o+PsVPVFVaEFsAs-PVhLHRDNVlts .................................................................................................RKPRKYYTRSSDGRLCPAVPVFVHEFVSSEPMRLHRDNVMLA. 0 0 0 0 +2927 PF02718 Herpes_UL31 Herpesvirus UL31-like protein Mian N, Bateman A anon Pfam-B_1786 (release 5.5) Family This is a family of Herpesvirus proteins including UL31 (Swiss:P10215), UL53 (Swiss:P16794), and the product of ORF 69 in some strains (e.g. Swiss:O36420). The proteins in this family have no known function. 
25.00 25.00 56.40 56.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.67 0.70 -5.70 30 120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 80 0 0 108 0 262.80 32 85.44 CHANGED pup............stss+psppppthh........hshctaFshluppP-hElchLRpMssPIssocslsLPasLsp.hsscsCLsLSshGap.shGusCssCpssupsphsp.................-hsulhLAFlpQlsslhpa+sFahSlls..................tu.chl+pslsQPpLFasYalL+ssshcshslha......tsssshltMYllF.pspslHlspchlcpLhsss.ssYplssDlhpssalLsl....phcp.s...........ssslsssslhcKls-LshssElttEap+hhshhsch ......................................ttstttsts........ths.ptp..p.ttht............+.ph+thashhtppPs.Elphl+.hphPIstppslsLPFshpp.psscsCLsLSshG.p.s.tusCssCtsssp.t.st.................p.sshhLAFlpQhsslhcaRsFhhSlht......................sp-lLctshsQPpLFahYalL+sustc.s.lha.......ssputhhMallF.pspslHl.pchIcphLsAs.ssYclshclhpspaVLsVppp.t.ps.o.............ssplsssslhpKls-lshss-lh.cap+hhshhp-............... 0 0 0 0 +2928 PF03581 Herpes_UL33 Herpesvirus UL33-like protein Bateman A anon Pfam-B_1115 (release 7.0) Family This is a family of Herpesvirus proteins including UL33 Swiss:P10217 ,UL51 Swiss:P16792. The proteins in this family are involved in packaging viral DNA. 25.00 25.00 45.60 45.30 20.60 18.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.48 0.72 -4.08 29 92 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 74 0 0 76 0 77.30 38 58.70 CHANGED tlhFEsll.Pp-h-llhPTsDA+LNaLsasp+Luuhlpatpst....................tssCsHuplLppKp-thssllsKhLDlcpILcs ...lhFEshl.Pc.h-llhPohDA+LNalshsp+LAuhlcaspst....................sssCsH.uplLppKpchhsullsKhlDlctILc..... 0 0 0 0 +2930 PF03586 Herpes_UL36 Herpesvirus UL36 tegument protein Bateman A anon Pfam-B_3425 (release 7.0) Family The UL36 open reading frame (ORF) encodes the largest herpes simplex virus type 1 (HSV-1) protein, a 270-kDa polypeptide designated VP1/2, which is also a component of the virion tegument. 
A null mutation in the UL36 gene of herpes simplex virus type 1 results in accumulation of unenveloped DNA-filled capsids in the cytoplasm of infected cells [1]. This family only covers a small central part of this large protein. 20.50 20.50 20.60 21.10 19.80 20.40 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.59 0.70 -5.29 11 176 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 33 0 1 127 0 213.20 54 10.60 CHANGED tpc-tWtuslcAsLccsEs+ucFDAsElsRLc-hAsstGYcst...ch+ppAEpslsApApsspsAl-uVhuFNPYuspNpt......pshhPPlusL+sloWhDuFssAAPhYspLF.Glss-sLhpLh+IutulLctA.sAssGp..lDYapsVspluuDLttlPpLsKYVDFYp+GascF.shhu+LsphRu-shpAsGshshElutAhEplstlR.sPpsA++sL-tGVplhlPStsslhshsstLcc.DhspFcsTAYtEhh ..............................h...ptWhtsl.AsL.thEstthFsusELsRLRDhAAsuGaDh+...slhspAcQVVAAsts..TusoALDTVF+aNPYTPENss........lsPPLAhL+ulTWhDsFulsAPsaTsMF.GVslEGLhhLhRIpuslLhoA-solsGh..ssYhthlhchutsLhtlPtLttaVsFa.puatpa.t..stlpth+tchh.h.sth.h-hthA.Ephth.+.s.tsAt.hlctG..l..su..hlhth.t.hpp.c.p.h.tTAYtc............... 0 1 1 1 +2931 PF03277 Herpes_UL4 Herpesvirus UL4 family Mifsud W anon Pfam-B_4461 (release 6.5) Family \N 25.00 25.00 50.20 50.10 18.70 17.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.06 0.71 -5.06 15 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 35 0 0 68 0 184.80 44 88.16 CHANGED sThIAYoLpsl+sssshslPchEQllCuh-uGoRulsVGscsRCDpLPsGsllIQHsPlGTLlsVDsts-FCSYthhh....ttppstshpshssshhVhPFsoWsssupspplpSsouGlLTl.hhsssolaITlTlYGps...st.sssh.h.spssspssssshst..ts...............................................tDlLspsl+Euclps ..pThIAYSLpps+uShs.sLPDstQVVpsFEhGTRuIhV+GcpRpD+LPpGsVVIQHTPlGhLlllDCpsEFCuYtFhs....pcpppph.puh-uphaAhPFsSWVuSuRscssRSsouGlLTV.lWsscoIYIThTIYGssspt.sssssps.stssss.spsssss..sup.....h.hQss.........................................tDLLsElLREhpLps............................. 
0 0 0 0 +2932 PF05072 Herpes_UL43 Herpesvirus UL43 protein Moxon SJ anon Pfam-B_5928 (release 7.7) Family UL43 genes are expressed with true-late (gamma2) kinetics and have been identified as a virion tegument component [1]. 24.90 24.90 38.40 34.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.16 0.70 -5.71 9 65 2009-09-11 05:23:58 2003-04-07 12:59:11 8 2 29 0 0 61 0 353.40 33 91.23 CHANGED GsstssptChtChhssspulhthuhpsulhsuulhll.hhppshAhsshhhssIsshhlsh.hRhshphh-hlshlG+shQhlsshsuulsWslu........stsh.ts.phslssuhhshslhus.lt.apaVshAsusshpa+suhLshssGsllGloshhhslps.shhuhshslls...hssspD....sussLpsTCaY+hsRttslps.scLG+....uhhssss-ssttcEcs.sutsshcsphsh...lhlshVhhlssPhlhslp+hh..ttusphpsstsshhsshsGalluhulp.Lslh.s.pcsLhpsllhhashhtshulsLsshGh.hGsslhLAuusuhuhhsslslRppspshp.....+LAAuplsKslhsslh.....sshhlC ........................Gpsssspsp..lsCshshspuhhtlhlpuAslshshsll.h.ptshA.ssssshhslushhLuh.hRss.shscshsplhthlphhuuhsAhlhWslu.............hshs.ts.phulssuhhshlVhAs.lashahl....ussshFhsshhhlsuGhhlGsSAthh.ls.s.uuhuluhulls...hssspD....As.shccsChhhtsctss.h+sspD.tR....u..ssss..s..ss.........sp+ppsssusplh.sthst...lhlslVshlussslhshsthh..upusshsshshhstshluGHlssuhsp.Ls.shs.shcLocsllhlHsslplhslsLshts.tl.ulhhhLuuAshluLspslslR+Rh+ttc.....+LAAo.hsRuLahslY.....luhhlC............. 
0 0 0 0 +2933 PF03387 Herpes_UL46 Herpesvirus UL46 protein Mifsud W anon Pfam-B_2545 (release 6.6) Family \N 25.00 25.00 101.70 101.50 15.50 15.20 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.31 0.70 -6.27 11 70 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 33 0 0 71 0 437.90 44 65.51 CHANGED suhpshS.pslhpcRhctGCL.........LPsPpslhsuAVtALc-ts-slhPssLhuspRpssLhsh+sNpVPESlIlsshusDsps-Yh+pYsushppsLscttLotsslhRslhspYW+YLp..tooGs-lsssstsssss....p.osllLhhsshs.K.Lu+pPFKpcsssusYtsshstL+-AhctlQ+YMYYMRPsDPhssSsDTslRLpElLAYssThYpWhlWhhDslDupVlRpht.h.phstGP.RsshsP-slFtRHLcsGPulsoGo.ussh.............hLssssuslLusLl+luslWppspW+usspG.sssAIVAAVELlollHHHhQYLlNhshsGYssWlcGGlpssaLpuALRuQpRFp+hhG.............pLhPTMospSWushEpuspsWFchAlA+Sllsa...GsPTtaYpslLpsl.........ssP.psphsspsss............stssts.hpstputPPsssu ..................s....t.s.tGlsERRlhsGCL.........LPTPpslLuAAVuAL+p+oD-hpPuhLpss-RustLuuppH.NsVPESLIVcslAuDs+hEYlR+YuuAAppsLu-scLouutlpRulLspYWKYLp..ssSGl-VP-cssscscs......SltlLLpPTlusKhLuRsPFKstussApYsAslAsLRDAl+tlQpYMaFMRPsDPopPSsDTulRLpELLAYVusLY+WA.Whl.TsDt+VC++Ls.ssR+hhsh.tuspuPs-hFuRHL-pGPosooGS..hpsh.............sLpAulucVLupLpRLusLWpsuchpuGTaG.sscslVusVEllSlVHHHsQYIINhTLsGYssWussuLsNpYLRAAlcuQcRFs+hsu.............sLFPTMousSWAcMEhSl+uWFstALAtsLLpp....GsPoh..HYcslLchl........uSp.sphptussPss....................................st............................................. 
0 0 0 0 +2934 PF03362 Herpes_UL47 Herpesvirus UL47 protein Mifsud W anon Pfam-B_2182 (release 6.6) Family \N 25.00 25.00 163.40 101.80 18.80 18.30 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.28 0.70 -6.16 13 117 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 34 0 0 101 0 414.00 36 60.55 CHANGED cshcEcphhctpss.hs...hstW.u..sssastGsMYsusPs..csthsa+RshpQutALhh+lstsuLpsulssp.thostcAlhFLlDAslRlAtNsahsupp..................hhtthphhoslp.lPtusus.lLpssshpsP..............tpGPpAAlhRuuhGuLsYWPELRssLsc.schhlRYAtsth.hAEsaLLARhp.utpsuhsspEhc....hLushlTLhsllActslpaLhsusuthLp.scssctshptsptststptlPlsS.tLhsAEstsLushsusph.htssuLutshssuYhAlR....oAhTsLhh-au.t.....sptptcsscthssAhLussllLQRLLGHhNhlLspLstAAhhGGpsl.sVhptThtcYphLhpsssPLYp.sols-Fhc-R--AMcpLcLcs......ssu.sPhsuhchl....lp.tht.ssL-slh....shs.ssshshLGshVsls-hltca+chlhuc ........h.h..Ep.hh......ht....stW.s..t...s.GtMahtt.s..s.hsh.hpsltQupAlhaphhhsshhsthspp..ho.spuhuFLhD.AslRluhNshhhupp.................pshthhp.hhh.stLtslsttuss.lLpssthshP..............ppGPpsAlhRu.hGuLhYWPtlRhhlscssp.hsRYAstth.lA-hhLhuRhp.shpsphsspEtt....hLuphhslhsshutthlpWlphssuhhLt.sh.s+sAahsVstp..ahhlPlsSshLssAEstlLGclsssss.hts.ALssshhsuY.AlR....TAhoshhlcaA.......cstcpspschhspAhLussLlLQRlLGHANhlLshLstAAhhGGhsh..hlhpso.ctYspLhhAssPLYsppThscFW+DhcsAhcplslcP......sos.sP+sshRhl....Ic.shhh.sL-sh.....Pt..........Pp.Vcls-.h.paRp.lhG........................ 
0 0 0 0 +2935 PF04823 Herpes_UL49_2 Herpes_UL49; Herpesvirus UL49 tegument protein Mifsud W anon Pfam-B_3850 (release 7.6) Family \N 25.00 25.00 27.60 112.50 19.50 18.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.11 0.72 -4.02 13 97 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 34 0 0 91 0 96.10 58 33.54 CHANGED sRAsPGsp...slstuK.luFSssPsosousWpusThuaN++lFCtAVutVAttHAptAAtuLWDhssPRoNE-L-chLptAsI+ITVsEGhsLlptAN ..........s.........tuhu++LpFSTAPsoPoAPWsspssuFNKRVFCAAVGRlAAhHARhAAlpLWDMspP+TDEDLs-LLshssIRlTVCEGpNLLQcAN.. 0 0 0 0 +2936 PF04540 Herpes_UL51 Herpesvirus UL51 protein Kerrison ND anon DOMO:DM04380; Family UL51 protein is a virion protein. In pseudorabies virus, UL51 (Swiss:Q85227) was identified as a component of the capsid [1]. In herpes simplex virus type 1 there is evidence for post-translational modification of UL51 [2]. 25.00 25.00 33.60 33.50 21.10 19.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.66 16 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 33 0 0 50 0 160.80 50 66.65 CHANGED lCGhtstscc..pYE.lpsss..sss.s.hRLpEAlssVNsLLPAPlTlEDsltSADssRRLV+ApuLARTYpAC.......RNLECLu+HpsutsssuL-AVVpsHhtsspRlADTChAuLhphYhSVGAs-tsTDshV-QAIRhsAEo-lVMuDVAllE+ALGlsupssssu .........lCGhttpspc...pYE.lpsus..ssstu.hRLpEALssVNuLLPAPlTLEDsltSADsTRRLV+ApuLARTYpAC.......+NLECLuRHpsut-sPsLDAVVtsHtpss+RLADTChAulhphYhSVGAsDtoTDshV-QAIRhsAEo-VVMsDVAllERALGLsst.t...sh... 0 0 0 0 +2937 PF04537 Herpes_UL55 Herpesvirus UL55 protein Kerrison ND anon DOMO:DM04378; Family In infected cells, UL55 is associated with the nuclear matrix, and found adjacent to compartments containing the capsid protein ICP35. UL55 was not detected in assembled virions. It is thought that UL55 may play a role in virion assembly or maturation [1]. 
25.00 25.00 26.40 25.40 17.80 17.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.48 0.71 -4.86 7 43 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 31 0 0 35 0 164.90 38 89.39 CHANGED ttsh.sshp.stlsslhpVlsPLsl-soapSsp.spcpPsssshhlssRoYhlRAsCppssclHAFFhGLupcss.shs..hsslpshsplhNpp.hhpchtshp...phCcuPFStATlhDsl-ss.....shsIpGlsaHCHCcs.FSh-CWtuA.tAh-+lsshs+shpshst ........htss..sshhhphPslopVssPhsLssoWpupp.s.......hcssps.......ssuslssRoYllRAsCsossslHsFFaulhc-tstphs..hs-LcsFscllNp..llpELtsc+.stthCssPFSsuTIhDssssut..........phsIsGlsYHCHC+sPFSh-CWpuAsuAhp+ltSlupuhtusp.t........ 0 0 0 0 +2938 PF04534 Herpes_UL56 Herpesvirus UL56 protein Kerrison ND anon DOMO:DM04377; Family In herpes simplex virus type 2, UL56 is thought to be a tail-anchored type II membrane protein involved in vesicular trafficking. The C terminal hydrophobic region is required for association with the cytoplasmic membrane, and the N terminal proline-rich region is important for the translocation of UL56 to the Golgi apparatus and cytoplasmic vesicles [1]. 25.00 25.00 32.70 29.00 20.80 19.60 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.34 0.71 -4.60 3 23 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 0 0 22 0 193.30 71 84.13 CHANGED MASEAAQPDAGLWSAGcAFADPPPPYDSLSGRNEGpFVVIDlDTPTDoPPPYSAGsoLlGPlsPsSSG-GEssERGRSRpAA.RAARRARRRAERRARRRSFGPGGL.luTPLFLPEThluAPPDVsuDLlSGLPTYAEAsS........DHPPTYATVsAA........RoTEQPuGuluPsDQPRoQsSGsWRPPpVNSRELYRAQ..............RAARsu ...............MASEAAQPDAGLWSAGNAFADPPPPYDSLSGRNEGPFVVIDLDTPTDPPPPYSAGPLsSVPIPPTSSGEGEASERGRSRQAAQRAARRARRRAERRAQRRSFGPGGL.LATPLFLPE.TRLVAPPDITRDLLSGLPTYAEAMS........DHPPTYATVVAV........RSTEQPSGALAPDDQRR.T.QNSGAWRPPRVNSRELYRAQ..............RAARGS.... 
0 0 0 0 +2939 PF01763 Herpes_UL6 Herpesvirus UL6 like Bashton M, Bateman A anon Pfam-B_878 (release 4.2) Family This family consists of various proteins from the herpesviridae that are similar to herpes simplex virus type I UL6 virion protein. UL6 is essential for cleavage and packaging of the viral genome [1]. 22.00 22.00 22.20 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.70 0.70 -6.33 30 194 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 91 0 1 167 0 449.10 31 82.83 CHANGED htahEILpGchuYscGQslasulRsssshhRQl.sslh+ssLsussY--ltsDWppHhss.......lspRastppptps.thtcpsFpoWppTL+hoLhchlpshls.hlastssss....Ys+YlDWlsslGlVPll+p.....s.......tt.hhpphppthppstppp.ssp.+lhssllppstphlpplhpslsultIs-asclpIhashpppphhsh..hssc+hcshVlhpPlhts.spllFDSPlQRLatElhpCasLpEHAKlCQLLNTuPlKlLlGp+scs......ssp+ll-+l...-ppspsusAtpcLl+Lllslps.+plusITDsV-saLp-sossllDpsplhssststh..........................spssppul+cpVssslh+sLEs.INp.FcTIcsL+ptNcshhp+lpphEspLp+hppc............spsssss..................sspl.s.sshpulptl...hppslh.sosshs-sphVANSFhSQYlPsac-phccLopLWEpElhRsFKLs+lssNQGpElulsYSssoIolLLuPahaslLcltpls.LlscphshhS.pElssslacpSRLpsYlsDlut+a.....s ..............hhFhclLpGphGYspGQslapslRsspshh+Ql.hslhpthLsusshcclhs-Wppahp........h..+ht.ttt.t..thtpphapsWttoL+pollshltsllt..has.st.s....as+YlDWlsslGlVPlhch..................ht.h...h.h..tt......thssphhtpsh.hlhplspshpustlhsascsplahphpptph.sh..hpspchchhVhh.Plhh.....t.ttllFsoPlt+lh.ElhtpptLpcHt+lCpLlNThPlKslhsp+pp.......thtchlphh...pppsptssAtppll+hllNhps.+phhslpDoVcualp-hsspllD.s...hssp.s............................ts.thppsh+s.hhppl.thLEt.lpp.hppIppL+phNtth.pplpphcttLp+httt............t.t..t........................ph.p.sh.tshphh....t.tl..hsh.hs-sthlsNSF.upalPshtp..ccLopLWEpEhhRsF+ltphhssQGtE.ul.YSs.slthhlhPah..llph.pht..lstt.h.hu.tElhtslacpo+hphYlp.lt.h.................................. 
0 1 1 1 +2940 PF01677 Herpes_UL7 Herpesvirus UL7 like Bashton M, Bateman A anon Pfam-B_1086 (release 4.1) Family This family consists of various functionally undefined proteins from the herpesviridae and UL7 from bovine herpes virus [1,2]. UL7 is not essential for virus replication in cell culture, and is found localised in the cytoplasm of infected cells accumulated around the nucleus but could not be detected in purified virions [1]. Members of the herpesviridae have a dsDNA genome and do not have a RNA stage during there replication. 25.00 25.00 58.00 57.70 18.10 16.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.38 0.70 -5.01 33 122 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 82 0 0 109 0 220.00 27 77.39 CHANGED hshEV+cs.sshslthsssslsshsV....tttlhh.h.t...sshhshcsYhppphspssFtGFshsslsssEDtVpslslsPhllpcRhsla+PpshhcFpLCsll.hLENh..tssosshhhplhshLchlts+.ss.sphsphLhpusphLlsTlhhhath..tshcsphllsphshh+h..Lhp.tpssshsllpsla....t...hpshpLstsspps........hhhcstsuh..hNthass ......hshEV+ps.sshtsthsusslsshsV....stplhhhhps...sphlsscpYhppshspsuFtGFshsslsssEDtVpslslsPhlLpaRhsla+Pcshh-FpLCsLlhhLENh..tpsosshhhplhsaLphsts+.ss.pphpphLhpusphLlsThhahhth...shcsphllsph.hhch..Lht..tsss.sllpsla....t.s.tsshpLstsstps...............tthhps.suh..hs.hh..s............... 0 0 0 0 +2941 PF03554 Herpes_UL73 gpUL73; UL73 viral envelope glycoprotein Finn RD anon Pfam-B_3001 (release 7.0) Family This family groups together the viral proteins BLRF1, U46, 53, and UL73. The UL73-like envelope glycoproteins, which associates in a high molecular mass complex with its counterpart, gM, induce neutralising antibody responses in the host. These glycoprotein are highly polymorphic, particularly in the N-terminal region [1]. 
19.60 19.60 20.20 20.00 17.90 19.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.95 0.72 -4.16 16 253 2012-10-01 21:33:21 2003-04-07 12:59:11 8 2 54 0 0 165 2 89.80 51 77.14 CHANGED hhshsstshsstssspspssssssspspssFYshsCsADTYt.oLsSFSSIWsllNslllhsAsslaLpYhCFp+Flsshs+ ...........................................hso.hooT.STTSTKsoSTTHDPN.VM+tH.spsDFYcAHCTSHMYELSLSSFAAWWTMLNALILMGAFCIhLRahCFpsFsspTh....... 0 0 0 0 +2942 PF01802 Herpes_V23 Herpesvirus VP23 like capsid protein Bashton M, Bateman A anon Pfam-B_1435 (release 4.2) Family This family consist of various capsid proteins from members of the herpesviridae. The capsid protein VP23 in herpes simplex virus forms a triplex together with VP19C these fit between and link together adjacent capsomers as formed by VP5 and VP26 [1]. VP3 along with the scaffolding proteins helps to form normal capsids by defining the curvature of the shell and size of the particle [1]. 25.00 25.00 26.60 26.60 23.70 23.60 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.63 0.70 -5.38 32 501 2009-09-11 06:10:29 2003-04-07 12:59:11 12 1 91 0 0 208 0 163.90 62 98.31 CHANGED tplhlslss+Loss-lupLQcphGpllslssh++hhslpsluLpshh.tssssDalplhshh+cphhAllpcVpsspllhshlshG....psht.l+NT.uP.FphssGDtlsllPPlFst.pssl.pLpSssh-LlFPhsVPpsLApEllt+llshslhuhststp..tsshsch...hpslpYpG+pasLs.shpptss.s.ssl+sLslshshhss.ustllhsllss.Lshpspc.hlsthhplhsspp.................hphhc.sshsh.pDhsR...............lsuahohhppLuslhshpshhpVssas.ssspssss......h ........................................................................................s.....sht.lpNs.uP.hphppss.lsllPPhFt....t........sp.......h.hL-SNGFDLVFPMVVPQQLGHAILQQLLVYHIYSKISAGAPsDVNMAELDLYTTNVSFMGRpapls........................................................................................................................................................... 
0 0 0 0 +2943 PF03327 Herpes_VP19C Herpesvirus capsid shell protein VP19C Mifsud W anon Pfam-B_3451 (release 6.5) Family \N 25.00 25.00 30.10 53.50 18.50 18.10 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.55 0.70 -5.52 31 121 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 83 0 0 106 0 283.70 27 73.81 CHANGED shsp..shhschhhttttlsptp.shus.hh.hhpsss...suhcslllSlshlshs....h.st..ssusul+shltshYssp...upLtphssslpthlpsp.a.HchlpslGsllpslspshlsplTsVh+Gsshstpsspht.....slh.......lPs-hFlDlDt.h.....................ttspsuhphlYlshlYspph.pcttspla.hhpSttscpslhshLchhaushRtpph.......................................................slpspsshsphhaGAhs+LGhhssssshp.....psphphpussLPVVplpshhsc.hGsWp.h ..............................tpp.stchh.shtpltptpshus.....tpsss...susRshllSlsFLshs....h.sp..suusAlRstltu.Ysss.htscLschstsLpshlcs+sa.HchlphhGsLlphlopstlsplTsVspGsp.uscsspss....sslh.......lPushalDLDtph.....................thsssustalYLlhs.Y.ppct.scptsplY.lhpSp.hs.psltssLcthFuchRhspshp.....................................................clpu.psstpsssauAhscLGhhsps.sshp....+sphh.tusslPVVhl-shshc.sG.sWpth........................... 0 0 0 0 +2944 PF01521 Fe-S_biosyn HesB-like;HesB; Iron-sulphur cluster biosynthesis Bashton M, Bateman A, Wood V, Mistry J, Eberhardt R anon Pfam-B_518 (release 4.0) Family This family is involved in iron-sulphur cluster biosynthesis [3]. Its members include proteins that are involved in nitrogen fixation such as the HesB and HesB-like proteins [1] [2]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.35 0.71 -4.13 98 8207 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 3441 17 2003 4486 4099 102.60 30 78.59 CHANGED Mplsl..TcsAtpplpphhss.ppt.........lhLshcsGs........th.Cuh.uspaplhhls..cs......ssh-hhlc.ssshslhlcs..hstsaltpshslDas.thtshsLp.ssss.hlssssslsc ........................................lpl..T-.uA.sp..+lppllsp.psps...........................tlR.lhVpsuG............Cu..GhsYshshs.c...cs...................................spsD.h..s....h.......E.....p.....p..........G......l...p...l......h..lDt..hSh.aL...s........G..s.p.lDa.s.c.s.h.h.sp.t.Fp......hpN.P.Nu.p.s..p.CGCG........................................ 0 581 1176 1615 +2945 PF02444 HEV_ORF1 HEV_ORF2; Hepatitis E virus ORF-2 (Putative capsid protein) Mian N, Bateman A anon Pfam-B_1896 (release 5.4) Family The Hepatitis E virus (HEV) genome is a single-stranded, positive-sense RNA molecule of approximately 7.5 kb [2]. Three open reading frames (ORF) were identified within the HEV genome: ORF1 encodes non-structural proteins, ORF2 encodes the putative structural protein(s) [1], and ORF3 encodes a protein of unknown function. ORF2 contains a consensus signal peptide sequence at its amino terminus and a capsid-like region with a high content of basic amino acids similar to that seen with other virus capsid proteins [1]. 19.70 19.70 21.60 21.60 18.30 18.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.68 0.71 -3.88 3 266 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 12 0 0 229 0 81.00 74 96.04 CHANGED MGS.PCALGLFCCCSSCFCLCCPRHRPVSRLAAVVGGAAAVPAVVSGVTGLILSPSQSPIFIQPTPSPPMSPLRPGLDLAFANpPuHLAPLGVTRPSAPPLPPVVDLPQLGLRR .Mt..PCALGLFChCSSCFCLCCPRHRPsSRLAsssGGAAAVPAVVSGVTGLILSPS.SPIFIQPTPS..h...pPGLELALsspPs..AP.G................................. 
3 0 0 0 +2946 PF02455 Hex_IIIa Hexon-associated protein (IIIa) Mian N, Bateman A anon Pfam-B_2076 (release 5.4) Family The major capsid protein of the adenovirus strain is also known as a hexon. This is a family of hexon-associated proteins (protein IIIa). 25.00 25.00 47.90 47.90 19.00 19.00 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.58 0.70 -5.81 13 134 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 100 1 0 130 0 481.10 62 87.16 CHANGED KVLsIVNuLl-spAIRsDEuGtlYsALLpRVu+YNSsNVQoNLD+LlsDV+pulupp...RhhcsssLGShsALNuFLuoLPusVpRGQENYsuFluALRLhVsEsPp.oEVYpSGPsaalQsuRpG.lpTVNLopAFcNLpsLWGVpssstc.psslSSLLTPNTRLLLLLlAPFT-usolS+DSYLGaLlTLYREslusu.clDEcThpEIspVuRALGs-.DsusLpuTLNFLLTNRpp+l.PssaoLosEEEpILRaVQpuluLhlhp-sts.sosALDpsutsh-PSFYuuNRsFINRLhDYhcRAAAhsPsYFpphlhNP+WlPPsGFaTGsa-hP-...-u..FhWDcscs................phhcccss-cpscstss....sssssutPuS.hsphs........................................uphspshhsGss-.....lhtPtps+N............h.Nssl-pLlDthuR....W+Thtp-.t...............++.+.tt......sc-ssucpsshhchtGsG...........tNPFAHL+P..+u...pha ....KVLAIVNALsENKAIRPDEAGLVYNALLERVuRYNSoNVQoNLDRLVTDVREAVAQRE..RF.+sssLGSLVALNAFLuTQPANVPRGQ-DYTNFlSALRLMVoEVPQ.SEVYQSGPDYFFQTSRQG.LQTVNLoQAFKNLpGLWGVpAPlGD.RuTVSSLLTPNSRLLLLLlAPFTDSGSlsRsSYLGaLLTLYREAIGQA.pVDEQTaQEITsVSRALGQp.DTsSLcATLNFLLTNRpQKI.PsQYuLoAEEERILRYVQQSVuLaLMpEGAT.PouALDMTARNMEPShYAuN.RPFIN+LMDYLHRAAAhNs-YFTNAILNPHWLPPPGFYTGEaDhP-s.NDG....FLWDDlDs................thh.ptph..tcc..tsts......sstts..stsus....hPSLst.hs.stSst......................................................GRloRPRL.GEpEYL..NDsLLpP.RtKN............hsNNGIESLVDKhsR....WKTYAQ-pR-..ts.............t.pp++pppth........---DSADDSSVLDLGGoG...................sNPFAH.LpP+h.h...h........................................................................................................... 
0 0 0 0 +2947 PF00349 Hexokinase_1 hexokinase; Hexokinase Sonnhammer ELL, Finn RD, Griffiths-Jones SR anon Prosite Domain Hexokinase (EC:2.7.1.1) contains two structurally similar domains represented by this family and Pfam:PF03727. Some members of the family have two copies of each of these domains. 20.20 20.20 20.50 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.36 0.71 -4.99 14 1806 2012-10-02 23:34:14 2003-04-07 12:59:11 16 20 455 60 944 1749 11 195.20 35 42.73 CHANGED pshhcplcchhptFplosEpLpclsc+FhpEhcKGLs..+csss........l.MlPsaVtshPsGsEpGDFLALDLGGT..NhRVlhVclsGsp..ph-hppppYplP.......cclhpupu...cpLFDaIA-ClpcFh-cht........pspsLPLGFTFSaPspQsulspuhLlpWTKGFch.....sssEG+DVVsLLpcAIp+Rshs.lcVVAllNDTVGThhussYs- ...........................................................t......lpph.t.h..p..ls..pp..p....Lhplhpphhp.-h...ppGLp......pcs....ps..................................lpM.lPoa...Vp...s...h...Ps..........G....s..........E.p.......G..p.a.LA.LDLG..GT..NFRVhhVpl..pspp..................sh.c.h.p...p......p......h...a.s...lP..........................................pcl.h.p.Gsu.................ppL..F.-aIApslsc..Flcp.p.t.h.............................p.....t..p...pls..L.GFTFSFPspQss....l..s.p..Gh.Ll.pWTKGFph.....................s.s.s.....G..c..D..V.Vt...h..L...p......c..A.....l......c+......+......s....................................................l.......cl.s.A.l.lNDTVGTh.hussYp.................................................................. 1 201 397 678 +2948 PF03727 Hexokinase_2 hexokinase2; Hexokinase Sonnhammer ELL, Finn RD, Griffiths-Jones SR anon Prosite Domain Hexokinase (EC:2.7.1.1) contains two structurally similar domains represented by this family and Pfam:PF00349. Some members of the family have two copies of each of these domains. 
21.30 21.30 21.40 21.40 19.60 21.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.45 0.70 -5.17 15 1869 2012-10-02 23:34:14 2003-04-07 12:59:11 11 20 457 61 998 1816 6 217.50 34 49.64 CHANGED psclGlIlGTGoNuCYhEchppIptlcG.........spupMsINsEWGuF.DstpLs.sRTcaDlslDcp.SsNPGpQtFEKMISGhYLGEllRhlLlcLscpGlLFc.....Gptss+Lpsshhh-TphlScIEsD.pcsLccscslLpp.LGlpsTss-phll++lCclVupRAApLsusGlAAllp+....RGhcthpsslGsDGSVYcpaPpFpcth..tpsl+-Lhsc.....s-..lshl.uEDGSGtGAAlluAVAt+ ........................................................stlG...lIl.........G..T.GoN.As.Y.hE.ch..p..s..l..t..h.hps............................sps..p..M.slN.hEW...GuF....Ds...t......h..t......tTcaDpt.....lDp............t.....S.................h....N........P..........G................p....Q..............h...a...........EKMlS..GhYLGEl.lRh.l.Llc.h.h..p..........p....s...h...l..Ft.......................s.p...h...s.......p.....p...L....t.p........h.h.....hp..............Tph............l.S.t.....l.....c.........p.....D.........................p...................s............l...p....p........s.p.p.lL.......p.......p.............l...........s.......l......p......s........o...t........p.......-.......t.h...hl.pplsphlupRuA.p.Lsus.............ul................u.ul...................l...........p.......+.....................................................................p............t........................t.....................................p..........ss..lusDGola....ch...a....PpFpphh..pp..s..l.p...p...l...hs................pp.......lph..h.u..cDGS..GhGAAl.lsA.hs......................................................................... 1 215 418 718 +2949 PF03559 Hexose_dehydrat NDP-hexose 2,3-dehydratase Bateman A anon Pfam-B_1070 (release 7.0) Family This family includes a range of proteins from antibiotic production pathways. 
The family includes gra-ORF27 Swiss:Q9ZA32 product that probably functions at an early step, most likely as a dTDP-4-keto-6- deoxyglucose-2,3-dehydratase [1]. Its homologues include dnmT from the daunorubicin biosynthetic gene cluster in S. peucetius [2], a similar gene from the daunomycin biosynthetic cluster in Streptomyces sp. strain C5 Swiss:Q53880 [3] , eryBVI from the erythromycin cluster in S. erythraea and snoH from the nogalamycin cluster in S. nogalater. The proteins in this family are composed of two copies of a 200 amino acid long unit that may be a structural domain. 25.00 25.00 49.50 35.60 20.40 15.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.01 0.71 -5.30 79 286 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 121 0 55 284 181 203.50 38 87.16 CHANGED psss-lhuWlspp+s.ppchpscRlPLscl..su...Wphssss..lsHcoGRFFsV.GlpVps.....ss.pcV..ssWsQPllp.sph..GllulLs+chcGVLHhLhQA+hEPGshsslpLuPTVQsT.uNYsplppusps.aL-hhhsss...s...RVhhDslQSEcGuhFh+tcNR.hlVEs..s--.........lshsss..FpWlTluQlppLL.ppsphVNhpuRTlLuCL ................sclhuWlspp+s.tpphpsc+lPLscl.....suWphssss..lsHcsGRFFsVhGlpVps..........ss.ptV......spWsQPlIptsph..GllulLs+chsGVlHhLhQA+hEPGsh.s.s.lpLuPTVQsT.uNYsphttu.....tps..aL-hhhss....s...t..s..............cVhhDslpSEcGupFh+pcNRphlVEs...s--.......................ls...s..ssFpWlTluQlppLl.p.psshVNh-uRolLuCL........ 0 14 41 49 +2950 PF04209 HgmA homogentisate 1,2-dioxygenase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Homogentisate dioxygenase cleaves the aromatic ring during the metabolic degradation of Phe and Tyr. Homogentisate dioxygenase deficiency causes alkaptonuria. The structure of homogentisate dioxygenase shows that the enzyme forms a hexamer arrangement comprised of a dimer of trimers. The active site iron ion is coordinated near the interface between the trimers [1]. 
19.30 19.30 19.30 19.30 19.20 19.10 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.53 0.70 -6.08 14 1185 2012-10-10 13:59:34 2003-04-07 12:59:11 8 6 998 2 527 1251 479 378.40 34 93.62 CHANGED pYhoGFGNcapo..EulsGuLP.GpNSPQpssaGLYAEQLSG.oAFTuPRppNpRoWLYRIpPSssHtshh.hc....tphhsssast.ts..sPNpLRWpPh.lP...ppssDFV-GLholsGuGcs..hsppGhAlHhYtsNpSMtscsFa.NADG-hLIVPQpGtLplpTEhGclpVpPuEIsVIPRGh+FpVplh.ts..sRGYlsEsaGspapLP.....DLGPIGANGLANPRDFpsPV......AtaEDpcs....hpllsKapGpLassc.sHSPhDVVAWHGNYsPY...KYDLppFsslsoVuFDHsDPSIFTVLTuPSspsGsAssDFVI..FPPRWhVAE.cTFRPP..aYHRN.....sMSE.........FMG..LIpGtY-AKptG.FhPGGuSLHshMosHGPDhpsFEtAopA...-L+Pp+..ls-.ohAFMFEophshtlocaA.hctpplppsYhpC.WpsLcpcF ..............................................................................................................t...................................P.p.s..h.s.LYtEphsG.osFh.ss.......pohhY+.h.h.s...ss.p....h..........................t....h.......t......s...h....t...t...........tph..R.h...s.h.hs..........t....hsalpGhhs.hsususs.....htpp.shtltlat..ss.p.uM..............s..c.h.Fh..NA.D.GD.Ll.hsppGp..hcl.....tTEh.....G.p..............lp....lpss-hslIP.R.Gh..pa+.............l....ph......s.s...................sRsa.lh.Esh.s....u....thpLP......-h..G.l.G.s..pu...l...h...ssRDhpsP.s...............A.t.a...p....-..c.cs.............saplhsK..hp......G........p.......l.......a..p..s.....p..h..sa..sPh.DVVuW.HGshsPa....................+aslccFpslsohta.H.sPSlasVh............p.s....Ghs.lssFV.....PR.h.s.u-..ps..h+..sP..aaHpN.....lh.S.E...hhG...h.l.pG.sa-.A.p.......t.....p....G...FhPGGhS.LHssh.sHG.P..cstsa..-.t..A.p.u.......p.l..t.....pc....hs-...h...AhMh-Tp.............hslpho.chA.h...p...s.....t....hpt..pYh.t.s.Wt........................................................................... 0 150 295 428 +2951 PF01085 HH_signal Hedgehog amino-terminal signalling domain Finn RD, Bateman A anon Pfam-B_1424 (release 3.0) Domain For the carboxyl Hint module, see Pfam:PF01079. 
Hedgehog is a family of secreted signal molecules required for embryonic cell differentiation. 20.00 20.00 20.70 20.20 19.60 18.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.16 0.71 -4.68 4 383 2012-10-02 01:02:30 2003-04-07 12:59:11 13 11 169 34 136 371 1 126.60 72 36.71 CHANGED CGPGR..shGRRR.sRKLsPLsYKQhhPNVuEpThGASGhhEG+IpRsSERF+-LsPNYNsDIIFKDEEpTGADRhMTpRCKDKLNuLAISVMN.WPGV+LRVTEGWDEDGaHucESLHYEGRAVDITTSDRDRsKYGMLARLAVEAGFDWVYYESKuHIHCS ..................................................................p.hsh..hKQh.PslsEpshGASG..EG+lsRsst+F+.-.L.ss.NY.NsDIlFKDEEso.GADR.lM.TpRCK-+LNsLAISVMNpW..P....G..VKLRVTEGWD..E......D.G.H..............H...uc-SLHYEGRAVD...IT.T....SDR....DRs...KY...GhLARLAV.EAGFDWVYYES+.sHlHCS............................... 0 33 44 85 +2952 PF00730 HhH-GPD Endonuclease_3; HhH-GPD superfamily base excision DNA repair protein Bateman A anon Pfam-B_854 (release 2.1) Domain This family contains a diverse range of structurally related DNA repair proteins. The superfamily is called the HhH-GPD family after its hallmark Helix-hairpin-helix and Gly/Pro rich loop followed by a conserved aspartate [2]. This includes endonuclease III, EC:4.2.99.18 and MutY an A/G-specific adenine glycosylase, both have a C terminal 4Fe-4S cluster. The family also includes 8-oxoguanine DNA glycosylases such as Swiss:P53397. The methyl-CPG binding protein MBD4 Swiss:Q9Z2D7 also contains a related domain [1] that is a thymine DNA glycosylase. The family also includes DNA-3-methyladenine glycosylase II EC:3.2.2.21 and other members of the AlkA family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null --hand HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.54 0.72 -3.76 72 13897 2012-10-03 02:11:09 2003-04-07 12:59:11 20 76 4824 121 3791 10642 6191 138.90 24 47.70 CHANGED lssllspQssspsspphhtclhpth...............h.sscsltphs..pclpplh.................hGhhppKAchlpphuchlhcphtuphspshpphtt............................l.GlGthoushhhhhuhsh.p..hhs..lDstlpRhhpRlhhhpt....s.cphppph.chht. ..............................................................................luhllspQsss.tsl...t.thh.tchhpth..........................................................................PT...sps..l.u.s.....s....s....-clh.p.h.hps...................lG.h.a.s+A..+....s.lp..psuphlhcpasuphPpshppltt......................................................................................................................................................................................................................................hsGlGhtoussshhhuhuh.....hhs..VDspl.t.R.l.h.s.Rhh..hh.pt............s.tp..hcpth.t.......................................................................................................................................... 0 1240 2398 3196 +2953 PF03753 HHV6-IE Human herpesvirus 6 immediate early protein Finn RD anon Pfam-B_1006 (release 7.0) Family The proteins in this family are poorly characterised, but an investigation [1]has indicated that the immediate early protein is required the down-regulation of MHC class I expression in dendritic cells. Human herpesvirus 6 immediate early protein is also referred to as U90. 
25.00 25.00 27.00 26.80 18.30 17.90 hmmbuild -o /dev/null HMM SEED 993 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.99 0.70 -13.75 0.70 -7.07 3 44 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 6 0 0 41 0 565.20 63 91.78 CHANGED YSoNVEEGASs-FKhllAQSlGNCIQSIGASVKAAMKQEQSDMEDsLINsAGLLTpcRSMLpcLuLEQLSQLININLLSSASSpFVSsYuKMLSGKpLDFFNWCEPRFIVFACDKFDGLVKKVASESR-LLhDLRANMNN-FIKAlKcIFSKAsVsLDspKLNpsATMLLMMAHNKEMSNP-ISNc-FCcKlNpLKQ-LLEuKNEIIEsNuKNMQhhQ-FAIKQMNQIFMDsCDKTFLKIHlNCKNLIoAAKNLGsAVLQSIVICSNEFSWQ+LKspR+pFKITMMsMITcACEpIEolYDDTGLIKPLsSlsIMEGYIshNKNRpSSICDuNlDPSDShlLELtDFDDHGKYSEESS...IESIHEDDDN.............................lsh.ph.-.ppspsssh.sPphsscp.phphhppIcppsluKMYPsTPSPDVPGKSKEs.......cTFlEsSRQoGcEQTSPNCVCT.......ASVTDLGGPDNlKSITGLpSu.......KchLlK+LLDTQsDSVVs.........pTsStpp-hhshS..phppscEhhQ-KsS..............psKpT-s.sG.........................TFoposp.spS.uuI..phs..sK.sp-hEth.pLhshsDGopDNPLISEMLoFGYETDHSAPYESESDNNDEIDYIAssDSusRTNNIHMNNTNENTPFSKSlpSP..PEVTPSKcsaKs-KhsslSpppKsKKRTA......................................KRKsVuhKosKSKKIKoDpLPcsTNVIVIS......SESEDEEDGsNIIcKShLcKsIKSEscSESSS.............ESDDCTSEDNpLHLSDYD............KVINNG.cCpSKGFPSPVFTIPIRSMpG.THGIRsKFVPKKNWLWFMRKTHKVDNCPIHSScKsNsK-DSDsTEAsHCFhNHFVPIKTDDEEY-KENVSYIYsKIQ-SKIDlEsITPTK+LIT-MlMDNFMDLTDIIKpGIsKHCQDLssKYsVlT.TsCEKsLNVsNSQslsTstTQlFDPsVTGNNSsILNIINDTTsQNDENRCTEGTSNsNEKCTs+SDCNSDpTEVFKLDGYPSDYDPFlENAQIY 
......SsNVEEGupt-hKsLsAQSsGsCIQSIGA................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 0 0 0 +2954 PF03486 HI0933_like HI0933-like protein Griffiths-Jones SR anon PRODOM Family \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.17 0.70 -6.00 49 4052 2012-10-10 17:06:42 2003-04-07 12:59:11 9 30 3055 3 923 14035 5432 360.40 33 90.89 CHANGED hDVllIGuGsAGlMuAhsA..uptGpcVhLl-+spphG+KlhlSGGGRCNlTN..phs.spalsp...Ns+FlpSA.lupFsspDhIshhpphGlth+pccpG+lFsss..pApsllchLlpchcc.tGVpl+hpspVpsl........ttssss.atlps......ssp.plpssslllAoGGhShPphGSoGhGYplAcphGhslhshpsuLVPhshp.tshh.hppLuGlulcslhh.h.............spsthsaptshLFTHhGlSGPAlLplSuah........psstplplDLlPshs..tltphLppp+ttpspptlpss..LsphLPc+lhthlhct..h.t.st..htploppplpplsptl+saslplsGopuhcpA.VTtGGVshcElss+TMpu+tlPGLaFsGEVLDlsGasGGYNhQhAauoGasAGps ........................................................................h.clllIGuGsAGh.hsAhtA..................up..t......G...t..p..V..lll.-..+.....s..........p...c........h........G..............+......K.........l....h........h.......o......G.....G...........G.......R......C.........N........h.......T...N..................t..........h........s.......................s.........p.........h.......l..s......p......................................N.....s.....+......F.....h............h.....S........s........h.....s............p.........a.............s....s...........D............h........l.........s..h.....h...p.p.....h.....G..l.......t...........h..........+............p.......c........s..........t...........G.............p..............l..............F..............s.s.........s...........................p..........A........p.......s.......l.........l...............c...............h.....L...h...p........c.h.c.p......h.....s..Vp.....l.p.h..p..s.p.l..h...sl.......................................tt...p...p....s...t............a......t....l....ps...............................ss...t......p....h....p...s...c.p...lllAoGG...............h....S.........h.....P...p...h.........G...u...os......G.a
.c.lAc.phG..hslh..hpsu..sPh.........ph...........p......h............................ptLtGl.u.lp.sl.h.h....................................sptt...h....sh....ptshLF..T....HhG.lSGPulL...phSuah.........................................................p.............s.....................................l..p....l.s....l.h.P...s....h.s.......................l........tphL...p..pt..................p.t.......t...p.s....p.p....t.lps..h..................L...t...................t.....h.........L..Pc+lh.hhh.ph....t.......................l.........................p............tt.....htp.lstp..ph..p....t....L...sptlpphpl..pssGspshcpA.......V.....ThGGVsh....cE........ls..s+...TM..p..u...+h......lsG........L.aFhG..............EllDlsGhhGGYNhphAauoGasAup.s........................................................................................................................................................................................................ 0 297 604 782 +2955 PF04588 HIG_1_N Hypoxia induced protein conserved region Waterfield DI, Finn RD anon Pfam-B_4868 (release 7.5) Family This family is found in proteins thought to be involved in the response to hypoxia. Family members mostly come from diverse eukaryotic organisms however eubacterial members have been identified. This region is found at the N-terminus of the member proteins which are predicted to be transmembrane [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.40 0.72 -4.22 59 931 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 520 2 533 819 723 53.40 27 41.59 CHANGED s+cpPlVP....lGshussuslshuhhsh+.pG..spptSp+hhRhRlhAQuhTlsAlhsG ..............................t.hhs....................lu.hhussss.lhh.u..hhsh........t..pG............stphS....p+lh+hRVhAQuhslssllh.................. 
0 133 262 402 +2956 PF01355 HIPIP High potential iron-sulfur protein Bateman A anon SCOP Domain \N 21.50 21.50 22.50 22.50 21.10 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.70 0.72 -3.85 43 231 2009-09-11 11:04:45 2003-04-07 12:59:11 12 2 165 32 75 216 47 65.40 40 62.29 CHANGED EsDspApALsYhpDAocs-..ps+a........sGQpCuNCthapup...sssshGsCsl..Fs...GKtVsusGWCsuas ........................EsDspAhALGYptDAocsD...ps+asp......tsGppCuNC.t.hapGp...tusshGsCsl..Fs...GKpVuucGWCsAas.......... 0 17 38 55 +2957 PF00713 Hirudin Hirudin Bateman A anon Pfam-B_707 (release 2.1) Domain \N 25.00 25.00 34.70 34.70 20.30 17.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.43 0.72 -3.87 8 20 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 3 141 0 31 0 57.20 81 94.47 CHANGED loYTDCTESGQNLCLCEGSNVCGcGNKCILGSsGccNQCVTGEGTPKPQSHNsGDFEEIPEEYL VsYTDCTESGQNLCLCEGSNVCGpGNKCILGSsGccNQCVTGEGTPKPQSHN-GDFEEIPEEYL. 
0 0 0 0 +2958 PF02098 His_binding Tick histamine binding protein Mian N, Bateman A anon IPR002970 Domain \N 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.15 0.71 -4.39 13 345 2012-10-03 08:47:39 2003-04-07 12:59:11 11 3 20 17 31 355 0 139.90 15 70.32 CHANGED hsshQDAW+slp.ssscsaaLhhRTacs-.s.hGpshpCVsspspchscsp+shp...........sshtYpN..sss.sphpshstplpshcptsa.shc....Nshphppt.....sspshsh.lhao...Dtst.Csl...........................lpsspsstu.ct..................................CELWh.psphspsh........................................................PssCptsFpths ..........................................................................................h...............tt.hhhhh..p..sh........t.p.......................t..hpCh.hp.h.h.p.h..ppp..tp...h.......................h..h.t.ah.......pp...t....tph.....ph.......ph.th...p.h..h.p.....p.t....sh...sh.............shh.phptt..............ts.shphplh.as....D.h.pp.Chl...........................lp.h...t..t..p.p....s....t.t....................................................CpL..a.htp.p.t..l..p..p..t................................................................s.p.p...Cp.hatt.C........................................................... 0 31 31 31 +2959 PF00977 His_biosynth Histidine biosynthesis protein Copley RD, Finn RD, Bateman A anon Pfam-B_1089 (release 3.0) Family Proteins involved in steps 4 and 6 of the histidine biosynthesis pathway are contained in this family. Histidine is formed by several complex and distinct biochemical reactions catalysed by eight enzymes. The enzymes in this Pfam entry are called His6 and His7 in eukaryotes and HisA and HisF in prokaryotes. The structure of HisA is known to be a TIM barrel fold. In some archaeal HisA proteins the TIM barrel is composed of two tandem repeats of a half barrel e.g. Swiss:P05325 [3]. This family belong to the common phosphate binding site TIM barrel family [4]. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.34 0.70 -5.09 146 7930 2012-10-03 05:58:16 2003-04-07 12:59:11 16 27 3687 37 2167 8472 6948 229.60 33 89.54 CHANGED cIIPslDl....ccG.+.sV...cGs..........pshp.ssDPl-...hApta..p..ppGA-cLthlDlsuut...ps+shph..cllc...c...l..scplt...lPlpVGGG....IRoh-slcpl..Lp.tG....s-....+Vslsos....Al.cs.Pcl..lpchucca.GspplVVulD..u+p...........G..............cVhhpGhpcs....oshcsh-hscchpchG..sucIlhTsls+DGThpGhs...l.ch.hcplsptl.s.lP.VIASGGsushc-lhplh.....ptGh..suslsupsh..apGphs .....................................................................................................................hlIPslDl....c..cG......p......lV.........c.Gs..........................hps.h.p.s...s..DPlp...........hA..ptY....s....ppGA-..c.Lp..h..l...D....l..s..u..up.............ps...+..s....h....ph.....shlc.............p.........l....sppl............lPlpVG.G..G............IR..o............h...........--............l...........p...........pl...........L.p..s.G...........Ac.................+..V..slsos..................Al....p...........s.....P...p.h.............l.pc...h...sc......c....F......G.....s.....p..slVlulDs+t................G..................................hpV.ss.p.Ghpcs......oshcs.h......-..hspch......p..p..h..G.....s..u..c...l.l.l...T..s..h....sp...D.Gs.h..........s..G...h.s..........l...ch..hptl..............sp..t.......s.....p..........l.............PlIASGGsushpc.lhphh.........ptG...s..cuslsupsh..atuph.......................................................................................................... 
0 713 1441 1865 +2960 PF00815 Histidinol_dh Histidinol dehydrogenase Bateman A anon Pfam-B_1358 (release 2.1) Family \N 19.90 19.90 21.40 20.80 19.00 18.80 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.24 0.70 -5.84 16 3998 2012-10-02 17:28:28 2003-04-07 12:59:11 15 14 3598 8 1109 3262 5057 401.20 42 91.68 CHANGED hcphltRsh.cssc.lhptVpsIl-sV+ppGDcAlhEaTp+FD..GVp.l-s...hVst-chpcAhctlsscl+pAl-hAhcNIcpFHtsQh..ptshslEsp.GVhsuphspPl-pVGlYlPGGpAshPSTsLMlulPAplAGsccIVlsSPPs.p.sGphsPplLhsAphsGlccIatsGGAQAlAAhAYG.TEo....lsKVDKIhGPGNhaVTAAKhhVps....tlsIDMPAGPSEVLVIADEsAsPcaVAuDLLSQAEHsssSpslLlTsS..cphAcclpptlpcQlppLs..Rt-.hlppuLs..aSsIllscslpEAl-hSNpYAPEHLhlpscsscp.llsplcsAGSlFlGsaSPESsGDYuSGTNHVLPThGaARpaSGLultoFhK+hTlQplTc-GhcslupsVhsLAcsEsLpAHtpAV+hRhc ...............................................t.....t......s.ssp..lppsVpsIlpsV+pcGDp.ALh-Yop+F.D....p...s..p.....h.........s...s...l..........cV..o..t..p-l......psAh...p...p.............l...s...sc...h+pAlphAhcpIcpaHpt.Qp.....ps...............h...p.............h...c............s....t......s.....G.....lh.hsphhpPlp.......pVGlYVPG...G...p...A...shPSoVLMsul...P..A+lAGV..cclVhsoP.........P......................h...................G.........................ss....tl....L.sAA.p.l.s.GV.sc......laplGGAQAIAALAY......G......T.....E.o.......................ls+VDKIsGPGNtaVstAK.RtVhu..............tV...uI.D.M..AGPSElLVlA...D...p.s..A..s..................Pcal.AuDLLSQAEHss.t.u.ps..l.LlT.ss.........tplAcpVp.ptlpc.Q.L..t.p.Ls............R.t.-..lsp.puls....put.lllscs.l.s.pulpluNphAPEH.Lplps.....p...ss...p..p...hl.sp.lcsAGulFlGpaoPEulGDY.s..u.GsNHVL........P.........TsGsARhsSuLultDFhK+hol...p..h..o.c.pu.....h.pp.l.u.psltp..LApsEsLs.AHtpulplRh..................................... 
0 350 717 947 +2961 PF00125 Histone histone; Core histone H2A/H2B/H3/H4 Bateman A, Sonnhammer ELL anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.06 0.72 -3.86 64 18172 2012-10-10 12:36:46 2003-04-07 12:59:11 19 47 9743 705 4298 10642 208 64.10 53 50.27 CHANGED psphthhphsltRlh+plppp..........h+lsupAhhhlppslEshhtclhpcAs.hhupcs+RhT..lts+-lphAh+hp .......................................STELLIRKLPF.Q.R..L.V.R...E....IAQDFKoD.................LRFQSoAl..h.A.L.......Q..........E....u......sE...A...YL..V.u...L.FE..D.....o...............s..L.......s...A....h..H....A..K....R................................................... 0 1432 2046 3176 +2962 PF01230 HIT HIT domain Finn RD, Bateman A anon Prosite & Pfam-B_8474 (Release 8.0) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.27 0.72 -3.46 26 8213 2012-10-01 23:45:21 2003-04-07 12:59:11 18 43 4728 105 2350 6615 3709 96.10 27 65.13 CHANGED ptEhsupllh-s-hshAFhDhpPpsssHhLVlP+p.p..lsplpshs........ttluplhhhspcluptlph.....................ttpuhphshpsGtpuGQsVhHlHlHllstcph ................................................t..phss.hl.a..c..s...-..h.s.h...A....F...h...D..l...p.....P..h....s..s......G........H....h...L.....l.....lP+p..c......h.s.s.l.t.-.ls...........................tpt...h...s.c....l....h..t.h.s.pc.l.u.c.t.l...t.t............................................................................t....p.G.h.p...l...h..hN....s..s..t..t..u..G.Q..s..V..h..H..lH..hHllP+h..h........................................................................ 
0 773 1489 1980 +2963 PF00816 Histone_HNS H-NS histone family Bateman A anon Pfam-B_1651 (release 2.1) Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.19 0.72 -3.48 151 2937 2009-09-12 23:38:08 2003-04-07 12:59:11 16 8 1220 14 554 1580 133 100.10 31 80.49 CHANGED pcLpphhpclp.........pphppt..cppc.pppsltpl+phhpp..hG..lo...hp-L......................hststt..............sppp+s.sss..KY+pPps.Gp..TWoGRGRpPpWltshh..........spGcpl ..........................................................t.pL.chlccLc...........cptptt..c..hpE..cpc....tlpphp...c...hltt..G..Is...spEL............................................hsssts........tstt+R.sspP.A....K.Yc...s-s.Gc..TWT.G..pGRsPphItpshtptt...................................... 0 67 192 380 +2964 PF01870 Hjc DUF50; Archaeal holliday junction resolvase (hjc) Enright A, Ouzounis C, Bateman A, Dlakic M anon Enright A Family This family of archaebacterial proteins are holliday junction resolvases (hjc gene) [1]. The Holliday junction is an essential intermediate of homologous recombination. This protein is the archaeal equivalent of RuvC but is not sequence similar. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.87 0.72 -4.38 16 183 2012-10-11 20:44:43 2003-04-07 12:59:11 13 2 153 24 103 289 78 92.00 30 64.17 CHANGED shERELlchLpccG........................FAVlRusuSsuu.......DllAspssh.hLsIElKsopct..+lYl........cp-clcpLlcFuc+F...GupPhlAlKh....scsW+Fhssps .....................................p.hERELlphLc.c.pG.........................Fu...V..l......R...us...u.Ssus..........................hsDllA..scssh..hlsIE...sKosppp...plh.l..........pp-plcpLhpFucph...........Gu....pshlA.lKh...ttptWhhh....p................................................... 
0 26 59 85 +2965 PF02110 HK Hydroxyethylthiazole kinase family Mian N, Bateman A anon IPR000417 Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.57 0.70 -5.20 9 2596 2012-10-03 06:25:16 2003-04-07 12:59:11 10 9 2292 46 490 2628 261 239.80 38 87.31 CHANGED Lpcl+pppPLVHsITNhVstNFoANsLLALGASPlMuhsh-Ehp-hA+IusA.LlINIGTLss..hcuhhtAscsAp-hspPllLDPVGsGATphRpcssh-LL.phthusI+GNsuEIhuLsGhs.tts+GVDospuuss..s.ltsspplApchsslVlhTGchDhVoDGpp..shslpsGsp..................Lhs+lTuoGChLuuVsAuFhAl...pssLhss.hsAsshYplAu...ppAstcspus.....GSFhsphlDtLhpLstE ................................................................................................................phl+ppsPLlpshTN.Vs.tsh.........sANs.......LLAlGASP.sM.u.p.s.scEs..p..-h.s.p.l..A..s...A..L.l..IN..lG.............T.L.os..p..p.h...puh....ht.As.c.t.A...p.p..s.s..h.P..h.VL.DPV.......u............s.......G.......A.....s..........s...a...R.......p.......c.......h.s........p...cL..L.....s.....h...+.....s...s...........lI.R...G.N....A.SE...Ih....u.L............s...........G..............h..............s......s...........t............u........+...........G.V.........D.....u......s......s....s....s........t..s...................s...l........t.............h....App....h...u..p....c............h.......s.........s.......l....l...l....l..T.G........p........h......D.......h......l....s.........s...u.............pc............s.h..s..l..p.s.Gss....................h.h.s..+...l..T.......GTGChLuAllAuF..h.uht...................ps....s........h......ss......ss.A..s..s.hh.sl...Au......................Eh.A.....s...t...c...s....p.Gs..............GoFpsthLDtLapls......................................................... 
0 163 312 412 +2966 PF03865 ShlB HlyB; Haemolysin secretion/activation protein ShlB/FhaC/HecB Finn RD, Henderson I, Moxon SJ anon DOMO:DM07489; Family This family represents a group of sequences that are related to ShlB from Serratia marcescens. ShlB is an outer membrane protein pore involved in the Type Vb or Two-partner secretion system where it is functions to secrete and activate the haemolysin ShlA. The activation of ShlA occurs during secretion when ShlB imposes a conformational change in the inactive haemolysin to form the active protein [1]. 19.90 19.90 19.90 19.90 19.80 19.70 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.24 0.70 -5.74 2 1287 2012-10-03 17:14:36 2003-04-07 12:59:11 8 9 664 2 267 1255 50 340.50 24 65.32 CHANGED GDRhVNopLLFPtlcGpPLKLspLDQGLDQANRLQSNpsplDILPGpplGGSVI+LpNQ+tpPWhlshuoDNYGQKsoGRWLhRssAoLDSPhGLSDFVSLNAN.Th-NPspRaNRAYTLLYSlPYGuFTFSuFuSaSpYp.HQpL.ppsVpLaGpTpQhGlRuDYsF.RspcQIDoLshQlTaKRIcNYFpplRL-lSSPpLThhELuhsHLQIlPNGVhSsNLSVEpuhsWhGAtcpP..sp....D.pFTKsKLFsNh.QRhpLhcuTa.hNshFhGQYS+DsLPGVEWLSLTD+sAlRGFspST.SGDNGhYLpNTLShsapLstholTPRlGsDlGplh.+ts.pGWpuuhGlSoGhslpYQpA.lDLEVu+G.lL...oNpspscDPsQlLs+FSYhF 
............................................................h.ht..........h..P.hh.t....G.cl....Lsl+-lEQGl-p.hp.Rls...........s.....p.sp.hpl....hP.....u....p.....p...s.....G....t.S......lhl.p.h.p.p..s.c...h....ph..s..huhDssG..p..cs..T..Gchpss..ss..l..sh-N......shuL..s.D.....h.......l..s.........h.....s.......p...s.......h........p......t.....t....t........p....p.....h.....s........p..........s........h.....s.ht.Y..S....lP..a..G..h..a..p.hs..h...h.s..h..sp..a.p....h...t..h...t...s.....t.....s..h.....p..h..p..G..p.o...p..p..h..shp..h..s..+...ll..h...R.st..p.p..+t.....shth....pl..p..+..ps..........ps.......a..l....s....s...s...c...l.p.h..pp....p.p..h.s..s...hph..G.l....s...a...p.p...h..h...s..t...u....h..h.......s..h....slu..a.p....pG.h.s...h...h..G..A....p...t...s.....s....t........t.....t...............................t........s.....p.....h.....p..t...h.ph..s...h...s...ht..h...s.......a...t...l...h...p...p..............ht.a..ss..t..ht...uQa..o..t..s..s..L.hs.t-p.holGuchoVR.GF...c.c.p.s.l.s...u-p.Gha....h+.N....-..L.........s.....h.......h...............s................t.......h.............h.......h.......u.h....DhGt..l................................................................................................................................................................ss.hh.h.hh.h.h............................................................................................................................ 0 31 115 195 +2967 PF02794 HlyC RTX toxin acyltransferase family Bateman A anon Pfam-B_1230 (Pfam 6.0) Family Members of this family are enzymes EC:2.3.1.-. involved in fatty acylation of the protoxins (HlyA) at lysine residues, thereby converting them to the active toxin. Acyl-acyl carrier protein (ACP) is the essential acyl donor. This family show a number of conserved residues that are possible candidates for participation in acyl transfer. 
Site-directed mutagenesis of the single conserved histidine residue in Swiss:P06736 resulted in complete inactivation of the enzyme [1]. 25.00 25.00 25.40 25.10 21.20 24.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.69 0.71 -4.56 29 386 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 293 0 46 249 70 134.70 34 84.55 CHANGED phLGtlsWLhhpSPhHRcaslu.lttpllPAlphsQatlhp..cs........................shPlAFsoWAhLo.-sEt+alpssppLhsp.DWsSG-+hWlIDalAP..FGc.....sptlh+pl+cp.Fs...sphh+slRhchsuppttplh.ht.......................lp ...........hlGtlshLhhpSPhH+p.asls..hthpllPAIptsQaslhp..cc........................Gh.PlAasoWAhLs.-sEs+aLps......sp......p......Lhsp.DWpSG-.RhWllDaIAP.F..Ga.....sptl.hchhRp.c.Fs.....tph.hRulRhc.sspp.tplhphph...hsh................................... 0 7 23 37 +2968 PF00529 HlyD HlyD family secretion protein Bateman A anon MRC-LMB Genome group Family \N 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.58 0.70 -5.11 44 15364 2012-10-02 20:27:15 2003-04-07 12:59:11 15 41 2231 47 3183 22645 3068 262.60 18 71.90 CHANGED tsplpspluGhltclhsc-GstVctGplLhplDssshpush......ptpuphttspt...psphhttthpphpshhtspth...............tppptssthtthppshhpsp.....hptsplppuptshpttpsshphs...............sthppstsphtstlss.......tspshhshsplpphhhphppsptt..t.htssspu.hhtt..t.ptlphsptht.....sstslhshl.hpphtlctphtpsphctht.s.pspltsshhsts..hhhpGsssss..h..hhshsssspthts..plhssshsphspthsVclths....ththsspshhsGppshl 
....................................................................hpltspV.uG.h.....l..h..p.l.......h...p...-..sp..h.V.....ct......G.p..h.Lh.......p.....l........D.......s.......t.......s...h..pst..ht..........................................pA..p..u.....p....l....t....p....s....p.....t............................t....h.....p.............t....t...........p.....h.....p....p...hp...t.....h....h..t....t....tth..........................................................tpp.t...h..p..p..t.......t...............p.....p.....s.......h...p.t..................................h...s.....p..t.......p..l....p.......p.......u...c.......h.......s...h....t...h.....s....p...s...t..h..hs................................................................sth...s..t......t.t...s...t...h...t...s.h..lps...............................ps..p.h.h.......s.......s.......h...s........p....l......p............h..h....l....s.......l....p....p..s...p...................................................h..p.....u....s.t....s..G..............h.....h..........t......t..........................h.....s...l.......ph.sthss...................ssts..h...h...s....h....l..........h....p...s...h....h.......l...s...s..p.h.......t..p....s.......p......l.......pt......h......h...........s.......s.............p........s...p........l..h....s.s...h.hsp............hh..h.p..u.th...s...........h.........hs.t..t.....s..h.........p...........h..s.................s.........s....h....s....h.......p.....h..s..p.t..h.s.l...p.h.t....ht.......................h..s..................................................................................................................................................................................................................... 
0 714 1599 2379 +2969 PF03201 HMD H2-forming N5,N10-methylene-tetrahydromethanopterin dehydrogenase Mifsud W anon Pfam-B_2929 (release 6.5) Family \N 20.30 20.30 22.00 72.70 19.80 16.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.20 0.72 -4.07 13 59 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 31 6 38 60 0 97.90 47 28.43 CHANGED A-LlusVsDMuSsVTAlshAGlLsYhsssTpIlGAPtchspp.sh.oLpplAuLh-ssGlcsh.cuLsPcsLlsoAcSMphssps..-.LssuLclLEch ..AcllusVsDMGShVTAlshAGlLsYhsssTpIlsAPtchsph.sh.uLpplsuLhcspGlcsM.csLsPcsLlsoAcSMphssht..-.Ls.suLclLcc....... 0 10 18 28 +2970 PF01101 HMG14_17 HMG14 and HMG17 Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 21.60 21.40 20.20 20.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.73 0.72 -3.29 18 482 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 149 0 153 417 0 80.60 54 86.60 CHANGED PKRK.......stssstssKpEPpRRSARLSAKPAPPKPEPKPKKsusKcK............ssKscK.uAKGKc..-ptpptsK-ss...suENG-sKs-E..s.ts-ustspc .....................t+.........t.sspthsKpE..Pp..RRSARLSA............K.PAPPKPEPKPKKAusKcp.................cK...ssKGKK...tc.ustu..p.ctss...PAENG-sKo-p...s.ts-t...................... 
0 9 13 36 +2971 PF00505 HMG_box HMG_box; MaoC_dehydrat_N; HMG (high mobility group) box Finn RD anon Pfam-B_8 (release 1.0) Domain \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild --amino -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.03 0.72 -3.71 31 8695 2012-10-02 14:16:02 2003-04-07 12:59:11 14 175 1353 52 3772 8773 263 66.20 29 17.82 CHANGED PKRPhoAahlatp-tRtpl+p-sPshc..sspluKtlGctW+sLstc-Kt.Y.ptApct+pcaccphspYc .......................................KRP.h..sA.ah.la....t....p...c..p.......R........p....p............l.......t........t........p.........p.........P............p............h.........p.........su.........-..............l......o.....K.hL..G....p..p...........W..+..........t..L..s.cp........-.K............p.....ah.c.c.Ap.......c...h+t..pahcphssYp................................... 0 1189 1695 2729 +2972 PF01154 HMG_CoA_synt_N HMG_CoA_synt; Hydroxymethylglutaryl-coenzyme A synthase N terminal Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.06 0.71 -4.71 4 1680 2012-10-02 12:25:54 2003-04-07 12:59:11 12 15 1444 28 478 1426 215 165.20 41 40.45 CHANGED WPcDVGIlulElYFPoQYVDQuELEKaDsVssGKYTlGLGQs+MGFCoDREDINSLCLTVVpKLMERsslsassIGRLEVGTETIIDKSKSVKoVLMQLFpESGNTDlEGIDThNACYGGTAALFNAlNWIESSuWDGRYAlVVsGDIAIYspGsuRPTGGAGAVAMLIGPsAP ...............................................h....lGIctlthahPs.....YV.c.....hs.....-.L.....A.........c....s....R......slD....ssK.a..p...h...Gl...G...Qpchulss..sEDIlo.hussA.upsl.l.....s.c..p........-.t.p.pIshllV...uTESulDpSKu...s..us.h....l....p....pL........Lul...............p...s.......s....c.uh...E...hKpACYGuTAALphAhsalp..sp.s...........s......sc...c....sLVl..A....oD....I..........A............+.Y...........u...h..s.........s...........u.........E.P.T.....Q.....GAGAVAMLIuts..s.................................. 
0 147 269 383 +2973 PF00682 HMGL-like HMGL-like Bateman A anon Pfam-B_71 (release 2.1) Family This family contains a diverse set of enzymes. These include various aldolases and a region of pyruvate carboxylase. 21.90 21.90 21.90 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.11 0.70 -4.71 20 12580 2012-10-03 05:58:16 2003-04-07 12:59:11 14 57 4555 111 3825 10439 6963 236.80 26 43.97 CHANGED RDGEQuhtss.holpcKlpIActLschGlc.IElG.............hsssuts-hctlcslsctlts...uclsslsR.............sstc-lctuhc.h.ssGsspl+lhlssS-hhhphplppsht.shcpspphlchA+phs.cV..........plusEDuuRsshsalhclsctshsA......GspplslsDTVGhhsPpphtchlptlpppls....s.lulHsHsDhGhAlANolsAlpAGAspl-solsGlG......ERAGNsuL.Eplshulcstt ...................................................................................................................................................RDGpQu........hss..h....s.....scc+lpl..........s....p..........t..........L..s..c......h.....G.....l.p......l...Esu...................hshs.u........t....s.........s.....h.....c.t..l.....p.....p......l...t.....c.....t....h..hp...............spl....p...s...L...h...p.............................................s.s...p....p.........l....c....p....s.....h....c..........h.........................s....s........h....s....h....l...........+....l.a...s...ussp...............................h.........t.........h.......s...................t...t......l...c.t...h......t....p....s.l.p..h........s......+.p...h.....s...h..ps................................ph.u...s....p......s...t....s......t....t...s......h...p.....h...h.h..c.....l..s......ct.h.h.ph...............G.s..p.h.l..slsDT.......s.......G.......h.......h.......p........P.......p.......p..............ht.......c.......l.......l.p...t.l.p..p..p..ls.........................h.lul.Hs.H....s..sh........Gh..A...........lA.s..s..l..s....A....l....c....u..G..............sc....t.....l...-..sslsGhG.......................tp..sGNssl.EsllhsLph..t
......................................................... 0 1234 2475 3280 +2974 PF00423 HN Haemagglutinin-neuraminidase Finn RD anon Pfam-B_171 (release 1.0) Family \N 20.40 20.40 20.50 21.80 19.70 20.30 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -12.63 0.70 -6.22 21 2456 2012-10-02 00:45:24 2003-04-07 12:59:11 14 2 198 58 1 2086 0 439.80 33 94.13 CHANGED lhshlshlLullsllhhhshplpphshsssphpphhps.hulspsIcptsccltt.....lpPhhphIsspVuhplPtplsphtp.l.......................psp............................lsctC....................shspsptshhsh.psphhpsl..shpshhhsssssh.sshs.psphp.h.ssohl..ssssshsu..Clp.PshulupslauYoaslhpssCpct......spshphhplGhlpspusthPshpsssthshspsssh+sCSlsssshthhtLCohssssphp-huoss.pslhlshLsltGphpspchpss.hshc.....sastLYsosGsGlhhssplhFhsaGsls..............pstpspupChtstCpspstphCNpu.hsshhus+hhspGllplslsh..stpsplpltolssshhhhGupu+Lh.....s.hhhYpposuWhohs.hstlsls..sshs....lphsspshs..oRPGssp..CthsspCPt.CloGlYsDsa.Lss........shphlsushlsSpps+tsPhhshssspshshhh.lp.sspspsshooosCF..sapp+laChphsElsssshsshpsh.hhspl.hsC .................................................hhlhlhhl.lhhl..hh.hs.hhhphsh.....p...........pp..thsp.lpct..pl.........slhp.Ihcp...l.......slp.P.th.ph.p.l........................pt.......................................hs..h....................tsss..pt.hss.h....hss.....h....ltsh..ph....h.s.hp..h.sshh.........t.shl..sh.hotsh..ssp..sh.hopshYshTa.l..sshpsc......sp.hphhtlGhl+psu.shshhphhp.h..s.spshpsC.luhs.LthshLCpt.sshp....uSst.sshhhspLGh.sphpppphpssh.ht......shsthY.ustpGhhhDspshasV.sshp..............p.ppt.psC.pp..psh..thspss.h.shhss+hsp.ulLolplsh..olt...phhls.s.sslhhhGuthclh........h..ppss.sWho.s.hhshsls.h.ssh...........h.sP.hhs.......+...u..u.s..Cthssh.....lsuhhsssh.l...h......spshphVhuTh..sp.t+hssh.sh.s.hshshhh.hp.s.pshs..hp..pCF..shspchaChphh.htss..sthh.h..hsthhh................................ 
0 0 1 1 +2975 PF04814 HNF-1_N Hepatocyte nuclear factor 1 (HNF-1), N terminus Kerrison ND anon Pfam-B_2624 (release 7.6) Family This family consists of the N terminus of homeobox-containing transcription factor HNF-1. This region contains a dimerisation sequence [1] and an acidic region that may be involved in transcription activation. Mutations and the common Ala/Val 98 polymorphism in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) [2]. 28.40 28.40 29.10 28.60 27.70 28.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.29 0.71 -4.18 9 283 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 52 22 104 212 0 171.40 47 37.26 CHANGED MsScLotLQpELLtALLpSGloKEsLlpuLs-ht......................Pthphs-p.hth..hshstutsp........th..uhGcs..................................chotDEs....S-DG--..sPPIhKELEsLusEEAAcQ+......ulV-pLLpEDPW+sAKhlKSYMQQHNIPQREVVDsTGLNQSHLSQHLNKGTPMKsQKRAALYsWYVRKQREltpQ ................................................................................Q.-LLttLhpoGhoKc.llpALtpht................................s.....st.tsh...ht.ut............................p.s......................................................+hSt-.cs...........sps.up..ca.s.....P..Pl....hp....c..h....p...s.h....s.....s.E.EAu.cpc........scV..Ec..LL..pcDsacluchIKuYhQpHNIPQREVV-sT..GLNQSHLSQH..L.NKGTPMKsQKRuALYsWYl+KQpEl........ 0 17 29 54 +2976 PF04813 HNF-1A_C Hepatocyte nuclear factor 1 (HNF-1), alpha isoform C terminus Kerrison ND anon Pfam-B_2624 (release 7.6) Family This family consists of an alternative C terminus of homeobox-containing transcription factor HNF-1, found in the HNF-1A isoform. Different isoforms of HNF-1 are generated by the differential use of polyadenylation sites and by alternative splicing.\ \ The C-terminal region of HNF-1 is responsible for the activation of transcription, and HNF-1A, which has this C-terminal extension, transactivates less well than the B and C isoforms [1]. 
Mutations and polymorphisms in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) [2]. 27.30 27.30 28.70 35.50 27.20 27.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.96 0.72 -3.71 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 34 0 20 47 0 81.80 65 14.36 CHANGED llToDPEuHTDSuIcEPSS......l.sQD.osILHLQSu.RLSPsPsVSSuSLlLYpsSsSoEoH.SHL.LSSoHusI-oFISTQMASSo ......................VFTSDoEAsSESGLHsPuSQAoTlHlPSQD.suuIQHLQPuHRLS.sS..........PT..VSSSSLVLYQSSDSoNGH.SHL.LPSNHSVIETFISTQMASSS. 0 1 2 5 +2977 PF04812 HNF-1B_C Hepatocyte nuclear factor 1 (HNF-1), beta isoform C terminus Kerrison ND anon Pfam-B_2624 (release 7.6) Family This family consists of a region found within the alpha isoform and at the C terminus of the beta isoform of the homeobox-containing transcription factor of HNF-1. Different isoforms of HNF-1 are generated by the differential use of polyadenylation sites and by alternative splicing. The C-terminal region of HNF-1 is responsible for the activation of transcription [1]. Mutations and polymorphisms in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) [2]. 
25.00 25.00 54.90 52.20 20.40 21.20 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.83 0.70 -5.10 11 156 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 43 2 50 130 0 219.60 56 42.10 CHANGED AMDsapus..uss.s.hLsp.s......oPpsu...s.th..u.........................lRYSQpussEssoSos.ups.spu.....osLpQVSPsuL-PSHoLLso-sKh.IusSGGsLPPVSTLTslHSLstss....Ht.tQQsQNLIMssLPuVMuI....................s.uLsooQuQSVPVINSVGuSLTTLQPlQF.SQ......QLHsshQQPLhQQsQ.SHMu..QsPFMAThAQL.ssH.MYS.KsEssQYsHoShhsQsMVITDoosLuTLTSLouoKQ ...................................................AMDsYsus..s..s.sPhLsttSs....t.Pssu....sP...s.Khp..G.........................VRYuQ.ussEssusso.Stt...stshV.T.sposLpQVSPsuL-Pu..HsLLSs-uKh..lSsoGGsLPPVSTLTslHSLp........t.sQQsQNLIMssLsGVMAI.......................s.uLsooQAQSVPVINShuuSLssLQPVQF.SQ......QLHssaQQPLMQps..SHhu..QpPFMAshsQLQssH...hYuHK.EssQYoHouhhPpsMllTDTsslSsLsshosoKQ............................. 0 2 6 18 +2978 PF01844 HNH HNH endonuclease Bateman A anon [1] Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.73 0.72 -4.09 92 7400 2012-10-05 18:28:12 2003-04-07 12:59:11 18 79 3428 3 1824 6263 3802 47.60 26 18.89 CHANGED Cp..hCspph...........thplcHIlPhp....p..uGpps.....hsNLhslCpp.Cppp+psc .....................................Ct..hC..s.t..h........................thplc.Hlh.Phs...............p........GGtss....................hsN..lhhlC....tt...C.Hppcpt.................. 0 538 1239 1594 +2979 PF01848 HOK_GEF Hok/gef family Bateman A anon Swiss-Prot Family \N 21.60 21.60 22.80 22.50 20.90 20.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.01 0.72 -4.63 41 1852 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 385 0 46 440 2 42.80 47 78.31 CHANGED +thlhsLlllClTlLhFshlsRcoLCEl+l+pG.spEluAhLAh .......KhtLlulIVlClTlLsFsLll+cSLCEl+l+pt.shEhsAhLAY.......... 
0 6 9 26 +2980 PF05102 Holin_BlyA holin_BlyA; holin, BlyA family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family BlyA, a small holin found in Borrelia circular plasmids that is encoded by a prophage. BlyA contains two largely hydrophobic helices and a highly charged C-terminus and is membrane associated [1]. 21.10 21.10 22.00 21.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.02 0.72 -4.25 3 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 33 0 3 53 0 59.30 54 93.80 CHANGED MDTIpIs...-FLlsLsNIKLItLhIFIollILullLllKPllKDhLoILIuKIhKNsNcKEKc ..........MDTIKlT...EhLINLNE...IKLIuVMIFlTVllL.GsLILLKPLLKDILoIl..IGKlFKNuNsNsp................. 1 2 3 3 +2981 PF05204 Hom_end Homing endonuclease Studholme DJ anon SCOP Domain Homing endonucleases are encoded by mobile DNA elements that are found inserted within host genes in all domains of life. 21.10 21.10 21.10 21.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.42 0.72 -4.11 24 211 2012-10-03 01:41:40 2003-04-07 12:59:11 9 23 116 23 55 250 50 102.50 30 13.28 CHANGED slPpaLto-slplREsFLAGLlDS-GhVccptt.....ssolpTh.polhpGlVplARSLGlpsoVss...cptphctpsVppp.s........YslslusussLpuVLutCusscppts.........tthh ..............IPphhhppshplR.uaLAGLIDSDGhsscptt.....phslpohpsolhcsllplARSL.Glssssss...c.th.h.hptsph............Ysh.h...t.t...t...s....h.t.s...............s.hhh.................................. 1 5 22 47 +2982 PF05203 Hom_end_hint Hom_end-associated Hint Studholme, DJ anon SCOP b.86.1.2 Domain Homing endonucleases are encoded by mobile DNA elements that are found inserted within host genes in all domains of life. The crystal structure of the homing nuclease PI-Sce [1] revealed two domains: an endonucleolytic centre resembling the C-terminal domain of Drosophila melanogaster Hedgehog protein, and a a second domain containing the protein-splicing active site. 
This Domain corresponds to the latter protein-splicing domain. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null --hand HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.37 0.70 -4.60 37 308 2012-10-03 10:25:13 2003-04-07 12:59:11 11 40 197 12 58 405 104 284.60 23 42.25 CHANGED CauKGTpVlMuDGopKslEclplG-tVMGpDGpPR..cVhsl.s+Gp-pMYclppps.p..................shhsasCsusHhLVL+ss.th....p..th.....................t.R....lsp.hptsa..p...u..tt.thhpph.ts.sscphhpaslpA+Dhs.tLssplRpso...pthhsPlhhtpt.hsphltstt....p.s...s..huYlLGLWlGsG.htcpsphshsopD.tplh..pplpphup..hhslp.phtptp.shh.t..........................................................................h......t..ht..............................................................................................................................................tshspsN.hhptlh.phGhhp........phsKpl..Pphhhs-shphREtFLAGLlDocGhl..pt......psslpsh.pplhpslltlARSLGlpsslspcptp...hthtt..............athp.lssssshhssLshsttspp.htstspth.ct...htFphhE.hpps-aYGlTl..-sD+paLLushhVlHN 
........................................................................................ChstGT.lhhuDGs..cslcslphG-hlhG.D.....G....p.....s+......cV....hsl....s....p...Gh-..p..hY..plp.ts...........................................................th.shsssssH.Lshh.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 13 33 53 +2983 PF00046 Homeobox homeobox; Homeobox domain Eddy SR anon Unknown Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.63 0.72 -4.35 182 25115 2012-10-04 14:01:12 2003-04-07 12:59:11 24 290 1497 161 11025 24665 26 54.40 35 16.80 CHANGED ++pRTsaospQlptLEptFp...p..spYsstpcRpcLAppL.....sLs-ppVplWFQNRRtKhK+ ........................................++.Rs.s.a..o.p.t.Q......l..t..t.L.E...c...tFp.................p...s...pY.......st....t..c...........R.t.clA.ppl........................sLo.c.....p..p..V...........p.........l......W...........F..Q.N.RRhK.++...................... 1 2391 3737 7103 +2984 PF00742 Homoserine_dh Homoserine dehydrogenase Bateman A, Griffiths-Jones SR anon Pfam-B_459 (release 2.1) Domain \N 22.90 22.90 22.90 23.30 22.50 22.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.90 0.71 -4.60 147 5558 2009-01-15 18:05:59 2003-04-07 12:59:11 14 32 4234 19 1325 3990 2377 185.10 37 33.91 CHANGED Pllcslcps.LsGsclpplpGIlNGTsNaILopMp....p..GhsFpcsLc-AQchGaAEs.DPstDlsGhDsApKlsILAphshGhpl..s.hs-lthcGIsslsht.......................clphA.pchGhsl+Ll..................uts..ch............ttth.pspVpPphlspscsLAp.lpGs.Nulhlps-hh......uslhlhG.GAGstsTAuuVluDll .............................................................PllcsLpph..huuDcI.....ppltG.IlNGTsNaIhschs..................pt...GhsFs-slppApp...hGauEs.DPpsDl....sGhDsA+Kl....sILAph..........h..G..h........p.l.......p....hs.-l..t..l.E..G.ltslshp................................clp.hA.cc.h...G..h.sl+hl........................uhhc........................tshpscVpsshlsps.+PLAs.Vpss.NA..........lhlpuchh......ssl.hh.....hG.GAGstsTAuuVhuDll.................................. 
1 417 834 1118 +2985 PF00103 Hormone_1 hormone; Somatotropin hormone family Sonnhammer ELL anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.21 0.70 -4.99 17 1608 2012-10-02 01:28:15 2003-04-07 12:59:11 15 2 560 24 164 1561 2 156.40 32 94.64 CHANGED LlhSshLLsp.....suSsPsss..........LpcLFppAsphucphapLusphas-F-ppahptpt.......hhpts.......shCHTsolpsPps+-pspphstpcLL+hslhLL.SWppPLhhLss-hpsh.ts..stsllo+Ap-lpcp.cpL.pslcplhp+l...stpphp.h..tasphu...tscc-p+hhshYsLhhCh++Ds+Kl-hal+lLpCRhhhp....C .............................................................................................................tpLhs.sh.hsphla.Lu.tchhp-F.....-c..p..hhsppp...................h.p.............shChopol.s.PpsK.ccs.Qp.h.o......hc.LLchuhhLlpS.W.tPlphLs....t......ts..s......h.t..p...cl..tht.L.......cuh..h...th.....................................h.h..ths.............pptlht.at..Llt..Ch++D.cKh-saLplhpCR.......C....................................... 0 6 15 61 +2986 PF00123 Hormone_2 hormone2; Peptide hormone Sonnhammer ELL anon Prosite Family This family contains glucagon, GIP, secretin and VIP. 21.20 21.20 22.50 21.20 20.80 20.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.58 0.72 -6.64 0.72 -4.09 56 842 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 138 23 275 773 0 27.80 39 34.80 CHANGED HuDGoFTs-ho+hL-phuA+cFlphLhs .......HuDGhFTssYS+hLcphus+calphLl..... 0 10 29 91 +2987 PF00159 Hormone_3 hormone3; Pancreatic hormone peptide Sonnhammer ELL anon Prosite Family \N 22.40 22.40 23.10 22.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.54 0.72 -3.94 21 274 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 120 32 90 268 0 35.70 61 41.61 CHANGED hPscPc..tPG-sAssE-..LupYhssLpcYlNllTRpRY .........YPsKP-..sP.G-DAssE-..hA+YYuuLRHYINLlTRQRY. 
0 8 14 32 +2988 PF00220 Hormone_4 hormone4; Neurohypophysial hormones, N-terminal Domain Finn RD anon Prosite Family C-terminal is in hormone5 17.00 17.00 17.00 17.10 16.90 16.90 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.55 0.74 -5.72 0.74 -3.80 8 197 2009-09-11 03:36:00 2003-04-07 12:59:11 12 2 104 0 51 205 0 9.00 83 7.31 CHANGED CYIpNCPhG CYIQNCPhG. 0 3 5 25 +2989 PF00184 Hormone_5 hormone5; Neurohypophysial hormones, C-terminal Domain Finn RD anon Prosite Family N-terminal Domain is in hormone5 25.00 25.00 25.00 31.40 21.80 24.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.37 0.72 -10.76 0.72 -3.44 36 207 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 109 29 60 232 0 77.00 63 55.90 CHANGED RpChsCGPGs+G+.....CFGPsICCG-pl......GCalGTsEoh+CtEENYL..soPCps.GG+sCGs.t....uG+CAAsGlCCssEoCshDssC ........RpChPCGPGs+GR.....CFGPsICCG-pL......GCalGTsE..shRCpEENYL..PSPCpu.......Gt+sCGS.............GGRCAA...sGlCCss.-.uCshDssC................ 0 6 8 30 +2990 PF00236 Hormone_6 hormone6; Glycoprotein hormone Finn RD anon Prosite Domain \N 25.00 25.00 25.60 25.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.49 0.72 -4.17 10 201 2012-10-02 16:54:34 2003-04-07 12:59:11 13 2 150 10 30 185 0 91.40 69 81.48 CHANGED aPcs-hss.GCsEC+L+ENphFS+.GAP.IYQChGCCFSRAYPTPLRSKKTMLVPKNITSEATCCVAKphhRVTVhsslKlENHT-CHCSTCYYHKS ...................aPss-h....GC.EC+L+ENphFS+...GuP...lYQCMGCCFSRAYPTPlRSKKTMLVPKNITSEATCCVAKphp..+...spV..h...s.....l+.lcNHT.......-CHCSTCYYHKs................. 0 1 3 11 +2991 PF04617 Hox9_act Hox9 activation region Kerrison ND anon DOMO:DM04544; Family This family constitutes the N termini of the paralogous homeobox proteins HoxA9, HoxB9, HoxC9 and HoxD9. The N terminal region is found to act as a transcription activation region. 
Btg1 and Btg2 - the B-cell translocation gene products - may function as cofactors for Hoxb9-mediated transcription. The Btg proteins modulate Hoxb9 transcriptional activity by recruiting a multiprotein Ccr4-like complex [1]. 20.60 20.60 25.40 22.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.56 0.71 -4.11 29 299 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 74 0 96 259 0 167.00 38 65.14 CHANGED MSoSGTloNYYVDSlIspEs..EDhh.usRF.ssushssss.......R.su.s.s-pu.....-asSCoFtsKssVFuuS.WSsl..ps.pssss.sulYH.....PYh.....t.Qstl..suuDu...RY..lRSW.L-Phsssl.shsGh..ssspp.Y...ulKPEsl.u+ts.-s.shcspshhh...s-atssusssspcp...tppttstspssspsp......pE-K....pplDP ..................MSsoGsloNYYVDSllsHEs..E-lh.u.......uRF.ssGs.h.stss.......R.su..l..s-.ps......-FsSCSFtPK..ssVFuu.S.WusV..ps.pssss.s.uVYH........PYs.......p.Qs.l....susD.u.RY..hRSW..LEPhsusl....uhs..............Gh...s......su.....+p.Y...u...lKP-......sh...upps.-s......ts.ps.sh.....s-Yhhsssss.cpp....st.ts....scs.stsct.....pc-K...thD.................................................................................................... 0 4 13 43 +2992 PF01856 HP_OMP Helicobacter outer membrane protein Bateman A anon Pfam-B_395 (release 4.2) Family This family seems confined to Helicobacter. It is predicted to be an outer membrane protein based on its pattern of alternating hydrophobic amino acids similar to porins [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.15 0.71 -4.41 33 2354 2012-10-03 17:14:36 2003-04-07 12:59:11 12 2 57 0 129 2284 1 164.40 30 37.09 CHANGED ulplGYKpFFs..p....p+hhGhRYYGFaDYsauthspppht.s...............................shhTYGsGsDlLaN..h.hspppt.......................................shGhFuGlplAGsoWhsstt.thth..............................................psphpsotF........QhLhshGlRhshs...............t+pulElGlKl.Phltppah........................sshshps..pa+R.auhYlsYsasF .........................................hplGYcpFFs.........p...........p..+..hhGhRYYG.Fa-Y..s.......auhht...pp.th..t...............................................................................shhoYGs.GhDh..LhN.....h..hssp.pt..................................................shG..lFuG....ltluGs....o.ahssptsphts..........................................................................t.phpsstF........QhhhshGlRhpht...........................tcpshElGlKI.Phl.pp.phh......................................sshsh.ph...p.a.cRhashYhsYsasF.................................................................................. 0 39 93 129 +2993 PF02521 HP_OMP_2 Putative outer membrane protein Bashton M, Bateman A anon Pfam-B_1230 (release 5.4) Domain This family consists of putative outer membrane proteins from Helicobacter pylori (campylobacter pylori). 
25.00 25.00 29.60 28.90 20.30 16.40 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.48 0.70 -5.91 9 419 2012-10-03 17:14:36 2003-04-07 12:59:11 9 1 55 0 36 429 0 455.30 38 98.14 CHANGED hppphhhhlhhhhhsusLpAF-Y....+lsGts-SFSKlGFNpppINsp+GIYPTpoFsTlsutLpls.ssLLsKthpt..HsLcsslGGhlGulsYDSTKh..........................h.sp.........st.hhsulsa.alGhatGahssp......................s.hst..p.st............+sRsYll.NAaLcYsYKD...hFthKuGRY....puph.-ahSGasQGaEhhh.....phpph+hhWFSSaGRAhAaspWlh-aYus+s.h.pst..hN.............hGhHshslhYp.hKslplpPFhYasPthhsAPGhpIsaDoNPsFpuhGFRupTshhsha.............Plas..h....hsshp......hss.hGcsGtoLhl+QRF-aNpaNFGhuhYpsFGNANuhIGhaGNP.....................lGh-hWsNolYss...ulsshhsAsAhThasasGGsa..+pFhWplhGRhTpus........RAsEtSlulsLuYphsc.plpssl+LpYYsshh+pGYp.hGhhh.....sPh...................ssshpuhhpDRSalMoslsapF ...................................................................h.....hhhh.hhhh.t.u....LpAF-a.......clsGtspsFSKlGFNpp.....INspKGIYPTpoFsolsuhlpls.ssLLsKth.ps...HtLpsslGGhlGulsYDoTKh...................................................hhsp...........s...ht.u.hhapahGhatGahsst.............................................s.tss...p.phst............psRsYllhsAaLp.YsYKD.........hFthKuGRY....posh..sahSGasQGF-hhh..........php.ch+hhWFSSa.GRuhAhs.pWlhsa.Yushs..h...pt...t..hN.............aGhHshslhYp..pK......tlplpPFhYFuPpsatAPGhpIsaDoN.PsFp.uhGaRspTphhsha....................................Plahshh..........hsshh...............hsshhGpsGtoLhl+QRFc.a.NpapauhuhYpsaGNANu.lGhaGsP.....................hGh-hasNolYss...shsshhsAsuhThashsGGha.......++FhWtlhuRhTtus........RAsEtululsLuYphs+.plphsl+LpYYsshh+pGYp...sGhh..........sPh...................tsshtushpDRSalMoslshpF....................................................................................................................................... 
0 9 26 36 +2994 PF03328 HpcH_HpaI HpcH/HpaI aldolase/citrate lyase family Mifsud W, Bateman A anon Pfam-B_3076 (release 6.5) & Pfam-B_2811 (release 14.0) Family This family includes 2,4-dihydroxyhept-2-ene-1,7-dioic acid aldolase and 4-hydroxy-2-oxovalerate aldolase. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.16 0.70 -5.46 22 6388 2012-10-10 15:06:27 2003-04-07 12:59:11 9 23 2567 40 1697 6456 3815 211.30 23 76.53 CHANGED phhhalsusssthhchsshhGhDhlhlDlEcus...........hhltpsLpplph.tss...pslVRVssh-oshhpp......hlchGspslhlPhV-oup-stph.phhphs.tthtt.........................tsspsplhstIESupGlhNscEIA..usc........tltulhlGstDhssshGttpssstsElh.....hApsplltAu+AuGl.hhsslhsshssspsahpputhhhslGhDs+.hhps .........................................................................................................................................................................................p.hhahsh.sss.h.hh...ph.h...t...h..h...u......h...D...hl...ll.DhE.c.us...................................................p..tl...t....p....h.l..p.......p.h...........t......h...........t....s......h.......................ps..l..VR.l.s..s..h......s...s.......h......h.....t.....p..................................hl....c...h.............G.....s....p...........s....l......h...lPt...l..co...s...pc.s...pph....st....th.p..h..................t..th..................................................................................................................................................s...tp..h.t.l...l..s.......I.E.o.s...p...G...l.......t......s.........h...s...p....IA.....us..p......................t.ls...ul...h...lG...s....s...Dh.....s..........t..........s.........h.....G......h....t.....t........s.....s.....p....t...s..-..l.....................................hAh....pp....l....h....t.A...u....+.A..s......G......l.....s..h...s..h.l..h.........s...........s.............p...s.......t...p.......t........h...h...
p...t.u.t.p...h.h.t..lGhc.st.hhp......................................................................................... 1 422 997 1374 +2995 PF04982 HPP HPP family Bateman A anon Bateman A Family These proteins are integral membrane proteins with four transmembrane spanning helices. The most conserved region of the alignment is a motif HPP. The function of these proteins is uncertain but they may be transporters. 21.10 21.10 21.10 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.55 0.71 -4.27 127 972 2012-10-02 19:04:43 2003-04-07 12:59:11 8 10 814 0 435 951 937 121.40 34 44.35 CHANGED hllAshGASAVLlFusPsSPLAQPhslluGplloAllG....ls...shphh.............ss.sh..........hus...uLAVulAIshMhhs+slHPPuGAsALlslluus.......GatallhPV.hluullLlhlAllhNs.ls..t.R.pY..Pt ..........................hllAshGASAV.LlF.u.sss.S.P..LA.QPhs..llsGplluAllG....ls...shphh....................ss..sh...........hss....ulAluhuIshMhhh+slHPPuGAsALls..l..luss..................uat.a.l.l.h.PV..hluu.llllshA.llhss.l....t...+..pYP................................. 
0 128 267 361 +2996 PF01288 HPPK 7,8-dihydro-6-hydroxymethylpterin-pyrophosphokinase (HPPK) Finn RD, Bateman A anon Prosite Domain \N 21.50 21.50 22.70 22.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.69 0.71 -4.40 115 4673 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 4045 60 1118 3316 2284 125.90 37 60.63 CHANGED aluLGSN....l.GcptpplppAlptLpph.thplhphSslYcotPh.G....hssQs..t.FlNsVlplcT.sLsPtpL.LphlppIE.pch.GRtR.pt....+a..u......PRslDlDlLha...ssh..............h...l.....ps.......s.p............LplPHPchtcRsFVLhPLs-l ......................................................aluLGSN...........lus.....h...pplppAlp..........tL.......s.p.............h....................t...........h.p.....l.....lthSslYcTtPh...G................hp.s.Qs......s.....FlNss..l..t..l.........c.........T.......s..........L.......s....Pt.p.......L.Lp.t.....hppIE.pp.h.GR.....hR...pt.......+W...G.................PR.T.LDLDIlha.....ssp.......................h.....l.........ps....................s.c............................................LplPHs.c.hppRsFVLhPLh-l............................................ 0 368 712 952 +2997 PF02603 Hpr_kinase_N Hpr_kinase; HPr Serine kinase N terminus Bashton M, Bateman A, Moxon SJ anon COGs Family This family represents the N-terminal region of Hpr Serine/threonine kinase PtsK. This kinase is the sensor in a multicomponent phospho-relay system in control of carbon catabolic repression in bacteria [1]. This kinase in unusual in that it recognises the tertiary structure of its target and is a member of a novel family unrelated to any previously described protein phosphorylating enzymes [1]. X-ray analysis of the full-length crystalline enzyme from Staphylococcus xylosus at a resolution of 1.95 A shows the enzyme to consist of two clearly separated domains that are assembled in a hexameric structure resembling a three-bladed propeller. 
The blades are formed by two N-terminal domains each, and the compact central hub assembles the C-terminal kinase domains [2]. 24.00 24.00 24.00 24.00 23.90 23.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.16 0.71 -4.45 84 1842 2012-10-03 03:17:47 2003-04-07 12:59:11 11 6 1787 8 328 1032 93 127.40 32 40.73 CHANGED tp......lplpcLlcc....h..pLclls..Gccslppp.IssuDIsRPGLpLsGaFsaassc...RlQl.lGpsEhoalp.phspc.p+tphhcchhs.hcsPslIlo+sL..psPp-llcsAcctslPlLpoph.sToclhuplopaLc ........................................tlplccLl-p.......l..pL-.lls...Gc.c.s.lpct..Is..s.u..Dl..s..R..P..G..LEhsGYFsaass-...RlQl.lGpsEhoahp......p.hssc..pRh...phhcchhp..-sPslIloRuL............psPcEllcu.Acc...pslPlLpoph..sTop.LhucLosYLc............ 0 115 218 275 +2998 PF01627 Hpt Hpt domain Bateman A anon Pfam-B_971 (release 4.1) Family The histidine-containing phosphotransfer (HPt) domain is a novel protein module with an active histidine residue that mediates phosphotransfer reactions in the two-component signaling systems. A multistep phosphorelay involving the HPt domain has been suggested for these signaling pathways. The crystal structure of the HPt domain of the anaerobic sensor kinase ArcB has been determined [1]. The domain consists of six alpha helices containing a four-helix bundle-folding. The pattern of sequence similarity of the HPt domains of ArcB and components in other signaling systems can be interpreted in light of the three-dimensional structure and supports the conclusion that the HPt domains have a common structural motif both in prokaryotes and eukaryotes. In S. cerevisiae ypd1p this domain has been shown to contain a binding surface for Ssk1p (response regulator receiver domain containing protein Pfam:PF00072) [2]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.91 0.72 -3.98 241 11727 2009-01-15 18:05:59 2003-04-07 12:59:11 18 665 2711 46 3296 9899 750 93.10 19 13.09 CHANGED pllphFhpc.stchlpplppsl...p...............ttshpplhctsHpLKGuuuhhG.hpplsphspplEphhcptp..ptp.............h.thhptlpphlpplpst ............................................................................hlphFhpp..st.ch....l.ppl.pp.tl....p...........................tpshp..s.lhctsHplKGu.uu.hl.......G.hp.p.ltpls....ppl.E..p.h.h...pptp...psp..............................t.l.pthstlpp.lt.....t..................................................................... 0 1088 2075 2778 +2999 PF01628 HrcA HrcA protein C terminal domain Bateman A anon Pfam-B_1133 (release 4.1) Family HrcA is found to negatively regulate the transcription of heat shock genes [1,2]. HrcA contains an amino terminal helix-turn-helix domain, however this corresponds to the carboxy terminal domain. 
19.90 19.90 20.00 19.90 19.30 19.80 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.04 0.70 -4.77 165 2791 2009-09-11 06:48:32 2003-04-07 12:59:11 16 9 2743 3 636 1950 812 218.70 24 63.95 CHANGED l-cllppuuclLSpLTphsullhsPp.hppspl+plclVsls.sp.plLsllVscsGpVcspllpls.....................p..sl.sssc....LpphsshlNspLt......Ghslsclppplhp.t.lttthpp..htphhcphlpthtps.h..ttt.......tp..pclhlsGpsNllpts..Ea.ps..........hpcl+pllphl...Epppt..lhpLl..........tps.p.psp...s.....................lplpIGsEs..........hpshpssSllousYphs..s.....p...........................sl.GslullGPTRMsYs.+sl ..........................h-cllppuuplLSplTphsu.l.lhsPp...p.psplcplcll.Ls..sp...pslslllsssGpVcsphlpls....................p.sh..sppp....Lpch.sshlNp+lt......Ghs..l..t..-lpp.pltp...p....l...........s.p.h.h.pp..................ht..p..hhps.llshh...tph.h.t......p.........pplhluGpsNLlphs...ph..ps..........hp.plpplhphl...Ep.ppt..lhpLl...........pph..t...psp.......s............................lpltIGsEs........t...tsl..p..shS.llousYths...t...p.........................................................................hh.GslullGP.T.R.MsYsphl....................................................................... 0 229 443 550 +3000 PF04877 Hairpins HrpZ; HrpZ Kerrison ND anon Pfam-B_6141 (release 7.6) Family HrpZ from the plant pathogen Pseudomonas syringae binds to lipid bilayers and forms a cation-conducting pore in vivo. This pore-forming activity may allow nutrient release or delivery of virulence factors during bacterial colonisation of host plants [1]. The family of hairpinN proteins, Harpin, has been merged into this family. HrpN is a virulence determinant which elicits lesion formation in Arabidopsis and tobacco and triggers systemic resistance in Arabidopsis [2]. 
20.50 20.50 31.60 20.50 19.80 20.10 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.01 0.70 -5.01 11 233 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 84 0 19 217 1 221.70 27 93.65 CHANGED MQh.............L...uhssu.LtoshtshsLstsps.s.u.uoSocpLppVIsQLAptLT...................tsGphssoSPLG.....................................KhLsKuhusD........................................................GctGGhhcsVpsALcpLIpEKLGDNFGAuus..............ss.GhtGh.tsuuuuuQpDLhspVLsGLuK....usLsDLLT.ppusGToF.ScDDMPMLccIAQFMDDsPupFspP............DuGSWupEL.K.EDNhLsGsETAQFRuALDlIGptLupptsttssh.....sGGLG.................................G....DsGp.h.....hGhpu..uGtGhGoss.s..p............phspLltGL..pGLtusLpssGtsGssLpsSAApsushllshhL...pNpusA .............................................................................................................................................................................................................................................................................................s..ss..psl.ssLptlhtptLup..ut.s....................................susss..pLhstl..h.ulup....p.Lssl.s.p.....p....sss.ppF.s.....ppDhshhpclupFMDp.Pt.FspP............DstS.....WhptL.c.-Dsh.....hs.tphttFppAh.shItpthu......................................................................................................................................................................................st...................................... 0 0 5 11 +3001 PF02218 HS1_rep Repeat in HS1/Cortactin Bateman A anon Pfam-B_5631 (Release 5.2) Repeat The function of this repeat is unknown. Seven copies are found in cortactin Swiss:Q14247 and four copies are found in HS1 Swiss:P14317. The repeats are always found amino terminal to an SH3 domain Pfam:PF00018. 
21.00 21.00 21.50 22.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.57 0.72 -4.07 18 1059 2009-01-15 18:05:59 2003-04-07 12:59:11 10 23 81 0 511 857 0 34.90 59 36.89 CHANGED GFGGKFGVQpDRhDKSAVGaDYpuKl-KHsSQ+DYu+ .........GFGGKaGVQpDRhDKS...Al.Ga-YptchpKHpS.Qp...Dhs................. 0 125 164 293 +3002 PF00447 HSF_DNA-bind HSF-type DNA-binding Finn RD anon Prosite Family \N 21.00 21.00 21.00 21.00 20.90 20.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.76 0.72 -3.86 114 1989 2012-10-04 14:01:12 2003-04-07 12:59:11 12 26 337 17 1305 1973 19 99.60 36 21.34 CHANGED sFlpKlaphl-..........Ds............shsplIpWs..ps.G...soFllhs.spcFupplL..Pp...aFKHsNauSF........VRQLNhYGF+Klpsspt......................................................aEFpppt..F.p+Gpc.........cLLpp..I+R+pssp ........................................................FlpKh..a..p.hlp............................-s.....................p.h...s..pll.pWs...ps..G.............soFl...V.hc........p.pF.sc..........p...l.L.........P.c.............a.F......K............H......s....N....auSF...................lR.......QLNh....Y....G.F+Klstsptt.........................................................hEFtp.t...F.h+G.p...........pLLpp..I+R+t..s........................................................................................................ 
1 519 792 1058 +3003 PF00011 HSP20 Hsp20/alpha crystallin family Sonnhammer ELL anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.33 0.72 -4.16 37 9881 2012-10-02 21:54:05 2003-04-07 12:59:11 16 34 4016 178 3585 8031 3065 97.10 23 59.55 CHANGED -h+cscstathphDlsGhp.t-.-lcVplc-sp..lllpupcc......Eccsschhh.tht.tpFhR+apLP-..ss-h-plpush.psGVLslolPKhtst.......ps+sIplp ..........................................................................................pscspat.l.p.h-l..s...G..h...p..p...........-..-.lc..l..p...l.....p..s......s..h........Ls....l..p...up+p..............................................tp.p...p..p......p...p...h..h..h..p...t...h..............t..p..F.p.....Rp..a.p.L.....s.t.................s.....s.....-..t...p..p..l.p.Ash...ps...G..l.Lplsl..s+ttsp........psppItl............................................ 0 1108 2207 2972 +3004 PF01430 HSP33 Hsp33 protein Bateman A anon [1] Family Hsp33 is a molecular chaperone, distinguished from all other known chaperones by its mode of functional regulation. Its activity is redox regulated. Hsp33 is a cytoplasmically localised protein with highly reactive cysteines that respond quickly to changes in the redox environment. Oxidising conditions like H2O2 cause disulfide bonds to form in Hsp33, a process that leads to the activation of its chaperone function [1]. 
21.40 21.40 22.00 21.90 19.70 19.50 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.46 0.70 -5.28 181 3184 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 3110 8 630 2039 673 275.00 33 92.77 CHANGED pDtlh+.hhhpsts.....lRuhhlphs.pshpcshpp.Hs.......hsssssthLGcslsussLlu.usLK..hc.splolplps...sGPlshlls-s......sssuplRGhs.......t.spsph.s............sltthl...Gp..GhLsl.oh.D......uh....tp.YpGhVsL...tus......plu-slptYatpSEQlPotlhLush....t.........httAuGhllQhLPss.............................cc.-shsclpphhp.....shhsptLl..shsscpl.Lp+La..p-t........lclh-s....pslpFpCsCS..+ERhts.sLtslupcElpchlpEp.Gpl-lsCcFCsppYpF..st ....................................................................................................................Dhlh+hlhpsts.....lRuhhlpso.colppshp.p.Hs..............hs..sss..pssLGchLsAssLLu.As.LK...hc.sclTlp.lpG..........sGPlshllssu.........s.sptplRGhs........splsh...s.t.................sltshl......Gs...Ghl.sl....sh..D..........u........t-.YpGhVsL..us.........plu-slphYatpSEQlPoplhLssh......pps.........................htsAGGhllQlhPus.............................pc-shs+lpphhp........shho.p..Ll.......shs..s-pl.LhcLat..--p....................lpl.h-t.......................pslpFp..CsCS....+E....Rhts.ALh.oL.scc...Elpshl...pE.....-.tt..h-hpCcFCsscYpFs..................................................................................................................................................................... 0 197 392 522 +3005 PF00012 HSP70 Hsp70 protein Bateman A, Sonnhammer ELL anon Prosite Family Hsp70 chaperones help to fold many proteins. Hsp70 assisted folding involves repeated cycles of substrate binding and release. Hsp70 activity is ATP dependent. Hsp70 proteins are made up of two regions: the amino terminus is the ATPase domain and the carboxyl terminus is the substrate binding region. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 602 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.87 0.70 -6.61 33 23002 2012-10-02 23:34:14 2003-04-07 12:59:11 15 120 7604 156 6301 21235 9750 370.00 30 85.27 CHANGED llGIDLGTTNSCVAVh-uGcsclIsNsEGsRTTPSlVAFscs.ERLVGpsAKpQAlsNPcNTlausKRlIGRcas-..lpp-hp.hPa+lsps.sscshlpsp...Gc..paoPpcISAhlLpKhK-s.AEuYLGcs..VscAVITVPAYFNDuQRQATKDAGpIAGLpVLRIINEPTAAALAYGLDKps.......-cpIlVaDLGGGTFDVSlL-ls..cG..VFEVhuTsGDTHLGG-DFDp+llcallpEFK+c.pGl.DLppDphALQRL+-AAEKAKh-LSS.tpTplsLPalohsstG..shclshsloRA+FEcLssDLlcRThpPscpAL+DAtls.ts-ID-VlLVGGSTRlPtVQchVcchFG.K-PsKuVNPDEAVAhGAAlQuGVLoG......-V+DlLLLDVoPLSLGIEThGGV.hT+LIpRNTTIPTKKS.QlFSTAsDNQs.uVpIpVaQGERphspDN....+hLGpFcLsGIPPAPRG.lPQIEV..TFDIDANGIlsVoAcDK.uTGKcppIoIpssuG.Loc-EI-+..................................MVc-AEpaApcDcpp+EplEs+NpuEshsass-pplc-h.....t-KlssscK....pplcsslptLcpsh....ttt-......h-chcschccLpphs.tlspphYp ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....p.l........s....h..l....h......t.t............s......p...t.................h............t......t...................l.........t...............t.........s..........V.........l.........sh.P.......s.............a........s.......s........t.......p.......+.....p....t........h......h.......p.........A.......u.......t.....h.......u..........G.........h............p......s....................p.........l........l........p...E.....P....s.........A..A............u...h...........s.....a......t....hp...p.t................................t.t...l...h..l..hDh.................G..G...G...Th.D.....h...o.....l...l...........p.....h...............t...............t.......s........................h.......p.........l....h......s.......s.....s.........t...s......t.....................l..G.......G.............p..........s.h....D..................h...h.....h.p..............h.h....h......t........t...........h............................................................................................................t..............................................p...........................................t......p..........l....h......p.............t..s......c.....t...s...K..............h..................t...........L.....S....s..............................t........................t......h.....................h..................................h.............................................................................................................................h.......................t.................l...s..............c..............t..........p................h............p.....t...h.......................................h.....................h...............t.....t..............................h.............h.......p............s.......l......t.....p..........u............t...h.....p...............t..............t.........l..................t.......t...l....hh.s.GG...s.phPh....l....p..l..t.......t........h.................t.......................................................t
.......................p....s.............t....s...h..s...G...s..s........h...............s..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
2 2234 3809 5213 +3006 PF00183 HSP90 Hsp90 protein Finn RD anon Prosite Family \N 24.40 24.40 24.50 24.50 24.10 24.30 hmmbuild -o /dev/null HMM SEED 531 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.95 0.70 -6.11 11 5703 2009-01-15 18:05:59 2003-04-07 12:59:11 13 30 3641 350 1566 4977 1328 358.50 32 66.61 CHANGED EYLEEc+lKElVKKHSEFIuYPIpLhVpKEhEKEVs--Et-ppc-Etc-c-..............t.chEEs--EcEc-pKK.KTKKVKEsspEhE.LNKpKPIWpRsPc-lTpEEYuuFYKSLoNDWED...LAVKHFSVEGQLEF+AlLFVP+RAPFDLFEsp..KKKNNIKLYVRRVFIhD-s--LIPEaLuFlKGVVDSEDLPLNlSREhLQQNKILKVIRKNLVKKsLEhFpElA......................EcKEsacKFYcpFSKNlKLGIHEDusNRs+LAcLLRapSo+SsD-hsSLcDYVoRMK-pQKsIYYITGESKcpVEsSPFLEpL+cKsaEVlahTDPlDEYshQQLKEFEGKKLsslTKEGLcL-EsE-EKcpcEchKccaEsLsKhhKc.lLsDKVEKVVVSsRLssSPCslVTSpaGWSANMERIMKAQALR...DoShsuYMuuKKshEINP+HPIIcEL+c+.scsDpsDKoVKDLshLLaETALLoSGFsL--PpsaAsRIaRhl+LGLsIDED-ps..p-..spsthpstssscpssssScMEEVD ...........................................................................................................................................................................................................................................................................................................................p.......c......p.............lNptpsl......Wt.Rs.p-..lopE..-Y.tpFY.................+p.l.s.p..-..a.....p-........L.h.hh.H..p..sE...G..p..hcapulLalPp.p.s.P.a..D..h...a.p...p............t..p..p.s....lK.L...Y..V......p.....RV.FIhDs.s..c.p.hhPpa.LpFl+.GllDSpDLPLNlSREhL.Q.......p......s......p......hl.c......hl+ps....ls.K..+s.l.p.hh...p...c.l..s..............................................................................c..c.t..-.p.....Y.p..pFa.ppFuh...........slK...G..h...h...p....D........t.....s.....+...p.....p.ltc.LLp..at.......o.................o.................p.................p..............t.......t..........p..................h..............s..........o........Lp..-.......Y..l...................p..R.....................M...........p........................
..-............s..Q.....p......p............IY..Y.l.s..........u..p.sh......tt...............h.ppSPalEhh+p+..G..hE..Vlhhs-...l.D.Eahh.p...l...p..-.a.........-...........s....K.......t.....h...s......l.......s........c........t....t.......h....p......l.......t......p........t.......-........c.......p.....p......p........t...................c.......p......................p..........t...........t............h...........p..........s.l.h....p.h.h...Kp....h...L..t..c...c.........V.p...c....V........h..l........o...p.R..L.........s..s.....o...P..shl.s.s..s..t....t..h.sh.p.Mt+.hht..t...............................................h................p.....hhElNsp...Hsl..l.p.t...........h...............................................................t.......t..........t............h...........t...p......h....s..l....lhp.u.hl...p.t.............t..p.s..t.....a.tth.p.h................................................................................................................................................................................................................................................................................................................................ 0 594 973 1277 +3007 PF04119 HSP9_HSP12 Heat shock protein 9/12 Wood V, Finn RD anon Pfam-B_14318 (release 7.3); Family These heat shock proteins (Hsp9 and Hsp12) are strongly expressed, an increase of 100 fold, upon entry into stationary phase in yeast [1,2]. 26.30 26.30 26.30 26.70 26.10 26.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.95 0.72 -3.89 24 130 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 97 2 87 129 0 58.00 47 59.56 CHANGED MSDsGRKsh..............o-KspEtlTPDSpKSThEKsKEsVTstsD+hAussps-spKShsQpssDp ....MSDsGRKsF..............osKApEplpPDSpKSshE+sKEplTDssD+lAushQP-spKSssQpstDp....... 
0 23 49 74 +3008 PF04213 HtaA Htaa; Htaa Yeats C anon Yeats C Family This domain is found in HtaA, a secreted protein implicated in iron acquisition and transport [1]. 21.80 21.80 21.80 21.80 21.40 21.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.04 0.71 -4.01 25 599 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 181 0 91 421 21 153.20 31 45.43 CHANGED sslssGshs..WGlKpSFpsYlpGs.lApGsaphss...Gus...tssspFsF.shssushD...ssspssslpasGsV+as..GHp.............hhLDlslussclshsGssGpLhs-lsSpphp.sstsshut......lslAslshsuhsh...ssssshss.ssstLTtpGupsFusaYsAGpsLDPlohshsh ...................................t..ltpGohsWGl+pSFpsYlpus......s.....pG.p..hphsu................Gus........hssstFsF..shs...su.sh..D....ssps.sslpasG.s.V+as.Ghp....................shLDlolu-Ppls.lp..s.u..s..uplhhs.lp..Sps.hs...u.p.hshuc..................lshushssusht........ssssshsu.ssspLTupGscs.F..u....G.....FY..psG.p..s..h..Dslohslt.h............................................. 0 28 66 91 +3009 PF00126 HTH_1 Bacterial regulatory helix-turn-helix protein, lysR family Sonnhammer ELL anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.67 0.72 -4.23 1536 86700 2012-10-04 14:01:12 2003-04-07 12:59:11 22 49 4604 63 20714 62728 5685 59.70 33 19.98 CHANGED lppLchFhslscps.ShopAAc.pLsl...opsulSpplppLEp.pLGsp....Lhp..R....tt...p...t..l.pLTttGp ........................ppLchFhs.l.sc.pt....ohopA...A..c...pL..tl..........op.s...ulSpplpp.L....Ep....pL....G.s..p........Lhp......R...............ss...+....p......l.pLTptGp........................ 
0 3926 9597 15357 +3010 PF04967 HTH_10 HTH DNA binding domain Bateman A anon COG3413 Domain \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.14 0.72 -4.38 30 808 2012-10-04 14:01:12 2003-04-07 12:59:11 7 73 81 0 433 919 135 52.70 37 16.19 CHANGED LT-+phplLptAachGYF-hPRcsslp-LAcplGIScsohpp+LR+AppKlhp ................LTc+QpcsLptAhctGYa-hP.Rc.sohp-lAcpLslSpsohsp+LR+App+ll.t....... 0 47 296 420 +3011 PF01022 HTH_5 HTH_ArsR_family; Bacterial regulatory protein, arsR family Bateman A anon Pfam-B_139 (release 3.0) Domain Members of this family contains a DNA binding 'helix-turn-helix' motif. This family includes other proteins which are not included in the Prosite definition. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -7.91 0.72 -4.29 42 9132 2012-10-04 14:01:12 2003-04-07 12:59:11 15 65 3608 33 2609 16970 1782 47.00 32 34.44 CHANGED cssRlcIlhhLtc...sp.hsls-lsptlphopuslScHLchLcctGlVpp ..........................................c.sRlp.I.l.t.hLtp.........s-...h...s....V.......s.......-.....L.....s......p......t......l......s......h......o.....p.....s..s.l.SpH.Lph.L.+psuLlp............... 0 903 1744 2237 +3012 PF01418 HTH_6 Helix-turn-helix domain, rpiR family Bateman A anon Pfam-B_3373 (release 2.1) Domain This domain contains a helix-turn-helix motif [1]. The best characterised member of this family is Swiss:P39266. RpiR is a regulator of the expression of rpiB gene. 
20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.14 0.72 -4.25 10 9722 2012-10-04 14:01:12 2003-04-07 12:59:11 12 9 2886 5 1519 5638 383 75.30 25 26.84 CHANGED hslLppIpsthpcLscoE+KlA-aILssscpshphSlsplAptuuVS-uollRFs+pLGapGFs-hKlpLup-lusp ................................h..plpp..t..h.p.pL..o..ps.E..cc.....lA.c....a.ll.p.ss.p.p...s....h.pho..lp...cLApps.s....l.S..puolsR.Fs+.+.l..Gap.Gap-....h.K..htLtpth...t..................... 1 359 791 1163 +3013 PF02796 HTH_7 Helix-turn-helix domain of resolvase Finn RD, Griffiths-Jones SR anon Prosite Domain \N 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.01 0.72 -4.10 14 3816 2012-10-04 14:01:12 2003-04-07 12:59:11 10 39 1793 24 542 3351 302 44.90 28 23.91 CHANGED GR.pphscp..p......hsplhpLhppG.hshpplActhsloRsTlYRhhsst ...........................GR......p..hspp.....p...........hpp.lh...c.h..h....p....s...G...h.....s..hp..plAcphsl.oR..s.TlYRhhtt.t........ 0 153 335 445 +3015 PF00165 HTH_AraC HTH_2; Bacterial regulatory helix-turn-helix proteins, AraC family Sonnhammer ELL, Griffiths-Jones SR, Studholme DJ, Schleif R anon Prosite Domain In the absence of arabinose, the N-terminal arm of AraC binds to the DNA binding domain (Pfam:PF00165) and helps to hold the two DNA binding domains in a relative orientation that favours DNA looping. In the presence of arabinose, the arms bind over the arabinose on the dimerisation domain, thus freeing the DNA-binding domains. The freed DNA-binding domains are then able to assume a conformation suitable for binding to the adjacent DNA sites that are utilised when AraC activates transcription, and hence AraC ceases looping the DNA when arabinose is added [1-2]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.91 0.72 -4.00 75 482 2012-10-04 14:01:12 2003-04-07 12:59:11 18 29 353 4 136 54828 2557 37.30 26 22.38 CHANGED lp...psp.hsltclApphuh.Stshhp+hF+pthGhoPppahp ...............................plpplAppsGh..S..t.pa.F....p+hF.Kchh.GhoPtpah.................. 0 48 84 114 +3016 PF04204 HTS Homoserine O-succinyltransferase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.84 0.70 -5.69 77 2128 2012-10-03 00:28:14 2003-04-07 12:59:11 11 3 2054 3 324 1318 572 290.40 50 95.62 CHANGED PI+IPcsLPAhclLppENIFVMs-sRAtpQDIRPL+IhILNLMPpK..lpTEsQlLRLLuNT..PLQl-lsLl+hsoHpuKNTst-HLppFYcsF--l..+sc+FDGhIITGAPlEplsFE-VsYWcELpcIh-WocspVpSTLalCWGAQAuLYaaYGlsKh.LspKhaGVFpHp..shp.ps.LhRGFDDpFhsPHSRaT-lpcc-l...pptssLplLucS--sGshlltscct.RplFl..hGHsEYDspTLppEYpRDhtpGh.s.....hplPtNY...........................aPsDDPsppPhspWRSHApLLFuNWLNYhVYQpTPY .....................................................PI+lsccLPAhphLcpENlFV.MsppR.Atp....Q....-IRPL+llILNLMPpK..lpTEsQhLRL..LuNo....PLQV-lphL+hco+pS+NTssEHLpsFYpsF.....--l..ccppFD.....G.hIlTGA.PlEtlp.FE...-VsYWp.Elpplh-WuKsH..Vp..S..TLalCWuAQAuLhhhYG.l...sKhshscKl.GVYp...H..c...h..l..c..s...p...s...h..L......h......RGF.D....D......s..Fhu.P...HSR.as-hst.ppI...pp.hs-.L.cILuco-.-.u.GshLhso+Dt..RplFl..hGHsEYDspTLspE....YhRDlptGl..s......sc..l..PhNY............................FspsDP...pp.pP.pssWRSHusLLFsNWLNYhVYQ.TPY................................................... 0 87 185 247 +3018 PF04955 HupE_UreJ HupE / UreJ protein Bateman A, Eberhardt R anon COG2370 Family This family of proteins are hydrogenase / urease accessory proteins. The alignment contains many conserved histidines that are likely to be involved in nickel binding. 
The members usually have five membrane-spanning regions. 22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.75 0.71 -11.52 0.71 -4.98 80 494 2012-10-02 18:22:22 2003-04-07 12:59:11 7 4 398 0 191 653 600 177.30 34 89.46 CHANGED lsh..h...hhsssAhAHsupst...su...GhhuGhsHPlhGhDHLLAMlAVGlWu.uhhus.....+uh.........ahl....PhuFlusMll.GusLulsGls.lPhVEssIusSVlllG..LllA......hst+lPh....ssuhullulFAlFHGaAHGsElsususshh......YshGFslATuhLHhsGlulGhhltpttt........lhRhsGuslAssG.hhL ...................................h..lhhhsssAhAHsuts.........ts.......Ghhu....GhhH..Pl.hGhDHLLA..MlAVGlhu..uthtt.....ptt................hhl....Ph..s..F....l...s...s...M..ll...G...u...h...l...G...h...t...G...l...s...l.....P.....h...sEssI.u.sSl.l.llG....l.h.lu...................hs..h....c..h.sh.................hh.u...h....sls.....ul.FA..lFHGaAHG...s...E....h.....ss....susshh............YssGFshuou.h.L.as.s.Ghu.l.uhhl..tphts.................hhRhsGu.shAhhGhh................................................................. 1 43 101 146 +3019 PF01455 HupF_HypC HupF/HypC family Bateman A anon Prodom_3112 (release 99.1) Family \N 19.60 19.60 20.50 19.90 19.50 19.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.26 0.72 -3.98 170 2243 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 1433 6 492 1128 72 69.50 44 76.37 CHANGED MCLulPucll....pl........p..sphAh...l........-hs.G..lp+clslsLls.-......splGDa.VLlHlG..aAl.pclDccEApco.Lphhp ............................MClGlPupll....sl.........-..tphAp.....V........-.ls..G....lpR..-VslsLlsp.......stlGpW.VLVHVG..FAh.uhIDEpEAccTLcsL........... 1 163 329 421 +3020 PF04809 HupH_C HupH hydrogenase expression protein, C-terminal conserved region Mifsud W anon Pfam-B_3701 (release 7.6) Family This family represents a C-terminal conserved region found in these bacterial proteins necessary for hydrogenase synthesis. Their precise function is unknown [1]. 
21.20 21.20 29.80 21.30 20.00 21.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.44 0.71 -4.35 36 1273 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 541 2 95 481 8 110.40 37 72.84 CHANGED GlhNAPslLsEIt-+sts...htsustsHVINLTLLPhostDhsaLsphLGpGslsl.LSRGYGsCRlsuTuhhpVWpVpaaNShDslIL-TlElsslPcVAhAAsEDlpDStcRLpEllc.slp ...........................................hsLhpELhtplpp...........ss...scslsls.LPlstsD...RtFLspLLGcGpls..l...pp........t..s..hu....ESc...IppThhsGlW+VRphss.h..cp.hLhD.plEluslP.p.sh........A.u.tD.................h................... 0 30 56 72 +3022 PF01750 HycI Hydrogenase maturation protease Bashton M, Bateman A anon Pfam-B_548 (release 4.2) Domain The family consists of hydrogenase maturation proteases. In E. coli HypI the hydrogenase maturation protease is involved in processing of HypE the large subunit of hydrogenases 3, by cleavage of its C-terminal [1]. 20.60 20.60 20.80 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.51 0.71 -4.60 27 2632 2012-10-01 20:58:29 2003-04-07 12:59:11 13 8 1344 10 545 1351 63 127.40 28 73.29 CHANGED l+llEtLpctht..t......pVpllDuGTtuh.Ll.hlpp....hc+llllDAlch.GhcPGplchlcs--lsp..hthc.hShHphshtpsLphhp.htph.tclllhthpshph--...hshsLoptVcpslspAlchllpt ........................................chsctL..ttpht.st..........tlcllDGGTtu.h.p.L.l.s.hlpp......sc+LlIlDAl....ch..G...h..sP.Gsl.......hh.......h.c.......s....-..........c..l.....ss.....hh.s....t......t......h.......S.....s...HphshsclLs..hh..c..h..p.....G..p.h..P.tcl..hl.lGlp..P.p............l..c............thsL.....ot.lcst.ltsshpth...t........................... 0 173 357 462 +3023 PF01968 Hydantoinase_A Hydantoinase; Hydantoinase/oxoprolinase Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the enzymes hydantoinase and oxoprolinase EC:3.5.2.9. 
Both reactions involve the hydrolysis of 5-membered rings via hydrolysis of their internal imide bonds [1]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.70 0.70 -5.31 61 2437 2012-10-02 23:34:14 2003-04-07 12:59:11 13 36 1379 6 1233 2598 1387 281.50 29 34.74 CHANGED RssTsllNAhLtPlhppalptlpstlcptsht........................................splhhhpusGGlhshcpupppPlcslhSGPAuGllGAuh.....hsGhc........shIshDhGGToTDluhltpGpschsppstl............uGhhsph.hlclpolusGGGSll.hhtpstth+.lGPpSuGspPus...............................................................hshshGGp.hTlTDAslhLGcls.p.hh.....tt......phs.thh.t.hut.h............................................pplApthlplsstphscul+hl.................................................................................s.ppshsspshslsshGGuu.shasstlA..ctluhppllh..hs.uVhsAlGhhlAclptch .............................................................................................................................................................RssTsllsAhltPhhppa...lpph....p...p...t...lp...p....t..t.h.t..............................................sp.lh..hhp.us.GG.lhshpp....hp.t.h..cslhS.GPA....u..Gl.h.Gushh...............thsu...h.p.........................................ss.lshDhGG.......ToTDl.u.h..l....s.......tG......p............c....h.s...ps.th..............uGhthp..hsh.lcl.polusGGGSll........h.......p...........s......u.....t........hp..VGPcSAGupPGP..................................................................................................................ssat.....t...G.G.............h.Tl..T..D.As......l...h.L..G.t.ltsp....hs.........ttt.....ls.ths.t..t....th....tt.lut...lsht........................................spps..At.uhlcluspphspsl+tl...............................................................................................................s.t.pG..hcs.p...p..hs.Lh.sh.G.GuG.....s.ass.tlA......ctlG.....hp.....p.l.....ll..hs....ul.huAhGhulAclt
...h...................................................................................................................................................... 0 388 797 1050 +3024 PF02538 Hydantoinase_B Hydantoinase B/oxoprolinase Mian N, Bateman A anon COGs Family This family includes N-methylhydaintoinase B which converts hydantoin to N-carbamyl-amino acids, and 5-oxoprolinase (Swiss:P97608) EC:3.5.2.9 which catalyses the formation of L-glutamate from 5-oxo-L-proline. These enzymes are part of the oxoprolinase family and are related to Pfam:PF01968. 21.40 21.40 21.90 22.50 21.00 21.30 hmmbuild -o /dev/null HMM SEED 527 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.60 0.70 -6.28 6 1619 2009-01-15 18:05:59 2003-04-07 12:59:11 9 25 1011 0 830 1658 1539 505.30 33 58.32 CHANGED hDPIhLE.lFpshatsIAEpMGhhLcpTAhSsNIKERLDFSCAlFDusGsLVAsAsHIPVHLGSMupuVKthl+.thcs..plcsGDVhlsNDPYhGGTHLPDITllsPlFpsst..llFaVAuRuHHADlGGlTPGSMPssSpplapEGhlIss.phLscsGchp-phhc.plhlcss..a.....sRsPcsslu.DL+AQIAANpcGIcpltchI-caGhcsVttahs+lps.uEcul++tIsclss..Gphca.......chDsstpIslplplchcptphhlDFoGTSsQscsshNAspulssuulhYsh+sLlssDIPlNcGCh+PLpIllPcGollsP..cpPAuVsuGNVETS......QtIsDslauAhs......V.AsSpGoMNNlTaGsts............asYYETIuGGoGAu......sGhsGssAVaopMTNohhTDhEllEpcYPlLlpEaulRpsSGGtGKaRGGsG..lVRphcFhpshololLusRR+luPaGltGGpsGusGcN.hlh+ssGsclclsupsolalp....PGDR..............................llIcTPGGGGYG....hp....hcD 
....................................DPlhLp.lhp.pthhuIAEpM.uhsLp+TuhSssI+EchDaSsAla...s.s......c.G...p...l..l.A.p.A..s.p.l..PlHl.G.........o.M...s...t...sl.c..t.hlc...th.t.s..........slc.sGDVhlsNcPa..t...G...G.....o....HLPDlolls..PlF.............t.........p.........s....p..............l...............l...........h....alA.u+uHauD.................lGGh..sPG.Shs.spuppla.pEGlh.lss.h+Ls...c....pG...p.h..p..-p.hlp.......hl....hpss...........................R....sspp.shu.DLpAQ.lAA..s..pp....Gh..ppltcllpcaGh-.sV.psh......hptl............pcpuEpssR.ph.l....p..p..l....s....c...............G.p....h....p.h...s.....................s.th.D..s..s.....s.....lp..l...pl..s..l......s.......h......p.....s.............s....p...........hhhDFsGT...us.p.s...s..shN.....u....s......h......u......s.....s..h......u....u.....l...h......a.....s.....h..+......sll.....s........s......-......lP..h.....N.p.............G.sh.pPl.plhl.P..c..Gol.....L...s.P........phPA...A..V..su...u.....s....s...s..S........p...t..ls..cslh.tAhs....................................s..A.u..u...p.....G..s...h.......N....s.....h...s...h.G..stp.............................a..t.h....a...E..o.....l...u...G.G...s....GAs..............sshcGt..s...u...l...p..s...p..h..o.....N.o...p...h..T.s.sElhEt.caP.ll..l.cp.ap.lR.t.s..S.G.GsGcaRGGs..G....sh.R....p..l....c..h..h....p..s....h....p..h..s.l..l..u....p..R...p..h..h..s.....P..a..GltG.....Gts....G.......t.......h.......Gts....h.....l..h.....c.......t..............s..................s..........p.............h.......................p....l..........s...u..p...s..s...h...t..l.p........................s.GDh..............................................l.hl.T.P.GGGGaG.....t.........t.................................................................................. 
0 241 519 705 +3025 PF01185 Hydrophobin Fungal hydrophobin Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 21.50 21.40 21.20 21.30 hmmbuild --amino -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -11.33 0.72 -3.53 69 519 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 203 0 178 496 0 81.40 33 62.51 CHANGED C.ss..uslpCCNpspsuss.........hhsslLu.LLGhsht........shsuhlGh.sCoPlsl...l.ul.uss.......u.Cssps..VCCpss...shs.GLls.lG...Cs...Pls .......................................C..ssuplpCCNpsppuss........s.......hsshLs.L.l.G.h...hs........shpu.h.lGh..pCoslsl......l.sh.uhs....................spC...ppp...sCCpss.....s.s...G.Lls.lG....C.sl............................. 0 98 127 156 +3026 PF01155 HypA Hydrogenase expression/synthesis hypA family Finn RD, Bateman A, Yeats C anon Prosite Family Four conserved cysteines lie either side of the least conserved region. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.63 0.71 -4.39 169 2158 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 1453 7 454 1106 89 112.50 38 96.54 CHANGED MHEhSlspullchlpc.pApppt.....sp.+VppVpLclGpLosVps-sLcFsF-shscsT.................................lscsAcLpIpphsspshChsCupphplpphh...................................CPpCGuhpl.pl.hsGcEhplcplEl- .........................MHEloLspshlEllppp.Appp.s.......sp..+VssVaLcIGshSsVEssulpFsF-.ls....s.c..G.T.................................lAc.Gs.cLclp.tsApsaChsCsphspltppp....tp................................CPpCpu.p.p.l.pl.ssG.-pLpl+pIEl........................ 0 154 306 388 +3028 PF01924 HypD Hydrogenase formation hypA family Enright A, Ouzounis C, Bateman A anon Enright A Family HypD is involved in hydrogenase formation. It contains many possible metal binding residues, which may bind to nickel. Transposon Tn5 insertions into hypD resulted in R. leguminosarum mutants that lacked any hydrogenase activity in symbiosis with peas [1]. 
25.00 25.00 27.80 27.80 22.60 22.30 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.09 0.70 -5.71 129 1488 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1407 2 399 1039 96 349.00 49 96.11 CHANGED Dsph..spt.LhcpI.pphsp........c.shplMEVCGGHTHoIh+aGlcpLLP....pslchlHGPGCPVCVhPhtcIDpAlpLA.....tpssVIlsTaGDMhRVPGSpt...............................S.LhpA+A....cGA.DVRhVYSPhDAlclAccNP-+pVVFFAlGFETTsPsoAhslhpAt...........ttslcNFslhssHlllPPAlcslLp..ss.......p.splcGFluPGHVSslhGscsYc.lsccaphPlVVuGFEPlDlLpulhhllcQlppG.....cscl-N.pYsRsVpt-GNhtApphlscVF-lp-p.cWRGlGhIPpSGLpL+ccaupaDAcp+Fs.....ls.....thpss-spu...CpCGclL+Ght+Pp-CtlFGpsCTPpsPlGuCMVSoEGACAAYYpYs..ph ......................p....shpLhcclccpss...s.....+PlpIMEVCGGHTHuI.hKaGLcpLLP....c.s.l-hIHGPGCPVCVhPhuRIDsslclA.....pps-VIhsTFGDhhRVP.Gppu..............................................SLlpA+A......cGA.DVRlVYSPhDALclApcNPs+...cVVFFulGFETThPsTAlsLppAc...........tcslpNFhhaspHlpl.PslculLc....ps..........s..stIDuFLuPGHVShllGscsYphlAp.............ca....pp...........PlVVuGFEPLDlLpullMLlcQhhtu......cscVEN.QYpRlVscpGNhhAQphls-VFpl.sss.EWRGLGhItsSGlcLps-YppFDAEt+Fc......ss...............sp.p.ss-.s.tCcCG-VLpGhsKPppCsLFGpsCsPpsPhGuhMVSSEG.ACAAaYpYppt.................................... 0 137 272 345 +3029 PF02494 HYR HYR domain Bateman A anon [1] Domain This domain is known as the HYR (Hyalin Repeat) domain, after the protein hyalin that is composed exclusively of this repeat. This domain probably corresponds to a new superfamily in the immunoglobulin fold. The function of this domain is uncertain it may be involved in cell adhesion [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.74 0.72 -3.97 19 1816 2012-10-03 16:25:20 2003-04-07 12:59:11 11 329 171 0 1519 1518 549 80.40 30 16.53 CHANGED VDTsPPsV.sCsss.lhpsVElGsssspVhasEPTAsDsuG..pssLloRospPGD.FPVG.posVTYsapDsuGNpA.sCsFTlsV .........................................DspsPsl..sC.....P......ss...hphs..s......s........u.....p...s...s.....s.....s....V....s...a...s...t...P.....s....A...o.DNss..............s.s.s..l...s....s.........s....t..s....s.........G...s...........h..Fs.....l....G....s......s.....s...V..s..You.s.D.s.uG.Nsu..sCs..FpVsV.................. 0 1239 1292 1437 +3031 PF01608 I_LWEQ I/LWEQ domain McCann R, Bateman A anon [1] Family I/LWEQ domains bind to actin. It has been shown that the I/LWEQ domains from mouse talin Swiss:P26039 and yeast Sla2p Swiss:P33338 interact with F-actin [1]. I/LWEQ domains can be placed into four major groups based on sequence similarity: (1) Metazoan talin; (2) Dictyostelium TalA/TalB Swiss:P54633 and SLA110; (3) metazoan Hip1p Swiss:O00291; and (4) yeast Sla2p Swiss:P33338. The domain has four conserved blocks, the name of the domain is derived from the initial conserved amino acid of each of the four blocks [1]. 
36.30 36.30 36.70 36.30 35.80 36.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.93 0.71 -4.27 22 586 2009-01-15 18:05:59 2003-04-07 12:59:11 12 37 239 11 325 542 0 147.60 43 10.97 CHANGED FY+KNsRWTEGLISAAKAVAtATshLlcsAssllp....ups....o.E.pLIVASpEVAASTAQLVAASRVKAshsSpspppLppAS+sVopAscsLVppspshhpptcp.....pcslDhspLo.tphKstEMEQQVcIL+LEppLptAR++LuplRKpp.Ypps ............................Ytcsu..pWoEGLISAAKuVAtusssLl...EuAss.llp....Gcu..............s.E.cLIsuu+pVAASTAQLVsAo+....VKA..sh.sS.c..s.h...c+Lp...............t............A...........u+uVs.........pAscsLVp.uspsutpp..ppc.................p-s..hc..hs.p...hshhph+spEM-sQs.......c....lLclEppLpptRp+LuplR+p.p.Yp..h............................ 0 97 141 228 +3032 PF04568 IATP Mitochondrial ATPase inhibitor, IATP Kerrison ND anon DOMO:DM04419; Family ATP synthase inhibitor prevents the enzyme from switching to ATP hydrolysis during collapse of the electrochemical gradient, for example during oxygen deprivation [1] ATP synthase inhibitor forms a one to one complex with the F1 ATPase, possibly by binding at the alpha-beta interface. It is thought to inhibit ATP synthesis by preventing the release of ATP [2]. The minimum inhibitory region for bovine inhibitor (Swiss:P01096) is from residues 39 to 72 [2]. The inhibitor has two oligomeric states, dimer (the active state) and tetramer. At low pH , the inhibitor forms a dimer via antiparallel coiled coil interactions between the C terminal regions of two monomers.\ At high pH, the inhibitor forms tetramers and higher oligomers by coiled coil interactions involving the N terminus and inhibitory region, thus preventing the inhibitory activity [1]. 
21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.46 0.72 -3.98 34 329 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 226 8 221 316 0 83.00 28 76.51 CHANGED M......hphsh.........hp.....h.h.h.hthth.t.stGss..GAs.+uGG......tuGsuFsKREtApE-hYh+p+E+EQLtpLKc+lpc....pccclccL-ccIc ......................................................................h.....t..Gph.......Gus.+....ssG...................t.uG......s.uFuK+EtAp.....E-hYh+p+-+E.pLpp.L...Kc+hcc.......pppplcphpptlp............................... 0 69 114 179 +3033 PF01749 IBB Importin beta binding domain Bashton M, Bateman A anon Pfam-B_544 (release 4.2) Family This family consists of the importin alpha (karyopherin alpha), importin beta (karyopherin beta) binding domain. The domain mediates formation of the importin alpha beta complex; required for classical NLS import of proteins into the nucleus, through the nuclear pore complex and across the nuclear envelope. Also in the alignment is the NLS of importin alpha which overlaps with the IBB domain [4]. 23.80 23.80 23.80 24.00 23.70 23.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.55 0.72 -3.82 18 914 2012-10-11 20:00:59 2003-04-07 12:59:11 15 16 301 41 568 813 4 92.30 32 18.60 CHANGED pssscsphRhppYKspu.hcs-EhRRRRcEstVElRKsKREEpLhK+Rplsh.tppt.p...psphsss.t...ssplppsh.h..pulhScD.ppQ.Lpusst ............................t....hsphRhppaKspu.hcs.........-EhRRR.RpE.sV...ELRK.sKR-E.pLhKRRNl..s......h.....p..p.......ss.s.p............tsp.pssh.t............pph...s....p.........h..............psl..S.ss.t.Qltusp.t....................................................... 0 157 244 397 +3034 PF03617 IBV_3A IBV 3A protein Finn RD anon Pfam-B_3183 (release 7.0) Family The gene product of gene 3 from Avian infectious bronchitis virus. Currently, the function of this protein remains unknown. 
25.00 25.00 89.10 89.00 18.60 17.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.99 0.72 -4.22 6 81 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 15 0 0 79 0 56.80 82 99.74 CHANGED MIQoPTSFLIlLILLWhKLVLSCF+ECllALQQLIQVLLQIlNsNLQSRLlLWHSLD MIQoPTSFLIVLILLWCKLVLSCF+ECVlALpQLIQVLLQIINSNLQSRLLLWHSLD.... 0 0 0 0 +3035 PF03622 IBV_3B IBV 3B protein Finn RD anon Pfam-B_3190 (release 7.0) Family Product of ORF 3B from Avian infectious bronchitis virus (IBV). Currently, the function of this protein remains unknown [1]. 25.00 25.00 30.60 29.90 21.20 18.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.21 0.72 -3.99 7 88 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 17 0 0 82 0 63.20 73 99.30 CHANGED MLsFEtlIETGEpVlQQISFsLQHISSVLsTplFDPFEsCYYRGGsaWElESA--hSGDDEahE ..............MLDFEsIIETG-QlIQQISFsLQHISSVLsTElFDPFEsCYYRGGsaWElES...AD-hSGDDEalE....... 0 0 0 0 +3036 PF03620 IBV_3C IBV 3C protein Finn RD anon Pfam-B_3232 (release 7.0) Family Product of ORF 3C from Avian infectious bronchitis virus (IBV). Currently, the function of this protein remains unknown. 25.00 25.00 25.50 58.60 20.40 20.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.05 0.72 -4.09 3 126 2009-09-10 22:14:47 2003-04-07 12:59:11 8 1 17 0 0 128 0 92.10 87 87.33 CHANGED MhNlLuKSLEENGSFLTuVYVFlGFVALYLLGRALQAFVQAADACpLFWYTWVlLPGAKGTAFVYNaTYGK+LNNPELEoVIVNEFPRNGWNN ...hhNlLNKSLEENGSFLTAlYlFVGFlALYLLGRALQAFVQAADACCLFW.YTWVVVPGAKGTAFVYKHTYG+.K.LNNPELEuVIVNEFPKNGWNN... 0 0 0 0 +3037 PF04629 ICA69 Islet cell autoantigen ICA69, C-terminal domain Mifsud W anon Pfam-B_5314 (release 7.5) Domain This family includes a 69 kD protein which has been identified as an islet cell autoantigen in type I diabetes mellitus [1]. Its precise function is unknown. 
31.40 31.40 31.50 31.50 31.00 31.30 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.73 0.70 -4.40 6 145 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 51 0 70 148 0 173.60 37 43.48 CHANGED PYEFTTLKSLQDPscKLspc...cK-cpppsspsusp.scpLISLE-t+..cEouo..s-cthu.lSuh-ch.hcsos.GAhD-LLDhKs-E.hhht..lPtDPLEPEsuDKDEhpLLN-ILSuSSL-suEhopEWAAlFGDP..pLtsPcP.ShGss-.-.+ssoSSGFLPSQLLDQsM+DLtuSlpsWsupptop.....sps.Psss.......QpPAK.......uupDLoAWasLFADLDPLSNPDAVGKTDKEHELLNA .............................................................................................................................................................................................p.....t...p..t.......p....................t.hl.............t...........................................................................h......hss..c..-sh-K.-h.hLsplhss.uu...tpu-aopEhtssFG..ss........tp.t.s...shu.t..................tp.sotFLPSQLh......D..s..hp..htuu.h............................................................................p.pPtp..................u.spDhoAWFsLFADLDPLSNPDAl.G+o.Dc..ELLNA...................... 0 10 16 34 +3038 PF03921 ICAM_N ICAM_N-terminal; Intercellular adhesion molecule (ICAM), N-terminal domain Finn RD anon DOMO:DM01682; Family ICAMs normally functions to promote intercellular adhesion and signalling. However, The N-terminal domain of the receptor binds to the rhinovirus 'canyon' surrounding the icosahedral 5-fold axes, during the viral attachment process [1]. This family is a family that is part of the Ig superfamily and is therefore related to the family ig (Pfam:PF00047). 
20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.24 0.72 -4.51 5 218 2012-10-03 02:52:13 2003-04-07 12:59:11 9 28 36 23 89 253 0 89.10 36 19.09 CHANGED pcsFpVpVcPcKslLssGGSlpVNCSToCspP-hlGLETsLsKchL-p.Gpu.WKtFpLSNVocDSshhCahsCuGsQpSuSoNITVYpsPEp .........h.lplpPp.p.shV.h.GuSlhlNCS.T.s.Csp.P....p....h...h....G.LETp.LpK...p....ltp...Gsp...WttapL.s..slp..c..-.o.p.h.h.Cahs....C.u.s.p..Qpps.sssITVYphP-p......... 0 5 9 17 +3039 PF00818 Ice_nucleation Ice nucleation protein repeat Bateman A anon Pfam-B_2 (release 3.0) Repeat \N 20.80 20.80 31.80 20.80 15.40 20.70 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.76 0.72 -5.71 0.72 -4.50 110 1266 2009-01-15 18:05:59 2003-04-07 12:59:11 12 33 34 0 97 1330 0 15.80 74 44.84 CHANGED GYGSTQTAGhcSsLTA GYGSTQTA.utsSsLTA.. 0 35 35 35 +3041 PF00656 Peptidase_C14 ICE_p20; Caspase domain Bateman A anon Bateman A & Pfam-B_2524 (Release 8.0) Domain \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.80 0.70 -4.68 115 4033 2012-10-03 02:24:44 2003-04-07 12:59:11 17 280 992 455 1901 4284 706 230.40 15 47.36 CHANGED thALlIssspaspt..........sphpGstsDscslpphL.ppl.G.....apVps..........h.ps..................................................................................hotpphppt.lpphstptc.....ssD...shlhhhhuHG..tp........s..hsh.............Dsph..l...sl-t................lpshhpshps.sLtsKs+.lhllpACRusphspthttttstt............................................................................................................................................................................................................................tsshtphsstsDhlhhhuosssthShcs.................sspuShahpuLsphl......t.......hpltslhspVppplt................t.......................ttcQhPph.suhhpcphhh 
..................................................................................................................................................................................hslll.s..p.ta..tt.....................tth......p...u..s.....t.D.s.p....tht..p...h..h...p..p..h....s...........a..psps..........h..ps.........................................................................................................ho.t.p.p.l.......hp......t...l..p..p...hhp.psp..............ssD...............shhhh..............h..........h..uH.G........tp................t.........t.h..................-sth......l..........sh-t...........................................lts......h.ps......h........s...t..........l....s..p...s....+...l............hh.l..pu....C...+.....u..s.s..h.....ph..h...h...t........................................................................................................................................................................................................................................................................................t....ht.t.h.....s.....s....s.h...l..h..h.h...u...s......h..s..s.....t...h.u.hcs.............................ttts...uh.ahptlhphl.....................................h..phh....ht.tth..............................................................h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 653 1049 1463 +3042 PF00463 ICL Isocitrate lyase family Finn RD anon Prosite Domain \N 19.50 19.50 19.50 19.50 19.40 19.30 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.93 0.70 -6.29 7 4174 2012-10-10 15:06:27 2003-04-07 12:59:11 16 13 1846 69 1148 4424 4281 225.10 28 93.61 CHANGED EVtElcKWWossRW+tTKRsYoAEDIss+RGsl+.lpYsSs.tAcKLa+lLcc+ptptssShTaGsLDPstVoQMAKYLDolYVSGWQsSSTASoSsEPuPDLADYPhsTVPNKVEHLahAQLFHDRKQpEERhphscpc..Rtph.s.lDYLRPIlADADTGHGGlTAlhKLTKhFIERGAAGIHIEDQusuTKKCGHMAGKVLVPlQEHINRLVAIRhpADIhGo-LLslARTDuEAATLITSoIDhRDH.FIlGsTNPsh..tsLsslMstA.htGtpGspLtslEs-WhtKAsLKhFpEAVlDtIptus...hsNKpthltca.ppsp.hpphst.EA+tlAcclhGp-IaFsW-hsRsREGaYRapGGspCulsRuhAaAPYADLIWMESphPDatQA+EFA-GV+utaPcQhLAYNLSPSFNWppAMssD-.QcTaIpRLucLGYsWQFITLAGLHTsALhscsFA+sYup.pGM+AYuppVQpP...EhcsGV-VVpHQKWSGAsYlDtlL+hlpGGVoSTAAMGtGVTEDQF ..........................................................................................t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................... 0 314 663 947 +3043 PF03517 Voldacs ICln_channel; Regulator of volume decrease after cellular swelling Griffiths-Jones SR, Coggill P anon PRINTS Family ICln is a ubiquitously expressed multi-functional protein that plays a critical role in regulating volume decrease in cells after cellular swelling. In plants, ICln induces Cl- currents [1,4,5], thus regulating Cl- homoeostasis in eukaryotes [2,3]. Structurally, the fold resembles a pleckstrin homology fold, on of whose roles is to recruit and tether their host protein to the cell membrane; and although the surface charges of the ICln fold are not equivalent to those of the PH domain, ICln can be phosphorylated in vitro and the PH-nature of the domain may be the part involving it in the transposition from cytosol to cell membrane during cytotonic swelling [1]. 
22.10 22.10 22.20 23.30 21.90 20.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -11.14 0.71 -4.32 115 359 2012-10-04 00:02:25 2003-04-07 12:59:11 8 8 280 1 243 349 2 133.60 25 52.92 CHANGED GsLalssp.plhWh.s.............t...............................spGhulsYsuIoLHAlp+.........................shsp.slYhQhp.....................................................................ttssspsssppsppsssElpllPsss................................st......lcslapAhspCus.LpPDss.......t-pptp...t..tptptp.shhhps ....................................ssLalspp....plhWhs.........................................................sshGhul.YssIoLHAls+..t......................shsp.slYh.ls...............................................................................................................t........ttptpppssc-ptpthsEhpllPsst....................................sp...........lcshapAhspCps.LHPDPp...............sp-s.-cc....t...............ttth....................................................................................... 0 73 122 187 +3044 PF04140 ICMT Isoprenylcysteine carboxyl methyltransferase (ICMT) family Wood V, Finn RD, Bateman A anon Pfam-B_15304 (release 7.3) & Pfam-B_5114 (Release 8.0) Family The isoprenylcysteine o-methyltransferase (EC:2.1.1.100) family carry out carboxyl methylation of cleaved eukaryotic proteins that terminate in a CaaX motif. In Saccharomyces cerevisiae this methylation is carried out by Ste14p, an integral endoplasmic reticulum membrane protein. Ste14p is the founding member of the isoprenylcysteine carboxyl methyltransferase (ICMT) family, whose members share significant sequence homology [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.26 0.72 -3.79 12 1448 2012-10-01 22:51:20 2003-04-07 12:59:11 9 8 1272 1 481 2287 1176 94.30 31 44.00 CHANGED hlsh..hlhuphhRhhshhohGphasp+lhh..hssHplVpsGlYpalRHPsYhs.hhhplus.sLlsss..huhlhhshsshhha..RIcpEEpsLhp .....................................hsh..hl..hlhu....h.hhh.h..sh..p.p.L..G.p..h..W..o...h.+........l..hl..........hss..H...p.lV..s...pGlY+..hh+HPsYh..h...............l.....l.........p....lh...s....h.h.....L...L...s..p...u.....h.......h........s....s.....l...l....h.....h.s.....h.....h....s..h...h.La..hR..I..c.p.EEphL..t....................................................... 1 166 278 396 +3045 PF03971 IDH Monomeric isocitrate dehydrogenase Bateman A anon COG2838 Family NADP(+)-dependent isocitrate dehydrogenase (ICD) is an important enzyme of the intermediary metabolism, as it controls the carbon flux within the citric acid cycle and supplies the cell with 2-oxoglutarate EC:1.1.1.42 and NADPH for biosynthetic purposes [2]. 
25.00 25.00 27.90 27.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -13.26 0.70 -6.17 5 1265 2012-10-02 21:08:39 2003-04-07 12:59:11 9 3 1189 11 297 1040 630 682.90 61 98.77 CHANGED TIIYTlTDEAPALATYSLLPIV+AFAcsAGIcVETSDISLAGRILAsFPDaLTE-QRVsDsLAELGELspoPDANIIKLPNISASVPQLKAAIKELQ-KGYAVPDYP-DPpTDEEKAVKtRYDRIKGSAVNPVLREGNSDRRAPtAVKNYARKaPHSMGAWSpsSKSHVAaMcuGDFFuSE+ShTlucAssV+IEFluK-GsVcVLKcpVALp-GEIIDosVMSKKALtsFaEcQlpDAKupGVLFSLHlKATMMKVSDPIIFGHAV+VFYK-VF-KaG-LF-pLGVNVNNGlGDLYAKIcSLPAuQRsEIcADIpAVYA+pP-LAMVDSDKGITNLHVPSDVIVDASMPAMIRsSGQMWGsDGKtKDTKAVIPDSoYAGVYQslIDFCKpHGAFDPTTMGSVPNVGLMAQKAEEYGSHDKTFElsADGVVRVl.DAsGcVLhpHsVEAGDIWRMCQTKDAPI+DWVKLAVTRARlSsTPAVFWLDPsRAHDsELIKKVcpYLKDHDTsGLDI+IMSPlEAh+aSLsRL++GpDTISVTGNVLRDYLTDLFPILELGTSAKMLSIVPLMAGGGMFETGAGGSAPKHVQQllEENHLRWDSLGEFLALAASLEHLGlKTGNsKAKVLAKALDsATGKLLDNNKSPSRKlGELDNRGSHFYLAhYWAQALAAQs-DA-LpA+FAPLAKALTEcEAsIVAELNuVQGKPuDIGGYYAP-s-+sAsVMRPSsTFNAAL-u ......pIlYThTDEAPuLATYShLPIlcuFspsAGIsVcTpDISLAGRILAsFP-hLsE.....-Q+lsDsLAELGELsppP-ANIIKLPNISASlPQL+AAIKELQspGYslP-YP-.-PpoD....EE+pI+ARYsKlhGSAVNPVLREGNSDRRAPtuVKsYA+KpPHp.M.G....t.W.ot.s.SKoHVApMp.pGDFausEKShsl......s..p..usslcIEh..l....sp....c....G.s...p.pVLKpslsLpsGEllDuo.hMStcALppFhtcpltcAKpp.sVLFS.LHlKATMMKVSDPIlFGHAV+lFa+-lFsKauchhcplGlssNNGlu-lau+lp.s.L.Pssp.psEIpusl....pusa.s..p..pPsLAMVsSDKGITNLHVPSDVIVDASMPAMIRsuG+MWss-G+.pDTpAVIPDpoYAslYQssI-.C+ppG.AFDPsTMGoVPNVGLMAQKAEEYGSHDKTFpl.stsGslcll......s......t......s....GpVL..hpppVEtGDIWRhCQsKDAPIpDWVKLAVsRARhossP.......AlFWLDtpRAHDsplIpKVppYLt-....H.....D....TpGL...-IpIhuPh-AhpaoLcRl+cGpDTISVTGNVLRDY.LTDLFPILELGTSAKMLSIVPLMsGGGhFETGAGGSAPKHVpQhlcENHLRWDSLGEFLALusSLE....alu..pp..s......s..NsKAplLAcsLDpAsuchL-ssKSPuRKlGE.lDNRGSHFYLAhYWAptLAtQscDs-LtspFuslAptLspsEppIlsELs.ssQGpssDlG..GYYtsDst+sstsMRPSsThNshlp.s............ 
0 77 185 259 +3046 PF01231 IDO Indoleamine 2,3-dioxygenase Finn RD, Bateman A anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.40 0.70 -6.05 39 516 2012-10-01 19:57:26 2003-04-07 12:59:11 13 11 214 4 368 501 538 358.10 28 79.96 CHANGED s.h.sLp-YtlS.cpGFLPtp.PLccLP.-.YYpsWEpls...psLPsL.............lts.+plRphV-.plPlLsssp.L....psct.EhRRAYhlLuFlsHuYlWus..spPpc.....................................hLP.sluhPhlclSccLtLPPlhTYAulsLWNa+......htss......shhsh-NL..sslpoFTGohDE.pWFaLVSVhlEtcuu.slphhlsulpAsp.ts-p..ttlspsLpplupslcclsplLpRMaEps-PplFYaplRPaLuG.KsMsshGLP.pGlhY.................-....uspsp.+p...YuGGSNAQSSLIQhhDllLGlcHpssu...............................................................pssFlpEMRpYMPssHRcFLpclpp...................................ssslRpYVhsppsst...tLptAYssClttLpsFRDpHIQlVoRYIlhsu+cst.tt.s.....................................ppcsssht.+GTGGTsLlPFLKQsRD-Ttcssh ..................................................................................httatls.ppGFls.................P..l..p..pLP.......apsWpplh....ppl..sth...............................l.s...tplRt...h.l.c....ph.....P.hL..s..s..pt...L...........pspt...thptA.ahh.....L.......ualspuY..l....at...t........ppstp..............................................................hLPtsluhPhhplup.............p.............Lt.l.P.P.hh...s.Y.A.s.hs.....L.hNap......h...hsss.....................t.h.phcNl.........phl.h..sFps....s.....sE...phFhLlslhhEttuushlphhhp..slp.......u.......hp....psst...............ttlhpsLppltpslpc.ls.th...hppM.........hc...t.......s.cPp..haYtplR.s.altG.......p..s....t...h.....P...pGlhY...............................-.......s.s.p...t..p.hp.....apGsSsAQSSllp..hhDhhLslp.c.ss.....................................................................................ttalp-hRpYMPssH+pFLptl.pp..........................................ss...lRp..hs....t..........ptsp..................tlt.sYstslttltthRstHhph
spcYIlh.upp..........................................................................................sout.s....hl...h........h........................................................................................................ 1 115 201 288 +3047 PF02479 Herpes_IE68 IE68; Herpesvirus immediate early protein Mian N, Bateman A anon Pfam-B_2276 (release 5.4) Family This regulatory protein is expressed from an immediate early gene in the cell cycle of herpesvirus. The protein is known by various names including IE-68, US1, ICP22 and IR4. 18.70 18.70 18.70 20.30 17.90 18.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.59 0.71 -4.95 13 78 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 36 0 0 68 0 125.40 39 38.87 CHANGED +sEYGssssstphRs....s+usGuFCussWpPDlsRLspDlNplFRsIspuuhsssssscsLRRsLhDhYlhGhsptR.s.ssWEpLLQLoPsQ.otsLRsTLR-lspRsststc.lpsssplstphaGsECDVS .............................Pp.usssssthlR.....sRtstthsussWpPDlshlppslNpLFR.lhpsuhcspssuppLRRhlhDhYLMGYs+tRLs.psWppLLQlostp.uhpLRsTlR-lpuR.ststc.l..pt.ssl.sphaGsECDls....... 0 0 0 0 +3048 PF01008 IF-2B Initiation factor 2 subunit family Bateman A anon Pfam-B_1302 (release 3.0) Family This family includes initiation factor 2B alpha, beta and delta subunits from eukaryotes, initiation factor 2B subunits 1 and 2 from archaebacteria and some proteins of unknown function from prokaryotes. Initiation factor 2 binds to Met-tRNA, GTP and the small ribosomal subunit. Members of this family have also been characterised as 5-methylthioribose- 1-phosphate isomerases, an enzyme of the methionine salvage pathway. The crystal structure of Ypr118w, a non-essential, low-copy number gene product from Saccharomyces cerevisiae, reveals a dimeric protein with two domains and a putative active site cleft [2]. 
22.40 22.40 22.80 22.70 21.80 22.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.63 0.70 -5.42 18 3047 2012-10-04 00:26:15 2003-04-07 12:59:11 12 22 1567 43 1662 2957 1606 282.00 28 77.40 CHANGED pl+GusslulsuhtsLhhhhpctp...hspsp-h.........hpplppthshLtsoRPouVsltsulchlhp...........tpshppscps.......................................llcpupphlp-.shpssptIuphutchI.........cDu-sILTas.sSus........sluVlpp.Ahppu.................+ch+VlssEoRP.thQG.+lsshpLhptGIsVTLlsDoulualMpc....lDpVlVGA-sIhpNGs.lhNKIGThplAllAKppslPFaVsA.phKFs.chthspplhhEtcsPpElhhhsssphh...................................................................slplhNPsFDlTPs-hIohlITEhGslsP ........................................................................................................................................lcGusshulsush.u....lh....h........h.....h....pp...h.t.............p.....t.t..h..................................hptlp...pth.p........hL...t..........s.........s......R......P.....o...u...l..s.....l.h......u.l.cp.hhp............................................t..t.....t...s....h.p..p..h...t...p.t.................................................................................................................................................................lh.ppup.p.h....h....p-....shts.scpI...u.p.h.u.h..p.h.l..................................p.s...s......t..s..lLTas..sous...................sh.u.s.lht..A...t..p.p.s....................................p...t..h...c.V.h.......s......s............E.....o......R.P....h................h.........Q.......G.....+...........h........o.A.................h.c..L.............t..........p..........t................G.................I..s..............s.TlI..s.D.s.s.s.uh..l.Mpp.................l....stV........lVG.ADpl.s.s.NGs.lsN......K......lGTht..lA.l..hA.........+...t........a.......s..............lPFhVsu..ss.phsh.p..h......s...s....p........p....l............l.......E.....+.s...s.p.E.l..h....p...h....t....s....
t....p..h...s.s.t...................................................................................................................................................................s.hp.s.h..N.P....s.FDlTPschlo.u.llTEhGlh.............................................................. 0 560 982 1379 +3049 PF00707 IF3_C IF3; Translation initiation factor IF-3, C-terminal domain Bateman A, Finn RD anon Pfam-B_629 (release 2.1) Domain \N 20.90 20.90 21.10 21.90 20.80 20.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.86 0.72 -4.24 150 4680 2009-01-15 18:05:59 2003-04-07 12:59:11 17 5 4531 3 1156 2821 2168 87.80 44 49.92 CHANGED hlplKEl+hpPpI-cHDhpsKl+psp+FLcc.GcKVKlolpFR...GREhs+t-.lGtclLp+htpcl..p-luplEptP.+hEGR...pMhMhLu.P+p .......h.lplKEl+h..pPsIDcpDapsKl+ssh+FL.-c.......GD...K.....VKlol...R...F.R....GR.....E.h..s....Hp-.......lGhclLp.Rltc-l....p-.lA...h...V.EptP..Kh..E.G.R...pMhM.lLA..P+................................................ 
2 397 751 978 +3050 PF01652 IF4E Eukaryotic initiation factor 4E Bateman A anon Pfam-B_1315 (release 4.1) Domain \N 21.00 21.00 21.50 21.20 20.70 20.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.19 0.71 -4.71 135 1579 2009-01-15 18:05:59 2003-04-07 12:59:11 13 11 412 47 834 1511 155 157.30 32 65.03 CHANGED PLpppWohWap........stssp.......................tpsapssL+.l..............ssFsoVEcFWulYspl..P.ocLshts-a...............alFKcGI.+P.hWEDstNppGG+Wh..hphp..........................+p..........................ph-chWhchlLuhIGEph..pts...................--lsGlV....ls.lR..t.ptpp...............lulW.s+ss.....................ssppshhpIscpl+..chL..plssp..........h....papsH ..........................................................PLpppWshWa..............tt..s.p....................................................spsap..ss.l.+...l...............................tsFs.o.VEpF...W....s.lYs.pl.....hs.....o..pL...sh....t...s..Da........................a.lFKc..G..I.c...P..hWE..DstNtp.GG+Wh..lphp............................................Kt.................................................thDphWhch....lLuhI.GEpF..pps...............................--l.CGsV.......l..s..l.R.....t....ptc...p........................lulW.s+ss.....................ps.pt....s.h.....h.pItcph+.chL..ph..p......h................................................................... 0 326 467 676 +3051 PF00932 LTD IF_C_term; IF_tail; Lamin Tail Domain Finn RD, Bateman A, Anantharaman V anon Anantharaman V Domain The lamin-tail domain (LTD), which has an immunoglobulin (Ig) fold, is found in Nuclear Lamins, Chlo1887 from Chloroflexus, and several bacterial proteins where it occurs with membrane associated hydrolases of the metallo-beta-lactamase,synaptojanin, and calcineurin-like phosphoesterase superfamilies [1]. 
25.70 25.70 25.70 25.80 25.60 25.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.43 0.71 -4.01 51 1619 2009-09-10 16:30:14 2003-04-07 12:59:11 14 164 831 14 675 1585 960 119.30 18 18.47 CHANGED sssssssssssslhls-hstsusss..........calElhNsus.pslDLsGaplpcp.......ssphthhssshh........sGpslhlhtsss..........tt...thhhttt...ssasst...............s-slsLhsss..Gphlsthshsstss .......................................hs........stsplhIsE..h.......s.u.....t....................calclh..Nsu....s...psl....sL..u..G....ap..lppp....................sssh..p.hs.sshhhh..................suph.l.slhssss...........................tts.sphhhptp.......hshsss...................................ssth.h.Lhssp......G.p..l.s.ht......t.............................................................................................................. 0 248 401 556 +3052 PF00714 IFN-gamma Interferon gamma Bateman A anon Pfam-B_615 (release 2.1) Domain \N 20.30 20.30 20.60 26.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.75 0.71 -4.34 9 229 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 125 13 27 233 0 123.10 50 85.86 CHANGED lLusSGsYCQusah+ElEsLKpYFNASssDVu-GGsLFLDIL+NWKEESD+KIlQSQIVSFYFKLFEsLKDN.QsIQ+SM-sIKpDhhsKFFNuSpsKhDDFp+LhphsVsDLpVQRKAlsELI+VMs-LSPcSsLRKR ...........................h.c-h-pLKtaF.Nuups...DVucu.GsLFl-ILKNW+...E.-uD.+KIIQSQIVSFYhKlF-.N.h.K..Ds...QhIQ+ShcpIK...EDh...h...s..+....F...h.N.uu.ppKhcDF.p+LhplsVsDLplQRKAlsELhpVhp.cLS..P.tuphtK........................... 2 1 2 7 +3053 PF00047 ig Immunoglobulin domain Bateman A, Sonnhammer ELL anon Bateman A Domain Members of the immunoglobulin superfamily are found in hundreds of proteins of different functions. Examples include antibodies, the giant muscle kinase titin and receptor tyrosine kinases. Immunoglobulin-like domains may be involved in protein-protein and protein-ligand interactions. 
The Pfam alignments do not include the first and last strand of the immunoglobulin-like domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.98 0.72 -3.85 52 1594 2012-10-03 02:52:13 2003-04-07 12:59:11 20 231 155 48 526 42115 2 63.00 21 11.35 CHANGED GsssslsCps.....tss.tsphpWhppspthtpttpsth..tt.............................lplsssp.p.cuGsYtChs ..........................................................pslpLsCps...................t.s.....s....t....h.p......W....p......p.......p......s.....p.......s.....h....t......t......t......t....h....t...t....t....p...h....t..t.................................................................................Lh.l.s.s..s......s..............p..cuG.s.YpCh......................................................................................... 1 144 166 265 +3054 PF02395 Peptidase_S6 IGA1; Immunoglobulin A1 protease Bashton M, Bateman A anon Pfam-B_540 (release 5.2) Family This family consists of immunoglobulin A1 protease proteins. The immunoglobulin A1 protease cleaves immunoglobulin IgA and is found in pathogenic bacteria such as Neisseria gonorrhoeae [3]. Not all of the members of this family are IgA proteases Swiss:O32555 from E. coli O157:H7 cleaves human coagulation factor V [2] and Swiss:O88093 is a hemoglobin protease from E. coli EB1 [1]. 
19.70 19.70 19.70 19.70 19.60 19.50 hmmbuild -o /dev/null HMM SEED 769 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.74 0.70 -13.53 0.70 -6.41 9 1016 2012-10-02 13:45:52 2003-04-07 12:59:11 11 15 370 10 37 880 2 454.70 25 57.43 CHANGED huhssh..uhAupsp.s.lsYQhaRDFAENKGhFpsGATNIplhsKpGphlG..hhstsPMhDFSsVsss.Gl.....ATLlssQYlVoVpHNu.GhsslsFGs.Gp.................spYphVc+NNhts..................DaphsRLsKaVTEstPsssosshssussYpsppRYsshhRlGuGpQaltp+s.............Gptsphsu.uYpahsuGostt..............uhtNuphhhus.ssstshp.hs........sLsshushGDSGSPLFsYDspcpKWVllGslpshssa.tts.t..slhpppFhsph.pcDpsu.lpt.s...sslshp....ssssGsGslT......pGSp......shchtsppss......................hNtGKsLhFpGt...GslsLpsslsQGAGGLhFcsshphpssss...shTWpGAGIslscGsoVsWpVpsspsDpLpKIGcGTLpVpGsG.NpGuLKsGDGpVILsQQADstsphQAFSpVsIsSGRuTVhLNsspQlsPs.slhaGFR..GG+LDLNGpsLTFc+IpssDpGAhIsNp.os+pSslTlss....ss.IstP............s.s...........h...t.h...hp....pp.phhhLctus.s......s...pss.h.h.G......pppshs+hp.t+htsas..haFstp.ssshsssl.hhhpGps..hhhh.sussNhcGslopppGsLhhoG+Ps.HA....sht.t...........pspslsppDW.NRsF+hcslplcsushhlS..psuslpusIpAo.NuslslG..ssp..hspscuc..TG.hshshctslosscslss.sh..hsGslsLsppus....pthstGltuhsus.plo..spuhashsss 
...................................................................................................................................................................................................................................................................................................................................................................................................................t.pa....hp.....t.p...h...t.......................................................s..........................................tt....................t...........h......GDSGSshahast..tpW.h.u......................................................................................................................................................................................................tpsh.h.......t........t..l....h.pth...p.G..u..l.Ft...t...t..h....t...t..h.GuGl.httt..l.Wth........................t....................tD.LtKhG.GsL.lp.h.......t..p.stlphGpG.VlLttp........sFspl.hs.uGpshl.lst....tpth....................t.t........hh.t..GGhL-hsG.s..Fp..l.....s.ts.l.st..............h..............................................................................................................................................................................................................................................................................................p.-W....F.ht..h.h...t..............................................................................................................................................................................................s................................................................. 
0 13 18 27 +3055 PF00219 IGFBP Insulin-like growth factor binding protein Finn RD anon Prosite Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.00 0.72 -3.42 54 1051 2009-09-11 00:39:06 2003-04-07 12:59:11 13 24 116 8 458 832 0 54.50 44 18.34 CHANGED Cs.....sC..p....C.P..............s.sPsCs.sVp...sGCGCChs..CA+p.......hG-sC......shpshC..spGLpC ......................Cs...sC..p.......C..P...................ss..s.sCs...sVp......-GCGC.Chs..CApp........G-s...C..........s...s.h....s...s....h.C..upGLpC.......... 0 48 94 234 +3056 PF01378 IgG_binding_B B domain Bateman A anon SCOP Domain This domain is found as a tandem repeat in Streptococcal cell surface proteins, such as the IgG binding protein G. 21.20 21.20 21.20 21.20 20.80 20.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.53 0.72 -4.16 4 50 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 18 74 4 119 0 53.60 73 27.86 CHANGED TYKLllpGpThpGETTTKAVDAATAEp.sF+QYANsNuVsG-WsYDsATKTFTVTE .......TYKLVlpGpT..hpGETTTKAVDAATAEK.sFKQYAN-NGV.DG-WoYDDATKTFTVTE... 
0 0 0 2 +3057 PF00475 IGPD Imidazoleglycerol-phosphate dehydratase Finn RD anon Prosite Family \N 20.30 20.30 20.60 24.10 20.20 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.79 0.71 -4.34 146 3695 2012-10-03 01:04:38 2003-04-07 12:59:11 13 12 3613 24 1011 2611 2275 144.30 51 57.73 CHANGED TGlsFaDHMLsthu+HuthDLplpupGD...lclDs..HHTVEDsGI..sLGpAlpcALGDKpGIpRa.................GsuhlPMDEuLs............psulDlSGRPalsac..sph.p.tp.lGs....hsoEhlccFFcuhu.sutlTLH.lphhhGc......NsHHhhEAhFKAhu+AL+ .........................................TGVGFaDHMLctlApHGthcLplpscGD..........Lc...ID-.....HHTVE.DsGIsL...Gp.AlppA.LG....D...K.....+...GIpRa.................Gs.hh...lPMDEsLu..........................psslDlS.....GRPaLsac............spas.....p....p+...l.Gs..............hsTEhlccFFculuhsutl.TLH.l.c...sh..Gc......NsHHhlEu..lFKAhuRALR................. 0 330 666 867 +3058 PF00218 IGPS Indole-3-glycerol phosphate synthase Finn RD anon Prosite Domain \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.44 0.70 -5.33 20 5164 2012-10-03 05:58:16 2003-04-07 12:59:11 16 18 3907 23 1085 4373 2572 228.50 40 76.40 CHANGED Lp+Ihsp++tcVsst+pp.s...uclpt.h..t.sssspuFh-sLpp....tpsulIAElK+ASPSKGhIc.-hssschAcsYppuGAusISVLT-scaFpGuh-hLtplRpslslPsLp........KDFllDpYQIhEARhhGADslLLIlusLs-pp..hpcLhphApuLGM-sLVEVpstpElcRAL.tlGA+llGVNNRNL+oFcVDLssTpcLsshls.......t-.slLluESGItotpDlcphpcpGscuhLVGEuLM+ssDhcttl+cL 
..........................................................................................................................LtpIhttpt..ltth...p.t...s.....t.h.t.h................................h...s......h..h..tsLpt..................tt...h...shIsEhK+AS...PS..c.....G........l....p...............p.....h....s....s......thAt.....Ypp.h....t........AsslSVLTDpcaFp...............GSh-........Lptl.p.....p.....t.....s.....s......h.....Pl.Lp......................KDF.IIDsaQIh.ARhhGADslLLhl..u..sL...........s...D.........c...p...........hp...cLhshA.c.s.LsM..s.VLlE.VpsppE...l-...R......Al...pL.......s.........s...c..........l....IG...I.N.....N...R....s....L.........+..T..h..p...s.....D........l..s.....p......T....h.....cL...t.....s.....hlP..........................cs...t...lllSESGIhotsplctl.t.th....ssuhLlGpulMpt.ts..ttthtt............................................................................ 1 350 707 929 +3059 PF05049 IIGP Interferon-inducible GTPase (IIGP) Moxon SJ anon Pfam-B_5519 (release 7.7) Family Interferon-inducible GTPase (IIGP) is thought to play a role in in intracellular defence. IIGP is predominantly associated with the Golgi apparatus and also localises to the endoplasmic reticulum and exerts a distinct role in IFN-induced intracellular membrane trafficking or processing [1]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.15 0.70 -5.65 6 355 2012-10-05 12:31:08 2003-04-07 12:59:11 8 11 60 8 235 584 54 294.00 31 78.44 CHANGED SsEllptIcpAlcEGpLpcllShIc-slpslspshlcIAVTG-SGsGhSSFINALRsIGHEE-sSAPTGVVcTThc+TsYpp.s+aPpVhlWDLPGlGuTs.os-oYLEEMpFupYDhFIIIuSppFSpNclcLApsIppMtK+FYhVhTKlDpDLosp......oFsc.clLQsIpcNhpssLQcstVpEPPlFLlSshcsspaDFPcLc-TLhKDLPshKpHshhhoL.sls-psIspKtpsLKp+IhL.EuL+uuhls........tssDhsNhccshKsYRphFGlDDtSLpplApchsh.ltchcsph+u.phashpc--pllcphhphhlptFhtlht......hh.sssshh+chhapphhFL-hVApDAKslLcKI ...................................................ht.thtp.tp....s.h.shlp..p.h..l......t.t..h..p...p......s.......l.pluVsG-oGsGKSShINA.l....R.G....l....s.p.-...-...c..s..u.....A..T..G......V......l.......E....T....T.........h...........c...........s.....s..Y..............a....P..............p.........h...P......s...V..tlW.DL..P.......G.........h................G...........o.........s.....s..........h..............s........c..........p........Y.......L.p..p..h..p..F........tp......Y.....D......h.........F...I..l...l..u...u...p.R..F.p....t.s..-...s.pLA.....ctI......p.....p...h....t...K..p..F...YaV...RoK.............lD....p..D.l...ts..........p.t.....t......p...........p.s.a..s......c.....p....p..h...Lp..p....IR....pp...shp.......pLp......p...t....tl..pps.lFLlSs.h....p....tta.DF..L.psl.p-Lsth+pph.h.....h.slsshs.thlppKtt.hppplhh..uhhssh.t...............c.t.h.t.hp.Yp..FGlDc.Slt.hupphth...tp.hp..hc.............................................................................................................................................................................................................................................................. 1 49 83 136 +3060 PF00340 IL1 interleukin-1; Interleukin-1 / 18 Finn RD anon Prosite Domain This family includes interleukin-1 and interleukin-18. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.59 0.71 -4.65 16 589 2012-10-02 19:42:32 2003-04-07 12:59:11 14 3 132 42 189 614 0 110.80 26 54.99 CHANGED hpspshpppshFsMshhpsp.psstlPVs..LuIcsppL.aLSC.pc.spssLphEts-.P+hhsspcp-hhFhFp+ststspsp...FESutaPsaFluTpp-pp..h.Lsph.ttps.lTDFph ...........................t........t.hh.h.....h.....p.........t..t....t..s...p..t.....h.PVs..L.ul..p...sp...p..h..hLSC...t..p..p..t...s..p.....P.sLpLc..cs....c...h.....c.........h.....s..p...p.cp..s.....t.......h..FhFh.+pp..suspsp....FESutaPuWFluTsp..ps..p........l.lsp..t...t...............t................................ 0 9 13 38 +3061 PF00726 IL10 Interleukin 10 Bateman A anon Pfam-B_885 (release 2.1) Domain \N 24.20 24.20 24.50 24.50 23.80 24.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.00 0.71 -4.96 3 341 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 126 14 83 316 0 146.60 36 93.10 CHANGED ALLpCLVLLAGVtsSRstYstsESuCTHhPsShPHMLRELRAAFS+VKTFFQpKDQLDNLLLT-SLL-DFKGYLGCQALSEMIQFYLEEVMPQAENHGPEIKEHVNSLGEKLKTLRLRLRRCHRFLPCENKSKAVEQVKNsFNKLQEKGVYKAMSEFDIFINYIEAYMTI ................................................................................................................................................hs...hLp-LRssFpcl+s.h.hQh..cD.p.hp.s..l..LL..pps.l..l.pphKuhh..G..CpshschlpFYL-c.V.h.Ppu....pp....p....s............s....c..l+..c+lsS...lupphpo.L+h..c.LR..pC...+...p....ahs.C...c...s...+..s...c...u...lp...p...l.+...p...sa.pcL.p.p....p.G.l....hKAhuEhDlhlsalE...................................... 0 4 8 25 +3062 PF03039 IL12 Interleukin-12 alpha subunit Mifsud W anon Pfam-B_2071 (release 6.4) Domain Interleukin 12 (IL-12) is a disulphide-bonded heterodimer consisting of a 35kDa alpha subunit (e.g. Swiss:P29459) and a 40kDa beta subunit (e.g. Swiss:P29460). 
It is involved in the stimulation and maintenance of Th1 cellular immune responses, including the normal host defence against various intracellular pathogens, such as Leishmania, Toxoplasma, measles virus and HIV. IL-12 also has an important role in pathological Th1 responses, such as in inflammatory bowel disease and multiple sclerosis. Suppression of IL-12 activity in such diseases may have therapeutic benefit. On the other hand, administration of recombinant IL-12 may have therapeutic benefit in conditions associated with pathological Th2 responses [1,2]. 25.00 25.00 25.60 27.60 23.60 24.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.44 0.70 -5.23 3 97 2012-10-02 01:28:15 2003-04-07 12:59:11 9 3 54 2 30 88 0 172.50 47 88.05 CHANGED MCP.uRsLLLLATLVLLN...HLSLARsLPsSTsGPuh.t+CLNpSQNLLKTVDssLQsA+EpLEaYSCTAEEIDHEDITKD+TSTVKACLPLELApNESCLAoRETShIT+GSCLTSGKTSaMTTLCLSSIYEDLKMYQlEFQAINAKLLMDPKpQIaLDKuMLsAIDELMQALNaNuETVPQKPS..lsEuDhYRTKhKLCILLHAFRIRAVTINRVMSYLNSS .....................................................................................lhlshLsLL........Ls.shsLPss...s.....h.sLphSpsLLpsssphL.pKA.+QtLchYsCTsEElDHEDIT+sposTlcACLP.ELhhNEs....C.Lsopch.S...hp...pGpCLsot..+.oSahhsLCLpSI.YEDLKhYpsEFpshstt.LhhpscpQlhL..DpshLssIp-LMQu..LNh.s..u...c..sl.sp.psu........hp.c.c.Y+s+hKLCllL+AFplRsVTIsRhhuYLsu........... 0 1 2 12 +3063 PF02372 IL15 Interleukin 15 Bateman A anon Pfam-B_2545 (release 5.4) Family Interleukin-15 (IL-15) is a cytokine that possesses a variety of biological functions, including stimulation and maintenance of cellular immune responses [1]. 
28.50 28.50 28.60 30.10 28.40 28.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.04 0.71 -4.37 14 168 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 53 22 54 152 0 126.90 31 84.55 CHANGED VhIhuClSAulPpTEAs...Wp........sVIpDLcpIEplhpSlHlDsoLYT.poDs+.pCphpshpCFLLElpVIL+Ep..csssIccplcNllhhusssLso..ptssTtstCK.CEEhEcKNhsEFLQsFs+llQhFlp ...........................................hh.Ch.shh.tTpAp.....hp..............plIpcL..cplcshl..psh.ch-.........hLYT..so.Ds........c....p.CphouhpCFhhEl.Vllc.Es...ssspl..pcplpsllh.hspsL..s..s........t..tp.hs...p.s..sCp.pCEphE.cKshpEFLpphhpllQh................... 0 2 4 16 +3064 PF02394 IL1_propep Interleukin-1 propeptide Bateman A anon Pfam-B_1500 (release 5.2) Family The Interleukin-1 cytokines are translated as precursor proteins. The N terminal approx. 115 amino acids form a propeptide that is cleaved off to release the active interleukin-1. 21.30 21.30 21.50 22.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.67 0.72 -3.65 23 206 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 87 0 40 195 0 96.10 34 38.57 CHANGED MApVP-Lhp-hhshYS-.pp-hhaEhDt.shhppsF.DhshssLt....DpslpLphScpphsp..oF+psVsllVAs....-cL+Kh.lshsQsFpDDDLculhsslFEEEsI .........................MAhVP-h.p-.hsh....hS.......-.pp-.h...t.Dt..p..p.psh.D...hshssh.................cpslpLplScpptsp..oF+.....psVslVVAh....cKL+..+......hshspsFpD-DLpu.l.hsp.lFEEE.l............... 
1 2 2 8 +3065 PF00715 IL2 Interleukin 2 Bateman A anon Pfam-B_709 (release 2.1) Domain \N 25.00 25.00 25.60 25.60 24.60 20.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.02 0.71 -4.47 7 151 2012-10-02 01:28:15 2003-04-07 12:59:11 12 2 76 47 19 164 0 124.30 65 94.53 CHANGED LSCIALTLsLlsNSAPTSSS..............s+pTQppLEpLLLDLQhLLptlpN.cNsKLoRMLTFKFYhPKp.ATELKHLQCL.EELKPLE-VLsLsQSKNhp.p...hK-hhSNIpVTVLcLKGSETpFpCEYDDETsTlVEFLN+WITFCQSIhS .......LSCIALoLsLlsNuAP.T.SSS...................TppTppplcpLLLDLQhLLptlpN.cN.KLoRMLTFKFYhPKp.ATELKHLQCL.EELKsLEEVLsLApSKNhphpc.......h+-.hsNIplhVLcLKGSETpFpCEYDD-TsshVEFLN+WITFCQSIhS.. 0 1 1 2 +3066 PF02059 IL3 Interleukin-3 Mian N, Bateman A anon IPR002183 Family \N 25.00 25.00 25.80 30.50 19.40 18.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.58 0.71 -4.11 5 41 2012-10-02 01:28:15 2003-04-07 12:59:11 10 1 25 2 11 36 0 113.60 47 79.56 CHANGED Au.oposol+soRTs.CSslhcEIls+Ls...lssps.ssLNuDDpshLpcsoLRRsNL-sFhppAs-ohssD.osIcSsLKcLpsCLPsATssSspcPIaIKDtDhsDFR+KL+FYLlpL .........As.sQshsl+Tohsp.CSshlcEIlscLpp.slP..s.ssLNs--psILhcssLRRPNLEAF..pAscohp..Nu.SuIcS....LKpL.PCLPhAT.ssPspcPIpI+DsDasDFR+KLcFYLcsL. 0 1 1 1 +3067 PF00727 IL4 Interleukin 4 Bateman A anon Pfam-B_833 (release 2.1) Domain \N 20.50 20.50 20.70 20.50 19.50 20.40 hmmbuild --amino -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.70 0.71 -4.23 8 198 2012-10-02 01:28:15 2003-04-07 12:59:11 13 2 92 24 28 186 0 84.40 48 78.13 CHANGED pch-hsL+EII+TLN.lTs+.cssChELsVsDVLuAsKNTTEKEhhCRAopVLRphYp+H...............pposlhphLptLDRNLsuLAst.oCoVNEuKpoT..LKDFLERLKoIM+cKYS ......phcIsLpEIIKTLNhLTsc....Ks...sChEL..sV.sDhhsss.K.ssscpEhhC+AuthLpph...p+....................tt..h...h.lttL.psh.uhsth..sCssptsp.......hpphL.tL............................... 
0 1 2 3 +3068 PF02025 IL5 Interleukin 5 Bateman A anon PSI-BLAST P05113 Domain \N 21.90 21.90 22.20 25.50 20.50 21.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.47 0.71 -4.05 6 60 2012-10-02 01:28:15 2003-04-07 12:59:11 10 1 46 8 18 50 0 105.40 63 84.89 CHANGED AlEuPMstLVtETLsLLSTHRTLLIGstsLhIPsPpHKNHQLCIEElFQGIDTLKNQTsQG-AVc+LFpNLSLIKcYIDhQK+KCGtERhRVKQFLDYLQEFLGVINTEWThE ..........................hE.shstLVtETLsLLSoHRTLLIustsLRIPsPsHKNHQLCIEElFQGI-TLKNQTspGssV-+LFQNLSLIKcYI..Dt...Q...K+.KCGtER+RV+QFLDYLQpFLGVhNTEWhhE...... 0 1 1 2 +3069 PF00489 IL6 IL-6; Interleukin-6/G-CSF/MGF family Finn RD anon Prosite Domain \N 21.40 21.40 22.20 21.60 21.20 20.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.86 0.71 -4.63 10 256 2012-10-02 01:28:15 2003-04-07 12:59:11 12 2 99 27 74 227 0 135.50 31 71.80 CHANGED I++IlucISAL+KE...hCssaphCcssc-sLAENsLsLPKhtppDGCaQoGFNQ-sCLs+IpoGLhEYQsaLphLpsphcusp.sslcsLQhsspsLlphlpQchcs.t..ssssPsspssLtshhpupspah+csshhLIL+sLccFLphSLRAlR ..................................................h.tt............Ctt.t.C.sp..hlhtppL.slPph.p.tDu.C....ppuh..sp..psCLp+lpsG..Lh.aphhL..c..hlps.p.hts..tt....tp..l..csl.phsspsLhphlpp..chc....p......hh.sPs.hp.sshhsphps..ps.tW.+phsh.hlILpsLpsF...LphuhRsl+.......................... 0 3 6 22 +3070 PF01415 IL7 Interleukin 7/9 family Ponting CP, Schultz J, Bork P anon SMART Family IL-7 is a cytokine that acts as a growth factor for early lymphoid cells of both B- and T-cell lineages. IL-9 is a multi-functional cytokine that, although originally described as a T-cell growth factor, its function in T-cell response remains unclear. 
21.00 21.00 21.00 21.30 20.40 20.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.74 0.71 -4.49 7 77 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 28 3 23 73 0 111.60 42 85.17 CHANGED sltG...+cspshtulL.ls..I-cL.cp..sscssCssN.sshhphpl.sDDspcssF....uc+LpQhhphs.pscFslhhp+VppuspsLhNsTs..................hpppKssppppttNsLsFLKpLLpphppshs+lL+G ................pltGpss.phtu.l.h.ls..IspL.pp..t.tssC..sN..shhph.l.sDsspcshF.....tctLpQhhphs.ptcasLhh.+Vppss.hLhNspst.......................hppppshppppphNshsFLKpLLpphppph.+hhht............ 0 2 2 2 +3071 PF00048 IL8 il8; Small cytokines (intecrine/chemokine), interleukin-8 like Eddy SR anon Overington enriched Domain Includes a number of secreted growth factors and interferons involved in mitogenic, chemotactic, and inflammatory activity. Structure contains two highly conserved disulfide bonds. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.30 0.72 -4.00 181 2117 2009-01-15 18:05:59 2003-04-07 12:59:11 15 8 161 279 731 2035 0 63.80 25 56.48 CHANGED hus......C.Chph..psphls.hphl...p.saphhssst.Csp.sulIhp.h+......p.s+plCssP....pppWVpch..l..chL .........................tt.........C.Chph....tpp.ls...hphl.......p..shp.hhssu...tCsp..tlIhp..hK.................p..scplChsP....ptpaVpchl.pt............................ 0 45 90 187 +3072 PF01787 Ilar_coat Ilarvirus coat protein Bashton M, Bateman A anon Pfam-B_1131 (release 4.2) Family This family consists of various coat proteins from the ilarviruses part of the Bromoviridae, members include apple mosaic virus and prune dwarf virus. The ilarvirus coat protein is required to initiate replication of the viral genome in host plants [1]. Members of the Bromoviridae have a positive stand ssRNA genome with no DNA stage in there replication. 
20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.92 0.71 -4.74 20 499 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 29 2 0 481 0 189.10 34 91.67 CHANGED s....sstpsststssuRppphsspRAsuhRsp.t....ssstsshPl......slssspps.tsphplphPssts......h..sspssophsupssssh.sshshcThlpsI.pl.cssTKlYsllhGFlu.pSDGhhGhl..-uhsssssssPsslsRhtFKKspYpu+phshssGpsls-l.sshslVWphDss....tpsspplplscaWluhSc.PslhPPpsFLVs-- ..........................u..stpsttspssst...htttptsspsss.......tss+hsssh........Pss.spss.popWpl+GPNs.s+.....t.a.sspsp.pElos.s.ssspa.hsIsFpohhtplh...spshplaslllphsu.sSsGhhGhV..-sacss...sssuPNsls...R+...GFpKcp.RGhQacsP....sshshsshscshtlVhcacsp....h.suscV...hhpshal.hSt.s.l..PpshLhsp............ 0 0 0 0 +3073 PF01450 IlvC Acetohydroxy acid isomeroreductase, catalytic domain Bateman A, Griffiths-Jones SR anon Prodom_2380 (release 99.1) Family Acetohydroxy acid isomeroreductase catalyses the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine. 
29.10 29.10 29.10 30.20 28.10 29.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.67 0.71 -4.27 19 5123 2012-10-02 19:36:47 2003-04-07 12:59:11 14 7 3940 36 1202 3289 2644 141.90 39 46.51 CHANGED Fc-EscoDLFGEQsVLCGGlpt......................LlcsG.......................................................................................................................FETLlEAGYp.PEhAYFEslHE.lKLIVDLIactGlttMphslSsTAEaGshspusRlhsptoKttM+clLc-IQsGsFu+chlhpspsup.p..hpthpcp.cpc.IEplGccLRthhshhp ..........................................................................................................FppEscoDLaGEQs...VL.C.G.ulpu.llpsu..................................FE.sLV.Eu.Gat.PEhAYaEsL..H..E...hcLIs-LlhcGGls.p.Mph.sIS.sTA..Ea.G.sYlhu.sclh..tts+.hh...+....phhs-IQsG...p...Fucs.h..l..t..-..t.......p..s..s.psp.....hps.h.Rc.tttpctIEplGtcLRthMs.h................................. 0 361 757 1011 +3074 PF00920 ILVD_EDD Dehydratase family Bateman A anon Pfam-B_1309 (release 3.0) Family \N 18.60 18.60 18.60 21.70 18.50 18.30 hmmbuild -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.64 0.70 -6.08 89 6864 2009-09-11 11:08:28 2003-04-07 12:59:11 16 13 3924 2 1852 5418 6322 522.70 40 91.02 CHANGED 
+PhIGIssoas...........-hsPsphHLpcluctlKpGlppuGGhP.hpasohulsDGIshGptGMchSLhSR-lIAsslEhhlpups.hDuhlhluuCDKssPGhLMuAsRl.slPulhlsGGshhsGph.....tG...........ppls...hpssa-uhGt.ht....tGclscc-............lpplEpsusPus.GuCushhTANTMsslsEALGhuLPGuushPA......sssc+tphucpuGcplhch..l.....c.c.......slp.Pc-IlTccuhcNAlssshAhGGSTNslLHLhAIA+c...AGlp...lsl.-DF-cluccsPhlsslpPu.G.pah.hpDhptAGGlsslh+pLhct.s..h..LctDshTVo......G+TluEplpps...................................hstc..................................VI+PhssPhps..pG.GltlL+GNLAPcGAVlKhuus........chhta...........cGsAhVF-uE--shpAl.tssclc.Gc........VlVlRapGP+GGPGMsE..MLssTuslh.utGlupcVALlTDGRFSGu.o+GhslGHVuPEAus.GGPIAllcsGDhIpl..Dlt..sppLslh.ls-cE.........l....tp.R.+.......ttat.s.......s......sphp......pG.hLttYsphVssAspGA ................................................+PhIulssoas-hlPu..........H.......hH.Lcc..........hu....chl+culcp.A.Guls.hpF.............s...T.lu................l...sDGIu..h.GptGMhaSLsSR-lIAsSlEhhlsuph.hDuhlhlusCDKlsPGhLMAAhRh.sl...............PslFVsGGPMtsGts..........p.h.............................p.t.ht.....hhshh.c.u.h..t.t.h.s....s.G.c..l..o..c..pc.............l.h..ph..E..ps....uCPos.GoCuGM....aTANoMsslsEuLGl.u.LPGsuol.A....................sp.s..p..Rcplh.tpAGcpllchs........c...c.......................sl.h...Pp...cI......lTccAa-NAhslshAhGGSTNslLHLlAhA...pc.........A...G...l.c......h.sh..cDhsc..lS......cc..VPh..LscltPu....G....p.ah..hpDla....p.A.G...GlsullpcLhc..s..G..........L.L.+t.DshTVs........Gp.TL.s-tlpph............c.........................hs...s.tp.....................................................................lI+s..h-..pP..hpp........cG..GLtl.L.p...........GNLA.p.G.uVlKsuuV.s............ph.h..pa......................pGsAhVF-Sp--.shpAI.....s...s....c..l..p..t..G-.........VVVlRYpGPKG..G..PGM..E..MLtsTu..hLh..utGlGcclA.LlTDGRFSGu.opGhulGHloPEA.A.s.G.....G...............s....IAllc-GDhIplDhs....sppl....pl......l....s..-.tE....................Lsp.R.+.................tt.hp..s..................p............p.ht
........pshLttYsphsouAspGA..................................................................... 0 509 1129 1530 +3075 PF05046 Img2 Mitochondrial large subunit ribosomal protein (Img2) Wood V anon Pfam-B_17929 (release 7.6) Domain This family of proteins have been identified as part of the mitochondrial large ribosomal subunit in yeast [1]. 27.30 27.30 28.30 27.90 26.90 27.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.91 0.72 -3.75 22 279 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 228 0 205 270 3 82.20 32 50.39 CHANGED shsYaVpR..opsspLPVYhch+ttGs+hhTt.........IRKlpGDl.sLcpDLpphLs...t........shpVs............................chsGplhlKGthhpclccaL.ppsF .................hsYaVcR......opsp.plPVYhch+.......p.......tGs.+phTh.........lRKl-GDl....htLppDLpphLt...t............tspls..................................phsupltlK.G...h.hpplcpaLhpttF......................................... 0 81 116 170 +3076 PF04156 IncA IncA protein Bateman A anon Pfam-B_2718 (release 7.3) Family Chlamydia trachomatis is an obligate intracellular bacterium that develops within a parasitophorous vacuole termed an inclusion. The inclusion is non-fusogenic with lysosomes but intercepts lipids from a host cell exocytic pathway. Initiation of chlamydial development is concurrent with modification of the inclusion membrane by a set of C. trachomatis-encoded proteins collectively designated Incs. One of these Incs, IncA, is functionally associated with the homotypic fusion of inclusions [1]. This family probably includes members of the wider Inc family rather than just IncA. 
50.00 50.00 50.20 50.10 49.90 49.90 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.36 0.71 -4.70 54 286 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 42 0 56 201 0 185.00 17 49.97 CHANGED shptplhslshlllull.llssGlss.Lshhh...hushhsshluhshlAlu..hlLlshulhhhhtpp.ht.hpttp...........................................................chtphppplsplp...cclpp..hppphtphppplpphppshpttp...........p.hpphppclpshppshpphtpchpclh...................t.ht.....htt......t..tthtphhtp.pplhpp...........h ............s.ht.hlhslhhhlhull.llssGlss.Lshhh...husslshhlslshlAls..slllusulhhlhpppphthhpstth...................................................plpphpptlspLp...p-hsp..hppphtphppplpthppsh.ttp...........p.hpshtpplcshppphpphtpchpclh............................tthpphhpphtth..........t.ptlt..htp..t.hp..h........................................................................................................................................ 0 5 6 55 +3077 PF02387 IncFII_repA IncFII RepA protein family Bateman A, Mian N anon Pfam-B_1209 (release 5.2) Family This protein is plasmid encoded and found to be essential for plasmid replication [1]. 22.90 22.90 22.90 23.10 22.50 22.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.85 0.70 -5.24 22 635 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 342 0 25 477 1 239.80 41 90.66 CHANGED M...........s+ppYVpNspPhFs.P+psK+pssFI.phhMcpA....uchDlA+p.hphphhshs.hTh..h.................RpRphNtHRApAhcAll.AMhaHashhoshVpsSIcpLuDECGLoThSpuGNpSITRASRhlspFhEshGhIpsc+haD.hlusYIPKhIhlTPhFF.LhslSptpltsApppplpW.Npphpcctht.lohsEhhhpuhc..hpphhphptphh.h.hp+t.A++hhph....sEcsh+pcIlptLl+pYotsELsphG.puLK+cls.cY..L++ltpp. 
...........................................................tph..............P.a..stp.t....th....h...thhht+s.....schDhsh..p.....hahp...h....s.........................hR+Rh.sshRpRAlDALl.uLsaahD..shus.....pVptSlopLAhE.CG..Lu..T..cS...u............uG.......p......l......S..I......T..R.Ao..RuL.pal..t..chGl..I...........o....h.pT..p...aD...hush..lPpcIhhTPAhFthL..sVopstltst..p....c..pRlchp.....sppp....pcpth......ss.Lth.D.E......lh....A+s..hR......ahhpRhpshppch+u+.uh+.....R.tR.ARR.sph....pcpchlphltctLp+clstschht.st-slKRpl.hc...........cpphhht....................................................... 0 6 11 18 +3078 PF02974 Inh Protease inhibitor Inh Griffiths-Jones SR anon Structural domain Domain The Inh inhibitor is secreted into the periplasm where its presumed physiological function is to protect periplasmic proteins against the action of secreted proteases [1]. A range of proteases including A, B and C from E. chrysanthemi, alkaline protease from Pseudomonas aeruginosa and the 50 kDa protease from Serratia marcescens are inhibited. 25.00 25.00 25.10 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.29 0.72 -4.42 27 259 2012-10-01 18:54:33 2003-04-07 12:59:11 9 4 230 3 85 217 6 98.80 31 59.58 CHANGED AoSlhl.ssupluGpWpls...............tsspsCcltLspsph.ts...thuGshsCssph.LuphsssWpsoPcslsLhstsGsslshhs+pscspapuphssG.tslsLpR ..........ssoh...ssusluGpWplu...............tuspsCcltLspsphspG.........hhuGshsCssp...Lsphsu.Wt.s.s.s..ct.lsLh-ssGsslupLhpsupupac..up.ps.uG.pslsLpR.................... 0 13 34 54 +3079 PF00876 Innexin Ogre; Innexin Bateman A anon Pfam-B_779 (release 3.0) Family This family includes the drosophila proteins Ogre and shaking-B, and the C. elegans proteins Unc-7 and Unc-9. Members of this family are integral membrane proteins which are involved in the formation of gap junctions [1]. This family has been named the Innexins [2]. 
21.80 21.80 21.90 22.10 21.40 21.70 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.50 0.70 -5.34 99 863 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 123 0 560 840 1 283.30 22 76.49 CHANGED Dshs.RLpaphTshlLlshul...llos+QY.h....GcP...IpCh...ssphs....ppahssYC..alpsTahl......................................................s...hspph.....................s........sthp.c...thchhsYY...............QW.....VshlLhlQAlhFYlPphlW+.....shpGhplctlspth...................ssshhsp.........-ppcpp...................hchlspah.pptlptpp..th.....................................................shhYhhsclL.llNllsQlahhstF.Lut..............................sat....haGhpllt.h........hps....pphppsh..hFP+lThCsap.....phG.........s.hpp.ass.CVLslNhhNEK.......IalFLWFWhhhlulloshsllahlhplhhs.....phhhphLch.......psphts.....................................................................hpchtt..chLphsshFlLphlspNhssllhp-l...lppLhppapp.p ....................................................................................s.h.+lphhhTshl.......lhhhsh...........llssppa.h...................G.pP......IpCh......s.sphs.............tpahpsYC......WhpsT.ahl................................................................t..pth...........s.......................t.t..........t.htYY...................QW.....lsa...hLhhpAhhhYlPphhW+..........hp.uhp.lphlhpth...........................ppsh..t.................ptpppp........................hphlhphh..p..h.p.pp.h.......................................................................................................................shhYhhhchL.hhs......hlhphhhhsha.ls..t..............................................t.......au....thh.......h...........................h..........hFP+.hsh.Cph......hu..........s..p..hsh.Cll.hNhhsEK..............................lahhla....hWhhhl.hhshh.hh.hhh....h..........hh..hh...........................................................................................................................th.....
t.ht.sshhhh..ht.p.s...h.ph...h.th.......t............................................................................................................................................................................... 0 230 281 485 +3080 PF01658 Inos-1-P_synth Myo-inositol-1-phosphate synthase Bashton M, Bateman A anon Pfam-B_959 (release 4.1) Family This is a family of myo-inositol-1-phosphate synthases. Inositol-1-phosphate catalyses the conversion of glucose-6- phosphate to inositol-1-phosphate, which is then dephosphorylated to inositol [1]. Inositol phosphates play an important role in signal transduction. 20.00 20.00 20.00 24.60 19.30 18.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.15 0.71 -4.24 60 1141 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 916 36 533 1086 280 110.30 40 26.16 CHANGED GtThh+ssLuphhtsRGl+lpshhphNhhGNsDuhsLss.pphcSKchSKosslsshls.......h................................................pts.HhsslcYlPhlGDpKhAasclcsptFhGsshplplphpspDS ............................................GtThh+pVLschhhsRGl+lsphhphNhhGNpDhhNhtp.pphcS.Kc.ISKopsVs..shls....tp..Lh...............................................tpcssHls..s.c.YVPals.DpKhAhschpuchFhG..s..slph+hpspDS............. 
1 186 341 459 +3081 PF00459 Inositol_P inositol_P; Inositol monophosphatase family Finn RD, Griffiths-Jones SR anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.84 0.70 -5.10 59 10135 2012-10-02 15:53:20 2003-04-07 12:59:11 20 42 4250 79 3140 7691 7848 256.80 24 93.87 CHANGED phpplhphs.......hphsp.tuuplltptht......pphshphcs.........sssD...h..VTtsDptsEphlhpt....lppth....Pscsh..luEE.shstss..........................h........sss.hsWllDPIDGTpsFlcGh..............shaulslulhh......ptpPlhGllhsP.......h................hs...p...hapuhpG........p.Gu..hh...su...............l..plpsss........hspsh..hhsththp.....pttphsp...h..htth.thhstt...............h..R.th...Guush..clshlApGpsD..sahch..s......hp.WDhAAGthllcEAG..............Ghls.chsGs........hsh..hs.t..thlsus...........tth...hhpthp.phht .........................................................................................................................h........hhphsp.pAuphlh.phhp...........pthphphcp....................................ss.s...........VTp.sDp.ts..cphlhpt.....Lppt...h..............P..s...t..s..l........lu.EE..s..stt.ts..................................................................................tpp.th.hW..llDPlD..GTpsF.lc.sh.............................................sp.a..ulsIAlhp.................cG.....p.....s...h.h....G....l....lasP......h...................................................ts....c.......ha.h...A..t.cG..............t..G...A....h..h....ss...........................p...l...psppsp...................................hp.ts.h....l...h.s.......h..s..h..p...............t..p.t..t...h.t.p............h.....h.t..th..h..t.thptt...................................................h..R..th...G.uu..ul...ch.s.h.V......A......p.........Gp.........h.D....s.a..hch.s........................hp.WD...h...A...AGthl.lpc........AG..................G..hl.s...s.h.pGp.....................sh.h.......tp....t.....th
hhss.........................hh.......................................................................................................................................................... 0 961 1869 2579 +3082 PF03488 Ins_beta Ins_beta_nem; Nematode insulin-related peptide beta type Griffiths-Jones SR anon PRODOM Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.97 0.72 -4.16 11 116 2012-10-01 19:54:34 2003-04-07 12:59:11 9 2 6 1 114 148 0 47.10 30 45.29 CHANGED hRuCGR+LhphVhslC.G-.CsspsshDIuspCCpsp..CTc-aI+psCCP ........+tCG.p+lhp...hlhplC..ut.....C.s.....s...psshcl..u.stCCspt....socp.lpptCCP....... 0 32 37 114 +3083 PF03811 Zn_Tnp_IS1 Ins_element1; HTH_Tnp_IS1; InsA N-terminal domain Griffiths-Jones SR, Bateman A anon PRODOM Domain This appears to be a short zinc binding domain found in IS1 InsA family protein. It is found at the N-terminus of the protein and may be a DNA-binding domain. 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild --amino -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.90 0.72 -4.46 4 1346 2012-10-03 10:42:43 2003-04-07 12:59:11 8 14 477 0 86 743 12 33.10 58 28.45 CHANGED MAoVsl+CPpCpus-.VhRHGpussGHpRaRCppC+ ........MAolslpCPpC.s...........usc.VVRNGKSTAGH.QRYL.CScCR..... 0 15 37 72 +3084 PF00049 Insulin ins; Insulin/IGF/Relaxin family Eddy SR anon Overington enriched Domain Superfamily includes insulins; relaxins; insulin-like growth factor; and bombyxin. All are secreted regulatory hormones. Disulfide rich, all-alpha fold. Alignment includes B chain, linker (which is processed out of the final product), and A chain. 
21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.68 0.72 -3.19 28 1447 2012-10-01 19:54:34 2003-04-07 12:59:11 13 6 340 923 373 1711 0 62.30 31 56.65 CHANGED ppLCGscLV-sLh.hVC...G-......R.GFaY..P...hsh.......ht....................................................................................h....ptt+......tKRG..IV-pCChpsColtpLpsYC .......................................................................................plCGtcLscsl..hlC...st...............h...th..................................................................................................................................................................................ppG.....l..scpCC.hp.sCsht..L.t..hC....................................................................................................................... 0 102 130 226 +3085 PF00552 IN_DBD_C integrase; Integrase; Integrase DNA binding domain Bateman A anon SCOP Domain Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain. The central domain is the catalytic domain Pfam:PF00665. This domain is the carboxyl terminal domain that is a non-specific DNA binding domain [1]. 20.70 20.70 20.70 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.65 0.72 -4.52 37 14492 2009-01-15 18:05:59 2003-04-07 12:59:11 16 61 191 15 23 12714 0 49.90 85 8.72 CHANGED hppphsYa+...cspstpW+GPsplLhcGcGAlllpspppt.hhhlPc......RcsKhl.s ..........I.QNFRVYYR...DSRDPlWKGPAKLLWKGEGAVVIQ.DNSD..IKVVPR......RKAKIIRD........ 
0 9 11 16 +3086 PF02920 Integrase_DNA integrase_DNA; DNA binding domain of tn916 integrase Griffiths-Jones SR anon Structural domain Domain \N 20.80 20.80 20.80 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.20 0.72 -4.56 4 459 2012-10-03 08:51:45 2003-04-07 12:59:11 10 6 296 5 41 279 13 64.40 54 14.30 CHANGED EKc+cs+tchl+suEsptK-hRYhhKYl-phtcspFsYSWKLlsTctssAtKp-sluLR.K.tEIpK ....................EKRRDsKsRlL+sGES..QR.p.DGRYLYKYlDsaGcspaVYSWKLss.TDcsPAGKR-slSLREK.sclp+.................... 1 8 15 21 +3087 PF00357 Integrin_alpha integrin_A; Integrin alpha cytoplasmic region Bateman A, Finn RD anon Prosite Family This family contains the short intracellular region of integrin alpha chains. 20.20 10.00 20.20 10.10 20.10 9.90 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.96 0.74 -6.32 0.74 -3.75 27 407 2009-01-15 18:05:59 2003-04-07 12:59:11 15 17 77 12 158 412 0 14.90 53 1.49 CHANGED KhGFFKRph.th.pt KsGFFKRsh.sh.-..... 0 16 23 59 +3088 PF00362 Integrin_beta integrin_B; Integrin, beta chain Finn RD anon Prosite Family Integrins have been found in animals and their homologues have also been found in cyanobacteria, probably due to horizontal gene transfer [1]. The sequences repeats have been trimmed due to an overlap with EGF. 
25.70 25.70 26.20 28.00 25.40 25.60 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.47 0.70 -5.59 48 945 2009-09-12 08:02:03 2003-04-07 12:59:11 13 57 153 59 425 786 1 347.30 38 48.53 CHANGED oCp-CIpsuP...pCAWCsp....................................sF.t....sssRCDshp...pL....hppG....CstppIpsPp..uphpl..tcspsLosttt...........................................t.ph.sQlpPQclpLpLRPGp...................................stpFplph+pscsYPVDLYYLMDLShSMcDDLpsl+sLGspLucchp.plTsNaRlGFGSFVDKslhPalsTsPpc.lpsPC....t.t.sCtssFGa+plLoLTccsspFsppVpcQplSGNlDuPEGGFDAlMQsAVC............................pccIGWR..spup+LLVasTDusaHhAGDGK..LuGIlpPNDGpCHLcsss.........hYotSsth....DYPSluQlscKLp-ssIpsIFA......................................VTpphhs.........lY............cpLsphI.s..soslGhLusDSuNVlpLIpcAYpclsScVplp.pshP-s.lslsasupCts.spth.s.....ppCsslplG-p..................VsFsVplsspc.......Cspctt...pshpI+P.....lGFs-p...LpVplphhCc.C ............................................oCtpCl...ts...tCuWCtp.......................................sa............s.R.Cs.....t..pL..h..tpG........C..p.l....P.t..u.p.hph......psps..lspt............................................................................t.sQlpPQpl..tl.....pLR.s...G..p......................................tphplph+....p...sccYPlDLYYLMDLSh.SMpDDLpplcpL....Gs..p.L...hpthp.pl...........T......p....s...hR.lGFGo.FVDKsl.P....ah.pht...Ptc.lpsPC.......t......pC.ssFu....a+plLsLT..s.psppFpptVt.p..p...p.lSu.N....lDsPEGGh.DAlhQssVC........................................................t..pp..IGW..R.....p...sp+LLVFsTDs.shHhA..hD..G+..Lu....GIl.....P.NDGp.CHLpss.................Y.sh.S..p.h..........DYPSluplsppLsp.pNI..IFA........................................V.Tp..p....h.....................................hY.............pphpphl...P......to.s..lG...L...p.p.cS..sN....llp...L...lh.pAYp..plpS.......pV.l.p............p.......scs.....lp...l.pa.p..u.h..C.s....s....h..t......tpCpslplGpp..................lpFplplp..h
..p.p.........C...t.p..t......pphhl+s.....hGh.p-t...lpltl..p..h.....CpC.............................................................. 0 104 134 265 +3089 PF00143 Interferon interferon; Interferon alpha/beta domain Sonnhammer ELL anon Prosite Domain \N 25.00 25.00 25.80 25.10 23.90 23.60 hmmbuild --amino -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.95 0.71 -4.37 15 1000 2012-10-02 01:28:15 2003-04-07 12:59:11 14 1 135 18 321 1009 0 150.70 39 85.89 CHANGED LPpsasLss+csLpLLtQMsRlSsh.CLcDRpDFsFPpEhhcupQhQKAQAhsVL+EhLQpIFslFpo-pSS.AuWNpTLL-pLpssLaQQLscLcsCLtppht.pEsshsppssh...Lsl++YFpRIplYLcEKKYSsCAWEVVRsEIhRulShS.sNLptRLRp ................................................ppthhspcsLp..LLs.pMp+.l...s..s..hpC..L.p.-..RpD..F.sFP.p...Ehh.p.u.s...Q.h.Q..K..s.Q.A...l.sl.lHEhlQQhFpLF....s..o.ctSu..AuWspoL...L-phhssLp.QQLpcL-.sClhpphs..pc.s.s..l..hpp.ssh...............Lsl++YF.p+I.s.h.Y.LpEKcYSsCAWElVRsElhRsh.....spL.tthp............... 1 17 20 70 +3090 PF03487 IL13 Interleukin_13; Interleukin-13 Griffiths-Jones SR anon PRODOM Family \N 25.00 25.00 46.40 46.40 23.60 19.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.00 0.72 -4.25 4 47 2012-10-02 01:28:15 2003-04-07 12:59:11 8 1 36 11 14 53 0 42.00 77 32.90 CHANGED MALhLTsVIALsCLGGLASPuPVPsSs....sLKELIEELVNITQNQ ..MALhLTsVIALTCLGGLASPuPVPs.So....ALKELIEELVNITQNQ. 0 1 1 1 +3091 PF01348 Intron_maturas2 Type II intron maturase Bateman A anon Pfam-B_105 (release 3.0) Family Group II introns use intron-encoded reverse transcriptase, maturase and DNA endonuclease activities for site-specific insertion into DNA [2]. Although this type of intron is self splicing in vitro they require a maturase protein for splicing in vivo. It has been shown that a specific region of the aI2 intron is needed for the maturase function [1]. This region was found to be conserved in group II introns and called domain X [3]. 
20.80 11.50 20.80 12.00 20.70 11.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.98 0.71 -4.23 25 36524 2012-10-02 14:46:49 2003-04-07 12:59:11 16 20 25025 0 102 35521 17 102.30 63 26.36 CHANGED ppclphhsPlpplltpLscpsahstp........G+PhuhsphssLsDpsIls+aspIhRulhsYYShusN+ppLh.hlpYILRhSCs+TLApKaKs.Tlpphhp+aG................ssLh.pphhsppcph.............shthpphphshpp.......hhschhhlshhp ...........................................h.KKhDTlVPIIPLIGS.L...u.....K.AK..FCNlh..........GHPIS..K..P.l.W.s.D.L.SDS..DII.DRFsRICRNLSHYa.S..G.SS.K.KpsLY...+lKYIl....................................................................................................................................................................................................................................... 1 28 68 90 +3093 PF03519 Invas_SpaK Invasion protein B family Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 26.90 26.40 24.20 17.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.67 0.72 -4.14 7 181 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 173 8 14 65 0 77.60 50 58.83 CHANGED GCsssllusLDsHSTIsLcL.p-hPsIpIuhpsDD...lhlWusls-hppshlptpu.plLt.lhpss..sathGt.h.ltcpss .........GCsPSLIGslDSHSTIsL-L.aulPoIsISlcDDD...VWIWApL....GAsShVlLQQpAYEILhsIME.uC..pFupGsQlLLtcps....................... 0 3 7 9 +3096 PF04741 InvH InvH outer membrane lipoprotein Mifsud W anon Pfam-B_3503 (release 7.5) Family This family represents the Salmonella outer membrane lipoprotein InvH. The molecular function of this protein is unknown, but it is required for the localisation to outer membrane of InvG, which is involved in a type III secretion apparatus mediating host cell invasion [1,2]. 
25.00 25.00 25.40 25.20 22.80 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.02 0.71 -4.35 3 211 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 181 0 2 80 0 116.60 64 95.63 CHANGED MKKFYSCLPVFLLIGCAQVPsPSSGSKPVQQP-AQKEQQANAcSIDECMSLPYVPSDLAKNKTLSNQNADNSASKNNTISSSIFCEKYKQTKEQAFTFFQEHPQYMRSKEDEEQLMTEFKKVLLESGSKNLSIYQTLLoAHKRLQAL ...........................MpKhYSCLPhFhLlGCAQss...ShSKPVQQssAQpEQ.AsAsSIDEC.SLPYVPsDLAKNKoLSNpsADNSASKNssISS..SlFsEKY+QTKEQAhsFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLsAH-RLQAL..... 0 0 0 1 +3097 PF00904 Involucrin Involucrin repeat Bateman A anon Pfam-B_1158 (release 3.0) Repeat \N 30.00 0.10 30.90 0.30 22.00 -0.10 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.55 0.75 -5.76 0.75 -2.71 129 691 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 12 0 40 721 0 10.00 64 70.08 CHANGED lEQQEGQLct .LEQQEGQLch. 0 40 40 40 +3098 PF02121 IP_trans Phosphatidylinositol transfer protein Mian N, Bateman A anon IPR001666 Family Along with the structurally unrelated Sec14p family (found in Pfam:PF00650), this family can bind/exchange one molecule of phosphatidylinositol (PI) or phosphatidylcholine (PC) and thus aids their transfer between different membrane compartments. There are three sub-families - all share an N-terminal PITP-like domain, whose sequence is highly conserved. It is described as consisting of three regions. The N-terminal region is thought to bind the lipid and contains two helices and an eight-stranded, mostly antiparallel beta-sheet. An intervening loop region, which is thought to play a role in protein-protein interactions, separates this from the C-terminal region, which exhibits the greatest sequence variation and may be involved in membrane binding. PITP alpha (Swiss:Q00169) has a 16-fold greater affinity for PI than PC. Together with PITP beta (Swiss:P48739), it is expressed ubiquitously in all tissues [1]. 
21.30 21.30 21.60 22.40 18.60 19.20 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.57 0.70 -5.20 4 692 2012-10-02 19:24:03 2003-04-07 12:59:11 13 19 134 7 422 566 0 221.10 45 46.39 CHANGED hLIKEYRllLPhSV-EYQVuQLY.lActS+pEouG.GpGVElLhNEPYccs.GtpGQYTHKIYHltS+lPualRhLhPcuALslcEcAWNAYPYsRThhTs.ah.EcF.IcIETaahPDhGpQ-NVapLssssh+ph.lshIDIs.RsQVhsuDYKAEEDPthF+SlKTGRGPLussWtcEh...spstcpP......hMCAYKLspVcF+aWGhQsKlEpFIHc.t.RRlhhphHRQhaCW.DcWh-LTM-DIRchE-ETp+tL ...........................................................................h+Ea.Rl..hP.ho.....V.-E..YpluQLY.lu.ctS+pp............o.........s.........G.............G.pG.....V......E.....llp...N.cPa........p..........c.....s.......................s..............t..............p....GQYTcKlYHlt......S+lPuah+hlh....PcsuLhlcEcAWNAYPYs+..T......h.............h............T.........s......s.ah..-cF.IcIET.hap...s.D..h.G..pp..-N.V......a.sLss.....pp.hpph.pl..shIDIs..pct.l...s.t-YK...t...-..EDPphFp.S........KT.........sRGPL.s..s.Whcph..............pP............................hMCuYKLlplcF+.aWG.h.Q.s+lEpFIp.c..t.........+clhhphHR.....QhasWh.......Dc......W......h.......sLTM-DIRchEccsp.................................................. 0 135 169 310 +3099 PF03278 IpaB_EvcA IpaB/EvcA family Mifsud W, Bateman A anon Pfam-B_4003 (release 6.5) Family This family includes IpaB, which is an invasion plasmid antigen from Shigella [1], as well as EvcA from E. coli Swiss:Q9ZNF1. Members of this family seem to be involved in pathogenicity of some enterobacteria. However the exact function of this component is not clear. 
29.90 29.90 35.50 35.00 29.80 29.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.89 0.71 -4.99 9 303 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 125 10 3 134 0 135.40 47 82.57 CHANGED Muppo.lhlshsKlssplLpsVu+Qo.spDlpsWhppE+psYsSRslNpsIDcaChpNNssIosEs+p+lFphVEpca.plsLDhpuAQSSIsHh....ltuNuhFsKKlDp.C.cGhshts+NsTpsplhNhlucpha-Kphs.....scI-lhp.psp ......................p..hhhshsKhssplLpsVu+Qo...stDhppWhppEphsY.SRslNpp....ID....sYChp..........p...Nu...hI...........SpEp+p+lFphVp.....p.....t......h...p.sLDhp...uAQSSIsHh....l.uNt.FsKKhDt..C...cGhs.....s+tsTpsplhs.lucchap+phs.....s-Ichlp.+sp................ 0 0 0 3 +3100 PF04979 IPP-2 Protein phosphatase inhibitor 2 (IPP-2) Moxon SJ anon Pfam-B_5306 (release 7.6) Family Protein phosphotase inhibitor 2 (IPP-2) is a phosphoprotein conserved among all eukaryotes, and it appears in both the nucleus and cytoplasm of tissue culture cells[1]. 20.90 20.90 20.90 20.90 20.50 20.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.02 0.71 -3.90 20 478 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 281 4 294 440 0 115.20 26 55.30 CHANGED +KS.pKWDEhNI..LATaHPu-KDYGhMKlDEPpTPYp......pspsshs-s-sscsls....s-sLucKL..sAucupssshthc.-ppssp..p--p-l..o.Eccp++cpFEp+RKhHYsEhhslKLARpLI..scEhps-s---pp ..........................................................+WDEhNl..h.o.c...tp..ct..shMKI-EP..pTPYp.......h...........tsts.......t.....................t......p.........pc.s..pp.th.s..........................sptl.s.pph..........ts.ps...t..p..............p.h..........t.....t...ppppt..........tpppph.......p.pp.p....t+ptpFc.tRKhHY.s.Eh....h.p.........p.ph....tp...........tt................................................. 1 89 146 221 +3101 PF01715 IPPT IPP transferase Bashton M, Bateman A anon Pfam-B_1875 (release 4.1) Family This is a family of IPP transferases EC:2.5.1.8 also known as tRNA delta(2)-isopentenylpyrophosphate transferase. 
These enzymes modify both cytoplasmic and mitochondrial tRNAs at A(37) to give isopentenyl A(37) [2]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.37 12 5290 2012-10-05 12:31:08 2003-04-07 12:59:11 12 20 4605 26 1405 4112 3280 236.80 32 78.09 CHANGED hplYKshDIGTAKPohp-hpslPH+LlDllDsscsaosupapcpAhptl....p-IptcG+lPllVGGThLYhpuLlcGls.hPts..-sslRtclcpphtppG.shLatpLsphDPttAstIpPsDspRlhRALEVahhTG+shophtpp............tps..ashl.luls.hc+c.LapRIspRhcpMlcsGhlpEV+tLhsps.......hppshsuh+ulGYpphhsaLpG....chs.LsEhhpphhpsTRphAKRQhTWhRp..t.lpWlDst ...............................QlY+s.h.DIGTAKs....o..........c........E...h........t...s.....lP..H+LlD.....lh.......-.s.....s..p................s..........a..Ssu..cFppcAht...tl...............p.c.I...t...s.c.....G...+.l...P.l.l.V.GGTsLYhpuL...l................ps.....h.........s.............h....s.ts............s..........p...l..........R.........t............p............h..........c.....p...................p...........h......t............p............G......................p..t.......L...app....L...t....p...l...D..P....s.A.t....c.I..c...s...s......s.......pRlhRA..L..El...h.h.h.....o...G..p.s..hophhpp......................................t...t....h.......a....p....s...h....h...h.uls....hs..R.phLa.pR...IspRhc.tMl..p.......p...G..h.lc..E....l...ctL.hpps.........................t....s.....h.s..u..h.....pul.GY+phh.s..Yl..pG............................chs....h--......h....h....p...ph....hpsTRphAKRQ...hTWhR.p....t....hpah...t..................................................................... 0 467 901 1191 +3102 PF01745 IPT Isopentenyl transferase Bashton M, Bateman A anon Pfam-B_2229 (release 4.1) Family Isopentenyl transferase / dimethylallyl transferase synthesises isopentenyladensosine 5'-monophosphate, a cytokinin that induces shoot formation on host plants infected with the Ti plasmid [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.55 0.70 -5.19 7 88 2012-10-05 12:31:08 2003-04-07 12:59:11 11 5 64 7 18 3011 1753 163.70 35 80.48 CHANGED hplaLIaGsTsTGKTuhultLAppTGhPVlsLDRlQCh.plusGSGRPhssELpuTcRlYLs.sRslscGlIsAccApcpLhtcVppa.s.csulILEGGSISLlppMups.aWpssF.WplpRhcLssc-sFhspAKpRlppMLpspsstsSlLsELsphWtpsst+shLEsIDGYRhhlcaApppplsh-pLhslst..chhppLlpuIApEYhpHAhhQEp-FPth.....thttu. ..........lhlIhGPTsoGKTshAlt..L.....A....c.....t.....h.......u.....h.....s....l..IulDplQ.sh..plssG..Su.R.Pp.s.s.E.L.p.u.s.p......R........aLt....pp.l........p....G......h.ss..t..p..h.....t...h..hh.....l............t....tt.................h......llEGGS....h...SLh.th.........p.................................h.h.....h...........h.....t.....t..a........h.t...Rhtphh.......t..uhhtEl..hh...t.h...httl....sh.......p..h.aht..th..t.........t...................................t................................................................................................................................... 0 5 11 16 +3104 PF00605 IRF Interferon regulatory factor transcription factor Bateman A anon [1] Domain This family of transcription factors are important in the regulation of interferons in response to infection by virus and in the regulation of interferon-inducible genes. Three of the five conserved tryptophan residues bind to DNA. 
21.00 21.00 21.10 21.80 20.90 20.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.71 0.72 -4.28 26 872 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 101 29 310 642 0 105.30 43 28.65 CHANGED RhRL+PWLltplsSGpasGLpWlsp-+phFpIPWKHAuR+shs...-cDusIFKAWAltpG+aptG.-cs.............DPssWKsshRCALNpos-Fp.lhDpop.csspP.aKVYclh...s..t .........................+h+sWLltQ.lsSspa.PGL.tWls.c.-.+phFpIPWKHAu+pshs...-cDuslFKAWAltp.G.Kapt..G...D.cs...........DPssWKsphRCAL.Np.....s..-Fc.lh.Dpop....c..sspP..aKVYchlst..................................... 0 43 68 158 +3105 PF04120 Iron_permease iron_permease; Low affinity iron permease Wood V, Finn RD anon Pfam-B_71435 (release 7.3); Family \N 25.00 25.00 25.60 25.30 22.40 24.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.54 0.71 -4.57 19 374 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 264 0 195 387 17 117.30 31 64.84 CHANGED Fs+hAstsuphsGpPhsFllAlhlVllWuloGPlFcaSDTWQLVINTGTTII.TFLMVFLIQNoQNRDss......AlQlKLDELItusptApNsh..........lslEcLsEcELcplcptapchuctsppt.thsttpptpsttstp ................................................Fp+huphhsphsGpshsFlhulhlllsWssoG.P.hapaS-TWQLlINTuTTIl.TFLMVFLlQssQNR...Dst..............AlphKLcELltshp....tApsth..........ls...lEphp.tplpthtt.h.t.t.....................ptt............................................. 
0 38 103 150 +3106 PF02060 ISK_Channel Slow voltage-gated potassium channel Mian N, Bateman A anon IPR000369 Family \N 20.60 20.60 20.60 20.60 20.20 20.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.79 0.71 -4.56 3 180 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 45 1 100 159 0 96.40 30 75.87 CHANGED MILPNoTAVhPFLT+LWQETAcQGGNsSG.LARRSPLuDDGKLEALYILMVLGFFGFFTLGIMLSYIRSKKLEHSHDPFNVYIESDAWQEKDKAYFQARVLESaRuCYVlENQLAVEQPsTHLPELKPSs .....................................................................................................ttst..p...h.lYlLhVhuhFuhhlluIhLuYh+SK+hE+ps....D.PaplYI.cp-...Wtpt.......................................h.......................... 0 4 10 31 +3107 PF00180 Iso_dh isodh; Isocitrate/isopropylmalate dehydrogenase Finn RD anon Prosite Domain \N 20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.21 0.70 -5.50 23 12354 2012-10-02 21:08:39 2003-04-07 12:59:11 15 30 5707 258 3358 9844 7894 320.30 29 93.82 CHANGED pIslLPGDGIGPElhpsAl+VLculsppts...hchpacpthlGGsAIDttG..pPLP-ETlcss+cu.-AlLhGAlGGPcW......t.ssphRPEp.GLLsLRKphs.LaANLRPspha..tLtstSPlKp-hscs.lDhllVRELTGGlYFGp.pttps....................st....phuhsTphYocpE.l-RIsRhAFchAtp+.....+t+VsSlDKANVLcsSc....lWRchsp-ltp..............................EaP-lpLpHhllDssuMpLl+sP.ppFD..VllTsNlFGDILSDtAShlsGS.LGhLPSASLus.......pshulaEPlHGSAPDIAGKsh......ANPlAsILSAAMMLRauhshpcpA......stlEsAVppsLpp......GhhTsDLusts........t.huTs-hs-tl 
................................................................................................................lshl.GDGIGsE...lh..ts.hc.llcshh..h...................hpl..php......p.h...h.G....t...t...s..h.c...t.p.u...........p.ls..t.....-s.hpshcc.h..ss...sl.........h..Guls.s.Pp...........................s.th+...s....p....l...lt..lRc..........p.h....s.....hasN...l..........RPs......p..ha.............t.s......ssl+..............s.....t.......t...hD.hll.s.REsot.s..Yt..Ghphthss.....................................................................t........tt.suhsh..phh.o.c.pt...hcRls+hA...FchAhp+.....................+.p.......p.ls.hlc.K.uN.lhchsc....sha.+chsp..-.l.uc......................................................................ch..s......c......l..h..hc.chll....Dshsh...ph.l..p.p.P..tpaD..Vlss.Nl...GD.........llSD.....suhh.s......G.u....lG.hhP...uusls.....................pth..uhaE..s.........s...HG.............o...A..P...c..h.....s.Gps..h..........................sNPhu..Ih....ohshh...L.....c......a.............h.......s.......h...t.......p...t.....A..................ptlpp.u...lt.t.s.ltp......t.h.h.Th.Dhtt.h................tspphhp........................................................ 
1 1065 2017 2773 +3109 PF04279 IspA Intracellular septation protein A TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 24.00 24.00 24.00 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.17 0.71 -4.30 129 1696 2009-09-11 22:50:41 2003-04-07 12:59:11 10 2 1533 0 344 990 1626 176.80 39 93.65 CHANGED M.Khll-hhPlllFFlsYp................hts.............................IhhATushlsAollplshhahhtp.+lspMpllohsllllFGuLTlhhpD-pFIKhKPTllYhlFAssLhsuhhh...t+sllcthh..........sptlp.L....scpsWp+LshtWshFFhhhullNhh......................................V........sh....hop-...........hWVsFKlFGhhulsllFhlsQuhhlh+ah.cp .......................................................MK.hlDalPLllFFsh.YK........................h.h.s.............................IasATushIlATslt.l.h.h.sa.l...p.a.+..Kl-+MtllohllVlVFGGLTLhhHs-pFIKWKsTll..YsLFAssLLlSphh...p..K..sLIp...p...hL..................G.c..pls....L.........P........p...t.......l...W..............s+LN..huWulFFlhhul...h...Nla..........................................l.......ua..h.....h.sps..............hWVs.F.K.lF.GlhulTll.Fsllpulalh+Hh.p.c.................................................... 0 85 177 257 +3110 PF01128 IspD UPF0007; 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase Finn RD, Bateman A, Eberhardt R anon Prosite Family Members of this family are enzymes which catalyse the formation of 4-diphosphocytidyl-2-C-methyl-D-erythritol from cytidine triphosphate and 2-C-methyl-D-erythritol 4-phosphate (MEP) [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.35 0.70 -4.83 6 4492 2012-10-03 05:28:31 2003-04-07 12:59:11 14 13 3924 49 957 7221 4670 220.50 32 84.34 CHANGED lhsllPAAGpGcRMtsGpPKtalsLtGpslLp+TVcuhLup.tlc+IllslsssDhsthppLLss.....plpLVsGGssRs-SVhsGLpAlssss..paVLVHDuARPhltpsslscllAtt-p.psGuIhAlPVpDTIKRs-.sG..hlscT.DRsGLWtAQTPQuFshsLLhcuascu...ppGuphTDDASllEps.GhpVplVsGcssNlKlTpP.DLAlAEhILpp ...................................................sll.AAGhG..p.R.........M.......s............s.................s.........h......P..............KQ...aL...p.l.......s..s........+.....s...ll...h.+...ol.c...s...h..........h....t....p..........s......t.....l.......p..c......l..l....l..s...l...s.....s....s....p...........s....h......h....p....p.....l..h.tp........................ptp.l..p...l...l......p........G.....G....s....p....R.....t.......-......S....V.h...s...u...L..........p..........t.........l.............s........s........p..........t.............s...........h.............V....L.l...H.D.....u.ARPhls...tch...lp.c..........l.l.p.....t....s.......p.........p.............................t..s.....u....s..l.............s........u..l..P............l...t..D.............T.......l....K......p...s......p.............t.s.................hl.......s........p.......T.s.....s........R........s.......p.......L........a......t.......u....Q.......T.....P........Q...........s....F..p...h..p.....h...............Lpc...u...a....p....ps.....................p..p....t....h........p.......h...T...D..-...u...u.....h...........l.E......hh.....G...h....p.......l..t..l..V..p..G..c..h..p..N..l.K....lTpPpDLt.lAchhlt.t.................................................................................................................................................................................. 
0 339 640 817 +3111 PF01695 IstB_IS21 IstB; IstB-like ATP binding protein Bateman A anon Pfam-B_982 (release 4.1) Family This protein contains an ATP/GTP binding P-loop motif. It is found associated with IS21 family insertion sequences [1]. The function of this protein is unknown, but it may perform a transposase function [2]. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.91 0.71 -4.86 17 7213 2012-10-05 12:31:08 2003-04-07 12:59:11 12 34 2932 5 1265 9058 3056 156.90 22 63.86 CHANGED p+plpppl+hA+LP.httslEshDasts.ulDcchltpL.tshsalcptpNlllhGPsGsGKTaLAsAlGhpAsc.tGapVhahpsscLlppLptA+t-Gphtptlppls+s.cLLIlD-hGhhPhsppsuphlFcllscRYE+pShllToNhshupWsclhu.DsslAsAlLDRLlHpu.chl ...........................................................................................................................................................................................h...............p......p........t.......p.......s........l..l.h...hG.s.s.GsG....K.o.a.L..A.s.A....l......u..............t................p........h..............h..........p......p..............u....h.......p...........s.......h.....h...h.....p....h......s...............c....l....h...p.........p...........l.......p.......p...........s.......h.......p......p.......s......p.......h...........p.....p.......h......l......p............p.....l......t...p....s....c....l.....L..l.lD...-..l.........G........h.......................h.......s....p................t.........t..........p........h.....l....h......p......l.....l..s...p.....R..h.......p.......p...t.....s.....h.l.....h.T.oN.hs....hp....p......h...t..p..h...hs.......p............................................................................................................ 
0 465 857 1042 +3112 PF02189 ITAM Immunoreceptor tyrosine-based activation motif SMART anon Alignment kindly provided by SMART Motif \N 21.10 21.10 21.10 21.20 20.30 21.00 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.63 0.74 -6.93 0.74 -3.24 12 339 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 49 15 123 429 0 21.10 35 14.82 CHANGED -tlYpsLspcpcsp.YspLspp ..-tlYpsLppspcsp.YssLst... 0 9 10 16 +3113 PF01156 IU_nuc_hydro Inosine-uridine preferring nucleoside hydrolase Finn RD, Bateman A anon Prosite Domain \N 27.80 27.80 27.80 27.90 27.70 27.70 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.17 0.70 -5.21 118 5444 2009-09-11 13:52:43 2003-04-07 12:59:11 14 17 2525 62 1248 3860 1059 302.10 28 95.00 CHANGED hpplIlDsD....sG.......h...........DD.AhAlhhuh...t.ps.p...................l...clhulTsl............G..Ns.slcpsspNuhpllphh..............h....ts...lPVht.....Gss.........tP............................t.hhs....tphH....GpsGlss.........................................................................................................h..........sttt...tptp........As............phlhctl.p..ptssc......lsllslGPLTNlAhslptt.Pc.........lhpplcc.....lllMGGuht.................hGN.......................h.ssh...A................EaNha.sDPcAAchVh.s........shs...lshl.sL-..lop....p.....shh.............s.tpthppl....tt.....................ss.shuphltp.hhp.hhhph....................tshshaDslssuhh...h..........Pph...............................................................................................p.spp.hhlsV-ss........s.hshGt............o.ls-.......................t.ssssplsh..plDs.......ptFhphhhp.hlt....ths 
.....................................................................................plllDsD..........PG..........h........................D.DAlAlhhAl.....t.pP..c........................................l....-lh...ulos.ss.............G...Ns.....sl.-......p.....s..hpNuhp.l..lphh...........................................t.......pp.........l.P..Vht...Gus...pP..Lh..c.....................................hh.hs........spl.H.....................GpsG.lss..........................................................................................................hs...........................sptth......t.stp......As.......................phlhctl...p......p.s.s...tp..........lTllshGPLTNlAhh.lppp..Pc.................lhpplcc.....lVlMGGuht............................hGN.........................................h..oP..s..A.................EaNla....sD.PEAAph..VFpu..........uhs......................lshs..uL.D...lTp....p.......sh.l.......................................o..sc.h.h.p.phtp...............................................ttt.....su.ph.ltp..lhs...aahphph...........p.t........hsGh.lH.D.sh.s.luhl......l.......p..................P.pl..................h................................................................................p....h..pp..hhlp.V-sp...........u.hstGt...T..lsD....................................t.hsp.sssplhh....sl.Dh...ptFhphhhc.hh...hh............................................................................................................................................................. 0 384 731 1025 +3114 PF04183 IucA_IucC IucA / IucC family Bateman A anon Pfam-B_1982 (release 7.3) Family IucA and IucC catalyse discrete steps in biosynthesis of the siderophore aerobactin from N epsilon-acetyl-N epsilon-hydroxylysine and citrate [1]. This family represents the N-terminal region. The C-terminal region appears to be related to iron transporter proteins. 
25.00 25.00 26.30 25.60 24.50 23.80 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.57 0.70 -5.13 168 2438 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 938 17 394 1532 11 241.30 25 40.66 CHANGED sttsahph....Ep.ulhtGHPhHPssKuR.Gastp-httYuPEhsssh...pLpWlAlccshhphts......................tth...........htpthsspthtt..h.t............................shtsssah.llPlHPWQhppllt...sthts.lspspl.......l.LG...tust.ahsspSlRTlh.....shs....ps.....hhlKhulslp.Tss..hRslssppltsustlsphLpplhtp.-.hh.....pp..shpllpEsAuhshptst..........................th.tctluslhR-sshth..httt.......ppshs..hAuLhpss ...........................................................s.tsaht.EQ..slhpGHPhHPssKu+hGhs.t..p.-.httYuPE..htp.sh...pL.palAlc+shhttps........................tsh....pph.............lpptlssphhpt.htphh.........................................................t.shp...scahhlPVHPWQhcpllt....s.att.lspthl.........l.LG....tstp.ahsspShRTlh.............................sh.s............ps..........halKlslslp.Tus..hRslsspphtsusthschLpplhpp..-.hh.....tp.....thplhtEsuuhshtspp....................................phhppLuslhR-s.hth........tsp......ppshs..hAuLhtp.t..................................................................... 0 100 225 321 +3115 PF01419 Jacalin Jacalin-like lectin domain Bateman A anon Bateman A Domain Proteins containing this domain are lectins. It is found in 1 to 6 copies in these proteins. The domain is also found in the animal prostatic spermine-binding protein (Swiss:P15501). 
21.30 21.30 21.40 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.81 0.71 -4.26 52 1166 2009-01-15 18:05:59 2003-04-07 12:59:11 12 54 224 165 665 1119 2 122.50 23 46.35 CHANGED Gs.WDDG...sa-uVcKlhVutupsuIs.........hlcF-YsKsst.h.spp+Gtps......htscpFplshs.sEalsuVcGhYcp.........tssllsuLpF+TNKppou.hhG.......sGpcFsLp..pGpKIVGFHGpuu.p...hlpulGsYhss ..........................................................................t.h.hp....hh.h..s.....hlt..........lp.h.pY....p.t...s.......t.........s..ttt...Gstss...............thpp.............hpl.s.hs..sEal.....splpGpast..................................t.shl.puLp.....Fp.....T.....sps......p.....h.....s..s....hhs..........psGs..tF.s..h...ss.........p.s.t.pl.....lGF..a.G+uu..t........hlculGsah............................. 0 227 342 470 +3116 PF02375 JmjN jmjN; jmjN domain Bateman A anon [1] Family \N 26.00 26.00 26.40 27.40 25.30 25.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.36 0.72 -4.42 65 1103 2009-01-15 18:05:59 2003-04-07 12:59:11 12 65 271 51 660 1080 7 34.20 45 3.06 CHANGED PlFcPThEEFpD.hpYlpp..Icp....hut..chGlsKllP .PlFcPThEEFc.DshtY....Isp..Ics.....ut..+sGIsKllP..... 
0 166 312 494 +3117 PF03957 Jun JNK; Jun-like transcription factor Finn RD anon DOMO:DM01956; Family \N 20.90 20.90 24.10 22.40 19.10 19.10 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -12.06 0.70 -4.64 8 302 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 169 1 108 298 0 171.70 38 70.40 CHANGED METPFYtDDuLSuh....tuuua............s.........u.ss.....shlhK.pohsLNLo-s.uuuLK.Pstt......tstu..ststsuGL.LsSPDlGh.LKLASPELERLIIQS.NGLlTTTPT.PuQFLYPK..........s..lTsEQpsFAEGFVKAL-DLHKQNQ.Lsus..ssuutuASu..hsuP...hus.Au.uslhusuhtsE.PPVYANLSoasPss..s...usuasusohuauA.Ph.........sP.h....ss..PR.......hsALK-EPQTVP-sPS.G-SPP ...............................................................................................................................................................................................................................................ss-.....h.t..hplsos-LE+h..I........l.....s...ss.h.s....oP.s.ss....p....hhhs+....................s..lTcEQEGFA-GFV+ALs-LH..p..pNp...hsss......ss.u..ssss...........h.s......sss...Au.....s...ssh..su..s..hts-..sPV.Yss.....Lssassss......o..........sss.h.s.ss..s.hsas.....................s.h......................ssthP+..................hpuLK.EE.PQTVP-h...st............................................................................. 0 22 33 66 +3118 PF01486 K-box K-box region Bashton M, Bateman A anon Pfam-B_25 (release 4.0) Family The K-box region is commonly found associated with SRF-type transcription factors see Pfam:PF00319. The K-box is a possible coiled-coil structure [2]. Possible role in multimer formation [1]. 
23.50 23.50 23.50 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.16 0.72 -4.21 68 4553 2009-01-15 18:05:59 2003-04-07 12:59:11 12 15 1081 0 451 4266 0 93.00 28 47.14 CHANGED asptssss.....htpsphpphppEhsKL+pplctLQ.....ps.RphhGEsLssLshKELppLEppL-pulppIRu+Kscllhsplcphp+KEcplpppNptLppKlt- .......................................................................ptp..pph...ppE.ht...+L+p.p.cplp.......pp.....R.p.h.h...G..E....-..L..s..s..L.s.h.cELp.pLEppL-p....uLpplRs+K............tp....lh.h.pplpphp+...K.....-.....pp.l.pctNp.Lppph....................................... 0 56 254 350 +3119 PF02960 K1 K1 glycoprotein Bateman A anon Pfam-B_345 (release 6.4) Family \N 21.20 21.20 21.50 23.30 20.60 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.83 0.71 -4.10 4 847 2012-10-03 02:52:13 2003-04-07 12:59:11 9 4 3 0 0 751 0 99.90 76 46.82 CHANGED GLSSRLSNRICFWApCANITPETsTVSVSSTTGFK.............h.pTNtLlpIIPATTHAlVVVEEVKSppPaIpV.FLVFMTLVALIGTMCGILGTIIFAHCQKQSDSNKTV.QQLRDYYSLHDFpTEDYTQPVDWY ...GLSSRLSNRICFWApChNITPETaTVSVSSTTGF+TFSTNuLlp..II.ATTHsVVVVcEsKSTNsHIpVPFLVFMTLVALIGTMCGI.L................................................... 0 0 0 0 +3120 PF02149 KA1 Kinase associated domain 1 Mian N, Bateman A anon IPR001772 Domain \N 20.80 20.80 21.10 21.40 20.50 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -8.02 0.72 -4.51 26 802 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 272 3 433 777 1 46.20 40 6.17 CHANGED tsssl+FElplsKl......shhGlch+RluGssahapclssplLppL+L ......s.tshlpaEhEVCKLP.p..........huLsGl+FKRl.u.GsuhsaKslsscIhs-L+L........ 0 138 206 321 +3121 PF02524 KID KID_repeat; KID repeat Bashton M, Bateman A anon Pfam-B_1382 (release 5.4) Repeat This is family contains the KID repeat as found in Borrelia spirochete RepA / Rep+ proteins. The function of these proteins is unknown. 
RepA and related Borrelia proteins have been suggested to play an important genus-wide role in the biology of the Borrelia [1]. 20.80 15.10 21.20 15.10 20.10 15.00 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.75 0.77 -5.93 0.77 -2.78 28 617 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 25 0 37 464 0 11.00 84 15.55 CHANGED KIDsVEpsLpt KIDsVEKNLpp. 0 37 37 37 +3122 PF00050 Kazal_1 kazal; Kazal-type serine protease inhibitor domain Eddy SR anon Prosite Domain Usually indicative of serine protease inhibitors. However, kazal-like domains are also seen in the extracellular part of agrins, which are not known to be protease inhibitors. Kazal domains often occur in tandem arrays. Small alpha+beta fold containing three disulphides. Alignment also includes a single domain from transporters in the OATP/PGT family Swiss:P46721. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.05 0.72 -3.94 51 1792 2012-10-02 00:52:43 2003-04-07 12:59:11 16 207 356 88 938 4387 283 52.00 31 19.67 CHANGED Csph.............Cs...tphpPVCGoDGhTYsN-Ctl....C.....ppppslplp+pGtC .......................................................................................Cs.......tp.h.pP...VC....G..o.......D...u....p.....T...Y...sN.c.Cth...................C......................pp....t....t..p...l..p...l..t..p..G.C.................................... 0 276 378 621 +3123 PF03522 KCl_Cotrans_1 K-Cl Co-transporter type 1 (KCC1) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 28.00 26.80 24.50 23.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.14 0.72 -4.44 3 128 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 40 0 48 150 0 29.80 66 2.91 CHANGED VKDRNShLRLTSlGSDEDEETEAusEKVHM .VKDRpShL.RLpSlhSDE-.-Eo.tsht...-KlpM.... 
0 2 5 18 +3124 PF03520 KCNQ_channel KCNQ1_channel; KCNQ voltage-gated potassium channel Griffiths-Jones SR anon PRINTS Family This family matches to the C-terminal tail of KCNQ type potassium channels. 25.00 25.00 25.20 25.20 24.50 24.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.43 0.71 -5.00 9 568 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 136 9 208 521 1 163.00 42 29.81 CHANGED R+Ssusslss....hsuSPoKs.p....ShuFscRo+hh.shh...........ss.usuhD.lt-EcthpC.hphppLpsshKssIRslRhhKaaVAKRKFKEsL+PYDVKDVIEQYSAGHLDhLsRIKpLQtRlDQIlGKsshhsccp.+sK............shtphShhuRVsKVE+QV...tslEpKLDlLlsh...........appphttssssshshsssphP ....................................................................................................p.s......ttSPsKs.K....uh.uhssRsR.h.p.uhthps.t......................ttt.h.s..-...h...-.-..c.sht..s.-...h..hp...-lhs...s...lK.ssIRul.R..hh+FhVuKRKFKEs...LRPYDV+DVIEQYSAGHLDMLsRIKpLQs.R.............lDQIlG+s......ss+.c.+.p.K.........................................c.p.ShhuRlsKVE+.QV...tsh-pKLD..hLlsh............h.phht..s........................................................ 
0 34 52 115 +3125 PF03812 KdgT 2-keto-3-deoxygluconate permease TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 25.30 25.10 19.80 22.00 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.09 0.70 -5.27 3 900 2012-10-02 17:06:44 2003-04-07 12:59:11 8 5 674 0 114 494 12 292.30 53 95.17 CHANGED MKIKpoIEKIPGGMMLVPLFLGALCNTFoPGAGKYFGSFTNGLITGTlPILAVWFFCMGASI-F+ATGTlLRKSGTLVVTKIAsAWVVAlIAupFIP-DGIQsGFFAGLSVLALVAAMDMTNGGLYAALMNQYGoKEEAGAFVLMSLESGPLMTMVILGsoGIAoFEPclFVGAVLPFLlGFALGNLDPDLR-LFSKuVQTLIPFFAFALGNTINLoVIlQTGLLGIFLGVuVIIVTGIPLILADKFIGGGDGTAGVAASSSAGAAVATPlLIAEMsPuFAPVApuATALVATSVIVTSlLVPIlTulWuKKlK ....................MpIK+slE+lPGGMMlVPLhlGALhpTFuPtA...hc......hG..uFTs.uh.h..s..G..s..s..P.IL.ulahhCMGAsIplpAs.spsL+.KuG.oLs.loKlulAhlluhlsu+lhstcG.l......hGLSsLAllAAMs.oNGGLYAulhtpYGsc...............c-u...GAhslhSLpsGPhhTMl...hL...GsA.GlA..s..h..hshVusllPhllGhhLGNLDs-hR-Fhscus.sLIPFFAFALGssIsLshlhp.sGLhGIL.LGlhslhlsGh..h.IhAD+.L...l....u...G.....G.s..G.sA..Gh..AA..SSoAGsAVATPshIAphsP.u.F.p..s.hAsuAT..ulVAsuVIlTuILsPlLTuhhu++................ 
0 29 52 90 +3126 PF03814 KdpA Potassium-transporting ATPase A subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.30 20.30 22.30 22.20 20.20 20.20 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.82 0.70 -6.50 14 2239 2012-10-03 11:11:44 2003-04-07 12:59:11 10 6 1897 0 441 1655 66 488.70 47 98.06 CHANGED IhlhlsllsslshsLGtYlt+Vatsp+.............shhs.lhsslEphlY+lhGlsPcp-MsW+pYhhAlLshNlhhhlllahllhhQusLPhNPsshsuhs.cLAFNTAlSFlTNTNhQsYuGEsohSYhSQMhulTh..FsSAATGlAVuhAhlRGlstppsstLGNFasDLlRshhRlLLPluhllAllLlhtGlPQTLtu..hsssTLpGuhQpIshGPVAS.EAIK.LGTNGGGFFsANSAHPFENPsshoNhlchluhhllPsAhhasFGchltsp.....+QuhslhsshhllaluhsslshhuEhtGN..PhlpsLGlp........suNMEGKEsRFGhutSuLaAlsTTusosGuVNuMHDShTPlGGhlsLhtMhLspl.hGGhGsGlhshlhallLsVFluGLMVGRTPEaLGKKIEu+ElKhssLslLlpPlhlLl.oAlAhslPsuhuuhsNPG.HGhSElLYtaoSAuANNGSuFuGLssNT.aaNlohGlsMLlGRalsIlshLAIAGSLAsKpssPtosGTlsTDssLFsuLLlusllIlGuLTFhPsLALGPIu...EtLs ...................................................................hhhlhllhlluhPLGtalt+.lhpsp...................h...h.h.s.lEphla....+lhGlp....s....t....tcMsW+pYhhulLshNhhshhhlahlhhhQthLPL.N.Ppt................h.s.u.h.s...cLAhNTAlSFlTNTNhQtYoGE..sslSYhoQMhGlsh.pFlSAAoGlAVhhAllRu.h..............s...................t.......p.............p................h........p.......................s...................lGNaWsDlsRhslhlLlPluhllAlhhltQGs.Qshp..................s.h.t.................l.......p.........T.....l............p.................G............s..........p...Q.........h.....l.....shGPVAS.EAIK.LGTNGGGFFsuNSuHPFENPoshoNhlphhulhLIPsALsasFGchstcc.............................RQ....GhhlhhuMhhlFlhsls.lsh.huE.hpGN..Phl.t.sh.G.htt.............................usNM.EGKEsRFGlh.............h..............SuLFu.ssTTusSsGuVNuMHDShTsLGGhlshh.M.lspV.FGGVGsGLhshllaslLuVFIuGLMlGRTPEYLGKKI-s+EMKhs.sLslLlpP.hllLhhoAlA....h.........h....h......s..........u.......u........t...s.....u............h............hN........P.....G...............HGhSElL
YthoSAAsNNGSuFuGL.s......s........N......T................s................F.............aNhhhuhsMhlGRFhsIlshlAlAGSL..........ssK.....+..h........s.t.o..s..GTlsTcsshFsslLlsslllluALTFhPsLuLGPlAEaL............................ 0 126 260 357 +3127 PF02669 KdpC K+-transporting ATPase, c chain Bashton M, Bateman A anon COG2156 Family This family consists of K+-transporting ATPase, c chain, KdpC. KdpC forms strong interactions with the KdpA subunit, serving to assemble and stabilise the Kdp complex [1]. It has been suggested that KdpC could be one of the connecting links between the energy providing subunit KdpB and the K+-transporting subunit KdpA [1]. The K+ transport system actively transports K+ ions via ATP hydrolysis. 19.90 19.90 20.10 20.10 19.50 19.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.13 0.71 -4.60 3 2067 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 1913 0 393 1342 41 180.70 42 95.45 CHANGED hRsLlPALoohllLhlITGlVYPLlVTVlGQLaFPaQANGSLIc-u.GQVlGSALIGQsFTusGYFHuRPSAsu-u.....YssuASGGSNLAPSNP-LLutIAARVsAhRtEN..sAuspVPVDLVTuSGSGLDPsISPsAAphQhPRVAKARNISlcQLppLIsKHT-uRhLGalGEPuVNVLcLNLALD+L .......................................h...lpsulthhlhh....hl.l.s....Ghl..YPLlhTuluQ.h.h.FstQ.ANGS..Ll.p......ps.....s.......p..............l..l..GStLIG.....Qs.....F.........T...............s......s.......t.........YFaGR.....P.....S.....A.ss.t.................Ysst.......u...S.GGS.Nhus.......o.......N.............P..............cLtpplp........t.+.......l.s...t.h.............pt.......tN............s.s.....s.....s.....V.....P..sDL.VTuSuSGLDPcIoPpuAthQlsRVAc..A......R........s..........l.sh...pp...lppL...l...scpopt..h.huh...lG.ps.hVNVLcLNlALDp............................................................... 
1 117 235 323 +3128 PF02702 KdpD Osmosensitive K+ channel His kinase sensor domain Bashton M, Bateman A anon COGs Family This is a family of KdpD sensor kinase proteins that regulate the kdpFABC operon responsible for potassium transport [1]. The aligned region corresponds to the N-terminal cytoplasmic part of the protein which may be the sensor domain responsible for sensing turgor pressure [2]. 26.10 26.10 26.20 28.40 26.00 25.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.12 0.70 -5.20 21 2010 2009-09-10 15:55:13 2003-04-07 12:59:11 12 29 1860 2 394 1489 34 208.40 55 27.21 CHANGED scRG+L+IalGhAsGVGKTYsMLp-A+chhcpGhDVVlGhVETHGRs-TtshlcGLthlPh+plpY+GhtLpEhDlDAlL.tR+PpllLVDELAHTNssGSR+cKRaQDlEELLsAGIDVaTTlNlQHlESLNDlVppITGlpVRETVPDpllcpAD-lhllDlsP--LhcRLppGKlYts-pscpAlsNFFphsNLsALRELALRcsADcV .......pRG+LKlahGAusGVGKTaAMLscApcht.tpGlDlVl.GlVETHGRt-..TsAhl..-G...LthlPh+pl...pY+...G+plpEhDLDAsL....tR.........+.........P.s.llLlDELAHo.....Ns.....PG...S......R...H............KRWQDlEELLcAGIDVaTTVNlQHLESLNDlVp..tlTGlpV+..ET.VPD.hhcp.AD-l.LVDlsP--LhpRLpcGKVYhs.c........p.s.-pA.l.......psFFphuNLhALRELALRcsAD+V.............. 0 120 245 325 +3129 PF04962 KduI KduI/IolB family Bateman A anon COG3717 & Pfam-B_11840 (release 10.0) Domain This family includes the 5-keto 4-deoxyuronate isomerase enzyme EC:5.3.1.17 that is involved in pectin degradation. This family aldo includes bacterial Myo-inositol catabolism (IolB) proteins. The Bacillus subtilis inositol operon (iolABCDEFGHIJ) is involved in myo-inositol catabolism. Glucose repression of the iol operon induced by inositol is exerted through catabolite repression mediated by CcpA and the iol induction system mediated by IolR [2]. The exact function of IolB is unknown. Members of this family possess a Cupin like structure. 
20.10 20.10 20.10 20.60 20.00 19.80 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.73 0.70 -5.31 72 2006 2012-10-10 13:59:34 2003-04-07 12:59:11 7 5 1578 16 396 1311 510 252.20 30 91.15 CHANGED LLs+s.t...psGp...lhploPcsA.....sWpalGFplhcLtsGpshphp.....ssspEhslVll...sGpssVss....ts...patplGsR.toVF-.......tsPtulYlPsspphplsA.tss...sclAl.ssAPupssh..ss+.hlsPsslshptRGpGssp....RhVpsIhspsps....AcsLLVsEVh.TPuGsWSSYPPHKHDp-s.s....tEohLEEsYYa+lsPsp..........GFuhQRVYo....-.....DcslD-shsVpstDVVhVP+GYH.PsssssGY-hY.YLNVMAGPp..RtWthps-PsHpWl ................................................................hh.hhh..........t.....hhph.sp.h.....shthh.s.h.p...h...h.h.h..t.sGcshp.h.......hpcRElsllsl.......uGsuslps.........-G...p.hhclGpR................-ulYls....pus.c.........s....lhu....tss.........A+...hh.l.ssAPA....+..p.sa......Ps..+.....hl.s.......ss...........-............s....s........s..................p.....hG..c..s...tss..............Rplpphh.....ss......s.......sh......tsspL.hu.sh.sPGuhWsohPsHpH-cc..s......t.p..ppsa.aat.hpPpp..................hh..................t...sc..ph..s..lpNcp.sVls.P.ph.....sltuusGhcsY.alhsMsG.s...........p.h...hsD..cH.h.................. 0 122 254 318 +3130 PF02422 Keratin Keratin Bateman A anon Pfam-B_1920 (release 5.4) Family This family represents avian keratin proteins [1], found in feathers, scale and claw. 
25.00 25.00 25.10 25.10 24.60 24.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.58 0.72 -4.03 4 299 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 20 0 238 304 0 100.70 56 90.20 CHANGED SChs..L.CtP..Cs...PTPLAsSCNEPCVRQC.DSoVVIQPssVVVThPGPILSSFPQsosVG.SouA.AV..GShLutp.GhshuSGGa.GLuGaGGpYsG.hst.s .......................................SC.s........CtP...Cs....PsPLAsS.CNEPCV.RQCQDSpVVIQPsPVVVTLPGPILSSFPQNTuVG.uST.SA...AV......GSh...L..Ssp....Gl........P..I.o..SG....G.F.....sl...u..shus..t.hs.G.hs............................................................................. 0 0 0 98 +3131 PF01500 Keratin_B2 Keratin, high sulfur B2 protein Bateman A anon Pfam-B_706 (release 4.0) Family High sulfur proteins are cysteine-rich proteins synthesised during the differentiation of hair matrix cells, and form hair fibres in association with hair keratin intermediate filaments [1]. This family has been divided up into four regions, with the second region containing 8 copies of a short repeat [1]. This family is also known as B2 or KAP1. 27.60 27.60 28.00 27.60 27.40 27.50 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.61 0.71 -12.53 0.71 -4.47 8 307 2012-10-03 03:07:01 2003-04-07 12:59:11 12 11 41 0 85 433 0 97.90 45 90.76 CHANGED M..ACCuTSFCGFPoCSTuGTCGushCQPsC.p....................................oSCCQPoCspTSCCQPhs..oSCCpPo......................CpP.shQTSCCQPTChQTSuCpTGCGIGGuhGYGQ.GSSGAVSoRhRWCRPDCRVEGTCLPPCCVVSCTsPoCCQLaaAQASCCRPSYCGQSCCRPsC.......CC.............CsEPoC ............sCt......s.hSo.u.CG........p..............soCCpsssspTosCp.P................................CppP.sC.hpsssht..p.us................................................................................................................................... 
0 17 17 18 +3132 PF04579 Keratin_matx Keratin, high-sulphur matrix protein Waterfield DI, Finn RD anon Pfam-B_4676 (release 7.5) Family Family of Keratin, high-sulfur matrix proteins. The keratin products of mammalian epidermal derivatives such as wool and hair consist of microfibrils embedded in a rigid matrix of other proteins. The matrix proteins include the high-sulphur and high-tyrosine keratins, having molecular weights of 6-20 kDa, whereas microfibrils contain the larger, low-sulphur keratins (40-56 kDa) [1]. 25.00 25.00 32.40 94.70 22.70 22.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.43 0.72 -10.87 0.72 -3.71 4 69 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 27 0 36 73 0 96.40 74 96.39 CHANGED ACCAhhCCSVPTGPATTICSSDK.CRCGVCLPSTCPHsI.LLQPTCC.DNsPPPCphPpshVPTCFLLNSsHPTPGLpuINLTTYlQPGCEpPC.PRC ..........CCss+...sCSVPTGPATTICSSDKsCRCGVCLPSTCPHpl.LLpPTCC.DsCPPPCplPp.....shVPTCaLLNSspPTPGLpsINLTTasQP.sCE.....PC.PpC..... 0 3 3 5 +3133 PF03882 KicB KicB killing factor Bateman A anon COG3006 Family The kicA and kicB genes are found upstream of mukB. It has been suggested that the kicB gene encodes a killing factor and the kicA gene codes for a protein that suppresses the killing function of the kicB gene product [1]. It was also demonstrated that KicA and KicB can function as a post-segregational killing system, when the genes are transferred from the E. coli chromosome onto a plasmid [1]. 
20.00 20.00 20.30 21.20 18.00 19.90 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.54 0.70 -5.86 3 769 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 745 10 73 342 4 428.70 78 99.75 CHANGED MSEFSQT.....IPELVoWAKcpDFSLNLPTERLuFLLAIAlLNNERLDGEMuEGELVDAFRHVocAFEQSoEAIAsRANNAINDLVKQRLLNRFTSEhTEGsAIYRLTPLGIGITDYYIRQREFSsLRLSlQLSIVADElpRAuDSAEEGG-....EaHWRRNVFAPLKYSVAEIFDSIDLoQRlMDEQQQSVKD-IA-LLNKDWRAAISSCEtLLSETSGTLRELQDTLpAAGDKLQAQLLRIQDCVhG+DDLaFIDcLloDLQSKLDRIISWGQQSIDLWIGYDRHVHKFIRTAIDMDKNRVFSQRLRQSlpcYFDcPWaLTYAsAERLlDLRDEEMsLRDEEVTGELPEELEYEELs-l+DQLAp+Ip-hLtVYKEpssPIDLuLVLREYLusYPRoRHFDVARIVVDQAVRLGhApuDaoGI.PDWQAINDaGAKVQAHVIDKY ......................................................MSEFSQT.....VPELVAWARKNDFSISLPsDRLuFLLAlATLNGERLDGEMSEGELVDAFRHVS-AFEQTSETIuVRANNAINDMVRQRLLNRFTSE.AEGNAIYRLTPLGIGITDYYIRQREFSTL.RLSMQLSIVAuELKR.AADAAEEG.GD....EFHWHRNVYAPLKYSVAEIFDSIDLTQRlMDEQQQQVKDDIApLLNKDWRAAISSCElLLSETSGTLRELQDTLEAAGDKLQANLLRIQDATMs.+.D.DL.aFVD+LVFDLQSKLDRIISWGQQSIDLWIGYDRHVHKFIRTAIDMDK.N.RVFAQRLRQSVQsYFDcPWALTYANADRLLDMRDEEM.uLRDEEVTGELP.DLEYEEF.NEIR.E.QLAAlIE-pLAlYKs+QsPLDLGLVlREYLuQYPR..ARHFDVARIVlDQAVRLGVApADFo.GL.PAcWQsINDYGAKVQAHVIDKY.................................................................................. 0 8 23 49 +3134 PF04383 KilA-N KilA-N domain Aravind L, Iyer LM, Bateman A anon Iyer LM Domain The amino-terminal module of the D6R/N1R proteins defines a novel, conserved DNA-binding domain (the KilA-N domain) that is found in a wide range of proteins of large bacterial and eukaryotic DNA viruses. The KilA-N domain family also includes the previously defined APSES domain. The KilA-N and APSES domains may also share a common fold with the nucleic acid-binding modules of the LAGLIDADG nucleases and the amino-terminal domains of the tRNA endonuclease [1]. 
21.00 19.70 21.00 19.70 20.90 19.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.46 0.72 -4.44 139 2605 2009-01-15 18:05:59 2003-04-07 12:59:11 8 39 812 3 1531 2312 29 103.00 27 37.07 CHANGED hphsphplll.pp.p.ss...alNhTcltchs.....t...Kc......hppWh+pcpopcllpphppt.s...........................................h...............................h...tttpttp.........hp.G...sY.lH.clshplA.pW.ISs.paslhl.chlpphhpt .................................................h.l..cp.p..ss...hhphpslhpst......t.pp.+p.................pcWhcsppTpEllpphppth.ht.......................................................................................................................h.h.s.ptthsst.........hp.G...hY.lHc.Ll.shA.hW..hSP.cathhlhchhDplpp.................................................................. 2 1330 1420 1504 +3135 PF02172 KIX KIX domain Bateman A anon Pfam-B_4149 (Release 4.2) Domain CBP and P300 bind to the CREB via a domain known as KIX [1]. The KIX domain of CBP also binds to transactivation domains of other nuclear factors including Myb and Jun. 21.20 21.20 23.40 23.10 21.10 20.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.58 0.72 -3.93 6 242 2009-09-11 11:24:58 2003-04-07 12:59:11 11 28 90 6 142 201 0 78.30 70 4.11 CHANGED shcKsW+cplTpDLRsHLVcKLVpAIFPsPDssAhpDpRMcsLluYA+KVEt-MaEpApsR-EYYHLLAEKIYKIQKELcE ............G.h+KsWHEclTQDLRNHLVHK....L........VQAIF.PT...P..DPAAL.K..D+RMENL..VAYARKVEGDMYEoANoR....sEYYHLLAEKIYKI.QKELEE.......... 1 45 57 101 +3136 PF03037 KMP11 Kinetoplastid membrane protein 11 Griffiths-Jones SR anon Pfam-B_1062 (release 6.4) Family Kinetoplastid membrane protein 11 is a major cell surface glycoprotein of the parasite Leishmania donovani. 
25.40 25.40 25.40 139.80 24.70 25.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.14 0.72 -3.40 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 19 0 4 44 0 88.60 90 97.89 CHANGED MATTYEEFuAKLDRLDsEFsKKMpEQNtKFFADKPDESTLSPEMKEHYEKFEKMIQEHTDKFNKKM+EHSEHFKtKFAELLEQQKNAQaP MATTYEEFuAKLDRLD-EFNKKMQEQNAKFFADKPDESTLSPEMKEHYEKFERMI+EHT-KFNKKMHEHSEHFKpKFAELLEQQKAAQaP.. 0 1 2 4 +3137 PF03790 KNOX1 KNOX1 domain Finn RD anon Pfam-B_533 (release 7.0) Family The MEINOX region is comprised of two domains, KNOX1 and KNOX2. KNOX1 plays a role in suppressing target gene expression. KNOX2, essential for function, is thought to be necessary for homo-dimerisation [1]. 20.50 20.50 20.80 20.60 19.30 19.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.17 0.72 -4.63 30 1800 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 158 0 141 642 2 43.00 58 16.60 CHANGED sslKAKIhuHPpYspLLsAYlsCpKVGAPPElsshL-tlppctcs ......ulKuKIluHPpYPpLLuAYlDCQK.....lGAPPE.VVucLDtlopEhp.................... 0 14 85 119 +3138 PF03791 KNOX2 KNOX2 domain Finn RD anon Pfam-B_533 (release 7.0) Family The MEINOX region is comprised of two domains, KNOX1 and KNOX2. KNOX1 plays a role in suppressing target gene expression. KNOX2, essential for function, is thought to be necessary for homo-dimerisation [1]. 20.80 20.80 21.00 21.10 20.40 19.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.67 0.72 -4.69 20 1804 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 164 0 135 669 0 34.80 65 13.37 CHANGED sssslGsDPELDpFMEsYCclLsKY+-ELo+Pac....EAhsFLpcIEsQLssLst ......th...sIGhDPE.LDQFM..EAYC...EMLhKY+ppL.............................. 1 13 84 115 +3139 PF00051 Kringle kringle; Kringle domain Sonnhammer ELL anon Swissprot_feature_table Domain Kringle domains have been found in plasminogen, hepatocyte growth factors, prothrombin, and apolipoprotein A. 
Structure is disulfide-rich, nearly all-beta. 26.30 26.30 26.70 26.30 25.60 26.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.28 0.72 -3.88 24 2697 2009-01-15 18:05:59 2003-04-07 12:59:11 13 237 179 118 1376 2545 55 76.90 39 21.44 CHANGED ChpGsGpsYRGshupTtSGhsCQtWsuphs+phsh.sspp.sstsLtt...NYCRNPDG-tc.PWCYT.pssphpaEaC.slspC ......................................................Ch.p.s.s.G..psYRGsh.s..h....T....t...oG....h.....p..CQtW.s..u....p.h..P..H....p.......a......p...h........p....s...p..p...h..s..........s..tsL.pc.................NY..C............R....N....P.....D....u.......c........t....p....P..WCYT....s.....s...s...p......h......p....a...-a.C.slstC..................................... 0 528 604 870 +3140 PF00197 Kunitz_legume Trypsin and protease inhibitor Finn RD anon Prosite Domain \N 20.70 20.70 21.30 21.10 20.10 20.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.58 0.71 -4.72 17 766 2012-10-02 19:42:32 2003-04-07 12:59:11 13 4 124 55 109 793 0 164.40 31 83.53 CHANGED sVlDs-Gc.lpsGGsYYllsthhutGGG....hphstsup..CPLsVVposs-ls...pGhPlphss.h..tssh.ssh...ohlplpFsshs.phsss.....hWpVscpptt.t..hVphuthcs...sp..s.hFKlc+sst.........YKLlaCstt...........ptCpslGlphD.tcshpRLllopp.sPlslhFpK ........................VlDspGct.lp..s.G..s.pYh.Ihs..shhs..t..uGs..........lth.s.s..ss..t.hC.P....s..Vlppst...p..s...pGh.P..VpFos......hpspsslIp.so.lslp.F.s..ss..o.s...ph.sss..............hW+.ltpp.............s...........s..s...........t.....sh...hVsTGGstu..............tshFKIpKhss...s.............YpLs.aCP.s...........ps.C.hslGhh.hp.....t.phLs.hsp....sh.hhF...................................... 
0 7 91 97 +3142 PF02442 L1R_F9L L1L_F9_C19; Lipid membrane protein of large eukaryotic DNA viruses Mian N, Bateman A, Coggill P anon Pfam-B_1868 (release 5.4), Iyer L Family The four families of large eukaryotic DNA viruses, Poxviridae, Asfarviridae, Iridoviridae, and Phycodnaviridae, referred to collectively as nucleocytoplasmic large DNA viruses or NCLDV, have all been shown to have a lipid membrane, in spite of the major differences in virion structure. The paralogous genes L1R and F9L encode membrane proteins that have a conserved domain architecture, with a single, C-terminal transmembrane helix, and an N-terminal, multiple-disulfide-bonded domain. The conservation of the myristoylated, disulfide-bonded protein L1R/F9L in most of the NCLDV correlates with the conservation of the thiol-disulfide oxidoreductase E10R which, in vaccinia virus, is required for the formation of disulfide bonds in L1R and F9L [2]. 21.90 21.90 22.80 65.90 21.10 21.80 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.42 0.71 -4.84 32 163 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 82 6 0 124 2 198.30 32 74.57 CHANGED Musus..slp........slhsthsp+hhppLspp.sssssssC..slcIGplpsch.psCslpltNhC.sssphp...hshllpuhp-.shssLspcp+ttl............Atplshsl.ssssp..hpsplcppC.pupuslsspIsl...pslplspC.usssph.hphphlNoGoutuNCuhpulhsshsppss.....stltpp.pstspsh...hhlhhsllllhlhhlhhh ...........Musst..shp........ThhNhh.-+hhpcLtQp.sssssssC..sIcIGplhhch.+sCslplpNhC.usushp...hshllpuhpE.shssLspcp+shl............AtpIthsl.ssssp...h.splcpsC.supAslsNhlcI...Qsl.ls-CtussGph.hplphlNoGoupuNCulpslhpshsKtss.......Itpp.phshtsh...hhllhlllllllhslhhh...... 0 0 0 0 +3143 PF05047 L51_S25_CI-B8 Mitochondrial ribosomal protein L51 / S25 / CI-B8 domain Wood V, Bateman A, Finn RD anon Pfam-B_9461 (release 7.6) Domain The proteins in this family are located in the mitochondrion. The family includes ribosomal protein L51, and S25. 
This family also includes mitochondrial NADH-ubiquinone oxidoreductase B8 subunit (CI-B8) EC:1.6.5.3. It is not known whether all members of this family form part of the NADH-ubiquinone oxidoreductase and whether they are also all ribosomal proteins. 21.00 21.00 21.20 21.20 20.90 20.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.29 0.72 -4.23 67 788 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 307 1 534 748 8 54.90 25 38.29 CHANGED sRpFl.ppphsshp.tpNPpl.lhlpc...pss.p....PhlhucYtsG.............................pccplslcshss ............sRpFl.cpplsp....h....p.ppNPpl.lhlpc.....psshp........PtlhucY.t.pG........................................ppctlslcshp................................................................................ 0 164 279 432 +3144 PF04604 L_biotic_typeA Type-A lantibiotic Waterfield DI, Finn RD anon Pfam-B_4608 (release 7.5) Family Lantibiotics are antibiotic peptides distinguished by the presence of the rare thioether amino acids lanthionine and/or methyl-lanthionine. They are produced by Gram-positive bacteria as gene-encoded precursor peptides and undergo post-translational modification to generate the mature peptide. Based on their structural and functional features lantibiotics are currently divided into two major groups: the flexible amphiphilic type-A and the rather rigid and globular type-B. Type-A lantibiotics act primarily by pore formation in the bacterial membrane by a mechanism involving the interaction with specific docking molecules such as the membrane precursor lipid II [1]. 21.70 21.70 22.00 21.80 21.40 21.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.52 0.72 -4.32 15 102 2012-10-02 23:56:30 2003-04-07 12:59:11 8 1 70 0 6 64 0 48.00 40 91.69 CHANGED Mcppp.......-shsuLpEVo.cELDplLGG...GsGVlpTISHEC+h.NSaQalFTCC .........................Mcppp.......thhsulpEVS-cEL-pllGGt..GsGhhtTIoc-C........lhsCC.......... 
0 0 1 3 +3145 PF02502 LacAB_rpiB Ribose/Galactose Isomerase Main N, Bateman A anon Pfam-B_1105 (release 5.4) Family This family of proteins contains the sugar isomerase enzymes ribose 5-phosphate isomerase B (rpiB), galactose isomerase subunit A (LacA) and galactose isomerase subunit B (LacB). 24.40 24.40 24.40 24.70 24.00 24.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.55 0.71 -4.53 170 5002 2009-09-12 00:30:27 2003-04-07 12:59:11 13 13 3211 88 900 2738 1726 140.00 34 86.39 CHANGED +I.uluuDHuGhcLKptlhpaLppp...s..h.-VhDhGstst.-...ssDYP-huhtlAptVs......ss.c....s-..........h.GIllCGTGlGhuIuANKltGlRAAlspDshoAchuRpHNsANlLsl........GuRllG.tlAppllcs.......aLsscFc....GGRHtp..Rlspl....s ...............................................+IslGs.DcuuhphK....phlhpaLcpp......G..a..-Vh.Dh.Gst.st.p..............pscYPphuhtlAptVs.........ss.p.........s-..............h..GIllCGTGlGhshuANKl.GlRuAlsp..D...hoAhhs+pcNsuNllsh........Gu+ll..G..t.t...lAppIlcs.........alss.cac.....ssRppp+lstl..................................... 0 348 631 790 +3146 PF00356 LacI lacI; Bacterial regulatory proteins, lacI family Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.07 0.72 -4.34 26 29289 2012-10-04 14:01:12 2003-04-07 12:59:11 16 36 3489 72 5798 19321 1208 45.90 41 13.66 CHANGED Tl+DVA+hAGVShuTVSRVlNss...t.tVSppT+c+VhpAhcELsYtPN ...............TlpDlAchAGVShuTVSR.VlNsp............s..pV..o..p..p.T+c+VhpshccLsYpPN........................ 0 1667 3424 4636 +3147 PF02450 LCAT LACT; Lecithin:cholesterol acyltransferase Mian N, Bateman A anon Pfam-B_2099 (release 5.4) Family Lecithin:cholesterol acyltransferase (LCAT) is involved in extracellular metabolism of plasma lipoproteins, including cholesterol. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.45 0.70 -5.50 7 1122 2012-10-03 11:45:05 2003-04-07 12:59:11 10 21 550 0 521 1745 75 256.00 21 58.91 CHANGED hhplWLsls........hFh.hshssWh..hphVhspsst..hh..tPtsplcs....sGFs..ts.uhEhLD.shlsGhh...appllpsLsshGYctsphltAu.YDWRluss...cpscYappLKthIEphhphp.sppVhLluHSMGs.lhhYFLh...tph..........hspaIcuFIsluuPhhGSs+sltslhSG.......spshshl.s.sht........p.pRhhsspsa.....hhPps......hsh...--cl..ssssh................Nhshtsh.pahtslsht.h..........hh.app........................hssLpssLs.sPthclYClYGsshPT.csYhah..............sthhshts.ss..........hhhs-GDsTVshhuhs...........hCppWhstps...........hphsH....hhsl+s.......spHlsllhps .......................................................................................h...............................................hp..t..............ts.th.............t.t........sh.....h...s......h............................................ht.......hlpt.L.h.t.......G.Y.......s...p....t..h...h.us....YD..W.Rhs.t......................t..p....h.h....p...p....L..t.th.l.............E.........p....h.....h........t.......h..........s.........p..................lh..lluHShGsh.h......h..h.......h.a.h...p..................................h...p...ph...l....t...th..l.lu.s...s.....h.h.G.s....t..s.....h........h.h........G.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................t........................................................................................................................................................................................................................................ 1 192 317 431 +3148 PF04369 Lactococcin Lactococcin-like family Finn RD anon DOMO_DM04110 Family Family of bacteriocins from lactic acid bacteria. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.80 0.72 -4.05 3 23 2012-10-02 23:56:30 2003-04-07 12:59:11 8 1 16 0 4 19 0 51.00 27 76.43 CHANGED MKNQLNFNIVSDEELSElNGGpLpal.SsGshsWYpsTsTuKTlsQQTpssauAshsshs .............p.pap.lS-pELuplsGGshshhQs.sushshh.pt.sthhh.tQs.ssh.hth.s......................... 0 0 0 1 +3149 PF01306 LacY_symp LacY proton/sugar symporter Finn RD, Bateman A anon Prosite Family This family is closely related to the sugar transporter family. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.52 0.70 -5.92 4 1275 2012-10-03 03:33:39 2003-04-07 12:59:11 14 7 574 10 131 2345 309 380.70 39 96.84 CHANGED hhsh+NpsFahFuhFFFFYFFI.MusaFPFaPlWLp-VstLoKT-TGIlFSClSLFuIlFQPlaGllSDKLGL+KpLlWsIohlLVhFAPFFIYVFuPLLQhNIhsGullGGlalGhlasAGuGAlEAYIE+sSRsspFEYG+ARhaGClGWALCAohAGIhFoIsPplsFWluSGhAllLhlLLhhuKs-sspoAhVhDAlGANppsFSh+hshpLF+..phWhFllYlVGVussYDVFDQQ.FAsFFpuFFsosptGT+lFGalTThGElLNAlIMFhsPhIlNRIGuKNALLlAGsIMolRIlGsuausoshEVlILKhLHhhElPFLlVGsFKYIousF-sRLSATlaLIGFphuKQLuhllLSshsG+hYDphGFQssYhlLGhIslohTlISlFTLSust..hlh.sst ............................................................h........h....h.ul.h...a..F..h.Y.FFh....h...u.h.a...h...P...F...a....s.....l...W..L.....+.....s.......h..G...L..o..s...s....s.....h.....G....lla....us.h.tL...hu.lh.h...t.s..l.h..G..lluD....+......L...G.h....+..K..h.....L.....l.....h......h...l......s....s......l...h..l..L...h.u......P.....F.....h....l...a....l.....a.....u.....P.....L.........L...p....h....N....h....h......h...G...s..l..l..G...u...l....a...h...u...h.....s...F..s.u..G.....s...s...h.h...-....A....a..h......-.+...h....u......R...p........s.......F....E......Y....G..+..s....R...h.....a...G.....S.l.G....a..A.l..s......u.....s.........l.....s..G.......h.........h.......a......o.....l........s...s.........p.........h.........s..F..W.....l...s..S...s.....................h...u...l...l.h.h..l...L...h..h...h...h.....+...s.....s..................p..s.........h...h.....p...s.......G....A.s....+...p....s..........h....o....h...t.....s.h......t.L.h..+.......p...h....W....h...h..l.....l...a...Vl....G...l..p.......s...s....Y.s.l....aDQQ...F...s.s.F.a.s.sa....F..t.....o...s....p.......G.....s...p...s.a....G.....Y....l..s....oh..G.l.h.E.uh.l.M.h.h....u.P..hl...ls.....R..l.G.....u.+....s..sLL.l..u.uh..l.Ms.....lR..I...h..G...s.u.h....s...T...s...s...h......l...l...l.l...K...h....L..Hs..h.....E.h....P.l..h.
.ll.u...s....F..K.YI..s....u.p.F.......-...s...+........l..S.u........T....l......a.h....Vs....a...s.h..u.p...p.l...uh...h...l...hSshA.....G......h...h....Y...........-.......p.........l.....G....ap..s...s....a...hl..h....u....h.l..s.L.h....h..hl.h.uhFh...Lpt.t........s................................................................................................................................................................................................. 0 19 50 90 +3150 PF00961 LAGLIDADG_1 Intron_maturase; LAGLIDADG endonuclease Bateman A anon Sarah Teichmann Domain \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.38 0.72 -3.46 91 1228 2012-10-03 01:41:40 2003-04-07 12:59:11 14 21 340 88 258 1340 92 96.80 21 50.58 CHANGED lsGFhDu-GsFplshpppp...................ht.....................h.hpFplshc......c-htlLptIpphhs....................hGplphppss...shpaplss...p.hphllshFspYs..lhopKhhcahpapchh ....................lsGFh-....u...-GsFtlshpppp...................................................................................................phpI...s.p..............pchtlLphlpphhu....................................hGpl..th.....p....pspp...........hhpapl...ps...ppththlls..hh.....s...p.h......hho.p.K.h.h.pahhahp............................. 0 77 194 231 +3151 PF02264 LamB LamB porin Bateman A, Mian N anon Pfam-B_4810 (release 5.2) Domain Maltoporin (LamB protein) forms a trimeric structure which facilitates the diffusion of maltodextrins across the outer membrane of Gram-negative bacteria. The membrane channel is formed by an antiparallel beta-barrel [1]. 
25.10 25.10 25.10 25.70 24.10 24.70 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.45 0.70 -5.38 6 1601 2012-10-03 17:14:36 2003-04-07 12:59:11 10 3 781 33 201 906 30 388.90 35 86.13 CHANGED FHGYhRSGlGhousGutppshps.stt.....uhGRLGNEs-TYsElpLupElap-ssKoahh-oMVAhus..stpNDWE.......................sus.shRQhN...VQuKsLlsahPtuTlWAGKRaYQR.+DlHhlDFYahslSGs.GuGIENlclGsGKlSlAhlRp...stshT.ssss......phssslhDlRLAslthhssusLEluscYupAN....p+Dstshp..tsuKDGhMhouchTQulhsG.FNcaVlQYuT.uthutshGphhG............stoph.h.stthcssussaRllsaGshslG-sWpluashhYptupDhh.......usps........h-hhoVsVRPMYKWsshhpThhEsGYpsscspssu..t.-csst..YKlTLAQsWpsGsShWu..RPEIRlaATYhchDcc.sphtshs................stscDsEasFGsQhEsWW ....................................................................................FpGYhRSGh.G..h...s.u........s.......G.ut.pps...h....t...s.......t...............Shh.RLGNE.s-..sYhEL....pLs.Qc..la...p...-..s...s...+.o.h.h..hss...ls...hsl........p.t.p.ssWp...........................uss..s...h..+...p.hs...V...p...sKsh.....l...t.a.....h...P.tu.slWAGKRa.hp+...aDlHMlD.ahh.h.s.h...u.Gs....GuG.lENhc.l.G...G..Klul...Ahs.Rp......................st.s........s.....t.spp...............................phsss.shD....l...Rhssh.....t..h..........s.s....u......sL..p.lusc..Y.up..uN...........pp.D...s........p...s........h.s.t...K...D........u..a..hh...os.p..h.s....Q.s...h....h.....p...G..a.NcFsl....Qh..As....s....hh...u...ps..hGp.....tG...........................stst..h.s..t..........hshpssG..p.hhR...llsp...Gt....h..l.u.Dp.aphh.shh...Yp.p......Dh...........s.s.p.u........................h..c.hosslRPhY.tWs.shhpThhElG.Ypp.hcsp..p......s..s.....................scss...........YKhTLAppa....p..s..G..s.S.hhu..RPtIRh.a..AT...Yhchs-p...s..t.t..h..shs......................csss....D.pa..shG......sQhEhWW.......................................................................................................................... 
0 19 67 132 +3152 PF03746 LamB_YcsF LamB/YcsF family Bateman A anon COG1540 Family This family includes LamB. The lam locus of Aspergillus nidulans consists of two divergently transcribed genes, lamA and lamB, involved in the utilisation of lactams such as 2-pyrrolidinone. Both genes are under the control of the positive regulatory gene amdR and are subject to carbon and nitrogen metabolite repression [1]. The exact molecular function of the proteins in this family is unknown. 22.00 22.00 22.20 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.48 0.70 -5.27 15 2484 2012-10-03 16:37:10 2003-04-07 12:59:11 11 7 2180 4 565 1717 1361 236.60 45 95.10 CHANGED plDLNuDlGEuFGsaphGs..D-pllplloSANlACGFHAGDPssMccsVcLAtc+GVtIGAHPuYPDLhGFGRRshslospElhshllYQlGALpuhscupGsplpaVKPHGALYNphs+DcslAcAlscAVtshDssLhLhuLu...GSthlctAcchGLpshpEsFADRsYpsDGpLVPRupsuAllcDs-pslpQVLpMl+cGpVpulDGptlslpADolClHGDsPpALthsp+IRpsLcpt .............plDLNuDLGE.....uaG.a...p...hGs.........DpplLslVoSANlACGFHAGD..spsMppTVch.AhcpG.........VuIGAHPuaPDltGFG...RRs.h.s.l.s.sc-lhs.slYQlGALpuhs...+upG..splpHVKPHGALYNtuAcDtplAcA...l.scA.l.t.s..........h.........D.........s....s.......LhLhGLu.......sS...t.l.lctAcp.h..G.L.tshpEsFADRuYpsD...G....o.....LV.sRppsGA.llcDp-pslppslpMlpcGp.Vp.ols.Gp.hl..s.l............pAc....o....lClHGDs.pAlthAcclRpsLtt.p.................... 0 150 310 461 +3153 PF02061 Lambda_CIII Lambda Phage CIII Mian N, Bateman A anon IPR000278 Family The CIII protein from bacteriophage lambda is an inhibitor of the FtsH peptidase [1]. 25.70 25.70 25.80 26.90 25.30 25.60 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.02 0.72 -3.98 3 144 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 136 0 0 39 1 42.50 77 81.18 CHANGED MpaAIAGuAVMGlupLsESLLERITRKLRAGWKRLlDILNQPGVP ....MpaAIAGuAVMGhspLsESLLERITRKLRsGWKRLh-ILNQPGVP..... 
0 0 0 0 +3154 PF00052 Laminin_B laminin_B; Laminin B (Domain IV) Sonnhammer ELL anon Swissprot_feature_table Family \N 22.40 22.40 23.00 22.50 22.30 22.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.70 0.71 -4.56 9 950 2010-07-04 11:48:03 2003-04-07 12:59:11 13 274 91 0 537 928 1 133.20 26 8.37 CHANGED YWthPppFLGcplhuYGGpLcaolpast....ss.sp.upsDVll+Gsslplss.hhspstsh..s-shhchhhplhEsh...aphpst..lo+cchhplLuNlstlLIRAoY.upchsthpLssVoL-hA..+.sst......sAs.VE .........................................YWthP.t.paLGs.p.l.s.SYGG.pL.pasl..phps....................ps...p....pt......s.....D..V.lL...............p.G..s...............s..............hp.l...h.h.............p..p.......s.ts.......t...p....h.........p.......ph.....p.l..p.l.p.E.s.....t.h..........h.................p....ts................st....................loRp-hhtlLusLpslhI.....RAo.....Y...sst..........t.p..s..pLssVsL-sA...p.s.ss.s.......A..VE............................... 0 124 168 337 +3155 PF00053 Laminin_EGF Laminin EGF-like (Domains III and V) Sonnhammer ELL anon Swissprot_feature_table Family This family is like Pfam:PF00008 but has 8 conserved cysteines instead of six. 21.00 13.40 21.00 13.40 20.90 13.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.90 0.72 -4.03 72 16492 2012-10-03 09:47:55 2003-04-07 12:59:11 19 919 252 15 9516 15120 36 49.50 30 19.94 CHANGED CsCsspus.t.....spCc.............oGpC..Cp.sstGpcC-pCtsGaas.........sss.sC ..............................................CpC.s.st.u................tpCs.....................................................sG....p..C......h..C.........p.....t.....s..........s.t........G..t.....p.C.....-.....p.....C.t...s..G..aas............................................. 
0 2235 2973 5910 +3156 PF00054 Laminin_G_1 laminin_G; Laminin_G; Laminin G domain Sonnhammer ELL, Finn RD anon Swissprot_feature_table Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.66 0.71 -4.14 21 1504 2012-10-02 19:29:29 2003-04-07 12:59:11 18 398 102 34 720 7364 51 130.20 24 13.02 CHANGED F+TpsssGllh..Ys.Gptsct.........DahultLhsG+lphphchGss....tssltsssp.lsDGpWHplplpRsp+pssLpVs.spp........................tspsshusss....Lshss..LalGG.hP......thtttthshssu.hpGCl+plhlssp.lp ...................................................................hpT.sss.Gllh........Yt...uppspt........................Da.h..u.l.t...L..h..........s....G....+.....l...ph..p..........a..c.........h.............G..ss.................ssh.l..t.....s.......s.......s.......h.....l......s......s..G..p..W.....H.....p..l......p......l........p........R.................p........p......p...............s.........s........l....p..V..s..s..tss.............................................................pt.ps..s.h.ssph...........ls.h..p.s.......la.l..G.G...hP....................h..h...t..t...t...h.....t......s....h..........s.........s.......s.....a.p.GClcpl..lstp................................................................................................. 
1 123 175 377 +3157 PF00055 Laminin_N laminin_Nterm; Laminin N-terminal (Domain VI) Sonnhammer ELL anon Swissprot_feature_table Family \N 19.50 19.50 20.00 20.00 19.00 18.80 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.71 0.70 -5.37 9 1311 2012-10-03 19:46:52 2003-04-07 12:59:11 12 174 102 7 725 1124 0 213.80 31 15.25 CHANGED CaPAhsNLhhstp..lououTCGh+tPE.aClloclpt.....cKpCalCDuppsh.t....ppHhhphlscsps.tp..sWWQSp...NGl.....VTlpLDLcttFchTalIlpF+.T.RP.ushllERStDhG.sWtsYpYauhs...Ccsoastsspss.+p....s-llCTScYS-IpP.pcGEVhFpsL-..Puh...Ds.SPclQphlphTslRlpLsRL+TLGDsLhs.........c.clpc+YYYAIpDhsVtG ....................................................P.hhshh.st....l....s...s...sTCG............................s............p.......aCh..h...s...t.......................................pC....hCDu.p..p..................................sH.s.p..h.h..D.h.p.....s............saWQS......................p..s...h...................................pVsl......sLs..L..t.p.....tFcls.a.lhlp..Fp....osR....P.sshll-.+...........S.hD...hG....poWp.PaQYauts......Ctptas.......h.sp.s.s..h..p.......p...........p-sl...C......o.....s..p.a...S....c..h...P.h...p...s.G.....p........l....h...........aph.......l....s............t............P..u.h......p.........h.......-.......s.......SstLp...........-a..hpsTslRlph...h..R..h.p..s.......h...s...s.p.h...hs..........................................p...t.s.h.p..pY.aY.ult-h.VtG..................................................................... 
0 151 209 445 +3158 PF01299 Lamp Lysosome-associated membrane glycoprotein (Lamp) Finn RD, Bateman A anon Prosite Family \N 31.40 31.40 31.80 31.40 31.00 31.20 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.12 0.70 -5.28 25 515 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 100 0 288 524 0 247.70 23 80.35 CHANGED YslpphohsYNhoDsshFPsuospsstTssstss..lpuslssta+Csusoslphs.sVs.ph.ssplpAahpssshSppcshCppD...pss.Pss..............s.sssPuPo...hP..sPs.lGpYsVssu..NsT.CLLApMGLQLNlTYppt.ssppssplhNIsPNs.TsssGoCssp.pusLcL....sspshphLsFpFshssp...scaaLptVslshsl.....ssuss..hFsssNsoLshhpAslGpSYpCspEQslplosshp.......lNsFslpVQAFplsssp.FusspECphD-s.shLlPIs.....VGuALuGLlllVLlAYlIGR+Ro+u.GYQol ..............................................................................................................................................................................................................................s.s............................s.Ps.......s....pst...hsp..Y..plpss....st..s..Clhhph.ul.plplp...ap....p......s....t...............h...t.th..hs..ls.s...st.....sps..sG..sC...........t.....s........p....p....s.pLpL............s.t...p.h...h......h..L.....sh....tFs..........h.....s..s.....t................ppa...hL..pp.lplshsh..............ssstt....h...h.ps.t......s......p.s.....h.....p.....h....h...p.....ss.l....G....pSYtCst.....p..p..s...lpl......s.ssh.p..................lphhslp.lQ..s....F.....pl...p...ss....p..F.u.s.........s...p...c...C..s....hD......t.........t....s.............l...lP.lh.lGhsL.usllll...l.ll...u..Y....hl....tp......+pt.....Y.................................................................................................... 
0 98 117 186 +3159 PF05147 LANC_like Lanthionine synthetase C-like protein Moxon SJ, Finn RD, Fenech M anon Pfam-B_6095 (release 7.7) Family Lanthionines are thioether bridges that are putatively generated by dehydration of Ser and Thr residues followed by addition of cysteine residues within the peptide. This family contains the lanthionine synthetase C-like proteins 1 and 2 which are related to the bacterial lanthionine synthetase components C (LanC). LANCL1 (P40 seven-transmembrane-domain protein) and LANCL2 (testes-specific adriamycin sensitivity protein) are thought to be peptide-modifying enzyme components in eukaryotic cells. Both proteins are produced in large quantities in the brain and testes and may have role in the immune surveillance of these organs [1]. Lanthionines are found in lantibiotics, which are peptide-derived, post-translationally modified antimicrobials produced by several bacterial strains [2]. This region contains seven internal repeats. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.52 0.70 -5.89 47 1539 2009-01-15 18:05:59 2003-04-07 12:59:11 8 21 836 8 490 1232 30 282.40 16 54.42 CHANGED s.hshsLYsGhsGlulhhhphh..chhss.tp...hhph.spphlpphtpthppt............shohasGtuGlhhshthhsphhsp.pth.tphlpphhphh.pphtttt...............DlltGt..uGhlhhllhl.....p.h.tppph.....chlhphspplhpstpp..............p.hh.p......tsph...hGhAHGhuGlhhsLh.phhhp......hhtp...pphhchlcpslphhhphp..pp.ss...a.sthtpp...............stWCaGssGlhhsh.........hphtpsh.pcpphhcthtpshph.shp..ps....hhpshslCHGsuGsh.hhhhhtchhsppp..h.phtpph...hp.lhshtpp.h...........hstpss.spshuLhpGhuGhh..hhlhchhsspps.......sshhl 
.........................................................................................................................t......h..G..Ghhhhh..h............t...........................h.th....h.t.h.....l.....pt.h.....ht............................thuhh......G..s..G..hh.h.h.hh..........h.h.........p.h......t.........p..........h....phhp....t.h....h....p..h...t...h..t......................................................-h.h.Gh...uGh...hhh...l..lhh............p.h...tptph............ph.lh.p..hh.pt...lh.t.th.t...............................................................t......hGhuHG..h......sGhhhhLh...h........................hht.........thh....ph.....lpthh...t....h...phh.......ttt..........h........t..................................................tWChGssGhhhhh.....................hht......h.....h...t...p................p....h..p..h...p....h.hp...hhp........tt...........hht.slCHG.sG....h..h.h...........h..h..p...........ht.t........h.....hh..t.h.................ht..h..h..t............t............................................................shh..G.sGhh...h.h.t.............................................................................................................. 0 162 275 398 +3160 PF04738 Lant_dehyd_C Lantibiotic dehydratase, C terminus Kerrison ND anon DOMO:DM04916; Family Lantibiotics are ribosomally synthesised antimicrobial agents derived from ribosomally synthesised peptides [1]. They are produced by bacteria of the Firmicutes phylum, and include mutacin, subtilin, and nisin. Lantibiotic peptides contain thioether bridges termed lanthionines that are thought to be generated by dehydration of serine and threonine residues followed by addition of cysteine residues [2]. This family constitutes the C-terminus of the enzyme proposed to catalyse the dehydration step [2],[3]. 
19.80 19.80 20.10 19.80 19.70 19.60 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.54 0.70 -6.26 37 446 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 278 0 138 401 8 415.20 17 49.92 CHANGED l....phscLlpplspphsshssptl..cphLppLlppphLlosLcsshssscsLpallstL................tshsttsp.hhptLpplpphltpasp..tshupttphhpplppphpplh........ss.....ps.LtsDhhhssp........lplspsltpphtpthphlhplssths...spt.thppa+ptFhc+Y..G.........sp.Vslh-hlssstGlGhP.sh...........shspttpcp.hthhtphhptslpsp.pclsLs-p..tlppltssss............h.sos-lhhplps...ptlppGphplhlsshh..tuh.susshGRFt...........tppppltpphtphhpsh.........sh.splshhstpt+stNVhps.phh....sphlsl.sptss.sp...ppltlsDlhVs....ssspplaLhshs.p...pphlh.hssshhsht...phPslhRFLh-lu....t...ht......hshtt.hpthsah....PRIpatchlls.t+Wplsst-lss.....thppatthht..pa+pphplPcplalt............psD.................phlhlDlcsshplplLtpplc.+psphpl ...................................................................................................................h..l..t.......t.h...thltpL...lppthlh....p...lp.s........pshphlhphl....................tttt...hhttlt.pltthht.....thtt...tt.stt..thhtpl..pthptlh...............................................................................pt.l.hDhhhsht....................lst.phh.p.p.l.t.t..hth..lh..hs.........t.t..hpthppt.F.h.c.+a..u.................tt.V..slhphhtt...hu..hshs..............................................t...t......h..ht....ph....h..t..s..ht.tt...tp.....l.lspt........lpthh.p.................h..ph-lhhthht................tt.....t.......p.hlhls.hh..hs....h..uthh.uR.Ft.....................................tph..t..t....h...tp..tt.............s..spl...s.h.s......p.ttNl.htp.....h.....th.lsh..tt.ss...t...........ttl.lsDlhls....sp..ttpl.hlhs.p..h....sp..lh.h.....sh..hs............s.hh+.hLhpls..........................................h......h......tths....ah............P....Rlph.t.p.lllp.tpWplstt..tlsh..............p.t.......htth.....h.t...pappph..p..lP....phlhlh.................
.ttc..............................p.lhlshppshphthlhpthp.p...................................................................................................... 1 60 101 119 +3161 PF04737 Lant_dehyd_N Lantibiotic dehydratase, N terminus Kerrison ND anon DOMO:DM04916; Family Lantibiotics are ribosomally synthesised antimicrobial agents derived from ribosomally synthesised peptides [1]. They are produced by bacteria of the Firmicutes phylum, and include mutacin, subtilin, and nisin. Lantibiotic peptides contain thioether bridges termed lanthionines that are thought to be generated by dehydration of serine and threonine residues followed by addition of cysteine residues [2]. This family constitutes the N-terminus of the enzyme proposed to catalyse the dehydration step [2],[3]. 22.80 22.80 23.20 23.70 22.60 22.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.20 0.72 -3.97 38 378 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 260 0 115 326 3 93.90 23 10.55 CHANGED hp-AlhhuSssLhppltp.......htss....hps+chRphtpulhpYlhRhssRsTPFGhFuulshGpa......sppsss.phsssp..+ppsphDhtaltplspplcp .......................pEulhhuossLhpslpp.................htts......hps+c..h+.p..hppolhcYhhRhssRsTPFGhFuuluhuph............ssps...sh........ph..s..ssh......ppts..ph-ttalhplsphlt.t............................... 
0 47 82 100 +3162 PF00500 Late_protein_L1 late_protein_L1; L1 (late) protein Finn RD anon Pfam-B_69 (release 1.0) Domain \N 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.57 0.70 -5.96 19 3438 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 304 77 0 2486 0 179.90 45 98.21 CHANGED lWhPupsKVYLPPs.PVo+VlsTD-YVsRTsIaYHAuSsRLLTVGHPYaslppss.....p+hsVPKVSu.QYRVFRl+LPDPN+FuLPDpslaNP-pERLVWAstGlElGRGQPLGVGloGHPhaNKh-DsENssthtssstp......DsRpsluhDsKQTQLhIlGCsPslGEHWspupsCssstsp..G-.CPPlELhNosIpDGDMsDhGFGAMsFpsLQusKSDVPLDIssolCKYPDYL+Mus-sYGDSMFFahRREQhasRHaasRAG..ssG-slPsshYlcu..........t.....psshuoslYhsTPSGSlVoSDuQLFN+PYWLQRAQGpNNGICWsNQLFVTVVDsTRuTNhoIsssspss.....ssYssocF+cYlRHsEEY-LpFIhQLCKlsLTsEshuYlHsMssoILEsWphGlsPPPsssLEDpYRalpS..AhpC.cssPPpcpcDPYpch+FW-VDLpE+hSh-LDQFPLGRKFLhQsGlpppsphpspphp..sssssssppssKR+R .......................................................................................................................................................................................................................................................................................................................................................................................................................................hDsTRSTNh.olssthpst..........ssYpss...paKcYhRHs.EEa-LpFl.FQLCplsLss-lhshlpsMssslL-sWphGh...s..s..ssssl.DpYRalpS..AhtC................................................................................................................................ 
1 0 0 0 +3163 PF00513 Late_protein_L2 late_protein_L2; Late Protein L2 Finn RD anon Pfam-B_39 (release 1.0) Family \N 20.40 20.40 20.50 21.80 20.20 20.00 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.66 0.70 -5.63 29 499 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 258 0 0 495 0 460.80 41 97.13 CHANGED R...s+RpKRASATpLYpTCKtuGTCPPDVIsKVEtoTlAD+ILpaGSlGVFFGGLGIGTGuGoGGRTGYlPLG.....tsshssssssssRPPlllE...........sVuPoDPSIVSLlE-ou.....................hIpuGAshsshssss...GFslToSus...sTPAlLDlo....ssspoh.losTpapNPsFs-Pohlpsst.suEsoG+lllS....ssTluscs............hEEIPhcTFss.ss..ss.sshSSTPlPssppsst.t...LYoRshQQ.VpVsDPsFLopPpc....LlTaDNPAFEs.p..sTLhFpps...hptsPDsDFhDIltLHRPAlTo.RcGtVRhSRLGp+uThpTRSGppIGARVHaYpDlSsIsst.......-tIELpsLsttussss.............psuLaDlYs-.....sss.s..........hhpss.h.....pshsss.ss.ss.sspssNsTlPhspshcssh.sGPDlsh......Psss..sssshhPhsPshPshsIhlpGs....DFYL+PShahh...+RRKRlsh ....................upRpKRASATpLYpT..CKtuG.TCPPDVIsKVEtsTlADpILpaGShGVFFGGLGIG.TGu.G...T.GG.R..T........G...Y...l.PLGs........pssp.s..sss..........s.......s...ss......R.P....P...l..s.l-...................................sVuPs..DP..SIVo..LlE-.ou.......................................hI-uG.A.s.ssshssss.........GFs.lo...o....oss.........sTPAlLDls..........................sss..p..s......h...oso.s....apN...PsF...s...-.P...ol...l...p...sst....su-s.u...G.clhlo...................ssol.uscs.......................aEEIPh....cT.....F..h..s...........ps........ss............sspSST.P...l...Ps.s...c..t.s....s...pht.............LYuR.....s.h.Q.........Q.............VpV........sD.P.sFLop.Psp....llT..a..D..NPAaEs....sTLhFpps....h.p..APDPDF.h..DI..ltL.HRP.A...lTo..RcGs.VRhSRlG.p+.uT.l+TRSGppIGA+VHaYpDlSsIsss................Ep..IELQsLss.....s.ssss.........h.................pssl..aD...lYu.-s....s.......s.p...............t..h......s.ssst......shs......s.sp.p...s.s..ss...T...l.Phs.s..s.h.s...ss.l...h.s..G..PDlsh..........................ssss.....sssshh...P
h.s...P.....h.....s...Pth....s...l..h.lpuu........DFaLHPShhhh...++RKRh................................................................................................................................................... 1 0 0 0 +3164 PF02354 Latrophilin Latrophilin Cytoplasmic C-terminal region Bashton M, Bateman A anon Pfam-B_874 (release 5.2) Family This family consists of the cytoplasmic C-terminal region in latrophilin. Latrophilin is a synaptic Ca2+ independent alpha- latrotoxin (LTX) receptor and is a novel member of the secretin family of G-protein coupled receptors that are involved in secretion [1]. Latrophilin mRNA is present only in neuronal tissue [1]. Lactrophillin interacts with G-alpha O [1]. 19.30 19.30 69.20 20.30 18.70 18.70 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.07 0.70 -5.74 3 436 2009-01-15 18:05:59 2003-04-07 12:59:11 11 23 39 0 132 329 0 235.30 47 23.71 CHANGED RHhHCCShhPpG..RSol.ESopcoGupSGSGsPhpauh.oQSRIRRMWNDTVRKQSESSFIoGDINSTuTLNRGsMGNHLLTNPLLRPHGTNNPYNTLLAEoVVCNsPSsPVFNSPGuhhp..HsL.NuRDTSuMDTLPLNGNFNNSYSLRSGDY..uDGVQllDRGhNLND.TAFEKMIISELVHN......NLRGtSucs+ss.hchPVsPVsG..uSEDDAIVsDuAo...HtDssGLELlHKELEAPLLPPRsaSl....LYQss.....p....DcSESasupLTAEucEcLQSPsRDSLYTSMPNLRDSPoYP-SSP-s.sEsLsPPPPAcsGssE.IYYKSM.PNLGARN...QLQsYYQluRGSSDGYIlPPNKEGssPEG..D...GQMQLVTSL 
..................................................+oaCC.....t...+uo..tSsht.ostp...sot...................................oQSRIRRMWNDTVRKQoESSF.hsGDIN...........SosoLNp.........t..............................................................................ss..R-sssMDTL...PLNGNas.NSYSlttspa........tss...s..t..s......h....s.ht-.sshEKhIlpELspN.......N.+........t.t.......................................................h.....s.......................st...cpp..............s..hh.st.........................tt........tlEhh.....h.....p.t..ptPLl....R.....s..s.....................................................ttsto.hs..s..t...............t........t...............................ss....tRDSLYsShs.Lts.s.........................t.........-........................p..Yh.uh.PtLsst.........hYph......t.pG.......-.....s-t.........t...phlTSL.......................... 1 4 16 60 +3165 PF01273 LBP_BPI_CETP Lipid_binding_gp; LBP / BPI / CETP family, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family The N and C terminal domains of the LBP/BPI/CETP family are structurally similar. 28.30 28.30 28.80 28.30 28.10 28.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.85 0.71 -4.90 36 774 2009-01-15 18:05:59 2003-04-07 12:59:11 20 9 110 3 412 720 0 166.10 17 38.69 CHANGED uLphssp.uh..h.t.Lpplsls-hhssht.t..u....................th.hshhslcIpshplsp.hplchhssssh.hhsth......ssslplps..ph.hh......t.shlcl.........shslslpsplpltp-sp.GcsplshusCssshsslplpl.ss.....hshlhshhpshlppsltpllpsp....lCsll.....psh...Lpsl .................................................................l..h..t.sh..htpt.l.p.p.hplsshhsp..s....h...........................ph.hslpsl..plpphplsp..splph.hsstth.hh.h...............ssslslps....phtht.............t.thlch.................hhslslssplpltpssp.Gpspls.h.s...sCsspls.......p...lplphtss.....hshlhph.h..p.s....h...lpp...sl....pp....hlpsp........lC.hl........sth...h................................................. 
0 64 88 182 +3166 PF02886 LBP_BPI_CETP_C Lipid_binding_gp; LBP / BPI / CETP family, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C terminal domains of the LBP/BPI/CETP family are structurally similar. 20.60 20.60 20.60 20.60 20.20 20.50 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.42 0.70 -5.14 10 709 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 111 3 395 669 0 211.60 20 46.01 CHANGED ssLDlhhK......G....EFFshsc+sPsshsssshslP..pscsRMVYhulSDahFNoAshsYacuGaLpholssDhlPs-sslR...Lso.....psFushlPpLActaPshslcLpssssssPhlshpss.slslpsshslpsallh..Psu.s+pslaplshssssosslohpsc+lsGpLchc+lplcLpcSclG.hssEhlpul.LNhhlsshlhPtlN-+Lt+GFPLPLP.ctlpL..aclt...LpsHcsFLhlGADlpa ................................................................................................................................tpppMlhhhlS-ahhs...ohhhhh.p..s..Ghlphsl...s.s.p..ls..p............hh..........so.............................pshu.hlsp.l....s.p..t.a.P.s....t.s.h..lpl...p.s..s.p.s......P.h..ls.......hpss...sss..l..p..ht..ss.l.p.hhsh.......s......ss...s............tsl...............hplshs..s.s......h..ss.p....h.s.h..p....s.....p.......+....L..h.s...p....l.p.l..p.p..h.p..l...p.......h..tp.S..........pl.s...h..s....h.t....l.p.sh...lp.hlphsh...hP..hlN..t..h..L.....t.....tGhs.lPh.....ts....lph....hs......l...hhpshlhlt.................................................. 
0 92 125 231 +3167 PF03815 LCCL LCCL domain TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Domain \N 22.60 22.60 22.60 23.40 22.40 22.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.47 0.72 -4.00 68 852 2012-10-01 19:31:27 2003-04-07 12:59:11 14 48 198 1 528 762 11 100.40 30 19.17 CHANGED lsCpsphtpht..........h....hssphhhpCPssC..tpst...splhGot......................lYsssSSICpAAlHuGll.................sspGGplslphhsshppYhuo..p....pNGlpS...tsh..t....tpuF ........................................................pC.sph.p.t.................ssp.hthpCPusC..tpsp...............splh..Got..................................................hYp.s..sSolCpAAlHuGVl.................sspG.G.tlsVt.h..h...s.s..p..p.p..Yh.uS......h....pNGlpS....sh............................... 0 140 228 365 +3168 PF04072 LCM Leucine carboxyl methyltransferase Wood V, Finn RD anon Pfam-B_5898 (release 7.3); Family Family of leucine carboxyl methyltransferases EC:2.1.1.- . This family may need divides a the full alignment contains a significantly shorter mouse sequence. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.09 0.71 -4.72 211 3171 2012-10-10 17:06:42 2003-04-07 12:59:11 9 48 1290 36 972 2545 132 173.20 26 52.57 CHANGED AlhsshsRAhcspt.s...ssllpDshAttllpsh.......................phshtt..............h.thttt...hs.sRs+ahDchhtpth..tsG...htQlVlLuuGLDoRuaRls..t.........ssplaElDhPpllphKpphLspt.sp.....................s...hh.h..lssDlp..psWhssLttt..Gap.ssp..........P...shalsEG.llhYLst-shppLlppls .......................................................................................hhshhtRAhtspt.s.............psllp..DshAt.hlpth............................................................th..tt....................ht.t.h.t.tt......hs..sR...sphhDp....hhtphh..........tss.....................htQlVhLuuGLDo......R......sa.RL.s.............................tsp.ha.E.....lDh..P.p.ll.phKpphltp...t.tt.p.........................................................................................t..thc..h.........lssDl....p...t....p......s....W.....s..s....L.ptt......shc..sst.............................P......shhluEGl.lh.YLstp.t.ppLlptl.......................................................................... 0 293 586 821 +3169 PF04792 LcrV V antigen (LcrV) protein Waterfield DI, Finn RD anon Pfam-B_6155 (release 7.5) Family Yersinia pestis, the aetiologic agent of plague, secretes a set of environmentally regulated, plasmid pCD1-encoded virulence proteins termed Yops and V antigen (LcrV) by a type III secretion mechanism. LcrV is a multifunctional protein that has been shown to act at the level of secretion control by binding the Ysc inner-gate protein LcrG and to modulate the host immune response by altering cytokine production. LcrV is also necessary for full induction of low-calcium response (LCR) stimulon virulence gene transcription. Family members are not confined to Yersinia pestis [1,2]. 
25.00 25.00 61.00 41.50 19.40 19.80 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.84 0.70 -5.40 2 127 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 82 1 10 81 0 260.30 49 80.26 CHANGED .lRshpts.phFl--L....ltt.su.uSu..EELltLl+scpIsluhthpPhp-upV....................L.t.AhLhuup.ss..tpGlchl+EhLpu..pPssQW-LRtFhsshaFSLpu.RlD-DlltVhhDshpppsstRptLh-ELttLTAELKlYSVIQupINttLSuttsIpIcstuIsLhD.sLYGYs.s-.haKsSsEYtlLpph........-T.pthlSIKsFLpup.KpoGtLtsLpDpYsapKDNN.lupFATTsSD+SRPLNDhVspKTT.LsDhoSRaNSAlEALNRFIQKYDSVhpclLssh ................................................................................................l.shpts.thFlt.L....ltt.su.tss..pELltLl+scpIslu.ht.pPhp-.ups........................L.t.Ahl.u....up.ss..ttG....lthl+phLps.....pPs.spWpl..RtFhslhh.SLpuDRlD-Dlltshs-shsp+sstRppL+-ELt-LTAELKIYSVIQucINptLSs...sus...hphcspuhN....LhDhsLYGYs.s-thF..Ks.....SsEYKlLpch.............t.........spppl.lolKsFLtupsKpo...Gsluslcs.pYpapKDN.NcLupFuTosSD+SRPLND.VspKTTpLsDloSRaNSAlEALNRFIQKYDSlhpclLssh............................. 0 1 3 4 +3170 PF00056 Ldh_1_N ldh; lactate/malate dehydrogenase, NAD binding domain Bateman A, Eddy SR, Griffiths-Jones SR anon Overington enriched Family L-lactate dehydrogenases are metabolic enzymes which catalyse the conversion of L-lactate to pyruvate, the last step in anaerobic glycolysis. L-2-hydroxyisocaproate dehydrogenases are also members of the family. Malate dehydrogenases catalyse the interconversion of malate to oxaloacetate. The enzyme participates in the citric acid cycle. L-lactate dehydrogenase is also found as a lens crystallin in bird and crocodile eyes. N-terminus (this family) is a Rossmann NAD-binding fold. C-terminus is an unusual alpha+beta fold. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.60 0.71 -4.36 31 12983 2012-10-10 17:06:42 2003-04-07 12:59:11 18 15 8382 421 2210 8108 2565 125.70 35 47.64 CHANGED hKVullGA.GtVGpuhAhtlhtp.slucE..........LsLlDlspp..tscGsAhDLpHusshs.ssshlss...sshsshcsuDlVllsAGsspKPG.o.....RhDLlptNspIh+slssslscsu..ssuhllVloNPVD....IhshlhhKhushspp+lhG .............................................pl.lhGs.G.lG...........sh.............h......h..h.t........s.t...................l.l....h-.....h.............hh.G.s....h.-.....ltc.....h...........................h..................h.................p..............s........h...................s...........s..........t..........s......sh......ssh.c.sADl.Vl...l...o..A.....G....s....s......R....K......P......G..M..s....................Rt.........D.L..h..s....h....NstI...h....Kslsppls.p.s...s........ssu...h.ll.l.l.oN.P...Vs....lhs.l.h.p.Kh.u...s..hspp+lhG................................................................ 0 689 1286 1790 +3171 PF02615 Ldh_2 ldh_2; Malate/L-lactate dehydrogenase Bashton M, Bateman A anon COG2055 Family This family consists of bacterial and archaeal Malate/L-lactate dehydrogenase. L-lactate dehydrogenase, EC:1.1.1.27, catalyses the reaction (S)-lactate + NAD(+) <=> pyruvate + NADH. Malate dehydrogenase, EC:1.1.1.37 and EC:1.1.1.82, catalyses the reactions: (S)-malate + NAD(+) <=> oxaloacetate + NADH, and (S)-malate + NADP(+) <=> oxaloacetate + NADPH respectively. 
21.50 21.50 21.90 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.82 0.70 -5.72 8 2631 2009-09-11 11:09:40 2003-04-07 12:59:11 9 4 1423 44 609 1711 3069 322.80 31 94.97 CHANGED M+lshEpL+phIpcVLpphGlsEEcAchVADshlcADL+GhpSHGlsRhPpYlppLppGsIsscsch+hlcpususullDGDcuhGpVsA++uM-hAI-pA+ppGVGlVul+NuNHaGtuGYYuppAAcpGhIGIshTNoc..PhVsPaGG+EshlGTNPlAlAhPupc..hhhlDMATSshAaGKlhsARc+GcpIP-ssulDc-GssTTDPshlhcs.t..lLPhG.......GaKGYGLSlhlDlLuGlLuGushuspVopssss.--tsspsplaIAIsP-hFhss-pFcp+lsphh-ElKuSp.usthpplhlPG-hpshphccpp+pGIslDcslappLpsl ..................................lshppL+p.hhpplLpp.tGhspppApt..lA-hLltAshpGh..pSH.Gl...s...Rl.st..Yl.pplppGt.lp..s.s.p...s.plh.p.p.s.su..h..shlDucpuhGplsuctuMstAI-....h....A.cppG.luhVul+...s.u.s.HhGthuhaspp.sAc.tG.hl.........u..l.shs....s..os....sh...lsP.aGGp-shhGTNPlshu..hP.........s................p......s.........t.....shllDhATSshAhGKlpssp..ppG..c..p.lPs.shulD.p.p.G..p.s.......T...sDP....t.....s....h..................p..s..h.............h...L.P.h.G.........................taK.G.GLuh....hl-lLuGsL.s.Gu.s..h...s..t.......p.......l........s.....p...........h...s.....s.......t.ps...t..h.....s..phhI.s.IcPshF....s..s.....t..t..h.p..tc..hp.p.hhchlpsst....ts..s..p.tlhhPG.c.p.htt.t.p.c.s.t.p.p.G.I.slssshathl...t...................................................... 0 183 341 493 +3172 PF02866 Ldh_1_C ldh_C; lactate/malate dehydrogenase, alpha/beta C-terminal domain Bateman A, Eddy SR, Griffiths-Jones SR anon Overington enriched Domain L-lactate dehydrogenases are metabolic enzymes which catalyse the conversion of L-lactate to pyruvate, the last step in anaerobic glycolysis. L-2-hydroxyisocaproate dehydrogenases are also members of the family. Malate dehydrogenases catalyse the interconversion of malate to oxaloacetate. The enzyme participates in the citric acid cycle. L-lactate dehydrogenase is also found as a lens crystallin in bird and crocodile eyes. 
22.60 22.60 22.70 22.70 22.40 22.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -4.53 19 12166 2012-10-02 13:39:36 2003-04-07 12:59:11 13 14 7571 421 2231 7218 2555 135.60 32 49.53 CHANGED TpLDpsRupsh...........lAc...ttsl.sPpsl...pVhllGsHus..........lshlPl.p.sp..sphs.cpplpplhpplppuGscllctKt..GSsshShAhAhs+....hscullpsh......pshh.huVhpsu..husps....................haauhPlhlstsG.....lpcl.l-.hs.Ls-hEccthp.putspLcpplcpuhphh ................................TsLDssRhcsh.................lA-...............hhsh..ss.ps.l............cs.llGsHu...........s...................................V........s.......h........l.......P.......L....h....s........l....p...............................t.........s.....t.....t.....h.....t...tl....h.t.....p..h..tp..t.u...hpl....lph.+.................sus........hu..h..u.uhhp.................hsp...sh..h.ts.................................pt........hus.h....p.......u........ht.......t...........................hhhu.Pshls...p.G.......h.ph....hp.......h.....l..s.t..p..tt..ht...t.h...t.l.t.........h....................................................... 0 697 1291 1800 +3173 PF00058 Ldl_recept_b ldl_recept_b; Low-density lipoprotein receptor repeat class B Bateman A, Sonnhammer ELL anon Swiss-Prot Repeat This domain is also known as the YWTD motif after the most conserved region of the repeat. The YWTD repeat is found in multiple tandem repeats and has been predicted to form a beta-propeller structure [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.77 0.72 -3.63 28 8618 2012-10-05 17:30:42 2003-04-07 12:59:11 12 734 140 112 4691 7279 7 41.70 31 14.93 CHANGED splYWoDhs.p.....sluhsshsGss..pcsllspc........lppPpuIslDh .............tplYWo..D.hs.pp.............pIc.pu...s.h...c....Gsp........Rps.l..l..sss............lsp.Pp..ulslD.................... 
0 963 1312 2717 +3174 PF03760 LEA_1 LEA-group1; Late embryogenesis abundant (LEA) group 1 Finn RD anon Pfam-B_1549 (release 7.0) Family Family members are conserved along the entire coding region, especially within the hydrophobic internal 20 amino acid motif, which may be repeated. 25.00 25.00 26.10 26.10 22.70 21.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.55 0.72 -3.83 8 153 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 36 0 41 142 0 71.70 43 51.71 CHANGED MQSuKEKhoNhASoAKEph-lsKAKs-EKAEKuTARoctE+clAHp+cKAKEApAcM-hHpAKAcHAA-K.ps .........hpohKEpspNhuuuAKtth-hsKAplpEKAEKhTARs.hcKclAcc++cAKhspAch-h+pA+tcpsAtK.t.t........ 0 3 22 34 +3175 PF03168 LEA_2 Late embryogenesis abundant protein Mifsud W, Griffiths-Jones SR anon Pfam-B_3080 (release 6.5) Family Different types of LEA proteins are expressed at different stages of late embryogenesis in higher plant seed embryos and under conditions of dehydration stress. The function of these proteins is unknown. This family represents a group of LEA proteins that appear to be distinct from those in Pfam:PF02987. The family DUF1511, Pfam:PF07427, has now been merged into this family. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.19 0.72 -3.63 144 1601 2009-09-13 08:28:33 2003-04-07 12:59:11 8 14 422 3 862 1521 41 100.30 16 47.34 CHANGED plplpN.PNs.hs.lsh.ssls...hslt..h.....su....ptlu..t.us.spshplsspups....hlsl..s.....lplshts.h.........hphhtshh...........hstplshplcuplp.h.h.h.hthsl.shspps ....................lplpN..PNp..hs.lhh..sshs......hplh..h.........ps............ptlu.........ssthssht.h.stp.s.ss..............slsl......s..............lsls..sht.h..................spth..t..s..hh....tt..................hshplphplcs....chp......hh..h..hphph..ht.................................................. 
0 152 498 703 +3176 PF03242 LEA_3 Late embryogenesis abundant protein Bateman A anon Pfam-B_3170 (release 6.5) Family Members of this family are similar to late embryogenesis abundant proteins.\ Members of the family have been isolated in a number of different screens. However, the molecular function of these proteins remains obscure. 21.00 21.00 21.40 21.00 20.40 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.54 0.72 -3.74 13 167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 52 0 57 159 0 86.70 32 82.53 CHANGED MARSlo.suKhlSAhlsDslSsh.hpRRGYAAAust..hsu.....hRuGustsss.hts+suEs.....sstssuWsPDPVTGYYRPEspusEIDsAELRphLLs ...................................................................................t.h................ppRuYuA....uAst...httu.........ts.us.s.tphs.......tt..scssts..............spcpssWhPDPsTGaYtPEs.phsE.lD.sAELRttlLp......... 0 7 30 43 +3177 PF00059 Lectin_C lectin_c; Lectin C-type domain Sonnhammer ELL, Griffiths-Jones SR, Eberhardt R anon Swissprot_feature_table Domain This family includes both long and short form C-type 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.85 0.72 -3.39 95 12227 2012-10-02 16:37:33 2003-04-07 12:59:11 16 665 521 504 7014 11645 360 107.50 20 30.77 CHANGED pthsapcApthCp.p...hsupLsshps....tpchchlpphh...........sspphWlGl.......ttpptsapW.sssshp...........................spsspsppCshl............sssphss......psCsp.tphhlCcp 
.......................................................................................t..hsappA..p.p.hCp..p............ts....u..p.L.s.s..lps.....................pp.E..p..p...a....l.tp.hht................................tppthW.....l.GL..................pptppps...p......a.p.W.s..D.u...s..s.h..s............................................h.tW..........................sst.ssspsCshh.................................ttsspW.ps..............................ts..C......p....p....p......h.........a..lCp........................................................................................................... 0 2294 3027 5110 +3178 PF03041 Baculo_LEF-2 lef-2; lef-2 Bateman A anon Pfam-B_1773 (release 6.4) Family The lef-2 gene (for late expression factor 2) from baculovirus is required for expression of late genes. This gene has been shown to be specifically required for expression from the vp39 and polh promoters [1]. LEF-1 is a DNA primase and there is some evidence to suggest that LEF-2 may bind to both DNA and LEF-1 [3]. 25.00 25.00 60.10 58.10 20.00 19.50 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.22 0.71 -4.57 11 69 2012-10-01 20:20:07 2003-04-07 12:59:11 9 1 62 0 0 68 0 164.60 35 79.78 CHANGED ph.lhapP...tpsIDcstpYlVchpsas..lslosYTsFppsGLhVhlsGhpLhpLlcsssstppssssp.........tpp+sp+NVCFpssss.s+psllshLpsplp...hPsChpphhpclpsp..PRssRaRKRFlFNsYlsNsloCs+Csp.pClhcAhphhYptDpKCVpElpp .........h..hWsP..ht..t..sslDK.sts.YhlchpDa...lsloPYTsFhpsG.hhlclsGhR.Lhh...Llpssss.ppp.psst..................hhc+Sp+NVCFpsstp.t+psllchlppplp....hPsC....hpph.hssLp..hp..PRGsRa+KRFlFNsYIuNlloCsKCcp.pClhcAltthYppDsKCVpElp... 0 0 0 0 +3179 PF03388 Lectin_leg-like Legume-like lectin family Mifsud W anon Pfam-B_2789 (release 6.6) Family Lectins are structurally diverse proteins that bind to specific carbohydrates. This family includes the VIP36 Swiss:P49256 and ERGIC-53 Swiss:P49257 lectins. 
These two proteins were the first recognised members of a family of animal lectins similar (19-24%) to the leguminous plant lectins [1]. The alignment for this family aligns residues lying towards the N-terminus, where the similarity of VIP36 and ERGIC-53 is greatest. However, while Fiedler and Simons [1] identified these proteins as a new family of animal lectins, our alignment also includes yeast sequences. ERGIC-53 is a 53kD protein, localised to the intermediate region between the endoplasmic reticulum and the Golgi apparatus (ER-Golgi-Intermediate Compartment, ERGIC). It was identified as a calcium-dependent, mannose-specific lectin [2]. Its dysfunction has been associated with combined factors V and VIII deficiency OMIM:227300 OMIM:601567, suggesting an important and substrate-specific role for ERGIC-53 in the glycoprotein- secreting pathway [2,3]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.52 0.70 -5.24 8 783 2012-10-02 19:29:29 2003-04-07 12:59:11 8 10 280 35 470 811 29 200.80 27 52.53 CHANGED cpacpcaSLhtPahspupsslP.hWcatGsshlo.SstIRLTsc...pSppGulWs+psshh.csWElclsF+VsGptt.pltuDGlAlWYTp-psh..sGPVFGusDpasGLuIhlDoasNcsss.p+spPalsuhlNDGohpYDHscDGspspLAu....Cpt.cFRNp-asoplpl+YspshLolhh-.s.lcsps-achChplssVpLPsGhY.FGlSAsTGsLuDsHDlhShhhhplps 
........................................h......hoh..P...h....s.......s.....tt.h....s.hWph.tG..s..s...h...h........p.....s....p......h...l.RLTssh.........ps.p.p.G........s.l.Ws..c.sh.................h...p......sWc...l...............clp..F+l.........p.........Gp............t....ph...t..u...D..G.hAlW...h.sp......ppht.............G......s......l....a...G.s...s.....pa.pG.....l....ulhh......Do....a...Np............t....................................t.....hP..h...lps....hhN....s......G..s..h...p..Y-p...ppD...G....p..t..p...t....l..uu.......Cph.........t....h.....R........s...t...s.....h...s.........o....phpl..pY.h..p..t.....h...p..l.................h.....s.............h....p......sp...tp......a.p..........Chp.s....p.....s.....l...t...l.....P......p.....s......h......a.....hGhS..At.TG.......s.L......u.D........sHDllphhhht............................................................................................................. 0 158 234 362 +3180 PF00139 Lectin_legB lectin_legB; Legume lectin domain Sonnhammer ELL, Bateman A anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.90 0.70 -5.00 47 1651 2012-10-02 19:29:29 2003-04-07 12:59:11 14 73 320 724 782 1765 72 204.20 27 39.67 CHANGED ssspFsa........ssF......st......ssLhLpGsApl........ssGhLp.LT.......sssp.......pshG+AhYs.pPlpl...hssssup....h.......sSF.sToFsFsI..h....ssssucGlsFhluPss....ssu..sutaLGLhNtpsss......sspllAVEFDTht...php-hDs....NHlGIDlNSlpSsto...sh................tshsLsu...GpshplaIsYcussppLsVslu.s..p.............cPpp.sllStsl.DLss.lls..-......psaVGFSAuT.G........hsptHhlLuWSFs 
..................................................................................t...a.h........t.a......tt.........ts.l....h...Gs..A..h.........ss..s.h.lp..LT.....pst.....................tthG+shas.tP.lph............hp.........s..s..t..........h......................tuF.so...pFsFs.l....................................t........s............suc...........G....huFhls..sst.......sh.sts......suth...L....G...l..hst......p....stt.....................psphlAV...EF...DT.hh................t.....t..c.hss...............sH.l.G..l...D..l.......N......u....l..h.S....h..t..s...........................................hsh.s.lt.s......up.......hpshlsYsu.s.s.....p.......hLsVsls.....t...........................pspp.....h..lo.h....l.....sL...ps...h.l.s....-.............shlGF....SuuT..G.......thh.pt..p.lhuWsF............................................. 1 114 409 591 +3181 PF03954 Lectin_N lectin_N; Hepatic lectin, N-terminal domain Finn RD anon DOMO_DM01961 Family \N 25.80 25.80 26.50 25.90 25.20 25.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.54 0.71 -4.77 5 128 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 27 0 61 131 1 121.20 43 48.84 CHANGED pchpHLpNs.p-c+shcKGPPPoQPLLQRLCSssRLLLLSLuLSILLLVVVCVIGSQN.....SQLQEELRoLRETFSNFTSSTcsEVpALST........................pGGSVGcKlTSLESpLEKQQQDLKsDHSoLLhHVKQFssDLRoLoCQMAsL ......................................s...ptp....tG.....sPs.ps.h.hpR.LpShspL.LLuLuhslLLLV.llCVlGSQs............upLpc-LtsL+psFSNFous...ThsE.lpuLso........................pG.uu..lscKlpSLEupLEKppQ-Lps......cp..SplhhHlpphs.DL+.LsCQhs.h............................ 0 4 4 10 +3182 PF05098 LEF-4 Late expression factor 4 (LEF-4) Moxon SJ anon Pfam-B_6330 (release 7.7) Family Late expression factor 4 (LEF-4) is one of the Baculovirus late expression factor proteins. LEF-4 carries out all the enzymatic functions related to mRNA capping [1]. 
20.20 20.20 20.60 22.10 17.80 20.00 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.78 0.70 -5.73 25 64 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 60 0 0 63 0 434.20 40 97.95 CHANGED EpEISYTINaSQDLLYlIhcoYIsK+h.pht-cYsDllDpNsVRTRlhs................sphsSV+Kpslshp+hVah..sssslVPhVsRcshEpsssssspp......l+RllcspVY+...tpsslEIKFEHlYappNhsDphDuLhAsKQIsLaNLLpspsp..slspNSHLGSDEILAslRLEhEYc..sssstssLpphscllsch-.slstppNIoPhLPYTTLhNpIhYRKFpcE+.hl.....hs.t......t.ssssVh+WAlKLDGlRG+Gahs+s.hhll.hDDMQhFSGpl.......................................ss.FslNNlVuFQCEll-..pslYlTDlLpVFKYpYNNRsQYEsSh.psYsl-shsAlpslNhh........sssssphslssh.tst..hpl+FQpFacPPlphst....YsolPsDGFVVLDsphpYVKYKphKTlElEY-spsstFpslpGslpspshhss......Lp+ssIYEsllsc...ssIpVlKpRPDRLVPN ....................................................EpEISYoINhSQDL...LYhIhcoYIs.cph..phtpcYsDlhDpNslRTRl.s................sph.sSVpKpshphc+hVah..spsslVPhVpRcshEpshspsshp......l++llcspVY+......psplEIKFEHlYhppshh..Dp..hDuLhAsKQls....LhNLLpsssp.....sl.hpNSpLGSDEILAslRLEhEY-t...sssst.....shLpt..........hs....plltph-.slsptp.NIs..PhlsaTTl.NpIhYRKFtcEphl..hs.t...........t.sssslh+WAlKLDGlRG+Ghhsps....h.............h.............ll.hDDMQhFuupl.......................................ss..F.slNNl.VuFQCEllst.pphYlTDlLpVFKYpYNNRTQYEsSl.ssYslsshsAlpslNhh.........ppss.pplslpsh....s.thpl+FQpFacsPlp.st...........YsolPsDGaVVLssphpYVKYKhhKThElEYsstsshFpslsGslpshplhss......Lp+ssIYEsllsc...ssl.pVlKpRsDRlVPN.... 0 0 0 0 +3183 PF04941 LEF-8 Late expression factor 8 (LEF-8) Moxon SJ anon Pfam-B_5130 (release 7.6) Family Late expression factor 8 (LEF-8) is one of the primary components of RNA polymerase produced by polyhedrosis viruses. LEF-8 shows homology to the second largest subunit of prokaryotic DNA-directed RNA polymerase[1]. 
17.60 17.60 57.60 20.40 15.10 14.80 hmmbuild -o /dev/null HMM SEED 748 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.32 0.70 -6.86 7 329 2012-10-01 19:23:01 2003-04-07 12:59:11 7 2 143 0 0 243 2 320.40 47 92.17 CHANGED VlpDFscLYcplcsp.asLcahLsCss.puss.sol+hLQERKSYFCCAlc.sht+CVLHKCVlVVFGThLDtpFRss-......................sspsslpGTFMlDGRaLSFPNIMMNNNlLlHNFYDKLYu..KsCKRMFLYGNlD-EKpINRAIQLVYDctpDlLFARDVYApDYVVT--LNplLEhYLpsSGKWcPLsFLFcasptps.pLV-pIKhIMpt-INYSIDSLuNKIIYKHsYLlpLlY.cslLptYpt.htps.s...............s.sstsK++Ks.QolhasKEsKKIVDoIVNG+LIYsVSKTFSKQKKsF.N.QDNSSNNNIEIs.PsLKYRlGNEVlRITNDoMRQDMLKQchDFVKFlDSFFHGEMTVAGKKFFLCRsVRLPsVDYphVAc+FppLlppsLlhhss...........cp..........s-stDs.LLIAFNsRPTshpCcRsclspIhYthKRNhsPlElKlsssILFVNHHEGMlCIKKpV+lss....lpIssLLTPYEYHNppSllpshs...spl.EpDcVssLMSKLlQYYYpsahplFsTlPVPKLIVSLTNLKNAMPVhpYss.......t..lssLPlGNSVsVuPclhhNNKMFpLWTLVRDs+LMTAEDPYIPchsLPI+LYNNKlNKLKGKLshupp.psPhlKFh.pS.spsNhVslpsGpVLhhsGVlVSNsKIsWsaDGKRYKIETCpNKsaaVYKIYlYaRplcsQ+lE+lcuphsstsDsValKlslVTSTssLcGlKICGIHGQKGVhNsuEDLTEWMAEDG 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................VAGKKFFLCRsspLPsVDYphVApKFp.LlppsLlhhss...........s......................................p.sp..cs.tllIAFNsRPThhpCp+.ssl.hIhYphKRNhsPlELKl..sspILFVNHHEGMlCIK+plpIps.....pspIssLLTPYEYHNppSllps.ssh....spl.E...p....D...cVpsLMSKLlQYYY+sahplFsThPVPKLIVSLTNLKNAMPVhpYpp..........p.hlssLPsGpSVsV.s.spIhhNsKMF+LWTLVRDs+LhT.............................................................................................................................................................................................. 2 0 0 0 +3184 PF05094 LEF-9 Late expression factor 9 (LEF-9) Moxon SJ anon Pfam-B_6326 (release 7.7) Family Late expression factor 9 (LEF-9) is one of the primary components of RNA polymerase produced by baculoviruses. LEF-9 is homologous to the largest beta-subunit of prokaryotic DNA-directed RNA polymerase [1]. 
19.50 19.50 22.60 22.50 17.20 15.60 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.64 0.70 -6.11 7 276 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 131 0 0 138 0 180.10 76 98.01 CHANGED hlpFhsKsPocF-LLhDPscl.ssshFhshccF+hFLKslIsDLK+..KhNaFNSLl-QLIsVYp-ss.t+NEHTchLuRIlhATslVVT-LPSNVFLKKLKhNKFTDsIsYLILPNFILWDHNFllFLNKsFNSKH-suLVDISGslQKIKLTHGVIKDQlQsKNGYAGQaLYSTFLNTASFYANVQChNGsNEIlPP+sSlpRYYGRDVsNlRAWTTRHPNISQLSTQlScVhts....-s.sDWNVKVGLGhFsGANpDCDGDKcVITaLPpPNSLIDLECLLYGDPRasFICFDKNRLuFVSQQIYYLaKNlc+lEpLhcohPllhsLWppa+...stpFupRLEhLLRDssLlhSSNsSaLLappLsplIcsEEMVCuDcElhsLsGpFsslIcSGAKGStsLlcSTcpY+pTcssDlDTVupRAlTuLNSaIoSHNRVKlsGGDIYHNTsVLQNlYLKsshICYKsDshsluslCsLPSEFLFPEHLLDhF ....................................................................................................................................................................................................QaLYSTFLNTASFYANVQCLNGsNEIlPP+uSl+RYYGR.DVs...N..VRAWTTRHPNISQLSTQlScV+ps.......-s..TDWNVKVGLGhFsGANT....................................................................................................................................................................................................................................................................... 1 0 0 0 +3185 PF05150 Legionella_OMP Legionella pneumophila major outer membrane protein precursor Moxon SJ anon Pfam-B_6492 (release 7.7) Family This family consists of major outer membrane protein precursors from Legionella pneumophila. 
20.90 20.90 21.20 22.60 19.80 19.70 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.87 0.70 -5.30 2 182 2012-10-03 17:14:37 2003-04-07 12:59:11 7 2 18 0 16 92 0 199.50 58 94.10 CHANGED MhsLKKTssAVhALGSSAlFAGTMGPVCTPGNVTVPCERTAWDIGITALYLQPhYDADWGYNGFTpVGGWRpWHDVDhEWDWGFKLEGSYHFNTGNDINVNWYHhDssoDHWA.hsphHsYs.pWDAVNAELGQFVDFSANKKMRFHGGVQYAhIcsDVNRahNsFhhssFNSKFNGFGPRTGLDMNYVFGNGFGlYAKuAsAILVGTScFhDs...hsF.hGSKNAIVPElEhKLGADYTYAMAQGDlTLDVGYMWFNYFNAhHNTush.....suhETDFuASGPYIGLKYVGNV ..................................................................................................................................................YHa...D...sD.....o....D........+W.....s..........s...hu..............s.........h.............H...........s...........Y..s....N....+WDAVNAELG.QFVDFSANKKMRFHGGVQYARIEA..D......V.....N.....RY.........F.........N...N.....F......A....F.N.....G....F......N..S.....KFNGFGPRTGLDMNYVF.GN...GFGVYAKGA...AAILVGTSD.....F...Y.........D..G......................I........s......F......I......sGS..K..N....A..IVPELEAKLGADYTYA.M......A.QG.DL.T..LDVGYMWFNY...FNAM...H....N.T...uVh.......................................................................................................... 2 10 10 15 +3186 PF03020 LEM LEM domain Bateman A anon [1] Domain The LEM domain is 50 residues long and is composed of two parallel alpha helices. This domain is found in inner nuclear membrane proteins. It is called the LEM domain after LAP2 Swiss:Q62733, Emerin Swiss:P50402 and Man1. 20.50 20.50 20.50 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -7.94 0.72 -4.51 8 494 2012-10-03 03:04:30 2003-04-07 12:59:11 10 23 94 5 269 440 0 41.30 39 7.99 CHANGED sDlspLSDsELpspLppYGlssGPIluoTR+LYEKKLhKLccp ...................ppLosp-LpppLhch.Gls.s.GPIs..soTRplYEKKL.h..ch................ 
0 66 87 174 +3187 PF04011 LemA LemA family Bateman A anon COG1704 Family The members of this family are related to the LemA protein Swiss:P71452 [1]. LemA contains an amino terminal predicted transmembrane helix. It has been predicted that the small amino terminus is extracellular [1]. The exact molecular function of this protein is uncertain. 21.00 21.00 21.00 22.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.03 0.71 -5.18 10 2668 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 2218 1 681 1981 523 183.70 35 94.86 CHANGED hhsslhlhllllllslhsssuYNsllst-cslcsAWSplpsphQRRsDLIPNLVETVKGYAuaE+cTLccVsEARA+ss........thpcspshp+hppApsELouuLuRLlllsEsYPsLKANpsFhpLpspLcGTENRIAluRpcYNcuVpcYNspl+pFPollsA+haG..F+stshFpss..EutpssPKVcF ...........................................................................hh....lllh.l..l.l.l.l..l.s.h..h.s..h....s.s.Y..NsL...lp...hcppscpuWup.....l-sQh.pRRsDLlP..NL........Vp..TVKGYAp.......a..E.p....p.....sL.p..p.....VscA..Rspss...................................s..s...p..s...hpp..ht....p....A..p.s.p.L.....osu......L...u..p..L.h....slsE....s.YP.-.LKAsps..FhpLQ...ppLp.sTEN+IuhuRphYNssVppYNspl.c.p..FP.s.s.l.l.A.t.h.hu.......a.p.t.t.s.hapss....tt.t...p.p..s.PpVpF............................................. 0 226 462 587 +3188 PF02998 Lentiviral_Tat Lentiviral Tat protein Bateman A anon Pfam-B_1519 (release 6.4) Family This family contains retroviral transactivating (Tat) proteins [1,2], from a variety of Lentiviruses. 19.80 19.80 19.90 21.70 18.20 19.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.07 0.72 -4.16 7 50 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 15 0 0 41 0 58.70 81 96.23 CHANGED .EEhPpRR.stscEhs.h..a.cEh-sWpasS.RVPGEhLQRWLAMLpsuR.R++VlREMQKWMW+aPKAPVIRsCGCRLCNPGWGo .....Ep.s.t..ht..th..h..h.c-h-sWphhS.RsstphLQhWLAMLp.tp.RtpVhpEhQhhhWh..tA.lhRsCGCRLCNPGWGT. 
1 0 0 0 +3189 PF02024 Leptin Leptin Bateman A anon PSI-blast P41159 Domain \N 25.00 25.00 25.40 25.00 24.90 23.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.02 0.71 -4.53 7 218 2012-10-02 01:28:15 2003-04-07 12:59:11 10 1 124 1 24 189 0 120.30 70 91.48 CHANGED VPIpKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPlLSLSKMDQTLAlYQQILTSLPSRNVlQISNDLENLRDLLHLLAhSKSCsLPpspGLEohESLGGVLEASLYSTEVVALSRLQGSLQDMLpQLDLSPGC ...................................lpsDoKsLlK....TIlsRIsDIS+hQSVSSKQRVTG...LD.F...IP...GL...H...PlLSLSKMDQTLAlYQQILTSLPS.RNVlQISNDLENLRDLLHLLAsSKSCsLPpsp..uLc..oL-SLssVLE...ASLYSTE.VVALSRLQuuLQ-hL.pLDhuPtC.... 0 1 3 6 +3190 PF03588 Leu_Phe_trans Leucyl/phenylalanyl-tRNA protein transferase TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.40 22.40 23.00 22.60 21.90 22.20 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.97 0.71 -5.13 9 2004 2012-10-02 22:59:21 2003-04-07 12:59:11 9 3 1956 17 506 1355 651 170.90 46 73.57 CHANGED ssspLltAYppGlFPhhpp..spslhWasP-sRullhs-.plHls+slp+sl+pshapVphstsFtsVI-uCAsst...tpsTWIspthpcsYhpLHphG..aAHSlEsWpscc.....LVGGlYGlulGplFaGESMFSptssASKlAhlpLschLcstuatLlDsQh.spHLcphGApclsR ..........................................s.stRLLtAYppGIFPWass........spP..ll...WWS.....P-.....PRuVL.....h..Pc.....ph...H.lS....+Sh++....h....h....+........p.........s..............a....clolspsFspVIcu.CAss....+..................p.........puTWIscplhcA..YpcL....HchG............aAHSlElW..p.s....s..c.......................LVGGlYGVul..G..p...lFhGESMFS....c...t....p....s...A...SK..sALhh.Ls.p.c.htpp..G...h..pLIDCQ.h.h.s.sHL....tS.LGAp-IsR..................... 0 155 324 426 +3191 PF01819 Levi_coat Levivirus coat protein Bateman A anon PSI-BLAST 2ms2 Domain The Levivirus coat protein forms the bacteriophage coat that encapsidates the viral RNA. 180 copies of this protein form the virion shell. 
The MS2 bacteriophage coat protein controls two distinct processes: sequence-specific RNA encapsidation and repression of replicase translation-by binding to an RNA stem-loop structure of 19 nucleotides containing the initiation codon of the replicase gene. The binding of a coat protein dimer to this hairpin shuts off synthesis of the viral replicase, switching the viral replication cycle to virion assembly rather than continued replication [2]. 19.40 19.40 21.40 21.00 19.20 15.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.25 0.71 -4.56 5 151 2009-09-11 00:12:26 2003-04-07 12:59:11 12 1 26 164 0 161 0 102.30 51 72.02 CHANGED ApLpslVLsspGuTGNsTlsls...VNssNGVAEllpusuVPutEpRVTlSVRQoSssR+KYslKlcVPslsoQTVNGlssPuVsRpuYssV-LoastaSTscER.slIsppLAALLKDshlIcusIssNsGa ..uphpphsLst.GtsGp.Tls.....ss.sNGVuth.ps.ss.u.th+VThShttsutpp+paplplcl.pssspTssGsp.PssshpuYhsh-LTIPIaATssDs.tLIsKuhtGLLKDGsPIsusIusNSGh... 0 0 0 0 +3192 PF01726 LexA_DNA_bind LexA DNA binding domain Bashton M, Bateman A anon Pfam-B_1975 (release 4.1) Domain This is the DNA binding domain of the LexA SOS regulon repressor which prevents expression of DNA repair proteins. The aligned region contains a variant form of the helix-turn-helix DNA binding motif [1]. This domain is found associated with Pfam:PF00717 the auto-proteolytic domain of LexA EC:3.4.21.88. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.96 0.72 -4.41 9 3433 2012-10-04 14:01:12 2003-04-07 12:59:11 11 9 3000 10 740 2157 1670 64.30 40 31.34 CHANGED MpsLTtRQp-lLchI+splpppGaPPohtEIuptlGhpSssuspcHLcALp+KGhI-hsPGpsRu .............................tLTt.RQpcll-hI+ppl.p.ppGhP..P.ohpEIuptlGh.....p.SssusccHLpsLp.+K...GhIchssspsRu............ 
0 239 478 621 +3193 PF01790 LGT Prolipoprotein diacylglyceryl transferase Bateman A anon Prosite Family \N 24.90 24.90 24.90 24.90 24.70 24.80 hmmbuild -o /dev/null --hand HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.67 0.70 -5.44 67 5002 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 4357 0 1133 3460 3019 251.80 31 88.04 CHANGED hsPlhhplG.P..l.......sl+W.....YGlhhhhGhllAhhl.ut+psppt...........shst-pltDhlhauhhusllGuRlhYV......l...aph......shY.....hpp.........P.hplh...plWcG........GhuhHGGllGshluhhla............................s+ppp..ls.....hhphsDhlAPslslG.uhGRlGNFhN.tElaG+.....s....ss........athl.P............................................t....................th.......................h.hh.hHP.......o.LYEuhhp.lllFllLhhhh....++..hthh.GtlhuhallhYulhR.hhlEhhRp..sshh............hhhh...............lphuQlLSl.hllhGlhhhlhtp ................................................................................................sPlhhpl.G..P....l..........sl+W.....YGlhhlsG.hlh.Ahhl...up..+..c..hp+t..................................sh.s..p...-...p.l..-...ll.h...h...u.h.lu.s...ll.GuRlhYV......l......aph............................shY...............hps..................P....hp.lh..........p.lW.pG............................Gh.....uh.H.GGll.G...shls..hhl.a............................................................s.+..+.p.p.....hs.......hh.p.hhDhl.APslslu.uhGRh.GNF...hN...tE.haGc............ss......shs...................huhlhs.........................................................................................................thshh..hHP....o..LYEuhhp.llhFll..L.h...h.ht....................++.................h............t......G....tlhu....lalhhYuhh...R.hhl.E.hh.Rp.ss.h..........hhhh.............................lphuQl..LS.ls..h..l.lh.Glhhhlh..t...................................................... 
0 398 772 979 +3194 PF00556 LHC Antenna complex alpha/beta subunit Bateman A anon SCOP Domain \N 20.50 20.50 20.80 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.65 0.72 -4.11 28 509 2009-09-11 14:13:18 2003-04-07 12:59:11 15 1 110 48 157 438 71 38.60 27 65.17 CHANGED ptchhclac.hhsh...hhhhsllAllsHhllhs...tpsWls .................chhplas.hhsh...hshhhslAllsHhllhs.....hpsWl......... 1 40 76 92 +3195 PF04991 LicD LicD family Moxon SJ, Bateman A anon Pfam-B_5278 (release 7.6) Domain The LICD family of proteins show high sequence similarity and are involved in phosphorylcholine metabolism. There is evidence to show that LicD2 mutants have a reduced ability to take up choline, have decreased ability to adhere to host cells and are less virulent [1]. These proteins are part of the nucleotidyltransferase superfamily [2]. 22.60 22.60 22.90 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.53 0.71 -4.22 142 2479 2012-10-02 22:47:23 2003-04-07 12:59:11 8 25 1053 0 634 1816 209 187.40 23 55.89 CHANGED sccpslphalstGoLLGhh+csshlPWDsDlDl.tMshc-hp+.Ltp.hhsp.......h.............................................phlh-ssohhhpptptstp.............csGla.IDIhsl....s.hs................................................................................................ht.ttpphht...................ppc.hhshspl.Plpps.FEGh..hhlPsshcphLpptYG 
..........................................sccpslp.aalstGTLLGAlR.c.p.G...h.IPWDDDlDl..sh.R.c.D..Y...p....+...hhp..hhpp.....h.t.................................h.h.t.............h.................................tplhc.t.ss.hhhpp..t..ht.h........................................pulh..lDlhsh........Dhhsp........................h....................................................................................................................................................h..................h.....p.th.p..tp..............................hshtp.s.phhs.h......................pp..hh.sh.p...ht......hh...h.FEsh....h.hPtpacphLpphYG.................................................................................................................................................................................................... 0 221 392 560 +3196 PF01291 LIF_OSM LIF / OSM family Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.50 20.60 20.30 20.30 hmmbuild --amino -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.17 0.71 -4.31 5 88 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 43 8 39 131 0 150.60 44 76.61 CHANGED sLLsQLQNQssLhssou.oLL-PYI+lQGLssP.sLcctCsp+ssDFP.SE-sLpcLoRhsFLpTlsATLGslLppLocLQQcLscsAch...........KLssAccNlRGLtNNVaCMApLLp+Su...hsEPTpss.G...PsPsTossDsFQRKltGCpFLtGYHRFMuoVGQVF ........................................................LhsQlppQhs.lp.sou.sLh..Yhpu.QGpsh.......P.....s...L...c.....c...h.C.s...s...sss.FP.....c...s.s...hsphs+..h..st..Lh...p...l.hshLuss...L...sslTc....QchLssssh............LhpKLpsstsslRGLhsNlhC.hlsph.apsu....phsss..........sPsT.sspDsFQ+KhhGCplLtpY+phhuslupsF................ 0 2 2 8 +3197 PF00549 Ligase_CoA ligase-CoA; CoA-ligase Bateman A anon SCOP Domain This family includes the CoA ligases Succinyl-CoA synthetase alpha and beta chains, malate CoA ligase and ATP-citrate lyase. Some members of the family utilise ATP others use GTP. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.80 0.71 -4.62 20 8836 2012-10-02 00:59:22 2003-04-07 12:59:11 14 29 3569 68 2607 5875 4883 125.00 32 32.56 CHANGED lhsGGTLAhEshtllpts....................supstshlDlGsDsas.........ssphtctlphhssDs.cVpsILlslhlGhGss-t.AuullpAhccsp.......tplPlVuplsGTssD...t.h..pptphLpcuGltlhsusspAstsstsl ...........................hspuusLshpshc.lpth...............................G.hs....ushl..slGGssh....................spphh-slchhhs..Ds...ps..c.....uIlh.h.hG..............sps-t..As.u.h.l.p....A.hppht..........................thPlVshl..t...G..ssAs..............tttt..hl.h....s...u...G.h....s.hsssc.hs.tstt..h.................................. 0 824 1565 2169 +3198 PF00412 LIM LIM domain Finn RD, Griffiths-Jones SR anon Prosite Domain This family represents two copies of the LIM structural domain. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.69 0.72 -3.90 38 16362 2009-01-15 18:05:59 2003-04-07 12:59:11 17 199 437 88 8615 14824 48 57.20 26 24.59 CHANGED CuuCsctIh....sp.hs..uhscsaH.pCFpCspCpp.Lssss....ha.p-u..c....lYC+p.cahcth ................................CstCsp.Ih...................stt.h.....h.................u..........h.......s..........p.....t...a..H.....p..CF......p..............C.........s......p...C.........p..p..t....L....s..sts..........ah....p.....cs....p...................haCpp..sa.t..h.................................... 0 2081 3129 5636 +3199 PF01803 LIM_bind LIM-domain binding protein Bashton M, Bateman A anon Pfam-B_1352 (release 4.2) Family The LIM-domain binding protein, binds to the LIM domain Pfam:PF00412 of LIM homeodomain proteins which are transcriptional regulators of development. 
Nuclear LIM interactor (NLI) / LIM domain-binding protein 1 (LDB1) Swiss:P70662 is located in the nuclei of neuronal cells during development, it is co-expressed with Isl1 in early motor neuron differentiation and has a suggested role in the Isl1 dependent development of motor neurons [4]. It is suggested that these proteins act synergistically to enhance transcriptional efficiency by acting as co-factors for LIM homeodomain and Otx class transcription factors both of which have essential roles in development [2]. The Drosophila protein Chip Swiss:O18353 is required for segmentation and activity of a remote wing margin enhancer [1]. Chip is a ubiquitous chromosomal factor required for normal expression of diverse genes at many stages of development [1]. It is suggested that Chip cooperates with different LIM domain proteins and other factors to structurally support remote enhancer-promoter interactions [1]. 19.80 19.80 19.80 20.60 19.60 19.30 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.25 0.70 -5.26 42 464 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 238 0 283 431 0 206.00 32 39.09 CHANGED hhRLhpasccL.....ssh.......................sppssltaWppFVpcFFs.........ssushRhslpttss.............................................K.pFElshshlPRaa.pohFsoGlpchphhl.ptsp-pshssushhl-ss+sshhhhatsss..........................plsscGpL+shF.................s.phKIchh-hsspsHpEhl.sRshlt.hht...................t.....t.chspspphp...tt............l.......pslsp.hGlspsshphLclu-llspMpsLhsapppps.luPh-uL+p 
...................................................t.hRlhph.c+l.....pph........................sppss..aWctFss-FFp.........ssAhhphshshtsss..............................................K.pa...pIstsllPRaF.pohF...........cuGlp-hhhhl....thp.c..p.s....hts.......s.....tlhl-sspsshlo.ascsh..........................pVs..s-GpLhlpF.........................cs.h+IcsWcFs.............h+pa...cEhl.PRshlt.tsp..................................-.phhsp.ht............................................................................pslsc.hGloptslp.hL.............cls.llpsMp-Lhshp+phs...luPh-sL+.................................... 0 75 140 221 +3200 PF00538 Linker_histone linker_histone; linker histone H1 and H5 family Bateman A anon Arne Eloffson Domain Linker histone H1 is an essential component of chromatin structure. H1 links nucleosomes into higher order structures Histone H1 is replaced by histone H5 in some cell types. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.35 0.72 -3.88 20 1947 2009-01-15 18:05:59 2003-04-07 12:59:11 14 46 377 9 991 1945 10 71.20 32 23.85 CHANGED uHPsht-MIssAIpuLKERpGSSh.AIpKYIps.sY.chslss..hshhl+puLK+sVpsGpLhpsK......GusGSF+Luc .......................Pshtphlh....pAI.p...s.....L..c..E...+...s.G..o...ShtAIpKalps..pa...p....p..h...s.s.............h.t...ph.l+tsLKph.VspGpLhpsK..........G...sGoa+Ls.......................... 0 258 429 691 +3201 PF04454 Linocin_M18 Encapsulating protein for peroxidase Waterfield DI, Finn RD anon COG1659 Family The Linocin_M18 is found in eubacteria and archaea [1,2]. These proteins, referred to as encapsulins, form nanocompartments within the bacterium which contain ferritin-like proteins or peroxidases, enzymes involved in oxidative-stress response. These enzymes are targeted to the interior of encapsulins via unique C-terminal extensions [3]. 
24.30 24.30 24.30 24.40 23.60 24.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.20 0.70 -5.48 59 323 2012-10-03 06:22:39 2003-04-07 12:59:11 7 3 313 13 123 298 12 246.00 34 89.18 CHANGED Ms..LtRc.APlostsWppI..-cpstpsh+ppLsuRRhl-ltGPhGhshsulshsclpthps.ttt............lpsthRpshPlsplphsFpLstp-l-sscRsutshDhssltcAAcplAhtEDchIFpGhstsultGlhsssupttlpls..ssstshhpslscAlspLpps.....GhsG.PYsLlluschYstLtchtsp.G.hs.hc+lccll.susllhuPslcs..ullloscsush-LtlGpDlulGYhupsspshphhlhEohs .............................................................................Ms.LhR-LAPlopsuWtpI..-pcsscoh++plu......GRRlVDVssPh..G.shuuVssG+ltplpsssps............Vtsp.hRpstPLlcL+VsFsLsRp-lDsl-RGupD.sDhpslc-AAcclAh.s.EDcsIF.........cGa....s.........sAu.....I....p.....Glpsu....su.s.st....l........s......L........s............pc..s..pshscslupAlspL+..h..u........GlsG...PYullLus-s..Ysplsc.s...s...-p....G.YPlhc+lp+ll.s.....GcIlhuPulcG..AhllosRGGDa-LplGpDluIGYhuHcsp.sVpLalpEohT.............................. 0 44 82 104 +3202 PF03583 LIP Secretory lipase Finn RD anon Pfam-B_3085 (release 7.0) Family These lipases are expressed and secreted during the infection cycle of these pathogens. In particular, C. albicans has a large number of different lipases, possibly reflecting broad lipolytic activity, which may contribute to the persistence and virulence of C. albicans in human tissue [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.75 0.70 -5.21 12 1102 2012-10-03 11:45:05 2003-04-07 12:59:11 9 8 480 0 414 1391 144 260.10 24 62.70 CHANGED hQhuusho..Tl.TQh-hhhlsshLppGaaVVlPDYEGPKo.....TFsVGRQSG.pAsLDSIRAsL+otphoGlssDu+VulWGYSGGulAouWAAtLQPpYAPELpps.LlGAAlGuhssNlTuhscusDGolFuGlls.uLsGlANEYP-h+phlhpclsctup.shcphsptClusulhcashpphhTG.p+sFppGhslLcs..sls+hlp-NtL.hhspphlPplPlhlYHGshDpIlPItsscthhpsWCshGhsslEFuEDhhsG.......HhsEshsGAPAAloWlpsRFsGcsslpG .........................................................................................................................h...................p..h..l...sth.L..s.pGasVs.ssD.....Y....p...G......ss..............sah.s.....s.....p.....t...t.u..tu.....l..LDu.lR.A....uhp...h...t..t............s.l......s........s........s......l...slh..GaStG.G...h.A.u....h....h.A...A....p....h.t.....s.....s.......Y...........A....P....ELs........lh...G.s...s.h...Gu.s..s.........s....s.....l.....t..........t.....h.......h........p..........t..........h........s.........s..........s............h..............h..........s......G.......h..............h..............s..........h.......s.....l...h........G.l........t....p..t...a...P.....p...l...p..........l...p...p.hl.ss.p..u....p...t..h...h....p...p....h.p...p....t..C...h...s..t...h...h..h.t...h..........s.................h...t...p...h.......................................h.........t..s........p.h..h...t....p..............htph.h...p.p..p.....th........................p....h...hP.....s......h....Plhl....hp................u................h.t................D....t....l.....l.s....h.t.s.s.pt.h.h.p...p.ass.......t.........G...............ss.l.p........a..t..t.............h..s..t..........Hh....h........s....h...s..........s...h...t.altthhtt...................................................................................................................................................................
............... 1 83 269 368 +3203 PF03279 Lip_A_acyltrans Bacterial lipid A biosynthesis acyltransferase Mifsud W anon Pfam-B_1803 (release 6.5) Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.61 0.70 -5.54 13 5234 2012-10-02 00:16:30 2003-04-07 12:59:11 8 10 2565 0 1047 3702 3078 280.50 24 90.64 CHANGED phpFphphhhPpYahs.WlGluhlhllshhPhshhctlsstlGthht..hhhpctpcpA+pNLshsFP.-ho-sE+-pIlcpshpohuhslhEhuplshhs...ccclpcphc..............htGLEplcphhppscullLhssHthsh-luuhhlsppts..shushtppptN.hlsalhsphRpphstclls+pss........lcshlcuL+pGc.slhhlsDpDhssc.ulhVsFFus.sssssssuhLuh+os..AsllPlhshhpscs...ttashplpPshchp.pt....DspplspthNchlEphItspPEQYhWlp+paKo ...............................................................................................................h...................h.h.h....s.h..h.h....h...l.s.hl.Ph..hh..hh.l.u...t.tl.Gthsh......hh..h..p..p..pt...c.h.s..p..t...N.L.p.h.s...F......P......p.......h.o..p........t........-........+......ctll.t.c....a.psh.u.h.s.h.h....Eh.s.h.h.h.h.hs........s.p.c.l.p.p...h...h.c...................hp.G..h..-...t...l....c..p..h....t..t..p..s..c..u..ll.l.l.s.sHhhs..hE....l....s....u...h...h....h....u....t.....p....t........th...u...s...h...h...............p......p........N...shh..-....h..l.h.....p......p....s..R..t....+......h.....s....t.....p....h.....l..s...+......p..s.....................l+...s.hl.....p...s....L.....+.....p....G....p...hlh.h..h..s.D........p...........D...h.....u..........c.......t....u...l.a....l...s...FF..u....h.......s...s...s...s.s.s...s.s...h.l.h.p...ths....As.l.l.s.h.h.......s.....h.....+..p..scs....................ta...p......l......h....l.....t.....P...s.....h....c....s......s..t.t...........Dt..p....p....s....s...t...h....h...N....c...hl...Ep...h.ltttP-QY.h.Wha+RaKp................................................................................. 
1 274 606 844 +3204 PF00151 Lipase lipase; Lipase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.18 0.70 -5.22 16 1975 2012-10-03 11:45:05 2003-04-07 12:59:11 14 31 255 16 1058 2017 14 262.60 27 71.62 CHANGED ppVshtplss.upp.shss.shpcsh+.h..psPpphc..s+FLLapscs....shQhh..ucssTlcsspFsss+hTphIIHGasscG......................pEsWlschscshhph..cssNsIsVDWtsuupshYs.AstNl+lVGtElAthlshLpsphsas.psVHlIGaSLGAHVAGtAG+phsG....plGRITGLDPAcPhFpsssp.sRLsPuDApFVDsIHTsst.....LGhGhoQpVGHlDFFPNGGpp.hPGCppsl...........th...hhtCsHhRSh+Yas-Slh..Nsc.sFsuasCuShppFppscCasC.tt...pCspMGah.hscashppttlptcaaLpTsspSsF .....................................................................................................................................................................................................phhlh...t....tt........t......................h......................p.....s...h..t..t..s..t...a.....s.p.p.shhllHGatsss.................................pp.hh..p.h.h.p...s.hhp...t.......t.s..h.N.lls..lDW......t.....t......h.u.........p........t.........h.........Y..................u..s.t.s..s.p..h..l.Gt..pluph..lphL......t......p..h...s........h.s...h.....p..p.......lHlIG.aSL.GAHl.uG..huGph.hpt..........pl..s..R...IT..............G..L.....D...PAt........P.h....F.p..........t......s........s.........t..............+.....Ls...s.D.A.pFVD....lIHTss...............h.....sh...G.h....pshG+hDF.YP.N....G....G.p...QP.GC...t.........................................................h...CsH.Ruhphah-Sl........p.p..sh..hu..h...C..s..s.h.p.t.......h....t..s....C..hs......................tps...hG......h.....t......t..............................................hhh.stt..sa.................................................................................. 
1 307 390 782 +3205 PF01674 Lipase_2 Lipase (class 2) Bashton M, Bateman A anon Pfam-B_968 (release 4.1) Family This family consists of hypothetical C. elegans proteins and lipases. Lipases or triacylglycerol acylhydrolases hydrolyse ester bonds in triacylglycerol giving diacylglycerol, monoacylglycerol, glycerol and free fatty acids [1]. Swiss:P37957 is a extracellular lipase from B. subtilis 168 [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.38 0.70 -5.03 15 461 2012-10-03 11:45:05 2003-04-07 12:59:11 13 7 242 42 222 809 70 188.40 27 62.77 CHANGED psPllhlHGsu........s.tussapphhphFhccG.YssuplYATTaGcssp.sshppsp....pCpal.pplRchl.AVttYTGs.+VDIlAaShGsPIARKAIlGG..pCs.DssssLGssLsppVcTFlulu..............GsNtGhssC.u............hh..hC.NhpsGLh....C....................tScFLpDINsps+h-.uptlaoIaSpsD-llt....tphsCG+pouhIPsucshp.YcthsHppshppTsthQhp ...............................................................pPV.VhlHGsu...................s.sh.hs.a.st.h.tsh...Lts.....p.G...ast..........l..auh.s..a...s..st..st...........s.....hpsst.................h.....s....t...............p........l.s.pF...l-....p.V...L...t..t...TG....A.pKVD...lluHS.G.u.s.l.sRhY..l..c...hh.......sus...c....p....h.t....tl.s..s.......h.s.p.h.t.s.......h.l.u.l.t.........................................u...s...s.h.sh..s.h..................h..hs..s....th....................................................................................Stal.......ppl..N....s....s...s.....h.....u..thhslh.ophDphlh..................................................................................................................................................................................................................................... 
0 88 142 214 +3206 PF03280 Lipase_chap Proteobacterial lipase chaperone protein Mifsud W anon Pfam-B_4313 (release 6.5) Family \N 21.10 21.10 21.30 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.31 0.71 -4.78 34 328 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 282 2 63 274 15 184.30 29 59.61 CHANGED sLhppYlsY+pALssLp..............tshtsphsl.sslpphhpphtsLpppaFuts.tpuhFGsEpphpphslc+lpItpsssLospp+tptlttLpspLPsslppu.ppptpt.pcltpttpth.tpG.ssspplhtht.tphlGs-uApRLtplcppctsWcp+hssYhppRspI...ssslspsp+pttlspLRpptF.sssEth ..........................sLappYltY+psLupLp...................sttssthsh.sthpphhpphtsLpt+hFu.sttpshFupEpthpphsLc+h.....c.Ihpst.sLsst.p+tptlp.t.Lh.....tp..h.Ppthppu.....p.........tp.sph.tpLpp....tttt......h..ttt........uss..pphhttR...sthl.G...s-.uApRLtpL-ppcssapp+hssYhtp...RspI.......ps..tLS....sp....-+ptpIpp.LRpppF.sspph.................................................... 0 9 23 45 +3207 PF00657 Lipase_GDSL GDSL-like Lipase/Acylhydrolase Bateman A, Mistry J, Molgaard A anon Prosite & Pfam-B_543 (Release 7.5) Family \N 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.79 0.70 -4.54 65 4517 2012-10-02 11:02:24 2003-04-07 12:59:11 17 78 1181 20 2407 11569 2075 269.30 15 70.51 CHANGED lhshGDSloss........................stsh.tshtshl.t........................shshshhstuhuGps...........................................................................................t.hht.h.ttsthsssllhlhlGsNDh..........................................tts.ptshsphhsplpphlptlp..ptssp.....shhhhsts....................................h.t.tthpphsttasphlpcls.......................tthphshhDhaphh.ph.............................................................................hhhDsh........Hsospupphhuctl 
............................................................................................................................................................................................................................................................................................hhsFGDSlsDs...............................................................................................s......t..p.....h...............................................................s......h.s...h...s...h.......s...h....s.........s..G.th..s.ssthh...........................................................................................................................................................................................................hht.....h...h...t......t........t....s...t...t........h....h.......s.....p....s..l........h.....h..l......h.G.s.N..Dhh..................................................................................................t.....p...t....h..h.....s...t....h....h....s....p....h....p........p....h.......l....p....p............L....h.....ph.....G..u+..........................p..h......h.l.h.shs...shs...........................................................................h......................................t.....t...t.......t....s.....h.p....t....h...s....p......h....s.......t...h..a.....N..p...t...l...p.phl...tt........................................ht.t..h..............sh.p.h...h..h....h.....D...h..a....s...h.h......p.h.......h.tp.stthsh......sh...............................................................................................................................h.s.ppah...h.h.....D.s.h.................H.opt.spphlup..h............................................................................................................................................................................................................................................... 
0 460 1376 1961 +3208 PF00061 Lipocalin lipocalin; Lipocalin / cytosolic fatty-acid binding protein family Eddy SR anon Prosite and HMM_iterative_training Domain Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The family also encompasses the enzyme prostaglandin D synthase (EC:5.3.99.2). Alignment subsumes both the lipocalin and fatty acid binding protein signatures from PROSITE. This is supported on structural and functional grounds. The structure is an eight-stranded beta barrel. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.71 0.71 -4.21 156 2221 2012-10-03 08:47:39 2003-04-07 12:59:11 18 19 289 489 974 3261 80 128.60 17 76.93 CHANGED sG.pWhhhuhts.....hcphhpthssshtphp...sssshthpthp...h........pss.p..sppts.shcps..ppshphshp.......tssp...phpsl.ph-hpshhlhhtptp....tt.pshhtplhscs.clstph....h.th......shspss.hhph.pps .................................................................................................G.pWh.hhs...ts......hpchh.p.t.h...s....l.s.h.t.thp.....h...sssshss.hhht.......h......................................pss.p........hpph...s......s.h..c..ss......ph..s..h...c..hshph.t.........hsssp.........chp...sl...sh.-h.t...s..hhl.h.hpptp......u..p..p..sh..htpl................s....cs...p.h....ph..................shtt..hh..................................................................... 0 111 187 385 +3209 PF00820 Lipoprotein_1 Borrelia lipoprotein Bateman A anon Pfam-B_1321 (release 2.1) Family This family of lipoproteins is found in Borrelia spirochetes. The function of these proteins is uncertain. 
25.00 25.00 40.00 25.00 18.30 20.40 hmmbuild --amino -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.71 0.70 -5.30 7 599 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 62 32 2 611 0 175.20 69 94.88 CHANGED sSShDEKsusphDLPuthc..V........hVSKEKsKDGKYsLcAhVDplELKGTSDKNNGSG.sLEG.KsDKSKsKLoIu-DLsphThEhacpss.ThVS+KVspKDtS.TEEp..h.cpGcLSpKplTRsNGTpLEYo-Mps-.susKAhEsLKN.lphEGsLsus.KTTLplpEGTVTLo+EIsKsGclplhLsDo.ootuoKKTusWsspTsTLTISsNSKKTKpLVFpp-sTITVQpYDSAG.TsLEGoAsEIKsL-cLKsALK ........................................tsp.DLPGtMc..V........LVSKEKsKDGKYsLhATVDKlELKGTSDKNNGSG.sLEGsKsDKSKVKLTIoDDLupTThElFKEDGpTLVS+KVs.KDKSS...TEEp..FNcKGc.lSEKhlsRuNGT+LEYTphpss.ssuK.ApEsLKs.hhL..EGsl.sst.csp.l..l.p.puTVsh........................................................................................................ 0 2 2 2 +3210 PF03202 Lipoprotein_10 Putative mycoplasma lipoprotein, C-terminal region Mifsud W anon Pfam-B_2205 (release 6.5) Family \N 22.40 22.40 22.40 22.70 21.90 22.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.96 0.71 -4.14 12 80 2009-09-11 23:29:08 2003-04-07 12:59:11 8 3 32 0 36 80 0 128.20 30 19.30 CHANGED sITQGPNlIGIHANEKENtETpKFVNWFLNsp.oWcspp.sptpppppp....pTsApaFAESASYILPLKEhFcps.....ptctspsp...........................t...NoascKAL-lFpplucsplluYSDPSDFRSGKFRDuIGusFN.AsVsSKs .........................sQGPsLIGIHu.N-KE.-ptTpcFVpWh.........l.sp...ppsapt.p..t.......p.....p.pp.............tTsupahscsuSYlhPhKphhsps.....p....ttp.............................tpNhhhcps.h.chhp.p.h..p..p..s..p..hs.a.p.-P.ushpSupFRcslsosassh.s........................................................................ 
0 23 34 34 +3211 PF03260 Lipoprotein_11 Lepidopteran low molecular weight (30 kD) lipoprotein Mifsud W anon Pfam-B_4108 (release 6.5) Family \N 25.00 25.00 28.90 45.30 19.80 19.10 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.77 0.70 -5.38 6 84 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 4 2 52 54 0 234.40 40 85.01 CHANGED VhusCVlAuSAuls-hsss.....sc.LE-cLYNSllsGDYDoAVpKShEhpcQuKGsIIpssVNpLIcDupRNTMEYAYpLWltsG+-IVKcYFPlpFRlIhuEssVKLI.KRDNLALKLGsssssss-RIAYGDucDKoS-pVSWKFIsLWENNRVYFKIhNTccsQYLKLu..sssssssD+hlYGssoADTaRcQWYLQPAKY-NDVLFFIYNREYNcALcLuRsV-usGDRpAaGHsGcVtG.P-lFuWhIssF .........................................p........sp.lp-pLYNsllsuDYDpAVppohpl.ppspup.lIpplVscLIcstcpNsh-aAYKLW..h.ssup-IV+chFPhpFRLIhspp.hlKlI.+p.shALcLussss.ts-RhAaGDupDK..TS.cVSWKhlslW.ENN+VYFKIhNschspYLKLu..ssssssGD+hsaGussu-opRcpWaLpPsc..a-sclLFaIhNREYsp.uLKLupsV-ssGDRhsaGpsGpVsGpP-hauWhIps...... 0 52 52 52 +3212 PF03330 DPBB_1 Lipoprotein_13; Rare lipoprotein A (RlpA)-like double-psi beta-barrel Mifsud W, Studholme DJ anon Pfam-B_3255 (release 6.5) Domain Rare lipoprotein A (RlpA) contains a conserved region that has the double-psi beta-barrel (DPBB) fold [3,4]. The function of RlpA is not well understood, but it has been shown to act as a prc mutant suppressor in Escherichia coli [1]. The DPBB fold is often an enzymatic domain. The members of this family are quite diverse, and if catalytic this family may contain several different functions. Another example of this domain is found in the N terminus of pollen allergen. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.87 0.72 -3.98 127 5162 2012-10-01 21:39:58 2003-04-07 12:59:11 13 43 2566 5 1785 4300 468 87.00 26 33.33 CHANGED ssssssuha......ssu.....susGpsaphps..sspts....................sps........pulsVplsDhsP................ssppthDLStsAatpluhh.........ps.Gl.ls.Vpa .......................................................................su.uuhY.......sst............susGp..s..a.p..hp.s.hsuAcps..............................................hsp..s+s........s....t+ollV..plsD+sPh...........................ss.s.c.h.lDLSt.sAhp.p.l.uhh.........ps.Ghs..Vp............................. 1 492 1115 1484 +3214 PF03640 Lipoprotein_15 Secreted repeat of unknown function Yeats C anon Yeats C Repeat This family occurs as tandem repeats in a set of lipoproteins. The alignment contains a Y-X4-D motif. 21.10 21.10 23.90 21.30 18.80 21.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.49 0.72 -4.52 22 995 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 347 0 428 953 61 45.30 32 56.64 CHANGED hshlppcD...Gsh.hshcGhsLYpFs+Dpt.G..suss.hss...sWssht .................htptc.....Gp.h.hs.hpGhsLYpFs+D.s.t.sGthpus...s.sss...sW.sh..................... 1 119 258 343 +3215 PF04791 LMBR1 LMBR1-like membrane protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_6189 (release 7.5) Family Members of this family are integral membrane proteins that are around 500 residues in length. LMBR1 is not involved in preaxial polydactyly, as originally thought [1]. Vertebrate members of this family may play a role in limb development [3]. 
A member of this family has been shown to be a lipocalin membrane receptor [2] 26.60 26.60 27.00 27.10 26.30 26.50 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.54 0.70 -5.91 9 832 2009-01-15 18:05:59 2003-04-07 12:59:11 11 11 260 0 554 801 5 364.40 17 72.65 CHANGED lhhlLhhllalhshhlls+ap+hsct............................tplshhlsshsLuluhsulhLLPhslhu.............sphhh.hspsh...........htalssSlltuLWphlalhSsl.lallhPFshhFhESpsFssppKt..lpuplhEshshhhLhulllLsllhVhusl..........................h.................uhhshhphahshlhSssohhGllLLllhsshGlsch.sshhphhl+....................................spllpDspcph.p.shtpupls+clpp......t.....thtt...t...u.tt.hh..pt.hts.tph..t......................................................................................................................................................ssthphshshshshhsLLhhsshplLhVstshlplllshstlshhs.......ttsslthsohohhGhhGsslphllIhYLhhoohsGhaph.h....hphht.chccTshsphlhNsuhlLl.sSsLPlh.phlGlspaDhhssau...................slphlG.hhhlhlaplhFshloshsL 
........................................................................................h.....hhhh.hh..hs....h....h..hlh.ha..tp...t.pt...................................hh.shhh.shhshhhshh.hlh.llPhslh........................................................................................................tt.......................................h.th....h..st.....t....h..h..h.......h.......W...........h.......h.......ahhs.hhhh.......hl..lPhh..ah...........-.......u.......ts.....s.s....t+...........lh...tt...l..h.t..s..h......hh......h..h..hh..h..h..l....hh..h.hh.h..hh.hh...............................................................................................................h...........h.t....h..h.sh..hh..u...h..h..shhGhhl.....hl....h.hhuhG.lst.l...Phshh..p..h..t.........................................................................................................................................p...........h..........................................t............h.............................................................................................................................................................................................................................................................................................................................................................................................h........h.h.h.hh......h...h....phhh..t.h...h..............................h...........................................h........h..h......h.h..h....h..ah........h.ssh.s.........h.t..h.....h..h...h.ttp.......hh.......h.p...sh...phh...th................................................................................................................................................................................................................................................................................................................................................................... 
0 202 313 445 +3216 PF03923 Lipoprotein_16 Uncharacterized lipoprotein Bateman A anon COG3056 Family The function of this presumed lipoprotein is unknown. The family includes E. coli YajG Swiss:P36671. 20.90 20.90 21.10 22.90 20.50 20.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.58 0.71 -4.88 25 800 2012-10-01 20:48:06 2003-04-07 12:59:11 8 2 789 0 100 348 15 156.60 50 82.07 CHANGED Pphs...ssp.ssspshslulsotDpRssphluplscssp.lphlssopslchhlppsLppphsupGapl..sssussplplplpchhssVpcushpachsoclplplhupsspG.chsKpYsussshpGs.hoAssscIcpslNplLspllscIhsDsELssalp .....................Ppls.LPQQDPoLhGVTVSI.s.G.A.DQRsDQ..ALAKV..s.R..c..s..Q..l.Vs..LTASRDLRFLLQEVLEKQMTARGYMl......GsNGs...VsLQIlVspLYADVoQGslRY.N..IsT.KADIuIlATAtNG.sKh.oKNYRAoYsl..E..GA.FpASNcsIscAV.NoVLoDsIADMuQDTSlppFIK................................. 1 13 37 71 +3217 PF04200 Lipoprotein_17 Lipoprotein associated domain Bateman A anon Pfam-B_3382 (release 7.3) Domain This presumed domain is about 100 amino acids in length. It is found in lipoprotein of unknown function and is greatly expanded in Mycoplasma pulmonis. The domain is found in up to five copies in some proteins. This family also includes the Mycoplasma arthritidis MAA2 variable surface protein. MAA2 is implicated in in cytoadherence and virulence and has been shown to exhibit both size and phase variability [1]. 
22.40 10.00 22.60 10.10 22.10 9.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -10.47 0.72 -3.84 272 508 2009-01-15 18:05:59 2003-04-07 12:59:11 7 17 40 5 287 476 0 92.90 16 31.67 CHANGED pls..pl....p..shphpssst...............tstlPS.plp.spsl.............................h.sshsapl..........phhp...........................ssDppGsL.plp..l.......ph.....phss.................pt...shp.lsGFps .............................................................t.........t.php..tt...........pshhsS..pls..ppslp.....................................ts.sss.shpl................phhp.....................ssDppG.sL.plp.h..................pl...............phss..s..........................pph......php..lsGFp......................................... 0 162 287 287 +3218 PF00921 Lipoprotein_2 Borrelia lipoprotein Bateman A anon Pfam-B_1509 (release 3.0) Family This family of lipoproteins is found in Borrelia spirochetes. The function of these proteins is uncertain. 21.20 21.20 21.80 21.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.48 0.71 -4.24 16 1748 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 30 4 24 1776 0 138.10 61 67.07 CHANGED lKpIV-sshKs..........tsss..sptsussspKssuKlhussusss..usus....susKAAAtluuVoGt-ILpuIspupct....tt.stsh-tsssssuhuhu.t....ptphstsuspKsAslAuGIALRuMAKsGKFAu.ts..ss..ssAVpuAuuoAVs...KlLusLphhIRKTVcusLcpl+EAlc .................................IKEIVE.AAGGS...............EKL.Ks.......sA..A....u..t.G......E.s.NKsAG.KLF.G.K.A.G..A..u..A...p.G.DSEA................ASKAAGAVSAVSGEQI.....LSAIVpAAsA..........A........-Q-Gc.KP.t.-AKN.PIAAAIGct....-sGA..-...Fs....p....-..t..M...KK....D..D...QIAA.AIALRGMAKDGKFAVK..ss.....EK.....tt..........................................................s............................. 
1 1 1 1 +3219 PF00938 Lipoprotein_3 Lipoprotein; Lipoprotein Finn RD, Bateman A anon Pfam-B_1076 (release 3.0) Family This family of lipoproteins is Mycoplasma specific. 21.20 21.20 21.90 30.70 21.00 20.10 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.78 0.72 -4.21 16 55 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 4 0 21 46 0 81.00 39 33.93 CHANGED sushpsllppsospchshu+shcLopsK+NLIssLKcuYEssPcpTsplLLsAWKhoL.-tcILpcphs.s.+F.psFGss.scpslpP ...ushcsllppooupchshu+shchspsKKNlIsuLKcSYEsNPccTsclLLsAWK.oh.-sclh.pphs....F.ps........................ 1 19 19 19 +3220 PF01298 Lipoprotein_5 Transferrin binding protein-like solute binding protein Finn RD, Bateman A anon Pfam-B_893 (release 3.0) Family This family of proteins are distantly related to other families of solute binding proteins. 29.40 29.40 29.90 29.60 28.90 28.90 hmmbuild --amino -o /dev/null HMM SEED 570 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.99 0.70 -5.56 37 819 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 180 13 33 777 8 399.10 24 87.70 CHANGED 
GGSFcl-s.....................stsssspptpP+Yp...Dssopp....pctphschppPuhGhuh+lhtpNhh..............tcc.s.lsppDh..h...................................tphptl.pthp...psspp............................................ps.p..t...ttps.pYVhSGhhYhtshs..p.tpp......hhtG..G..alaYpGppsuppLPs.....sspspYKGsWcahTs.sct...spc....ht.hst..tttssschuAhS.tp.........ctp.tsstpschuhsSEasV-FusKplsGpL..htNtphp.t.ptpstcppphYs...l-AcltGNRFpGpshs.spcsppp.....aPFso-...............spLcGGFaGPpuEELuu+FLssDpclhsVhuAKppsc......................ttpptthpshhDAhp.....................shsshsp+plss......FGcustLllsuh.IsLhs.............................................tssshtsppththtpcphpV.sCCsNLsYlKaGh...................................lppcsspt....................s........................................hFLpGpRTs.....pcplP..ppGs.scYpGoWhGal.ts....Tuaussuspppsts..tA-......FsVsFusKploGpLpspsspp..ssFsI.susIcu..NGFpGoAposc....shslDspsops.phh.h.supVsGGFYGPsAsELGGhFsassst...................................sts.supusVVFGAK+Q .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hhst....................................................p.ttht.pp.hss........hs.phstLllsG..lsLhs...........................................................................t..p..ph.p
.h..ss...psp.hpY..lpa.Gh......................................................hp.tstt..........................t..............................................................................ha..lpGp..s.............tptl.P......tup.shYp.Gp.hhthh..tt......................h....sst......tt....s...tAc.............VsFusKplsG.......p.l..ps...t.st.t............shph...pu.tIpu........NuF.pGshpsss.................................sspVpGtFYG......spupEluG.h..t.............................................t.tt.thhsV..FuuK+p.................................... 0 10 15 29 +3221 PF01441 Lipoprotein_6 Lipoprotein Bateman A anon Prodom_1149 (release 99.1) Family Members of this family are lipoproteins that are probably involved in evasion of the host immune system by pathogens. 24.60 24.60 31.60 31.50 24.30 24.10 hmmbuild --amino -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.18 0.71 -4.45 42 763 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 58 21 5 741 0 168.60 61 91.18 CHANGED -.tSspsAcpsups.sLscISKKIpDosAFshuVKEVETLlpSIDELA.KAIGKKIpsss...sLsss...usp..NsoLlAGAYsIushIspKLssLpsspt.....LKpKlpcuKpsScuFssKLKspHu-LGtss...soD-sAKpAILKoNss.psKGAcELccLspuV-uLhKAAptAlssulKELT.uPVhA ..................s..sSsNsADESsKGPNLTEISKKITDSNAhVLAVKEVEsLLuSIDELA.KAIGKKIcpNs..uLss-..usp...NuSLLAGAYsISo.LITpKLssLcsppt.....LKcKIpcAKKCSEsFTsKLKssHAc.LGhps...sTD-sAKcAILKTsus..KDKGAcELccL.cSVEuLuKAAp-h.LsNSVKELT.sP.............. 0 1 1 1 +3222 PF01540 Lipoprotein_7 Adhesin lipoprotein Bashton M, Bateman A anon Pfam-B_615 (release 4.0) Family This family consists of the p50 and variable adherence-associated antigen (Vaa) adhesins from Mycoplasma hominis. M. hominis is a mycoplasma associated with human urogenital diseases, pneumonia, and septic arthritis [1]. An adhesin is a cell surface molecule that mediates adhesion to other cells or to the surrounding surface or substrate. 
The Vaa antigen is a 50-kDa surface lipoprotein that has four tandem repetitive DNA sequences encoding a periodic peptide structure, and is highly immunogenic in the human host [1]. p50 is also a 50-kDa lipoprotein, having three repeats A,B and C, that may be a tetramer of 191-kDa in its native environment [2]. 24.10 24.10 24.10 24.80 24.00 24.00 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.31 0.70 -5.38 3 40 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 3 0 2 41 1 200.70 50 99.44 CHANGED MKKSKKIFITLCGIAATAILPVATISCNDDKLAEKNGKEKADAALKQANALAEELKKNPDYSKILETLNKEIAEATKSFKEAGSYGDYPAIISKLSAAVENAKNEKKAIDDKNAQIA......................................................................................................................KELAERNAKIQSNIEELKKINNEAFELSKTVNKTIAEVEKKFKI-ccFKEQLENFADDLLDKSRQIDEFTTVTSTQEGFTLAELESFKEITTTWFNGMKSEWARVLDAWKNELTEIN...SIIKGVEELKKLSHEISEFSNSVKKTISELEKKFKIDDKTNK-EAKpFKNELENFADQLLNKSHEIDKFVTVTSARcDFSLSELESFKSFNTTWFNEMKuEWARVQEAWKDQLKEISTK ............................................................................................................................................................A-ENtKIppGhcELhKLScchpshucTIshTIsKLE.KKFpIDcsFKcQLhSTI-.LNKKSsElcTFsTVsohKc-FlLuELESFKEhNToWhp...cIhSEWtcVpcAWpcELsEIp.............................................................................................................................s............ 0 2 2 2 +3223 PF02030 Lipoprotein_8 Hypothetical lipoprotein (MG045 family) Mian N, Bateman A anon IPR000044 Family This family includes hypothetical lipoproteins, the amino terminal part of this protein is related to Pfam:PF01547, a family of solute binding proteins. This suggests this family also has a solute binding function. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 493 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.79 0.70 -6.00 3 61 2012-10-03 15:33:52 2003-04-07 12:59:11 10 4 56 0 17 843 30 438.40 27 76.45 CHANGED MKKQLKYhFhhhGlT....hSsILTACS.SopFVlANFESYlSP.LLLERAccK+P..LTFLTYPoNEKLINGFANNTYoVAVASoYAVSELtcpGLLpPIDWAKFNLKKosNuSsp..lpNtEDAK-LFTKpIt-ISpth........KDuKNsELLcWuVPYFLQDLVFVYRGEKIsELE..pcDVoWSDVIKAIV+...HKDRF......................NcNRLlhIDDARTIFSLANIVphE..sKNNolDVN.............................................PKEsslNYFsNVYESFupLGLK+sNLsolFVN.........SDSNIVINELAsGRRQGGIVYNGDAVYAALGGDLRDElsE...NplPsGDNFHIVQPKcSPVALD.FLIINpQQopFccAAHElIa-LAL-GAD..................QTKEpLlKTDEEpGTDD........ED.aYLYGAMQNFSYVNYVSPLKsIS....DEoTGIVuKcsppADhKp.hKQpS.o-Qpo.poEKEc..............sc.aDaYTcTLKuLLp...KsDShE...........LN-psKKLV-TIKKsYpIcKs-uIp.........................WsNLlEKPIoPLQRSNLoLSWLDFKp+a ..........................................h..........hh...h.h..u..hs............h...s...h..l...o.....u.s.....t......p......s.........c.......L....V..lu...Na..ssYlsP.plh...t.E...p.tpp.h..p.s...............plsYphassNEhl...h...s....tl....+....s...s...t.....YDltlsSsYhVs+LtppshlpKIsa....SK...h.s.....l..h..tpt...s.p..t...p..........s.pthpp.hs.cphhtl.tt.......................................ps.s..plL-YhlPYahpDL.lhsap..s..ppl..p.Lp..pppl.a..phhpth..p...p.pph.............................sp.s..K.hh.l-stpp.hsl....uphhp.t......psp.hphh..............................................ht..ph.h.hss.ppht.lt..........t.hpph..ss..hhhN..........ssSs.lls.ls.tph..ts.ul.hYsGDhhaAs......sG-.....p.E.hsp.............h.stp..s.h+.lc.pso..hhD.hhl...p.....s...tp......ptAYphlpplhh.tuhp............................p...............p.................t...h......s....h.NFsaltYsss...........................................................................................................................................................................................................................
...................................................................................................................................................... 0 10 14 15 +3224 PF03305 Lipoprotein_X Mycoplasma MG185/MG260 protein Mifsud W anon Pfam-B_4433 (release 6.5) Family Most of the aligned regions in this family are found towards the middle of the member proteins. 20.60 20.60 27.40 27.40 20.50 18.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.72 0.70 -4.99 17 77 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 34 0 32 75 0 232.50 27 33.10 CHANGED LlDFuoRlAKSFsc.lps.p.sKKus-lQGVLGlDSssNsLaousFAuGsGsYsNFFapl....psupsDFsNFhNK.so.oYpNLpclaNcaKsLIspNGLalN+G.GoYoSNapKFHQLAFuloSTSGahauFAspsu.K.RLpFssps.......hpaPphT.....ppl..+sPspspp.tsh..tspstspsNLLGshslpssph.......pt.t.t.....................t....s.pspu......KsIplYKspIsssKpts.sAlL............Ipsp .........................LlDFusRltKSFsp..p..p.sppssshpsVLGlDssssslasssFAs..usGshssF..hhpl............p...s....s..pscass.Fhs+.so.uYpNLpclasphpphlpppula....lspu..GsYoSs.atphHQhuFuIuSTuGYhasFsspsu..K..plpFspss.......hpaspho........ppl..pssspspp.tsh...t.pp..pt.psshlhphshpsspt...............p.............................t.tp...t..t.ptpt..............ch...hct....h.pttp.t........................................................................................................ 
0 20 30 30 +3225 PF00305 Lipoxygenase lipoxygenase; Lipoxygenase Finn RD anon Prosite Domain \N 19.40 19.40 20.70 20.00 19.00 19.30 hmmbuild -o /dev/null HMM SEED 667 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.13 0.70 -13.09 0.70 -6.54 16 1473 2009-09-11 06:47:26 2003-04-07 12:59:11 14 34 251 68 681 1491 13 407.70 28 73.57 CHANGED PusLhKYREEELcsLRG...-GTGchcta-RIYDYslYNDLGsPDpstp.sRPlLGGotpaPYPRRsRTGRtPTcpDPsoEpc.....s..hYlPRDEpFGHlKpsDFLshulKulsQsllPthpus...hp.hssEFcoFcDVcpLaEGGlpLP....hphlsph.Pl.hlc-lh+TDGpt...hL+assPpVlphs+ouWhTDEEFAREhLAGlNPslIcplpEFPsKSpLDsthYGspsSsITtEcLE.pLcG.hTV-EAlpsp+LFlLDaHDhhhPYlp+INs.sssKsYAoRTlLFLp-DGTL+PlAIEL.ShP+PsGc..u.sSpVahPu.-Gspu.lWLLAKAaVhVNDushHQLlSHWLsTHAlhEPFlIATNRpLSslHPIaKLLhPHaRsT.MNINuhARpsLlNusGIlEpohhsG+Yu.hEMSuslYKs.WsFs-QALPsDLlKRGlAlcDsouPaGlRLhIEDYPYAsDGLEIWsAIKoWVp-YVslYYtoD-slppDsELQAWWKElsEhGHGDtKDcPWWPKhQTp--LlcssTllIWsASALHAAVNFGQYsYuGal.NRPThoRphhPp.sTsE...a--lhps.pKsaL+TlssphQollslollElLSRHuoDElYLGpR-s.p.Wss-tcshtAFc+FGpKLp-IEccIstRNsD.sLp.NR.GssphPYTLLhPS 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tW.pD..FuhphlsGhNP.h.lp.....h.....h.....p.....................t..h..P..........................................s....t...l...............h....tt.......h..........t..............t..h........p......t.tpla..hh...D......a.....t.......h.........h.................h..............t.....h...............t.........t.........t..........................t......h......hhu..shsl.hh.....h.....p..................t......s......t.....L..hP.lAIp.l.....p.....P..............s..............................l..h.....h......P......s.............p.....s...................s........W.lA..Khas.ss-.thHp..hhsH..h...........lpTHh..h..Es..hhlAs....R..pL......s.hH.Pla+..L.L..PHhc.T.htIN..shARt.Ll......s..t.....t..........G.....h...h....-.........s.............h..................s.................u..h.p.h..th.h.h.tp...h..t..at....t....s..hPtsl.h.tRG...h.....................................l...sY.Yt.Dul.lWtsl.....pp..................alpthlt..h.aY...............s....s.....t......lttD..E..........LQsWh.p.-h........h...p...........G......h.........t...........h..............p.....t........t...s..h........p..h.po.........tpLhphhThhlahsou.HuAlNh......uQ.h.s.hhu.a.h..P..NhPs..p...P..t.............t.......t...........h.......................hts.hs..s..ts..h..h.....hh...hLu.p.............s.................l........Gp....h....t..................h.........p.......................t......httFttp.l..t.l..tt.l.thNt..t.......h..............................s.Y.hh...
............................................................................................................ 0 146 331 477 +3226 PF04778 LMP LMP repeated region Kerrison ND anon Pfam-B_2380 (release 7.6) Family This family consists of a repeated sequence element found in the LMP group of surface-located membrane proteins of Mycoplasma hominis. The the number of repeats in the protein affects the tendency of cells to spontaneously aggregate. Agglutination may be an important factor in colonisation. Non-agglutinating microorganisms might easily be distributed whereas aggregation might provide a better chance to avoid an antibody response since some of the epitopes may be buried [1]. 23.60 23.60 23.70 23.70 23.10 23.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.13 0.71 -4.47 6 52 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 10 0 11 48 0 142.60 60 48.35 CHANGED sANpLLspLsDsDscItcAKopL-pEIppANQAlsSNNsASMQSAKSSLDAKVsEITKKLETFNKDK-AKFpELEQTRKsI-EFIsss.KNNP....NYusLlppLTsK+DuKNSVTsSSNKSDIpuANsELKQALscApssKsQlDshsKSlKEQLsspIs ..............sANpL.spLoDpDspIQpAKo-L-pElpKAsQAltSNNTASMQSAKSSLDAKVsEITKKLETFNKDK-AKFpELcQTRpQIQEFINTN.KNNP.....NYS..ELISpLTSKRDSKNSVT-SSNKSDIEoANTELKQALsc.....ApscKsQsDNhsK....ShKEQLssols...... 
0 11 11 11 +3227 PF01451 LMWPc Low molecular weight phosphotyrosine protein phosphatase Bateman A anon Prodom_2132 (release 99.1) Domain \N 23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild --amino -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.85 0.71 -3.85 27 7884 2009-09-12 21:07:05 2003-04-07 12:59:11 16 44 4157 78 2043 5778 2186 133.10 26 83.15 CHANGED lLFVChGNhCRS.hAEulh+phh.t..hstth.plpSAGsp...supssc.pulplhccpGlslst+hu+plspp....hpphDlllsh.spsthtphsshhPt...hp................phplsDPh...tps....ssFcplhstIcptspphh .......................lLFVC..h.GNhCRSs.hAEulh+phh.........................................t............t.............p.............h....plp......SAGs..........t.....t...................u....p......s....scs.t......uh.p.s........h..p............c............t......G.............l................s............h.......s...s...........p..p..u.+...p...l...s..p..p............hp...p...h...DhllsM...sp.p...ph...t..p.......l....p.t...hhPs..............hp.ph...hhtp...................................................phslsD..Pa.............h.ts...........tsF.c.....p...shc.lpptspth................................................................................................. 
0 663 1320 1746 +3228 PF03548 LolA Outer membrane lipoprotein carrier protein LolA TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.00 22.00 22.00 22.00 21.80 21.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.63 39 2649 2012-10-01 20:44:21 2003-04-07 12:59:11 10 3 2149 7 649 1876 1519 159.10 24 75.94 CHANGED splpohsusFsQpshssptt...pspGphhlpRPshhRWphssPpcphlluDGcslhha-splcQs...h....htpslspoPhhLLh..psptphtppasls......tthtshhLpP+.ttssshpphplshsppsh.lpphplhDp.Gpcoplthpshp.ssslssshFpFs ..........................................................................t.thpohpusFs.Qp...s.........p........s....s..........s....h............t................................p.upGp...hhl.....p......R.....P....s........h........hpWch...s.p..P........t...p..........ph...l.l.uDGcpl..h..h...a...s....s.......p.....l.......c.....Qs.....shp.........hpp..s....h...s.....s.....o....Ph.......h...llh............tsp.ssh...p...p.a..........s...lp......................................................p......s.........t....t....p.......h...hL..s..P.....+....s......s....s..s.s......hp......p...h...pls...h........s........p......s.......s.....h...lpph...............p......hh-...p..p.s.p.p...o.s.hp..h.pshp..s.s.sl.ssshFpa.......................................................................... 
0 183 390 523 +3229 PF03550 LolB Outer membrane lipoprotein LolB TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.00 20.00 21.20 21.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.87 0.71 -4.64 24 1327 2012-10-01 20:44:21 2003-04-07 12:59:11 9 2 1272 3 243 817 151 154.60 36 75.40 CHANGED plppaphpGpluhh.....usppphuupF.Wppp.sppacLtLossLGpTtlplps..psssspLpsscGphhsussuctLlp.clhGhslPlspLt.Wl...........pGhPs...ssschplDsptpltplpp....psWplsY.pY.....pppsts.LPpplpLp.....pss........hpl+LhlspW ..........................p.lspapscGthAhh...........usp.pps.ApF.h.WQQs..tcpacLh..Lo.s.P.L.GsTtlpLss...psusspLsss.cGppYsAs-AEch.lt.cloGh..slP..lssL+pWl...........................hGl.Pu....sssch..pLD...s.p..h......+.L.spls................psWpVsYtsY......................sspsp.ss.hPpplcLs.....s..s.s........tcIKLhhcpW................................................ 1 45 119 188 +3230 PF04728 LPP Lipoprotein leucine-zipper Kerrison ND, Coggill P anon DOMO:DM04880; Domain This is leucine-zipper is found in the enterobacterial outer membrane lipoprotein LPP. It is likely that this domain oligomerises and is involved in protein-protein interactions. As such it is a bundle of alpha-helical coiled-coils, which are known to play key roles in mediating specific protein-protein interactions for in molecular recognition and the assembly of multi-protein complexes. 28.40 28.40 28.40 28.40 27.70 28.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.87 0.72 -4.17 19 1023 2009-09-10 20:56:13 2003-04-07 12:59:11 8 1 668 20 75 206 2 55.30 64 68.34 CHANGED SNAKIDQLSSDVQTLNAKVDQLSNDVNAhRoDVQAAKDDAARANQRLDNps.psY+K ................sA.KlDQLSS-VQTLNAKVspLosDVsAhR.......u.......sl.......p.......AAK-.......-AARANpRLDN.u.ppYpK..... 
0 7 21 47 +3231 PF02169 LPP20 LPP20 lipoprotein Mian N, Bateman A anon IPR002217 Family This family contains the LPP20 lipoprotein, which is a non-essential class of lipoprotein [1]. 22.10 22.10 22.10 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.99 0.72 -3.89 81 734 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 501 0 158 508 331 101.10 22 44.89 CHANGED ss........shssspph...lhAh..............+uuphsAh+pLup..plhshplsupss...................spshhhpsspl..pstVsuhl.........+...Gscllps.h.......ssspah..s.lplc ...................................................s.spth.....hhAhG..u..................+AAph-AhcpLAp....plhGhplsupss...................lpsthhp..s..p..pl....pspVsu.h.I............+.........suc.llcp.hh.......spsh...ah.splcL...................................... 0 55 103 136 +3232 PF04348 LppC LppC putative lipoprotein Mifsud W anon COG3107 Family This family includes several bacterial outer membrane antigens, whose molecular function is unknown. 
28.70 28.70 28.70 28.70 28.60 28.60 hmmbuild -o /dev/null HMM SEED 536 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.71 0.70 -5.95 6 1623 2012-10-02 13:57:41 2003-04-07 12:59:11 8 4 1023 1 224 1425 378 366.20 30 91.36 CHANGED pscsNAou-hYlp+ssQoQs.p-ppsa+LhAA+lhlpENclsQApALLtcLs..sLsspQplctuLlcAcluAs+ppspsA.pQLph....sLspLSsoQpsRYacstAplhEsctDslsAsKtRIphDphLosspc+ppNtD+hWuLLpshssuVlps..ssstssssLuGWLsLsphhss.hspPsQLppolpsWpst.PpHsAAphhPstLpsLhshpQsshoplALLLPLoGshthlupsI+sGFssA+......spsssslplFD.Tssp..ol-slhsQAppsGIchVVGPLlKpNV-hLhspsQ..hQslslLALNtosNs.cu.hupLCYaGLSPEDEAcuAAs+hWsDGhRpPlVlsPQN-lGcRsssAFs.RWQphuGoc.Aslcaas.PuDlshplp..........................sspppssDuVYllAssspLspIKshLss.....stshtlYAoS+ussu..NssP-pct.LsGlpFSDIPahhsssssphp..cls.+hspuchuhhRLYAMGsDAWhLhsphsEL+tV.PGasIDGLTGpLShsssCNVER-hoWhpapsG ................................................................................................................................................................................................................................................................................................................................................................................................................................h..........................................................hh.....h..............................................t..a....h..t..............................h...........t..s.........................................................................................................................................p....hulhLP....s...G......h..t..........up....hlppGh..uAh.......................s...p..........s......s......t..l..p....l..h......D...o..sut......sh.s..p..l...ht..Q...s.p.tp.s.s...shVlGPL...+...s....p...V....p..t..L.h..t............p..tt.............................................slss.L.A...LN........t..s.......p.....s......s..........t...........h.............h....uL....u.s..pp..-...s...t..h...A.ph..h...h....t.p..t..h.p..........sh.h
h.h.s.p.st...up.R.h..tsF..t.....att..s....tt........................h.....t...................p...h...t..........lt........................................................................h...s..h.hah.h.s.t...t.p.h...lhs.lt............hhss....Sp...........t.t.t........hpsl........hs.p..h.Phh..h.......t..........................................h....................................................R.h.....A.h.GhDuh...l................h.t.......h...........t..............t..th..G.oG.Lth...t...t..t....l.Rt..h............................................................................................................................................................................................................................................... 0 40 100 165 +3233 PF02684 LpxB Lipid-A-disaccharide synthetase Bashton M, Bateman A anon COG0763 Family This is a family of lipid-A-disaccharide synthetases, EC:2.4.2.128. These enzymes catalyse the reaction: UDP-2,3-bis(3-hydroxytetradecanoyl) glucosamine + 2,3-bis(3-hydroxytetradecanoyl)-beta-D-glucosaminyl 1-phosphate <=> UDP + 2,3-bis(3-hydroxytetradecanoyl)-D-glucosaminyl-1,6 -beta-D-2,3-bis(3-hydroxytetradecanoyl)-beta-D-glucosaminyl 1-phosphate. These enzymes catalyse the fist disaccharide step in the synthesis of lipid-A-disaccharide. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.20 0.70 -5.83 8 2446 2012-10-03 16:42:30 2003-04-07 12:59:11 10 6 2354 0 585 1902 2953 358.50 35 93.79 CHANGED IalSAGEsSGDhlGupLlpsL+pcY.....sh+FhGluG.pMpt.pGhcsLhshcElulhGhhElLs+Lh+Lh+lhccll+phlpppsDslIsIDuPsFNlpLtK+LRKpGh+h.IIHYVuPSVWAW+spR..spplt+hsDhLLAILPFEpsaacKhs.LcspYlGHPLsDpIphpss.cspt+-hl.lspsc.hlslhPGSRcuEIp+h.hslhspAtplppphsslphlVslssscact.h.phhtt.shplshlhhsstshcuhhsuDhALhpSGTssLEsuLstoPhVVsYRl+PloaaLAKhLlKlpY..lSLsNIlhsctlhPEhIQtcschphhuhtthhhLtsspp.tcpp+stpcchpphhphtpsptccphsphh .........................................................................................................................lhllAGEsSGDlL.Gu.sLl+AL+....t......ch.P................s.sc....FhGluGs.p..MpA...p........Gh...cuh.a.chc-LuVMGl.....l.....EVL.....t+L.cll+h..tp.clhc.ph.h.p.....pP.....DlhlsIDuPDF.Nlpltt.pL.....K.........p.....p.....G.l......c....................s......l.....+YVS.PSVWAW.R..p..p.R...............lhcIt+ssDh.lLulLPFEtsaY.c....+.......h.......s...l.s..........spalGHs.h.s..D..............t...............h..............s......h............p.........s........c.......c...........t.............s...s...+.............ph...L....ul..............s..............t............s..........t.........t.....h..lA.lLPGS.Rt.uE.lc.......hLhssFlp.....s.....u....p....hL.......p......p.......p.......h.....Ps....l.c.....h.l.l...P..h...s...s....s.....pp...c....p....h.......p.p.....h.....h........t.......p.......h.......s...s...............p.....l........s...............l...h.........l..........l.......c.........s........p..........s......p.....p......s........h.t........Au..D...suLluSGTAsL.EshLh+sPMVVuY+..h..p..s..h.o..a..h.................l...u..+..+...l..l...K.....h..p.a................lo...L.PNll.....u.....s.....c.......p....l......Vs....ELlQ.-csp....sppLut..tlh...lLt.s.s.....t....thht.h...............................................................
.................................................................. 1 184 368 488 +3234 PF03331 LpxC UDP-3-O-acyl N-acetylglycosamine deacetylase Mifsud W anon Pfam-B_3666 (release 6.5) Family The enzymes in this family catalyse the second step in the biosynthetic pathway for lipid A. 20.60 20.60 21.00 20.90 19.30 19.30 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.68 0.70 -5.45 10 2676 2012-10-03 01:04:38 2003-04-07 12:59:11 8 8 2359 35 672 1850 2808 253.70 43 87.84 CHANGED hc..Q+TLK+.VphoGVGLHoGcpspLTLcPAtsNTGIlFhRoDLss..shlPAchss..Vt-TthSTsLupc.sspIuTVEHLhAALtuhuIDNlhIclsusEIPIhDGSAtsFlhLIppAGIpEp..puscchh+IpcsVpVpcpDpaltshPssshclsaTIDFsassls+.sQthohs..hsp-uFtcpIApARTFGFhp-IEaLpspGLstGGSL-NAlVlD-.s+lLNtsGLRFtDE.VRHKlLDLIGDLhLlGpsllucahuaKoGHuLNspLl+pllsspc ....................p.Q+TLpp.lphsGlGLHoGcc.V.plTL...cP..As....s...s...oGlla.p.R.s..D..L......s............s....s.........sphsAs..sps..V..t.....-.....T.h..h..sT.sL.......ss....p....s.....s..........+luTlEHLhuALsuhGI.DNhllE.l.s.u.........s.........ElPIM.DGSAtsFlhh.ltpAGlpE.............sss.......K.........+.....al...+.I..pc..s..V..c...V........c..-....G.....-....K..a..s..c...hp...P.....h...s...........u.....apls..asID....Fs.c..P...s.Isp..Qp.hshs......hs...s.....p......s.FhcpIucARTFG......F..h.+......-...............l.....E......hLpu....tGLshGG.....S.h.-NAIV..........lD-.....cl......L.N......p..-G.............LRa.....c.D...EF.VRHK.hLDAIGDLhhh.....G.t..s.....l.lGtapuaKuGHsLNspLlctllsp............................. 0 209 429 564 +3235 PF02606 LpxK Tetraacyldisaccharide-1-P 4'-kinase Bashton M, Bateman A anon COG1663 Family This family consists of tetraacyldisaccharide-1-P 4'-kinase also known as Lipid-A 4'-kinase or Lipid A biosynthesis protein LpxK, EC:2.7.1.130. 
This enzyme catalyses the reaction: ATP + 2,3-bis(3-hydroxytetradecanoyl)-D -glucosaminyl-(beta-D-1,6)-2,3-bis(3-hydroxytetradecanoyl)-D-glucosam inyl beta-phosphate <=> ADP + 2,3,2',3'-tetrakis(3-hydroxytetradecanoyl)-D- glucosaminyl-1,6-beta-D-glucosamine 1,4'-bisphosphate. This enzyme is involved in the synthesis of lipid A portion of the bacterial lipopolysaccharide layer (LPS) [1]. The family contains a P-loop motif at the N terminus. 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.22 0.70 -5.48 173 2466 2012-10-05 12:31:08 2003-04-07 12:59:11 9 11 2274 0 558 1911 2496 293.20 33 91.66 CHANGED hthLLhP....lohLYuhlst..hRptha.....t.shh.pshch.slP.VlsVGNlosGGsGKTPhllhLsctLppp..Gh+sullSRGYGucsp...s..shhV.ssss............ssspsGDEPlLlAp..pss..ssVhVutc.Rspusptllptts...s-......................lIlhDDGhQHhtLt........RDl-llVlDu.....tRshGNGhlLPAGPLREPhs..pLpcsDh.l..lh.......sssps..ttt.......h...............................................thpLt..P......................h.....t...sspt.s...l.psh.h.........hAhAGIGpPp+FFsoLcp.hGhplhtspsFsDHasastp-lptltpt..p.........ll.hTEKDAVKht................................p.......hhhlslcspl.....ssshtphlhptl....p 
..........................h..hlLhPlShLYuhlsth+phha.............t.hh....psh.......ch..slP....V.lsVGNlosGGsGKTPhllhLsctL.ppp........G....h...+.sull.SR..GYGu+sp................s....shll.ssps........................sstpsG...DEPlLlhp.........+ss.....ssVsVuss...Rspu.s....ctllttts.....sp............................lIlhDDGhQHh.t.L.t........R.Dl-IlllDu........hR.h.hGN.s....hhLPuGPhREshs..pLcp.s.Dh.llh.............sus.s..p..t.s..t...h..............................................................hpLh.st.h..................................................hth.t....ssptps.........h.tt.h.ph...........lAhAGIGpPtRFFso.Lct....h....G.......h.phh.............t.......s.....hsasDHpsastt-..lptlhp...t...........ll.hTEKDAVKhp.s......hst.t.................p............hahLslcspl.......ssph..thl.....t................................................................... 0 177 353 466 +3236 PF03788 LrgA LrgA family Bateman A anon COG1380 Family This family is uncharacterised. It contains the protein LrgA that has been hypothesised to export murein hydrolases [1]. 28.00 28.00 28.30 28.60 27.80 27.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.10 0.72 -4.29 148 3245 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 2309 0 485 1676 121 95.60 31 72.62 CHANGED h.hlGchlsphht..lPlPGsllGhlLLhhhLh.ts.hlp..............chlctsushLLpphsLhFVPuuVGlhsahsl.ltpphh.lllslllSTllslhsouhlhp ......................hhhGphlsphhs..lPlPGSllGhllLalLLt..hp.ll.h.............caVcsuu.....shLlp.hsLhFVPsuV.ulhpahsl.lp.tphh.lllslllSTlllhlssuhss.t................... 0 117 247 369 +3237 PF04172 LrgB LrgB-like family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The two products of the lrgAB operon are potential membrane proteins, and LrgA and LrgB are both thought to control of murein hydrolase activity and penicillin tolerance [1]. 
25.00 25.00 26.90 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.33 0.70 -5.19 36 3458 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 2403 0 586 1883 148 210.90 35 88.87 CHANGED Tlhsahluphlac+h+t.hh.sPlLluhllllslLhhhs.........IsYcsYhtuuphlshlLuPA.TVALAlPLYcphchl++aahsIhsulllGulsuhloshhlA+hhuhsptlhhSlhPKSlTTPIAhtlopplGGlsslTAlhVllTGllGullGshll+hhpl+cs.lA+GluhGsuuHAlGTA+AhEhGpppGAhuuLuhslsGllosllsPllhtll ..............................................................................Tlhsahhuphlhp+h.p.....h.....hL....sPLll.uhllllsh.Lhlhs............................IsYpsYh.p...G.........up.hl.......shLLtPA.sVALAlPLYcph.c.hl+cp.a.hs....Ilsul.hlG.o.l.lu.hhosh...hlA.thhGh..s....p.pl.hhSlhP+SlTTPIAhslup.........p...........lGG.......h.s..sloAlhVlhsGllGul.l.G.thlLch...h.+I..........c................s....s......hA......+........GluhGsuuHulGTA+uh.Ehu..p.-GuhuSLuhslsG..llos..ll..sPhlh.l.h................................... 0 145 310 457 +3238 PF01462 LRRNT Leucine rich repeat N-terminal domain Bateman A anon SMART Family Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the N-terminus of tandem leucine rich repeats. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.58 0.72 -4.39 26 4081 2009-01-15 18:05:59 2003-04-07 12:59:11 13 511 131 66 1403 3750 2 29.00 41 7.50 CHANGED tCP.htCpCs......spsVpCsstsLp..plPh.lP ............CP.s..tCsCs..........................sssV.pCss+...sLs...slPssIP...... 0 249 354 721 +3239 PF01816 LRV Leucine rich repeat variant Bateman A anon PSI-BLAST Repeat The function of this repeat is unknown. It has an unusual structure of two helices. One is an alpha helix, the other is the much rarer 3-10 helix. 
21.30 21.30 21.30 21.30 20.60 21.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.84 0.73 -7.10 0.73 -3.41 56 206 2012-10-11 20:00:59 2003-04-07 12:59:11 12 15 101 4 120 200 32 25.00 40 14.00 CHANGED hpVRttVAp+..hPsctLttLsp..Ds- ........tVRtsVApp.thPsphLtpLup..Dss.. 0 37 87 114 +3240 PF05083 LST1 LST-1 protein Moxon SJ anon Pfam-B_6166 (release 7.7) Family B144/LST1 is a gene encoded in the human major histocompatibility complex that produces multiple forms of alternatively spliced mRNA and encodes peptides fewer than 100 amino acids in length. B144/LST1 is strongly expressed in dendritic cells. Transfection of B144/LST1 into a variety of cells induces morphologic changes including the production of long, thin filopodia [1]. 22.30 22.30 22.40 24.60 22.00 22.20 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.81 0.72 -3.98 3 46 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 14 0 6 49 0 61.30 43 73.44 CHANGED LGGLLLLsVlLLoI...................CL.......CuhSpRVKRLERutpsuG.cQEPHYASLQQLPVSSSDITDM........KEDLSTDYACIARSTPT ...........................LGGLLLLhVllL.h...................CL.......ChhppRh+tLEpsh.........t...QE.hht.L..LPs.sp.-h.th..........pED.psDYACIs.s................................ 0 1 1 1 +3241 PF00677 Lum_binding Lumazine binding domain Bateman A anon Pfam-B_291 (release 2.1) Domain This domain binds to derivatives of lumazine in some proteins. Some proteins have lost the residues involved in binding lumazine. 
21.30 21.30 21.30 21.80 20.40 21.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.58 0.72 -4.09 20 8363 2012-10-03 00:38:56 2003-04-07 12:59:11 12 15 3923 28 2044 5873 3972 84.80 32 79.71 CHANGED oGhlcusupItpIpcp......ssshphslphstptLpchhls.soIulsGlSLTVscls....ssthplslh.cTlp.pTshsphKlGscVNlEhs ................................oGhl-shGplh...p.l...p.p.p...................ss...s....h....p....h..pl........p..s....s...........................t....................l..t......c.......h..h..ls..uSIAlsGlsLT.V.....s.....c.....l.s.....................ss....p......Fs..l..s..l..h..scTlp..pTsL......u...p......h.....p.s.G...s.p.VNlEts.............. 0 668 1327 1740 +3242 PF00894 Luteo_coat Luteovirus coat protein Bateman A anon Pfam-B_123 (release 3.0) Family \N 20.30 20.30 22.60 27.60 19.90 17.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.71 0.71 -4.40 13 1368 2012-10-04 01:49:40 2003-04-07 12:59:11 13 2 47 0 0 1080 0 131.00 58 53.22 CHANGED ETFVFSKDsLsGNSsGoIsFGPSLS-CPAFSsGILKAYHEYKITplhLpFlSEASSTouGSIAYELDPH.CKhSoLuShINcFoIoKuGpKoFouctINGtpW+DSoEDQFtILYKGNGsSS.lAGSFRITI+VthQNPK ..EsFlFShDsLpuNSoGsIpFGPSLSpCPAhSsGILKAYHEYKITulplpFhScASuTouGuIuhELDsp.CK.SuLuShINpFsIoKsupKsFsAphINGpEap-o.........otDQFhlLYKGNGsoo.hAGpFhIphps.h.s................ 0 0 0 0 +3243 PF02122 Peptidase_S39 Luteo_ORF2; Peptidase S39 Mian N, Bateman A anon IPR000382 Family This family contains polyprotein processing endopeptidases from RNA viruses. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.03 0.71 -5.01 15 486 2012-10-02 13:45:52 2003-04-07 12:59:11 10 4 34 1 0 526 2 164.30 35 39.72 CHANGED VcGapoaslPpsPPKsuVlplp+ssGoHsGYAoC...lpLasGpsuLlTutHslsc........sttstSh+sG..sKIPL..s-F+slhcssptDlslLpGP.PNWEuhLuCKusphsTsspLu+usAohashccst.WhussAc....IsGs....p..sp.assVLSNTcsGaSGoPYFsGK.sllGVHpGt...sscscNaNh.MuPIPslsGLTSPsalaEoTuPpGcl .........................pGhh.aphspp.scasV.thh.PsDp.ht.Aos.....s.h.ssEcAhhsu.ts.sp.........s.t..s.c.ph.+ss....phPL...sE.p.h...L.....s..s..hhs.ll.phs.sphtshlttKhh+.t.spp.....p..pushp..hh.....cp.....h..psuc.....s.ts..p..sh..ssVh..S.p..s.pts.hussPhash..p.htsHpG....uppsppap..huPlssh.....Pp....ss..p.................................... 1 0 0 0 +3244 PF04662 Luteo_PO Luteovirus P0 protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_4444 (release 7.5) & Pfam-B_3579 (release 10.0) Family This family of proteins may be involved in suppression of PTGS a plant defence mechanism [1]. 26.60 26.60 27.00 45.30 24.60 26.50 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.09 0.70 -4.87 8 137 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 22 0 0 137 0 208.60 35 86.25 CHANGED llsp.sRpLhsp-RshhsutFLhphshhlsh..............t....cspthlRSlLatLPLLL.....sG-hhassuthph.h.....LsRaultsGhsPsso.........psslcLplPuocsshRhhLtRssoouLuE+lpRap-sLt.pGhcpFp+FLssahpshERpLscsshcsshssphhl-LssLGstLtchVhspplhppuhhoclAhthp+laGEssuls ........................lhlppsRhLhhcpcshlhuhhLlsItphlph...........h..t.st.p.l-hhlRSLLatLPLLl........Gct..lasssthph.h.....hscFuLhsGhtPsso.........sttlcLclPsTc.suhRphLtRssuSshuE+hpRhsEshh.puhEtFpRhLusahcptERplhpustc..hGscphl.LusLGphLtchVsspphpppshhuclAhthp+laGEstuh................. 
0 0 0 0 +3245 PF01659 Luteo_Vpg Luteovirus putative VPg genome linked protein Bateman A anon Pfam-B_970 (release 4.1) Family This family consists of several putative genome linked proteins. The genomic RNA of luteoviruses are linked to virally encoded genome proteins (VPg). Open reading frame 4 is thought to encode the VPg in Soybean dwarf luteovirus [1]. Luteoviruses have isometric capsids that contain a positive stand ssRNA genome, they have no DNA stage during their replication. 25.00 25.00 26.80 49.70 24.10 18.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.09 0.72 -4.34 8 687 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 39 0 0 510 0 103.60 47 67.26 CHANGED l.h.E-QAphRHSaSQRTsS+sTPpEVSsSGRlYQsApHSpMEYSRPTMsIRSQsShaSSSsRPLP..sVPSLhshT.sApspP.ssp.hsStStpsspcsSphp .......hh-ppAshRaSaSQpTsS+sT..tpSsSuRlYpsAQ+SphEYS+PTMsIRSQVShaS.StpP.P.pps.SLhshT.pAssp.asspLIsSsSt+hs.psSp..t...... 0 0 0 0 +3246 PF04443 LuxE Acyl-protein synthetase, LuxE Kerrison ND anon DOMO:DM04138; Family LuxE is an acyl-protein synthetase found in bioluminescent bacteria. LuxE catalyses the formation of an acyl-protein thioester from a fatty acid and a protein.\ This is the second step in the bioluminescent fatty acid reduction system, which converts tetradecanoic acid to the aldehyde substrate of the luciferase-catalysed bioluminescence reaction [1] A conserved cysteine found at position 364 in Photobacterium phosphoreum LuxE (Swiss:Q52100) is thought to be acylated during the transfer of the acyl group from the synthetase subunit to the reductase. The carboxyl terminal of the synthetase is though to act as a flexible arm to transfer acyl groups between the sites of activation and reduction [2]. This family also includes Vibrio cholerae RBFN protein (Swiss:Q06961), which is involved in the biosynthesis of the O-antigen component 3-deoxy-L-glycero-tetronic acid. 
20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.34 0.70 -5.77 7 387 2012-10-02 15:58:18 2003-04-07 12:59:11 7 5 327 0 124 413 321 321.20 24 83.12 CHANGED Ms....lcppplhuSoElDDlIF.upP.phoh-EQcplpp+llhpua.aHYppN-pYRpaCpsptVs..-sl...ps..IsDIPVFPTSlFK.....a..t+LhTss--cIEsWaTSSGTSGlKSplAR...DRlSIERLLGSVsaGMcYVGsaa-HQhEL.lNlGPD.....RFNusNIWFK....YVMSLVELL...YPTs..FTV....spDc....IDF.EpThtpLhpIppptKslCLIGsPaFlYLLsp..a..hK-psIp.FpuGpchaIIT.GGGWKppps-u.....LcRt-FNphLh-sFpL.s.sQIRDTF...NQsELNTCFFED...php++....HVPPW..VaARALDPcTLcPlsc.Gp.GLhSYMDASuTSYPuF....IV.TDDIG.Ilcc..pps-..asGspVEIlRRlpTRttKGCulSMspshp ...................................................................................h....................................t....htthhhp.htap...appst.Y.+paspt....t.hs.....tpl.....ps...lp-...I..PhlP...lph...FK........t...tpl.h.oh.s......p.p..p.l.p.t.h.h.p.SS.G.T.....o..G......h...t...S..p...h.hh......D.p......s.ht.t.....t..s..hth..hh..p......h...h..t.....p..t....h.t.h..ls...h..t..P.s..........p.t..s.s..h.h.h.p......h.h.h....hs.....hh.......hs..sp.....ahl.......ptp.t..........h.c.h..pthhttl...p..tt..pspslhlhG.sh.hh..ah.h.h....h...hccp..shp...h.pl...s...t.s.shlhc.sGGaKthppc......ls+pphpphltpshGl.....pphhsha...s.sE..h...s....hhcs...........sttth............p..s..P..sa....l..h..hR.....D.s.p.s.h...p.s.hs.....G..c.h.........Gllphhshhsh.s.suh....ll.T.-DlG.hltt....ss.....s.h........FcllGRh.ptu-h+GCu..........h.................................................................................................... 1 47 90 108 +3247 PF02664 LuxS S-Ribosylhomocysteinase (LuxS) Bashton M, Bateman A, Adamkewicz J anon COG1854 Family This family consists of the LuxS protein involved in autoinducer AI2 synthesis and its hypothetical relatives. 
S-ribosylhomocysteinase (LuxS) catalyses the cleavage of the thioether bond in S-ribosylhomocysteine (SRH) to produce homocysteine and 4,5-dihydroxy-2,3-pentanedione (DPD), the precursor of type II bacterial quorum sensing molecule. 20.20 20.20 21.00 21.00 19.60 18.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.67 5 2731 2012-10-02 15:41:56 2003-04-07 12:59:11 10 2 2619 23 302 1250 26 152.40 46 95.62 CHANGED Ppl-SFslDHTKlsAPhVR.lAc+KsTscGDlITsFDlRFCtPNKE.lMcscuIHTLEHLhAshlRNHtN.hssh-IIDISPMGCRTGFYLollGcPspp-llDllcsoMpDVLclp-..pIPuuN-KQCGsYppHoLEsA+clARsaLs+..tI...KsEpLcls ..........................l-SFplDHT+hpAPhVR.lApphpsP.+G..DsIosFDlRFstPN+-.hhsptGlHTLEHLhAshhRsHls.u...clIDlSPMGCRTGFYhslh.Gp...s..sppclscshcsuhcD.lL.p..s..p...s...clPthN..hQCGsYp.HSLptApph.A+plLpp...tl...ps-.....s............. 1 74 166 239 +3248 PF00206 Lyase_1 lyase_1; Lyase Finn RD anon Prosite Domain \N 20.60 20.60 20.60 21.20 20.50 20.40 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.71 0.70 -5.24 18 16439 2009-01-15 18:05:59 2003-04-07 12:59:11 15 45 5117 204 4142 12313 9223 297.30 26 66.01 CHANGED GchssssDthhuhhopcsh.sh+lst.s.p.......uhshhp+AtApssh.h....pctssuIhpuh-clhc.schscpFslsshppsssTssphplsElIu.......t.........csss+VppupSoNDshsTuh+lhhtpslpp.LlstLppLhpslsc+AcpatDllhhGRTHLQcApPlTlGpchpuasttlpc-hpRlppsh.chhhtshsGGsusGTulNsc..ch..hlsccLu...hou....sssNph-Alus+DtllEhhuslshlsspLtKh.ApDlplhuSG..sGhsElshstsp...GSSlMPtKhNPsssEh...lpthuspVhG 
.............................................................................................................ht...ssthhth.o..t..ps..h.....p..h.....h.....lt......ph.............s.hhh..t.p.u..huth.sh..............................tp.sp.tlh...p.u.h..c.p...l..h..p.....h.p......h..t.........t....t....h....l...p.h.h.p..p.s.......s...t....p.....s..s...p...h...p......l.s.E.h..lu........................................................p.s..sc.clphup.So.........NDh.....h..s.Tuhp...lhhtpsl.chlhst....l.p.p.L.h.psltppAp....pa......p..c..hl.....hhGRT..H.h....Q..c...A.pPhThGpchtuas..t.l.p+...s.hp....+.lppsh....p.h.............h.h....t.h..ht.G....u.s...u......s.........G.....T......u........h.............s.......h........s.....................t.....................t...........h.s..s..c....pLu...................h..s.s.....................s.t.s.........h..h.........p...s..l....s....s+.D.h.h......s..c.....h....h..u.....s.lshlu....s..pLp+lupDlhh.hsos...huhs.El.s......h.st.s.p...G.SSl.MP.t.......K.hNPs.hsEh.....lpthshplhG.............................................. 0 1294 2586 3471 +3249 PF02278 Lyase_8 Polysaccharide lyase family 8, super-sandwich domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_4840 (release 5.2) Family This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen. 
20.00 20.00 23.30 20.40 19.40 18.90 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.84 0.70 -5.03 5 1067 2012-10-02 23:57:29 2003-04-07 12:59:11 13 57 784 47 126 780 15 260.60 32 28.72 CHANGED scP..stL+tFNsMDRlsaaNt-huFuFGLSMsSKRTt+YEuhNcENhRGWYTGDGMhYLYN..oDssHYoDtFWPTV-hYKlPGTTspDs+psDs.Tc-.............tpsss.oscauGo.KlDDpaAuuuMDFcNhscTLTA+KSWFhLcDKIVFLGoGIpsTsssssssTTIDsRK.tsssuYssYsscKphohscup.....-spSVFLESa-s++NIGYaFhcsusIolp+csQTGsWp-IN.sSpocccVos-ahTloQcHusc..sssYGYhllPGlsRssFs ............................s.....p.hpha.spMD+hshhp...t.....s..auhuLShhSpR..ltsYE......t.h..N.s.E.Nh+GWaTusGMhY.LYs.....sD..h.s.c...Y.....p-....s..a....W..s..T...sD.h+lPGTTs....h....s...s...p...hs..cs....sss........................................t.t..hps...p...s..huGu....ph....s.s.......p..h.us.h...u.....M.......c..a......s...............s............................s...........p..............o................L....s..A.....+......KSWFhLsDcI.l.h.LGosI...p.........s..s.s.......s........p...s..s...............t.To.l.-.p....R+................s.ssh.pl..h.s..s.s....t.p.h..s...s..p..pp...............................psp...l.h..h....p....s......t..s...s..p...p..s..l.G...Y.a..F.h.p..p.....s.....s.......l.sh.p.pppp.......pGpWp..-I..N.....p..u...p....s....s....c......p....h......pspahplt.sHsps.......sspYuYhllPshspt........................................................................................................ 2 69 106 120 +3250 PF02884 Lyase_8_C Polysaccharide lyase family 8, C-terminal beta-sandwich domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_4840 (release 5.2) Domain This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen. 
20.20 20.20 20.20 20.20 19.90 19.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.06 0.72 -3.96 29 875 2009-01-15 18:05:59 2003-04-07 12:59:11 12 50 698 45 82 585 8 76.70 25 8.46 CHANGED lplLpNssphQuVpcsp........slhussFa.ssu......psst...lssspsuslhl+cpsss.hplslu-Pspsssshp...............plh .........................hpllcNscslQuVccsc........slhuhsha..ssust........pssp.....l.pl.p....ptu..hhhl.+......cp..ssp..hcluhh.sPppppss.s...hh........................... 0 46 68 79 +3251 PF00062 Lys lys; C-type lysozyme/alpha-lactalbumin family Eddy SR anon Overington and HMM_iterative_training Domain Alpha-lactalbumin is the regulatory subunit of lactose synthase, changing the substrate specificity of galactosyltransferase from N-acetylglucosamine to glucose. C-type lysozymes are secreted bacteriolytic enzymes that cleave the peptidoglycan of bacterial cell walls. Structure is a multi-domain, mixed alpha and beta fold, containing four conserved disulfide bonds. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.91 0.71 -4.14 21 1035 2012-10-03 00:09:25 2003-04-07 12:59:11 15 9 315 768 356 1346 9 110.30 37 77.66 CHANGED KhasRCELu+pLKt.GhDGYpGlSLusWlChspaESuYsTpAhspN..ssSTDYGIFQINs+aWCscucTPtupNhCsIsCspLL-DDITcslpCAK+Il.DspGlssWlAW+shCp.cDLspa.tsC ...................................................................hhpRCELActLp.t...shs.att.pLssW.l.Cls.aESuasTpshs.......tss...sG.S.s-Y...GlFQINs+aWCp...s...sp..h.......s.......t.............shCpl.sCs...t.L...Lsc-lscsltC.A+p.Ih.p.....t.......p.......G.......h.......puW..........sWpt.hCp...pplsph.ttC.................................................................. 
0 50 75 177 +3252 PF01810 LysE LysE type translocator Bashton M, Bateman A anon Pfam-B_1537 (release 4.2) & Pfam-B_7916 (Release 8.0) Family This family consists of various hypothetical proteins and an l-lysine exporter LysE Swiss:P94633 from Corynebacterium glutamicum which is proposed to be the first of a novel family of translocators [1]. LysE exports l-lysine from the cell into the surrounding medium and is predicted to span the membrane six times [1]. The physiological function of the exporter is to excrete excess l-Lysine as a result of natural flux imbalances or peptide hydrolysis; and also after artificial deregulation of l-Lysine biosynthesis as used by the biotechnology. industry for the production of l-lysine [1]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.14 0.71 -5.02 19 13176 2012-10-03 02:02:08 2003-04-07 12:59:11 13 5 2968 0 3080 9500 5031 188.60 20 91.83 CHANGED lshhhsPGPsshhlhppulp+uhttulhsshGshluDhlhhhlshhGluhllt.sshhhsllthhGusaLhahGhtshRsshpsps.pt..........................tssp.........thhpsahpGlhloLsNPpslLaahulhushls....tht.hthhhhhsuhhlusl.lWhhhluhhsuhhthhhpt.th...hlshlsuslh...................hhFulhLlhpu 
...............................h..hhlsPGPss.h.hlhpp....u.l......p.....p.G.....h...p.tuh....hs....shG.h.sh.G.....h...l...h...h...h.hu...s...h...G...l..us...l..lt...t...s.s.h......l.h.p...l...l..ph....s.G...u....s..YL....l....a..lG....h...p.h....h.....+....u........s.......h...p.....t.....p...t...t...t.t..................................................................................t.tst...................shhp..h..a.hp..uh...hss...l.h.N.....PK.s..h...l.a.h...lu.l...h...s..p..h..lss.....................t..s..h...h....t...h....h....h......h.....s..h...s......h.....h.....h.......s.....s....h......h....a....h.....h......h......h....u.......h....h....u...s...h....h......t....t....h....h...p....p....t....p.hh......phl.s...t.l..h.G.s.lh...................h..h.huhhlh...t........................................................................ 0 649 1498 2340 +3253 PF03641 Lysine_decarbox Possible lysine decarboxylase Bateman A anon Pfam-B_741 (release 7.0) Family The members of this family share a highly conserved motif PGGXGTXXE that is probably functionally important. This family includes proteins annotated as lysine decarboxylases, although the evidence for this is not clear. 
21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.77 0.71 -4.34 32 5042 2012-10-01 21:16:48 2003-04-07 12:59:11 9 29 3532 56 1524 3863 1410 131.70 28 49.24 CHANGED Msslscush..cuGup........slGlhsphlhs.pchs........................sptls-l..lhhpshapRKthhschu-AFlshPGGhGThEElhEllThhQlGhc...p+.PllLhs.su...aacsLhpalc.phlppGhlsssstp.hhhhsssscplhptl ...........................................................Mtusscush.......cu.u.u.p...........slGl..h....shlls...p.chs.......................................................ss...h..l...s...c..l.......lh..h..p..s..h..p.p..RKt.hhs........c........h........u........c.u.h.....lsh.P.........GG.hGT.......l-El.hEhloh..hQh.shp.....................pp...P..l...l.Lhs.st..........aaps.L.h.pa..l...p..thh..t.p..u...h...lstpshp..hhhls-ssp-lhp.................................................. 0 441 972 1290 +3254 PF02402 Lysis_col Lysis protein Mian N, Bateman A anon Pfam-B_1555 (release 5.4) Family These small bacterial proteins are required for colicin release and partial cell lysis. This family contains lysis proteins for several different forms of colicin. Swiss:Q02112 has been included in this family, the similarity is not highly significant, however it is also a short protein, that is involved in secretion of other proteins (Bateman A pers. obs.). This family includes a signal peptide motif and a lipid attachment site. 25.00 25.00 57.10 57.00 22.10 21.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.32 0.72 -4.38 9 52 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 28 0 2 37 0 45.10 67 96.50 CHANGED MK..Klhhhhlllls.hhLuACQANYIRDVQGGTVAPSSSucLTGlulQ ..MK..KlhhlhlLllu..hhlLuACQANYIRDVQGGTVuPSSSuELTGlusQ. 0 0 0 2 +3255 PF04971 Lysis_S Lysis protein S Finn RD anon Pfam-B_7633 (release 7.0) Family The lysis S protein is a cytotoxic protein forming holes in membranes causing cell lysis. 
The action of Lysis S is independent of the proportion of acidic phospholipids in the membrane [1]. 20.70 20.70 20.80 21.00 20.60 20.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.17 0.72 -4.29 7 1200 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 353 0 16 366 0 66.40 84 94.47 CHANGED M-KITTGVuYsTSAsusGYWFLQhLD+VSPSQWAAIGVLGSLlFGLLTYLTNLYFKI+EDRRKAARGE ....................................MEKITTG...VSY.sTSAsGTGYWhLQLLDKVS.PSQWuAIGVLGSLlFGLLTYLTNLYFKI+ED+RKAARGE.................. 1 2 5 9 +3256 PF01186 Lysyl_oxidase Lysyl oxidase Finn RD, Bateman A anon Prosite Family \N 20.30 20.30 20.30 21.50 19.50 17.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.49 0.71 -4.70 4 515 2009-01-15 18:05:59 2003-04-07 12:59:11 12 16 109 0 231 428 15 188.90 54 36.67 CHANGED PDLV.DsthVQtosYlpctsLa.LpCAAEENCLASSAhRssshsYspRhLLRFspRl+N.GpADFhPptsRaSWpWHpCHpHYHSMD.FoHYDLLsssus.KVAEGHKASFCLEDTpCs.GhhKRYACssa.pQGlSsGCaDTYptDIDCQWIDITDV+PGNYILpVpVNPsa.VsESDFoNNlhRCsh+YsG++laspsC+Iusu ...................................................................PDLV.cs.hlQto....sYlpchsha.LpCAt.EENCLu...u.....SAh..p....s....p...........h.s.....Y..s....hRhLLRFsppl+NtGpuD.F...h...P..p.tsR..asW.WHpC..H..p...HYH.SM-.FoHYDLLshssp.....KVAEGHKASFCLEDT.p.....C...........c.....Gh...p+..........R.............Y.........s..........C.s.s.....a..............p...Q.G.......losGCaDTYptDIDCQWIDITDVp.P.G.s.Y.ILp...............VslN..P....papVsESDaoNNsh+Cph...+YsGp....h..h..h..h.sCphs............................... 0 40 63 139 +3257 PF02401 LYTB LytB protein Mian N, Bateman A anon Pfam-B_1515 (release 5.4) Family The mevalonate-independent 2-C-methyl-D-erythritol 4-phosphate (MEP) pathway for isoprenoid biosynthesis is essential in many eubacteria, plants, and the malaria parasite. The LytB gene is involved in the trunk line of the MEP pathway. 
18.70 18.70 19.10 19.20 18.50 18.50 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.86 0.70 -5.49 13 3664 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 3349 24 911 2662 2891 277.30 42 82.28 CHANGED IlLApPRGFCuGVcRAIphsEpA.Lppttu.PlYlpppIVHNppVVspL.RpcGshFlEsl........p-lP..........cGshVIaoAHGVuspVcptAccRsLpllDATCPhVTKVHptspphu+cGaclILIG+csHsEVhGshG.ss.........................spshlVpshc-ltpLshps.p+luhloQTTloh--st-IlstLccRascltt.........spssIChATpsRQcAl+sLutcsD.lllVVGupNSSNSsRLhElApcpGs.suaLl-ss-.................-lpt-Whpssp.slGlTAGASTP-hllpsVlp+Lpph ..............................................llLAsPRGFCAGV-RAIp....l.......V..Ep.....A........L.........c.........h.....a.............s.............s....PlYV++ElVHN+a.....VV-sL.c.p.+.G.sl..F..l...-..cl..........s-lP.......................................cs.s..h.VIFSAHGVo.sVppcAc.p......R....s.L..p..l..h..DATCPLV.......T....KVHhcl....t....+............h......sc...........c......G...........h....clllIGH.t.GHPEVcGThGphs........................................sth.h...L....V.......-.......s...s.....-.....D.......l.........t.........p.........L.......p.........l....p....s.........p..c..lshloQTTLSlD-Ttc.llctL.+p..+..a.P.p.lhs...............................Ph.pc-ICYATpNR.QcAV.......+.p..l....A.....p..p.......s......D.....lllVVG.upNS.SNSsRLhElA.pc................h.......Gt....su.aLl-ssp.................................-l....p....p..Wl.....p.....s.....s...p......sl..GlTA.....GASsP-hLVppVlsplpt.h............................... 1 319 613 776 +3258 PF04397 LytTR LytTr DNA-binding domain Yeats C anon [1] Domain This domain is found in a variety of bacterial transcriptional regulators. The domain binds to a specific DNA sequence pattern (see [1]). 
21.50 21.50 21.50 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.83 0.72 -3.92 114 8937 2009-09-11 13:50:36 2003-04-07 12:59:11 10 26 2937 3 1808 6179 571 96.10 22 42.29 CHANGED sphhhlshp-Ihal..ps.pschlplhst.....spphhh.ptoLpplcpcls..stFhRsHRShlVNlppIpclp.......t..ph..plhhps.....spplslSRphhcpl+phlt ............................................p..hhhlshp-Ih.ah........ps...p.....s...c..hl...h....l...hop............sp....p....h....h.h....p..h.....s.L.p.....p.....l.c...p.....p.....L....s........pp.......F.h.RsH+Sa.lVNhp.plpplc..........................ph...pl....h..lps.............sppl..l..SRphhcpl+phh.t........................... 0 766 1373 1637 +3259 PF02370 M M protein repeat Bateman A anon Pfam-B_208 (release 5.2) Repeat This short repeat is found in multiple copies in bacterial M proteins. The M proteins bind to IgA and are closely associated with virulence. The M protein has been postulated to be a major group A Streptococcal (GAS) virulence factor because of its contribution to the bacterial resistance to opsonophagocytosis [1]. 20.80 15.60 20.80 15.60 20.70 15.50 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -7.31 0.75 -7.27 0.75 -2.86 59 653 2009-09-11 13:49:06 2003-04-07 12:59:11 11 24 24 1 3 651 0 21.10 53 14.87 CHANGED ptK+plEuchpcLppcpptlc ..EAKKclEA-htcLpp-pQh.c.............. 0 0 0 0 +3260 PF03855 M-factor M-factor Finn RD anon DOMO:DM08003; Family The M-factor is a pheromone produce upon nitrogen starvation. The production of M-factor is increased by the pheromone signal. The protein undergoes post-translational modification, to remove the C-terminal signal peptide, the carboxy-terminal cysteine residue is carboxy-methylated and S-alkylated, with a farnesyl residue [1]. 
25.00 25.00 28.40 83.80 22.30 16.60 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.19 0.72 -3.97 2 3 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1 0 3 3 0 42.30 72 100.00 CHANGED MDShAssspSSSlVNAhNp.Ps-slp.pslKNYTPKVPYMCVIA MDSMANoVpSSSVVNAGNK.PoETLN.KTVKNYTPKVPYMCVIA 0 3 3 3 +3261 PF05034 MAAL_N MAAL; Methylaspartate ammonia-lyase N-terminus Bateman A, Moxon SJ anon COG3799 Family Methylaspartate ammonia-lyase EC:4.3.1.2 catalyses the second step of fermentation of glutamate. It is a homodimer. This family represents the N-terminal region of Methylaspartate ammonia-lyase. This domain is structurally related to Pfam:PF03952 [2]. This domain is associated with the catalytic domain Pfam:PF07476. 21.00 21.00 21.40 36.90 20.00 18.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.79 0.71 -4.46 4 186 2012-10-02 11:54:41 2003-04-07 12:59:11 8 4 173 12 42 142 2 155.20 62 38.70 CHANGED M+IccVLhTPGhuuFaFDDQtAI+sGAspDGFsYsGsPsTsGFpplRpsGEulSVtLVLpDGpVshGDCAAVQYSGAGGRDPLFhApcahsllpctltPtLlGRDlssahssAthh-ch.lstptLHTAlRYGlSQALLcAsAtsptsThsEVlsDEash ..MKIKpslhTsGhouFYFDDQpAIKsGAsHDGFhY.sGcPVTtGFsuVRQAGEslSVtLILEsGuVAhGDCuAVQYSGAGGRDPLFLA-cFIPhlpccI+PhLlGRDlcuFhssAchFDcL...+l.........D...Gp.hLHTAlRYGlSQALLDAsAhAs.s+hhsEVVsDEap................ 0 16 27 36 +3262 PF03281 Mab-21 Mab-21 protein Mifsud W anon Pfam-B_4530 (release 6.5) Family This family contains Mab-21 and Mab-21 like proteins. In C. elegans these proteins are required for several aspects of embryonic development [2-3]. 
20.10 20.10 21.30 20.50 19.10 19.80 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.92 0.70 -5.28 33 746 2012-10-02 22:47:23 2003-04-07 12:59:11 9 9 110 0 456 647 1 241.40 20 57.39 CHANGED uhh-slcltssscach.........hh.lpt.shhp...................hppptphsshshl+l.............................hps.phhpcals........................tsshlssp+lhstFt.sllppu...............................lsphphps............h.phttpssslpltlppt.........thslDllsulch.shWPp.u.pa.hh.............phWPs.thhpplts..pGhaLls+.........................................tshsshpWRlSFopuEptLhpph............sss++cClpllKtlpcphh...........hsslsoYHLKTllhapC-+hPpt..pWpp.sslupplhsllccLhpCL....ps+plPHYFlP.....phNLhpsh..spstlpphspplhclhp ......................................................................................................................................................................................................................................................................................................................................phh................................................................................................t.hhsst...phhphh..ph..lt.s..................................................h.p..t.t.................t...ts....hpl.ht.............h.hplhPshph...........t................................................sh.Llst.......................................t..t.t.pss.tWhluFstsEptllp.h............sus+.hpsLpl....h..+s..l.tcpth.............................thtsls.sYHL.+ollh..ahspch...st....p....Wpp......ptlupph.tll.tLhpsL....................ppcp..hspaahs..................phshhpth....tth..hh........t............................................... 0 90 123 236 +3263 PF01823 MACPF MAC/Perforin domain SMART anon SMART Domain The membrane-attack complex (MAC) of the complement system forms transmembrane channels. These channels disrupt the phospholipid bilayer of target cells, leading to cell lysis and death. 
A number of proteins participate in the assembly of the MAC. Freshly activated C5b binds to C6 to form a C5b-6 complex, then to C7 forming the C5b-7 complex. The C5b-7 complex binds to C8, which is composed of three chains (alpha, beta, and gamma), thus forming the C5b-8 complex. C5b-8 subsequently binds to C9 and acts as a catalyst in the polymerisation of C9. Active MAC has a subunit composition of C5b-C6-C7-C8-C9{n}. Perforin is a protein found in cytolytic T-cell and killer cells. In the presence of calcium, perforin polymerises into transmembrane tubules and is capable of lysing, non-specifically, a variety of target cells. There are a number of regions of similarity in the sequences of complement components C6, C7, C8-alpha, C8-beta, C9 and perforin. The X-ray crystal structure of a MACPF domain reveals that it shares a common fold with bacterial cholesterol dependent cytolysins (Pfam:PF01289) such as perfringolysin O. Three key pieces of evidence suggests that MACPF domains and CDCs are homologous: Functional similarity (pore formation), conservation of three glycine residues at a hinge in both families and conservation of a complex core fold [1]. 
20.70 20.70 20.80 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.30 0.70 -4.64 69 1344 2012-10-01 20:08:01 2003-04-07 12:59:11 14 63 273 9 771 1199 5 197.40 16 29.28 CHANGED hshsssspa.pph........phpppppphhhtpspsphhphph...ts.ph........LsppF.....hptlppLPsp.as.sp.............................ahphhcpaGTHhlspsphGGphthhhph.......spsplpptphpttc............hssthth.h..................thphptstpspppppppp..s......ppthppthshlhGGss................tsspsappWtpolpp.pP.ssIphp.....................................lhPl....ppLlpp...............tppslppAl...ppY .........................................................................................t.......th.pt..........t.tppp.p..h....h....h...th...h...p....hh.p.hph........p.pph..................LstpF.....hptlpp.L....sp.p...h.s.st...........................................h..phl.ppYGTHal.ss.ss.h..........G.......Gphphhhhh........................sppp...hp.p.pthptt.p...................hpht.hth.h..............................ht.p.hstp....tt....t..p......tt....t......................................pt....p.h....hh...GGp.................................thppW...hpolt........t..tP...hlp..h.p....................................................h.Plhpll.....................ht..hptAh........................................................................................... 1 245 380 547 +3264 PF03523 Macscav_rec Macrophage scavenger receptor Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 45.90 44.70 23.80 18.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.30 0.72 -4.21 4 55 2009-09-11 06:31:35 2003-04-07 12:59:11 8 6 27 0 17 49 0 48.90 75 13.96 CHANGED MEcRIQ.ISDocANLlDoE+FQNFShTTDQRhNDVLLQLNSLlsolQtH ....MEKRIQaISDhEANLIDoEHFQNFSMsTDQRFNDlLLQLSTLhSSVQGH.. 
0 1 1 2 +3265 PF03817 MadL Malonate transporter MadL subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 27.90 27.70 24.60 23.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.67 0.71 -4.27 4 182 2009-09-11 09:12:09 2003-04-07 12:59:11 8 1 176 0 51 151 39 121.60 57 91.51 CHANGED MIIYGVALLulCpLsGlhlGshLGshLGVKANVGGVGIAMlLLIhspphLtK+GtLsphop.GVtFWuAMYIPIVVAMAApQNVluALsGG.lALLAuluAllVshhhIslls+.u..puph.sAlE .MIIYGsALLAlCpLAGlhlGDlLGshlGVKuNVGGVGIAMlLLIhh+haLp++Gh.hspcoEtGVuFWuAMYIPlVVAMAApQNVVuALpGGPlAlLAuluuVslCsssIslLuRhu..+tps.....t................ 0 9 20 35 +3266 PF03818 MadM Malonate/sodium symporter MadM subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 59.60 59.60 20.50 19.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.77 0.72 -4.40 4 185 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 183 0 51 137 41 59.80 57 23.68 CHANGED a-.lsssLspNGLITuFAllGlhMaVSahLSthLT+G+lHGSAIAIhlGLVLAYVGGssT ......-hlpKsls+NGLlouFAlVGllMalSshLS++LThGRVHGSAIAIlIGLlLAYhGGshT... 0 9 20 35 +3267 PF02545 Maf Maf-like protein Mian N, Bateman A anon COGs Domain Maf is a putative inhibitor of septum formation [1] in eukaryotes, bacteria, and archaea. 
19.40 19.40 19.60 19.70 19.10 19.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.13 0.71 -5.19 12 5338 2012-10-01 20:37:09 2003-04-07 12:59:11 9 24 3690 6 1449 4011 3017 188.60 33 91.91 CHANGED h.llLASsSPtRpplLcpht..lshpshsuph-Ep.....tp..PtthshcLAppKAhu.luphtss...........sllluADTllhls.GclhsKPpst-cAhphL+phuGps...tpVhTulslhpptp.hpsh.....s.TcVpFpp.lscccIpsYlpos-slcpAGuaslpGhGuhhlcplcG.sh.sVhGLPl.tlhphLpphsh.........hts.h ..............................................................h.pllLASsSPtRp..pL.L.p.p.l......G.........l.s...........F.p.h....h.........s.........s........c.......l........D.Es................h.s...-.......t.........................P......p........p...h...........V...h.....+.....LApt.K.A.p..u..........l.s..p.h..hss......................................................tlllGuD.ol........l.s..............l.................s.....G..........c.......l....lG.KP......pstc...cAtp.....hLp......t......h.......S.G..pp...........apl..hT..u..l....s..l...h.....s...s....p.....p..t....htph......................shocVpF+p....Lo...-p...-...I...p...s....Ylt..s..s...-..P..h..c.pA..Guasl..p.G.h.G.s.t.h.l.cc.l.p..........G..shpsllGLPLhphh.ph.Lpp.s........t.................................................. 2 469 903 1204 +3268 PF02792 Mago_nashi Mago nashi protein Bateman A anon Bateman A Family This family was originally identified in Drosophila and called mago nashi, it is a strict maternal effect, grandchildless-like, gene [3]. The human homologue has been shown to interact with an RNA binding protein Swiss:Q9Y5S9 [1]. An RNAi knockout of the C. elegans homologue causes masculinization of the germ line (Mog phenotype) hermaphrodites, suggesting it is involved in hermaphrodite germ-line sex determination [2]. Mago nashi has been found to be part of the exon-exon junction complex that binds 20 nucleotides upstream of exon-exon junctions [4]. 
20.10 20.10 22.60 22.10 18.60 18.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.64 0.71 -4.50 3 383 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 275 17 226 307 1 132.20 69 83.18 CHANGED DFYVRYYVGHKGKFGHEFLEFEFRsDGpLRYANNSNYKNDTlIRKEsFVSEuVLKElKRIV-DSEIlKEDD-NWPEPDKlGKQELEIlhsNEHISFsTuKIGSLADVQNScDPEGLRlFYYLVQDLKCLVFSLISLHFKIKPI ...........................................................pFYLRYY...lGH+.............GKFG.HEFL...EFEFR.....s..DG.....+...LRYAN..N....SNYKNDshIR...KEs..aV.ppuVhcE.lKRII.c-....SE........I..hK.......EDDshWP.PD+..l.GRQELEIllGsE......HISFTTo.KIGSLlDVspScDPEGLRlFYYLVQ.DLKCLVFSLIuLHFKIKPI....................... 0 80 120 175 +3269 PF03082 MAGSP Male accessory gland secretory protein Bateman A anon Pfam-B_256 (release 6.4) Family The accessory gland of male insects is a genital tissue that secretes many components of the ejaculatory fluid, some of which affect the female's receptivity to courtship and her rate of oviposition. This protein is expressed exclusively in the male accessory glands of adult Drosophila melanogaster. The proteins are transferred to the female fly during copulation and are rapidly altered in the female genital tract [1]. 
25.00 25.00 115.50 47.80 21.00 20.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.82 0.70 -5.00 2 35 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 7 0 1 123 0 235.40 77 99.83 CHANGED MNQILLCS.ILLLhFTVAsCDuEpQLDSuhcL.....KSAsLKNVAPKNstTQAcIsKDDVALKsuKKGDYlMDI-lSDhPLDDYPINpSKShKsSSh..ssI.....LsD....chNQGSNQ.ALKALppRLlhEQNNsLhLRNHSl.LMpEIEARKTDIIpsRQLNlDLELELpolpR+L.EhN..lQNsRKSTKsCKKRsSKD.APP.sNQhQEs.V+NTYRNKYLTLLppLuQKIN.EIApV...ATDVPTtpsPSpGN.sTh ...................LhFsVANCDGEHQLDSSl.D..L..K.S.DppKSAVLKNVAsKNDATQAEIAKDsVALKSGKKGDYVMDI-.V.SDMPLDDYPINNSKSRKNSSTLPSP.I.....LTD....KLNQGSNQIALKALKHRLVMEQNNNLFLRNHSVSLMNEIEARKTDIIQARQLNIDLELELEALKRKLSEMN..VQNARKSTKSC.KKRPSKDIAPP.sNQLQEVIVKNTYRNKYLTLLTQLAQKINYEIANVNNPATDVPTGKSPSEGNPSTT................... 0 1 1 1 +3270 PF04112 Mak10 Mak10 subunit, NatC N(alpha)-terminal acetyltransferase Wood V, Finn RD anon Pfam-B_9176 (release 7.3); Family NatC N(alpha)-terminal acetyltransferases contains Mak10p, Mak31p and Mak3p subunits. All three subunits are associated with each other to form the active complex [1]. 20.70 20.70 20.80 22.90 20.30 20.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -5.03 18 303 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 263 0 218 301 3 149.50 30 21.95 CHANGED +sGcLV+-spFsLF-uhuAlEIh-sKMDoGhlt.s.pt....tlcpuhssu.........pslshtclluIhDpLhss.huWhpGa.oLuQTVhoClYlcpl......t...t.............................ssplhcpVLpuaslulhphhshltshlpsutlaEE..EDhsspshshshhsphs.htchhshL ........................GcLl+st.h.FsLFEuhuAlE........lMDPKMDuGhlsst.p.......th-p.shcss....................csLs.tElluIhDplhsp.hsWhpGa.sLuQTlaTslYl..cp..............................................................sp.l.p.sL+....uashulh+hsshspphlspsthaE..E..E.DF.s.shshp..hhsphs...h....h........................................................... 
0 77 120 182 +3271 PF04874 Mak16 Mak16 protein C-terminal region Mifsud W anon Pfam-B_4960 (release 7.6) Family The precise function of this eukaryotic protein family is unknown. The yeast orthologues have been implicated in cell cycle progression and biogenesis of 60S ribosomal subunits. The Schistosoma mansoni Mak16 has been shown to target protein transport to the nucleolus [1]. 20.30 20.30 20.30 21.30 20.00 19.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.49 0.72 -3.93 9 337 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 299 0 235 325 3 98.70 42 33.08 CHANGED lspKlc+REppREcKALlAA+L-+sIE+ELl-RLKpGsYGD..aNhsppsacKhL-tcc.cppsEtE.-....p-Ep--uchEaVuDp..-..t...-lpDhED .....................h.spKlcRREppREcKALsAA+lEpuIEKELLERL+p.Gs.YG...Dh..........h.....Nh.....spphacKsL........c..............p.......p.-...........................c............p............p...........s.........c......p.......-....t........-...............c..........t......-.......-........-....-.........E..c..-....................s..ch.....Ea..V..p.D...t-.ptp..-hp.D.hE............................................................. 
1 82 130 195 +3272 PF01274 Malate_synthase Malate synthase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family \N 20.00 20.00 20.00 20.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.84 0.70 -6.50 16 2722 2012-10-10 15:06:27 2003-04-07 12:59:11 17 11 2102 17 690 2600 3654 522.80 38 94.48 CHANGED hslhsshs.ptscILTpcALsFlApL.pRcFsspp+pLLppRpchQtclDpGp.hs..........cFL.PETsalR.-ssWps.us.......................................................................................................................................lsssLtDR+VEITGPsD.RKMllNALNouAhsaMADFEDSsuP...........oWpNllpGQlNLpDAlcspIsapst.tsKpYpLs.......cphuhLhVRPRGWHLsE+Hlh.lDG.....-slsGu...lhDF...GLaFFHNt+ph.......lupGtGPYFYLPKMEuahEA+lWN-lFshApDhlGlPRGTI+ATVLIETlsAuapM-EIlYpLR-HuuGLNCGRWDYlFShIKshpsp.sshlLPDRstVTMssP....FMpAYschLlcTCH+RusaAhGGMAApIPI+cD.tANstAh-pV+tDKlREspsGaDGoWVAHPuLsslshpsFpphh.spPNQhphh+pc...plo....tsDLLs.thPsu.......phTpcGlcsNlplGlpYltAWL.pGlGC..VPlhp...LMEDAATAEISRsQlWQWl+HGVshc-....Gcplohphhtpll.cEphstlt.t.u....tsscap.Atphh.t.sh.uscasDFLTh.uYscls 
.............................................................................................................h..hht..s..tptl.s.sphhphlspl.hpchsPcppp.L..LstRtphQtplDthp..hs...................sFL..pE..hs.lh..-.......ssh..hh..ts....................................................................................................................................................l....h-Lp.cp......s...ch..sG..s..s.-.t.........t.hlhs...s....l.sts.shshht.DhED....SlAs....................sW.slhp.Gplshp.....suhsGph....hp....s......s.s+.Yp.ht...............tshulLhlRshGaHhscpplh...hcG.........ptlsuu...lhDh......uLhh.h..Hshpsh.................u+..u.uusYhYlPK.........h........cu.pEAthWs-l......FshsE......chlGls..........p.GT.IKsslLlEpl.Ashphc......EllatlR-+lstlNs.GhhDhhhs..I+ohhpt.ss.hl...Rps...shhs.pP....alpAYpch....lpo..............................hh..huGhu.uh.h......ht..ps....................s...sphhs..chhtsKhtcspsGt......sssWVsp.PshAsh.ththapph......spssQhpl........tppp.....h.hs..........hcp.....LLs.....s..u...............pchp.ptlcsNlpshl.........tYl.tWl.pGs.GC..VP.hs...........LM.EDtAThcISppplhpWl+H.t...........h.s...hp..s...............h..........sohphht....phl.spp.t...................................spsta..p.h...........usph.s.lsh.uhp.l................................................................................................................................. 1 193 413 579 +3273 PF02330 MAM33 Mitochondrial glycoprotein Mian N, Bateman A anon Pfam-B_17905 (release 5.2) Domain This mitochondrial matrix protein family contains members of the MAM33 family which bind to the globular 'heads' of C1Q. It is thought to be involved in mitochondrial oxidative phosphorylation and in nucleus-mitochondrion interactions [1]. 
20.90 20.90 21.40 21.50 20.50 19.70 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.39 0.71 -4.56 7 515 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 299 14 330 513 1 177.80 22 68.50 CHANGED LssEIc.E+c.th........pcsLs.hsh..shc.pssssEscLh+Ks.uuEcIpVsFNlspsls.sass-p-.spht...-pp.p.sssssh.s.ssppD.s.h.shsl-hphPtsp.lsacspA.....psDh..Fslc-h.htsou-s........EW..c...as.shcsLD.sLhD.thhcaLt-RGlDsshuc.LVphusshE+pEYIsaLEclKpFl .........................................................................................................................................................Eh.......t.............................h...t............th.....p.sttth.Lh+ph.....tsEplpl.h...p.....hs...p..................................t...t.....t........t.......t...................................t..........................................................h...........h....s..l.sh..p..c.s..s..ps....lph.ps.sh............................ss........htIcpls..h.h.tp.sp..................t-h.......tt.h.....Yh...GPsassLD-pLpc.tlhcaLc.cRGlssphusal.cahphKEppEYlpWLc....slcpFl................................................. 0 97 176 265 +3274 PF02157 Man-6-P_recep Mannose-6-phosphate receptor Mian N, Bateman A anon IPR000296 Family This family includes both Cation-dependent and cation independent mannose-6-phosphate receptors. 
20.40 20.40 20.50 20.40 19.50 19.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.98 0.70 -5.39 2 195 2012-10-02 14:19:21 2003-04-07 12:59:11 10 3 147 24 129 261 1 170.90 29 61.70 CHANGED M.PhpusWRTtLLLLLLhuVAVRESWQhEEKoCDLVGEKsKEScpElALLcRLpPLFNKSFESTVGQusDhYSYlFRVCREAuNHSSGAGLVQINKSNsKETVVGRhNETpIFNGSNWIMLIYKGGDEYDNHCG+EQRRAVVMISCNRHTLAsNFNPVSEERGKVQDCFYLFEMDSSLACSPElSHLSVGSILLVhhASLVAVYIIGGFLYQRLVVGAKGMEQFPHLAFWQDLGNLVADGCDFVCRSKPRNVPAAYRGVGDDQLGEESEERDDHLLPM ......................................................................................................................................................................................................................................................................................................................................................+p...t...h...h...Cs....p.p.......t........h...hs..p....p.p.ChY..hFEh.c......S........thAC.........s...s.......t..t....s.....p.....l..u.....s..G....u..l...h.h.llh.hhh.ls..VYl.l....GGhhYpRhVhtt+GhcQhPphuhW.tslhshh..t..D.hh.h..h...................................................................................... 
0 39 63 100 +3275 PF01232 Mannitol_dh Mannitol_dh_N; Mannitol dehydrogenase Rossmann domain Bateman A anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.80 21.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.86 0.71 -4.27 16 4952 2012-10-10 17:06:42 2003-04-07 12:59:11 18 14 2308 4 770 3058 403 147.30 28 33.49 CHANGED .phlHhGsGNhtRu....alspLlsp.ssh-hshs-ls.pssst......LptQcphhslltp.......sscpsclluslssh.shpts..psllcthsc..sclVoholst.............sshPpIttslspst............Lp+RpstshsPhslluC-Nh ......................+hlHhGhGshtRua.s.ah..sc.L.Ls.......p.....s....s............h..............c..huhscV.s...hssst.........tL...p.t.Q....c...p.lh.s..h.lht..s.........tps...p...pscll...u.u..Vpt..s....l............s...s......t....p....-........t....t....ll....p.th.sp.stsclVohslTEtG.......ht...ttt..h.....s.s.t....P.h..Istsl.tps.......................................hlhth.lp.+R.p.....t.s.....s.h..p.s.hslluC-Nh.......................................... 2 193 431 596 +3276 PF01050 MannoseP_isomer Mannose-6-phosphate isomerase Finn RD, Bateman A anon Pfam-B_899 (release 3.0) Family All of the members of this Pfam entry belong to family 2 of the mannose-6-phosphate isomerases. The type II phosphomannose isomerases are bifunctional enzymes. This Pfam entry covers the isomerase domain. The guanosine diphospho-D-mannose pyrophosphorylase domain is in another Pfam entry, see Pfam:PF00483. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.96 0.71 -4.60 17 2703 2012-10-10 13:59:34 2003-04-07 12:59:11 13 9 1834 0 687 2749 2876 136.20 49 31.58 CHANGED lGlpsLVVVpTcDAlLVuc+ccsQDVKclVcpLKtpsRsE..tphHpcVaRPWGpYcsl-pGcRapVKRITVKPGp+LSlQhH+HRAEHWlVVuGTAcVThspcshlLsENESTYIPhGshHpLENPGKIPLELIEVQSGuYLGEDDIVRhED ..................................................lGlcDlllVpTtDAlLluc+spsQ-VKp........lVp.p.LKtp.s+..p.-......hp....H..pp..la.R..PWG.p.Yc...s.....l.....-.....t.....G.....p.....R.....ap...V..K.....R...I..s.....V.+PG.t.p..L..S.l.Q..h.Ha.H..R..u..E.H...W.l.V.V.s.GT.Ac..VT.h.....s.....s......c............h.l........l.s......c.....N-......S.........h.YIPh..G..shHp.L...cN........P...G.c...l..P.L...c...lI.E....V....Q.oGsY..LsEDDIVRhpD............................................. 0 212 432 567 +3277 PF05007 Mannosyl_trans Mannosyltransferase (PIG-M) Moxon SJ anon Pfam-B_5638 (release 7.6) Family PIG-M has a DXD motif. The DXD motif is found in many glycosyltransferases that utilise nucleotide sugars. It is thought that the motif is involved in the binding of a manganese ion that is required for association of the enzymes with nucleotide sugar substrates [1]. 
22.00 22.00 22.20 22.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.89 0.70 -4.74 6 328 2012-10-03 03:08:05 2003-04-07 12:59:11 8 8 285 0 238 382 29 248.70 35 61.37 CHANGED ISTRGNu-ulluhlllhsLaLlpKpp.........hhtAullaG.hulHhKIYPllYslslhL.l.pshtcQ..........oshsphp....pLlsh..t.lhhl..lsoLhoFsusshhhYhhYG.pFL-csYLYHlhRcDhRHNFSsaahLLYLspAsphh..Sphl....tlhAFlPQhlLlhhh.uhpa......hcsLsashFlpTFsFVTaNKVCTSQYFlWYLshLPLlhsp..............h+h.SW++uLsllhLWhhsQuLWLhsAYhLEFpGhNsFh..laLuusLFFlsNsalLtQll ..........................................................................................ISTRGsu-ullssh.lhhhL..a.hl.h.ppc................................hhhAulhhG.luVHhKIYPlIYuhslhh.h.l...tp.p...t.tt.................................sh.hthhh.............ph.hsh..ptlh.hs......hsohh.oFhsls.h..hhYh.hY.G..hpFLpc.sY.LY.H.ls.Rh.DtRHNFSs.Y..h.hl.YL.s.....ss............pt...................u.......l..............................t.hhu...F..l....PQ..ll.L.h.h.h.....l...s.h.th.........................tccLs.h....shF.......h.....QT.hs...FVsF...NK..........VCTSQYFlWYlshLPlhlsp...................................p.h.......s..h.t...p....u..l...h.hl...h..lWhhu...QuhWLh.u.Yh........LEF.Gh.....s.......sF.h...lahuulhFhhsNshlLh.h....................................................................................................... 0 88 140 202 +3278 PF01575 MaoC_dehydratas MaoC_like; MaoC like domain Bashton M, Bateman A anon Pfam-B_297 (release 4.0) Family The maoC gene is part of a operon with maoA which is involved in the synthesis of monoamine oxidase [1]. The MaoC protein is found to share similarity with a wide variety of enzymes; estradiol 17 beta-dehydrogenase 4, peroxisomal hydratase-dehydrogenase-epimerase, fatty acid synthase beta subunit. Several bacterial proteins that are composed solely of this domain have (R)-specific enoyl-CoA hydratase activity [2]. This domain is also present in the NodN nodulation protein N. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.33 0.71 -4.74 32 6997 2012-10-02 20:54:35 2003-04-07 12:59:11 14 82 2642 72 2491 6367 2455 119.40 20 29.77 CHANGED phst.....hspshptphshslo...............sphthaAhhSGDhNPlHlDst.hAphutFs.ssIsHGhhohuhstthlhpthss........sthtthps+FhssVhsGsplpsphtpsu..hpttt.phhphpsshhsssss ...........................................ht..............t.hslo...................ppc..l.hhaA.......t..h..oGD.h..pP..lH...h...Dsp...hA.p....p.....s.....s.....F......s....p........h..I.s....H....G....h...h....o........h....u...h......h.......s...t..h...h...s..p..t.hs.s...............hsth....s....h...........p..l.+F..h...p..P.V.h..s....G.-.....s.l.p....s.p...h.plhs.....hp.tt...t..............st.t................................................... 0 656 1497 2090 +3279 PF03642 MAP MAP domain Bateman A anon Pfam-B_1396 (release 7.0) Family This presumed 110 amino acid residue domain is found in multiple copies in MAP (MHC class II analogue protein) Swiss:Q9Z4J2 [1]. The protein has been found in a wide range of extracellular matrix proteins [1]. 24.30 24.30 25.40 24.50 23.80 23.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.89 0.72 -3.76 10 1325 2012-10-01 21:38:54 2003-04-07 12:59:11 8 6 160 5 10 373 0 87.10 51 73.35 CHANGED VPYTIuVNGsSssltScLsFsscpploYpDLssKVKSVLcsDRGIo-c-L+hAKpApYTVaFKNGsKKVVDLKScIaTtNLFsupDIK .........VPYTIsVNGTSssIhSpLsFspspploYpDLssKVKSVLts-RGIo-hDL+hAKpApYTVaFKNGsKKVlcLKusha.TuNLlsssDIK....... 0 8 8 10 +3280 PF02991 Atg8 MAP1_LC3; Autophagy protein Atg8 ubiquitin like Griffiths-Jones SR anon Pfam-B_1384 (release 6.4) Domain Light chain 3 is proposed to function primarily as a subunit of microtubule associated proteins 1A and 1B and that its expression may regulate microtubule binding activity [1]. 
Autophagy is generally known as a process involved in the degradation of bulk cytoplasmic components that are non-specifically sequestered into an autophagosome, where they are sequestered into double-membrane vesicles and delivered to the degradative organelle, the lysosome/vacuole, for breakdown and eventual recycling of the resulting macromolecules. The yeast proteins are involved in the autophagosome, and Atg8 binds Atg19, via its N-terminus and the C-terminus of Atg19. 21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.31 0.72 -4.05 9 1011 2012-10-03 10:59:06 2003-04-07 12:59:11 11 11 362 37 573 990 106 100.50 47 79.62 CHANGED +RptEuc+IRpKYPDRIPVIlEKsspoc.lPslDK+KYLVPuDlTVGQFhalIRKRIpLsP-cAlFlFVss.olPsTushMuslYpcc+DEDGFLYhsYSuEsTFG ....................................................pRptEup+.IR....pKaP-Rl.......P.................VIlE.....+sp...............+..uc....ls.s..l.D...K.....+K.......a.L.V.....P....s..DlT....V...uQF....hal.I..RKRl.........p.............Ls.........s.............E.cAlF.l........F.V.........s..s.....s........l.P....s.........o.u..u........hMuplY-c.cK.D.E.DGFLYlsYSuEssFG...................... 0 190 288 420 +3281 PF00414 MAP1B_neuraxin Neuraxin and MAP1B repeat Finn RD anon Prosite Repeat \N 20.60 20.60 22.30 20.60 18.00 20.50 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.94 0.73 -6.21 0.73 -4.33 11 173 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 24 0 75 166 0 16.90 54 3.54 CHANGED TT+oP-susYsYEptEK ..TT+TP-sSuYSYEpoE+. 0 5 5 10 +3282 PF01124 MAPEG FLAP; MAPEG family Finn RD, Bateman A, Brock T anon [1] Family This family is has been called MAPEG (Membrane Associated Proteins in Eicosanoid and Glutathione metabolism). It includes proteins such as Prostaglandin E synthase. This enzyme catalyses the synthesis of PGE2 from PGH2 (produced by cyclooxygenase from arachidonic acid). 
Because of structural similarities in the active sites of FLAP, LTC4 synthase and PGE synthase, substrates for each enzyme can compete with one another and modulate synthetic activity. 20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.87 0.71 -4.53 319 2722 2009-01-15 18:05:59 2003-04-07 12:59:11 13 15 1609 34 1052 2147 1247 123.10 21 86.01 CHANGED hh..hhs....slhshhhhhlshhssthRhttth.......................s.....................ts....hhpR.....................................stRAatNthEthshFhshhhlhtl.t......................shhsshhuhhalsuRllas.hsa...hts......hsh..hRs.hs....ahluhlshhslhlhslh .........................................................................................................................h.hhhsslhslhh.hhhsh.ls.p.h.R.h.p.hp..s..h.......st.............s.................................hs.....thpp...............................shR..sa.tNsh.EhhPlal..s.h.h.lh.hth..s......ss..............s.......shhsth..hu.......hlalsuRlhah.hua..........hts.......hhh.......hRt..hs....hh..hshhshlhhhlhsh.h............................................................................. 0 315 546 790 +3283 PF01914 MarC UPF0056; MarC family integral membrane protein Enright A, Ouzounis C, Bateman A, Dlakic M anon Enright A Family Integral membrane protein family that includes the antibiotic resistance protein MarC. These proteins may be transporters. 
23.60 23.60 23.70 23.80 23.50 23.30 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.15 0.71 -5.20 12 4024 2012-10-03 02:02:08 2003-04-07 12:59:11 12 4 2220 0 892 2286 718 199.30 30 95.29 CHANGED hphhhtshlsLhhlhsPlGslPlFloLhpphstcc+pplhh+sslhuhllLhlFhhhGchIhp.hFGIslsuh+lAGGIlLFhIuhcMlpup.pt.....ptpppEtpt...h-plullPLAhPLlAGPGuIToshlhhsptss.........hhhshlulhLshhhshlhhh.ushlhRhlGctGlsslsRlhGllLsslulQhllsGlpshh ............................hhpshlsLhslhsPlG..slPl.Fluls.p.ths.s....tpR....p....c......h.....s.....hp.....uslhsh..lll.h.l.hhahGphlLp.hFGIolsuh+I.AGGlllhhIA.h.c..Ml.h.s.ptst.................t..p.s..p..p.....p....t......t..................t......................t....ps..l....ulVPLAhP..lhAGPGs.I.uslllh...u.s.p.h.t.s..........h...t..h..h.s..h....h.....l.u.lh.l.s.h.l....hs.a.h...l...h.t.h.us.hlhRl....L..Gp...s....G.......l....sllsRlMGllLsslulphllsGlpth...................... 0 241 508 711 +3284 PF02063 MARCKS MARCKS family Mian N, Bateman A anon IPR002101 Family \N 19.50 19.50 21.30 20.30 19.10 19.30 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.43 0.70 -4.56 4 119 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 35 1 60 106 0 175.90 36 96.19 CHANGED GAQFSKTAAKGEAA.AE+PGE.AVAuSPSKANGQENGHVKVNGDASPAAAEuG.KEElQANGSAPA...EEsuKEEAAou...............tAAsccEAuAu...........sAEuEsAEPuSP...AEGEuA.....opTp..cAtssATPSsSsETPKKKKKRFSFKKSFKLSGFSFKKNKKEuGEGAEuE.GAsAt..tEtAK--AAAAAsEAsst..EpAtAsuEEAuAAus.pttttcEs..AusupPp..EsKs-EA..APEK.PsuEEspsAEE...pKsEEKsAEEAs...AsuAA...sEAPuu.......EpEAAsAEEP.........AAs..sQEAsSEsSPt..AssAE 
............................ss+t-ss.s-...t..s..ss..sKsNGQ....ENGH.s+.NGshost.........................................................................spu-.sh-sss..........................u...ts.s...sspETPKKKK.+FSFKK.sFKLSGhSFK+s+KEsutsupst..uss.t...t-ttttt.t......ts.t......t....-ttt.ss..ps....t...........t.t...tttst..t..E..t..stts...t..pt...st...t..................................................................................................................................... 0 3 11 27 +3285 PF02124 Marek_A Marek's disease glycoprotein A Mian N, Bateman A anon IPR001654 Family \N 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.45 0.70 -4.86 15 236 2012-10-03 02:52:13 2003-04-07 12:59:11 10 1 52 0 0 276 0 201.10 41 44.49 CHANGED YYPspSsclpWFpsGp.sVc.spalcsshphts-.G.lhopsSslol.........ssssstsh.PPolRCp......lsWa+cuVuppRh.......suussPsVYhtPplol.cFtsGcAVCsApCVPcs..VplpWhlsDt....sssssshtsGsCsp+PGLVNlRSshslSttcushcYTCpLsGYPpslPsFpcotsaDASPpssutshllsll...uVlsGlsslullsllsslClhp ........................................................YYP..psVchpWFpcst.tVt.ssplcsthptpss...G..FopVSsVT..........t..ssssps..PsslpCp......hsWaRD..uVs.pRh.......sAsuhPtVhs.hPslol.....tF....t.s.uasVCTAtCVPcG..VshsWhlsDss....sAspsshpsG.Csc+PGLsslRSshPl..S..t.....t..u...-YoC..+L..s..GYPsslPs..h......ptptoa..p.s..s..Ptsso...V.hphl...ulhhhlhshGllslhshlhlh....................................................... 0 0 0 0 +3286 PF01047 MarR MarR family Finn RD, Bateman A anon Pfam-B_269 (release 3.0) Family The Mar proteins are involved in the multiple antibiotic resistance, a non-specific resistance system. The expression of the mar operon is controlled by a repressor, MarR. A large number of compounds induce transcription of the mar operon. This is thought to be due to the compound binding to MarR, and the resulting complex stops MarR binding to the DNA. 
With the MarR repression lost, transcription of the operon proceeds [1]. The structure of MarR is known [2] and shows MarR as a dimer with each subunit containing a winged-helix DNA binding motif. 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.37 0.72 -4.26 38 19890 2012-10-04 14:01:12 2003-04-07 12:59:11 17 44 3773 111 4329 25839 3062 58.30 23 35.57 CHANGED lohspatlLthltppss.hshs....cLApphplspuslTphlc+LEcpGhlpRppsspD+Rpl .................................lo.sph.h.l...L...h....h....l......t.....p.....p.........s........s.....h.....s.hp............cL....uc.tl..t.....l....sp........u.o.lo..p...hlccL..Ec..c..G..hl..pR.p.p..s.....p..D+Rt.......................... 0 1317 2694 3560 +3287 PF02064 MAS20 MAS20 protein import receptor Mian N, Bateman A anon IPR002056 Family \N 23.40 23.40 23.60 30.10 23.10 22.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.45 0.71 -4.20 33 336 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 234 22 220 310 0 124.60 35 71.87 CHANGED lsAGl.u..ushFlGYClYFD+KRRSDP-aK+KL+ERR...+cptpp..........ttsssstl.P-h.pD....pcslpcaFLp..ElphGEpLlspGch-pGlcHLs.NAlhVCuQPs....pLLplLQpoLPsplFphLlp+Ltt ....................................................sus.s.ush.hlGYs.lYFDhKRRsDPpF++.pL..+..c..+R....+ppttt...................................t.ttthttlssh...p-..................s-thppaFhpplphGE............pL...hu...p...G..c........h.....p..........c....ust.+.hhpAl.tVhsQPp....pLLplhppTlP.sVaphLlp.h..h......................... 0 56 93 163 +3288 PF04769 MAT_Alpha1 Mating-type protein MAT alpha 1 Kerrison ND anon DOMO:DM04990; Family This family includes Saccharomyces cerevisiae mating type protein alpha 1 (Swiss:P01365). 
Mat alpha 1 is a transcription activator which activates mating-type alpha-specific genes.\ MAT alpha 1 and MCM 1 bind cooperatively to PQ elements upstream of alpha-specific genes [1].\ Alpha 1 interacts in vivo with STE12, linking expression of alpha-specific genes to the alpha-pheromone (Pfam:PF04648) response pathway [1]. 22.10 22.10 22.10 22.80 21.90 22.00 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.22 0.71 -4.99 13 507 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 358 0 51 428 0 156.70 31 59.93 CHANGED chlss.....hhpshtphccsAuhtAhouphhhss.......hst.ssssppAKKslNuFhuFRoYYh..shFsphPQKchSshlolLWptDPp+shWslMspsaSsIRDpluK......ppssLspFlshtsPhhtl.s.stYhplhGWsLopsppGshslp+sssschpphspshs.shsLshpslhshlp.hhhsssashhsphsssoaphhphss ............................................................................................tth..t.t.s.p.t.s.+.+.sLNuFhuFRsY................Yh..hhFs.s.hp.QKphSshlshLWps.D.Ph.+.scWulhAKsYShlRDphsc......tpssLstFhslhsP..hhsh..st.Ylt.hGWt.....h....t.s.pu...................h.p..h....t...............h...slht.h....h.......................................................................................... 
0 8 25 44 +3289 PF01554 MatE UPF0013; MatE Bateman A anon Pfam-B_163 (release 4.0) Family The MatE domain 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.61 0.71 -4.62 58 32381 2012-10-02 21:24:20 2003-04-07 12:59:11 13 52 4365 8 7913 28954 6802 160.40 16 68.74 CHANGED Phhls.hlps..........hhthlsshhlupl..GstslAuhuluhslhshhhh.........hshGlusuhsslsupthGApphcphthshtpuhhlshhlulhhsllhhhhsp.lhphhssstp......lhphutpalhhhhhshshh.sh.hshsuhLputGcschshhlshhuh.llslslsal ...................................................................Phhlt.hh...........................hh.th.l.s......s..h.h....l....u.........p......h.......G...........s.......t..s............l........A.....u......h.....u....l..........s.....h..s..l.h.t.h.h.hh.........................hh.h.u.l.u.....hu..s.....s........s........l.........l....u......p.........t..........h.......G...........u........p........c............h............c............p........s................p......p......h........h....t...t....u....h..............h....h.u..hh.h.u.l.l.h.....sl.l...h....h...h..h.s.....p.....l....l..t....l....h.....s....s.....s..s.p............................l.h.p......h..u......t...p.a....l..h.....l.h....h....h......s....h............h....h.h.....s....h....t...h......s.....h.....s.............u.................h...hp...u.h....u...p....s...p......h...s...hh.hsh...h...s..h.....llsl.shs.......................................................... 0 2610 5153 6676 +3290 PF01824 MatK_N MatK/TrnK amino terminal region Bateman A anon Pfam-B_30 (release 4.2) Family The function of this region is unknown. 
21.70 21.70 21.70 21.70 21.10 19.80 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.12 0.70 -5.34 15 38881 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 26823 0 17 37918 0 249.20 57 67.98 CHANGED MEEappYLELD+SQQHsFLYPLIFQEYIYALAHD+GLNR.....SILLENsGYDN.K.ShLIVKRLIT....RMYQQNHhllSsNDSNQNhFh....GaNKNhYS..Q..MISEGFAlIVEIPFSLRLlSSL...Et.KclVKSaN..LRSIHSIFPFLEDpFuHLNYVLDILIPaPIHLEILVQTLRYWVKDASSLHLLRFFLaEYpNWNoLIT..sKKu.asF.SKRNpR..hFLFLYNSaVCEYESIFlFLRNQSSHLRSsSSGshLERIaFYGKIE+hl.EVFs+DFpssLWLFKDPFhHYVRYQGKSILASKGTsLLMNKWKYYLVNFWQCaFYlWsQPGRIaINQLSNHSLDFLG ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......p....l.....clhIPaP.lH.hEIL..VQhLp..hWl.pDssSL..HLL.....R.....F.F.L..a...E..........Y...p.....N.........a......N......S......L...I...T................sK....K.........u.........h.................h..........F.....S........K....c.............N....p..R.........h.F........h....F....L....Y....N.....S.aV.h.EY..ESl.F.l.F.L.R.p...Q.S.S..a.L...R...S.TS...G....s..h.L..ER.la..FY..s.....K.....I.........E.......+.......l............l................V.....a........s......p........s........F........p.......t....h...L......W..h.......F.....K..D..P..F......h...H.YV...R...Y..Q.GK.u.ILA..S..K..G.......T..........L...L...M..p.K..WKaYLVNhWQ.ha..Fhh..W..s.Q..P.....t...R..I..a..I...N..Q..L...S..p..a...S...hpFLG......................................................
...................... 0 2 8 14 +3291 PF00661 Matrix Viral matrix protein Bateman A anon Pfam-B_128 (release 2.1) Family Found in Morbillivirus and paramyxovirus, pneumovirus. 19.80 19.80 25.80 23.60 17.30 15.60 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -11.86 0.70 -6.13 13 784 2009-01-15 18:05:59 2003-04-07 12:59:11 16 3 105 0 0 688 0 266.80 48 96.27 CHANGED pshhshspsuhsssusLpPhPlphss....cG+Ll.QlRltclspssc+cpshsal.sYGFlcsscppsshlGt.p...ps.ttthhTuuhLPLGsG.shusspclLcthsphcIpVR+TAuspEplVFplsshsplLpsaphshpphhlhsAsKhs+uPsplssshshpaplsFlSlThhsssttaplP+slLchRSppshulpLpVhl+lpscs-SshhKshhsccptcs..AohhlHlsNlp+pcsc..hYss-YhppKlcpMpLshuLuslhGsoltl+usG+lsKThtshhuh+thsCaPLhDlsPsLs+hLWusuCEIsclpAILQsSs.p-hhhasDlIhcclphhhc ..............o..hhth..ss...pspl.sh....h..h.t......ctplh.QhRl.c.s.hscpc-s.hahhhhGhl.p.ss...tsslG...............hs.u.LsLGss.ssuc..-Lh+tshphslss++oAs.sEchVF..sps.plLpshhhshsphs.hsAsphspAsphlPhsss.ca+VsahSlThlscpshYplPp.thLchpusshhslsLpVTlcl-scscSshlKolsc...u-sshh....AslalHIG.hspscp+h+phohDhhcpKIR+hsLsVuLuDVhGsSlhl+upGthoKhLts.hu.ptshCYPlhshs.pls+lLWpppsplhplpsllQsus.pthtlhsDh.lsssph.h.t............. 0 0 0 0 +3292 PF03819 MazG MazG nucleotide pyrophosphohydrolase domain TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Domain This domain is about 100 amino acid residues in length. It is found in the MazG protein from E. coli. It contains four conserved negatively charged residues that probably form an active site or metal binding site. This domain is found in isolation in some proteins as well as associated with Pfam:PF00590. This domain is clearly related to Pfam:PF01503 another pyrophosphohydrolase involved in histidine biosynthesis. This family may be structurally related to the NUDIX domain Pfam:PF00293 (Bateman A pers. obs.). 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.32 0.72 -4.03 54 7328 2012-10-01 21:36:44 2003-04-07 12:59:11 12 20 3581 41 1765 5952 4148 73.70 28 47.30 CHANGED Qohpolh.allEEshEls-AI.cc..........p-.s...........plc-ELGDlLhpVlhhuplupc..pstFslp-VhpplscKhhcR+P+lF ..........................................................shh.hlhEEstE.l.h.-...s..l....pp.........................................p.-.tp..................................cl.p.-ELG....DlLhtl..l.hhAph...............hthc.hccs..hpph...scKh.cRh.............................. 1 620 1185 1510 +3293 PF04837 MbeB_N MbeB-like, N-term conserved region Mifsud W anon Pfam-B_3854 (release 7.6) Family This family represents an N-terminal conserved region of MbeB/MobB proteins. These proteins are essential for specific plasmid transfer. 21.30 21.30 21.70 21.70 21.00 20.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.31 0.72 -3.87 9 115 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 74 0 3 93 2 51.20 41 30.92 CHANGED MSplLsLApsFEp+SKppupSTpchlpssFccHEpslpptLppupp+Is-AI .....MSplLsLAp-F...EpKSKpctpSTpphLpssFpchEpulpptLppupp+IpsAI..... 0 1 1 1 +3294 PF04899 MbeD_MobD MbeD/MobD like Finn RD anon Pfam-B_5673 (release 7.6) Family The MbeD and MobD proteins are plasmid encoded, and are involved in the plasmids mobilisation and transfer in the presence of conjugative plasmids [1]. 21.40 21.40 21.40 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.37 0.72 -4.07 6 132 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 105 0 14 86 0 74.90 43 78.49 CHANGED MTELEppLLSALEQLQQDYpQRLpEWESAFs-hQ+MhuLppRENA.......ALSEpVTsLSQQVpcLScQlcRLSp ............................MpELEhpLLsAhppLQpsappphppWpSuaspLQphhphoppcpu.......sLpt+sptLspplppLstphp................................................... 
0 5 8 10 +3295 PF03621 MbtH MbtH-like protein Yeats C, Eberhardt R anon Yeats C Domain This domain is found in the MbtH protein Swiss:O05821 as well as at the N terminus of the antibiotic synthesis protein NIKP1. MbtH and its homologues were first noted in gene clusters involved in non-ribosomal peptides and other secondary metabolites by Quadri et al [1]. This domain is about 70 amino acids long and contains 3 fully conserved tryptophan residues [2]. The structure of the PA2412 protein shows it adopts a beta-beta-beta-alpha-alpha topology with the short C-terminal helix forming the tip of an overall arrowhead shape [3]. MbtH proteins have been shown to be required for the synthesis of antibiotics, siderophores and glycopeptidolipids [3-6]. 20.80 20.80 21.10 21.10 20.70 20.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.81 0.72 -4.60 87 1247 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 966 4 274 742 10 53.60 46 64.74 CHANGED M....sssF-csss....pahVLlNcEtQaSLWPshtslPsGWpssh...G.su.......sRpsCLcal-p ...............M..oNPFDcspG.....sFhVLhNs-tQaSLWPsh..sslPAGWclVht.st.......o+tuClpalE........ 0 50 144 224 +3296 PF02289 MCH Cyclohydrolase (MCH) Mian N, Bateman A anon Pfam-B_6511 (release 5.2) Domain Methenyl tetrahydromethanopterin cyclohydrolase EC:3.5.4.27 is involved in methanogenesis in bacteria and archaea, producing methane from carbon monoxide or carbon dioxide. 
25.00 25.00 102.30 102.20 23.50 22.50 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.00 0.70 -5.39 6 211 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 188 1 101 219 69 285.00 43 96.87 CHANGED SVNchAtplVEcMl-tuE-L+l-VtKLENGATVlDCGVNssGSa-AGhhaoclCLGGLAcV..slsshphsulshPsVplpTDaPAIAsLGuQKAGWplp..VGc..YFAMGSGPARALALKPKETYEEIGYEDDADlAVLsLEuscLPsEcVsEalAcECuV-PENVYlLVAPTASlVGSlQISuRVVEsGlaKhhEVh.FDls+lK.usGhAPIAPlhsDDlpAMGpTNDsllYGGpsahaVcuD-..s-lcplscplPSssScDYGKPFh-lFKcAsYDFYKIDsGhFAPAcllVNDLpTGKoapuGclNsElLhpSFG .........................SlNthuh.ll.cphlp.u....ccLpltltphpsGspllDsGlp.s.GuhcAGhhhsclChGGLupV...sht....t..th...t.sh.h.shlpVpospPslACLuSQhAGWpls....htc.....aFAlGSGPARALAtc.........c........cla.c-l.s..Yc...D.pu-.t..u....lL.sLEusplPsppVs-clAcpCGV.sPpslhlllAPTsSlAGoVQlsuRVlEsAlHKhppLt.F-lpcIlsuhGsAPluPst..sDtlpAMGRTNDAllYGGpshhhVc..ssp..tthpplspplPSss.Sc-YG+PFh-lFcphshDFYclDstl.FuPApVhlsslpoGcsapsGpls.plLtpSF............. 0 27 71 90 +3297 PF00493 MCM MCM2/3/5 family Bateman A, Finn RD anon Prosite Family \N 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -12.00 0.70 -5.64 62 3858 2012-10-05 12:31:08 2003-04-07 12:59:11 18 63 938 2 2067 6744 833 253.70 39 40.85 CHANGED -hcplpcl.uccs..slhpplspSlAPsIaG..p--.lK+AlhLtLhGGsp.K..phsc.thc..lRGDINlLllGDPG.sAKSQlLKal.p.pluPRuVYToG+GSSAsGL.TAuVh+Dstot-...asLEuGALVLADpGlCCIDEFDKMs-pDRsulHEAMEQQTISIuKAG.Ihso.LNARsSlLAAANPh..hGRYstp+ohspNlslssslLSRFDLl.FllhD.ps-cppDpplAc+llshHhtt...........................ppphptp.................................lshchL++YItYA.R.phpP..t....Loppu.pctlsphYlphRp.s........tt...ttthslTsRQLESlIRLoEAhA+h+LuphVstcDVccAl+LhppShh 
.........................................................................................................pplpph..t..p.p.....plach..lspSlAPp.Ia..........G..........p..............pc...l.......K+Al.lLhL.......h.G........Gs.........p.............K...................p..h.....s.....s...........s..h..+.................lR...GD......I.....Nlh.lhG...DPusuK....S...Q.h..........L.p....a....h..............p....h....s.......s...........h..........u........l..........h...........o....o...........G.....p............u...u..........S.......u...s............G.......L........T......A............t..........l.....h.....+........D.........................o..t..p......................hh....l.......E..u.......G.A........h.......V............L.u..D.t....G.....l.............sh....IDEF.D.K..Mp.t.....s...psslHE.......sMEQ..Q..o.l.......ol........u.....K....AG......l..h.sp..L..p..uR.s.ul...lA..Au...N.Ph..............u.................p....a..................s................................h.........s................................t.........N.........l............t............h.........sl..lS.R....F..D..l..............h.........h........l........l.........h........D.............s...............................D....................l....up.....a..l.ht..a..t...................................................................................................................................................................................h.s...t..............h....p...p..Y....l.......h.s...+..................t....h..t............P.......h.....................l..s.........s....t..p......h.l......p....Y....t.hRp.t..................................t.......tt......h..os.RpLts....hlRlup....uhA+hphp...t................s.t.t.cst..Ahplh.....h........................................................................................................................................................... 
0 763 1189 1734 +3298 PF00015 MCPsignal Methyl-accepting chemotaxis protein (MCP) signalling domain Sonnhammer ELL anon Blast MCP1_ECOLI/361-421 Family This domain is thought to transduce the signal to CheA since it is highly conserved in very diverse MCPs. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.51 0.70 -4.79 9 30898 2011-09-19 18:58:56 2003-04-07 12:59:11 16 309 2287 12 9245 27179 1335 198.30 30 36.30 CHANGED ppAo-hAppAsppA..............ppsspsVcpslpshcplsspSppIscIlsVIspIA.QTNlLALNAAlEAARAG..EpGRGFAVVAsEVRsLAp+SApAA+EIcsLIppsspp..............l-sGsshlcpTucshpcIVsulsclsshls-IAuuosEQSpulppls....pulschsplTpps..............sAhsEEuuAAutsLcppApcLpphlstF+lp .......................................................................ttt.........................s..............................h...p...p....h....s...p.......t....h.......p....p.......s.......s....p.......s...h.......p....p.......l....s...p....p....u.......p.......p.......I.s...........p...l...l.......s.l.......I...ps.......I...A...p...QTNL......LALNA.A.I....E.A.....A..R...A....G............Ep....G......R..G.........F.AVVAsE.......V.......RpLAp+.....o.......t.......p.......u.......s.......p.......E.......I.......p...........p.......h..........I....p.......p.......h..............p.......p.......p.............................t.....................................................................................h...............t...........................................................................s........p..........p........u.......s...............p........h..................s..............p.......p.......s.......s...........p.............s..............h..............p..................p..............I..............s..................p..............s.......l..............p....p..............l.......s.......s...........h..............h..............p..............p..............I.......u..............s..............
u...........s..............p.......c......Q.......s.......p........s.......h....p....p.....ls................ps...l....s.......p....l...s....p....h...s.pp.s..........................................ss.h....spp.s...s.......t....s.......u.......p.......p.......L.......p.p.....u.......ppL.pphlt.Fp.................................................................................................................. 0 2775 5451 7351 +3299 PF02993 MCPVI Minor capsid protein VI Griffiths-Jones SR anon Pfam-B_1634 (release 6.4) Family This minor capsid protein may act as a link between the external capsid and the internal DNA-protein core.\ The C-terminal 11 residues may function as a protease cofactor leading to enzyme activation [1]. 25.00 25.00 27.70 27.70 23.70 23.20 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.55 0.70 -4.21 9 139 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 101 1 0 157 0 227.40 52 97.63 CHANGED FusLAPRpGspPhhu..psIGsS-h+GGtFsWGSLhS....Gl+shGSslpNhup+hhNSpThp.l+ptlpDoslhcpVsphhupulsulVDIuR.clppclpphh-+h.s.....-p.hsptcsh..s...h.tstpstscP.scpshlstsstP.Psh.psl..hs......hsspcPhhs.shssss...psPsplshPP..tsp....ssss...s.....ssppsp..............+.hRsusWQusLssllGhGVpsspRRpCY ...........FuSLAPRHGoRPaMGTWs-IGTSQLNGGAFNWuSlWS....GLKNFGSTl+oYGsKAWNSSTGQhLR-KLKDQNFQQKVVDGlASGINGVVDlANQAVQ+pIsSRLD..P..hPs..................pt.h...-th.................s+G-KRPRP....DtEEsh.lhpscEP.PSYEEAlK.G............hPsTRP.IAshAs.uVh.......pPsTLDLPP....sPs....sssss........ss.s.s.....s....ss.ssstR..................ssttpt.stNWQSTLsSIVGLGVpSlKRRRCY................. 0 0 0 0 +3300 PF02249 MCR_alpha Methyl-coenzyme M reductase alpha subunit, C-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2706 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. 
It is a hexamer composed of 2 alpha (this family), 2 beta (Pfam:PF02241), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The C-terminal domain is comprised of an all-alpha multi-helical bundle. 25.00 25.00 32.70 26.70 21.80 21.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.78 0.71 -4.12 22 7244 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 318 31 49 7541 13 118.60 61 67.10 CHANGED DQIWLGSYMSGGVGFTQYATAAYTDNILDDasYY....GhDYlp-KYG...s........scu.s.oh-sVpDlATEVTLYuLEQYEcYPThLEDHFGGSQRAsVhAAAuGsosAhATGNupAGLuuWYLS.hLHKEuaGRL ....................................GVGF.TQYATAAY..TDNI.LDDas.YY....GhD.YlcDKYG.......s........h.s..+sKs..ThDlVpDl.ATEVTlYGhE...QYEpaPThLEsHFGG....SQRAuVlAAAuG.hosulATGNuNAGL..suWYLS.MhLHKEuauRL.............................................................................. 0 10 32 41 +3301 PF02745 MCR_alpha_N Methyl-coenzyme M reductase alpha subunit, N-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2706 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (this family), 2 beta (Pfam:PF02241), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The N-terminal domain has a ferredoxin-like fold. 
20.50 20.50 21.00 20.50 19.40 20.00 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.81 0.70 -5.19 5 1424 2009-09-10 16:32:42 2003-04-07 12:59:11 10 3 189 31 50 1557 13 56.70 70 20.77 CHANGED EKKLFl-AL+cKFc.E-PcEKpTKFYsFGGWKQSERK+EFVEaAKKlAEKRG.IPFYNPDI..GVPLGQRKLMPYpVSGTDsaVEGDDLHFVNNAAMQQMWDDIRRTVIVGMDTAHcVLEKRLGKEVTPETINEYMETlNHALPGGAVVQEHMVElHPGLVsDCYAKIFTGDDELADEIDK+FLIDINKpFPEEQAEQLKKAIG+RTYQVsRlPTIVuRsCDGATsSRWSAMQIGMSFISAYKlCAGEAAlADFSFAAKHA-VIpMGohLP .....................................................................................................................................................................................................................................................................AMQIGMSFIuAY+MCA.GEAAVADLAaAAKHAuVlpMushLP............ 1 11 33 42 +3302 PF02241 MCR_beta Methyl-coenzyme M reductase beta subunit, C-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2692 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (this family), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The C-terminal domain of MCR beta has an all-alpha fold with buried central helix. 
25.00 25.00 325.40 325.00 24.30 18.90 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.52 0.70 -5.17 6 82 2009-11-23 11:17:05 2003-04-07 12:59:11 13 1 65 31 49 86 12 252.00 65 57.60 CHANGED GYALRNIMsNHlVAsT+KNThNAlALuoILEQTAMFEMGDAVGuFERhHLLGLAYQGLNADNLVaDLVKtNGKcGTVGTVVASlVERALEDGVI+VcKpLsSGFKlYcPsDhAhWNAYAAAGLVAAshVNsGAARAAQGVASTlLYYNDILEYETGLPGVDFGRAEGTAVGFSFFSHSIYGGGGPGIFpGNHlVTRHSKGFAIPsVAAAMCLDAGTQMFSPE+TSuLlGoVaStIDEFREPLKYVsEGAsElKc+ GYuLRNIhsNHhVAhT+KNshpAsALoSIhEQsAhFEMGsAlGsFERhHLLGLAYQGLNANNlVaDLVKpNGcsGTVGoVVtSlVERAlEDGVIpscKphsSG.aphYcssDsshWNAYAAAGhlAAshVNCGAuRAAQuVuSTlLYaNDlLEaETGLPGlDaGRspGTAVGFSFFSHSIYGGGGPGlFNGNHVVTRHSKGFAIPCVsAAMsLDAGTQMFSPEpTSuLlGsVautI-EFREPlKhVAcuA......... 0 10 32 41 +3303 PF02783 MCR_beta_N Methyl-coenzyme M reductase beta subunit, N-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2692 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (this family), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The N-terminal domain has an alpha/beta ferredoxin-like fold. 20.90 20.90 23.90 32.00 20.00 17.60 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.92 0.71 -4.83 7 83 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 66 31 49 89 12 178.50 51 41.17 CHANGED DplDLYD-+GpLlEEsVPLEAlSPL+NPsIpslVpDlKRoVAVNLAGIEsALpsuplGGKushI.GREl-LsIVtNAEuIA-plK+hlQVsEDDDTsVcLlssGKphLlQlPSpRlcVAA-YosusLlsutAlspAIIctF-VDMaDAshVKuAVhGpYPQolDahGuNlAolLusP.pLEG ...D+IDLYDD+GpLLcpsVPLEAISPLpNsuIp+llpssKRTVAVNLAGIEsuL+TGplGGKuppI.GRELDlsIVsNA-uIA-clcchlpVp-sDDTsVcllsGGKphLVQlPotRlcsAA-YoluhhssAuAlspAII-pFclsMaDAshV+AAVhGpYPQohDhtGGNluolLslPpp.EG......... 
0 10 32 41 +3304 PF02505 MCR_D Methyl-coenzyme M reductase operon protein D Mian N, Bateman A anon Pfam-B_2115 (release 5.4) Family Methyl coenzyme M reductase (MCR) catalyses the final step in methanogenesis. MCR is composed of three subunits, alpha (Pfam:PF02249), beta (Pfam:PF02241) and gamma (Pfam:PF02240) [1]. Genes encoding the beta (mcrB) and gamma (mcrG) subunits are separated by two open reading frames coding for two proteins C and D [2]. The function of proteins C and D (this family) is unknown. 25.00 25.00 108.30 108.10 18.90 17.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.76 0.71 -4.65 10 75 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 59 0 51 72 4 148.00 41 92.72 CHANGED I-VcIFPHRlLpA-TTEKlLNclY-.l-GltRVllHGpsLPcpVsaGPA+GhPVNHs-R+lIpVKGp-lEL+VpVGRIhVplcD.c..-ppl-cI-EICcElhPFuYclpsG+Fh+ccPTVTDYlKYGcc....lcclDsRLlGhVDPpuRhpssVslIK ..h-lEIFPpRhLps-TTEKlLNclhs.l.cGlpRlllaG.sLPcpVsYGPA+GtPlsHs-R+hIpVpGpslELpVpVGRlhlpl-s.....-shl-cIcclCc-.hh...P...FuYclp.....GpFh+scsTVoDYhKYG.p.....sphDccllGhsDP+u+hppplthl.......... 0 11 32 42 +3305 PF02240 MCR_gamma Methyl-coenzyme M reductase gamma subunit Mian N, Bateman A anon Pfam-B_2713 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (Pfam:PF02241), and 2 gamma (this family) subunits with two identical nickel porphinoid active sites [1]. 
25.00 25.00 320.70 320.50 18.60 18.00 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.56 0.70 -5.24 7 85 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 67 31 49 88 13 246.80 61 94.67 CHANGED sQaY.PGpTplApNRRKaMNP-hELcKLREIsDEDlV+lLGHRsPGE-Y.SVHPPLEEMD.PEDslR-hVEPhpGAKtGcRVRYIQFsDSMY.APAQPY.RuRsYhWRaRGlDsGTLSGRQlIEhREpDLEpluK..LlETEhFDPApsGlRGATVHGHSLRLDENGlMFDhLpRhlasE-TG+VhhVKDQVGc.LDEPVDlGcPLsE-pLtchTTIYRlDslshR-DcEhlElVpRIHphRThuGFtP ...PQaYPGsTpVApNRRKaMNPshcLEKLR-IsDEDlV+lLGHRuPGE-YpolHPPL-EM-EP-sslR-lVEPhsGAKAGDRlRYIQFsDSMY.APuQPY.RuhshhhRaRGlDsGTLSGRQlIEsRERDLEcloK..Ll-TEhFDPApsGlRGATVHGHSLRLDEsGhMFDhLpRhlhsccoGpVhYVKDQVGpPLDc.VslGcPlsE-cL+c+TTIYR.hDsluh........R-DtEsl-lVp+IHptRThuGFt.... 0 10 32 41 +3306 PF02315 MDH Methanol dehydrogenase beta subunit Mian N, Bateman A anon Pfam-B_12628 (release 5.2) Domain Methanol dehydrogenase (MDH) is a bacterial periplasmic quinoprotein that oxidises methanol to formaldehyde. MDH is a tetramer of two alpha and two beta subunits. This family contains the small beta subunit. 20.60 20.60 21.10 20.70 19.70 16.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.06 0.72 -4.05 4 44 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 44 23 21 47 0 90.00 58 93.97 CHANGED MKplLshhslAushu..hsssALAYDGTpCKAPGNCWEPKPGaPEKIAGSKYDPKHDPtELsKQsESlKsM-ARNtpRltNhKKTGKF.YDVKKI ..............hhhhshhsushs...hussAhAYDGTpCKAPGsCWEPKPGa...P-KlAGSKYDPKHDPpELsKQs.pSIcuMEpRNtKRltpFKKTG+FhYDVpc........ 0 6 16 18 +3308 PF04349 MdoG Periplasmic glucan biosynthesis protein, MdoG Mifsud W anon COG3131 Family This family represents MdoG, a protein that is necessary for the synthesis of periplasmic glucans. The function of MdoG remains unknown. It has been suggested that it may catalyse the addition of branches to a linear glucan backbone. 
25.00 25.00 28.80 28.60 17.70 17.30 hmmbuild -o /dev/null HMM SEED 484 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.60 0.70 -6.23 4 1625 2012-10-02 23:57:29 2003-04-07 12:59:11 7 2 926 2 268 967 64 475.30 48 91.60 CHANGED FshDsVscpA+sLAucsYpu.KssLPoshpchpas-YQpIRFppD+AhWss.csPFplpFaHpGhaFcoPV+IsEVsustspclpYDPshFsaG.slhpscpssKDLGaAGFRVhYPlNotccpD.ElssFLGASYFRslGpGpsYGLSARGLAIcTA.PuGEEFPcF+tFWlE+PpssscplVlYALLDSPpsoGAYRFslpPGcsslMDVputla.R-cluKLGlAPhTSMahaGsNp.p.spsaRPElHDSsGL.lhsGNGEWlWRPLsNPpaLpVSsF.spNPpGFGLLQRsRDFu+YpDL-s+Y-hRPShWVEPKGsWGcGoVpLVEIPT.DETpDNIVAFWsPcp.PcsGpEhpFuY+LpWu.spsclp.PpsuaVppTtpuhGDVtppshhR...hohtFVVDFpGsthKtLssstPVospsslcsNutllEs.olR.NPsh+GaRLshclcst-s.pp.sEhRAtLsspsscPLoETW.YQhs...h ..................................Fshs.ltphApsLAtpsap..u.s..p.s.LPs.s.lt..shsassY.ppIQass-+uhWps....l.c.p...aclpFaH..G.MhFcp.V+l.pV..s........s.......s....t....s+c.......I+.......asP-hFsa.s.......-s...t..h....D..p...-..s...s...p.....D..L...G...FAGFRVh..h.....s.......h...s..p........p.......t..pc....-lVSFLGASYFRslGsst.YGLSARGLAIDT.uh.s.o.s..EEFPcFptFWlEpP......K....P.s.s.pplTlYALLDSPssTGAY+Fs...I..p..s..G.+.s....s.l.h.DVpu+lYhR............cc............lpcLGlAPhTSMFhaGsN......p.......p................ss...saRPplHDS-GLuha.s.G....N....G.........E.........WIWRPLNNP....p.+.L.p.houa.ssc.N.P+GFGLLQRsRDFS+YpDltshY-pRPShWVE.P...........+....u.........c...........W.GKGsVpLlEIPTssET.DNIVAaWpP-p.hs..csG.......cEhsFpY+LhWS...s...............p.s.......l+..ss.h....AhVht.....T......RpuhG...s.h...........p......ss........hh.....chshtFsVDFsGu.-h.K..........h.sts.s.......P.h.....sh.pss.I.s.s.s.u..thl...........cp............p.lt..Y....s..s..s..h..c..G.aRlhhchh.......s.p..........ss....pc..........ss-MRhhLts...t.s....ps..lSETW.YQh..h................................................................... 
0 60 133 191 +3309 PF02975 Me-amine-dh_L Me-amine-deh_L; Methylamine dehydrogenase, L chain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 25.50 25.40 17.80 16.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -11.05 0.71 -4.14 2 110 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 90 102 36 118 21 113.60 54 68.56 CHANGED .sRttapPQDpD.puCDYWRHCSIDGNlCDCsGGSLTsCPPGTcLusuSWVASCaNPsDGQoYLIAYRDCCGhpssGRCsClNspGELPVYRPEFsNDIlWCFGA-sDAMTYHCTlSPIVGKA .........ts..........t.t.Ds.puCDYWRYCAIDG.LCo.CCGGohoSCPPGTpsSPsoWlGoChNPtDGpsYlIuYpDCCGKso.C.G...cChC.ps.Et.EpPlY+P...phsNDIsWChGsts....YpCosusllGh..... 0 7 19 26 +3310 PF04934 Med6 MED6; MED6 mediator sub complex component Wood V, Bateman A anon Pfam-B_4045 (release 7.6) Family Component of RNA polymerase II holoenzyme and mediator sub complex. 20.20 20.20 20.20 20.60 19.30 20.10 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.89 0.71 -4.63 32 317 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 279 3 225 313 2 139.90 37 51.89 CHANGED lpW+sPpalp....................h..sLp....ssNVL-.YFup..SPFYD+oSN...Npll+hQtphsp.................................................hppcLppMsGlEa.llsps..pEP...................................slaVI+KQpRps.............sspspslssYYlluuslYpAPslhsllsoRl....................hsslhplppu....hsphs.phspasP ................lpWpsstal.............................hLp.....ssslL-.YFup...sPFYD+TsN...Npsl+hQphh.p.......................................................................................cLppM.s..GlEY...llhps..pEP....................................lalIRKQp.Rps.................ssplhsLusYYIluu..s..lYQAPsltsVls..SRl........................loslpslppuhcpst.shspapP......................................... 
0 76 120 183 +3311 PF03525 Meiotic_rec114 Meiotic recombination protein rec114 Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 33.70 30.80 24.30 19.70 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.03 0.70 -5.94 4 26 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 25 0 15 25 0 261.40 35 69.83 CHANGED YS+YTIPSaAPsGFsShLEPPpIDKWQHLSsNCTLQF+VLLhDStQlhlpVlLNNSThLEpIRLPLGsNpDhIQFSCKsPIISCKYISEEFGP+hL+RFQlNLPNDl-FNRslVSLKNLsFVl+TAKTSIApsThssQsps.NNupKlsFsEss.NsSoYppsNsQFQ............................TQNMlhDFSQphQEKstREssNpSNITLPpDs..hsIsQQpaPss-hNVVpoSQDLNTPpATQTlhupPEsL.VpshElSpshssooss.PshcNppppsthsSDhhS+Kthh.pp.sshhpshcLPKERpppEpplpuhh..hpsssThlhS.tppNpl+pNs ..................YSpYs....APpGFpohhpPs.pl-KWpH..l..S.ssssL.pFplLLh....cspp.lhlpV..lhN.........N.uslLEpIplP.l.....us.s.......p-lIQFSsKuPhISCKYlscch.usp...h.lRRFQhsLss-s-Fs+ssssLpsLsFVl+sA+...o..ohupsshps...phps.ps.spphsh..ps...phss..t..p.ph........................................................................s.s..h...sp.................................................................................................................................................................................................................................................................................................................................................... 0 2 7 13 +3312 PF03243 MerB Alkylmercury lyase Bateman A anon Pfam-B_3505 (release 6.5) Family Alkylmercury lyase (EC:4.99.1.2) cleaves the carbon-mercury bond of organomercurials such as phenylmercuric acetate. 
21.00 21.00 24.00 29.60 18.90 20.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.70 0.71 -4.30 22 236 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 189 13 65 184 11 118.90 31 52.93 CHANGED LolpPTsH+hpls..GcpLYsWCAlDsLhass.hlspssplpSpsssTGpslcloVcs.st...lsslpP.sssVVShl.ssss..s...sl+suhCs.hlpFFuo.psApsWhspHP...supllslt-Aa.pLGptlttplh ................hohh.TsHphpls.....u+.pLYuhCAlDoLhhsu.ll.u.p.s..sclpStCssTGpslplolps.s.p......lhslpP..sshsVuhlsssct.s..........shpsuhCs.plp.FFsS.psApsWhsp+s....th..tllslppAa.tlup.h.t..h............................ 0 26 44 57 +3313 PF02065 Melibiase Melibiase Mian N, Bateman A, Eberhardt R anon IPR002287 Family Glycoside hydrolase families GH27, GH31 and GH36 form the glycoside hydrolase clan GH-D. Glycoside hydrolase family 36 can be split into 11 families, GH36A to GH36K [1]. This family includes enzymes from GH36A-B and GH36D-K and from GH27. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.45 0.70 -5.78 13 2958 2012-10-03 05:44:19 2003-04-07 12:59:11 13 57 1465 71 1044 2993 252 265.70 24 45.35 CHANGED SFpTPEllhsYospGLNuhSQpaHsLhRp....+lh+statsc.RPlllNsWEAsYFDaNcsplhslA-cA.....tcLGlEhFVLDDGWFGs...RsDDsuuLGDWhlspcKaPsG...Lssluccl........+spGhcFGLWhEPEMVs.s-ScLa+pHPDWllp.sssRstopuRNQhVLDlups-VhDaIhpphspLLspssIDYlKWDhNRplTEssSsths.......ssHpYhLGlYclh-pLsspaPcllaEuCuuGGGRFDhGhLhYhPQhWsSDsTDAl-RlsIQaGTSL............lYPsushGAHVSAsPNcQstRtT.ShphRutVAhh.GshGhELDlspLsccEcptltp.lAhaKchRpllt.....hGshYRLpsPt..su.NpsAhhhVSsDpspAlltahplhussshs..........shl+LtGLDsc 
...................................................................................................................................................................t..p..t...P..h.h....h.Ns.........W.p..s....h.........h....h.........c..........h..........s..........c....pt..........l..........h.p....h..A....c..th................tc..h.....G...h.......-.....h..h....l..l..D....DG...Wh.s..t.................R.p..s...c................t....................h.......G...........c........h......h......s........s..........t........p.......+a..P.....s.....G..........lpsLsct.l...........+..sh.G.h.c.F.Gla...h....p....s...................s...........t.........o......t.........h........h...t..t....a....P.....................................................................................................................................................................................................................tt.hh...t.......hta.hKh..D...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................s............................................................................................................................................................................................................... 1 383 695 892 +3314 PF01372 Melittin Melittin Bateman A anon SCOP Family \N 20.60 20.60 21.10 25.90 19.70 20.50 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.39 0.72 -7.01 0.72 -4.59 3 18 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 16 4 1 33 0 24.20 71 40.82 CHANGED GIGAILKVLoTGLPALISWIKRKRQQ .GIGAlLKVLuoGLPsLISWIKp..... 
1 1 1 1 +3315 PF02964 MeMO_Hyd_G Methane monooxygenase, hydrolase gamma chain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 118.90 118.40 20.90 19.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.92 0.71 -4.33 3 19 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 18 54 4 20 0 160.40 53 94.31 CHANGED IH-NsTRsAWhuKIApLNTLEKAscFIpDFRscHoSPFRsSY-LDlDYLWIEtKIEEKLAVLKs+cFN-sDLlsKsTsGEDApuVtsssVAKMcAAsDKYEAERIHIsFRQAYKPPVMPVNVFLDTDRQLGT+LMELRNTDYYATPLEuLRKcRGVKVVHL ......IH-NssRsEWhuKIApLsolcpAstFlpDFRhpaoSPFRpoYcLDlDa.aIEtKIEE+LuVLKscc.h.ssu-LlTKsosGEDAttVpsshlAKhcAscs+Y.EAERIHIpFRphYKPPVhPVNVFLcTDptLGT+LMELRNTDYYusPLEsLRKcRGVKVlpL....... 0 1 4 4 +3316 PF03203 MerC MerC mercury resistance protein Mifsud W anon Pfam-B_2720 (release 6.5) Family \N 23.00 23.00 23.00 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.74 0.71 -3.72 42 387 2009-09-11 08:49:01 2003-04-07 12:59:11 9 5 300 0 102 277 593 114.30 31 81.69 CHANGED hD+hulshSsLCslHC...lshPhlhshLsshG.hhh.....hhsphhHphhlslslslullAlhhGhhpH+.phthhsluhhGlslhshuhhh.cshh.....ct...............................................slTllGssllshuHlhNhRhs ..................hD+hGshuSllsAhtC.hshPslsuhuuulG.uhh...............hcs............hah....th..lLslhssluhlA.slGa...hpH+.phh..hhh.L.uhlG.sllhsus..h.....hhhshhh.....ps.................................................................slhhlGhsLhlusplhshh..s...................... 0 45 71 89 +3317 PF05052 MerE MerE protein Moxon SJ anon Pfam-B_5840 (release 7.7) Family The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2]. 
20.70 20.70 22.10 21.70 20.30 19.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.66 0.72 -4.00 6 198 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 160 0 22 56 10 73.70 75 93.45 CHANGED MNSP-+lPsET+..pPlTGYLWGsLAVLTCPCHLPILsh..VLAGTTAG.AFluEaWGIAALsLTGLFlLSloRhLRAF...p ............MNuP-+LPsET+..pPloGYLWGALAVLTCPCHLPILAs....VLAGTTAG.AFlGE..H..WGlAALsLTGLFVLSVTRLLRAF.c........ 1 5 9 16 +3318 PF00376 MerR merR; MerR family regulatory protein Finn RD, Bateman A anon Prosite & Pfam-B_3021 (Release 7.5) Family \N 28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -7.41 0.72 -4.39 347 3063 2012-10-04 14:01:12 2003-04-07 12:59:11 18 35 1779 31 910 13735 1815 37.70 36 21.31 CHANGED sIu-l.....Ach.s..GloscolRaY-ct.G.Ll...ssp+spsGhRhY ..............sIGcl.....Act.s.....GVsspsl...RaYEpp..G.Ll.....ss.tR..s..s..u..u.hRpY................. 0 354 658 814 +3319 PF02411 MerT MerT mercuric transport protein Bashton M, Bateman A anon Pfam-B_1796 (release 5.4) Family MerT is an mercuric transport integral membrane protein and is responsible for transport of the Hg2+ iron from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE). 29.30 29.30 29.30 29.40 29.20 29.20 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.55 0.71 -4.28 3 490 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 366 0 109 326 70 112.50 47 88.93 CHANGED MSEPpNsRGALFAGGLAAILASsCCLGPLVLlsLGlSGAWIGNLTlLEPYRPIFIuAALVALFFAWRRIYRPVcACcPGsVCAlPQVR+TYKlIFWlVAlLlLVAluFPYVlsaFY ......................psu+u...u...Lh..u.G....s..L.A.AlhASsCClGPLVLlsL.GhS.GA.WI.G....N......LT.s.LE.PYR.PlF.IGs...A.LlAL..h..FAWp+l...Y......R..P..s..p..A...Cc.....P.G.c.lC...A...l...Pp...l.R...t.........s....h...K.llF.W..l.VusLVLluLuFPYlhPaFY......................................... 
0 32 75 97 +3320 PF02475 Met_10 Met-10+ like-protein Mian N, Bateman A anon Pfam-B_2239 (release 5.4) Family The methionine-10 mutant allele of N. crassa codes for a protein of unknown function, Swiss:O27901. However, homologous proteins have been found in yeast (Swiss:P38793) suggesting this protein may be involved in methionine biosynthesis, transport and/or utilisation [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.28 0.71 -4.72 11 1002 2012-10-10 17:06:42 2003-04-07 12:59:11 11 35 540 11 661 9354 2050 197.00 24 47.12 CHANGED psaslIGDllllplpsEhpp.acphIuEslhchp.slKsVhp+supl..cG.aRshchEllAG.ps.opTlH+ENGspa+lDluKVaaSs+htsERtRlsph.sc-GElVlDMFAGlGPFuIslA+htKschVaAl-lNPcuh+aLpENI+LN..KVcshl.sIhuDsc-Vh...cshAD..............................................RllMshPtpsccFLcp...Ahptl+-G............GVlHY .......................................................................................................p..athhGc....l..h..h..l..p....h..........t....p..................t....t.....l.....u...p............h...l...h.p....t............t...l...c.s...l...h....p.c....h.s..........h......t..s..t..h...R................p....h.....c....l....l..h.....G....c...p.....................h..............s......h.pE.....p....s..h....p.....a...p...h..........D.....h......s....+.....l.....a....a.....s......s....p............t...s.............E......+........t.......R...l.......s........p................h.......h.........p.........t........s.............-.....s............V..hDh.a..AGl...G..s.....F......u.l....s...h...u.....p...............+....s....t....h.........V...aAs-......lNPp...u...h....c.......hL..p.c........N..h...p.....l..N.......+..l...........p.....s.........h.......l........s.....h.....p...h.....D.....s.....p...p....hh............hh..p.............................................................................................................................................................
...........................+...h..l....h....s..h............................s..........p......h....h.......h.................................................................................................................................................................................... 1 209 377 540 +3321 PF02965 Met_synt_B12 Vitamin B12 dependent methionine synthase, activation domain Griffiths-Jones SR, Eberhardt R anon Structural domain Domain \N 20.80 20.80 21.50 21.30 19.70 20.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.16 0.70 -11.32 0.70 -5.14 11 2573 2009-09-10 23:08:37 2003-04-07 12:59:11 12 20 2384 10 722 2189 810 252.90 35 25.51 CHANGED sslpElhsalc.+shap.......................s.t..stcsp+lh.supshLp.........ucslsshLsu.........................................................................u-hhuhFAsTuGhthEchtcshppptD.hpuhllcAlGstlAEAhA-plcccl+pc....stscslsho.........hR.uPGYsuh.-hoEpttlapLlpsEp.hGl+LT-ShhMsPtpSVSGlhhhsPp.u+h.hhuphspsphpDhhtRKth .............................................................................................................................................................................................................................................h..ltplh.aIDWtsFF..sW..........ph.hG.a....................Ptl..h..pc....G....tA.ppla.t-u....pt..hL.....c.cl.tpphl..........ps+uVhGha.PAspsGDDl.lhtscsppphhth.h.......................................................................................................................pp.thsshsluDalssh.oGhs..DalGh.FAV..TuGhth.-thscta.c.sp..p.DDYstIh.l+ALADR..LAEA......hAEhlHc+VR+c..hW....GYus.sEsLss--Llp.....................ppYpGIRPA.PG.Y.P.A........CP-Ho-Ktsla.cLL..c..........s..-.....c..h.......G......hpL.T.E.S.a.AMhPsuSVSGhYau.H.P.c.....u.....+...Y......F.....u.l..u....p...l.tpD.Q.l.p.c.aAhR+t..................................... 
0 250 478 616 +3322 PF03724 META DUF306; META domain Yeats C anon Yeats C Domain Small domain family found in proteins of of unknown function. Some are secreted (e.g. Swiss:O25998) and implicated in motility in bacteria. Also occurs in Leishmania spp. as an essential gene. Over-expression in L.amazonensis increases virulence (Swiss:O43987; [1]). A pair of cysteine residues show correlated conservation, suggesting that they form a disulphide bond. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.50 0.72 -4.18 122 2207 2012-10-03 08:47:39 2003-04-07 12:59:11 11 29 1689 2 524 1564 142 100.60 23 59.33 CHANGED sltsspWpl......h..put.shhs...tpps...pls.........h.tss.................s...plsGp...uG..CNpahGsapl.pss..........p.....lshs....sluuTchuC..s.s.hhphEpthhpsLps.spshpl.p..ss............pLsLps ..........................................................................ttpa.l..........h....p.ut...hs.t.............psts....pls.........h..s..pp...................t....plsGp.....su..CNp..a.....h..Gp...h.p.......l...ps.s..........p.......lphp....sl.u.s.T.c...Mh.C......ss.....p..hhphEpphh.p.hLpp..stphsl.s......ss.............pLhL..t............................... 0 156 334 437 +3323 PF01676 Metalloenzyme Metalloenzyme superfamily Bateman A anon Pfam-B_1926 (release 4.1) Family This family includes phosphopentomutase Swiss:P07651 and 2,3-bisphosphoglycerate-independent phosphoglycerate mutase, Swiss:P37689. This family is also related to Pfam:PF00245 [1]. The alignment contains the most conserved residues that are probably involved in metal binding and catalysis. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null --hand HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.65 0.70 -4.99 27 6157 2012-10-03 20:55:17 2003-04-07 12:59:11 13 22 3518 49 1368 5458 2001 425.30 23 96.34 CHANGED pphlLllLDGhG.tsspc...tssl..psApT.........PshDplhc.........sspGhtlslP..uphGsS-luHhsl..............huhpstchasspshhtshsc....tt...........Glco+.sclthhsphst.c......hhllhDtR..cl.sppspshhcpltp.pltph........l....s..h...........h.tpuacshhltps....ts.ssshlcs...........lsPphl....stshh.hpss-sl.hhsacsc+stplhptlhppsach.hpptth..t+lh...hh.atuph.ushhh..sstshsph.hchhsppsl+phtluETtphs+lshhas..............h.l.sspGsThslpschpuhthscphlcsl.tsca.D.hlhlNhsssD.hsGHsGchcuplculEthDp.plscll-tltpssshlllTADHGNs.phhs..................TtHTtcPVPlllhut....sh+.......hsphh.tsphAsluuTlhplhGhchsp.hpsh .....................................................................................................................................................................................................................................................................................................................................................................................................hhLlllD..u...hG...h...stttc................sssh..........tsA.ss.......................s.shsclhpt...s................t.t.p..G...h..s...l.s...LP....sphG.uc.l.u.+hshussphshpshschstthppsphhpsstht.hhthhhssstshhhhshhssttsppp.pthhthhphstttsh.t.h.....................................................................................................................................................................hh.t.httsthhhhpsssshhhhhtpcshthtphhhhhpphttphpttthhhhthhhhhhhssp.sshhth.s.p.s.hss...p..s.p.h.h..s...h...p..t...l..p.p.....h..t...h....u-s.....phu..+.ls..h.asG...............................................................................................s.p.s..h...T...h..c..lp..s..c....h....u..s....h....cl...h..-...th.................l.................c.t.l............p.....
.............p......s......c...............h....s......hl.h.sNhs..shD.hhGH....p....t.............s...h.....p....u...h...h..c..A..l.E..shDp...pl.s.c...l...h....c....t.....l................p.......p......s......s......h......l....l..I..T.A.DHG...N..s.s.phts........................................................................................T.s.H....T....p....p................V.....P.....llh.h..us.............th.p........................s.s.p.l..t.p....t..s...p...h....u..D..l.usTlhphhuhp.sp.hpu.h.................................................................................................................................................................................................... 0 517 943 1184 +3324 PF02066 Metallothio_11 Metallothionein family 11 Mian N, Bateman A anon IPR000869 Family \N 21.40 21.40 57.30 56.80 18.50 16.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.18 0.72 -4.09 2 7 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 1 0 7 7 0 54.10 63 100.00 CHANGED MEFToAhFGsSLl.op.pTppKHNLVssCCCSpsTpcso...SCsCoKCuCcoCKC MEFToAhFGASLIpop....pTppKHNLVNsCCCSpsspcso...SCsCoKCuCcoCKC. 0 7 7 7 +3325 PF01439 Metallothio_2 Metallothionein Bateman A anon Prodom_1611 (release 99.1) Family Members of this family are metallothioneins. These proteins are cysteine rich proteins that bind to heavy metals. Members of this family appear to be closest to Class II metallothioneins, seed Pfam:PF00131. 23.00 23.00 23.30 23.30 22.90 22.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -11.06 0.72 -3.43 23 407 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 153 0 53 379 0 72.40 50 90.61 CHANGED M.SC.CGu.sCGC..GSu.CKCGssCs.CpMYPDls.pt.soTo..pTllhGVAPpKs...puuEhuss...ussGCKCGusCpCsPCsC ..........MSC..CGGsCGC...GSu.Cp.CGss......ssKMYPDls.pt...soTo...pTllhGVAPpKs..phE...u.s.....Ehusst.....sssG.....C.K.CGs.sCp.C.s.PCsC................................................ 
0 2 22 40 +3326 PF02067 Metallothio_5 Metallothionein family 5 Mian N, Bateman A anon IPR000966 Family \N 25.00 25.00 25.60 25.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.01 0.72 -3.91 2 57 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 16 0 28 47 0 40.30 57 94.64 CHANGED MsC..CGosCpCuuQtstssCsCspDCpCsscptststCsp MsCKuCGTNCpCoupK.CGsNCuCspDCpCsCKN.GPK-pCCp.... 0 4 5 19 +3327 PF02068 Metallothio_PEC Plant PEC family metallothionein Mian N, Bateman A anon IPR000316 Family \N 26.80 26.80 26.80 29.50 25.60 26.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -10.77 0.72 -3.61 4 37 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 20 3 16 36 0 72.90 53 73.21 CHANGED GCDD+CGCssPCPGGsuCRCsSu...puuuGstEHpsCsCGEHCGCNPCsCs+spspsSG+u..+ApCoCGsuCsCASCAS ..........hCs-+CGCslPCPGGs......sCRCsus.....tuuu.Gs.s-HsTCsCGEH.CGCNPCsCsKs..tsoGpG.....CpCGsuCsCAoCu.... 0 2 11 13 +3328 PF02069 Metallothio_Pro Prokaryotic metallothionein Mian N, Bateman A anon IPR000518 Family \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.47 0.72 -4.14 13 100 2012-10-05 18:33:37 2003-04-07 12:59:11 11 1 96 1 39 94 51 47.90 40 73.21 CHANGED oTsTphKCACssChCsVohssAlp+-GKsYCSEuCAsGH.hsspGC....GHsGCsC ................pCACssCsChVs..scAlp+-G+hYCscuCAsGH..psps.C....tpssCtC......... 
0 8 21 32 +3329 PF00131 Metallothio metalthio; Metallothionein Sonnhammer ELL anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.81 0.72 -11.32 0.72 -4.09 21 581 2012-10-05 18:33:37 2003-04-07 12:59:11 15 4 201 21 109 522 0 58.90 56 96.13 CHANGED .DP......CsCtc.susCsCus.uCp.CpsC+CssCcKss.ssCssuGC...sKCu..psCsCtt......t..pCoCCs ......MDP......CsCsp....uG...oC..sCuu..S...CK.C.c.sC+C.....TS.C.KK..S.C..C.uCCPsGC....sKCA...pGC..l..CKG......tspcCSCC.................... 0 14 17 30 +3330 PF01717 Meth_synt_2 Methionine_synt; Cobalamin-independent synthase, Catalytic domain Bashton M, Bateman A anon Pfam-B_1909 (release 4.1) Domain This is a family of vitamin-B12 independent methionine synthases or 5-methyltetrahydropteroyltriglutamate--homocysteine methyltransferases, EC:2.1.1.14 from bacteria and plants. Plants are the only higher eukaryotes that have the required enzymes for methionine synthesis [1]. This enzyme catalyses the last step in the production of methionine by transferring a methyl group from 5-methyltetrahydrofolate to homocysteine [1]. The aligned region makes up the carboxy region of the approximately 750 amino acid protein except in some hypothetical archaeal proteins present in the family, where this region corresponds to the entire length. This domain contains the catalytic residues of the enzyme [2]. 
20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.06 0.70 -5.46 14 4790 2012-10-01 21:20:02 2003-04-07 12:59:11 13 9 3434 28 1233 5086 1353 308.90 33 54.56 CHANGED hPTTT.IGSaPQTsclRcsRtphppuplu..tpYpsth+ttIcpslchQEclGlDVLVHGE.ERsDMVEYFupplsGh.shTtsuWVpSYGSRsh+Pshhhuclptspsholc.thhupshsscs..VKGhLTGPlTILsWSFsR...pD.s+cshshQlAlAL+cElt.cL-sAGhtlIQlDEP...ALREGlPLpp..tc.stYhpWultuFRlssssstscsQlpoHhCYSpassIh...stIsshDsDVloIEtu+sc.phlsslpc.htas+plG.GVaDlHSP.+VPSsEEltshlpcshch.........h.t-pLWVNPDCGLKTRshpEshsuL+NMVcAAct .....................................................................................................................................................hPTTT..lGSF..Ppop..p.....l.......+..p.....h.......R..........t..t........a.......p...............p..G..p......lst...ppapphhp...........ppIpphl......phQEclGL.D.VllcG....Ehc..R....s....-..............h.Vp.a...F.u...E.p....L.........sGa....h..............h.......s..............p....p...............u.............a..........V...............Q......u....a.............G................s................+................s.............s....+.....s.............P...l..........l....h.....G...-.l........s....p...s.....p..s.....h..........s.......V...c......h.....s......p.a.......u.p.....u.h....T.s+s...............h..Ku.....h...L......T..G......P......s......T.....l....l.....s....h.....o.a.....sR................pt......s...pc....p.....hst..........p.l...A...h.....A.....l...+-El.t......cL.c.s..A.......G.l............p.l.......I.QlDE...s........................Al..+..E.t.....l.....Ph+p..............tc...h....p..t...a.....L............p......h........u............l........t.......u........a...........p............l.......s....p..........u......s....s.........t..........s..........c.....s......p..........I..p...T..........H.h..C...........Y..................u..........p...........F...........s..........shh.................st.l..s.....s..h....
.c..s....Dl..l...o.....l..E..s...u....c....S....c.............h.............p.............h...l..........c...sh.....c......p.....h........s..h....s..p..c...l..G.....GVh..D....l.+....S....P.....c....l...P....s.....h.-..c..lt..s.h.l.c.cu..hph..........................lsh..c..pl...alsPD............CG.......L........p......o......p.......t......h.............ps......h.....tt.Ltthltssp.t....................................................................................................................... 1 360 749 1029 +3332 PF01035 DNA_binding_1 Methlytrans; Methyltrans; Methyltransf_1; 6-O-methylguanine DNA methyltransferase, DNA binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1191 (release 3.0) Domain This domain is a 3 helical bundle. 21.30 21.30 21.50 21.40 20.70 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.97 0.72 -4.24 93 7975 2009-09-12 05:33:12 2003-04-07 12:59:11 15 49 4422 25 1944 5717 2191 84.40 39 44.75 CHANGED osFpppVhpt...ltp.IPhGplsoYuplA....ctlG.p.....spuuRsVGpAh...spNP....hshhlPCHRVlsusGp.......lusa....th........uhth..Kp.tLLptEuhth..p ................................osFpppV.Wps.Lpp..IP.hGpshoYuplA................ptl.G..p................P.p..A..sRAVG...s...As........upNP.................lul.ll..PCHRVlsssGp.........L.s.Ga..........ss......................Gl.pc..Kp..tLLphEuh...t........................................... 
0 624 1221 1628 +3333 PF02870 Methyltransf_1N Methlytrans; Methyltrans; 6-O-methylguanine DNA methyltransferase, ribonuclease-like domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1191 (release 3.0) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.44 0.72 -3.38 11 2628 2009-09-14 13:10:39 2003-04-07 12:59:11 10 12 1908 11 442 1666 131 76.10 23 35.10 CHANGED tlhaohhcoPlGplhlsssEcs.lptlthtspshs......h.t..phhhts-......hsshthhppplcpYhAuptp.....thslPlch ....................................t.hhhthh.soPlGtlhl.s.s...s-....p.....u..Ls.ulha..tspptp.............h.ph..ph..h...t..sc......................ts...h....t..th...p...p.....pLcc...YFuGppp......hslPls.............................................. 0 118 236 347 +3334 PF00891 Methyltransf_2 Methyltransf; O-methyltransferase Bateman A anon Pfam-B_152 (release 3.0) Family This family includes a range of O-methyltransferases. These enzymes utilise S-adenosyl methionine. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.42 0.70 -5.18 30 3396 2012-10-10 17:06:42 2003-04-07 12:59:11 13 42 1164 67 1586 4681 447 220.60 22 62.75 CHANGED tps.........phYshsssuchLhsspss...sshsshhhhhtspshhpsWtpLp-..ulh-Gt.....ssFpcsaGhs..ha-ahus.Ds...chsphFspuMssssslhs.hchhpshpsFpslsolVDVGGGsGsssptIsptaPpl+uhsFDLPcVltsAss.......tssVcaVuGDhF.-slPp..ADAllLKalLHDWuDpcClclL+pChcAls...tpG+VlllEsllspssssshps...........hhhDlhMlshss.G+ERoccEacpLhp .........................................................................................................................tttt......................h..................................................................h.............h.............h...tp.......h........p..st...................s..h.t...h..h.h...s....s....ha.ph...h.t....p...p.s..........phtp....h.......F..t......p..u.M.......t.........s..............s........t.........h...........h........h...............................h.......h.................t....h.........h.....s.....h.........p...........s......h........p................p.............l..l.DV.....G...G.G.p...G....t.h...h...t..t.l..h....p.....t....a..........P....p.......l.......c......s....h.....l..h...D.....L.....P...p.....V.....l...t.t..utt.................................ts.p.l...p...h..h...u...u...D...h.....F.....p.........s.......l......P.........t......u.......D.........s.....h......h...h.p.....h.............l....L.............H..c...........W.....s....D.....c....p.......s...h.p...l...L.ppst.p.A...hs........t.........u...+...lllh..-h.l...h..sp...t.....t...t.....t..............t..t......................................h.h.c.h.h..M..hshhs...Gp.....pRs.tcattlh................................................................................. 0 329 890 1294 +3335 PF01596 Methyltransf_3 O-methyltransferase Bateman A anon Pfam-B_749 (release 4.1) Domain Members of this family are O-methyltransferases. 
The family includes catechol o-methyltransferase Swiss:P21964, caffeoyl-CoA O-methyltransferase Swiss:Q43095 and a family of bacterial O-methyltransferases that may be involved in antibiotic production [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.17 0.71 -5.27 8 3858 2012-10-10 17:06:42 2003-04-07 12:59:11 12 23 2673 61 1074 4599 1860 191.10 25 85.30 CHANGED h-oSsh..Es-hL+ELR-tTs+pP..hstMplss-EGQFLuhLl+LhsAK+TLEIGVFTGYShLssALALP-DG+IsAhDlscEshplGhsaIQKAGVscKIshhlGDAhpsL.-pLhp-cp..spFDFsFlDADKsuYspYaE+hLcLVKsGGLIAhDNTLWtGpVs-s.-sssP-shp.....lhchNchLusD.RV-IuhLslGDGITLsRRl .................................................................................t...................................................h...........l.s...p..p.u....p..h.L.p.h..L...l.....c...h..h...p....s...+....p.....l......L...E.l.G.....T.hs.G...YSu....l...h.....h...A...p.......u..............l...........s........p........s............u.........c.....l......s...Tl....-...h.........s..t.........c........h....h...p..........h......A........+....p......h.p....c......u....s...........h......p......p...........c.................l...p...l..h............G.......s..A.....h..-..s......L......s.....p.............L...h..........t...........t............................s...a...........Dh.l......F............l.D............u........s..K............s.......p........Y...h......p......a.....h........p.......t..h..l..........p..........L.........l.c....s..G.G......l....l......l.s......D......Nl...............l....h......t......G.....t.....l....hp......t..........t........p..t.t......................lpph....c...h...l....h...p....p....p...hp...s.s.hl.sls.....DGlh.h.h..t................................................................................................ 
0 347 681 901 +3336 PF02390 Methyltransf_4 Putative methyltransferase Bashton M, Bateman A anon Pfam-B_1023 (release 5.2) Family This is a family of putative methyltransferases. The aligned region contains the GXGXG S-AdoMet binding site suggesting a putative methyltransferase activity. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.20 0.71 -5.26 19 4759 2012-10-10 17:06:42 2003-04-07 12:59:11 12 26 4414 24 1194 8689 3879 194.50 28 77.49 CHANGED hP........ph..........Datphasp.s...........................lhlEIGsGhGshllshApppP-h.alGlElchsslttshp+htph........thpNltllptsAhphhpphhs.spLp+lhl.FPDPW.KpRH+KRRllpsphlpthtphLt.GGhlhhtTDspshhc.hhcthpp.............................shhphh................pss-h..sPhs........s.htssTchEp+shptstslaphhFp+h ........................................................................hs.htt............hchp.p.h.F.s.p..ptP...........................................lhlEIGhGhG....phl....s.......t.h..........A.....p........t....p...P..-.........h.........s..a.l.GlEl.....p.ps..sl...s.....t.....s..L..pp.h.tct............................................................sl.s.N.......l..+..l..l....p...t...D......A....h.....c.....l......L...p.....p...h.....h...s......p......s....p.....l.s....p..l..a...l.....h...F........PD..P..W.....K....p....R.Hp.............KR........R...........llp.....sFlphhtph..L.p.........s..............G.....hl....+.ht...T...D..csh.h.cahL.cshsp........................................................................t..hp.t.h..................................................s...c..h.........s...s.h.............t...p.shTcaEp....+hpp.t.Gpslaclhh...h...................................................................................................................................................................... 
0 408 771 1016 +3337 PF03737 Methyltransf_6 Demethylmenaquinone methyltransferase Bateman A anon COG0684 Family Members of this family are demethylmenaquinone methyltransferases that convert dimethylmenaquinone (DMK) to menaquinone (MK) in the final step of menaquinone biosynthesis. This region is also found at the C-terminus of the DlpA protein Swiss:Q48806. 20.70 20.70 22.30 21.90 19.30 20.00 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.54 0.71 -4.35 11 3287 2009-01-15 18:05:59 2003-04-07 12:59:11 10 22 1989 41 1060 2581 1112 153.90 30 75.23 CHANGED pphsTssLsDshst.sssspPhhss...hstpsphsGpssTV+shp..............DNsLl+pslppsutGcVLVlDGtGsh..........cpAlhGshluthAtppGhpGlVlaGslRDlsslcpLDlslhAhussPst..usppuhGp..lsVslshuGVT..lpPGDhlhuDssGllV ............................................................................................sstl.D......h................p.....h.....t....h..h.p.......s...hps.......hss..p...s.p...h.sGphhTV+sht..........................DNs..h....l....p..c..hl.....-..p.....s.s........t........G...c..VLV...l...D..u....s.....G.s.h.....................cp.A..l.hGs.hl.A.p.h.A.h.p.p.Gh....t.GlVlp.G.u..VRDlspl.p.p..h.....s..ls..l..h.A.h..u..s.h.Phs.................s.s.p..c...u..h.G-.............hsls..l..........s..................h..........G...................G.......ls................lhPGD.hlhAD..s.sGll.............................................. 0 248 604 862 +3338 PF03492 Methyltransf_7 Methytransf_6; SAM dependent carboxyl methyltransferase Bateman A anon Pfam-B_1148 (release 7.0) Family This family of plant methyltransferases contains enzymes that act on a variety of substrates including salicylic acid, jasmonic acid and 7-Methylxanthine. Caffeine is synthesised through sequential three-step methylation of xanthine derivatives at positions 7-N, 3-N, and 1-N. The protein 7-methylxanthine methyltransferase (designated as CaMXMT) catalyses the second step to produce theobromine [1]. 
20.20 20.20 20.30 20.90 19.80 19.30 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.15 0.70 -5.68 23 707 2012-10-10 17:06:42 2003-04-07 12:59:11 10 4 157 8 380 728 190 269.90 29 87.55 CHANGED lppslpph....h.ts...hssslslADLGCSSGPNTFhslssIlcslcpphppps.....tssPEaplahNDLPsNDFNTlF+hLP.h..................tphFsuGVPGSFYGRLFP+pSLHhsaSSaoLHWLSpVPctlp-p.s..h.....NKuplahs.........pVh+AYhsQFpcDhshFLcsRuEElVsGGhMlLshhGRps.......D.ssptss..hhh-LlupuLsDlspEGll---KlDoFNlPhYsPsspEl+thI-cpGSFpI-chEhhp.....h..p....................tsucthusshRAlhEshLssHFGpslh-clFc+atp+lspph.hpp...p.hhsllluLpRp ..........................................................................h...................sp.hhlADLGCSsGsNohhhlppllphlpphhpp.t....................chphhhNDLssNDFNslF.p....t.L......sth.t....t........................shahsu...l.PG.SFY.s.R.LFPppSlchhaSuhu............Lp.W.L.....Sp........l.............P..ptl..s................................Ncsplahs..sss.....tlhpuYhpQappD.h.p.FLphRupElhs.GGthllsh..h.u+ps.................p....p.t.t.ts............hhp....hl...ttulpph.........stc....G........hl....pc-ch.-sFslPh....Ytso.pE.l...ct....hlpp.....p....u...p......F...p...l.p...p.hc..hhp..............h...................t.........................u.thu.th.hR.uhhpsh.l.......t...p.....h.....G........t.......t........l....h.c.tlFtphtphhtt...................................................................................................................................... 
1 54 213 296 +3339 PF02086 MethyltransfD12 D12 class N6 adenine-specific DNA methyltransferase Mian N, Bateman A anon IPR002294 Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.83 0.70 -4.93 27 3733 2012-10-10 17:06:42 2003-04-07 12:59:11 10 25 2224 21 654 3159 1250 233.30 24 80.29 CHANGED ahGuKppLlspIpchlPp......ptppah-PFsGuGuVhhph......phtpslhNDlps.lhslap.llcs.........scpllpphp.h....hh..p..ssp-hahph+............pchN....pshp.h.c......sh.hhhhsthshsslhphsppupasssaGtatp......hhstpplpthsptlps.....sphhstsh-tshhhh.phts.hlYhDPPYtshp.tssapsapp.shspppphthtshhcpht....tphthphhlsss.......ssp.hcclaps...hslhphpthpsl ...................................................................................................ahGuKhtLhsp.l.......hp...h.lP....................ptpph.lEPFsGuG..uV.hh.ph...............php.p..h..l.l..s..Dlssc..Llsl...a.p.....hlpp.............................pspp.h.l...pthcth....................h..............s...p.pc.h.ah..ph+...............................................................................................pphs............p.th..p...h.c..........................uhhhh.a..l.s...+.h..s.....a.......sG.l..h......c....h....s........h.......p....u......p.....h........s..s....s.....h.....u.....p.apps....................hhs....p...p....p..l...h.t...h...s...p..p..hpp......................sthhst..s.a......p..........p.....s...h......t............t......h..............t.......t......s.....s.......l..lY.h.DPPY......h.s..........................ts............s....a..p.......t.....a....p..t.t....s.a.s.........p.......p....p..h..p.L....t..p....h...h.pph................t.th...h...hlSsp....................ss..h+phapt......th........................................................................................................................................................................................................ 
0 232 454 559 +3340 PF01340 MetJ Met Apo-repressor, MetJ Finn RD, Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 31.60 31.50 20.80 19.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.33 0.72 -4.28 3 798 2012-10-02 18:44:02 2003-04-07 12:59:11 15 1 794 30 96 197 8 102.70 87 97.11 CHANGED AEWSGEYISPYAEHGKKSEQVKKITVSIPLKVLKILTDERTRRQVNNLRHATNSELLCEAFLHAFTGQPLPDDADLRKERSDEIPEAAKEIMRELGIDPETWEY .....AEWsGEYISPYAEHGKKSEQVKKITVSIPLKVLKILTDERTRRQVNNLRHATNSELLCEAFLHAFTGQPLPsDsDLRKERSDEIPEAAKcIMREMGIsPETWEY.............. 0 12 33 66 +3341 PF04648 MF_alpha Yeast mating factor alpha hormone Kerrison ND anon DOMO:DM04511; Family The hormone is excreted into the culture medium by haploid cells of the alpha mating type and acts on cells of the opposite mating type (type A). It inhibits DNA synthesis in type A cells synchronising them with type alpha, and so mediates the conjugation process. 19.00 19.00 24.60 19.20 17.90 16.20 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.97 0.73 -6.12 0.73 -3.43 3 152 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 27 0 59 116 0 13.00 78 26.85 CHANGED WHWLSl+PGQPMY WHWLpL+PGQPMY. 0 6 30 45 +3342 PF04202 Mfp-3 Foot protein 3 Bateman A anon Pfam-B_1860 (release 7.3) Family Mytilus foot protein-3 (Mfp-3) is a highly polymorphic protein family located in the byssal adhesive plaques of blue mussels. 20.70 20.70 21.60 23.10 20.00 19.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.69 0.72 -3.80 2 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 5 0 1 49 0 71.80 63 96.17 CHANGED MNNoSVSVLVsLVLIGSFAVpSDAAsYYGPNYGPPRRYGG..YNtYNRYuRtYGG.+GWNpGWpRGhRG+h ..MNNhSVuVL.lALVLIG.FAVQSDA..u.......hhYsPsYssPh.YssGh..Y..NGYNtY.tt...RYGhNKGWNsG.W................... 
0 0 1 1 +3343 PF01078 Mg_chelatase Magnesium chelatase, subunit ChlI Finn RD, Bateman A anon Pfam-B_616 (release 3.0) Family Magnesium-chelatase is a three-component enzyme that catalyses the insertion of Mg2+ into protoporphyrin IX. This is the first unique step in the synthesis of (bacterio)chlorophyll. Due to this, it is thought that Mg-chelatase has an important role in channelling inter- mediates into the (bacterio)chlorophyll branch in response to conditions suitable for photosynthetic growth. ChlI and BchD have molecular weight between 38-42 kDa. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.14 0.70 -5.16 56 4229 2012-10-05 12:31:08 2003-04-07 12:59:11 16 28 3288 9 1225 9364 2476 172.30 42 36.05 CHANGED DhsDV+GQppA+RALElAAAGGHNLLhlGPPGoGKoMLApRLPulLPPLotpEuLEsstItSluG..thptshhppRPFRuPHHouStsALlGG..Gs.hs+PGElSLAHpGVLFLDElPEFsRplLEsLRpPLEsGclsIoRAptpl.saPAcFQLVAAMNPCPCGahucssptCpCo..spphpRYhu+lSGPLLDRIDlplcV.stlshtpL .............................................................................................Dht-lhGQ....t....p...u....+....R....A....l....c...l....A...A.................A.............G......t.......H.............s...l..l..hhG..sPGsGKoM.LApR.l......s....s......l......L....P.................s..............h...s.....................p............E............h..l............E...........s..s.......t...........l....h.........S.......ls.......s..................................t.....t.......h...t...p.t..............P..F...Rs....P.........H..........H.....os.........S.............s.A..l........l...G.......G..................G...........s...hspP..G.....E.l.o...L....A....H.p..G....V.LFLDEl...sEF...pcplL.-s.L.R..pPl.Es..G..p.l.p.lu....R.s.t....t........p....l........p........a....PA+F.LluA.hN.....PsPsG....h..t...t.....t......t.t..C...s.........stph.cY.s+..lS..GPhlDRhDlplph.......h................................................................................... 
0 414 845 1059 +3344 PF05043 Mga Mga helix-turn-helix domain Moxon SJ, Bateman A anon Pfam-B_5126 (release 7.7) Domain M regulator protein trans-acting positive regulator (Mga) is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions [1]. This domain is found in the centre of the Mga proteins. This family also contains a number of bacterial RofA transcriptional regulators that seem to be largely restricted to streptococci. These proteins have been shown to regulate the expression of important bacterial adhesins [2]. This is presumably a DNA-binding domain. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.76 0.72 -3.70 33 5627 2012-10-04 14:01:12 2003-04-07 12:59:11 8 55 1351 2 389 3825 25 88.20 19 15.90 CHANGED tshshpplhphhhcc...............ShphplLphlh..tpcphslpphupchalSpoolhRhlcclsphLp.paslpl.....pssplh.GcEhpIRhFahhLa .........................................................................t..............pp..................phph.h...l.l..phl....h......tppth..shp..p.lspchhlScuTl.h+clppl.p.p.h.Lp...pa..s..L..pl.........ttps...h..p..l..h..GsE.hpl.Rhhhhph.................................... 0 126 235 307 +3345 PF05220 MgpC MgpC protein precursor Moxon SJ anon Pfam-B_6685 (release 7.7) Family This family contains several Mycoplasma MgpC like-proteins. 
18.30 18.30 21.50 24.60 17.90 16.50 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.49 0.70 -5.23 3 49 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 7 0 9 57 1 203.40 69 29.95 CHANGED MGSstVPSLWYWVV...uE-ssSGKuTWWA+TELNWGTDKQKQFVENQLGFKETSuTDSHN..FKupuLTQPAYLISGLDVVADHLVFAAFKAGAVGYD...MTTDSSASTYNQALAWSTTAGLDScGGYKALVENTAGLNGPINGLFTLLDTFAYVTPVSGMKGGSpNTEsVQTTYPVKSDQKATAKIASLINASPLNSYGD-GlsVFDALGLNaNFKhN-ERLPSRTDQl .....MGsshlPSLWYWVV...upcssSsKsTWaApTpLsWGpDKQKQFVENQLGaK..-souosSHN..F+SpuhTQPAYLISGlDsVsDpLlFuuFKAGuVGYD......So.SSuS...TKDQALAWSTTsuLDScsGY+sLVpNssGLNGPING.Fol.DTFuaVsPhSG.+sss.ts...o.tslpTsYPVKsspKuTsKIsSLINAoPLNSYGD-G...lsVFDALGLNaNFK.NpE+Lspt................................ 0 8 8 8 +3346 PF02308 MgtC MgtC family Bateman A anon Bateman A Family The MgtC protein is found in an operon with the Mg2+ transporter protein MgtB. The function of MgtC and its homologues is not known. 23.00 23.00 23.20 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.71 0.71 -4.32 11 3455 2009-09-11 06:02:09 2003-04-07 12:59:11 11 7 2432 0 775 2251 137 132.60 37 56.32 CHANGED LllAhllGulIGLERphRt...+hAGLRTpsLlAhGuslhsllSlpsthshs.........PsRlsAQllSGlGFLGuGsIL+cu........sslpGLTTAAolWsoAulGllsGuGhaphulhuolhlLh..lhLpslsphlp+p.hp .........................lhlAhllGulIGhER..p.h.....+p..................+s.AGlRT.ahLVu.lGuslh...h......l.....l.u.......hh....s....h....t.s....h..........................DPsRlu...A....QVV....SG.l.G.F...L.G.A..G..s.I..l..h.ps...............................ps.l.pG...L.TTAAslWsoAulGls.h..G.s.G..h......ahhA.lhusl.h..ll.h.s.h.hh..lp...ltp.......hht................................... 0 254 503 630 +3347 PF03448 MgtE_N MgtE intracellular N domain Bateman A anon Bateman A Domain This domain is found at the N-terminus of eubacterial magnesium transporters of the MgtE family Pfam:PF01769. 
This domain is an intracellular domain that has an alpha-helical structure. The crystal structure of the MgtE transporter [1] shows two of 5 magnesium ions are in the interface between the N domain and the CBS domains. In the absence of magnesium there is a large shift between the N and CBS domains. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.29 0.72 -3.70 169 3637 2012-10-02 13:19:07 2003-04-07 12:59:11 12 24 2860 12 968 2783 1850 101.10 22 23.58 CHANGED hcssDlAcllp......pl...s..ccphtlhphL.stcptu-lhppls.tc.hptpllpph..sspphsp.llpphssDDtsDl...lpcLspct.tpplL...sth.stcc+pplppLLsYs-c .....................................................pssDlAphlc......pl......s..cp....ph..hlh.ph.L....ss-p...tu-ll.p..pl...s...pc...hp...tpl...l...p...p...h...sspchu.c...llp...phss..D..-ss-l...lpcL...s...p.sh....hp...pl...L...s.th...s.cctpplpp.lLp.Ys-p........................................... 0 358 675 840 +3348 PF03165 MH1 MH1 domain Bateman A anon Pfam-B_519 (release 3.0) Domain The MH1 (MAD homology 1) domain is found at the amino terminus of MAD related proteins such as Smads. This domain is separated from the MH2 domain by a non-conserved linker region. The crystal structure of the MH1 domain shows that a highly conserved 11 residue beta hairpin is used to bind the DNA consensus sequence GNCN in the major groove, shown to be vital for the transcriptional activation of target genes. Not all examples of MH1 can bind to DNA however. Smad2 cannot bind DNA and has a large insertion within the hairpin that presumably abolishes DNA binding. A basic helix (H2) in MH1 with the nuclear localisation signal KKLKK has been shown to be essential for Smad3 nuclear import. Smads also use the MH1 domain to interact with transcription factors such as Jun, TFE3, Sp1, and Runx [1,3]. 
25.00 25.00 25.20 27.80 24.20 24.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.70 0.72 -4.03 19 1320 2012-10-05 18:28:12 2003-04-07 12:59:11 11 19 119 10 608 1055 0 101.20 42 22.56 CHANGED hc+sscuLlKKLKc+.....tL-sLhpAlpspGsss..................otClsls..............................+ph-uRLt............st+KuhPHllhC.....RlaRWPDLpptpELKslssCpts.hp.cpcp......lClNPYHYpRlp ................................................................t.pKhspsLl+KL+Kch...tthE-L.pAlo.....s....ps......................stCls.ls....................................................................................................p..h.ph.ts....................scpKGhP+lIhC..............+l..WRWPD..Lp..t..tELK...s...lt.h.C.phs..hphppst.............lC....lNPYHapRl...................... 0 141 193 390 +3349 PF03166 MH2 MH2 domain Bateman A anon Pfam-B_519 (release 3.0) Family This is the MH2 (MAD homology 2) domain found at the carboxy terminus of MAD related proteins such as Smads. This domain is separated from the MH1 domain by a non-conserved linker region. The MH2 domain mediates interaction with a wide variety of proteins and provides specificity and selectivity to Smad function and also is critical for mediating interactions in Smad oligomers. Unlike MH1, MH2 does not bind DNA. The well-studied MH2 domain of Smad4 is composed of five alpha helices and three loops enclosing a beta sandwich. Smads are involved in the propagation of TGF-beta signals by direct association with the TGF-beta receptor kinase which phosphorylates the last two Ser of a conserved 'SSXS' motif located at the C-terminus of MH2 [1-3]. 
25.00 25.00 25.10 25.00 22.20 24.60 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.32 0.71 -4.46 11 1087 2012-10-01 21:55:46 2003-04-07 12:59:11 9 17 139 31 549 937 0 167.40 50 39.60 CHANGED hpcsphWsoIuYaEhssRVGEha+lsss...slhlDGas....Dss..sssRhCLGtLoNsNR..ssslppsRt+IGcGVpLshc..sGp..ValpspScpsIFVpSsshshptG.hc.s.pVpKlhPs.solKlF-hphhtpllppssppu.tu..................................stpLp+.hColRlSFVKGWGtcY.pRQslpuoPCWlEl+Lsts ...............................................................h.-PtaWCSIuYaE....hssRVGEsF+ssps.......ol.hlDG...as................DPS.....susRF..CLGhL...S.........N..V.s...R..............spslEp.sR.pHIG.+............G...VpLhhh..sG...-........VascC.l...S.............-.puIFVQ......S............sh.s..p....t.......G.....h...........+Ps.......s..........Vp...........K.......I.....................PG.........s.............s....l...K...............lFs.pc...htthh..tpss....s.p.u...ts...........................................................................V.pLp+.hC.ol.RhSF.VK.GW.G.s....-Y..pRQslouTPCWlElpLpt.s................................................................ 
0 143 186 378 +3350 PF00129 MHC_I Class I Histocompatibility antigen, domains alpha 1 and 2 Sonnhammer ELL anon Prosite Domain \N 28.30 28.30 28.40 28.30 28.10 28.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.17 0.71 -4.93 25 25261 2012-10-03 22:02:01 2003-04-07 12:59:11 13 14 308 732 732 17686 0 155.50 62 74.51 CHANGED GSHSLRYFhTAVSRPGhGEPRFIuVGYVDDTQFVRFDSDAtsPRhEPRAPWhEQEG.PEYW-RpTphuKuptQsFRVsLRTLhuYYNQSEuGSHTlQhMaGCDlGsDGRLLRGYpQaAYDG+DYIALNEDL+oWTAADsAAQIT+RKWEtAshAEp.tRAYLEGpCVEWLRRYLEpGKETL ......................................................................SHShRY..FhTu..h.S..RP..G..R..G..EP...R..FIu.V.G.YVDDTQF.VRF....D...S...D.A.s.S..........R.h.E..P.R...A.PW...l.....E....Q.....E.G......PE..Y.W...........D...c.p.T..ph.h.K...s.p.sQ...s..R.s.L.p.s....L..p.t.Y..Y.N..Q..S............E............u.......G............S............H......Tl.Qh.MYG.....CD..l...G....s...D...G....R...h.LRGapQ...AYDG+D..YIAL..N.E.D..L..+.SW.....T....A.....A....D.....h..A.....A....Q....I....T.......p...R.........K...W....E.......A..A...c.....A..EQ....h.......RAY...L.E.GpCVE.......WL....RR...YLENGKETL...................................................................................... 0 92 131 319 +3351 PF03707 MHYT SPNTR; Bacterial signalling protein N terminal repeat Yeats C anon Yeats C Repeat Found as an N terminal triplet tandem repeat in bacterial signalling proteins. Family includes CoxC (Swiss:Q9KX27) and CoxH (Swiss:Q9KX23) from P.carboxydovorans. Each repeat contains two transmembrane helices. Domain is also described as the MHYT domain [1]. 
21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.17 0.72 -3.86 27 2404 2009-01-15 18:05:59 2003-04-07 12:59:11 11 83 683 0 856 2299 124 61.50 27 23.87 CHANGED GhGIhuMHasGMhAhph.........s..h.ap.hhhh.hSll....lultssssul.hhshttthhhthhhsusll ...............GhGIhuMHasGM.hAhphs...........................st..ltas...hh.......h.......s....h...l...Sll.............lA.l.ss.u.h.hAL....hhs..h..t....t.h............h....................................... 0 190 449 678 +3352 PF02426 MIase Muconolactone delta-isomerase Bateman A anon Pfam-B_2784 (release 5.4) Domain This small enzyme forms a homodecameric complex, that catalyses the third step in the catabolism of catechol to succinate- and acetyl-coa in the beta-ketoadipate pathway EC:5.3.3.4. The protein has a ferredoxin-like fold according to SCOP. 20.10 20.10 20.40 20.20 19.80 19.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.95 0.72 -4.17 7 493 2012-10-02 00:20:33 2003-04-07 12:59:11 11 4 392 0 136 363 26 90.60 44 92.14 CHANGED MLFpVcMsVplPssMss-tAsplKucEKAhSQcLQ+pGKW.HlWR...lsGcYuNlSlFDV-SssELHplLtsLPLFPYMsIEVpsLsRHPSul .............................................MLFhVcMsVpl.Psshssptsscl+AcEKshuQcLQcpG+Wh+LWR...ls.G..pYuNl.SlFDVcsssELH.s.l.Lp.uLPLa....P..a.M....sI.cV.ssLspHPSsl.................. 1 29 70 109 +3353 PF03526 Microcin Colicin E1 (microcin) immunity protein Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 25.00 26.40 23.70 24.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.50 0.72 -4.37 3 96 2009-09-10 20:23:55 2003-04-07 12:59:11 8 1 70 0 2 41 0 48.70 45 43.97 CHANGED YYFLASDKMLYAIVISTILCPYSKYAIEHIAFKFIKK-FFc+RKNLNssPsAKls ..hhhh.hhhh.................lLaPhuKahIEchAlKFT+.-FWpps..FFssssGKhs....... 
0 1 1 1 +3354 PF04687 Microvir_H Microvirus H protein (pilot protein) Kerrison ND anon DOMO:DM04728; Family A single molecule of H protein is found on each of the 12 spikes on the microvirus shell. H is involved in the ejection of the phage DNA, and at least one copy is injected into the host's periplasmic space along with the ssDNA viral genome [1]. Part of H is thought to lie outside the shell, where it recognises lipopolysaccharide from virus-sensitive strains [2]. Part of H may lie within the capsid, since mutations in H can influence the DNA ejection mechanism by affecting the DNA-protein interactions [3]. H may span the capsid through the hydrophilic channels formed by G proteins [1]. 21.30 21.30 36.80 61.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.93 0.70 -5.38 3 108 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 1 127 1 300.50 78 94.51 CHANGED MGKLFGGGQSAsSsGIQGsVLAoDNNsVGMs.DAGIKSAIQGSNVPNSsEAAPStlSGlMA.......cAGKuLL-GTlQAGostVusKLlDtVGLGGKSAuDKGKDTRDYLAAAFPELNPWERAGAGASSuGlpDAGFpNQKELTKMQLDNQKEIAKMQNETQKEIAGIQSATSRQNTKDoVYAQNEMLAYNQKESpARVASILENTsLTKQQQTSEIMRQMLTQAQTAGQYFTNDQIKELTRKVuAEIDtVH..............Q-TQNSRYGSSQVGATAKDVSNsITDAASGlVDaF+GhDptVADsWNNFFKDGKScGIuSNpR+ ............MSKLFGGGQpAASuGIQGsVLATDNNTVGMs.DAGIKSAIQGSNVPNscEAAPshlSGsMA.......cAGKuLL-GslQAGostVo-KLlDhVGLGGKSAADKGKDTRDYLAAAFPELNAWERAGAsASSAGMVDAGFENQKELTKMQLDNQKEIAEMQNETQKEIAGIQSATSRQNTKDQVYAQNEMLAYQQKESTARVASIMENTNLSKQQQVSEIMRQMLTQAQTAGQYFTNDQIKEhTRKVSAEVDLVH..............QQTQNQRYGSSpIGATAKDISNVVTDAASGVVDhF+GIDKAVADTWNNFWKDGKADGIGSNLSR.... 0 0 1 1 +3355 PF04726 Microvir_J Microvirus J protein Kerrison ND anon DOMO:DM04878; Family This small protein is involved in DNA packaging, interacting with DNA via its hydrophobic carboxyl terminus. In bacteriophage phi-X174, J is present in 60 copies, and forms an S-shaped polypeptide chain without any secondary structure. 
It is thought to interact with DNA through simple charge interactions [1]. 19.00 19.00 21.40 46.90 17.80 16.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.72 0.72 -6.88 0.72 -3.99 3 63 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 53 4 0 16 2 30.40 78 97.41 CHANGED pKutRRSG.............GKRKGARLWYVGGoQF ...pKut+RSG.............GK+KGARLWYVGGpQF 0 0 0 0 +3356 PF04517 Microvir_lysis Microvirus lysis protein (E), C terminus Kerrison ND anon DOMO:DM04358; Family E protein causes host cell lysis by inhibiting MraY, a peptidoglycan biosynthesis enzyme. This leads to cell wall failure at septation [1]. The N terminal transmembrane region matches the signal peptide model and must be omitted from the family. 25.00 25.00 50.50 49.90 17.30 15.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.82 0.72 -4.22 3 70 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 48 0 0 58 2297 42.00 67 46.41 CHANGED pPVSSWKALSLsKTLSMASSARLKPLNCSRoPCVaAQETKpL ..RPVSSWKALSLRKTLLMASSVRLKPLNCSRLPCVaA.EThph. 0 0 0 0 +3357 PF04478 Mid2 Mid2 like cell wall stress sensor Kerrison ND anon DOMO:DM04295; Family This family represents a region near the C terminus of Mid2, which contains a transmembrane region. The remainder of the protein sequence is serine-rich and of low complexity, and is therefore impossible to align accurately. Mid2 is thought to act as a mechanosensor of cell wall stress. The C-terminal cytoplasmic region of Mid2 is known to interact with Rom2, a guanine nucleotide exchange factor (GEF) for Rho1, which is part of the cell wall integrity signalling pathway []1. 
23.10 23.10 23.10 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.05 0.71 -4.81 6 122 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 88 0 81 123 0 108.60 28 28.33 CHANGED sos.ssTI.TSlIcGpTILSstYTTlTYTPouTu..usss+ptpp.GLSKKN+NIVIGCVVGIGVPLllllLsLlYhFCIpssRTDFIsSDGKVlTAY+sN+hoKWWhsLLGKcl..o-cYpS-SPlGu.ssshps.tltpscDlhpsspshohctsss-up ..................................ss.....sh..o.oh...p.G..p..Th....h.s.sth..o...T...l....Th...o.sosos..........ss.s.sp.s...pso...G....L....Spps+.sIl.IGlVVG.lGs..sll.l.s.hlsll.ahhh..........................................................................................th........................................ 0 14 46 75 +3358 PF01187 MIF Macrophage migration inhibitory factor (MIF) Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.10 0.71 -4.00 6 669 2012-10-01 20:38:22 2003-04-07 12:59:11 13 13 403 152 326 668 118 108.50 27 78.19 CHANGED PhhplcTNlPpspVPsuL.p+LostlApuhGKPtphluVclsPGttMsaGGSo-PCAlhSlpSIGsV.uucpN+SaoptlhchLscELplspDRlhIpFaDlpuuslGaNGoThu ...........................Phh.lpTN..l..s..h...s..p.h.s...p...s..lh..............p.c.h.sptl.uph.......h....GK....Pcsalh....lt.lp...sshshh..F...u..G.o.p.-.P.sAhs.pl..p..Sl.G.s..l...ssppN....cphottlsp.h.lppcLu..ls.t.s.R.halpahshpst..hGaNGssh.................................... 0 106 186 263 +3359 PF03775 MinC_C MinC; Septum formation inhibitor MinC, C-terminal domain Bateman A, Finn RD anon COG0850 Domain In Escherichia coli Swiss:P06138 assembles into a Z ring at midcell while assembly at polar sites is prevented by the min system. MinC Swiss:P18196 a component of this system, is an inhibitor of FtsZ assembly that is positioned within the cell by interaction with MinDE. MinC is an oligomer, probably a dimer [1]. The C terminal half of MinC is the most conserved and interacts with MinD. 
The N terminal half is thought interact with FtsZ. 29.70 29.70 30.10 30.20 29.30 29.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.20 0.72 -4.40 15 2118 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 2070 4 484 1401 159 105.10 38 45.31 CHANGED ThllcpsVRSGQpIYuc.GDLlllGsVssGAEllA-GNIHVYGsLRGRAhAGspGss.sApIaspplpuELluIuupahhs-c....lssphhscsspltlcs-tlhlpsL .........hllcpsVRSGQplaut...uDLllhusVssGAElIA.cGNIHVYGhlRG.RA.lAGss......Gsp..pApIhsppL.pAE.LluIAGhYhh.u-p.......lssp.....h.h.s..p..s..s.p...l.h.L.p..p.s.t.lhlp..h........................................... 0 152 303 397 +3360 PF05209 MinC_N Septum formation inhibitor MinC, N-terminal domain Finn RD anon COG0850 Domain In Escherichia coli Swiss:P06138 assembles into a Z ring at midcell while assembly at polar sites is prevented by the min system. MinC Swiss:P18196 a component of this system, is an inhibitor of FtsZ assembly that is positioned within the cell by interaction with MinDE. MinC is an oligomer, probably a dimer [1]. The C terminal half of MinC is the most conserved and interacts with MinD. The N terminal half is thought to interact with FtsZ. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.14 0.72 -4.24 8 1242 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 1231 5 255 716 30 102.50 34 43.57 CHANGED hppssl.-LKuophslslLpLcss...slsclhptLucKIspuPsFFpcsPlllslucl....-slshpALcpshpppGlpslGl.....thtppc...l..hph.G.LslLocucsp .........................hppssl.ELKGSo.FTLoVl+..L+cs...-..cs.lcptLp-...Kl.u...QAP.u...F.h.c.pAPVVls.lu..sL.........cs...s..l..s..a.sslpp...s...l...s...ssGL+llGVss....tstphcsp......h....tch...G.LPlLstupt..................................... 0 66 134 198 +3361 PF03776 MinE Septum formation topological specificity factor MinE Bateman A anon COG0851 Family The E. 
coli minicell locus was shown to code for three gene products (MinC, MinD, and MinE) whose coordinate action is required for proper placement of the division septum. The minE gene codes for a topological specificity factor that, in wild-type cells, prevents the division inhibitor from acting at internal division sites while permitting it to block septation at polar sites [1]. 25.00 25.00 29.00 25.50 23.00 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.37 0.72 -4.47 104 1852 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1808 14 433 931 152 69.60 41 75.76 CHANGED SAslAKERLQlllAH-Rsthps...-....hLspL+cEIlpVIsKYVpl-p.-plplpl-.+s.psh......ssL-lNIslsc ......oAslAKERL.....QlIlA.c-Rsssts....sc.....YLspL+c-ILcVIsK..YV....p.IDs...-tlpVp.l-.+s....scl........olLElNlsLPc................... 0 129 267 356 +3362 PF00230 MIP Major intrinsic protein Finn RD, Delamarche C anon Prosite Family MIP (Major Intrinsic Protein) family proteins exhibit essentially two distinct types of channel properties: (1) specific water transport by the aquaporins, and (2) small neutral solutes transport, such as glycerol by the glycerol facilitators [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.68 0.70 -4.89 14 9641 2009-01-15 18:05:59 2003-04-07 12:59:11 15 26 3806 68 2727 7120 1558 211.20 28 85.40 CHANGED Ehpphsh..hRAslAEFluThlFlFhuhG..ouluhsttst.........................s...........sllslAlAaGhulhshV.ssup..ISGuHlNPAVThuhhlupplollRAlhYhlAQhLGAlsusulL+hhpsu....................pssthss.lusGhssspuhshEllhTFhLVhslaussDch+ssp.up....APLuIGhhVshstLsuhPhTGsuMNPARSFGPAllttp..............assHWlaWlGPhlGAhluullY ................................................................................h..........tt.hhuEhlGT.....h..lh..l...hh.GsG........ssh....h...th.t..h.s..................................t.........................shh.hls..h........u.a..G.....h.u.....lh.....h..h..l.a....s...sut.....l...S...G..u..Hl..N.....P...A..........VTl................uh.......h........l.......t.........u.............p...........h.........s............h.............t......c...s......h.......Y......l...l...A...Q.hl.GAh.....h.....u.......u.s.l.l.h.h.hh.h..s.h...............................................................t.t..hs..h...h.....s....p..h....s..s......s......t......h....s...h..h..p....u......hh........s.Eh..lhT...hh...Ll.h.s...l.....h.........u.......h........s........s.....p....t............p..s.....s.....s.....................................h..s.s.l..s.....l...........G...h.h..l....hh...l....t.....h....s.....h.......s....s....hT......G......h.u.h.NPAR.s...h...G...P.t.lh.t..t.h........................................th..t......h.W.....l.....h...l.uP.l..l....G.u.hluuhha..................................................... 0 763 1487 2164 +3363 PF03094 Mlo Mlo family Mifsud W anon Pfam-B_2483 (release 6.4) Family A family of plant integral membrane proteins, first discovered in barley. 
Mutants lacking wild-type Mlo proteins show broad spectrum resistance to the powdery mildew fungus, and dysregulated cell death control, with spontaneous cell death in response to developmental or abiotic stimuli. Thus wild-type Mlo proteins are thought to be inhibitors of cell death whose deficiency lowers the threshold required to trigger the cascade of events that result in plant cell death. Mlo proteins are localised in the plasma membrane and possess seven transmembrane regions; thus the Mlo family is the only major higher plant family to possess 7 transmembrane domains. It has been suggested that Mlo proteins function as G-protein coupled receptors in plants [1]; however the molecular and biological functions of Mlo proteins remain to be fully determined. 18.50 18.50 18.50 18.50 18.10 18.30 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.66 0.70 -6.38 10 512 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 76 0 283 513 6 337.20 33 80.51 CHANGED sstpoL-pTPTWAVAsVCsVllhISlllE+hLH+lG+ahK+++KKALaEALEKlKsELMLLGFISLLLTluQsh.IucICl.spclupsMhPCs........s.tcEcsshupph...........ssR+LLt..............shAssusspCucKG+VPLlShpuLHQLHIFIFVLAVFHVlYsllThsLG+hKI+sWK+WEcEstsh..-a-husDspRFRasH.......-TSFlRcHh.shWo+.oshhhWVtCFFRQFasSVsKoDYLTLRpGFIssHhss..ss+FNF+KYIpRSLEDDFKsVVGIS.hLWshsVLFLLlNlsGWcoYFWloFIPLlllLsVGTKLEtIIocLALEIpE++sVlcGsPVVpPSD-hFWFu+PcllLaLIHFlLFQNAFpIAaFFWIhasFGl+SCaHcphshllsRLllGVhlQlLCSYhTLPLYALVTQMGSpMK+ulF-EpsucAL+sW++ssKc+pchtt.......ppGusssssucssss......spuoSssplLppspspsss 
............................................t.........sW.ls.hh.hhl.hshhh-h.lphhtp...........hh.pp.pp.p.h.tsl....+hpt.......ELMllGhlSLlLsh.hpt..l..plCl.s...t.h.......h.h.C.....................................................................................................................................C.......t......G......sh.hS.tulcpLH..hFlFhLAlh....HlhashhT..hhLuhh+.....h.pt.W+.WEtph.t............p.......................t........h.....h.p................p....pFhpt+h......t..h.p........hh........ah....hsFh+QFh.tSV.+sDYhshR.uFl.h......Hh...........pasFppYhhRuhEp-FchlV..Gl.........S........l..Whh.slh...hh...........hhs..hp.G.h............hhW.hsh......lPhhll.LhlGsKLphllhphu...........c..............ht...t.......t...t.....hh...........p............t...............hl....ps........pcphFWFt+PphlLhL.IphhLF..................Q.NuFphshFhW.h........h..p..hu..h...p....oChht.p..h...........h........h.+lh.hG.hhhQhlCSY.TLPLYAlVo.QMG.ophK..tlht.pphtpslhtWttt.s..+pp..t..............................................................tt....................................................... 1 53 194 244 +3364 PF03304 Mlp Lipoprotein_12; Mlp lipoprotein family Mifsud W, Bateman A anon Pfam-B_1663 (release 6.5) Family The Mlp (for Multicopy Lipoprotein) family of lipoproteins is found in Borrelia species [1]. This family were previously known as 2.9 lipoprotein genes [2]. These surface expressed genes may represent new candidate vaccinogens for Lyme disease [1]. Members of this family generally are downstream of four ORFs called A,B,C and D that are involved in hemolytic activity. 
21.00 21.00 21.40 21.00 20.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.21 0.71 -4.21 13 220 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 33 0 23 188 4 128.90 36 65.43 CHANGED sQQTKSRpKRDLoQcEhsQQ.K.p.p..EEhLhppLs..................cspKo........ahDWLp........Ehhsuhuch.cFLEsc...cuKhKoh.saIKutlDspsup...............spsNcttsshpsslppaF+GsshDhF....uNptlhpCh ..................................pQsKSRtKRDLo...QcE........sQp.K.p.p..EEhlhppLs.......................cspKs..................ahDWL+............-hlsuhsch.pFLcpc..........cuKhKshhsaIKutlDpsss....................ppssp.tss.h.pp.lpthh.pGs..h.D.F....sspt..t................................................................................ 0 9 9 9 +3365 PF03562 MltA MltA specific insert domain Bateman A anon Pfam-B_1127 (release 7.0) Domain This beta barrel domain is found inserted in the MltA a murein degrading transglycosylase enzyme [1]. This domain may be involved in peptidoglycan binding. 25.00 25.00 28.10 27.70 21.90 20.60 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.85 0.71 -4.50 47 1797 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 1393 13 305 1164 129 152.30 46 39.97 CHANGED llcuStspsspapaPlYphPssh....................................................................tp.thhsa.sRtpIp......tsult.puhpluascs.lDhFhlplQGSGplphsD....Gphlp.luYAupNGaPYpuIG+h..............Ll-cG..................clstcphShpuI+pWhptp.Pp.clpclLppNsSaVFF+ ...............................................VlpuccpRpupapaPlYthPsc..........................................................................t.tht.h.o.Rsp.I.........uG.A..L.s...cu.hlLuYups.lDsFhMclQGSGhlch..s..D....G..p..h..l..p.huYAuKNGHsYpSIG+h..............LlD+G...................l...c....tp....p...hSMQu..I+pWhcp..p....Pp.cl....pE....LLppNPSaVFF+................................ 
0 69 167 238 +3366 PF01642 MM_CoA_mutase Methylmalonyl-CoA mutase Bateman A, Griffiths-Jones SR anon Pfam-B_1611 (release 4.1) Family The enzyme methylmalonyl-CoA mutase is a member of a class of enzymes that uses coenzyme B12 (adenosylcobalamin) as a cofactor. The enzyme induces the formation of an adenosyl radical from the cofactor. This radical then initiates a free-radical rearrangement of its substrate, succinyl-CoA, to methylmalonyl-CoA [1]. 19.00 19.00 19.20 19.00 18.00 18.90 hmmbuild -o /dev/null HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.56 0.70 -6.36 11 3025 2009-01-15 18:05:59 2003-04-07 12:59:11 17 11 1507 35 1053 2608 2315 449.20 38 72.72 CHANGED apTsEGIslcPlYt..ccchp-hpt..htshPGhhPFsRGsYsTMYshpPWTIRQYAGFuTsc-oNthY+hslAsGQpGLSVAFDLATppGYDSDsPRlhG-VGpAGVAIDol.DMchLFcGIsLsphS....VSMThNGsllPlLuhYlssuEcQGVp.ppLsGTlQNDILKEahsRsTYIaPPcPShRlIuDIhtasuccMP+aNoISISGYHhpEAGAoAs.ElAaTLADGlpYl+sulsuGLslDsFAPRLSFFauIGhNaFMEIAKhRAARhLWA+llcp.Fu..sKssKShhLRsHuQTSGWSLTtQDPaNNllRTslEAhAAslGGTQSLHTNuFDEAL..uLP...T-FSARIARNTQlllpEEStls+VlDPhGGSYYlEpLTcclsccAhphIpcI-EhG................GMs+AlcsGlsphcIpEuAA+cQscIDpGcpslVGVNKYpl.cEttl-..lL..cl-sssVctcQlc+LpchRusRDsctlctAL-tlscsAt......tst...NLLsLulcAuRspsTlGEho-Ahccsasca 
.....................................................................................................................................................pt..ht.ha..........t..t.........t....t...h.....................h.sh..sG..PahRG..sh...s..s.h.h..h....sp...sW...s.....hRQaAGauoA.c.-.oN.tha..+.h.t..l.s..t..G...p.p.....G..L......S..s......A.FD..h.sTh.hG.hD.sD.p.s.p...l....h....G...-V....G.t...s.GV...ul.s.ol....DMc..hLFc....sI.sL..s..p..h..o....lS..M..Th.N...u.s...sh...s....l..l.A.hal.ss....u.........c..-..Q...............G.............s..s................................p......................p......................lpG..Tl..Q..s..D..I..LK................E....ahs+...sT.aI.a.....P.P..p..s......S.h..+.l..l.....u..D.lh.t.a.s.u.p.p..h..P..+a...s..o..ISI.SGYHhpEAGAssspElAaTLAsGhpYlcshl........p..t....G....l....s..l....D....p....F...A...s....+L..S.F..F.auhuhs.h.FhplAKh.RAARhLWuclhct.as.............sps......s.+..............u.h....h..LR.....hH..sQTSGhoLTtQ-PhsNllRoslpAhuAsh...G...s...spSLH.T.Nu.hDEA.l........u.LP.......o-hSsRIAhsTQllltpE.otlsc..shDPhuGSaalEpLTppltcpAhthhpcl-p.h...G.................................GhstAl...csGh.phpIp-uutcpptpl-p..Gc....p....sllGVN.ca...................t...p.....-.....p.........h....-...............l.l...........pl..-.........s.............s.....h............p......t..p.......Q..l.....t..........pLp..p.h..+.....t.....p...R.....c.....p.....t.....t.............sp.............t...sLt....tl....p..ps....At.................................tt........Nlh.thhhpAs+s..h.solGEhstshptshst.................................................................................. 0 382 755 937 +3367 PF03972 MmgE_PrpD MmgE/PrpD family Bateman A anon COG2079 Family This family includes 2-methylcitrate dehydratase EC:4.2.1.79 (PrpD) that is required for propionate catabolism. It catalyses the third step of the 2-methylcitric acid cycle. 
28.20 28.20 28.70 28.80 26.30 28.10 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.25 0.70 -6.31 16 2262 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 1523 6 768 1930 3171 437.30 28 94.04 CHANGED ppshcclAcaltshphosh....psh-ps+htllDslGsuhsuhpp.sspthhs.hs.sshsps.ssplhsTptphssstAAhsNGshs+hLDasDsahuAchttsSsslPulLAsA-tluts...........h..c-lLpAhlhuYEItsplAtptuhpchs...hspshalslAuAAusucllsL-p-plhpAluhAhspusulRphcputhsup+t..usu.AstsGlhuAhhAtpGhsG..Psslh-sphGFhcshhss.shphp.................atlpsshhK..aPsphHupoAs-AAhpL+cchp.....hpcIcplplcTapsAh+llscss.......sPpshcsscaSl.YhlAlsLlhGphslppacsch.hpDscl.sLtc+lphp.sscasptY....Pppps....splhhccsssht-ttl-hPhGc.hcpshuhsplh-KFcplhtthhs..pppp.hhhttshcptpltshhss ...............................................................t...h.plsc.al.hshp.hss.h......shcpA+htllDsl..Gsul..t.u.h...p..h..s.......s.s.p.phhs....h......s................u.....t.....s.......s......s..............s.......u............sp.V....G..........s..........s...........h.....p.l.....sP..s...tAAas.Gshh+hLD.......a......sD.T..........a.h....u...A...c........ht.t..su.........s..........sls......u....lLA..sA.-h.l.ups...................h...+clLpAhltuaE..lpsp..l.....u.........h....t.........s......u..hs..+hs.............hs.p....s.h..h.s...p.....l.....u.....usA.ssuch......LG......Ls..p-p......l.hsAluh....A..h.s..p...u...t...u.....L......+...p...h...p...c..us....s.sup+t..hssGpAsppulph.......Ahh.A.p.p.G...h.G.....P..s..slh.t...s.t......h...G....Fh.cs...h.h...p...s...p...s.h.cht...pshu...................................pas.h....p..s..l..h.a..Kh.aP........upaaupsAl-AA.h...pL..tpp..httt............ss-I.cclsl...c......T...pc..s.sh.cl...l..s....c.ts............s.ssst.-..t.ca.sltYhlA..lsL.l...h...G.c...l..s..h...........s.c.apc...........sh...h.....p...c....s........clps.LtpKlps.h.p.D...s..p..hots.Y..........Ppc.psh................ltl.th.p....c..G.s.....p.h...p..hhlch..s.hG....p...hccs......uhsplh.pKFpt.ths...th.hs....ptp...hhh....t
..................................................................................... 0 185 425 615 +3368 PF02406 MmoB_DmpM MmoB/DmpM family Bashton M, Bateman A anon Pfam-B_1148 (release 5.2) Domain This family consists of monooxygenase components such as MmoB methane monooxygenase (EC:1.14.13.25) regulatory protein B. When MmoB is present at low concentration it converts methane monooxygenase from an oxidase to a hydroxylase and stabilises intermediates required for the activation of dioxygen [1]. Also found in this family is DmpM or Phenol hydroxylase (EC:1.14.13.7) protein component P2, this protein lacks redox co-factors and is required for optimal turnover of Phenol hydroxylase [3]. 25.00 25.00 25.90 43.80 24.10 19.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.48 0.72 -4.00 40 224 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 168 27 81 207 3 87.60 31 82.55 CHANGED sspVslslp..ss--scsllEAlttD......NPs..stVp-psuhl+IcucGcLplc+polpEtL.GRsaclp.-lplslsohuG+lpps.D-phslpa ..............stVslslp..ss-pucsllEultp-......NPs...ssVpcpsuhl+I-up.ucLhlctpslpEtL.G+s...achp.plclshsohsG+lspp.DDphhlh........... 0 29 57 73 +3369 PF03176 MMPL MMPL family Mifsud W anon Pfam-B_357 (release 6.5) Family Members of this family are putative integral membrane proteins from bacteria. Several of the members are mycobacterial proteins. Many of the proteins contain two copies of this aligned region. The function of these proteins is not known, although it has been suggested that they may be involved in lipid transport [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.92 0.70 -5.73 34 10072 2012-10-02 18:57:54 2003-04-07 12:59:11 10 47 2346 0 3112 23929 8914 311.20 16 68.96 CHANGED shsspsusuhtshcphsphF.p..ssssshhhlhh.tsstsLss.sspsshpphlsplppDsstlspl.D.......................hs.......................ss.....stthhhSsDG+Ashh.lshpGs.usstu.pslsslcshscpss...s.pGhpshlsGsAuhhtDhpcusstshtllthsslsllhllLlllaRSllssllhlhTVslSlssuhGl.lhlhtthhslslsshslsl.hshlhlAsGoDYslhLluRa+EthttG.cptt....uhhpAhtuoGpVlsuuGLslAhshhu...LshucLshhsp.lGsslulGlllssLsshTlh.PAlhsl...........hGRh...............s.t....................thW..phuthls+tPtsh ............................................................................................................................................................................................................................................................................................t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..s.............-.......t.....p.....h....h.............h..h..........l...........h............p...............t..............t......s.......t.....t.......s........p.......t.......h..........p.........t....l.....p....p.....h...h.p.ph...........................ts.h.....p..s...h...l....s....G.........s..s...h...t.....t....-......h..t...p.........t...h...p.......p.......s...h..h....h.h......h......h..l...s...l.s.l......l...h....l...l....L..h......l..s......a.....R....S......l....h.....s.......s.....l....l....s....l......l...s.....s.........h......l....u.....l...
......h..........s.........s........h.........Gl.......h.....h.........h..........h............t.........h.........h.............h..........h...........h...............s...............l..........s.........s..........h.........s...................s.........l......s........h....s......l....h..l....u....l.....u....h......D..Y..sl......a....l.....l.....s........R....h.........+...E.......p..............h.....t........p..........s.......t..........s......t..........p........................A....l...h.....p......u....h....t......s......s....G......p....s....l....h........h...u........u....l....s...h....s....s.u....h...h..u...............h......h.......h............s...............s...........h.........s......h.......l..............p...p......hG..h...........s.h...u....l.......u.l....l....h....s..h....h....s.s..h.s....ll...P....A....l..h.t..l..................................h..uph...............................................................................................................................................hh........................................................................................................................... 2 1002 2163 2770 +3370 PF01926 MMR_HSR1 50S ribosome-binding GTPase Enright A, Ouzounis C, Bateman A anon Enright A Family The full-length GTPase protein is required for the complete activity of the protein of interacting with the 50S ribosome and binding of both adenine and guanine nucleotides, with a preference for guanine nucleotide. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.14 0.71 -4.13 703 46032 2012-10-05 12:31:08 2003-04-07 12:59:11 18 181 5181 76 12638 71407 21684 130.90 24 37.52 CHANGED plullGhPNsGKSoLlN..tL...............s..............t...t.....p..........hsh..................................lush..s...us..Tpch.hh.......thh.h..........................................tt.................h....h.llDosG..................................lh............pt...tp.h.........................t.........hhpt........h..hptl.pps...........-l...llh.......ll.......D.sp................h.......t................t...pl......................h.....................ppl............................................................tt.shlllhNK .....................................................................lullG..h.PNVGKST..LlN..tl..........................................s.......................s....p.c..............hsh............................................................................................................................................s..u.....s..h.......s.......t..s....T...pc.......h..................th..h.ph.................................................................................................................................ss.t.p....................h...........h.llD..T..s....G.............................................................lh................cstt.ps.t...................................t....hhp..p........................h....hp....t...l....pps.................................cl...........llh........................ll.....-...hpp...............t.............................tt.....l.....h.....t.h...........................................ths.h.hhhhsK....................................................................................................................................................................................................................................
...................... 0 4467 8137 10741 +3371 PF01054 MMTV_SAg Mouse mammary tumour virus superantigen Finn RD, Bateman A anon Pfam-B_518 (release 3.0) Family The mouse mammary tumour virus (MMTV) is a milk-transmitted type B retrovirus. The superantigen (SAg) is encoded by the long terminal repeat. The SAgs are also called PR73. 21.60 21.60 27.40 27.40 21.40 19.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -11.90 0.70 -5.53 5 88 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 18 0 0 93 0 231.20 73 95.85 CHANGED MPRLQQKWLNSRECPTLRGEAAKGLFPTKDDPSAHTRMSPSDKDILILCCKLGIALLCLGLLGEVAVRARRALTLDSFNSSSVQDYNLNNSENSTFLLGQGPQPTSSYKPHRlCPSEIEIRMLAKNYIFTNKTNPIGRLLITMLRNESLPFSTIFTQIQRLEMGIENRKRRSTSVEEQVQGLRASGLEVKRGKRSALVKIGDRWWQPGTYRGPYIYRPTDAPLPYTGRYDLNFDRWVTVNGYKVLYRSLPFRERLARARPPWCVLTQEEKDDMKQQVHDYIYLGTGls.lWt.hFaYT+EGAlA+lLEshKAssh ................................................................GLFPTpDDPSAppRMSPSDKDIhILCCKLGIALLCLGLLGEVAVRARRALTlDShN.sSSVQDYNLNsSENSTFLLtQGPQPTSSYKPHRhsPSEIEIRMLAKNYIFTNcTNPIGRLLlhMLRNESLsFSTIFTQIQ+LEMGIENRKRRS.TuVcEQVQtL.AoGLEVKcGK+SshVKIGDRWWQP..GTYR...GPYIYRPTDAPLPYTGRYDLNFDRWVTVNGYKVLYRSLPFRERLARARPPWCVLoQEEKDDMKQQVHDYIYLGTGM..ahthFa.o+EtAht+ll-phpt............................... 0 0 0 0 +3372 PF05067 Mn_catalase Manganese containing catalase Bateman A anon COG3546 Family Catalases are important antioxidant metalloenzymes that catalyse disproportionation of hydrogen peroxide, forming dioxygen and water. Two families of catalases are known, one having a heme cofactor, and this family that is a structurally distinct family containing non-heme manganese [2]. 
20.80 20.80 20.80 20.80 20.20 20.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.71 0.70 -5.65 6 1280 2012-10-01 21:25:29 2003-04-07 12:59:11 7 5 890 24 300 1030 18 231.50 36 96.59 CHANGED MF.hHsKcLQY.sKPs+PDPshAKtLQElLGGpFGElSsAhpYLaQGaNpRscsKh+...DLLhDluTEEluHVEMlATMIu+Ll-sAssctpEcAsc.s.hhtulhuGhNspH..uIloGhGuhstsSsGsPWoAsYIluSGNLlADhRtNlsAEupuRlphtRLa-MTDDPGl+DMLoFLlsR-thHQ.pahtAlcpLctp-.s.hlPss........as+thEcQEhu+phhNhScG-.sopupWhpGcu.-thtthphlhtshshuthPcL...............+sAP.h.+sT.......lsspthh .........................Ma.hap.K.c.L..p.a..s.V.+.l.spPsPthA..phL.EQhG.GspGELuAAhpYhsQu.hsh..p...s.t...t..t......+...-lLhD...IuTEEluHlEhluohlthL..hcs..u...ss.p........h.........c....s....s.......t............................................................................................sp...H...........hl.......tutush.ssusGsP...WoAsYlp..s..pG-.huDLhpNlAAEt+A+hhYppLhph.o.DD.ssl+-sLpFLhsREhsHpppFtcALpplpsph...............................................................................................................................................................................................ttt......................................................................................... 1 112 214 248 +3373 PF03962 Mnd1 Mnd1 family Wood V, Bateman A anon Wood V Family This family of proteins includes MND1 from S. cerevisiae. The mnd1 protein forms a complex with hop2 to promote homologous chromosome pairing and meiotic double-strand break repair [1]. 
29.90 29.90 29.90 30.10 29.80 29.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.63 36 274 2012-10-04 14:01:12 2003-04-07 12:59:11 10 2 237 0 195 289 2 181.60 31 82.42 CHANGED lhchFhpopshasLK-LEKhsPK.p.GIsshpVK-llQsLlD.-slVpsEKIGouNaYWsFPupshp..........................ptcsphpcLppclpchcpchpplppplppt.ctsRcps.............p-Rpp.lhpclppLpcchcpLpsclp..phppsDPptlcch+ppsphhtpusspWTDNIas..lpsahpp..phshpppplc...pphuIs.p-h-Y ..........................................hhphFhpopshapLK-LEKhsPK.p.GIs.u.hsVK-llQuLVD...D...shVcsEKIGooNaYWuFPucthp....................................................................tpcpphcpLppplpchppchtpLppplppt.....ct....sR.p-o................cERpp..hhpclppLcp...chppLc...s-lp.......phpc..s......D.P.p.hlcphcp.thpht+cuss+W.TDNIas..l.pp..ahpp..p..h.shppptlc...cthtls.t-hc................................................. 0 82 114 162 +3374 PF04039 MnhB Domain related to MnhB subunit of Na+/H+ antiporter Kerrison ND, Finn RD anon COG2111 Family Possible subunit of Na+/H+ antiporter [1], [2]. Predicted integral membrane protein, usually four transmembrane regions in this domain. Often found in bacterial NADH dehydrogenase subunit. 
21.80 21.80 22.10 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.69 0.71 -4.06 25 1811 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 1346 0 552 1386 262 127.40 30 27.60 CHANGED shIlcsss+hlsshllhauhhlhltG..H.sPGGGF.uGlhhAsuhlLhhluhuhp....phhphphttlhslG..lhhshlssls.hhhGhshhs.......h.hshhu...........pshlhsshhhslGlhlssstsshhhh ...............hllpsss+llhsllllhuhalhhpG..HssPGGGFluGLlhusA...hlLhhlA.h.shc..h............h...............pp........h...h.................h......s...............ht.h.lh.ulG.......ll..h......ush.o...ul.s.u.hh...h.G..t....s.FLop.............hhhplPhlu....................phcl.soshhFDlGVhlsVlGsshhh................................................................................... 0 158 340 458 +3375 PF01899 MNHE DUF68; Na+/H+ ion antiporter subunit Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Subunit of a Na+/H+ Prokaryotic antiporter complex ([1],[2]). 21.80 21.80 21.80 21.90 21.60 21.50 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.97 0.71 -4.59 164 1796 2009-09-11 05:06:25 2003-04-07 12:59:11 11 2 1345 0 544 1267 256 146.80 25 89.96 CHANGED hslhLhllWlhLs........s...........sp.........llhGhllullls.hhh.....pt.h.h...h.....................h........tllth..lhhh.lh-llhuslpVAthl.Lp.spht....p.Puhlplslclcsch.slslLAshIoLTPGTlsl-l...........sp-p.....ph..LhlHslc......hsst.tt....hhppl..ppphEchlh............cl.a ..........................slhlhhhWlhls.........s...p.hs..hss.........hlhGh.llulhlh....hhh.......pt.h.hst..p................................hhhphh.....thlph.lh.hhlh-l....lpuslp.Vsph.l.......lp..sphp...hc..P......u.hlt.h...h.c.lc.s-h.slslL.ushI.TLTPGTlslcl.................................sp..-..p.....ph.lhlHsl-...hsst...cp............htpl...ppphEchlhcl....................................... 
0 161 337 456 +3376 PF03404 Mo-co_dimer Mo-co oxidoreductase dimerisation domain Bateman A anon Bateman A Domain This domain is found in molybdopterin cofactor (Mo-co) oxidoreductases. It is involved in dimer formation, and has an Ig-fold structure [1]. 20.90 20.90 20.90 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.91 0.71 -4.52 25 2673 2012-10-03 16:25:20 2003-04-07 12:59:11 11 47 1355 31 742 2393 488 104.90 33 28.91 CHANGED cauIh-Lsl..NSsIspPpHsEhlslss.......tsYsl+GYAYuGGGR+IsRVEVoLDcGcoWpLAslcatEc..+.....tc.............tpaCWsaWsL-lsls-...Lhssc-IhlRAhD-uhslQPc..chhWslhGMMNNsWaRVsIphp ............................................phsh..pShls.P.t..t..t.p..l..hs.....................t.hplpGh....Aa..sGs..u..t.lt+V-...lo...hD....t....Gt.sWp.u...p...l..t.....t.............................................asWphW..php..l...p.l.s...................ust.....c...l..hsRAhDcshssQPc....p.hWNhh......s.............t........................................................ 0 208 443 625 +3378 PF01967 MoaC MoaC family Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family are involved in molybdenum cofactor biosynthesis. However their molecular function is not known. 
20.70 20.70 24.50 24.30 20.40 19.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.79 0.71 -4.50 19 3524 2009-01-15 18:05:59 2003-04-07 12:59:11 16 27 3330 47 977 2349 1242 135.10 51 71.35 CHANGED MVDlopKssopRpAhApuhlphpscslchIppsplt..KGDVlusApIAGIhAAK+Tu-LIPLCHP.LsLouVcV-........hchp.ct..lclpusV+spG+TGVEMEALTuVSVAsLTlYDMsKAl-+s.......hhIpsl+LlEKoGG ...........................MVDVSsKstThRpAhApuhlph.p.s.c.s.l.p.h..I.........t...s............u.....p...hp...................KG.DVlusARIAGIhAAK+Ts-LIP.L.CHP.LhLopV-Vs...................................hp..h..p....s....p...p.......t....lcIpups..+ss..G.+.T..GVEMEALTAsSV................AALTIYDMsKA.V....-..Ks.................MhIssl+LlpKoGG..................... 0 325 623 829 +3379 PF02391 MoaE MoeA; MoeE; MoaE protein Bashton M, Bateman A anon Pfam-B_1056 (release 5.2) Family This family contains the MoaE protein that is involved in biosynthesis of molybdopterin [1]. Molybdopterin, the universal component of the pterin molybdenum cofactors, contains a dithiolene group serving to bind Mo. Addition of the dithiolene sulfurs to a molybdopterin precursor requires the activity of the converting factor. Converting factor contains the MoaE and MoaD proteins. 
21.40 21.40 21.70 21.50 21.30 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.42 0.71 -4.29 18 3376 2009-09-10 22:57:28 2003-04-07 12:59:11 12 13 3008 21 906 2269 1201 115.20 35 72.11 CHANGED cltlsspP..lsssthhphlsssps..GAIVsFsGhVR-hstG+.pVppLpYEuYs.MAtcpLppIspEsc.........p+asshc.ltlhHRlGhLplGEssllluVuusHRp-AacAscahlDplKpcl ........................................................t...lt.ps..hs..h..s.p.....h.p.h..l..t..p...ppp......G.A.llsF....sG..pV.R..s.t.........s............t.....G............c......pl...ps......L......p.....h..E.t.Ys.s.M..s..c+t..LtcI..sp-sp............................p..+...W..s...l.tp....lsl.hH.RlG.pLtsG-thVhluVoosHRpsA.FcAspalhDhlKsp.s............................. 0 273 561 756 +3380 PF01076 Mob_Pre Plasmid recombination enzyme Finn RD, Bateman A anon Pfam-B_717 (release 3.0) Family With some plasmids, recombination can occur in a site specific manner that is independent of RecA. In such cases, the recombination event requires another protein called Pre. Pre is a plasmid recombination enzyme. This protein is also known as Mob (conjugative mobilisation). 
23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.25 0.71 -4.67 18 1003 2012-10-02 18:54:05 2003-04-07 12:59:11 14 11 568 0 72 798 47 164.00 24 46.75 CHANGED Mu.aulhRhpKhK.ssslsGhppHspRpppsp...pNcDIDh-+othNh-Lhss.pshsappcIcphlpcph.tp.RtlRpDAVlhsEhllTuss-Fa.csho.E-p+caF-puhcahpccYGc.pNllhAslHhDEs...........TPHMHhG.......lVPhs-ct.....+LSAKclhst+cpLpphQs....chschhpppGapLcRGptt...oc+KHhssspYKp ......................................................................h...Khp........tsht.ht.H....pR.....pp.............sl..pctt.N...hc.h..h.....p............t.....p......h..p............p..t..l.p.phlpp....h.....................pth+.pcs.h.hh..........chll.o...s..s..c....hh.......p........t..........h........s..................p..c..............t.........cpah....p...p........shp...hh....p.c...+..h.....Gp..pNlh.AslHhDEp....................TPHhHhs.......................hVPh..spst.........................+ls.u+plhs......c......p.....p......Lt......thps...................ph.phht..p....h..lp.R.G.......ott..cp...h..ah............................................................................................ 0 20 46 62 +3381 PF03389 MobA_MobL MobA/MobL family Mifsud W anon Pfam-B_3424 (release 6.6) Family This family includes of the MobA protein from the E. coli plasmid RSF1010, and the MobL protein from the Thiobacillus ferrooxidans plasmid PTF1. These sequences are mobilisation proteins, which are essential for specific plasmid transfer. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.53 0.70 -4.93 9 1275 2012-10-02 18:54:05 2003-04-07 12:59:11 10 14 701 1 159 1186 123 200.30 31 35.78 CHANGED RSAssuAAYppss+h..E+ps+shDYsp+p.lhapEhlL.PspuPp.h....sDR......pshWNcVEshE+RssupLs+-lplALPhELTs-pphtLlc-FlcchhsscGMlADhslHt.........................Dss.tNPHsHlMhThR.lstDGhhtpK.....hhlstsGp.h...........hsttGc.......lhhp.W.sspshhsphRcsWt-phNptLpttGlshRIDtRSacpQGI-hhPTlHlGssu+thE++ ..................................................................SslusuAY..huspplhs........-....h...u...........hh..c.as..p+p....lhh.pphhL..Pt..p...sP..t...h................t-R..........................ppL..WNt.....V.....E..t.h...E...+.........p....psu..pl..ARch.lAL.P..p..E..L.......s.pp.p...........hpLlpcaspc.p...h..s..s..p..GMhsDh.AlHp........................................................sss....t....NP..........HsHlhhohRs...l..s....cGh.h.ttc.............h..tptst.....................................................................ttt.............................psW....sspp...thp.taRcpWushsNphLtpt.s..h.......pt.....R.........lDcRSh.......pp.Q..............u.......h....c........PphH.G.tsttht...................................................................................................................... 0 30 90 120 +3382 PF03205 MobB Molybdopterin guanine dinucleotide synthesis protein B Mifsud W anon Pfam-B_2446 (release 6.5) Family This protein contains a P-loop. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.92 0.71 -4.47 28 2671 2012-10-05 12:31:08 2003-04-07 12:59:11 9 40 2297 9 887 3759 1045 131.50 27 46.35 CHANGED shlhlVGspDoGKoTLhctLlshhhppGh+sshhhchch.Gp.slshPGs..............luhs.hcc...h.h.pth..ppphs...............llhh.Gaps.sspsc...lslsp...phsphhppph..........................pt.shllso ...........llsls.G..h.s.soGKTTLlcpL.l...s..t......L.......p.........t...c.........G....h.....+....l...u...h....l..KH.sc.H......p.....h...-...........l..D...h.......s..Gp....Do..h....ch...tpA.G...A...t.t..s.l.l.s..upp...............ht.h..h....p.p.....h..............p...p...p.s.....L..t....lhtths..................hD..llLl..E..GaKp.t...shsK.....l.lh.R......t...........................................hhh.hhh.t.............................................................................................................. 0 285 523 732 +3383 PF04698 Rab_eff_C MOBP; MOBP_C-Myrip; Rab effector MyRIP/melanophilin C-terminus Waterfield DO, Finn RD, Eberhardt R anon Pfam-B_4174 (release 7.5) Domain This domain is found at the C-terminus of the Rab effector proteins MyRIP and melanophilin. 
20.80 20.80 22.10 20.80 20.20 20.70 hmmbuild -o /dev/null HMM SEED 714 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -13.27 0.70 -6.42 7 193 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 47 0 70 165 0 315.90 37 56.40 CHANGED ChDlltGuhhEsslENEGSIsGSDSTFYRQSE.GHSMMDTLAVALRVAEEAlEEAISKAEuau.DSLDKQNEAsYLR-HKEELhEELATTIlQKIIRKQKsKoEQt..Et-s-WPpsp....ssSspsuDpuhhshPGucRusssLWRSQSAFSlsuEDs......htoussEuhh+p.+spsp+.+-c..SALPSWKSVDpLs-oshsPVLpSsDGNWVALpsso.hPPs..RhLAKPKSpsFpALEstSpVsSAYDEhGS-SEEDaDWu.ALscLp.pspth......tps..pst.s.us-ph..ssSPSsuh.sNsEshh.SDSETSShsSSpEu+.tpu+..WlpR+sspN.sptEKh+lpGEL..DVNFNPQusuhEhSDSSEsEEs.ash-++uRRW+Rs+stsEE.s.t.spscuph+sLpTpps.t...DLSETDlSsEspcp+o.sDshEEKL+oRLaELAhKMS-KETSSGE-QESEsRTEs-NQKpuLSSE-supsVQEELKKKYSAVSLCNISTEVLKVINATEELIAESoGPW-hPssstD+tcGoFPlGTD.lRLDEQLToLEENVYLsAGTVYGLEGQLoELEDAARpIpSsTsEoELA-LEDQVATAAAQVHHAELQISDIESRISALTlAGLNlAPCV+LTR+R-QKQpsQVQTIDTSRQQRRKLPAPPV..KuEchEuSsVTslKTFNRNFlLQGShTpRsK.ERKSosKDLMEPsltSAlMY ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................st.Ts-pELu-LEDpVAssAupVppu
E.plSDIESRIuALphAGLslts..sc.p+..+pp.................................................................................................................................................................................................................... 1 2 8 33 +3384 PF05161 MOFRL MOFRL family Bateman A anon Guo J Family MOFRL(multi-organism fragment with rich Leucine) family exists in bacteria and eukaryotes. The function of this domain is not clear, although it exists in some putative enzymes such as reductases and kinases. 20.20 20.20 20.20 21.40 19.80 19.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.38 0.72 -3.92 14 869 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 746 3 399 837 348 107.80 45 25.09 CHANGED PssLlsGGETTVslpG..p.G+GGRNpEhsLuhshtlc....st.sh...hhLuusTDGIDG....soDAAGullsssshtp.hpstGlD..tphLpssDSYshapthus...LlhTG.TGTNVND ..............................PsslLuGGETTVT.l..p..G................p....G.......+GGRNpEhh..LuhA....hslp.......................Gh.s...s.l...........hsLuusTDGlDG.........spDsAGAh.ssssols+...hp..t...tG...........l....c.....stshLssNDuashFpsl.us............Llh.T.GPTtTNVND................................ 0 127 238 325 +3385 PF04603 Mog1 Ran-interacting Mog1 protein Waterfield DI, Finn RD anon Pfam-B_4771 (release 7.5) Domain Segregation of nuclear and cytoplasmic processes facilitates regulation of many eukaryotic cellular functions such as gene expression and cell cycle progression. Trafficking through the nuclear pore requires a number of highly conserved soluble factors that escort macromolecular substrates into and out of the nucleus. The Mog1 protein has been shown to interact with RanGTP which stimulates guanine nucleotide release, suggesting Mog1 regulates the nuclear transport functions of Ran. The human homologue of Mog1 is thought to be alternatively spliced [1,2,3,4]. 
19.80 19.80 20.80 21.90 18.00 19.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.86 0.71 -4.24 3 266 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 230 2 185 261 3 130.10 31 62.57 CHANGED ssPLFGGALSAILPPthlDVS-LRpIPDNQEVFsHsSTDQ.SlIVELLEhQE+VpDuuAARYHFEDVAusNDAcu.scVhSVcPLuL-sL.uLRu-CssAWlLoGpQpV...AKcNpEuAssVsIHhALlRLPQaQTDLLlTFNsPs .................................p.h.LaGGAls.s.slPt....sahDlS....sl.RpVPDsQEVFs............s...s............s...........s.............c...........p...S..........lIlEl...................LEhh......s...................p.......s..........ps..t....p..A....hpaahpD...lu.t.p...s......s..ts..........hpl.....p....t................h..............t.p.h............t.............................................t................s........h..h.h...u.tp..p.h......................s+....t...p...p...........t..p......l.hl.hh..s..llR.L..p.....hpT.......Dlllohs.P.h........................................................................................................................................ 2 59 103 150 +3386 PF04879 Molybdop_Fe4S4 Molybdopterin oxidoreductase Fe4S4 domain Bateman A anon Bateman A Domain This domain is found in formate dehydrogenase H for which the structure is known. The first domain (residues 1 to 60, 448 to 476, and 499 to 540), comprising two small antiparallel sheets and four helices, coordinates the Fe4S4 cluster just below the protein surface [1]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.96 0.72 -4.15 65 10650 2009-01-15 18:05:59 2003-04-07 12:59:11 11 141 2936 51 2519 7754 1735 56.60 28 7.53 CHANGED hchsposCs.aCGsGCulplts.......pssclh............plpustppPsNp.....GplCsKGttshph.lts ................phshosCs..hC..u.....s..G..Cu.lphts.................cssc..lh.........................pl..cu...c..s...s....s..s....s..sp............................GthCsKG.tthhchl.................................... 0 756 1595 2106 +3387 PF00384 Molybdopterin molybdopterin; Molybdopterin oxidoreductase Finn RD, Griffiths-Jones SR, Bateman A anon Prosite & Pfam-B_2803 (Release 7.5) Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.38 0.70 -5.74 32 25621 2009-09-12 07:55:41 2003-04-07 12:59:11 17 170 4023 138 4565 20492 6386 349.30 16 53.56 CHANGED RLppPhlR.....................................p.pGcahtloWc-ALshlsptlpp..tphsp....h...sssushsssEshhshpchhpthsut.hth.sthsch.............sss.ptshhhssslt...............sl-suDh..llLhGsNstpptsll......ss+hhpthhpst.....hclsslusphs......hshshcaluh..ssso.hsl.hshtpshhpphpt..sc.....................................................................................................................................................................................................................pshlllGt..Ghhpp.....tcGsshhttltslsthhs.ht....sasshs........................................................................................hlpstAspsush....................................................clsh.ss.thh...t..t....phhalhG...........................sDtsph..t......ph....chhllhpsph.spsAphADllLPussasE....+puhasNsEGpsQt....tppsl.ssG-A+pDWcIl+sLuch 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................Rlt.Ph..hp.....................................................t....s...t....h....h.sWppAhp.h......ttt......h.........t..........................................t...............................s...................t........h..................................h............................h.........................................t............h............h...t...................h......h..t.....h..h.h......t................................h........t........s..p..........t.............................................................s..........................................p.h....t.....p.s..ph.....h.....hhh.G....t.......N......h..t...s..t..............................h..h..h.......t.hh.tt............................hphlsl.s...s.hhpt............................ht....t.s.........a....h..........p.s.s.Dh.sh...h........u..h......h............h.h..........h..........p..........p.................h.................st..........................................................................h.........t.............h...................h...........................................................................................................................................................................................................................................................ht.......t..t..h...........t........h........t....h....s........t............h....................t...........................t...............................................h.........h.........s.h..............u..pp..............
................u.........................h......h...h....h.......hG..p...h........s..........G...s.......s.....................................................................................................................................................................h....t.......t.............h..h..............................h..............................................................................................................................................................t..........................................................h.......h..............................................................................h..t...h...........h......h.h.t.................................................................s...............t.............p....t.h.h........t.h.t.........ph........c...h.h...l.s....-........h.......h...s......t............s.............s..................h............u........D...............l..........l............L.P.s.s.....hE...................p..........t............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 1319 2784 3767 +3388 PF01568 Molydop_binding Molydopterin dinucleotide binding domain Bashton M, Bateman A anon Pfam-B_129 (release 4.0) Domain This domain is found in various molybdopterin - containing oxidoreductases and tungsten formylmethanofuran dehydrogenase subunit d (FwdD) and molybdenum formylmethanofuran dehydrogenase subunit (FmdD); where the domain constitutes almost the entire subunit. 
The formylmethanofuran dehydrogenase catalyses the first step in methane formation from CO2 in methanogenic archaea and has a molybdopterin dinucleotide cofactor [1]. This domain corresponds to the C-terminal domain IV in dimethyl sulfoxide (DMSO)reductase which interacts with the 2-amino pyrimidone ring of both molybdopterin guanine dinucleotide molecules [2]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.25 0.72 -4.24 87 16734 2012-10-02 17:45:13 2003-04-07 12:59:11 16 106 3319 124 3570 12374 2211 112.40 22 13.29 CHANGED lhL.hosR.shtphpoth.tstps.thtphts......hl.lsPpDApphG.lpcGDhVclpoppG....phhstA..tlocp.lt.Gslhhshtaht.................sssNhLT..ssthcPhuttPth.+ss.s .......................................h.l.hos.+..ht.p.h..H..oth..t......s.hp.h..h.p.h.tp.hts.................hlhls.....Pp..DApp..h..G.........lp.....s...........G...D...h....V.............cl....h.....s.......p..p.....G...............pl..t..s.tA........hl..o.p......c.....l........h........s............G........s........l........h........h.....hshhh...................................tsthNsLT...t.thsshsthst.p............................................................................... 0 995 2156 2944 +3390 PF04744 Monooxygenase_B Monooxygenase subunit B protein Waterfield DI, Finn RD anon Pfam-B_6020 (release 7.5) Family Family of membrane associated monooxygenases (EC 1.13.12.-) which utilise O(2) to oxidise their substrate. Family members include both ammonia and methane monooxygenases involved in the oxidation of their respective substrates. These enzymes are multi-subunit complexes. This family represents the B subunit of the enzyme; the A subunit is thought to contain the active site. [1,2]. 
22.70 22.70 23.10 24.50 19.20 22.60 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.30 0.70 -5.76 5 143 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 76 12 16 144 19 231.40 40 82.99 CHANGED GEKSQpAFLRMRTIpWYDlKWSK-olKVNEossIoGKFHVFEsWPcAVupPcsSFLNVGpPGPVFVRLooaINGphsPRSlsLEIG+DY-FEVsLKARRPGcWHVHTMlNVcGGGPIIGPGpWIsITGSMuDFcNPVTLLTGpTVDLETaNhuNsIFWHlhWhulGlAWIGYWstRPMFLPRhhhlpAGcDD-LlsspDKKVuhlVLluTLLlVlhGY+sTEoKaPhTIPLQAGppKslpPLPVcsNs......VSlKVpcANY+VPGRALRlTlcVTN+GDpPl+lGEFTTAGlRFlNusVhKc.DssYP-ELLAscGLShDssuPIAPGET+sV-lcApDAtWEVQRLuDLlYDPDSRFGGLLMFaDsoGNRplssIuGPVIPsFs ...............................................................................Gtcs.tsh.Rhpoh.aaD.h.at.............p.htls-.hshoGKhhlhtsWPp.slshPthuFhNhu.PuPshhRhtphlst....p..h..Shsl.hGts..YpaclplKARhPGpaHlHshlNVcsuGPllGPGtalslsGshssFpsslphLsGpTl.shEsashsphhhW.................................................................................................................................................................................................................................................................................................. 0 7 12 15 +3391 PF03473 MOSC MOSC domain Aravind L, Anantharaman V anon Aravind L, Anantharaman V Domain The MOSC (MOCO sulfurase C-terminal) domain is a superfamily of beta-strand-rich domains identified in the molybdenum cofactor sulfurase and several other proteins from both prokaryotes and eukaryotes. These MOSC domains contain an absolutely conserved cysteine and occur either as stand-alone forms such as Swiss:P32157, or fused to other domains such as NifS-like catalytic domain in Molybdenum cofactor sulfurase. The MOSC domain is predicted to be a sulfur-carrier domain that receives sulfur abstracted by the pyridoxal phosphate-dependent NifS-like enzymes, on its conserved cysteine, and delivers it for the formation of diverse sulfur-metal clusters. 
21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.44 0.71 -4.68 30 4691 2009-01-15 18:05:59 2003-04-07 12:59:11 12 41 2815 8 1479 3587 1209 130.30 22 44.61 CHANGED pGhss-.........sshphptus.............cpslhlhspcslstlpppls.pt...............ssstFttNlsl.....sGhs................Esphh....cphplGs..shlcVspspp.Chhhshch.........psuph............................hthhhtshh.GhhhpllpsGplpsGDslpl ..............................................t...............th.hphus...............spsl..h..lhsp...ps...h...t...t.....h..pp..c.h.s...tp....................................................shstF..t.tNlsl........................s.G..h.s.................................................Essh.h.............cth...+.l.....Gc..................sll..pVs....p.s..pp...Ch.th.sh.p.h.............ppsph.............................................................................................................................................h..t.h...h...h.s.......h......G..h.h..h....p.....l..l...p..s...G...h....l.psGDtlp................................................... 0 425 840 1217 +3392 PF02722 MOSP_C Major Outer Sheath Protein C-terminal region Bashton m, Bateman A anon Pfam-B_653 (release 5.5) Family This is a family of spirochete major outer sheath protein C-terminal regions. These proteins are present on the bacterial cell surface. In T. denticola the major outer sheath protein (Msp) binds immobilised laminin and fibronectin supporting the hypothesis that Msp mediates the extracellular matrix binding activity of T. denticola [1]. 
25.00 25.00 25.00 26.90 24.90 24.20 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.45 0.71 -4.76 8 373 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 20 0 10 327 0 169.80 53 36.12 CHANGED DALL........ohQW+WlSsGsYhATAsuNVFGpplhspshosahDsAuFl+lETK.uGDPhT+LlsGLsuGV-sRlYIPhTatlYhsssu....t................th.sspIsLPVsGKsWsSY+IsLu-auWlKPasslYGsTNRhshss.u...........pphppthshtYcVGlohSPlEKVElcspWEQGpLucsPYhs........Ipcslos...cpa.GTFVCGlKlsW ...................DALLThtYRWhSuGuYFAotuoNVFtsshL.s.p..s.h.s.p.h.DhAAalKLETK..uuDP.TphLpGLDhGV-sRsYhPlpathhh......................................................ssssIphPVhGKsWsSYRhshG-YGWVKsYAslYGuTN+tsss.s.sus..................pphptEYCuhYcsGlshSPhEKh.h..h............................................................................. 0 10 10 10 +3393 PF02707 MOSP_N MOSP; Major Outer Sheath Protein N-terminal region Bashton m, Bateman A anon Pfam-B_653 (release 5.5) Family This is a family of spirochete major outer sheath protein N-terminal regions. These proteins are present on the bacterial cell surface. In T. denticola the major outer sheath protein (Msp) binds immobilised laminin and fibronectin supporting the hypothesis that Msp mediates the extracellular matrix binding activity of T. denticola [1]. 
25.00 25.00 36.30 35.50 18.20 20.10 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.22 0.71 -4.74 7 405 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 20 0 10 364 0 167.70 49 36.84 CHANGED laAElplKsLpVshpostsu..........t..as.spppsolEATLHCYGAYhTlGpsPsFhsNFApLWcPalsss.Ycpc..cspYAPGFsGhGGKlGY+ApsluuSGlslDluhLSFuSNGsW-utsos...................HSKYGFGuDhsLsYshtt....pchlplElAuNATLpptYppuspp.ssst..ppsplLWslGuRlTLpPhssF+hshAhDsGs ..............................spl.htsh.hth......................ht.ht.thp.hpATLHhYGAYhTlGpsPshhssFA.LWcPahspt.Yppc..ts.YtPGFtG.GG.KLGY+tpDIuGoGlohDIuF.pFASNssW-upsss.s..s..u.s.................................................................HSKYGhGuDlhhuatRsR....QEhl+VELsGNuTLu..sGYspu..sts..........s.......phstlLWsVGu+lohp.hhGhph.hA......................................... 0 10 10 10 +3394 PF01618 MotA_ExbB MotA/TolQ/ExbB proton channel family Bateman A anon Pfam-B_1099 (release 4.1) Family This family groups together integral membrane proteins that appear to be involved translocation of proteins across a membrane. These proteins are probably proton channels. MotA is an essential component of the flageller motor that uses a proton gradient to generate rotational motion in the flageller [1]. ExbB is part of the TonB-dependent transduction complex. The TonB complex uses the proton gradient across the inner bacterial membrane to transport large molecules across the outer bacterial membrane. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.57 0.71 -4.55 28 9835 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 2969 0 2514 6816 4219 133.80 23 52.20 CHANGED hcthsshtth.thhshpphhthths.....htptthhctlphthppsh.sp.ttpLppshslLuTluusAPalGLlGTVhGIhpsFhslutsup...sshsslusGIupALlATAhGLhVAIPul.lhYNhlsppspthhtchcthtp .......................................................................t........................................tt.h....h....c..t..l.p.t...t.h.c...p.th....pp....tt.p....ph..pp..shsh...L.uslu.u.hu...Ph....lGLhGTVhGl...hpu...hts......l...u...t............tt............ss...h.s.......h....l.......us.......GIupALl....uTshG.lh..sA.l.s.ul..hhashlpp.pspphht.hp....t................................................. 1 796 1613 2101 +3395 PF04006 Mpp10 Mpp10 protein Wood V, Bateman A anon Pfam-B_12513 (release 7.3) Family This family includes proteins related to Mpp10 (M phase phosphoprotein 10). The U3 small nucleolar ribonucleoprotein (snoRNP) is required for three cleavage events that generate the mature 18S rRNA from the pre-rRNA. In Saccharomyces cerevisiae, depletion of Mpp10, a U3 snoRNP-specific protein, halts 18S rRNA production and impairs cleavage at the three U3 snoRNP-dependent sites [3]. 
21.40 21.40 23.30 21.40 20.90 20.30 hmmbuild -o /dev/null HMM SEED 600 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -13.28 0.70 -6.15 6 429 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 298 0 297 439 2 435.40 22 88.04 CHANGED plttlpussstFlhspst.usshcslsph.sh.sphp.cstscpsPLsplsh-uhDsEQIWpQLclpsc.lLssl.p.tst..p.hhspccIoshsc.p.p.pp-Dsp-..-.st.ppuDD-p-.c-.Ep......h......s..s.tEsst--.pt..sEss-.pc.-...........-.tt....p...tp.ppKch-spsl-DpFFcL-EhpcaLpptEcc..Ecut.s--cctc-h..D..pchpsD..................Fu.tc.ppsccptslpYcDFFs...............c.hp..pct-.p.pttsscch.cccu-pcp.cpsctD--.-ppps..p..p.......sh.sthc........--.s..tscpsuscphSSaE+cpt+lpp+IcpLEcEsLucKsWpLpGEVsAppRPpNSLLEccL-FD+sA+PsPVITEEsTcoLEDlIKpRIpDpsaDDV.Rps+lsssttch+cphpLscpKSKpSLAElYEpEYh+tsspph........sthspc.sctHpEIpphhssLhhKLDALSsFHFsPKPstsElcIVoNsPAlsMEEVuPlAsSDAthLAPEEIacssKstc...........hocsEhsppDKpRcRRpKKpKpp.................................Kthphstptpphts-tssp.shtps.shhsKht-pssss ............................................................................................................t......................................................................................................la......................................................................................................................................................................................................................t..................................................................................................................................................................................................hpD....tF...hphpphp....t....h...-tt...........t......t.....................t..t.tt...p................c.................................................................................................t...p...t......t..t.........h....h.tDh..ht.......................................................................................................................................t....................t...........t.
.............t.................................................................................................p....t.......t.......t.....t......t.....t........h...S.s.aE..+pp.c...............h.tpp..I..pplEtt.ltp+.WphpGEsputpRP.NSLL.E....p....s....Lc...F-c.s........s+..................sP.sl..............Tp...E.ho.............p...........lEp........hI+pRIhpp.........taD-.l..+p....................................................t....c.t............h...p.....lsppKup.uLu-lYEp-ah...p.tt....................tpp.p......t+tE..lp..phhppl..cLDuLSshH...ahPKss.......p.h.p.....l..h...s.....s...h.s..s.lshE.-stP.....h...s...h......u....s...s....s......h....lA.PpElht..t.t.t............................hs..ptEhotp..-.+t+.R...pt....tK....p.p..............................................................................t.......t................................................t................................................................ 1 106 169 251 +3396 PF05172 Nup35_RRM MPPN; Nup53/35/40-type RNA recognition motif Guo JH, Coggill P anon Guo JH Domain Members of this family belong to the nucleor pore complex, NPC, the only gateway between the nucleus and the cytoplasm. The NPC consists of several subcomplexes each one of which is made up of multiple copies of several individual Nup, Nic or Sec protein subunits. In yeast, this Nup or nucleoporin subunit is numbered Nup53, Nup40 in Schizo. pombe and in vertebrates as Nup35. This subunit forms part of the inner ring within the membrane and interacts directly with Nup-Ndc1, considered to be an anchor for the NPC in the pore membrane [1]. This region of the Nup is the RNA-recognition region [2]. 
20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.46 0.72 -4.26 7 252 2012-10-02 20:46:34 2003-04-07 12:59:11 8 5 191 5 164 260 2 95.70 32 25.66 CHANGED ssspsusp......VhVFGFP.u.ss.lltcFupaGpIlcch..................................................hspsuNWh+lpYps..pAp+ALpcNGhlhssslhlGV..s..tsKplhstp ..................................................s....scpW.....VTVFG..FP..u.sushlLppFupaG..sI..lcch.....................................................hs.ssuNWh+lpYpSchpAp+ALp+.NGpl.hs..sslh.lGVp.s...h.cphht..p................ 0 60 91 135 +3397 PF01188 MR_MLE Mandelate racemase / muconate lactonizing enzyme, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain C-terminal domain is TIM barrel fold, dehydratase-like domain. Manganese is associated with this domain. 22.40 22.40 22.40 22.50 22.30 22.30 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.73 0.72 -3.44 1272 5681 2012-10-02 01:07:48 2003-04-07 12:59:11 16 31 2337 582 1586 7550 3093 69.00 28 18.12 CHANGED tl...pulRcs....hus.th...........t.lhlD.....s...N.........tu..............h..............p.pAlph...hctL.......p.p..........h.....h.alE.pPlss......tsh.......pshtp.l...p....p.ps....lP...l.sss .............................h.ltsl+ct.....hP..-h........p..ltlD.......sN................tu................W.sh..........ppAhph...sctL..........s.s...................l....talE.-Psss........tph.........cshtp.h....p...........c.ts..........slP..lAs............................. 0 468 964 1332 +3398 PF02746 MR_MLE_N Mandelate racemase / muconate lactonizing enzyme, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports fold similarity with enolase N-terminal domain. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.34 0.71 -4.10 9 7573 2012-10-02 11:54:41 2003-04-07 12:59:11 11 23 2340 767 2008 5956 2681 111.20 25 29.06 CHANGED tlpshlsssshs...Ph.huhtohtpt.hVllclp.s-GlsGlGEusshG.......ps.sltshlcsaLuPhLlGpDssplpshhphhh+ushs.....shoAtAAlDhALaDlpu+shshPlscLlG .........................................................................................hhhh..........................................................t....h.....l......llc..l.p...s...-...s....G.......l.....s.....G...h...GE....s....s...s....hs..............................ts.h...t...s...t...l...p...c...........l..t....s.h......l.....l......G......p.......-....s....t...p....l...p.....p..........l.....h.....p.....t...h....h..p...t..h.hh.......................hshsA..h....uAl...D.......hALWDlh........uK.thsh.Plap.LLG....................... 0 558 1207 1629 +3399 PF04152 Mre11_DNA_bind Mer11_DNA_bind; Mre11 DNA-binding presumed domain Wood V, Finn RD anon Pfam-B_3909 (release 7.3); Domain The Mre11 complex is a multi-subunit nuclease that is composed of Mre11, Rad50 and Nbs1/Xrs2, and is involved in checkpoint signalling and DNA replication [1]. Mre11 has an intrinsic DNA-binding activity that is stimulated by Rad50 on its own or in combination with Nbs1 [2]. 
25.00 25.00 27.40 25.20 23.20 24.80 hmmbuild --amino -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.04 0.71 -4.41 56 403 2009-09-10 20:52:09 2003-04-07 12:59:11 9 9 297 12 239 389 1 164.80 32 24.61 CHANGED aphpsIPL+TVRPFlhc-llLpcps.........pssscpclp.paLhp............p.V-phIccAppph.................t....tttcss..LPLlRLRV-Yous.................aps.NPpRFup+.FVG+VANss......DllpFa...+.+Kptpp................pptp.tptthht.pphsplclcsLVp-aL....ssppLslLscsshucAVppF ....................aphp.IPLcTVRPFhhc-ll.Lsccs................................psps.pppl..p..p.alhp............t.V-.phI-cAppph...........................................s.p.pps...lPLlRLRV-Yous.................apshNspRFup+..Fls.+VANsp......DllpFh..+++cpppt..............t............tttt.tpt.h....tt.pp.h.sshcVEsL...Vpcah....ps.pLplLspp.shscAlppF.................................................. 0 81 132 200 +3400 PF04085 MreC rod shape-determining protein MreC TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family MreC (murein formation C) is involved in the rod shape determination in E. coli, and more generally in cell shape determination of bacteria whether or not they are rod-shaped. 
28.00 28.00 31.50 30.70 27.50 27.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.64 0.71 -4.79 20 3591 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 3553 5 739 2380 2076 151.70 29 50.93 CHANGED pVlsh.ssssasppllIs..pGppcGl.....pps.sVls....spG.LlGpV..spVsptoS+VhLLoDssp...plslplp..+ss.....................................................hp.GIlsGpsspp........tLplphls..sss-IchGDhlVTSGLG......GhaPsGl.VGpVsplctcsttht..t.htlcPssclpcLcaVhllh.s ..............................................................Vlsp.sss..s.appplsIc..+G..sps..G......l........tps.sVls..........spG.....lVGpV..s.pVsphoS..pVh.Ll.s..c.ssp......tlslplt...+ss........................................................................hp..ullp....G..p...s..s..ps...................Lplppls..........ssscl..p..h..G.......Dh..lV.T.S.G.L....G.........uh..aPpGlsVGpVspV.pp...c.s.tthh.....pplp.lcPsA.shpcl.chlhllh..s.......................... 0 258 496 635 +3401 PF04093 MreD rod shape-determining protein MreD Finn RD anon manual; Family MreD (murein formation D) is involved in the rod shape determination in E. coli, and more generally in cell shape determination of bacteria whether or not they are rod-shaped. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.02 0.71 -4.38 14 2682 2012-10-03 02:46:00 2003-04-07 12:59:11 7 2 2669 0 496 1464 599 156.00 23 93.18 CHANGED pRhhhtallhls.hllullhth.hsh.ss....hhhhtPpaLhLhLlahslthsppsulhhuhlhGllaDlhhsullGlpshhhsllsallshhhthl+h......hshhhtshl.....l.h...hshllthllths.h.hlh.shap..Pph..lhshlluhlLh.hlhhLhptl ..................................h....h.hll.hl....hl...l.s..hl.lp......h...h......s..h..hs......hhhht.P...pa...l......ll.h...l.l...a.h......s.l.......t.......h...s...c...p...s..s....l......h.hua..lhGllhD.l.h..........h..u..u..........h..........l.G.lpslshsllsall.s.h.h...h....p.h.h..hp...........ls...l..h..h...ts..ll........lhl.......hs.hh...l...t...h..l.....l.h..h..s..p...h...........h.h...h.....l.......s.......h...p.........P.p.h....lhs.h..ll.t.slL.ashlhhhh...h...................................................................... 0 156 313 416 +3402 PF03919 mRNA_cap_C mRNA capping enzyme, C-terminal domain Finn RD, Bateman A anon Sarah Teichmann Domain \N 26.50 26.50 27.70 27.30 26.40 26.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.68 0.72 -3.71 11 377 2012-10-03 20:18:02 2003-04-07 12:59:11 10 15 304 18 250 369 89 107.30 30 20.63 CHANGED phNoVDFhLchsct....t.shL....pphGhlpah...........tt.s.....................+hssp.pphss+IlECpa..-p..psp...WhahRhRsDKopPNshsTscsVlpoIpssVTcEhLlc ..................................................................NolDFplclph....................ps..l.........p.s....hh......ah.......................................................tthp.......h........................t..phpp...p...hs...s+..IlECph...........-p....psp...................Wp.......hh.......RhRsDKspsNphsTsppVhpSIp-sVoc-tLl....... 
0 96 147 211 +3403 PF01331 mRNA_cap_enzyme mRNA capping enzyme, catalytic domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family represents the ATP binding catalytic domain of the mRNA capping enzyme. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.36 0.71 -4.61 12 521 2012-10-02 00:43:09 2003-04-07 12:59:11 14 25 354 18 336 652 333 173.70 27 35.15 CHANGED pPlShs+.cthchLpppsYhVs.KsDGhRhhhhlsps........sahlDRc.phahlpthpaPhph..........hhphTLLDGEhllDhhtt.....tphRYLlaDhlshsGps.lsppshs..pRhphlp+clhpPpstth..psthhphc..PFtlphKshh.h.tsp+.h..htphhtplsHcsDGLIFpsscsPYssG.pspslLKWK ...................................................................pPVShs....p....pphp..L...pp..sYhVs..K....sDGhRhhhh..lstp...............tsahlD.R...p...ps..hhh.l.p.....hpaPh...........p......................t.h..hp...sTLlDG....E.h..l.h...Dph.s........................ttp.+.....aL.laDhlhh........s...............ups....l...hp...pshp.............pRlthlpcplhp..Pht....ph.....h...............p.....p................h.....hp.........tt.......................p....s.....F...........p......l.ph.Kshh.hh...thpp.......h..............ht.phh...............pl....H..t.sDGLIFp...s..h..p....t...sYh.G...psp.p.....lLKWK......................................................................................... 0 145 207 288 +3404 PF02940 mRNA_triPase mRNA capping enzyme, beta chain Griffiths-Jones SR anon Structural domain Domain The beta chain of mRNA capping enzyme has triphosphatase activity. 
The function of the capping enzyme also depends on the guanylyltransferase activity conferred by the alpha chain (see Pfam:PF01331) 20.40 20.40 21.00 21.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.62 0.70 -4.60 24 259 2012-10-01 23:11:28 2003-04-07 12:59:11 10 13 212 14 161 256 64 210.30 23 42.20 CHANGED .cshs+sls-alht................lpspshpplElEhKlGhlhctp....sspRhphsl.opslhspp.........pspFpsslscspapphpcaLpphs..............................................................t.t..hh.hshhcspppDphYpht...................tp.splRloh...Dsp...sschhtt...IcKcRlssl.lasPpssa..........................................DhRlSlslEhshs.s.......stsssps.thpRpKcRhSYh.+ss..h+hDLT+V..........s..tpscspppaElElEl ...............................................................................h...ltphhh..h..............h..t.....lElEh+hGhlhstp.........ptpRh..th..s...h.........s.tslhppt................thtFpsshs.....tttapthpphLpphs.....................................................................................................................h.hshh.cppppDphaphs......................tctsplRlop....Dtp........ssphhtt....ItKp+ls..slpl.a.Pp.s.s..h.........................................................DhRlSl..slEhshsts..................thtpp..s.t..hpRpKcRhSYp.pts..........hplDlTpV......................t...ttstptphpaElElEl........................................... 0 60 97 142 +3406 PF02349 MSG Major surface glycoprotein Bashton M, Bateman A anon Pfam-B_864 (release 5.2) Family This is a novel repeat in Pneumocystis carinii Major surface glycoprotein (MSG) some members of the alignment have up to nine repeats of this family, the repeats containing several conserved cysteines. The MSG of P. carinii is an important protein in host-pathogen interactions [2]. 
Surface glycoprotein A Swiss:O59920 from Pneumocystis carinii is a main target for the host immune system, this protein is implicated in the attachment of Pneumocystis carinii to the host alveolar epithelial cells, alveolar macrophages, host surfactant and possibly accounts in part for the hypoxia seen in Pneumocystis carinii pneumonia (PCP) [1]. 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -11.10 0.72 -4.01 205 505 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 3 0 0 514 0 90.30 24 46.08 CHANGED pphht-ppChp.htptC....Lpps.........Cp.php......................Cpp....l+cpC...p.phppctp..................................................hpphhhpph+s.....plpsc....pcCpctL.pc.C.thpp..sp ........................................p....h-ccCtc.htpcChh.Lpps..st..phptp..............CppL+ppCtphthpph.....................................................................hpchLhcthcs......shpsc......pcCpctL.pchCspLpc.tp.t.................. 0 0 0 0 +3407 PF04066 MrpF_PhaF Multiple resistance and pH regulation protein F (MrpF / PhaF) Kerrison ND, Finn RD anon COG2212 Family Members of the PhaF / MrpF family are predicted to be an integral membrane proteins with three transmembrane regions, involved in regulation of pH. PhaF is part of a potassium efflux system involved in pH regulation.\ It is also involved in symbiosis in Rhizobium meliloti [1]. MrpF is part of a Na+/H+ antiporter complex, also involved in pH homeostasis. MrpF is thought to be an efflux system for Na+ and cholate [2]. The Mrp system in Bacilli may also have primary energisation capacities [3]. 
21.30 21.30 21.30 21.30 21.00 20.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.36 0.72 -3.69 16 1647 2009-09-11 16:03:59 2003-04-07 12:59:11 8 2 1250 0 488 1048 205 54.60 31 57.36 CHANGED hsDRVlALDslsstllullsllulhhpphhhlDlAlllulLuFluTlAhA+altt ........................sDRllALDslsh.shullslhulh....h.s.s.s.hal-shlllAlluFluTluhu+Fl......... 2 147 300 407 +3408 PF04471 Mrr_cat Restriction endonuclease Waterfield DI, Finn RD anon COG1715 Family Prokaryotic family found in type II restriction enzymes containing the hallmark (D/E)-(D/E)XK active site. Presence of catalytic residues implicates this region in the enzymatic cleavage of DNA [1,2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.69 0.71 -4.28 227 2560 2012-10-11 20:44:43 2003-04-07 12:59:11 7 96 1770 1 790 2681 416 113.70 20 32.61 CHANGED l.p......p...hs.stpFEclltp....lhpp.h..Gap.......s..p......h.st.t.us.Ds...G..lDl...l......h...p.p.s....u......p.............p..h..hlQsK+a.....p........s..p...luh.pt.lpp.h.hu.s.h.p..t......p.p....us......p......GlhlT.o.us...Fo...psA....p...ph.A....p..p.....p....p........lpLl..DupcLhchl ...............................................................h...hss.tFEphltp....lhp.....p...h...Gap...................s.p.............h.st...t..ss...D.t.....G....l...Dh..l................h...pp...s.t....t.......p..................................ph...hlQ.sKca........p..................s..p..........lu.t....tt....l.p..p...h.hu...s.h.t..t................h.t.......ss................p..........................ulhl..T..s.us.....Fo....p...sA....p....ph.A....pp.......p....t..............lhLl..stppLhph............................................................ 
1 266 525 668 +3409 PF00924 MS_channel UPF0003; Mechanosensitive ion channel Bateman A, Martinac B anon Pfam-B_1136 (release 3.0) Family Two members of this protein family: Swiss:Q57634 and Swiss:Q58543 of M. jannaschii have been functionally characterised. Both proteins form mechanosensitive (MS) ion channels upon reconstitution into liposomes and functional examination by the patch-clamp technique. Therefore this family are likely to also be MS channel proteins. 20.70 20.70 20.70 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -10.89 0.70 -5.00 37 13351 2009-09-13 17:39:23 2003-04-07 12:59:11 13 28 4579 14 3447 9838 4024 200.60 21 41.76 CHANGED tplhphllhslshlhsLshlGhs...hsullushGhhGluluhuhQshlsslluGlhllh-+.shclGDhlpls...............shpGtVpclslpsTpl+sh-sphlhlPNsplhsppltNaop...pstpRlphslslsasos...hcplhchltchhtppstlhp................................................tshlhhsphussulshplpsasps.......tchhslppplthc.........lppthccpsIph 
......................................................t..lhph.h.l.h.h.l.s...l...l..h..h..l.....s..h..l...G.ls........hs..s........l...l........s....s....h....G........s....h.u..l....u....l....G....h....u.h...Q..shlss.hl........uG....l....h.l.l.h.p.c..shclGDh.lp.ls..............................................sh.p..G..s....V..p..cI.....s.l.p.........s.........T.p.........l............p.........s...........h...........D.........s.....p.....h.....l..h.lPNstlh.s.t..s.l..t..Naot....................shtc....h..t..h.s..l...s.l.s......h.....s..s.s...............h.p........p..h.h...p.h........l.....h.....p........h....h...t...p...p....s..t..l.hp...............................................................................p....t..........l..h....h...t...t...h...s...t..s...s...h...s...h...t...lt..h..a..sps..................t.p..h...h.t.....h..ttplhtp.........lhphh.pptslp.................................................................................................................................... 1 1007 2127 2858 +3410 PF00985 MSA_2 Merozoite Surface Antigen 2 (MSA-2) family Finn RD, Bateman A anon Pfam-B_1052 (release 3.0) Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.00 0.71 -4.12 5 652 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 19 0 1 531 0 103.60 55 57.25 CHANGED TTTTTTTNDAEASTSTSSEN.NHpNAETNPKGcGEVQpPNQANKETQNNSNVQQDSQTKSNVPcTQDADTKSPTAQPEQAENSAPTAEQTESPELQSAPENKGTGQHGHMHGSRNNHPQNTSDSQKECTDGNKENCGAATSLLNNSSNIASINKFVVLISATLVLSFAIFI .........................osoToTTNsuEupToTso..............sA-Ts......spscu.ps.p..Ps.....sspEopssuN..s.pcopsKup..s..pQst.scSspttPc...............APpt.QTtpsE..usP.-N.................................................................................... 
0 1 1 1 +3411 PF01741 MscL Large-conductance mechanosensitive channel, MscL Bateman A anon [1] Domain \N 22.40 22.40 22.60 22.60 22.30 22.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.71 0.71 -3.99 8 3477 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 3349 6 744 2091 328 128.40 42 92.61 CHANGED hlKEFKEFAhRGNVVDLAVGVlIGuAFGKIVoSLVsDIIMPllGhLlGulDFu.sluh.h....G...........hsusslsYGlFIQslINFlIIAFAI.FlsIKsIN+L....++KcEs....ps.A.s-....sppsLLTEIRDLLKpp ...........................hlKEFK-Fh.hRGNVlDLAVGV..IIGuAFupIVoSLV...s...DIIMP.l.l.Gh.l..l....G..u..l....D.Fs...sht.h.h.........st...............................h.s...s..s.slpY...GsFIps.l.....lsF....lIl.A....Fsl...F.h..h...l.....K....h....l....N..+...l.................p..+..+...c.c..tt......t...t.s.....s....ss...........pp..LLsEIRDLLcp.............................................. 0 218 475 634 +3412 PF01716 MSP Manganese-stabilising protein / photosystem II polypeptide Bashton M, Bateman A anon Pfam-B_1814 (release 4.1) Family This family consists of the 33 KDa photosystem II polypeptide from the oxygen evolving complex (OEC) of plants and cyanobacteria. The protein is also known as the manganese-stabilising protein as it is associated with the manganese complex of the OEC and may provide the ligands for the complex [1]. 
25.00 25.00 37.30 36.80 20.40 19.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.58 0.70 -5.31 8 265 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 197 17 67 285 155 210.30 51 79.74 CHANGED -hQsLTY--I+uTGLANpCPsLs-ssRGolsl-uGppYtlscLCLEPosFhVKEEPsNKRQcAEFVssKLlTRhTooLDQIpGclpVsuDGSlTFpEKDGIDFQAlTVQLPGGERVPFLFTIKNLVApTpPshoSlsoShDFcG-FpVPSYRGAsFLDPKGRGluTGYDN..AVALPusuDcE-Ls+pNVKpsshuKGpISLpVuKVDusTGEIAGVFES.QPSDTDLGAKEP+DVKlpGlFYApl- .....................pshTY.plKGTGhANpCPslssu...ts.sh.slcsGp.YphpchClEPToFtVKt....E.s.hs..K.pt...t....s..-..F.pTKLhT.R.hTYTLDp........lpGshpVssDGslpFp.E.cDG..IDaAslTVQLPGGERVPFLFTlKpLsApupsss...................FuGpF...hVPSYRGusFLDPKGRGssTGYDpA...VA.L.P..A.....t..u....D.....p-....-L.+EN.K..ph..ts..spGplshpls+scspTGElhGlFpS.QPSDTDhGuK..Ph-VKlpGlaYupl............... 0 21 48 60 +3413 PF03429 MSP1b Major surface protein 1B Finn RD anon Pfam-B_4414 (release 6.6) Family The major surface protein (MSP1) of the cattle pathogen Anaplasma is a heterodimer comprised of MSP1a and MSP1b. This family is the MSP1b chain. There MSP1 proteins are putative adhesins for bovine erythrocytes. 
25.00 25.00 231.30 62.20 20.90 22.80 hmmbuild -o /dev/null HMM SEED 726 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.50 0.70 -13.29 0.70 -6.64 5 35 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 4 0 10 61 0 481.30 64 97.78 CHANGED MTEDDKQQQQNQSNVVQAISAVFQRKSAELQRLNDFIKGADGTLKNVHPHMKSLEALSKQLSEKIAAEAAAKADAKYESVGLRAKAAAALGNLGRLVARGKLKSSDAPKDLDQSIDALPFMDEAPDTGERVVVPAGEEQEFGKAAAWGLAGFKRTVDESLEMLGRGMNMLAEGQAQISQGIADKSTALVREGLETSRLGAGLCRNGLVEASYGVGYANETMGKYAGKGLEKCKNKLGDACYKWSKALEEIENLRTAIDAKAEQQVEGEAWSPEGVSANTFYRGLHKIGAAIAVAAQATWEGLAMTGKFMGAVAKLAGAVSMCVAAYTAAIVGMAAATPATLLLTAMDNQS.VNNAVVKVSEYLHSNVEQATKDLMASEFAMMTFGGIMTCAKLMKGSFAAINQKFEEINATLTREATDIAQGIKETYKSIGDAF..................KSANDGIAKWT.....AALAGYASVEQLEEAKEADRAQAEQRAEAQAMTERVAGERAATVAAGTETIKTIVS..................DMCNELAQIGGLSQAERDALVQSFTPKPPARTTKEIVSQMCNSVKSAFGSISHITNVIRQAGKDAQKIDPQVEVAEISPETIYAMSEALYALNMQESAsINNALLAAVNDSSKDDQAIVTDLINATIEVCTEQTNTLAGHTAEVQAGLEAAGIKLDDAQGLQEATPEA.KGVEGINPEELEQAAEGLATAVNEASADGKIQSLNQQETQIAQGGQHAAQQQSSGWSR ...............................VhQthpAtht+.NthlcthDsh.K.h+.pMpsL-ALpppLppKhA..tEsssKhsthhtSsuLRAKAsA................................M-ps..PsshE.h.s.hGEE.pEFG+AsAaGLsGhK+TlDEulEMLsRGMpMluEGQAplupGltsKsstlV+tGLEhStLGsuhChpuLs-hoaGlthspc.sMGKhAGKGL-KC+pKLtsAs.KW.pAhpEl-sLRTAI.....-t......tAtppsEGEAWSPcGVpuNsFY+uLppIGttIAsAAQATWEGLAMTGKFMGAVAKLAGAVSMCVAAYTAAIlshh...hPAsl.LshMsspS.lsphVs+sucYLHSNVEQATKDLMASEFAMMTFGGIMTCAKLMKsSFAAlNQKFE.ElNATLs.RcuTDIsQGlKEsYpSIGDAF..................KSsNsGIAKWT.....AAlAGYASVEQLEEAKtADRsQA-QpAEtQAMscpVAttRAATVAAGTtTIKTIVS..................DMCNELAQIsuhSQAthsA...........................................................................tEuthlpsALLtAVNDoSKDDQAIVTsLINAsIEVC..TcQTNTLAGHTAEVQttLEAAGl+h-DAp....ss.pt.KGhEGIN.EELtQAA................................................................................................................................................................................ 
0 0 0 10 +3414 PF04421 Mss4 Mss4 protein Bateman A, Wood V anon Wood V Domain \N 21.20 21.20 27.90 21.50 19.50 19.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.38 0.72 -4.15 5 159 2012-10-01 21:11:27 2003-04-07 12:59:11 8 4 136 5 109 153 0 95.40 32 61.01 CHANGED sscsllNpcc.shV-hPLhsp+pcR.......t...........Dssss-.lc-FaLVKDM.....FsFENVGFS+slc.shKaLVCADCE+GPlGaaDhsscpsa.luLERVsHp .........................................................................................................h...httt...............t..................sssss-h....lptaWhVp.DM.....asFENlGFo+s.................Vs.....s.....h..............KaLlCADCEhGPIGapsh.s...s+psa.lAhcRV........................ 0 38 53 90 +3415 PF03940 MSSP Male specific sperm protein Finn RD anon DOMO_DM01786 Family This family of drosophila proteins are typified by the repetitive motif C-G-P. 21.20 21.20 21.50 21.80 19.90 21.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.52 0.72 -10.77 0.72 -4.57 3 8 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 6 0 4 8 1 45.20 74 47.76 CHANGED CGGPC..CGPCGG..CG....PCG.G.CGPC..CGPCGPC...CGPCGPCGPCCGsscsaCGC .ssGPCCGPCGG..CGPCGG.CGPCCGGCGPpCGPCGuCGPCCGsspsaCGC........ 0 3 3 4 +3416 PF05063 MT-A70 MT-A70 Finn RD anon Pfam-B_3025 (release 7.7) Family MT-A70 is the S-adenosylmethionine-binding subunit of human mRNA:m6A methyl-transferase (MTase), an enzyme that sequence-specifically methylates adenines in pre-mRNAs. 
21.50 21.50 21.80 21.80 21.40 20.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.38 0.71 -4.71 10 995 2012-10-10 17:06:42 2003-04-07 12:59:11 9 19 544 0 535 970 231 158.10 28 43.04 CHANGED FslIlhDPPWc.+hhtt.t.....plsYsshsp--lpsLslscLtsc.psllFLWsTspthhps...+psLcpWGachlsc.lsWlKsNphscslt..shpssphhp+spEHCLlGlKGsspp.hst..hptth.....spslIlu...hctpS+KPsElatllE+ls.....stc.+LELFuRs..l+PGWholGsp ......................................................aslIhhDPPWp.........t.............................t..Y.ss....h.....s...........p-.....lt.pL..s...l...p.l..s.....sp....sshlalWss..s..tt..h..ts...........hchl..p....t....W..G.....a....ch.h....sp..hhWlKhs.....p.t..t................p....hh..................................th...........st......h...hp..pspEphLhu.h.+.......Gss.p....t.....t..........................t..................pppllhu................http.....S........+..K............Pst.....hhph..l-phh......................ssht..+lELF.u..Rp.........hpsGWh.shGsp............................................. 1 194 302 434 +3418 PF01993 MTD methylene-5,6,7,8-tetrahydromethanopterin dehydrogenase Enright A, Ouzounis C, Bateman A anon Enright A Family This enzyme family is involved in formation of methane from carbon dioxide EC:1.5.99.9. The enzyme requires coenzyme F420 [1]. 
25.00 25.00 142.50 142.40 19.50 19.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.46 0.70 -5.36 4 55 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 54 54 41 57 4 271.60 56 99.53 CHANGED VVKlGlIKCGNIGTS.llDhLLDERADRcDI-VRVVGSGAKMsPEplE...csshchlcEh-PDFlIaluPNPAAPGPpKAREhLutushPAlIIGDAPGL+VKDEhEEQGLGYIllKsDsMIGARREFLDPsEMAlFNADVlKVLAuTGAaRlVQEAIDchI-clKtGK..psELPplVIsppKAVEAtcFoNPYAKAKAMAAapIAEKVADlDV+GCFhppDsE+YIPIVASAHEMhRhAAcLADEARElEKuNDsVhRsPHu.-GKhLSK+pLMtKPE .VVKIGIlKhGNIGhSsllDLlLDERADRpDIsVRVlGSGAKMsPEplE...cssschlp-h..cPDFlIhISPNsusPGPppARElLtttslPsllIuD.uPuhK.s.K..D..t...hccpGhGYIIlKsDPMIGARREFLDPsEMAhFNuDllKVLAsTGAlRlVQptlDcsIcslctGK...-lcLP+IVlos-KAVEtupFsNPYAKAKAhAAaphAEKVAslDVKGCFMsK-hEcYIPlVASAHEhhRhAAcLsDEARElEKusDuVhRpPHupcGclLsKscLhpKPE.... 0 9 28 35 +3419 PF02536 mTERF mTERF Bashton M, Bateman A anon Pfam-B_1422 (release 5.4) Family This family contains one sequence of known function Human mitochondrial transcription termination factor (mTERF) the rest of the family consists of hypothetical proteins none of which have any functional information. mTERF is a multizipper protein possessing three putative leucine zippers one of which is bipartite. The protein binds DNA as a monomer [1]. The leucine zippers are not implicated in a dimerisation role as in other leucine zippers [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.04 0.70 -5.86 15 1485 2009-01-15 18:05:59 2003-04-07 12:59:11 9 25 144 8 968 1433 16 187.20 13 64.71 CHANGED LsLh+ShtFosSpISoI......IpsYPplLlscscpoLssKLpaLpS+GASpS-lTclVSsVPcILupcth+olushaDal+cllhs..DpuSphE+hspsLspuspp.sh.pNlulLR-LGlsp+lLhsLLlSchpPVsG+......cph-tplccllEhGhDPsss+hVhuhp..llhphsDKslctpVshh+sLGFslsDVhslhp+tPphLshSpcp...h....................K........t.p.llsoIEphlu..........LGFSR-EhthMV+RaPtslshSsEp.VK+KhEFlVKcMshPl+slVphPpVhuYSLEKRhhPRssll+sLhSKG.......cLsslSpsLssTDppFLp ..........................................................................................................................................................................................h..............................................................................................................................................................................................................................................................................................................................h.............h..h.h...t......h.t..........t.......h........h...........h.................hh..............................................................................................................................h.........tht.hh..p.....................................hG.hs.tppl.h...t...h..lhp.h.P..ll...t.h.............s.cp..lp.php.ah........ht...........h......th.............t.........l....h....t.........P..hh..ht...h.......................................................................................................................................................... 
0 188 543 757 +3420 PF02219 MTHFR Methylenetetrahydrofolate reductase Bateman A, Mian N anon Pfam-B_2407 (release 5.2) Domain This family includes the 5,10-methylenetetrahydrofolate reductase EC:1.7.99.5 from bacteria and methylenetetrahydrofolate reductase EC: 1.5.1.20 from eukaryotes. The structure for this domain is known [1] to be a TIM barrel. 19.90 19.90 20.00 19.90 19.70 19.80 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.62 0.70 -5.32 12 4458 2012-10-01 19:29:00 2003-04-07 12:59:11 12 9 3842 40 1248 3279 1590 271.20 29 75.03 CHANGED plpphhp..puchhhSaEFFPPKTspG.pNLhsphcRhptht.PhFlsVTaGAuG.....spt.chohslspthppppsl-shhHLTCsshshttlcptLcphhphGlRNILALRGDsPtstc..atp.ptt..hpYAhDLV+hI+p....caGDhFsIuVAuYP..EsHP......psts...hptDlhaLKcKl-AGADFlITQhFa-s-sal+FhspspstGh....shPIlPGIMPIpsacphp+hsphs.pspIPpplhsplp.sl+sD-psl+plGlchth-hspcLlspG...V.slHFYTLNhEcushtIlcpLG ..............................................................................................................t..........tp.slShEhaPP+......s......t....p...h..t......p..pl...h.p.s.l.c.p..Lpt....hp...P..cFl.oVT.auus.u..........................s.p.c.pp.Thphs...p.tlp...p........c...h....ul........p..shsHLT.Ch.s.t.s.t.....p..clpphlts.h.t.p.hG.l.c.pIlA......LRG..D....s.t...s......t...........................s...t....s.....s..............h.p..a.A..s-.L......l...p.h.l+.......................phu..c..............F.......s.......I.u....l...A...u.....Y..P.........Eh.HP....................................cu.p.s......hps..Dl...t..p..L+c.K....l....-....A......G.....A....s.....h.....hI.TQhF.FD.......s....-....p....ah....cF...h...-........c....s....tt.t.Gl.........s.l.P.I...lsG.....I..h...P.l.s.......s.h..p.p.ht..+..h.s..p.hs..ss.c..lP..ph..hhphh-....t................h.........c....s......D....s......p.........s....hc...t.h...G.h..p..h...u...h-...h...h...p...pL......h....p...p.G............l.......shHhY.T.hNpsphshtlhp....................................................
.............................. 0 432 804 1058 +3421 PF05068 MtlR Mannitol repressor Bateman A anon COG3722 Family The mannitol operon of Escherichia coli, encoding the mannitol-specific enzyme II of the phosphotransferase system (MtlA) and mannitol phosphate dehydrogenase (MtlD) contains an additional downstream open reading frame which encodes the mannitol repressor (MtlR). 21.50 21.50 21.50 21.90 21.00 21.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.03 0.71 -4.82 4 1088 2009-09-11 14:05:50 2003-04-07 12:59:11 7 2 679 8 70 326 4 169.90 49 95.31 CHANGED Msshs.......................EsclLE+LNtscolRuFhhTuVslLsEAlctLl.plF.RKD-aAVK.sV-PLLssoGPLsDLoVRLKLlaGLGlIs+plapDIpHhhpl+cpLNcDspEYsFsDs.Ils.IppLsslschuhL...shct.-ssD.SlYphphtRhpphl+SsLoLAlTplhppLsh ....................................................hp...EscVLEpLsAscslpSFhhTAsplLspulphLl.plF.+cD-aAVcY.AVcPLLs..tsG.PLuDloVRLKLIYuLGlls+t.YpDhphhhtl+ctLNc-ss-huFsDDtIls.hup..L.p..sl..s..s.s.........s...t.h.c..ADhulauh..tRapphV+oshsLulTpllpclo.h.................................... 0 6 21 46 +3422 PF03083 MtN3_slv Sugar efflux transporter for intercellular exchange Bateman A anon Pfam-B_623 (release 6.4) Family This family includes proteins such as drosophila saliva [1], MtN3 involved in root nodule development [3] and a protein involved in activation and expression of recombination activation genes (RAGs) [2]. Although the molecular function of these proteins is unknown, they are almost certainly transmembrane proteins. This family contains a region of two transmembrane helices that is found in two copies in most members of the family. This family also contains specific sugar efflux transporters that are essential for the maintenance of animal blood glucose levels, plant nectar production, and plant seed and pollen development. 
In many organisims it meditaes gluose transport; in Arabidopsis it is necessary for pollen viability; and two of the rice homologues are specifically exploited by bacterial pathogens for virulence by means of direct binding of a bacterial effector to the SWEET promoter [4]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.83 0.72 -4.11 25 2182 2012-10-03 12:15:12 2003-04-07 12:59:11 11 19 651 0 1265 2047 467 83.20 24 68.44 CHANGED .hlGhlsssholshFhuPLush.tpll+pKSs-shshh.hlsshlsushWhhYGlhhpDh..hlhhsNslGshltsl.llLalhYs.ccp ...........................h.hlGhlushhulh.hahu..s...........l..........sh..hhp..lhp.....s.+...........os.p.t.hs.h.h.....lsshlsshlW...hhYG...l.....h.....p.....p...Dh....hlhhsN.s..h.......Gh.l.huh..lhhhhahha.....t........................... 0 350 759 1105 +3423 PF03821 Mtp Golgi 4-transmembrane spanning transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.40 22.40 22.60 22.80 21.40 22.00 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.65 0.70 -4.97 3 234 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 71 0 98 196 0 190.60 37 71.58 CHANGED HlVMSVLLFIEHoVEVAHGKuSC+h.pMsYLRhADLlSSFLLIssLFIISlSLLIGVVKNREKYLlPFLSLQIMDaLLCLLTLLGSYIELPAYLKLA.RsRsuuSKlPLMTLQLLDFCLSILTLCSSYMEVPTYLNFKSMNHMNYLPSQEDlPHsQFIsMMlIFSVAFITVLIFKVYMFKCVWsCYKaIKsMNSsEEcssSKMhp..KVVLPSYEEALSLPsKTPEG-PAPPPYSE ..................................................................................................................................................hhs..shshh.hslshLhhhhsthhsYGshp..........p......p.suallP.FFChQLFDFsLosLsAhSsls...YlPslpcalsp..........h.Pa+-clhphss.....sLhl...Illlhahhllhh....K..uYhIsCVWsCY+Yl..ps+..N.ss-.........h..s.....sth.t.........phl...L.Ps..Y-.Ah....p..ttp.ssPP.hs....................................................... 
1 15 21 45 +3424 PF04208 MtrA Tetrahydromethanopterin S-methyltransferase, subunit A TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 25.00 25.00 27.10 25.70 24.80 23.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -4.82 6 110 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 65 0 76 111 15 167.80 46 71.55 CHANGED K+cPAEGWPlVpGDYlVGDPESsVuVVTLGSHlp.csslcAG....AAIuGPCKTENLGIEKVlANlISNPNIRFlllCGuEVpGHITGQohcALHpNGVD.-st.IIGApGAIPYlENlscEAVERFp.pQVElVDLIDlEDh-pIsptl+EClpKDPGAh-E-PhllclpE..GtcEEEEss ...........KKtPAtGWPllpG-Yhl..GsPcSsVAVlTLGS...........Hh.....ps.slc....A....G.......................AAIsGsC+TENLGIEKllANlISNPNIRFlllsGsEVp...GHloGQslhALacNGl...sc...-G...+...IlGApGAIPFlENlsp-ulcRFQ.ppV.EllDlI-sEDhutIpstIc-shuKDP.GAhtt.-shllclpt..tttt......st....................... 0 17 47 62 +3425 PF04211 MtrC Tetrahydromethanopterin S-methyltransferase, subunit C TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 
25.50 25.50 25.60 89.50 23.40 25.40 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.61 0.70 -5.34 4 55 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 53 0 40 55 5 258.60 39 98.46 CHANGED MSs.uuGGcAtt.....uhP.pplMAlGIlsGLlGhYhush.ss..ltslhuuLuAlsAsVWGADAVRRVAuYGLGTGVPSIGhhuLGhGhlAAlhGlulsus.slPhhAAPIlullluAllGsllGsLsp+lltMKIPIME+shsEISsAGsLulLGhosAlAGSashpuVlshVlAsGhIALlFIlsuMuILHPFNACLGPsEsQcRTLhLAspsGhIshhlA........GLhshsls.....shLlGhlhWhlsFhKFhphoh+DAsuVlaoGhlPKpE ....................huh.usuu.Att.....hhPpsplhslGlluuLlGIYlu..thhss.....lh....shlGGLuAlsAhVhGAsslR+VAuYGLGTGVPSIGMluLGhGlluulhGltlush.......................s....h........s........uP......Il.......ulllAhllGhllGhLus..pslsMKIPlM.puhscLuhAGALulLGhosshu.Guash..s...............sll...............s...............sslssGhIAlhFIhuuhAILHPFNACLGPsEspcRTLtLAltsGhluhllh.......................ulhshshl..........sllluhlhWhlsatpalphohcDAssVhhss.lPct............. 0 9 26 34 +3426 PF04207 MtrD Tetrahydromethanopterin S-methyltransferase, subunit D TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 
25.00 25.00 58.50 58.50 21.90 21.60 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.59 0.70 -4.96 4 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 53 0 40 55 5 213.10 54 91.73 CHANGED MDhll...lhhhhITlGGllIssuVHFVPVGGAPAAMATATGVGTGTsQLAAGAGLTGLloAAsMs......spshhlIhhuGAVGuMlMlulTMLVGslIYVYGVGsVPsSAKVclDPITthcQ-hYVTPGTEGHGlPTVsFVSGIIGGuLGGIGGuLlYaALhclhhs.u...huss..........VAuIhAlGhFFlNAVlASYNIGGTIEGFHDPKFK+hP+uVVuSLVASI .................................llslhtIsIGGslIuhuVHFVPVGGAPAAMApuTGlGTGTs.LAAGAGhTGLluAAshst..............................s.shhllhhoGAVGuMlMhulTMllGshIYVaGlGlsPAuuKsphDPITtDpQc.YloPGTpGHGlPTVsFVSGlIGuhLGGlGGuLlYhuLhp.lsh.s.....s...........s...........l..AulhAlGhFFlNAVlASYNIGGTIEGFHDPKF.K.+hPpulluuhlASl.................. 0 9 26 34 +3427 PF04206 MtrE Tetrahydromethanopterin S-methyltransferase, subunit E TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 
20.20 20.20 20.30 131.00 19.50 19.60 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.92 0.70 -5.38 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 55 0 38 57 5 270.60 54 91.35 CHANGED LGlLALhGAuATIAGAuEDLESDVGSQSNPNSQVQLAPQMGNlHRaFNKAISGEPVSYuLaCuIAGoVAaVlMpphsLPslhALAlGAuIAAhVHssYAlTAaLGRluS.uupFsQPlYLDMlhSHLGPIAGHGFIsTFCIVulAYLMTllLs...HPFPLPLLAhIWGITIGAIGSSTGDVHYGAEREYQ+YPFGGGlPVAsHGDITRKAEhGlRNuMDsuaFCAKaGGPLTGLAFGLIVFLusWssllhs.....ttGulIs......hhGLlIVhlLIIhN .....lGslALhGAhATlAGsAEDLESDlGSQSNPNSQVQLAPQMGplHRhFNKAISGEPluYGLasuluGulAasLh.......ths....hs..sllAIslGuslAAhVHGsYusoAalGRhsu.ppcFsQPlYhDllpoHlssIhuHuFIAsFshlshuYLhs...ss..Lu......pPF..PLPLlAlIWGITlGAIGSSTGDVHYGAEREYQph.FGuGlPlAspGsIsphAEhGhRNulDsuaFCuKaGGPlTGlsFGLIVFL-hWRollFs.....thG.u.lls..............hGlllVllhhlhN............. 0 8 24 32 +3428 PF04210 MtrG Tetrahydromethanopterin S-methyltransferase, subunit G TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 21.50 21.50 21.60 58.60 21.40 20.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.32 0.72 -4.30 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 54 0 38 48 10 69.20 41 66.95 CHANGED --K..lPpslssss-apclpcRLD-IEcKlEFssuElhQ+hGK+lGRDIGILYGlVIGLlL.hIhsllshhF ........s.....lPtslsssp-apcl.c+LDcIEcKVEFssuElhQRhG+KlGRDlGILYGlllGll.lhll.h..h....h......... 0 8 24 32 +3429 PF02007 MtrH Tetrahydromethanopterin S-methyltransferase MtrH subunit Enright A, Ouzounis C, Bateman A anon Enright A Family The enzyme tetrahydromethanopterin S-methyltransferase EC:2.1.1.86 is composed of eight subunits [1]. 
The enzyme is a membrane- associated enzyme complex which catalyses an energy-conserving, sodium-ion-translocating step in methanogenesis from hydrogen and carbon dioxide [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.68 0.70 -5.42 6 101 2012-10-03 05:58:16 2003-04-07 12:59:11 13 2 81 0 68 133 38 294.50 41 94.20 CHANGED KEQcVh-IuGVKIGGQPGEhPTVLAGTIFYu+HKIVcDEc+GIFD+-AAEsLIppQE-huDtTGNPhllplhGpTPEAIl+YIDFVA-loD.uPFLIDSopG-lRhAAAcaAoElGLADRsIYNSINsSl--uEI-ALspSDlsAuIVLuFNsMDsoVcG+lplLEsGu.ust-KGhLplA-cCGI.Kh.LIDsAlTPlGsGAGsAl+sohslKuKaGhPlGSGhHNsPSAWDWLRca+KcptEttt.........ssDlGoNllQthAuuDFlLYGPIENA.hlFPAsAMlDhhIuEAs+.-lGlEss .............................................+cQpVh-luGsKlGGQPGEaPTlLsGoIFYspHcIV...pDtc+GhFD+ptAEsLlscptEhSDhTGNPthlpl..hu..pos.....E....Ahp+Yl-Fls...-ls-.sPFLlDSosu-sRhuusc.h.s.sElGls-RsIYNSINhuhs..c..p..E.l...c...ALp..c..Sclc.uuIlLuF...ss..h..D..s.o.lpG+hplLpsGu.ts.ccGhLplAc.csGIpp...LlDsAlhPlGsGuGh.uh...RushslKu+aG.hP...sGuGhHNssSuW.cWL+ch+Kp.ht.phht................................ssDlGuNllth.htGuDFlLYGPI-NAthsFPAsAhsDhhluEusc.-hGht..h.............................................................. 0 20 49 58 +3430 PF05175 MTS Methyltransferase small domain Yeats C anon Yeats C Domain This domain is found in ribosomal RNA small subunit methyltransferase C (eg Swiss:P44453) as well as other methyltransferases (eg Swiss:Q53742). 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.93 0.71 -4.88 34 8728 2012-10-10 17:06:42 2003-04-07 12:59:11 9 24 4157 22 1810 48732 14113 160.50 23 53.65 CHANGED phphpohsGVFStsclDhGoplLlpslst...hsup....llDlGCGhGlluhhhuctsPchp.lshsDhsttAlpuu+tshpsNtlc....sclhhuDlhssht..tcaDhIloNPP.............FHsGt.sht.tlspchltsAtptLpsuGcLhlVuNcpLsYpsh...lcchFG..pschlscsstaplhp ............................................................................t................................................................h.....h...t.........h.................ttp....................lL.DlG...s...G..s.G.........s....l.....u..l....s....l...........A...........p..........t..........t........P............p.............h.............p...........l........s........h.......s...........D......l........s........t......p....A.........l..........p.......h......A.........c.......p....N.....h.............p........p................p.............t.........l.........p..........................l..............p...............h..............h.........p............u.............c.............h..............h...............s..............s.................l..............t....................t............p.............a...........D..........h.......I......l..o......N....P......P................................................h....t......s........up.....p...............s..............t...........h......h........p.......p........l........l....t.......p.......A....h.........p...h.L.p..s.s......G.......L.....h.......l...........h.................................................................................................................................................................................... 0 525 1050 1457 +3431 PF02416 MttA_Hcf106 mttA/Hcf106 family Bateman A anon Pfam-B_1826 (release 5.4) Family Members of this protein family are involved in a sec independent translocation mechanism. 
This pathway has been called the DeltapH pathway in chloroplasts [2]. Members of this family in E.coli are involved in export of redox proteins with a "twin arginine" leader motif [1]. 20.50 18.00 20.50 18.30 20.40 17.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.51 0.72 -4.85 12 5974 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 3284 1 1554 3673 2404 70.10 27 71.89 CHANGED luhschllIhlVslllFGsKKLPEluRuLG+oLRtFKpAs+phpst..............................pcpptpc .......luhhcll.l..l..h.l.l.s.l..L..l.F..G.s....c.K.LP....p....lups....l.G......p.......u...l.......+.......tFK......c....uh.p.-..ptt.....................................pttttttt..t............................................................................................................ 0 510 1045 1336 +3432 PF02316 HTH_Tnp_Mu_1 Mu_DNA_bind; Mu DNA-binding domain Mian N, Bateman A anon Pfam-B_12856 (release 5.2) Family This family consists of MuA-transposase and repressor protein CI. These proteins contain homologous DNA-binding domains at their N-termini which compete for the same DNA site within the Mu bacteriophage genome. 21.60 21.60 21.60 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.77 0.71 -4.48 4 448 2012-10-04 14:01:12 2003-04-07 12:59:11 11 15 279 6 72 410 4 108.40 21 26.71 CHANGED pchsGhPGlPphssGVphhAptpGWpKpp+pGsKG.hAhEYclsSLPpEsRttllt.........uAuhsEhsspo.hhcshsppllhshppLssDppptslchl....l....Phl.stDchhsthGhoppp.tTLhhh.Alssp ......................h.GlsGlPpospGlp....h....t....A+.+.....-.uWt....p........R.....+..R..........pGh.....pG...uhEYplsSLP.psptt.l...h.................................................................................................................................................................................................................................................... 
0 16 39 58 +3433 PF02914 DDE_2 Mu_transposase; Bacteriophage Mu transposase Griffiths-Jones SR anon Structural domain Domain \N 20.30 20.30 20.60 20.50 20.00 19.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.41 0.70 -4.83 2 134 2012-10-03 01:22:09 2003-04-07 12:59:11 10 8 111 3 20 153 2 197.90 45 33.31 CHANGED VthLpAMpWINGDGY.HNVaVRa.sG-lhRPKTWhWQDV+TRKlLuhRsDhSENhDoIRLShhDVloRYGlPc..HlTIDNTRuAANKhhTGGs.NRYRapVpEs-spGlh.hhGhchHWTSl.hGKGhGQAKPlERAFuhGGLt-YVDKH.hLtGAYsGsNs.tKPD....Nhu-psVDhthFLhsLtpGlt.aNshssR.TEhCuGK.Sas-sFER-aA ..............................pLcAhphlNGDGapasVFV..+..W....s.G..c....l..l..RPKsah...WQDltoRKILuaRsDhSE.Nt-.olR.LuhhDl.......l..pp......Y..G.IPc..+lpIDNsRu.sAsKhhTG.Gs.............NRaR..........F...K.V+...-.-...-spGlh..hl.G.h...c.hHWTo..l....h...hs.cGhGQAKPlERAF.u..hsu..Lt-hlDK.+.PthsGAYTGsss.uKP-.....Nhu.c..+ulsh-hFlts..ltptlthaNs+ssRpoEhCtG..h.SacpsFptpY.............................................. 0 8 14 18 +3434 PF03888 MucB_RseB MucB_ResB; MucB/RseB family Bateman A, Finn RD anon COG3026 Family Members of this family are regulators of the anti-sigma E protein RseD. 
29.00 29.00 29.10 29.30 28.40 28.80 hmmbuild --amino -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.68 0.70 -5.38 8 1077 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 1056 12 197 621 90 281.60 46 88.58 CHANGED Lp+MscAsppLSYEloFVhp+suSh-ohRYRH.scp.DG+shA+LlpLDGstpEVlpRDsclSYhpPulpsFTlc.ss.lssshP.lhpsDhc+LSsaYDFlpVG+uRVAGRhssVlRllPKDsaRYuYllalDcEopLsL+SDLLDR-GpLLEQFRslsLsluptlststctLpssshPt...llpsspsstpsshuWpssWLPpGFshhs.thpp.slo-c..l-SthYSDGLFsFSVaVpsscusslpDpps+pGspTlhSchl....GspElTVVG-lPhuTAcRIApSI+Fs .................................LppMs.ASQsLNYElSF.lp.l.s.c.p.u..lESLRYRH..Ach..D.sc.sl.ApLlphDGPpREVlpRG..s.E.lSYFE..P...G...l-PFT.........ls....Gc..hIsDo....lP...ul..lh...............s..D...............hc.+L...u.....s..aYDF.lsl.G.R.sRlAsRhCpVIRlVs+Du.pR.Y.SYllWhDp-o+LPhRsDLL....DR.DG.E.sLEQ..FRl..lu.hs..V...s.....p.....c...l..u........s....s..h........p....s.L.s........K...s....sL.Ps..................lL...sl..P....s.....u......p..............p....s..c.........h.......s....Wps.sWLPpGFsplupsRR...............s............L........s...s...........h......-s...........h.....lE..SpLYSDGLFSFSV.V...s..t......s..s.......s...s...s...s...-...Q.h..lR.p....GRRT..lhopV+........sss..E..ITl..VGElPPpTAKRIAcsIcF.t...................................................... 0 31 91 149 +3435 PF03108 DBD_Tnp_Mut MuDR; MuDR family transposase Bateman A anon Pfam-B_271 (release 6.5) Family This region is found in plant proteins that are presumed to be the transposases for Mutator transposable elements [1,2]. These transposons contain two ORFs. The molecular function of this region is unknown. 
21.80 21.80 21.80 21.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.18 0.72 -4.40 89 954 2012-10-02 23:28:20 2003-04-07 12:59:11 10 78 48 0 424 946 0 64.40 24 8.07 CHANGED ssthtlGphFpst..pph+pulppaulppphphchh+oc.pp+htscCh....scs...CsW+lhAuptpc.sptapl .....................hhhGthFssh..cch+pAl...pp.au.lpp.c.hp.ach.h+os..pp.+hps..cCh......tcs.....................Cs..W+l..pApphpt.pphatl.................... 0 13 155 219 +3436 PF04310 MukB MukB N-terminal Mifsud W anon COG3096 Family This family represents the N-terminal region of MukB, one of a group of bacterial proteins essential for the movement of nucleoids from mid-cell towards the cell quarters (i.e. chromosome partitioning). The structure of the N-terminal domain consists of an antiparallel six-stranded beta sheet surrounded by one helix on one side and by five helices on the other side [1]. It contains an exposed Walker A loop in an unexpected helix-loop-helix motif (in other proteins, Walker A motifs generally adopt a P loop conformation as part of a strand-loop-helix motif embedded in a conserved topology of alternating helices and (parallel) beta strands)[1]. 23.00 23.00 23.10 23.10 22.50 22.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.58 0.70 -5.02 2 745 2012-10-05 12:31:08 2003-04-07 12:59:11 7 2 733 6 73 491 11 224.10 84 15.54 CHANGED MIERGKapSLThINWNGFFARTFDlDpLVTTLSGGNGAGKSTTMAAFlTALIPD.oLLHFRNTTEAGuopuSRDKGLaGKLpAGsCYuhLDslNSRpQRllhuV+LQQVAGRD+KVDIKPFhIQGLP.pVpPTpllsETls-+pARVh.lNElKDtltthEGspFKpFsSIsDYHu.MF-hGlIs++LRsuSDRSKFYRLIEASLYGGISSAITRSLRDYLLPpNuG .......MIERGKFRSLTLINWNGFFA.RTFDL..DE...........L..............VT........TLSGGNGAGKSTTMAAFVTALIPDLTLLHFRNTTEAGATS.GSRDKGLHGKL.....KA.G.VCYShLDTINSRHQRVVVGVRLQQVAGRDRKVDIKPFAIQGLPhSVQPTQLlTE.TLNERQARVLsL.NELKDK.LEsMEGVQFKQFNSITDYHSLMFDLGIIARRLRSA.SDRSKFYRLIEASLYGGISSAITRSLR.DYLLPENSG........................ 
0 7 23 49 +3437 PF04288 MukE MukE-like family Mifsud W anon COG3095 Family Bacterial protein involved in chromosome partitioning, MukE 25.00 25.00 26.80 26.60 21.50 17.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.52 0.70 -5.56 3 756 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 742 12 72 290 5 226.30 80 96.59 CHANGED MPsKLApAIANPLFPALDSLLRSGRHISoD-LDNHAFLMDFQs-L-tFYpRYNVELIRAPEGFFYLRPRSTTLIsRSVLSELDMLVGKVLCYLYLSPERLApEGIFTsQELYDELLTLADEuKLLKLVNNRSSGSDLDRQKLtEKVRoSLsRLRRLGMVhTlG-psSGKFRITESVFRFGADVRuGDDPREAQtRLIRDGEAs..TP-shslEsQtQLhENDTsEtDE.DoEa.GE-E .........MPVKLAQALANPLFPALDStLRSGRHIGLDELDNHAFLMDFQEaLEEFYARYNVELIRAPEGFFYLRPRSTTLIPRSVLSELDMMVGKILCYLYLSPE.RLANEGIFTQQE.LYDELLTLADEuKLLKLVNNRST.G..S..D..lDRQKLQEKVRo....SLNRLRRLGMVWFMG.pDSSKFRITESVFRFGADVR...uGDDPREAQtR...LIRDGEA..........MslEN.c..LQL.......N..DEoE..E.s.Q...sDS...GEEE.............................................. 0 7 22 48 +3438 PF00893 Multi_Drug_Res DUF7; SMR; Small Multidrug Resistance protein Bateman A anon Pfam-B_1082 (release 3.0) Family This family is the Small Multidrug Resistance (SMR) family. Several members have been shown to export a range of toxins, including ethidium bromide ([1] and quaternary ammonium compounds [2], through coupling with proton influx [3]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.02 0.72 -3.52 17 6822 2012-10-02 19:55:49 2003-04-07 12:59:11 14 6 2594 2 1186 4014 2383 92.70 32 83.38 CHANGED hsal...hLhlAhlhEllusshLK........ocGFo+hhPolhsll...uauluFhhLohuhcp.lPlulAYAlWoGlGhlhsslsullhFtEplshhpllulsLl ................................................................tWl...hLhlAhl.hElh...ushs...L...Kh...........................o.p..G..F..s..+....h...h.....s...s..l...l..s...ls...........sh..sl.S.F...h...h...L..u.h..Ah.+p....l..P..l....G...lAY...AlWs.Gl.Gh...l.ss.slh...uh...l.las-slshhpllulsLl......................... 
0 303 684 942 +3439 PF01225 Mur_ligase FPGS; Mur ligase family, catalytic domain Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family contains a number of related ligase enzymes which have EC numbers 6.3.2.*. This family includes: MurC (Swiss:P17952), MurD (Swiss:P14900), MurE (Swiss:P22188), MurF (Swiss:P11880), Mpl (Swiss:P37773) and FolC (Swiss:P08192). MurC, MurD, Mure and MurF catalyse consecutive steps in the synthesis of peptidoglycan. Peptidoglycan consists of a sheet of two sugar derivatives, with one of these N-acetylmuramic acid attaching to a small pentapeptide. The pentapeptide is is made of L-alanine, D-glutamic acid, Meso-diaminopimelic acid and D-alanyl alanine. The peptide moiety is synthesised by successively adding these amino acids to UDP-N-acetylmuramic acid. MurC transfers the L-alanine, MurD transfers the D-glutamate, MurE transfers the diaminopimelic acid, and MurF transfers the D-alanyl alanine. This family also includes Folylpolyglutamate synthase that transfers glutamate to folylpolyglutamate. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.62 0.72 -3.91 236 13732 2012-10-10 17:06:42 2003-04-07 12:59:11 20 28 4352 28 3015 10183 5325 86.30 23 18.21 CHANGED plp.hlulsupshs....shu.hhlshtGhpl...............sGpDhhpp......hhptG.Atshlsc....pst.............................t.th..tspt.tl.l..sttphLuplspthh .....................lphlslcup..t.hs........shs..la.l.s..h.h.G..t.ps...............sGpD...hhsp........................hhp.pG....ss....shl.sp......cst...........................................................................th...t....s..p..h..s..t.l..V....hs.t.tphLuplst.h.h............................................................................. 
1 1033 2013 2574 +3440 PF02875 Mur_ligase_C FPGS; Mur ligase family, glutamate ligase domain Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family contains a number of related ligase enzymes which have EC numbers 6.3.2.*. This family includes: MurC (Swiss:P17952), MurD (Swiss:P14900), MurE (Swiss:P22188), MurF (Swiss:P11880), Mpl (Swiss:P37773) and FolC (Swiss:P08192). MurC, MurD, Mure and MurF catalyse consecutive steps in the synthesis of peptidoglycan. Peptidoglycan consists of a sheet of two sugar derivatives, with one of these N-acetylmuramic acid attaching to a small pentapeptide. The pentapeptide is is made of L-alanine, D-glutamic acid, Meso-diaminopimelic acid and D-alanyl alanine. The peptide moiety is synthesised by successively adding these amino acids to UDP-N-acetylmuramic acid. MurC transfers the L-alanine, MurD transfers the D-glutamate, MurE transfers the diaminopimelic acid, and MurF transfers the D-alanyl alanine. This family also includes Folylpolyglutamate synthase that transfers glutamate to folylpolyglutamate. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.13 0.72 -3.95 50 21443 2009-09-14 13:20:23 2003-04-07 12:59:11 16 65 4466 65 4592 15708 6742 85.40 20 18.16 CHANGED lt.tRh-hlsp..........pshhllsD.YAHsPsuhpsslpshtth...............t+llhlhGsht-...Rstshps.hspltsthssh.lllhs....t.tp.ststh ..........................tRhp..h..l.s.p.............................p.s...s..p.l..lsD..aAHsPsuhps.s.l.puh..p..t.h..s.....................................pt+.ll.hlhG.shhc...................+....s...t.s....h....t.t...h.s.p..h.h.s...t.....ss....llh..h..t..................h............................................................. 
0 1552 3028 3916 +3441 PF02873 MurB_C UDP-N-acetylenolpyruvoylglucosamine reductase, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_1092 (release 5.2) Domain Members of this family are UDP-N-acetylenolpyruvoylglucosamine reductase enzymes EC:1.1.1.158. This enzyme is involved in the biosynthesis of peptidoglycan. 21.00 21.00 21.30 21.20 20.80 20.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.28 0.72 -4.28 13 4494 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 4271 10 953 3207 1942 113.70 37 35.04 CHANGED cpplhppspphpppR.h.ppPhchPsu..GShF+NP.sp..............................tAGpLIEcsGL.KGapIGGAp...lSchHANFllNpGsAoucDVlsLIchV+ppVt-caGlhLc.EV+hIG ...........................t..plhpthpclppp+....h...p..P..h....p..h..s..os..GShF.K.NPhsp...........................................................hAGhLI-p.u.GL.KG....h....p.....l.......GG....At.................VSp..+..HAhhllN...t..u......s.......A......Tup........D.......lhsLhctVpppVt-+FGlpLcsEV+hlG.................. 0 325 632 810 +3442 PF02976 MutH DNA mismatch repair enzyme MutH Griffiths-Jones SR anon Structural domain Domain \N 21.90 21.90 23.00 22.30 20.70 21.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.34 0.72 -4.15 50 953 2012-10-11 20:44:43 2003-04-07 12:59:11 10 5 911 6 127 531 32 101.00 60 39.83 CHANGED h.LGAsAGS+PcQDF..scLGlELKTIPIstpGc.............PLETTaVslAPLhshsGl.sWcs.SpVpcKLp+VLWlPlpG-RpIPl......u-RpIGsslLWpPss.p..ppphL+pD .................hLGAoAGSKPEQDF.......AtLGVELKTIP.lcuhG+.....................PLETTFVCV.APL...TGNoGV.TWEo.SHVRHK..Lp..R..VLWlPVEGER...sIPL............AcR...RV...GoPLLWSPsc...E....E-cQLRpD................................... 
0 19 53 92 +3443 PF01624 MutS_I MutS;MutS_N; MutS domain I Bashton M, Bateman A, Studholme, DJ anon Pfam-B_800 (release 4.1) Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF05188, Pfam:PF05192 and Pfam:PF05190. The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds with globular domain I, which is involved in DNA binding, in Thermus aquaticus MutS as characterised in [4]. 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.30 0.71 -4.04 60 5408 2009-09-12 04:05:03 2003-04-07 12:59:11 15 62 4333 49 1586 4546 1040 110.20 40 13.01 CHANGED TPhhpQYhclKpp.a.DslLhF+hGDFYEhFh-DAhhuu+hLslsLTt+t........tstsp..hPMsGlPh+uhcpYlp+Llp.p.Ga+VAlsEQhEsstts.................+.....s...llcRcVs+llTPGTlh-ss ...................TPhMpQY.hclK....tp.....a.s..........D..slLhaR..h...G....D...FY.E...........hF..........a.....-......D........A.......h......p........A.uplL-IoLTpRs................tsssp....lPMsGlP.a..Hu.......h.-sYlsc..Llp.p..Gh+V.AI..sEQ.h.p-.Ptps......................................K......G......VcRcVscllTPGTlh-t.s........................................... 0 529 978 1330 +3444 PF05188 MutS_II MutS domain II Studholme, DJ anon Pfam-B_800 (release 4.1) Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF01624, Pfam:PF05192 and Pfam:PF05190. 
The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. This domain corresponds to domain II in Thermus aquaticus MutS as characterised in [4], and has similarity resembles RNAse-H-like domains (see Pfam:PF00075). 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.77 0.71 -4.11 47 4968 2009-09-11 06:26:47 2003-04-07 12:59:11 12 45 3989 50 1597 4177 853 129.10 23 14.77 CHANGED shlsu...lht.........cssp.hGluhlDlooGchhlscht.-....hpcLhscltp...lsPpElllspsh.p....................hphh.thphh.....s....ht.hphpp....s..ppplp..pta......sspslcshs........t..thsls.AhusL.....lp......Ylcpsppp.......tlsplp .................................................NhLsu....lhp................pppt..hGlAhlDl.oo.Gca..h....ls..p..hs.s............hppl.ts-ltp..........lsPpEllh.sc...s.hsp...............................................h..h..p...h...h.....t.......t....h..p.h.h.................t................hp...h..p..h...ps.................s......p..pplp......ppa.....................ts..p..slps.hsh.....................................pt.hthsls...As.usL.....Lp..............Ylcc.TQpp........pls+l............................................................................................... 1 504 952 1325 +3445 PF05192 MutS_III MutS domain III Studholme, DJ anon Prosite Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF05188, Pfam:PF01624 and Pfam:PF05190. 
The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds with domain III, which is central to the structure of Thermus aquaticus MutS as characterised in [4]. 27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild --amino -o /dev/null --hand HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.54 0.71 -4.54 306 6533 2009-09-15 09:54:17 2003-04-07 12:59:11 13 68 4115 50 2168 5633 1219 291.10 24 33.92 CHANGED ssThpsLElh.......ps................tstpps..........oLhsll.cc.shTshGpRLL+p..a.ltpPLpc.hspIppRhcsVpthh....p............t............t..ppl......pptL....c.pl..s.Dl-Rll....sR.........................l..............................th..........sps.........s...........s+-lht.....ltpul....pth.p.l..............................................................tph.......htt............................................ltphhp..pls................plh.......phlppslsp..p............sh.h.................................................pcG.......t........................hl....tsGhsscLDph+phtppscphltphppc.ppptshsslchthspshsh.....................................................hhpsppsttpphstpahpppshtss.Rahos-Lpchppclhpsppcthth-pplhppLhpp.h.hsp..hstl.ppsspslApLDsLhuhAc 
.......................................................................ohpsLElh................psh...................tstpps................oLh..tlL...Dc..otT.s.h.G...............s........RhL......+p.......W..lp.............pPl...h..s...hp.p.I.......pp..R..ps....lp..thh................p...............t...hp........ppl..........................p.p..hL.........................c..pl....h...Dl.ERll..........uR......................................................................l..................................................................sh........................sps.........................s....................................s+DLh.p...........lp.pul..............ptlsp.l..............................................................................................pph.............t.hps......................................................hlpplhp.........pls...s...............htclt.................phl..ppu.lsc....p..................sshhh..................................................................................p-Gs............lI..tsuhstpLDch+phtppspphltphptpccppsulpsl+ltaspshua.........................................................................................................................................................................................................hhphppsttphsstpah++pThtsspRahssELpch-sclhpucpcththEtpl.a.pplhppl.h.....tp......h.....pt.....l...pphup...sl.ApL.DsLhuhA....................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 758 1352 1838 +3446 PF05190 MutS_IV MutS family domain IV Studholme, DJ anon Members of PF01624 Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF01624, Pfam:PF05188, Pfam:PF05192 and Pfam:PF00488. The mutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds in part with globular domain IV, which is involved in DNA binding, in Thermus aquaticus MutS as characterised in [4]. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.87 0.72 -3.81 51 5085 2009-01-15 18:05:59 2003-04-07 12:59:11 13 40 4000 48 1596 4267 904 92.30 34 10.61 CHANGED uasscLDch+phhcpspphltchppct+c..chGl...psLK...lshsphhGYalclocsp....tppl....Pt....pahc..ppT..hKsutR...atTscLpphppclhpscpc .....................GastcLDchRphtcsu.ppa..l..t...c...lct+ERc.........coGl................poLK.....l..s.a.N...p.....V...a..G..YaI-..l..ocup..............tppl......Pt.......cahR.....+QT...Lp..NuER.................ahT..sELKchEcclLsAcp.t.................................... 1 515 946 1314 +3447 PF00488 MutS_V mutS;MutS_C; MutS domain V Finn RD, Studholme, DJ anon Prosite Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF01624, Pfam:PF05188, Pfam:PF05192 and Pfam:PF05190. The mutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. 
Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds with domain V of Thermus aquaticus MutS as characterised in [4], which contains a Walker A motif, and is structurally similar to the ATPase domain of ABC transporters. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.56 0.70 -4.96 35 9697 2012-10-05 12:31:08 2003-04-07 12:59:11 16 85 4461 50 2841 8224 1959 222.30 35 28.75 CHANGED YsRPphscp.............slpIctGRHPVVEphh...sttsFlsNDspLspsp........phhlITGPNMuGKSTYlRQsALIslhAQlGSFVPAcpAcIullDpIFTRlGAuDcLspGpSTFMVEMhETAsIL+sATccSLVllDElGRGTSTaDGlulAaAlsEaLt..cch+uhTLFATHY+ELTpLspp.....hss.........Vp..NhHhssh..........Eps.pslsFLapl.ppGsus.cSYGlpVAcLAGl.PpsVlpcA+phLppLEpp ................................................................................spPp.h...s...p..pt..............tl.pl.p..puR..HP..l..l....-p..h......................................h.....l....s.N..s....l.p....l...s...p..p.p...................................phh.l...IT...........G.....P.......Nhu...G.K...S.Ta........hR.................p...............luL..................lslh........A.Q...h.....G..s.a..V.P..A....c............p......u.......p..l.....s.l.h.........D.p..I.F...o....R..l............G..s...........s....D.........s......l....t.......p....G.....tSTF..........h.............sE.......Mp..c........hs............p.I...L........p..........p.........A...............o..........p................p.............S............L...l..L..hDE.......lG........+.GT.soh..DG.h....ul.Ah.A.ll-h.lt................p.......p...h.......p....s....h.s.lh.u.T.HY.h..E..L...s..p.......h...s....p...p......h..t...t.................l.t.....N...h.p.h..p...h...........................................p..s....ppl...h.....a.....h....a...cl......h.......G....s.s.s......c.S..a.....ul.plAp.hh..G..l......Pppllp.....cApphht.....t.............................
.............................................. 0 1046 1858 2452 +3448 PF03023 MVIN MviN-like protein Griffiths-Jones SR, Studholme DJ anon Pfam-B_1348 (release 6.4) Family Deletion of the mviN virulence gene in Salmonella enterica serovar. Typhimurium greatly reduces virulence in a mouse model of typhoid-like disease [1]. Open reading frames encoding homologues of MviN have since been identified in a variety of bacteria [2], including pathogens and non-pathogens and plant-symbionts. In the nitrogen-fixing symbiont Rhizobium tropici, mviN is required for motility. The MviM protein is predicted to be membrane-associated. 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.45 0.70 -6.07 15 3361 2012-10-02 21:24:20 2003-04-07 12:59:11 9 10 2911 0 898 5047 4017 445.50 29 80.32 CHANGED lAshhGAushuDuFhlAF+lPNhlR+lhu..EGuhssAFlPlasctp.........stccuptFspplhslltsshlllollh.llsu.hlltlhusGh.........st-shpLssthhplhhPalhhluLsulhsulLNstc+FhssuhoPllhNlshIhslllhtsphs...hh..........u...LulGlllGGlhQhLhplsslt+tGht.................hc.hhshp-psl+cllphhhssllusulsQlsLhlsptlASh......lpsGuhuhltYusRlapLPlGlFululuTllLPplS+shtsp-ts......phtphhspul+lshhlhlPsohulhlLutsIlslLap+GsFstp-sttsuplLtsaulGLlsauLhplLtpsFYApc-s+oPhplullshllNhshs.....llhhsshussGlAlAsuluuhhshshLahhlt+phh........hh.ttththhltp..hlsshulhusllhhlpphhp..asssphhhhhht 
............................................................................................................................................................................hAthhGA..G..h..s.sDAF.h.lAh+lPNhl.R.c.l..hA..EG..A..F.u.p.A...FV.P..l....hschp..p...........................pspc.t..spt...a....s...sp..l...h...s..h.l.s...h..h.l.h.l....l.T...l.l.u...h.l...huP...hl.....l.h....l...h.u.s.Ga..........................sspp..h..s.Ls....st....hh+.I.h.h.P.a....l.h..hl.u.Ls.u..lh.u...u....l.....L....N....s.h....s...+....F....u.h..s..A...a...u..P....l.....l.....h.N.....l.....s.....h....I.....s....s.....h...l......h.....h...s....s....h....h....s...........s.hh.........................u.........L.A.h...u.l...h...l.G...G...l.....l....Q....h...l...h...p...l.......s....h.L...p.+..h...G.hh.............................................ht.s...c...h...s....h....p......c...s........s......l....p....+........l........h.....p....h.....h....h.....P.....u....l........l...u.....s.......u..l....s......Q.....l..........s.....l........l........l....s.....o.......h........l......A.S.h................................L..s.s.....G....u.....l....S..........h..........h.......h..........Y......A.....s......R..........L..........h........p.....h..........P........h........G..........l........l....G..l....A....lu.T....l..l....L.....P.p.L.S+.t...h.ss.s.s.ps......................p.hpph.hshul+hshlls.lPuulul....h...lLu.........tPlst.sL.....F..p......h.............G..p.....F...s...s...t....D...s.....t..h.s.u...t.u...L.h.s..Y..u.l....G...L...l.u...h.h....l.h.+...l....L.s...uF...Y......A...+.p.D...s.+..T....P.l.t....l....u..l...l...s...l.l.l.sh..l.hs.......................lhh..h......h.........h..........t.......h......s....G.......L....u.....l...u..t....u....l....u....u....h....l.s...s..s...l....L....h.h...h..L..p..+.p..hh....................h.t..s..t......t..h...h.h..h.hhp........l.hlu.s...h.l.h..u....s.s....l....h.h.h.......h.....................hh..........................................................
.............................................................................. 0 310 608 772 +3449 PF02344 Myc-LZ Myc leucine zipper domain Bashton M, Bateman A anon Pfam-B_829 (release 5.2) Family This family consists of the leucine zipper dimerisation domain found in both cellular c-Myc proto-oncogenes and viral v-Myc oncogenes. Dimerisation via the leucine zipper motif with other basic helix-loop-helix-leucine zipper (b/HLH/lz) proteins such as Max Swiss:P25912 is required for efficient DNA binding. The Myc-Max dimer is a transactivating complex activating expression of growth related genes promoting cell proliferation. The dimerisation is facilitated via interdigitating leucine residues every 7th position of the alpha helix. Like charge repulsion of adjacent residues in this region perturbs the formation of homodimers with heterodimers being promoted by opposing charge attractions. 20.50 20.50 21.30 21.20 20.40 20.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.22 0.72 -7.47 0.72 -4.32 2 981 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 838 4 26 560 0 27.60 84 13.20 CHANGED u-Ep+LIuEK-.LR+RREQLKHKLEQLRNSpt .SDEHRLIAEKEQLRRRREQLKpKLpQL....... 0 2 5 9 +3450 PF01056 Myc_N Myc_N_term; Myc amino-terminal region Finn RD, Bateman A anon Pfam-B_387 (release 3.0) Family The myc family belongs to the basic helix-loop-helix leucine zipper class of transcription factors, see Pfam:PF00010. Myc forms a heterodimer with Max, and this complex regulates cell growth through direct activation of genes involved in cell replication [2]. Mutations in the C-terminal 20 residues of this domain cause unique changes in the induction of apoptosis, transformation, and G2 arrest [3]. 
22.20 22.20 22.40 22.30 22.00 22.10 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.43 0.70 -5.27 9 1946 2009-12-11 14:44:54 2003-04-07 12:59:11 13 10 1164 2 129 1419 1 143.50 44 69.28 CHANGED MPlsuShssKNYDYDYDSlQPYFYhDp-D.sFYa..pQps.phQPPAPSEDIWKKFELLPTPPLSPSRRsSLu........................TA-QLEMVoEhLG........sDsVNQSFICD.ushupoFlKSIIIQDCMWSGFSAAAKLEKsVSE+LAShpAuRKEsshusss......................uussRhsusYLQDLusuASECIDPSVVFPYPLs-su........pss............sAsP..............psh.shcssPs....uSSSSGsDo..........-pp---EEEp-EEEEIDVVTVEK..ppp+pcsssSto..............ta.SPLVLKRCHVshH.QHNYAA..PST+.cDtPusKRl+LEup..spslpth.s.....pRKCsSP .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...p.pp......t....s..........................s+s.tSPL..V.LKRC..H.Vs.hH..Q.HNYAAP.......P.S.T+.....h-....Y...P..u....uKRh+L-u.............uRVLKQISs.........sRKCsSP.......................................................................................... 
1 13 28 56 +3451 PF01669 Myelin_MBP Myelin basic protein Bateman A anon Pfam-B_1868 (release 4.1) Family \N 22.00 22.00 33.60 26.40 20.00 19.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.84 0.71 -3.87 5 142 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 42 9 32 174 0 115.40 55 72.19 CHANGED MASASTSDHARHGhGs..RHRDSGLLDSLGRFFG..GDR+VPRKGpGKs...shtttl..hPp.+tttttst...s-ssVVHFF+shhoP...ss.ptthpthu..ho.................h.p.pppuh+u+K....-GpGs.....chG......tptpuSP.SRR ................hAoASThDHARHGahs..RHRDoGlLDSlGRFFG..GDRs...sP+RGSGKss............sRssHhGSLPQ.+s...pH.uRst...D-NPVVHFFKNI.VoP+...........sPPspu+utth.......t........................................................................................................................................................................................................................ 0 1 2 10 +3452 PF01275 Myelin_PLP Myelin proteolipid protein (PLP or lipophilin) Finn RD, Bateman A anon Prosite Family \N 20.40 20.40 20.40 23.30 20.00 20.20 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.79 0.70 -5.03 7 395 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 126 1 135 334 0 203.20 47 86.95 CHANGED GCaECCI+CLGGVPYASLlATlLCFsGVALFCGCGHEALoGTEpllEpYFS..pNhpDahhLhsVIphFQYVIYGlASFFFLYGILLLAEGFYTToAVKplaGEFK..........TTsCG....................RClS......shFlhlTYlLslsWLuVFAFSAlP...VaIYaNhWoTCQsls.ssps.ssshtplCsDsRQYGllPWNA.PGKlCG...sLtsICpTsEFphTaHLFIsAhAGAuATllALlpYhhusoaNaAVLKhhuRtst.p+h 
.................................Ghh-CCh+CL...sul...PaASLlATlLha.GVALFCGC.GH.ALouT.tllc.paFp......psh.t.Dh....lhph.Iphhp.YV..IYG.lAuhFFl.YG.llLL.sEGFaT..T.u.Al..+.c.l.a.G-.aK..........sThC.G................................R.Clo.......................................u.hFlhlTYlhhlsWLhV.huFoulP...Va.haa.NhWo.hCpshp.spts.............plChDh...............RQa...G..ll..P..h...s....s......s....h..K...............l.C.ss...........sh...phCpo..sE.h..ho....acLFIsAhsGA.u..hsl.luh...............lp...ahhhhohNaAhl+.hs+.pt............................... 2 31 42 79 +3453 PF00063 Myosin_head myosin_head; Myosin head (motor domain) Sonnhammer ELL anon Blastp MYSA_HUMAN/1-840 Domain \N 19.10 19.10 19.20 19.10 19.00 19.00 hmmbuild -o /dev/null HMM SEED 689 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.23 0.70 -6.49 24 7667 2012-10-05 12:31:08 2003-04-07 12:59:11 16 484 1348 171 3682 6883 268 481.40 32 44.72 CHANGED s-DhstLohlsEsulLcNL+pRYt.sshIYTYsGhhLlulNPa+pls.lYopchlptYcG+p............+tEhsPHlaAlA-pAYRsMhs-pcsQsllloGESGAGKTEsoKhlhpYlAuluussst..................................................p.tplcpplLpuNPlLEAFGNAKTsRNNNSSRFGKalcIpFstsGph....sGupIpsYLLEKSRV...lhQspuERNaHIFYQLLuGsssp..+ccLpLs...............sspsYtaLspsu....shsl.........................sGlDDsp-FptsppAhcllGhocp-ppsla+llAulL+lGNlp.Ftpst.....ppptu.............csppt.hpthutLhGlsstshtpuLhp.plpsG........pphlsps.shppAhhu+DALAKulYuRLFsWlVs+lNpsLstpp......................................ppsthIGlLDIhGFEIF..-hNSFEQlCINasNEKLQQhFscphhphEQEEYh+EGIpWshI-a.hDspssl-.LIEp...PhGIlulLDEpshhP+u....oDpoFhpKLhppa.spps...pa.p+s.c..........tspspFtlpHYAGcVpYsspual-KN+DsLpssllslhpsSps.sllsplFpp.p..............................................................tttpttpspptph.pTsutpa+pslspLMpsLpss...sPHalRCI+PNcpKtsspa-sphVhpQlpssGlLE..slRlpRtGFssRhhascFhpRYplLsspsh............tsscpusctlLppl......sh-...tppaphGpTKlFh+ 
......................................................................................................................................p...t.........h.....l...Ra.....................................h.....a................o...............u...........h.h.ls.hNP....h..t...h.......................t..........t.............h.......t.......................................................t............P...Hh..ashs..p.s.a.............h..................h................................................t.....................s.....Q.....s..h.l................h................o......G.......E.......SGuGK..T..........s...........s.K.......h..l...h.p.....a..h.....ut...l.u...s.......tt......................................................................................................................................................................................................................tplc.p..p..l...........l....tu............NP..lL..E.....A.FG...............N.AKTh+N.s...N.............S.............SR...F..............G....K.a.l.c.........l...p..F.....s.......p.G.t.l...................................uu..p.l..p...............p...........Y...............L.L...E........K.S.........Rl...................shQ............t.s.E......Rs.aH........lFYp..l.h..s.s....t....p.................p..............hp..h.h.l.t............................................ss..p.a..tal.sp....s......................ph.p.l............................................................................................................s...h...s.....D....t....p......p.....h............t.......h......................p.......A....h....p.hl.G.hs.........p...c................................sl.hplhu...u...l.....h.+...h..GN...hp....Fpttt..................t.t...p..ts..............................................................sp...p...........hp.....hs.....u..........Lh..sl.....s...........t...p.....l....ps.....l....hp..ph.hst..............................................t-h.l...p...s..s...h.t.p.......s............
......sh..s...uh...uKslY.t..p.hF..a...............h........l.......t...lN..t....h....................................................................................................lu..l..L....Dh....h.G..F.E.h.....................t.N...............s...h...E..Q..h..h..lN..hs.........s.E..p..............LQ........t......a..p...................h............hh...-........p.........p...................................Y.....t....E..........t.....l................h.........................................l......p..a...............s..................t...............h.h.................p.........l.......l.............tt.............................................................u..............l...h..hl..p-ts...h...t.s...............sp...t...hh.t....p.h...t.........t...tt...................h.................................................tF.l...H......a..u.s..........V.......Y.p....h..........t....h...h.....+...N...-.....h...........p.h....th..h...............t.........u......t.......................hh.......h.h...................................................................................................................................................................................................................................................................................sh..s....h...p.........p.h....L..h.t..l...tt.................ts.al+C..............l..............h......s...................N...........................................................................................................................h............p.............hhtQlp...t.hhp....hp..h..t...........u.a................s......p.......h.......hh..............a...........h...................................................................................................................................................................................................................................................................................................... 
0 1142 1634 2706 +3454 PF01576 Myosin_tail_1 Myosin_tail; Myosin tail Bashton M, Bateman A anon Pfam-B_356 (release 4.1) Family The myosin molecule is a multi-subunit complex made up of two heavy chains and four light chains it is a fundamental contractile protein found in all eukaryote cell types [1]. This family consists of the coiled-coil myosin heavy chain tail region. The coiled-coil is composed of the tail from two molecules of myosin. These can then assemble into the macromolecular thick filament [1]. The coiled-coil region provides the structural backbone the thick filament [1]. 51.00 51.00 51.20 51.00 50.90 50.80 hmmbuild -o /dev/null HMM SEED 859 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.14 0.70 -13.91 0.70 -6.82 15 2444 2009-01-15 18:05:59 2003-04-07 12:59:11 14 63 325 2 1048 2212 11 548.10 35 44.88 CHANGED -hEpp+c-lEpsLp+KEuElstlss+lE-EQshltpLQ+pl+-LpuRIcELEE-LEsERsARsKuE+pRuDLucELEELuERL-EuuGATuAQhELNKKREAELsKLRRDLEEuslpaEsslusLRKKHsDAlsElu-Ql-QLQKsKuKhEK-KuphptEl--ltupl-phsKuKtssEKhsKphEsQlsELpsKhDEhsRpls-lsopKuRLssENu-LsRQlEEtEsplssLo+hKupLsuQLE-A+RsLEEEuRpRssLpupl+slppDhDsLREplEEEsEAKu-LpRQLSKANuElpQW+oKaEsEuht+sEElEEhK+Khpt+lsEhE-plEutpsKsssLEKsKsRLpuElEDLpl-lE+usutsupLEKKQ+sFDKlluEh+pKs--lpsEL-sAQ+-uRshSoELa+LKsph-EstDplEuL+RENKsLuDEl+DLs-pLuEGGRslHELEKs+RRLEhE+-ELQuAL-EAEuALEpEEsKshRuQlElsQlRsEhERRLpEKEEEhEspRKNppRsl-SlpAoL.EuEsKuKuEshRlKKKLEuDINELElsLDtAN+usA-tpKslK+hQpQl+-LQpplE-pQRp+--scEphthAERRsssLpuElEELRssLEpuERuRKtAEsElsEAs-RlsELsuQssoLsupKRKLEu-lsslQuDLDEshsEh+sA-ERu+KAhsDAs+LA-ELRpEQ-pop+lE+hRKsLEpplKELQsRL-EAEusAlKGGKKhIpKLEsRVRELEsELDuEpRRptEspKshRKt-R+lKELphQsEED+KNh-+hQDLlDKLQtKlKsaKRQlEEAEElAshNhsKaRKhQ+ELE-AEERADpAEpsls+lR...uKuRs 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...................................h.....h.............ph...p.p.p.............t....t........++...p......phtt...t.t.t.......th..c+.t........p.h..-hpsh............hp...t...t.......h.pp.p....p...hp..p...........tp.p..........t..p.......h...pt.....tp.p.h....sch.t....httth....p.p............th.t..t.......hp.......p.........t.h.t.......t.......cht..p...h........................tt...tt.tt.p..h.tphpt.hpthp........ph......c.p.t.......hc-..ts.......thp.............c.......tt..............h.Rhphp.h.....t.th+tphpc.ch.t...t+-.-p.-t....h..+.p.t..t....h..cp.hp.tL..-..t..Ehc.......+..t.h.t.+...cchc.plt...............p.h..p.....thp.tts..p.............t.........-...........+p....h....c..............p................p.............h...p....................-....h...............hth...--t........t........pchht.p..t..pcc....t.hts-h.p.............hp...htts.+t.++.h.......c.-h...............-..httph.......t.........tt.......t....t....s....s.......h.........p...tt....pp....h-tcl.plpt-h--..pp.p.h.-+hc+s...p..h.t........h.t.....p-Ltt.Ep.......p..tttc..E.p.+pphEtp..............K-.Lp.+Lp-..hEt.s..hp...t........h.+.t....lttLEs.+.................l....tpLE..tpl-tEp....+..............ct....ts.+thR+h-++...lKEl..h.Q.s........-..........-..-++p..p.h.p-.h-Khp.+lK.....thK.RQ.h-Eu................Ep.sp...s..ph.R+hQ+
EL--tpEts.-...hppplpth+..................................................... 0 216 306 674 +3455 PF00819 Myotoxins Myotoxin Bateman A anon Pfam-B_1337 (release 2.1) Family \N 25.00 25.00 80.40 80.30 21.20 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.44 0.72 -3.90 2 29 2009-09-10 20:59:54 2003-04-07 12:59:11 12 1 8 2 0 31 0 41.80 88 77.46 CHANGED YKRCHhKtGHCFPKphIClPPSSDFGKMDC.W+hKCCKKGSsp YKpCHKKGGHCFPKEK.IClPPSSDFGKMDCRW+WKCCKKGSsp.. 0 0 0 0 +3456 PF02384 N6_Mtase N-6 DNA Methylase Bashton M, Bateman A, Mian N anon Pfam-B_508 (release 5.2) Family Restriction-modification (R-M) systems protect a bacterial cell against invasion of foreign DNA by endonucleolytic cleavage of DNA that lacks a site specific modification. The R-M system is a complex containing three polypeptides: M (this family), S (Pfam:PF01420), and R [4]. This family consists of N-6 adenine-specific DNA methylase EC:2.1.1.72 from Type I and Type IC restriction systems. These methylases have the same sequence specificity as their corresponding restriction enzymes. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.90 0.70 -5.35 15 7778 2012-10-10 17:06:42 2003-04-07 12:59:11 11 73 3514 13 1474 8902 2092 276.20 22 51.07 CHANGED pstDlhGDsYEYlLupFApspGKpuGEFaTPpsVocLlsclL....-spssc.IYDPAsGSGuhLlpsp+altt........sspspslsla....GQEhstoTapLA+MNhhlHsIchstht...lphuDTLtsspap........-tphDhVlANPPast+.W........ssssltsDs..Rap.t.....hsPpssA-aA..al.HhlhpLussG+AAlVlssGsLac.GusEucIR+tll-pshl-slIuLPspLFas.TuIPsslllLsKsKs.........pcscVLFIDAsp.atpcs..+ppstLos-cIpKIs-sapph.....................c-ls.....tFu+sAol-EIpcNDYNLslsRYVsstcpc-- ........................................................................................................................................p...p.huph...Y.E..h..h.lt.............p.............h................................t....t.............s.............p..........p..s..Gpa..a..T.P.p.t.l...s.c..h.h.s....pll.........................................p.s...p.......t...t.....p....l.....h.....DPs.sGoG..u..h....L....h.p...s....h....p....h...l............................................................t.p.........t...p...h.....p.h.a..................G......E...h.....s...............t.......s...h.....t.........l.....u.......t............h.....N........h........h.......l........+.....s......l........p...........t...............t....................lh......s...s.....o......L..p...p..s.hh....................................................tph..D...hl..luN.P..P.a....u..h..p..................................tt..p..p..h.......t..p......s....................h.................................hs.s.p...s.....p...s....p....h..h..............F....l...t.....p.....h.....l.....p...h..........L..............p..........s...............s..........G.........p......h......u...l.l...l.s.p....u..h.L.hp....s..........s......t......p......t....p...............l....R..p...h.l.l..........c........p.......s.........................l.c..s.....l.............l...t....
..L.........P...........s.......s...l...F.......h...s.....s..s......l...s...s..s..l..h...h..h.p....K.p.p.........................................tpp..p.l..h..h.......h..c..h...t....t............h....................................tt........t.p....h.........t...............................t..............p.........h.............h.........t.h..h.t...........................................................................................................................................t........................................................................................................................................................................ 0 495 1021 1282 +3457 PF01555 N6_N4_Mtase DNA methylase Bateman A anon Pfam-B_164 (release 4.0) Family Members of this family are DNA methylases. The family contains both N-4 cytosine-specific DNA methylases and N-6 Adenine-specific DNA methylases. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.58 0.70 -4.87 49 7047 2012-10-10 17:06:42 2003-04-07 12:59:11 13 54 2966 14 1466 6738 4026 224.00 20 60.70 CHANGED lcllhssPPYshspp........................................thsptps..........hppahpa.....hhphlppstcl.........LKssGslhl.ss.phht............................................................................hhthhhp.hs..ahhhshIhWpKs........t.shsp.pt..............phstspEhllh................asKscph....................................................hhshphhch.hpptpthphtt........................................h...........W......................................................................................................ht..ppppt..............H......ssp+PhtLhc+lI.phsop.........sDlVLDPF....hGS.......GTTshsAtpLsRp.......................aIG.hEhppcahc..hutc 
.....................................................................................................................................................................................................................lchlhhDPPY..ht.p.................................................................................................t.t.......p...............................p..ta....h.p..h............h.......h..ht..hhcl.............L.p...s.G.s.h.hl....hs...pth.........................................................................................hhhh.h...p.............s................h..h....s.......l.h..Wppt............................................hsp....t........................................................................hh.t.....t...t..E..l...hh.................................................................................h.....Ks.tt..............................................................................................................................................................................................................................................................................h..........................W.....................................................................................................................................................................................h......t..............................................a..........s..s...KP.t...L....lppl.I....t..h....s..o..p................................................s..s...l...V...L.DsF....sGS.......GTTsh......s...uh..p.h.s..R.p................................................hlu.h-.hptthhp.....t........................................................................................................................................................................................................... 
0 537 1019 1270 +3458 PF04245 NA37 37-kD nucleoid-associated bacterial protein Mifsud W anon COG3081 Family \N 24.60 24.60 25.60 24.90 23.10 23.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.20 0.70 -5.10 105 1727 2010-01-13 16:22:14 2003-04-07 12:59:11 8 1 1574 0 252 1017 43 316.50 30 96.20 CHANGED llHplsppsp........................phhhpsp.h.h..ssphp....t..hltchhpth..spc.sttsauph.........................t..thsttlpphhp...t........ppsFhphSpphuppLh.pphpc.p...hs....uGpllhspa...........p..t....sp.........hlhlhhlcpcpuhhhspp............lchp.p.ptl.........sls+lphuAhIslsph..............................phhlshlcscsscp......sstaatc.FLuspp.thsspppocshlpslccaspst.......lspppp.p.....ph+pplhsahpc.......s.-..plslpcluspl..................t.p.....th..sFt....pahppp..th.plp......ps..Fssspsslcph.t+h..sspstulslphctphhs....p.....phh..s.pss-.s....l..pIps..h.s.hpcplpc ..........................................................lHQl.pccp..........clhLp-phL.s...spplp.....p.hltcltchh....ssc.p.psauha........................ps...hsph.L.p.hhp....s...............pcDFlth.SpshsppLp.-.............th..sc.s........hs....uG.llFspa........................phhu..s-................................aLhlhhls.ppohphsps........l-ls.s.phL....................sls+hDhsAcIsLoph..............................spalsalcsR.ls+c.......ls.Fh..hc..FLuss..slssKtps+s.Lhpssc-...astpt.....ph-ctpp..p.......pl+pp...lasahpE.......u.-....El...pl...c.pLup-Lh..............ss..........ps...sFh.....-aspEp........th..tlp....Es..hs.s.D....+..p.s.lcph..pKa.....sutusGlslsh.ss..lhs-........chh...s..ssD.T..........l......hIKs...h...h................................................... 
0 62 131 201 +3459 PF01235 Na_Ala_symp Sodium:alanine symporter family Finn RD, Bateman A anon Prosite Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.50 0.70 -5.90 6 5556 2012-10-03 01:44:59 2003-04-07 12:59:11 12 8 2931 0 865 4003 1762 405.60 38 87.29 CHANGED hhchhp....hhu++scuG..sloSFQALsTuLAuRVGoGNluGVAhAIuhGGPGAVFWMWVsAFlGMATuFsEsTLAphYKh+Dp-GshpGGPtYYlp+GLuhRWLullFAlh.llloFGhlhsulQsNuIAcAhssuFshsslVTGIlLsllsuLlIhGGlKRIAtlophlVPhMAllYllsALsIlhhNI-plPsVIttIhcSAFuhptAsGGhlGusl..AhhhGlpRGLFSNEAGhGSAPpAAAAAcsp..HPspQGlVQhlGlFlDThIlCTsTAllILLoG.h.......................................hss.pLcG..............................................................hplTQtAhppthG.uaGupFluluLlLFAFSollu.YYYuEsNlhaLh.sph+ulh..haRlshlAhVhaGoltshsllWphADlshulMAlhNLIAIlLLpplsachhKDYhcQhKpGhpP.Fcspch ................................................h...............t..t..p..p..p..p.u...slSsF.Q.ALssuLAupVGoGNIAGVAsAI.......s........hGGPGAlFWMWlsAhlGhAotasEuoLA.hY......+.............p...+..-..t...s......G..p.ap......GGPhYYl........p.........+....G.L.......s.........t...........+..............W........h.......u.......l....l.......F.....A.....lh....l.l......l........u........h........G......h....h........h.....s....sl.....Q.......u........N.......o.......l..................u..................p..............u...............h................p...............s................u..............a..............s..................h.............s..............s...............h..............l..............s.................G.............l........l..L.s.......l...ls..u...l.l.Ih.G.G.l.+..p..Iu.plsphlV..PhMAlhYllsulhllhhNhsplP....sshthIhpu.A.F...s..h..p....u......u.s.G.G.h.hG...s...s...l..........................h.GlpRGlF..SNEAGhGSAP.AAA..uA.psp..............H....P..scQGllphhGlFlDTlllCThTAhlIL..l....o...G....h..................................................................................................
.................t....s...s..h...p...G......................................................................................................................s...t....l.Tpt.Ah...s..shh......u.......s......h.......G..s.....hh.......l.s....lu....lh....hFA..Foollu.hYYuEp.s...........lt.a......L.......h....................s.........p...p.....h.....l..h..............laR.......l...l...h....l............s.....h.l..hhG...u.......h.........h...........s.......l...s....h.........l...WslADlh......hGlMAlsN...LIAlll...L...u....thshphh+DY......p.p..h..c....t.................t................................................ 0 285 542 723 +3460 PF03390 2HCT Na_citrate; 2-hydroxycarboxylate transporter family Mifsud W anon Pfam-B_3683 (release 6.6) Family The 2-hydroxycarboxylate transporter family is a family of secondary transporters found exclusively in the bacterial kingdom. They function in the metabolism of the di- and tricarboxylates malate and citrate, mostly in fermentative pathways involving decarboxylation of malate or oxaloacetate [1]. 
25.00 25.00 25.00 27.90 22.10 24.10 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.36 0.70 -6.10 36 838 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 694 0 149 549 5 398.60 39 93.58 CHANGED +IsslPLPlallhuslllluhhh...........spLPssMlGuhulhhlhGhlhGpIGpRlPIl+s.lGGuAIlslFlsohlVaasllspsslcssoshM.Ks.......u.sFL.hYIAsLlsGSILGMsRclLl+uhl+hhssllsGslsAhhsGhlVGhlhGh..........shpcshhaIllPIMuGGhGtGAlPLS..hYuplhG.hsppphhSpllPAlhlGNlhAIlsAulLsplGc+pPcLoGNGp.Ll+spp.....htsp.pcpptplshpthGsGlllAsohFhlGtllschl.......sl.uhshMIlhsAllKhhsllPpphcpGAhphapFhusshTasLlsGlGluassLsplluAl.ohsalllshslVluhlhsuhhlG+lhshYPlEuAIsuG.CtushGGTGDVAlLSAusRMpLMPFAQISoRlGGAhhlllAslL ...........KIsulPLPlYlhh..hh.llhlsthh..............spLP.s.shlGuhAlhhhhGhlhGpIGp+lPIhpp.lGGsulhshhlsuhhVaasll................sp..s..lcusst.hM.cp.......................u..NFL.haIAsLlsGSILuMNR+lLlpuhh+hlssllsGhlsAhhsGhlVGhlhGh..................shpcshhalllPIMuGGhGtGhlPLS..hYuslhG...tsppphhuplIPhhhlGNlhAIlhAulLsplGc+hPp....LoG.pGp...Ll+pss.........sp..pcppptplshpthusGhllAsohFlhGhllp+hl...........sl..s.s.h.Ml.llsshlKhhslhPschcpGAppl.cFhSpslTasLMsGlGlshhsLp-llssl.ohs..lllshslVluhlssuhhl.G+hhshYPlEuAIsuu.CpushGGTGDV.AlLSAuNRMsLhsFAQIuoRLGGAIsllluoll.................... 0 28 64 100 +3461 PF03553 Na_H_antiporter Na+/H+ antiporter family Bateman A anon Pfam-B_620 (release 7.0) Family This family includes integral membrane proteins, some of which are NA+/H+ antiporters [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.93 0.70 -5.39 38 7404 2012-10-02 15:12:49 2003-04-07 12:59:11 9 9 2159 0 1156 6794 758 256.90 17 76.09 CHANGED sssFGs.ttShlu.ssshusthhtstlhsphp...........h.hhhpshshtlhuhhhhhhluhlhh......hhhslsshhpp..tsh.....................phshhthtt.............Phhslhhhslhth.hhsshsh.shhs.shshthh...hpssshhtshhhuuhsuhhhthhsth.......htphsshlscG.hpsMhhslhlllhAhuhuullpcsG.hssllpslspth.ps.........hthhshlhhlluhhlshusGsuauolsIhsslhtshstphshsh..........hshsusltsuushu-shsPhSsoslhsssshts...plsplp.sshshhhhhs.h.hhlhhlhhGh ......................................................................................................................................................................................hhhs....s.h....h..h.h.....h.........h.h.ttht................hh.h..........h.h.shh..hh.h.h..h............................th..........h.t......................................................................................ht...h.....................................s..hh.h.h..h..h..h.......h.h......t.h.....h.hh......s........h.........h...........h.................h..........h.........s........h........h.h.thh.................hp................s....................................................h........h...............s.......h.........h.h..u....hh....h...h..hh.h.th.............................h.tp..h.......p...h...h.....s.....c.....G....h.....p..s..M.....h.........h....h.hl.h.....lh.uh.u..h..u.ul..h.p.....p......h......G.....h.ls...s...ll.....p.....t.....l....h....ph.h...ps....................................t.t.h....h.h..h..s.h.h.l.l.u.hh..l..s.h..u..h.G.s.u...as....o.ls.l.h....u.s.l.h.h.sls.p.p.h..s.ls...........................hhh.s.u.slt...su...us.hs.s.th..Phu..s...s.s......l.....h......s........sh....h..........................t..........h.......s...........h............h........h..s..h..h.....h.h..............l..hhh......................................
......................... 0 401 740 997 +3462 PF00287 Na_K-ATPase Sodium / potassium ATPase beta chain Finn RD anon Prosite Family \N 23.30 23.30 23.30 23.70 23.10 23.20 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.96 0.70 -5.63 11 608 2009-09-11 13:38:36 2003-04-07 12:59:11 13 6 122 10 324 552 0 246.50 31 90.97 CHANGED MA+.cpccs........p.supWKcFlWNPcppEFhGRTuoSWshILLFYllFYGhLAGlFshTlhVMLpTls-apPKYQDR....lAsPGLhhpPcs..sp...hEIsFssu.sspSappaVpsLppFLcsYssopQsp....hpsCs..PscYh-pss.s...pspKKuCpFptphLG.sCSGlsD..sFGYs-GKPCVllKhN......RIlGFpPcs...............s.......ssh.lpCsuKcsE..tpplsslpYaP.s.....GshsLhYaPYYGKphpssYlQPLVAVpFsNlo..tssEltlECKla.usNlphs-.+D+F.GRlsFK .......................................................................tth.ppah.a...ssp..pt...ph.hGRTstuW.........hh..Ihl....FYlhFYs.hLsuhFshsh.h.s.h....h...T.l.s..s.h...h..P....p...a...p..-p...................hs...s..P.G......l....hhtP.s............tt.......h...l..a..phs..c..spoaptaspplppFL....p.......Ypss.ptp..........sCs.........tthh.p.t............tpsCpFph.p........L.....t....sC....Suhp..D.....aGYpp...G.p.PClhlKhN......................Rl.lsahPp..........................................................t..............ssh...ls.Cp.s....p......p........t....t......c.........t..p.......p.......l.......sp.l.......pYaP.s....................sh..s......h.h..Y..a...P...Yh...s...............p....s..sY.....h.p...P....LVAVp..Fh..shs............hshtlplcC+hh.upNl..s.p.tcc.h.Gphth............................. 0 79 107 205 +3463 PF00939 Na_sulph_symp Sodium:sulfate symporter transmembrane region Finn RD, Bateman A anon Pfam-B_1100 (release 3.0) Family There are also some members in this family that do not match the Prosite motif, and belong to the subfamily SODIT1. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.60 0.70 -5.62 11 6208 2012-10-02 15:12:49 2003-04-07 12:59:11 14 27 2554 0 1377 7719 2167 426.10 24 91.05 CHANGED tsphhpLlhhlslhllIahlPsPsGlsspAWphFAIFhusIlGlIhcPlPhuslAlhululh.llstsLs..................lstuLSGFussssWLlFsAFhlutGah+TGLG+RIAhhhlphhGpoo..LsLGYullhs-llLAPuhPSsTARuGGIlaPllpuLss..........uhGSsspcs.................................o.+plGuaLMhssh.uosITSuMFLTAMAsNsLshslhsp...hsGhploWhpWhlA....AlssGllhLllsPLllYhlaPPclcps.............-shphAcpcLcpMGPhstpE+tllslhlLsLlLWlFGs...........pls......lsAoTsAllslulhllhp.....................llsWc.DllpppsAWsTllWaGuLIsh..AshLspsGFlpWlusshsshlu..uhssthuhhlllhla.ahsHYhFASsoAaluAMhPlFluVupul.G.ss.hhhuLsLsFusulhGhlT.YGoGPuPlhaGuGYlsspcaW+hGhlluIlslllaLslGshWWphLuhh ..........................................................................h............h.h..h...h.h..h.h...h...h..s...h.....s...................s.........h.....s..................s....h..h.........h.........hu..............l..hl..............hh...hl..h...h...l.h.c...s...l.Ph.s..s.s.u..l...l.s...h..s.lh.....lhs.hh.......................................................................................p...s.h.u.u.a.us.s..slaL.hhuu..Fhluhu..hpc.ps....L....s...+R.l.A..h..h.l..l..p...h..h....G.p...p.s.................l.h..l..u...a...h..h..s..h..s.....l...h..h.....s..s..su...s..oA..h.h..h..s..l..h..h...s.l.l...p.s.ls.........................................h.t..s.p...p.p............................................................................................................................................................................................................................t.p...h.u.th...l.h..h.s.h.hhu.s...s...lsu.hhh.hsu....s.s...s.Nh.l.hh.t..hhpt....................t..h..p....l..s..ah..p..W..hhh.....uh.P....s..l...l....h....l.....h.l...h....h.......l.h....h.h....h.a.........s...p....h....p..p...h...........................
.........................................t..s....t...h..h.....p..p...c.h...p.p...h...G.............h..s..h.....t...E......h......h...h....l.......s.......l...h..l....l..s..l..h......h..........W......h......h..t.s..........................................ht..................l...s...s..s...s..l..u...l...h..s....h.s..l..h....h..h.hs.....................................................................l..l...s....W...c........h.....h.....p.....p..p.....h.s......W.....s.....s.l.....lh.....h.uuh..lsl..............uss..L..p.p..oG.h..hp.al............uptl..ts.h..hs...............h..t..............s.........h...s.h.h.h....l..............l..h...l...h...h.h.h..h..h......F.s..S.s..o.A...ts....s....h.h.h....P.l...h....hs..l..........u..........h...u...h.......................u.......h..........s......s......h......h....h...s...l.....h....h...........s....h....u....s..u....h...s......hh....h...Phus.......s..P.....s..s.....l.......h..........a........G........s........G......h........l.....p..............h.....p.......c......h..h...+....h......G.....h....h....h......s....l...l.s...h........l...l...h.h...h.h...s.............hh......h........................................................................... 0 425 767 1127 +3464 PF04902 Nab1 Conserved region in Nab1 Kerrison ND anon Pfam-B_6188 (release 7.6) Family Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors [1]. This C-terminal region is found only in the Nab1 subfamily. 
25.00 25.00 26.50 44.50 17.60 18.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.04 0.71 -4.76 2 61 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 38 0 27 45 0 154.70 68 34.82 CHANGED GERDELSPKRIKVEDGFPDFQ-sVQTLFQQARAKSEELAALSSQQsEKsMAKQMEhLCsQAuYERLQp.ERRLoAGLYRQSStEHSPsGLsSDsSDGQGERPLNLRMPNlQNRQPHHFVsDGELSRLYsSEAKSHSSEsLGILKDYPHSAFTLEKKVIKTEPEDSR ..GERDELSPKRIKlEDGFPDFQ-oVQTLa.......QQ..........A..+AKSEEhsALuSQ.........Q.sEKV.MA..KQMEFLCsQAGYERLQpsERRL.SAGL.YRQu..SEEHSPN.GLsS.D.sS.DGQ.GERPLNLRMP..N..LQ.NR.....Q.P..HHF.....VlD..........G..........E..........LSR.L.Y.s...S...E.AKS+SS..E.SLGILKDYPH........S.AFT.L.EKKVIKTEPEDSR.......... 0 1 4 10 +3465 PF01849 NAC NAC domain Bateman A anon [1] Family \N 21.30 21.30 21.40 21.40 20.50 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.73 0.72 -4.44 91 1327 2009-01-15 18:05:59 2003-04-07 12:59:11 13 15 510 10 771 1213 39 57.30 36 25.22 CHANGED pKKhpphhc+lGl..cplsslpcVslpp.sctphlhhspPcVp..pu.s.ssoahl..hGcscpcs ...............-KKhpp.sl.pKLGl.....psls.Glpc..VshhK.sc..shlhhhsp.P.cV..............tS.s..usTahl.hGcuchc................... 0 231 389 577 +3466 PF00175 NAD_binding_1 oxidored_fad; NAD_binding; Oxidoreductase NAD-binding domain Sonnhammer ELL anon Prosite Domain Xanthine dehydrogenases, that also bind FAD/NAD, have essentially no similarity. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.61 0.72 -3.49 72 19747 2012-10-02 19:13:12 2003-04-07 12:59:11 16 239 4712 191 6047 15837 2129 105.10 20 24.44 CHANGED hlu.GGoGluPhhshlpphhtt..p...........tspshl.haGs+spc.cll..hc-Elcphttph.t....hhh.h..........ppssss.tst+GaVpctl.c..ch.............ttth.lahCG.P.sMhcsspp ..............................hlu.uGsGlsPhhu.hl...pp...h..h..t.....p....................................................ptsp...h.h...l...h..aus...R..........stp....-.......h..h.........a....t.c....-.l.p...p......h..t.pph................h...h.hhh.............................spsp..s.....s..............................p.....G...h...l.s....p.....t..l....h...p..phht.................................t.s..s.p.l.a.l.CG.sssMhcsst.t...................................................................................... 0 1691 3423 4899 +3467 PF03446 NAD_binding_2 NAD binding domain of 6-phosphogluconate dehydrogenase Griffiths-Jones SR anon Prosite Domain The NAD binding domain of 6-phosphogluconate dehydrogenase adopts a Rossmann fold. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.76 0.71 -4.40 163 12732 2012-10-10 17:06:42 2003-04-07 12:59:11 10 80 4225 91 3550 30059 15093 161.10 28 46.30 CHANGED htpIGhIGLGlMGpshAhNl...h.ctGasVslas.Ro..t.....spscphhtp...u..........hhsstohp.-hspsh......chllhhVtsussVcsll.s........lh..stl........ptG..sllIDsussphpsspchscp.lpppG..ltalsusVSGGptGAtpGs.olMsG..Gspp.uacplpP.llpshus.........sls.......h.Gs ..........................................................................plGhl...G.L.G.h.M.......G....t...s....h....A..t..Nl.................h...c....t....G........a......s.......V......s....l.....a..s.....p.s......t...........p...t...s...c.....p...h...h..t.p....u.......................................st..s.s..t....o...s.p....-...h..s..p.sh.........................chl...l...h.........M......l..........s.........s..........u..........s...........s..........l.......c.........s..........l........l.......s.......................l..h.....s.t..h..................................................p..t...G.......s....l..l......l...D.......t...u.......s.....s.s........p..s.......s.p.c.h.s....p.......t.......l....p.........t......p..G.............h.p...............a.....l...-.........u....s...........V....S..G........G..p...t..........G...........A...t.p......G.........s.............olM..s...G..........G...s....c...p......s....a..c..p..l...p.P...lh.p.s.h.us...........slh.h.G.t........................................................................................................................................... 0 986 2066 2904 +3468 PF03447 NAD_binding_3 Homoserine dehydrogenase, NAD binding domain Griffiths-Jones SR anon Pfam-B_459 (release 2.1) Domain This domain adopts a Rossmann NAD binding fold. The C-terminal domain of homoserine dehydrogenase contributes a single helix to this structural domain, which is not included in the Pfam model. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.33 0.71 -10.36 0.71 -3.70 30 6031 2012-10-10 17:06:42 2003-04-07 12:59:11 11 30 4281 27 1498 6276 2360 124.60 26 23.63 CHANGED GhGslGsslhchLpcstt........slclsulsscc...hptt..........htshhsstshsssl-clls........c.sDllVEs.uu.pslcphshphLcpGhcllssshuAlu...shhtcLhphAcpsssclhh. .........................................................................GhGsV..Gu.u..l.l..c...l..l...pcppptltt...............phshplp.lss..l.ss..pc......h.ptp........................................ht.t.t.h..s..s..t..t...h....s....p..s...h..p.....p.llp...............................p.hsl.lVE......l...u......u......s......p.....s.....s.....t.....s.....h..h..h.c..sL.p...p..GhHV.....V..T.A.NKs..hhA.........sahppLtph.A.cpss.hphhap................................. 0 464 940 1256 +3469 PF01210 NAD_Gly3P_dh_N NAD_Gly3P_dh; NAD-dependent glycerol-3-phosphate dehydrogenase N-terminus Finn RD, Bateman A, Moxon SJ anon Prosite Family NAD-dependent glycerol-3-phosphate dehydrogenase (GPDH) catalyses the interconversion of dihydroxyacetone phosphate and L-glycerol-3-phosphate. This family represents the N-terminal NAD-binding domain [2]. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.72 0.71 -4.50 44 5998 2012-10-10 17:06:42 2003-04-07 12:59:11 18 26 4763 27 1531 8856 4915 150.50 28 44.49 CHANGED pluVlGuGuWGTALAtlLucsu........ppVplWspcs.........phhcplspp+p.NscYLPs.lhlPs.slpsssDLtcAlpsu-hlllsVPopshcplhpplpshl.....ppsshllt........ssKGl...Et.........sohphlScllpEhlstp....luVLSGPshAcEVAtth.osssluu...pstphu ....................................................................lsVl.G.u.Gu.a......GT.A.l.A.h..hl..ucsG............ppVh.lW.s..p..p............................................chh.p.p..l......p......p.......p.......+......p.......N..................p..........a.......L..........s.....s.......h............t.......h.......s........p.......s..........l...........p.....s.....s......s........D....l....t.........p...A.....l.............p......s......u...............-......h.....l......l.h.s..l..........P..o...........p...........s........h..c...p.........s..h..p.p....l..t..s..hl..............pss...s...h...l..l.t...........................ss..K..Gl................E.............................soh.ph...h...s..c.l...l...p..-.......l.stp.................luV.lSG.....P..saA.pE.lutt...oshsluu.ps.p..h....................................................................................................... 0 513 967 1299 +3470 PF01513 NAD_kinase DUF15; ATP-NAD kinase Bateman A, Wood V anon Pfam-B_797 (release 4.0) Family Members of this family include ATP-NAD kinases EC:2.7.1.23, which catalyses the phosphorylation of NAD to NADP utilising ATP and other nucleoside triphosphates as well as inorganic polyphosphate as a source of phosphorus. Also includes NADH kinases EC:2.7.1.86. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.62 0.70 -5.63 33 6066 2012-10-02 15:20:27 2003-04-07 12:59:11 16 26 4752 59 1890 6136 3391 258.10 24 81.92 CHANGED plullssss....................ptpstthspcltchLhsp........shhhh.ttthtt..........................................................phhhhshphhcpss-hllslGGDGThlsssphhsp....slPllGlss.GshsFhsph..p.cshtphlsphlp.schplpct.....hl-...........................shlppspp...............hh..thslN-hslhtutssphhp..hclhlssshhsshts-Gl.............................lluTPsGSTuashuuGGsllpPsltsh.l....s.....hhhttcslVlssppplph.......ppsshlshDGppplpht.sshlplphu.pt.hhhlch ........................................................................................................................................h.......................................................................................................................................................................................................................s..p...p..s...D.lllslGGDGThLpAs.+t..h..tp...............s..lP..l....l.G.l....s........t......G.....p...L...G..F.....L....s.-.h...............psp..p.h....p..ph.l....p.....p.l..........h..........p...s..............p......h......p...l......p....p..+.......hLc.............................................................................................................................................................sp.l...p.ppsp.....................................................................hh.ph..h.A.l..N...E.l....sl....pp..u.s..t.sp.hhp...........h-l....hl...c.s.....p....h..h....p...s....h..puDGl....................................llSTPTG...STAY..shS...u.G..G...sl.....l.p.P.s.lp.u.l.hlsPlss.........+s.ht.sR...Pl...V.....ls.s.p.splplph...............tppshh....ls...hD.u..p...t..h..t..h..p.s...ppl..lphst...hphh..................................................................................................................................
................................................................................. 0 612 1178 1585 +3471 PF02540 NAD_synthase NAD synthase Mian N, Bateman A anon COGs Domain NAD synthase (EC:6.3.5.1) is involved in the de novo synthesis of NAD and is induced by stress factors such as heat shock and glucose limitation. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.63 0.70 -5.47 11 9645 2012-10-02 18:00:56 2003-04-07 12:59:11 12 26 4887 87 2593 10500 6866 175.90 22 39.30 CHANGED llpclpsFl+phVpcsGs.pGVVlGLSGGlDSulVAhLshcAl.........Gp-psluLlMP.s..ssccDhpcAhslscpLuhphpplsIcshhpuasp....thppst....pchscGNlKARlRMshLYsaAsphshLVlGTuN+oEhhlGYFTKaGDGusDlsPIusLhKopVacLA+tls.....lPccllcKsPoAsL...asGQTDEcELGlsYc.LDplLp....hhp.t...p-..tthslstphscpltphlpKoEHKRc.P ........................................................................p.......hlt.p..h.h....p....p.....s....Gt.....c....p....l..l.......L.G..l..SGGlD...S..o....l...s....u....t....L....s...p....c.A..l.............................................u..p...p...p.....l.h....s..l.....h...l............s...........s..............h..................t..........p..........p............p..............p..............h..p..t......s......h......t.......h..h....p.t...h.....t.....h......p.....h.....h.....h......l.....s....h.....p........h..h...........t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................ 1 860 1651 2190 +3472 PF00146 NADHdh NADH dehydrogenase Sonnhammer ELL anon Prosite Family \N 20.00 15.00 20.00 15.00 19.90 14.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -11.82 0.70 -5.37 23 21511 2009-01-15 18:05:59 2003-04-07 12:59:11 16 18 13672 0 1222 18971 2989 236.20 39 95.74 CHANGED hhllshLhlIlsllluVAFLoLhERKlLuhhQhRKGPNhVGshGLLQPlADGlKLhhKEslhPusushhlFhluPslulhLullhWsslPhshshl..shNlGlLFhLAlSSLuVYulLhuGWuSNSKYuhLGuLRAsAQoISYEVoLuLILLol.llhsGSashsslspsQp.....hhWhlh.phPlhlhaaISsLAETNRsPFDLsEuE.SELVuGaNVEYuussFALFFLuEYuNllhMshLs.slLFLGu.h.............................s.hhhhhKshhlhhlFlWlRuohPRaRYDQLMpLsWKsaLPLoLuhllhhsulhl ...............................................................................h...lh.h..h.l....l....L...l.u....l....A....a...h....o..l......h...E.......RK.lLuah.QhR+GP........N...........h..........V...........G.......................h..........G......l......L......QPhuDulKLh.hK.E......l.h.Ptt....u.s........h....l..a.h.hs.Phl.u...l..h..l........u..lh.h..a.h..........l.......P.........h....................s..............h................s............h..............h........................................................s...........h......s....l...u..l.L..........a..........h..l........s..h.o.Sl..u..VY.sl......l..h........uG.Wu.S..N...SKYuh....lGul..Ru..sA.Q..sISY.E..ls.h.s.l.h.l.l...s........h......l.h..h......s........u.............s..........h...s...h.......p..h..........s.Qt..................................h.W......h......h......h...................h...s....h...h...h..h..a.....h....h..ss...l....A.......Es....N.R...tP.FDh...s...E...u..........E...pE....L..s...s.G..a......h...EY...u.uh.Fuh.h....h.h.u....E.Y.h.....
t.h.h....hh.shh.....s.h.ha..h..s.s..........................................................................................h.hhhc.h.....h...h.....h.h...h.h...h.c.shsRh...Rhs...h.....hh..W.h....h......................h.................................................................. 1 394 766 1015 +3473 PF02477 Nairo_nucleo Nairo_nucleocap; Nucleocapsid N protein Mian N, Bateman A anon Pfam-B_2892 (release 5.4) Family The nucleoprotein of the ssRNA negative-strand Nairovirus is an internal part of the virus particle. 25.00 25.00 41.20 28.40 17.60 17.10 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.50 0.70 -5.79 3 472 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 16 1 3 283 0 180.00 74 96.38 CHANGED MENKIcANNK-EFNcWFKpFuEKppLsssaTNSASFC-cVPsL-pacaKMALATDDsEKDSIYSSALVEATRFCAPIYECAWsSSTGlVKKGLEWFDKN..uDTIKlWDAsYh-LKsElPcsEQLluYQQAALKWRKDVGFcINpaTuuLoHsVlAEYKVPGEllhslKEMLSDMIRRRNlILNGGGD-APKRGPVSREHV-WCREFAuGKFlsAFNPPWGDINKuGKSGIPLlATGLAKLAELEGKcVhD-AKpoltsLcGWV--NKDpVDcuKA--LlpslpKHlAKAlELuKpSNALRAQGAQIDTsFSSYYWsWKAGVTPETFPTVSQFLFELGKsPRGsKKMpKALlSTPLKWGK+LIELFADDDFppNRIYMHPAVLTuGRMSEMGlCFGsIPVASPDDAAQGSGHTKuILNaKTcTEVsNPCAsTIVpLFEIQKsGa ..............................................................h.hpMA.ATDDupKDSIYASALVEATKFCAPIYECAWsSSTGIVKKGLEWFEKN....uGTIK...SWDEsYsELKV-VPKIEQLANYQQAALKWRKDIGFRV.NANTAALSNKVLAEYKVPGEIVMSVKEMLSDMIRRRN.lLN+sG.--ss............................................................................................................................................................................................................................................................................... 0 3 3 3 +3474 PF04131 NanE nanE; Putative N-acetylmannosamine-6-phosphate epimerase Mifsud W anon COG3010 Family This family represents a putative ManNAc-6-P-to-GlcNAc-6P epimerase in the N-acetylmannosamine (ManNAc) utilisation pathway found mainly in pathogenic bacteria. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.22 0.71 -5.33 7 2374 2012-10-03 05:58:16 2003-04-07 12:59:11 9 5 1884 8 180 1739 546 188.00 47 81.67 CHANGED hstMAhAut.uGAsGlRhpGVpslpthpshVslPIIGIlKRDhPssslhITshhp-lDpLAssGs-lIAhDuTsRsRP..lsl-shlcpIKcph..pLhMADCSohEEGlhspphGh-hlGoTLSGYTssp.s...s-PDapLl+sLspAGhh..VhAEGRhsTPE.Apcsh-hGssuVsVGuAlTR.ccIsphFspAlpp ...................................................................................................hutMAhAAppuGAVuI.......Rh........p...ul...p...s.lps.l.+p.h.V..s.lPIIGIlK......R.D.h.s...s....s..s.V..a.....IT.s.s...h.c....-VDtLsp.sGs.-lIAlDu..Tt.RpRP.......hslp.phlp...pI...+.c.+h..........hL.h..M......A.D.s.S.o....h...E..-....u..l....s....A...tc...h...G.hDh....l.........G...T..T..L......S..G.....Y.....T..s..sss.........ppP..D.....h.....p....L.....l....+...p....L....s....c....s......s..s......VIAEG.+hpTPppAtc.s.h.c.h.GAauV..VV..G.u..A..I.T...R...Pc...c..IsphFssAlc....................................................... 0 49 100 147 +3475 PF04660 Nanovirus_coat Nanovirus coat protein Waterfield DI, Finn RD anon Pfam-B_4486 (release 7.5) Family Family of conserved Nanoviral coat proteins [1]. 25.00 25.00 143.60 143.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.99 0.71 -4.56 3 85 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 11 0 0 65 0 166.00 74 99.27 CHANGED suRWshpGhK+RRosRRKYGRhAYKP..PoS+VVSHluSlLsK--VVGs.EIKP.DuDluRYpMhKVMLlsTL+MsPGELVNYlIVKSSSPIANWSouFosPuLhVKESsQDhVoIVuuGKLESuGsAG.oDVTKSFRKFl+LGuGISQTQHLYLlhYoSsAlKIsLEsRlYI-V ....huRas+puIKKRRVGRRKYGS.KA.....ATSHDYSSLGSILVPENTVKVFRIEPTDKTLPRYFIWKMFMLLVCKVKPGRILHWAMIKSSWEINQPTTCLEAPGLFIKPEHSHLVKLVCSGELEAGVATGTSDVECLLRKTTVLRKNVTEVDYLYLAFYCSSGVSINYQNRIhhcV. 
0 0 0 0 +3476 PF00956 NAP NAP_family; Nucleosome assembly protein (NAP) Finn RD, Bateman A anon Pfam-B_1009 (release 3.0) Family NAP proteins are involved in moving histones into the nucleus, nucleosome assembly and chromatin fluidity. They affect the transcription of many genes. 26.90 26.90 27.30 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.60 0.70 -5.25 97 1619 2009-01-15 18:05:59 2003-04-07 12:59:11 13 20 399 31 836 1514 9 214.90 28 67.14 CHANGED +..p+l.psLcplQtchppl-tcapcchhcL.Ep+YtphhpPlap+...RpcIlsG.h...tsp....................................................................................s.lP.sFWLssh.cNpshlu..phIs.-cDcphL.caLpDlchphhcs...............uFpl.FpFss...NsaFpNp..lLoKpYhhps.....p........................pspuspIcW+.pu+slThphhp+....................KpcpKtpt..th+.lpct..tc...........SFFNFFs......s.......p...........................t.t..ptpchp..t.ht.DaclGphl+-cllPcAlpaasGcs ................................................................................................................................tlttLctlQ..chsplptph.cchhp..........l.EpKatph.hp.Ph...ap+...Rp.pllps...........................................................................................................................................................................................................................................................................................................................................IP.sF.W.hss.......h..tN..ps.........lu...............thlp...-pDEth.L..caLpc.lclp..hct..........................uap.l.pFhFpt......N.sY.Fp.Np.............lL...s..K.pah..hpp.....psp............................................................tspuspIcW+..pGcs.lsh.ph.pp......................................................tp.ppptt................p..............................................SF.Fs.aFs.....................................................................................................
...t..t..p.t.................s.cluphl+-clhPpsl.aah............................................................................................... 0 233 366 559 +3477 PF03892 NapB Nitrate reductase cytochrome c-type subunit (NapB) Bateman A anon COG3043 Family The napB gene encodes a dihaem cytochrome c, the small subunit of a heterodimeric periplasmic nitrate reductase [1]. 21.10 21.10 21.20 21.20 20.80 20.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.10 0.71 -4.48 75 1044 2012-10-01 23:37:15 2003-04-07 12:59:11 9 3 988 11 158 429 23 131.50 46 86.09 CHANGED hhshsssussssssp.......................ltsLRsss.lssp.ssssshpph.....spcstphpRsYspQPPlIPHsl-u...YplshssN+CLoCHuhppspcstAshlSsTHahD..R-GphL.uslSPRRYFCsQCHVPQsDu.pPLVpNsFpsh ..............................hhs.hsshstssts.................................................t.s....sst.-..shh+h.......P+Epp+hshsYVNQPPhIPHSl-G...YQ.VTpNsN+CL.pCHuh.....-....s....h+s...o.GAs+ISsTHF.h.D....p..................DGK..Vs...upVuPRRYFCLQCHVPQuDs.tPlVsNoFpP.s.................... 0 28 80 125 +3478 PF03927 NapD NapD protein Bateman A anon COG3062 Family Uncharacterized protein involved in formation of periplasmic nitrate reductase. 28.00 28.00 28.20 28.20 26.40 27.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.32 0.72 -4.15 59 1029 2012-10-02 00:20:33 2003-04-07 12:59:11 8 1 992 2 164 424 22 78.70 39 82.85 CHANGED pch.HIsSLlVpspPcclspVpssltslsssEIauhssp.GKlVVllEusspttlhcplspIpslsGVLssuLVYHp...h-st ......p.phplsSLVVpA.Ks.-.+.l.sslpspLsshPus.Eltss....-s..Gp.....LIV.VlEu-sp....-sllpoIEu.lcslcGVLuluLVYHQQ-p............. 0 34 85 130 +3479 PF03059 NAS Nicotianamine synthase protein Mifsud W anon Pfam-B_2173 (release 6.4) Family Nicotianamine synthase EC:2.5.1.43 catalyses the trimerisation of S-adenosylmethionine to yield one molecule of nicotianamine. Nicotianamine has an important role in plant iron uptake mechanisms. 
Plants adopt two strategies (termed I and II) of iron acquisition. Strategy I is adopted by all higher plants except graminaceous plants, which adopt strategy II [1,2]. In strategy I plants, the role of nicotianamine is not fully determined: possible roles include the formation of more stable complexes with ferrous than with ferric ion, which might serve as a sensor of the physiological status of iron within a plant, or which might be involved in the transport of iron [1]. In strategy II (graminaceous) plants, nicotianamine is the key intermediate (and nicotianamine synthase the key enzyme) in the synthesis of the mugineic family (the only known family in plants) of phytosiderophores. Phytosiderophores are iron chelators whose secretion by the roots is greatly increased in instances of iron deficiency [2]. The 3D structures of five example NAS from Methanothermobacter thermautotrophicus reveal the monomer to consist of a five-helical bundle N-terminal domain on top of a classic Rossmann fold C-terminal domain. The N-terminal domain is unique to the NAS family, whereas the C-terminal domain is homologous to the class I family of SAM-dependent methyltransferases. An active site is created at the interface of the two domains, at the rim of a large cavity that corresponds to the nucleotide binding site such as is found in other proteins adopting a Rossmann fold [3]. 
23.80 23.80 23.90 23.80 23.70 23.50 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.86 0.70 -5.24 13 220 2012-10-10 17:06:42 2003-04-07 12:59:11 11 2 133 12 98 214 3 240.50 29 85.46 CHANGED spN...ssslVcKIssLYstIS+LsSLs....PSscVssLFTcLVssClPssP.lDVoK..Ls..clQchRspLI+LCupAEGhLEuHaSshLuua-.NPLsHLshFPYasNYl+Lo+LEaslLupasst.sPs.+lAFIGSGPLPLTSlVLA.....opHLs.....................sTsFcNaDhsusANspAppLls.uDssL.usRMsFHTuDlsclss-LssYDVVFLAALVGMspE-KsKllsHLu++MAsGAsLllR.SAHGA.....RuFLYPllDPp.Dlp..GF-VLulaHPs.D-VlNSVIlARKhh.ss ...............................................................................................tt........hltplhth.ttlt..pL.s.t......p...p......pth...Ft...pL...hs.....p....ch.p.....Lt...phpthhtplhplps.ApuhLEtahu.p....l...s...uh..p......s........P....p..p.......l.p...F...sYassY.pLsphEhphhsh..thst..........st..+VhFlGSGPLPhoullLs.....tph.hs....................................sshh.shDh-ssA....tphAp.pLlp...s...s......L........p.pphp.Fhsss...h.t.p............h..s..p..s.lt.taDhlhlA.uLVu.......pKtplltpLtphhs.......s.....uu..hLlhR...supGh.....Ru.hL.Y..s...lc.t...pht...saphhs.h.P...........h.so.hh........s...................................... 0 27 56 80 +3480 PF04159 NB NB glycoprotein Bateman A anon Pfam-B_1501 (release 7.3) Family The NB glycoprotein is found in Influenza type B virus. Its function is unknown. 24.40 24.40 24.70 80.70 21.60 24.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.43 0.72 -4.27 24 1614 2009-09-10 20:28:57 2003-04-07 12:59:11 8 2 1536 0 0 543 0 95.00 93 99.71 CHANGED MNNATFNYTNVNPISHIRGSVIITICVSFTVILTVFGYIAKIFTNKNNCTNNsIGL+ERIKCSGCEPFCNKRDDISSPRTGVDIPSFILPGLNLSESTPN ....ATFNYTNVNPISHIRGSlIITICVSFIVILTIFGYIAKIhTNRNNCTNNAIGLCKRIKCSGCEPFCNKRGDTS.SPRTGVDIPuFILPGLNLSESTPN............. 
0 0 0 0 +3481 PF00931 NB-ARC NB-ARC domain Bateman A anon [1] Domain \N 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.75 0.70 -5.55 12 16257 2012-10-05 12:31:08 2003-04-07 12:59:11 17 1033 906 35 4750 16655 176 210.80 21 35.74 CHANGED hcs.l-plhp+Lhp.p...pphtllulhGMGGlGKTTLAppla.pc.p.lpppF.DhhhWlsVScphst....hclhcs..lhpcLt.ss..h..........................spcs.sp.httclpchLppK..RaLllLDDVWcc...pa-plt.shsstp.ts+lllTTRspsVstthss..sphhplp.hLp.-cuWpLFppc.....shcpp..........tpsplE-lu+plspcCpGLPLulpslGuhhtpKp.pl.pEWcchhcp.hppphht..p......splhs.hlthSYcsLst.pLKpCFlhhu.......hFPcshphttcplhchWhspshl.sps. .....................................................................................................................................................................................................u..G..l..GK.TT.......l....A...........p.....t....l........h....p.......p.........................h.......p.........p.....p.........F........p.....h.....h.....h......a.....l............s.....s.....s....p....p..h.sh.................hpl...pp.......l...h...p...p..l.....t..h.p.......................................................................................t...s.....pp.....hh....ph....l....p......p.....h.....L.......p.......p....+...........+.............h...L......l....l....L....D............D.........l...............p.p...................ph.........p...........t...............l...........t......t..................h.....................................h........s..............................G...........o............+.........l...l..l...T......T..................R.........p.........p.........p.........l..............h......t...........h.........h........t......................................t........t..............h.......h.....p....l........p............L................s............p....p...........c.....u....h....p....L.....Fp..pp.......................................s.h..tpp..............
...................p.h............p.....l...u.......p.......p...l...s..p..h.......s...t...G.......l..P.L.Al...................h....s.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 726 2813 3845 +3482 PF04485 NblA nblA; Phycobilisome degradation protein nblA Kerrison ND anon DOMO:DM04316; Family In the cyanobacterium Synechococcus PCC 7942 (Swiss:P35087) , nblA triggers degradation of light-harvesting phycobiliproteins in response to deprivation nutrients including nitrogen, phosphorus and sulphur. The mechanism of nblA function is not known, but it has been hypothesised that nblA may act by disrupting phycobilisome structure, activating a protease or tagging phycobiliproteins for proteolysis. Members of this family have also been identified in the chloroplasts of some red algae. 25.00 25.00 27.10 27.00 23.20 20.70 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.41 0.72 -4.10 14 86 2009-09-10 15:48:25 2003-04-07 12:59:11 7 1 55 22 26 74 5 52.30 38 84.84 CHANGED s.sLoLEQcFpLphappplpplshEQspchLl-hh+QhMl+-Nll+tllKpuh ....pLSLEQpFpLpshpppVpphSpEQAp-aLlchhcQhMl+-Nhh+pLlKpp..... 0 3 18 26 +3483 PF03801 Ndc80_HEC HEC/Ndc80p family Bateman A anon [1] Family Members of this family are components of the mitotic spindle. It has been shown that Ndc80/HEC from yeast is part of a complex called the Ndc80p complex [1]. This complex is thought to bind to the microtubules of the spindle. 
20.50 20.50 20.70 20.50 19.80 19.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.88 0.71 -4.80 37 358 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 283 5 242 368 4 153.10 29 25.72 CHANGED stpc+oshht........sRsShs....shtspps............+DPRPL....+D+saQsphhpplhcaLsppsa....spslo.+sLpsPopK.-FhtlFpaLYpp.lDPsaca...tp.ph-pEl.plLKpLpYPa..sIoKSplsAlGusp.WPphLuhLpWLhpLsphhsphhsp.......Dpshtspth ............................................................................s..................R.S.h.......s...t.tp...................pDsRPl.........pD+saptphhpplhcaLspp.sa............ttsl.o.....p..sl.ps.....PotK.-F.hlFpaLapp...l-.Psaph.......................pp.ph.--E.ls..lLK.pLpYPa..sloKSpltAsGusptWPphLuhLpWLhclsp.hhpthhppp.............ss.t...................... 0 82 134 203 +3484 PF00334 NDK Nucleoside diphosphate kinase Finn RD anon Prosite Domain \N 21.60 21.60 21.80 21.80 21.40 21.50 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.50 0.71 -4.37 171 5820 2009-09-13 01:46:53 2003-04-07 12:59:11 14 37 4319 479 1940 4004 2188 131.60 43 80.92 CHANGED E..RThsllKPDuVpRs...LlGcIlsRh.Ecc.Gh+lluhKh.hplocchAcp..aY.....scHps.+P....FFssLlpahoS.....G.P....llshVlEG....c.sulptsRplh....GsT.sPtc.A...tPGTIR...u.DFuhsh........sc...NslHGSDSsE..oAp+EIshaFs...tpEl .....................................................................E+ThslIKPD..u.V..p..+.s.............l.l.G.c.I.l.s.Rh.Epp..Ghcll.u.h..K..............h.h..............p.l..o.c-.p...Ac...p...aY................................u-.....H....ps...+P................F..Fs..s..L.l..c.F.MoS.................G..P.......l.l.s..h.............Vl...EG..............cs..Al.pp...hRplh.......G....uT..sPtc.A......................tsG.....TI..R...u..Dauhsh................sc........NslHG..S.DS..E..o..Ap+EIshaFst............................................................................. 
0 667 1125 1540 +3485 PF05031 NEAT Iron Transport-associated domain Yeats C anon [1] Domain This domain is involved in the transport of iron, possibly as a siderophore. 28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.50 0.71 -4.11 53 2305 2009-01-15 18:05:59 2003-04-07 12:59:11 7 72 393 47 143 1431 4 121.10 25 39.58 CHANGED ssspltcGpYslsaplhKcss....s-tShhssYhpcPuplplcsGKphlplTlss..o.....sahpshplp....sGp...........hhcscll...Scsp........tps.s+s.lcFc.Vsslspclssphplhl.....h...sYctpaplphthDsss .....................s..pt.tssphslsapl.hKs...ps.....spp.S.hh..s.sah.p.cPuplhh.p.s..GK.hh.lp.h.slps...u......................sahpcaplp...tsst.............................thss.cll........ScDp...................pps...s+s..lpFt..Vs....s......hsppls.shlcl.l.....sp...h....sYcapYslph.hp................................................... 0 46 94 107 +3486 PF00880 Nebulin Nebulin_repeat; Nebulin repeat Bateman A anon Pfam-B_1603 (release 3.0) Repeat \N 20.10 20.10 20.20 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.86 0.73 -7.23 0.73 -4.11 143 8841 2009-01-15 18:05:59 2003-04-07 12:59:11 13 120 100 0 4092 7213 2 28.70 29 38.71 CHANGED DsPphhpA+pssphhS-hpYKcsaccp+s .....DsPchhpA+pstchhS....-..hpYKcsa-ct+........ 0 255 486 1390 +3487 PF04299 FMN_bind_2 DUF449; Neg_reg; Putative FMN-binding domain Kerrison ND, Bateman A anon COG2808 Family In Bacillus subtilis, family member Swiss:P21341 (PAI 2/ORF-2) was found to be essential for growth [1]. The SUPERFAMILY database finds that this domain is related to FMN-binding domains, suggesting this protein is also FMN-binding. 
20.70 20.70 20.70 21.20 20.50 20.30 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.14 0.71 -4.76 129 884 2012-10-02 11:35:36 2003-04-07 12:59:11 7 4 757 2 337 872 133 165.30 34 77.90 CHANGED MYlPstFp..ts-hstltphlcppshuhLl..............o.t....ssss...l.AoHlPhlLc...tt..........sspshLh..uHlARuNPph...ppl................ptss.p..lLllFpGP.cuYlSPsWY.....ps.tcsVPTWNYtuVHsaGphplhp...Dsc......hltphlscLospaEssh.........tpPWp...hs-sssc..alcthl+uIVGhclpIscl ............................MYlPttFt...sc.ptltph.lpppshusLl.....................o.p...ssss.......Ao.HlP..hhLs.....t.........................tsspshLpuHlARsNPpappl................................ptsp..c....VLllFpG....s....cuY...ISPsWY...............ps.....tc.....t.....VP.TWNY.huVHsaGp.hplhp.Dpp............hltshlscLoppaEssh......................tsPWphs-ssts..alcthh+uIVGhcIplscl............................. 1 89 193 279 +3488 PF00960 Neocarzinostat Neocarzinostatin family Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 27.70 27.20 22.60 23.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.04 0.72 -10.55 0.72 -4.13 4 25 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 19 25 6 38 1 109.60 34 72.85 CHANGED usoVoPAoGLSDGpoVoVSuoGhssGTsYpluQCAhVssGhhACNsAshoshTsDAsGpuosSlsVR+SFpGhlhs.GT.hGoVDCsTs..uCplGlGssuG-hup.sAIoFu ...........sloVoPuoGLoDGpoVoVouoGhssG.oshtluQC..As..ls..s..GthACssAsssssT.sDAsGss.osolsVR+SFsu.shs.Gsth.uoVDCs.Ts..uCtlusus...susssut.ssloF..................................... 0 4 6 6 +3489 PF03391 Nepo_coat Nepovirus coat protein, central domain Mifsud W anon Pfam-B_3589 (release 6.6) Domain The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. 
In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure [1]. 20.30 20.30 20.50 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.72 0.71 -4.99 11 339 2012-10-04 01:49:40 2003-04-07 12:59:11 10 7 28 141 0 391 1 168.10 69 25.28 CHANGED PslsaPhps.tchphhhlhhPPhphsluussuhposshhhupshhsusppsYsasssLlSaaLGhGGol+GcV+psuosFhossLhVs.pWtGsosshppLhphPtshlct.DGphplcIpSPaacTss.hh-ututh.lss......luGPlAPpspsuphtahlpIcpIst..s ........................................PTLVFDPGV.FsGKFQFLTCPPIFFDLTAVTAL+SAGLTLG.Q..VPMVG.TTKVYNLNSTLVSCVLGMGGTIRG+VHICAPIFYSIVLW..VVSEW.N..GT.T..MD.WNELFKYPGVYVE...E...DGSFEVKIRSPYHRTPA..R...LLAGQSQRDMSSLN.FYAIAGPIAPSGETARLPIVVQIDEIVR..P................................. 0 0 0 0 +3490 PF03688 Nepo_coat_C Nepovirus coat protein, C-terminal domain Mifsud W anon Pfam-B_3589 (release 6.6) Domain The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure [1]. 
21.10 21.10 21.90 23.80 20.40 21.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.79 0.71 -4.61 21 412 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 28 141 0 397 0 149.10 66 26.03 CHANGED hshsphF.sWhsl...sshpsss....hphpIPuRltDls........spsssVshtsNPhuhhhussGhHtGp.lpL+hpWsh.psphuptpGslthhphhtchupshsspstl......ssh.ssuhph.lphGsFuGssssu..shshhsc..altlphspucplppLpVslclhsGFpFYGRo ..SF..EDDYF.VWVDF...SEFTLDK....EEIEIGSRFFDFT........SsTCRVSMGENPFAAMIACHGLHSGl.LDLKhQWSL.NTEFGKSSGSVTITKLVGDKAhGLDGPSQl......FAlQ+LEGss-LLlGNFAGANPNo..HhSLYSR..WMAIKLDQAKSIKlLRVLCKPRPGFSFYGRT........ 0 0 0 0 +3491 PF03689 Nepo_coat_N Nepovirus coat protein, N-terminal domain Mifsud W anon Pfam-B_3589 (release 6.6) Domain The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure [1]. 27.30 27.30 27.60 28.80 27.10 27.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.56 0.72 -4.26 21 379 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 32 141 0 386 0 87.90 68 12.85 CHANGED hsaspslplPp-sptGslLuplslhsthpshsuhsatcWlppG..hlpsplclhh+lssssFsGlolhhsaDAasRl.s.sshssshshshshsl ...LAGRGVlYIPKDCQANRYLGTLNIRDMISDFKGVQYEKWITAG..LVMPpFKIVlRLPANAFTGLTWVMSFDAYNRI.T.SRITsSADPlYTLS.V......... 0 0 0 0 +3492 PF00064 Neur neur; Neuraminidase Eddy SR anon Overington and HMM_iterative_training Family Neuraminidases cleave sialic acid residues from glycoproteins. Belong to the sialidase family - but this alignment does not generalise to the other sialidases. Structure is a 6-sheet beta propeller. 
19.60 19.60 19.80 19.60 18.90 19.30 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.77 0.70 -6.40 7 29236 2012-10-02 00:45:24 2003-04-07 12:59:11 13 4 28530 235 0 16025 0 436.40 57 99.38 CHANGED MNPNQKIlsluulululsllslLlpluslhlsls.hh+ttt....pps.ssps..p..Npslptpshsp.sshshhhs....ppspahs.sculCslpGashhuKDNuIRlGpp...uplhVhREPaVSCsPsEC+pFhLoQGshlssKHSNGTl+DRosaRsLhSs.LGpsPslhpo+FcsluWSuouCHDG+tWhsIsloGsDssAsAslhYsth.T-sIpSWu+sILRTQESpCVClsGsChhVMTDGsAsspAph+IhhhccG+Il+pp.lousupHlEECSC.YsppspVpCVCRDNWpGuNRPllpls.p.hsapstYlCoGlhsDTPRspDsshossCs.Ns.ssps....GVKGFuacpGss....sWhGRTISpsSRSGaEhlhl.sGhopssS.p.hp+QslVsspsWSGYSGuFh.hst.st.pChsPCFaVEhIRG+Pc.EppshWTSsS.lshCGsssphssWSWsDGAplsa ....................................MNPNQKII.TIGSlshsIuhlslhlQIu.llohhs...H.hQht....spsp......p..p..s........s........s...h....p...p..sh...s...p.......s...Y..ls...ss..hph.hs....hhu..hh.shsps.CsloGaA.aSKDNSIRlGup...GDlaVhREPalSCush-C+pFhLoQGshLNsKHSNGTl+DRoPYRTLhSs.lGpsPsshNo+hcslAWSuSuCHDGhuWLpIsloGPDssAsAslhYNGhlTDoIpSWpps.ILRTQESECsClNGoChsVMTDGsu.sGpAs.h.K..Ih+I.EcGKIVKospL.s.usshHhEECSC.YPc.tsplpCVCRDNW+GSNRPh.VshN.s.hphphuYlCSGlhGDsPRPNDuouSsssh.sPssppG..u.GVKGauF+.GNs....VWhGRThSpsSRsGaEhhhs.sGW.os.ssS.s.sh+QsIVshssWSGYSGuF..asp.st.sCIpPCFaVELIRGRP.c.E..pslWTSsS...lsFCGssushsshSWPDGA-lsh............... 1 0 0 0 +3493 PF02932 Neur_chan_memb Neurotransmitter-gated ion-channel transmembrane region Bateman A, Sonnhammer ELL anon Prosite Family This family includes the four transmembrane helices that form the ion channel. 
23.10 23.10 23.20 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.56 0.70 -4.48 50 6256 2009-12-16 13:54:54 2003-04-07 12:59:11 11 81 288 162 3294 5277 71 169.90 20 43.13 CHANGED lllPClLIohLohLsFaLPsDuG.pKsoLuIosLLohTsFhlllscplP.poShslPLlspYllhhhhlssh.lhhsllllNsphRsP.psHphsphl+phhl.phlPphh.....hhppsstshssspstptttstsh.h.................................................................................................................................................................sststtstsssssss.hphssplcpulpus...............paIupp....................................................................................h+pcsp..p.tpsWhhsutslDRlthhlFslshllsolsha .................................................................................................................................................hhlPshhlsh..l..o....hl.s........Fa....l........s............c..u...s.................t+h.s...L...............u...............lo..s.l......Loho.s.....h...hh........hp..p.tlP......t..s......S......h..s...........h........s....l...........................h..h...............h.h..........h..........h..h..hh...l..hs......h.......l..................h.s..h.l.s.....h.............h....p..h...p....t.............t........h............h..............t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 922 1195 2303 +3494 PF02158 Neuregulin Neuregulin family Mian N, Bateman A anon IPR002154 Family \N 20.30 20.30 24.60 22.80 19.40 19.00 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.50 0.70 -5.69 2 267 2009-09-11 01:37:10 2003-04-07 12:59:11 10 8 42 4 77 244 0 313.30 50 58.48 CHANGED AEELYQKRVLTITGICIsLLVVG.MCVV.AYCKTKKQRpKLpDRLRQSLRpcppNlsNhsNtPHpP.NPPP.cNVQLVNQYVSKNVISSEHllEREsETSFSTSHYTSTsHHSTTVTQTPSHSWSNGhoEShIS.cS.SVIhhSSVENSRHoSPsG.PRGRLpGlGG..c.sSaLRHAR-TPDSYRDSPHSERYVSAMTTPARMSPV-F+TP.SPKSPs.EhSPP.SShsVShPSVAVSPFlEEERPLLLVoPPRLREK+YD+........pQhNSaHHNPuHpSoSLPPsPLRIVEDEEYETTQEYEss.EPsKKlsNSRRtKRTKPNGHIuNRLEhDSsoSS.SSsSESETEDERlGE-TPFLuIQNPLAASLEsAshaRhA-SRTNPsuRFSTQEELQARLSS 
.......................AEELYQKRVLTITGIClALLVVGIhCVV.AYCKT....KKQ.RK...phHs+LRQs..hpsc+.pN.hhslANG..P..pHP.tsPs.E..plQhs.sp.YlSKNl...uo-Hllc+EsET..o..FSsSH.sSsu.HHso.TsTpT.s.....S...........H.......o.......WS.t+oESlhS.-SpS.slhhSSVtsS+psSPss..sRuR...hs...u...hsu.......c....c....ps..hh....th+-o.DShRDSP.HS..ERYVSA...hTTPARhS.......P.V-Fchs.....sspsPs.phosP.uuh.shS.hPshs.hs.h.h.t-pp..PLl..h..................................................ts.......................................................s.S.Pso.Ph+l..--..ph-sh......pph.ss.................p..........t......p...t+sp.sG.hu.c........sp.t..u.s.s.............o.........s-t-.-t...........uEsTPFLuhps............sh........tsRT........................................................................................................................................................................ 0 2 9 27 +3495 PF03823 Neurokinin_B Neurokinin B TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 28.00 35.90 19.50 18.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.91 0.72 -4.22 3 39 2009-09-10 23:24:23 2003-04-07 12:59:11 9 2 26 0 16 39 0 54.90 63 37.49 CHANGED MRSsLLFAAILALSLApSFGAVCEEPQEQVVPGGGRSKKDSDLYQLPPSLLRRLYDSRs ......MR.hLLFsAILAhSLApSFGAVCcEsQEpssPGGG+SK.+DsDLYQLs..sl..LRRLacS+S.................. 
0 1 1 4 +3496 PF00243 NGF Nerve growth factor family Finn RD anon Prosite Domain \N 21.30 21.30 21.30 21.70 21.00 20.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.57 0.71 -4.36 7 3765 2012-10-02 16:54:34 2003-04-07 12:59:11 13 4 2066 24 107 2860 0 90.40 67 44.04 CHANGED psht+RGEhSVCDSlSlWVT..-KpoAsDl+G+pVTVLs-Vshssus.hKQYFFET+C+ss.......sssssGCRGIDp+HWNSpCpsopoaVRALTh-spp.VuWRaIRIDTACVCsLopKoGp .............................p-..su+RGEhSVCDShSc..WVT...-Kp.TAVDhpG.tpVTVLtcV...sls.put.lKQYFYETKCpsh..........................t....s.K.pG.CR.GIDt+a.WN.S.CpTopo....aV+AL..T.-tp............................................................. 0 12 21 45 +3497 PF02979 NHase_alpha Nitrile hydratase, alpha chain Griffiths-Jones SR anon Structural domain Domain \N 27.20 27.20 27.20 27.20 25.90 27.10 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.23 0.71 -5.02 37 386 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 295 57 122 382 231 157.00 41 86.98 CHANGED sp.o-hthRspALEplLhEKGllssusl-thl-haEschGPpNGA+VVA+AWsDPsF+ttLLpDuoAAhtEl....GasGtp..G.........EahlsVENTsplHNllVCTLCSCYPWPlLGLPPsWYKossYRuRsV+-PRsVLt.EFGlsLPsssclRVWDSoAEhRYlVlPhRPsGTEGhoE-pLAsLVTRDoMIGsuls ...................p.....t.h.hAL.phL.tKGllsst.lcthhphh-pp.u.PttGAclVA+AWsDPsF+thLLtDusuAh.tp.h.....Ghts.p.G......................phhhslEsTsph+NllVCoLCSChsWslLG.LPPsWYKs.tYRuRhV+-PRtVLs.E.hGhplsscsclRVaDooA-hRYhVlP.RPsG.T-.shsp-tLttlVo+-shIGsu......................... 0 38 69 88 +3498 PF02211 NHase_beta Nitrile hydratase beta subunit Bateman A anon Pfam-B_5347 (release 5.2) Domain Nitrile hydratases EC:4.2.1.84 are unusual metalloenzymes that catalyse the hydration of nitriles to their corresponding amides. They are used as biocatalysts in acrylamide production, one of the few commercial scale bioprocesses, as well as in environmental remediation for the removal of nitriles from waste streams. 
Nitrile hydratases are composed of two subunits, alpha and beta, and they contain one iron atom per alpha beta unit [1]. 21.30 21.30 21.70 21.70 19.80 21.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.69 0.70 -4.64 33 440 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 230 80 156 463 350 165.30 25 93.48 CHANGED MsGsHDlGGhpGh.GPV..s.-....s.--PlF+t-WE+RAhulphAhsu..hGt.....................WslDpsRaAtEphsPs-Y...lpsoYYc+WltulppLhl-pGhlop-ELst.......uphhpsstshsp.slsA...-tVsshLtcGsPspR.sssssspFtlGDpVRs+.shsss.....uHTRLPpYlRG+sGplt.thaGsaVaPDosA.cGtGEs.PpaLYsVtFsupELWGss.ssssssVslDlWEsYL-sA ..................................................l....p........t..tts..sFpt.WEtpsaulhhshtt..hGh.....................ash....--......h......R.......h..........u....h.......E....p.....ht.....s.....t.pa.......h.t.hsYYc+WltulppllsccGllotpELtth...........t.....st.....................................................................p.l.h..............a.Rh..ah.th.G.l................................h.l...................................................................................... 0 31 80 108 +3499 PF01292 Ni_hydr_CYTB Prokaryotic cytochrome b561 Finn RD, Bateman A anon Prosite Family This family includes cytochrome b561 and related proteins, in addition to the nickel-dependent hydrogenases b-type cytochrome subunit. Cytochrome b561 is a secretory vesicle-specific electron transport protein. It is an integral membrane protein, that binds two heme groups non-covalently. This is a prokaryotic family. Members of the 'eukaryotic cytochrome b561' family can be found in Pfam: PF03188. 
25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.42 0.71 -4.89 84 1155 2012-10-03 10:28:09 2003-04-07 12:59:11 15 11 975 0 237 5046 1484 182.50 26 73.89 CHANGED ta..shhhR...........hhHWhhAlhhlhhhh...oGhhhhhh..hhhhh...................................................................hph....Hhh..hGhshhslh.lh+lhht.....................................hptphPthhttsshtph...................hhhhlhhhhhhhsloGhhh.............................................stshtphhthlH..hhuhhlh.shlhlH.lhtslhpphh.t...slppMhsG .............................................................................h.ashhsRlhHWssAlhhllLlh....oG.lh...h....s.h.h...s.h.s.h.st.h....................................................................hsl..........Hth...hGahLluhh..hhhll.thh..............................t.hhth..s.h.......t...t...h.t...p..h...h....h.h...h......h...t..l..h..p.....s..c..........t.....s...h....s....u...t..s.sph.....................hhh.l.h.as.L.l.sllllTGllhhh......................s.....h....................................hs.uh..t..h..h..hth.....H..hhlAhlhl..halhsH.l.h..hs.h.hs..ph.t...p.......hhcuMlsG........................................... 0 56 139 181 +3500 PF04097 Nic96 NIC; Nup93/Nic96 Wood V, Finn RD anon Pfam-B_5541 (release 7.3); Family Nup93/Nic96 is a component of the nuclear pore complex. It is required for the correct assembly of the nuclear pore complex [1]. In Saccharomyces cerevisiae, Nic96 has been shown to be involved in the distribution and cellular concentration of the GTPase Gsp1 [3]. The structure of Nic96 has revealed a mostly alpha helical structure [4]. 
19.10 19.10 19.70 19.50 18.90 18.70 hmmbuild -o /dev/null HMM SEED 613 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -13.15 0.70 -6.39 40 382 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 274 4 256 382 2 536.10 25 67.14 CHANGED s.plh-tapths..............................sshcscpls-hWphlpphh.....................shpusshptpllpsu+paLEppahpalpshlt+s.pcAthtGhPsshs+lpualch+ltpts......sLt...l...supPlWsllaYhLRsGhhppAhchl....tpthpphcpsFhsahpsaspstsppLsschps+lptEasppl+ss.t............DPYKhAlYplI.....G+CDl....spp...shs.plspolEDaLWl+Lsll+pss.t.sps.......h-pasLs-lQshlh.paGpppFss......stps.hYaplLlLoG.FEtAlpaLhc.....hspsDAVHhAIuLs.huLLpsust.p.........pplLshssps......lNauRLlspYs+.pFchsDsptAlpYhhhlshhps...............ppth.shhpcslp-L.....lL-oR..-FshLLG.clpp.cGp+..hsGll-c.........lhthps.cp.hpplsppsApc..s-ccGphp-AlhLYpLup..............paDpVlsllN+hLupslspsst......................................stospsslhthApplhchYpsssth..p..lpspscpT.hplLlplhphhshappspappALptlppLs.llP.............ss.sp..lcctuppFps..lssplh+slPslLlhsMssltphhpplpssthss.tp................ppplspL+ppA+slh..haAuhlpY+hP 
.......................................................................................................t...hhthhtth...............................tthp.ppp..hschWhhhpth.......................................shpshphphphlptuppaLEppahpahpshlhts.ppAthGG......hPsshp.......hl+ualpl+h.t.t..........tht....s....................pspslWsllaYhlRsG.hptAhphl...........pth..pp.....tpF..hshhppa.........hp...........s..................sppl...s..php..pp.lp.capptl+pss.........................DPa+hAlYpll.........GpCDh........sppp....tls.pshEDal....Whp...L....s.lp.ssts.p.s....................p.hsLtplQp.lh.ph.Gt..phFst.........ttp.hhah.lLhLsu.FEtAltaLhp.....h..scAVHhAlsLt.htLLthsst.p.................tthlshpstp.........................lN..hspllh.Ysp.pF.c..ssspt...AlpYhhhlp..ts...............................pt..phhhpslp-l.....llc..o+...pashllG..chp...cGp+.........h.Ghl-p......hts...t.hpplh.psAp...spppGh..hp-AlhLYpLAt.................................................ph.-pslplhsphL.upslst..t......................................s.sttpp.h.hAtphhphaptps...h........................hpt..pps...hhlLlplhphhs.hpst..p....h.tpA..l...c.lppLp.llP.............hs.tp....lcptsttaps..hsstltpslspllhhshphl.p.hp.phttst.ts..t.................pt.hppl+p.upslh...auu.l.a+h............................................................................................. 0 88 139 216 +3501 PF03824 NicO High-affinity nickel-transport protein Griffiths-Jones SR, Kerrison ND, Finn RD, Eberhardt R anon Manual Family High affinity nickel transporters involved in the incorporation of nickel into H2-uptake hydrogenase [2] and urease [3] enzymes. Essential for the expression of catalytically active hydrogenase and urease. Ion uptake is dependent on proton motive force. HoxN in Alcaligenes eutrophus is thought to be an integral membrane protein with seven transmembrane helices [4]. The family also includes a cobalt transporter. 
25.00 25.00 25.00 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.72 0.70 -5.09 20 2471 2012-10-02 18:22:22 2003-04-07 12:59:11 11 9 1720 0 612 1814 250 244.40 22 78.85 CHANGED hhuhshLuashGlhHAh-.ssHhtAl...lpshhtsscssltsGhhhohupSslhslhulhlshushhltp.....hsthpphsuslth.lsusFlllluhhshhlhpslhchatt..t....stt.tsthpthhtt.....................................t.hs+hht.......h+.h.tpphHhhssG...u.G..susphthhu.sst...th.shshhshhsasslFuuGhp.ssuu.sllhhuhhshsahs..hhthhasholsGsulsluhhsshlhshphlAp+hshpss...hstlstls...hphlGhhllhlhulshlsuhlh ............................................................h.hhhhhhuahh....G....lhHAl.s.s.sHhtsl................ht.phh......h...........s..t...........c.....s.s.l...t..........uhh....hu....lutohhh..shhs.lhl..sh..sshhltp.................thp.thsu.t.h.hht...l......s.shll...llu.h..s..h.h.h.h.......ps.hh.p..hth.htt...............t.t...h..p.th.h.................................................................................................t.h.........................t.....h...p...c..ph+..h...h......h...u...............t...........p....s.sph.th...h.s.s.t........th............sh.s...h.....h....h.h...h....hhhssGh..h.sssh.sllh.h.sh...h...h......shsh....hht...hhhshsl.hs...hh.l..shhhshh...h.p........hs.t..+.h..s.h.ts....hhp..hhshhs.............hhGhhhlhhhh.hshhh....h................................................................................................... 
0 143 336 484 +3502 PF00374 NiFeSe_Hases Nickel-dependent hydrogenase Finn RD anon Prosite Domain \N 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.84 0.70 -5.73 12 4820 2009-09-12 07:59:36 2003-04-07 12:59:11 14 9 1571 65 1336 3444 238 268.90 27 63.43 CHANGED RGLEhILKGRDPRDA.tFspRICGVCThsHALASVRAV-sAlGIclPpNAphIRNLhhtuhhlHDHlVHFYHLpALDWVslssALpADPttsucLspslS.shsh.oss.h..csVQs+lKcFVESGQLG.FtNuYas..p.AYhLPPEssLhAsuHYLEAL-hQ+chschhuIFGGKNPHs.shlVGGss.sh.slDs..........cRL.s.hpuhhcclp-FlcpVYlPDllsluuhYKc..hhhtGGhs.pNhLu....aG-aPps.....hstsohhhPsGslhsushscVhsl....D.t..c...EaVpHSWYc.ss-.huhHPa-GhT-Ppashhs.................D-s.+YSWlKAPRa+G+shEVGPLAphlluhup....uc......pthcc.l-hhltths......lssstlaST..............LGRTsARulEsthssp.ht.hhccLhsNlpsGc.sss.sccW-.sphPppu+GVGhs-APRGuLuHWl+IKDGKI-NYQsVVPoTWNuuPRsspGplGshEtuLlsT.hscPcpPlEILRTlHSFDPClACusH ......................................................................................................................................................................................................................................................................................................RGhEh.lhpsR.....s.h.s.s.h.hh.spRlCGlCshsHshu.shuhE.....s.....A..h.....s......l...p......l.....P.......p......A.phlRslh...t.s.hhpsHhl........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 448 910 1161 +3503 PF04891 NifQ NifQ Kerrison ND anon Pfam-B_6173 (release 7.6) Family NifQ is involved in early stages of the biosynthesis of the iron-molybdenum cofactor (FeMo-co) [1], which is an integral part of the active site of dinitrogenase [2]. The conserved C-terminal cysteine residues may be involved in metal binding [1]. 
19.40 19.40 19.50 19.50 19.10 18.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.37 0.71 -4.09 39 166 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 159 0 76 167 16 160.20 35 80.39 CHANGED cshAsl..Luhuhtptt.tt.usls.hh.GLsssshssLls+aFP........s..thtsh.....sshsss.spE.spLpsLLhsc.....pusssstuthlAtllAptshtssHLWpDLGLtsRsELStLlt+aFPsLAutNspsM+WKKFFYRpLC.-p-GhhhC.puPsCstCsDastCFG .....................................................h....thLuthhttht.t.stssl.s..t.h.GLsssphttLlsph.FP............................t..sthh..h..........sth.ssstspEhtpLtsLLhtp..............tuss..sststhlAtllActshtssHLWpDLGLtsRs-LotLht+aFPsLAspNsp.sM+WKKFhY+plC.cpcGhhlCpuPsCstCsDashCFG.............. 1 20 43 57 +3504 PF01106 NifU NifU; NifU-like; NifU-like domain Finn RD, Bateman A anon Pfam-B_1206 (release 3.0) Family This is an alignment of the carboxy-terminal domain. This is the only common region between the NifU protein from nitrogen-fixing bacteria and rhodobacterial species. The biochemical function of NifU is unknown [1]. 20.90 20.90 21.00 21.60 20.20 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.17 0.72 -4.20 20 3818 2012-10-01 19:25:19 2003-04-07 12:59:11 12 19 2999 6 1182 2450 2182 69.10 37 38.36 CHANGED lpcVl-c.lRPhLhtDGGDlELlcV-Gs....pVpVcLpGACuuCspSshTLpsulEp+Lp-pls.pslcVlsl ..................lctlL-p..lRPhLt.t.DGGc..l..pl..h..c..l..p..cs.......hV.h.l.ph.t.Gu.Cs.GCsu.ushT..L..KpGIEptLhpplP..p..lh.tV......................... 0 376 769 1014 +3505 PF01592 NifU_N NifU-like N terminal domain Bateman A, Wood V anon Pfam-B_772 (release 4.1) Family This domain is found in NifU in combination with Pfam:PF01106. This domain is found on isolated in several bacterial species such as Swiss:O53156. The nif genes are responsible for nitrogen fixation. 
However this domain is found in bacteria that do not fix nitrogen, so it may have a broader significance in the cell than nitrogen fixation. These proteins appear to be scaffold proteins for iron-sulfur clusters [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.44 0.71 -4.19 16 4872 2012-10-01 20:52:23 2003-04-07 12:59:11 11 21 4284 25 1274 2971 2268 125.30 35 78.48 CHANGED tYo-KVhDHahNPRNsGsl-csss.......hsGpVGu.uCGDsh+LplKVctpsshItDA+FpTFGCGSAIASS.ShhTEhlpG+sl--A.hploNp-IActLut......LPs.KhHCSshup-AlcAAltsYcu+phps ......................................................YpphlhDHacN.P......+N..h..G.ph.-ssst........................suh..h.s..s..P..sCG..D.s.hcLpl.+....l.........s..c..............p.............s......h.....I.....pD......s..+Fc..s.hGCu.uhASo.ShhT-hV..c..GK.o....l-EA....t............l........p............p...s...p.l...h...pp.l.t........................ls....s...s..c....h.c...s...s.hLu-s..s.lh.sulsca.t+ht.s................................................................ 0 441 833 1074 +3506 PF03206 NifW Nitrogen fixation protein NifW Bateman A, Mifsud W anon Pfam-B_2891 (release 6.5) Family Nitrogenase is a complex metalloenzyme composed of two proteins designated the Fe-protein and the MoFe-protein. Apart from these two proteins, a number of accessory proteins are essential for the maturation and assembly of nitrogenase. Even though experimental evidence suggests that these accessory proteins are required for nitrogenase activity, the exact roles played by many of these proteins in the functions of nitrogenase are unclear [1].\ Using yeast two-hybrid screening it has been shown that NifW can interact with itself as well as NifZ [1]. 
20.60 20.60 20.80 20.70 20.40 19.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.43 0.72 -3.93 39 208 2009-09-11 15:38:48 2003-04-07 12:59:11 9 1 203 0 87 189 12 100.60 30 92.76 CHANGED Mo.......hlcph.....ccLouAE-FFpahslsYDscVVsVsRLHIL++FspYlspt...sh..ssh.s-sthhsph+shLp+AYpDFlsS.oPhpc+lFKVap-tstp..........hVsLsslps ..................hhpcl.....ppLooAE-FhpaFslsYDscllsVsRLHILKRFspYLttt......c......th.spppthtthRphLppAYpc.FhpS.sshpp+lFKVhppt.............hVslstl..t............... 0 19 52 70 +3507 PF04319 NifZ NifZ domain Bateman A anon Pfam-B_6057 (release 7.3) Domain This short protein is found in the nif (nitrogen fixation) operon. Its function is unknown but is probably involved in nitrogen fixation or regulating some component of this process. This 75 residue region is presumed to be a domain. It is found in isolation in some members and in the amino terminal half of the longer NifZ proteins. 20.30 20.30 20.50 21.10 19.80 18.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.47 0.72 -4.60 41 240 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 176 0 121 231 7 74.10 42 68.32 CHANGED t.PtFphGp+V+sp+slRNDGTaPG..pchG-lLVc+GshGYVhsIGoaLQphh.IYuVcFl-pGhlVGCRccELpss ....t..PtFphGp+V+up+sl+ND.....G..TaPG..pphG-lLl.++G-hGaVhslGoa..L..Qp.h..IYsVcF...h-....p......uh...l......V....Gh+ccEL.s.h............... 0 31 76 98 +3509 PF01077 NIR_SIR Nitrite and sulphite reductase 4Fe-4S domain Finn RD, Bateman A anon Pfam-B_1092 (release 3.0) Family Sulphite and nitrite reductases are vital in the biosynthetic assimilation of sulphur and nitrogen, respectfully. They are also both important for the dissimilation of oxidised anions for energy transduction. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null --hand HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.87 0.71 -4.87 91 15116 2009-01-15 18:05:59 2003-04-07 12:59:11 17 120 3409 86 2437 13876 917 143.60 20 41.54 CHANGED susslRsspsssssshsptthhDs.........................ptlstplp......................tphpththPtKFKlulsG.........ssssCststsp.Dlullustcsth.................................................................................GaslhlGGthutp.ttsthhh......sah.t-.....p.lhphlctllphap........cps+t+p...Rlt.hlcch..Gh-cFtppl..tp.hsp.h ..................................................................htthps.hss..G...s..h.Cchs.shDs..........................................pshsttlt...........................t..h......s..p.h..p..p..h..t..h..P..t......K...h.+..h.....sluu..........Cs.N.s.C.ssst.sp.Dlu..l....lG....h....t...c...p....s.............................................................................................................................shplh.luut.h.s.......................................p...........................h....hh...t....h...h...................................................................................................................................................................................................................................................................................... 0 719 1561 2066 +3510 PF03460 NIR_SIR_ferr Nitrite/Sulfite reductase ferredoxin-like half domain Bateman A, Dlakic M anon Bateman A Repeat Sulfite and Nitrite reductases are key to both biosynthetic assimilation of sulfur and nitrogen and dissimilation of oxidised anions for energy transduction [1]. Two copies of this repeat are found in Nitrite and Sulfite reductases and form a single structural domain. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.90 0.72 -4.35 174 13756 2009-01-15 18:05:59 2003-04-07 12:59:11 12 109 3391 98 2671 12133 911 66.90 22 18.80 CHANGED htpps.t.h..hlplpls.s.....Gpl.ssp....phptluclu.....c..cau.t..upl.+lTspQs.l.ltslptpplsslhptLtph ..........................th......t....atlplphs..u.........Gpl.osc..................tlptls-lA.................c....cas..t......Ghh.+h.Tspps.l.hh..sspppplttlhptLt..s..................... 0 772 1683 2259 +3511 PF02665 Nitrate_red_gam Nitrate reductase gamma subunit Bashton M, Bateman A anon COG2181 Family This family is the gamma subunit of the nitrate reductase enzyme, the gamma subunit is a b-type cytochrome that receives electrons from the quinone pool [1].\ It then transfers these via the iron-sulfur clusters of the beta subunit to the molybdenum cofactor found in the alpha subunit [1]. The nitrate reductase enzyme, EC:1.7.99.4 catalyses the conversion of nitrite to nitrate via the reduction of an acceptor. The nitrate reductase enzyme is composed of three subunits [1]. Nitrate is the most widely used alternative electron acceptor after oxygen [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.44 0.70 -5.00 66 2225 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 1577 10 386 1081 183 214.50 41 82.12 CHANGED ls.phLaslaPYlslslhllGohhRacp-QaoWpocSSQlLcp+.tL+hGSsLFHhGILslhhGHlhGLLhPpshhcslGlopthhph.hAhhsGuhsGlhsllGhslLlhRRlsssRlRtsoohsDhhlLllLhlplhlGLhohhhuttph.......-hshhhplusWhpulhshpPs..uphlsslshhaKlHlhhGhslFslaPFTRLVHlaS..sPltYLtRs.Y.lhRpRp ...............................................................................................N.FhasIaP..YlshslFllGshhR............Y...cYsQaoWput...S....S.....QhL..-.++.thh.lu.S..sL.F..HlG.......I.L....s..l...FhG.H..h..h......G.hL...sP.....c...h...h...h...p....s.h........l....s............p.....s...h.....p....h.......h.A..h....hhG..u......h..s......G..l..ls.L....lG....h.s.hLlhRR.l.h..s..t..R....V...............R.s....s..o....o....ssD..ll..lhhlL..llps..hLGLhohshospph............Dhs...h....hp.lss...W.......h.p.S..lh...o...F.p.s....s......uph.h.s.sV.s.h.l.F.+l....HllLGhT.lFhlaPFTRLVH...l...aS...sPl..p..YlsRp..Y.lhRpR.............................. 
1 127 252 327 +3512 PF02087 Nitrophorin Nitrophorin Mian N, Bateman A anon IPR002351 Domain \N 20.90 20.90 21.10 21.00 20.70 20.80 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.31 0.71 -4.49 3 15 2012-10-03 08:47:39 2003-04-07 12:59:11 10 1 3 66 0 101 0 166.40 42 85.60 CHANGED CSTNISPKpGLDKAKYFSG.sWYVTHYLDKDPQ.VTDpYCSSFTP+ESuGTVKEALYHYNuKKKTSFYNIGEGKLESuGlQYTAKYKTVDKK+sVlc-ADsKNSYTLTVLEADDSSALVHICLREGsKDLGDLYTVLoHQKDuEPSAKVKuAVTQAGLKLSDFVuTKDLuCpYDD.QFTSL .........................CopNlpsKpshDKsKYFoG.sWYVTH.Y.L..D.p-P..p..V.T.D.cYCuuF.s.s.+.p...u...sG.p...V..K...E.ALY.HY.Nsc.scs...o.F.Ys...l...u..E..u..p..l....pS....sG.....KY.T..A..K..ap...pVDKct..c.clc.cs.s...t..s.pYT..h..T..lh-sD.D..S.p.AlV..HhC...h....p...cus...c...s....l..hsLY..s....VLsRs..c..s..sp..s.s.s.KVKsAl.s.psuLKLscFhssKs....sCpYDs.phh............................................................................. 0 0 0 0 +3513 PF00881 Nitroreductase Nitroreductase family Bateman A anon Pfam-B_481 (release 3.0) Domain The nitroreductase family comprises a group of FMN- or FAD-dependent and NAD(P)H-dependent enzymes able to metabolize nitrosubstituted compounds. 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.94 0.71 -4.42 141 16517 2012-10-02 14:48:17 2003-04-07 12:59:11 19 110 4534 206 3919 12487 3614 164.20 18 68.07 CHANGED lppR+Sh..R.pFpsps......lsp-p...lppllpsA.................ph......uPoutsh....psh....chhll.p.......stchp.......pcltphhht.h..................................................................thtpthhtsAshhlhh..........................................ssptttthshh...ssuhsspshhL..sApshGlusshhusht.......ttt............lpchl.sls............tplh..hhlsl..Gh ...............................................................................................................................................................................pR+Sh..+..p....a.p.sp.......lsp-.p...l.p.p.l.l.p.su............................................ph............uP.S.u.t.sh............Qsa.............+hlll...p..............stct+...................................pclt.ph..h......t...............................................................................................................................................................................................thttthh.t.p.A....s..h....hl..hl..........................................................................h....h.s.t...p....p.......h.......h.s.h.h.......ss.uhsst..s..h....hl..uA.p.s.h..G..lus...shhsuhp...................................tpt..........lpc...hl....s....ls.......pp........hp.h...hhlsl..Gh........................................................................... 0 1261 2568 3341 +3514 PF05211 NLBH Neuraminyllactose-binding hemagglutinin precursor (NLBH) Moxon SJ anon Pfam-B_6567 (release 7.7) Family This family is comprised of several flagellar sheath adhesin proteins also called neuraminyllactose-binding hemagglutinin precursor (NLBH) or N-acetylneuraminyllactose-binding fibrillar hemagglutinin receptor-binding subunits. 
NLBH is found exclusively in Helicobacter which are gut colonising bacteria and bind to sialic acid rich macromolecules present on the gastric epithelium [1]. 24.40 24.40 24.50 24.60 24.00 24.30 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.56 0.70 -5.14 3 170 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 54 3 13 172 0 241.00 39 94.79 CHANGED KKshLAluLuSLLlGCAhasulEsthPsctppppsspohE....osEhsh.LcFNYPl+ucQsPpNcHlVlLLcP+IQlSDNIsKsYpcKFccuLhLQlpoILE++GYpVlpF..pDcc-lshspKKKuaLVLcMcGpVuILEDlKhslc-ss........uLsc-clVDhSSGaVplNFlEPcSs-llHSFulDVSclcAlpcplcoT+ouSGGF.lsKThVHc.K-T.N+-DAI+KILN+hYAsVMp+hsKELTK+NlE+YcKsucEMKs+K ..................................................................................pKs.luhsluulLlusu.p...hht................................t.sh.hphsY.hpucp..s.tss+h..llLLcP.plQhSD.N.IsKsYpsKFcsuLhlQlppILc++GYp..V..lph...pDcs-lshsp..K+cuaLsLchsGplsl.LpDhKhs..l.pcss..........uhsctcsV.hSuGalplsFlEPpSs-slcSFslDlSclchhpc.....hlchp+o.suuGh....ls......s.......oh.....V.........Kts....NpsDAI+plLN+hYsplMpcls+cLTpKNl-pYcKssc-hKsp+............. 0 3 7 13 +3515 PF04170 NlpE NlpE N-terminal domain Mifsud W, Bateman A anon COG3015 Domain This family represents a bacterial outer membrane lipoprotein that is necessary for signalling by the Cpx pathway [1]. This pathway responds to cell envelope disturbances and increases the expression of periplasmic protein folding and degradation factors. While the molecular function of the NlpE protein is unknown, it may be involved in detecting bacterial adhesion to abiotic surfaces. In Escherichia coli and Salmonella typhi, NlpE is also known to confer copper tolerance in copper-sensitive strains of Escherichia coli, and may be involved in copper efflux and delivery of copper to copper-dependent enzymes [2]. 
21.40 21.40 21.40 21.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.10 0.72 -3.59 17 993 2012-10-03 08:47:39 2003-04-07 12:59:11 7 4 912 6 119 541 17 87.20 46 43.00 CHANGED YpGlLPCADCsGI-TsLhLppDGTahLpppY.s+csts.phsuphsWscsuspls.L.sstu-..tspahss-ptL.MLDppGphlpGpL .acGlLPC.......A.......D....CcGI-...ToLhL.......c.......c.....D......G.......T..alhsEcY.LG......s.....+.-............c..P...s..o.F.suh.GT....W........s...c........s........u.......c.p......l.l..Ls.c..u...p..G.-....cs.....YY.+..s......p..s.s..s.LpML..Dp.-.Gpslpup..................................................... 0 27 68 98 +3516 PF04973 NMN_transporter Nicotinamide mononucleotide transporter Bateman A anon COG3201 Family Members of this family are integral membrane proteins that are involved in transport of nicotinamide mononucleotide [1,2]. 23.00 23.00 23.00 23.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.33 0.71 -4.82 105 2756 2009-09-11 15:40:44 2003-04-07 12:59:11 7 3 2145 0 399 1643 528 184.10 26 82.74 CHANGED shlEhlAslhGlhslhLsh+pslhsa.hGllushlYshlhapspLau-hhLplaYhh.hs...lYGWahWp.ptp...ptp......pth.lpphshpphhhhlhhhhlsshhh.uhlhp...pas.........push......Pal..Duhsoshullupahhu++hlEsWlhWlllDllulsLaht+Gl...............hhouhlaslahlhulhGah..pWp+th .......................................hlphluslhGllslh.hsuct.+h.hsahhG....llsss....l.ahhlh........a........p........h.........p.........L.Yuphlhplaahh.hs.....laGaa..tWp..ptp......................tpp................tplph..+h.h...s.hpp.h.......hh...h...ls..l.s...ll.shhlh...shhhs...h.s............................ssh....................Pah....Duhhh.slSllA.hLh.s++.a.h...EpWlhWllhsllu.lslah.hpuh...............hshsh.Yhlahh.ulhGhh...Whp.t.................. 
0 104 221 319 +3517 PF01233 NMT Myristoyl-CoA:protein N-myristoyltransferase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of NMT are structurally similar, each adopting an acyl-CoA N-acyltransferase-like fold. 25.00 25.00 25.60 25.90 22.60 24.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.00 0.71 -4.62 6 493 2012-10-02 22:59:21 2003-04-07 12:59:11 14 9 326 58 293 473 38 152.50 55 33.97 CHANGED -GPIDK.hs.-cVppEPhsLsuGFEWsolDlsDctQLcElYsLLs-NYVEDcDAMFRFsYStEFLpWAL+sPGW+.-WHlGVRVppopKLVAFIuAlPssl+Vp..sKslssVEINFLCVHKKLRSKRLAPVLIKEITRRVNlssIaQAlYTAGllLPsPVoTC .........................................shc..plcpEPasL.Pp.uFpWsolDls..s..pp.LcElYsLLs-NYVED..DDsMFRFsYS.-FLpW...........ALp..PPGWh.pWHsGVRl...........p.....p.....o.pK.LVuFISAIPssl+lh.....................cp......h......h.............+.hsEINFLCVHKK.LRSKRlAP.VLI+.....EITRRV...NLpuIaQAlYTAGlVLPpPVuTC................... 0 93 151 228 +3518 PF02799 NMT_C Myristoyl-CoA:protein N-myristoyltransferase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of NMT are structurally similar, each adopting an acyl-CoA N-acyltransferase-like fold. 
20.60 20.60 23.80 20.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.23 0.71 -4.92 39 510 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 331 51 298 486 34 186.00 47 42.34 CHANGED EVGFStLsss.T.....hs+tl+hYpLPcpspTpG.LR.McpcDlspVpcLLpcYLp+F.cLs.hF.ocEElpHWhLs........pcsVlhoYVVEsss..t+ITDFhSFYsLPSTVlpss.pHcsLpAAY.FY.soss.................ppLppLhpDALIlAKp.hsFDVFNALslhDNphFLccLKFG.GDGpLpYYLaNa+sss.hss.............oplGlVL ...............................................-VtFSpLstshT.....hpR.ph+hY+LP..c.................s...s.............p..TsG..LR.Mc.+DlstVpcLLppYL.p.p.F.cLsPsh.scEElpH...WhlP.......................p-pll.oaVVE..............ss..............s....tclTDFhSFYoLPSolhpps...pHpsL+AAY.aY.sssp.............................................................................ssLhpLhsDAL.IlAKp.psFDVFNALslM-Np..pF.LccLKFG.hGDG.p.LpYYLYNa+sss.hts............................pplGlVh....................................... 0 95 157 236 +3519 PF02070 NMU Neuromedin U Mian N, Bateman A anon IPR001942 Family \N 18.40 18.40 18.90 22.80 18.10 16.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.68 0.72 -6.91 0.72 -4.42 8 56 2009-09-11 14:39:29 2003-04-07 12:59:11 10 1 29 0 18 49 0 23.70 66 18.09 CHANGED aKlDEEaQGPhuuQSRGYFLFRPRN .....hcEEhQuPhssQSRGYFLFRPRN 1 1 2 5 +3520 PF03980 Nnf1 Nnf1 Finn RD, Wood V, Mistry J anon Wood V Family NNF1 is an essential yeast gene that is necessary for chromosome segregation. It is associated with the spindle poles [1] and forms part of a kinetochore subcomplex called MIND [2]. 
22.40 22.40 22.50 22.70 22.10 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.15 0.72 -4.06 26 221 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 191 0 149 223 3 104.50 26 47.01 CHANGED spsLcssppQhsphhpppspcEFspIhcERslpp+LsELDcllpcAccRpct.t.............................sshhcsLsPpcllcupLhsthppphspLspplpplpt-NppLtpp..lpphccp .....................t.hpplhcphhppLppshpcEhpcIhcEpslpttLNcLDcllpEA+p.Rcp..t.....................................................................ss.hpsh.sPcphlpuplh....shh....hpttspLptplpphptpNtpLtpplttt+p................................... 0 35 71 115 +3521 PF02898 NO_synthase Nitric oxide synthase, oxygenase domain Griffiths-Jones SR anon Structural domain Domain \N 21.00 21.00 21.90 21.70 20.30 19.50 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.17 0.70 -5.79 8 836 2009-01-15 18:05:59 2003-04-07 12:59:11 10 23 543 489 206 694 26 326.40 50 49.70 CHANGED sssRssE-LLspA+DFlsQYYoSI+RtpScAHhuRLpEVpKEl-sTGTYcLTpoELlFGAKpAWRNAPRCIGRIQWuKLQVFDARcsoTApsMFEtlCNHIKYATN+GNlRSAITIFPQRTDGKHDFRlWNsQLIpYAGYKpsDGollGDPAsVEFTElCIcLGWKP+pGpFDVLPLVLpAsGcDPEaF-.lPPELVLEVPlcHPcYcWFpELGL+WYALPAVSNMLL-sGGLEFoAsPFNGWYMGTEIGsRNhCDspRYNlLEcVAp+MGLDT+sssSLWKD+ALVEINlAVLHSFQpsNVTIVDHHoAoESFMKHh-NEh+hRGGCPADWlWIVPPMSGSlTPVFHQEMlNYhLpPuFhYQ.s-sWKTHhWK 
.........................................................Lh.pAptFIpphYpp.....h+h.....t.p.hppRlc....-lphEIcpTGTYph...Ttp.ELlaGAKhAWRNus...RCIGRl.WspLpVhDAR.......cssstpthhptl..spHIphATN.pGpl.+..ssIT.IF.s..+t..sG.t..s..h+I.aNsQLIRYAGYc.p.....G.hh.GDPAsh-hTcls..c.LGWp.s...p...t..s.pFDVLPLlhQh.s.s.c.s.Pcha-.lPscLlhEVPIcHP....c..a.....h.hpcLs....LKWYAlPhlSNMhL-IGGlpa...suuPFNGWYMsTEIGsRN.......hsDstRYN...lLE....cVActhsL.....DTp.+.s..sS...................L..........WK.......D+ALVElNhAVLHSFp............pp.sVoIVDHHoAucpF.pa.cNEtp.ttt.s..ss..ucWsWllPP...lS.solTslaH..p.t.h..s.N.hh..hpPsFhYp......c..p........................... 0 54 88 134 +3522 PF02474 NodA Nodulation protein A (NodA) Mian N, Bateman A anon Pfam-B_2183 (release 5.4) Family Rhizobia nodulation (nod) genes control the biosynthesis of Nod factors required for infection and nodulation of their legume hosts. Nodulation protein A (NodA) is a N-acetyltransferase involved in production of Nod factors that stimulate mitosis in various plant protoplasts. 33.80 33.80 38.20 34.30 27.00 23.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.39 0.71 -5.02 30 1084 2012-10-02 22:59:21 2003-04-07 12:59:11 10 1 856 0 23 753 0 170.20 67 98.03 CHANGED MRSpV+W+LCWENELpLuDHlELu-FFRKTYGPTGAFNAKPFEGuRSWAGARPElRsIuYDu+GVAAHhGlLRRFIKVGplDLLVAELGLYGVRPDLEGLGIoH..ShRVMYPVLQcLGVPFuFGTVRpALcpHlpRhs.RpGLATIloGlRVRSTLsDVahDLPP.TRl.EDlLVlVhPIGRshSEWPsGTlIDRNGPEL .......................................-H.ELu-FFRKoYGPTGsFNAKPFEGuRSWAGARPElRAIuYDucG.lAAHhGlLRRFIK..VGp......sDLLVAELGLYuVRPDLEGLGIuH..S..l.R.sMYPlLp-LGVPFuFGTVRpALcpHlpRhs.R.pGlATllsGlRVRSThs-la.DLP.P.TRl.ED.l.LV.l.VhPlu.RshoEWPsGshI-RNGs.............. 0 2 9 12 +3523 PF01798 Nop Putative snoRNA binding domain Bashton M, Bateman A anon Pfam-B_1362 (release 4.2) Family This family consists of various Pre RNA processing ribonucleoproteins. 
The function of the aligned region is unknown however it may be a common RNA or snoRNA or Nop1p binding domain. Nop5p (Nop58p) Swiss:Q12499 from yeast is the protein component of a ribonucleoprotein protein required for pre-18s rRNA processing and is suggested to function with Nop1p in a snoRNA complex [1]. Nop56p Swiss:O00567 and Nop5p interact with Nop1p and are required for ribosome biogenesis [2]. Prp31p Swiss:p49704 is required for pre-mRNA splicing in S. cerevisiae [3]. 25.00 25.00 27.70 26.40 23.90 22.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.53 0.71 -4.91 16 1302 2009-01-15 18:05:59 2003-04-07 12:59:11 13 19 497 30 857 1287 98 145.10 41 29.75 CHANGED plpchscplhpLt-hRppLhcYlcs+MsplAPNLosLlGphVGARLIuHAGSLppLAKhPASTlQlLGAEKALF+uL+.....o+ssoPKaGlIapushIppussps+GKluRhLAAKsulAuRlDshucp.ss....hG..h+pclEpRlcplcpt ..................................................................ltphscp.llpLschRp....pLhpYLps+MptlAPNLosLlGp..h.....lGARLIu.+A.GSLtsLAKhPAS.T..lQlLG......AE..K....A.L....F...R...ALK........T+.s...s...TPK....aG..l.IYHush..........ls..p.u..........s....s.cpKG+..huRhLAuKsulAuRlDs....h....u..-......sss........s..............hG.ph+p.plEpRLchhp..s.......................................... 0 299 489 709 +3524 PF02451 Nodulin Nodulin Mian N, Bateman A anon Pfam-B_2163 (release 5.4) Family Nodulin is a plant protein of unknown function. It is induced during nodulation in legume roots after rhizobium infection. 
25.00 25.00 168.50 32.00 19.20 19.20 hmmbuild --amino -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.91 0.71 -4.85 6 33 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 2 0 5 25 0 114.60 33 83.64 CHANGED sEAYESP+FKKFVTcCoSHVuETCSssss.pspEAl+..psshGLuaCLFDSMEp..CLs-HpAulhs.....................................psplcshsshPsolp.....................................................................................................s...........tP............................................lLIpTlpFRTVL+TCS+VSARoCLTAPNVATSsLsACLtPShNQCVYPsu....s.sssssPPI ..............h..ss+hp+hlT.souHVsppCS.......ptt.h...s...huhphhspM.t..Ch..atsth...............................................................................................................................................................h..hhphpsshthCo..oAp.ChssssVssSsL.th.hs.hppClY.................................. 0 0 5 5 +3525 PF01189 Nol1_Nop2_Fmu Nol1_Nop2_Sun; NOL1/NOP2/sun family Finn RD, Bateman A anon Prosite Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.53 0.70 -5.03 10 8954 2012-10-10 17:06:42 2003-04-07 12:59:11 12 43 4410 23 2832 7258 1507 238.20 24 57.46 CHANGED llpsssspsshslRsNspKtsRcshtpsL-ppulshpsluphsps..hshcpssulssLPtapsGashlQstSu.hsshhLsPpsc-t.............ILDhCAAPGGKToalsplhhp.pupllAsDhsppRlpplhsNlpRLGspshhlp...sDssc.tptssss......FDRlLLDAPCSusGsIc+cPcl+hhRp-pDItplspL..Q+cLLputh-hl.................KsGGhLVYuTCSltsEENpt..........sIchaLpc+scs-Lsspshscsht.ht.........hlphshphhP+ppstDGFFhAcLp 
....................................................................................................................................................h..t..............t......t..............................h..............................................................................................................................................h......tt.....h...s......t...a....t.p......G......h...h...hl.Q.-tuo....h..s....s....h....h.....L....s.......s....p....s....s..pt.......................................................l.LD..h..C.......A......A......P.........GG.......K..............T............s........p.l.........s..........p...................h..............h............t...............t......p.................G..........t...........l..........h....A...........-h..s...t.p.R....l...p..h..l......t........p........s........h.....p........R......h......G.....h......p.......s...h......h.....ht............t-..u..t.......p....h............t..h..h...tt.......................................FD+...l...Ll..D.A..P.CS....G.........G....s........l.......R.......+..........c..........P............-..........h...............t.........h.......h..h...........p.............p......p.........h.......t.....p...l.sph...........Qtpl.Lptsh..phl.............................................+..s...G.G..h..llYuTCo....h..pEN..pt........................................................lp.h...h.....L....p.....p...t..........t.....h.....p.......................h...................................................................................................hp.h..P....t..........................-.uhhhs................................................................................................ 1 951 1651 2323 +3526 PF04135 Nop10p Nucleolar RNA-binding protein, Nop10p family Kerrison ND, Finn RD anon COG2260 Family Nop10p is a nucleolar protein that is specifically associated with H/ACA snoRNAs. 
It is essential for normal 18S rRNA production and rRNA pseudouridylation by the ribonucleoprotein particles containing H/ACA snoRNAs (H/ACA snoRNPs). Nop10p is probably necessary for the stability of these RNPs [1]. 22.40 22.40 23.40 30.30 22.10 22.30 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.75 0.72 -4.23 44 426 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 393 22 298 416 50 51.40 49 80.10 CHANGED hchhl.csstpc......lYTL+..pss.sGp.TtsuHPARFSP-DKYu+YRltlK+Rhs ..............hhahl.sssGpR......lYTLK........Kss.sGp......TpSAHPARFSPDDKYS+aRlslKKRa...... 0 95 170 244 +3527 PF05048 NosD Periplasmic copper-binding protein (NosD) Moxon SJ, Bateman A anon Pfam-B_5499 (release 7.7) Domain NosD is a periplasmic protein which is thought to insert copper into the exported reductase apoenzyme (NosZ) [1]. This region forms a parallel beta helix domain. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.36 0.70 -5.21 120 1281 2012-10-02 14:50:22 2003-04-07 12:59:11 8 160 695 0 642 3494 729 202.70 19 40.37 CHANGED hsNshsusshhh............sGlhl.ts............ssshl.....psssltpspsul...hhttupssplpsN....thpss.....thGlphhtuss...........shlpsNhhpssp...........tGlhlhsop....tt.slp.....sNplpsspt.......GI.hLptuss.......sp...lpsNpl...................ssspGlhlhsu..........ssNsls.sNplssN............shGlh...................hssS.......ps.....NplasNhF.sNs.slphssst...........sspW................ssst.................GNaW.uch.sG.........tD.tsuDG..IuD...s....sY....phs..ssh....DhlPLhtst 
...............................................................................................................t.....................................................................................h..............t..h............t.sul............hh..t.s..p..p..sh.l..pss...........thpps..............phG.lp...h.h...sss..................shlps.N.h.h.ps.st................sG..lh....lhtss..............ps..plp..................sNthtssps..............Gl..h.l....p...t..s..s.s...............sh......lps.N.p.l...........................................t..ss.....s.....p.....G..l.h.lh.su..................spsplp....sN.p.hp.ss.......................t.h.Glt......................................hs.s.u..........ps..............Ntl.h.t...N.t...h...s.N.....t....h...........................................................................................................................................................................................t................................................................................................... 0 208 429 517 +3528 PF04054 Not1 CCR4-Not complex component, Not1 Wood V, Finn RD anon Pfam-B_13503 (release 7.3); Family The Ccr4-Not complex is a global regulator of transcription that affects genes positively and negatively and is thought to regulate transcription factor TFIID [1]. 
22.50 22.50 32.10 23.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.29 0.70 -5.60 30 447 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 288 0 299 459 11 319.90 38 17.68 CHANGED sLLh-hs........ph.pshpspph....tphhpsFuphhptLpPhthPGFsFAWlpLISHRhFLP+lLp.hspppGWshaspLLhsLl+Fls.al+ssp.lscslpllYKGsLRllLlLhHDaPEFLsphHaphCssIPssClQLRNlILSAaP.+sh+LPDPFsssLK.l-hLs-hppuPplht..s.stsLps..tlKKslDsaL+t.tsssshlpplhst..lhhsppctt......sssphNl.......pLlNAlVLalGhpAlsphpppspssshssps..sthsllppLhpcLcsEsRYhhlsAIANQLRYPNsHTHaFspllLplFs.p.....sssppsIQEQITRVLLERlIVN+PHPWGLlITFhELlKNs.pYpFachsFl+ss.PEIcplFcsls+s .................................................................................................ht..................t.t.......phh.sFspthphLpPhthPGFsauWLpLlSHRh..FhsphLt.............h......s.p...p.p............GW.shatpLLhsLhcals...saL+...ss...........p.....lspshphlYK..GsLRl.LLlLhHDFPEFLs-aHathCssIPssClQLRNlILS..Aa.P.cs..h..+L..PDP.....F..sssL.K.l..........-hLs..Eh.p...sP.plhs..shsssl.......thKp.lDpaLps.....tssssh...l.pp..lpptl.hs.pt.....................sssphNh.................LlNALVLYlGhpAl........tphpp.................pst....s.....s..............s.hs......s.....s...................sths.lhppLh...hpLcsEuRYhhlsAlANQLRYPNuHT+aFStslLhLFu................psppptIpEQIsRVLLERLIVpR..PHPWGLllTFhELl......+Ns.tapFaph.Fl+su.P.ElpplFpslhp.s............................................. 0 116 177 255 +3529 PF00066 Notch notch; LNR domain Sonnhammer ELL, Bateman A anon Swissprot_feature_table Domain The LNR (Lin-12/Notch repeat) domain is found in three tandem copies in Notch related proteins. The structure of the domain has been determined by NMR [1] and was shown to contain three disulphide bonds and coordinate a calcium ion. Three repeats are also found in the PAPP-A peptidase [2]. 
21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.82 0.72 -3.69 53 1456 2009-01-15 18:05:59 2003-04-07 12:59:11 12 302 120 25 783 1259 39 37.40 38 5.70 CHANGED p.hppCs..........ttCtpph.usGhCDpp..CNshsCtaDGsDCs .........................t....pCt..........ttC.tsph..uDGhCDpt..CNsttCtaDGsDCp. 0 251 317 518 +3530 PF03000 NPH3 NPH3 family Bateman A anon Pfam-B_1584 (release 6.4) Family Phototropism of Arabidopsis thaliana seedlings in response to a blue light source is initiated by nonphototropic hypocotyl 1 (NPH1), a light-activated serine-threonine protein kinase. Mutations in NPH3 disrupt early signaling occurring downstream of the NPH1 photoreceptor. The NPH3 gene encodes a NPH1-interacting protein. NPH3 is a member of a large protein family, apparently specific to higher plants, and may function as an adapter or scaffold protein to bring together the enzymatic components of a NPH1-activated phosphorelay [1]. 
20.60 20.60 21.40 20.80 20.20 19.60 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.58 0.70 -4.87 51 658 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 36 0 421 661 0 228.20 33 41.97 CHANGED pD.WWhEDl.stLsl-hacR.llsuh+u+..slpsclIupuLhtYAp+hLssh.............................t.sss.sppcp+tllEoIlsLLPs.c+ss....l...ospFLhtLL+sAhhLsuut...ss+pcLE+RIGhQL-pAol-DLLIPu.........hssp..........slYDVDhVpRIlcpFlpp..................t.........................................t.tt....ts.stsshhpVuKLlDuYLAElA.sDs.NLplsKFhuLAE...slP-tAR.hcDGLYRAIDlYLKsHPtLo-sE+c+lC+lhDCpKLShEAC ...........................................................sWWh-Dl.s.Lplshap+.llhuhcsp...sh.......p.ph..lutslhhYAp+h.L.sh..................................................................t..ss....tptcp+hllEslls...lLPs..c+ss...................ssspFLhtL.L+sA.hhlpsu....ss.+tpL.E+RluhQL-pAolsDLLlPs............ssp................shaDl-hVp.Rll...ptFltt................................................................t...ts..sstsshhpV...u+LlDsYLuEl.A..D..s.sLphsKFhsLAp...slPc.AR....hcDGLYRAlDhYLK.....s....H..P..tlsctE+cclCp.lh-spKLS.-As...................................... 0 54 267 350 +3531 PF03116 NQR2_RnfD_RnfE NQR2, RnfD, RnfE family Mifsud W anon Pfam-B_2882 (release 6.5) Family This family of bacterial proteins includes a sodium-translocating NADH-ubiquinone oxidoreductase (i.e. a respiration linked sodium pump). In Vibrio cholerae, it negatively regulates the expression of virulence factors through inhibiting (by an unknown mechanism) the transcription of the transcriptional activator ToxT [1]. The family also includes proteins involved in nitrogen fixation, RnfD and RnfE. The similarity of these proteins to NADH-ubiquinone oxidoreductases was previously noted [2]. 
22.90 22.90 23.30 23.60 22.80 22.80 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.05 0.70 -5.24 169 2456 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 1700 0 519 1837 989 327.80 35 93.22 CHANGED shhspsuPHl+ssh.shp+lMhhVllAhlPuhhhu.hahaGhts..................................................................LhhhlluhlsulhhEslhtplRp+pl...h.DtoslVTulLlAlslPsshPhWhlslGssFAllluKplFGGlGpNhhNPAlsGRshLhh.uaPspM.os....Ws...................s.............s........Duh....osATPLshhpss.......................h........................p..hs.......hhshFhG.h.........GulG..EsSsLulLlGGlhLlhp+lhsW+Isluhlsuhhlhuhlhthh...s.............s.ht.hsshaHLlsGGhhhGAhFhATDPVouuhTs+G+hlaGhhlGllshlIRsau.uYPEGlhaAILlhNhhsPLIDa..astspshu++ ......................................................................h.hhtsusas.+.sth.ssp+lMhhVllAhl.Puhhht..ha..a.G.hts......................................................Lh..hhl.uhlsulhhEslh.h.............pl.R.++.....pl..............h....-...tus.l.......lTulLlAlslPP.......hh..PhWhsslGssFu....llluKpl.a.....GGlGpN.hNPAh.sGhs..h.Lhh....u...aPspM...ou.......Wh....................................s................................hDGh....otATPLsthpsu................................................htt........hs........ptht.....hh.shahG...ls.......GuhG...Eh..ssL.AhLlGG.l.hLlap+.lhpW+...Issuh.l..lsh..hlhuhl.hthhu........................s.hh..hsshhH.LlsG.u.....hhlGAhFhATDPVouuh.Ts+G+hlaGhLlGlhshlIR...shu...uYP..-..G.....lhaAlLluNlhsPLIDaash.+shs+................................................ 0 174 339 445 +3532 PF01566 Nramp Natural resistance-associated macrophage protein Bashton M, Bateman A anon Pfam-B_624 (release 4.0) Family The natural resistance-associated macrophage protein (NRAMP) family consists of Nramp1, Nramp2, and yeast proteins Smf1 and Smf2. The NRAMP family is a novel family of functional related proteins defined by a conserved hydrophobic core of ten transmembrane domains [5]. 
This family of membrane proteins are divalent cation transporters. Nramp1 is an integral membrane protein expressed exclusively in cells of the immune system and is recruited to the membrane of a phagosome upon phagocytosis [1]. By controlling divalent cation concentrations Nramp1 may regulate the interphagosomal replication of bacteria [1]. Mutations in Nramp1 may genetically predispose an individual to susceptibility to diseases including leprosy and tuberculosis conversely this might however provide protection form rheumatoid arthritis [1]. Nramp2 is a multiple divalent cation transporter for Fe2+, Mn2+ and Zn2+ amongst others it is expressed at high levels in the intestine; and is major transferrin-independent iron uptake system in mammals [1]. The yeast proteins Smf1 and Smf2 may also transport divalent cations [3]. 29.80 29.80 29.80 29.80 29.70 29.70 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.11 0.70 -5.66 126 5115 2012-10-03 01:44:59 2003-04-07 12:59:11 13 22 3156 0 1386 3862 534 343.50 27 78.31 CHANGED hATslpAGupaGYpLLWlllhuslhshllQthuA+LGlVTGc....sLuphh+cca......s.phhshhhhhhstlushAsslAEllGs.AluLplLh..s.......lPh.........hhusllsuhsshhlLh...ptu.a+hlEtllhsLlsllslsFlhplhlspPsh......splhtGhl.Pshss..........sslhls.....lullGuTVhPaslaLHSullp.s+t..h..............................pt............l+hsph.Dsh.lul.sluhllshulllhAAusha.....tstt..tl........................sslpsAtpsLpP........lhG..hA...shlFuluLluuGluSshsushAGthlhpGhl.phchsh.h+h.hsp.hthhsshhslhhhussht............hLlhSQ...VlhulhLPhsllsLlhhsss+p.....lM.G.pasNsh 
............................................hoshtuGu.paGY.t.............L.La.l.ll.lu.slhuhllQ..............hhuu+.L...Gls.T.G.t............sLuch.h.p..c....ph.............................s.p.h.h.....sh.....hh.h.....l.....hstl.u..h.....h...........Ao..s.l......u..E..................l.l.Gu....A....l...u..l....p.l..Lh...u.............l.sl......................hh.Gs....ll.Ts.ls.sh...l.l...L.hh..................p..........t..........G......h+hl.Etl...lh.sLl.h.h.....lh.....l..s..a..h..h..t..l..h...h....u.....p.....P..s...h.........s.pl.h.....p..Gh.l...Pp.h.h.............................tslhhs.........................lul.l..G.....AT.l.MP....H...s..l...a.LaSulsp.s+t.hst.....................................................................cp........ptlphsp....h..D.ss.lul....hl.u....h.h.l.N..h....slll....huAushh.......ssts....sl...........................................................................................ssh.tp..sh..ps..L....ps...............................hh....G.....hu......shlF...u.lu....LlAuGhsSol.su.........o..h......uG..p...hl......hp......G.hl......p....h......c.h...s.....h......h..............h............R....p.........h.l............T..........h....hl.....P..s....h..........l.l..h.h.h.hs.ssst................................lL.l.h.u.Q......Vlh.u...h...t.L..Ph.sl.ls...L.lhh..o.s.pcp..lMG.phhs..h............................................................................................................................ 0 404 819 1134 +3533 PF03813 Nrap Nrap protein Bateman A, Wood V anon Wood V Family Members of this family are nucleolar RNA-associated proteins (Nrap) which are highly conserved from yeast (Saccharomyces cerevisiae) to human. In the mouse, Nrap is ubiquitously expressed and is specifically localised in the nucleolus [1]. Nrap is a large nucleolar protein (of more than 1000 amino acids). Nrap appears to be associated with ribosome biogenesis by interacting with pre-rRNA primary transcript [1]. 
22.90 22.90 22.90 23.50 21.70 22.60 hmmbuild -o /dev/null HMM SEED 972 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.00 0.70 -13.73 0.70 -7.02 26 425 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 280 0 303 441 12 679.90 22 81.89 CHANGED SauL+Tulp.p.psh..slDlslTMPcplFpcKDYLNaRaaaKRAaYLAhlAttL.....ccsts.thplpasahssDsLpPlLhlp...Ppsppssc...hco+....hpIpllsuhPpslFs.p+LLPs+NsIRsspssp.............PTPaYNuollo-sshstYhKhLapst+....ps-uF+DAClLGRlWLpQRGFuSuhpp....GGFGsFEauhLhuhLLpG....G......G.pGpplLhpuaSSYQLFKuslpaLAspDL.tppslhhsuss..............httshpsPslaDssptlNlLhKMoshSYphL+pcAphTLphLN-stpDpFstlFltKhs..shh+aDtllcl...............sphssspplphhphps.......h..plaplLc+AL.....G-RlphIplp..tpsppsaslpp+hsspttp.......slplGLllNP.ccsp+lVD+GPuu....EcccEutpFRpFWG-KA-LRRFKDGSIpEolVW...Ss...ssptsIlppIlpYlLcpHlphs..scclph....hucpFcphL..s..............shsssstsuFtsltcAFssLpKslpsl-.tLPLsl+pltPsussLRYoulp.PhPh................tsssh..-VllQFEoSu+WPD-LsAlp+TKsAFLLKIu-pLp.sss..hpshlsh-pp...hshthstshLsllpspGasFRLRlhp-REpsLhcRtlt........ssssptK.tpst.shhtac+pahtsP+HTpslpsLsp+ashaSPTlRLhK+WhsuHLL..ssHl.s-EllELlsl+sFlpPhPassPuSspsGFLRsLpFLucWDWpp-PLllDls.............................scLstpphctIpppFpshRp..DPsh..sp...sshFVAoc.D..ssGlhWo.....ptsshslAsRlTuLAKuAh.pllc....ppGh.............phslspLFpsuLpsYDFlIpL+sp.slpspps................ssspFKNL..pp..stsp....hsp.....tsshc.h.taVc-LptpYu....ssllFFps.........spsssVIuGLWs.Pphhps+sa+lshshsspPhs......................s.cpVpl..N+-AllsEIupLGs-LlpsI-s....p 
.....................................................................................h-h.l.hPtthht.cDhhs.ph..KRshalthlht.l............................................th....tht.....ttp.h.s.l.l....................t..............................................lplh.s.........t....h...t..........+h..s.t.......t...ssl........+.................................t.............................................sTPhYNt.lh.p......h......phh.th............t...thtpuhhLh+hWhpp+t.......h..t................................tshs..th...h..sh.....hh.hLhpt...................................l...hs....shphh+.hh......hlu..t..p.h.....t.h.ht...............................................shh.c..t.hNlh.phs...ht............l.p..t..A.t..sh.h.h.pt............s..t.....F....t.hhh......................taD.hhpl...................................................................................l.thltpuL.......spRhphl.h........................................h.h..ttt................................l.lul.h.ps.tth.ph..lphGP.s......................pt......tuttF+pFWG.t.+.upLRRFpDGsIhcs.................hlW....................p......tt..l.t......plhp.ahL..thH..h........tptl...............t.hp..h.........................................s.t.....hhpsapplt.................p.l.t...........hp.......lPLplttl.shts.hRhsp...s.s........................................................................................phh..hph-tSspWPpph.ulphh+hAh.l.pltc.Lt.t......................................................p...t.....hplh.h....p......uhhF+h.lhhth-....hh.t...............................................hp..h...s.htthlpsht.....t.pa...hussh+lhKpWhtsp.hh........hh..tp.h-Llsh.ha...........h.............p............................s..................Pt....................o..suFh+hL.hlupasWp..pPl..ll..shs.............................t.php...tp..tltpth.t.Rt...........................hhlso..D...................ts.....hao.................t..s...lhtphh.lA.ttu.h.thlt........t.................................ph...t.....lF...h...ssht.a.D
h..ll.lp.t.....t..............................................................................h................................................hp.......hht...Lp.....at........t.hhh..t...........ttt.lshhh..............h.t..................................................................................t.......................................................................................................................................................................... 0 118 180 266 +3534 PF03916 NrfD Polysulphide reductase, NrfD Finn RD anon DOMO:DM04466; Family NrfD is an integral transmembrane protein with loops in both the periplasm and the cytoplasm. NrfD is thought to participate in the transfer of electrons, from the quinone pool into the terminal components of the Nrf pathway [1]. 20.30 20.30 20.30 20.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.25 0.70 -5.02 13 2377 2012-10-01 19:35:38 2003-04-07 12:59:11 9 13 1174 0 635 1611 579 296.60 25 81.36 CHANGED ssstphssl...sWuhslAh.YhFll..GluAG..ulhlAhhh....chhptp..ttt..s.s...tl.lpsusll.uslsll.h.ullhllhDLs.RP..+Fhthh.h...asF..o...Shhs..h.Gl..hhht..sahslll...laLhhhhct..pltsht......h.s...........ht.hlt+....lhsl...u...........tthsp..sltllh.hlhAlhluuhTGaLlSslhuhPhhpsslLPs.hFlAuuhuSG...lAs.hllshll.hs+hps.csp.ssthlpphthhl.hht..phhlLs......h....hhV.G..hth.......ssstp..hpuhps...A..........Lpu.hh...s......hhFhlGlshlGlhlPllLt.h..hlsc.lptshshl.lhuulhsLsGshhhRahllhsG.hh.lu 
..........................................s....h.tsh..sWs.h..IAh.alalh..Gl.u..uG..uh.h.l.Ahhl..................hh...apht.......tht....s.t.......hl..h+.s.s.hl......h...u.h.h.u.lh..l..Ghlhl...hhc....l............s....R..........Pa....ph....h.......h...h.....h....h.........h..h.......s.......a........s..................Sshs...h..ss....hhhs........lY.h..s...l.hs.....la.hhh..hh..t....cl.h.tht............................................................lp+........h....hth.....................................................thhh....hlts.ls....hlL.u.s....hl..tuh..sG..hL....l..o..u.h..h.s..h.....P......h......W..p.....s........s..h.....hPs..LF.l..hou..h.uG...h.uh...slh......h.h....h..........l.....t.........t...........p.hp...........s.....p..........s.....-....t............p...h......l...t..th......hsh....lhs....tl.hl.lh.....................s.......h...a...l...s......lth.............uss.t..h......hp.th.sh.....s...............................................h.s.u..s..h..a.........s.................hhF..W.l.t.l.l..h.h.s..h.l..hP..l...l...l......h...........hh.....s+...h.....p.p.s..h.t....h....l......h...hsu.h....h...sLlGs.h...h.h.R.h.llh.s.............................................................................................................. 1 251 451 567 +3535 PF02723 NS3_envE Non-structural protein NS3/Small envelope protein E Mian N, Moxon SJ, Bateman A anon Pfam-B_1913 (release 5.5) & Pfam-B_7381 (release 8.0) Family This is a family of small non-structural proteins, well conserved among Coronavirus strains. This protein is also found in murine hepatitis virus as small envelope protein E (e.g. Swiss:O72008). 
25.00 25.00 27.80 27.60 19.90 19.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.91 0.72 -4.06 12 303 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 234 0 0 157 0 78.20 35 97.76 CHANGED M.h..slhhh-ssshVlshllhhllhllhLllslAhLshI+LChpCsshsNshlhtPshalYs....hYpsa..clpP.ss..s.l ......Mh..shsht-TsshVlshlhlhLlhllllllslAlLssl+LChhCCslsNshllpPolYlYs+s+phYpsa..phps................. 0 0 0 0 +3536 PF02071 NSF Aromatic-di-Alanine (AdAR) repeat Casavant T, Bruckert F, Bateman A, Mian N anon IPR000744 Repeat This repeat is found in NSF attachment proteins. Its structure is similar to that found in TPR repeats Pfam:PF00515. 27.00 1.00 27.10 3.10 26.90 -999999.99 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.98 0.77 -6.20 0.77 -2.28 51 10 2012-10-11 20:00:59 2003-04-07 12:59:11 15 2 2 0 5 284 6 13.00 28 22.38 CHANGED AuptappAupha .....AuppYpcAAcha...... 0 5 5 5 +3537 PF05064 Nsp1_C Nsp1-like C-terminal region Wood V anon Pfam-B_3555 (release 7.7) Family This family probably forms a coiled-coil [2]. This important region of Nsp1 is involved in binding Nup82 [2]. 22.50 22.50 22.50 22.50 22.30 22.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.49 0.71 -4.45 30 353 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 282 2 233 349 0 113.90 30 21.20 CHANGED ssspssp.sssss........LcsKsL--lIsKWop-LscppKpFppQAspVspWD+hLl-NG-cIspLassshcsEpsQscl-QpLpalcsQQcELEphLssYEpchcplh.sph..sps.......stc ........................hs.....t...ssss.s.........hphppL..--lIs+Wsh-Lpcpp+cFhpQAspVssWD+hLlcNGc+IppLap.cstcsctsQpcl-ppLshltsQQcEL-shLsshEcplcp...sp........................................ 0 80 128 189 +3538 PF03146 NtA Agrin NtA domain Bateman A anon [1] Domain Agrin is a multidomain heparan sulphate proteoglycan, that is a key organiser for the induction of postsynaptic specialisations at the neuromuscular junction. 
Binding of agrin to basement membranes requires the amino terminal (NtA) domain [2]. This region mediates high affinity interaction with the coiled-coil domain of laminins. The binding of agrin to laminins via the NtA domain is subject to tissue-specific regulation. The NtA domain-containing form of agrin is expressed in non-neuronal cells or in neurons that project to non-neuronal cell such as motor neurons. The structure of this domain is an OB-fold [1]. 20.80 20.80 20.80 22.80 20.00 19.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.79 0.71 -4.49 2 48 2009-01-15 18:05:59 2003-04-07 12:59:11 10 18 29 4 20 43 0 116.40 73 11.37 CHANGED TCPERALERREEEANVVLTGTVEEILNVDPVQHTYSCKVRVWRYLKGKDlVApESLLDGGNKVVIuGFGDPLICDNQVSTGDTRIFFVNPAPPYhWPAHKNELMLNSSLMRITLRNLEEVEFCVEDKP .....CP.E+sLE..+REEEANVVLTGTVEEIlNlDPVp..pTYSCKVRVWRYLKGK-l......Vsp....E.sL.LD...GGNK...VVIGGFGDPLICDNQVSTGDTRIFF.VNPAP.YhWPAHKNELMLNSSLMRITLRNLEEVEaCVE-K................................ 0 7 8 13 +3539 PF02136 NTF2 Nuclear transport factor 2 (NTF2) domain Mian N, Bateman A, Griffiths-Jones SR anon IPR002075 Domain This family includes the NTF2-like Delta-5-3-ketosteroid isomerase proteins. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.45 0.71 -3.72 43 1695 2012-10-03 02:27:23 2003-04-07 12:59:11 15 45 499 189 981 2094 108 119.60 22 36.72 CHANGED luptFlppYYpthss.....c.ptlsphah..t-sShhshsGp................thpGhpsIscphtsLshsp......h.ptplsslDsQ.......ss.ttslllhVsGplpsssp..h...ppFsQsFhLsspp....tsaaVhsDhaRh ................................uptFlp.pY.Yphhsp...........sh....p.t.LtphY.........tcs..o.h..h..sh.tsp......................................................shp.G..tp.sIt....c.t.h...p...s.L..s..hsp.............h..phpl.p.s..lDsp...............................sshpssl.ll.V.sGtl..............p....s..sp..................hpFsQ..sFlLssps..........ssahlhsDhhp................................................................................... 0 254 446 711 +3540 PF00483 NTP_transferase ADP_glu_Plase; Nucleotidyl transferase Finn RD, Bateman A anon Bateman A Family This family includes a wide range of enzymes which transfer nucleotides onto phosphosugars. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.68 0.70 -5.08 64 22811 2012-10-03 05:28:31 2003-04-07 12:59:11 18 112 5108 164 5670 20398 12452 242.50 22 69.14 CHANGED tulILuGGsGTRLaPLT+shsKphlshhsp...shlphslsslh...uulpc.lllsspppphhlpctltcssphu.........lplphshpstsc.........GoAsAltlut.hltppps......hLlLuuDtlhp...satphlppttspss....ssohhhhsspssotaGllchs......sst......plhpFhEKPptsp.....................sshsshGlYhassslh.hh.p..lpp.tctcspls-hhtthlcpsphshshhhps...................................WpDlGohcula-usttltpt .........................................................................................ulIh.Au.G...h..G..T.R..h......h.......P....h...T........c....s..........h.......s..........K......t...h.l......s......l........s...........s.......+....................s...........hIp............a.sl....p...p...h.......h.......t............u..............G.....l.....c....c.........l.....l.l....s.....s...t.........t....p.........t.........p...t.....l..p...c....h...h....s...p..s.......p..ht......................................h..p.l.t.h..h...h...p..s...p.st.........................G..h.u...p..A....l...h...h..u...t..t..h..l.ssps...................hllL.s....s....D....h...l..h.s.......................sh.p...p....h...l........p.......t.........t..........t....p...p...s.....t.....................h.....s..s.............h..h..h......h........h....p...p....s...p....t...a...G....l..l...phs......................................pps..........pl...h...p...h...h...E....K.....P..p....t..s.p........................................ssh..s..s....s..G..hY....h..h.s..s..p..l.....h...........h..h...........p............................p.................t.........t.....t.......s......c........................h....p.......-......h.....h........h...h......h...p....p.....t...p....h........h...t....h...hts........................................................................................................h.D.h
G.s....htshhph.......th........................................................................................................................................................................... 2 1857 3653 4776 +3541 PF05014 Nuc_deoxyrib_tr Nucleoside 2-deoxyribosyltransferase Bateman A anon COG3613 Domain Nucleoside 2-deoxyribosyltransferase EC:2.4.2.6 catalyses the cleavage of the glycosidic bonds of 2`-deoxyribonucleosides [1]. 27.80 27.80 27.80 27.80 27.70 27.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.36 0.71 -4.20 107 1106 2012-10-02 19:28:18 2003-04-07 12:59:11 10 7 886 35 291 866 351 125.70 22 73.25 CHANGED lYLAuPh.Fs..t..sphphspchpphLpphuh....psh.hPh.............ptph......s......s.thuptlaptslptlcpuDsllAs.ls.shc......sDsGTuaElGaAhAhsK......Plhshps-hc.........................................p...thshNLM .....................................lYhAush..Fs....t...tp....hphh.p.p.lhphlcptsh..........phh..hPh..................ct.ph........tt.........tshttsptlhpsDh.pt.lppuD.lllAh.ls..s..........DsGouhElGhA.hA.h.....s.K......Pl.l.s.hhpstp...................................................................................................................... 
0 83 165 228 +3542 PF03825 Nuc_H_symport Nucleoside H+ symporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.27 0.70 -5.71 4 1944 2012-10-03 03:33:39 2003-04-07 12:59:11 11 11 949 0 284 2015 437 383.60 38 93.67 CHANGED Mshsh+LplM.FLQahIWGuWhssLhsYMhhoh.FsuuplGhVYSuhGIAAllhPhlhG.lAD+ahSAc+shAlsHhlGAlsLahAApsTs.tshFhlhLl.ShsYMPTluLoNSIuausLt....DssssFPsIRlhGTlGaIhu.......hhlluhuslE.oph.LhIuAusShlLusaALTLP+hP.scppAspulsshLGLDAFhLh+s.phhlFFlhohlhus.Lphh.lFuNsFLpshst.......hhsphuollhSluQhuElhFhLslPFFLpRaGIKsVMllulVuaslRFuhFAYG.DssshshsLLlLuhllaGsuaDFF.lou.lFV-pcsssslRuSAQGLFhhhspGhGsllGuhluG.hhlchaohsG.....shDWpTlWLlhAuhulhlAslFhhhF+.scs ..........................................................................................................................MthphpLphh....Flp.ahl.W...Gu...W...h..s...s....L.u.....a...h.h.s......o..h..Fsuup.....IGh.l.Yus.h.uIAA..ll.h.P.h.L...l.G...h...l...u...D...+...a...h..s...Ap...+....l....h....u....l..h....p.....h...s...u.A......l....h..........h.......a.........h.......A................A.......p...........................T......s............s...s...h....F...h...l....h...L..h.........u...h...s...Y...M...P..T....l...u..L...s...N..o..lu.a..s..plt............D.h.s.p.D.FP......I.R..la....G.TI......G.F..I.h..u...........................h........h...h....l...........u...........h..........s....s...h...p.....o...........p...h........L....h..I..s...u...........u....s....S..h..l..L........u..l.......a........s..............h...s.....L.....P......c....h......P.........s........c..p.........p..........u.............p....p.....s....h..p.....s....h........L.G....L.......D..A....h..s......L...h.+...................s...+.p...h.....h.l.F....F.l..h.....u.....h.L..l.u..s...s...L..t.h..h......h...Fu..Ns..a...Lp.s.hu............................h..s.p.p.s...o..h...h..h.......o...l.uQ..h....S....El.h..F.hL...s
..l....PF.F....h+....R.a....G..I....K..pV..h.L.l.u...h..lu..h.s.l.Ra.u..hF.u...aG.s..ss..........h......sh.s.L..Ll...L...uh....l.....laGs.u.FD.F.a...lou......laV.-pcs.s.sp.h.Ruu..AQGl.h...h...h...h...sp...GhGshlG.sh....luG....hh.h...c.h.a....s......s.t..........................................hh.s....W.ps.h.W.h.hhA..sh..hl.l.A.hhF..h..hh.F+.pc.h................................................................................................ 2 107 174 238 +3543 PF02321 OEP Outer membrane efflux protein Bateman A anon Bateman A Family The OEP family (Outer membrane efflux protein) form trimeric channels that allow export of a variety of substrates in Gram negative bacteria.\ Each member of this family is composed of two repeats. The trimeric channel is composed of a 12 stranded all beta sheet barrel that spans the outer membrane, and a long all helical barrel that spans the periplasm. 30.90 30.90 30.90 30.90 30.80 30.80 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.95 0.71 -4.70 113 30969 2012-10-02 20:27:15 2003-04-07 12:59:11 13 38 2653 50 9040 27392 6704 187.10 16 75.76 CHANGED hpphhptshpp....ssplptt...ptphpts.ptphphtpst.h.hPplshssshshptststtht.................................................hslsl......shslas.....spttsphctuptphptsptphppsppplthpltpsahphht.spppl.phtpptlptscpthphtpppaptGthsh....s.lhpsptp..hhpsctphhpsptphptshtpLtthhu 
.........................................................................................................h.t.slpp....p..ss...lp..tA.....ptp..lptA...ptplp..hucu.......s...h..h......P...s...l....s...l.....s..u....s...h......s...h..s...p..s...s..h.s..s..h..h.t.t.t...t.................................................................................................................hu.l..s.l.......................................sh...s.l.h.ph..............Gphps...plc.t.A..ptphp..t..u..pt..p..h...p....p....s....thsl...h..t...pl.s.p...sa.hs.htt...tpppl..p....h.t...p........p.......t.l.p.s.t....p.ps.hc..h..s..p..p......p.a..p......s...G..h..ssh..h..........-..lhpAptp.....hhps.p..tp.h..hpsptphttsthpLtthhG................................................................ 0 2657 5583 7554 +3544 PF02265 S1-P1_nuclease Nuclease; S1/P1 Nuclease Bateman A, Mian N anon Pfam-B_2480 (release 5.2) Domain This family contains both S1 and P1 nucleases (EC:3.1.30.1) which cleave RNA and single stranded DNA with no base specificity. 
21.90 21.90 21.90 22.00 21.80 21.70 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.12 0.70 -4.75 81 645 2012-10-01 21:01:47 2003-04-07 12:59:11 11 7 360 1 375 699 285 243.10 24 82.02 CHANGED WGppGHhhlupIApphL.sspstptlpplL.ss............ssLuplusWADcl+p......h.................chsus....WHalsh.s.t..........t...aspsh....t.s.ppss.lluuIpphsstLpctpts..ppp+........spALtaLlHFlGDlHQPLH.suh......ttDpGGNslpVpah......scp.....s........NLHplWDothl..psh.t.............shsphsssL.pphs....tpphpsht........h.s...s....................htWApEShphup..phsh.....ssspssps...Lus..p.YhtsthsllcppltpuGlRLAshLNplhs ............................................................................................................WuttGHhhlutlApph...L...sspstttlp.plLst...............................ssl.sp...husWs.D...pl+t.....t.....h.....................................................ph..sus...hHal.shst.................................................spsh..........tp....s..psts..lhsulp..phtp.tL.t...stpts........ttpp..........................................s.uLta...lsHhlGDlH.......QPLH.s.uh.......................tDtGGNplp.lph...........spp...........s.............................NLHplWDst...hh.....pphht...........................................shpthh..ptl..pphs..............tpphpth....................................s.s....................................tt.asp.-ohp.hsp..p..hsh............................th.tstp.............lst....p.Y..ht..p..ths..lsppplt.uGhRLAthLstlh.t........................................................................................................... 0 171 268 338 +3545 PF03066 Nucleoplasmin Nucleoplasmin Griffiths-Jones SR anon Pfam-B_2930 (release 6.4) Family Nucleoplasmins are also known as chromatin decondensation proteins. They bind to core histones and transfer DNA to them in a reaction that requires ATP. This is thought to play a role in the assembly of regular nucleosomal arrays. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -4.69 23 444 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 114 62 203 360 1 149.80 36 64.51 CHANGED pspsaLaGCELspccpphoFcs.....t--c....cspHpLsL+olCLGspAK-.EhNlVElpu...hsh-upp.lplslAsLKsSshPhVolsGhElsPPVTFRL+sGSGPValSGpH.lshtpt.t.t................................................---EE-p--E-D-ED----pppE-.sPsKps+ .........................................................................pshhaGCELpucc.shpFcs...........-p-.........cs-....HpL.sL+.p...lsLGssAKD...Eh..p.lV.Espu...hsh..-up...lcVslAoL..Kh..S.s.pP.t......VoLs..s..Fp.l.s.P.PVs....h..+L.+.........sG....S......GP..V..aloGp........H.h..ls..hp.p.-t..p....pc...........................................................................pc--...-tt....---p-..-..-..cp......t.................t........................................................................... 0 29 44 89 +3546 PF03177 Nucleoporin_C Nucleoporin; Non-repetitive/WGA-negative nucleoporin C-terminal Mifsud W anon Pfam-B_1212 (release 6.5) Domain This is the C-termainl half of a family of nucleoporin proteins. Nucleoporins are the main components of the nuclear pore complex in eukaryotic cells, and mediate bidirectional nucleocytoplasmic transport, especially of mRNA and proteins. Two nucleoporin classes are known: one is characterised by the FG repeat Pfam:PF03093; the other is represented by this family, and lacks any repeats. RNA undergoing nuclear export first encounters the basket of the nuclear pore and many nucleoporins are accessible on the basket side of the pore [2]. 
25.90 25.90 26.70 25.90 25.50 25.60 hmmbuild -o /dev/null HMM SEED 587 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.99 0.70 -6.00 63 645 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 273 7 432 629 4 485.40 17 42.05 CHANGED hsssh.pppLchhhpslpsLhcFlppsts............................................................hstppchtlhtctctlsuhhtLhpphpcthshhp.hlhp.............................p............hsshhhhlsp.sphpplhshtacplht.....sp.p.tp.htp.Llsshlspslhpsu..th-.phusth...p.ps...........ssh.t.........................phhpup.pssh.shcp.hchhlp...............hpth.tph.......shpshpptssphhshphhttshtl.hhphsptt.spsppshphhpc.tt.pps....+p..ht.htp.hshac.shplhcphcshsshsp.h................................t...ppttphppch.phhtphs-.sFthtlYcahlpp...sth.phh.phppsalhpaLpc..................................thucLhWlahhppp.cahpAApsLhpL....sps..................hpcRl-hhhptluthsst.......s.................htphlpclpchl-lsplQcplhp.hlps.tph.....................ttp..h....pthhpt.....................Lhsphshshphh.................................lpLthhphss......stthlptlWpchltp..s...p....pttttt..........................hpthphthsslhpplpts.hshshsthh.h 
................................................................................t......pplp.h.pthptLhpFlpp.t.h...........................................................................thttphth.tctptlt.....uhhtLh...ph.t...ph.th..ht..hh.p................................................................................t.............hp.h.hhhtp.pt.pplh...thhhc.p.....lh................p...hp.....hth.l...hs.l..hthhpsu.............hc.thsphh....p.tp..............................s.h.p.....................................hpup.ph....t.h.p.....hc.hhp.....................................ht.....h..................................htth......s.tph.hp..hp.h...t.hhh.h......hh........hst.t.....p....t.t.s.p.......pp...t.....tp........p..........pt.....h.p.httap.hhp...hh...p.ph....shs.ths..h......................................ttsp.hpph.ph...hpp.t-....Fp..hlapWh.l.....pp...........shh..ph.....h..p............p.pshltpaLpp...........................................h.cLh..Wha.hppp..pat.pAA.psLhpL.u...spp.....................................hpc+hthhhtt.lsthsstt.......................................tphlcclppphc.lhplQpplhp..hlpt.ttt...........................................................th.t...................httLhsphs.shp..h....................................................hph.h.p.tt.t...........p.thhpthWpphlpp...p..........ptt.................................h...thh.lhphh.......................................................................................... 0 144 236 357 +3548 PF01773 Nucleos_tra2_N Nucleoside_tra2; Na+ dependent nucleoside transporter N-terminus Bashton M, Bateman A, Yeats C anon Yeats C Family This family consists of nucleoside transport proteins. Swiss:Q62773 is a purine-specific Na+-nucleoside cotransporter localised to the bile canalicular membrane [1]. Swiss:Q62674 is a a Na+-dependent nucleoside transporter selective for pyrimidine nucleosides and adenosine it also transports the anti-viral nucleoside analogues AZT and ddC [2]. 
This alignment covers the N terminus of this family 21.50 21.50 22.10 22.10 20.40 20.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.15 0.72 -3.64 166 4054 2009-09-11 09:51:31 2003-04-07 12:59:11 15 8 1905 1 661 2186 512 74.60 34 17.73 CHANGED lGlllllhlAaLhSpsR+.sIphRsVh...hulslQllluhhlLpsshGpshlpslusslspllsauptGssFl..FGsLs .......lGllVllslAaLhSs....sRK..pIphRs..Vh...hhLllQlhluhhhLt..........ss..hG..t.thlpshup...shppllsaus.s.GssFlFGul........... 0 162 320 492 +3549 PF03800 Nuf2 Nuf2 family Bateman A anon [1] Family Members of this family are components of the mitotic spindle. It has been shown that Nuf2 from yeast is part of a complex called the Ndc80p complex [1]. This complex is thought to bind to the microtubules of the spindle. An arabidopsis protein has been included in this family that has previously not been identified as a member of this family, Swiss:Q9C953. The match is not strong, but in common with other members of this family contains coiled-coil to the C terminus of this region. 20.90 20.90 21.50 21.50 20.60 20.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.68 0.71 -4.23 31 294 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 249 4 211 290 3 143.80 25 32.91 CHANGED hsp......t.sFPhLsscEIlssLps...hsls.hotcsLp+......PssphlpplYpphlphhhGh.sh-pl.t..........hhtsstpslp.sp.................hhp-slshhshh+hhpchh.ssGls.DFshpDlh+PcscRhp+hLSullNas+FREp+h...thhcctlpchcsth .................................................................................hp......p.sa.h.LsspEIstplpp............h..sl...s....hstpslt+.............Pp..s..ph..lphlap.thlphhhsh..sh.-sh.......................hhhsstpslp..sp......................................hhpss.hsl..hshatph.pchh.ts.....sls......DFshsDlhcP..p..s..cRht+...hLSull....NFh+FREpph...thhpchhtphcp.t........................ 
0 69 116 172 +3551 PF04121 Nup84_Nup100 Nuclear pore protein 84 / 107 Wood V, Finn RD anon Pfam-B_13117 (release 7.3); Family Nup84p forms a complex with five proteins, of which Nup120p, Nup85p, Sec13p, and a Sec13p homologues. This Nup84p complex in conjunction with Sec13-type proteins is required for correct nuclear pore biogenesis [1]. 18.50 18.50 18.60 18.60 18.00 18.40 hmmbuild -o /dev/null HMM SEED 697 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.03 0.70 -6.85 16 402 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 252 7 259 410 4 519.90 20 76.82 CHANGED pphp.hcLEtchW+LlptLaphR...htpppccptphc.........shsSctsh.cplhppNsplhEhpLllsWLcssh.t..h-tssslsh...os+WpNTlhslt.pht..shs.tt......shVpslDsDAPlR-pps.LcstDcpcDppla+hlFpLlhsGpl-EAhchCcpoGNhhhuhILpG...hcpa.hDPslDhphss.........................ps.pGsp++sLW++ssaplupssplDcYERAIYuhLu.Gslss.lslsts.sWEchLhsYlsphhshplEs.lhspshssp........h.lPpphht....olpplhspL.stpssclh-puccPlRll.stlILsslssllcshschL.sshss.....t..tpsalLRllTHLslFl+hlsh.tp.p.sDc..llssYlphLtttt.h-LIPhYloaL.spp.tl-tYShFLpsls-sptRc+QlElupphuLs....................lssIh+pTVppl.hp-o-pthh.sspssh.....p.clochDp+lIsulEWL.h-sp.hs-ulppuhAlhR+FLhsGKlcuh+phhpthshcslhp.Yph....t..cslssc--......sshcEllpYcshlpulctap-Wpch..........................pppp.sscpphsphpp+lpthopsshcLlhshLlt................s-sp....pctphh.plRsLYlPhllhtLHp..lhtpsp.....hhpculpLushVAsEscplYh.lF.ssG+LpEaLpplucsShl 
....................................................................t.....hp.Eh.hWpLl..lh............c.................t......................................ott..h.pt..h...h.t.tst.hh.p...h.lhpWLpt.h.............................t.......................................thhW..........Th.tl...............................................hlpplDsDAshR..pt.....h..........Lp..Dp..-.thhhhhaphlRtG...h.p.c.h.chspptsp.W+Ah.lt...G............h...........ss.t.p...t...........................................................s.........p.tlW+hssh..........t....h.up..........p............t.........t...........h.......s......a..................-+Alau..hls...G.pht..t......h.....h...ls..p.....sWc-hlashhp.h.l.p..hc....t.l.t.............................................h.st.....h...........................s..h.....p...p.hh.pt..l.......t.t..p.th.t..tspp..h.phlpt........llh..t...........ph..s.t.......h.h..p...th.........l...t.........t............................................................................................................................................................................h................lRhhsHl....h..lhhp...l.t...............................................t...............p......................................t.........................l...l.......t..tYl.p.h.L........t......t.....p..hlshYss.L.st..th.......t.hu.hL.t.l..........p....p.......+............l...phh.....h...tls.............................................h..thhp.......h....p.....h....htt................................................................hp.....-...h....hpsl-Wh...h.s............t.h.c...........hh..s.hhh+ha.h......h.........tp.h...tshp.hh..h........t...h.................................................................................p........h....t.h.....p....hhth..p.h..ph.p.............................................................t.......t..t......h.......h.....hp.......t.....h..h...h.........thl...........................................................................................
.......h..lRthhhP.hh.h.h.t........h..tt.....................................h.........tshths..lu..p...hh.................h........tt..t..p...h..hh..h............................................................................................................................................................................................................................................ 1 84 143 213 +3552 PF01029 NusB NusB family Finn RD, Bateman A anon Bateman A Domain The NusB protein is involved in the regulation of rRNA biosynthesis by transcriptional antitermination. 21.30 21.30 21.40 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.47 0.71 -4.18 114 7994 2009-09-17 00:13:35 2003-04-07 12:59:11 13 8 4433 32 1709 5198 2636 125.80 25 44.18 CHANGED pp...s....RphAhpsLhph............................htpthhpthhs...............hspp.spsahppLlhGshcphtplDthlsp.hh.shslc+hshhs+slLRlulaElha..h..clPspsslsEuV-luKph...ssppsst..FlNuVLcplt+ ....................................................................................................................psRphAhpsLhph.............................ptthhs.hhs.........................ttt.h.sst....Dtsh.hp..cLl...hGshpppspL.Dthlst..hh....s..hs.l..pc..l.s..t.h..+.sl.LRluhYplha.........h.....clP......s..+....s....sl.sEuVElAKph..........uspp....s.tp...alNGVLcphh............................. 
1 580 1130 1455 +3553 PF02357 NusG Transcription termination factor nusG Bashton M, Bateman A, Finn RD anon Pfam-B_697 (release 5.2) Family \N 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.45 0.72 -3.75 170 5994 2012-10-02 20:41:53 2003-04-07 12:59:11 14 4 4395 19 1234 3385 2659 97.80 33 51.97 CHANGED tpWYllpstsspEp...c..ltp...pL.p....p..t..lps..................hlPhtph....ch...ps........+hp...ppshaPGYlFlchshs....s......p.....h..htl........pss..uVss...hl.......G.......sp.....Pss.......lscp .................................................................pWYllp..sa..S.GhEp....+..Vpp................sL...c.........p....p....lps.shtc...............................hhplhlPpEcl.....hcl+...sG........+pp..hc+phFPGYVLVchshs.............D..........c..........s..Wpl......l+sT....PsV.s.G..Fl.........u...........s+......PsPl.p.t........................................... 0 418 810 1052 +3554 PF04277 OAD_gamma Oxaloacetate decarboxylase, gamma chain TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 21.20 21.20 21.20 21.20 21.10 20.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.65 0.72 -3.61 96 1171 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 853 0 225 777 96 73.50 28 65.31 CHANGED ltpultlM.lhGMuhVFlFLhlLlhhlplhupl....hpch.............ssppssssstss.sspststt................s............sscllAsIsAAlppa+pp ..............puhtlh.hlGMuhVFhh...Lh...lLlhslphhutl...........lsch........................hsc.s..s...s.s.tsts..tssss.................................................s...................................sclhAsIs.AAlppap..t..................... 0 83 144 191 +3555 PF03977 OAD_beta OadB_MmdB; Na+-transporting oxaloacetate decarboxylase beta subunit Bateman A anon COG1883 Family Members of this family are integral membrane proteins. 
The decarboxylation reactions they catalyse are coupled to the vectorial transport of Na+ across the cytoplasmic membrane, thereby creating a sodium ion motive force that is used for ATP synthesis [2]. 30.00 30.00 30.40 30.50 29.90 29.90 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.12 0.70 -5.71 9 1424 2012-10-02 17:06:44 2003-04-07 12:59:11 8 5 969 0 273 1037 261 354.20 54 94.72 CHANGED plG....................pllMllVGlhLlYLuItKcaEPLLLlPIGFGslLsNlPtuGls..shp......................................................GhhhhhaphuIsstlhPLLIFhGlGAhTDFGPLlANPKTlLLGAAAQhGIFhslhsAlhL....G..Folt-AASIGIIGGADGPTuIYloopLAP-LLusIAVAAYSYMALVPIIQPPlh+hLTTpcERKI+Mp.QLRpVS+hEKIlFPIllhllsuLLlPsAsPLlGMlhhGNLhREsGVV-RLucsAQptLlNIVTIhLGLuVGSphpADpFLsspTLhIlsLGllAFssuTAGGVlhAKlMNhFppcKINPhIGAAGVSAVPMuARVspKluhEEDPpNFlLMHAMGPNVAGVIGSAlAAGVhLthlu ........................................s..GphlMllluhlLlaLAItKpaEPLLLlPIGFGslLsN.....l.P..........u.....Ghh...................................................................................................................................................................................................sGhLhhhaphuIp.o.ulhPhlIFhGlGAMTDFGPLlANP+s.lL.LGAA.AQFGIFsollsAlhL...............u......F.oht.pAAuIGIIGGADGPTuIaluu+L....A............P.c...L.LGuIAVAAYSYMALVPlIQPPIMKhLTTccERpI+M..t....ph.....R..p.......V..S.........KpE.......KIlFPlllhllsuLllPsusPLlGMhhhGNLhREuG.V.l-.RLScTAQNtLlNIVTIhLGLoVGuphpA-pFLp.pTLtIlsLGllAFshuTAuGVLhAKlhNh....h.o..+....p....KINPLIGuAGVSAVPMAARVspKlGh-tsPpNFLLMHAMGPNVAGVIGSAlAAGlhLshl.h................................................. 0 113 184 234 +3556 PF04225 OapA Opacity-associated protein A LysM-like domain Mifsud W, Bateman A anon COG3061 Domain This family includes the Haemophilus influenzae opacity-associated protein. 
This protein is required for efficient nasopharyngeal mucosal colonisation, and its expression is associated with a distinctive transparent colony phenotype. OapA is thought to be a secreted protein, and its expression exhibits high-frequency phase variation [1,2]. This is a LysM-like domain. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.78 0.72 -4.09 12 1645 2012-10-01 23:00:54 2003-04-07 12:59:11 7 7 935 1 206 836 122 83.80 37 23.87 CHANGED sW+papVtpGsTLhQlFRcNsLsloDlsAhs+lEGusKPLSplKsGQhl+hplsspGplstLplEsssp.tlhFhRtuDGoatRsK ........+pYhVpsG-TLuplF.ppa.GlshsDlhtlAps.p..s....s.K.sLsN.L+sGQplph.ph.s.A.s.G.p..L.ptL.o..h-.s.upppph.h.asRp.ss.G.ua....h.................... 0 28 71 142 +3558 PF03373 Octapeptide Octapeptide repeat Bateman A anon Bateman A Repeat This octapeptide repeat is found in several bacterial proteins. The function of this repeat is unknown. 17.50 3.90 17.90 3.90 17.10 3.80 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.78 -5.53 0.78 -5.51 0.78 -2.61 3 3282 2009-01-15 18:05:59 2003-04-07 12:59:11 9 83 143 0 12 2907 0 8.00 83 23.91 CHANGED PGKEDNNK PGKEDsNK.. 0 12 12 12 +3559 PF02317 Octopine_DH NAD/NADP octopine/nopaline dehydrogenase, alpha-helical domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_9653 (release 5.2) Family This group of enzymes act on the CH-NH substrate bond using NAD(+) or NADP(+) as an acceptor. The Pfam family consists mainly of octopine and nopaline dehydrogenases from Ti plasmids. 
23.30 23.30 23.50 24.20 22.80 23.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.55 47 615 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 543 5 140 433 40 146.60 33 43.51 CHANGED lphtpshLphsLsNhsslhHPshhlhshuphc......................thp.......sch.hahpGhs...stsucllcslDpERhslApAluhph..hshtphhph..tY.......thcsss..lhchhpssp.uasslssPss.hp..........pRYlhEDlPhGLlshtuluchhGVssPhhcullphssshhGc ......................................h.shtpsllpssLpNsNs.lHPs.hlhNhGpl-.......................as............uEaslat-Glo...cpssclLcul-tERlsluctL.Ghch.........shp-s..hh......ptYh.......................tttp.-s.ps..L...pch..hpTss.sastl..s.sPsp.hc.....................sRYlsEDlsaGLshhsuluchhs.VsTPsh-ullhlusshht.p......... 0 53 95 127 +3561 PF02101 Ocular_alb Ocular albinism type 1 protein Mian N, Bateman A anon IPR001414 Family \N 25.00 25.00 32.20 31.50 22.90 22.60 hmmbuild -o /dev/null HMM SEED 405 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.42 0.70 -5.87 2 77 2012-10-03 04:04:29 2003-04-07 12:59:11 10 2 52 0 48 82 0 326.40 45 95.20 CHANGED MASPRLGhFCCPThDAATQLVLSFQPRsFHALCLGSGsLRLsLGLLQLLsGRRssG.tuPATSPsASV+ILRAAsACDLLGCLGhVIRSTVWluaPpFl-slSshNtT-IWPAsFCVGSAMWIQLLYSACFWWLFCYAVDsYLVIRRSAGhSTILLYHIMAWGLAsLLCVEGAsMLYYPSVSRCERGLDHAIPHYVThYLPLLLVLVANPILFpKTVTuVASLLKGRpGlYTENERhMGAVIKhRFFKIMLVLIhCWLSNIINESLLFYLEMQsDIpGGSLK.lpsAA+TTWFIMGILNPAQGhLLSLAFYGWTGCSLshpsP+h.IQWEohTsSAAEGsa.oPl...hPHpNPt..KVspVGGpTSDEsLShLSEsSDASTlEIHTAotSCNhpEsDsh..spG-L 
.................................................MAsPpl..hCC.....h...s.s.t..hh.tFp..hatshslsSushthhhslhQlh.............h..t...........t....................................t...p...IlhhhshsDhLushGllhRSsl..Wlu.h.Pshlp.shSshst.o.claP........ss.F...CV.u.S.u...hWIQ.L.hYSAsFW.WhFCY.AVDsa.LVl..+c.SAG.hSs.ll.LYHhhsWGLAsLLslEGhshLYa...P.....S.....h..u.....p.C...E.pGLp......HA....I....PHYlTTYhPLLLVLluNPILFp+TlsuVsSLLKGR.pGIYTENERRhus.IKlRFFKIMLVFhlC..WlsNIINEoLL.FYL......EhQsDl........psssL+.l...+.s..AAhhTWaIMGILNPhQGFL.oLAFaGWT.Ghp.lth.....p.h.h.W-phssS.h.s.tt......................p...t.........p.....tpp.s-slshLSE................................................................................................ 0 14 18 32 +3562 PF02100 ODC_AZ Ornithine decarboxylase antizyme Mian N, Bateman A, Moxon SJ, Wood V anon IPR002993 & Pfam-B_34796 (release 7.7) Family This family consists of ornithine decarboxylase antizyme proteins. The polyamine biosynthetic enzyme ornithine decarboxylase (ODC) is degraded by the 26 S proteasome via a ubiquitin-independent pathway. Its degradation is greatly accelerated by association with the polyamine-induced regulatory protein antizyme 1 (AZ1) [1]. 23.90 23.90 24.00 25.70 23.40 23.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.58 0.72 -4.57 28 301 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 183 1 182 274 0 105.80 32 48.95 CHANGED hsssphspW......pslls....................ppsL..aV.lPp.sshspu.........................................KculhuLLEaA--p.LpssclhlshpKscs-...pssLlRohpalGFcllsss.p.............ss.......sschhaMsasl ..............................................................................s......pW.......tlhs.........................spsL...alp.lPt..ssLspG........................................sK-ulhuLLEaAEEp.Lp.....sspVhlChc+sRsD.....RusLl+sFs.alGFEllpPs.c............................................shsP........pschhFMsY.h.......................... 
0 47 74 125 +3563 PF02423 OCD_Mu_crystall ODC_Mu_crystall; Ornithine cyclodeaminase/mu-crystallin family Bateman A anon Pfam-B_1960 (release 5.4) Family This family contains the bacterial Ornithine cyclodeaminase enzyme EC:4.3.1.12, which catalyses the deamination of ornithine to proline [1]. This family also contains mu-Crystallin the major component of the eye lens in several Australian marsupials, mRNA for this protein has also been found in human retina [2]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.69 0.70 -5.66 11 2697 2012-10-10 17:06:42 2003-04-07 12:59:11 10 13 1722 12 903 3070 1011 299.80 23 91.90 CHANGED scshhhhttcVthcshhs.chhshlEssFRpaups...thcpsP+lssas+c....GslclMPsh.us...hhGhKaVsuaPcNscp.GLPTlsAhhVLs-ssoG.hPlhLh-sshLTAlRTAAsuAluuKaLA.psupshAlIGsGsQuhhQscAhptlhs.lpcl+laDlcscusc+hupplpt...thplsAssssppAVcGADllsTsTsscp......sllpssaVpsGsHlNAlGuDssGKsELcs-lLtcADlaV.-a.sQs+tpGElpp.................lssccshspLu-VhsGphsGRsssccITlFDSsGhAlcDhushchla-pspsts 
.......................................................................................h......................thhph...lpp.s.h.t.ths...pt........th...ts......sp......h.......h..h.........h.....p............ts..hh.h...h.MPu........h....h..........s.......s.........................h...............hGh.K.......hl.s......s...........h..s......p.....Ns....p..........p....u....h........s.........s..h.........p....u..h.h.l..L..hD.s..pT.G.hs.h.u.l.h.-ushlTshRT.A..A...s..o....u.l..A.......s....c..h....L.........A.....p....p.......s.....u.........c.........s........l........s.....llG.s.GhQA.p.h.p.l.p....u..h...t....t......l.......h.......s....l..cc...l.....t......l......a.............s........p...s..............p....p..s............p....p....h.............s........p............p....h.........p...........t.........h..........t..............h...........p.......h......h.............s.........s..........p.............s........s.........p.........c.............A.........l...........p....s....A.......D......l.......l.s....o.sTsupp...............P..l..l..p..s..........p.....h...l.............p.....s....G........s............a....l....s...s..l.G.u....p.s...s.h..p.E.l...s........s....c..l....l.....t.....p.......u..........p............l....hV....-.....h.......s.......t...h.....p.......c.....t.G.-.lh........................................................hs.t..p....t...h....h..s..-....L..u......p.l.........l........s.....G....p.....t......s.....s...........R........p.....s.........s.......c..........c.............l........o..l...........F.cssGhulpDlssAph.lhpph....s.................................................................................................................................................................................. 
0 225 509 735 +3564 PF02159 Oest_recep Oestrogen receptor Mian N, Bateman A anon IPR001292 Family \N 20.60 20.60 21.80 21.20 20.50 20.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.98 0.71 -4.05 11 220 2009-09-12 00:57:07 2003-04-07 12:59:11 10 10 133 0 28 206 0 120.80 46 25.14 CHANGED MYs-ps+s.....ushNY.-GA.YD.....Fssss....suAPs.apsuolu....Yhsus..ssaGssu.uthpoLsssssSPLhFlp.ouPQLSPaL.H.sG.....pQVsYYL..........-ousoshYRSSsssppQuu....cEhhouus-............+Guhuh-SsKE ....................hYs-ps+s......ushsY.EGs.Y-.....assss......ssuPl.Yuposls....Yhsus...suaGssu.Guh.sLs.osssSPLhhl..ssP....Q.LSPFl.H.tu.....pQVsYYL...........EsussshYRsssssppQuu....cEp.huoos-..............+GuhuhESsKE................... 1 2 4 11 +3565 PF04664 OGFr_N Opioid growth factor receptor (OGFr) conserved region Waterfield DI, Finn RD anon Pfam-B_4529 (release 7.5) Family Opioid peptides act as growth factors in neural and non-neural cells and tissues, in addition to serving in neurotransmission/neuromodulation in the nervous system. The Opioid growth factor receptor is an integral membrane protein associated with the nucleus. The conserved region is situated at the N-terminus of the member proteins with a series of imperfect repeats lying immediately to its C-terminus [1]. 
19.80 19.80 19.80 21.00 18.70 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.26 0.70 -5.01 13 234 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 123 0 109 217 96 177.60 34 53.33 CHANGED hh-.c.s.-h..........hNL+FYpNEIshp...PsGhaI--lLppWpss.....Y-hLEcNHoYIQWLFPlcEpG.lNhpApsLThcEIctF+psc-l+++hlcuYclMLcFaGIcLs-c+sGpVpRApNappRFpNLNcasHNpLRITRILKsLGpL.GhEpapsPLV+FFL...pEoLVptpLssl+pSuLD.YFhFsl+s+ppRRELltaAataacPptcFlWGP.chhphp ..............................................ht.....................lpFYpsch.........spGhhI-plhppW.ts...........YchLEcsHsYIQWL.FPlpE.u..hN..hh...A..LTtpElc...tF+psp...ch....hc+hlp.uYch.MLpFaGlcLt.............s.p.sG...p.....l...t...+.u...t..s.............a...p...........cR.a.p.p.....L..s.pp.pHN.............LRITRILKSLupL.GhcpapssLs+ahl...pctlhppph..s..s.lp.puslc.Yah.slh.................................p................................................................ 0 34 49 73 +3566 PF04680 OGFr_III OGFr_repeat; Opioid growth factor receptor repeat Waterfield DI, Finn RD anon Pfam-B_4529 (release 7.5) Repeat Proline-rich repeat found only in a human opioid growth factor receptor [1]. 19.50 19.50 20.90 19.50 17.20 19.30 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.41 0.73 -6.43 0.73 -3.63 4 97 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 10 0 7 144 0 21.10 66 13.11 CHANGED uPpET.GPRsAs....Pss...DpPAE .uPpETPGPpsAG....PAu...DpPAE..... 0 7 7 7 +3567 PF04606 Ogr_Delta Ogr/Delta-like zinc finger Mifsud W anon Pfam-B_5059 (release 7.5) Family This is a viral family of phage zinc-binding transcriptional activators, which also contains cryptic members in some bacterial genomes [1]. The P4 phage delta protein contains two such domains attached covalently, while the P2 phage Ogr proteins possess one domain but function as dimers. All the members of this family have the following consensus sequence: C-X(2)-C-X(3)-A-(X)2-R-X(15)-C-X(4)-C-X(3)-F [2]. 
This family also includes zinc fingers in recombinase proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.63 0.72 -4.30 67 983 2012-10-03 10:42:43 2003-04-07 12:59:11 7 4 577 0 122 677 13 46.70 40 45.45 CHANGED pCPpCsspA+hRoocth..otps.....pc....hYtpCpNh.p.....CupoFsst.ohs+sl ...........pCPhCss.s.A..+sR..o.Sp..h.....osps.....+-.....+YhQCpNl.p......CutTFhohEohp+hl.................... 0 13 47 82 +3568 PF01276 OKR_DC_1 Orn/Lys/Arg decarboxylase, major domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.29 0.70 -5.93 10 4944 2012-10-02 18:26:03 2003-04-07 12:59:11 15 15 2315 25 649 3284 459 379.60 33 61.32 CHANGED PPFaKALhcYVccsphoFssPGHpGGssFpKcPAGphFYDFaGENlF+uDlssusspLGDLLtHpGstp-ApcaAA+VFsAD+oYFVlNGTSsuNKlVs.AlsssGDhVLlDRNsHKSlpH.ALhhuGAsPVYLcPs.RNsaGllGGIstppFpc-sl+ctls-ssss+s......hhAVIpNuTYDGslYNs+pll-plc+hss.IhFDSAWVuYppFt.Phhtssoshtu-..p.ps.sIhsTQSsHKhhAuFSQuShIHhK-sph......ls+cRFNpAaMMHsSTSPaYPlhASLDVAAuMhcGsuG++Lhp-slcpulchRKpl........lssuphFcPatPplVcspp....................ha.hcssppWHuFcshsspphalDPsKlhLpTPGhs.csG-hc-hGlPAslVApaLcEp.GIlsEKoD..sILFLho.GtscsK 
.............................................................sPhhcALh.p.Yscp.t.phsF...s.s.PGH.....ps.Gp.h.a.p+......pP.sGp......ah-a...aG....c..shh+s.Dls...shst.L.......GsLLpH.pGs.h.t-A.pchsA+...sF.sA....-....+o.........ah..VlNGT.SsuN+slh..us......h......s........s.G........-........h.l.L.lDRNsHKS.lhp...uLh..hs....G..u.p.P.l.Y.l.css..R..........Nt.h.G.l.l....G.G..Is..t.p..php............p..hl..p...ct....l.p..c.....s.....s.s.s..ps.....................hsl.ls.p..s.TY..D..Ghh..Y...N...sc..t....lh...c...h....l...p...p..h..s.........l..hF..DpAWsuat..p.Fp....s....h.....ht.s.p..........s..htu-.........t..t.....hl.hsTQSsHKhhuuhS.QuShIHl+ss.....................hsc.cph....N..p.A.ahhHsoTS...PtYslhASl....D.su.sphhcut..sG...........c.pLh.pc.slchu.............lc.hR+tl................sps..hhh...cs..a............lss...........................h.a..h.....s.s.....t...tWH.u.F..t.s..h......t.t.......hlDPhKlhlhs......PGht...pG.ph.p.p.....GlPAs.ll..s.t.aL.t.c.p.Gllsp+ss..slhhLho.u.s.t............................................................................................................................................................................ 
1 197 374 507 +3569 PF03711 OKR_DC_1_C Orn/Lys/Arg decarboxylase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.20 20.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.69 0.71 -4.12 32 4706 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 2279 25 600 2682 216 119.40 31 18.69 CHANGED LPslapppPchYcshsl+-LCQphHcha+ppslhpLppchF..sshPchsMsPpcAatchl+sclEhVslc-h.GRluAphhLPYPPGlslllPGEha...upsllcYhhhLp-hhspaPGFps-IpGlahppcsst .............................................................hPplhtt.......Yt.th.tlppL.spthath..hh..p....s.......t..h...........th...a..........p.h.hP..p...h.hh....s....P..pp.A..a.....t..l...c.s....p....sEhV..slc..c....h..GR.......l.......u.......AphllPYPPGlPllhPGEhh........s.ps.ll.cYlt.h.Lp.p...h..thhP.G.Ft..-hpGh.......s.......................... 0 176 343 466 +3570 PF03709 OKR_DC_1_N Orn/Lys/Arg decarboxylase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain This domain has a flavodoxin-like fold, and is termed the "wing" domain because of its position in the overall 3D structure. 28.60 28.60 28.60 29.00 28.40 28.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.18 0.71 -4.03 82 2923 2012-10-01 22:20:39 2003-04-07 12:59:11 10 5 1008 23 274 1331 55 108.10 28 15.04 CHANGED hpttshppltptlpt.pshplltssshcch..hh..tscluulllsh-................phtpsllpplcppshplPlFlhsct...sspplsschlppls...thhph.hpssscahApplppAsppY ........................hpttslccLtssLst.pshpllhssspc-h..hhp.psclsul..lhshs........................c.stsllsclcpp.s.pl...PlFhhssp....sshcl.s.s.shhphl.....tahph...h.sss-.lAtclcpssscY................................ 
0 34 100 179 +3571 PF01277 Oleosin Oleosin Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 20.70 22.80 20.40 19.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.49 0.71 -4.53 15 355 2012-10-03 03:08:05 2003-04-07 12:59:11 12 4 58 0 111 359 0 107.80 35 54.63 CHANGED SosQlLsllsulPlGGsLLsLAGLTLAGTlIGLsVATPLFVIFSPVLVPAslsIGLAlTGFLsSGuFGlTuLSSlSWlhsYlRtppssss-pl-hAKtphp-hstasGpKs+-hGp+h ...................pllpslsusssussLLlLuGLTLs.GolluLsluTPLhlIFS.PlLVP..AsIsssLlssGFluuGuhGlsAl.uslsWlh+ahpu.....t.p..s......h....s....th..p.hh...........................t....................................... 0 25 71 86 +3572 PF02191 OLF Olfactomedin-like domain SMART anon Alignment kindly provided by SMART Family \N 20.80 20.80 21.20 21.20 20.00 19.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.73 0.70 -5.11 40 1087 2009-01-15 18:05:59 2003-04-07 12:59:11 11 39 91 0 497 860 0 245.70 37 33.48 CHANGED sLhslupPhhl.....+pssp...phGuWh+Dsh.....tssc+lahhsshsss...p..lhcapphpsF........htsptspp.........hpLP......hshpGsGaVV.Ys.GulYYpct............popsllKa-Lsspslsspp.LssA.uap..s.hsYthuuhosIDlAVDEpGLWlIYuTppssGp.....IVlS+LsPpoLpl..ppoWsTsh.KpssuNAFhlCGsLYsscohstps..........scIpYsaDTp..ssppp.tsslsFpppapthstlcYNP+-ppLYsWssGa.lhYslphtp ................................................................................h.LpslspPhpl.....ppssp...phGuWh+DPh......pssc+l..a...hh.sh.h.tp....pplhEY.pshpsF............htuptsps............................a+LP.......athpGTGaVV.Ys.GulaaN+t..............popsIlKaDLco........ch.........hs.s...............pt...............hlssA..sYc....................s..p.P...Y....p..W....u...GpoDIDLAVD.Es.GLWlIYuTpp..s..s...Gp.......IVlSpL....sP.hTLpl..cpTWc.T.s.h.sK.puAuN.AFhlCGlLYslcSh.psp..........sclpYsYsTp......ps.pt.p.h......ss.ls.F.......NpYp.aluhlDYNP.+DptLYsWsN..sa.ltYslph................................... 
0 66 116 258 +3573 PF00691 OmpA OmpA family Bateman A anon Pfam-B_166 (release 2.1) Family The Pfam entry also includes MotB and related proteins which are not included in the Prosite family. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.65 0.72 -3.58 84 17270 2009-09-13 10:58:36 2003-04-07 12:59:11 15 239 3418 100 4376 13311 4143 97.50 27 31.23 CHANGED hFs.sssp.l..s.......spspptLp....plu..phlp...........t........ltltGa.....................sss........hss....tpt....NhpLStpRApsVtph.Lhp.tG....ls.sp...cl...h..puhGp..spshssssssp.spthp.........R ..........................................................hFshspup..l......p........sp..s.p...p...h...Ls..........plA.....phLp.........................p.....h.....lpl.p.GH.....................................................................................TDs............................pGs......pph.........................NhpLSpcRA....pu....Vtph...Lhs...tG...................ls..ss............+l......ps.....hGhGc.......ppPl..ss.s.s..s..tt..spt.N.................................................... 0 1365 2716 3618 +3574 PF01389 OmpA_membrane OmpA-like transmembrane domain Bateman A anon Bateman A Domain The structure of OmpA transmembrane domain shows that it consists of an eight stranded beta barrel [1]. This family includes some other distantly related outer membrane proteins with low scores. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.28 0.71 -4.77 13 1301 2012-10-03 17:14:37 2003-04-07 12:59:11 12 16 841 9 132 2219 115 165.60 47 47.12 CHANGED Pp-NTaYsGuKsGWupaHDss.................psstssHcNshshGsFGGYQlNs.....LuhELGYDahGRhsh+upst..........................sstapsQGspLosKhuYPlsDsLDlYu+lGuhlsRuDsK....tpu.t.......tpsHDstsSsLhAsGlEYAlsPElAsRLEYQalsslGchpstsp............pPs.uslolGlSYRFGQus ..............................................................................................................................................................................................PpsNTaYs..Gu...Kh..G..W.u..p..a...c...Dss....................................sst.ss.c.c..s...p..l..u..h.G........s..F...u.....G......Y..Qh..N.sa...................luh..E...h.......G.....Y....D........a......L.....G..Rh..s...h....c.u...p..........................................................ss.s.a..c..A..p...G.....l.....p.....L......s.....s....K..h..u.........Y......P..........l......s...-....c.....L...Dl..Y..s.....R..L..G...G..h..l....W....R......u.....D...s...+...s.t..............t.........................hpsHD.T..GVS..Pl...h.A....uG..lEY..A..l..T.....s..-lAs..R..L....E...Y.Q....W..s......N....N...I...G...D.sp.o.h.Gs............................RP..D...s..uh....lS....lGlSYRFGQs................................................................... 0 12 44 91 +3575 PF03938 OmpH Outer membrane protein (OmpH-like) Bateman A anon COG2825 Domain This family includes outer membrane proteins such as OmpH among others. Skp (OmpH) has been characterised as a molecular chaperone that interacts with unfolded proteins as they emerge in the periplasm from the Sec translocation machinery [2]. 
29.20 29.20 29.20 29.20 29.10 29.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.14 0.71 -4.29 151 2754 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2008 6 700 2002 1837 154.60 21 89.11 CHANGED lslhh.h....hss....................ssst...Ap......................+luhlDhpplhpp..sttcp....hppplppp....hp...phpsc..............................................................................lpphppcl...ppthpphppp..tt.........thsppttppt..pp..chp.....................pptpchpphppphp...pc...........................lpppppp........hhptlhpc.l...ppslpplucp....pshsllls............pss...............llas..ss..sh..DlTscVlptls ...................................................................................................................hh..hhh..h.hss.............sstt.......Ap.........................................Kluhlshpplhp.p..h.st...tps....spppLcpc...........hp...thts-...................................................................................................................................................lpph.ppcl...........psthpchppp....t...........................hhstsptpch....pp.......-lt................................ptppph..ppt.....tpthp........pc...................................htpcppc........thpplhsc..l...ppulcplApp....pshsllls...........tss.......................lhYs..ss....sh...DITscVlcpl...................................................................................................... 0 256 489 615 +3576 PF01278 Omptin Omptin family Finn RD, Bateman A anon Prosite Family The omptin family is a family of serine proteases. 
20.10 20.10 20.40 20.40 19.60 19.30 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.87 0.70 -5.32 7 512 2012-10-03 17:14:37 2003-04-07 12:59:11 15 3 418 9 44 269 1 275.50 56 92.86 CHANGED uos...s.hsP-shosshSlGsLuGKo+EhVYcs.-sGRKlSQLDWKhpNsAIl+GslsW-.hshlolsApGWToLuStuGpMsDhDWhsS....spss.WTDcSpHPsTplNYANEaDLNlKGWhLpsssY+lGlhAGYQEoRaSaTApGGSYlYsssu....phGsFPssh+sIGYpQ+FphPYIGLsGsYRhscFEhsuhFKaSsWVpApDNDEHY..hRclTFR-KspsppYYusulsAGYYlTssAKlasEhoas+hpptKGsTplhDp.SssoushsssuAGIpNhNahsTAGLpYpF ................................t...h.shoP-slssslSlGsLsGKo+EhVY.s.-s...GRK.lSQLDWKhpNs.AIl..+Gsls.W....-h.......h.s.....l..olsA.tGWTo.L.u.S..t....uGpMVDpDWMsS.......spPG....WTDcSpHPD.Tp..lNYANE...aDLNlKG.....WLL.......ppsNY..+..lGlhA....GYQEoRaSaTA+GG..........S...YI.......Y..........ss...tc.....................IGs..FP...s...G....R....uIGYpQRFcMPYIGLs.G...sY.R.h.p.D.F.EhsuhFKYSsW..Vp.ApDN...DEHY.................h+.+......l..TaRpK.s.cs.ppY.YusulsAGYYlTsNAKlasEhuas+hs.ptKGsTplhD+.sss..Tu.autsuAGIpN.NahsTAGLpYpF............................................................................. 0 10 21 29 +3577 PF03922 OmpW OmpW family Bateman A anon COG3047 Family This family includes outer membrane protein W (OmpW) proteins from a variety of bacterial species. This protein may form the receptor for S4 colicins in E. coli [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.23 0.71 -4.64 9 1853 2012-10-03 17:14:37 2003-04-07 12:59:11 9 3 1309 10 354 1343 105 183.40 37 85.72 CHANGED psGsahl+uthspVhsssusshh.......hsssschs...sssssp.sLohTYhhoDplusEL.............luuT.hpHplssps...sssshluc...sphLPPTLhsQYa..hsssush+PYlGAGlNYshFaspphsss....thoclKLcsuaGhAhplGlDhhls.cshhlNhsVhhhhlcosAshp....sussthcscV+LDPWVhhhGlGa+F ...............................................................................................tGphhhRst.hsVhPspsussh................ssh.sshs........lsss.spluLohoYhhTD.N.lulEL............................................luu...o..PhpHcls..s.s..................s.....h.G..c....lus..........s+.pLPPT..Lhh.Q.Ya..hhsss..u...chRPYV..G..s..Gl...NY.Th.......F.as.p...p.hsss................................uh.o.sl...p...l..c...s.S.aG.s.A.s..p..l..G..hDYhls...cpWhlshsVhYh..sI.cTsushp.............hu..s...s......p......t............p.s...s..l+..l..DPa.V.hhhusGYRF................................................. 0 69 155 256 +3578 PF03532 OMS28_porin OMS28 porin Griffiths-Jones SR anon PRINTS Family \N 20.70 20.70 20.80 48.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.74 0.70 -5.31 2 39 2009-09-11 23:15:34 2003-04-07 12:59:11 8 2 24 \N 1 27 0 167.90 72 99.15 CHANGED KIFSNLIINGLLFGFVSLNVFADSNNANILKPQSNVLEHSDQKDNKKLDQKDQVNQALDTINKVTEDVSSKLEGVRESSLELVESNDAGVVKKFVGSMSLMSDVAKGTVVASQEATIVAKCSGMVAEGANKVVEMSKKAVQETQKAVSVAGEATFLIEKQIMLNKSPNNKELELTKEEFAKV-pVKETLMASERALDETVQEAQKVLNMVNGLNPSNKDQVLAKKDVtKAISNVVKVAQGARDLTKVMAISLY ...........KhFtNLIIsGLLFGhVsLNVFADSNNsshlp.pSNVlEpsDQKDsKp........LDQKDQVspuLshIsKVTEDVosKLEtVRESSLtLVESNDAulVKcFVGSMSlhSDsAKGsVlASpEATlVAKhSGhsAEsAN+VVEhSKKAsQETQKAVSVAsEAhFLIEKpIM.NKSPNNKELELTKEEFAKVEpVKETLMASERALDETVQEAQKVLNhlNGhNPSNKDQhlAKKDVtKAISsVVKVA............................ 
0 1 1 1 +3579 PF02462 Opacity Opacity family porin protein Bateman A anon Pfam-B_2356 (release 5.4) Family Pathogenic Neisseria spp. possess a repertoire of phase-variable Opacity proteins that mediate various pathogen--host cell interactions [2]. These proteins are integral membrane proteins related to other porins. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.62 0.71 -4.27 4 799 2012-10-03 17:14:37 2003-04-07 12:59:11 10 2 118 1 25 2965 35 133.70 47 70.81 CHANGED sGYchs.shRlAsDYT+YtKhsus...............sosl+Gh.....................LGhSslYDFDTtSchKPYlGsRVu......................oN.thchossA+h.thcu....sScp+lGlGVlAGVpYclTsNlsLssGhcYNplGph.psopVpshtspsGlRapF ...........................................................................................................VGYD.FG...s.....W.....R...I..AAD....Y...A.S.....Y.R..K...Wsss...............................p.............s..p......p...l..ptp.ssstpp.p.pp.................................QtNGoFHAsSSL.G.L.S....A.I...Y....DF.K..L..N..D...K.F...KPY..I..G..A..R..V..A...................................................................................hs....ps....+....h..p....s.....c....o....s...p..s....t..p..p..t................................................................................................................................................................................................................................................................... 
0 4 9 13 +3580 PF01160 Opiods_neuropep Vertebrate endogenous opioids neuropeptide Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 21.00 21.50 19.60 18.40 hmmbuild --amino -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.01 0.72 -4.20 31 202 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 67 0 73 181 0 44.50 39 20.36 CHANGED -CspDCutCuh+..spps.plNsLsCoLECEGcLsosctW-tC+clLpht ...............-CtpcCshCsh+..st...s.slNsLs.CsLECEupl.ssp..W-hCpphLp.h................... 0 3 11 26 +3581 PF04966 OprB Carbohydrate-selective porin, OprB family Bateman A anon COG3659 Family \N 19.80 19.80 19.80 19.80 19.70 19.30 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.98 0.70 -5.31 48 1348 2012-10-03 17:14:37 2003-04-07 12:59:11 7 5 568 0 433 1321 465 361.60 17 76.71 CHANGED cpshthssphthshshDLp+lhGh.cusphphs...hhpppGpslsss......hsshtshpplhGtt.......................................pthRLs.phhhppphh....................sspLsl+hGphshsspF.......................hssshsspFh.....shuhsusssss.........hsshhhsaP...hushusplchp.ssp....hhlpsGsacssssshss.............pGhsh........psssGshlssEls..ap.t......................................httsthsGpYclGuahssuphsshh.shsh....h.....................ts..spptp......................................uptGhahshpQpltpsss..........tsLslFupss............husssssh.....hsttlshGlshtGshtsRspDslGluhuhtphsspsptttt.ht..sh..............tssEhhhElaYphplsstlslpPslQYlhpPuss...........sphssuhlhG.l+hphsF 
............................................................................................................................................................................................................................t.t..tph..thph-LtthhG...h....pshphphs.....hhp.t.ps.h.shosp.................s..ts..h...s..h..pt.hh...utt...............................................................................................................ph.+Ls...phhhp.p.p.h................................tspL..pl..cl.Gphs.hsp..a........................................................s.sh..t.spFh.........................shsh.s.s.s.s..sss..........................hst.hh...s..aP.......tthus...pl.....ph..p.sst...................hhh.p.sGsa.p..ss..ss....s.hps...............................puhsh........................ttssGh..hh...hchs......at.t......................................................................s.s.th...sG.p..a..phu...s......h.h..s.ssp.h.s..sh.......h..s...t.h...............................................................s..pt.tp.............................................................................s.p.huhh.hshp.Qtlht.s.................sl.slhu.phs................................hsss...s.s.sh..................hs...h.thshGhsh..tu..h.....tRss....Dp...lGluh..u.h.t.h...ssp.h....p....th...t..t.................................pssphh..hEhhY..phpl........s..s........t..lp..lpPshQalhpP..uts.............pth.sssh..lhG..l+sphpF............................................................................................................................................................................................................................................... 0 101 252 353 +3582 PF03573 OprD Peptidase_S43; outer membrane porin, OprD family Griffiths-Jones SR anon MEROPS Family This family includes outer membrane proteins related to OprD. OprD has been described as a serine type peptidase [1]. 
However the proposed catalytic residues are not conserved suggesting that many of these proteins are not peptidases. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild --amino -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.97 0.70 -5.68 47 2107 2012-10-03 17:14:37 2003-04-07 12:59:11 8 5 748 29 364 1637 75 371.00 27 90.15 CHANGED tGFlEDSpsslphRNaYhNRDa+ss......ss.t...........upp-..EWuQGFlhsapSGaTpGsVGFGlDAhGhhGlKLDuGtuc..uG.oGl.......LP......hs.sD....Gc..ssDcau+hGs.ssKhRlSpTpLKhGc.hhPshPlltssDuRlLPpoFpGstlsSpElcsLsLpuGphspss.Rssoshc.chsh.ht........sussucphsasGGsYphs.cphosuhahucL-DlacQpYhulsashPluts.sLssslphacocDsGp....u....................thG..plDNcsaSshhuhph.GuHohsluaQ+ssGDssasYls..............G.ssshaL..sNsh.huDFssssE+SWQlRYDhsFushGl.....PGLoaMsRYlpGcslcssss....................s-G+..-WER-h-lpYVlQSGshKsLul+hRpuohRssh..........sschDEsRLIVsYslsl ........................................................................................................uFl-Dup.hshthh.h....hpR.Dh+ss..................st...............thpp......chsp..u...hhLs...apSG..a.......st.......s.......h.......l.......G...hG..lDs..huh..hulcLsssssp.....us...sth................h..............p.....ss.....tt........scsa.......u.......c.........h.G.....s.....s........sKh..+..h....u....p..o....h..l..........+.........h..Gp.h...........s.p.h.Ph...lhss.stRhlPpoap....G....s....t....h....s.s....p....-.h...s......s...L..shpshh....hs.c.hpt............sp.s..s..h....c....chh.ts....................................tsspscth..p.hhGu..p..Y.p........h.....s...ss.h.s..hphh.huphc-h.........hcQ...haht.h.sap..h.s..lu..s......sLsssh..p.ha....tsc....-c..Gs......u...........................hss...th...D.sp....s..a..s..tt..h..o..hsh...t..s..t.....ph....t..h....sa..t..p.....s.....s....G...sp...u.....a....hht.................u..sssha.h..........hu....D..F.s.s...ssE+oh......h..t..hsY-hts.h.s.l.....P..GLs.hhs..p..Ylh...G.shcsssh......................tphc..ch..thshphsYslQs.G.......hKshth+h+..
.s.hc.t....................................................................................................................................................................................................................................................... 0 43 136 258 +3583 PF03169 OPT OPT oligopeptide transporter protein Mifsud W anon Pfam-B_3048 (release 6.5) Family The OPT family of oligopeptide transporters is distinct from the ABC Pfam:PF00005 and PTR Pfam:PF00854 transporter families. OPT transporters were first recognised in fungi (Candida albicans and Schizosaccharomyces pombe), but this alignment also includes orthologues from Arabidopsis thaliana. OPT transporters are thought to have 12-14 transmembrane domains and contain the following motif: SPYxEVRxxVxxxDDP [1]. 24.50 24.50 24.50 24.90 24.30 24.40 hmmbuild -o /dev/null HMM SEED 624 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -13.07 0.70 -6.09 45 2900 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 1215 0 1473 2844 385 534.50 19 86.15 CHANGED 
phThRuhllGlllsllhussshhhuh+sshlshsslsstlluasls+hht..................................................h.hs.stPaohpEpsllpshus....uuss.h....shusshlhs..thhah..................tpph...shshtlhhshssshlGhhhushlR+hllhs..sphha......Psuhsssplhpshcpsp...............................................................p..hpph+hhhhshhsuhlatahsthhF.hlush.uhh.h........sthshshhhhths.........hshDashhh..lusshlsPhhsshshhhGslluahllhPlhhat.sshhsshhs.......h...us..............tshhhshtta.shushhhuuhhsl........hthssslspslhhphpsht...............................................t.......p-.ppphhpphc-.....................lP.hahhsshl.lshslulhhhhhhh..hphsh..........hulllulhluhlhshssuhhhulosh....ssul........shls.llhuhhhsup.................slAslhhsuhshsshupAtshhpDLKhGahh.tssPRthhh....sQhlGslluullsssVhphh..spt.hhss.........shshss.spshhhuulhhulhus.........shsahhlhhuhlhGhlhshlthhh....................tph.hPt..................lhhu.....lshhhssphss.......shhlGslhthhlc+...............................httthhpch.............shlluuGLssGtulhulllhhhh 
..............................................................................................................................ThRsh.hlG......hlhshlhsshsh.....hhs.h+..ss.h....sslss............t..llu..hsl...hphh..........................................................................ts.hshhE.ps..................hl..phhus.......uuts.............hu.........s.hlhs......s.h...hhh.........................................................htth.....shh.hhhh..s...hss...t.....hlG..hhh..sh..............hR+.........h..h........lh.........sph.a....................Pp......shssstlhpsh.cptt................................................................................................................p..thphhhhshh...............hu..hh..ath........s...h..h.h...hs.....th..sh...................................h..h..t..h.......tht..........................hshsh...s.hh....lGs...s....h.l.................hs.hh......ssh.shhhGh..hls.ah..l..hhPh..h...h.....h..h..........h.........s............t....hs...................t...........................................phh..h..s...h.tah..sh..uhhhhs....u..h.hsl............................hthht..hh.ps...hh...t...hp..thh...................................................................................................................................................s.....t.t...p...h..t.t...pp..........................................................................l.s..h.hh.h.h.hhl.....h.hhh.h...h.h.h..h.....h..h........h....hsh..............hshl.l.shhluhh....hshs.suhh.huhss.......hsuh...................shl.s...hl.hu.hh.h..s.......................................shhslhhss............hsh.ssss....usshhpDhKhG.....ahh...ths...........P...tt..h..................uphlGshhushl.ss...........lh.hh...............shh.hs..........................s.shs..s.sp.shhh...........s.slhhulhss............................................sh..a.th...l...hh...shhhGh.hh.lhs.hh..................................thh.hP.......................................lhhu......hs.h.h.h.s....t..hs.
h....................h.hl.Gh.h....ht.hh...hp...c.......................................................................hh..t.thht..ph.................................................................shlhuuGL.sGtulhullhhhh.h.................................................................................. 0 399 892 1266 +3584 PF04069 OpuAC Substrate binding domain of ABC-type glycine betaine transport system Kerrison ND, Finn RD anon COG2113 Family Part of a high affinity multicomponent binding-protein-dependent transport system involved in bacterial osmoregulation. This domain is often fused to the permease component of the transporter complex. Family members are often integral membrane proteins or predicted to be attached to the membrane by a lipid anchor. Glycine betaine is involved in protection from high osmolarity environments for example in Bacillus subtilis [1]. The family member OpuBC is closely related, and involved in choline transport. Choline is necessary for the biosynthesis of glycine betaine [2]. L-carnitine is important for osmoregulation in Listeria monocytogenes. Family also contains proteins binding l-proline (ProX), histidine (HisX) and taurine (TauA). 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.64 0.70 -5.04 87 7137 2012-10-03 15:33:52 2003-04-07 12:59:11 7 15 3034 55 1357 6630 3203 253.50 21 72.82 CHANGED sslslusts.as-shhhuplhtplLcpt.Gh.sschsshss.ss.hhsul.ps..G-lDlhs.tahsss......................hpthtc.thtpphshhh.s.shhhssp.uhsVsphhsc.............sl...polsDL.........tphss............shsschhsts.sGhs.......shhcsY.Gl......hh..ushtsh.s.....tlhpAhcptc.shlshsasscshhsph....c.lhhLc..................................Ds...+shhss.....tpltslsp.pshtcctPp.ltphLpplp..l.sscphpplttplst.pttss..pp.sAppalpp ............................................................................................................................slplusts...h.sE.st..h.h...s...p..l...h..p...p..l...lE...pt...Gh...ssp...h.h.s..h..s..s.......sh.hapul..ts........G..-....lD.l.hs.pas.sss.......................................................................hpt.hp....p...ht.t..p...h.thhh.h.s....shh.h.s.s.s.h...u..l..sVspth..up....................................t....sl...polu.D..L.........................................tchtt................shsschhs..pt...sGhs...........sh..h..c..s..Y...sl...............h...s.s...h.s.sh.s.............hhhpAh.pps..c...hsshhs...a.s...s..c..s..h.....h..s..p..h........c..l..t..h.Lp..........................................................................................Ds.......cp..h..hss......................hp.s.s..s.lsp....cs....hh...c.c....p.Pp..ltp.h.l.splt.......l.ssp...phpphphpl...ps...ptt...ss.....pp...sAcpalp...................................................................................................................................................................... 
1 350 775 1085 +3585 PF01718 Orbi_NS1 Orbivirus non-structural protein NS1, or hydrophobic tubular protein Bashton M, Bateman A anon Pfam-B_1752 (release 4.1) Family This family consists of orbivirus non-structural protein NS1, or hydrophobic tubular protein. NS1 has no specific function in virus replication, it is however thought to play a role in transport of mature virus particles from virus inclusion bodies to the cell membrane [1]. Orbivirus are part of the larger reoviridae which have a dsRNA genome of at least 10 segments encoding at least 10 viral proteins [1]; orbivirus found in this family include bluetongue virus, and African horsesickness virus. 25.00 25.00 111.40 111.20 18.80 18.40 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.23 0.70 -12.87 0.70 -6.48 7 185 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 65 0 0 167 0 452.80 51 99.24 CHANGED MERFlRhFplsuptA.tlRhhtsISspWTCSHh+RsChhpGhCs+QpFpps.s.tshcpt-hstAp+llclAtphhhsRpclWhpshpshsp.hs-phtpchppshppL.-sYppSshpcchtshhphpcssp.lhhDDShShh.hhYhPhspss.spssplsRhtphhhsFYsspsscs.hl..pp.thcthhsphhp.stctlspC.aTGsptsl.tlhahP.phhshhss.p.sphlhRhsphDlphIhphsh+c.sRlhhQRFGhcssu.sslaphhlt+lch.sh.pollpt+h.....psWpphhlPhlLhRthhhthhs.p.hhsahpschsCQsCaltptsphcplhllDsRsuElsG...ssshths+hhcHhDs-.ph.pht-LptsEhlsR.usHWhshsChoot-AlhlThh.IHRhlRGsGlhss.thp.uh.hLARshLhWt..ssttpSslFRLhCashhthpspspGphhsWtDLGsFhchlhcspcLs.s.sEchasshhchs.....LhYhpp....h+hsshs...tsthpptplhpllpt.hsh 
....MERFLphaslsG-hANAsRhFhsISPQWTCSHLKRsCLhNGhCs+Qp.F-cs.h.tAT.Dsp-.tpAh+LV-LApcAhhDR-TVWLpshKshsp.hpppl-tphccpsspLh-s....YppSGhh-Eh.p.pthssspR.lhlDDShShhPYhYlPhppGp.hlssshlSRatQluahFYsssssDc..aIs.s.hGlRttpsplK+tlEcplss..CPYTGhpGRlhpVhaLPlphhshhchp-.Apph.RhAuhshp.hh+s..Gatct.RhlpQhFuh.sssEhslHphMLh+hch.scspolVphRhhpsGs.sWpoWhlPhhlhRpuh.t........p.E.llsaM.c+KhTCQ.lC.ahhc.t.h.ThsVlDs+hu.ELsG...soshhhs+h..HssNc.pV.ps+.LhhsE.ht+IsDHahhppCaTstEAlhsTAIpIpR.IRGsG.Wss.happuhhhLsRlllhW.h.shspRSAlhRLhCFshaGhtPpAcGph.DWssLGoFhc.lLcG.-Ls...EDEp.sahoMhcMs.....hhasp+....s+hsshs...tsshE.tpVhplsth..p............................ 0 0 0 0 +3586 PF01616 Orbi_NS3 Orbivirus NS3 Bateman A anon Pfam-B_1029 (release 4.1) Family The function of this Orbivirus non structural protein is uncertain. However it may play a role on release of the virus from infected cells [1]. 25.00 25.00 47.30 47.10 20.50 20.50 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.13 0.71 -5.34 11 375 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 70 0 0 339 0 202.30 56 91.79 CHANGED p-holhsh.....PPhh.soAPshs..........thpshuLslLspAhossTGAotspKcEKAAauuhAEAL+D..stslRpIKhpVsppsLscL+t-Lpsh+R+psll+hlhhlsuslslsoohlsuhophssplpphhptp...............hlshslpslsLhsTshhlhsu+hppplppplcRsK+-IhKRcoYssAAphshsuss.........thsLpsschut .............s.............pP....PpYsPSAPhPS..........oMPTVAL-ILDKAMSNTTGATpsQKsEKAAFASYAEAFRD..DlRLRQIKRHVNEQlLPKLKo-LuGhKKKRAIlHhTLLlAAVVALlTSlsTLoSDhuVs..hKhNsTps-ls............ahKsLsshlGllNLGAThlMMsCAKsE+uLNQQIDhIKKElMKKQSYNDAVRMSaTEFS.........SlPLDGF-hP.h............... 0 0 0 0 +3587 PF00898 Orbi_VP2 Orbivirus outer capsid protein VP2 Finn RD, Bateman A anon Pfam-B_1525 (release 2.1) Family VP2 acts as an anchor for VP1 and VP3. VP2 contains a non-specific DNA and RNA binding domain in the N-terminus [1]. 
18.40 18.40 18.40 18.50 16.10 17.90 hmmbuild -o /dev/null HMM SEED 946 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.48 0.70 -13.27 0.70 -7.02 25 468 2009-09-11 12:16:15 2003-04-07 12:59:11 12 2 67 0 0 373 0 558.40 24 90.95 CHANGED uLLs+YPLAIplsVKl-D.GG+HsllKIPESDMIDlP+.oIlEALsY+PtRNDGlVVPRLLDITLRAYDsRKSsKsA+GlE.FMTcs+WMKWAIDD+MDIQPLKVoLDcasS.VNHQLFNClVKAcSANADTIYYcYaPLEsusK+......CNHTNLDLLRSLTssEhFHsLQGAAYuLKoTYELVsH..SERcshSEoYpVGspcaIpLpRGT+IthpGpsYE+FlSSLVQVllcGKlP-cIRsEIApLNc...I+sEWhsAsYDRs..+IRALELCKILSsIGRKMLDs.....pEEPKDEMsLSTRFQFKLDEKFh+sDpEHlNIFcVGusATD-GR.FYALIAIAATDTQpGRVWRTNPYPCLRGALIAAECcLGDVYhTLRpsYcWSLRP-YGp+ERsLEsNK.YVFuRlNLFDoN.LuVGDcIIHW+YElhps..+ETTaDcGYhCsppt..s.DDELlCclDED+YKEMhDRMIQGG.WDQERFKLHsILT-P.NLLTIDFEKDAYLssRSELVhPsYYDKWIsSPMFNARL+Is+GEIAThKuDDPWssRuV+GYIKssuESL-YsLGsYYDLRlpLaG-sLSLpQcQS..AVFpahuQpDDFusLTchppGtsVCPHS....GGshYTFRKVALhlluNYE+LsPsLHEGhEccpYhHPu...lsstac++VLEM+DhSQL...ICFVFDYIFEK+pQLRss+EARhIlYLIQssoGstRL-VLccsFPNFhc+lhsL+-lKplpDLNVIN.FhPLlFLVpDNISYhHRQWSIPMlLFDcs.IRLIPVEVGAYANRFGhKSFhNFhRFHPG-uKK+QcADDsHKEFGslsF-YYssTKISQGplcsPVVToKhDsL+lHlASLCAGLADSlVYTLPVAHPKKsIVLIIVGDDKLEPplRSEQIVs+Y.aSR+HlsGlVSIsVsQsuQL+VHopGIs+HRlC-KsILKYKCKVVLV+hPG+VFGNDELMTKLLNV 
..................................................................................................................................................................................................................................................................................................................................................hhhp.t.s.....-sh.h.p..sht..............Hs..thh.phh..t...h.pthsYtht.p.hplhsh.......sp...tphp.....c.....h.s....p..h..t..h.....+th.h........tt.tthphhh.......tshs.......hphph..........................t...............tp.......h.plp................I..ptp....http.s....p....p.sp.lpthht..hh.p.......p.tsts.p.shp.+Fphpl.tph...pstch..I.ht..tttthsp.t+.FhsllhlsssDs.pttha....h.hlhhhh...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.ptt....hh....p...hphhhus.CtGht-.h..hhPhtpP..shlhh.h.s.......t.t.h..hh....p...thh.l.lt...............tt................................................................................................. 1 0 0 0 +3588 PF01700 Orbi_VP3 Orbivirus VP3 (T2) protein Bashton M, Bateman A anon Pfam-B_1622 (release 4.1) Family The orbivirus VP3 protein is part of the virus core and makes a 'subcore' shell made up of 120 copies of the 100K protein [1]. VP3 particles can also bind RNA and are fundamental in the early stages of viral core formation [1]. Also found in the family is structural core protein VP2 from broadhaven virus which is similar to VP3 in bluetongue virus [2]. 
Orbivirus are part of the larger reoviridae which have a dsRNA genome of 10-12 linear segments [1]; orbivirus found in this family include bluetongue virus and epizootic hemorrhagic disease virus. 25.00 25.00 48.40 47.90 23.50 19.20 hmmbuild -o /dev/null HMM SEED 890 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.54 0.70 -13.39 0.70 -7.12 7 204 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 66 2 1 161 0 531.10 67 98.12 CHANGED ssuPYLcGDplpsDsGPLLSlFALQEIhpKVRpsQtchtstupEl-sshP-VppIlssl+sLtcp+sY+llppPshSaRaIshQSp-RhhRVsoaaERhSplG-shcpp-PhcFhssllc+V+alRscGuFlLaslsT+.hcGtEls-s-sLGV-hpshhssLsA.sRthlQstLsuhlIpNtpsscp.VDVa.GACsDslYRlHstLpuYlEssQhtphRpulsWLpphGppKRIpaspcaLTDhhpuDTIalLohpLPsNPpVIW-VPRsuIuNLIhNhALslPTGpYluPNPRIsSITlTpRITTTsPFA.LpGhsPTAtQMsDVRKIYLALMFPNQIlLDl+.-PGHplDPllphVuGVlG+LhFoaGPRhhNITpsMAp.LDhuhscaLhaMhssRhplpaGsoGcPLDFhI..GppQaDCNthtss.pTGpGYNGWu.VDs.tccPoPYsHVpRhIpYhshDScElID.RhhG.shsY.happMhchLltAG+ssEtsYhctML.aHhVRFA+INQIINcDLlSAFShPD-pFshhhssh.pssattssPlVLDlSahSIWFAFphRF.PssRs-hl..tPLlESVYAScLSlhKlcsppLphhhspsP-shlpA+PochWKAVhcp.PEPl+slhsLstp+sFlshRDlhpWlppsthQcSLhhhh-cEAWtsssD.pDLMhsccVYhHRp.lPEPhLDDlcpFRR-uFYYTNMlDu.PshscsVhhohthhhlpAshGph+uAlRphlDDssal+hGssLRslhlcFF-ShPPp-lLpALPFsYpscE+sG.lshsol+hsspspsaaLlYNV-hsshPDphlslsPshshTKlalpp+lVcRVcsssALuVhN+cFluY+uKhRlMDlTpuLcsGsQLAuPos 
....................................................................................................................................................................................................................................................................................................................PRstlsNlhhshuhs.Phs.ah.sss+Istlolstcls..psssFu.l.u.hsTt.QhsDVRKIYLALMFPGQIILDLKIDPGERMDPAVRMVAGVVGHLhFTAGsRFTNlTQNMARQLDIALsDaLLYMYNTRlQVpYGPTGEPLDFpI..GRsQYDCNsFRusFtTGsGYNGWuhlDVEhR-PAPYsHsQRaIRYCsIDSRElIpPtT.a.GhsMpYhhapcMh.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1 1 1 +3589 PF05059 Orbi_VP4 Orbivirus VP4 core protein Moxon SJ anon Pfam-B_5992 (release 7.7) Family Orbiviruses are double stranded RNA retroviruses of which the bluetongue virus is a member. The core of bluetongue virus (BTV) is a multienzyme complex composed of two major proteins (VP7 and VP3) and three minor proteins (VP1, VP4 and VP6) in addition to the viral genome. VP4 has been shown to perform all RNA capping activities and has both methyltransferase type 1 and type 2 activities associated with it [1]. 
25.00 25.00 98.50 98.30 16.00 15.70 hmmbuild -o /dev/null HMM SEED 644 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -13.16 0.70 -6.76 6 86 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 47 5 0 97 0 564.20 62 99.40 CHANGED M-P.HAVLYlopsls.hLccuFLPIhcLsGtEsLNsLWhtpGpasTDhYshGslpKWoIRQLRGauFIFlup+.cpIphtDsshshDllI.Ppphscsps.KcFEThIGhcRVtLRKsFGDhLRsYAhpashpFHGSEAETL.hA-P+RH+VhGhPpsPPshsh.sph.ssapsDpPTDEKLVSMLDYllYSAD.plaYVGCGDLRTLppFt+RDp+RFsRVpWhCIDP.IuPEosssNVllhpthlssscDL+pahp.ssslE+lLIWDVpoDputhushEWEppphpEDcLGEpIAhuhpshhuhAlIKHRIPp.ocppapsaoShLlPQPGAPhsMYELRNlhRL-GaS+VDRoHIPcApspplptcDsRpLVcpaHGpsRG+pLK+slaEaLHIpRpsGLpHtu-.PRADLFYLTNppNts+hp-ItcVlEpSsIuTlWVGsc.hasYDDFsYsRpplML+Fsp+s+hVlDGNGhILaLMW+hs.schspclsYDPuWApsFuVlhh+shsss.VPDlSLCRFIGLRphSohLRlpocpVHchsDlLK+LGLDlSGHLFIuLhSuuYlhDLhWWhcMIh-WSlhs+ppKLtslpctpAEVIEW+E-+AsEPWHhhsDLhAALhthuch.ch.hhcp..uslppWl-hLR ..MP.EP.HAVlYVTpELsHllKpuaLPlWclpGsEoLN-LWLpNGKYuoDlYAaGclppWohRQLRGHGFIFlST+.KslQLsDllhsVDVRI.P+-lh+s.D...hKtFEs.IGRRRl+hRKsFGDhLRsYAa+hAI.hHGSEAETLNsANPRLH+lYGhPc.PPhYhEhtph..ts.FsDEPTDEKLVSMLDYhlYSsE.EVHYVGsGDhRTLhpFtKRSPtRF+RlhWHlYDP.IA.s.-sp..sNVhVHphhVsuK+Dlh+phNhLKRVERLhIWDVSoDRupMsDcEWEppRFAEDRLGEEIAhphuGhFS.AlIKHRIPp..h-pYHshSTYLhPQPGAssDMYELRN.hM+L+GYSHVDRphHP-ApVhplVs+DlR+hVEhYHG+D+GRhLKKpLFEaLHIhRcNGLhcEs-EPRADLFYLTN+CNhuh..slYcVh+KShIAThWVGps.LaDYDDaulPRuhlMLpsSa+DlRlLDGNGAILFLMW+YP.-hhKKsLsYDPuWAMNFAVohKEPlP-PPVPDISLCRFIGLRVESSlLRlpNPplHpssDELKRMGLDlSGHLYVTLMSGuYVTDLhWWF+MIL-WSups+EpKLcsL+cStAEVIEWK-pMAERPWHVRNDLIAALREaKh..KhthRps..Asl-SWL-lLR................ 0 0 0 0 +3590 PF00901 Orbi_VP5 Orbivirus outer capsid protein VP5 Finn RD, Bateman A anon Pfam-B_1525 (release 2.1) Family cryoelectron microscopy indicates that VP5 is a trimer implying that there are 360 copies of VP5 per virion [1]. 
23.60 23.60 24.20 143.70 22.00 23.40 hmmbuild -o /dev/null HMM SEED 508 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.52 0.70 -6.30 14 206 2009-09-11 01:23:19 2003-04-07 12:59:11 12 1 72 0 0 195 0 462.80 62 96.62 CHANGED tLuRhG+phssALTSsTAK+IYpTIG+AApRhAESEIGSAAIDGllQGolcShlTGEsaGESlKQAVILNVlGss-shPDPLSPGEptltpKlcELEcEp+p-hlpp+HsccIhcKaGp-.L-clh+hhsspt+hpptEccQh-hLcKAlcuhtclhcpEscplpcLtcALp+EsptRTc-EscMlppaRpKhcALpsAI-lEppuhpEEAIQEhhshoADllEsAAEEVPlhGuGhAoulATuRAIEGuYKLKclIstLoGIDLoHLcsPcIpPphlpslLcpsst...lsDppLspultuKlctlcE.ppElcHlpppIlPclKKthc--cc....hts.pcphIHs+sh.paKlPppQpPpIHIYoAPWDSDpVFlFHsluPHHtscSFhlGFDLEl-aVaaEDlotchHtL.GuAppssGRoF+pAY+EFhphAhp.stssthHp+RLpRSpusHPIYLGShpYplSYtpL+pNA.plVpss-LQhHlLRGPl+FQRRsIluALhaGVcll ...oLSRFGKKVGsALTSNTAKKIYsTIGKAAERFAESEIGSAAIDGLlQGSVcSIlTGESYGESVKQAVLLNVLGuG--lPDPLSPGE+GhQhKl+ELE-EQ+sElVRlKaNccI..tc+FG...c-.LE-VYcFMsGps+pEttc-cQaclLpKAVsSYpKlltpEcpph+pLApALQ+Ehs-RTcsEppMVcEYRpKIDAL+sAIElER-GMQEEAIQEIAGMoADVLEAASEEVPLlGAGMATAlATuRAIEGAYKLKKVINALSGIDLSHLRTPKIEPshluThL-pcttc..IPDppLAhulluKtcuIp-NppEltHIcpEILP+hKKhM-E-+E....lpuh--KhIHP+lhM+FKIP+sQQPQIHIYoAPWDSDDVFhFHClS.HHtNESFFlGFDLuIDlVHaEDLosHWHAL.GuAQpAtGRThpEAY+EFhNLAlusshsothHtRRhlRS+ssHPIYLGShHY-IoappL+sNAp+lVYD-ELQMHlLRGPLHFQRRAILGALKaGsKl........... 1 0 0 0 +3591 PF01516 Orbi_VP6 Orbivirus helicase VP6 Bateman A anon Pfam-B_765 (release 4.0) Family The VP6 protein a minor protein in the core of the virion is probably the viral helicase [1]. 
20.00 20.00 20.00 22.00 19.10 19.90 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.03 0.70 -5.37 3 135 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 47 0 0 149 0 255.80 52 93.72 CHANGED LLAPGDVIKRSSEELKQRQIQINLVDWhEScuuKE-..EPK-EucuEpp..lSDGEGsQpccGpKEESuKETcDAsVDRRlHTsVGSGSusKGSGERAsKcADtGDGKstGGGGDADcGuGATGTs.GGGWVVLT-EIARAIESKYGTKIDVYRDEuuAQIIElERSLQKELGISREGVAEQTE+LRDLRRKERs-s+IKAV.+GsRKp.Rpptcusup+EGVtEE.spEEso+IGITIEGVMSQKKLLSMIGGVERKsAPIGARESAVMLVSNsIKDVsRATAYFTuPTGDPsWKEVAREAuKKKNILAYoSTGG.DsKTEFLHLIDHL ................................LLAPGDVIpRSoEELKQRQIQIpLlDW.-s-s.t........Kcp....EsKt.E.scsctp........cDG.E.........G..s.p....p...c.supKcc....uu.....cE...s..pD..AssD..RRlcTsVGpGous+GsG.ERs....sc..ssD..t..............GDuKst....tGuG-s...ctGsGs....s........Gss..t.GtWVVLT-EIAcAIco+YGsclc......VY+s-..s....s....u..pIIplE+SLQKELGloREssAEQTEtLRcL+...R..Kc+st....s+s+...us.....t.....+...GtcK...p....tp.p...cu........s.u.p+.Eu...sp.c-.....s.........pcpss.....pl..u............ls....IEsVMSQKKLLSMIGG.sER+htsIsARESuVMLVSNsIcDVsRATAYFTAPTGDspWKEVARcAoKKcNIhAY.oS....oGG..D..sKpEFLHLIDHL........................ 1 0 0 0 +3592 PF00897 Orbi_VP7 Orbivirus inner capsid protein VP7 Finn RD, Bateman A anon Pfam-B_1523 (release 2.1) Family In BTV, 260 trimers of VP7 are found in the core. The major proteins of the core are VP7 and VP3. VP7 forms an outer layer around VP3 [1]. 
25.00 25.00 48.60 43.30 18.90 18.80 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -11.96 0.70 -5.51 7 191 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 65 22 0 159 0 303.00 60 99.57 CHANGED MDuIsARALoVlcuhso.pDsRsph-sss.-hhuIhlsRaNuhT.RsVohRPooptcRsphFahslDhsluAhslplu.l.PsYp.shtTlulLApsEIPYTspAhsclsRlouphsshtssRp.hh.a.ssptlhtPGpha.hsAups.pshsluushhplolssutpsplsshlhPspsDslMhhFlWpplppapsssGss.-susssplolsssphcAGs.lls..sG.AslslsssuspsuhlchpVhaYhshs.o.shYsshpsplhssYSY+s.pWauLRuhlLpphslPshhPPhhPss-tpplLsLhLlSpLtDsYsshpP.aslhus.sh.sphptul...shsAh+ ........................................................MDsIAARALoVh+ACsTLp-sRlslEusVhElLGIAINRYNGLTLRuVTMRPTS.sQRNEMFFMCLDMhLuAsslNlGsISPDYtQphATIGVLATPEIPaTsEAANEIARlTGETuTWGPuRQPaGaFLpstElhQsGRaa.hRAuQslTuslsusshhQVShNAGARGDlQt.lFQspNDPh.MIYhVWRRIcsFu.spGNSQpT.sGVTVsVG.GVsMRAGc.IlA.WDG.QAslpVpNPstpsuMlQIpVlaYlShDKTLsQYPuLsApIFNVYSa+s.TWHGLRsAILNRTTLPNhlPPIFPPsDR-slLsll.LLSsLADVYoVLcP-FslaGVsshsGslsRAl...stsAY...................... 0 0 0 0 +3593 PF02072 Orexin Prepro-orexin Mian N, Bateman A anon IPR001704 Family \N 22.60 22.60 22.80 25.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.90 0.71 -4.34 3 54 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 40 4 25 44 0 111.00 55 75.78 CHANGED MNsPsAKVsWAAVTLLLLLLL.PPAlLSLGuuAQPLPDCCRQKTCSCRLYELLHGAGNHAAGILTLGKRRPGPPGLQGRLQRLLQASGNHAAGILThG+Rt.ERPuTRhpsuhpChAustsoVoPsG+uuh .....................htslhLLLLLLL..shLho.ussAQsLP-CC.RQKTCSCRLY-LL.....H..G..........A.....G...............NH..........AAGILTLGKR+sGs..s.sLQuRLQRLLpu.SGNaAAGILTMG+Rs.t-su................................................. 
0 3 5 9 +3594 PF03827 Orexin_rec2 Orexin receptor type 2 Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 28.20 27.00 21.90 18.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -8.88 0.72 -3.92 3 46 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 37 0 28 36 0 57.30 70 13.61 CHANGED LGVHHRQEDRLTRGRTSTESRKSLTTQISNFDNVSKLSEQVVLTSISTLPAANGAGPLQNW .........LGVH++Q-DRLsRGRTSTESRKSLTTQISNFDNlSKLSEpVVLTSISTLPAANGuGsLpsW....... 0 1 4 12 +3595 PF02999 Borrelia_orfD Mlp; Orf-D; Borrelia orf-D family Bateman A anon Pfam-B_1511 (release 6.4) Family Borrelia burgdorferi supercoiled plasmids encode multicopy tandem open reading frames called Orf-A, Orf-B, Orf-C and Orf-D. This family corresponds to Orf-D. The putative product of this gene has no known function. 21.70 21.70 21.90 24.00 21.50 21.60 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.29 0.72 -3.86 4 180 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 29 0 13 111 3 94.80 63 78.45 CHANGED SLP-cPshPhhpTLpsLup.EApLu-YVMYL.sFLs+TKsKVN....DspYPca.Y.D.SThKDEpoIps...lKaNIslahcYIcKTKPIsctVYpKYS+Lph ......sLPEEPcPPIIpTLKSLAKYEsQLS-YVMYLlTFLuKTKVKVN....DPNYPEYsYPDLSTLKDEHSITu...lK+NIslYLEYIcKTKPIAcKVYpKYSpLKh... 0 8 8 8 +3596 PF04160 Borrelia_orfX Orf-X; Orf-X protein Bateman A anon Pfam-B_3014 (release 7.3) Family This short protein has no known function and is found in Jaagsiekte sheep retrovirus. Jaagsiekte sheep retrovirus (JSRV) is the etiological agent of a contagious lung tumour of sheep known as sheep pulmonary adenomatosis. JSRV exhibits a simple genetic organisation, characteristic of the type D and type B retroviruses, with the canonical retroviral sequences gag, pro, pol and env encoding the structural proteins of the virion. An additional open reading frame (orf-x), of approximately 500 bp overlapping pol [1]. 
25.00 25.00 96.40 96.30 19.10 19.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.04 0.71 -4.41 16 37 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 3 0 0 36 0 151.00 93 93.07 CHANGED MQPtNPMIYITKIVILYACNLKF.VKLHGKLLNLsLLVLNSLFSLNMVSTLEVYALITSGKQMLLTFLNLDVLNMFMFLLTLFPIFSWP.FTLENQHVTVFNIhCFsFLLQEShKPL+QIMDLsILAVLFNVFVFL.KFIIKQtFLIIHRDKVL MQPENPMIYITKIVILYACNLKFPVKLHGKLLNLALLVLNSLFSLNMVSTLEVYALITSGKQMLLTFLNLGVLNMFM.LLTLFPIFSWPPFTLENQHVTVFNICCFAFLLQESHKPLKQIMDLVILAVLFNVFVFLSKFIIKQEFLIIHRDKVL 0 0 0 0 +3597 PF04061 ORMDL ORMDL family Wood V, Finn RD, Bateman A anon Pfam-B_4871 (release 7.3); Family Evidence form [1] suggests that ORMDLs are involved in protein folding in the ER. Orm proteins have been identified as negative regulators of sphingolipid synthesis that form a conserved complex with serine palmitoyltransferase, the first and rate-limiting enzyme in sphingolipid production. This novel and conserved protein complex, has been termed the SPOTS complex (serine palmitoyltransferase, Orm1/2, Tsc3, and Sac1). 21.30 21.30 45.20 44.90 20.90 20.80 hmmbuild --amino -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.00 0.71 -4.61 37 393 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 252 0 253 347 2 131.30 48 74.56 CHANGED NhNssWlst+.........GsWlhalllIhll+lhh.hlP..shosshuWTLTNlsaslsoalhFHhlKGoPF-h...stGsYcpLThWEQID...................pGsQaTss+KFLhsVPIlLFLlooaY...o+Yc.hhFhlNh.sslhlsllPKLPhhH+lR ......................N.NspWhsu+..GhWhhallllhhL+llh.slP..hhSsshuWTLTNlha.hu...............pYlhhHhVKGTPF-t...spGthctLThWEQlD...................pGsQa.T.......soRKFLhllPIlLaLluoaY...T+.YD..h..s..hFhlNh.lullsVllPKLP.hHtlR................ 
0 63 115 181 +3598 PF02784 Orn_Arg_deC_N Pyridoxal-dependent decarboxylase, pyridoxal binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain These pyridoxal-dependent decarboxylases acting on ornithine, lysine, arginine and related substrates This domain has a TIM barrel fold. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.64 0.70 -5.20 17 10956 2012-10-03 05:58:16 2003-04-07 12:59:11 11 31 6377 85 2329 7307 4527 211.00 26 55.03 CHANGED Dlu...lp+ttthhpthhsh......hpshYAVKsssshsllplLschG..s....GhDsuSpsElphVLu....hGsssc+IlassssKstuplchAhpp....tlshhshDshpELcpltchtsct....plhlRlpss.-spspshlus.....KFGhshpp...stthlctApphs.lplhGlphHlGSths-hpsahpAspcshplhctht.phGh...hchLDlGGGasss.p...........s.phpchupslpsulcchhs.......ptspllsEPGRhhsus .............................................................................................................................h.....t.t....................ha..sh.Kss.p....t.l...lp.hh......h.....p.........G..................................sh-.ssSh.............sEl.h.s.ht..............hs..h...........s.......p....p......l.hhss.s....h....K......s.tt.l.p...h....A.h.t...................th.h.h.h.sh-.s..t-...l....p....h.lt...p.hs..th.................................tl.hlR.lp...........s......................t..t..t...s....t................h......h...s...s.......s.......t.....s.....KF.Gh.sh......p.............p..........h........h.t.h.l.c..t..s.p.p.......h.............s..l..p..l.h.Glc..hH..l...G..S....p.h..s.....c..h..c..s....ah.pu.....h.....p...c....s....h.......plh.sph..t......ph.............G...h.....p.................lphlslGGGhu.l...s.Yt.........................................t..t.........t...h....h...t........h.......t...h...h...pt...h.h.t...................plh..hEsGRhlsu............................................................................................................ 
0 749 1458 1952 +3599 PF00278 Orn_DAP_Arg_deC Pyridoxal-dependent decarboxylase, C-terminal sheet domain Finn RD, Griffiths-Jones SR anon Prosite Domain These pyridoxal-dependent decarboxylases act on ornithine, lysine, arginine and related substrates. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.45 0.71 -4.37 105 9303 2009-01-15 18:05:59 2003-04-07 12:59:11 17 26 4591 89 2414 7018 3338 153.60 20 33.75 CHANGED sLlspVhsh+p.st..........................................................................................tt.............hhh.lssGhhsp.hsshhhst....hhslhhht...............................................hpspstttsslsGsoC-usD.hl...sp.........chhLP.........plp.GDhlshhssGAYshshus.saNuhs..tsshlhl ..................................................................................................................................................................................................................hsplh.hcp..................................................................................................................p...................hhh.lDsuh.ts..t....hcsh.h....h.s.t........ha....l.h..s..lpt..................................................................hs.p..t..s....t..p...p..ssl.sG...hC-.osDhl..sp.............................................shtLP........................php.G.D.hL..s.hhssG..............AYsh.sh.us....saNshs..pss.h......................................... 
1 788 1532 2038 +3600 PF02088 Ornatin Ornatin Mian N, Bateman A anon IPR002463 Family \N 25.00 25.00 40.40 40.20 18.40 17.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.35 0.72 -4.39 2 7 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 1 0 0 6 0 37.90 75 84.39 CHANGED l..Ct-h+E.GQPscKCRCsGKPCTVG+CshARGDssDKCh L.YCG-FRELGQPDKKCRCsGKPCTVG+CphARGDssDKCh 0 0 0 0 +3601 PF02250 Orthopox_35kD 35kD major secreted virus protein Bateman A, Mian N anon Pfam-B_3549 (release 5.2) Domain This family of orthopoxvirus secreted proteins (also known as T1 and A41) interact with members of both the CC and CXC superfamilies of chemokines. It has been suggested that these secreted proteins modulate leukocyte influx into virus-infected tissues [1]. 21.90 21.90 23.60 22.30 21.50 21.70 hmmbuild --amino -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.46 0.70 -5.05 5 131 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 30 7 0 113 0 211.00 44 89.18 CHANGED hCopcEDs+YMGIDllhKV.TKpspTsssDshCQulp-lpESsc-tDEuoEctuTSTspGDshu...................oTYaTlVGGGLSlsFGFTGCPplsSlSEascGsaVYVRLSScAPW.+sTsslShNRsEAL.ulLEKCELSIsIKCSNpchsE.........TThsouoLsPcIopcsT-p...............uDIIGSTLVDT+CVcSLDloVcLGDMC...K+oS-LSlKDuhKYsDGELl....-DsuDsaslsSssLpAC ....................................................Cptctpphahthpl.h+l.s+ps.h.sssphC..hhphpps.....................-............t..s.ps-s.......................sshhSl.sGGLphshuahpC....+olup.sstsTV.A+huSlsPh..pscsss.uhT..+.....--s....l....thlc-C.VsIpl+Cs.-cpcs...............hhppssht.p.scpKs.s......................pcllGSh..I..VDscCVpslchpV+ItDhC...KppS.hpl+DhFphssGp........................................... 0 0 0 0 +3602 PF00213 OSCP ATP synthase delta (OSCP) subunit Finn RD anon Prosite Family The ATP D subunit from E. coli is the same as the OSCP subunit which is this family. The ATP D subunit from metazoa are found in family Pfam:PF00401. 
21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.82 0.71 -4.40 192 4976 2012-10-02 21:03:42 2003-04-07 12:59:11 13 14 4570 6 1207 3297 2540 165.70 25 84.70 CHANGED lAppYAcALaclA.p.-.......ps..t.....l-ph...tppLptl..ssltpsscltphlssPtlsspp....Ktpllpp.lhps........................tl..sp.....hstNFlplLs-ppRlshLspIhptapp.lhsctcshhpspVpoAhsLsspptpplpptL...ppthsp.plplphplDssllGGlllcl...GspllDuSlpscLpplpppL .............................................................................................................................................lup.YApAlaphA..h.c.........ps...p.......l-ph...tp...pLthl....thl..t..p...s..t.....p.l.tphLs.s....s....s..lsspp....+tc.hltt...lhsp..........................................th...st.....h.pN.h.l.p..lls.......-s.pRl.s...h.ls.p.l.........hppa.p.l...hs.p.p.p...sp......h....ps....pVt.S......A......hs..L.o.....c.....p.....p.hp.cl.tpt..l.............pc..p....h....u.....p.....p...l.p.l.p..splD..sl.lGGlllcs..........G...c.....p.....llDsSl+s+Lpplppt................................... 0 405 776 1019 +3603 PF02566 OsmC OsmC-like protein Mian N, Bateman A, Finn RD anon Pfam-B_2694 (release 7.0) Family Osmotically inducible protein C (OsmC) (Swiss:P23929) is a stress -induced protein found in E. Coli. This family also contains a organic hydroperoxide detoxification protein (Swiss:O68390) that has a novel pattern of oxidative stress regulation [1]. 
22.80 22.80 23.10 23.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.55 0.72 -3.82 173 7485 2009-01-15 18:05:59 2003-04-07 12:59:11 14 22 3139 56 2170 5359 1255 100.10 21 63.21 CHANGED ussp.....ussPh-llhuuluuChshshthhsppp.th..pl..psl....plplp.......h...chs.t.................hpplp.....lplplthss......s.......pphpchlpts.pchCslt.psl.ptssplphp .........................................................s.tsp...ussPt-LlhuuhuuChshsl...thhhpct..th...sh.....ssh............plpsp............s............pts.ps..s.t.................hpplp......lphpl..p..hs..s.........hs.t........pph.pcll..........p....hu...c....phCslu.psl..psslslph.p.............................. 0 654 1315 1789 +3604 PF03207 OspD Borrelia outer surface protein D (OspD) Mifsud W anon Pfam-B_2915 (release 6.5) Family \N 25.00 25.00 66.10 66.00 23.70 23.70 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.91 0.70 -5.23 2 24 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 13 0 1 22 0 218.40 86 98.61 CHANGED MKKLIKILL.SLFLLLSISCs...........L.......DNEGsNS.sYESKKQSILuELNQLLtQTTNSLKEAKNTTDNLNASNEtNKVVEAVIssVNLISSAADQVKuAppNMHDLAQMAEIDLEKIKpSSDKsIhAuNlAKEAYsLTKAsEQNMQKLYKEQpc..co.S-SD.........hpsSsEIKQAKEAVEIAWKATVcAKDcLIDVENsVKEsLDKIKTETsNNTKLsDIcEsAELVLQIAKNstEIsQEVVAh MKKLIKILL.SLFLLLSISCs..................LDNEGsNS.sYESKKQSILuELNQLLtQTTNSLKEAKNTTDNLNASNEANKVVEAVIsAVNLISSAADQVKuATKNMHDLAQMAEIDLEKIKpSSDKAIhAuNVAKEAYsLTKAAEQNMQKLYKEQpc..co.S-SD..........psSsEIKQAKEAVEIAWKATVcAKDcLIDVENsVKEsLDKIKTETsNNTKLADIcEsAELVLQIAKNsKEIsQEVVAL... 0 1 1 1 +3605 PF02471 OspE OspEF; Borrelia outer surface protein E Mian N, Bateman A anon Pfam-B_962 (release 5.4) Family This is a family of outer surface proteins (Osp) from the Borrelia spirochete [1]. The family includes OspE, and OspEF-related proteins (Erp) [2]. These proteins are coded for on different circular plasmids in the Borrelia genome. 
25.00 25.00 30.80 30.40 21.50 20.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.42 0.72 -4.17 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 28 0 4 77 0 108.10 74 60.70 CHANGED FSEFTVKIKN.KDNsuNWoDLGTLVVRKEEDGI-TGLNsG.......hGHoATFFSlEESEVNNFVKAMTcGGSFKTSLYYGYK-EQSss.NGIpNKEIhTKIEsINsSEaITFhGD FS-FTVKIKN...KD.NuuNWsDLGsLVVRKEEDG...I-TGLNsG........GHSATFFSLEESEVNNFlKAMTcGGSFKTSLYYGY+-...EQSst.NGIpNKEIITKIEpINsoEaITFLGD... 0 4 4 4 +3606 PF03968 OstA OstA-like protein Bateman A anon COG1934 Family This family of proteins are mostly uncharacterised. However the family does include E. coli OstA Swiss:P31554 that has been characterised as an organic solvent tolerance protein [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild --amino -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.59 0.71 -4.13 50 3517 2012-10-01 21:43:16 2003-04-07 12:59:11 9 18 2150 10 780 2490 1095 121.40 23 30.26 CHANGED pIcuDpt.phcp..pss...hshasGNVhlpQGshplpA-clhlhpstp............................tthpplpupG.....shhptphphsspthpupAcphpYp..stpchhhLps.sAhlp......psssslpGspIphshppph ..............................................................................................IpuD.ph.p.h..c......tss......ssaoGN...V.h..l..p..Q......G..s..h.p.l.p...ADclh..lppsps.........................................................................................................................pshp.plssts........shht..t.p.h...p....h....s.....s....c........h..c.G..pAsphp.Yc.....htp.chhhL...p...s....s....u.hlp..........................psspsl..p..G.sc.Ihaphcpt......................................................................................................... 0 220 460 635 +3607 PF04453 OstA_C Organic solvent tolerance protein Waterfield DI, Finn RD anon COG1452 Family Family involved in organic solvent tolerance in bacteria. The region contains several highly conserved, potentially catalytic, residues [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.29 0.70 -5.38 45 2046 2012-10-03 17:14:37 2003-04-07 12:59:11 9 9 1874 0 426 1515 1009 363.00 25 49.70 CHANGED ppRhh.hthpppttlss.sap........htl-hshlSDp.sYhpDhsss.............hshtspsp.L.p..............puphsY......ttt.h.hshp..sppapsl.............tttpsspP....aptlPplshshhtsphh.....shphshpuphspFpcpstth.................................pusRhphpPplshshppshGh.lssphplp..tsh..................Yphststs..t............shspshsRslPphsl-sshshtRshph..................tappoLEPchtYhh.....hP..p..........sQssl..P..........saDos.hphshtpLFpps+asGh.DRIssuNplohulosph...hcsssh........................phphslGQhaahpsppshh.................sp....ss.ppstSshhsphshphspthphsushpas.pspphppsssshpYps...p...sthlslsYpahpsphttt...........................hpplshsstaslsp............pW .......................................................................................................................pRhh.hhhpcst.shsp..sap.........hslDh.spV..S..D......sYhpDhssp........................hsssossh..hpp...........................phplsY..............ts..psh.s.hslp.........sppaQsh......................ptps..sps..........YpthPpl..shsh..htsshh...............shchph.uph.spFh..ps..p..t..s.s...........................................pusRhphpPslshPhs...sshG...lss.ph..p..lh........sst...........................Y.ptsp..s......t.............................phpcs.ssRshP.p.h.pl-s..ths....a.....-Rsh..ph....................sa...p...QTLEP+hpYlY.............sP..h+................sQs.sl...................saDos.....hp..............s.hssL.F....p...sp.pasGh..DRlssuNplohGloo.Rh....h-sss..........................................E+hshslGQ..haYhscppst.......................................................p.......sspps.s.pss.hs.s.phthph..........spphshpushp.....Y....cs.c...s....p..hspus.sshpYp......s.......pphlplsYpYtssp....hh..t.tt.......................................lpQlshsuta.lsspW..................
........................................................................................................................ 0 113 246 338 +3608 PF00865 Osteopontin Osteopontin Bateman A anon Pfam-B_1593 (release 2.1) Family \N 25.00 25.00 28.50 28.50 24.40 24.30 hmmbuild --amino -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.47 0.70 -5.17 7 105 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 40 2 25 108 0 216.10 52 93.19 CHANGED K.AsSGSSEEK.......h.pKasDAVAThLpPDPSQKQshLAP..QNshSSEEsDDh.KQpTLPSpSNESp-phDD.DDDDDD.DHssSpD...Ss-S--sDpsDD.cpsDESHHSDESDE.VssaPT-.stspVhTPslPTs-ssD.GRGDSVAYGL.RSKS+pF+hSchQhP..DAT-EDlTSHhcScEhsss.KsI.VA.cLphPSD.DSptKsSpEoSQlD-pSVETcS+EQS+pac.+AsDpSs..................E+SssIDSQEs..SKsSpE.p...F+SHEDKLs.D.KS.E-D+HLKhRlSHEL-SuSSElN ............spSGSSEEK.......h.shass.luoWLpsDPSQKQshLAs...QNslSSEE.psD..cQpTLPSpSN.ESH.-chDD.DD-DDs......SpD...Ss.-.............sD-S+cSDESDEhVs.cFPT-.ststsFTPhlPThss.s.GRGDSlsYGL.RSKS+.phchs..s.Qhs..DuT-EDhTSchcS.tE.pts.csh.hsp.l.hsSs.cspt...ps...........S.....EsSQhD-.ShETpSpcps+.hp..c.spccSs....cpSs.I-SQEp..S+sSpE............hpStEc..s.D.+S.E--+aLKh+hSHEh-SuSSEhN....... 
0 2 2 5 +3609 PF00185 OTCace Aspartate/ornithine carbamoyltransferase, Asp/Orn binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 29.40 29.40 29.70 29.70 28.10 29.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.01 0.71 -4.36 123 10559 2009-09-13 17:21:38 2003-04-07 12:59:11 19 39 4814 296 2543 7158 4618 158.00 28 43.70 CHANGED cGl.clshlG......D.t.splspShl...hshst.hGh.c.lplsuPps..h.s.......t...h...........t..t.hplp...p..shp.culps......sDllasspht........................pEc.............tphcthps.aplspchlp..t.sps.c...sllhHsLPh.......R.....s.........................EloscVh-.....s.sp.ShlacQAcNtlasphAllhhl ....................................slpluhlG........D..thspsupShh...........hshsp..h....G...h.....s...lpls.uPcs...h.s..............................t..thl................t...ht..pt..G.......spl.pls.......p.shc..-u.l.c.s......s.Dl...lhss.pht........................cEpt...........................pc.hpt.hps....at...lspc..h.hp....t..sp....s....s................s.lhhHsLPs............R....s.........................h.E.l.os-Vh-..........u.sp.ShlFc.QAcNtlasptAllhh.................................... 
0 832 1612 2137 +3610 PF02729 OTCace_N Aspartate/ornithine carbamoyltransferase, carbamoyl-P binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.90 20.90 21.20 21.40 20.60 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.81 0.71 -4.53 105 10527 2009-01-15 18:05:59 2003-04-07 12:59:11 16 43 4811 296 2554 7124 4842 142.50 36 39.25 CHANGED +c.llsl.pD...hspc-lppllchAtph+c........thtpt................thLpG.+sluhlFhcsSTRTRhSF.EsAhppLG..upslhls...ssspl..........u+.........GEolpDTu+sluph.sD..slllR..........phppsslpph..Achs.....slPV...INuhs...stpHPsQuLsDlhTl...pcc .............................................................+chLslpD.hotpEl.ptllchAtphKp......................ttppt............................hLcG..Ksluh.lF.c.sS..TRTRsSF.E.sAhtcLG..ucshhls....sssopl................................u+...GEoltDTu+...lLuph..sD....sIhhR................p.p.p.t.t.s...cpl.....Ac..au..............slP.V.....lNuhs....s......ptHPTQsLhDlhTlpE......................... 0 833 1616 2149 +3611 PF02338 OTU OTU-like cysteine protease Mian N, Bateman A, Finn RD anon medline:20130692 Family This family is comprised of a group of predicted cysteine proteases, homologous to the Ovarian Tumour (OTU) gene in Drosophila. Members include proteins from eukaryotes, viruses and pathogenic bacterium. The conserved cysteine and histidine, and possibly the aspartate, represent the catalytic residues in this putative group of proteases. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -11.08 0.71 -3.52 51 2248 2012-10-10 12:56:15 2003-04-07 12:59:11 14 84 441 33 1395 2228 90 124.90 19 19.92 CHANGED .uDGs.Cha+uluptlh..................plRpts.....sphht......................ppppthpphltsp.................hh..h.t.t....sWus.......................................plpl.tuhuphh..phpIhl..............h.tsht.....hh.p.h.....tthpptlplha.t.....stHa ..........................................................................................uDG.sCLa+.Alup...tlh.............................tptth.hp...plRpts............sphlp..............................................................pptppa.p...hlpss................................................ahpphtpss.....pW.Gs................................................................................................pl-..l...huluchh.....phsIhl.....................................hp....ttst.......h....t.......................t..........hhh.............t..Ha........................................................................ 
0 507 785 1100 +3612 PF00724 Oxidored_FMN oxidored_FMN; NADH:flavin oxidoreductase / NADH oxidase family Bateman A anon Pfam-B_642 (release 2.1) Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.92 0.70 -5.45 12 10595 2012-10-03 05:58:16 2003-04-07 12:59:11 15 107 3470 129 3408 9713 2738 312.00 28 71.36 CHANGED pLFpPlclGs.hpLppRllhsPh.ophtutccG.lsp..hhhcYastRuphssshlIsEushlsspus.shssssslas-cpltpa+cls-AlHcpGuhlhlQlaahGtps..schhtpts.........sstss..hsss...............+tlot--Icphlp.paspAA++Ahp.AGhDuVElHuApGYLls.pFLsPtpNcRoDcYG.GShENRsRFsLEll-tlpcslGp-t.....luhRlSs.shhtst.tshtEs...h.hhhhhschthchhsh.phAhl.chspPt.htshpspht...p.tss...phl+phhphPllssGp..hsssp.tshl.htc..scsslluhGR.hlusP-Lsp+lccGh..p .........................................................................................................LFpPhp..l....s........s....h.....pL.pN.R..llhuPh....s.........p.............h.......t..........u.........t...........t......s......s..................h...........s.........s......h.....hhta....Y..t..p.R....A..p..........s.s.GLl.......l..s.....t.u.s.t.l.s......s.......p.........u.......t.......s............h.....s.......t.........s....s.....s....l........h........s......s........p.......p........l..........t.....u....h.+...p...l....s.....c.....u............l..........H.............t...............p.........G.............u.........p..............h.........h....lQ.l......h.H...s.G..Rh..ut...........t...h....stt............................uss...............s..ht.t..................................s...........+th.o.p.c.-I......t...p...l..l.p.sFspAAppAh................c.A.G..............FD..............GVE.lHu..Ac.GY..L.lp...QFlos.hoN...p.Rs.D.p......Y.G....G...S.h.-N.....R..h....RhslEllc....A....l....p....p...s....h....u....s....c..h..........lshRl.....S...s..............h....c....h..............h..............p.......s.........s.......h......s.......h..p....-s...................ht.h
.h..p...h.......h....t...t....h....t......h......p......h...h.....................................t...h.......u.........h..............h.....p..........h.......s...p.....................................t.....s..............s..h....t...........................h.........................t.t.l.....+...p......t......h.....p.....h..s...........l..l....s......s...Gt.....................h............s.....s...........p...........................A.................p................p.h........lt.p.....................G........t......s........D....h.lu..huRshls.sPchst+htps.......................................................................................................... 1 954 1981 2860 +3613 PF00174 Oxidored_molyb oxidored_molyb; Oxidoreductase molybdopterin binding domain Sonnhammer ELL, Bateman A anon Prosite Domain This domain is found in a variety of oxidoreductases. This domain binds to a molybdopterin cofactor. Xanthine dehydrogenases, that also bind molybdopterin, have essentially no similarity. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.09 0.71 -4.84 150 4576 2009-01-15 18:05:59 2003-04-07 12:59:11 14 45 2283 46 1578 3981 1512 159.10 27 44.65 CHANGED hspts.hP............tl.s....tsapLpl.....s..G.......h.Vcpshshohc-Lt.p..hPp..tphss.slpCsss....................Wuh...h..sspWsGV.LpclLctsss...............p......su.........................paVhhpu.hD....................................................s.....YspulPl.scshc......................csllAaphNGc.....sLs.pHGhPlRLllP.shhGh+ss.KWlpcIplhsp..t......tu...aWpp ......................................................................................................psaplpl.........p..G......................V.tp.....s.....h..s.....hshp-Lh...t...h.s......p..p..h..h..h....p.h.p...........Csts...................................Wuh.....ss.WpGssLpclLp.t..s..ts..........................................................p...ssA.....................+aVtFpu.h-...........................................................................s..Y..s.p.u.l.pl..s.cA.hc.........................ts..llAht.h..sG..c.......sL.ss.p...p.G..t..P..lR.lll.........P........t......h.....h..G........h+.s.....s.K..hlhpIplspp...s.......sha............................................ 
0 442 944 1319 +3614 PF00148 Oxidored_nitro oxidored_nitro; Nitrogenase component 1 type Oxidoreductase Sonnhammer ELL anon Prosite Domain \N 28.60 28.60 28.70 28.60 28.50 28.50 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.07 0.70 -5.83 116 3989 2012-10-03 15:23:08 2003-04-07 12:59:11 14 13 1338 128 1280 3778 993 356.20 21 84.43 CHANGED CshhGAh.hshhslcsshsllHGstGC...ssastshhspphc.............c.h............hhoTshsEpsslaG.GpcpLtculcplhppapP..chIslhoTChsphIG-Dlpulscph.....ppph......................llsssssua.sssp.pGactuhpullcpl.....................................................................................p.tpcst...............plNll.sshtls.....uDhpplccll.cthGl.........................chsshhsussslc-lpphspAphslshspp.htt....suchlcccaGlPhhp...ssshGlpsTsphlppluchhG................tt..h.sptltc....cc.sphhcthhc.h+....thlt...G.++.........sslhus..s.shshulsphlt.E.hGhcsshssstssstt.h..t........................................................ph.t.htt............lh.s..ch.h-lcph.lpphp..sDll.....lGssp.sphhucch...........h.hGhPhhsthsht.ptshhGYpGsh....plhcclssslh 
.........................................................sthhGuh.hshtslcs.shhlhHGs.GC...........st.a.h.h.sh.ht.thht...................p.......................hh.sosh..pEpsl..l..h.G...up...ccL...h....csltphh...p.......h.....h.......t..P...............ph.lhlhsoCsstlIG-Dlpuhs.cph.....ptchsh...................sllssps...s.ua..sssps.tG.hchshp.ulh..chhhs.........................................................................................................................t.t..tp.t....................slsl...l.Gshshs.........u-h..hp...l+...tl..L...cph...Gl..........................................cl.shh.ss.s.s.s.hp-lt.ph......sp.Aphsl.......hhs...............t....ht...............huphhpc..c.h.uh.Phht...............................hG.h..pthtthlptl.sp.hhs.................................................................h....t.....ltt........tp....tt...h.......h...t...t....h.p....hp..........hht.........G..+p................hh.l....hss..s.....h.h....t...........hsphh...-....hGhps.hh.h.sh....h.t..ptt...........t........................................................th.t...htt......................lh.t...s..........p....h..phh....t.p........ht.............s-...lh.............h..us.h..tt..hh.t.ph.....................uh..P.hh...........h.hhGatGh..hhp.h.....h.................................................................................................................................... 0 432 869 1085 +3615 PF00361 Oxidored_q1 oxidored_q1; NADH-Ubiquinone/plastoquinone (complex I), various chains Finn RD anon Pfam-B_4 (release 1.0) Family This family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.78 0.70 -5.13 33 91875 2012-10-02 00:39:38 2003-04-07 12:59:11 15 57 35037 6 4521 86901 9765 225.60 28 56.30 CHANGED ussllhhaluaE..hhslssal.Llshattp.cuhpAuhphhlhstluShhLLhuhh..hlahhs.uohsa.tlhp...............s....hhhhhhlhluhhhKhuhhPhHhW..........LPcuhtusssssullhuuhhlhsulallhRhh.lh.......hh.hlhhluslohlluuhsulsQsDlK+llAYSoluphGa.hhshluh...s.h..tsuhhhhlsH.uhhpusLFhhs.shhhpp.t.........spslhhhs.ulhthhPhhthhhhlshhuhs..GlPsh....sGFhuKhhlltsh ....................................................................................upphh..h.ahh.hE.....sh......hh.......l....................h.................p............p........................p.........s.........h..............p..........A.....s..........h....p.hh.l.h..p.t.h.u.s...h.h....l..L..h.u...h.....h.........h......h................s......G......p.......h.....p..h..t..p..h.t.....................................................................................h........h...h.....h...h....h...hu.....h..hh...K.u.h..Ph..H.h..W..............hP.c.sh.p..u...........s......s.....h....s....u..h..........l..h...u.s.h.....h...h.s.s.h........h.l...l...h....ph...............h...............................................................h...l....h....h...l.u.lh....o.hh.h.u.u.h..hu..L......s......Q.....s......p.....l..+.....+ll.....A....aS....olu....p.h...G..h.........hh....hh.lhh......................ss.................t.....h..u....h.....h....p.h....h.s.....a....uh.h.p...u.h.l.Fhhh....s......h.p.p.hp................................spsh.h.hh....t......s.........h..h...p...h...h...P....h..h..s...s....h..h........h..l..shL.uls........G..l.P..P.h.............s.s.FhsK.hIlp..t........................................................................ 
1 1460 2936 3788 +3616 PF00662 Oxidored_q1_N oxidored_q1_N; NADH-Ubiquinone oxidoreductase (complex I), chain 5 N-terminus Bateman A anon Pfam-B_22 (release 2.1) Family This sub-family represents an amino terminal extension of Pfam:PF00361. Only NADH-Ubiquinone chain 5 and eubacterial chain L are in this family. This sub-family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.03 0.72 -4.41 32 19906 2009-01-15 18:05:59 2003-04-07 12:59:11 15 26 14684 2 1246 18780 2012 60.60 42 9.78 CHANGED hlphhpWhssps.hplshuFhhD.hohhhlslshhVohhlhhaSh.YMp.pDPphs.......RFFpYLth ..................................hhWsWh.h.s.....ss....F...slp..hu..a...hl..D..s..LoslhhhllohVu......hhVll.YS..ss..YMu...c..D....p...s.h.....................RFFsYhsh............................... 0 404 818 1051 +3617 PF00420 Oxidored_q2 oxidored_q2; NADH-ubiquinone/plastoquinone oxidoreductase chain 4L Finn RD anon Pfam-B_193 (release 1.0) Family \N 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.15 0.72 -4.39 194 10049 2009-01-15 18:05:59 2003-04-07 12:59:11 19 10 7521 2 1437 7095 1962 96.00 27 90.05 CHANGED hhhhshhhhFhlulhGlhh.RppllpsLlsLEhhhLulhlhhshhuh.h...........................................pshhsphhslhlLshuAsEuulGLALLVthhRsaGo-plpslslLps ................................................h.hhhhhshhFhlGl..hG..l..l.....h...Rp.....pllphLl..sLEhMhhulhlhhs.hhuh.h.........................................................................................................................................................tss..h..s...phhsl.hhl.shuA.sEuulGLAlllth.h..RspGosplpslshhp.............................. 
0 454 913 1189 +3618 PF00499 Oxidored_q3 oxidored_q3; NADH-ubiquinone/plastoquinone oxidoreductase chain 6 Finn RD anon Pfam-B_61 (release 1.0) Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.24 0.71 -4.41 100 8649 2009-09-11 06:35:13 2003-04-07 12:59:11 15 6 7126 2 830 7391 1836 152.30 22 84.54 CHANGED lhsslhhlh..s.psPlhhslhllhhslhhuhh..hhhhs...ssahuhlhhllYlGGlhllFlahssls......ssphhth.......................................................................thhhhhhhh.......hhhhhhhhhhhht.h.hths.ht..th..h...............................................hhssthh.hhh....hhslhLllslls..sltls ........................h..sul.hss.s.......s..ssPlaus.....LhLll.shhsssul..hhh.hG...usFlul.llhl.lYlGuhhVlFlasshhs............spp.hs.ct.h.t.p.h.........................................................................................................................hhhh.h..h.s.hhh.............hhh..h..h..h.....h....h......h....h......h.......h....t.......t.....h.........h....h.....s.....h.....s....s.....h...t...t..h..s..h..t..ht...............................................................................................................................hhth.la.o........h.sh..hhh......lsuh..hLLlsllssl.l..................................................... 
0 275 538 685 +3619 PF00507 Oxidored_q4 oxidored_q4; NADH-ubiquinone/plastoquinone oxidoreductase, chain 3 Finn RD anon Pfam-B_68 (release 1.0) Family \N 21.60 21.60 22.10 21.60 21.30 21.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.52 0.72 -4.20 99 11473 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 9331 2 859 9513 1923 105.50 45 89.88 CHANGED llshllhhl..............uhh.......ls......cph..t...-K.osaE.CGFcPh.spuR...hsFol+FaLlullFllFDlElshlhPhshhh.....tt.hhhh..hhhhh...hFlhlLhlG.lhYEWppGsLcW ...................................hh.luhlLhhl........saa..........Ls...p.....h.ss...ss.......EKhSPYE.CGF.D.Ph...Go..AR.............LP.FSlRFFLVA.ILF..LlFD..L.EIALL..LPhs.huh..........h.p.s....s...h.hs..........hhhs.h..hh.l.l.l.L.s.lGLlYEWt.pGuLEW............................... 2 284 563 713 +3620 PF01059 Oxidored_q5_N oxidored_q5_N; NADH-ubiquinone oxidoreductase chain 4, amino terminus Finn RD, Bateman A anon Pfam-B_381 (release 3.0) Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.32 0.72 -10.67 0.72 -4.18 143 6646 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 4701 0 249 6505 763 104.90 37 24.78 CHANGED MLKlllssl.hLlPhshh.t...hha...hhhshllhhlShh.h..hp.shthhhs.hhshh.huhD.lSssLllLShWLhPLMlhASppthpp.shtpp+hFlhhlhhLplhLlhT .....................................................................MLKlllP.Tl..ML.hP..h.s.ah..s...p...hlW....s..shh.h.SllI.uhhSL....h......a............lp.................s..p................s............h...........t.....s..........hs.h.........h.husDsLS.sPLLlLTsWLLPLMIl.A....SQ.sH...lpp.EshsRp+..halohLlhLQ.hhLIhs.................... 
0 77 152 195 +3621 PF01058 Oxidored_q6 oxidored_q6; NADH ubiquinone oxidoreductase, 20 Kd subunit Finn RD, Bateman A anon Pfam-B_1345 (release 3.0) Family \N 21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.22 0.71 -4.54 169 7525 2009-01-15 18:05:59 2003-04-07 12:59:11 17 28 3465 80 2007 4724 2053 123.50 33 48.57 CHANGED CsGC...shul.tshtsshh-lh.t..hp.........htat..slhssst..........................ptDlllV-Gul..ts......................pstlchlhcht.tcschllAlGsCAshG.Gl.shts.......sh...........................................slhp.........lsV...lplPGCPPpPctlhtslhtl ..................................................sC.slph..hssts.s...hhDh............................c.aG.h.hh.h.u.ust...........................................puDlhlVsGslspch.......................................................................................tPs.l.c.+..l.a-p.h.s.-P.+h.V..IuhGu.CAssG.Ghathts........sh............................................................................h..........lpGl-.ch...............lPVD..lalPGC...PPpP-shltulh..h................................... 
0 667 1322 1707 +3622 PF01237 Oxysterol_BP Oxysterol-binding protein Finn RD, Bateman A anon Prosite Family \N 25.40 25.40 25.40 25.50 25.30 25.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.26 0.70 -5.96 149 2468 2009-01-15 18:05:59 2003-04-07 12:59:11 13 56 312 10 1498 2357 5 330.10 27 52.43 CHANGED ulhuhl+p.....slGpDLS+.lohPlhhsEPhShLQ+hsE..hEY.scLLspA...sp.................psDshpRhhhVss.aslSshssp..hpR.t.......KPFNPlLGETF....E.hsc..t.....shchluEQ.........................VSHHPPloAhascs..p.s...............aphtupstscsKFhGp.Shplp........hGtsh...lplpp........................................................................................t.........sEpYshsp..ssspl+sIlhGphalE.hGchhIpsp.........p..oG.pshlcFps.p.....Ga...huup...pp.........lpGt..lhc..p..............sspsha..plpGpWs.cplhhppsp...........................................................................ttpt.....phlWcssshss..p..pha.h..s.hs.hsL..Nt.....c..........plssTDS..RhRPDp+AhEpGch-tAspEKp+lEccQRptR+cc..............pppsp..pap.P+aF..pcs......................hpshstp................a.hpss......YWch 
.......................................................................hhsll+p......plG.h.D.L..o..+...ls...hPs...h.EPhS..hLp+.h.s-.....h.p.a..s...c..L.l.s.p.A...s.p.............................................................p.ps.shcRhh.h........Vsta...hl..Su.atpp.......htpst...............................KPaNPlL...GETa..........................................-.h.p.....p............sh.ph.....l..uE.Q.........................V.SHH.....P.P.........loA..h.a..sps......ps...................hph..s.p...ht...h+.sK..F.........h.........G.p..S..lpl..h................hG..p..hp.......lph.p.....................................................................................................................................t......sEpY.hh.sh..ss..stl+..sI...lhG..p..ha..lEh.hG.ph...pIpsp.............................p.....ou..h..psplpF..pt..p.................................sa....h..u.s..ph.....pp................lpGh...lhs..t....................................stcs.h.h....plt.G.pWs.pp.h.h.httss..................................................................................................................................pt......phlacss....h.s........t..t..ph.h................L............c.........................................ssp-S....hh+...s.s.p........c..t......l.c.p...u.c.h.-t..AsppKp.clE-tQRtt...p+pc.........................................................p.p.p..t............tap.......s+......aF.....ppt.................................................................................t.............................................................................................................................. 0 500 792 1157 +3623 PF00543 P-II Nitrogen regulatory protein P-II Bateman A anon SCOP Domain P-II modulates the activity of glutamine synthetase. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.23 0.72 -3.54 93 4823 2012-10-01 21:59:08 2003-04-07 12:59:11 17 7 2688 149 1478 2996 1680 101.60 53 82.59 CHANGED IpAII+..P.KL--V+cALsphG.ltGhTVo-VpGhG+QKG..sElaRGs....................cahss.hlPKl+l-llV.sDctl-pll-sIhcsApTGc...lGDGKIFVtsl-..cslRI.RTG ....................................IpAIIKPFKL--V+EALscl.G.....lp........G.hT.V.o.E.VK.GFGRQK.G.H...TE.l...Y.RGA.................................EY.hV-..FLPKVK.lEl.........lV.s.D.-...l.-.p.ll-sIh...cs.Ap...T...GK........IGDGKIFVhslp..cl.lRI.RTG......................... 0 486 973 1256 +3624 PF04275 P-mevalo_kinase Phosphomevalonate kinase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Phosphomevalonate kinase (EC:2.7.4.2) catalyses the phosphorylation of 5-phosphomevalonate into 5-diphosphomevalonate, an essential step in isoprenoid biosynthesis via the mevalonate pathway [1]. This family represents the animal type of the enzyme. The other is the ERG8 type, found in plants and fungi, and some bacteria (see Pfam:PF00288). 23.90 23.90 24.90 24.20 22.70 23.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.53 0.71 -4.37 11 154 2009-09-10 15:39:43 2003-04-07 12:59:11 9 8 123 1 104 164 4 113.90 38 29.84 CHANGED hSGKRKSGKDalo-+LppRLsts+sp..IlRISpPlKpcaA+chsLDhpcLLusGsYKEpYR+DMIpWuEpcRpcD.GaFCRtAhpps.......spslhIVSDsRRpoDlcaF+EsYG...hshsl ....................hoGKRKuGKDals-h.ltp.pL...st..s.....hst...........ll+lSsPlKcpYApppGL...DhpcLLssusYKEpaRt-MIpWu.EcpRppD....s....Ga....FCR...t...shcts...........spslhllSDsRRhsDlpaFpptas.......sthh...................................... 0 32 52 78 +3625 PF04699 P16-Arc p16_Arc; ARP2/3 complex 16 kDa subunit (p16-Arc) Waterfield DI, Mifsud W, Finn RD anon Pfam-B_4180 (release 7.5) Domain The Arp2/3 protein complex has been implicated in the control of actin polymerisation. 
The human complex consists of seven subunits which include the actin related proteins Arp2 and Arp3, and five others referred to as p41-Arc, p34-Arc, p21-Arc, p20-Arc, and p16-Arc. The precise function of p16-Arc is currently unknown. Its structure consists of a single domain containing a bundle of seven alpha helices [1,2]. 21.90 21.90 22.90 22.70 19.60 21.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.89 0.71 -4.20 36 389 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 277 15 237 330 3 143.80 37 92.13 CHANGED psaR+lDIDsh-.-sh.c.stps............spsphtsptsplRshLpuGchttALphsLpssPhsucs.psK-tthpsVl-VLsuhKs....s-.IsshlcsL.....sppph.DsLMKYlYKGhusPs...............................stsp................hulLLsWHEKllpluGlGsIVRshoDR+TV ..........................................................................s.taRplDlDth-.pp..p.p....t..............st....sph......tsptspl.cshLp......p.....G.....ch.htALpssLc.s..s.Plssc....s....ssK-tshthVlcVLt..uhKs....s-...lp.psl.puL......spsth.DlLMKYlYKGhptss.......................................................psss...................................ulLLpWHEKhhthuGlGsIlRVlTsR+pV......................... 
0 69 116 181 +3626 PF00864 P2X_receptor ATP P2X receptor Bateman A anon Pfam-B_1590 (release 2.1) Family \N 19.60 19.60 24.30 19.70 19.40 19.20 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.38 0.70 -5.75 19 762 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 78 7 284 674 19 263.70 36 83.68 CHANGED F-YcTsK.Vll+shplGll.RllQLhllsYllGWVFlacKGYQppDssl.SSVhTKlKGluhsN.s........hht+lWDVADYVlPsQGsssFFVhTNhllT.sQpQGpC...........PE...lP-......stCspDss.CptG.sshtusGltTG+Cls....hss..sh+TCEIhuWCP..sEs-pt.Pss...shLtpAENFTlaIKNsIpFPpFshoKpNllsshssoalKoCpYctp..psPaCPIF+LGsllccuGpsFpclAhpGGllGIpIpWsCDLDhshppCpP+..YSFpRLDsp......pslSs.GYNFRa................................A+YYpc.sGsEhRTLhKuYGIRFDllVsGpAGKFslIPThlslGSGluhhGluollCDllLLahh..................+ppcaY+pKKFEplpcsp.phstpp......pt ..........................................................................................................................................pYpT.+.hhhpshphGhh.hhhphhlh.Yh.h.................a.shl.pKtYQt.-.......SoVhoKlKG.hs.hp....................................plhDss.-Ysh.P................p.........t..ssh.lh..Tph.hT.tQ..thC..............sE...................t......h.C....pDtt...C......G.............................u......p.G..h........TGpCl..........t.....tTCEl.uWCP.h.E.tt...........p...........hh.tA.sFTlhlKN.......pl.aPh..ap..hp.......p.......tN.....l.h........th....s.....t....Chap.....p....C....PlFclG.lhp.s......G.ts...F.........pp.........hA....p..........G.......GhhGl.ItWsCsLDh....p.C.Pp..YpFp.tL-..............h.....GaNF.R.a................................A+aah..........s......s..phRsLhKsaGIRhDlhV.Gp..uGKFshl.hhh.hssshs.h.Ghs............shhhDhlh..h......................t...Y.ttKhp.h............h................................... 
1 76 97 158 +3627 PF04045 P34-Arc p34-Arc; Arp2/3 complex, 34 kD subunit p34-Arc Wood V, Finn RD anon Pfam-B_9846 (release 7.3); Family Arp2/3 protein complex has been implicated in the control of actin polymerisation in cells. The human complex consists of seven subunits which include the actin related Arp2 and Arp3, and five others referred to as p41-Arc, p34-Arc, p21-Arc, p20-Arc, and p16-Arc [1]. This family represents the p34-Arc subunit. 19.70 19.70 19.70 20.20 19.10 19.20 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.77 0.70 -5.09 27 380 2009-09-11 15:40:12 2003-04-07 12:59:11 9 9 288 15 254 369 3 223.70 41 65.50 CHANGED hlSlplKsap-LhptG......s.chLppcYushh........sssEsGYshoLhlDLp..phs.....ptpppllpcluhLKRsshAuPFcpsFpt.tphsp...................spphhsI+YRs..DEolYlcsptDRVTVIFoTlFpDEsDplhGKVFLQEFV-AR+ps...QoAPQVLaSH.-PPLElpshsssp......ss-shGYlTFVLFPRHhsptcpppsoIspIphFRsYhHYHIKCSKAYhHoRMRtRVs-FLKVLNRAKPEsts ..........................................................................lSltl+happL.paG......ApchLpctYGshl..........sssEsGYsholhlDLp..plP...............psp...........ppllpphuhLKRNshAusFEchFph.tphpp...................stphhsIpYRc..-EshYlc....s........ptDR.VTVlFSTlF+D-sDhlhGKVFhQE..Fh-..uRRts.........psAPQVLaSp.....-PPLELps.ssst.........sssshGYlTFVLFPR.H.spsptptpoIshIphFRsYhHYHIKCSK....AYhHoRMRt+sssFLpVLNRA+P-s.p.......................................... 0 79 133 205 +3628 PF00067 p450 Cytochrome P450 Eddy SR anon Overington and HMM_iterative_training Domain Cytochrome P450s are haem-thiolate proteins [6] involved in the oxidative degradation of various compounds. They are particularly well known for their role in the degradation of environmental toxins and mutagens. They can be divided into 4 classes, according to the method by which electrons from NAD(P)H are delivered to the catalytic site. 
Sequence conservation is relatively low within the family - there are only 3 absolutely conserved residues - but their general topography and structural fold are highly conserved. The conserved core is composed of a coil termed the 'meander', a four-helix bundle, helices J and K, and two sets of beta-sheets. These constitute the haem-binding loop (with an absolutely conserved cysteine that serves as the 5th ligand for the haem iron), the proton-transfer groove and the absolutely conserved EXXR motif in helix K. While prokaryotic P450s are soluble proteins, most eukaryotic P450s are associated with microsomal membranes. their general enzymatic function is to catalyse regiospecific and stereospecific oxidation of non-activated hydrocarbons at physiological temperatures [6]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 463 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.40 0.70 -5.97 50 39592 2009-11-03 19:16:01 2003-04-07 12:59:11 17 282 2977 873 20665 39656 2722 330.60 17 79.49 CHANGED Psss..shPlhGshhplth....pphhtp.hpphpccY..GslaslhhGs.pshVlltshchl+clLhcps.thssc.t.shhtp...hhpspGlhhsss..scW+phR+hhhsshpsh...ph.shpphlpcpuppLlcplccpssps......hDhtphlsphshssIsslhFupphs.hp-pp...........h.phhphhpphhp..hhts.hhphh.hhs..hlhhhsspht+hhppshphhpshhpphlcc+cpslsss.................p.hDhlchhLht........tppst....phopcsltsslhslhhAGs-TTSooLpasLh.hLhcaP-lQc+lpcElcpll.Gptc.........tsshpDhsphPYlcAsl+EsLRhhslsP...tlsRtspp...Dsplps...ahIPKGTpVhlslhult+Dspha.ssP-pFcPpRFLscps................h.tpshtalPFusGhRsClGctlAchEhhlhLsplLppFplc..s.ss.hsh.pt.....llhhs.shplph 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................................................................................................................................................................................................................................................................................................................h................................+...t......h..................h............................................................h...............................................h........h.....t...........h.....................................................................................h.............h............................t.............h...................h...........h.....h.s..........................................................................................................h.......................................................................................................................................................................................................................................................................................................................h..............................................t........................h...........t.....h........h.....................t......h.....h......t.........t....t..........t.t..t.t........................................................................s...h....h.
...p......h...ht.....................................................h...s........p...p....l........h......t.................h............h......................h......h.........h......u.................G.........................-.....T..................o....s.........s.......s....h................t....h..h....h............h......l.....h......p....p....P...............p....h..........................p....p.........h.................p.....E........h.............t....h.....h....t...t......................................t.....t...t.......h..............p......h.............h.........h.......p...............t.............s.l...........p..............E........s.............h............R........h..........h..............s....s............s..........s.....................h..........h.........................R..h..s...h.p........................s.....h...........p.................l.........t...............s.....................hh.l........s.............t.................G..................s.................h...........l.........h..............h..........s......h...............h...................s............h..........p................+.............s............................p................h..........a....................p...............P...............p........p.............F...p......P..p......R...a......t...tt..................................................................t....h.....h.......s.........F....u.........h...............G............................+........................Ch...........G.......t............t................h...A..........h........hp.....h..........h.......h......h...h....s....t...l...l...t....p...a...p.h................................................................h................................................................................................................................ 
0 5443 11134 16825 +3629 PF00870 P53 P53 DNA-binding domain Bateman A anon Pfam-B_782 (release 3.0) Domain This family contains one anomalous member, viz: Zea mays (Q6JAD8). This sequence is identical to human P53 and would appear to be a a human contaminant within the Zea mays sampling effort. 20.30 20.30 22.60 22.60 18.80 20.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.31 0.71 -4.71 7 550 2012-10-03 00:25:27 2003-04-07 12:59:11 13 15 172 206 160 635 0 158.10 54 43.93 CHANGED soVPossDYPGsasFcLpF.Q.SuTAKSVTsTYSPpLNKLaCQLAKTCPlplhVspsPP.GshlRAhAlYKKsEHVs-VV+RCPHHppss-.s-t.hAPsuHLlRVEGN.hupYhEDs.ThRpSVhVPYEsPQlGoEhTTlLYNaMCNSSCMGGMNRRPILTIITLEs.-GplLGRRuFEVRlCACPGRDRKTEEcsh .................................................lPs.psY.G.atFpl.......tF...p....os..s.s.K....SsshT................YSs....LpKLasp..lAKT.CPl..plhl.....s....s...P..P....G...s...hlRAMslYK+spHhsEVV+RCPpHchs..p.-.....s.-s....hs.s.....s..pH.....LI.RVE...G..s.....tu.pY....h....-....D............T....h..RpSVlVPY....E...sP.p.....l...G.o..-......h.....T..T.lhY....saMCN.SSC..h...G..GMNRRPILhIlTLE.....s.........t...s...G.plL....GRpshEsRlCACPGRDR+s-Ecp.h...................... 0 40 49 92 +3631 PF04636 PA26 PA26 p53-induced protein (sestrin) Mifsud W anon Pfam-B_5416 (release 7.5) Family PA26 is a p53-inducible protein. Its function is unknown. It has similarity to Pfam:PF04636 in its N-terminus. 
29.40 29.40 36.00 33.00 28.20 28.30 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.58 0.70 -5.79 13 327 2012-10-01 19:19:04 2003-04-07 12:59:11 8 4 116 0 185 285 7 362.70 43 81.45 CHANGED tpGPSsFIPsp-llphsstscpppthhp-sh...GRh..........DplopVhGhHPpYLcpFh+optalhphDGPLPhsaRHYIAIMAAARHQCSYLVshHpp-FLpsGGDspWLpGl-asP.KLRpLs-lNKlLAHRPWLIsKEHIptLlK....s.GpssWSLuELlHAlVLlsHhHuLuSFVauCGlp.-hD.h....tstshps.........ss..spsps..............ph.sssshsssptstus....................................t-VEtLM-RMKcLpcp..pc-EuSpEEMsT............RFE+p+ppohhVh.........suscptcssssssluRal-DssFGYpDFuRRGppslP.TFRsQ.....DYoW.....................................EDHGYSLlNRLYs-l.....GpLLD-KFpsshsLTYpT...............hAs+ssVDToth.......RRAlWNYlpClaGIRaDDYDYGEVNQLL-RSLKsYIKTssCaPE+sTpc.YssahhpF+HSEKVHVNLLLhEARhQApLLYALRAIsRYMT ........................................s..............................p.hh.cth..t.G...Rl..........Dpls.VMsh.HPpYLpsFh+ophhlL..ph..D...G.P.Ls..h.aRHYIuIMA.AARHpCsYLlshahs...cFLp...s.......G.....Gs.....s........p........WL.p.G.L......c..sP.tKLppLs-lNKlL..AHRPWLlo.+..-.HIptLl+........................s..tcpsWSLuELl.pAlVLLsHhHuLu.SFsFGsG.lpsE.hc.....thu..s.hp..........................ss.sp.p..s....................................sss.st..p...t.....s..........................................................-lEsLMc+M.+..p..Lp...cp....p..p-ps..o.......pEE.hts..............................RF.Ehp+ppohhsh............................ss...c.....sl..pah-Ds.sasYp.DFs+..............+G..p....P..TFRsQ......DYsW.....................................E-HGaSLl.s..RLYs.-..h.....GpLLDEK....F....phshs....LTYsT...............hu..h...+ps..VDTo.hh.......RRAlW...NYlpChaGI.RaDDY....DY.G.ElNQLL-RshKlaIKTlsChP.E...+s..Tcc.Ysth..h..cp...FcaS....EKV.HVNLLlhEARhQAtLLYALRAIs+ah.................................................................................................................................................... 
0 56 73 122 +3632 PF02251 PA28_alpha Proteasome activator pa28 alpha subunit Bateman A, Mian N anon Pfam-B_2837 (release 5.2) Family PA28 activator complex (also known as 11s regulator of 20S proteasome) is a ring shaped hexameric structure of alternating alpha and beta subunits. This family represents the alpha subunit. The activator complex binds to the 20S proteasome ana simulates peptidase activity in and ATP-independent manner. 21.10 21.10 21.20 22.70 20.60 19.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.11 0.72 -3.80 18 310 2009-09-11 04:56:17 2003-04-07 12:59:11 13 5 109 7 133 251 0 61.10 41 23.66 CHANGED lss-spt.KV-sF....+ppLppEAEpLlusahPpKIhcLDsL.LKsstlNlpDLosl+us..LsIPIPDPs .................p.-spt.cV-sF....RppLhpcAEpLlssaFPpKIhcL-sh.L.....+-stLNlp-Losl+u..s..LsIPlPDP........................ 0 28 37 74 +3633 PF02252 PA28_beta Proteasome activator pa28 beta subunit Bateman A, Mian N anon Pfam-B_2809 (release 5.2) Family PA28 activator complex (also known as 11s regulator of 20S proteasome) is a ring shaped hexameric structure of alternating alpha and beta subunits. This family represents the beta subunit. The activator complex binds to the 20S proteasome ana simulates peptidase activity in and ATP-independent manner. 
20.90 20.90 21.30 20.90 20.20 20.60 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.69 0.71 -4.74 16 411 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 150 7 201 352 1 137.60 43 56.79 CHANGED ssG..hlssNcplhpllcplKPElppLhEphshlphWIQL.IP+IEDGNNFGVuIQEcslpclssVcocstuFhspISKYaspRGchVuKsuKhPHVtDYRphV+ElDE+pahpLRlhlh-lRNpYAhLaDlIhKNh-KIp+PRus..sptthY ............................................sG.l.sNcplh.sllp.hlKPElppLhEphsh................VphWlQhhIP.+..I..EDGNNFGVulQ.........Ec.s...lpclpslcocstua...hspI..S+Ya.pRuchVuKsuK.P.H.....V.....tDYRphVpEhDEtpYhplR...............lhlh-lR.NhYs...LaDlI....KN...h...EKlppP+up..pt..hY................................................................................. 0 70 89 138 +3634 PF05138 PaaA_PaaC Phenylacetic acid catabolic protein Bateman A anon COG3396 Family This family includes proteins such as PaaA and PaaC that are part of a catabolic pathway of phenylacetic acid [1]. These proteins may form part of a dioxygenase complex. 
27.60 27.60 27.80 28.70 27.40 27.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.49 0.70 -5.43 117 1413 2012-10-01 21:25:29 2003-04-07 12:59:11 7 4 637 42 464 1179 369 266.20 33 88.22 CHANGED M.................................................thppsLhchlhphADssllhupRhuEW...hu+APsLEc-lALsNhu.D.lGpuphhashAtp.......................L.....s..G..........pscDcLAahRpstca..+NhhLhEhPss...........caApohsRpaLhDuhthhhhpsL.ppSo.sslAtlAsKssKEtsYHh+put-hlhpLu.cGTcES+p+hQsAlsphW.ash.-hFsss-s-.................pshtttGhssssspLRppahsplsshlp.pssLslP...c.....st.hphs.G+....pG.pHo-thGhl................LschQhhpRuaPsu.sW ...................................................................................ttttp...................thcpsLhc.lhphucopllhhp.tu.p.W...hs+APsLccchsLhshs.DphGH....uh.hL.aohA.tp.......................L..........G...............tsc.Dc.lh.th...p..st.ch...+.......sl...hph..Psh.............saA.DshshsaLlDuhtlh..ssL..scoohsshAphhs+.h.hKEpsaH.RpuhchlhpLu......pGT.....c...tp...+phhQpAlschWh.sh..hFsss...-sc....................puh.s.h.th.h.t.h.s.s.cp.LRppahspssstlp...hs.LslP...s...............ss....h....phs....sc....pG.tas...G.l.................htchphhp+ua.tu.tW................................................... 0 123 282 386 +3635 PF02758 PYRIN PAAD_DAPIN; PAAD/DAPIN/Pyrin domain Bateman A anon Bateman A Domain This domain is predicted to contain 6 alpha helices and to have the same fold as the Pfam:PF00531 domain. This similarity may mean that this is a protein-protein interaction domain. 
22.40 22.40 22.90 23.00 22.10 22.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.52 0.72 -4.28 38 767 2012-10-01 21:41:45 2003-04-07 12:59:11 11 56 60 11 298 774 1 81.90 26 11.69 CHANGED tchhLlpsLEpLscc-hccFKthL............ptsphpIspspl-p.ss.chclAsLlhppaspctAhshslplhcchspp.cLsccLpc ........p.tLhphLEpLs..cp-hc..cFKhh.L..................ppst.tplPh....s.pl-p..As..thclAslLlppa.stptAhphslplh..cchspp.-Lscchp.................... 1 23 69 115 +3636 PF00658 PABP Poly-adenylate binding protein, unique domain Bateman A anon Prosite Family The region featured in this family is found towards the C-terminus of poly(A)-binding proteins (PABPs). These are eukaryotic proteins that, through their binding of the 3' poly(A) tail on mRNA, have very important roles in the pathways of gene expression. They seem to provide a scaffold on which other proteins can bind and mediate processes such as export, translation and turnover of the transcripts. Moreover, they may act as antagonists to the binding of factors that allow mRNA degradation, regulating mRNA longevity. PABPs are also involved in nuclear transport. PABPs interact with poly(A) tails via RNA-recognition motifs (Pfam:PF00076) [1]. Note that the PABP C-terminal region is also found in members of the hyperplastic discs protein (HYD) family of ubiquitin ligases that contain HECT domains - these are also included in this family. 21.00 21.00 21.20 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.17 0.72 -4.31 38 874 2009-09-12 06:34:48 2003-04-07 12:59:11 13 31 321 31 523 833 28 70.80 49 9.26 CHANGED tshsAuhLAsAsPpp.......QKQhLGEpLYPhI....p........phpP..-hAGKITGMLLEhDNsELLpLLEss-sLcsKVsEAlsVL ...........................hushLAsAs.Ppp...............QKQhL......GE+LaPlI....p.........................shps.....phAGKITGMLLE..h..D...s...oE....L...LphLES.s.-...uL+uKV..-EAlsVL................... 
0 158 255 394 +3637 PF03068 PAD Protein-arginine deiminase (PAD) Mifsud W anon Pfam-B_2195 (release 6.4) Family Members of this family are found in mammals. In the presence of calcium ions, PAD enzymes EC:3.5.3.15 catalyse the post-translational modification reaction responsible for the formation of citrulline residues: Protein L-arginine + H2O <=> Protein L-citrulline + NH3. Several types are recognised (and included in the family) on the basis of molecular mass, substrate specificity, and tissue localisation. The expression of type I PAD is known to be under the control of oestrogen [3]. 25.00 25.00 26.30 27.30 24.80 24.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.39 0.70 -6.04 7 323 2012-10-01 20:45:11 2003-04-07 12:59:11 10 8 86 12 176 305 2 310.40 44 60.02 CHANGED sLsEsslFTDTVsFRlAPWIMTPNT.PP.EVYVCplpD....N-cFLcslspLspKApCKLTlCPp.ENRsDRWIQDEMEhGYIpAPHKohPVVFDSPRsRGLKDFPlK+lLGPDFGYVTRE..hsssSuLDSFGNLEVSPPVTVpGKEYPLGRILIGuS.aPpSsGRcMspsVRDFLpAQQVQAPVELaSDWLsVGHVDEFLSFVPssDpKGFRLLLASPuACapLFQEKQctGaGEAhhF-GL+tppp....oIscILuscpLpcpNtasQpCIDWNR-lLKRELGLuEuDIIDIPQLFphcptt.....AcAFFPsMVNMlVLGKaLGIPKPFGPlINGRCCLEEKVpSLLEPLGLpCTFIsDahsYHhhtGEVHCGTNVRRKPFuFKWWpMVP 
................................................................................s......lapDoVhFRlAPhlhpPsh..P.plals..p..h.hp...........p........Flct..ltt.l.st...pst.hp..l...hl...s...p......s.p.s.DpW.hQD.....chEhG.Yh.....ph......P.....p....p....s....h......s....V....l....h.......c.....o.P...R.s.ttL.....p..c....F..s....h.+..plh....u.s.shG..aV.s...p.t.................t.....t...s..ssl.DS..hGNL-VSPP.lss.t.G.KpYPhGRIlhGss...hs....t.....t.u+phtpslpsFL.AQ.pVQ.sPlc.LaoDWLhVGHVDEFhsFlPs....s....s....p....KGFp...hLlASPpushcLhpctpppG......aG......cs..h..h.....F.......p.................u...lt.......t...............t.....p........................oIsplLusc..pLh..ptNtas....p...p.s...I.........chNR-lLK+ELGLs.EpDIl-.lPtLFph......p.p...............................A.....AaaPsh.VNMlV.L.....s....+.p..LG....I..PKPaGPh........l.....p..GpCsLE..pclpsLlcsLGhpCsFIDDa.sYHh.h...hGElHCGoNV.pR.pPFsa.KWWp.......................................... 0 12 52 89 +3638 PF04371 PAD_porph Porphyromonas-type peptidyl-arginine deiminase Kerrison ND anon COG2957 Family Peptidyl-arginine deiminase (PAD) enzymes catalyse the deimination of the guanidino group from carboxy-terminal arginine residues of various peptides to produce ammonia. PAD from Porphyromonas gingivalis (PPAD) appears to be evolutionarily unrelated to mammalian PAD (Pfam:PF03068), which is a metalloenzyme. PPAD is thought to belong to the same superfamily as aminotransferase and arginine deiminase, and to form an alpha/beta propeller structure. This family has previously been named PPADH (Porphyromonas peptidyl-arginine deiminase homologues) [1]. The predicted catalytic residues in PPAD (Swiss:Q9RQJ2) are Asp130, Asp187, His236, Asp238 and Cys351 [1]. These are absolutely conserved with the exception of Asp187 which is absent in two family members. PPAD is also able to catalyse the deimination of free L-arginine, but has primarily peptidyl-arginine specificity. It may have a FMN cofactor [2]. 
25.00 25.00 28.80 27.50 24.10 24.30 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.07 0.70 -5.39 22 1778 2012-10-01 20:45:11 2003-04-07 12:59:11 10 7 1418 31 444 1482 890 324.50 33 93.65 CHANGED a+MPAEapPppusahsWPp+s-sW..ttthtsAptsasslAcAIu+h.........E.VplsVsssp...htsARthLss.........slcllchshsDuWhRDsGPsallssp.......G.chc..slDWpFNuWGGhh.....sappDspVupplsclpthshapss......alLEGGuIcsDGpGTlLTTcpCLLs.sRNPpLo+tpIEppL+caLusp+llWLtcGhhts...-.TsGHlDslspFlsPupVlhshs-DtsDPpYthhptshchLpshpDAcG+.hplh+lPhPs..................th.cpsG-RLsASYsNFhIsNsullhPtasDs.sDphAhclLpphFPc+clVGl.suRcllh.GGGslHCITQQ.P ................................................................hhPuEap.pptshhhW.P...p.c.....s.s...W........t......t...h..p.....sp...p...sa...s...p....lspsI.uch.........-..Vh.l.hst..ptp........htp.s.p...p.hlsp.......................plp.h.l...c..h...s..s..s..D..s..WhRDpGPhhl..hs.sp.............................u....p....ht.......slD..as..F..N.u..W..Gsph................................shc.p.D....s.p....l...s...p...p...l..s....c.....h....t....t....h..s.....h...h.pss...................hlLEG.GuI....cs.....DG..p....GTlLsTc.......pCLLs.........s....R..NPp....L................o.........+.p.p..I..Epp.LpchL...G..l.c.........+lI..WL...s.pG..h....hts....................-.....Ts..uHlDslspFl.....sssp..........llhs.......h..s.......-..D.p.......s.......D.......s.......p.......Y..t.h.hptt.h.chLppt.pcsc.....Gp.hplhcL..PhPt.......................................c...sG....c...R.L...s..A.S..YsNFh.lsN............s....u....l.l..........l........P..ta........s.........D........s.....s.........D......ph.A.hchLpphF......P.s......+....c..l.l.Gl.ss....c.pll....h.tGGslHClTQQ.P...................... 0 159 301 392 +3639 PF03551 PadR Transcriptional regulator PadR-like family Bateman A anon Pfam-B_1014 (release 7.0) Family Members of this family are transcriptional regulators that appear to be related to the Pfam:PF01047 family. 
This family includes PadR Swiss:Q9EXE6 a protein that is involved in negative regulation of phenolic acid metabolism. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.38 0.72 -4.21 33 8479 2012-10-04 14:01:12 2003-04-07 12:59:11 9 19 3006 28 2200 6899 485 72.50 28 49.56 CHANGED lLtlL.sc.psh..aGYplhpclcph..Ghhphs...cGoLYPhLc+LEccGLlssphppt..htu.sRKhYpLT-sG+ptLpc ................................lLslL..tc..pst...aGY.-.lhp..p.l....c....p....h...............s..h...h...p....h..s.........tG...olYshLp+L.pcpuh.l............p.........s....t......h.pp.............t..............tt....sR.....K...h.Y.p..lT..cpGcptLt............................................ 1 832 1595 1975 +3640 PF03283 PAE Pectinacetylesterase Mifsud W anon Pfam-B_1589 (release 6.5) Family \N 22.90 22.90 22.90 23.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.18 0.70 -5.82 27 583 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 204 0 340 545 102 274.10 26 69.61 CHANGED hhhhphs..hssp...................VtlsllpsAhtcGAlCLDGShPuYHhccG.GoGussWLlphEGGG.WCss......hcsChtRppT..chGSSphh.ppplsFs....GlhSspsppNPDFaNWN+VplRYCDGuSFsG.csc...spuspLaFRGp+IapAlh--Llsc.GMppAcpslL.oGCSAGGLusllaCDp....F+shLPt....sspVKCloDuGaFl...DshclsGscshcpha.....psllplps.tpslspsCss+hpPs........CFFPQpllptIcTPlFllNuAYDsWQlpphLsP.ss..tt.WttC+hs...hspCsssQlphlpsFRsphlsulpsh..tpsppsGhFlsSCasHCQotpptoWhsts..SPhlpspslAcuVGDWaasRp..hctl 
...................................................................h.........................................tshC.DGo.s..ua..a..hp.u..........tu.s.p.p.all.hp.....GGu.hC.s..............hpsC........R.....ht.s...........thho...S..p......h.......p...t.h...hs......................Gllus.p.t.tNP.aas..WN.hVhl.YCsGssasG...s.......s.....p........................................................p...............t...............s..............t.........p..............h..............hapGtpl.........hp...s...lhpcLhsp.....Ghtp....A....p.........p.h..lL.sGs..SAGGhushlpsDp....hpph.hst............sspV.+sluDu..G.hFl.........................s......th..thtt..ht.h...........t.hh......t...h..p.......tt.hst.C...t.h..t.............Chas..hh......hps.Ph..Fhhp.haD.hQh.t.......................................hp..t....ph.hhptht.thht..h................u.ahsuCh.Ht...............a...................htt..........h...h..............h................................................................................................................................................. 0 128 217 285 +3641 PF03403 PAF-AH_p_II Platelet-activating factor acetylhydrolase, isoform II Mifsud W anon Pfam-B_3469 (release 6.6) Family Platelet-activating factor acetylhydrolase (PAF-AH) is a subfamily of phospholipases A2, responsible for inactivation of platelet-activating factor through cleavage of an acetyl group. Three known PAF-AHs are the brain heterotrimeric PAF-AH Ib, whose catalytic beta and gamma subunits are aligned in Pfam:PF02266, the extracellular, plasma PAF-AH (pPAF-AH), and the intracellular PAF-AH isoform II (PAF-AH II). This family aligns pPAF-AH and PAF-AH II, whose similarity was previously noted. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.42 0.70 -6.17 7 427 2012-10-03 11:45:05 2003-04-07 12:59:11 8 13 218 13 281 1346 263 295.60 23 70.00 CHANGED huupspsplPtssGPasVG.............CsDLMhsts....cusFlRLYYPs...p.-psp.-sLWIPpcEYh.GLu-aLss.phhGplL.phhhGShphPsphNush+ss..-K.YPLllFSHGLGAFRTlYSAlshpLASpGFlVAAVEHRDcSAusTYahc-tsssE.t.......pcpWlhh+chpt.-pEhplRpcQVpQRspEC.pALshlhpIs.tGpsscNl.....LsssFD...hppLKsslDpo+..lAVhGHSFGGATsl.......poLuc-....pcFRCuIALDsWMaPlsc-ha.Sph.QPlhFINStcFQhstslhpMKK......................hhs.ccpp+hITlcGSVHpsFsDFsFloGclIG+hhpl..KGclDsp.Ah-lss+ASLAFLQKaLsLcc-asQWssLh-G.spNl .......................................................................................................h.............G....lG....................................t.....................t...h.....plaYPs....................t.....................ah..s...........t................s.h.......t..t..h...h...............t...h............h..h................h..................h..h....t......................h.p..h..s..s.........ss......h.............t..................st..............t.....p..aP.lllFSH.G....l..u...u.....R.ph..YSt.hshpLASp...GalVsslE...H......p.....D.tS.....us.h.o..h.h......p..............tt...................t.................................................................................................h.........t....t.................t..h....thRptQ.lp.R.....h.tE......h....shp..hl...ths....tG..p............h.........................h......t..p...........h...t.h..psp....ls.h...pp.........lshhGHSFGu.ATsh...........................................tsh.pp................p.h...p...s.....sl.hl.....Ds..a...h.......h...s......l...t.........p......t............................t.........h.................................P.............h.....h.hl.p....s..p....t...a......................t........p.............................h..p.............................................................
..........................................h..................t..................h.....h...h...h.....t.s.H.s.sDh..h...h......s.................................................................................................................................................................................................................................................................................... 1 109 170 222 +3642 PF03985 Paf1 Paf1 Finn RD, Wood V anon Pfam-B_ (release 7.3) Family Members of this family are components of the RNA polymerase II associated Paf1 complex. The Paf1 complex functions during the elongation phase of transcription in conjunction with Spt4-Spt5 and Spt16-Pob3i [1,2]. 20.40 20.40 20.50 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.59 0.70 -5.74 12 358 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 263 0 255 360 4 334.60 24 76.57 CHANGED p-aIs+l+YpNsLPsPshsPKhlpash....spsh.sp.hloSLhpcpphssLh...DpDLGhslDLl.........Dpchhts.s.ssp...LcscDchLL+Dsthsp.........ho+pc.ptVoaLR+TEYIS...sp.sp.t.....stcthpschthslcpshpppp..hhstpp.lctlEpTF-sspp.....phhpHssK+plpsVcshsLLPDhsphcpsahplpFsGss..........shstp-tp.........p.thpsslhtsh-hEt-caluhahs........hpppL-cphcDhp........cppc.YcaKhhR-Yshphhp...........psthp-hslhhs............pcpsssYYpPLcoRlcL++RR......lc.lVpppshsplslphRsssspEpchpcthRtch-shshsph--.E-Ec-ct.p.ppc.pcspspppptttptps.pptt...........tp.t.ctppttcptpsssotss 
....................................................................................................................................................thlspl+YpNsLPs.shsPKhlphsh.......tt........phh.s....h.hh.os.Lt+p.phph.....-.-lGhslD..L.l.................s.....phh.....h..s......s...............lcPtDp.LL..c..p...h................................................psp..pt.ts.V.saLR+TpYIS..........st...s.t.t....................s.pt.php..h....thp.hp....p.t............t.....s...p.t....l.p.t...I-p..oFpsspp....................................tH.....s...+....p..lpsVphhPlhP..Dhpha.....p..s..hh.lhFsssP...............................s.t.tt.....................t.h.p...shl.h..........t..........t.........p........p....pa..h..s..h.....ah...s..............................h..t.p...h...p.....t.t-..................t...t........p..a.p.......aphhRpYph.php..................tpth.-....s.hhhh........................c.tpssaY..l..s.+hpLppcR....................t..h............p........t......s.l........lph+.t..st.t.....-...t..p.h..p.h.t.....t......................................................................................................................pttt........................................................................................................... 1 81 135 206 +3643 PF02671 PAH Paired amphipathic helix repeat Bateman A, Mian N anon Pfam-B_281 (release 5.4) Repeat This family contains the paired amphipathic helix repeat. The family contains the yeast SIN3 gene Swiss:P22579 (also known as SDI1) that is a negative regulator of the yeast HO gene [1]. This repeat may be distantly related to the helix-loop-helix motif, which mediate protein-protein interactions. 
21.30 21.30 21.60 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.13 0.72 -4.34 121 1587 2009-01-15 18:05:59 2003-04-07 12:59:11 16 30 270 12 991 1522 9 48.30 34 10.07 CHANGED sphYppFLclLptapp.pphstsp.................lhp................c....Vsp.Lhp.....sa..-Llp...tFsp.FLs .................................................chYppFLcllpp.app..pp..lstst.........................Vhp................p.....Vsp..LFp.....s.as..-Llp...tFsp.FLP............... 0 344 546 754 +3644 PF00221 Lyase_aromatic PAL; Aromatic amino acid lyase Finn RD, Eberhardt R anon Prosite Family This family includes proteins with phenylalanine ammonia-lyase, EC:4.3.1.24, histidine ammonia-lyase, EC:4.3.1.3, and tyrosine aminomutase, EC:5.4.3.6, activities [1-3]. 21.80 21.80 22.20 22.00 20.90 21.70 hmmbuild -o /dev/null HMM SEED 473 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.48 0.70 -6.05 143 3331 2009-12-15 11:53:36 2003-04-07 12:59:11 14 13 2194 102 901 2989 703 434.10 35 86.47 CHANGED lpl.suss.Lolspltslu....p.ps...tplpLs..spuhpclppupphlp.chlpps.pslYGlsTGFGthusscI..s.........cc...htpLQcNLlhSHusGlGp............................................slspshlRshhllRlsoLu+G.....aSGlRhpllctLhph.LNpslhPhlPppGSlGASGDLAPLAHluhs.llGc.Gcs.hh.p.......Gp..................h.....hsu.s-ALppsGlpP..l.pLtsKEGLALlNGTpshoAlushulhcAppLhthAtlhuALohEAlpGsspsFcsplH.tl.RsH.GQlcsAstlRplLp.G...S......plhpscp....................+..............lQDsYoLRChPQVhGAsh-slphspcslphEhN..usoDNPLlh...........s....-...............t.s.......cl..lSGGNFHupPlAhAhDhlslAluclGslu-RRlspLlssths.G.LPsFLs......s....ss.GLsSGaMlsQhouAuLsSEN+hLApPuSsDSls.oSusp..EDHVSMushuAR+h.tchl-NlptllAlELlsAsQAl-hR........t.....shph.usshptlhphlRppVshh..p...p 
..........................................................................................................................ltstpLoltplhtlh......p.....ts......spl.pls...tpuhttlptut.thlp.p..hh......t...p.s.p................s...sYGlsTGFG.thu...shpl..s..................cphtpLQ...csLlh.SHusGlGt..........................................................shspphsRhhMll+ls..o.LspG.....aSGlRhpllptlhth.lNt.slhPhlPt..pG.SlGASGDLsPL.uHhuhs.Ll.GcG...cs...hh...p...........Gc..........................hhsutcu...LpttG.l..p..P.....l.p.LtsKE..GLALlNGT.ps...ouhuhhulhcAppLhthuplhuAhohEuht.....Gp.psFc.t..p..lH.t.l.Rsa.GQ.hpsAtthRp..l..Lp.u........S.....plhppppt.............+...................................................lQD.sY....o.LRC.PQlhGush-slp...hstphlp.hEhN..u.....ssDNP....L.lh......s....c.............................................................t.s....cl..lSGGNFHu.pPlAhuhD..LtlAlAcluslu-RRhstLlssth..s.....s..LPsFLs......s.......ps..GlsSGa.M......l...sQh..ss.Auls..SEs+t.LApPuS..l.DS...hs.ouspp........ED..+VSMushAuR+h.hchl-shptllulE......hlsAsQ...ul-hR........t.....s.p.......ush.h.pps.hp.hhRphls....p........................................................................................... 
0 266 520 727 +3645 PF02089 Palm_thioest Palmitoyl protein thioesterase Mian N, Bateman A anon IPR002472 Domain \N 21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.93 0.70 -5.47 2 572 2012-10-03 11:45:05 2003-04-07 12:59:11 10 11 252 6 338 538 22 229.00 32 83.50 CHANGED DPPuP.PLVIWHGMGDSCCNPhSMGsIKKMVEKcIPGIaVLSLEIGKshhEDVENSFFLNVNsQVshVCQILtKDPKLQQGYNAhGFSQGGQFLRAVAQRCPoPPMhsLISVGGQHQGVFGLPRCPGESSHICDFIRKoLNAGAYsKhlQERLVQApYWHDPI+EslYRNaSIFLADINQERsVNESYKKNLMALKKFVMVKFhNDoIVDPVDSEWFGFYRSGQAKETIPLQESTLYTpDRLGLKtMDKAGpLVFLAhEGDHLQlScEWFhAHIIPFLc .............................................................................PhlhhHGh..............h..Dp........sh......t.h.....p...hhp..p.....h.s.Gh...hshslpl.s.p..............s....s....tsh..hhplptQhp.ls.p.............lt.p.........L..t..p.....G...hshlGaSQ...Gu.hhRulhpps.s.....s.....s.h.....p.................shIolu.u.p......tGhhuhs..h..C................t......h.......h.C..phh....pphl.p.h.ts.Ysphs...Q...ppls..upYa+...D..P..h...c..........s......Y..h...pt...S.FLs..clN.s..E............p...................h............N..p.....s.....Y......+........c...Nh........hpLp...phVhlhF....sDshl..Php.Sp..aFG....aa.........t...ss.t..........p.p..h...........h.sh...pcp.t.lYtcDhlGL+ph.cptGpl.hh.hs...h...G...t.H...h....t.....h..s.....t............................................... 2 127 185 265 +3646 PF02569 Pantoate_ligase Pantoate-beta-alanine ligase Mian N, Bateman A anon COGs Family Pantoate-beta-alanine ligase, also know as pantothenate synthase, (EC:6.3.2.1) catalyses the formation of pantothenate from pantoate and alanine [1]. 
20.70 20.70 20.80 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.87 0.70 -5.64 7 3517 2012-10-02 18:00:56 2003-04-07 12:59:11 10 12 3268 96 934 2614 2426 268.30 42 94.43 CHANGED hplhpsltplpphp.+phR.p..t+plGFVPTMGhLH-GHhsLlcpA+.tcNshVVVSIFVNPsQFGssEDh-tYPRsl-cDhtlhEptsVDllFsPssc-MYPtshpsplp.tt.........Lop.LEGtsRPGHFcGVsTlVsKLFNlVpPsRAYFGpKDhQQlhllcphVpDh.hDlEllssPIVR-pDGLAhSSRNsYLssEpRKtA.uLY+uLptutphlps....GE+sspclhptht.tl....lcttpsh.lDYlEltDhp..L....-Phpp.cpt..llhVAshlGc.......sRLIDNhhl ..................................................lhpslttlc.p.hh....pp.h..+.tp.......up..pluhVPTMGsL..H-GHhsLlccA+.....p...c......s.....D...............h......V.VVSIFVNPh.QF...u.s...s.EDhspY.P...RsL.pcDhphL..........p..........p............t..................G...............V.....D............h......lFsP.s...s.c-h.....Y....P....p....s........h......p.....s...p.s...p...lss..................................lushL..-G.up..R.P.G.........HFcGV.s.TVVsKLFNl............V.....p....P......D......hAhFGcKDaQQLslI+pMVpDh..s.h.....s.....l.....c.Ilus.P.hlR.-.p..D.....GLAhSSRNsYLo.s-cR.p.tAstL..csLpt.....s..tp....th..ps......................G.p....c......s..h...pt.l..h..ptst....ph.........................Lp..p..t......h....h..plD....Ylplhcsss...L.....................p..s......h......p......p......h......p....p......t......h......ll..h.h.A.shl.Gp.....................sRLIDNh................................................ 0 311 620 808 +3647 PF02548 Pantoate_transf Ketopantoate hydroxymethyltransferase Mian N, Bateman A anon COGs Family Ketopantoate hydroxymethyltransferase (EC:2.1.2.11) is the first enzyme in the pantothenate biosynthesis pathway. 
25.90 25.90 26.10 26.10 25.70 25.40 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.44 0.70 -5.27 117 3509 2012-10-10 15:06:27 2003-04-07 12:59:11 10 8 3295 45 993 2531 3391 255.50 45 92.45 CHANGED p..+h...TlsclpphKpp...sc+IshlTAYDhshAplh-puGlDhlLVGDSLGMVlhGhsoTlsVTl--MlaHscAVsRGsp....puhlluDhPFhSYpsoscpAlcsAs+lhc.puGApAVKLEGGt.....thsctlctLsctGIPVhuHlGLTPQslpthGGa.+lQG.+.stpsAppllcDAhuLpcAGAFulVLEslPspLAppITp..pl.slPTIGIGAGsssDGQVLVhpDhLG..l.......sshtPKFVKpYs...shsshlpp......AlppYsp-V+sssFPst- ....................hTlspLtchK...p...p...sc.....+lshlTAYDh..shA+lh-p.s.G.lDllLV.GDS.LGMllhGa-o....TLPVTl--MlaHscAV.tRG.....ut..................p....shll.uDhPFhoYt..s.o.scpAhpsAs+lh+..u.GApuVK...lEG....Gt.........tl.s-.s........lchLsptG.IPVhuHlGLTPQSV......shhG.GYKVQ...G.+.s.....tc........s......A......ppLlpDAhAl-pAGAhh...lVLEs.VPspLApcITc..tL...s.IPsI.G.IGAGs.......s.sDGQVLVhaDhLG.......l...............t...ss.....hhPKFlKpah..............shss....s.lpp..........AlppYhp-V+susFPup................................... 0 329 652 849 +3648 PF02711 Pap_E4 E4 protein Mian N, Bateman A anon Pfam-B_1589 (release 5.5) Family This is is a family of Papillomavirus proteins, E4, coded for by ORF4. A splice variant, E1--E4, exists but neither the function of E4 or E1--E4 is known [1]. 20.50 20.50 22.10 23.40 19.00 17.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.81 0.72 -3.17 40 248 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 78 0 0 191 0 91.30 42 90.27 CHANGED LpLh.ssp.....+YPLLcLLsshp...s....PPp..P.Pp..shAPp+......s+RRl.sD.............sDsssspssssssh.t..........pssWTVpTsssol..olpupTpsGToVsVTL+L .......LpLs..sss.....+YPLLpLLsoh........T........PP+.hPsPs..PWAPp+.......+RRL.sD.............pDps.p.s.p.psssss................ps.WTVpp...t...ol..pLpApTKcGsoVlVTL+L........................ 
0 0 0 0 +3649 PF04755 PAP_fibrillin PAP_fibrillin Mifsud W anon Pfam-B_3698 (release 7.5) Family This family identifies a conserved region found in a number of plastid lipid-associated proteins (PAPs), and in a number of putative fibrillin proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.48 0.71 -4.59 12 635 2009-01-15 18:05:59 2003-04-07 12:59:11 7 13 130 0 381 643 102 170.70 22 64.03 CHANGED tpLKppLl-ulhGp.p.RGLp.Aos-s..+scI.phlppLEuhNPossPs.-s.sLLsGpWhLhYTottslh......sLl.tspl.h.l+ltpI.QsI........Dspshsl.N.sp..htu....PhhpssholsAcFEltSspRlplpFccuhlt...l.....................h.t..t.hpssht.l........Plph.hssspspuWL.sTYLDc..-LRISRGstGslFVLh ........................................................................t......lht.h...t...h..Gh...hp........pp.......ptpl.phlpt......LE.....sh....s.ss.s......t......sh....p........s..................s.........h.LpGpWpLhY...oo..ttthh...................................h...........s.th....h......h.p.....h.s.p...lhQ.tl....................cs..ts....h...p.....h....N..hhp.........h............h.s.......h..p.s.....t........h...plp.u....p..h.p...h......h.......s.......s.......p........+.l......p.l..pFp.ps.hlt........................................................................................................................s.....h....................................t......t.s.p...u.h...h..hTY.LDc......clRls.R..us.t.Gshhlh.h.................................................................................................................................. 0 148 296 349 +3650 PF04926 PAP_RNA-bind Poly(A) polymerase predicted RNA binding domain Wood V, Bateman A anon Pfam-B_1341 (release 7.6) Domain Based on its similarity structurally to the RNA recognition motif this domain is thought to be RNA binding [1]. 
22.10 22.10 22.30 22.20 21.70 22.00 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.84 0.71 -4.70 54 630 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 301 11 392 595 5 146.80 29 22.62 CHANGED sFFpp.YKaYLplhsuupsp-.pphcWpGhVESKlRhLlt........pLEphss.ltlA.HPaP+sFp.......................................................................t...ppppp............................haposaaIGLphpttps.pttp................lDlphsspcFhshsps....h.t..h.tsthplpl..paVKpppLPs.V..atputp+sp+s .............................................................................sFFpp.YKaYlhlhs..s......us......s......p-..pthpWs.GhVES+lRhL.lt........pLE.+.s.ph...ltl.A..Hs.PpsFs.........................................................................................................................................psppp.....................................apohaa.lGLthcp...s....t.s.t....t.............................p..lDlshsl.ppFpsp.lhp....h.p...hh.ctsh.plts..pHV.+.pppL.phl....th.pc....t.................................................................. 0 121 204 312 +3651 PF04795 PAPA-1 PAPA-1-like conserved region Waterfield DI, Finn RD anon Pfam-B_6501 (release 7.5) Family Family of proteins with a conserved region found in PAPA-1, a PAP-1 binding protein. 21.00 21.00 21.90 21.90 20.10 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.12 0.72 -3.28 19 276 2009-09-11 14:54:57 2003-04-07 12:59:11 7 9 209 0 180 273 0 83.40 31 21.17 CHANGED Etth+RAEtARRR+.QuEK+sEEpKh-TIp+LL+ppu.......tt+.tt...tttppshtppcstcsppsssshlRals..utpGopluhPpc.lssP .................Ehth+RuEtAR+R+..uEK+sEEc.........K.......t-TIp+LLKppu.............tp.p.t.t..p......t.t.t.....pp.....tttpt..c.ttp.tt.ssshlRalp....s..pGshlshP.t.h........................................................... 
0 50 95 142 +3652 PF03333 PapB Adhesin biosynthesis transcription regulatory protein Mifsud W anon Pfam-B_3068 (release 6.5) Family This family includes PapB, DaaA, FanA, FanB, and AfaA. 20.40 20.40 20.40 23.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.00 0.72 -4.17 14 275 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 191 2 10 111 1 87.40 42 78.55 CHANGED hpphst......hhhph+pthLhPGplsEc+FaLLhEISsI+ScKVI.AL+DYLVhGhoRKElCE+asVssGYhShuLsRLp+lsphVtpls.aY .................t.th...............h+tutL.PGplsEEpFaLLl-ISsI+S-KlIhAL+DYLVpGaSRKpVCE+ashssGYFSsslsRLpclsphVtpLssaY........... 0 1 3 6 +3653 PF03628 PapG_C PapG chaperone-binding domain Finn RD anon Pfam-B_3074 (release 7.0) Domain PapG, the adhesin of the P-pili, is situated at the tip and is only a minor component of the whole pilus structure. A two-domain structure has been postulated for PapG; a carbohydrate binding N-terminus and chaperone binding C-terminus (this domain). The chaperone-binding domain is highly conserved, and is essential for the correct assembly of the pili structure when aided by the chaperone molecule PapD [1,2]. 19.60 19.60 20.70 19.80 19.00 18.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -10.42 0.72 -4.38 3 75 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 38 2 1 61 0 108.50 73 33.56 CHANGED HGNLSIDSANGNYASQTLSIYCDVPVoVKISLLSNTpPAYNN.QpFSVGLGNGWDSIISLDGV-puEETLRWYTAGS+TVTIGSRLYGEuGKIpPGsLSGSMTMlMpLP ........HGDLSINSANNHYA..AQTLSVSCDVPsNI.RFhLLpNTsPsYSH.GppFSVGLGH...GWDSIVSlNGVDTGETT.M..RWY+AGTQNLTIGSRLYGESSKIQPGVLSGSATLLMILP.... 2 0 0 0 +3654 PF03627 PapG_N PapG carbohydrate binding domain Finn RD anon Pfam-B_3074 (release 7.0) Domain PapG, the adhesin of the P-pili, is situated at the tip and is only a minor component of the whole pilus structure. 
A two-domain structure has been postulated for PapG; a carbohydrate binding N-terminus (this domain) and chaperone binding C-terminus. The carbohydrate-binding domain interacts with the receptor glycan [1,2]. 25.00 25.00 48.80 48.10 22.10 21.10 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.50 0.70 -4.90 4 66 2012-10-02 17:35:21 2003-04-07 12:59:11 8 2 29 4 1 50 0 216.30 62 66.66 CHANGED MKKWFPAFLF.LSLSGsNDALAuWpNlMFYuFNDh.shsuGNVplhDpsQFhlsWpoGuATAT..YsuCsGPEFssG..lYapEYlAWlVVPK+V.TpsGYslFl-VpSKhGWS.ENpsDpD.YaFhpGYcWDphsssuuRlCh.sGpp+pLsppFs-lhFplhLPsDLPKGcYshPl+YlRGIQ+HaYsahts+YKhPYs.hKpLPtsNTl.hShcNsGuCRPSAQSLEI ..MKKWFPAhLF.LslSGpssA...hpshhFYShsDs..h.thsVhlTphsQFIsshpsu.uTsT..aspCNG.sascG..hYapEYhAWlVhPK+V.ohNGYslalElpsKGSaS.-sp.DNDsYahhKGatWDE.A.suGplC.p.GEpppLs.p.FsslhhpstLPsDLPhGDYohsl.ahpGhQRp.hsYlGuRaKIP.slhKThPhpsph.F.hKNhGGCRPSAQSLEI 0 0 0 0 +3655 PF03025 Papilloma_E5 Papillomavirus E5 Bateman A anon Pfam-B_1916 (release 6.4) Family The E5 protein from papillomaviruses is about 80 amino acids long. The proteins are contain three regions that are predicted to be transmembrane alpha helices. The function of this protein is unknown. 25.00 25.00 37.90 37.60 21.10 19.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.91 0.72 -4.03 13 139 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 32 0 0 103 0 70.20 58 92.77 CHANGED llhlFllCFCVhLhlChhl.PLL.SlplaAhlllLVllhW.VshTSPhcsFhVYllFhYlPhaLlHhHA.hhhp .....LLssFLLCFCVLLCVCLLlRPLLLSVSsYsoLlLLVLLLW.losuSshRsFhVYllFlYIPLFLIHsas....... 0 0 0 0 +3656 PF02380 Papo_T_antigen T-antigen specific domain Mian N, Bateman A anon Pfam-B_1131 (release 5.2) Family This domain represents a conserved region in papovavirus small and middle T-antigens. It is found as the N-terminal domain in the small T-antigen, and is centrally located in the middle T-antigen. 
19.10 19.10 20.20 19.80 18.30 17.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.92 0.72 -4.06 10 268 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 63 6 0 201 0 85.00 40 54.97 CHANGED V.ussasssh-phaCKpWssCh+shs.spCs.CllChL+ppHcp+.KhhR+sPLVWl-CYCa-CappWFGh-loppslhhWspIIupTPhcsLcL .......................l..t.h....cphasKpWshC.pp.p.scCs.ChhC.LchpHhp+.KhhRKp...PLVWl-CYChcCappWFGhsl.TpE.ohpaW.plltpTsa.p.LcL.................. 1 0 0 0 +3657 PF01507 PAPS_reduct Phosphoadenosine phosphosulfate reductase family Bashton M, Bateman A anon Pfam-B_590 (release 4.0) Family This domain is found in phosphoadenosine phosphosulfate (PAPS) reductase enzymes or PAPS sulfotransferase. PAPS reductase is part of the adenine nucleotide alpha hydrolases superfamily also including N type ATP PPases and ATP sulphurylases [1]. The enzyme uses thioredoxin as an electron donor for the reduction of PAPS to phospho-adenosine-phosphate (PAP) [1,2]. It is also found in NodP nodulation protein P from Rhizobium which has ATP sulfurylase activity (sulfate adenylate transferase) [3]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.14 0.71 -4.44 27 7027 2012-10-02 18:00:56 2003-04-07 12:59:11 14 43 3053 30 1986 5609 1987 185.10 24 61.61 CHANGED phshsaShGp-usVhLcLshKshhs..h....................P.llalDTGacF.Eshcah-cltc+h...sls.lhlhtstcshtpths.hs...phapp........hs.lhKscslpp........................................ALcch..p..AhhsGhRRs-st.sRuphtlhphcss.t.......................hl+lh.PlhsWott-lWpYlhtpslPhssLatpG.....................................apolGCh.sTss ..................................................................................................................t..sh.hS.h.G.t-us....V.hL.c....L....s.t..ch..hhs....h.....................................................s..ll..a.l....D..T......s......h......p.........F...........E.s........h........p.....a.......h.........-...............c.....h.........t...cph..............sh.....p....l....h......s....h....p....s............t......s...h..t.....t..t..h......s.........h........t..................t..h.app......................................................h.h.K..sp.s....hpp........................................................................................................................sl....p.....ph..........t...t..u..h...h..sG...h.RR.--s.......t...s.......R.........u........p.........................h.....h..s..h....c..p..t......tt.....................................h.+.....lh..Pl..h...s...W...o..ptDlWpYl...tp.......p.......sl.............h........ss.La...t.t........................................................a..ho...lGsh.hTt........................................................................................................................................................... 
0 598 1228 1656 +3659 PF03285 Paralemmin Paralemmin Mifsud W anon Pfam-B_4064 (release 6.5) Family \N 25.00 25.00 25.40 26.60 24.60 24.70 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.94 0.70 -4.92 6 266 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 38 0 120 211 0 209.00 26 60.43 CHANGED h+KQMQEDEtKTRtLEETItRLE+ElEpLEsGsSs.suoKEs.stsu........sPAtpE.p.csl.ssppoPLsTs.ttpchS.oPh+.stussMMpA............................................VVHAV...DGsspNGlpsLSSSEVDELlHKADEVTLuEuutsu...-............sss.s.t+sTP.R+EITGVpA+PtpssstssstpPutEsPVTMlFMGYQNVEDEsETKKVLGlE-.TIKAELVVIEDu-spstsps.....+-pAPPNGSAuEPstsssptEEsphs..ssssssssc....DhshKKQRCKCCoVM ..................................................................................................................................................tp.ppDp.php.LEpsl...R..L..EpE.lp.LEp.t.t.t..hu.spEp..h..t..........................hpc.....c...p............................................................................................................................................................................................................................................................................................................................................................p.PVTMlFMGYQph.-D.pt.Et.pp.hh.uhpt...hlpAElVlIp-ttt.................................................................................................................................................................................................... 0 4 21 51 +3660 PF01508 Paramecium_SA Paramecium surface antigen domain Bateman A anon Bateman A Domain This domain is a cysteine rich extracellular repeat found in surface antigens of Paramecium. The domain contains 8 cysteine residues. 
21.20 21.20 21.60 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.79 0.72 -3.76 78 1822 2009-01-15 18:05:59 2003-04-07 12:59:11 11 40 5 0 1522 1826 0 63.40 27 47.23 CHANGED ttTcspCpua.ss....CTs.t..........sG......suClsh...ssCssY.sspssCtps.............ssspChWsss........tsChsto.......Cs ....................h..ocspC.psah..ss............Cos..................sG.......suCl..ph....ssCssa...tspp..s..Chts................ssstChWsss............tpChs.ts......Ct.......................... 0 1522 1522 1522 +3661 PF03210 Paramyx_P_V_C Paramyx_P_V; Paramyxovirus P/V phosphoprotein C-terminal Mifsud W anon Pfam-B_2037 (release 6.5) Family Paramyxoviridae P genes are able to generate more than one product, using alternative reading frames and RNA editing. The P gene encodes the structural phosphoprotein P. In addition, it encodes several non-structural proteins present in the infected cell but not in the virus particle. This family includes phosphoprotein P and the non-structural phosphoprotein V from different paramyxoviruses. Phosphoprotein P is essential for the activity of the RNA polymerase complex which it forms with another subunit, L Pfam:PF00946. Although all the catalytic activities of the polymerase are associated with the L subunit, its function requires specific interactions with phosphoprotein P [2]. The P and V phosphoproteins are amino co-terminal, but diverge at their C-termini. This difference is generated by an RNA-editing mechanism in which one or two non-templated G residues are inserted into P-gene-derived mRNA. In measles virus and Sendai virus, one G residue is inserted and the edited transcript encodes the V protein. In mumps, simian virus type 5 and Newcastle disease virus, two G residues are inserted, and the edited transcript codes for the P protein [2]. Being phosphoproteins, both P and V are rich in serine and threonine residues over their whole lengths. 
In addition, the V proteins are rich in cysteine residues at the C-termini [3]. This C-terminal region of the P phosphoprotein is likely to be the nucleocapsid-binding domain, and is found to be intrinsically disordered and thus liable to induced folding [5]. 25.00 25.00 38.50 64.70 23.40 19.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.64 0.71 -4.35 21 673 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 103 5 0 640 0 161.50 42 37.80 CHANGED sclcpchcpILsplsol.tlKs-lpsIKs.......olATlEGploolpIh-PGsusss..sssclctssc.pPllssssGcssspltc........................psplthD.Lu+Plsspsppshthssssssho..+..pslpuLIcsphhsscp+pchhphlsps+opp-Ltcl++tIl .......Klcpp.phllpphsSl.hh+uElpplKp.......SlushEupLu.hhIh.PGhupss..shuDlc..tssc.+PllspssGcs.s...Vhp........................tGplhhshh.pPls+.s.p.hhshs..ss..s..s.shu..+..sslRulIpSp.hc.sppthLhohLDshcuhp-ltKh+ph................................ 0 0 0 0 +3662 PF00946 Mononeg_RNA_pol Paramyx_RNA_pol; Mononegavirales RNA dependent RNA polymerase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_586 (release 3.0) Family Members of the Mononegavirales including the Paramyxoviridae, like other non-segmented negative strand RNA viruses, have an RNA-dependent RNA polymerase composed of two subunits, a large protein L and a phosphoprotein P.\ This is a protein family of the L protein. The L protein confers the RNA polymerase activity on the complex. The P protein acts as a transcription factor [2]. 
20.60 20.60 24.20 24.20 19.40 19.00 hmmbuild -o /dev/null HMM SEED 1072 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.82 0.70 -13.58 0.70 -7.19 41 1469 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 433 0 4 1221 0 621.10 23 54.24 CHANGED EsHLsSPllppclhhhlp.hsshspshthcsps.h.t.hc..hphhtt.psphhtchtph.pshltpplhsh...........ththl.asph...Lh...ph...phspphpphh+hs..spshshhsptlpphhp.....plshpLsspsphtppttt..hp.................phhplt........................................thht.spahp.h.hWh.h...............chph+pllpphpptppppppshlh..hcspshhlllsspllhlhspp.th..hhhhTa-hlLMhsDllpGRhps.hhsshs.......sphssht.pl...........ppLaplsDpl..hthhGsssYsllshlEslshuhL.QLt-.hh.Lp...GpFhsahhsEltp...tLptpshhsptt...hhp.....................lhslhp.phshc.huElauhaRpaGHPhlc....utpAhcKVRcphstsK..llshpshhcstuhFpthlIsGahcc+st.........................tWPssphshpssp.lpphhcssptloh.phslcpacpastlpFppth-hshspDLohahKDKAlSss+p-Whosa.cp.h............h.....psstsstoRRLl.sFLp-ssFsshphlpYVhstpYLpDs-aslSholKE+ElK.sGRhFAKMoa+hRtsQVluEsLlAspluchF+-ssMstsphcLpKpLhphSp.u..................................................................................................................psscshchsushloTDLpKaChsaRaposshauppLsclaGhssLFpWlHhhlpcSslYluDsasPPpsssp.hsL-psss.........sslhhpsshGGIEGhCQKhWTlloIshlhLsAhcsss+ltullQGDNQsIslTpcV......shshppccptshptsppahptL+pshtslGHpLKtpETllSucahlYSKplaacGhlLspuLKshoRsshhosslh-sspuusSsluTshtchtEpGhshhhuahlshhhshpplhh..........pl....................hashssshspslpphhh.....................ps.shlhth...sllPupLGGl.sahshoRlahRslGDPlTsulAclKch...............Ipsshhsppllphlhs.ppPup.....usahcLssDPaSlNlststssTshLKphspcslhps..osNshlpulapcssppE-cpLupFLhspcslhPRsA+tIhspo.sGtpcpIhGhlDoT+Tlhptshpppslssphlpplhphshpphphhhphhpp 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lp.......uh..................................hs.hh..hh..hhpp.t...........................a..................................................hh.h...tt.....t.h...h......h..h.h...h.h....s.p.hhpDKuhs..pp.h.t.h...........................t......sp+l.l..hltp..hs.....p.h..h.p...thh....p...ph.huhp.KE+Elp...GRh....sh..R.h.hhsE.hltp.hh.hh.t.hhh.s..p..c.h...s.........................................................................................................................t.....t.h....hDhpKas.t.Rhp....hhp.hhsphhGh.phaphhH.hh..shhhhs...s.....t.....h.t....p.........t.h.hh.....GGlE.G.hpQ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 2 2 4 +3663 PF01692 Paramyxo_C Paramyxovirus non-structural protein c Bashton M, Bateman A anon Pfam-B_1202 (release 4.1) Family This family consist of the C proteins (C', C, Y1, Y2) found in Paramyxovirinae; human parainfluenza, and sendai virus. The C proteins effect viral RNA synthesis having both a positive and negative effect during the course of infection [1]. Paramyxovirus have a negative strand ssRNA genome of 15.3kb form which six mRNAs are transcribed, five of these are monocistronic.\ The P/C mRNA is polycistronic and has two overlapping open reading frames P and C, C encodes the nested C proteins C', C, Y1 and Y2 [2]. 25.00 25.00 180.00 179.80 22.20 21.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.42 0.71 -4.70 4 36 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 19 0 0 53 0 194.90 70 97.69 CHANGED M.phlKuhl.LtcRcQEspphophpssoShsSY..SsPTsc+TctsohpSopts+coA...cPolstKscQQ+pt.KIlDQlp+l-SLGcpss.pQ+phlEhLIpKlYptsLGEEhsQhl.LRlWuhEEoPEuspILpMc.chRc.llpMKhERWlRTLlRGKpspL+.FQpRYpEVhPYL.ppKVEpVIMEEAWsLusHllQ- .M.phl+thl..ttRcpEppphopM.SDS.hpSY.sst.psEcTEAGShssSTh.Kcpuh.hcPplpsKpcpp+RRPKIIDQVRRVESLGEQsSQ+Q+HMLEoLINKlYTGPLGEELVQTLYLRIWAMEETPEShKILQMREDIRDQlL+MKTERWLRTLIRGcKTKL+DFQKRYEEVHPYLMhE+VEQlIMEEAWpLAAHIVQE 0 0 0 0 +3664 PF00973 Paramyxo_ncap Paramyx_ncap; Paramyxovirus nucleocapsid protein Finn RD, Bateman A anon Pfam-B_158 (release 3.0) Family The nucleocapsid protein is referred to as NP. NP is is the major structural component of the nucleocapsid. The protein is approx. 58 kDa. 2600 NP molecules go to tightly encapsidate the RNA. NP interacts with several other viral encoded proteins, all of which are involved in controlling replication. {NP-NP, NP-P, NP-(PL), and NP-V}[1,2,3]. 
21.40 21.40 23.40 23.20 20.60 21.30 hmmbuild -o /dev/null HMM SEED 524 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.76 0.70 -5.80 9 5874 2012-10-01 19:59:50 2003-04-07 12:59:11 14 1 449 4 0 2227 0 180.70 74 97.00 CHANGED MAoLLKSLALFKRNKDKPPLAuGSGGAIRGIKHVIIVPIPGDSSIsTRSRLLDRLVRhlGDPDISGPKLTGsLISILSLFVESPGQLIQRITDDPDlSI+LVEVlQS-pSQSGLTFASRGssMDDEADcYFoh--Psuu-ppphtWFEN+EI.DIEVQDPEtFNMlLAoILAQIWILLAKAVTAPDTAADSELRRWlKYTQQRRVlGEFRL-KtWLDsVRNRIAEDLSLRRFMVALILDIKRTPGNKPRIAEMICDIDTYIVEAGLASFILTIKFGIETMYPALGLHEFAGELSTIESLMNLYQQMGEsAPYMVILENSIQNKFSAGuYPLLWSYAMGVGVELENSMGGLNFGRSYFDPAYFRLGQEMVRRSAGKVSSsLAuELGITtEEA+LVSEIAupTs-DRssRuoGPKQuQVSFL+sDpu-stp.psut+--t+shQs+tctppu.+ss+hscsoDppsso.sscThlDlDpspEuspDP.ss++SAEALh+hpAMApILccsshssDoshsYND+DLL ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................KVSSTLASELG.ITA.E.D.A....R....LV.S.EI...A....M.H.T..T...ED.R.hSR.A..VGPRQ........AQ..VS.....FL..H..G..DQSEN..EL.P..GLG.G.KE...D...RRV..KQ.SRGEA......RESaRETGs.S.R.ASDARAAHLPTuTPLDIDTASE.uQDPQDSRRSADALLRLQAMAGILEEQGSDTDTPRVYNDRDLL................................................................................................................................. 0 0 0 0 +3665 PF02725 Paramyxo_NS_C Non-structural protein C Bashton M, Bateman A anon Pfam-B_1636 (release 5.5) Family This family consists of the polymerase accessory protein C from members of the paramyxoviridae. 
20.60 20.60 20.70 21.80 19.10 20.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.82 0.71 -4.62 6 123 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 31 0 0 111 0 164.30 72 90.13 CHANGED pPSc.l..shhph++shpuGp+PssppctpcssssR.+coLRISsNHApQQhDQs+oAphhphIRDLE+ulssLh+hsss.cpspcpsLpYsVIMFMITAVKRLRESKMLTlSWFpQAL.llssSpEEpcsLppAMhILAplIP+EhL.LTGDLLPuLppp-.LM .........................................................G.LSRPSPSAHWPSRKsWQHGQKYQTTQDRoEPPAcKRRQAVRVSANHASQQLDQLKAVHLASAVRDLERAMTTLKhWESPQEISRHQALGYSVIMFMITAVKRLRESKMLTLSWFNQALMVIAPSpEETMNLKTAMWILANLIPRDMLSLTGDLLPSLWGSGLLM....... 0 0 0 0 +3666 PF01806 Paramyxo_P Paramyxovirinae P phosphoprotein C-terminal region Bashton M, Bateman A, Coggill P anon Pfam-B_1628 (release 4.1), Karlin D Domain The subfamily Paramyxovirinae of the family Paramyxoviridae now contains as main genera the Rubulaviruses, avulaviruses, respiroviruses, Henipavirus-es and morbilliviruses. Protein P is the best characterised, structurally of the replicative complex of N, P and L proteins and consists of two functionally distinct moieties, an N-terminal PNT, and a C-terminal PCT [1]. The P protein is an essential part of the viral RNA polymerase complex formed from the P and L proteins [1]. P protein plays a crucial role in the enzyme by positioning L onto the N/RNA template through an interaction with the C-terminal domain of N. Without P, L is not functional.The C-terminal part of P (PCT) is only functional as an oligomer and forms with L the polymerase complex. PNT is poorly conserved and unstructured in solution while PCT contains the oligomerisation domain (PMD) that folds as a homotetrameric coiled coil (40) containing the L binding region and a C-terminal partially folded domain, PX (residues 474 to 568), identified as the nucleocapsid binding site. Interestingly, PX is also expressed as an independent polypeptide in infected cells. 
PX has a C-subdomain (residues 516 to 568) that consists of three {alpha}-helices arranged in an antiparallel triple-helical bundle linked to an unfolded flexible N-subdomain (residues 474 to 515). 25.00 25.00 164.00 163.80 19.90 19.00 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.64 0.70 -5.12 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 20 2 0 49 0 240.10 59 42.99 CHANGED EESToSs-EMATLLsSLGVIQSApEFELSRDASaVFA+RsLKSANYAEMTFNLCGLlISVEKSp-sKV-EN+sLLKQIQE-lcShRDlHKRFSEYQKEQNSLlMSNLSTLHIITDRGGKTDsP-soTRSPSVFTKuKENKlKKTRFDPSMETLGspKaKPDLIREDEhRDEI+NPVhpEpNs-scASNASRLlPS+EKsTMHSL+LVIENSPLSRsEKpAYIKSLpKCKTDQEVKsVMELFEEDI-SL ..tESsp.hcchhTLLpsLGVIQSspch-..pDtphVhstpsLpsAshAphh..lsGLllusphspssKlsp.pp.lhplppslcphc-.a+Rh.E.QKEQ.SLlhS.lSsL+IhT-RGGKpDps-ossRos.lhsKsKEpKhKtTRFDP.MET.G..K.hPDLhRcsEhpsE.cs.VhpEhsopsctSNAoRLlPp+ppsTM+SLhlVIpsSsLSpupKtuYIppLp+CKoDpEVpplM-hhpEDlpS... 0 0 0 0 +3667 PF01279 Parathyroid Parathyroid hormone family Finn RD, Bateman A anon Prosite Family \N 19.70 19.70 20.90 20.00 19.40 19.30 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.49 0.72 -3.37 11 168 2009-09-12 04:59:12 2003-04-07 12:59:11 12 2 66 21 66 160 0 89.80 41 64.87 CHANGED pKRSVSctQLMHD+G+sLp-hcRRhWLQcLLc-VHTAphht................................shstsu.p.KPsssTKNlP.tatL.-pEup..sLsQETpKs.saK-....p.hcs.shKKKsKs ........KRuVSEhQLMHD+GKslQshcRphWL+chlp-lHTAphc.................................sss..csussp+Pt..tppcNh................-sc...t+.....Lspt.sKs............................................................................... 
0 3 8 27 +3668 PF02195 ParBc ParB-like nuclease domain SMART anon Alignment kindly provided by SMART Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.10 0.72 -3.88 51 10329 2012-10-01 20:12:50 2003-04-07 12:59:11 13 78 4621 22 2371 7819 3243 91.10 27 28.42 CHANGED hplslsplp....sppps+........tpplccLhpoIcp..................pGhh.pPllVcppt.....shapllsGcRRhcAsphhG......hpcl.....ssllhphs........cppththslh-Nh ..........................................................................h..lslsplt.....ss.h.pPRph...........spppl.p-.L.s.pSIcp......................................pG.ll...p.....Pl..lVcptt...........................stYpl..l..s.G.c.R..R.h.+......A.s.p.h.hu.........................hppl.......P..s..l.l.t.phs........cpph.h.t.h.slhcN............................................................ 0 734 1530 2000 +3669 PF00644 PARP Poly(ADP-ribose) polymerase catalytic domain Bateman A, Griffiths-Jones SR anon Bateman A Family Poly(ADP-ribose) polymerase catalyses the covalent attachment of ADP-ribose units from NAD+ to itself and to a limited number of other DNA binding proteins, which decreases their affinity for DNA. Poly(ADP-ribose) polymerase is a regulatory component induced by DNA damage. The carboxyl-terminal region is the most highly conserved region of the protein. Experiments have shown that a carboxyl 40 kDa fragment is still catalytically active [2]. 
23.90 23.90 23.90 24.00 23.60 23.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.32 0.70 -4.90 28 1951 2012-10-01 23:25:29 2003-04-07 12:59:11 15 267 268 92 1317 1843 56 176.60 20 23.35 CHANGED Lps.plpsLcpsScEaphlppYhpsTtsss+t.....shplhclF+lpRpuEtccFpttcp.......hcN..............+hLLWHGSRloNasuILspGL+ls.spuPlsGhh.....FGKGlYFADhsScSA..pYshs..sts.......ss.....suh......hlLu-VALG-.h.clhtsp..h..pp...shpSstGhGcstP......tt......ps.....lPtucs.hssthpsst...l..sEYlVYcssQl+h+YLlclph ............................................................................................................h..tp.pat.............l.p.h.p....s.h.t......................thp...l......p....l....c.....l....p...p.......t.........pp..apt.tpp..........................................hs.................................cphLaHG.o...p.h....p.........s..h.................t..u..Il...p...p..Gh....................s..............h..s....s...h.p.....G..t..h............................aGp..G.l.YF..A..s..p...s..o..h.Ss...........p..Ysts......ts................................ps.......th........................................hh..lsc.VhlGp..................................................hh.u....t.............................................................................................................t.alla.p....t...ph..tall.h..h................................................................................................................. 0 534 710 997 +3670 PF02877 PARP_reg Poly(ADP-ribose) polymerase, regulatory domain Bateman A, Griffiths-Jones SR anon Bateman A Domain Poly(ADP-ribose) polymerase catalyses the covalent attachment of ADP-ribose units from NAD+ to itself and to a limited number of other DNA binding proteins, which decreases their affinity for DNA. Poly(ADP-ribose) polymerase is a regulatory component induced by DNA damage. The carboxyl-terminal region is the most highly conserved region of the protein. 
Experiments have shown that a carboxyl 40 kDa fragment is still catalytically active [2]. 26.80 26.80 26.90 27.00 25.50 26.70 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.57 0.71 -4.49 35 515 2009-01-15 18:05:59 2003-04-07 12:59:11 9 63 207 33 360 542 8 135.40 33 17.01 CHANGED cScLstpVQ-LlpLIFDhctMppsMtEhpaDscKhPLGKLSpcpIppGYplL+clpchlp...............tssppstl.c......LSNcFYThIPHsFG...hp+PP..llcotchlKpKlchLEuLp-I-lAspllcsspssc....hssLDppYc ..........................................................spLs.tlpcLlphIash..ch...h...c..psM.h...c.h.................p.hD.h.p....K..hPLG.KLS+ppI.ppGapsL...pclpphlp................................................ts.s.pp.ppl.-...........lSscFYTlIP.H.s..F.G....h.pp.PP.....lIss................ctlp........p.K.l.c.hL.......-sLt.DIElA.pLlpsspp..sp.........cPlDtpYp............................................... 0 142 203 291 +3671 PF01358 PARP_regulatory Poly A polymerase regulatory subunit Bateman A anon SCOP Domain \N 24.00 24.00 24.50 25.00 23.70 23.90 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.90 0.70 -5.77 6 111 2012-10-10 17:06:42 2003-04-07 12:59:11 13 2 69 24 18 112 3 256.90 44 70.01 CHANGED huhcKP.hhYFcEIssEh-YcsEstsph..pKhPhQGQLKLLlGELaFLs..pLp++shLsu.sslVYIGSAPGsHIpaLh-aapshsl.IKWhLlDGRsHDspLpuLps....Volls+FVDEcYlpph+pt..hph.+llLISDIRSpRG..pEPoTcDLLpDYuLQN.MlSlLKPlASSLKWRCPFPDQW...I+-FYlPcGpEhLQPFAPsaSAEMRLLSlaousshpLpsloppcuhpYEKKMaYLNphlR.+IllsFDYsNQcYDaFaMa+hL+Tlhhs..KoFsosKsKVlalppSIF+hLsI 
.........................h.thpcP..hhahp-lstth-Ycscsspp...h.....KKhs..apGQhKLLLuELhFLo......+LpR+..Gh...Ls......u......ssVVYlGSAPGoHIpaL......c.-hF.shs..l+WhLIDsRpHDs.h.LpuLcs......VoLls+Fs.sEp...hl+..pl+cp..hp..tcIlLISDlRStc..s.......ssEP..s.T...tD....L...L.p...sYsLQNhh..lplLpPhAS.LKaRsPFP...DpW.............hccah..lscGschLQsFAPphSuEhRLlslhss.pshp.hppls.p.DshpYE++MaYhNpllR.phl...lsFDYsspp..Y.DaaahahhL....ps.lh.s..p.a..s.ptpll.hppthFp.lt................................................. 0 14 16 18 +3672 PF00740 Parvo_coat Parvovirus coat protein VP2 Bateman A, Finn RD anon Pfam-B_436 (release 2.1) & Pfam-B_445 (release 3.0) Family This protein, together with VP1 forms a capsomer. Both of these proteins are formed from the same transcript using alternative splicing.\ \ As a result, VP1 and VP2 differ only in the N-terminal region of VP1.\ VP2 is involved in packaging the viral DNA. 19.80 19.80 20.00 20.00 18.20 19.70 hmmbuild -o /dev/null HMM SEED 529 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -13.16 0.70 -5.55 24 3412 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 190 158 0 2269 0 301.20 29 80.82 CHANGED G.......suGuGuuGVGsuoGsWcssopapt.....spVospsTRphhLsh.ssc...Ypplp..ssspos.............sssahshsTPWuYhDhNtassaFSPpDWQpLlNshtph+PcuhphcIFNl.lKpVTp....sss.ssshsNsLTuslplhsDssapLPYs.ussppsshs.aPhcsahlsQYuYh.....................sohssssss...p.scc........osFasLEpt.shphLRTGspFph.oYpFp.shPh+psasappshphhhNPlhpphhhthsshsssss.st......hppsptss..httpspNalsG............Pt.....h.ppshpsssusshpsshs....................ts.tthssppsshsst.shspptspssstt......hppphtass.s.puscssp.thspphshscptht.....t..t.p.hh.......hssss.uspos.ph...........sslhshhhhsuulWpscslYhpG.IWsKhPcsDt+h+hp.sthGshshppPPsQlFlKhsPsPss...p..ss.usssShIspYuThphpsclpaclc.+csoppWNP.hQhohss.......s.tshlsas 
.................................................................................................tt......G...spu.a.tts.h.........lhsp.oR..hh....tc...Y+.....s...................p..hth.TPWthhshNtht.aFsP.-aQhlhpph.tht..thp.pl.tl.l..Kpl.p.....t.................................................................................................................................................................................................................................................................................................................................................................................................................................ss.pss.ph...............ph.h..stl.s..slh.ps.lWsK..phD.p.hhp.s..s.hs.ppsPsplFlKhh...ss.......t..s.....ohls.Yssh.hpsph.acht..h.totpWpP..p.s............................................. 0 0 0 0 +3673 PF01057 Parvo_NS1 Parvovirus non-structural protein NS1 Finn RD, Bateman A anon Pfam-B_400 (release 3.0) Family This family also contains the NS2 protein. Parvoviruses encode two non-structural proteins, NS1 and NS2. The mRNA for NS2 contains the coding sequence for the first 87 amino acids of NS1, then by an alternative splicing mechanism mRNA from a different reading frame, encoding the last 78 amino acids, makes up the full length of the NS2 mRNA [2]. NS1, is the major non-structural protein. It is essential for DNA replication. It is an 83-kDa nuclear phosphoprotein. It has DNA helicase and ATPase activity [1]. 
20.00 18.00 20.00 18.20 19.90 17.90 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.45 0.70 -5.53 24 1228 2012-10-05 12:31:08 2003-04-07 12:59:11 12 11 210 5 11 1432 7 174.20 34 51.93 CHANGED p......................spspcca..............hsLlchLl........ccGlsoEcpWhths..ppYhphpssssutpplcsuLphsppchssstsshcalsptsss............hshppN+lhplhphpGYsPhhsGphlhsWhs+phGKRNTlWha.........................GPusTGKoplAp...................AIApslPhaGsV..NWsNcNFPFsDsss+hll.WW-EGhhpsphVEssKulLGGpsl+VD.....pKs+sosplpsTPVllTSNs-hshV.hsGsssohtHtps............Lc-RMhphphscplsss....aGhlopp-l+pah.pWupp.........thphshphh .....................................................................................................................................................................................................................................................................................................................................................................................................hh...........................hlst....h.......h.G..hl....Nh..N.....s..FsF..s...D...st...t.+...ll.haE.E..s..h..h..p...p..s...hV...E..s..uKsILG.Gp.tsRlD......Kt+sS.h..l..sPVlI.o..oNtD..l....h.l.ssGssso..Htts............lppRhh.hph......h..s.....hs.h.....h..hh.........................h................................................................. 1 9 9 10 +3674 PF00989 PAS PAS fold Bateman A anon Sequences from SMART alignment Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.22 0.71 -10.01 0.71 -4.29 49 15580 2012-10-04 01:10:46 2003-04-07 12:59:11 19 1711 3946 104 4551 43707 3363 102.20 15 16.49 CHANGED pchptllcs...hssslhshD..tsGtlhhhNsshpplhGhst..pchhGpslhphl.ttp.......hhptltphhtstpptpshphphth..........puphhhhplpssshhstssp...shhshhpDl .............................................t..hptllps.........hs..s..s..l....l...s....l......-.......pp.....G.......p..........l..........h...h....h..Ns.s....s.p....p....l......h.....G.....h.......st.......p..-........h..........h.......G.......p.......s.........l....h....p.....h.......l.............................................hh...p..h..h....t.......p......h.....h..........t......t......p......................t...........p...h...h.....................................t.......h.....h.........hp..h........h.......................................................................................................................................... 0 1473 2845 3740 +3675 PF03793 PASTA PASTA domain Yeats C anon Yeats C Domain This domain is found at the C termini of several Penicillin-binding proteins and bacterial serine/threonine kinases [1]. It binds the beta-lactam stem, which implicates it in sensing D-alanyl-D-alanine - the PBP transpeptidase substrate. It is a small globular fold consisting of 3 beta-sheets and an alpha-helix. The name PASTA is derived from PBP and Serine/Threonine kinase Associated domain. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.77 0.72 -4.32 237 11616 2009-01-15 18:05:59 2003-04-07 12:59:11 14 55 2312 34 2319 8832 1936 62.10 23 21.30 CHANGED splP.slhG....hshpcApp.hLpp.......tGl........................ssshspGp...VlpQsPss.....Gspl.pp..us.plplhlup ..................tlP..s.l.hG............hshppApp..tLpp.........tGl...........ht..t................ssphs.pGp.......V........l.....p.......Q.....s.P.s.s.........Gsp..l..pp....ss..plplhlu.................................. 0 910 1702 2086 +3676 PF00292 PAX 'Paired box' domain Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.47 0.71 -4.42 6 1629 2012-10-04 14:01:12 2003-04-07 12:59:11 13 23 304 7 623 1527 5 115.90 67 31.76 CHANGED GpGcVNQLGGVFlNGRPLPNpIRp+IVEhAppGlRPCsISRQLRVSHGCVSKILsRYpETGSIRPGsIGGSKP+.VsTP-V.s+IcEYK+psPuIFuWEIRD+LLp-GVCDppslPSVSSISRlLR .......................................................putVNQLGGVFVNGRPLPs..sl..Rp+.IVE.LA.+.p.G.l.R.PC.DI..SR.Q...........L................+..............V...............SH............GCVSKILu........R........Y.hETG.S..I..+.P..G.s.I...G.G..S.K.P..............+........V.............A...TP.....c..........VVp+Itp.YKRc..sP..shF....AW..EI...RDRLLs..............-Gl...CspsslP.....S........V......SSIsRllR........................................................ 
0 161 211 439 +3677 PF03535 Paxillin Paxillin family Griffiths-Jones SR anon PRINTS Family \N 19.20 19.20 19.30 19.50 19.00 19.10 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.48 0.71 -4.50 6 135 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 44 6 60 102 1 163.20 41 33.41 CHANGED ssPP.V.sPPSu-sLNGo......pWs.usppatsppP..ppsPhh.sStsKs..........SSsssssuEEEHVY........................SFPNKQKSuEsSsssMoSSLGSNLSELDRLLLELNAVQcSsP.uFPu-Ett.uPPLPuusss+Y.l.ENusSsssKsuPPspEKPKRN.uG+slEDVRPSVESLLDELESSVPSPVPsspsup.u-hsuPQcsssSQQ ..........................................................................................................................................tp.s..u.s...s..s..s..t.s...u-c-HlY........................Sh...PsKp...+S...u-Puss.s...h...SSS...LGoNLSELDRLLLELNAVQaNss.....u.........Fss..p...............-t......t...usshPss............Epsss.shpsss....c+.P.ts..sphh--hRPoVEoLLspLEsuVPs..........ss........s...p..t.p........................................................................................ 0 4 9 28 +3678 PF03717 PBP_dimer Penicillin-binding Protein dimerisation domain Yeats C anon Yeats C Domain This domain is found at the N terminus of Class B High Molecular Weight Penicillin-Binding Proteins. Its function has not been precisely defined, but is strongly implicated in PBP polymerisation. The domain forms a largely disordered 'sugar tongs' structure. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.03 0.71 -4.21 165 11499 2009-09-13 02:10:02 2003-04-07 12:59:11 10 19 4376 46 1966 8801 4788 176.10 24 27.45 CHANGED hl.stRGpIh........DRs......Gph.LAsstsshs.......lhhsspphpptpt......................................................hpcLuplL.t....hs....pclpp...........................................................................................................................................tttpshp.lhltc............................pls.....cphsplpphthp.....................Glthp.sp..pRhYPp..us...hsupllGalst.......................................................t.thG....psGlEpta-phLpGpsGtpp.hps..cstGp.hltphp ...................................................................................................................................................................................................stRGpIh...............DRs......Gps...LA.tstsshs...........l..hh..s.....p..p.ht.p.t.t.p..h..............................................................hppLupll...p......hst.........pplpc........................................................................................................................................................................................................................................................................................................................................................................................t...pt..t.p..p.h..p..hh..hltp...................................................pl.s.....pp.h....t...t....lpp..hthp...........................................Gl.php...sp...pRhY..P..t...Gp......hhupllGassht....................................................................t.ttthG...ppGlEpta.-...........c......h...........Lp....G.p.s.Ghpp..hph....Dph.Gphl...tt..............................................................................................................................
.......................... 0 675 1302 1665 +3679 PF01395 PBP_GOBP PBP/GOBP family Bateman A anon Pfam-B_1765 (release 3.0) Domain The olfactory receptors of terrestrial animals exist in an aqueous environment, yet detect odorants that are primarily hydrophobic. The aqueous solubility of hydrophobic odorants is thought to be greatly enhanced via odorant binding proteins which exist in the extracellular fluid surrounding the odorant receptors [1]. This family is composed of pheromone binding proteins (PBP), which are male-specific and associate with pheromone-sensitive neurons and general-odorant binding proteins (GOBP). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.75 0.71 -4.49 158 2289 2009-01-15 18:05:59 2003-04-07 12:59:11 17 9 218 135 802 2480 0 116.50 15 75.34 CHANGED hhhhh..ht.........hhtphtpph.........pphhppChpc...h.slsp.ctlpphpptphsss.........pphcCahpClhp.p..hshhs....psupl..p..hctlhphhtthhtt........pphpph.....lppCt.......stssss...CcpAap.hhp....Chhpppt ........................................................hhh.....................htthhptChpp...............tlst...p.t...h.p.p.h.t.p.h..p..h.ss..............pphpChhp.Chhp.c..hs.l.hs.........ps.u.p.h......p...hc..p..h.hp.h.htthhtt..........pphtph......hppCt..............ttptps.......tC...p...t.uhp.hhp....Chhp...h................................................... 
0 184 287 686 +3680 PF00427 PBS_linker_poly Phycobilisome Linker polypeptide Finn RD anon Pfam-B_159 (release 1.0) Family \N 24.60 24.60 24.90 30.90 22.60 24.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.86 0.71 -4.48 152 614 2009-01-15 18:05:59 2003-04-07 12:59:11 16 8 108 15 206 625 257 129.90 39 46.64 CHANGED hpsssspschp..plIpAsYR...QlaspphshpspR..hsslE.SpL+sGpIoVR-FlRuLupS-hY+ppFapsssshRhlELsa+HlLGRuPhsppEhtta.plluspGhpuhIDuhl-SsEYtpsFG-csVPY.Rs ...............h....hopsphptlIcAuYR...QVhu..pp..h...h.hp..spR...hsshE.SpLcsGpIoVR-FlRuLApS-hY+cpFapsssshRhlELsa+HlLGRuPhsppEhttahplhus...........pGapAhlDuhlDStEYtcsFG-ssVPY.R............. 0 28 118 181 +3681 PF03792 PBC PBX; PBC domain Finn RD, Mistry J, Burglin T anon Pfam-B_3021 (release 7.0) Family The PBC domain is a member of the TALE (three-amino-acid loop extension) superclass of homeodomain proteins [1][2]. 25.00 25.00 26.00 27.40 20.10 23.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.37 0.71 -4.61 8 415 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 103 0 176 302 0 161.20 75 49.67 CHANGED Shu.....phtpslu-lLQQlhsITDQSLD.EA.QARKHuLNsHRMKsALFsVLCEIKEKTsLShRss.......E--PPDPQllRLDsMLlAEGVuGP.........-cuGstuAsuuusss......ssohEHuDYRAKLuQIRpIYHuELEKY-pACsEFTsHVhNLLREQSRoRPISs+EIERMVsIIpRKFsuIQhQLKQSTCEAVMILRSRFLD .......................t.........p.hptlhtI.s-psLD.-s..pt+..KasLss.HRMKPALFs.VL.CEIKEKTs.LSIRusQ......EE-PsD..PQLMRLDNMLLAEGVuGP.........EKGGGuA....AAAA..AAAAoG......sssDNSlEHSDYRAKLuQIRQIYHoELEKYE...........QACNEFTTHVMNLLR.EQSRTRPIoPKEIERMVuIIHRKFSoIQMQ....LKQSTCEAVMILRSRFLD.......................... 0 32 51 106 +3682 PF02229 PC4 Transcriptional Coactivator p15 (PC4) Bateman A, Mian N anon Pfam-B_6534 (release 5.2) Domain p15 has a bipartite structure composed of an amino-terminal regulatory domain and a carboxy-terminal cryptic DNA-binding domain [1]. 
The DNA-binding activity of the carboxy-terminal is disguised by the amino-terminal p15 domain. Activity is controlled by protein kinases that target the regulatory domain. 20.60 20.60 21.10 20.90 20.00 19.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.70 0.72 -4.74 203 1136 2009-09-14 14:13:59 2003-04-07 12:59:11 11 16 977 22 344 696 66 53.70 41 52.06 CHANGED sh......hph..utp+clslppapGp.shlDIR-aat..c.s.t..hP..spK.GIoLoh-p......hptLtchl ...................u..spp.GapKclshlSaNGt.sKaDIRpWss...D+s.+..........huK....G..ITLosEE......appLhct.h................ 1 134 213 285 +3683 PF01851 PC_rep Proteasome/cyclosome repeat Bateman A anon [1] Repeat \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.40 0.73 -7.87 0.73 -3.42 157 1407 2012-10-11 20:00:59 2003-04-07 12:59:11 17 15 352 4 875 1693 10 34.70 28 7.63 CHANGED uAshulGllttG.sssp...tshphLp..ph.h..sspsshtt ...uAhhuLGLlhtG.ossp..pllphLhsh.hp.pspp.h............ 
0 256 433 695 +3684 PF01135 PCMT Protein-L-isoaspartate(D-aspartate) O-methyltransferase (PCMT) Finn RD, Bateman A anon Prosite Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.23 0.70 -4.80 9 3573 2012-10-10 17:06:42 2003-04-07 12:59:11 14 23 2359 22 1324 13744 3799 199.20 32 79.48 CHANGED tp.pptLlcpL+ppGhlto-+VhcAMtsl-RpcFlsc.....tsYhDsPhsIGas.....sTISAPHMhAhhhEhLc..LpsGh+lL-lGoGSGYhTAshAphVGppG.....hsluIE+I.cLstpuccNlcp.shp......sVhlhhGDGphGasthAPYDAIaVuAAuPclPpsLlcQLcpGGRLllPVG...stpQhlphh-Kps.Gpl.h+shtsVhaVPLsspct .....................................................................................p...thl.p.p.h..h..t...t.s...l.p...s.tp..VlpAht......t..l...P..Rc.h.Fl...............s..........t..........s........h...t...........p.....t......A.....Y......p.......s......t......s....l...P..I..u..t.u............................po.I...S..p..P.....h...h.....l......A.....+...M...h...c.....h....L.p..............l..p......s.........s....s.........+....V..L.EIGo.G.S.G.Yps..A....l....L...A...c...l....s........t.....c....s...h..........................ol.E......+.h.p........s.....L.t....t..p.A...+.....p.....p.....L.....c....p....h...s.h.p.......................................N.l.p...h.....t.....h.....G......D........G.....h.......p.........G......a.......s.....s........p.........A.........P...........a.....D......s......I.......l......V........s.......A......A........s.........s..........p..........l........P.........p........s..........L..........h........p........Q........L....p...............GG..h.....L.......V...h.......P...l.........G..........t.........tt............Q.....h.l.h.h.l.p..+.....p..s.............s...ph.....p..l..s..haVPlh....t......................................................................................................... 
0 387 810 1083 +3685 PF00705 PCNA_N PCNA; Proliferating cell nuclear antigen, N-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_598 (release 2.1) Domain N-terminal and C-terminal domains of PCNA are topologically identical. Three PCNA molecules are tightly associated to form a closed ring encircling duplex DNA. 20.30 20.30 20.60 21.70 20.20 19.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.51 0.71 -4.62 13 917 2012-10-02 11:47:48 2003-04-07 12:59:11 13 9 610 146 433 892 363 118.30 32 46.62 CHANGED MhEARLlQGSlLKKVLEulKDLls-AsFDCSuoGlsLQAMDSSHVuLVuLpLRSEGF-cYRCDRNluMGhNLsSMuKlLKCAGN-DIlTl+A-DsuDTlshlFEsssp-+ls..DaEMKLMDlDsEa .........................hEsch.pu.slLK+ll-ul..+..-L.lp-..ssa...........-.ss.p.sGlp....lQuMDsSHVuLV..sLhL.pscuFp.c..Y..+..C..D...+.........s.h.sl.GlNls.shsKlLKpus..sc.D..hlp.l..c.t...........c.-.s.s......-..s.l..s..h....h..h..Es.p..pc...+...hs..phchh.hp.............................................. 2 146 247 355 +3686 PF02747 PCNA_C Proliferating cell nuclear antigen, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_598 (release 2.1) Domain N-terminal and C-terminal domains of PCNA are topologically identical. Three PCNA molecules are tightly associated to form a closed ring encircling duplex DNA. 
20.60 20.60 20.60 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.51 0.71 -4.22 12 906 2012-10-02 11:47:48 2003-04-07 12:59:11 10 9 585 135 428 908 335 118.50 31 47.57 CHANGED GIPEpEYsslV+MPSuEFARIC+DLSpIGDsVVISsoK-GVKFSssGDlGsuNIhhpQsosVDK.E-AshIEMsEPVsLTFALRYLNtFTKATPLSspVTlShSu-lPlVVEYKIA-MGal+aYLAPK .................................................................................IP-...cY..sshlphsSsEFt+Is.+DLpp.h.u..-.....s......l.h..I..ps..s..K.-...u.l..pF..s..sp.G..-.......hG.s.uslhl...p....p.........p......h...-..c.......-.p.......t.....l...plph...p.-.s.Vs...h.oF..ul+YL...ss.F...s.K.A.s.s.L.u.s.pVplph.us.-..h.P.lhlcY.p.......l......t.......s.....h.....G.h.l.paaLAP......................... 0 155 249 354 +3687 PF02429 PCP Peridinin-chlorophyll A binding protein Bateman A anon Pfam-B_2945 (release 5.4) Domain Peridinin-chlorophyll-protein, a water-soluble light-harvesting complex that has a blue-green absorbing carotenoid as its main pigment, is present in most photosynthetic dinoflagellates. These proteins are composed of two similar repeated domains. These domains constitute a scaffold with pseudo-twofold symmetry surrounding a hydrophobic cavity filled by two lipid, eight peridinin, and two chlorophyll a molecules [1]. 20.10 20.10 22.20 21.40 18.80 18.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.90 0.71 -4.08 9 449 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 30 13 0 428 1 140.30 71 79.66 CHANGED Dp.IGcAAKpLSEASYPFlK-IDWhSD....lYlKsLPut.ss.pshcAIDKMIhMGAshDsshLKAAApAHH+AI.GSID.ApGVTShADYsAVNAALGRhlASVPKupsMDVYNuhAsh.hssslsstMFupVNshDApAAhKAFhsFKDVV ......................DcIGsAAKtLuDASYsFhK-lDW.ss....laLp.hPGp.ps.csLKAIDKMI.MGAthDspLLKtAA-AHHKAI.GSIs.spGVTShADa-AVNAALGRlVASVPKQpVMDVYsuhtcI.sDPpVss.MhShVNshDA.pAhpGFhpFKDVV........ 
0 0 0 0 +3688 PF01884 PcrB PcrB family Enright A, Ouzounis C, Bateman A anon Enright A Family This family contains proteins that are related to PcrB Swiss:Q53726. The function of these proteins is unknown. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.59 0.70 -5.11 5 644 2012-10-03 05:58:16 2003-04-07 12:59:11 12 1 623 16 186 1485 1169 227.50 42 96.70 CHANGED a-llEc+t.lHlTLLDP--ssPEEtlEll...t-uGTDAIMlGGSs..tuVsLDNslRtI+Kl..hsLPIILFPGsssGlSRYADAlFaMSLLNSsNsaWIlG.......ApsLGAtTlpKhs..lEslPMGYlVlEPuss..VGaVG-A+.lPpNKPcIAAhYsluucaLGMRlhYLEAGSGAstsVsEEslclsKsLscssLIVGGGI+SuEpA+chlcuGADlIVTGNllEEssp.lEctl+sltcst .................................................................................................................................................................................phppW+...HlFKLDP.s...K....p.l....s....D...-.....s...L...-tl...........s.SsTDAlh..lGG..oD.....sVT......DNV...l.+..l..hscl+................caslP..lVLE....lSsl.-u.l...h....P....G....hDh...YalPoVLNSp.cstah.sG.............................hphEAl.K..paG.chhs.....a-Ellh...EGYlVl....Ns-..uK.........VAp.lTcAp.ss.l.s.p-Dlt.AY.Aph.Asc...........hh+l..P..lh.Y.lE.Y.....S..G....s..Y..G...D...l......p...t...V...p...s..l...s...p...p...L.....s.......c.......s....p.......L.....a...Y..G.GGI.pstcpApEMAph..ADTI.VVG....s....l....I....Yc....D.l......cpA.LcTV...ch.......................... 
0 58 123 158 +3689 PF04194 PDCD2_C Programmed cell death protein 2, C-terminal putative domain Wood V, Finn RD anon Pfam-B_19053 (release 7.3); Domain \N 30.00 30.00 30.90 30.40 29.60 29.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.31 0.71 -4.65 24 515 2009-01-15 18:05:59 2003-04-07 12:59:11 8 14 299 0 347 505 5 170.90 25 44.09 CHANGED P.......a.p.a.lss-.Eshp...........shsp.spptplssh-p.tstts...sup....-tc-hhEt......stD+sFp+FpcRlupsPEQlLRY............pasGp..PLhhSpsssssch..........................lPpCs..CGupRlFEhQlhPphlshLcsc...p.s.........uh-WGTlllhsCs+sC.......stsGYh.EEashVQ.-. ..............................................................................................h......h.l.ht.E..................t....p...p..p..tp....h.....t..pp..t.tt..ttt........tt.....................tpp-thEt.........ptD.cs.Fp+Fpp+lu...t.s...P...cQlLRY....................shs..Gp......P.L.hh.osp..s....s.t.p.t............................................lPpCs..CG.upRhFEhQl...hPpllshLchs.....p.s.................................................uh-WG....TllVaTCt..psC...................tst.sah..cEalhlQ........................................ 0 120 191 285 +3690 PF04868 PDE6_gamma Retinal cGMP phosphodiesterase, gamma subunit Mifsud W anon Pfam-B_4858 (release 7.6) Family Retinal rod and cone cGMP phosphodiesterases function as the effector enzymes in the vertebrate visual transduction cascade. This family represents the inhibitory gamma subunit [1], which is also expressed outside retinal tissues and has been shown to interact with the G-protein-coupled receptor kinase 2 signalling system to regulate the epidermal growth factor- and thrombin-dependent stimulation of p42/p44 mitogen-activated protein kinase in human embryonic kidney 293 cells [2]. 
25.00 25.00 67.70 67.60 20.70 18.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.88 0.72 -4.13 5 124 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 56 4 53 95 0 77.10 77 91.72 CHANGED PPcuT..opusPsAosGPTTP+KGPPKFKQRQTRQFKSKPPKKGVpGFGDDIPGMEGLGTDITVICPWEAFSHLELHELAQYGII .............stsphhusstsstGPsTPRKGPPKFKQRQTRQFKSKPPKKGVpGFGDDIPGMEGLGTDITVICPWEAFsHLELHELAQYGII...................... 0 2 6 23 +3691 PF00233 PDEase_I PDEase; 3'5'-cyclic nucleotide phosphodiesterase Finn RD anon Prosite Domain \N 24.80 24.80 24.90 24.80 24.40 24.70 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.49 0.70 -4.61 8 2707 2012-10-01 20:28:14 2003-04-07 12:59:11 14 49 307 332 1579 2650 61 218.10 32 32.64 CHANGED YHNhhHAhDVoQosahLLtssulcphho-lElLAhlhAAhhHDlDHsGToNpFplpo..cS-LAlLYssc.SVLENHHluhuh+LLQsEphsIFpNLo++capplhchlh-hlLATDMStHhphhccl+shlpptch............h.hs...c+hpllsLll+AADLSssTKsaplp+RWsthlhtEFFpQGDhEpphGhc..pPMCDRcsA.hlspsQlGFIDaIscPlaplLsDlscc..spslh-tl-sN+ 
.................................................................YHN....hHAhsVsps......h.............ah.h....l...............................p.................s..........t.................l..................t..............t..h..............h..........s..s..........l....E.h.............h.A.hlhAAhhHDl..D.HsGhsNs....Fhlp.o..................ps..L...A..h..l..Ys......cp....SV.LE..........s..HHh.uhu.h.p.l.L...........p..........p..p..p......h..sI..h..........p.....s..........L...s...............p..c.............p......h............p............p..h.........c..p.........h........l...hphlLAT.D.h..........u.......p..H...h..p..h......l...s..p...h....p....p....h.l.ppt.ph........................................................h..p.........c+h.hl...h.p....h...h.l+.s..uDlSss.................s..+......shc.lpp............p...W...sp...............tlhpEF...ap.Q.................G.D.........hE+..p.h.........s............h....h.............s......P...h...h....D...R............p..............p...s...p................l...s.......p....Q..l.u.FIca.l.s.......hPha..p.t.h..sp.h..h.................................................................................................................................................................... 
0 624 791 1167 +3692 PF02112 PDEase_II cAMP phosphodiesterases class-II Mian N, Bateman A anon IPR000396 Family \N 23.70 23.70 23.70 23.70 23.20 23.60 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.20 0.70 -5.52 5 323 2012-10-02 15:46:01 2003-04-07 12:59:11 10 6 249 0 179 390 31 254.00 25 76.11 CHANGED Fss..........ssLGQsGGlp-GstouaLlcccusssFlpLDuGollpuls.cLslSKahusshsITlPp.s...h-sushtKsoahlcs+IpsYaIoHuHLDHVuGLVINSPshh.t......p...........................sKKTIaGLsaTIcsLpKHlFNsplWPNLsutGph.h............h.plshh-LsPuEasslTtTThSlls.........................FPlsHuuulhp...hh.....STuFLF+DslS......s-sIlsFGDsEsDpssup.ShptcIWuslAshItpsKLKuIlIECSsPp-oPDspLFGHLoP+aLlpELspLp.......ohssSou.sLssLN...VIloHlKsslAcss......NPccsILtpLcpLsEtssLG.VsIpIsppG ..........................................................................................................ht..ttth.t..h....t.................................hhth-ttsh..h.............................................................................................................h...tshhhhp.ltsahIoHsHLDHlu..G..h..l..l.s.o.st...............................................p..K....l..hu.stTl.pslppthFN.hhWPN...h...ss..st.h.........................................................................................................................................................................h.hpp...s.....................................Sohhhlppp...............................p.hhhFGDsts............Dp....hp....h............p.....t.pplWp..hA.hl.........t..tpL+ulhlEsSa..s.s.s..s.p.p.L.a.G.HLsPphlhtELp.Lt..............................................phs.....t...t........h.....sh......lhl.H.hK...................................................h...t............t..h.h.................................................................................................................. 
0 41 97 144 +3693 PF00341 PDGF PDGF/VEGF domain Finn RD, Bateman A anon Prosite Domain \N 20.80 20.80 22.80 21.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.53 0.72 -3.87 57 855 2012-10-02 16:54:34 2003-04-07 12:59:11 12 13 154 99 329 797 0 80.20 36 33.29 CHANGED CpPR-slV-lhpEasspsst..hap..PsC.VslhRCu...GCCs.c.EulpCsPopspslohplhcl............ttt.spphhhlshtpHspCcC ......................CpsRphlV...-l...hp.Eh.....ss.p..s.st...hFh..PsC.V.lh...RCu...G...CC..s...s...culpChPo..p.spplshplh+l..............................t..hp.p.phsplshtpHppCcC....................................... 0 40 65 165 +3694 PF04692 PDGF_N Platelet-derived growth factor, N terminal region Kerrison ND anon DOMO:DM04730; Family This family consists of the amino terminal regions of platelet-derived growth factor (PDGF, Pfam:PF00341) A and B chains. 24.90 24.90 24.90 24.90 24.60 24.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.73 0.72 -3.48 10 162 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 44 9 51 136 0 73.90 43 34.82 CHANGED EEssIPcELIERLu+SEI+SISDLQRLLEIDSV................usEDssppcl+ppps+sspH.....h.-h..+sl.SRRKRS......lEEAVPAl ..........................pts.IPcELhEhLucSpI+SIcDLQRLL......c..hDS.V.................tEDsh-tsLph.ppsHss.pc...............p....Rt+RS..........hE.Al.A....................................... 0 2 6 18 +3695 PF00800 PDT Prephenate dehydratase Bateman A anon Pfam-B_1095 (release 2.1) Family This protein is involved in Phenylalanine biosynthesis. This protein catalyses the decarboxylation of prephenate to phenylpyruvate. 
20.60 20.60 20.60 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -11.10 0.71 -4.74 134 4375 2009-01-15 18:05:59 2003-04-07 12:59:11 13 27 4002 7 1176 3130 2645 177.50 33 54.29 CHANGED luaLGPpGoaScpAAh....phh.......stssphlshsohp...............clhcuVppsc...s.-hullPlENSh..pGsVstohDhL..hp.t..s..................lpIhuEhh...l.lcHsLhstsss............................ph..pc.Icp........lhSHPQAluQCppaLppp....sphp..hhsssSTAtAAch.....t....................pppts.....sAAIuuphAAchY.uL.plltc.sIpDpt.sNhTRFlllucpts ....................................lAaLGPcGoaochAsp..........................phh.........................tph..ph...h..s..hs.s.hp......................................-lhp.sVc...sGp............s.-auVl..P..lE...N..oh....pGu.lspshDhL..hc..s.....s.................................lpIsGE.hh...lsIcas.Lls.t.sss.........................................................ph...pp..Ic..p............la..SHP..............Qu...........luQCppaLppp.........s..p....hc...h.sssSTAtAAch........l..t...........................t.p.s.s..........sAAIus.c.t.uA.p..hY.....GL.p...........lltc..sI..........p-.p.p.p.NhTRFlllucp.s................................................................................................... 0 367 767 1005 +3696 PF03740 PdxJ Pyridoxal phosphate biosynthesis protein PdxJ Bateman A anon COG0854 Family Members of this family belong to the PdxJ family that catalyses the condensation of 1-deoxy-d-xylulose-5-phosphate (DXP) and 1-amino-3-oxo-4-(phosphohydroxy)propan-2-one to form pyridoxine 5'-phosphate (PNP). This reaction is involved in de novo synthesis of pyridoxine (vitamin B6) and pyridoxal phosphate [1]. 
24.50 24.50 25.20 34.70 22.80 24.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.54 0.70 -5.22 9 2100 2012-10-03 05:58:16 2003-04-07 12:59:11 8 4 2052 50 516 1491 2175 235.70 50 96.84 CHANGED hhLGVNIDHlATLRpARssthP-slcAAhlAtp.AGADtITlHLREDRRHIp-pDlhhlpclhpsc.....hNlEhulo-Ehht.......lALcs+PcpVsLVPE+RpElTTEGGLDlstttp+lcshlccLpssGhcVSLFIDss.cpIcAutpsGAshIELHTGtYAsh+s...................cscptcp.h...hp+lppsAthAt-lGLtVsAGHGLsYpNVpslstI.t..ltELNIGHullucAlahGLtpAVtcMtplhtt ...................h.hLGVNIDHlATLRNAR.........G..s....s.........aPDPVpAAhlAEp.AGADGITlHLREDRRHIpDcDVphL+.p.slpT.+.............hNLEMAlT.-.E.M.ls.................IAlch+....P.chsCLVPEKRpElTTEGGLDV..su....p..t..-+lpsssp+Lps.A..GI.cVSLFID..s..D..t..cQ.....IcAAtc....l..GA..s.hIElHTGsYA.c.A..p.s......................cs..c..p..t..pc.........htRltpuAphA...ts..L....G.......LpVNAGHGLsYcNVpsl..........Aulsp..lpELNIGHuIIu+AlhsGLppAVt-MKplh..h............. 0 150 324 427 +3697 PF00595 PDZ PDZ domain (Also known as DHR or GLGF) Bateman A anon [1] Domain PDZ domains are found in diverse signaling proteins. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.67 0.72 -3.80 58 26099 2012-10-02 11:12:46 2003-04-07 12:59:11 19 730 2536 601 11639 33290 6344 80.30 22 16.19 CHANGED plpltt...ttttlGhslssssst..............slhlsplhtG..uuAptss.lchGDpIlpl..NG.....pslpsh.sppcshthlcsssp.....plpLtlt ......................................................................................h.....h...tttshGhsltsspst.........................................................sl.h..l.s...p...l........h....tu.....u.....sA.p.....p..s..........G..L.......p...........s............G...Dp.......I..l.......p.........l.........NG..........................hs.l....p...s....h........s........p..p...c..s....l..p..h..l..+..p..stt.......tlpL.l.h....................................... 
1 2855 4028 7376 +3698 PF00544 Pec_lyase_C pec_lyase; Pectate lyase Bateman A anon SCOP Domain This enzyme forms a right handed beta helix structure. Pectate lyase is an enzyme involved in the maceration and soft rotting of plant tissue. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.59 0.71 -4.85 10 1509 2012-10-02 14:50:22 2003-04-07 12:59:11 14 45 449 44 752 1578 23 190.70 27 45.93 CHANGED spslVIsustohD..............uhs.hsh..pphhhG...hGussplsNhGhtlh...psoSNVIl+NlpIcsh.......l..t.t....sssspDGDAIplp.Nus.slWIDHsolSsusapss.hs.........aDGLlDIpcuSssVTISNshFssHcKshLhGH.uDst.upDpG.h+lTlsaNhF.sslspRhP.RsRaGhhHlaNN.Y.......sphppYuhGlussuoILSEuNpF .................................................................................hh...................................................................t.t.......lh.l..s..S.....p..c.Tl...G........hGs..s.s.t...lts...h.....G..ht.lt..............t.s.....s........N.....VIl+..N.lplpsh.....................................................thhs.s..uD..u..lslt.......s.....u.......s.....plWlDHs..s.hs..s.s..........................................tDGl..l....Ds......h..p.....u.o...st.lT..l.S.ps.h....a..............p..............s....+.....s...c...........s..h..L..h..Gp...sc..p......st...D....ps....hp.....lThtaNaa..p..........shsp...........Rh.P...+...h.....R......h....G........h........h...H.......l..hNN.a..............................t..p.h.....h.....a.uh.sss.....t.s.s..p.lhspsNhF.............................................................. 0 220 471 647 +3699 PF05041 Pecanex_C Pecanex protein (C-terminus) Moxon SJ anon Pfam-B_5192 (release 7.7) Family This family consists of C terminal region of the pecanex protein homologues. The pecanex protein is a maternal-effect neurogenic gene found in Drosophila [1]. 
19.80 19.80 25.70 20.80 19.00 18.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -5.32 8 305 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 96 0 180 282 2 209.20 49 13.85 CHANGED psVcpDc..DSsLVTLCauLslLuRRuLGTASHs.hSsuLEsFLYGLHsLFKGDFRITs.+DEWVFADhDhL++VVAPAVRMSLKL..HQDHFossDEaDE.ssLY-AIssacpphVIuHEuDPuWRsAVLuspPsLLuLRHVhDDGsDEYKlIMLs+RaLoF+VIKlN+ECVRGlWAGQpQELlFLRNRNPERGSIQNs+QsLRNlINSSCDpPlGYPIYVS.......PLTTSas-opsQL ................................................................h.....t...s.ssLlsLshuLslluR.R.u.L.Gs.Auap.....hu........ss.l-sFLaG.LHsLFKGDF....R....Ios.+DEWlF.u.Dh-LLppVVsPulRM.oLKL..HQD.pFsss...-..E.a..-..-s..tsLa-AI........psaEp.....p.....hlIuHEuDP..sWRpAVLusp.PpLLuLRH.l......h.D.-Gs..s..EYKlIMLp+p.aLoF+VIK.......lN+ECVRGLWAGQQQELlFLRNRNPE.RGSIQNsKQsLRNhINSSCDQPl.GY.PIYVS.......PLTTSa.sop.Q............................ 1 53 65 112 +3700 PF03211 Pectate_lyase Pectate lyase Mifsud W anon Pfam-B_2273 (release 6.5) Family \N 25.00 25.00 27.90 27.20 19.20 20.30 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.25 0.70 -4.94 12 678 2012-10-02 14:50:22 2003-04-07 12:59:11 8 10 217 9 311 638 4 205.80 33 67.04 CHANGED ushussssss...tsuuspshpsoIpVttGpsaDG+stpasus.pshussspu-pQcPlFhLEsGAoL+NVllGtstuDGlHCpG....sCsIpNVhapDVsEDAlTlKupu......sspIssuuAhpAsDKVlQhNussolslps...FhAsDaGKLhRosGssphpt.....sslphsssssssshtuls+s.uDssplpssshph.....ssVsptacGsp....usspssc .....................................sss.........P.....susss..hsl..sps.hhVtsGpsa..D..G..thppastu.....hs..h..u..ss..sp.s.tppcslFhLcsGATLKNlIl..G.....ps...tt-G.lHCcu....sCsl-NVaaccVsE..DAlol..Kupuss...................shplhGGuApsAsDKVhQhNGt.G..s............lpIcs...F.hsp.ca.GK.....lhRSCGsCps....p..........p...............s.........sl.s.s.p..s.h..s.s.ss.....sl.hulNpNhuDpsolpsh.s.lph........tth.p.ptapt.t...........stt....................................................... 
0 88 194 287 +3701 PF04191 PEMT Phospholipid methyltransferase Wood V, Finn RD anon Pfam-B_14367 (release 7.3); Family The S. cerevisiae phospholipid methyltransferase (EC:2.1.1.16) has a broad substrate specificity of unsaturated phospholipids [1]. 21.70 20.30 21.70 20.30 21.60 20.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.21 0.72 -3.93 105 2350 2012-10-01 22:51:20 2003-04-07 12:59:11 8 12 1459 0 1059 3719 1768 101.80 20 45.04 CHANGED hphhhGhhLhshGhhlshsuhhsLGhtGsahGDaFh..hhp..t.......hlpsusaphhsNPhYhuushshhGhuLh.tssshs.lllshlshlhhhhhlt.hEcPahtclYuppt ...................................................................hh...hhuh.hl.h.hh...u.h...hh..h....h.......u...h.h...t..h...t...h.t...t...s..h..h.s.....s..............ttsp.p.............................LlssGsYp..h.sRpPhY.hu.h.h.l.hh.h.......G...h...s..l........h.....hs....s...h...h.........s.....llh...s.....h......h..h......h...h.h..h.h.hh.hhhh.EE.thl.t.phaut................................................................. 1 349 655 903 +3702 PF03965 Penicillinase_R Pencillinase_R; Penicillinase repressor Finn RD, Bateman A anon DOMO_DM03102 & Pfam-B_5099 (release 14.0) Family The penicillinase repressor negatively regulates expression of the penicillinase gene. The N-terminal region of this protein is involved in operator recognition, while the C-terminal is responsible for dimerisation of the protein [1]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.36 0.71 -3.99 47 2958 2012-10-04 14:01:12 2003-04-07 12:59:11 11 9 1713 20 713 2659 173 112.30 25 82.47 CHANGED lustEhcVMcllWpput.sosp-lhpt.Lspt.hth..uhoTVtTllsRLhcKGhlspc+p.G+tahYpshlscpphhpttscpllschhts.shsshlsphlcpp..tlotc-lppLpphlpp ......................lopsEhclMc.llW......p......p.....s......p.....h.....osp.-lhpt..L.....p...........c............p....h....p....h..sh..sTltTlls.R.L.h.c.Ksh.lp.....p.....c......+......p.....G....+.....t.....a..h..Yp..s..hls.c.c....ch.h.pt.ts.ps..hlsch.h.s.s...sh...ts.h.ls.phlppp...plo.tc-...lccLcphlp............................................................... 0 301 536 647 +3703 PF00805 Pentapeptide Pentapeptide repeats (8 copies) Bateman A anon Bateman A Repeat These repeats are found in many cyanobacterial proteins. The repeats were first identified in hglK [1]. The function of these repeats is unknown. The structure of this repeat has been predicted to be a beta-helix [2]. The repeat can be approximately described as A(D/N)LXX, where X can be any amino acid. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.50 0.72 -4.66 95 11082 2012-10-03 04:02:01 2003-04-07 12:59:11 17 346 1513 31 3972 17635 5755 38.50 33 30.01 CHANGED usLpsAsLpsusLpsusLptAsLssAslpsAsLpsuslps .........................pLttAsLp....s.....AsL.....p......s.....A....sL.....p.s....A.s.L......p.s.A.....s...L.....p......s.A.sLpsupl....................... 0 1098 2769 3557 +3704 PF00354 Pentaxin pentaxin; Pentaxin family Finn RD anon Prosite Domain Pentaxins are also known as pentraxins. 
20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.30 0.71 -4.88 9 1094 2012-10-02 19:29:29 2003-04-07 12:59:11 12 93 109 168 564 1160 95 183.20 29 30.79 CHANGED KsFVFP+ES-TsYVpLhs.LcKP.LpsFTlCh+hYo-LS..RuaSlFSYuTpp..pDNElLlahc+sspYShhlGss...clhh+s.EphsuPsHlCsSWESuSGIsEFWVsGK..PhV+KuL+KGYTVtspsSIlLGQEQDSaGGsF-toQShVGEIuDlpMWDhVLoPEpIpolYtG.sshs....sNILsWRuLsYElpG.Vhl+P ...................................................................FP...t.st...sas..ltsp.hths...L.puFTlC..hh..h.t.s..shs.......p.h..o..h...F..SY.us...s......p.s...N..-l....l.l............h...t......p.......t......t............s...s...h........p.........l......h...l...ssp.....................t.s..h..h...............h................t........s.....s...p.W...pH.l.C...soW...s.....o...p....s.Gh.hp.....hahD.Gp....................h.s.s...t..p.s..............l.....t..t..u..a....s..l....t...s.......s.G..s...l...l.LG......Q....-Q..D........s..............h....G.........G........s.............F.......-..s............s.....Q.......u...F...lG...-.luplshWDpVLo.sp.pl.....t.s.l..h..ps........p.h.p........GN..ll.sWtshphp.h.s............................................................ 
1 154 195 378 +3705 PF02896 PEP-utilizers_C PEP-utilising enzyme, TIM barrel domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.60 19.60 19.60 19.60 19.50 19.40 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.91 0.70 -5.54 17 9485 2012-10-10 15:06:27 2003-04-07 12:59:11 13 33 4444 34 2008 6852 3404 301.30 34 43.25 CHANGED +tt.uphtsh.uhssDGp+lclhANlupsp-stsAhssGAEGlGLhRTEalahsp.sp.hPsEcE....................QhpsaculhcAhsG+PVslRTLDlGuD..KtLPa.h.............................chspEhNPhLGaRulRlslsp..s-lhcsQlRAlhRAus...hu........plpIMhPMluohpElcp.s+pllcch+tplctphhth.s.shplGhMlElPuAAhhA-phA+c.sDFFSIGTNDLTQYThAhDRtsst....lual......asPhpPulL+hlppllctAcpcG......hhlGhCGEhuG.DPpul.lLlGlGLDphShSshul.ts+thhtphph ...........................................................................................................t..ht..shs...c.s...p...pl.clh.sNluss.p..-.s...t.s.shp..G....A-GlGLhRTEalahsp....s.....p.....hP.s...cE.............................................................................Qhpsa.p.p.lh.p...u.h......t..s..+....s...V.ll..Rs.h....Dh.su.c......+...Lsh..h................................................................................................t...s.cE..t.NP..hL..GaR..u.lR....ls..l..sp.....s-laphQ.l.RAll.RAus.....ts..................slcIM...lPhlsolcEh.......c..........t...s+.p...l.......l.c..........c.t........t....t.p.l.........t...p...p..............h.....h...p...t.....s.....l...c.lGhMlElPu.sAhhA-p.h..u........c.......c.......s.......D.........F...FS...IGTNDL...T...QashAhDRssst..........lutl.................asshs.P....ull.chlppllcs...A..c....p.pG........................thl.GlC.GEhuG.D.s.p.ss...hLl..s.h.....Gl-plShss.s..s.l.sts+hhltph.h.................................................................................................................... 
0 656 1260 1670 +3706 PF01327 Pep_deformylase Polypeptide deformylase Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 22.00 22.00 22.00 22.60 21.30 21.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.89 0.71 -4.83 130 7120 2009-01-15 18:05:59 2003-04-07 12:59:11 16 13 4525 176 1596 4663 3258 154.60 34 87.31 CHANGED hhpIlphs-s....hL+phupsVp...phs...sp.lppllccM..h-...TMhss.s............GlG.LAAPQlGh..shRlhl....lchsppptt.....................llINPcl.................................................hsp......pthtttEGCLSlP.....s....hhspVpRspplplchhD.hpGpphph.c...hpG.......h.hAcslQHEhDHLsGhLalD+.....lsphcpt .........................h.hpIlphscs....hL+ph.ucsVs...ths...........sp.lppLlcDM......h-TM..hss..p......................................GlG..LAAsQlGl..spRlll.......lc....ls..p..ppst.......................................lhlNPcI...............................................................t.p.p.......t..p.....s..t.tEGCLSlP..........s..hhu.V.t.Rhpclplch...hD...t.....p............G....pt...hpl..c...scG.......h..hAhslQHEhDHLsGhLFhD+ls..p.t................ 0 529 1053 1351 +3707 PF01562 Pep_M12B_propep Reprolysin family propeptide Bateman A anon Pfam-B_117 (release 4.0) Family This region is the propeptide for members of peptidase family M12B. The propeptide contains a sequence motif similar to the "cysteine switch" of the matrixins. This motif is found at the C terminus of the alignment but is not well aligned. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild --amino -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.60 0.71 -4.36 119 2978 2009-09-11 14:07:58 2003-04-07 12:59:11 14 104 182 0 1366 2653 2 126.10 24 15.43 CHANGED hEllhPp+lp.......................................................hpttt..............ph....splpYplphpGcphhlcLcpscthlussashh...............pYspsGsh.hspps.ht....scCaYpGtlpu.s..s.S..hVul..SsC.uGL+Ghlphp.stsYhIEPl........pts..ssh........pHllY .................................................................................................................................t..................th....tphpap.l.ps...G.c.ph...hLcL.p.h...s....p.p.l...lu.ts.ashp......................hh.s....p.s....G...pt...hspt.s.................pcCaYpG....p..l.ps.ps.....s....S...........hsu..l..SsC...s...G...L............c........G.hhp.hp...s...ts.YhIE.Pl...........pts......sp.................Hhla............................... 0 218 304 662 +3708 PF03413 PepSY Pep_M4_propep; Peptidase propeptide and YPEB domain Bateman A, Yeats C, Rawlings N anon Yeats C Domain This region is likely to have an protease inhibitory function (personal obs:C Yeats). This model is likely to miss some members of this family as the separation from signal to noise is not clear. The name is derived from Peptidase & Bacillus subtilis YPEB. 
20.90 15.00 20.90 15.00 20.80 14.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.15 0.72 -3.33 348 4684 2012-10-01 23:09:26 2003-04-07 12:59:11 14 98 1721 4 837 3397 168 65.70 19 24.77 CHANGED tlo..hppA.hphA.........tpp...............h.......s....st.hhp....hphttt...................................psphsYc.lp.hp............sstt.........hph...lDAtoGcllppp .....................................................................................lo.cpA.hphA.................hpp.................................h.............s......sp..hhp........hp.tpp............................................................psphsYp..lp..lp...................sstp............hchh....lDAp.oGcllp........................ 2 290 543 701 +3709 PF00311 PEPcase Phosphoenolpyruvate carboxylase Finn RD, Coggill P anon Prosite Family \N 20.60 20.60 20.80 20.60 20.20 20.50 hmmbuild -o /dev/null HMM SEED 794 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.50 0.70 -13.20 0.70 -6.70 65 4128 2012-10-10 15:06:27 2003-04-07 12:59:11 12 6 3076 5 773 3411 2728 540.50 36 85.56 CHANGED 
pLslcLVhTAHPTElsR+ollpKp++IuphLppL-p.phsspcc.................pplcppLpccIphhW+TDElRph+PTVlDElchuLpYFcpsLaculPplhcclppsL....pppasthphPh..........shhpFGSWlGGDRDGNPsVTs-lThcshphQRphslchYlpslcpLhppLShS..hspsss-Lhpplcpcpt..............phschh.ph.t.h..pEPYRltLuhlppRLtsTpcc.pphhssp......................hsssphYpssp-hhp-LpllpcSLppsu.t.lusspLtcLlppVcsFGhpLspLDIRQESs+Hs-slsElscaLsl.....stsYsphsEp-+htaLhpELps+RP.L.....lPs.......ph.....shSccTpEslssh+sltclppcaGscshpoYlISMscssSDlLpVlLL...t+EuGLhpssss.............spLtVVPLFETl-DLcpAPplMppLhslPhYRphlss..............................shQEVMLGYSDSNKDuGhloSsWpla+AQcsLtclAcca....GVpL+LFHGRGGoVGRGGGPu.apAILAQPsuol.sGpI+lTEQGEVlusKYuhP-lAhtsLEhhssAllpAS..Llsss.sp.......ppWpplM-cLuspSpppYRsLVaEpP-FlsaFppsTPlpElupLsIGSRPA+R....+s..stslpuLRAIPWVFuWTQoRhhLPuWaGlGoALpphh...tpssp.....plphLcpMYpcWPFF+shISplEMsLAKsDLpIAppYsppLs.ssc.tct...lappIppEaphTpchlLplTsppcLLsssPs............LpcSlpLRNsYlsPLsaLQVpLL+RhRptspp.............................pphscs-hLcsAL.LTINGIAAGMRNTG ............................................................................................................hhTAHPTp....sh.........h.....h.........................................................h.....h...h.o........ps.sEhp.sh.hhppshhpslPphhpchp....p..h.....t..t......h..........................lp..hstWhGGDRDGN.P.VTsphht.sh.htt.hh...h...h..h...hs................................................................................................................................................................................................................................th...l..h.t....l.........................h..hh...h..FGh.h..hDhRp...puthattsls.t.l.h....t........h.....th................tpY.tt..hsEtp+.t.h.Lhp...p.L..t.t...R.....L........s.................h..............s.....t........h..t.c..Lthhphh...tth.......tshtthlIShspssSDl.Lt.l......lL.......+.Esth.................................................h.lsPLFEpltDLp.tu.thhtphhshshhpth.l.ts......................................................
.hQ..lMlGYSDSsKDuGhhuusWt....YpApptLh....p....h....s....c....p....h.................u...lclphFHGRGGo......lGRGGu...P.s...a....A.l....huQP.......to..l...p.u.tlRl....T.....QGEh.It.p..au....p....hshpsLphhsu.A.h.Lpts...hh........t...sp..................................................pap.t.hM-phu.h.u.h..p.YRsh...Vh.......p.p........p..F...h....YFh.....................tuTP..Ehup.h.s.lGSRPupR.....ps......ts..s....lpsL.RAIPWlFuWoQsRhhlP..sWhGh..Gsuhpth.........p.t..........................p.t.LptM...........hppWPFFpshls.l-MVhuKss..lAth.Ysphh...st.p.h........hhptlhtp.hp.shphlLt..lt.sp.p.p.lhtt.sh..............l.t.pplphR..Yh..ssLshhQ.h.h...lt+.Rt.t................................................thl.hohtGlAs........................................................................................................................................................................................ 0 201 483 654 +3710 PF00821 PEPCK Phosphoenolpyruvate carboxykinase Bateman A anon Pfam-B_1309 (release 2.1) Family Catalyses the formation of phosphoenolpyruvate by decarboxylation of oxaloacetate. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 587 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.87 0.70 -6.35 56 2517 2012-10-02 15:24:17 2003-04-07 12:59:11 13 4 1840 45 434 2002 389 325.30 56 96.18 CHANGED palp-sscLspP-plalCDGS-cEhcpltpphlcsGphhhLt.p+a.NsaLsRocPpDVARVEs+TFIsopscc-ssPssN............WhsPp-hcppl.pplasGsM+GRTMYVlPFSMGPluSPho+l..GVplTDSsYVVhSM+IMTRhGpplL.....ctL.ssctpFV+slHSVGtPh..s....ptsss.WPCNs.ccphIsHhP--+......pIhSaGSGYGGNuLLGKKCaALRIASshA+...-EG.WLAEHMLIlGlTsPp..GccpYlAAAFPSACGKTNLAMltPsl....sGWKlcsVGDDIAWh+hs.p-GpL.....hAlNPEsGFFGVAPGTu.cTNPsAMpol..tp.NoIFTNVAhTsDG.cVaWEGhspp..Ps...........plhsWpG.......cs.Wp.s....pssc.PA...AHPNSRFTsPhsQCPhlc.PcW-sPcGVPIsAIlFGGRRssslPLVhEAhsWpHGVa.lGAohpSEsTAA.Apupsuhlc+DPhAMLPFhGYNhGcYhpHWLshupc.........p+hP+IFpVNWFR+sp.sG+FLWPGFGENtRV.LcWlhcRlcG....c.ssAhcTPIGhlPphssLslpGL.thspt....s...hcclholststWhpEl.cplcc.aapph.GscLPpplhpcLctlcpRlpph ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sap.hpslGDDI.sWh+...s..tp.....G..pL.....hAINPE.sGFFGVA..PGTshpTNPsA........MpTl........hpN....T.......lFTNVAt.T.s.....DG.sV.a.WEGh-c-h...ss............plTDWhG.ps.Ws..................cstp...PAAHPNSRFss..PApQCPhID.Pt.W.EsPpGVPI..sAIlFGGRRs..p....sVPLVhEAhsWpHGVa.lGAsht......S..........Es....T.AA...A.............t...tp....t..............lh+DPhAMhPFhGYphupYh.........................................
........................................................................................................................................................................................................... 0 144 249 359 +3711 PF01293 PEPCK_ATP Phosphoenolpyruvate carboxykinase Finn RD, Bateman A anon Prosite Family \N 25.50 25.50 25.80 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.40 0.70 -5.97 128 2905 2012-10-02 15:24:17 2003-04-07 12:59:11 15 5 2608 26 690 2097 1598 442.70 48 86.52 CHANGED sssplhh.NlssspLhEculp........cs.....EGhlsssGALslpTGpaTGRSP+DKFIVc-ssoccplaW......GslNpPhst-pFctLhp+shsaLss+.claVhDsauGADsca.RlslRVlsEhAWpuLFs..+NhFIRP....sp-ELp.sF..cP-aTllsuPsFpAcPtpc.GspS-shlhlNasc+hhLIuGTpYAGEMKKulFol.hNYlLPt.+slLsMHCSANlGp.c..G.Ds......AlFFGLSGTGKTTLSA......DPpR.pLIGDDEHGWs.-sGVFNhEGGCYAKsIsLSpEsEP-IasAI+..FGulLENVVl.ctps+plDasDsSlTENTRsuYPlcaIsNthhss.huGcPcsIIFLTsDAFGVLPPlS+LTs-.QAMYHFLSGYTAKVAGTEtGl.oE.PpsTFSsCFGAPFhPh+PshYAchLsc+l.......pc.asspsaLVNTGWoGGs...YGs..GpRhslchTRAllsAhLsGsL....sssc..acppshFslplPpsls.....GVsspl 
..................................................................................................................................ts..plhaN.Shs.Lapctlp.......s................cGhlT.ph...GAlsspT.G.haTGRSPKDKalVc-..s....soc-....plaW.........t..ssNpPhstEsa.ppLhthshc.LssK..claVhDuas..Gu.s.cpR.lplR.h.ls.EhA.Wp..u.hF.s.....+.N.M.F....IR.P............o.p...-.E.......Lt......s.F..........c.......P.D.Fslh..N...u.sp....h........p..s...s.....h..p-...G.h...s........S..E.....shl.hhNhs....c+....h.lIuG.T....Y..u..GE.MKKGhFSh.MNYlLPh..cslhSMHCSAN....s....Gc....c......G...DV...........Al..FFGLSG..TGKT.T.LSs......................DPc..R.....pL.IGDDEHG.Ws....Ds..GV..F..NhE.G.G....CYAKsIs..LS.p.E..t..E.P.-..Ia..s.AI+..hsu.lLE.NV.......sl..c.pst..plDa.sD.s.Sh.T.............E..NTRsuYPI.aIs.....Nhs.....h.....Ps....tuu....Hs.p.p..........lIF..LTADA..FGVLPPlS+L.o.s-.QshYHFLSGaT..A..K............lAGTE.+.......Gl....TE..P.ps.......TFSsCFG....AsFls.LHPopYAchLsc+.h.......pt..t.sspsYLVNT.GWsGs.....................GKRhsl+.TRullsAIlsGsl.......p.pu-......htphs..hFsLtlPsplsGVssp....................................................... 
0 226 436 591 +3712 PF01195 Pept_tRNA_hydro Peptidyl-tRNA hydrolase Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 21.70 20.70 19.70 20.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.20 0.71 -4.80 116 4825 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 4548 28 1217 3288 2684 180.00 38 92.48 CHANGED LlVGLGNPGtcYppTRHNlGFhslDpLsc......chs......hphppppthpuhhsp.h...ht..............sp...cllLlKPp.TaMNhSGpuVttlhpaYc.....lps.ccllVlaD-lDLshGpl+l+tsGusG.GHNGl+SIhppLGo.pcFhRlRlGIG...+...P.....ttt...slssaVLupFstpEtptlpp.slppuscslpthlpp...s....hstshsph.s ...............................LIVGLGNP...GscYttTRHNs........GF...hhlDt.LAc...chs.........hshpp.p......p.........Fpu..h.huc...ht...ls.....................................Gc..clhLl+Pp.T....aMNhSG.cu..V.t...s.l......h....s....a..Y.c...................lss....-....-.......l..lV.l..aD-LDLssGplRl............+.t.......sG...us.........G....GHNG..............l.K.S.....I.hs.p.........L................G..............s.....p............s...............F.....h.........Rl..RlGIG....+.Ps....spt...................pVssaVLucF...s.p.p....E..pt.h.lsp...sl-c.uscsl.phhlpp..s..hppshschp............................................................ 0 407 789 1024 +3713 PF03564 DUF1759 Peptidase_A16; Peptidase_A16_N; Protein of unknown function (DUF1759) Griffiths-Jones SR anon MEROPS Family This is a family of proteins of unknown function. Most of the members are gag-polyproteins. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.76 0.71 -4.39 8 975 2012-10-02 13:37:57 2003-04-07 12:59:11 10 76 48 0 921 926 2 119.20 20 16.10 CHANGED FuG-hpEWpsFh-lFpShlHsppcLucl.KFpYL+ShLpG-AAsllpHhsl.TusNYpsAh-tLpcRY-NscpIhpsLhcclhcl.s.sspcssptL+plh-ssscllRtLcplGcps-..DshlspllLpKlspcs+++hlppu+cpcs ............................................FsG.p..h..p.a.tFhp.Fp.shlc.p.tp.ls.s.hp.Khth.L......h..phL....p.G..p.Atphl.p...s.h....h.....ss..t...s..Ypts...hp..hL.pppas.p...p.h...lh.pthhppl.p.h....................ts....tthpt...h...htthpth..h...t..l..pt..h..t.....t......t..........h.........h..ph................tt................................................... 0 563 601 892 +3714 PF03566 Peptidase_A21 Peptidase family A21 Griffiths-Jones SR anon MEROPS Family \N 25.00 25.00 46.00 25.00 19.00 18.60 hmmbuild -o /dev/null HMM SEED 648 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -13.09 0.70 -6.43 5 29 2012-10-04 01:49:40 2003-04-07 12:59:11 8 1 9 20 0 36 0 607.00 68 93.74 CHANGED uDANlAupp..sR+RoRslR..............................................pNP..............ltApsVsVuss+RpRR+RRsG++hss.ssspsAstcluQsLsusolossushooM.PslRshAttclDlussSlGWaaKYLDPAGAsESu+AlGEYSKIPDGLl+aSVDAEhREIastECPsVo-solPLDGupWoLSIISaPhFRTsalAlANlsNc..ElSL-slN-lIpsLNN.luDWRD.lsosQWspFopsssaYhpIhVLpPTYAhhDVPDPT-.GlstoVoDYRLTYKGITsEuNsPTLVDQGaWVGAQaslsPsoEsQpslsc..uGosslusoshhpssuusshthsWA..sLPsGGoAPssssuhT.soSGpFhshchsG.s..uuVhoaTlPpGathEs........ssFAssGDTlTFsh....suGsslslTp......TAPTsTlTlhuohTuossl..sRslss-oG.....hss.l-ssulNRp...plslPPhThuQsssNsPKhEQFLlKETtGsYlVHpKMpNPVFpMTPASSFGuVpFssPGa-hssstsulGGIRDThDsNaSoAVsHFpSLSpSCoIVsKTYpGWEGVTNsNSPaGQFAHoGs.K-DElLsLAccLsscLTGVYPAsDNFAGAVSAhAAshLuplsKSpATuSlIKuVApsAsGslpuuhApLsGllpSl.G+luAR....l+ARRARRRAuRts 
...........MDSNSAS....GKRRSRNVR...............................................................IAANTV..NVAPKQRQARGRRAtSRANNIDNVTAAAQELGQSLDANVITFPTNVATM.PEFRSWARGKLDIDQDSIGWYFKYLDPAGATESARAVGEYSKIPDGLVKFSVDAEIREIYNEECPTVSDASIPLDGAQWSLSIISYPMFRTAYFAVANVDNK..EISLDVTNDLIVWLNN.LASWRDVVDSGQWFsFSDDPTWFVRIRVLHPTY...DLPDPTE.GLLRTVSDYRLTYKSITCEANMPTLVDQGFWIGGHYALTPIATTQNAVE....GSGFVHPFNVTRPGIAAGVTLTWA..SMPPGGSAPSGDPAWIPDSTTQF.QWRHGGFDAPTGVITYTIPRGYTMQYFDTTTNEWNGFANPsDVVTFGQT.GGAAGTNATITI......TAPTVTLTILATTTSAANVINFRNLDAET............TAASNRS...EVPLPPLTFGQTAPNNPKIEQTLVKDTLGSYLVHSKMRNPVFQLTPASSFGAISFTNPGFDRNLDLPGFGGIRDSLDVNMSTAVCHFRSLSKSCSIVTKTYQGWEGVTNVNTPFGQFAHSGLLKNDEILCLADDLATRLTGVYGATDNFAAAVSAFAANMLTSVLKSEATTSVIKELGNQATGLANQGLARLPGLLASIPGKIAAR....VRARRDRRRAARMN............. 0 0 0 0 +3715 PF02160 Peptidase_A3 Cauliflower mosaic virus peptidase (A3) Mian N, Bateman A anon IPR000588 Family \N 21.30 21.30 21.40 21.50 20.70 21.10 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.39 0.71 -4.72 6 102 2012-10-02 15:32:34 2003-04-07 12:59:11 10 11 42 0 23 125 0 173.90 29 22.75 CHANGED NPNSIYIKGpLhF+GYps.hplchYVDTGASLChAs+alIPEEaWpsuc+PIpl+IANsplIpIsKVspsl.lpluGcpFhIPTlYQQ-oGlDlllGNNFC+LYpPFIQapDpItF+hsp..psV.hpplT+AhhsuspuFLEShKKpSpspp..shNIopsp.....-plshlp.hc....-E+hapth..+hptIEpLLEp.VsSEpP ......................................ht.h.hpshlDTGAslChhpc.hlIPcchW.psppsl.hlphAssphhplshhspplplhIs.s.....chFpIP.plY.....pp...-.....o....s..hDhllGsNFhp.LYpPF.lph.p.-.p.lhFph..st......l.hhttlsps.....hphshpth....hps..hpptpp..spp....shp.....th...................................................................................................................................... 
0 1 17 19 +3716 PF01828 Peptidase_A4 Peptidase A4 family Bateman A anon MEROPS Family \N 21.00 21.00 21.00 22.50 20.30 20.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.49 0.70 -5.27 22 234 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 99 7 175 241 1 194.70 32 70.59 CHANGED SoNWAGAVL.....sussYTuVoupFsVPpPosssus.................st.suSAWVGIDGD.ThssA...ILQsGVDFhlp..sGpsoaDAWYEWYP-hAYsFsslslSsGDsItlsVsAoS.osGsAhlENhooGpoVo+Tlou...ousLsthNAEWIVEDFppuso......hVshAsFG.oVsFTGApAss.uGpohGhssATll-lc.Qs..spVLTcsolsusotloVpYl ..................SsNWuG.ulh...........sussaosVousasVPssosss.u.............................s.uSuWVGID..G......s....ssssu...............lLQsGl..shp.............h.....p.........sG..p..ss...........asAW.............a..EWa.......P...s..h..u...h.....s..h...s.....s..hs.lssGDplpsoV.s..s..so...s.o.....s..G..s....ssl.p.N.......ho........sG....ps....sopo..hos.........sssL..sttsAEWIVE....D....h...p....t....s..s.s.......hs..sh.AsFG.sVsFos........spAss......sutp..h.....s...ss....u...p....h..hph.....t..tt..s.s.......................................................................... 
1 28 100 140 +3717 PF01829 Peptidase_A6 Peptidase A6 family Bateman A anon MEROPS Family \N 25.00 25.00 324.40 324.20 20.70 19.90 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.33 0.70 -5.43 3 9 2012-10-04 01:49:40 2003-04-07 12:59:11 11 1 7 40 0 24 0 359.20 60 88.70 CHANGED AhspNNLh+LSpPGLuFLKCAFASPDFNTDPGKGIPD+FEGKVLo+KcVLTQS.INFT....sN+DTaILVAPTPGVAaW....sAcAPAssuAloTTssFsAVsFPGFsSLFGToATsRADQVoAFRYASMNsGlYPTSNLMQFAGSIoVWKsPVK..LSosQaPVATTPsT..SQLVHAIsGLEulLAVGs-NYSESFI-GVFSQSVCNEPEFEFaPILEGlQTLPPANVTVAQAGMPFNLsAGAtsVAGaTGlGsMDAIsI+VTAPTGAVNTAlLKTWAClEYRPNPNosLYcFAHDSPAsDElALQpYRKVARSLPVAVtAKpNAoMWERVKSILKSGLshASsVPGPVGlAATGlpGIu-LIuuLuF .......s.hNhsALsRLSpPGLAFLKCAFAPPDFNTDPGKGIPD+FEGKVloRKDVLsQS.IoFs....uspDTaILlAPTPGVAYW....sAsscAushshoTT.sFsuVsYPGFTShFGTTATS...RSDQVSSFRYASMNsGlYPTSNLMQFuGSIoVWKsPlK..LSosQaPVuT..sPAT...SsLV.HsLsGL-GVLAV.GsDNaSESFIKGVFSQSsCNEPDFEFsDILEGlQTLPPsNVslusoGQPFsLsAGuEssoGlsGaGNMDTIVI+VSAPTGAVNoAILKsWuCIEYRPNPNAhLYQFuHDSPPhDElALQEYRsVARSLPVAVhAAQNAoMWERVKuIlKSuLusASsIPGPlGlAASGIpGLSsLhpuhuF.... 
0 0 0 0 +3718 PF01252 Peptidase_A8 SPASE_II; Signal peptidase (SPase) II Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.30 28.10 23.20 24.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.22 0.71 -4.60 181 4771 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 4358 0 1061 3181 2288 151.40 31 88.98 CHANGED hlsh..hl....lh.lDQhoKhhlhpph.................h..................psh....hlss...h.hslshlp..NpGsAFuhhsst......h.hhhhlslllhhhlhhh.hh+htp.......tthttlu.l....uLllGGAlG.N.llDRl.........hh...........................Gt......VlDFls.h......h.................hta.P..sFNlADsuIslGslll....llth.hh......tt.ppp .....................................................................................hhlslll....ll.lDQhoK..hhlh.ppht.......................hs....................psh..........ll..s..s....h....hslshsp..NpG.....AA.F...S.....hLsst.............ph...hFh.hlslsls.l.h.lh...hh...hh.+p..tp......................pth..htlu..h.....uLl..lGGAl.GN.llDRl...........hp............................................Ga.VVDalph......h...........................hp.a..s.....lFNlADssIs...lGshllllth.hh.......pptt....................... 0 371 723 907 +3719 PF01640 Peptidase_C10 Peptidase C10 family Bateman A anon Pfam-B_1522 (release 4.1) Domain This family represents just the active peptide part of these proteins. Residues 1-120 are not part of the model as they form the pro-peptide, which before cleavage blocks the active site from the substrate. The catalytic residues of histidine and cysteine are brought close together at the active site by the folding of the active peptide. 
21.80 21.80 21.80 22.00 21.40 21.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.78 0.71 -11.58 0.71 -4.53 53 317 2012-10-10 12:56:15 2003-04-07 12:59:11 12 11 122 17 47 318 19 176.30 29 29.83 CHANGED WsQssPYNphsP..............stpssTGCVATAhAQlMpYacaPpp......uhGsh.sYpsstsp.........hphsh.....sposYsWssMhsshsp.........shspppt.cAVApLMtclGhAVpMpYsss...uSG.uhstp.shpALpphFsYsts.lphh...+sthssspWtshlhpELss..utPVhYsGsstss.....GHAFVhDGY....................scsuha...HhNWGWuGt.usGYap ..........................................................................WsQttPYN....t....hsP...................ttpshsGCVATAhAQlMta....ap..a....Ptp....t.tt...sht.t..t..........................ttt..asWsp...h...sshp...............sptpt..pslupLhtclG..hu..l..pM..pYs.s.........uSu..u...tt....s.ps.....Lpp........a.sYs.....ts..ht.h....+p.hs....t..pp....a....phl.h.p-Lpp...s..p..P...Vh.hs..Gsstss..................GHAaVhDGa............................................................................s.pps...hh........HhNaGWsG...ss.Gaa................. 
0 18 39 47 +3720 PF03415 Peptidase_C11 Clostripain family Bateman A anon MEROPS Family \N 25.80 25.80 26.00 29.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.47 0.70 -5.49 7 367 2012-10-03 02:24:44 2003-04-07 12:59:11 9 25 244 4 90 357 28 328.40 17 58.43 CHANGED VahsuDN..sLptah.pDlpEMhpu.pss.....sllllhDth.....................hppsphh+lsccsph..ttl.sssth-hN.GDsssLcsalphhpspa-ADphhLlhWsHGsuah.csp...h.s+ulsaD-sN......tsplthsElpcsLp.....ts.plDlLuFDACLMGohEVhYplRs.....pADYlVASu...PG.GacY.phls.l..ssssss.....................plut.hV-pYt-.hssshh.........shoLSlaDhScl-tlhpslctlApsh................ss..pp.............shhsh......................................as.hspt.hsphsYpsLh-Lsphhpt.pphssthtth.ts...............................................................h....pshpphllYu..aG-hsst....shppuh...........oIaLPpssp..............shptahssasslt.....a.tchtWsc................l-pW ................................................................................ah.uss......sh..t...t.h...t..sltphh.t..s..htt.........t............plllhhst..........................................................t...thh.....h...p.t.t.t...h.........tth...hp...........p...s....s...s.s...ps....lpphlp....shp....a....PA.......c...p......YsLllhsHG............sGW..........h.....................s+..u.....h..s..hDpp.p......................................tt.t.hs..l..s.-lspulp.................sshph-hlhFDuChMuslEVAYpL+s......hscYllAS.s.sph.......u.......Ghs..Yp..p.hhstl..........s.s..ss..s..ht...........................................p.h..u.pthh.s.Y..t.s..h.s.sh.................................tho..l.olhDhsp.l....c.s.lt....phhcp.l.h..th.............................................................................................................................................................................................................................................................................................................................
........................................................................htt.h........................................................................................................................................................................................................................ 0 51 78 88 +3721 PF01088 Peptidase_C12 UCH; Ubiquitin carboxyl-terminal hydrolase, family 1 Finn RD, Bateman A anon Prosite Domain \N 20.80 20.80 21.00 21.00 19.10 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.62 0.70 -5.16 72 1199 2012-10-10 12:56:15 2003-04-07 12:59:11 16 18 337 29 757 1143 15 189.70 30 64.44 CHANGED sWhslESsP..sVhsphlppLGV..pshphp-lauL.Ds.....Lthl.pPlhullhLF.h.........hptppsttht...........tpt.sptlaFh+Q..sIsNACGThAllpslhN..sts.........lp.GstLspahchstsh.sP.ppRuphLpss....ctlcpsHsuhA.....ppu...........pspsst......pcs......saHFluaVs.....hsGpLYELD..Gh+p.tPls+Gtss.......scsalpcst...llp.chhp+h ................................ah.lEusP..tlhs.....ph....l.pphGl...pshphp...-lauL..-t..............p..lt.....l.......................pPlhul.....lhLF.ht.............................tt.........tt..t..p....tt....................h.......p..t..spslaFh+.Q..sIsNACuT......hA..............llpslhN........spt.......................l..p...G.s.tLpph.h..c......s..t..s..h....s.s.t.+uhhlp.ss...........p.tl...cp....sHsshA...ptt...............................................ppps...........cpcs...........saHFlual..............hsGpLaEL...............D..Gh......+....tP..ls.h..Gs.hs........pcshl.ptst....lhp.phh................................................................ 
0 230 382 594 +3722 PF01470 Peptidase_C15 Pyroglutamyl peptidase Bateman A anon [1] Domain \N 22.70 22.70 22.70 22.70 22.20 22.60 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.15 0.71 -4.48 9 2041 2012-10-01 19:48:29 2003-04-07 12:59:11 12 13 1648 60 470 1259 104 185.10 39 88.92 CHANGED hKlLlTGF-PFGs-slNPoh-ss+pLst.ppIusAplhuchlPssFpcut-sLpcslpch+PDlVIslG.AsGRotITsERVAlNlsDA.R..IPDN-GpQPlDcsIpsDGPsAYFoTLPlKAMspsl+csGlPAslSpoAGTaVCNalhYtshah.sppu.sl+uGFIHlPalP-QVlsK.tp.....PSMsL-s.ltGlpsAIcsuhct. ...................................................clLlT.G.Fs.PF.u.s.............c........p....l.....NPu........h....E..s....l...c...t.L.............s...................p...............p.......I...............s.............s........s.............p.........l.......h.....t..h.....pl......P............s..s.......F......t.c.u....h.........p.......h...l....p...p....t...l....p.......p.......h.......p.......P....D...........h...V...l...slGQA.G.G.Rs.s...l.TsE....R....VA...I....N....h..s..DA....R.......Is.D......Np......G...p.Q.P.l.Dp..s..I...h......-..G........s...s....A........Y..F........o.sLP........l..KA....hl...p..........sl.....p..............p..........p..........G....l..P.......u...........s..........l.............S..poAGT..aVCNalhYthh..a...h....h......p...p.........p.....h.........t....h.+.....uG....FlHlP.....a.hs.-.Q.s.h.s.+.s.s.t..........suMslpplhtul.phultsh...p.................................................................................... 
0 141 256 365 +3723 PF01831 Peptidase_C16 Peptidase C16 family Bateman A anon MEROPS Family \N 20.20 20.20 20.40 20.20 16.80 19.10 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.58 0.70 -4.85 2 113 2012-10-10 12:56:15 2003-04-07 12:59:11 12 9 56 0 0 122 0 244.20 70 4.43 CHANGED AFDAlhSEsLSAFYAVPSDETHFKVCGFYSPAIERTNCWLRSTLIVMQSLPLEFKDLtMQKLWLSYKAGYDQCFVDKLVKSsPKSIILPQGGYVADFAYFFLSQCSFKsaANWRCLcCsMELKLQGLDAhFFYGDVVSHMCKCGNSMTLLSADIPYThcFGVRDDKFCAFYTPRKVFRAACAVDVNDCHSMAVV-GKQIDGKVVTKF.GDKFDFMVGaGMTFSMSPFElAQLYGSCITPNVCFVKGDVI ..................s...................-.pahKVhGLYsPphTRsNCWLRSVLhVMQKLPhpFKDhslQcLWl.YKttYsQhF.VDpLVspIPtsIVlPQGGYVADFAYWFLo.C...DapshA.W+ClKCDhu.LKLpGLDAMFFYGDVVSHVCKCGpSMsLIssDlPaThHFuL+DchFCAFhT.RpVa+AACsVDVNDsHSMAVVDGKQIDs+hVTphouDKFDFIlGHGMSFSMooFEIAQLYGSCITPNVCFVKGDII.............................................. 0 0 0 0 +3724 PF00648 Peptidase_C2 Cys_protease_2; Calpain family cysteine protease Bateman A anon Prosite Family \N 20.10 20.10 20.30 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.94 0.70 -5.42 9 2514 2012-10-10 12:56:15 2003-04-07 12:59:11 16 97 332 30 1434 2331 90 246.30 28 35.47 CHANGED LFpDPpFPssspSLtapphsP.....htlcWKRPsEIsssPpFIlGGAo......RT....DICQGsLGDCWLLAAIAsLTLN-cLLtRVlPtsQSFpEs.YAGIFHFQFWpaG-WV-VVlDDpLPT.+sGc..LlFsHSs-tsEFWSALLEKAYAKlpGsYEALsGGuToEuhEDFTGGlsEaaEL+cAPs...................................NLhc..IIt+hlc+ss.............LhGsShchsss.sshEshphptLV+GHAYSVTGsc-Vph....+Gph.pLlRlRNPWG.pVEWsGsWSDssspWs.lD.--+spLphphp.DGEFWMSFcDFlpaFoRLEICNLT 
.....................................................................................................................................................................................W.hR.....s.....t.........c......h.......h......t......p................s........t........h.....h.......t..s.h.p...................t...........s..l..t..QG....l........G.c.....C.a....hl....u....u.l....u....s....l.............s....................p.................p.........h.................h......t..p.......l......h....................t....p................p..........h.............p......p..............p..................h.............G..h.......ap.......h...p.............h.....a.....p............G..p..W...............h.p.Vl.lD.D.h.L...P.............s......h............p.......s....p....................l......h...a.........s........p.........s..........t.p.....t......s......-....h........W.ss.L.lEK...AYA.K......l............t........G...........s..Y...........p......s.l..............p........u.G....p....s.........t.......c.....u...h........c.h......TG..s.....h..s....p.....h...t......h....p..p...s..................................................................................................................................................p.h.h.p......hh..t...p....h....h...p.ptp...............................................h.h.ss....s.h.....t........................t.............t...t....h...t...t...........u...L.......h..t...tH..AY..ul.h...s.h.p.p.......h.p.h...............................................p..Llcl.....+N.....PW...............G......p..................h.......c.....W...p....G........W......u.........-..t...........s...............p....W..........p.........................t.........p....t.......t......h.....................p...................t..................s.G.......F..W.....hs..hp-h.h.p.apth....................................................................................................................................................... 
0 477 681 1105 +3725 PF01478 Peptidase_A24 Peptidase_C20; Type IV leader peptidase family Bateman A, Yeats C anon Yeats C Family Peptidase A24, or the prepilin peptidase as it is also known, processes the N-terminus of the prepilins [1]. The processing is essential for the correct formation of the pseudopili of type IV bacterial protein secretion. The enzyme is found across eubacteria and archaea [2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.44 0.72 -3.88 103 5022 2012-10-02 13:41:03 2003-04-07 12:59:11 13 9 3530 2 1191 3816 981 108.60 23 49.30 CHANGED llhhshLlhh..uhhDhcpthlPs.......tlsls.....hlhhullht.............h.h.shhshhttlhuu.........hssalhhhh.....................................................................................................hh.....................tht....uhGtGDhKLhuslGshhu.......httl...................hhhlllus.lhGslhsl ......................................................................................................h.hhhshllhh..uhhDhcp........th...l.P.s........tl.s...hs........hh.h.h.ullhs............................................h.h....hhs...h..h.s.s.lh.Gu..................hss..ah...hh..hh........................................................................................................................hh....................t.hp.........uhGhGDlKL...huul.....G...s...a.h.G...............hp.tl........................h.h.l..l.h.h.uu...lhuhlhh.h............................................................... 
1 403 759 1005 +3726 PF03510 Peptidase_C24 Endoptase_C24; 2C endopeptidase (C24) cysteine protease family Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 25.20 26.00 24.70 18.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.36 0.72 -3.86 5 118 2012-10-02 13:45:52 2003-04-07 12:59:11 9 6 60 0 0 131 0 103.30 42 5.21 CHANGED GYuVHIGNGlYISlTHVAsussclhus-hKss+osGEhChl+ustIp.ousslGoGsPlpDspssPluTshpc+oasTTos+IsGhpsssso..pT+pGDCGLPYlD .....GaslHIGsGlYlSssHVA+uus.h.spchhshp.ssu.-hChl+ustIt..SsAtl.u.pGpPVp.DshtuPlu.................TthtcKsaoToohKIsGhthsssT..pTppGDCGLPYhD. 0 0 0 0 +3727 PF01364 Peptidase_C25 Peptidase family C25 Bateman A, Griffiths-Jones SR anon Pfam-B_516 (release 3.0) Domain \N 20.00 20.00 20.00 20.10 19.20 19.90 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.57 0.70 -5.49 77 266 2012-10-03 02:24:44 2003-04-07 12:59:11 13 33 169 1 119 314 445 362.10 21 31.09 CHANGED hlII....s..ssph.................hstsppLssa+psp......GhsstlVshpplhspas.............sG......sssu.........................IRsalchhasph.s..t...h.ph....llLhGDs................................tt...lPshps...................................................................................................................s.hsoDsaauhl-s...............................................scths-..lslGRlssposppspphlsKllsY-ps...t.....t...........W+pph..hhluss...................................................................................................hp..thtsthtthhst.h.sp+lahs..sh.ppss............................h..............lhpsls.p..GshlhsY..h.G......HGutssh........sstt.............l......shsshpshsNts....phPlhlshoCthupa.sss................ohuEthl......hsssuGAluhlusoc........hsh...................................sss.....spthspthhctlhts.t............p..............slGcshhtu+tphh............................................hsLlGDPulpl 
.....................................................................................................................................lII....s....stph.............................hstspcLssa+ppp.......Ghpstllsspplhspas..................G....sstA.........................lRpal+hhaspt.s.......t.pY...llLhGDs.........................................thlsshts.............................................................................................................................sthso.Ds.aa...uhlcs...............................................ss.h.-...lslGRhsspos......p..-spshl.sKhlpYpps..........us................Wppph..hhhuss.....t........................t..............................................................................................................................................................................................hs....thspthtt.th.st....h.hp+lhhs..sh.ptss................................................lhpths.p........Gs.hlh.s.Y..h.G.............HGutsth..................uppp...............................l......shsc.hp.s..hsNts.........phPlhlos.sCthu...pFDtst....................ohuEthh...........hsspu...GAlAhhuooc.hsa...................................ssh...............spthspthhctlhsptts................slGcshphuppphhpt...............................................shhp......asLlGDPulph............................................ 0 77 113 119 +3728 PF03785 Peptidase_C25_C Peptidase family C25, C terminal ig-like domain Bateman A, Griffiths-Jones SR anon Pfam-B_516 (release 3.0) Domain \N 20.80 20.80 20.90 21.40 20.50 20.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.39 0.72 -4.17 3 29 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 9 1 7 33 38 79.70 41 6.38 CHANGED PTcMQVTAPAsIstosASasVuCDYNGAIATIScDGcMaGTAVVc.sGsATIsLTcuIT-EoNLTLTVVGYNKlTVIKsIpV ................Ppp.phThPApls.spuShslsss.sGuhssISpsGphaGouVsp.sGsATlNlT.slTs.pushslTlst.NhhsVIKsIps......... 
0 7 7 7 +3729 PF03412 Peptidase_C39 Peptidase C39 family Bateman A anon Bateman A Family Lantibiotic and non-lantibiotic bacteriocins are synthesised as precursor peptides containing N-terminal extensions (leader peptides) which are cleaved off during maturation. Most non-lantibiotics and also some lantibiotics have leader peptides of the so-called double-glycine type. These leader peptides share consensus sequences and also a common processing site with two conserved glycine residues in positions -1 and -2. The double- glycine-type leader peptides are unrelated to the N-terminal signal sequences which direct proteins across the cytoplasmic membrane via the sec pathway. Their processing sites are also different from typical signal peptidase cleavage sites, suggesting that a different processing enzyme is involved. Peptide bacteriocins are exported across the cytoplasmic membrane by a dedicated ATP-binding cassette (ABC) transporter. The ABC transporter is the maturation protease and its proteolytic domain resides in the N-terminal part of the protein [1]. This peptidase domain is found in a wide range of ABC transporters, however the presumed catalytic cysteine and histidine are not conserved in all members of this family. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.43 0.71 -4.34 36 3484 2012-10-10 12:56:15 2003-04-07 12:59:11 10 38 1880 2 701 2968 283 127.90 24 22.09 CHANGED +h.hlhQscppDCGlAsLuMlLpa..aGpphslscLRphttsstcGsohhuLhpsucphGhcspulpschs.hh....ppl.hPhIsah....phsHahVlhph....pcspllIsDP..uhGphplsppcFpppao.GhhLhhsssss ............................................................t...hh.Q.pt.-..CGh....AsL.shlh.pa.......a...G...p.......p....h........s.......h...t.........p.......L.........+...........c..........h.........h..........t........h............s............t..........p....G.....s.o....h.h....s.......l......h......c...s.A.c..p....l...G.h.....p....s+.s...l...p....h.shp....pl...........spl..s..h..Ps.Ilah......................phsH..asVl..hph.......................cps......p.....l....h..l..t..D.P......uhu.h...h.........p...h.s..h.pc.F..t.pta.....s.....Gh.h.lhh..t..s...................................................................... 1 198 417 576 +3730 PF00770 Peptidase_C5 Adenovirus endoprotease Bateman A anon Pfam-B_900 (release 2.1) Domain This family of adenovirus thiol endoproteases specifically cleave Gly-Ala peptides in viral precursor peptides. 20.90 20.90 21.20 21.50 20.50 20.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.46 0.71 -4.74 13 133 2012-10-10 12:56:15 2003-04-07 12:59:11 13 1 103 2 0 101 10 176.50 66 88.43 CHANGED hFLGTFDKRFPGFlucsKhuCAIVNTAuRETGGlHWLAhAWpPpSpThYhFDPFGFSDp+LKQlYpFEYcuLL+RSALuuosDRClTLlKSTQoVQGPsSAACGLFCshFL+AFspaPssPMcpNPsMsLlsGVPNphhpsPps.ssL++NQptLYcFLpp+SsYFRpHpcpIccsTuFs+lp ...........YFLGTFDKRFPGFlu.scKlAC.AIVN..TAGRETGGVHWLAFuWNPRSpTCYhFDPFGFSDcRLKQIYpFEYEuLLRRSALA.o.sDRCloLEKSTQTVQGPpSAACGLFCCMFLHAFV+WPcpPM.DtNPTMsLLTGVPNuMLpSPpV.sTL+RNQEpLYcFLtpHSsYFRsHcspIc+sTAF-+h.......................................... 
1 0 0 0 +3731 PF03568 Peptidase_C50 Peptidase family C50 Griffiths-Jones SR anon MEROPS Family \N 25.80 25.80 25.90 26.50 24.70 25.50 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.13 0.70 -5.78 18 368 2012-10-03 02:24:44 2003-04-07 12:59:11 12 14 277 0 272 385 4 306.50 25 21.85 CHANGED lphspssssL.....lloR..lssspsPhh.l+lPlp+hssp.ts.t.............................shcchccIlppsppss........pspps+ccWWpcRhtLDp+hppLlpslEcsalusapslh.s..h...psshpphspph.shLpptL.scpt..........hphsphlLclh.......................hhsthpcLhhhhlclLpapt.t........uhsEtchphhphhlc-slschppppspp......ppaslLVlDpplpphPWEslssLps.s.VoRlPSlphLhphhpppcsphp.th.hssp........................shYlLNPsuDLspTpccFcs..hFpphts.....WcGhhuphPop-phhcsLpssDlFlYhGHGuGtQYlpspslp+hpppusshLhGCSSstlt.pGp.hcstGshhpYlhAGsPhllGsLWDVTD+DI ...........................................................................................h....l.+...h.........hh..h.hsh..t..t........................................................hhtph.tl.p.t..s.............sttt+ttWWt.tR.t.LDp.phptll........tph-.p..hhu..sa.pslh..................t.......tth.ph..ltp...l.tpt.....................................................p..h.hp.h.........................................t.h..h.....hphht...........................................shs..t.p...h.............h.t..htt...h.t.t.................................tt.hhLllDpp..lp.h.hPWEshshhpt.....s...lsR..hP..Sl.t.hlhp..h....h..........p......t........t.................................................................................................................shallN.Ptu-.....LtpTp..p...php......hpp.............Wp.uhh.sp........Pst.p......p.......h.ptL..t..pp.....c...........lhl.YhGHGuGtpal.ps.pt.l...t.+.......h...........p...p......p......u....s......shLhGC....SSutlt.tG...........p..h-sh...............Gh.hpYhhAG.sPhllusLW-VTDpDI.................................................................................. 
0 103 169 235 +3732 PF03421 YopJ Peptidase_C55; YopJ Serine/Threonine acetyltransferase Bateman A, Mistry J anon Bateman A Family The Yersinia effector YopJ inhibits the innate immune response by blocking MAP kinase and NFkappaB signaling pathways. YopJ is a serine/threonine acetyltransferase which regulates signalling pathways by blocking phosphorylation [1][2]. Specifically, YopJ has been shown to block phosphorylation of active site residues [3]. It has also been shown that YopJ acetyltransferase is activated by eukaryotic host cell inositol hexakisphosphate [4]. This family was previously incorrectly annotated in Pfam as being a peptidase family. 19.90 19.90 20.30 19.90 18.00 19.20 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.14 0.71 -4.41 17 290 2012-10-10 12:56:15 2003-04-07 12:59:11 11 1 201 0 29 190 4 171.80 37 54.49 CHANGED spsLhpYuptshsplpsspt...s.shsphDlchLshLstshNpRhPsLNL+hacSsp-hhpulpp.......pssstuhRsllp.....hstsshH+sAlDl+.+.sG+solllhEPAsh.s.t......lsthsphtpphppphhspschshlEsphQ+S.s-ClhFuLshAl+uapcp.shhcplHcs.tpts ................................ctLpshhptlpsplhcGphh..phshpchDlchh..PhLVsptNp+aPp.........LNL+hhp..Ss.p-..h...spuIKp..............ht.puspStRhlls.....hss....sulHhsslDh+......h.....h.sG..Ks..SlIlhEPAshsshsss...............hLuhc.sc.....s..sh.-pp..p..L...P..cs.....hhuhlEhDIQRSso-CsIFSLuhAKKhhhEt.tshs+lH-cphp.t................... 
0 2 8 16 +3733 PF03290 Peptidase_C57 Pox_I7L_G1L; Vaccinia virus I7 processing peptidase Mifsud W anon Pfam-B_4082 (release 6.5) Family \N 21.00 21.00 21.20 281.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.35 0.70 -5.96 11 72 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 43 0 0 60 7 422.30 72 98.86 CHANGED MDRYTDLVINKIPELGFTNLLsaIY..ShsGLshslDlSKFhTNCNGYVV-+a.DcSsTAGKVSCIPluhLLELVcpthL.......stPsstcp...ELslKctLl......spL+s+Ypshp-lhsLP..TSlPltYFFKPhL+EKVSKAlDFSQMDl+sDDL.S+tGlpsGE.NsKlV+lKIcPD+cAWMSNpSIpsLlu.hua.GoEVsYlGQFshpFLNshslaEKh-hFht+phLualL+DKI+....puppRYVMFGFCYhuHWKClI....aDKccphVsFYDSGGN.PsEFHHYcNFYFYSFSDGFNsNs..cs.SsLsNpNsDlDVLFRFFpssF.ss+hGCINVEVNQLLESECGMFISlFMllCshpPPKGFKulRKlYTaFKFLADKKhTLaKSILF.........Nhschsl-lccl-scul+EYpKME+WTpKuIssLssKIos+sNcIlN M-RYTDLVISKIPELGFTNLLCHIY..SluGLCoNIDVSKFLTNCNGYVVEKY.DKSsTAGKVSCIPIuhhLELVESGaL.......S+.P.NSSD........EL-QKKELs......-ELKsRY+SIhDlFELP..TSIPLAYFFKP+LREKVSKAIDFSQMDLKIDDL.SRKGI+TGE.NsKVVKh..KIEPERGAWMSN+SI+NLVSQFAY.GSEVDYIGQFDMRFLNSl..AIHEKFDAFhNKHILSYILKDKIK....SSooRFVMFGFCYLSHWKCVI....YDK+psLVSFYDSGGNIPoEFHHYsNFYFYSFSDGFNTNc..++.SVLDNoNCDIDVLFRFFEsoF.GAKlGCINVEVNQLLESECGMFISLFMILCTpTPPKSFKSLKKlYTFFKFLADKKMTLFKSILF.........NLpDlSL-lsEoDNsGLKEYKRMEKWTKKSINVICDKlTTKlN+IVs.. 0 0 0 0 +3734 PF00851 Peptidase_C6 Helper component proteinase Bateman A anon Pfam-B_326 (release 3.0) Family This protein is found in genome polyproteins of potyviruses. 
25.00 25.00 33.40 31.50 21.30 20.50 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.42 0.70 -5.86 19 1177 2012-10-10 12:56:15 2003-04-07 12:59:11 13 27 145 1 0 1300 0 409.10 47 19.54 CHANGED pFapGaspsFhch+s.pshsHsCssshs.VcpCGclAAllsQulaPstKITCppChpphpshstcEhtphlpsphppthphhpsthspFsHlpphLchlppthpspNhNhpshtEIh+lls.spppuPFs+lpclNchLlKGspsosp-htpAoppLLElsRahKNRT-sIcpGuLpoFRNKlSuKAplNsuLhCDNQLD+NGNFlWGpRuYHAKRFFsNaF-plDPucGYs+YhlRcsPNGpRKLAIGsLIVstsLcphRpphpGcsIpp.sloppClSppsGNaVYPCCCVTh-DGpPhhSElhhPTKpHLVlGNoGDPKYlDLPpspsspMYIAK-GYCYlNIFLAMLVNVsEc-AKDFTKhVRDhllPcLGcWPThhDVATAChhLslFaP-spsAELPRILVDHsspTMHVlDSaGSLoTGYHlLKAsTVsQLIpFAsssL-SEMKaY+VG ..................................................................................................................................................................................................................................................................................................................FW+GaspsahphR..h.s.ps.H..p.C.t.s.s.ls.lcpCG.clAAlhspulhPChKITC.pCspchtphstp-.htphl.pp.phpphtp...p...hts.hs....cFt+....hhphLphhcp..h..h..psts..t..sh..phht-lhchls.phppssapplpclNphLlKG.s.ps.Tsp-hppAoppLLEluR..ahKNRp-slccGsl.psFRNKlSuKA.....plN.sLhCDNQLDpNG.NF....lWGpRt.YHAKRFFuNYFE.lDPscGYppa.hRhpPNGpRKLA.IGpLllPhshpphRcphpGc.lpp.sloppC.lSp.psssaVYsC.CCVTh-DGpshhS-lhsPTKpHLVIGNSGDsKYlDLPps.c.sppMYIAKcGYCYlNIFLAMLlNlsEp-AK-FTK.hVRDhlls.cLGpWPThhDlATsCh.LplaaP-stsAELPRILVDHpsphhHVlDSaGShooGYHlLKAsTVsQLIpFspssL-S-hKaY+VG...... 
0 0 0 0 +3735 PF01830 Peptidase_C7 Peptidase C7 family Bateman A anon MEROPS Family \N 20.50 20.50 70.50 70.50 20.20 20.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.77 0.70 -5.22 14 61 2012-10-10 12:56:15 2003-04-07 12:59:11 13 3 5 0 0 57 0 157.40 65 35.17 CHANGED MSCLRKPSQSLVLSESVD.....PTT.....VD..PFVDVRAEEVVPTGCMTLWEYRDSCGDVPGPLSHGDLRRLRTPD.GVCKCQIHFELP....TVLKSGSTGTVPEHPAVVAAFMGRPRRCSLEQRTKELDFRFLQLVHEGLPVRPSYMIARPPRPVRGLCSSRDGSLAQFGQGYCYLSAIVDSARWRVARTTGWCVRVAcYL+LLQWVGR+SFGSFQIEESAVEHVYHVI.......VDTEaQSEQDGALFYQAVSDLAARDP .....................MSCLRcPSQuLVLstS.D.....Pso.....lD..PFsDVtsEEVVPpGChTLWEYRDSCGsVPGPLSHGDLh+L+TPD.GVC+CQIHFELP....TVL....................................................................................................................................................................................... 0 0 0 0 +3736 PF03569 Peptidase_C8 Peptidase family C8 Griffiths-Jones SR anon MEROPS Family \N 21.60 21.60 22.20 228.00 21.20 21.50 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.45 0.70 -5.03 2 18 2012-10-10 12:56:15 2003-04-07 12:59:11 8 4 4 0 0 20 0 161.60 79 15.59 CHANGED hARuIGlSptustELVRsT+VsEsKPHLsPMpEAp.s.cQ.L.stRuh.Vssc.tslEls..clPscEGcCa.h.Fp.s.hT.hIhspKP..p-lluhhp.sssThDSL-Io+psthVHh.sG-sapsY+pIhAhLc.h.h.s.p.hlVGAcpspltDYVtAutpFLhp..pWl+NGL+lAtthhpPt.lhK..h.NsShP+sl.psD..YIp LARAIGLSHsAsuELVRAT+VDEsKPHLVPMEEhKEAPRQQLVPRRSTFV-NHEEEVEVDsLRLPTEEGRCFELLFN.NQlTPAIFDKKPLLRDVL-VFEENVCThDSLEISHSD+CVHIVPGETFRNaKEIKAVL+VILWN-P-.ILVG............................................................... 
0 0 0 0 +3737 PF01707 Peptidase_C9 Peptidase family C9 Bateman A anon [1] Family \N 25.00 25.00 115.50 114.60 22.30 20.00 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.34 0.71 -4.72 8 257 2012-10-10 12:56:15 2003-04-07 12:59:11 11 7 47 2 0 311 0 192.80 66 8.58 CHANGED DPWIKsLTNsP+GNFTATlEEWQAEH-uIMpuIpspusssDsFQNKsNVCWAKuLlPVLcTAGIcLTsEQWsslh.sF+-D+AYSPElALN.ICTKaaGlDLDSGLFStPoVPLoYc.......ssHWDNpPGG+MYGaNppVAtpLp+RYPhlptthcoGcQlslspt+lpshsscsNllPlNRRLPHuLVspacppcuuchEphlsK DPWIKTLpNsPKGNFpATIcEWcsEHsuIMAuIss+thohDsF.QNKANVCWAKuLVPlL-TAGI+LsccQWSpIl.AFKEDKAYSPEVALNEICTRhYGVDLDSGLFStPhVSlaYt.......sNHWDNRPGG+MaGFN.EsAshLER+YPFh+Gphsh...s+QlsVsTR+IcDasPssNllPsNRRLPHSLVsEH+ss+GpRhEWLVsK........ 0 0 0 0 +3738 PF00413 Peptidase_M10 matrixin; Matrixin Bateman A, Finn RD anon Prosite Domain The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null --hand HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.42 0.71 -4.50 66 3213 2012-10-03 04:41:15 2003-04-07 12:59:11 19 158 1096 316 1273 3093 388 160.90 27 39.87 CHANGED +W.p+.pp....LTYplhsho..sclspspVcpslc+AFpsW.ucV......oP.LpFpclt.......p...s....ADIhIsFs............pspHGDshP..FDGsuG.........sLAHAahPs..........hsGDsHFDsDE...pWThs...............................................................................................................................................................................spGhsLhhVAsHElGHuLGLsHSss.sAlMaPhYp.ht.p......pLspDDlpGIQpLYG 
................................................................................................................................................................................................................................................................................................................................................................................h........tt.h....pp....shtpA.hp..h...W.sp.s.................ss..lp..Fppl.............p.tp..........ADI.h....l.t.Fh..............................p.t..t..c.....s......s.........s...............s......a.......-...u......s.s..........................hL..A+.A....ah..Ps..............h..tGss....H.a..-...t...s.c.......a.s.h.s...........................................................................................................................................................................................................p...s..h.s...L..h..h.VA...s...HElGHuLGL.s.H..........o...........s......s..................p........u...l...M.h.....P.h.hp..h..................tLtt-Dlpu..lptlY........................................................................................................................................................................................... 
2 289 498 817 +3740 PF00675 Peptidase_M16 Insulinase; Insulinase (Peptidase family M16) Bateman A anon Pfam-B_88 (release 2.1) Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.58 0.71 -4.45 26 11461 2012-10-02 15:41:56 2003-04-07 12:59:11 15 55 4386 192 4038 9720 2217 136.00 20 22.38 CHANGED pVsspps.ssspuslulhlssGSthEspp.sGlAHFLEHhhFpG.Tp+hss.ppltphlcphGGph....NAhTscEpTsYhhcshscs......lspul-hlu-hhtps...thscppl-c........Elptlcsch.....lhctlcssuaptpsLupslhsPt .........................................................................................stht.lhh.t.s.Guh........t-...s.....t....t..p...........s........G...l.A.Hh.l..E.H.............hh..Fp............G......o.....p..p......h......s.....s....t....p.....ht..p.hl.p.p..h.G....u.p.h...........................NAhTu....h....-........p..TsY.h..h....p....s.........s..pp....................................lp..p..s.l.......p....lhs-...h....h..t.p..s...................t.h.s..p..p.p...l.........c..p...............tp.........t....h....h..h..t..E.h.p......h.psp..............h...t.....h.................t.......................................................................... 1 1413 2529 3444 +3741 PF00883 Peptidase_M17 Cytosol aminopeptidase family, catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_990 (release 3.0) Domain The two associated zinc ions and the active site are entirely enclosed within the C-terminal catalytic domain in leucine aminopeptidase. 
25.00 25.00 25.00 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.89 0.70 -5.55 17 5342 2012-10-02 19:46:12 2003-04-07 12:59:11 16 21 3337 121 1482 3949 3549 304.00 41 63.96 CHANGED lthuR-LhssPuNhlsPshhAcpApclupshu...lplpVlcpcphcchtMGuhLuVupGS..ppsPphlhlpYpsst.....cspphluLVGKGlTFDSGGhsIK..sussMctMKhDMuGAAuVhushpAluplchs.hsVhsllshsENh.SGsA.+PGDllsshsGKTlEVpNTDAEGRLlLADulsYApp.hssctlIDsATLTGAhslALGsshsGlaoss-pLtcplhpAuppuGEtlWRhPhp-c.YhcplcS.slADls..NhusttG..GuhTAAhFLppFlpp.....WhHlDIAGsshtpptt......GuoGhsVpTLsp ..............................................h..hsRDLsNtPsNh....hsPstlAp....p....A.....p....p.lspphu............lps...c.l.l...sp..cplcchG..hsuhh.....uVG+.G....S.....pp....sP....t....llhlc...............Ypsss...................t........ss...t......lsLVGKGl..TFD.................o.....GGlSlK......Pu.s..sMc.pMKhDMuGAAsVhGshps..lu.p.h.........p..L.....s.......lp....Vhullss.s...ENhsuGsA.............h+P................GDllTshs...........GpTVEVhNTDAE.G......RLVLADuLsY....Apc.....h............c........P..c......h....l....ID.....hA.TLTGAs...h.............lAL...G...p.c...h...sul....h....os....s....-....s..............L...s....p....p..l....htA...............u.ppss-..h..h..W..+L....P.....L.s.-..........p.....a....p.....c.p........l..c.........S...sh...A.D.ls........N..h.....u..u.p.t.s.........Gu......lTA......AtF......L.......p......c.......F.....s............c................p.....................................h.......sWsHl...DIAGsuhpssst.........tG.uTGhsVphLs.p........................................................................................................................ 0 495 885 1213 +3742 PF01401 Peptidase_M2 Angiotensin-converting enzyme Bateman A, Coates D anon Swiss-Prot Family Members of this family are dipeptidyl carboxydipeptidases (cleave carboxyl dipeptides) and most notably convert angiotensin I to angiotensin II. 
Many members of this family contain a tandem duplication of the 600 amino acid peptidase domain, both of these are catalytically active. Most members are secreted membrane bound ectoenzymes. 19.40 19.40 19.50 19.50 18.70 19.30 hmmbuild -o /dev/null HMM SEED 595 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -13.13 0.70 -6.32 6 802 2012-10-03 04:41:15 2003-04-07 12:59:11 13 14 230 68 446 817 299 468.30 35 85.85 CHANGED DsshpssshssDEAtAphFsEpYspSApsVh.c.stAsWsasTNITsEsu+h..Ecuh.sppas.saGp+AKch.ss.aQNhT...l+RIItpVpsLspAsLPltchppYNplL.sMppIYSsupVCaPN..uoChsL-PDLTNlhAoSRpYscLLaAWEGW+DtVGhslhPhY.castlSNcAhR.sGaoDsGs.WRShYESPohEpDLE+LYQpLpPLYLNLHAYVRRALHR+YGscYINLcGPIPAHLLGNMWAQoWpNIYDhVVPFPstPNlDsTpsMlpQGWssp+MF+.A--FFTSLGLLPhPPEFWscSMLEKPTDGREVVCHASAWDFYNtKDFRIKQCTpVsMEpLssVHHEMGHIQYahQYKDLPVoLRcGANPGFHEAIGDVLALSVSTPcHLHpIsLLsp.ssshEpDINaLhKMAL-KIAFlPFuYLVDQWRWtVFsGphspppYN.-WW.LRhKYQGlCPPVsRspscFDsGAKFHIPussPYIRYFVSFllQFQFHEALC+tAGHpGPLHpCDIYQSpcAGt+LtsshphGhSRPW.EshKslsGpsshsApAhLsYFpPlopWLpppNpRpGEsLGWPEYpWpP 
...........................................................................................................................................................................................................................................................................h..hht..p.....h.......utW..ts.ls.t....s..................t..h..t.......t.t.h....................t....p....s.t.t..h..............hps.p.......h+..p.....hthl..t........t.....s...s.l........s......t.c......t..ph...t....pl....h..s...p...Mpt......hYutu...ph.C..........p......................tp.C......h......Lp.....P......p...lpp.....lh...s......pS+.......sh..........pchhasWpuW+ptsG..h.....h....+s.a.chV.t...LtNc..sAp....s.................satsh.uth.W.....p..t....Y.....-......................s..........p..........h...........pp.....l-.pla......p.....plpPLY.....pLHuYVRtpLhptY..G...p..h.l...s........p.G..s.lPAH....L....L.....Gs.MWuptWs..sl...hsh.hhP..................a.s..t.t..s..t.h..D..l.o..........t...t..h.hp..p.s.a.ss.......chap.u-pFFhSlGh......hs....pFW.ppS...hh..p.c....PtD...R..cl..lCH.s.o.A.WDhh.......t........t.....t.........D.......h..R.....IK.Cs..p...ls.h-...phhshHHEhGHlpY...ht...Y..p..p...Ph..hh+pGA.NsGFHEAlG-hhuLSssTPp.......aLp.p.lGLL..p.p........................t...s......................c..............t..p.........IN.......h.L...hp...ALppl.shlPFsh.hh-pWRWtVF.pGp....ls...p..p....aNptWW.ph..+.catG...l.hsP..l...Rs.p.p...hDsuuKaH..lss.s.hs.a.h..............RY..ahuhlhQFQFacuLCpt........u........t.........p.......p.......s...........P.......L......ap.CD.lh...t.sp...tAGph.Lt.phh.phGtSp.....sW.psh.............p.hs..G..p..p...p.......hsspsl..hpYFpPLhpWLpp.pNt.....hGW................................ 0 164 207 346 +3743 PF00557 Peptidase_M24 pep_M24; Metallopeptidase family M24 Bateman A anon SCOP Domain This family contains metallopeptidases. It also contains non-peptidase homologues such as the N terminal domain of Spt16 which is a histone H3-H4 binding module [3]. 
20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.78 0.70 -4.75 656 20523 2009-01-15 18:05:59 2003-04-07 12:59:11 19 77 5099 201 5995 15852 9743 218.60 23 58.35 CHANGED pth+pAsplsspshp.th....h..t.......lc...s...........................G.........................h.........o..-t-lsshlp...h......h...tp.......t..u...........tpth..........ua.....s...sllssG.sss...h..........sHh...hs..........s.tpt................l....pp....G.................................-l..lhlDhGsph.....GYtuDhoRTh....h..h.......G................s.....t.......cp.la.......phlhcup...p........tul.p.tl.+....P.G.....s...........p..hs-lsptspphlp..p.......h......G.h...................................thhh+.......thG.H........GlG......l.plH...tt..............................P......h............l.....s........................................................tt......tth.........Lc.....sGMlhol.EP.................Gha..........h.s.....................................................sh..........................................................................h.Gl+lE-......sllloc 
.....................................................................................................hh+pAsplss.pshp.th...h.....ph..........lc.........s...........................G.................................h..............o.....ph-...l..s..thhc...ph........h.........pp.................tu..........................hsth.....ua..........s...shlss..u..hN.ss........l...................sHh.....hs....................s..sph...........................l.......cp.......G.................................-l.......l.h.l..Dh..G..s...h...hs..............G.atuDhoRTh...h......l............G..........p.ss.....p.................................tcc.lh...............p.h..s....h....c....uh...................tul....p...th....+.................P...G........s................................p......ht-.l...stthpphhp..p...............t.............G.h...................................................thhtc........hsG.H....GlG............h...ph.H.....-t............................................P.......l.............................................................................t....pshh.....Lc................GM.....lho...l..EP....................ulh............s...........................sh...................................................................................................t.usphEc...slllT.......................................................................................................................................................... 1 2007 3696 5023 +3744 PF01742 Peptidase_M27 Clostridial neurotoxin zinc protease Bateman A anon Pfam-B_407 (release 4.2) Family These toxins are zinc proteases that block neurotransmitter release by proteolytic cleavage of synaptic proteins such as synaptobrevins, syntaxin and SNAP-25. 
20.10 20.10 20.40 20.60 19.20 20.00 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.48 0.70 -5.61 13 216 2012-10-03 04:41:15 2003-04-07 12:59:11 12 5 34 118 5 238 0 366.70 42 32.95 CHANGED hsIN.sFNYsDPVsNcsIlhl+...sctsspaaKAFcIssNIWllPERa.aGpsPs...hstssplpuh.s.uYYDPNYLoTDsEKDcFLpshIKLFpRINssssGcpLLshIssAIPahGs............................spT..spFssspcosslshK..p....sspshhhsNLlIFGPGssIl-ssshshp.......sppsucpGFGoht.lpFsPcasasFsss...........p.FhsDPALpLhHELIHuLHtLYGIK.ssshplh.ppps.ahstpts.lphEElhTFGGpDhplIssssppplhshhLssa+sIAp+LNplpos.sss..h.lspYKphacpKYphsp-ssGpasVNlsKFsclYppL...aTEpshustaplpsRppYashcYh.hph.slLssslYsIp-GFNluphshplshsGQNtplNs .....hpIN.sFNhssPVDNcsllhl+...sctsspaaKAFcltsNIWlhPERa.aGps.p...hscs.phcs....uhYDssaLsTssEKDpFLpshIK.LhpRINssshGcpLLphIssuIPa.hs............................pps..tpatthhposhls.p.......sspphhhsNllIhGPGssIh-spsh.ht........p.sucpGhGoht.lhFpP.hsatasp..............FhhDPAlpLh+pLI+uLahLYGIK.sssh.l.hp.ps.h.p.phs.lph.-hhs.GGpDhphIsss....h.shhhssh+shtpphNchpspltss....hN.hK.hhcpKath..sspshaslslshFsc.ap.l....h.p.phspthphhpRppYa.hsas..........c.YsI.pGFs.sphNhplshpspNpsI.s......................... 
0 2 5 5 +3745 PF02073 Peptidase_M29 Thermophilic metalloprotease (M29) Mian N, Bateman A anon IPR000787 Family \N 21.50 21.50 21.60 21.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.29 0.70 -5.70 73 2095 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 1554 6 468 1517 492 367.30 34 92.86 CHANGED M....hcpplc+aAcllV.....chGlslQtGpplllpus.hc.stchs+hlsccAYctGAtpVhlpas......DsplsRhhhpt.usc-p..hc..phPpach...pphhccssuhlulhu.pssshLssl-sc+luthp+At.sts.hcthhpth.tsthsWslsshPotuWAp.h.....psp.ul-phh-hlFcss+lDtt.sPltsWcp+scplpphschLNcpph.pL+ap.us.....GTDLolplscp+hWtuust.sp.....hhsNhPotEVFTuPhcpsVsGhVpsspPLsapGsll-shpLsFccGclV-hsA..cpGc-hLpcll-TD-GA+hLGElALlsssu....hshhhaNTLFDEN.....uSsHlAlGpAYstsltsG.t.........tGhNcShlHlDhhlu........usclplDG...tsp..slh+sGsa ..............................................................................................................................................................................hcppLc+hAclll.....phGl.s.lQ.Gpplhl.....pss.....lp.thtL.s.+hlsccAYp.hGAt.Vh.lpap......Dpt.lp+.hh.tus..-p..hp.........phspach.t...t.t.h.h.cp..t..s..uhlulhu......psP.....shhssl-..s.....p+ltthp.psh.utu..hc......hhpthttsphsWslsuhP.s.tWAp.las........ssctuhp.thh-tlFc.ssR.l.s....t..sPl.t.sWcpHsptL....pphsc.hLNc.pphttLH.as...us........G...TDLTluLs..c..p..+..h..W.p.s.us.s.hsstt.......hhsN...h.P.....T.....EEVFTuPspp.........p....ls......GhVs..s....s...p..P..Ls...Y..s.G..s..l.I...-.s..hplpF..c..cGcIV..-hsA................cp.G..c....c...hL......c.c..l.............l............p..s..D......E.............G.............A....+h.L...GElALVsssSP.....Iuppsh.....h.Fa.....N....TLFD......EN.....AosHlAlGs...AY..s.......h.......s......h............s......G.t.......p..h.......s.......c..p..........h.......t...tGhN.p.S....................l............HhDaMlG............osch.s.lDGh.tsGp...lh+sGpa..................................................................... 
0 181 345 417 +3746 PF01432 Peptidase_M3 Peptidase family M3 Bateman A anon Swissprot Family This is the Thimet oligopeptidase family, large family of mammalian and bacterial oligopeptidases that cleave medium sized peptides. The group also contains mitochondrial intermediate peptidase which is encoded by nuclear DNA but functions within the mitochondria to remove the leader sequence. 19.80 19.80 20.00 19.80 19.50 19.70 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.57 0.70 -5.45 29 8043 2012-10-03 04:41:15 2003-04-07 12:59:11 15 23 4148 12 2004 6381 1947 390.10 24 63.94 CHANGED hhhpspscplR+psapAhhpphtp.t.....h..stllpchlph+......AclhsasoatphsLts.hspsspsVhsFLtclhpcttPhhcc.hphhpphh.tt.......hshp...clpsWDh...........as......lspp.hp.aFsh.ppslp..................................tpsapp..............ptshhuphalDhasRcu.KtuG..Aashshhst...............................................shshllhNasp.................shs-l.pTLFHEhGHuhHshhoppp.shhssh.sss.....h-hsElsSph.Ethhh-hthhthhs..chpstts...................l.pchlpphh........h.thhRQlhhuth-hplHpttp.st..s...h.c..atplppchhs..hssss.......sp.th.s....ths.l.....thYsusYYsYhaupsh..usshapphhc.s...s.........chhpphLptGGStsPh-hlcphtsc.spscshhcshsh 
...............................................................................................................................................hh.spspplRcphapuhh.pthp......p.......................t..............p.........h...sthh.pphl.ph.+tth.....................Ap...lhGap...s...h.t.....p...h.....t...L..t...s.............s...ps...s...pp....l..h..s.h.L...p.l.h.p.p.t....h....sh...h.p.c...ht.hpp...................................hGhp.......plp..aDh............................as..................hsp.tt.h.c......h.hsh.pp.....s.hp...............hhh..t.h...sh.........................a..p..........................................................ptth..............sphal..Dha.............s+..p..u....K....pu.G....Aahs.shhst..............................................................................s.hsa..l......l...h...Nast.....................................................shs-............l.hTL.hHEhGHuhH.p.h...h.o..p..p.p......s..h..sss...sss.................hshsEhsSph.....Eth....h.hc....h.....h.p.h.h........c..h.c...p..t.t..s................................................................l..pp.hl...c..p.hh.................................h.thh.cpl..h.u..hF.-hpl..Hpt.........pt....s.t..............t.............lsp..hhtp..lpp...ch..hs........sh..............................s.p..thsh.........tau+l..............thY..t..s..s..a..Y..s.........Y..h..a..u..p..sh.......ussh.a.p..t....hh..c....p.u.....s.........................h.......ch...h.p...hL.p.t.GuSp.p.s.h-.l.h.c.t.h.t.s.c..sp.cshhpt...h......................................................................................................................................... 
1 652 1241 1669 +3747 PF02074 Peptidase_M32 Carboxypeptidase Taq (M32) metallopeptidase Mian N, Bateman A anon IPR001333 Family \N 19.90 19.90 19.90 19.90 19.70 19.80 hmmbuild -o /dev/null HMM SEED 494 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.46 0.70 -5.87 3 1205 2012-10-03 04:41:15 2003-04-07 12:59:11 10 5 1118 17 345 1078 910 472.10 36 95.91 CHANGED MphcsshKElL-hhRRIuaLGcAsALhuWDpRTshPKcGpt-RAcuhGpLSsLlaEltTcPRhsELlEKlcGp..hEDLsEDs+ssVRlh+RpYEcsKuIPEchhKElupssSKAETAWEEAKuKDDFStFpPYL-+LIuLAKR...hlpYlG.YpE-P.....YDALLDLYEPGlRsRDL-pLFcELccuL+PLL-+ILuSG++PDsusLcK.+YPKEtQ+cluLalLQchGYDL-u.GRLDsTAHPFsTslGhGDVRITTRY-EcDFRsAIFGTIHEhGHALYEQslDEAahGTPlu-GASLGIHESQSRFWENlIGRSKcFWEhaYP+lKEsh.sphcDloLEDFahAlNtV+PSFIRVEADEVTYNLHILlRaELE+AlFSsEVplEDLPEhWN-KYccYLGIsP+TY+EGILQDVHWAGGsFGYFPTYoLGslYAAQLatKhpEDLP-FEsLlARGEFcPIKsWLREKIHtaGSRa+P+-LlK+ATGE-lNAcYFlRYLcsKY .........................................................................h.......pphh.phhcc...lp.t...hp.phhulhtWD.pT.hMP..p.tG.s..ptRu-shuhLush....haphhT....s....sp....hs....ch....lp..ph...c..pp..............p..p.....L....s.........p.....h......p.....p....t....slcc.....h++p..a.ppsptlPtphhpthsthsucucpsWcp.A.Rt.p.s.Das.tFtPhLpcllchp+c.......hhph.h..u....h.t..p..s..s.........................YDsLL-.YEPGhTsppL-plFspL+....ptlssLl....p.pl.hp.p.....t.....p....p........p...s...s..h..lp.t...p............as.......tcpQ.cphsh..pl.h.c.hl.GaD.Fsp.....GRL.Dp.osHPF.ssu.ls...s.DVRITTRY.sEs.-FhpulhuslHEsGHAhYEQ...s.l...s...c...p...ht....s.o.P.l.....u.........putSMGlHESQSLFaE..lGRSpuFhphhhsh.hpcth....sph..s...s..h.....s.h..-..shacthscV.c.su.h.IR.l-ADElTYs..hHlllRYElE+tLhsG...plpVp...DLPphWNcKhpp..Y.LGl.p.s.p.s.c.tp..GsLQ...DlHWosGsF.G.YFPoYsLGshYAAQhhpshpcsls...........s........l..-s..hl.tp..G-.hsslhpWLpcpIapaGsh.h.s.s.p.-Llpcu.T.G.E.s.LNspahhcaLcp+Y............................................................................................................................................... 
1 111 208 279 +3748 PF02102 Peptidase_M35 Deuterolysin metalloprotease (M35) family Mian N, Bateman A anon IPR001384 Family \N 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.12 0.70 -5.80 4 234 2012-10-03 04:41:15 2003-04-07 12:59:11 10 2 67 1 138 280 1 281.40 31 91.47 CHANGED MRhTsLuoAlLALAssslAhPspos.uPsLDVoLoQVDNTRIKAVVKNTGuEcVTFVHLNFF+DuAPVKKVSlaRp..................ssEV.FpGIKRRhpopGLoc-uLToLusGEolEDEFDIAoToDLopGGslTIRocGFVPIssDsKlTGYlPYpSN-LplpVDuAKAAsVspAlK.LDRRTKVssCsGoRpSALoTALpNsspLANtAAoAApSGSuS+FpEYFKTTSppsRosVAuRhRAVA+EAuSsSSGSTTYYCsDsYGYC-oNVLAYTLPupNlIANCDIYYSYLPALspoCHAQDQATTTLHEFTHAPGVYSPGTDDLGYGYcAATuLSuSQAL.NADoYALaAN.......ulclKC .............................................................................................................................................................................................................................lpl....p.....l.......................t.t.......hps.lpNtutp..hp.hhp.h.t...h..s.t....spph.l..t.t..............................t.t..h..FtGhh...h...ttl.........p..h..l.sttohp.......hs.hA.h.ph..t....t.....h..h...........t...s.......h....h..h.............t...........................s.......h.......h...ss..ht...h.t.s.............s.................................h..................................t........c.Rs..p......h..s..s.......C.s.s.s.ptstl.psALpssupLAptAtp...A...s..p...s...t.s....s...phappaFKoss...sphp.phVtspF.ptlupEs.sps.s.sups.ohhCpD..s.h.s...h..C....p.....s.....s........s..lAYT.....l..s.s.......p..s.....ls..C.Pha.....as....p..hP..s..h......o.ppC...p...u..t...D.......Qs.T....ohLHE.hoHh.thh.......tsp...Dh..u.YG.apssppL...su...s...p...ulpNADoYulFAps............hhht.................................... 
0 38 76 114 +3749 PF02128 Peptidase_M36 Fungalysin metallopeptidase (M36) Mian N, Bateman A anon IPR001842 Family \N 23.30 23.30 23.30 24.00 23.00 22.90 hmmbuild -o /dev/null --hand HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.28 0.70 -5.62 31 458 2012-10-03 04:41:15 2003-04-07 12:59:11 10 28 290 0 202 440 80 321.30 31 49.08 CHANGED AsYpVYPaslssPsc.......GsRsllssP.hss..hASPauWpsssss...paTs...TRGNNshA.ps..ssussth.....sshpPsu.....ssLpFs.aPashshs.Ps.........sahsA.ulTpLFYhsNlhHDlhYphGFsEtuGNFQhNNhGpGGtGsDhVlspAQDGSG..................hNNANFuTPPDGpssRMpMYlWstss.................................................................................................................................................................PpRDGsF-uGIllHEYsHGlSNRLoGGPuNosCLss.p............ouuMGEGWuDahuhslplKsuDTtssshshGsasssps...sGIRsasYSTshshNPhTYssl.......sshsss........HulGsVWAohLaElhWsLIDcaGhss.....sha.........tsG.....GpplAMpLVlDGMtLQPCNPsFlpuRDAILsADhslssGtNp.......CpIWcuFA+RGLGhuAp 
........................................................................................................................................................................t.........................................................................................................................................................................................................................................................................................................................................shp.....................u...........ttD.h.h...s....ts..................hN.NAshtss...s.D...GpsschphY.hsttt..........................................................................................................................................................................s.hD..suh..-AuIlhHEYsHulps+hssG.uss...t.t...p................uGuMGEGauDahAs.sh.ph.................p......ss....s.p.......t.......t..s.......s.......h.......s....h..Gp.......ass.....s..........s.....................s..........s..lR...p.h....sh........ss.......p........s..N..s........hsa.ssh...............pths.pl...................HshG....plWushLa-hhhshlp......p.h.Ghs...............s.h...........................................Gp.lhhpllhpuh.h....ss.......s.spFhpu+cAIhpADthh...hs.....Gtpt........s.l.chaAtRGlu....h........................................................... 
0 108 146 176 +3750 PF01447 Peptidase_M4 Thermolysin metallopeptidase, catalytic domain Bateman A, Griffiths-Jones SR anon Psiblast P06142 Domain \N 22.20 22.20 22.20 23.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.20 0.71 -4.10 13 1948 2012-10-03 04:41:15 2003-04-07 12:59:11 13 69 1073 112 314 1608 50 143.00 32 26.78 CHANGED sssstsGctVhGc......tp.lsls+sust..ahLpDsTctss..lpoYsussp........hohsuo.......lhpssssshs.....s.tuuuVDAHaaAthlYDYY+shasRsSlD...spGhplpSpVHY.....G..ssYNNAaWsGspMlYGDGDs...sFhPLSuslDVVuHElTHGVTEp .......................................................................s...thGh..h.st........sht.hst.sst........a.h..-.s.p...t....t....h............................tshshppt.................sh..st..................hh.p.sp.spshs.........tps...susD.Aa...a.h.u.stsa.Da.Y+...sh.asRsSlD.......spGhtlhShV...Hh..........u.....ps..a..s.NA..F......W.......s..G.....s...p..M..saGD.G..-s...............h..Fh.slsuulDVsuHEloHGVTp............. 0 95 182 266 +3751 PF03410 Peptidase_M44 Protein_G1; Protein G1 Finn RD anon Pfam-B_4417 (release 6.6) Family Protein G1, named after the vaccinia virus protein, is a glycoprotein expressed by many Poxviridae. 
25.00 25.00 131.40 131.30 20.90 20.50 hmmbuild -o /dev/null HMM SEED 590 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -13.06 0.70 -6.26 3 79 2012-10-02 15:41:56 2003-04-07 12:59:11 8 2 49 0 0 69 0 519.90 70 99.81 CHANGED MIVL-NGVRVFINsuMsKDIYlGlSNFGFENDIsEILGIAHLLEHILISFDuTRFVANASTARSYMSFWC+uIRuc.TplDAlRTLVSWFFu+GuLKDDFSlS+IRaHIKELENEYYFRNEVFHCMDILTFLuGGDLYNGGRLSMLDpL-sVR-lLscRMRRIuGPNVVIFVRELuPusLuLLspTFGoLPACPpTIPuTlhsSIsGKsVMsPSPFYTVMV+V-PTL-NILuILCLYEoYHLVDYETlGN+LYVTFSFIHEpDYEuFLRGsGcLplo.spRIcLNYSDDYlMNlYLNFPWLpHDLaDYLTplNsDosSlLpSLEc-IYpSVRsRDlIVVYPNFSsoMsNTRDRQpH+lVVLDs.NluhuupPsRSIcLMKRQT+NElFIRYGDAuLlDYVsLALu+.RchuL+Rps+GIRl+HuFSADDI+sIMESDTFlKYSRSKPAAMYQYIFLSFFASGNSI-DILp+RESlVcF.uR+sKNKIVFGKpARYDVsTKSSFVCGIlRGPpLocsuLT-hMW-LKRKGLIYSLEFTcLhuKNTFYLFsFTIYPE-VYcYLuopKLFSu+ClVVSpKG-sEDFSSLKKDVVI+l .............MIVLPNKVRIFINDcMKKDIYLGISNFGFENDI.DEILGIAHLLEHLLISFDSTpFLANASTSRSYMSFWCKSIput.opsDAIRTLlSWFFs.N.G.KLKDNFSlSsIRhHIKELENEYYFRNEVFHCMDlLTFLuGGDLYNGGRIsMl-NLshVccMLsNRMpRISGsNIVIFVKcLssuTLshhppTFGoLPuCPElIPss.hs.sossGKIVMhPSPFYTVMV+lsPTLDNILuILhLYEsYHLIDYETIGNQLYlTlSFIDEo-YEuFLRG.ul....LplupsppIs.hNY.oDDYhMNIYLNFPWLuHDlaDYITcIN-cocslLhSLps-IYsSIlsRDhIVIYPNFSKuhhNocDsQpH.IVVL..................Ds.....sN.....c..s..hh.....+........+...P.......h.....psIs...........LMK+.h..T.s.NEIaIRYGDASLhDhlTL...uLSh....pDhsL...+..RssEGIRl+HsFSADDIpAIMESDoFLKYS+SKPAAMYQYIFLSFFASGNSI-DILsNR-ST.Lch.uK+s.KsKILFGRNTRYDlssKSSFVCGIV+GKsLccsoLs-hMW-LKKKGLIYSMEFTsLhSKNTFYlFsFTIYsDEVYcYLsssKhFou+CLVVSsK...G...-lEsFSSLKKDVVIRl........... 
0 0 0 0 +3752 PF01427 Peptidase_M15 Peptidase_M45; D-ala-D-ala dipeptidase Bateman A anon Psiblast Q47749 Family \N 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.35 0.71 -4.70 5 2743 2012-10-02 01:02:30 2003-04-07 12:59:11 12 9 1142 6 295 2325 328 113.20 39 86.22 CHANGED cssGhlsLcpllsDlchDuKYATtDNFTGK..shYpssRsLucc-sApuLtcApulAsspGhsLllWDuYRP+sApsshhtWsAsPEsshst-sthssIp+sth.s...suopSoHSRGoAIDLTLh+sDpGpLVDMGocFD-McERSHssAsu.VussuspNR+..........pLRuIMEuuGFpuYSuEWWHFcLsDps....Yscsah-FsVs ........................................................................................................................FTG+..l.............ssY.....s...sR.hlhp.csh..s...sALtpA.pp..p..A..p..s............G..as.Llla.DuY....R.P.QpAs....p......t........h......h......t.....W.......s.......t......p..........P.................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 109 194 256 +3753 PF01435 Peptidase_M48 Peptidase family M48 Bateman A anon Swiss-Prot Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null --hand HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.52 0.70 -4.79 21 10571 2012-10-03 04:41:15 2003-04-07 12:59:11 13 115 3867 4 3276 8591 3845 215.40 20 61.15 CHANGED hhhlhsshhhhphst.hhtplhtph............stppsplcphlpclApps....sh.....hclhVlcssp...............sNAFshstup....lslpsuLl....pt......................................hscsELtAVlGHElGHhttpHshhp..hhhuhhhslhhhhshshhhs.thh.tttt.......................hlhthhhhshSRppEhpADthuhph........hhpshhp.t..........hshhtplpp.......................................ttssssshhsthhpTHPshspRlpt .........................................................................................................ht.................................t................................t...t.......l.h...p...h..s.p....p...l...spps..............s..h.s...............p..l...h..l..h.p..s.sp............................hNA........F......A......h.......s......s.....s..........p...........................l.s.l.psG..Ll.....pt.............................................................................................hsc..s..E..l.tuV.luHEluHlt..ptchhpt...........h.h.......t....s...h......h...s............s....h......h....h......h.........h.......s........t.........h......h..........h.........h.......h.....h......s........t......h.....t.........t..s....t............................................................................................................hhs.t.h..h....h..t.h...S...R...p....p.E..hpADthuhpl...........hhpuhhp.psh.................................phh..p.p...h.tt.......................................................................................h....t....s......t....t...h.......s....p.......h....h...t......o..HPs.ppRlt........................................................................................................................... 
1 1028 2092 2791 +3754 PF03571 Peptidase_M49 Peptidase family M49 Griffiths-Jones SR anon MEROPS Family \N 19.10 19.10 19.20 19.20 18.80 18.70 hmmbuild -o /dev/null HMM SEED 551 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.95 0.70 -6.44 27 706 2009-09-11 00:19:35 2003-04-07 12:59:11 10 9 421 5 366 722 86 347.00 24 67.47 CHANGED spl.appspch............lYshcsptt..LGa.spsshosYYs..........sslocp-h-hls.chhpspsl...s.NTRlhK....sucs......aplhlASsppsppst.h..............phpGpp..lplshGDast.hp+lsppLcpAppaAAN-sQpcMLctYlcpFpoGshpsHKcuQ+hWlKDhuPhVET.IGFIETYRDPtGlRuEaEGFVAhVNK-pot+FspLVssApphlphLPWsps..........aEKDpFh.PDFTSL-VLoFuuSGlPuGINIPNYDDlRps.GFKNVSLGNVLuusspspp.....lsFls-cDpclapKapspuFEVQVGLHELLGHGoGKLhpcstsGpaNFDhps..ls.lssc..l...soaYcsGETWsShFGsluuuaEECRAEsVulYLshpc..-lLcIFGhpssp.....-tccllassaLpMlcuGL.huLEaasPcsc....KWsQAHhQARFsIl+slLc..tspshlclppsp.sp...hsslplclD+S+I..osG+pAlpcaLt+LplYKSTu-hcsGpchaschosVs-p....ahchR-lVlt..+KpPR+halQuNThlss........spVp.lh-Y-po.tGhIpSalER.. 
...............................................................................................t.............................................................................................................................................................................h.h........G...a.t..htphs..lppAtt...hutsttptthlt.hhp.atpGs.ptacp.phhWl.pphss.l-.......GFhEsYtDP.h.G.h+.upaEu.hlth.....hs........t.ottht.hs..tput.h....Phs.t..............ac+..p..h.....t..s..shpslps..h...h..h...uu.....s..........h.....P....h....GI.....NlP...............N.........p.lRtp.G.Ks..VpltNlhtsh..t...........h.l.p.....t.....t..p......th.h..p.h.t....s..lp...sshHEh.hGHGsGpl......................................................................................................................................................................................................................................................................................................................................................................................s.................................................................................................................................................... 
1 147 232 311 +3755 PF02868 Peptidase_M4_C Thermolysin metallopeptidase, alpha-helical domain Bateman A, Griffiths-Jones SR anon Psiblast P06142 Domain \N 24.40 24.40 24.70 24.40 24.20 24.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.14 0.71 -4.50 113 1990 2012-10-03 04:41:15 2003-04-07 12:59:11 10 77 1088 114 328 1719 59 152.70 37 28.88 CHANGED usLhYpspSGALNEuhSDlhGshlc.ahttt.........t......t..........DWllG--lhp........t...G..suLRpMssPsps.........sspssphs........sh.....................tD.....sssVHhNSGlhN+AFYLlupu...........................hsGhuhcKAtcIaYcAhsh.YhTsso....sFspu+suslpAApDL...aGttu..tphpsVpsAassVGV ...........................................usL.YpspSGALNEuhSD...lhGhhlc.a..tt...................t...............sWhlG-.-lht.................t.u....suLRsMpsPsp.................tsps..schsca..................................ttDsGGVHhNSGI.N+AhYLlupu.............................t.u.l.GpcKs....tpIaYpA.s....Y....ho...s..so....sF.......pps...+suslpuA..p..-L........YG........s.......tpspsVtpAassVGl............................................... 0 104 192 278 +3756 PF04951 Peptidase_M55 D-aminopeptidase Bateman A, Rawlings ND anon COG2362 Family Bacillus subtilis DppA is a binuclear zinc-dependent, D-specific aminopeptidase. The structure reveals that DppA is a new example of a 'self-compartmentalising protease', a family of proteolytic complexes. Proteasomes are the most extensively studied representatives of this family. The DppA enzyme is composed of identical 30 kDa subunits organised in a decamer with 52 point-group symmetry. A 20 A wide channel runs through the complex, giving access to a central chamber holding the active sites. The structure shows DppA to be a prototype of a new family of metalloaminopeptidases characterised by the SXDXEG key sequence [1]. The only known substrates are D-ala-D-ala and D-ala-gly-gly. 
25.00 25.00 33.80 31.10 22.20 22.20 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.55 0.70 -5.18 41 469 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 424 5 164 444 48 259.60 34 95.97 CHANGED MKlaISsDhEGlsGlsshpplps.......usspYp+uRclMTp.......EsNssl-uAhpuGA....s-VlVNDSHusMsNL...lh-clcs.....cspL.lpG.ps+shuMhpGl.-puhDushFlGYHu+AGs.p.GlhuHThsutshpplhlNGptlG.EhslNAhlAGtaGVPVsLluGDDh.htpEs..cthhP.ssphlslKcuhu+huuhshosppspptl+puscpAl..cptpph.....pshphssPsplclchpssuhA-hsshhPslERl-usT.Vcapupshh-shpshpsl ........................................MKlaISsDhEGluGls.s.hppsps..........sst..c.Y.pcsRchMTp.......-ssAslcGshp.u.G.u.......sEllVsDSHus.....hp.NL...hh-p...l.ct.....Rspl.l..pG..ps+.shuMhpGl....-.p.u.hDulhFlGYHAtA.G.s.c.GlLuHThsusshtplhlNGhthu.EsslNAthAuchGVPVsLloGD-s.h.tc-s...cthhP...psthVs..lKculu.p........huuhshsPppspptIptusppAl..ppttph................tshp.h.s.....s.PhplclphpssshA-.hhshhPslc..Rl-..u.pT..Vpapupshhcshphh.s.............................. 0 54 107 134 +3757 PF02031 Peptidase_M7 Streptomyces extracellular neutral proteinase (M7) family Mian N, Bateman A anon IPR000013 Domain \N 25.00 25.00 26.20 25.30 23.40 22.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.66 0.71 -4.44 4 82 2012-10-03 04:41:15 2003-04-07 12:59:11 11 2 55 2 34 84 5 131.00 48 61.41 CHANGED AVTVsYsASsAPSFpoQIApusQIWNSSVSNVRLptGSs.ADFoYhEGNDsRGSYASTDGHGRGYIFLDYpQNQQYDSTRVTAHETGHVLGLPDHYSGPCSELMSGGGPGPSCTNsYPNSsERSRVNQLWANG ...........sTlhYsuSpAssFcutIspustIWNuS..VsNV+Lt..p.u.os...Achs.h.h.t..s.s.cspu.ohAsssGt...GpG.hIaLs.h.p.t.s.Q.t.YssTR...lsAHEhGHlLGLPDpYsG.PCSpLMSGuusGsSCTNshPsAsE+uRVpphaA.G............. 
0 11 25 33 +3758 PF01457 Peptidase_M8 Leishmanolysin Bateman A anon Prodom_3085 (release 99.1) Family \N 19.30 19.30 19.30 19.30 19.20 19.10 hmmbuild --amino -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.83 0.70 -6.16 8 1086 2012-10-03 04:41:15 2003-04-07 12:59:11 11 33 163 1 602 1111 26 328.90 21 71.05 CHANGED H+CIHDtLQARVlQSVAtQ+hsPuuVSAlGLPYVosssh...tpAsssDauhusuoo..VsRAAsWGsLRIsVSsEDLTDPuYHCupVGQplsNHtGslssCTAEDILT-EKRDILVpaLlPQALQLHs-R.LKV+QVQGcWKVTGMssslCucFKVPssHlTs.....GVoNTDFVLYVASVPSE.....uVLAWAsTCQVFuD.G+PAVGVINIPAAsIsSR..YDQlsTRVVsHElAHALGFSs...sFFcss.GIlppVoslRGKsa.................................sVPVINSsTVVAKAREQYGCsoLEYLElEDQGGuGosGSHlKhRNApDELMAPsuu..AGYYoALTMAlFpDLGFYpAcFopAEsMPWG+ssGCsFLocKC....MEcNITpWPuM.FCN.......copsshR..CPTsRLsLGoCslssYpssLPsYaQYFT.........sssLGG.SsFhDYCPallsau..........sGuCsQcsSsAsshhctFNVFS-AuRClDG..sFpPKsssu..hls.YsuLCANVpCDTAs+TYSVQVhGuoGYssCTPGtR.lcLuTVSsAFpcGGYITCPPYVEVCQuN .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t......t........sDh..hhhh.......................................hu..A.h..C..............................................+.....PhhG..h...h.....p.....h.......s..............t........h............................t................................h......t.....h......hh......HEhhHsL.........G.Fs................hh...t.............................................t.......................h...........t..t......................................................................
...............h.hl......o.s.p......sh.t..h..s...+pha.........s.........C.....s.........h.........p.........t........hp.lE..s.........t..G...u...t....u....o....h.h.o.....Hhct.+...................h..sEhM.s.....s................................s.......ts.........h...h.........ot...h...T...hAh.h...p.....Dh.....G...a.Y.p..s..s....h..s....h..s..p....h....................WGpt...uCt...h..h...t.t.C........................................................as......................................C....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 425 463 569 +3759 PF01752 Peptidase_M9 Collagenase Bateman A anon SWISS-PROT Family This family of enzymes break down collagens. 
22.10 22.10 24.70 27.80 19.40 17.40 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.98 0.70 -5.51 7 608 2012-10-03 04:41:15 2003-04-07 12:59:11 12 12 306 3 82 554 4 282.40 35 35.67 CHANGED hpplhtth.hG....uppDplWLuss-hhpYYA..shpt.s..lsttpt-LAt+lhP.pa.CpssuhIcupphosupsApuCclhtsK-ttFHpshpsspsPVtDDtssplcVslFsssssYhpY.hah.......FspsTsNGG.YLEGNPuchsN.spFlAYchtphs.Dl.lhNLpHEYsHYLDuRFspYGoFscshtcuphlWW.EGhAEYhHYpQG...hpAAhphhspG.chsLSslhsTT...a......SpDosRIYRWGYLAVRaMhE.pHspDspohLshoR.G.pas.au.tsthhs.hYss-athW ........................................ppl.phs.ls....ssp.cs...hWLhss.ulaYsuph..uphc.s.shs..thhp.Ah+hhPhhtpp.hhsAh.phps...hsupstsu.sslshpc.cc.t+pthh.s+s......shD.Dsshsl+sushsoc-ch++L.haA......pFt.psssNst.h.cGNPsDlh...shshasu.-..c..a..p.h....N....c.....hhh..scphshYl-u.h...spahoacR.T.s.c.p............S...............h...............hsh.p.hhtEas..HYhQG....pht...ssu.lh.....up.....G....c....h....h....s-..hhshh.pt.......usc.osplh.ht.lh..sth...-.....t....p...p.ssphhhh.u+h.G......pa-hYshths.h.u..hYsppapha................................... 
0 22 36 62 +3760 PF00768 Peptidase_S11 D-alanyl-D-alanine carboxypeptidase Bateman A anon Pfam-B_864 (release 2.1) Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.57 0.70 -5.10 12 8726 2012-10-02 21:13:33 2003-04-07 12:59:11 15 33 3497 44 1522 7430 2600 235.10 31 61.21 CHANGED sspssstP.........plsApuullhDhsoG+lLYppsscphhs.ASlTKlMTuhlVhcAhpttplc.sshVslupssathst....tsSphhLc.spploV+-LlpulhlsSuNcAslALA-hluGup......csFVchMNstAcpLGl+NT+F.sspGLsscs...............hSoApDhAlluptll+t.h.pphphspcpphsFp..........phshhNpNtLlhpps..lDGhKTGaTstAGasLVuoAscs.shRlIuVVhsAps .........................................................h............pl.sA.p...u.h.l.l.h.Dh...s...oG.....c...l....L....h........pp.....N....s.c.p.thsPASlTKlMT.uhllhcul.....p.t.....t.......p......l..................s..h......s...s...h..V..s....l.o.p...p.u...h.t.t.s...................sS...ph.h.l.c....s...G.p.p.lolc-LlpuhllpSuNDAulALA....-....t....l....u.....G..o.p.....................................p..sF.l..ph.M.Np.p.A.c.p.LG.h..p....s.....T.+.F.h....s........s..s........G.L.....s....s..s..s.t....................................hooA.cDhAllu.pthl.....c.....p......h....s...p...........h....s....h.....t...p....p..p.p...h...s...a.s..............................shp.h.h.....N......p......N.....t.........L...l..........h............p......t.......s.............................s................l....D....G...h.....KTG...a.T.stAG..as..L.lu......o.......A......p.......c.......s.......s.......h......R..l..I.s.VVhsu..s.................................................................................................. 
1 486 949 1228 +3761 PF02113 Peptidase_S13 D-Ala-D-Ala carboxypeptidase 3 (S13) family Mian N, Bateman A anon IPR000667 Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.31 0.70 -5.98 9 2907 2012-10-02 21:13:33 2003-04-07 12:59:11 10 7 1969 75 742 2721 1248 317.50 25 88.36 CHANGED LstslsshltD.PpL.Gu.sGlhlpcssoup.lapapusp.hlPASstKLlTAsAALhsLGssacFoTcVhssG....tshpGsLhlhGuGDPTLssps............lsslscp....L+cuGVpsl.phslhlDsSlFsu.shusuW..sD.s.sasuP.sushl......DsGph.splss..t.Gp....p.ss...h...t......ttusts.........hs.pphhlpGsls.t...h..shsVpssuthAuchhpcpLtttGlphsuslshssss...puupsLAstpSsPL.clLppMhKpSDNhhAEslh+tluhsh.ptPuoapsussuVpptL.sphGlDssshhLtDGSGLSRpshloucTlsplLpshs..ppsshpshlssLPlAGts.....GTLpsRht....spsssGhl+AKTGoLouV.uLuGa..........lpspsGchlsFuhl.N...hsstsspshcsshsthtspl .....................................................................................................t.....huh.l.p.ht.....s..s....t....h..h...t.h..p..sp....h..PASs.Klh.Tsh..A.Ah.........L....s.s..s..ap..h....p....Tp.l...........h.................p.s.....................................tu..........sl...hh.h.hsuDP..h..htt...tp....................................h.....t..h.h.tt...................l.p......t.....t........G...l...p.....p.....l...............p.l..h...h...Dt..oh....a.........t...........t.....h..s.......s...........h...........t..s..............................s........tsh.h..............s.shh......................................................................................................................................................................................................................................................................................................................................p....t...h.hthh.t.ht.t....s....h.t....h...................s........t........h......h.......t...........t..s..................t...sp..........l...s...t....h..........p..S....s..l......
..p..llp.h.p.SsNhhA-tl..h....h..l.............u..................h.....................t..............................s...........s.......h........t......t..u.....t...s.........l......p.p.hl....t......p..h..G.l...s...h...s.......s.......h.......h..h..D.GS.....GL.....S....pt.......shlsst..hhph.Lthhh.....................pp.........................h..h...p...l.P.lu.Ghs...............................G.olp......R.ht.......t....h..tu.ltsKTG.o.............L......p.....s...........l..........................sluGh..........hh....s.t...s.G.p...h.hhs...h.st..............................h............................................................................... 0 232 506 664 +3762 PF02129 Peptidase_S15 X-Pro dipeptidyl-peptidase (S15 family) Mian N, Bateman A anon IPR000383 & Pfam-B_2704 (Release 7.5) Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.90 0.70 -4.98 48 2545 2012-10-03 11:45:05 2003-04-07 12:59:11 13 23 1647 54 719 8025 2907 265.50 22 43.65 CHANGED DG..scLtsclhpP......psstslPsllptoPYt..tpp.hssshthtpst.....................................................asscGYssVh.-sRGptsS-Ghhsst..........s.pEspDstssI-Wlss..............p.sWssG+VGhhGhSYtGhhshtsAs...............sss.uL+slsstsu.....hsshas..hhhpsGshttsshhsh........................sshttthhupthsstphhpthht................htthhp.hhtchcpp.......................psshssaWpspsh...p....ph...stlcssslhspGhtD.shh.psshphapsLpsss......................+LhlushsHst 
...............................................................................................................................DG...shlts..slhp....P...........s.............ts.......tp.hP.....s..l..h..p..t..s....P..Yt........ps....h.h.p...p.h..s..h..h..t.h...........................................................................................................................................................as.s..c..G..Y..ss...lh...s..s.....s......R.....G......s......t........p........S....p......G..h.h.ssh.............................shp..E..h.....p..D..s..h...s.l..I...-......W.l.ss............................................p...s...W......s......s...G.....p.....V.G.h..h..G.hS.Y.h.G....h..h.p..h....t..s...A..s...............................p..s..s..s.....s...L....c..s.....l...l...s..h...su.........h.s..s...h......Y...p..........h...h...h...p...s......G...h......ht....s......s...h..h..h...h..................................................................t...h...h....t...h..h........s...p....t.........t......t..h...h.t..............................................................t...htt..ptt...............................................p......s...t..h....s...p....a..W......p...p...t.sh.....................h....................ppl...p..ssshhstG...h.tD.shh..t..tsh.ph..a..p.tlttt..............................phhh.t.htHh............................................................................................................................................................................. 
0 221 468 626 +3763 PF00716 Peptidase_S21 Assemblin (Peptidase family S21) Bateman A anon Pfam-B_729 (release 2.1) Family \N 20.60 20.60 21.20 21.20 19.60 20.10 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.07 0.70 -5.10 11 192 2012-10-01 19:43:34 2003-04-07 12:59:11 12 2 89 47 0 212 0 280.20 30 49.47 CHANGED ELhLs.-sVpptLPsss........slPlNI-HpssssVGpVlulhssppGlFhlGllsssphhslLppsupsuhhuppss...shh..-thL.hLosaLPuLSLSS++....husspts...-sshFpHVALCulGRRhGTlAlYups.-hslstFscLSsup+-tlhp....sppssu.............hhtsshps.spsLLusAlcshal+-RhshLppc+phAGIt.scoYLpASssh..tsssp.pssspph........................shpsss...........shsssps...............spshssssPsu......................ssssusustsh......tDhlhlPtspatpLlsupttsts....ss.sssthhhPsssss.sshP .........pLhLs.-hVpthLssts.........slPlNlsHptss.VGtVhulhss..cG.FhlGhlsssphhpllppsuptshhsptss........................-.ll.hloshhPulSLSSh+................tstt.s......................stshFtHVuLCslGRRhGTlssYsts.-hslt.F.tpLo.sp+ttlht................stt.hs................ts.tsshps.shsLLusulsshhl+-Rhshlt.c+p.sGlt.tpoYlpASt......t.t.........tt...............................s.............................................................s.s..shhssh........................s..ssssssushs.s......t-hlalPtsta.pLls..upttst.s.....s..sts..h.P....................................................................................................................................... 
0 0 0 0 +3764 PF00717 Peptidase_S24 Peptidase S24-like Bateman A, Finn RD anon Pfam-B_616 (release 2.1) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.42 0.72 -4.33 228 17337 2012-10-02 16:34:55 2003-04-07 12:59:11 18 67 5051 47 3901 12330 5963 72.80 24 32.79 CHANGED pVpGcSM....t..sslh.sGDhllVc...pts........................psppG-..lVlh..phssp.............shlKRlh..thsssthh..................hsslhlssp ..........................................................l.GsSM...............p.s.s......l.........h......s..........G.........D.h..l.l...V.c...........+ts..................................................................psppGD..lV..lh.........phssp.....................................shlKRlh......th....s...s...c...h...h.h....................t.l.h...................................................................................................... 0 1246 2442 3261 +3767 PF03572 Peptidase_S41 Peptidase_S41; Peptidase family S41 Griffiths-Jones SR, Finn RD anon MEROPS Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -11.05 0.71 -4.84 142 9366 2012-10-02 13:07:06 2003-04-07 12:59:11 13 89 5410 38 2078 8432 3390 171.70 27 34.34 CHANGED plGYl+lssF..t............tsspphpptlpcLpp......pshculllDLRsNs.......GGhlstulplsshals.......sssll.pp....sptsppp......................................................t.t.t....................hstPllVLlspsoASAoEIhA.uAlp-tpRu..hllGpp.TaGKGslQshhpL....ss............ssslp...lThu+ahsPsGps.lpth...GlpPDltl 
..........................................................................................................plGYl+l.s.s.a..tt...........................................tsstt.lp.p..hh.pp.Lt...................sspul.l.lDL.RpNs.............G.G.h...l..s.t.s.s..t..l.s.s.hhhs........sssl..h....p..h......sph.hs.pt...........................................................................................................p.hs...sp.h.........................................................h.tsts.llVL.sstt.ou...Su....uEhlA..tsL..p......c......h.....p..RA......hl.l...G...cp...Th.....G.....t.....u..h.....s...p.p...h..h.pl.............ss................................s.h.h.lp.......ls.h.u.+..h...h..s...P...p.G..ts...hpth..........GlhPsl...................................................................................................................................................................... 0 802 1448 1814 +3768 PF03574 Peptidase_S48 Peptidase family S48 Griffiths-Jones SR anon MEROPS Family \N 21.50 21.50 22.20 31.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.86 0.71 -4.20 4 138 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 109 4 5 124 0 146.60 84 79.25 CHANGED YlEQGQNLRMTGHLHHIEPKRVKVIVEEVRQALTEGKLLKMLGSQEPRYLIQFPYVWLEpYPWpPGRsRIsGsSLTs-EKphIEsKLPusLPDApLINSFQFMELIEFLH+RSQEDLspE+RMsLSEALAEHIKRRLlYSGTVT+lDsP .YLEQGQNLRMTGHLHHLEPKRVKlIVEEVRQALTEGKLLKMLGSQEPRYLIQhPYVWhEKYPWpPGRSRlPGTSLToEEK+QIEpKLP.....sNLPDApLloSFEFLELIEFLHKRSQEDLPscHQMPLSEALAEHIKRRLLYSGTVTRIDSP. 
0 0 4 5 +3769 PF03575 Peptidase_S51 Peptidase family S51 Griffiths-Jones SR anon MEROPS Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.95 0.71 -4.48 24 2034 2012-10-03 00:28:14 2003-04-07 12:59:11 12 9 1732 6 487 2189 1077 152.20 25 62.84 CHANGED Yhpphtpshpp.LG......ht.lssLchst........-hpstlppsDhlaVGGGNTFpLLptlpcpsLsphlpctl.ppG.hsYhGhSAGu......hl.....s.s.oIpsss...hs.h....p..sa...p.u......LsLlsatls.PHa...sstp.t.....E...shtpplppa...psh.slluls-GsAlhlpscp .........................................................hhtsht..lG......hp....lph.lc.hhs...............c..ht.tl...p...pA..-....hlhluG.G.N.oh.p....Ll.....pph......c.....c.....p.....s.....lhp......hlp...c.hl....p......pG...slhh..Gh.SAGA.......................l..................h..ss...s.l.t.s.s.s......p.h.s.s...................p.tsh.........s...u.....................................L.sL..h.s.....h.....t......ls..PHa.....sst..p.............p...shppclpph............t.s...p.......h..s.h.luls-s.sslhlptt.p.............................................................................................. 
0 177 325 427 +3770 PF03576 Peptidase_S58 Peptidase_T4; Peptidase family S58 Griffiths-Jones SR anon MEROPS Family \N 22.20 22.20 22.70 23.00 21.10 22.10 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.01 0.70 -5.25 108 1255 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 969 47 448 1181 478 314.80 32 91.85 CHANGED hNuITDVsGVpVGHsTl.ttt......................sl+TGVTsIlP+.ts..ppsssAusaVhNGsGc.spGhshlcEhGhlps..PIsLTNThulGh.sssullphhhst..s..........hshshPlVsEs.DuhLNDItuttlp..........tpcshpAlsu.A..................tsss.......hspGuVGAGTGMhshshKGGlGoASRllst...........uaTVGALVtuNa.Gph.....tphhl.uGh.lG.pchushs.s.........................................................................tpGSlIsllATDAPLsspQhpRLApRAtsGlARsG..ustssuSGDlslAFSTusps.tstst................l.pt...ls.LapAAupusEcAIhsulhsAps.hsG...t..pscth.ul .........................................................................................h.ssITDVsGlpVGHtoh..p........................thtTGlTVlls.....................ppsssAulcVhsGsst.optsshlc.hshlpp...sllLosspuhGh.uusGlhchhhcp.....sh......t.hpssssssPlVst...u.hL.....Dlt.stssp..................................tphuhtA.hps.A.........................t.s.s..........hs..pGs.VGA.Gs.....Gh.......s.......s.........h.....s.......h................K.......G.......G.....l......G.o...A.....Sth.lss......................shsVGALVhsNh..Gsh....................tthhh...ssh.....su......t..h..tt.h......s..................................................................................................ttsstsosIsll.ATDAsLsptQhpRlAttApsGlARsh..ssas.hsGDhhhAhoTupths...t.............................................lsslhtAAA-shpcAllpulhtAps.h.u.........h............................................ 0 134 269 374 +3771 PF00082 Peptidase_S8 subtilase; Subtilase family Eddy SR, Sonnhammer ELL anon Overington Domain Subtilases are a family of serine proteases. 
They appear to have independently and convergently evolved an Asp/Ser/His catalytic triad, like that found in the trypsin serine proteases (see Pfam:PF00089). Structure is an alpha/beta fold containing a 7-stranded parallel beta sheet, order 2314567. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null --hand HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.35 0.70 -5.26 63 15113 2010-07-03 07:17:22 2003-04-07 12:59:11 17 580 3631 324 6420 14631 3376 299.50 19 44.44 CHANGED slullD.oGlp.....ssHs-lp......................................sph........t...ua.........................................................................................shssss.......................p.ttstss............................HGT+sAGhluuss...tss.s..hGVAhsuplh...............ul+l...lssst..ssss......................hhpulphss.................pph.clhshSh.Gs............................t.....psshhptsh.........p......stsspGtlhVhAuGN......ssssss.....................sssPuhh........psh..loVGuss......................................................................................................................................pp.................s.....ph....s.aSsh.........s............sss........clsusGs..sh..........ssh...sssp..................tshtthsGTSh.........AuPtsuGhhAllhp....t.Pph...............ospplpt.hllpoAp.hs...........................hsphh......GaGl..lshtpslp 
................................................................................................................................................................................................................................................................................lsllD.oG....l...............p.p......p.ht............................................................................................................th..................................t...h....................................................................................................................................................................................................................................................................................................................................................................................s.h....ttt....................................................s.t.ts............................................................................HGT..ps.A.u......h.......l.....uu..........t.................t................................................................t........................h..................G........l........A............p.....u...p.lh..........................................sh.+h............hssts.....sptts.....................................................lht.u.lphsh.................................................................pps.sp..l..l..s...h...Sh...Gs....................................................................................................................t..t...h.....h....p..t...sh........................p.................ts.h..p..p..G.....h..h.h..l..s..A......A..GN..........................s.u....s.s.st..........................................................................................................................................hs...P..uth................sss.......lsV..uu...s..s......................................................................................................................
.............................................................................................................................................................................................................................................................................................................................................................................................pp...............................................s..............th...............utaSsh.............................u................................................................s.h.............................-..l...s....A.....P....Gs...sl......................hush.........ssst.....................................................................................................................................................ht.hhsG.TS..h...................................A.s.P..hl..u.....G.hs.A.Ll.hp.................t..s.ph..........................................s.st.t....l.....pt....h....lh..p..s.....u..t...............................................................................................u.G............t............................................................................................................................................................................................................................ 
0 2244 4178 5504 +3772 PF00326 Peptidase_S9 Prolyl_oligopep; Prolyl oligopeptidase family Finn RD anon Prosite Family \N 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.29 0.70 -5.12 70 9052 2012-10-03 11:45:05 2003-04-07 12:59:11 16 114 3171 311 3615 20889 7745 202.90 21 30.52 CHANGED ssF......s..tphthh..............spGhlhshsshRGuuthGcpatcss.ptphtpsshsDhlsuscaL......hppt..assss+lulhGuSsGGhhsusshs.tpschF+sulutsshsDhlphhtts......hst.pa.caGss.p..stchYpthsshsshcshh..........thsshLlhpGhpDsRV.h.tcuh+hhstLp.........ppGss.hhhth.ssuGH...ustpsptph.cthtphhuFhhpthshp ..................................................................................................................................................................h..h......pp.G.hh.hhh..s....s.....h.....R....G.............u....s...........t.............h.......G...........p...t................a........h................p.......s...........h.......h.......t.........p.......h......t.......t........t......s.....h......p......D....h...h....s.....s...s...c..h..l.............................hp..p....s.....h....s...c...s....s.....+...l..ul.h...........G..t.S.h...G....G.....h...h.....s.........s............h..............s.............h.............s......t..............t..............s...........c..............h.................F................p...............u............s..........l............u......t..............s.........s...........l...........s.....D........h.....h.........t.........h....h....t.....t.................................hs.t.........p.....h.........p.........h.......s............s.........s......p..................s...p...h...a....p....t....h.......s........s..h...s.......h.c.p......................................hts.s..l......L...l...h..p..G..t...p..D....s...p.....V.......h....t.p...u....h...p..hh.s...t.Lp........................ptst..s...h..t.h.........h.........h.h.........s..t....s..H.............u.h....t
....p...s....p...t...p....h.......p....h.......h.t.thh..t..ah.phh...h................................................................................................................................................................ 0 1280 2250 3034 +3773 PF03418 Peptidase_A25 Peptidase_U3; Peptidase_M63; Germination protease Bateman A anon MEROPS Family \N 19.70 19.70 19.80 19.70 19.40 18.80 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.11 0.70 -5.72 2 602 2012-10-01 20:58:29 2003-04-07 12:59:11 9 2 413 2 149 477 6 219.30 31 94.65 CHANGED AVEsK-hhhppQshPsp..EIKGhI.KE+-ctGlKIphV-lTcEGAEh.GKKcGRYlTLEu.GIREpDoEhQEthptVFAcEhshFlcsLNIscDASCLlVGLGNhsVTPDALGPhAV-NLLlTRHLFcLQPEsVQ-GaRPVSAhsPGVMGhTGIETSDIIhGVlcpspPDFlIAIDALAARulERVNsTIQISDoGIHPGSGVGNKRK-lSh-TLGlPVIAIGlPTVVDAVoIsSDTlDaILKHFGREMK-p.+PS+SLlPuGMTFGcKKhLTEDDLPspcQRQoaLGhlGTL.--EKRpLIHEVLuPLGHNLMVTPKEVD.FI-DMANVlAsGLNsALHccVsQENhGuYsH ...................................................................................Gh....p..p.....t.....t..t..hhl.s...lp.l.......p...s.t.p.....h.sK...GpYlTl-h..t....h.t..s.th.pph.t.hsp.ht.h..............................p.......ph...LllGLGNhplTsDuLGPhshpplhlTRHl.h.p......s..p.....h.....t..t.h.p...l.sultPGVhu.TGhEos-ll.ullpphpPchllslDALAuRphpRlspoIQlssoGIpPGuGlGNpR.tlspcslGlsVlulGlPTVlpAsslsp-shp..h.................................................................................................................................................................. 
2 82 127 135 +3774 PF01136 Peptidase_U32 Peptidase family U32 Finn RD, Bateman A anon Prosite Family \N 21.30 21.30 21.30 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.49 0.70 -5.43 166 7851 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 3207 0 1379 4878 390 231.70 27 57.79 CHANGED lppltchlp..h.h.......................phtsDu...lIlsDhGllphhcct.splslHsSsphsltNhpslphhpch...GhpRlVLuREL...olcclpp...ltp..ps...........s...lElElFlHGul...........CluaSG+ChhSph.htsps...................sN.+GpCs.psCRh.sat....................................hhppt.pstp..................................ahhSspD.l..shlpplscLhcsG.lsulKIEGRh+s..tYls.plspsYRpslDsh.tt.............t.t..phhppLpph..hpR.......shssGahhspsst ........................................................h...h.chlc..l.h.......................p.hGsDA....lIhu...D.Gl......lt.....h....s.......p....c.......p......h......P......p...l........t......l..HhS..sQssss..Nhtslc.aa.pch...G..........h.p...RlVLuREL.....ohcpltp...............lpp....ps.......................s.h..-lElFla....Guh............Clu..aS..GRChlSsa..hst.+s...................sN...pGs..C..s..psCRh...pap....................................................................lh-pt...pst..c.t.....hsh....t.....p..Gp.........................................................ahhsspD....l....shlpplscL.hc.t.G..lcSlKIE..GR.h+s..tYls.plspsYRpAlDthhts..........................p..thhpp.ltph........hpR.......shssuah.tp...p............................................................................................................... 
0 454 880 1149 +3775 PF03577 Peptidase_C69 Peptidase_U34; Peptidase family C69 Griffiths-Jones SR anon MEROPS Family \N 20.70 20.70 21.10 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.26 0.70 -5.60 13 1802 2012-10-03 21:14:07 2003-04-07 12:59:11 10 11 864 0 402 1558 66 354.50 29 76.08 CHANGED ACTTlLVGKpAShDGSThIARsED....tsushsPK+alVlps...ccQP+c.Y+Slhoshph...cLP-sPhpYTSsPsu....sspc.GIWuEAGlNpsNVAMSATETITsNpRlLGsDPhV..................psGIGEEDhlTllLPYlpSAREGVcRLGpllEcYGTY.EuNGlAFSDpcEIWaLETlG......GHHWlAtRlPDDsYsssPNQhsIDcFDhsDs....-sYhsSsDLc-FlcppHLs.shp......................tcFNhRcAFG.opscKDppYNsPRsWhhQ+aLs........P-hc..psPcspclPahp+...PpRKlolEDlKalLSsHYpsT.....saDPYGs..pGstpsccta.........R......PIGlNRoppsalLQlRsslPptluGVpWLuaGsssFsshVPFYssVscTPstap..cTsschoss..shYWts+hlAsLuDscYptassslcsa.cp ..........................................................................................................................uCTsllVGKpAohDGSshluRs-D.................t.h.sp.phhhh.s......t.p..t..t.t......h......h....s...................s....t..h.....ph..........l.P.....t.....p...s...h.....p....Y.ss....h.ssh...........................t.p...Gh..h....s..p....s..G..hNp....t....s..V..u....h....o....u....T.co..h.h.s.Npp.s.h.u.h..D....P.h.l.......................psGl.sE.ps......hhslsLPhhcoAREGVphlGpLl-c..Y...G..................s...........h.....E..............u.............N........u.............l.......h............huD.p.s.E.lW.a.h.Ehhu..............GH......p......WsAtR...l.PD...DsY.ssh.sNphtI...p.p.h........D...h...s....c.....................pshhhSsslhpasccpth...s..sh.p.......................................................t.F.shppsau.....s....t....s.......t...c.....th..Y...s...psRsWhh.......phhs...................Pphp..............p.s..p......s..t....phPhhhp.....s.p.+.plol.pDlt..hh.psH.apsT.........a..D.s.hs.................t......pt.a.....................................R................sIuh...pstpstlhQl..R..s
...t...h...P.tt..hs.sl.WhuhG..sshsshlPaa....ss....h..p..p....h..s..ap...................t.s..s.t......p...h...s.p...............shaWh.phlsshs..pat.hh...pth...t.......................................................................................................................................... 0 123 214 291 +3776 PF03419 Peptidase_U4 Sporulation factor SpoIIGA Bateman A anon MEROPS Family \N 25.00 25.00 27.20 26.60 24.70 24.50 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.54 0.70 -5.34 34 409 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 402 0 90 356 5 271.70 27 96.09 CHANGED M...hlYlDllaL.NhlhshhLLhlTAthl+ppsphhRllhGuhlGuhhsllhhh.P......hhshhhphhhKllhShlhlhhuFuhcphth............hl+slhhFYhsoFlhuGuhhuhphhhptshhhpsthh.h........hsahhllluhsshahhh+thhchlpp+phptphlhcVplthsspphpl+uLlDTGNpLpDPlTptPVhlV-hstlcpl.hs.ph..........slpphtph....t....p+hRlIPY+ulGp.ppGhLhuhKPDplhI.pppchlpspcsllulspppLSspscYpullpPcll ............................lYhDlhhl.NhhhshhlLhhouhhh+t.phphhRllluAhlGuh.hslhhhh..P....................h.hs.h.h.h..p....hh...K.l..lhSllh.lhhuFshpsh+p....................ahpslhsFYhsoFhlGGshhuhphh..hps.st.h.h..shhh.............hs.hh..h.llhuhs..lh.a.hhh.c.thhc.l.c.p.p.p.hpts.lhclclpls.p..c.....p....lplpuLlDoGNpLhDPlTppPVhlhchss.lcph.hst.h...h........t.........ph...p.h..p............hhp+l..RlIPa+uVGt.ppthLhul+PDplpl.pps.p.p.hhhpcsllulssp.pLSspscYpsllpPphl......................................... 
0 47 75 80 +3777 PF03411 Peptidase_M74 Peptidase_U6; Penicillin-insensitive murein endopeptidase Bateman A anon Bateman A Family \N 20.30 20.30 20.40 20.80 20.20 19.60 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.57 0.70 -5.14 3 916 2012-10-02 01:02:30 2003-04-07 12:59:11 8 8 890 8 146 530 97 237.60 59 82.86 CHANGED QSIGSYoNGCIlGAsALPscGEsYQVMRhsRNRYaGHPcMItaLERLSpcAussGhPTlLVGDIuMPuGGRFLTGHASHQsGLDADIWLp.MPKpRaTsApppcspALslVcRDup+VDs+lWsPs+soLIKLAAQDP-VTRIFVNPAIKpQLCpTAGsDRuWL+KVRPWaGHpuHFHVRLTCPADSsECEsQPhVPuGDGCGcELtSWF..EPPKPG..ToKPcKKssP....PLPhuCQAlLNuPsh ..........................QSIGSauNGCIlGAssLPl............pu-s...Y...QVMR..o..cpcR..YaGHPDLlh.F.I.QR.L..Sp......p.s.p..p.h..G.h.G.T.l.L.IGDMuMPuGGRF..suGHASHQoGLDVD.I.aLp......LP.K..p....R.a.o....s......A....Q..L...h.....c.P...p.A..lc.L......V.s......+......D......G....+....+V..l.s.shWpsphh.u......LIKLAApDp..-VT..RIFVNPAIKpQL...C.h...-.....A.......G.s.D.....R..........s.....WL....RK.........V........R.P..W..F....t.................H+uHMHVRL+CP.As.Sh.ECE-QshP.....Ps.....GDGC.G.A.E...L...pSWF...............-P....P.K..Pu......os.K..P....c.K.K.s.P..P................PLPPuCQALLcp.s.h............................................ 
1 31 69 105 +3778 PF01343 Peptidase_S49 Peptidase_U7; Peptidase family S49 Bateman A anon Pfam-B_707 (release 2.1) Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.46 0.71 -4.44 20 6794 2012-10-02 13:07:06 2003-04-07 12:59:11 13 22 2890 24 1513 5321 2903 149.70 26 46.31 CHANGED pthcptKPVlshssshuASGuYalAosA-+IlusssullGSIGVhhphsshpshlcKlGlshpslpsGpaKsss..ohhcshos-t+phhQphl-psaphFlppVAcsRslsssplcplApG..clapGppAhcsGLVDElGsh--Alsphtp.hspls ...............................................t..tptsKPlh.ssh....s....s...hu....A....S....G....u....Y..hlA.s.s.A....s...c...I.h.s..s..P..s..u..l..l.G..S....I..GVh.s.t......h.....s....h.p.p............h.L.c.+.h.........G............l.chc.........hhp...s...G..p..aKssh...........shhpshos.c...s.+..p.thp.p.h.l..sp.ha.ptFlp.hV....u....p...sR..............t......h....s.........h........p.......p.......l........p........p........l...A...pG.........cha.......p......G.......p......p.......A......h......p...h......GLlD......pl...u.s.h.cc.s.l.tthtp.......................................................... 
1 508 959 1257 +3779 PF03420 Peptidase_U9 Prohead core protein protease Bateman A anon MEROPS Family \N 22.00 22.00 22.10 39.20 20.00 21.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.40 0.70 -5.07 2 91 2012-10-01 19:43:34 2003-04-07 12:59:11 8 1 86 0 6 81 1485 196.80 38 87.65 CHANGED .E.QLLIEsWG.su.hhsts...PhlEu+.st-hGhc..LYIEGIFMQupVVNRNtRhYPK+lhEpAVpcYIpEQVhTpQALGELNHPsRuNVDPhpAAIhIp-hWWcGssVhG+AhVlpss+..Gphltu.IcuGWlPGVSSRGLGSlpps.-Ghp.Vp-.F+LTVGVDsVWGPSAPsAaVpP..lTES.......pTtEhspSsDstahtLAEshKphL ................................t...............................pppG.tK..ha.IEGIFhQu-lhNRNtRhYP.+plLp+sVsc.Y.pc..lp..sppALGELsHP...s.......ts.....s.........ls..psuhhIpcLhh..c..Gss...shG+A+l.l.-ssp..GchltuLlcu.Ghh.GVSSRGhGolp.cp..cGhslVp-sFhLssusDlVhsPSAPDAaVps..IhEu..p......................................................h..................................................... 0 3 5 6 +3780 PF03036 Perilipin perilipin; Perilipin family Griffiths-Jones SR anon Pfam-B_1154 (release 6.4) Family The perilipin family includes lipid droplet-associated protein (perilipin) and adipose differentiation-related protein (adipophilin). 
28.20 28.20 28.60 28.50 27.70 28.10 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.30 0.70 -5.63 17 474 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 106 1 237 475 0 278.60 26 73.43 CHANGED husstps...QpsVVsRVssLPLVSSThshVpssYsuTK-sHPhl+SVC-hAE+GVpolsosAhsuApPllp+LEPQIusuNchAC+GLD+lEcpLPlLppPs-plhups+th...........l.tsVsuAK-oVsp.loussshshuuspsus-hT+shhstsh..VhuoRhsphsosuVDssLspSEclVDpaLP.o.-cELts.upp........................scu.-sssh....t.psuYaVRLGuLSs+lRcRshppols+l+pu+ppsQEtltQLppshsLlc.sppshp...pphhtt.tphh.hWhphppst.pst..............tp.pplEtcslslsRslT.............ppLQosshslsuSlpGLPsslp-pstplpptsttl.tshtshtshpc...ls.thLspu+tpltphptsLDplh-hllsNs ...................................................................................h..........p.pslpRlhplPlVpushphhppsY..ps...Kpp.s.h.lt.shphsEps...........l.......sh..A...h....t.s.Pl...l.p....p....lp.s....l...sh.ssphss+GLD+lEpplP..h..l..p..P....sp....p.........l......htp...h+th..................h..t.l...ps.......stssls..............hst...t..........th..........th............hh..t..s...h..h..phh.......t..sh-.s.....h.s...hs-.h..l..-.hl.P.s...pt-.t..st.p.....................................................................ps.p................phh.+lupLutphpp+shppshtplpth.p...t...pphh..l..sht.L.ht......................................................t................................p......h.tt............................tpl.thh..h....ht.hP...th....t.h.th..................h..........h......h..h.t.h................................................................................................................................................................................. 0 54 73 152 +3781 PF01497 Peripla_BP_2 Periplasmic binding protein Bashton M, Bateman A anon Pfam-B_461 (release 4.0) Domain This family includes bacterial periplasmic binding proteins. Several of which are involved in iron transport. 
27.50 27.50 27.50 27.50 27.30 27.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.16 0.70 -11.13 0.70 -5.24 33 15783 2012-10-03 15:23:08 2003-04-07 12:59:11 13 38 3934 62 3407 11094 1155 233.80 16 72.71 CHANGED lsshshssspllhtL...Ghtsplsussstspt.htttt...............sh.........spssstpphsh...................EplhslcPDlllssphh..........sph.chhptthsslshsssp.....................pshhctlptluphhspp..ccAcphhpphppplspsppphssh....ptpshhlhhhttt....thhhhhussshhsplhct.huhpsh.hstt.......tpttt.luhEplhs..hssDhllhtsp.....................pppshctlhps....sthppl.sAl+ss+lhthssph ................................................................................................................................hsht.t.h.chhhtl...............s.h..t...s.........h....s....u.......h....s....t............s...p.........................................................ph.............................................p..l....u...s...h....t...p...s..sh.........................................................Et.l......h..s.......l......c........P..D.....L.l...l...s.sstt.......................tt.th.....p..h.....h..p.....t....h....h......P...s...l....h...h...s..h.s.p..................................psht.p.p...l.p....p....l.u....p.hh...spp............c.c..A..c...p.....h....l....p....p....h...c......p..p...l...s...p..h....p...p.phtst...............ttp.s.h.h...h..h...h..h.tss.............................t..h.h..s...h....s...s..p.....s.....h...h...s...p..l..l..pt....hG.....h...p.....s..s...hss...............................ttsh.h..p...l..o..h...E..p...l...hp........hs....P..D.h..l..h.lhst............................t.t..t.t...h...p...t....l....h...p.s...................s.h...h...p.ph..sAl.+ssclh.h....t..................................................................................... 
0 1003 2187 2887 +3782 PF00532 Peripla_BP_1 periplasmic_binding_like; Periplasmic binding proteins and sugar binding domain of LacI family Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Domain This family includes the periplasmic binding proteins, and the LacI family transcriptional regulators. The periplasmic binding proteins are the primary receptors for chemotaxis and transport of many sugar based solutes. The LacI family of proteins consist of transcriptional regulators related to the lac repressor. In this case, generally the sugar binding domain binds a sugar which changes the DNA binding activity of the repressor domain (Pfam:PF00356). 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.68 0.70 -5.20 6 5137 2012-10-02 13:57:41 2003-04-07 12:59:11 16 10 2047 27 638 25439 2217 255.60 21 79.54 CHANGED shslGhllsps..csPa....ahclstulscAuppaGhslhLlssspps-tt...pthc.LtsptsDGllIs.ohssc..scIpthtct.shPVItsscshs.s......sVPssh.D.shpAut.psspaLlptGHcp...l...slhstssSsh.sstcRspGahsAltssGh....h+phplhpssschpsutpAlpphhpps..Pshp..AllshNDpsAhGuhhsh.tpGh.lcs.psVs..h.ulhuasuL........spsshl..s.lss.psstp.lGhpsu-hlhp.l........tp-pscslhIs.....thhhtcs 
................................................................................................................................................s..tlGll.l.P....sl........pss.....a.............a.s...p.....l...h.p.u....l...p....p.....s........s....p.....p.....t.......G..........a.....p.....l.........h.......l.........h........p..............s............s.....t.....p......t....c.tt.............pt.l....c....p....l.......h.......s.......p.......t...l...D....G...l....l......l.......s......s........t.......s...............p................s...........p..........l......h.....p........h.......h.....p........t........s......h..P....l.......l...h...h..s....c..t.......t..s.s......................................s..l..s.....h....l.....h..........D......s.........h....p.........u.........u......h......p....h....s.....p........t.......L.......l.......p.........p......G.....p....c..+.............l......................u.h...l.....s.....u.......s........t........s...........t............h......s............s..........t.............c.........R......h......t............G............a.........p.......p.......A.......l.......p......p.....t.......uh..................h....p....................h..........h...........h........t.........s........s..............p............s..............h...............p.........t.............u..........h.......p........h.......h.......p....p..........h......h......p.............pt..................P...p.........hp..........ulh..s..h...s..-...p.h..A.h.G...s...h......h.....t.t...G..........h..p......h..s..........t..t.......l..............................s..h......u...h....p.....s.....h.........................t.....h.............s........l.....s............p..........................G...........s.s...p...hhh.t...l..........................................................t........................................................................................................................................................................ 
1 130 282 466 +3783 PF00141 peroxidase Peroxidase Bateman A, Sonnhammer ELL, Studholme DJ anon Prosite; PfamB-105, Release 14.0; Family \N 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.92 0.70 -4.87 162 7995 2009-01-15 18:05:59 2003-04-07 12:59:11 18 39 2016 398 2881 7388 3647 260.20 25 77.65 CHANGED Vcphlpphhpp........psshus...sl.........lRLhFHD.........................................Chs...............t.GCDuSl.ll.......htsEcs.sss.Nt...uLc..uhc..................ll.....-slKspl-p..tC.st..h...VSCADll.................sLAucsulth...................s............GGP..hh.sl.hGRcDupsupt.tts...tl...........................................................P.................ss.t.shsp.lhptF.sphG.L.ssp-hVsL.u......................................GuHT..lGts+.Ct.h...........+h..................ths.thh...t.......C.......s....................h.h.D....os.................spFDNsYapsLhs....................................ptshhpoDtsLh....sss......pspshVppaAss 
.................................................................................................................h....htthht...............tphus....hh..........lRhh.aHD................................................shs.........................................uGss..uuh.hh..............hss.pps.hss.Nt....sLc...uhc..................ll.....psl...K.pphsp.......t.................lShADl.l.........................sLAussulpt..........................................................s................Ghs...sh..shs.sG.RtDuhpspt..sth...t.l...................................................................P...................ss..hssspp...l...h.s.p...F...tp.hu.....h.s.s..p.Eh.V.AL..u................................................................Gu.H.o....lGt.sa.ss...........p............................t..th......t......................tspt..............hs.h..c.......sP...................................................................stasNs.aFtsLhs..........................................................................................................................ph..shh..poDhsLh..p..ss......th.c..tlschasp................................................................................................................................................ 0 552 1714 2399 +3784 PF01328 Peroxidase_2 Peroxidase, family 2 Finn RD, Bateman A anon Sarah Teichmann Family The peroxidases in this family do not have similarity to other peroxidases. 
21.00 21.00 21.10 23.70 20.10 20.40 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.05 0.70 -5.76 2 357 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 84 13 308 383 0 183.60 23 63.07 CHANGED MFupll.hsuslssh.....tcs........psshhsassPGPsDsRuPCPAhNuLANHGalPHDG+uIoh.sl.sAh.sthsluss.hthAlssAhlls...stushhso.hNLs.LsE.ph.hEHDtShSRtDYhpussp.....DshsFstphFpp.Lshhsu.p.hshsshsth+htR.phppEhD..h.aotp+.l.sh.EouhhhuhluD...Psps....scl-Wh+haFppEphPY+.GW+.sSstppl..lsuh.utllhAs.....ssLPpG.ltssApsVsluFuu.hsPhhhshN........plhs ................................................sth................................................................a..ss..t..s......s...D........RuPCPuL.NsLANHG.........al.P..+s..Gt.s.l...o...ht...pl...h..p.....u........h..tp..s......hs.h....u..........s........................h......s....h...h.h...........h...h...............h.....h......................................................................................................t............h.........h.......s....L.......s....t........L..........s.t.......H.......sh..hEtDsSlsRtDhh........................................hht..............ht.............t....................s..............h...s.....h......p........t....................................................................................................................................................................................................................hhhhh..................................................................................................................................................................... 1 104 191 267 +3785 PF04088 Peroxin-13_N Peroxin 13, N-terminal region Wood V, Finn RD anon Pfam-B_8055 (release 7.3); Family Both termini of the Peroxin-13 are oriented to the cytosol. Peroxin-13 is required for peroxisomal association of peroxin-14 [1]. 
20.90 20.90 21.10 22.70 20.70 20.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.01 0.71 -4.27 29 243 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 221 0 175 247 0 154.00 37 39.23 CHANGED GYG.........Gh.G..ts.s..shht..hspSTpATFQlIESllGAhGGFAQMLESTYMATHsSFFsMloVAEQFupLKssLGShLGIFAlh+al+plhtKlssthh...............tspuhssspF.ppFpspt....ps.ptt..........ts+.ShKPllhFluAlhGhPYLlsKllp .........................hhsshYs.......uhh..s...s..s.ss.ss..phs.pp.hppSopusFQhIESIVsAFuuhApMLESTahAsaSSFh.........AhluVA-pFupL+spLuslhuhFsllRhl+plht+lpthhh..............hpssu.hs..stF..spapss.....t....s..t...........sss+sSphPlhhFlss.lhGhPYLhhKll.t...................................... 0 50 87 141 +3786 PF04882 Peroxin-3 Peroxin-3 Mifsud W anon Pfam-B_6513 (release 7.6) Family Peroxin-3 is a peroxisomal protein. It is thought to be involve in membrane vesicle assembly prior to the translocation of matrix proteins [1]. 26.70 26.70 28.40 27.20 26.60 26.60 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.32 0.70 -5.77 32 408 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 258 4 274 399 2 283.60 23 86.51 CHANGED 
hsuhtsahpR+++KlhlsuullGusYl...lspashpK.lp-hpp+hsp-hhs+E..................pl+p+FcQsQp-CshTlLuL.LPslspslhc.tLssEplsppLppp+.........................................................................................p.............t....................................................................scsKhpLWs-LKIpolTRhlTllYolohLhlhhRlQLNlLGR+pYL-...........o...........................................................shthutsppttppsths................................................hpspppYLSh..SWWLLs+.GahplhshlcpsVpcsFssls.+ppLolschppllhplpptlpt........s........sspp..........hlshLlP...t....hl.po...................s.hss.s.....pssspLpcLlsETpcll..-SsshspVlpthlspuFsplh-plt.tphs.pt.s.s...........................tthsshsphp..............hpLAplLshlscQsptlssss.......................................sNpalpslpp.lccLcsFuAsVYos.F ........................................................................................................h.pppt.hhh.........shhssh...hh......h...h.h.t+......h.....p......p.....p.........tphh.pt..................ph..p.+..Ftp.pp.s.................s..h.h..h.h..ht..l.t.....hs.p.lh..lp............................................................................................................................................................................................................................................................................................................................................................................tp+hplWpplcl.shs+.hs.hashshL.lhh+lQlslluth.Yh.p..................................................................................................................................................................................................................t............................................................................pptaL.s.....ahhtp..Gh.th.p.hpt.l...t...ht.....hp.............p..p.....hsh.phtphh.tl...h..............................................h..hhhs..................................................................
....................h.thhtEh.thl..ps..h..lht...p..h.hhhpth..........................................................................................................h.hs.hh..ht........hhtt.........................................s.hl..t......h......ht.h...lht.................................................................................................................... 0 85 138 210 +3787 PF03212 Pertactin Pertactin Mifsud W anon Pfam-B_2005 (release 6.5) Family \N 21.30 21.30 21.40 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.50 0.71 -4.39 8 2661 2012-10-02 14:50:22 2003-04-07 12:59:11 9 35 708 7 134 1809 3 114.80 27 13.74 CHANGED VssLpLss.GsVsF.......usPAsssGpFpTL.slpoLSGoGsFhMNssls......supuDhLsVsssAoGpa+lhV+NoGsEPsSuss.LsLVcTs.GGsAuFTLuNtGGtVDlGTacYsLsss.....tNssWsLps ........................................................tl.hst.upl.h.......................p....s...t................a...sL..ss..s..p..L.s.G...s..G..s.hh.h..po..s..ls......ss..t..s..DpLsV...p.G...s...s.oG.s.hplt.V.ss...s...G...s.p.......s.s.......s.......s...s....t......lpllp.....ss.....s.....u.....s.....u........s......Fs......h......s......s.......................t....h....V...s......hGsYpYpLhps............s...tsW.Lh.s.................................. 
0 32 55 98 +3788 PF02917 Pertussis_S1 Pertussis toxin, subunit 1 Griffiths-Jones SR anon Structural domain Domain \N 20.90 20.90 21.30 21.30 20.60 20.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.55 0.70 -4.79 2 121 2012-10-01 23:25:29 2003-04-07 12:59:11 9 5 104 6 16 77 1 179.80 46 59.63 CHANGED sPsthVYRhD.RsPE-lFppGFoshGsscNhh-Hlh....GRSh.luoSposhsuhp..tpah.Ehh.EH.hpthl.t.RAsp...HFhshhhpscs..shhtttpshF-.sDp.hsphGhhhhts.hsYQpEahsct.IsstNlR..othhhsulssEsspscassuR.VsppTRhN.Pp.ass+hp.hph.us...hhPs.Ghshshphp.spAhushs.ptGpuhsLs.ats.saS ...s..sDFVYRVDSpP.P.-lIFR...D.G....FohhG..h..NR...N..hQQaIS...........GR..SC...u...uGS..S...D.SpaIATT.So.....h......s.o.Ysh....t+uh.a.uR.ush.pG..plYRYQIRADNNFYShhsSl..s..YL-o.pGu..phs..th..p+sh...hph..Q...pEYlushsIhPENIpcAssl.laDu..sTG..s.p.s.s.p.h..NupYlshsTpSN..P...........................................................slh.hl...pthppphhshhh..hsth..ht....................................................................... 1 3 10 13 +3789 PF02918 Pertussis_S2S3 Pertussis toxin, subunit 2 and 3, C-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.90 21.90 23.50 27.60 21.60 21.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.42 0.72 -4.32 3 72 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 64 12 5 26 0 109.00 71 70.55 CHANGED ThR+TGQPATD.HYYSNVTATRLLuSTNSRLCAVFVRDGQPVIGACsSPY-GR.YR-MYusLRRhLYhIYhuGLuVRVHVSKEEQYYDYEDATFpTYALTGISlCNPGuSl .....TGDKT.....NAYYSD.EVISELHVGQIDTSPYFC.......IKTVKANGSGTP..VV.ACAVSKQSI.WAPSFKELLDQARYFYSTGQSVRIHVQKNIWTYPLFVNTFSANALVGLSSCSATQC.F...................................... 0 3 3 4 +3790 PF02529 PetG Cytochrome B6-F complex subunit 5 Bsahton M, Bateman A anon Pfam-B_1348 (release 5.4) Family This family consists of cytochrome B6-F complex subunit 5 (PetG). 
The cytochrome bf complex found in green plants, eukaryotic algae and cyanobacteria, connects photosystem I to photosystem II in the electron transport chain, functioning as a plastoquinol:plastocyanin/cytochrome c6 oxidoreductase [1]. PetG or subunit 5 is associated with the bf complex and the absence of PetG affects either the assembly or stability of the cytochrome bf complex in Chlamydomonas reinhardtii [1]. 20.80 20.80 20.80 20.80 20.50 20.20 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.63 0.72 -4.43 17 647 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 629 9 42 207 66 35.10 81 95.05 CHANGED MlEPLLsGIVLGLlPlTlsGLFVsAYhQY+RGs.phsh .....MIEshL.GIVLGLIPITLAGLFVTAYLQYRRGDQLDl..... 0 13 32 40 +3791 PF05115 PetL Cytochrome B6-F complex subunit VI (PetL) Moxon SJ anon Pfam-B_6510 (release 7.7) Family This family consists of several Cytochrome B6-F complex subunit VI (PetL) proteins found in several plant species. PetL is one of the small subunits which make up The cytochrome b(6)f complex. PetL is strictly required neither for the accumulation nor for the function of cytochrome b6f; in its absence, however, the complex becomes unstable in vivo in aging cells and labile in vitro. It has been suggested that the N-terminus of the protein is likely to lie in the thylakoid lumen [1]. 21.70 21.70 22.80 22.70 20.80 20.70 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.14 0.72 -4.04 28 723 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 681 2 17 306 0 30.30 69 83.62 CHANGED MhTllSYhGhLhuuLshTlsLFlGLsK.IcLI ........M.TITSYFGFLLAALTITssL.FIGLsK.I+LI....... 0 8 12 14 +3792 PF03742 PetN PetN Finn RD anon Pfam-B_3260 (release 7.0) Family PetN is a small hydrophobic protein, crucial for cytochrome b6-f complex assembly and/or stability. 
20.30 20.30 20.50 20.50 20.20 20.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.25 0.72 -4.38 22 1420 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 1364 8 44 221 4 25.50 91 97.19 CHANGED MDIloluWuuLhshFTFSlALVVWGRNGh .............MDIVuluWAALMVVFTFSLSLVVWGRSGL. 0 13 31 40 +3793 PF04614 Pex19 Pex19 protein family Wood V, Bateman A anon Wood V Family \N 22.20 22.20 22.30 24.10 21.90 22.10 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.82 0.70 -4.76 27 341 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 273 5 218 335 3 236.60 24 75.49 CHANGED ts-...........................sppshhpphppphppLhs.................................tttssptppphpphhpphst................ppsssstpsppss..............sFpsslpcThp+Lpcuucplssshtt.............s--hLsplLpshshss..........sss-tshtthl.sMMpQLsSKEVLYpPlKELpsKaPtWLcppps..plspE.chp+YccQhplsscIlppFEp..............tsYsDpp...cpct-tlhcLhpphQ-hGpPP..............sELluphsss........................tstsh.sshstt..-sCppp .......................................................................ttt.t...........p.pt.h.tpLht..............................................t..thttphtphh.pthtt..................................................................t.th.tt....p.p..t.........t..t.......t............................pFppslpcThpt....lp.c.s..up...p.......hpssh.....................--...l....s.p..hhcths.s.t............................tssct....sh.shht....sh.MppLhSK-l..LYp.PhK....El......s-K.......aPpWLpppcs...........plstE.-hcRYpcQhplhpcIsptaEp........................ps.sDpp........tchchlh-...lMpp......hQphGpPP...............p-L.su-hsss..h............................s.......tC.................................................. 0 68 120 177 +3794 PF04757 Pex2_Pex12 Pex2 / Pex12 amino terminal region Bateman A, Wood V anon Bateman A Family This region is found at the N terminal of a number of known and predicted peroxins including Pex2, Pex10 and Pex12. 
This conserved region is usually associated with a C terminal ring finger (Pfam:PF00097) domain. 22.20 22.20 22.90 22.40 21.70 22.10 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.47 0.70 -4.94 113 868 2009-01-15 18:05:59 2003-04-07 12:59:11 9 26 288 0 602 841 8 218.00 17 58.58 CHANGED -pplpshLpsplppllphht..............hhthhhp......atcEl.phlhphllhthshhp........tssohGEpahsLthhs..st.t......................................................................t.....phhsttp+hhhlhh..hlhhPYlhpKlpphhpptttpt..t..................................................................ptthhp.hhshlpsh..hphhphhphhhFlhsus......ahols.pRlhul+hshh............ttph.t...............................ssaphhsthlhhphhhphlhhhh......shh..h.hh.............s ..........................................................................ppl.shLtsplpphhphht.....................h.t..hhhp................atsEl.thhhphllhthslhp....................tssohG-phhsLphhs..t................................................................................................................................t..th.sttp+h.hhhhh.h..lhhPYl....hp+lpphhtpp...tt.tt....................................................................................................................................hphh.....hp..hhs.hlp.sh......hphht....hhphhhFlhsup............................aholh..cRlhGl+hshh..................ttphppt...............................................ssachhst..l...h.hph..h..h..phhh.hhh........hh..................t............................................................... 1 173 313 489 +3795 PF03011 PFEMP PFEMP DBL domain Griffiths-Jones SR, Bateman A anon Pfam-B_822 (release 6.4) Domain PfEMP1 (Plasmodium falciparum erythrocyte membrane protein) has been identified as the rosetting ligand of the malaria parasite P. falciparum [1,2]. 
Rosetting is the adhesion of infected erythrocytes with uninfected erythrocytes in the vasculature of the infected organ, and is associated with severe malaria. PfEMP1 interacts with Complement Receptor One on uninfected erythrocytes to form rosettes [2]. The extreme variation within these proteins and the grouping of var genes implies that var gene recombination preferentially occurs within var gene groups. These groups reflect a functional diversification that has evolved to cope with the varying conditions of transmission and host immune response met by the parasite [3]. A recombination hotspot was uncovered between Duffy-binding-like (DBL) subdomains [4]. Solution of the crystal structure of the N-terminal and first DBL region of PfEMP1 from the VarO variant of the PfEMP1 protein is found to be directly implicated in rosetting as the heparin-binding site [5]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.34 0.71 -3.86 41 832 2012-10-03 17:31:28 2003-04-07 12:59:11 10 55 9 4 23 881 0 120.50 28 18.86 CHANGED hFpcWVpphLcD..+h+.cKlspClpssct..p.spst..CpppCpChc+.WlppKccE.WppIKc+FpcQtchtp........................ttt.h.lpthLpph.h..shpcshsstcclp+lcchlpppttsstt...pst..s.......sppcshIDpLLp+.pccAcpCppppssps .....................................alpph..p-h.+h+..cK..h.....c.spIN.ssp..ps..Ccss...........CpptCcsYcp.WIocK.K.p.EWDtlps+apshpsucp..................................................................................................................................................................................t................................................................ 
1 22 22 23 +3796 PF00365 PFK Phosphofructokinase Finn RD anon Prosite Domain \N 20.30 20.30 20.30 20.30 19.90 20.20 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.75 0.70 -5.45 12 6795 2012-10-02 15:20:27 2003-04-07 12:59:11 15 15 3846 82 2001 4903 1585 268.50 35 74.02 CHANGED K+IuVLTSGGDA.GMNAAlRAVVRpAIhpGh-VasIh-GYtGLlpG...pItplshtsVushlphGGThlGSARh.EF+pcEGRhtuhppLhcpGI-uLVVIGGDGShpGAphhppEau......................hsslGlsGoIDNDhsGTDhTIGhDoALpplh-AIDtIcsTApSHpRsFVlEVMGRaCG.lALhuGlAsGADhIhIPEts....h.p-plspplpcspp+GK+psIllVAEGshs....s..sphhcplhhpt.sh-TRlTVLGHlQRGGoPoAaDRlLAS+hGscAVchLLp .....................................................................+IulLTSGGDuP.G.hNA.u.lR..u.l...V.......+...p....u..l....t............p........G...h...c...V..h..G...l..hc.G...at..G...L...l......ps....................ch....h...p......l......s......h............p......s...........V......s......s......h.....l.............p.......p..............G..G.T....h.L.G.o.u..........R..........h.............................t...........h...........p............p..............t............p.............s............p............t...........p............s...........h.........c.......p...........l.......c.c.............t........u......I-.uLllIGG...D.GS.h.pu......A...t...t.....L.....s...........chs................................................................................................l.s.slG..lPt..TIDN..Dl..s...u..T.....D........h........Tl.....Ga.....-T.....Alp....p....h....h.....-..u.l....D....c....l....+...s...T...u...s.............S....H....p....R...h.h.ll.E.................VMGRpsGalA...Lh.u..u....l.....A.......s.......G......u........-.......h.....l.llP.Ehs...........................hs.h.c....p...l...h...p...p.....l....p.......p.......t....h.......t.......c.......G.......+....p.......t...s........l....l.l.l.u.EGshs........................................................t...............p................h..................h........
......t..................t.................l.................................t.................t....h...................................t..............h-.sRssl............L.......GH.hQRGGs..Pss..hDRhh..u......ophGh.Ahphl..t................................................................................................... 0 732 1287 1714 +3797 PF02901 PFL Pyruvate formate lyase Griffiths-Jones SR anon Structural domain Family \N 24.30 24.30 24.60 24.50 24.00 24.00 hmmbuild -o /dev/null HMM SEED 648 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.84 0.70 -6.10 129 4920 2012-10-01 23:28:04 2003-04-07 12:59:11 10 9 2399 26 529 2860 102 572.20 38 79.76 CHANGED hh.....p............clsh....lppsh..h.ucss.l.s.........hc+shhlpctt+c.s......tGh.shs..scpspu........Ihspt.........shhI.......c-tElIVGhp.TstPhttshh....Pphuh+h.......lcst.hsh................................sapl.sppscchhpc.............htKotsctlhshastEh.......ptshcsslhT........uh.pshGtG+lhsDYc+lh.hhGlctlh.......p-tppchpphs.............................hht...hs....cchhchtcElu-p......pthpELtphAphhshs.ucP.ApshpEAlQhhaFs.aLushpEpNG.suhShGRhspaL.sYhc+Dlc.pGh...lT-cp.....AQELl-phhlKlp.lphhRssthsp.................hFuG.ss.ahshslGGhs....h-G.cshlschSahhLcsht..pl.tsspPslolhhs.pphP....csFhchssclshp..su..papND-lhh.................................hh.pD..DYu.lssCVps..h.............thG.....Kphpah...uAhhNl..uKsL.hslNsGhDph...ss......tplu.....P..ph.ssl..p..ph.hsa--lhpsacc.hcalschhlpuhNlIchhHcchs.cshh...uLh...Dsslp+shshsh....................sGlussuDSLuAIKh..........sKhhslc....c.pcu.................LsssFcsp.....................................................................................h....hPKYGNDD.....DclDplAtclsptahp.clcphps.h..R......suhhshulLolouNV.saG 
...............................................................................................................p....clsh.....lppsh..a.scts.l.s..........h-+s.ht.hhc.tpp..p..........pu...shs.....sp.....hsps.....IhsptshaI....+c.....EhIVGhQ..sstP.hctshh.............P..h...u.lph.......lcpp..hs.....h.....................................................................shpl..s.chccl.ap-.............hp+Thpp...t.las..haos-h..........................................htspco..s.llo........uhss..sh..G...cG.+I...IsDYpR.lh..haGlchLh.......cctttphsplp................................................tthh....cchhcLtEElA...p...p..............hp...t....ht...p...........lhphAthhuhshppP.ApshpEAlQhhaFs.aLsshhppNG..uuhShGRhspaL..Yh.cRDlc...sGh....loEpp......ApEhl-phhhKlph.Vphl..R.ospasp..............................hF.u.G.ss.ahT.olGGhs............hD..G.......R....shVsc.SathLcolt..sh..uPpPNLTlhas.pp.LP..............tsFhchssclshp..pu..QacN.D-lhhs................................................pD..DY.ul.ssCVps...h...............hsG........Kphpaa......uAtsNl..AKslLhulN.GGhD-t........t.............h.p.h.u....................P...ph..tsl..p....s-h....Lsa-cVhpph-p.hhcalsphhl.puh.NlIchMH-+a.sa-shh...ALh...Dcsl.......tRshu.h.sh.......................s.G.lussuDSLuAIKh..............spsh.s.....lc.-.psu.................Ls.sDF-hp.........................................................................................tc...aP+YGN.s.D........-cVDsluscllcpahp.clcp..h.ps.h..R.......su.h.T.uhlTIouNVsaG............................................ 0 178 334 439 +3798 PF01471 PG_binding_1 Putative peptidoglycan binding domain Bateman A anon Pfam-B_2277 (release 4.0) Domain This domain is composed of three alpha helices [1]. This domain is found at the N or C terminus of a variety of enzymes involved in bacterial cell wall degradation [2]. This domain may have a general peptidoglycan binding function. This family is found N-terminal to the catalytic domain of matrixins [3]. 
The domain is found to bind peptidoglycan experimentally [4]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.50 0.72 -3.99 237 10090 2012-10-01 23:43:47 2003-04-07 12:59:11 13 329 3183 14 3243 8659 2140 56.00 25 14.51 CHANGED psssVptlQphLp.ph.Gah...........t.........shs.....uhaustTppAlcpFQ.pthGLs.ss..GhssspThptL ..............................................................t...tlttlQ.phL..p....th..Ga.............................s...............ths............uh.a..ss....p..o.p..p.....A....l...+.pF.......Q..p.......t......t......G..........L.....s...s.....s.....Gh.h..stpThphh...................... 1 982 1922 2518 +3799 PF00300 His_Phos_1 PGAM; Histidine phosphatase superfamily (branch 1) Finn RD, Griffiths-Jones SR, Rigden DJ anon Prosite Domain The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue. Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches. The larger branch 1 contains a wide variety of catalytic functions, the best known being fructose 2,6-bisphosphatase (found in a bifunctional protein with 2-phosphofructokinase) and cofactor-dependent phosphoglycerate mutase. The latter is an unusual example of a mutase activity in the superfamily: the vast majority of members appear to be phosphatases. 
The bacterial regulatory protein phosphatase SixA is also in branch 1 and has a minimal, and possible ancestral-like structure, lacking the large domain insertions that contribute to binding of small molecules in branch 1 members. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.35 0.71 -4.14 267 22415 2012-10-02 11:42:54 2003-04-07 12:59:11 17 114 4654 228 6555 16302 6276 153.40 22 62.77 CHANGED plhllRHGp...........op...............................s........t....................hpGpt-.............LoppGtppApthu.pt......Lp................................................shphst.......lhsSshpRstpT....Aph.lsp..............................tht........................................................h.h....tL...pEhsh.s.............Gh.htphttth..................thtthhp......t...h..................................................shtphhp.Rstphlpplh............................ttspsl....llVuHusslpsl ........................................................................................lhllRHGc.........................op................................hN..................htt.h..............................................................hp.G.ts-..................s.s.....................Lo.p.p.G..t.pp.A..p..t.hu..p.h.......Lp..........................................................................................................................................................................s.h.p.hc.t..........lh..o...S..s......h.......p..........R.uhpT.........uph...ltp...........................................ths..........................................................................................................................hs.h...h.p.....pL...........pE.h.ph....G................h....................p....uh..p....h....t...p..h..t.t..th..................................thhp.................t.h.....s...................................................................................
tuE.............s.h...t.p..hht...R.h.......h.h.pphh......................................................ttppl.....llsuHusslp............................................................................................................................................................................................... 0 2047 4021 5493 +3800 PF00342 PGI Phosphoglucose isomerase Bateman A, Finn RD anon Prosite Domain Phosphoglucose isomerase catalyses the interconversion of glucose-6-phosphate and fructose-6-phosphate. 20.50 20.50 20.50 20.50 19.40 20.40 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.73 0.70 -6.07 10 7325 2012-10-02 15:05:26 2003-04-07 12:59:11 14 16 5754 105 1399 5509 3130 366.00 35 89.39 CHANGED DYSKsplss-hhptLlpLAcEttlcthp-tMFsGE+INsTEsRuVLHlALRsRospslhsDGpDVhP-VstVLs+MKsFs-+lRoGsWKGtTGKslscVVsIGIGGSsLGPlhVpEAL+shsps............plaFVSNVDGTalAEsLKpLssEsTLhlVASKTFTTsEThhNAcoAR-Wlhpthst.....cuuVAKHhlALSTNspcVcKFGIDsp..NhFsFWDWVGGRYSVWSAIG.LPlALulGa-NF-chLpGAcshDcHFsooPhEcNlPlLLALlulWhsNFhGspT+AlLPYDQhLa+husYlQQLsMESNGKhVopcG.shlsapTGsIsFGEsGTNGQHuFYQLIHQGT+lIPCDFIusVpopp......chssHHcpLhSNFFAQs-ALhhGKosEEV+pEh.tu........psLlPHKsFpGsRPosSILlscLoPasLGALlAhYEH+lhVQGhlWGINSFDQWGVELGKsLApsIhscLcsuthhs.....uaDuSTsuLI 
....................................................................................................................................................................................................................................................ut...s......h...h....s.........l.p..................s...hth...tt....t.tl.hputhp...h..h.s..s.p.....ps..llsIGIGGS.L.....Gshhshc.........h......L.p......htt..........................................h.ah.ssN.lsss...t..........lt.c.hl...ph...l....................s............s.........c............s............s........h....h.ll..hSK.........ohTT.E..............shhs.hp.h.+phhh.p.t...h.t.......................ttt.....h.t+.+hhAh...o.s....p..t.....p...t.....s..h....c....h....t..h.stt........ph..F...h....D....VGG.Ra.SlhoAlG.Ls.....l.s.ls.h......ts..h..p.................phLp...G.A..p.t..h..-p...c.a......t...s..s...s..h.....c.....p.....N..................h.s..h.h.h.u...h..l.t..h...h.h...sh...Gh..tschl..l.s.Yp.t............hlphastahpQh..hESpG..K....sh.p.G.........................h..s....ush.a....oss.HuhhQhl.pp...........s..phh....h-h.lh...hpp.p............................h.p..p...h..s..h..sthph.Lh...GK....s....c..h.pt.ph.....t.........................l..h..s.a....p.s......h...G.h.Ps..lhh..p.l.sshslG..LlhhaEhthhh.pGhlh........sls..sF...........c....Q.G........VEhhK...h.tlh....................................................................................................................................... 
0 472 905 1183 +3801 PF00162 PGK Phosphoglycerate kinase Sonnhammer ELL anon Prosite Domain \N 19.90 19.90 21.80 21.70 19.50 18.70 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.33 0.70 -5.91 154 5701 2009-01-15 18:05:59 2003-04-07 12:59:11 14 18 5059 60 1467 4176 4138 364.90 46 93.99 CHANGED olcDl.........slpGK+VLlRlDhNVPl...c..s.G.......pIoD-sRIcuulP.TIchll-p.GA+.VllhSHLGRPc........................thps.........................chSLpPV...................................AptLuclL.Gpp...VpassD..s....lGspscptltth..psG-llLLENlRFpstEp.......................cscsphscpLAsLu...DlaVNDAFGoAHRAHASshGl..sphl...susAGhLhc+ElchL.sculpsP.pRPhlAllGGuKVSsKlpll....csLlp+..lDplllGGGMA.TFLtA.pG.hslGpSL...hEp..........-hlch...................A+cllpcA.cptthclhLPsDhllu.....cc.Fst...su.ps.psssssp..ls.....ss.hhsLDlGPcTlchasch.lppA+TllWNGPhGVFE.hssFupGTtslscAlAcs....su.hollGGGDosAAlpp.hGhs-.cho..HlSTGGGAsLEhLEGK ..........................................lpDl....Dl.p.GK+....VllRsDhNVPl......cs.G.......pIT..s.Ds...RIpAuLP.TIchhlc.p.G.u+.VllhSHLGRPct................................................................................................p.pt...................................................................chSLtPV...........................................................ApcLuch.L......u.....pp.......V.t...h.....s....s.........D......s..........lG.................p.lct.hht..t..l..psG..-l..lLLENlRFppt....Ep.................................Ks-.p.huKphAu..Lu.......D...l........aVN.DAFGTAHRAHASshGl..uphh.....sus.A..G.h.LhppElchL........sc.AlpsP...pRPh.lAIlGGuKVSsKlsVlcsLlcK.....sDplllGGGMAaTFltA..p.G...h.p...lG..p...SL...hEc......................Dhl..-h.....................................................A+c.llp.cA.pt..............plh.LP..lDsl...lA..................sc.......F........u....s.......sA.......p.....s....phs.....s...sss......ls......................ss....huLDI.G.Pc..ohc...h..asch.lp........s......A.KT........llWNGPh..GVFE.hss
Fup.GTpslucAlAcs.........suhoIl.G..GGDosAAlp.......p.......h.......G.......huD.chS.......aISTGGGA.L.EhlEGK.................................................... 1 516 944 1234 +3802 PF00408 PGM_PMM_IV PGM_PMM; Phosphoglucomutase/phosphomannomutase, C-terminal domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.44 0.72 -3.97 112 12833 2009-01-15 18:05:59 2003-04-07 12:59:11 15 36 4927 56 3279 9608 4242 81.50 20 16.51 CHANGED .lNhplsc..t..........tt...htphtphhps........................htps.phhttcGhhl...............lRsSGTEPl...lRlhhEup.sppthpphtppltph...........lc .........................................................................................................................................t...th.t......................................htsshc.h.h.h.s..c...G...t.......hl................................lRsSG....TEPh......lR..lh...sE..ut...sp.pt.hpphhpphh....h............................... 
0 1056 2051 2778 +3803 PF02878 PGM_PMM_I Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain I Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.69 89 14497 2009-01-15 18:05:59 2003-04-07 12:59:11 11 70 4976 61 3802 11145 5249 133.00 29 27.44 CHANGED thhFGTsGlRGhss.tp......hssphshplupuhuphlppp.......ttsplllGtDsRhsutthtpshhpslsusGlcshhhG......hhPTPsluatscp..hp......ssuGlhlTASHNPsphNG....lKhhhss.Gttlsssh.pppIpphhpphptht .......................................................t..hFGTsGlRGhhs..tt...............hs.thshplutu.h.up..h.ltpp.........................tptpl.llGt.Ds...R.......h......s......u.................htp.slhtsL.su..s..G..l.c...V.h..hhu..........................hhsTP.sl..u..a..hs..p..p.....hp..........................ss.uGlhlT.......ASHNPhp.NG.............hKhh........s.......s........s....G.....tt..lsssh...pptIpphhpt....h.................................................. 0 1252 2394 3225 +3804 PF02879 PGM_PMM_II Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain II Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.47 0.72 -3.70 98 14085 2009-01-15 18:05:59 2003-04-07 12:59:11 11 54 4971 55 3517 10795 4847 103.50 26 21.14 CHANGED st....Yhcpltphht.......hptp...sh......p.llhsshpGsusthhtpllpp.huh..............phht.hptts-ssFss...phPsPpt....t.shp.hhphsppp.....ss.cl..uluhDsDuDRlsls.....cppG ................................................tYlphltshhs..............h..p..h..p........s.l..........................+..lllDshsG..s.utthssp...l...h.p.c.hGh...................................plhs..lpspP..Du..sFss.............ttssPpt...........t.sht..ht....p.h..s..h....cp..................tA...D....l....GlAhDGDuDRhhll.....cttG............. 
0 1142 2220 2982 +3805 PF02880 PGM_PMM_III Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain III Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.64 0.71 -4.03 584 13734 2009-09-11 23:52:46 2003-04-07 12:59:11 11 46 4950 49 3296 10440 4816 115.60 24 23.67 CHANGED s.....GDp.lhslhuphh........hppt..........ht.s........s..lltTlhosh.ul-cshp...p.hGh..p.hh.cotVGc+alhct.hppt.....s.......h.shGGEpS.GHhhhh.ch.....tp.....s....sDGllsuLhllplhs.........pp..s..p....slu-lh.phhpt...a .................................................................pGsp.lhslhu.phh.....hpp.............ht.s......................s.tllpolhooh..t.lc...c.hhp...p....h.....Gh..p.....h.h.c.......shsG..a.+a..lh......c......p......hpct.................................s............h..hhG.GEpS.ut..h..hht..ch...................................sp...........s......cDGlhs...slhl.h...p...hhs...........pp..s........p..sLs-lhtph.t............................................................................... 0 1070 2094 2795 +3806 PF04608 PgpA Phosphatidylglycerophosphatase A Mifsud W anon Pfam-B_5195 (release 7.5) Family This family represents a family of bacterial phosphatidylglycerophosphatases (EC:3.1.3.27), known as PgpA. It appears that bacteria possess several phosphatidylglycerophosphatases, and thus, PgpA is not essential in Escherichia coli [1]. 
21.00 21.00 21.50 21.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.95 0.71 -4.38 172 2384 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 2276 9 516 1528 1638 145.30 32 86.26 CHANGED hphlut.shGsG.hh.hAP.GThGolsAlshhhhl...h..........hht.h..............hhh....hlhh.....shllGlahsstspc.ths.......hcD.utlVhDEllGhhlsh......hhh...............sh..............................h..............hh.ll......u.FlhFRhFDIhKPhPIshhD......cc................hp.............GG...hGlMlDDllAGlhAslshtlhh .................................................h..hhuhhFssG.h...hs.P..GThGo.LAulshhhhh..............................ph.sh.....t..hhh.........hhlhhshhhGlhlsptst+..chs..........s+DcG.t.l.VhDEhlGhh.Ish......hhh...................s..........................................sh.............hh..shGFlhFRhhDhhKPhPIchhD...+p.................lc.............GG...hGlMlDDllAGlhAuhshhhh.t............ 0 164 318 426 +3807 PF03334 PhaG_MnhG_YufB Na+/H+ antiporter subunit Mifsud W anon Pfam-B_3611 (release 6.5) Family This family includes PhaG from Rhizobium meliloti Swiss:Q9ZNG0, MnhG from Staphylococcus aureus Swiss:Q9ZNG0, YufB from Bacillus subtilis Swiss:O05227. 23.40 23.40 23.60 25.20 22.90 23.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.47 0.72 -3.93 234 1788 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1344 0 548 1242 226 82.50 33 67.13 CHANGED hlllGuhhslluulGllR.hPD.....hasRlHAsoKusTLGsshlllushlhh........h........ptt......hsh+hlLlhlFlhlTuPlu...uahlu+AAhp ...........hlllGuhhslluulGllR.h.D......hasRhHAsoKusTLGshhlLlushlah...........spsh......................hsh+hlLhhlFlhlTuPlu...uHhlu+AAh......... 0 162 340 457 +3808 PF02304 Phage_B Scaffold protein B Mian N, Bateman A anon Pfam-B_9648 (release 5.2) Family This is a family of proteins from single-stranded DNA bacteriophages. Scaffold proteins B and D are required for procapsid formation. 
Sixty copies of the internal scaffold protein B are found in the procapsid. 25.00 25.00 150.70 150.60 19.00 16.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.58 0.71 -3.80 6 76 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 51 2 0 60 1 115.60 64 97.99 CHANGED hpcNtssshopEsIpsospPQhRNEsshNsSs.pGssssT-PuGLRRDPVQpclEAERQcRspIEAGKuhCuRRFGGATCDDpSAcIaApFD.ssppVQPAEFYRFNDuElsKaGYF .hTcNQssspSQEulQNpNpPQhRsEsAhNspuVpGshssT.puGLRRDuVQsDlEAERpKRs-IEAGKuhCoRRFGGATCDDKSApIYApFDcNDhRlQPAEFYRFpDuElNpaGYF 0 0 0 0 +3809 PF04717 Phage_base_V phage_base_V; Phage-related baseplate assembly protein Waterfield DI, Finn RD anon Pfam-B_5996 (release 7.5) Family Family of phage baseplate assembly proteins responsible for forming the small spike at the end of the tail [1]. Also found in bacteria, probably the result of horizontal transmission. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.39 0.72 -4.04 71 4590 2009-09-13 07:45:09 2003-04-07 12:59:11 7 55 1466 5 992 4204 146 90.00 34 15.09 CHANGED Gslsslshs....p.............uR..lRVp..................sGs........h.osWl.hhsstAG..psppatsPslGEQVllh...sG..-.spulllsu.lassppssPsss ....................................................................................Gs.sAhVsu........p................hDp.GR..l+VpF.a........................t...tssc............p.Ss.WlRVup...sa......A......G...ts.......a......G.th......hlPRl.GpEVlVs....F.lsG...DP.D.pPllhGp.lYsspshsPh................... 0 209 482 739 +3810 PF03864 Phage_cap_E Phage major capsid protein E Finn RD anon DOMO:DM07502; Family Major capsid protein E is involved with the stabilisation of the condensed form of the DNA molecule in phage heads [1]. 
20.90 20.90 20.90 21.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -12.17 0.70 -5.51 77 1242 2009-09-10 22:20:44 2003-04-07 12:59:11 10 3 734 1 126 899 64 301.00 29 96.11 CHANGED FssssLs.tsls...phs.hsshltsh..lFtp..p...s...opplsl-tp.sshhslh.shsshsssu.tht...ppptpspshpss..alt.ppslpss-ltshRsh.Gpp.............sthppht.phlsc..ch...tph.cpphchThEahphsAlts.p.lh......sscsss.hhDah.phshstts...hth....t.ts...ssh.spshpslcphpppl...phhsss..hslsusphastlhs..psplpc...thpth..t.........hp.............t..psh.patGhh..........................a.pcht................t...stph....hls.......sscshhhss.......t.shhpphausss...ppsss.u...............tshasp.h..pt.cspshplpspSsPLslsscPsslhp.sps ....................................................................................................as.tplh.th.p....ph....phpshhhpl..aFpps..........hs.hpTpplhlcpl....su.h..h.shu.shs.oPhs.tupslp....pcutps..p.hp.ss.....al.Ks+cpls.pp..h...hhRhs.sp-.................ssshphh.cllhpsh...ppt.cpulsts.Eh.sssAlh..tGK.hh........spth.p....hpl...Dh.....shstptph........t......phup..stss..h.cs.tpcl.ctht.th.......ush.sshhlhsspsap.hlps......acth+-......thcsppsu......................ttsl+.............shstsl..sa.pGhh.................lhhhpsp...Yhpp..................sG....spcp........als..................ssphllsss.........t.spGhphY.Gshp......tps.p.G...............sshYsKpa...hpss.cPstph..shspSsPL.lhs.cPsthl.lp.......................................................................................................................... 
1 43 83 102 +3811 PF05125 Phage_cap_P2 Phage major capsid protein, P2 family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 20.40 20.40 20.60 20.80 19.70 20.30 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.99 0.70 -5.55 5 788 2012-10-03 06:22:39 2003-04-07 12:59:11 7 2 540 0 77 634 13 308.10 45 94.81 CHANGED RppFsAYLupLAKLNGlssscVu+..KFsVEPSVsQpLtNslQESSDFLKpINIlPVuEhKGEKIGlGVoGTIASTTDTuGsstR+spDhoALssN+YECsQINFDhalsYAhLDhWA+.FsDFQcRIR-AIlcRQALDhIMIGFNGTSRA-TSsRAsNPhLQDVAVGWLQKYRNpAPARVM....oE-sKooGKV...lls.GcsuDYcNLDALVFDusssLIDPhaQ-DPcLVsIVGR-LLADKYFPlVNKc.QcsTEplAuDlIlSQKRlGGLPAVcVPYFPAcAlhVTTL-NLSIYap--S+RRoL+ENP-KDRlENYESpN-DYVVEDYuuGCLlENIcV .................................thssYhtplApLNuls.st..t..lsp......+FoV.EPoVpQpLcsthp-S.up.FLphI.Nl.hsVs-.pGphlslGssusIAuTs..-o.s.....s..pc.Rp.P.pc.st.l.tsp.cYcC-..QsNaDshlsY.spLDh.WAp.....a.pD...FQ.RlpstIs+p.ALDhIMIGFNGsp+A.c.sSsh.ssNPhLQDVNhGWLp+hRp.....-As.p.+VM..............sp.s....t....o....o....sph.............hts.....u......c..u.G....c..YsNLDAlVhDshssLI-.hap-Ds-LVVIsGRpLlu.schhPllN...pt....ppNoEhlAup.lIlSp+plGGL.AlpsPFFPssshLITpLcNLSIYaQcsocRRhh.h.-sPchcRlEsapShN-uYVVEDYttsuhl-pl.......................... 0 9 29 54 +3812 PF05144 Phage_CRI Phage replication protein CRI TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The phage replication protein CRI, is also known as Gene II, is essential for DNA replication. 
21.00 21.00 22.70 22.20 19.90 19.00 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.67 0.70 -5.30 13 166 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 131 0 13 147 10 238.00 25 63.01 CHANGED MIDalshclPhcc..............s.lshucVsplu....ssGsVEacs...spclpVsG.........SahouIul+sh...pus....s.tspLplcusPAKhhQGHNVaGo-DLpshshthapslhss.......hs-.hchlchGthplsRlDsTaShcLsocpplhuhLcuhcpsu+ppp+usts.shtoTlYFsKs..........SR+aoLphYSKt-Elp.thcs.............+ph.ctLspscLhpaAss+LRhEhpl+o+.Lpchs........lpasssapsc ..................................................................................................ls...splhphs....sst.tlpap.....tphpsps.........op.oshth+.h.........put........h.stlplcusPuKhh.QGH..NVaG.ss.....slphssth.hhstlhph..................sthhchl..s.s....tscloRIDhThshplus..c.hpphlcthpshuptpt...+sstp..s.htsTlaas.c.s..........u+ph.pl...phYsKt.sElp.pphpc......................p......tt.s.p...pp..h..h.....-.hp.t.spL.ta..u..ts..h..lRhEhph+pphLpchs..................h.................................................. 0 2 6 12 +3813 PF02303 Phage_DNA_bind Helix-destabilising protein Mian N, Bateman A anon Pfam-B_9239 (release 5.2) Domain This family contains the bacteriophage helix-destabilising protein, or single-stranded DNA binding protein, required for DNA synthesis. 25.00 25.00 26.70 26.40 21.70 21.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.12 0.72 -3.96 3 81 2012-10-03 20:18:03 2003-04-07 12:59:11 12 1 61 22 8 90 1 84.70 33 88.25 CHANGED lKVEI+sSQVuV+oRSGVS.+pGKPYTlp.EQpAYVDLGG-YPlLFsIsLDEGQPPYusGhYplHPuSFKlNpFGuLtlGR.lRLlPsK .......................hpl..tt.th..psRshps.+pu..K..P.t.phh.EQpA....hl...hhG...Gp..aPs.hc...lpL-csQss....YssGhYplHssSa.hls.s.a...GsLplc+..h......h................ 
0 1 4 7 +3814 PF02305 Phage_F Capsid protein (F protein) Mian N, Bateman A, Haft D anon Pfam-B_10357 (release 5.2) Domain This is a family of proteins from single-stranded DNA bacteriophages. Protein F is the major capsid component, sixty copies of which are found in the virion. 25.00 25.00 70.30 26.40 19.30 21.90 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.78 0.70 -5.96 7 248 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 72 12 1 272 157 229.10 36 98.46 CHANGED Spl.ou.hpRs.aDhSHhshpshphGcLhsI.hs.VlsGDoFphctsshhRLoshhhslhsc.plDhahFaVPpRhla.cpW.pFht-ssss..................uPhsuhspsohhDahGhsspls..sh+lsthhapuY..IaNsYFRs..h.ppstss.us.......hsts-sphs.............h+ss+h+shaTusLP.Pppthuhphsl...............................................................................................................uooulsI.uLptAhsh.+h.pccshhhoRYh-llpuahGspShDuc.pRP.hLhtosh.sss.sVstTspo........sLutFSsps..s.K+hhs+.FV.EHGhlhsLhhsRhs.TapptlchhhuR.phpa.Dhh..PsL.upLs.ptl..KEla..........tsusssthFthtEt.t.YRa+PspVssha.....psLDua.h...asshP......sLpcphl.ps.sshDcsht...Ssp.spa.sphhFNhpshRsMPshpsshh 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 1 1 +3815 PF03335 Phage_fiber Phage tail fibre repeat Bateman A anon Pfam-B_3576 (release 6.5) Repeat \N 20.00 5.00 20.00 5.00 19.80 4.90 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.45 0.76 -6.72 0.76 -3.45 38 2424 2009-01-15 18:05:59 2003-04-07 12:59:11 8 41 239 21 57 1692 4 13.50 43 14.97 CHANGED susGsHsHohouss ....suhGuHoHoloss..... 0 14 30 41 +3816 PF03406 Phage_fiber_2 Phage tail fibre repeat Bateman A anon Pfam-B_854 (release 6.6) Repeat This repeat is found in the tail fibres of phage. For example protein K Swiss:Q37842 [1]. The repeats are about 40 residues long. 20.20 20.20 21.10 20.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -7.95 0.72 -4.32 50 1985 2009-01-15 18:05:59 2003-04-07 12:59:11 8 103 486 0 146 1761 33 43.20 57 13.82 CHANGED ohopKGllQLSSATsSsSEohAATPKAVKs.shDpAssphspsps ......ohspKGlVQLSSATNSsSE..o.LAATPKAVKA.AhDhAsu+hsspp......................... 
0 4 69 102 +3817 PF02306 Phage_G Major spike protein (G protein) Mian N, Bateman A anon Pfam-B_8833 (release 5.2) Domain This is a family of proteins from single-stranded DNA bacteriophages. Five G proteins, each a tight beta barrel, from twelve surface spikes. 25.00 25.00 27.70 27.70 24.00 20.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.10 0.71 -4.64 6 79 2009-09-10 23:22:16 2003-04-07 12:59:11 10 1 55 6 1 73 2 173.40 60 100.00 CHANGED MFQpFlSKHNuPl.TSp.slusolTPAus.A..PVlsTPclou.+oslhlshTlTAuostuG.Fsasl+lDsosPssNQVlSVuAsLS.sVsuchIAslVRFEsAs.ssPTulP.uhYDsYPlE.uhtsGuuhSs+DCVTIDsHPRTsGNsVYVGlhlaSstWoAuploGllSlNQVs+EsTVLQPLK .MFQsFIS+HNosF.hSc..lssSlTPAus.A...PVLpoPchou.pohah.ulTlsAus..uG.FhHslpMDTSsssuNQVlSVGAsluFsuDscahAClVRFESu..oVPTolP.osYDVYPl-.utpsGGhhoVKDCVTIDVhPRTsGNNVYVGhMlWSN.aTAo+spGlVSlNQVI+ElhsLQPLK 0 0 1 1 +3818 PF04688 Phage_holin Phage lysis protein, holin Kerrison ND anon DOMO:DM04726; Family This family constitutes holin proteins from the dsDNA Siphidoviridae group bacteriophages. Most bacteriophages require an endolysin and a holin for host lysis. During late gene expression, holins accumulate and oligomerise in the host cell membrane. They then suddenly trigger to permeablise the membrane, which causes lysis by allowing endolysin to attach the peptidoglycan. There are thought to be at least 35 different families of holin genes [1]. 20.60 20.60 20.60 20.90 20.40 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.29 0.72 -4.44 24 332 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 263 0 29 157 1 44.80 42 47.79 CHANGED GhsPlPlsEsplpphh.......SslhThssolhuWaKNN.lTp+u++ppphL.+ ......GhSPIPlD--pl...........SollhslsuLashaKsNs.sopcGKhupphLK....... 0 12 20 26 +3819 PF04531 Phage_holin_1 Bacteriophage holin Mifsud W anon Pfam-B_2644 (release 7.5) Family This family of holins is found in several staphylococcal and streptococcal bacteriophages. 
Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the buildup of a holin oligomer which causes the lysis [1]. 21.60 21.60 21.90 22.10 21.20 21.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.79 0.72 -3.92 23 523 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 423 0 29 274 7 78.90 39 81.28 CHANGED pINWKlRhKNKshhsAlluulhLlspplst.....hhuh..shsph.pplpsllsslhslLshlGllsDPTTcGluDSppshs.YpcP+c .............INWKlRh..KsKshhl.ull....uAlhLhhQ.plst........hhGh....chpshspQlss...hlNulLslLslhGVl......sDPT.T..cGlu.DSp.A.p.YptP+............................ 0 11 23 27 +3820 PF04550 Phage_holin_2 Phage holin family 2 Finn RD anon Pfam-B_61235 (release 7.0) Family Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the buildup of a holin oligomer which causes the lysis [1]. 25.00 25.00 25.50 25.30 20.40 18.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.90 0.72 -3.89 6 181 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 164 0 22 73 0 89.80 62 93.82 CHANGED Ms-uEKolIsLhlh....GALIulGKVLsGsEPIThRLhlGRhlLGoulShlAGlALlphPDls.lALsGIGSuLGIsGhpslElhLpR+...htup ...........MsscEKSlL.SLFhI....GsLIsVGKVLAGGEPITPRLFIGRMLLGGFVSMVAGVVLVQFPDLS.sAVsGIGShLGIAGYQVIEIAIQRRhKtp.......... 0 0 8 13 +3821 PF05106 Phage_holin_3 Phage holin family (Lysis protein S) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family represents one of a large number of mutually dissimilar families of phage holins. Holins act against the host cell membrane to allow lytic enzymes of the phage to reach the bacterial cell wall. This family includes the product of the S gene of phage lambda. 
22.30 22.30 22.70 22.60 21.90 22.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.12 0.72 -4.02 25 643 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 434 0 62 284 2 96.80 31 90.20 CHANGED Mscc.PchW..AslLshLpp.h...splhuuslAhlhAhLRhhY..sGssh+cpllEuslCGhlshshtssl.........pahGlssshushsGuhlGalGV-plRphspRhls+K ....................................t.h.pllshlhsth.....thhuslhAhshAhLR.hhY........sGsuhpcsll-uhhCGhluh..sltssL................c.ah....G......h.s..ps...luh..hhushIGalGs-plpshlhphhs++............... 0 6 17 39 +3822 PF05105 Phage_holin_4 Holin; Holin family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Phage holins and lytic enzymes are both necessary for bacterial lysis and virus dissemination.This family also includes TcdE/UtxA involved in toxin secretion in Clostridium difficile [1]. 19.80 19.80 19.90 19.80 19.70 19.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.44 0.71 -4.42 57 986 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 662 0 122 758 27 117.20 29 83.22 CHANGED lu...uhhshh..hGuhsthLhhLllhhll..DalT.GlhsA....hhp+c........lsSphGh+Glh+KlhhhllVhluthlDhhlhts......tslps.....s......llhaYluNEulSIlENhuphGlPlP...phl+phlc.pLpscs .....................................................h..shhshh..hGshs.h.hltll.lhhhll..DalT.Ghht...A....hhptc............................lsSch..Gh.p.Glh.+Klhhh.l.lls.luh.h.lDhhlsss..................slpp....s.............l.lhFY.l.u.NE.hlSll.ENh.uc...h.G..l..P..l.P...phLpphlp..Lppc.p......................... 1 60 95 103 +3823 PF00589 Phage_integrase Phage integrase family Bateman A anon MRC-LMB Genome group Family Members of this family cleave DNA substrates by a series of staggered cuts, during which the protein becomes covalently linked to the DNA through a catalytic tyrosine residue at the carboxy end of the alignment. The catalytic site residues in CRE recombinase (Swiss:P06956) are Arg-173, His-289, Arg-292 and Tyr-324. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.99 0.71 -4.59 35 44346 2012-10-02 14:09:14 2003-04-07 12:59:11 17 151 5378 81 8222 34800 8709 168.00 19 51.02 CHANGED phLstcplppllstspp........thcs+sh......lplhhtTGhRhuElhsLchsc.lshppphhhl................psKsp+p...RtlPls...pthhphlcchltpp............tpc......................hlFssp...........tsp.hsppshppth+phstp.....sulp......thssHsLRHohuopLhcpGh.slcslQclLGHsslshT.thYs+sstpc ....................................................................................................................................................................................................................................tph....t.h..h..t......h.t........................p.....t..h.h............................h...l....h....h....h...o..G.h..R...h.......u..E.........l.....h..........s.................L......p...........h...........p...........-.....l...........s........h..........p........p.....t......h.....h...t..l...............................................................................tsK...s.p..pp.............+h.l...s.ls................pph....h.......p......h....l...p...p....h...h.ptt........................................t.p.p...........................................................................................................................h..l..F....st.................................................tt..p.......h.....s.....t....p.....s.....h.....p....p.....h....h........c..p....h...s..pp..............................ss.lp............................ph..s...s..Hs..h..R..H.o....h........u......o.......t...........h.........h.........p............p............G.........h........s............h.......p...........h........l.........p......p........h.............L.G.H......p......s......h...p....s....T....phYs+h....p.............................................................................................. 
0 2564 5275 6927 +3824 PF03245 Phage_lysis Bacteriophage Rz lysis protein Bateman A anon Pfam-B_3219 (release 6.5) Domain This protein is involved in host lysis. This family is not considered to be a peptidase according to the MEROPs database. This family Rz and the Rz1 protein (Pfam:PF06085) represent a unique example of two genes located in different reading frames in the same nucleotide sequence, which encode different proteins that are both required in the same physiological pathway [1]. 25.40 25.40 25.60 25.40 24.70 24.70 hmmbuild --amino -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.49 0.71 -4.14 30 1696 2012-10-02 17:03:51 2003-04-07 12:59:11 8 3 601 0 80 1081 5 120.20 51 80.99 CHANGED putphcpptschspplphsp....ssIschpsRpR........slAsLDs+aTcELu-A+uc.-sLRsDVAuG++RLp..lpA..oCsu...sspssusuulssuuusR..LsssApcDYhsLR-cItpsppQlpuLQ-YlRsp.Ch .....................................NAlsYKtQRDcps.pc..L.c.hAN.....AsITD.MQh.RQR............DV.AALDA+Yo+ELADA+AEN-sLRs.DVAAGR...+RLp.......lpA.......sCss.......st.csTu.sSGhs.N.u.s.u.PR...LussA.....ERsYatLR-tlhphppQLcshQ-YIRoQC.......................... 0 7 26 55 +3825 PF00959 Phage_lysozyme Phage lysozyme Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family includes lambda phage lysozyme and E. coli endolysin. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.47 0.72 -3.77 18 3255 2012-10-03 00:09:25 2003-04-07 12:59:11 14 30 1340 658 411 2608 736 110.60 29 59.84 CHANGED hhTIGhG......pspslssp.t..................hopppusshhphslp.thtctlsphhp...........shstsphDAlsshsaNhGht...........hhttoohlcthpttphhttssplhc.h.s.G.......phhptlhpRRpp ....................................................hhTlshG...............sspslhsuhp........................................hocpps.stl.h.....t.h.-ht..ps...t.....h.l.p.p..h..lps..............hs.p.pt.s.u.ls.hsaNlGs.................Gth...t.sS.T........hh+.+...l.N...t.........G.....-..h..p..u..A..C....c..p....l.......h.t.W.h.h..s...sG.............pth..h...GhhpRRc............................................. 0 74 208 308 +3826 PF03863 Phage_mat-A Phage maturation protein Finn RD anon DOMO:DM08200; Family \N 25.00 25.00 25.70 25.70 19.00 18.60 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.33 0.70 -5.41 11 306 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 19 0 0 219 0 218.90 48 94.72 CHANGED aaPcchht..hslplspYthsthhssp..hpthss..hhph...poPcphs..........sPssah.GhcslTslch.ssshhhph.sssscas.upusuuslchpsal..ss..........hhuphsashpsphsschtschSthssphGshluEuRcTlphLuhhhtplhcua+Al+RGDl+cltphlp.h...............................pptcapupssushWLEhpYGlhPLhhDIpushE.......-ah+sHtchthhhRhSsshGpshslphsth.Puhshh...slpspsslpRR.........pthahshssupLt.lS.....uluhlNPhpluWElsPaSFVVDWFlNVGchLEth..phhhslchlsG.hsp+clch+Sl.ol+shhssss.............shp.upspshtshhsRshpsshPpssstlcoshush.HllDulALl..sQRlK+ 
.......................E.sassolhsYscstp.sNuaShchhsN..........aTPtRhs......+pa+hPosaSpGhhsVTol-Q..GAapRphSshGRsa-.tsGhuhoLcsc.....us.phhs.s...hhh.hs.shssp....................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +3828 PF04233 Phage_Mu_F Phage Mu protein F like protein Kerrison ND, Finn RD anon COG2369 Family Members of this family are found in double-stranded DNA bacteriophages, and in some bacteria. A member of this family is required for viral head morphogenesis in bacteriophage SPP1 (Swiss:Q38577). This family is possibly a minor head protein. This family may be related to the family TT_ORF1 (Pfam:PF02956). 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.41 0.71 -3.55 13 1923 2009-09-11 06:39:01 2003-04-07 12:59:11 9 14 1455 0 257 1607 203 114.20 19 30.44 CHANGED pplpssltpultpGpshcclu+pl+ch.................................th.s+tRhphlARspstps.sAuphtp..pptphtht.h..pasustDsRsRspHtcLcGpl.....aphD-..hp.sst.............PspchNCRCslh .................................................................................................t..lpp.lhpul.h.p.G.p.....s...p..phtcplptt..................................................................hth.sp..c...h...pplhcTphsp.s...hs.tuphpp.......htph.sh.....t....h.....h....p.a.hu.....s....h...D.s...+...s....p...........p....t........p.....t..s.h.....c..G..p.h.........ac.....hc.-.sh.h.t...hth...........................................................P..s..p.......h.sCRCph.h.................................................. 
0 81 168 215 +3829 PF05136 Phage_portal_2 Phage portal protein, lambda family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.1); Family This protein forms a hole, or portal, that enables DNA passage during packaging and ejection. It also forms the junction between the phage capsid and the tail proteins. 26.30 26.30 26.60 26.30 26.00 26.20 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.98 0.70 -5.92 62 1366 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 672 0 152 1135 323 316.40 34 69.92 CHANGED ustspphtsWpstst..ussstltt.shsplpsRuR-l.s+NsshusuulcphhsNllG.sGlp.p.tths.................spthpcplcphW.ppWs.ss...psDssGphsahthpplshRshl.psGEshsphhhtpts......shhhshplpll-sDpls...ssh...........ssss..p.......lptGlEhcphG+.lAYalhcs+Pssshht...........phpRlPAp.......pllHlac..pRsGQhR.Glshhusslh+lppLcpap-ucLhptcluAhhuuaIcpspss.ssshtt.ss........................t.ptpt.hplpPG.thttL.PG--lphhssscssssassFhpt.LRslAuGh.GlsYctlotDhssssYSShRtullEhp+thpthpp .......................................hssphtta.s.sp..osDstlh..shpthpsRAcsL.scNsshussulphhp-+lVG..ph..hhlp..htss.hphh.............AcshscclcstW.pEau.s......h.DVpu....phTas.h.R.sltsah.hsGElFsphshspts......sthhthphchlpPchls.ss......................stus..............lptGV......pls..c..h..G.cshuYalhc..st..st.h.t............chpplstc.........phlHl.h.ps..h.cssQhR.Gs.s.hhuVh.plphLcphpsspLpuAhltAhh.ushIcpt.ss..ptshch.ts..............................................h..s.tt.l...pl.sG.htlscLhPG-clph.su.pcssssapsFcpu.LRhlAAGh..slSYpplu+sast.oYSutRtuhsEuhchahhhpc............................................. 0 47 88 121 +3830 PF05133 Phage_prot_Gp6 Phage portal protein, SPP1 Gp6-like TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This protein forms a hole, or portal, that enables DNA passage during packaging and ejection. It also forms the junction between the phage head (capsid) and the tail proteins. During SPP1 morphogenesis, Gp6 participates in the procapsid assembly reaction [1,2]. 
This family also includes the old Pfam family Phage_min_cap (PF05126). 24.50 24.50 24.70 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.23 0.70 -5.92 56 1289 2009-12-18 13:44:14 2003-04-07 12:59:11 9 4 997 13 170 1119 57 413.50 14 90.04 CHANGED pscpltphlpcphtpppp......h..hppYYpu...p.chhppttt...............t.hthcp+hshsaschllcphsshhhspshphsts.....sp......pssctlpchhp...tNchcsptpplhpsshhaGtuahhhahs........pssp.....hchth..hssppshslassshppp.hshlphhpppspptht...............................plasssp..........hhhhphpss...thh.t.stttt..........................astlPlltasN.sc.p.....puch.cshhsllDshscsh..sphsschpshs.chlhsl........................hs...........................t.ttphhpshpptphhpl.................sssssschlp......pphsssshcshhctlcp.lhphutssshs.spthu.....ussSuhAlchthpthppcsppppppappulpchhphhhphhs.....tps.h...........clphsascsh.tshsppsc......sthtt...tGhlopc..shlpths.........hl......s..-scpEhc..chpp-ptphtptttsttp ............................................................................................................................................t..........................h..h.paaps...........p.phh.t..t................................h.s...p+hshsasphlscthssh.h.h..s..p..s.hphsts.............pp...........................ph..pt....lpphhp...........ts.p..h..ppt.p.phhppsh..h...hG.tu..a..h..h..h.hhs.....................ps..sp.......................hplph.........hss...p......p..h...h..sl.h.s.s.p......p.p.p.....h..h.h.lph...hp...p.p..t..t.pt.t...................................................pla.sss.t.............hh..h.h..hp.tt...............thh............st..t.......................................................hsh.lP..l....h...h.s..s.....c.p..............................huph.cphh.sllDthspsh.....uphss..p..h.p.th..t...phh.h.sh.............s........................................t.ttp..h.p.p.h.t...ptthhpl.......................................s.tss.ss.phlp.............tph...s...s.tth.c.shhc.tlp...p.l
hphsttsshs..spphu.............ustSu.u.....hc..hp..h.shptpsppppphhp.p.ulpp.h...h...p.hhhp.hhp.......................t..............................plp...hpas...p...s..hshshtpth...p..........shht.......tGhlop.c...ph.ht..h................hh..............s..cs.p.tthc.....chppct...........t........................................................................... 0 61 125 149 +3831 PF05135 Phage_connect_1 Phage_QLRG; Phage gp6-like head-tail connector protein TIGRFAMs, Finn RD, Bateman A anon TIGRFAMs (release 2.1); Domain This family of proteins contain head-tail connector proteins related to gp6 from bacteriophage HK97 [1]. A structure of this protein shows similarity to gp15 a well characterised connector component of bacteriophage SPP1 [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.32 0.72 -3.84 81 1873 2009-09-11 08:37:08 2003-04-07 12:59:11 8 5 1271 14 232 1181 124 94.80 16 85.42 CHANGED Ms.......................Lc......clKphLpl..-sspcD..sllpthlp....sApphlpsthsp.....................t.pphssthphhlhtlss.phap...p+ss..pupss........p.hsh..shpshlspl+th ..................................M...............LcclKthL+l....-psp...-D....chL.pt.hlp....sApphlp...s.hhst.......................................................................................th..sthph..s...hh.lss.hhYp.......ppt..........st.t............................h...........h............................................................................................................ 0 72 146 194 +3832 PF04492 Phage_rep_O Bacteriophage replication protein O Kerrison ND anon DOMO:DM04335; Family Replication protein O is necessary for the initiation of bacteriophage DNA replication. Protein O interacts with the lambda replication origin, and also with replication protein P to form an oligomer [1]. 
It is speculated that the N-terminal half interacts with the replication origin while the C terminal half mediates protein-protein interaction (annotation of Swiss: P14815). 28.70 28.70 28.80 28.80 28.60 28.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.04 0.72 -3.80 9 670 2009-09-11 22:41:49 2003-04-07 12:59:11 8 3 446 0 27 476 7 97.40 48 35.34 CHANGED phhEpR.....hADL-DGYhRlANpll-uhhtscLotRph..pVlhAlhR+TYGaN........K.hDhlsNsQlAplTs.......lssp+lscAhppLlccsll...ppsG.+hlGhNpslSpW ................s...E.p..VADL-DGYsRluNtLlEAhhhusLTp+Qh..pVhLAlhRKTYGaN...........K.hDhlossQLuElTt.......l..s+.+C.spAKppLV+hsIl.............hQpG....t.hGhNpslSEW.................... 0 7 8 15 +3833 PF04984 Phage_sheath_1 Phage tail sheath protein Bateman A anon COG3497 Family This family includes a variety of phage tail sheath proteins. 26.20 26.20 26.20 26.40 25.90 26.00 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.39 0.70 -6.05 41 3025 2009-09-13 02:25:27 2003-04-07 12:59:11 9 6 1415 24 555 2645 2611 301.10 16 92.64 CHANGED M...s.....tsphhPGhYlphpssuttshshuspGhluhss......shshGss.....spshpl.ssts...phhphhG..........hssphhthlcthhp.tsstplhlh+l.st....................Gspsssph...........................................................................................................t..hhtshhsGtpustltlhl..phstt...................................hshhh..ts..h............l.sp...........thh.....t.s.ht....................sstsltuGss..suthss.chht..........shssh...pshphshlshss.......tsp..slpssltshspphptt.ttcthullss.ss..s.......t...................-thhslssthshhsss.ht.............sssalAGhtAt.sshppuhss......hphsuhhslshp..hspsc....hpthlpsGhhshpts..ssslplhpslsohhp.s.tspspsatp.pslRshshlppsltpthpp.hhsctss......sc............sshtslpssltsah.cpLpstGslp.sacs......ptDhplpt.......................sscpsclhhphshpsVsshcplhhshplp 
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......................................................................h.......................thhh.hs...........................................h........h............................h.....hhs..h..hs......................................................................................................................t..h..........t..........h....................................................................s...hhhuhh.......uh...s....t.....s..................................t..........h.....s.......h...h..s...h..................ht........t.p................t..........h...............t.t....l..s....shh........
...................t...t.h.h.h..t...shs..................s...h.....hhchhthl..p.tht...h...........hh............s......................hhttl..t.hh..tth.t.t..hl...shp...................t.....p...............................t.t.sth.h.h...t.....hc.h.h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 146 338 468 +3834 PF04630 Phage_tail Phage major tail protein Mifsud W anon Pfam-B_5341 (release 7.5) Family \N 20.70 20.70 20.80 21.20 20.60 20.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.01 0.71 -5.22 14 370 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 319 0 29 212 1 175.60 25 78.65 CHANGED MA...ohGlctltluLlDspsshlh.cussGL.........ossGla.hDs....pshGTpTANITsLpuussplaGNNpltcsshupupPpVAhshNsLsa-lppKlhGhhsDscGGYs.ps..pKP.+VAlLlpS.pslDpsp.lYauFusGphsE.outNhsTss.supsRs-DslTasuLs.........pshsspshKha.ss-.................ssFs.pssMhp-VFsGYstsss ....................................................t...............................................pt....p..u.shphplosltsp.hscha.usN.t.lhh.t.t....pGsuclpschshhs.lP...s.-.shspllG.t.p.ccp..s.G.shh.hup...pspP.saVull.hEo..c.stsss.shaluLhKGpFoh..suhchpT..cp..cs..s..pss.plohphhs................t.tt....hh.h.h...t..t....................t...t...............s....................................................... 
0 11 19 26 +3835 PF05100 Phage_tail_L Phage minor tail protein L TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 25.00 25.00 25.40 25.20 20.00 23.20 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.43 0.70 -5.20 8 1425 2012-10-02 17:50:33 2003-04-07 12:59:11 7 6 613 0 63 1256 146 186.40 62 86.69 CHANGED TshGu.-hhhFps...............E.pttGt........PlsWQGppYpsaPIpupGFEtsGcGousRPoLTVuNLhGhlouhspshssLVGApVlR+cTYA+FLDAsNFssG.NPsADPpQEh.lshWhlEQpot.su.posoFlLSoPs-hpGshlPuRphLusoCsWs....YR.G-sCGYsGsshhDcaspPToDsutDcCo+slouC+hR..ahtsNllsFGGFsuss+l ............................................T..GG.ERYFFCN................E.NE.K.GE................sVTWQGR.pYQsYPIpGoGFEhNGKG.ousRPoLTVSN....L.........a.........G........h......V.TG....M.AED......L......QSLVGuTVVRR+VYARFL.DAVNFV.sG.Ns..-....ADPE..QEl..hsRWhlEQhS-LoA..hoASFVLuT.PTETDG..AlFPGRIMLANTCh.....Ws..............YR..GDECGYs....GPA.VADEaDpPTo......Dl.p...K..................D+CS.KCh.p.GCchR...........s.s......l..sNFGGFLSINKL.................................. 0 5 25 45 +3836 PF04761 Phage_Treg Lactococcus bacteriophage putative transcription regulator Mifsud W anon Pfam-B_3898 (release 7.5) Family This family represents a number of putative transcription repressor proteins found in several Lactococcus bacteriophages. Horizontal transfer may account for the presence of similar proteins in Lactococcus [1]. 25.00 25.00 54.90 54.80 21.90 16.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.90 0.72 -4.20 2 33 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 27 0 3 26 0 56.50 75 97.85 CHANGED M..EpslsHhGpsl.QcSVEaYKcpL.hc.ShpFlpsSLlPQLaEWSNAYKAAVELTK ....MpQEKTINHLGQlVYQESVEFYKEKLSVa.SKDFLQNSLIPQLYEWSNAYKAAVELTK.......... 
0 2 2 2 +3837 PF04985 Phage_tube Phage tail tube protein FII Bateman A anon COG3498 Family The major structural components of the contractile tail of bacteriophage P2 are proteins FI and FII, which are believed to be the tail sheath and tube proteins, respectively. 22.40 22.40 22.60 22.50 20.80 22.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.73 0.71 -4.81 53 1069 2009-09-11 14:55:36 2003-04-07 12:59:11 9 6 755 0 146 654 22 163.70 38 97.85 CHANGED slP+.pL+phNlFsDGpuahGplpplTLPKLstKhE-aRuGGM.sGslclD..hGh..-tL-sphshuGh.pt-llphaGhsshsustlpFpGuapppc.....upssslplshRGchpEl..DhGshKsG-csphphohssoYYKlsl.sGcsllEIDhlNhlptlsGsDhLus.hRpslGl ....................hlP+.+L+hhNlFh.D.G.p.shhGhlpslTLPKlocKh-pYRGGGM.sGuls..lD..hGL..sAL-spashuGh.pttlhpta....Gtssh.c.u..l...LRFsGuhpp-c.......u-s...s..lElhh...RGRhpEl..Dh.G-h..Kp.G..Es.s.ppphs.h.s.s.o.YaKLol..sG..csLhElDhlNh..l.h.l.sGsDhltp.hRsslGL................................................................. 0 28 72 114 +3838 PF05155 Phage_X Phage X family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.1); Family This family is the product of Gene X. The function of this protein is unknown. 20.80 20.80 21.00 20.90 20.10 18.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.82 0.72 -4.05 18 179 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 139 0 15 160 7 88.90 29 24.99 CHANGED shsphuspphschspthphhhthWppG.scl+shl...s+uTFaRacpcLhphGlDhuph.sls+tsss.hlPLs+lL-scss......phP-WYhpPshhhs ..................hs.tGshshs+tsphhphYhhLhppG.psl.+t..th.........s+soaaRahpc.Lh.phG.lshuph.slschps...lPhschlphc.s......phPs..aYhcPs....s........... 
0 2 8 14 +3839 PF02912 Phe_tRNA-synt_N tRNA-synt_2_N; Aminoacyl tRNA synthetase class II, N-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.00 21.00 21.10 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.12 0.72 -4.25 53 4298 2012-10-01 23:07:44 2003-04-07 12:59:11 13 4 4261 7 933 2788 1981 72.50 33 21.23 CHANGED tpApshcsL-plRVcaLGKKG.lTplh+sLGsL.ssEER.thGthlNps+pplppslsp+cptLcputlst+Lt ...................p.tAsshpsL..cplRVpaL.G.K.K.G.plT.thh+s..lt.sL.ssEERsthGthlNps+ptlppsls.....t+pptLcpttlpt+Lt................ 0 317 621 796 +3840 PF02332 Phenol_Hydrox Methane/Phenol/Toluene Hydroxylase Mian N, Bateman A anon Pfam-B_15166 (release 5.2) & Pfam-B_3223 (Release 7.5) Domain Bacterial phenol hydroxylase is a multicomponent enzyme that catabolises phenol and some of its methylated derivatives. This Pfam family contains both the P1 and P3 polypeptides of phenol hydroxylase and the alpha and beta chain of methane hydroxylase protein A. 24.40 24.40 24.40 25.90 24.00 23.70 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.55 0.70 -5.06 23 1141 2012-10-01 21:25:29 2003-04-07 12:59:11 13 3 362 190 164 1148 22 186.10 33 63.63 CHANGED sWs.papppcs.......ttpsl+hscWpsacDPhchhYtsYVphpscp-ststu.lhsuhscs+thpph.sstat.sphptth.sslphsEauA.hupuph.schu..husshpssusa.tshDcl...Rahphphhhhcchstps.phshup...+thaps-.uhpuhRchh--hhh.stDhhEshluhshsh-sshsshhahtL.sptAspsGDpshuhLhsuhpsDpsRHsphusshlphhlppstp .......................................................................pttpppt..u.hh-uhspssu..th.cscah.pshKhhh.sslp.hEYtAhtu..u.hl.scph..sussh+suh.h..QslDEl...RHsQpphah.pta.t...pt.hshhp....pcha.sps.hhpss.+phF-Du.h.oussh.E.h.h.hu.lshshEhllTNlLhVsh.hphAAhNGDhsTsThhhSs.p.o.D.E.uRHhshGhps.lphhlppt..s................... 
0 56 115 147 +3841 PF04663 Phenol_monoox Phenol hydroxylase conserved region Waterfield DI, Finn RD anon Pfam-B_4509 (release 7.5) Family Under aerobic conditions, phenol is usually hydroxylated to catechol and degraded via the meta or ortho pathways. Two types of phenol hydroxylase are known: one is a multi-component enzyme the other is a single-component monooxygenase. This region is found in both types of enzymes [1,2]. 18.40 18.40 19.40 19.20 17.60 18.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.38 0.72 -4.23 25 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 72 6 28 84 3 66.00 43 56.72 CHANGED s+DphENFpGtpLLYluW-cHLhFCAPhsLPlPPshPFGsLlppVLsssautHPDaA+IDWspspWh .....+Dtp-NFsu..LLYlGW-cHLhFCuPhshPlPPshsFusllppVLsssautHPDFA+IDWuplpWh....... 0 4 17 26 +3842 PF04674 Phi_1 Phosphate-induced protein 1 conserved region Waterfield DI, Finn RD anon Pfam-B_4596 (release 7.5) Family Family of conserved plant proteins. Conserved region identified in a phosphate-induced protein of unknown function [1]. 
25.00 25.00 35.00 32.20 23.90 23.10 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.02 0.70 -5.23 21 304 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 33 0 177 296 0 240.80 43 84.05 CHANGED LpYHpGslL.sGsl.oVsllWYG+FsPsQ+ullsDFltSL.....Suss.............sssP..SVAsWW.+TscpYhptsssphs.......sslsLupQlhDpshShG+pLoctplppLAucu..u........tpsulslVLTusDVsV-sFChupCGhHutohsuhhth............phsYsWVGNStsQCPGpCAWPFHQPhYGPQus...........PLsuPNGDVGlDGMVhsLAohLAusVTNPauNGYYQ.GsssAPLEAAoACsGlYGsGuY.PGYsGpLLVDtsTGASYNAsGlsGRKaLlPAlaDPsoSsCsTls ......................................hpYHtG.s.lL...s..G..s....l.sVpllWYGpasssQ.......+..u...lltDFltSlosss.............s.tP...SVupWW.posp.tYh.ttsttt..................tplhlu..tph......Dp.th.Sh..GKsLpp.pl.plstpu..s...............tp.sulhlVLTutDV....s....V....p........s..........FChuhCGhHs.s..hs..u........................phsYsWVGNutpQCPGtCAW.PFt.tP.h..Y..G..Ppss.................sLhsPNGDVG..lDGMlhslAphLAsssTNP..as.suaa.p...G.......sssAP........lE............s...sssCsG.lY.GpGuh.s.GYsGplhsD.soGASYNs.Gh.pGR+aLlPtlasPtspsCts...................................... 0 30 114 149 +3843 PF03831 PhnA PhnA protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 23.00 23.00 24.00 24.40 22.00 18.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.70 0.72 -4.56 104 2536 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2436 2 438 1315 90 55.40 62 47.32 CHANGED hh+DuNGshLp-GDoVolIKDLKVKGoShs.lKpGTtV+sIRL...st..sssccI-s...Kl-...G .......hl+DANGNlLsDGDoVTlIKDLKVKGoS.ps.lKhGTKVKNIRL.....V....-GDHsIDCKIDG....... 0 121 255 351 +3844 PF02562 PhoH PhoH-like protein Mian N, Bateman A anon COGs Family PhoH is a cytoplasmic protein and predicted ATPase that is induced by phosphate starvation. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.32 0.71 -5.09 164 6190 2012-10-05 12:31:08 2003-04-07 12:59:11 11 24 4026 2 1327 5544 4974 200.10 47 57.55 CHANGED sI+s+ThsQppYlculp...ppD....lsFGlGPAGTGKTYLAVAhAVpuLpp..pcVcRIILoRPAV.EA...GE+LGFLPGDLp-KlDPYLRPLYDALa-hl........................sh-psp+hh..EcshIElAPLAaMRGRTLscAFlILDEAQNTTspQMKMFLTRlGhsS+hVlTGDlTQlDLP.ps.p.....pSGLhcAhclLcs.lc..uIuhsphsppDVVRHsLVp+.IlcAY- ..................................................................................................s.I+s+s.sQp.t.Y..l...p.slh.......pp-......llF..ul.GPAGTGK.T..........a.....L..A..l.....A..t.Al..p.A.L...pc......pp...l..c.....R...I..l....L.T..R..PAV....EA.............GEc..L.......G..F..L.P..G.D..l.p.....E..K..l..-.....P..Y...L......R.P...l..Y....D.....u..Lhph..l....................................................................u.h..-..p...h....p..c....l....h...-...+.....s.hI.E.l.......A..PL......A......Y...M......R...G.....R..T......L......s...c......AF..lIL.DEA.QNsThpQ......MKMF....LT.R.l....G..h.s..S...KhVl.......oGDl.o..Ql.DLP..ts..h...............pSGLtp..AhchLc..s...lc..........tlua.sph...p..t.pDVVRHslVtcllpAY........................................................................................................................................................................................................ 
0 436 867 1125 +3845 PF02114 Phosducin Phosducin Mian N, Bateman A anon IPR001200 Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.53 0.70 -5.67 2 1240 2012-10-03 14:45:55 2003-04-07 12:59:11 11 6 686 7 485 1332 19 169.10 36 84.47 CHANGED ht.ApSpo...hE.shEG.uspTGPKGVINDWR+FK.LEoEpp-p.s+phccll+phS...........pppcpKD.pE+hStKMolpEhthhcKsh-DEphLppYR+QpMp-M+QpLphGPpat.VhEl.SGEtFL-hI-KEQK.ThIhVHIYEDGl.Gs-AhNushICLAAEYPhVKFC+l+uSshGAusRFopssLPsLLlYKuGELluNFlpVT-QLuE-FFssDlEuFLpEaGLLPEKEhhVLppsp........-pDhE ..........................................................................................................................................................................................................................................................................................................l....p..t..-..c.E..D.E...p..h...L..c...p.....YR........c.+pMp....-h........+.......p.....c......L.......s..........h.......s........s.....+..........a.....G......p.....l......t.........E...l.......p...........s............G-................pFL..........-...s.....l....................E.......................+.......p.......p........K..s...s.................s....l.......l...VHl..Y.-..-..u......l...p.....s..Cc..l......LNssL..s....s..L.As.cY.s.h...V+..FCK.I+.A...s.s...s...s.....s.t...........-+....Fss...cs..LPTLLVY+.s.G...-LlusFl...uls.-..ph..............................................................................................t............................................................ 1 154 244 369 +3846 PF00068 Phospholip_A2_1 phoslip; Phospholipase A2 Eddy SR anon Overington and HMM_iterative_training Domain Phospholipase A2 releases fatty acids from the second carbon group of glycerol. Perhaps the best known members are secreted snake venoms, but also found in secreted pancreatic and membrane-associated forms. 
Structure is all-alpha, with two core disulfide-linked helices and a calcium-binding loop. This alignment represents the major family of PLA2s. A second minor family, defined by the honeybee venom PLA2 PDB:1POC and related sequences from Gila monsters (Heloderma), is not recognised. This minor family conserves the core helix pair but is substantially different elsewhere. The PROSITE pattern PA2_HIS, specific to the first core helix, recognises both families. 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.18 0.71 -3.86 37 1645 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 315 390 515 1676 0 114.20 33 69.11 CHANGED NLhQFtpMIppss..Gppshh.pYssYGCYCGhGGpGoPhDshDRCChsHDsCYucs.p.sGC.....pP+hstYoYpppsGs.lsCss.pssCpptlC-CD+sAAhCFups..sYN.pphh.....shsp..sc+C ................................lhphtpMIpphs....sp...pshh....p.....Y...tsYGC....YC.............G..........h...........G........G........p...........G.......p.....P..........hDs.....h..D.+CChs..HD..sC...........Ys........c..s....t...........t......ps.C..............pP...p....h.s.t..Y....s....a......p...h..........p........s...............s.......p.......lh......C........s..........s......p..........s........................Cpp.lCpCD+t.uAh.C..h.tps......sYs.pp.hh................................................ 0 195 222 307 +3847 PF04185 Phosphoesterase Phosphoesterase family Bateman A anon Pfam-B_1803 (release 7.3) Family This family includes both bacterial phospholipase C enzymes EC:3.1.4.3, but also eukaryotic acid phosphatases EC:3.1.3.2. 
20.40 20.40 20.40 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.43 0.70 -5.26 20 2105 2012-10-03 20:55:17 2003-04-07 12:59:11 9 30 757 2 743 1807 98 335.40 23 58.08 CHANGED lcHlVllhpENRSFDHhaGphsssps............................st.h....h.....s..stt...sh.p.sa...s.t.s.shhsu.h.ps..Hth.t.t.thstGh.tD.h...........shs.shsMuhaptpclPha.atLAcsFslsDpaFsSl.GuTpPNRlalho.uss.....................cssGs................................................puss.ls.st..........tsashpshs-pLppsGloWslYppthsssh..............h.sh...................................cthhtssthtphthssFtpDlcpspLPpVSalhs.......sssps-...HPuass..tGspalspllcsLtusPp.WscTlllloYDEsGGaaDHVssPsssh.s..sthh.............s..s...........ht.hGLGsRVPshlISPa..s+sGhlscp.........paDHoSlLphlcppFs....ls ............................................................................................................................................................................................................lcplVlhh.EN+SFD.......phhG...h.....ssps............................................................................................................................................................a...............................................t...........h....t....................h.....t.....t..t....h..t.t...uh....s..............................................t...shsha............p..t..p..p..hPhh..hsL....AptaslsDsaa...........s....uh.....hs......sotP.N+.hhhho.uss..........................ss..t..........................................................................................................ss.ss.h...t....................sh..s.hp.s..h.s-...tL...pp..s.....G.....l.o.W...t..hY.t.pt.....h...........s....s....h......t.............t..................................h..........h........................h............t........................................................................................................................tt.t.h.s..t.th.....t.....h.....
h.s.pFht.clt......s......s..p..L.P.p.V..oa..lhs......................sts...st...............H..............P..........s.............s..........s............................st........G........s.........h........altp.......ll.pslp...psPp...W..p....c....T....slllsaDE....s.s..G...a......aDH..V.s.s.s..h....ss.........................................................................................ht..G..R.lPh.hll.SP..a....s+s....Gh.Vspp.....................ha-HsSlL+hlcphaul.................................................................... 0 244 433 617 +3848 PF04272 Phospholamban Phospholamban TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain The regulation of calcium levels across the membrane of the sarcoplasmic reticulum involves the interplay of many membrane proteins. Phospholamban is a 52 residue integral membrane protein that is involved in reversibly inhibiting the Ca(2+) pump and regulating the flow of Ca ions across the sarcoplasmic reticulum membrane during muscle contraction and relaxation [1]. Phospholamban is thought to form a pentamer in the membrane [1]. 22.40 22.40 22.40 95.30 21.10 22.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.79 0.72 -4.52 2 32 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 28 15 20 25 0 52.00 90 96.41 CHANGED MEKVQYlTRSAlRRASTlEhs.QARQpLQpLFlNFCLILICLLLICIIVMLL MEKVQYLTRSAIRRASTIEMPQQARQNLQNLFINFCLILICLLLICIIVMLL. 
0 1 1 4 +3849 PF00922 Phosphoprotein Vesiculovirus phosphoprotein Bateman A anon Pfam-B_1160 (release 3.0) Family \N 20.50 20.50 21.70 21.50 20.30 20.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.80 0.70 -5.19 5 426 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 22 22 0 209 0 151.00 51 89.39 CHANGED oR...LpphLKsYPcL-sTLu-I-EhE-ppc-cssp..FpsDusscsopPSYYLu--hc-S-oEsssEDspsl.splPPsspVE.a.ts.spDshhDDDl.....sVsFs-c.sWosslpKsssG+tsLcLouPpGLTstQhsQWpcoIcAls-pSKthRLupspIcsou-GLllpER.MTPslSsoo...................-.hPsSssSssoopslS.......o.posoPuspSssS...LGLP-lsus....uhh.+EhpLsPlsuSssPYcsTLs-LFGSp-ualsYsssGshSLK-AlIuGLK+KGIYN+IRI .......................................................................................................................................................................................................P.GLot.QpsQWhhTIcAVspSuKaWNLuECph.sSs-slIlKtR.hTPDs.pss.......................h.psps.pSEulS.........................slWsLppT.........shphpsK+AulpPLTloLc-LFuSctEah.hssptt.p.hthhlhGl+h++LaNp.............. 0 0 0 0 +3850 PF00343 Phosphorylase phosphorylase; Carbohydrate phosphorylase Finn RD anon Prosite Family The members of this family catalyse the formation of glucose 1-phosphate from one of the following polyglucoses; glycogen, starch, glucan or maltodextrin. 
19.00 19.00 19.10 19.00 18.60 18.90 hmmbuild -o /dev/null HMM SEED 713 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -13.21 0.70 -6.65 9 4954 2012-10-03 16:42:30 2003-04-07 12:59:11 15 27 3182 252 1289 3940 903 590.10 40 79.46 CHANGED ALhpLGlsLEELhEpEpDAGLGNGGLGRLAACFLDShATLsLPuaGYGlRYcYGhFcQpIscGtQsEhPDtWLchGsPWEhpR.-hphsVcFaG+Vp....psupt..hcW.psphlhAlAYDhPlPGYcssssNolRLWSA+s.sp-FNLscFNsGsYlsAlpp+ppuENlocVLYPsDsphpGKELRLKQpYFlVuAoLQDIlpRaKpsc........psasshscKVAIQLNDTHPsLuIPELMRlLlD-EphsW--AW-lTs+TFuYTNHTlLPEALE+WPVcllcpLLPRHhpIIaEINcRFLptVtppaP.sDhc+hcphSll-E......t..+pV+MAaLslVGSHsVNGVAplHS-llKpclFsDFhplaPsKFpNKTNGITPRRWlthsNPuLutllsctlG.-cWhssL-.LpcLcpasDsstFhcphtplKptNKh+LAcaltpchsVplNPpuLFDVQVKRIHEYKRQLLNlLHVIshYpcIKcsst...tphsPRssIhGGKAAPuYahAKpIIKLIssVA-VlNsDPtVsshLKVVFlsNYsVSlAEhlIPAuDlSEQISTAGTEASGTuNMKFsLNGALTIGTLDGANVEItEclGc-NhFIFGhps--VpplcccG.YcspcaaptDscLcpVlppIpsGhFuP.psctFcsllsS......l.asDhYhVhtDFtuYl-sQcpVDphY+spcpWsctuIhNhAssGhFSSDRTI+EYAccIWslcP ....................................................................................................................................................................................................................................................................htthsh.s.lpcl.-.........E.DsuLGNGGLGRLA...A..C..FlDS.hAolshsuhGhGlpYcaGhFcQth....s..G.QhE...P...-.W.....h.p........t...sW...h.c.....p...........hth...V..ta.h.G..h........................h......lhu.saDhs.......lsG...Yps.t...s.spsLRLWpups....t...h...s.h..t..Fs.t.G...t....a.tu.tpt..scslotlLYPsD.sp....tG+.L.RLhQpYFhsusslps.Ilcca.h..t..t...........t.sh.p..ph.s-..h......slp.lND..THPs.lu.IPELh.Rl.L.....l.D..pct.hs.W..-c..Aapls..pps.......huYTNHTlhsEALEpWslphhpp.lL.PRhh.p..II.cI.s.pp........ah.t................l.p.t.....p..h....s.....s..s...........t....h...h..tp..h.ull.c.-.................................s.....t.p......V+MAhLsl.hsu.a.pVNGVutLH...oc...l............l......pp...sh.h......t.c..a...h...p....l.....a.....P.....p......+......F.
.p......N......h......TNGl.........T...........R.....R..................W..................l......................h...............s...NP.........tL......u..t....l.l....s.................c................t.......l...........G.............c........p........W......h.........p.....-...................h.................s..............p..............L...p.....p........L...........p....p.........a.........s.............-.....c..........s...........t....h......t.........p.......p............h........t...p........l.......K....................h.........p............N............K..h...........+......L.........A.....p..........a.....l.........p.........p..............p..............h.................s...............l..........p.................l............s....P.....p............u.............l.....F.........D......l............Q............l......KRlHEY.KR.QhLNh..L..al.....lt..hYp.c...I+.p.s..sp.............hphhPR.s.h.lFuG..K..AAPuYhhAKpII+.hI.splAchlNsDP.tlsshLK.V.......VFl.NYsVShAch..lI..............PAuDlSEQIShAupEASGTuNMKhhLNGALTlGTlDG...................ANV..................E..hh-....tl.G.....t-N.h.al.F.G..p..s-p.V...t.h....h...t.p..s.................Y....p...s....h....phh.p....ts...tlctslp..l...s.G..h.h...s.s........p........t..h....aptl.h.pp........h......t.....D.ahlhtDatsYhcs.p...................cpltt..Y..........p.s.t..........p.tWhphs.lh.....NhAp.G.hFSSDRoItpYspcIWph........................................................................................................................ 
0 432 793 1068 +3851 PF00124 Photo_RC photoRC; Photosynthetic reaction centre protein Sonnhammer ELL anon Prosite Family \N 21.90 21.90 22.10 21.90 20.10 21.20 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.54 0.70 -5.36 55 8728 2009-09-11 05:37:13 2003-04-07 12:59:11 14 5 3408 262 234 6932 2419 232.40 49 91.25 CHANGED lYlGahGlhol..hhuhluhhlIuhshhsss........sassh..hpphahlulpPPsspYGLu.hsPLtE.............GGhW.hsshahssulluWhlRpachu+pLGhGhHlshAFuuAIhhalsLslIRPlhMGSWucuhPaGIhsHLDWssshuhpYsNhaYNPFHMLuIuFlaGouLhhAhHGuhILussphs.sscplc..hs.t.usE+ts........hFaR.hh....GashstcuIHRauhahAlhsshhuulGIllo..................Gsll.....csWhpWhshashsPha ............................................................................................ss...htphh.hul.ssusuhGhp.hhPlWEsts.s...cWh..GG.apllshHhhlGlhsahsRpaELuh+LGMRPaIslAaSAPl..AAhsuVFL.lYPlGQG.SaS-uhPhGIuupFsFhlsFQucH.NlhhpPFHMLGVAGVFGGuLh.uAMHGSLVpSoL.....h......h.E......s.s-.......st........otshuachuQEEETYsh......VsApsaFucLIh......suFN..NoRuLHFFhhhaPVsGlWho.....ul.Glssh..................u..h.....tsa.ph..h.......s.................................................. 0 46 140 195 +3852 PF01895 PhoU DUF65; PhoU domain Enright A, Ouzounis C, Bateman A, Cerutti L, Dlakic M anon Enright A Domain This family contains phosphate regulatory proteins including PhoU. PhoU proteins are known to play a role in the regulation of phosphate uptake. The PhoU domain is composed of a three helix bundle [1]. The PhoU protein contains two copies of this domain. The domain binds to an iron cluster via its conserved E/DXXXD motif. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.15 0.72 -3.54 388 11541 2012-10-02 11:27:25 2003-04-07 12:59:11 14 27 3795 24 2708 7226 2003 87.20 22 51.69 CHANGED ltchuphst.phlppuhpu.h...hpp.D.....phA.ppl.h..pt....-cplDph........tppl..........p.....phhphh.tt..pp..sh.....................hp....h..h.....hht..lspslERluD.aussIAchs ..................................................pMuphst.ph....lp.pul.pu..h.........hpp...D....h........chA..pcl..h...pp..........Dcp.lsph.......pppl.............pc....phhphh..sp.......ppshtp...................lph...h...hs...hlp...hspslERlGD.autsIuch.h..................... 0 935 1793 2290 +3853 PF00502 Phycobilisome Phycobilisome protein Finn RD anon Pfam-B_10 (release 1.0) Domain \N 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.93 0.71 -4.47 22 4056 2012-10-01 21:46:00 2003-04-07 12:59:11 14 6 1323 188 272 2702 388 96.30 32 84.97 CHANGED TpllssADspG+ahosu-LpulpuhlppustRLcusptlssNtssIlppAuptlascpPplhssGGssYssRph..AtClRDhcaYLRhlTYulluGssusL--hslsGl+EsYpuLGlPsushsculphhKss..shphlu......................upsssEhssYFDYslsulu ...............................................................................................................................................................hh...............................h....thths.......................................................................t..................................................... 0 37 167 246 +3854 PF02333 Phytase Phytase Mian N, Bateman A anon Pfam-B_14843 (release 5.2) Domain Phytase is a secreted enzyme which hydrolyses phytate to release inorganic phosphate. This family appears to represent a novel enzyme that shows phytase activity [1] and has been shown to have a six- bladed propeller folding architecture [2]. 
18.90 18.90 19.30 18.90 18.60 18.60 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.35 0.70 -5.75 2 586 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 259 7 131 592 319 193.30 42 71.03 CHANGED MphsKThLLosAAGLhLohsAsSspA.Hhls-.aHFpVsAtsET-PVsousDAADDPAIWlc.KpPppSKLITTNKKSGLsVYsL-GK.LHSYchGKLNNVDlRYDFPLNGcKlDIAAASNRSEGKNTIElYAIDGcpGpLpSITDPN+PIuosIsEVYGFSLYHSQKTGtaYAhVTGKpGEFEQYElssstpGYloGKKVRtFKhNSQTEGhsADDEYGsLYIAEEDEAIWKFsAEPsGGSpGpVlDRAsGcHLTsDIEGLTIYYAssGKGYLhASSQGNsSYAhYERQGpN+YVAsFpITDG.chDGTSDTDGIDVLGFGLGPcYPaGlFVAQDGENIDpGQtsNQNFKhVsWEpIAp+lG.hPplpKQVsPRKhpDRS .....................................................................................................................................s.DA.....A...DDPA....lWlpPp......sP......s...p...Sh...lluTsK....c....u......G..Lh..VY..DLsG+...l..Q.....................h..........s...s..........G...+.h...N....NV..DlR....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s.................................................................................. 1 46 81 114 +3855 PF04833 COBRA phytochel_synth; Phytochel_synth; COBRA-like protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_4078 (release 7.6) Family Family of plant proteins are designated COBRA-like (COBL) proteins. 
The 12 Arabidopsis members of the family are all GPI-liked [1]. Some members of this family are annotated as phytochelatin synthase, but these annotations are incorrect [3]. 19.00 19.00 19.10 21.20 18.90 18.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.41 0.71 -4.49 22 265 2009-09-11 09:29:15 2003-04-07 12:59:11 10 3 41 0 152 243 23 163.40 49 35.02 CHANGED YlAhVTlpNap.ht+lps..WpLuWpWt+cEhIaoMpGApso..-puDC.....upah..hshs+...sCc+cPslVDL.PupshspQluN...CC+sGsl.shu.DPupStSuFQhpVuph.....sssppslpsPpNFplpu.....sPsYsCGsshhVsPT......pF.sssG.ppsTpAl........hTWpVsCshoQ ..................YlAhVTh.NaQ.aR.+Ip.s.PGWpLGWsWAK+E.lIWSMhGAQsT..EQGDC...........S+FK........ushPH....sC++sPs.lVDLLPGsPaNpQ...l.uN...CCKuGVlssa...sQDP.u......susSuFQloVGhu......G.T..oN.c.TV+hPpNFTLtu....PGPGYTCGssthVs.sT......pFhosDt..+RpTQAl......................hTWs.VTCTYSQ.......................... 0 22 94 125 +3856 PF00360 PHY phytochrome; Phytochrome; Phytochrome region Finn RD, Mistry J, Hughes J anon Prosite Family Phytochromes are red/far-red photochromic biliprotein photoreceptors which regulate plant development. They are widely represented in both photosynthetic and non-photosynthetic bacteria and are known in a variety of fungi. Although sequence similarities are low, this domain is structurally related to Pfam:PF01590 [1], which is generally located immediately N-terminal to this domain. Compared with Pfam:PF01590, this domain carries an additional tongue-like hairpin loop between the fifth beta-sheet and the sixth alpha-helix which functions to seal the chromophore pocket and stabilise the photoactivated far-red-absorbing state (Pfr) [1]. The tongue carries a conserved PRxSF motif, from which an arginine finger points into the chromophore pocket close to ring D forming a salt bridge with a conserved aspartate residue [1]. 
20.30 18.00 21.00 19.50 19.90 17.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.12 0.71 -5.15 80 3555 2012-10-02 14:34:25 2003-04-07 12:59:11 15 76 2538 22 318 3683 22 152.50 56 28.28 CHANGED lhchps...hLs-hlh+..chshulhs..psPslhDLlcs-GAALhapsphhplGtTPscpplc-lspWLp.p..t.sss......shu.TDuLscs.YP.sAtslu-sssGhhAlpIo.ppch...lhWFRscssppVpWGGs.ccssphss...st+hpPRsSFcsahElV+s+SlPWcshEh-.....AhcsLphhLhsshh.cp.....ps ..........................................................................................................................IL+TQTlLCDMLLR.....DuP.l.u.IlT...QSPNlMDLV.KCDGAALaYc.sc......h........WhL.Gl..TP..o...E.sQI.+DIsp.WLhch......HssoT.............GLSTD...SL...h-A..GYP....uA..s..s..L.G...Du...........V.C.G...MAAs..pI..o....s......+...D..h........LFW.FRSHTApEl+WGGAK.Hc.P.t..-cDc............up+..M...H.PRoSFcAFlElV+.+ShPWp......................................................................... 0 89 180 254 +3857 PF03284 PHZA_PHZB Phenazine biosynthesis protein A/B Mifsud W anon Pfam-B_4020 (release 6.5) Domain \N 20.50 20.50 20.80 20.60 20.30 19.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.95 0.71 -5.15 8 88 2012-10-03 02:27:23 2003-04-07 12:59:11 8 2 31 21 8 134 0 153.10 69 98.67 CHANGED Mscstssp.sFsDphEL..Rp+NRtTVEpYMcT+Gp-RLRRHcLFTEDGsGGLWTTDoGpPlVhpG+-+LAcHAhWSL+CFPDWEWaNV+lFET-DPNHFWVECDG+GKILFPGYPEGYYENHFlHSFEL-sG+IKRNREFMNPhQQLRALGIPVPpIKR-GIPo ..................................................................................M.spth.p.shpDt.EL..RR+NRAT.V.E+YMp.hKGt.-RL.pR.H.pLFVEDGCuG...WTT.-oGpPlVhRG+-pL.tc.hA.W....+C.....FPD.W..E.W.aNl+IFET-DPNHF.WVECDG+GKhLhPGYPEG.YhE..NHalHSFEL.Es.G+IKRNREFMNshQpLRALuIsVPpIKR-GIPT........... 0 4 5 7 +3858 PF00388 PI-PLC-X Phosphatidylinositol-specific phospholipase C, X domain Finn RD anon Prosite Family This associates with Pfam:PF00387 to form a single structural unit. 
22.10 22.10 22.20 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.82 0.71 -4.80 140 2890 2012-10-01 22:17:21 2003-04-07 12:59:11 14 118 764 50 1401 2585 81 145.10 30 19.62 CHANGED MspPLo+YFIsSSHNTYLsG.............................sQlt.....up.SSscsYhpsLppGCRClElDsW.......DGsss...p............................................................................................................P........llhH...GtThT...op..l.Fc-VlcsI+caAFhsS.p...........YPlIlSLEsHC.oscpQ..pphAchhpplhG-hLhspshst................ssstlPSPppL+tKILlKsKp ......................................................................................................s.Plo+YaIso..o...HN..oYLh.s.............................pQ.h.h.......u.p...opsp..sY......hps..L.....pt.GsRslE......l...D.s.a.........cus..cs....p..............................................................................................................................................s......hlhH........Gh...s...hs...................sp.l.....hp...-................Vl.c.s.Ipc...a.u...F..tss..p.....................hPl..Il..S.l...Es......+s.....s.p..pQ...p.t...h.....s..p...........hh...pp..l......aG-...h...Lhspshp............................ssptlPospp...L+tKIllps+..................................................................... 0 360 614 1002 +3859 PF00387 PI-PLC-Y Phosphatidylinositol-specific phospholipase C, Y domain Finn RD anon Prosite Family This associates with Pfam:PF00388 to form a single structural unit. 
20.20 20.20 20.30 21.00 19.60 20.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.76 0.71 -4.11 86 1860 2012-10-01 22:17:21 2003-04-07 12:59:11 14 98 351 26 1049 1720 51 113.40 41 11.94 CHANGED -LScLlsYspulp....FcsFpt.sppp....hhchtShsEspspphhc.pp..stp..hlcaN++pLoRlYPpG...hRl.cSSNa.sP..hWssGsQh..............VALNaQT.Dh.sMpLNpuhFp.N.......GpsGYVLKPshhR ...................................................................LSsLl.Yspss.p......ap.s....hp........pt.p..........t..................hhchsShsEsputchh........p..pp...............stc..al..p..aN..p+pLoRlYPpG...hR.l.DS...S.Na...sP..hWssGsQh..................VALNaQT.......sh..sMpLNpuhFp...N.......G..ps..G...YVLKPphh........................ 0 250 418 726 +3860 PF02192 PI3K_p85B PI3-kinase family, p85-binding domain SMART anon Alignment kindly provided by SMART Family \N 21.00 21.00 21.10 23.30 19.70 20.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.34 0.72 -4.56 13 271 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 91 6 146 207 0 76.20 47 7.94 CHANGED hlplcVsppsTlppIKp.LWKpAcphPLaphLt-.suYhFsslNppApcE.ELpDEo..RRLCDlRPFhPlL+LlpRpssp ...........hlslcssRpAoLpsIKp..LW+cAcphPLap..hLp...-.suYlFssVsQ..p.AEpE..EhpDEo....RRLCDl+s.F..hPlL+llp+pGs....... 0 32 44 93 +3861 PF00794 PI3K_rbd PI3-kinase family, ras-binding domain SMART anon Alignment kindly provided by SMART Family Certain members of the PI3K family possess Ras-binding domains in their N-termini. These regions show some similarity (although not highly significant similarity) to Ras-binding Pfam:PF00788 domains (unpublished observation). 
20.80 20.80 20.80 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.17 0.72 -4.10 12 588 2012-10-03 10:59:06 2003-04-07 12:59:11 13 31 130 78 323 502 1 105.50 25 9.04 CHANGED ssp.hPcsl..hh.ssplhlslhhp..........ttp.shThpsssssostplhtphlpK...phshhpps-ssp........DalL+VsGppEalh.ssasLtpapaIppplppstpscLsLhp.st ..............................................s.....t.l...h...p..s.p.lhlslhhp..............s.ppshThpssssss.stplltpslpc.......ppsh...ht.ps.-.tt.............-YlL+VsGpcEYLh.s......c......as.Lspa.............p..a...IppClp.sp......cLhLhp...s............... 0 83 113 201 +3862 PF00613 PI3Ka Phosphoinositide 3-kinase family, accessory domain (PIK domain) Ponting C, Schultz J, Bork P anon SMART Family PIK domain is conserved in all PI3 and PI4-kinases. Its role is unclear but it has been suggested [2] to be involved in substrate presentation. 29.00 29.00 29.00 29.20 28.80 28.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.20 0.71 -5.11 16 1353 2012-10-11 20:00:59 2003-04-07 12:59:11 15 53 314 96 849 1260 21 179.80 26 14.57 CHANGED cchcs.sppc+cplcpIlshDPhspLotcE+phlWpaR.a.hlhshscALs.......KhLh.SVcWsshppsspshpLhtc...Ws...lcsscALELLsssass....hVRsaAVpsLEp.hsD-ELppYLLQLVQsL+YE.........................................................hpcShLs+FLlp+ALpNh.clGpFhaWaL+uEh......aspsh.spRFsslhEtahcsh.spthtpltp 
..........................................................................................h.............l....l.h.t.h....s.........l.p....p-.pphlW.p.hR....h....hh.....p..............p.uLs...........................+hLh..s.s.p....a..pp..........p....p..s.t...p....h.............h.p.l.L..t..p......................W..s.........l.ss.p..A.LpL.Ls..s..p..ass.....h.............VRpa...AVppL.c.p..hs.c-c...LhhYL.QLVQ.A..L.K...aE............................................................................................................................................h.p.u.....sLscFLl....p.R.............Ahp.s...plu+hlaWh.L.....p...s-h.................cst..ph...t..p.h..t.lhpthhp...........hh.................................................... 0 284 422 654 +3863 PF02226 Pico_P1A Picornavirus coat protein (VP4) Bateman A anon Pfam-B_345 (release 5.2) Family VP1, VP2, VP3 and VP4 for the basic unit that forms the icosahedral coat of picornaviruses. Five symmetry-related N termini of coat protein VP4 form a ten-stranded, antiparallel beta barrel around the base of the icosahedral fivefold axis [1]. 20.80 20.80 21.60 21.00 20.30 20.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.30 0.72 -4.24 31 6859 2009-01-15 18:05:59 2003-04-07 12:59:11 11 25 444 79 0 4094 0 67.40 60 13.37 CHANGED GAQVSTQ+oGuHETustAssGSoIsYTNINYYKDuuSsSAs+QDFoQDPuKFTcPVhDlhpcshPsLp ........GAQVSpQpsGoHEss..Ns.AosGSsIsYhNINYYKDuASuuAu+QDFSQDPSKFT-PVKDl.hpchhPsLp......... 0 0 0 0 +3864 PF00947 Pico_P2A Picornavirus core protein 2A Finn RD, Bateman A anon Pfam-B_138 (release 3.0) Domain This protein is a protease, involved in cleavage of the polyprotein. 
21.00 15.00 21.20 15.10 20.70 14.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.54 0.71 -4.33 9 2140 2012-10-02 13:45:52 2003-04-07 12:59:11 14 29 270 3 0 2018 0 113.10 63 8.57 CHANGED NhHLAT.pDapNslh.sasRDLLVsposApGsDpIARCsCsoGVYYCcS+pKaYPVshpsPshhhlcts-YYPtRaQophLluhGhuEPGDCGGILRCpHGVIGIlTAGGpGlVuFADlRDLhhlE- ....................................NRHLATpsDWpNsVWEsasRDLLVooT...sApGsDT.I.ARCs...CpoGVYY.CpS+pKaYPVSFpt....Psh..h....l.ptSEYYPtRYQSHlhlutGauEPGDCGGILR.CpHGVlGIlosGGpGlVuFuD..lRDLhhh--.......... 1 0 0 0 +3865 PF01552 Pico_P2B Picornavirus 2B protein Bateman A anon Pfam-B_214 (release 4.0) Family Poliovirus infection leads to drastic alterations in membrane permeability late during infection. Proteins 2B and 2BC enhance membrane permeability [1,2]. 20.00 20.00 20.20 21.70 19.40 19.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.26 0.72 -3.84 22 1455 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 271 0 0 1484 0 99.00 57 5.36 CHANGED QG.loDYlppLGsAFGsGFTspIucplsplpshh..psslscKllKtllKllSALVIllRNpsDhhTVhATLALLGCsGSPW+aLKpKlCphhtl.Palp+Qu ..................QG.loDYIcpL....GsAFGo....G....FTs....pIs-pVs....t..Lpshl...........psslsEKlLKsLlKIISALVIllRN.ppD..h.s..T.lhATL.ALl.GCs...uSPWpWLKpKssphLtI.Phsp+Qu....... 0 0 0 0 +3866 PF00345 PapD_N pili_assembly; Pili_assembly_N; Pili and flagellar-assembly chaperone, PapD N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain C2 domain-like beta-sandwich fold. This domain is the n-terminal part of the PapD chaperone protein for pilus and flagellar assembly. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.55 0.71 -4.33 154 7954 2012-10-03 16:25:20 2003-04-07 12:59:11 15 5 993 92 559 3765 45 119.70 30 51.71 CHANGED ulslssTRlIass..spc.psolplp.Nsss.tPhLlQsWl......-s.........tt......t.ptps.P......Fllo.PPlhRlcssppptlRl...hp..sss....LPpD...R.ESlFalNlppIPsts......................ps...plplAl+s+lKLFaR.P...suLt ..............................ulslssTRlIas.u..s.p.+...ps.slplp.Npss..psh.....Ll.QoWl..................-s......................tss.ppps.s............Fl.lo.PPlh.+lp.stpppplRl..hp....ssp.....LP.pD.R...ES.lFahNlcpIPsts..............p..p........spNsLplA..h..po+lKLFaR.Ptsl....................................................... 0 63 176 377 +3867 PF02753 PapD_C pili_assembly_C; Pili_assembly_C; Pili assembly chaperone PapD, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Ig-like beta-sandwich fold. This domain is the C-terminal part of the pilus and flagellar-assembly chaperone protein PapD. 22.80 22.80 22.80 22.80 22.50 22.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.14 0.72 -4.14 155 6654 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 783 91 357 2906 15 65.10 26 28.05 CHANGED lplpNPTPYalohssl.pls...........spplpss......MlsPhushphslssssss......plpaphINDaG.........uhsptpt ......lplpNPTPYYlols....s.l..pss..............sts.l.pss.......MlsPhust.shslssssss..............plpaphI..NDYG.........uhh....h........................ 0 28 98 239 +3868 PF00114 Pilin pilin; Pilin (bacterial filament) Sonnhammer ELL anon Prosite Domain Proteins with only the short N-terminal methylation site are not separated from the noise.\ The Prosite pattern detects those better. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.84 0.72 -3.34 147 1425 2012-10-03 10:38:27 2003-04-07 12:59:11 14 15 722 48 226 1319 90 110.70 23 71.92 CHANGED ARuQloculuhhsuhKosl.Ehhhsp........Gphssssss.............hGhss.sss..ht.......shsssstsss.....................usIssshssssst......lsGpslslsts..ss..............................ssWsCss.......................sl.sscahP..s.uCp .............................+uphspslt.hsu.hKsslt-hhhsp................Gth..ssssss..................hGhss...sss.hps.............Vpslsls..s........................GslTsshpssss........lpup.plsLtsp...ts...........................ssss.pW..t..Csss............................sl.ss...chh...PssCp.................................... 0 58 120 168 +3869 PF05137 PilN Fimbrial assembly protein (PilN) Bateman A anon COG3166 Family \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.43 0.72 -4.15 214 1587 2012-10-02 17:03:51 2003-04-07 12:59:11 8 5 1422 0 398 1103 323 79.40 23 35.54 CHANGED pllspL.sphlP.-s.lalsslp..hpss.......................plplsGhut......s.splushhcsLc.p........................................Sshhp..sspltshppss....................tttp.......htp............Fslpspl ........................hlppL.sp.hlP..-t..saLo..slp....h.pss................................................................................slplpGhup.........o.sslsshhps.Lc..p..........................................sshap.......ssphtshppps......................tps..............hhp............Fplpht.h..................................................... 0 129 253 341 +3870 PF04350 PilO Pilus assembly protein, PilO Mifsud W anon COG3167 Family PilO proteins are involved in the assembly of pilin. However, the precise function of this family of proteins is not known. 
21.80 21.80 21.80 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.82 0.71 -4.31 75 807 2012-10-02 17:03:51 2003-04-07 12:59:11 8 8 747 2 266 706 143 144.00 26 68.03 CHANGED L...cptpscEppL+ppappKtppAsNLsta+pQhtpl-ppastlL+QLPscsElsuLLsDIspsGhssGLpFpphc.ts.Els..+-FYsElPIsIpVsGsYH-lGsFsuslAsLPRIV..oLcshslsss....pss..........tLshshhA+TYRah- ........................................................thptpEtpL+pph.pp..K..t...t...ps....s....sL..p....th.......c.....p.......Qlt...p..h...c......pph.......s....t.hl....c.......pLP....s.c.s....E.hssLLpcl..spsuhs......sGL...p.......h.pp......l......p.......t...............s.........p.h.s...............p.c......a............a..h.c.l.P.....l..slp..ls.G.sYaplup...F.sss.l..u...s.LsRIl..olcslslpts....spp.................ttLphphhApTYph...................................... 0 94 174 231 +3871 PF04351 PilP Pilus assembly protein, PilP Mifsud W anon COG3168 Family The PilP family are periplasmic proteins involved in the biogenesis of type IV pili [1]. 21.20 21.20 21.20 21.30 20.90 21.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.54 0.71 -4.30 54 679 2009-09-11 22:48:09 2003-04-07 12:59:11 8 3 667 8 183 533 67 140.50 27 79.17 CHANGED DLcpahscs+scspspIcPLPchpsapshsYsuss.hRsPF........sPhclt.......t.ttpspsslpPD.....hsRtK...EhLEsasL-sLpMVGol...s.pssthaALlcsssGslaRV+sGsYlGpNaG+lspIo-splplhElVsD.Gp.GsWlERsssLtLp .....................................................Lptahtphctpst..l.tsh..........h.shsYps.tt..h+..sPF....................s....phh.................thttsssht.P-..........p.Rtc...psL..EpasL-sL+hlGsl..........s.p.s..sphhALl.p....ss...sup.lapVpsGsYlGpNtG+lspIs..c....splplhEhl..D.u.t.GsWhcR.spLtL..................... 
1 48 110 154 +3872 PF04697 Pinin_SDK_N pinin_SDK_N; pinin/SDK conserved region Waterfield DI, Finn RD anon Pfam-B_4141 (release 7.5) Family SDK2/3 is localised in nuclear speckles where as pinin is known to localise at the desmosomes where it is thought to be involved in anchoring intermediate filaments to the desmosomal plaque [1,2]. The role of SDK2/3 in the nucleus is thought to be concerned with modulation of alternative pre-mRNA splicing [4]. pinin has also been implicated as a tumour suppressor. The conserved region is found at the N-terminus of the member proteins [3]. 25.50 25.50 26.40 26.30 25.20 25.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.61 0.71 -4.13 2 80 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 51 0 41 78 0 126.30 73 20.63 CHANGED MAVAVRoLQ-QLEKAKEuLKsVDENIRKLTGRDPs-lRPhQsRhLslsGPGGGRGRG..LLRRGhSDSGGGPPAK.RDl-GAl.RLuG-pRsRR-SRp-SDsE.DDDVKKPALQSSVVATSKERTRRDLIQDQs .......MAVAVRoLQEQLEKAKESLKNVDENIRKLTGRD..P..NDl.RPhQ....ARlLu..L..oGP.G..GGRG..RGu..l.LL.RRGFSDSG.G...G..PPAK.QR..D.LE.G...AlS...RLGGERRTRRESRQESDsE.DDDV..K........K.PALQSSVVATSK.ER.T.RRDLIQDQN................................................... 1 10 12 22 +3873 PF00224 PK Pyruvate kinase, barrel domain Finn RD, Griffiths-Jones SR anon Prosite Family This domain of the is actually a small beta-barrel domain nested within a larger TIM barrel. The active site is found in a cleft between the two domains. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.06 0.70 -5.83 13 7047 2012-10-10 15:06:27 2003-04-07 12:59:11 16 29 4703 229 1953 5106 2651 326.30 42 68.66 CHANGED hR+TpIlsTIGPu.................opoVE....pLpcLhcAGhNVsRhNFSHGoaEYHtssIcNVRcutcphut.......VAIALDTKGPEIRTGshps.....tlplssGcchhlosDtsh.tsssscchlalDYps...lscslpsGshIhlDDGlloLpVlphsssp.plhscspNsGslus+KGVNLPGscVDLPALoEKD+s.DL+FGVcp.s.VDhlFASFlRpAsDVpplRclLG-cG.+pIpIIuKIENppGV...sN...hDEILcsoDGlMVARGDLGlEIPA.cVhlsQKhlIuKCNhsGKPVIsATQMLESMhpNPRPTRAElSDVANAVLDGsDCVMLSGETApGpYPsEuVphMpclslpAEpulsphsla ...............................................................................++TKIVsTl.G....P.A.....................o.p.s.....-..hl.ppll.pA.G.h.NVhRhNFS.H..G..s.a......--Ht.pphppl......R.p.s.tp.p..h..u..cp...................lu..ILh..Dh.................p........G.........P.....c.....I...R..su......p..........h.....c............s.........G....t......................lpLpt.G.pp..h..h...l.......s........s..........c.............t......s..h......t........................................p..c.............p..l.u.l..s...Y..p.s.................l.s.p....Dl.......p......s.....G..s.....hl......L........l.........D.....D........G....hl...p..........Lp.....Vh...p..l....p..........s.................s...........p.......................l..................h.s....c...V..h.....s....u...G........L........u........s.p......K...Gl.......N.......l........P...Gs.....s..l..s.....l..PA..L.oEKD..pp.D.l.p..a..u..h..ct........s...lD.alAhSFlRpupDlt....p....l....R....c.....l.........l........c....p...............p..........G......p.........p..........l.........p.........l.l....uK....IEptE...ul...cN....hD-I..l.....c.....s.....o.....D......G.....lMVAR...GD....L....G....VEl..P......h....E...c.......V.shl................Q.........Kt...l...Ic+s.p..................p.h.sKsVITATQ..ML-S.......Ml...psPpPTR..AEsoDVAN.A....llD.G.TDAVMLS.uEo..AsGpYPlEuVpsMu.pI.s..hpsEp...
.....t..................................................................... 0 656 1238 1641 +3874 PF02887 PK_C Pyruvate kinase, alpha/beta domain Finn RD, Griffiths-Jones SR, Bateman A anon Prosite Domain As well as being found in pyruvate kinase this family is found as an isolated domain in some bacterial proteins. 21.70 21.70 21.70 21.90 21.60 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.29 0.71 -4.54 173 6414 2009-01-15 18:05:59 2003-04-07 12:59:11 11 22 4576 233 1792 4498 1547 116.60 30 23.66 CHANGED s-...ulutuAspsApc....ls.spuIlshTpSGpTAphlu+hRPs.hP...IlulT.ptp................st+pls.lhhGVhs.......................h...hhsptp.s.........s-chlppu.lch...............uhcpGh.lpsGD..hlllsuG...hstsG..sTNhh+lhp ..................................................................-uluh.uuspsApc......Ls...sp..uIlshTp.S.GpTA+..hl.S.+..aRPs....sP......IlAlT.scp...........................................st+p..ls..Lh..hGVhP.......................h.....hhc.phs..s...........................s-ph.hpt...u...hph...............shc.p.Gh..h......pp...G.D....hl.l.lsuG......s...h.....u....tsG......sTNhh+lh.h............................................. 0 593 1132 1494 +3875 PF02827 PKI cAMP-dependent protein kinase inhibitor Bateman A anon Bateman A Family Members of this family are extremely potent competitive inhibitors of camp-dependent protein kinase activity. These proteins interact with the catalytic subunit of the enzyme after the cAMP-induced dissociation of its regulatory chains. 25.00 25.00 30.50 30.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.62 0.72 -3.86 9 148 2009-09-11 05:02:57 2003-04-07 12:59:11 11 1 62 106 88 142 0 68.20 41 81.75 CHANGED TDVEoshuDFhuSuRTGRRNAlPDI.s.SsAssso.u-LslKLutLsl.Ks-Gcpcspcssp-psscspsEu.p.c ........h-VEo..shu-FluouRTGRRNAlPDItu.SpAssso..s-Lsh+LusL.s.lpps-u..ptc...sppsspct.tpspspt....t.................... 
0 14 19 43 +3876 PF02173 pKID pKID domain Bateman A anon Pfam-B_1547 (Release 4.2) Family CBP and P300 bind to the pKID (phosphorylated kinase-inducible-domain) domain of CREB [1]. 25.00 25.00 26.10 26.70 24.40 22.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.19 0.72 -4.72 17 369 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 106 1 112 349 0 42.00 56 14.79 CHANGED -s.uDuspssp++.R-ILoRRPSYRKILNDL....uupsssh.....ptpp .........s-ultDSpKR.REILSRRPSYR........KILN-L....SS-ssuls....+h-............ 0 20 30 61 +3877 PF03832 WSK PkinA_anch; WSK motif Griffiths-Jones SR, Bateman A anon Griffiths-Jones SR Motif This short motif is names after three conserved residues found in a WXSXK motif in protein kinase A anchoring proteins. 25.00 25.00 25.50 25.50 23.90 23.90 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.16 0.72 -4.60 12 161 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 35 0 78 190 0 31.00 42 5.82 CHANGED pcshssWsShK+LVTsRK+s+ssscpcttps ...tcshssWsSFK+hVTsRK+s+sstcpcpt-.t.... 
0 4 9 23 +3878 PF00069 Pkinase pkinase; Protein kinase domain Sonnhammer ELL anon Unknown Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.65 0.70 -5.24 54 114309 2012-10-02 22:05:25 2003-04-07 12:59:11 20 4512 7677 2311 65911 134432 5368 237.60 20 40.43 CHANGED aphhptlGpGuaGsV..apuhcpsssp........hhAlKhlptp.ptppptt.t.h....................Elplhppl.p....Hss..I..lphhshhps..................psplh..llhEahsss.....cLh.phlp................tpshl...scppspphhtpllpulpYlHs.p..........sllHRDLKspNILls.psspl...................KlsDFGlAp...................ttsss.thpshsGT.hYhAP....El....lt...............sptasttsDlWSlGlllhphls.....Gp......sPFt.sp........sphthhtphhtsthphs...............................................ppspcllpphLphcPpcRho.....sppllp+sah ..............................................................................................................................................................................p.l.G...p....G...s.h....u..................V................h...h.......s......h......................t.....t...t........................................h...A.....l...K......h.....h....p.................t........t............................t...........t................................................................................E..h...p...h.....h....t...p....l......p..............................H.............p..........l...........l......p.....h....h...s....h...h...t.p......................................................................tp.p..h..h.........l...l...h...E.......h........h..t...s.t...........................................s.L..h....p....h..lp.................................................................tt..t.....h...............s....t....p.....h.......h...............t......h.......h....h........p.............l...............h............p............u.......l........p....a.......l.......H...p...p...................................................
..s.l.....l......H............R..............D.........l..............K..............P.........p................N...........l.....L.....l........s.....p....p...s..p.h....................................................................................+.l...s.......D.....F....G.....h...up...........................................................ttt..t.......h....p......s.....h......h........G......T....................h...........Y..............h..........u......P..............Eh..........h................................................................................t..t....t....h.....s..........t....s.......D.....l.........a...............u..............h..............G............h.....l....h........h.....p...h..h.t.......................u.p............................................s...a....st..................................................p......................h.......h..........t.......h..........h.....t..............................................................................................................................................................................................................................................t..h....p....h..h..........t.......h.....h.....t..............p.....s...t...p.....R.....s..................t...h........................................................................................................................................................................................................................................... 
0 23780 38306 53856 +3879 PF00433 Pkinase_C pkinase_C; Protein kinase C terminal domain Finn RD anon Pfam-B_135 (release 1.0) Family \N 20.90 15.00 20.90 15.70 20.80 14.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.80 0.72 -3.44 160 3537 2009-01-15 18:05:59 2003-04-07 12:59:11 19 82 369 79 2010 3207 9 48.00 30 6.79 CHANGED plputpDssNF.D.........p...cFT.pps.....................................................sthoss......s.thh..............................sshsp.......pp..FhGFoYsssp .........lpu.pDssNFD...........p....-FT..pps.................................................................................................................................................sthoPs................ttpsl.....................................ss.sp.................pp.......FtGFoYss............................ 0 477 765 1344 +3880 PF02253 PLA1 Phospholipase A1 Bateman A, Mian N anon Pfam-B_3500 (release 5.2) Domain Phospholipase A1 is a bacterial outer membrane bound acyl hydrolase with a broad substrate specificity EC:3.1.1.32. It has been proposed that Ser164 is the active site for Swiss:P00631 [1]. 
19.70 19.70 19.80 22.80 18.30 19.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.91 0.70 -4.96 78 1227 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 1131 11 196 747 50 244.60 43 80.00 CHANGED .uhlspchph-ptsp...pssaslpsa+sNYlLPhoas.sssNppshpst..........s.t..hcshEsKFQlShKhsl.hpslhs.t....sssLahuYTQpSaWQlYNp.p.SuPFRETNYcPElFhhhsss.....h...h.....hphphlslG...hsHpSNG+ussh..SRSWNRlYsshsacp.....s.sa..slsh+sWaRIsE...stpp.......D.DNPDIpcYhGph-lshsYths.cpphshhlRpNhp..sss+GulclsaoaPl........tspl+hYsQYFsGYGESLIDYNpcp.p+lGlGlsLs .................................................htsh.......t..hppp..cssaslhsYcsNYll.as.o.sshNppshss.t...........sps.....t++sEsKFQLSlthsL....ac..s.l.lG........susLhhuYTQpSaWQl.Ns.cp..SSPFR..E...TNYEPplFlsassc.....aphs...hhshR.p..l..phG...hsHpSNG+.......o-Pp................SRSW.N...RlYsphhh-p.......G.sa..hlpl+sWahlsp....s........D.DNPDIs+YMGYaplpl..uYphs..-thl....o.h..phpaNh.........ssuaGusEluhoY.P.I.............spcl+hY..sQhasGYGESLIDYNapp.oRlGlGlhLs................. 1 39 107 161 +3881 PF01735 PLA2_B Lysophospholipase catalytic domain Bashton M, Bateman A anon Pfam-B_2127 (release 4.1) Family This family consists of Lysophospholipase / phospholipase B EC:3.1.1.5 and cytosolic phospholipase A2 EC:3.1.4 which also has a C2 domain Pfam:PF00168. Phospholipase B enzymes catalyse the release of fatty acids from lysophsopholipids and are capable in vitro of hydrolysing all phospholipids extractable form yeast cells [1]. Cytosolic phospholipase A2 associates with natural membranes in response to physiological increases in Ca2+ and selectively hydrolyses arachidonyl phospholipids [2], the aligned region corresponds the the carboxy-terminal Ca2+-independent catalytic domain of the protein as discussed in [2]. 
19.70 19.70 19.70 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.57 0.70 -6.33 15 966 2012-10-02 11:19:24 2003-04-07 12:59:11 13 15 204 2 571 979 5 338.50 26 56.67 CHANGED IulAsSGGGaRAMLsGAGhluAhDsRs.ssss...sLGGLLQSoTYluGLSGGsWLVGoLAhNNasSlpslhsp..tp.slWslspSlhs.P..tGlsls.pshphasslscpVppK+sAGFNlSLTDlWGRALSashhs.hppGGsuhTaSSlpssshFQsuEhPaPIhlADGRh....PGsslIslNuTlFEFoPaEhGSWDsolpuFssscYLGTplsNGsPl.pspClsGaDNsGFlMGTSSoLFNphLLp.lNoos.....hsshlppllpchLp.-hSpcps.DIu.Ys.sNPFp-ss.h.tpstos.........sIssscsLaLVDGGEDG..QNIPLhPLLpspRcVDVIFAlDsSs.Dscp.WPD.GsSLVsTYERpau....spu.puhuFPYVPDspTFlN.LGLss+PTFFGCDu+NhTsh....sp.sPPLVVYlPNs.aoahSNlSTFKhsYs-o-RpuhIpN.GFcuATpsN.p.DssFhuCVuCAIlpRp.EphNhotPspCppCFpsYCWNGTls ............................................................lulhhSGGGhRAhhshhG.hlhuhp........................sGlLpssoYluGlSGu.sW.h....h....u....olh.s..s.........h...s.....p..sl................p.............................................h.t......h..p..ps...........hhp................................h.............p.......p..h..tha................h.p....pltt..+tpt...G..h....hohsDhW..................G.h.h.l.........s..p.hh.....................................................t............t...................s.........h.........phS...s...p..................p.t.h.p...pup.PhPIhsu......th.p.............................s.s..............................h......t.h....t........p.........hhE.FoPaEhG...........ph...tuF.hshchhGoph....Gp...................................t............................p.....thh..........h..............Ghhhus.....u...h....s.......thh..................................................................t..h..............................................................................................................................................................................................................................................................................................
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.......................................................................................................................................... 0 144 261 426 +3882 PF02988 PLA2_inh Phospholipase A2 inhibitor Griffiths-Jones SR anon Pfam-B_1254 (release 6.4) Family \N 21.70 21.70 21.80 21.80 21.00 21.40 hmmbuild --amino -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.98 0.72 -3.55 5 85 2012-10-03 01:43:02 2003-04-07 12:59:11 10 2 45 0 26 94 0 82.30 38 41.96 CHANGED +SCEICHNlG+DCsu.asEECuSPEDsCGTVLhEVSSAPLSlRolHKNCFSSSlCKL-aFDlNsGpEoYLRGRIsCC-c-cCEs ...............sCElC+s.hG.psCsG.hh..cpCsuscDsCsplh.hEloo.uslShpssaKsChoSshC+LshlssNhGpcsYlRu+hpCCpp-sCcs........ 0 2 2 12 +3883 PF00321 Thionin plant_thionins; Plant thionin Finn RD anon Prosite Domain \N 20.50 20.50 22.80 23.80 19.50 16.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.19 0.72 -3.74 37 120 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 29 31 30 148 0 44.90 51 37.08 CHANGED KSCCPoTsARNsYNsCRlsGs.spshCAshoGCKllSussCPssas.+ ..KSCC.oTsuRNsYNsCRhsGs.upthCAslssCKllSG.sCPssas+.... 0 4 5 14 +3884 PF01307 Plant_vir_prot Plant viral movement protein Finn RD, Bateman A anon Pfam-B_881 (release 3.0) Family This family includes several known plant viral movement proteins (e.g. Swiss:Q85292) from a number of different ssRNA plant virus families including potexviruses, hordeiviruses and carlaviruses. 
20.30 20.30 20.50 23.90 19.40 20.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.54 0.72 -4.31 78 326 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 133 0 0 321 0 104.10 33 92.51 CHANGED LoPPPDao+shlssulGlululhlahls+..s.sLPpVGDN..lHsLPHGGpY+DGTKplpY.suPspt...........ttpssphhshhhllhLs.hhIah.sph..........ptptC.spCs ..LoPPPDhocshhshAlGluluhhl.ahhsp..s.pLPps.GDN..lHpLPHGGpYpDGTKpIpY.ssPppt.............tp.ss.ph.t.shhllllLs..hhIhhhspht.........ppppC..pC.h................... 1 0 0 0 +3885 PF04819 DUF716 Plant_viral_rep; Family of unknown function (DUF716) Finn RD anon Pfam-B_5106 (release 7.6) Family This family is equally distributed in both metazoa and plants. Annotation associated with Swiss:Q9SLW7 suggest that it may be involved in response to viral attack in plants. However, no clear function has been assigned to this family. 25.00 25.00 28.30 27.20 23.20 23.20 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.89 0.71 -4.74 32 388 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 80 0 247 370 0 128.40 27 44.55 CHANGED hshuL...ppLhhuhAFh.EhhLFahHs.p........s+tulEsphH.LLlhslhlsshsshltlhhPpshh..lpLh+ushlhlQGsWFh.........................QhGFhLasP............................sstschc...tspccshhhlshpFsWalshshlhlsshYsh ................................................................h...slpplhhuhAhhhp.....hhLFhhHs..p........s..+tsl.-h.p.hH.LLlhslhlsslsshlclh...h..P..s.......s.hh............l..pl.h+uhhhllQGoWhh.........................QhGFhLasP................................hstsphc...psptpshhhlshtFsW....HlhhshlhhhshYs.h........ 0 44 107 147 +3886 PF05015 Plasmid_killer Plasmid maintenance system killer protein Bateman A anon COG3549 Family Several plasmids with proteic killer gene systems have been reported. All of them encode a stable toxin and an unstable antidote. 
Upon loss of the plasmid, the less stable inhibitor is inactivated more rapidly than the toxin, allowing the toxin to be activated. The activation of those systems result in cell filamentation and cessation of viable cell production. It has been verified that both the stable killer and the unstable inhibitor of the systems are short polypeptides. This family corresponds to the toxin. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.00 0.72 -3.84 8 1079 2012-10-03 00:18:00 2003-04-07 12:59:11 8 3 735 0 333 1012 133 89.40 31 94.60 CHANGED MhhsF+cKsLcpFapcss..optIsushsc+Lpc+Lphl-sAps.pDLphP...uh+hc+L+G.chcsaaSI+VNspaRLIFpacsu.s.....ssllsYlD.H ...........................................MlhsFpc...ct.hppha...p....t.tp.....tpt....h......s..p.h..tpth.t.++LphLcsAp.s...h...pDLphPs.....u.+LctL.pG......c..c..p......G.aSI+l...Ns...p...aRlsFc.apss..s................sh.lph.DYH.............................. 1 107 224 290 +3887 PF01672 Plasmid_parti Putative plasmid partition protein Bashton M, Bateman A anon Pfam-B_1163 (release 4.1) Family This family consists of conserved hypothetical proteins from Borrelia burgdorferi the lyme disease spirochaete, some of which are putative plasmid partition proteins [1]. 22.10 22.10 22.10 22.10 21.20 22.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.68 0.72 -3.55 34 451 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 32 0 18 262 0 83.30 42 46.53 CHANGED AYtYLKlApulp-GllppchlhEsGhppohphlcsccssshKKS+p..........N.IKPLRFQLKspESYDFYKpNuKFTuFlL-clFpspKD ........sYpYLKIApulp-Gllp.chlhcNGlppolphlcspp.s.pl+..KS+p..........N.IKPLRFQLKspES.YDFYKpNsKFTuFlL-clFpspK-........ 0 17 17 17 +3888 PF05016 Plasmid_stabil Plasmid stabilisation system protein Bateman A anon COG3668 and [2] Family Members of this family are involved in plasmid stabilisation. The exact molecular function of this protein is not known. 
This family also encompasses RelE/ParE described in [2]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -10.17 0.72 -3.59 158 6528 2012-10-03 00:18:00 2003-04-07 12:59:11 9 12 2562 20 1504 5217 580 85.40 16 88.52 CHANGED lthstpA..tpDlcclhchh............spphhpplppthpplt.ptsthscs.................h.thhphtht................sahlhYp..l......p.tthll.llplh+ppcth ...............................................h.hstpA..hc-lcclhphh...................tpph...h.pp.l...t..p.t.l.p.p.Lt....p.s.t.hspt.........t.........................h..sh.hc.h.t.ht................sahl.lYp.l..........ppsp.tl..l.l.lplhcppp......................................... 0 448 979 1251 +3889 PF00681 Plectin Plectin_repeat; Plectin repeat Bateman A anon Pfam-B_68 (release 2.1) Repeat This family includes repeats from plectin, desmoplakin, envoplakin and bullous pemphigoid antigen. 20.50 20.50 20.50 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.78 0.72 -4.43 119 3591 2009-01-15 18:05:59 2003-04-07 12:59:11 15 114 75 10 1456 3428 0 43.40 31 9.87 CHANGED hpLL-uQhuoG.GllD.Ptssp+lolcpAhccGllstchtppLhpsp .....................lLEuQhsoG.Gll.....D.Pt.....ss.....c.....+.lo.l.ppAhp+GllstchtptLhps........... 2 150 264 648 +3890 PF01523 PmbA_TldD Putative modulator of DNA gyrase Bashton M, Bateman A anon Pfam-B_845 (release 4.0) Family tldD and pmbA were found to suppress mutations in letD and inhibitor of DNA gyrase. Therefore it has been hypothesised that the TldD and PmbA proteins modulate the activity of DNA gyrase [1]. It has also been suggested that PmbA may be involved in secretion [2]. 
19.70 19.70 20.90 20.80 19.40 17.40 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.71 0.70 -5.14 158 5034 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 2139 8 1513 4013 3929 291.30 25 63.32 CHANGED sts-l....hhp...p.spshsl.phpssclcphpp.spstuhulRl......hh....ss.+hG.husos...shsp.ps......lp.phlcpAh..p....hAc.......hspt......thtthsshsthsht...............hp.t..........htsh...shcc.......thchht.chpptstphttthhs.......sshspsppphhlhsSpGhphppppshhththpshsp......pssphpps.............ht.h...................tthsscpl.....ucpu....scpAhptl..s..upphpsGph.s..Vllsspssusllp.shs....pshpucplhps..pShh.....ts.......+l....GcplusptlolhDDPp...h.....suhuohsaDsEGlss.pcphll-cG.lLpsa .................................................................................................................................u-l.hhpp.spstol...sh...c....pu..plcssph.....st-..pul.Gl.Rs.........................ht.............sp..+pG...hA....t......os.........slo....tu....................lt.pslpt..Ah..s.....................hA+....hsst................ttsshsshs.hshp....................hp.........h.sh..t..shcc.................tlclht.cs-ps.u.hs.t.-tclpps.......tuuhssthphhlhuso..c......Ghhs...s.php...shhplussllup.........csschcps...h..........shtshphh.....................t....sscth..................uccA........sc.p...A..lspL...s......utts.s...s..G.ph..s....Vlhusshs.....us.Llpcslu..p..ulpG.stsh+t.........sShh......ts..............pl.....Gc.p.l.s....s....p...h....lT..lh-Dsp..............l...........pthu.S..hsh............DsEG.s.ss..ppp..slIcsGlLpsa..................................... 0 444 903 1233 +3892 PF03332 PMM Eukaryotic phosphomannomutase Mifsud W anon Pfam-B_3713 (release 6.5) Family This enzyme EC:5.4.2.8 is involved in the synthesis of the GDP-mannose and dolichol-phosphate-mannose required for a number of critical mannosyl transfer reactions. 
20.00 20.00 20.20 20.10 19.80 19.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.37 0.70 -4.79 9 691 2012-10-03 04:19:28 2003-04-07 12:59:11 8 10 520 12 309 572 314 202.50 39 83.41 CHANGED hc-hLp.cLRp+lsIGlVGGSDhpKhtEQLs...tcsVlscFDYsFuENGLsuY+tG+.lupQolhpaLGE-KlpcLlNFsL+Ylu-lDlPhKRGTFIEFRNGMlNlSPIGRsCSpEERp-FtcaDKp+pIRpKhVcsL+ccFs..chGLTFSIGGQISFDVFPpGWDKTYCLpHlEp-..FcsIHFFGDKTh.GGNDaEIasDPRTIGHoVsuP-DTlphlpElht. ......................................................................................................phlt..cLc.p..+.sslulVGGSDhs.KhpEQl...........tp......s..l..h...........p..p.a...D.....ahF..sENGhhhY+.sc.hh..p..ps.h....hphLG-.-.p.hp.c.h.l.p.asLph.h...u..c..l...p...l......P..h.....+.......R..G.sFl.E.hRs...GhlNlSPlGR.ssoh-ERpt......ap.p.h.D.KcpplRpphltsLppcFs.................ths..Lpash..GGpIShDVFPpGWDK..sY.s...Lpc...l.............p...p...p..........t.......h..p.pIaFFGD+s.....G..GNDYEIasp..p.s.hGasVssPcDThphhcpLh..................................................................... 0 99 159 241 +3893 PF03901 Glyco_transf_22 PMP; Alg9-like mannosyltransferase family Finn RD, Bateman A anon DOMO:DM04662 & Pfam-B_7750 (Release 8.0) Family Members of this family are mannosyltransferase enzymes [1-2]. At least some members are localised in endoplasmic reticulum and involved in GPI anchor biosynthesis [3-4]. 
20.40 20.40 20.70 20.60 20.20 20.30 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.61 0.70 -5.48 16 1285 2012-10-03 03:08:05 2003-04-07 12:59:11 12 33 352 0 898 1337 89 359.40 19 69.81 CHANGED hhahhhlshRlhsuhhsth...csDEhapshEsh.HhhlashGh.TWEap.phulRSaha......hlhthshhhlthhhtcsphhl............................................hhhsRlhhulhusls-hhlaphlspph......................................shpluphhlhh.lsshhhhhsuochhssShphhhshluLthh................................................hshhhlsshuhhtRPpusllhlPlshh.............hL..hhpphhpta........hhhslshhshhhh...slllDphaY...............G+hlhsshNhlpaNVh....sstsshYGscPaaaYhhsshsthshshhhhhlhushhh...................................hhsshhhhLhlaShhsHKE.RFlaPlh.Plhhlsuuhslsph.....................phthpth........hhhhhlhhhsslshuhhhulh+phGsh..h....chhstlppss.......t.s....slh...................lhtphaphPsphal. ......................................................................................................................h....hhh.hph.h...shhs..h.....psDE.....ap..-...sh....t........h....h.................h.........h........h....................s........h.................s..W.-a..........................s.l....Rshha...............................h...h..h...h...h...........h...h.....h....h.........h.h.....h........................h..h.....................................................................hhhsRh.h.h.uh...h.s.h.h.s....h....hhhp....h.t..ph.h....................................................s.....p..h..s...hh....h..h...hh.....hh..sh.....h.hhh...htoc...h...hss.shthh..hsh.huh.hh..h.....................................................................................................................................................t...hh.hh....h..h....shhu..h.h...h..t..s.....hh.hhh...sh.h.h...................................h.l.........hh.t..p......h.........................hh.h...s..h...h..h...h...hh........h...s.lhlDsh.a.a....................................................
........t...p.h..s..h......s....hs..hlhaNlh.......tstssha.G.......s..p.......Phh.aYh..hpsh........h.h.h....s..h...h..h..h..h.h..h..hhhh..........h.........................................................................................h..h..h.h..h.hs.hhh.h..l....hl..h.Sh..sH....K.E.RFlhP....hh.P..hl...h...l.h.u....uh....sh.t.h.h..............................................................................................................................hhhhh.h..h.hh..shhh...u.h.hhuh.h....p..ths.......................h........h......t..................................................................................................................................................................................................................................................... 0 296 484 741 +3894 PF00822 PMP22_Claudin PMP22; PMP-22/EMP/MP20/Claudin family Bateman A anon Pfam-B_1393 (release 2.1) Family \N 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.09 0.71 -4.59 13 2113 2012-10-03 00:20:40 2003-04-07 12:59:11 15 8 110 0 1175 2148 0 165.70 24 76.65 CHANGED MhlhLhuhhlsplusssLLhloTl.sshWhlushss..................htsGLW+sCsssosssphssts.....t.pssLpAspAhMlLSl....IhulluhllhhhQhhshcpGscahl........uGIhhllusLClllusuIYTs+hssthtpsh......scacaGauahLuWluFshshluGllY 
..................................................................................................................................h....hhuh.h.lshhuhl..sh.lh..s..ssh..s...h..Wh..hs.shh.s....................................................................hhapGL...WhsC...sh........p................u.......................s...............s.........t..h.....p.....C.......p........h...s...........s............h......h........s..........h.........s............s.............l....Q.........u.........s..R..u...L.....h..l....h..ul..............................llu..h..lu..h..l....l....s....h.....h......G....h....p.....C......h....p....t...s....p..p.ttsht..............................................hhhhuGl..h.a.l.l.......u..........G.lhs..l......l.ul...s..h......a.....s.......s......p........h........h..p..-......h...h....s..sh...................t.chch.........G.....huh.alGWsushlhhlu.Gsh.................................................................... 0 109 224 589 +3895 PF01625 PMSR Peptide methionine sulfoxide reductase Bateman A anon Pfam-B_1111 (release 4.1) Family This enzyme repairs damaged proteins. Methionine sulfoxide in proteins is reduced to methionine. 
22.30 22.30 22.30 22.30 21.60 22.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.86 0.71 -4.35 66 6530 2009-01-15 18:05:59 2003-04-07 12:59:11 16 26 4318 25 1573 4711 3705 153.90 41 66.75 CHANGED ppshhAGGCFWssEshFpplt........GVlpspsGYsGG..psps.PoYcpVsps......oGHsEsVclpaDPshloappLLchF..aph.aDPTphstQusDhGsQYRSuIahpsppQcphAcphhpphppp............hsptlsTplpsh..psFasA...E-YHQcYht+pspt..Ys..phhh .......................pshhAGGCFWuhEphF.p.p.l.s.........G..Vh...sstuGYsGG......ps......t.N......Po.....Y...c.pV.ss....sp............TGH......sEsVclsa....DPph...l..........S..a..cpLLp.ha..a.ch...h..D..PT.......s...h..s....p..QG.s..Dh.G.sQYRo.uIah...p......s.....p.....c......Qcp.....hAcpshp..p.hppp.....................hpps.I..sT.E..l..............t......s.....h........p..s.......F.......Y....A.......E-YHQpYh..c..KNPpt..Yst..hh............................................... 0 508 1007 1340 +3896 PF02366 PMT Dolichyl-phosphate-mannose-protein mannosyltransferase Bashton M, Bateman A anon Pfam-B_556 (release 5.2) Family This is a family of Dolichyl-phosphate-mannose-protein mannosyltransferase proteins EC:2.4.1.109. These proteins are responsible for O-linked glycosylation of proteins, they catalyse the reaction:- Dolichyl phosphate D-mannose + protein <=> dolichyl phosphate + O-D-mannosyl-protein.\ Also in this family is Swiss:Q94891 Drosophila rotated abdomen protein which is a putative mannosyltransferase [2]. This family appears to be distantly related to Pfam:PF02516 (A Bateman pers. obs.). 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.35 0.70 -5.01 22 2315 2012-10-03 03:08:05 2003-04-07 12:59:11 13 20 1595 0 896 3419 711 228.90 21 38.08 CHANGED hhhtlhuhhsRhaplshsspllasEsphschtshYhptpaahss+..PPluphlluhshh.....lsshsssa.a.s.h.phhss.sshhthRhhsuhhusLsssLsYhhshphshsthsuhluullhhh-suhlT.u+ahLl-uhLlFFhshuhhshhph...............hotphhhhhhlsGluLGhulssK.hsuhhslhhlhhhshhplWphh......cpph.hh........................h.ahhsphhhLlllPhslalhh..ahlHhhhhh ............................................................................................h...hhhshhhhhhhh....h..s.....sh.....h..........sc.................h..........t.....Y....h......c....h..s.....h.............h..........hp..........s.hs...........hh..h.......s..hh...............h.h.t...h...s..........s.....s...h.......h.....a....h...ps....h....s......p......h...h...h.....s............h.........s....................h...u......h.R.h..h.......ss..hh....usl.s.ls.l.s......hh....hs..h.c.............l.....h.............t........s......p............h........s.........u.hl..A.u.l.lhh...h......p.....s...h.h.....h.....s.h.u..p.h..s..l..L...D...s...h...l..s...h....a.l...s..s.u.h.h..s..hhhhh.................................ptt..h....t......t....s....h........h.......h....h..h.....h..l....h..Gl.s..h.Ghu..h..hs.K...h...h....s..h...h....s..l............l.h...h..h..h....h....h.....s...h....h..p.t.h..h..................p.th.h...h.h....................................................h...h..h....h..h....h..h..h..L..l.l..l..P.h.h.l.hlh...h..th..h............................................................................................................. 
1 277 520 754 +3897 PF03393 Pneumo_matrix Pneumovirus matrix protein Mifsud W anon Pfam-B_3641 (release 6.6) Family \N 20.80 20.80 21.90 51.30 20.00 16.30 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.47 0.70 -5.45 9 182 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 17 2 0 128 0 183.10 74 99.13 CHANGED METYVNKLHEGSsYTAAVQYNVlEKDDDPASLTIWVPMFQSShPADlLIKELtslNILV+QISTPcGPSL+VhINSRSAVLAQMPsKFoISANVSLDERSKLAYDlTTPCEIKACSLTCLKsKsMLTTVKDLTMKThNPTH-IIALCEFENIhTSK+VlIPTYLRSISVKsKDLsoLENIsTTEFKNAITNAKIIPYAGLlLVITVTDNKGAFKYIKPQSQFIVDLGAYLEKESIYYVTTNWKHTATRFuIK ..........................................FQsshs.s.lhc.LhslTITTLYsASQsGPILKVNASAQGAAMSsLPKKFEVNATVALDEYSKL-FDKLTVCEVKoVYLTTMKPYGMVSKFVsSAKuVGKKTHDLIALCDFhDLEKshPVTIPAal+SlSl+pp-.solEshhosEhcpAlTpA+IhPYuGLlhlhThsssKGhFKhltstsQhIV-LGsYlptESl........................ 0 0 0 0 +3898 PF03246 Pneumo_ncap Pneumovirus nucleocapsid protein Batreman A anon Pfam-B_3020 (release 6.5) Family \N 18.70 18.70 22.90 22.90 18.40 17.40 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.12 0.70 -5.94 4 663 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 24 70 0 329 0 129.50 46 99.85 CHANGED MSLppl+LsDlp.KculLspSpYTIpRssGsoTulospslQpclspLCGMlLhTcascac.sApIGhQYhhotLGp-co.pILRsuG.cVpsVhT..Ksaol.hpGKphKhE...VLsIpulssuhhtslEhpARcohsphLKEtu.plPpNQR.sAPDsslIlLCIuALlhTKLAusscsGL-sslRRAspVLpsthpRYPph-l.cIAcSFYELFE+KsYYhslFIcaGhA.uSopuGS+sEuLFsslFMpAYGAGQsMLRWGVlA+SspNIMLGHsSVQAEhcQVsEVY-hspKhGsEuGhhHlRpsPKAuLLSLTsCPNFuSVVLGNAAGLGIIG.Y+Gps.NpELFsAAcuYAcpLKEsNhINaSuLsLTsEE+EAhpp.LNhsDDsspc 
..................................................................................................................................................................................................................................................................................................................................................................................................................... 1 0 0 0 +3899 PF03438 Pneumo_NS1 Pneumovirus NS1 protein Bateman A anon Pfam-B_3221 (release 6.6) Family This non-structural protein is one of two found in pneumoviruses. The protein is about 140 amino acids in length. The NS1 protein appears to be important for efficient replication but not essential [1]. The NS1 protein has been shown by yeast two-hybrid to interact with the viral P protein [2]. This protein is also known as the 1C protein. It has also been shown that NS1 can potently inhibit transcription and RNA replication [3]. 20.50 20.50 21.70 266.10 19.60 18.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.85 0.71 -4.36 2 37 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 12 0 0 30 1 136.00 85 98.24 CHANGED MGSpoLShIpVRLpNlaDNDcVALLKITCaTs+LIhLTpsLAKuVIHTIKLsGIVFlHlITSSDhCPsssIlspuNFToMPlLQNGGYIWEhMELTHC.QsNGLlDDNCEIpFSK+LSDSphspY.NQLSpLLGhs MGsNSLSMIKVRLQNLFDNDEVALLKITCYTDKLIhLTNALAKAVIHTIKLNGIVFlHVITSS-lCPsNNIVVKSNFTTMPlLQNGGYIWEhhELTHCSQsNGLIDDNCEIKFSK+LSDSsMTsYMNQlS-LLGhD 0 0 0 0 +3900 PF02478 Pneumo_phosprot Pneumovirus phosphoprotein Mian N, Bateman A anon Pfam-B_2290 (release 5.4) Family This family represents the phosphoprotein of Paramyxoviridae, a putative RNA polymerase alpha subunit that may function in template binding. 
21.00 21.00 21.50 23.70 16.70 20.90 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.74 0.70 -4.87 8 859 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 20 0 0 264 2 99.10 76 96.37 CHANGED PEGKDILFMGsEAAKhAEAFQ+Sl+psuptt.pSIsG-.lpT........luEplpLPslspss............osKsuppKssptsssslh...-lEplpEchlssss-sps.sscss-ossps.....KK+VoFcsscs...G+YTKLEKEALELLSD.pEDsD-ESSlLTFEE+Ds..ousSIEARLEuIEEKLSMILGhL+TLslATAGPTAARDGIRDAMlGlREELIscIhsEA.....KsKAAEhh+EE-sQRuKIGsGS.VKLTEKA+ELNKIlEDpSoSGESEpEpE....-sDtpt-DI .........................................................................................................................................................................................................................................lREELIA-IIKEA.....KGKAAEMMEEEMNQRSKIGNGS.VKLTEKAKELNKIVEDESTSGESEEEEE.K-hQ-NNQt-DI....... 0 0 0 0 +3901 PF01048 PNP_UDP_1 Phosphorylase superfamily Finn RD, Bateman A anon Pfam-B_1190 (release 3.0) Domain Members of this family include: purine nucleoside phosphorylase (PNP) Uridine phosphorylase (UdRPase) 5'-methylthioadenosine phosphorylase (MTA phosphorylase) 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.13 0.70 -11.60 0.70 -5.39 159 15802 2012-10-01 20:25:13 2003-04-07 12:59:11 15 225 4828 837 4138 10648 3603 227.10 18 76.95 CHANGED plullsuss............tchtthtpp.........h.ph.hhp..............tthphhhGph......tsttlslstpG.........h.Ghspss..hsshthlp..th.......................tsctllthGss........Gul....p.s..lp......sGDlllssp.....h.......lp.........hshtss....hh...................................................hhsshsth..hssplhphhtp..........htphs........................hplpp....Gshhsssuhhhps................tschp...hhpphG.......ss.....sl.-MEssshstlApph.s..ls.....hh.slpsl....os.............ttt..................pphtphhppstpphtpl..ltphlp 
...................................................................................................................................................................................................hsllsu.s.............cht.hhp................htph...hs...................tshhhh.h.Gph............pup.p...l........s..l...h...t..sG.................h..G..hs.s.s....s........hhs.ttllp......th..........................................................ss..c...tl..l..t..s..Gs...s......................Gul...........pts....lp....................lGD....l..l..l..sps.........h.....................hp....................hssss.......h.h......................................................................................................................hh.st..hts....s....s....p...l.h..p...thhp................................sspphs.............................................................hphch.......Gshhos-shhhss............................................tphp..............hhpp.hu.....................sh.........sl..-ME.uusl.......s.t.s...u.tp....h...s.....l...........hh..slts.l......oDhh.t.............................................................ph.t........................................................................................................................................................... 0 1276 2469 3410 +3902 PF02233 PNTB NAD(P) transhydrogenase beta subunit Bateman A, Mian N anon Pfam-B_2220 (release 5.2) Family This family corresponds to the beta subunit of NADP transhydrogenase in prokaryotes, and either the protein N- or C terminal in eukaryotes. The domain is often found in conjunction with Pfam:PF01262. Pyridine nucleotide transhydrogenase catalyses the reduction of NAD+ to NADPH. A complete loss of activity occurs upon mutation of Gly314 in E. coli [1]. 
19.80 19.80 19.80 20.20 19.70 19.70 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.59 0.70 -5.71 10 2429 2012-10-03 09:55:27 2003-04-07 12:59:11 11 22 2140 27 707 1732 3888 439.00 53 87.05 CHANGED sLhphsYls...uulhFIhuLtGLSspcTARtGNhhGIlGMsIAllATl..lssstsshs..................hlluulllGusIGlhIAp+VpMTsMPQLVAhFHSFVGLAAVLVuhAsaltppstuhsssu.............s.sshphlElaLGlaIGulTFTGSlVAFGKLQGlIsS+PLhLPu..RHhLNhuLLlssVllhlsFhhssshssthssLll..................uluhlhGhpLVhuIGGADMPVVISMLNSYSGWAsAAuGFhLsNsLLIlsGALlGSSGAILSYIMCKAMNRSlhsVIhGGFGsssusuuucs.t...sGps+psoA-EsA-hLhsApSVIIlPGYGMAVAQAQaslA-lschLcccGlpVRFuIHPVAGRMPGHMNVLLAEAsVPYDlVhEM-EIN-DFscTDVVLVIGANDTVNPAAp-DPsSPIAGMPVL-VWKA+sVlVhKRSMuoGYAGV-NPLFa+-NTpMLFGDAKKss-pllcpl ................................................slsshsYllAulL...FI...huLtG...LS...p.cTuRpGNhaGhh..GMul.AllAT...l....h..s..s..s...s...t..s..hs..............................h.l.ll.uhll...GGsl....G........hhh..A....++Vc..MTpMPpLVAhhHShVGLAAVLVuh.su...alp....t..s..h...s..h.h.............................................h.ss..hphsElaLGl..h.IGAlTFTGSllAFGKLpG.......p.....l.........s...u.....pP..l.......hL.Ps....+..H.hl...Nlsh....ll..s..sh.h.h..h..l.hF.s.........t..s....s...u........h...........h..h.s..L.hl.hs...........................hlAhshGhtllhsIGGADM...........P......VVlSMLNSYSGWAAAAtGFhLs..NslLIlsGA............LVGSSGAILSYIMC.....K.......AMNRSFlu.....VIh..G.G......F.G.....s...........s..s........s.....s......s................u...s...s...t...........................ttGp......h..+.p...ho.......A.......-..........-.s...............A.thL.....p.sApoVIIsPGYGMAVAQ..AQasVtElscpL+t.c.Glp.V..+FuIHPVAGRhPGH.MNV....LLAEAcVPYD...hVhEMDEIN...cD.FusTDVVLVIGAND.sVNPA.Ap-.DP.sSPI.uGMPV....L-VaKAp....sVlVhKRSM.s.oGY....................AGVpNPLFa.c.-N.Tp.....ML.FGDAKpsl-sllcu................................ 
0 215 421 584 +3903 PF03833 PolC_DP2 DNA polymerase II large subunit DP2 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 24.00 24.00 24.00 40.70 23.00 23.80 hmmbuild -o /dev/null HMM SEED 900 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.62 0.70 -13.57 0.70 -6.88 6 125 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 111 1 80 138 362 805.40 45 69.80 CHANGED ccYFEpLpcEl-+hY-IAcpARc+GhDPossVEIPlApDMA-RVEuLlG.pGlAcRIRELs.t-huRE.sALclucEIl-G+FGDhc...+Ecth-pAVRTALAILTEGlVAAPlEGIAcV+Ic+N..sDsocYLAlYYAGPIRSAGGTAQALSVLVuDYVR+tlGlDRYKPoE-EIERYlEEV-LYcptss.LQYpPos-ElRLsscNhPlpIsGEuT-csEVSGHRDLsRVETNplRGGhhLVLsEGll.KAPKllKYscplslEGW-WLcclhcusccu-...........t...pEEc.tlst...............ssDKalcDlIAGRPVFuHPS+sGGFRLRYGRuRNoGhAThGlpPATMaLls-FlAlGTQlKsERPGKAusVVPVDTIEGPsVKL+NGDVl+IsshpcAhcVRs-VtEIL.LG-hLlsYGDFLENNHsLhPAuascEWWIQpl.sus..........................tsDsctl+.........ts.s-pAl+huc-aclPLHPcYTYaWHDlosE-lchLtshltp..p.chpspc.....pchVL.lch.ppsKclLEhLGlsH+V+-splll-paaPhhtuLGhslppp.....phph.hpstpssl-hlNtluslcl+c+A.ohIGuRMGRPEKAc-RKM+PsVHsLFPIGpAGGupRsItcAsccsp.......shcVEluht+CPsCGcpohpphCPsCGoh.c............................sclcshs+pcIcLs-lhccAhcslGlpc..hDclKGVKGMhStpKhPEPLEKGILRAKp-VaVFKDGTsRFDsTDlPlTHF+PpEIGVSVEKLRELGYs+DhhGsELcc--QlVEL+PQDVIlscsuA-YLl+VAsFlDDLLp+FYsL-tFYNlKscEDLlGHLVIGLAPHTSAGVVGRIIGFocAssGYAHPYFHAAKRRN 
.........................cYFcpLppclcchaclAccARppGhDPpscVEIPlApDhA-RVEsll........G................cG..VAcRIRE.Lt..pchu..+..E.....sALcluc-h.....s.....-GchGchs......+cpth-tAlRTAlAlLTE.GlVAAPlEGIucVcltcN..sDGo-YlslYYAGPIRSAGGTAQALSVLVuDYlRptlGlsca+P..p..-...-ElERYsEElpLYcptss.LQYpPps-El+hhscNhPlplsGEsT-c.EVSGaRDLpRV-........TNplRGGhhLVlsEGlhhKAsKlh+....as....c....p....lth-.uWs.WLp-llssptpsc.............................................t..c.....p......p.t..t..................................htsssKalc-lIAGRPVFuaPSc.GGFRLRYGRuRNoGaAosGlpPAoMhllD-FlAsGTQlKsERPGKAssVsPVDoIEGPhV+LpsGsVl+l-s......hccAhc...............l..+......spV-cIL.lG-hLlsaG-FlENNHsLhPuuYs.EWWhp-htpss...................................................hs.p.hp.............ss.ccAlchupchshPLHPcYTYhWcDloh--lttLtchlhp.......hpt.t..............................tp....l...l....p.......t..phKc....hLEhLhl.Hp.....h.....p..s.....pplhl..p..t..hs..hhhsLGhshp................................tht..t..........h...t...ss...psslchlNclushcl+p+AsohIGsRMGRPEKucpRcMpP.sspsLFPIGp.uGGspRsltcAsc.psp....................hpl-luhpcC..spCGp...ohhthCstCGspp...h.C..Ct........t..........................C.psthc.hpshpp......tplslpphhppAhcpls.pp...hc.lKGV+GhhSppKhsEPLEKGlLRAKpslhsFKDGTlRaDhTDlPlTHF+PpE.lslolE+L+ELGYpcDh.GpPLpp--QllEL+sQDllls................c.....s....sucYhl+supFlDDLL-+aYulp.FYsscpp-DLlGcLVhGhAPHTSAGllGRlIGFopAtVGYAHPYFHAAKRRN........................................ 0 17 48 68 +3904 PF01620 Pollen_allerg_2 Ribonuclease (pollen allergen) Bateman A anon Pfam-B_1050 (release 4.1) Family This family contains grass pollen proteins of group V. Swiss:Q40963 has been shown to possess ribonuclease activity [1]. 
25.00 25.00 77.70 26.60 23.70 23.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.09 0.71 -3.89 12 100 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 13 15 2 117 0 121.20 46 87.34 CHANGED TVALFLAVALVA.....GPAASYAADuGYsP..............sssTPAssus....AuGKAT.T-EQKLl................EDlNAuFKAAsAAAAssPPADKaKT..FpssF.osusKu.lAstuo.........psstLssKLDsAYplAYcuApGATPEAKYDAFVuuLTEALRVIAGsLEVHAVKPAs ............................................................................................................................ttE.plI................-clsAuFKsAssAAsusPssDKFps..FEAuF.stu.Kt...........ssuu..uht............shphlPpL-AA.hK.AYsAssuAsPEsKYssF.AuLocAlpshuts.cV..s................ 0 0 0 2 +3905 PF01190 Pollen_Ole_e_I Pollen proteins Ole e I like Finn RD, Bateman A anon Prosite Family \N 20.80 20.80 20.90 22.70 20.50 20.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.88 0.72 -4.15 91 693 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 68 0 380 702 0 93.10 23 40.42 CHANGED VpGhVaCpsCpt..th.....sshsltGApVplpCcs....stt......thhtpuhTDppGhFp...ltl.t.s............tt.ptCpshLhso...Pps..sCshst.............uhpsupl .......................VpGhVhCssCpt...shp.....tstsltGApVplp...Cpst......pst........hthptp.usTDpsGhFpltlss.s.................tttptCpspLh..so.....sps.......sCst.ts...........h................................. 
1 40 199 290 +3906 PF00659 POLO_box POLO box duplicated region Bateman A, Mistry J, Sammut SJ anon Prosite Family \N 21.00 21.00 21.00 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.22 0.72 -4.00 81 913 2009-09-12 20:48:28 2003-04-07 12:59:11 13 8 221 70 585 888 4 67.70 25 17.34 CHANGED aps+hulsapLSsusltV..FsDpo+lll.ssptp...tlpYls....p................ptppppashsph..........spplpc+lphlc .........hps+hulsapLossoltV..F.s......DpT+lll..ssstp........tlpYls.......c.........................................ptpp.psathsp..............hsppLtp+lphh.............................................. 1 224 294 444 +3907 PF02563 Poly_export Polysaccharide biosynthesis/export protein Mian N, Bateman A anon COGs & Pfam-B_1505 (Release 7.5) Family This is a family of periplasmic proteins involved in polysaccharide biosynthesis and/or export. 26.80 26.80 26.80 27.00 26.00 26.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.59 0.72 -4.22 183 3850 2009-01-15 18:05:59 2003-04-07 12:59:11 11 25 1898 24 1011 3117 920 91.50 27 23.26 CHANGED sshsssssss..sYplGsGDhlplpVaspsc..............................................................sht......VsssGpIshPhlGslplsGhTssplppplpppLpp..hl...psP.pVsVpltp ............................sh......ts.pYplusGDhL....pl..s...V...a..s.psp................................L................................................s.s.......shh............VsscGsI.hh...P....h.l.G.p.lplsGhThsp...lpspIps+Lsp...hl......ps.P...pVsVpl..t................................... 0 301 639 825 +3908 PF01743 PolyA_pol Poly A polymerase head domain Bateman A anon Pfam-B_814 (release 4.2) Domain This family includes nucleic acid independent RNA polymerases, such as Poly(A) polymerase, which adds the poly (A) tail to mRNA EC:2.7.7.19. This family also includes the tRNA nucleotidyltransferase that adds the CCA to the 3' of the tRNA EC:2.7.7.25. 
This family is part of the nucleotidyltransferase superfamily. 23.00 23.00 23.80 23.10 22.80 22.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.78 0.71 -3.92 21 6516 2012-10-02 22:47:23 2003-04-07 12:59:11 15 54 4623 24 1693 4940 3053 124.10 35 27.78 CHANGED hYlVGGsVRDhLLG+ps...c.......DhDlsos.........AssppltphF.tpphhh.....Gtcathhplhhss..ph....lElAThRscp..tshts.pp.phs............ol--DhhRRDFTINAlhhs.....sssp....llDhhs.GhpDLcsphlR ..............................................................................................................hYlVGGuVRDhL...L...Gp.s........+...........................................DhDl.s.s.s..................................................up..P..c.p...h.p...p..h...h......p......psh.......h..................................G......h.......ca...t...sh..tV.h..h...ps...pt......................hEl.sT.hR...s...-p.t.......t..s....s..s..p.....p...s..ps..phsp.................................................................ol-.-Dh......tR......RDFTINAlAhs.......................t.s...s...p.............llD.a...s...G..h..pDLps+llR................................................................................................. 0 581 1098 1445 +3909 PF01518 PolyG_pol Sigma NS protein Bateman A anon Pfam-B_803 (release 4.0) Family This viral protein has a poly(C)-dependent poly(G) polymerase activity [2]. 
25.00 25.00 27.90 27.10 16.50 16.10 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.14 0.70 -5.97 4 145 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 59 0 0 124 0 334.10 58 99.39 CHANGED MssolRluVSRssuGsuuQTlhpsahLLRssloscshpsshthQ.+FPshtpss+pLsPLtshstDRhl++ssltplhoR-hhhssDh.tphsacssshPho.sspuhphtcLlsshauEt...h-Hl....hPs.usoYsPuulA+hhohsMAGhsP.cG-shhhcssl.aLAA-LlsaphsLPYhls.lDGsosI.shPotsVEchLss.lutLsslDhSaGlEsRuDpRhTpDsupsSSRSlNEL.scEptt+h.shKlhLsh.shQLKlELDsLAcp+sE.pt.thlsuFGp+LFpQhShFusIDp-LhpLslhIKDpshths.tplhphWo.IRou.ucslssuuhslplcsGsWhltcG-DstLoVpPsRl ...........................................tshRhslS+.ttsssuQplh.NaYLLRCNISADG.RNAT+AVQuHFPaLSRAVRCLSPLAAHCADRT...LR..RDNVKQlLTRDLPFsSDL.INY.AHHVNSSSLT..TSpGVEAARLVAQVYGEQh.shDHl....YPoGStTYCPGAlANAISRIMAGFVPpEu-sFs.sGsIDaLAADLlsapFVLPYMls.VDGcsp.I.VlPot.TVEEMLss.suLLN.sIDASFGIES+SDQRMTRDAAEMSSRSLNELc-HEpRGRM.PWKIMLAhhAsQLKlELD...ALADpRsEsQuNAHVTSFGuRLFNQMSuFVsIDRELMcLALlIK-pGFAMNPuQlsuKWo.IRpS.usohshSuhplplctGpWhhhp..................................................................................... 0 0 0 0 +3910 PF00738 Polyhedrin Polyhedrin Bateman A anon Pfam-B_423 (release 2.1) Family These proteins are found in occlusion bodies in various viruses. The polyhedrin protein protects the virus. 
25.00 25.00 71.90 71.70 20.60 20.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.34 0.70 -5.29 17 392 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 161 4 0 238 0 188.10 78 98.09 CHANGED lGRTYVYDNKaYKNLGuVIKNAKRK+HhlEHEhEE+pLDsLD+YhVAEDPFLGPGKNQKLTLFKEIRNVKPDTMKLVVNWSGKEFLRETWTRFMEDSFPIVNDQElMDVFLVlNhRPTRPNRCY+FLAQHALRCDPDYVPHEVIRIVEPSYVGsNNEYRISLAK+GGGCPlMNLHSEYTNSFEpFlsRVIWENFYKPIVYVGTDSAEEEEILLEVSLVFKIKEFAPDAPLaoGPA .........................................................LGPGKNQKLTLFKEIRsVKPDTMKLVVNWSGKEFLRETWTRFMEDSFPIVNDQ.E.lMDVFLVlNMRPT+PNRCYKFLAQHALRCDPDYVPHEVIRIVEPSYVGsNNEYRISLAKKGGGCPlMNLHSEYTN.SFEpFl.s+VIWENFYKPIVYlGTDSAEEEEILlEVSLlFKl.................... 0 0 0 0 +3911 PF03364 Polyketide_cyc Polyketide cyclase / dehydrase and lipid transport Mifsud W, Mistry J, Wood V anon Pfam-B_1457 (release 6.6) Family This family contains polyketide cylcases/dehydrases which are enzymes involved in polyketide synthesis. The family also includes proteins which are involved in the binding/transport of lipids. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.71 0.71 -4.15 95 3012 2012-10-02 19:24:03 2003-04-07 12:59:11 15 30 2234 27 1131 3986 1993 127.70 23 69.89 CHANGED lshssp.plaslls.Dh...cpascahPhsp...........s..scllppsst........................................phphshtshpppa...sscsttphspp............................................ht.hpspWphhsht.............................................tstsplphphph..pht..hhshhhthhhpphhpphhpshp ....................................................................lshsscphaplV.s..Dl...ps...Y.PpF..l..Phsp...........................u..s...c..l.lppsss..........thhA.......................................................................pl.p.l.u.h...s..u..l...p.p..s..F.......so.c.s..p..h..p.spp.................................................................................l.h..p.h..h...s...G.....s..F...c...p...h....ps..t..W..pFps..hs.............................................................tsssclphp..l.ca........-..a..s..s..t...l..h...p.h...h..huhhhpchspphlpuF................................................................................................................................. 0 346 687 944 +3912 PF01736 Polyoma_agno Polyomavirus agnoprotein Bashton M, Bateman A anon Pfam-B_1917 (release 4.1) Family This family consist of the DNA binding protein or agnoprotein from various polyomaviruses. This protein is highly basic and can bind single stranded and double stranded DNA [2]. Mutations in the agnoprotein produce smaller viral plaques, hence its function is not essential for growth in tissue culture cells but something has slowed in the normal replication cycle [1]. There is also evidence suggesting that the agnogene and agnoprotein act as regulators of structural protein synthesis [1]. 
20.40 20.40 26.00 26.00 18.30 18.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.87 0.72 -4.33 3 130 2009-09-11 01:00:53 2003-04-07 12:59:11 11 1 5 0 0 117 0 58.10 81 91.17 CHANGED MVLRQLSRQASVKVuKTWTGTKKRAQRIFIFlLELLL-FCcGEDSVDGK.RK+souLTEps-S .MVLRQLSRKASVKVSKTWSGTKKRAQRIlIFlLEFLL-FCpGEDSVDGK.Rp+po.uLTppp.S........ 1 0 0 0 +3913 PF00718 Polyoma_coat Polyomavirus coat protein Bateman A anon Pfam-B_748 (release 2.1) Domain \N 20.10 20.10 20.20 27.90 18.60 19.90 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.15 0.70 -5.51 12 3425 2009-01-15 18:05:59 2003-04-07 12:59:11 15 1 57 68 0 825 0 141.30 62 81.55 CHANGED LLlKGGlEVL-V+TGPDShTpIEAaLNPRMGps.........tphaGFSpsIslusshssDsPppspLPsYSsA+ItLPhLNEDhTCsslLMWEAVSlKTEVVGloSLhNlHu.tup+s...sshGuuhPlpGhsaHhFAVGGEPL-LQGlhpNapssY....PssllsPp.....shsspuQV..LsPphKA+LDKDGtYPlEsWsPDPS+NENTRYFGoaTGGhpTPPVLpFTNTsTTVLLDENGVGPLCKGDGLYLSuADIsGhasppss.pQpaRGLPRYFsloLRKRhVKNPYPloSLLsSLFsshhPphpG ......................................................................................................................................................................................................................................................tsPPVLphTNTsTTVLLDE.GVGPLCKuDsLYlSAsDlCGhF.........TspsG.sQpWR....GLsRYFKlpLRKRpVKNPYPIShLLssLhN+hs.+VsG................... 
0 0 0 0 +3914 PF00761 Polyoma_coat2 Polyomavirus coat protein Bateman A anon Pfam-B_871 (release 2.1) Family \N 25.00 25.00 43.10 25.30 18.70 22.50 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.24 0.70 -4.93 8 383 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 41 1 0 366 0 225.50 48 91.94 CHANGED GAsLolLhphlApVuElAuuTGhSVttIhuGEAhAsIEl...plAsLs.slE.Gl....sssuEAlAAlGLTspsaAllsut....P....sAl...uGh.......AAlh.QTVoG....uSAl...AssGhpaausWcHcVssVsL.tp.sMALplahP--.hDILFPGspoFsp.laYLDPh.+.WGsSLFpsVGpulW.c.lhRs......shspl..s..o..+-lptRTsp.lp.......-oLARaLEssRWslosuPlshYssl....psYYupLsslsPs.hRQlApR.h..hshG+o...slDpsDuhpthspch..-l..ppPp.sp..............SGpaIEKhtAPGGApQRsAPDWMLPLLLGLYGDlTPshcuacDp.ppcc+c ............................................................................................................................................................................................MALplapP--haDILFPGVssF..VNsl...pYLDPt.H.WGPSLFpoluQuhWp.llp-...........slstl..s..S..pElpcRTpchhh.......-sLARhLEpopWslsN...u...P.hshYshl..............p-YYucLs..sl.pPs.lRQl..Ap.Rctp.lshG+oa..slDpsDslpthspph..cl...cs.p..lp..............SGEaIE+shAPGGANQRsAPpWMLPLlLGLYGsVTPuLcuhE..DGsppKcc.R................................ 
0 0 0 0 +3915 PF00348 polyprenyl_synt Polyprenyl synthetase Finn RD anon Prosite Domain \N 20.70 20.70 20.70 21.10 20.60 20.60 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.58 0.70 -5.54 16 11487 2009-09-13 10:32:47 2003-04-07 12:59:11 12 34 5158 295 3330 8899 6348 242.40 28 77.16 CHANGED llttht.hhhtuG.KRlRPhlllhsuchlu.........hphsshhslAsslEhlHshSLlHDDl..MDsuclRRGpPTsHtpaGpssAlLsGDulhspAFphlsphp....h.sphphhtl.cLspsssspG.lt.QhhDlpst........chohcphhphhptKTutL.FtsusphuulhuGss..tctpcsLpcauhplGhAFQlhDDlLDhhusspplGK.sGsDlppsKsThsslhuLct...uppctppllcpshpp..pphshpshsth.tlcthtthhh .....................................................................h....h..hh.h.h.s.G.G...K.RlRP.hls.l.h...s.s.p.h.hs....................................hp.t..p.t....h.....h.t.hA...s.u..lEh....lHs...hoL....l.............HD.....Dl........hD....ps.c..hR..R.Gp.s.....................T.........s...............p.................t.................t................a..................G...................p..........s................s.............A................l......LsG.D.......hL.....h...s....hA....ap.h....l.s....p.hs.............................p.........p.......h...h...t.....l.....h.....p.....h.......s..............h....s.......s...h..............s...........p.G....ht.....Qhh...c....h..t...s.t..................................................ph.s.h..c...p...h....h.p.l....h...pt....K...T..utL..ht........tu.sph........G.u.........l.....h......u............s......s.................s........t..........p........................h...............p.......t.....l....p....pa...u...pt.l.G..h.AF.QlhD...Dl.L.D.h..........h...u.......s......s..p............p.....h..G...K.s.s.....G.s...Dl.p........ps..K...Th....P..hl..h...u...hcp............up.t....t...t....t...t.....h...h..p...p.....t...........t..........................................................ht....................................................................... 
0 1100 2096 2820 +3916 PF01943 Polysacc_synt Polysaccharide biosynthesis protein Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family are integral membrane proteins [1]. Many members of the family are implicated in production of polysaccharide. The family includes RfbX part of the O antigen biosynthesis operon [2]. The family includes SpoVB from Bacillus subtilis Swiss:Q00758, which is involved in spore cortex biosynthesis [3]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.75 0.70 -5.13 31 9954 2012-10-02 21:24:20 2003-04-07 12:59:11 12 25 3559 0 2091 9084 1735 278.40 14 58.36 CHANGED lh+NhhhhhhsplhshllshlhhshluRhLuspsaGlhuhshshsshhshlsshGlsssls+plutsps.........phtsthhhsshhshhlhsllhhhhhhlht............hhshschshlhhlhhhhhh..hhhsss.hhshlhpuh.-phphhslpphlpplshhhhhhlhlhhhss.....lhhhshhhhhusllshllshhhhph.h..h...........hphhhht.hpth+phlp.uh.lhlsplssslhsthspl.hluhhhGsts........lGhYssuhplhhhh.tlhsshss.shhPhhuclh 
.............................................................................................pssh.h.hhh.up.ll.s.h..h.l.u.h.l.h.h.h....h...h..s...p....h....l.......G.....s......p.....s...h.....G.....l...h...sh...s..hs...l...h...s.h...h....h...h...l....s...s.......h...G.....l....s....s....u....l....s....+.h.....l..u..p..hps.............................ph..h.p.....t...h....h......h....s.....h.....h.....h.....h.....h.....h.....h....h...s....l.....l....h.....h...h..h..h..h..lhs............................hht......t....s.....p.....h.....t..........h..........h.....h....h....h....h....s..h..shh.........h.h....s...h....h.......s.....h.........h.....p....s....h...h....p....u........h........p.......p......h.....p....h........h...s....l....p.......p.......l....h...t....p....l....h......t...l....h.....h....h...h....l....h...h..h.h..h.h..t...........................hh..t...h.....s..h..h..h..h..h..u...s...h..l.s...h...l....h......s...h..h..h...h...h...h.hh....t...........................................ph..t.h.............h...t....h....h.....p...p........h..h......p....h.....u....h.........h....h....l......s....s.l....s.h.....l....h....p....hl.-.p.....h....h.........l...s...t...h...h...sh.st..................................hG..ha....s..h...u.h...p.......l.h.t.h.h.h.h.h.h.sshhs..shhPhlst..h............................................................................................................. 0 663 1369 1745 +3917 PF02719 Polysacc_synt_2 Polysaccharide biosynthesis protein Mian N, Bateman A anon Pfam-B_1536 (release 5.5) Family This is a family of diverse bacterial polysaccharide biosynthesis proteins including the CapD protein (Swiss:P39853) [1], WalL protein (Swiss:O86159) mannosyl-transferase (Swiss:O05349) [2] and several putative epimerases (e.g. WbiI Swiss:O69130). 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.51 0.70 -5.52 77 3666 2012-10-10 17:06:42 2003-04-07 12:59:11 10 11 2439 14 813 25131 16928 278.60 42 55.79 CHANGED lLVTGuGGSIGSEls+Qllph....sPpclllaspsEhshYplcp-...h...h.t.pl.......lGDVpDpcplppshpthpl-hVaHAAAhKHVPlsEh.NPhEul+sNlhGTtNlhcAAlpssVcphVhlSTDKAVpPsNlMGAoKRhuEhlh.Ahsppp......................TpFssVRFGNVLGSpGSVlPLF+cQIppG.GPlTlTcPchTRaFMTIsEAspLVlpAu..shucGG-.....lFVL-MGpsV+IhDLAcphl.cL.Ght..........DIpIchsGlRPGEKLYEELlhps-shpsppasclhtsps .......................................................................................................................lLlTGuu.GS.I...Gu..El..s......+..p.l..h.ph...........s..P...c.......c.....l....l.......l.......h.......s..........+.........s.......E......h........s.......h....a....p......lp..p-........................l..........h..t...p...............l.....p....h.........................h....I..........u......D.....V...................p.............D.......p......p......p......l......p....p......s.....h.....c......................h.............p............s.....-.........h...........V...a....H....A....A.......A......h........K......H.....V......P........h......h.............E.......h.......N.....P.....h....E....A....l.....+....s........N..........l....h.....G........T....p..N........l.......h.......c...............A..............A.......h.........p...............s..............s................V..............c.................+............h.................V..............h...............l...............S.................T..................D.................K.................A....................V................p.................P................s................N...............l................M............G................A................o................K..R.....h....u.....E...h....l...h...u...h.spps.......tp..............................Tp.a..s..s.V..R......F......G....N.....
..V.......L.......G.......S.........p.........G..........S.......V........I......P....l....F....c...c...Q...I.......p....p......G.......u...P...l..T....l.......T......c....P.......c.....h.......T...R...a...F..M...T...l...s...E....A..s....p....L...V.......l...p...A.u........s...h...u.........c........u....G..-................l....F....V...h..c..M....G....p..s...l..+...I...h...D...L...Ac...ph.l...p.L..G............................-lp.I.c.h.s.G.l.R..........P.........G.EKLaEELlspp..E..t..hps.pphtchahh........................................................................................................................................................................................... 0 272 542 695 +3918 PF02530 Porin_2 Porin subfamily Bashton M, Bateman A anon Pfam-B_1122 (release 5.4) Family This family consists of porins from the alpha subdivision of Proteobacteria the members of this family are related to Pfam:PF00267. The porins form large aqueous channels in the cell membrane allowing the selective entry of hydrophilic compounds this so called 'molecular sieve' is found in the cell walls of gram negative bacteria. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.36 0.70 -5.41 16 586 2012-10-03 17:14:37 2003-04-07 12:59:11 9 3 282 0 170 720 19 326.40 28 88.24 CHANGED DAlVtAEPEPsEYV+VCDsYGsGaFYIPGTETCl+luGYlRhphshGs.....t...sh.ss.ssstpssastpoRhplphsTto-TEhGsL+sahchhhshssss...su..t........................................hslt.AaIpLGGh+sGpshShaDs.hlGhtuDslss.......spthNplpYpa-uGsuFsAulSl--tpusss................................hs...hsspssslVuuVcsstuhushpsssu.................aDs-hcpsAs+stlslp...susu.olhlsGsYusus..Y.........................................hsssp...................Wus.....huuhpapsssKsslssuhph.......................sshstatlGsslcYshlcslohps-lsYschsp.......................phpsp-slsGh .....................................................................................................................................................Dh.lh.c..sh-YV+lCshaGsGaaYIPGT-TCl+ltGYlR..h..-s..t.h.ss................................s.h....s.s..p..t.pss..hst..tu..Rh...t.lphsot.opTEhG..s..Lps...ahph...ca.s...hs..sss.t..su............................................tts..htlphAalph...u....G...hphGhs.S.Fps..htu..h...s.s.slss...sh.ss.ssh...ps.s.p..lsYTashG.s.G...aoA.s...lulEpss..ssss.....................................................s.sh......h.ss.h.h.Pcllutlch..stuaG..uhthssA...........................................................a-s.....s.......h..p.t.....aA......s..p.s...tssls......lss.t.slhlpu..sYu..su..s...................................................................................................................................................................................................ssstp......................htt...W..ss.t..Wss............................au.uh.p...att.st+h...t.h.shthsh.....................................ssh..ttht.ss..ssltap.V.shshss-htYhp.hs....................................st.hsh..........................................................................
......................................................................................... 1 38 89 115 +3919 PF01379 Porphobil_deam Porphobilinogen deaminase, dipyromethane cofactor binding domain Bateman A, Griffiths-Jones SR anon SCOP Domain \N 22.10 22.10 22.70 22.70 21.70 22.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.42 0.70 -5.42 19 4004 2009-09-11 15:01:14 2003-04-07 12:59:11 15 21 3728 9 1153 3057 2572 207.30 46 65.99 CHANGED pl+IGTRpSpLAlhQuphVhctLcphhPs....hph-lhhlpTpGD+ILDpsLuKlG...GKGLFsKELEpALLps.....clDlAVHShKDlPs.lPcGLhlusIscR-DP+DAll....pshpsLppLPpGullGTSSLRRpuQLttphPcLchcs.lRGNVsTRLpKL-ss..-aDAIILAsAGLpR...LGhpsclsp.....h.s-phLPAlGQGALuIEsRpsDpchhslLp ................................h.lpIuTRpS..LALhQuphVtstLp..tt..a..P.s.........................lpsEl.lsh.sT.pG.D...t....I....L....D..s..s.L.........u...+.......l.G............GK.G.LFsKELEpALLcs.......csDlA...VHShK.......D......l......P..s..t..h..P..pG.LsL.u.sls..cR.ED.....PR.DAhV.....o....................p.....s..............h......t..........s....Ls...s........L...P..........p....G..u...........l..........VGTSSLRR....p....s.......QLtt..........h.....R........P..........D..........L.......pl.p....s..l...RGNlsTRLpKL............c........sG...............-aDAIILAsA..GLpR................L.......G.h..p..s.....c....lpph.................lss-.....hL.......P..........AsGQ.GA....luIEsRts.D.pchhtlL.s....................................................................................................... 
0 364 739 986 +3920 PF03900 Porphobil_deamC Porphobilinogen deaminase, C-terminal domain Bateman A, Griffiths-Jones SR anon SCOP Domain \N 21.30 21.30 21.40 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.62 0.72 -3.87 21 3787 2009-09-11 23:02:33 2003-04-07 12:59:11 10 15 3616 9 1082 2829 1539 74.20 34 23.23 CHANGED thpshAERuhl+pLpGGCpVPIusauph.....tsp................lpLpuhlsssDGh........hhchptps.....t.pcutclGhclAcclhs ...........h.hpVpAERuhhppL.-GGCplPIuuaAplp......ssp................................................................lp.Lc..u.lVu.s.sDGsp...............hlcsphpG.............................s.pcucplGhplAccLl.................................................. 0 334 692 920 +3921 PF00280 potato_inhibit Potato inhibitor I family Finn RD anon Prosite Domain \N 24.50 24.50 25.20 24.50 24.30 24.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.02 0.72 -3.83 60 372 2012-10-01 19:32:51 2003-04-07 12:59:11 13 2 66 54 144 396 2 61.90 42 75.93 CHANGED KsSWPELVGhsuctA+tlIp+-pPslpsl.ll...sGos.VTtDa..cssRVRlaV...st..tsh.VspsPtlG ...........................KooWP.ELVGhsscpAc.ph.Ihc..-pPclpll..Vl....PsGoh....VThDa..cssRVRlaV.......st........sh..VspsPplG.............. 
0 20 82 119 +3922 PF00767 Poty_coat Potyvirus coat protein Finn RD, Bateman A anon Pfam-B_868 (release 2.1) Family \N 19.70 19.70 20.20 19.70 18.60 18.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.56 0.70 -5.09 33 6595 2009-01-15 18:05:59 2003-04-07 12:59:11 13 28 378 0 0 5499 0 218.30 55 33.96 CHANGED +D+DVssGT.sGTFsVPRlKsls.sK.hplP+l+G+slLNL-HLLpYpPsQhDlSNTRATppQFpsWYcuVKp-Y-lsDp.pMsllhNGLMVWCIENGTSPNIs..GsW.....sMMDG-EQVEYPLKPll-pAKPThRQIMsHF.SDsAEAYIEhRNsccsYMPRYGLQRNLsDhSLARYAFDFYElTS+TPsRAREAHhQMKAAAlRssssRhFGLDGNVuTp-EsTERHTAsDVN+NMHoLLGs+h ..............................................+D+DVssGo.sG.phsVPRl..ct..h..opK.MphP..p.h.c.G.pslLN..L-HLlpYpPpQ...h.DluNTRATppQFcsWa-uV+t-Y-l.s.-s...pMsl.lhNG...L..M.V..W.CIENGT.S.P....slN..GsW...............sM..M..D....G...-...-..Q......V..E.Y.P.lKPll-pA..+...PT.....hRQIMtHF....S.DsA.E.A.YI.E.h.R.Nt.pc.sYMPRYGLpRNLpDhSLARYAF..DF...YEhs.S+.TP.sRAR.EAHhQ.MKAA.AL.+..ssps+.LFGLDGsVuopp.EsTERHTspDVspshHsLLGhp.s...................................... 0 0 0 0 +3923 PF00157 Pou pou; Pou domain - N-terminal to homeobox domain Sonnhammer ELL anon Prosite Domain \N 21.00 21.00 21.00 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.42 0.72 -4.33 20 1357 2012-10-04 14:01:12 2003-04-07 12:59:11 12 9 164 22 578 1331 1 69.20 59 18.40 CHANGED c-ssshcELEpFA+p...FKp+RIsLGaTQuDVGhALusLaGss...FSQTTICRFEuLQLSaKNMCKL+PlLc+WLp-AE .................c...ss.cELEpFA+p......FKQRR...IK.LG..aTQuDVGhA..Lu.........sLa.G.ss...FSQTTI...C.R..................FEuL.pLSFKNMCKLKPlLpKWLcEA-...................... 0 111 158 327 +3924 PF05061 Pox_A11 Poxvirus A11 Protein Moxon SJ anon Pfam-B_5994 (release 7.7) Family Family of conserved Chordopoxvirinae A11 family proteins. Conserved region spans entire protein in the majority of family members. 
25.00 25.00 32.10 30.30 24.70 19.30 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.92 0.70 -5.36 9 62 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 43 0 0 49 0 290.10 58 99.77 CHANGED MTslPVTDIsN...-YslTsFSEDsYPsNKNYEIToGQLSILRTVN-KL.....LA+TppspshpsDlspt........hhPs-DsPhoIlp+V.uPpssa.-sss.......llhuE..pQRppRlNIhhS.stEslIE+cslpp...tloSl.op..............TPSLGsVFD+-KRl+LLE-ElhpL+p+ps...psssNL-NFT+lLFGKsshcSsElNKRlsIVNYASlNpSsLThEDLEsCS-EEID+hYcslKQYN-ohKK+IlVTphIoIlI.VlEQlLVKLGF-ElKGLSsElTSEIIDlpIG-DCEtIAsKlGIuNSPlLNIslFllKhhIpRI+Ih .........................................................................................MTTVPVTDItN...Dh.lT.pFSEDsYPSNKNYEIThtQhSILppVNshl.....hAhssSPp..phpSpls-s..................lhPD-DSPsTIIE+V.pPpTshlD.sssssp.....tEllluE..QQRppRhNIpVS.stEAlhEpcsh......IT.ShPop..............TPSLGVVa..DKDKR..IphLE-EVhpLRNppu.pocoSsNLDNFT+lLFGKsP.h+SoElNKRIAIVNYAsLNsSsLSlEDL-lCSE-EID+IYKsIKQYpESRK+KIIVTNlIII.lI.sIIEQsLlK.L.GF-ElKGLSo-lTSEIIDVEIG-DC-AlAsKLGIGNSPVLNIlLFllKlFV+RIKIl.......... 0 0 0 0 +3925 PF04651 Pox_A12 Poxvirus A12 protein Mifsud W anon Pfam-B_5523 (release 7.5) Family \N 25.00 25.00 45.40 38.30 23.20 22.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.44 0.71 -4.17 11 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 41 \N 0 60 0 182.20 63 97.69 CHANGED MA-.KKLo.R..SSYDDYIETlNKlTPQL+TlLuHIuuEQusptsNhs....sssssss.pssuG...sssohp+op+soposspp......+.ss............SGAPpR+pss..hus.sc..psQ..hhQAVTNuGKIVYGTlK.DGKLEVpGpVGElNpDLLGI..ESVNAGRKs.o+up.............stpph..............puu....h+KtcshsssspshDh..GMs .......MADKKNLAVR...SSYDDYIETVNKITPQLKNLLAQIGGDsAVKGGN........NNhsSQs-V.TAG...AssTKSKSoKChTs+sKo........pSoSo...............SsS+sS.p.oSGAP+RRTTs...ooS.hNA..hDGQIVQAVTNuGKIVYGTVR.DGQLEVRGMVGEINHDLLGI..ESVN.AGKKKsSKKh................PTsKK...........hshSSG....MRRpEpINssDsClDh..GM.h...................... 
0 0 0 0 +3926 PF04848 Pox_A22 Poxvirus A22 protein Mifsud W anon Pfam-B_4558 (release 7.6) Family \N 21.00 21.00 21.00 21.10 20.80 20.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.82 0.71 -4.32 13 94 2012-10-03 01:22:09 2003-04-07 12:59:11 8 1 75 0 1 111 206 152.10 38 81.27 CHANGED plICAhDlGsKNPARTllEl.ps........ss..I+llcIsKLDWS.ssWE+pVA+Dlsp....ashshVLLE+Qs+RSPasKFIYFIKGhLYs..opT+VIslsPs.....hsGsSY+sRK++SlclFLshhshFG..lss..lsch+KLDDVADSFNLAl+Yl...LsK .............................................hIsAhDlGs+N.AhsllEs.cs.............ss.....l+..llDl......u..K.lc....ho.....pDa.c..+.pl.....s.+Dlsp.............hphssVLlERQP..cR.u..s..hl.+......h..l...a.FI.+..u..ah....h....p......s..s......s..KVIs.V..SPs..........................hsG...s.oY+-.R....K...K..pS.....V....Esh....hs....ahcs.as.......lpc....sl...s...cp.+.KhDDlADoashAhpal....................................................................................................................... 0 1 1 1 +3927 PF04584 Pox_A28 Poxvirus A28 family Waterfield DI, Finn RD anon Pfam-B_4756 (release 7.5) Family Family of conserved Poxvirus A28 family proteins. Conserved region spans entire protein in the majority of family members. 21.40 21.40 24.50 24.50 20.70 18.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.30 15 68 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 47 0 0 50 1 139.50 60 99.61 CHANGED MNslolFhIllATsAlClllFQhYslYENYDNIhEFNssHusLEYSKolss.stlDRpVaDPNDplaDsKpKWRCVpas.ssYVSlShFGF.pussusp....l+pFsTl-sClsaTFScuscusIaNPChsss..pSp-ClFLKSlL ..MNuLSlFFIVVATAAVCLlhlQuYSIYENYsNIKEFNAsHAAhEYSKSlGG.PuLDRRVpDsNDsIpDVKQKWRCVsYs.NuaVSASlFGF.pA-sGsN....IRKFsThppCIDaTFScshshcIaNPClsPN..sssECpFLKSVL.... 0 0 0 0 +3928 PF04665 Pox_A32 Poxvirus A32 protein Mifsud W anon Pfam-B_5586 (release 7.5) Family The A32 protein is thought to be involved in viral DNA packaging. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.68 0.70 -5.07 10 981 2012-10-05 12:31:08 2003-04-07 12:59:11 7 4 115 0 835 1034 176 165.70 48 66.54 CHANGED Ep+FsRcSLLcsPFRMAlVGGSGSGKTsYLLSLFpTLVc+Y.KHIFLFTP....VhNsuYDuYlWPDHIpKVoopEE...LEYsLsssKpKIE+asp.upspK....t.pFLlILDDlGDhQhRS+sLlslhNaGRHlNlSlIlLCQTY+HVPlNGRsSITHaCCCNVS-SDlENhlRSMSI+GoKKpLl+slulhRuup.ppR+Vl.IIEDSVFspGEtRICYDoAD-pVltpclDhsILlsQFSHMKppLss ................................................................................................................pc.pphhpc++h+pcppp....................................................................................................................................................................IEhluK.KhQ.ua.c.........YP+R...sL.LILDD....F...A..S...H.h.K..s....R...-.Q....-...M.C.R.IL.K.....K...LR.HF.N..IS.VVICVQ.TA...KS..L.SKD.V.KR.IL.T..D.Il.L.F.P..s.h.scD..h.E.LMpESMusK.hc+cE.lWEtYKllps.......P+.oshcIH.IhsNpV........................................hlKst............................................................................... 
1 782 794 835 +3929 PF04948 Pox_A51 Poxvirus A51 protein Finn RD anon Pfam-B_6937 (release 7.6) Family \N 19.90 19.90 20.60 20.60 18.50 18.90 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.11 0.70 -5.77 8 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 35 0 0 70 0 276.80 52 97.71 CHANGED M-.hIlsss.Sh.+DhDll-alKssFssc..hspscNVCoKaDNllhoopoKsKllluDhPpIDpslSphYppsht..pslsRlSRFCKlltLcscKD.....alYlPtocsll..slLsIsppssssp....pCElphhsssspssI..pLp.scFsIlpss.pshhVKGsNllllIllF-EtsaPtIPLIRoISsNsVlISRHsRLHcElPscNWFKFYVEL+HsYoSuLhlllDGolLYAsuDYKTHChISKp.psp+c-lsDDCtCCYsssplplhsKp-llEpssCcsIRGGlpIplpcVGcFuASalGKYPNh-YIKIsluosYcMIsKQDplSGKptpusYlYGIA+R ..............................................................................................................................................................................................MD.lIVhslpuL.Kch.EplsalKNsFhhc...sptppsCcKlcNVhIsupoKssslIADlPhlDsulS-lhpohht..hsluRISRFsplIcl-cccc.......YsYhp..p-sls..sIloIu+ccD.........sCEllIsS.DpusssI..cLsph+hAILshs.sSFFsK...G.Nus.LlILLFD.hshsusPLLRS.losNsVlISRHpRLHcElPSpN.WFKFYlsl+psYCSlLYhVVDGSlhaAhADp+THshISKs.hhcsssINDECcCCYh-.PQI+ILDR-EMLsu..S..S.....Cchs..R+s..IhhsLs-lGcFGSSh..lGKY.EP-hIKI.ALSsutslIpspDhIsGR+taShYVYGIApR.................... 
0 0 0 0 +3930 PF04924 Pox_A6 Poxvirus A6 protein Finn RD anon Pfam-B_5792 (release 7.6) Family \N 19.70 19.70 20.00 19.80 18.30 17.70 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.43 0.70 -5.66 12 67 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 44 0 2 54 1 362.30 62 98.12 CHANGED MDKLRslYp-FYpIS+cYLE+pTsppsssssa-sDVshhhslVPlLEpKlss.IssshoD-sllhhM+asNY+hFSFWFLKSsAVVKSVYN+Lcp-pE+p+FhplFKDlLlssQTLlSlNsMYpNlKQDTs-IVsDSKKIlEIVspl+sussEssAYKlLQsNaoFIVKTINKlLSDENYLLKlIAlFDocLloDK-KLpEY+ElFolSsESllaGI+ClS-L-lsolslp.NN..KYltFFKKlLuslILFQNssLsup+FlplVuKLYslIapphpTNspluhLlo-VLDSlKsKlSl--lKpcGVpNlQoLI+aIusN+s.YKsIlucEYhKREsslIcILQsIsscssIcasGpslDlctLlchhK-+ahp ..............................................MDKLRVLYDEFhsISK-pLERETGLosSDlDhDhDlsIFMTLVPVLEKKVCs.ITPoIcDDcIlsMMKYCsYQuFSFWFLKSGAVVKSVYNKL-.DsEKEKFlssF+DMLLNVQTLI.SLNuMYopLRQDTEDIVSDSKKIMEIVSHlRuSTsENAAYplLQpNNSFIlpTLNKILSDENYLLKIIAVFDSKLIS-KEpLNEYKpLaTISoESllYGIRCVSsLDISSVpLs...NN..KYVhFlKKhLPpIILFQNNDlNuQQFANVlSKlYoLIYpQLpoNV-VGsLLTDsl-SsKTKISVEcIKQsGINNlQSLIKFISDNKcpYKoIISEEYlu+EDcIIoILQsIlNEacIcY-spllNhR-LIshh+ERYu.......................... 1 1 2 2 +3931 PF04745 Pox_A8 VITF-3 subunit protein Waterfield DI, Finn RD anon Pfam-B_6036 (release 7.5) Family Family of Chordopoxvirus proteins composing one of the two subunits that make up VITF-3, a virally encoded complex necessary for intermediate stage transcription [1]. 
25.00 25.00 237.70 237.40 18.40 17.30 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.71 0.70 -5.48 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 39 0 287.30 66 98.14 CHANGED MF-PVPDLNLEAolELGDVsI-sT+u+s+E..SsuYVSKsRRLFVH+SKD-ERKLALRFFLsRLYFLoYKElNYLFRClDsVKDVsITKKNNVIVAPYlILLTlSSKGYKLTESMIEhFFPELYNEsSKKFRFNSQIsIIQEKLGYssusYHVYEFEhYYSTVALALRsc+.....-s-lFNsRcESplVSSLSEITYRFYLIpLKSshVQWSuSTGoVINQlVNTVLlTVY-lLpKslpps+pFpCTLApEoclPlpLLlDRh-hFsKIIs-L++TNSFKISK+DKcsLLKYCp .MF-PVPDLNLEAolELG-VNIDpTs.shl+E..souFlSRSRRLFsHRSKD-ERKLALRFFLQRLYFLsaRElpYLFRClDAVKDVoITKKNNIIVAPYIsLLTlASKGhKLTETMIEsFFPELYNEpSKKFKFNSQVsIIQEKLGYpsuNYHlYDFEsYYSTVALAIRccc.....sSs.IFNlRQEShLVSSLSEITYRFYLIpLKSDLVQWSuSTGAVINQMVNTVLITVYEhLphslcs.cspFsCoLAlESc.LPlcLL+DR.s-LFsKhIs-LK+TsSFKISKRDKDTLLKYFp....... 0 0 0 0 +3932 PF04835 Pox_A9 A9 protein conserved region Waterfield DI, Finn RD anon Pfam-B_4431 (release 7.6) Family Family of Chordopoxvirus A9 proteins. 21.50 21.50 21.90 24.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.84 0.72 -4.06 8 68 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 1 53 0 54.00 71 55.25 CHANGED AIDlhRHhFMYFCEs+lRPNSFWFVllRollSMlMaLlLGlsLLhISsNs-csc ..............AIDLCRHFFMYFCEQKLRPNSFWFVVVRAIASMIMYLVLGIALLYISEQDDKKN..... 0 1 1 1 +3933 PF04508 Pox_A_type_inc Viral A-type inclusion protein repeat Waterfield DI, Finn RD anon Finn RD Repeat The repeat is found in the A-type inclusion protein of the Poxvirus family [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.54 0.74 -7.01 0.74 -3.68 27 453 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 22 0 1 343 0 22.90 49 16.96 CHANGED cElc+h+p+Ip-L-cpLscspcs .pElscL+sRIpDLERpLs-C+cs.. 
0 1 1 1 +3934 PF03286 Pox_Ag35 Pox virus Ag35 surface protein Mifsud W anon Pfam-B_4295 (release 6.5) Family \N 24.10 24.10 26.20 26.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.48 0.71 -4.79 10 84 2009-09-11 15:09:14 2003-04-07 12:59:11 9 3 42 0 2 75 0 198.20 52 93.58 CHANGED M.SWSINLu..uu.GDNFKTL-EIRAHVRSTTEssD..cssDDIFPs.....DI.................cIP.op+pP+pK+.....sTs.RK.......ssssKucKscKEKss.ttcc.psDs-K.............................TEENEs.sp..p-sscscpusSsssps......s-Ds.....................hDsSDLKlAT-sIlKDLKtLNsRVoAlSTVLEDVQAuSIoRQFTSLsKul-pL+slApsGKppVs..RKKs+s.sKK ..............M.AWSIo.p..us.oSSFpphsEIRAHLRsoA...ENpD......K.N-DIFPE.....DV...................................lIP..STcPKTKR.....sTsPRK......PAsTK+S..TKKtc-.+pplEE..E.....s...llEEhcp....sTEENSsssss..oPssGD.IsESlsAs-h-...........-.DssD-.........................poDhSDLKVATDNIVKDLK+IhoRISAVSTVLEDVQAAGISRQFTShTKuITsLucL.VopGKS.KVV..RKKVKo.CKK............ 0 1 1 1 +3936 PF03336 Pox_C4_C10 Poxvirus C4/C10 protein Mifsud W anon Pfam-B_3519 (release 6.5) Family \N 21.00 21.00 21.20 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.25 0.70 -5.66 8 127 2012-10-10 13:59:34 2003-04-07 12:59:11 8 2 33 0 1 132 15 260.00 52 96.81 CHANGED hlplHlFo-shFcshKp-lh..lp..php......p.t..h.............pcSKplhhcsoLsc-lhspl+sl....lYcpLKslVc......sVpVcNclTllpY-+GDahsppps..ssshs+NtlshaLLlaLpps-pGGcs+lYlcssss.hlslooDlLFDKolsH-oppVcsGcKplAlhDVhlc..h..ccsllsTIcY.hsssIsLYD+EsDp.sLCYC-lp.Ipshs..s-hhphGlIsDRSGKClLVHpstclsphcc...lacSFp-lChpphh-.....tlhplppsss+sIAWSslc.sscsDpalPpsc-hYKhLpclss+p+s.ppt+l-hh...........shss---E.hahhCpVo+YYFsLPc 
.........................................................................pTIKlFNp.EFDsIRN-lhpLhKhVp.......................................sss.l.pl.s...pD.s.D.I..-sIRcI....L.Y+phKN...Vc....................sl-lsssIoFhKYs...........N.ssl.....T....s.....s.h.t..YhLVIYLppsh...plKhh......aP.....Ts...p..........I.....p.o.s..........c..D.....IMFuKoLsF+.ppVhpshKhl.hhsISls.....Yp.ShspIpY..ssh...........IDIpssppsp.pLCYChIT.hDsHaL.lDlETlsVlVs+SGKCLLVNpahhhhhhpc...I.sSFsDlCMDpIF-h.spscELFoLpNDDsRNIAWDsDK.csssshWhPhT---YKFLS+Lh.hAK...ssThFDYY...........VLs.GD.T-PsTVF.FKVT+aYhNh..h................................................. 0 1 1 1 +3937 PF03287 Pox_C7_F8A Poxvirus C7/F8A protein Mifsud W anon Pfam-B_4089 (release 6.5) Family \N 24.10 24.10 25.50 31.90 23.90 24.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.80 0.71 -4.49 11 76 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 37 0 0 56 0 148.20 44 89.05 CHANGED MGIpH-LDIalVsEslulKslpLhKGDSYGCsIclKlsspKplcFlllL+.PDWppIs-lKPIpMclNGhsl-spLl........................pcohhphIYsuslslpscos.lphaSDsccp.apctYPslpINs.KKhYclhcpGhThhaI-SPIsspDKhpahc ......MGIpHEhDI.lIsssIAL+NLpLHKGDsYGC+LcIISsshKplcF+hIl+.PDWSEIcclKsLpsasNshslcls+l........................ccohYhlIYpAslpLYscpTplLlaS..Ds-s-.aK+YYPhIsLNhhsKcYcVK-cNYossaIEaPllshcchp.hc........... 
0 0 0 0 +3938 PF04701 Pox_D2 Pox virus D2 protein Mifsud W anon Pfam-B_5832 (release 7.5) Family \N 25.00 25.00 66.70 66.50 21.70 21.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -11.02 0.71 -4.22 10 55 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 41 0 0 41 0 135.20 51 90.63 CHANGED ch-lK+..LssI..ltNssllFssDlsplhsE+aIlLE+s.sGpshclHlYcssARFDN+oIa+lVKalY+sRsclL+llFPspshhESlcsLhPshTlsl...........pcsstss.hp...scs.ssKhlLLELFNSF+hGKsss...shPYYhLP ........phDIKK..ITDL..L.NusILFPDDV.pclLpEKYIVLERcsNGTPsssHIYco.hARFDNKSIYRIAKFLFhNRPDVIKLLF.....LEslEPLLPsKoINI..........ShssoEhPpL-....sPluTKlsLLELFNAFRoGtu-....PlPYYYLP.... 0 0 0 0 +3939 PF00874 PRD BglG_antitermin; PRD domain Declerck N, Bateman A anon Pfam-B_772 (release 3.0) Domain The PRD domain (for PTS Regulation Domain), is the phosphorylatable regulatory domain found in bacterial transcriptional antiterminator such as BglG, SacY and LicT, as well as in activators such as MtlR and LevR. The PRD is phosphorylated on one or two conserved histidine residues. PRD-containing proteins are involved in the regulation of catabolic operons in Gram+ and Gram- bacteria and are often characterised by a short N-terminal effector domain that binds to either RNA (CAT-RBD for antiterminators Pfam:PF03123) or DNA (for activators), and a duplicated PRD module which is phosphorylated by the sugar phosphotransferase system (PTS) in response to the availability of carbon source. The phosphorylations modify the conformation and stability of the dimeric proteins and thereby the RNA- or DNA-binding activity of the effector domain. The structure of the LicT PRD domains has been solved in both the active (pdb:1h99, [2]) and inactive state (pdb:1tlv [4]), revealing massive structural rearrangements upon activation. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.89 0.72 -3.86 211 15913 2012-10-02 16:05:11 2003-04-07 12:59:11 15 106 2053 11 1448 9078 56 90.60 18 28.55 CHANGED pcllphh.....cpphphph......ssthhhsLhhHlp..hslpR.lpps..........hphpsshh.pplcp.Y.sptaphspchhphlppph.shpls-sElsals.lHltss ..............................................t.thlphh.ppp.h...phph..........spthh.....sLhhHlp..hh...lp.R..lpps...............hph.p.s.s..h.h...p...p..l...p...p.......a.....spta.p...hs.p.p.hh.p....h....lpp....p..h....s.h....p..l.s.c.s.Elsals..lHhht.................. 0 437 816 1106 +3940 PF04580 Pox_D3 Chordopoxvirinae D3 protein Waterfield DI, Finn RD anon Pfam-B_4684 (release 7.5) Family Chordopoxvirinae D3 protein conserved region. Region occupies entire length of D3 protein. 19.90 19.90 20.30 20.10 19.60 18.60 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.61 0.70 -5.18 10 66 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 37 0 0 60 0 225.60 54 96.18 CHANGED MDIhll+Dst.YPhhsscsNcs.FlLLGNHspFIsshLpclpp+..hhFascYtloPDchG.oLplchlsSSat..I+s+hVsV-EFIshGpshcWCspphpts....phscsDpllIaDIsahcsshWKRIlhlpCPshlssphEp............FlTNPa.lhppsp.......clF+NllLRStlNshIFs.psSsLcpLLsH.Ilophsl-+h..pslls...ac-sssl+LlppCY-Rs+F+AFVYAWFsuQl.sNsphENEKVc+sacpVpchI ...................MDIFI.VKDNK.YPKVDNDDNEV.FILLGNHNDFI+sKLTKLKE+...VFFS-YIVTPDsYG.SLCVELNGSSFQ..HGGRYIEVEEFIDuGRQVRWCSsSNHIS....cDhHTDKFlIYDIYTFD..uFKNKRLV.FVQVPsSLGD..DS............aLTNPh.L....S.......PYY+NuVARQMVNsMIFN.pDSFLKYLLEHLIRSHYRVSKH..ITIV+...YKDT-ELNLTRICYNRDKFKAFVFAWF......NGVsENEKVLDTYKKVSsLI.......... 1 0 0 0 +3941 PF03288 Pox_D5 Poxvirus D5 protein-like Mifsud W anon Pfam-B_4009 (release 6.5) Domain This family includes D5 from Poxviruses which is necessary for viral DNA replication, and is a nucleic acid independent nucleoside triphosphatase. 
Members of this family are also found outside of poxviruses. This domain is a DNA-binding winged HTH domain. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.96 0.72 -3.72 41 1059 2012-10-04 14:01:12 2003-04-07 12:59:11 11 32 639 6 337 979 69 91.70 20 16.60 CHANGED pssDslh-Fhtphhs..............tu.hlPs.phlattYhpahccpGath.......LstppFppclsp.hhpt...s.....ahp++p+..................sthspthhphhhhp-h.s ...........................................................ps-shhtFht.h..........................................h..h.hsp..p.LYcsYhtahppp..Gats.......hohppFtpslpp.hhp.......t..........apc+p............................................................................................ 0 258 294 317 +3942 PF04805 Pox_E10 E10-like protein conserved region waterfield DI, Finn RD anon Pfam-B_6357 (release 7.5) Family Family of poxvirus proteins. 25.60 25.60 25.70 94.90 25.00 25.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.47 0.72 -4.45 11 59 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 43 21 69.90 74 73.15 CHANGED slEtCK++LYsIssTLPCssCRtHApcAIpcNNlMSSsDlNYIYaFFIuLFNNLssDspa+.IDlpKVpPL .NIEuCKRKLYTIVSTLPCPACRRHAThAIE-NNlMSSsDLNYIYYFFI+LFNNLASDPKYs.IDlsKV+PL. 0 0 0 0 +3943 PF04497 Pox_E2-like Pox_E2; Poxviridae protein Finn RD anon Pfam-B_3979 (release 7.5) Family This family of proteins is restricted to Poxviridae. It contains a number of differently named uncharacterised proteins. 
30.00 30.00 32.80 32.70 29.70 28.10 hmmbuild -o /dev/null HMM SEED 728 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.09 0.70 -6.50 27 184 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 43 0 0 167 0 610.30 29 95.71 CHANGED M...h..spplRcuhtphtsp....................phsaccLshpc..shpLlthGlH.spLPcphYscsl.chs.splhhFcPcpVphhDLlpllpppps.s..pphtstltaa+ppllppsshp.llp+hlphhhl.s-DDlch....llsc...t.hss.hLhplNsp.ltph..hhhScptlcplhpp.shphhshLYp+.s.hshphLhphhtcasIsPsNpulhp.p..shstsl-llpphsppp.......hlchlspplhsscphhphlh......IhtsplsshhshspcaLhsphsc............GlhsshhFs...........hspht..ptlocc..........phphIscaIshYchp..................uplhsph.c-hlpcphshtlh....pst.hlphhphshpsptshhtshp.sh--ll...........paLDsltlpsh.chphs.l..-.................hhhs.s.haN..spllc.hlpphuhsptKhptLhht..............hs.s.tshthhhphht..psshhhsst.hhsolh.h..............ht..t.hhhh...p...t..p.sh.ststpsphp.h..h.p..thph.hht........p.........................s.th.ss-scllsplaDlsphApaGll.hshhhhss.WhPlhshl........................phsphssssph.psslhpls.schscatsh........shst.lsshasthsshhsslhhYllsuhhh.....s.pp..thppFlppllsshhcGhtlt.........hspslpsss.p.hhclcphls.ts.ssh.hhhphhL+ssltlhccl 
......................M..h..sp.hR+Ah.c..s+....................phshthlppcc..tlsLlchGhH.shLPKsLY.psl.p.s..plhhF.PchlsshDll.pslpp.p.p.s..-hhtshl.aHKsslhhsu.hs.llhhhh.Y.ll.ocsDlca..........lhpp....ss.hthhL.hINt..l.hh..hpho.sEIlsllpc.shhhh..lYpp.s.lD.chlhph.DcYsIsPlpsulhc.p..s.EhhIcllhthshsp.......hlshlsps.hhpsshhphIhs.....hhpt+lshhhshlp-aLpshh.D.......................sIhushhFp...................lshhs......lTts.............................E..alph.hshYshh...................................hp.hs...p.Dhlh.c.shpIs......poss.l.hhphP..hp..p.shlhp.p.ph.DIl...........palDphchso..chuh-sl..-.................hhho.ohhhN..ssllphhhhp.h.h.hhhhhLhh...............hshpp..sshhLpth...+Ghhhh.sh.slhoh.................h.h.hp..ph.....p.hpsc.lsh.sshppsh.h.hhphpul...............................................hsp-ctlh.plaDlsRhA.aGhh..s.hh.ps.WsPlsphl........................chp-hhs.t+..phsllslhsp..hchps.........ph..slss..AtlshhhsTll.Yllhs.hh.....slpp...sccFV.pllphhhcuhtl..........L.p.lpsss.h..pclc-hsspGs.ss..aLh+.plphh..lh-cl......................................................................................................... 0 0 0 0 +3944 PF04656 Pox_E6 Pox virus E6 protein Waterfield DI, Finn RD anon Pfam-B_4392 (release 7.5) Family Family of pox virus E6 proteins. 
25.00 25.00 25.70 195.00 20.50 20.20 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.73 0.70 -6.31 11 74 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 45 0 0 63 0 552.40 72 100.00 CHANGED MDFIRRKYLIYTIENcIDFLRsElhsKlSNFoLNHVLAlKYLlhsFs+sVlTKDVLuNsNFaVFLHhV+CscVY-hVL+pSFDlPsLYlKuLlKNYshFschIpsYKphspcLl.Dc+FlElschusphp-lIGVNYDhtLNPLFHpGEPI+sMEIIYuKLFKKTcF++V+KlpVlRLLIWAYLoKpDTGhcFsDNDsQDlYTLaQKoGs.llpS-MTEpFKEYIFs..ss+TSYWlWLpEsIhNDsclYhctsApoMY-KlLSYIYSElKQGRVNKNMLKLVYlFEsDp.I+ullLpIIYGVPGDILSIIDo+DEsWKpYFluFYK-NFIDG+TFsSspoF.-DLF+VVA+IDPEYFDsc+.IhSlFcpcP-plchFDchDINsTYlSplIYpTpDlsLpslEchptCQIYN-DTcYaIKEYNTYLYLsE-DPhVl.cGhLsKLSsl.sps++..hoLFScsILKYYlDG+LAslGLVLssYcsDlll+lloHLKClEDVTsFlcausC+NsSllPSllRTILuNFNluIIlLFp+FLRENlaaVEpaLD+opHLTpNDKKYILplIppGRS .MDFIRRKYLIYTVENNIDFLKDDsLSKVNNFTLNHVLALKYLVSNFPQHVITKDVLANTNFFVFlHMVRCCKVYEAVLRHAFDAPTLYVKALTKNYLSFSNAIQSYKETVHKLTQDEKFLEVAcYh-ELGELIGVNYDLVLNPLFHGGEPIKDMEIIFlKLFKKTDFKsVKKLSVIRLLIWAYLSKKDTGIEFADNDRQDIYTLFQpT.GR.IVHSNLTETFRDYIFP..GDKTSYWVWLNESIANDADIVLNRsAITMYDKI...LSYIYSEIKQGRVNKNMLKLVYIFEPEKDIRELLLEIIYDIPGDILSIIDuKNDDWKKYFISFYKuNFINGNTFISDRTFN-DLFRVVVpIDPEYFDNER.IhSLFSTSA.s-IKRFDELDINNSYISNIIYEVNDITL-TMD-MKKCQIFNEDTsYYlKEYNTYLFLpEsDPMVI-NGILKKLSSIKoKSRR..LNLFSKNILKYYLDGQLARLGLVLDDYKGDLLVKhIsHLKsVEDVSAFVRFSTcKNPSILPSLI+TILASYNISIIVLFQ+FLRDNLYHVEcFLDKSlHLTKsDKKYILQLIRHGRS. 
0 0 0 0 +3945 PF03394 Pox_E8 Poxvirus E8 protein Mifsud W anon Pfam-B_3759 (release 6.6) Family \N 20.50 20.50 20.70 89.40 20.20 16.80 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.68 0.70 -5.00 8 66 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 41 0 0 52 0 236.70 77 85.75 CHANGED h+NTYLYHNYAYGWIPETAlWSSRaAsLDlTDYYPITLGLLKKFEFMhSLa+GP...s.sYpsKINTEFlupGSFhGRalsaa++FoILPTcEFISFLLLTSIPIYNILFaFKsTpFDhsKHoLFuuhYosss+HlELA+Yh++uGDYKPLFu+Lc-..cslYo.....u...shPlshtsll+spsPsu.p.ssS.DYETLANLSAILYaTcYDPVLhFLhFYlPGlSVTTKITPuVEYLMcKLsLsKcDVsLl MKNTYLFDNYAYGWIPETAIWSSRYANLDASDYYPISLGLLKKFEFLMSLYKGP...IPVYEEKVNTEFIANGSFSGRYVSYLRKFSALPTNEFISFLLLTSIPIYNILFWFKNTQFDITKHTLFRYVYTDNsKHLALARYh+QTGDYKPLFSRLKE..NYIFT......G...PVPIGI+DIsHPNLSRA.R.SPS.DYETLANISTILYFTKYDPVLMFLLFYVPGYSITTKITPAVEYLMDKLsLTKsDVpLL. 0 0 0 0 +3946 PF04943 Pox_F11 Poxvirus F11 protein Finn RD anon Pfam-B_6911 (release 7.6) Family The protein F11 is an early virus protein. 24.10 24.10 24.10 24.20 20.30 24.00 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.06 0.70 -5.60 7 107 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 36 0 1 85 0 197.40 38 88.29 CHANGED o.tphss......hslp.p............Phhhlspussp.l....Lhsslalp..slcslhspsstllscssphpp.s..p..apht.l.l.sshhhchlhDtpsaFcls-shll+LcHGshahptshasssshuFsAlICl+NpGhSulhVsposhlppshppGssllhRSspulphLPQIuGcuhYLIlplsPTcchhcphh.slhssspspsu.........p.phthsuppc+cshchlsslIphpl.LEchhhchs....hhpEhtshYsslhh........................................pppppp.lpshhppscshhppuhssh.hh..........tpsspssh+cchLhtthp.+Dlp.lhsshsp...............laschtclhpclsshls..................tps-hlppaIhthlspspssh.psplhpsL.....hslsslsahl 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1 1 1 +3947 PF03337 Pox_F12L Poxvirus F12L protein Mifsud W anon Pfam-B_3082 (release 6.5) Family \N 25.00 25.00 87.60 87.40 24.50 17.70 hmmbuild -o /dev/null HMM SEED 651 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.27 0.70 -6.61 11 87 2012-10-02 01:06:00 2003-04-07 12:59:11 8 1 43 0 0 83 0 615.20 60 99.36 CHANGED shhsplpssss..t....plsssLpch.chsllLA+spcGKGlllhusNlptspsh...lslopLcllulsthl-shssPstPhspLhIsuh-o-.saYSPcTSpo.PLlDIL++Ruppps..-LtpAl.tc.hs.pussSloEINpWhspsGLt+YRFlsacDt+thph....sphTllD-MsIsaIGpHhIWVKDh.sYsRPElDI.LsaDlcsluptspWucl.pshstphhplhuhhlpuhlos..sGPplYMIoTY.PG+sFhshsSsK.Llp-FLcWlp-.hhtshp....................TlsLlGahSSlFDhPLL+ssasps.pGWshl....usssllScsGh+lhllDhupFuhuh.olp-YCpaWsusshshscD..llocpEs+hphchlccsuscsspsLasAshsppssLsplhsssshhtFssL-DMllspuhhhuApp.stthYhPstssshshlppulptctVpoh..ssss..t.hppa+L+SllclltsphYPlG+PpaVpp.hscGKLYIALCcVTh+ssl+IPllassc.sEsshoF.ssLTSVDIphAt+lGGYpI+.lsALpW-cShpl.+sslpchhstlscl...spospLhsplsp....scLh.pspss.hsp..hl..atAFAASYCRtplHslIcclDSHalGsaVh+HsYpclalpsssststshLSphhcl 
..........................................................................s...QhLMKTAN......NYETIEILRNYLRLYIILARNEEG+GILIYDDNIDSlMSM...MNIT+LEVIGLT.HCTKLRSSPPIP...MSRLFMDEIDHE.SYYSPKTScY.PLIDIIRKRSHEQG..DIALAL.E+.Y.sIEN..TDSISEINEWLSSKGLACYRFVKFND.Y.RKQhhh+.ho+tTIVDSMIIGHIGHHYIWIKNLETYTRPEIDV.LPFDIKhISRDELWARI.SSSLDQTHIKTIAVSVYGAITD..NGPhPYMISTY.PGNTFVNFNSVKsLILsFLDWIKD.IM.TSTR....................TIILVGYMSNLFDIPLLTVYWPNN.CGWKIY.....NNpLISSDGARVIWMDAYKFSCGL.SLQDYCYHWGSK.PESRPFD..LIKKsDAKRNhKSlVKESMsSLKSLYEAFcTQSGALEVLMSPCRM.FSFSRIEDMFLTSVINRVScNTGMGMYYPTNDIssLFIESSICLDYIIVNNQcS.....NKYRIKSVLDIISSKQYPAGRPNYVKN..GTKGKLYIALCKVTVP.TNDHIPVVYHDD..DNTTTFITVLTSVDIETAhR.AGYSIVELGALQWD-NIPcLK-..sLLDSIKhIYDL.NssTTN....NLLEQLIE............N.INFNNSSII.LFYTFAISYCRAFIYSIMETIDshYISQF....SYKELYlsSSaKDINEsMSQMVKL................... 0 0 0 0 +3948 PF04596 Pox_F15 Poxvirus protein F15 Mifsud W anon Pfam-B_5182 (release 7.5) Family \N 25.00 25.00 167.60 167.30 16.70 16.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.85 0.71 -4.58 11 51 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 38 0 0 33 0 136.20 68 88.25 CHANGED LsPF+sMs+IKINpc-NClLGNRCFVKlscV+ahPpsuls....Topolph+sacFTLsELLYSPFHFpQsQaQYLhPuFVLpCI-EAs+NpppC+YChss+ssc..suLNINIFlPThspphYlIIGLRIKsFWsssFcIE M+PFKNMNKIsINpcDNCILANRCFVKIDTsRYIPssSIs....oSshIRIRNHDFTLSELLYSPFHFQQPQFQYLLPGFVLTCIDKsoKppKcCKYCISNRGDD..DSLSINlFIPTINKSIYIIIGLRhKsFWKsKFEIE. 
0 0 0 0 +3949 PF04708 Pox_F16 Poxvirus F16 protein Mifsud W anon Pfam-B_5863 (release 7.5) Family \N 25.00 25.00 25.40 33.50 23.90 24.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.49 0.70 -5.06 9 65 2009-09-11 09:28:37 2003-04-07 12:59:11 7 1 39 0 0 56 0 212.30 56 91.97 CHANGED cpAAIlTSLlSLFDsSlpaQlchC+pYCp.LuhpVlhEl+EFGYIs-csLpocpW.ssltsssIshlVFYQlKQLoISscpLYshhh+.sc.ssl+lYFV+DsLsFDG..hPPoF+plshslphtsRKKl+DlIsLlshpTsscpllpcFlpsNFGsVctLLpllcpssLWlphhLsppcp+t..................hshhpa+pFlsKl+clct.hpsp.l-cICsshpsIsl ..............M.KVVIVTSVuSLLDASIQFQKTACRHHCNYLSMQlVKEI-EFGTINEKNLEFcTWKDVIQNDEIDALVFYRVKQISISsGVLYcSMMR.NRTKPISMYFVRDCLAFDG..sPPSFRMTSCNINAYNRsKIKDLIIL.MNMKT.CNKKIIGEFIIDNFGSVcuLLSIINSNVTWVTSVINNSNGRGINIRVS........NNKMLTITSFRRFVNKLKhYKTTKCsSQLDNLCT-hNKMcI.... 0 0 0 0 +3950 PF04767 Pox_F17 DNA-binding 11 kDa phosphoprotein Waterfield DI, Finn RD anon Pfam-B_6128 (release 7.5) Family Family of poxvirus proteins required for virus morphogenesis. Protein function necessary for proteolytic processing of the major viral structural proteins, P4a and P4b [1]. 25.00 25.00 32.90 32.20 18.10 17.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.27 0.72 -3.74 11 57 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 37 0 95.10 63 94.23 CHANGED hhhcoPFhlsTc.EGRYLVLKAlKlCslRTV-Ccus+ASCVLKVEKP.susC-R..ssoPs.Rstht.......osPs.+sssplPFMRTNhLpsl.usNR.NssuRlLu .u..AHTPFYINTK.EGRYLVLKAVKVCDVRTVECEGS.....KASCVLKVDKP.SssC.ERR......PoSPu.RCtRh.......ssPu.....pQVPFMRTsMLpshFAsNR.NV.uSRlL.p 0 0 0 0 +3951 PF04599 Pox_G5 Poxvirus G5 protein Mifsud W anon Pfam-B_5216 (release 7.5) Family This protein has been predicted to be related to the FEN-1 endonuclease [1]. 
20.10 20.10 45.60 45.30 19.00 18.40 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.60 0.70 -5.78 11 78 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 70 4 413.50 57 96.90 CHANGED MGIKNLKoLLLcptsLppl...csh.+s.hstIFVDTMSlFholAasVssl--LpspFhcalpta.ppsG+VTLFlDRGsIsIKcsLREKR.+suhcNThKRKphElcp.........LpstIstLslsDhhYEEhKT-lcL+IcKLpFa.FLuspsNlKhs.L-csLstl...-sVsIlYCDslDAEFVMCpcAKcls.poGpWPllISoDQDTLLhuSsDshsKI..ItohsphYpalPssco+YLoKLlsLsNGCDaFsGLYGhsITpKoLpp..IpL....F-DFol-Nll+....SLshKNYsp.......Ktos+hlD.....lDpIIcFIscY.................osLDcslY.pppsssslolQEFlFoALsp+Wpphcsohlc.ssulhssLhslLc..P++cIsps-lpplpphlpcs.hp++sslssIpolssIFGYchspsssllhGIhshpslhLsacc...pFYFNspsIIc ...MGIKNLKSLLLEs+SLTlL...D-slh+laNGIFVDTMSIYIAVApCVpNLEELsolFlKYVNuWl..K..KuGHVTLFIDRGSI+IKQsVRDKR.RKhSK.TpcRKhLELEK..............................................................ssu-Ip..NV.ouhMtEEIKAEhQLKIDKLoFQIYLSDuDNIKho.LNElLT+h.psENVTlaYCDchDAEFVMCLEAKspapoTGEWPLIISTDQDTMLFuSsDsHPKh..IKslTQLFKFlPoAEssYLuKLTALVNGCDFFPGLYGtSITssNLNK..IQL....FoDFTIDNIVs....SLA...IKNYaR............K.TNosVD.....VcNIVTFINcY.................ANLDD.VY.ualP..PCQCTVQEFIFSALcEKWNcF.....KsSYLc.sVPLsCpLhYALE..PRKEIcVSEVKTLuohIDh-..NsKscI-sIKSIoSIFGYuscN..ss..oI.VFGI.hK.cNLLLulNs...oFYFNsollI.o................ 0 0 0 0 +3952 PF04787 Pox_H7 Late protein H7 Waterfield DI, Finn RD anon Pfam-B_6266 (release 7.5) Family Family of poxvirus late H7 proteins. 
22.10 22.10 22.40 29.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.76 0.71 -4.40 9 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 53 0 142.60 56 98.71 CHANGED MDc+L+oluhThFpGELoThDIhsLthalhsppPp-TlFShc.csspFhIDFcY.DssLASsYlspphpsI.s-cYhsaushIAcELTNh-IIp-DlssYIpsSc+LKRhIKhY+s...pKps++IppssK+LKlAhp+GlDY-YIK-sh ...........MDKRhKSLAMTuFaGELsTLDIMALIMpIFK+HPNNTIFSVD.+.DGpFhIDFEY.DsYKA........SpYLDlsLTPIouDECKoHASSIAcpLsssDIIKEDIu-YIKToP+LKRFIKKYR....NRScsRIScDocKLKIALuKGIDYEYIKDAC........ 1 0 0 0 +3953 PF03289 Pox_I1 Poxvirus protein I1 Mifsud W anon Pfam-B_4306 (release 6.5) Family \N 25.00 25.00 184.70 184.50 19.20 17.70 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.12 0.70 -5.59 11 65 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 42 0 0 54 0 307.60 73 99.43 CHANGED MEp..-QLVLNSISA+ALKuYhsoKIs-hVDELVs+KasQKKKSpsK+hEsRIPlDLI+psFV++FpLcsY+sGlLsSLIsSLVENNYFop.DGKLs-sucpELVLsDlEK+ILupIs+sSsLYIDluDVKsLAuRLKssAssFpFssppYhLEsDKlE-lINpLs+NssIhLDEKsSlKDShYhls-ELL-VLKsRLFRCPQVKDNhISRTRLYDYFsRlTKp-EsKIYVILKDt+IAcILGIETVplGsFlYTKHShLlsoISuplDRYSK+Fp-sFYpsIAEaV..KDNEKlNVSKVVEsLhVPslph-t...E .s.EhEDQLVFNSISARALKAYFTAKINEMVDELVTRKCPQKKKSQAKKPElRIPVDLVKSSFVKKFGLsNY.GGILISLINSLVENNFFTK.DGKLDDTGKKELVLTDVEKRILNsIDKSSPLYIDISDVKVLAARLKRSATpFsFNGHTY+LENDKIEDLINQLVKDEuIQLDEKSSIKDSMYVIPDELIDVLKTRLFRSPQVKDNIISRTRLYDYFTRVTKRDESSIYVILKDPRIAsILSLETVKhGAFhYTKHSMLTNAISS+VDRYSKKFQESFYEDIAEFV..KENERVNVSRVVECLTVPNITIuSNs.E.. 
0 0 0 0 +3954 PF04661 Pox_I3 Poxvirus I3 ssDNA-binding protein Mifsud W anon Pfam-B_5571 (release 7.5) Family \N 18.60 18.60 18.80 18.90 18.30 17.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.80 0.70 -5.60 11 69 2009-09-14 12:26:09 2003-04-07 12:59:11 7 1 43 0 0 57 0 259.30 58 95.83 CHANGED ssphppssscsst.hTCssslEasKSLSpSspKsIEuVpLosSQYPSCSsIsIsLs-oLuSKhsSsaIhlEGEuKIY+NKKsstpu......-sYFLKI+PouASPhLYQLLEsIYsNI+csp+lPsSLssl.slssh-EKTFpcGslYI..NKhsGAllEYpssG.sputlpolscElEsLuKRDtQhuKsIlsPIVFYRsuspsKVTFALKKlIh-R-hossVlDlsGcsp+lsMu.....Eo....sEE-hsRGLGll-..pD.........csh-E--t-..poLFNV ......................ts.p.csssuuDSlpTCuGVIEYAKSISKSNs..KCIEhVTLsuSQ.YssCSSISI+LT-SL.SSpMTSTFIhLEGEoKlYKNKucpsRS......DGYFLKI.KlTAASPMLYQLLEuVYGNIKcpcRIPsSL+uL.sVETIsEKTFKDEs.IFI..NKLNGAhVEYlSsG.ppSIlRSI-sELEuLSKR-+QlAKAIIsPlVFYRSGsETKITFALKKLII-REssANVIGLsG-SERVSMT.....Es....sEEDluRsLGlVDl.-D.........EhcEDsDcE..cslFNV....................... 2 0 0 0 +3955 PF04713 Pox_I5 Poxvirus protein I5 Mifsud W anon Pfam-B_5901 (release 7.5) Family \N 25.00 25.00 73.10 72.90 23.10 22.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.36 0.72 -4.27 11 51 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 32 0 76.80 53 97.05 CHANGED hlss+ElhosIGlTlLhllMlloGuALlhKplsP.a+hlshRShshsRVlshLEalulllFIPGTlsLYuAYl+pLhh ...MhsAhslLoAIGITlLMLLMVISGuAhIlKclsP.pclhoMpSlpFNRsVTIhcYlulhIaIPGTIILYusYlKoLh... 0 0 0 0 +3956 PF04595 Pox_I6 Poxvirus I6-like family Mifsud W anon Pfam-B_5073 (release 7.5) & Pfam-B_6224 (release 8.0) Family This family includes I6 proteins as well as the related F5L proteins. 
20.80 20.80 37.30 34.40 20.70 20.00 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.89 0.70 -5.68 13 110 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 43 0 0 95 0 309.40 41 87.11 CHANGED +pss..............ISFsFss.FYYsN-sLFsKPpNoL-DVsKSlllhpoFcYEpaVIpull+hL..+thualhDlaFlPlGWLsG....t--s....sspHlsl+...llh.oss..htsl+spl+-hLuhaslhshsl...hps-pclsIspFshsts....hPssVl.......ShaPFDs-t.lLlVlFFGpapDuYCGIoY.sss+-pl.tllphLtPhVuElplloD-lsRFsol+lhss..pss+tFPcsp.......ltoICEll+tF-cpcFsssssss.s....shssalPK+lVSllDLPSsV-I+CtScsGlDalTHIssK+LsslLlIsKDsFl+ssohsGTFKKENllW+GpYTYR......Ihc ................s...hIpaslDs.hahCscslho+.spT...h.cpllhhto.shpphhIpuhIKhl........hslhhlslu.Lp.....h-Ds....s.oHlh.t...lIh.soh...ISlKuplI-hhs.hshhshsh...hpschchpIsoFslssS....hPhphI.......pahPFsTD.....hhahG.hpc.hsG.sY................Ph.S.h..lSsphsRho.l+.hNs..hSshhFspNh.......lpsICE...pa-ths.sss..po.o......sups.lspclsShhs.....plps.scsssDa.T.hNscpLhhIllI.........Th.uhhlh..llhtuIhhY+............ 1 0 0 0 +3958 PF03338 Pox_J1 Poxvirus J1 protein Mifsud W anon Pfam-B_3556 (release 6.5) Family \N 20.70 20.70 21.70 76.40 20.30 19.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.79 0.71 -4.51 12 67 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 48 0 0 46 0 143.40 63 94.92 CHANGED MDH.ppYLLThFLp-D-SFF+YluppsD-pAhuDlpsIsphLDFLLulLIRSK-KLEulGahYEPLSEp.a+slh-Fp.Dh+sL+pLFs+hsl.phs.spplpls+GYluDFVlSlhRLp+phths.ss..psspYIDPpcshthsNlluILp MDH.sQYLLTMFFsDDDSFFKYLAuQDDEouLSDIhpITpYLDFLLhLLIpSKsKLEAVGHCYESLSEE.YRpLs+FT.D.p-FK+LFNKlPI..Vo.DuRV+LNKGYLhDFVISLMRhKKEsuLs.ossh-PlRYIDPR+DIuFuNlhsIL+. 
1 0 0 0 +3959 PF03339 Pox_L3_FP4 Poxvirus L3/FP4 protein Mifsud W anon Pfam-B_3380 (release 6.5) Family \N 25.00 25.00 30.90 156.20 22.70 22.60 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.95 0.70 -5.97 5 71 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 43 0 0 64 0 314.20 61 90.71 CHANGED MptNNtsp-sscspspsplPps.s.ppsppshh....-..cFlEpRLCsYE+Eps.hph-ChluhhaslpKQVscE.......EssCWlELSuLV+u+KALGFPLlYusKcaoa.G+sLYFEpFKs...s+Vp+LTsso+ClsDsllFQIVlILYSLYK+sIaSD-FlFDLVSIPRSTIohSVNQLVFslsTDsLVVLSls.TRLY+AcLPQSCYLsYlau+ssLA.++shEooNY.FFEWFI+NHlchLo+QslDIFKlKK+YlTsspIsRLsEPGTLVYVh+-DhalhGITLT-VSloDNVRVLFSsDGt..slLEIDDFSlcDVFsAGELlsRSQsooI ..................t.+hNsppRh.p.s.hsps+sc.pss.ss.+pcNK..CR-......EsuDFINIRLCAYEKEYC...NDGYLSsA.YYMLKQVDDE.......EhSCWuELSSLVRSRKAVGFPLLKuAKRI.S+....G.S.MLYFEQFKN....oKVV+LTP.QlK..CLsDoVIFQsVVILYSMYKRsIYS.NEFCFDLVSIPRTNIVFSVNQLMFNICTDlLVVLSICGNRLYRTNLPQSCYLNFIHuH.ETIA.RRGY...E+SNY.FFEWLIKNHlSLLTKQshDI.hKVKKKYATGAPVNRLLEPGTLVYVP.KEDhYalGISLTDVSISDNVRVLFSTDG...hVLEIEDFNI+clFMAGEhFVRSQSSTI... 0 0 0 0 +3960 PF00485 PRK Phosphoribulokinase / Uridine kinase family Finn RD anon Prosite Domain In Arabidopsis the region carries two binding domains, a phosphoribosylpyrophosphate-binding domain and, at the very C-terminus, a uracil-binding domain. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.17 0.71 -4.75 12 8151 2012-10-05 12:31:08 2003-04-07 12:59:11 13 32 4282 57 1728 5333 1269 168.80 27 60.59 CHANGED lIulsGsSGuGpoost+phsplFst..l.sh...............hsuhhphsps..chpht-cpsttspphoahuPcANsFDLhhphh+shtputsscp.hYsHsssptss...........pph.tssclLhhEGLHuhhspc...ltpLhDhplhlsss.slchhpKlpRDhspRG+ohEulhsoI..tRhPDassYIsPQhppsDlshpplPsstsu ...........................................................................................lIul.uGusuuG.....KoT.hu....ptlhp.h...lt.t.......ht...................................h..l.spD...s..a..h..h...s..t..p..h..h.......p.....c...t...h........h...t.....t.....t...h....s...h.......t.......t...P.....p......uh....Dh....p........h...L..h....p.h...l....p...p....l....t......p.........G.........p.......s....l....p..h..........P...h...Y....s.....ah....h.....t...s.hh..............................tp.....h...h..p..s.....s..c...l..l.IlE...G....l......h.......s...h.......t...s...tt..........................l..p.c...h.h.D...h...pl.....a.............V.............D.............s.............s...........................-..............l.............p.............h.............h.p+..............l........RD.h.........R.s.......h...s..h...p....tl...h..tp..........h...hhh.....a.t..a..l.tP..p..hphscl.h.........h............................................................................................................................. 0 527 1013 1414 +3961 PF04872 Pox_L5 Poxvirus L5 protein family Kerrison ND anon Pfam-B_6088 (release 7.6) Family This family includes variola (smallpox) and vaccinia virus L5 proteins. However, not all proteins in this family are called L5. L5 is thought to contain a metal-binding region [1]. 
21.90 21.90 22.10 22.70 21.80 21.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.66 0.72 -4.34 12 65 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 0 0 46 0 76.90 55 61.63 CHANGED ElhhhFp.+s+hs.sPI-ph.pco.LhCctstLhI.sLP..ssphsALulstpPIshpsCcsLLpSING.SppVSLsDlLpR .............ELsMhFh.Kp+lP.DP.ID+l.c+usLuC--DKLMIhGLP.hsspssALSIN.u+PIVYKcCscLL+SING.SQ.VSLNDlLRR... 0 0 0 0 +3962 PF03356 Pox_LP_H2 Viral late protein H2 Finn RD anon Pfam-B_3929 (release 6.5) Family All Members of this family show similarity to the vaccinia virus late protein H2. This protein is often referred to by its gene name of H2R. Members from this family all belong to the viral taxon Poxviridae. 21.40 21.40 21.40 76.20 21.30 21.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.95 0.71 -4.61 12 119 2009-11-17 14:52:54 2003-04-07 12:59:11 10 1 51 0 0 56 0 179.90 64 97.56 CHANGED MDcTTLsVNGlEL-YsRp+tscslphA+sSTlhFFlllLhlSslLhaaQsScNslhsELs+YtRIKsslpuW+PLVpuKT+lES-RGRhhuhs.+sDhFpFpClDFGsYalPlRLDppTFLPQAIRRGpGDGWMl+KAuchDsSAQQFCEYllts+usNsITCGhcMhsclGYSGYF.suHWCushhsll .MDKTTLoVNuspLEYlREKtscGIRuAKsSTlhFFVLILAlSuLLLWFQsSDNSlFuELsKYsRIKNsVpuW+PLV-SKTKLESD+GRLhAAG.+D-lFpFpCVDFGuYalPlRLDpsTFLPQAIRRGsGDGWMVKKAsKVDPSAQQFC-YLI+s+ScNVITCGscMhspLGYSGYFhssHWCSsh.sh.... 2 0 0 0 +3963 PF04887 Pox_M2 Poxvirus M2 protein Kerrison ND anon Pfam-B_6168 (release 7.6) Family This family includes M2 protein from variola virus. The function of this protein is not known. 
25.00 25.00 64.90 64.60 20.60 20.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.38 0.71 -5.00 4 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 26 0 0 40 0 194.80 75 88.90 CHANGED pCsopcYRYW.LAupLTIGLsYsI.Ep..tEC+hc....s+hushllTGYGLpIshpITs.lspphVAuuEGhsssNpLslLLFhspchoc.tssl....plTITCh-h-CDssshcpsLssplpKN...-lhIhGSClTCVsL-T.Pspl......Nshhs+PhShhltcssuYohR.....sapcchspC.lDhccluYslC.+p .........CPPRQDYRYWYFAAELTIGVNYDINSTIIGECHMSESYIDRNANIVLTGYGLEINMTIMD.TDQRFVAAAEGVGKDNKLSVhLFTTQRLDK...VHHNI....SVTITCMEMNCGTTKYc.SDLPESI.H+pSSCDITINGSCVTCVNLETDPTKI......NPHYLH.PKDKYLYHNScYuMRGSYGVTFIDELN...QCLLDIKELSYDICYRE...... 0 0 0 0 +3964 PF03341 Pox_mRNA-cap Poxvirus mRNA capping enzyme, small subunit Mifsud W anon Pfam-B_3728 (release 6.5) Family The small subunit of the poxvirus mRNA capping enzyme has been found to have a structure which suggests that it started life as an RNA cap 2-prime O-methyltransferase. It has subsequently evolved to a catalytically inactive form that has been retained in order to help stabilise the large subunit, D1, and to enhance its methyltransferase activity through an allosteric mechanism [2]. 
25.00 25.00 45.50 45.40 19.80 19.20 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.72 0.70 -5.36 9 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 42 4 0 43 0 279.60 73 99.50 CHANGED h-pltc.I+-GltshhPFY-oLP-LsLshGKs.LPSLEYGANYFLQLS+VNDLNRhsTDhLSLaTHDLhhsEoDLEKVYE.hsIcSVKoYGKoI+ADAVVsDLSA+NKLFKKERshlKSNNaLsENNLYluDYcMlTFEVFRPLF-huoEKaCIlKLPTLFGRsllsslRVYCSLFKsVRLaKssuDSWLKDSAIMVspcsaptNlscFhoalRcsTKSssW+DuNN.VpFslLcssV-+EFI-KFLsFSspVYEuLYYVHSLLYsSMTSEsKSIENEaQ+KLlKLLh ..MDcIsKNIREGsHVLLPFYEoLPELNLoLGKSPLPSLEYGANYFLQlSRVNDLNRMPTDMLpLFTHDlMlPEoDL-KVYEILpIsSVK.YGRShKADAVVADLSARNKLFK+ER-AIKSNNaLTENNLYISDYKMLTF-VFRPLFDhssEKYCIIKLPTLFGRsVIDThRlYCSLFKsVRLhKCVSDSWLKDSAIMVAS-lpKKNlDhFMSHV+SVTKSuuWKDsNs.VQFSILp-PVDoEFIsKFL-FSsRVYEALYYVHSLLYSSMTS-SKSIENcaQR+LlKLLL.. 0 0 0 0 +3966 PF03213 Pox_P35 Poxvirus P35 protein Mifsud W anon Pfam-B_2785 (release 6.5) Family \N 19.20 19.20 19.20 20.10 18.60 18.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.03 0.70 -5.43 12 130 2012-10-03 05:28:31 2003-04-07 12:59:11 9 3 65 0 1 104 12 279.60 48 99.48 CHANGED MAtsp.....clslYVIPll.GRssp-VlPphct...h..tchcslKc..........ss...psp.thphs............+hh.Wp......Gsl-s...ac-.......YFSuhCphhCopEhKpolA+HhSLW-ph...spsshpss-scallVlEDDNTlpc...lp..sl+shIpuMp-psIDlLQLREshpssssRs.hs.t.tp.shasYsGGYDhSLSAYIIRluoAh+lhspIhcptGlSsuLshElh+lEpcLtlNR.VLssuspYVpH-h+hlsc+Rss.ch+sulhsRlusWluppaPshhYhlopPLFSFFGlFDIsllGllhlLaIllLlIFslNSKLLWFLuGhhhoYll 
...............................................chslhVlPll.sRssu-shPpl+p...h....pc.hcslKc.......hsp.tsh.h.pscsch.hsh.................................thl.Wpt.....Gslcs...asc.......aFSuhCsshCTcEsKpsIA+HhuLWcSh...h.s-hcspcscalVllEsDNslcc.....lphlcsllpsMp-ppIDIhQhREhhpssp.s+s.h.s.p.ts.thasYs.GGYDhSLSAYIIRlssshplhscIIKstGlSouh.hElh+lEpchplNR.llssus+YV.H-.+hlscpRhp.ph+sshWsRluphhu+paPshhYhhopPLlSFFGlFDIsllGlllILFIlhMl..IFslsS.KLLWFLsGhhhThhl.................................................................................. 1 0 1 1 +3967 PF03395 Pox_P4A Poxvirus P4A protein Mifsud W anon Pfam-B_2985 (release 6.6) Family \N 25.00 25.00 49.40 49.30 18.50 18.10 hmmbuild -o /dev/null HMM SEED 888 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.75 0.70 -13.56 0.70 -7.01 4 92 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 46 0 0 84 0 847.60 67 98.38 CHANGED QLEcSEYlF+llSTlLPplC...............LDYKV--tLppTFVHPFDslh.sshGsls+t-plpsulppLGINYLlsshsp..+LFshllssspIthhssshths......................................................................................................................................................................s+sNPllNsHoFs-LPsFTpcLlphRhps.Et+ARFhGGYlhsctus....................ssshs.h....cYPsLsF-NTYhhNlLYpssIss.ht..F+..................................................................................................................................A+sssGlhhhpDasNLlulRsLlospspsRF-psashpphApcaslsls.s.s.sclDLhoMso........................................................................................KphlhahQaFsDpYtch.hpaNGsslh.sc.clhslslSh+YQuhIs+Lspha.slPlhNshslhshstspsttphslsh.slpalslssNlsaFlshhN.......hlA+EpRs..ssLcs.p.ShFWDGlDYp-YKpKplp-hhFIsuoCYVFuLap+NsTTaCShLoDhluAspTP.RVCllPRsluu.+TsscLluEsLcSlNshoh+-FP+pssStl.HIGLSEpGFMRFFQLLRLlss+s.EoAlKEVlhsYsGlKhuDpGsPah.I+p-SYpsFlhLLFuuMGF+VoV+pSlhGSsNhohIolRP.RVo+pYIsshLhKsSCS+s-A-KLlousa-LLsFMlSsush+DhpSYh.tRphssshaauG....s.ps--..tTIIphopPlslLDRlslpGlhuAsTlsEhLssDhFtPENthFKsNLpthIpSsploG..-sIhptMPhslLD+llTs......
.AGssp.VSls-llDNIss.o.DCDtTN-IsDhINouLKcohsKcNshlsSpshsuVANpScppLtDl+pS.oC+hAslFKsLA+SIYTlERIFNs+lSD-VKh-hLEKhKsFosISpSLYsD.......LIulEslKAlLYIlKRSG+sl-cTpIssD...-l+KSYplI+PKIhshhNYYsphSRsYFppMKKNLNMpDsstsS .....QLEDSEYLFRIVSTVLPHLC................LDYKVCDpLKTTFVHPFDlLLNNSLGSVTKQDEL.QAAISKLGINYLIDTTS.pELKLFNV.sLNAGNIDlINssINIS........................................................................................................................................................................SETNPIINTHSFYDLPPFTQaLLNIRLTDTEYRARFIGGYlKPDGSDo...................MDVLAEK....KYPDLNFDNTYLFNILYKDV.IsuP..I.cp.FK..............................................................................................................................................AKIVNGVLoRpDFDNLIGVRQYlTsp.DpsRFDssYsIsDAApHYGVNLN..TLPLPNVDLTTMPT.......................................................................................YKHLIMacQYFlss.........Y-+VsIYYNGN+hlasD.EIhsFsISMRYQSLIPRLV-.hFPDIsVNNNIVL+TRD.PQN.AsVNV.sV.sLPNhQ..FVDIs+NpKFFINFhN.........LLAKEQ.RS..TAIKV.sKSMFWDGhDYEEYKSKsLQDMMFINSTCYVFGL.YNHNNTTYCSILSDIISAEKTPIRVCLLPRVVGG.KTVTsLISETLKSISSMTIREFPRKDKS..IMHIGLSETGFMRFFQLLRLMADKPHETAIKEVVMAYVGIK.LGDKGSPYY.IRKESYQDFIYLLFASMGFKVTTRRSIMGSNNISIISIRP.RVTKQYIloTLMKoSCSKNEAEKLITSAFDLLNFMVSVSDFR...DYQSYR....Q....Y....R..........N..Y.......CPR...YF.YAGS...P.............................EGEETIICcSEPISILDRIDTRGIFSAaTINEMMDTDIFSPENKAFKNNLS+FIES.GD.ITG..EDIhCAMPYNILDRIITN........AGTCT.VSIGDM.LDNITsQS.D.CN.MTNEITDMINASLKNTISKDNNMLVSQALsSVAN+SKQp.IGDLRQS.SCKMALLFKNLATSIYTIERIFNAKVuDDVKAShLEKYKlFTDISMSLYKD.......LIAMENLKA...MLYIIRRSGC+IDDA.QITTD...DLVKSYSLIRPKILSMINYYNEMSRGYFEHMKKNLNMTDGDSl....... 
0 0 0 0 +3968 PF03292 Pox_P4B Poxvirus P4B major core protein Mifsud W anon Pfam-B_4215 (release 6.5) Family \N 25.00 25.00 27.30 25.20 17.20 17.00 hmmbuild -o /dev/null HMM SEED 666 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.24 0.70 -13.22 0.70 -6.58 9 314 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 152 0 0 187 3 273.70 52 97.52 CHANGED MEoshs......lFlsu+lsLtssYsNphL.LlspsHlHtPspSlSCSlCsSLuplss.--hISAGARppR.sl+R...........R...stspsst....p.ss+psss.h........shVPIDEluSTpDWpl+LR+DGsAIA+YLpssKsDlpNFTIQDhlslM+KLNIhRosRsELFELLuHVKuoLosoSlSVKsTHPLVLIau+ucP+IG-QhKEL-+lYSsSpYphLLSTTRFQShHFsDMSSSuDLhFca+ssDSshFlHPIhhALFGlKLPALENsFVaGDSYSLLpQLach+KV+P-NYMLLVNRLTE-uPIlhTGVsDslSTEIQRAslHTMIRKhIhNlRMGIFYCp---ulDsaLMKIIHssCSplMoDEEQhLASILSIVGF+PsLVSVs+Puhussa....DMpLQoVPYIVVsPhKMITTSssPISINosslaSLTaDutoGRVlFsPsshuYtttsssssssshssh...sssshtpshsSPVIVNGsLlFYVERRQsKNhhuGECYTGaRSlIsDpPI-Vup-lslNGIMYRL+SAVCYKlGD...phh.ssCs.....uuDIFLKGaYTILFTEhGPWhYDPLSlasKuuR-uRLhRAhKNpYt+ps..sthD-up.FY-WlKG-Gus.hhtuKQQ.LMNHhsMF-DDLLoMEEAMSLISRpCCILIYAQDY-PYloAKsIo-lF 
.................................................................................................................................................................................................................................................t.............shlsIDElTSTHDWQhpLRKDusAIs+YLh-pKCDlpNF.ThQDLlpVM+pLNIIRs-RpELFELLuHVKuoLossSV...SVKsoHPLhhIYu+scs+IG-QhK.L-shasPS.cYQsLlsTTRFQSspFsD..MSo.SS.DhLFcFK.cpD..Sh..halHPIlhA............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +3969 PF03296 Pox_polyA_pol Poxvirus poly(A) polymerase nucleotidyltransferase domain Mifsud W anon Pfam-B_4019 (release 6.5) Domain \N 19.50 19.50 19.90 19.50 19.30 18.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.85 0.71 -4.65 13 65 2012-10-02 22:47:23 2003-04-07 12:59:11 8 2 45 9 1 62 42 149.10 72 31.45 CHANGED chsShcclAc...chLsShNVsshoc.cl..MGRHsVSsL....VssVNpLMEEYLRR....HNKsCICYGSYSLHLLNPcI+YGDIDILQTNuRsFLIsLAFLI+FITGpsVlLLKVPYLKNYhVL+DccssHIIDSFNIRQcTMpsIPKlLIDNIYIVDPs .........N.lTSMEELARDMLNSMNVAlIDK..uKV..MGRHNVSSL....VKNVNKLMEEYLRR....HNKSCICYGSYSLYLIN..PNI+YGDIDILQTNSRTFLID..LAFLIKFITGsNIILSKIPYL+NYMVIKDENDNHIIDSFNIRQDTMNlVPKIaIDNIYIVDPT........ 
1 0 0 0 +3970 PF03294 Pox_Rap94 RNA polymerase-associated transcription specificity factor, Rap94 Mifsud W anon Pfam-B_4535 (release 6.5) Family \N 25.00 25.00 63.60 63.50 19.20 19.00 hmmbuild -o /dev/null HMM SEED 795 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.67 0.70 -13.50 0.70 -6.59 10 107 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 53 0 0 84 0 595.20 68 99.97 CHANGED M-oKESlLIEIIPKIKsYlh-sshssKSYsDFIScNKsIFllNLYNVusITEEDIRLLYsTIEQNhDlDDQTLlSIFSYIGYKFEQsl+EEIooSLthsEps.hTD-M.TaNhYsLFFNTLDMYlRQ+RINVLVND-ssuD......lsINY+..TSDLsSsF-sstEPEVREIPFNMK-hlsYVSKNlDQlRFSKKYLDFAYLCRHIGIPISK+KhNlRYlaLYclDGloIPIVI+DFLDVKYVYLcETGKsY+NsFSE-hNsSLhDWG+lIIPpL...KN++LYSYlFLSsYaL+DLF.-LIpp+-ssF+stcp.hctIpltEPtuW+c-VplEahPCEHQI+LtEAhKlDs-YFsKlNsFAsEYIYYEDGlAYC+ICGhNlPpFNlDAADVIKss..VIVoTaNKSIFLSEPYSYFVHSQRFIFNIIMSFDsIMKSQTWsMKYNINRLILNFLI-INu+RQcYEK+FusEI.K+GlFFLRLSANLFDIHsSSTELFYSuKhLNLNYIVlLVIlLNSSADFIlSYMsuKKKp........VsEooLKauISVIIYDFLlKT+ICEKsuLDTIs..LhT-VYhSIMPEELcsHapRIllEL+KLlSIpRocptPsYDV-s+h...Plssl..+FFsspslhsp.Mhshp.tch...pspslhpPs.hstsoc-shppF.cclThc--hKVLIRhNDTNAopLVhFsoHlKIEIE+KKlIIsLKs...................LFIsNsLKYYY..............SssuhYVFRFGDPFPFD--LIDppHVQaKlNsYNLLRaaLLPcSD.VFVYFucSLsR--LEYsFYhFLspYVNs.VppWIDENIo+IRELYhhNFNN 
....M-oKEolLl-IIPKIKsYlhDsshssKSYsDFIupNKsIFllNLYNVssITE-DIRLLYhTIEQNhDssDpTLluIFSYIGYKFEpsl+E-hsoSL.hs-p..hTD-M.saNhYshFFsTLDhhlRQ++lslLVND-hp.GD......h.lsY+..sSDLsosFssph-Pcl+cIPFNMKshlsYlpKNlDQlRFSKKYLDFAYLCRHIGIPISK+KhNhRYlahYplDGloIPIlI+DFLDVKYVYLEpTsKhY+NsFuE-.Nsul.-WG+lIIPhl...Ks+tLYSYlFLSsaaLpshascLltpc-shFhthpp..hchItltEP.uWcccVplEhhPCEHQI+Lh-AhKlD.spYFsKlNsFspEaIYYEDGlAYCplCGhNlP.FNLDAuDVlKss..Vl.VsTaNKoIFLSEPYSYFlHSQRFIFNIIMSFDsIMKSQTWsMKYNINRLILNFLI-INS+RQcYEK+FusEI.K+GlFFLRLSANLF-.psSSTELFYsuKhLNLNYIVsLVIlLNSSADFIlSYMpSKpKp........VpEooLKYuISVlIY-FLlKT+IC-KusL-TIh..LhT-VYTSIMPEELclHapRIhlEL+KLlSIpRSthpPNYDVEspt.t...Phssl..+FFsspslhs+sM..ss...pph....hppp.IstPs..sp..octshppF.+c.lTsD-DlKlLIRhpD.TNAoKLVIFPoHLKIEIERKKlIIsLpo...................LalsNsLKYYY..............Sss.LYVFRFGDPhPF---LlDpEHVQaKINCYNlLRYcLLP-SD.VFVYFSsSLNRcsLEYuFYhFLupYVN..VppWIDENIo+I+ELYhINFNN...... 0 0 0 0 +3971 PF03340 Pox_Rif Poxvirus rifampicin resistance protein Mifsud W anon Pfam-B_3377 (release 6.5) Family \N 25.40 25.40 27.90 86.10 19.80 25.30 hmmbuild -o /dev/null HMM SEED 541 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.82 0.70 -6.44 11 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 11 0 61 2 518.40 71 98.91 CHANGED hhsSlIss......--u...s+RpNVFusDsppPThYMP.QYIolsGlh...ssssssVls..aEIRDQYIsAhNpFlLoIsLPElKGlG+FuYlPYVGYKsIpcluls......SsNslIWEosGE-LFssshsscpA.phSGaSpELNDlSsGhoPNDsIK-ussVYlYl+TPFDs-..cTaSSLKL.u-oKlslslTFNPlSDlllhDusFsh-uF..l+-FVYssELSFlGYhV+slphKssYIEcs....RRsluQhNpsT..sslo-VaulTuLuVYlKP..aYG.h-N+FISYPGYsQTEccYIpuaV-RLL-DLlIVScs....hPcsF.P-su-lVEVPssGlVsIQ.DsDVhV+IDNVPsshslYaHTNlLlFGTR+NSssYNISKKFSsIsGsYScsTc+IhFoclpHolNIoDVSIPVulWsup+NlYsGDNRSspSKsKDlalNDPFlKGlDFhNK.hDlISRhEVRFGN-llYSEsuPIS+IaNpLLossssusRpL...........pFNasPtsFF+PTsLhANsSRGKDKLuVRVsapshDssNPIpYVsKQLVllCsDLY+loYDss.IplsKIs- 
.................lINSLIGu......DDu...IKRuNVFuVDsQhPTLYMP.QYIoLoGVho....NsusDspslu.oFEIRDQYITALNHLVLSlELPEVKGhGRFGYVPYVGYKCIpHVSlS......SsNGlIWElpGEELYNsCl.NNshALcpSGYSpELNDIShGLTPNDTIKEsoTVYVYIKTPFDVE..cTFSSLKL.SDSKITVTVTFNPVSDIlIRDSoFDaEoF...sKEFV.YVsELSFIGYMV...KNlQlKPSFIE+P....RRVlGQINQsT..AslTEVHAsTSLSVYsKP..YYGNTDNKFISYPGYuQsE+DYIsAaVpRLL-DLVIVSDu........PPoGa.PESAEIVEVPpsGIV..SIQ.DsDVaVKIDNVP-sMSVYLHTNlLhFGTR.KNSFlYNISKKFSAITGTYS-ATKRhhFuHISHSINIhDsSIPVSLWTuQRNVYNGDNRSscSKAKDLFINDPFIKGIDFKNK.TDIISRLEVRFGNDVLYSENuPISRIYNELLopsssGTRTL...........pFNFTP+hFF+PTT.IsANVSRGKDKLSVRVVYSoMDsNpPIYYVpKQLVVVCNDLYKVSYDpG.VslTKIh.G...... 0 0 0 0 +3972 PF03293 Pox_RNA_pol Poxvirus DNA-directed RNA polymerase, 18 kD subunit Mifsud W anon Pfam-B_4188 (release 6.5) Family \N 25.00 25.00 259.40 259.30 22.30 21.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.82 0.71 -4.73 10 55 2009-09-11 05:39:38 2003-04-07 12:59:11 9 1 41 0 0 36 0 160.00 73 97.25 CHANGED MSTFsppVYLPVsLpPHELTLDl+cNI+cAVh+cYLHKEouGlMAKKI-IChDpELPLGEIVNNpVVV+VPClVTYKYYKlGDlVpGTLNIEDESNIoVtCGDLICKLsRDSGTVSFsDSKYCFIRNGpVY-NGSpVSVsLKEAQpGh.-SsFVFLAoIlD MSoFVsNsYLPVTLcPHELTLDIKsNIRsAVYKsYLHREhoGhMAKKIEIpcDsELPLGEIVNNuVVIsVPCllTYtYY+VGDIVRGTLNIEDESNVTIQCGDLICKLSRDSGTVSFSDSKYCFhRNGsAYDNGSEVoAVLhEAQQGh.ESuFVFLAsIVD.. 
0 0 0 0 +3973 PF03396 Pox_RNA_pol_35 Poxvirus DNA-directed RNA polymerase, 35 kD subunit Mifsud W anon Pfam-B_3921 (release 6.6) Family \N 25.00 25.00 92.40 92.30 19.60 18.40 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.80 0.70 -5.38 11 79 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 48 0 0 66 0 275.80 64 95.56 CHANGED RcEpplsl-LsPulATFIKHGFNp+V+WPlLslGVVLsNTTTAVNEEWLTAlEahPTRKIFYsaspcILcpElsFCVYLpKo.QopscsYloLtDFDYYlIcsDs.php+l-KPKELcETLLHoFQEYR..hKNhQsIELlAFSSGTpIs--llppLo.FLslElFNREYsNlKsllsppFcohsPFIVlAPhG+LTFFlEcYsWhDhKoHlK-lLDaLEtsLluDl+SHplpss..hpD.s.ssSuYNssSGhLaVNDllTMslVNFFGCsuRLsoYH+FDhoplDscsFl+ALucAh ..REEsoIsV-L-PuLATFIKpGFNshV+WPLLNIGlVLoNToTAVNEEWLTAVEHlPThKIFYKHlaKILsREhGFhVYLK+S.QSEcDNYITLYDFDYYIIDpDs.olohVDKPsELKETLLHsFQEYR..lKSsQoIELIAFSSGTlIsEDIVs+LT.FLDlElFNREYNNVKsIlcs-FlupuPFIVIuPhGKLTFFlEsYSWhDFKSphKDIlDFLEGsLlAsIHsHhIcVu..ssD..ETVSSYNPpSGhLFVNDLhTMsIVNFFGCNSRL-SYHRFDMTKlDlEhFlKALS-Ah. 0 0 0 0 +3974 PF04490 Pox_T4_C Poxvirus T4 protein, C terminus Kerrison ND anon DOMO:DM04331; Family This family of poxvirus proteins are thought to be retained in the endoplasmic reticulum. M-T4 of myxoma virus (Swiss:O55698) is thought to protect infected lymphocytes from apoptosis and modulate the inflammatory response to virus infection [1]. 25.00 25.00 49.20 49.10 24.80 18.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.99 0.71 -4.45 6 35 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 17 0 0 33 0 141.70 45 64.07 CHANGED sppsFIlsslccuVYusGHhsYhEhSs.Nlshl-slPpCu+pIoluVSCDp.ssshp.atcppphpcs-lpIslphDoSCl+ahShshSlps.Cp++Louhup........-pLsCstlcsppHs+YLKoCssspFDR.haKpYh.HQ+salsKlhh .pspsFIlssscpshaGssH.sYlEhSs.Nsus.-slPcCS+plplSV.CDQ.suslc.apchpphcssslpIslKhDoSCIcaluhshShhNECp++Looh.p........-pLoCsuhchps+sKYLKTCossKFDRpsaKpYh.+p+shhsKs..L.... 
0 0 0 0 +3975 PF04491 Pox_T4_N Poxvirus T4 protein, N terminus Kerrison ND anon DOMO:DM04331; Family This family of poxvirus proteins are thought to be secreted or retained in the endoplasmic reticulum if the protein also contains an additional C terminal region (Pfam:PF04490). M-T4 of myxoma virus (Swiss:O55698) is thought to protect infected lymphocytes from apoptosis and modulate the inflammatory response to virus infection [1]. 25.00 25.00 28.90 31.10 18.80 17.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.27 0.72 -4.13 5 44 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 24 0 0 36 0 46.50 59 24.35 CHANGED TWclcIGLCIps.cDF+up+TGC..+hspGPGGLITEGNGFRIFsHD-C .TWtLKIGLCIhA.KDFYscRTDCSVHhssuuGGLITEGNGFRllhHDpC.. 0 0 0 0 +3976 PF03295 Pox_TAA1 Poxvirus trans-activator protein A1 C-terminal Mifsud W anon Pfam-B_4259 (release 6.5) Family \N 20.70 20.70 21.10 93.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.95 0.72 -4.32 10 57 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 41 0 0 35 0 62.90 71 41.95 CHANGED uMIKopVALREEPKloLLPLVhYccPEcVlslINsLRsKEGlYGoCaacEccpsIcloLRSLl .SMVKSHVALREEPKISLLPLVFYED.EKVINsINhLR-KDGVYGSCYaKENuQhIcISLRSLL 0 0 0 0 +3977 PF03355 Pox_TAP Viral Trans-Activator Protein Finn RD anon Pfam-B_3956 (release 6.5) Family These proteins function as a trans-activator of viral late genes. 
25.00 25.00 245.00 244.90 19.50 19.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.84 0.70 -5.02 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 42 0 0 35 0 253.90 80 99.79 CHANGED MSLRIKIDKLRQIVTYFSEFSEEVSlNVDsuSslMYIFAoLGGSVNIWoIVPLsSNVFYDG-sNpVFNLPVLKVKuCLCSFHNDAVVoIEPDl-NssV+LSSaHlVSVDCNcEslPHRTsTuISLGIsQKKSYIFNF+KYEEKCCGRTVlHLDLLLGFIKCISQYQYLTVsFc.DKNLlLKTPGo+DTFVRcYSMTEWSP-LQsYSFKIAIsSLNKLRGFKKRVlVFEoKIVMDsDDNILGMLFRDRlGoY+VNVFMuFQD MSlRIKIDKLRQIVAYFSEFSEEVSINVDSsDpLMYIFAALGGSVNIWAIlPLSASVFYcGs-NhVFNLPVSKVKSCLCSFHNDAII-IEPDLENNLVKLSSYHVVSVDCNKELMPIRTDToICLuIDQKKSYVFNFHKYEEKCCGRTVIHLEhLLGFIKCISQaQHLsIhFK.DDNIIhKTPGNpDsFSREYSMTECSQELQKFSFKIAISSLNKLRGFKKRVNVFETRIVMDsDDNILGMLFSDRlQSFKINIFMAF.D. 0 0 0 0 +3978 PF04441 Pox_VERT_large Poxvirus early transcription factor (VETF), large subunit Finn RD anon Pfam-B_3920 (release 7.5) Family The poxvirus early transcription factor (VETF), in addition to the viral RNA polymerase, is required for efficient transcription of early genes in vitro. VETF is a heterodimeric protein that binds specifically to early gene promoters. The heterodimer is comprised of an 82 kDa (this family) subunit and a 70 kDa subunit. 
25.00 25.00 242.20 242.00 16.70 16.70 hmmbuild -o /dev/null HMM SEED 700 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.39 0.70 -6.37 9 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 0 0 60 2 696.20 72 98.19 CHANGED hYhVsPQLVlLVs+sQcIc+sLYLohYshID-p..SslY.YFl+saLch..ppPcll+RHILLTL+lpQlKGYlpsLLslp-DIIIYSHKNNLEYSYVDNTIFNPFs.TQ+KTLI+sD...uFLYNlYssACDFLVlWVApAs..DTslsEhGSY........EEVDsNILKFEp+LlpsFspLDL-holpSKFNNIFRTNL+.TGL+sIlpp.....p...sh+hLlh+oDEaFIshoGN+FlLsD.......EpLNLSIWDssssLAISSDGcTlhlNsVcLFT-lls-.slQMERIKu...DlTYKlaLsTPITS+lKLDIETSFIFlET.ATNNILLSsDKKISIILAKNHISIKVKNaIPNIEKYFTFLllulNphFNsVQpSuDFTKlETlYWSRICQNTKsKNRKPVIlsSLDss.MpKlSDNFY+Scp+....................EVFlNsNGlMFoChDPhG+YNslGFLuIFY+Lp+..hCIPCCFL+sQuHo-TFpSCVapc-.lpcshlsPYILNFGKlV.TpSKlSFLPIlFssahNcshpIshEpDNKRL+tTsGYaVV+uCpss....IhRlRThsDIIpFVNpsssILIssDhVYFPM....sh.chsN...+laILIQEIVHElVhl+KptcpDtIthh.ssps+...L+-hFPhposphsItp-sGlsLTTDGFalDGchFspcLSopassFhcNlospsts.uKYFu.lFKYVl.....T-sh-hFIKTWlINIMlphGhssp.st..hhstLEKYYsp R.YIVSPQLVL.VGKGQElERALYLTPYDhIDEK..SPIY.YFL+SHLNI..ppPEIVKRHILLTLRMoQLKGYLGNLLDI+DDIIIYSHKNNLEYSYVDNTIFNPFVaTQKKTLlKsD...SFLYNVYPGACDFLVIWVARAs..DTSIPEFGSY........EDVDNNIlKFEThLhEVFPpLDLDhTVESKFNNIFRTNLKhTGLKKIIQ+Vp..-LDl.NYKSLLS+hDEaFINMTGNHFILND.......EpLNLSIWDhDGTLAlSSDGDTlMINNVKLFTDLV.SDIDTQMERIKG...DITYKVaLuTPIsSRIKLDIETSFIFIET.ATNNILLSoDKKISIILAKNHISIKVKNHIPNIEKYFTFLVIAINuMFNSVQKSuDFTKVETVYWSRICQNTKNKNRKPlIIs.LDss.MKKISNNFY+SDcK....................EVFINDNGIMFTCMDPLGKYNpVGFLNIFHchpK..hCIPCCFL+DQSHcsTFSSCVHQhD.V-KcIlSPYILNFGKVV.TESKhSFLPIIFDsFLNDGMoAshEQDNKRLKETSGYHlVRCCsG-....sIVRLRTsSDIIQFVNEDKNILIsNDMVYFPM....NsoDIGp...KIHILIQEIVHEVhIVKK+EooD+IDFFPPNYKL...LKDLFPKQThpssIpSDuGMsLTTDGFYIDGKLFN-DLSSKYVTFTKNVhsS..DuV.uKYFSPLFKYVI.....oEAKDRFIKTWhINIMl+MsVDPss....IIPsLEKYYPN. 
0 0 0 0 +3979 PF04947 Pox_VLTF3 Poxvirus Late Transcription Factor VLTF3 like Finn RD, Iyer L anon Iyer L Family Members of this family are approximately 26 KDa, and are involved in trans-activator of late transcription [1]. 25.00 25.00 35.80 35.50 24.90 24.70 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.96 0.71 -4.75 23 135 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 120 0 1 101 263 169.80 31 59.54 CHANGED pthppssphs...hs+h.Ha+ssLpphQu+pssp........lsscllcplccclccpplp....................hsclspshVtpaLKcLs...h.sKpYcsshhIhphlTups...ssplspch-pplhchFcplhphapc..............tpcpshlsYsalLa+lhchh.....uhsppLssh.hlKstsKhpspD..placclhscLsWcahtt ........................p..pcsshas.s+h.Hh+psLc+h.u+p...........hss-llspLhchhcKppIs..................hs-lstshVpshLKthc...h..pKtYchVh.Ihsplpscc.....slosch.pclhclFcclhhhhpc.................ss.pphlsYSahL.Klh-lh.....uhscpLps..plKshsKtsspp..hlWcphhschphchh..p.... 0 1 1 1 +3980 PF04498 Pox_VP8_L4R Poxvirus nucleic acid binding protein VP8/L4R Kerrison ND anon DOMO:DM04340; Family The 25 kDa product of Vaccinia virus gene L4R is also known as VP8. VP8 is found in the cores of Vaccinia virions and is essential for the formation of transcriptionally competent viral particles. It binds both single stranded and double stranded DNA and RNA with similar affinities. Binding is thought to involve cooperative interactions between protein subunits. The protein is proteolytically cleaved during viral assembly at an Ala-Gly-Ala site. Possible roles for VP8 include packaging and maintaining the DNA genome in a transcribable configuration; binding ssDNA during transcription initiation; and cooperation with I8R protein to unwind early promoter regions. VP8 may also function in either transcription elongation or release of mRNA molecules from viral particles [1]. 
25.00 25.00 26.10 174.20 20.80 19.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.34 0.70 -5.03 8 58 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 43 0 216.10 66 86.45 CHANGED AKlKFPRShLSIaplVPRsMTRYELcLlpoEsITGsVFTTsYNl+KNLGls-.-KLThpsIEcYYLD.sN-VLTLMlsNTslscl..us.R++uRR.pKNPVlFRpGSsPLlllFcSRKKlsIY+Ecpcpss.sooYspIssslALls+YushpLLDVHoPSusLpLsAVYGh.sscpEL+KLuosKElEsYQsos.LpEPl+LsDFppLF-slKKsIsLTNlsh AKsKFPRShLSIFNIVPRTMoKYELELlpsENITGAhFTThYNIRpNLGLGD.-KLTIEAIENYFLDPNNEVhsLlINNTDhouV...lPKKuGRR.NKN.VIFRQGSoPlLhIFEoRKKlNIYKENh-SA..sopYo.IGDNhALISKYAGlslLsVaSPSoSM+LNAlYGF.TsKNcLcKLSoNKELEsYSSoP.LQEPIRLNDFlGLh-CVKKNIPLTsIP.s 0 0 0 0 +3981 PF04395 Poxvirus_B22R Poxvirus B22R protein Mifsud W anon Pfam-B_3510 (release 7.3) Family This is highly conserved C-rich, central region of poxvirus proteins from eg, Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. There are three pairs of conserved cysteine residues. 25.00 25.00 42.70 42.30 22.10 16.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.82 0.71 -11.44 0.71 -4.81 14 108 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 32 0 0 94 0 176.60 39 11.78 CHANGED hssC-ssShphMspssulPcpFNsTLpphulpssssops..aYtC.hl.psss-C....uls.hhsssohpslsp..pppsppppRcpco....hpshpp-DhhChaptYshsppts-.......Csssp....................cppppccpscss.ssp.sclshhutccLGs..+shIPpcssclQlGspG.t-us....VsGDssIYppVKpchcptlpshhssss.....ssshPp .............husCc.csSM..sLMsuVlssspEFNsTLcpl.G.lpsspssss...YYtC.hl.sssusC.phlsLsphlsshThssl.p...sshsssssR+pRs...................hp..ssc-lpClYcsYGlscp..sc.......C........................................hpps+ccp.scst.........pthcLhppupc-Lth..csVIP+uTT+hQVGupG.ssGs....VsGDss.apsVKschp.hhcchhPpls....hsh...cp........ 
0 0 0 0 +3982 PF00550 PP-binding pp-binding; Phosphopantetheine attachment site Bateman A anon SCOP Domain A 4'-phosphopantetheine prosthetic group is attached through a serine. This prosthetic group acts as a a 'swinging arm' for the attachment of activated fatty acid and amino-acid groups. This domain forms a four helix bundle. This family includes members not included in Prosite. The inclusion of these members is supported by sequence analysis and functional evidence. The related domain of Swiss:P19828 has the attachment serine replaced by an alanine. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -3.71 163 41592 2012-10-02 01:16:24 2003-04-07 12:59:11 20 3219 5404 127 13580 37590 2935 66.40 23 7.49 CHANGED ptltp.hlsphl..t......t.ppl...sss.ssh....h.c.hGh.DSlthhplhsplpcp.h......h..plshs..plh.p.tsolpplsphl ........................................ltp.lhsclL.......s........h..ppl............shc..ssF..............h..-..LG...s..DSLtslclhs..p.l........c..cp..h..............u..l.........pl.ssp............plh...p...hs.....Tltslsth......................................... 0 3350 7619 11106 +3983 PF02503 PP_kinase Polyphosphate kinase middle domain Mian N, Bateman A anon Pfam-B_2701 (release 5.4) Domain Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. 
25.00 25.00 27.60 26.70 22.40 22.10 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.22 0.70 -11.20 0.70 -4.82 173 3692 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 2906 6 697 2965 1012 200.80 31 31.95 CHANGED pppppalcpaFpcplhPlLTPlulDsuHPF.PhlsNtoLslslpL..c..............s......s..................................................t........pt..h.AhlplP..psl.PRhl..pLP........ttts...................alhLEslIptalspLFsGh...clhssh.FRlTRsuDlpl-.p..--.sc.DLlptlcppL+.pR+hG..psVRLElsss.hspplhchLhc..........plplsc.p-lapl.sG..lsLspLhplss.hs...pscLcas.sasPt.hs..ttl .............................................................................tpppalcpaFpcplhPllTPlul..D.s.s.HPF..PhlhsculslAVpl..p....p.........tps...................................................t..........................p.....tp..h.AllplP...psl..PRhlpLP.............................schs.t.th..........................alhL-sllpt..alccL...Fs..Gh...clhssapFRlTRsuDlpl-..p....--...s..c...sLhptlpppL.ppR+h.G..psVRLE....l.pps..........h.sp....t....h....h.c..hLhp..........chs.lsc.pDlahs.sG...lsLscLhplss.ls......+scLph..PasPhh....h.................... 0 208 457 598 +3984 PF03012 PP_M1 Phosphoprotein Griffiths-Jones SR anon Pfam-B_1336 (release 6.4) Family This family includes the M1 phosphoprotein non-structural RNA polymerase alpha subunit, which is thought to be a component of the active polymerase, and may be involved in template binding. 
20.60 20.60 21.50 21.50 20.40 19.70 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.94 0.70 -5.06 11 932 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 30 6 0 723 0 217.20 76 99.90 CHANGED MSKIFVNPSAIRAGLADLEMAEETVDLVN+NlEDsQAHLQGEPI-VDuLPEDhp+LpIsDspssphsspssppEtup-EDFYhsEupDPhlPFQSaLDslGhQIVR+MKTGEtFFKIWSQusE-IlSYVssNF.PtPssKsoc-KSTQTssccsppssptosssp+-cpSspsshsup.-sSGPsuL-WusoN--D-uSlEAEIAHQIAESFSKKYKFPSRSSGIFLWNFEQLKMNLDDIV+pA+slPGVsRlAc-GsKLPLRCILGaVA.spSKRFQLLVsoDKLuKlMQDDLN+Yhup .......................................HLQGEPIEVDsLPEDM+RLpLDDtKPSsLGEhA+sGEuKhcEDFQMDEGEDPuLLFQSYLDNVGVQIVRQMRSGERFLKIWSQTVEEIISYVsVNF.PssPG+SSEDKuTQTssRElKKET.hsuso.QR-SQsSKA+MsAQ.oASGPPALEWSATNEEDDLSVEAEIAHQIAESFSKKYKFPSRSSGIFLYNFEQLK..................................................................... 1 0 0 0 +3985 PF02818 PPAK PPAK motif Bateman A anon M Greaser Motif These motifs are found in the PEVK region of titin. 21.00 21.00 22.80 21.00 18.10 20.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.44 0.72 -4.04 33 559 2009-01-15 18:05:59 2003-04-07 12:59:11 10 58 28 0 180 495 0 27.60 59 1.68 CHANGED PPsKVPEsPKKsVsEEKlslslPKK.Es ........PPAKVPEVPKKsVPEEKlslslPKKsEs... 0 15 15 26 +3986 PF01326 PPDK_N PPDK_N_term; Pyruvate phosphate dikinase, PEP/pyruvate binding domain Finn RD, Bateman A anon Sarah Teichmann Family This enzyme catalyses the reversible conversion of ATP to AMP, pyrophosphate and phosphoenolpyruvate (PEP). 
20.00 20.00 20.10 20.10 19.90 19.90 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.06 0.70 -5.40 101 5203 2009-01-15 18:05:59 2003-04-07 12:59:11 14 40 3545 11 1535 4567 2865 323.40 27 40.31 CHANGED tsssth..h.GGKussLucht.............ptGl.sVPsGFslos.ss..apthh................st..l.t.l.th.t.....................ts.tthtthsttlpphlhp.sthP...spltpt...ltps..........hpp..hht.........................s.lAVR......SS.......................AssEDhsps..SFAGphcoh..Lslpu............cplhpul+psauShass+AhtYR........tpps.hst.............tpsthAVlVQpMltu........p..soGVhFTtsP..........hsu.ppst.hhlpushG.LGEslVuGps.ssDpahl..pp.th..........................stp.h.hh.........................................tttpt...sLs-...............................pp.....ltpLsph........upclEpt..aGt......P..D..IEWAlss.......sp...lalLQuRPloshtptsttt.tht.......................lpst .................................................................................................................hp...th...l.GGKsAsLuEMh.................................................thGl.s...VP.s.G.FslTs.cu..hpp..ah....pp........................................................st.....l.p.p.l.hphhp....................................................l.ls..s..hs..th.t.t..s.hsthhp.hlls..hshs........sch...ps........ltpt..............hsp...hhs............................................sshs.lR..........oS.......................................................usuED..h.pt.........uh.AG.p.coh........lslps.......................................................h-pl..htAlctVauShhssRAlsYR..............htps..hsc..................................p.huhuVsVQp....MVhu.........................s....uoGVhFT.hss..........toG.tp..p..h......s.......h...hs.G..GE...sVV.s.Ghh...sP.cp...hh.h...t............................................................................................................................................tph......slps...............................................................
.............................................................................................pt....hpcLs.chutplEpH....Y.tc........................s...DIEash-s............................Gc......LallQsRst...ppstttt.................p................................................................................................................................... 1 586 1082 1362 +3987 PF01239 PPTA Protein prenyltransferase alpha subunit repeat Finn RD, Bateman A anon Prosite Repeat Both farnesyltransferase (FT) and geranylgeranyltransferase 1 (GGT1) recognise a CaaX motif on their substrates where 'a' stands for preferably aliphatic residues, whereas GGT2 recognises a completely different motif. Important substrates for FT include, amongst others, many members of the Ras superfamily. GGT1 substrates include some of the other small GTPases and GGT2 substrates include the Rab family [2]. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.44 0.73 -7.42 0.73 -4.39 143 2973 2009-09-11 15:11:22 2003-04-07 12:59:11 17 39 322 678 1957 2958 11 30.30 27 27.00 CHANGED cELphspphlptsscN.assWpaRphllppht ..........pELphspphl..pp..sscN.a.ssWpaRphllpp........... 0 614 1031 1549 +3988 PF02541 Ppx-GppA Ppx/GppA phosphatase family Mian N, Bateman A anon COGs Family This family consists of the N-terminal region of exopolyphosphatase (Ppx) EC:3.6.1.11 and guanosine pentaphosphate phospho-hydrolase (GppA) EC:3.6.1.40. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.61 0.70 -5.22 10 5020 2012-10-02 23:34:14 2003-04-07 12:59:11 11 15 3248 19 1263 3783 1832 281.70 27 64.20 CHANGED llschssGt....hphls+pKppV+Lu-u.hspss....hLsp-uhpRslssLpcFsElhpsass..pplpsVATuAlRcAhNu--Fls+lp+thG.....hsl-lIoGp-EARlsahGVspsh..scuctlVlDIGGGSTEllhGps.....tcsptlhSLslGsVplscpahscD..slspcphpth+chlcph..LpchssphchtG....htpslGsssThcsltplpssps.........ttpITtcclpphlcclhphsp.....cchcltGlsp-RAsllsuGslIlpulhctLphc.slplSssuLREGllhuhlhpc .............................................................................t...stt......h.p.hl.s.ph.+p..hVRLupG...lspss...............tLsp-uhp.R.....u.hpsL.p.hFsch.h..p.....s.h..s.s......pp..l..p..sVA.Tu.slR..p..AtNu.s..-Flpcspc.hh.G.........h.sl.....cl.IoGpEEA+..Lh.a............h...G............l.......t.......p.....s.....h...............s.........p..........s..........c..............h........L.....V..l.D.....IGGGSTE..ll.hup.s.................................tp.stt....h...h...S.h..s..h.GsVp..h...t.....c.p...a.......h.....s.....s...s..............hs...p..c.p....h...p...t...h....pt........t...sppt..............l.p...s.....h...s....t.....p....h.ph.ts......................h.hs.l...G.....s...u..s.T....l.p.....s.....l...t.p.l...h...h..t..h.s.........................................sttlo.hpclpp....lhp...cl.h...p.hst...............hpc..h.p.l...G...l..s.t-R.tsl..lsuGhslltsl...hctl.....s...h.........p...p......h......tlu.ss.uLREGllhphh...h........................................................................................................................................................................... 0 378 804 1077 +3989 PF04403 PqiA Paraquat-inducible protein A Kerrison ND, Finn RD anon COG2995 Family Paraquat is a superoxide radical-generating agent. The promoter for the pqiA gene is also inducible by other known superoxide generators [1]. 
This is predicted to be a family of integral membrane proteins, possibly located in the inner membrane. This family is related to NADH dehydrogenase subunit 2 (Pfam:PF00361). 21.50 21.50 21.60 21.50 21.00 21.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.76 0.71 -4.66 167 3875 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 1245 0 618 2356 195 157.40 30 72.64 CHANGED ulpcs.hAlhluullhhlsANhhPlhp..hph...hGt.ppssTlhuGlhtLh.ppGthslAhllhhsSlllPhh+llslhhLhhsh..pht...............h.t..htpth.lh+hlchlscWSMlDlFllullluLlclss........luslpsGsuh...hsFuhllllohhushphDsRhlWcthp.t.pp .................................................lppshAhh.ls.ulllhl.ANl..hPlhh..........hpl.............hGs...p.psT..lhsGlh.L.h.pp...u...s...h.sl.As..llFlsolhlPhhpllslhh...Lhhss...c.ht.h...........................h.shppp......lhchl.ph.ltcWuMlD............l.F.............l.lulhluhl+..hts...................................hhs.ltsG.su.h...hhFshhllLThhus.phDsRhlW-ph....t..................................................... 0 132 279 449 +3990 PF01502 PRA-CH Phosphoribosyl-AMP cyclohydrolase Bateman A anon Pfam-B_782 (release 4.0) Family This enzyme catalyses the third step in the histidine biosynthetic pathway. It requires Zn ions for activity. 21.50 21.50 23.60 23.40 20.90 19.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.57 0.72 -4.35 168 3692 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 3587 2 1007 2642 1827 75.10 51 36.77 CHANGED MlAaMNcEAlppTlpTGpspaaSRSR.pcLWpKGETSGphQpVp.clphDCDsDslLltVct......tGs..A.CHT......G.pcSCFap ............MluaMNpEALp+TlcoGc.ssaaSRSR.pcLWpKGETSGphQpVh.s......lphDCD.sDoLLlhVp...........hGs....s.CHT.......G.ppSCFh.t............. 0 326 664 868 +3991 PF01503 PRA-PH Phosphoribosyl-ATP pyrophosphohydrolase Bateman A anon Pfam-B_784 (release 4.0) Family This enzyme catalyses the second step in the histidine biosynthetic pathway. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.62 0.72 -3.70 53 4463 2012-10-01 21:36:44 2003-04-07 12:59:11 12 19 3909 57 1077 3829 3398 91.00 30 42.54 CHANGED lpchccshttcttp.tPps..stphhsh........ptphlt.......EEh.sEhht.............Asp....ps.................................shsclsctlsDl.......lYhshGthsthG.......................l..c.hcslhcplccuN .......................................................................h..phtphltp.Rpps....pPpsS...aTspLhscG...........pplspK.lG..............E..Eu...sEssl...............AAp.......st............................................................................................s.cp..c.lssEsuDL.......lY..Hh..h...V...hl.t..p.tG...........................................................................................l.....s.hpclhppLppp................................................................... 0 327 690 914 +3992 PF03208 PRA1 PRA1 family protein Mifsud W, Bateman A anon Pfam-B_2976 (release 6.5) & Pfam-B_8147 (Release 8.0) Family This family includes the PRA1 (Prenylated rab acceptor) protein which is a Rab guanine dissociation inhibitor (GDI) displacement factor [2]. This family also includes the glutamate transporter EAAC1 interacting protein GTRAP3-18 [3]. 
21.30 21.30 22.10 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.99 0.71 -4.88 58 779 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 305 0 497 744 4 144.50 22 68.28 CHANGED phptthus..hRPW.s-Fh.-hsphupP..pshu-htsRlppNLsYFpsNYhhlshhlhhhsLlh.pPhsLllh..hslhsuhhhlahhps................pslslhs+phsspphhh.................slhlsolh.llal..sus..ssslhhslshuhhllhhHAuhRt.sp.....h.ht-pcstths .................................h...thsshRsh.s-Fh...s..........p....phuhP..pshschpsRltpNLtYapsN...Yhhlh.hhlhh..h.s.llh......pP........hhllsh..hhlhs..s...hhhlh..hhps.................pslhhh..t..p.phs.spt.hhh..................................slh.hsuhh...lhhh....su.s..hhslh.hslshshh.llhhHAuh+...p..................s........................................................ 0 145 277 397 +3993 PF00697 PRAI N-(5'phosphoribosyl)anthranilate (PRA) isomerase Bateman A anon Pfam-B_247 (release 2.1) Domain \N 20.60 20.60 20.60 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.28 0.71 -4.76 23 3795 2012-10-03 05:58:16 2003-04-07 12:59:11 17 13 3604 9 968 2961 2133 193.90 32 66.13 CHANGED s+hsulpphpDlhtshsssu....htuhlhssssp+phs.cpuptlhpsss.hh..lVGVFhNpslsplhphhpphuLDllQLHGsE..st-htphlt..lPll+ththstsht.....ht.sppt.p....shhLlD........utpGGoGpthDWshlpphht...........................hpllLAGGLsP-NVspAlp.......pshGlDVSSGVEos...Gl.KDhcKlptFlps 
.................................................................................................................KlCGlop.p-spsAhpsGA.....shh.G.h..l..F...h.......p...S........R.............pV..........s.............h...c.p......A.......p...p............l...h...p...t.....h....s.....t......................hV.GVF..l..s........t...s...h......s.p.........l.h...c.....h..h.....p...p.h.s......L.shlQ.......L..H........G......s.......E..........s..t..ph.....h..........p.........t..........l....c...................t...........h....s........h.......h....p....s...lt..h...s..t..shp..................ht..h...p...p...h...tth...........DhhL.hDs............................................................sthGG.....oGpsF.D..Ws..l...l.ss.ht...h............................................................................................h.s.h.lLAG..GL...ss...-..N...ls..c..Alp.................hts....hulD..luSGVEss........Gh....KDhp+lpphhp.t........................................ 0 323 632 826 +3994 PF03967 PRCH Photosynthetic reaction centre, H-chain N-terminal region Finn RD, Bateman A anon DOMO_DM03113 Family The family corresponds the N-terminal cytoplasmic domain. 20.10 20.10 20.20 120.20 20.00 17.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.59 0.71 -4.58 21 85 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 78 111 27 89 105 137.20 45 53.31 CHANGED Ms.sGshhuahDlAplslYsFW.lFFAuLlaYLppEs+REGYPLps-.ssustsspu......hhslPpPKTFcLtcG+.shslPsspp-....psclshsposshsGuPahPTGsP...hsDGVGPuuaA.RpDhP-lshcGps+IsPLRls ....M.tsshhuahDlAQlsLYsFW.lFFAGLlaYL+pEs+REGYPL-s-..ss....s....phtstG.....hhslPpPKTFhLsc.Gp.TholPsspss....ptsls..hptsushsGuPhhPTG.sP...MlDGVGPuuaAsRpDhP-Lsh.cGps+IVPLRl... 
0 6 14 17 +3995 PF00432 Prenyltrans prenyltrans; Prenyltransferase and squalene oxidase repeat Bateman A, Finn RD anon Pfam-B_130 (release 1.0) Repeat \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.07 0.72 -4.38 69 2497 2012-10-03 02:33:51 2003-04-07 12:59:11 16 40 482 411 1550 6750 154 43.70 26 14.26 CHANGED hshcphhpalhppQp....-GGasspsss............pscsttohhulsuLslls ............................pplhpalhp.pQp.......sG.Ga.s.s.+.ssp.................................hsDs..ha..ohashsuLpll............................. 0 520 866 1268 +3996 PF01080 Presenilin Presenilin Finn RD, Bateman A anon Pfam-B_789 (release 3.0) Family Mutations in presenilin-1 are a major cause of early onset Alzheimer's disease [2]. It has been found that presenilin-1 (Swiss:P49768) binds to beta-catenin in-vivo [4]. This family also contains SPE proteins from C.elegans. 24.50 24.50 24.50 24.70 24.00 24.40 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.44 0.70 -5.64 6 478 2012-10-02 13:41:03 2003-04-07 12:59:11 12 7 156 1 255 421 7 264.60 37 83.85 CHANGED cEEtsLKYGApHVIhLFVPVoLCMllVVuTlpolpFYsppsu.pLlYTPFscpo.osup+hLsSlhNuLlhIuVlVlMThLLlVhYKa+hYKlIHuWLIlSSlhLLFlFohlYLpElh+sas.......lshshsTlhlhlhNFGslGMhsIHWKGPLRLQQhYLIhluALMALVFIKYLPEWTsWhlLssISlWDLVAVLsP+GPLRhLVETAQERNEslFPALIYSSs.hhhhVs........sss.t..posc.s..............................ssptps.........phsp.pps.ptpcDsusoptpp.....o...............ppsuhhhs....cphttph.-lps....s...p....pt..........pEERGlKLGLGDFIFYSVLlGKAuuo..GDWsTTIACFVAILIGLClTLlLLAla++ALPALPISIshGLIFYFuTchllpPFh- 
................................................................................lh.PV.hs.Mh...h...s..h.h......h........................................................t..................................h.......h.h.............t...................t................th..t....uhhsshhhhshlhhhThhhlh.Lah.....hphhthlhsahhhushhll.hhh.sh.hh....lhptht.......hshD.h...oh.h..hhhhN.hu.slG.h.hs.l.a..h.t..s..sh...l....pQhYllhhuslhA..h.......hhphlP-...WosWhlLhhhulaDlhAVLsPhGPL+hLl-hAppRsc...PuLlYput......h....................................................................................................................................................................................................................................................................................................................................................................................................................................ttslKLGLGDFlFYSlLlu+....Au............sh.s.hhsshl.ullh.GLshTLhlLuh.h.p...c...sLPALPlSlhhuh.....hhhh.st.hh.................................................................................................................. 0 93 133 193 +3997 PF03991 Prion_octapep Prion_octopep; Copper binding octapeptide repeat Bateman A anon Bateman A Repeat This repeat is found at the amino terminus of prion proteins. It has been shown to bind to copper. 12.10 0.50 13.10 0.50 11.80 0.40 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.22 0.75 -5.16 0.75 -2.73 5 127 2009-09-13 17:14:17 2003-04-07 12:59:11 7 9 13 0 5 130 0 7.80 94 27.84 CHANGED PHGGGWGQ ..PHG.GGWGQ. 4 0 0 0 +3998 PF03063 Prismane Prismane/CO dehydrogenase family Griffiths-Jones SR, Bateman A anon Pfam-B_2956 (release 6.4) Family This family includes both hybrid-cluster proteins and the beta chain of carbon monoxide dehydrogenase. The hybrid-cluster proteins contain two Fe/S centres - a [4Fe-4S] cubane cluster, and a hybrid [4Fe-2S-2O] cluster. 
The physiological role of this protein is as yet unknown, although a role in nitrate/nitrite respiration has been suggested [1]. The prismane protein from Escherichia coli was shown to contain hydroxylamine reductase activity (NH2OH + 2e + 2 H+ -> NH3 + H2O). This activity is rather low. Hydroxylamine reductase activity was also found in CO-dehydrogenase in which the active site Ni was replaced by Fe [2]. The CO dehydrogenase contains a Ni-3Fe-2S-3O centre. 21.70 21.70 28.20 24.40 20.20 19.50 hmmbuild -o /dev/null --hand HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.80 0.70 -6.13 251 2395 2009-09-15 11:55:15 2003-04-07 12:59:11 15 14 1486 49 664 2022 69 468.60 34 95.85 CHANGED MaChQCpps....ts.suC.phs.................GVCGKss-sushpDlL.lhshpGluthupc.u+ch..........tspphhhpuhFsT.....................lTNssFcspphlphlpcshsl+...pph...............................................pshpphstpss.........phhlsthtpp..sp..DlpuLcphlhaGlKGhs.........AYtcH............AhhL.Gtt-c-.lhphhpcshssh..ss.csls......tllshslcsGths.hpsMtlLDpussttaGsP....ps..TpVslGlh.ps..ssIlloGHDlpshc.LLcQTcspGlslYTH.....GEhLPupsYPsh+..pYpH.......hsGsaGsuWppQpp-FsuhsusllhToNClhPs..h.p.sp...Y+s+laTTuslua...PG.spHI...p.............tpt.....t......................cDaotlIcpAlcshs.p.............c.............ps.p......lhs......GFu+psllshu..............c.......................tll-A.....................................................VKsGsI++hhlluGCDGt+ssRs..Yas-hAcpL.PpDsllLTsGCuKa+a...s+hsLGsl.........G.G...................IPRlLDhGQCNDuYuhlhlAhtL..AcsF......s.s.-lN-LPl.....uhsluW..aEQKAVslLLsLLsLGl+sl+LGPohPuFlSPsVhplLs-s....a..slss..hs.slEpDhpthh 
..............................................................................................MaChQCp........ssuCpht.................GhCGKss-sushQDhL.lhshp..G.lu.sas..hc...uRcht.............................scphh.puhatT..................lTNsNFcs.phlth..h.tp.t.hthcpth.....................................................................................................tshtph..tpst.......ph...h..t..ttt...tt..shhslc.hhhhsh+sh...........ahcp............uhh.....sthc...........p-lh.t.....hpp....hh.s.hh..ss....shs..........llthshchGths.hpsMtlLDtupTtt..a....GpP.............ps...opVslt....sh.ts..hsIlloGHDLpslttLLcQ...o...c...u....p......G....l....slYTH......GEMLPupsYPth+..+apH..................hsGshGsuWtpQph.FsthsusIlhToNClhss..s.......YpsRlaTpu.suh......PG.s.p.H.l...p...............................t....................cDFs.lIppAhph.s.......t...t...hhhshshs....................................................slhuhActll-h......................................................................VppGpl++hhlluGCDuhpst+p....Yas-hAppl.PpDslILThuCuKa+a...s+.hshGsl..............s.G...................lPRllDhGQCNDuhuhhhlAhtL..uchh.............s.h..slN-LPl....shsluW...aEQKAlslLLs..LLtLGlpsIhhGPshPsFhosslhtl.Ls-p.......a.....sltsls..ss-pDhpth.............................. 0 274 478 585 +3999 PF00484 Pro_CA Carbonic anhydrase Finn RD, Bateman A anon Prosite & Pfam-B_9319 (Release 8.0) Domain This family includes carbonic anhydrases as well as a family of non-functional homologues related to YbcF. 
22.10 22.10 22.10 23.40 21.90 21.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.89 0.71 -4.22 497 5590 2009-01-15 18:05:59 2003-04-07 12:59:11 14 20 3442 146 1635 4073 995 149.00 28 64.39 CHANGED slhluCuDSRls..sphlh.shtsG-lFVlRNsGNlls................................stss.huul-a.......AlphLpV.ccIlVhG....HssCGulp......su..........h.............................shlsp....hl....pthpsshpp.......htt..tt......................pph..........ph.............ctNVhpplppLppp..Phlp....pthtpup..........lplpGhhYclps......Gpl ...................................................lhluCuDSRls.....sphlh..s......h......t......s......G..-.lF.Vl..RN..suNlV...........................................................stsshu....ul.pa.....................AVp.....h.L.p..V.c.......cIlVhG....HssCGulp.......us........................................hp.........................s.................hshlpp..a.l..pp.h.p.ss.hhp.........hpph.h.tt..........................................s.pph....th......hc.sVhppltpLtpp..shlp..................t.t.h.t.c.sp.............................ltl+GhhYslpsGp................................................................. 
0 480 1010 1381 +4000 PF01619 Pro_dh Proline dehydrogenase Bateman A anon Pfam-B_1092 (release 4.1) Family \N 27.00 27.00 27.10 27.00 23.80 24.20 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.91 0.70 -5.44 15 3379 2012-10-01 19:29:00 2003-04-07 12:59:11 13 21 2877 20 991 2741 1035 291.10 31 35.23 CHANGED lsG-sltpslcpscpLc....ppthphohDhLG.........Etspsss-Apphhhshtpslpslucpstst...........thhthPshph.phstLh.phs.....shpp-h...shpphlt+lcslsptApchslslplDAE-pphhpho......lclhtc..h.p.tppshsslGsslQAYL+csspslctlhslAp+pshh....htlRLVKGAYh-uEtp+Ap..Gh.s.ssaopcspoDhtap...shschlhtscsh......lastlAoHNscolAhshplscppt.hs...spaEFQpLaGMu-tlpppLspps......hssRhYsPaGsh...pchluYLlRRLhENsuNsuFsp+thshp ....................................................................................................................................Gpsltpshtthcthp....ppGhphoh.D...hLG...................................Essh.stt-.Apthh..tpa.....phlc...slspt.s....s.....................................s.....h....h......photlp..s....+.h..s.........phphct..........shpph.h...s...p.l...pp...l...s...th...A.........c..........p...h..........s...l.t.lsIDAE-tsc...L....-ho...........................lclhcc.....h..............p...........................h.......t........s.......a..........s...s.......l.G..hVlQAYh...+cs..slchLh.chApcp....tht.....lhlRLVKG.AYh-sEhth...up..........G..........h..t........h.ssaoc...Kht.o.Ds..sYh......................shh....c....h..l..L..t...s..sph.......................lasthATHNspol..uth....h.p......h....s..........t...p...p...............h....h............................................................s..p...............aE..F..QhLaG........Mu-s....lhc....p....l..ss..ps............................hss.RlYsPh...Gsh...cs...hluYLlRRLhENs.ANsuFsp+hhp..t........................................ 
0 297 600 836 +4001 PF00160 Pro_isomerase pro_isomerase; Cyclophilin type peptidyl-prolyl cis-trans isomerase/CLD Sonnhammer ELL, Wuster A anon Prosite Domain The peptidyl-prolyl cis-trans isomerases, also known as cyclophilins, share this domain of about 109 amino acids. Cyclophilins have been found in all organisms studied so far and catalyse peptidyl-prolyl isomerisation during which the peptide bond preceding proline (the peptidyl-prolyl bond) is stabilised in the cis conformation. Mammalian cyclophilin A (CypA) is a major cellular target for the immunosuppressive drug cyclosporin A (CsA). Other roles for cyclophilins may include chaperone and cell signalling function [1]. 20.80 20.80 20.80 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.41 0.71 -4.01 168 13237 2012-10-02 15:38:38 2003-04-07 12:59:11 16 154 4706 283 5300 10643 4524 159.30 35 60.14 CHANGED hlph..ps...........G.plhlc....L.hsctu.PhsspNFl.p.........Lsp..................pG..........................aYc..sshFa......R.l..........lps...F........hl...........Q................uGc...........................................t......ss.............s........................h.sEh..........................................................................p............pt..GsluMA.pt.....................so....s.uSQFFI......sh..............................ts.........................s.......tL..D........s..........ta.slFGcVl..pG...h-.llcpIp.p........s.........................lhIhpstl 
....................................................................................h...lpTs......hG.c.I.t.lc...L....a..s..c...t......u.Pp.Ts.c.........N....Fh..s...........L..sp..........................pG.................................................................aY.c....Gs.h.....FHR...V....................................Iss...........F...............................Ml................Q............................G.GD...............................................ss...sst.........................................hut............p....................hpsE.h............................................................................................................tlpp...........tt..GsL.u.MA..psG..........................Pso..................s.GSQ......FFI..sh........................................................................................s.s.......................................ss...........aL..D........s....................pa..sVFG.c....Vl..........cG..............h-...V..l.c..pIp.p..........................stsst.............stPhps..lhItpst................................................................................................................................................................................... 
0 1874 3188 4422 +4002 PF00235 Profilin profilin; Profilin Finn RD anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.69 0.71 -4.00 72 1081 2012-10-02 21:07:43 2003-04-07 12:59:11 14 8 440 49 440 984 4 121.20 33 90.65 CHANGED SWQuYVD.....ppLhus......Gp.lspAAIlG.p.D...G.u...........lWA...pSss...F.plps....................pEhssIhss.Fpp......s...sslhssGlpluGpKYhslp..u-.....sps..lh.uKKu.psG.....lslhKTspAllluhYs-s...................hpsGp.sspsVE....cLuDYL ..............................................................uWpsYlD......pL.hss..........up..lssAAIlG..pD.....G..s.........................VW..A.pSss.....F.phps.............................pElsslhss.Fs-........s......spltssG..lhluGp..........KYhllp.....u-.............sss....lh..u.....K.....K..u....suG...........lsltKT..s.pAll.lu.hYcEs...................hpsG.p.sshhlcplucYL................................................................................... 
0 172 257 346 +4003 PF02161 Prog_receptor Progesterone receptor Mian N, Bateman A anon IPR000128 Family \N 24.10 24.10 24.10 24.80 23.90 24.00 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -13.11 0.70 -5.68 5 113 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 60 0 25 104 0 382.60 60 62.18 CHANGED MTElpuK-sRAPHsAGussSP..s.pP..tRpDussh.uSQsSD..........s.utsSulPlu.LDuLLFsRpsQup-.P-EKsQ-pQshsDVptA.sclEAocs+GusSsRP..PcpDsssLDSVLDTlLAPSGPuQupsS.PAhEstoSWCLFGPElPEDPRus.uopthloPLMSRPEuKAGDuuGhuuupKs.P+ulSPSRQ.L.PssGustWPGAsuKsusQsAsluVEE-uGhcAEGSsGPLLKGKPRsLuGsuuGGGAsAsAPGs.sGGhs.VPKEDSphuAP+sSLsEQDAPsAPGpSPLATTshDFIHVPILPLNsAaLAARTRQLLEuE.oYDGGA.....FAPPRSSPSAsssPVPuGDFPDCuYPP-u-PK-DuFPlYGDFQPPALKIKEEEEGsEAAuRSPRPYLuAGAuuAsFsDaP......PPhPPRAPsSRPGEuA...sAssususSssSSsGPoLECILYKAEGAPPsQGPFAsuPC+sPuAuuCLLPRDuhs......uAssSuAAPALYsPLGLNGLPQ.LGYQAAVLKEG.LPQVYPPYLNYLRPDSEASQSPQYSFESLPQ .........................................MTELpAKssRssHsuGusPSP.pluSPL.sR.sss.F.uSQsSD..........s.s.sSulPIS..LDGLLFPR.CQGp-..stKTQspQsLuDV-GAa..stsEAspusG.....usuups..PEKDpsLLDSVLDTLLsPuGstQSpsS.P.A.sEshosWCLFGsELPEDP.us...PuTptlLsPLMSRstsKsGDuSthuAupKlhPpGLSPspQLL.PsssustWsGAssKPus.ssssp.sEE.-su.cs-t...ususlLKucPRs.tGssttuth.ssssu.hs.Guhs.sP+EDuRhsAs+suL.E.pDuPhAPGRSPLAToh..hDFhHVPILPLstAhLAAR..TRQLLEt-.sYDGGA.........FusPRuSPsAsSsslssuDFP-ssY.s.-s-sK-ssa....shau-FQ.PsLKIKEEptusps.........sYhsuustsss.h.ch..............s.ps...............................s.sssLEClLYKAEs..A.st.....ssass.Ps+ssusuuClLP.tp................s.ssuuu...sP....slY.sLuLNGh.Q.LGYQA.AVlK-u.LsQVYPPYLNYL..R.PDoEsSQSPQYuF-SLPQ........... 0 2 2 6 +4004 PF02244 Propep_M14 Carboxypeptidase activation peptide Bateman A, Mian N anon Pfam-B_2335 (release 5.2) Domain Carboxypeptidases are found in abundance in pancreatic secretions. 
The pro-segment moiety (activation peptide) accounts for up to a quarter of the total length of the peptidase, and is responsible for modulation of folding and activity of the pro-enzyme. 22.10 22.10 22.10 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.14 0.72 -4.31 103 1016 2009-01-15 18:05:59 2003-04-07 12:59:11 11 15 167 24 581 999 1 73.10 22 16.68 CHANGED a+l...pspsppplplLppLp...ps.plcFWpsss....thspssclhVsspphtshpshLpppslpaplhlpslQphl-pEp .........................hcl.spspppl.phLppLp...........pp.pl.cFWpsss.......t.sps.lDlhVss..p.ph...p.s.hpshLppp.sl.p.a.p..lhlc.slQphl-pp......... 0 141 201 408 +4005 PF04352 ProQ ProQ/FINO family Mifsud W, Moxon SJ, Bateman A anon COG3109 & Pfam-B_7673 (release 7.7) Domain This family includes ProQ, which is required for full activation of the osmoprotectant transporter, ProQ, in Escherichia coli. This family includes several bacterial fertility inhibition (FINO) proteins. The conjugative transfer of F-like plasmids is repressed by FinO, an RNA binding protein. FinO interacts with the F-plasmid encoded traJ mRNA and its antisense RNA, FinP, stabilising FinP against endonucleolytic degradation and facilitating sense-antisense RNA recognition [2]. 21.70 21.70 21.80 22.00 21.00 21.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.39 0.71 -4.47 46 1580 2009-09-11 08:23:37 2003-04-07 12:59:11 8 2 1071 7 186 857 21 115.00 41 57.46 CHANGED p+hsss.ppslshLtcpFPtsFstpst.hpPLKlGIapDLhtclpptt...lS+spLRpALppaTpuhRYLpuh.ptGssRlDLsGpssutlot-csp+ApppLtcp+p.csttcctppt..s .............................phpsscEslshLtcpFPtsFss..-st...s+PLKlGIhpDLl-cl....ut..c........LS+spLRsAL+hYTp...ShRYLtul.KsGAsRhDL-G..pPs....Gclsppcsp..aAppp......Lpct+s...+sptpcttQ.ut........................................ 
0 25 76 133 +4006 PF02428 Prot_inhib_II Potato type II proteinase inhibitor family Bateman A anon Pfam-B_2913 (release 5.4) Domain Members of this family are proteinase inhibitors that contain eight cysteines that form four disulphide bridges. The structure of the proteinase-inhibitor complex is known [1]. 20.40 20.40 20.90 20.40 20.30 20.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.21 0.72 -3.91 10 341 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 42 25 13 355 0 48.10 62 74.95 CHANGED sKACTpECDPclAYuhCPRSEGs..h.sslCTNCCuGhKGCpYYusDGoFICEG .......PKACP+NCDsRIAYulCPpSEpp..tps.pICTNCCAG...p..KGCpYFSsDGTFlCEG............ 0 1 8 11 +4007 PF00260 Protamine_P1 protamine_P1; Protamine P1 Finn RD anon Prosite Family \N 20.50 20.50 20.70 20.60 17.20 17.60 hmmbuild --amino -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.00 0.72 -4.15 14 147 2009-01-15 18:05:59 2003-04-07 12:59:11 15 1 133 0 9 97 0 47.40 74 96.52 CHANGED ARY.RsCRSpSRSRC.RRRRRRs+...............RRRRRpsRRRR...Rt..ssRR...Ysh..RpRR .ARYRCCRSpS..RSRC..RR...RRRCR.RRRRRCCRRRR.Rs..CCRR...YThhRCtR.. 0 1 1 1 +4008 PF00841 Protamine_P2 protamine_P2; Sperm histone P2 Bateman A anon Pfam-B_1350 (release 2.1) Family This protein also known as protamine P2 can substitute for histones in the chromatin of sperm (Swiss). The alignment contains both the sequence of the mature P2 protein and its propeptide. 25.00 25.00 25.30 37.70 24.10 18.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.63 0.72 -3.43 4 60 2009-09-11 10:14:09 2003-04-07 12:59:11 14 1 43 0 8 54 0 92.80 65 88.38 CHANGED MVRYRVRSPSEsPHps.tQphcspEQG....p-QGLSPEcVEsYGRTHpG+aHYR+RpCSRRRLhRlH+p.+RSCRRR+R+uCRHRR........R+RRGCRpR .MVRYRhRSPSEpP..Hps.GQ.phctpEQG....ptQGLSPE+VEsY.GRTHRG+p.HaR+R+CSRRRL+RIHRR..+RSCR.RR.RR...RSCRHRR...........RHR..RGCRpp........... 
0 1 1 1 +4009 PF03247 Prothymosin Prothymosin/parathymosin family Bateman A anon Pfam-B_3463 (release 6.5) Family Prothymosin alpha and parathymosin are two ubiquitous small acidic nuclear proteins that are thought to be involved in cell cycle progression, proliferation, and cell differentiation [1]. 23.90 23.90 24.10 24.20 23.70 23.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.24 0.72 -11.24 0.72 -3.89 8 185 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 42 2 61 210 0 90.50 59 75.61 CHANGED uDsuVD..uusElosKDLKEK.KElVEEsEsuK-....sPsNGst.sEENGppcuDsp.--EEEs..-E--EE--G-G---Es---E.......Es-GsTsKRAAE...-E-D-s-sKKQKTD-sD ............u-tsV-..susElosKDLKEK.KEl..VEEuENG..+-......APANGNs...NEENGEpEuDsEs-EEEEp......tE-p.....E......EE-s....-.GEEE-s-E-E.......E.sEusssKRAAE.......pE-D-lDsKKQK.T-ps................ 0 3 8 21 +4010 PF05044 HPD Prox1; Homeo-prospero domain Moxon SJ, Bateman A anon Pfam-B_5293 (release 7.7) Domain Prospero is a large drosophila transcription factor protein that is expressed in all neural lineages of drosophila embryos. It is needed for correct expression of several neural proteins and in determining the cell fates of neural stem cells. Homologues of prospero are found in a wide range of animals including humans with the highest level of similarity being found in the C-terminal 160 amino acids. This region was identified as containing an atypical homeobox domain followed by a prospero domain. However, the structure shows that these two regions form a single stable structural domain as defined here [1]. This homeo-prospero domain binds to DNA. 
25.00 25.00 42.70 42.70 22.60 21.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.94 0.71 -4.56 13 191 2009-09-11 05:20:22 2003-04-07 12:59:11 7 3 88 3 116 167 0 139.90 65 21.88 CHANGED psLTPhHL+KAKLMFFYTRYPSSslLKsYFPDV+FN+ssTuQLlKWFSNFREFYYIQMEKaARQALuEGlsssc-lhVo+DSELa+sLNhHYN+sNchEVP-pFltVsppTLREFFsAIpuGKDs-PSWKKsIYKVIs+LDcpIPEhFKSPNaL-cLp ..................p.sLoPsHLKKAKLMFFaTRYPSSshLKsY...FsDVKFNRClTSQLIKWFSNFREFYYIQMEKaARQAls-GVos........s........c..-LslsRDsELaRuLNhHYNKuNDFE.....V.P.-pFlcVsphTL+EFFpAI.uGKDs-PSWKKsIYK...lIs+LDs.lPEhFKossh..................................................... 0 23 31 75 +4011 PF02840 Prp18 Prp18 domain Bateman A anon Bateman A Family The splicing factor Prp18 is required for the second step of pre-mRNA splicing. The structure of a large fragment of the Saccharomyces cerevisiae Prp18 is known [1]. This fragment is fully active in yeast splicing in vitro and includes the sequences of Prp18 that have been evolutionarily conserved. The core structure consists of five alpha-helices that adopt a novel fold. The most highly conserved region of Prp18, a nearly invariant stretch of 19 aa, forms part of a loop between two alpha-helices and may interact with the U5 small nuclear ribonucleoprotein particles [1]. 
22.10 22.10 22.30 22.30 21.10 22.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.81 0.71 -4.38 29 352 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 294 2 249 327 2 137.60 42 40.45 CHANGED l.............htalphl.....LpcWcppLpscppp....tpohpuc.s........hsthhQT+cpl+PLhcpl+p....p..pLspDILpsLspIlpthp.pRc.......YhcAsDuYlcLuIGNAsWPlGVTMVGIHpRo.....uRpKIau.sp....l..AHlhNDEpTRKYIQulKRLlTFsQphaPs.c ..............................................htahphlLppWtppLspcppt................+pohpGK.s.................sshhQocchl+PLF+pl+c.........p..sLssDIhpslsc.Il.ct.h..p.Rc.......YlcAsDuYLcluIGNAsW.....PIGVTMVG.IHtRo.....uREKIas...pp.....lAHlhsDEspRKYlQulKRLhThsQppaPsc......................... 1 85 132 201 +4012 PF02340 PRRSV_Env PRRSV putative envelope protein Bashton M, Bateman A anon Pfam-B_939 (release 5.2) Family This family consists of a conserved probable envelope protein or ORF2 in porcine reproductive and respiratory syndrome virus (PRRSV) also in the family is a minor structural protein from lactate dehydrogenase-elevating virus. 25.00 25.00 27.00 29.20 21.20 21.00 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.51 0.70 -5.19 4 222 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 9 0 0 236 0 230.50 83 92.22 CHANGED Mh.hSSLhPhLI.hF.hsFCLu.PSPsGhW.hhSDWFuPRaSVRs..hT..sYRR.acshlp.CpPDl.paG.Kt.hGMLWHMKVuThlD-hlppRlhppMpHpGptsWtQVhoctsLppIushslVsHhQplAulEtEuCpYLhSRLPhlpshussh.NVTIpa...LNpshhI..uPuspshhss.+.WLlShpSSlFSSVAAussLaIVhhLRlP..RpVFGFhahpts++o .................................MLSRs.WCPLLISSYFWPFCLAS.SPVGWWSaASDWFAPRYSVRALPFTLSNYRRSYEAFLSQCQVDIPTWGsKHPLGhLWHHKVSTLIDEMVSRRMYR..IMEKAGQAAWKQVVSEAT.....L.....SRIS.uLDVVAHFQ.HLAAIEAETCKYLASRLPML.H.NLRhTGSNVTIVYNSTLsQVFAIFPTPGSRPKL.HDFQQWLIAVHSSIFSSVAASCTLFVVLWLRIPhLRoVFGFRWLGAhF...... 
0 0 0 0 +4013 PF01366 PRTP Herpesvirus processing and transport protein Bateman A anon Pfam-B_1171 (release 3.0) Family The members of this family are associate with capsid intermediates during packaging of the virus. 20.30 20.30 24.90 24.60 18.90 18.70 hmmbuild -o /dev/null HMM SEED 638 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.96 0.70 -6.53 32 211 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 109 0 0 193 0 587.10 37 83.17 CHANGED htQcLsAlhuQlpshshplplL+hCDPss..h...hphsslphNshhlphLpcplhstLtpQs.phpsosLoltLphhLcshptcsttlhpuLp...p............tppaFppsh...tLs..ttCshHpplplshYG.sttlslplshlNDlEphLKpLNhsahhlsspsulpslpplhpFLschhGsuslssP-lYssop.PCh.Ca-ELslssNQGcolp+RLhsphCsHlspph....lpsth-s.lpplshshshsspch.......t.ht......................htspptpsps..................hhppAtphLcsaslFp.ssstplYplS-LpYWsuoupt...............tsshcthsssLspLhp+Epphcthpstl.phhlhscsspHFhchatsp.............sh-pLasGuhhsSs--hI-ALhpsChspahspPhhpcLhppps-hhstLpplLpphp............................sssssssststs...................................................t....phst.ttsp.t......................................................pshp-sphR+ctYhc+lo+cuhspLtpClcpQcc.LpKhLslsVaGsslhcphsplhNtFhtRptalpts.htst........spssstsF-sppal+ssLhppplssphLssLsppFacLlNGPLhscs.chFs.PsNssLhasl-ssGlLPHhKp-Lschhhsshps.pDWhsspFppFYsF....ss...pslsssQ+hs 
....................h.hQ+LhslhupsppashplEhL+hCDPpl..hh..tc.sshKhNuhtlhhLh+plhPtlhtQs.ptp.o.LolhLchlLcthhc-sthL.tuLtsat......t...t.D.........httaappsh...tLs....CshHpplpLphhs.ss.hshpLshLpDlEpFLpphNasahlhsspsultshtplhphLtphsGhu.ls..Elas.up.sCh.CaEELslhsNQGcolp+RLtshlCsHlshp....pspsp.-s.hppl.pshh..stchs...tslsslct.....................l.upsss.ps...................hhp-ApthLctaslFp.shstplYulS-hpaW.tou.................pshhcths.slppLshhcp.hhcthhssl.phtLaGcpscch.thhtt.tls...........h.-plhlGuhhsuPschI-hlhphshpta.ssPlhp+L.p.ppp.hstl+plLpclp......................................s............s......................................................t...................................................................shtthhppstp-sphR+RtYhp+lSchuhuplh+Cl+pQcp.lpKhlcVNlhGplhhchhuplhNGFhhRppahpts...ss........shusthsaDtHhalhssLl++plssthLPtLspphacLlNGPLFsHspcpas.P.Nsshhaus-NsGlLPHlK--Ls+hh.u............ssts...s-WhVscaptFasF......ss..hpslsshQ+th............. 0 0 0 0 +4014 PF02666 PS_Dcarbxylase Phosphatidylserine decarboxylase Bashton M, Bateman A anon COG0688 Family This is a family of phosphatidylserine decarboxylases, EC:4.1.1.65. These enzymes catalyse the reaction: Phosphatidyl-L-serine <=> phosphatidylethanolamine + CO2. Phosphatidylserine decarboxylase plays a central role in the biosynthesis of aminophospholipids by converting phosphatidylserine to phosphatidylethanolamine [2]. 
20.00 20.00 20.20 20.30 19.80 19.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.26 0.71 -5.02 112 3949 2009-01-15 18:05:59 2003-04-07 12:59:11 10 31 3153 0 1299 3080 1984 203.70 27 63.28 CHANGED hstaFsRth+sst..Rsl......stsss..........hllSPuDGplhth...l.....tt................h.hKt...hthtphlss..................................................................................................................ps..shhlslaLuPtDYH+h+sPssGplpp.hpahsGch...................h...............................sss.hthpp.................................................pNERshhhhp......hu.t......hhhltluuhhlspIhhth.t.........................................................................tstplp+GcchGhFph.GSsllllh.ts.h..................p.plphGp.plphGps.ls....t ..................................................................................................hstFFsRhl+sss........R..s..l..............spsss..........................hlluPADGt.l......sph...sp.l..p..ttp................h..h.KG...ta.shppL.Lus...........................................................................................................................................................................hps..shhsslaLuPtDYHRhHhPss.G.p.l.p.c....h.hahs.Gc.h........a.........................................................sVs.hpupp..............................................t.h.h.s.c.N.E....Rshhlhc.......sphG..............................hshl.VGAhhVupIhh..sh......................................................................................................ptu.plp+Gc-hGhF+h.GSTl.l.l.lhstst.....................p...p..lp.....sp...hsphGptlh.t......................................................................................................................... 0 425 811 1087 +4015 PF04230 PS_pyruv_trans Polysaccharide pyruvyl transferase Kerrison ND, Finn RD anon COG2327 Family Pyruvyl-transferases involved in peptidoglycan-associated polymer biosynthesis. 
CsaB in Bacillus anthracis is necessary for the non-covalent anchoring of proteins containing an SLH (S-layer homology) domain to peptidoglycan-associated pyruvylated polysaccharides. WcaK and AmsJ are involved in the biosynthesis of colanic acid in Escherichia coli and of amylovoran in Erwinia amylovora [1]. 25.30 25.30 25.40 25.40 24.80 25.20 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.77 0.70 -4.70 163 2481 2012-10-03 16:42:30 2003-04-07 12:59:11 8 39 1781 0 656 2150 297 275.20 15 70.68 CHANGED NhG.Dtl...........hhuhhphLpptt..psplhshs.......t.ttttthht.h...............................................................................................lllsGu...................................t.....hh..........hhhhhhtpthpp.h.......lhhutuh...................t...h..............ppttpp...hhpphhpp.hsh...lslR-ph.Shp.........htphsh.......psthssDss.h..hht..................................ttthhthtptst................t.thtphhhhhhphhhp.pthphhhh.ht.t.t.t..p.h............................h.hh...hsspchhshls.pschllssRhHuhlhuhhhshPsl.slsh.ss 
................................................................................................................................................................................................................................NhG.s.h....h....hhuhhph.l....pp.........t...p....h....ph.shs......................t....t.t.....h.ht.h...h..................................................................................................................................................hlhsGu............................................h..h............hthhhhsth.hpp...h.......................hhhutul...........................us..ht.................................stthpp...........hhp.hh..hsp..ssh.....lslR-ph.Shc......hhpphslp................psphssDss.h......hls.......t...t..........................................................htthh.http.t...............................tth..t..pt.hhthhph..hhp.........pthp.hhh.h..s..hth.t...t........p.h..........................................................thh.h.h...t.hs...st...ch.hph...ls..psch.slusRhHuhlhuhhhshPhl.sls......................................................................................................................... 
0 230 461 572 +4016 PF00223 PsaA_PsaB psaA_psaB; Photosystem I psaA/psaB protein Finn RD anon Prosite Family \N 19.00 19.00 20.60 19.50 17.90 17.50 hmmbuild -o /dev/null HMM SEED 684 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.30 0.70 -6.45 22 3289 2009-09-12 09:11:12 2003-04-07 12:59:11 14 18 1996 14 178 2825 824 538.70 58 96.78 CHANGED FSpsLApsP.....TTphIWphhAsAHD.F...-oHsu.s.Epl.pKlFuuHFGHLAlIFlWhSG.hFHsAh.uNapsWlpDPhpl+P.ApslW.PhhGQphls......scsGush...shhssSGlaphWhshGhsophpLhtsulhhLlhAulhLaAG............WhH.hp.th........................PpLtW........FpssEShLNHHLuuLhGluSLAWuGHhlHVAlPhsphhcs.............hsshhpl.PhPpsL.......................sPFFohNWutYup..................hLTFpG.....G...LpPtTGuLW..LTDlAHHHLAIAllFIlAGHMYRTsa.GIGHshKEll-upps.................hs.uHpGLa-hlssShHhQLulsLAhlGolo.lVApHMYuhPsYsYlupDYsTphuLaTHH.aIuGFlhVGAhAHuuIFhVRDY..DPthN..........tsNlLsR.......lLcH+-AIISHLsWVslFLGFHohGLYlHNDoMpAhGcPp.....ptI.lpPlFAQaIQuhp..upsh........sssu....s......s.s.hhsG....lsuls.scss.hhl.lGsuDFLVHHhhAhslHsTsLILlKGsL.ARuS+LhPDKtshGapFPCDGPGRGGTCplSuWDphaLulFWMhNoluhVhFaapWKh.S.hhGss................................................upFspSShhlsGWLRDaLWtpSSQlIsuYssh.....LSsauhhFLhuHhlWAhuhMFLhShRGYWQELIEolVWAHpKh.lAstI.......pPhALSIsQGRhVGlsHahlGsIhThhAFhlA 
............................................................................................................................................................................s.Ecl.pplFuuHFGQLuIIFlWhSG.hFHsAh.uNapuWlpsPhpl+P.ApslW...PhhGQ.hlp.............scsGush.s..hhSGhaQhWhs.GlpophpLYssAlhhLhh..uulhLhAG............WhH.hp.th........................PplpW........FpNsEShLNHHLuGLhGluSLuWsGH.lHVulPhs+h.cs.............hss.h.-l..Ph.Ppth....................................sPFFohsWs.Yup..................hLTFhG.....G...h.pP.TtuLW..LTDhAHHHLAIAlLFll.AGHMYRTNa...GIGHShK-lLEAHpsP..................hhGpGHKGL.Y.-hl.ss.ShHhQLulsLA.lGslo.lVA..pHMYuhPsYsalApDasTQhuLaTHH.aIuGFhhsGAhAHuAIFhlRDY..sPp..ps..........hsNl....LsR.......hLcH+-AIISHLsWsslFLGFHohGLYlHNDsM.AhGpPp.....p.tI.lpPlFAQWIQssH..uhsshsh.....................ussu......u............spo.h.hh.sG......l.sAls.sp.s...h.lslGs.uDFLVHHhhAhslHsTsLILl.......KGsL.ARuS+L...hPD..KtshGapFPCDGPGRGGTCphSsa.D.t.h.................................................................................................................................................................................................................................................................................................................................................... 1 36 97 152 +4017 PF02531 PsaD PsaD Bashton M, Bateman A anon Pfam-B_1336 (release 5.4) Family This family consists of PsaD from plants and cyanobacteria. PsaD is an extrinsic polypeptide of photosystem I (PSI) and is required for native assembly of PSI reaction clusters and is implicated in the electrostatic binding of ferredoxin within the reaction centre [1]. PsaD forms a dimer in solution which is bound by PsaE however PsaD is monomeric in its native complexed PSI environment [1]. 
22.40 22.40 24.20 24.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.57 0.71 -4.97 20 196 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 148 6 61 196 124 127.70 66 81.82 CHANGED ssLssps..PhFGGSTGGLL+uAEsEEKYAITWTSpKEQlFEMPTGGAAlM+cG-NLLYLARKEQCLALuT.QLRs+F..KIpDYKIYRI..FPsGEVpYLHPKDGVFPEKVNtGRtslGpssRpIGpNsNPsslKFoGK.psa- .............Lssps..PhFGGSTGGLLppApsEEhYsITWsSsKEQlFEMPTGGAAIMpcGpNLLhLARKEQCLALG.T.QLRoKF..KIpcYKIYRl..FPsGElQYLHPKDGVaPEKVN.GRptVGps.RpIGcNssPhplKFoGKtsa-....... 0 19 43 56 +4018 PF02605 PsaL Photosystem I reaction centre subunit XI Bashton M, Bateman A anon Pfam-B_1741 (release 5.4) Family This family consists of the photosystem I reaction centre subunit XI, PsaL, from plants and bacteria. PsaL is one of the smaller subunits in photosystem I with only two transmembrane alpha helices and interacts closely with PsaI [1]. 25.00 25.00 40.70 39.20 22.20 21.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.99 0.71 -4.69 21 179 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 141 6 63 174 145 149.10 48 79.09 CHANGED pphlpshs.DPhVGsLuTPlsoSsho+sFIsNLPhYRpGLSPhhRGLElGMAHGYFLhGPFshLGPLRN.o-hAhlAGLLuulGLllILTssLohYGsls.........spsssh....................................-sLpTpcGWupFsuGFhlGGsGGAhaAahLlpsh.l.ts .........h.phlpPhssDPhlGs.LtTPlsSSshsthalsNLPAYRpGlSPlhRGLElGhAHGYhLlGPFshh...GPLRs.o-hAthAGhLuAlGLVlILTlsLolYGhsuFpp......spssss...shsh...p..........................s.-sLpTscGWupFouGFalGGhGGuhaAahLltslth..sh........ 0 20 47 59 +4019 PF00737 PsbH PSBH; Photosystem II 10 kDa phosphoprotein Bateman A anon Pfam-B_465 (release 2.1) Family This protein is phosphorylated in a light dependent reaction. 
20.80 20.80 20.80 21.30 20.70 20.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.50 0.72 -4.39 19 1173 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 1095 17 62 665 119 39.10 76 66.99 CHANGED pTtLGslLKPL.NSEYGKVuP..GWGTTPlMulhMsLFhVFLlIILpIYNsSllL ........TslGslLKPL..NSEY.GKVAP..GWGTh.hM............................. 1 14 42 56 +4020 PF02532 PsbI Photosystem II reaction centre I protein (PSII 4.8 kDa protein) Bashton M, Bateman A anon Pfam-B_1731 (release 5.4) Family This family consists of various Photosystem II (PSII) reaction centre I proteins or PSII 4.8 kDa proteins, PsbI, from the chloroplast genome of many plants and Cyanobacteria. PsbI is a small, integral membrane component of PSII the role of which is not clear [2]. Synechocystis mutants lacking PsbI have 20-30% loss of PSII activity however the PSII complex is not destabilised [2]. 19.30 19.30 19.50 25.70 18.50 17.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.58 0.72 -4.33 15 674 2009-09-11 13:22:19 2003-04-07 12:59:11 9 2 643 17 44 216 33 35.40 85 96.30 CHANGED MLsLKlsVYsVVlFFVuLFlFGFLSsDPuRNP..sR+D .....MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGRcE...... 0 13 31 40 +4021 PF01788 PsbJ PsbJ Bashton M, Bateman A anon Pfam-B_1227 (release 4.2) Family This family consists of the photosystem II reaction centre protein PsbJ from plants and Cyanobacteria. In Synechocystis sp. PCC 6803 PsbJ regulates the number of photosystem II centres in thylakoid membranes, it is a predicted 4kDa protein with one membrane spanning domain [1]. 21.20 21.20 21.20 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -7.91 0.72 -4.42 25 1304 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 1239 17 50 454 91 31.80 78 95.70 CHANGED Mus...TGRIPLWlVuTVuGluslullGlFFYGSYuGLGSSL ......MAD..TTG.RIPLW..l.IGTVsG.I.VIGLlGlFFYGSYSGLGSSL...... 
0 14 34 45 +4022 PF02533 PsbK Photosystem II 4 kDa reaction centre component Bashton M, Bateman A anon Pfam-B_1331 (release 5.4) Family This family consists of various photosystem II 4 kDa reaction centre components (PsbK) from plant and Cyanobacteria. The photosystem II reaction centre is responsible for catalysing the core photosynthesis reaction the light-induced splitting of water and the consequential release of dioxygen. In C. reinhardtii the psbK product is required for the stable assembly and/or stability of the photosystem II complex [1]. 25.00 25.00 25.50 25.50 23.10 22.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -7.99 0.72 -4.02 25 756 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 711 17 53 470 13 40.60 78 72.83 CHANGED shllAp..LPEAYuhFcPlVDVLPlIPlhFhLLAFVWQAAVuFR ..s.hFFuK..LPEAYAhhsPIVDVMPVIPlhFFLLAFVWQAAVSFR.. 0 13 35 46 +4023 PF02419 PsbL PsbL protein Bateman A anon Pfam-B_1884 (release 5.4) Family This family consists of the photosystem II reaction centre protein PsbJ from plants and Cyanobacteria. The function of this small protein is unknown. Interestingly the mRNA for this protein requires a post-transcriptional modification of an ACG triplet to form an AUG initiator codon [1,2]. 19.70 19.70 20.40 20.40 18.60 17.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.29 0.72 -7.54 0.72 -4.53 14 1035 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 985 17 49 215 89 36.70 90 86.43 CHANGED pssNPNppsVELNRTSLYhGLLLlFVLulLFSSYhFN ...TQSNPNEQ.NVELNRTSLYWGLLLIFVLAVLFSNYFFN..... 1 14 32 41 +4024 PF05151 PsbM Photosystem II reaction centre M protein (PsbM) Moxon SJ anon Pfam-B_6558 (release 7.7) Family This family consists of several Photosystem II reaction centre M proteins (PsbM) from plants and cyanobacteria. 
During the photosynthetic light reactions in the thylakoid membranes of cyanobacteria, algae, and plants, photosystem II (PSII), a multi-subunit membrane protein complex, catalyses oxidation of water to molecular oxygen and reduction of plastoquinon [1]. 21.10 21.10 21.40 21.40 20.80 20.50 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.06 0.72 -4.35 19 1155 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1094 17 44 275 4 26.80 81 83.33 CHANGED MEVN.LGFlAolLFlllPTsFLlILYlpTsu ......MEVN.LuFIAosLFILVPTAFLLIIYVKTVS.. 0 11 30 40 +4025 PF02468 PsbN psbN; Photosystem II reaction centre N protein (psbN) Mian N, Bateman A anon Pfam-B_2222 (release 5.4) Family This is a family of small proteins encoded on the chloroplast genome. psbN is involved in photosystem II during photosynthesis, but its exact role is unknown. 20.40 20.40 20.60 20.50 19.10 19.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.16 0.72 -4.34 32 1871 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 1799 0 52 442 322 43.00 83 95.48 CHANGED ME...sAhhlsIhluslLlulTGYulYsuFGPPS+pLcDPFE-HED .........ME...TATLVAI.ISsLLVSFTG.YALYTAFG.QPSpQLRDPFEEHtD... 0 14 38 48 +4026 PF04725 PsbR Photosystem II 10 kDa polypeptide PsbR Kerrison ND anon DOMO:DM04871; Family This protein is associated with the oxygen-evolving complex of photosystem II. Its function in photosynthesis is not known. The C-terminal hydrophobic region functions as a thylakoid transfer signal but is not removed [1]. 
21.80 21.80 21.90 23.50 21.60 21.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.37 0.72 -4.00 5 100 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 62 0 33 95 1 92.70 63 71.49 CHANGED SGuKKlKTD........cPY......GhGGGMsl+sGVDASGRKuKGKGVYQFVDKYGANVDGYSPIYoP-EWSPSGDVYVGGTTGLhIWAVTLAGLLuGGALLVYNTSALAs ..................................usKKIKTD..............pPa......GhuGGhsl+sGlDASGRKuKGKGVYQFVDKYG.ANVDGYSPIYss--WSsoGDVYsGGsTGLhlWAlTLuGlLuGGALLVYsTSALu......... 0 8 21 29 +4027 PF01405 PsbT PSBT; Photosystem II reaction centre T protein Bateman A anon Pfam-B_1880 (release 3.0) Family The exact function of this protein is unknown. It probably consists of a single transmembrane spanning helix. The Swiss:P37256 protein, appears to be (i) a novel photosystem II subunit and (ii) required for maintaining optimal photosystem II activity under adverse growth conditions [1]. 20.30 20.30 20.50 20.50 20.10 19.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.89 0.72 -6.96 0.72 -4.36 3 1892 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 1795 17 44 477 0 27.70 90 83.23 CHANGED MEALVYlFLLlGTLuVIFFAIFFRDPPRI ....MEALVYTFLLVSTLGIIFFAIFFREPPK.V.. 0 12 31 41 +4028 PF03912 Psb28 PsbW; Psb28 protein Finn RD anon DOMO:DM04467; Family Psb28 is a 13 kDa soluble protein that is directly assembled in dimeric PSII supercomplexes. The negatively charged N-terminal region is essential for this process [1]. This protein was formerly known as PsbW, but PsbW is now reserved for Pfam:PF07123. 
25.00 25.00 56.40 56.40 18.70 17.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.38 0.72 -4.09 34 184 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 126 1 63 178 102 103.70 44 82.25 CHANGED IQFhcGlsEpllP.-VRLTRS+.DGssGpAhFhFcpPps..lst.pt.s..-ITGMaLlDEEGEltTR-Vpu+FlNG.....cPpulEAsYhhcopp-W-RFMRFMcRYApsNGLuas .IQFhpGlsEpllP-V+LTRS+.sGssGsAhFhF-pPps..l-ptps..p.tsITGhaLID-EGElsop-VsuKFlNG.....cPptlEutahhco.p-W-RFMRFMcRYuptNGLsa..... 0 18 45 59 +4029 PF00849 PseudoU_synth_2 YABO; RNA pseudouridylate synthase Bateman A anon Pfam-B_421 (release 3.0) Family Members of this family are involved in modifying bases in RNA molecules. They carry out the conversion of uracil bases to pseudouridine. This family includes RluD Swiss:P33643, a pseudouridylate synthase that converts specific uracils to pseudouridine in 23S rRNA. RluA from E. coli converts bases in both rRNA and tRNA [1]. 23.80 23.80 24.10 23.90 23.70 23.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.13 0.71 -4.41 57 25319 2009-01-15 18:05:59 2003-04-07 12:59:11 17 48 4782 26 5572 17465 6565 147.20 25 50.03 CHANGED ph.lllsKPtGhsspsts............................thtthhth.hhtt..tpttphthlpRLD+sooGlllhupssphspplpp.h....tppp.lcKpYhuhl.............................................h...ptshhp..h..............................................................................ssptshshhcslpps.........................................sphshlplplhoG+pHQlR...tphst 
.............................................................................................................................................................llllNKPtGhlspsss......................................................................................................t..t..h...h..h..h..h..h.h.t........ttst...c..h...hhVt..RLD+..........-ToGL...........ll..l..s.p.s..s.pht.p.pLtc...............................p+p....l....p....K....pY.hAhVp.....................................................................................................................................................................t...t...t....h...l..p...t...s.l.tp.................................................................................................................................................................................................................h.....h.....h.....h.....h...h....p....s....u....p...s.......u....h...o...p.h..chlpph.............................................................................................t.s.h..ohl..c.lplp.p....G..RsHQlR...hhht......................................................................................................................................................................................................................... 0 1812 3469 4634 +4030 PF00796 PSI_8 Photosystem I reaction centre subunit VIII Bateman A anon Pfam-B_528 (release 2.1) Family \N 19.80 19.80 19.80 19.80 19.40 19.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.46 0.72 -6.76 0.72 -4.36 36 650 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 624 7 44 352 15 23.70 74 61.13 CHANGED LPSIhVPlVGLlhPAlsMulLFlaI .LPSIFVPLVGLVFPAIuMA.LaLal... 0 11 31 39 +4031 PF02427 PSI_PsaE Photosystem I reaction centre subunit IV / PsaE Bateman A anon Pfam-B_1594 (release 5.4) Domain PsaE is a 69 amino acid polypeptide from photosystem I present on the stromal side of the thylakoid membrane [1]. 
The structure is comprised of a well-defined five-stranded beta-sheet similar to SH3 domains [1]. 20.90 20.90 21.40 23.50 19.50 20.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.19 0.72 -4.60 25 181 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 147 11 65 180 119 62.20 61 69.39 CHANGED lpRGSKVRILR.ESYWap-VGTVAoVDpoG..l+YPVlVRF-KVNY.......uGlNTNNFAhcEL.clt ...pRGuKV+ILR.ESYWaN-lGoVsoVDpuu..l+YPVlVRF-KVNY.......uGlsTNNFA.cElpcV............. 0 19 45 58 +4032 PF02507 PSI_PsaF Photosystem I reaction centre subunit III Mian N, Bateman A anon Pfam-B_2122 (release 5.4) Family Photosystem I (PSI) is an integral membrane protein complex that uses light energy to mediate electron transfer from plastocyanin to ferredoxin. Subunit III (or PSI-F) is one of at least 14 different subunits that compose the PSI complex. 21.10 21.10 21.80 22.00 20.90 21.00 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.15 0.71 -4.72 10 178 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 141 6 65 172 142 142.20 43 86.29 CHANGED phhAllhL.slhls....sPssusADlAGLsPCpESsAFpKRtKsolK+LpppLtpY-PsSuPAlAlptph-+TKpRF-pYupAGLLCGsDGLPHLIsDG....RaoHAGEFllPGlLFLYIAGWIGWVGRuYLlAVpsocc.PT-KEIIIDVPLAlKhhhsGFsWPlAAhpEatSGcLlA+D- ............................................t....hhh.shhh.......s.....shs.ph..us.LTPCp-S.tFtp+tptthppht......tstos............ptRF-pYups..LCG.s.DGLPHLIssG.........chs.HhG-FllPulhFLYIAGWIGWsGRsYLhslp..p.p..cp..ss.pEIIIDVPLAhphhhpGhhWPluAhpEhhsGcLhtp-.s....... 0 19 42 58 +4033 PF03244 PSI_PsaH Photosystem I reaction centre subunit VI Bateman A anon Pfam-B_3007 (release 6.5) Family Photosystem I (PSI) is an integral membrane protein complex that uses light energy to mediate electron transfer from plastocyanin to ferredoxin. 
25.00 25.00 25.40 25.40 18.80 16.60 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.72 0.71 -4.94 6 67 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 39 4 27 66 2 124.50 62 96.53 CHANGED olssVpPo.slKGLAGSSluGtKLtl+su.huh.+ssppRAsuVVAKYGDKSVYFDLEDluNTTGQWDlYGSDAPSPYNsLQSKFFETFAuPFTKRGLLLKFLlLGGGSLlsYhSusuutDl.LPIp+GPQpPPclGP....RGKI ....................ssspPs...s.lpGLuGSS....lsGpKLsh.+Pupp.uh...+sps......h..RuuuV.VAKYGDKSVYFDL-DluNTTGQWDLYGSDAPSPYNsLQSK.FFETFAuPFTKRGLLLKFLlLGGGuhlsYhuusuosDl.LPIK+GPQ.PPp.GP....RsKl................... 0 8 20 25 +4034 PF01701 PSI_PsaJ Photosystem I reaction centre subunit IX / PsaJ Bashton M, Bateman A anon Pfam-B_1599 (release 4.1) Family This family consists of the photosystem I reaction centre subunit IX or PsaJ from various organisms including Synechocystis sp. (strain pcc 6803), Pinus thunbergii (green pine) and Zea mays (maize). PsaJ Swiss:P19443 is a small 4.4kDa, chloroplastal encoded, hydrophobic subunit of the photosystem I reaction complex its function is not yet fully understood [1]. PsaJ can be cross-linked to PsaF Swiss:P12356 and has a single predicted transmembrane domain it has a proposed role in maintaining PsaF in the correct orientation to allow for fast electron transfer from soluble donor proteins to P700+ [1]. 20.50 20.50 21.20 21.30 19.80 20.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.85 0.72 -4.19 31 684 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 652 7 51 324 19 36.80 72 80.63 CHANGED Mps...hppYLSTAPVluslWhshTAGlLIElNRFFPDhL ......McD...lKTYLSsAPVLuTLWFuh.LAGLLIEINRFFPDAL.. 
0 13 38 47 +4035 PF01241 PSI_PSAK Photosystem I psaG / psaK Finn RD, Bateman A anon Prosite Family \N 20.60 20.60 21.00 21.30 19.70 19.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.66 0.72 -3.97 9 219 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 131 9 87 209 97 76.60 33 73.61 CHANGED s..sshVhslshsssLhsstFGhhshpp+ss...usslPhss.............t.........ssGFsLs-lLAhuSlGHllusullhGLpshGsl .............................................t...oshlhhl.hssslhAhthGhhshpppss......u.ss.lP.ts.............................uGFs...ls-lLAssSlGHIlGhGllL.GLushGs..... 0 19 60 81 +4036 PF00421 PSII Photosystem II protein Finn RD anon Pfam-B_182 (release 1.0) Family \N 20.10 20.10 20.20 20.50 20.00 20.00 hmmbuild -o /dev/null HMM SEED 437 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.55 0.70 -5.89 19 3769 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 2075 40 249 3264 956 386.90 46 96.73 CHANGED hGhsWapspshllNssG+LlusHlhHsuLlshWAGuMsLaElAhFsPpc....PMacQGhhlLPahApLGhssu.GG.slss.shhssG...........h.tlhuotllh.GhlFhAhlh.....sh.-.hchFt..hshcsthcLspIhGIHLhLhGlhsFhhshhthhhshs.shWss...DshslTs.s.ssAshaGh.shsPFsssG..........llutHIhsGllsIhuGlaHlhs+P.thhh+AL.hhshEshLS.SlAAlhhhuFlsushhWYsssAhPsEhaGPTt.phsQ...pQthphhVc.....stpLutshush.t.huhhcYlhpSPutthlFt.Gth.+h.slttsWLt.hhh.s..G...................lDhstlhps..Pap...p+huhE..........................Yhp+A.LGpl.phstsshp.suV.hsSPRuWhohuHhshAhhFFhGHLWHuuRshh..shAGh-tslscplEhshh.KlhD 
.........................................hll..N.....s..G+LlusHlhHsuLl.....shWAGuMsLaElAhFsPpc....PMacQGhhllPahspLGhssu.GG....sl.ss.h..shhssG............................h.tlhuutllh.GhhahAhlh................sh......-...........h..........hFs.............hshKsp.chspIhGIHLhL.GlusFhhsh.htl....hh....h....hs.shWss........DshtlT................s..h...s.....ss.s..sh.....aG..h...h.......h.s...P..F.s..stG..............IhutHlhhGhlsIluGlaHl.s.+.......P.t...h..h......h..+............uh..........h....h.............shEshLS..SlAAl.hhuFlssshhWasssshP.EhaG.PTt.phsQ........tQth.hhhVp................s.pLutshuph.t.huhhcYlh.psPstthlFt.Gph.ph.sltssWLt..hh.s..G..................................lDh.stlh+s..Pap...p+hSsE..........................Yhp+A.LGpl.plsts....shc.suV.h.sSP...RuWhshuHh.huh.h.hh.GHlWHuuRshh....hG......................................................................... 0 50 157 222 +4037 PF04012 PspA_IM30 PspA/IM30 family Bateman A anon COG1842 Family This family includes PspA a protein that suppresses sigma54-dependent transcription. The PspA protein, a negative regulator of the Escherichia coli phage shock psp operon, is produced when virulence factors are exported through secretins in many Gram-negative pathogenic bacteria and its homologue in plants, VIPP1, plays a critical role in thylakoid biogenesis, essential for photosynthesis. Activation of transcription by the enhancer-dependent bacterial sigma(54) containing RNA polymerase occurs through ATP hydrolysis-driven protein conformational changes enabled by activator proteins that belong to the large AAA(+) mechanochemical protein family. It has been shown that PspA directly and specifically acts upon and binds to the AAA(+) domain of the PspF transcription activator [2]. 
32.00 32.00 32.10 32.10 31.90 31.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.43 0.70 -4.99 11 2762 2012-10-03 05:15:35 2003-04-07 12:59:11 7 5 1843 0 615 1709 167 219.40 26 93.85 CHANGED slasRltcll+uslschl-chEDPp..+hL-QtlR-hcsplscu+pslAphhApp+phEcclcptpppspchcppAptALstG..............pEpLA+EsltchpshcppspshcsphsptcststpL+pplspLEsKlpph+scpphLtARtpsA+Appplppphushs...ssuAhsphcRhcpKlt-hEsptputupht...........psts....hDtclcptthptss....cpsLupL+utpsp ...................................ulFsRhtcllpuslsshl-..c..sE..D..Pp..+hlcphIp-.hccpLscscpssAcslAppKphpc+lcptptphtchpp+A....ph..A.Lpps....................c-sLA+t...ALtcptphpshl.pshc...pphsphcsshspl...ccp.l..schpp+lsch+..s.+.pp.s.lh..u...........+pp.sAp...........up..p..p..ls...p...p.h..suhs.........hs.s.Ah...tp.h-ch...c.......c+...l..pp....hpAcu.cutspht...............t.ts.............L-pch...uph.tt.ssth............pp.LApL+t....ht................................................................................... 0 180 388 505 +4038 PF04839 PSRP-3_Ycf65 Plastid and cyanobacterial ribosomal protein (PSRP-3 / Ycf65) Kerrison ND anon Pfam-B_2979 (release 7.6) Family This small acidic protein is found in 30S ribosomal subunit of cyanobacteria and plant plastids.\ In plants it has been named plastid-specific ribosomal protein 3 (PSRP-3), and in cyanobacteria it is named Ycf65. Plastid-specific ribosomal proteins may mediate the effects of nuclear factors on plastid translation. The acidic PSRPs are thought to contribute to protein-protein interactions in the 30S subunit, and are not thought to bind RNA [1]. 25.00 25.00 45.80 45.40 20.50 19.40 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.56 0.72 -3.91 24 145 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 125 1 52 140 129 48.90 60 35.66 CHANGED LTsYFFWPRsDAWEplKspLEuKsWIsps-+lpLLNpsTElINaWQEps ...LTsYFFWPRcDAWEpLKscLEuKsWIocp-+lplLNpATElINaWQ-p.G.. 
0 11 35 47 +4039 PF03034 PSS Phosphatidyl serine synthase Griffiths-Jones SR anon Pfam-B_1414 (release 6.4) Family Phosphatidyl serine synthase is also known as serine exchange enzyme. This family represents eukaryotic PSS I and II which are membrane bound proteins which catalyses the replacement of the head group of a phospholipid (phosphotidylcholine or phosphotidylethanolamine) by L-serine. 25.00 25.00 26.10 25.60 19.60 18.30 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.96 0.70 -5.00 18 319 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 164 0 208 305 10 251.10 42 61.24 CHANGED PHPAhWRlVhuhuVlYllhLsFLLFQshcss+plh.talDPc.Ltp.ls.c..+pYuts.Cphhossc.......lhpp.hDhFshAHhlGWhsKslllRcahLsWslSlsFElhElTFpHhLPNFpECWWDplILDVLlCNuhGIhhGhhss+hLch+pYcWsul+.......................ch.o..hpGKhKR....hlhQhTPpShst.....................acWtshpo....hpRFhtlhhllhlhhlsELNsFFlKalLthPPsH.lslhRLllashlussulREaYsYl..sDs.pp++l.....GspsWlhhulhhhEsllslKhutp .....................................PHPAhWRhlhGhuVlYhl.hLhFlLFQ.shpss+phh.halDPp.Lt.......h..h...E....cpYus..s.Cp....lhs.-p...........lhsp.hDhFshuHhhGWhh.KslhIRshhls..WhlSlh..aElhElsFtHhLP.NFsECWWDp...........lILD.lL.lCNuhGIahGMhsscaLch+pYc.Wtulp..............................................................ph.o...hpGKhKR........shhQFTPtsWst.....................hcWhs.pu....hpRhh......tlhhhhlla..l...sELNTFFLKalhhhsspH.lshhRllhhshlsssslRpaYsal.........sDs....sK+l.....Gpp..sWlhhsI....shhEhllslKau.s........................................... 0 90 117 167 +4040 PF01515 PTA_PTB Phosphate acetyl/butaryl transferase Bateman A anon Pfam-B_799 (release 4.0) Family This family contains both phosphate acetyltransferase and phosphate butaryltransferase. These enzymes catalyse the transfer of an acetyl or butaryl group to orthophosphate. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.76 0.70 -5.37 21 7061 2012-10-02 21:08:39 2003-04-07 12:59:11 14 23 3973 31 1389 5144 3674 287.60 33 59.22 CHANGED phhchlhcpAc......stpp+IllPEGp-tRlLcAAptlhppGlAp.slLlG..s..-lpt.h.thtlphth.hplhsspss.th.ccasspahchRKpKGhTh-tAcchlp.DsshhushhVchGpADGhVsGsspoTucslRsuLQIItstsGspllSulFlM.hs............cthhhauDCAlsspPsu--LA-IAlpoAcsA+ths.hcP+VAhLSaSThGSucupss-+VtpAspls+ctpP-L..slDGElQhDAAlspcVAppKsPs.SsluGpANVhlFPsLpAGNIuYKlsQRhuphpAlGPIlpGhspPlNDLSRGsSscDIlNssAlTA ....................................................................................h.....lhphup......tt..t+lV..hsE..G..p-.RslcAsthhhpp.sl.sp.slLlG....s..cl.pt..h..t..h..t...h..t......l....p...h........s.........s...hplls...s......p...s..s..................c...p...h.....ht..t....h..h..p..h.tp..t+..........h.........T.......p...Apchlp...csshhushhVptGc......ADuh..l..s.G.sl.p.os.upsl+sslpl.I.t...s.t.s.G..s...p...h..sSuh.h.hh.hhs............spshhhuDsulN..............s.............P..s..............u.cpLA...-IAlt.u.Ac.os.....c..t.F.G............h..-....P.+VAhLSa.Sshs.S.u....p.......s....s....s..........s...p+sppAs.c.ls............c....c..........t..........t..........P.....-.......L.......hlDG..hphDAAls.p.l.up.pK.....h.P.s.....Ss.l.s.G..p.A.slhlFPsLpuGNlsYKhlp.p.h.u...s..h...h.ul.G....PlL..............Ghpp.PVpsLSRusssc-llphsslss.......................................................................................... 
0 423 876 1160 +4041 PF02126 PTE Phosphotriesterase family Mian N, Bateman A, Griffiths-Jones SR anon IPR001559 Domain \N 20.00 20.00 20.00 20.00 19.80 19.90 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.65 0.70 -5.41 4 1183 2012-10-03 00:45:34 2003-04-07 12:59:11 13 6 947 100 216 791 85 301.20 34 95.71 CHANGED lshuphGhTLsHEHlphshsuFhpshPptassppt..........................psuthhpcLtch+A+GVcslV-hTshslGRslphltcVuctTslpIVAuTGhYh.sshP........usphcSVEpLTphhlcEIpaGI-sTuIKAGIItphssu..tlTPhpE+VLcAsApAptpTGsPloTHTutu.ttGhpQhtIhppcGlDlSRVslGHsD.spsDls.LhchhshGsYlthDph.....Ghps.........h.s.pcRlthl+tLhDcGahc+lllSpDhpphacuhhpshhshu.......hsa....l.sslIPtL+p+GlopcsLcshLl-NPppahp ...................................................................................thsshGhTLsHEHLh.h.....s.h..u.s...hh..p........s.s..s...p..t...h.s...........................................................ph.ptshtElps...h....h.s.h.G.sco....lV-hT.s.t..s.hGRss...phltcVu.cc..T...G..lNlVuu..TG...a....Y...h...c...th...h...s...................p.t.l..t.pp.o..V.....c.p.....LAphh.lcElpp....G............I..............s..........s..........T......s......l..........+....A..Gl..I.u..E..l....usu....stl.T.h..Ec+..........shpA.AAhAp.pp.T.G...sPl....ssHs.s....hss...h..G.h..E......lc.l.......L..............p...cp.GV.c.s+...Vsl.u...H.sD...p.....pp..D..............h.....s..h..h..hc..hh..c.h..G.sa..lpFDtl.............hhp.t.......................h.hP..-.pcRl.shlttLh-c.Ga.tcpllLStDl....sp.............c....t....h....h.ts.s.s.G.hG..........asa........l.s....sFl..P.tL....t....p....p....G..ls....psp.l....c....ph.hl-NPuphhp................................................................................ 0 78 132 176 +4042 PF00809 Pterin_bind DHPS; Pterin binding enzyme Bateman A anon Pfam-B_1411 (release 2.1) and Pfam-B_3423 (release 6.6) Domain This family includes a variety of pterin binding enzymes that all adopt a TIM barrel fold. 
The family includes dihydropteroate synthase EC:2.5.1.15 as well as a group methyltransferase enzymes including methyltetrahydrofolate, corrinoid iron-sulfur protein methyltransferase (MeTr) Swiss:Q46389 that catalyses a key step in the Wood-Ljungdahl pathway of carbon dioxide fixation. It transfers the N5-methyl group from methyltetrahydrofolate (CH3-H4folate) to a cob(I)amide centre in another protein, the corrinoid iron-sulfur protein. MeTr is a member of a family of proteins that includes methionine synthase and methanogenic enzymes that activate the methyl group of methyltetra-hydromethano(or -sarcino)pterin [2]. 25.50 25.50 25.70 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.29 0.70 -4.96 42 8265 2012-10-03 05:58:16 2003-04-07 12:59:11 17 42 4552 134 2104 6541 3687 202.90 28 37.28 CHANGED llNlosDSFu-Guphhs.pt.slpp..........AcphlppGAcllDlGupuo..tP...........shlsscpEhpRllPllctltt..............sl.lSlDThcsclhctALct.GuchlNDstuhp........p.chhplstchss..slVlMHhp..................spshtpp...phc.-llp-lhphhptp.ht.h..sGls..pcllhDPGl..GF..u+s.pcshtllpplsc.........htth....shPlLlusSRKshlu .........................................................llNls.cSFssG..u.....p.....h....h.....p......h......t..h.....s..hp.c...................................Ac.phlp..p.....GAs......lIDlGs.-us....tP..t.................s..l..s..s.c.cE...h.pRlls.....l.l.....pultp...................................clslSlDo.+scVhct...uL....c....s....G....u...c......h...lN.Dshuhp.........................p.ph..hpl..s...t.c.....h..u.s........slllMahp..........................................................p.s.....t..cs.........sph..c.................-.l..hp.....clh.p.hhtpp.h..h......sG.ls..ccIllDPu.hsF..........uts......c...c.s.h..t.l.l..p.p.l.pt.............................hpth................shs..l.l..hGsSpKthhu................................................................................... 
0 718 1410 1818 +4043 PF01091 PTN_MK_C PTN_MK; PTN/MK heparin-binding protein family, C-terminal domain Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.70 21.20 20.30 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.63 0.72 -3.99 10 216 2009-09-12 05:24:01 2003-04-07 12:59:11 13 5 76 1 107 213 1 60.10 45 38.44 CHANGED GA-CKYcFpsWGECDusTGhKoRoGoLKKALaNA-CQpTVoloKPCsptsKsKspu...KKGcGK- ...........Gu-CKYpF..p.uWGECDssTuhKoRo.GoLK..+...uhh.sAsCpp..TlsloKPCsptsK....sK..pt.......pKtp............................ 0 19 28 65 +4044 PF05196 PTN_MK_N PTN/MK heparin-binding protein family, N-terminal domain Finn RD anon Manual Domain \N 20.00 20.00 22.80 22.80 19.40 18.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.32 0.72 -4.00 6 140 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 45 1 55 116 0 58.00 63 38.71 CHANGED KKEKsKKshtuS-CuEWpWGsClPNStDCGhGhREGT.....Cs-pT+KlKCKlPCNWKKcF .............KKEKscKps.tt..S-C..u.EWpWu.sCVPoSGDCGlGhREGT.....Ccpph+p.+CKIPCNWKKpF........... 1 4 9 25 +4045 PF04387 PTPLA Protein tyrosine phosphatase-like protein, PTPLA Mifsud, W anon Pfam-B_1525 (release 7.3) Family This family includes the mammalian protein tyrosine phosphatase-like protein, PTPLA. A significant variation of PTPLA from other protein tyrosine phosphatases is the presence of proline instead of catalytic arginine at the active site. It is thought that PTPLA proteins have a role in the development, differentiation, and maintenance of a number of tissue types [1]. 
21.20 21.20 23.80 22.00 19.50 19.50 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.17 0.71 -4.82 70 686 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 312 0 448 652 10 151.90 32 59.31 CHANGED Qsh.AllEllHuhlGlV+Ss.lhsThh.............................QVhuRlhllaulh..hhhPpsps.......................p.shshhlluWSloElIRYuaYshsl....hshs..PthLsWLRYohFhlLYPlGls.uEhhh.................lapulsh............h...sthshuhtah.......................hhLhhhLlh.....YlPG..hhhlYsaM...ltQR+KhLtptp .......................................................................................Qsh.AllE.....ll.H.....s.h.lGlVpos.l...hsThh.............................QVhuRlhllas.lh..t..sh...phps..................................p.slhhhlhuWols.........Ell....RYsaYshsl.............hs.hh......PthLpWL.RYohFllLY...PlGls.uEhhh.................latulsh...htt.......t.hs...h.h......P.th.sh.s.h...pah............................hh..Lhhhhhh................YlPs....h.hh.ahaM...hpQR++hlt...t.............................................. 0 128 210 344 +4046 PF01242 PTPS 6-pyruvoyl tetrahydropterin synthase Finn RD, Bateman A anon Prosite Domain 6-Pyruvoyl tetrahydrobiopterin synthase catalyses the conversion of dihydroneopterin triphosphate to 6-pyruvoyl tetrahydropterin, the second of three enzymatic steps in the synthesis of tetrahydrobiopterin from GTP. The functional enzyme is a hexamer of identical subunits [1]. 
21.30 21.30 21.50 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.77 0.71 -4.43 149 3812 2012-10-01 20:59:24 2003-04-07 12:59:11 14 10 3101 68 1103 2685 1412 126.40 30 87.70 CHANGED pls+c.hpFsAAHpLh.sht...............GcCpp.l..HGHsaplclplpup........hscsG...hlhDFsclKphlpp.lhp.p..hDHphL.N...-hsthtt..............................................P..TuEslAtalacpLpptl..........thpltp.lclh..E.ossshsp..ap.tp ..................h.lh+c.hpFpAAHpL...phs.........................t+Ctp.l..HGHoahVclplpGc.h....................scsG......h.lhDFuc..lK.phh+...lhc....p.......hD...HphL..N.....-h.sth..ps.....................................................................................P..TuE....slApalapplpstl................splp.p..Vplh..E.Tssshs.ap................................................................................. 0 366 700 933 +4047 PF00854 PTR2 POT family Bateman A anon Pfam-B_571 (release 3.0) Family The POT (proton-dependent oligopeptide transport) family all appear to be proton dependent transporters [1]. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.02 0.70 -5.68 25 7665 2012-10-03 03:33:39 2003-04-07 12:59:11 16 26 2595 5 2449 6215 1113 304.30 20 67.32 CHANGED apTIhhuullYslGhslhsluuss.sh...............s.sht..............hshhaluLhlIulGTGulKPsVSuFuuDQF-.csp...ctppspFFshFYhuINsGSLlushhsshlp........pphsasluFGlsulsMllulllFhhGpthY+ph..tss.hhhs...hlshllsshh+ptthths...ttthlhhs....hpphs................pthhppphhtsp.......sshlhhshshhhshh.....sp.spsthlptlhhhhtlh.hhhlh.sphhs.huhhhlt.lsthphhlhs.hplsssphsshsshslllhlslhshllsshsp.....hphshohhh+hulG.hhhthushhssh.................................................lph.....thssstuhsss.....hhhl.shhhhhthsplhlpusu.s.....hhh.phhhspt.shhpuhhhhhpssss ..................................................................................................h..slhhu.uhl...h.h..l...........Ghhh......h..s..h.....th....................................................................................................................hha..hu.l.h.h..l.s.lG..s.G.......hh....K...........sshushhuc....ap...tp.........................c.ph....su....hF.shaY....hu.l.NlGuhh.u.h.hsshlt.................................................................pp..hu.a.t...h..uF...s...l.s..s..h..u.hh...l..u.ll.......h.a......h..h......s..t.....t.......h.....h..........h.....t.........................s.........hp...................ht.h..h.....h...h.....h......h..h..h......h...h.........h..................................h........h..................................................................................h..h...........h.h...h.............................h.h.......h............h...................................t..p..p...h......hhhh..hh.h....h.h.....h....h....h.....hh...h..h.t....p...h...............s..t.......h.s....h..h.......h........p........s.p...........t.................................h.......h.......s...................h...........p.........l.................s.ss...h.p..s...l.
sshhl...h.l....h.ssl...hshlhh..hhp..........................t.s.h..h.+.huhG.........h....h....h..hh.uh....h..h..h..hh..............................................................................................................................................................................................................................................hth...............hs...t...t..t..s....hs...................hhl.......hhhh..hsphhlssl..u.s..................hhh.phh.tt.h.s.hhuhhhhh.h..h.......................................................................................................................................................... 0 540 1303 1941 +4048 PF00381 PTS-HPr PTS HPr component phosphorylation site Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.36 0.72 -4.09 168 8181 2009-01-15 18:05:59 2003-04-07 12:59:11 14 41 3582 89 1297 4207 498 83.30 31 35.45 CHANGED h..hp.tpl...sltsttGLHARPAuh..lVptAspF..suclplt.ps.s....ppssAKSlhulhsLustpGsplplpscGp.........DtppAlpsltpllp...st .......................ptphhlt.sppG.LH..ARPAsh..lVpts....ppF......su-lplp...pp..s.......cpssu....K...S....lhulhs..Lu..ltp...G..splpl.p..A.c.Gs.........................D..t.ppAlpslsphhpt.t....................... 0 417 759 1033 +4049 PF01885 PTS_2-RNA DUF60; RNA 2'-phosphotransferase, Tpt1 / KptA family Enright A, Ouzounis C, Bateman A, Kerrison ND anon Enright A Family Tpt1 catalyses the last step of tRNA splicing in yeast. It transfers the splice junction 2'-phosphate from ligated tRNA to NAD, to produce ADP-ribose 1"-2"-cyclic phosphate. This is presumed to be followed by a transesterification step to release the RNA.\ \ \ The first step of this reaction is similar to that catalysed by some bacterial toxins.\ E. coli KptA and mouse Tpt1 are likely to use the same reaction mechanism [1]. 
20.90 20.90 21.20 22.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.26 0.71 -4.99 88 1070 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 913 1 404 881 94 168.10 38 78.73 CHANGED sp...ppt.sclSKhLShlLRHt.spchGLplDppGal..slspLlpthpt......p..sht.h.....shpclpplVpss-KpRFplp..................................................................................................................................sstpIRAsQ...GHSlp.Vc......Lph...sspt........................PthLaHGTspcthssIhppG..Lp.MsRpaVHLoss.............................hpsuhhsG...tR+.sssVllhlDspphh.psG............hpFahSsNsVhLoc......tl ....................................p..pp.sphSKhLSalLRH.........t..Ppt........hG.........l..s..lD.pcG..as..slccLl..tthp.t..........t......shp...l..........oht.lcplVts....s-....Kp......RFshp.......................................................................................................................................................................t.s.s....tpI.R..AsQGHSh.p...Vs........h.....sptt..................................sPthLaHGTspchh.spIhp.pG..Lhth.....p.Rp.....aVHLS..ss............................................ttsAhhs.G...tR+..sssVllhlcspphh..pc..G............l.Fa.upNGVhLos.......sls................................................... 
0 144 252 336 +4050 PF00358 PTS_EIIA_1 phosphoenolpyruvate-dependent sugar phosphotransferase system, EIIA 1 Finn RD anon Prosite Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.62 0.71 -4.60 14 7674 2012-10-02 20:27:15 2003-04-07 12:59:11 15 18 2590 18 868 4397 51 132.00 41 27.04 CHANGED ssstslsoPlsG-llsLspVsDpVFuuchhGcGhAIhPosGpVhAPlcGsltplFsT+HAlGlpS-sGsEILIHlGIDTVcLcGcGFpuaVppGscVctG-hLlpFDlstlcpsuhshhoPlllTNoscassl ...........................s...plhuPl.sGcll.sLs.cVsDsVF.u.pchhG-GlAI....c...P........o.....s.....G......p.........l..h...APs.s.Gplsp..l.F.s.T.p.HAlGlp...o.....-..s....G......l.......E.......l.LlHlG.l.DTVp..L..c.....G...c........G......Fp.sh.V..p.pGpcVp.sG.shLlphDlshIcp.su.h.ssh.oPVl.lT.Ntspht..h................................................... 0 230 456 653 +4051 PF00359 PTS_EIIA_2 Phosphoenolpyruvate-dependent sugar phosphotransferase system, EIIA 2 Finn RD, Griffiths-Jones SR anon Prosite Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.97 0.71 -4.47 153 18510 2012-10-02 23:31:29 2003-04-07 12:59:11 17 113 3409 21 2010 9683 678 141.30 21 36.45 CHANGED p.hhspphlhh.s..php...scpcslphh...sp..hLhcp..s.........hlpst....hhpslhpREc...hhsTsl....s..su.........lAlPHsc........t.t...lp.......cs..sl.slsphpp.sl.pa....s............tt.Vcllhhlssscsst............alp..hLsplsph.l...tspph.....hppLh...p.sps.pclhp.llppt 
.........................................................................................t..hptp.lhh.sh..psp.....sppcslchh...sp..hL...h..pp...s........................hlpss.........ahpu.ll....p...R..Ep....t..hs.Thl....s........su........................lAl.P.Huc....spt.....lp.......cs...sl.sls..p....h.p..p..sl..pa...s.............t.-..spsl.p..l..lhhlusssspp............Hlp...hLspLs.ph.l....tc..c..ph.....hpp...Lh...p..sp..s....p.clhp.llt..t........................................ 1 621 1143 1577 +4052 PF00367 PTS_EIIB phosphotransferase system, EIIB Finn RD anon Prosite Domain \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.50 0.72 -4.71 138 13320 2009-01-15 18:05:59 2003-04-07 12:59:11 15 22 2535 8 1413 7412 51 34.90 43 6.63 CHANGED AppllpslGGpcNIpslspChT.RLRlsl.....p....D..pstls .........AtpllpulGG....p-NIs..slsp.C.h.T.RLRlsl.....p.....D..pspls.............. 0 324 668 1027 +4053 PF02378 PTS_EIIC Phosphotransferase system, EIIC Mian N, Bateman A anon Pfam-B_639 (release 5.2) Family The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The sugar-specific permease of the PTS consists of three domains (IIA, IIB and IIC). The IIC domain catalyses the transfer of a phosphoryl group from IIB to the sugar substrate. 
26.80 26.80 26.90 26.80 26.60 26.70 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.05 0.70 -5.61 55 25880 2012-10-01 19:13:17 2003-04-07 12:59:11 13 36 2821 4 2482 14149 266 300.10 17 57.49 CHANGED tcuhhhslPlllsuulllslus.................................thhs.h..hhptsusssFshLsllhuhulAhsl..t.......sssssss.uuhluhlsh.h...hshhhh..t......................................hsshhshhhsspGhhuullsullsshlaphhhp...h.h+lPcslshhs.....uptFsslIsshhshllhuhlhthlhshhpsslhs.h....h.......hhtssshhushlhuhlhphlhhhGlHtshhsshhhtsh...........hhh....s..hh................................hh.tt.h............hhhGssGssLAhhhshhhtp+spptptlspuuls.......................shlhGIsEPl.Fuhshlhshl.hhlshllsu ............................................................................................................................puhh.hlshlls.u..G...l....llulst......................................h........................................t.h.h.t.s...h...h.....t.h....h...t....h.h..u..s...s......s......F..s....h...ls........l...l....h..uhsh..u....h....sh.........................s.ss..sh...h...s.....u..s.h.h...u.h.ls..h...h......................................................................................................................................h....h..h......h....s....s...s....Gh....huu.l.l.su.hl.uu..h....l.h...p.h.hpc..............h...cl.....P....p...s...l...s..hhh........s.h.h...ss....l....lss...h....h...s....h.h...l...h....u....h..l....h...t....h.l....h.....s..h...l..s..s.s.lt.s.............h...............ltsts....hh..u...s.h.l.h.u.hl.hth.hh.h.h...G.l.Ht.s.h.s....s.s.hh.h.hhh........................................................hh.........................................................................................................................h.h.............hh..hu....s..s..u...s....s.....lu.h...h.h...s....h....h....h....t....p..+.......p..p.....t....t....p..t..h...hs.u....uls.....................................s.h.l.h.G.lsEPh.....auhsh
.shs....h..hlsshlss............................................................................................ 0 606 1215 1835 +4054 PF02255 PTS_IIA PTS system, Lactose/Cellobiose specific IIA subunit Bateman A, Mian N anon Pfam-B_3710 (release 5.2) Domain The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The lactose/cellobiose-specific family are one of four structurally and functionally distinct group IIA PTS system enzymes. This family of proteins normally function as a homotrimer, stabilised by a centrally located metal ion [1]. Separation into subunits is thought to occur after phosphorylation. 20.80 20.80 20.80 21.30 20.70 20.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -9.98 0.72 -4.12 110 4200 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 1842 38 343 1558 6 94.80 39 88.68 CHANGED EphshplIhpuGsARStshcAlptA+pG-accA-phlppApctltcAHphQTpLIppEAsG..s+hphollhlHAQDHLMouhsh+-LspEhI....cLa+c ............lshplIhtuGsARSphhEAlpt.A.+p..G..-.F..p.pAcphlppApp.s.lhcAHcsQT.p.LlppEA..sG...s.c.h..p..lollhlHAQDHLMTohhhp-LhcElI-La+c................. 0 79 164 248 +4055 PF03714 PUD Bacterial pullanase-associated domain Yeats C anon Yeats C Domain Domain is found in pullanase - carbohydrate de-branching - proteins. It is found both to the N or the C terminii of of the alpha-amylase active site region. This domain contains several conserved aromatic residues that are suggestive of a carbohydrate binding function. 
25.00 25.00 26.00 25.40 24.80 24.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.50 0.72 -3.88 39 1526 2009-01-15 18:05:59 2003-04-07 12:59:11 9 80 727 16 186 1138 23 104.40 26 14.47 CHANGED psplplHYpR....sc..usY-sWsLWlW.s-ssssss............hs.......pshshstp-c.YGsahslpLspsspp....lGFllpps...sspD.........usD+hlsh....ptsptlWlhpGspplahsps .................s.shhRlHYp+..........ss..usY..........-shuLWhW....s-spsssp..................tWs........tshph.stpDc.YGtYhcl..pl...s..s..s..t..sp..........lu...Fllpss..........ptps......hstD.......hpl.....c...hh.....t........shs..p...l...Wlh-sDtplYhp........................ 0 48 96 146 +4057 PF03829 PTSIIA_gutA PTS system glucitol/sorbitol-specific IIA component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 26.60 25.80 24.20 22.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.51 0.71 -4.42 35 1216 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 908 2 104 427 58 117.30 39 95.66 CHANGED Msh.....lYpoplspIGs.Apphlp-.pMl..IlFs-sA.Ps-Lt-aChlHphs..p...hpsslpsGshlplsspsY.ITAVGslAppNLcpLGHlTlpFDG.spps-hPGolalps...ps..sslphGs..pIp .......Mph.lYposIscIGspAp-hLs-...pMl..IhFsE.s.A.Ps-Lc-aCaIHspu.....p....hpss..lpsGsphslup.ppYslTAVGslAppNLc-LGHlTLpFDG...hs..-..s..chPGsl+..Vss.....ss....s..cIssGshl.h................. 
0 31 55 83 +4058 PF03830 PTSIIB_sorb PTS system sorbose subfamily IIB component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.70 21.70 23.30 22.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.84 0.71 -4.35 122 5348 2009-09-11 23:04:30 2003-04-07 12:59:11 10 6 1539 14 422 2069 35 149.20 34 73.55 CHANGED pIshsRlDsRLlHGQVustWspphssspIlVlsDplA...pDclpcshlchAs.PsGlphplhslpcslcshpssp.hssp+lhllhcsPpDslcllcuGl....slcp..lNlGshphpp....G+cpls..p.slslsppDlpsh+cLppp.Glc.lplptlPs-spt...s ............IsLsRIDsRLIHGQVustWspph.ssspIlVss.DpVA.............pDslp+sll.chu.s..PsGlphplhslp..Kslc.shps.sp..h..sp.p..+lhllhcsPpDshcl.l.-.u.Gl....s.l....c.p.....l...NVG..sM.......uh..p.p......G.+p..p..ls......p.slslsccDlpshccLppp.Glc.lplptVPsDst........................... 0 108 241 337 +4059 PF03209 PUCC PUCC protein Mifsud W anon Pfam-B_2839 (release 6.5) Family This protein is required for high-level transcription of the PUC operon. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.31 0.70 -5.75 11 317 2012-10-03 03:33:39 2003-04-07 12:59:11 10 5 173 0 127 1156 603 383.60 31 84.17 CHANGED TLNRVMIVELAVPAslVulMlALPhLhAPFRsLhGa+SDo+tSALGa+RsPYIWhGolh.hGGhAlMPFAlllLos..ps...tsPtWhGhhsuulAFLhlGsGlHhsQTsGLALAsDluscEsRP+VVGLhYVMLLlGhlloAlhhGhLLss...................as.u+LIpVlpGsulsshVLNllAhW+.Estp.tps.....t.cscpcPsht-AWtphs.upspAhphLhslhLGThuFsMpDVlLEPYGGpVlsLsVupTTpLTAhhuhGsLlGFhhuuhsLupGhcshphAshGshlulsuFhhllhuuhhs..s..hslFhsGshhlGhGuGlFutuTLTusMpLAstspuG...lALGAWGAsQAoAAGluhhlGGslRDllpths.........t.suhuYshVaulEhhlLllolhhhssllcsptt 
.............................................................................................................................................................................................TLNRVMIl.E..LulP.Ahlsu.hh.lul...hh.h.uP..h..R.shhGatSDspt.hhGh..+R..sPa.Ihh..Gshh...h..s.G..hh....l.h....s.....h.....u..l...h...h...lu..s...................sh.h.....h...Ghh.h.usluF...l...hh...G..hGl.....p..ss.pT.shLA.....LhsDhs....s.t...c....p.R.......s.p.ll.ullas.M.......h....l..h.Gh.h..l..o......Ah.lhG...t..l..Ls.s.........................................................a.o......t..+.L....lp...V...l..p.s....su.......l....l....s....h..s....L....s.....h....l.A..lW..t.Et...+..tstts.................ttsp..p....s....Ft...p...s...h.t..phh......spsps.....tph..hhh.l...hl.us.hu.a.......h..Q.D.l...l......L......E...PY.G.Gp.l...huh..o...l...u.p.TT.p.....Ls.....A.h.h.u...s...G....s.Ll..Ghh...h....s....u....h...h....l..s.....t...t.h.s........h...p...h....s...t....h...G..s..h.h..u....hhu...h...s...h...l..l...h....u..u.hs.........s......sh.hhtsu...s..hll..Ghu.sGlaususl..os.h.M.sl..s...s...t.....s......p.....sG.......lhlGsWG....A.s....Q..AhAtGluh.h.l.GGs.l.tD.......ls..p...thss................s..sshuY.u...hVat.l..Ehhlhhs.u.lhhlh.lsht...t......................................................................................................... 1 27 64 92 +4060 PF00806 PUF Pumilio-family RNA binding repeat Eddy SR anon [1] Repeat Puf repeats (aka PUM-HD, Pumilio homology domain) are necessary and sufficient for sequence specific RNA binding in fly Pumilio and worm FBF-1 and FBF-2. Both proteins function as translational repressors in early embryonic development by binding sequences in the 3' UTR of target mRNAs (e.g. the nanos response element (NRE) in fly Hunchback mRNA, or the point mutation element (PME) in worm fem-3 mRNA). Other proteins that contain Puf domains are also plausible RNA binding proteins. Swiss:P47135, for instance, appears to also contain a single RRM domain by HMM analysis. 
Puf domains usually occur as a tandem repeat of 8 domains. The Pfam model does not necessarily recognise all 8 repeats in all sequences; some sequences appear to have 5 or 6 repeats on initial analysis, but further analysis suggests the presence of additional divergent repeats. Structures of PUF repeat proteins show they consist of a two helix structure [3,4]. 21.20 18.60 21.20 18.60 21.10 18.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.47 0.73 -7.78 0.73 -4.18 50 6866 2012-10-11 20:00:59 2003-04-07 12:59:11 14 49 324 360 4462 7032 23 33.70 25 20.65 CHANGED phpsplhpLsp.cpaGshllQ+hl-h.tstpptphlh ............splhpLsp.DpaGshVlQ.....+hl-t..ss.ppt.................... 0 1426 2555 3719 +4061 PF02245 Pur_DNA_glyco Methylpurine-DNA glycosylase (MPG) Bateman A, Mian N anon Pfam-B_3352 (release 5.2) Domain Methylpurine-DNA glycosylase is a base excision-repair protein. It is responsible for the hydrolysis of the deoxyribose N-glycosidic bond, excising 3-methyladenine and 3-methylguanine from damaged DNA. 
21.00 21.00 26.40 21.00 19.50 18.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.20 0.71 -5.14 149 1752 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1672 8 481 1305 356 183.20 36 88.25 CHANGED spp...Fa.spsshtlAccLLGphLlpp..t....st.lsG........pIVETEAYhG......sD.AuHuapG.pTs.........RspsMFGssGphYVYhhYGhHaChNlVstscGh.usAVLIRAlEPlp.Ghph....hp.....................tpR.h................t..................................ppLssGPG+LspALuIsts.tsGtsLss.......ss........................lhl...........sss........hss..........tp..l.ssusRIGIs....cus-hPWR.Faltussh .....................tFh.spsshpl.A+pLLGt..hLhtp.....s...t........tt.hsG........hIVEsEAYhG......s.DtAuHuatG...pTs.........RspsMas...sGplYlYhhaGhHhhlNlVsp.s.c.G.h.spuVLIRAlEPhp.Ghph.......ht..................................................................tpRtt.........tt..........................................thpLssGPGKLspALu.Ishp.hsGtsLts...........ss..........................................lhl.............ppt....................................t.s..tp..l.hsusRIGIs.....csschPhRahlpGss................................ 0 157 292 393 +4062 PF04845 PurA PurA ssDNA and RNA-binding protein Mifsud W anon Pfam-B_4535 (release 7.6) Family This family represents most of the length of the protein. 
24.60 24.60 24.70 24.70 23.80 24.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.46 0.70 -5.23 6 296 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 121 4 173 276 3 177.70 43 73.55 CHANGED pQELASKplcIQsKRFYLDVKQNsRGRFlKIAEVGsGG......pKSRlhLShsVAsEh+DpLucF.-aYApLu.....ppttp..pp.......................pptLKSEhllRDsRKYYlDLKENpRGRFLRIpQTshRG.........s.spcQpIALPAQGhIEFRDALscLI--YGss........p-..uELPEusslpVDNKpFaFDVGSN+aGVFhRlSEVK..ssYRNSITVPhKsWs+FscpFscYsEcM .......................................................ppLAo+.lplQpKRFYlDVKps.+GRFlKlAElh.st..........................p+u.plhLuhssA.th+p.Lsph.chhspls..........t.t....tt.................................hLKo-hl.p-pR+YYhDLKENtRGRFLRlp....Qshs.s.....................tttppIslPAQGhlEFRDALspLl-caG....................tt....-....LP.....Eupsl....p..V..D.sK.pFaFDlGsNch.GlFlRlSEV+....ssa.RsoITlP.....h.....csWscFtphhscas-c....................................... 0 40 74 120 +4063 PF02700 PurS UPF0062; PurC; Phosphoribosylformylglycinamidine (FGAM) synthase Mian N, Bateman A anon COG1828 Family This family forms a component of the de novo purine biosynthesis pathway. 21.30 21.30 21.30 21.30 20.70 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.58 0.72 -4.03 102 1833 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 1819 26 504 1114 1244 77.80 38 68.30 CHANGED ac.scVhVoL..KsuVLDPpGpAlcpALppLGaps.VpsVRlGKhlElplpss.spppAcpplcchC-+L.LANPVIEsYch.-lpc ...........lpVpVpLKsuVLDPQGpAlpp.A.L.p.p.L....G..ast.Vpc.VRhGKhh..-lpl-s.s...st.p..tscsplcphs-cL.LANsVIEcYphcl.......... 0 167 348 442 +4065 PF00855 PWWP PWWP domain Bateman A anon Bateman A Domain The PWWP domain is named after a conserved Pro-Trp-Trp-Pro motif [1]. The domain binds to Histone-4 methylated at lysine-20, H4K20me, suggesting that it is methyl-lysine recognition motif. 
Removal of two conserved aromatic residues in a hydrophobic cavity created by this domain within the full-length protein, Pdp1, abolishes the interaction o f the protein with H4K20me3. In fission yeast, Set9 is the sole enzyme that catalyses all three states of H4K20me, and Set9-mediated H4K20me is required for efficient recruitment of checkpoint protein Crb2 to sites of DNA damage. The methylation of H4K20 is involved in a diverse array of cellular processes, such as organising higher-order chromatin, maintaining genome stability, and regulating cell-cycle progression [2]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.28 0.72 -3.78 273 2569 2012-10-02 16:56:36 2003-04-07 12:59:11 12 117 297 46 1417 2476 10 92.50 23 11.99 CHANGED hpsG.-lVWu....K.h...cGa...P..hWPuhlhs.t......................................pt......t......t...p...........tt.........tsph.............................h....VhFFu...sp.p......a..u.....al..p..t.pp.lhsap......pt..p...............pp............h...pp.................p............p...cp..................psh...ppAlppAt...pt .....................................................u-lVWu....K.h......cGa.........P...aWPAhl...hs.........................................................................t...t..............p.........tt............tsph.............................h..VhFFG.......sp..p..............h..u.....al..s..........p.p.lh.sap.....ppp..................p.p............h.tt..................t............p..pp....................ttappAltch...t............................................................................................................................. 0 323 550 943 +4066 PF02436 PYC_OADA Conserved carboxylase domain Mian N, Bateman A anon Pfam-B_628 (release 5.2) Family This domain represents a conserved region in pyruvate carboxylase (PYC), oxaloacetate decarboxylase alpha chain (OADA), and transcarboxylase 5s subunit. 
The domain is found adjacent to the HMGL-like domain (Pfam:PF00682) and often close to the biotin_lipoyl domain (Pfam:PF00364) of biotin requiring enzymes. 20.60 20.60 21.20 21.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.28 0.71 -4.88 139 3341 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 2687 38 837 2634 637 192.00 34 23.79 CHANGED VhhaplPGG.hoNLtsQhcp.GhtD+hc-VhcphscVpc.lG.lshVTPoSplVGs.Alh.Vhs.......................sc.+hhshsppVhsahcG.hGpPPushscclpp+lL.ps..ccs..osRPu-hLpP.-h-plcpElpp.h............h.............sc-DlLoYsLaPpVstcFhct+pphssht.hss....................hhhshphspphp...lcl-..Gcshhlcl ........................................lhhtplPGG.....hoNLppQh+p....Ghs-+a--Vh.c.thscVpp.hG.lshVTPoSplVGs.Alh.Vhs.......................................sc..chhshscplhshhpGch.Gp.s.sus..hs.pc.Lp.p...plL..cu.........p..c..sl....Ts..R....P..............u..-....h..L..pP....-..hc..p..l...c..p-ltchh....t..t.....h......................sc-DlloaAlaPplh.....hcahct......+p......p..h....s..s.h..p....l.sp..t.............................hh....s....t....s..p.php...lplp.Gpthhlp.................................................................................................... 0 304 553 729 +4067 PF03013 Pyr_excise Pyrimidine dimer DNA glycosylase Griffiths-Jones SR anon Pfam-B_1388 (release 6.4) Domain Pyrimidine dimer DNA glycosylases excise pyrimidine dimers by hydrolysis of the glycosylic bond of the 5' pyrimidine, followed by the intra-pyrimidine phosphodiester bond. Pyrimidine dimers are the major UV-lesions of DNA. 
22.10 22.10 22.50 22.30 22.00 21.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.85 0.71 -4.38 26 421 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 392 7 56 317 25 114.10 37 90.02 CHANGED MphhppsLlscLscppLLupaREhstl..............+ssuhtpcHhTl............shhFhpp.hhLhphathlhcEMpcRG...YpsstpWhs............tpapGchsPs....hhshpclttphPhhsc+.p.Yhpps .................Mphhpp.hhlscLscppLLupHREhsul..............+usuhs.p+hTl............shhFhtpshhLhphHthlhpEMppRG...Yps.s.p..Wh..-..................paR.uchpPs....h.shtcl.....t.p....hshhscast.Yhtt................................................ 0 21 32 43 +4068 PF01948 PyrI Aspartate carbamoyltransferase regulatory chain, allosteric domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain The regulatory chain is involved in allosteric regulation of aspartate carbamoyltransferase. The N-terminal domain has ferredoxin-like fold, and provides the regulatory chain dimerisation interface. 25.00 25.00 46.80 45.80 18.50 17.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.25 0.72 -3.96 60 1268 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 1241 116 234 645 96 95.50 52 59.80 CHANGED cpcLpVpsI+sGTVIDHIsAGcu........LpVLclLs.lp..ssstploluhNlsSp+hG+.KDIlKIEsph.Lscc-lsplALlAPpATlNII+-YcVVcKhplpl ..c.pcLpVEAIKpGTVIDHIPAphG........hKlLpLF+Ls...cocpRlTIGhNLPSsch..G+..KDlIKIENsF.......Lo--plspLALaAPpATVNhI-NY-VVpKp+....... 0 65 137 188 +4069 PF02748 PyrI_C Aspartate carbamoyltransferase regulatory chain, metal binding domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain The regulatory chain is involved in allosteric regulation of aspartate carbamoyltransferase. The C-terminal metal binding domain has a rubredoxin-like fold and provides the interface with the catalytic chain. 
21.80 21.80 21.80 23.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.08 0.72 -4.53 70 1294 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 1268 116 241 677 90 51.10 47 31.64 CHANGED cplpGl..l+CsNsNCITs..s.EPVpopFtV..hp.pshpL+CcYCE+hhscpclhp .........-cIssV..ltCPNsNCIo+...s.EPV.s.S..s.FtV...........pcsscltLKC+YCEKcas+p.V..t..... 0 70 143 195 +4070 PF01243 Pyridox_oxidase Pyridoxamine 5'-phosphate oxidase Finn RD anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.76 0.72 -3.97 141 8674 2012-10-02 11:35:36 2003-04-07 12:59:11 15 54 3619 83 2886 8805 4127 89.20 19 47.13 CHANGED lsp..php....phlp..p................phssLAT....ss..t-..GtPpsphhh..hhhstsp...........hhhhhsstpu......p+spsltpsPc.lulhhh.tp.........h..t.ptlp...l..pGpuchlsctp ............................................thtthlt..ps...............ps..hh.lA.T................ss......tc..........G.....p......P....p.....s.....phhh......ht..h.hstp...............................hhh..h..h...o...s..t..t.u.........................p+....s....p.p.l..........p....p......s......P....p....lu..l.......h..hh..p.................................ht.ptlp.....l.....pGp.uphlpt..................................................... 
0 870 1911 2492 +4071 PF00282 Pyridoxal_deC pyridoxal_deC; Pyridoxal-dependent decarboxylase conserved domain Finn RD anon Prosite Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.97 0.70 -6.02 11 7254 2012-10-02 18:26:03 2003-04-07 12:59:11 14 35 3273 90 2360 6947 942 298.70 24 67.83 CHANGED PGYL+t...hlPtsAP.csEshppIhpDhcchlhPGlopWpsPpFauaaPsssSh.uhlu-hLssuINssGFoWtsuPAsTELEhlshcWluchltLPttFhtp.t...GGGVlQsouSEusLlullAARp+hhpch+tps........huKLVsYsSDQsHsSl..cKAutlusVc...h+hl.scp..phthpspsLccAIEcDhppGhlPhaVsATlGTTsssuFDslpcluslspc......aslWlHVDAAYAGoAhICPEaRp...hhpGlEpADShshNPHKahlshhDCoslWV+-cstL.pshphss.YLppsp...shssDhtcapIshuR..+hculKlWhllRuaGlcsLpspIRcphphAphhpshlppDsR.FElss...chthuLVCFRlK .........................................................................................................................................................................................................................t......h........................................................sh.s.....h..h.p..h.......P.t...h.s.....lE.......hhlshl.s.p.h.h.....t..h...s............................................us.Gs....hp..s.u.u.o...pus.hhu.....h....h.........s.h+......t......p.......h......h.....t......p......h...t.t...t..t..............................................htc.....s....hh.s..u.pp..s.H...h...s.h....cK...s...u....t...h....h....s...lt..........................lp....l....s....st.........pt...t.h.p...............h..t..s...l.c...p.t.....l...p....................t...........s........p......p..............h................................h.......l................l.....u...o..h..G.......oT......sG.....s.h.....D.....s.....l....p....t....l....s..s....l...t..p.c...............................h..s...l...a...lH.V......DA..A.h.u...G.....................h..........l......s......s........hp...........................h.ht.l.....p...........t.......s.c..S......lshss..HK.......ah.h.ss.....h.ssu...h
h...h..h.+.p....p....h...h......p..........t..h........h...p...s........Y..L..t...................................................................................................................................................p.......h..h.......p...h.......tl...h.u.R................t...h...h...p.ha...hhc....h..G..h...p...s...htthhpp.........t...........ht.......hAthht..p.....l....t....t..................h..lh............h.s.....l..h....................................................................................... 0 695 1263 1924 +4072 PF00719 Pyrophosphatase Inorganic pyrophosphatase Bateman A anon Pfam-B_613 (release 2.1) Domain \N 21.90 21.90 21.90 22.00 21.70 21.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.63 0.71 -4.78 157 4053 2009-01-15 18:05:59 2003-04-07 12:59:11 14 17 3177 158 1242 2843 708 156.90 40 75.74 CHANGED ssl...lEIPpsop.sKYElDKcsshhhlDR.............hhass.....htYPtNYGalPpTls......................sDGDPlDlLVlss.tshhsGsllcsRslGlLpMhD-.....uttDtKlluVsss...Dsta.p...slpclp-ls.thlppIpcFFcpYKsLc......psKhl..plpsatstctAtchIpcuhppa.pp ..........................................slIEIPtsup...hKYElDK-o.G....t...lhlDR.............hhhou......hhYP.sN.YGalPp..TLu............................tDGDPlDV.LV.ls.s...hP.l.h.P.G.s.V.l.c.s.Rsl.GlLpMtDE........ustDtKllAVPts....c.pa..s.......clcD..l...s...Dl...s....t.....hh...hppIp..HFFcpYKsL-.................tsKaV..c.l.psa.tstctApt.Ihpuhcphp.t....................................................... 0 391 752 1029 +4073 PF02547 Queuosine_synth Queuosine biosynthesis protein Mian N, Bateman A, Eberhardt R anon COGs Family Queuosine (Q) biosynthesis protein, or S-adenosylmethionine:tRNA -ribosyltransferase-isomerase, is required for the synthesis of the queuosine precursor (oQ). It catalyses the transfer and isomerisation of the ribose moiety from AdoMet to the 7-aminomethyl group of 7-deazaguanine (preQ1-tRNA) to form epoxyqueuosine (oQ-tRNA). 
Q is a hypermodified nucleoside usually found at the first position of the anticodon of asparagine, aspartate, histidine, and tyrosine tRNAs [1,2]. In Streptococcus gordonii , QueA has been shown to play a role in the regulation of arginine deiminase genes [3]. 25.00 25.00 25.30 25.20 21.70 24.10 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.28 0.70 -5.72 110 4206 2009-09-10 15:09:34 2003-04-07 12:59:11 10 5 3721 5 867 3069 1946 327.50 45 97.57 CHANGED pls-FDacLPpELIAQpPsp.....RcsSRLLllcpp..........ss..t....lpcpp.Fp-lschLpsGDLLVhN-T+VIPARLaGpK......t.....oG..................G+lElL......lhch......h...t...........ttt........a.s..hl+su+ch+sGsplhh..........st.......sh.....pupVltp...tpt..sth.llphphpt..........shhphLcchGc.....lPLP.PYIcR...................ts........stt...Dp.cRYQTVa..A....c..csGuVAAPTAGLHFoppLLppLppcGlphuhlTLHVGhGTFpPV+...s-clpcHpMHuEahplspcssptIpps+.tpGtRllAVGTTslRsLE.o.......A......upp.........Gt...........lpshpGcT-lFIhPG..YpF+lVDsLlTNFHLP+STLLMLVSAFuGp-plhpAYpcAlccc.......YRFaSYGDAMLIh 
...................................p.lsDFcFcLP-c.L.IAphPht..................pRssuRLLl...l...ctp...................su.....p...l.p...c.p...p.FpDll-..hLp.......sGD...hLVhNsT+VlPARLaGpK..............t.......oG.............................................u+l...ElLl.+............................h...s...scp..........h.s.........hl+.......s.........u........K.............+........h.........K........sG...s...pl.hh...............s-...................................pl.pA.p.hh.tchpp........ttt....lhch.p..ass.....................h.h-lLpplGc.....hP.L.P.P.Y.I..cc...............................................t......-..t........Dp-cYQTVY......A....c...c.....G.ul..AAPTAGLHFocpLLpclcsK.GVchsalTLHVGhGTF...pP...Vc...V-slp-Hc.MHoEahplsp-ssctl..............pt...s...........K.......t.................pG.s............RllAVGT..TSl..RoLE..o....s......upp.........pst...............................................l..p..s..h.p.G..TsIFIhP..G..Ypa+l.VDuLlTNFHLPcSTLlMLVSAF..........u......G.h-.phhsAYcpAlpc.c.......YRFFSaGDAMhI................................................. 0 311 582 740 +4074 PF00788 RA Ras association (RalGDS/AF-6) domain SMART anon Alignment kindly provided by SMART Domain RasGTP effectors (in cases of AF6, canoe and RalGDS); putative RasGTP effectors in other cases. Recent evidence (not yet in MEDLINE) shows that some RA domains do NOT bind RasGTP. Predicted structure [3] similar to that determined [1], and that of the RasGTP-binding domain of Raf kinase. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.91 0.72 -3.50 54 3464 2012-10-03 10:59:06 2003-04-07 12:59:11 18 281 276 21 1924 3208 3 89.70 17 10.59 CHANGED spthl+Vahssts.............hpoltlstssospcVlpthlcKhtl.......sssppatL............hhtptstcc..........hLtss-pPlhhhhph..t.........psphhlppppp ............................................t...hl+lah..ts...s.........................hpslt..ls.pp.sTsp-..V.lptl...lp...+h...........tl..............ss......spp.asLh................................hhtt....ts...tc+.........................................hLt..s.p-psl.hhhhph................thphhlcpt..h........................................... 0 459 669 1233 +4075 PF03528 Rabaptin Rabaptin Griffiths-Jones SR anon PRINTS Family \N 27.20 27.20 27.40 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.52 0.72 -4.13 5 248 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 45 2 100 219 0 108.70 41 30.00 CHANGED KAlAsVSEoTKQEAlscVpRQpQEEVASLQAIlK-Tl.......SuY....EoQasL.LcQE....................RtQhtQspEucERE.lu+L+phLucAp..DsLE+pMKKs+ELs-pL...Kpssp-hEpcI .....................ph.uth..spcQEth.pshp.pQEEsASL.us...lp.-ul..............suY...........EsQaph.LppE...........................................ptQhtQ.pEutpR.......E....ls.cLpp.hLpcup.....-..sLEcpMc....ctQE.t-hL...+p.s.s.pppI................................................ 
0 9 16 42 +4076 PF02144 Rad1 Repair protein Rad1/Rec1/Rad17 Mian N, Bateman A anon IPR003021 Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.72 0.70 -5.63 26 354 2012-10-02 11:47:48 2003-04-07 12:59:11 11 7 284 3 227 571 15 238.90 27 75.42 CHANGED lFoAsossscpLhslLpslshhs..c..........AhlploscGl+hssE.cs+slQupsalspslFssYpa....................................................................tt.t.ptshssFplsLsslL-sLsIFGssssss..............................................................sCphsYpGpGsPLslhLE-...sslpTpCclsT...............Y-s-........-sh-lshs.csplhhclIh+uchLpsAlpELctstspplplhsos..p.............pP.......................aFtLsop.GphGp......Spl-asscps.....................................lhEpFplp..........pcshps.Ycauhlc+st+AhtlAsKVslRhDppGlLSlQhhh ....................................................................................................................................................h.u.hsssctl.phLpslt.atp...p............uhh.lopp.Glph.s.....s.E.pu..+sl.Q....upsalp.p.p.lFppaph............................................................................................ppttspFtlsLshLl-sLslFusss.st.........................................................................................................................................shphp.Y.t.u...G.tPLhlhlc-........ssls..s..p.s...plpT....................hts-......................tsh-.hs..hp..ps....s.l.h...chlhp..o.phL+cAh..p..............-Lc.s......s.p....lplhhss...p.........................tP........................hh.plps..Gt..hup.......splca...s...psps..........................................h...h....-...tFpsp................ph..s.Y+hshlc..sh.c.....Aht....hu....sK....l.tlRhstpGhLolQhh....................................................................................................... 
0 86 129 188 +4077 PF03215 Rad17 Rad17 cell cycle checkpoint protein Mifsud W anon Pfam-B_2764 (release 6.5) Family \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.80 0.70 -5.82 3 406 2012-10-05 12:31:08 2003-04-07 12:59:11 10 19 278 0 287 3132 1602 329.80 21 56.41 CHANGED lp-DcsE.WaEKYKPpstc-LAVHK+KIc-VcpWLcApsLEsp.+p...ILLIoGPSGCGKSTslKlLSKELGhplpEWlNPssh+pPsN..QcoDF+GhspspSpF.....hSQhEoFsEF.L+uo+Y.hlQhhGcshpscKKlILlEDLP..NpFatDo.+sF+pVIRpaLpSucsh..PLIhlITEh..-sLEGDNNQR+.....oFsu.pIMsKEILp-PRlosIpFNPIAPTlMKKaLspIlspEhph.tGKsKsPK+pSllEhICQuopGDIRSAINSLQFSuS.....KGppNlR.hKcG......hSLcustVL.pLSKScREu........ps.hshpspp.puhhtKDVoLuhFHAIGKVlasKRuossEl.........DSERIsu.............pLpp.pR-ph.............LVEsEsslp.StLSG-lF+LGLaENYlDFs...hoIDDAsslsDhLShuDsLSGDasos.YsLRElSToFuspGshtpN+upsYh.pphtspphps.aKsQh.......................Fhh.t.hhcpC.uhKtha..FYLPuL.shpot.h.hhshho..M+spstIshlpcI...........GhhPL................cR+a 
..........................................................................tt........W.pp.a.........tPts....t-..l....s....l...p........c....K....l.....t.p.l...p.p.h..l....p................t..................h...........t......t................t..t...............................lLllp..GPsGsGKo.s.s....l.p.h....L......u....p....p......h.....s..h..p....l..........c.......h...............s..s................................................................................................................................................p...t......a...........t....h.........................................t..............................................................................................................p.............................p....l..l..L...l..-..-hP.........s.h...t.....t....t..hpt.l.p.h....h..............t....t.....................Pllhhlo...................t........................................lh....s.....p....l.....t......p.....t..ht.........l...pFNslsss.hhKhLphl...ht......p..............................................h..s..........l.p..l...st..............GDlRsAl..tsLph.s.....................................................................................................................................................................+-.sl.hF+u.lG+llaspp.........................................................................................................................................................................................pt.h..........t.....h..hl.hpN...h...................h.p.....h...h..p....hu.sD..h.................................................................................................................................................................................................................................................................................................................................................................................................................................... 
1 96 159 246 +4078 PF04824 Rad21_Rec8 Conserved region of Rad21 / Rec8 like protein Kerrison ND anon Pfam-B_2686 (release 7.6) Family This family represents a conserved region found in eukaryotic cohesins of the Rad21, Rec8 and Scc1 families. Members of this family mediate sister chromatid cohesion during mitosis and meiosis, as part of the cohesin complex [1]. Cohesion is necessary for homologous recombination (including double-strand break repair) and correct chromatid segregation. These proteins may also be involved in chromosome condensation. Dissociation at the metaphase to anaphase transition causes loss of cohesion and chromatid segregation [2]. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.56 0.72 -4.78 16 498 2012-10-01 19:44:35 2003-04-07 12:59:11 11 5 266 4 328 500 17 52.40 30 8.40 CHANGED ssspt.phspls.supsR+pAA+hFaphLVLpsp..phIplcQ.pcPYu-IhlpssPsh ...............s.......hptls..psss.R+.pAAphFaphL....VLtsp..pslcl.pQ.....p.....ps.....au.cIhhp.t..................... 0 93 164 254 +4079 PF04825 Rad21_Rec8_N N terminus of Rad21 / Rec8 like protein Kerrison ND anon Pfam-B_2686 (release 7.6) Family This family represents a conserved N-terminal region found in eukaryotic cohesins of the Rad21, Rec8 and Scc1 families. Members of this family mediate sister chromatid cohesion during mitosis and meiosis, as part of the cohesin complex [1]. Cohesion is necessary for homologous recombination (including double-strand break repair) and correct chromatid segregation. These proteins may also be involved in chromosome condensation. Dissociation at the metaphase to anaphase transition causes loss of cohesion and chromatid segregation [2]. 
20.80 20.80 20.80 22.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.44 0.72 -4.11 49 651 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 292 0 433 644 1 112.00 34 17.91 CHANGED MFaoptlLs+.cGsLuplW.........LAAphpp+.....Ls+pplhpsslsposc..........................................pIhp.............Ppss............huLRlSGpLLhGVVRlYu+KspYLlsDspcsh.+l+hshp.s.........thsh..............ttpsshsslsls .......................................MFYsp.lLs+..pGPLuplW.........LAAp.hc..+K.......................LsKs....p...lhpssltpos.-...............................................pIlp...........................P.p.s....................hALRh..SGpLLl...........GVVR...........IYs+Ks......cYLLs.Dsscshh..+l+hsa+.s.........s.h-.ls.......t.................................................................... 0 128 232 348 +4080 PF03835 Rad4 Rad4 transglutaminase-like domain Bateman A anon Bateman A Domain \N 21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.92 0.71 -4.79 41 550 2012-10-10 12:56:15 2003-04-07 12:59:11 10 22 265 6 403 698 3 136.50 22 18.13 CHANGED pslptsphostcpppsp.................tppphtspch.hssaWsEVa...sp................tpc+WlsVDshst...........hhpph-sh...sppspps.hsYVlAass-sss+DVTpRYst.p.hsups++hRls..................tpWa......cpllp......app.pppc..................................DphE-tphp ........................................................................................sp..............................................hs.hhWsEVa.....sp................tpp+WlsV.Dshph........................................sh.pPh..........tpshp.....p...hsYVlAap.s.c...s........ss+DVTpRYsp..p.h..stsc+tRls...........................tpWh.......pp..s.lp............hpp.thpp..................................-phEp.ph.................................................................. 
0 105 204 338 +4081 PF04098 Rad52_Rad22 Rad52/22 family double-strand break repair protein Aravind L anon Aravind L Family The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to Rad52 [1]. These proteins contain two helix-hairpin-helix motifs [1]. 20.30 20.30 20.30 20.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.98 0.71 -4.49 35 602 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 410 33 261 527 127 137.00 28 43.84 CHANGED chpplQspLc+.lss-alotR.GssG.p+........lsYl-uapllsLANElFGFNGWuoplhslpsc................ah-pptt.t...................+hslshsshVRlTLKDGT....a+EshGhGpspshcsKutAapKAKKEAsTDALKRALhs.FGsslGpslYD+phhtp..hsphpps..shch ...........................................................................tt.Lcp.hssE.lp.Rht...tu.u.p+............lsY....l..suctlhphhN-lFGas..GWpsplp.slss............................................................................hshsshsclo.l+...s.so...........................u...ths.....pph....cut..t.....s..........c.......A.hKtuhosuhKR.Ahhp.aG..lGphlYshp.hht.......................................................................................... 0 80 143 209 +4082 PF04139 Rad9 Rad9 Wood V, Finn RD anon Pfam-B_28077 (release 7.3); Family Rad9 is required for transient cell-cycle arrests and transcriptional induction of DNA repair in response to DNA damage. It contains a Bcl-2 homology domain 3 (BH3) [2]. 
20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.50 0.70 -5.26 16 382 2012-10-02 11:47:48 2003-04-07 12:59:11 8 8 271 3 251 862 61 232.50 24 56.82 CHANGED hu+AlpsLSRlGD-LalEsscctLsL+slNSS+SAaushhFss.FFppY.........st.sssshpC+lthKulLslFR......................uhsthptsVE+Cpltl....pspps+.......................................llhplhCKpGlpKTaplsappspsl.pAlaspspssshlphss+lLs-hlsaFspsh-ElTl...........sssp.t+..Vhl+oasE-shct.........p.cshpTplslcs.-EFcpaplstc.................scITFslKEFR...........................uhLtaAEshsssluhaFspsG+Phhho....hs..c.phlEupFlLATl .........................................tcsl.sLu+h.ucpl.lp..s..............t.....p....t.......LtlpslNso..+SuasphhF.t......FF.ppa.................................tt..ttt...h..p.Cp.l.h..K.u.hh.s..lF+....................................................................................s.t...ppsl-psplpl.................pspps.+..............................................................................l.h.hph.h...s...c......p.G....l...h..K....o.....a.....pl..s.h..............p..................ts......p.....sh..p.................s................hh................s.p...p....t..t...s........s.h.....h.t...hps............+..hLtch.lt..pF..ss.....s...h.....p..-lsl...................................thss..pp.......l.hp....s..at...cp...h...st..............................................p.pshtTphs....l.s....c-....F..p..pa...plttp.............................................hplsF.slK-h+...........................uh...ls...a......u-......s..h..p........h.......s.............ls........hhas.tsGcPhhhs.....hp......p...hpsphllhT..................................................................................................................... 1 84 135 195 +4083 PF04002 RadC RadC; DUF2466; RadC-like JAB domain Kerrison ND, Finn RD, Iyer LM, Zhang D, Aravind L anon COG2003 Family A family of proteins present widely across the bacteria. 
This family was named initially with reference to the E. coli radC102 mutation which suggested that RadC was involved in repair of DNA lesions [1]. However the relevant mutation has subsequently been shown to be in recG, where radC is in fact an allele of recG [2]. In addition, a personal communication from Claverys, J-P, et al, indicates a total failure of all attempts to characterise a radiation-related function for RadC in Streptococcus pneumoniae, suggesting that it is not involved in repair of DNA lesions, in recombination during transformation, in gene conversion, nor in mismatch repair. Computational analysis, however, provides a possible function. The RadC-like family belong to the JAB superfamily of metalloproteins [3]. The domain shows fusions to an N-terminal Helix-hairpin-Helix (HhH) domain in most instances. Other domain combinations include fusions to the anti-restriction module ArdC, the DinG/RAD3-like superfamily II helicases and the DNAG-like primase. In some bacteria, closely related DinG/Rad3- like superfamily II helicases are fused to a 3'-5' exonuclease in the same position as the RadC-like JAB domain. These conserved domain associations lead to the hypothesis that the RadC-like JAB domains might function as a nuclease [3]. 20.60 20.60 20.60 20.60 20.50 20.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.13 0.71 -4.43 107 5120 2012-10-10 14:49:21 2003-04-07 12:59:11 10 20 3434 8 980 3486 385 118.30 39 57.04 CHANGED tlsssp...plhphlp...plt.....stpp....EphhllhLsspspllthpplsp...GolspshVpPRElh+pA.lptsAsulIlsHNHPSGsspPSppDhplT....c+ltpAspllsIpllDHlIl.u.......pspahS.ht-p.Ghl ................................................................h.hposptstcalt.....phs.....s.h.p.p.........EtFhlLaLsspsplIt.tcp.l....Fp....G.Tls...ps.V...a.PR....E....ll...+pA.lp.t.sAuu...l.ILsHNHPS..........Gs..s......p....P..S..psDhtlT....c+lhcAspllsIclLDHlIl..G...........ps.p.hhS.atEcGh....................... 
0 330 632 813 +4084 PF04712 Radial_spoke Radial spokehead-like protein Mifsud W anon Pfam-B_5891 (release 7.5) Family This family includes the radial spoke head proteins RSP4 and RSP6 from Chlamydomonas reinhardtii, and several eukaryotic homologues, including mammalian RSHL1, the protein product of a familial ciliary dyskinesia candidate gene [1]. 22.70 22.70 22.90 22.70 22.10 22.50 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.60 0.70 -5.75 22 369 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 118 0 235 361 8 295.50 22 75.46 CHANGED hptplpps...+AaL..ppsspsshsLY-HLsplLs+llc-cPps.uhDl..hEshStpl+pspa............tppcs.ts..phtsshchspppppLFtpst...............................p.p.............tcthscs.lssl...............hpt.shaappuGlGLuc-Esa+lhhAlKpLsc...pcslpssRFWGKIhGhptsYalAEsp..hccsEcptc.tt.t.t.t..ph..t.ttt.................................................lPhEtsts.....GsN+asYaVsspsu.s.sWs+LPsVTPtQIhsuRpI++hFTGcL-AsVho.aPsFs.............GsEtsYLRAQIARISuuTpluPhGh...Y..............phpE-Et.ttppt.tt.p............ph.csP-acslps............ph...phssWVHhhtaILsQGRssah...........tpptE-c...............-E-c-ccc--.cEs............c.EhGssLLsslscD................................................t.tlsst.......ssWoh+hsu.shhs..................paulsVl+SthWPGAashu..s....u++apslYlGaGhKhssp.sasP.....s.PPPstpEass....lsEhpDPos-EEtthch......sp-psp 
................................................................................................................................................h............................................................................................................................................................................................................................hht.sGhsls.pc.h...l.hulh.l.p...p....h..tphhFWG+lhGhp..tsYhlsps........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..c...................................................................................................................................................W................................thhhhps.hW.Ghhhh............tt........h..hYhG.G.p.......................................................................h........................................................ 0 110 129 177 +4085 PF03089 RAG2 Recombination activating protein 2 Griffiths-Jones SR anon Pfam-B_4702 (release 6.5) Family V-D-J recombination is the combinatorial process by which the huge range of immunoglobulin and T cell binding specificity is generated from a limited amount of genetic material. This process is synergistically activated by RAG1 and RAG2 in developing lymphocytes. Defects in RAG2 in humans are a cause of severe combined immunodeficiency B cell negative and Omenn syndrome. 
19.40 19.40 19.40 19.60 19.20 18.80 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.13 0.70 -5.72 18 5792 2012-10-05 17:30:42 2003-04-07 12:59:11 9 5 4451 0 28 5451 1 268.30 58 91.61 CHANGED KpsELKLRPloFSNDSCYLPPLRpPAlspl.ssp-u-sppYLIHGG+TPNNELSspLYlhohsS+u.NKKsoLsCpEKELsGDlPcuRYGHTlsVVaSRGKohsVlFGGRSYhPsGQRTTEsWNSVVDC.PpVFLlDLEFGCCTSahLPELpDGhSFHVSlARsDsVYlLGGHoLpossRPPpLa+LKVDL.LGSPsloCTlLsuGlSlSSAIVTQsus..cEFlIlGGYpS-sQKRh.CNolsL-DssIcIppREsP-WTu-IKHSKTWFGusMGpGuVLlGIPu-sKptssDup.aFYhlsFtp-c-t.....stQsCSQEST.pD.EDSsPLEDSEEFYFupE .........................................................ts.....pYlIHGG+TPNNElSsplYlhohss+s..N+KlThpCpEK-LVG-lPtARYGHolsVVaSRGKohsVlFGGRSYhPsupRTTEpWNSVlDC.PpVFLlDhEFGCsouahLPELpDG.SFHlulARpDslYhLGGHSLssssRPPpLaRl+V-L.LGSPsl..oCsl.L.ssGlSlSSAIl.Tps.....u.....s........cEal.IlGGYpSDsQKRM.CshlsL--stIchp.hEsPcW..TsDIpHS+hWFGushGpGshLlulPstsp....s-s..ahY.lpht......t............................................................. 1 2 4 11 +4086 PF04901 RAMP Receptor activity modifying family Finn RD anon Pfam-B_5615 (release 7.6) Family The calcitonin-receptor-like receptor can function as either a calcitonin-gene-related peptide or an adrenomedullin receptor. The receptors function is modified by receptor-activity-modifying protein or RAMP. RAMPs are single-transmembrane-domain proteins [1]. 20.40 20.40 20.70 21.50 19.80 19.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.45 0.71 -4.33 19 191 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 42 22 86 148 0 108.10 39 71.27 CHANGED tthtphChppFpccMcsls.phWCsWsphhp.YppLosCTchhAchlsCaWPNshs-cFhhtlHppaFpNColsththpDPPsslLhshIllPIhlTlhhsuLVVWRSKco-uhs ............................hh...phChptFptpMppls..ptWCsWs.ph....h...t..YppLopCTchhAptlsCaWPN..shs-pFh.htl.HppaFpNCs...l.s.th.thpDPPsslLhshIllPlhlThhh.suLVVW+SKcs-s.h...................... 
1 9 17 37 +4087 PF00638 Ran_BP1 RanBP1 domain Bateman A anon Prosite Family \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.41 0.71 -4.07 8 1512 2012-10-04 00:02:25 2003-04-07 12:59:11 13 83 333 19 988 1495 10 115.30 30 23.31 CHANGED EVKoGEEDEEsLFppRAKLaRFDs-s..ppWKERGlG-lKILKpKcs.sKlRlLMRRDplLKlCANHhIossMpL.KPhsGS-RualWts.uDaADu-uKsEpLAlRFKspEsA-pFKppFEEuppt ......................................ltTGE.E..s..E.c.slap.h.+.uK..L.a..ca..............s.....p....ps...............pp..W.....K....ERGhGsl.+..l......L.............c.............p......p.........p......s.....t.........p..........s.........R.llM..R.p-..........p.s.....h.............+...lh..hNphl...hs...s...h.pl....p......t.........ss..p...................c...u.hh.a........ts......hD......h..u...-..t...p....s...c...................s..............pp..h..sl+h....ts....t.-......Apphtphhpch................................................................. 0 331 490 762 +4088 PF03085 RAP-1 Rhoptry-associated protein 1 (RAP-1) Mifsud W anon Pfam-B_1750 (release 6.4) Family Members of this family are found in Babesia species. Though not in this Pfam family, rhoptry-associated proteins are also found in Plasmodium falciparum. Indeed, animal infection with Babesia may produce a pattern similar to human malaria [4]. Rhoptry organelles form part of the apical complex in apicomplexan parasites. Rhoptry-associated proteins are antigenic, and generate partially protective immune responses in infected mammals. Thus RAPs are among the targeted vaccine antigens for babesial (and malarial) parasites. However, RAP-1 proteins are encoded by by a multigene family; thus RAP-1 proteins are polymorphic, with B and T cell epitopes that are conserved among strains, but not across species [1,2,5]. Antibodies to Babesia RAP-1 may also be helpful in the serological detection of Babesia infections [3]. 
25.00 25.00 55.30 48.50 22.00 18.70 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.76 0.70 -5.03 12 102 2009-09-11 11:32:32 2003-04-07 12:59:11 10 3 9 0 17 101 0 195.70 39 57.46 CHANGED scoLtssspshpsttpppchspsMpp.hs.hssphh-tlCptshcp.spC+ptVssYVsRCpptsCholDshpashptp.psLsLPsPYQL-AAFhlF+pssusPh+pthcphhhRF+psupausY+pFlhsLLp+N...hhhcssssD..l-shls+YhYMsTlYYKTYLsl-phpu+hhN+hsFs+alFuhtI++ALppIl+sNlPccht.chslsclpplssuYtpYhh.sQlPshspFApcFupMVhcsLlpolu ....................................................hpttMt..h...tp...tphCpts.tt.ppCtt.ltsYhpRCtt..hsChTlDshph....t.psLsLPs.aQL-AAFhLF+pSsuN.t+pthcp.hh..........Rh+t...ttpasta+pFlhsLhppN...hhp.p.tssD..l-.thsspahYMsTlaY.KTYL...sls.hsAKhhN+huaopclFuh.tIppsLppll+.NlP.sht.phs.tplpplssuYtpYhh.oQlPshspFAccauphshcsLl.sls................................................................................ 0 2 10 10 +4089 PF00071 Ras ras; Ras family Sonnhammer ELL, Fenech M anon Swissprot Domain Includes sub-families Ras, Rab, Rac, Ral, Ran, Rap Ypt1 and more. Shares P-loop motif with GTP_EFTU, arf and myosin_head. See Pfam:PF00009 Pfam:PF00025, Pfam:PF00063. As regards Rab GTPases, these are important regulators of vesicle formation, motility and fusion. They share a fold in common with all Ras GTPases: this is a six-stranded beta-sheet surrounded by five alpha-helices [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.79 0.71 -4.85 61 21243 2012-10-05 12:31:08 2003-04-07 12:59:11 17 332 1006 663 12839 30754 3385 151.90 29 62.64 CHANGED KllllGDuGVGKSsLllpFspspFsppa.sTIth-.....Fhp+plplD..........ucplcLpIWDTAGQEcacslpstYYRsupGhllVYslTscpSFpplp.pWhp-lpchss..........pslshlLVGNKsDLcp.................................pRtVstpcupphA.......cchs.lhFhETSAKsshNVccsFhpls+plhp .........................................................................KllllG.c.u........u.V.G...K........o....s........L............l...............h...........p.............a..............s.................p..............s...............p..........F..............s...........p............p........h.........................s........T.........l..................h..-..........................h..h...t....p......p...l...p....l..s.......................................s.p..p...l....p..L........p....l.........W................D.............T.........A..................G............Q............E.............c..........a.............p............s..........l...........p..............s...........h................a...........a.................+...............s................u..............p.....u....h.......l.......l.......l.........a....s................l....................o...........s............p.........p..............S..........F..........p.......s............l.............p......p..........W...h...p....c...l.p..p....h.ss...............................ts.h...s..h..l.L....l.......G...N.....K....s......D....L...p.p......................................................................................................................p.+.......V...s.....t......p.....p.....u.....p.....t....h..A.....................c...p...h.......s.....h...............a........h...........E.....s...S...A..p...s.........s..........s.....l...c...p....s...F..phhpth..t..........................................
........................................................................................... 0 4857 6747 9773 +4090 PF00616 RasGAP GTPase-activator protein for Ras-like GTPase Ponting C, Schultz J, Bork P anon SMART Family All alpha-helical domain that accelerates the GTPase activity of Ras, thereby "switching" it into an "off" position. 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.30 0.71 -4.65 115 1769 2012-10-03 21:54:49 2003-04-07 12:59:11 14 106 254 6 1114 1705 9 183.90 26 14.50 CHANGED hlppllp...pEl........................pp.ssp...sslh.R..uNohso+.hlspah+t.....hGppYLcpsLtsh....lpcl.h..ppchsh..ElDPtcl.................................................................................................................................................sppphp........pshppL...tphspphhssIhs.Sh.sp....h.Phsl+hlsppltpplpp+a...t.................................pth..hpsluuFlFLR..FhsP..AI......ls.Pchaslh....p....ts.stptpRsLhhluKll.QslAs 
.............................................................................................................................................................................................hpphhp....tEl.............................pp...ssp...pplh..R.uN.ohso+.hhppah+h...........hGppYLpps.L..tsh.......................lppl.h........cpch....sh.El.D.Ps+l................................................................................................................................................................................................................................................................................ppplp......................psh.ppL...hthspphhssIhs..Sh..sp......h.Ph....tlRhlhpp.....lpptspp+.a.t...................................................................................p.h......hphluuhl.FLR..FlsP.AI............ls..Pchaslh......p....................p..sspspRsLs...hlAKhlQslAs................................................................................................................ 0 379 541 819 +4091 PF03836 RasGAP_C RasGAP C-terminus Griffiths-Jones SR anon PRODOM Family \N 22.10 22.10 22.50 22.50 21.90 22.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.45 0.71 -4.50 38 469 2009-01-15 18:05:59 2003-04-07 12:59:11 10 30 217 6 318 442 1 133.50 33 10.26 CHANGED shtchKpcslcslpcLEphGhlspp.NpYQp......llspIAp-I+sppphRppcppElctlppohpsLpc+spaLppQlcpYpsYlcsshtslptptp...................tpthhth+php+p.s+t.chGsaKaoAppLtccGVllchp..s.ptp ..............................htcpKp+hhcsLp.pLE..phGh.....lspp.spYQp......lls-IApDI+spcphR..ppRptELtpLppThpsLscKssahppQlchYcsYIcsshs....sLppptt..........................h.t.pthpcp.s.+tp+hsshKYoAtpLp-K.GVLlchpshp..t........................ 0 97 155 240 +4092 PF00617 RasGEF RasGEF domain Ponting C, Schultz J, Bork P anon SMART Family Guanine nucleotide exchange factor for Ras-like small GTPases. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.16 0.71 -4.57 148 3434 2009-09-12 23:24:09 2003-04-07 12:59:11 14 184 292 23 1986 3165 4 186.00 25 19.95 CHANGED hshsst-lAcQLTll-h...cl....app....Ip.....................pE..............................hl..spta.......sp...pp.............................ss.s......lpthlpphNplopaVsspIltp.p.............shccRspllp+aIclA...........p....................p.hcplpNasohhAI.luuLssssltRLcpTWptlspcth...c............h.hpcLppl............h............................ssp..................pNapsY..R.....ptlp...................................ps.....Ps.....................lPalGlaLp..........................DLsalpcu...ss...................shlps............................................llNFpKh..pp ........................................................................h..hsshclApQLThh-h...pl......Fpp....lp..................................................pE..............................hl..t.t.h.a......pp.............pp...t.......................................................................ss.s.........l.pthlppa.Nplo.....h.a...Vs..spIltp..t.....................shppRs.p.l.lp+.a.IclA..p..........................................c..h.p..p...lp..NasohhAI.l..uuLs..s...s..s..l...t.....R.L........c....p.T....Wp..........p.......l.spc.p........p.....................h..hp....pLp..p.l.....................h..................................................................ss.p..................pN..ap...ph....R.......phlp..................................................................phps....ss..............................lP.a......h..........u.........l.h.Lp..........................Dlhhlcpu....s.................sh.h.tt....................................................................................hlNapKh................................................................................................................... 
1 658 907 1382 +4093 PF00618 RasGEF_N RasGEFN; RasGEF N-terminal motif Ponting C, Schultz J, Bork P anon SMART Domain A subset of guanine nucleotide exchange factor for Ras-like small GTPases appear to possess this motif/domain N-terminal to the RasGef (Cdc25-like) domain. 28.30 28.30 28.30 28.30 28.20 28.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.50 0.72 -3.92 88 2431 2012-10-02 12:00:53 2003-04-07 12:59:11 15 132 260 14 1433 2250 2 105.10 21 10.42 CHANGED plpuuolp..tLl-tLssp.ph........c..sahpsFlhoa+sFs.ostclhphLhpRa............................h.ssstph.ptt..............................................................hph..+...lhpllppWlppaht...-......apps...............hlpplhpa ............................................ltuuoh-cLlp+Ls.pp...phh...........................-s.salp.sFLhTaR.....sFh.o.s..........pc.Llpt.L..l.p..pa.............................................................................................................................tstt..ptt...........................................................................................................................................................................................hph....+...lhpllp.tWlppa.....-...........Fpts.........hht.h............................................................................................................................................................... 0 411 620 975 +4094 PF01858 RB_A Retinoblastoma-associated protein A domain Bateman A anon Swiss-Prot Domain This domain has the cyclin fold [1] as predicted [2]. 
25.00 25.00 25.50 26.70 24.40 24.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.50 0.71 -4.64 7 366 2012-10-03 00:42:12 2003-04-07 12:59:11 12 10 157 16 185 368 1 188.10 38 21.11 CHANGED TPVpoAhsolppLpshlsuh.spPSppLpphhpsCsps.ppslhcRlptlhEhhhpphs.u.ct...........s.phAppRhphA.pLYY+VLEshhpuEtcpLss.shosLLspch....FHcoLlACslElVhtoYps.........s.hpFPalLEshslpsF-F.KVIEsFIRp...EssLsREhlKHLNSlEEplLESLAWppsSsla-hl ..........................................TPV.ssAhpolppLps.hlsu..h..ts.tP.S..ppL.ph..hp.s.Csps.......P...pp...s...l.h..........pRlcplhchappp.hs.....t.s.sttt.................................shphu.ppR...hphAt.tL...YY+lLE..sllppEp.c..................R....L...s.....s..t...................s....h.otLL....p.p.-h....FHpSLlACsLElVhhoYps.....................s.hsFPall-lh.slss....Fc....F...aKVIEsFI.Rs..............Ess..L.sR...-hlKHLsplEEplLESlAWppsSsLaphl............ 0 49 77 127 +4095 PF01857 RB_B Retinoblastoma-associated protein B domain Bateman A, Griffiths-Jones SR anon Swiss-Prot Domain The crystal structure of the Rb pocket bound to a nine-residue E7 peptide containing the LxCxE motif, shared by other Rb-binding viral and cellular proteins, shows that the LxCxE peptide binds a highly conserved groove on the B domain [1]. The B domain has a cyclin fold. 
20.70 20.70 22.20 21.10 20.50 19.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.86 0.71 -4.66 6 352 2012-10-03 00:42:12 2003-04-07 12:59:11 15 10 158 15 185 360 5 140.60 39 15.49 CHANGED soLslFa+KVY+LAulRlpsLCp+L.s..........lp-cLccpIWshFcaoLspps-LMhDRHLDQllhCAhYshAKVsp..-hoFp-IhpsYRsQPQApspVaRSVhIct.........p...p............................h..pchsDIIsFYNplalsplKs ...............................................oLslFa+K.l...Y+LAulRLps....LCt.+L.s.............................ls.s.-..lc..cpI.WT.hFcao.L..p.p.s..-L.MhDRHLDQllhCuhYshs.K....Vs..p..............-.h.oFppIhpsY.R...p..Q....P.Qu............psp.V...a.RsVLlcp.........................................................................................pchsDlIpFYNplal.plK............................................ 0 53 81 127 +4096 PF02196 RBD Raf-like Ras-binding domain SMART anon Alignment kindly provided by SMART Domain \N 25.70 25.70 25.70 25.90 25.40 25.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.09 0.72 -4.22 10 710 2012-10-03 10:59:06 2003-04-07 12:59:11 10 34 96 14 293 599 0 70.50 30 9.82 CHANGED phhpVaLPssQpolVtVRsGhol+DsLppslcpRGLssssstVhhht......t+csLshcsctuhLsu.cElhlEhh ....................hhplhLPspppsll.ssRs...Ghol...c-sLpthlcp+G....Ls.p..sssVahht.......pccslshspsh..s...Lsu.c-lhlEhh..................... 
0 45 68 156 +4097 PF02033 RBFA Ribosome-binding factor A Mian N, Bateman A anon IPR000238 Family \N 25.00 25.00 26.10 25.40 24.90 23.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.07 0.72 -4.00 183 4543 2009-09-13 23:02:53 2003-04-07 12:59:11 13 5 4463 8 1061 2620 2217 105.00 34 77.59 CHANGED RspRlu...pplp+plupllp..pp.l+..DPRl.........shlolocVclSsDLphAcVal...ol....h........................sp....................ppt..cpshpsLppAsGalRpplucplp..lRhsPcLpFhhD...p....ol-put+lspLl ..........................RspRlupplp+Elupllp....cc..l+........DPRl..............thlTl..ocVclosDLphA+Val...oh..l..s.......................sp...................ppt...cpsh.puLp.c.Ap..GhlRppLG+p.lc..LRhsP-LpFhh..Dp.Slchut+lspLl......................... 0 369 692 897 +4098 PF05025 RbsD_FucU RbsD / FucU transport protein family Moxon SJ, Bateman A anon Pfam-B_4828 (release 7.6) Family The Escherichia coli high-affinity ribose-transport system consists of six proteins encoded by the rbs operon (rbsD, rbsA, rbsC, rbsB, rbsK and rbsR). Of the six components, RbsD is the only one whose function is unknown although it is thought that it somehow plays a critical role in PtsG-mediated ribose transport [1]. This family also includes FucU a protein from the fucose biosynthesis operon that is presumably also involved in fucose transport by similarity to RbsD. 
25.00 25.00 27.90 30.40 24.60 23.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.74 0.71 -3.87 133 2765 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 2076 86 395 1291 399 134.10 35 98.06 CHANGED MpcsslLNs-L.pllAshGHsDpllIuDAshPh....s.tspplcLslphsssshhplLcslLsph.l-phlhupphthp....ss...thhsslhpth.........ttsh.lphls+ppF.hcp.s+p..upAllpTGEpssYuNlILpsGVlh ........h+hsslLss-l.plluchGHsDpl.......ll...sDAs........hPh..........s.ts.ttlclslpt.sssshhplLpsllsphpl-s..hh....hA.pphhtp........ss...ph.ht.plhpchtt..............t.tssshpIphl.sH...p...pF.ccp.spc.......upAll.RTGEsosYu.NIILpsGVs...................... 0 101 207 293 +4099 PF02341 RcbX RbcX protein Bashton M, Bateman A anon Pfam-B_948 (release 5.2) Family The RBCX protein has been identified as having a possible chaperone-like function [1]. The rbcX gene is juxtaposed to and cotranscribed with rbcL and rbcS encoding RuBisCO in Anabaena sp. CA [2]. RbcX has been shown to possess a chaperone-like function assisting correct folding of RuBisCO in E. coli expression studies and is needed for RuBisCO to reach its maximal activity [2]. 21.00 21.00 22.40 21.70 20.80 20.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.37 0.72 -3.93 16 676 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 573 77 69 329 10 109.20 67 78.97 CHANGED M-lKpIAKDTAKsLtSYLTYQAVRsVhsQLuETNPshAlWLppFSuptpIQDGEuYLctLhpEsp-LuhRIMTVREHLAc-VsDaLPEMVRsuIQQuNhEHRRQhLERlTQ .........................................MsLKQIAKDTAKTLQSYLTYQALRTVLAQ.LG........ET........N......P.P....L..u..hW.....LpNFSu..GKI.Q.DG.EuYIEpLhtE...K....s....D.L...ALRI..MTV.REHI.A.p....E.l....s..E.F..LP..EMVpTGIQQANMEQRRQHLERITp............................................... 
0 18 48 64 +4100 PF00415 RCC1 Regulator of chromosome condensation (RCC1) repeat Finn RD anon Prosite Repeat \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.40 0.72 -3.75 104 14888 2012-10-05 17:30:42 2003-04-07 12:59:11 13 606 566 106 9774 17535 3239 51.30 29 20.18 CHANGED cGplasWG..psptGQLG..............tssppspthPph..lphhps..............lhplu.sGspHohsl ......................GplasW......G............ps....s..........h.....G.Q.....LG.....................................................tss...p..p...s...p...h....t..P..ph.........l..p...s..hps.................................tlh..p..lu...s..G..s...t.Hohsl....................................................... 0 3249 5152 7453 +4101 PF04381 RdgC Putative exonuclease, RdgC Kerrison ND anon COG2960 Family Members of the RdgC family may have exonuclease activity. RdgC is required for efficient pilin variation in Neisseria gonorrhoeae, suggesting that it may be involved in recombination reactions [1]. In Escherichia coli, RdgC is required for growth in recombination-deficient exonuclease-depleted strains. Under these conditions, RdgC may act as an exonuclease to remove collapsed replication forks, in the absence of the normal repair mechanisms [2]. 
25.00 25.00 28.60 28.30 24.30 24.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.78 0.70 -5.46 71 1392 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 1234 4 260 909 119 288.20 47 96.62 CHANGED MW..FKNLhlYRhscshshss-pL-cpLsptsFpPCuup-hpphGWlsPhucpsp..LsHssssplLlsh++E-KlLPuuVl+ctlcc+lpplEtcpuR+lt+KE+cpLK--lhppLLPRAFo+pppThuaIDsppshllVDuuSsp+AE-lluhLR+o.lG.SLPls..PlpsppsPsssMTpWLtps.psPssapls-EsEL+us.t-cuuhlRsKpp-Lpu-...EIpsHlpuG.KhVo+LALsWp-+lsFlLs.....--hslKRlKFtDhlpEps--h.sp-Dhst+hDADFsLMosELsphlspLlpshGGppp ...............................................hWFKNLhlYRL..s+-ls......h.....p.u.-p...hEcpLuphsFoPC...G..S.Q.D..h.s+.hGWV...s...P..hG...p..p....u.-h....L....s....H...su....s.s..pll..lsA+..KEEKlLPusVIK..pt....Lc....t.Kl....t.c....l.E....s...-..p...u.R.K.L...+KpEK-sLKD-Vlc...sLLPRAFS+hopThhaIDsssGLIhVDs.ASuK+AEDsLALLRKo..LG....SLPVV.......PLs...h...cs...s....p......hsLT-WlcsG..sss...p....G.....FpL..h-E...AELKuh...hE-.G.ulI.RsK+.QD.Ls..u-...E.ItsHI-.AG.KlVTKLAL...s..W....p......p...RlpFVhs.....-DholKRLKFsD.pL+-QN..-Dl..scED.hApRFDADFhLMouELusLlpsLl-uLGGEt.p......................................... 0 53 130 207 +4102 PF05183 RdRP RNA dependent RNA polymerase Wood V, Bateman A anon Pfam-B_2226 (release 7.7) Family This family of proteins are eukaryotic RNA dependent RNA polymerases. These proteins are involved in post transcriptional gene silencing where they are thought to amplify dsRNA templates. 
20.80 20.80 21.00 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 580 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -13.05 0.70 -5.82 94 667 2009-01-15 18:05:59 2003-04-07 12:59:11 7 26 187 3 486 719 5 486.90 23 46.88 CHANGED splhhpssphphss+lhRc.................a.t.pp.....Flclpas-cphpthh.t...........................hpphtphhtpulhl..........s.htapa..hs.stpt...............h+ppssahhsss.........................hshpphhphh.sshpp................sKhhuR.....hu.shosohs........lphpph.............phlsDl........................................................ttpth..........shoDGs...GhlS.slsctl...tppls.............................................................................htthPoshQ..hRh..................sG.sKGllh.l....s.ph.........................hlhlRt.ShhKa.................psthpsl.....................-lhphup..................p.shLN....................cphlhlLpphGl....pc.........hFhph.pptlpphtp..hhts....pthhphhpphtp.s.hth................................................................hhttuh..........tppsalpphlpthhppplpphpp.....+h+I.lsputhh..hGlhD.t.................Gh.....Lc..........sclalt.............................................tsttp.phlpG..pllls......RsPshpPuDlph..lcA.............lth....st............Lpp.h....pslllFss....pGt.................................pshss.huG..uDh.DGDtah...lh.......WD.pl............hs.tshp................................hphpphtcahhp.....hpsshlGhhsstahths..........................shhsspshpLuphhutslDhsK.sGh.hp.....h.p..................ht.pthPcahp.......................pptt.....................sppllsplacphtptttttttp 
.........................................................................................................................................................................h.s..ht.ssphhRp...a.t.tp......hlclp.h.ptt...h......................................................................hhpphhthhtp.uh.h...........stphap..a..hs.stst.........................................h+ppt.s..ahhs...............................................................hph.t.thhphh..sphpp..........................sKhhuR........hu.shSpoh..........lt.pph..................h.Dl........................................................................................tttth......................haoDGh...GhlS.thsptl..hptht.............................................................ht..PoshQ..hRh.......................sG.hKGhlh..l.....................s.p...................t..........................l.lR...S..Ka....................................ttptttl...................................................................................-lh..p..hup....................shLN...........................pphl..lLpp..hu.l........pp............hh.ph.pp..lpphtt....hh.s.............ttshphhpt....t...t................................................................................................hh..Gh............ppsalpthl.phhht.tl...............tth..pt.......+h+I...ls..p..uhhh...hGshD.h......................sh.....Lp............................splalph..............................................tttt..h.ltG.......llls......+sPshhPGDlph..h.pA.........................................................V.h..........st..................Lpp..h....hsslVFsp....pGt...............................................................+shss..huG..uDlDGD.ah..lh.........WD.pl......................h......shp...........................................................t.pph.pahhp....................h.ps.lGhhsph+..hhs................p..........th.s..shpLuphhu......sV..DhsK..oGh.sp....h.t........................h.Pcaht...........................
.......t...........................................utplls.lh..h.........t..................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 210 321 430 +4103 PF00154 RecA recA; recA bacterial DNA recombination protein Sonnhammer ELL anon Prosite Family RecA is a DNA-dependent ATPase and functions in DNA repair systems. RecA protein catalyses an ATP-dependent DNA strand-exchange reaction that is the central step in the repair of dsDNA breaks by homologous recombination [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.95 0.70 -5.37 28 12149 2012-10-05 12:31:08 2003-04-07 12:59:11 16 16 9034 84 1090 10099 5077 240.80 66 92.66 CHANGED KALssALuQIEKpFGKGolM+LG-cs.t.cl-slSTGSLuLDlALGlGGlP+GRIlEIYGPESSGKTTLsLHsIAEsQKpGGssAFIDAEHALDPtYApKLGVclDsLLlSQPDsGEQALEIsDtLVRSGAVDllVVDSVAALsPKAEIEGEMGDuHVGLQARLMSQALRKLTGsls+SNThlIFINQIR.KIGVM.FG.sPETTTGGsALKFYASVRLDIRRhuslKcus....chlGscT+VKVVKNKVAPPF+pAEFDIhYGEGIS+hGEllDLGVchsllcKSGAWYSYpsc+IGQG+ENA+paL+-sP-lusEIEpplRpphsh 
...................................................................................................................................s.lM..+.h..G..p.t..t.......pl.p.s.......l.uTG.SL.......uLDl..A.L.......G...........l.........GGLP+.G....Rl..lEI.Y.....G.P.E.S..S.GK.T....T..L....s....L.p..s...I....A......p............u......Q...........K................p..........G....G........h................s...A...F..I....DA......E........H.....A........L..........D............P........h.........Y......A.........c..........K....L..........G........V.............s............l.........D..........s...................L..........L.........l...S.......Q.........P...........D...........T............G.......E....Q.......A..........L.......E....I........s.........D.......s....L........V.....R.......S.....G..A.........l......D........l....l.VlD.SV.AA..L..s....P..K...A..........E.....I..E....G..E..MG..D...........S.p.s......G...L..Q..A...R..LM..S..QA..LR.....KLTu....slp.pos.s.....h.s....I.FINQl.R.........K....l......G......V...M....F.....G....s...P...E..T..T..s..GGpALKFYuo..lRh...-......lR.......p........t......t........lK...t.tt............t.h.G.ps+h+lhKs+hs.................................................................................................................................................................................................................................................................................................................................................................................... 1 379 738 938 +4104 PF02565 RecO_C RecO; Recombination protein O C terminal Mian N, Bateman A anon COGs Family Recombination protein O (RecO) is involved in DNA repair and Pfam:PF00470 pathway recombination. 
20.50 20.50 20.70 20.50 20.40 20.40 hmmbuild -o /dev/null --hand HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.54 0.71 -4.24 157 3973 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 3952 7 846 2634 1633 151.60 18 61.19 CHANGED ht-htthshutalsELlpthl..-pcsp.sp..LaphhhtsLp.tL.....spps......s....hhhhh.FElpLLphhGas.slspCstsGpp.s.shh..h......................ttpsspsttshths.pshtllthhhpt...t...ht...hsspshp...pht......plhptalppplst.thpspp ............................shhthhhuhalsELls+hl....t....c....p.....pst...st.........LFphhhpsLp.tL..............spsp............ss...thhthh.FElplLst.hG.ausshscCstsupssstthh.hh....................................................................................................................t.ppttpsttththsspshhlhphhtth..thpt.......ht........t..h.s.t.p.p..hp.........php.................phhphhh...c.a.lst...L+Sh..................................................................................... 0 279 551 713 +4105 PF02132 RecR RecR protein Mian N, Bateman A anon IPR000093 Family \N 29.10 29.10 29.50 29.40 28.90 29.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.20 0.72 -4.52 192 4246 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 4214 6 904 2429 1218 41.00 38 20.52 CHANGED p.tsppLApAltpstpplphCphCtslo-..p-....hCsICssspR.D ...............ts.cLApALhpApcclpaCshCtslT-..p-........sCpICs-spRD.... 0 309 605 770 +4106 PF03837 RecT RecT family Aravind L anon Aravind L Family The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to RecT [1]. 
20.80 20.80 21.00 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.19 0.71 -5.02 81 1551 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1057 0 145 1046 127 180.60 24 65.71 CHANGED sphhptshshlpp..........................pL...tt......ssspshhsslhpssphsLsPht....ppsYllsa.....................hs.tcsphhhuhcGhhplApRssphp..ulputslhctc.hphpht...........ht........ppsphlusaAhhhh.....psstp.h..hhhshpphppttp................p.ttt.........................s.W..tsp.cpMhcKsslpphhp+hhPhshp.............shhstDEh...............t.pppppsss ..........................................s....p.hhthlpp..........................th.....hp.......ssstphhshlh...ssphG..LsPhs....ppsYhl..P.................................ht.sth.phh.luhcG..h.plh....p..cstp..hc..........uhp.hp.s.hc....ht.t.........................................................t..hushshlhh.....pstsc..h...p.hhhc..phcp.hp.......................ptptt......................................usW..ps..p.......c....pMh.++pshhphh.+hhhshu.....................shhstDEt......pp.........t.ts................................................ 0 44 96 119 +4107 PF03838 RecU Recombination protein U TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Domain \N 25.00 25.00 26.10 25.80 23.70 23.10 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.98 0.71 -4.73 43 1337 2012-10-11 20:44:43 2003-04-07 12:59:11 9 3 1214 10 140 682 9 163.20 48 83.68 CHANGED NRGMoLEctINpoNpYYLspslAVIHKKPTPlQIV+VDYPpRStAsIpEAYF+psSTTDYNGVY+G+YlDFEAKET+NKouFPLcNhHpHQlpHhcplhpQsGIsFlllcFsshcchallsuppLhpaWpt..pssG+KSIPhs.I.....pcpuaplphshpPplsYLcsl-pl .......................................pRGMohEc.INpoNpYYLppslAVIHKK.PTPlQIVcVDYPp.....RSpAhIsEA.YF.+psSTTDYsG...VY..p.G.h.Y.IDFEAKET+s.Ks.uFPh.....p.....NhHtHQIcHMcpshp.QpG..I..sFll.l+Fso..hpE.sYlLPsp...clhpaa.....psp.........G+KShsls.I.....cc.pGapI.phsh.t...Ppl.sYLcsl-p.............................................. 
0 47 90 115 +4108 PF02631 RecX RecX family Mian N, Bateman A anon COG2137 Family RecX is a putative bacterial regulatory protein [1]. The gene encoding RecX is found downstream of recA, and is thought to interact with the RecA protein. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.60 0.71 -3.91 147 4223 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 3600 6 808 2825 1656 117.80 22 66.09 CHANGED Lpp+....h.sp.ch....lcpllscLpchsal.sDpcaActalcs+ht.pshGstplppcLp.pKGlsppllp.psLp....p..hs.tpp.......................Atplhp++hpph.....t....................hctcpKhhpaLhp+GFshchlpp...slp........ttt- ............................................h..st.p......hppllshhtc.thl.sDtcautthlpsp...t..ps.h...G...ttl.p.pcLt.p.K.G..ls......p....p..h..lc....ps.lp...........c..hc.pc..................................Apcl.hpKch.pph......ht.ss...............................................................hphcpKlhphLhp+Gash-.Ippslp........t.tp................................... 0 262 516 680 +4109 PF02014 Reeler Reeler domain Bateman A anon Bateman A Family \N 21.00 21.00 21.00 21.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.95 0.71 -3.90 40 496 2009-01-15 18:05:59 2003-04-07 12:59:11 11 45 110 8 357 521 42 122.60 24 18.03 CHANGED sCschh...PpH......stsPp....sssaplsss..spsatsGpphpVsl....tt.ssspF.cGFhLpARssss.tt..........................lGpFsls.sss.hpphhsC.....sAVTH...ssspsKpplpl.hWsAPss.s.GslhFpATllpphtlaasc .................................C.th.......tt..........ttp.t...sssap...ltss.........sptahsGpphpVol...................sst.F..cGFhlpAcptsspt...........................lGpFphh.ssp..........h..p.......h..hhsC.........................su..loH...sssp......tK..s..p.l.p..l.hWtAPs................ss....s...G...sV.hFpAT.ll.ppht.haah........................... 
0 151 183 286 +4110 PF04221 RelB DUF415; RelB antitoxin Mifsud W anon COG3077 Family RelE and RelB form a toxin-antitoxin system. RelE represses translation, probably through binding ribosomes ([1], [2]). RelB stably binds RelE, presumably deactivating it. 20.90 20.90 21.00 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.44 0.72 -4.23 6 2614 2012-10-02 18:44:02 2003-04-07 12:59:11 7 4 1541 3 352 1624 75 79.40 22 87.15 CHANGED suhlshRID-clKspAssVLcpMGLThSpAl+lhLsplApsculPF-lplPpsNptTlsuIpctctGps.......ppshsscchhscl .............lphRlDpclKppApplhpphGlshosAlplhlpplspp..pu..lP..F....c.l....p.......h......s..................s.......p..ts.h....t.s.h.......p.....c............................................................................. 0 120 212 267 +4111 PF03763 Remorin_C Remorin, C-terminal region Farmer EE, Finn RD anon Pfam-B_1798 (release 7.0) Family Remorins are plant-specific plasma membrane-associated proteins. In tobacco remorin co-purifies with lipid rafts. Most remorins have a variable, proline-rich C-half and a more conserved N-half that is predicted to form coiled coils. Consistent with this, circular dichroism studies have demonstrated that much of the protein is alpha-helical. Remorins exist in plasma membrane preparations as oligomeric structures and form filaments in vitro. The proteins can bind polyanions including the extracellular matrix component oligogalacturonic acid (OGA). In vitro, remorin in plasma membrane preparations is phosphorylated (principally on threonine residues) in the presence of OGA and thus co-purifies with a protein kinases(s). The biological functions of remorins are unknown but roles as components of the membrane/cytoskeleton are possible. 
22.10 22.10 23.80 23.60 21.60 21.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.77 0.71 -4.54 28 381 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 26 0 224 354 0 105.30 33 31.49 CHANGED pccp..puhhsAW-EuEcuKhps+hp+cpscIsuWENpcKAKsEApl+KlEpc.LE+KKActtEKhpN+lAtl++cAEE+RAts......EA+Rscchh+scEtAs+h..RsTG+hPsphh ......................t..pthtshhsAW-c.u.EpuKhps+...hc.......+........c.cspIpuWEsppK....AKsEAph+.KhE..........t...c.lE+c+AcshEKhts+lAt.s++pAEEp....RAts......EAc+spchh+st-.tAshh....R.tsGphPtp.................. 0 36 136 183 +4112 PF03766 Remorin_N Remorin, N-terminal region Farmer EE, Finn RD anon Pfam-B_1798 (release 7.0) Family Remorins are plant-specific plasma membrane-associated proteins. In tobacco remorin co-purifies with lipid rafts. Most remorins have a variable, proline-rich C-half and a more conserved N-half that is predicted to form coiled coils. Consistent with this, circular dichroism studies have demonstrated that much of the protein is alpha-helical. Remorins exist in plasma membrane preparations as oligomeric structures and form filaments in vitro. The proteins can bind polyanions including the extracellular matrix component oligogalacturonic acid (OGA). In vitro, remorin in plasma membrane preparations is phosphorylated (principally on threonine residues) in the presence of OGA and thus co-purifies with a protein kinases(s). The biological functions of remorins are unknown but roles as components of the membrane/cytoskeleton are possible. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.84 0.72 -4.19 5 55 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 19 0 34 55 0 59.00 43 30.80 CHANGED ssVscEpApscsssPPPs..cpKsDDSKALsVVEsssEEsAscKsucGSlDRDVlLA+L .......................ssc-hA.pEK...ss.sP...PPs.......cpc....s....DDSKALs.lVE...K.....ss.E.s.....stc....K..ss....p....GSl-RDssLA+l. 
1 5 22 28 +4113 PF01244 Peptidase_M19 Renal_dipeptase; Membrane dipeptidase (Peptidase family M19) Finn RD, Bateman A anon Prosite Family \N 20.20 20.20 20.40 20.20 19.80 20.10 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.68 0.70 -5.50 48 2249 2012-10-03 00:45:34 2003-04-07 12:59:11 16 15 1559 27 897 2181 2191 312.60 26 85.48 CHANGED th.tph.llDuHsDhshphhtt.t.................t.t.hchsl.+h+pGtlsuthhula.h...........................thppuhptsLcplshhpphhpppsp.lplspossDlcpshp.cs+luhlhuhEGucsl..sscl...shLctaapLGlRhlsLTa.stsN.hucushpt........pssGLoshGcclVpchNcLGlllDlSHhucpshhDsl.......plSctPllhSHSsu+ulss+sR..NlsD-plcsltcsGGllslshhsta.lpps........................................................spuolc-hlcHl-alsslsG.h-aVGlGoDF.....DGss........................................stulcc...................suph.pLhptLhc.cGas-p-lcplhttNhlRVhcc .........................................................................................h.......lhDsHsDh.h.t.h..................................tt....hphsh.+hp...pGtl.s.u......thhula..h..............................................tht..p.s.h.p.t..s..l.p...t....l.s....h....hpp.hh.....p..p.......s...........s.........lt.............h..s...p...os................s...D.......l......p.......p.....s.........h.........p.....p..........s.....+..........l.u............s.....llulE......uu......csl..........sssl...............shLc.h.h..a.p.h.........G.l..RhhsL.s........a.stsN.h.u...sushtp....................................ttsGL.o.s.h..G+p..llpch....N.clG..h.......hlDlSHhucpshh...-sl.............ph.o...p....t......P....ll.hSHSs.....s...p.ul.............s.........s.........p.s.........R................NlsDc.l....ctl........t....c.....p......GGllt.....lsh.....hstF..lpst...............................................................................................tpssl.pc..h....s.....cHlcalhpl.hG.....h-.p....lGlGoD..F............DG.ht.t...................................................................
........................................................................................................................stslps............................s.u.p.h.s....plhtt.Lhc....p....G.........asc.........pclp....cl....h....ttNhlRlhp.t.................................................................................................................................................................................................................... 1 281 511 734 +4114 PF01664 Reo_sigma1 Reovirus viral attachment protein sigma 1 Bashton M, Bateman A anon Pfam-B_1003 (release 4.1) Family This family consists of the reovirus sigma 1 hemagglutinin, cell attachment protein. This glycoprotein is a minor capsid protein and also determines the serotype-specific humoral immune response. Sigma 1 consist of a fibrous tail and a globular head. The head has important roles in the cell attachment function of sigma 1 and determinant of the type-specific humoral immune response [2]. Reovirus is part of the orthoreovirus group of retroviruses with, a dsRNA genome. Also present in this family is bacteriophage SF6 Lysozyme Swiss:P21270. 25.00 25.00 43.90 38.50 19.70 19.00 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.31 0.70 -5.26 3 60 2012-10-01 20:11:45 2003-04-07 12:59:11 11 3 13 30 0 78 0 208.30 62 46.59 CHANGED IsuLPSRlGoLEuS+IDSVlPPLslpSouuTRlLchhYDoSDFsIsNSVLoLRsRSToPTaRYPLELsSAsNpVulucNYRhRpGpWoGQLpYpsPuLsWRAsVTlNLM+VDDWLlLSFopFoTsSIhAuGKFVLNFVTGLSPGWtTGDTEPSoT..lsPLSTTFAAIQFlNGGuRlDAFRILGVuEWsDGELEI+NaGGTYTuHTNVcWAPMTIMYPCss .FDuINSRluslEQSYVASsVsPLRLNSS..TKVLDMLIDSSTLEIN.SSGQLsVRSooPNLRYPIsDlSGu..IGMSPNYRFRQSMWIGlVSYSGSGLsWRVQVNSDIFIVDDYIHICLPAFDGFoIADGGDLSLNFVTGLLPPLLTGDTEPAFHsDlVTYGA.pTlAIGLSu.GGsPQYhSKNLWV.EQWQDGVLRLRVEGGG.ITHSNSKWPAMTlSYPRSF.. 
0 0 0 0 +4115 PF04582 Reo_sigmaC Reovirus sigma C capsid protein Mifsud W anon Pfam-B_2922 (release 7.5) Family \N 41.00 41.00 41.00 43.10 40.90 40.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.02 0.70 -5.26 4 160 2012-10-01 20:11:45 2003-04-07 12:59:11 7 1 59 7 0 145 0 239.10 45 97.25 CHANGED MAGLsPpQRREVVuLILSLTSSsThNsGDLTslYERLosLEuustSLcpSluslsoclSDlSusLQshspoLs-spusLsuLposVpALpsSVssLSoslssLoshsSuHsuulSsLQTolcuNossISNLKSsVSupGLsIoDLppRVpuLESuoSpuLpFusPLSlusGVVSL-MDPYFCSpphuLTSYSA-AQLMQFQWhA+GpsGSSsoIDMsVNAHCHGRRTDYMMSoTtuLTVTuNsVoLsFsLDhIT+hPSDLSRLlPssGFQAASFPVDVSFTRDosTHuYQVYGsYSSuRVFpITFsTGGsGTANIRFLTVRTGIDT ........................Ls..QRREVluLILSLTSssshs..GDLs.lh-RLosLEuust.Lppo.......lssh.sploslSupLpshstslspspspLpsLptplpthpssl.s..ssslsshot..pspss.losLpsphss.tstlsNL+s..sV....s....s.uL..slosLppRlpslEsssup.hphtsPLp..lssGslSLphcPhFCo.phsLoSYSspA.LhpFpW.s+.upsGuu.sslsh.lpsHsHGpRTsahhSoptshTVs.usss.Lshsls.lhp.ssD.huhLlPstGFQtAoFPVDlSFpRsssoHsY..Qs..Y..GsaspsthFplsa.ssts.st................................................................ 0 0 0 0 +4116 PF00979 Reovirus_cap Reovirus outer capsid protein, Sigma 3 Finn RD, Bateman A anon Pfam-B_1049 (release 3.0) Family Sigma 3 is the major outer capsid protein of reovirus [1]. Sigma 3 is encoded by genome segment 4. Sigma 3 binds to double stranded RNA and associates with polypeptide u1 and its cleavage product u1C to form the outer shell of the virion. The Sigma 3 protein possesses a zinc-finger motif and an RNA-binding domain in the N and C termini respectively. This protein is also thought to play a role in pathogenesis. 
18.40 18.40 18.50 22.40 17.90 18.30 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.30 0.70 -5.51 5 149 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 31 5 0 145 0 250.80 49 98.83 CHANGED MEVRVPNFHSFVEGITSSYl+sPACWNA+TuWDospFapPDVI+VGNAYCCoQCCGVLYYGuhPsDGpaFPHHKCHQQppRsDoPLLRaVRIGRTTEHLLD...QYAVtLpoIADHY--tupcpscEPtuDpVAuLDIlsRTESlRoDpAVDsDFWTsPLERRsD.DSRRDIA.oAhW+MIDASupShTLPDCLVSsuLHoRslFcQM..tTTToIYDVAsSGKsu+FSPMVAshPpR-uGPlpLsccssc-sVsosWp..sHF.......AlSPIIGGVG.IoGQatRsShHpVGHPlIGSGKKlSHYRNLFM-lsRGWSKSoFsCAsGLEPAE.sESRLRGHARTMLGRo....LPcVCDaussspossssoPLp..+osKlThlECG .........phl-.h.sua.th...asstphWt.....h.P-hhhhGsAhsC.pChGVl..G....t...hPHH+CpQ...p.s.s.hphsc..RhTthhhc...tash.hpthhp.hp..t........tt.l.......................ps.hp.....s.............p...Wp.sl...p.....sst...h..tth.hh.ps........hlsphhh....as.thht.......hoIYDVAsSGK.ul+FS..PMVushu...pR-uGPlhLssssst-sVhohhp..uHF.......AhSPllGGVu.loGpatRtShH..hhshltsstKhp..Rph..t...sat..ththhhs............................................................h................ 0 0 0 0 +4117 PF01446 Rep_1 Rep; Replication protein Bateman A anon Prodom_1565 (release 99.1) Family Replication proteins (rep) are involved in plasmid replication. The Rep protein binds to the plasmid DNA and nicks it at the double strand origin (dso) of replication. The 3'-hydroxyl end created is extended by the host DNA replicase, and the 5' end is displaced during synthesis. At the end of one replication round, Rep introduces a second single stranded break at the dso and ligates the ssDNA extremities generating one double-stranded plasmid and one circular ssDNA form. Complementary strand synthesis of the circular ssDNA is usually initiated at the single-stranded origin by the host RNA polymerase [1]. 
26.00 26.00 26.20 26.10 25.70 25.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.67 0.70 -5.02 25 775 2012-10-02 18:54:06 2003-04-07 12:59:11 12 3 413 0 22 552 35 156.80 31 69.80 CHANGED hKhuhpsppIlscuhpcpPpuRaLFLTLTV+Nsp..G-cLcpolspMscuFpRLhpaKK.........hppsllGalRusEVThNc.pcs.....oYHPHhHVLLhVcsoYFp......pNYlsQs-WscLWc+uh........KlDYcPlVcl+sVKspp.........pp..psl..p..................sAlhEsuKYsVKssDhhsssp......ts.pslh-LppuLtppR.IuaGGLLKpI+....+cLpL-DsE..puDLlpss--ccp.spsp......plhhhWphpppsYhl .........................................th.....t...t...h..lFLTL.oh..Ns......hpp...Lpttlpthtpu...ap+h...phpp....................h.ts.....hhG...ah...+thE.l.Thsp......ps................................ap.HhHhllhlp...shht.......t.altp...tpW.phWp.p.sh......................ths.....s.......hplp.hp..........................................................................h.p......h...............................................lsauGlhKph+.......K.Lp....s-h........shlp..p.pp............phh....................................................................................................................... 0 8 12 22 +4118 PF04057 Rep-A_N Rep-A_protein_1; Replication factor-A protein 1, N-terminal domain Wood V, Finn RD anon Pfam-B_6000 (release 7.3); Domain \N 21.70 21.70 22.00 21.90 21.40 21.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.19 0.72 -4.22 11 381 2012-10-03 20:18:03 2003-04-07 12:59:11 7 20 239 3 250 351 3 97.50 29 16.47 CHANGED LTsGAIutlhs....u-ssh....cPVLQVl-lK.Iss.......sspRYRhlhSDGpst.hpu.MLuoQLNshVcsGplppsullpLpcalsNslpt...sR+llllhpLEVlsp .......................................................lo.GAlttlhp.......spss......pPllQllsl+..lss.............sssspRaRllhSDGhph.hpu.MLATQL.NphlcsspLpp.s.sllplpc...ahssslp........s++.l.lI.lhclEVlt............................. 
0 86 146 208 +4119 PF01719 Rep_2 Plasmid replication protein Bashton M, Bateman A anon Pfam-B_1901 (release 4.1) Family This family consists of various bacterial plasmid replication (Rep) proteins. These proteins are essential for replication of plasmids, the Rep proteins are topoisomerases that nick the positive stand at the plus origin of replication and also at the single-strand conversion sequence [2]. 25.00 25.00 25.10 25.10 24.80 24.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.46 0.71 -4.80 24 278 2009-09-17 10:57:14 2003-04-07 12:59:11 12 4 204 9 32 240 1 182.60 27 74.28 CHANGED hsKcpt.............RpWsFllYP..ESh.....PcsWpphL..-phtl.hhhSPLHDKDls.............psGchKKsHaHlllhacsspohppVpplhcph........susthspl..tslcshYcYhsHts....s.cKapYshs-IhshsGF.....DIcpalshssp.c+.p..llppllchIc-pslh...php-Lhpashppt.c..hhslhtppsh..........hFhspY.lcupptp .................................................................................pt..........phasFlhY....ESh.....Ppsahp...hL......-pht.lsh.hhSPhHDKDls..............psschKKs...HhHshhhacshpohsplppllpph..............sssthlphh....hS.....pshapYhsHtp....ss-Kp....YshpDItshsGF...........-l-calh.ssp.p..p.p.....hlppll-hIccpshh...php-Lhtahhtpt.p...hhshhh.ps...........hh.ttY.cpth..t........................................................................................ 0 2 6 23 +4120 PF01051 Rep_3 RepB_protein; Initiator Replication protein Finn RD, Bateman A anon Pfam-B_313 (release 3.0) Family This protein is an initiator of plasmid replication. RepB possesses nicking-closing (topoisomerase I) like activity. It is also able to perform a strand transfer reaction on ssDNA that contains its target. This family also includes RepA which is an E.coli protein involved in plasmid replication. The RepA protein binds to DNA repeats that flank the repA gene [3,4]. 
22.40 22.40 22.90 22.40 22.20 21.90 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -4.75 166 2007 2012-10-04 14:01:12 2003-04-07 12:59:11 16 8 1146 6 232 1665 74 220.10 17 69.43 CHANGED ppthlhpsNsll.psshp........hohtEh+lhhhhluplcsp..ttpt.....................h.h...phtcht.th....shspps....hpplppshppLh.cch.hph.......................tppht.hhhhhtahp...................tpuplpl..phspclhshLhpL...pp..p..F.TpapLpphtp.l.cSpYohpLYclL.....ppa....................cs....hsp..hp..........hsl.......-...........-h+p........hhsl.........................sph.phschcp+VL.csulcE...lsp........pssh.tlphcph.+...pG.......Rp...lsslpFph .....................................................................................t...hh..pNph.h.ps..t.......hshhphplhh...hhltphppp......pptt..................................hph.....shpchh.th...............thpsps........hpplcpshppL...pp..hph...............................pt.......hh.ahs.h...............................tpshlpl.....phs.sp........lt..hlhph........pp...t...a.sphplpphsp.L.cup..auhpLa.clh..........hpa.........................ps....ttt........hh...........lpl...........p...............................-h+p........hlp.l...........................................tsth...phs..phcp+ll..csslp-.....lsp........................hsph..tlshpph..+.........cG....Rp....l.th.F........................................................................................................ 0 58 131 189 +4121 PF02486 Rep_trans Replication initiation factor Mian N, Bateman A anon Pfam-B_2164 (release 5.4) & COG2946 Family Plasmid replication is initiated by the replication initiation factor (REP). This family represents a probable topoisomerase that makes a sequence-specific single-stranded nick in the plasmid DNA at the origin of replication. Human proteins also belong to this family, including myelin transcription factor 2 (Swiss:O15150) and cerebrin-50 (Swiss:Q16301) [2]. 
29.60 29.60 29.60 29.70 29.40 29.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.19 0.71 -4.80 84 1463 2009-09-11 00:34:10 2003-04-07 12:59:11 14 8 833 0 83 967 19 187.80 25 54.34 CHANGED hhssploRlDlAhD.hpt..........hshschhpph.ppsthhpph.........p...............hpphushp.............hGpThYlGu+.pSp...hhhRlY-KshE...............t..p..WhRhElcL+spct..hshchlhs.....tp.hsshhhthhphh.....................................t.tthpl.....ssphp..ths.......................h-pt....hpalpc....psutslphltph.t......................tphpshltpllpp ...........................................................h..sthsRlDLAlD-hss.........hhshspltcch.pptchhsth................hp...................hpphtssp.......................pt.thGpT.hYlGSp..pSp......hahplY-KshEph.......................t..hcp..sph..hsRhElcL+pccs....h....hshc.Lhs..............hp...hpthhhthhsthhphh................................hhtthp.lcl.........ssp.c....ho...........................hccs.....hpWlp.+....Qs.u.slthh..ph.t............tpphh..h...p........................................................................................................... 0 24 46 64 +4122 PF04796 RepA_C Plasmid encoded RepA protein Waterfield DI. Finn RD anon Pfam-B_6223 (release 7.5) Family Family of plasmid encoded proteins involved in plasmid replication. The role of RepA in the replication process is not clearly understood [1]. 
21.40 21.40 21.50 21.60 18.90 21.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.90 0.71 -4.63 10 189 2012-10-04 14:01:12 2003-04-07 12:59:11 7 2 146 0 63 178 18 157.60 32 48.85 CHANGED lPYGshPRLlhsaLsT......cAl+o+o.............slpLGpShocFlccLGhps..oGGtpGslsslR-QlsRLuussh..........sluhcsuscu..sspshslhccthhhW.spcss.....QpuLas..SpVpLopsFFcpLtc+PVPlDhsAl+tLppSPhuLDlYsWLoYRlphl..s.+s....ssloWcp .................................................................lPYGshsRLlLhalsT......pAl...+s+sR............clpL.G...pShspFh.c.t.hGlss..pGGc..shptlRcQhpRLhuspl...............................................phthp..s....sts.........thh.shthscph......h..h....h.hps.p..pss...............Qt.uh...at....sh.....lpLS-sFappL.hc+sVPlDhcAl+tLp...t..SshAlDlYsWLsYRlatl....p..+s....shlsWp.s.............. 0 21 45 54 +4123 PF01421 Reprolysin Reprolysin (M12B) family zinc metalloprotease Bateman A anon Swissprot Domain The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. Members of this family are also known as adamalysins. Most members of this family are snake venom endopeptidases, but there are also some mammalian proteins such as Swiss:P78325, and fertilin Swiss:Q28472. Fertilin and closely related proteins appear to not have some active site residues and may not be active enzymes. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.29 0.71 -4.73 38 3201 2012-10-03 04:41:15 2003-04-07 12:59:11 14 123 179 77 1444 3423 29 190.20 28 23.35 CHANGED +YlELhlVsD+thap+hsushstl+p+lapllNhlNphYps.LNlpVsLsuLEIWospDpIslpssussTLppFspWRcp.Llp++pHDsApLLouhsass.slGhAahuuhCsscpSsGlspcap..pshhlAlhMAHELGHNLGhpHDst.....tCpC....ssssCIMss.huppsuh.hFSsCShppappFlpptpspClhNpP ..................................................................................................................................................+alEhhlVsD.pt...h...h......p....h........h.....s.....t.....s.....h......p.....t..hp.p..hlhpl.....h..N..h..........l......s...t..h...........a........pp........l......s.........l....p.......l....s...L.......l...s...l......t.....l..............a.........s.............p...........p............s...........t........l.......p......l...........s..................t.........s...........s.................s.......p..........o...L..p...sF.....s......p.....W.....p...p.............p.............h....h......t........p...........p...............p...........H...............D.....s.........A........h..L.l..T.........t.........p.............s..........h..........s.........s.........t..........s.............l........G............h.....A................l.........u.........uh.............C......s....s...t.........p..........S............s.......u.............l.s........p................-..............p...........................s.........h.........sh.A..............h......oh..........AHE..l.G...............H............s........lGMp.HDss.............pC......p.........C.................sttss......lM......us....h.......s......t.......t...............s.h.....taSsC..Stpp.h.pp..a......L....p...p.....t..t....s...p.CLhs.P................................................................ 
0 219 301 684 +4124 PF05140 ResB ResB-like family Finn RD anon Pfam-B_1866 (release 7.7) Family This family includes both ResB and cytochrome c biogenesis proteins [1,2]. Mutations in ResB indicate that they are essential for growth [1]. ResB is predicted to be a transmembrane protein [1]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.68 0.70 -5.94 127 1951 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 1367 0 559 1727 720 307.30 18 58.66 CHANGED h+hAlhLLlllAluSlhGTllP.Qp................ss.Yhppa........G..hhuplh.pLsL.clYsShWFhsllhlLslSLlsCslpRthshh+sh+ph..htptp..Lp+hshttphph...........................s.psshppltthLcpp.tac..lptpptp.................lhAcKGhhs+hGsllsHhullllllGuhhuuhh....................................uac........uphhls..pG...........psssh......sG..........saslclccFpl-ahs.................sG.tscpFtSclslhcss...........pp..hpptplpVN.cPLpacGhslYQssau.sshphhthsssh....tt....h.phshphhs.sst...............................plplhuhhhs...........................................................pp.psssPsltlplhs.ptth......................................................................................................................sspspthhthphsphpthphssh.........................................................................hlph...........stshp......hoGLplp+DPGhslValGshlhllGlhhuhalpcRRlWlhhps...........................tsplhluGhss+sphu.atcEhtc 
............................................................................................................................................................................................................................................................................................................................................htc.h..s.hh.Hhuhlhhhhuhhht..........................................................................................................................h..p..t..h..l...ts.......................pt....h................................hsaplplpcF..hphhs.....................................................................................................pt...sppatopltlhptt.........................tp...hphplthN.cPLph.t.Ghp.h..Y....ua.s..s..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t............................................................................................................................................................................................................ 
0 197 410 519 +4125 PF04851 ResIII Type III restriction enzyme, res subunit Finn RD, Mifsud W anon Pfam-B_4631 (release 7.6) Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.52 0.71 -4.46 55 18536 2012-10-05 12:31:08 2003-04-07 12:59:11 10 220 5248 26 4564 43274 12710 160.50 19 20.83 CHANGED phphRsaQtpAlcplhpthppt...............................ppcsllhhsTGoGKThhuh.........t...hhtclhp..................+sLFls...sppsLhcQsh.cpa........................................pttt.th..t...pstphhhsslQpLpt.................................ph..stas..................lllhDEsH+ususs...................applhp.................hpsthh......lGhTATP..pc ............................................................................................................................................................................................h....p..Q.pAl..p...p....lh.p.t...hppt......................................................................................................tp.pt.l..l....h..t..sT...G...oGK....T..h..s..h.h...................................................................................p......hh..t.p.ltc.................................................psl..l....l...s.......s...p....p....s....L....s......s...Q...hh...pch.............................................................................................................................................................t.........h...............t.....t....t......t.......h..................................................................t...........t..........t...............t.....h.......h.......h..............h..t......h.................................................................................................................................................................................................................................hh.l...h.DEs.......H....t.........................................................
................................h.t............................................................h....h.o..uT.................................................................................................................................................................................................................. 1 1830 3138 3952 +4126 PF00239 Resolvase recombinase; resolvase; Resolvase, N terminal domain Finn RD, Griffiths-Jones SR, Bateman A anon Prosite & Pfam-B_3830 (Release 7.5) Domain The N-terminal domain of the resolvase family (this family) contains the active site and the dimer interface. The extended arm at the C-terminus of this domain connects to the C-terminal helix-turn-helix domain of resolvase - see Pfam:PF02796. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.83 0.71 -4.32 222 13550 2009-01-15 18:05:59 2003-04-07 12:59:11 16 84 3644 51 2369 10593 1582 133.80 24 45.62 CHANGED hhuYsRV..STpc....Qs.....hppQ.hptLcphu.sp.........p.l.....at-c.hSGpp....t.p..R..stlpchlp.tlcp..s...D..s......llVtclDRLuRs.ht-hlpl.lcpl.pppGlplhsls..p......hcs............sssh.uch...hhslhush....Ach..E+phlp-RsppG.lttA+t...pGch ......................................................................hhYsRl...Sotc............Qs..........hpt..Q..h.....p..t..l.p.p....hs.pp................t..l.............apD.p.....hSGtp.................h.p.......R........sthpc.h.lp...tlcp.........s.....D....h..............................llVtclD...RL.uRs..ht-h.hph..l.p..tl....p...p......p..s..lp...l.hsls..p..s............hcs......................................sssh...sch..........hhp.l..h.u.s.h..................Ach......Epc.........h..htcR.hpt...Ghtt.ttt.pG............................................................................... 
1 844 1618 2027 +4127 PF00072 Response_reg response_reg; Response regulator receiver domain Sonnhammer ELL, Griffiths-Jones SR, Finn R, Fenech M anon Prodom Domain This domain receives the signal from the sensor partner in bacterial two-component systems. It is usually found N-terminal to a DNA binding effector domain. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.07 0.71 -4.06 57 151337 2012-10-01 22:20:39 2003-04-07 12:59:11 19 3468 5531 462 44329 116945 19054 111.60 26 30.95 CHANGED lLlV.DDcshhpphlpphlp.....p.tuh...pVs.tsssuppulphl....pppp.....hDlllhD..........lphPs..h...sGlclhppl.+pps...s...ssllhlT.upss..thshpul.psG.ApsalsKPh.shscLhptlp ........................................................................................lLlV...-.D..c.........h....h....t....p....h....l....p....p....h.Lp.............p...tu....a........................p........l.......t.....t.....u.........s......s..G.....p....p..A.l..p...h.h...................pp..pp...........................................D.....l......l..l..h..D................................................l.t.....h.....P.....s.....h................s..G.....h.......c.....l......h...c.....p....l....+ppt.....................p.........................h..P..l..l............h....l........T.....u........p........s........p...........t............s.........t..........h.....p.......u...........l.....c...........h......G......A.....s........-....Y.....l...s...K.P...h...s...pc.Lhttl.t.................................................................... 2 15208 29453 37848 +4128 PF02813 Retro_M Retroviral M domain Bateman A anon Bateman A Family Retroviruses contain a small protein, MA (matrix), which forms a protein lining immediately beneath the phospholipid membrane of the mature virus particle. MA is located in the N-terminal region of the Gag precursor polyprotein. 
The N-terminal segment of MA proteins directs the Gag protein to the plasma membrane where budding takes place, and has been called the M domain. This domain forms an alpha helical bundle structure. 25.00 25.00 25.40 42.10 24.10 19.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.96 0.72 -3.82 3 206 2012-10-01 20:35:47 2003-04-07 12:59:11 9 15 46 1 0 218 0 81.30 90 16.63 CHANGED DsVIKVLsohCKDYCGKTSPS+KEIuosLSLLpcEGtLcSPSDIausupWDhlTAALoQRAMsuuKAGELKTWGLlLGALKAAREE ...............ACKTYCG.....KTSPSKKEIGAMLSLLQKEGLLhSPSDLY.....SPGSWDPITAALSQRAMVLGKSGELKTWGLVLGALKAAREE. 0 0 0 0 +4129 PF00424 REV REV protein (anti-repression trans-activator protein) Finn RD anon Pfam-B_169 (release 1.0) Family \N 20.80 20.60 20.80 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.33 0.72 -4.09 13 15943 2009-09-12 07:43:04 2003-04-07 12:59:11 13 3 89 13 1 6741 0 83.00 60 88.13 CHANGED Mst+st...-EtL+RtLRLI+lLaQoN.PYPp.ssGTApQRRpRRRRWRpR.cQIhALA-RIhsh..-.PhspPlDptl.cLQcLsIQpLPDP.....................Psss ......................................................................................................P..Po....sEG.T.RQA.RRNRRRRWRpRQR.QIcu.ISthILSohL......GR.P.sEPVPLQLPPLERLoLDCsEDC.GT.oGTpsssssphh............ 0 1 1 1 +4130 PF00472 RF-1 RF-1 domain Bateman A, Finn RD anon Prosite Family This domain is found in peptide chain release factors such as RF-1 (Swiss:P07011) and RF-2 (Swiss:P07012), and a number of smaller proteins of unknown function such as Swiss:P40711. This domain contains the peptidyl-tRNA hydrolase activity. The domain contains a highly conserved motif GGQ, where the glutamine is thought to coordinate the water that mediates the hydrolysis. 
21.30 21.30 21.50 21.40 20.80 21.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.28 100 11930 2012-10-03 10:08:23 2003-04-07 12:59:11 15 22 4796 19 3127 8043 5160 115.70 40 37.79 CHANGED .hstsh.clplsts-lclcsh.RuoGsGGQpVNKssSA....................V+l.pHh.......Pou..................lllpsQppRSQhpNRcpAhchLpu+Lhph...t........hpcpcppppsp+tuphcph-tupc..hRsYsh..psplKDtR ........................................cphclcIs.s.sDl+lD.sa.R.u.SGAGG.........Q+V.N+..T-SA...........................................VRl..THl.........PTG..............................................IVVps...Q...s-RS..Qpp......N+cpAhphLp..A+L.hph.p......................hpcp...p.t....pt..s.s.tR.p...s...p...h..t...t..h...s....tu..pp..IRo..Ysh...PpsplpDhR.............................................................. 0 1030 1966 2619 +4131 PF04506 Rft-1 Rft protein Wood V, Bateman A anon Wood V Family \N 20.80 20.80 23.20 22.70 18.40 18.10 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.70 0.70 -6.14 4 365 2012-10-02 21:24:20 2003-04-07 12:59:11 8 9 278 0 263 371 4 394.10 23 88.38 CHANGED pSShpGhsaslhhQlhsRIlTFhlNthllRhlusclhGlssl+LplLpSTlLFLSREulRhAtlphsups...h.pss.tch.pphaLSs.lpsllss..sYI.h.aluhs.ulhhSha.ahslss....lh.shucsslFlhhl.phlcL..L.psaahlsQhhlhhstpstscuhuhhhsulhphtlsshs...p.............cuhuhL.Fshsslu.plp.huhhca.thps......FSshLsKltp.......p.......haasp-hlphhhohhhQslLKpLLT-G-Khlhsh..lhShp-QusYsllsNhGSllsRhlFpPIE-ssphaFuphlppcsphsp......+pulslLsplL+hhuhluhlhhsFG.sYSshVL.hhuGscaussus.tslLphYshYI.hhAlNGIhEuFhhusssucQIhcauhhhhshSlhaLlhualLhsh..hustGhIlANIINMslRIlYshhaIp+.a+-hshs.ohshshh.sphhlhshhhusllsaWh.ups.hLsahlsslhhuhshLhhhhlsc+phhphhhhhhsphthK 
................................................t..........hhhQlh.+hhoFhhNthllR..hls.t...hhGl..s...pl..lh..ohlFhuREuhRhuh.p.......t...................................t.............t............................................................thh..hs.h.......hh...h..................................h...................................h.h....shh..lh.h...s..hhlcL..hsEs..h..a....h.l.p...h.hth.+..hhc.....s.h.....uh..hhh.sh.h.p.hh..hh......h.h...................................................hh....h.h...h..h.......h.hh....h..h.......h..................................................h........h......h..........................................................hp....t.h...ph.hhp.hhhQuhhKplLT-.G-p..hlh.s...h...h..............ohtpQG.hYslssNh........GSLlsRhlFtP..lEEo...hhhFup.............h.l...................t.........p...........t.........p........t...................p................................................................................t......sh.....p..........h.L....t.......ll+hhhhluh.hhhshG..shu.hhLpl.h.....h...G...t..t..hs...sst.......s.......................hLphY.......s.h...Yl...................hL........AhNGlhEs.Fh.ushs.t.pp...............ltp..shhhhhhShhahh...hs...h..h.hh......t..............hG...s....G.............hlhANhhNMhh.RIhas....hhal....tp.......h.h.......t.....t............................................................................................................................................................................................................................hh.hh............................................................................ 0 96 153 222 +4132 PF04589 RFX1_trans_act RFX1 transcription activation region Kerrison ND anon DOMO:DM04454; Family The RFX family is a family of winged-helix DNA binding proteins. RFX1 is a regulatory factor essential for expression of MHC class II genes. 
This region is to found N terminal to the RFX DNA binding region (Pfam:PF02257) in some mammalian RFX proteins, and is thought to activate transcription when associated with DNA. Deletion analysis has identified the region 233-351 in human RFX1 (Swiss:P22670) as being required for maximal activation [1]. 25.00 25.00 28.80 28.80 18.60 23.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.25 0.71 -4.49 8 197 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 39 0 75 132 0 135.80 50 21.09 CHANGED MQsSEuGSDosuoV.sLpTSsuuQu....sVPuoQpRslVQshsps.Kst.sQQl......sV.hsQQVsQQVQQ..VQHVYsuQVQYV.EGu-uVYTNGsIR.oAYoY.sEoQlYuQoSGuuYFDoQ...GuuAQ.....VTTVVSS.......HoMV.......GIsMsVuG.SQIISSo.uuYLItGG .......................................psucsuusssuoV..tLpspsstQ......Vs.sp.................sQ.l......sV.......p...s.....Qp.VQQ...VQHVYPuQVQYV.EG.uD..sVYTNGAI.R.osYsY.sEoQhYoQs..o...uusYF-sQ......GouAQ.......VTTssSS.......HuMV.....soGGIsMsVsG.uQllSSo..GsYLIts.s............................................................ 0 3 8 28 +4133 PF02257 RFX_DNA_binding RFX DNA-binding domain Bateman A, Mian N anon Pfam-B_3682 (release 5.2) Domain RFX is a regulatory factor which binds to the X box of MHC class II genes and is essential for their expression. The DNA-binding domain of RFX is the central domain of the protein and binds ssDNA as either a monomer or homodimer [1]. It recognize X-boxes (DNA of the sequence 5'-GTNRCC(0-3N)RGYAAC-3', where N is any nucleotide, R is a purine and Y is a pyrimidine) using a highly conserved 76-residue DNA-binding domain (DBD) [2]. 
27.00 27.00 27.20 27.30 26.40 26.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -9.78 0.72 -3.84 34 732 2012-10-04 14:01:12 2003-04-07 12:59:11 10 11 217 2 423 656 0 83.20 43 10.10 CHANGED c+p+phaA.hhWLhsshE.tsp.ssslPRs.......................plYscYhptCsppp.lcPLssAoFGKLlRslFPsl+TRRLGs..........................RGpSKY....HYs.....Gl+l+sss ...................p......hs..hpWLhsshE..pspssslPRs.......................plYscYhpaC..............p.............p..............pp.h.cP.lsuAoFG.......KlIRplFPsl+TRRLG.o..........................R..GpSK...Y..........H..Yh.....Gl+lK...s................................ 0 100 152 268 +4134 PF03214 RGP Reversibly glycosylated polypeptide Mifsud W anon Pfam-B_2662 (release 6.5) Family \N 23.80 23.80 29.90 29.80 23.70 22.40 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.19 0.70 -5.70 4 169 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 63 0 94 181 3 299.80 50 91.75 CHANGED MSh..IpcsEVDIVIuALpsNLTsFhppWRPFFStFHLIlVKDP-hp.cclplPEGFss-lYs+oDhE+VlGup.sSIpFSGauCRYFGaLVSKKKYlloIDDDClPAKDsuGhsVDAVsQHlhNLcoPATPFFFNTLYDPaRcGADFVRGYPFSLRpGVsCAhSCGLWLNlADhDAPTQslKsppRNTpYVDAVMTVPttAMhPlSGINlAFNRELlGPAhhPALhLtGEGKhRW-TlEDlWsGhCsKllCDHLuhGVKTGLPYVWRs..EtGsAl-SL+KEWEGlKlMEcllPFFpSlKLspTusssEDCVIELActVKEpLGp.DPhFsKAADAMhcWlcLW+Slss.SA 
....................................................................................................................................................s........cs-lDIVIsslp.sL.sFhp.WRPahp.aHLIlVpDsD.t.c.lplPpGFDhclYs+sD....lp+..lLGsp..ss.IsFpspuCRsFG..ah...VS...+K...KYlh.oID.DDChsA..K..D.ss..GpplsAl..pQ..H..l..pNLhsPSTPa....FFN..TLYD.PaRc.....GA.D.FVRGYPFS.L.........R..........E..........G............l........s........T..........A.........lSpGLWLNlPDYDAPTpL.V......K.........Ph.....cR..NoR...........Y..VD...A..V....hTlP+GshhPhCGMNLAFcR-LIGPAMaF..GL..hscGp.....slGRaD...DhWuGaCsKVlC.DHLGhGVKTGLPYlaHS.........KA.u.s.s.FssL+K.EacGlhhpE.-llPFFQs.....st.L.s.....cpss..oVp.pCYhE.Luc.V+p+...L..ut..l.....Ds..hFt.K.hA-AMlsWlcuWcplss...st...................................................... 0 14 66 82 +4135 PF05045 RgpF Rhamnan synthesis protein F Moxon SJ anon Pfam-B_5448 (release 7.7) Family This family consists of a group of proteins which are related to the Streptococcus rhamnose-glucose polysaccharide assembly protein (RgpF). Rhamnan backbones are found in several O polysaccharides of phytopathogenic bacteria and are regarded as pathogenic factors [1]. 
25.00 25.00 26.50 26.00 24.80 24.40 hmmbuild -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.78 0.70 -5.97 7 464 2009-01-15 18:05:59 2003-04-07 12:59:11 7 50 316 0 120 454 62 322.40 20 50.84 CHANGED llFlSNuplocpspppLps..hsDchl.RENpGFDhhAa+-Gl-hlGFDcLspYDplhlhNcThaGPla.huphFpchEp+.ssDFWGIosH+thp.s....tsthhscHlpSaFIuh+psllpSpsFcsaWcshsphpsht-sl.haEophTshFhshGaphpshlDoc+hsusa..hlcsD.ohhs.ssILK+Rl.Fhcsphl-spth...hPhhLchlccsosYslsLIhcpl.chu.P.sLs.....h.lLsschlps.ttss.sptKlAVphHlYYsDhL-EhLshhpNhshsYDLhlTTsos-K+tEIcph.L.ptsG.pss.VhVs.tN+GRDhsshh.sL+-hL..c+YDhVs+hHTKKS.pssh.tGp.a+cchl-.Llcs...scNlLshFcpNsplGlslssl.sahpasplspAW.tN...tsphtclh+phslph+hD....sT.VhuYGThaWF+.cAL+pLF-hchp.cDhssEP.hspsolhHAlERLlsYhAhssuY ................................................................................................................................................................................s.....h............t.l...........h.t...hh.RpN.GaDhhuappu.h....h.....t..h..ph.splhlhNco..hG.Ph.........ht...hh.tp..ht..t.p....thDhaGho.......t....................................h..HlQSaahsh....pt.hhts..F.paWpth...h.p.s..htpsl..aE..hophh.p..Ghphtsh..h..........................................................h.................hh......t...............................................................................................................................hhhhHhh...hh.th.t.......h.........hphhho...h.............h.t...h.................h......ph.psh..hh..........h....t..hthhhhhps...+p..........................................t.t.h.t....hht.hh...........htt...h..t.http..hu.hhhs.......................................................................h...h...th.........h............................G...shhah+......thht..h.t......t..............h....................t.....hE........................................................................................................................................ 
0 26 73 97 +4136 PF00615 RGS Regulator of G protein signaling domain Ponting C, Schultz J, Bork P anon SMART Domain RGS family members are GTPase-activating proteins for heterotrimeric G-protein alpha-subunits. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.78 0.71 -4.00 74 4150 2012-10-03 22:10:09 2003-04-07 12:59:11 14 139 303 83 2509 3808 11 120.60 21 22.88 CHANGED ohcpllssps.GhphFppFLcsphsc..-slpFWhusc-a+pt..........................sppchhpp..AppIapcalt.sus.ppl.sl-ppstpthppslpp............................ss.shFc.tuQpplhp.hhcp-sas+FLcS..shYpch .........................................................................hppllps....uhthF.p....p...F..L.c.p.ch.st...........-sl.............p.....Fa...hss.c.ca+ph.............................................................................................................................s.p.p..p.htpp......uppI..a..ppa.l....t.........s..p....us...p.........p....l...s...l.s...p......p.....s..p..p..p...l..p..p..p.lpp.......................................................................................................ss.ph.Fc..pup..p..p.lhp.....hh..c..p..c..s..a..scFlpS.tha.p........................................................................................................ 1 835 1141 1805 +4137 PF00974 Rhabdo_glycop Rhabd_glycop; Rhabdovirus spike glycoprotein Finn RD, Bateman A anon Pfam-B_167 (release 3.0) Family Frequently abbreviated to G protein. The glycoprotein spike is made up of a trimer of G proteins. Channel formed by glycoprotein spike is thought to function in a similar manner to Influenza virus M2 protein channel, thus allowing a signal to pass across the viral membrane to signal for viral uncoating. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 501 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.53 0.70 -6.27 8 3017 2009-09-17 10:48:14 2003-04-07 12:59:11 13 3 114 5 2 2015 0 333.50 41 96.20 CHANGED hlslllllthsss.hsp..PlhssssspssWs.Pl.sashsCPssthspscuupspsPhpac-.ssGhlosspV.GalCpu....slapsssah+aVGY+shT....Fut+slcPTls-C+tAhpchssGsspsppsLaFPsPsCtWhosVsss-tshhhlTP+oVplDsYstcalDscF.uG+Copps.....CpTpassolWhs-ss.hpstCsp.hpplcGhhasspsSppsshus....cpGa+Pah.LcsuCplsFCGK.GIRhspG-Wlulsss....sssphhshPpClss.....lsshRssts-ashls-Ll+...cRpECL-sascIhsopsloshcLS+hRshtPGhGcsYslhNGoLhcupspYlpV-h...-lh.s+ssh+stspsshhhWsph..FGshhtG.cGlltss..phphP.lppasulh-suhh.hhpst.lPHPshpsspsspD.......hhsschts.tpsl-slDhthssWuhalhluuhsllsllLllllhpCCshssp..pPptppsIshpphsps..p ............................................................................................................Wp.sl.ht..tCsp.s......s.......cs..s.p....hsshpa.c..p.uhlss.ph.tahshu....shhtsts.hphhshhshs....athpphc.h..ts+.uhp.thstssphcts.h.P.PshtWhpsshppc.ohhhloPpssplDsYs+phhsp.F.sGcCps..s.......CpTpashslWhs-ps....tspCDh.......hp..s.....cG......hh.ss....c..ts..c..p.sshss................ptGha....LctACplphCGh.Gl+h.pGshlthps..........ssth..hhs....Pcphss..................hhsh+.ssp.-a.....hl..p.-.L.l+.......pRp...c...CL-shppIhsstslo.h.LShht....G.s...sahh.............................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 0 0 2 +4138 PF03342 Rhabdo_M1 Rhabdovirus M1 matrix protein (M1 polymerase-associated protein) Mifsud W anon Pfam-B_3629 (release 6.5) Family \N 25.00 25.00 319.20 319.00 21.20 20.70 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.29 0.70 -5.21 4 24 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 9 0 0 24 0 221.50 62 98.90 CHANGED MuD..p.GEphhs..u.sLhcL-u+LcN.s.cDuplsth.u..KcssshptE...osK+pscPp....LEpllL+aVsEcpplDAhKcFGtLItpI+.SHQtELTpHLE+VusEpRANLpALhcSQpEppKsoKpILSslIulRuplsENuS.+PKsLDhDQV+sERALGFshGYpTAlslhs+lKshsPppssptpVKshAlpAhEc-EYEGStphF+pVlctlKtchc MuD.IEMuEpLVLSHG.SLADLD+RLDN.APKDsRuALFSS..sssso+QKs...SPKKKssPT...TLEElIGHFVsEDLQLDAsKAFGQLLRRIKhSHQEELTQHLEKVNsEpRAKMGALLESQKENGKKTDNILSILIuMRGEGAENASKKPKVLDGDQVRNERALGFNRGLTTAAIAMKKFKLEDPLsLCKGSVKRAALSAMEKEEYDGpRETYusVuKAlKA-lc. 0 0 0 0 +4139 PF04785 Rhabdo_M2 Rhabdovirus matrix protein M2 Kerrison ND anon Pfam-B_2486 (release 7.6) Family M protein is involved in condensing and targeting the ribonucleoprotein (RNP) coil to the plasma membrane. M interacts specifically with the transmembrane spike protein (G) is important for the incorporation of G protein into budding virions [1]. 
25.00 25.00 414.40 414.30 16.80 16.70 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.28 0.71 -5.28 4 134 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 30 1 0 136 0 202.00 89 99.89 CHANGED MNhLRKhlKoC+D-EoQKssssSAPPDDDDLWLPPPEYVPLpEloGKtNhRNFCIsGEVKlCSPNGYSF+IlRHIL+SF-sVYSGNRRMIGLVKVVIGLsLSGSPVPEGMNWVYKLRRTLIFQWA-SpGPLEGEELEYSQEITWDDDoEFVGLQIRVsA+QCHIQGRlWCINMNSRACQLWSDMoLpTQQS-EDcsTSLLLE MNFLRKIVKNCRDEDTQKPSssSAPPDDDDLWLPPPEYVPLKELTGKKNMRNFCINGEVKVCSPNGYSFRILRHILRSFDEIYSGNHRMIGLVKVVIGLALSGAPVPEGMNWVYKLRRTLIFQWADSRGPLEGEELEYSQEITWDDDTEFVGLQIRVSARQCHIQGRlWCINMNSRACQLWSDMSLQTQRSEEDKDSSLLLE 0 0 0 0 +4140 PF03397 Rhabdo_matrix Rhabdovirus matrix protein Mifsud W anon Pfam-B_3980 (release 6.6) Family \N 25.00 25.00 166.50 166.30 18.10 17.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.91 0.71 -4.91 10 35 2009-09-10 23:57:29 2003-04-07 12:59:11 9 1 9 0 0 31 0 165.90 66 85.60 CHANGED KRTILlPPPHLTSNDEDRVSTILTEGTLTITGPPPGNQVDKICMAMKLARAILCEDQHPAFNPLVHLFQSAMIFGETSEKIDFGTRSKTLITSFKlAEAKAIYLDoSPVRSRIEAKKYTTPIRHGSVTYYGPFlFADDHVGGKGHREKLGALCGFLQSssYGQAKDYY .KRTlLIPPPHLhSsDE-RVollpsEGplplTGhpPosLpEKIhhuMpLAuAIlGGD.HPuFpshsaLFQptMEFGuTpEKlsFGoRcsss.ToapVh+A+tlYLpopPl-K+IshppYosshcsuoITaoGpFLFSusHVGscDsRsKLAuL-GFhpSsSYtpsKDYY 0 0 0 0 +4141 PF00945 Rhabdo_ncap Rhabd_nucleocap; Rhabdovirus nucleocapsid protein Finn RD, Bateman A anon Pfam-B_477 (release 3.0) Family The Nucleocapsid (N) Protein is said to have a "tight" structure. The carboxyl end of the N-terminal domain possesses an RNA binding domain. Sequence alignments show 2 regions of reasonable conservation, approx. 64-103 and 201-329 [1]. A whole functional protein is required for encapsidation to take place [2]. 
19.70 19.70 20.30 20.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.15 0.70 -5.88 11 7911 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 119 82 36 2619 0 200.80 64 92.45 CHANGED ssphVss+hPs.sDssEYPhsaFpcsp+phhshhps...hDLsphRphlhsGlpuuclssscVsSYLhtshcthcsphccDWsSaGlpIu+KG-pIs.hsLlslpcpcsp...h.cGtpcssRs...s-DtaLshhLLuLYRlu+sssp...sY+sslhD+lpp.hcst...phstcphl.sstchhssWsspsNFphlsuuhDMFFpRFcpHtauslRaGTlVoRYcDCuuLsohoahhKlhsLo.hc-shpWlFs+sht-Elh+MMp.PGQEIDsscSYhPYhhshGLSsKSPYSSsssPphashlHhlGshlsSsRShNAphlsstshpclossuhlluashtucushcppFhps-cchpptcsp-tsps-ss.scs.........spus-shsWhuhapchthphscphcsah+RhVpsls ............................................................................................................................YLhush..............................................................................................................................................................................................................................................................................................................................................ttc.h+.s...ch.t.sssu..............t.........hst-sps..uhYtch.hphsp.hh.tlp+hhts..o.......................................... 
1 8 8 36 +4142 PF03216 Rhabdo_ncap_2 Rhabdovirus nucleoprotein Mifsud W anon Pfam-B_2146 (release 6.5) Family \N 20.20 20.20 20.30 20.60 19.90 20.10 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.11 0.70 -5.73 4 252 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 12 0 0 122 0 172.00 65 94.19 CHANGED uul+cpFsGLRDVKssshtspGp-acPuclpLslYt..ossDhDhphIh+AlutVGGspTscALulLhAFVhttos.sth-stsKlLp-hGFpVcplPhusslpssltsP.pcLApslspENlh-lV+GlLaTCALhsKYsVDKMtpYhppKLtcLAoSQGlsELpphsss+usLt+luuslRPGQKlTKAlYuhILlpluDPsTtuRA+ALsAMRLsGTGMTMVGLFsQAAKNLGAsPADLLEDLCM+SlV-SARRIV+LMR.Vu-AcslpA+YAlMMSRMLG-uYFKuYGlN-NSRIosILhsINs+Ys-sThtGLpGlKVSs.FRcLAccIAclLVcKYDssGssGpGASDlIRpA ............................................................................DtKVIVDALSALGGPQTVQALSVLLSYVLQGNTQEDLtsKCKVLTDMGF+VTQusRATuI-AGIhMPMRELALTVNDDNLM-IVKGTLMTCSLLTKYSVDKMIK.................................................................................................................................................................................................................................... 0 0 0 0 +4143 PF02484 Rhabdo_NV Rhabd_NV; Rhabdovirus Non-virion protein Mian N, Bateman A anon Pfam-B_2189 (release 5.4) Family Infectious hematopoietic necrosis virus (IHNV) is a member of the family Rhabdoviridae. The non-virion protein (NV) is coded for by one of the six genes of the IHNV genome [1], but is absent in vesiculovirus -like rhabdovirus [2]. 
25.00 25.00 72.90 201.70 18.40 16.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.35 0.71 -4.18 2 24 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 6 0 0 24 0 110.50 88 99.66 CHANGED Msp+s.sTshtAL+-lLRYKspVAtHGFLFDDGclVWpE-sDtsWpRLhsVVsALhSSpRMppsLaMDhSITKG-GaLLFsDLQGTpNh.a+pP+FRpaLh.l--FLshPR MDHRDINTNMEALREVLRYKNcVAGHGFLFDDGDLVWREEDDATWRRLCDVVNALISSKRMQRVLYMDLSITKGEGHLLFVDLQGTKNRLYKEPRFRRHLILIEDFLAYPR 0 0 0 0 +4144 PF00554 RHD Rel homology domain (RHD) Bateman A anon SCOP Domain Proteins containing the Rel homology domain (RHD) are eukaryotic transcription factors. The RHD is composed of two structural domains. This is the N-terminal domain that is similar to that found in P53. The C-terminal domain has an immunoglobulin-like fold (See Pfam:PF01833) that binds to DNA. 19.70 19.70 19.70 20.30 19.60 19.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.95 0.71 -4.53 21 1068 2012-10-03 00:25:27 2003-04-07 12:59:11 17 44 147 75 369 1068 0 154.90 36 22.50 CHANGED lcIlEQP+QRGhRFRYcCE.GRSsGSIPG.tpSpcssKTaPolclpsYpG.puh.lploLVT+-pP.h+PHP....HsLVGK...c.C+cG......hhplclsP-.shs.hsFpNLGIpCV+KK-lcpslppRh.............................phs.sPap.lt.pphcthpchDLNsVRLCFQsal..cpsGsashsLsPVlSsPIaDp ...............................LpIhtQPcp+.aRhRYpsE...sS.+Gul.u...........ps..ppsaPs.V.............c..lp....sYts....sh...lpl.lsTsst.....h+PHs....HclsGK..........s..s.p.p.s.................lh..cl..t..ltPc...shh..hshsshGIhpl+.pp-lptt.h.t..................................................phs..s.hp....................shVRLsFpsal......sps...shp.hh...lhSpPI.sp........................................................ 
0 51 81 191 +4145 PF02115 Rho_GDI RHO protein GDP dissociation inhibitor Mian N, Bateman A anon IPR000406 Domain \N 22.70 22.70 22.90 22.80 21.90 21.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.22 0.71 -4.90 16 507 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 274 48 308 499 3 181.80 38 87.83 CHANGED MuEppspstphpphttpp-c.p.tss.........YKPPspKSlcElhchDK-DESLpKYKEsLLGsss...slsDPssP.NVhVp+lsLLssstP.slphDLoG-..lcplKcps.FsLKEGscY+lKIsF+Vp+EIVSGL+YlppTaRtGl+VDKspaMlGSYuP+.s-....Y-ahTP.EEAPpGhLARGsYslKSpFTDDDKpsHLoWEWsLpIpK- .................................................................ptp.t..........acss.sp..KolpE...h..ph............Dt-DESLp+aKcsL...L.Gtss...............sls-s.ssP..pV.h.l.ppLsLh.sp..st...P....sl..s..hDLss..s....l.pp.h...Kcps..FslKEG.spY+l+l.sFcV.p+-IVSGL+YlppshRpG...l+...V..DK......tp.......M......lG.S.Yu.P.....p..s-...........Yphhh...s.......E...EAPsG.hlAR.GsYsspSp......FsDDDcps....H.....L..pacWshpIpK-.............. 0 79 153 234 +4146 PF00581 Rhodanese Rhodanese-like domain Bateman A anon MRC-LMB Genome group Domain Rhodanese has an internal duplication. This Pfam represents a single copy of this duplicated domain. The domain is found as a single copy in other proteins, including phosphatases and ubiquitin C-terminal hydrolases. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.63 0.71 -3.73 147 30295 2009-09-13 10:52:53 2003-04-07 12:59:11 15 226 4760 191 9427 22860 9896 101.90 20 44.29 CHANGED ssp..pltphh.......ttsshhllDlR........sttc..............................a..............pt..........u....HIsuA....l.slsh..................................t.......htphththspsp......tl..llas................tstpustssth.....hpt.....Gap.......pl.....hhlpGGhpsWttp ..................................................................t....h.thh.......ttsh...hllD....l.R.......................ss..t.-.............................................................................................................a.....................................pt...................G......H..l..s....G....A...........l.sls.h...............................................................pt......................h.t.p...h..h....t..............s...p..s.p.............................tl.........llaC.............................ttu..cus.t.sst.h............................Lpp..h........Gap..................................pl..................h...l......p.G.GhttWt..t.................................................................................. 0 2977 5838 7876 +4147 PF00620 RhoGAP RhoGAP domain Ponting C, Schultz J, Bork P anon SMART Domain GTPase activator proteins towards Rho/Rac/Cdc42-like small GTPases. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.75 0.71 -4.60 87 7538 2012-10-03 21:54:49 2003-04-07 12:59:11 22 300 353 36 4418 7012 27 147.00 25 17.80 CHANGED Phllppslphlcp.pG..lppcGlaRhuGspsplppLppth-psts..........................thpptslpsluulLKpalRcLPcPLlshphapphh.t..shph.............tstppp.....................hptlppllp..pLPpsshpsLchLhtaLpclsp..psph............N+MsspNLAhlFuPsllps.t....tsp .............................................................................................................Phhlppslp.h.l.cp...pG............l.p.p.c......Gla...R..l..s....G.s..t....s.....p.l..p..p.........L.c..p.t.hcpst.p..........................................................t.pt.hclps....l....u...u.....lL..K.....h..a.h....R....-..L..P.......c.........P.L.....l....sh...p...h...apphl..p.......shph...............................t.stpp.p................................................................................hptl.p.p..llp.......pL......P.....p.....s....s..h..p..s.L..p.h.Lh............p..aL..p..c......l.....sp........psp..................................NpMs.s.pNLAh.lauPsLhps......sst........................................................................................................................... 0 1311 1920 3077 +4148 PF00621 RhoGEF RhoGEF domain SMART anon Alignment kindly provided by SMART Domain Guanine nucleotide exchange factor for Rho/Rac/Cdc42-like GTPases Also called Dbl-homologous (DH) domain. It appears that Pfam:PF00169 domains invariably occur C-terminal to RhoGEF/DH domains. 
20.90 20.90 21.00 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.45 0.71 -4.40 192 7395 2009-01-15 18:05:59 2003-04-07 12:59:11 15 583 301 68 4161 6705 27 174.70 22 17.10 CHANGED llpEllpTEpsYlpsLphl..hphahpslpp..................................pshlstpc.........lp....t..lF.u.slpplhphp.pp.....h.LppLcp.................................................tp..........pp..................l.............uclF...lp.htp..............hhph.YspYsss...hspuhphl....pp..hp.............................................ppsp.........Fpp.......hlp.hpt.............ptht........LpshLlpPlQRls+Y.LL............Lccl.lKpT.s.t..sc........-h.p....sLpp........Alptl.pplspplNc .............................................................................................htEllpTE.ppY...l.c.p....Lphl......hp.....h..ahpshpp.............................................................................................thl..s..t...pp..........hp......h.lF..s....N.l...p....p.lh....p.....h...p..pp.....h..L.p..p.....Lcp....................................................................................................................................t...tp..............tt.pp...................................l.............................us.ha.................lp...htp.......................hh.ph..YtpYsp..s..hs.p...u..h..ph.l.....pp.......ht.......................................................................................................................................................p.p.pt...................hpp............hh.p.p.hpt.....................................s..pphs...........Lp..s...h.L..l...p........P....l..QR............ls.+Y..LL..................................L....p..c.l...l+p..o....s....p....sp....................-t..p.........pl.p...p.........................Alphh.pplhpphN.......................................................................................................................................................
.............................................................. 1 1275 1737 2801 +4149 PF03527 RHS RHS protein Griffiths-Jones SR anon PRINTS Family \N 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.96 0.72 -4.18 18 2442 2009-01-15 18:05:59 2003-04-07 12:59:11 9 181 606 0 216 2289 23 39.60 54 4.56 CHANGED laaYHsDphGtPLpLocs-GchsWpAcYcsWGslhpEpssp .....lHhYHCDHRGL...PLALlSp-Gs....T....sWsAEYDEWGNhLsEEss.......... 0 42 71 144 +4150 PF00073 Rhv rhv; picornavirus capsid protein Eddy SR anon Overington and HMM_iterative_training Domain CAUTION: This alignment is very weak. It can not be generated by clustalw. If a representative set is used for a seed, many so-called members are not recognised. The family should probably be split up into sub-families. Capsid proteins of picornaviruses. Picornaviruses are non-enveloped plus-strand ssRNA animal viruses with icosahedral capsids. They include rhinovirus (common cold) and poliovirus. Common structure is an 8-stranded beta sandwich. Variations (one or two extra strands) occur. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.07 0.71 -4.57 50 28435 2012-10-04 01:49:40 2003-04-07 12:59:11 15 90 713 302 0 21797 0 144.60 21 43.85 CHANGED hsssspsshss.phsshhp........h.pppsssshpth....t...t.pphht.hpshshs.ttpshhhhphsh.spthph....h.......hGthhpaaoahRsuhcl.phh..h........su......oth......ppG.........p..........h.........l...l...sh....lPsGss...............................................sssst.................................hpuhhssH.hhshttssosplslPalussshpshh...........................................pa.sshshshhshssh.sh ......................................................................................................tst..s..ths...p..........tt..ps.h.th.......t.ts..................t..h.........s........................s...............t........................h.......h.s........a.p.l.sh...ht.hsp............htphhc.haTYhRh.DhEhshh...h...........ss.......................s..........t..........h..........hha..lP.sG..ss..........................................................................................ps...s...sp........hh..p...t...ts....sphslPahu..tphh...t.h.s.........................................h..................h................................................................................................. 1 0 0 0 +4151 PF02267 Rib_hydrolayse ADP-ribosyl cyclase Bateman A, Mian N anon Pfam-B_3719 (release 5.2) Domain ADP-ribosyl cyclase EC:3.2.2.5 (also know as cyclic ADP-ribose hydrolase or CD38) synthesises cyclic-ADP ribose, a second messenger for glucose-induced insulin secretion. 
25.00 25.00 45.70 37.20 19.60 18.80 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.78 0.70 -5.03 12 149 2012-10-02 19:28:18 2003-04-07 12:59:11 12 3 48 156 78 136 0 212.20 38 81.23 CHANGED Tsphp-IhLGRChpYs.....thhpPt................tphsCpsIhcsFhpAhhsKsPCsls.pDYcshhpLsspo..lPssKolFWSpsp..plsHsasp.sppchhTLEDTLhGahuDsLsWCGp....tssSthsapsC...Pshpc....CsN..NshssFWppsSppaAcsAs..........................GlVpVMLNGSps..ss......asppShFuslElssLpss+VsplplaVhc-ht.....sssp-oCusuSltpLcphlps+shtasChss.+slhhLQClcpsppssC ...............................ph.plhluR.Chpah.......hpst....................t.phsCptlWcsF.ps...hh...s..K...s...PCslh.pDYp.h....hp.L...stps......lPpsKolFWppsp...lsppasp...sppc.hhsLpDs..LhGhhsDtL....sWCGp.....pss.sthsY.p.SC...Ps.pc....CpN....NslssFW+puS.tpaAcsus..........................GslpVhLNGShs..ss.......as.hpuhFushEl.sLp.pKVpplplaVhaplt.....s.s.s...h-oCupsSlp.LcphLpp.hshpaoChsshc.ssph.l.Chcpspp.tC....................................... 0 22 23 41 +4152 PF01872 RibD_C RibD C-terminal domain Enright A, Ouzounis C, Bateman A anon Enright A & Pfam-B_6425 (Release 8.0) Family The function of this domain is not known, but it is thought to be involved in riboflavin biosynthesis. This domain is found in the C terminus of RibD/RibG Swiss:P25539, in combination with Pfam:PF00383, as well as in isolation in some archaebacterial proteins Swiss:P95872. This family appears to be related to Pfam:PF00186. 
20.50 20.50 20.50 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.29 0.71 -4.71 27 7001 2012-10-03 00:23:32 2003-04-07 12:59:11 12 23 4048 36 2265 6046 2900 188.90 22 66.43 CHANGED salhhphshSLDGthusssGsuph....lsstt...............spthhpphRppssulllGt...sTVhs-sPthssphspt.............cpPh+lVlssphchs.psp......llsps....hhhhtt...............................h.sllcpLtpp.shpslhlEGGusLtsu.hlctGLlDclplhluPhllGu..utshhsstsh.t.....hplchtphpphusslhl .............................................................................................................................hlhhphuhSLD.....Gh.hA......t....s..s.....u...p...s...p.....a.............l.o.s.tt.........................................sc.pt..s.p...p.h...R...s....p....s....sAl.l..h..Gt.................sT..h....h.....t....-....s.....P.....t.....h....s..s...c..h..tt..............................................ppP..h..+..l..V...l..s.p..p.....h..c.....h...s......s.tp.............................................lh....p....s....s............h..h..h..h....t...t................................................................h............................................h.s.ltp.hl.p.p.....L....t...pp.....s........h........psl..h.l....EG.Gs.....p..lss....s.hl.............p.......t....s....L.....lDElhlhlu..P.h.l...l.Gs........u.....hs..h..h.s..t.....s.ht...............phphhphp.hs.....h................................................................. 0 833 1564 1990 +4153 PF05062 RICH RICH domain Bateman A anon Pfam-B_277 (release 7.7) Domain This presumed domain is about 85 residues in length and very rich in charged residues, hence the name RICH (Rich In CHarged residues). It is found in secreted proteins such as PspC Swiss:Q9KK19, SpsA Swiss:O33742 and IgA FC receptor Swiss:P27951 from Streptococcus agalactiae. This domain could be involved in bacterial adherence or cell wall binding. 
22.00 22.00 22.00 23.70 21.70 21.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.73 0.72 -3.85 26 495 2009-01-15 18:05:59 2003-04-07 12:59:11 7 33 170 0 1 455 0 74.10 40 17.91 CHANGED spppcKttptVcpalcKhLs-l...pLcKc+HTQsVsLhpKLucIKpcYLhcLss.ppc............splpph.pcsKscLDAAF-pFKK ..............ppcKutptVspYhpKhLp-h...pLc+c+HpQsVuLlpcLssIKppYL.-lssspsK..........sphppLspKspu-lDAAhpcFKK........ 0 1 1 1 +4154 PF01042 Ribonuc_L-PSP DUF10;UPF0076; ribonuc_L-PSP; Endoribonuclease L-PSP Bateman A, Finn RD, Kerrison ND anon Pfam-B_797 (release 3.0) Domain Endoribonuclease active on single-stranded mRNA. Inhibits protein synthesis by cleavage of mRNA [1]. Previously thought to inhibit protein synthesis initiation [2]. This protein may also be involved in the regulation of purine biosynthesis [3]. YjgF (renamed RidA) family members are enamine/imine deaminases. They hydrolyze reactive intermediates released by PLP-dependent enzymes, including threonine dehydratase [5]. YjgF also prevents inhibition of transaminase B (IlvE) in Salmonella [4]. 
27.30 27.30 27.30 27.30 27.10 27.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.45 0.71 -4.35 30 11334 2012-10-01 19:40:00 2003-04-07 12:59:11 16 61 4115 187 3227 8930 3544 116.70 29 81.61 CHANGED otsAP..tshusYSp.Alp..ssshlalSGQlslcs....tssclh.sssstpQscpshpNlctlLptAGsshsc......lVKsTlFlsDhsc.FAplNplaspaFsps..........PARusVpVutLP...hsshlEIEslAhh .....................................................s..ss.a..up...ulh..........hs.s..h.l..a.h.SG...Q...l..s..hs...............................s....sp...l..........s.....s...s.....l.......p.....t.....Q.....s......c....p....s....l...c.......N.......l...c....s....l....L........p....t..A....G...s.....s...h..sc......................l...l...+...so...l......a..l......s.........D...........h..s......c......F....s....s...h.N..........p...l...a...s...p.a...Fspt................................P..A..R....o.sVt.V...s.t..Ls.....sh.hlE.I-slAh.h............................................................. 0 905 1902 2685 +4155 PF00317 Ribonuc_red_lgN ribonucleo_red; ribonuc_red_lg; Ribonucleotide reductase, all-alpha domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.61 0.72 -4.18 137 5796 2009-09-10 18:18:06 2003-04-07 12:59:11 16 66 4614 118 1297 4181 2797 77.40 30 10.10 CHANGED hthshhuhp.hL.tpcYLh..+sp..ps.........hhEsspphahRlAhslAt.p..................................................................................sphphsp....caa....phhsphpahsuoPslhNAGpsts .....................h..hoahuhp.ph.hspYhl.......+sp.......sG....p...........hhE.s..pphhhhVAhsLup............................................................................................schphAp......cha....chhsphca.p.sATPThhNuGpsp.p.................................................................................................................... 
0 412 816 1098 +4156 PF02867 Ribonuc_red_lgC ribonuc_red_lgC; Ribonucleotide reductase, barrel domain Finn RD, Griffiths-Jones SR anon Prosite Family \N 19.50 19.50 19.50 19.60 19.40 19.40 hmmbuild -o /dev/null --hand HMM SEED 538 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.54 0.70 -5.95 136 8373 2012-10-01 23:28:04 2003-04-07 12:59:11 10 91 5010 124 2033 6508 10158 423.00 26 65.30 CHANGED loSCFl........s.hsDol.....cuIhcshppsuhlpKtuGGlGlshop.lRspGu.lpussususGllshh+lhssssphlsQuG.pRpGAhslYLcsaHsDIh-FLs...hK.ppsuc-phR..........................................s+sl.hulhlsD..hFMc....tlcp.......stpWsLhsPpp..........................................l.phaGp.......capp.Y.ph...............................pps.......htc..plpA+cLappIhpsth-oGpPalhacDssNppssp.....p.............p.....h.........GpIpsSNLCsEIsp.tp..........................................................phusCNLuSlNLsphlpts............................................................................hshcplccslchssRhLsslIDhstaP.l.pscp............................sspcpRslGlGlhGLsshLhthtlsYsSp-AhphscplhEtltatAlcuShcLA.c.cpGsat...................sa..csStaupG....h.hchh..................................p............................................................................................WppL+ppltph.GlRNuhhhAlsPTuohS...ls.ssosulEPhhuplas+pshsst......h.hss..h.ptlpp..............h..t.ht.l.......................tsl.sh..tl.tphhth..ac.o.Aa.......-l...s.cthlchsAstptalDQu.Shslal................s..cs..ohpclpphahhAactGLKo.hYYh 
.......................................................................hsSCal.........thtD..sh..............puI..h.ts.h.pp.hh.lsp.uuGlu...hthup......lRthGs.lp....s.h..p.s..usGhlsh.h.+.hhps........................sh.thssQh.............................G.h...........Rt.GA......h...........s.lalphaH.Dl.p.Fls...h+.ppp...u.pc.t.+..................................................................................................hpsh..shulh.l..s...c...hhhp.h...s.p.........stphh.L...hsP..p...........................................................................................t..h.................................thpt.Y......th...............................tp............h.h.p...pl.pA.pclaph.l.h.p..t.hpoG.Pal.hhhD......psNptss.........................................u......lp.....S..NLC..sEIh...t.............................................................p...hsCs..L.........u.........S.........hNlsthhp.......................................................................................ch.t.c.......hsphh..h....+h....L.....stl....h......D......h.....s......h..s......h...thpp............................ss...t.t+slGlGhh..shts.h...L.ht.tl.Ys....S.....p...u....h.phsphhh.hlthhshp..sS.plA..c..ct..s..s.a.......................a..p.t..o.....h..s..p....u.....h.........ph..ht..............................................t...............................................................................................WttLtt..tl..p...........Gh...h...Ns.h.Al.hP..o..sohS.....ls...........s.....s.o.s.ulc.P.hs...h.h.........+p..hpst...................................................................................................................................................................................................t...........h...h......a.p...s.sa..................ch...s.pthlchhu..hp.........p......alD.......Qu.Shsl.h................s....ph.......shppltphhhhAa.c.........h.G.....l.Ko.hYY..................................................................................................
.. 0 708 1339 1729 +4157 PF00268 Ribonuc_red_sm ribonuc_red; ribonuc_red_sm; Ribonucleotide reductase, small chain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.50 20.50 21.10 20.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.77 0.70 -5.40 13 7021 2012-10-01 21:25:29 2003-04-07 12:59:11 16 12 4393 189 1340 4104 4718 239.00 25 82.28 CHANGED tshLpcstchF..ashcas-IhphhpcuhuspWhspElsLucDhpDhcp.LstsEppFhpalhAFhuAuDslVN.NL.schosphp.p-hcaaYs.QhuIEslHScsYShllpsLh.pDtpp+pphhsuIhspPsIpcKscWlhchls-scu.huE+llsFhhlEGIFFuuSFASIhaL+pRGLMsGls.oN-LISRDEulHTshAChlYsphh...c+Ps.chI....hclhpEAV-lEpcFlp...spsPsthhhhs...sctlpQYlcasADRLLsslGhstlassss..s.s.hs ..................................................hh.....................p....h.....p.h.app...h.s.p.aWhspc.ls..l..op.D.h.s.a.......p.........p.....L......sttE.pp.hhhpshs....hhshhD.o...l.s..p.s..h.......sl..h....h..p..h..s...c.......c..shhst..th...E.s.lHu....cuYohlhp.......s...........l............s.............s....................t...........t...............t.......h....a.......p.......h.....h...............p...................................................l...........p...c....+...........s............p...........h.............l...........................p.............h.............h.................t.............t.............t..............t...........h....................p............p...................................l.........u............s.....h.....l......Euhh....F.a.u..u....F.h.h....h.h...h............t..p.........p....s..h.....h.ss.uph.....h..chI.RDE.u.lH.h....h....h.h.t....h....h....h....p.........h....h....t....t.......p....p...........t.............p.....p....l...............hplhh...p....h....h.p.........E...h...p...asc...................h..h..s.t.......sh...........tp..h..ppalca.uspth.tlGhc..ha..sp.........s......................................................................................................................................
............................ 0 402 783 1091 +4158 PF00545 Ribonuclease ribonuclease; ribonuclease Bateman A anon SCOP Domain This enzyme hydrolyses RNA and oligoribonucleotides. 21.30 21.30 21.70 21.30 20.30 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.31 0.72 -3.91 80 855 2009-01-15 18:05:59 2003-04-07 12:59:11 15 18 696 335 302 839 28 88.20 33 48.99 CHANGED LPspsp....pphshhptGhs.............sh.tpsGshFtNpEp......................hLPttssG....hYpEhsl.h.s....G.....sRGA..cRlVhusp...........uh.haaTsD.HYsoFpph ..................................................................................LPtphhpphphhptGhsh..................sh.spsGphFsNpEt......................................tLPptpsu........hY+EhsVth...s...................G......sRGu...cRllhuss.............uhhaaTsD...HYpoFpph........................ 0 68 167 250 +4159 PF03631 Virul_fac_BrkB Ribonuclease_BN; Virulence factor BrkB Bateman A, Eberhardt R anon Pfam-B_4424 (release 7.0) Family This family acts as a virulence factor. In Bordetella pertussis, Swiss:Q45339 is essential for resistance to complement-dependent killing by serum [1]. This family was originally predicted to be ribonuclease BN [2], but this prediction has since been shown to be incorrect [3]. 
25.60 25.60 25.60 25.80 25.50 25.50 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.98 0.70 -5.03 155 5654 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 3939 0 1383 3900 630 252.90 22 76.58 CHANGED tpcchshhAuu.luaasllulhPhlhhhhulh.s........h....h...........thhpplhsh.......ltph...........................................hP.....t..sh.phlpshl.p.phh......pps.....................ttulluhuhll..........asusshhpslppuhNp....ha.............ph..pp.t+....s.........hl...ppphhshshh.l....hhsllllluhslsshhth..........................hhsh......h...hhphhph........lshhlh.hhh.hshlYphhPs.t..chphchsh.Gulluulhahlsphs...au.h.Ylsphs..sa.sshYG.ulu.ullllll..WlalsuhllLhGAplssshpptp ...............................t..pphsthAusluYaslLSlhPlLhlhhulh.u.h...hs................th.tpplh..sh......lhs.....................................................................lP........s..tss..shlpshl....p...p.l.t....sps................psu.l...l.ulshll...........aou.thhss.....lppul....Np....la......................cs...pc...pR....s.............hl...hphhhs.h....hhh..l....hhs.l.h.lh.h.ulslss..hhts.h.th..........................................hhsh..........th.......hhphl.ph......lshh.lh.hlh.ahhl.YhhlP....s...t.....+s................p.h.+t.s..l..hGuhluul.h.a...lh.p.hh...Fu..hYl.s.p.hs..........sh...sthYG...uhu..sl..l..ll..hl..WlahsshllLhGAtlsusht...h.............................. 
0 407 908 1180 +4160 PF00825 Ribonuclease_P Ribonuclease P Bateman A anon Pfam-B_1558 (release 2.1) Family \N 22.30 22.30 23.40 23.00 22.20 22.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.61 0.72 -4.17 141 4169 2012-10-03 01:04:38 2003-04-07 12:59:11 13 4 4132 9 899 2569 2063 107.70 28 88.95 CHANGED h.s.+ptRLpppp-Fppla.ppup+.hts...pth..llh........htt..............................shsts..........................Rl..GlsluKKhup.AVpRNRlKRhlREsaRhppsp.ls.......uhDlVlls+.ps....ht..phshpplpp.........plpp...hhp+ht ..................................................pt.Rlpppp-Fptla....pp.u....p.....p....hu.s.....tp.h....llh...........hht......................................p......p.tps.......................................RlGloVuKK.hup.AVpRNRlKRhlRcs.hRh.t..p.p.p..ls..................shDhVl.lA+..ps...........st.....phsh..p.p.lpp.........sLp+.lhcp..t................................................ 0 309 600 766 +4161 PF00445 Ribonuclease_T2 ribonuclease_T2; Ribonuclease T2 family Finn RD anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.66 0.71 -4.68 116 3203 2009-01-15 18:05:59 2003-04-07 12:59:11 13 17 1368 27 551 2843 56 162.70 25 76.12 CHANGED aD.........a.ahLs...........lpWsssaCt...................t......tpC........sttstsF.....slHGL.WPs..................p........ts.............h.sp.....C.........h........................slhsphpphWPsh.t.p.........shhpHEWpKHGTCus........................................tpsYFstshplh..pphsl.shLtpt..lhs.spstph.........shsplpsAlppss.....s..ttsslpCpp...................................hLpEltlChsps..............h...sCsp 
..........................................................h.hs..pW.sshCt.....................................t........C................t.a.....TlHGL.WPs..............................................ths...t...................h..p.sCsss......hs......p.....................................................phhs.p.L...pp..hWPs...h.tsts..................phW.ctE..a.pKHGoCst.s.........................................t.tYFp.psh....phh................p.......ph......s.h.....phLtph......lhs...t..spph.......shpclt.sslppsh..........t...ttstl.pCpts.......................................................h.pl.hs............................................................................. 0 163 305 441 +4162 PF00687 Ribosomal_L1 L1; Ribosomal protein L1p/L10e family Bateman A anon Pfam-B_115 (release 2.1) Domain This family includes prokaryotic L1 and eukaryotic L10. 21.70 21.70 21.80 21.80 21.40 21.60 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.64 0.70 -5.00 255 5940 2012-10-01 21:21:48 2003-04-07 12:59:11 16 17 4958 95 1878 3991 2982 212.10 38 87.56 CHANGED lphlpp.....h.............................................t..ttcFscol-ltlsLph.........sppsspph...........psslsLP+.sht............+sh+.l...........sVhs.psp.ptp.......................pstthsuc.lluh--Ltpp........hc.pth....tcaDhhlAsschhshls.+..LGphL....us+GhhPsPh.....s.lsh.....................................s...lsptlcch+suphhhps.....+ss..slpstlGch..sh.sscclh-Nlpsllstlhph..t....hsKGh...l+slhlp..oTMusu 
.........................................................................................................................................................th.hthttththht.................................................chs...psKFcp..o..V-.lulpL..sl........Ds++u..D..Q.pl................R..Gsl.........sLPp.GTG............+shRV................h.VFupG.scsctt...........................scs.A.GAD.hVGh--Lh.cp............lp.pGh......h.cFDlllAoPDh.Mt.hVG..+...LG+lL....................GP+GLM.PNPKs........GTVTh..................................................................DVscAV.c.csK.u.Gp...l.p.aRsD...........KsG.hlHssIGKl..oF...ss..........-cLhENhpuhlsslh+tK...Ps..........suKGh..Y..l+plslo.oTMGs................................................................. 0 649 1169 1576 +4163 PF00466 Ribosomal_L10 L10; Ribosomal protein L10 Finn RD anon Prosite Family \N 22.00 22.00 22.20 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.06 0.72 -4.08 158 5754 2009-09-13 14:02:41 2003-04-07 12:59:11 15 11 4979 96 1716 3716 2281 98.80 30 50.84 CHANGED hspppKpphl..pclpp..hlppsp.slhlsch....pGlsssphpplRppl.+ps...ssphpVsKNoLhchA...lcp.ssh................t........l.......p..s.hl...pGssulhao..pp-sss.ssKhlpca.K ......................................ppKpthV....scls-hh.......c.....s.......ut..usll..s.-h....c.GloV..sph..opLRcph.Rcs..............ssphcVs...KNTL..h++A....lct..ssh-.......................s................L.....s...c.hh....sG.Psu.ls.Fo....p-.-..ssu.sA+llpcFuK............................ 
0 591 1068 1434 +4165 PF00298 Ribosomal_L11 L11; Ribosomal protein L11, RNA binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.40 21.40 21.40 21.70 21.30 20.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -8.99 0.72 -3.83 171 5449 2009-09-13 02:34:37 2003-04-07 12:59:11 14 14 4897 169 1606 3179 2185 69.30 49 46.79 CHANGED KTPPsohLL+KAAsl......c........+GSs..pP............s+pp.VGplohcQltEIAchKhs.DL....sussl-uAh+hlhGTA+SMGlpV ..............................................KTPPAu.sLLKKA.AGl.....c........+G.Su...cP.........................s+sK..VGp.lTpsQlpEI..........AcsKhs....DL.....sAs.slEuAh+hIt......GTARSMGlsV........ 0 555 1012 1339 +4166 PF03946 Ribosomal_L11_N L11; Ribosomal protein L11, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The N-terminal domain of Ribosomal protein L11 adopts an alpha/beta fold and is followed by the RNA binding C-terminal domain. 19.60 19.60 19.60 20.10 18.90 18.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.56 0.72 -4.51 167 5468 2009-01-15 18:05:59 2003-04-07 12:59:11 9 15 4889 114 1604 3175 2123 58.80 56 39.60 CHANGED hlKLplsAGpAsPuPPlGPALGptGlNIhpFCKpFNspTpc....hp...Gh.lPVhITVas..D+SF ..........hlKLQlsAGpAsP....uPPVGPALGQ.pGl.N.....IM.cFCK...tFNA+T.pc................pt.Gh..IPVhITVYp.D+SF................. 
0 555 1008 1337 +4167 PF00542 Ribosomal_L12 L12; Ribosomal protein L7/L12 C-terminal domain Bateman A anon SCOP Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.08 0.72 -3.97 191 5164 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 4749 15 1334 3031 2145 67.20 57 51.44 CHANGED EFDVlLsss..GspKIsVIKsV....Rs.lT.GLGLKEAK-LVEuA...Pps...lKEu..luK-EAEclKcpLE-AGA.pVElK ..................EFDVlLpuA...Gs....pKltVIKsV..........Rp.l.....T.GLGLK...EAK-LV-uA...Pp.s...lKEu..VuK.--A.EplKppLEEAGA.pVElK.......................... 0 469 872 1139 +4168 PF00572 Ribosomal_L13 L13; Ribosomal protein L13 Bateman A anon Prosite Family \N 21.10 21.10 21.30 21.70 20.30 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.59 0.71 -4.08 115 5540 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 4945 229 1626 3368 2257 124.90 46 81.05 CHANGED WhllD..AcsplLGRLAoplAphLtGKp+s.apPt..hssGD..hVlVlNA-KlhlTG............pKh...pp+hYhph..oshsGuh+p.shpphhp.+..pPpcllc....+A......V+GMLP+..sphG+phhc+L+lYsGstHs.atst......pshtlph ..............WaVlD..Aps..ps.LGRLAoplAphLRGKpKspaT..PH..lDsGD..a.V.IVlNA-KltlTG............pKt...s.c.KhYY+H..osas.....G.GlKphoh.tchhp..+pPc+llE..........pAV+GML..P+....ssLGRthhc.KL...+..VYuGsEHs.HsAQpPpsl-.l...................................... 
1 568 1025 1367 +4169 PF01294 Ribosomal_L13e Ribosomal protein L13e Finn RD, Bateman A anon Prosite Family \N 24.90 24.90 25.90 25.00 21.80 24.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.11 0.71 -4.81 6 596 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 416 4 328 576 5 161.50 47 82.53 CHANGED shIhpsHF+K+WQphVKTWFNQPARKsRRRtARptKAt+IuPRPsuGsLRPlV+sPTl+YNhKVRuGRGFoLEELKuAGIst+aA+TIGIAVDHRR+N+SlEuLQsNVQRLK-Y+uKLIlFPRcsp+s+ts-ussEEltpATQlpGshhPIpp.pt+plch+clocctKpapAFssLRht .............................l.psHF+KcW......Q.......p......h.......V+saFsQPu.+K.hRRRpuR.tKAttluPR.P.s.s.......LRPlVRsP.T.l+.Y...N....p....K...lRsGRGF.oLp.ELK.uAGlst+hApTIGIuVDtRR+N...+...S.EuLp..tNVpRLK.p.Y+uK..Ll...lFP.+..+.s..p..t....s.+t.....G......D......u.....s..s...E....-.....l...t..h...A..............s.....p......h....p......s............h.hPltp......h.ht........c....h...hhloc....p.....c....thp..AatpLR................................................................................. 0 130 199 274 +4170 PF00238 Ribosomal_L14 L14; Ribosomal protein L14p/L23e Finn RD anon Prosite Domain \N 23.60 23.60 24.00 23.60 22.70 23.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.89 0.71 -4.36 103 6114 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 5540 233 1569 3096 2117 119.80 57 97.21 CHANGED MIphpohLpVuDNSGA........KplpCIp.Vl............st.p....ppphAslGDlIlloV.Kcu.................sscl+K....uplh+AVlVRs+Kt.h+.RtDGshlpF--NAsVlls..sp.s.p......PhGTRIh...GPVu+El.+..........pc.p........asKIsSLAspll .....................................MIQ..pohLpVADNS...GA.............................................+clhCI+VL....GG.S............pR+aAslGDlIl..soV.KcA...hP.................pGpVKK....G-Vl+A..VlVRT.+Ks..hR..RsDGShI+FD-NAuVlls..sc..t.p.................PhGTRIF...GPVuRELR..........pc.p........FMKIlSLAPEVL........................... 
0 520 978 1314 +4171 PF01929 Ribosomal_L14e Ribosomal protein L14 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the eukaryotic ribosomal protein L14. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.50 0.72 -3.63 53 552 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 422 10 304 545 2 73.10 34 48.58 CHANGED .RpshslpplpLTchplc.lsRsu+otsl+KAhcpuclppKWsposWAKKlss+cpRusLoDF-RFKlhhA.++pRsph ....Rpshsl+plpLTchtlc.ls..+uu+scsV+KAaccu.clppKWtposWAKKltspc+RuphoDF-RFKlhhsKptRp........................ 1 106 170 248 +4173 PF00827 Ribosomal_L15e Ribosomal L15 Bateman A anon Pfam-B_1567 (release 2.1) Family \N 21.70 21.70 21.90 22.50 21.60 21.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.07 0.71 -4.80 57 759 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 568 73 409 697 98 176.40 58 88.23 CHANGED uhYpYlcEhW++.ppshh+hLh+.Rhhca...............Rp.sulhRlpRPTR.D+ARpLGYK.AKQGhllhRVRVRRGGR++shs+uup..+P+phGlsplps.s+slQtlAEERuuR+.hsNLcVLNSYWVGpDupYKaaEVILVDPpHsuI+sDsclsWIspssHKtRthRGLTSAG+KuRGLt.s+G.+tspKsR.sShRAs...a+ ......................GAYKYlpE.La+KKQS.D.Vh.RFLhRlRsWpa..........................................RQh.sslpR.ssRPTRPDKARRLGYK.AK.QGaVlYRlRVRRGGRKRPVP.KGusYGKPpppG.VNpLKh.tRuLpulAEER.sGR+.husLR....VLNSYWVspDuoYKa..aEVILVDPtHpAIRpDPchsWIs.ps.V.HK.H.R.EhRGLTSAG+Ku.RGL..sKG.Htappsh..sS.pRtsW........................... 
0 129 221 315 +4174 PF00252 Ribosomal_L16 L16; Ribosomal protein L16p/L10e Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 20.70 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.77 0.71 -4.40 141 7936 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 7072 230 1613 4271 2288 118.30 47 88.89 CHANGED P++s...............KaRK......tp+..s+......h.pGtutt.........Gspls.aGc..............auLpuhEsu.hloucQlEAAR........huhsRhl................++...suc..........laIRl...FPcpslopKPh-sR.MGpGK.........GssctWsAhV+s.GpllFEl...s.G.l..s........phA+cAl+hAupKLPl+s..+hlt ..............................................PKRs.KaRK......pa+....G+....h.+Ghupt........................................Gspls..FGc..............auLpAl..Esu..hlTuR.QIEAAR.........hAhoRah..................................+R........sG+..........lWI+l..FPc+slTt+...shtsR.MGp..GK...................G..ss-hWVA.l.ps.G+ll.aEh.......sG.V..sc.........phA+EAhchAstKLPh+sphl......................................... 
0 541 993 1334 +4175 PF01196 Ribosomal_L17 Ribosomal protein L17 Finn RD, Bateman A anon Prosite Family \N 20.20 20.20 20.50 20.50 20.10 20.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.29 0.72 -3.42 153 4802 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 4671 165 1229 2613 2175 101.60 51 72.16 CHANGED hl.+NhssuLlp.............................+c+IpTThsKAKElRphsE+lIT.................lAK....c..................s.s...............................lpuRRp..shuhl.......................p...scp..................h....................................lpKLFsc........lus+Yts.RsGGYTRIlK.hu..RpG................................DsA.MAlIELV ..............................MLRNhssuLlp.............................HEpIpTThsKAKElRph.lE+..LIT...............................LuK....c.......................s..s............................................................................................lts..R..R....Ahu..hl........................+...scp..................s....................................VpKLF.s-..............luP.R.a.s.s..RsGGYTRIlK.sG...R.pG................................DsAPMAlIELV............................................................. 0 426 794 1042 +4176 PF00861 Ribosomal_L18p Ribosomal L18p/L5e family Bateman A anon Pfam-B_495 (release 3.0) & Pfam-B_741 (release 4.1) Family This family includes ribosomal proteins from the large subunit. This family includes L18 from bacteria and L5 from eukaryotes. It has been shown that the amino terminal 93 amino acids of Swiss:P09895 are necessary and sufficient to bind 5S rRNA in vitro [1]. Suggesting that the entire family has a function in rRNA binding. 
27.80 27.80 27.90 27.90 27.70 27.70 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.24 0.71 -3.90 33 5752 2012-10-02 16:33:16 2003-04-07 12:59:11 17 13 5132 229 1513 3405 2152 116.10 39 83.74 CHANGED scpctpp+++hhhp++hhssst+sRLlVh+SN+alhAQllssststslssASshst-Lpp..s.psshsNhsuAhtlGhLlAcRAlp........cGl.....................stlsaspsuhpYtGRVtAlAcuA+EuGLph ..............................pchtt.p.R.+tp.h.p.p.+...h......t.s...s.....t..p....sRLsVaR.Ss+cIYAQ........lIs...sssut..s..Ls..u.ASol.-.+.-lpp.......................................hs..h..ssh-AAttVGcllAcRAhp........................tGl..............................................ppVs.FD.R....u.G.a.h.Y.HG.R.VpAL...A-AAREuGLpF..................................... 0 490 947 1257 +4177 PF01245 Ribosomal_L19 Ribosomal protein L19 Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 22.80 22.30 21.10 21.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.23 0.71 -4.29 127 4774 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 4673 153 1203 2595 2397 113.20 52 88.03 CHANGED ts.llc...plEppph.....+..ps..lPpFcsGDTV+Vpl+lh.........E......Gs.........................+-RlQsFEGlVIu++s.t..Gl..spoFTVRKl.StGlGVERhFPlaSPtlcpI-VlR..+G............+VRRAKLYYLRs....hpGKuA.R.IKEch ......................................plIcpl-p..pQl.......+....p-.lPsF+...s..GDTVcVp.l+Vl.........E.Gs.........................+.cRlQ.sFEGV..VIu++s..t..Gl...spsFTV.RKI.S.s.G..l.G.VERsFPlHSPtl..-pIEVhR...+.G........................................c..VR..RAKLYYLRs....hpGKAA..R.IKEp.h............................... 
0 415 776 1019 +4178 PF01280 Ribosomal_L19e Ribosomal protein L19e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 29.00 29.00 19.40 19.00 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.79 0.71 -4.43 63 764 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 542 78 423 735 79 138.90 55 54.02 CHANGED ssLphQKRLAAslLp......sGpsRVWl..DPpchs-lusAhoRpsIRcLIc-GhIhtKsh....................pupSRuRsRcpptp++.tGR++GhGpRKGspsAR.hspKphWhpRIRslRRhL+chR-stc.ID++hYRp...LYh+AKGspF+shppLhpalc.ptth.c ..............s.sL+hQKRLAAuVLp......CGcpKVWL...DPNEhsEIusANS.RpsIRK..LlKDGlII+KPs....................psHSRuRsRc...hptA++..KGRHpGhG...KR.KGTtsAR.hPp.......KhhWMRRhRlLRRLL+.+YR-ucK.ID+Hh.YHp...LYh+sKGNsFK.NKRhLhEaI++tKA-................ 0 147 245 348 +4179 PF00181 Ribosomal_L2 L2; Ribosomal Proteins L2, RNA binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 27.10 27.10 27.10 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.55 0.72 -4.01 50 6547 2012-10-03 20:18:03 2003-04-07 12:59:11 18 12 5878 241 1538 4221 2237 76.40 54 28.40 CHANGED GRNsp.G+ITsR++GGGpKp.tYRhIDFpR.spts......l.uhVhsI-YDPsRsAhIALlpats...Gccp.....YILuspGLplGsplhu .............................GRNsp..G+ITs.RH.pGG.G.HK+.tYRlIDFKR..sK.cs.............IsupVtpI....EYDPNR...oA.pIALl.p.YtD.....GEKR...........................YIlAPc.GlpsGsplh................................... 
0 516 966 1289 +4180 PF00453 Ribosomal_L20 L20; Ribosomal protein L20 Finn RD anon Prosite Family \N 24.10 24.10 25.20 25.00 23.10 22.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.37 0.72 -4.12 129 5432 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 5206 158 1102 2738 2165 104.60 52 89.30 CHANGED sRV...KpG..sssRpR+KKlLKhAKGahGs+uphaRsApppVh+AhpYAYRDR+p+KR-FRpLWIsRINAAsRtpG...l..oYSpFlpuL+pusIp......lNRKhLA-lAlpDspuFspl ...................hRVKpG..ssuR.tR+K.KlLKh.AKGYhGu+pplaRsApptV.h+uhpYAYRDRRp+KRsFRpLWIsRINAAARtsG...............l...........SYS..+hhpGLK+A...sl-......lsRKhL.A-lAlpDtsAFssl.................................... 0 386 724 933 +4181 PF01157 Ribosomal_L21e L21e; Ribosomal protein L21e Finn RD, Bateman A anon Prosite Family \N 21.60 21.60 22.00 23.90 21.10 21.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.14 0.72 -4.20 6 896 2012-10-01 20:16:17 2003-04-07 12:59:11 13 6 529 75 505 859 81 94.60 51 64.65 CHANGED spScGhRRtTRhhFpRcFRcHGlssloThl+pYKcGDhVcIKssuSlQKGMPHKtaHG+TGpVaNVs.uulGlhlNKcVpsphl.KRIplpsEHI+.Sc ...........po+GhRptTRhhFuR.sFR++.G..s.l.P.Lu.TYh..+hYKhGD....hVDIK...s........sGuVQK.GM.P.H.KhYHGKTG.cVaNV..Tpc.AVGlll.NKpVps....+hlt.KRIs.VRlEHl+cS................................. 
0 137 228 326 +4182 PF00829 Ribosomal_L21p Ribosomal prokaryotic L21 protein Bateman A anon Pfam-B_1297 (release 2.1) Family \N 21.10 21.10 21.10 21.10 20.80 20.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.94 0.72 -3.86 67 4725 2009-01-15 18:05:59 2003-04-07 12:59:11 16 12 4593 156 1157 2545 2044 95.10 44 82.00 CHANGED MYAllcoGGKQa+VptGchlhlEKl.ss.csGsplphscVLhluss.tpsplGpPhlcG..ApVpAcVlpc.s+ucKlhlaKa+p+Kph+++pGHRQ.aTcl ...................................MYAllco.GGK..Qa+VptGphlh.l.E......KL......ss......c......s............G.......pp.lpF..s..c..VLhVuss.......p.lplGs.P.h..V..sG...ApVsAc.Vlpp....G+.u.c..KlhlaKa+.p.+Kp.h.++.+pG.HRQ.aTcl.................... 0 416 764 986 +4183 PF00237 Ribosomal_L22 L22; Ribosomal protein L22p/L17e Finn RD anon Prosite Domain This family includes L22 from prokaryotes and chloroplasts and L17 from eukaryotes. 20.10 20.10 20.30 20.40 19.70 19.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.24 0.72 -4.09 126 6446 2009-01-15 18:05:59 2003-04-07 12:59:11 14 18 5594 236 1745 3781 2204 106.90 41 78.62 CHANGED Aht..+tl+lSscKsphlschIRGpslpcAlshLchs....................................................sc+uuphlhKlLcSAhANA..p.....psps.h-.s-pLhlpclhlscGsphK.RhpPRApGRAs.lpKcssHlpllls- .....................................Aht+tlRhSs..pK.s.RhVsDhIR..G......+pls..cAl.slLpas.........................................................P.c.+.A.Athl....tK.lL..pSAl.A.NA...E.............................pNtu...hD....hc....s..LhV..........scsaVs.c.G.s........s.......h.K..Rh.pP..RA....+G...RAstIp..KcTuHITllVs....................................................... 
0 576 1053 1415 +4184 PF01776 Ribosomal_L22e Ribosomal L22e protein family Bateman A anon PSI-BLAST P56628 Family \N 23.30 23.30 23.70 24.20 22.10 22.20 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.55 0.71 -4.06 35 584 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 350 6 314 534 5 110.50 52 81.65 CHANGED p.KK...ssh....KFslDCopPsEDs.IhD...lusF......E+aL+-RIKVsGK...........sGNL.G.....ssVsls+p.csKlsVsoclsFSKRYLKYL........TKKYLKKssLRDWLRVVAo..sKssYELRYFpl.spsp----- ......................t.pK.sh+FslDCopPVEDs.IhD...su.sF......EpFLp-RIKV.s.G.K...........sGNL..G......s.s.V.sl.p+p..KsKIs..Vs.S..-......h.s...F.S.KRYLKYL...........TKKY.LKKps......LRDWLRVVA.s..sK..s..sYELRYFpI.sp---E--.c...................... 0 100 168 246 +4185 PF00276 Ribosomal_L23 L23; Ribosomal protein L23 Finn RD anon Prosite Family \N 21.10 21.10 21.20 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.83 0.72 -4.04 12 6180 2012-10-02 20:46:34 2003-04-07 12:59:11 15 12 5417 233 1734 3501 2091 88.90 36 80.46 CHANGED hcll+hPllT-K.shphhcp.NphsFhVshcusKhclKcslcplasV+VhsVNThlh.sKhhR...Gph..hhp..hKKAhlpLp.cs.pthshhsch .....................................h.cllhtPll.TEK..uhth............h.........-.c.......s......ph.sFpVsh...cA.sKspIKpAVE.pl.F...s.....V..c..VtsVN.T.hs...........h....p....u.....K.....h........K.......R........hG.....p........h.....h....G......p............p..t..c...hKKAhV.oLp.tG..ppl.h...t.................................................. 0 536 997 1350 +4186 PF03939 Ribosomal_L23eN Ribosomal protein L23, N-terminal domain Finn RD anon DOMO_DM01622 Family The N-terminal domain appears to be specific to the eukaryotic ribosomal proteins L25, L23, and L23a. 
20.50 20.50 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.78 0.72 -3.94 47 612 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 318 8 359 542 1 56.00 52 33.22 CHANGED tss..ts+AtpA.............pKAllKG..spu.........p+p.+KlRTSspF+..................RPKTL+hsRsPKYPR+SsPcp .........................................................................................s.pspuKAhKA........pKAVlKG...sau......................+Kt..+K.lRTSsTF+..................RPKTL+LpR.pPKYPRKSsP+.............. 0 80 141 211 +4187 PF01246 Ribosomal_L24e Ribosomal protein L24e Finn RD, Bateman A anon Prosite Family \N 20.80 20.80 20.80 20.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.37 0.72 -4.12 7 1060 2012-10-03 05:12:49 2003-04-07 12:59:11 15 10 541 76 653 956 68 67.60 43 44.21 CHANGED h+schCpFsGtcIYPG+GhhFlRsDupVFhFtsSKCcp.F+.++pPR+lsWTshYR+pHtKs.stEstccp ...............M+hchCtFsutpIYPG+Ghh.aV.....R....sD.uKlFpFp.sSKC.c.ps.F.p.h+....+NPRKlpWTthaR+tptKths.c...................................... 0 228 377 537 +4188 PF01386 Ribosomal_L25p Ribosomal L25p family Bateman A anon [1] Domain Ribosomal protein L25 is an RNA binding protein, that binds 5S rRNA. This family includes Ctc from B. subtilis Swiss:P14194, which is induced by stress. 21.50 21.50 21.60 21.90 21.40 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.86 0.72 -3.86 184 3556 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 3478 158 851 2159 2048 87.40 33 49.38 CHANGED LpAphRp....p..hG.KuuuR+LR+.pGtlPAVlYGts.pc.s.....hslslstpcltchl....ptshtssl.lsLp.lc....G......pp.tpsll+-lQhcPlps.plhHlDF ............lpuphRp.....p..hG..KuAuR+LRc.sG.plPAllYGts..pp..s........................lslplcppclhphlt...ptt...h...h....s.s.l.lsl.s.l.c.G...........cp.hp.Vllp-lQhcPh.+s.plhHlDF................ 
0 295 572 730 +4189 PF01016 Ribosomal_L27 Ribosomal L27 protein Bateman A anon Pfam-B_1340 (release 3.0) Family \N 20.50 20.50 20.80 21.10 20.00 20.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.54 0.72 -4.19 88 4787 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 4661 166 1215 2396 1851 80.80 60 81.70 CHANGED AHKKuuG.So+N.GRDSpuKRLGVK+aGGphVpuGsIIlRQRGT+hHPGpNVGhG+DaTLFALh-GhVcFppp......pp...++hVsVh ...................AHKKuuG.ST+N.GRD.SpuKRLGVK+aG...GphVpAGsIIlRQ.R.............G.............T...+hHPG.sN.....VGhG.+D.cT....LFAhsDGhV+FcpK........s+s...RKhVSV.......................... 1 418 783 1027 +4190 PF01777 Ribosomal_L27e Ribosomal L27e protein family Finn RD, Bateman A anon PSI-BLAST P51419 Family The N-terminal region of the eukaryotic ribosomal L27 has the KOW motif. C-terminal region is represented by this family. 22.60 22.60 23.50 22.90 21.40 20.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.14 0.72 -3.86 42 533 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 381 6 297 498 5 82.70 49 58.62 CHANGED KVTK+MuK++ltKRS+lKPFlKllNYNHLMPTRYol.Dl..t.Ksl......ls..p-sh+-ssp+ccA++pl+ttFE...ERa..........KsGKN+WFF.pKLRF ...................KVT+pMuKKKlsKRSK.lKsF..lKllNYNHLMPTR..........Yol.Dl......Ksl........ls..p-sh+-ssp..+..c....cA++p.....sKtt.hEERY............csGKN+WFF.pKLRF.................................................... 0 107 164 238 +4191 PF00830 Ribosomal_L28 Ribosomal L28 family Bateman A anon Pfam-B_1561 (release 2.1) Family The ribosomal 28 family includes L28 proteins from bacteria and chloroplasts. The L24 protein from yeast Swiss:P36525 also contains a region of similarity to prokaryotic L28 proteins. 
L24 from yeast is also found in the large ribosomal subunit 20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.65 0.72 -4.14 114 4841 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 4485 146 1176 2313 1891 59.80 39 73.86 CHANGED +c.CplTGKpshh.GNsVS.............H..upp......+T+RpatPNLppp+lassp.schl+l+los+sL+o..lcKpG ........................+hCplTG.Kpshs.GNs.hS..............H..upN.............pTKR+ahPNLpphRhhl.-...s...+h.h.+.l..pVSs+uL+s...lpt.............. 0 402 770 1005 +4192 PF01778 Ribosomal_L28e Ribosomal L28e protein family Bateman A anon PSI-BLAST P17702 Family \N 21.60 21.60 22.10 22.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.56 0.71 -3.69 67 772 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 384 5 475 736 4 113.20 30 53.54 CHANGED LhWpll..+psss.FhhKpp.......stpFop-shNlsshsoh+asG...................................Lsss+slsl..pss..sG....tlhlshKps...cpsppPu.+thpp.....hplsp..sh+cuhcplcphhpt.....aRt-Ltpt...uhpRhot.............lhpst ...........llWpll..pps..s.F...h.lKp.p...........sppFs+.......p.hNlsulsshp.ss...................................LuNs.+hssl...pts...pG..................slhlh.h.Kph...ccsp..tPu...+h....ap+..............splsc...shc+slpplccplth...........apt.Lt.pt.....shpRhotlhp..h...................... 0 169 262 389 +4193 PF00831 Ribosomal_L29 Ribosomal L29 protein Bateman A anon Pfam-B_1296 (release 2.1) Family \N 24.50 24.50 24.50 24.60 24.30 24.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.53 0.72 -4.28 156 5085 2012-10-02 11:59:50 2003-04-07 12:59:11 18 7 4872 242 1354 2614 1785 57.70 41 77.08 CHANGED spElRphos.cELpcclt-L+pELhpLRhppus.Gpl..ppsscl+plR+sIARlhTlls-+ ................hpELRphos.-ELppc...LtpL...KcELFsLR..hQtAT...GQL..ppsscl+pVR+sIARl+TllpE+............. 
0 475 879 1141 +4194 PF01779 Ribosomal_L29e Ribosomal L29e protein family Bateman A anon PSI-BLAST Q24154 Family \N 20.70 20.70 21.30 25.30 19.20 18.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.16 0.72 -4.09 30 523 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 333 6 267 471 3 39.40 67 40.99 CHANGED KSKNHTsHNQ.N+KAHRNGIKKP+c..pRa.ShKGhDsKFL+N .........KSKNHTsHNQ.o+KsHRNG.I.KKP+s...pRY.....t.SLK..G.lDPK..FLRN........... 0 77 129 188 +4195 PF03947 Ribosomal_L2_C L2; Ribosomal Proteins L2, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.50 21.50 21.70 21.90 21.10 21.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.54 0.71 -4.34 58 6875 2012-10-01 20:16:17 2003-04-07 12:59:11 13 12 6045 240 1601 4450 2557 124.70 56 47.82 CHANGED lchGNslPLppIPlGThlHNlEhpPGcGGplsRoAGohAplluKps..p.ashl+LPSGEh.+hlpppC....hATIGtVuNhsppphslG.KAG+sRWhGh.....RPpVRGVAMNPVDHPHGGGEG+sthGR.psVoPWGpP ..............................IKsGNsLPLp....sIPlGTslHNlEl+P........G..+GG.......Q.lARoA..........Gs.AQ..........llu.......+...........-G................pYsplRLsSGEh.Rhl.ssC.........+A........TlGpVGNsp.+.t..h..slG.KAGpsRWhGh.....................RP.sVRGss.MNPVDHPHGG.GEG+ss..h....G..R....p...PsoPWGh................................... 
0 533 1001 1341 +4196 PF00297 Ribosomal_L3 L3; Ribosomal protein L3 Finn RD anon Prosite Family \N 26.50 26.50 26.80 26.80 26.40 25.20 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.72 0.70 -4.99 13 5795 2009-09-16 22:34:17 2003-04-07 12:59:11 17 16 5080 235 1672 3675 2404 198.80 40 84.21 CHANGED KsGMTplh.........scs.tss.sVTllEssPssVltlpshs...............................................s-GhpulplshpphKcppsshplluHhphtsss....tKtalhEhplssu..........cpaE..pslsV.slFppsEhlDVtGlTKGKGFpGshKRWGhp+hPtpHupu..+R+lGslGA.hcPuRVhhosthsGphG.c+RT.hNhKIh+lss......................................csshlhlKGuVPGs+pplVpl+ ..........................................................................................KlGMTplF.............scs.G.hlP.VTVl-ss.s...shVsQ..l+oh-................................................................................................................................sD...G.Y...p....A..l..Q..l...s...h...s....s.....h........+........t.....p..p........s........s..K...P.....t........G...H......htKAss...................t..................spRhlh.E...h+.hsss............................pthp.l.Gp..plsl.-l.FtsGc.hVD.VoGsSKGKGFt.........GslKRasFpt.t.s.t...o.H.G.sph....HR.psGSl....Gs...tts..P....uR.VFKG++MAG+MG.sc+..VT..l.QNLcV.l.+VDs...............................................................EcsllLlKG.u.V.PGsptuhlhl+................................... 0 600 1058 1399 +4197 PF00327 Ribosomal_L30 L30; Ribosomal protein L30p/L7e Finn RD anon Prosite Domain This family includes prokaryotic L30 and eukaryotic L7. 
22.40 22.40 22.40 22.50 22.30 22.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.32 0.72 -4.37 115 5033 2009-01-15 18:05:59 2003-04-07 12:59:11 15 10 4388 240 1490 2792 891 51.90 37 54.41 CHANGED tlplshl+u.hl...utt.cp+pslcsLGL++lspsVhhcsoP.slp.........GMl.pcVpaLV .....................l+lThh+S.sI....Gp..pp+tT....l.puLGL+...+lspo...Vhh.c.D.s.P.ulR.............GMlppVpahV.................... 0 474 898 1215 +4198 PF01197 Ribosomal_L31 Ribosomal protein L31 Finn RD, Bateman A anon Prosite Family \N 21.80 21.80 22.20 22.70 21.70 21.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.37 0.72 -3.98 163 5588 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 4410 108 1124 2737 1977 73.20 45 92.06 CHANGED M.KpsIHPc.Yppl.shpss.sGspFhopSTh...............pt..l...pl-lsSps..HPFYTG.c.p+h.lDosGRV-+Fp++a.utt ..................................M.Kp..sIHPc.Y+.p.l.sh.pso.sG..ptFhotSTh.................shshl...pl-lsSps..HPFYTG...+....Q+h..ls.s.sGRV-+Fp+RaGh.h............... 0 368 722 947 +4199 PF01198 Ribosomal_L31e Ribosomal protein L31e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 26.10 25.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.60 0.72 -4.31 59 847 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 531 78 443 768 76 81.00 49 67.39 CHANGED pcllTR-hTIsL++th+tsshKKRAP+AlKpIRcFsp+pMtTc..DVRlDscLNctlWu+GI+ssPtRlRVRluR+cs-p-suppc .....................................-VVTREYTIs.lHK+.lHGl..............sFK.KRAPR.AlKEI+KFAtKpMGTs..DV.RlD.s.cLNKtlWu+GI....+.s.VPhRl.RVRloR+.RN--.E-u.p...................... 0 137 224 315 +4200 PF01655 Ribosomal_L32e Ribosomal protein L32 Bateman A anon Pfam-B_1346 (release 4.1) Family This family includes ribosomal protein L32 from eukaryotes and archaebacteria. 
25.00 25.00 26.20 25.80 24.60 24.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.22 0.72 -4.01 75 903 2009-01-15 18:05:59 2003-04-07 12:59:11 13 11 637 76 442 805 68 103.50 53 78.33 CHANGED +ph+FpRapSc+ap.Rlss..sWRKP+G.lDs+lRR+a+Gphh...hPpIGYGSs+csRtlhPS..GacchLVpNlc-L-hL......pscshAucIApsVuu+KRhpIl.c+ApcLul+VhNs ..................+sK+FhRHpSDRah.+lpp..sWRKP+G.IDsRVRR.RF..K.Gp.hh...MPsIGYGSsK+TR......HhlPo..GF+..+F.LVHNV.+-LEhL....hM.pN.+sasAE......IAHsVSu+KRptIl.cRA.tpLul+VsN......................... 0 133 228 323 +4201 PF01783 Ribosomal_L32p Ribosomal L32p protein family Bateman A anon PSI-BLAST P31558 Family \N 21.20 21.20 21.20 21.30 21.10 21.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.89 0.72 -3.79 171 5348 2012-10-03 10:42:43 2003-04-07 12:59:11 18 14 4994 161 1126 2495 1357 53.90 34 82.85 CHANGED AVPK++sS+o++chRRup.h.....plpss.sl................s.ssp..su.phplsHplssss.G..aYps+plhpt ...............AVPp++sS+o..++shR.Ro..+.h......pl.sss..sl................s.s.s..sG..-.....h.+.lsH...+...lshs...G.....hYpG+plh............................................. 0 378 726 946 +4202 PF00471 Ribosomal_L33 L33; Ribosomal protein L33 Finn RD anon Prosite Family \N 21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.48 0.72 -3.72 117 6317 2009-01-15 18:05:59 2003-04-07 12:59:11 15 6 4846 154 1214 2452 978 49.00 45 90.31 CHANGED R...htlpLt..Cop..............sssppYsTsKN++spsc+LEl+KaCs........hspKHTlH+EtK ....................R.pIpLt..sot........................sss+pYhTsK..N..+..R..N..s.P-RLE..lKKasP........hs+KHsla+EsK............ 
0 417 791 1027 +4203 PF00468 Ribosomal_L34 L34; Ribosomal protein L34 Finn RD anon Prosite Family \N 25.00 25.00 30.30 33.30 22.90 22.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.21 0.72 -4.28 143 4214 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 4149 154 1033 1670 249 43.90 63 86.81 CHANGED hKR.TaQPoph+RpRsHGFRuRMp.T+sGRcVLppRRtKGR+cLos ............MKR.Ta.Q.Poph.+RpRsHGFRsRMu.T+sGRpVLApRRsKGRKpLos.... 0 353 675 875 +4204 PF01199 Ribosomal_L34e Ribosomal protein L34e Finn RD, Bateman A anon Prosite Family \N 20.60 20.60 21.50 21.40 20.00 19.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.21 0.72 -3.76 52 630 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 462 6 352 585 12 89.50 47 77.16 CHANGED Ms...+lhh....++R.sYpT+SN+++hs+TPG..G+lVhpahKKpsshP+..CupCtph..LpGl.thRstchp+..hsKpp+pVsRsYGGshCspCl+c+Il+AFL ...............Ms..p.RlTaRRR..sYsT+SN+pRll+T.PG..G+LVh.ahKKpussPK..CGs..C.st+..LpGl.......slRPp.chtp..lS..+scKoVsRsYGGshCupCV+-..R..IlRAFL................................................ 0 122 194 280 +4205 PF01247 Ribosomal_L35Ae Ribosomal protein L35Ae Finn RD, Bateman A anon Prosite Family \N 21.00 21.00 21.00 29.10 20.90 19.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.25 0.72 -4.21 44 533 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 374 8 317 528 13 93.90 51 77.29 CHANGED LY..sKulahGY+Rup+NQppssuLlKIEGVss+c-upFYlGKRlsYVY+uppppp.........t.o+hRsI.WGKloRsHGNSG....sVRA+F+.pNLPspAh.GpplRlh ....................LasKuhahGY+Ruh+NQp.pT.uLlKIEGVps+c-sp.FYlGKRs.AYVY+A+pp..pp.....................so+hRsI.WGKVTRsHGN..SG....sVRAKF+.pNLPs+uhGtplRVh.................... 
0 112 169 243 +4206 PF01632 Ribosomal_L35p Ribosomal protein L35 Bateman A anon Pfam-B_1156 (release 4.1) Family \N 22.20 22.20 22.30 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.95 0.72 -4.07 22 4520 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 4430 154 1091 2113 1842 60.70 44 86.05 CHANGED sKhKTp+uusKRFKtTusG.thhRc+As+pHlLtKKosp+K.R+Lcppshlsps-sctlcthL ............PKhKT++uAAKRFKhTuoG.plK.Rp+Ahp..pH....lLs..+..K.....osKpK.R.....pLR.tssh.V..s.p.uDh.cpl+phL.............................. 0 376 708 922 +4207 PF00444 Ribosomal_L36 L36; Ribosomal protein L36 Finn RD anon Prosite Domain \N 21.70 21.70 22.00 21.80 21.60 21.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.90 0.72 -3.97 108 5148 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 4407 121 1073 1688 153 38.20 58 90.83 CHANGED MKVRuSl...K+hCcsC+llRR+......G+lhVIC.ssP+HKQRQG ......MKVRsSV...Kp...h.C..ccC+ll.+R+......G+lhVIC....sNP+HKQ.RQG... 0 360 686 906 +4208 PF01158 Ribosomal_L36e L36e; Ribosomal protein L36e Finn RD, Bateman A anon Prosite Family \N 19.40 19.40 19.90 20.30 18.60 18.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.33 0.72 -4.06 39 534 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 365 6 288 505 4 94.80 52 79.12 CHANGED ss+...slsVG..LNKGHpsT+...........p.ppsRsS++...KGhhoK+s+hVR-llREVsGaAPYE+RshELLKluKD...KRALKFtKKRLGTHhRAK+KREEhpslltt..RKt ..................h.+.slsVG..LNKGHp.sT+...............ps.tsR.S+p............KG.thoK+o+FVR.-llREVsG.aAPYE+Rs.hELLKloKD...KRALKhhKK.R.L..GTHhRAK+K.+EEhsslltt.R+..................... 1 97 156 230 +4209 PF01780 Ribosomal_L37ae Ribosomal L37ae protein family Bateman A anon PSI-BLAST P54051 Family This ribosomal protein is found in archaebacteria and eukaryotes. It contains four conserved cysteine residues that may bind to zinc. 
21.90 21.90 21.90 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.22 0.72 -4.12 55 636 2012-10-03 10:42:43 2003-04-07 12:59:11 14 4 512 74 373 557 67 86.00 53 92.54 CHANGED s+RTKKVGlsG+aGsRYGuoLRKpV+clElpQ+u+YsCsaCG+pu.VKRpusGIWpC..+pCscshAGGAYsssTsuutos+psIcRl.cchp- ..............KRTKKV.GIsGKYGTRYGASLRKhVKKhEloQ...Hu+YsC..sFCGKss.lKRp.uVGI.WpC..cpCt..+slAGGAashsTsuAsol+S.slRRL.+-...p................... 0 127 212 300 +4210 PF01781 Ribosomal_L38e Ribosomal L38e protein family Bateman A anon PSI-BLAST P23411 Family \N 21.00 21.00 21.20 21.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.30 0.72 -4.22 26 458 2009-09-10 15:39:13 2003-04-07 12:59:11 13 5 349 6 280 401 2 68.40 56 82.79 CHANGED P+pIpDIK-FLphsR.RpDA+...Sl+IK......Kssps.....TKFKVRCS+YLYTLVVsDpcKAcKLcQSLPPsLplp-l ....................................P+pIp-IK-FLhhAR..R..KD.A+.....SV+IK...........KNpcs............lKFKVRCSRYLYTLVlp.Dp-KA-K.LKQSLPPuLpVp-l............... 1 97 156 225 +4211 PF00832 Ribosomal_L39 Ribosomal L39 protein Bateman A anon Pfam-B_1293 (release 2.1) Family \N 21.60 21.60 21.80 24.60 20.60 21.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.05 0.72 -4.49 44 553 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 446 76 309 534 23 42.30 60 75.51 CHANGED pKhRLAKAhKQNR.lPtWlhlKTsp+lchNsKRRaWRRo+LKl ...hKp+LAKttKQNRPlPpWlRh+TsNpIRYNuKRRHWRRTKLt........ 0 101 179 245 +4212 PF00573 Ribosomal_L4 L1e; Ribosomal_L1e; Ribosomal protein L4/L1 family Bateman A anon Prosite Family This family includes Ribosomal L4/L1 from eukaryotes and archaebacteria and L4 from eubacteria. L4 from yeast has been shown to bind rRNA [1]. 
20.80 20.80 21.20 20.80 19.80 20.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.01 0.71 -4.91 165 5579 2009-01-15 18:05:59 2003-04-07 12:59:11 17 12 4937 231 1620 3471 2695 191.40 36 83.57 CHANGED lpL...ssplFsh.....ph.ppsll+psVht.hus.................pRpGTtssK.sRu-VsG....us+KPa+QKGTGpARtGoh+..............uPha+GGGhsaGP+P.R.sash.+lN+K..........h++hAl+oALotpsps.splhll-.....s..h.p..lp............psK......................TKphhphLps.................lph............................p.............psLllss........p................sp.................................slhhuuRNlssV.clhsspp.....................lNlhcllptcp....lllTcsAlcp.lpch ........................................pLscslFu.h..-.h...Nps..llapsVhs.hAs...........tRQGT+.ssK..sR....u-..VoG....uG.+...KPa+QKGTG.RA....RpGolR..............uP.aRGGG.h.sF.uPpP..R..sa.u..h.KlsKK.........................h+RhAl+SsLSp.....pspp.sp.l.hll-.............sh..s...hp..........................sPK......................................TKp..h.sph..L.ps......................lsl.........................................c...cs..L.llss...................c...............hc..c.........................................................slh...LuuRN..l....s..s.....V....cVhs..s.s.s.....................lsshsllstcc....llhTpsAlcplEE.h......................................................................... 0 573 1032 1366 +4213 PF01020 Ribosomal_L40e Ribosomal L40e family Bateman A anon Pfam-B_884 (release 3.0) Family Bovine L40 has been identified as a secondary RNA binding protein [1]. L40 is fused to a ubiquitin protein [2]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.75 0.72 -4.37 3 580 2009-09-11 04:53:38 2003-04-07 12:59:11 12 14 450 7 314 466 8 48.80 64 40.21 CHANGED VMEPTLsALAKKYNCEKKVCR+CYARLPPRATNCRKKKCGHSNsLRhKKKLK .............l.IEPSLp.LApKYNC-KhICRKCYARL.PRAoNCRK+.KCGHoNpLRPKKKlK......... 
0 93 167 254 +4214 PF05162 Ribosomal_L41 Ribosomal protein L41 Wood V anon Wood V Family \N 22.30 22.30 23.10 23.10 22.20 21.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.82 0.72 -7.02 0.72 -4.27 3 137 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 118 2 61 85 2 24.80 85 51.88 CHANGED MRuKWKKKRMRRLKRKRRKMRQRSK ..MRAKWRKKRhRRLKRKRRKMRtRSK.... 0 22 33 49 +4215 PF00935 Ribosomal_L44 L44; Ribosomal protein L44 Finn RD, Bateman A anon Pfam-B_1065 (release 3.0) Family \N 25.00 25.00 26.30 28.60 23.70 22.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.73 0.72 -3.97 44 678 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 513 75 382 617 73 76.10 56 65.83 CHANGED pHo.HcVspYK.pGKtothupGcRRacR..+ppGaGGpp+Pl.c+.uKs......TKKlsL+hcCspC+ptphpsh..hRsK+FElsp ..........KHp.HKVTQYK..pGKsShhAQ.GK.RRYDR..KQsGYGGQTKPlF+KKAKT......TKKlVLRLEC..s..p..C..+p+pp.h.sl..KRCKHFELG.G.............. 0 123 209 303 +4216 PF00281 Ribosomal_L5 L5; Ribosomal protein L5 Finn RD anon Prosite Domain \N 20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.57 0.72 -4.18 17 5354 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 5030 233 1428 3077 2087 56.40 51 30.97 CHANGED Nh..MclP+..lpKlVlNhGlGEuspc...LppuhptLptIoGQKPlhT+A++olusF+lR ................................slMplP+..l-KIVlNMGVG...E.Ass..c.pKhL......-.sA..........s.p-.....Ls.......hIo.GQKPl......lTK.A+...KSlAuFKlR................. 0 489 916 1202 +4217 PF00673 Ribosomal_L5_C L5_C; ribosomal L5P family C-terminus Bateman A anon Pfam-B_69 (release 2.1) Domain This region is found associated with Pfam:PF00281. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.12 0.72 -4.25 133 5473 2009-01-15 18:05:59 2003-04-07 12:59:11 16 5 5067 233 1480 3191 2159 95.30 53 52.03 CHANGED sIGs.+VTLR.Gc+MacFL-.+llslslPRl+DF+GlsspuFDupG.NashGl..cEpllFPElc..Y...Dth.p..lhGMDlsll...............ToAcssp..........cu+.LLcthuhPFh ...........PIGsKVTLR.G-.RM.a.EFL-.+LlsluLPRl..R....DF.RGl........s........s........+u.F....D....G........R........G.N.YohGl..+EQl..I..F.PEI-.....YD+l-+..l..RGhDIslV.................................TTA...p...oD-..........EuRtLLpthuhPF.t............................................... 0 504 944 1249 +4218 PF01159 Ribosomal_L6e L6e; Ribosomal protein L6e Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 22.20 22.40 20.50 21.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.43 0.72 -3.81 46 607 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 377 8 327 549 4 102.90 43 45.55 CHANGED lNGVPLRRVsQpYVIATST.KVDlusVc..........l.cc..lsD....sYFp+p+....pcp+......Ks.Es.shFs..pctp.cppsscpR+s.......D.....QKsVDssllsuIKKp.Pp...LppYLuupFoL+suphPHchpF ...........................................................lNuVPLRRlsQpYVIATST.KlDlSu..Vc..........l.c.+..lsD....tYFp+c+..........p+t+.........+p..Eu.-hFp.......pcpc...+h.....plsppRKt..........D..............QKsVDptllstIKt.h.P.......LpsYLtuhFuLps.G....hPHchhF............................................. 
0 110 171 254 +4219 PF03868 Ribosomal_L6e_N Ribosomal protein L6, N-terminal domain Finn RD anon DOMO:DM07096; Domain \N 25.00 25.00 25.80 25.80 24.90 24.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.95 0.72 -4.08 19 310 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 167 1 127 268 0 59.80 38 24.76 CHANGED hhspK...tpt+sSRNPsLsRGIGRYSRSAMYpR+uLYKhK...sKsstsh......KccsttsssKsl ..................ht...tK....tp.+ssRNssLs.+GIuRYSRStMYp++..A.lY+hK....hpsstst.......Kp+s.sslsKs............................... 0 38 57 81 +4220 PF01248 Ribosomal_L7Ae Ribosomal protein L7Ae/L30e/S12e/Gadd45 family Bateman A, Finn RD anon Prosite Domain This family includes: Ribosomal L7A from metazoa, Ribosomal L8-A and L8-B from fungi, 30S ribosomal protein HS6 from archaebacteria, 40S ribosomal protein S12 from eukaryotes, Ribosomal protein L30 from eukaryotes and archaebacteria. Gadd45 and MyD118 [1]. 20.40 20.40 20.50 20.50 20.30 20.20 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.83 0.72 -4.47 54 5260 2012-10-10 14:40:03 2003-04-07 12:59:11 21 25 2077 176 2228 3938 199 91.70 23 56.33 CHANGED pplpphlphuhcssp.lthGhpcssKslcps..cA+LlllApssss.phhp......hl.hlsp..cpslsh..hhls..uttcLGphsGhphh.ssuhulhssGsuphlh ...........................p..h.phlt.h.uh..cstp...lhhGhpp.......shK....s..lcps.............pup...........L..V.llA......p....Dss........s..........phht.........................l..shsp..........ctslP.h....h.h.lt.....s.p........t....cLGpssGpph......ssslulhs.t.t....h................................. 
3 763 1239 1762 +4221 PF03948 Ribosomal_L9_C Ribosomal protein L9, C-terminal domain Finn RD, Bateman A anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.56 0.72 -3.90 147 4474 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 4407 129 996 2561 2059 87.20 34 56.38 CHANGED hp....pA..pplAppLp..shs..lpltt+uG-.sG+LFGSVTspDIA-ulppp..GhclD++clpl.sp..s.IKslGpaplsl+LHs-Vsuplplp..Vssp .........................................h.spApthtppLp....shp..VplssK.u......G..-....sG..+.L..F.G.....Slos+-IA-Alppt....G.......lc.......l-K+cl......cLsp...s.I+.slGpapVsV..+L.Hs.-VpuplpVpVst................. 0 346 671 852 +4222 PF01281 Ribosomal_L9_N Ribosomal_L9; Ribosomal protein L9, N-terminal domain Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.04 0.72 -4.72 157 4600 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 4492 134 1130 2731 1908 48.00 47 29.79 CHANGED McVILhccVpsLGctG-lVcVpsGYARNaLlPpshAhhATpts...lcph....c ................McVILlccVtsL..G.p..hG-lVpV+sGYARN.FLlPpGhAl.ATptslpthc.................... 0 398 748 961 +4224 PF00338 Ribosomal_S10 S10; Ribosomal protein S10p/S20e Bateman A, Finn RD anon Prosite Family This family includes small ribosomal subunit S10 from prokaryotes and S20 from eukaryotes. 
21.20 21.20 21.90 21.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.11 0.72 -4.14 193 5735 2009-01-15 18:05:59 2003-04-07 12:59:11 17 13 5081 211 1645 2828 1995 96.50 50 83.97 CHANGED lRIcLcua-t..phL-pssppIlcsu......ccp.uhplpGPlsLPT+p.pphTlh+SPasp.Kco+-p.FEhRsHKRll-l................pssscsl..ctL.....h.plpl....PsGVsl-lpl ..................................................IRIRLKAaDH..+llDpostcIV-TA.................KRT..GAp....V.p...G.....P.I.P.LPT++..phaTll.+SPHhp..K.....cSREQ.FEhRTHKRLIDI.l.......................pP.T...s+TV....DuL......M...+L-L.....suGV-lEIp....................................... 0 550 1016 1368 +4225 PF00411 Ribosomal_S11 S11; Ribosomal protein S11 Finn RD anon Prosite Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.13 0.72 -3.88 13 6124 2012-10-02 16:33:16 2003-04-07 12:59:11 14 12 5534 199 1594 3230 2256 109.70 55 79.77 CHANGED GlsHIpuSF....NNTIlTlTDlpGpslsWuSAGusGFKuoR.KuTPaAAptAApsAActsh-pGhpplEVpl+G........PGsG+-uAlRAlp+uGlhIspIcDVTPhPHN.GCRPPK+RR .....................................................GlAHIpuoF....NNTIVT.I.T...Dh.p.......G........N.........s........l.u..W.....u.....SA...G.u.h.G.F...K.GSR.....K.STP.FA.AQ..hA.A...E...s.Auc..t..A.........h....E.......h.......G....l....+..s...l-...V...h..V.K.G..............................PGsGRE....o.A..l....RA.L.....p....u.....u...G.....l....c........l....o....tIpDV.TPlPH...N.GCRPPK+RR...................................... 0 517 960 1328 +4226 PF00164 Ribosom_S12_S23 S12; Ribosomal_S12; Ribosomal protein S12/S23 Sonnhammer ELL anon Prosite Family This protein is known as S12 in bacteria and archaea and S23 in eukaryotes. 
19.90 19.90 20.00 20.00 18.80 18.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.68 0.71 -4.69 18 6678 2012-10-03 20:18:03 2003-04-07 12:59:11 20 19 5947 204 1582 3422 2092 119.60 62 94.59 CHANGED sThppLlR+c...........RcchtpcsKssALcGsPp++GlChclhslpPKKPNSAlRKlsRV+Lp..NGhcVTAaIPG-G..HsLpEHs.VLlcGGp......VtDLPGVRY+ll+GhhDsuGVp..tRtpuhSKYGsc+P+ .....................................TlpQ.Ll.R.+.s..............R.p...p.h.h.p+.s..+....sP.A.............L..............p...............u.............s.PQ+.RGV...CTRVh..............ThTPKKPNSALRKVARV..RL......o........NG......hEV.TAYIPG.G...HNLQEHSVVLlRGGR.......VKDL..PGVRYH..IVRG.sLDsuGVp........sRpQuRS...KYGsK+PK...................................... 1 531 988 1316 +4227 PF00416 Ribosomal_S13 S13; Ribosomal protein S13/S18 Finn RD anon Prosite Family This family includes ribosomal protein S13 from prokaryotes and S18 from eukaryotes. 24.60 24.60 24.70 24.70 24.40 24.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.39 0.72 -3.59 15 5791 2012-10-02 21:21:44 2003-04-07 12:59:11 17 7 5215 199 1480 3234 2174 105.00 49 84.93 CHANGED RlhssslsusK+..lhhALTtIaGIG++pAptlhpcsslDtspRsu-Lo--plcpltphlsp.......................ahlpu-LcpclppDIcRLhcIcsYRGlRHhtGLsVRGQRTKTNuRT .......................................RIAGVslPpc...K+..l.IuL.T.a.IaGIGpspup....pIltps.G.............ls.....s....h+lp.-Lo-...-plsplRc..ls.p.........................................................................ahVEGDLRREls.hsIKRLh-lG..s....YRGlRHR+G.LPVRGQ+TKTNART.......................................................................... 0 503 954 1256 +4228 PF00253 Ribosomal_S14 S14; Ribosomal protein S14p/S29e Finn RD anon Prosite Family This family includes both ribosomal S14 from prokaryotes and S29 from eukaryotes. 
19.70 19.70 19.80 20.50 19.30 19.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.84 0.72 -4.58 178 6805 2009-01-15 18:05:59 2003-04-07 12:59:11 16 10 5516 200 1562 3442 1882 54.30 45 63.36 CHANGED hp..lpph.cspshsRhpsRChlsGRs+..uhhR+F.sl..sRhsFR-hAtpG.lPGlpKuS ................t..lpphP+s.usssRhpsRC............phsGRP+..uhh..RKF.GL..sRlphREhAhcGplPGlpKuS.................... 0 508 968 1302 +4229 PF00312 Ribosomal_S15 S15; Ribosomal protein S15 Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.90 20.90 21.10 21.90 20.60 19.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.55 0.72 -4.22 9 5932 2009-01-15 18:05:59 2003-04-07 12:59:11 17 14 5410 212 1580 3366 2136 82.00 44 77.02 CHANGED htpthhlpphtcpppspGSsEhQlhhLTp+l.+LppHhccH+KDapSpRGLhphluKR++LLsYL+pcs.....hh+YcphIppLGlR ...................................c..Ksplltca..t..p..p..c..s..DT.GSsEVQlAlLTt+IspLs.cHhc..p.H....K..K.D+cS+RGLl+hVu+RR+LLsYL++pD......htRYcp...LIpcLGLR.................... 0 536 990 1322 +4230 PF00886 Ribosomal_S16 Ribosomal protein S16 Bateman A anon Pfam-B_1025 (release 3.0) Family \N 21.20 21.20 21.40 21.60 21.10 20.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.89 0.72 -4.25 158 5911 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 5742 192 1200 2936 2304 56.10 47 58.89 CHANGED RhGpK+pPaY+IVVsDuRssRD.G+aIEplGhYsPh....tp........................................plplch-+hphWlspGAQPo ..........RhGtK+p..P...FY+lVVsD.uR...s.....R..D.GR..hIEplGhY.....N..Plts..............................................plplch-+.lhaWLspGAQPo.............................. 
0 415 780 1019 +4231 PF00366 Ribosomal_S17 S17; Ribosomal protein S17 Finn RD anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.70 20.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.33 0.72 -3.93 15 5324 2012-10-03 20:18:03 2003-04-07 12:59:11 15 10 4890 201 1529 2971 1891 68.80 50 69.69 CHANGED GhVVSsKMcKTllVcl-phthHPKYs+hlKRpKKh.AHsssshppl..GDlVcItEsRPLSKTKRFpVlcV .............................GpVV.S.D.K..M-KTIsVtlE.p..........hh.p.HPlY.u.........K.......hl++opKh....+AHD.Es..N.p.s..p.l....GDhVcI..h.E.s.RPLSKTKpapLVc........................ 0 521 966 1279 +4232 PF00833 Ribosomal_S17e Ribosomal_S17; Ribosomal S17 Bateman A anon Pfam-B_1566 (release 2.1) Family \N 22.80 22.80 23.30 24.70 22.50 22.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.63 0.71 -4.36 32 683 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 508 9 396 637 67 107.30 53 84.61 CHANGED MGRVRTKTVKRAuRhllEKYYs+LT.hDFppNK+ls-E..VAlIsSK+LRNKIA...GasTHLMKRIp+GPVR....GISlKLQEEERERR.saVPEhStlDhs......hlpVDt-Tp-ML+p.hsh..slsshhl.st .........MGRVRTKTVK+uu+hlIE.+YYs+LT.hDFcTNK+ls-E...lAlI.s.SK+LRNKIA...G.YlTHLM.KRI....Q+G.PV.R....GIShKLQ.EE.ERER+.paVP-hSAl.-.p........lcVD.-Tt-hLc..Lsh..pls......s................................................................ 1 138 226 324 +4233 PF01084 Ribosomal_S18 S18; Ribosomal protein S18 Finn RD, Bateman A anon Pfam-B_712 (release 3.0) Family \N 21.00 21.00 21.20 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.46 0.72 -4.10 121 5681 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 5182 193 1326 2673 1849 53.20 50 56.97 CHANGED tpphp.hlD....Y.KcschLp+Flo-.pGKIlPRRlT..GssuKpQRplspAIK+ARhlALL ......................s..thppID.........Y.KDs.shL+c.FIoE.pGK..IlPRRlT..GssuKtQRplspAIKRARhluLL............ 
0 451 833 1106 +4234 PF00203 Ribosomal_S19 S19; Ribosomal protein S19 Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.70 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.69 0.72 -4.36 22 7420 2009-01-15 18:05:59 2003-04-07 12:59:11 16 5 6888 210 1487 3278 2069 78.30 55 82.39 CHANGED Rol+KsPFssh+Lh+Khcp.s.ttcK.phl+TWSRsSsIlPpMlGpslulYNGKpal.VhIoscMlGHKLGEFu.TRphttH ..........................................................RSLKKuP.F.V.s.t.H.L.h....+K...............l...-.t.h....s........t...p...........t.c.K...cs......I+TW...S...RpST.IhP.shlGhTIA.VHNG..+cHl.PValT.-.c.MV.G.HKLGEFAPTRTapGH............. 1 498 945 1255 +4235 PF01090 Ribosomal_S19e S19e; Ribosomal protein S19e Finn RD, Bateman A anon Prosite Family \N 24.20 24.20 24.50 25.30 24.10 23.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.46 0.71 -4.75 62 756 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 526 9 441 663 97 128.00 43 85.22 CHANGED sTVcDVsscchlptlAphLKcs.s+lcsP-WsshVKTGsaKEhsPps.sD.WaYhRsASlhR+lYlcu.PlGVtpl+phYGG++c..+G..s+Ps+as+uSGulhRpsLQpLEphGlVcKs......psGRplTspGpp.LD+lAtplhpcht ..............................oV+DVstpchlpshAtaLK+p...GKlcl..........PpWsDhVKTu.ttKEhsP.-...D.WaYhRs...ASlhR+lYlRs.........slGVG..php+lYGGppp....pG..spPsHascu.SGulhR+sL.QtLEphtllEps.........pu....GR.plTtpGp+DLD+IAsplhtt.p...................... 
0 182 268 368 +4236 PF00318 Ribosomal_S2 S2; Ribosomal protein S2 Finn RD anon Prosite Family \N 23.80 23.80 24.00 23.90 22.30 23.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.26 0.70 -5.11 177 7370 2009-01-15 18:05:59 2003-04-07 12:59:11 15 20 5811 224 2025 5251 3040 185.10 40 79.94 CHANGED lLcAGlHh.GH.p......sphWNP+Mp...Y.Iasp.R..s...GlHIIDLpc.ThthlppAhchlpphst.ps....................................................................sc....lLFVGTKp...pu..pchltctApcs......sth.al.s.....pRW.lGGhLTNapolppplpphc......h................t.hsK+p.h.htcpht+Ltp.hsGlpphtpl..Pc....l...lhllDs....pp-p.AlpEApcLsIPll..ullDTNssPsh...lDasIPuNDDuh+ulpLhhphluculhcup .....................................................................hLcAGVHF.GH.p......T..+pWN.PKMt...a..Iasp....R...s.........................u.l.a.IIsLp+.Tht.hhp.pAhphl....pphst..ps....................................................................splLFVG.TK+........pA..............p-slt....c...tA.tcs.............s.th...aV..N.........pR.W.LGGhLTNapTl..ppplp.+hc.c....lct.tpsG.............................hphhsKKEhh...h.p+pht+.L.pp.....LG.GIcp.........Mttl..........Pc................h.lhllDs.....pc.E....p..hAlpEA....ppLsI....Pll..ul.l.DT.N.sc.P-.................lDhsIPu..ND.DAh+ulpLhhthhApAlh-u...................................................... 0 683 1211 1641 +4237 PF01649 Ribosomal_S20p Ribosomal protein S20 Bateman A anon Pfam-B_1685 (release 4.1) Family Bacterial ribosomal protein S20 interacts with 16S rRNA [1]. 
20.60 20.60 21.40 21.40 19.40 18.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.93 0.72 -3.60 14 4402 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 4355 191 972 2284 1917 80.90 41 93.95 CHANGED ANhKSApKRh+psp+pRl+NpuhKStl+ThlK+shpslpss-ps.......tAppths.stphlD+supKGllHKNpAAR+KS+LAttlpp ...........ANhKSAhK.Rscpsp+ppt+Ntuh+SthRThlK+s.c....tAlts.s.D.p.p...................sAp..pthptApphlD+sA..s.KGlIHKNpAAR+KSRLutplp.t................... 1 342 654 833 +4238 PF01165 Ribosomal_S21 S21; Ribosomal protein S21 Finn RD, Bateman A anon Prosite Family \N 21.00 21.00 21.70 21.30 20.50 19.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.66 0.72 -4.52 140 4324 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 3974 61 1053 1753 1966 57.00 45 77.38 CHANGED s.pVplpcs-.sl-pALRRFK+php+sGlhp-h+cRc...aaEKPotcR++Kpttut++p.t+ ............s..lhV+-NE.shDsALRRF...KRs.spKsG.........llpEhR+.R.E...aYEKPoscRK.++ptuAhKRch+............. 0 333 644 864 +4239 PF01249 Ribosomal_S21e Ribosomal protein S21e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.40 25.10 18.30 19.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.94 0.72 -4.04 36 472 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 349 6 258 420 6 79.10 54 76.04 CHANGED MpNDsGchVDLYlPRKCSATNRlIsAKDHASVQINlucVDts.G+.hsGphpTaA...lsGhlRphGESDcslsRLspccGllsps ..................MpN-tGch.VDL..YlPRK......C.S.AoNRIIpAKDHASVQIslucV.Dcs..GR.hs.GphpTYA...lsGhlR......th....GESDD.ulsRLApc-Gllt..s...................... 
0 88 142 211 +4240 PF01282 Ribosomal_S24e Ribosomal protein S24e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.30 25.00 24.30 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.61 0.72 -4.19 70 778 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 520 12 448 664 66 83.90 46 63.63 CHANGED hhl-lhHsup.uosS+p-l+-KLAphhps.ss-tlhlash+TpFGsG+osGauhIYDsh-thKchEPcatLhRs.slhpc.ct..tucc ..............................hVlDVLHPG+.AoVsK........sElREKLAchY.Ks.ss-sl.hVFGFRTpFGGGKoT..GFuhIYDol-hAKKhEPKa..RLsRp..GLhcKhc..uR.p....................................... 0 129 224 315 +4241 PF03297 Ribosomal_S25 S25 ribosomal protein Mifsud W anon Pfam-B_4038 (release 6.5) Family \N 28.80 28.80 28.80 29.10 28.70 28.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.42 0.72 -4.00 24 591 2012-10-04 14:01:12 2003-04-07 12:59:11 10 9 405 8 328 514 4 102.30 49 83.60 CHANGED MPPKc........p..pttKtttusuGGKs.+KKKWSKGKs+DKLsNhVlFD.KuTYDKLhKEVPsYKlITsSVlS-RLKIsGSLARpALc-Lpp+GlIK.V.s+HpuQhIYTRus .....................................t......t.t.pt..s.s...t.uGGKt..cKK.KWSKGK..V.+DK...lNNhV.L.F.D..csTYDKLhKEVPs..Y.KLIT...PuVlS-RLKIpGSLARtALp-Ltp+GlIKhV.spHpsQhIYTRs................. 0 112 176 252 +4242 PF01283 Ribosomal_S26e Ribosomal protein S26e Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 20.70 21.00 19.80 19.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.46 0.71 -3.85 37 581 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 425 4 325 528 16 107.20 56 86.00 CHANGED MspKRRNsGRsK..+uRGHVp.lRCoNCuRsVPKDKAIKRFslRNlVEsAuhRDlp-Au.lY...psYslPKLYhKhpYCVSCAIHu+lVRlRSc-.....sR+.Rsss.+h....t.ttp.sst.s ...............MspKR+NsGRsK..+G.RGHVpslRCoNCuRClPK..DKAIK+FslRNIVEu..AA........lRDlsEAS.Va...............stYsLPKLYsKLpYCVS.CAIHu+lVRsRS+E.....sR+sRsPP.Rht.t.........ss.............................. 
0 118 180 260 +4243 PF01599 Ribosomal_S27 Ribosomal protein S27a Bashton M, Bateman A anon Pfam-B_638 (release 4.1) Domain This family of ribosomal proteins consists mainly of the 40S ribosomal protein S27a which is synthesised as a C-terminal extension of ubiquitin (CEP). The S27a domain compromises the C-terminal half of the protein. The synthesis of ribosomal proteins as extensions of ubiquitin promotes their incorporation into nascent ribosomes by a transient metabolic stabilisation and is required for efficient ribosome biogenesis [3]. The ribosomal extension protein S27a contains a basic region that is proposed to form a zinc finger; its fusion gene is proposed as a mechanism to maintain a fixed ratio between ubiquitin necessary for degrading proteins and ribosomes a source of proteins [2]. 21.30 21.30 22.50 22.50 20.00 20.00 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.58 0.72 -4.14 12 740 2012-10-03 10:42:43 2003-04-07 12:59:11 14 11 555 5 375 629 45 45.60 58 31.50 CHANGED AVLcYYKVDssGKlpRLR+ECPt.pCGuGVFMApHtDRpYCGKCthT ..................AVL+aYKV...D.c.s..G..K.l.....pR.LR+EC...Ps...cCGA.GV.FMAsH...h...D...RpYCGKCshT........ 0 122 208 302 +4244 PF01667 Ribosomal_S27e Ribosomal protein S27 Bateman A anon Pfam-B_1929 (release 4.1) Family \N 24.50 24.50 24.50 25.30 24.40 24.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -8.76 0.72 -4.42 7 748 2012-10-03 10:42:43 2003-04-07 12:59:11 12 5 546 9 399 669 46 54.30 62 63.75 CHANGED PpShFlcVKCPsChs.psVFuHupThVhChtCupsLspPTGGKu+lptth..hhc ..............................PsSaFMDVKCPGCapITTV.FSHAQTVVlCsuCuTVLCQPTG.GKA+L.TEGCSFR+K.......... 
0 125 218 314 +4245 PF01200 Ribosomal_S28e Ribosomal protein S28e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.70 25.70 20.80 19.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.15 0.72 -4.23 38 607 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 501 10 344 470 76 67.10 58 88.74 CHANGED M-pt......ptsh.....AcV...lcllGRTGscGplTQV+V+lLcs.sp....sRhlhRNVKGPVRhG..DI.LhL...hETEREA.R+Lc .........................p.sphAcV...h+V..LGRTGSpGpsTQVRVcFl--..ss.....RpIlRNVKGPVREG..DI.LsL...LEoEREA.RRLR...... 0 109 191 285 +4246 PF04758 Ribosomal_S30 Ribosomal protein S30 Wood V, Bateman A anon Wood V Family \N 20.00 20.00 21.00 20.70 18.70 17.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.83 0.72 -4.22 23 515 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 387 7 297 462 15 57.30 61 58.05 CHANGED KVHGSLARAGKV+sQTPKVsKpEK+.Kp.sGRA+KRhpYNRR....Flsll.t.G...+K+GsNups .....KVHGSLARAGKV+.uQTP..KV....-KQ....E..KK....Kp.pGRA++RhpYsRR....FVNVs...s.hG....pK+t.Nss....................................... 0 113 168 244 +4247 PF00189 Ribosomal_S3_C S3_C; Ribosomal protein S3, C-terminal domain Sonnhammer ELL anon Prosite Domain This family contains a central domain Pfam:PF00013, hence the amino and carboxyl terminal domains are stored separately. This is a minimal carboxyl-terminal domain.\ Some are much longer. 
21.30 21.30 22.30 21.80 20.90 21.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.64 0.72 -3.77 124 7624 2009-01-15 18:05:59 2003-04-07 12:59:11 15 11 7097 199 1388 4926 2193 82.90 50 31.68 CHANGED pl..........uppLEc+........h.sFR...+sh+psl......pph......psu.s.cGl+lplSGRL...sG....s-hARsEh..hpc........Gpl..sLpolcspIDYuhspAtTphGhlGlKVWI ...................................................IAtQL.cpR..l.uFR...RA..hKpAl.........pps................hpusA..+GIKl.plSGRL....sG.......AEIA.RoEh..a+E.................G+l.........PLpTlRAcIDYuhtEAcTp........YGhlGVKVWI..................... 0 478 888 1168 +4249 PF01015 Ribosomal_S3Ae Ribosomal S3Ae family Bateman A anon Pfam-B_1334 (release 3.0) Family \N 25.00 25.00 27.20 27.10 22.10 18.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.05 0.71 -4.98 69 888 2009-09-11 07:57:53 2003-04-07 12:59:11 13 8 583 4 470 815 113 191.30 50 82.23 CHANGED ut+Kst.t+hhDsap.pKcWYslhAPsh.F....sppplGcThssc.......s-tlhGRlhEsoLuD.LssD.p..psapKl+hplpcV.pGc..sshTpFhGh-hTcDhlRSLVR+hpohI-uhl-V+TpDGYh.lRlhsluaTp++............ApsSQh+sIRcphh-llpccusptshcphVpcll...............sslup-I.c.ps+pIYPL+cVtIRKlKlLcpPc ..........................................................tuKKGh.KKKssDPFs.+K-WY-l..KAPsh.F....s.hRs...lGKTLVs+opG.....................h+.As-uLKGR.....VhEVSLAD.Lps...D.-....puaRKh+Lhs-.-V..QG+..Ns.LTNFaGh-hTpDKlpShV.+KWp.T.hIEApV-VKT..oD.uYh.L.RlFsluF.....T+++.sQh...++TsYAppoQlR.tIR+KMhEI.hpccs................psssL..cclV..pK.l.I..............P-s...Iu+-IEK.uspsIYPLp.sValR.KVKlLKpP+.......................................... 0 157 256 356 +4250 PF00163 Ribosomal_S4 S4; Ribosomal protein S4/S9 N-terminal domain Bateman A, Sonnhammer ELL anon Prosite Family This family includes small ribosomal subunit S9 from prokaryotes and S16 from metazoans. This domain is predicted to bind to ribosomal RNA [1]. 
This domain is composed of four helices in the known structure. However the domain is discontinuous in sequence and the alignment for this family contains only the first three helices. 21.70 21.70 21.70 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.14 0.72 -3.61 18 11914 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 10020 203 1485 8634 2034 83.20 44 43.67 CHANGED uR.YpGsphKhsRR.sphstLssc....ppptsp......pstpcshps+h........+hSpYtlpLcEKQKlRhhYG.lhERQLhpYsclu...pKh+GspG.slhplLE ...................................................................................h+h.RRhG..h....h..Lss.+.............t..t.h.c.s...............ss.tppu..tspp..............K.lSpYtl.p...L..c.E.KQK.LR.a.pY.......G..loERQhhpYh+hA...........p+tK.G..s...T..GpsLLQLLE......................................... 0 530 969 1271 +4251 PF00900 Ribosomal_S4e Ribosomal family S4e Bateman A, Finn RD anon Pfam-B_1205 (release 3.0) Family \N 25.00 25.00 25.00 25.50 24.50 24.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.35 0.72 -4.31 81 821 2009-01-15 18:05:59 2003-04-07 12:59:11 15 13 555 7 448 754 91 75.80 52 30.43 CHANGED cosEpaRllhDs+GRhslpcIss-EAphKLs+lpsKshspsGhsplshHDGRslhhsp.........sph+ssDolhlsl...ssp......cIh .....KTsEpFRLlYDsKGRFslH+Ios..EEAc...YKLCKV+.+lph.......Gp+GlPa.LsTHDGRTIRYPD.........PhIKsNDTlplDL.tosKIh..................... 
0 144 246 342 +4252 PF00333 Ribosomal_S5 S5; Ribosomal protein S5, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 27.00 27.00 27.00 27.20 26.80 26.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.04 0.72 -4.23 13 5805 2012-10-02 17:51:51 2003-04-07 12:59:11 15 11 5179 202 1620 3309 2088 66.50 52 34.20 CHANGED cLpE+VlslpRVsKhs+uGR+hpFpAlVVVGDcNGpVGhGhGKA+E.VssAIpKAltpAK+sllsVsh .....................pLpE+llsINRV.sKs.V.KG.GR+hpFs..ALVVVG..D.t.s.G+.VGhG.hG.KA..+.E..VPsAIpK.Ah-pA++..shlpVs.h.................................... 0 552 995 1332 +4253 PF03719 Ribosomal_S5_C Ribosomal protein S5, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.40 20.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.26 0.72 -4.70 114 5623 2012-10-03 01:04:38 2003-04-07 12:59:11 10 12 4962 202 1648 3246 2178 73.10 45 37.14 CHANGED VhG+aGuu+Vhl+PAstGTGlIAGG.ssRuVlEhAGlcD....lhsKohG.SpNshNhl+AThcuLpphposcplAphR.G ................lpGcaGuucVhlpPAscGT.....GlIA.GG.ssRAV.LEhA.GlcD.....lluKo.h.....G...Ss..NP..hN.lV+AT.lcu.L.p.p.hpss.cplAthRG.................... 0 557 1007 1357 +4254 PF01250 Ribosomal_S6 Ribosomal protein S6 Finn RD, Bateman A anon Prosite Domain \N 21.00 21.00 21.10 21.00 20.90 20.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.76 0.72 -4.14 180 4748 2009-09-12 05:02:19 2003-04-07 12:59:11 12 8 4630 210 1190 2634 2089 91.70 35 77.12 CHANGED +pYEhhaIlcPcls-c.phpshl-chpsllppp.uGplh.ch-p...W.Gc.R+LA.Y.IpKh..........pcGaYhlh.phpu..s.spslpEl-Rth+ls-sllRahsl+h ......................p+YElhal...l...+...P.s..hsEp...phs......uhlE+a.psllsss.GGplp.ch-c...W.G+....RpLA....Y...Ip....K.h.....................................+cGaY.....hll.slcu..s...spsls.El-+.....hh+.....l.....s.-sllRphll+.h................................... 
0 409 769 1011 +4255 PF01092 Ribosomal_S6e S6e; Ribosomal protein S6e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.20 25.20 24.10 24.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.45 0.71 -4.50 7 798 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 566 5 447 741 92 116.50 52 53.22 CHANGED MKLNlSaPtsGsQKhlElDD-+plRhFh-KRhGpEV-u-hlG.EacGYsl+IsGGNDKQGFPM+QGVLsssRVRLLhucGpsCYRPRRsGERKRKSVRGsIVsssluVLsLsIl++GEp-IPGLTss .................................hKlNlu.PssGsQKhlEl.-D-p.+lR..Fh.-KRhupEVsu........-........s..L........G........-........E.a.........K............G.........Yl.h+IoGGsDKQGFPMKQ.GVLsssRVRLLLs+G............ps.CY..R...P...R..R...............sGERK.R.KSVRGCIV........sssLuVLsLlI.V.K.pG...Ep.....-lsGLTD......................................... 0 141 238 341 +4256 PF00177 Ribosomal_S7 S7; Ribosomal protein S7p/S5e Sonnhammer ELL anon Prosite Domain This family contains ribosomal protein S7 from prokaryotes and S5 from eukaryotes. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.77 0.71 -4.62 166 7992 2009-01-15 18:05:59 2003-04-07 12:59:11 16 14 7200 206 1618 4678 2374 146.70 44 86.24 CHANGED MsR...+.tpst+R...l.s....Dshasst.........lVs+hlNplMh....................cGKKulApcIlYpAh-hl............pp....+.spp...sP.................lp.....lhppAlcNlpPtlEV+.......uR......RlGGuoYQV.PlEV.ps.pRphsLAlRWllpuuR....p.......Rs.t+..sMsp+LAsEllDAu.ps..pGsAlKK+E-sH+M...AEA.N+ .....................................................sR+..sphs++...h...s....Dshapst.........lVs+.llNplMh....................cG....KKuhA.pI...VYpAh-hI......................................cp.....+...Tuc.........sP..........................................................Lp.....VhppAlpNltPt.l-V+........uR......R...lG.Gu.sa.Q.V.PV.EV..pP.tRRssLAlRWLlsuAR............h...........Rs....tK.....oMs-+LAsELlDAA..ps...pG..........sA.lK.K+E-sH+M...AEAN+.......................................................... 2 573 1027 1367 +4257 PF01251 Ribosomal_S7e Ribosomal protein S7e Finn RD, Bateman A anon Prosite Family \N 26.60 26.60 26.60 26.90 26.10 26.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.14 0.71 -4.82 27 599 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 415 4 298 532 6 174.90 53 92.14 CHANGED sKIhKpsst....PoEhEpsVAQALhDLEsss...-LKupL+sLplsuA+E.l-luu.sKKAlllaVPhP.Lpua.+KIQt+LsRELEKKFus+cVlhlApRRILs+PpRp..tt...ppQKRPRSRTLTAVH-uILEDLVaPuEIVGKRlRh+lDGo+lhKVaLDp+-pss..lEaKl-oFsuVY+KLTGK-VsFEFP.stt ............................................Kl.Ksp..stp...PsEhEps.luQ..........ALh....-LE.ss...DLKu.pLR.c.LtIsuA+E...l-Vus.....s+..............KAll.IaVPhP.L.+uF.pKI..QsRLlRELEKKFS.G+.HVlhlAp.RRI.LPK.PpRp..s+....ppQKRP.R.S.RTLTAVHDAILEDLVaPsEI..V..GKRlRh.....+lDGS+llK.....Va.L.Dp.pppss..lEa.Kl-TFuuVY+KLTG+-VsFEFPt................... 
0 102 164 239 +4258 PF00410 Ribosomal_S8 S8; Ribosomal protein S8 Finn RD anon Prosite Domain \N 24.00 24.00 24.50 24.50 21.10 23.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.51 0.71 -4.30 119 6129 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 5619 211 1551 3553 2221 126.70 42 95.88 CHANGED DsIuDhLTRIRNAphsp+ppVpl.Ps........................SKlptslhplLpcEGYIpsaph.........hppppp...t...............................lplp.LKY.........pttps..lIpplpRlS+PGhRlYsstpclP+.......lh..sGhGl...........sIlSTS+G.lMoc+cARcppl.GGElLChVa ....................................................DPIADhLTR..IRNA.pts.p....+psVpl..Pu........................SKlK..tsIs.plLpc.EG...........aIcsach...........................hc..-..s.p..t......t.......................................................................lplp.LK.Y.............ptct..........lI.p...s...l..cRlS+PGLR..lY.sptc-.lP+.........Vh......sG..L...GI............................uIl.....ST..Sc....G......l...hTD+cA.Rp...............psl..G.GEllsYVa.......................................... 0 516 966 1289 +4259 PF00380 Ribosomal_S9 S9; Ribosomal protein S9/S16 Finn RD anon Prosite Family This family includes small ribosomal subunit S9 from prokaryotes and S16 from eukaryotes. 
21.80 21.80 21.90 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.56 0.71 -3.86 183 5483 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 4963 199 1599 3183 2185 121.50 50 81.31 CHANGED GRRKoulARVhl.ps.G.....................s....G.p.lplN.s+............sh...p-Ya..spphhphplhpPLt.lssp...........................................................hsp...............................aDlhlp.................VpGGGhoGQAsAlRhuIARALlp.....................................hs....sp............h+stLKpt......GhLTRDsRhhERKKa..Gh++AR+p.QaSKR ...............................................GRRKsulARVhl...hP...G.................................................s.....G...cI..slN.s.+.....................s.lcpYa..s....pc........sh.chhl.pQPL.t.l.s.pp...........................................................h..s..p................................aDlhls.................V.p.G..GGhoGQAG.....A.IRHGIuRAL.hp.............................................hD...ss..............hR.ssLKcA..............................................GhLTR..DuRhsERKKh..GL..+KAR+tsQFSKR...................... 0 557 1015 1351 +4260 PF00834 Ribul_P_3_epim Ribulose-phosphate 3 epimerase family Bateman A anon Pfam-B_1291 (release 2.1) Domain This enzyme catalyses the conversion of D-ribulose 5-phosphate into D-xylulose 5-phosphate. 
20.60 20.60 20.60 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.32 0.71 -5.21 13 6022 2012-10-03 05:58:16 2003-04-07 12:59:11 14 17 4642 50 1359 4019 3152 198.40 43 88.92 CHANGED hlAPSILSADFu+LucElpslppuGuDhlHlDVMDuHFVPNlTlGPhVlculRsh...sphPlDVHLMlcssDphlssFAcAGAs.hIoFHsE..AocHlcRolphI+ctGsKAGlVLNPuTPLssl-alL-clDlVLlMSVNPGFGGQuFIPssLsKlcpl...R+hhsp......hshhlEVDGGlsscshtplscAGAshlVAGSAlFuus .....................................................................................................................................IAPSILSAD..F...ucLu.c-l.ppl...p......t...u.....G...A.DhlHlDVM...D........G..........HFVPN...l.T..h..Gs..llcu..l.Rph...................sphP.....l.DVH.L.Ml....p......s..............P..-.......c.......a.lss.F...Ac.A......G..A.s....hI......oh.H.sE..................A.s......p....H......l...c.......R......s.l...p...h...I.+........p......t.....G....h.K...................A.........Gl.s........l.N.P.u.T..P.l.p.h...l.c....l..l......c........p......l.......D.h..lLl...M.o.......V.N....P..G.F....G...G....Q...p...F...Isp.s.l...cK.l.cpl......+..phhc.pps..................hs.hpIEVDGGl.s..s.p.s.h.tphs.p..AG.A-.hhVA.G.S.u.lFpt.s..................................... 0 465 866 1148 +4261 PF02009 Rifin_STEVOR Rifin/stevor family Bateman A, Lawson D anon Lawson D Family Several multicopy gene families have been described in Plasmodium falciparum, including the stevor family of subtelomeric open reading frames and the rif interspersed repetitive elements. Both families contain three predicted transmembrane segments. It has been proposed that stevor and rif are members of a larger superfamily that code for variant surface antigens [1]. 
34.70 34.70 35.40 54.50 33.20 34.60 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.28 0.70 -4.77 16 412 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 2 0 71 414 0 288.50 30 96.01 CHANGED aNp..Np.hITs..ppsshoo......RhLsEC-las.spYDNDPEMKcVh-pFscpToQRF+EY-ERhhspRpKCK-psDK-IQcIIlKDKhE.........................KSlA-KVEKsCL+CGssLG.GlhsusGlhG...slulsph.sp....uAthsAhp.h.csu.................htsslcph.......cuhsphhshhsh...............thhpulhssssYps.hsllsslhs.tstshCshspsstsshhshsspsttshhstp....................VpshspsAsssAptsspthstsltptssuhhss........IhuSllAIllIlLlMlIIYLILRYRRKKKMpKKhpYhKLLpc .....................p........h...ps......R.LsEC-las.s.Y-sDsEMKpVh-pFs.cpT.pp.RF....cEYcE.+h.cpRpppKEps-KpIQ..KIIhKDKhE...........................KSls-KsEKt.CLcCuhshG.ulssphGlhu...........shshsth.ps........sshss....u..hp.u...ttu.....................htssltts..........c.shsphhthhph....................tshpslhsss.....sasshhslh......shlts...th.........t.s.h...s..........s..h.........s........s....s.....s.......t..............h...shsh...thhph..hst.......................................stphhps..usssutt....ss..s.th.s....s....shtstthu...hhps.......tsIhuSslslllIll.lhlIIYLILR.YRR.+K.phKhp..KhL....................................... 0 71 71 71 +4262 PF02197 RIIa Regulatory subunit of type II PKA R-subunit SMART anon Alignment kindly provided by SMART Domain \N 20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.42 0.72 -4.52 13 656 2012-10-01 20:11:07 2003-04-07 12:59:11 12 27 223 33 372 614 5 37.00 33 10.58 CHANGED pulpsLLcshsspVh+ppPuDllpFstsYF.p+LpcpRt ...........slpplLcshthpllpppPs....clh...pFsspYF..p+Lpptp.......... 0 132 172 259 +4263 PF01782 RimM RimM N-terminal domain Bateman A anon PSI-BLAST P51419 Domain The RimM protein is essential for efficient processing of 16S rRNA [1]. 
The RimM protein was shown to have affinity for free ribosomal 30S subunits but not for 30S subunits in the 70S ribosomes [1]. This N-terminal domain is found associated with a PRC-barrel domain [2]. 23.50 23.50 23.50 23.90 23.40 23.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.74 0.72 -4.06 170 4228 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 4195 7 921 2739 1298 83.60 27 46.27 CHANGED hlGpIsssaGl+Gpl+lhsh.T-....s-thhshtshhht..................pttt............hplpph+.hp..pp.thllphcGlss+spA.ctLpGtpl..hlscsp ..............lG+lsssaGl+Gcl.+Vhs.h.TD......s-..p.hhc.h..ss..hhht.......................................pssp.........h.t..........lplps..h+..hp..........ps.hhll+hcG.....ls.....stssA.ctLpstplhlspp................................ 0 302 605 781 +4264 PF00848 Ring_hydroxyl_A Ring hydroxylating alpha subunit (catalytic domain) Bateman A anon Pfam-B_407 (release 3.0) Domain This family is the catalytic domain of aromatic-ring- hydroxylating dioxygenase systems. The active site contains a non-heme ferrous ion coordinated by three ligands. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.46 0.70 -4.68 128 4563 2012-10-02 19:24:03 2003-04-07 12:59:11 14 18 1613 110 962 3945 3034 206.60 16 58.44 CHANGED hp.ph.phttphphc....................hpsNWKlhh...-NahE.sYH..hs.ssHsph.t...hppht............................................t.t.h.h..............h.hshttt.tttsth......th.pptphtsh....................hhhlFPNhhlt.hhsshhh...hhphhP...husspsphphphhhtss............tssch..............tpphtphhpt............ltpEDhthscphQpG....lpo.....................sh..tsu..h.........sttEt.sl..pp....Fpphltchl...st ...................................................................................ht.hhp.......................htsNWKhhs..Es.h.h-...sYH...ss.hsH....shs.p.s.h..tths....................................................................t...hth..s.tts.hh..............................h...t.sh.s.h.p.hhtt....hsttp.h.......................t.ht..tt.....h..thh....................................................................th.s.la....Psh.hh....................ss..thh.............hphh.hP.......hussc...sp..h...hhhh.hs.t.s.....................tstch.....................ppthhp.h..t........................ht.pDht.htt....pts.......hts.....................................................................................-.............h......t.................................................................................................................. 
1 181 516 777 +4265 PF00161 RIP Ribosome inactivating protein Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.70 20.70 19.90 19.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.47 0.70 -5.14 73 945 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 239 176 103 936 0 197.20 29 61.64 CHANGED sFslssus...psYssFlpsLRsplsssstt.......hs..lPlL.......ssssp+alhlcLps....ssp.....lTLulchsNlYVlGY......psssphahFp............psspssLhsss.....tpppL.sasGsYssLpptush......Rpp.lsLGhptLssulssLhths...................................sspstAcsLllhIQMluEAARF+aIcppltssh........tpshpPs...sthlsLcssWuplSptlppu ...................................................................................Fsht.ss...tsYss.lsslRppl..tp.hpp........hpt..hsVhs.p.......tssstpahhl-lpsh..........ptp....p.lpLhlchsNLYlsGFh........sssssaapFs................................-hsphhhsss...........pshsl.shsu.s.YssLppsuu............Rps.hpluc.tsLssuh.s.Lhpassss.................................ppstu+......ullt.hlphsuEA....hRFp..Ipcphcpsh...............tpsp..shshs...st...t.ls..hp.s.WuclSpsl................................................ 2 2 17 64 +4266 PF04957 RMF Ribosome modulation factor Bateman A anon COG3130 Family This protein associates with 70s ribosomes and converts them to a dimeric form (100S ribosomes) which appear during the transition from the exponential growth phase to the stationary phase of Escherichia coli cells. 27.90 27.90 28.20 28.70 27.10 27.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.69 0.72 -4.28 27 514 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 498 3 92 194 24 54.70 67 89.99 CHANGED MKRQKRD+hcRAas+GYQAGlsGRS+EhCPap.sh-s.RspWLuGWREuhpD+hsGh ....MKRQKRDRLERAapRGYQAGIuGRSKEhCPYQ.sLsp.RStWLGGWR-AMtDRss..ht....... 
0 17 34 64 +4267 PF04321 RmlD_sub_bind RmlD substrate binding domain Waterfield DI, Finn RD anon COG1091 Domain L-rhamnose is a saccharide required for the virulence of some bacteria. Its precursor, dTDP-L-rhamnose, is synthesised by four different enzymes the final one of which is RmlD.\ \ \ The RmlD substrate binding domain is responsible for binding a sugar nucleotide [1,2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.59 0.70 -5.52 37 4012 2012-10-10 17:06:42 2003-04-07 12:59:11 12 20 2929 21 1194 32857 21491 276.30 30 91.82 CHANGED Mp.lLlTGusGQlGp-Ltcthtt.pshslluh.s+s...........phDlscspultphl.........pph+Pc..lVlNsAAaTAVDpAEs-..-tAhtlNutGsttlAcust.phGs.LlalSTDYVFDGs........tsts.YpEsD.ssPhslYGpoKLtGEpAVhs..sss.cthllRTuWVY.ut...tGp..NFVcTMl+LA..tpccplpVVsDQhGsPThstslAcslhtlhpphhp........hGlaHhsssGt....soWasFAptIhcpsstps........cVpPlsospaPpsApRPt.SsLssp+hptshsh.h.s.WcpuLtchlpphht ..........................................................................................................................................p.lLls.Gu.s.GQ....LG....p......p........L.t.......p...........h.......h......s.........t.......t............s...............p.......h.......l..uh...sts.....................................................ph.D......l...s...s...........p........s.......l......p......p.....h...l.......................................p..p.....h.....p.......P........c..........l......l........l......N....s...A.......A........a.......T.......s.......V.....D.......t........A.....E.........s....-....................c......h....u..............h.......t.......l...........N.....s.......p....u..s...........p........p........l............A..........p..........s.............s...............p...........p...............h...............u..........u...............h.......l..........l...........a....l...S...T..........D.....Y......V.........F.........-....G.p................................t.s.
.t..s.....a.....p......E......s.........D.........t.........s.........s.........P..........h........s........h..............Y.......G.....p.....o...K....h.......t.......G....E.....p.......t.....l......tp..................hs...........s.......c............h..........h.........I..........l......R........T..........u.....W.......l.....a...ut....................hG..p.......N......F.......l......p.......T.......M.........l........c.....L.u........pp.......+................c...........p..................l......p........V......l........s...........D.........Q........h........G......s........P........T..........h....s......t......s...L...A..c.h.....h.........h.....p.....l....l...p.....p.t...t.t..................................hG.l.YHh..s..s..s..u.t................s..oW....a....-...a.A....p....t....lhc.t.s.sh.t.........................p.l.p...s....l..s..o.....s......p......a.....s.....t..............A........t.....R....P....t........S.....h...L...stp+.....h.p.t.t....h..........s..h...............h........s..Wppultphht...th.................................................................................................................................................................................................................................................. 
0 375 753 996 +4268 PF03035 RNA_capsid Calicivirus putative RNA polymerase/capsid protein Griffiths-Jones SR anon Pfam-B_1282 (release 6.4) Family \N 19.50 19.50 21.00 20.70 19.00 18.00 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.72 0.70 -4.55 34 896 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 582 0 0 699 0 183.40 57 98.34 CHANGED MAGAhlAGLAu...DhluuulGoLIsAGANAlNQ+h-a......................c.Npp...........LQpsSFpHDKEMLpuQlpATppLQtchlsl+pulLsAGGFSssDAARuulsAPhT+l.lDWN...GTRaaAPs......ShpTTsaSGpFsssss............p...................ssssohpopST.Soslossstss..................osssSRTosWVpsQN............p.LpPahpuALpTsaVTPPSSp.uSSs.........uoVSTVP+tlLDSWTss.....FNT+RQPLFAplR ........................................MAGAFhAuLAu....sllusulGSLlsAGAsAlNQ+h-F......................-pNpp...........LQQASFQHDKEMLpAQlpATppLQpphhpl+puhLhtGGFStoDAuRu....AlsAPhT+s.lDWs...GTRYaAPs......upsTTh.uGtFoss.s.......................................................utssohhospT.sotluuss.ss..................hssosRTpsWlppQN............pslpPahpGAh.phsaVTPPuSp.uSo..........uo..VSTVPcth..Sa.s......FNTcR.PhFA............................... 
1 0 0 0 +4269 PF00680 RdRP_1 RNA dependent RNA polymerase Bateman A anon Pfam-B_32 (release 2.1) Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.51 0.70 -6.25 39 14992 2012-10-02 12:54:00 2003-04-07 12:59:11 15 154 6054 160 0 11808 1 228.60 22 27.06 CHANGED l.....shss+stlptsshcshhssh.....pcPusLshtDPRhssth...........schths+thhph.h.tplssh.hpshpcshstlhphh.shshtphshtpshpsl........hcsLs.......hsTSsGhPYh........ttKK+chhsptsts........................................th...hhhpshphhtsp.hu.thlhhsslKDELRsh-Kl..................................ptsKTRhhpusPlssslss+hthsshssthhpp.shphshsVGhs..pttWsclhtpLsp..uphhhssDaSsFDuoloPhlhssl..hplhpphh........hphhpshhhphlssshthhcsplh..........clpsGhPSGpssTslhNolhs.lhhphshhchhtshphpp..................p.lc......hhs.YGDDhlluhs.phs.h.....hptlpp..............phtc.hGlphT...sDKops.......hpplsFL+Rph..phspsh...hhshhcpcpIhs.lp..Ws+sspspp.......................plpslshthhcs....spchh.p..clpchhtphlpttsht............Phhtphthcah.t 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..h................h.h......t....h........................................................................................h.........p.............h......l..............s....h....h..........h.t..s..h.h..........................h..tG..P..S.G.ssTo..No.hhp...l..h....h....h....h..t.....h.........p.h...h...............................................................hh....uDD.h.................................................................................................................................................................................................................................................................................................................................................................................... 1 0 0 0 +4270 PF00978 RdRP_2 RNA_dep_RNApol2; RNA dependent RNA polymerase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_13 (release 3.0) Family This family may represent an RNA dependent RNA polymerase. 
The family also contains the following proteins: 2A protein from bromoviruses putative RNA dependent RNA polymerase from tobamoviruses Non structural polyprotein from togaviruses 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.43 0.70 -5.80 36 2606 2012-10-02 12:54:00 2003-04-07 12:59:11 16 43 501 0 2 2727 0 250.60 22 29.18 CHANGED psDhsls.lc..csplp.oc.psh..p.....pcshhtPslRous.p.....tRpsThp-s..LhAhpKRNhssPcLpcssshpphup.plscpFh..cshhspchhc........sshhss.tthspahsphpshptttl...ts.shhsL.pt...lshppYpaMlKsDlKshl-sohph..EhsssQTIsaacKhlsuhFuPlFpplscRlhtsLps.+llh.ssh....hss.hhscphphhss...hps......lElDhSKFDKSQschHthsphtlhctLGlss.lhsh.Wp.sh.......ccpohl.....p...Dhps..Glth.l.aQp+o..GsshTahuNT...l...lshshlupsh..s..lp...psphshFuGDDSLl...hshps...ts.spthsohaNhEsKlhc....h....sh....PYFCuKFLl..p..ss..sss.....hh.VPDPlKhlhKLGpcchhc.....phLp-hapShsDth+.a.c.hshhphhphshhchh+h..th.tshtsltphl.tuhstapsh ............................................................................................................................................................................................................................................................................................................................................................................................................................l...hpp...hhshaushh+.ltct.l....t.h..L..s...phhhhssh.........s..h.s.th.h.tsht........t.s.....hEsDastFDpo.Qsph......h.h.t.hEhhlh.......c.t.h..G......h......sp....llc........................h.....................t............h....oG...Th.hNT.........hs.hh....h....h....hh..........ht.........t........h...hh..GDD.hh....................................................................................................................................................................................................................................h................................................................
.................................................. 0 0 1 2 +4271 PF00910 RNA_helicase RNA helicase Bateman A anon Pfam-B_11 (release 3.0) Domain This family includes RNA helicases thought to be involved in duplex unwinding during viral RNA replication. Members of this family are found in a variety of single stranded RNA viruses. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.37 0.72 -3.87 99 3937 2012-10-05 12:31:08 2003-04-07 12:59:11 17 83 1135 0 26 5004 845 96.30 36 6.53 CHANGED lhlhGss.GsGKShhsph.lhptlh...t..................p.sslYs.tsssscaa.sGYp..tQslslhDDhspss.ss....p..htthhpllsossa.lsMAslccKu.h.FsSphllsooNh .....................hlpGsP.GsGKShhssh.luptls.....................................t.s.sss.Y.......hs...s...c.s...ca.....a.....D.....G....Yc......t...Q.t...V..Vlh.DDh..s..pss...ss..........p...Dhphht..ph.l........soss...ahs.shAul.Ep.....KG...h.Fs.Sc.hllsooN...................................................... 0 12 14 23 +4272 PF00940 RNA_pol DNA-dependent RNA polymerase Finn RD, Bateman A anon Pfam-B_1108 (release 3.0) Family This is a family of single chain RNA polymerases. 
22.00 22.00 22.10 24.60 20.80 21.90 hmmbuild -o /dev/null HMM SEED 405 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.45 0.70 -6.00 94 680 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 462 20 294 684 441 385.90 35 41.81 CHANGED u+sL.GppGLcWLKlalANla....GhDK.hSh.........p-Rlpasc..cp..h..................cpIhcsA........cs.................................................................................Plpt....p.......aWhp......A-cPaQhLAsChElpps....h.....cpsss..ppa.....hSplPlaQDGoCNGLQHYAALGtDhhGAppVNLhP...u-..c....PpDlYotVuphVpctlp.....p-.................................................stpspphuphl..pst......lsRKllKpoVMTpVYGVThhGuppQlpcpLppht...p....................................................................................................................................tt.hhpsupYluphlhpulpphFsuAcpI.pWLspsAphlsp.ssps................................................VhWsTPhGLPVsQsY+...ctppp.plpos....lpshshpps.stsss..........sppKQt.................sAhsPNFlHSLDAoHhhhouh..pstc.......tslsFuuVHDSaWTHA...........ssl-p.MspllR-pFlclaup.s.llpcLtppappphtp................................................................................................................................................................................hphsslPppG.......s.hDlp...........plhcSpYFFs 
.....................................................+.L.G..pGl.pWLKlHluNhh..........G..h.cK...hsh.........pcRhtasp..pp..h....ppI.hc.SA..pp.................................................................................Plps..pp.........WWhp.....A-cP..aQhLAsChEltpu....h.....cp.s.s.P..tpa........hSplPlaQDGoCNGLQHYAALGtD.hGAptVNLhs..............u-.p....PtDlYstluph.Vpphhp........p-..............................................................tttst.h...Aphl........tshlsRKllKpoVMTsV.YG............VT....hhGupp..Q.lt.cp.Lpph............................................................................................................................................tt.hhtsupYlsph.hhpu...ltphFpuu..ptl....pW...L.s.p.sA.ch.......lsp.p.ps...............................................................................................................................VhWsTP.l.GlPVhQs.Yp..p.t....p.p....tl.pss..........h.ps.h.hh.p....t..t.h...............................stpKQt............................................suhsPNFlHSLDuo...Hhhhoul.tstc................tsl.s.Fs.u.VHDSaWTHA...........sslsp.hsp.llR-pFlplasp...sllppLh.pph......tph.t............................................................................................................................................................................................................................h..hPtp.G.......s.h-lpplhpS.YFF...................................................................................................................................... 0 102 176 254 +4273 PF03118 RNA_pol_A_CTD Bacterial RNA polymerase, alpha chain C terminal domain Finn RD, Bateman A anon Pfam-B_172 (release 3.0) Domain The alpha subunit of RNA polymerase consists of two independently folded domains, referred to as amino-terminal and carboxyl terminal domains. The amino terminal domain is involved in the interaction with the other subunits of the RNA polymerase. The carboxyl-terminal domain interacts with the DNA and activators. 
The amino acid sequence of the alpha subunit is conserved in prokaryotic and chloroplast RNA polymerases. There are three regions of particularly strong conservation, two in the amino-terminal and one in the carboxyl- terminal [2]. 23.80 23.80 23.80 24.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.94 0.72 -4.56 121 5961 2012-10-03 02:11:09 2003-04-07 12:59:11 10 30 5358 22 1041 3553 2145 65.70 44 20.39 CHANGED ppp.p..p...phhphsI--L-LSVRuhNCLK+ssIpTlu-Llphocp-Lhcl+NhG+KSlcE...IpctLpc ..........................c....php.lLhhsl--..L.-.Lo..VRShNCLKp..t.sIphluD...Llp..+...oE....s...-....LhKscNhG+KSLpE...lKchLt................ 0 373 707 891 +4274 PF05066 HARE-HTH RNA_pol_delta; HB1, ASXL, restriction endonuclease HTH domain Bateman A, Aravind L, Iyer, LM anon COG3343 Family A winged helix-turn-helix domain present in the plant HB1, vertebrate ASXL, the H. pylori restriction endonuclease HpyAIII(HgrA), the RNA polymerase delta subunit(RpoE) of Gram positive bacteria and several restriction endonucleases [1]. The domain is distinguished by the presence of a conserved one-turn helix between helix-3 and the preceding conserved turn. Its diverse architectures in eukaryotic species with extensive gene body methylation is suggestive of a chromatin function. The genetic interaction of the HARE-HTH containing ASXL with the methyl cytosine hydroxylating Tet2 protein is suggestive of a role for the domain in discriminating sequences with DNA modifications such as hmC [1]. Bacterial versions include fusions to diverse restriction endonucleases, and a DNA glycosylase where it may play a similar role in detecting modified DNA. Certain bacterial version of the HARE-HTH domain show fusions to the helix-hairpin-helix domain of the RNA polymerase alpha subunit and the HTH domains found in regions 3 and 4 of the sigma factors [1]. 
These versions are predicted to function as a novel inhibitor of the binding of RNA polymerase to transcription start sites, similar to the Bacillus delta protein [2,3]. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.28 0.72 -3.63 67 1524 2012-10-04 14:01:12 2003-04-07 12:59:11 8 24 1377 1 259 816 13 68.90 34 20.55 CHANGED hoht-sAhpVLcppu....cPhphp-IhcphhcpuLhpht............u+oPtsolsuplhs-hp...........pshFlcl....tsphuLtsh ....................h.ShIElAhslLcp+u.....csMsas-llscI.ps.hhtpp...............spplcspls.pFYT-LN...........hDGpFlslG...-NpWGLRsW......................... 0 72 138 195 +4275 PF04090 RNA_pol_I_TF RNA polymerase I specific initiation factor Wood V, Finn RD anon Pfam-B_43469 (release 7.3); Family \N 19.90 19.90 19.90 19.90 19.30 19.80 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.48 0.71 -5.04 10 106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 102 0 77 105 0 186.60 21 39.59 CHANGED hhhs.lspss+cs+tpFllhspGhEVlP..........sshs-l+sa....pppHIspLssLLHlNlLRcNWslAY+hFsLLIRlPsVDIRsIWsLGlEILsplsppsss................cFh-Whsshao...s+ssFspsssp+.hAPVFRoGSRoHTPhYllo.LWslLlpsp.....................................ascLh-+LuEhlLpPPY.sDutlaFlhuhC+llcAs-LuscF .........................................................................................sp.psh....+hpHlssLoslLHhslhctcaspAhRsaulLl....R......h....p......s....VD....lR..s..h..WulGsEILhptsppsst................................................cahphh.t.has.......tpt...t...............h......................h.h...hh..h...t......................................................................ht.h...htphh...Pa..t..hh.h.u.h.h....................................................................................... 
1 20 40 65 +4276 PF01193 RNA_pol_L RNA polymerase Rpb3/Rpb11 dimerisation domain Finn RD anon Pfam-B_172 (release 3.0) Domain The two eukaryotic subunits Rpb3 and Rpb11 dimerise to from a platform onto which the other subunits of the RNA polymerase assemble (D/L in archaea). The prokaryotic equivalent of the Rpb3/Rpb11 platform is the alpha-alpha dimer. The dimerisation domain of the alpha subunit/Rpb3 is interrupted by an insert domain (Pfam:PF01000). Some of the alpha subunits also contain iron-sulphur binding domains (Pfam:PF00037). Rpb11 is found as a continuous domain. Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, Rpb11 subunits from eukaryotes, RpoD subunits from archaeal spp, and RpoL subunits from archaeal spp. 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null --hand HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.95 0.72 -4.85 218 7433 2012-10-02 13:35:44 2003-04-07 12:59:11 19 28 6156 190 1663 5625 2992 199.80 31 64.81 CHANGED lphhhpu.stTluNuLR+hLlsphsslslsshpl.p..t....................................................................................................................................................................................................................................................................................................................................................................................................................................ttpp...............................................................................................hhlclpTs.Gsh........sPpcslpp.AhchLtpphp 
...................................................................................................................................-PLE+GaGpTLGNuLRRlLLSS.lPGsAlTplcI-GVhHEaSol.GVpEDVhpIlLNlKtlsl+hpsccpphhplphpGsu.loAuDIhhssslchh....................................................................................................................................................................................................................................................................................................................................................................................s.thhhsshsptsphhhthphptthshssstpscpsptslGhl.lDuhaoPlp+lsYpVEssRVtpps...........DhDKLsl-l.Ts..G.ol...................sPc-Alpt.AA+ILt-pl................................................................................................................................................................................................................... 0 593 1066 1406 +4277 PF02150 RNA_POL_M_15KD RNA polymerases M/15 Kd subunit Mian N, Bateman A anon IPR001529 Domain \N 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -8.06 0.72 -4.26 10 723 2012-10-03 10:42:43 2003-04-07 12:59:11 11 10 427 90 486 620 22 35.70 34 27.56 CHANGED lcFCscCsNhLhspsc+psp....tCRsCsYcp.s-s. ...hpFCscCsNhL.h...s..c..cscpsph.......htCpsCsYpp.h............................. 
0 142 257 398 +4278 PF01194 RNA_pol_N RNA polymerases N / 8 kDa subunit Finn RD, Bateman A anon Prosite Domain \N 24.70 24.70 24.70 26.90 24.40 24.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.35 0.72 -3.94 49 498 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 450 97 310 402 203 60.50 55 79.07 CHANGED MlIPVRCFoC.G+llu.......chWccYhphlpp.....G.s.........ucsLDcLGl.cRYCCRRMlLoH......VD...lI-cll ...................................MIIPVRCFTCGKVlG.......scW-pYlphlps............shsc..........................................u-ALDsLGL.cRYCCR.RMlLoH......VDLI-clL............. 0 101 176 260 +4280 PF04990 RNA_pol_Rpb1_7 RNA polymerase Rpb1, domain 7 Finn RD anon Pfam-B_288 (release 4.2) Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 7, represents a mobile module of the RNA polymerase. Domain 7 forms a substantial interaction with the lobe domain of Rpb2 (Pfam:PF04561) [1,2]. 
20.80 20.80 21.40 22.10 20.50 20.70 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.89 0.71 -4.45 42 792 2009-01-15 18:05:59 2003-04-07 12:59:11 7 76 658 90 242 733 168 119.00 55 10.85 CHANGED TTL+pVTssopIYYDPDPpsTlIEEDp-aVpsaa.-hPD..t.-.s.....phSPWLLRlELDRcthhDKcL.oMpplup+IppsFu......sD.lplIaoDDN.A-cLVlRlRlhpsp...................................ct..-p-p.....c....-D...hFL+clEsphLsslsLp .................TTLR+VTusTuIYYDP.............DPp.sT...VItEDpEaVslYY.EMPD........hD.so.........+hSPWLLRlELDRK+MsDKKL.TMEpI.A-KIptuFG....................-D.LssIas-sN.....ucch......lhRlRhhttt...................................................t.tpt...........p.....t.....hL+ph.tphLtth.L........................................................................................................................................................... 0 88 139 207 +4281 PF04563 RNA_pol_Rpb2_1 RNA polymerase beta subunit Finn RD anon Manual Family RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain forms one of the two distinctive lobes of the Rpb2 structure. This domain is also known as the protrusion domain [1]. The other lobe (Pfam:PF04561) is nested within this domain. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null --hand HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.23 0.71 -5.40 21 11314 2009-09-13 15:18:01 2003-04-07 12:59:11 10 107 8899 140 1837 8858 4389 301.50 26 35.23 CHANGED sLlc.Qh-SFstFlppsLp.............-hlpp.sslpspst.............phchplphtplpls.cPphs-..........h.Pp-A+hRslTYSutlYVshchphpps...............................tchpppcValGclPlMhco....h.t....c...htp.....tGhFIlNGsE+VllsQ.hhusshhas.cpcpsuhhshssshhophtttpph.tsppsplht+hspstthshhshhhthths.splhhthhhhth.hph.pphthphptpth....................pthhh.spt.shsthttphhphths+cpphttshphhphphh.+lsstpsscptpshhlshhhphLlhhphshtp.DDhDHhuN+RlchsGpLLps.F+hhhp+Lp+sl+pphptshpcs..........hs.pshlpu.psIosslcphhuou ............................................................................................................................................................................................................................................................................................................h...............................................................h....h....t........h........s..h........................................-s...........cs.hsa..ts.lh..h.hth.....t..................................................................................................p.t.h.....h.G.p........hPhMh..................................................................................................................................................GhFllNG.E+lllsQ.hhusshhh..t.tt.t...h........................h....t......h...hh...............................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................h.....hh..h.hhh.h....h..DDhcphss+Rlc.su-Llp..sp...a..R.....h....u...L.s...R...hp...+........s.V.+c.ch...s...t..t..s.cs.....................................hs.P.p..p...hI.....s.....h....+....s....lsuulKpFhuou......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 649 1155 1555 +4282 PF04561 RNA_pol_Rpb2_2 RNA polymerase Rpb2, domain 2 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Rpb2 is the second largest subunit of the RNA polymerase. This domain forms one of the two distinctive lobes of the Rpb2 structure. This domain is also known as the lobe domain [1]. DNA has been demonstrated to bind to the concave surface of the lobe domain, and plays a role in maintaining the transcription bubble [1]. Many of the bacterial members contain large insertions within this domain, as region known as dispensable region 1 (DRI). 
21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.11 0.71 -4.88 22 13001 2009-09-11 22:08:18 2003-04-07 12:59:11 9 96 7019 145 2482 9371 4016 126.70 21 18.82 CHANGED psstlhhppchspssh..................thhssshhptputhhphchstpthhhsh.phppcIslhllh+AlGh.sDp-Ilptls..a..............s..hh.hhh.phpct.sh.op-pALphltp.......hht.sc.splptupchl............t.lt+.pls.+lshp.s.csp...ps..lhhhlctLltht.hshtp.DDhDHhuN+Rlc .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t....................s........h.t..........................................................................................................................................t......h...p.l.......................................t..................................................................................................................................................................................................................................................................................................................................................... 0 840 1548 2077 +4283 PF04566 RNA_pol_Rpb2_4 RNA polymerase Rpb2, domain 4 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Domain 4, is also known as the external 2 domain [1]. 
21.60 21.60 22.30 21.60 21.50 21.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.98 0.72 -4.16 12 7585 2009-09-11 14:40:16 2003-04-07 12:59:11 8 53 5403 96 610 6531 395 61.70 56 13.64 CHANGED ValNGsllGspcsPccLspplRphRRpGclss..lulhhs.cpp-l+I.TDuGRhsRPLlIV-N ............lFVNGVWlG...V.H.p.....DP..t...pLVpslpcLRR...+..........s.................l......s..........h....EV.Sl..l..R..DI.....R.....-..........RE.....h+IaTDAGRVhRPLFlV-......................... 0 212 356 512 +4284 PF01191 RNA_pol_Rpb5_C RNA_pol_H; RNA polymerase Rpb5, C-terminal domain Finn RD, Bateman A anon Prosite Domain The assembly domain of Rpb5 [1]. The archaeal equivalent to this domain is subunit H. Subunit H lacks the N-terminal domain. 21.30 21.30 21.70 22.10 21.10 21.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.23 0.72 -4.39 64 636 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 504 98 378 576 240 72.50 49 38.79 CHANGED lNlscHpLVPcHplLoc-EtpplLpcYplc.pQ............LP+IhtoDPls+hh......Gh+.GsVl+IhRcS...pTuGchlsYRlVl .........................lNITcHpL.VP.c.HhlLopE.EppcLLp+...Y.cl.+.-.sQ..............................LPRIptsDPVA+Yh.......................Gl++G..pVVKIlRp.S...E.TA.G+YloYRlV............... 0 129 223 317 +4285 PF03871 RNA_pol_Rpb5_N RNA polymerase Rpb5, N-terminal domain Finn RD anon DOMO:DM07083; Domain Rpb5 has a bipartite structure which includes a eukaryote-specific N-terminal domain and a C-terminal domain resembling the archaeal RNAP subunit H [1,2]. The N-terminal domain is involved in DNA binding and is part of the jaw module in the RNA pol II structure [3]. This module is important for positioning the downstream DNA. 
21.00 21.00 21.00 21.20 20.80 20.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.06 0.72 -3.58 36 436 2012-10-11 20:44:43 2003-04-07 12:59:11 9 10 315 91 270 395 9 95.30 34 42.69 CHANGED Mssp.......cpht.RLaRshRTlhEMlcDRGYhl..spcElsholcpF+ppasc..............pspRschph.sp.psc...................ssspIaVhFs....-psplGlKsl+sasp.php ..................c...pEhh.RLaRhp+Tlh........p..Mh+DRGYhV..spcEl...s..o...L-cF+ppau-t.t......................pPpRpcLsh.ssps.s-....................sssplaVhFs....--sp....VGlKsl+taspph.............................................................................................................................. 0 97 154 223 +4286 PF01192 RNA_pol_Rpb6 RNA polymerase Rpb6 Finn RD anon Prosite Family Rpb6 is an essential subunit in the eukaryotic polymerases Pol I, II and III. This family also contains the bacterial equivalent to Rpb6, the omega subunit. Rpb6 and omega are structurally conserved and both function in polymerase assembly [1]. 20.60 20.60 20.80 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.43 0.72 -4.22 57 4597 2009-01-15 18:05:59 2003-04-07 12:59:11 17 6 4471 137 1177 2396 1900 54.00 32 56.84 CHANGED cclhshhss+YclshllupRAcp..lp.hsssshl.pcsp....+PlhhAlpElscshhp.pl ..............pchlc+lss+apLVllAA.+RARQ...........lp..t..G...t...s..........sh..lt...tpss.................KssshALcEIt..pshls........................................ 0 378 740 987 +4287 PF03876 SHS2_Rpb7-N RNA_pol_Rpb7_N; SHS2 domain found in N terminus of Rpb7p/Rpc25p/MJ0397 Finn RD, Anantharaman V anon Hand Domain Rpb7 bind to Rpb4 to form a heterodimer. This complex is thought to interact with the nascent RNA strand during RNA polymerase II elongation[1]. This family includes the homologs from RNA polymerase I and III. In RNA polymerase I, Rpa43 is at least one of the subunits contacted by the transcription factor TIF-IA [2]. 
The N terminus of Rpb7p/Rpc25p/MJ0397 has a SHS2 domain that is involved in protein-protein interaction [3]. 20.90 20.90 20.90 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.02 0.72 -3.85 145 1033 2012-10-02 11:08:51 2003-04-07 12:59:11 12 19 491 64 703 949 87 71.80 24 30.62 CHANGED pchlplsP.phhs...........................................shppslh.......ptLhpphts+.................hstp......h.G..........lllulhcl..pp................lsc....................Gpl..h.sss..GssahpVpachllF .............................t..hlpltP.phhs............................................phppslp.......pp.L...pch.ts.p.................sstp........h.G...........hllulhcl...pp...lsc....................................GhI..h..sGs.....Gh.s......hhp.......V.pachlVF............... 0 224 387 571 +4288 PF03870 RNA_pol_Rpb8 RNA polymerase Rpb8 Finn RD anon DOMO:DM07082; Family Rpb8 is a subunit common to the three yeast RNA polymerases, pol I, II and III. Rpb8 interacts with the largest subunit Rpb1, and with Rpb3 and Rpb11, two smaller subunits. 25.00 25.00 26.50 26.40 24.00 23.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.78 0.71 -3.98 7 366 2012-10-03 20:18:03 2003-04-07 12:59:11 10 4 309 91 251 321 4 131.00 41 90.57 CHANGED -DIFpVpslDP-GKKa-+VSRlpspSpshc.McLhLDINoplYPlthsDphpLslAooL.h.Dss.ssu....pasP..http.o.hsDpaEYlMYGKlY+lE..Ess..tts.+lu.sYsSFGGLLM+LpGctppLpsFclDpplYLLh++ .............................-DhFsVpslDs...pK..aDR............VSRlpspS.p.s.h.c..hplhLDlNs-lYP.lphu...........D.phplsLAoTL..p.D...G...s.s...-su...................taps........ts.pc...o....hADpa-YVM.aGKlY+.hE.......Ess.....tss..pls.sYsSFGGLLMpLpGshppL..p.s..hclDp.lYLLh++................... 0 84 143 210 +4289 PF05158 RNA_pol_Rpc34 RNA polymerase Rpc34 subunit Finn RD anon Manual Family Subunit specific to RNA Pol III, the tRNA specific polymerase. 
The C34 subunit of yeast RNA Pol III is part of a subcomplex of three subunits which have no counterpart in the other two nuclear RNA polymerases. This subunit interacts with TFIIIB70 and is therefore participates in Pol III recruitment [1]. 25.80 25.80 25.80 25.80 25.50 25.60 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.02 0.70 -5.25 20 415 2012-10-04 14:01:12 2003-04-07 12:59:11 7 7 320 3 292 406 4 252.70 25 92.27 CHANGED MAsss........pls-ltpcla-thhpp...sphhoQp-Lpsh..hspsshsplhsslppLl-ppLlcLlp..pssc.Lta+hlsp--ApK..........hsshus-Eu.LVYshI-uoGscGIWs+sI+s+oN...Lppshlp+s........LKoLEo+p....aIKSVKsVchPs+KhYMLasLpPSc-lTGGsWF..oD.p-LDspFIssltphlhpalspKoh.sh........................................................................tptpthshssshpuYsTstplhpalscsu..los.........VpLopcDIppLl-sLlYDG+lEplpss.....t.hpsh+sshpshhph.p.........................................s.hsssshspsPCupCPVFchC..sssusloPcsChYhccWLp ......................................................................................................................................t....................hht.h......t......thh..p.l.t.............p.t.hh.hhp..Lhp.t.hphhp.........tsst..lhaphht..pp.t.tt...............................hpsh....sspEt.llYphIcpuGppGIWp+sl+h+os...L..p.....lp+h........LKsL.Es.+.p....hIKsl+.sVp............ts..p+KhYhLhsLpPs.plTGGsWa..sD.tph-tpFlp.ltp.hhpalpppt................................................................................................................t..h.....s...o.pplhphlpp.t....lsp....................hpLs.pslppllpsLlhDspl-.hh.s...............hh...t.............................................................................................................................................................s..s.h..hp..hP..Cs..CPlhp.C..t.su.l..s..s.ppC.YhppWh...................................................................... 
0 108 172 245 +4290 PF05132 RNA_pol_Rpc4 RNA polymerase III RPC4 Wood V, Finn RD anon Pfam-B_18856 (release 7.7) Family Specific subunit for Pol III, the tRNA specific polymerase. 22.10 22.10 22.60 24.40 21.30 22.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.57 0.71 -4.16 36 332 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 269 0 241 326 2 126.50 25 30.42 CHANGED .csphalhQlPshLPshhsssssp...........................................pptpcsppspppppsspptpssspt.......hppl........pGplGKlhl+KSG+lplclG.slshD.........................lshGssssFLQ-llslss..........tcps................phhsLGplptKhllTP-h ...............................p..tphhhhQhPshLP.h..stt.t.........................................................tt.tt....tt..p.t..pt..ttp.t.stpt................s.sh.pph......................spG.tlGKlhl+KSG+lplpl.........G..s.......lhhD................................................ls....hG.s...ss...sFlQ-llslst.....................cps..............................phhs...LGplpp.+.hlsoPch...................... 1 76 128 199 +4291 PF03431 RNA_replicase_B RNA replicase, beta-chain Finn RD anon Pfam-B_4422 (release 6.6) Family This family is of Leviviridae RNA replicases. The replicase is also known as RNA dependent RNA polymerase. 
25.00 25.00 25.00 25.00 23.70 23.70 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -13.04 0.70 -6.30 6 321 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 25 10 0 316 0 252.20 52 80.15 CHANGED lhspsslt.slus-Lh.shs...........ShuhsuhD.ssDsFc.lsYL+sElLoKa-sauhG.-s....culAatKFLsAEpcCtlpNpchahh.sasEp..hShuEusI+hu.RphIu+LLu-.ssh.shhc+CRFSGGAopsssRpautPuhKaAt.....ptslTsRAlcYshAh+cssu...........s-s+hhclsssN+ssTVPKNuKsDRsIAhEPshNMahQhGlGuhIRcRLRphGIDLNDQTlNQcLAppGSlsssLATIDLSuASDSISlcLVc.LLPPcaachLhcLRSshGhl.DG+llpaEKlSSMGNGaTFELESLIFAAlARSlspllthcsuslulYGDDIIlPocssssLh-VhpaVGFpPNp+KTFosG.PFRESCGKHYFpGVDVTPFYI++sIssLsDLhLlLNplhRWuTlsGlhDPRsasVapKYt+hlP+hLptss...PDsYusuAhlstshhsshstp+tasRhhshlhchtR.h...............chs-hhSYha-hhuch.shhcsu...ss.hs.tps.hhh..t.hp+...h....sots.uhhcshssSc ................................................spDL..thh...h.......uhs.sphD.pscsFp.lsYL+sElhoKasshs.u.sp................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +4292 PF01876 RNase_P_p30 DUF53; RNase P subunit p30 Enright A, Ouzounis C, Bateman A, Dlakic M anon Enright A Family This protein is part of the RNase P complex that is involved in tRNA maturation [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.49 0.71 -4.97 62 477 2012-10-03 00:45:34 2003-04-07 12:59:11 11 9 410 3 326 499 10 141.70 25 49.82 CHANGED hsRlslhhsss....h.....tptlsphpp....taDllulpPtspcshphAspsh..c......lDllohshst...+hshhlc+hhhphAhc+Gl.tlElsaushl...................ps..shsRpphlssh..ppllphs+p..pslllSSuApshhplRuPhDlhsLst.hh...G.....lspscucpuls.p. .....................................hpRlslh.hsss...t.t........ttths..phhp....ta..DllAlpPts....pchhphAspsh....c.....................lDllshshst......+hs.h.h.............hc.+hh.l.ph..Alp+Gl.thElsaushl..............................ts..sttRpphls...ss............tplhphs+u...........+slllS..SsAp..p...............h..plRuPhDlhsL......ht.lh.G.....lspppuptuls........................................... 0 104 184 268 +4293 PF01900 RNase_P_Rpp14 DUF69; Rpp14/Pop5 family Enright A, Ouzounis C, Bateman A, Cerutti L, Dlakic M anon Enright A Family tRNA processing enzyme ribonuclease P (RNase P) consists of an RNA molecule associated with at least eight protein subunits, hPop1, Rpp14, Rpp20, Rpp25, Rpp29, Rpp30, Rpp38, and Rpp40 [1]. This protein is known as Pop5 in eukaryotes. 21.50 21.50 22.20 22.00 20.70 20.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.35 0.72 -4.13 65 500 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 381 7 356 486 8 109.40 22 67.85 CHANGED RYllhcllh....................................sslsp.....................c.slhptl+pulpphaG-hGsups....sstlhshahss.oth.....uIlRssRsthchlhuuLshlsplss...................ptlhlcsltlSG...TI+psccthlch ...................................................RYlhhplhh...................................................plst......................p.slhphlcpultphaG-hGsuts....................t.tl..p..l......hh....h....sstTth............sIlR.sp.+pthchlhuuLsh.lsplps..............................psshhpslplSGTl+pspphhh.......................... 
0 109 192 282 +4295 PF00074 RnaseA rnaseA; Pancreatic ribonuclease Eddy SR anon Overington and HMM_iterative_training Domain Ribonucleases. Members include pancreatic RNAase A and angiogenins. Structure is an alpha+beta fold -- long curved beta sheet and three helices. 21.70 21.70 22.10 21.70 21.50 21.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.09 0.71 -3.83 108 847 2009-01-15 18:05:59 2003-04-07 12:59:11 15 3 206 473 241 891 0 119.30 33 78.61 CHANGED ppsshppFppQHls........sss..s..............hCN.phMppt.phhp.t....pCKshNTFlHps..hssVpulCsptsssCp...tppNCapSpsphplTpCplous.st......hssCpYpsoppp+h.lhVAC-sp...........lPVHhD ................................t..othppFt.pQHhs........sss..sp.................hCN.phMpp+.shppt......pCKshNTFlHps..hssVpslC.tp...ts..l.sC+.........ss.pp.NC..apSpsthplTsCclosusp......hPsCpYpss..t.p..p+.a..llVAC-ss...........lPVHhD.......................... 0 15 18 39 +4296 PF00075 RNase_H rnaseH; RnaseH; RNase H Eddy SR anon Swissprot; SCOP and HMM_iterative_training Domain RNase H digests the RNA strand of an RNA/DNA hybrid. Important enzyme in retroviral replication cycle, and often found as a domain associated with reverse transcriptases. Structure is a mixed alpha+beta fold with three a/b/a layers. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -11.40 0.71 -3.81 65 16967 2012-10-03 01:22:09 2003-04-07 12:59:11 19 181 3870 344 1941 16858 2641 121.40 50 20.38 CHANGED sps.hshaTDGSs.tp...stpu...tAGh.lh.pps...............+..pht.tsls.....tsosQp.uElhAlhtALc.s......ts........ppls.....IhTDSpYl......hp.lhp.......................................sh.p.p.tps....lhs......plhphl...hp.ppp.lhltaV.uH.pGh......s.GNchADcLAppusp ............................................................................s..t.shYs.DGAsspp..............sphG.........pA.Ga...Vs...s+G.................................+......pcs..hsLs..........cTT...NQ.+....s.ELp.A...l.hL...A....Lp...D......SG.............................scVN..........Il..T.DSQ..Ys.....lG.....IIp.........................................................uQPs....c...SE.S.........t.lVN............QIIEpL.............I+...KEc...V..Yl.uWV....PA...H..KGI...............................G.GNE....pVD+LVStGI.R.......................................................................................... 0 638 1217 1694 +4297 PF00773 RNB RNB domain Bateman A anon Pfam-B_1009 (release 2.1) Domain This domain is the catalytic domain of ribonuclease II [1]. 
20.30 20.30 20.40 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.05 0.70 -5.15 86 6644 2009-01-15 18:05:59 2003-04-07 12:59:11 14 56 4239 12 2000 5433 2047 308.20 33 41.36 CHANGED RpDL..pch........hhh.oIDsts.scD....hDDAl................plcphs.sGt............................aplhVHIADVopalp.s........oslDp.cAtpRus.........................olYLssp.slsMLPppLu..p....slsSLtssp......................................c.Rhshoshhpl.spp.Gp.......l.ts...hchttulIcspt+loYcplpphlpsttpt.......................................tppLptLh....p.luptlcppRhpp.........Gulph.s.hs....-h.................................ph.l.....................sp......pspshshhhhp.....................ps.ucpllcEhMlhANpssApal.......t.pp..thsslaRspstPs.p........th.phhphh.........................ht..ttt...........htphht........psp..p.hl.......phhlh+sh...ppupY........sspsh.............HauLuh....stYo+aTSPlRRYsDLlsHR.LpthLpp ..........................................................RhDL...Rsh...hh.TID..s.ts..u+D......hD..D..A..l.........................................................................hs....c.pht....sGt............................acLhV............t...........IADVohYVp..s..................................osLDp.EAh..pRu.s.............................SVY..h.s.........s.....p...s...l.P...MLPcpLS.......s.............slCSL....p...Pp........................................-..Rhshssthpl..stp...Gp.................................l...hs......hc..hh.......pull....c.........S..p.t..+.h..oYspVpp..hl.p...s..p.pp.ttt.................................................................................................lhtpl.p..Lt....p..ltphLp.p.tR.h.p+.........................Gu.lsF..-..ps...................Eh....................................................................................+hh.l..................................sp................psc....p.lh.pp....................................Rp.uc.+llEE...hMlhAN.ssA..chl.....................p...ct..
......thss.laR.l...H.p.t.Ps.c...........phpphhphlt................................................hhth.ht.t..................................shpphhpt...............htsps...tth.l....................pph.hhR.sh.....ppAtY....................................................................sscsh..s.......Ha.GLuh....................phY..sHFT......SPIRR.Ys.DLllHRhl+thl..t......................................................................................................................................... 0 677 1244 1691 +4298 PF02508 Rnf-Nqr Rnf-Nqr subunit, membrane protein Mian N, Bateman A anon Pfam-B_1638 (release 5.4) Family This is a family of integral membrane proteins including Rhodobacter-specific nitrogen fixation (rnf) proteins RnfA and RnfE [1] and Na+-translocating NADH:ubiquinone oxidoreductase (Na+-NQR) subunits NqrD and NqrE. 20.70 20.70 20.90 20.80 20.60 20.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.31 0.71 -4.71 128 4543 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1639 0 935 2710 1506 193.80 38 92.91 CHANGED hp..ph...plhhsulhhsNsllsphLGlCPhLAVosplpsAlGMGlAsshVlshSshhlull+phl..........................................stlRl.saIhlIAuhVpll-hl...lcu......as..sLYpsLGIFlPLIsTNChlL........GRA-hhAp+................pshhtShlDG.lGsGlGFslsLllluulREhlutG..shh....t..................s.sh.pshslhllssG....AFhshuhllt ................................................................................................pchhplhlsslhhNN.sLlp..hLGlCP.hLA.Vo.pplp..sAlG.hGlAsThVlsloshsspLlcpal...........................shl..Rhlsal.hlIAulVph.l-hl...lct.au...sL..Y.ps..LGI...FlPLIs...TNChll........G.tA....hhsp+...................p.shhpShl..G.husGlGaslslllluulRE.hlusus..l..........................stsh...pshsl..sh..lssG........A.Fhuhuhll.h.......................... 
1 309 603 792 +4299 PF03259 Robl_LC7 Roadblock/LC7 domain Mifsud W, Bateman A anon Aravind L Domain This family includes proteins that are about 100 amino acids long and have been shown to be related [3]. Members of this family of proteins are associated with both flagellar outer arm dynein and Drosophila and rat brain cytoplasmic dynein. It is proposed that roadblock/LC7 family members may modulate specific dynein functions [2]. This family also includes Swiss:Q9Y2Q5 Golgi-associated MP1 adapter protein and MglB from Myxococcus xanthus Swiss:Q50883, a protein involved in gliding motility [4]. However the family also includes members from non-motile bacteria such as Streptomyces coelicolor, suggesting that the protein may play a structural or regulatory role. 22.20 22.20 22.20 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.46 0.72 -4.42 41 1879 2012-10-02 21:07:43 2003-04-07 12:59:11 12 15 656 43 934 1705 25 90.00 22 65.84 CHANGED hs.hlhphhtpssulpsslllss-Glslst.....shssscscplAAhsuulhuhuctsspphs..ssplcphhlcscpshlhlsssus...tshL.slls ..............................h.phh.p.p.ssG.lptsllls.s..D....G..lhlss.................sh.sp.s..s.u..........-....p..........l.AA.l.s...u...ul.h...uh.ups.....su.......pphs.......t........ss....lp.phhlchcp.....u.h.lh..lhss..uc....tshL.slh.................................... 
0 317 624 826 +4300 PF00480 ROK ROK family Finn RD anon Prosite Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.99 0.71 -4.45 14 15339 2012-10-02 23:34:14 2003-04-07 12:59:11 15 52 3744 43 2992 10332 2028 179.30 24 53.05 CHANGED ulDlGuT+lthslhs.suplltpcchsT.....ssspthlsslsshlpch.sphs.......ululussGhlspsp......hlshsPp..hshtc.hslhpplcpchslPVhlpNDANsuAhuEphhssupshpshlhloluTGlGuGllhss+lapG.spG.AGElG...Hhhhs.p...........t.hCsCGpp.GClEshASGpAl .......................................................................................................ulDlGuo.p.l...p.....h..u..l....h.....s................s............u........p........l....l....t......c.....p.....p......h..s.ss.............pss....p....p....h...l....p....t....l....h....p.....h..l....p....p....h.h..t..p...ht..........................ul.G.lu.hP......Gh..lc....p.p................sh.l...h...t...s..s.p.................s.a....p.......s....h....s..........l...t....p.....t....L.........p........p........p......h............s............l...........P........l......h...l.p....ND..ss..s...s..A......l...u......E.............h.............h.............h....................G.............s.............u.......p.....s.....h..........p........s.....h.l......h..l.............s.............l.............G............T...G.....l.....G.u..Gl....l...h.......s.G...c.....l.h...p..G...t......ps..tu..G.E.lG.........H.h..h..h..s....t......................................t.....C....C..Gp......h...G..ClEshhut.s................................................................. 0 1016 1988 2532 +4301 PF02027 RolB_RolC Glyco_hydro_41; RolB/RolC glucosidase family Bateman A anon Bateman A Family This family of proteins includes RolB and RolC. RolC releases cytokinins from glucoside conjugates [1]. Whereas RolB hydrolyses indole glucosides [2]. 
21.30 21.30 25.80 22.50 18.50 21.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.15 0.71 -5.13 41 119 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 15 5 14 115 0 190.50 20 62.83 CHANGED hspstapsh.slphlpssp.cLctpLppAhpsapshhppslh.htpthh..h..............................t.hs.-hhh.spphhYlYssppphpphhp.s+hlspsu.spullAsslPPYppslohtthhphhNpls.........hsspps.pchsaFlAl.hPsssFhct.phplpstpst.hhhsFaspp..............................ss.hsa-.l...lAhGcshh ......................hshapsh.clphlpstt.cLptpLppAhpsacshhppslh.h.pth.....................................................h.hs.-hhh.stphlYlYssppthpphhp.p+.ls............pss.tpullAsslPPYppslohtphhphhNpls..........sst..psspchsaFlAl.hPossFhch.ph.lpstp.stphhhsaaspp...............................st.hsa-.l...lAhGpshh........................... 0 8 14 14 +4302 PF01815 Rop Rop protein Bateman A anon PSI-BLAST Domain \N 21.20 21.20 21.20 51.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.01 0.72 -4.52 12 216 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 168 35 7 146 14 57.70 73 91.23 CHANGED MTKQEpsALNMAKFI+uQo.LLLLEKLspLD...LDccAs.CE+LHEpAEpLappLusRlt.p-.p MTKQEKTALNMARFIRSQT.LTLLEKLNELD...ADEQADhCEpLH-cA-ELapSh.sRFts-sp.... 0 1 3 6 +4303 PF00980 Rota_Capsid_VP6 Rotavirus major capsid protein VP6 Finn RD, Bateman A anon Pfam-B_1047 (release 3.0) Family Rotaviruses consist of three concentric protein shells. The intermediate (middle) protein layer consists 260 trimers of VP6. VP6 in the most abundant protein in the virion. VP6 is also involved in virion assembly, and possesses the ability to interact with VP2, VP4 and VP7 [1,2]. 
25.00 25.00 33.60 40.00 24.50 17.70 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.15 0.70 -5.83 3 1454 2009-09-11 12:38:02 2003-04-07 12:59:11 12 1 356 40 0 791 0 234.90 80 99.79 CHANGED MDVLYSLAKTLKDARcRIVEGTLYTNVuDIIQQsNQlIsTLNGSTFQTGGIGNLPlRNWsFDFGpLGTTLLNLDANYVENARTTIDYFIDFIDuVClDEMVRESQRNGlAPQS-oLRpLAuuKFKRINFNNSSEYIENWNLQNRRQRTGYlaHKPNIFPYsNSFTLpRSQPtHDNlMGThWLNsGSEIQIAGFDSoCAlNAPuNIQcFEHlVpLRRsLoNATloLLPsAPRlopPsVIPoADGtTTWLFNPVILRPNNVpVEFLLNGQlIssYQARaGTlsARNFDSIRISFQLlRPPNMTPuVsALFPQAuPFsHHATVGLTLRIESAoCESVLuDANEshLSIVTulRQEYAIPVGPVFPPGMNWTELLoNYSsSREDNLQRlFTVASIRSMlI ...........................................................................................................................................................................................................................................................sstTTWaFNPlIL..RPNNVEVEFLLNGQIINTYQARFGTIlARNFDTIRLSFQLMRP..P..NMTPAVsALFPQAQPFp.HHATVGLTLRIESAVCESVLADAsEThLANVTuVRQEYAIPVGPVFPPGMNWp............................................ 0 0 0 0 +4304 PF01525 Rota_NS26 Rotavirus NS26 Bateman A anon Pfam-B_762 (release 4.0) Family Gene 11 product is a non-structural phosphoprotein designated as NS26 [1]. 
21.10 21.10 21.50 21.10 20.70 21.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.45 0.70 -4.55 6 556 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 255 0 2 356 2 185.60 87 99.47 CHANGED MSDFGIN...LDAICDNV++spSsScTuSQlSNRSSR+MDFV..D-EELSTYFNSKu..SVTQSDSCSNDLssKaSIIoEAVlCDESAHVSADAlQEK-Eo....lsQlDaslMKWMhDS.DGIslNGGlNFo+uKSKsp....................cpEschT-.KScTNl.sasSlsIsSslGtFNPIppplKhEAls-hFEDEDs-sChC+NCPY+EKYhKLRpRMKsVLlDhIsEM ...........MS.LSIDVTSLPSIS.SSIFKNESSSTTSTLSGKSIGRSEQYISPDAEAF.NKYMLSKSPEDI.GPSDSASNDPLTSFSIRSNAVKTNADAGVSMDSSTQSRPSSNVGCDQlDFSLsK.......GIsVsANLDSCI.SISTsp....................KKEKSKKD.KSRKH...YPRIEADSDSEDY..............VLDDSDSDDGKCKNCKYKKKYFALRMRMKpVAMQLIEDL................... 0 0 0 1 +4305 PF02509 Rota_NS35 Rotavirus non-structural protein 35 Mian N, Bateman A anon Pfam-B_1677 (release 5.4) Family Rotavirus non-structural protein 35 (NS35) is a basic protein which possesses RNA-binding activity and is essential for genome replication [1]. 25.00 25.00 120.20 120.00 18.80 18.60 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.02 0.70 -5.71 3 493 2009-09-11 00:14:00 2003-04-07 12:59:11 9 1 240 7 0 353 0 301.10 90 99.67 CHANGED MAELACFCYPpL-cDus+ahPaN+pAIKCMLoAKVDKccpSpaYDTIlYGlAPPPpFKKRFNTs-NSRGMNYETDMYsKVAsLLs-lLNuIKlsp-K.sA-IlusVISVRHLENLlLRIEN+DDILScsscLllKSVLIAhGLlKEsETTsTAEGGEIVFQNuuFTMWKLDYpSH+LMPIhDsNFlEYKITlNccsPIsD+cV+ELlAELRWQYNKFAVITHGKGHYRVV+YSoVANHADRVYuTFKSspK+GssacFN-LDpRlIWsNWtAFluSMpsGsTLEluK+LLFoKMK.sSsoFKGlTT-RKhDEVShlG ...MAELACFCYPHLENDSYKFIPFNsLAIKCMLTAKVDKKDQDKFYNSIVYGIAPPPQF+KRYNTsDNSRGMNYETsMFNKVAlLICEALN.SI.KVTQS-.VANVLSRVVSVRHLENLVLRKENHQDVLFHSKELLLKSVLIAIGQSKEIETTATAEGGEIVFQNAAFTMWKLTYLDHKLMPILDQNFIEYKITLNEDKPISDlpVKELlAELRWQYNRFAVITHGKGHYRVVKYSSVANHADRVFATYKNNsKSGNshDFNLLDQRIIWQNWYAFTSSMKQGNTlDVCKKLLFQKMKQEKNPFKGLSTDRKMDEVSHVG.... 
0 0 0 0 +4306 PF00981 Rota_NS53 Rotavirus RNA-binding Protein 53 (NS53) Finn RD, Bateman A anon Pfam-B_1048 (release 3.0) Family This protein is also known as NSP1. NS53 is encoded by gene 5. It is made in low levels in the infected cells and is a component of early replication. The protein is known to accumulate on the cytoskeleton of the infected cell. NS53 is an RNA binding protein that contains a characteristic cysteine rich region [1]. 22.10 22.10 24.10 24.10 19.60 19.00 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.89 0.70 -5.84 14 573 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 251 0 0 494 0 455.80 70 99.18 CHANGED MATFKDACaHY++lsKLNptlLKLGANssWRPuPssKhKGWCLDCCQaT-LTYCpGCoLaHVCQWCsQYsRCFLDsEPHLLRMRTF+ssITKEDLpsLIsMYshLFPINc+IVsKFhsslKQ+KCRNEahhpWYNHLLhPITLQALslcl-.sclYYIFGYYDsMsptNQTPFpFVNhIshYD+LLLDDlNFcRMuhLPssLQppYAhRYFSKSRFlSpph+plphSDFScphlp.spcsPspthplhRNsop.......hcWNcpCc....LlpstpsYhshhpTSapEpYsVSspshhasptKLphlS+hhKPNYlsSNHtpsAocV+sCKWCslsssapsWpDFRl+clYNslhsFIRALhKSNsNVGHCSSpEplYthl+slhhhspscpaspolpplFshL-PV-lssscYlLhsaplsa-lhsllhpslpsc.lPpILols-htsIlpuII.cWFDlchMRchPhsstoTscL+cLpccscLs-EYDhhlSDsE ....MATFKDACYaYKRINKLN+sVLKLGVNDTWRPSPPTK..YKGWCLDCCQHTDLTYCRGCTMYHVCQWCSQYsRCFL..D..s-PHLLRMRTFKNEVTKsDLhNLIDMYshL....F....PINp+IVsKFIssTRQHKCRNEChsQWYNHLLMPITLQSLSIELD.GDVYYlFGYYDsMpslNQTPFSFsNLlDhYDKLLLDsVNFsRMSFLPssLQQEYALRYFSKSRFISEp.RKClsD.HFStNVlE.NLHNPSFKlQITRNCSE.....hSs-WNtACK....LVKshssYFslLKTSHlEFYSlSTRCRhFTQaKLKlASKhIKPNYlTSNH+TsATEVHNCKWCSINNuYpVWNDFRlKKIYDNIFNFLRALVKSNsNlGHCSSQEKIYEalcDVLsVCD-E+WKhuVscIFNCLEPVELssVcYVLFNHElNWDVINlLVQSl.GK.VPQILTLsDllhIhpSIIYEWFDIRYMRNTPMTTFTlDKLRpLpTtsKTV.-YDSGISDVE................................................................................ 
0 0 0 0 +4307 PF04866 Rota_NS6 Rotavirus non-structural protein 6 Mifsud W anon Pfam-B_4831 (release 7.6) Family \N 25.00 25.00 45.80 45.80 19.10 17.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.30 0.72 -3.98 12 253 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 198 0 0 89 0 91.00 90 99.27 CHANGED MN+L.QRQLFLENLLVGVNSTFHQMQKHSINTCCRSLQRILDHLILLQTIHSPAFRLDRMQLRQMQMLACLWIHQ+NHDLQsTLGAIKWISP .MNRLLQRQLFLENLLVGVNShFHQMQKHSINTCCRSLQRILDHLILLQTIHSPAFRLDRMQLRQMQhLACLWIHpHNHDLQATLsAIpWISP..... 0 0 0 0 +4308 PF01665 Rota_NSP3 Rotavirus non-structural protein NSP3 Bashton M, Bateman A anon Pfam-B_1010 (release 4.1) Family This family consist of rotaviral non-structural RNA binding protein 34 (NS34 or NSP3). The NSP3 protein has been shown to bind viral RNA. The NSP3 protein consists of 3 conserved functional domains; a basic region which binds ssRNA, a region containing heptapeptide repeats mediating oligomerisation and a leucine zipper motif [2]. NSP3 may play a central role in replication and assembly of genomic RNA structures [2]. Rotaviruses have a dsRNA genome and are a major cause cause of acute gastroenteritis in the young of many species [1]. The rotavirus non-structural protein NSP3 is a sequence-specific RNA binding protein that binds the nonpolyadenylated 3' end of the rotavirus mRNAs. NSP3 also interacts with the translation initiation factor eIF4GI and competes with the poly(A) binding protein [3]. 
25.00 25.00 53.30 53.10 24.50 24.10 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.84 0.70 -5.45 6 508 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 241 10 0 389 0 266.80 87 89.08 CHANGED EoTQthspSIlNuuF-AAlsussSsL-phGIpYDas-.VhuRl+sKachlhDDSGVpNNhIGKAtTIDQALssKhuSAtRNcNWhTsspTlARLDEDVN+LRhMLSuKGIDQKMRVLNuCFSVpR.PGKSSSII.......................pCT+LhK-KIERGE..lEV-Dphh-.+M-lDs...IDWKu+a-phcp+FpShtthV...........sEKYssWV.....hKA+KlsEsMhsLQ.sIupQQs+IsE...hphaNpKLp.+s..lps+.sShIuulEWhhpShph-D-l+sDhpQphNolsVINshpulD......DlE .........ESTQQMVSSIINTSFEAAVVAATSTLELMGIQYDYNE.VFTRVKSKFDYVMDDSGVKNNLLGKAlTIDQALNGKFGSAIRNRNWMTDSKTVAKLDEDVNKLRMhLSSKGIDQKMRVLNACFSVKRIPGKSSSII.......................KCTRLMKDKIERGE..VEVDD.SaVDEKMEIDT...IDWKSRYDQLEKRFESLKQRV...........NEKYNoWV.....QKAKKVNENMYSLQNVISQQQNQIAD...LQQYCNKLE.sD..LQuKhSSLVSSVEWYLRSMELsDDVKsDIEQQLNSIDlINPINAIDDlE............................. 0 0 0 0 +4309 PF01452 Rota_NSP4 Rotavirus non structural protein Bateman A anon Prodom_2202 (release 99.1) Family This protein has been called NSP4, NSP5, NS28, and NCVP5. The final steps in the assembly of rotavirus occur in the lumen of the endoplasmic reticulum (ER). Targeting of the immature inner capsid particle (ICP) to this compartment is mediated by the cytoplasmic tail of NSP4, located in the ER membrane. 25.00 25.00 98.70 98.50 24.80 24.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.11 0.71 -4.93 5 1516 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 459 20 0 1040 0 169.00 81 99.03 CHANGED MEKLTDLNYTLuVITlMNsTL....HNIIp-PGMsYFPYIASVLTVLFThHKASlPTMKlAh+TSpCSYKVIKhVVVTIFNTLLRLuGYK-plToKDElEpQhsRIVKElRcQLcMIEKLTTREIEQVELLKRIYDhLhspsssEIDMSKETN+KsaKTLc-Wu.sKcPY-PT-VlA.s ......M-KLsDLNYTLSVITLMNDTL....HoIIpDPGMAYF...PYIASVLTVLFTLHKASIPTMKIALKTSKCSYKVIKYCIVTIlNTLLKLAGYKEQVTTKDEIEQQMDRIVKEMRRQLEMIDKLTTREIEQVELLKRIHDpLIsRPlDhIDMoKEFNQKNlKTLDEWE.SGKNPYEPpEVTAS.. 
0 0 0 0 +4310 PF05087 Rota_VP2 Rotavirus VP2 protein Moxon SJ anon Pfam-B_6280 (release 7.7) Family Rotavirus particles consist of three concentric proteinaceous capsid layers. The innermost capsid (core) is made of VP2. The genomic RNA and the two minor proteins VP1 and VP3 are encapsidated within this layer [1]. The N-terminus of rotavirus VP2 is necessary for the encapsidation of VP1 and VP3 [2]. 25.00 25.00 63.80 63.70 17.90 17.40 hmmbuild -o /dev/null HMM SEED 887 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.69 0.70 -13.64 0.70 -6.62 3 539 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 239 6 0 403 0 737.00 88 99.98 CHANGED MAYRN...+RcpNopppDscpEK-sEpQ-..........-K-+pELKEKVhDKK-sVlTD..Ds.pDhK-psss-NLKlsDpVKcSpKE-SKQLlEVLKTKcEHcKEIQYEILQKTIPSFpPcEoILKKLcDIKP-.AKKpsKLFRLFEPKQLPIYRANGEKELRNRWYWKLKKDDLP-GDYDVREYFLsLYsQVL-EMPDYlLLKDMAVENKNSRDAGKVVDSETApICDsIFQDEETEGsVRRFIADMRQRVsAERNTVcYPAILHPIDYEFNcYFLpHQLIEPLTN-lIaNYIPERLRNDPNYILNMDsNLPoTARYIRPsLLQDRLNLHDNFESIWDTlT+ANYVLARSVVPDLKELVSTEAQIQKMSQDLQLEALTIQSETQFLTGINSQAANDAFKTIIAsMLSQRTISL-FVTSNYMSLISuMWLMTIVPo-MFIRESLVACQLAVINTIIYPAFGLQ+MHYpNGD.RRPFpIAEQQIsNFQVpNWLHFVNsNQFsQVVIDGVlNQsLNDsIRsG+IINQLMEALssLSRQsFsTYPlDYKRSVQRGILLLSNRlGQLVDLTRLlsYNYETLMACITMNMQpVQTLTTE+LQLTSVTSLCMLIGNsTVIPEPpTLFHYYsoNVNFHoNYNERINDAVAIIsAANRLDLYQKKMKuIVEDFLKRLHIFDVsKVPDDQMYRLRDRLRpLPVERRRlDVFsIILNNMDQIERASDKIAQGVIIAYR-MpL-YDEhYGaVNlARDlNGFQQINLEELMRTGDYuQITNhLLNNQPVALVGAIPFVTDSSVISLIAKlDATVFAQIVKpRKVDTLKPILFKINSDSNDFYLVsNYcWVPTSTTKVYKQVPQQFDFRsSMHMLoSNLTFTVYsDLLsFVsADTVEPINAVAFDNsRIMQEL 
...................................MAYRKRGs+..REs...Q.Q..NERLQE.KElEps..sDs.M............p.psNN+.K.QQL.SDKVLSQKEEIITD..............sQDDlKIADEVKKSS.KEESKQLLEILKTKEDHQKElQYEILQKTIPTFEPKESILKKLEDI+PEQAKKQhKLFRIFEPRQLPIYRANGEKELRNRWYWKLKKDTLPDGDYDVREYFLNLYDQILIEMPDYLLLKDMAVE..NKNSRDAGKVVDSETAsICDAIFQDEETEGslRRFIA-MRQpVQADRNlVNYPSILHPIDaAFNEYFLpHQLVEPLNN-IIFNYIPERIRNDVNYILNMDhNLPSTARYIRPNLLQDRLNLHDNFESLWDTITTSNYILARSVVPDL..K..E..LVSTEAQIQKMSQDLQLEALTIQSETQFLsGINSQAANDCFKTLIAAMLSQRTMSLDFVTTNYMSLISGMWLLTVlPNDMFlRESLVACpLAIlNTIVYPAFGMQRMHYRNGDPQTPFQIAEQQIQNFQVAN.WLHFlNNNpFRQVVIDGVLNQsLNDNIRNGpVINQLMEALMQLSRQQFPTMPVDYKRSIQRGILLLSNRLGQLVDLTRLLuYNYETLMACITMNMQHVQTLTTE+LQLTSVTSLCMLIGNsTVIPSPQTLFHYYNVNVNFHSNYNERINDAVAIITAANRLNLYQKKMKuIVEDFLKRLpIFDVsRVPDDQMYRLRDRLRLLPVEhRRLDIFNLILMNM-QIERASDKIAQGVIIAYRDMQLERDEMYGaVNIARNLDGaQQINLEELMRTGDYuQITNMLLNNQPVALVGALPFlTDSSVISLIAKLDATVFAQIVKLRKVDTLKPILYKINSDSNDFYLVANYDWlPTSTTKVYKQVPQ.FDFRsSMHMLTSNLTFTVYSDLLuFVSADTVEPINAlAFDNMRIMNEL... 0 0 0 0 +4311 PF00639 Rotamase PPIC-type PPIASE domain Bateman A anon Prosite Domain Rotamases increase the rate of protein folding by catalysing the interconversion of cis-proline and trans-proline. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -10.28 0.72 -3.25 87 5141 2012-10-02 13:30:10 2003-04-07 12:59:11 16 48 2764 71 1422 7342 3338 93.40 30 34.16 CHANGED HILltst..................tp.tts....cppupplhpplpsGt.....s.FsplApphS.Dsso...uppGGcLG..h...hsts.......phssp.Fpcslhshp...Gp.lS.tPl.....co..phGaHllcltc .................................................................................................HILlp....................................t.tts.......cpphpplh....ppl...psGt........................s.Fuc.lA+..pa..S....-...s.s.S........up..p..G.....G-......LG......h...........h.p..s.......................ph.s..st....F....c.....c....s.....l..h.s..hp........h.....Gp.....lo..sP..l...............+o....p.h.G..aHllcl..t.............................................................. 0 466 906 1210 +4312 PF03428 RP-C Replication protein C N-terminal domain Finn RD, Bateman A anon Pfam-B_4463 (release 6.6) Domain Replication protein C is involved in the early stages of viral DNA replication. 22.90 22.90 23.00 23.60 22.60 22.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.85 0.71 -4.82 52 484 2012-10-04 14:01:12 2003-04-07 12:59:11 8 4 191 0 102 471 13 159.10 41 44.88 CHANGED RshThuhltuQhtuppht.ssts.lsKWclaRplscA+shlGlsDRuLAVLsALLSFaPcscL..spcssLlVFPSNtQLuhRApGMusuTLRRHLAsLV-AGLIhR+DSPNGKRYAR+spsGplppAaGFDLuPLluRApElpphAppltA-+pth+th+EclTltRRDltKLIpsuh-E ..............................................t.ht.t..st...t........tt..ssKWplhc.lsc.A+shlGls.cRuLsVLsALLoFaPcs..pl.......s............tp.s...........s..l...l.......VFPSNtQLuhRspGhussTLRRHLAhLV-AGLIhR+D.SPNGKRYARRs..p.s.GpltpAFGFsLuPLlsRut....E.lcthAppltsc+tth+phREplTlhRRDItKLIphuh........................... 
0 12 46 66 +4313 PF03055 RPE65 Retinal pigment epithelial membrane protein Griffiths-Jones SR anon Pfam-B_947 (release 6.4) Family This family represents a retinal pigment epithelial membrane receptor which is abundantly expressed in retinal pigment epithelium, and binds plasma retinal binding protein. The family also includes the sequence related neoxanthin cleavage enzyme in plants and lignostilbene-alpha,beta-dioxygenase in bacteria. 17.40 17.40 17.90 18.30 16.40 17.30 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.91 0.70 -5.94 151 2113 2009-09-10 15:57:50 2003-04-07 12:59:11 10 23 714 11 903 1992 868 396.50 24 89.26 CHANGED pusassstpEh.sst....thpVp.GpIPspL.sGshhRNG..Ps.h...............ts....hthpHhFDGDGMlpuhpht.......sG...c....sp.apsRalcTptaptEp..psu...c.hlh.u.ht..............................sshps.............................hpssANTsllha...sG..............................+lLAh.hEsu.hPapl-s....toL-TlGh...hs.hs........................stlt.............tshoAHP+hD.PtTG-......lhsFuh......p..s.t.....tshlphhpl........sssG.............t.hhtp.hsh.shs..t..sshhHDFulTcsYslhhp..Plp...hs..thh...hut.......................................pshtacsppssp.htllsR...c....st..........ptl...............phacs.sss.asaHhhNAa....E..t.ts..............cllh-ssph...ssss.hh.............................................tt...t.hp......................tupLtRaplshp...s...............................................................................s.......ps.pp....phls.....pt...sEFPpl...........s.sphsGpp.hRasY.....................................shpsps......................hsulsKh............Dh....ps.......................sp....tp................................haphs.....spasuEPlFVP....ps..............su..........t......tEDDGallshVhct...pp............t...pSp..LlllDAp..shsp..sl..Acl.pLPt.+l..PhGhHGsalss 
.........................................................................................................................................................................................tp.............h..l.t..Gp.lP............t.l.pG..hhRsG..s.s.h................................th.HhF......D....G.tu..............hlpthphp..........................................sG.......p.sp..atsRalpopt.hhttt...t.s.........hh.t.ht.........................................................................ht...sssNsslhhh.......ss..............................................chhuh...Ess.....ht...lp...........tsLpThuh..........hs..hp..............................................................s.lt...................shsAHP+.h....D..s....sGp.............................hhshsh.....s...................................shhphhhh......s.ps..................................t..hh.p..h.h....h...t...ssh.hHs....au.l..Tcpa..hl.h.p...slhh.p...thh......ts.............................................ht..ap.....ph...sp.hh....lls+......t...........st.........h............................................th...hcs.......sh....hh.hHhh.........NAa.....E.....ctt..................................pl.lh...hsth...ts.....h...........................................................................................................s.lhchhlsht....s............................................................................................s.......th..pt.....p.lt...........pt...shE.hspl........s....ph..hGp.p.h+asY........................................hht..................................sslh.Kh...............Dh...........s................tp........hp..........................................................ha..s.....s.th...suEPhFVP...ps....................................................su.......t.......tEDDG..hlls.hlhst.......tp...................t..p.u..lhll..DAp.....sh.......pt............lAph..pl..P...pl..PhGh..HGhah..t................................................................................ 
0 234 534 747 +4314 PF02318 FYVE_2 RPH3A_effector; RPH3A_effect_N; FYVE-type zinc finger Mian N, Bateman A, Eberhardt R anon Jackhmmer:Q13875 Family This FYVE-type zinc finger is found at the N-terminus of effector proteins including rabphilin-3A [1] and regulating synaptic membrane exocytosis protein 2 [2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.06 0.71 -4.20 21 739 2012-10-03 17:27:21 2003-04-07 12:59:11 11 21 94 8 344 696 0 106.60 29 14.78 CHANGED SpLTD-EAcHVhpVlQRDhcLRKKEE-RLucLKp+lpcEso++plLusptphs-opCl+ChpPFpFLlNoK..RQClDCchhlCKs.Cupa........sKpEpGWlCcsCphuRll.KhtSLEWaYcsV ..............................................LpctEtchl.pVltRs.phc.phEp.......c.......R..l..t..c.Lpp..c.l.......E.p.h.+.+...th..h............u...p...........t..........p.................s......p...p.....p.........Ch....hCtp...........h....s..hl.hsss........p.CpsCphplC.pp..Cssh............................pppcthWlCslCpc.p..ppl.hhtoGtWFap....................... 0 57 84 185 +4315 PF04390 LptE DUF532; RplB; Lipopolysaccharide-assembly Kerrison ND anon COG2980 Family LptE (formerly known as RplB) is involved in lipopolysaccharide-assembly on the outer membrane of Gram-negative organisms. The lipopolysaccharide component of the outer bacterial membrane is transported from its source of origin to the outer membrane by a set of proteins constituting a transport machinery that is made up of LptA, LptB, LptC, LptD, LptE. LptD appears to be anchored in the outer membrane, and LptE forms a complex with it. This part of the machinery complex is involved in the assembly of lipopolysaccharide in the outer leaflet of the outer membrane [1]. 
23.30 23.30 23.40 23.60 23.00 23.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.01 0.71 -4.48 162 2176 2009-11-10 14:17:04 2003-04-07 12:59:11 7 3 2150 4 501 1407 1106 153.80 20 84.72 CHANGED hhhhslhlu...uC...Gap.hssh.sh...................shthpslhlpss......pst..s.lpptlpcpLttsssthhsst...............hhLplphp.shsppshshs...tsu......ps..scaplshslpaplt...phsss.....phl......shs...sppsas..hs........ss..sl......upptpcpt..hhcchtpphAppllpclst ................................................h.hhhslllu....uC.....G.a+hpsss.th.........................s.phps.hhlsss...........cs.......s.lsctl....cppL...p....ts.s.sphlspp.pt.................spLclttsshsppshol.......psG....................ps...sEa..plhhslshplh......hssp......chh..........sho........sh+sa......s.............sp....sl..........Aps..sp....pch.....lh..p.ch.tccsucQllppl.................................................... 0 147 308 412 +4317 PF04032 Rpr2 DUF363; RNAse P Rpr2/Rpp21/SNM1 subunit domain Kerrison ND, Finn RD, Mistry J, Wood V anon COG2023 Family This family contains a ribonuclease P subunit of humans and yeast. Other members of the family include the probable archaeal homologues. This family includes SNM1 [2]. It is a subunit of RNase MRP (mitochondrial RNA processing), a ribonucleoprotein endoribonuclease that has roles in both mitochondrial DNA replication and nuclear 5.8S rRNA processing. SNM1 is an RNA binding protein that binds the MRP RNA specifically [2]. This subunit possibly binds the precursor tRNA [1]. 
22.60 22.60 22.70 22.70 22.50 22.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.31 0.72 -4.11 86 508 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 375 5 347 512 76 95.70 24 51.13 CHANGED p.RlpaLhphAp............................................................................hst.sspl...u+pYlphhcplup+tplc.ls.p..h...KRphC+cCpshLlPGhssclRlp............................................................psh...llh......pChpCup ...................................................................................................................RlpaLhphAp............................................................................h....t.t..ts.s...pl....uchYsphhcplu..p....+tpl...+...ls.p........h...K.Rp....hC+..pCsshLlPGh.s.s.p.l+lc.............................................................................................................t..tph.......lsh........pChpCs...................................................................... 0 97 190 288 +4318 PF01765 RRF Ribosome recycling factor Bashton M, Bateman A anon Pfam-B_949 (release 4.2) Domain The ribosome recycling factor (RRF / ribosome release factor) dissociates the ribosome from the mRNA after termination of translation, and is essential bacterial growth [1]. Thus ribosomes are "recycled" and ready for another round of protein synthesis. 
22.80 22.80 23.00 25.70 21.70 22.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.75 0.71 -4.78 133 4805 2009-09-17 04:39:55 2003-04-07 12:59:11 14 8 4668 22 1216 2907 2488 163.70 46 86.18 CHANGED shcp-lsplRT.GRAssulLDslpV-hY...G.s.s....PLsplAslols-sRplhIpPaD.pshlpsIE+AIhs.u....sL.GlsP.p.sDG...phIRlslPsLTEERR+-llKts+.chuEcuK......lulRNlRRDu.cpl.Kchpcpt...loED-h+chpcclQKlTDcalpclDchhppKEKElh ........................................hcccluplRo...G.R.As.s.ulLDtlpV-Y..Y..................G..ssT...........PLsQlAslo..l...s..-uRsLhIs........PaD..+.............o..hltslEKAIhs.S.DL..GlNP....s..s-G..................slIRlslPs.L...TEER.RK-Ls...K.l+.phuEpAK..........VAlRNlRR.DApDpl.KK..hpKct-..IoED-h+c..tp.c.-lQKlTDphlcclDphlssKEpElh................................... 0 421 787 1030 +4319 PF00076 RRM_1 rrm; RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) Eddy SR, Birney E anon Published_alignment Domain The RRM motif is probably diagnostic of an RNA binding protein. RRMs are found in a variety of RNA binding proteins, including various hnRNP proteins, proteins implicated in regulation of alternative splicing, and protein components of snRNPs. The motif also appears in a few single stranded DNA binding proteins. The RRM structure consists of four strands and two helices arranged in an alpha/beta sandwich, with a third helix present during RNA binding in some cases The C-terminal beta strand (4th strand) and final helix are hard to align and have been omitted in the SEED alignment The LA proteins (Swiss:P05455) have an N terminal rrm which is included in the seed. There is a second region towards the C terminus that has some features characteristic of a rrm but does not appear to have the important structural core of a rrm. The LA proteins (Swiss:P05455) are one of the main autoantigens in Systemic lupus erythematosus (SLE), an autoimmune disease. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.06 0.72 -4.34 79 50512 2012-10-02 20:46:34 2003-04-07 12:59:11 17 788 1401 465 29915 55800 1095 67.70 24 23.75 CHANGED laVssL..s.ssoccpLcphFpp.aGtl.hshplhtc.....ptpscGauFVpFpspcsAppAlpthsu.pplpu+plp .................................................laVss.L........s..............p.....s.....s........c....p........p..........L......c........p......h....F.........p........p....a...........G..........p.............l.....t...........p..............s..........c................l............h............p...........c...................s.............s........p...........s............+...........G...............a...............u........F.......V...........p.......F........p...........s........t.......p.......s.......A...p..p.......A....l..p..t...h..ss..t.l.tsp.h................................................ 0 9087 14852 22632 +4320 PF00398 RrnaAD Ribosomal RNA adenine dimethylase Finn RD anon Prosite Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.48 0.70 -5.36 20 6621 2012-10-10 17:06:42 2003-04-07 12:59:11 15 22 4983 44 1737 10162 4608 241.80 28 87.89 CHANGED tsct+pphGQNFLpsppllpcIlcpsslp......pspsVlElGsGpGtlTptL.schs..cpVsulElDs+LschLpccht....psslpl.lppDhhpashPp........................p.atlsuNlPY.sIoosIlcpllp.......ttph..psshlllppphA++hhut.s..hhspLolhhpshs-hphltclspp..................................hFcPsPpVDS...ullclcR+spshh.s.tshppacshlcphhstcspsLtsslpphhstpphpthhc...tlspsshlsthshsphhshaphhsp 
.................................................................h....pKphGQ.N.FLh.D...p..l...l..........p...p.I.l.p.sh..s..hp..................ps.s..s..l.lEI..GP...GhGu...L..T.p...L....hc..p.s............pp...lh....u...l.....E....l..D....p.......c.......L.....h.......s..h..L...p...c.pht...................h..s..p.....l...p..l...l..p...t...D....s...L...c...h..s..h...sp.......................................................................................................................tt.h...p..l.l...u...N.L......P......Y....p.....I..o....o..P....l..l.h..+l.lp...............................th......h.p.s...h.hlM...h.Q+..E.V......u..cR..l..s..A...t.P..s...s......+..t.Yup.Lolhh.p.h.h.s............p.s.ph.lh.pVs.p....................................................................sFhPs...P.pV-S..................ull...+..........lh.........+.....t.............p...................................................s.........p.........s.......p.h.h.......................p.....ls...ptsFs..p.RRKs.....l....tss...L.......t.......t.......h.....h.....s.....t.....t...........h.....t.....t.....................................t......lt.....p.......p..t..thh........t..................................................................................................................................................... 0 583 1087 1443 +4321 PF04353 Rsd_AlgQ Regulator of RNA polymerase sigma(70) subunit, Rsd/AlgQ Mifsud W anon COG3160 Family This family includes bacterial transcriptional regulators that are thought to act through an interaction with the conserved region 4 of the sigma(70) subunit of RNA polymerase. The Pseudomonas aeruginosa homologue, AlgQ, positively regulates virulence gene expression and is associated with the mucoid phenotype observed in Pseudomonas aeruginosa isolates from cystic fibrosis patients. 
28.00 28.00 29.50 56.20 27.60 27.30 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.85 0.71 -4.51 45 788 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 784 1 95 305 19 150.50 57 94.34 CHANGED MLpphcpApE+WGGspclID+WLppRQpLlVpYspLsultsh....tsspphss.pplpsFCphLVDYlSuGHFEIY-plhpcscthupp.uhclApplaP+IptoT-ssLsFND+Ysp.tpts..pltslsp-LSpLGEhLEpRF-LEDpLIchLass ....................MLNQL-NLTERVtGSNKLVDRWLcsRKHLLVAYasLVGIKPu....KEuahpLNEKALDcFCQsLV.DYLSAGHFsIYERIl+KlEGsG....pLscAuKIaPpLEsNTQpIMDaaDsslE.sAIDcDNhLEFQQsLSDIGEALEARFsLEDKLIhLl....t... 0 13 31 65 +4322 PF03873 RseA_C Anti sigma-E protein RseA, C-terminal domain Finn RD anon DOMO:DM07070; Domain Sigma-E is important for the induction of proteins involved in heat shock response. RseA binds sigma-E via its N-terminal domain, sequestering sigma-E and preventing transcription from heat-shock promoters [1]. The C-terminal domain is located in the periplasm, and may interact with other protein that signal periplasmic stress. 21.50 21.50 21.80 21.90 20.60 18.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.61 0.72 -4.08 40 851 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 844 4 108 351 9 55.60 58 26.46 CHANGED sphPVLpTlPhsGsusPVSh.........stst.......ppssspp.QlpcQc+RIsAh..LQ-apLQpRL ................PEoPVFN..TLPhM.G.KASPVSL..........GVPSt....ssssu...pQp..QV.QEQRRRINAM..LQDYELQRRL........ 0 10 33 73 +4323 PF03872 RseA_N Anti sigma-E protein RseA, N-terminal domain Finn RD anon DOMO:DM07070; Domain Sigma-E is important for the induction of proteins involved in heat shock response. RseA binds sigma-E via its N-terminal domain, sequestering sigma-E and preventing transcription from heat-shock promoters [1]. The C-terminal domain is located in the periplasm, and may interact with other protein that signal periplasmic stress. 
23.20 23.20 23.20 23.20 22.70 23.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.01 0.72 -3.68 35 1145 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1114 6 217 651 141 93.30 42 44.32 CHANGED Mtc..EpLSALMDGEts-p...pllptLspDp-hppoWpsYHLItDslRu-ss....tshphDluu+VusuL-sEPsh.s.............................ppPtPppsc+ts ...........Mt+..EpLSALMDGEslDs..............-LLs..tL..s..cs.s....E.hpc..TWcsYHLIRDsMRG-ss.........pslchDI.o..uRVhA...Al.EpEPs+ts.........................hhscuQPtPpphpp............................................................................. 0 35 97 162 +4324 PF04246 RseC_MucC Positive regulator of sigma(E), RseC/MucC Mifsud W anon COG3086 Family This bacterial family of integral membrane proteins represents a positive regulator of the sigma(E) transcription factor, namely RseC/MucC. The sigma(E) transcription factor is up-regulated by cell envelope protein misfolding, and regulates the expression of genes that are collectively termed ECF (devoted to Extra-Cellular Functions) [1]. In Pseudomonas aeruginosa, de-repression of sigma(E) is associated with the alginate-overproducing phenotype characteristic of chronic respiratory tract colonisation in cystic fibrosis patients. The mechanism by which RseC/MucC positively regulates the sigma(E) transcription factor is unknown. RseC is also thought to have a role in thiamine biosynthesis in Salmonella typhimurium [2]. In addition, this family also includes an N-terminal part of RnfF, a Rhodobacter capsulatus protein, of unknown function, that is essential for nitrogen fixation. This protein also contains an ApbE domain Pfam:PF02424, which is itself involved in thiamine biosynthesis. 
23.20 23.20 23.20 23.40 23.10 23.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.67 0.71 -4.59 118 1246 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1191 0 247 726 78 133.00 33 87.61 CHANGED Vlslcss......hshVc.spppou..Cu.uCs.spsuCGtthlsphhss...ps.tphpl.......tsshshcsG-pVplulsEsslLpuuhllYllPLlsllhuuhluphlhts........-hhshlsulhuhshuahhs+hh...scphtp......p.thpPh..llph.h ............................VluhpsG....pAhVp.s.phcuu..Cu..oCu..u+.....suCGothlschssp........ps..pplhl........sssp.sLtsGpcVElGlsEs.oLLp.SAhLVYhhPLluLhlsAsLh.phLhss..........-lhulhuu.llGshsGFlls+th...u+.+.hst......cspaQPllLpl.h............................ 0 81 160 208 +4325 PF03113 RSV_NS2 Respiratory synctial virus non-structural protein NS2 Mifsud W anon Pfam-B_2717 (release 6.5) Family The molecular structure and function of the NS2 protein is not known. However, mutants lacking the NS2 grow at slower rates when compared to the wild-type. Nevertheless, NS2 is not essential for viral replication [1]. 21.00 21.00 220.50 220.30 18.50 18.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.72 0.71 -4.17 2 45 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 12 0 0 37 1 123.40 90 100.00 CHANGED MsTspsc.TsQRLhlsDM+PLSlET.IhSLT+-IITHpFIYLINHECIVRKLsEpQATFTFLVNYEMKLLHKVGSTKYp+YTEYNoKYGTFPMPIFINHsGFLECIGIKPT+pTPlIYKYDLNP MsTTpssTTsQRLMITDMRPLSlETIITSLT+DIITH+FIYLINHECIVRKLDERQATFTFLVNYEMKLLHKVGSTKYKKYTEYNTKYGTFPMPIFINHsGFLECIGIKPTKHTPIIYKYDLNP 0 0 0 0 +4326 PF04479 RTA1 RTA1 like protein Kerrison ND anon DOMO:DM04303; Family This family is comprised of fungal proteins with multiple transmembrane regions. RTA1 (Swiss:P53047) is involved in resistance to 7-aminocholesterol [1], while RTM1 (Swiss:P40113) confers resistance to an an unknown toxic chemical in molasses [2]. These proteins may bind to the toxic substance, and thus prevent toxicity. 
They are not thought to be involved in the efflux of xenobiotics [1]. 20.60 20.60 21.00 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.42 0.70 -4.87 6 1213 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 150 0 974 1227 0 215.40 25 69.27 CHANGED IPFlhGsIhEhVGalsRshSSpNsschssallQsVhLLIAPsLYAAoIYMlFu+llclhtscshhLhSu+FsTshFVsuDllShlLQAsGGGLhuss....sS....poTGSpLlhAGLhIQlhhauhFlIsphpFhaplttpshahcc..h............................stpW..hahNhsLhluohLIhlRSIVRlVEFlpGa-GaIIoHEaalYlFDulPMhLssllFlls.hhtNlFchpscs.slp. ................................................hhhG..sh...hEhlGahuRhh.s.....t.....p...s.......s.........s....h...s......s.......a.l.h...Q.h..lhlllA...P.s........h...h.....s.....Aul............YhhLuRll.h..h.....h.....s........s..p.....t.....t............h..l.....p.....sph.hohlFlss.Dl.luhl..lQu....s..G.....G..u.l..h..u..s..u....................ss............hp..h....G...pplh.luG..LslQ.l..hh.h...sh...Fh.hh..s..h.h.........Fh..hR....h..t....p.......t........t...h...t....t.h.t.......................................................................hth.......th...h....hh.s.....L......hh.....us..hh.........I..hl..............Rs.......l.a.R....lsE..........h.......s..........t...........G............h....s.............u.......h.............l......hp............c...............E.hhh.hlhDuhhhhlshhhhsh....h...................hhh.......................................... 1 179 455 813 +4327 PF02334 RTP Replication terminator protein Mian N, Bateman A anon Pfam-B_12997 (release 5.2) Domain The bacterial replication terminator protein (RTP) plays a role in the termination of DNA replication by impeding replication fork movement. Two RTP dimers bind to the two inverted repeat regions at the termination site. 
21.00 21.00 21.00 23.20 20.80 20.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.72 0.71 -4.15 3 54 2012-10-04 14:01:12 2003-04-07 12:59:11 11 1 52 17 14 37 0 119.40 66 98.83 CHANGED MKEEKRSSTGFLVKQRAFLKLYMITMTEQERLYGLKLLEVLRSEFKpIGFKPNHTEVYRSLHELLDDGILKQIKVKKEGAKLQEVVLYQFKDYEAAKLYKKQLKVELDRCKKLIEKALSDNF ........................EKRosoGFLlKQRAFLKLYhIT.MTEQERLYGLKLL-lLRpEFKshG..a+PNHoEVYRuLHELl-D.GIL...KQlK.sK.K..EGsKhQEVVLYpF.pD...h..Et..AKLYKKQLKsELDRCttLIcKAlpDNF............................................. 0 1 7 8 +4328 PF02382 RTX RTX N-terminal domain Mian N, Bateman A anon Pfam-B_833 (release 5.2) Family The RTX family of bacterial toxins are a group of cytolysins and cytotoxins. This Pfam family represents the N-terminal domain which is found in association with a glycine-rich repeat domain and hemolysinCabind Pfam:PF00353. 19.50 19.50 20.70 21.90 18.80 18.10 hmmbuild -o /dev/null HMM SEED 653 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -13.02 0.70 -6.26 8 309 2009-09-11 00:38:24 2003-04-07 12:59:11 10 16 156 0 5 280 2 490.10 49 61.72 CHANGED pLsplKsplppshpsststL+pAupssKpuLppAupul+suGKKLILYIPKs..Y-utpGNGLpDLVKAA--LGIEVpR-ERNshAlAppshGTscKlLGLTERGlsLFAPQLDKLLQKppKlusslGSou.slupNLuKApTVLSulQShLGosLuGMcLDELL+spp..Gt..Sph-LAKAGlELsNpLV-NIASussTVDuFoEQlspLGohLpNsKuLGulGsKLpNLP..sLupsGhGLDllSGlLSulSAuhlLuNKcAsTuTKAAAGhELoNQVlGNIsKAVSpYILAQRlAAGLSTTGPsAuLIASsVuLAISPLSFLuIADKFcRAKpLEuYSERFKKLGY-GDuLLApFa+ETGsIDASlTTINTsLuAISuGVuAAuAGSLVGAPIuLLVSuITGlISGIL-hSKQAMFEHVAsKlusKIsEWEKKHG.KNYFENGYDARHtAFLEDsh+lLsshNKEapsERlVuITQQ+WDspIGELAGITRpGDKlpSGKAYVDaFEEGKhLcpcsccFsphlhDPtcGpIDlSso..ppoohLpFlTPLLTPGcEpRERpQoGKYEYITcLhVpGhDsWsVsGVtspsulYDaTNLIQ+s......sssssch+Es+IIucLG-GsDpVFlGSGSoplpAG-GHDsVaYsKsDsGtLTIDuT 
............................................................................................................................................pthtphhppus.thKpsh.tuupthppuup+LhLhIPcs..Ycs.pGsulp-LlKAA--LGIcl.hp-tsshthspp.h.sssppllGhT-RGlsl.APQLDpLLQKh.....KluptlGuss.slspplsKstolLSslQshhG.sLuGhsLDpLlpp....t..op.-lAKuul-LhNpLVsslus.spol-uFupQlspLGuhlpsh.tLuulGsKLQNLs..sLs.hu.GL-hlSGlLSuhoAuhhLustsApT.upKsAAGhElsspllGNlsKAlSpYIlA...QR.hA.tG..LSoTu..ssAuLIsSsV.LAISPLuFhshADcFp+ActlcpYucRFKKhsY-GDtLLAtah.+poGsIDAulTsIsTsLuuluuGlu.AAusuSLlGAPluhLVuulTGhISsIL-hSKQAMFEHVApKhts+Is.EWEKp.s...KNYFEpGYDARH...A.L..pDshphL.phs+phtsERslhITQQpWDp.IG-LAuIo+p.u-+..SGKuYlshhcpGthlctp....p...hDs.pG.....IDlSss.....th.o.phlhFhTPhhTPGpE.RERhQoGK.EYhTpL.lpth.DpWplp..tstpushDhopllQhh.......t.ps.ph.ph+l.upLGstsD.Va.uuu.........os.l.uGpGaDhV.Ys+.sphGhLslDup............. 1 1 2 5 +4329 PF00016 RuBisCO_large Ribulose bisphosphate carboxylase large chain, catalytic domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain The C-terminal domain of RuBisCO large chain is the catalytic domain adopting a TIM barrel fold. 
20.50 20.50 20.50 20.50 19.80 20.40 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -11.91 0.70 -5.58 16 53466 2009-01-15 18:05:59 2003-04-07 12:59:11 15 20 33109 329 419 47715 394 226.70 77 64.51 CHANGED GItVER-+Ls...KYGRPlLGsTlKPKLGLSuKNYGRAVYEsL+GGLDFhKDDENlNSQPFMRWR-RFLaVhEAlp+ApAETGElKGHYLNVTAsTsEEMacRAEaAKE.lGssIIMhDhlsGGaTAssohApWsRcNs..hlLHlHRAhHuslsR.Q+pHGIpFRVlsKhhRhuGuDHlHsGTV.VGKLEGDtthshGFhchLRpshlppDtucGlFFcQDWuuh.ulhPVASGGIHsh+MPuLlchhG.DDsVLQFGGGTlGHP.GssuGAsANRVALEAhVtARNEGR-hs+E..Gs-llRcAAKhss-LpsAh-lW ................................................GIQVERDKLN...KYGRPLLGCTI.KPKLGLSAKNYG.RA.VYECLR.G........GLDFTKD........D.......ENV...N.SQP.F..MR.WRDRF....L.F.sA.EAlaKuQA.E...TGEI.KGHYLNsTAuT.hEEMhcRA...a.A+E.LG..s..sI.lM.h.D.alsG.GaTA.ssolA.hasRcNs....hL...L..H..l..H.RAhHush....s..R.Q+N.HGh.p.F.R.V.l.s.K.hh.Rh.S.GsDHlHu....GTV.VGK.L.EG-...hshGFh.DhLh-sal.-h.s.hs.p...G.laF..s.Q.D..Ws.....S..h..tV.hPVASGGIHshpM.t.Lh-hhG..DDsVLQ.FGGGTlGHP.G.tsGAsANRVAlEuh......h.u........................................................................................................................................ 0 102 241 333 +4330 PF02788 RuBisCO_large_N Ribulose bisphosphate carboxylase large chain, N-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain The N-terminal domain of RuBisCO large chain adopts a ferredoxin-like fold. 
20.70 15.00 20.70 15.00 20.60 14.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.59 0.71 -4.44 16 48498 2009-01-15 18:05:59 2003-04-07 12:59:11 11 16 32360 297 249 42335 208 118.10 84 32.42 CHANGED +hsYasP-YssKDTDlLAsFRlTPQsGVsP.EAAAAVAAESSTG.TWTsVWTDLLTshDhY+u+uYcl-PVPGpssQaaAaIAYPlDLFE....EG..SlsNlhTSIlGNVFGFKAl+ALRLEDhRlPhAYlK ...........................................................................+LTYYTP-Yc.T.K.DTDILAAFRV........TPQ..P..GV..P.PE.EAGAAVAAESST.G.TWT..TV..W..T..D.G.L.T.S........LD.RYKGRC...YcI.....E.........P.....V.......s...............G.........E......E..............s...QY....I.....AY.V....AYP.LDLFE............EG....SVTNMFT.SIVGN.VF.GFKALRALRLEDLRIPsAYsK.......................... 0 62 146 202 +4331 PF00101 RuBisCO_small Ribulose bisphosphate carboxylase, small chain Sonnhammer ELL anon Swissprot Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.15 0.72 -4.16 48 1831 2009-01-15 18:05:59 2003-04-07 12:59:11 15 17 903 230 280 1682 177 80.90 41 69.50 CHANGED phpThSaLPsLospplt+QlcYllspGWssslEaspspphh..................spYWpMWpLPhFsspDsupVltElppC+psaPstYlRlluFDsh+..pspslSFIlpRPs ......................h..tshSaLPsLo..-.-QItKQ..lpYhlspGWssslEascc..t..h.................................................spYWphathPh..hs.pp.ttlh.plttshtt..st.t.al+h.uhDs................................................ 0 88 203 253 +4332 PF02915 Rubrerythrin Rubrerythrin Griffiths-Jones SR anon Structural domain Domain This domain has a ferritin-like fold. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.69 0.71 -3.85 38 3551 2012-10-01 21:25:29 2003-04-07 12:59:11 12 31 1616 63 1303 4084 1248 115.30 23 56.86 CHANGED -hLttAhsuEpsupthYpthAcphcpcs...lAclFpthAcsEpcHAthlh+hhpch..sh..s.....hhp.pht.hh..............slctshpsshhpEctuY.hYhcltcchtppptpch......hcphspsEptHtchactLh .................................Lhtu...hsuE.p.p.u.h.p.hY.....t...hh...A...c...t..Ac..c....-..s....tlu...pl.Fp.phAppE.p.pH..Ap..hh..h..c..h....h...p..p...h............................................................................................h.t..................sl..t..t....h..h...t...s...t...h.t.....p...p.....hh......a...h...p....h...u...c...p..........s........t...t...h...t..th................hpthsp..E.ttHtphh.th.h............................................................................ 0 520 931 1151 +4333 PF01330 RuvA_N RuvA; RuvA N terminal domain Bateman A, Finn RD anon Sarah Teichmann Domain The N terminal domain of RuvA has an OB-fold structure. This domain forms the RuvA tetramer contacts [1]. 22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.72 0.72 -4.18 33 4290 2012-10-03 20:18:03 2003-04-07 12:59:11 16 6 4259 26 913 2706 793 61.10 36 30.56 CHANGED MIshl+Gplsclstshlll-ss.GlGYpltsstsphhphspst..psplasphllREDu....htLYG ....................MIuhl+G.h...l....h.ch.....p...s.....s.....h.....ll..l-ss....GVGYclpss........sshh...pl.......ts........t.....csplaTahlV....REDu..hhLYG............... 
0 307 609 779 +4334 PF02075 RuvC Crossover junction endodeoxyribonuclease RuvC Mian N, Bateman A anon IPR002176 Domain \N 21.50 21.50 21.50 21.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.86 0.71 -4.38 9 3205 2012-10-03 01:22:09 2003-04-07 12:59:11 12 4 3142 4 812 2187 1059 147.50 43 83.83 CHANGED IlGIDPGochsGYulIcp.suppLphlssGsIRTsos.sLspRLhslh-ulppllcpapPshhAIEplFhupNssSslKLAQARGslhLAAspctlsVtEYsPppVKpAVsGpGpAsKpQVphMVp+lLsLsscPpP..tDAADALAlAIsH .....................................................ILGIDPG.phsGaGlI..............c.......t......p......G....p.......p.........l.....p...h...lus.....G.s..I....+T........s...s....s..............s..l...s..pRLp......tIasulsclls.p............a.....p.....P.....DhhAI..Ep.l...F....h.s............+......N.....s.s.o...sL.K.LGQARGsAll.A....us.p..ps..L.P......Vh.EY.ssppVKpu..VsGhG.pA-K....pQVp.tMVp.p.....lL.pLs.s..t..P.p..........sDAADALAlAIsH.................................................................................... 0 280 550 699 +4335 PF02042 RWP-RK RWP-RK domain Bateman A, Schauser L anon Pfam-B_9740 (Release 5.1) Family This domain is named RWP-RK after a conserved motif at the C terminus of the presumed domain. The domain is found in algal minus dominance proteins as well as plant proteins involved in nitrogen-controlled development [1]. 21.90 21.90 21.90 23.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.58 0.72 -4.14 23 430 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 55 0 303 406 6 51.00 45 9.02 CHANGED pppplohcplppYFphPlpcAA+pLs...........VssTsLK+hCRchGIsRWPaRKl+SL ..................h..tslolpsLpp..YFp.hslc-AA..+.p.LG..............................V.s.sTsLKRl...C...R........phG.I.sRWPpRKlppl..... 
0 101 213 273 +4336 PF01365 RYDR_ITPR RIH domain Bateman A anon Ponting CP (EMBL alignments) Family The RIH (RyR and IP3R Homology) domain is an extracellular domain from two types of calcium channels. This region is found in the ryanodine receptor Swiss:P21817 and the inositol-1,4,5- trisphosphate receptor Swiss:Q14571. This domain may form a binding site for IP3 [1]. 20.70 20.70 21.30 21.40 20.00 20.40 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.31 0.70 -5.10 18 1311 2009-09-11 08:33:13 2003-04-07 12:59:11 16 91 139 8 659 1057 8 191.60 27 11.19 CHANGED tVhplLp-Lltahsssppctp...p.....c..hh+sh+p+QcLhRp.ulhphVhcllptsascp............hs-ppcttap................clhpLsachLpthppGsRpNQthhtcphs........h.th.hs.G.l.h-slpslLhsN.cLhptlpEtt.lcphluLlc+pG..RcschLDhLpslssusspslcssQ..-hIphpllssGc......DlLlphpl+sshsph ..........................................................................ht.LpcLlhhhhs.ppph.......t...p.........+....sh.+p+Qp.LhRp.shh.p..h.Vhcl..l.p...tsht..tp...........................tpp.pp..t..tap..............................................................................clhp.hsa.chL.p.th...........p...........p.....s.s.RpNQ.t...hht..c..p..ls........................h.....t..hs.G........h...h-......s.........h...ps...llhsN.cLh.p.t.l..p...-t.....t..lc...p....h.Vs....hl...cppG.................Rp.sp.......hL.chLpsl.s.s.s.p..s..p..slcssQ....-hlp.pll.s..s.uc......................-lLlphplhts....h..................................... 0 187 233 423 +4337 PF02026 RyR RyR domain Bateman A anon [1] Family This domain is called RyR for Ryanodine receptor [1]. The domain is found in four copies in the ryanodine receptor. The function of this domain is unknown. 
27.40 27.40 28.10 27.70 27.10 27.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.17 0.72 -3.96 38 1192 2009-01-15 18:05:59 2003-04-07 12:59:11 11 75 161 22 593 969 18 92.00 37 8.15 CHANGED sasPpPlDhoslsLspcLp..................pls-+hAENhH-lWApc+l................ptGWpYG.....scss+pHPpLVPYspLsEpEKchsRphupEslKslluhGaslppsc.c ..............................................................tapPpPlDhop..lt.Ls.c.Lc...........ths-+lAENhHslWAtc+l................ptGWpYG....hp.....Dc.s..s..+pHPhLVPYspLs-......c........EKphsRp.spEslK.sLl.uhGaplphs.p.................. 0 108 161 353 +4338 PF00575 S1 S1 RNA binding domain Bateman A anon [1] Domain The S1 domain occurs in a wide range of RNA associated proteins. It is structurally similar to cold shock protein which binds nucleic acids. The S1 domain has an OB-fold structure. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.15 0.72 -3.89 51 46862 2012-10-03 20:18:03 2003-04-07 12:59:11 18 199 5058 118 11675 32137 17037 74.50 26 20.26 CHANGED phphGsllpGpVpslsp..hGhhV-ls...shcGhl.hSplstp............hh.psspshphGcclclpllcl-pppppl.LSh+ ...........................t..phGpll..p.G.p.Vp..p.lss.......hGs.F.V...........-.....l........s................s.........h...........-.......G.l......l........+..l.Scl.s.p........................................................hht.p..s...p.......c...h...l...p........h.G.c.cl....c..VpV...l...c.l....-..t....p..p.c...plsLoh.................................................................. 0 3844 7387 9758 +4339 PF00438 S-AdoMet_synt_N S-AdoMet_synt; S-adenosylmethionine synthetase, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold. 
21.30 21.30 22.70 21.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.31 0.72 -3.78 115 5500 2009-01-15 18:05:59 2003-04-07 12:59:11 15 15 4837 44 1389 3870 2374 99.60 55 25.69 CHANGED ppaLFTSESVoEGHPDKlsDQISDAlLDAhLppD..................Ppu.......RVACEThloTuhVlluGEl............oop...AhlDh...pclsRcsl+-IGYsp....sp.hGFDhcos.sVhsslcpQSs ............t.phLFTSESVoEGHP.DKluDQISDAlLDA.lLppD..................Pp...u..........RVACET.......hVp....TG..hVlVsGEI......................TTs.......AaVD...l...pcllRcTl+-IGYsp........u.c...hGFDuco...C.uVlsuIscQSP.................................. 0 486 899 1177 +4340 PF02772 S-AdoMet_synt_M S-AdoMet_syntD2; S-adenosylmethionine synthetase, central domain Finn RD, Griffiths-Jones SR anon Prosite Domain The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold. 21.00 21.00 21.10 22.20 20.30 20.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.56 0.71 -4.17 192 5503 2009-01-15 18:05:59 2003-04-07 12:59:11 11 14 4862 44 1376 3875 2680 120.30 51 30.99 CHANGED c...p.GAGDQGlMFGYAssETspLMPhPIhlAH+Lsc+LuclR..K..sG.t...l..........saLRPDuKoQVTlcY....s.........s..+..P.....l+.lcol.VlSoQH.sssls.................cpl+cslhcpVIcsll.Pt..t....h.lcs..c.TcaalNPTG+F ....................p.GAGDQGlMFGYAssE.Ts.......pL.......MPhPIsLAH+....LscRlu-lR..K.....sG...p....L.......................saLRPDuKoQVTlcY....-s........................s.+...P..l+lDTV..VlSTQH...s..t.-.l.s.........................pc.p.l.ccslhEcl...I.+sVl....Ps....p..............h..L..cc.....p...T...+aaINPTGRF............................................... 2 473 885 1162 +4341 PF02773 S-AdoMet_synt_C S-AdoMet_syntD3; S-adenosylmethionine synthetase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold. 
25.00 25.00 25.00 25.00 23.80 21.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.85 0.71 -4.44 11 5527 2009-01-15 18:05:59 2003-04-07 12:59:11 11 14 4855 44 1384 3894 2871 137.30 65 35.56 CHANGED IGGPpGDAGLTGRKIIVDTYGGauuHGGGAFSGKD.oKVDRSAAYAARaVAKSlVAAGLs+RC.VQlSYAIGVAEPLSIhV-TaGTuc..hopccLlcllRcNFDLRPGsIlKpLDLt+P...IYppTAuYGHFGRpc..FPWE+ .................IGG.PtGD.uGLTGRKIIVD..TYG.Gh..u.+HGGGAFSGKDPo.KVDRSAAYAARYVAKNlVA..AG.LAc+CElQl.............uYAI.GVAc...PlSlhV.-TFG.T........u...+........l.s..-.pp..l....h...c.h...V....+...c..hF.DLRPtuII....c...hLDL.......t.+.P.........IYppTAAYGHFGR..p..c.......hs..WEp....................... 0 483 898 1172 +4342 PF02574 S-methyl_trans Homocysteine S-methyltransferase Bashton M, Bateman A anon COGs Family This is a family of related homocysteine S-methyltransferases enzymes: 5-methyltetrahydrofolate--homocysteine S-methyltransferases also known EC:2.1.1.13, [2]; Betaine--homocysteine S-methyltransferase (vitamin B12 dependent), EC:2.1.1.5, [3]; and Homocysteine S-methyltransferase, EC:2.1.1.10, [1]. 
21.20 21.20 21.20 21.20 21.10 20.70 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.88 0.70 -5.12 16 4674 2009-09-12 00:27:11 2003-04-07 12:59:11 11 22 3246 24 1463 4033 4093 288.50 31 39.11 CHANGED llllDGuMGTpLpspshshsc.hhphh..................ss.s..s.ppP-llpplHcsYhcAGA-llpTsTapuo.........hhuhu-hslscts.pplsptusclARtst-phs......psc............hhVsGulGPhsthhs...sschsu.hsssh-..slhchap.phcsLh-uG.sDlLhhETl.shtps+Ahlphlcp.hhpt.uh.shslh.ussl.-uoshppsushhcuhh..h..sphshlGlNCshsspphp.hhphhsp.......sthlhsaPN..uG.s.sss....htastpssphsp......slccaspsGu....plIGGCCGToPcHIptlucslps .....................................................................llllDGuMG.Th.l.p.....p......h....s...l...s.....t...t..s.h.hht..........................................s.s.-.h..h.s.l.opP-...l..lp..pl.H.p.sY....l........c......AG.A.....D.........l.....l.........p.T.........NT..F..sus.............................hh.s.h..u.c.......a.......s...h...p...p.......h....s...........t-.l....s.tt...usclA.+psscch............................sscp................taVAGslGP.hs...s.....ss.p.s.u.h.t...s..l...oh........-..........plh.pta.p.cphcu.L......l.....-......u.........G.....s.....Dl......l......llE....T...h...Dhhp.s.+A.u....l.....h..ul...c......p...............h...............c.......t.........................s....h................p............hs.......l.........h........s..s....s...l..h.-...u.s..u.....h.p.h.u.u..p..s..h....pu..............h...h...........................p..s.....h..s....l......G..l..NCuh...G...sp...t..h.p.t.h...l...p..t.l.uph...................s..pthl.....s....s....aPN........A..G..L.P..p..ths................pYs..t..........s......s...p...p....hu....p..............hh.p.ca.h.....p..t.Gh..............sll..GGCCGTTPpHI..p..slscslt.s.............................................................. 
0 500 891 1214 +4343 PF04689 S1FA DNA binding protein S1FA Kerrison ND anon DOMO:DM04705; Family S1FA is a DNA-binding protein found in plants that specifically recognises the negative promoter element S1F [1]. 20.50 20.50 21.10 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.53 0.72 -4.10 4 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 20 0 27 40 0 66.70 66 44.78 CHANGED tsssthEu.KGLNPGLIVLLVlGGhLLsFLVGNalLYsYAQKNLPPRKKKPVSKKKMKREKLKQGVssPGE .....p....tEs.KGLNPGhIVLLVVuuhLLlFhVGNYsLYhYAQKTLPP+KKKPVSKKKhK+E+LKQGVSAPGE....... 1 6 16 20 +4344 PF05116 S6PP Sucrose-6F-phosphate phosphohydrolase Moxon SJ anon Pfam-B_6442 (release 7.7) Family This family consists of Sucrose-6F-phosphate phosphohydrolase proteins found in plants and cyanobacteria. Sucrose-6(F)-phosphate phosphohydrolase catalyses the final step in the pathway of sucrose biosynthesis [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.42 0.70 -5.08 23 845 2012-10-03 04:19:28 2003-04-07 12:59:11 8 21 568 13 267 12656 492 232.40 21 49.05 CHANGED sphllVoDLDpThl........u-stuLtchpslhc..thcp-shLlauTGRo.sshpcLhcEhsL.pPDhllsSVGTEIhY..Gp.shhPDpuWpphLsp.cWs+..phVhchhuchPp.L.phQs-p-QpsaKlSaal-ppsu.shlppLpphLccpsLcl+lI.....aSsGpsLDlLPtsAuKGpALpYLtp+aphs...sppoLVCGDSGNDtpLF.lssshGVhVuN.uppE...hl..htEss+sp.plaaAppcsAuGIl-ultHasl 
...............................................................................................................................................................................s..hhlh.DhDtThh.......................tp..........t...h.....h..t..h..t....t....h.hp............t...pt...phhhshsTG....pshp.phh....p....h....h.....p........p..h....s.....h..................p.....P......c..h.......h..........l..ss....l..G.......o.c.lhh.....................hp.....p......h.......h..............s.....p.....s.....a......p......p..........h.......l..............s...................p.......t......a....th.............p...tl......p..............c.....h.......l............t....p.................h............s..........p............l.....h................Q......................p...................t.................p...........p.....s.......t.....................a.....................+.......h.......s...................a......h.........h...c....p.....p...t...t........t....p................................l...c.t......l........p...p..h...h......p..t...p...s....l....p...s...p...h.h.................................h.....p...s......u.....h.....c.....l...D..ll.....P.h.sA.u.Ksp.A.l.p.a.L.h...p.+..a...sls.............hpp...h...l..s..h...GDS....G....N.D...h.p...hL..t.....sh.......t....pu..h.ll..uN...spt-.................................................................................................................................................................................................................... 0 83 184 233 +4345 PF01023 S_100 S_100_domain; S-100/ICaBP type calcium binding domain Finn RD, Bateman A anon Pfam-B_242 (release 3.0) Domain The S-100 domain is a subfamily of the EF-hand calcium binding proteins. 
20.30 20.30 20.30 21.00 20.10 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.88 0.72 -4.63 24 1006 2012-10-02 16:17:27 2003-04-07 12:59:11 14 21 63 279 499 923 2 43.00 37 14.75 CHANGED LEculpslIslFHpYSs+cGctpsLsKpELKpLlp+ELssaLcp ........LEpultslIssFHcY.usc-..G..c.p..tp.LoKpELKpL.lppELsshlp........ 0 28 46 114 +4346 PF05124 S_layer_C S-layer like family, C-terminal region TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 22.50 22.50 22.60 22.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.51 0.70 -4.72 21 129 2009-01-15 18:05:59 2003-04-07 12:59:11 7 18 45 0 79 134 6 166.90 23 29.97 CHANGED psluussGaAplhIssslKsl-LG-EalsDaEhhsllpss.......sslchp-s..................hts...pplGlALpYsGDclpslccscphc.lusYA.ph.hDD-sp.s.cLpsaFphc....EpK-lolshGpclp...Vhsu-lhhp...tts..sh.hssPlshLDoEh...uL-su-psLILVGGPVVNplTcELsss..GtlsI..........-spSsATlsllcssAN..GscVLVVAGGDRtuTcpAApALlphl ........................................................................................................................................................................................................................................................................................................................................................................................................................s..ph.................s...p.hs..pslILlGGPVuNtlscp.ltsp....h.lpl.............................................s.sps.u...s.lt..h......l.......c.......sshN......sps.VlllAGu.DR.uTcsAsphh.......................... 
0 8 16 59 +4347 PF05123 S_layer_N S-layer like family, N-terminal region TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 25.00 25.00 30.00 28.90 24.70 22.20 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.82 0.70 -4.78 18 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 36 0 27 57 1 268.00 29 44.88 CHANGED KKIGAIAVGuAMluouLuouuh.AspclG-VssFh...sshVK.sGpPNVcIVVGSsA.AAhDVVSAAsIAAKIGSLhYpEssVE..DuSAslshsssu-S--lsl....h...sth.hhussspthlhsuuDsDY..........s.shsssshsshshsuhspsc....ssssLtD...........................LssLhplpDlDPssa...............hs.sDhD.AsEhlhsplsss.......h.ssotshplscDpllYsols..................apsshsuh..tshpsLp.....GhcIPaLGcEhslVclDpD.........DDhlhlGpcsY-Gslcp.G-saslGsGYpVcl ..............................KKIuAlAlGuAMluusLusush.Ahpp.l.uslss.....s.hVp.sGpPNVcIVVGSsA.AAhDVVSAAsIAAKIGSLhYpEtslc..ssusslphpspsc.o-sh.l.....................hss.sstthh.hs..ssss.sY.................s.t...s...ss..hs.....sh..htshsphs.....thhslt-..............................................................................................................ls...shhplpch..DPpsa...............hs.pD.-...us...Ehlhshlpss.................stphpltccphhYholh.....................hpss...ts.....t.htslt..............Gh..plshLGpchhllplsts.........schlhlGp.sapGhlcp.G-shslGsGYplcl................................................................................................................................................................................................................................................................................................... 0 3 6 16 +4348 PF00954 S_locus_glycop S-locus glycoprotein family Finn RD, Bateman A, Mistry J, Guo X anon Pfam-B_357 (release 3.0) Family In Brassicaceae, self-incompatible plants have a self/non-self recognition system. This is sporophytically controlled by multiple alleles at a single locus (S). 
S-locus glycoproteins, as well as S-receptor kinases, are in linkage with the S-alleles [1]. 21.80 21.80 21.90 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.31 0.72 -10.95 0.72 -3.78 52 2074 2009-09-12 20:30:07 2003-04-07 12:59:11 15 105 84 0 900 2114 0 104.00 27 16.50 CHANGED aRSGPWN..GhRFoGlP..-hp.thsh..hshsFs.....psspElsaoaphs.sssh.hSRlhlossGhlpphsW.ssspsWsh.hattPtc.pCDhYthCGsauhC...sssssPh..CsClpGFhPcs ...........................................................................s.Wt..t..h...........................................h.....h.shs..........pspp.Ehh.as..a..p.h.t.....s...s......s..h......h.s.R.l..sl...s.....s........G....p...l.p.hhs...W..........t.....s...s...p.......p...W.s........h.....h...a...p.....t...P..p.......c...pCD.h......Y......s......hC.Gs.a..u..hC.............s..h..s....s.........s....P.h.....Cs..Cl.p.GFpP......................... 0 43 507 728 +4349 PF00526 Dicty_CTDC S_mold_repeat; Dictyostelium (slime mold) repeat Finn RD anon Pfam-B_96 (release 1.0) Repeat \N 21.30 21.30 21.80 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.84 0.72 -7.18 0.72 -4.07 30 447 2009-01-15 18:05:59 2003-04-07 12:59:11 13 20 8 0 381 438 10 23.70 56 31.53 CHANGED NsCThDoCsspsG..CsHTPlsCDDs .NsCThDSCsss..s.G..CsHTPIs.CDDt...... 
0 300 381 381 +4350 PF00277 SAA SAA_proteins; Serum amyloid A protein Finn RD anon Prosite Family \N 21.10 21.10 21.20 21.30 20.60 20.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.41 0.72 -3.67 18 208 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 68 0 67 243 0 98.00 63 73.11 CHANGED WapFhtEAsQGAtDMWRAYpDMREANa+sSDKYFHARGNYDAApRGPGGsWAA+VIScuREshQuhh.........GRGtEDotADQcANcWGRSGtDPN+........YRPpGLPcKY .......................hpFltEAhpGAtDMWRAYs.DMREANYpsuDKYFH.ARGNYDAApRGP...G.Gs....W....A...AcVI........SDAREshQphh......................G+GtEDShADQtANcWGRS.GpDP.N+...............aRPtGLPpKY..................... 0 12 14 23 +4351 PF04455 Saccharop_dh_N LOR/SDH bifunctional enzyme conserved region Waterfield DI, Finn RD anon COG1915 Family Lysine-oxoglutarate reductase/Saccharopine dehydrogenase (LOR/SDH) is a bifunctional enzyme. This conserved region is commonly found immediately N-terminal to Saccharop_dh (Pfam:PF03435) in eukaryotes [1,2]. 22.10 22.10 22.10 24.60 21.50 22.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.10 0.72 -3.90 35 168 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 154 4 103 180 22 102.10 34 17.28 CHANGED hhs.Rp..lELcGHllDShlls+slDpIh-hGGsFcllcFslG+p+sDsSaAclpVpAcspcpL-pILspLpplGAs..s-....p-scLtsuspDtVhP-sFY..oTTNas ...................................h..scplcLcGHLlDohlls+sLDhIh.-hGGsFcl.lchc......l......G.pp.....+.p........ssS.aAclp.VsAsspphL-cIlspLpslGAs....-........p-spht...ss.tstVhPcsFYsoT.h.................................. 0 34 78 97 +4352 PF04092 SAG SRS domain Bateman A anon Pfam-B_1675 (release 7.3) Family Toxoplasma gondii is a persistent protozoan parasite capable of infecting almost any warm-blooded vertebrate. The surface of Toxoplasma is coated with a family of developmentally regulated glycosylphosphatidylinositol (GPI)-linked proteins (SRSs), of which SAG1 is the prototypic member. 
SRS proteins mediate attachment to host cells and interface with the host immune response to regulate the virulence of the parasite. SAG1 is composed of two disulphide linked SRS domains. These have 6 cysteines that form 1-6,2-5 and 3-4 pairings. The structure of the immunodominant SAG1 antigen reveals a homodimeric configuration [2]. The SRS domain is found in a single copy in the SAG2 proteins. This family of surface antigens are found in other apicomplexans. 20.80 20.80 20.80 20.80 20.60 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.94 0.71 -3.74 58 921 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 7 14 768 924 0 131.00 20 67.81 CHANGED sssssoC..............sssssssplslopp.ssolTlpCussss....hhPs..shsp......................ptCs....t.ttssp.hslpslL..ssssps..Whpt......spsspstsL.slspsshPtss.psFhlGCptpsss.....................sttssCpVpVsV ..........................s...hssC......................ts.ss..sshpl.slopp.psslolpCsssss.......hhPs..shsp.........................phCp...............t.t...ss...s..p.t..h.sl.s..s...l.l.....ssssps......hhpp..................sssssshsL...sls.sshP.t.ps..p..p..h..hhu....Cttssss......................................................................tttssCpVpVsV........................ 0 325 325 768 +4353 PF01259 SAICAR_synt SAICAR synthetase Finn RD, Bateman A anon Pfam-B_1426 (release 3.0) Family Also known as Phosphoribosylaminoimidazole-succinocarboxamide synthase. 
19.70 19.70 20.70 21.10 19.40 19.40 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.54 0.70 -5.28 11 4852 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 4550 22 1252 3379 2876 240.90 34 87.99 CHANGED ph.llsc.....GKs+-IYcl-D..spLLhlspDclSAaDslhcspIstKGplhsphSpFhFphLp.h.lssHhlct.s......................-.sshls+KhchlPlEsVVRsYlTG....ShhKc...sGsh..cGlclsssllEs.hhs-slhsPphpsE...Hs.slu........hsps..hlG.-pssplcchshplaphhcchhtppGlIlsDhKhEFGlDp-s.cllLsDEl..PDSSRhWsssshc....h......hDKQhhRchLsssspuhp.ts....hsphspsl ..........................................................p.thlYc...GKsKclY..........p...........s..............s.........c............shl.l.hh.hpDcho.........A........as...s.h......hp.......p.......p.....l....s.sKGtlsNplosahFctLp.....c.........h....sl........s.....o...Hhlcths.........................................................s...pphl.l..+...+...l......c............h...............l..P..lEsVlR.shh.s.G....ShhK+....h..G.hc........pG....h..tLs.p.slhEh......hh.K.....s..........D.........sltDPhhssp..........Hspsls.........................................hss..t....................................-.p...l.s...pl...+...chs......h......cl....p....hlpchh..t.ptG.....lhLlDhKlEFG....h..........s......p.....s..G.......c......l.lLuDElu.PDosRh..WDtcshc......p..............hD.K-haRp.....Ls.s.h..h.tsap..thh.+l....hh..................................................... 
1 399 785 1040 +4354 PF03534 SpvB Sal_SpvB; Salmonella virulence plasmid 65kDa B protein Griffiths-Jones SR anon PRINTS Family \N 20.00 20.00 20.20 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.82 0.70 -5.39 17 349 2009-01-15 18:05:59 2003-04-07 12:59:11 8 62 230 0 113 340 33 223.30 27 12.78 CHANGED LussGssGhAolSlPLPlSsGRG...huPsLuLsYsSuuGNGsFGlGWphslhsIsR+Ts+GlPpYssp.....DpalGPsGEVLlssh.sspGp.phcpt.pthtshshstsaoVoRYpsRlEupFsRlEaWpPpsssss............sFWllassDGplHlhG+sspARlusPp....ss.s+lApWLLEESVo.ssGEHIhYpYcsEDcsss-tschpsp............stoAQRYLppVpYGNhpsutsLashss....s.Pssp..p.........WLFpLVFDYGERssshpssPtaps..........stsWhsRpDsFScacYGFElRTRRLCRQVLMFHph ...................................s.s.GtA.o.holPlslssG.R.u...hsPsLuLsYsSs....u.........G.......N....G......................hGlGWsl.u.s.....u.I.p......R..........c....T.p.......t........s..lP.p...Yssp........-t.h...h.h.suc.hl........stt.st..................tp..................................................tth..t.s.....t..pa.sc.hp.s.t..Ft+lphht..sts..sstt.............aWhlhstsG.hthhG...t.....s......s....su.+.l.....ssst......................t.....tphhpWhl.p.cshs...stGpt.lhYpYttp...s.t..................................................................................................................................................................................................................................................................................................................................... 
1 46 73 97 +4355 PF03538 VRP1 Sal_vir_VRP1; Salmonella virulence plasmid 28.1kDa A protein Griffiths-Jones SR anon PRINTS Family \N 21.80 21.80 21.80 22.50 21.20 21.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.12 0.70 -5.38 4 244 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 119 0 42 182 1 201.10 21 19.16 CHANGED sppsSPLL........S+ltstaplsstlhcp..............GYpSlFDIlRhsRcpFIccapt..us+utt.haDhAsuhApQlhppFRpppL................o+tV+tshhpsaSs.s.......PpYts.F.-s...WpphsPssusEussSPVuYLhclYphs.p.E.susspul.slsERRsDLusLhlsscuINppIssLplVNphLSpthpthlp.ps.t..ss.thLupsRaP.pLPYcasppQIphuhsspcspLtcIhppsshsaP...W.......................................................................h.....LSsslssA.schsh...........................................................................................hAsphu.pQQplhsEsltssspp...FYQsNYGlss.sss.hctlshFspQTuloVs ...........................................................h..............................................................................................................................................................................................................................................................ushtYh..hh.......................................t.................h..thspRRPDLtpLhlsp..pshppplssL.h..phh..........................................................................s.h..hPYp.shp.lp....l...s..thtth.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................ 0 6 18 32 +4356 PF01758 SBF Sodium Bile acid symporter family Bashton M, Bateman A anon Pfam-B_697 (release 4.2) Family This family consists of Na+/bile acid co-transporters. These transmembrane proteins function in the liver in the uptake of bile acids from portal blood plasma a process mediated by the co-transport of Na+ [2]. Also in the family is ARC3 from S. cerevisiae Swiss:Q06598 this is a putative transmembrane protein involved in resistance to arsenic compounds [1]. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.22 0.71 -4.89 17 4441 2012-10-02 17:06:44 2003-04-07 12:59:11 11 12 2582 2 1463 5194 1717 184.60 24 56.02 CHANGED slhLhlhMashhhplphcchtphhpc...sKhlhluLlhpallhPllhFllu.hhhthhst...................hhhGllllGssPssuhu.las.LucGchs.Lulshsshoo....htshhhsshhhhhlstt.h.ls........h.hhphhtolllhlhlPhhhGhls+hhh.......hh.phhh.hlsshulhullhslhlhhuhpuphlsph ..........................................................................................hLhlhMa.shhhp.lphpch.t.pl.h.cc.................s+...s.............l...h...l.u.l.l.h.pa...ll.hP.hh....h.a.h.L...u....h...h...h...t..h...st..............................................hhsG.l.I.L....l.G.s.sP.s.s.s.h.s.....V.a..........o.hL.uc..GDss..h...................ols.h.sul...so........lhshhhsP......lh...h....h....h...l...s...s.t...h..ls...........................h..h..s..h...hh..S.l..lhh.lll..P..lhhGhlh+.thh...............................h.h..t...p..p..h...h.s..h..l...ss..ho.l...l...ul.lhs.ls.lh.hu.hpsp.lh..t.................................................................................... 
0 485 921 1207 +4357 PF03536 VRP3 Sal_vir_VRP3; Salmonella virulence-associated 28kDa protein Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 40.50 33.00 19.10 21.30 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.68 0.70 -5.21 3 62 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 47 13 7 51 0 213.90 60 91.85 CHANGED PINRPsLKLNLPSLNVVssSEtPQMsSTNE+LKNNFNSLHNQMRQMPsSHFKEALDVPDYSGMRQSGFFAMSQGFQLsNHGGDVFIHA+RENPQSKGDFAGDKFHISVtREQVPQAFQALSGLLFSEDSPIDKWKVTDMERVsQQSRVulGAQFTLYVKPDQENSQYSAShLHKTRQFIECLESRLSESGlhPGQYPESDVHPENWKYVSYRNELRSGRDGGEMQcQALREEPFYRLMsE ............................................................................................s.h..sssc..cLKsNFs.L.asQhRphPsoaFK.A.sVPsYSshpQSuFhsMtQG..FQ.lsNH....u.hDVFIHAcREsPQSpGcFAGDKFHISVhR-.VPpAFQALSGLLFSEDSPVDKWKVTDM.....p+Vs.....QQuRVulGAQFTLYlKPDQEsSQYSApaLHKhRQFIpCLEScLScsGVh.sG.ppP-SDV+PEsWKYlSYRNELRS......sRDGuEhQcQtLREEPFYRLMhE............... 0 2 3 5 +4358 PF01536 SAM_decarbox Adenosylmethionine decarboxylase Bashton M, Bateman A anon Pfam-B_600 (release 4.0) Family This is a family of S-adenosylmethionine decarboxylase (SAMDC) proenzymes. In the biosynthesis of polyamines SAMDC produces decarboxylated S-adenosylmethionine, which serves as the aminopropyl moiety necessary for spermidine and spermine biosynthesis from putrescine [1]. The Pfam alignment contains both the alpha and beta chains that are cleaved to form the active enzyme. 
21.20 21.20 21.30 21.40 19.90 20.40 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.20 0.70 -5.77 15 693 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 387 56 340 679 15 293.80 34 87.33 CHANGED sSAhuFEGhEKRLEI.Fhcsthhs-spu+GLRuLs+uplDclLssAcCoIVSohuN-plDSYVLSESSLFVasaKIIlKTCGTT+LLhuIPsILcLAcslu...........hpVpuVpYoRtsFlFPssQsaPHRsFoEEVshLDuaFu....supAYlhGsscps.p+WalYoso.....sps.....pps...pPsaTLEMsMoGLD+-pASlFaKscu...............usAusMTppSGIc+ILPsSpIs........DFpF-PCGYSMNu..l-usshuTIHlTPEDGFSYASFEosh....hshcshslspllp+VLsCFcPscFSVsla....sssss+phpp.shsl-lc.uYshcctshpsls.hussllYtcFs+st ..............................................................s...hFEG.EKhLEl.F.................................................................t........t.......s.......t........s...........LR.s.ls....ttphcphLp.spCpIlS.hp.......s.......ct.......hDuYlL..S..ESShFVash..+.lllKTCGTTpLLhuls.l.....Lc.lAtths..........................................hp.ltplhYoRts.FhhPt.....t.Q..h.PH.csap-EVthLsthFs.............supAYhhG.p..sps....pW...alYsss....................................t................p........................ps.s.TLE.hhMo.tLD.p...hsp..F.ah.s.ps.....................................................................sputthoptoGlp..c..lh...Pt.s....t....l....s................................................sa.FpPCGYShN............u........h.................p......ss......th.TIHlTP..E...s....t....a..SYASFEss...............................p.t.h.s.h.sp.llp+VlthFp..PscFslsla.............t.t.t.t..........t................h...........................................................t............................................................................................... 
1 100 180 271 +4359 PF02199 SapA SAPA; Saposin A-type domain SMART anon Alignment kindly provided by SMART Family \N 20.30 20.30 20.50 20.40 20.00 19.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.07 0.72 -4.19 32 428 2009-01-15 18:05:59 2003-04-07 12:59:11 10 66 91 0 167 403 0 33.70 48 10.43 CHANGED GpcpCshGPuaWCpshcsAppCs..AVpHCpppVWs .......GhcpCshGPuaWCpshcTAspCs..AVcHCpppVWs...... 0 47 59 101 +4360 PF03058 Sar8_2 Sar8.2 family Mifsud W anon Pfam-B_2148 (release 6.4) Family Members of this family are found in Solanaceae plants, a taxonomic group (family) that includes pepper and tobacco plant species. Synthesis of these proteins is induced by tobacco mosaic virus (TMV) and salicylic acid [1]; indeed they are thought to be involved in the development of systemic acquired resistance (SAR) after an initial hypersensitive response to microbial infection [1,2]. SAR is characterised by long-lasting resistance to infection by a wide range of pathogens, extending to plant tissues distant from the initial infection site [2]. 27.10 27.10 27.20 27.90 27.00 27.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.40 0.72 -3.66 4 33 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 9 0 0 33 0 83.60 47 99.24 CHANGED MFSKT.LFLChSLAILlhVISSQADAREM.SKAAAPITQAMNSNNIoD.QKoGAGllRtl.Ghhh+hspsh...................CKhCpCp.tlCshC..Ct ..Mh.KsNlF.LChSL.IlLhlISSQssAREM.ScAuA.slTpuMsuNNhTp.pKsG...uulh+plsshhppssp.ssp.....................hhGpsCKhCssph..C.h.......... 
0 0 0 0 +4361 PF04790 Sarcoglycan_1 sarcoglycan; Sarcoglycan complex subunit protein Waterfield DI, Finn RD anon Pfam-B_6135 (release 7.5) Family The dystrophin glycoprotein complex (DGC) is a membrane-spanning complex that links the interior cytoskeleton to the extracellular matrix in muscle.\ The sarcoglycan complex is a subcomplex within the DGC and is composed of several muscle-specific, transmembrane proteins (alpha-, beta-, gamma-, delta- and zeta-sarcoglycan). The sarcoglycans are asparagine-linked glycosylated proteins with single transmembrane domains. This family contains beta, gamma and delta members [1,2]. 27.40 27.40 27.80 29.50 26.80 27.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.62 0.70 -5.22 16 335 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 85 0 193 287 0 224.50 34 82.30 CHANGED laplGIaGWRK+ClYhhVLLLhllhVlNLsLTlWIlpVhpFs.cGMGsLclppc.Glpl.cGco-F..lpPlYspcIcuRpDpsLhlpS......spslolNs...RNtpGpls..s+lslusct..l.stsptFplpss.st+.LFosDpsphhht.tpLclssPpGu....lFt+uVpTstl+us.sspcL+LESsTRplshcAscGVplcAtAGtl-hpuppDlpLpSs.......cGplhL-A.pslhLs....+LPhups......ssGspps...hYclCVC.ssG+LFhussstsps....Cp .............a.hGlhGh+ppshahhllLLhllhllNLhlTlh............IhtV.....hphs...sGhsphcl...ppp...G..........lhh...c.G.o-h......l..PL.atpplp...uR.spsLhlpS.......spslslpt...hs..Gph.........spL..ls.pt..s.spsptFplpss..stphLFosDtpph........h..phspGs..................lh.pslpTstlp....up..s.p-LplcSssR.lhhcuscGVplputs..lchpst........t-lpLp.S..............-GplhLsu...tslhls....pLPpus....................sst.tpt................haclCsC..ssGpLahs.ssstp................................................................................................. 0 42 56 118 +4362 PF03343 SART-1 SART-1 family Mifsud W, Mistry J, Wood V anon Pfam-B_3690 (release 6.5) Family SART-1 is a protein involved in cell cycle arrest and pre-mRNA splicing [1][2]. 
It has been shown to be a component of U4/U6 x U5 tri-snRNP complex in human, Schizosaccharomyces pombe and Saccharomyces cerevisiae [3]. SART-1 is a known tumour antigen in a range of cancers recognised by T cells [1]. 20.50 20.50 20.70 20.50 20.00 20.40 hmmbuild -o /dev/null HMM SEED 613 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.63 0.70 -13.46 0.70 -6.17 38 393 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 272 3 287 395 7 455.60 25 80.12 CHANGED -uLSIEETNKlRhpLGLKPLslsss............................ssshcscputshcshpch........pcpscs++cc-ch..pppIpc...u+-ctp.pppKLts..psLu-t...........t-ss.DspuWlpcp+Kh..p+phpttctct.....................tpptpc+pps..spasspD...LsGlKVsHclcchpc..GcshlLTLKD.ssVL-s--t.....D.LpNlsLh-+c+tpcplch+K+p............p.sp...t.tppslLu.pYDE-lpsccpcp.........hph......cspupssct..................................pppccp......pspt+h+hphhsh-.....................spspp.sSDYhs.p-.lK..hKKsKK....................Kctpppp++phh......Dp.t..st..t..s.t.tttt...............cl-pps......................ss.hcpc.p.ps.shs...............DD-DL..QtpLsppR+hth+....c+pchcs.EclAcplcpppspsp........pcpp-.......................csulVls-TSEFlps..Lp........psshtcpttpp...................pcpspp.tsh.psppsptp.............tstssphpps.ss.pcc-........................t..tthssshl-cEsslspGLuusLphLKp+Gllc................ppppchhtppthhth..phptphpcct.c.tss+hsph....R-chh.chppcp+-ppc..............................tY+PcVcLcYlDEhGRphssKEAFKp.LSHpFHGKGsGKtKpEK+lKKlE-E+ 
.................................................................hSlE-sN+lRhpLGLpPl.h...........................................................t..t..t.p.h............................t.....pptpth...tttltt....+ptt..........htt..ttltc.....................sh..sWl.p.pph.....................................................ttt....ttts.........LtslpVtHthtph.p....ttp.lLTL+D....psl........pptp....................D..L.N.p......hhcppchpcpht.pppt......................t..........................t.....t.l.Lt...pYD-....p.ttpt.pt..............................tttt.h.....t..........................................................................................t.tt..t........t.tp.p..t..p.p.........................t.t.ss-h..hp...tc..h.p.....hKK.K..p.....................................+h...th.pp...................................................................................................t....................................................................................t..............................................--..t-h.....tt.Lttt+pht.h...........t........p...p.....tlh..p.lt.t.ttt.............................................................................tsslhhsthsEFsts.....lt...............t......t.....t...............................................tt..t....t.t.tt.............................t.thtt....p...tt.............................................................................................................h.ttE..hstGhuusLthhpp+uh.lp..........................................................ttt......t..ptp..h.........t.......t....p.pcppt....................................................................hpsplplpYhD-.G+.hs.KEAF+..LSHpFHGKssGK.K..hEK+h.K+hppc........................................................ 
0 101 164 247 +4363 PF00269 SASP Small, acid-soluble spore proteins, alpha/beta type Finn RD anon Prosite Family \N 20.60 20.60 20.70 20.70 20.10 20.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.84 0.72 -4.11 66 1862 2009-01-15 18:05:59 2003-04-07 12:59:11 15 3 421 3 380 1047 9 56.40 40 83.13 CHANGED NchlVPp..AcpAL-ph................KhElApElG....V..thppt..G..-lTSRpsG.p....VGGpMVK+MlpttEppht .....................................Nphhl.tAppul-ph....................KaElApEhG...........................V.p..s..s.....t.....sh.o..uRpsG.u........VGGph....sK...RhlphAEppl.t....................... 1 177 310 333 +4364 PF04259 SASP_gamma Small, acid-soluble spore protein, gamma-type TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The SASP family is a family of small, glutamine and asparagine-rich peptides that store amino acids in the spores of Bacillus subtilis and related bacteria. 25.00 25.00 26.00 25.80 24.60 24.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.66 0.72 -3.85 7 317 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 241 0 29 139 0 75.60 64 86.98 CHANGED StTsAQpV+pQNtpSut............t.auTEFASETNsQpV+QQNtQSt.t.t.su.......utpspassEFASETssppV+pQNtpupApKppsS ..............K.ATSGASIQSTNAS...........YGTEFA.TETNVQAVKQANAQSEAKKAQASuA......QSANASYGTEFATETDVHuVKKQNApSAAKpSQSS........... 
0 8 20 22 +4365 PF03898 TNV_CP Satellite_CP; Satellite tobacco necrosis virus coat protein Finn RD anon DOMO:DM04608; Family \N 27.20 27.20 28.60 306.80 21.10 27.10 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.30 0.71 -5.26 4 4 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 4 62 0 5 0 199.80 42 98.28 CHANGED MoK+Qsppps....+psstpsV+sIlpsphEpKRFsLlsssNsssTAGTVhNLSNsIIQGDDIsQRoGDpI+hhpphL+sRsTuITsSQo..F.RFIWF+DNpNRGTTPuVTEVLsSAshhSQYNPlThQQ+RFTlLpD.VpLsCSlsGcsIKcpshshstp.tlaYNGAsuVAuSNGPGAlFhL.IGDplsG..paDluhEhhYhDh MoK+Qsppps....+psstpsV+sIlpsphEpKRFsLlsssNsssTAGTVhNLSNsIIQGDDIsQRoGDpI+hhpphL+sRsTuITsSQo..F.RFIWF+DNpNRGTTPuVTEVLsSAshhSQYNPlThQQ+RFTlLpD.VpLsCSlsGcsIKcpshshstp.tlaYNGAsuVAuSNGPGAlFhL.IGDplsG..paDluhEhhYhDh 0 0 0 0 +4366 PF03110 SBP SBP domain Bateman A anon Pfam-B_737 (release 6.5) Domain SBP domains (for SQUAMOSA-pROMOTER BINDING PROTEIN) are found in plant proteins. It is a sequence specific DNA-binding domain [1]. Members of family probably function as transcription factors involved in the control of early flower development [1]. The domain contains 10 conserved cysteine and histidine residues that probably are zinc ligands. 21.60 21.60 22.20 22.70 20.90 21.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.08 0.72 -3.76 30 552 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 54 3 340 562 0 72.80 55 16.94 CHANGED pCQVEuCsuDLSsu.KpYHRRHKVCEhHoKushVl.luGlpQRFCQQCSRFH.LsEFDEuKRSCRRRLAGHNcRRRKsps- .................................CQV-GC.psDL.oss.K.cYHRRHK....VCEhHu..K..ust.V...l.luGh....p..p..RF.......CQQCS.............RFH.LsEFD.p......s......KRSCR+RLss.HNc....RRRKst..t..................... 1 93 251 302 +4367 PF00496 SBP_bac_5 Bacterial extracellular solute-binding proteins, family 5 Middle Finn RD, Yeats C anon PDBSum Domain The borders of this family are based on the PDBSum definitions of the domain edges for Swiss:P06202. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.29 0.70 -5.64 226 27055 2012-10-03 15:33:52 2003-04-07 12:59:11 17 39 4221 130 5660 20528 6027 361.60 19 67.34 CHANGED plhPtLAcs....................p...hssDspsasFpLRcsl+apDG.s......slTAcDVha....o.....hcchhssssssthhhhh.................................................htplpsscshslplphppP.ss.........hhhhhsthstthh.....................................................................tttttsthtppslG..oGPaplpp...ap....sp...plh.lp+.......Nss..Y...W.......................t.sps....tlcplphphl......csssthtth.psGclphh..hths.....ssshtth....ttptthph........................tts...shthhhlthNhp.....................pt...sh.sc..........hplRpAlshAlD+c.tlscth...................h.tshstsstsh.hssshshhtt.ht.......................................................hs....pcA+pLLpcAGhpss.......................................hhhhhsssssstp.phuphlppplpp..lG.......lclplpsh.t.........................................thh........psshshhhh..sasss.hssstshht.hhtssstst 
..................................................................................................................................................................................................h.PtlAcp.h.................................................................p...h.s..p.D..s......p...s...a...s...Fp..L..R....c..s.s.....+....a....p...s.....G..p...................................sloA.cD....l..ha........o.................h..p...+...h..h..s....p.s.ss..thhhhh.........................................................................................................................................................hhs.lc...s.h..D...s..t.T.l...p.lpLppP...s...................h..hhth..l..u........s.h.hhh.....................................................................................................................tt.t.t...t...t..t..p..h.s.p...p.s...l.G..........oG.P.a.p.lpp....................apt...sp......plt..hp.+................N.ss........Y.....W.....................................................t..tps................t.lc.p.lph.phh.................-s....ss...thtt...h...ps..Gp....h...p....hh.......t.h..s..............sp.p..h..t.p.h.............pp.s.tthph...............................................................ts....sh.shh....h..l..th.Nhp....................................................ps....sh...ss............hplR.pAls..h..ul..c..+.........p...tl......s.p.t.l....................................................h....t..s.....t....s...p.....s......s....t......sh......hs....s....s...h..h..t..h..s..s.t.ht...............................................................................hs......pcA....+....p....l.....L....p....c....A....G..hp....s.......................................................................................hhh.h.h......t.....s.....p.....s.....s.....t....p....p....hu.......ph.........lp...p.p....hpp.....lG...........l.p.l.p.l...psh...ph..................................................tthhpt....hp.p.t..p.a..c..h..hhh.....s.a...s..ss......
.ss..thh..h.t......t............................................................................................................................................................................................................. 0 1562 3337 4515 +4368 PF03480 SBP_bac_7 Bacterial extracellular solute-binding protein, family 7 Bateman A anon Pfam-B_808 (release 7.0) Family This family of proteins is involved in binding extracellular solutes for transport across the bacterial cytoplasmic membrane. This family includes Swiss:P37735, a C4-dicarboxylate-binding protein [1] and the sialic acid-binding protein SiaP. The structure of the SiaP receptor has revealed an overall topology similar to ATP binding cassette ESR (extracytoplasmic solute receptors) proteins [2]. Upon binding of sialic acid, SiaP undergoes domain closure about a hinge region and kinking of an alpha-helix hinge component [2]. 20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.60 0.70 -5.30 35 6254 2012-10-03 15:33:52 2003-04-07 12:59:11 8 19 1778 54 1762 5551 7504 278.70 22 82.64 CHANGED hupss.ussssptpus.phFschlpEc.osGcl..plplaP..supLus-t.phlctlpsGs......l-hshsusuhhssh.sPphtl..hsLPFlF.........pstptsc+.hhsuthu..cpLhpphcpp.GlpsL..ua.apNGh+phos.sp+PlpsP-Dh+G..LKlRl.tushhhphhctlGAsPpshsauElYpuLpsGslDGpENshssl.hst+haEVQKYhohos..Hsh...sshhllhspshassLst-hpphlccAspEus.phtpchhpctspphhppltc.....sGhplh.hos.cppsa.pcuhpslac...c.atcp 
.............................................................................stsp.s.h.t.t.sh..pth.u.ch...l..p...c..t....osG.c.l..plc.l.as....supL..s.s..st...ph..h-..t..l..ps..Gs......l-.hs.h.s.s.s.s.h.h.s.sh..sP..t.hsl.......h..s..l..P.....a.l..h..........p.s..h...p.p.h..tp...h..h...s..u..t..hs...........ppl..h..p...p..h...pp....p....G...h..h.s..L.....uh...h..........s...s...G.h....c...p....h.....ts....t....p....+.....s.....l...p........s..s...s.....D.....l...+...G.....lK...l.R.s.......s....s.....h.h.hp..h....h.c.t.h....G.....A.....s.....P..ss.h..s.h.u.E.lYsALp.p.GslD.u.tE..s.s.h.s.sh..hshp.a..h.E..V..t..+..a.h.s.h..s.s.......ash......s.s..h.h.ll..hs..pp...h...a.ss...L..........s...s..........-............pphlpp.u..s....p.cus....p...h....t....p........p..h....h.............p....p....t.........p.p.....p..t................hp.......p.hpp.........tG.s..pl........h..s..p......p.h.p.sa....pc..s.s.p.s.h.hpp.....t......................................................................... 0 536 1184 1514 +4369 PF01297 TroA Lipoprotein_4; SBP_bac_9; Periplasmic solute binding protein family Finn RD, Bateman A anon Pfam-B_1416 (release 3.0) Family This family includes periplasmic solute binding proteins such as TroA that interacts with an ATP-binding cassette transport system in Treponema pallidum. 
22.10 22.10 22.30 22.10 22.00 21.60 hmmbuild --amino -o /dev/null --hand HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.47 0.70 -5.14 141 7592 2012-10-06 15:37:50 2003-04-07 12:59:11 12 12 3969 42 1428 5388 2851 272.40 24 83.10 CHANGED Vlsoh.sltslscpl..uG..-.............t.spVpsll.ss.Gs-.PHsa..pssPpshpplpp...ADlllh..sG.hshE..s....a.lschh.........ttsphthl.......shsp........sl................................................................................phhtt..........................................................................................................ttttcpsp........................................................................................................................................................................................................DPH.lWhsPpsst.thspslsctLsch.......-Pppts...hYppNhppahpcLpplcpphpptls.sht.t........pphls.hHsuas..Yhscpa....GLp...huhh........thss..tpcsospcltplhctl+ppslpslFh-sphssc..hscpl...upcs.G..sp...l.......lhlcsl........................st......sYhphhpp....Nhpsltpul 
........................................................................................................................Vlsohtsltshscpl......uG...-.................................p...splp....sl....l.ss....G.s-...s....HsY......cspsp....Dl...t.p....l.....pp...ADlll.a.sG..hshE.....s....a..hp+hh.................thp.ppp.hphl................tssc.....sl.......................................p..........................................................p...p.................................................................................................................................................................................................................................................................................................................................................................................................................................DPH..sWhssppuh..t.h.spsItct.L.sc.h.......DP.p.pps....t.Y.cpNhppahp.c.LppLcp.ph.pp...ph.s...sh.p........+th.l.s..pHs..Aas..Yhucpa................Glp......t..hshh..........slss...-pc....sospplpplh.ch.l..+.....c.p.....p..l.ps.....lFs...E.s..p.s.s.s..c.......sscsl.......uc.cs....u....sp...h............hhlssl....................ttt..t.tsp...sYhs.h.hcp.shpsltps.................................................................................................................. 0 431 876 1187 +4370 PF04405 ScdA_N Domain of Unknown function (DUF542) Yeats C anon Yeats C Family This domain is always found in conjunction with the HHE domain (Pfam:PF03794) at the N-terminus. 28.90 28.90 29.00 30.50 28.30 28.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.56 0.72 -4.58 70 1136 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 1115 0 170 543 24 56.10 44 24.94 CHANGED ts..pslG-lVsphspuuclFccasIDFCCGGptsLscAs.pcpsl-.stllpcLpslt ....+.spslGElshshP+A.oslFRpYclDaCCGGppoLtcAu.t++slDls.l.scLspL.t.......... 
0 52 104 142 +4371 PF02667 SCFA_trans Short chain fatty acid transporter Bashton M, Bateman A anon COG2031 Family This family consists of two sequences annotated as short chain fatty acid transporters, however, there are no references giving details of experimental characterisation of this function. 20.00 20.00 20.10 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.63 0.70 -5.71 4 740 2012-10-02 15:12:49 2003-04-07 12:59:11 9 3 662 0 154 1076 192 418.50 44 98.19 CHANGED hlpRlophhsthVSKaLPDPLIFAhLLThVTFllshsLTspssls.........lVshWGsGFWsLLuFuMQMALIlVTGpALAous.Vp+lL+plASlsKsshpulhLVTFhu.IAshINWGFGLVVGAhFA+....ElARplKGsDYsLLIAsAYhGF.lTWtGGhSGShPLLsATPspslp+lhst..s.pthIPlspTlFSuYNlhIhshlllshPFlhhMhhPKpuElhuIDs......KLltcEhc.pcpls..cDsTlA-+LEpS+lLuhlIuhLGhuYLGhYFacpGF...lolNsVNhhFlhsGlLLHtoPhAYMRAIspAARSsAGILVQFPFYAGI.hMMcaSu..lGG.....lIophFhsVANccTFPlhTFaSuulINhhlPSGGGcWsIQGPhllPAuQALGsDlGKosMAIAaG-tWhNMhQPFWALPALuIAGLGsRDIMGYClosLIFsullhslGLhhl 
.......................................................................................................................................................................................ht+hsphhsphspRaLPDsal.FuhLLTllsh.l..hAh.hhs.s.psPh.p.........................hl.ph.WG......s...GFW...s..LLuFu.MQMALll..VTGasLAo.os..l++.lLp.p.h.A.p.hs..+ost.p.u.l.hh.Vo..hluhlush...l...NWGFGLVl..GAlhA+....El....A....R....+....l..c....s....s..D...Y.....LllAuAY.GF...lsWtu.G.lSuS.h......PLhhAT.s...G...p.......htchh....u....................lIPh.o-Tl..Fo.s..aslh..h.s..lsl..l.l.s..hP....h..ls.t.h..h....h.....P....c..s........p...c..s...l.s...l.Ds................pllt.c.....-s.s...h......p.....p..........h....s............................t......p..s...............s.P.uE.+LE.p.Sh.....l......L.......o...ll.l....u..h.L..G.l..s.Y.l..h..h....a..F.....p...p.G.hs.........l...s........L.N.........h.........V.....N...h..h...F....L.......h....h....G.l......L.......L......H.....t......T......P...h....s....Y.......hc.........A....ls....p...A....s...+.....o.s.u....G...I.......L.......lQF..P.F.Y.A.G...Ih.u.h..M.t.p.u.u....hs.G.................h......I...o....p..a...F....l..s.....l...A.....s.....c...c..T......F...P..l..h....s.....F..l.....S..u..u..l.l.NhFVPSGGGpWslQuPhllPAApsL.G....sc.........h.........u.........pssMAlAaG-uWsNhlQP...F....W..A..L..P..s..L..u..I.........A.........G..L...t....s....RD...I...M..Ga.....C....l....s...tL....l..hs.Ghlhslshhh............................................... 0 45 86 124 +4372 PF04486 SchA_CurD SchA/CurD like domain Kerrison ND, Bateman A anon DOMO:DM04327; Domain Members of this family have only been identified in species of the Streptomyces genus. Two family members are known to be part of gene clusters involved in the synthesis of polyketide-based spore pigments, homologous to clusters involved in the synthesis of polyketide antibiotics. The function of this protein is unknown, but it has been speculated to contain a NAD(P) binding site [1]. 
Many of these proteins contain two copies of this presumed domain. 24.20 24.20 24.40 45.70 24.00 24.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.21 0.71 -4.25 16 83 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 40 0 25 85 0 109.00 36 61.55 CHANGED h.RHALoYsl+PGststlAclLu.........hps.sAssDsss.llpTolFh+-shVVRll-VcGDh.s.hhtahu.p..s.tsEpAlsPhLcpsRchu-scuhhshhtcAAhsslppsssts ....RHALoasV+PGstttlAclLA..........thps.pA..tsDssohLhpoolFh+sshVVRhl-Vc....GD.L.t.hhtahu.p..hpssEtAls.hL..cp..sRchucspuhhthatcAuhsslpphs..s................................ 0 9 21 25 +4373 PF02630 SCO1-SenC SCO1/SenC Mian N, Bateman A anon COG1999 Family This family is involved in biogenesis of respiratory and photosynthetic systems. SCO1 (Swiss:P23833) is required for a post-translational step in the accumulation of subunits COXI and COXII of cytochrome c oxidase [1]. SenC (Swiss:Q52720) is required for optimal cytochrome c oxidase activity and maximal induction of genes encoding the light-harvesting and reaction centre complexes of R. capsulatus [2]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.88 0.71 -4.65 6 2856 2012-10-03 14:45:55 2003-04-07 12:59:11 9 21 1787 30 1082 3642 2070 164.20 24 74.44 CHANGED IuLllusGuthsaLh......LpTsKtspssph.....csplsGPFpLh-.pGc.Fsp-sLpGclSLlYFGFTpCPDICPstLc+lsshlcpLcpc.pIclQslFIolDPcRDTPcVLKcYlpsFcsuFlGLTGshcplKslsccaKVaasps.ssKssp-YhVsHSsFhYLIss-G+hlcta..shs ..............................................................hhhh................................................................................h....s...F....pL.h....s....p...s.G...p.......h...s.....p...s...h....c...G....+....h.hl...............l.a.FGa.TpC..P..D..l..CPs...p...lsp...h....s.ph...h..........c..........p..........l...sp............p......t.......t.......c...........l..p.sl....F....l....o..l...D....P..c......R.......D.....T....s.......p.h.L..p.......p....Y....s.....p...t...F.....s.....s.....p.....h.....h..u.....L...o...G....s..........p.........p.......l.....p...p.....h.....s.....c......p......a......p......l.....h.a....p.....p............s.......................................................................t......t................s...............Y.h....l...sH...........os......hhaLls...pGph.........th................................................................... 1 322 667 895 +4374 PF02036 SCP2 SCP-2 sterol transfer family Bateman A anon Pfam-B_1050 (Release 5.1) Family This domain is involved in binding sterols. It is found in the SCP2 protein Swiss:P22307, as well as the C terminus of Swiss:P51659 the enzyme estradiol 17 beta-dehydrogenase EC:1.1.1.62. The UNC-24 protein Swiss:Q17372 contains an SPFH domain Pfam:PF01145 [2]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.11 0.72 -3.72 174 3603 2012-10-02 14:08:01 2003-04-07 12:59:11 12 54 1915 19 1132 2747 690 100.00 20 46.14 CHANGED lsplhpt.......p..tttstlpplsu.......shphpl.psh.h.....shhlshp...ss.phpl........hst.t..psD..sslshsssshhplhsuc....sspphhhps+..LclcG.Dhtluhclpslhp .............................................................................................psthptl.pu...............hlplcl..pshsh....................phhlshp........st..plpV................httht...tpsD..solps..s.ss..s.....Lht.lh..stc.......ss.ssh.hhps+..Lc.l.c.G.Dh.pluhplpslh............................ 0 351 606 884 +4375 PF00375 SDF Sodium:dicarboxylate symporter family Finn RD anon Prosite Family \N 19.80 19.80 19.90 20.00 19.40 19.40 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.42 0.70 -5.71 135 10806 2009-01-15 18:05:59 2003-04-07 12:59:11 13 12 3666 24 2273 7267 1474 380.50 26 90.76 CHANGED L...hhplll..ulllGlllGh......................................h..hsphsshlp.hhGslFlphlphlllPL.lhsollsuluslts..scplG...+luhpslhhahhooslAshlG.lhluhlhpPG..........................stts..........ssststpstpssshhp....hlhshlPsN.......hhtuhs........ps....................................................................slLslllFullhGlulstlsp..cs........cslh.phhcshpclhh+llphlht.luPlGlhulhAthsup.hG.....hshlh.sluthllsshluhhlhhhllhslhhhhhsph.sPhch.h+thhsshlhAFuTsSSsATLPlslcssccph..........G..VscplusFllPLGsTlNhsGo.ula.ulsslFl...AphhGls.lo.hsphlhlllssslsSlGsAGVPGuulls.lshlLs..slG...lP.....h.pu....luLllul-hll.DhhRTslNVs.GDsssuhlls+h 
.............................................................................hhhplll.u.lllG.l.llGh......................................................................................................h.h..t.h..s..p..h..h.....p...lGsh.FlphlKMllhPl.lhs.ol.l.s..u...............lus......h.tp...........hpphG........+l.u.hh.sl.hh.a.h...h.s.ohl.AhhlG.ll.hu......lht.Pu....................................................................hh..ts.........................tt.s.s..h.s.s....t.s..p..s....s.l.hp........hl.h..s.h....l...P...s...N.......hhtuhs........ps............................................................................................................................................................................................................................................................................................................................................................................s.hlsl.lh..FulhhGlu....ltt....hspps....................pslh..phlpsh..sphhh.p.l.l.p.h.l.h................p...hAPlG.l..hulhutslup..hG...........hs.slh..sl.u.phlls.hh.hshll.h.hhll.hs.l.l.h.h...h.s..t.h..s.............sh.p.h...h+t.l...t...ps.hl.....hAas.T...pS.....S...tu...slPhshc...phcc..h......................................G..ls.csls...uhs.lPhGho.hNhsGs.ula.sh.sslhl.......Ap.s..h.........G..l..s...l....s....hsp....h.l.hl..l........l........lh.hl.sSh....G.............s.A.GVsG.........uuhls..lshsLs....shG.......lP..............h...ps.......lullhu....l-h.....lh..D.sRTs....lNlsGss.lsshllup........................................................................... 1 579 1152 1742 +4376 PF02982 Scytalone_dh Scytalone_DH; Scytalone dehydratase Griffiths-Jones SR anon Structural domain Domain Scytalone dehydratases are structurally related to the NTF2 family (see Pfam:PF02136). 
20.20 20.20 20.30 20.40 19.80 20.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.20 0.71 -4.65 3 125 2012-10-03 02:27:23 2003-04-07 12:59:11 9 3 89 20 62 135 1 145.70 58 84.45 CHANGED sITFcDYLGLpssLFEWADSYDSKDWDRLRKlIAPTLRIDYRSFLDKlWEAMPAEEFlAMISDKsVLGDPTLKTQHFIGGSRWEKVSDTEVIGHHQLRVPHQ+YTDoThpEVslKGHAHSsNhHWYRKVDGVWKFAGLKP-IRWuEYDFDcVFcDGR-Sa ......................sap-hhuhppssaEWADSYDoK..........DWDRLR+CIAPTL+.........lDYRSF.LsKh..W..EAMPA-EFlAMhSDssVLGNPLLKTQHFl.Gu.o+WE+lSDsEllGaHQLRVsHQ.+Y.......TDso.....hsp..VsV..K......GHAHShNpHaY+Kl-GlWKFAGltPtl.a.phph.............................................. 0 7 26 50 +4377 PF03313 SDH_alpha Serine dehydratase alpha chain Bateman A anon Bateman A Family L-serine dehydratase (EC:4.2.1.13) is a found as a heterodimer of alpha and beta chain or as a fusion of the two chains in a single protein. This enzyme catalyses the deamination of serine to form pyruvate. This enzyme is part of the gluconeogenesis pathway. 
22.20 22.20 22.20 22.60 21.90 22.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.76 0.70 -5.37 152 6294 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 3519 0 918 3774 323 283.70 38 69.93 CHANGED pcp.slsluphhhcNEhs.htssp.p.lttthsphhssMtssscpGL.....ps.pGlhsGslphh+chh.........................................................................................................................ph..tppL.t.................................tt.................................hthhshshhhAhAssEtNAuGGplVssPTsGu.....uGllPuVlhhh.cchph.................................................s-.-plhchLhsuuslGhllKpsAoluGAtsGCQuElGsAsuMAAAulstlhGGoscQlppAuphulpphLGLsCDPluGhVplPClcRNAhuAspAlsuAphA.hts.s.t..ptIshDcVlcoMtpsGpshssth+ETupGG......LAhs ....................................................................................................................................................................................t.p.shslutlhhpNEhs.ht....spp......c...lttthsphhpsM...p...s...sl...c+...Gh.........ps..c.G.lh.s.Gs.l.p.h.h.R+hh.......................................................................................................................................tl...pctL......................................................t..ht.p.c................................................................s...htshs.hlshaAlAVsE...p..N.Au.G.G..pl.VsuP..TsGu...........sGllPuVLhhh...c+at.h.....................................................sc...-phh.+aLhsA..u..AlGhlhK...pNASISGAEsGC.......QuE........VGsAsuMAAAGlstl.h.G.............G.o...............Pp..Q..l....s.AupluhcctLGLsCDPVuG.V.plPClcRNAhu.AspAlsAAcMA..lpt....s.st...stl..s.l.DcVIcoMhpsG+sMs.spa+ETucGGLAh.h....................................................................................... 
0 290 558 751 +4378 PF03315 SDH_beta Serine dehydratase beta chain Bateman A anon Bateman A Family L-serine dehydratase (EC:4.2.1.13) is a found as a heterodimer of alpha and beta chain or as a fusion of the two chains in a single protein. This enzyme catalyses the deamination of serine to form pyruvate. This enzyme is part of the gluconeogenesis pathway. 21.40 21.40 23.00 22.00 18.90 21.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.13 0.71 -4.17 190 5230 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 3484 3 788 3051 241 140.80 39 37.95 CHANGED SlF.Dlap.IGIGPSSSHTsGPM+AAphFhpt.lt.............................p..........psp+lplpL..aGSLAhTG+GHuTDpAllhGLhGhpP-slchc.............th..hhtthttpt.l.h...............s......ptlpFs.p.pslhachcphh.s....hHs..Nuhphp..........Ah...sssthlhppsaYSlGGGFl......hs..p...pttst ..............SlF.Dlap.lsIGPSSSHTsGPM+AGptFhst.Lt...............................................p............pss+..lpl..cl..YGSLuhTG+GHsTDhAllhGLhG.pPpslclc..............h..hht.h.tptpl.h.......................tt.......cpl.pas..hp..p..s..l..h..a..c......p..p.l...s.......hH....NuMplp.............Ah.....tss..p.....h....l...hp.p...TaaSlGGG.FIhscc...t........................................................... 1 238 475 648 +4379 PF01127 Sdh_cyt Succinate dehydrogenase/Fumarate reductase transmembrane subunit Finn RD, Bateman A, Griffiths-Jones SR anon Prosite & Structural domain Family This family includes a transmembrane protein from both the Succinate dehydrogenase and Fumarate reductase complexes. 
25.20 25.20 25.30 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.95 0.71 -4.28 125 5134 2012-10-03 07:11:12 2003-04-07 12:59:11 17 5 2968 88 1395 3246 2736 114.00 19 77.37 CHANGED hphp+P...........hs.ph......thhp.....p..hsshh.lhpRloGlsLhh.hhhhlhh........h.h.hhhht.sst.sasth.........tshhs..........hhhlh.hhhhhshhaHhhsGl+pllh..D...h.hh......pss........ttps.hhhl.shshhhhhhh .................................h.........................ht..............ssh.t...l.hpRl.oGlllhl......hhhlhh................h.h.....hh...hs...sth...s...apth.................tshhss........h.hht.hhh..h.hhlhulhaHshsGlp..pllh.....D...h...h.........cst.............thph...hlhh...shslhlslh.s....................................... 0 374 831 1144 +4380 PF02810 SEC-C SEC-C motif Aravind L anon Aravind L Family The SEC-C motif found in the C-terminus of the SecA protein, in the middle of some SWI2 ATPases and also solo in several proteins. The motif is predicted to chelate zinc with the CXC and C[HC] pairs that constitute the most conserved feature of the motif. It is predicted to be a potential nucleic acid binding domain. 20.40 20.40 20.60 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -6.81 0.72 -4.38 183 7596 2009-09-11 12:15:01 2003-04-07 12:59:11 10 82 3806 5 1631 5164 1418 20.10 67 3.86 CHANGED RN-sCPCGSG+KYKcC.Cttht .....RNDPCPCGSGKKYKpC.pGp..t..... 
1 545 1051 1369 +4381 PF00995 Sec1 Sec1 family Bateman A, Griffiths-Jones SR anon Pfam-B_530 (release 3.0) Family \N 20.20 20.20 20.40 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 564 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -13.08 0.70 -5.88 150 2115 2009-01-15 18:05:59 2003-04-07 12:59:11 18 29 328 14 1402 2034 44 467.10 18 86.95 CHANGED hKlLll..Dptstslluhshphs-Lhpps.Vslh...................tplcs..........................pR...............psh.shpslahlpPs.tcslctl.hp-...................lpp........................................spYppaalhFss..................shs..cshhcpLupss.............shptlpplp-....hhlcals...l-sslF.....oL.....p..hs.............................psa....tthhs..............................................................................................pttppslp....phupuLhsl.hhohsp...........hPhIRhpts....................................................hucpluptltphlpct.........p.htttpps.t.........................................sl..LlIlDRshDhloPLlppaTYQuhla-llslpp...splpl..........psst.ttp............................+.phhLss..pDthasphpttpasclsp...plpphlpcap..p.p..............................ppppttshs.clpphlpp.lPphp.cppsplshHhslssplhpplppcp...Ltc.hhclEQslss.ssststp.....hhp.lhch..l.............ssp.....h...-+LRLlllahlp.....t.t..spphpphpchlpps..t.hs.pph.....thlpplpphsthht..p.................................t..............t..pp.tht.h.......thhpth.pt..p........................ssp.hhsp.ap.......................Phlppll-sl...........................hpsph..sppashhssps.................................t.t...tt........................................................pph....pcllVFllGGsTYsEhpslt.plspptt.................................h.clllGuTsll...........ssppFlp.pl 
.........................................................................................................................................................................KhLlh..Dp.........hht...ls...h.h..p.....ptlh.p.....t..l.h.h......................lpp.................................pp.....................t.h..ph.....slahl.p.......P....p...............ps.....l..phl..hpc.....................................................hpp..................................................................................hhpt.h..alh....Fss....................................hs..t......h..h...p.l....tptt...........................t.l..t.t.l...-............h.h.sa..hs....h-splh...sh...............p..................................s.h......th...................................................................................................t.t.t..lp.....thsptlhsl.....h.s.h.t....................hP.h.l..ph.tt.....................................................sp..hlsp..lt.p.h.hpp.....................h...p.p.......................................................................s....LlllDRshD.hoslhpp....hTYpuhhp-l......h...s....l....p......t......s..t...hph........ps...t.t.............................................................c.ph.Lst...pD.ha..p..tt.ph.spl..st...tlpp.hpphp....tp..............................................tptt.tsht....chpphl....pp.hPphp.p..tt...t..hs.h..Hh...slspt......h..ph.hp.....t.pt.................h......p...hhphE.....pplh....t.s........ptt.....................p.....h.ch.l..........................................................stt.h.s..stl.Rl.h.h.l.h.hh.......t....t.......t.ptht..p...l....ps...............hs.......pth..............hlpphtt..hsh.h.t...................................................................................tp..ht.....................t...tt..p...........................................t..p.........h..st..at.......................shltplhpph.....................................................hpt.t..........ppt.a..h.h..ps.................
...........................................................................................................................................................................tpth.phllFhl...............GGsoh.tEh..tsh.h..hspt.t............................................hclllusoplhsspphlpt................................................................................ 0 535 782 1152 +4382 PF03908 Sec20 Sec20 Wood V, Griffiths-Jones SR anon Pfam_B-21631 (7.2) Family Sec20 is a membrane glycoprotein associated with secretory pathway. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.89 0.72 -4.28 6 380 2012-10-01 21:14:52 2003-04-07 12:59:11 8 5 278 0 245 459 5 90.80 25 29.24 CHANGED SpspplTcuLhshpphhspslppSs.slpsLssSTpsLpphs-capphpsllppo++LlKphp+p-psD+hllaluhuhFlhsVsYlVhKRI .....................t..spplTp.uLtcspphhspplppS.p.shpsLp..........pSopslpphspcap.s......h.ss..............hlppu+pLlpphtRpphoD....+....h.l....l....h.hu....h..h.hhls..s.lhallh+Rl........... 0 69 131 200 +4383 PF03911 Sec61_beta Sec61beta family Yeats C anon [1] Family This family consists of homologues of Sec61beta - a component of the Sec61/SecYEG protein secretory system. The domain is found in eukaryotes and archaea and is possibly homologous to the bacterial SecG. It consists of a single putative transmembrane helix, preceded by a short stretch containing various charged residues; this arrangement may help determine orientation in the cell membrane [1]. 19.20 19.20 20.20 20.70 18.90 18.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.80 0.72 -4.15 33 504 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 411 11 343 453 48 40.20 40 41.61 CHANGED ouuull+aYs-.-sp.GlKlsPhsVlhhSlsaIshVllLHlhu ...........uushl+aYT-.-us.GlKlsPhsV.Llh.SlsFIssVhhLHlhu........... 
1 104 195 283 +4384 PF03839 Sec62 Translocation protein Sec62 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.40 22.40 22.40 22.50 22.10 22.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.72 0.70 -4.94 6 341 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 280 0 226 348 2 199.00 29 55.86 CHANGED sK+phFFRsK+ll+hLpstchKppKsKspsc.s........K...psp-cppchhKphhpss.s.tscK.....l.sppttp.cKKc.pKclc.Lpl.pcsQhFsD.s-hYVWlY-PlPhpsalhGllhllull.AhsLFPLWPhhhRpGVYYLSlGuhGhlushhslAIlRhILFl...IlaslshG+.thWlFPNLhtDVGFl-SFpPLYsachp.s.p+sphKKccKsKpKKKcKss .................................................................................h....ha+hh.+h..h..hh.p.th.tttt....p...t.........................t......pt.p....h..p.p.....h...t.......h.....hp..h.tsp+......................tp........t..p.t.t.c..c......p+..ph+....L.ch.....pt..cQhhh..D...s..t.hY.VWlY.-.s.s....p.hhphlhuhlh..llull...AssLFPLWPhhhRhGV....a.YLSlushuhluhhhslAl...............hRhIlFh...lhahhs...h.thWlhPN.Lh.p.D.l.GFh-SFpPlasap.p.......pt.p...Kp.p.t+t..........s................................. 0 76 124 185 +4385 PF01369 Sec7 Sec7 domain Bateman A anon Pfam-B_1629 (release 3.0) Domain The Sec7 domain is a guanine-nucleotide-exchange-factor (GEF) for the Pfam:PF00025 family [2]. 
21.10 21.10 21.20 21.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.26 0.71 -4.63 140 2266 2009-01-15 18:05:59 2003-04-07 12:59:11 15 40 354 21 1387 2049 18 181.20 34 16.19 CHANGED p........pth.................c...h.........pshptFs......pp..scculphlhppsh.h..................ttpsspp....lA..........pFL.t.psss...........ls+ptlG-aLuc...tpshs.................................hplLctalch.acFp..................shslspALRphLppFcLP....GEuQpI-RllcpFu....................................p+Yh..........................................................................................................................................................................p.sN.........s.........................................sh.atss..Dssal...LuaullMLNTDLHNs.p....l+.p+........................MohpcFlcNsc...........................uhs.........supc.hsp-hLpplYc.........sIpppcl ....................................................................t........h.....uhphFN.p.....Pc.......cGlphL.ppshl..............................................tpss.pp.lA..............pFL..h..ppcs...............Ls..+..phlG-aLGc......ppphs..............................................................hpVLc.s.al.ch....a-Fs..................s.h.p.lspAL.R...............pFL..t..p..F+LP......G.E.u.Q.c....I-Rl..h-.tFu...............................................pR.Yh...................................................................................................................................................................................................................p...CN...Pt....................................................................................................................................hFtss....Dssal....LuaulI.hLNTDlHss..s.........l..+...cK.........................M.ohccFlcN.+.................................................Gl.s........sG...p....D..lsc-.......hL..........pslYppIppp.h................................................
..................................................................................... 0 457 692 1048 +4386 PF01043 SecA_PP_bind SecA_protein; SecA; SecA preprotein cross-linking domain Finn RD anon Pfam-B_507 (release 3.0) Domain The SecA ATPase is involved in the insertion and retraction of preproteins through the plasma membrane. This domain has been found to cross-link to preproteins, thought to indicate a role in preprotein binding. The pre-protein cross-linking domain is comprised of two sub domains that are inserted within the ATPase domain [1]. 23.00 23.00 23.20 23.00 22.70 22.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.58 0.71 -3.72 120 5736 2009-01-15 18:05:59 2003-04-07 12:59:11 15 29 4782 29 1118 4232 2582 118.40 41 14.40 CHANGED s.scp...ssphYppssplsppLpc..s.....................DYplDEKs+sltLT-pG...hp+sEchh.....hl.......................................ssLYsspsh.phh+alppAL+A+tLFp+Dh-YlV.c.Ds....c.....VlIVDEFTGRlMtGRRaS-GLHQAIE .....................................................s.ppsophYtpsscll..p.L.p.c..p..........................-YplDEKs+sltLT.EpGl.pc.sE.chh......tl...................................................................-sLY.s.s...p.Nh.....sLh..Ha..lspAL......+A+hLap+DhDYlV......p.....-.G..........E.............VlIVD.E.aTGRhM.t......G.RRaS-GLHQAlE......... 0 393 750 958 +4387 PF02556 SecB Preprotein translocase subunit SecB Bashton M, Bateman A anon COGs Family This family consists of preprotein translocase subunit SecB. SecB is required for the normal export of envelope proteins out of the cell cytoplasm [1]. 
20.80 20.80 21.50 21.30 20.20 19.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.78 0.71 -4.61 114 1987 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1915 16 404 1065 1207 142.90 37 93.68 CHANGED Mu-ppp..................tPp.hslpphYlKDlSFEsPsuPplF..tpphpPclslplsssuppl...............s..-shaEVsLplslsu+hp..pp.............ssFlsElp.AGlFpI........p.slsp.-plp.hLtltCPslLFPasRchluclspcGGFPPLhLsPlsFsALYppp...h......tppps ..................................................................................p.tFpIp+lYsKD..lSFE.sPsuPplF.....ppch...pPclp..lsl.sstup.pL...............u...-sha.E..VlLpl..oV..o..u.p.s.-c.............ssFlsEV..p.Q.uG..IF.s.I.........t..s.lps.p.ph..sph..LushCPsILFPYARcsIoshls.+.Gs.F.P.tL.L.s.PlNF-ALahphhpppt.t............. 1 112 227 308 +4388 PF02355 SecD_SecF Protein export membrane protein Bashton M, Bateman A anon Pfam-B_844 (release 5.2) Family This family consists of various prokaryotic SecD and SecF protein export membrane proteins. This SecD and SecF proteins are part of the multimeric protein export complex comprising SecA, D, E, F, G, Y, and YajC [1]. SecD and SecF are required to maintain a proton motive force [2]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.90 0.71 -5.10 19 8057 2012-10-02 18:57:54 2003-04-07 12:59:11 11 21 3723 9 1953 12076 6096 184.70 28 42.02 CHANGED thpshhcshss.shphhphchVGPsluppLtppulhAlhhAhlhIhlYlslRFch.hAhuAllA.LhHDlllslGhhulht.....l-lshssluALLTllGYSlNDTlllFDRlREs...hp+.pptshpclhshulspTLoRTlhTohT.....sLlsllsLhlhGu.....sslpsFuhshllGllsGTYSSlalAssllhhhtpc .............................................................h..hhh..............h.phhp.tcsV....GPs..l.G......p.-..h...h....p...p....u....l....h....A...h.....l...l.u...l....l....h..l..h...l..a...h..h.h....h...a.....c.....h.....t.....h.........u....l..s...A....l..lA....L.......h..t..s..l...l..l..h...l..u....l...h...u....l..h..t................hpl...s..L..s..s..l...A..u....l..lh..s.l..Gh..u..l..ss.sl...l....l...a-...R.I...REp........................l..+...p...................s....p....s...h.....t......p....s.l.s..p....u...h...s...p...s...h....s...p....h..l...s.o....s....l.T........T...l..l...s.s..l....s....L...a...h....h....Gs.............us.l...+..G......F....A...l.....s....l.hl..G..l.l....s..u.h....a.o.ulhlupslh.hh...hh......................................................... 0 650 1298 1669 +4389 PF00584 SecE SecE/Sec61-gamma subunits of protein translocation complex Birney E anon Swissprot Family SecE is part of the SecYEG complex in bacteria which translocates proteins from the cytoplasm. In eukaryotes the complex, made from Sec61-gamma and Sec61-alpha translocates protein from the cytoplasm to the ER. Archaea have a similar complex. 
20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.50 0.72 -4.39 185 4664 2009-01-15 18:05:59 2003-04-07 12:59:11 15 11 4540 10 1213 2641 1958 56.60 28 61.98 CHANGED phhp.FhcpspsEl+.KVsWPo+cEshpsThsVllhlllhulhlhhlD.hhhthll.phlh ............h..hp.Fh+pstpEl+.KVsWP.........o+cEhhpsTlhVhshshlhu....lhlas.lD.hllstllphl............. 0 411 783 1027 +4390 PF03840 SecG Preprotein translocase SecG subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.50 21.50 21.70 21.60 21.20 21.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.46 0.72 -4.24 183 4339 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 4294 4 929 2471 1834 73.50 31 72.34 CHANGED hsllhllhlllulhLlslVLlQpuKG.uuluush.GGGu..stolFGup.sutsh...Ls+hTslluslFhlh.ulsLuhlsp .......phLlllhlllulhllhllLlQpu.Ku.ushuusF..uuGu.....ptslFGsp..tut..sh....LsR.hTs.l.Lu.slFhlh.slsLuhl........................... 0 319 620 789 +4391 PF04856 Securin Securin sister-chromatid separation inhibitor Mifsud W anon Pfam-B_4643 (release 7.6) Family Securin is also known as pituitary tumour-transforming gene product. Over-expression of securin is associated with a number of tumours, and it has been proposed that this may be due to erroneous chromatid separation leading to chromosome gain or loss [1]. 
20.30 20.30 23.60 33.30 18.90 18.20 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.90 0.70 -4.65 8 108 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 70 0 48 106 0 208.00 36 95.25 CHANGED hssllhss+EN......usPsstlppscuhh.............................................+.PLuStsp..s+opu................................................tssshpL.+h...Gplhs.shslspsspKuLsshD....pslpsKsst..........pscpsshsscshs.puKKlpuu................................................schshEhhPchp+.hPapP.GaE..uFD........s.-cplp+LsLps..........sPht.....h.hhs--ptptp.hpl.s.sP.......................Lc.ssls.cus.............................................shsuLssl-l ..................................................MssllassKEN..........tpPu.pp.lsspculh.................................................Luotss......hKuhs....................................................tpsplshs+h...GKshss..s.....u.....ls.KssRKuLGsVs....tpsscspssh..................................pp+psshss.cc..hocpss..Kspou.............................................................sssss-saPEIEc..hhPasPl..sFE..oFD.......hPtEcpIu+LsLss..........lPLh.......h.pE-.c..c..pc...phss..P.......................lch..sp.s.E.us..........................................................hhps.............................................................. 
0 6 13 23 +4392 PF00344 SecY secY; SecY translocase Finn RD anon Prosite Family \N 22.20 22.20 22.30 22.20 21.80 21.70 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.15 0.70 -5.52 157 6369 2009-01-15 18:05:59 2003-04-07 12:59:11 15 14 5237 19 1533 4121 3873 333.00 34 79.26 CHANGED olhuLGIhPaIoASIIhQLL...................shspLtclppp.GcpGRc+lsphoRhhollluhlQuhuhshhhtths.........................hh..lhhll..pLssGohhlhWLuEhIoc.hGl.G.NGlSLlIhsuIl.sslsts................................lh...phhphh......................t..hshhh.........h........lhhllhhlhl.lhhllalppup++IPlpas+p..............tsp..psa.lPlKlNhuGVlPlIFAsul..lhhPt...hluphh......pt......................................t........hhtp.lshhh...................................hpshhY.hhlahshhlhFuaFas.shshsPc-lAcsl++pGsaIPGlRPG.csTpcaLs+lls+lohhGul.aluhlullP..pllsshht..........................h..huG.TulL....IhVu....lsl-...hhp ..............................................SIFALGIhPYIo......ASIIhQLLp.................slhPpltc.hpKp.G-tGR+KlsphTRY.hTllLuhlQuhuhsh...shsshssh.t.......................................shh..lhhslhLssGohhlh.......WLGEpITc.+Gl.G.NGlSllIFuGIluslPts............................................lh.p.hhpthh.........................................p..s..t..h.shlh.......h.........lhlll.hhlhl.hhhllalp........pupR+I.P.lpYu++................htsp..soalPLKlNhA..GVIPlI....FASSl..lhhPt....sls..pah..sss.................................................t....hh.t.p....lst...hh...........................................s.spsla...hllYsshIlhFsaF...Ys..slt.hNPc..-hA-.....NLK....Kp.Gual..PGlRPG.cpTtcYlscllsRlTh.hGul.alshls.llP...h.htthhs..........................shh...hGG.TS.LL....IlV.sVsh-hh....................................................................... 
0 523 976 1295 +4393 PF04628 Sedlin_N Sedlin, N-terminal conserved region Mifsud W anon Pfam-B_5308 (release 7.5) Family Mutations in this protein are associated with the X-linked spondyloepiphyseal dysplasia tarda syndrome (OMIM:313400) [1]. This family represents an N-terminal conserved region. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.85 0.71 -4.29 32 687 2012-10-04 00:47:01 2003-04-07 12:59:11 8 10 305 4 469 992 6 133.60 27 84.69 CHANGED IlGp.....pDsPLaph-Fssst..ps...........................................ppL...tpFlsHuuLDll--hhap..................................ssshaLttlDpa....pphhl.oualTsuplKFlllap..................................................shs-ssl+pFFp-la-hYlKhlhNPFY..........................p.ss............sIp.Ss.....sF-p+lppluc+ ............................................................................................................................lluppspPla.hphss.tp..p.t.t....................................................................................t.l....palsHuuLDll--thht.......................................................................ssshaLt...........hl....p.ph.......pp..h..hl...u...a.l..........T.s......o.......p.......l......+..Flllhc..............................................................................shp-...ssl.+...s......h.Fpcla.ph.Yl.c.h.h.h.NPFY............................p.s..ss.................Ip.S.......tFcpplp.hh............................................................ 
1 166 259 386 +4394 PF00477 LEA_5 seed_protein; Small hydrophilic plant seed protein Finn RD anon Prosite Family \N 20.60 20.60 21.10 20.80 20.50 20.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.65 0.72 -3.78 5 271 2012-10-01 20:07:30 2003-04-07 12:59:11 12 10 102 0 87 294 3 71.90 39 87.44 CHANGED MASG.........QEcREELDcRAKQGETVVPGGTGGKSLEAQEHLAEGRS+GGQTRKEQLGoEGYpEhGoKGGpTR+EQhGpEGYpEMGRKGGLSTpDcSGuERAA-EGI-IDESKF ....................................................................................Eu..tp....+GGpsppcphGcEhYpEhGpKGGp.sptcphsp-hYpEhG+KGG......u..s.p.p................................................... 0 20 59 77 +4395 PF03841 SelA L-seryl-tRNA selenium transferase TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.03 0.70 -5.72 11 2029 2012-10-02 18:26:03 2003-04-07 12:59:11 8 7 1584 2 363 1893 697 327.50 37 78.46 CHANGED VlNhTGsVlHTNLGRAlhu....-cAhpAshpuhpc.ssLEaDL-sGKRGsR.salpcLLpcLTGAEsuhVVNNNAAAVLLsLsolApGKEVIlSRGELVEIGGuFRIPDlMcpuGs+LhEVGTTNRTHl+DYcpAIspNTAhLhKVHoSNYplpGFTppVsht-LstLu+EhslPlhpDLGSGsLlDLspYGlst.EPTVp-tlupGsDlVoFSGDKLLGGPQAGIIVG+K-hI-+lpppPLpRALRlDKlsLAuLEATL+LYlpP-+htc+lPTL+hLopshctl+tpApRLpthLtstLu.t.hpVpltsuhuplGuGShPhpcLsShslolpscps......slsuLptthRths.PlIGRlc-stlhLDlRoLt 
.............................................................................................................VhNhoGsllHTNLGRu..s....ttshpshtpshpt.ssl.E.h......sL.p......p..u......t......R.......u..p.R......p.....h.....l.t.p......h.........l......p.p........ls.u.....A...E.......s.......A....h.......lV.N.......NN...A..A.A.V............h....L.........h.l....s.................s.h....u................p.s................+................E....VVl........S..........R........GELV.EIGGu.....F..R..l....s..-.l.h.p.t...........u.....Gs..pL......h..E..V..G........o......o..N....+...T...+...h..p..DY...c.....pAI......s.E...p..T.Ah..L..h+V...H.SNaslpGF..pptl.sht-ls.t.l...u......+c...........t.............s.........lPllsD.lGS.G...h....c.h.........t.hh..h.....Es.t.h.p.phl.tsGsDLVsFSGDKll...GG....P...Q...A...G.lIl.GK....K.p.....hI..s....p.....l.....p......p.....p......s......L....t..RAl....R.lsKh.o.Lu....uLpss.L...chY.l....p........p..t........p.plss..hph..Lp.ts....t.lt.tA.p.l....lt..................................th..s.u..lGuGs.hP....l.shhh.h......................tht..h.+........sllsRh.pt.hhh-hpsl.t.................................................................................................. 2 127 232 306 +4397 PF04593 SelP_C Selenoprotein P, C terminal region Kerrison ND anon DOMO:DM04433; Family SelP is the only known eukaryotic selenoprotein that contains multiple selenocysteine (Sec) residues, and accounts for more than 50% of the selenium content of rat and human plasma [1]. It is thought to be glycosylated [2]. SelP may have antioxidant properties. It can attach to epithelial cells, and may protect vascular endothelial cells against peroxynitrite toxicity [1]. The high selenium content of SelP suggests that it may be involved in selenium intercellular transport or storage [2]. The promoter structure of bovine SelP suggest that it may be involved in countering heavy metal intoxication, and may also have a developmental function [3]. 
The N terminal region always contains one Sec residue, and this is separated from the C terminal region (9-16 sec residues) by a histidine-rich sequence [2]. The large number of Sec residues in the C-terminal portion of SelP suggest CC that it may be involved in selenium transport or storage. However, it is also possible that this region has a redox function [2]. 19.20 19.20 20.70 49.00 18.00 17.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.37 0.71 -4.36 4 16 2009-09-13 02:32:08 2003-04-07 12:59:11 9 1 10 0 6 37 0 124.80 54 34.28 CHANGED GQHRQGH.EssDhs.ASEuLQ.SLsQ+KLUR+tCINQLLCKLsc-SEuAsSSCCCHCRHLIFEKoGSAITUQCsENLPSLCSUQGLhAEEpVhESCQsR.PPAAUQ..uQplsPTEASssUSUcNpsKKUKUsSN ............................G.pRpGH.-spDhs.uSEslQ....Q+KLsRK..RCINQLLCKhscDScsA.uSCCCHCRHLlF.Ec.oG.SAlTUQCsENLPSLCSUQGLhAEEN.lhESsQsR.hPPAAUph.SQ.hsPsEA.sssUuU+ppsthscs..N. 1 1 2 2 +4398 PF04592 SelP_N Selenoprotein P, N terminal region Kerrison ND anon DOMO:DM04433; Family SelP is the only known eukaryotic selenoprotein that contains multiple selenocysteine (Sec) residues, and accounts for more than 50% of the selenium content of rat and human plasma [1]. It is thought to be glycosylated [2]. SelP may have antioxidant properties. It can attach to epithelial cells, and may protect vascular endothelial cells against peroxynitrite toxicity [1]. The high selenium content of SelP suggests that it may be involved in selenium intercellular transport or storage [2]. The promoter structure of bovine SelP suggest that it may be involved in countering heavy metal intoxication, and may also have a developmental function [3]. The N-terminal region of SelP can exist independently of the C terminal region. Zebrafish selenoprotein Pb (Swiss:Q98SV0) lacks the C terminal Sec-rich region, and a protein encoded by the rat SelP gene and lacking this region has also been reported [2]. 
N-terminal region contains a conserved SecxxCys motif, which is similar to the CysxxCys found in thioredoxins. It is speculated that the N terminal region may adopt a thioredoxin fold and catalyse redox reactions [2]. The N-terminal region also contains a His-rich region, which is thought to mediate heparin binding. Binding to heparan proteoglycans could account for the membrane binding properties of SelP [1]. The function of the bacterial members of this family is uncharcterised. 22.90 22.90 23.40 23.30 22.80 22.80 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.84 0.70 -4.98 6 83 2012-10-03 14:45:55 2003-04-07 12:59:11 9 4 48 0 49 89 1 177.60 34 63.13 CHANGED QspSShCK.PPpWpIcDpsPMhNuhGpVTVVALLQASUahCLlQASRLpDLRlKLcppGYoNISYhVVNcQu.pSphhascLKc+luEcIPVYQQ-.sQsDVWpLLNGsKDDFLIYDRCGRLsYHLuLPaSFLoFPYVE-AIKtsYCEchCGNCSLso.ps.-.CKssTh...........tsssKssEsp.....sct.HsH.....Ht++HsHsHcH.s.....ssphpcs.pss.sstsp.Ps..uh.HHHH+H+ ...........................................................................hs.h..............h..phtsLp.+Ltpp....Gh.slpahlVN....pps...Sp............h............ha..Lpp.......pss......tl.......sVYp...Qp.tpsDlWphLsGs...KDDFLIYD..RCGRLs..YHlsLPaS....hL...pasYVEtAI+hsYpcphCGsCo...hp.s............t...s......p.spth......................t.pps..p............t.....p....................................................................................................................................... 0 10 14 34 +4399 PF01641 SelR DUF25; SelR domain Bateman A, Enwright A anon Pfam-B_1539 (release 4.1) Family Methionine sulfoxide reduction is an important process, by which cells regulate biological processes and cope with oxidative stress. MsrA, a protein involved in the reduction of methionine sulfoxides in proteins, has been known for four decades and has been extensively characterised with respect to structure and function. 
However, recent studies revealed that MsrA is only specific for methionine-S-sulfoxides. Because oxidised methionines occur in a mixture of R and S isomers in vivo, it was unclear how stereo-specific MsrA could be responsible for the reduction of all protein methionine sulfoxides. It appears that a second methionine sulfoxide reductase, SelR , evolved that is specific for methionine-R-sulfoxides, the activity that is different but complementary to that of MsrA. Thus, these proteins, working together, could reduce both stereoisomers of methionine sulfoxide. This domain is found both in SelR proteins and fused with the peptide methionine sulfoxide reductase enzymatic domain Pfam:PF01625. The domain has two conserved cysteine and histidines. The domain binds both selenium and zinc [2]. The final cysteine is found to be replaced by the rare amino acid selenocysteine in some members of the family [1]. This family has methionine-R-sulfoxide reductase activity [2]. 21.90 21.90 21.90 22.00 21.70 21.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.52 0.71 -4.43 18 5387 2012-10-01 21:11:27 2003-04-07 12:59:11 13 18 4111 31 1382 3866 2926 121.70 48 59.33 CHANGED c-Eh+psLos.QacVhpppuTEpPaTsEYscpaEcGIYsslssGpPLFpSpsKF-SGCGWPuFhcPlspcu.IphptDpShsMpRsEVpstssDuHLGHVFs.DGPpp....ssGhRYClNSAuL+Flst .......................................t.tch+ppLo.s...pYtVspcpGTEtP....F.....o.uc....Y...h........c.....p.....t.c...p.........GlYss.h..ssGpPLF.....pS.p.....sKF...-...SGC.........GWPS...Fhc.P.............l.......ss....p.......s........l.......p..htp.......D.......t.S........a............G..............Mt.RsEV+stpssuHLGHVFs.DG..P......ps........s.GlRYClNSsuLcFhs.t........................ 0 441 862 1147 +4400 PF01403 Sema Sema domain Bateman A anon Bateman A Family The Sema domain occurs in semaphorins, which are a large family of secreted and transmembrane proteins, some of which function as repellent signals during axon guidance. 
Sema domains also occur in Swiss:P08581 the hepatocyte growth factor receptor and Swiss:P51805 19.10 19.10 19.10 19.20 19.00 19.00 hmmbuild -o /dev/null HMM SEED 433 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.70 0.70 -5.96 26 2637 2009-09-11 14:23:39 2003-04-07 12:59:11 14 95 167 27 1239 2166 0 367.60 25 40.30 CHANGED aphhhhc.tpspLaVGA+stlasLslpslp....ph...pplsWsus...ppppcpChh+GKs......ts-CtNal+lLtshsps.+LhsCGTsAapPhCphh..plss.....................................aph.stpp.sGpucCPasPppspsulhs.s.....sp......LYuuoshD......Fhup-ssIhR.hu........ptssl+Ttht...sphhLstPpFVsuasIs.s.t......pschlYFFFpEsus-tt.ts....tcslaoRluRlC+sDhGGpchlp.spWooFLKARLsCShPu..t.s..haFspLQssahl.sss..........pssllaGVFoTspsshtu....SAVCsaslpsIppsF.pGsatpp...csspppWhshps.chP.PRPGpCsssst.........thPDpslsFh+sHsLMtpsl.slpptPlhscpssphphTpIsVD..plpstssp.asVhalGTspGpll.hhlslspss..s...hhlEEhplhpss....tPlpshplsppp .......................................................................................ptplalGu..h.stlatls.ts..ht..................................tht..ss........tph....p..p..Ch.....tspt..........................ppst.Nh.....h.+....l..l.........................h....s...p...........p...pL...............hs..CG...o.ss.ps.hCthh....phss..............................................h.p.h...........t.....p............p.spsts.s..h..sstts.t...s..ulhs.s.....................sp............................Las.u.s...shs.......................a.sps.........h...l..h........R.tht.......................p.t.s....h.c..t..........s.t...ph..l...p...........t.....P..p...Fl.....t....u.....h.........h...............shl..Y..Fh.a.p.ch..shphp............................tp..hh...hoR...lu.R...lC......p.....s..D.G...s..t.p.hl........phhs.....o......a.hcs.............cL.C.shss...............t.........has....L..pssh.h..h.s..tt...............................p.llaulF..o....s..s...s..s.h.s..........................SAl...Csash.p.....s..lppsF..p...u....htpp....p.ts.t........p..t.h...h...s..htt....t....hs......s.
..+......s.s....t.C..s.ts................................................................ph.s-phh..s...h.h.....c....p......p.s.......lh..........p....h........sl....t..t......p......Pl...hh.....p....p.....t...s..............................hTplsVs........t.s...t....s.......a....sVhFlG........T.....p..p..Gplh..hl..h............tt.............................hhtphthh....................t................................................................................................................................................ 0 199 302 687 +4401 PF01118 Semialdhyde_dh Semialdehyde dehydrogenase, NAD binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1079 (release 3.0) Domain This Pfam entry contains the following members: N-acetyl-glutamine semialdehyde dehydrogenase (AgrC) Aspartate-semialdehyde dehydrogenase 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.45 0.71 -3.81 188 10449 2012-10-10 17:06:42 2003-04-07 12:59:11 19 35 4748 117 2601 11025 5845 122.70 26 35.48 CHANGED +VullGAoGhlGpcl.lclLpp..c...lchhhlhuspp..psGpphsthh.............htshhlps.......p.tth...ppsDllhhu..tssssupphssph..hptGh....hVID...ousaRhcs........................csshslPEl..N...tcplppt ...........................................plullGAoGhlGppl..l...p.h..Ltc......c......s.....l.....p.....h...h......h..h.....huosp.......ps..Gp..pl.s..h................................h..p..sh..s.lps.......hs...psh......pss.D..lsh..h....u....s..u....s...s...s....o..p......c....h.s...s..ph.............hp..s.Gs...........hVI...D...sou..s..a.R.hps.....................................................csshslPEl...N....ctlt..................................................................................................................... 
0 809 1648 2203 +4402 PF02774 Semialdhyde_dhC Semialdehyde dehydrogenase, dimerisation domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1079 (release 3.0) Domain This Pfam entry contains the following members: N-acetyl-glutamine semialdehyde dehydrogenase (AgrC) Aspartate-semialdehyde dehydrogenase 20.90 20.90 20.90 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.50 0.71 -4.43 32 8792 2012-10-02 22:00:43 2003-04-07 12:59:11 13 14 4668 113 2044 6116 4228 174.30 26 50.65 CHANGED LpPLhct..stl.ptlhVsohQulSGAGtp...........................................tssshshslshNllPalss..cct..poccchphhs-spphlshss..........u.CsRVPlhcGHopslphcht.....pshslccshphhtst.........stshlhs.....p.phPpsttsts...ssss.VGRlRpDshtsp...lthhsluDNlh+ ................................................................LtPLhc..t....htl...p.p...l.sl.s.oh...pul..SGAGt..p.......................................................t..s..s.h.......h.......s.h........luhNll......Pah..ss.......h....cps..........ps.cc..E..h....+..h.h..s...-...s...p.....c...l...l.....s.ss........h................s.slplP....shcGHspslp.hc.ht......................psh.slpch...hph.h..tps.............................................tshlhsps...............ttthPpsh..tssG........ssts.lG.p.lR....p..D....t..s....sp......hlthhsluDNLhK............................................ 0 662 1325 1741 +4403 PF03925 SeqA SeqA protein Bateman A anon COG3057 Family The binding of SeqA protein to hemimethylated GATC sequences is important in the negative modulation of chromosomal initiation at oriC, and in the formation of SeqA foci necessary for Escherichia coli chromosome segregation [3]. SeqA tetramers are able to aggregate or multimerise in a reversible, concentration-dependent manner [3]. Apart from its function in the control of DNA replication, SeqA may also be a specific transcription factor [4]. 
22.70 22.70 23.00 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.24 0.71 -4.59 37 801 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 780 13 101 335 9 177.20 64 98.90 CHANGED MKpIEVDEELY+YIASpTpcIGESASDILRRLLslssp......................pshshspPs.t..............................................................................s.spsttctVp.................slcpLlps-chsppctAVsRFlhlLssLYptsspsFspsh...plpGRsRlYFApscpsLLtuGss........oKPKpIPsoPFWVlTNsNTuRK+thLpplhtphthsspll-clpshl .....................................................................................................................................................MKTIEVDDELYpYIASHTcHIGESASDILRRMLKFoAss..........................t.....usPsht........................................................................ps.t...h...sc.sps...spsl+D+VR........................AMRELLLSDEYAEQK+A...V.N..RFMLlLSTLYoLDspAFAEAT..ESLHGRTRVYFAuDEQTLLpNGNp........TKPKHVPGTPYWVITNTNTGRKpSMlEHIMQSMQFPAELIEKVCGTI.............................................................. 1 13 36 70 +4404 PF04360 Serglycin Serglycin Finn RD anon DOMO:DM07201; Family Serglycin is the most prevalent proteoglycan produced in haemopoietic cells. Serglycin is a proteinase resistant secretory granule proteoglycan [1]. 30.00 30.00 32.00 30.80 28.90 27.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.03 0.71 -4.48 3 52 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 31 0 21 58 0 122.00 45 76.59 CHANGED QpLLpCoRLVLALAFILVauSSVQGYPsRRARYQWVRCsPDSNSANCIEEKGPtFDLLsGESN+IPPPRTDl.PlhpppsLN-lFPLSEDY..............................SGSG.GSGSGSGSGSGSGFLsEMEQEYQPVDENDAFYaNaRShDRNLPSpNQDLGQDGl ...................ss+lsLsLAllhhLtsus.pGhPsp..+ARYpWV+CsPDosSANCl-EKGPhF-L.PGEuN+I.s...hsD.hshpp...p..shschFP..l.SE..-h......oG..sS..G..SGS.GuuSGSG.....hsphc.-h....h-pps..a.s..s...................................................... 
0 1 2 5 +4405 PF00450 Peptidase_S10 serine_carbpept; Serine carboxypeptidase Finn RD anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.43 0.70 -5.31 67 3902 2012-10-03 11:45:05 2003-04-07 12:59:11 17 45 756 20 2456 3840 269 353.80 21 80.72 CHANGED PG.....ts...hsh...cpaoGYl.......sls.ttpsppLFYaFh....ESpp....sPppc.PllLWLNGGPGCSSls.Ghh.ElGPFpls......ss..sLhhNsYSWscsANllFLDpPsGsGFSYo..sssss.......hptsD....................ppsApDsatFLtpahpc.FPca....tsp..cFYIsGESYAGpYlPtluptIhptspt..............tlNL+GhhlGNuhs-s......hhphsshhs.....ahattullo.-ctacphpptCphs..............................stsp..tppChsthpphtt.................................slshYsIhpssh..............................................................shsshs.phspt..ahNptsVpcALph.....ss.hpWptCsp.tl........th..sch.pshhshhp...pllpss..l+lllauGDtDhhsshhuspthlcs....LshsstspapsWhh........................sspluGaspsY........ts..loFsoV+GAGHh..VPh.pPptuhthhppalsu 
.....................................................................................................................................sGal...........................tht.........s....t...p...hF.aa.......hh..........................pu.pp...........ss.t.pp....Plhl.....Wl.............s.G...............G..........P.............G.s.SSh.......h..u......hh..E.h.G...Ph.thp..............................t........t..h......h........Ns..h.o.Ws...p..........h..............u............sl......lal..-p..............Ps....G.......s..........Ga......S..h..s...psstt...................s.p...............................................pt..s.....u.ps...hhthl...p..t........ah...pt....a...s.pa.....................t..s.......s....hals..G........E....SY.u......G.....hh..hPh..luthlhptt.t.................................hs.Lp..........G...h.h.l.s.........s...sh...h..s.....................p.h....thh.........................hhh........h....s.....h....l.....s........p......p...........h....p.....t...h....t...t.......h.......................................................................................s........t....h.t.h...............................................................t.............ths.hshh...............................................................................................................................................................t......h...........a.....h..s......p......t..l...p...p...s.lt.h......................tat.ss......l................................t..t..................s....h...h..........h...h..................tl..l...pt.t.........hpl.hl.h..........s.....G.p...............hD.h.h.s..............s..h...h......u...........s....phh.hpt.....................h.t.h....h...............a...................................................................tt....hG...h............................tt......hshh.l..h...........tuuH........hs.........t......s..s..hh...............................................................................................................
............................................................................................................................................................... 1 823 1473 2074 +4406 PF00079 Serpin serpin; Serpin (serine protease inhibitor) Eddy SR anon Overington and HMM_iterative_training Domain Structure is a multi-domain fold containing a bundle of helices and a beta sandwich. 21.70 21.70 21.70 21.70 21.30 21.60 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.21 0.70 -5.32 180 5658 2009-01-15 18:05:59 2003-04-07 12:59:11 15 39 760 254 2183 5686 67 297.10 23 87.17 CHANGED pusssFuhcLa+plspps.......ppNlhhSPhSl......psuLuhlhhGApG..pTtppl..tpsL...th....................................hstpp.......h.tt.........................hppl.hpplpp..............................tshplphANtla..h....p..cs.hplppsFhptscp...hYpup.hpslDFpsssp.utppINsWVpcpT...............pu+Ipcllsst.lss.s..TphlLlNAlYFKGpWpp..tFstppTppp.sFa..hsp.......spsh.pVsMM.ppp.....spaph......t.p....sspl..lcLPY........................ps.....sh.SMhllL..........P........su.lp..plc......ppLs.tt.....lpp.....hhpphp...hp......htl...tl..PKF..plpt....sh..cL.ppsL.pphG..lpchFs.spAD.hSsls...............................................................................s...t.LhlScllHKuhl-VsEcGoEAAAuT.........................................................Fpss+PFlFhIpcppo........................tslLFhG...+lhsP 
.........................................................................................t.....hshph.hp..h..tt............ttN..hhhSP.h.ul........shu......hh......h.G...u.......p.......s.......p.......T...tt..p..h....p...s.l..th............................................................t.tp...h................................................h.t.pl..hptltt..................................................................tt.t..l.p.h..sst..l..a..h.............p......pt.....h.......t...h........t.p....ah..p...h...pp.........ha..t.............sp....htth.s.Ft.....pt.....t....s........t...p....tI...N..p.a.l..pp..p.T..........................p.s.p.....I..t....p....l....l..t.t.........l...s......t............s.......hh..h...ll.Ns..l...aF...+.........u.p...Wp..p......F...p.....p.........t..T.p.tt......Fh.......hsp.........tp...t......h...tV.hM...tp...........t.t.h..h.........................h..tt...t..h.....pl....lp.lPY....................................ps..ph..ohhl..lL.........................P.............pt....ps....lp....pl..................p.p..l.s....tt...............lt.p.....h..h.p.t.hp...p........ht.........l................hl..P..+F....pl....p..t....ph....s.......l...pphL..p.p....h.G......hp...p....hFs.....t..pus..hsths.......................................................................................................................................................................t....ttlhl..s..p..h..h...pp...s.h......l...p.............l..s.Ep..G..spu.s...u.so.................................................................................................h....h.s+P..Fhh.h..lh....p...p.......t............................lFhGph................................................................................................. 
0 518 734 1365 +4407 PF02403 Seryl_tRNA_N Seryl-tRNA synthetase N-terminal domain Mian N, Bateman A anon Pfam-B_518 (release 5.4) Domain This domain is found associated with the Pfam tRNA synthetase class II domain (Pfam:PF00587) and represents the N-terminal domain of seryl-tRNA synthetase. 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.44 0.72 -4.03 89 5178 2012-10-01 23:07:44 2003-04-07 12:59:11 17 14 4789 32 1407 3708 1996 107.80 30 25.05 CHANGED MLDl+hlRp....Ns-tlccpLppRt....ss.hsl-cllpLDcc+Rplhhcs-pLpscRNphS+pIuptcppt.....pcsstlhtcspplspclpsh-t.clpplcsclpphlhslPNlP .........................MLD.l+.hlRp........s.-tVtcpLt..pR................stt..h..sl.-....clhpL.....Dp.cR...Rpl.....hs..csEpLpuc.RNphS...Kp....I...upt...Ktpt...................--spsl...htc..h....pp....lupclc..shcs.clsplpsclpplhhslPNlP............................................................... 0 483 906 1191 +4410 PF01445 SH Viral small hydrophobic protein Bateman A anon Prodom_1504 (release 99.1) Family The SH (small hydrophobic) protein is a membrane protein of uncertain function [1]. 21.90 21.90 21.90 35.70 21.30 21.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.01 0.72 -4.31 12 328 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 18 0 0 315 0 56.10 83 99.85 CHANGED MPAIQPPLYLTFLLLILLYLIITLYVWll.TITYKTAVRHAALYQRShFRWSFDHSL MPAIQPPLYLTFLLLILLYLIITLYVWIlLTITYKTAVRHAALYQRSFFRWSFDHSL. 
0 0 0 0 +4411 PF00017 SH2 SH2 domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.69 0.72 -4.17 58 9081 2012-10-01 22:44:06 2003-04-07 12:59:11 19 379 283 390 4864 8244 21 78.00 27 14.81 CHANGED WaaGploR.....p-AEchLhp......tpsGsFLlRcScs.p.Gs...aoLSVppps.........pVcHa+Ipppsss........halssptp..FsoLt-LlpaY ..................................Waa.Gt.l..o..R...............pp..A.EplLtp.................t...sG.s...FL.lR......c..S...ps....p......G.s..........a..s.LS..lptps........................................plpH....h.pI.p..pp...sst................................hh.h......s.....t......t.....tp.......F..s..o..l..pLlpaa............................................................... 1 1219 1597 2956 +4412 PF00018 SH3_1 SH3; SH3 domain Cerutti L, Sonnhammer ELL, Eddy SR, Finn RD anon Prosite Domain SH3 (Src homology 3) domains are often indicative of a protein involved in signal transduction related to cytoskeletal organisation. First described in the Src cytoplasmic tyrosine kinase Swiss:P12931. The structure is a partly opened beta barrel. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.20 0.72 -4.57 61 10749 2012-10-02 18:48:24 2003-04-07 12:59:11 23 695 444 373 5929 20245 89 47.10 29 7.39 CHANGED hALYDapupp..ssELshpcG-hlpllpcsss...sWWcuc.ttt...sppGhlPu .....................hAlYc..a..p.......u....pp.........ss.......E.......L..o.......hp...c..G..-hl.p..l........l......p...c.....s........ss..........sW.Wpu.ch...tt............sp....p....G.hhPs................................... 
1 1639 2410 4041 +4413 PF04908 SH3BGR SH3-binding, glutamic acid-rich protein Mifsud W anon Pfam-B_6650 (release 7.6) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.23 0.72 -4.05 6 396 2012-10-03 14:45:55 2003-04-07 12:59:11 10 6 165 7 218 466 811 91.70 40 45.15 CHANGED MVl+VYlASuSGshtIKK+QQ-VlthL-up+IpF-plDIot..cE-pRcaMRcNs....s..p+PssGtPLPPQIFN-DpYCGDYDuFhpApEpNTlhsFLtLs ...................slcVah.uos.oG..s..htIKK+QQcVhthL-A.p+I....pFc...plDIut..sE-pR.caMRcps.........pc..st.suhsLP....PQIFNp...-p....YCGDY-sFh-.ApE.pstl.tFLtL.................................. 0 60 87 142 +4414 PF03579 SHP Small hydrophobic protein Bateman A anon Pfam-B_1121 (release 7.0) Family The small hydrophobic integral membrane protein, SH (previously designated 1A) is found to have a variety of glycosylated forms [1,2]. This protein is a component of the mature virion [1]. 21.20 21.20 21.20 107.20 19.50 21.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.11 0.72 -4.62 3 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 10 0 0 46 0 64.00 66 92.80 CHANGED MsNTSITIEFTSKFWPYFTLlHMILTIISFLIIISIMIAILNKLCEaNsFHNKTLElspuhpNs MsNTSITIEFTSKFWPYFTLIHMILTIISLLIIISIMIAILNKLCEaNsFHNKTLElspthpss. 1 0 0 0 +4415 PF01488 Shikimate_DH Shikimate / quinate 5-dehydrogenase Bashton M, Bateman A anon Pfam-B_336 (release 4.0) Family This family contains both shikimate and quinate dehydrogenases. Shikimate 5-dehydrogenase catalyses the conversion of shikimate to 5-dehydroshikimate. This reaction is part of the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. Quinate 5-dehydrogenase catalyses the conversion of quinate to 5-dehydroquinate. This reaction is part of the quinate pathway where quinic acid is exploited as a source of carbon in prokaryotes and microbial eukaryotes. 
Both the shikimate and quinate pathways share two common pathway metabolites 3-dehydroquinate and dehydroshikimate. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.66 0.71 -4.06 44 8457 2012-10-10 17:06:42 2003-04-07 12:59:11 15 52 4195 91 2050 10811 5424 121.30 22 33.35 CHANGED -LApphhs...tlp.sppsLllGAGchucLlsptLhsp.uscclhlsNRThc+AppLAcch.....ts....shsls-lp...phlspsDlll.ouTuupp.............llspphlcpsh...t.......h.lhlDIulPRslp.tlsthpsshlYslDDLctl ...............................................h.............tlp..sppsL.l.l..G.A...G...G...s.u...c...u.l.h.h...t..L...h...pt...G........s...p......p...l......h......l..s....N.......R......T.......h.......p.......+......A....p......p....L...A...pph.....................t..t......h....p......s....h....s....h....s.....c....lt...............th..h...t...p....h......D....ll...l...s.u...T....u..u.s.h.................................................h............h..............................................................................................h......................................................................................................................................................... 
0 611 1298 1762 +4416 PF00464 SHMT Serine hydroxymethyltransferase Finn RD anon Prosite Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 399 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.06 0.70 -6.10 12 6460 2012-10-02 18:26:03 2003-04-07 12:59:11 14 19 4794 86 1789 5320 4336 351.50 52 88.05 CHANGED Ls-tDPElashlcpEhpRQpcplELIASENasSpuVh-AhGSsLoNKYuEGYPGpRYYGGsEalDhlEsLs.cRAhchFsh-....hVNVQPhSGS.ANhuVYpALlpPtDplhGLsLscGGHLTHGh.ss.......oupaFcuh.YtVs.cTGhIDYDpLccpApha+PKlIVAGsSAYuRllDat+hREIADclGAYLhsDMAHIuGLVAAGVhPSPF.aAcVVTTTTHKoLRGPRGGhIhh+............-h..-ltcKINpAVFPGhQGGPh.HsIAAhAVAhKpAhpPEFKsYpppVlpNA+AhucsLpcpGYcLVSGGTDNHLlLVDL+s+GlsGscuEtsLtpssITsNKNolPsD.cSshlsSGlRlGTPAhToRGFsEt-FpcVutaI ..................................................................................................tphDs-lhph.l.p.p....E.ht.R.Q..p.pp...lELIASENa....sS......uVhpAtGS......hLTNKY......A.......E.G.....Y.P.......G..+.......R..........YYGGCEaVDhlE..pLAI..-R.AKcLF.....GA.....-.................................aANVQ...........P..H.S..GS.........Q......AN.......hAVYh.AL..L.pP.....G...D....ol............hG.....Ms.Lsp.GGHLTH...G.u.l......sh......................S..G+.h.a....p.......hlsY.Gl..c......c...o...t.hIDY.D..p..l..c...c...h..A..h..c......a.+.....P...K.l.I..l.A..G...h..S.A..Y..s...R...h...l.D...a.t...+...h...R....-...........I....A...............D...c.....V...G..A.......hLhVDMAHlAGLV......A.....A...G.....l..a.....P......s.....P.....l......P.....a.......A....c..l..VT.TTT..HKTLR......G..P.R..G..Gh.IL.sp............................................................................p-htKc..l..N..pAlFP.G.h..Q.....GG.P.L...H.V.I..AAKA.V.A.ht..E.A.h..pP-F.K.pYt.ppVlcNA+shA.....c................s......h......h......p......c.......G...h.cl......V....S...G....G....T......DN.H.Lh..Ll...D...L.p.......s......p.......s............l...o.G+..p.A-.th...Ls.ps..sI...TsNK..N.slPh..D...sc.s...P....h..l..T.S.........GlRlGTPAlToRGFtE..t-hcplAphI........
................................................................................................................................................... 0 595 1137 1501 +4417 PF04917 Shufflon_N Bacterial shufflon protein, N-terminal constant region Mifsud W anon Pfam-B_6667 (release 7.6) Family This family represents the high-similarity N-terminal 'constant region' shared by shufflon proteins. 27.10 27.10 27.50 27.10 26.10 27.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.35 0.70 -5.36 3 374 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 230 0 47 352 5 249.60 32 68.40 CHANGED MKKaD+GauuLEVGAuLLIVlllIuhuAchhpDYLpo+cWQssAcpsNsaToAVRSYVGKNYoTLLASSTTTTPAVITTsMLKNTGaLsSGFSETNS.GQpYQAhIVRNuQNsELLQAMVVSsGGpuhPhsALpQlAKDITsGLGGYIpDGKTAlGAhRSWSlsLSNYGssoGsGHIAVhLSTDDLSGAtEDoDRLYRFQVNGRPDLNKMHTAIDMGSNNLNNVGTVNAssushSGNVuGpNGTFSGulsGNou.....lTAGGDIRSNNGWLVTRNSKGWMNETHGGGaYMSDuSWLRSVNNKGIYTGGQVKGGTVRADGRLYTGEYLQLEKTAVAGASCSPNGLVGRDoTGAILSCQSG ......................................................................................................................................................................................................ttGh....h.p...h.h.....slhl..hhh.hh..hh..h.t...hts...ptht.p...A...pphs...pAsppYlt...cph.........s.sl......st.s.........................Ph.......h.......h......Th....t...Lhp..p..s.hL.suhptp..N...uQp..h.hh....l...h+..s....s...t.....s....st.....h...puhhh.o....p....G....Gp...s..h.............t....t.hh.h....ut.....hs..u.........GG....hl.......t........p.........s..........t..............u...........h..........G...shtuW...p........s.............ssa............u............t.s...........ss....G+lA.....hh.h....tt.............t..s-.......h....LYR..tVsu+P-.hN....tMps....slshssp...slpshtsh.s.st............t....h........................................................................................................................................................s...ts...s.....ssp......h.t.......s.....G...
.......p.l......pu......p......sph.stthl....pht.t..s..st.Cs..............sGhhu..hs..st.G...L................................................................................ 0 12 27 33 +4418 PF02973 Sialidase sialidase_N; Sialidase, N-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.21 0.71 -5.33 6 557 2012-10-02 19:29:29 2003-04-07 12:59:11 11 25 252 12 9 354 128 186.90 45 24.36 CHANGED l.psh.hE..ssshsIusGpshcLsuEhs...lphL-pGTlllcFKuspps.ulQSLFSlSNupssN..caFplYlsNo.plGhELRsscuhhNYshups..sslhshhtsssshNTlAhKA-.psKpY+LalNGclltshspssspF..IssIsGlsslpLGuTsRtG.sptYsFsGsIsplplYNcsLoD-tLppcTGtTs ..............................................................................................l.pshphp..shphsh.Ssstt.scLSu..Elp....pphp..pu..TV..hMcFKsDsp.s......uh......s...LFulSsup..ttp.....pYFohhl..h..ss..phslEhRsusupt..Yh..ps....hpl..h..st.phspssh...ss.....t.su-....ctphpLYVNG..........lp.....h...s......S.p.s...s....sF..Ipc..hs...sls.+..s.plG.......A.........s...c.R....s....s...p....s.....ha..s..u....shpIcplolaN+ALos-EVpphot................... 0 5 5 6 +4419 PF03482 SIC sic; sic protein Bateman A, Howe K anon Pfam-B_5 (Release 7.0) Family Serotype M1 group A Streptococcus strains cause epidemic waves of human infections. This family includes the sic protein an extracellular protein (streptococcal inhibitor of complement) that inhibits human complement [1]. 
19.90 19.90 20.60 20.20 19.60 19.00 hmmbuild --amino -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.13 0.70 -4.78 8 450 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 7 0 4 450 0 193.50 81 91.33 CHANGED ETYTSRNFDWSGD....DDWPEDDWSGDGLSKYDRSGVGLSQYGWSKYGWSSDKEEWPED.WPEDDWS..........SDKKDETEDKTRPPYGEALGTGYEKRDDWGGPGTVATDPYTPPYGGALGTGYEKRDDWGGPGTVATDPYTPPYG.......................................................................................GALGTGYEKRD.............................DWRGPGHIPKPENEQSPNPSHIPEPPQIEWPQWN...GFDGLSSGPSDWGQSEDTPRFPSEPRVTEKPQHTPQKNPQESDFDRGFSAGLKAKNSGRGIDFEGFQYGGWSDEYKKGYMQAFGTPYTPSAT ................................................ETYTSRNFDWSG...............DDWP.....EDDWSuD........................YuW.....SSD..K........sEDDWS...........SDKKDETEDKTRPPYGEALGTGYEK.RDDWGGPGTV.ATDPYTPPYGGALGTGYEK.RDDWGGPGTVATDPYTPPYGGALGTGYEKRDDWRGPGHIPKPEN.E.QSPNPSHIPEPPQIEWPQWN......GFDGLS.GP.SDWGQSEDT.PRFPSEPRVsEK...P...QHTP...Q.....KNPQES....DFDRGFSAGLKAKNSGRGIDFEGFQYGGWSDEYKKGYMQAFGTPYTPSAT............................. 
0 2 4 4 +4420 PF00158 Sigma54_activat sigma54; Sigma-54 interaction domain Sonnhammer ELL anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.98 0.71 -4.78 287 23376 2012-10-05 12:31:08 2003-04-07 12:59:11 21 270 3214 38 6053 30277 7477 165.30 44 32.07 CHANGED llGpSsshpp.lhchlpplA...sos...ssVLlpGEoGTGKEllA+uI.....Hp.......tS.sR...p..st...PFlslNCA..Al..P-s...LlESELFGac+GAFTGAppp+t.GhFEtAsGGTLFLDEIG-hPlshQsKLLRVLQEtplpRlG.u.s.c.s.lplDVRlIAATNcsLpptlp.pGp..FRcDLYYRLNVlslp ......................................................................................lGpo.ss.hpp.lhct.l.p..p....l.A.......t..os.......ss.V.L.I.p.GEoGT.GKE....l....l...A.+.u.l.........Hp..............................tS..s..R.......t.......s....t.......PF.....l...s.....l......N.......C.....A......A..l..........P.c..s..........L........l.....E.........S......E......L.............F..........G..........a......p..........+...........G.........A......F.....T........G......A........p........p.........p.........+.........t.........G...h....F.....E...t.......A......c.......G.....G.....T....L.F.L.DE...IG-h.....P..h.p...........hQ....sK..LLR.V..L....p........-..t...p.....h.......p...R...l.....G...u...s.....p...s.....l...p..l..D.V..R...l.I.u.A......T.s..........c..s...L..p...p.......hl.p..pG.p..FR..cDLaYRLsVhsl.................................................................................... 0 2150 3866 5098 +4421 PF00309 Sigma54_AID sigma54_AID; Sigma-54 factor, Activator interacting domain (AID) Finn RD anon Prosite Family The sigma-54 holoenzyme is an enhancer dependent form of the RNA polymerase. The AID is necessary for activator interaction [1]. In addition, the AID also inhibits transcription initiation in the sigma-54 holoenzyme prior to interaction with the activator [1]. 
21.20 21.20 21.80 21.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.36 0.72 -4.49 153 2778 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 2568 0 623 2038 696 48.40 41 10.45 CHANGED .pLphc.sQpLshTPQLpQuI+LLQhoshELpphlppplpcNPl..LEhpc .............tLpl+.uQpLshTPQLpQuI+LLQLSslELppplpptl-pNPL..LE...................... 0 216 410 521 +4422 PF04963 Sigma54_CBD sigma54_CBD; Sigma-54 factor, core binding domain Finn RD, Wigneshweraraj SR, Buck M anon Prosite Domain This domain makes a direct interaction with the core RNA polymerase, to form an enhancer dependent holoenzyme [1]. The centre of this domain contains a very weak similarity to a helix-turn-helix motif which may represent the other DNA binding domain. 21.10 21.10 21.90 21.50 20.90 20.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.23 0.71 -4.87 22 2858 2012-10-04 14:01:12 2003-04-07 12:59:11 8 8 2612 2 650 2119 1007 194.30 32 42.31 CHANGED --Dt...hpshss.st.oLp-HLhpQlsls.hsss-RhIAhtLIDslD-sGYLp.hsLpElsppLs....sphscVcpVLphlQpF-PsGlhARsLpECLhLQL+phs.....h-.AhthlscpL-LLApRDassLt+hstlcE-DLt-hlp.IppLsP+PGspapssts-hllPDVhVRps.sGtWhVELNs-slPRlhlNppYh .................................................s......htt..h.s...tpoLp-.aLh.Ql.p..l.t....h..o.cp..-.+t..IAphll-slD..-sGYLp.....h..............s........l-.-..l....h..c....phs..................................ls.h..--..l-..t.......l.L.c.hl.Q.p.h.DPsGVuA+sLpECLllQ...Lpphs..........................hhp...A..h.t.l.l...p.c.a...l-.hL....u...p.......+............ca....pp......L.h+.hh........p........l....p.p.c.p.l.....ccsl.s.l.I.p.s.LsP.+.P......G.t..p.h....t....s......s......c........s...p.......Y.......l.......l.PDlhVp..............c......p....s.......s.........c.....a..tVp..L.Nsc.shP+lplsppY............................................................................ 
0 226 431 549 +4423 PF04552 Sigma54_DBD sigma54_DBD; Sigma-54, DNA binding domain Finn RD anon Prosite Domain This DNA binding domain is based on peptide fragmentation data. This domain is proximal to DNA in the promoter/holoenzyme complex. Furthermore this region contains a putative helix-turn-helix motif. At the C-terminus, there is a highly conserved region known as the RpoN box and is the signature of the sigma-54 proteins [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.92 0.71 -4.72 29 2839 2012-10-04 14:01:12 2003-04-07 12:59:11 8 11 2603 3 645 2149 926 157.50 46 33.86 CHANGED psalpcplppApWLl+uLcpRtcTlLKVuppIVcpQcsFLpcG.ptL+PLsL+clA-slshHESTVSRlTssKYltTPRGlaELKaFFouul.uossGu-.tSspAl+thI+pLlssEs..pcPLSDspIsphLccpG.lplARRTVAKYREuLsIPuSspR+R ..........................p.pal+pplp-A+WLl+uL.cpRpcTLL+VupsIVcpQps.FF..............pp.G..t.c.t..hKPh...sL+Dl....Aptl........sh........H.E..S........TISRsos...pKY.lpTP.+G.lF.ELKaFF....o..sp..l..........so...........p......s............G........u...-.......s.............S........osu.....I+s.hl+cLIss...E.s......p..K.P.LSDs.+lspl.Lp.-.pG..I.lARRTVAKYRE.pLsIPsSspRKp............................... 0 223 427 544 +4424 PF04546 Sigma70_ner sigma70_ner; Sigma-70, non-essential region Finn RD anon manual Domain The domain is found in the primary vegetative sigma factor. The function of this domain is unclear and can be removed without loss of function. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.22 0.70 -4.71 124 3805 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 2580 2 391 2852 1705 206.60 36 49.01 CHANGED aP.....sslshlLppa-+lpstEh..RLsDllsGalDP.......s..spss..t.hssc.spsptsc.................p-------p-.........................-s-sG.DPE.Atp+Fstlccpapphpp.......sl..pcpG+s..spp...stcthptluclFtth+LsPKpaDtLlpplRshh-pVRtpERtIhclsVcpu+MPRcsFl+tFs.GNETshsWlcphls..sp..csauptLpchps-IpcsQpKLhplE ................................................................................................................aPtslshlLppY-+.hp..s-ph..RLoDll...o......GalDPs.................................tttshAss.s..st..l.ss.chsc.s-hs-.....................................-------Dpss............................sps-s-su.DPclAtp+Fuplp....sQachs+c................ul....pc.p....GRs.......ccp........stttht.......pLu-lFpph+LsPKQFDtLVsphRshh-RlRsQERhIM+LCV-pu+MP+.c.sF.lptFs..u..........N........E..........sst.sWh-thht..ts...+sautt...ltchpp-lhcs.p+LttlE............................................. 0 87 203 293 +4425 PF03979 Sigma70_r1_1 sigma70_r1_1; Sigma-70 factor, region 1.1 Finn RD anon Prosite Family Region 1.1 modulates DNA binding by region 2 and 4 when sigma is unbound by the core RNA polymerase [1,2]. Region 1.1 is also involved in promoter binding [1] 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.71 0.72 -4.05 31 3466 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 3283 1 714 2077 1357 81.80 31 15.80 CHANGED ..chspspl.Kp.LIppGKccGa.lTYcElN-tLP.t-h..lss.EQl--lhphl.s-hGIpVl-.....-s-.....p.......tEp.t...sp-csst-Esp ..................h..t..pppl..+p.Llpp.G..K.c.p.G.a..LTYsEl....N-.pLs....sph.......lDu..-.........Q.lE-llphl..sDhG..IpVh-.......ps..s.-s.-.............sh........hhs-p..............tsDpss.....s........................................... 
0 240 448 579 +4426 PF00140 Sigma70_r1_2 sigma70_r1_2; Sigma-70 factor, region 1.2 Sonnhammer ELL, Finn RD anon Finn RD Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.97 0.72 -7.50 0.72 -4.27 183 8438 2009-01-15 18:05:59 2003-04-07 12:59:11 15 36 4678 31 1886 5884 3958 35.90 42 8.55 CHANGED sDslchYL+EIGch.sLLos-EElcLA+clcpGtptt.c .........tDsl+hYL+ElGpl.sL..LT.sE.-ElclA+RlcpG....................... 0 601 1232 1599 +4427 PF04539 Sigma70_r3 sigma70_r3; Sigma-70 region 3 Finn RD anon manual Family Region 3 forms a discrete compact three helical domain within the sigma-factor. Region is not normally involved in the recognition of promoter DNA, but as some specific bacterial promoters containing an extended -10 promoter element, residues within region 3 play an important role. Region 3 primarily is involved in binding the core RNA polymerase in the holoenzyme [1]. 25.30 25.30 25.30 25.30 25.20 25.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.21 0.72 -4.09 82 10957 2012-10-04 14:01:12 2003-04-07 12:59:11 11 41 4723 32 2549 7548 3445 76.80 28 20.25 CHANGED Epls+lp+sp+pLtQchGRpPoscElAcpLshs.ccVcchhphuppslSLchslupctDsphs-hlpDs.s.ssp-ts ......................................plN+lt+s.p...+p...L.h.......Q.........c...l........G..R..-...Po....s...-E...lA.....cch......s.....h.....s.-cV.....p..ch...L...c...h..u..p..c..s..h...S.l...-..s.....P.....l.......u......p...-....p...D...u.....p......l...s.......D......h...ltDpp...P.p..s.......................................... 0 876 1740 2204 +4428 PF03084 Sigma_1_2 Reoviral Sigma1/Sigma2 family Mifsud W anon Pfam-B_1759 (release 6.4) Family Reoviruses are double-stranded RNA viruses. They lack a membrane envelope and their capsid is organised in two concentric icosahedral layers: an inner core and an outer capsid layer. 
The sigma1 protein is found in the outer capsid, and the sigma2 protein is found in the core. There are four other kinds of protein (besides sigma2) in the core, termed lambda 1-3, mu2. Interactions between sigma2 and lambda 1 and lambda 3 are thought to initiate core formation, followed by mu2 and lambda2 [5]. Sigma1 is a trimeric protein, and is positioned at the 12 vertices of the icosahedral outer capsid layer. Its N-terminal fibrous tail, arranged as a triple coiled coil, anchors it in the virion, and a C-terminal globular head interacts with the cellular receptor [2]. These two parts form by separate trimerisation events. The N-terminal fibrous tail forms on the polysome, without the involvement of ATP or chaperones. The post- translational assembly of the C-terminal globular head involves the chaperone activity of Hsp90, which is associated with phosphorylation of Hsp90 during the process [2]. Sigma1 protein acts as a cell attachment protein, and determines viral virulence, pathways of spread, and tropism. Junctional adhesion molecule has been identified as a receptor for sigma1 [1]. In type 3 reoviruses, a small region, predicted to form a beta sheet, in the N-terminal tail was found to bind target cell surface sialic acid (i.e. sialic acid acts as a co-receptor) and promote apoptosis [4]. The sigma1 protein also binds to the lambda2 core protein [3]. 
25.00 25.00 26.30 64.10 16.90 16.50 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.37 0.70 -5.84 4 120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 42 18 0 98 0 322.90 50 99.77 CHANGED MARAhasFhTshFGs.pslPhscpQlopLLpSSNSPWQct..shshslspuhloTsphPhsGShhYQcShLaSuhlPhlLhspDAW+-hpahchsWTsssLsGLVtAssP..AsP.YpPtuupaaDlppYPpWApchR.LpphYP.Lht.TLLNhhphGPlsYV-T.ssMlSGplsshhMohaG+sFtEIAhpLsQosuNhPhtsDusYDpthRhllSLahLSYlGVlpQssTIpGFaFpoKpRGsutEuWhL.Ys.TpupRlplspR+auahssRSPDWNhDhSalhuusLoAhlhSsRQ.PLluNpuVsNpupNhPGaoussGs.V+tlplhshAsEhIcphhhsGllossctpplptpusshpphhpscLssltspDDtL.ptpPphAR.RlKPFssssWssGpottulAuLAshh .....................................................s.F.Gs.psl.shNcp..SpLLpuuNSPWQhh....h..hu.Glso.os.sPhsGSphYQ.ShLhSuTl.hshtspctWtshphhtLsWos.sLsGLVsA.s.s.AsP.....hQstusph.Dh.sYPpaApc.RthpphY..LhtsTLLshhthGPlhYVcs.ssMhSGtlsphhMs.hGpsFh-hshpLhQus.NhPhp.DusYsp.hphlhulahhShhGhlppptThtsFaFt.tp.u.t.-.hhL.hs.ststth..sttp......ahhstSPcWphs.shl.uu.Lot.lhus.p.t..P.h.hsptsl.s.upsh.sh.o...s.t..s...l..th..p.h.hstthh.phh.sGlhstupttthpt.hstht.hhpttltthhhtss.h.....t..th..hpPa.stpas..G.oh.s.............................................................. 0 0 0 0 +4429 PF02454 Sigma_1s Sigma 1s protein Mian N, Bateman A anon Pfam-B_2133 (release 5.4) Family The reoviral gene S1 encodes for haemagglutinin (sigma 1 protein), an outer capsid protein and a major factor in determining virus-host cell interactions. Sigma 1s is one of two translation products of the S1 gene. 
20.10 20.10 21.00 29.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.45 0.71 -4.32 5 20 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 8 0 0 20 1 114.30 61 84.05 CHANGED CQ+uLNpGSRRSR+RsKYTLIhSoGSl+DSMpQpNESSLLSKVGpsWLHQhVppNLQSPDWKALSEPSKQLSMDLIRVLPsWVsEW-sLRQDLQsYAhTpsISL...REWlLRN....sTLDH ...CQKGLNQGSRRSR+RLKYTLIlSSGSsRDSMMQTNESSLLSKVGhsWLHQSVMhNLQSPDWKALSEPSKQLSMDLIRVLPSWVlEWDNLRQDLQSYALoTsISL...REWILpN....VTLDH...................... 1 0 0 0 +4430 PF03842 Silic_transp Silicon transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 49.40 33.40 23.50 23.00 hmmbuild -o /dev/null HMM SEED 513 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.80 0.70 -6.42 4 167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 79 0 7 165 0 287.70 62 98.66 CHANGED hhoshssl+hhYShuLlIFSlIIVsALMFspsTKlApDspPhsALllMhhuIlWhSMlEGtQsShVGLPP.VD+sLYKESHPlTa+.suLuaKGDNLDRYLMGRQFMVlhlsFsINLCGuPL-su...-VLGLPpllppIFLsoGIAMILhsshIGQLTsQVNASHCMlDaINsaFhhFTLYssLlIEhoGVMHuSYLIQshhhhluGKPVpTNE.PRouhQshFFWGRVLhSLulLsFuLAVTlpALFsGpTTM...WphIPssVAllLFFlLMSlVGhLEGMQIAFFAVAKl.+pERGsp.Fu+KTCELLF+GpGcNLPGFMlGRQhTVshCFFllARVTTLDIEVGss-NIFGVSDGhQtFhNhGFhGAlITTILASIsWQLsASAFPlAFLsNPhsYIlLhluLhLEATGlCuGAWhLuhIpK+VstFphDEVYVGTPEE.RhutD+sDtphcts...tthhhGssh........................coasspppDhLc.........t.sup.cEttt..tu.s.cTcAL.chscpQ..-Ah-hhsups 
......................................................................................................................................tst....-VLGhPshlp.lFLsh.GLuMIlFTChlGQLsoQVNAoHCMlDaINNYFALFTLYsAMslEFSGlMHuuYLIQ.lhutlSGKPI.SNE.P+sGhphhFFWuRVLMSlAILuFshAVslsALFsGpThh...asulsssluVhlFFhhMulVGhLEGMQIAFFAVAKLPtpERGouaFG+KTC-LLFcGNGpNLPGFMIGRQLTVVhSFFlVuulTuLsIpPGpGs.NIFGlSDGAQsFLNaGFpGAVITTILASIoWQLAASAFPlAFLNNPlTYlLLhlALhLEhTGlCuGAWV...................................................................................................................................................................................................... 0 6 6 6 +4431 PF04801 Sin_N Sin-like protein conserved region Waterfield DI, Finn RD anon Pfam-B_6302 (release 7.5) Family Family of higher eukaryotic proteins. SIN was identified as a protein that interacts specifically with SXL (sex lethal) in a yeast two-hybrid assay.\ The interaction is mediated by one of the SXL RNA binding domains [1]. 20.00 20.00 20.60 20.60 19.90 18.60 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.46 0.70 -5.62 15 414 2009-09-10 16:59:54 2003-04-07 12:59:11 8 6 306 0 263 406 1 288.50 21 70.12 CHANGED EED....DPVVpEIsVaLupoLs-..pLYlhQYPl+stttsaDssp....................h.ss+lKPpspclEh-hulDTpSpsYDt.KuE...h.hsG........psupcpsshpsthh-tpsFhSs+shsssscYAVGlhpsGElHLTPLpuIlQhRPShpahD.K..p-pcpKs....cpsup--s-..scp............p..c.hptloV+FuR...tsp+t+ptR.pohphhpph.u-EsWlchpaashp.sspsphc+ptLhupsssss........sshshSsp-YlshLhssstccphs........ssspcsLspppL+shPLh-.Ql+sLhpcu+..VhpFuplhpLl.................spss.....s-plLcsLppsAhLVpGsWVsKS-llasc...u.......tshh.A-shppARDalLapFops.ct.lpRppls...ssspLssc-s.....+-lLsphAp..sp....ss+sWclhhssDc.-F.........p..cas-lVp+..QchhWpupppcLcchh 
......................................................................................................................................--....Dslltphslals.............s.tt........plalh..Q...YP..+.st..............s....h..pt......................................................ps+hKsppt.h..l..E.l-hsl.sp....s...t....as...hs..................................................................t.tpt..t.h.ppt.....hh............t.tts......s...s.tYh...h..uh.h......p......p....t...........p....l..HLsPl.p.ulhQhRPphpalD..t........ptp..tpt............ptt.tptttp..tpp.............................................t.hp.lp....h.p..ht.......p.......stpt.....t......t..t.t.....s...h..p..h.tp..E.Whphpaht.p..s...s......................hh............................................................h.......................................................................................................................................................................................................................................................................................................................................................................................................................................h.......................................................................................................... 
0 94 148 217 +4432 PF04954 SIP Siderophore-interacting protein Bateman A anon COG2375 Family \N 26.30 26.30 26.60 27.10 24.30 25.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.58 0.71 -3.73 169 1985 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 1328 1 509 1471 82 120.80 30 41.36 CHANGED schhLLsGD-TALPAlushLEpL...P.sssputshlEVssss-.p.slss........ssslplpWlhRsss............tsshLhp.slpshsh....................................................................ss......ssasW.lAuEusss.+slRcaLhp-tGls+p.plthsuYW+pG ..............................................................................................................................h...hLhluD-oulPAlsphL...EsL..............P.ss.spsp.sl.l.c.V.ss.ssc....p.....Lsp...................hssh.plpWlh+ssp.............tslss.slpp.hph...............................................................................s......................ss........shasW.lsGEupsl.+slR+hlptEhulspp.plpssuYW+t................................................. 0 125 323 436 +4433 PF02146 SIR2 Sir2 family Mian N, Bateman A anon IPR003000 Family This region is characteristic of Silent information regulator 2 (Sir2) proteins, or sirtuins. These are protein deacetylases that depend on nicotine adenine dinucleotide (NAD). They are found in many subcellular locations, including the nucleus, cytoplasm and mitochondria. Eukaryotic forms play in important role in the regulation of transcriptional repression. Moreover, they are involved in microtubule organisation and DNA damage repair processes [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.21 0.71 -4.54 20 5869 2012-10-03 09:55:27 2003-04-07 12:59:11 12 37 3562 114 2282 5061 910 172.30 31 58.89 CHANGED GAGISspuGIPDFRS.csGLas+hspcs..LssPpshhchsphhps...FYsht+chh....pspPsssHphlstLEcps.plhplhTQNIDsLcccAGsp..pllEhHGShspspCssCch.hstpplhpphchtp.s..............................pCspCGu...................................llKPDlVaFGEsLP.cphppshc.clpcsDlllVlGTSLpVhP .......................................................................................................GAGlSs.p.....S..G..I.............P....sF.R......u...........t...s..G.......l.a........p........p........h........p...........t.................l.............s..s.c.......t.....h...h........p.......p...s...h...h.htp....................F..a.p.h.h.c........phh...............pspP......N.h..u...H.hhl..............A....p....L.............p.........c.........t.........t..............h........h.t....l..........lT.Q...........NlD......s.............L........H.......p.............c........A............G..sp.....................pll...ch.HGslhp..s.p....C.....h.....p....C.t......t...........h........s....h.....p.........h..........t...p....h....t...t..t....t...s......................................................................................pC...s...p...C..s.u.....................................................................................................h..l+P.c.l..V........h..F......G.....E...........ls........t....h...p...p......s......h.............p...t............h.........p..........p...u.............D.........lh.lllG.TShtVhP............................................................................................................. 0 811 1362 1902 +4434 PF04247 SirB Invasion gene expression up-regulator, SirB Mifsud W anon COG3094 Family SirB up-regulates Salmonella typhimurium invasion gene transcription. 
It is, however, not essential for the expression of these genes. Its function is unknown [1]. 29.70 29.70 29.70 29.90 26.90 29.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.48 0.71 -4.39 63 976 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 953 0 140 446 33 123.40 43 97.35 CHANGED h.YhslK+lHlshlslSlsLFllRhhhhhpsus.hhpp+h.....l+IsPHllDTlLLlSGlsLhhlh...phhPFss..sWLotKlhullsYIsLGhhAL..+ps+.....spth+hhA.FlsAlsshhhlstlAhoKt..sh..l ......a.hLhplHLlsluLSlsLhslRaahphppps.....ht.h.....t+a.....s+IlP......llDTlLLLSGIuLhhhs...phhPFostutWLTEKLhuVllYIlLGalAL..+pt+.....o...p..p..sRhhA.F.LA.LlsLhhIlKLAsTKhPlL..................... 0 28 71 108 +4435 PF01380 SIS SIS domain Bateman A anon Bateman A Family SIS (Sugar ISomerase) domains are found in many phosphosugar isomerases and phosphosugar binding proteins. SIS domains are also found in proteins that regulate the expression of genes involved in synthesis of phosphosugars. Presumably the SIS domains bind to the end-product of the pathway. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.28 0.71 -4.54 45 30074 2012-10-02 15:05:26 2003-04-07 12:59:11 17 45 4875 210 6102 21679 8959 126.50 18 45.69 CHANGED hltpsc.plhlhGtGsuht.suhththph.pplshhsshst.usp.....httt.hshlspsclllhlo..hsspstchhpsst.hsppps.sph..lsITspssusluppu...chhlhh.ss.ph.......hpshssphsshtslhht ..........................................................................h...ptp.plhhh.G.tG.s....o....hh...s....u.h....p....h....t..h....pl...p.......p...h.......u...h...h..s...s.....t....s..h...s..u....st....................h.tts......h....s.....h.....l...s......p....s.......s....l....l....l.s.lS.......tSG........p.....o........t........-........h.....lt.....s.....h.......p...h....s..+....p.......p........G....spl............lsl....o...s.......t...s...s..o....s.....l..s.....p.....u.........s..h...s.l...h..h....s.s..ch...........h.s.h..t..s..h.ss.phhhh.hh...h................................................................. 1 1857 3650 4961 +4436 PF05185 PRMT5 Skb1; PRMT5 arginine-N-methyltransferase Wood V, Mistry J anon Pfam-B_4050 (release 7.7) Family The human homologue of yeast Skb1 (Shk1 kinase-binding protein 1) is PRMT5, an arginine-N-methyltransferase [3][4]. These proteins appear to be key mitotic regulators. They play a role in Jak signalling in higher eukaryotes. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.61 0.70 -5.57 35 750 2012-10-10 17:06:42 2003-04-07 12:59:11 11 21 319 29 484 1612 79 345.80 25 63.15 CHANGED -.hssWphWsslRphC.sap.spLpVuLcls..psl.Pst....p.lpRWhuEPlcslllssshFlsN..pp.GaPsLsKtpQplltpahphpssthlls........................................................t.sscpshss...Ylp....Yl+aLhpp........s......shsttcpht..sYpDhLQsPLQPLpDNL-StTYElFE+DslKYp.Y-cAIppALhDhssppctss.........................hllhVlGAGRGPLVcpsLpAuppss..s........plclaAlEKNPsAhlhLp.p+.phcp...W...ts+.VpllpsDMRpapsst.......................KsDllVSELLGSFGDNELSPECLDGhQ+.....aLpps.GISIPpuYoSY...................lsPIsuspLasc...........lpthpp................................ssshEpPYVVphpshthlusp.............................ppsapFpHPstp...............................p..tcNsRapslpFps.pp...cuhlH..........................GFuGYF-usLYpD........lpLSIpPs..........................................s...+oss.MhSWFPlaFPLcpPlhlpcss.........plplphWRpss..spKVWYEWslss 
......................................................................................................................................................................................t............................................................................................................................................................................................................................................................................................................................................................................................................................................hllhslGuGpG.Ls.h.uhpAu...........................thcl.Y.AV.E.t...s...s.....A...h.t.h.p.th...p...t.p.t.....h.............ssp...l.pl.....l...p...u...ch.cch....p...h.P.-...................................................................psDlllSEh.h..G..s..h.h....N..E..h...s.E.....s....l.....ut.....p.+...........aL.+ss...G....l....hPsp..hsha.....................luP.h.s.s..t..p....l..a.p.c....................hh.s.h.tt...................................................t.hcps.h...l.....s....p....hp.sh....h.....l.uts..................................................p..s.h.s.F.p..cspts....................................................................tp..p.p.h..pt....t..h.p.F..t....h...pp.........su.hlH................................GhusaF-...s....h...h...h.t.s............................lh..L.S.h..t.Pp.................................................................p........shhpWh...hhh.l....p..pP.l...lptsp............pl..p..h.....h................................s............................................................................................................................................ 
0 182 271 391 +4437 PF01202 SKI Shikimate kinase Finn RD, Bateman A anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.08 0.71 -4.36 98 7157 2012-10-05 12:31:08 2003-04-07 12:59:11 17 51 4272 70 1785 5455 2766 155.30 27 64.96 CHANGED hGuGKoTlG+hLActLshpFlDs.Dp.h..I....EppsG....hslspIFp.pc.GEssFRchEpcs...l..pclhppp.....shVluTGGG..slhppc...spphLp.............ppGh.llaLcssh-tlhpRl..p.pcpp....R...PlL.psps.........p.h....h..chlh.pRpslYpc..h.....u......s.h..h.hss.spps..............pplsppllptlp ...............................GuGKoTlGp.t.L.....Ap..p.L........s.....h.p.........Fl....D..........s...D.p..h.........l.............p.pps.s.......tsl...u..p..l..h.s...h.p....G...E...s...s....F......R......p...h.E..p.....p....s........l........p.c....l....t..pp....................ssVl..u....s.......G..G...G......s..........s...hp.cp..........sR.s..hLc.......................................tp..sh....s..l.a...L..c..s.s..h...-..h..lh.pRl....p..tcps............R......Phh...psps...........................ph......h...chlt.....p.Rp..s.h.Ycc....h.....s..........t.h..h..l.ss...spts.............ptlspplht...t................................................................................................................................................. 1 506 1065 1476 +4438 PF02731 SKIP_SNW SKIP/SNW domain LOAD anon LOAD Family This domain is found in chromatin proteins. 
25.00 25.00 30.50 30.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.14 0.71 -4.77 28 359 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 296 0 242 348 3 154.70 53 31.04 CHANGED spaI+YTsssp.ss.t........pp......RII+hsppp.....pDPL-P..P+F.K.+K+lspsssSPPsPlL+......SP......sRKLTtEDppcWcIPPslSNWKNsKGYTIsLDKRluADGRu..Lpcsp.....lN.-+FApLo-AL.hA-+pAREElctRschp+phs.+EppcKE-+LRpLAp+ARp-R .......................................s.palR.YTPupQs.sshsp.........tpp........RlI+hVEhQ.....pDPhEP..P+F.K...pKKlPR.G..PPS......P..Ps....PVhH..........SP..sRKl.Tsc-Qpc....WKIPPsISNWKNsKGYTIPLDKRLAADGRG..LQsVp................IN.-pFAcLuEALahADR+AREtVchRAphp++hApKEKppKE-cLRplAQ+ARpcR................................... 0 80 132 199 +4439 PF03217 SLAP Bacterial surface layer protein Mifsud W anon Pfam-B_2530 (release 6.5) Family \N 27.00 15.20 27.10 15.20 26.80 15.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -11.08 0.71 -3.93 27 528 2009-09-14 12:59:11 2003-04-07 12:59:11 9 28 47 0 108 469 0 105.10 28 27.52 CHANGED oK......slMHsAYsYs.psG.c...+l...ss...h.hpshs....sls...h.hs........s..sts..I.sGKpYY......+Vup.s.....cYItAuNlDGTpRh...LK+NAYVYss.supR....ssph.hh+KGoplsTYGushplhNGK.pYY+IGt..s..tpYVKsuNF ......................................................................lh+suhhYs.psG.p...ph......st.........h...hhthp....plp....h..h................s..hhp.....I...sup...pa..Y......plsps......pYl.....pus.N..l...s...u...s.p+p...................lp+.NAYlYppss.p+...............ttp....h....hlpK..Gpplp..s..a..Gs..p.h...h...t..Gc.pY.Yplst....s.....pal+s.s.............................................. 
0 10 10 24 +4440 PF03843 Slp Outer membrane lipoprotein Slp family TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.60 20.60 21.00 34.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.70 0.71 -5.24 38 1178 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 813 0 129 486 27 159.70 46 83.93 CHANGED LuGCso.lPptl..tspsssh.shsplpssssshhGppVRaGGhIlslcN.ppspTplEllshPLss.su+P.phs..ppopGRFlAphsGFLDPssatpGRhlTllGslpGhcpsplGchsYpaPVlpspsh+lWp....htpph.hss.......a...s.ah..ashh.....hWt ..............................LuuCso..lP..psI.t.s.ssP.ss.QpshVtVhstPtLYl..........GQpARFGGKV..lsVpN.tpscTcLEIAslPLDS..uA+P.sls..ps..spGRlh.AchsGF.LDPVsa.RGphVTVlGsIsGs...s.GK.IGpsPY.pFhlhps...pGa+hWH......Lppp.V.hssp..........sa..h.Y...G..u.....h.........GaG....u....Y................................ 0 32 65 98 +4441 PF01464 SLT Transglycosylase SLT domain Bateman A anon Prodom_3175 (release 99.1) Family This family is distantly related to Pfam:PF00062. Members are found in phages, type II, type III and type IV secretion systems (reviewed in [4]). 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.54 0.71 -4.61 39 14235 2012-10-03 00:09:25 2003-04-07 12:59:11 15 167 3793 20 3295 11652 3304 115.80 22 28.70 CHANGED shhttstpphslpstllhAlstpESsasPpAhS......tss.......................uhGLMQlhssTspthth...........shsphhcPppslpsGspaLp....phhpphst.......shhhuluAYNuGhuphtchhptttptstphhh .............................................................h......ttthtl.s...t.h...lh.ul.h.htE...S....s......a......s.........s...p...A....h......S................sss........................................................................................AhG..LMQ.....l..h...s...s.T..u..p.p..hs.......................................sh.s...ph.h.-P.t...p....s..l.ps....G.s...p.....Y...Lp..........th..h..p..p..hst.......................shhhs.luA.Y.N..u.G....u...p..h.phht...........h.................................................................... 1 900 1993 2657 +4442 PF02258 SLT_beta Shiga-like toxin beta subunit Bateman A, Mian N anon Pfam-B_3684 (release 5.2) Domain This family represents the B subunit of shiga-like toxin (SLT or verotoxin) produced by some strains of E.coli associated with hemorrhagic colitis and hemolytic uremic syndrome. SLT's are composed of one enzymatic A subunit and five cell binding B subunits. 21.10 21.10 21.20 21.50 21.00 20.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.44 0.72 -4.14 2 261 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 133 146 1 142 0 67.60 81 78.02 CHANGED As.DCspGKlEaoKYN-DDTFTVKVssKEhaTsRWNLQsLL.SAQlTGMTVTIKossCcsGuGFuEV.Fp ...................M.AADCAKGKIEFSKYNEDDTFT...VKVs..GKEY.WTSRWNLQPLLQSAQLTGMTVTIKSSTC-SGS.GFAEVQFN.... 0 0 0 0 +4443 PF04102 SlyX SlyX Bateman A anon COG2900 Family The SlyX protein has no known function. It is short less than 80 amino acids and is found close to the slyD gene. 
The SlyX protein has a conserved PPH(Y/W) motif at its C-terminus. The protein may be a coiled-coil structure. 30.00 30.00 30.00 30.20 29.60 28.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.43 0.72 -3.60 114 1397 2009-09-11 12:49:59 2003-04-07 12:59:11 7 3 1387 1 285 730 213 67.60 42 92.66 CHANGED plppRls-LEh+luFQEcsl-pLNpslscQppplccLpcplphLtp+lcshp.sst...ustsc-...sPPPHY ............hEtRlsELEs+lAFQE.TIEELNtslstpphphs+LpcpL+hLscKlcs...t....p...sSsh......As.pu-E..........TPPPHY.............. 0 67 152 221 +4445 PF02481 DNA_processg_A SMF; DNA recombination-mediator protein A Mian N, Bateman A anon Pfam-B_2252 (release 5.4) Family The SMF family, of DNA processing chain A, dprA, are a group of bacterial proteins. In H. pylori, dprA is required for natural chromosomal and plasmid transformation [1]. It has now been shown that DprA is found to bind cooperatively to single-stranded DNA (ssDNA) and to interact with RecA. In the process, DprA-RecA-ssDNA filaments are produced and these filaments catalyse the homology-dependent formation of joint molecules. While the E.coli SSB protein limits access of RecA to ssDNA, DprA alleviates this barrier. It is proposed that DprA is a new member of the recombination-mediator protein family, dedicated to natural bacterial transformation [2]. 
29.60 29.60 29.70 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.27 0.70 -5.44 97 4687 2012-10-01 21:16:48 2003-04-07 12:59:11 10 17 4194 1 1010 3571 633 204.40 38 60.24 CHANGED t.hppts...h.phls.ht-spYPptLcp.l................c...sP.hlLah+G..........shshL....t...slAlVGoRpsoshGtphspplsppLup.t.....uh..slVSGlAhGIDstAHpuAL.....ptt........GtTlAVLusGl..-hh....YPppNppLhpcIhp..p.GhllSE.....assssp.PpptpFspRNRIIuGLopullVlEAsh+SGoLlTAchAhc.u....RcVaA.l....PGsltsspupGsppL....IppG.Atllps ......................................................................................h...t.th.phls.ht-ttYP..h.Lpp...l...............................h...........s.sP.hlLahcG.........................shphLp.............p.plAlVGoRp..s...........ot....hG.p..........phsp.....pls.pp...Lup..p.......uh..s.llS.GLAhG..IDssAHpuAL......p.s.t..................GtTl.A.V.l.usGl..-..t.h....YP..pp.pppLtppIh.....ptGh..llS....E........a..s..s.....s.s..........Ph...t...hp..FPpRN....RIIuGLS.p.u.ll...Vl....EA...........s.......h....+.S...G.SL..ITA.chAh.EpG.......R.-VaA.l......PGsl.ts.s...hSpGsppLIcpG..Apllp.s................................................................ 0 340 676 868 +4446 PF03467 Smg4_UPF3 Smg-4/UPF3 family Bateman A anon Bateman A Family This family contains proteins that are involved in nonsense mediated mRNA decay. A process that is triggered by premature stop codons in mRNA. The family includes Smg-4 [1] and UPF3. 
21.30 21.30 22.10 21.60 19.50 20.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.37 0.71 -4.27 6 408 2012-10-02 20:46:34 2003-04-07 12:59:11 10 11 263 3 260 394 1 159.80 33 36.82 CHANGED ppsppctp.KVVlR+LPPsLTcc-hhpplps.Ls-c..................WshFchhsushSacsptYSpshlpFps.sD.lhEFpshFsGal....FlDsK......ssphhAlVEhAPa.QKlspssK..hKcDs+pGoI-pD...PcahcFhcplt..ctpsspsh..s-p.lc+.........tcstppht+h.sTPLl...-Yl+pK+sp+ ......................................p......h...KlVlRRLPPsLTcpph.p.tl..t...s.......ls.tc......................................................p.ha...p.h.....hs.......s.ph...sh.h..........s.p..h.a..........S..............R...AYl.s..Fp..s...-.D...lh.Fpc..pacGal...................F.lDs+...............................G..p..h....sh......VEaAPa..Q+...h....sp...pp.............p+cD..s+.....tGTI...-pD...s-YhpFLEs..h....t.........p...p.p.....hts.......ct.lpc..........................pt.....ps.t.....p..h.....thh..sTPLl......pal+.p+ph........................................................................................ 0 76 129 202 +4447 PF04927 SMP Seed maturation protein Kerrison ND anon Pfam-B_6221 (release 7.6) Family Plant seed maturation protein. 21.00 21.00 21.00 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.04 0.72 -4.01 37 316 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 61 0 192 289 0 59.60 34 56.79 CHANGED tlplG-sL..........pAsuttsucKP...VstpDAAulpuAEs+ssGt...............tshPGGlAA..shpuAAshNtpsst .................lphu-sh............p.sut....suc+P............lstpDAutlp.uAEs+spGt.................psts..GGl..AA..shpuAAshNtph..t........... 0 25 103 158 +4449 PF04355 SmpA_OmlA SmpA / OmlA family Kerrison ND anon COG2913 Family Lipoprotein Bacterial outer membrane lipoprotein, possibly involved in in maintaining the structural integrity of the cell envelope [1]. Lipid attachment site is a conserved N terminal cysteine residue. 
Sometimes found adjacent to the OmpA domain (Pfam:PF00691). 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.36 0.72 -4.36 146 2731 2012-10-01 23:09:26 2003-04-07 12:59:11 8 6 1710 10 507 1486 1060 70.60 29 49.17 CHANGED GshlspptlsplpsG.Mo+pQVphlLGoPhhs.ssFsss.pW..Ylhphpps.....t.tt..........p..ppplslhF-ssshlpths ..........Gphlstsslppl+.......hG.Mo+pQVthlLG.sP...hhp...s......h...p...sp.....sW..Y..lhpppsu.....p.t...........................p...ppsltltFsssGhlps..s........................................... 0 102 248 380 +4450 PF01668 SmpB SmpB protein Bateman A anon Pfam-B_1766 (release 4.1) Family \N 20.20 20.20 23.20 25.90 19.70 18.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.11 0.72 -4.17 166 4544 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 4483 12 970 2590 1894 67.80 47 43.63 CHANGED ph..l..ApN+KA+a-YtI.-paEAGlsLpGoEVKSlRs.G+sslp-uastl.csG....EhaLhssaIssY.ppush.h ............hl.ApNKKARH-YhI...-.paE..AGlsLpGoEVKSlRs...G+ssLpDuaspl..+s.G....Es..aLhssHIsPYppush.h........ 0 339 651 828 +4451 PF01713 Smr Smr domain Bateman A anon [1] Family This family includes the Smr (Small MutS Related) proteins, and the C-terminal region of the MutS2 protein. It has been suggested that this domain interacts with the MutS1 Swiss:P23909 protein in the case of Smr proteins and with the N-terminal MutS related region of MutS2 Swiss:P94545 [1]. This domain exhibits nicking endonuclease activity that might have a role in mismatch repair or genetic recombination. It shows no significant double strand cleavage or exonuclease activity [2]. The full-length Swiss:Q86UW6 also has the polynucleotide kinase activity. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.89 0.72 -3.79 123 5312 2009-11-23 09:46:11 2003-04-07 12:59:11 16 56 3968 8 1427 3901 1760 78.00 31 16.90 CHANGED lDLHGhphc..-Ahptlpphlppshppsh...................pslh.lIpG+G......................sG.h......L+ptltpaL....hphphl.tht.s....t.G.usGshhVhl+ ........................lDL+Gh.ph-..-Ah....p....tLt.p.a.lspuh.t.psh................................................................pplp.llHG+G........................su..h..........L+ptVt.paL.....tppppVhuap.A....ptG.GsGuhlVhl........................... 0 513 907 1211 +4452 PF00835 SNAP-25 SNAP-25 family Bateman A anon Pfam-B_1606 (release 2.1) Family SNAP-25 (synaptosome-associated protein 25 kDa) proteins are components of SNARE complexes. Members of this family contain a cluster of cysteine residues that can be palmitoylated for membrane attachment [2]. 21.20 21.20 21.50 21.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.32 0.72 -3.19 13 273 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 105 20 132 235 0 55.90 41 26.35 CHANGED PhNK....hps.c.ttstppsW+sNsD......GtVlssQP.tRVhD.tpssh.....sspuGYI..pRI.TNDA ..................PCN+....h+s.c...uacpsWusNpD.........GtVVus..Q....P...u.R.Vsc.pppph........usSGGaI..pRl.TNDA.......... 0 27 39 81 +4453 PF00565 SNase Staphylococcal nuclease homologue SMART anon Alignment kindly provided by SMART Domain Present in all three domains of cellular life. Four copies in the transcriptional coactivator p100: these, however, appear to lack the active site residues of Staphylococcal nuclease. Positions 14 (Asp-21), 34 (Arg-35), 39 (Asp-40), 42 (Glu-43) and 110 (Arg-87) [SNase numbering in parentheses] are thought to be involved in substrate-binding and catalysis. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.32 0.72 -3.79 18 4232 2009-01-15 18:05:59 2003-04-07 12:59:11 12 46 2100 210 1727 3872 2427 105.40 24 47.39 CHANGED +lRLsulDsPEosc..t..................psaGpcApcah+chlhtpclhlh.hsp.....D+YGRhLuhVahs.........spslNttLl+pGhAhsht.hYss..p.tppsphhpsEpcA+cc+hGlWup ........................................................................lRLhsl-..s..P...E...ht.............................................................ps.a....G..pc......A..p...p.h..h.c...ph...l..h....s.....c...p..l......p...h.t.h..sp..................D..c......a......G.....R.h...l.....uh.l.a.hs..........................................sps..l..sptl..VcpG..h.......Ahs........hp....h..............h......s..................................t......h......................p..........t.......h..h..pspppA+p.p.+hGlWp.................................................. 0 583 1042 1458 +4454 PF00209 SNF Sodium:neurotransmitter symporter family Finn RD anon Prosite Family \N 19.70 19.70 19.70 19.70 19.40 19.50 hmmbuild -o /dev/null HMM SEED 523 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.89 0.70 -6.03 15 11073 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 2324 84 3409 8360 1447 275.40 18 89.59 CHANGED R-sWssKh-FlLSslGauVGLGNVWRFPYLCYKNGGGAFLIPYhlhLlhsGIPLFFLEluLGQasppGulslWt+ICPlFcGlGYAshlIshalslYYsVIlAWAlaYLasSFTssLPWspC......spsWNTscCh-.h..psts.............t..sho......t.SPstEFWcRpVLplo..........sGIpchGsl+WpLsLCLlluWlllYFslWKGVKooGKVVYFTATFPYllLhlLLlRGlTLPGAhcGIpaYLpPcap+Lt-PpVWlDAATQIFFSLGlGhGsLlAhuSYNKacNNCYRDullluhlNusTSFlAGFVIFSlLGFMApcpGls...........................................Iu-VAcs.....................GPGLAFIAYPcAlThhPhSPhWulLFFhMLlhLGLDSpFsslEullTullDpaPhhh+...+RElhshhlslhsaLlGLhhl.TcGGhYlhpLFDpYuAo.hsLLhlshhEsluluWlYGss+Fhc-Ip-MlGaRPshaa+hCWpFloPslhlslhlhSllpa..pPLsYss.Ys..........YPsW.u.ulGWhhALSShlslPlahlh+lhpt 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hh................................................................................................................................................... 2 1079 1659 2764 +4455 PF00176 SNF2_N SNF2 family N-terminal domain Sonnhammer ELL anon Published_alignment Family This domain is found in proteins involved in a variety of processes including transcription regulation (e.g., SNF2, STH1, brahma, MOT1), DNA repair (e.g., ERCC6, RAD16, RAD5), DNA recombination (e.g., RAD54), and chromatin unwinding (e.g., ISWI) as well as a variety of other proteins with little functional information (e.g., lodestar, ETL1). 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null --hand HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.00 0.70 -5.62 28 15349 2012-10-05 12:31:08 2003-04-07 12:59:11 18 509 4016 6 7427 19108 4238 288.60 21 25.86 CHANGED aQhcGlpWhhphh................................................................................................................psthuGILADEMGLGK..............TlQsIullt......................hLtcttphh..................tP...................................................................................................................................................................................................................................................................................................tLllsPt.ShlpsWhsEhp+as.s.sl..pslsatGsp................................................................p.hhphtphhtshsVllToY-hlh......................+phshlpp..hc..WphlllDEuHpl...K........NspSpltpslpp.lp..sppRllLTGTPlQN....sLtElauLlsFlhPshas.........shc..sFcphh.....tts....................ptptspcthppLppllps......................hlLRRhK..s-l..pcs.....LPsKpcpllhsphoshQpchYp............phhppschhhstss................................................thpphhshlhpL+KhsNHPhLh 
..................................................................................................................................................................................................................................................................................................................................................aQ..uh.t.ah................................................................................................................................................................................................................................................................t......th.s.s.lL....A..D..-......M....G.........LGK.............................T....l..p....s..l..s..h.lt...................................h.h.p..t...t..t.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hLl.l..s......Pt.....o...............l....l....t....p....W....t....p...........E....h...p....c....a...h....................p.l.............ph.h....h...h....p....u....sp..........................................................................................................................................................................t..........t...............t...........h.......t.......p.........h......s.......l....l.....l...s...o....Y.phht...............................................................................................................................p....p....h.....p..
.h....ltp............hp..........a..p...h..l....l..l...D...E........u.Hp.l..........K.....................................N...t..p....o....p......h......h...p.....s..l.....p....p...lp..................spp...+.......l.......h.....L.................T..........G.....T...P..l....p...N.............................sl.t..E..L..a.....u...l..h....p....a.....l..........P..s..h..hs................................................shp.....pF...p.p..h.h.................................................................................................t.......t....t...h...p...p...L....p....p.....h...l...p...s.........................................................................................h.h..l..R..R..h.+..........ppl...........p..............................L..P.........+...............................p...................h......h..h....h..t...h....s..t..............p...p......t....h...Yp.................................................................th.h.t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 2588 4337 6155 +4456 PF04855 SNF5 SNF5 / SMARCB1 / INI1 Kerrison ND anon Pfam-B_6054 (release 7.6) Family SNF5 is a component of the yeast SWI/SNF complex, which is an ATP-dependent nucleosome-remodelling complex that regulates the transcription of a subset of yeast genes. SNF5 is a key component of all SWI/SNF-class complexes characterised so far [1]. This family consists of the conserved region of SNF5, including a direct repeat motif. SNF5 is essential for the assembly promoter targeting and chromatin remodelling activity of the SWI-SNF complex [2]. 
SNF5 is also known as SMARCB1, for SWI/SNF-related, matrix-associated, actin-dependent regulator of chromatin, subfamily b, member 1, and also INI1 for integrase interactor 1. Loss-of function mutations in SNF5 are thought to contribute to oncogenesis in malignant rhabdoid tumours (MRTs) [3]. 21.80 21.80 22.00 21.90 20.30 20.80 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.66 0.70 -4.91 25 526 2009-01-15 18:05:59 2003-04-07 12:59:11 7 15 262 0 346 474 0 197.40 33 41.27 CHANGED ppApp..........tsLVPIRLDl-h.................................-th+LRDTFhWNlsEplloP-tFAphlscDLclPsss...hhppIussIppQlc-Yts.........................................s.ht.p.....................................................................................-hRllIcLsIslupppLhDpFEWDlss.sssoPE-FApphssDLGLuGEFssAIAHuI+EQl.hhpKplh......................Gashsupslt-s-hctth.hss..................hsshhp.spptscpasPhlppLocsElE+pEt-+-Rph....R .................................................................................................................t..sp..c.LVPIRLDh-h.......................................ss..+lRDsFh..WNhp........-.......ph.hos-.....FAph..hscDLc...L.......ss...............hhstIsptIcpQ.lppa.s...................................................................................................................................................................................................................................................................................................s-.Rll.l.pLslpls.ph.....hhDpFEW-hsp....tssPE......pFAhphst-LuLs.G.EFsssIAauI+tpl.h.h.p+...........................sh.h.st...tsh.s.p........................................................h...s.t.scpapPhl..LoptEhE+p.tcppRp.R................................................................................. 
0 105 189 289 +4457 PF01174 SNO UPF0030; SNO glutamine amidotransferase family Belitsky B, Finn RD, Bateman A anon Prosite Family This family and its amidotransferase domain was first described in [1]. It is predicted that members of this family are involved in the pyridoxine biosynthetic pathway, based on the proximity and co-regulation of the corresponding genes and physical interaction between the members of Pfam:PF01174 and Pfam:PF01680 [2]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.28 0.71 -4.67 4 1876 2012-10-03 00:28:14 2003-04-07 12:59:11 14 6 1744 30 575 2672 2181 181.00 43 94.15 CHANGED VLALQGAhhEHhctlc+ChsEs..........hsVKcsEpLspsDALIIPGGESTuMuhlhc+hGhh.sLhEFl+NspKshaGTCAGLIhLSpplusp...lhpLsLLcVsV+RNAFGRQspSFppch-Fpshh...psFsusFIRAPVI-clLss-sVplLh-hs....G..clVsAt.Qs.phLusSFHPELuEsshRhacaFlcphVp .................................................VLALQGuhtEH..hp....h....l..c....p.h..G..s...cs...........................htV..+..p.hc.....c..L......p....p........l...D....GLIlPG..G.........E......S....T..T........h.s....+....L............h....c..p.......h....s....h.hpsl.....+.ph.lp.....s....................G....h..P...laGTCAGh.....I.l.L.......A.c....c.....l..t.st...............pp.....h.....L.....u.......t.......h..........D.........l.....s.........V.........c...R.............N..........A............F.........G............R.............Q.......l............-............S........F..........c......s..c........l..c........h....p..G..lu.........................t.s..h.......u....V.......F...I...R......A..P....h....lp.p........V.........G...................p....s......V...c...l..L...u.p.hs.........................s.....c.I.V...A..s+....Qs...shL.usSFHPE..L.....T..s....D...hR.lHpa.Flphh..t........................................................................................................................... 
2 210 383 500 +4458 PF00080 Sod_Cu sodcu; Copper/zinc superoxide dismutase (SODC) Eddy SR anon Overington and HMM_iterative_training Domain superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the copper/zinc-binding family is one. Defects in the human SOD1 gene cause familial amyotrophic lateral sclerosis (Lou Gehrig's disease). Structure is an eight-stranded beta sandwich, similar to the immunoglobulin fold. 21.00 21.00 21.00 21.30 20.80 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.28 0.71 -4.04 127 3852 2009-09-11 05:27:43 2003-04-07 12:59:11 15 29 2380 488 1163 3237 322 143.20 32 72.64 CHANGED Asshlpst...................ssp....ltGslphppss..ss......lplpsplpG...L.sP.........................G..hHGhHlHchGcs..sss..........stSAGuHa....NPts..tp..H.Gts..sst.................tHsGDLsNl..hs.sssGsu..phs..hhss.hl....sL.s.....sll....G.+ulVlHsstDDh...................popssGsuGsRlAC.GlI ............................................h...................tsp.......shGslph...ppp...s..ts.........lpl.ssp...l...p.G....L..ss..........................G.....HGFHlHp......h.Gs.....s...sss..............s.t.SA.......G.sH.a......sPts.....tp........H.suP....stt...............................tHhGDLssl..........hs...s....scG.p.A......phs...lhss..pl.........tlps.p.....slh.......G..+ulllHsssDshs.........................st.s.h..Gsu.GsRhACGlI................................................................................ 0 373 655 952 +4459 PF00081 Sod_Fe_N sodfe; Iron/manganese superoxide dismutases, alpha-hairpin domain Eddy SR, Griffiths-Jones SR anon Overington and HMM_iterative_training Domain superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. 
Three evolutionarily distinct families of SODs are known, of which the Mn/Fe-binding family is one. In humans, there is a cytoplasmic Cu/Zn SOD, and a mitochondrial Mn/Fe SOD. N-terminal domain is a long alpha antiparallel hairpin. A small fragment of YTRE_LEPBI matches well - sequencing error? 21.10 21.10 21.10 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.85 0.72 -3.72 25 8157 2009-01-15 18:05:59 2003-04-07 12:59:11 17 14 5135 325 1451 5799 936 79.40 42 40.82 CHANGED sapLPsLPYsYsALEPHIScEThEhHHsKHHpsYVsN.LNshl-sh.-hspc...shEpllhp......shpsultNN.huGHhNHolaWpslu ...........................apLPtLP.Y..........s.h.s.A.L...c.P.a.lstcThchHHsK.HHp....T..........YVsNlNsul...c...st...p.....h...t.sp..........sl..E.c.l.lhp................................shpsu....l....h.NNuGG....HhNH.olFWcsLt.................................. 0 485 915 1223 +4460 PF02777 Sod_Fe_C sodfe_C; Iron/manganese superoxide dismutases, C-terminal domain Eddy SR, Griffiths-Jones SR anon Overington and HMM_iterative_training Domain superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the Mn/Fe-binding family is one. In humans, there is a cytoplasmic Cu/Zn SOD, and a mitochondrial Mn/Fe SOD. C-terminal domain is a mixed alpha/beta fold. 
20.90 20.90 21.10 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.56 0.72 -4.20 27 8609 2009-01-15 18:05:59 2003-04-07 12:59:11 13 16 5213 325 1661 6184 1017 97.60 44 50.45 CHANGED G.E.Ps...GpLucAIscsFGSa-pFKppFspuAsuh.GSGWuWLVhc...sspLplhsssNtsss.hspG..hsPLLslDVWEHAYYlcYpNtRP-YlcsFW.slVNW-.sscca .................................................Pp..GcLtsAI.cpsF.G...S...a-...p.FKp...p.F.s.s..AA.s..s.p.F......G.S.GWuWLV.......h.......s........................................s...............G......+..L.tl...s.s.TsNpssP...l.......s........p..........s.............t........h........PlLsl.DVW.E..HAYYl...........p.Y........p.......N.h......R....sc........Ylpsaa.sl.l.NWp.stp............................................ 0 546 1040 1403 +4461 PF03002 Somatostatin Somatostatin/Cortistatin family Bateman A anon Pfam-B_1891 (release 6.4) Family Members of this family are hormones. Somatostatin inhibits the release of somatotropin. Cortistatin is a peptide that is related to the Somatostatins that is found to depresses neuronal electrical activity but, unlike somatostatin, induces low-frequency waves in the cerebral cortex and antagonises the effects of acetylcholine on hippocampal and cortical measures of excitability [1]. 20.30 20.30 20.40 22.50 18.00 19.50 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.11 0.72 -5.88 0.72 -4.26 5 165 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 79 0 56 144 0 17.70 81 16.29 CHANGED +-RKAGCKNFFWKoFTSC ....RERKAGCKNFFWKTFTSC 0 2 9 24 +4462 PF01680 SOR_SNZ UPF0019; SOR/SNZ family Belitsky B, Bateman A anon Pfam-B_2034 (release 4.1) Family Members of this family are enzymes involved in a new pathway of pyridoxine/pyridoxal 5-phosphate biosynthesis [1]. This family was formerly known as UPF0019. 
20.40 20.40 20.40 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.37 0.70 -4.73 10 1980 2012-10-03 05:58:16 2003-04-07 12:59:11 12 7 1849 59 591 1309 540 205.00 67 69.72 CHANGED pGotplK+GhApMLKGGVIMDVVNsEQA+IAE-AGAsAVMALERVPADIRAsGGVARMSDPphIcEIhsAVSIPVMAKsRIGHFVEAQILEAlGVDYIDESEVLTPAD.paHI-KcKFsVPFVCGARNLGEALRRIsEGAAMIRTKGEAGTGsVVEAVRHMRtlsu-IRclpsh.oEDELassAKcltuPY.ELlppltctG+LPVVNFAA .............................pss.clK+GhApMLK..........GGVIMDVsss.EQ.................A........+IAE-AGAsAVMALERVPA.DIRAsG..GVuR.MSDPc.hI....cE....Ihp....AVSIPVMAKsRI.GHFVEAQlLEAlG..VDYIDESEVLTPAD-ta.HlcKppFpVPFVCGA+.sLGE...ALRRI.uEGAAMIRTKG...E.s.GTGslVEAVRHMRplsuEI++....l...p..sh..........s...........-....D....E....L.hshAK.-LtAPY.ELlpp.ltcp.G+LPVVNFAA........................................................... 1 227 402 516 +4463 PF02208 Sorb Sorbin homologous domain SMART anon Alignment kindly provided by SMART Family \N 21.10 21.10 21.80 23.60 20.10 20.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.52 0.72 -4.59 3 265 2009-09-11 11:43:11 2003-04-07 12:59:11 11 12 39 0 84 262 1 46.10 56 5.58 CHANGED lKAsp..u.sshDEsGIPL...pTVDRPKDWYKTMFKQIHMVHKPs..sD ..........lKu.sph.uhGssDEsGIPl...poV..-RPKDWYKTMFKQIHhlp+..s............. 0 6 14 35 +4464 PF04203 Sortase Sortase family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The founder member of this family is S.aureus sortase, a transpeptidase that attaches surface proteins by the threonine of an LPXTG motif to the cell wall [1]. 
21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.44 0.71 -4.54 138 4873 2009-01-15 18:05:59 2003-04-07 12:59:11 8 19 1737 74 783 3560 293 135.30 24 51.42 CHANGED lpIPsls..lsh...s.lhp....s.....ss........tpsht.....Gsuahps.....ss...s.........Gt..ps.sslluGH....p.........sshFtpL.pcl.......c.tGDtlhlps................tsphh.pYc.......Vpsht.............l.....ss..ph......ph.....hs............pts...............pphlTLlTCs....sh.......................sspRhlVhuchs ................................................................................lpIPpls.......ls.h...P.lhp.......G...................ss.........ppsLtp............Gsuahcs.............sshs................Gt...ps....ss..llsG.H.........c.....hsssthF.s.s..L.p.ch.............................c.pGctlh.lps....................................tschh...sYc...............lpp.h.p........................................h.l.........pss....ch.......................ph.............lp.............................pts........................................cchlTLlTCs...sh..................sscRllVpuch................................... 0 283 543 695 +4465 PF04832 SOUL SOUL heme-binding protein Mifsud W anon Pfam-B_3872 (release 7.6) Family This family represents a group of putative heme-binding proteins [1]. Our family includes archaeal and bacterial homologues. 
21.20 21.20 21.20 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.04 0.71 -4.74 71 715 2012-10-02 11:08:51 2003-04-07 12:59:11 7 16 287 15 420 683 1151 165.80 25 74.09 CHANGED shEpPsYpVlpp.sss.......aElRcYsstlhA.........pspsp.ss.hcpu.sspuFphLssYI....hGp.Nps.....ppcIsMTuPVhpps..........................ttts.......................paphpFhh.Pupas.hps...hPtPsDs.plplpch.PupphAslpFSGhss-ppltppttpLpphLpppu...hp.....stu........sshhAtY...ssP.hs.shhRR.NElhl.lp ..................................................s..-ps.apllpp..tsp..........aE.lRpYt..s.h.hhs.................................ps.p..h...p......t.....hptu..hppu............F.tp.LhpYI........tGp..Npt..........ttcl.s.M.TsPVhpph.........................ps........................tps.....................phslpFhl..Psp.....a....p.........ps..........sPtP.s..cs...p.........lp.lp.ch..ss.hph.h.l..h.pFu..................G..h...s...s...pp..s..htp..ptppLtptL.pps.s......ht...........ts..................hhhutYssP...h..h.Rp.NElhh...t...................................... 0 183 274 351 +4466 PF04267 SoxD Sarcosine oxidase, delta subunit family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Sarcosine oxidase is a hetero-tetrameric enzyme that contains both covalently bound FMN and non-covalently bound FAD and NAD(+). This enzyme catalyses the oxidative demethylation of sarcosine to yield glycine, H2O2, and 5,10-CH2-tetrahydrofolate (H4folate) in a reaction requiring H4folate and O2 [1,2]. 20.10 20.10 22.40 23.60 19.30 17.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.99 0.72 -4.16 89 550 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 367 8 172 490 1589 83.10 43 76.88 CHANGED lIsCPhCGs.R-cpEFsatG-Ac.lsRPss..sus.s-cpWt-YlahR-NPtGhatEhWhHstGCtpWhsssRDTlTaElhs..shhAtp ....lIpCPaCG..RsEpEFshuG-A+.lsRPts......sss.hoDc-Wt-YlFhRcNP+Gh+tEhWhHstGCpcWFsssRD.TVTcclhssapst.p.......... 
0 36 89 125 +4467 PF04268 SoxG Sarcosine oxidase, gamma subunit family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Sarcosine oxidase is a hetero-tetrameric enzyme that contains both covalently bound FMN and non-covalently bound FAD and NAD(+). This enzyme catalyses the oxidative demethylation of sarcosine to yield glycine, H2O2, and 5,10-CH2-tetrahydrofolate (H4folate) in a reaction requiring H4folate and O2 [1,2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.88 0.71 -4.35 6 546 2012-10-01 23:12:28 2003-04-07 12:59:11 7 2 369 7 174 1588 3279 147.50 26 76.56 CHANGED RAhPup..AhsAslppuluhsLPstsssluoss...olhWluPDcahlls...Eutss.hAsLspulus.hu.ollDlStuRshIcloGspActlLsKusuhDLpscAFsVGtAssThhu+stshlh....RTGsDsFcllVhRSFA-phWchLp-uuuE ....................................................................t........thhtshp.p.s.hGhp.l.P...s.t..s..s..s...spss.....tslhWlGPD..E..WLlhs.......sp.s..t......s......h......tt...t....l....t....p...s..l....s....s......ht......u......l..l..-l......S........s...u..p...s...s..lcl...o....G.sps....c.p.................lL.s...+.u.s.......s....l...D.....L....p...s........p......s...F...s...s......G.....p....s.s...p....T....h....h.....s...+..s...s...l...h....lh................+...s...u...s....c...s...a.clhlh..R..SFA..cahhphLtcAut..................................................... 1 36 91 126 +4468 PF03172 Sp100 Sp100 domain Bateman A anon Pfam-B_3126 (release 6.5) Domain The function of this domain is unknown. It is about 105 amino acid residues in length and is predicted to be predominantly alpha helical. This domain is usually found at the amino terminus of protein that contain a SAND domain Pfam:PF01342. 
20.60 20.60 20.60 22.00 20.50 20.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.37 0.72 -4.06 11 240 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 40 0 80 238 0 98.90 43 21.23 CHANGED phhhEslhp+.FKcpKl-IAsAIc+sFPFLEGLRD+shITc+MYcDs.-uCRNLVPVs+VlYslLocL...E+sFshohLpsLFScVNLccYPcLppIh+SFpssht..s ......t.h.-shh++.F+ppKlEIA.AIp+sFPFLcuLRD+shIo-chac-s.-uh+N.LVPVp+VlYslLopL...E+s...F...s...hshLcsLFSclNLccYPcLhpIh+SFppsh..t.................... 0 5 8 16 +4469 PF03014 SP2 Structural protein 2 Griffiths-Jones SR anon Pfam-B_1375 (release 6.4) Family This family represents structural protein 2 of the hepatitis E virus. The high basic amino acid content of this protein has lead to the suggestion of a role in viral genomic RNA encapsidation. 28.00 28.00 28.00 28.70 27.90 27.80 hmmbuild -o /dev/null HMM SEED 620 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -13.12 0.70 -6.64 2 3837 2012-10-04 01:49:40 2003-04-07 12:59:11 9 3 32 8 0 1287 0 127.50 61 98.02 CHANGED hhGsphsSQ..sLP.................AGuR.tQ..Rs.uutWpsQ.QRPpuA.......sGsAPLTssssAssTtsVPDVDptGAlLhRQYNLsTSPLs.ushuuTNhlLYAAPlsPLhPLQDGTsopIMuTEuSNYAQYRV.thTlRaRPlVPNAVGGauIShuaWPQTToTPTSlDMNSITSTDVRlllQPG.Au.LsIPpERLtY+NpGWRSVETsuVspE-ATSGhlMlClHGoPhNSYTNosYTGsLGhlDFAlcLphRNLoPGNTNsRVoRhpsTA.Hpl+tsssG.AplTTsAAsRFMtDl+a.hGTstsGElG+GIhhsLFNLADTlLGGLPopLlpuAuGQhhYuRPVssANGEPpVKLYhSVEsA.pDKsIhlPHDIDLGsSpVshQDYsNQH.pDRPoPuPAPpRshusLRusDVLhlo.....lTsAE.....hsQshaGuuo.......ThhhhNlhTGspAsApSlDWoKsTlDGh.lpTlpt.Sto..FhsLPhhGK.uhW..GsptAGY.YpYNoTtp-.I.hlpN..GppVsh.sYTs.LGt..oshShlh.ltPhpA.......sD.P..httHThsD.CspChsLGLpsCshQu...pssEhpRLh.+lu+Th.S 
.......................................................................................................................................................................................................................................................................................PVNSYTNTPYTGA......LGLLDFALELEFRNLTPGNTNTR.VSRY...oS.oARH+LRRGADGTAEL...T.TTAATRFMKDLHF...TGTNG.VGEVGRGIALTL..FNLADTLLGGLP.............................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +4470 PF02090 SPAM Salmonella surface presentation of antigen gene type M protein Mian N, Bateman A anon IPR002954 Family \N 25.00 25.00 25.80 25.60 20.90 20.10 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.87 0.71 -4.21 4 199 2009-09-11 15:23:32 2003-04-07 12:59:11 10 1 185 0 4 50 0 140.00 66 98.38 CHANGED MHSLTRIKVLQRRCTVFHSQCESILLRYQDEDRtLQAEEEAIlEQIAGLKLLLDTLRAENRQLSREEIYSLLRKQSIVRRQIKDLELQITQIQEKRsELEKKREEFQEKSKYWLRKEGNYQRWIlRQKRhYIQREIQQEEAESEEII .................MHSLoRIKVLQRRCTVFHSQCESILLRYQDEDRtLQAEEEAIlEQIAGLKLLLDTLRAENRQLSREEIYoLLRKQSIVRRQIKDLELQIlQIQEKRsELEKKREEFQcKSKYWLRKEGNYQRWIIRQKRpYIQREIQQEEAESEEII...... 0 2 2 3 +4471 PF02510 SPAN Surface presentation of antigens protein Mian N, Bateman A anon Pfam-B_1678 (release 5.4) Family Surface presentation of antigens protein (SPAN), also know as invasion protein invJ, is a Salmonella secretory pathway protein involved in presentation of determinants required for mammalian host cell invasion. 
20.10 20.10 20.90 20.60 19.80 19.70 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.97 0.70 -5.39 2 189 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 170 0 7 96 0 271.70 61 90.70 CHANGED MGDVSuVSSStNhLLPQQDEVuGLSEALKKAlEKHKTEY.sDKKDREYGDoFVMHKETALPVLLsAhRpGAPAKSEpHsGpsSGLpHNsKG-FRIAEKLLKVTuEKSVsLlus.tKsDKoAALLSS+NhQlttVuuKKLSsDLKA...VSELADNshtloDDNlKA..sDpKsIsGEGlRKEGs.LAtDVAsSRhAAsNTuKuDDKDHKKIKEssQLPlQPTTIADLSQLSGGDEpMPLAApSK.hMThFPhADGVKt-DsSLTYRFQRWGNDYSVNIQARQsGEFSLlPSNTQVEHRLHDQWQNGNPQRWHLhRDDQQNPQQQQHtQpSGEEDDA ......................................................................................................................................................MGDVSAVSSSGNILLPQQDEVGGLSEALKKAVEKHKTEYSscKKDRDYGDAFVMHKETALPVLLAAWRHGA.AKSEHHNGNVSGLHHNGKGELRIAEKLLKVTAEKSVGLISAEAKVDKSAALLSsKNRPLEuVSGKKLSADLKAVESVSEVsDNATGISDDNIKALPGDNKAIAGEGVRKEGA.......PLARDVAPARMAAANTGKP-DKDHKKVKDVSQLPLQPTTIADLSQLTGGDEKMPLAAQS..KPMMTIFPTADG.VKGEDS....S.LTYRFQRW..GNDYSVNI....QA.....R...QA.GEFSLIPSNTQVE...HRLHD.QWQN.GN.PQRWHLTRDDQQNPQQQQ.HRQQSGEEDDA.......... 0 2 3 4 +4472 PF04573 SPC22 Signal peptidase subunit Waterfield DI, Finn RD anon Pfam-B_4675 (release 7.5) Family Translocation of polypeptide chains across the endoplasmic reticulum membrane is triggered by signal sequences. During translocation of the nascent chain through the membrane, the signal sequence of most secretory and membrane proteins is cleaved off. Cleavage occurs by the signal peptidase complex (SPC) which consists of four subunits in yeast and five in mammals. This family is common to yeast and mammals [1,2]. 
25.00 25.00 30.20 25.20 24.00 24.40 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.14 0.71 -5.04 14 439 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 303 0 302 411 5 141.80 28 86.75 CHANGED MpohlsRuNul.huaoLolhAhlshssahSsh.FpchpssssIpshc..hhl+slpcFs.sspc+sDhuhlTFDLssDLspLFNWNsKQLFlYLoAEYcTtpNt.lNQVVlWDKIIhcs-puplsh+shpoK...YhFaDsGsGL+Gt+NloLsLpWNVhPpsGhLshspus.GphshsFPspYp ....................................................ph..Rhpth.hs.h.h....s..h.hhh...hh....hhh..h.s.h....h........p..s......s..s.p..l.p..........................h..ph.p..ah.....h.tp...t...p-.s...lpFs.ls...........sD.......L...p.....s.........l.....Fs.WNsKQlFlYlsAcY........to.....t........p......s.........t.....h.......N.....pl.....slWDpIl...pt......-.p.......s.h...lthp.s.......psK............Y.hh.D...s..s.tl........tt.pshslplpasl.P.sGhl.hs.p........................................................... 0 87 158 247 +4473 PF05122 SpdB Mobile element transfer protein Yeats C anon Yeats C Domain This proteins are involved in transferring a group of integrating conjugative DNA elements, such as pSAM2 from Streptomyces ambofaciens ([1]). Their precise role is not known. 25.00 25.00 53.50 53.30 19.80 17.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.77 0.72 -3.94 3 33 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 23 0 10 35 0 52.40 68 85.52 CHANGED MRIGPVQIGTHRDRHGQTKHAAVCTNDGCGWSADYTSQSAAQLAARTHRCKVS ....RIGPVQVGTahDpR.G+pKHsAACTAPRCGFSADYsSRuAAELAARTHRC.Vp.. 0 2 8 10 +4474 PF03771 SPDY DUF317; Domain of unknown function (DUF317) Yeats C anon Yeats C Domain This a sequence family found in a set of bacterial proteins with no known function. This domain is currently only found in streptomyces bacteria.\ Most proteins contain two copies of this domain. 
25.00 25.00 26.20 25.10 24.00 24.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.19 0.72 -4.48 16 112 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 16 0 67 132 0 64.50 25 43.21 CHANGED hsSPDupstlpaps.........tsssssWpl.....hsssssststWpApFsspsPscLluuhssuL.ssssshpc .......................SPDthhhlta.s.........sstsssWpl.....tsssssssstWtAsFsspTPs-llAuhssuL.sssts................ 0 17 57 67 +4475 PF00435 Spectrin spectrin; Spectrin repeat Bateman A, Finn RD, Stabach P anon Pfam-B_1 (release 1.0) Domain Spectrin repeat-domains are found in several proteins involved in cytoskeletal structure. These include spectrin, alpha-actinin and dystrophin. The sequence repeat used in this family is taken from the structural repeat in reference [2]. The spectrin domain- repeat forms a three helix bundle. The second helix is interrupted by proline in some sequences. The repeats are defined by a characteristic tryptophan (W) residue at position 17 in helix A and a leucine (L) at 2 residues from the carboxyl end of helix C. Although the domain occurs in ultiple repeats along sequences, the domains are actually stable on their own - ie they act, biophysically, like domains rather than repeats that along function when aggregated. 
24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.14 0.72 -3.75 83 21182 2009-09-14 14:25:53 2003-04-07 12:59:11 16 452 349 133 9769 19145 10 103.80 17 36.64 CHANGED pphppFtpcss-hppWlpcp.cthl..ssp-h.spclsslpsLhc...+Hcthcp-lss.ppsplpplp....phuppL.hspt...t.ss...pplpp+hpplsppWppLpphstpRcppLpp ............................................t..hppF...pphpp..h...t...sWlp........c.p.....c...t.............h...l.......p.s..p...s.....h.....u......p...s..h....p.....s....l..p.....t......hlc....................ca..c..s..h.p......p....-..l...p...........s...p..pspl.p.p.lp..............................ptup.pL....hppt................t.ps.....................ppl...p.pp...hpp........l......p.p...pW..p...p.L.pp.hht.p..RpppLt......................................................... 0 2238 2954 5943 +4476 PF01564 Spermine_synth Spermine/spermidine synthase Bateman A anon Pfam-B_798 (release 4.0) Family Spermine and spermidine are polyamines. This family includes spermidine synthase that catalyses the fifth (last) step in the biosynthesis of spermidine from arginine, and spermine synthase. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.43 0.70 -5.44 17 3831 2012-10-10 17:06:42 2003-04-07 12:59:11 12 30 2623 95 1322 3677 1590 224.20 28 64.46 CHANGED hWFpEh.s.....................hss.uhsh+VcclLact+ScaQclhlaco...........psaGclLl............LDGslQhoEcDEahYpEhlsHlshhs+s.....NP++VLlIGGGD........................GGsLREllKHssV.............EclshV-IDptVI-hu+calPphuss......hpcs+lpl.hlsDGhpalpphps.....paDlIIsDsoD.PhGPucs.....LFpctaachhtcsLptsGlhssQu..cs.alphchhpslhpsh+psh...sthhhsslPTYssshhsahlsocp........ps .............................................................................................................................................................................h....................................htht.p..lhptp.o..a.Qcl.l..h.cs.......................ta.Gphhh..........................LDG......h....l.....h..o......p.....t..DE....a....h.Y...pE......h....hs....H...ss..hh.s.cs..............p.s.+.+.VLl.lG...G.GD..............................................................G...u.s...l.R.E...l...h..+a.......s.l...............................................................................................c..p.ls...h..V..E...I....D.......t....t..Vlc.h..s....+....p....a...h..s.p.hsss...........................hcDs...Rhpl..hlsD..Gh.p..alpp.spp....................paD.....lI........I..s.....D......s......o.....D...P..........h....G.............s.ups............La..o.....p.t.FY.pt.s.tc.sLp.s.s.G.l..hls...Qs........tss.h..h..p.......p.....h..h.p.h...h.p.p.h.p..p........h.......F.............st........h.s.lPoa.s.t.hhhhhhtpt...........tshtt.................................................................... 0 463 818 1110 +4477 PF02819 Toxin_9 spidertoxin; Spider toxin Griffiths-Jones SR anon Homstrad Family This family of spider neurotoxins are thought to be calcium ion channel inhibitors. 
21.30 21.30 21.50 21.70 21.00 21.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.11 0.72 -3.89 6 29 2012-10-01 22:06:18 2003-04-07 12:59:11 10 1 12 6 5 49 0 39.90 38 58.82 CHANGED Chu.sYc+CshGtpPCCcsR.sCcCslhhsNCcCK..+hlhEhhGhu ...Chs.hhcpCshsppsCCcsp.sCpCshhssNCcCp..+hh.............. 0 1 2 5 +4478 PF02513 Spin-Ssty Spin/Ssty Family Staub E, Mian N, Bateman A anon Staub E Repeat Spindlin (Spin) is a novel maternal transcript present in the unfertilised egg and early embryo [1]. The Y-linked spermiogenesis -specific transcript (Ssty) is also expressed during gametogenesis and forms part of this Pfam family. Members of this family contain three copies of this 50 residue repeat. The repeat is predicted to contain four beta strands. 25.00 25.00 25.70 28.00 22.10 18.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.28 0.72 -4.40 27 646 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 56 6 298 766 0 48.10 42 57.44 CHANGED VGppVpHhac-s..t.hspWcGhVLsQlPspsolaaIKY-sDsslYshpLh ..lG+pVpHsa--G.pt...shscW+GhVLsQVPspPolYaIKY-sDsslYsYpLh.......... 0 15 29 99 +4479 PF05215 Spiralin Spiralin Moxon SJ anon Pfam-B_6625 (release 7.7) Family This family consists of Spiralin proteins found in spiroplasma bacteria. Spiroplasmas are helically shaped pathogenic bacteria related to the mycoplasmas. The surface of spiroplasma bacteria is crowded with the membrane-anchored lipoprotein spiralin whose structure and function are unknown although its cellular function is thought to be a structural and mechanical one rather than a catalytic one [1]. 
20.80 20.80 20.80 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.55 0.70 -5.25 4 22 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 13 0 0 21 2 211.00 67 98.10 CHANGED K+LLSILAVFGVSAVGTTSVlACNKTESNNLSRVKTIAAPtTVAAtssppVTKtEIKsuL-sNVLKAVQGVVKTApAsDF.aEVYpDNcGpAL-TlNLcAGpV-VYVQITPAKDKTVVIGKoGYIKVTLPKt...hKsDISsVTVsEQTVtIKsusPpsVTKsELpAVNp.AsLApAVLsAIKsKsssstAS-FtITNNGstGsYSAsKsVEVTVKApDsSspIoGpFKFNAKVTAThs KKLLSILAVFGVSAVGTTSVVACNK.TESNNLShVKTIAAPATVAstp........PKpVT+sEIKTALE....ANVLKAVQGVVKTATAA.DFQFDVYpssKGT.uLpTIsLEuGpV-VYVQITPAKDKTVVIGKoGYIKVTLPK....hKVDIpsVslspQhVtIKAucPKpVpKDELNAVNT.sTLApAVL-AIpphAPNAG..ASDFEITNNsstGsYpstK-VcVTVKAKs-SsNISGpFKhpAKVpAhh........... 0 0 0 0 +4480 PF03533 SPO11_like SPO11 homologue Griffiths-Jones SR anon PRINTS Family \N 21.10 21.10 21.20 22.50 21.00 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.17 0.72 -4.08 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 23 0 15 36 0 42.20 74 10.96 CHANGED AFAPMGPEASFFDVLDRHRASLLAuLRRGGGEPPuGGTRLASS ......AFAPMGPEASFF-VLDRHRtSLLAALRRGGtEPPuGGoRlASS.... 0 1 2 3 +4481 PF05032 Spo12 Spo12 family Wood V anon Pfam-B_51047 (release 7.6) Family This family of proteins includes Spo12 from S. cerevisiae Swiss:P17123. The Spo12 protein plays a regulatory role in two of the most fundamental processes of biology, mitosis and meiosis, and yet its biochemical function remains elusive [1]. Spo12 is a nuclear protein [2]. Spo12 is a component of the FEAR (Cdc fourteen early anaphase release) regulatory network, that promotes Cdc14 release from the nucleolus during early anaphase [3]. The FEAR network is comprised of the polo kinase Cdc5, the separase Esp1, the kinetochore-associated protein Slk19, and Spo12 [3]. 
20.50 20.50 21.10 21.30 20.40 20.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.40 0.72 -7.57 0.72 -4.33 9 136 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 117 0 96 116 0 34.20 45 21.64 CHANGED ASPTDsLlSPCSpKLs-HKuKhFtt+spPspLths ..SPSDslhSPCopKLsshKsKpat.pt.ppsphhh............... 1 27 53 81 +4482 PF03907 Spo7 Spo7-like protein Wood V, Bateman A anon Wood V Family S. cerevisiae Spo7 Swiss:P18410 has an unknown function, but has a role in formation of a spherical nucleus and meiotic division [1]. 34.10 34.10 34.70 37.70 21.60 34.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.29 0.70 -4.72 9 132 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 128 0 99 124 0 205.40 40 52.65 CHANGED SPsuhIFRNLLILE-sLRcQshp.+hh+hQaThFLohLhulssahhYtLYasscps....pG.......lhRhhLphsllhhhlTllLFHlSGpY+RTIVlPRRFhs.TNKGlRtFNlKLVKl+ssa.cchhD.lRhh.h.ls...la.hphhlhhutps..u.hhpFhpsspl+spsR...........................lGusDVKLlLsP+sFos-IREGWEIYRsEFWs+EusRRRcppp ...........................................SssstIYhNLLILEsuLRtQhlpLRtRRRpaThFLhlLshhluahsYt.Lahpscpsu...pG..................................hlchh.phsLhu.ullTslLhasoGpacRsItaPRRalssoN+GLRshNsKl.Vhl+ssWhpch.hshlta...............hphhh.....t.ts...u...tahps.s..pp..p................................................................................Gus.lKLlL.s+sFSsphREsW-.YRsEaWt+.EstRRt...h.......................... 0 23 51 83 +4483 PF01052 SpoA SPOA_protein; Surface presentation of antigens (SPOA) Finn RD, Bateman A anon Pfam-B_408 (release 3.0) Family This family includes the C-terminal region of flagellar motor switch proteins FliN and FliM. It is associated with family FliM, Pfam:PF02154. 
23.00 23.00 23.30 23.20 22.70 22.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.01 0.72 -4.38 130 6032 2009-01-15 18:05:59 2003-04-07 12:59:11 15 12 2189 8 1289 3714 814 75.30 26 29.28 CHANGED ltpplppsslplpshluctplslp-lhpLpsGcllsLsps.htctlplhl....ssphlhpGchs...thss....phulclpchhppp ............h..t.l.clslplssplGcsphslp-lLpLp.GsVltL-p...ss-...s..lc.l.h.l....suthlupGcls...slss....phul+lscllp..s....... 0 420 802 1028 +4484 PF05036 SPOR Sporulation related domain Bateman A anon COG3147 Domain This 70 residue domain is composed of two 35 residue repeats found in proteins involved in sporulation and cell division such as FtsN, DedD, and CwlM. This domain is involved in binding peptidoglycan [1]. Two tandem repeats fold into a pseudo-2-fold symmetric single-domain structure containing numerous contacts between the repeats [1]. FtsN is an essential cell division protein with a simple bitopic topology, a short N-terminal cytoplasmic segment fused to a large carboxy periplasmic domain through a single transmembrane domain. These repeats lay at the periplasmic C-terminus. FtsN localises to the septum ring complex. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.47 0.72 -3.82 136 7624 2009-01-15 18:05:59 2003-04-07 12:59:11 8 118 2474 2 1762 5585 1681 74.80 19 23.88 CHANGED ssssahlQlu.uhs...spssAcphhscLptp.sht...........sthpssssha+Vhl.GsasspppA..pphtp.pLp.......tthsshlhp .........................................t..ttahlQlu.uhp..........stspA....pp....h.tt..p....Lptp..uhs..................sphtsss..shaRlhl.G.....sass+ppA....pphhp.pLp......tthpshh..h.................................. 
0 527 1100 1450 +4487 PF03845 Spore_permease Spore germination protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.94 0.70 -5.78 17 1773 2012-10-03 01:44:59 2003-04-07 12:59:11 8 5 343 0 394 2732 134 306.50 19 86.06 CHANGED plostQhhhlIhsh.lGsGlLshstshAcps.t..uWIullluullshlhhhlhhhlhpp..asppslhphhpchhG.KhLGtllshlahhYFlhhush.lRshuEllthahh.csPhahlshhhhhlslYhlhpGlcsluR....hhhhhhhhhhhhhhlhlhshphhchcNLhPlhspGlhslL+u..stsshhsasshElhlhlhsahpspKpst+hshhuhhlsslhYhloshlsIsshus-hsppthaPtlshhcslcls..hl-Rh-hhhlhlahlhhFhshslhhausshuhsplF+hpppp......hlhhlhslhhlhs.lhpsts.h ..............................................................................................................................ls.hphhhhlh...h....lG..s..u.....l.L...sh...sph...hs..pts...t..D....u.....W.l..u...l...l.l.u......sl.hs....h.l..h.h..h...h..h....h.h....l.h.pp............h.s..s..t......s......h....h...p..h.h.........p.p.h..h......G....+........h......l.G.......p......l......l.s.....h....l.......a.......h....h.......a....h...h....h....h.s...s.......h..h...........l.......p....s....h....s.....p.......l...l.p...h...a......l...........h.............s............p.................T...P..h..h..............h..............l..............h............h..............h..............h.h...l.........l.........s.h.........Y....h....s........t.t..G.......hc.........s...ls+............hhthh....h...s...h.h....h.l...h...h....h.h.....l.........h....h..........h............s...........h..........p.................h..........c............h.........p....p........l........h......P...............l............h.......p............p.........u.......h...h........s........l.....lpu........sth.s..h...h....s....a....h..uh.t....l.h.h.....h.l.h.P.a.l....p......p....p.....p....p..h..h.+.....s.h.h...h.u..h....h......h.s.s.l..h.h......hh.h.sl.hsl.s.hh.......u..p.th..tp..h..haP.s...l.ph.h..+h.l.
p.ls.......a..l-.R.h.-h....lhl.....h...h....Wh....h....s..lhss..h.....s.hhh.a.sssh..s..hp....p.........l.........h.p.h...pppp........hl.h.h.h.h.s.l..l..hh.h.s.hh........h.............................................................................................................................. 0 202 327 345 +4488 PF00588 SpoU_methylase SpoU rRNA Methylase family Bateman A anon MRC-LMB Genome group Family This family of proteins probably use S-AdoMet. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.89 0.71 -4.31 139 17854 2012-10-01 22:53:19 2003-04-07 12:59:11 14 38 4871 41 4348 12081 7850 141.80 26 56.74 CHANGED shhllLsplpcPtNlGulhRostshGs.pslhl..hp.psh...hssp....sh+s.uhGu.h.hls.hhhh...sshpc.hlpp..lcpt.s.hhl..hus....sh....pup......shh...........ph....sht.......pthsllhGsEspGlspphhctsc.t..hlpIPh.tsph.pSLNlusAsul.hla ...........................................................................................s.hhllLppspcPtNlGslhRossshGs....p...s...lhl.............s.....p........t....sh...................s..tp.................s.h+s.....uhG....u......hp.h..l....s...l.hhh.................s..s............l..s..p.....h............l.p..p............hptt....t.....h...h.l..............hus....st........sup............shh...............ch.............shs......................................................tshsllh......GsE......sp.G..l...s...p..p....h........h....p..t.....s....-....t...............hl.p.I.........P........h......t............s............p.............s.....p...............S.LNlus.AsullhY............................................... 0 1420 2797 3687 +4489 PF03862 SpoVA spoVA; SpoVA protein Finn RD anon DOMO:DM07026; Family Members of this family are all transcribed from the spoVA operon. These proteins are poorly characterised, but are thought to be involved in dipicolinic acid transport into the developing forespore during sporulation [1]. 
25.00 25.00 27.40 26.90 20.40 20.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.38 0.71 -4.00 73 1094 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 417 0 209 677 19 114.50 41 85.14 CHANGED hs.hlpAFlVGGlICsIGQllhshh.....h..chssshshlshlhlGulLTGlGlY-clucaAGAGusVPlTGFuNulsuuAlEappEGhlhGlusshFplAGslIsaGlhsualhuLIa .....shlhAFlVGGlICslGQllhchh...........+hssshshsohVhlGAlLsGh.....GlYDclspFAGAGusVPlTGFGNSlspuAhEttpc..shllGlus...shFcluuusIshullhualhuLI............................................ 0 103 167 179 +4490 PF04026 SpoVG SpoVG Kerrison ND, Finn RD anon COG2088 Family Stage V sporulation protein G. Essential for sporulation and specific to stage V sporulation in Bacillus megaterium and subtilis [2]. In B. subtilis, expression decreases after 30-60 minutes of cold shock [1]. 22.00 22.00 22.50 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.89 0.72 -3.95 34 963 2009-09-11 01:05:24 2003-04-07 12:59:11 7 3 793 10 180 528 40 83.90 54 80.87 CHANGED MplTDVRl+plssc...u+lKAhsSlThDssFVV+DlKVI-G.ppG.LFlAMPSRKst-....................................Gpa+DIAHPIssEhRpplpcuVlctYccth .............................................MplTDVRlR...+.lps-......G+MKAlsSITl..Dc....p.FVlHDl+V.I-G.psG.....LFVAMP...S+..+.Ts.D....................................GEFRDIAHPINS-hRpcIQcAVlptYcc..h....................... 0 100 147 165 +4491 PF04293 SpoVR SpoVR like protein Kerrison ND, Finn RD anon COG2719 Family Family member Swiss:P37875 is Bacillus subtilis stage V sporulation protein R, which is involved in spore cortex formation [1]. Little is known about cortex biosynthesis, except that it depends on several sigma E controlled genes, including spoVR [2]. 
18.50 18.50 19.80 19.00 18.30 16.30 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.78 0.70 -6.11 62 1383 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1289 0 332 906 166 407.70 54 84.95 CHANGED us-WsFchLpphpccIpclA.ccaGLDsYPsQlElIouEQMhDAYASsGMPltYpHWSFGKpFhpscptY++GhhGLAYEIVINSsPCIAYLMEENohsMQsLVlAHAsYGHNsFFKsNaLF+pWTDApuIlDYLsFA+sYIscCE-+..aGh-tVEplLDusHALhsaGVDRYtRstplShpcEctRp.....p-Rctahp.pphN-LW..RTlP..........pcppttpp..............ppt.+aPtcPpENlLYFlEKpAPhLEsWQREllRIVRcluQYFYPQ+QTpVMNEGWAoFWHYsIhNcLacpGtlsDuhhlEFLpSHosVlhQP.hssPtaoGINPYsLGFshhpDIcRICppPT-ED+cWFP...-l.......AGs.shhcsLc.Ahcsa+DESFIpQaLSPclhR-h+LFsltD-sccs..hpVsuIHsEcGY+plRcpLuppYslush-PsIQVhclDhpGDR ............................................GsDWTF-LLchYhsEIc+lA.chYtLDsYPpQIE.lITuEQMMDAYuSlGMPlsYsHWSFGK+FlcTEphY++GQpGLAYEIV.......INSNPCIAYLMEENTlsMQAL.VhAHAsYGHNSFFKNNYLF.................+sWT......D...................A..s........u....I..lD..............YLlFA+pYIscCEER..YGl-EVEclLDSCHALMNaGVDRY........K....RP.p.K.l.S...h.pE.E.csRp..........................cpREcY..LQ....SQVN....LW....RTlP+.......................cc.ccp..ps.................t...ct+RaPs..E.PQENlLYFh..........E..........Kp..........AP..........L..........L..........E....s....WQREILRIVRKluQYFYPQ+QTQV..MNEGWATFWHYTILNcLY-.......cGclT-.cFMlEFL+SHTsVVhQPsasuPaaSGINPYALGFAMFpDI+RICpsP.........T-.......E.D+.hWFP...........-l............AGu..cWL-sL+aAMcsFKDESFIsQaLSP+lhR-h+hFslhDD-+csh.lcluAIHs-cGY+pIRppLusQYsLushEPNIQVasVDh+GDR.................................................. 0 93 198 268 +4492 PF04232 SpoVS Stage V sporulation protein S (SpoVS) Kerrison ND, Finn RD anon COG2359 Family In Bacillus subtilis this protein interferes with sporulation at an early stage and this inhibitory effect is overcome by SpoIIB and SpoVG. SpoVS seems to play a positive role in allowing progression beyond stage V of sporulation. 
Null mutations in the spoVS gene block sporulation at stage V, impairing the development of heat resistance and coat assembly [1]. 20.60 20.60 21.50 21.00 20.40 20.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.72 0.72 -4.41 26 629 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 425 4 199 331 10 85.30 63 87.47 CHANGED ME.lLKVSu+SsPsuVAGAlAullRcpupsElQAIGAGAlNQAVKAlAIARGalAPsGlDLlslPAFs-lpI-GE-RTAIKhlVpsR .........MElLKVSu+.SsPNSVAGALAGVlR.E+..G.sA..EIQAI...GA.......GAlNQAVKAlAIAR.G.FV...AP..oGl..DL.lClPAFs-IpI.DG...E....ERTAIKLIVpP........................ 1 106 166 186 +4493 PF03539 Spuma_A9PTase Spumavirus aspartic protease (A9) Griffiths-Jones SR anon PRINTS Family \N 20.90 20.90 21.00 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.03 0.71 -4.32 4 22 2012-10-02 15:32:34 2003-04-07 12:59:11 9 6 15 1 2 24 0 147.80 49 14.35 CHANGED IKGs+LKGaWDSGA-ITCVPthaL.-EcPltpphIpTIHGppcpDVYYlshKIpGRKlpsEVIuTsLDYlllsPuDlPWhhKtPLELTIKlDlccQQcpLLpposLSpcGKchLKcLF.KYsALWQpWENQVGHRRIcPHKIATGTLpP+PQKQY+INPKAKs .........................hKGsKLpuaWDSGApITClPpsaLppEpPltpp.lpTIHGppppsVYYlphKlpGRKlpsEVIuosh-YhllsPsDlPWhhppPLpLTlhlslp-.pcplLpposLscctKppLppLhpKYssLWQpWENQVGHR+IpPHpIATGThtP+PQKQY.INPKAKs.... 1 2 2 2 +4494 PF03779 SPW SPW repeat Yeats C anon Yeats C Repeat A short repeat found in a small family of membrane-bound proteins. This repeat contains a conserved SPW motif in the first of two transmembrane helices. 20.80 20.80 20.90 21.00 20.60 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.78 0.72 -4.59 16 414 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 200 0 160 360 9 51.30 29 53.28 CHANGED hpWshsllGlWhllSPWIl.Ga.ossuuhhhsslIsGllVslLu...lhhutsspc .............hhshllGl...alhhSPWll..Ga...uss.s.u..hhhss.llsGlllslLu.....hhhh.....t............. 
0 38 108 138 +4495 PF00494 SQS_PSY Squalene/phytoene synthase Finn RD anon Prosite Domain \N 26.00 26.00 26.00 26.10 25.80 25.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.85 0.70 -5.10 152 3383 2009-01-15 18:05:59 2003-04-07 12:59:11 14 23 1893 58 1260 3142 2434 253.00 23 78.59 CHANGED hcptucoFhhushhLs.tphRpslhslYuasRhsD-lsDpss...............s.stttph..................Lptacptlpthh................................................t......ps....lh.......................psltpshpphp.........lshp..hhtpllcuh....thDlp....................................pp........hh.ohs-.........LppYshtsAus..VGhhhhplh...sh..............ssp.......t..hphA............................p.....p...........lGhAhQlsNllRDlscD................htp...GR.hYLPt-..hh.tp..........sls.p.....plht........................stt.hpthhp.........................phhstAcphhpputshltt.............................l.sttsphshhhshslh.ttlLc...p.lcpssht.h.....hp..tRsplsthc ...............................................................................................................................................h..phucoFhhuh.hhLs..tph.Rpslhs.lYshsRthD..-..lsDsss...............sst.tth..................h.Lptacppl.pp.hh............................................................................................ht.stcs..lh..........................................tsL.t.p.sh...p.pat....................lshp...htshl....suh.....thD..lpt........................sp.........ht.shs-L.tt..YChtsAus..VGhhhstlh.uh..t..............................................pst...............ts..hphA............................pp........lGhAhQl.sNllR.DlsED.....................................hpp......GR..lY...lPt-....h..tp.......h..sl...s....p.....clhttt............................stt..ht..phhp.........................thhppAc.phhppu.ht.hlst.........................................l...s..t..p..s..t..h..sh..h...hshhhh.hthLpt.lptssh..t.h........p...t+h.l....t...........................................
................................................................. 0 384 788 1058 +4496 PF00299 Squash squash; Squash family serine protease inhibitor Finn RD anon Prosite Domain \N 22.00 22.00 23.20 26.10 21.90 21.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.73 0.72 -4.13 12 37 2012-10-01 19:46:11 2003-04-07 12:59:11 13 1 17 32 0 60 0 28.70 64 90.24 CHANGED RhCPRILMcCK+DSDCLucClClcph.aCG ..RhCPRILMcCKpDSDCLupClChcp.G.aCG... 0 0 0 0 +4497 PF02117 7TM_GPCR_Sra Sra; 7TM_GCPR_Sra; Serpentine type 7TM GPCR chemoreceptor Sra Mian N, Bateman A anon IPR000344 Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Sra is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 18.90 18.90 19.40 19.10 18.60 18.20 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.96 0.70 -5.82 10 141 2012-10-03 04:04:29 2003-04-07 12:59:11 11 3 7 0 141 262 0 276.40 20 94.23 CHANGED MSs.sCAScsclpRhsSLNF+IuQalsLlsIllTFIhTYaAlKllhp+SIFplSTKILLhpNLFaANLHQlhYuIpslphLY+uFFhls-PCshLpoEt-CthYhcVLlsGsSGMlYuQTGLLIERsCATFl+sYcpKpShhsGlsISIlVLhsShuTu+IIIWDDPL-salLuCahaPpcSssRushFhsIsTlLolFNLllSllIh+YNK+LEY..STRFpVusRFpKRElI-STpTICFLshoQFlhhFlYShGlhlL+pI+..phIshcpaahhVVWsYTlPFIAlhFPlLLIYRIRpo+ssRsphIpplTspKQTQ-EHI+QhKshW 
.........................................................................................................................................s....h.h.h.pS..hhhs.h...h...hhh...h.h.o...h...h.hshh...u...l.....p.hl....hp...p...s...l....a..........p.....u....T+....hLLh..slh.ss...hHp.h..h...............h.....h.......................t......hh...................l....h..........+s.....h..h..h.....sc.sCp.lh..h.pp.....-Ch.........h.h.h.hh.h...s....h...h...h.h....h..ph......u....Lh.l-R..............hhup.h.h...p.h......t.ph.phh..u..h.h...............l......h....hhlh....h.oh.................h........s.h....hh.....h....h.h....s...s...sh...ss.h.h...s.C.hh.....st......s.h..t..p.........h....p....h..hhh..h..hh..h.l...s...lh..s.h.l..h..s.h..h....lh.hh....s.....p.+.h.ch...............p.pp.apl...t........tRapp..hEsl.oopsl...s..hlsh....hQhlhh...hla.shs.h....hhhh...h..p..p...h...s.......h....a....h......h..h.....a.h..a...............s...h..s.ahs...h...hhPhllh.h.p....h..p.http.R...t.......t....I....t.t.h.pt..t........ts....pp.a.h.tp.hpt.W................................................................ 0 43 57 141 +4498 PF02175 7TM_GPCR_Srb Srb; Serpentine type 7TM GPCR chemoreceptor Srb Mian N, Bateman A anon IPR002184 Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srb is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 
21.70 21.70 21.90 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.64 0.70 -4.81 3 69 2012-10-03 04:04:29 2003-04-07 12:59:11 11 4 5 0 68 166 0 199.20 35 65.84 CHANGED FHPVYRlAQFYoFhVSsFAhPuLIYFMFcKLFKLsFHGNLKsLLIuYFIolLLFAlhlCFsFGYQFFVPFFl+SNCDLIINuTLFKYGHsoulFlMTlPMlLPluFTIERFsAMKMAcoYE+lRTLLGPVLVllLIIIDshhlYhIappEsFDcsFISFlllPuToA.LsFNoFLWalLYLNIsNFlhNllLLhlH+KLKpRhhh+NoSLSTKYplEEISQSS+FTLIVTFTHLLFF ...........................................................apslaRhu.haphhluhhuh..sLh...a...F...lhh...+l.h...................F..HsNL.K.......h..............l....h..hsYFhshhl...auh...........hhh.h...s.h............h...hphh....h....P.F...........h....s..p...s.p.CsL....l...I.s.......h...h....aKh..hphhhh.hhhTh...s..hhh..P.huhoIERalAhth.AcpYEps..s.hLGPl.L..s...h..h..h...............h.hh.shhl.....hhhlac.s.....E.pFs.ss.l.S.F....hh..h..P.ss......s..........A.........p..h......a...h.h..h..L...l..hl.p.hhNhl.h..N.hhL.lhh......p....p..+h.Kp...................................pps.o..Los+Yph.EE....lhpS.oK.Fs.l..h..lhF.hHllFF......................................................................................... 0 13 20 68 +4499 PF00530 SRCR Scavenger receptor cysteine-rich domain Bateman A anon Reference [1] Domain These domains are disulphide rich extracellular domains. These domains are found in several extracellular receptors and may be involved in protein-protein interactions. 
26.10 26.10 26.10 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -11.18 0.72 -3.66 136 8985 2012-10-03 20:35:02 2003-04-07 12:59:11 13 369 133 13 6194 7713 21 96.30 35 47.20 CHANGED ssGss...spGRVEl.h.........s.G.........pWGsVCss.s.......Wshp..sApVlCRp..LG........h.ssshps.tsssh...........ts.ts.....lh..hss.....lp......CpG.sEssLtpC...........pshst..ps...C.sp.t..ps....suVhCp ..............................................................................sGsst...s..pG.RVEV.h.............s..G............pW.GT............V.CDc..s...........Ws....hp....-........Ap...VVCRQ....LG.................................h..usu.h..ps...tssta............................GtG..sGs.........lh....lcs..................lp........................C.......p.......G.....s.......E.........s..s....L...h.pC..........................t......ps.h.st......ps............C...sH...p......cD......Au.VhC................................................................... 0 3267 3815 4627 +4500 PF00319 SRF-TF transcript_fact; SRF-type transcription factor (DNA-binding and dimerisation domain) Finn RD anon Prosite & Pfam-B_6396 (Release 8.0) Domain \N 20.40 20.40 20.40 20.60 20.20 20.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.43 0.72 -4.73 10 5768 2009-01-15 18:05:59 2003-04-07 12:59:11 13 20 1081 36 1424 5480 5 47.50 55 19.44 CHANGED KpIENpoNRpVTFSKR+sGlhKKAaELSVLCDs-VulIlaSssG+LaEauo .................................+I-NptsRQVTFoK...R.R.sGLhKKAh.EL.S.VL..C.D.A-Vu.L.I..l.F....S..s.s....G..KL..a-aso................ 
0 300 819 1128 +4501 PF02118 Srg Srg family chemoreceptor Mian N, Bateman A anon IPR000609 Family \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.76 0.70 -4.96 49 400 2012-10-03 04:04:29 2003-04-07 12:59:11 16 10 7 0 395 556 0 228.30 17 81.72 CHANGED hhlQhsYhlPuhhLhlhhlhhlhhs++...ahpsSFatLaphDhlsslhhhl.shhhsRlhhah..lC.hhs.h.hhssshhhshhhhhhhahhthphloplhlolsRho.sVhaPhpapphWp+hh..lllhlhllPahhlWshll...u.ps.lthh...tGshhhsahctl....assho...hapllahlhslhlslhoshlshh+lpths.p+hcplE+pLshhshhhohsahhhshhphh.....hshhhshhs..hhphhhhhhhhs.Dhhslut.............PhlLllhssplRppl ........................................................h.h.Yh..h.sh..h..lh....h.h...h....h...h....h....l...h...h...t...p..t......................pp..sF.....ah..l........h...hsh..........h.....h...............shhhhh...........shhh.Rh..h............s...h...h....h.....................t........s.......h........h....hp.h.h.h...hhh...a.hhh.....h....p.h.t...hhhshsRho.slh..h........h......p...............h...............p...............p..............h...W..p..............p........h......h.....h......h..h....h..hl..h..hh....s.h.h...h..h.h.phhh.............t.thh.h.......h.......s....s...h.h..h.......h....p.h..........................h..sth..............hh...h...h...h.....h..h.h.h..h.h..h...s...l..h...s...sh..h.......s..h....h............p.......h.........t..........t...............h..............t.......t............p..........h.............p............p..............h.............p........p..p....l...h.......h........h..s.h..h...h.sh....hhh....hhhhhphh................h...hh.t...........thhh.......h..hs...Dh...h....sls..............................................shhh.lhhstpl+t.............................................................................................. 
0 119 168 395 +4502 PF02290 SRP14 Signal recognition particle 14kD protein Mian N, Bateman A anon Pfam-B_7955 (release 5.2) Family The signal recognition particle (SRP) is a multimeric protein involved in targeting secretory proteins to the rough endoplasmic reticulum membrane. SRP14 and SRP9 form a complex essential for SRP RNA binding. 21.20 21.20 21.90 21.90 20.70 18.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.37 0.72 -4.07 22 297 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 258 6 206 281 2 95.60 33 71.62 CHANGED Lss-pFLocLocLapcsppcu..SValThK+h..........ssps.ps.spssp......shpp....ssp.tsLlRAosG....................pK.KlSTlVpsc-ltp..FhtsYuslh+upMsuL .....................Lps-pFLscLscLap...pscppu..SValThK+h.......................................................shps..ps.spptp................spstp........ssp....sLlRA.o.sG.......................cK.KlST.............lV........psc-lsp..F.ttYuslhKusMsuL................ 0 61 103 162 +4503 PF01922 SRP19 SRP19 protein Enright A, Ouzounis C, Bateman A anon Enright A Family The signal recognition particle (SRP) binds to the signal peptide of proteins as they are being translated. The binding of the SRP halts translation and the complex is then transported to the endoplasmic reticulum's cytoplasmic surface. The SRP then aids translocation of the protein through the ER membrane. The SRP is a ribonucleoprotein that is composed of a small RNA and several proteins. One of these proteins is the SRP19 protein [1] (Sec65 in yeast [2,3]). 
21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.09 0.72 -3.41 77 498 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 439 21 335 512 73 93.80 33 54.93 CHANGED hllYPsYlDsp+o+scGR+lspphAVcsPphpEItcAs.cpLslpsh.lE.cKtaP+c.....atppGR....VtVphcp.........................sKpplhptlAphlpph+ ...................slYPsYlssp+ohuE.G.RRlspph.......AVc.sPpspEIt-ss..ptl.....G......lssh...lE...sKtaP+-...............a.tttGR....V+Vpl+p.................................t.h..th.sKcplhhhlAphl.ph........................................................ 1 106 190 274 +4504 PF00660 SRP1_TIP1 Seripauperin and TIP1 family Bateman A anon Prosite Family \N 25.00 25.00 27.00 26.30 22.40 21.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.48 0.72 -4.21 14 386 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 42 0 177 280 0 99.90 47 50.10 CHANGED hsulusAssps........plsELssllsDl+upLs-Yhuhtts.sosh...shPssllslh.th.uohTDDSaTThhoplsFstlophlTtlPWYSoRLh.....PslsusLussususu .......................hsoLAtuD-cl........sl..l.ELsVaVoDI+AHLupYh.Fpuspso-....ThPs-lA-........As..a..s..h..s.sa.TTh..L..T..u...I....s.s..-QVT+hITuVPWYSoRLc....PAIouALupsGIho................................... 0 34 66 111 +4505 PF05022 SRP40_C SRP40, C-terminal domain Wood V, Bateman A anon Pfam-B_9034 (release 7.6) Domain This presumed domain is found at the C-terminus of the S. cerevisiae SRP40 protein Swiss:P32583 and its homologues. SRP40/nopp40 is a chaperone involved in nucleocytoplasmic transport. SRP40 is also a suppressor of mutant AC40 subunit of RNA polymerase I and III. 
20.90 20.90 21.40 24.30 18.90 20.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.69 0.72 -3.48 35 306 2009-09-11 14:16:26 2003-04-07 12:59:11 7 10 256 0 204 294 1 71.00 45 13.93 CHANGED s.FpRlc....hpclph.cpcLtDNoYtuts......ssaGcKAsccLthsRGKsFTKEKNKKKRGSY+G.GpIs.hussShKF .....................FpRlc.....pc.l..p.....hc.scltDNoapupt.......ssaGc.....+AsccLthT+GKuFp+EKsKKKRG.....SYRG..G...sIs..hpsp.ShKF....... 2 74 119 170 +4506 PF04888 SseC Secretion system effector C (SseC) like family Finn RD anon Pfam-B_5525 (release 7.6) Family SseC is a secreted protein that forms a complex together with SecB and SecD on the surface of Salmonella. All these proteins are secreted by the type III secretion system [1]. Many mucosal pathogens use type III secretion systems for the injection of effector proteins into target cells. SecB, SseC and SecD are inserted into the target cell membrane. where they form a small pore or translocon [1,2]. In addition to SseC, this family includes the bacterial secreted proteins PopB, PepB, YopB and EspD which are thought to be directly involved in pore formation, and type III secretion system translocon. 
29.00 29.00 29.20 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.14 0.70 -5.09 35 864 2009-09-11 03:00:25 2003-04-07 12:59:11 7 2 470 0 55 430 6 247.80 21 61.88 CHANGED lhhthsplhuchtppphpsphpphpphppppp....pchp-hpcplccttcpu...ccApKs.Gl...huKIhGWlusllollsuAh.......hhlsusulGs.....suAhhlusss..suhlutustp.u..............shsuhluptl.spsLsshsh..tth..hsttlstu..lssslsshu.....sssssssushsspsAuc.husthsttht..hht.hhtthhphhtp..tp.........................hhs..........................ptlphss.ssplssslsp.....................Gusphssushpppuspt.A-hthtpst..hptlpshhcphh-phsp..hhcshpphhpthhphls..spusstsplspps .......................h.hhhhtlhhpsttpphps.ht.hpthpcspp....tphpchppphpct.ctt.....ccs..p+...sh.....hutlhshlhshhsslhush..................sh.hhhhGh............shhhusth......shstsuh.t.h..................................................ht.lthth.......stslt.hhsh..t.h...............hs.tt.lssu...hstslt.ht...................hh.sstt.hhtthAtt.hspt...hs.p.h.p...........................................................................................................................................................t....shphssthtp......................................................uhsthtts.hptph.phhAp.th.phh...p.hpphhc..hcthtp.........p..tphhpth.p.hp....ut.thhhht..t.................................................. 0 17 23 39 +4507 PF00474 SSF Sodium:solute symporter family Finn RD anon Prosite Family This family includes Swiss:P33413 which is not in the Prosite entry. Membership of this family is supported by a significant blast score. 
19.80 19.80 19.80 19.80 19.70 19.60 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.29 0.70 -5.83 10 13052 2012-10-03 01:44:59 2003-04-07 12:59:11 12 51 3717 6 4094 10595 8262 353.80 21 74.16 CHANGED YaLAGRShssashGhSlsASshSuupFlGLuGsuhtSGluhuhauhstLlslhllhallushat.....psGslThP-YlptRFtu++.lllaLSuLuLLlhlshthouslluGAtLIcpslGlsYpsAlllluuhTslYTlhGGhhAVsaTDTlQullMlhGsllLhlhshhclG..Ghsshhp+hhsAsPph..sDh..........hs...ssuhp.h+sPlstsShWsuhshGhsGl........PHIltRshuuK.....cu+slhpGhlhuhshhlllhsGhluhhhashclAsssP....cssGsplssuNhAaPpLshcLhPshltGlhLAlhlAAlMSoLsulhluuSohFTcDlYppl++cutssEpclsthutlhsLllsulAllsulpsspts.lhhhlphAauhLuushusVlLLulFWcRsNppGAhhGhIlG ............................................................................................ahhuG+.sh.s..s.h.h...h..uhuhh.us....h...S.uhp.h.....l.G..h.s.u....h.s.......a..h...............G....h..s...h.......h.......h..h....s.....h..s............h....h..h..u.....h.h...l..h...h..h...l...h.s....h.ht.......................p.h..s...h...h....T.h..s.....-....hhp..t...R....a....t........s......p........h..............h.......p..h.............l.......u.....u.l......h.........h....l.........l...........h......h....h........h...h.....h.....s...............s.........t.....h.............h.....u.......s....u..........h..........h......h........p.....................h........h..........G..........l.............s...................h..............................h.......u..............l...............h.............l....h....s....h....l...h............h.....h............Y.s...h..h.....G...........G...hhu...s.........s...hs..D.hl...Q.....s.................h.....l....h...l..h..u........h..h....h........h...h..h....h........s........h.....h........t......h..G..............u...h....s....t....h.......h....p.....t...h..........t....h...t..s...........th..................................................................hp......s..s...........................................h............h......h....s
...h.............h........s...h......h....h....G...h.h.s......................p.h.l...Rh.hssc..............sh.p.t.h..h.....p.....u.....h.....h.......h......s.....h.......h.......h.....h.....h.....h......h......h.....h....h............h.....h.....h.....s.....h.....h........s.......h....h.....h.....h.....................................................h..........................t...s....s.....p........h......h...h.........h.h.......h.............t........h........h.........s...............s.......h.........h.....h...G.........l....h...h..u....s.....h....h.....A...A.........h.....h.S..o..hs....u...l...s.s..u..osh...s....pD.......l......a.................t...t.......h.....h...........p...............p.....t...............................t.........s..............p..............p............p......h....h......h........h....u....+....h.......h....s.....l......h..........h....u....h....l....s........h....h....h...u.....h...................t....s.....................l...h.......h...h.............h...h...h.u........h.h.s.........us..h...hsh...h.l.h.u.l..a...a.p....+....h..s...t........GA.hhuhhhG................................................................................................................................................................................................................ 1 1309 2409 3400 +4508 PF04686 SsgA Streptomyces sporulation and cell division protein, SsgA Mifsud W anon Pfam-B_5645 (release 7.5) Family The precise function of SsgA is unknown. It has been found to be essential for spore formation, and to stimulate cell division [1]. 
25.00 25.00 26.10 25.70 20.70 19.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.20 0.72 -4.33 27 307 2009-09-11 07:36:54 2003-04-07 12:59:11 7 1 100 3 119 297 0 99.20 41 68.34 CHANGED sls..spLRY-ss.DPaAVplsF.psss..stsV.pWsFuR-LLt-Glp..pPsGpGDVRlhPs.tt..spshlhlpL...suPsG...pAllchssstltsFLcRT.thVPsGpEp .....................lPspL+Y-ss....DP.aAV+hsF..+sss......stsV.pWsFuR-LLscGLp....pPsGpGDVRlhPs..t.......uts.tl.hl.pL......puPsG...pAllcsssssLtsFLcRT.plVP.GpE........ 0 30 93 118 +4509 PF00720 SSI Subtilisin inhibitor-like Bateman A anon Pfam-B_679 (release 2.1) Domain \N 21.80 21.80 22.10 23.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.25 0.72 -4.06 22 154 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 96 4 53 172 1 89.80 34 64.64 CHANGED YAPSALVLTlGpGpuAAoAsspRAVTLoCsPssuGTHPustuACApL+u..ssGDhstL...sspssthCT+pacPVsVTs-GVWpG+RVuaE+TFuNp .................................hsh..s..s..sss..+uss....LsC...s...Ps...uGo.HPsstuACApLcs..s..s....G.c.h.ssl.....ssssthCThp.Y.s.P.Vss.sssGsWpG+tVsappsasN......... 0 20 44 53 +4510 PF04056 Ssl1 Ssl1-like Wood V, Finn RD anon Pfam-B_13499 (release 7.3); Family Ssl1-like proteins are 40kDa subunits of the Transcription factor II H complex. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.20 0.71 -4.71 6 389 2012-10-10 16:07:06 2003-04-07 12:59:11 9 14 298 0 266 650 20 173.80 38 45.11 CHANGED VlDsScuMp-pDh+PsRhshhlKhlptFlcEFFDQNPISQlGlIshKsthAc+lT-LoGNPcsHIcuLcoltp.pspG-sSLQNAL-hAptsLptlPuHsoREVLIlauSLoTsDPGDIapTI-sLK+ppIRsSVIGLSAEltlCKcLsppTs.G...tYuVlLDEsHh+-LLhcpssPPPuspsp..csoLI+MGFP ...........................llDhSpu.M...t.-.pD..h+....P.sRh.th..s.lph..h..........ptFlcEaF.-QNPISQlGlls..h+s..thApp.l.o..........-.............lo.....G......NPp....cHlpuL...........pph.......h.....p...........p..s...p......G....pPS..LQNuLchAh.........t.tL.....h.........p......h......P..u....+..s..o..R.E.............l..Ll.lh..u.u....L..s..osD.P.u...s........I.a..psI.ps..L.hp.ppIRlu..ll....GL...uA...plt...lCpplsppTs...G................pYtVhls-..pH..h+-.L.lhph.ssPP....ss..........t..........t.......tt.................tss........L...lh..M.GFP....................................................................... 1 99 149 221 +4511 PF04386 SspB Stringent starvation protein B Kerrison ND anon COG2969 Family Escherichia coli stringent starvation protein B (SspB), is thought to enhance the specificity of degradation of tmRNA-tagged proteins by the ClpXP protease. The tmRNA tag, also known as ssrA, is an 11-aa peptide added to the C terminus of proteins stalled during translation, targets proteins for degradation by ClpXP and ClpAP. SspB a cytoplasmic protein that specifically binds to residues 1-4 and 7 of the tag. Binding of SspB enhances degradation of tagged proteins by ClpX, and masks sequence elements important for ClpA interactions, inhibiting degradation by ClpA [1]. However, more recent work has cast doubt on the importance of SspB in wild-type cells [2]. SspB is encoded in an operon whose synthesis is stimulated by carbon, amino acid, and phosphate starvation. 
SspB may play a special role during nutrient stress, for example by ensuring rapid degradation of the products of stalled translation, without causing a global increase in degradation of all ClpXP substrates [3]. 20.00 20.00 20.00 20.20 19.80 19.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.74 0.71 -4.61 137 1666 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1653 33 367 1037 412 152.90 35 95.39 CHANGED hs..sht.hhlRulhchl...........lssshp.aIshcsptsG..VplPpphh.c.sp.hhslhtcthtsLplss-t..hphshpFuGhPcplhlPhsAlhuhas.pssh.GhhF-spss.tt..............................................................ststtspsp...ttts.csshcVVp ...........................................h....spRPYLLRAhY-Wl..............lDNphTPa..llVcssh..PG..VpVPhcas.+DGpI..VLN.l...us+...As.s.sL..clsN-t....lpFsARFGG..lP+plhVPluAVlAIY.ARE..N..Gt.GhhF-....sEsshscsss......................................................t.tt.spp.........ssspsspt...s.tts+PsL+VVp.................................................................................................................................................................................................................................... 2 87 197 277 +4512 PF03531 SSrecog Structure-specific recognition protein (SSRP1) Griffiths-Jones SR, Mistry J anon PRINTS Domain SSRP1 has been implicated in transcriptional initiation and elongation and in DNA replication and repair [1]. This domain belongs to the Pleckstrin homology fold superfamily. 
25.00 25.00 25.30 26.10 24.80 21.10 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.46 0.70 -5.02 6 417 2012-10-04 00:02:25 2003-04-07 12:59:11 9 9 289 7 279 395 7 199.50 39 38.01 CHANGED FsGFR-pDlscLtsFhpusauhshsEKpLsVpGWNWGpschtGshLoFslsS+sAFElPLosVSQs.lsGKNEVsLEFHpsDsutl......uLMEhRFHlPso.tpptsuD.......ssphFtcslhuhADV.suot-AlshFc-ItlLTPRGRYDIcla.TFl+L+GKTaDYKI.YoSllRLFLLP+pDpppsFFVlSLDPPIRQGQT+YsaLVhpFspDE-h-l .................................sGFppp-hpplpphh.cp..tap..hpltp.+-hsl+GWNWGpschs............t.....s.......L.....sF.s.......lts.+.ss.FElPhu.plSp..s...h...sG...K.....N.EVslEF....p..............s-s...sts......................................pLhE.hRFalPss.........p.......p...t...ttD...............................s...sp...hFhpplhpKAc.lhp.s.sG-slshF..p-.l...hL..TPRGRYDIcha.s.h+L+GKTaDYKI.apslhRlFlLP+t..Dph..ph....hhVluLDPPl+QGQTRY.aLVh.Fpp-E-h..h....................... 0 104 161 234 +4513 PF04722 Ssu72 Ssu72-like protein Waterfield DI, Finn RD, Mistry J, Wood V anon Pfam-B_5993 (release 7.5) Family The highly conserved and essential protein Ssu72 has intrinsic phosphatase activity and plays an essential role in the transcription cycle. Ssu72 was originally identified in a yeast genetic screen as enhancer of a defect caused by a mutation in the transcription initiation factor TFIIB [1]. It binds to TFIIB and is also involved in mRNA elongation. Ssu72 is further involved in both poly(A) dependent and independent termination. It is a subunit of the yeast cleavage and polyadenylation factor (CPF), which is part of the machinery for mRNA 3'-end formation. Ssu72 is also essential for transcription termination of snRNAs [4][5]. 
25.00 25.00 27.00 37.90 22.20 21.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.33 0.71 -5.00 27 359 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 268 27 254 362 2 191.20 49 89.59 CHANGED spL.+hssVCASNpNRSMEuHphLtc..uGa.sVpSaGTGStV+LPGsShD+PNVYpFG.TsYc-IYsDLhupcp.ch...YcpNGLLpMLcRN+clKtuPE+Wpc...................ssct...FDlllTCEERsaDsVl-DLhsRt........sphpcsVHVlNlDI+DspEpAhlGuptIL-Lsphlp....................tsspshE-pl.cllscapcca.phshLaslsaY ...................L+hssVCuSNpNRSMEAHphLpc...........p.Ga.sVpSFGTGotV+LPGPuhccPNVYsFs.ToYcphYsDLhp..KDt..cL....YppNGlLpML-RN++lK.sPERaQc.........................sp-h.....FDlllTCEERsaDtVl-DL.sRt...............pphspPVHVINlDIpDNcE.EAtlGuhhIh-Lsptlp.......................tspsh-scls-lLtcapc+p.phshLaolsaY.................... 0 79 124 185 +4514 PF04184 ST7 ST7 protein Bateman A anon Pfam-B_2088 (release 7.3) Family The ST7 (for suppression of tumorigenicity 7) protein is thought to be a tumour suppressor gene. The molecular function of this protein is uncertain. 
19.10 19.10 21.70 19.50 18.80 18.10 hmmbuild -o /dev/null HMM SEED 540 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.86 0.70 -6.27 5 405 2012-10-11 20:00:59 2003-04-07 12:59:11 7 7 139 0 132 319 4 378.60 62 93.97 CHANGED luWSWTYLWslWFAlVlhLlYlLRuPLKLpEsLsuVol..FLNTLTPKFYVALTGTSSLISGLILIFEWWYFRKYGTSFIEQVSVSHLRPLlGGVDNoussuS..Sss-stpNRQNVuECKVWRNPLNLFRGAEYSRYpWVTG+EPLTYYDMNLSAQDHQTFFTCDoDpL..RPuDoIMQKAWRERNPQARIpAAapALElN......................P-CATAYVLLAEEEATTIoEAE+LFKQALKAu-shhR..pupphpupupph-A.+RRDTNVlVYIKRRLAMCARKLGRlREAVKMMRDLMKEFPLLSMLNIHENLLEALLELQAYADVQAVLAKYD........DISLPKSATICYTAALLKARAVSDKF..SPEuAuRRGLSTAEMNAVEAIHRAVEFNPHVPKYLLEMKSLILPPEHILKRGDSEAVAYAFFHLQHWKRlEGALNLLHCTWEGTFRhIPYPLEKGHLFYPYPuCTETADRELLPSFHEVSVYPKKELPFFILFTAGLCSFoAMLALLTHQFPELMGVFAKAslsllhusht..h.chcsahPusIhppLsst ...........................................................................................oh................FLsoLTPKFYVALT.GTSSLISGLIh.I.FEWWYF+KaGTSFIEQVSl.....s.......Hlp...P.hhGG........s-.ss.s..p.s.........s...sp..s.......................t.....................t..s............ps.s.ppsls...ECKVWRNPLNLFRGAEYp..RapWsTG.+.EPLTYYDMNLSAQDHQT.FF.TC-oDth.....R.PuD.slMQ+AWRERNP.ARIpAAapALElN...........................cCAsAYlLLAEEEATTIs-AE+LFKQ..ALKAG-shYR..pSQph..Q...Hpu.staEu...hRRD.TNVLlYIK..R..R..LAMCAR..+LGRh+EAVKhMRDLhKEa.P...hshhN..IHENLlEuLLElQAYADVQAVLAK.Y.D....................................D..ISLPKSAsICYTA.ALL.KsRsV.uDKF..SPEsAS+RGLSoAEhs..AVEAIHRAVEFNPHVP.KYLLEMKuL..I.LP.PEHILKRG.D.SEAlAYAFFHLtHWKRlEGALNLLpCTWEG.T.....FRhlPaPLE+GHLFYPYP.CTEsADRELLPs.FHcVSVYPKKELPhFIhFTAGlCS.TAhlAlL..THQaPE.MGlhA+s...h..h..........................h.............................................................................................................................................. 
0 33 46 86 +4515 PF03298 Stanniocalcin Stanniocalcin family Mifsud W anon Pfam-B_4401 (release 6.5) Family \N 20.70 20.70 20.90 20.90 20.60 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.69 0.70 -5.32 16 167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 69 0 77 133 0 193.90 45 78.10 CHANGED Mlh+suL..LllhlLssuu.......aEss.--s..usR+uRhSsposu-VsRCLsuALpVGCGsFuCLENSTCDTDGMH-ICpoFLaoAAKFDTQGKoFVKESLKChAsGloSKhFhslRRCooFQcMluEVQcECYsKLDLCoVA+pNPpAIsEVlQlPspFPNRaYspLLpSLLsCDEETVssVRsSlhu+lGPshusLFplLQsssssssussu .............................................................sh..llhhshs.st..........Es..pcu..u.p+u.RhuhpsoA-l.+CLssAhp..V.G..CGsF.tChE.N.s.o.C-hcG..h.....a.....-.....IChoFLasAuKFDsQGKuFlK-uL.KChA.p...ulp.pKhh.th..R+CsshpcMlhp...lQcECY.Kh..slCus...A+cNscsIsEhlph.shFs...p.c.YscLlp.LLpCsE-shpslpcSl.tpht.shuuLhplLphspss.tp..s................................. 0 17 25 43 +4516 PF02200 STE STE like transcription factor SMART anon Alignment kindly provided by SMART Family \N 25.00 25.00 47.60 47.60 21.90 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.51 0.72 -3.91 8 177 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 156 0 105 177 0 104.80 74 16.69 CHANGED sQlIRRahLssG-.YlSCVhWNsLaaITGTDIVRsllaRFpsFGRslpspKKFEEGIFSDLRNLKsGsDAoLEpPKSPFL-FLY+NuCIRTQKKQKVFYWFSVPHD+LFsD ....................s.QhIRRFhLPoG-.aVSCVhWNNLaHIoGTDIVRCLsFRFQAFGRPV+NpKKFEEGIFSDLRNLKsGoDAoLEEPKSsFLDFLaKNsCIRTQKKQ...KVFYWaSVPHDRLFLD........ 
0 29 61 92 +4517 PF02876 Stap_Strp_tox_C Staphylococcal/Streptococcal toxin, beta-grasp domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family \N 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.47 0.72 -3.88 41 3442 2012-10-01 21:38:54 2003-04-07 12:59:11 12 3 194 211 29 1049 1 99.20 31 42.23 CHANGED +......pls.lslahcspppph....spplpssKcpVTlQELDhKlR+aLh..ccapLY.................sushppGhItaphss..sppasaDLa.h.s......pchLphYpDNKslcScpl.+I-VhLp ............................hh.sttp.t......tp.hphsKcploLKELDaKlRchLl..cpapLY.................pushppGpIplphcs..sspashDL...........sccLphpc.scslcuppltcI-V.l.................... 1 16 24 28 +4518 PF01123 Stap_Strp_toxin Staphylococcal/Streptococcal toxin, OB-fold domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family \N 20.70 20.70 20.80 20.90 20.50 20.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.10 0.72 -3.50 22 1277 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 190 179 10 523 0 85.50 32 37.19 CHANGED NlpphYtshshsttp..thposcphlppsLlapsps.........pslpsEhsspphsppa.KsKpVDlaGl.YthpCht.t............hstshYGGVT.p .........................NlpphYtp.phpphp..shpsscphLspsLlFps.t.........pslpsEFpspshusca.KsKpVDlaGlsYthpChh.t.............tspChYGGVTh................ 3 1 8 9 +4519 PF04022 Staphylcoagulse Staphylocoagulase repeat Bateman A anon Prosite Repeat \N 19.70 19.70 20.20 26.20 18.90 16.60 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.17 0.72 -4.58 22 1401 2009-01-15 18:05:59 2003-04-07 12:59:11 7 18 155 0 5 881 0 26.90 84 26.32 CHANGED RPTaNKPSETNAYNVTTHAsGpVSYGA ..RPT.NKPSETNAYNVTTHANGQVSYGA.. 
0 5 5 5 +4520 PF02821 Staphylokinase Staphylokinase/Streptokinase family Bateman A, Griffiths-Jones SR anon PDB Domain \N 21.70 21.70 21.90 31.20 21.50 21.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.29 0.71 -4.33 11 458 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 171 31 11 340 0 122.40 32 79.68 CHANGED sstppsssshplphtspssDhchp.hlhs....hphtsschloppElttthpphlsp.sp.satlhppc.shhtcssshpt.p.hspcpphshhIss+ptthshsscsuhp.hhs...ptc.l.pchhlhcc ............................uph.sss.hl.hsspssDschs.lLpsphh.hsltsGsoLTppclthhspahLst.sa.-atlhEpDsShhhcssshh+.h.hsp-pphoahIp-+ttshstsscsshp.thN...pp..l.pKhhl.KK................................... 0 1 4 7 +4521 PF01017 STAT_alpha STAT; STAT protein, all-alpha domain Bateman A, Griffiths-Jones SR anon Pfam-B_856 (release 3.0) Family STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. STAT proteins also include an SH2 domain Pfam:PF00017. 
27.50 27.50 27.70 27.70 27.40 27.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.10 0.71 -4.56 26 626 2009-01-15 18:05:59 2003-04-07 12:59:11 15 30 117 9 231 569 0 170.90 31 24.35 CHANGED h-+Qppl-s+lppl+spsp.cs-psl+tLcchQ................-pasapap.......olpshtchp...hNs.t.ct.hppcphhlpphhp........pLphpRppllpchppllshlcplppslls-EL.-WK+RQQhACIGGP.s.pssLDQLQsWhTtlAEsLhQlRQQLK+lp-LppplsYssDPlspt+spLppplppLlpsLlpS ....................................................................................................pcp.plppplppl+thsp.ph-pch+tLp.....p..hQ...................-.aphpap..............plp....s......ht.php...............pt....s...p.........hpp.c....t....l.pphhp............tLp.phR...t..pllp.c....hp..p....hlshhcthQ....ptlls-ELhpWKRRQQlAs..GGP..s...ps...sLD..pLQsW...hptLAE.lhQhRQ....Ql++hcc.Lpp.p..h.shp.s.pP..ls.pths.L...ptplsplhpsLlpS............................................ 0 34 51 117 +4522 PF02864 STAT_bind STAT protein, DNA binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_856 (release 3.0) Domain STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. This family represents the DNA binding domain of STAT, which has an ig-like fold. STAT proteins also include an SH2 domain Pfam:PF00017. 
20.20 20.20 20.40 21.90 19.50 19.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.80 0.70 -4.78 9 708 2012-10-03 00:25:27 2003-04-07 12:59:11 10 35 125 9 273 620 4 217.30 41 33.61 CHANGED FlVE+QPCMP.pPpRPLVLKTtspFTs+lRLLV+h.ELNhplKscslhD+pss-hts.ch.......R+FNlhGoso.KlhNhEco.ssSLus-FpaLph+E.+stsusRsNpcGshhVTEELHslsFEophshtG..LpIcLcThSLPVVVISNssQhPsAWASILWaNhhosps+N.sFFssPPtusWsQLuEsLSWQFSSpst..RGLs.-QLshLA-KLhspsus.....sstploWscFCKEphss+uFoFWhWl-uIlDLlK+ ................................................................FllE+QP........P.Vl.KT.tspFss.pl..RL........LVth.cLNh..phps..s.h...s.....p..hs-.ts..th..................+p...s...h.h.......s......pso..cl....hN.pps...php....ssphhph...ph+..p...pht....t...htR.s...s....c...Gs........VT..EEhaslhF.pop.hshtu...........L....hhp......l......c......ThSLPV.VVIspss.Q.ssAhAolLW.Nhh.s.p.s..+.........s.FssPsts.WsQlsEsLs..hpFpu...st..........RGLs.-pLshLupKLh....s.ssst...........ssh.loWu.pF...sK..................E.s..........l.s.s..+......sF...oFW.Wh-ull-LlK+............................... 0 45 65 143 +4523 PF02865 STAT_int STAT_prot; STAT protein, protein interaction domain Bateman A, Griffiths-Jones SR anon Pfam-B_856 (release 3.0) Domain STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. STAT proteins also include an SH2 domain Pfam:PF00017. 
26.00 26.00 29.50 26.40 25.30 25.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.69 0.71 -4.16 22 576 2009-01-15 18:05:59 2003-04-07 12:59:11 12 26 102 3 210 502 0 117.70 39 16.63 CHANGED o.WtplQpLpschL-Q.lppLYsssF.PhElRpaLApWIEs..QsW-ths......s-shAshlhpsLlppLpcphpphspp.sshLhphplpcht.pplpsha.ppsPhplsthlpphLppEp+llppApps ...........utWhplQQ.LpschLcQ.lppLY.s.c.p.F.PhE...lRpaLApWIEs...Qs.W-ths.........cpshAohlhcsLl....ppLppphs+.......hs....tE.sshLlphplt+ht.ppL..Q..spa.pcsPhplsthIpphLhpEp+llppApps...................................... 0 29 43 96 +4524 PF03875 Statherin Statherin Finn RD anon DOMO:DM07003; Family Statherin functions biologically to inhibit the nucleation and growth of calcium phosphate minerals. The N-terminus of statherin is highly charge, the glutamic acids of which have been shown to be important in the recognition hydroxyapatite [1]. 25.00 25.00 29.20 35.40 16.70 16.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.24 0.72 -4.08 2 11 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 8 0 1 8 0 39.40 74 68.08 CHANGED DSSEEKFLRRltRFs.G.YGPYQPhs..PLYPQPYQP...QY ....DSSEE.KFLRRltRas.G.YGPYQPhPEQPLYPQPYQP.YQQY 0 1 1 1 +4525 PF00836 Stathmin Stathmin family Bateman A, Mistry J, Segerman B anon Pfam-B_1551 (release 2.1) Family The Stathmin family of proteins play an important role in the regulation of the microtubule cytoskeleton. They regulate microtubule dynamics by promoting depolymerization of microtubules and/or preventing polymerisation of tubulin heterodimers [1]. 
20.40 20.40 20.40 20.60 20.30 19.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -11.03 0.71 -4.55 4 392 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 92 15 157 297 5 129.70 55 68.99 CHANGED SDhcVKpLpKRASGQAFELILpPPSh-usP-hslosPKKKDhSLEEIQKKLEAAEERRK.pEAElLKQLAEKREHE+EVLQKAIEENNNFSKMAEEKLspKMEs.KENREAplAAhLERLpEKDKHsEEVRKNKEhK-.u .......................................DhpVKplpKRASGQ.uFElILp.PPS...............u...s..........hsho.s.P....+.+.KD..hSL.E..EIQKKLEAAEERRK..op...EAplLKpLAE.KREHE+EV..lpK.A..lEE.NNNFsKhAcE...KLp.KM.......EtsKE.NREAplAAhhERL.pEK-c+h.tEVR+sKE.p.................................. 0 26 39 74 +4526 PF02116 STE2 Fungal pheromone mating factor STE2 GPCR Mian N, Bateman A anon IPR000366 Family \N 26.20 26.20 26.30 26.20 25.50 26.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.94 0.70 -5.37 19 170 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 139 1 88 160 0 273.90 32 65.70 CHANGED asPhpphlsapts....G.sshslshsplsphlppplphuIhaGsplGAs.llhlllLhhlo+sc....+oslFllNpsuLhhshl+usL.htYhhusasul.hhhTu.hphlopsshtspsAuslhpslLlssIEhSLlhQlpVlaps.s.+hhthhLhulSshlulssluhhhssslpshh....slhssssst.shhah.....slssIlhusSIsFhohlLlsKLhhAI+pRRhLGL+QFsuh+ILhIMusQTlllPS...........ILhIlpY..hssh..spLssluhhLVsLSLPLSSlWAu ..............sPhpQslshhts.....G..s.hsl....shstl-shhp.tlphsIsausplGAs.hlhL.llhh.h.hopscc..h..........+s.lFllNhluLhlsllRssLhhhahho.sass....hhhhaoGsa..ph...lstu.shpsSlAusllplllsshlEsSLhhQshlhhps..hs....phh+..hhlsslShllulssluhphssslhpsh..................slhp....s..ss....hs.....hh.Wl..................phshIlhssSIsaaohlhssKLlh.A.lhoR...R.hL..Gh.Kp.F.suhclLlIMusQohllPu............lhslLpa.h.ss....s.phsolshslVllsLPLSSlWAt.................................................. 
0 14 42 72 +4527 PF02076 STE3 Pheromone A receptor Mian N, Bateman A anon IPR001499 Family \N 24.90 24.90 25.50 26.70 24.50 24.80 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.92 0.70 -5.32 50 604 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 245 0 180 590 1 238.10 28 69.11 CHANGED sshuhluhlLslhPLsWHh+u+NsushhlhhWhhlssLpphlNullWssss.ts.hhs..saCDIss+lhlGuslulPuuslCIs+pLtpIhsscpsphstspp..p+plhhDlhlslhhPllhMuLpalVQuaRasIhcthGChsshhsohsuhhlhahWslllulluslYusLsLhtFh++RtQFsslLpsspSuLohuRahRLhhLuhl.hlhhhPlshaslh..lphppss....hsa.SWspsHss...a.spI.paP.............hsplhhsRWlssssuhlhFhhFG....hup-........AhphY .............................h..hh....h.hhh.st..s.ush...hhhhWhhlhsl.thlNullW.sss..hs.hhP...sWCDI..s..........s+.lhhus.slG.lssush..CIsRpLt.p.Iss....s.ct.s.t.hotpc+................+Rp..hhhDhhlslGlPllhhsL.......phllQspRasIhcthGChsshh.ohsshhlhhhWs.hlhuhsuslYus...lslhhhhp+..Rtph.pphlts.s....Ss.l..s..h.s.Ra..h....R....Lhhhshh.lhh.hhPhshh.hh..h.............a.sat.hH........h..t...l..hs..................h........h....h.shhhh.hhFu.hs.-s...Y........................................ 0 99 134 164 +4529 PF04885 Stig1 Stigma-specific protein, Stig1 Mifsud W anon Pfam-B_6528 (release 7.6) Family This family represents the Stig1 cysteine rich plant protein. The STIG1 gene is developmentally regulated and expressed specifically in the stigmatic secretory zone [1]. 
21.90 21.90 22.00 21.90 21.70 21.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.49 0.71 -12.08 0.71 -3.97 14 196 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 43 0 144 189 4 94.50 32 43.63 CHANGED llhlllslslphhshsssssss............p..tppspssshhssspss..ssh..ut....................pCsp..sstICpsst.....ssphsCCpN.+CVDltosctNCGsCsptCca.up.pCCsG.CVslthDpppCGpCsppCt.Gp.CsaGhCsYA ..............................................................................................hh.........................................................................................................................................sC.....t...........C.hsh........s..pCGhCsp.tC.ph.u.p.pCC..sGt.C...Vs...ltsD.ppCGtCsp..........tCstGphChhGhCs.............. 0 50 108 135 +4530 PF05217 STOP STOP protein Moxon SJ anon Pfam-B_6629 (release 7.7) Family Neurons contain abundant subsets of highly stable microtubules that resist de-polymerising conditions such as exposure to the cold. Stable microtubules are thought to be essential for neuronal development, maintenance, and function. STOP is a major factor responsible for the intriguing stability properties of neuronal microtubules and is important for synaptic plasticity. Additionally knowledge of STOPs function and properties may help in the treatment of neuroleptics in illnesses such as schizophrenia, currently thought to result from synaptic defects [1]. 
27.00 27.00 27.20 27.20 26.60 26.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.79 0.70 -4.60 20 445 2009-01-15 18:05:59 2003-04-07 12:59:11 7 11 81 0 305 392 1 156.10 15 61.31 CHANGED ut..ClCplCsCu+p..hCp..ppslplslshphh.......SsYcp-a............sspssptss.hsttchps+thsl....................osYppDFphhsh........stpccsh.tpshshsupouYsosa.shss.ssspht.sp.hsshs..hshsspToY+p-Fp.shpt.....sshsscsp.tpthss.....a.s.o............appsapspssssht..................................thppchshssos.Fpuph..........pphca .........................................................t.....................................................................othtpp............................................................................................................................................osh+.pDatsapht.....t.hp..p.p.p.h.t..sss.s..F.pstTo...a......ppcahs..h...th........h.....t...sh.+s.t......h....s...hPhp.s.o............sa+.pah..s.................................................................................................................................................................................................... 0 134 160 207 +4531 PF03088 Str_synth Strictosidine synthase Griffiths-Jones SR anon Pfam-B_1533 (release 6.5) Family Strictosidine synthase (E.C. 4.3.3.2) is a key enzyme in alkaloid biosynthesis. It catalyses the condensation of tryptamine with secologanin to form strictosidine. 
20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.69 0.72 -3.95 8 707 2012-10-05 17:30:42 2003-04-07 12:59:11 11 11 260 14 391 1067 163 85.90 36 22.65 CHANGED sulsVssp.GVlYFTDuSo+Ysh.+plhhshLpGcssGRLh+aDPoT+sT+VLLccLaFsNGlulSsDpoallhsEsshpphh+Yalpts .............................................sslslsps.GplYFTDo.........S.........s...........c..........aph....c..............p.......a........h....hsh...........h..p...u.......c.s.sG..R.Ll+YD...............s.p..T.pps.pVL.lcs.Lt.FsNGVulSsDpsa.......ll....lsETsttR.lh+YalpG.................... 1 98 227 319 +4532 PF04270 Strep_his_triad strep_his_triad; Streptococcal histidine triad protein TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family All members of this family are proteins from Streptococcal species. The proteins are characterised by having a HxxHxH motif that usually occurs multiple times throughout the protein. 20.90 20.90 21.30 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.58 0.72 -4.24 5 2985 2009-01-15 18:05:59 2003-04-07 12:59:11 8 20 316 3 93 1739 0 48.40 47 18.71 CHANGED YTT-DGYIFsPsDII-DpGDAYlVPHGsHYHYIPKc-LSsSELAAAQAYhupK ..........sDGYlFs.PscIlp-.s....u.....s.ualVPH..G..sHaHaIPKspLSt..E.hthAp................................... 0 21 31 53 +4533 PF02516 STT3 Oligosaccharyl transferase STT3 subunit Bashton M, Bateman A anon Pfam-B_1095 (release 5.4) Family This family consists of the oligosaccharyl transferase STT3 subunit and related proteins. The STT3 subunit is part of the oligosaccharyl transferase (OTase) complex of proteins and is required for its activity [2]. In eukaryotes, OTase transfers a lipid-linked core-oligosaccharide to selected asparagine residues in the ER [2]. In the archaea STT3 occurs alone, rather than in an OTase complex, and is required for N-glycosylation of asparagines [3-4]. 
27.90 27.90 27.90 28.10 26.90 27.70 hmmbuild -o /dev/null HMM SEED 483 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.93 0.70 -5.53 15 968 2012-10-03 03:08:05 2003-04-07 12:59:11 9 12 617 11 541 870 126 459.70 26 63.95 CHANGED hltlllhuhhhhssshuc.........lFsstphhshh.-hDsYYpaRlsctllpcGa........hsthsaa.pptaYPhG.hlsassllshL.hshhsllhhhhs......hslpplshhhsPlluulsuIssahLs..+clpsctsGlluAhlluluPu.YlsRoluGha..Ds-hhslhlhhhshhhalcuh+sus............hhassluuLshhlhshuWsGh..........lhhhshhsLhllshLlhu+..........apschhhlh...............shshllssls..hl.hshlGat.........hhthhhlhulhphhshsshstt............hhshhpats.....lhhsu.lsplsslh..ssslhsluhhGhlu.h..................sGhhaslhp..........hschpl.hls.ls.apshuhhuhhhul+Fh.lhshPlsl...hasshhlcshtshhhhhu.....................hhthhhssshsh.lhlslssslshlushths.tlhsphhtpttsth+h.....t..........................................ssssssllhstasashhhhshtsp.....Vshsuhu .....................................................................h...h..h.hhhhh...h....h...............hhs..h..h.h...a..........h..hp....EF.DPa.F..paR.tsphlspp..Ga................athhsaFD.t..............saaP...hG...h..l..h..h..G.....o..h..h....s.s..L......h...h..ssth..lh.h.hhp.................hpl.t.s.ls..l..a....h..ush.huu.lss.l.s.sa...hls.....cch.................t......s........s...........t........u........G....l...lAAhhhul.sP...............u.YlsRSh.uG.a..Ds-sl..s.I...h...h...h.h...h.s..h...a...h.a..l...+.ulcpss.................................................hh.a..u..shs..uLshh...h.h..l.u..W.uGa..........................sallsll.s..L..a.shhh.l...l..h.tR...................................................................hsp..pla...huh.................................shh.hhl...shlh.......s.ht..l..sFlu...Ftshp.........s.pth..u.........s..hu...l..h.sl.l...t..l..h..h....h..hthh.t................................tlshttaps.........................lhhhh.....hhh.shhhh........hhslhhls..h.h..uh..lu..s.h................................................................................
..............................suhh.hSLhs...............sasp.h...p.h.shh.s.sls.at...s.s..s.......a.uh..hhs...lp..hh....l....hhhP.s.ul..........hhs..h....h...h.......p...........t....p...l.hl.hhhu........................................................................hhs..hhhsssh.l.h..lh..Ls....L..sP..s.....h...s.h...h.....u..u....hshs.....p...........lhs......p...h.....h.t...t.....t...p.......................................................................................................................................................................................................................................................t.s.......ps.h............lh.s.hhshhh..hhhhtsph....shsp................................................................................................................................................................................................................................................... 0 184 311 447 +4534 PF03481 SUA5 Putative GTP-binding controlling metal-binding Bateman A anon Bateman A Domain Structural investigation of this domain suggests that it might be a GTP-binding region that regulates metal binding and involves hydrolysis of ATP to AMP. It is found to the C-terminus of Pfam:PF01300. 
22.90 22.90 23.00 23.20 22.60 22.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -11.20 0.71 -4.04 384 2044 2012-10-03 16:42:30 2003-04-07 12:59:11 8 11 2016 4 615 1629 1172 136.70 28 39.29 CHANGED lohEp..lc..pllGt...........h........t..t......t....sp..tP.........p.APGMhhpHYAPputl........h..ls........p.................th...pt.............................................sptlhhlshst......................................htt...........s................hsh...........usptchppsApsLFssLRphDp...........p......sh.chIhsp........s..hs.p....p...Gl..G.tAIhsRLp+A.Au ............................................lTh-plcpllGp.......................th.h........pc..............sp...tP.c.APGMKYpHYAPc.sslhll..p.....................sh....t........p..............................................................spclullshpph................................................................................htth......t..........hhh........h...........ustschpp.sApsLassLRphDc..............p....sl.chI..hsp........................s.hs....p...s.....sl.G...tAlhNRLpKAAu............................ 0 225 402 517 +4535 PF01300 Sua5_yciO_yrdC Telomere recombination Finn RD, Bateman A anon Prosite Family This domain has been shown to bind preferentially to dsRNA [1]. The domain is found in SUA5 Swiss:P32579 as well as HypF and YrdC Swiss:P45748. It has also been shown to be required for telomere recombniation in yeast. 
22.80 22.80 22.80 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.11 0.71 -5.03 163 8425 2009-01-15 18:05:59 2003-04-07 12:59:11 13 39 4856 15 2171 6104 3104 175.00 27 51.50 CHANGED phLcpGtllAhPT-osYuLuscst.sppAlp+lhphKpRs..sK..Plslhss.....slpp..lpphst...th.......spthhp............hhpp....hhPGPlTllhph..................pp.......lsp..hl.............sstsslGlRlPspsltttLhc..................th.t.....s...........lluTSANhSG.c...sss.tsspclhpp.l....sstlD.......hllcss........hh.tshsSTll..c.lh......t.p......l.lRpG ........................................t.hlppGtlluhPT-os.YuL..u.......scst..........s...........p.......pA..........l..........p+lhphKp.R.P..t.........s+.........slhlhss........shpp..l.pp.asp......h..........................s.s.t.h.hc.....................................lh..ps.....h.......h....P.......G...PlTh..lltt.....................................tpp......lsp...hl...................ss.s.h.so....lulR....l.....Psp.P..lshtLlp.........................................................th.u.t...P...................lsuo..SA.N....l.S.G.p............ss.s....p..s....s.p......c...l.hpc...L..........s.s..p..l.c......................h.l.l.cu.......s............ht.......t......t..........h.s........STll..Dhs.......st.st.....l.lRtG......................................................... 0 706 1382 1838 +4536 PF00862 Sucrose_synth Sucrose synthase Bateman A anon Pfam-B_484 (release 3.0) Family Sucrose synthases catalyse the synthesis of sucrose from UDP-glucose and fructose. This family includes the bulk of the sucrose synthase protein. However the carboxyl terminal region of the sucrose synthases belongs to the glycosyl transferase family Pfam:PF00534. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 550 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.61 0.70 -6.54 4 604 2012-10-03 16:42:30 2003-04-07 12:59:11 14 9 221 27 131 815 109 398.90 57 63.47 CHANGED TRVHSlRERls-TLoAHRNElLuLLSRh.upGKGILQPHQLlsEaEsI..tED+tKLcD..GsFu-Vl+SsQEAIVlPPWVALAlRPRPGVWEYVRVNVacLsVEELoVsEYLpFKEELV-GSSsuNFlLELDFEPFNASFPRPTLoKSIGNGVQFLNRHLSuKhFHsK-ShaPLL-FLRlHsYpG+oLMLNDRIQslsuLQusLRKA--YLuoLPsDTPYSEFEH+FQElGhERGWGDsAcRVhE.hHLLLDLLEAPDPsTLETFLG+IPMVFNVVILSPHGYFAQsNVLGYPDTGGQVVYILDQVRALEsEMLhRIKpQGLDIsPRILIVTRLLPDAVGTTCsQRLEKVhGTEHoHILRVPFRTEKGILRKWISRFEVWPYLETasEDVApElAtELQupPDLIIGNYSDGNLVASLLAHKLGVTQCTIAHALEKTKYP-SDIYWKKFEc+YHFSCQFTADLIAMNHsDFIITSTFQEIAGSKDTVGQYESHTAFTLPGLYRVVHGIDVFDPKFNIVSPGADhoIYFPYoEpEKRLTuLHPEIEELLYS ..............................................................................................................................h...h.........................h..hh...pEhhh.s..hhhhhR..p.u..phhplp..th.h-.h...ph.Lth+-thh.t.........p....t....L.El..DFtsFp..hPp.p.sp.IGpGhpals+ahou.hh..t...p...p......h....Lhta.L..hph......p...G........hhls.pp.lps..tLp..lhhA..hl.th..ppsatpht.p........hpthGhE.GWGssAt+s.p.hphL.-llpuP-.s.shEtFhuplPhhF.....plVlhSsHGaF.u..Q..ts..V.L..Gh.PDTGGQ........VVYILDQVRAL.Ep.E.h.l.R...l.....c.......Q..GL.....s...l.....h.P+I....l.....l...l....T.R.L.l.P.-..A..h...GT..s..C..s......Q..+..L..E..cl........G..o...c.......t..s.pIL.RVPF+s.pp.G.h.lcpWISRF-.lWPYLE.p.as...c.............D...........s...........s...........t..........E....l...........h.......t...........Eh...........p......u...........p...........P.D...L..I.lGNYSDGNlVAoLLup+LsVTp.........C.......sIAHAL...EK...T...K...YP.......c....SDI...Y....W+...c....h...--KYHFSCQFTADLhAMNpoDFIITSTaQEIAGSK-oVGQYESHtAFTL..P.G.L.Y.RVV.HGIDVFDPKFNIVSPGAD.slYFPYT-pc.c.RLTshHscIEELLas........................................................... 
0 25 79 113 +4537 PF02657 SufE UPF0050; Fe-S metabolism associated domain Bashton M, Bateman A anon COG2166 Family This family consists of the SufE-related proteins. These have been implicated in Fe-S metabolism and export [1]). 20.20 20.20 20.20 20.40 20.10 19.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.31 0.71 -4.49 7 2432 2012-10-01 20:52:23 2003-04-07 12:59:11 10 5 1792 5 531 1474 985 125.00 36 81.81 CHANGED lhppF.cstph-c+Yc.LlphGppLsshscchhtptp..l.GCpSplal.htshpssth..hF.u.o-AhlspGlhullhphhsGpTss-Ilshssh.FFpcLult.pLS.uR.pGhpulhtthpphsh .........................................hpsFt.hssWE-+YchlIpLGcpLP.sLs..-...c...h..+...s...p...t...pp...l...pGCpSp.V..Wl............h....h.....p......p....s.........p....s.......G........p...l........cF..p.....G..D..SD..A.t...IV+.........GLlAlllt...hh........sG.pTsp-lhshc...s....p.sa.Fccl..GL...p.p.p.L...S..PoRopGLpAhlctI+stA.t............................... 1 147 317 440 +4538 PF05076 SUFU Suppressor of fused protein (SUFU) Moxon SJ anon Pfam-B_6089 (release 7.7) Family SUFU, encoding the human orthologue of Drosophila suppressor of fused, appears to have a conserved role in the repression of Hedgehog signaling. SUFU exerts its repressor role by physically interacting with GLI proteins in both the cytoplasm and the nucleus [1]. SUFU has been found to be a tumour-suppressor gene that predisposes individuals to medulloblastoma by modulating the SHH signaling pathway [2]. Genomic contextual analysis of bacterial SUFU versions revealed that they are immunity proteins against diverse nuclease toxins in polymorphic toxin systems [3]. 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -11.15 0.71 -4.68 132 954 2009-09-17 14:43:38 2003-04-07 12:59:11 8 30 737 5 252 788 16 163.50 19 53.36 CHANGED ts.ssh.slslhhhts..t.......pshhphlThGhS..............................th.ht..sp........phhthELhhtl.................spp.hashphLt...........slA.phshp....pss...h..l....s.Gphl..................s..psht....tso.....phsuhhlhts......hhtssphsth.............stt.......lpahpllPlppsEhpahp....ppG...scsL.hpth..tpt.....shtlhDhpRps ............................................................................................................s.......lslhth.s.............shhpasThGho.................................th.ht....tt.......t.pththELhhtl...................ptt..hhhp.hLt...........slA..phshp....ptp..h...h..............s.Gphl..........................s..pshs........tss...phsthlhs.s......hhtssp.h.sth...................stt................Vpal.llPlsppEhphhp.......ppG........hps..l..hchh..ppt.....shplhDhpR......................................... 0 80 153 211 +4539 PF04198 Sugar-bind Putative sugar-binding domain Bateman A anon Pfam-B_1085 (release 7.3) Domain This probable domain is found in bacterial transcriptional regulators such as DeoR and SorC. These proteins have an amino-terminal helix-turn-helix Pfam:PF00325 that binds to DNA. This domain is probably the ligand regulator binding region. SorC is regulated by sorbose and other members of this family are likely to be regulated by other sugar substrates. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.22 0.70 -5.48 31 3935 2012-10-04 00:26:15 2003-04-07 12:59:11 8 27 2304 24 585 2201 210 247.90 26 76.87 CHANGED sphssshcLEptLcc+auLccshVV..Psss.....st..sss.hpplutsuuphlpp.hlp.ssp..lluluhGcTltssscthss..hphpslphVshhGGhst...hs.hps...lstphAc+h.supsthh.sPshhsssphtcslhpptslpsllphhcpsDlslhGIGshtp..puthhtpshhspp-hppl.pptuuVG-lhGt.aFDtpGphlp.tshss+slulpl-pL+phsphlulAuGppKspAIhAAL+us.hls.sLlTDEpoAptlLs .....................................................t.hplEppLpc+a...u.LpcshV..l.......s..sps.................sp.....tsh..tpplup.suAphLpp..hlp...ssp..........llul.u.aGp.T.......ltsls..c.p.l....ss.........h.....p.h..p..p..l..p....h.....Vsh........t.......G.......Gh......u......p......tht...hp.ss..p.ls.tphApph..su.p.s.p.hl.sPhh.hsssp.......htpslh.pcpslpplh.phhpp.....uclulhGIGshtp.....puth.h....ps.h.h.s.p.....p-.h.......p.p.l.tppsAVG.-lhuh.FaDtcGphlp...thpp+...slGlsL.pp.Lc..........p..........h.....sph.lulA..uGppK.s.pAIh.............uuL+....us..hls..sLlTDpssAptlL............................. 
0 153 343 458 +4540 PF00083 Sugar_tr sugar_tr; Sugar (and other) transporter Sonnhammer ELL anon Prosite hmmls-iteration Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.46 0.70 -5.91 44 32448 2012-10-03 03:33:39 2003-04-07 12:59:11 19 163 3323 0 14571 74553 5580 356.20 18 81.17 CHANGED hshhsuhuG.hhFGaDsGhIuuhhthhshhhpas.htpppssth.................................................................p.h.ulhVuhh.lGshlGulhsuhhuctaGR+.hullh.ssllhllGsllpshup..........shhhlhlG........RllhGlulGhhsshsPhaluElA...PpplR.GslsshhQLhl.shGIhluhhhshsh.tthss.........tW+l.lulshl.ulhhhhsh.halPESPRaL.l.psch-.cAcpsLt+...hpt.....t.ss.pltc.hsphpts.t.t......hshtplhpths...hh..lhhuhhlphhQQhoGhNslhYYusslFpslGhsss...hhsolllG.llNhshThlu.l...ahl-+hGRR.......phhLhG.......hsshshshhhhs....sshhhsss........phhuhshlshhhhalshFshuhuPlsallsuEhFP.psRstuhulAsssNW..lhsFlluhhhPhhtssls.......hhshhlFsuhhlhhhhasaahlPETKGholEclspha .................................................................................................................................................................h...........h.......h..h.....s.............h....h......s........h................................................................................................................................................................................................................................................h.....h..h...u..h.....h........h....l..u.....t.....h.....l.....G...u...h.........h........h.......G..........h............h.......u......D........+...........h.........G....R.............+....h................s...........h......h.h....s....h.h...h.....h.........h..........h..u..s....h..h........u..h.ss......................................sh.h.h..l...h..hh............................R...h.l...........G.....h...........u.......h.......G......s.........t...............h......s...................s......s.........s...................h.
.....h.....h........s.........E....h...u.....................s.....t....p.......h....R......G....h......h............s...............s.......h.....................p....h......h.......h.......s.........h..............G....h.........h......h....u..........h........h......h.....s....h......h..h.......h.s.................................................W......R....h.......h....h.........h....s.....h.....l....................s......l.....l....h.....h.......h............h....h.....h......h.....l.....s........E...S.......P...t....a.....h.....h................p.........t.........p........................t........p....u.............t.........h...tt..............................................................................h.............t...........................................................................................h............h..h................................h...h......h....h.....h....h.....h..................h........h.......................h...............h....................................................h..................h.........h.................h.........h..................s....................h.........h........t............h......t........h..t............................hh...hs......h.....h......h............h........h....................h....h......h.....h....h....h...s...h...........................hh....h.....c.........p.........h..........G.......R....+..................................h....h....h.....h.....s..................................h..h...h...h...h....h......h.h...h..h.h.........................h..tt..................................................h.....h.....h.....h...h..h...h....h.......h...h....h....h....h....h........t..................s.............................s............s..........h........................h....h......h.........s.......E....h...........h...s............t.....h.....R.............s.......u...h.....u....h.....s....h......................h.................t..h.................h....h.....s.....h........h...
..h.......s.....h.....h..h..............h....h.....t.......h.t.............................................h...h...h...h......h..h.....h...h..............h....h......h....h.....h....h.........h......h....h...h......-.s........t.................................................................................................................................................................................................................................................................................................................... 0 3767 7345 11757 +4541 PF01253 SUI1 Translation initiation factor SUI1 Finn RD, Bateman A anon Prosite Domain \N 25.50 25.50 25.50 25.60 25.10 25.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.91 0.72 -4.18 161 2651 2009-01-15 18:05:59 2003-04-07 12:59:11 17 21 1783 5 1143 2005 478 80.00 33 45.58 CHANGED ttppthl.....+lphp......p+.pts...........KtVThl...pG.ls..h.tth............-lcpluKpLKpchusG.Gol...................t...sp.IplQGDppcp.lhp....hL...cpshtscp ...............................s..tpshl.....+Iphp.............pt.pts...........KsVThl...pG.ls.......h.p-h............-LccLu.ppLKK+huCG.GoV...................+..st.IplQ.GDpRcp.lpp....hLtpc.Ghthp................ 0 338 612 910 +4542 PF03846 SulA Cell division inhibitor SulA TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 30.00 30.00 30.10 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.82 0.71 -4.41 9 642 2009-09-10 23:49:51 2003-04-07 12:59:11 9 1 637 6 70 245 12 111.40 62 68.04 CHANGED hpShass+usptShssppsupsssttsssGLISElVYpEDQPhhTQL.LLPLLQQLGpQSRW.LWLTPQQKLSRpWVQpSGLPLsKVhQlSQlsPhpTV-uM.RALpTGNYSVVlGWLs ....................................a.pStasp+uspasssspphAc.suspsssuGLlSElVY+EDQ.........PMMsQL.LLLPLLQQLG..Q..Q..SRWQLWLTPQQKLSREWVQuSGLPLTKVMQISQLuPpHTVESMlRALRTGNYSVVIGWL............. 
0 5 17 43 +4543 PF00916 Sulfate_transp Sulfate transporter family Bateman A anon Pfam-B_223 (release 3.0) Family Mutations in Swiss:P50443 lead to several human diseases. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.58 0.70 -5.40 34 7799 2012-10-03 01:44:59 2003-04-07 12:59:11 15 65 2994 0 2983 6879 2252 253.50 24 45.21 CHANGED hGlhRLGhllcalScsllsGFhuGsAlhIhlpQlcslhGlp....sppsthhslhpulhp.hpp..............hph.shlhuh.hLhhLhhhphls.....h......thhhhsssssLlsllluThhshhh........ptcthslshlGclssG..lsshslsp....hshshltphh.huhshullulhEulhsucshuthpshpl.DuN+EhlAhGhsNlluuhhushsuoGuhuRSslNhp.uGs+TtlSslltulhlllsllhlsslhthlPhulLuulllhsshu.Lhchpphhp..la+ls+hDhll ...................................................................................................................................hGhh+.l...Ghlh.p.a.lstsV.lhGF..hsulul.hI...hh..sQ.......ltt..h.h.....G.ht...........................t......t...h.....h.........t...........h.h.....t....s....l.h.......h..t.................................t.......s...h...h...h....u...h....h..s...l.h..h..l...h..h...h..h.hhs....................................................h..h..h...h...h..h...P...s.s.L...l..s..l.l.h......s...s.h.hshhh........................hhp.h.......s.l...s.....h......h.........G......p......l......s......su..........l.s.......s......h.......t.......h..P..........................h...............s.................h............p.................h............l.........t......t......l.....h..s.....s....u..h.....s........l.A...h....lu..h...l.........E...o...l.h..s............u....p..s....h......s......t.....h.............p......s......t..............p........h...s....s.........N...p...Eh..l.u..........Gl...u.Nlh...u...uh...h.u.u.h.s...ss.uuhuRos..........lNhp.u.G..........u+...o....t................luulhtu...lh.l....l...l....h.l.l..h....h.s....s....l..h..t.h...l....P.....h.us.Luulllhsshs.hh.c.h..p..p.h.h.t..hh.+.h.st.-h....................
................................................... 0 855 1655 2449 +4544 PF03856 SUN Beta-glucosidase (SUN family) Finn RD anon DOMO:DM02469; Family Members of this family include Nca3, Sun4 and Sim1. This is a family of yeast proteins, involved in a diverse set of functions (DNA replication, aging, mitochondrial biogenesis and cell septation)[1]. BGLA from Candida wickerhamii has been characterised as a Beta-glucosidase EC:3.2.1.21. 25.00 25.00 82.40 81.70 17.00 22.30 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.11 0.70 -5.08 50 297 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 121 0 204 287 0 247.20 45 59.94 CHANGED tDGshsCupF..Posp.Gllulsalu...GGWoul...hs...............s...sussCpcGsYCSYAC.sGhsKoQW.Po.sQsosGtSl.GGLhC+.sGpLa+oNss.tchLCt.GsG..sspspNpl.upsVAlCRTDYPGoEsMlIPThVsuG.s.psLoVsDtssYYpWpGhtTSAQ........YYVNssGVSsE-GClWGosuss..lGNWAPlshGA...Ghss.GhoaLSlh.N........Ps.spst.sFslKIhusssss.luGsCpY-..sGsasu...........sGusGCTV .....cGshsCupF..Posp..Gsluls.alu...GGWusl...hs.................s...supsCpsGsYCSYAC.PGhtKoQW..PS..sQsosGpSl.GGLaCc.sGpLh+osss..c.LC.tGsG..usp..shNch..up.sVAhCpTDYPGsEsMlIPThVsuG.sotsLsVscpss..YahWpG..TSAQ........YYVNssGVSs-DuClWG........o.s..u.ss..lGNWAPhshGAGhss.....G.tT......alSlh.N........Ps.s.sst.sFsl+Isusssus.lsusCph-.....sGsasu...............sGusGCTV............. 0 29 95 169 +4545 PF03439 Spt5-NGN Supt5; Early transcription elongation factor of RNA pol II, NGN section Bateman A anon Bateman A Family Spt5p and prokaryotic NusG are shown to contain a novel 'NGN' domain. The combined NGN and KOW motif regions of Spt5 form the binding domain with Spt4 [1]. Spt5 complexes with Spt4 as a 1:1 heterodimer snf this Spt5-Spt4 complex regulates early transcription elongation by RNA polymerase II and has an imputed role in pre-mRNA processing via its physical association with mRNA capping enzymes. 
The Schizosaccharomyces pombe core Spt5-Spt4 complex is a heterodimer bearing a trypsin-resistant Spt4-binding domain within the Spt5 subunit [2]. 20.20 20.10 20.20 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.70 0.72 -4.23 64 521 2012-10-02 20:41:53 2003-04-07 12:59:11 8 26 429 5 355 529 81 83.80 32 11.04 CHANGED slauV+s.psGpE+slshhlhp+.....ppps.......l.pIh..Slhssss....lcGYlalEAppp.sslppslpulhplpsh..........th..lslcEh.chL ..........plWsV+C.phGcE+plshtlhpKh.th.tssp.......l.pIhSlhs.-p.....lKGYIYlEA.+p.scVcpAlcGlsslhhth..............th..VPlcEhsclL............ 0 111 199 297 +4546 PF01975 SurE Survival protein SurE Enright A, Ouzounis C, Bateman A anon Enright A Family E. coli cells with the surE gene disrupted are found to survive poorly in stationary phase [1]. It is suggested that SurE may be involved in stress response. Yeast also contains a member of the family Swiss:P38254. Swiss:P30887 can complement a mutation in acid phosphatase, suggesting that members of this family could be phosphatases. 
20.90 20.90 21.20 21.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.54 0.71 -5.01 133 3045 2009-09-11 04:46:26 2003-04-07 12:59:11 12 9 2646 51 1005 2370 1174 189.50 39 68.06 CHANGED M+ILlTNDDGl....pAsGlpsLhcsL.pph...t.-..VhVVAPcpppSusupulTlpcPL+l.pph.............................h......haul..s.GTPsDCVtlulptl.h.tp...............P..DL..VlSGINpGsNlGpD.lhYSGTVuAAhEushhGlPuIAl....Shssp.........................t...............a....phAsphstpl.lpplh.ppsh.s........t....sslLNlNl..Ps......hs.hppl..pGl+lT+hGp+.tatpp .....................................+ILloNDDGl........pAsGlpsLtcsL..cph................s-....VhVVAP-.............p.s.+.SG....s.Sp.u....l.T...L...pp...P....L+.h.pht............................................................................ttta.ul..sGTPsDCVhlulssl..hpt....................PDl..VlSGINtG.sNl..GcD....llYSGT....VuA.AhEGt.h.h.G..lPA.lA..l.Shssp....................................................t.........c....a....csAup....h....s....ppl..lpplh.....pps....l....s..........s...........splLNlNl....Ps.......hs...hppl.......+Gl+lT+hGp+t...pt........................................................ 
0 297 627 859 +4547 PF02104 SURF1 SURF1 family Mian N, Bateman A anon IPR002994 Family \N 20.80 20.80 20.90 21.00 20.70 19.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.82 0.70 -4.50 181 1742 2009-09-11 11:18:05 2003-04-07 12:59:11 10 8 1323 0 681 1517 1849 208.90 23 76.92 CHANGED s......lhshslhls.LGhWQlpRhp.Kpsll.splppphp...s.sPl.slsth................t.............................................................pappVpls.Gpahsppphhlt.spsp.......p...................st........sGa....hVlsPhphs.........su.........phlLVsRGal.s..........t.tst............s......................................s..lplsGhl+......sp.h.tt.hh.............ts..s.t..sphhhs.........hDlst....hu.....pthsh..........htPhhlp....................................................................................................s..sh........shhp.........hsspHhsYAlpW..F..............uLAhh ...............................h.hhhhshhht..LGhWQl.pRhp...K....pphh....splp...pp..hp....t..sPl..slsph...................ts....................................................................................................................................paR+Vpls.Gpa.s.s.pp.hhlt..spsh................p.............................sp.....sGa....hVlTPhphs.........sG................phl..LVsRGal.st...p..t.s.ts......t........s..............................................................Gp.lsl..sGhl+..........spst.....t.hh.......................ts.....sss.....tthhht.............hch..s.t....hu.................p.thGh.............lts.h.h.lphstp.............................................................................................t.ss..........lss.......shhs.....................hsspHhuYAlpWauhuh.............................................................................. 
0 198 411 560 +4548 PF02077 SURF4 SURF4 family Mian N, Bateman A anon IPR002995 Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.74 0.70 -5.03 6 375 2012-10-02 13:32:46 2003-04-07 12:59:11 10 12 249 0 240 803 568 224.50 40 84.33 CHANGED N-hhu+sEDhAE.shh+ps+sYLPpluRLhLluTFhEDGlRhhhQWs-QhpYhspsWshsaahAslFllVsllu.LhGsshVhhRp+VshAsGlLhhlllLQslAYullhshcFLhRNhullGGLLLllAEohlcp+o..hFAGlPshs-..scsKsYh.LAGRVLLlhMFloLl+F-...hSahpll.sIlGsshhlhVsIGaKTKhuAlhLVlhLhshNlhlNuaWolPpppshRDFlKYDFFQTLSlIGGLLLllshGPGtlShDE+KKcW .......................................................................................tphE-hh-...ph.c.hK.aLPtluRhhllsTFhEDulRhhhQWs-Qh.Ylpth..hp.........h.....shhls..p..hFlhlNllu.lsusshl...lhRp..hsphAshsLhsllhh.QsluY...u.l..l.....a....D..h...pF......hhRN.lul.hG.GLLlllu.-.....Shsct+p......hF...A..G..lPp.ht-.....pp.KtYh.LuGRlLLlh..hFh.....s.....h.lh.....p....................ho.h..h.......p.........l........l.............s.l.....lG.h...h...h.......h....l........hVsl.Ga.K....sKh...uAhhLV........lh.L.hhN.l.h.h.NsaW....s.........h..............cs.h.+D..FhKY.D.FFQ.sl...Sl.lGGLLLlVshGPGtlShDE+..KK.a............................. 0 72 116 186 +4549 PF01617 Surface_Ag_2 Surface antigen Bateman A anon Pfam-B_1042 (release 4.1) Family This family includes a number of bacterial surface antigens expressed on the surface of pathogens. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild --amino -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.63 0.70 -5.17 10 3846 2012-10-03 17:14:37 2003-04-07 12:59:11 12 3 1018 0 167 4193 20 149.70 31 84.65 CHANGED ssuGuFYIuscYsPuhupFssFSscEs.....ps.TpsVFGlKpDhsshshspststs....Fs..sYshpapsN.FhGFuGAIGYuMsGsRlElEsuYEpF-sK.....Np.......................................Gsshcs..............................................................DA+casALo+p..s....t.shsssphlhlcN-ulsshSlMLNuCYDlhpEGlPloPYlCAGVGuDhIS.....hhpshNPKhSYQG..KlGlSYsIoPElSlFsGG+YH+VlG.NcFc-Is.shpsssssssup....sthAtlTlsssaFGsElGsRFsF .......................................................................................................................ss.............................................................................................................................................................................................................................h.u.......t.s...Rh-h-h.hp....h.h.......................................................................................................................................................................................................................................................................................................................................................................................................s..culss.hS...sh.lNshYD...l...h.h..-...s...h.......s..l...oPY.....l..ssGlG.........u..s..hls.........................h.hs..p.....t....t.....h..uatu..........KsGlSYp.l..o..Pclp.lasG......uhY...atshs..tp........a...t........................................................................................................................................................................................................................................................................................................................... 
1 26 99 118 +4550 PF00084 Sushi sushi; Sushi domain (SCR repeat) Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.70 17.00 20.70 17.00 20.60 16.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.52 0.72 -3.62 66 23751 2009-01-15 18:05:59 2003-04-07 12:59:11 15 763 241 368 12142 20429 36 57.10 26 31.65 CHANGED Cs....P......h..s..ht.t.......spaphssplpapC.psGaph.tut......sthpC...t....supWsst......s.p..C ...........................................Cs....Ps...............pG....hp..........................ssaphG.s.p.lp..a...p..C..p.....s......G......a...pL..tGs.........................sphpC.........ts..........su..p...Woss........P.t...C.................... 0 3225 3866 7035 +4551 PF04099 Sybindin Sybindin-like family Wood V, Finn RD anon Pfam-B_3240 (release 7.3); Family Sybindin is a physiological syndecan-2 ligand on dendritic spines, the small protrusions on the surface of dendrites that receive the vast majority of excitatory synapses [1]. 
21.30 21.30 21.40 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.80 0.71 -4.48 11 676 2012-10-04 00:47:01 2003-04-07 12:59:11 7 13 308 17 471 982 9 142.70 28 83.69 CHANGED lasLYlhN+uGsLla.p-appsh.t.............................................................................thsoNEpllluuhhaSl+uIsuplSP............h.up...........sGlchlEossF+LahhpThTGlKFlllT-sss.sth-sLl+hhY-LYoDaVlKNPFYsl-MPI+sELFcppLcphlcsh .........................................................................................................laslalhs.+t.Gshla...pca..t.t...s...t...............................................................................................................t.h.s....ss-...thhl.huhha.........S..lpuhsppLo.P...............................................................................suh...p....h.c.T.spa...+...L.ah.a..pT...T....Gl..K....Fll.h..o......-.s.........t..............t.......................s...h.....h.....c.....s........L....h....p....h...h.h.plYs-a.....V..l..K..NPh....ap.....h......c.....h....P.....l...p.s.-.hFcpplpthlp.h.................................. 0 156 257 389 +4552 PF02383 Syja_N SacI homology domain Mian N, Bateman A anon Pfam-B_1090 (release 5.2) Family This Pfam family represents a protein domain which shows homology to the yeast protein SacI Swiss:P32368. The SacI homology domain is most notably found at the amino terminal of the inositol 5'-phosphatase synaptojanin. 
20.40 20.40 21.30 20.40 20.30 20.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.67 158 1561 2009-01-15 18:05:59 2003-04-07 12:59:11 13 28 323 1 1068 1526 19 302.60 26 34.15 CHANGED saGllGllcl..ps........shaLlllTpp.ppVuplt............c.........sl..a.+lpssphls..lspsthp...................................................................................................................................................................................p....pphhphlcp...........lh....ps...ssFYFS.h..s.............aDLT.............pol.....Q.....ppttttt..........................................................th-.pcFhWNpalhpsl.hphp.........................................tsppalhslIpG.............................aspptphtls.........................................................................t..hplsLIoRRSpcRAGTRahpR.Glc-c..GpVANaVETEQIlh..................................s................................................................s....................................sphhSalQhRGSlPlaWpQ..s.....s..ht........hpPplpls.sh-ssh..tuhscHFppL.hppY....................GslhllNLl...........pp+spE..thLsptapptlphh................................p..tpp...........lp..ahtFDFHpp......s...pt..hchcs 
.......................................................................................................................................................................................................hGllGhlph...t........................hallllTpp.pplu.p.lh.............c...................l...a.clpssphlslppsthp............................................................................................................................................................................................................ppp.hhphlpp...lh.......ts..ssF..YF....S..h......s...................aDlT.....................pshQ....cphttt......................................................................................................................................................tphc.pp.Fh....WNphl.hp..l..hpht.....................................................................................hppahl.sll...pG.............................ahp...t.p.h.ht.......................................................................................................................................tp.hthsLIoRRSpc...+.........AGsRa.hpRG.ls..cc..................GpV.ANhVETEQllh...............................................................................................................................................................................................p..............sphhS..a...lQ..........hR.GS..lPlaWpQ...ss...hp...............hpPp.lpl.....s......th......-s.sh......tuh.p.......pHFppl..hp.pY...............................u.h.h.llNLl..................pp+...stE....thLppt..apptlp.hh............................................t.ttp............................lpahtaDaHp.s+t.....p.................................................................................................................... 
1 394 614 887 +4553 PF02078 Synapsin Synapsin; Synapsin_N; Synapsin, N-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon IPR001359 Domain \N 20.40 20.40 20.70 30.30 20.10 18.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.39 0.72 -3.91 4 232 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 91 20 115 258 0 96.20 58 19.27 CHANGED sstR.s+lLLVID-PHTDWuKYF+GKKlhGDhDI+VEQAEFSELNLsAassGGhhVDMQVlRNGTKVVRSFKPDFVLlRQHAauMA.sEDaRsLlIGhQYuGlPS ...................s....p.+hLLVlD.-.pTDWuKhF+GKKlp.G-aDI+VEQAEFSElNLsAausG.uhsVDM...........pVhR.....NG....o...KVV....RS.F+PDFVLlRQHAauM.u.scDaRsLlIGLQYuGlPS.................... 1 22 30 66 +4554 PF02750 Synapsin_C Synapsin, ATP binding domain Mian N, Bateman A, Griffiths-Jones SR anon IPR001359 Domain Ca dependent ATP binding in this ATP grasp fold. Function unknown. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.35 0.71 -5.22 4 298 2012-10-10 13:17:03 2003-04-07 12:59:11 9 8 99 20 154 323 10 166.60 58 35.89 CHANGED NSLaSlYNFCDKPWVFAQLlpIaKpLGsEcFPLIEQTaYPNHKEMLosPsFPVVVKlGHAHSGMGKVKV-NpHDFQDIASVVAlTKTYATsEPFIDuKYDIRVQKIGsNYKAYMRTSISGNWKsNTGSAMLEQIAMo-RYKLWVDoCSEhFGGLDICAVcAlHGKDGRDYIIEVhDSSMPLIGEHQ-ED+QLIsELVlsKMsQ ............................................NSL.SlYNFpsKPWVFuphlp.l.+pLG...............-pF.PLl-QTa...a...P...Na+........p..M.....l..o.h.sp....F..PVVVKhGHAHuGhGK..........l.KV-Nph.DF...QDI.uSVV.A.....hs.....p..T.......Ys..TsE..PFI.D..u.K.Y.....DlRlQKIGsN.Y..K.A.YM................RTSIS.G.NWKsN...T...G.S.AMLE.Q.lA.Mo.-RY....+l...WV.Ds.C..SEh..FGGLDICAVcAlHuK..DG+..DaI.h.E.....V.....h.....ssoMPLIG-pt.tED+pLIs-LV...ls+Ms......................................... 
0 29 39 95 +4555 PF00957 Synaptobrevin synaptobrevin; Synaptobrevin Finn RD, Bateman A anon Pfam-B_303 (release 3.0) Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.66 0.72 -4.38 111 2176 2012-10-03 05:55:03 2003-04-07 12:59:11 16 31 380 40 1410 2035 37 80.20 29 38.98 CHANGED ssc+lpplpsplcclpslMpcNl-.+llcRG-+l-tLsc+o-sLpssuppFcppupcL++phWW+Nh....KhhlllshllllllhlIllhhss ......................t..cpltplpsplc-Vps.......l.MpcNI.-.c...........V.L.cRGE+L-........p....L...s...-.......+o.-sLp.ss.u....p.pFc...ppApcl....p....pph..hhpp.h.......thhhhhhhhhhhhlhhh................................ 0 499 779 1125 +4557 PF01284 MARVEL Synaptophysin; Membrane-associating domain Finn RD, Bateman A, Yeats C anon [1] Domain MARVEL domain-containing proteins are often found in lipid-associating proteins - such as Occludin and MAL family proteins [1]. It may be part of the machinery of membrane apposition events, such as transport vesicle biogenesis. 
32.20 32.20 32.20 32.20 32.10 32.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.85 0.71 -4.37 93 2286 2012-10-03 17:26:12 2003-04-07 12:59:11 18 15 277 0 1320 2031 0 146.20 17 64.66 CHANGED hhh..tsllRhhp...hlhu..lllhulsushh.......................................................htt..sthsashhsushshlh.shhhlls.h........hh.thshshlhhsh-hlsslhahsuhsshAsthps..................................................................tt....sttspptpAussFsahshhlahssshh .........................................h......hhl+hhp....hlhu.llsauhh.ushh.................................................................st....s.s.....stp.a..hlhlush....salh...sl..hh...ll...h..hh.........hht.p...h...th.s...h..h..s...h.hh.ss.l..h..shh........a.h.s.ushhh.u.tt.hss................................................................................................t...h.sthtAu.........s.s..Fu..ahshhhahsshh............................................................................................................ 0 214 413 813 +4558 PF01034 Syndecan Syndecan domain Finn RD, Bateman A anon Pfam-B_1182 (release 3.0) Family Syndecans are transmembrane heparin sulfate proteoglycans which are implicated in the binding of extracellular matrix components and growth factors. 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.17 0.72 -4.32 8 599 2009-09-13 18:19:46 2003-04-07 12:59:11 15 20 109 8 276 557 2 67.10 41 10.96 CHANGED SpslhcRpEVL....AAVIAGGV.VGlLFAlhLVhFllYRM+KKDEGSYuL-EPK...u....Nuu.YQK.sss+EF ............s...............suhllGhV.suhhhsIhll..LahhY...+h..Rp+DEGSYpl-E...s+.......s....................................................... 0 38 59 144 +4559 PF01387 Synuclein Synuclein Bateman A anon [1] Family There are three types of synucleins in humans, these are called alpha, beta and gamma. 
Alpha synuclein has been found mutated in families with autosomal dominant Parkinson's disease. A peptide of alpha synuclein has also been found in amyloid plaques in Alzheimer's patients. 21.10 21.10 21.20 23.20 20.90 20.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.69 0.71 -4.24 4 229 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 66 9 81 186 0 111.10 60 94.74 CHANGED MDVFhKGhShAKEGVVAAAEKTKQ...........GVsEAAEKTKEGVhYVGoKTKEGVVQuVsoVAEKTKEQAssVGGAVVouVssVApKTVEGAtNIAAAoGlVK+--hsp.....psPQEtstEsh.p...-P.sEuhEtspppG .............MDVFMKGhShAKEGVVAAAEKTKQGVsEAAtKTKEGVhYVG...........oK..TKE.GVVpu..Vs.o.....VAEKTKEQsstVGsAVVouVssVApKTVEG...AtNIA.AATGlVKK-phsp......th.sppth.t................................................. 1 4 10 31 +4560 PF00837 T4_deiodinase Iodothyronine deiodinase Bateman A anon Pfam-B_1631 (release 2.1) Family Iodothyronine deiodinase converts thyroxine (T4) to 3,5,3'-triiodothyronine (T3). 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.41 0.70 -5.36 4 345 2012-10-03 14:45:55 2003-04-07 12:59:11 12 5 107 0 155 377 21 150.70 28 86.45 CHANGED LhhphLhsLlllPhFlusshhLhLLD.spl++Hllthtp+...................sPshshu-hs.RhhThpuL+sVW+uQhLDhaKps+.GGsAPNopVVplsGpc..................C+ILDFupGpRPLVLNFGSCTUPPFhu+hsAFpRLlpcapssADFLllYIEEAHPSDGWshssss...apI.pHQsLpDRlpAAplLLptA...PsCtVVsDTMsNsSstAYGAhFERLaVlQcG+IhYpGG+GP.uYplpElRsWLE+hp 
................................................................................................................................................................................................hhhs.t.h..h....h.....p.h.p.ts...........Gt..APs...s.lh...tt....................................................................p....t..hp.hhlYltEAH.........s.........s.Dt....W..........................h..h......p.ps..p-Rh.hAp..h.....tt........................h..hhDth.ts.....at.....hh....................................................................................................... 0 50 60 90 +4561 PF03903 Phage_T4_gp36 T4_tail_gp36; Phage T4 tail fibre Finn RD anon DOMO:DM03599; Family \N 27.30 27.30 27.80 27.70 27.20 27.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.32 0.70 -4.25 6 60 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 45 0 2 59 0 172.50 31 32.80 CHANGED MADLKlGSTsGGSVIWHQGNFPLsPAGDDlLYKoFKIYoEYNKPQAsDNDFVSKANGGT....YtppVhFpcGlslsss...ssshsGIasGsGDGAoh-ssshclhSWhGIGFcsu.....ptsGsttlhhsspssphssRuslpus...hs.s.sP.ss.cLTRK..DY...VDusINTVTA.........NANSRVLRSGD..TMTGsLTAPNFFSQNPASQPSHVPRFDQIVIKDSVQDFGYY .............................................MADLKhGoThGGs.lWpQGNhsL.PsusplhYKsa+.lYoE.sKPpA.s-..shVS.pusGGs...........h.t.lth...ppul..ph......stt..shhhuttsuss...........ts..sh..h.u..shuhtst...........ts..hhh...s.spss.httc.h...h.st................hthp..D....V.u.hshsst.........Nstph..ptss..shsG.L..sssphh..t................................................................................................................................................................ 0 0 0 0 +4562 PF03906 Phage_T7_tail Tail_fibre_T7; T7_tail_fibre; Phage T7 tail fibre protein Finn RD anon DOMO:DM04804; Family The bacteriophage T7 tail complex consists of a conical tail-tube surrounded by six kinked tail-fibres, which are oligomers of the viral protein gp17. 
21.20 21.20 21.20 21.80 21.10 20.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.83 0.71 -4.59 10 136 2009-09-10 23:06:56 2003-04-07 12:59:11 9 13 86 0 4 133 238 162.20 35 27.80 CHANGED MAsT....ohhsYshsGosTsFsIsFE.....YLARpFVsVTLlu.....Dp+hLslNsD.YRFsssTTIohopA.hsPAsGasl.IEIRRhTusTDRLVDFsDGSlLRAhDLNlSQlQoLHlAEEARDhs.......ADoIGVssDGslDA+..GR+IVN.LAsussstDAVshtphp.shssosh .............s...hhsaphDGsspsFslsFt.....YLs+p..VhVol.t......-pp..hsls.s-..Ypas..spsoIp..lspA......PA..s.........G......sp.lcl+RsTs..ssshLl-FscGShL.....puhDLshsphQshalApEut.Dhs..........................ss..sh.u.lssc..s.clDAc......uc+Iss.husshs.stDsssht.hp................................................................................................................... 0 1 2 4 +4563 PF02217 T_Ag_DNA_bind Origin of replication binding protein Bateman A anon Pfam-B_827 (release 5.2) Domain This domain of large T antigen binds to the SV40 origin of DNA replication [1]. 25.00 25.00 34.90 34.50 19.10 18.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.25 0.72 -3.59 11 427 2012-10-02 18:54:06 2003-04-07 12:59:11 11 5 43 20 0 407 0 89.50 71 14.60 CHANGED .DhPssLcuaLSpAlhoN+T.ssFLlaTTpEKsppLYspl......p+asspaphh.tpasssth.LallTss.+HRVSAVpNaCpKhCTVSFlhsKGVpKp ......KDFPsDLHsFLS..QAVFSNRTlAsFAVYTTKEKAQILYKKLM.....EKYSVTFISR..Huh..u....uHNI.....LFFLTPH.RHRVSAINNaCQKLCTFSFLICKGVNKE... 0 0 0 0 +4564 PF05010 TACC Transforming acidic coiled-coil-containing protein (TACC) Moxon SJ anon Pfam-B_4807 (release 7.6) Family This family contains the proteins TACC 1, 2 and 3 the genes for which are found concentrated in the centrosomes of eukaryotic and may play a conserved role in organising centrosomal microtubules. The human TACC proteins have been linked to cancer and TACC2 has been identified as a possible tumour suppressor (AZU-1) [1]. 
The functional homologue (Alp7) in Schizosaccharomyces pombe has been shown to be required for organisation of bipolar spindles [2]. 28.80 28.80 28.90 30.30 28.50 28.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.50 0.70 -4.56 13 318 2009-12-17 14:40:01 2003-04-07 12:59:11 9 5 92 0 143 281 0 199.10 50 21.90 CHANGED paSQKDhDAslphs+cEh..p-...........cs.-h+pKaEchppcshEMtKIlsEaEcTIsQhlE-.......................sp+p...Kplo+cplQcllpEK-Q......................shuDLNShE+SFS..................................................DLFKRaEKhKEVlEGa+K.......................................................NEEsLK....KCsp-YLsRl+KEE.................QRYQALKsHAEEKLc..................pANcEIAQVRoKApuEssALQAsLRKE.......................................QM+lpSLE+sLEQK...........................sKEh-ELTKICD-LIuKMtK ...........................................................................................h..pp.Dhsuslphh+pE..lhppE....................hEspEh..+cKYE.Ep+pEsh.EMcKIVuEYEK..TIAQ..MI...E..-..........................................................................................................cQ+p.........pphS.p...pslQpLh.hEK-Q..............................................................AhADLN.SlE+Shu..............................................................................DLF+RYE+hKpV...lEGa+K.........................................................NEEsLK......KCAp-YLuRl+.pEE..................QRYQALKhHAE...EKL-..................+ANpEIAQVRsKApsEpsALpAuLRKE.......................................Qh+V..cSLE+sLpQK...........................s+EhEELTKICDELIuKhtK......................................................... 
0 36 46 81 +4565 PF02202 Tachykinin Tachykinin family SMART anon Alignment kindly provided by SMART Family \N 20.50 20.50 20.90 20.50 20.10 20.40 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.38 0.74 -5.50 0.74 -3.71 12 43 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 25 3 5 60 0 10.90 62 26.56 CHANGED pPcPspFhGLM +P+PppFaGLM 0 0 0 3 +4566 PF04972 BON TAD; BON domain Yeats C, Bateman A anon Yeats C Domain This domain is found in a family of osmotic shock protection proteins (e.g. Swiss:P27291). It is also found in some Secretins and a group of potential haemolysins. Its likely function is attachment to phospholipid membranes ([1]). 23.60 6.70 23.70 6.70 23.50 6.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.71 0.72 -4.22 187 8163 2009-09-11 21:46:55 2003-04-07 12:59:11 12 45 1945 6 2280 5479 2633 61.90 25 43.04 CHANGED psplpstLttp.....tlsst.s.lpVpsp.sGtVhLpGpVs.stpptptAtplApslpG.VppVh.stlplts ...............................tplpstLhtpt......lpsp..p..lpV..psp..sG..tVhLsGpV...optptpp.Atpl.A..p.s..l.sG...Vp..p..Vt.splph..t...................... 0 572 1203 1719 +4567 PF02969 TAF TATA box binding protein associated factor (TAF) Griffiths-Jones SR anon Structural domain Domain TAF proteins adopt a histone-like fold. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.83 0.72 -4.03 10 386 2012-10-10 12:36:46 2003-04-07 12:59:11 12 9 260 1 262 923 23 63.60 41 12.60 CHANGED sollPpEShKVlAESlGIusLs-EsuphLA.DVpYRl+EIsQ-AlKFM+HuKRp+LTssDlDpA..LR ........................................hhst-ol+slAESlGl..s.s.Ls--ss.ph..LApDVpYRl+clh..p-AlKFM+HuKR....p....p.....LTspDlspAL+.................... 
0 93 143 209 +4568 PF04658 TAFII55_N TAFII55 protein conserved region Waterfield DI, Finn RD anon Pfam-B_4395 (release 7.5) Family The general transcription factor, TFIID, consists of the TATA-binding protein (TBP) associated with a series of TBP-associated factors (TAFs) that together participate in the assembly of the transcription preinitiation complex. TAFII55 binds to TAFII250 and inhibits it acetyltransferase activity. The exact role of TAFII55 is currently unknown. The conserved region is situated towards the N-terminus of the protein [1]. 25.00 25.00 30.10 27.40 23.40 22.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.18 0.71 -4.74 35 402 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 286 0 275 378 1 157.60 35 40.36 CHANGED lEpQhILRh.P......pss-hl+pulpssshs...........slsh+....p-tR+AsVplssphau..ApLVDLPsIlEuhKThD+.KshaKoADIsQMLlshp................lps-p.shpht.........................................ph.tp.tpcpapa...HGlTPPh+slR+RRFR.+chsc...............pthpplEccVccLLct.............DpcAp......s ......................................................lEpQFILRhss...................p.sphl+phlppsphs.p.................hslphc.........tDsR+uhlpls..s..........t...hs..ApLVDLPsllEuh.K.ThD+.KshaKoADIsQ....MLlstts......................cp.shp.s..................................................tp.htpcpa.as...HGlTPPh+slRKR.RFR.Kphpp........................hph.clE....c-VccLLptDtpA..s................................................ 0 89 145 216 +4569 PF05069 Phage_tail_S tail_comp_S; Phage virion morphogenesis family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Protein S of phage P2 is thought to be involved in tail completion and stable head joining. 
24.30 24.30 24.50 24.40 24.20 24.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.09 0.71 -4.44 46 1328 2012-10-01 19:49:39 2003-04-07 12:59:11 8 5 800 0 174 1065 22 130.20 23 91.49 CHANGED s-hptlpphLstLhtp...sss+ptlh+pluppL+cupppRhptQpsP.DGssatshp.......htt+ps+h+ct..hhpph....phuchlpspsssssss....hGssphhAtlHQaGh....................+s.....t.....plp...............hPsRshL......GloppDcphItchlhpaLs .............................................h...tthpphhptlhtt......tthpt..hhpplup.p.lcp.s.p.ppph.pt...pp.s.P.cG..p..satshp.....................h.+..t..p.....t.t..........p..h...pp....t.....h.............htch............ph.tp..lp..h.p.s....ss.s..ts............G...p.s....t.....h..Atl....HpaGh....................................................................pt...............hp........................................................................hstR.hL.........Ghs.tt.s.......p.l.phl.....h............................................................................................................ 
0 44 110 145 +4570 PF02203 TarH Tar ligand binding domain homologue SMART anon Alignment kindly provided by SMART Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.68 0.71 -4.42 88 3556 2012-10-02 01:04:29 2003-04-07 12:59:11 10 31 949 15 528 4206 102 164.70 20 30.56 CHANGED hhp+lsIpspLhhlluhlslLhlssusLuhhuhppuspslpphhpsphtppttlspu...hhpsRhsLsR...hhhhsts...t.....sphlspA.cptlspuppsaptahshsp..ss.t-pp.hssplpppapp.hppulpshhshlpuu.shsthhphsspphpshapshtpshtp....pssps ........................hppl..+lhstLhh...lLuhh.sl..L....l..s.o..uul.shhu...lp...p...s...p...p...sh......sh.pp.....t.p.p...p.s.t..Lsp.s...h...s..........h..h....p....s.R..h...s..Ls+s...............hthhh.sp.....t........................tphlss....A....p....pp....L.p....p...u....p.p.t...a....p....p....a....t........s....hsh.......ts....tst...s...hsp...plpp...p....ap.t....h...ps...ulpt....h....h..p..h.hps....u...p....hs...s..a..hs...t.s..s..p...t..hp..shhpthhtsh.t............h.............................................................................................................................. 0 59 179 344 +4571 PF00539 Tat Transactivating regulatory protein (Tat) Bateman A anon SCOP Family The retroviral Tat protein binds to the Tar RNA [4]. This activates transcriptional initiation and elongation from the LTR promoter. Binding is mediated by an arginine rich region. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.89 0.72 -4.35 83 7178 2009-09-12 06:54:57 2003-04-07 12:59:11 13 3 90 16 0 5391 0 64.90 66 73.19 CHANGED M.....-slDPplEPWp+PGSQPpTsC.NpCYCK+CCaHCQ.....lCFlpKGLGISYGRKKRt.R.R+sspsspsH .................M......EPVDP..pLEPWpH..PGSQ.PcTAC.ssCYCKKCCaHCQ.....VCFlpKGLGISYGRKKRR..QRRRs.Pp.supsH............................... 
0 0 0 0 +4572 PF01026 TatD_DNase UPF0006; TatD related DNase Bateman A anon Pfam-B_1370 (release 3.0) Family This family of proteins are related to a large superfamily of metalloenzymes [1]. TatD, a member of this family has been shown experimentally to be a DNase enzyme. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.93 0.70 -5.18 98 8430 2012-10-03 00:45:34 2003-04-07 12:59:11 16 26 4852 19 2301 6563 3019 252.30 29 93.02 CHANGED lDsHsHLsh..th.................tp............hsphl.pc.....sppsslptl.l............................ssussh................t.php..pshplucp..ast.lasslGlHP...tpsp....................................tpchh..........ppl..pph...h.pcscllAIGEhGL.....Da..............hps.sst...........ctQpc.lFcpplplAcch.shPlllHsRc......Apc.........-hlcll........................cp.t.hsth...tslhHs............................................aoGshchspphl.ch.....GhalSluuhlsa.p..upp.....hpcll..pplPh-+lLlETDuPalsP.................................sh+........G.ppNcPshlhpssctlAc.l+s.............hsh--ltphsppNspclF.s 
..........................................................................................................................................hDoHsHLs.....ta....................t.t.-................................hspll....tc...A.t.p.s.GV...p....t..h..l.....................................................s..s.u.ssh......................................................p..shp......psh.p.L.upp...............as..t..........l.....a..s.sl.G.l.....H.....P..........hp.spp............................................................................................h..pptsh............ptl..pph...........t....pps.....c....l..V..A..lGE..h...GL.Da....................hhpt.sst..............................................phQpc....sF..ppQl.....p..l.....A.....p.c......h.....s.......h....P.....ll..lHsRc........................App..............................................-.h.h.plL...........................................................................................cctt...stt..............sGlh.H..s...............................................F..oG..s....h.c.h...A.pphl..ch..................................Gha..lu..hu..G...h..lT..acp......upc.................................l+-sh.........ptl.P..l..-.....+l...LlET....D.u....PaLsP..........................................................................................hPh+......................G...cpNcPsh.....l.ht....l.s...chl....Ap..l+s......................h.s....h.-...cluph.oppNstplFt..................................................................................................................................................................................... 0 745 1397 1904 +4573 PF03430 TATR Trans-activating transcriptional regulator Finn RD anon Pfam-B_4420 (release 6.6) Family This family of trans-activating transcriptional regulator (TATR), also known as intermediate early protein 1, are common to the Nucleopolyhedroviruses. 
20.40 20.40 25.10 22.50 20.10 19.60 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.95 0.70 -5.88 5 60 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 43 0 1 63 0 525.40 31 92.51 CHANGED pMpslppS...YsusSTPs+ssFspu.pE.sshp.p...ph.phsT-hsschshssh-suh.cos.ss...shos.u.hps.hcpscp.spAssp-ascEpstpshsEtsouhosp........t.hsEohscottsssspR+uS..............ElDSDsDsu-SScG...cKhssKPKhRp+YKKuTIQsssoLoccppasTpICTVAssspIs+YFtpD......................................FusaLpch+o-sshsuNRFSDYISETGYYMFVVKKuEc...KPFE..VlFAKaVsNlspEYTNNYYMVDNRVFVVSlN+lRFMISYKLV+EpGI-IPPSpslCsDApAER.....sshKCYFs-VKc.sFpssLINaFNLDMaYuQTTFVTLMQSlGEsKosMLLNKLYcMaQD+oLFTLPIMLSRKEPslE-s...........spsssasSsYVuQIlKYSKsVpFPpssPsptVhDcL..................slIVTQKSoLTYKYSSVANLLFscYt.p....pDNNA-uLKKVKKE.DGsttLVEQYLotNpN.DcTSHNFIVLsFK..NDERLTIAKKGhEFaWIoGEIKDIsVsDLIcKYs.RasHHVF+IsNVNRRESTThHNNLLKLLuLlLQNLlcL-DlpcaAspshsCpYc .................................................................................................................................................................................................t.........t...........t..t......tp.h....h..stp...p..p.............sps......t.pt.tppso..................................................................c.spp..csspusp......pp.h.+s...Kh...c....phcKtthps.p.tphp.....pp.p.s.p..l.sp.lts.......tph...s..p..hh.c...........................................................htshhtph.....s...spt............pspR.Fssah.psuYYMFlVp.cscs.....c.sFc.........lhasphVpsVs.EYsspYhhlDphVhVVohs+hRFMISYpLlpchtIcIP.ppphspc...thtpp.....ss.p..CaFp-VKs.tFhshLhshFpLDhhYsQsphshLhpSlGEpKsthlhpplhpMhpD+sLFTLPl.loRKEs..pps................................sp..sSsYVppIlchScs...lpF.p....s....s.....sph..hhsp......l...................................s...hhp..pp.s.hTYKYuSVAplLasp.............ppps.sspLhKlKKE.sGshtLlEpYLststs.s.puaNFIllshK.....sDERlTIlKps.-FhWIsu.I.KD...I.ssDlIpKYp.pasHHlFslspsNR+E.sshHNshlKLLuhhhpsllsls-hhphAppphsCpa............... 
1 1 1 1 +4574 PF02668 TauD Taurine catabolism dioxygenase TauD, TfdA family Bashton M, Bateman A, Mifsud W anon COG2175 Family This family consists of taurine catabolism dioxygenases of the TauD, TfdA family. TauD from E. coli Swiss:P37610 is a alpha-ketoglutarate-dependent taurine dioxygenase [1]. This enzyme catalyses the oxygenolytic release of sulfite from taurine [1]. TfdA from Burkholderia sp. Swiss:Q45423 is a 2,4-dichlorophenoxyacetic acid/alpha-ketoglutarate dioxygenase [2].\ TfdA from Alcaligenes eutrophus JMP134 Swiss:P10088 is a 2,4-dichlorophenoxyacetate monooxygenase [3]. Also included are gamma-Butyrobetaine hydroxylase enzymes EC:1.14.11.1 [4]. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -12.03 0.70 -4.72 123 5659 2012-10-10 13:59:34 2003-04-07 12:59:11 11 59 1664 78 2098 5388 5861 240.80 19 77.33 CHANGED ph..psls.......................hs.t.........................phtpltptl.tppGlllh+...shs.......h.......sspph.ht..................hupth..G....l....h........................................................tt........pssshhsts........................hsaHsD...s.ah........tssth....thLhsh..psss...............pG...Gp..Thhss..shtsappLs.........sphhptl..p.slphhpshttthh.....................................................ttpP.....l..lphc.............s..sucpshasss.........h......th........................................thscups.....hh.....ctlhp..hh.p.....ps..chphpapap.t.....GDlllaDNtpshHuRs.sa........................stpRplh+sh 
........................................................................................................................t...................................................httl.hth..l....ph.....thl.hh+.......s.................h.........s.tt.....t..........................hspth........G...............h...............................................................................................................p.t....t..................................................h.HsDhs...a.................tssth...........thltsh..phst.....................................tG.......Gp.......T.h.a...s..s..hhtAa..ct..Ls..........sthp.phl.........p....slps.h+.s.httsht...............t......................................................................t....hhpP.....l......Vc.pH.............................P....o....G....c..p..s....l..ahst.......hspph...........sh.........................................................................s..s.-.u.pt......ll...............p.Lhp....th..p........ps.......chth.p.a.c.Wp..s....................GD...l.....hhhD.......N.t..ts.Hht...s..........................t..Rhhhth................................................................... 0 584 1221 1766 +4575 PF01361 Tautomerase Tautomerase enzyme Finn RD anon Prosite Domain This family includes the enzyme 4-oxalocrotonate tautomerase Swiss:Q01468 that catalyses the ketonisation of 2-hydroxymuconate to 2-oxo-3-hexenedioate. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.42 0.72 -4.36 28 2760 2012-10-01 20:38:22 2003-04-07 12:59:11 16 3 2116 139 636 2083 293 59.20 29 84.64 CHANGED Phlplclh...cGp....osEQKcpLlcclT-shscshGt.spssltVllcEhspssauluGcshspt .....................Phlplclh........cGc........opEQ.KppLspclT-s..lsc.....s....h.ss...s.....p.p....s..l.p....VlIpEhppssauhuGp.h...t......................... 
0 159 365 504 +4576 PF02959 Tax HTLV_tat; HTLV Tax Bateman A, Jeang K anon Pfam-B_1456 (release 6.4) Family Human T-cell leukaemia virus type I (HTLV-I) is the etiological agent for adult T-cell leukaemia (ATL), as well as for tropical spastic paraparesis (TSP) and HTLV-I associate myelopathy (HAM). A biological understanding of the involvement of HTLV-I and in ATL has focused significantly on the workings of the virally-encoded 40 kDa phospho-oncoprotein, Tax. Tax is a transcriptional activator. Its ability to modulate the expression and function of many cellular genes has been reasoned to be a major contributory mechanism explaining HTLV-I-mediated transformation of cells. In activating cellular gene expression, Tax impinges upon several cellular signal-transduction pathways, including those for CREB/ATF and NF-kappaB [1]. 20.60 20.60 22.60 22.20 19.10 18.50 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.57 0.70 -4.89 4 661 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 18 5 0 385 0 177.20 81 65.43 CHANGED HFPGFGQSLLFGYPVYVFGDCVQGDWCPISGGLCSARLHRHALLATCPEHQlTWDPIDGRVlu.....................................................phEPTLGp+LPoLuFP-PGLRPQNlYThWGtoVVChYLaQLSPPlTWPLlPHVIFCHPtQLGAFLTpVPhKRlEELLYKluLTTGslIlLPEDsLPTThFQPuRAP ...............................................SLLaGYPVYVFGDCVQGD.WCPISGGLCSARLHRHALLATCPEHQITWDPIDGRVIGSALQaLIPRLPSFPTQRTSKTLKVLTPPhTHTTPNIPPSFLQAMRK.Y.SPFRN...GYMEPTLGQHLPTLSFPDPGLRPQNLYTlWGuSVVChYLYQLSPPlTWPLlPHVIFCHPtQLGAFLTNVPhKRlEELLYKIuLTTGAlIILPEDCLPTTLFQPsRAP.............................................................................................. 0 0 0 0 +4577 PF00683 TB TGF-bp; TB domain Bateman A anon Pfam-B_82 (release 2.1) Family This domain is also known as the 8 cysteine domain. This family includes the hybrid domains [1]. This cysteine rich repeat is found in TGF binding protein and fibrillin. 
20.90 20.90 21.00 21.00 18.70 20.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.77 0.72 -4.39 34 2359 2009-01-15 18:05:59 2003-04-07 12:59:11 12 321 62 9 1128 1987 2 42.80 35 12.21 CHANGED spCptslsst..sTKsp.CCCshGt....AWGss.CE..hCPhps.ospappl ...........tC.s.tsl.sst..sTKpp.CCC..o..h.Gp.........uW...G....ss...C..E....hCPhts..ospaptl............... 1 138 208 540 +4578 PF00566 RabGAP-TBC TBC; Rab-GTPase-TBC domain SMART anon Alignment kindly provided by SMART Family Identification of a TBC domain in GYP6_YEAST and GYP7_YEAST, which are GTPase activator proteins of yeast Ypt6 and Ypt7, implies that these domains are GTPase activator proteins of Rab-like small GTPases. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.90 0.70 -4.86 101 7104 2009-01-15 18:05:59 2003-04-07 12:59:11 13 151 344 13 4605 6792 78 197.50 19 30.10 CHANGED pphRtpl......W.hlhs.................................t....t..pttt.t....ppIc.hDltRThs...................ppt.ttpp.pLpplLpuauhhss...p.lG.......Ys..QGhshlsuhlL.......................hhh.s-pp....................uFhshsplh............thhhpshatpsh......t..lpthhh...hhcpllp.phhPc...lhp+.l.pc........slp..h.ashpW....hlsl.Fs.pshsh.phshR....lWD.hhh....pu......ph.lhp.hslu.ll.phh...cp.pll 
.....................................................................................................................................................................................................hR..hW..h.t......................................................................................................................................................p.......pt........I...p....hDl....R.oh...............................p.........h.ttt...t.ptL.....h.......p..l.......L....h.......s....a..u.h....h...ps..............p..lG.............................................Y...s....Q........G..........h..s.............lsuslL.....................................................................................hhh....sEtp........................................uFhhhs.tlh..................hhhp..s..ha.p.s.h.............................s...........hptpht................hhp.p.l..l..p...ph.........Pp.....Lhp+..l...pp......................................h.sl..p....t..h....a.s.hpW.............hh.sl.F..........t..p.........p...........h..s.......h....p.............sh+................lWD.hhh.....pu...................................ph..lhh....lslu..ll...h.pt........................................................................................ 1 1678 2429 3584 +4579 PF02970 TBCA Tubulin binding cofactor A Griffiths-Jones SR anon Structural domain Domain \N 23.20 23.20 23.40 23.20 22.90 23.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.78 0.72 -3.91 38 366 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 285 4 243 354 0 91.30 34 79.30 CHANGED QLcIKsusl+RLlKEcthYccElppQpp+lc+h+sc....st.-cYslKpQ..cpVLpEoptMlPchpp+lppshpcLpph..lpstct.....hc-hsptp......cA ..............pLcIKTusl+RLhKEcthYc+EhcpQcp+l.......c+h+s-...........ss.-pY....slKpQ..........pclLpEochMlPcspcRlptAhtcLpph...Lpptpp......hc-hpphh................................ 
0 76 124 191 +4580 PF03558 TBSV_P22 TBSV core protein P21/P22 Finn RD anon Pfam-B_3028 (release 7.0) Family This protein is required for cell-to-cell movement in plants. Furthermore, the membrane-associated protein is dispensable for both replication and transcription [1]. 20.60 20.60 20.60 24.80 17.30 20.50 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.14 0.71 -4.88 2 32 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 24 0 0 31 0 185.20 71 95.35 CHANGED MDsEYEQVs+PWNELYKEATLGNKLsVNVG.EDsElPLLPSNaLsKsRluhSGGYIThRhlRI+IlPLVSRpuGVSG+LaLRDIoDTTG+KLHsTELLDLGKEIRLohpHLDFSVSsRSsVPIVFGFE-LVSPaLEGRELFSVshRWQhGLSAQsYSLP.s.WKVhYQE-sL+thhP..KKAsKTsSs. .............MDTEYEQVNKPWNELYKEsTLGNKLhVNVGMEDtEVPLLPSNFLTKVRVuLSGGYIThRRlRIKIIPLVSRKAGVSGKLYLRDISDTT.GRKLHCTEpLDLG+EIRLTMQHLDFSVSsRSDVPIVFGFEELVSPFLEGRELFSlSlRWQFGLSpsCYSLPpuKWKVMYQEDALKsLKPSK.KKASKTDSS.V................................... 1 0 0 0 +4581 PF01840 TCL1_MTCP1 TCL1/MTCP1 family Bateman A anon [1] & Pfam-B_7391 (Release 8.0) Family Two related oncogenes, TCL-1 Swiss:P56279 and MTCP-1 Swiss:P56278, are overexpressed in T cell prolymphocytic leukaemias as a result of chromosomal rearrangements that involve the translocation of one T cell receptor gene to either chromosome 14q32 or Xq28 [1]. This family contains two repeated motifs that form a single globular domain [1]. 25.00 25.00 43.40 40.90 16.80 14.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.81 0.71 -4.61 8 104 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 31 6 50 77 0 104.10 40 95.40 CHANGED MA.ssshpsphPhPPshLhshccsIYEDEapRsWlslsVEoocp.......s.ss+hcsplTVHLpphsslhpEshsss.lssspLPtMWpL.scspYpusDuoaWRLlcHuQhsssEpLlLcLlss .........................t.phss.Ps+LWlhptslY.DEhpRo.Wlslshc.ssthp.................................V+lpQhpV.hG-.shpPoplssS.LPlMWQLY.Ptc+YpusDSphWcIhaHlp..l.pusp-hlLchlsc........ 
0 3 3 8 +4582 PF03634 TCP TCP family transcription factor Bateman A anon Pfam-B_1979 (release 7.0) Family This is a family of TCP plant transcription factors. TCP proteins were named after the first characterised members (TB1, CYC and PCFs) and they are involved in multiple developmental control pathways [1][2][3]. This region contains a DNA binding basic-Helix-Loop-Helix (bHLP) structure [1][3]. 19.90 17.00 20.20 20.20 19.60 16.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.60 0.71 -3.80 113 2128 2009-09-15 15:36:31 2003-04-07 12:59:11 8 3 670 0 282 2132 1 128.50 28 54.94 CHANGED sss+KDRHSKlpTutGsRDRRlRLSltsAt+hFsLQ-hLGaDpsScTl-WLlppucsAIpclsss................................sssss.............s..........t.....t....................................................tt.ttth.t.ttspppss..hshsc...ps+....................scuRs+.....u+c.coppchphptthss .........................................ppch.Tt.s.RsRR...hRLshthAtpFFsLQ-hLGaD+sS+Tl-WLlspSKsAIc-Lspp.........................................................p.sss..............s.s..................p...t.p..........................................................................................p...t............t..................p.....tp...............................t..........................tt................................................................................................................................................................................................................................................... 0 30 170 231 +4583 PF03645 Tctex-1 Tctex-1 family Bateman A anon Pfam-B_2986 (release 7.0) Family Tctex-1 is a dynein light chain. It has been shown that Tctex-1 can bind to the cytoplasmic tail of rhodopsin. C-terminal rhodopsin mutations responsible for retinitis pigmentosa inhibit this interaction. 
20.50 20.50 21.00 20.70 20.20 19.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -4.18 104 888 2009-01-15 18:05:59 2003-04-07 12:59:11 8 19 260 9 593 840 7 100.80 26 53.99 CHANGED ss.......clppllp.cslpptLts.t.......pY.pt.scspphsppls-plhppl.pp......h.................ppYKalVpshIhp.............................................ppspGl+suopshWDs.....ssDshsohpa..........pNcslaslssVaulhh .....................................s.tlppllcpslpptLts.t.......tY....pt..pp...s....sphsppls......-plhppl.pch.............................................ctYKal.......V..pss.Ihp.........................................................ps.u.t.G.lcsu....opshW.....Ds.....ps..Dshsohpa..........cNpo..h..aslssVaulh........................................... 0 246 318 461 +4584 PF00838 TCTP Translationally controlled tumour protein Bateman A anon Pfam-B_1548 (release 2.1) Domain \N 20.80 20.80 21.20 21.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.05 0.71 -4.25 9 650 2012-10-01 21:11:27 2003-04-07 12:59:11 12 9 399 15 320 645 3 149.40 39 92.31 CHANGED MllYKDlhosDELhSDSash.cllssllaEs-G+hVspp..us-ph.IGANPSAEGu-....EGs--sscpslDlVhsaRLpEp..uFDKKsahsYlKsYMKslps+Lpcpp.Ec..l.hFcKplpsalKplLup..FK-hpFFlGESMss...DG.VslhpYR..EsGtsPahhaaKcGLhE ..........................................MllYpDll..o..s...DEhhSDs.a......ph......c......l.h.s......s..l.......h...a.EV.cu+h..lsp...............sss.s..lG.uNs....SAEts-..............EG...s..-.....s.s..s....pslDlVhs...a..+L.........p.........Eo.........sF.s.....Kcsahs.....Y..l.K.......sYhKplps+.....L...c-...p....p..s..-p......................lp.FppsstthhK.c.l..Lup...a..Ks..hp.F..............ah..G....Eo..Mss......................DGhl...shhpY+........-su...ssPhh.haaKcGLp.......................... 
1 93 147 215 +4585 PF03347 TDH Vibrio thermostable direct hemolysin Mifsud W anon Pfam-B_3633 (release 6.5) Family \N 20.90 20.90 20.90 49.20 20.80 18.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.99 0.71 -4.70 2 84 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 15 1 2 77 0 138.50 78 90.33 CHANGED h-LPSIPFPSPGSDElLFVVRsTThpTcpPVpshVpDaWTNRslKRKPYcDVYGQSVFTTuGSKWLouYMTVsINs+sYTMAAlSGYKcGhSoVFsKStphpL.QcaY.SVtsFVsssEpSIPShsYLDETPpYFVsVEAYESGsGphhVMCISNK.SahECcpQ. .h-LPSl...PFPuPGSDElLFVVRsTThpTpuPVNshVsDaWTNRNlKRKPYKDVYGQSVFTTSGoKWLouYMTVNINs+sYTMAAlSGYKcGpSsVFsKS-pspL.Qc.Y.SVusFVGEs.EpSIPShhYLDETPEYFVsVEAYESGsGphhlM........................... 0 0 0 2 +4586 PF01285 TEA TEA/ATTS domain family Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 21.30 22.40 18.50 19.80 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.82 0.70 -5.32 19 717 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 216 11 331 600 0 283.20 40 76.13 CHANGED sP........ssh.......................hpstuss..sssc.pspu.h+s.c...................pDuEsVWSs-lEpuFQpALth.PPhGRRK..hS-cGK.YGRNELIAcYIhh+TGKpRTRKQVS.................................................SHlQVL........pShLK........tD.shpphsp.psAphssups..............................sstPphpss.lcshspstYs.h.sss.sshht......hpPhs..hssP...............ssshp..shsosplphlpFshalpt.p.psDphs+..HLas+lptsp.p.ssPslch.-l+phhspFPchpuuLc-.................Lh-chP.ssuhhhl+h.hDl.sssh..t-s.........ssuhYhhs........spYEotcNhshs....sSTKV...............ho.sKQVsEKspp...caAthEpsRhhaR.phSshpEhhhsh.ppL++Lsc+YhhNSs......................ltshThhpVlos.......pso.cs.........Lsl..........s.Vacs.ssppHGs.H 
..............................................................................................t...............................................................t.........................................t..p..shWs..clEpuF.puLthh....GppK..................hp.pu..+..a.G.RNELIucY............I...h......h+........T.........G....K............p......RTRK...QVS....................................SHIQVL..........................+p...h...pst.l.K...................................p......h...t...h.......t........t.................................................................................................s.....h.p........p...........as...........s.............s.......................................t.s.tth..hthp.............hhp..t............h...............h..h...................h........h................htp..............................t.hh..th..h.ph.............................t..a...........p.hpu..cph...h...ssohs...................ho.s+phscphp................................t..tt+..a....o.............................................................................................................................................................................................................................................................................................................................................................................. 
0 80 133 231 +4587 PF03848 TehB Tellurite resistance protein TehB TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.07 0.71 -5.13 6 1247 2012-10-10 17:06:42 2003-04-07 12:59:11 9 8 1191 16 158 6802 2835 180.30 47 77.23 CHANGED FaC+cEDYFpKKYNhTsTHSEVlEAVcsVpPsKsLDLGCGQGRNSLaLuLhGYDVTAhD+NssSIu.Lpcht-+EsLs.lpsulYDINuAslsEpYDFIlSTVVhMFLpscRIPpIIpNMQc+TpsGGYNLIVuAMsTsDhPCslPFSFTF+EsEL+cYYpcWEllKYNEshGpLH+TDtpGNRIKh+FuTML ................................................hhhcsEsY.F.sc.KY...t...h...s..........s..H...S........-.....V.......l......p.......A......h........p..........s......V.......p........s.......G...K.......s....L.DLGC...G.p.G...R....N.....S....L..a...L...A.........t....p........G....a...D.....V...T...A..h..D.....pN.s......h..u...l.......t....p.......l..p.........p.......I...t...p..........t............E.........s......L....-............l......p.......s......t.....l.....h.......D....l....N.....s.......h......o....h..........s.......t......p.....Y.....D....F.....I...l...S....T..V....V.....h....M....F...L.....p.......s.......c....p.......I.....P.......u...l......I...t.N.M.Q.c...p....Tp.s..G....G....Y....N....L...I.....V..s.......A........M..........D.......T...t..........D..a.........P..........C..........s...........l....s........F....P......F...s..F...K.E..........G...E....L..........t..c.Y...Y..c..s..W...E...h...l.K.Y..N..Ess.Gc...LH.+.pDtNGNRIpLRFAThL............................................................................................................................. 0 43 94 129 +4588 PF02765 POT1 Telo_bind; Telomeric single stranded DNA binding POT1/CDC13 Mian N, Bateman A, Griffiths-Jones SR, Sammut SJ, Wood V, Mistry J anon pdb_1s40 Domain This domain binds single stranded telomeric DNA and adopts an OB fold [1]. 
It includes the proteins POT1 and CDC13 which have been shown to regulate telomere length, replication and capping [2-4]. POT1 is one component of the shelterin complex that protects telomere-ends from attack by DNA-repair mechanisms [5,6]. 21.10 21.10 21.40 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.93 0.71 -4.28 48 329 2012-10-03 20:18:03 2003-04-07 12:59:11 12 6 201 64 177 352 3 138.30 21 19.57 CHANGED assls....t.hppsthhshhullhssphs..hps.pt..scahhshpl.....hD.sh.s.t.............stlplhhappphppL........................P.lpp.GDlltlc........................................................php..............lptasschp.........usssst..................ssatlF...psshsssh.................shhtusp...aphssp-pph...lpsLhphtt .........................h........p.sphlslhuVVhphp.s...hho.+G....sDaptslpl.....sDpoh......................sslps.plFpssh.ctL........................Ptlpp.GD.llhl+...................................................................pl+....................................lphapscht.........slssps.......................uhhhF...........tst.ss.sh....................tsst..hphstp-pphhttLh.h..t......................................................................................... 0 42 85 131 +4591 PF03070 TENA_THI-4 TENA/THI-4/PQQC family Mifsud W anon Pfam-B_2039 (release 6.4) & Pfam-B_7791 (release 7.7) Family Members of this family are found in all the three major phyla of life: archaebacteria, eubacteria, and eukaryotes. In Bacillus subtilis, TENA is one of a number of proteins that enhance the expression of extracellular enzymes, such as alkaline protease, neutral protease and levansucrase [1]. The THI-4 protein, which is involved in thiamine biosynthesis, is also a member of this family. The C-terminal part of these proteins consistently show significant sequence similarity to TENA proteins. This similarity was first noted with the Neurospora crassa THI-4 [2]. 
This family includes bacterial coenzyme PQQ synthesis protein C or PQQC proteins. Pyrroloquinoline quinone (PQQ) is the prosthetic group of several bacterial enzymes,including methanol dehydrogenase of methylotrophs and the glucose dehydrogenase of a number of bacteria [3]. PQQC has been found to be required in the synthesis of PQQ but its function is unclear. The exact molecular function of members of this family is uncertain. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.19 0.70 -4.58 25 3227 2012-10-02 21:56:19 2003-04-07 12:59:11 11 23 2223 70 822 2427 629 201.20 20 78.91 CHANGED cpplc.hhpphhp..HPFhttltcGsLs+pphptalhpcYhYltshs+hhu...hhhu+ssDhphhhchhpphh-thssE.....lpha.h+hs-tlGlshp-..lppppssPuscsalshhlsh.uppsshtEshsAhhsphhuhtphs..............................pphpphlp........tt..ahcalssassp.acptlpctpchlcplhphhtspt...........hpchpplhhpshpaEhsFh.stshcs .......................................................t....p.hhpth.hp.......H.sFlppltpGsL..s..tpth..phYlhQDh.hYl.t.pasp.h.hu......hhhs.........+.....s.....s.........s....h....c...t.....h..t..h..........h...h.p...p...h..........t.s...h...htsE......................hph.a...pphh.....p.tl...G..l.s.....t...p-............hpp.t.............hs..Pss.........htYssa.hh........ph...s.........tp......s..s........h..t....p..h..h.u....uh.l.s..s.....hhYt.phu..................................pp.l...t..p.p.p.........................t..a.tpWIp.h.au...s..c......a...p...p...hl.p.........th..hp...h.......lsp.h.h.pt..h.sppt........................hpchpphahpusphEhtFa.phuhp.h........................................................................................... 0 226 474 673 +4592 PF04876 Tenui_NCP Tenuivirus major non-capsid protein Kerrison ND anon Pfam-B_6119 (release 7.6) Family This protein of unknown function accumulates in large amounts in tenuivirus infected cells. 
It is found in all forms of the inclusion bodies that are formed after infection [1]. 21.00 21.00 21.10 277.70 20.90 20.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.09 0.71 -4.72 4 65 2009-09-11 07:41:10 2003-04-07 12:59:11 7 1 9 0 0 61 0 173.80 81 97.70 CHANGED spppsDVuVGPIsGLNYphLYDhLPspVSDNITL.DLK-P-+VTEssKKLILKGsl.lAYHHPLETDshFspVHKHhP-as+SFLEHLLGupspspNuhIDlGhFFslLQspLGDWITcpaLKHsN+MSKpQIKpLlspIIchAKAEuuDTEpYEcVWKKMPuYapsllp.lLHK .V.QRTIEVSVGPIVGLDYTLLYDTLPETVSDNITLPDLKDPERVTEDTKKLILKGCVYIAYHHPLETDTLFIKVHKHIPEFCHSFLSHLLGGEDD.DNALIDIGLFFNhLQPSLGGWITKNFLRHPNRMSKDQIKhLLDQIIKMAKAESSDTEEYEKVWKKMPTYFESIIQPLLHK. 1 0 0 0 +4593 PF03300 Tenui_NS4 Tenuivirus non-structural protein NS4 Mifsud W anon Pfam-B_4315 (release 6.5) Family \N 20.60 20.60 21.00 406.10 20.30 20.50 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.66 0.70 -5.33 5 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 9 0 0 53 0 284.70 82 98.63 CHANGED MSLu+lsSpSKSplLsDDLSE+uAK+h-cuNKKKLALSsRPL.............TKGRhTIDsAATVLGLEPFSFADVRsNoYDMFlAKQDYSIpANR+A+FsIDV-Ph.aF+KPLppaPFFRIATFAlVWIGIKGRAsGTlTFRIIDKSYlDPsDQVEVEVsYPISKNFAVLGSLPNFLuhEDtcNLQV-lsIcDuSVQNCVISRoLWFWGIERTDLPVsMEoQKTVMFEFEPLsD+slNHLSsFuNFTTNVVQ+AVs......sAFTTKuhsElDsthEFGVVKQP+......pIPI..........l+K++Shl- MALSRLLSTSKSKVLYDDLSEESQKRVDNKNRKSLALSKRPL.............NQGRVTIDQAATMLGLEPFSFSDVKVNKYDMFIAKQDYSVKAHRKATFNILVDPY.WFHQPLTHYPFFRVATFAMVWIGIKGRASGITTLRIIDKSYVNPSDQVEVEVRYPISKNFAVLGSLANFLALEDKHNLQVSVSVDDSSVQNCVISRTLWFWGIERTDLPVSMKTsDTVMFEFEPLEDKAINHLSSFSNFTTNVVQKAVG......GAFTSKSFPELDTEKEFGVVKQPK......KIPI..........TKKSKSEV.S 0 0 0 0 +4594 PF05099 TerB Tellurite resistance protein TerB Bateman A anon COG3793 Family This family contains the TerB tellurite resistance proteins from a a number of bacteria. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.51 0.71 -4.45 153 3006 2012-10-03 21:00:09 2003-04-07 12:59:11 8 32 1728 5 694 2123 2633 135.10 20 59.21 CHANGED t.lpph..htphh.sss.sphtss.t............plAlsA....LhsclApADGphspsEhptlpplltpthslsstpspplhptspptppc........ssshhpasptlpcths.........cp+hpllcsLWplAhADG....phcttE-sllcclAplLslscp-hhth.....+t ..........................................................................h.hthth...................t..t..t.............h.sshs....lhsclsp.u.D.Gp.lsps..E...h..phh...p...p......lh.......p.....p.....h....s....L...p....s.....t....p..p..p...t....h...t...p....t.h.p...t...tppp........................................shsh.p..ph....h.p....p....lcp.h...............hch..t.p....t...hLchhhpl....Ah.A.D.G........plcspEcplLpplAphLGlsptph.....h........................................... 0 189 419 574 +4595 PF03741 TerC Integral membrane protein TerC family Bateman A anon COG0861 Family This family contains a number of integral membrane proteins that also contains the TerC protein. TerC has been implicated in resistance to tellurium. This protein may be involved in efflux of tellurium ions. The tellurite-resistant Escherichia coli strain KL53 was found during testing of the group of clinical isolates for antibiotics and heavy metal ion resistance [2]. Determinant of the tellurite resistance of the strain was located on a large conjugative plasmid. Analyses showed, the genes terB, terC, terD and terE are essential for conservation of the resistance. The members of the family contain a number of conserved aspartates that could be involved in binding to metal ions. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.07 0.71 -4.84 93 6495 2012-10-03 02:02:08 2003-04-07 12:59:11 11 11 3058 0 1376 4775 2679 192.90 30 58.96 CHANGED hslhhlEhsLSsDNhhVlullspp...lPtp.......................pc..+slhaGlhuAllhRhlhlhhushLlph.................................shlhhluGhaLlahuhchlht.h.................t.hpphhshsssh........................................hhhsllhIthsDllFulDSlhAshulo................pchhllhsuslhuIlslthhuthlsphlc+ashlphsshslLsalGsch...llps ......................................................TlhllEhlLulDNllhlullssp...LP.s.p......................p.pc.+.A.hh..h..G.l.h.h.A.l.l.h.Rhl....h..lhh.h.u.allsl.....................................................................h......sh..lhhlG.Gl.....F..Ll..apusp.lpcph.................................c.tt..c..pt.h.tt..ss.st..........................................................................................................hhhslhpIh.....lh..DllFu..l.......DS...l...lsshGhs.....................schhlh.h..s..us.ll.Alh..lhhh...s..uphl..s......p..h..lp.+..a..P..t.lth..hshshLhhIGhpLlhps.................................................................................................. 1 380 808 1111 +4596 PF02342 TerD TerD domain Anantharaman V anon Anantharaman V Domain The TerD domain is found in TerD family proteins that include the paralogous TerD, TerA, TerE, TerF and TerZ proteins [1][2] It is found in a stress response operon with TerB and TerC. TerD has a maximum of two calcium binding sites {2] depending on the conservation of aspartates {2]. It has various fusions to nuclease domains, RNA binding domains, ubiquitin related domains, and metal binding domains. 
The ter gene products lie at the center of membrane-linked metal recognition complexes with regulatory ramifications encompassing phosphorylation- dependent signal transduction, RNA-dependent regulation, biosynthesis of nucleoside-like metabolites and DNA processing linked to novel pathways [2]. 25.30 25.30 25.80 25.50 24.80 24.90 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.19 0.71 -4.96 187 3080 2012-10-09 20:46:33 2003-04-07 12:59:11 13 38 634 6 728 2301 75 182.90 29 70.99 CHANGED hthsLsKGp...plsLscpss.......tplplsls.Wc.ttt..............................tshDLDsSshhlstt.............s+stsssphlFasphpusssul.h.......................sGDshsut....t............................................................................................sEslplsLspls...sslpcllhslolasut................Fuplpsuhl.clhst..................sspplsc......asls....shsscouhlhuclYRc.............................supW+.......hpAlGpGh......ssGlts..lsppaGhtl .........................................................................................lsLpK.Gp...plsLsKpss......htplhluLuWcstt.tt...........................................tshDLDsSshhl..ts.....................s+st.ssschlFa..s..shp.u...s..s.....G.u..l..ht..........................................sGDshTGt.....ut.G.D............................................................................................cEplpl.cLsp.ls.....splc+llh.s.V.sIasups..............sFuplpsu.al..Rlhst..............................sspEls+......acLs....tshutcTuhl...huElYRc.................................................................supWK...FpAlGpGh.......ssGLts.lsptaGh.............................................................................................. 0 203 487 654 +4597 PF03592 Terminase_2 Terminase_small; Terminase small subunit Finn RD anon Pfam-B_3755 (release 7.0) Family Packaging of double-stranded viral DNA concatemers requires interaction of the prohead with virus DNA. 
This process is mediated by a phage-encoded DNA recognition and terminase protein. The terminase enzymes described so far, which are hetero-oligomers composed of a small and a large subunit, do not have a significant level of sequence homology. The small terminase subunit is thought to form a nucleoprotein structure that helps to position the terminase large subunit at the packaging initiation site [1]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.96 0.71 -3.90 71 1427 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 1058 51 150 1030 539 147.30 23 79.63 CHANGED LTsKQctFspEYlt.....s.hNATpAAl+A.GY.....St+o...Apshu...sc.LpcscIpphIsEt...hpchppcphhssc-lLphLsclspu-hp-.h.h..t................................................ttspthp.hpsphpD+l+Ah-h.LuK+hs........hp-c...........t.t.t.hhp..ht......s ................................LTtKQcpFsptYlp.....s....h.Ns.TpA...Al......pA..GY.....Ssco.....Ap...spuscLLc.p....s...c.......lpth.I...pch.......pp...ch....hp...c....th.......h..shppl.l..thL...tp...hths..p..pc.h...h..h...t...................................................................................................t.thh....h..p.tsc.cuhch.lh+hht....h....h.pp.............................tsst..h................................................................................................................................................. 1 40 90 117 +4598 PF03936 Terpene_synth_C Terpene synthase family, metal binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_728 (release 3.0) Domain It has been suggested that this gene family be designated tps (for terpene synthase) [1]. It has been split into six subgroups on the basis of phylogeny, called tpsa-tpsf. tpsa includes vetispiridiene synthase Swiss:Q39979, 5-epi- aristolochene synthase, Swiss:Q40577 and (+)-delta-cadinene synthase Swiss:P93665. tpsb includes (-)-limonene synthase, Swiss:Q40322. 
tpsc includes kaurene synthase A, Swiss:O04408. tpsd includes taxadiene synthase, Swiss:Q41594, pinene synthase, Swiss:O24475 and myrcene synthase, Swiss:O24474. tpse includes kaurene synthase B. tpsf includes linalool synthase. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.46 0.70 -5.17 87 2046 2009-09-15 09:56:15 2003-04-07 12:59:11 11 32 433 92 834 2322 3 234.00 21 48.78 CHANGED lLclA...KLs....FNhhQthappELcpls+Wa+-hsl.scLsh.hR-RllEsYahshushaEPp..aStsRlhhsKhhhlhsllDD.....haD..sauohcE...lc....thscslcRW........s...ssh-.pLPcah+hsapslhchhp-htpchtpc.scp......hh.hhtcthpphhcuahpEucWh......pss.hhPo.h-EYhpsuhhohuhhhhhhhshhsh...sphhs.cpshchlpppsp...lhchhshhsRLh....sDltsa..........................pcEhp+GchssulpsYMcph.ssocE-Ahpclpphlppsh..+phs ..............................................................................................................................................hs...........p.....pht.t.h.......p..........p...W.........h.pp.........th.....t..p...h........h....+.....cc.....hhphhhh..h.hu.h.h.hp..Pp.....hu.tR.h.h.h.s.+.hhshhh.hh..DD............ha.D....shu..sh.c-.............hp...........hhspslp.ph..............................s........sshp..th.s.p..h...h......c..h.....h...h.....h..s..l..hs.....h...h...........p..ch.tt.phtt.p.....ttp...........................h..ht.pt..htphhp.u..hh..h.E.s..c.ah...................tps...hhPs..h...pEYh.p.t.hho.s.uh..hhhshs.hh.....hh....t..hs..cpshp..h.htp..st.............lhph...sshhh+...Lh..................sD.lhoh..........................................p.p..E.........p......c.......s.p......h..s...ss.l....h...h.h...pp......h...s.h.o.pp.Ahpthtthlppthcph........................................................................................................................... 
0 148 537 713 +4599 PF02909 TetR_C tetR_C; Tetracyclin repressor, C-terminal all-alpha domain Griffiths-Jones SR anon Structural domain Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.52 0.71 -4.52 18 1930 2012-10-03 00:15:22 2003-04-07 12:59:11 12 6 804 87 677 1699 15 139.70 19 61.05 CHANGED hP.pss-.sWpshLpssA+ShRpALLsaRDGA+lahGo.tsussphcshEspLphLscsGFosccAlhslpslupFslGuV........LEcQu..tpsstpspptss.hppth.s...Pl....Lppshp...shspsss-ssFEhGLpllIsGlcsth ..........................................................st..sWcstLpshAcshRpshhpH..P...t.s......s.p...l.h..h....s...p..........s..........h....u.......s......s......t......h......p....h....h-...thl.t.hL....p....s....s......Gh.s..s.t.p.s....h.t..sht...sl..t....t....alhGts.....................................h.p....p...p....s................t.....t.s..s........t........h..t...t..t..sh.....htth......................Ph..........lt.t.sht......................ht.t.........s.........c.....t..t......F.....p....hGLphllsGlt...h.......................................................................................... 0 228 518 626 +4600 PF03299 TF_AP-2 Transcription factor AP-2 Mifsud W anon Pfam-B_1736 (release 6.5) Family \N 20.30 20.30 23.70 22.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.14 0.70 -4.78 6 424 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 99 0 242 359 1 190.70 64 48.09 CHANGED ssEVFCsVPGRLSLLSSTSKY+VTVAElQRRLSPPECLNASLLGGlLRRAKSKNGG+pLR-pLcKlGLsLPAGRRKtApVThlTuLVEuEAlHLA+DFuhVCEsEFPu+slApals+pplss.p-hstR+phLhtopplsp.EltclLopDRoPlssp+spshL..-sulQpsLoHFSLhTHGFGssAhpAslpulQshlsEulphl-Khh. 
.............................................sEVFCSVPGRLSLLS.ST.SKYKVTVuEVQRRLSPPECLNASLLGGVL.R..R...AK.SKNGG..Rs..LRE...+L-KIGLNL...PA....G..R...RKAA..NV...TLLTSLVE.G...............EAlHLARDFG.YlCETEFP...uKAluEa.L.sRpHt.-.s.....s.-...tsRKsMLLA..oKQlCK.EFsD.LLuQDRo..PlG...sS..RP.sPIL.......Ess.I..QssLTHFSLITHGFGsPAlCAAloAhQNYLsEuLphhDKh..h............................................................. 1 59 75 151 +4601 PF02559 CarD_CdnL_TRCF CarD; TF_CarD; CarD_TRCF; CarD-CdnL_RID; CarD-like/TRCF domain Mian N, Bateman A anon COGs Family CarD is a Myxococcus xanthus protein required for the activation of light- and starvation-inducible genes [1]. This family includes the presumed N-terminal domain, CdnL.\ CarD interacts with the zinc-binding protein CarG to form a complex that regulates multiple processes in Myxococcus xanthus [4]. This family also includes a domain to the N-terminal side of the DEAD helicase of TRCF (transcription-repair-coupling factor) proteins. TRCF displaces RNA polymerase stalled at a lesion, binds to the damage recognition protein UvrA, and increases the template strand repair rate during transcription [3]. This domain is involved in binding to the stalled RNA polymerase [3]. The family includes members otherwise referred to as CdnL, for CarD N-terminal like, whichdiffer functionally from CarD. The TRCF domain mentioned above is the RNA polymerase-interacting domain or RID [5]. 
25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.11 0.72 -3.96 152 5749 2009-09-12 00:28:14 2003-04-07 12:59:11 11 11 4259 6 1333 4473 2560 101.50 33 11.35 CHANGED plphGDh......VVHspHGlGpatulcphpl.......sGhp....p-..alhlpats......ss...p...lhlPlsp.lshls.Ralus..psph..hLscLusps..........W....p+ppp......Kl+psshphus-llchhu ......................LcsGDhVVH.pHGlG+ahGl...p.s.h-l.......................sGhp.........p-..YlhlpYss..........sc......pLaVPVs.p..lchlu..RYluu.............psph...sh......Lp+LGusp..............W........p+t+p..............Ksppplc-lAs-LlclY..................................................................... 0 485 917 1141 +4602 PF03529 TF_Otx Otx1 transcription factor Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 27.10 27.10 22.30 21.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.58 0.72 -3.77 22 293 2009-09-10 18:52:32 2003-04-07 12:59:11 8 2 133 0 88 237 0 92.20 55 34.78 CHANGED SPASIS........P...hs.DP.Lus.....uouSCM....QR..........suuYPMo.YsQu...suYu.QG...Ys.u.SoSYFuGlD.CuSYLu.PM.....HsQLsusGusLSPMuusu..Muu.Hlspus .......................................................................................SPASlS......P........LP.-P.h.ss.......u.suSCM....QR....................ssSY.PMo.YsQu...uuYu.QG...Ys..s...SSSYFuGl.D.CuSYLu.PM........Hspht.s.........pLSPMusso.huu.Hhppp................... 
0 3 11 34 +4603 PF03849 Tfb2 Transcription factor Tfb2 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.20 21.20 21.20 21.30 18.40 20.70 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.01 0.70 -5.82 24 407 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 301 0 280 375 6 296.10 32 69.68 CHANGED -YLcuLPsplhs+LY.psPATCLAIaR.hLPslAKhalMpMla.-pPlshp-l-pWl+ssu.....ptppppulcpLppL+lh...........................spssst...thlpLNssF+ps.h+pALsGGtppsSFGsssspss.....pcssslshLDpYAtp+WEslLHaMVGostspt..........sSp..sVhpLLpputLhct.....sssthc......ITspGFQFLLQDsNuQlWsLLLpYLchuE........shsMDsV-lLsFLFhL......uuL-LG+uYshss.Lo-TQhphLpDL+DaGLVaQ+ps..psppFY.PTRLATsLTSsssshpssstu.pt...................................pspshppGhlIlETNaRlYAYTsSPLQIAlLuLFsclchRFsNhVsGpITRESlRpALtsGITA-QIIsaLpsHAHP ...............................................................................................................altt.hs.thhppLY.....tp.s..............hshulaR.................LP.l...A+.hlh......thl...a....h....p.p.s....ls...........tslt.........W..........s......t.p........................tpt..ppuhphLtt.L+lh...............................................t........tt......th.lss.Fp..ps.h.........p.s......LhG......u.....sp.....t......s...........................................tt..hs..ht.L-pYu.ppWEs.......lLtahVss....st.t..................u.t..shhplL..tutLhpt..........tt...t.t.........................ITp.tGF.....pFLL.-hssQlWhhhl.Ylp..h.p.................................tht.hs...s.-hLsF..lF.L....................u...pl....G............p....sYphps......h.op...s..h.........p......h.....LtcLt-hGLla...p..p..t........pt....thaa...PT...+L.A......h.........sLsss.s..s........................................................................................................................tttppGhlllETNa+lYAY...........Ts.....S.......Lp.....l.ullsLFsch.h..RhP.N..........hlsu.tlTRcSlppAlttGIT.ApQIIpaLpppAHP............................... 
0 104 161 236 +4604 PF03153 TFIIA Transcription factor IIA, alpha/beta subunit Mifsud W anon Pfam-B_3542 (release 6.5) Family Transcription initiation factor IIA (TFIIA) is a heterotrimer, the three subunits being known as alpha, beta, and gamma, in order of molecular weight. The N and C-terminal domains of the gamma subunit are represented in Pfam:PF02268 and Pfam:PF02751, respectively. This family represents the precursor that yields both the alpha and beta subunits. The TFIIA heterotrimer is an essential general transcription initiation factor for the expression of genes transcribed by RNA polymerase II. Together with TFIID, TFIIA binds to the promoter region; this is the first step in the formation of a pre-initiation complex (PIC). Binding of the rest of the transcription machinery follows this step [1]. After initiation, the PIC does not completely dissociate from the promoter. Some components, including TFIIA, remain attached and re-initiate a subsequent round of transcription. 30.50 30.50 30.70 30.70 30.20 30.20 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.47 0.70 -4.56 31 563 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 271 8 357 547 0 253.50 22 80.52 CHANGED +lYcsVI.-DVIsssRpsFt-p.GlDEQsLp-L+plWppKLspopsschsa-sss..sss..stp........................................t...........ss.pp.psps.sssssstshthssG...............thtstsG..hpsthPshss..ss.st.......t...............htthptshstsuuspss...........t........................................................tt.shtp.sss.ht...................................ppscustss.............t...sp.hpstt...........................hc.phhshputth..slpptspt..ttpp....................................................................................................................................hQhDGssssssp............pcp....D.-D-.......................................lppD..sssDDhs.cpDsp-..ssssVhlCpYDK...........VpRsKNKWKhpLKDGIhshsGKDYVFpKApGEuEW 
...........................................................................................................................................................a..lh.tpVh.t.....p......F.p......s...t.thl.php.t.W....ptKh...t.ts..h..p...t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..s...................................p.pp.................................................................................pp-..s...D-......pc.....p...t..............tplhlC.aDK...........lpR......s+s+......WKh.LKDGlhphss+.-hhFt+s.s-h-a........................................................................................................................................ 0 117 189 283 +4605 PF02268 TFIIA_gamma_N TFIIA_gamma; Transcription initiation factor IIA, gamma subunit, helical domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4941 (release 5.2) Domain Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIA (TFIIA) is a multimeric protein which facilitates the binding of TFIID to the TATA box. The N-terminal domain of the gamma subunit is a 4 helix bundle. 
20.90 20.90 22.50 21.70 20.10 19.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.31 0.72 -4.38 21 360 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 275 4 215 313 1 47.90 50 39.93 CHANGED saYELYR+So...lGtuLTDuLD-LIssupIsPpLAhKVLtpFDKslscsLp .....hYpLYRpoT........lG.sLp-oLD-LIpsspIsPpLAhpVLhpFDKulspALt............. 0 74 117 179 +4606 PF02751 TFIIA_gamma_C Transcription initiation factor IIA, gamma subunit Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4941 (release 5.2) Domain Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIA (TFIIA) is a multimeric protein which facilitates the binding of TFIID to the TATA box. The C-terminal domain of the gamma subunit is a 12 stranded beta-barrel. 21.20 21.20 22.70 21.50 21.00 20.40 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.76 0.72 -4.15 26 357 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 277 4 214 301 1 50.30 52 42.76 CHANGED psKuKloF...K.GcLcTYRFCDsVWTFIl+ssph+h.....................sppplpVDKlKIVACsu ..........pV+s+.loF...K....G+LcTYRFCDsVWTFllpDspF+....................................spphlp.l.D.K.VKIVACsu................. 0 74 115 178 +4607 PF02291 TFIID-31kDa TFIID-31; Transcription initiation factor IID, 31kD subunit Mian N, Bateman A anon Pfam-B_6729 (release 5.2) Family This family represents the N-terminus of the 31kD subunit (42kD in drosophila) of transcription initiation factor IID (TAFII31). TAFII31 binds to p53, and is an essential requirement for p53 mediated transcription activation. 
29.00 29.00 29.00 29.00 28.90 28.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.48 0.71 -4.36 5 368 2012-10-10 12:36:46 2003-04-07 12:59:11 10 8 276 1 247 382 2 121.40 39 53.69 CHANGED cuGhoupsccsPKDAplltpIL+-hGIpEYEPRVlsQLL-FAaRYTosILcDApVYucHA+Kusl-.....lEDVRLAlssplspSFTuPPPREhLLcLAs-RNppPLPQI+PsaGlRLPPDRYCLTusNacL+ ...........................t.........pphP+DAplht.lLpsh.Gl..s..p..Y.EsRVh.QhL-FA.aR..........YsoslLpDA.....tl.....Y.......u......s..H....A..t+.....s...sls.........................s-DlRLAIpsR.....hshpF..p....s...........s.........P.....P+-...h....Ll-lApp+NphPLP.................h.l..h.........s..........h.......G..R..LP...P.-+asLou.sapl............................. 0 80 126 189 +4608 PF03540 TFIID_30kDa TFIID_30kD; Transcription initiation factor TFIID 23-30kDa subunit Griffiths-Jones SR anon PRINTS Family \N 21.00 21.00 21.20 21.00 20.90 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.30 0.72 -4.13 20 312 2009-09-14 12:53:53 2003-04-07 12:59:11 8 5 267 0 220 305 3 51.70 52 26.68 CHANGED PhIPDuVTsaYLs+uGFps.......sDsRlsRLluLAsQKFlSDIAsDAhQau+hRs ......PhIPDAVTsaYLs+uGFps.............sDsRl.hRLluLAsQKFloDIAsDAhQas+hRt......... 0 75 121 180 +4609 PF02002 TFIIE_alpha TFIIE; TFIIE alpha subunit Enright A, Ouzounis C, Bateman A anon Enright A Family The general transcription factor TFIIE has an essential role in eukaryotic transcription initiation together with RNA polymerase II and other general factors. Human TFIIE consists of two subunits TFIIE-alpha Swiss:P29083 and TFIIE-beta Swiss:P29084 and joins the pre-initiation complex after RNA polymerase II and TFIIF [1]. This family consists of the conserved amino terminal region of eukaryotic TFIIE-alpha [2] and proteins from archaebacteria that are presumed to be TFIIE-alpha subunits also Swiss:O29501 [3]. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.29 0.72 -4.53 7 544 2012-10-04 14:01:12 2003-04-07 12:59:11 12 13 435 2 355 727 205 102.50 21 29.94 CHANGED cLlpElltphhG..tcsh.llcsLh.cucso-E-luc.LtlchsplR+lLhpLa-s+Lsca+Rt+D...-ppsWhpYYWhlphc+l.pllKt+hpphlc+LcctLchEps ............................h..lhcthhs....pt..h..hl..l-h.L.h..p..c........s.....t......l.p..-....--luchlph..pt..pplRplLtpL.p.cc+.l.l...p.h...+...p...p...p-.............................p.s.p..s....h..tp.....h......h...a...h..ls.h..c.ph....h....shl.c.h+hpphhpplctp.t................................................................ 0 106 199 293 +4610 PF02186 TFIIE_beta TFIIE beta subunit core domain Bateman A anon [1] Domain General transcription factor TFIIE consists of two subunits, TFIIE alpha Pfam:PF02002 and TFIIE beta. TFIIE beta has been found to bind to the region where the promoter starts to open to be single-stranded upon transcription initiation by RNA polymerase II. The structure of the DNA binding core region has been solved [1] and has a winged helix fold. 22.50 22.50 22.70 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.12 0.72 -3.98 12 229 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 199 2 152 218 0 70.60 34 24.23 CHANGED s+h.s.LthhV-YhKp+.....scPlol-El.shls.hDIsssllshLc.....p.s+lcasscs.papahslasI .................................a.hshLthlVcahKp+.......scPLol-EIL..-php..lD..l..s.......p....p...hc..phLp............sNsKlchs..c......u.....patFKPhYsl............................ 0 38 67 115 +4611 PF02270 TFIIF_beta Transcription initiation factor IIF, beta subunit Bateman A, Mian N anon Pfam-B_4519 (release 5.2) Family Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. 
Transcription initiation factor IIF (TFIIF) is a tetramer of two beta subunits associate with two alpha subunits which interacts directly with RNA polymerase II. The beta subunit of TFIIF is required for recruitment of RNA polymerase II onto the promoter. 19.40 19.40 21.20 21.30 19.30 19.30 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.70 0.70 -4.60 25 473 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 278 10 318 464 4 184.60 23 82.69 CHANGED stDlDLssuc..p..pVWLVKlPKYLuppW........schssp....tplG+l+Ipps......ttc.pVslhLscp.ps....................plP+EYslphpssp.......spshhVFoEps..t.ppp...p..........................................................pshscKhALtG+Vs+cspshPsts-..pYt+lhpp+.thpsspP++pVphL-ch..Vspshts..suh+usssch...hh......tccKKp-uKpsRhs+p-LLDhLFpsFEcapYWslKsLccpT+.QPcuYLKEsLcpIAhhsK+GPapspapLKPEY++ ........................................................................................p............p..thaLh+hP.hl....p...tW...........................tt......t...........luplhh..t.................thth...ht................................................................hPppa.h.......h.t................................s.hlhsp...............................................................................................tph.h.Ghl.p..ch.p....hhP.................p............p.....pa..p.hh.t.p.....t......p....t.l...pp............t.h..........t.h..t.t...........................................pttt.+thR..hspptl.shlathFc.c.......h...ash+tLht.h..........p.QP.................a......L+phLppls.h.hp.s....t..h.apL+s-hp..................... 
3 103 173 251 +4612 PF01096 TFIIS_C TFIIS; Transcription factor S-II (TFIIS) Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.20 0.72 -4.32 109 1781 2012-10-03 10:42:43 2003-04-07 12:59:11 13 25 640 94 1065 1584 290 39.10 43 20.94 CHANGED hpCs.c...Cspccshah.phQpRSADEP.ThFapCh..pCsppW+ ...........pCs..+....C.....t.p.c.c.s..hah.phQ......oRS...A..DEP.hTs....Fap.Cs..pCup+W+.......... 0 343 578 851 +4613 PF04994 TfoX_C TfoX C-terminal domain Bateman A anon COG3070 Domain TfoX may play a key role in the development of genetic competence by regulating the expression of late competence-specific genes [1]. This family corresponds to the C-terminal presumed domain of TfoX. The domain is found associated with Pfam:PF00383 in Swiss:Q9JZR1. It is also found as an isolated domain in some proteins suggesting this is an autonomous domain. 23.00 23.00 23.00 23.30 22.90 22.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.50 0.72 -4.07 48 1181 2012-10-03 02:11:09 2003-04-07 12:59:11 8 3 1046 4 158 663 24 79.70 38 45.23 CHANGED tssR.L+-......LPNlshshEchLp+sGIcolppLcplGAhpuah+l+ps.utslslpL...LauLpGAlpGhHWsslsppc+pcLlpth ................t..pRL+D......LPNhuhplEphLtcsGIcslcpL+tlGActsal+.L+pp....sstlohcl...LatLEGAI.GlH.ss.LPts++pELhch........................................... 0 37 78 123 +4614 PF04993 TfoX_N TfoX N-terminal domain Bateman A anon COG3070 Domain TfoX may play a key role in the development of genetic competence by regulating the expression of late competence-specific genes [1]. This family corresponds to the N-terminal presumed domain of TfoX. The domain is found as an isolated domain in some proteins suggesting this is an autonomous domain. 
22.20 22.20 22.20 22.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.02 0.72 -4.14 85 1775 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1572 2 258 995 70 89.90 27 59.84 CHANGED lh-hl....ush.GslstRpMFGGaGlah.cuhhFAllsc.s...pLaL+u.sspspsha.pstGs....tsata..tpth.h....shsYaplstphh--tptltphs+huhpsuhp ....................c.lstL..us..lph..RsMFGuYulah.cs.......s.l.h.uhlh-....s......cLal+u.scpstphh..sppss....hsash.....th.......shpaahlstphhc.....st..Lhph.chuhpth.............................. 0 60 131 199 +4615 PF00019 TGF_beta TGF-beta; Transforming growth factor beta like domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.30 20.30 20.60 20.40 19.70 20.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.86 0.72 -3.76 20 3712 2012-10-02 16:54:34 2003-04-07 12:59:11 15 19 848 111 1288 3220 0 95.50 38 29.32 CHANGED sssCph+pLalsFp.DlGWspWIluPcGYhApYCpGpCsaslssphsso....spAllpsls+hh..tP.pssspPCClPT..cLsslohLahD-sps.hhl+phpsMlVcpCGCp ............................p.ptCp++sLaVsFp.-l.G..W.s.p...W.IlA...P.pu.YpAhYCpGp.Cs.a...sh.s.s.ph..sso........................sps.h...lp.s..l.h..............pth...........s.s......s..s.spsC..Cl..Po..chssloh.LYh...-..p....p....pp......hh..hc..ph.sMllptCuC..................................................... 0 194 298 677 +4616 PF00688 TGFb_propeptide TGF-beta propeptide Bateman A anon Pfam-B_110 (release 2.1) Family This propeptide is known as latency associated peptide (LAP) in TGF-beta. LAP is a homodimer which is disulfide linked to TGF-beta binding protein. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.43 0.70 -5.08 81 3044 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 870 4 1012 2611 12 185.70 18 54.29 CHANGED sl...........................................s.h....tpthpppphcthcpplLphlGLp..cp...Pps............ttstssP...pah.L.-LYptht..............tppsttpss.........t...t......................usolhoFtsctphpp..........ptptpphhFslSsl...sps.cplstA-.L+lahpt..............sspspphplplaplhp.........tttt.pcLLss+h...lphs.....suWpsFDloss.lpp..Whppspp..NhG.....ltlpl.......hstct.sshssp..t.....t......................tptpPhLlsa ..............................................................................................................................................................................................................s.............h..t.ht......................................................................................................................................sp.l...ht..h............................................h.hFs.h.o....th........pt..p.......l..tAp.lh....lah.t.............................................tttthplp.lhph..t.....................ttt.....tph..lsp+h...........lphpt.................stW....sF..D.V.o..s.....s.lpp...Wh.......tpsp.......s..hG......l.lp.s.......................p...pt...pt.....t..........................................................t.pPhh.h.................................................................................................................................................. 0 166 252 544 +4617 PF01702 TGT Queuine tRNA-ribosyltransferase Bashton M, Bateman A anon Pfam-B_1643 (release 4.1) Family This is a family of queuine tRNA-ribosyltransferases EC:2.4.2.29, also known as tRNA-guanine transglycosylase and guanine insertion enzyme. Queuine tRNA-ribosyltransferase modifies tRNAs for asparagine, aspartic acid, histidine and tyrosine with queuine. 
It catalyses the exchange of guanine-34 at the wobble position with 7-aminomethyl-7-deazaguanine, and the addition of a cyclopentenediol moiety to 7-aminomethyl-7-deazaguanine-34 tRNA; giving a hypermodified base queuine in the wobble position [1,2]. The aligned region contains a zinc binding motif C-x-C-x2-C-x29-H, and important tRNA and 7-aminomethyl-7deazaguanine binding residues [1]. 23.80 23.80 23.90 23.90 23.60 23.30 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.52 0.70 -5.08 133 5152 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 4473 77 1485 3705 2879 234.20 41 60.66 CHANGED Ecsl.plQpsl.GuD...IhhshDcss...sh.....ss...st.ctscpuhccTh+Wsc+shpth..............................ttpppsLFGIlQGGha.-...LRppusctlt.p...........s..hsGaA......lGGlusGE......sp.pphhpllc.hsss.......hLP............ps+PRYL.hGlG.pPtslltuVthGlDhFDClhPo.RhARpGphh...TppG............................................pl.............plcst.patpD..pPl..-.sCsChsC.............p.......paoRAYl+HLhpspEhlu.pLlohHNltahhplhpplRpuIpps.pht....phhpp......hhp 
..........................................................................................................Ecuh.pIQps.L.GS.D..IhM.hF.DECs.......sh............ss.....sh..chsc...cSh-ho.....hR..WAcRshcta........................................phspppu..LFG...IlQ..GuhacD...LRc.pS...scsLs.p.............t..FsGYA........lGGLu.VGE............s+..ppMh.c......ll-...hsss..........hLP.........................................pc..KPR...YL..MG.VG..pP-......sLl.....-u.V.t.+.G.lD.M.FD..........CVhPT.Rs.A....RNGphh.TspG....................................................................................................................pl.........pl+NA..catpDh.pPl...D....p.Cs..C.YoC......................................p...........s.YS.RAYL+HLh+sp....E....h..........l....G....hpLsolHNLpah.pLMpplRpAItp..s..p..htpFhppFh.t...................................................................................................................................................................... 0 514 934 1253 +4618 PF04858 TH1 TH1 protein Kerrison ND anon Pfam-B_6070 (release 7.6) Family TH1 is a highly conserved but uncharacterised metazoan protein. No homologue has been identified in Caenorhabditis elegans [1]. TH1 binds specifically to A-Raf kinase [2]. 
20.00 20.00 22.40 21.10 18.70 18.00 hmmbuild -o /dev/null HMM SEED 584 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.84 0.70 -6.36 5 182 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 117 0 129 184 0 427.00 40 78.05 CHANGED Ms-DaEs.scuWspusGcGpttu--supuE..sEsssuVlpECLc+FuT+DYIMEPuIFssLKRYFQAGGoPEsVIphLSENY+AVAQMsNLLAEWLIlTG.......VcPscVQAsVENH.............................................LKsLllK+FDP+KADuIFTEEGETPAWL-pMIcHcTWRuLlY+LAEcaP-CLMLNFTIK................LISDAGaQuEITSlS.TAAQQlEVFSRVL+TuIssILsuGsDDl++s..IEElA+MVCHGpHTYVFuQlMluVLuQEpcGGos......s+RLSQEIp+aAtcpspsATsITlA..LuuSAsaPpACpALuuMLSR................GALNPADITVLa+hYoSSD...PPPV-LIRsPsFL-LLlsuLFKsGVKI.Ns-HKuKYhaLLAYAASVh-..tKp.ps..-cslsKDELKuT..AIEcAHAIhCNss+GsSELlA-lsTLYsCI+aPVVusGVI+WV-ssVTEPuYF+LsT..-osPVHLAlLDEVAssHsuLQsQVLcLLl+LFE....S+pDEL-Ih.QLEhKKsLLDRMVcLLARGaVlPVloYlspChcR.DTDlSLIRYFVTEVLElluPPYSsDFVQLFLPlLEN-sIsGoM+uEG-sDPVoEFI..VHCKu+ahol ..................................................................................................t..t.h..pcthpth.p.DhIME.Pslhs.....lp..pYhps........GG..sP...pp..........llphLSpsYpuhAQhssLlucWLh.hs....................hp..plpthhcsp................................................LKphlhcpFDPpKADslF.otp...u...p.....sPtWL.pphI....t..................c.pWRpL...hYpLAE..t.aPc...CLhLsFsl+...................hI.SDAGaQs...EIsSlo..TAspphpVFo+VLpsslsphlp........s.....p.-.s.hpps....l.-hs+hlCpupHTYlauQshhph......Lspc....t.ps.s.s................h+Rlu...QElpp.A.ppt.psss.........hphs...........Lssu...s........t......s...ps.....ppulsuhLsp...............................ssL...ssuDlp.hLac..as.s.c......PPPlphlR..Pthl-lhh..psLFp.s.u.pl....s........ca+.cahalLAYAussh-......tpt..p........................hsp..-..-.lcs.......T..pAlEp....spsl.p..s.s..tu.t.....s...chh.uplt.pLhpsl.chPlVuhGVl+Wlc.sl..ps.p.aa..p..h.s....-psP.haLslL.-ElsshH.hhp.plLplLhplh-..............spt.s.pl-hh.th.Eh++hllD+hVpLlo..pGhVlPVl.............palpps.htp.phDhSLIRaFVoEV.....................L-hlsPPYos-
Flphalshlpstplh.ssh.........t.....ht.Fh................................................................................................... 0 63 75 106 +4619 PF00314 Thaumatin thaumatin; Thaumatin family Finn RD anon Prosite Domain \N 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.25 0.70 -4.87 124 1409 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 247 50 615 1398 11 174.60 39 73.41 CHANGED NpCsaTVWPushss...u............GGhpLssGpo..hsls...sPsu..h.uGRlWuRTsCsFsu.............u..pupCtTGDC..G..GtLpCs.G.s.GtPPsTLAEaoL..........t...t......DFYDlS.LVDGaNlPhslsPp...........s....sC.....ssuCss.....................................-lsspCPs-Lps......................................uCpSAC...........ts.....FtsspYCC.....ts.ststsCt....PotYSphFKptC..PcAYSYAhDDsoSo..FTCs.....u...ss.YplsF..C.P ......................................................................NpCsaTVWsuhhss.........s...................sG..ht..L...s.Gpo...hsls....ss..ss.....h..uGRhWu........RTsCshss......................sG..phpCtT..GDC....u..G.....hl.....pCp...u..s...ut.P...P.s.TLA..EasL...................st..ss....Da..YD...l.S.LVDG.a.NlPhthsPp...................t...sssC....psssCss..............................................................................................................................................s.ls.t.....CPsp..Lps.....................sh................uCps.sC..................................s.....a.t.p.sph.CC............................s.......tsC.......................so....a..SphF.Kp.t.C.P.pAYSYshD.D.....o....os......ao.Cs........s......ssY.lhF.C............................................................................................................. 0 129 377 515 +4620 PF01946 Thi4 Thi4 family Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes Swiss:P32318 a putative thiamine biosynthetic enzyme. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.60 0.70 -5.26 8 671 2012-10-10 17:06:42 2003-04-07 12:59:11 12 29 532 20 351 23606 13980 162.40 35 48.14 CHANGED SRAhscpYacDLlcYAEoDVlIVGAGsSGLoAAYYLAKp..sLKVAIlEpplS.GGGsWhGG.LFsthVVc+PA+thLcElGItYE-pschhlVtcAA.FsSTlhSKslp.sslKlFNussVEDLIlR-......spVuGVVhNWohVphu..H.....hDP.TIcA+...............lVlsuTGHDushuuhsVKRl..h.ts..tclsG.+uh.hspAEcslV..+sTREVhPGLaVuGMtlutlcGAsRMGPhF ..........................................................t.........h....p.....h...c..h..D...VlIVGAGsuGLs.uA...h...h...L......u...ct.........sl....+....l.s.l..l...E......p........p......h.......t...........G.......G........s......h..............h..........h........G.......u...........h.....h...........................h.................h................h....p......t........................................h..........h...................t....h.........t...........h.........................................................................h.....................................................................h........h.................h..............s.h...h.........h......hp...D.lh.h.........................l......u.h.................................................................h.DP......h....................hhht.sG.H..s..............pp............................................................................t..s.h....uE..hh......s..................................................................................................................................................................................................................................... 0 103 208 290 +4621 PF01964 ThiC ThiC family Enright A, Ouzounis C, Morett E, Bateman A anon Enright A Family ThiC is found within the thiamine biosynthesis operon. ThiC is involved in pyrimidine biosynthesis [1]. The precise catalytic function of ThiC is still not known. 
ThiC participates in the formation of 4-Amino-5-hydroxymethyl-2-methylpyrimidine from AIR, an intermediate in the de novo pyrimidine biosynthesis. 25.00 25.00 27.60 27.60 24.10 24.00 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.32 0.70 -5.96 113 2842 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 2688 6 832 2312 704 430.20 62 77.26 CHANGED TQhchA+cGllTtEMchVApcEsls..................................sEhlRpplApG+llIPsNhsH...phcPhuIGcuhpsKlNANIGsSsstssl-pEl-KhchAhcaGADTlMDLSTGG.clcplRctIlcso.slPlGTVPlYQAh....tchpstlh-ho.-.hhcslccQAcpGVDahTlHsGlshchlpthptt.pRlhGIVSRGGSlhsuWMhtppcENPLYppFDclLEIh+cYDVolSLGDGLRPGsltDAoDpAQhtELhsLGELscRAhctsVQVMlEG.PGHVPlspIctNlclpKclCcsAPFYlLGPLVTDIAPGYDHIouAIGuAlAuhtGAshLCYVTPuEHLuLPsh-DV+EGlIAtKIAAHAADlAKs..hssAp.c+DptMucARtshDWccQFpLulDP-+A+ph+ccshst......pschCoMCG.caCuh..+lspchh ..........................................TQhcYARpGIITsEMEalAlRE.N.h.s.............................................................................sEFVRcEVApGRAIIPANIN.H...PE...E.PMIIGRsFhVKlNANIGNSAloSSI-EEVEKlsWAhRWGA.DTlMDLSTG.c.pIH-..............TREWIlRNS.PVPIGTVPIYQAL....EKVsGhsE-L.TWEh.FRDTLlEQAEQGVDYFTIHAGVhL+alPhTA...cRlTGIVSRGGSIMApWCLs.HH..pENFLYpHF-EICEIhttYDVohSLGDGLRPGSItDANDcAQFuELcTLGELT+hAWca.DVQVMIEG.PGHVPMphIcpNMchphchCcEAPFYTLGPLsTDIAPGYDHITSuIGAAhIGWaGs.A.MLCYVTPKEHLG.LPN+-DV.Kp...GlIsYKIAAHAADLAKG...HPG.Ap.hRDsA..hSKARFEFRW-DQFsLuLDP-pARtaHDETLPp-s...........tKsAHFCSMCGPKFCSM+Iop-lR....................... 0 268 553 714 +4622 PF02568 ThiI Thiamine biosynthesis protein (ThiI) Mian N, Bateman A anon COGs Family ThiI is required for thiazole synthesis, required for thiamine biosynthesis [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.08 0.71 -4.86 8 2899 2012-10-02 18:00:56 2003-04-07 12:59:11 9 10 2786 3 551 4081 1379 188.10 39 45.56 CHANGED GopG+VLuLLSGGIDSPVAuahhhpRGscVshlaFhstshpstcshcKlc+LAplLucaps.hph+LhVhsapclQc-lhc+s....sEshpsVlh+RsMh+lAphhAcchsscAlVTG-uLGQVASQTL-NLpVIpsso.shsIlRPLIGhDK--IlclAKcIGTa-ISIc.c.-sCohhs.K+PsT+AchppVcKhcpclph ................................Tpt+slhL.lSGGlDSs.VAuYhh..h.+....R...G...l...c..lch.l.a...F..................s...h.......s.....u....s....p.......s..h.......t...+......s....p......s........h......t...p....h.....h.....s....c.....a...u........u.................p......l.......c.....h......h....t....V......s.....F.....s.....p...l..........t.....c..I..h....ccs.................sc...s.......h....h.......l.l.......h...+.R..h.....M..h.Rh.A.s..c..l..s..c.c.h...s.s......u.l...l..TG.......E..u.....L......G.........Q..V........u......S.........Q......T......L..p......s..........h.p..............s....I...s..s.........V....o..........s...........s.............lL..R....PL..l..s.......h.......D..Kp-IIplAccI...sT.......-huhp....-hCsl.hs..cpPpscshhpchpt.Etph......................................................................................................................................... 0 185 341 458 +4623 PF00975 Thioesterase Thioesterase domain Finn RD, Bateman A anon Pfam-B_180 (release 3.0) Family Peptide synthetases are involved in the non-ribosomal synthesis of peptide antibiotics. Next to the operons encoding these enzymes, in almost all cases, are genes that encode proteins that have similarity to the type II fatty acid thioesterases of vertebrates. There are also modules within the peptide synthetases that also share this similarity. With respect to antibiotic production, thioesterases are required for the addition of the last amino acid to the peptide antibiotic, thereby forming a cyclic antibiotic. 
Thioesterases (non-integrated) have molecular masses of 25-29 kDa. 26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.24 0.70 -4.13 38 5677 2012-10-03 11:45:05 2003-04-07 12:59:11 15 590 2120 48 1644 8115 689 222.20 18 14.88 CHANGED tpLhshP.uuG.usphapsluptlss....hshtslphsuct.......hpshpplscthhstlpphtscs...PaslhGaShGuhlAaElAppLppp...GhtspsLhlhst.sPhh.sttttstthspst...............hhsthpchsth...st.hhtspphhtthlssltushpshtsht.............thshpt...........................hthhhtssc.hsshs....spt.............Wpphsstshp..hchhsGsHFhlhp...t....plhptlpptl .......................................................................Lash......s..u..u...G..sshta...t.s..l....sp....t....Lssp.......hslh..u..l..p.....h.suht.............tt.............h....p....s.....l.....p........p...h......s....p....t......h.......h...p...t...l..........t..........p.....h...........t...........s.....p............s........P............a............h.............L.h.Ga.S.h.Guh..lA..a-...h.A.........p...p....Lppp.........Gp.p.......l...t.tL..h...l...h.....-....s.....h...s.....P..t...h.....t........t...t...t.......t...t...h.ttt.t....................................................................hhttht..p.ht.th...........s.t..h....h.t..p....t....t...h..h.......t.....h..h....h...s..h....h...t.....s...s..h.t...h..h..t.ph...t.........................................................................................h.hh.h.h...s.t....t.....s............s....h..t.............t...............................................................................W.t.t.h.h.t....t.t.ht.......hh..h...s.u.s..H.h.hht....................h................................................................................................................................................................................ 
0 397 893 1351 +4624 PF01289 Thiol_cytolysin Thiol-activated cytolysin Finn RD, Bateman A anon Prostie Family \N 19.60 19.60 20.70 20.50 19.10 18.80 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.59 0.70 -6.12 5 995 2012-10-01 20:08:01 2003-04-07 12:59:11 14 5 457 12 47 624 8 335.40 41 87.32 CHANGED ss-pIDcaItGLNYNKNcVLsapGEuIcNhsPKEGhKcuscFIVVERKKKSINTNsuDISVlsSloSRTYPGALllANc-LlENQPDlLsVKRcPlTlSIDLPGMsNpDNpIsVpNPTpSNVssAVNsLVs+Ws-KYSpsa.NlPARlQY--pMAYScSQLpAKFGssFKslNNSLsIDFsAIScGEKQlpVluFKQIYYTVSVNtPsNPSDlFsKSVThE-LQp+GVSAEsPPlYISSVAYGRsVYVKLEToSKSs-VcAAF-AAlKGsSVpussEacsIl-NSSFKAVILGGDAp-cscVVTGDlsslRDlIK-GAsFo+KNPGlPISYTTsFLKDNplAsV+NNTEYIETTSTAYocGKINLDHSGAYVAQFpIoWDElSYDscGpEVVT+KsWDcNs+DKoAHFTTSIPLPGNARNIsIaARECTGLAWEWWRTVlD-RNLPLVKcRNVSIWGTTLYPpsSsTV ....................................................................................................................................................................................................................t.hasuul..ssp.h....ps...t.l.....h.t.ht..phphs.Lsu......s.h..h.p...ss.ushttu.lsplltph..t.t.....t.......sphpYp...s.o.ppl......th..s..ssh.....p....h.......ht..s.sltlsFt....ul.....tp.Gchp..........l.......hsaKQI...a...YsVss..s...sp.pPuch..FtpsVThcpLpthGlsspt..P..salSsVuYGRplYlKLp.Ts....S...+....S....p..c........Vp....A........A....FcAh...l...p........s....h....p....l..t..s....p....s....p..h....p..pI..h....c....pophpAVlhGGssp.ptsp....ll...s.t.p.......hs..l.c...sllp..csup.a.stc..pPuhP.IuYTosFL+DN.....hAs..h.pN.s..o-Y..l.ETps.p.tYpsucl.slDHSGAYVApa.IoW-ElsaDppGpEllp.+sW-tNspshTAtFspsI.l.tN........hRNlplhhccpTGLhWphWpTlh-ppsl.h..phplohhGTTL.sphp................................................................. 0 13 36 41 +4625 PF00108 Thiolase_N thiolase; Thiolase, N-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Thiolase is reported to be structurally related to beta-ketoacyl synthase (Pfam:PF00109), and also chalcone synthase. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.71 0.70 -5.41 22 14120 2012-10-02 12:25:54 2003-04-07 12:59:11 18 36 3652 158 4432 24361 7473 245.80 34 62.42 CHANGED hcsVVIVuAsRTPlG.uatGuhpshsAs-Luuhslcuslc+ss..lssp..pVs-lIhGsVLpuut.GpssARpAuLtAGlPpplPAhTlN+lCuSGLpAlshuuQtItsGpu-lllAGGhEsMSpsPahh.....+sGh+hG.sh.hhD.hl..............hDuLhsshsthhMGlTAENlAcpasISRcpQDpFAlpSppKAttAhpsGpFpsEIlPVpl....p+Gc...hslspDEt.R.ssThEsLupL+PAFpKt.GTVTAGNAStlsDGAAAlllMocs ......................................................................................ppssIls.uhRTP......h..........G....p.......h........t......G.........uh.....p..s...h..p..s.s-...L...uu..................hsl...c...s...h...lp.R...........s..................s.....l.s.s.p.................tl...-....-....V....l.........h..............G....s......V...h..p......s...u..........p.......G...t....s....h...A..R.p...u.......u....l......t...A.......G...........l..P............s......l........s...u...hT......l......N..+h...CuS.Gh.pA............lth...AuptIt....s.......G.....p.s.-....ls.l..A.GGs...E....s..M...S..p..s.P..h.hh...............p....h.s.....h..p...h.....s.....p...............s.t..hh....................................s..s....h....t...s....h.....h.....s....t......h......t......M...G.......o..A..E..p.........l...A..........c..p.....a........s.............l..oR....-....p.Q............Dta.A......h.pSp.p.+.................Att..A..h..p..s.G....h.F..........p.....p.E..I..l..Pl...ph...............tp....G....t......................hhhs..p.....D........Et...R......s.........t.........oo.........h..E.t.........L.up.L.........+..P...s.....F.....p..........t......t......................G.................o..........V......................T.A...............GN.uSslsDG.AuAlllhoc............................................................................................................................. 
0 1259 2619 3654 +4626 PF02803 Thiolase_C thiolase_C; Thiolase, C-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Thiolase is reported to be structurally related to beta-ketoacyl synthase (Pfam:PF00109), and also chalcone synthase. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.41 0.71 -4.69 22 14316 2012-10-02 12:25:54 2003-04-07 12:59:11 13 37 3621 158 4583 11836 4437 125.10 38 31.49 CHANGED lpPLA+IhuaussGVsP.plMGhGPs.Asp+sLc+AGls.lsDlDlhEhNEAFAuQsLust+plshD..................pKVNlpGGAIALGHPlGsSGARIlsTLlpphpccs.uphGlAohCIGGGtGsAhllE+ .................................................h.pPhAcl..h..uhussGs.........-........P....phM...s..h..G.P..s......A.s...p.......+.sLc+.....u.G.......ls....l....s.D....l....DlhE...........lNE....AFA.....u.......Q.sL.us.h...+...p...L.....u......l...s..t.............................................p..+..l..Nl.sGGAIA.lGHPlGAo.......G.......ARlls.oLlpp.......L.......c.......+............c..............s.......s.............p.............h..........G......l......s......o.....h....C..l..G..GG.GhAhllE......................................... 0 1267 2699 3788 +4627 PF00085 Thioredoxin thiored; Thioredoxin Sonnhammer ELL, Eddy SR anon Prosite Domain Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. Some members with only the active site are not separated from the noise. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.57 0.72 -4.35 50 24231 2012-10-03 14:45:55 2003-04-07 12:59:11 15 236 5259 369 9501 25842 8046 102.30 23 50.59 CHANGED slhhlssps..Fcphltp....s...c.VlV-FaAsWCG.CKtlAPha-clApchpt.....slhlsKlDss.cs..tsluscasVcuhPTlhha+sGptss..chsGu.hstsslhphlppp .................................................h...hstts.a..p.p.....l....t.p............p...........t..s.l..ll........c.....F..a.....A....s......W..........C...u......s....C.....+....t...l......s......P......h....h.......c...c...l...u...p...c...hpt...........................pl...p..h..s....+.........l.......-...........s...........-........p......p........................p..............l......s.................p.....p......a......s..........l......p........u......l......P.T...l..h......h....a.......c.......s......G....p.......ht...........ph........Gs....hs.tp.tl.tphlt..t...................................................................................... 0 3323 5599 7830 +4628 PF00585 Thr_dehydrat_C Thr_dehydratase_C; C-terminal regulatory domain of Threonine dehydratase Bateman A anon Bateman A Domain Threonine dehydratases Pfam:PF00291 all contain a carboxy terminal region. This region may have a regulatory role. Some members contain two copies of this region. This family is homologous to the Pfam:PF01842 domain. 
20.80 20.80 21.20 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.83 0.72 -4.39 16 4280 2012-10-02 00:29:19 2003-04-07 12:59:11 13 7 2590 4 1023 2991 981 90.50 33 30.33 CHANGED EtstlttspEtlhshphPEcsGuLh+Flpslush.s.IThFcYRtpss.cpuplLsGlplsptp-..s-hhpplpchuassh-hosscshphaL ................ERs.htptpEtlaslpFPEcPGALhcFl.p.tl....u....s...c........s..IThFpY.R.......p......p........s......s....c.......t......u..........plllGl..p....l.....s.....p.....s...p-...htplhp.pL..p..p.h...G..........a.shhDlocsphh+hal........................................... 0 240 560 825 +4629 PF04163 Tht1 Tht1-like nuclear fusion protein Wood V, Finn RD anon Pfam-B_64620 (release 7.3); Family \N 21.00 21.00 21.10 21.00 20.10 20.90 hmmbuild -o /dev/null HMM SEED 544 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -13.13 0.70 -6.11 2 96 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 92 0 70 102 1 333.90 18 73.00 CHANGED .hF..hR...hha.FhIhIpFh.oSE.hGclpshhphp.lhao-s.suhssLtpha...hhKSTChp-shthhlspC.lhNs..oID..sRlcoAIpholC-FptStl..hPppChh...GS..-Ch.cLESoSpWWhoaoupapclsplCh.s.L.hpKE..lcl.hNlT.l.cpF...l-hhl.HLh.Fp..pDp.N.hlDchsthF.N.s.E.p.ts.s.Rl..E..sLs.hpN.....hVp.c.lhpT.cQLcspIh-hNS.hhN...Essshp.plhpcLs-chs.cNsIt.StIschps-..s.hpc....LlphopD.lpp.hppN.chVNo....L.shohtLKcpLtuhp+..SEpQ.hpt.plLQhhsshLpsSh...hs.p.l..hhshhpphhp.ahthhSsl.usFAhhs.plFuThssl..chLphp++.l.sh.plpl.hlHh.shhhh.hV..ahWhT.phIhR.hualtlp.+haaL...LCull.al.a.KYRsS+hssch...lPh.c....Ya.pahtl..s.YLsshpsSLIDh 
...................................................................................................h....................................................................................Chp.Aht...hh.pC..h.....pu.....p..s....l..s.............tphp..s.....AhpLulCEhpsus.h..phPppC.s..............................t.p.h.ts...ClpsLEu.ssQaWToYSushpphsshC..ptsphshEK....-phLpla...pslTclhpph..........h.pt.h...p.p.hh........p...p.p.p.pt.p.....h....h...ppltthh.p..............p..............t...p....t.....p...h....t..............h........htt..........................h..p.............t.....h.t............................................................h....................................................................................................................................................................................................................................................................................................................................................................t........................................................................................................................................................................................................................................................................................... 0 17 39 61 +4630 PF02926 THUMP THUMP domain Aravind L anon Aravind L Domain The THUMP domain is named after after thiouridine synthases, methylases and PSUSs [1]. The THUMP domain consists of about 110 amino acid residues. The structure of ThiI reveals that the THUMP has a fold unlike that of previously characterised RNA-binding domains [2]. It is predicted that this domain is an RNA-binding domain The THUMP domain probably functions by delivering a variety of RNA modification enzymes to their targets [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.63 0.71 -4.31 48 6107 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 3468 22 1490 4208 349 137.50 18 30.26 CHANGED pclpphh.t.thphphh.tthttlhlhhst.....ttptltphhthh...........hl.ph..h...........hts.tsh-clhctstplhpcphht........oFtVcscptspp................phsuh-lpctlGphlhcphs........pVcLppPDllltlElhpp.pshltlpp ...................................................................................................................................hh..........................t.h.l..t...t............shptlhc.....shhhlh................................shp+.lh..s.........................tphsh.p.....s..h..c.-.la.p.t.s.h..s...l.sh...pph.hsht........................TF.sV....p.....scptscph...............................pass.hclpc.hlssulhcpht..............................pspVsh...p.s...P.....D.....lp...lplpl.pc-..tshl.h...................................... 1 431 822 1180 +4631 PF02511 Thy1 Thymidylate synthase complementing protein Mian N, Bateman A anon Pfam-B_1648 (release 5.4) Family Thymidylate synthase complementing protein (Thy1) complements the thymidine growth requirement of the organisms in which it is found, but shows no homology to thymidylate synthase. 
20.10 20.10 20.40 20.30 19.60 19.90 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.35 0.71 -4.94 163 1591 2009-09-11 08:36:29 2003-04-07 12:59:11 10 9 1413 90 459 1297 3344 199.20 24 79.78 CHANGED Vp...........Llsh....................s........s....s-phlstAARssass...........tph.st................................................tcspthl.chlhc..puH.....hSshEass....hoFtlc.soRuls+QllRHRh.hSa.sphSpRYs.......................................................p.hpph..t.........ahl.P......phpp.s....................htp...hhpcthppuh..................................ctYpcll.pt..........................u......hup....EtARhlLP..ustTclhhohNhRsLh+.FlpLRss........ppAQh.EIRplAtthhc.hlpc...hhPhl ......................................................................................................ss.thh..AuRhsats.......................ph.t..........................ppspthl.chl.hc......htH..............tSs..hEass....hoFtlp.lS+sst+..pl..hR......HRh..sSh.......s..phSp..RYs...................................................................p.hpp......p....................ahl.P................phpp.t...............................................tht.chhpp..sh..ppuh...................................csYppll...pt................................................................................s.hup......EhA.RhlL.P..tu.hpTchhhohNhRs.Lhp...Flp..LR.s.s........pc.A...ph...EIRp.lAtthhch.ltphhP..h................................................ 0 198 347 413 +4632 PF00303 Thymidylat_synt thymidylat_synt; Thymidylate synthase Finn RD anon Prosite Domain Swiss:P28176 is not included as a member of this family, Although annotated as such there is no significant sequence similarity to other members. 
20.60 20.60 21.20 20.70 20.30 20.00 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.71 0.70 -5.39 141 4583 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 4044 372 1007 3278 1831 266.00 44 92.72 CHANGED -ptYLcllccIl.......cpGpp+..........sDRTGsG...ThS.lFG..hphRac.Lpcs.......................FPLL.TTK...+Vah+ullpELLW.FlpGsTssphLp.....pp.sl+IW-t.u..............pph...............................................p.G-L.......GPlYGhQWRc...ass.............s..........................pt..lDQlpplIcpl.Ks........NPsSRRhllsA..........WNssDl.......spMALPPCHhhhQFaVs.............................c..G+...............................LSCpLYQRSsDhhLGlP.FNIASYuLLTcMlAphs.....G.....LcsG-FlHshGDsHlY..................................................pNHl-..tl.cpQLpR..pPpshPpLpl....p.....php.......sl.ca....ph-Dhp..l.sYpsas.tIc...................h.hAV .................................................................................................h.ptYl-Lhpc.lL.......cpGs..+..........................sD.RTGo.G.....ThS.l.FG..tQhRFs...LpcG.......................FPLl.TTK......+lhh+ullpELLW..FlcG.sT.......NlphLp...........cp.slpIWD.-WAscs........................................................................................................................................G-L.........Gs.V..YGh.QWRp...Wss....................G.............................................................................................................................pt.lD.Qlpp.llcpl...K..p........s.P.s.S...R.RhI.....lSA...................W.Nss.-l...................cpMALsP.CH.s..haQF..Y..Vs........................................c..G+..........................................................LSCpLYQRSsDlaLGlP...F.N.IASY.A.L.....Ls.pMl....Ap.s..............G.......L.c........sG-FlaohGDsHlY....................................................pN.Hh..-..p..s..c...Q..Ls............R............-P..t...s.h..P...pLhl.............s....csc.........................slaDa..........
....ph-Dhp....l.sY..csHs.tIK.hslAl...................................................................................................... 0 318 610 831 +4633 PF02223 Thymidylate_kin Thymidylate kinase Bateman A anon Pfam-B_484 (release 5.2) Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.27 0.71 -4.92 25 5133 2012-10-05 12:31:08 2003-04-07 12:59:11 12 17 4611 113 1395 4369 2991 185.50 31 82.85 CHANGED lEGlDGuGKTTpsphltctLpspuht..hhho+EPsuo.lGchlRphLh....ptphsshs...tslLFsAsRhpclt...phItssLppuphVIsDRahhSolAYQuh.tpt..hchlhplspsshh....pPDlslhL-.lsschulpRhpt+sp......ch.tcphchhpph+ptahcLsp..spphhllcAsps.lEplpppI ....................................................................................................hEGh-GuGKoTt....h....p....h..l....h....p.t.L........p....p.............p..........G............h.............p.......hl.......h..TR.E....P......G...G....o.......l......u......E....p.l...R.pllL...........pspp.h.ssps...............EhLLah..A.u..R..tpclt.............ph.I....t..P.....A...L....p.....p....G..p......h..V.........l......s............DR.....a.........h.....s......S.o........hA........Y.............Q........G......s........u........R......s.................l.............s.............h..........p..........h.........l.....t............p.....l........p.....p........h.....s..........hts........hpP............D.L..T....laLD..lss.-...h....u....h....p....R....ht.t.psp.....................s...Rh-.p...E..s....h....c.....F....a...p...+....l....+p.t...Y...h....p........l...s....p.........t.....s..p.......c..........h.......h..h..l...D.A.....s....ps..h-pVhp.................................................................................. 
0 460 873 1164 +4634 PF01290 Thymosin Thymosin beta-4 family Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 22.30 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -8.06 0.72 -4.36 25 536 2009-01-15 18:05:59 2003-04-07 12:59:11 15 14 139 15 223 588 0 39.70 49 73.45 CHANGED uDcPchsE.....lssFDKscLKKTETpEKNsLPTKEs.I-QEKptp ....................thsE......lEpFDpsKLK+TETpEKNsLPoKEsIcpEKpt......... 0 80 94 160 +4635 PF00086 Thyroglobulin_1 thyroglobulin_1; Thyroglobulin type-1 repeat Bateman A, Sonnhammer ELL anon Swissprot_feature_table Domain Thyroglobulin type 1 repeats are thought to be involved in the control of proteolytic degradation [2]. The domain usually contains six conserved cysteines. These form three disulphide bridges. Cysteines 1 pairs with 2, 3 with 4 and 5 with 6. 20.60 20.60 21.30 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.01 0.72 -4.62 20 2274 2009-01-15 18:05:59 2003-04-07 12:59:11 13 161 191 10 1102 2134 3 67.10 28 20.34 CHANGED Cphphtph...h............tpsspshYlPsCc.ccGpYpshQCp.....ppG.CWCVDst.GpclsGo.ptpGc..spC .......................................................................ttthtsha...l.P..p.Cc..cpG..t..ap.s............h.Q.Cc.............................psG..hCWCVDtp..G.ppl.s.G...o...p.h.t..sp..spC................................ 0 270 360 665 +4636 PF04278 Tic22 Tic22-like family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The preprotein translocation at the inner envelope membrane of chloroplasts so far involves five proteins: Tic110, Tic55, Tic40, Tic22 (this family) and Tic20. The molecular function of these proteins has not yet been established [1]. 
20.10 20.10 20.50 20.60 20.00 19.80 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.49 0.70 -5.14 13 173 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 89 1 101 185 2 211.40 21 70.51 CHANGED hp.s...............................t..t..psh.R.shsh.......hhhsshAhuu.hsstt.........t.AhuLot..........c-Vhc+LsuVPVFTlssosspPlLsospst....spSluhlalsppDAcsh.Lspl+pppP................-h.upsspVssloLuplYclspt.tt.........csptlsFpFlP-spQlcsAhplhcpsGpph.spFpGVPlFhucu..........LslppcscphhPhFFsK..........EDLpttLc+hppppsclssuhp...IcVssLEsllcshcsuc.ssth-cllFIPPtcul-ah.pphpp ...................................................................................................h..h..........................................................................................shs...........tpl.phLtslPVahlsstps....t..l.....hht..p..................ttls.h...hhh..ptp-Apth.lt.....p.lpt.....tp..............................h..ttthpVhslsL..sps.Y.ph..............................pspsl.hFpalPs.ppl.......psAh.......pl.hpp..p......spt...pt...........ht..G......V....P.lF.scs...............L..sl..p...p.t....s.....c.p........hhPhFFpK...................pslpp.lpph.ppp.p.....t..........t.p....I...tVhslpslltthp.p.....p..pt.hpphhhlPst...ht........t........................................ 0 33 73 93 +4637 PF01826 TIL Trypsin Inhibitor like cysteine rich domain Bateman A anon Bateman A Domain This family contains trypsin inhibitors as well as a domain found in many extracellular proteins. The domain typically contains ten cysteine residues that form five disulphide bonds. The cysteine residues that form the disulphide bonds are 1-7, 2-6, 3-5, 4-10 and 8-9. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.41 0.72 -3.85 167 3970 2009-09-12 21:20:27 2003-04-07 12:59:11 12 461 156 15 2435 3452 1 56.00 32 11.06 CHANGED C.s..ssppap.pC..ss..sC..toCsshps........Cs.........tt..ChtG..C....h....C..psG.al..hsss......pCV.hscC ..............................C.s..sspt.a...p...tC......ss...........sC..st..oCpshsss.................Cs...................pt..C..h...pG..C......h........C..............ss..G...hl.........hsss........pCVshppC............................. 0 585 787 1646 +4639 PF00121 TIM Triosephosphate isomerase Sonnhammer ELL anon Prosite Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.63 0.70 -5.31 56 7470 2012-10-03 05:58:16 2003-04-07 12:59:11 13 10 5835 333 1528 5657 2745 212.10 40 91.49 CHANGED hhlsuNWKMst...shpcspshhpp.hstthts......slc...lhlsPshshLps.spphlp.......ssplt..lGAQNsphps.p.GAaTGElSsphLpDhGspaVllGHSERRphat...EosphlspKhttulcpGLpsl.lClGEolpp...+csspohplltpQlpshlptlstp....t....sllIAYEPVWAIGT.G+sAosppspcsathIRphltph...stphupplpIlYGGSVsssNspclhppsslDGhLlGuASLcs-sFhsIl ...................................................hlhGNWKhNt...sht..t.stth...hpt...htthhss..............ts-......lsl.ssPhhhLst...s.t..ph.hp.......................s.s.p...lt......luAQ.....Ns............hh......pt......s......GAaTGElS.stMLp-hG.spaVllGHSERRp..h..a...t...............EoD....Eh.....lscKsttAh.pp.GLps.IlClGE.......oL.-p............RE.u.....G..p.......o...-..lltpQlcusltslstp...t.....hp..................plVIA.YEPl.WAIGT.Gc.s.A.o.s.ppApphpt.hlRphltt..........stthup.p..ht.l.YG.GSVpstNst-lhs.....p..s.cl....DGhLlGGASLcsptFhtl....................................................................... 
0 528 988 1298 +4640 PF02466 Tim17 Tim17/Tim22/Tim23/Pmp24 family Mian N, Bateman A anon Pfam-B_2241 (release 5.4) & Pfam-B_7792 (Release 8.0) Family The pre-protein translocase of the mitochondrial outer membrane (Tom) allows the import of pre-proteins from the cytoplasm. Tom forms a complex with a number of proteins, including Tim17. Tim17 and Tim23 are thought to form the translocation channel of the inner membrane. This family includes Tim17, Tim22 and Tim23. This family also includes Pmp24 a peroxisomal protein. The involvement of this domain in the targeting of PMP24 remains to be proved. PMP24 was known as Pmp27 in [3]. 29.70 29.70 29.70 29.70 29.60 29.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.81 0.71 -4.09 129 1856 2009-09-11 16:05:24 2003-04-07 12:59:11 14 27 332 0 1255 1746 12 125.60 21 61.48 CHANGED ssstchhhss..stuathGslhGuhhsh............................................................hhthh.pssshp...phptslpsstp...s.......hutshushuhlassh........csslpt.h.R.u...+....cDhhNulhAGshoGu.lhut.....ps....Ghpshshu.uhshushuus.....hpth......tthh .............................................................................................phh.ss.stuhhhGshhGsh.hthh...................................................................................................................th....psss..hp......php.t.s.hps..htp....tu........hu.ts...hushuslaush.............psslpp..h...R..s....................K.........cD.hNulhAGshTGu..lhuh...............ps.......Ghpsh.hh...u.u.hh.h.us.ht.uhhphh....h........................................ 0 381 672 999 +4641 PF04821 TIMELESS Timeless protein Mifsud W anon Pfam-B_3454 (release 7.6) Family The timeless gene in Drosophila melanogaster and its homologues in a number of other insects and mammals (including human) are involved in circadian rhythm control [1]. This family includes a related proteins from a number of fungal species. 
25.00 25.00 26.50 26.40 24.50 23.50 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.53 0.70 -5.14 23 381 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 261 0 242 392 4 240.50 26 24.27 CHANGED -sssYtlu-DALtsL+DLh+aL+h.D-php..-ltRsLupuplVpsDLlslLspahp..........cs+ls.AslcLLVsLThPlEh....cs..p....ssst++ph..Lpph..uYKcAhhs...pplhpsllc.hhhsshthshu-RopcDpslIchlLhLlRNILtIssssptptcsD........-cpss+sphl.AhppQslhcLlLhlsSs..tppash.....hlLEIlhhhh+s....sspcLhtsstptotpc....sc-Lpshhc+Epu.p.....+thsp.sssoRHSRFGshhhVp .................................thYhhts-sL...tsl+Dlh+hl+h.Dppt...sltptlutsp.llpsDLlP.lLhphtp.........................................pppl.h.ss.lclhV.LT..Ph.h...........tt......................s.t...p..pph...pl.ph..sYKc.uhhp............................tphhtslhc.hhh...hl......t..h.s.......h.......tcR......stc-.........phllchlLhLlRNlLtl...st.s.....t..t......phpsc..................................psts....pspll.shp.p.sl.cLLLhlso...s............p....p....p...ash........hllEIlhhlh+s..............ssp..pL...h...t......s.t.........p.t.........s..h......p..c.....................................................t........t.............p..............L.pshh...p.pE.tt..p.....................pthht..p.ssRHucFGs.h........................................................................... 0 90 138 205 +4642 PF05029 TIMELESS_C Timeless protein C terminal region Moxon SJ anon Pfam-B_5695 (release 7.6) Family The timeless (tim) gene is essential for circadian function in Drosophila. Putative homologues of Drosophila tim have been identified in both mice and humans (mTim and hTIM, respectively). Mammalian TIM is not the true orthologue of Drosophila TIM, but is the likely orthologue of a fly gene, timeout (also called tim-2) [1]. mTim has been shown to be essential for embryonic development, but does not have substantiated circadian function[2]. Some family members contain a SANT domain in this region. 
29.40 29.40 30.00 29.90 28.30 28.90 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.88 0.70 -5.58 5 232 2009-09-11 11:39:37 2003-04-07 12:59:11 8 10 182 0 164 239 1 364.30 22 35.30 CHANGED hLHRIAaDLcMsuLLFQLSLFsLFp+lLS-Puussp.....KELcpls...+allRKFlclAssNPKlFVELLFaKssuss+ElspGYsst-stosSKRA.....sWosEEEupLc-Lalsspcl...EspDVV-WILcsLssssRsR+sll++Lh-...hGLtDoscDhhpsKS.......A.ppKupphlLWT........................uDp-l.ELpcLa.......--aRDssDlLs+l.............hcshsu+RS+AclVcK...LLsLGLlo-+RpLp.hKK+RKKpusu.h.sst.p.cs....htE-...E-p...hPcpcsccsccch..utsspsssslcs.pLtppl+pEshphsLhWLQssLhcAA-DcEED.tssQulPLlPLpEcsEEAMENcQFQ+lL+pLGlRsPt...SGpEuaWRIPhhLssspLhhhut.Lut.E..tE.c...EhpschsGEp.u-..Ep...Eccsphhct..pARpRKtGlsoscpc.ths.........cp..+usPKtpph............................................pttptppht........uo-u-pEc-D..hDE......sp.hE.psp-ts..s+p.ttsts-h...sshs.tp..c.s.hssDs-pAD-.usPch+c-KRh...............................tpls.tP.pRR+Lt...llEcDDE ..........................................hhaRlA..achchpshLFplslhplF.ch........lps......t...shs.........+Eh.phs....phll++hhph.h.tp.......p..s.....tlhsEl......LF.K..ts.sss.h.lp.Gatp......s....s.t.pc.t.................hhstt.tptp..p.plh....h.hts.....t.tt.-llphlht.Lss.s.ppR+t.....h......tt.....t.............uh......t..........h.......................................tpt......lhp..................................................................t....thp..lh...........tt.t....p.lt...............................hp......tt.......sp.t..p............hh..s........h..tt...tp.h...t...ttttt....t.............................................................................................................................t................t.......................h..h....tt............p...........s.t..............t........................................................................................................................................................................................................
..............................................................................................................................................................................................................................................................................................................................................................................................................................................ttt.t..................................................................................................................................................................... 0 52 85 130 +4643 PF00965 TIMP Tissue inhibitor of metalloproteinase Bateman A anon Pfam-B_1239 (release 3.0) Family Members of this family are common in extracellular regions of vertebrate species 21.40 21.40 21.50 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.73 0.71 -4.68 25 416 2012-10-01 21:39:20 2003-04-07 12:59:11 12 6 133 18 187 403 1 147.00 35 78.34 CHANGED -ACSCs.P.sHPQpsFCsuDl...........VIRAKl....Vucp.hpsss...........thhpYclKphKhaKGhsp.....clphlaTssspul............CGlp.L-sN.tcpYLIoG..+h..DGchal..slCsalp.W-sLohoQ++GLsptYptGCs.C+Is........ChshPCh......lousscCLWTDh.hhptsh.upQucHhsC.lpcssupCuW ..................................................uCo.C......cP.Qp......taCp.uch...........Vl+uK..h...........lupp.stt.ss..............................hhhpYplK.hKhaKG.pp...........clph.....laT.s..sspul............CGl.p..Lcss..........pppYLls.G...ph....sGc.hal..slC.s..al....Wsp..Loh...sQ++....ul.....stpYp...hG...Cp....C.plh.............C.........h..sC.............hpu.scClWhsh....h.........p....h.t.............uhpuphhsC..h.p......s.CsW...................................... 
1 51 65 110 +4645 PF03549 Tir_receptor_M Tir_receptor; Translocated intimin receptor (Tir) intimin-binding domain Griffiths-Jones SR, Moxon SJ anon PRINTS Domain Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation [1]. This family represents the Tir intimin-binding domain (Tir IBD) which is needed to bind intimin and support the predicted topology for Tir, with both N- and C-terminal regions in the mammalian cell cytosol [2]. 25.00 25.00 121.40 120.10 24.40 21.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.18 0.72 -3.85 3 132 2009-09-10 22:06:29 2003-04-07 12:59:11 9 2 104 8 1 79 0 66.00 83 12.33 CHANGED PDsAASsAEoATKDQLTQEAFQNPDNQKVNIDENGNAIPSGELKDDVVAQIAEQAKAAGEpARQQA PDAAAsAsEoAT+DQLTKEAFQNPDNQKVNIDE.GNAIPSG.LKDDVVApItEQAKAAGEpA+QQA 0 0 0 1 +4646 PF04553 Tis11B_N Tis11B_C; Tis11B like protein, N terminus Kerrison ND, Mistry J anon DOMO:DM04398; Family Members of this family always contain a tandem repeat of CCCH zinc fingers Pfam:PF00642. Tis11B, Tis11D and their homologues are thought to be regulatory proteins involved in the response to growth factors. The function of the N terminus is unknown. 
19.10 19.10 22.60 21.20 17.90 15.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.80 0.72 -3.88 6 107 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 40 0 47 82 0 114.70 52 31.66 CHANGED MsTsVlSs.hFDh.-VhsptNKhhsas....s.......PSsus.........sLhDRKsVGTPuhsh.F.RRHSVTsss..............uKF..sQNQhlNs..............LKh-PS.....s.sTuhsNKEN+FRDRSFSEsGER...LL.....ppppPGG ............................MoTTLlSA.haDl..-hLsKs.......p.K.Ls...................s.shs..........shLD+KAVGTP.....suu.........G.......F.RRHSso..p.................sKF..ptNp.hh....uS.....................LK..E.Pu...........sAL..s+E...s+FRDRSFSEsGER...LL.....pQpQ.Gu.......................... 1 4 9 19 +4647 PF01108 Tissue_fac Tissue factor Finn RD, Bateman A anon Prosite Family This family is found in metazoa, and is very similar to the fibronectin type III domain. The family is found in cytokine receptors, interleukin and interferon receptors and coagulation factor III proteins. It occurs multiple times, as does fn3, family Pfam:PF00041. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.52 0.72 -4.31 59 788 2012-10-03 16:25:20 2003-04-07 12:59:11 12 11 75 71 325 782 5 101.60 24 26.72 CHANGED hhhhLlshlLhh..........ssssssslssPp......slphpShNFcplLpWcP.....sts.ssssYoVpap.......htpp..........................pWpsh........Ctphop.ppCDLTsthts....hptsYhsRVpuhsssppSsh ......................................hh.............................stlssP.p......slp.hp.S.....h.N.hc..p.l.L.pWps...........ts....s..tss.sYoVpap............hspp.................................pWpsh.........tC.tphs.p..ppCDlos.thts....hppsY..hsRV+AhhusppS........................................................................................................ 
0 20 41 128 +4648 PF00265 TK Thymidine kinase Finn RD anon Prosite Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.29 0.71 -4.71 13 3293 2012-10-05 12:31:08 2003-04-07 12:59:11 13 11 3022 53 635 2063 1299 177.10 38 89.01 CHANGED GpIplIhGPMFSGKSTELlRplpRYpluph+ClllKashDsRauss.....VhoHDshphsAh....sostLp-lhsthpp..hcllGIDEuQFFs..-llpFs-phANtGKhVIlAuLcusFp+c.FssIhpLlPluEpVsKLpAlC.hCa+-.ASFotRLs......sEsplhlIGGs-pYlSVCRpCY ...........................................................................................pl.hhh.GsM.uGKo...p........pLLpt....s.....hp.a..p.............t.....s.hp..s.l....l...h.pssl.D.s...R..hutu................pVs..SR..hGhptpAh....................ss.s...h...h....p....c....l.....h..p.....p......h.........p..p.......p....h.....c...slhlDEsQF.....h.....s...pp.......l.h.p.l.s.c.l.l.s...p...h.s.l..sV..lsa......GLcsDF.+s-hFt.u.othL.L.s..h..A.D..c..ls..c..l..+..sI..C.....h..CG.....+..c.....AohshRlsps...............shh-ucQ.....l.....h...I...G.......G.....sE.........p......YhslCR+Ha............................................................ 
1 226 409 532 +4649 PF00693 Herpes_TK TK_herpes; Thymidine kinase from herpesvirus Bateman A anon Pfam-B_186 (release 2.1) Family \N 25.00 25.00 26.90 34.60 22.40 19.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.94 0.70 -5.24 14 508 2012-10-05 12:31:08 2003-04-07 12:59:11 13 2 87 61 0 548 0 249.80 52 70.99 CHANGED GsaGlGKTTTucslsstssst.s.hLahPEPMsYWRshFs.oDslsuIhssQsR+ppGplust........cAshlsAphQupassPYhlhattlsshh...uuphs.s....s.P....slTLlhDRHPlAuslCaPhARYllG-hohpsLluhlAslP.EsPGsNLVVsoLs..sEHhcRLtsRuRPGEplDhshlsALRNVYuhLsNTspaLpsGss.....WRcsWspLshhssshhtphsp.sshstp...t.PsLpDTLFAhhKs.ELhsspGs.LhslHAWsLDuLss+LpsLpVFslDls .....GPHGhGKTTTophLsuhuu.R..DcIVYVPEPMTYWpVLGA..SETIuNIYsTQHRLDpGEISAG........DAAVVMTSAQITMuhPYAlTDAVLAPHI...GGEusuS..pAPsP...uLTLlFDRHPIAALLCYPAARYLhGuMTspAlLuhluhIPss..hPGTNIVLusLP.EccHlcRLu+RpRPG.E.+.lDL..uMLsAlRpVYthLsNTl+YLQtGup......WR-DWGpLsu..sAh..phscsps....s...t.PcIt-TLFslF+sPELhsssG-.LhplaAWsLDsLAp+LpshplFhLDh.c.................................................................................................... 
0 0 0 0 +4650 PF03219 TLC TLC ATP/ADP transporter Mifsud W anon Pfam-B_2261 (release 6.5) Family \N 19.30 19.30 20.20 19.30 19.10 19.20 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.75 0.70 -6.02 27 560 2012-10-03 03:33:39 2003-04-07 12:59:11 9 8 194 0 186 483 158 424.20 32 84.80 CHANGED tsptppthuc...........h+thhWPlcthElpKhlPhslhhFhILFNYslLRshKDolVlTuh...GAElIsFLKlWsllPuAllhhllYsKLuNhhopEplFYhllssFLsFFhlFuallYPhp-hlHPss.s.phhshl.sshphhlshhtpWoaulFYlhuELWuolVloLLFWtFANpITplpEAKRFYsLaGlhuNhuLlluGphlhahus.........hhpths.sl...csathplphlhslllluGllhlhlahhls+.V.........Lsssph...........sshppt.+KpKsKholt-ShphlhpS+YlhhIAlLVluYGlsINLVEssWKupl+plYPss.......s-YssFMGphsshTGlsollhh.hlGusllR+hGWthuAllTPlllllTGlhFFuhllFpcpls.hhush.GhsPLhlAVhlGuhQNlhSKusKYSLFDsTKEMAYIPLDpE.KsKGKAAlDVlGu+lGKSGGulIQphlhllh...uohss.sPaluslhllllllWlhuVppLs+pa ...............................................................................................................................................................hpthhhslp...hEhpKhlPhshhhFhIhhN.shlRshKDslllst....uAEhlsFlKhahshP.u.hlhhll..YsKLs.s.h...hshct.lFahlhs.FlhFFslFuhll.a.P..t.....ch....hH.......ps...h..............p..............h..........h..............st...ht.h....hl...h...l...ht.pWoas..laYlhuELWsslhhslLF...WtFANpITpscEAKRFYslhulhuNhullhuG.hlhhhup........................hh..h...ts...ss.th.l....h..h.slllhsuh.lhhhha.....h.hl.s+..l............................lsp.h..................thhpt.cpp+.Kh.uhh...-Sh.phlh....pS+YlhhlAlll.lsYGls.......I.N....L.........VEs....sWK....upl....+....p..ha.P.ss................ppY...ssa..h.Gpa....hh..s.Gl...ss...l................hhh...hl.....u..s...sl...l.RphGWhhuAhlT...Pl.hh.h.....l..T..G......h.h.FF.....u.hlh..F....t......p...h.......s.....h....h.....ss.h.................h..h.......sP.Lh......l..A.lh....l...G....ul..Q..NllS.....KusKYoLFDsoKEMAYIPLDp-hKs.KGKAAlDVlus+lGK..SuGullQphhhh.lh....uohssh..sshlhhh.hhllhhhWlhusttLsppa...............................
....................................................................... 1 91 121 149 +4651 PF05017 TMP TMP repeat Bateman A anon Pfam-B_1012 (release 7.6) Repeat This short repeat consists of the motif WXXh where X can be any residue and h is a hydrophobic residue. The repeat is name TMP after its occurrence in the tape measure protein (TMP). Tape measure protein is a component of phage tail and probably forms a beta-helix. Truncated forms of TMP lead to shortened tail fibres [1]. This repeat is also found in non-phage proteins where it may play a structural role. 15.00 2.90 15.00 2.90 14.90 2.80 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.78 -6.01 0.78 -6.19 0.78 -2.30 95 345 2009-09-16 13:22:24 2003-04-07 12:59:11 9 15 44 0 65 324 23 10.90 35 10.45 CHANGED WssIpshhssh WsuI+shhos..... 0 54 56 56 +4652 PF02581 TMP-TENI Thiamine monophosphate synthase/TENI Mian N, Bateman A, Griffiths-Jones SR anon COGs Family Thiamine monophosphate synthase (TMP) (EC:2.5.1.3) catalyses the substitution of the pyrophosphate of 2-methyl-4-amino-5- hydroxymethylpyrimidine pyrophosphate by 4-methyl-5- (beta-hydroxyethyl)thiazole phosphate to yield thiamine phosphate [1]. This Pfam family also includes the regulatory protein TENI (Swiss:P25053). 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.00 0.71 -5.16 89 5722 2012-10-03 05:58:16 2003-04-07 12:59:11 12 31 3919 62 1431 4669 2676 177.90 29 71.70 CHANGED lYhlTsst.t........tp...hhchlcpAlpu..........G.lshlQlRpK.....p....hss...pphhphApplttlsc..ph..ss.hllND..clclAht.....lsA.......DGVHlGQ...cDh.ssspsRplhusshllGlSspshp-...stpAt...ptus..DYluhGslasTsTKtss..pshGlptl.pphtpthp..........l....PllAIGGIshpslsplh........psGssulAVloAl .............................................................................................................................ahlss................tt...hhphl..cp.s.l..pu.....................G...ls..hlQ..lRpK..................................s......h.s.s......p..c.hh.p......h......stp......l.ttl.s.c........pa..........ss...ll..lN......D.....c.h....cl..Aht..........h.s.A...........c..G....V..Hl..GQ................c.D...h.........s.........s......s.......p......h....+......p....l....h...........s.......s................s.....h.......l.......l.G.lS..s.+..shp-.....htpAt..................t.t.ss......DY.....lu..l......G....s.l.a.s........T.s..o.......Kp...s........s...ss..hG......l....p..t...l...p.ph.t.p.t.hs..............................l..PlVAIGG..I..s..h..p..s.h..s.p.lh..............t.sG...s...sulAVlsul............................................................................................. 0 467 957 1229 +4653 PF00721 TMV_coat Virus coat protein (TMV like) Bateman A anon Pfam-B_746 (release 2.1) Domain This family contains coat proteins from tobamoviruses, hordeiviruses, Tobraviruses, Furoviruses and Potyviruses. 
23.00 23.00 23.50 23.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.92 0.71 -4.29 47 557 2009-09-11 05:07:10 2003-04-07 12:59:11 16 2 125 27 6 518 0 140.10 31 54.13 CHANGED sYs...hss...pphhhhsssaschpshhshlpshpuspapspsuRstltstlusl.h......psssshspRFPss.s......h....hlhhtssslt.llssLhsuhc..ocsRhhEhppss....ssssups.s.....ssptssD..ush.sl+ssltplhstLsp.t....sshasp.....spFE ........................Ysh..hss...pphhhhsssascs.plhshhpsuhusphps...ppuRsslppphusl.h......pshsshssRFPss.s.........h....hVhhhssslcsllssLhsuhD..T+NRhhEscs.t.....sPssuEs.s.....sTpts-D......uol.Al+sslspLhstLhp.t.....pshaspspFE.................................... 0 1 1 4 +4654 PF00229 TNF TNF(Tumour Necrosis Factor) family Finn RD anon Prosite Domain \N 20.90 20.90 20.90 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.70 0.71 -4.57 52 1178 2012-10-01 20:41:10 2003-04-07 12:59:11 13 10 195 271 481 1150 10 120.10 23 49.88 CHANGED LpWppst...tshhtsGhphpssp..LllspsGlYalYoQlhFp........tsstt.................h.lshtlhhhssph....sp.hsLlpuhc.sssptts..........hhpslYhGulapLppGDclhlp..ss..p.shlchs..sspoaFGsatl ............................W.tt......h.h...p..shph.p..ssp....Lhl.psGlYalYuQVhFp........ts.tt............................t.h.lsttlhphssth......sp.ph...sL..hp.shp..ss..s.pt..ts...............hhpoh..ahG...G.lhpL.c.pGDcl.lp...ls......p.phlpht....tstoaFGhhhl............................................... 
0 63 98 204 +4655 PF00020 TNFR_c6 TNFR/NGFR cysteine-rich region Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.07 0.72 -3.89 37 2327 2009-01-15 18:05:59 2003-04-07 12:59:11 13 54 156 88 889 2043 25 38.90 30 15.97 CHANGED Cpps..pYpc...ps...t.C..C.shCps..GphhhpsCst.spsT.hC ...................C.ps..pY...ps.......sh....tpCh.C..spCss....uph.hh..psCos...spso..hC.......... 0 184 248 442 +4656 PF01107 MP Tobamo_MP; Viral movement protein (MP) Finn RD, Bateman A anon Pfam-B_815 (release 3.0) & Pfam-B_1906 (release 4.1) Family This family includes a variety of movement proteins (MP)s. The MP is necessary for the initial cell-to-cell movement during the early stages of a viral infection. This movement is active, and it is known that the MP interacts with the plasmodesmata and possesses the ability to bind to RNA to achieve its role [1]. This family also includes consists of virus movement proteins from the caulimovirus family. It has been suggested in cauliflower mosaic virus that these proteins mediated viral movement by modifying plasmodesmata and forming tubules in the channel that can accommodate the virus particles [2] and references therein. The family contains a conserved DXR motif that is probably functionally important. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.83 0.71 -4.92 18 493 2009-01-15 18:05:59 2003-04-07 12:59:11 13 31 165 0 22 530 0 167.20 20 27.73 CHANGED hpplchs-...hlsLsps-phhs..shhpthKpshh++schlhsl............ppsculsplsLlsps.hp..........ctpchsal+luulhlslcsph.cshcsslplsLlDsRh.pstc-.uhlusacsshshtchtFplhPpYulShpD.slc+shplhsphcsls.MccGspPholcassshtloNSphslsh+phhssl ...............................................................................................................h..................p.h.............p.h......pp..hhhs..................tpspth..lsllpt........................ptpphsa..lHlusl.lshchh.h...p......sh.ssshtlsLhDsR.......h..tp...hc-...u....hlushpsshsps.p.hhh.p.h.hPsaslshpD.shppshplhlphcshp.hptGt..psholpht.hh.hhhs...s.h...........h....................................... 0 0 15 18 +4657 PF04052 TolB_N TolB amino-terminal domain Bateman A anon Bateman A Domain TolB is an essential periplasmic component of the tol-dependent translocation system. This function of this amino terminal domain is uncertain. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.14 0.72 -3.96 174 1688 2012-10-01 20:48:06 2003-04-07 12:59:11 8 19 1656 15 404 1275 1658 103.20 37 23.83 CHANGED lpIcIop.GsspshPIAlssFt...spsss........thspplupllssDLppSGhFpslstsshhppstp....sspspassWpslsupullsGpls.ttsss...hplpacLaDlhp..sppl .........................................lcI.Isp.GsssshP.IuVl.PFp........htuss............hPpcluslluuDL.cp.S.GpFsP.lspuphsppsss..........spplphss.W.p.u..l....G.h-A..lVsGpVs...s..scG..papVsapLhDs.tt.............................. 0 106 226 315 +4658 PF03349 Toluene_X Outer membrane protein transport protein (OMPP1/FadL/TodX) Mifsud W anon Pfam-B_3708 (release 6.5) Family This family includes TodX from Pseudomonas putida F1 Swiss:Q51971 and TbuX from Ralstonia pickettii PKO1 Swiss:Q9RBW8. 
These are membrane proteins of uncertain function that are involved in toluene catabolism. Related proteins involved in the degradation of similar aromatic hydrocarbons are also in this family, such as CymD Swiss:O33458. This family also includes FadL involved in translocation of long-chain fatty acids across the outer membrane. It is also a receptor for the bacteriophage T2. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.48 0.70 -6.11 22 2710 2012-10-03 17:14:37 2003-04-07 12:59:11 11 6 1716 33 577 2172 277 395.70 23 92.57 CHANGED sshssstshAuuh.hst.usuul.uRAhuGpuuhtc.ssushhpNPAshshhcp..sp..hphGhshlssc.lchpssssstph......................tstsssssshhlPphthlh....s-pauaGhulhsshGluocass.sahups..............huspocLtsls.lshusuY+Vs...cclSlGuulshsaspsplpp.......hhs...........................shsshsss...s.sssshhthts-shuhGaplGhhachsc.ssplGhsYpucs+h.chcGphshshssuhhs.h........................shsGplpl...plPsthpluhhHph.s-paslthshphstWSshpcl.........thtts.....tst..hshshs.sa+DshthulGssYphssphTlRuGhsYcpsshsspp.tsshlPsscppa.hShGhoYshs...ptslDhuauahhtccsshsp.s................sshshp.spssshlhulshshpF 
........................................................................................................................................................................................s....ss.s.uu.uat.l.tp.......os.s....uh..up..A.hu.Gtus.h.s.s.s.A.u.sh...h..NPAhhshhcp.....sp.......hssuhs..h........l..pss.......h..p...h..p..s..s..s...s..s.t..t.t.................................................................................................psts..h..ss.s..t...h...l...P..s.h......a..h.s..h...l..........s-.p.......h...s..h.G.h.u..h.h..s...s..a.G..h.s.o...-....a...s..s.....s..hsu..t.............................................................................................hu.s.p..s..sl..p...s......hs......l....s.....sh..u...Y.....+...l..s.........p.phShGhGhsh..h.a..s.p.up.lpp............h.s..............................................................................................s..shs......sss...st.h.....s....p...h...p..us....s.hu.h..G..a..s..hGh..h.a..c..l........s...c....s......t..R..h....GlsY+S....clc....h...ch.c...G.s.hs....t..h.......s..h...t.................................................................................s.s.u..p.h....s.l.......slP..phhplus....hpp..l....ssp.h.......sl..phshpaTsWS..p..appl..................................ps.p.s.......................t....sh....h.p..t.p.t.......ta.c.D.sa....phu.lGs.....s..Y.......p.h...........s...c.......p.h...........s..hR..s........Gl..........ua..........Dpo..........shs..........s............p...............p.....t...............s...............h...............p.............l............P........c..............s....-......R.h.........h.............hohG....ss..Y..p.hs........ts....h....slD......h.uh..s.a.h.h.t.p.p.s..p.h.s.p.................................ssh..php...ps.p..s....hhuhphshtF........................................................................................... 
0 153 342 479 +4659 PF04281 Tom22 Mitochondrial import receptor subunit Tom22 TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The mitochondrial protein translocase family, which is responsible for movement of nuclear encoded pre-proteins into mitochondria, is very complex with at least 19 components. These proteins include several chaperone proteins, four proteins of the outer membrane translocase (Tom) import receptor, five proteins of the Tom channel complex, five proteins of the inner membrane translocase (Tim) and three "motor" proteins. This family represents the Tom22 proteins [1]. The N terminal region of Tom22 has been shown to have chaperone-like activity, and the C terminal region faces the intermembrane face [2]. 28.70 28.70 28.90 28.70 28.60 28.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.90 0.71 -4.64 30 269 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 232 0 191 247 0 117.40 30 76.41 CHANGED M.VcLTEVcDEshp........t..hsptpstttss.s-s--.o-s-SDh.sD..-.D........-sETlh-RlsALKDIlPPppRppIusthusssohh+oshshuGphlWsloTSALLLGVPhALAlhsEpQllpMEK.EhshQcsAs-.lLAP ..............................................................................................t.pcs-p-h.pp...-..D.p.........cETlhERlhuLp-hhPspsRpthusshs...tssshspshhpaoupuhWlhoTSuhlLslPlshth.....t...EpphhpMEp.p.p.hppt........t......................... 
0 55 92 147 +4660 PF03220 Tombus_P19 Tombusvirus P19 core protein Mifsud W anon Pfam-B_2714 (release 6.5) Family \N 25.00 25.00 26.80 299.80 21.40 15.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.19 0.71 -4.91 4 59 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 3 0 44 0 172.00 83 99.71 CHANGED MERAIQGsDAtcQAsuERWDGGsGuohoPFQLPDESPohcEWRLHpsEpsscpDpPLGFKESWuFGKVVFKRYhRYDhpEsSLHRsLGSWpGDoVNhAASRFhGVsQlGCTYSIRhRGlSlTLSGGSRTLQRLlEMAIRhKho.LQLsssEVEusVSRGCPEusps...cESE MERAIQGNDAREQANSERWDGGSGGoTSPFKLPDESPSWTEWRLHNDETNSNQDNPLGFKESWGFGKVVFKRYLRYDtTEASLHRVLGSWTGDSVNYAASRFFGhsQIGCTYSIRFRGVSlTlSGGSRTLQHLCEMAIRoKQELLQLTPlEVESNVSRGCPEGscsF.ccE..p..... 0 0 0 0 +4661 PF03544 TonB_C TonB; Gram-negative bacterial TonB protein C-terminal Griffiths-Jones SR anon PRINTS Domain The TonB_C domain is the well-characterised C-terminal region of the TonB receptor molecule. This protein is bound to an inner membrane-bound protein ExbB via a globular domain and has a flexible middle region that is likely to help in positioning the C-terminal domain into the iron-transporter barrel in the outer membrane [1]. TonB_C interacts with the N-terminal TonB box of the outer membrane transporter that binds the Fe3+-siderophore complex. The barrel of the transporter, consisting of 22 beta-sheets and an inside plug, binds the iron complex in the barrel entrance [2]. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.39 0.72 -3.78 109 5753 2012-10-03 21:09:15 2003-04-07 12:59:11 9 80 2084 10 1601 5569 1848 76.60 24 29.38 CHANGED ht.pYPppApppsh..pGpVhlphslsssGplps.hpllpuss..t.hL-cuAl.cslcp...hpatPthts.sp...slshph...slpFp...L ..........................h...pYP..tt.At.p.t.th....pG...p...V..h..l..phs.l.s..t.s.G.......p..l..p..........s....l........p.......ll....p............u...s..................s..........s...hlDc..tAl.c.slcp.....h+.....ap.Pshts..Gp......sl.p.hhh....sltFp.................................................. 0 574 1099 1396 +4662 PF00593 TonB_dep_Rec TonB_boxC; TonB dependent receptor Bateman A, Yeats C anon Yeats C Family This model now only covers the conserved part of the barrel structure. 16.60 16.60 16.60 16.60 16.50 16.50 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.66 0.70 -4.48 666 34093 2012-10-03 17:14:37 2003-04-07 12:59:11 19 67 2349 66 8506 31731 11867 284.70 12 36.18 CHANGED sPphul......aphss..............shslhu..saupuacssshsphh..............................................................................................................hssssLcPEpupshElGhchphts..........hphshsh.aptchcshlstsssssshsstsstttttttttttstttthththhtththhhh............................................................................................................................................................tttssttststtsthhtttsthhtththtttthtththshshthsspttsssssshph..suashh-ht.ssY.phspt................hplphs...lpN...Lh..........s.cpYhthhs.t.........................hhh.u............................................sR.shhhslshp 
......................................................................................................................................................................................................................................................................................................................................................................................................................................sphuh......ap.hsp..................................phplhs...ua..up..s..h..p...s.....s..s..h..tp..hh....................................................................................................................................................................................................h...ssssL....c....s...E....p.....u.....p...s.....h...-lG...h...c..h..p.hh..s...............................hphs.......hs..h..a.p.p..c.....h.....p.....s....h........l......h.......t......s.......s.......s.....s...........s.......s.................s.....h.......t......s.......t.......t.........t.........t.....t....s...t...h.......t......t.....t..t...h...t.........h..h..h..h...t.h..h..h..t..t...t...t...t..h...h..h.....................................................................................................................................................................................................................................................t.t.t.t..t...t.t..t..t..t..t....h....t....h......h....t....h...h...t.....h....h.....t.........h.....t.....h.....t............t..........h...t........t......h....t...........h..........t....h........t.......h...........t......h......t.........s........t.........t...........h....s........s...........s......t........t.....h....h...p..h................s.......s........h.........t....l....h.s.lt.....hs..Y...phspp..................................................hpl..t..hs.....spN.........Lh.....................s...p..ph.h.t...ht.t................................................h.s..........................................
.......s..R..s..hhhshph.................................................................................................................................................................................................................... 6 2644 5782 7351 +4663 PF01131 Topoisom_bac DNA topoisomerase Finn RD, Bateman A anon Pfam-B_505 (release 3.0) Family This subfamily of topoisomerase is divided on the basis that these enzymes preferentially relax negatively supercoiled DNA, from a 5' phospho- tyrosine linkage in the enzyme-DNA covalent intermediate and has high affinity for single stranded DNA. 23.10 23.10 23.20 23.20 22.70 23.00 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.60 0.70 -5.65 126 9972 2009-01-15 18:05:59 2003-04-07 12:59:11 15 124 5053 35 2152 8171 4882 388.60 29 56.06 CHANGED chshsLspAthsRphhDhllGhslSp.hh....s..........t.....tsl.SsGRVQoPsLthlV-R-cEIcsFh....sp...a..aplp...sp.............h....................ttthh..............................ttpptphhscp..pApphhppl.p.............t......lp..plcpccc..pp.ssP.PashssLQpcAspphu.houpcshplAQpLYE........p.G....lIT..YPRTDSphls.ps......s.hpthhphlpp..t......hhs.....h.th...tp..........stp...sAHcAIhPTtshs.......sphs..........cctp.........lYcLIhcRalAs......hhssshhppsplpl...t........tt.........FpspGppllp..tGaptl...h.........ttpppp................lP...lpp...Gch...........l.h..tp.p...h.tcpTpPP.spaoEuoLlptM-pt.........................GlGpsuThAsIIppLh...cRtYlppt....p.pt...lhPTchGhtlh.chL.......p....h...p..lssschTuphEppL-pItcGchs.hpphlp 
.....................................................................................................................................p...hsLssAt.ARphhDhllG..hs.h.S.hl......p..................................................p..ptsL..Ss..GRVQossLt.lllcR-..cEIcsFh......scp.....a.....aplpup........h..................pttttp..................................................................hptpphch.h..s.cp......psp..t..l..h.....p.t..lps......tt................hpVp.pl.pp.Ktp..............pp..tsPhPash..o.oLQpcAup.+hs..au......s....ccThpl.......AQ.pLYE............p.t.....hI..........T..Y.RTDSph......l...spp......u.hp.t.shph.l..t..p..p..h.st....................pahs....h....p....h..h..t..t..t.......................pssp........pAH.cAIhP...T.tth................tphs...................................................pppt..plYpLIhp+alAs..h.s.s.uhhcpsslpl.......ph...................................st.hp....F..p..u..p..G.ph.lh...t..Gahpl.............h...........tttppppt.t.........................................................LPt..lppG-t.............................................lph......tp..hp.......pp.+h.TpPP.s+aoEAoLlp...t...h.Eph........................................GlG........p.......suTh......AslIpsl..........c.R....t........Y........lph.............p.+p.........lhsTphGhhlhphl.......p.............................p...........lhs.......p......hTAp....h.....EppLcpI.tp...Gphphpphl............................................................ 0 747 1386 1825 +4664 PF02919 Topoisom_I_N Topoisomer_I_N; Eukaryotic DNA topoisomerase I, DNA binding fragment Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1377 (release 3.0) Family Topoisomerase I promotes the relaxation of DNA superhelical tension by introducing a transient single-stranded break in duplex DNA and are vital for the processes of replication, transcription, and recombination [2]. This family may be more than one structural domain. 
25.00 25.00 25.50 26.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.37 0.70 -5.05 47 487 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 324 17 282 471 28 203.10 51 28.20 CHANGED hKWpoLcHNGVlFPP.Yc.LPcsVKhhY-GcslcLsscsEEVAsFaAshL-o-.aspp.sFp+NFFpDF+chlscpttt............IcchcKCDFophapaappp+....EpKKshocEEKK........tlKtE+-c.h-c.YtaCllDG+.+E+VGNFRlEPPGLFRGRGpHPKpGpLK+RlhPE.DlsINluK-u.lPpPP....sGH.+WpElpHDNoVTWLAhWpE.NIssp.hKYVhL ................hKWppLEHpGslFsPsYc.LPcsV+hhY-GcslcLssc.sEEVAoFautM.L...-.......p.....-......Y....spcthFpcNFFpDa+.c.hsppptt...................hIpshpKCDFs.hapaaptpp.......E..t+.......K..sho+E.....EKp.......................tlKpEp-c.hpppYtaClhDG+.+E+lGNF+lEPPGLFRGR.G-HPKh.....GhLK+RlhPE.DlsINhu.....K...........-utlPpPP...................sGH.+WKEV+HDNpVTWLA.WpE.NIpss.hKYlhL.................................. 0 94 151 229 +4665 PF01028 Topoisom_I Topoisomerase_I; Eukaryotic DNA topoisomerase I, catalytic core Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1377 (release 3.0) Domain Topoisomerase I promotes the relaxation of DNA superhelical tension by introducing a transient single-stranded break in duplex DNA and are vital for the processes of replication, transcription, and recombination [2]. 
20.90 20.90 20.90 20.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.56 0.70 -5.26 134 1273 2012-10-02 14:09:14 2003-04-07 12:59:11 15 15 1066 23 472 1266 171 214.00 37 46.11 CHANGED tDucsRpQYhYt...a.t.+-tpKac+hhchtptlsclR.cplpc-L..........psthsc.c+.lAsslhLlD................phhlRlGs-cYsc-st.ohGhsTLRscHlp.lps.............................................................................s.tlpFcFhGKsulcaphplc....Dpplh+slpth...........p...chPGpcLF.............cpls................................osclNpaL+chh..........ts.........hTAKsFRTasuolthhpt.....Ltph...................................................................................................t.....shsptppthstus+pVAthlspT.ul.s+ppYlpspllp..thtt..t.h...tthtph ....................................................................sph+tp...........ps.tKapphhthtphlstlR.tthptph.................ttt..t....p..hAshhhhl-.........thhlRsGspp.tpcpt....o...hGhsoLRscHlp.lpt.............................................................................p.slhFDFlGKDu.I+ahscVt...........h-.....c...+V..aKsLphhh............cs...KtPG--LF.............c+Ls................................oshlNcaLp.clM......ps...................lTAKsFRTYsAS.h.Thtpp.....Lpchs..............................................................................................................................st..s..shsp+hhshscAs+tVAhlssH.p+uV.s+.s..phhphphLptthpt..............s................................................................................................................................................. 0 137 260 379 +4666 PF01751 Toprim Primase; Toprim domain Bashton M, Bateman A anon Pfam-B_500 (release 4.2) Family This is a conserved region from DNA primase. This corresponds to the Toprim domain common to DnaG primases, topoisomerases, OLD family nucleases and RecR proteins [1]. 
Both DnaG motifs IV and V are present in the alignment, the DxD (V) motif may be involved in Mg2+ binding and mutations to the conserved glutamate (IV) completely abolish DnaG type primase activity [1]. DNA primase EC:2.7.7.6 is a nucleotidyltransferase it synthesises the oligoribonucleotide primers required for DNA replication on the lagging strand of the replication fork; it can also prime the leading stand and has been implicated in cell division [2]. This family also includes the atypical archaeal A subunit from type II DNA topoisomerases [4]. Type II DNA topoisomerases catalyse the relaxation of DNA supercoiling by causing transient double strand breaks. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -4.03 136 24868 2012-10-01 21:47:57 2003-04-07 12:59:11 17 150 8181 82 4223 22716 8361 107.80 31 17.30 CHANGED plhlVEusscstslpphhs..................................htlhsshGphhsh.p...........................................................lptltphh.............................................phpclllssDsDp.....-Gpplshplh.phhtthh..........tchhhspl ............................................................................................................................................................................................pLhlVEGcS.s.u.soh...cpuhs..............................................pp.psllsh....+G+..l.l.N.lpc.sp...hsc........................................................................................................................................................hhpspplpsl.hpsh...........................................................................hc..h..c..cl..llhoD..sDh.............-Gt.tIthhlh..phhht...h............thlhht.................................................................. 
0 1416 2671 3539 +4667 PF01533 Tospo_nucleocap Tospovirus nucleocapsid protein Bateman A anon Pfam-B_950 (release 4.0) Family The tospovirus genome consists of three linear ssRNA segments, denoted L, M and S complexed with the nucleocapsid protein. The S RNA encodes the nucleocapsid protein and another non-structural protein [1]. 19.80 19.80 22.00 60.50 19.50 19.10 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.54 0.70 -5.14 11 1122 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 38 0 0 714 0 238.40 61 94.92 CHANGED MSps+.lTccpIpcLLsuuct-VElEp-psphuFNFKsFapsNps.lc.hohssslohLKsRppIhtssKpucas.FsshsIlto...SspVus..sDaTF+RL-uhIRsKhlptl..ocNscsppchhsKlhshPLVpAYGLp..uhhDtsulRlhlhlGGsLPLlASlcohtshuhsLAhYQssK+EpLGI..ppFsThEQLsKVspVhpupuhphscs.cchhcphscILssssPsspGuh..uhc+YsEplpthtss.F ....MSpVK..LTKEsIVsLLTQG+.DlEFEEDQNhlAFNFKTFCLsNLDpIK.KMSlhSCLTFLKNRQSIMKVIKQSDFT.FGKITIKKT...SDRlGA..sDMTFRRLDSLIRVRLVEE......TsNuEsLsoIKoKIASHPLlQAYGL...PLsDAKSVRLAIMLGGSLPLIASVDSFEMISlVLAIYQDAKa+-LGID.KKaDT+EALGKVCTVLKSKuFEMsEDplKKuKEYAsILSuSNPssKGSl..uh-+YsEplshh.phF... 0 0 0 0 +4668 PF00087 Toxin_1 toxin; toxin_1; Snake toxin Eddy SR anon Overington Domain A family of venomous neurotoxins and cytotoxins. Structure is small, disulfide-rich, nearly all beta sheet. 20.70 20.70 20.70 20.80 20.40 20.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.12 0.72 -3.60 48 594 2012-10-03 01:43:02 2003-04-07 12:59:11 16 1 92 141 6 666 0 60.80 38 79.57 CHANGED hpCapp.s....socTCP.tGpNlCY.K......pa.pc...p..+Ghh.l-RGCusoCPpscs.hhplpCC.sT.DcCN ..................................pChpp.t...s.sscoCs.sGcshCYp.K......tapc..........p.ps.hh..lcRGCu.s..sC..Ppscs.....thpl.......p.CCsT.DcCN...... 1 3 3 3 +4669 PF00451 Toxin_2 toxin_2; Scorpion short toxin, BmKK2 Finn RD anon Prosite Domain Members of this family, which are found in various scorpion toxins, confer potassium channel blocking activity [1]. 
25.10 25.10 25.30 25.40 24.80 25.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.28 0.72 -3.86 48 130 2012-10-01 23:31:40 2003-04-07 12:59:11 14 1 40 50 1 165 0 31.00 44 65.14 CHANGED lcCssopcCht.sC+chhGpttG.KChNsKC+CY ....l+Csuopp.Chp.sC+cthGhttG.KChNs+C+Ca. 0 0 0 0 +4670 PF00537 Toxin_3 toxin_3; Scorpion toxin-like domain Bateman A, Moxon SJ, Finn RD anon Pfam-B_8170 (release 8.0) Domain This family contains both neurotoxins and plant defensins. The mustard trypsin inhibitor, MTI-2, is plant defensin. It is a potent inhibitor of trypsin with no activity towards chymotrypsin. MTI-2 is toxic for Lepidopteran insects, but has low activity against aphids [1]. Brazzein is plant defensin-like protein. It is pH-stable, heat-stable and intensely sweet protein [2]. The scorpion toxin (a neurotoxin) binds to sodium channels and inhibits the activation mechanisms of the channels, thereby blocking neuronal transmission. Scorpion toxins bind to sodium channels and inhibit the activation mechanisms of the channels, thereby blocking neuronal transmission 21.50 21.50 21.60 22.00 21.30 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.76 0.72 -4.09 33 487 2012-10-01 23:31:40 2003-04-07 12:59:11 13 1 55 78 15 598 0 53.00 38 69.05 CHANGED +D.....uY..Is.pscNCsYpC.....h.h..ssaCsshC+c.pGAcuG.aChahu...hs.ACaChsLPDss ................+-GY....ls..pspsCthpC........hh...ssaCs.p.C........cp..pGup.s....G.YChhhu..........tCaC.sLPDp....... 
0 7 7 7 +4671 PF00706 Toxin_4 toxin_4; Anenome neurotoxin Bateman A anon Pfam-B_589 (release 2.1) Domain \N 20.90 20.90 21.80 21.60 18.00 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.40 0.72 -3.75 7 75 2012-10-01 20:50:19 2003-04-07 12:59:11 12 1 27 7 9 95 0 43.60 56 70.66 CHANGED sChCDsDGPslRssshoGTl...huuC...suGWcpCtuhhssIu.CC .sChCDSDGPslRGsoLSGhl..Wl.......uuC...PSGWHpCpuptshlu.CC 0 9 9 9 +4672 PF02079 TP1 Nuclear transition protein 1 Mian N, Bateman A anon IPR001319 Family \N 25.00 25.00 68.00 67.90 24.40 24.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.79 0.72 -4.20 3 42 2009-09-11 05:49:06 2003-04-07 12:59:11 11 1 36 0 15 30 0 52.00 85 95.33 CHANGED STSRKLKSHGMRRGKNRoPHKGVKRGGSKRKYRKSSLKSRKRGDDANRNYRSHL .STSRKLKSHGMRRGKNRoPHKGVKRGGSKRKYRKuSLKSRKRuDDANRNaRSHL..... 0 1 1 2 +4673 PF01254 TP2 Nuclear transition protein 2 Finn RD anon Prosite Family \N 25.00 25.00 51.80 51.70 23.00 22.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.11 0.71 -3.83 4 40 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 25 0 12 46 0 124.20 61 99.72 CHANGED MDTKTQSLPsTHsQPHSNSpPQSHssppCuCS+HCQopSQS.....pssRSpSSSppPtuHR......S.sG+QSQSPuPSPPs+++K+sMHSHpsPSRPso+pCSpsKNRKNLEGKlpKRKtlKRppQVYKsKRRSSGRKYN ..MDTKTpSLPlTHTQ.HSNSpPQS+T...st.CsCo+HCQohSQSCppupps......SpSRSSSQSPsuHp........SsoGpQS..QSPssSPPPK+HK+TMpSHHuPsRPThh+sSCPKNRKNLEGKlpK+KhsKRhQQVYKTK+RSSG........ 0 1 1 1 +4674 PF04406 TP6A_N Type IIB DNA topoisomerase Waterfield DI, Finn RD anon COG1697 Domain Type II DNA topoisomerases are ubiquitous enzymes that catalyse the ATP-dependent transport of one DNA duplex through a second DNA segment via a transient double-strand break. Type II DNA topoisomerases are now subdivided into two sub-families, type IIA and IIB DNA topoisomerases. 
TP6A_N is present in type IIB topoisomerase and is thought to be involved in DNA binding owing to its sequence similarity to E. coli catabolite activator protein (CAP) [1]. 20.50 20.50 20.50 20.80 20.20 20.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.02 0.72 -4.31 65 534 2009-09-14 12:29:41 2003-04-07 12:59:11 9 9 417 7 349 531 120 69.40 27 18.33 CHANGED shp...psp+.....hsphhhllphl.pctlppsphs.ThR-lYYpspp................hF.ss........QspS-pll-Dlph.hhsl..REphplh ..........................t..tpsp+hsthlhllphl..hchlp.p..sphs.Th.....R-lYYpshp.................hF.ps........Q....sps.DpllcDlsh.hl...tl..sRpsLpl.............................. 0 105 197 287 +4675 PF00590 TP_methylase Tetrapyrrole (Corrin/Porphyrin) Methylases Bateman A anon MRC-LMB Genome group Domain This family uses S-AdoMet in the methylation of diverse substrates. This family includes a related group of bacterial proteins of unknown function, including Swiss:P45528.\ This family includes the methylase Dipthine synthase. 
27.30 27.30 27.30 27.40 27.10 27.20 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.45 0.70 -4.63 171 17348 2009-09-13 05:43:58 2003-04-07 12:59:11 15 82 4907 229 4809 13595 4640 206.00 21 64.04 CHANGED plhllGhGsGs..s.-hlThcAhchlp.pA-ll.....h....sss....................p......shphl..h....................................t........hphhpts......................hpphtchl..htthppstt..Vshls.sGDPhla..uhsthlh.t..hlttt...h....hc..llPGlSohpsssuthuhshs.psthhphhh...............s.t..ttthhpph.t...........sslllh........hsst..t.......htplhphL...........hpt...........p.tlhlscp.....huhs.s.Ep..lh.psslppls .............................................................................................................................................................................lhlVGsGs.Gs........p...h...lTh..+uh...........ch...Lp...pA....Dll.....................hhcs.............h......ssphl.hphh......................................................................ttt............ht.hh.phs........................................................................tpch.s.p...h..h........l..p..t.h.p.pG..pp......Vshls..uGDPhla.......uhutc.hl.p...........th..t....p...............t...s...lt....................hp......l..l..PGloushuu..sut.....s.....Gl....s.hs.....t.h...h.pslh....................................thtt..p..pt.p..hp.t.h.ttt.......................t.pol..l.hh...................................husp...p......................lsplh.p.tL......................................................hpt...........s.pp.lsl..s.cc........ho.ps..p.pp..lh.pssltph................................................................................................................................ 0 1545 3155 4100 +4676 PF04201 TPD52 Tumour protein D52 family Bateman A anon Pfam-B_2632 (release 7.3) Family The hD52 gene was originally identified through its elevated expression level in human breast carcinoma. 
Cloning of D52 homologues from other species has indicated that D52 may play roles in calcium-mediated signal transduction and cell proliferation. Two human homologues of hD52, hD53 and hD54, have also been identified, demonstrating the existence of a novel gene/protein family [1]. These proteins have an amino terminal coiled-coil that allows members to form homo- and heterodimers with each other [1]. 25.00 25.00 29.60 28.80 24.90 23.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.06 0.71 -4.51 4 406 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 101 0 173 372 1 162.90 44 79.83 CHANGED uLlhspshsEsstshssohsuo..sLSEtEp-EL+sELsKlEEEIpTL+QVLAAKEKHhuElKRKLGhsshsEL+QNluKSW+DVpsTsAYh.........tQKsosAhuuVGosIs+KhsD..................h+NSsTFKSFEpKVto....lKo+VuGs+.sGus.hsclluuusssSAp ...............................................................h..s....t....p.s......LoE.pE..p....E....E....L+....t....E....LsK.V...EEEIp..TLRQVLAAKE+HhuE.lK.RKLG........losLpELKQNlu...+uWp.DVpsosAYp+TpE...............................................TLSpAGQKsSAAh....S....slGosIo+KLsD...........................................MRNSsTFKSFE-+Vts....l..KoKVs.usp.ssss.h.p..t........................................................................... 0 40 50 92 +4677 PF00205 TPP_enzyme_M TPP_enzymes; Thiamine pyrophosphate enzyme, central domain Finn RD, Griffiths-Jones SR anon Prosite Domain The central domain of TPP enzymes contains a 2-fold Rossman fold. 
27.90 27.90 27.90 28.00 27.80 27.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.75 0.71 -4.41 309 15120 2012-10-03 09:55:27 2003-04-07 12:59:11 17 40 4534 251 3745 11432 6158 134.60 28 23.53 CHANGED lppssphlppAc...+P..lllsGuGshhs.p..Apppltplu-ph.slPV.ssThhG..+Gsls.p..........sc................Ph....hG.hhGhh.u.....stsustsl.pp..uDlllslG.sc...hsp.hs......hs..phs.tassp........s..p.........l..lpl.D......................l-s.........scls+sh.............ss.lsllGDsp.......tsLptL .........................................lpcshph.l..p..p..A.c...+P...l.lhsGuGs...h..s...p.........A.s.p.....p.L...p...c...hs-ph..plPl...s.s...T..h.hG...t.Gs..ls.p............................s.c...................Ph.h...lG..hh....Ght.G......st.su..s.h.sl...pp....uD..l..l..lslG.sR...as.c..ps..tu..........tht..tassp.......s...c.........l..lp.....lD.......................l-s....................sp.l.s.+.hh..........t...sc..lsll.GDsp....tsLpt............................................................................................................................................... 
0 1002 2186 3051 +4678 PF02775 TPP_enzyme_C TPP_enzymes_C; Thiamine pyrophosphate enzyme, C-terminal TPP binding domain Finn RD, Griffiths-Jones SR, Mistry J anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.08 0.71 -4.64 327 23243 2012-10-02 16:07:47 2003-04-07 12:59:11 16 110 4932 295 6025 21857 10671 146.80 22 24.62 CHANGED -lGptph.hhtph.......t.spp.als...ss...shusMGhulPsAlGsp..l....s........s..t.....t.pVl.slsGDGu..ahhs.h.pELtossph..sl.slhlllhNN.phhGhlctt...pp.hhhttphss.p...............hts.DastlAcu.h..Gsp.uh...p.lpsh..pcl...ppslccA...h......ppptPsll.-l ........................................................hGp..h.....................stp..hhs......ss........shus..h..G..h.u..l....ssAlGsph................u...........................p.P.....c.....c..pVl..slsGDGu....h..h..hs..h..p.-..L..s..............s.....s.h...p.h..........sl....s..l....t.....l......ll.l.N.N....p..s.....h....u...h..spt.................pp..hh..h.t..t.p.h..s..s.......................................tth...D..a...s..t....l.A.......pu.h........Ght...sh................pl...p.....s.t..............p.c..l.......pp....s...lpcAh........ppss.ssllch....................................................................................................... 
0 1846 3761 5012 +4679 PF02776 TPP_enzyme_N TPP_enzymes_N; Thiamine pyrophosphate enzyme, N-terminal TPP binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.87 0.71 -4.77 118 17953 2012-10-02 16:07:47 2003-04-07 12:59:11 13 63 4635 275 4426 14362 7899 169.30 28 30.25 CHANGED hssuchlschLpp.hGlcp...............lFGlsGsphhs.lhcultpt.......tlphlhs+cEpsAuhhAs..uau+..ho.Gc.......sulshsss.GP.GssNslsulssAhts.phPllhlsGpsstpths.........tthhp.ph..D.thsh.....hp..shs.Kh......shplpsssphsphlpcAhptA..hssptGPVhlslPhDlhttpssts. ..................................................................................h.psuphllcsLpp..p..G..V..c.p......................lFG.hP...Gu.shhs..lh-u...l...tppt....................tlc.al....h...sRHEp..........uAuahAp....GaA+.......ho..Gc...........................sG..V.s..lsTS..GP.G.s.s........N..hlsu......lss.A.h.h.-....sl..P...l.lslo...G...pss.sshls.............................pss..aQ.ph..........D....hhsl.........................hp.......sh.s...+a.............................shh.l......p......p......s......p........p.......l......s.........p.........h.......l.......p..............c...Ah.......phA......................s........ucsGPVhlslPtDl..t.h....t..................................................................................... 2 1202 2609 3623 +4680 PF01963 TraB TraB family Enright A, Ouzounis C, Bateman A anon Enright A Family pAD1 is a hemolysin/bacteriocin plasmid originally identified in Enterococcus faecalis DS16. It encodes a mating response to a peptide sex pheromone, cAD1, secreted by recipient bacteria. Once the plasmid pAD1 is acquired, production of the pheromone ceases--a trait related in part to a determinant designated traB. However a related protein is found in C. elegans Swiss:Q94217, suggesting that members of the TraB family have some more general function. 
This family also includes the bacterial GumN protein. The family has a conserved GXXH motif close to the N-terminus, a conserved glutamate and a conserved arginine that may be catalytic. The family also includes a second conserved GXXH motif near the C-terminus. 30.20 30.20 30.30 30.20 29.70 30.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.65 0.70 -4.94 137 1980 2009-09-12 01:14:50 2003-04-07 12:59:11 12 13 1569 0 659 1576 259 253.30 21 77.55 CHANGED hlWclp.....ps.s......pp.........l.................aLlGThHlsctsh..lsstlpphhppu-slslEh...................thhh..s..spplp.shlsscphpplsthhttht..hs.....phhpphcP.hhlshtlshsthpp............thssp.....Gl-.thtttAt.........tpstphhs....LEshchQlshh.psh...shpp....phphlhpsl......pp.h.....tpt.sph..hpp...hlpt.atpuDh....ptlhph.........htp...........thsph.hcsllscRNpt....hspplp..phhppt...........tthhssVGAuHLsGtpu.....................................ll.................................shLpttGaplpth .................................................................................................................................................s..s.....pph.....aLlGohHh.u.sp.sh.t....s....l....ss....tlhpthppuDslhlEh.....................................h...........s......stsl.p.phl..st.c.p.hp..pL.pph.....p..t.h....hs............................phhpph...a.ls.hh.lt.....htp.hpc.........tlpsph.......Gl-h.th..ttAt..............tppt.hht................................LEstp..Ql.shh...pth.........................s.tt....t..ht.hLts.sl.....................sp..h.....................pss...sch.........hpp.........hhph.ahp.s.s........sshhp...............................hpp.................ht..p.l.hc.s.LlpcR.Ntt......hspplp......th.ttp........................hlssVGAhHLhGt.ts............................................................l..................................................................phLp....................................................................................................
... 0 225 409 534 +4681 PF02534 T4SS-DNA_transf TRAG; TraG; Type IV secretory system Conjugative DNA transfer Bashton M, Bateman A, Staddon J anon Pfam-B_1146 (release 5.4) Family These proteins contain a P-loop and walker-B site for nucleotide binding. TraG is essential for DNA transfer in bacterial conjugation. These proteins are thought to mediate interactions between the DNA-processing (Dtr) and the mating pair formation (Mpf) systems [2]. The C-terminus of this domain interacts with the relaxosome component TraM via the latter's tetramerisation domain. TraD is a hexameric ring ATPase that forms the cytoplasmic face of the conjugative pore [3]. The family contains a number of different DNA transfer proteins [4]. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.47 0.70 -5.91 9 2504 2012-10-05 12:31:08 2003-04-07 12:59:11 9 20 1333 0 409 3978 356 393.00 19 69.09 CHANGED passhs-hpptt.hhpctshlhs.h.ths..................hGt...hhstsGpF.........ashlhAsotuhKusullIPshLsasp.olVVhDPKuElaphTuphRcppup.cVhlh-Ptsspsp..paNPlDaIsttss..stscDlhtlsshlhsspsh..........................c-aapssAtpLhsul.............hhhhphpstscshstslphlpshtu-.......csh.hsphhpphpssctststpslushhspsc.............cphoSVhushsuphphassP.lpshsStSDFslcclt..cccsslalslsspshphhsslhplhhp.hhphhtpp.sshctp.ht.....sLFlLDEFspLGhhchhcpuluhhttYslplhhIhQoluQLps....tYsppsApohlsspsshlsaussN....-TAcaISchlGptTlchcssSc.....ssspuso+opohs.spRsLlpPcElhpMssccp............IllhpupsPl+scKshYacc...........................pphpsphsc.t.hhstphthu 
.............................................................................................................................................................................................................................................................................................................................................................tth.hl.hus.o.tSGK..s.....h...s...h.....l.....h....P....s.....l...............h..........p.......h.....t........t.....S.......h.....llhD..K.......s.....-.......h...........h...........t........h..........s........u.......t...............h.........h.....p....p.......t.......s.......h......c.......l......h........l...hs........................h.................s................p...........p........u...................p.........a...........N.P..h.....t..........h...l.....p...p.....t..........tp........h.h.....s.....h.......t.........l..s..s...h....h...h..s...t.pt.........t.................................................................................t.c..sa.Wtpsup.t.L.hs.uh.........................................................hhh.hh..h..t...t.........t...........s.......h...s.............h.........hp...h....l...t......t.......h.........t....s.t.............................t..h.......h................h..........h..h.t..t.............h......t......t.............t.........................s.........h......h....h...t..t........h...t....t....h......t...t..s.t..........................................................................pphsulhss...h..ts..h...l.s.h......a.....t.............l......t...p....h.h..........s...........t....s.......-...........h.........c..l...p..p..lt..............pp......h........s........la.l.h..h...s...s.....p...p......s...h.............lh.......t..l.h.h..p............h...h.p...h........h.......t...p...t..................t...........t...t..........h....................hhhhh....DEhs.sl...G....h.....h...........h..tp..h..hu.........hhtu.......htlphhhlhQsh....s.Qlpt...................hY.t.p..t.t....h.p..sh
..h....s.s...s....h..hh...h..s..tp..........psh..c..lS.p..h...h.Gp.....T....h............s.hs...............................t...................t....p..........s.........s.......p...........t....+....L...h.........tE.lh......h........t..t............................lh....h....t....s..............s.....hh...t..........................................................h.............................................................................................................................................................................................................................................................................................. 0 134 274 337 +4682 PF00923 Transaldolase Transaldolase Bateman A, Griffiths-Jones SR anon Pfam-B_787 (release 3.0) Family \N 20.90 20.90 22.80 22.20 20.20 20.00 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.71 0.70 -5.28 131 6718 2012-10-03 05:58:16 2003-04-07 12:59:11 14 21 4201 128 1447 4243 3165 256.10 30 91.25 CHANGED lahDssshchIc.....chht...lpGsTTNPolht+......Ahp.....tpthhcctl....pphppt...................................................t.hh..lhh..h..hhthh..........sG.hVShEVss.hshDscuslpcA+cLhphhp........t.slhIKlPuT.....-GlpAhcpLp.pc.G...............IplNlTLlFShtQuhtsAcA.............G...........ssllSsFVuRlschh..........................tt.sGltsspphaphacph.s...................a.sTtlhsASh+sshplh..sLsGsctl.Tlssshlcphtp.csts..........................tt..t....hp.p.hh.ph.s-Glc...pFtpsappLhttl 
................................................................................................lhhDTusl..ptl+..........chtt.....h.pGsTTNP.S.llhp........t..........hpt.h.c.-.sl.tphppt........................................................................l........................................sG.plSs.EVsu................h.Ds-shl.pc...A+cLhphhs.......................plll..KlPs.T..............................hp.Gl...pA...hchL...p...p.-..G.............................IpsNlT.............LlFShsQuhhsAcA..................G........sphlSPFVG.Rlt-hh..................................tsGltslpplhphacpa.s....................................h..pTtl.....hu.AS..aR..........ss.tplh.........tL..s..Gs..-h.......l..T.ls..sllpphhp..psts...........................t..ptl.......................................................................................h..................................................................................................................................... 0 481 911 1226 +4683 PF00382 TFIIB transcript_fac2; Transcription factor TFIIB repeat Finn RD anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.18 0.72 -4.16 12 2462 2012-10-03 00:42:12 2003-04-07 12:59:11 14 23 536 21 1522 2387 745 70.60 26 32.13 CHANGED ls+hsspLcLsc...hVtcsAtclh+pshcpthlpGRSstulhAAslYlAsRhpphpRohpEIsslspVschol ...................................ht+hssp.L.s..Lsp........pltc...pAtpl...h.c....p...s...h............c...........p.............t...........h.....h......p......G...Rs...s...........pulsA..AslYhA.s..R.....h..p...s.....h..s.+.....o.hc.El.u.sls..p.Vschpl........................ 
0 455 879 1280 +4684 PF00405 Transferrin transferrin; Transferrin Finn RD anon Prosite Domain \N 20.30 20.30 20.30 20.30 19.70 20.20 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.08 0.70 -5.57 10 1389 2012-10-03 15:33:52 2003-04-07 12:59:11 12 15 229 250 465 1509 132 257.20 32 86.39 CHANGED VRWCslSssEtpKCpsap-pM+pht...sPsloCV+KuSsl-CIpAIAssEADAlTLDGGhlaEAGLsPYsLKPVsAEsYGo+-pPpTaYYAVAVVKKuSsFplNpLQGKKSCHTGLGRSAGWNIPIG..lLcstLsWtG.spcslEcAVucFFSuSCVPGAcc.sthPpLCQLCsGputsK..CusSspEPYaGYSGAF+CL+DGAGDVAFVKcSTVhENLsscA-R........DpYELLChDNTR+PVD-YKsCHLApVPSHAVVARSsssKE-hIWcLLspAQE+FG+-pop-FQLFSSPsGt....KDLLFKDSAlGFl+lPophDStLYLGa-YhTAI+NLRc ...............................................................WCshup..E.t..KCttht.....t......................tlpC.ht.t.t..s..cClptIh.t.tcAD.sh...slD.u.s.la..A.u................h...tLhP....l.hs......E...h..........t....................................t..........t...................t........t.........t............Y...............h......u.VAVV.....+.....+.....s......s.......s....h.......p..............h..............p....p..................L......p........G...+...+SC..Hou.h..s..p.....s.A.G..W.p.l...Ph...u..........h..t........h..........................................................t....h...s..p...aFs.t.u.C.....s.......P.....G..........s.......p...................................................s....p.LCthCh.G.....................................t.........C..t..............s...t.p.-.Y..h.GasGA..h...........+...C.L.h-...s........t......G-....V.AFl....+..........p..........t...........s....l.........p...........s.....h..........t....s........p.............................................ppap......LLC.....s...s....s.....c...t..s......l...s.....p....a....p...p..CpL.u....ps.P.....s+uV.l.s...R.................................t.......l.h..p.hL....p..........t....t.................................................h..phFts........................psLhFpDss.th..h......................h....
............................................................................... 0 88 121 296 +4685 PF00868 Transglut_N Transglutaminas; Transglutamin_N; Transglutaminase family Bateman A anon Pfam-B_783 (release 3.0) Domain \N 21.40 21.40 21.50 21.80 21.00 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.47 0.71 -4.07 50 614 2009-01-15 18:05:59 2003-04-07 12:59:11 15 12 106 39 309 576 0 116.20 30 17.57 CHANGED LplpsVDlpppp...NstpHHTccap..........spcLlVRRGQsFplpLphscs...apsstDplplhhphG..stPs.....spGTpsslsl....spttp......sssWsupltspsusp.....lplslpssssAslGcYp.Lsl.....pspst .........................tlpplDh....p...pp...N..ptpHHTpcap...........................................spcLlVRRGQsFplpLphsps....a.sst....-plph.hp.s.G......stPs............spGTpsshsl........sstts......sssWsAtl...p.p..psps.......lslslpoPs.sAslG+YpLplpst.s.............................................................. 0 56 87 169 +4686 PF00912 Transgly Transglycosyl; Transglycosylase Finn RD, Bateman A anon Pfam-B_558 (release 3.0) Family The penicillin-binding proteins are bifunctional proteins consisting of transglycosylase and transpeptidase in the N- and C-terminus respectively [1]. The transglycosylase domain catalyses the polymerisation of murein glycan chains ([4]). 
20.70 20.70 20.80 20.90 20.50 20.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.16 0.71 -4.96 172 12887 2010-01-08 14:08:55 2003-04-07 12:59:11 17 38 4268 62 2627 9890 3274 179.10 35 26.43 CHANGED sG......t.ltth........tpp+phl.s.hs..............p.l...s.t.......lhpAllusEDccFap......HtGlDhtulhRA.hhpsl.t...sspht.......pGGSTITQQlsKs...h.hL..........p........p.....+o....h......pR...KlpEhhhAhpl......Epp..hoKccILch.YLNpl.h.G.p....ssaGlpuAuphYFuK............sspc..Lshs.....EuAhLAulhpuPs......ta....pP..h......p......scpspp....R...pshl.LppMh ............................................................t......ph........tppRhhV.s..hc................p..l.....sst.......lhpAllA.sEDp...+F.ac.............Ht.Gl.DhhulhR....A....h....hpsl..h................................su.pts...........pGGSTlTQQlsKNh..aL.................s.....................p-..............+o.............h....tR.............Kh.pE.hhl.A.lpl..................Epp....h.o......KccILph.YLNp.lah.G..p......ssaGlpsAAptYFG..K...........................sspc........Lols........................puAhLAGlsp.uPs..........tY.sP.....hp..s...................sctutp.........R..p.s.hV.LppM............................................................ 0 760 1609 2146 +4687 PF00456 Transketolase_N transketolase; Transketolase, thiamine diphosphate binding domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain This family includes transketolase enzymes EC:2.2.1.1. and also partially matches to 2-oxoisovalerate dehydrogenase beta subunit Swiss:P37941 EC:1.2.4.4. Both these enzymes utilise thiamine pyrophosphate as a cofactor, suggesting there may be common aspects in their mechanism of catalysis. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.87 0.70 -5.63 12 11328 2012-10-02 16:07:47 2003-04-07 12:59:11 16 27 4696 96 2755 11612 8619 262.80 31 48.34 CHANGED .cpphsssIRhLuhDAVppApSGHPGuPMGhAslApVLapchL+psPssPpWhsRDRFVLSNGHuuhLLYuhLHLsGY.DLsh--L+pFR..QltS+TPGHPEhtassGlEsTTGPLGQGIuN...AVGhAlAp+sLAApaNcsGasIsDaaTYVhhGDGCL.EGlSpEAsSLAGpLpLGpLIshYDcNpIoIDGcsphtFs-Ds.ttRacAYGWHVlts..sGcDspuIptAltpA+tsps+PoLIts+TlIGaGussptGscssHGAPLGs--ltth+pthGac.ttsFtlPp-lYstapt+ht..GtptpppWpphFusYtptaPEhuAEhtRRhsGcLP .................................................................................................................h.....ptlRh.s..hphl.t...pu..t...........u......G.......H................G..s......s..huh.A..sh..h...s.L.a..p....c...h......h.p...h.p...P.t.......p.s.........p......h..........s.R.D..R..a.lh.....S........t.GHu.u.hlYuhLa..L............p.............G...................h..................l............s.h.....-c.L.c.s.FR..........p......h.....t.......S....p........s....s......G..........H.......P.c...h....t.h................s...G..........l....-.h.o.TGsL.G.p.Ghss....AlGh..A...h...u...p...+...h...h...t...t................t...........................h.p...h.......s..p.......h..sY...s..hhGDGph.EG..uh..........E........A..........hs....hAup.p....L.s...pLlhhh.D.s.N..t........h...p......l........D..............G.....................s..........p.........h.............h........h.........s...........p..........c............h.........t............t.....+..........a....c.u.....h.......G..W....p.V.....l.p......s...............s....G..........p...D....h....p....t....l.........t....A....h.......p....t....A.......p.......t..................s.......+..............P.olIhs.+.T..h.h...G..h..G..............s........t........h.......t..........s..........p......t.....t......s.......H......t..........t.................h....s................p......t...h.......t..............h.h.......t.....
...h......................................................................................................................................................................................................................................................... 0 827 1666 2288 +4688 PF01818 Translat_reg Bacteriophage translational regulator Bateman A anon PSI-BLAST 1reg Domain The translational regulator protein regA is encoded by the T4 bacteriophage and binds to a region of messenger RNA (mRNA) that includes the initiator codon. RegA is unusual in that it represses the translation of about 35 early T4 mRNAs but does not affect nearly 200 other mRNAs [1]. 25.00 25.00 73.20 73.00 19.80 19.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.63 0.71 -4.58 11 74 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 70 2 0 56 724 121.60 58 92.25 CHANGED MlEIsLpcP-DFLKlRETLTRIGIANsK-KpLYQSCHILQKpG+YYIVHFKELLpLDGRpVclopEDhpRRNsIApLLcDWGLspIlssc...-hsspNpFRVISaKpKsEWpLhsKYpIGp .MlElsLpcP..-D...FLKV+ETLTRIGl.AspK-.KpLYQSCHILpK......pGpYYIVHFKEL.htLDG+psslocEDh.RRNpIspLLpDWGLlcIlsspthhc.hssh..Nph+VIoaKpKsEWpLhsKYsIGp... 0 0 0 0 +4689 PF01997 Translin DUF130; Translin family Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family include Translin Swiss:Q15631 that interacts with DNA and forms a ring around the DNA. This family also includes Swiss:Q99598, that was found to interact with translin with yeast two-hybrid screen [1]. 
20.40 20.40 20.60 21.00 19.60 20.30 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.33 0.71 -4.72 74 629 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 351 42 428 614 144 190.20 24 78.99 CHANGED cctREcllclsR-lsphS+cslht................lHptst..........tt.st.phlpcsppplpplp.phht...............shs.........apaptthssuhQEalEAhshhtal...........................pst....pLhohc-ls..h.............................................................ls.t-YLhGlhDlsGELhRhslssltpGshcp...................................shplhphhcclasthhhlsh...............th......tslR+KhDsh+pslcKlEpslashpl+st ...........................................................................................................phRE.clhchs+-lpttu+chlhh...................lpphpt............................tp.sp.chhppsppplpp.lp.phht................sts....................hapappthp.slQ-h......l.puhsahtal...........................c.sp....sL.l.ohc-lsp.l.h...............................................................h.ls.p-YLhGlhclsuEL.....h........Rhsls.slst.G-.hpp.........................................shplhp.hlpcl.hssaphlsh..................................................ptl.+.++hDsh+.s.......lcKl.EpssYslplRs.h............................................................. 
1 140 221 335 +4690 PF02133 Transp_cyt_pur Permease for cytosine/purines, uracil, thiamine, allantoin Mian N, Bateman A anon IPR001248 Family \N 21.10 21.10 21.10 21.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.43 0.70 -5.93 14 4605 2012-10-03 01:44:59 2003-04-07 12:59:11 10 14 1918 2 1514 3768 1528 398.50 19 86.59 CHANGED PVsp.ccRshshhNhhshWhusshslssashuuss.lthGLsaapshlslhhusllsshhlsltuthGsphGlshslhSRuSFG.lhGulhPhlsts.lhAhsWaulpsalGupslhhhltplast....hhs...sh.ssssh.hlsFhlFhllphhhlahshpplp+hhshtuhlh.hsuhuhhhWshstspu....Gslhs..st.s.........hhsulhuslusauohlsshsDFTRaups.psshhsthlslshshslhhhhullssuuuhsh.hGsshWsshplltpa.ss.......hushlhslllshuplusNhusshlusGhshuslhP...hhlsh+puuhhsullulshssWsLhusssp...hhshLshhushLushuGllhADYahlR+uhhplsphat....psuhYha..chGhNacAhsAalsGhhhslsGh..........tsshsshthhsluYhVG .....................................................ppR...p....h....t....h....h.s.hhs...hWh.u.sshsl.s.s.hshuu.hh...h.................h.................h...........G...Ls...........hh.pshlulllGshlh.s.hh.hsh...h.uh.hGsch.GlshhlhsR.h.s.F............G...h.................h.G.u...h.................l...ss.l..lt.s...lh.t.l.........u....W.............au..hps.hhu..u..........t.s...hthhl.s..p.h.hs.....................................hs..h.s.hh.l.s.....h..h..l..h.h..l.l............h...h.h...h...h..h.G...h.....p..s.......l...p...h.h...t...h.ht........s...........s.....h...l...h.l....h..h...h...s...h...h..h..h...h.h.sp.s.sh..........u....h....h.t.........t.s.....t...............hh...s..u..l....s....h..s..s...u..h....a................h.s....h....ss....hu...Da.oRa.......s..p.s........t..p.....s............s......h......h..........s........t....h...h....u....h...h...h..s...h....h...h.h....h.h.....h...u....h...h.....s...s...u...s..ssh...h.............s....t......s.......h....h......s.......s...h...................................h..s.....................uh.h..l..ss.l.l..l.h..h...h..s.l...s...s..s...s...s..s.....s......
.h..........h....u..s..u...h...s.h....s..s...l...hs....................h........l....s..h..+...t.....t...s....h....l.....s..u..l.....l....u....h.....l....h....s....h..t..l..........h....s..s........sh................hh..a.........Lshlus.hlsPlh.....Glh.l.sD.Ya.l.l.+....+...t.p.h...p...hst.h....h.............................t..h...................s.h.......N........h...h...u..h...h...s......h..h.h.u.h..h.hs.h.h..................................................................................................................................................... 1 391 822 1249 +4691 PF04236 Transp_Tc5_C Tc5 transposase C-terminal domain Bateman A anon Pfam-B_2955 (release 6.5) Domain This family corresponds to a C-terminal cysteine rich region that probably binds to a metal ion and could be DNA binding (pers. obs. A Bateman).. 27.40 26.40 27.40 26.40 27.30 26.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.48 0.72 -3.75 14 75 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 10 0 72 54 0 58.60 37 15.42 CHANGED WpKuGYh...spss.sFhTPupaC.Fs.cssssDChhsGCschuFI+Cu+CcphlCFcHFlV.phHhC ........Wa+uGYh...ss+Ps...tFcTPs-ah.Fs...csstssCsh..Csphuhl+CsaCcp.hCFscFh....H.C...... 
0 16 22 72 +4692 PF00872 Transposase_mut Transpo_mutator; Transposase, Mutator family Bateman A anon Pfam-B_376 (release 3.0) Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.15 0.70 -5.91 14 6867 2012-10-03 01:22:09 2003-04-07 12:59:11 13 34 1819 0 1273 6707 991 221.60 22 89.13 CHANGED hspsp.hsthtth.sh...thsuts-hhcshhcshhpplhpsEhschlGstcaERsttRp.st.......RNGpps+slsTpsG.pl-lplP+sRsGsF.P.......sllp+hpRp-pulpuslhphYlpGlSTRclpchlptLhG.pt..lSpupVSplscplc-tltsapsRsLpcs.asalalDAhahKlR..sspVhupulhlAlGlssDGpR-lLGhtls..suEutphWpshLpsLpsRGLps.lpLlluDuatGLspAlstsasssshQRChlHhhRNlhstss+cpt.cpltstl+sIapAs-h-tsttth-thhsthss..+aPtlsshh-cuhpcllsFhsFPtsha+plhoTNslERLNcElRRRs+shslFPNtsohlRLlhslLt-h......cccWhtu ..............................................................................h...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...........
.............................h.....h.os.N.......Et.h.......h.+..t.............................................................h...................................................................................................................... 2 397 800 978 +4693 PF00273 Serum_albumin transport_prot; Serum albumin family Finn RD anon Prosite Domain \N 25.50 25.50 25.80 25.50 25.00 25.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.31 0.71 -4.84 42 692 2012-10-01 21:15:37 2003-04-07 12:59:11 15 8 71 324 248 788 0 168.30 26 84.70 CHANGED ppphspphpphG-cshpulsllthSQth.pssacElsKLlp-lsphtcpCsts-.....pChcshtshhhcclCpppshhs..utlscCCpcstsE.RspChhphcp-p.sth.shth.....s-scchCpsapcspctFhu+alaEhuRRHP.hhss.lLtlAppYpphlpcCCp..sps.ssCh ..................t.pp.Ct.hpphGccthpsh.hllt..hop+hPpsshp-lhplsp-lsphtp...cCC...p..s...st....hsChcs....ths.....lhsplCpppph.hs..splscCCscs.h.h.............p.Rp..Chhth.csDps...........shp....t............stsc-lCpt.pc.spp..thh.s.........p...a..........LaEhu+..++Pp.hsts.LhplsppapphlpcCCp..sps.tsCa.................................. 0 11 18 55 +4694 PF01359 Transposase_1 Transposase (partial DDE domain) Bateman A anon Pfam-B_394 (release 3.0) Family This family includes the mariner transposase [1]. 22.90 22.90 23.00 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.44 0.72 -4.21 63 449 2012-10-03 01:22:09 2003-04-07 12:59:11 13 27 113 11 235 404 0 74.10 30 26.29 CHANGED hY-..N.pRp+sWlpsGpssps.ssKsslas+KlMLsVWWDh.cGllaaELLtsGpTlsu-hYppQLp.cLppslpcKRPphhsR+ .............................a...s.pcp.tpWl...p.sp.s...p...ts.Kspl.atK.K...hhLslaWsh.p..Glla.a-l..LssGcTIsu-hYspQ.Lp.cl...tptlpphp.t............................. 0 51 123 182 +4695 PF01610 DDE_Tnp_ISL3 Transposase_12; Transposase Bateman A anon Pfam-B_1015 (release 4.1) Family Transposase proteins are necessary for efficient DNA transposition. 
Contains transposases for IS204 (Swiss:Q50911) [1], IS1001 (Swiss:Q06126) [2], IS1096 (Swiss:Q50440) [3] and IS1165 Swiss:Q48788 [4]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.56 0.70 -4.80 128 7865 2012-10-03 01:22:09 2003-04-07 12:59:11 12 34 1808 0 1018 5504 425 158.50 24 62.84 CHANGED lGlDEhphp+.tpp..hhshht.h..ct...........pplltlhtsRsppslppahp.h.s.p..pppplctVshDM.ssatsslpphhPp.Ap..llhD+FHllchhsc.ulspl...Rpp.hpph.......tt..............................lKps.+all..LK........p..cpLspp.pttp............ht...hh....pt.s...pltpAYtlKpphtpha...p.p........p....httupp...hhppWhpth.......tstls.........hpchscolppahptIlshFc..............hoNGhlEGlNs+IKsl+RpuhGa+shcphpt+l ...........................................................................................................................................................................................................................................................th.......hht...th.............................................................................h..ht....hhhh..............................t........ht....t...............................................h.....hh.............t.......h..hh.h....ht....hh.hh.....p.t...................................................h.t....h..tp.h..................h...................htphhtt.h....p...ht..l.h.shht......................ho.Nu.hEu.hNthl..+.hc..p.shGhts.t.hh.......................................................................................................................................... 0 276 676 802 +4696 PF01710 HTH_Tnp_IS630 Transposase_14; Transposase Bashton M, Bateman A anon Pfam-B_1769 (release 4.1) Domain Transposase proteins are necessary for efficient DNA transposition. 
This family includes insertion sequences from Synechocystis PCC 6803 three of which are characterised as homologous to bacterial IS5- and IS4- and to several members of the IS630-Tc1-mariner superfamily [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.57 0.71 -4.44 9 1697 2012-10-04 14:01:12 2003-04-07 12:59:11 11 12 366 0 262 1502 90 102.50 39 76.09 CHANGED MAYSlDLRpKVlsalEsGGuITEAS+lFpluRsTIYpWLp.+Echush+spsRpp...KlDp-cL+pclcsNPDhhLpEhA+cFGV..pPuolpYthK+MtlTRKKpshh.cpt..+.sphppshs .........................................YShDhRp.KV.l.s.h.h.E.c.s.t.o.h.oE.AS....c....l.......F.......p.......l......S...R.sT.Ia....tW...Lc....hK.......c...c...T........G....-....h.....p.....p.......p....s.....+....t....pp.......tK.l.....D......h...c...c...L...K...s.a..l....p.....c...p......P.......D........s...hl...p......E......l......A......p......c......a......s......s......pss..o..lp.h..ALKt.hGhT.p.KKp..........................ph....................................................................... 0 52 103 234 +4697 PF01797 Y1_Tnp Transposase_17; Transposase IS200 like Bashton M, Bateman A anon Pfam-B_1347 (release 4.2) Family Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases for IS200 from E. coli. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.40 0.71 -4.25 178 5614 2012-10-02 12:35:40 2003-04-07 12:59:11 11 33 2186 40 1185 4727 578 98.60 26 68.64 CHANGED hhphpaHlV...assKYR+pl.lssp.ltpclccll.pplspphphcllp...hsstsDHVHlLlphsPph.ulScllptlKutSS+hlp..pcasphhpphhh......h..WspuYassosG..ss.shcslccYIcsQ .......................................................................................h.....hahh...hh.sp..Rp..th..h.tp...h...t.........pthtp.hh.tp.h.s.p.....h.........t.....h.p.lht..........h.p...h.....s.DHlH..h.L.l.......p...........h..........s.........s.....p...........h.......s............lo......p.......h....h...th.lKu.t.o...u.h....h....lh............pp.....h...........p..h.....t.....h....+hh..................tth...W.s.p.u..Ya.sp...oss...t....s.thlt.p.YIpt.......................................................................... 0 388 819 1034 +4698 PF01385 OrfB_IS605 Transposase_2; Probable transposase Bateman A anon Pfam-B_1210 (release 3.0) & Pfam-B_4602 (Release 7.5) Family This family includes IS891 [1], IS1136 [2] and IS1341 [3]. DUF1225, Pfam:PF06774, has now been merged into this family. 
19.00 19.00 19.00 19.00 18.90 18.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.55 0.70 -4.96 39 7021 2009-01-15 18:05:59 2003-04-07 12:59:11 14 26 1439 0 1819 6621 252 194.50 20 55.24 CHANGED ppYp.plsutssQpslccsspAacSFFshhcthcpt........sphP+Yppc....pt.........phhlhhppsphp....hcpsplplsh...............hplp.hcsphphchc.....ps+lhhhhpth....spahsplsh-.hpt......................spspssptsuIDlGlssLsslssspst.......hlhpu.+.ltut.phhs+phuclppp........hptpt........p+ssc+lp+LapKpspphcchlcphsppllpphhph..slpslslGthpthp ....................................................................................................................................th.s..shptsht.ph.....pu....a.pp.ahpth............................................thP..p....a+pc.......t.............................t.....h......p.t...t..t..hp................ppt.....t..l.hlsh............................................h.t.......h..p..h.....t...p.p.p.h..t....................thp.t..h...h.l.tpp..............spaal..s..l....hc...hp.....................................h...t.s....s.p.h....l...G.lD..l.....Gl..p..p..h.s..s..h...o..s..u.p..............................h..p...s....p...............h..p....p....h.....p...p...c..h....t....+..h..p..+..p.l..s....++......................................hptupp..............hp+tptc......l....t.+.....l...a.......p.......+ls....sh+p........Dhh....+..+l..oppl..sp.p..h.t..hh.....sl.EsLthtsh....pp............................................................................ 
1 529 1255 1605 +4699 PF02992 Transposase_21 Transposase family tnp2 Griffiths-Jones SR anon Pfam-B_1531 (release 6.4) Family \N 25.40 25.40 25.40 25.40 25.00 25.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.29 0.70 -5.26 49 1289 2009-09-13 07:43:22 2003-04-07 12:59:11 9 104 66 0 750 1419 1 149.60 32 20.18 CHANGED chhRWHtEp+p..pDGh.hRHPuDupsW+phDcpaP.-FAs-sRNlRluLuoDGhNPFu..uspaShWPVllhsYNLPPhhCMKpp.hhLolLIsGPppPGssIDVaLpPLl--LppLWp.pGVcsaDs.ppcpFsLRAhLlaTIsDaPAhu.LSGhss+Gph.ACshChcpTtuhhLcpupKhsahs.HRRFLPhsH.aRppcptFcss..hEppssPp.h...oGcclhpplcslp ................................................p.............................................................................................................................................................hh.G....P.ppP.s...p....s.....l......s..s..aLpP......llc-lt.L.ap....tGh............h..........h........c.t..t.p......p....h...l.+shlh.hshsDhPAht..t.l.u.G..........p.u.h..h.....uC..h.C.p.h..h.......h.....tt......+.........a.t.......h........................................................................................................................ 
0 205 337 361 +4700 PF02994 Transposase_22 L1 transposable element Griffiths-Jones SR anon Pfam-B_2299 (release 6.4) Family \N 25.00 25.00 25.00 25.00 24.10 24.90 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.27 0.70 -5.96 3 1528 2009-01-15 18:05:59 2003-04-07 12:59:11 9 29 45 16 1329 351 40 221.10 32 80.72 CHANGED +GKRRNhoNRNQDapuSSEPsoPTSuSPusPNThENpDLDpKuYLhMMlEGlKKDspsSLREplEspuKElQpsLKEhcEsIsKQVEshpEcoEKohKElMEl.......................pKpl+ELKcEl-oIKKpp.EsTL-IEs.tK+pGslDhShoNRIQEMEERISGAEDSIEpIsoTlK-NsKpKKlLsQNIQEIQDolRRPNLRIIGV-ESEDpQLKGPsNIFsKIIEENFPNLK+EhslsIQEAYRTPNRLDQKRNTSRHIIVRToNApNKERILKAVREKGQVTYKGKPIRITPDFSPETMKARRuWTDVIQTLREHKhQPRLLYPAKLSIIIEGETKlFHDKTKFpcYLSTNPALQRIIKE..KsQ+KNuspsLEcsR+ ............................................................................................t..........................................................................................................................................................................................................................................................................................................................................................................................................t.....l.......s...s.Rh.pph....E...-.+.IS.p.lE.......-.t.................c.h.......h....p.......s...p...........c...p.......c..............t......c..............l..............+...p..............p.......p.p....lc...-.......Lpsp.h.......+..R...s...N.lRIIG...lPE..G.pc.....p..t...c.s..s....p.sl.h..pcIh.....t..E..N..hP...s...L....h.c...ch.c...lplp.c..A..p..R...s..s.sh.h..s.s...c...c...s...s.....P...R..p.Ill..K........hh....+.hpsKE+.....IL+s...ARp.+.....p.ls.....a.+..G......ps...Ip.l......sD..hSs.-s.hptR.R.ca.pslh+.L+c+.sl.p.p...l...hYPu....+lp.hphp..G...c.hp.....F.s.t...h........................................................................................... 
0 10 330 417 +4701 PF03017 Transposase_23 TNP1/EN/SPM transposase Griffiths-Jones SR anon Pfam-B_1491 (release 6.4) Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.23 0.72 -4.46 33 134 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 10 0 57 124 0 67.50 30 18.45 CHANGED ppss..KChllDWssscc.lVuEGchpSs-P...pphV.splPLGPsAspVhVcslh.s-AhlWRPssplhhlt-ulGs ......................hhsh.t.pp.hVAcuplhStsP...pphV.sshsLG.phscVlVcsV...h.p......p-AhL.RPhsplphhtDAl................ 0 0 41 49 +4702 PF03004 Transposase_24 Plant transposase (Ptta/En/Spm family) Bateman A anon Pfam-B_1902 (release 6.4) Family Transposase proteins are necessary for efficient DNA transposition. This family includes various plant transposases from the Ptta and En/Spm families. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.80 0.71 -4.30 40 501 2009-01-15 18:05:59 2003-04-07 12:59:11 9 46 31 0 204 486 0 114.70 17 21.72 CHANGED sschWpshl.....paWpoccuccpScpsppsR......uhuh....hhHpuGppSatpltcchcpch.......scpsshh-lahcTHp.+sDGo......alcp+ucplhcphppplpcp.sphsstsst.s..................phscla...........................hpssutcc+.GRha..GlGuhtpsh ..............................................t..t.Wt.hh.....th.Wt.s...c.hpthStpsptsR.......ttht.......hhHpsGo.....coas......thtcp..hptc.....................sp.ss.hh.......clahps+..p..p..psGp...........stt.s.pp..h..hp...t...........hpp.htp.....t.............................................................................................................................t.................................................................................................... 0 4 77 124 +4703 PF03050 DDE_Tnp_IS66 Transposase_25; Transposase IS66 family Bateman A anon Pfam-B_2526 (release 6.4) Family Transposase proteins are necessary for efficient DNA transposition. 
This family includes IS66 from Agrobacterium tumefaciens [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.98 0.70 -5.18 126 5890 2012-10-03 01:22:09 2003-04-07 12:59:11 9 35 1314 0 841 6700 1022 192.80 29 59.76 CHANGED +uhsusuLlApllssKasppLPLYRQpphhsct.Gl.plsRuTlusWhtpsuph.LpP.lhstlpptlh.psshlasDETslplLt......tp...sc.sp.pualWshssst................hlhapass...sRu.upts.pph.....Lts....ap......hLpsDuYuuYsp.lh...........slpc..ssChAHsRR+Fh-shpt..................shuspuLpt.IspLYt.......lEpch.......p................s.....-.......pRtthRpppupPllsphcpWhp.....tph........tplsspotlucAlpYhl...sphssLhpalcDGcl.plDNNhuERulRshslGRKNa.LFusotpGuct ..........................................................................h...h.....s....p...t.htt....G...h....lstt..hst.h.h..........ht.....ht...h.....t.....h....t....lh...........h..l..tsD-ssh.....................t.tp..sh.h.Whhhtst.....................hhha...h..t..t+t..t............h.........h...............ht...u........hl...sDt....ht....sat...h.....................h........hChs...H.h.....p..Rt..h...ph....................................ht.hlt.........htt....ha.t.................................hc.t...th.t.....................................p...................ttht.R.p...........lh.t.t.h..t.hh...................................................ph....t.s...h.tp.u.h...t.Yhh....pph.t.h...ahps..u.h..hs...NNhsEpslR.hsls...++s.hh.ts..us.......................................................................................................................................................................... 0 197 489 640 +4704 PF03400 DDE_Tnp_IS1 Transposase_27; IS1 transposase Mifsud W anon Pfam-B_2448 (release 6.6) Family Transposase proteins are necessary for efficient DNA transposition. This family represents bacterial IS1 transposases. 
20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.65 0.71 -4.04 2 3369 2012-10-03 01:22:09 2003-04-07 12:59:11 8 11 576 0 195 2345 250 104.30 60 82.31 CHANGED hllC.EhDEQWuaVGuKuRQ+WLaYAYsphptsVlAasFG.RT.tThtcLhuLLoPFslshhhoDsWs.Ytpcl.tchHlhtKhaTQRIERpNLsLRp+ltRLuRKolsFS+SVElH-KVIGpalph+ha. .............................................................h..DEpWuaV....G.uK....u....R..Q+W.LaYAY.c........php.c.sV.l..AaVF.GcRT........hsTlt.........+L.h.u.....L..L....o.s..F.....c..V.s...l..aMTDGW.PLYESR..L........K.GK..L..H..V..I...S...K.R..Y..T..Q..R...IE.RHNLNLR......QH...LARL...u...R.KoLSFSKSVEL...HDK.VIG.aalph+aa.......................................... 0 49 122 175 +4705 PF04195 Transposase_28 Putative gypsy type transposon Bateman A anon Pfam-B_1755 (release 7.3) Family This family of plant genes are thought to be related to gypsy type transposons. 20.10 20.10 20.20 20.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.09 0.71 -4.99 2 725 2009-01-15 18:05:59 2003-04-07 12:59:11 7 21 14 0 419 710 1 151.90 37 20.83 CHANGED MActptsh-uphhPsshh.pphpthls+chhPtpthht.hsAhGEuhPTPch..csVhF.pFhhsGhs.Ph.pFFhsILEFYslphtHLsPNulhhlA.FhHhCEhFlGlRPphhLFphhFhlp....hS.PhVVGGshFQ.RGpl.p+Yhshsh+pp.csW+usWFYs..s-.A...LPp.s. ......................................................................tph....p.hls+thh...P..tp..t..hh..t.h.h...h.G.E..u.h.PsPph....csV.hF.sFhh..uGhh.....PhSpFFh..s.lLpFYslphtHLsPNulhplAIFsHlCEhFlG.l...c.PphpLFRhhF..hlps.........hs..s.....s.........V.........s.........G.....u.......sh.Fp.R.s.sl....p..+...Y.h.sh.s...h.+pp..c...s.W+upWFYh.......sp..s........................................................ 
1 0 19 24 +4706 PF04693 DDE_Tnp_2 Transposase_29; Archaeal putative transposase ISC1217 Mifsud W anon Pfam-B_5730 (release 7.5) Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.09 0.70 -5.43 3 202 2012-10-03 01:22:09 2003-04-07 12:59:11 7 3 27 0 42 452 37 154.30 27 83.72 CHANGED MsKEL..sRpEYYKALccAlsplhhuMTGlRKDVAsRLlLGuVlGGsAT..EIAQss-MDYETVLKNLDKLANs..cLIElVKKlVtDHPVlLIIDDTHDHKLYARAhPV.SRNGsQhFYCRsHKRFEPAIQLLlIAlKDLssNcoYlIsIIPYIPRKVtE.LKcRGEcuEFKTKI-hhLEhLsoLhscaNVsslVFDSWYVNSKTLpGNTVGELKSNuRVVE...........G-RHVPVuEFPpGEYLVEYL..GTPIKLLVIDsYKchGRRYFFSTDLNDTsEDIITTWENRWDIEVLIRELKALGLEcSSFLTWlRNpGFlsLKALSLLlVpsFKYSLGL+LG ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 23 23 41 +4707 PF04740 LXG LXG domain of WXG superfamily Zhang D, Mifsud W, Aravind L anon Pfam-B_3568 (release 7.5) Family This domain is present is the N-terminal region of a group of polymorphic toxin proteins in bacteria. It is predicted to use Type VII secretion pathway to mediate export of bacterial toxins [1]. 
22.30 22.30 22.40 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.12 0.71 -4.53 35 970 2012-10-01 21:44:22 2003-04-07 12:59:11 7 41 392 0 96 785 2 186.40 20 39.12 CHANGED KplDspplhstl-pptpphcphppplpslcpulpslssLcs..LpG+uuculKsaapshahPlhpthhphl-phpphL.pplpstlpsh-stssuhI-psFLcpEL..ppuls+scphhpphpppl..sshhsslsDllpls.hsppshppplppApcchpcsl-+LtshDpptsshhspscsthptlpphlppLpshh.ssut........pssuYp ....................................th.t..pph.t..pphppphpslp...p.ulpp...h...hssss...L.pGcuhsusKsaappshhPlhps...hhphh-thppth...tphl.p.p.apupV.-.s...s..p.shlc.p.shLc....-l.........pplsp..pp.h..pshp...p.p.h........pp....h..h....s.s...t..p................h..s......p...ph....hp.......ph....tptp...+.clpcpLc...+Ltp...F.s...p.pp.s.p...ap.ph.p......ph.p.lppslpplpshh....................s...................................................... 0 29 53 79 +4708 PF04754 Transposase_31 Putative transposase, YhgA-like Mifsud W anon Pfam-B_3820 (release 7.5) Family This family of putative transposases includes the YhgA sequence from Escherichia coli (Swiss:P31667) and several prokaryotic homologues. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.06 0.71 -4.96 54 3243 2012-10-11 20:44:43 2003-04-07 12:59:11 7 7 798 0 393 1853 67 186.50 44 64.84 CHANGED sPHDulFKphhspspsA+-FLchaLPspltplsDLsoLclEssSFl--sL+ppaSDlLaSlcspp............t.cG.....YlYlLlEHQSpsDphMAaRLh+Yslshhp.......+H.hct..........scpp.LPlVlPllFYHGpps.ashshsah-hFs.....sstlAcplhstsa..LlDloshsD-EIhp++p....huhLpLl.KHI+.pR.Dlhchlcplsplltphhpscpplp .............................................................................pPHDAlFKpFLtp.s.-.sARDFlp.....lHL..P..t...l..+..plCD.......L....pT....L....+..L.Es.s...S..F.l.........-....c..s...L.....+.....t.....h.a.SDlL.aS...lcTpp.............G...sG.....Yl..Y.s..l..I.EHQSps-.phMAF..R.........hM.....R.....YuhAA..MQ..............+H..L.-t...............................sa.c.p......LPLVlP....l......LFYHGppo..PY..P....a.S.h..s..W...h.DtFs........sPt.lA+pl.Y.s..p..sFP...LVDlT.lh..PD-..E..Ih..pHR+.........hAlLE.L.l..Q.K..H...IR.pR..DLht.ll.-pl.ssLLsps.ssspQ.p.................................................................................................................................. 1 151 264 314 +4709 PF04986 Y2_Tnp Transposase_32; Putative transposase Moxon SJ anon Pfam-B_5271 (release 7.6) Family Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases IS1294 and IS801. This is a rolling-circle transposase. 
20.70 20.70 20.70 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.12 0.71 -4.54 65 1246 2012-10-02 12:35:40 2003-04-07 12:59:11 8 16 499 0 183 1121 344 156.80 27 48.81 CHANGED GhhuVLHTaGps.LsaHPHlHhllsuGGlstss.....pWtp....sp.t.ahhss+sLuphaRsphlptLppth.t..h........h.t.tthsthltphhpc................................................pWsVhsp.shs..pscssLpYLuRYhpRssIospRlhphs......sspVsF+a+Dhc...............sscpcphsLsstEFlcRhlhHVL.PcGF++lRaYGhLu.....ttcppp ...................................................................................................................Ghhshlppa.Gpt.hphpsHhHh.h.sGshsptt.........................hhh.hch....tp..hhphh.phl.pt.h.th.................htpttt.p.hh.pthtc..................................................tasl.sthhht....ssp....psh....tYLuRYlp+sslutpRLtt.hs.......tspltaphps.p...............ppppphhhhss.-FltRhhhHls...thp.hhRaaGhhu.......pt........................................................................ 0 73 119 151 +4710 PF01498 HTH_Tnp_Tc3_2 Transposase_5; Transposase Bashton M, Bateman A anon Pfam-B_462 (release 4.0) Family Transposase proteins are necessary for efficient DNA transposition. This family includes the amino-terminal region of Tc1, Tc1A, Tc1B and Tc2B transposases of C.elegans. The region encompasses the specific DNA binding and second DNA recognition domains as well as an amino-terminal region of the catalytic domain of Tc3 as described in [1]. Tc3 is a member of the Tc1/mariner family of transposable elements. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.15 0.72 -3.92 26 1241 2012-10-04 14:01:12 2003-04-07 12:59:11 13 82 129 7 1065 1248 10 65.70 24 22.63 CHANGED cRpIlphlccsPp...hohpcLtpphtp..GhslSppTlp+pL+ptGhpu.pps+++Ph.Lotcpt+sRLpFAppHls ...............................................................ostclttpl...............s.h.slStpTlp+hL.+p.t....G.h..tu..p...ht.h+.K.P........h..Lo......t....pppct.RLtaAptH............................. 0 570 817 1048 +4711 PF01527 HTH_Tnp_1 Transposase_8; Transposase Bashton M, Bateman A anon Pfam-B_527 (release 4.0) Family Transposase proteins are necessary for efficient DNA transposition. This family consists of various E. coli insertion elements and other bacterial transposases some of which are members of the IS3 family. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.29 0.72 -4.02 45 12930 2012-10-04 14:01:12 2003-04-07 12:59:11 15 69 2688 2 2404 10672 1730 75.20 20 64.00 CHANGED hpp.pRaocEhKtplVcps...csGtslsclu+chGl.ssssLapW++ph..........suhtssssp....................h.hshpp-spcLc+hhsc ................................tpppa.otE...h...K.h.......p...h...l.p................s.........h........c........s.......s........h........s........l..s.......p.........lA..c....c....h.....Gl..s.s.s.....s.l.hp.W.h+.php................ts.t.s.s..tp...........................................h.t..ptc.tpLpt....t.................................................................................................. 
0 628 1441 1931 +4712 PF03221 HTH_Tnp_Tc5 Transposase_Tc5; Tc5 transposase DNA-binding domain Mifsud W anon Pfam-B_2955 (release 6.5) Domain \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.26 0.72 -4.16 194 2088 2012-10-04 14:01:12 2003-04-07 12:59:11 11 91 246 2 1635 1984 4 63.10 21 13.60 CHANGED hsph-csLhpWlpph...pppshslosphlpppApp.lh................ttsshpsSps.....Wlp+FhpRa..slptpp ...................................t.EctLhpWlhph......pppG....hs.......s.otphlpppApp.lh...........................tts.shps.ups.......Wlp+..FhpRa...tlt...h....................... 0 469 953 1437 +4713 PF02281 Dimer_Tnp_Tn5 Transposase_Tn5; Transposase Tn5 dimerisation domain Mian N, Bateman A anon Pfam-B_5683 (release 5.2) Domain Transposons are mobile DNA sequences capable of replication and insertion into the chromosome. Typically transposons code for the transposase enzyme, which catalyses insertion, found between terminal inverted repeats. Tn5 has a unique method of self- regulation in which a truncated version of the transposase enzyme acts as an inhibitor [1]. The catalytic domain of the Tn5 transposon is found in Pfam:PF01609. This domain mediates dimerisation in the known structure. 21.40 21.40 21.40 21.40 20.70 21.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.43 0.72 -3.93 2 403 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 154 6 49 283 17 97.40 52 31.63 CHANGED EuhTlP.hLRtpGLlcEActVEuQSs-TVLp.DEhplLhhhsKs+tc+tptAsSLpWAY.uIA+LGGFhDoKRTGIASWsAlWEGWpsLQS+lsGahsAK-hhApG.pl .............hhlssplhs.........R.hGl.pE.....sps-SCEKILTPoEWKLL.W.l+lcGK.....LP..sQhP.TLK.........WAsLpLAKLGtWHDS.KRTGcPGWsVhWDGWFRLQDMlEGYhlhKSL.......Dp-..................... 
0 11 26 31 +4714 PF00576 Transthyretin HIUase/Transthyretin family Bateman A, Percudani R anon Bateman A Domain This family includes transthyretin that is a thyroid hormone-binding protein that transports thyroxine from the bloodstream to the brain. However, most of the sequences listed in this family do not bind thyroid hormones. They are actually enzymes of the purine catabolism that catalyse the conversion of 5-hydroxyisourate (HIU) to OHCU [2,3]. HIU hydrolysis is the original function of the family and is conserved from bacteria to mammals; transthyretins arose by gene duplications in the vertebrate lineage [4]. HIUases are distinguished in the alignment from the conserved C-terminal YRGS sequence. 25.30 25.30 25.30 25.80 24.70 25.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.54 0.71 -4.18 26 1803 2012-10-02 19:08:27 2003-04-07 12:59:11 16 7 1506 439 545 1303 706 109.50 40 82.21 CHANGED ssLoTHVLDsupGpPAuulplcL..........a+hs...ssshp.lsostTspDGRh..tsLlsspphtsGhYcLpFcsucYaputGh................ssFh-hlsltFsls-ss..pHYHVPLLLSPauYSTYRG .............................................s..LosHlLDpspGpPA...........ssVpVpL...............................ch...s........sssh....p..l.s.s...uhTspDGRl............ts..........l.........h.................s...........t..............p.................s..........h..............s.....sG....................t..................Y..+lhF.csucYF...cpp.sh...................psFas..pls......lpFpls...css.......pHYHVPLL.LSP.auYSTYRG.................. 0 132 273 418 +4715 PF03896 TRAP_alpha TRAP-alpha; Translocon-associated protein (TRAP), alpha subunit Finn RD anon DOMO:DM07004; Family The alpha-subunit of the TRAP complex (TRAP alpha) is a single-spanning membrane protein of the endoplasmic reticulum (ER) which is found in proximity of nascent polypeptide chains translocating across the membrane [1]. 
23.10 23.10 23.10 23.40 22.90 23.00 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.92 0.70 -5.41 11 349 2009-09-11 14:06:22 2003-04-07 12:59:11 11 8 203 0 190 299 3 225.90 31 89.23 CHANGED M+.h.plLLLhLLuFPusLl.hutspu.hssA..p-...--Es........-D.lsp---DpA.VE-Dp..t.oEt-E---.....tplpuSPcADTTILFVKs........c-FPAsplVKFLVGFTNKGs.cDFlVESlEASFRYPQDapaYIQNFTA..........LPhNslVpPpcpATF-YSFlPuEsh...............uGRPFGLVIsLNY+DusGNsFQ-AVFNQTVsIsEp--GLDGEThFhYlFLuGLulLllluhaQhLpSppR+Rs....tthVEhGTuups-VDhsWIPpETLspl.....sK....uSP+............pSPRpRptKRuAGsD ....................................................................................................................................................................................................................................................................................t..............tsssts.cshhhFsps...........p.hsusp.schLl.uhpNc..G...p....p.shhVpslp....u...Sh....+....hP.t...D.......a.......phhlQNaTA..........h..h.s.t...V.s..tppuThpYsFhs.sc.h...............t..spsasLsl.sls.Yp.Dh..s.....G..p.h.ap.sss.aNpTVsllE.t.p.su.l.DsEo...lFhY..hh.L.s.u.l..s.l.h.s......lh..hht.........p......h.....h.t..t....h...p+p+.......................h.plE.....h..........G.....T...s......s.....t..p.s........h...D..pWl...Ptcp..l.p..................................................................................................................................... 0 55 83 145 +4716 PF04051 TRAPP TRAPP_Bet3; Transport protein particle (TRAPP) component Wood V, Finn RD, Mistry J anon Pfam-B_9946 (release 7.3) & Pfam-B_6495 (release 8.0) Family TRAPP plays a key role in the targeting and/or fusion of ER-to-Golgi transport vesicles with their acceptor compartment. TRAPP is a large multimeric protein that contains at least 10 subunits. This family contains many TRAPP family proteins. 
The Bet3 subunit is one of the better characterised TRAPP proteins and has a dimeric structure [2] with hydrophobic channels. The channel entrances are located on a putative membrane-interacting surface that is distinctively flat, wide and decorated with positively charged residues. Bet3 is proposed to localise TRAPP to the Golgi [2]. 20.70 20.70 20.80 20.70 18.80 20.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.99 0.71 -4.69 84 1130 2009-01-15 18:05:59 2003-04-07 12:59:11 11 22 333 34 772 1043 17 154.70 25 76.72 CHANGED hFslhauplVs.hhc.............shp-lp.........................................................................ppLcphGaplG......hRLlE..................................chhh+..............................................................tt.tRppchhcshchlspsha+hhauppsss...........................lppspt.....................tpah.lh-p.s.l...............................spal.p.s.....ttht.pL.....................passhhsGll+GuLpshthsspVshpps.h...........h.stsshpI+hph.p ...............................................................................................................................hshhasplVs..hhp......t........sht-lp...................................................................................................ppL-ph.............GaplG......h+LlEphht+..................................................................................................p...tRhpchh-hhchIspshW.ph.has+p..s...c..s.................................lppsp....p...........................spah..lh-pp..l....................................................spal.phs..............p..t.t..tl........................................asshhsGll+GuLpt...l..sht.s.......p.Vshp.s.h..............s..ssh.lcht..h..................................... 
0 266 420 622 +4717 PF04956 TrbC TrbC/VIRB2 family Moxon SJ, Bateman A anon Pfam-B_5261 (release 7.6) & Pfam-B_14627 (release 10.0) Family Conjugal transfer protein, TrbC has been identified as a subunit of the pilus precursor in bacteria. The protein undergoes three processing steps before gaining its mature cyclic structure[1]. This family also contains several VIRB2 type IV secretion proteins. The virB2 gene encodes a putative type IV secretion system and is known to be a pathogenicity factor in Bartonella species [2]. 28.40 28.40 28.40 28.40 28.30 28.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.14 0.72 -3.99 114 1258 2009-09-11 06:06:03 2003-04-07 12:59:11 8 2 765 0 305 1048 45 96.80 19 86.80 CHANGED Mp.................hhthhhhhhhshhhhhhhspsAhApss..........sshpsslpslhshlpG.luhsluslsllssGhthhhGphs....hpphhhlllGlslhhuAsplsshl ..........................hhh................hhhh.hhh.hh.h.h.hs..ss.AhApsss................tshpsslpslhs.lpG.luhsluslsllssGh.thhauptp........hppll.h.lllGhs....lh.h.uAsplssh................ 0 47 166 228 +4719 PF03743 TrbI Bacterial conjugation TrbI-like protein Finn RD anon Pfam-B_776 (release 7.0) Family Although not essential for conjugation, the TrbI protein greatly increase the conjugational efficiency [1]. 
20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.83 0.71 -4.80 149 1918 2009-01-15 18:05:59 2003-04-07 12:59:11 9 40 1082 20 412 1681 119 183.10 23 39.41 CHANGED hhl.tGohIsusLhTulsSsh........PG.l.hspVopsla.ossG....ptlLlspGopllGpY.pus...ls.GpsRlhlhWsRllhP.s.....Gts.lsl...stsusDth.GtuGlpGp.VssHahphauuAhlhollus....ssphh.............sssssssssssh...............hhpsssp.shsphupphlcpshslt.PTlplptGph.lslhVs+Dlsh ....................................................................................................................hlstGohIsssLh.oulsosh.................sG...l.hsplspsla..sssG....phlL.l...sp...Go....plhGph...pus.....ls.u...p.s..R..lhlhapch.l.hs..s.........Ghs..lsl...............stsuss.........h..G...tuG..........lpGp.V....s.s...+h.h.phh...usAh.hhuhlss.hsphh..........................................t...tt..psstsspsth.....................................................................................thtpusup..uhsphup.p...hlcp..sh...sl...Ps.lhlptGpt.lslhlscDl.h....................................... 
2 81 230 319 +4720 PF04610 TrbL TrbL/VirB6 plasmid conjugal transfer protein Mifsud W, Bateman A anon Pfam-B_5275 (release 7.5) & COG3704 Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.67 0.70 -4.65 129 1847 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1036 0 437 1663 116 208.00 14 43.80 CHANGED lslhlhlhGhhhh...hst.hptsh.tchltchltlul.hshlhts.....hs...sasshlhsshss.hstth...uu.........sstsssshtthspl..h.stuhshsptl.........hppssshh............hhhhhhth...lhhlsshlhhhls....uh.lhlshlthtllhhhGPlhlshhla.shTpphhppWlsplluhulhllllsllhul....shshhs.shhssh...................ssspsslppshshllhuls...hhhlhhtsPslA....uul .....................................................................................................................................hl.hhh.h.sh.hh.....ht.....t.....sh....phh.ht.....hhh.hsh...hhhhh.s.......hs.........h....phl.h....p.h...t.htt.h...........st..............................ts.h...sth.........h...thsht..h..ht.h..................................................hpphsth.........................hhhhh.hhh.....lhh..l.sh.h....l.hhhhh........shhh.llshl........thhlhh.hh...u.Plhlsh..hh.a.shT.+p.h..hp..s.alp..tllu..h...sl...p...h.hllslllu.l.........hh.shhs..phhsth........................................tts.h....tt....h..h.....hh...hhslh.....hhhlhh.h.s.hht.................................................................................... 
1 96 236 316 +4721 PF03461 TRCF TRCF domain Dlakic M anon Dlakic M Domain \N 25.00 25.00 26.00 25.20 24.40 24.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.01 0.72 -4.05 117 4305 2009-09-10 17:08:24 2003-04-07 12:59:11 10 10 4219 3 926 3507 1246 100.20 30 8.89 CHANGED cL....slsAhlP-sYls-.p......pRlplY++luss....pspc-lpclppELhDRFGshPpplpsLlplscl+hhApphslpplptpppp.l....hlpa.sppsshshpthhhhhp .......pLplsAhlP--YI.s.D.p......pRlphYKRluss.............csp..p....-lc-lpsELlDRFG.......hP.....c....sppLLplspl+hhApphGlpc.l....c.....tpsps...l.....tlpF..sppsphp.........hh.............................. 0 316 613 786 +4722 PF03546 Treacle treacle; Treacher Collins syndrome protein Treacle Griffiths-Jones SR anon PRINTS Family \N 21.10 21.10 21.70 21.50 19.50 20.60 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.24 0.70 -5.71 4 216 2009-09-11 10:35:01 2003-04-07 12:59:11 9 8 25 0 59 246 0 175.30 26 51.50 CHANGED sPAPPGKsGPuAsQApstKPEEDS-SSSE.-SDSE-EsPAshsssQsKPSGKsPQVKuASssupps..spKGsPPVsPGKAGPsAsQA......tc.pspSSpcppssptEsPsAh..TpS.spsKP.tpsSQVRssSsss.Gs.........PtKstssA..sQsGKp.EDS-SSSEEESDS-s.....suuPAQAKSSGKl.Qh+sASGPsKtPPQKAGPsATQVKs-+uc-DSESSEEpSDSE-..EAPsAhosAQAKsAlKhsQhKASP+KGTPhossuA+ssPVpVGTsAPpKAuAVoSPssuSSPAlA+GTQ+PsEDSSSSEESESEEE.TAPAsstGQuKSlGKGLpVKAASsssKts.GQGTAPs.PGKsGPuss.VKAEsQED.SESSEE-SsS...EEAAAsPAQVKTuVKpPQuKANsusTRsssAKushSAPGKsVsAssQhK.tSPAKsKPPsRs.QsSsVSsRGQsSVPAVGKAsAsAAQAQsGPVtsspEDSE.SSEEESDSEt..EsPsQsKPSGKTPQVRsASAPuKt 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 6 6 11 +4723 PF00088 Trefoil trefoil; Trefoil (P-type) domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 21.00 21.00 21.10 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.01 0.72 -4.32 86 900 2009-01-15 18:05:59 2003-04-07 12:59:11 13 54 128 38 475 805 8 43.30 33 8.06 CHANGED pCs.l.......ssppRlsCGhss...IopspCp.p+GCCacs...........p..........sssaCFas ......................C...h...ssppRhsCusss......lo...p..ppCp..s........+G......CCass.....................t.................ssPhCaas............. 0 154 186 297 +4724 PF01204 Trehalase Trehalase Finn RD, Bateman A, Wood V, Studholme DJ, Moxon SJ anon Prosite Family Trehalase (EC:3.2.1.28) is known to recycle trehalose to glucose. Trehalose is a physiological hallmark of heat-shock response in yeast and protects of proteins and membranes against a variety of stresses. This family is found in conjunction with Pfam:PF07492 in fungi. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.72 0.70 -5.86 20 2673 2012-10-03 02:33:51 2003-04-07 12:59:11 13 12 1263 20 654 2119 179 396.50 30 72.24 CHANGED +hasDpKpFlDhshhs-..ssplhpthpsph.cs.st..sspsLppalc-sFpts.t.h............th.sPtstp-p.c.ah.h..pcssLRh.sppLsphWss..LsRplptpstths.thollshPp.........PaVVPGGRFpElYYWDSYahhLGLLtSsph-............hu+uMl-NFhahIc+YG+I.NGsRoYYLsRSQPPhLohMlhthhc+h..........ss-thh+chLsslccEasaWhs.......ss+hssVsshu..sapLstYhsss-TPRsESht.DhthAp+hs......-tscs.hYp-L+uuAtSGaDaSoRWlc-..Gps.s.hsslpTssllPVDLNulLa+hEpsIA.....hFssthsp.............hpsushacppAcpR+puI-paLWs--sGhahDYDhppp............cpsshhoAoshaPLWsshusscp....st.hsspslsphppsuhLp...suGlusoslc...oucQWDaPNGWAPhQhlshpGLp+YG....cplAccLAhRWLtpsppuascp....GtllEKYDVsp.......suchG..GGGGEYssQ-GFGWoNGVhlhhLchas .................................................................................................................................................................................................................................h.......................................................l..hht..h...................................................................t.t....hp.....l..........h.....hh+..........................t.......................................p....h....st....................................sall.P....G....G..R..F.pE........h.Y....h...WDoYh.....hh................GLh.s.......t...h...p............hh..tshhtNhh..l..pthGh...l......Nu....sR.YY.....tRSQPPhh..uhMV..hhpt........................s.t.hh....p....p....hh...s....t.l....tt....a.s.a...Wht.................................s.t.....tp.h.s....t..h..................t..........h......Ls.p......Yhs...s..........-.......ss.p.s..-..S..h..h.sh..tsAt.p.s.........................ps.sthatp.l...c..s.s....t.ts..u......aD....h....o......S.R..W......h...tp.....s.............................................h...s...o.....l...............ps..........s......pl..
.l.......s.........l...D..L..N.....uh.hap.....cphlA.............phsthh..Gc..............................................tpps.ttacphApthp.ps.lp...phhWspp.pGh.ah.......D.achppc.....................................p.....p..p...h..h.s...s....s...u....h.hP..L.as.sh....Aspp..p......................s..p...h...h...s.p...h..t..p...t...p..h.lt.............PuGls.so...php........s.sp.Q.....W..D....t...Ps....u.....W....s.....P.h.pa..hu.....lpGh.p.p.YG...............................pphA.........c.l....utp....alpp........spp..s...a..................t................p..............p..................t..............t.lh.................E..KYs.st.....................................ss..t.....ss.hch.......p.....p...GF.GWoNushh.hlt...s...................................................................................................... 0 245 385 556 +4725 PF02358 Trehalose_PPase Trehalose-phosphatase Bashton M, Bateman A anon Pfam-B_762 (release 5.2) Family This family consist of trehalose-phosphatases EC:3.1.3.12 these enzyme catalyse the de-phosphorylation of trehalose-6-phosphate to trehalose and orthophosphate. The aligned region is present in trehalose-phosphatases and comprises the entire length of the protein it is also found in the C-terminus of trehalose-6-phosphate synthase EC:2.4.1.15 adjacent to the trehalose-6-phosphate synthase domain - Pfam:PF00982. It would appear that the two equivalent genes in the E. coli otsBA operon [2] otsA the trehalose-6-phosphate synthase and otsB trehalose-phosphatase (this family) have undergone gene fusion in most eukaryotes e.g. Swiss:P31688 and Swiss:P93653. Trehalose is a common disaccharide of bacteria, fungi and invertebrates that appears to play a major role in desiccation tolerance [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.31 0.70 -5.45 24 2499 2012-10-03 04:19:28 2003-04-07 12:59:11 11 26 1614 1 1046 2316 209 215.20 27 42.78 CHANGED hhDaDGTLsslspp.s.sshssschhssLppLuucss.hlhllSGRshp....phhsps.slsluupHGh.lRhssuppaps...................s.stshshhcplttlhpphscphsGuhlEpKcsulshHYRpAssc....pspphhpplcsshpsp.slclppG+pllElRPshss.KGtslctllpphs.t..............s..sphslslGDDhT.DEDhFcslppt.......tshshclhsss..stpsopAthhlpsss ......................................................................................................................................................hhDaDGTL........s......sls....sp...P......p.t.....s..h.s.s.s...p..hhpsLpp.L.........us.ps...ss..t...lullSGRshsp........l........c........t..h.....s...........t........h..............p......l....s....lA.GpHG.h-h....+..t.....s........s.....u.....p.....p..hhs.......................................................................................................t.h....s.h...s....h....h...p...p....l.t......t.....L...p....p..h........h........t......p....h...s.....G..sh.....l..E........p..K.t.hul...sh...HY...Rp......A..s........p..c..................t...t.p.....tl..h......pt...h....p.p..h..h..t.....p.h.....t.l.p......lp..t..G..+....p.....l.lE......l..+..Pt........sss..KGp....Al....ptlhpp.hsht.............................................sp.hslhlGD.DhT..DEsuFtslpph........................sh...s.h.h...lhtst...........o.A........s.............................................................................................................................. 0 327 667 904 +4726 PF03973 Triabin Triabin Finn RD anon Pfam-B_20829 (release 7.1) Family Triabin is a serine-protease inhibitor. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.90 0.71 -4.55 4 329 2012-10-03 08:47:39 2003-04-07 12:59:11 8 4 13 3 2 409 1 140.20 25 76.19 CHANGED Ap....s.hMP.Gs.p..tchhssFchpcFFs.scWYlTHucsss+PplCpcapsous.c.........clpas...htS-VpCpsspVpGtcG..aSFpCcsss.....pcFpu.hoVluTDYcNYALlhRCspa.pSshcDsaLVhpRpKpus.Puulpop ...............................................................................................ttt.................tshpsFsspcF....ap..u......pW...Y....VT..H....s....p.....p...s.....o....p.....s..s..l..C..+..p..a.p..s.s.pp...sup..h.hs....hp..a..s.h..s..t....ttt......p.......l....pC..p..s..p....t.............p..p...t....p....pt......h....sFsC........p.....s....ss...........................hph.....p..h.....h.s.llsTDYssYAlhY..RCsph....s...s...t........h.....p........D........N.h.LlLpRp.t..s.t............t......................... 0 1 2 2 +4727 PF02080 TrkA_C TrkA;TrkA-C; TrkA-C domain Mian N, Bateman A anon IPR000309 Domain This domain is often found next to the Pfam:PF02254 domain. The exact function of this domain is unknown. It has been suggested that it may bind an unidentified ligand [1]. The domain is predicted to adopt an all beta structure [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.04 0.72 -4.38 181 18298 2009-09-12 01:06:25 2003-04-07 12:59:11 16 80 4247 44 3835 11843 3990 69.60 20 21.79 CHANGED lhphplst.ss..lsG.cslp-.ht....ltpt.....shhllul..p....Rs......tthhhPss.sthlpsGDhlhlhuptps.lpp..htphhs ........................h...h.l.t.ss..hsG.+slp-..lp.........ltpt.........sshlsul....h...................Rs..............sphhh...Ps...s...sshlp.tGDhlh..l..h.ustpp..lpp.ltp............................... 0 1256 2566 3307 +4728 PF02254 TrkA_N KTN;TrkA-N; TrkA-N domain Bateman A anon Pfam-B_289 (Release 5.3) Domain This domain is found in a wide variety of proteins. 
These protein include potassium channels Swiss:P31069, phosphoesterases Swiss:Q59027, and various other transporters. This domain binds to NAD. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.38 0.71 -4.09 219 14934 2012-10-10 17:06:42 2003-04-07 12:59:11 13 58 4445 85 3537 12367 5545 116.30 21 31.04 CHANGED llllG.hGchGttlscpLtp.s.p...................llll-pst..pp..lpph..p.pph...h.........slh.GDuocppsLpps.slppsc..sllss...hs.......s-....psslhsshhs.+ph.st..hp...ll..A+spstpp.....tchl.pp..hG.....s..ctllsP ..........................................................hlllG..h.G.p.h.G..t.tlucp...Ltp.pt................................slhll....-....p...s..t.......pp.............lp...ph......p..pph......hp.....................slh..G.D..u.s..c.........c....l......L..c.....p...u....u....l........p.......p....s-.........hll..ss...ss...................sc......psshhsshhs..+p...hhst.........hp.........ll....u+s..psspt.........hph...l....pp.......hG.........h...s.hl................................................................................. 0 1086 2338 3026 +4729 PF02386 TrkH Cation transport protein Bashton M, Bateman A anon Pfam-B_529 (release 5.2) Family This family consists of various cation transport proteins (Trk) and V-type sodium ATP synthase subunit J or translocating ATPase J EC:3.6.1.34. These proteins are involved in active sodium up-take utilising ATP in the process. TrkH a member of the family Swiss:P76769 from E. coli is a hydrophobic membrane protein and determines the specificity and kinetics of cation transport by the TrK system in E. coli [3]. 
19.60 19.60 20.00 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.98 0.70 -5.88 26 6295 2012-10-03 11:11:44 2003-04-07 12:59:11 11 18 3786 2 1475 4562 3549 309.50 23 66.68 CHANGED pluttphpuhpphhphlhhhhhhhchlutlhlslhhlhhhthtts..............................h..ha.AhapohSuhssuGhSlpssS.hssFss..shhlphlhshhhlhGu.huFslhhchhhhhh.t...........h..hhhahtsh.hcshh.Llhhuhhslhhhphhssh.................................htthshstplhssaFtssssRTAGFoshDluphssushlhhhlhMaIGussuoTuGGl+ssphthhltsh..........pthhpt..........................................thpstpattcplppphlh....cshhlhlhhhllslspshhL.hhtpss....hhshlFEssSAaGTVGLShGh...............ssshohs......uKlllIhhMlhGRlchlshllhhshhh. ........................................................................................................................................................................................................h.......t.hht.h.h....hh.thhh..phlu.t.hh.hs...h..h...h.l...h..h..h...h..h..h..t.s.............................................................................................ha.p.ulhpuh..Ssh.....ssuGF...s.hh......s......s.......S.....l...h.........ass........h.lphl.lshhlllGG.ls...F...s....l.hh...p..l..h..tt..t.t.............................................h.pha.h+.....s...hphhh....ll.hh...u...h..h..hl.h.h.ht.h.hthh.........................................................................................................s..h..s..h.t...s....h.h.h.s.s.a.h...s.s.s.s.pTuGF.s.o......h......D....h...s.....p.....h....s...........h..s..........h....lh.h.....hhM..................F..IGus..sGST...u...GG...........lKshp.hhllhhhh...............h.h.h.h..p..............................................................................................................th..s..hh.h.....s........+...tl....s....p....c....h..lp........psh.s.h.h.h..h........h........h.h.lh.....h..h........s.......h...h...........h........l..................h..........s.......s................
........s....................................h.......shh.hp.s....hS................A.husl......G.......u...h....G..h......................ss..shssh...................uKhllhhhMhhGRlthholllhhh............................................ 0 480 956 1266 +4730 PF02005 TRM N2,N2-dimethylguanosine tRNA methyltransferase Enright A, Ouzounis C, Bateman A anon Enright A Family This enzyme EC:2.1.1.32 used S-AdoMet to methylate tRNA. The TRM1 gene of Saccharomyces cerevisiae is necessary for the N2,N2-dimethylguanosine modification of both mitochondrial and cytoplasmic tRNAs [1]. The enzyme is found in both eukaryotes and archaebacteria [2] 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.29 0.70 -5.54 10 846 2012-10-10 17:06:42 2003-04-07 12:59:11 11 9 491 7 551 1753 342 314.40 25 78.44 CHANGED Gcu+lplsct..ho........lsstssVFYNPtMcFNRDLoVsl........LsshspKhh................................................hplLDALSASGIRuIRaA..LEhsslcclaANDlsscAVEhI+cN......spLNsls.......-.hllhNpsDANhlMt...pppctFcsIDLDPFGSPuPFlDoAlpSlccp.GlLsVTATDsAsLCG.saPcsCh+KYs.AhsL+s-hCHEsGLRILlshlsptAAKYc+ulcPLLSaSpDHYhRVFV+l+cGst+uccshcphGalhaC.tC..........hp....cpssGhschp........scCtaCGschpluGPlWlGPL+DccFlschlctscs.....ttph..pc+lhtlLphlc-E..lDsPhaYshspluphlKlssPPhpcllsuLtphGFcsStTHhsPsuIKTsAPhcslh-lh+ 
...............................................................................................................................................................................................................t.............................t.sFaNsh....t....h.....NR.Dlsh.hs............................t....h..t....p.......................................................................................................................................................................p.hL-uhuu.oGl........RulRas..hE............................h..........p....lh.h..ND..............hs.tu.hp..hptN...................hp..hNt.ht......................................h...p.........DAph...h..h............................p.ac.hlD...lD..P...a..G..o.....s.....ss.al-u.Ahp.s..l.tp....t.Gl.....lhlT.sTDhush...s.u.t..chs.hthY......u......u.................h....p..t...h...pE.....h.................ulRlllt.ltptAsphthhl.Pl..h.uh.h.ca..a..hRlhlclh.pu...t.....s.....c.....t.p.....hhh.C.....C......................................................................................................................................ts....s....s.t.....p....h...h.hGPha.sslts.taltph.ht...t.....................h................ph..t.hlph.h....p..........E.......t................s.h.aa.p..p.phsph...ht.h.t......h...hhtt..l.p.GapsohoHht..u.l+TsAs.t.hhth......................................................................... 0 165 299 437 +4731 PF00133 tRNA-synt_1 tRNA synthetases class I (I, L, M and V) Sonnhammer ELL anon Prosite Family Other tRNA synthetase sub-families are too dissimilar to be included. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 601 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -13.17 0.70 -6.52 15 21195 2012-10-02 18:00:56 2003-04-07 12:59:11 17 120 5058 69 6103 31264 24036 374.40 23 54.93 CHANGED shhphWpctsh.FctshptsKs.pts..FslhssPP.sTGplHhGHALspoLpDhllRhppMpGass.ahPGaDptGlssptpVE+KlutcptpshhchGpEcFhscshcattEassph+sphp+LGhhlDas+-hhTh-.phppuVhclFscLaccGLlYRGp+lVsWsssspTALS-hEV..pYKDsc................ushhalsasl......s-ups........plllhTTpP.Tl.ussAlAVpP-........................................-.+hpcLlGpphphPF.......hsRchPllsD.-aVch-tGTGsV+hsPAHs.sDYplGp+asL.....chlNsls-sGshs-ss..................scapGhchFcAcKtllcpLpEpGlLlchcshpHShPaC.RossPl.hhhosQWFV+hcs.......Lscsulcslc....clpaVPcp.tEpcahsWlcNhpDWCISRQhaWGp.IPsWhsc-ssElas....h.ph.tsp.ppcsppshhth.hhchl............phcpssDVLDsWFsSG.hPauslsaP.s.sst-acphaPsDhllpGhD.hhhWhtphlhhuhthpGps......PaKpVlsHGllhDupG+KMSKSlGNslDPh-llc.....................................paGADALRhaLh.sossupDlphS 
..............................................................................................................................t..........................................................................................................................................................................................t..t.p..............-....t...................t...........................................u..c....u...........hs...h......................................................................................................................................................................................................................................................................................................................................................................................................................................................u...........t..h................................h.......................................o...................................................................................h................t..h............t............................hs...................sh........p....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.................DWslSR...Qh.h....WGh..lP........h........h........h............t.....p..................t.................................................................................................................................................................................................................................................................................................h..............p........-.h...h.....D.....s....a.....h...........p.........S...........s........h...............h............t...........h.......h...........t.............................................................................................t...............................................h......................p...........................................h..........P........s...s......h....h.....h.t.....G...-..............h.......h....h......a......h...................h........h............s......h...............h.........t..p..........................................P...a...p..........p...l...h...........p...G...h...h.....h.....s......t.........t.........t.......p........K...M.SKS.....h...sN.s..l.s..P....p...h...h.p............................................................................................................................p.h.G..s..D.....hhRhh.h....s...................................................................................................................................................
.......................................................... 2 2043 3764 5105 +4732 PF00749 tRNA-synt_1c tRNA synthetases class I (E and Q), catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_350 (release 2.1) Domain Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln). 19.80 19.80 19.80 19.80 19.70 19.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.86 0.70 -5.77 18 10836 2012-10-02 18:00:56 2003-04-07 12:59:11 16 90 5178 51 2955 8426 5158 287.60 31 62.06 CHANGED cV+TRFsPpPoGYLHIGpA+sALhNahaA+pasGphllRh-DTssc+pp.EhtcuIh-slcWLGlchp..........tshhpS-+a-thhcastcLIccGhAYhsasosEcLcphRcp.........phs..tps+hsccsLpLa.E-hpptptpstttslRhKlshpus.hshcD.Vthclphss.........cchshsKhcshPTYcFssslsDtl.pITHslRspEahsssspahhlhcAluht..ssat+.hhhlNhssphLSKRKhshh..hphhcshs....shlshLt+hGaos.....puhcEhhstptlhcph.hsh.scuhttFspccLph 
.......................................................lhsRFAPSPo..G....hLHlG.....ps+o.AL.hsah....h.A........+................p........h......s....G........p....h.........l......L..........R....lEDTD...t......R......p..s.........c..hhc..................s...............Ihcslc.W.......L..GlcWD...............................................ttshh.Q.Sc.R..h..-..h..Ypphhp...pL....l.p.........p.G........hA....YhC....h....so.t.E...c..l.ct...h..Rtt...........................sts.....................t......s...p......Y.......s.....t......c......s..h......p..h....h.............t......p......h.....t....t....t....t....h....t....s.t....t.s....s.lRh.........+......h.......s....................s..........s.........s........h.........t...h..........c.......D................l............t............G...........c...........l.......p..h.....s..s..t.................................h..c..Dh.l......l.......t.+.......t....c..........G....h....P.........s........YshusslDDth.s..ITHllRGp-als.s....s.s...pp..hh..........l..........h...c....s.....l...............u...............h..................t..................s.......P..........................a.t.H.h.s.h...h...l....s.......s.................t.....p...+LSKRc..........t...............s..........h..............................................................t.........................h.c...ths.........sh.l.s.h.l.t.h.h.G..as.......................t..u...p..p......hhp......h...th.............hp........t......tt.thh..pcL........................................................... 1 979 1835 2473 +4733 PF03950 tRNA-synt_1c_C tRNA synthetases class I (E and Q), anti-codon binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_350 (release 2.1) Domain Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln). 
21.00 21.00 23.00 21.50 20.00 20.60 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.49 0.71 -4.59 111 2972 2009-01-15 18:05:59 2003-04-07 12:59:11 13 80 2452 17 1073 2415 678 183.90 39 29.60 CHANGED AsRhhsVh...cPlcls.lss...h.....s........t..phchshHPpp.schGpRplsau..p...plaI-psD...........h.pcLt..G.ppVRL+t.hhslcs.pcl..p..sGplhp..l.sphc..spohtts......h.Kscu.hlHWVus....ppu.lss.phh.Y-pL...hp.........p.t..........thl......lNPcShthh...puhsEssl...tshphssh...hQFERhGYaplD ...............................APRtMAVl.....cPlKll..IpN..a......................sts.............psEhlph.ssHP............p...p.....s............-...h.G.....s....Rp.lPFo..p.....ElaI-+sDFhEp............................................................ssKpatRL..s.G..pEVR.L+s.u.Y.l.I.c..............s..pc...l..........KDs....s.Gslsp.........lhsoYD.....s.-.ohutss........................psRKVKG..sIHWVSu...............scu....lss...ElRLYD+.L.Fs...............................s.p.Psst.....................chl.....lNPc..SL..hhh....ppuhsEP.u.l.............ts.s...t......s.t.ct.............aQFER.GYFssD......................................... 0 358 646 899 +4734 PF00750 tRNA-synt_1d tRNA synthetases class I (R) Bateman A anon Pfam-B_1276 (release 2.1) Family Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only arginyl tRNA synthetase. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.18 0.70 -5.63 9 5698 2012-10-02 18:00:56 2003-04-07 12:59:11 14 23 4847 14 1682 4669 4137 317.60 27 59.58 CHANGED hlsslLspstph..stthpppcVll-FsSsNsst.hHlGHlR.shlGDuluRLhEhhGasVlRt.alsDaGsQhshhhspL.pttptphosp..ltphpshYttshhch.s-El.......ht.cthphlshLputspp....apphhsp.lh-hh+pphpchhsshDVhhhE....uE..hhpsphschlpcL+cpGhlhEpDGAhhlhssta....G-shsh.lllKSDGshsYhssDlAhhhp+h.pcthDhhlYhlsscpcsahtphaAAspthG.asPc.ssclhtls.hVsLscDtc..+hppRuGssVpLsDLL-.............................uIGlsAs+Yu.lppspsosh.hDh ..............................................................................h....hhtt.ttth..........t.....s.ps.p.pl.h.l-asSsNsst..s...hHlGHhRusllGDulspllc....hhG.a.p.V.h..............+t.alsD..............hG.....sQ.h.......................s..h..L...h.....h..........u.........h.............p......................t..........h..............h.......p.......c...............t........h............h..................................t.....................................l.......s...........t.........h...............t...........p.................h........Y....h......t...t...........p...........h............t................................t...........c.................t...............................................................................................................................h...........t...........p...............p...................u...p......p............h..h...h.................p.h.........p.....s.s.c..tp..............................hp.ph.h...p....p........h...h....ch.............h....p....p....h....p...p....h.....h....s....c....h....sl....h..h.pc.................uc....h..h.....s....s.....t......l....t......p....s.l..p.t.L..cp.p.....G.....h.........h.h...............E...............p............-..........G..........A......h...........h.......l.......p.h..s.....p.a...........................G.c.s..h...s..h......l..lh.K.s...........
....D......G......sh.hYhs.s.............D..lAhth..+h.............p....h....t...h..c...c..hl.hlhu..scpp.......tahtplhs....s........hct....h........G.....a...........s..................p.....sh....p...l.........h.l....s..........hhh.............h..........h.............c......s...Gc......................thpsR.p..Gs.sl..pL..c-l...l.-E.s........h.p...t......s....h..........h...............................................................................................l.uhsu....lha......lp.ph.pth.Fp........................................................................................................ 0 587 1058 1400 +4735 PF01406 tRNA-synt_1e tRNA synthetases class I (C) catalytic domain Howe K, Bateman A anon Swissprot Family This family includes only cysteinyl tRNA synthetases. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -12.00 0.70 -5.30 11 5726 2012-10-02 18:00:56 2003-04-07 12:59:11 14 22 4818 10 1568 11552 7784 301.80 46 64.24 CHANGED .hsslcptpVshYlCGsTVYDhsHlGHuRshlsFDllRRhLph.hGY-VpaVpNlTDIDDKIIp+Atpptpo..................................................................................................................hpplscpaIpsapcDhcALNVL.PshcPRsT-alscIl-hlppLlc+GaAYsus......GDVYFcVsshcpYGpLSspsl-pLctsspsts.......ttK+sshDFsLWKuuKsGE....suWsSPWG+GRPGWHIECSsMsschLG.....splDIHuGGhDLhFPHHENElAQSpAhas.tphspYWhHsGalpl-sEKMSKSLGNFlpl+DhLppa-schLRahhhssHYRs.L-aoEphlppAps 
....................................................................................................................PlpsscV.pMYVC..G.sT..VY...s..h..s..HlGp.A.R.........s.h.l.sFDl.lpRa...Lch....hG.Y.c..V...........p....YVpN....l.T..D.........l................D..........D...K.....I.I.....p+..Ap..cp......u.........s.........................................................................................................................................................................................ht..p..l..s.c.+.a.l.p..t...apc..DhsA.....L.s..l.h............P......s...........h....c........P.....R........A...T....c....a..l.......s.......-..h..I..p....hl.c....p......L.l.c.+GaA..Ytus............................GDVY.F.....c.....l.....p.....p.........h...............t.......s.......Y.........G.....p..L.....S...t...........p..s.l..-....-.....l.p..s..G.u.c.s..css..................................p.pK+s.PhDFsLW.K.s.......u....K............s...G....E.....................su...W...s..........S.....P.........W........G.............p............GRPGW...HIECSAMup..chL.G.................s.p....hD..IHGGGtDLhFP..H..H....E.......NEI.AQ.....S......c.............u..........s.............p.................s......p................p.........a..........s....p......Y...W....h...H....s...G..h..l..p..l.......c....s..........E.................KMSKS.....L.GNFhT.......l.......+-.l........L................c................p................a.....s........s..............p................s......lR.aa.h.l..s..s..H.Y..R..s..slsaS.c.csLppA..t......................................................................................................................................................................................................................................................................................................................................................................................................... 
2 550 1024 1339 +4736 PF01921 tRNA-synt_1f tRNA synthetases class I (K) Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes only lysyl tRNA synthetases from prokaryotes. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.25 0.70 -5.78 39 631 2012-10-02 18:00:56 2003-04-07 12:59:11 13 4 615 2 222 4907 4602 353.70 38 66.72 CHANGED ttWs.p.Accll.cRhpt.....s....phlhpTGhuPSGhsHIGsFt.EVlpsshVt+Ahcph.utp......s+llhhuDDhDsLRKVPc...NlPsp..hppaLGpPLoplPDPa.Gs.ppSaucHhpt.hpchL-phGh.-hEahSuo-hYpuGhasctlhhsLcp....h-cIhcIl..hhscc....hpts...........................a.PahPlCscsG+lhps.lhshc.ptsplpY.s............csGcphpsslpsGt.........sKLpW+lDWuhRWtuLuVcaE.hGKDhts....ShshuscIs.clhGtcsPhshsYEhhh.c...ssp......KhSpSKGsslolc-WLchusPEsLpahhhp.cPppthclphc.sl.+hlDEY.ph.cta..........ppstppphtsslaclppsp ...............................................................................................................................................................tWs.c.Accll...c+.tt.........tp..............hlhp.oGhuPSGhsHIGsFt.EVhpsshVt+Ahcpl..upc.......oRhIhhuDDhDs...hRKVPc.............Nl.....P...s.p..th...........lt.pals.hPLoplPDP............a...G......s....apSaucH.pshlpphL-pFGh....-h.EFhS.uo-hY+SG..tacptlhpsLc+..h-cIhclhh.hh....sc-.....cpts.........................................................................Y.Pa.hPlCscsG+lhp.s....s.lp..phDs.pssTlsYcs.........................csGppt...plsl.s.s.Gp.........sKLpWKsD...W...s.M..R.Wsshs..VD..aE....h..G..KDHts....Shs....l.u.....scIs...c..IhGs..c.s.P.....t...h..s....Y....E...h..h...h...c...ssp.........KlSpS+GN.ulolc-hL......c.h.s..s....s.Es........Lt.h.h.h.h.p.......+...P.p..p..A.h.+.l...h......FD................s.I...s...+.s...h...D...E.Y.phhptYh...........ppphcpphtp.saclp...s....................................................................... 
1 67 145 176 +4737 PF00152 tRNA-synt_2 tRNA synthetases class II (D, K and N) Bateman A, Sonnhammer ELL anon Prosite Family \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null --hand HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.02 0.70 -5.67 107 16099 2012-10-02 14:22:40 2003-04-07 12:59:11 15 73 4974 111 4475 12333 7486 357.10 28 69.99 CHANGED sc-hRLcaRaLDLRpsphpp.tlphRsclhpslRpahscp.GFlElETPhLspussEG......ARsFlVss..............tpaau.....LsQSPQLaKQlLhluG.h.-RhapIu+sFRsE-h...+scRps.EFTplDhEh..uFsstc-.lhplsEpllpplhc.ph.h.......................slcls..........F.Rloap-Ahcpauu-c.sDh+h.th....ph.p.htph............t.thphht..............................................................................................t....htphhstlhtch.......scc........tlh....t.pthphhalsDaPhh......p......................+PFst..spp...ps.......................shucsaDlllsG..hEluuGuhRlpc.chQpphFcthshs.....tcp..thFsahLcAhca.GsPPHGGlAlGlDRLlMllsstpsIR-VIsFP+spp .........................................................................................................................................................................................................................................................................................................................................-h+hc.RaLD.L.h.p.s..p.t...............t...s....hphRupl..hpslRca............hs...pp...G.......Fl-lE.TP......h.......l.......s.......t.....s...s.....s....pu..........tA+sFhsp.....................................p.hhs......hhLt.........S....P..p.........L.a.....hc......h........L..h....s..u.G......h....-.......+..l..apl.......u......+.sF..R...sE...sh..........s..s..R.HssE..FT..........l-.h..h......u.....a..h.c..h...................p.............-.....lhs..l.s..E..........s..hl..ppl.hp.pl..h..........................................................................thphs..............ts..F.+....lo...a.p-.A....hcp.....h...ts......pp..............c.hc.............th..t..htth.......
...tth.thph......................................................................................................................................................................................................................................hGtlhsph........hcc.......lh.........-pp.hhpPsalhc.aPht.......h.......................................................pPhst.....ps....ps.s.........................................................................................thsptaD.l.hl.sG............hEl.us.G.pclpcs..c..Q..p...p.pFct......s.hs....................................p--....t............h....t.alc.A.h.cY..G.....h.P.Pp.u..GhulGlDRLlMll...ss....t....p...s.....IR-VIhFPph....................................................................................................................................................................................................................... 0 1514 2790 3758 +4738 PF02081 TrpBP Tryptophan RNA-binding attenuator protein Mian N, Bateman A anon IPR000824 Domain \N 20.50 20.50 24.40 25.60 18.10 17.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.55 0.72 -4.55 10 142 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 138 252 55 115 0 73.50 64 97.61 CHANGED Mpssp..suDalVIKAlEsGVNVIGLTRGoDTKFHHSEKLDKGEVMIAQFTEHTSAIKlRG+AhItTsaGplcSEuK ......h....tsuDYlVIKAlEsGVsVIGLTRGpDT+FHHoEKLDKGEVhIAQFTEHTSAIKlRG+AhI.TpaGplcS-....... 0 27 44 48 +4739 PF00587 tRNA-synt_2b tRNA synthetase class II core domain (G, H, P, S and T) Bateman A anon MRC-LMB Genome group Domain Other tRNA synthetase sub-families are too dissimilar to be included. This domain is the core catalytic domain of tRNA synthetases and includes glycyl, histidyl, prolyl, seryl and threonyl tRNA synthetases. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.26 0.71 -4.68 112 18502 2012-10-02 14:22:40 2003-04-07 12:59:11 20 111 4989 173 5461 19073 11934 180.70 25 33.50 CHANGED lpptlhphhpchhp...p.GapcltsPhlhppclhpt.oGc.tphhcc...hap...h...........pcttt........c............p......h..........................................................................................................hLpPpsp.slsthatpphhp.hpp..LPl+hhthush.aRpEh....psp.GlhRhRpFpps-hhhass.s.pp..............................................s.pthtchlphhpplh.pp...lGl.........saclh.sspsshst.tppphshcsah.sp ...................................................................................................................................................................................................................................................................................................................ctlpphhhcphp.....ct.G......a.p..ElhsPh.lh....s.t.....c.lapt...o......G..+...h...s..p...a...s..-s.....h...ap....h..................p.s..tp..................................c............p......h...............................................................................................................................................................................................hLtPp...sp....sh.ss..h........ac.s..........p.l...p.............S....................h..+............c.......L...P....l.....p.......hsp.h...u...s......s.a.....RsEh.......pstpGlh..R.....sR..p.F.p.p.t.......-.ta.h.F..sp..s..-p..........................................................................................................hp.c.php.p..h.h..s.h.hp.clh...pp......h..Gl...............sa.+.h..h.h.ts.p..s.sh.u.h....s.u.scpach.ph.....t........................................................................................................................................................... 
1 1884 3455 4605 +4740 PF01411 tRNA-synt_2c tRNA synthetases class II (A) Howe K anon swissprot Family Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only alanyl-tRNA synthetases. 19.50 19.50 19.60 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -12.92 0.70 -6.22 25 6692 2012-10-02 14:22:40 2003-04-07 12:59:11 14 24 5160 30 1902 5657 5079 441.50 35 60.86 CHANGED ElRppFLcFFc.c+G...HphlsSusllPc.sDPoLLFsNAGMspFKslFLGtt.....psshsRAssoQ+CIRsGGKHNDL-NVGhTuRHHTFFEMLGNFSFG.....DYFKcEAItaAWElLT..ph.....asls.-+LalTVapsD-........EAhslWpchs.lPcpRIl+hstc.......DNFW.....pMGDs.............GPCGPCSEIaYD+G.chsst.sus.sp....-ssRalEIWNLVFhQaNRps......cGshpsLPc+slDTGMGLERlsuVLQshpsNa-sDlFhslhpthpplouh...s........spsthphuaRVIADHlRulsFhluDGllPuNpGRGYVLRRILRRAlRau.+pLGhcps.FhtcLVssllchhGssaPELccptshlpcllcpEEppFt+TLcRGhpLlcchlpphpp..spslsG-suF+LYDTYGFPlDLTp-lAcE+GlsVDhpuF-pshpc.p+cco+..pstpstt.....hthshpsltcltss......s-FhtYsph.............pspuplhulhp.sschlspltt.Gppsh........llL.DpTsFYAEuGGQluDpG...hlpsss....scFpVpssQchs.uhllHhGpl..ppGs..lplGDpVpuplDppR 
......................................................................plRphalpaFt.p..p.s....H.h..............Sssll.P..sDsoLLahNuGhs........h.K.hF.G.................................h.........R..hsssQ+sl....Rss....Dl-sV.GhT.s..RH..pTFFEM..LGNFSh.G.................DY..F.K........c....-AItaAWEhLT.....ph.....hths.-+LasTha.....t.....Dp........................EAhplWtp.............................l...........stp+Ih.h.......p....................DNFW........phGt..............G.PCGP.soEIaaD..+..G......t..hh..s...t...........s....t........t....................-ss..RalElWNl.VF.Q....asc..pt......................................ps..p..h.p.L.P.p..s.lDTG.MGLE..Rls.........ul.........hQ.........t.........hp..........sNa-hDla.hsl..lpthtph.ss....................................................t.t.ts.huh+VlADHlRshsahluD..G..shPuNpGRGYVLRRllRRA.....h....Rau......p......h...L......G............h..............p........t........s......a.....h.hcLl.ss.......l....hp...........h..........t....t......h..a..s..-lh.p.........p..t..t......h.....ltph...lp...tEEppFh..c..........T...............LppGh......tl....hpp............ltp..ht...........................tphls.Gp..sFc.L...a..DT..YGFPl....-LTt-h......h.p.-............t.s..........h..p..lD.t...sFpt.tMtt.Q+.......pcu+...tstt.tt.................t.......t......h.......p...h....t.....................sphhsYp.t...............................p.h.p.u.pl.h.t.l.h.........................p........s.............t..........s........p...t...h.....p........t.....u..p..p..s.h....................ll..L..-p.TPF.YAE..uGGQsuDpG.............hl...t...s..ts........................h...h..t.V...t..D..s..p...+.......h......s...s.......h.......h.h..H.h....s.pl.......t...........u.t........l.....p..hGpt.lphplD............................................................................................................ 
1 653 1183 1586 +4741 PF01974 tRNA_int_endo tRNA intron endonuclease, catalytic C-terminal domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain Members of this family cleave pre tRNA at the 5' and 3' splice sites to release the intron EC:3.1.27.9. 21.10 21.10 21.30 21.80 20.60 20.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.66 0.72 -4.18 78 852 2012-10-11 20:44:43 2003-04-07 12:59:11 12 20 456 40 569 852 86 86.70 26 27.42 CHANGED F.h+ahVY+cLRc+.Gahl+sGhK......aGsDFtlY........tttsshsHucalVh............l.lspspth......................shpclhptsRlupuV+KphllAhl.....sppsc.......lsa ..........................hpYhlY+cLRs+.Gahl.p..sGhK............F.GsDal.lY.........ttsPhhhHupa.hVh..................l..h.s.......s.cph..........................................sht.p.lhshsR...lussV+Kphllshl.....p................................ 0 178 316 463 +4742 PF02778 tRNA_int_endo_N tRNA intron endonuclease, N-terminal domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain Members of this family cleave pre tRNA at the 5' and 3' splice sites to release the intron EC:3.1.27.9. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.37 0.72 -4.42 65 347 2012-10-01 20:02:48 2003-04-07 12:59:11 9 7 240 33 209 357 30 66.70 25 25.64 CHANGED hpuhL.sspVllt.....s.csspp...LacpuaYG+.............hpsphLpLSllEAhYLhp+.Gtlclh....pptchlshcclh ..................................h.h.tt.lhh........t..pp........lhpp...shYGp........................................h.tphLpLSL.EAhaLshp.GpLplh......ptc.lsh.ph............... 0 50 112 158 +4743 PF01746 tRNA_m1G_MT tRNA (Guanine-1)-methyltransferase Bashton M, Bateman A anon Pfam-B_2049 (release 4.1) Family This is a family of tRNA (Guanine-1)-methyltransferases EC:2.1.1.31. 
In E.coli K12 this enzyme catalyses the conversion of a guanosine residue to N1-methylguanine in position 37, next to the anticodon, in tRNA [1]. 21.50 21.50 22.40 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.96 0.71 -4.70 23 5148 2012-10-01 22:53:19 2003-04-07 12:59:11 16 19 4707 18 1419 3601 2980 190.20 39 74.76 CHANGED hpph.c+clsplphpshRsasts++phsssphahsuhshhhKhc..h.thp............shpshhlpsps..h.pphh.pl....pp-....cllhLsuchEsh.pclpppt.......hasIGshV.psschss..hhstththh.Glh..st.pL.h-sa.......llttPth....o+shs..lp..pV.-lLLpspc..hpsW..cpulhcshspR. ..................................................................................h.s+AhcpsllplpshshR-asp.s.+.Hp..p.V.D.D.p..PYGGGs.....GM.l..hp....s..p..P..l...hsAlcsh............pt.........................pts+l..Ihh..oPpG....c.hs.Qph.sp-.L.............u.p.p.p............................cL.lhlCG+Y.....E.GlDE...R.lhpt.hss...........pEhSIGDYVLoGGElsA..hshh..DulsR.llPGVL..Gppt.SttpDSFs........................sGLL-hPpY..........................TRPts..acG........hpV.P.-V..L.....lSGsH...t..pIccWR..hcpuLt+Th.+RP....................................................................... 0 488 884 1178 +4744 PF03054 tRNA_Me_trans tRNA methyl transferase Griffiths-Jones SR anon Pfam-B_823 (release 6.4) Family This family represents tRNA(5-methylaminomethyl-2-thiouridine)-methyltransferase which is involved in the biosynthesis of the modified nucleoside 5-methylaminomethyl-2-thiouridine present in the wobble position of some tRNAs [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.24 0.70 -5.90 21 5331 2012-10-02 18:00:56 2003-04-07 12:59:11 11 13 4511 6 1265 6684 5283 320.80 41 86.58 CHANGED t+VlVuhSGGVDSSVuAhLL+cQG.apVlGlaMcpasp...t-phs.........tCsutcDhpDAp+lscpLGIPhhhlsatccYhpcVhp.Flc-YppGpTPNPDlhCN+pIKFuhhhchshp......LGsDhlATGHYARlshs........................spstLtcuhDppKDQoYFLsslscctLppllFPLGchp..KspVRplApctuLt...sAcKKDSpGICFlucps...accFLpcaLssp..pGsIlchc.....GpllGcHcG...hahYTIGQR+GLslut....htcPhYVlcKDscsNplaVup...p.tLhpcplhspphsWhs.....thsst..hpsps+hRapp...s.hpsplphhss.tplcVpFcpshpuVoPGQssVhYps-....tsLGuGhI ..........................................+VlVuMSGGVDSSVs.A.h.L..L.p.c...p..............G...Y...c..........V.h.G.l..aM.+......Wcc.................scppt...............................hC...s...u...t...pD...h...t.D...A...pt...V...s.c...p.l.G.I......s.....h.a.sl...s..F.t...p.c..ah-p..V...hphFl.s..E..Y.p.t..G.RTPNP....s....lhC.N..+......c.I..KFp..shL-hAhp.................L..G..AD..h.lATGH..Y.uRht.p..t......................................................sst..h.plh+u....h...D.....s.sK...DQo.YFLhpL.spc..QLt+shFP.lGc..h..p.........Ks...c.....VRc.lA.p.c..............h..............G......Ls......sAc.KK.D........SpsIC..F.Is-p.p............app.FL..p...p...................a....l.....s.....s.....................p.......sG..p.....h.h....sh..-.................................G.chl...G.cH...p...G................lhaaTlGQR.+..G..LG.....Ius....................stcP..ha.Vl.shDh...p..pN..hlhVu.p...........c....t..L.hspt..l..h..s..p..p..lpahs......................p..p.......s.h..cs.psKhR..Y+p....................tsh........s..p...l..................p..........h..........h..........s..........-.........t.................p..............h....pV..h.F...s..-..P.t.....A...lo..P.GQu.sVaY......p......u.-......hsLGGGhI...............................................................................................................
................................. 0 456 838 1088 +4745 PF04558 tRNA_synt_1c_R1 Glutaminyl-tRNA synthetase, non-specific RNA binding region part 1 Kerrison ND anon DOMO:DM04413; Family This is a region found N terminal to the catalytic domain of glutaminyl-tRNA synthetase (EC 6.1.1.18) in eukaryotes but not in Escherichia coli.\ This region is thought to bind RNA in a non-specific manner, enhancing interactions between the tRNA and enzyme, but is not essential for enzyme function [1]. 25.00 25.00 25.50 25.30 23.70 24.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.84 0.71 -4.72 27 278 2009-01-15 18:05:59 2003-04-07 12:59:11 10 19 200 1 172 258 0 152.70 33 20.99 CHANGED sss--LhpLFpplGLs-pKu+EhlKNpKlossLpsllppu.sss..sshs+pptsLLasLAo..ph+ssp....hs+pshllphIhsuclKTshQlsAAhcalpspss....shsssch-ctsGV.GV.VT.--lcptVsphlppp.KppIhpc..RYp.hsshhhhscttspL+WAcsp ...............................c.h.pLFttlGLsEpKA+EslKNp.plossLtphlppA.....tts.......sshs+s.....sGsLLYslAo....+l+s..st......+h.shl...lpaIsspKl+Ts.Qls..AAlcYlpspst...tsl-tscF-ctCGV.GVhVTsEpIcpsVpphl..ppp..+tplltp..RY+.hs..hshlh...scs+st...L+WADu................................. 0 70 103 143 +4746 PF04557 tRNA_synt_1c_R2 Glutaminyl-tRNA synthetase, non-specific RNA binding region part 2 Kerrison ND anon DOMO:DM04413; Family This is a region found N terminal to the catalytic domain of glutaminyl-tRNA synthetase (EC 6.1.1.18) in eukaryotes but not in Escherichia coli.\ \ \ This region is thought to bind RNA in a non-specific manner, enhancing interactions between the tRNA and enzyme, but is not essential for enzyme function [1]. 
21.20 21.20 21.20 21.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.24 0.72 -3.29 23 240 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 176 1 146 227 0 84.40 35 11.22 CHANGED hhKstlDhplLcLLGPKsEtDltKKpKps+.....sptstt.h.......sspspspss-ssscps.hhEthhG-...hHKPGEN.........Ppsh..Lhpc ......................lKsplDhplLcLLGPKsEuDLt.Kt.Kttc.................tphtp...................ttpsss...pssp.ssspst.h.Ephhup....FHKPGENhp...............Ppoh.hLppc....................................... 0 55 82 120 +4747 PF02091 tRNA-synt_2e tRNA_synt_A;tRNA_synt_2e; Glycyl-tRNA synthetase alpha subunit Mian N, Bateman A anon IPR002310 Family \N 20.00 20.00 20.50 20.90 19.90 19.50 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.97 0.70 -5.30 9 3035 2012-10-02 14:22:40 2003-04-07 12:59:11 10 8 2991 8 663 1810 1805 283.40 66 88.81 CHANGED FQshIhsLQcaWupQGCllhQPaDhEVGAGThpPsTFLRuLGPEPWpsAYVpPSRRPsDGRYG-NPNRLQ+YaQFQVllKPsP-NIQ-LYLsSL+ALGIDshsHDIRFVEDNWEsPTLGAWGLGWEVWLsGMElTQFTYFQQlGGltCcPVosEITYGLERLAMYlQpl-slaDlhWs-u....lTYG-lFhpsEhEhSsYNFEtAsV-hLFphF-.a-cEAhphlc.s..LPLPAY-hVLKsSHuFNlLDARGsISVTERpRYIhRlRsLARtVActYhptREpLGFPL ...................................................FQplILsLQpYWu........c......QGCsllQPaDhEsGAGThHPhThLRAlGPEP.WsAAYVpPSRRPsDGRYGENPNRLQ+YYQFQVllKPSP-N.IQELYLsSLctLGlDPt.HDIRFVEDNWEsP..TLGAWGLGWEVWLsGMEVTQFTY..FQ..QVGGl-Cc.PV.o...uEITYGLERLAMYlQsVDSVYDL.Ws.-G.........................loYGDlF.+QsE.....hEpSpYNFEhADs-hLhppF-paE+E.....Appl..............L..............c..t.t...................L....s.............LP...AY-hlLKASHoFNLLDARGAISVTERppYIhRIRsLu+sVAcsYhtpRctLGFPh..................................................................... 
0 211 416 543 +4748 PF02092 tRNA_synt_2f tRNA_synt_B; Glycyl-tRNA synthetase beta subunit Mian N, Bateman A anon IPR002311 Family \N 25.30 25.30 25.50 26.10 21.50 25.10 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.58 0.70 -5.96 167 3055 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 2988 0 666 2359 2774 541.00 41 78.66 CHANGED hLlEIGoEELPuchlpphhppltptlpptLpctpLsa..s.slcsauTPRRLAlhl.pslsppQ.sDtp.Et+GPshphAhstsG.sPT....cAAtGFA+up....Gls.......lcc.Lpht..p.........s.sKu-alhh.ppptpGpsstplLschltphlpsL.shsKtM..RW..G.......s.....t...shcFlRPl+WllsLhssc.....llshpl.......h.G....lpouphopGH.RFht.st..slpl.sssscYhptLcpp.hVlsDhpcR+phItpplpphA.ppt..s...sps.h-..-s..LL-EVssLVEaPsslhGpF-.ccFLp.lPtElLlooM+pHQ+YFslhc.p.sG...........pLhPpFlsVuNhpspc..ptlhpGNE+VlRARLuDApFFacpDp....KpsLpshltcLcpVlFpcpLGolh-KspRlptLAthlA..ptl.............s......s.....s...h.ppspRAAhLsKsDLlTpMVh..EFPELQGlMGphY.Ahts...GE....sptVAtAl....pEHYhPphuuDpLPpo.sGsslAlADKlDTLsGhFulGth.PTGSpDPaALRRuAlGllRIll-pp..hslsLppLlppu.hp.h....tt...............hstps.shpplhsF..hhpRlcshhp..cp.u.....hsh-llpAVlu...ts 
........................................................................hLlElGoEELPs+slpshtcphtpphsstLcpstLsa..............s..s..l....p..hauoPRRLAlpVp.sLuppQ.s..Dpp.Eh+GPuhphAh...Ds-G.p...s....o.......KAApGF....sRup.Glo...........l-p...l...chh.p..........................s...cpsEalhhp.tp.ht.Gps.spslL.sshlspslppL.shP..KsMRW....G.................s.....s.....shcFlRPl+tlssLh....uc.c.....l.ls..hpl.......hG........................lpou+so+G..HRFh.......s......s...............t..phsl...s.sA..cpY.phLccp..hVIsDh................pcR+thIhpphcphA.pph........u..............s..ps...s..l..-....-s.......LL-EVsuLVEaPsslhupF-.-cFLp.lPpEsLlhoM+scQ+YFsVhc...p..sG.......................................+Lh.PpFIhVuN...hpscc....pplIpGNEKVlRsRLuDAcFFappDp....................K..........p.....s...L..p..s.....h.l..s+L..p..s..VsFpppLGolt-KspRlptlAshlAppl...................................ss-..sscspRAuhLuKsDLhTsMVh..EFs-LQGlMGtaY.........Apts.........GE.........spsV...AhAl....pEpYhPphAuD.pLPps.....l...usslA...lADKlDTLs.GhFulG.h.PoGopDPaALRRAAlGllR..........Il..........lc.....ps...........h.sls..Lpp.Llppu..hp.hh..ssp..................................................htsspshs-lh-FhhsRh+shhpcp..u.......hshDhlpAVLu.p.................................................... 
0 207 417 544 +4749 PF00261 Tropomyosin Tropomyosin Finn RD anon Prosite Family \N 35.00 35.00 35.10 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.67 0.70 -5.27 43 1328 2012-10-03 05:16:33 2003-04-07 12:59:11 15 16 401 54 388 1233 5 180.20 54 81.39 CHANGED KKhpplcs-hDpspEplpcApccL-pp-KptpcAEuEVAuLNRRIQLlEE-L-RupERLssAhpKL-EApKAADES.ERupKVlENRuhpDEE+h-.LEtQLKEAKhlAE-ADRKY-EVARKLsllEsDLERAEERuEhuEuKlsELEEEL+lVuNNLKSLEsucEKuup+E-pYEEpI+.Lop+LKEAEsRAEaAERoVsKLpKplDcLED-LhspKE+YKsls-ELDpThsELsuh ..............................hp.ph-thp.pthtthppp.lc.....tc....cth.p..pA...Eu-VA...uLsRRlp.LlEE-L-.R..upERLsoAhpKLEEA.pKAADES...E..R..uh..KVlEN..Ru.h.pDEE+M-h....EhQLKEAKal.......AE-.....AD+KY-...EVA.....RKLsh.lEs-LERsE-RsEhuE..s+hh-LEEcL+.h.spsLK.uLpst........t-..........c..hsp+E.-pYccpI...+hLsp.+LKEA.....E..sRAEhAE.Rs.VtKL.pKplDcLE...-..p...lhtt+.c.hthpp.h-.sh.-h....................... 0 115 147 252 +4750 PF03301 Trp_dioxygenase Tryptophan 2,3-dioxygenase Mifsud W anon Pfam-B_4263 (release 6.5) Family \N 21.20 21.20 23.40 21.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.12 0.70 -5.33 5 1051 2012-10-01 19:57:26 2003-04-07 12:59:11 8 6 627 72 420 1075 498 187.70 29 87.19 CHANGED -SsQsGspcsScu.GhsYGDYLpLDKlLoAQ+hlS-.ttp.lHDEHLFIVTHQAaELWFKQIlaELDolR+LLsss+l--s+.hLclhctLcRlV+ILcLLssQaolL-TMTPLDFsDFRcYLoPASGFQSLQFRlLENKLGV+supRl.YNtp.YpssFtss....chLLsoEcEpoLLcLlpuWLERTPGLc.pu.s..FWhKapcSVhchLs-LhAptuscpssEVlp+cLst-YcKscEVhtSlhD.p.H-.hltpGpRhhoacAhpGAhMIhFYRD-PRFppPaQLLptLMDIDoLhTKWRYNHVlhVcRMlGS.KpGTGGSSGYtYLRSTlSDRYKVFlDLFNLSTaL 
.....................................................h..................h.Y.tYlthp.llt.Q.....o.....................sEhlFlh.HQ........s.ELah+.hhaEhpthht....ht..........t..t......h....t...............phltRh.hh.p.l.tt...h.tl.l.t.s..hos.pa...th...R.......Ls.uSG....FQShpaR.lE...............h...hG..............t.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 143 259 354 +4751 PF04820 Trp_halogenase Tryptophan halogenase Kerrison ND anon Pfam-B_2531 (release 7.6) Family Tryptophan halogenase catalyses the chlorination of tryptophan to form 7-chlorotryptophan. This is the first step in the biosynthesis of pyrrolnitrin, an antibiotic with broad-spectrum anti-fungal activity. Tryptophan halogenase is NADH-dependent [1]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 454 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.49 0.70 -5.86 23 1611 2012-10-10 17:06:42 2003-04-07 12:59:11 9 14 630 30 502 3763 1761 272.60 18 79.12 CHANGED +lVIlGGGTAGWhAAAtLu+th.st..hplTLlES-pIGslGVGE.uTlPsIpp.hpphLGl-Et-FhRtTpuTaKLGIpF.sWtp.............tuccYhHsFGs.hGtshthhsFap.....aWLchpttG.hssshssaslsshsApts+F.uss.ps......htslsYAaHhDAuLaAcaLR+hAcs+.GVpRlEGclssVphcs.sGaVsulphc-....Gchl-uDLFIDCSGFRGLLIppsLpsGacDWScaL.sDRAlAs.stsss....sssPYTc...uhAcpAGWpW+IPLQHRsGN.GaVaSScah.....s--cAhstLhsplsupshtpP.chlRFssGR.............R+psWs+NsVAlGLASGFlEPLESTuIHLlps....ulppLlplFPscshs.Ps.tlscaNcpsshEhEclRDFllLHYpsopRsDoPFWcpsR.phslP-sLpc+lcLFpsp...............uphhpttc-hFtpsSWhpVhhGQGlhPctacPLscshss ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s..a.p....h.ptt..hs.....t.h.L.h.p.....u...t...G..sp.h..h.....t....t...h........t......................t.....................t......t...............h..............t....l....h.tp......................t...t...........h....p..s..c..h..hlDsoG.t...u...h.....l.........t..p.............h.....................................................p..........h..p...p..........h.................................t.......s.h....h..h......h....t........................s......t...........................h....s..........t..s...G.........W....W....I.....Ph.....t.......t...................................................................................
.............................................................................................................................................................................................................................t..h.h............h.s.u..hhpP......h.....u....sh....................................h........................................................................................................................................................................................................................................................................................................t....................................................................................................................................................................................................... 1 157 306 427 +4752 PF01371 Trp_repressor Trp repressor protein Bateman A anon SCOP Domain This protein binds to tryptophan and represses transcription of the Trp operon. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.74 0.72 -4.07 19 1612 2012-10-04 14:01:12 2003-04-07 12:59:11 14 4 1574 49 242 835 143 86.80 39 83.21 CHANGED pphptlh-hlhphtspcpp.thhp.lhT.sEhcuLupRlplsc.LLcppho.RcIppchGsShATIoRsupsLchusssapthLcphh ...........................phpphh-hlhshhsp-pph.hhp.lhTssEh..cuLupR.l.clsc.LL+.tp.ho.+-Icp-hGAuhATI.oRs..s.p.uLch.u.s.s.thcthL-chh............................ 
0 91 147 201 +4753 PF00290 Trp_syntA trp_syntA; Tryptophan synthase alpha chain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.51 0.70 -5.85 18 4254 2012-10-03 05:58:16 2003-04-07 12:59:11 15 8 3921 94 1062 3690 2893 247.30 35 89.87 CHANGED Fuphptpsp.suFVPFlsAGDPs..hEsolcllcsL.cuGADllELGlPFSDPlADGPsIQpAshRALsuGhThspsl-hlcchR.phsslPllLhsYaNPlhphGhEp....FhtpstcsGlcGlllsDLPhEEusslhphspctslshlhLluPsTs-pRlcplsctusGFlYlVSphGVTGscs.shssplpphlp+lKpho.shPlhlGFGISss-pspphs.tsADGVllGSAlVchlpp......phcptcpslpcltph...sp..hcsus+ ............................................................................................................thptptc.sAh.ls.alssG.DPs..h-pohchlcsL........c....u....G............A.............DhlELGlPFSDPlADGPsIQp..AshRALs..s..G.s..........o..hppsh.c..hlpp....l.R...................p....p.....sph.P..llLMs...........Yh.N...l....hp....h..........Gl-p.....................F.hpcstcs.G.V.cGlllsDlP...h....E...E.u...s......h...t...t.t...s...p...c...t.s...l.s..hIhLssPso.sc...-Rlcplsptu.p...............G.F..lYh.lSh..sGV.TGs......ps....p....h.s.s.s..l.p.ph..ltc.l.+p.h........s......s......h.......PlhlGFGIussppsppht....t...sADGlI.lGSAlVch..lpp..........ph.p.p.........t.........................t.............................................. 
0 339 685 908 +4754 PF03222 Trp_Tyr_perm Tryptophan/tyrosine permease family Mifsud W anon Pfam-B_2873 (release 6.5) Family \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.24 0.70 -5.65 13 4713 2012-10-03 01:44:59 2003-04-07 12:59:11 8 5 1373 0 483 3542 123 382.30 25 94.07 CHANGED +osphlGGshlIAGTsIGAGMLAlPlssAGshFhsohllLlhsWhhhhtSGLhlLElhpphts.....ssuhsTlucchLG+shpllsslshhFlhYhLhhAYIouuGuILppslsphhh.hshss+sssll.....FsllhusllhhuTp.slDphsplllhuhlluFslshshhlsclcsshLhs.....h..sthhshllsulPlhlsSFGFHusVPSLhpYhsps.l+cl++uIlIGoslsLllYllWplsshGslspspFhtlltpGuslssllpALtthhpSsshthslphFuhhAlsoSFlGVuLGLFDaluDlFKa.pcspsGRhchshlTFlP..................................PLlhulhaPpGFlhAluYAG.lusslassIlPslLsapuRpcpspts.a+VhGGshhllllllhGlllhlsp ..............................................................................h...........l.hGTsI..GAGh.L....hlP..l.s..h..u..u.s.G.h..h.h..s..h.h..h...l..lhs..a..s.hh.h........a.su.h....h.l..h.c..hs..p..sss.............................ssshsslsc..c..a...h....G...+......t......h.s.......h....l.h....s...h.h.......h....h......F.......s....l......a...s....l......s....h.u...Y.......h..........s......u.....s..........u....s......h...........l..........p..........p..........h..........h......s....p...........h....t........h.......s........h......s..........p....h....s...s..hh...........................h.s....l...l...h...u....h...l.....l...h........h....G.......pc.......h....l.......s.....+......h......s......s...h........l..l..h..s....h....l....h......h....h.l.....l....h.......h..h......h..L....l....s......p......h......p.........s...h....Lhs..................h....s..s...s......h...h......h...h.......l...h....h...s.l..P....l....h..l..h.S..F......u.a....p....s.....s....l..s....S....h.......s.........p....h........h......s.....p...c........hc.......+......l....p.....p....s......l......h...h.....u.....o.........h...l.s....L.l...h....a..l...h...a...h...h...u...s..h.
h........s.l....s............s...p.............h...h...s..h...ht...p...s.......l..s.s...L..lp..s.h.up..h..hs.....s..........s.....h.....l.p.h..s.s.....s....l..hu.....h..l..A.....lsoS.....F.l......Gl.........h.L..G........l.h...-.......h....l....s....-......L.....h.............p.......h........s.......s....s..................t.........h........u........+...h.......p....o...........h..hl...s..F..l..s..............................................................s.h..h....h....s..l..h.a..P.s..u...h..l....hh.l..uh.uG.....h.............huh...l.h.h.llP.hh..h...hh......t......s....R.p........h.....................................................ap....s......hh.st.h.hh..hhhhhhh........................................................................................................................................................................................................................................... 0 100 201 347 +4755 PF01509 TruB_N TruB family pseudouridylate synthase (N terminal domain) Bateman A anon Pfam-B_792 (release 4.0) Family Members of this family are involved in modifying bases in RNA molecules. They carry out the conversion of uracil bases to pseudouridine. This family includes TruB, a pseudouridylate synthase that specifically converts uracil 55 to pseudouridine in most tRNAs. This family also includes Cbf5p that modifies rRNA [2]. 
25.60 25.60 25.80 25.60 25.30 25.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.67 0.71 -4.05 171 5359 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 4803 48 1566 4094 1764 143.70 39 46.08 CHANGED ++...hh.....p...h+....KlGHsGTLDPhAoGlLslslGc.ATKlhpalh.sss.KpYpuphpLGhpTsTtDtpGpl..l...tp........h.tl.sppp....lcpsl.ppF.pG.pIpQlPPha.SAlKhsGc+LYchARpG..hpl-...hts.RplpIaplcl...lph..p...ts.......plphclpCS+GTYI ............................++lhpsc...KsGHsGTLDPhAoGlLslsl.Gc.ATK.hs.p.alh..sss..KpYpsplpLGtpTsTtD...........s..........pGpl.........l....pp...................s...p.h....s...t..cp.....................lp.p........sl.p..p.a.pG...pI.pQ.....lPP.haSAlK.hpG++hY-hARpG..........p..V..c.........p..ts..R.lsIaplph.....lp...h.....c..ts..........................phphcVpCSKGTYI.................................................... 0 520 968 1305 +4756 PF01456 Mucin Tryp_mucin; Mucin-like glycoprotein Bateman A anon Prodom_3102 (release 99.1) & Pfam-B_3837 (Release 8.0) Family This family of trypanosomal proteins resemble vertebrate mucins. The protein consists of three regions. The N and C terminii are conserved between all members of the family, whereas the central region is not well conserved and contains a large number of threonine residues which can be glycosylated [1]. Indirect evidence suggested that these genes might encode the core protein of parasite mucins, glycoproteins that were proposed to be involved in the interaction with, and invasion of, mammalian host cells. This family contains an N-terminal signal peptide. 
29.60 29.60 29.70 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.34 0.71 -4.41 6 1107 2009-09-12 22:44:45 2003-04-07 12:59:11 12 5 11 0 62 1096 1 150.00 37 94.82 CHANGED Mh.....hpRlLC.lLhLAL.CCssVCsTApt..............................................tGQhssssspuusGpsppTTTTTTTpssTTTTTTT.................Tssss....sspsssTTsssTTTTTsspAPupssTTocAPTssssR..........SLGussWVssPLLLhVSAhshT..Ass ......................................................................................................................Mh...pCRLLCALLVLAL...C.C.C..s..sVC.s....s..tpt....................................................................................................................................................................................................................t..........t....t............t.....s......t.....t.....s....ps........p.....t.....p........s...s..s..s..s....s.....p..s.s...s.s...s.ps..sp................................................................................t.t.t........s.......t....t..p....s...s.....p...s.s...s..s.s.s......s......o........T....T....T.T.T..s..sp......A.P....s...........s.....T.T.o.cAP....o.s..o.......TT.+A.P........Stl.RchDGSLuSSAW.....VCAPLlLAsSALAhT..sl.......................................................................................... 0 3 3 62 +4757 PF00913 Trypan_glycop Trypanosome variant surface glycoprotein (A-type) Bateman A anon Pfam-B_1351 (release 3.0) & Pfam-B_2618 (release 8.0) Domain The trypanosome parasite expresses these proteins to evade the immune response. This family includes a variety of surface proteins such as Trypanosoma brucei VSGs such as expression site associated gene (ESAG) 6 and 7 [3]. 
22.10 22.10 22.10 22.20 21.90 22.00 hmmbuild --amino -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.08 0.70 -5.55 63 493 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 12 4 163 485 0 305.80 17 79.35 CHANGED hshhllhshh......sppssussptulpts.sWpslCplop-Lc.plsutshpphpshhsthpphpptth+hplash....tpsssppspthsslsuhh....ppstpshpshhssthppulsAsusuuhltG+IcEalsl...hspspsuuss....CLssssssssssss.........hthstspsphphsssssp...stssstlsssGapshh...........sssssptssssppCpLh.....tsssssGhspsss.........tssslphu.uGhlsls.....tsssthshtthtshttttptttt.hhthhtt..........tptspssssthpssssshpsssphppslpphht.....tptttptpphtptlpphhtsspspthpt....hhttht.p.plstthh....stsptspLs.cIssh.ppLtplLthhptptt .................................................................................h....hhhhh...........h...ts...t...t.t..u.lttp.shptlCsloptL+.t..............lsthst....p....pht.......t....h...t.p.ph....p..ph.p.....t....h..p..h..p....lhhh.........htsst..pss........ptht..h......Lhtth......hpthtphppphpph........t....ptuhtAuu....tA........uhuAGRl-Ealtl...........Fup..ut...........s....susph............Clusssssssptsp....................thp.s.tth.hppspps........htttpshss..tshpsl............................sspshts.s.ss....pss...CsLh.......pssssG..hhtsss..................hppshhau..uGlhshs...............t.t..........................................................................................................................................................................................................tt......................................................................................................................................... 0 121 163 163 +4758 PF03249 TSA Type specific antigen Bateman A anon Pfam-B_3060 (release 6.5) Family There are several antigenic variants in Rickettsia tsutsugamushi, and a type-specific antigen (TSA) of 56-kilodaltons located on the rickettsial surface is responsible for the variation [1,2]. 
TSA proteins are probably integral membrane proteins. 19.80 19.80 20.30 30.10 19.50 19.30 hmmbuild --amino -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.73 0.70 -5.96 8 365 2012-10-03 17:14:37 2003-04-07 12:59:11 8 1 7 0 1 363 0 374.30 72 96.08 CHANGED IELGDEGGLECGPYAKVGVVGGMITGVESsRLDsADu-GKKHLPLTTuhPFGGTLAAGMTIAPGFRAELGVMYLpNIoAEVEsGK..............ssucADosssTD...uPIhKR..KLTPPQPTIMPISIADRDhGVDlsNlPQAQstts..QlNDs..RuAcRIAWLKsYAGIDYhVKDPNNP.GsMhlNPVLLNIPQGNP...uNssptAhQPsDhsILDH-QWRalVVGlsALSNANKPSlSPVKVLSDKIoQIYsDIKPFAcIAGI-VP-ssLPNSASVEQIQNKMQELN-lLEELRESFDGYI.GNAFAsQIQLNFhIPQtA..QQQQGQG.QQQQAQATAQEAsAAAAVRlLNG..N-QIlQLYKDLVKLQRHAGIKKAMEKLAAQ.t.DutspGuGDsKKKQG...ASEcSccsutu........KETEFDLSMIVGQVKLYADLhTTESFSIYAGVGAGLAYTpGKIDsKDIKAHTGMVASGALGVAINAAEGVYVDIEGGYMHSFSKIEEKYSVNALMASlGVRYNF ........IELG-EG..GLECGPYuKVGlVGGMITGsESsRLDsADs-GKK+LsLTTuhPFGGTLAAGMTIA.GFRAELGVMYLpNIoA....pVE...GKsKs.....................DStGcsKADSu.sGsD.....API.RK.R.hKLTPPQPTIMPISIADRDhGlDlsNlsQAtAtts....Q..LNsE...QRAAtRIAWLKNhAGIDYhVpDPNNP..GshVlNPlLLNIPQGsP...uNs....pRspQPsshsIhs..H-QWRaLVVGlsALS.NANKPSsoPVKVLSDKIopIYSDI+.FAcIAsI-VP-ssLPNSASVEQIQsKhpELsphLE-lR-SF-Ghl.sNAFssQIQLNF.hP.tt....QtQ.GQ..QQQQ..uQsTAQ-AsAAAAVRhLNs..NpQI.QLY+DLVKLpRHAGl+KAMEpLAsQ..........D.stt.............ps..tspscppp....ss.p.pp.....t.t..........+.EsEFD...LSMl..VGQVKL..YADlhhTEShSIYuGlGAGlAaT.GKIDshDlK.pTGMVsS............................................................................... 
1 0 1 1 +4759 PF01166 TSC22 TSC-22/dip/bun family Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.10 25.90 24.90 24.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.89 0.72 -4.01 10 339 2009-09-11 08:14:02 2003-04-07 12:59:11 13 4 87 2 161 275 0 59.20 65 15.71 CHANGED MDLVKoHLMYAVREEVEVLKEpI+ELh-+NupLEpENsLLKsLuoPEQLsphpu..plpsss .................MDLVKSHLMYAVREEVEVLKEQI+ELhE+NSpLEpENsLLKoLASPEQLuQhpu..pht..ss.................. 0 26 42 96 +4760 PF04668 Tsg Twisted gastrulation (Tsg) protein conserved region Waterfield DI, Finn RD anon Pfam-B_4556 (release 7.5) Family Tsg was identified in Drosophila as being required to specify the dorsal-most structures in the embryo, for example amnioserosa. Biochemical experiments have revealed three key properties of Tsg: it can synergistically inhibit Dpp/BMP action in both Drosophila and vertebrates by forming a tripartite complete between itself, SOG/chordin and a BMP ligand; Tsg seems to enhance the Tld/BMP-1-mediated cleavage rate of SOG/chordin and may change the preference of site utilisation; Tsg can promote the dissociation of chordin cysteine-rich-containing fragments from the ligand to inhibit BMP signalling [1,2]. 25.00 25.00 29.60 27.40 18.40 24.10 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.06 0.71 -3.95 11 135 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 83 0 82 122 0 122.90 45 58.17 CHANGED psphop+SplE-L.-ulPsLFcAlTu..EsDut.hpWslhoFPl....h..hsptpshsphL.s.ss.pts..htsPsssloss........CTVlYhspChShp+C+QoCESMGASpYRWFHsGCCECVGspClsYGspEoRCppC.. ........................................s.s.opKSpVE-l..-s..lP...uLFcALT...Es..Dst...hpWslhoFPl....s.thsphc.s..hspah.ph.s...p...p.....hp......h.s.......h........sss.hpss.............CTVlYhcpChShppC+.oCcSMGASpYR.............WFHsuCCEClGspClsYGspps+ChpC.............. 
0 19 25 55 +4761 PF04705 TSNR_N Thiostrepton-resistance methylase, N terminus Kerrison ND anon DOMO:DM04814; Family This region is found in some members of the SpoU-type rRNA methylase family (Pfam:PF00588). 27.10 27.10 27.70 93.80 27.00 27.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.38 0.71 -4.29 3 6 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 6 6 1 11 0 114.70 52 41.82 CHANGED MTELDlIsNsSDPAVQRIIDVTKHSRSsIKTTLIEDlEPLMcSIRAGVEFIEVYGSDooPFPu-LLDLCc+RsIPVRLIDuSIVNQLFKuERKAKVFGIARVPRPARFADIASRu .MsphDlIsstSDPAVQRIIDVTK+SRSslKTsLIEDsEPLscuIpAGVEFIEVYGs-uoPlsscLLshCcpRsIPVRLlssulsNpLFKuERKsKsFGIARVPRPu+FuDlAsR.s. 0 0 1 1 +4762 PF00090 TSP_1 tsp_1; Thrombospondin type 1 domain Sonnhammer ELL anon Published_alignment Family \N 21.60 12.00 21.60 12.00 21.50 11.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.19 0.72 -3.85 31 18854 2009-09-12 10:45:09 2003-04-07 12:59:11 14 787 233 25 10470 16743 573 50.30 28 19.16 CHANGED o.WspWSsCSV.TCG.pGhphRpRhsst......sssCstsspp......schC.phcpC ..............h.u.sW....o.........t....C.......S......t.....o.......C.........G.....s.......G........h.......p.....p.....R.p....+...p..C.....t..s....................s......s...p..............C....t......s......t..t..t....................hp.....C..p.t.C........................................................ 1 2752 3498 6471 +4763 PF02412 TSP_3 tsp_3; Thrombospondin type 3 repeat Bateman A anon SwissProt & Pfam-B_2972 (Release 8.0) Repeat The thrombospondin repeat is a short aspartate rich repeat which binds to calcium ions. The repeat was initially identified in thrombospondin proteins that contained 7 of these repeats [1]. The repeat lacks defined secondary structure [2]. 
25.00 13.60 25.00 13.60 24.80 13.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.18 0.72 -4.48 11 4235 2009-09-16 13:31:17 2003-04-07 12:59:11 13 198 363 36 1963 3922 3104 30.90 43 18.51 CHANGED cDuDuDGlGDAC....-pDhDsDGl.shhDNCPhsuNssQ .....................................C................p..t...D...t...D..s..D.G.l..D...p......p.......DsCPhsss.t.......... 0 606 831 1367 +4764 PF03073 TspO_MBR TspO/MBR family Mifsud W anon Pfam-B_1882 (release 6.4) Family Tryptophan-rich sensory protein (TspO) is an integral membrane protein that acts as a negative regulator of the expression of specific photosynthesis genes in response to oxygen/light [1]. It is involved in the efflux of porphyrin intermediates from the cell. This reduces the activity of coproporphyrinogen III oxidase, which is thought to lead to the accumulation of a putative repressor molecule that inhibits the expression of specific photosynthesis genes. Several conserved aromatic residues are necessary for TspO function: they are thought to be involved in binding porphyrin intermediates [3]. In [2], the rat mitochondrial peripheral benzodiazepine receptor (MBR) was shown to not only retain its structure within a bacterial outer membrane, but also to be able to functionally substitute for TspO in TspO- mutants, and to act in a similar manner to TspO in its in situ location: the outer mitochondrial membrane. The biological significance of MBR remains unclear, however. It is thought to be involved in a variety of cellular functions, including cholesterol transport in steroidogenic tissues. 
20.70 20.70 21.20 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.98 0.71 -4.68 148 1274 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 1043 0 537 1142 1226 141.80 27 85.38 CHANGED llhlslshssuhhuuhhoss........ss..W.YpsLpKPsasPPs.alFsslWTlLYhhhuluuahlapth.........tpptsphslslaslQLsLNhhWSslFFuh+......phthAhl.llhLhlsllhshhtahp..lsphA.uhLllPYlhWlsFAshLNhs.lhtL ...............................................h.hhhhhshhhuhhuu....h....h....s......t.......tp....W...Yt..s.Lp+P...s.asP...Ps..hlFs.lWsl..L.Y.h.hh.u.l.uuahlapp.........................ttpttphsltl.a.sl.QL.hlNhh..Wo..lF....Fsh+......phhhAh.ltlllLhhh.lhhh.....h.......hha.hp.......lsp...h..A...uh..L....h....l.....P...............YlhWlsFAshLNhslhh.................................................. 0 183 330 441 +4765 PF02956 TT_ORF1 TT viral orf 1 Bateman A anon Pfam-B_1612 (release 6.4) Family TT virus (TTV), isolated initially from a Japanese patient with hepatitis of unknown aetiology, has since been found to infect both healthy and diseased individuals and numerous prevalence studies have raised questions about its role in unexplained hepatitis. ORF1 is a large 750 residue protein. The N-terminal half of this protein corresponds to the capsid protein. 
19.60 19.60 20.10 19.60 19.30 19.40 hmmbuild -o /dev/null HMM SEED 525 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.85 0.70 -6.05 58 3965 2012-10-04 01:49:40 2003-04-07 12:59:11 9 3 72 0 0 2670 1 111.10 45 87.44 CHANGED Msa.WWhRR....R+...WhhphhR...................RhRRhhRRh+RRhps..RR..RRR.hhthp+....RphhRRRp....hRR++KKl.hlpQWQPsslR+CpIcGhhPlllsGc...sptspNYshcp--hss.....t..........PaGGGaosppFoLchLY--ap+tpNhWTtSNppLDLsRYhGsphpFYRHspsDFIVpasppPPFphschotsshHPuhlhhsK+KhllPShpT+PpG+thl+l+IpPP+hhpsKWYhQpDlCsssLlsltuoAsshpaPasusposs.sloFts.Lss.hYppshhh............................sstppt.t.h.t.laps........................thYpohts.tpl.p.httssptpp....ttp....t................................sss.Yst..ap.........................................................tlpYcsGhaSshaLssh+h.shp.h..............ssat-lpYN.PhpD+GpGNplWh.p.hoKtsspa...spspschllpslPLWsshaG..Yh-alpppt.tspshhtsthlslhsPYTpP...hstss..sshual.hDhsFspGKhPts.sshlshhhct+W.YPphhaQppsls........slspsGPa..sY.+s-ppss.pLs..hpYcFpFpWGGs 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s..Kps..Y...sKspSKCLltsLPLWAusYG..YhEaCuKsT..GDs..N..IchNsRhl.IRSPaTsPQL.ls..H..ss..P.+GaVPYS..........................................................................................................h......................................................................................................................................................... 2 0 0 0 +4766 PF02957 TT_ORF2 TT viral ORF2 Bateman A, Mifsud W anon Pfam-B_1489 (release 6.4) & Pfam-B_4693 (release 7.6) Domain TT virus (TTV), isolated initially from a Japanese patient with hepatitis of unknown aetiology, has since been found to infect both healthy and diseased individuals, and numerous prevalence studies have raised questions about its role in unexplained hepatitis. ORF2 is a 150 residue protein. This family also includes the VP2 protein from the chicken anaemia virus which is a gyrovirus. Gyroviruses are small circular single stranded viruses. The proteins contain a set of conserved cysteine and histidine residues suggesting a zinc binding domain. 
21.10 21.10 21.10 21.10 20.90 21.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.77 0.71 -3.41 17 774 2009-09-11 23:42:44 2003-04-07 12:59:11 10 4 129 0 1 695 0 100.80 26 68.28 CHANGED apPs...hastpthptpWhsshhpoHshhCGCscslcH.......hhp..........................hh+ptssL.ht.pt.tpht.h..sssp-uss.......tssG-s.t..sh......spuDlDhL.hAt-hs-p ....................pss...hhs.ptppphWhpsshpoHushCuCssshtH................Lpph...t...th.t....................s.s.t.s....t.t......h+h..........h.........................................................s.s.t..t...tt.................................t..............................................................t.t...p...........s.................................................................... 0 0 1 1 +4767 PF03542 Tuberin Tuberin Griffiths-Jones SR anon PRINTS Family Tuberous sclerosis complex (TSC) is an autosomal dominant disorder and is characterised by the presence of hamartomas in many organs, such as brain, skin, heart, lung, and kidney. It is caused by mutation either TSC1 or TSC2 tumour suppressor gene. The TSC2 gene codes for tuberin and interacts with hamartin Pfam:PF04388 , containing two coiled-coil regions, which have been shown to mediate binding to tuberin. These two proteins function within the same pathway(s) regulating cell cycle, cell growth, adhesion, and vesicular trafficking [1]. 
19.50 19.50 19.60 21.20 19.40 19.40 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.15 0.70 -5.39 5 261 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 189 0 169 279 0 274.60 30 18.27 CHANGED lV+SEsElEDIlsAVDGLl+VFpVKLYRLPusHAl+VYslLluHLEtHYc+PalLtplSlIRY+IF-WhLpARANuSaHIGYP-uE..pss+VRFSsYLGl-uP...............ppupuosphsLs.hussptsspssslhPsosLTsISI+RuCpVIVpCLKcEpDWpVlQLVLoELPcVLQNKALIQGND..lDuLAsTLhKMhsD.hplE+L.pusstusspoDlHsLVLPALoSLAoYHpaLDsspQ+sIIsuLcpGLIoRpASlCIsoLTILlLEMP-sLhs+LPDLLlcLSKMSuTshlAlPVLEFLSTLlHLPpHLauNFssppYMsVFAISLPYTNPaRYDHYTVSLAHHVIAuWFlKCRLPh .............................................................................................................................................................................................s..........................................................................................................................................................................................................................................................t....h....p...s.......hl..shshhhpsllphL.c.p-sDWcVhphVLs+Lstp......Lp.+sLhh...s...ss...plcpLpssLsph.l....p..................s...h..h..................c...ph.....tt..ss..p..s.hp+sDlthsl.h.sLosLloYHphh.s..+sc..........pc....-hVtsh..pG...l..h..p..........c..........s...A..pt..C..l...hALolCshEh.P..sl....hKtLssll.s+h.o+l.s..oss.hAl...lLEFLusLuRLP.pLYtNFtt-patpVFuIsl.Y...............h....p.................st+....................t................................................................. 0 54 81 133 +4768 PF00091 Tubulin tubulin; Tubulin/FtsZ family, GTPase domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes the tubulin alpha, beta and gamma chains, as well as the bacterial FtsZ family of proteins. Members of this family are involved in polymer formation. FtsZ is the polymer-forming protein of bacterial cell division. 
It is part of a ring in the middle of the dividing cell that is required for constriction of cell membrane and cell envelope to yield two daughter cells. FtsZ and tubulin are GTPases. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. Tubulin is the major component of microtubules. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.38 0.70 -4.52 94 20546 2012-10-03 12:11:42 2003-04-07 12:59:11 20 44 10719 170 3790 14692 2809 152.80 36 51.77 CHANGED pIhslslGsuGsphssphhc.................................................................................psh.s.schlhlsTDspslsp.hpsspp.....lhhspphhpGh..G..AGus.t.hGpp.................................stccshcpI.cctl............c.s.hchhhlssuhGGGTGoGhusllschh+-h.................................s........t.lsluhsshPh.....ph.Eshhc.hsAhhulppL.hcpsDsllllsNspLhc........ls..............spphslp...................ssapp 
...............................................................................................................................................phh.t.thhp.....................................................................................................................................................................sshhs...sc.h.l.hlsh.-.s..t..s...h...........ct....s.......+........s...ush.................l...hs.........cp..h....s....h.....G..........G................AGsN.......t....hGc.................................................................................................hhc.s.h-...l...pp..h...................................p...s.h.....p................h..h..lh.tuhG.G.G..TGs.....Ghu.sllh....ph.hp.p..................................................t......h..hhhsh.s.s..h..P.............ph..-s.h...h..c.......h...........t..u.....uh..ppL......hc......p...........sD.....p.....h...hhlsN..pt.lhc..................ls......cp.hth...sa..t.................................................................................... 0 1325 2173 3053 +4769 PF00418 Tubulin-binding tubulin-binding; Tau and MAP protein, tubulin-binding repeat Finn RD anon Prosite Family This family includes the vertebrate proteins MAP2, MAP4 and Tau, as well as other animal homologs. MAP4 is present in many tissues but is usually absent from neurons; MAP2 and Tau are mainly neuronal. Members of this family have the ability to bind to and stabilise microtubules. As a result, they are involved in neuronal migration, supporting dendrite elongation, and regulating microtubules during mitotic metaphase. Note that Tau (Swiss:P10636) is involved in neurofibrillary tangle formation in Alzheimer's disease and some other dementias. This family features a C-terminal microtubule binding repeat that contains a conserved KXGS motif [1]. 
21.60 21.60 22.50 21.70 20.70 21.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.32 0.72 -4.26 12 1318 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 98 0 474 1352 0 30.90 51 14.99 CHANGED Vplhsptl.Dlp.pVpSKsGSp-NIKHpPGGGp ..........VpI.spKl.Dhp..pVpSKsGSh-NI+HpP..G..GGp... 0 102 146 270 +4770 PF01021 TYA TYA transposon protein Bateman A anon Pfam-B_90 (release 3.0) Family Ty are yeast transposons.\ \ A 5.7kb transcript codes for p3 a fusion protein of TYA and TYB. The TYA protein is analogous to the gag protein of retroviruses. TYA a is cleaved to form 46kd protein which can form mature virion like particles [1]. 25.00 25.00 26.50 26.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.43 0.72 -3.79 4 183 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 13 0 119 181 0 91.70 76 12.65 CHANGED ACASVTSKEVpTNQDPLDVSASKhpEa-+sSTKANSQQpTTPsSSAVPENtHHASPQsAQsP.PQNGPYpQQsMMTsNQANsSGWuhYG+PSMMPYoP AhASVTSKEVpoNQDPLsVSASpl.EaD+sSTKsNSQQpTTPuoSAVPEN.HHsSPQPASVPPPQNG.Y.QpsMMT.NQA.sSsWuaYt+PSMhsYo......................... 0 88 88 88 +4771 PF03251 Tymo_45kd_70kd Tymovirus 45/70Kd protein Bateman A anon Pfam-B_3418 (release 6.5) Family Tymoviruses are single stranded RNA viruses. This family includes a protein of unknown function that has been named based on its molecular weight. Tymoviruses such as the ononis yellow mosaic tymovirus encode only three proteins. Of these two are overlapping this protein overlaps a larger ORF that is thought to be the polymerase [1]. 
25.00 25.00 192.80 192.50 19.60 19.50 hmmbuild -o /dev/null HMM SEED 463 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.37 0.70 -5.80 13 23 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 0 0 23 0 464.50 32 74.92 CHANGED MSNGhPsSsRpshlcpoQtplhpssochtssssstLssslPLcpscGosohsphlRcPsl+hRhpssPPssPQssRs.soLpPL-hstpsuhhscVHETlcVppsstppspL.pssQLPupspRhpSlPpHLphsupttsRlHARRuDVL.uhosstslpslspsssslLQspsusRt.LHRsLshPpsLHLps.RppsuL+sR+ospRpLQsAsppspLAEsthH..s.spPlpppsGILGPsPLhscspR..........sPpsshppssss..........................slLPsPphspuupuaLPsPTossPs+sspulpRslHLHsSpssosclRPpRlRssulQQspspLGHspuLGQSsNLRusppspPo+pplpLhPhssspspsl.hssh........sP.hppp.Sh....hP+Psss.sshstsssp...hpsplPssh.s.......ssphsssss......suhssssssssssssss...............Ps MSNGhPsSsRRshlapSQRplsposSchpspssstLsssLPLspscGosuhsphlRHPslRhtppPsPPppPQssRs.soLpPLthPtppShhpcVHEThpVppsstppscL.pspQLPspspRp+SlPpHlppsu..pptp+lHARRsDVL.uhssptslpshspsssslLQspsuoRt.LHRslsLPcsLHLps.tsposL+sRco.pRpLQsAsppPhLAcsphH.....s.s.slpcpsGILGPsPLsscspR.s..............sPpsthspssss.............................h.s.t.slLPss+h.poSpuHLPssTsssPspsspuLpRPlHLHpSssposchRPpRlRpculpQscspLGH.psLGQSusLRsscpssPs+ptLpL.spPstpspslspssL........sP.httttSh.h..hPpPsshhsshshsosp...hphplPpsh.s.......spth.ssss......sphshtsshpss..sss.h.....ssss..s................................ 
0 0 0 0 +4772 PF00983 Tymo_coat Tymovirus coat protein Finn RD, Bateman A anon Pfam-B_1429 (release 2.1) Domain \N 19.00 19.00 20.10 26.20 17.90 17.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.92 0.71 -4.56 9 153 2012-10-04 01:49:40 2003-04-07 12:59:11 13 4 51 27 0 158 0 167.60 34 41.30 CHANGED ps.QssIsssuopLP.ssGppsPoIl.PFQhpssohGsp-susplolAotssluplTohYRHApLspLpAsIpPouhAsupPsTVsLsWVPsNSoATsupILs..lYGGQpFslGGuIsospsIpVPssLssVNPhIKDSVpYTDoPKLLlYSsAsssss..sssTsolpIpGplp ....................h..............s...s.sssslslPFQh.p.shsh.Gst.s.s.t.s.ols.luussslopLsusYRHApLhpLcAhltPshsuhupPholslVWssAs.ssssssplLp..sYGGpphslGGslshsushpVPAsLsplNPhIKsSVsYsDTP+L.hh.sssssssus.....sssh..shlhlpGhl............................. 1 0 0 0 +4773 PF00264 Tyrosinase tyrosinase; Common central domain of tyrosinase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family also contains polyphenol oxidases and some hemocyanins. Binds two copper ions via two sets of three histidines. This family is related to Pfam:PF00372. 
22.30 22.30 22.30 22.30 22.00 22.20 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.22 0.70 -4.19 127 3739 2012-10-01 19:27:11 2003-04-07 12:59:11 15 75 1610 58 1016 3969 31 141.50 35 41.98 CHANGED tspapphs.................uhHt................................................thshs.hHs..ss....hFhsWHRhYlhhaE.....ptLpp.................ts..................sssthtlPYWDWs..........................sps..ht.ssslhs....ss.h.uh....................................t.h...hss...Fs..shhsph.......................................p.cshp+sh..........................ssttspthsstpp........................ltphlht..................................sshpsFpshh...ps.................................thHsssHh.hlG......................................Gp...............h....ushhsushDPlFalHHuplDRlathWQph ..........................................................................................................................................................................................h.psYDhFV................hlHhhsscss.......................................htss.sh.pshch.uHp...us.......uFlPWH.RhaL..L.haE.....+plp+....................ls.........................sspsFslP..a..Ws..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................... 0 335 609 862 +4774 PF03064 U79_P34 HSV U79 / HCMV P34 Griffiths-Jones SR anon Pfam-B_2985 (release 6.4) Family This family represents herpes virus protein U79 and cytomegalovirus early phosphoprotein P34 (UL112). 23.20 23.20 23.20 24.60 22.50 23.10 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.82 0.70 -4.86 5 51 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 24 0 1 53 2 236.80 47 48.69 CHANGED RcYGTF-NVscsYcQIVocolcLRRacF-oGClI-FlusSG+CEsausGWIS.MIhWTSETsS......tGSLTlDIssD-GppKTY..pARGtILCSKSITSISQso...EG+-+lLTlspENGKLQlTaVTlsKsu+-s-l+slG.DsKstcpFEKECpAs-RKKp...DD-++K+SuKQKEKRRsED..cK+cEDc+KKpE.......c++psDs-Kpsspc-cu.sttpp..pph.........D.....psscEKRQK.aH-s..-RcLEcQScE .............R+YaTFsNssRlLHQsVspoFDVRQFsFDoARlVsCl-G-G+s.phsKGWLC.ATIMQpu-uuuuu......pstQGhMSlDITuDspLpcph..FsRGuIVhNKoVSSVVGss....ssscuuLLThluEsGsLQVTaVcHhh.psHs...psusssu.....usGsAusAu..AVs.s.....oShGuS.......uGspcGsus.ppppRRRpc..........ppH--cR+Kppp.....................pttsu..GuuGuuGGG.s.GuGSGGppu..sotp....thLc-...........st..pRQK.......ERc..Pspp......................................................... 0 0 0 1 +4775 PF02134 UBACT UBACT_repeat; Repeat in ubiquitin-activating (UBA) protein Mian N, Bateman A anon IPR000127 Family \N 20.50 20.50 20.70 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.08 0.72 -4.40 128 1697 2009-01-15 18:05:59 2003-04-07 12:59:11 16 51 346 44 1116 1656 35 65.80 34 11.72 CHANGED pF-cDDsss..lcalhusuNlRAppasI..s...h...............shhps+tls.upIIPAlAoTsAlluuhs..shEhhKllpsp ..............F-pDDssp....lcFlhuuuNLRApsasIp.s...t...............sp.tps+.t......ls..spI.I.PAlAoT.sAhlsuls..shEhhKlht..t................ 
1 397 620 910 +4776 PF01977 UbiD DUF117; UPF0096; 3-octaprenyl-4-hydroxybenzoate carboxy-lyase Enright A, Ouzounis C, Bateman A anon Enright A Family This family has been characterised as 3-octaprenyl-4- hydroxybenzoate carboxy-lyase enzymes [1]. This enzyme catalyses the third reaction in ubiquinone biosynthesis. For optimal activity the carboxy-lase was shown to require Mn2+ [1]. 19.80 19.80 20.40 19.80 19.40 19.70 hmmbuild -o /dev/null HMM SEED 407 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.04 0.70 -6.02 155 3152 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 2165 3 810 2356 887 380.40 36 83.80 CHANGED lptL.ccpG..p....Lhclcp.Vs.spL....Elutlsc+........hhp........t..tss....AlLFcsl...cGh.........hsll.sNla...u.op........cRlshsL.Gh.........phpplsppltphh.p.................ht.hsh.hphh............pusspcshh..tpphD..L..pc.LPlhppaspDGG......alTh...................uhVloc..sP.cs.......NlGhYRhQ.lhs.....c.pcluh+hh..t+css.phap...ch....pcpG...........................c..phPVAlslGs-PshhhuAs.hP...lP...slsEhthAGhLp....Gpslcllcst...sssL.VPAsAElVlEGhls...s........p.ht....EGPFGDasGY.Ys..scp....hPVhcVpslptRc..c.PIa.ss.lsGpPs..tEcphlutsspchhl...shlptthP.p....lh-lhhs.tusha..hsllsIccpa........sGc.........u+plhhuhaustt.h....hsKhlllV....D.cDl-lpDhppVhWAlso.....Rhcss+.....Dlhllsss.s..ss 
.............................................................................................................................................................................phL.-ppG..pLh+I.st.Vs.sphElst..l.scc.........sh+...................s..tGP....ALLF-Ns...p.Ghs........hPV.l.sNla...G.o........................cRlAhuh..G.................shpplsc...h..l.shhhc....................................h....sh..phl............................psusspp..lh..p....u..-c..l.s......L.............tc..lPl.ps.ast.Duu....................shlTh....ulslo.+..sPpc.........tppNlG..IYR.Q..lhu.....+.s+lhh....+als....p+ssA.hc.hp..ch....t.c.....G............................................c..t.hPlulslGsDPshhluAssP........l.P.........sl...oEh.thAGhLR....Gp.hclsps......s...s.s...lpVPAsuEllLEGhlp.................s....................tcht...E.......G.......P......a...G.....DaTGY....Ys..tscp......hPVhplpploh.Rc...c..sI.apoT.hs.G+..P...s...sE.sshLussh.s......clhl...sl.Lp..p..phP..E....lhD.hahP..p.G.ssa...hAlloh.+K.pY..........sGa.........A+pVhhusaohhpth....asKhlIls..................D....-D...l.s.scDhscVlWAlso.....RhcPsRDhlhlpsssh................................................ 
0 231 482 658 +4777 PF01209 Ubie_methyltran ubiE/COQ5 methyltransferase family Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.45 0.70 -5.25 5 4079 2012-10-10 17:06:42 2003-04-07 12:59:11 13 9 3618 4 1229 27075 9319 227.00 38 91.29 CHANGED pTslKEp+...VpcVFcSVAo+YDhMNDVlSFGIHRLWKc.FTh+psus+cGs.....shLDVAGGTGDlTFcLo-ulGsoG.............KVlllDINEsMLKhGccKl+-pGhh....sIEaLpuNAEcLPF-D.soFDslTISFGLRNsoDh.KuL+EhaRVLKPGGpllCLEFS+PphPlhcpAY-hYuKtVMPhhG+llAc-h-SYpYLsESIRcFPDQ-TLcuMhc-AGFcuVcYcsLTGGlsAlHhGhK ............................................................................................................................tp....Ktpc...VtplF.c.s..l..A.s.+....Y..D....l.....h.....N......c......l....h....S.........h.....G.....h......H.....+....h.......W...+........+......h...s....h.....p.........t....h.........s......s.....c.........G.p...................................p.l..L.....Dl..A...u....G....T....G........D...l....s....h....t....h....u....c...t....s...s..t....p..u......................................................c.V.s..h..s....D.....h......s........p.......s.......M.......L......p......l.....G......+......c.........+.........h....h.........p.....h....s..h...........................s..l..p......a......l.........p...............u........s...............A......p........p........L.............P.................F.................s..........D.......s.........o...........F..........D............s......l........T...........I.......u............F......G............L...........R.............N............V...........s.......-...........h............p.............p...........A......L........c....E........h....h.....R.....V.......L..K....P...G.....G.+..l........l...........l........L.........E.....F.........S.......p.....P......p.......h...............s........h....p....p.....h........Y..........c......h......Y..........h......p.............l.
....l...P......h.....h...G...p......l.......l...u...p.....s...t.......c..u......Y...p.....Y.......L...s..E.....S.....I...c.....t.......a.P.....s...p....c..p......L.....t......th...h.p...c.AG.F............p.....p..l......p........a......p......s.ho.uGlsAlHhGhK......................................................................................................................................... 0 414 798 1059 +4778 PF03981 Ubiq_cyt_C_chap Ubiquinol-cytochrome C chaperone Finn RD anon Pfam-B_5272 (release 7.2) Family \N 21.50 21.50 21.50 22.20 21.00 21.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.60 0.71 -4.33 63 1111 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 985 0 313 697 717 153.30 26 61.36 CHANGED pphsls-ThsucaphhsLHhallhhR.................l+sts...ttu.....ptlsQplh-thhpDh-ppl+chuls-hsl...sKph+ph....sptahGthhAYDpulst...sstsLusALh.RNlhpst................shp...phttlssYltpthttLsshsspslhsuth....ta ..................................................................chp..phlsuchphhshcshhshlR..................................LKu-t........hus.....hplpQpLl-pahccs.pchc.c..t..p..h.p-hht........stplpcL....pphh.hhht.sh..hAhss...........hsspL..sthLtpphhhuh........................................................................scshh.....chsuLsu.Vtsslss.lcsluusuhtssh...ssh..................................................... 0 109 180 253 +4779 PF02271 UCR_14kD Ubiquinol-cytochrome C reductase complex 14kD subunit Mian N, Bateman A anon Pfam-B_4192 (release 5.2) Family The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex [1]. This Pfam family represents the 14kD (or VI) subunit of the complex which is not directly involved in electron transfer, but has a role in assembly of the complex [2]. 
20.70 20.70 21.20 29.60 20.60 19.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.12 0.72 -4.27 25 388 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 303 52 242 377 2 101.50 37 76.61 CHANGED lhppPtLs+lhhPlAphahN..huGYRphGL+hDDLlsE...EssslpcAL+RLPccEsYsRsaRItRAtQLSloHplLP+ccWTKsEEDssYLpPYlt-lcpEtpE+p- ..........................h..........hh.s.ltpha.hs......suG..ap..ch.GL+...........hDDll.E..........s.sVpcAl+RLP...cc....h.sRsaRlpRAhp....LShp+plLP+-p.WsK.-...-D..h......Y...LpP...hlp-lppEppE+p...................... 0 77 127 196 +4780 PF02320 UCR_hinge Ubiquinol-cytochrome C reductase hinge protein Mian N, Bateman A anon Pfam-B_11849 (release 5.2) Family The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex [1]. This Pfam family represents the 'hinge' protein of the complex which is thought to mediate formation of the cytochrome c1 and cytochrome c complex. 22.00 22.00 22.20 24.20 21.60 21.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.41 0.72 -4.03 24 379 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 288 52 237 373 5 64.70 37 55.86 CHANGED VDPhppLcEcCttpscCsphhpcappCscRVpucsp........scEsCspEhFDhhHClD+CVA..KLFspLK ................DPhpplcEcCtpptcC..sphhc+a-pCs-RVpscsp...............scEcCsEEhF-hhHshDcCsA..KLFppLK.................. 0 78 131 193 +4781 PF02921 UCR_TM Ubiquinol cytochrome reductase transmembrane region Griffiths-Jones SR anon ref [1] Family Each subunit of the cytochrome bc1 complex provides a single helix (this family) to make up the transmembrane region of the complex. 
19.90 19.90 20.20 20.00 19.80 19.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.09 0.72 -3.74 30 409 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 305 58 228 405 5 61.80 36 24.87 CHANGED upTch..phPDFo......sYcccp.........css-sp+uFoYhhl.GuhulhsAsuAKssVpsFlSoMSAS....AD ....................psh..phPDFs......pYccpp..............pusss+....+uFoYhhs.Gu.....sulssAhuAKssVppFlsoMSASAD..... 1 79 127 188 +4782 PF02939 UcrQ UcrQ family Bateman A anon PSI-blast P13271 Family The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex [1]. This family represents the 9.5 kDa subunit of the complex. 25.20 25.20 26.70 26.40 25.00 24.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.49 0.72 -4.16 7 257 2012-10-01 20:10:32 2003-04-07 12:59:11 11 4 223 52 177 256 0 78.40 35 78.44 CHANGED hhtahshsK.+GIhoYulSPapQ+shsGhFcpul.NsFR.RspophLYhs.PhshhYhlashupcpNphL.pKsstc.hpc ..........................hht.hhsh.s..+..+tlhoYuLSPacQ+shsGhhppul.Ns.aR.Rh+.sphhhVsP...PhlhsYhlhsWupccpchhppKsstt..t................... 0 51 86 144 +4783 PF00984 UDPG_MGDP_dh UDP-glucose/GDP-mannose dehydrogenase family, central domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1105 (release 3.0) Domain The UDP-glucose/GDP-mannose dehydrogenaseses are a small group of enzymes which possesses the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate [2]. 
20.60 20.60 20.90 21.50 20.50 19.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.83 0.72 -3.77 26 6968 2012-10-02 19:36:47 2003-04-07 12:59:11 14 24 3680 103 1988 5681 4792 94.60 34 22.22 CHANGED lpoAEhlKhssNsahAs+IoFhNElupIs-plGsDlpcVhculuhDsRls....ahpsG.GaGGuChs+DshsLsttupphshssp..hhppllpsNpsp ...............psAEhsKlhsNsahAh+IuahNElu.p.ls-p.............h..G..l...slpclhcussh....D.sR..ls...............hh.pP.G.....G.a.GGpClPKDshtLl.t.p.s.p.p...sh..p.....llpthhpsN...t............................... 0 678 1317 1697 +4784 PF03720 UDPG_MGDP_dh_C UDP-glucose/GDP-mannose dehydrogenase family, UDP binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1105 (release 3.0) Domain The UDP-glucose/GDP-mannose dehydrogenaseses are a small group of enzymes which possesses the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate [2]. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.49 0.72 -3.89 161 6858 2009-01-15 18:05:59 2003-04-07 12:59:11 10 20 3675 103 1921 5532 3768 98.00 25 23.17 CHANGED ulLGluFK..ssocDhR-SPulsllptLhpp...Gu.....p.lpsaDP..hstptthth.........................................tlph..htsh..tcslpssDslllhT.-a.spFcp.h...s.tthhphh....pss.llhDsR..slh.ct ....................hlhGLsFK..ss..oDD..h.R-.Ssuhslhcp..Ltp.t..Gs....................c..Vh.laDP..hspppthth................................................................................................slph.h.p.sh...tpsh.p..s.ADslll.ss.cappacs..h......................t.t..h..................tt.....hlhDs+shh................................................ 
0 659 1277 1636 +4785 PF03721 UDPG_MGDP_dh_N UDP-glucose/GDP-mannose dehydrogenase family, NAD binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1105 (release 3.0) Domain The UDP-glucose/GDP-mannose dehydrogenaseses are a small group of enzymes which possesses the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.95 0.71 -4.91 27 7072 2012-10-10 17:06:42 2003-04-07 12:59:11 9 27 3696 103 2033 10113 8347 178.30 31 42.76 CHANGED h+IullGhGYVGLssusshuphG..hcVlslDIspp+lcplspGp.PIhEPGLpcllcpsh...pLphoschppslcpuDlhhIsVsTP.cp.....sptsDLpalpssscpluphlcp...tpllVh+STVPsGoscthhh.llpchstht.........hcatls.sPEFLpEGsAlpDhhpscRlllGspspsst......thhpcl .................................................+Isl.l.G.h.GYVG.L.ssu..s..h..hApt.s........hc...Vl..u....l...........D............l.......s....t...........p.......+....l.....c....t.........l.............s........p........G......p............s......I......h...........E.....s.........s.....l......p.....c..........l.........l.....p.....p.....s.......h..............................................h..................p......l....p...h....o....s......c.....h..p.....p......s...h......p......p.....u..D.hhh.....I..A..V...s.T..Phct..................stp.s.D......h.....s..h.....V.....p.u.....s.s.c.s..lu.p...h.hpp.................hsll.l.h.c.S.T..VP.V.Gss...c..h...h.t......lh.c.p.h.ss..............................php.....ls..a..sPEhlp..pGp..u.l...hD...hh.p.s.sRllsGhssppst.........hh.......................................................................................................................... 
0 686 1345 1734 +4786 PF01704 UDPGP UTP--glucose-1-phosphate uridylyltransferase Bashton M, Bateman A anon Pfam-B_1634 (release 4.1) Family This family consists of UTP--glucose-1-phosphate uridylyltransferases, EC:2.7.7.9. Also known as UDP-glucose pyrophosphorylase (UDPGP) and Glucose-1-phosphate uridylyltransferase. UTP--glucose-1-phosphate uridylyltransferase catalyses the interconversion of MgUTP + glucose-1-phosphate and UDP-glucose + MgPPi [1]. UDP-glucose is an important intermediate in mammalian carbohydrate interconversion involved in various metabolic roles depending on tissue type [1]. In Dictyostelium (slime mold) mutants in this enzyme abort the development cycle [2]. Also within the family is UDP-N-acetylglucosamine Swiss:Q16222 or AGX1 [3] and two hypothetical proteins from Borrelia burgdorferi the lyme disease spirochaete Swiss:O51893 and Swiss:O51036. 19.70 19.70 19.70 19.80 19.20 19.60 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.43 0.70 -5.98 8 1651 2012-10-03 05:28:31 2003-04-07 12:59:11 13 13 879 45 747 1343 69 349.50 26 75.79 CHANGED EhsuFhpLapRYlsc..spspplcWDcIcpPs.--ll.....cY-pLpt.s....pphuplLsKLAVLKLNGGLGToMGCpuPKSlIEV............RsshTFLDLtVpQIEpLN+pY.s.ssVPLlLMNSasTcc-TpKllcK..Ysss+lc.IpTFpQSpaPRlsKDoLLPlPpts..sS.s...-tWYPPGHGDlFcSLhsSGhlDsLLAQGKEYlFVSNlDN.LGAoVDLpILNHlIp....ppsEYsMEVT-KTpADlKGGsLhoY-G+l+LLEluQVPpc+l-EFKShpK.....FKlFNTNNl...WlsL+AlKRLl-sspLcL-IIsN.Kpl...s...........cslcllQLETAhGuAIppFcsuhGl.pVPR.sRFLPVKoo.SDLhLlpSsLYsLc.sGolphsstR.t.ssPll+LGsEFpcVusahpRlsuIP.sllELDHLTVSGDVaFGpNloLK 
.....................................................................................................................................................................................l.................t.tt.h..s.....+huVlhlsGG...............GTph..G..h.ps.P..K..u.h..h..pl..............................t..p..t.hoh....h.p.....l.......s.p.p..l........t......p.......l......p........c........p..........h.......s.....s.......s.......l.P...h.hl.M..sS....Tcc...........s..Thp...hhc..c...........Yh....s...........h...p.hc...........lh.hFp.......Q.s...p...hPt.l.s.t.-u..hl.ltp.............ps.......s..........................t...............PsGpGs....lapuL........h........s..............u.......G.......h...........L....-c.......h.hpp..G..h.c...a.l.alt........sl.DN....Ls.ts....sD.h........hluah.....hp..............p..s....s-h.s.hc...ls.t+s...pss...tpsG..h..l.s...........p.....h...............-..........s.......c..........hp.............l...lEh...u.p.l..st..c........ht....th.p.s...pt.................hhh.F..N.s...s...Nl........ahsh...thlp..p.l........h..........p....t.......t..h..p...h..h..hh..s.K...p....l......c........................ssh.t.lh.....p.....h..t....h.......hh...hsl....h.a..........t.........p...s.h..........s...........l..pV.sR....pcF.PlKss................................................................................................................................................................................................................................................................................................................................................. 
0 291 446 619 +4787 PF00201 UDPGT UDP-glucoronosyl and UDP-glucosyl transferase Finn RD anon Prosite Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.60 0.70 -5.95 14 8949 2012-10-03 16:42:30 2003-04-07 12:59:11 13 81 1315 25 4768 9465 106 279.00 18 62.32 CHANGED GKVLVaPh-hSHWhsh+sllccLlpRGHElsVLtsuuohhlc.tcsuslphcsassuhopc-lcs.hhphhpphhhthsp.sshhphhsthp....chushhtssCpplltNKpLhppLpESpFDVlhsDslhPCGtllAcLLpIPhVa.LRhsPshhhp+tstth.hPsSYVPhllosLSDpMTFh-RV+NMlhhLhhcahhphh.pp.asphhSElLsRPsTls-hhu+AshWLlRshash-aP+PlhPNhsFlGGlpC+PAKPLspEhEtalpuSGE+GlVVFSLGSMVSshsEE+AphIAsALupIPQpVLWRacG.....p+PssLusNT.....hLhKWlPQNDLLGHPpT+AFlTHuGusGlYEuIppGlPMVshPLFGDQhDNht+MpsKGAuVoLNhtpMoSpDLhNALKsVIND.sYKENhMpLSplH+DpPhcPLDhAVFWlEaVMRHKGA+HLRsAAHDLTWaQYHSLDVIGFLLusVsslsFlshKsChasaRKhltttp+s .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p........................................p.u..l..l.h....luh................G........o..h...h....t.............s......p......p...........h........p...t..l.....h.......t.....u....l.......t.....p...............s.....t...........t.....h.....l.......W.....h......h...p..s.............................................t..........h.......t......p......s..h..........................hl..h...p....W.....h.....P......Q..........p..........l..L................s..................H.......s.................t................s.t.s..Fl.o.H.s.G....h.......s.S....s.h.E.ul..h..t.G.V.P.h.l..s...................h................P....h.....h.....u......D.....Q..........h.N....s.t.........h............h...........t...........p...........h...................t........h..........u........h........................h...............................t........................h..........p.....t.....t.....t......l......t...t..l..t...t.l..h......t...t........................................................................................................................................................................................................................................................................................................................................................................tth.
................................................................................ 0 1059 2520 3921 +4788 PF03152 UFD1 Ubiquitin fusion degradation protein UFD1 Mifsud W anon Pfam-B_3272 (release 6.5) Family Post-translational ubiquitin-protein conjugates are recognised for degradation by the ubiquitin fusion degradation (UFD) pathway. Several proteins involved in this pathway have been identified [1]. This family includes UFD1, a 40kD protein that is essential for vegetative cell viability [1]. The human UFD1 gene is expressed at high levels during embryogenesis, especially in the eyes and in the inner ear primordia and is thought to be important in the determination of ectoderm-derived structures, including neural crest cells. In addition, this gene is deleted in the CATCH-22 (cardiac defects, abnormal facies, thymic hypoplasia, cleft palate and hypocalcaemia with deletions on chromosome 22) syndrome. This clinical syndrome is associated with a variety of developmental defects, all characterised by microdeletions on 22q11.2. Two such developmental defects are the DiGeorge syndrome OMIM:188400, and the velo-cardio- facial syndrome OMIM:145410. Several of the abnormalities associated with these conditions are thought to be due to defective neural crest cell differentiation [2]. 
35.60 35.60 36.10 35.70 35.00 35.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.95 0.71 -5.13 37 579 2012-10-01 20:15:13 2003-04-07 12:59:11 9 16 320 2 392 551 43 168.30 40 42.35 CHANGED papppa+sYPluhh....c+p..plppGsKIlhPPSALscLsp....lpl..p.aPMlFcLp...Ns........psp+hTHsGVLEFlA-EGpsalPhW........................MMpsLtLpcGsh.....lplp.sssLPpGsalKlpPposcFLD..IosPKAV.LEssL.RNausLThGDhItIsYssppYtlcllEl.....KPss....AlolIETD.....lpVDFssP ....................................p.apt.apsashshh..............p+....phphGsK............l.hhPPSALcpLsp.................................Lpl....p..aPM..lFcLp.......Nt.........................ps.s..+hoHsGVLEFsA.-.E.Gp..saLPhW..............................................MMps....L.hLp....pGsh.............................lplc...ss.sLPhuoalKLQP..p.oss....FLD......I...os...PKAV..LEssL.R.N.F.us.LTpGDlls..l........s...Y......N...s........c.h.Y.c...l..pVhEs................KPss......ul...sll.ETD.....h...pVDFssP.............................. 0 134 226 333 +4789 PF02512 UK UK_protein; Virulence determinant Mian N, Bateman A anon Pfam-B_2106 (release 5.4) Family The UK protein is an African swine fever virus (ASFV) protein that is highly conserved amongst strains, and is an important viral virulence determinant for domestic pigs [1]. 25.00 25.00 176.40 36.90 20.00 17.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.18 0.72 -10.40 0.72 -3.60 7 23 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 9 0 0 19 0 88.40 80 101.25 CHANGED MSTHssSPKEKPVDhNNlSEKsuVVNNAP............................................................EKPAGANHIPEKSA.cMTSSEWIAEYWKGIKRGNDVPCCCPRKMTSADKKFSVFGKG.LMRShQKss ...........MSTHssSPKEKPVDMNsISEKSuVVNNAPEKPAGANHIPEKSA.EMTSSEWIAEYWKGIpRGNDVPCCCPRKMTSADKKFSVFGKG.LMRShQKss...... 
0 0 0 0 +4790 PF03044 Herpes_UL16 UL16_UL94; Herpesvirus UL16/UL94 family Bateman A anon Pfam-B_4392 (release 6.4) Family This family groups together HSV-1 UL16 Swiss:P10200, HSV-6 ORF11R Swiss:P24442, EHV-1 46 Swiss:P28970, HCMV UL94 Swiss:P16800, EBV BGLF2 Swiss:P03221 and VZV 44 Swiss:P09293. UL16 protein may play a role in capsid maturation including DNA packaging/cleavage [1]. In immunofluorescence studies [2], UL16 was localised to the nucleus of infected cells in areas containing high concentrations of HSV capsid proteins. These nuclear compartments have been described previously as viral assemblons [3] and are distinct from compartments containing replicating DNA. Localisation within assemblons argues for a role of UL16 encoded protein in capsid assembly or maturation [2]. 25.00 25.00 42.80 42.80 18.60 18.30 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.20 0.70 -5.45 31 277 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 92 0 0 130 0 223.10 36 94.79 CHANGED Ms.........tsthphl+pFLpcEClWhh.lssssph+lYpussshSshh...........tss.sssspslplplhlh+P+t.....pphhlslhlNGthh.....sssphchhhsptl.t.sphhllhFuslsssshsh..lPs.ss.pss..ssttls.stlhpsup..hl.sp-shsssshu............h.lG.sGAWh...p.ustslYhahls.DLhshCPsh.phPSLu+llsthssCcstt....Cs.Cpspt.tHVsshsshssssss..ussC.ChsPCthhcus....lslpuppsLhsllF.-sphthplsthpttpsslssslscllsGhsssGcplssssssWpLlplsshhSRhhlhuC.sLK+ ...........thss.......phscRLLNDVsVWsp.VRsDshLpIhoAphsLppchp......ttspsss.ssssusLcIaLYLTKPKp.p.tp+ssHITslVNGs+A......hshLp+hssc+oPhG.uchashplu+sphsPsPhE.lPDPpsEP...........................................................................................................................................................................................................................................h....... 
0 0 0 0 +4791 PF03252 Herpes_UL21 UL21; Herpesvirus UL21 Bateman A anon Pfam-B_3264 (release 6.5) Family The UL21 protein appears to be a dispensable component in herpesviruses [1]. 25.00 25.00 56.20 56.10 18.10 17.20 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.73 0.70 -6.03 14 85 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 38 0 0 74 0 424.90 37 97.05 CHANGED MEhpYpsslta..psVsFYlsssGsRAYFlCGGClhSVsR.....ttpsuElAKFGLsLRG.G.sD+slAsYVRoELtR.pG..hphuhPsscc-.....VFlDslulL..............s.ss...usEtDLCGth-lEVhDPtLA-ahVSLpsosGLllssuccpsp-+ll+LacsPslsNssSsFlYsPNpssFsLsQApLscLPsuLpsLVcGLFDsIPs..............sRpPlstps...ppT-VIlTupRAApsh............................hstttptstpst++sslSsFVQV+aI.......PRVhshWsspusss.................sstoLpcLhplhhtsDtllhcs........pthsGlspchspA..+sslsptstslFGptut.hsFlGt.....tshslossQ+FsLhQYllp+tchssCYshLccLscsYhss..pcssss..Dpt................sluDssNslhR-sshlGtsuEtllthshhpsth......................ssssupssps-ussLLclApsphth..s.........ssshppp+htcluthLstLYsGtshhsuAhphuchhGsuthlsshh-sphhoAF-cussu....p+hstYLtuLls .............M-htYtpshha..psVhFYlststpRAYFhpGGClhSlsR......tpssElAKFGLslRG.u.ss+slAsYVRoEL..tppG.....ht.shs.spp-.........VFlDslslL..............t.ss...ss-hDlhst.-lEVhD.hLsch.hsSL.sssslhlssstshsp-phlcLhthPslss.ssStFhYsss..sFsLspApLscLPtSLp.LspGLFDslPs..............sR.sLsscs...p+TslllTupRAAcsl............................hsptpsptt+tt++sslSsFVQl+hI.......P.RVhshWsspttss.....................s.slptL.hlhhhuDcllhcs........pt.sG.LpcE...scA..ppslhptshslaGptGt.hsFhGt......uhsLoshQ+FslhQYIlpR.chhsCYsslccLscpYspt..psts..ss.ssps................hlsDssNtlhRcshhhG.hsc.lh.h................................sut.s..-uthlhchhtt..................shs.t..h.h.luhhLshlYtstshhssAh..sphhsss..lhhh.ph..hoAF-...hh....t+hhthl..Lh.h........................... 
0 0 0 0 +4792 PF01499 Herpes_UL25 UL25; Herpesvirus UL25 family Bateman A anon Pfam-B_700 (release 4.0) Family The herpesvirus UL25 gene product is a virion component involved in virus penetration [2] and capsid assembly. The product of the UL25 gene is required for packaging but not cleavage of replicated viral DNA [2]. This family includes a number of herpesvirus proteins: EHV-1 36, EBV BVRF1 Swiss:P03233, HCMV UL77 Swiss:P16726, ILTV ORF2 Swiss:P23987, and VZV gene 34 Swiss:P09287. 25.00 25.00 50.40 50.30 22.70 22.30 hmmbuild -o /dev/null HMM SEED 540 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.52 0.70 -6.20 15 156 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 90 1 0 140 0 513.80 29 94.47 CHANGED hthhhts..hh.scs+Nhltsshsshphsp.phutphRspt....phchsph++c..hlpAELDsLttpptstssplsscLcslEp......tltchsps.sslp...............................ssspusppssssps...hs.hstspsssh...............pVsIs.sDPslpacsshps-hlsslYsspupWsso....FGsWYtsLpcshhpcRhhP+sh+ussstssohStcLMssslssLpussphahuDppahuDpsAALCLlsAYh.....utpsus.hP....sohs-LLppLPphlchLss-lpstps..usssapFshsc.sppphhuPhs+tt+YspssFssHtlhshLh+tGVlsthsGt...........scssGsshsD.-lsh...slsshlhusslPhhsccQhhLRuGlsuIpuLlLlapLLpsusVasc+ss+phpLuuLls.sthssss.......ss.utpssshpt............spNFpFLhccYVlPhYptsspsslopLFPGLsuLslstpsttussstpt......hlslous+aQ.....slhcllstcLpp+cs.......stllsAHDuLtlphEcGLulLLppspPppu.hpshtpuQFsV 
........s........hhh.scscN.hltss..hhhhhp.thshshcstt...pphthtps++p..hhtstL-sLtt....pttshst-lcp+lcslEp..........................plpph.sssh.....sshp................................................ps.stssptsssuts....tt.ssstssss.tsts................................pltIspNDPsl......pac.oshps-llshlYsspsshsso.....FGsWYtpLpcthhsc...PpshRhsphcssphSpphhssslsuLpssshhhsssp.h.u..pAsLCLhhhYt.............shtss..p.phP.....sohtpLlppLPphl....ctls....ppls....s.....p.......stsshtashpc.Pctpahs.Phsp...YptGshspHsllthLh+puVlsthPG...t..................t..st.phsssstsD.clsh....thsshhht.tps.hhhc-QphLRuslsslsuLlLlh+LLtsssVa.us+hsphhpLuslls.sssssts......sss.stptsh.ht..u...............................tspNFpFLhppYlsPhYttsPs.lplopLFPGLshLslstp..st.....p..shsssp+...............slshuusthQ..........tslhchhhhph.pp+pt.......hcllpsHDulhhpaEpGLGhLhp.shstpt.hpsht.stFNV...... 0 0 0 0 +4793 PF04496 Herpes_UL35 UL35; Herpesvirus UL35 family Finn RD anon Pfam-B_3981 (release 7.5) Family UL35 represents a true late gene which encodes a 12-kDa capsid protein [1]. 25.00 25.00 27.80 28.20 18.10 17.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -9.97 0.72 -3.80 11 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 30 0 0 30 0 99.00 38 74.64 CHANGED ssFDPssPsThos-slpshs.VsLlphLNss.tslpsusptsp.lssA+psllhGtusuhsclR+pHsspTlpRssMFApsDsuoWlRPolGLKRTFsPtlhp ......sFDPssPsThos-slcshh.V-llhtLNss.t.lpsspptsthhpsA+pslhhGtusuhsclRppHsspTlpRpsMFAssDsuoWlRPolGLKRTFsPtlh... 0 0 0 0 +4794 PF03970 Herpes_UL37_1 UL37; Herpesvirus UL37 tegument protein Finn RD anon DOMO_DM03155x; Family UL37 interacts with UL36, which is thought to be an important early step in tegumentation during virion morphogenesis in the cytoplasm [1]. 
25.00 25.00 30.40 30.30 18.60 17.80 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.76 0.70 -5.59 9 82 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 33 0 0 78 0 264.90 37 25.28 CHANGED stspshshpsLLssLtuL........upsssstcshssthsucsRsuIupFhhSosplslppscshW+cLhp...slhclYppot.PEAAhLAcNhsGLlhWRlslpWscs..phh-phcpL+plshthTupEslphLopNsLRhSAshGPsshp.hlo-WhshFcssspsshshoscshhpuRt.h........ltphsAuLsppRFsLIYDhPFVQEGlRlluttssWlsPFslhhpphpssshT..PLTRsLFhluLlDpY.hssssssp.....Lp-hFs-slptls .....................................................................................................s.s.phshppLLssLssL........sppsssscs.u.htsuchRuuhutFLLSssslsstcsctpWc..sLhp...tlCtlapspthPEsAhLAENLPGLllaRLslshscs..pshcphchlpchlhshsus-s.ptLssssLRsuAshG.PVph+thls-WlspapslscsshshsPcs.hcAhttts........lspssAsLsp.tauLlashPFVQEGlRhLuhsushls.FsshhpplssuoLT..PLTRALFTLuLVDEY.hsssptss..s..LhttFtcsVptIc................. 0 0 0 0 +4795 PF02282 Herpes_UL42 UL42; DNA polymerase processivity factor (UL42) Bateman A, Mian N anon Pfam-B_5119 (release 5.2) Domain The DNA polymerase processivity factor (UL42) of herpes simplex virus forms a heterodimer with UL30 to create the viral DNA polymerase complex. UL42 functions to increase the processivity of polymerisation and makes little contribution to the catalytic activity of the polymerase. 
20.80 20.80 20.80 25.70 20.40 19.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.76 0.71 -4.63 12 135 2012-10-02 11:47:48 2003-04-07 12:59:11 11 2 32 8 0 135 0 137.10 27 63.67 CHANGED sphsLsctpLsclhushts.ssshtssFhlhsccshhlpsshtupplhhslptpthsph.......ss.tlhLu.s-uppsLl..hthspt.psht............phshtlcspsshRpLlQplhhssst..........ht..hss.sss.hslhppp.sshshhh .....................splsLpcspLscllsuhus.tss....hhsohhlhsstshhlasohhuppVhhslcpsthSph.......ss..h..u.scuppsLl.shthspt.psst............phslslsspsshRsllQ+lhsssut..............hss....pssuhslhtpt.suhsshh...................... 1 0 0 0 +4796 PF03117 Herpes_UL49_1 UL49; UL49 family Mifsud W anon Pfam-B_2110 (release 6.5) Family Members of this family, found in several herpesviruses, include EBV BFRF2 Swiss:P14347 and other UL49 proteins (e.g. HCMVA UL49 Swiss:P16786, HSV6 U33 Swiss:P52441). There are eight conserved cysteine residues in this alignment, all lying towards the C-terminus. Their function is unknown. 25.00 25.00 53.80 52.70 18.40 17.70 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.04 0.70 -5.28 16 80 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 52 0 0 75 0 247.60 42 50.98 CHANGED hsspspuLlsAL+ppussVPCGNPh.sMs+sLshpsshpss+sllPlspp........................................................ssplspplhsplLuhslLusllslPlhshsht+shRtttsts.......hhsVlCt-CGHCLNhGKsKhps...hsFsPosh.FYsRDpKEKphhhCssoGRlYCShCGSpplpshplhEhs...h.hGh..shl..RAVlusNAAhslpssspphDlllPChuostsCtu.slL+clolpcLLhLT..upssphhCt+Cps .......s..phpuLluuLRccuupVPCGNPlasMs+thlppaCtsssRaLlPlpshsht.tsp.......sts.........................................h..p.s+lshaulussLRsGLluSVI-LPlhChs+hKCpRahcsts........lhAVVCppCGHCLNhGK-KLcsp..psFsLNSh.FYYRD+QEKuVlassps-hlHCSLCGSpplspp+lYElsptshhGt...hpVp..W+AVlG.NAACulhstphthDlllPC..usRoC.usVllRtloV.+LLpLT..SHupshhCt+CQ.... 
0 0 0 0 +4797 PF03121 Herpes_UL52 UL52_UL70; Herpesviridae UL52/UL70 DNA primase Mifsud W anon Pfam-B_203 (release 6.5) Family Herpes simplex virus type 1 DNA replication in host cells is known to be mediated by seven viral-encoded proteins, three of which form a heterotrimeric DNA helicase-primase complex. This complex consists of UL5, UL8, and UL52 subunits. Heterodimers consisting of UL5 and UL52 have been shown to retain both helicase and primase activities. Nevertheless, UL8 is still essential for replication: though it lacks any DNA binding or catalytic activities, it is involved in the transport of UL5-UL52 and it also interacts with other replication proteins. The molecular mechanisms of the UL5-UL52 catalytic activities are not known. While UL5 is associated with DNA helicase activity and UL52 with DNA primase activity, the helicase activity requires the interaction of UL5 and UL52 [see 2,3]. It is not known if the primase activity can be maintained by UL52 alone. The region encompassed by residues 610-636 of HSV1 UL52 Swiss:P10236 is thought to contain a divalent metal cation binding motif. Indeed, this region contains several aspartate and glutamate residues that might be involved in divalent cation binding. The biological significance of UL52-UL8 interaction is not known. Yeast two-hybrid analysis together with immunoprecipitation experiments have shown that the HSV1 UL52 region between residues 366-914 is essential for this interaction, while the first 349 N-terminal residues are dispensable [2]. This family also includes protein UL70 from cytomegalovirus (CMV, a subgroup of the Herpesviridae) strains (e.g. Swiss:P17149), which, by analogy with UL52, is thought to have DNA primase activity. Indeed, CMV strains also possess a DNA helicase-primase complex, the other subunits being protein UL105 (with known similarity to HSV1 UL5) and protein UL102. 
20.80 20.80 21.00 20.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.84 0.72 -4.24 38 329 2012-10-02 15:26:12 2003-04-07 12:59:11 10 8 214 0 89 310 49 69.60 23 8.88 CHANGED shtpsshhllplp.........+spsFtClphpHppp.pps...........splalslpssp.ttlhhshhppCF........usK.........CssNphpsthoshss ..............h..pthhlhplt.........p.tshhChpht+t+pups..............lalslchpp.....hshhQ+Ca.......ssc.........Cpspthps.h.....s.................... 0 37 49 70 +4798 PF03049 Herpes_UL79 UL79; UL79 family Bateman A anon Pfam-B_2433 (release 6.4) Family Members of this family are functionally uncharacterised proteins from herpesviruses. This family groups together HSV-6 U52 Swiss:P52469, HVS-1 18 Swiss:Q01003 and HCMV UL79 Swiss:P16752. 20.50 20.50 20.70 21.70 19.90 16.20 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.43 0.70 -5.31 19 61 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 50 0 0 55 0 246.10 36 92.22 CHANGED hlG+alh.pspshoshlhplhhKllpGpsLsohp.-EL+hl+LlhsphashGLplhLLREslsNsGspDsslLsRKVPsEaWphlY-shcphssssc...hLhsEsptupLhh+Ls.pssslhphlspalhpchGL..tlplssp.lpDGNhLFsLGoVhspRLlhlhtFhhhaWGppphEPhVRhhspKlahhYLIlsG+Lplptshap.psssphsGlhphlhpDhhuapG....slspss.............h.p.tp.hDh..lhlhsssl ...hG+al..phsshsphlhpIhpKhhpGpsLsoh+.EEL+ll+LlhshhaphGLpshLLREshsNsGlsDssVLuRKlPspaWhhlYctLcphssstc...hlhsEspuApL.h+Ls.ps.th.hthlscalhcchGL..slslsp-hhpDGNlLFsLGolhsaRLhhlstFhhtaWGpppaEPhlRhhspKhahhYLIhsG+lpls.ssap.ppopc.sGlhshIhcDh+sFtG....slstps.phhp.pp......................... 0 0 0 0 +4799 PF03043 Herpes_UL87 UL87; Herpesvirus UL87 family Bateman A anon Pfam-B_1736 (release 6.4) Family Members of this family are functionally uncharacterised. This family groups together EBV BcRF1 Swiss:P25215, HSV-6 U58 Swiss:P24437, HVS-1 24 Swiss:Q01007 and HCMV UL87 Swiss:P16730. The proteins range from 575 to 950 amino acids in length. 
25.00 25.00 25.40 25.40 24.10 24.70 hmmbuild -o /dev/null HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.86 0.70 -6.12 19 90 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 54 0 0 80 0 494.10 43 72.04 CHANGED tLht.hpssp..........hshhP.hssspsththtcF.lLphhptW.s....Nsshtchtpplhpslpppsthlhhhh+hssppslplpshslhcapptl.hhh.p.lphpppK+tst.tshthslsp........ppthltasss..sslaRshhsh....................................................................................................usucplspspsshp.ssptss.......................sRthVshhcRlpaAP+ct............................................shshththp....................phpspsttp.shtppFsslsplSls.sF+VNlFNTNhVINTKlsspptss.....pslhslP+.LTpNFVh+KaohKEPuFTVSlFaSsDhs.pssAINlNIsGshlpFLaAhush+CalPIcslF.PAuluNWNSTLDLHGLENQslVRssR+cVFWTTNFPSslSspcGhNVSWFKAATAsISKlpGpsLpsplh+Els.IlshppAplshsKNplFThLEpRNphQIQsLHKRFLEsLatpsuhLRLs........s+slh+lsppGlFDFSK+hlAHoKsKH-CAL..lGh+hsNslPKllspsKKhRLDcLGRNANaLohh+psspp...hsth+tpll++ll+pLu.....l+p+ ...............................................................................................................h.....ht.p..........hshhP...s.hs..t.hcF.llph.p.Whs....ptshtchhtplhtphpp..thlhhhh+.stppshtlpthsh.capptl.hh....p.lphtpp+p..............t.hcshlhh........pph.lhassp..sslaRslhhh....................................................................................................uhst.hh.sps..t...........................................spthlthhpRl.as.ht.............................................shs....hth........t............pt.s.sttp..hs.pFs.hptlols.phtVNsFNTNhVINhKhshpphst.....hph.plP+.hTpsFVhhKaohKEPuhTVSsFhSsshs.hhsulNlNIpGsh.cFLauhush+halsIcphF.PAslsN.NSoLDlHGLEsQsllRstRpcVaWTTNFPshlSppsslNVGWFKAATAIlP+VSGssLEslLLKELshIpshcplslDas.LHRlFThLEpRNsYQIPFLsKQhl..LFlRsshL+LpGhtpc..lc+hlhcAspcGlFDaSKphhuHTK.KHpCAL..lG.RhsNslPKllspsKKh+LDcLGRNANhLohh+phttt...hsth+hpllhcllttLt............. 
0 0 0 0 +4800 PF03048 Herpes_UL92 2111; UL92; UL92 family Mifsud W anon Pfam-B_2111 (release 6.4) Family Members of this family, found in several herpesviruses, include EBV BDLF4 Swiss:P03223, HCMV UL92 Swiss:P16798, HHV8 31 Swiss:P88920, HSV6 U63 Swiss:P24440. Their function is unknown. The N terminus of this protein contains 6 conserved cysteines and histidines that might form a zinc binding domain (A Bateman pers. obs.). 20.50 20.50 20.50 20.80 20.00 20.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.51 0.71 -4.73 18 82 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 66 0 10 71 1 162.60 28 70.67 CHANGED pCphpplpsslTphpslsslYhCspCtcYHVCDGu.p-CsllsTtEGhVCthTGpshssslpsssthsspshpcsp...tphp.p.hhNllpslhpclhpYhppsss.hsclppplh.scGpLpccltslIphTFscChplhsshpps..hsllsSlYIHlIISlYSs+TlYsshlFKsT+NK+aDsllKpMRppWMssL ..........Cphpplpsshs.p.hpltslYhChpCtphHlC.Dts.ppCslls.T.tEu.hVCshTGhshtshhsssp.h.h.pshppst....p.......shltslhp.lhpah.p.s..hstlhpplh.ptsthp.pltp.l.hTFtpshp.hpth..pth..hsllsphalplIIulauptThYsshlhKso+pK+.DslhKphR.tahss.s................... 1 2 8 9 +4801 PF03038 Herpes_UL95 UL95; UL95 family Mifsud W anon Pfam-B_2060 (release 6.4) Family Members of this family, found in several herpesviruses, include EBV BGLF3 Swiss:P03220 and other UL95 proteins (e.g. HCMV UL95 Swiss:P16801, HVS-1 34 Swiss:Q01023, HSV6 U67 Swiss:P24444). Their function is unknown. 
21.60 21.60 22.30 23.20 18.70 21.50 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.11 0.70 -5.57 20 82 2009-09-11 15:08:21 2003-04-07 12:59:11 9 2 52 0 2 72 0 333.20 36 87.25 CHANGED MhsLsphp..scscsphsc+YpcuVcLALshCEus.P.sQFKLIETPhsSFLLVTNVlPc-ssshsstss..............................pthchpslphs+hpthptlhshphpsstpptsssss.................hhp.psphhtssYllYcppphphALshNKssllppsLchlssPspWsappssDPLslLWLLFhGP+SaCpcssChhtc+hGp..PGPlLLPPhhYcPspDlpoFhshsppYVhshYpch..................................................us.h.shshsPFchsRl+cslppl...schsspslhl....S+pCLLCsLY+QNph....uppss.ssshu.hIILsstutphhso.....htsp+pssousslLaPsYslssLlsslstsssu ..................................................................................Mhtlst.p....schsst.shRaccuVphALssCEus.P.-pF+LIETP.psFLLVTNllPc-ps.h.shss.................................pt.phpphphsc.sths..h..hcppsshpthsssst..............ss.t.hs.psphhhssYllYpKpphchuLo.NKsphlptsLc.lhsPshhsapsspDs.slLWLLasGP+SaCtcssChupc+tGp..shPsLLP.hhYcPspDh.oahshsphYVashYcshch..................................................u..p.h.ph.hhPhshsRl+cslptl...tchsscplsh.....SRsCLLCsLYpQNcl....Appcs.sssauPllI.sstushp.lo......hoahhPupssssLaPsYclupLlssls.sptu......... 0 2 2 2 +4802 PF04817 Umbravirus_LDM Umbravirus long distance movement (LDM) family Finn RD anon Pfam-B_5103 (release 7.6) Family The long distance movement protein of Umbraviruses mediates the movement of viral RNA through the phloem of infected plants [1]. 
25.00 25.00 33.60 33.10 21.00 20.50 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.44 0.70 -5.22 4 24 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 8 0 0 24 0 226.80 42 93.30 CHANGED MoSlINV.ssuKSppscGsstSSVRtGcpptsPtsKPtusHssSR+sKGssHPAsTsKcsppslpuspAsssHpcHtGssl.tEusGuVHssRstRRuRRuGuMcsRQ.TsQPppRtscsch.sERRAplDGLLPPLLDTlsGph.GsAtlLhaCltAl+RpLRp+h.cPlQsspcVAuopGcsssQLs-puspsutsLssDGtGRAstus..psl.pGusV.pVCssCsts ...............MoolINV.ssscu+psRGsspsSlRtGcpctAttsKPtsspsPsRRppGGsaPusss+cspcshptspssssHppHtGoslhREusGGVHssRstRptRRuushuPRQpsssP+QRhApsclssERRAplDulLsPLLDTlctpspGsAslLLaClsAlRRELRc+h.cPVQPsHsVuuoptppusQL.-puspssssLpssGcGpustss.ppslpptssV.pVCssCsh.s.............................. 0 0 0 0 +4803 PF00021 UPAR_LY6 u-PAR/Ly-6 domain Sonnhammer ELL, Bateman A anon Prosite Domain This extracellular disulphide bond rich domain is related to Pfam:PF00087. 17.70 10.00 17.70 10.20 17.60 9.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -10.76 0.72 -3.56 40 1221 2012-10-03 01:43:02 2003-04-07 12:59:11 16 8 118 68 543 1327 0 74.40 20 55.41 CHANGED CasCht..tspsCpss.....sCstspsh....Chsspsth..ss...psphhh+sChp.stC.........ph.shthphstsslph...........sC.CppsLCN ......................................ChsC...ht....s...sts......Cpst.......hpCss.s..psh....................Chs.....s..p....s.....p...h.t..........s..t........t.s.ph.hh+..uC..s.......stC..............p........t.h...s.......h...p...h...ph..................pC..Cp.tshCN............................................. 6 39 65 185 +4804 PF00919 UPF0004 Uncharacterized protein family UPF0004 Bateman A, Moxon SJ anon Pfam-B_1257 (release 3.0) Family This family is the N terminal half of the Prosite family. The C-terminal half has been shown to be related to MiaB proteins [1,2]. This domain is a nearly always found in conjunction with Pfam:PF04055 and Pfam:PF01938 although its function is uncertain. 
21.30 21.30 21.70 22.20 20.90 21.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.67 0.72 -4.16 160 8275 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 3943 0 2232 6265 3365 97.10 32 21.38 CHANGED +lhlhThGCphNhhDSEhhtuhLppt.Gaphssp...c.cADllllNTCulppsAcpcshppltchpchc............st.............................thls...........VsGChuQpts..cclhcths..hDhllG .....................lhlhThGCphNhh...DSEthhshL..p.s.t........G......Y....p......h........s.........s...s..........c.....c............A.....D..........ll..llNTCulp-pAppcshptlsc..htphp................t....................................................................................................................................hhl.s................VsGCh.up.pps...c..p.l.h.c.p.h.s..VDhlhG...................................................................................................... 0 840 1539 1927 +4805 PF03649 UPF0014 Uncharacterised protein family (UPF0014) Bateman A anon SWISS-PROT Family \N 20.50 20.50 20.70 22.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.77 0.70 -5.22 3 1867 2009-09-14 12:52:49 2003-04-07 12:59:11 8 5 1800 0 366 1096 131 235.30 37 93.21 CHANGED hpHsllpL.ht-LuaALhLVllAILIua+EKLuLEKcILWuuGRAIIQLIIVGYVLtYIFSlDsssusLLMlulMlTlAAalAp+RINh+uKohlu.aLaITlGAoThISL...AVLIIssslcFpPhYVIPLsGMIlGNTMNoluLAh-+LustVpSEpcpIps+LuLGATPtQAlAsaIRsAIRAALIPTVNpsKoVGLVSLPGMMoGhlLAGuDPlpAucYQIlIMFMILSTAoLSTIllCYLsYRcaaNu 
.......................................s............sLsl.uh..h.L.....l.l.l.s...l..h...lS.h...t...p...+L......ultK-llhushRAll....QLlll.Gal.Lp.Yl.F.p..l..s..s.h.hl.s..l.Lh.l..lhh.hh.sA.u..a..s..s.....tpR...u.....p.....h.....h.....h....+...s...h...h...............h...l...u...l.s..l..s..s...u.............l..s...L......sl...l....l....l..s..s.sh.....a.tP.t..tlIPIuGMlhGNuMsA....luLshppLtpphpp.c....ppplpptLuLGATs+pAutshl.Rcul+sullPTlDSs+Tl.GLVSLPG..MMoGlIluGssPlpAI+YQIhVh...Fhlh.uss...ul.o..sll.A.s.hLsY+paas........................ 0 125 227 309 +4806 PF01169 UPF0016 Uncharacterized protein family UPF0016 Finn RD, Bateman A anon Prosite Family This family contains integral membrane proteins of unknown function. Most members of the family contain two copies of a region that contains an EXGD motif. Each of these regions contains three predicted transmembrane regions. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.55 0.72 -3.84 145 2676 2012-10-03 02:02:08 2003-04-07 12:59:11 14 8 1140 0 1249 2385 333 76.00 30 60.26 CHANGED a....h..sohshlhlAElGDKTQlsslsLAuth.tpsh.sVhhGsslAhhlssslulhh.Gphl.......sphls.p..hlphluullFlhhG ..................htohshhhlAElGDKTQlsolhLAuca..........s.....h...sVhhGshluhhlsss.....l...ulhh.G.ph.l...........ushl..s.p......hlphluullFlhhu............................... 0 407 768 1060 +4807 PF01170 UPF0020 Putative RNA methylase family UPF0020 Finn RD, Bateman A anon Prosite Domain This domain is probably a methylase. It is associated with the THUMP domain that also occurs with RNA modification domains [1]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.09 0.71 -4.70 16 3872 2012-10-10 17:06:42 2003-04-07 12:59:11 13 26 3399 14 1064 14743 3401 195.00 31 40.56 CHANGED RsaRsastPusLpssLAtAhlpLushpsspsllDPhCGoGTlhIEAALhutphh...........................................hGsDhct+hlpuA+hNsppsGlschlphhphcsspLp...hss....ps-sllosPPYGh+lupptsl.ppLYpphhcph+chhps...hhshhhspppshppshppt..shcththhplthushpht ...............................................................................................................................RGYR....t.p.Gt...APl+.E.s.LAA...ul.l........h....h...........o........s....W..........p............s..........s...........p.........s.........l...lDPhC...GS...G..T...l..h...I...E......A..A...h.h.u.t..shAP.GhpRthhh.ph..............................................................................................................ttttttt.p........php.....hh..G....s.....D....h...D..s......c...h....l.......c.......h.........A......+............p........N.........A.........c.......p.........A............G.......l...........s.......c...........h..........I.............p..........a........p.....t....h......c...l....p....p...lp.....................spt.................thG...s.....l.l...s....N..P....P...Y...........G.....E.......R....l.................s.................s........c..........t.......t........l...................t.L........Y......p.....t.h....G...p.h..h.cp..hts..........hphh.lh..o..u..p......p..h....p.h.hthp......us+ph+hhNG.lcs.......................................................................................................................... 0 349 629 868 +4808 PF01171 ATP_bind_3 UPF0021;ATP_bind3; PP-loop family Finn RD, Bateman A, Yeats C anon Prosite Family This family of proteins belongs to the PP-loop superfamily [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.14 0.71 -4.79 33 7781 2012-10-02 18:00:56 2003-04-07 12:59:11 15 37 4816 13 2118 7133 3921 174.70 26 46.74 CHANGED phllAlSGGsDShsLLhlLtchttp..h..........plsslHlcHslR.p.pucp-tpalpphCpphs..lslhltphsh......st.....tpslEptARchRYchhpchhtppshphllhAHHtDDQhEThlhpLhRGsuh.....tuLsu..hts.....tp.h........ts.hpllRPLL.shs+p-l.pascppplsahcDpSNts.pYp.RNplRpp ................................................................lhlulS.G.GhDS..h.s.LLp..lL...t...p..h....p..p...p.h.sh...................plts..l...p...ls....a...s...h.....p.....t........t.....u........t.t..........t...p..hl..p.p..h.s.p....p.h.s.............l..s.....h...t.l.hp..hsh............................t..t.....s.p.s.h.p....s.....h.......u......R........p.............h............R..................h...........t...........h...........h....t.....c....h.....s..............p...........c..........h......s........h.....s...............t.............l....shu.H......H..t.....D.......D.th....E.....T.h.lh.....s..l.h..+.G..u.th............t.s.l.t....u...hs...........t.p..................................................................ts...hhl...lR...PL..l.........hh........p.c.p......-..l....h....p....a............sp..........t.....p........tl.........h..h...D...s....s....t....h...R............................................................................................................................. 1 734 1357 1799 +4809 PF01172 SBDS UPF0023; Shwachman-Bodian-Diamond syndrome (SBDS) protein Finn RD, Bateman A, Moxon SJ, Mistry J, Wood V anon Prosite Family This family is highly conserved in species ranging from archaea to vertebrates and plants. The family contains several Shwachman-Bodian-Diamond syndrome (SBDS) proteins from both mouse and humans. 
Shwachman-Diamond syndrome is an autosomal recessive disorder with clinical features that include pancreatic exocrine insufficiency, haematological dysfunction and skeletal abnormalities. It is characterised by bone marrow failure and leukemia predisposition. Members of this family play a role in RNA metabolism [2] [3]. In yeast these proteins have been shown to be critical for the release and recycling of the nucleolar shuttling factor Tif6 from pre-60S ribosomes, a key step in 60S maturation and translational activation of ribosomes [4]. This data links defective late 60S subunit maturation to an inherited bone marrow failure syndrome associated with leukemia predisposition [4]. 24.40 24.40 25.50 27.00 20.00 24.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.81 0.72 -4.19 103 652 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 473 7 444 614 94 91.60 36 38.64 CHANGED ssssll+h+.p...tGc+..FElllhsspshpa+.........pGp..........phsls-VltspplFpss...s+Gpp.....AucppLppsF.......G.Ts-hpclhcpILc+GElQloscpR+ph .............................ssslVRhK...p...tGc+....FEl..hs..a....s...c...l...hpaR.........sst.................-tcL--VLps.ppVFtss....uKGpt.......................Ascp-LppsF..................................G.Tsc.p-Ihpp..............ILcKGE.lQlopcpRpt...................................... 0 143 255 370 +4810 PF01142 TruD tRNA pseudouridine synthase D (TruD) Finn RD, Bateman A, Moxon SJ anon Prosite Family TruD is responsible for synthesis of pseudouridine from uracil-13 in transfer RNAs [1]. The structure of TruD reveals an overall V-shaped molecule which contains an RNA-binding cleft [2]. 
19.20 19.20 19.80 19.20 19.00 19.00 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.33 0.70 -5.79 69 2142 2009-01-15 18:05:59 2003-04-07 12:59:11 13 12 1702 7 755 1714 284 327.60 28 79.21 CHANGED M....................sth.h..htstPtssupl+sp......P-DFhVcEhh..s.hpssG.cG-H.lhlplcKpuhsTttlscplA+hhslst+cluaAGhKDR+AlTpQahSl...hsspp......s.ch.........t......slpllphs.RHs+KL+hGsLtGN+FpIhl...R.sls......st...lppplptlttt.GVPNYFG.QRFGps.usNhhhupthhpu................thp.......tc+....................................................ph+uhalSAhpSaLFNpllSpRlc......ttshspslsGDhhhhts.........stshhhspt....tthpt+ltptclp.ouPLhGp.sthhs.pupst.phEpplLsptsh...hhp...thtphth...cstRRslhlhs.pshp......h...sp.....slplpFtLPsGuYATslLREl ...................................................................................................................................h..................t....t.shhpt..........spDF.V.E...............................h...............t...................s............u....p..........G.....-..a..lhlplhKpshsTh..sh....phLu+....hh....t.l.....p.....+.......p.lu..aAGhKD++AlTpQahshp...hsttp............hssh................................................c....shplh.phs..h+pcKL+lGsLpGNt...Fplh.L.............R..pl.......s............................ts................................lc....pt....L........p...........pl................p.......p................t....G...h.sNYFG.QRFG..t......t...........s..s..........s.........h.......p...........huh..phhps......................................ph.................hppc..........................................................................................................................................................................s..h..Rphals..AhpS...hlFNphlupRlc.................phshp.ps....ltGDhl.hts...................................................ptph.ah.s.....p.t.........tt...ppchtt....t.c...l.................hs...us.L....Gt...s.hh.......s....ptt...sh..th-pthl...stp...s..........h
t..........hhh.p....p.h.........p...usR...Rhhhhhs..p.plp.ap..h.....pt...............................................slplpFhLstGuaATsllREl................................................................................................................................. 0 257 430 625 +4811 PF01139 RtcB UPF0027; tRNA-splicing ligase RtcB Finn RD, Bateman A, Eberhardt R anon Prosite Family This family of RNA ligases (EC:6.5.1.3) join 2',3'-cyclic phosphate and 5'-OH ends. They catalyse the splicing of tRNA and may also participate in tRNA repair and recovery from stress-induced RNA damage [1-3]. 20.00 20.00 20.00 20.20 17.80 19.90 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.67 0.70 -5.48 136 2812 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 1686 6 719 2149 307 317.50 32 93.53 CHANGED hpssshlas..ptthl-..................stulcQltslA.sL.....Psl..hc..shsMPDsHhGhG.hsIGuVhuhc.....uhlsPuuVGhDIsCGhphlpT.sLphp-l.....p.s...ph...pcLhstlhcslPsG...........hs......ttst...h..pthpphhtpthphsh.cpth.tht.................................s.ptlsppstp+ut...........t........QLGTLGuGNHFlElp.........h......th.t......h.slpc........sp....lhlhlHoGSRGlGppluscalchhpp....th....tcht...h..pls...........................................................Dc.pLAhhshsoptupcYlpAMshAtsaAhsNRphltchltc........................sh...pphh.........hthphl............h-lsHNhs...........chE..............pH....................tpplhVHRKGATRAh................hGp.llIPGSMGssSYlltGpttutt.......oasSssHGAGRhhSRspA++...................phshccltcphtt.......lpspspps................lh-EuPtAYKcl-pVlpsh..sslschVs+L+PlsslKG 
........................................................hhs............l-...................t.uhpQl....hssA....ph........Phh..hp.....hhsMPDh.H....G...h...G....hsIGushshp.................shl.P..uhVGhDIsC..Ghthhps.sl.ttch.......t........p.h.tp.hhptl....ph...hs.u........................................................................................................................................h.pp.h..........................tplGolGsGNHFhElp.................................................lp..t...sp................lhlhlHoGSRGlGptlsp....ahthht.................................................................................................................................................................thshh.....to.....h...pp...Yhtthth...A.taA.hNRphhht.hhp..........................th..tt..............h.....................hsstHNhs.............phc...........................h......................pphhlpRKGAs..u.................................Gt.hl.IPGoMGshSalltGh.ss..................h..hSssHGAGRhhuRsp.s+p.....................h.sh.pp...pt.htt.......l.spp.t.................hh-Eh..........P.AYKs..lctVhts.......tslhphhhpl+.lhshK..................................................................... 
0 285 480 624 +4812 PF01205 UPF0029 Uncharacterized protein family UPF0029 Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.70 21.10 20.10 20.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.35 0.72 -4.37 179 4099 2012-10-03 01:04:38 2003-04-07 12:59:11 14 14 3898 2 953 2747 154 107.50 44 49.97 CHANGED lK+S+Fluhht.lssp.p......-spphlpplcppa.c....AsH.psaAahl.....s.tssp....................................h.chsDDGEPuGTAGpPhL.plLpt....p.......sltslslVVsRYFGGl+LGsGGLlRAYssusppulp ..................................................................lKKSRFIshl.t...lps..-.........-A+sa..lppl+..p..c..+hc.........AsH.sChAahl.....G.t.p.s..p...................................................................................................h.choDDGEPuG.T.AGhPhL.shLpt....p............slssls.....lVVs...R.....YFGGI..+LGsGGLlRAYususspul.t..................... 0 306 605 816 +4813 PF01256 Carb_kinase UPF0031; carb_kinase; Carbohydrate kinase Finn RD, Bateman A, Yeats C anon Prosite Family This family is related to Pfam:PF02110 and Pfam:PF00294 implying that it also is a carbohydrate kinase. (personal obs Yeats C). 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.45 0.70 -5.09 9 4484 2012-10-03 06:25:16 2003-04-07 12:59:11 12 18 4216 36 1246 6104 1631 239.10 30 56.40 CHANGED shlluGspsasGAslhuuhuAhphG..sGllpVts..sshsslpshhPclhscsh............st.h....thhpphcslllGPGLGps.th......hlphlLspppP...lllDADuLthls.p........sthpssslLTPHssEFtRLsssss..........tss+.phspphupphsssllLKGstsllss...suslhhspsGsushApGGpGDVLuGhIuuhLupst.........sshcAshsusahHutAuphhspctuh..shhsspltchlsp ............................................................................................................................................h.hllGG.....s.....t....s....hsGAs......h......h.....u....u......t....A..A....l......+....u....G..................AG.L...V..................p....l..h.s.......t.......p.....s.....h....s....s....l....t..s...t.....h..P..E...h..M.s.tshp.............................................t.h........t........h....p..p.......s....s..s....l.l....l...G....P....G....L....G....p.....s....p.h...s.tp................................hl.p...t...l.....h......p.....p....t....p...P.....................lllD.............A.....DAL..s..ll.upp.......................................thpp.p..h.....l.....lT...P.....HssEhs.R.L.h.u.h.sh.t........................................hpp..s...+...h.t....s...s....p....p..h...s..p......c...h.....s.....s...s......l...V.........L.........K.........G........s........s............T..........l...........l........s..s........................ss.....p.......h..h....l.......s....s....s....G........s.........s.........u...........h........A........o...G.........G.GD.V....L.uG.hIu..u....l...l.u.Q.th........................................ss...h-.A....u..t..s....u.s..h....lHu...hA..uch.hs.t.p...h.....hhsscl.t........................................................................................ 
0 429 805 1066 +4814 PF00902 TatC UPF0032; Sec-independent protein translocase protein (TatC) Bateman A, Moxon SJ anon Pfam-B_1212 (release 3.0) Family The bacterial Tat system has a remarkable ability to transport folded proteins even enzyme complexes across the cytoplasmic membrane. It is structurally and mechanistically similar to the Delta pH-driven thylakoidal protein import pathway. A functional Tat system or Delta pH-dependent pathway requires three integral membrane proteins: TatA/Tha4, TatB/Hcf106 and TatC/cpTatC. The TatC protein is essential for the function of both pathways. It might be involved in twin-arginine signal peptide recognition, protein translocation and proton translocation. Sequence analysis predicts that TatC contains six transmembrane helices (TMHs), and experimental data confirmed that N- and C-termini of TatC or cpTatC are exposed to the cytoplasmic or stromal face of the membrane. The cytoplasmic N-terminus and the first cytoplasmic loop region of the Escherichia coli TatC protein are essential for protein export. At least two TatC molecules co-exist within each Tat translocon [1]. 
25.10 25.10 25.20 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.57 0.70 -4.94 144 3911 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 3551 0 920 2811 2612 213.10 33 80.99 CHANGED hpH.LpELRpRllhsllulllshh.l....sa.ha.........................spt...........lhp.hl.tpPhtthh........................................................................................................pllhhs..st-sFh.shl+luhhhulhluhPhllaQlWtFlsP.GL.....acpE++hhhhhlhhuslLFhhGshFuYallhPhshpFhhs....ass......................................th...h..........pshhslspYlshlhphhlsFGlsFpl.PllhhhLsp..hGllssphltctR.+asllhhhllAAllTP.PDshophhlulP ...................................................................h.pHLhELRpRLlhsllulllhhl.shh.hF.................................spp.........................lhp..hl..stPl.h.p.t.h.t.t.........................................................................................................................................................................................................................phlsos..lspsFh.s.lKlshhsulhluhPll.lYQlWsFluP..GL.....Ycp....E.+...+h....hh...shlh..so...slLFhhGhsFuYalVhPhshp.Fhhs........hus....................................................................................................................ps.s........pshhslssYlsFlhpl.hh.uF....GlsFE.l...Pl...llh.hLsh..hGllosppLp.ct.R...+asllshFllu.ullTP.PDlhSQhlLAlP.............................. 0 296 615 790 +4815 PF01206 TusA UPF0033; SirA; Sulfurtransferase TusA Finn RD, Bateman A, Eberhardt R anon Prosite Family This family includes the TusA sulfurtransferases [1]. 
27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.07 0.72 -4.37 195 5303 2012-10-01 20:42:06 2003-04-07 12:59:11 12 41 2909 8 1162 3036 759 69.70 28 50.37 CHANGED tplDspGhtCPhPllcs+cslcph.psG.....phl...p.....llsscssuh.pslspascppGt.pllph..pp...........ps..sta.phhl.cK ............pLDshGhtCPhPllts+cslp....ph.psG......-hL...p..................llsssssuh.psIst.asp.pp.Ga..pllsh.pp...........ss....ssa.phhlpK..................................... 0 395 744 997 +4816 PF02381 MraZ UPF0040; MraZ protein Bateman A anon Bairoch A Family This small 70 amino acid domain is found duplicated in a family of bacterial proteins. These proteins may be DNA-binding transcription factors (Pers. comm. A Andreeva & A Murzin). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.18 0.72 -4.22 23 5872 2012-10-01 20:57:08 2003-04-07 12:59:11 13 3 3008 36 1347 3446 3377 70.70 25 92.00 CHANGED hatGshphplDspGRlhlPupLRpthshp....h.slshGhsspLElastspWcphtpc..tchshsphspRthtch ........................hhtGshphp.l.Ds.p.GRlh...lP......sphRc...t....hs...hp..............t....l..h.l.......h.........G.........h.....p..s.......p.........l.t.lastspW.pp.htpc...tp.hs...p....t.....h................................................... 
0 490 935 1179 +4817 PF03650 MPC UPF0041; Uncharacterised protein family (UPF0041) Bateman A anon SWISS-PROT Family \N 20.50 20.50 20.60 22.40 20.40 20.30 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.65 0.71 -4.18 6 818 2012-10-03 12:15:12 2003-04-07 12:59:11 8 10 299 0 552 772 7 104.80 35 81.39 CHANGED hhphth+thWpp.sGPKTVHFWAPshKWGLlhAGluDlKRss-hlSGsQshALluTuuIWTRauhllpP+NYLLuoVNFFltssuusQLsRIs..sYphpssD.hpp...hhhpth.....tucphup .......................h.........hpt..sh.posHF......WuPl...hp.....WGlslA...ulsD.h...p.+...s.sEh.......ISss..............os................AL..sshu.h.lah..............R..au.hh....lpP+Na...hLhusphhstssthhQhhRhh.....pap...h....t...........................ttt............................. 0 173 292 451 +4818 PF03668 ATP_bind_2 UPF0042; ATP_bind2; P-loop ATPase protein family Bateman A, Yeats C anon SWISS-PROT Family This family contains an ATP-binding site and could be an ATPase (personal obs:C Yeats). 20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.89 0.70 -5.33 4 3506 2012-10-05 12:31:08 2003-04-07 12:59:11 10 6 3441 0 667 2112 733 275.40 43 93.34 CHANGED hpllIloGhSGAGKoVAl+uLEDhGaYCVDNLPssLLPchs-hht...s+.oplAlshDlRshs.osclhctt.phhpc.thoP+llFL-AccsTLlRRYp-oRR.HPLuucsLsLEu.IstEpchLEPL+u+AsLIlDTSchSs+sLtEplccthtGsptcpholsVESFGFKYGIPlDADhVFDVRFLPNPHWsspLRPhTGh-tsVusal.++sEspcFl.ph+chLshhLPhhc+ps+SYlTIAIGCTGGKHRSVhIAEpLu-YF+uctpssh..pHRsLE+c 
............................................pllIloGhSGuGKol.AlpsL........E...DhGaaCVD...N......L....P.ssL....LPc.h.s.c..h...ht.....................t.......s....p........p........c.l.AlslDlR.s..t..s..h.h....s.....p..l.h....ph....lt..p..h..t.s....p.......sh...s..hp.lLFLDAscpsLlpRY.p.-TRRhHPL...u...............s..........p..................s..................h...............h...........l....-........u..Ipt.E....RclLps.L.+stAs..hl.lDTopho......s..+cL+c...p.......lpp..t...h...t...s...p...p..p....p.....p...h....p....l..p....l.SFGF.K.a..G..lP.l.D......A.....D.....hVFDV.R.F.LPNPaa.s.cLR.P....h...TGlDp.sVtsYlhp.ps-sp.pFhpplpsh.Lchh...LPtap.c.-u......+S.............h.LTlAIGCTGGpHRSVhlAEpLuc.hL....p.......p.......c.......t.......p....s.....st......pHR-lt+...................................... 1 224 442 572 +4819 PF01985 CRS1_YhbY UPF0044; CRS1 / YhbY (CRM) domain Enright A, Ouzounis C, Bateman A anon Enright A Domain Escherichia coli YhbY is associated with pre-50S ribosomal subunits, which implies a function in ribosome assembly. GFP fused to a single-domain CRM protein from maize localises to the nucleolus, suggesting that an analogous activity may have been retained in plants [4]. A CRM domain containing protein in plant chloroplasts has been shown to function in group I and II intron splicing [5].\ In vitro experiments with an isolated maize CRM domain have shown it to have RNA binding activity. These and other results suggest that the CRM domain evolved in the context of ribosome function prior to the divergence of Archaea and Bacteria, that this function has been maintained in extant prokaryotes, and that the domain was recruited to serve as an RNA binding module during the evolution of plant genomes [4]. YhbY has a fold similar to that of the C-terminal domain of translation initiation factor 3 (IF3C), which binds to 16S rRNA in the 30S ribosome [1][2]. 
20.80 20.80 21.10 21.40 19.60 19.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.49 0.72 -3.98 144 3330 2009-09-11 02:11:35 2003-04-07 12:59:11 16 11 2788 4 855 1854 228 83.80 36 62.62 CHANGED Lo.scp+chLRuhu+pL..cPl.l...tlG+sGlocullpplcpsLcs+ELlKl+lhp.s..stps....pcphsppls.........pp.ss.upllp....hlGcsllLYR ...............LosKQ+paL+uhAHsL...cPl..l.............lGcsGls-slltplcpAL-.p.+ELIKVclhp..s.....sc..-s........pppls-tls..........................................cp.os.uphVQ.....hI.G.+.h..lVLYR.............................. 0 200 497 697 +4820 PF01894 UPF0047 Uncharacterised protein family UPF0047 Enright A, Ouzounis C, Bateman A anon Enright A Family This family has no known function. The alignment contains a conserved aspartate and histidine that may be functionally important. 20.20 20.20 28.00 21.30 18.80 17.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.34 0.71 -4.28 109 2428 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 2035 22 936 1913 1389 118.70 38 81.59 CHANGED hcITspl...pphlppo..ulpsGlshlFs..HTTAulslNE.......shDPsVppDltphlscllP......pst..............tYpHsp......................ssusAHl+usLh.GsSlolPlpsGcLhLGTWQuIahsEaDs........s+.pRplhlplh .........................h.lTccl..pph.s.p...h....s......s.lps.GL..lplhl..HToAuLslNE............NsDP.s.V+pDhpph....hp+l..l.Pcss...................................tY.cHsh.p.Gs................................DchsAHlK.uull.G.sSls.lPl..p.pG+LtLG..........TWQGIaLsEacs........sp.pR+.llspl.................................... 
0 352 620 800 +4821 PF01458 UPF0051 Uncharacterized protein family (UPF0051) Bateman A anon Prodom_3219 (release 99.1) Family \N 20.80 20.80 21.30 20.80 20.70 19.70 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.03 0.70 -4.91 122 7024 2009-09-11 15:27:28 2003-04-07 12:59:11 12 12 3542 6 1556 4676 4761 229.60 32 51.85 CHANGED s.sssstts+hllhltcsupss..ll-p.................s.ssssshpssssclh.lscsAplpahp.lpp.......spssh.phsstts..phtp.suphp.thh.shGuphs+tphpspL.tGpsupsplpulhh.spsppphDhpstlpHtu.p.spop.hh+ullpc.....pu..cslFpGhlplppsA.pposup.pscsLlL.ocpAcssohPpLEIhsDDV.csuHuAol.GplDc-pLFYLpoRGlscpcApplllpu.F ..........................................................................s.thstapRsllls--supls..alEs..................ssshtsss.sh..puulV...El.h.ltc.sApl+...ass.lpsh..........upssh....sh....ss.+R.u.....hs.t.c.suphp..Wsps..ph.G.u.tl..o..h..ph..ssshL..pG-supuchholuh.ss.s...p...Q...p.....t.Ds.ss+hhHhu.p.Tp.SpIlpKul.sps................p.u.ps.sapGhlpltpsA.ptu.c..........up.pscsLll..s..cpupu.-ThPhl-I.......c.ss...sl.ph.pHtAol.u+ls--QLFYLhSRGlscc-ApphlVpGF...................................... 
0 534 1049 1341 +4822 PF01933 UPF0052 Uncharacterised protein family UPF0052 Enright A, Ouzounis C, Bateman A anon Enright A Family \N 21.00 21.00 23.00 22.80 20.90 20.40 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.02 0.70 -5.10 32 3153 2009-09-11 09:44:17 2003-04-07 12:59:11 13 5 2883 21 736 1931 763 296.60 33 87.30 CHANGED lVslGGGTGLuplLpGL+ph.................ssclTAIVTVuDDGGSoGcLRcths.hlsPGDlRsslhsL.u-.....pphhtplhpaRFp.....................................................ssstLpGHshGNLhLsuhpphtsshtcAlphhuplLtlcG..+llPh..opcslsLtAchp.....sG.........phlhGEspIsp...................................................ttthlcclhlp.........sppspss.cul-AIp-ADhIllGPGShaTSllP.LllssIscAlpcop.A.hlhlsNlhsp.G.Es......sthsssc+lcshtptsucthhDhlllsspt.sstp.tpphhpcssp.sphsttthcphshphhtsphlttt ...............................................................................................lVslGGGpGLu.hlLpuL+ph........................................ssclTAIVTluDsGGSoGclRcph.s.hl....sP...GDl.RNsLs...uL.......o-....................sphhpp..l.FpY.RFs..............................................................................ssupLuGHslGN......L.hls.u.l.........s.p.h..p.s.s...........h.h.....pA....l....p.hlsplLplcG.....+llPh.....o.p.p.slsLp..Ahhp................sG..................phlh...GE..s..pIsp............................................................................................................................................hps..hlcclhlp.......................................spstAspcs.l.....cA.....IpcA....D..h...IllGPG.ShaTSllPsLll.....sp.lucAl....t.......c........o........p...A.............hlYl.sNlhsp..G.ET.............ssho.stc+l....cslppa.....l....u.....p........hl...D....s....V..l...l..ss..p.......s.....p.t...h.t.p.t.hh...p.c..h...h..s...p.h.sh.t...t....pp..hpsh.......ht.............................................................................................. 
0 227 481 642 +4823 PF02130 UPF0054 Uncharacterized protein family UPF0054 Mian N, Bateman A anon IPR002036 Family \N 25.00 25.00 27.50 26.30 23.70 24.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.90 0.71 -4.91 159 4595 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 4493 7 1042 2973 2301 142.80 31 85.81 CHANGED l..phppptths..................th..lpthhphhttpht.................lslhhsscpplppLNppaRs+DtsTDVLSF.shppsst....................................................LGDIlluh-tstcpAtc.hs.+s..hpcclsaLhlHGhLHLl..G..YDHp.p.p...p-tptMcthEpplLppls .......................................h.tp............................hpphlphsht.p.thttt.............ElolthVDspchppLNtpYR.scD...........pPTDVLSF..shcp.tth.t.h.........................................................................LGDll..Ishphspc..p.........ApEhG..+o................hccchuaLs............lHGhLHLLG....YDHh..p..s......-EtccM.uhppcILpth.t.................... 0 363 685 878 +4824 PF01679 Pmp3 UPF0057; Proteolipid membrane potential modulator Bateman A anon Pfam-B_2192 (release 4.1) Family Pmp3 is an evolutionarily conserved proteolipid in the plasma membrane which, in S. pombe, is transcriptionally regulated by the Spc1 stress MAPK (mitogen-activated protein kinases) pathway. It functions to modulate the membrane potential, particularly to resist high cellular cation concentration. In eukaryotic organisms, stress-activated mitogen-activated protein kinases play crucial roles in transmitting environmental signals that will regulate gene expression for allowing the cell to adapt to cellular stress. Pmp3-like proteins are highly conserved in bacteria, yeast, nematode and plants. 
20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.53 0.72 -4.26 117 1655 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 1131 0 687 1065 84 50.50 45 65.92 CHANGED tcl..l.hlllAIl....LPPluVal..ctG.hutch..hINllLTlLG.a....lP........GllHAlallhp .............................t..hhcllluIl....LPPLGVhl....tpG..h...GhsF.....lINILL..TL..LG.Y...................lP..............GlIHAhalh.................... 1 179 378 588 +4825 PF01893 UPF0058 Uncharacterised protein family UPF0058 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein has no known function. 25.00 25.00 40.60 40.50 24.10 19.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.00 0.72 -3.93 23 129 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 72 2 82 129 5 85.20 36 91.95 CHANGED MH....K-ELlpLHphhspl++ahc.......ppsssptFcpY-pLcIsPsHlH+oKsEHKaAIFlLusslAphhu.pc-ts.spplup+h+-hA-csh+ ......M+K-ELlcLHphLsplpchhcp......ppssst.hcpYcpLslpPsaIHKoKsEHKtAlFlLussluphhu.pc.cts.stphstthtp.s.p...t.................. 0 13 54 73 +4826 PF02694 UPF0060 Uncharacterised BCR, YnfA/UPF0060 family Mian N, Bateman A anon COG1742 Family \N 22.30 22.30 22.50 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.62 0.72 -4.03 4 1396 2012-10-02 19:55:49 2003-04-07 12:59:11 10 3 1342 0 296 828 194 106.10 50 96.41 CHANGED hl+olLLFlhAuLsEIGGuaLVWLWlREs+uhhhulsuuIhLsVYGalsTLQP.AsFGRVYAAYGGVFlshulhWGhhVDshpsDhaDWlGAhIsLsGVhVIhaAs.Ru ....................................hl+ohhLFhlsALsEIhG.C.aLsW.L.WL+c.s..tShahllPuuloLuLFsaLLT.LpP..uu...u.........GRVYAAYGGVYlssuL.hWLhlVDG..h+.shaDhhGAlIsLsGhhlIlhu..R............................. 
1 88 179 236 +4827 PF02696 UPF0061 Uncharacterized ACR, YdiU/UPF0061 family Mian N, Bateman A anon COG0397 Family \N 19.90 19.90 20.10 19.90 19.60 19.80 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.86 0.70 -6.20 3 2147 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 1912 0 684 1870 2381 431.40 40 89.41 CHANGED soLSslTcths.lspsYsAhDPVsLshF.ssRLla+Ns+LAssLu.....hcPSpLpcNuhsEh..V.st..EuLLsG.......shsPLApVYSGaQFGsaAuQLGDGRGlLLG-.hLsDGsohDWp...LKGAGhTPYSRhGDGRAVLRSoIREsLASEALHpLGIPTTRALSLVpossShspR-ssEP..uAVlhRFAPSHIRFGsFE+F+YRH-oEpltQLuDasIccYau......................Esph..pluDDE........D......KYctFFRcVVuRsAsLVAQWQAsGFAHGVLNTDNMSIhGLTlDYGPFGFLDcYEPuFIsNHsDauGRYSFuNQPAVshWNLQRLApoLSsllus-A..........LN...EALEc................YcpVaLT+YuplMuQRLGls.sLtcpMo.......Ep.....KEhs-sLVspLLslLApo+sDYscFFtpLp......Focs+SDsAsLhht.DE.....Flsu......A-a--WFAth.....hpG.......+LQ......QDhLppVPPoEhAARsoLhcpANPLhVLRsWllEcVl-...Aup+DG........DhosL++LaptLpNPa ...........................................................................................................................................sspll.h.hs.p...t.lA.t.tLu............s.t..h......p..t.t.h.h...........huG......ppl.h....sG.....................................h.p.PlAp...sY.u.G......H..Q......FG.s.......a..ss.....pLG.DGRulL..L..G..E......h.........t....s.....s............G.....p.........p....hD.hp...................LKG....uGhTPY............S.R..h......G....D.....GRAVLRSslREaLsSE..................AMa.t.L.GIPTTR..ALul....lso.s.....p.......sV..h......R..............E........p..........h..........-...s..............GAllhRlAsS.HlRhGpFE....a...a..............h..............h.....p...............t...........p.....................-.......p..............l......+.p........LAD..a....s...I.c.+...aa.P.............................................................................................th.t.ssp...........................p..............tY.hth....hppV...sp....RsApLlApWQsVGFsHGVMNTDNMS.IhG
.TlDYGPauFhD.........s.......a-PsalsNp...sD....p.p.G.RYuasNQPsluhWNLt....+LApsL..h..P..l..l...s.......-..t...............................lp.......p.u...L..s.p...................................................................ap.ph.......h.....s...pa....t....p.t....M....c...pKL...Gl................hs...........................p-...Dp...t...Ll..spLh.plM..tp.....pp.s..DYTp.hF...RtLu....................................t..p.t....t....s...s...s..s..l...h............t.t.....................hh.s.p....................................................sta..c.t.Whtta.........tt..........................Rlt.........................pc...................t.s....t.....p.................p.tph..Mp......ps.NPthl.RNalsppAIc...tApp.G...........Dh..s.lccLhpsLppPa.................................................................................................. 0 200 396 551 +4828 PF03401 TctC UPF0065; Bug; Tripartite tricarboxylate transporter family receptor Mifsud W anon Pfam-B_3343 (release 6.6) Family These probable extra-cytoplasmic solute receptors are strongly overrepresented in several beta-proteobacteria [1]. This family, formerly known as Bug - Bordetella uptake gene (bug) product - is a family of bacterial tripartite tricarboxylate receptors of the extracytoplasmic solute binding receptor-dependent transporter group of families, distinct from the ABC and TRAP-T families [3]. The TctABC system has been characterised in S. typhimurium [2], and TctC is the extracytoplasmic tricarboxylate-binding receptor which binds the transporters TctA and TctB, two integral membrane proteins. Complete three-component systems are found only in bacteria [2]. 
21.20 21.20 21.20 21.50 21.00 21.10 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.38 0.70 -5.47 9 6762 2012-10-03 15:33:52 2003-04-07 12:59:11 9 22 1130 7 2903 6666 2997 269.60 28 83.24 CHANGED lARhluppMucpLGQPVlV-N+sGAGG.IuushVApuAsDGYTlhl.susuh.slssahYsplsYcsh+DhsPVs.lsssPhVLVVsusSshpslp-LlshA+ssPsKLoaASsGhGoosHLsuElhpucsGsphhHVPYKGuuPAlpDLluG+VDh.MhsslsostshIpus+lRALAVsotpRpstLP-VPTlAEsG.ltGh-shsWaGhhAPpGTPssVlp+LssAhppAhpsPtlhcthpshGhpsh...ssoPpphsphhpuEspRWutLIpchGl ............................................................................ARhlupt.h..s..p..t..L....G...p..s..l.l.V..-N......+.s.G.A.u.G..slG.s..s.t.l.u..c.u.s..s...D..G.Y.Tl.hh....s.s..s.uh....sl....ss...t...l...h...t...p...l...s...a..c......s...hc.D.h..s.P.l.u.h...l.u...p.s.s.h.l.l...l..V.s......s....s....s....P....h....c....o.lp-l...ls..h..u..K.s...p...P...u..p...l.s.a..u...o..u.G...s...G.o.s.s....H....L..s....s.t...h....h....t.p......t.s...G....l......c......h....p....a.......V...P...Y...+....G...u..u...s.A..ls.sll...u...G..plsh.....hh........s...s........h.s.s.....s....h....s.....h.....l.......c......u.G..+..l+.s.L...A......V..s.......u...s.....p.........R.........h.........s.........t...........l.........P.........-.....V..PT........h........s.E.t...............G.....h.......s.........h...p....h.....s.....s.......W....h...........G........l........h.....u...........P..........s.us...P...s...sllpcLssslp.p..sl.p.s..s.p.h..p.......p..p.h.p.p.h..G..h...p...s.h......st.s...s..p.ph..s.p...h...lpp-...htca.tpllct.t.h........................................................................... 
0 403 1505 2405 +4829 PF01980 UPF0066 Uncharacterised protein family UPF0066 Enright A, Ouzounis C, Bateman A anon Enright A Family \N 21.40 21.40 21.40 22.60 20.70 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.58 0.71 -4.15 184 2012 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 1761 6 580 1490 102 122.50 43 54.57 CHANGED p+hssP+Q.sth..............sspuplclhspa....tpuLpGLEpaSHlhllahhHcs..................................hhp.P+h.t.......ss.tphGVFATRSPtRP...NPIGlSlVcLh..plcssp.....LtlpGlDh....................lDGTPllDIKP.Yls...hh.Ds ...............................................KhuhPRQ...sslsp......sspuplcLh.s.a..........ts-ulcGLEs..F..SHlWllFhFHps.............................................................................p...shcsp.V.R..P.PRL..s........Gst+hGVFATRSsaRP...NPIGhSlVcLc..plc..spp....................Lplu.ulDL....................lDGTPllDIKPYlPas..p........................................... 0 215 365 478 +4830 PF03006 HlyIII UPF0073; Haemolysin-III related Bateman A anon Pfam-B_1581 (release 6.4) Family Members of this family are integral membrane proteins. This family includes a protein with hemolytic activity from Bacillus cereus [1]. It has been proposed that YOL002c encodes a Saccharomyces cerevisiae protein that plays a key role in metabolic pathways that regulate lipid and phosphate metabolism [2]. In eukaryotes, members are seven-transmembrane pass molecules found to encode functional receptors with a broad range of apparent ligand specificities, including progestin and adipoQ receptors, and hence have been named PAQR proteins [3]. The mammalian members include progesterone binding proteins [4]. Unlike the case with GPCR receptor proteins, the evolutionary ancestry of the members of this family can be traced back to the Archaea. 
26.20 26.20 26.50 26.50 26.10 26.10 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.93 0.70 -4.94 81 4985 2009-01-15 18:05:59 2003-04-07 12:59:11 15 18 3463 0 1622 3448 460 203.00 27 78.87 CHANGED ashppEhsNshTHhlGslhshhsh....hhlhshuthtt....s.ht...........................................................................hlshslashuhhhhhhsSslaHt...hsptc..................scthhppl.D+suIalhIAGo....YsPhhhhsh.....................................p.s.hs.hhhhsh....lWs.hulhGllhphhhh.........................tph..+hlpshhY....lshGahslhslhth..hhthsshs........lhhlhhGGlhYslGul.FYuh+.........................................................h...a.....tHtIaH..lFVlhuuhsHa ................................................................................................................................................htpEhsNshoHh........l.G.hlhs.l..h..sh....s.h..l.....h.h.h.u.s..tt............shht.....................................................................................................................................................................................hhu.h.slas...huhhhha.h.sSolYHs.h......h...st...p..................s+th.hp.th.DHsuI..alhIA....Go.......YTPh..hlhs.h.............................................p...s.........h.u.....h........hl..h...h.l.........................lW.s...h.u.lh.Gllh..c.h.hhh................................pph....+hl.sh......s..h..Y....lsM...GW.hs.l...hslhth......hht.....hss.hs.......................hhhlhh.G.GlhY...ol...Gsl...FYst.+...........................................................................................................................................h.......a...............tHtIaHlFVlsuuhhHa................................................................ 
0 475 832 1230 +4831 PF02082 Rrf2 UPF0074; Transcriptional regulator Mian N, Bateman A, Yeats C anon IPR000944 Family This family is related to Pfam:PF001022 and other transcription regulation families (personal obs: Yeats C). 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.41 0.72 -3.86 118 8068 2012-10-04 14:01:12 2003-04-07 12:59:11 15 22 3692 17 1984 8534 1782 81.20 29 53.34 CHANGED Mp.lospscYul+sLlhL...At.ptspt....lsspplAp..pt.sls.saLp+lltpLpcsGllcShRGstGGapLA+sspcIolt-llcul .......................................Mp.loscscYul+.sl.lhl................Ah.....p..s...p....t..t................loh.p.plAc..p..sl.s..saLcplhspL..p+..s.G..l..l..p..o...h......R...G.s.t....G.G.a..p.L....u.+.s.s.pc.Iols-llcs................................ 0 686 1302 1664 +4832 PF03702 UPF0075 Uncharacterised protein family (UPF0075) Bateman A anon SWISS-PROT Family The proteins is this family are about 370 amino acids long and have no known function. 
20.40 20.40 20.40 20.40 20.30 20.00 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.13 0.70 -5.56 9 2041 2012-10-02 23:34:14 2003-04-07 12:59:11 9 6 1964 6 505 1648 2620 357.50 42 96.82 CHANGED .hYIGlMSGTSlDGlDhsLlchst...scspLltuchh.PhPssLRpplhsLpps.sssoLpphGpL-pplGhLaucslsphLpppplpsspIpAIGsHGQTlhHpPs.uphPFThQlGDssllApcTGIssVuDFRR+DlAhGGQGAPLVPAFHpAlFtssspspsVLNIGGIuNlSlLhPstsVhGaDTGPGNsLhDAWhp+aps..tsaD+sGtaAupGpVstsLLspLLs-PaFuhPuPKSTGREhFNLsWLpcpLtpt..............spDVQATLsELTApoIs-ultpttss.spcLlVCGGGA+NslLMtRLuthLPs.hpVsoTsphGlssDhhEAhAFAWLAapplssLPGNlPuVTGApphshLGAIaPs ..........................t.hhIGlMSGTSLDG.lDssLsphct....................pp..l...t..h....l....u....s..h..s..........h.....Ph.s.......ss...........L.+.......ptl......h......s.....l......p.p......s.......p..................p......t......s...l........p......p......h...u..pL...-........pp............L.u.p.haApAVp.tLLp.....p...ps..l...p.s.pcI.sAI.GsHGQTVhH..p...P...................p......s............................h..........s...............a..........Tl.QIGD.sshlAtp....T...GIsVVuDFRp.+DlAh..........GGQ.GAPLVPAFHpuL....h.......s.......c............s....s......c.......p......R.......h...l......LNIGG..IANlol........L.............................s....................s.......p.......s...............l.........h.......G..a.....DT.GP.GNhLhDu...Wh..t..+.p.t..u......................psYDcsGpa.A.tpG..p..V......sL..L.p.p.....h......L..s.....-..P.YFstssPK.STGRE.hF....s....hs....W...Lp..p..p..Lp..phst....s...................spD.....l.tATLsc...l.......T....A....hoI.......u....cp....l....t.............t.........h............s.....s............s.......c..................clhV..CGGGu+NshLMtcLtsh.L.........s..................s.......hp.....V.p...oo.-.s....h..G.....l.s...u.....Dth.E.............AhAF.AWLAh.csl.t.Gl..PuNlPuV.....TGAsp..slLGsla..h.............................. 
0 141 308 416 +4833 PF02367 UPF0079 Uncharacterised P-loop hydrolase UPF0079 Bateman A anon PSI-BLAST P31805 Family This uncharacterised family contains a P-loop. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.79 0.71 -4.49 14 4392 2012-10-05 12:31:08 2003-04-07 12:59:11 12 12 4337 6 946 3129 2397 122.70 35 71.63 CHANGED ThplGpplup.L.....psGsVllLpGDLGAGKTshs+Glupul.Glpt..tlsSPTFsLlp.Yp....sshhlhHhDlYRLtsh-.hphh.....t.h-h.hs.-ullslEWuEthsp..Lspppl..plplchhspucpph .................................h.tlupplu...ph.h........p.su...s...l...lhLpGDLGAGKTTho+.Glsc....u.L....Ghpt........sV.p.SPT...a...o....ll.c...p....Yp......................sp.h..s.l..a..Hh..DlYRLs.cs.c.E.h.tth............................Ghc-a.h....hs.suls.llEWs.p.t.h.ts....hL...P...s....s.p...l...plplphtspsp...hh................................................................................ 1 332 637 811 +4835 PF03652 UPF0081 Uncharacterised protein family (UPF0081) Bateman A anon SWISS-PROT Family \N 29.20 29.20 29.20 29.60 29.10 28.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.52 0.71 -4.05 174 4409 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 4296 8 962 2703 2115 133.70 34 91.91 CHANGED tplLulDhGp+RIGlAlu-shthh.Apslpslp.pp.............tshppltpllpch......p..ss.tlVlGlP.........hsh.s.Gststtspts.ccFuppLppph.........sl...s.lhhhDERhoohtAp.phl..h.t.......tsh.pp.....pc++p...hlDphAAslILpsaL-p .............................plLulDhGoKplGVAluD..hs.hh.Aps.Lpslp...sppt.............p..shspl.tcllcca.......p..ss....tlVVGLP.......................hsM....s.Go.p.us.t.sp.ps.cpFup.pLpp.ph.............sl.....s.VthhDERLoTstAc..ptL...h...p..........................tsh..p+............pc++p.....hlDphAAsl..ILpsaL-.................................................... 
0 324 638 820 +4836 PF01868 UPF0086 DUF49; Domain of unknown function UPF0086 Enright A, Ouzounis C, Bateman A anon Enright A Family This family consists of several archaeal and eukaryotic proteins. The archaeal proteins are found to be expressed within ribosomal operons and several of the sequences are described as ribonuclease P protein subunit p29 proteins. 21.00 21.00 21.00 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.76 0.72 -4.34 36 446 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 413 9 311 441 62 88.50 28 44.89 CHANGED hlsspNllp....HELlGLclcVlpopssshlGlcGhVlcET+solhltTpcs...tplPKctslFpFplsstp............................VclcGshLhuR.PEcRlc++ ..............h....tpLhp....t-hpGshlpVscSps...suhlGlpGIVlpETcpshhl........l......s.cc...s.....p......h..............+.h..........lPKpsslFp..hplss.t..............................................................................................hplhGpph.hRstcRht+............................. 0 99 173 254 +4837 PF03007 WES_acyltransf UPF0089; Wax ester synthase-like Acyl-CoA acyltransferase domain Bateman A, Auchincloss A anon Pfam-B_1896 (release 6.4) Domain This domain is found in wax ester synthase genes such as Swiss:Q8GGG1. In these proteins this domain catalyses the CoA dependent acyltransferase reaction with fatty alcohols to form wax esters [1]. 
20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.70 0.70 -4.83 16 1646 2012-10-02 12:01:53 2003-04-07 12:59:11 11 16 348 0 475 1277 107 248.90 24 54.16 CHANGED LushDuhFLhhEsssp.hHlGtlslhchs.tsss........hcchtsshtpptphhPhh+p+.hsh.hshststWhsDschDlsaHVRRsALPuPGshcELh-LlucLtupsLDRsRPLWEsallEG.LssGRhAlhhKhHHAlsDGVuuhplhtphhstsPcssshsss.p.ssss....psphpttu....................hshsptltthssulsGsspssschhttsh.........spssshshsss......poshN.sslutpR....RhustplsLscl+tVscthGsTlNDVs .......................................................................................hsshDthalhhE..s..sp..t.....h.H.sutl.hhhp.s......sss.......................hcp.h.h.pt.h.t.p.....p...h.t.hh..P.h.h.c....p.+....h...........h..s.h...s.t.....s.hW..h.p..-s...p.hD..lshHl....R.....+....s....u.L....P......s....P.....G...s......h....c...........-.....L.....h..c.hl.....uc..l.t..up..L.D.+..s.R..PLWEh.....a.l......l....E........G......L...........s......s............s..............R......h......A............l..h.hKhHHulsDGlu.u.hp.lh...tph.h....s.....t.s...s......s..s..s...h..s..s.......h...t...ssts.........tttht..s..................................................................ht...h.hp.t..l..t....t....h......s..s..s..l...t.......s....h....s......p...s.s..h.p.hhttsh................st...p...h..s..h...s....h...s...s.s......................to..h....N....s..s.l...s...s...tR......................+.h..u.s...t.p.hsLscl+tlt.p...t..h.......s...solNDVh............................................................................................................................................................ 
0 129 319 422 +4838 PF03653 UPF0093 Uncharacterised protein family (UPF0093) Bateman A anon SWISS-PROT Family \N 24.50 24.50 24.60 25.00 24.40 24.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.15 0.71 -4.25 64 1256 2012-10-01 21:57:53 2003-04-07 12:59:11 8 1 1156 0 386 1034 1528 142.40 37 93.54 CHANGED h..shY.WlKuhHlluVluWMAGLFYLPRLaVYHs-st.sts.ptpphFp....hMEc+Lh+hIhsPAMlsohlhG.lh.lshts....uhh.hs..sWh+s.KlshVllhshaHhhhsthtKchttspsphos+haRhhNElPTlLhllIVlhVllKP....F ...........h...hahWl...KuhHllulluWh..AGLFYLPRL..FVYHupsp.ss...s...........t....p.pphp.....lME++La+.hIhs.PuhlsollhG.hh...Lh.ht.........thhh..h.st..GWhHs..K.....LshVlLLlsa.Hh.h..s..u..t...hh...+phtp.sp....s.p..+.S.t.+.aa..Rh.hN..ElPsll...h...l.s.IVlLVllKPF..................................................... 0 107 248 316 +4839 PF02016 Peptidase_S66 UPF0094; Peptidase_U61; LD-carboxypeptidase Bateman A, Studholme DJ anon SwissProt Family Muramoyl-tetrapeptide carboxypeptidase hydrolyses a peptide bond between a di-basic amino acid and the C-terminal D-alanine in the tetrapeptide moiety in peptidoglycan. This cleaves the bond between an L- and a D-amino acid.\ The function of this activity is in murein recycling. This family also includes the microcin c7 self-immunity protein Swiss:Q47511. This family corresponds to Merops family S66. 
19.70 19.70 19.70 20.10 19.40 19.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.71 0.70 -5.46 167 2826 2009-09-11 05:12:09 2003-04-07 12:59:11 10 10 2139 40 547 2166 479 284.20 28 90.74 CHANGED lulluPS....sshpp.p..ph..ppulphLc.shGhplhhuppshpp..ht.....hh...uussppRsp-LpphhpDsslc..uIhsspGGaGusRlLshlD...as.....hl...ppp........PK..hhlGaSDlTsLthslhtps.Ghso..hHGPh.....hss.ht........t..................sshohpphtph...Lhs.............................t.ths.sssththhtsG..........pspGpLlGGNLsll.............spLhGTsa.hs....................phcs.......tILalE-lsEt....sacl-RhLtpLphuGhhcplpGlllGchsth.........s.t.stslpcllpchh..tths.....lPllhshshGHst.sph.....slPlGspspl..s ...............................ltlluPS........sthpt...p...th...ptulppLp....shG......hplh...s..pthhcp............t...hh.....uu.os.ppRhpDLp........phh..p..s..s..s..l..c...hlhssh.G.GasusRLLsh.lD......ap.......tl.......pps.........PK.....lhhGaSDhTulptulhtps...slh.T..aaGPh..hssths..........t.................sshohppahph.....lps..................................................................thssth.th.hs.ss......................pspG.plhGGNLshL..............t.tlh.G.T.sa..hP............................phcs.......tILhLE.-.s.sEp........shcl-R...hLhpLtt....s.G.lhsphpulllGpaptt....................s.stsh..sh.pp.ll.tphh.....tphs..................lPllhshshGHsp.s.p.h.....slPlGspupl................................................ 0 184 356 461 +4840 PF01981 PTH2 DUF119;UPF0099; Pep-tRNA_hydrol; Peptidyl-tRNA hydrolase PTH2 Enright A, Ouzounis C, Bateman A, Mistry J, Wood V anon Enright A Family Peptidyl-tRNA hydrolases are enzymes that release tRNAs from peptidyl-tRNA during translation. 
21.10 21.10 21.90 21.20 20.80 19.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.37 0.71 -4.39 69 1006 2012-10-02 19:40:38 2003-04-07 12:59:11 11 13 703 22 562 916 123 109.50 31 60.74 CHANGED phKhllVVRsDL+MuKGKlAAQsuHAulushhph.........hpts...............................phlcpWh.ppGQtKlVL+sp..spccLhcltppAcphGlsstlIpDAG+TQls.PGohTlLulGPuspphlDclTGcLKLL .............................................t.hlllR....s....D....L........p.....M....s........p.........GKlAAQ.su...HAulsshpth............ppp..s..t.........................................phlc.p.Wc...p..s...G..ptKlV..lcs.t........s.p..p....phh.......p...L...t.......t.p.Ap.p....h.....sl....s....st.....l.lpD........A................G..hT...p........l....s....s.......u.........o..h.....T..slul....tPsstp.lsph..stpL+Lh..................................... 1 194 334 470 +4841 PF02021 UPF0102 Uncharacterised protein family UPF0102 Bateman A anon SwissProt Family The function of this family is unknown. 21.20 21.20 21.20 21.30 21.10 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.07 0.72 -4.06 8 3091 2012-10-11 20:44:43 2003-04-07 12:59:11 12 8 3030 1 769 2274 378 92.40 35 72.54 CHANGED tuEshAscaLcopGhpllsRNaRs.phGEIDlIApc..scplVFVEV+sRsussassh.htuVT.+KtcKlhcTAppaLApps..hpssssRhDVlsVh .........................h.uEphA.tpaLp.p.p.Ghplls..pNa+......s......+..........h........G....EIDlIhcc.............sp.......s......l...V............FVEV+h...R...p.....s.....s..t.....aG.s...............s...t...tu.Vo..hpKpc+lhp.sAp.ha...L...t...p..ps......ht...s..s.....s.....s..RFDllul.t.............................................. 0 263 523 664 +4842 PF01875 Memo DUF52; UPF0103; Memo-like protein Enright A, Ouzounis C, Bateman A anon Enright A Family This family contains members from all branches of life. 
The molecular function of this protein is unknown, but Memo (mediator of ErbB2-driven cell motility) a human protein is included in this family [1]. It has been suggested that Memo controls cell migration by relaying extracellular chemotactic signals to the microtubule cytoskeleton [1]. 19.90 19.90 19.90 20.00 19.70 19.50 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.79 0.70 -5.53 11 840 2012-10-01 19:17:44 2003-04-07 12:59:11 12 10 707 8 506 901 112 265.40 28 88.15 CHANGED .PulAGsaYspssccLpphlc.hhhcshs.t.......tRtlhs.......PHAGYhYSG.sAupuYptLsp....s..-pllIlGPsHsshs.sslulhs.utacTPLGslcVDp-hscpLhpppth......hs.--hsc.htEHSlElQLPFLpahhtc.......hKIVPlhluhps.EsstplGchlscsl+-.s...slllsSSDhsHau.....................................Ppclspph...DchhIctItph....s.cshhphlpphssT.lCGhsPlhlhl.hh+phs......pcuclLcYusSu-lstspsSsVuYAuhl ..............................................................................................sshAGtaYsss.tp.Lpppl.p...th...h..t....p.s..h..t..tt...........................s+sl.ls................PHAGYhYSG.sAAhuY.tt............ls..........sthc.....plhlLGPuHps....hh...p......s....su.l..s.s.......h.......sta.pTP.L.....G.........s.........l.........tl.........D...pchh....p.pLhp..p....t.h...............................hph.....s....ptsc..pp.....E..HSlEhpLPalpphhpp...............h.pl.lPlh.......lG...........t...........h.s............p....h......t....t....p.h....u....phl..sph......h...t..c.p..........slhllSSDhs.H..a..t.....................................................................................st...th.t.pp.h.....D.p..hs...h...........p.....tltph................c.pthh......p..h..l.p..p.....h..t.o..hCG.htPlsshlth.h..pth.t........................hp..h...phlpYtpSu...p.s.p........s.....p.s.p...sVuYuuh.h............................................................................ 
0 198 336 436 +4843 PF03706 UPF0104 Uncharacterised protein family (UPF0104) Bateman A anon SWISS-PROT Family This family of proteins are integral membrane proteins. These proteins are uncharacterised but contain a conserved PG motif. Some members of this family are annotated as dolichol-P-glucose synthetase and contain a Pfam:PF00535 domain. 26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.32 0.70 -5.14 128 4592 2009-09-13 19:27:26 2003-04-07 12:59:11 8 18 2804 0 1355 3742 1049 287.10 16 61.23 CHANGED shhhlh......hlhtthshpplh.............................ptlt...phshhh........lhhu..hh....lshhs..........................................hhl..puh+...........Wphllpt........ht...............plshhps.h...thhhhuhh....hsh...l.hP..uphG............-sh+shhLp................ppulsh.....spuhss.h........lhpp..lhsh....hslhhhshhshhh.............hh..shhhhhhhh...............hhshhhsshhhhhhhlhth................................h.h.hhht+ltptht.phtpshpt........................h.t...h...hhhh.hhohhhahhthh.thahlhtuhu.hsh........shhhhhhhhshshlss....hlP......sPGG..hGshEsshshhhs.....hhGhssstu.....hsh........sllh+hlshhhsh......h...................hGh 
.............................................................................................................................h....hh.....hhht...t...h...s.ht.plh.............................p.slp.........phs.hhh.............llhu.....hh....hshls......................................hhh.......sh.t..h.p.h.lhpt..............hth..........................clsht...ps....h....hsshhs.h..hst...h..hs...sh..s.G......tsh+hhhhp.....................pt.ulsh.....spsssh....h.........lhtt......hhsh......lslh.h..hs.hlhhhh.............................h..h...h.ht.hh.h...hh........................h..hs..h.s..l.hhh..l....h.hh...hhhhth...........................................................................................h.h..p.h...t..p..h......h..t....p..htt.t.htt..............................h.........hhhh.hhhohlpahsh.sh....hha.h.l..h....h.h.hs.hss..............shhth.h.s.h...h..h.l.u.tlsu.....hls....hhP.GG.lGs...hEsshlhhhs.........hhs.l.s..t.s..ts..........lss..................hLlaRlhhahlsh..hlu............................................................... 0 455 894 1153 +4844 PF03656 Pam16 UPF0108; Pam16 Bateman A, Wood V, Studholme DJ, Mistry J anon SWISS-PROT Family The Pam16 protein (Swiss:P42949) is the fifth essential subunit of the pre-sequence translocase-associated protein import motor (PAM) [1]. In Saccharomyces cerevisiae, Pam16 is required for preprotein translocation into the matrix, but not for protein insertion into the inner membrane [1]. Pam16 has a degenerate J domain. J-domain proteins play important regulatory roles as co-chaperones, recruiting Hsp70 partners and accelerating the ATP-hydrolysis step of the chaperone cycle [2]. Pam16's J-like domain strongly interacts with Pam18's J domain, leading to a productive interaction of Pam18 with mtHsp70 at the mitochondria import channel [3]. Pam18 stimulates the ATPase activity of mtHsp70. 
29.10 29.10 29.10 29.20 28.90 28.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.58 0.71 -4.34 3 354 2012-10-01 22:35:57 2003-04-07 12:59:11 8 8 266 8 259 368 4 119.10 36 83.90 CHANGED MA+RsAlQVIlsGsQVVGKAFARAlRQEh.....AAuRsAAuAtssAS+.RusAsSNhu.GISL-ESpQILNVcc...sLNhEEVpK+YEHLFcVNDKSKGGSFYLQSKVaRAKERLDEEL.+IctKE-KcKupsA+T ..................................................MAthlh.plllhGupllGRAFscAh+Qth..........t..uu...p...t..s....s...t...u...t...t..p..u....st.....ts..s...s.t..s......s...h......p.....G..........hoL..cEAppILN.Vpc.........ths..hEc.lt...c..........+ac+LFcsN......-.....p...p..p.......G...GSFYLQSK........VhRAKERL-.......tEl....p.......t.t.p...............t.t......................... 0 84 143 214 +4845 PF03657 UPF0113 Uncharacterised protein family (UPF0113) Bateman A anon SWISS-PROT Family \N 22.70 22.70 22.70 22.70 22.40 22.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -4.58 4 344 2012-10-02 17:37:24 2003-04-07 12:59:11 8 3 328 4 256 328 3 168.10 36 92.25 CHANGED h+lRpssstEhcLIcctLptYG.ts..alpphth.shtGch+-VasVshslhcslc...Lp.apsGhplGs..hsEhth+hphsLtthhhllpsohN.hshls.+uEhLFLYGRDlatcultchsthGp...lhlhNc.s-hlGIG............hpDthhlKNL+DhG.YLR+ ...................................................................R.hpt.E.p.hh...t.h.t...h...h.........h.........................................c....h.......p.p........c...RVYYVs-pl...h.+...h...As....s..........c...........pL..h...S......hGsClGK..Fo.K..........p..s...K..FRL..H..I..T..AL.s..h..LA.aA.+a.KlWlKPsuEhsFLYGNcVlKut...l..GRh...o....E.s...s.s.pat..GVVV..a..o..MsD...lP.LGFGhsu.....ttps.t.p.hp.s..s...s..h..l.l.h.p...............u.......DlGEYLR........................................ 
0 87 139 208 +4846 PF03350 UPF0114 Uncharacterized protein family, UPF0114 Mifsud W, Vella Briffa B, Bateman A anon Pfam-B_3587 (release 6.5) & Pfam-B_10597 (release 10.0) Family \N 21.50 21.50 23.40 23.00 21.10 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.59 0.71 -4.20 30 1494 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1367 0 351 823 291 119.80 42 68.01 CHANGED llasoRalhl.hhlGlllutlshhlphhhplhchl..........sphtph............sc.spllLsllslIDlhLluslLlhlshGhYEhFlS+lshtpp...................-pPcWLshhshssLKhKLutsIlsI.ulphLcpah ....................................hauSRWLhsPlYhGL.luhlsLs..l+Fh.pElhHll.............sslh.sh.............................sE..s-l.l.Ls.lLuLl..DhsLlusLLlMVhhuGYEsFVSpL-.lscpp......................-c.pWLu+hssssLKsKlAtSIVuISSIHLL+sFh............................ 0 68 180 274 +4847 PF01594 UPF0118 DUF20; Domain of unknown function DUF20 Bashton M, Bateman A anon Pfam-B_495 (release 4.1) Family This transmembrane region is found in putative permeases and predicted transmembrane proteins it has no known function. It is not clear what source suggested that these proteins may be permeases and this information should be treated with caution. 
27.20 27.20 27.20 27.20 27.00 27.00 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -11.86 0.70 -5.45 32 12059 2009-09-10 15:51:07 2003-04-07 12:59:11 11 19 4455 0 2730 8340 2223 323.20 19 87.74 CHANGED hhhhhhhhlhhhshhhhhs....lhhshlluhlluhlhpslhphL.ppttht+hlulhllhlhhlshhshhhhhlhs.hhsphtpllpslP............phhsthtshltplttph.hh............t.ht.thsphlsphhspl.....hstlhshhtthsthhlphlhhllhhhahLhctcphhphlhphhPtph+pphpthhpphppt...ltsalhuphlhullhulhshluhhlhsl...aullhullsslhs.lIPhlGuhlshlP.hlhhhhhss...hthlhslhhhhllp.l.spllpPhlhuct........hslpPlhlllullhushlhGhlGhllusPlhsllpshlpthh ..................................................................................................................hh...hhhh.h.h.hh...h.hh...h.h...hht...............sllhsh....lluhhl....sh.....l....h........p.........P...........l................h...............p............h...............l........p................c................h................t................h..............s.................R............s..............l............u...........s..............h.......l...h.........h......l..l.h...l....h.l....l...s.....h....h..l....h....h...l....h.......s.......l.....h...p...p...h.....s....p.........l....h.p..p..ls.............................................p.h..h....p....p.....h...p....p.....h....l....p....p.....l.....t..t.h..t.h...........................................tth..t..t..t..h.....s....p...h.....h...p.....p...h...t..s..p..h....................................hs.t.h....h....s....h....h...s....s....l.......s.......s......h...h....h.......t....h.......l...l...h...h.......l.h....s....h.......a......h....l....h...-....t....p....p....h..........h....p....h....h....h....p......h....h.....s....p....p.........p....t....p....h....t....p....l....h....p....p.h...spt.......lssaltu.ph....lhul...l.....hGl...h....s....h..l.....u......h......h......l........h........G..l.........sa...ul..l..l.ul..l..s..slhs..hlP.h...l...G......s..hl....u.h.l.P...s....
s....l..h..s....h....h........h.............s...............s.......................h.t........h.l....h......l....l....l....h....h.h.ll...p..t.l.pu.s.l....l.pPhlhucs............lslpslh.lllu.l.lhu...u.t.......l...h....G........h...h......G.hllulPlhulhtslhp...h.................................................................. 0 829 1727 2265 +4848 PF03715 Noc2 UPF0120; Noc2p family Bateman A anon SWISS-PROT Family At least one member, Noc2p from yeast, is required for a late step in 60S subunit export from the nucleus [2]. It has also been shown to co-precipitate with Nug1p, a nuclear GTPase also required for ribosome nucleus export [1]. This family was formerly known as UPF0120. 19.80 19.80 19.90 22.30 19.30 19.60 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.84 0.70 -5.56 38 404 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 292 0 273 396 6 277.60 32 42.37 CHANGED psshl-slLKssYpual+ss+h.ssh+ohshINFh+NossELau.lD.shuYphuFtaIRQLAIHLRsulssps........c...............-ua+sVYNWQalauLchWucVLuth..........tspposLcsLlYPLVQlslGsh+L.lPospaFPLRhall+uLl+LS.psTusaIPlhPhLhElLsSs......phs+s.s........................+....pu.shcshDFphsl+sspuhLpo+sYp-ulh-plh-LlsEahshaupsIuFPELshPsllpL++ahKps.......+ss+as+plppLlcKlppsupaIpc+Rsp..lsFuPsspspVcsFhp-hc..hcpTPLspYhts.+ .............................................t..hhpslLKthY.shlps......s..+h...ss...spshs..hIsh.hppohsELau.l....D...shuYppuFhaIRQLAlHLRsuhstpp................K...........................................................-sapsVYNWQalpsLchWspVLut.............spps.LpsLlYPLsQ.....lhlGsh+L.lPo..s.paaPLRhphl+sLhpLu....p....so.ssaIPlhshll..E...lLpps.......php+t..s.......................................p..........ps.sh+slsFsshl+hsps..Lps+sap-.ulh-p...............lh-LlhEahsh..hup..sIuFPELshPsllp.......LKpalKps.....................+ssphs.......ptlppLlpKlppNupaIpp+R.pp..lsFuspctttVpta.pphp...ptTPLstaht........................................................ 
0 102 161 232 +4849 PF03661 UPF0121 Uncharacterised protein family (UPF0121) Bateman A anon SWISS-PROT Family Uncharacterised integral membrane protein family. 20.60 20.60 20.70 21.80 20.20 19.50 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.58 0.70 -5.18 7 337 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 274 0 234 322 0 229.50 21 81.74 CHANGED .ps..s..s.s....uuhhpalhsN+l-TAhWhuRlhTlahulhalLPhlssp.uhs....hYp+sLlAsAATSALRLHQRLP.pFp..hSRtFLtphhhEDSsHYLlaSllFl.shPlohsllPVhLFulLHusoaopKlLDshG............pNShhhsR....hlshlphppQNIL+hIACsEIhLMPhslhhhFSGpuullhPFlYY+FLshRYSSRRNPYsRshFsElRlslpslAhpspCPshlp+hlhsuIsFlSRLAPssh .................................................................................................ht..........................................h.ah..phhslh.s.sh.h.ah.l..s..h.h....s.......p.....t..tt....hYphu......hl.uss.h.o......u....l...h...l...a..p.....p..h....thp.........h...s....t...t..h...l...t....p...h...l..t....-.-.sspY..L.....hh.uLh.a.l....h....s..h.....l.h..h......u.llPh.hlaSlhHsu.oas.....c.....p.hL.............sh..s.............................tp.s.t.h.tc......lhp...hVppttps....hht.h...s.AssEl........h.....l.h.h.h.h.l.h..h.lh..t......ptSh........l..h.l.Yhp.FL+hRY...pp....ssaspshatphphhl-t.hh.p...tsP.hhtp................h................................................................................. 0 77 126 188 +4850 PF04297 UPF0122 Putative helix-turn-helix protein, YlxM / p13 like Kerrison ND, Finn RD anon COG2739 Family Members of this family are predicted to contain a helix-turn-helix motif, for example residues 37-55 in Mycoplasma mycoides p13 (Swiss:O05290). Genes encoding family members are often part of operons that encode components of the SRP pathway, and this protein may regulate the expression of an operon related to the SRP pathway [1]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.21 0.72 -3.99 9 1476 2012-10-04 14:01:12 2003-04-07 12:59:11 9 2 1462 5 202 999 31 100.70 44 89.94 CHANGED l-KTh+hshLF-hYtuLLTcKQtsYhpLYYh-DhSLuEIAEcaslSRQAVYDsIKRTpchLppYEpKLpLhpKaphRpclhpclp-ph.p.t...phhc.l ..........................EKs.RMN.hLF-FY.usL...L....T...c...KQ..ps..Yl.E.LYY.h-DaSLuEIAEpasVS....RQAVY....D....N....I.K..R..T.c.....c.l.L.E.-.YEpKLc....lhp....c....a......t.R....p....p....lh...c...pl....t...phhsp..p.........h................................................ 0 92 147 176 +4851 PF03660 PHF5 UPF0123; PHF5-like protein Bateman A, Wood V anon SWISS-PROT Family This family of proteins the superfamily of PHD-finger proteins. At least one example, from mouse, may act as a chromatin-associated protein[1]. The S. pombe ini1 gene is essential, required for splicing [2]. It is localised in the nucleus, but not detected in the nucleolus and can be complemented by human ini1 [2]. 26.70 26.70 27.30 30.50 23.80 26.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.82 0.72 -3.98 19 319 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 281 1 224 212 4 103.40 71 89.84 CHANGED MSRHp.DLlhChKQPGtslGhLC-pCDGKCPlCD...SaV+P..pohVRICD-CSaG.......phss+CIlCG.......s.Gl.....s-AYYChECs+LEKDRDGCPRIlNlGSs+sDh...aap+K+ts ..............Mu+HHP.DLIhCRKQPGlAIGRLCEK.....CDGKCsICD....SYVRP.....sTLVRICDECNaG.......oYQGRCVICG.......GsGl.....SDAYYCpECTh.EKDRD..GCPKIlNLGSo+TDL...FYERKK.hu......................... 0 78 126 188 +4852 PF03658 Ub-RnfH UPF0125; RnfH family Ubiquitin Bateman A, Burroughs AM, Iyer LM, Aravind L anon SWISS-PROT Family A member of the RnfH family of the ubiquitin superfamily. Members of this family strongly co-occur in two distinct gene neighborhood contexts. 
In one it is associated with a START domain protein, a membrane protein SmpA and the transfer mRNA binding protein SmpB. This association suggests a possible role in the SmpB-tmRNA-based tagging and degadation system of bacteria, which is interesting given that other members of the ubiquitin system are analogously involved in protein-tagging and degradation across eukaryotes and various prokaryotes. The second context in which the RnfH genes are present is in a membrane associated complex involved in transporting electrons for various reductive reactions such as nitrogen fixation [1]. 22.40 22.40 22.70 23.30 22.20 19.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.55 0.72 -4.11 10 1334 2012-10-03 10:59:06 2003-04-07 12:59:11 9 1 1297 2 253 739 157 83.20 51 83.26 CHANGED IcVEVVYAhPc+QhLpclsVs-GsTVc-AIppSGlLphaP-IDLppsKlGIFu+slK..L-ssLKDGDRIEIYRPLlsDPKElRRc ......................................ltVEVsY.A.L.P.-..+Qh.L.hp.lsl.......p-GuTVc-AIcsSGl.L.p.h.......hs-........I........D...L.............s....p...s...K.V.G..IaSRssK.......Lsssl+DGDRVEIYRPLlADPKElRRp........... 0 53 131 196 +4853 PF03458 UPF0126 UPF0126 domain Yeats C anon Yeats C Domain Domain always found as pair in bacterial membrane proteins of unknown function. This domain contains three transmembrane helices. The conserved glycines are suggestive of an ion channel (C. Yeats unpublished obs.). 23.40 23.40 23.80 23.90 23.30 23.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.85 0.72 -4.36 177 6692 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 2470 0 1367 3826 471 80.50 31 75.05 CHANGED lhhhDhlGlssFulsGshhAhp..th......s..h.hssllluhlTulGGGhlRD.lLhsp.h..Phhhh..p.phYs....hsulhuuhl.hhhhhthhh .................hlLDhlGlssFulsGshhAh....ch...ph.........s.....h..hssllhuslTulG.GGhlRD.lL.hsc.h...Plhhh.........c...thYs.............s.s.s.hh.uull.hhhhh...hh.......................... 
0 358 812 1115 +4854 PF03673 UPF0128 Uncharacterised protein family (UPF0128) Bateman A anon SWISS-PROT Family The members of this family are about 240 amino acids in length. The proteins are as yet uncharacterised. 20.40 20.40 20.80 20.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.36 0.70 -5.07 5 20 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 0 16 22 0 215.40 49 96.05 CHANGED MLlNTLVFETLGsPEKEREFKlKDLK+WGFDLlLGKlDGc-AYFsSchusREsGDKaocsGKEYEIcElLcELPKNs+LaA+IEhEcGpAYLhsaLREED.pNhPlL+pPAsplLhAFaKK+KLspLlKsl+uVGloT-FaKc+.GlcSlPLPYEELPPVARRFLR-ARKVEK-.sGFGRluFAYFGEs+-K-sRYRLpWLLPTIALFDl-IScKlDKsLuhLK ..l.shhlhEshGss.pERcFKhKsLKtWGaDLhhGpI-GccsYFsuchsc+ctG-p....YspcG+EYclpEsh.cElPKNs+LhA+I.hEcGpsYL..hhaLc-ED.psh.lh+.ssthlLhtFacKcKLspLlKtl+sVGloo-hhKcs.hhcuhPLPYEEhPPhsRRhLR-sRcVc+-.TGFGRhsFtYaGE.cDtptpYRlpWhLPTItLFDl-IApclDKsLuhLc.... 0 6 6 9 +4857 PF03647 Tmemb_14 UPF0136; TMEM14; Transmembrane proteins 14C Bateman A anon Pfam-B_2984 (release 7.0) Family This family of short membrane proteins are as yet uncharacterised. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.05 0.72 -3.59 61 574 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 290 3 347 543 6 92.90 30 64.14 CHANGED phhuhshuuLlssGGlhGYs+sGShsSLhAGlshGslhuhu......uahlp.......spshuhtlul.ssSs.............sLssshshR....hhpop.KhhPsslhhslusshsshhhh .................hhuhsausLlssGGlhGYh+sGShsSLhA.GlhhGslhhhu.............uahhp..........ppshshhl....uL.hsos................................sLssshuhR...........hhp.ot.K.hhPsGlhsshohhhhshh..h...................................... 0 95 186 274 +4858 PF03677 UPF0137 Uncharacterised protein family (UPF0137) Bateman A anon SWISS-PROT Family This family includes GP6-D a virulence plasmid encoded protein. 
34.00 34.00 34.50 34.20 32.00 31.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.76 0.70 -4.84 6 77 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 47 0 11 42 3 230.60 45 92.16 CHANGED MupLKp.h-shFKKNppsphEshpKKchpt-.......lhssoLSspEcp+lcpLl..p+YsFcDE.hpccDltulppLouQIKpIp+QpVLLhGE+IhKVR-LL+o..apEssFSuWl.Lsaus+posYNhLsYYELFhsLP-.sLKlEhpplPhpAsYhLASRcGo.E+K.clI+phpG.opophl-Ilc+.hP.l...........ss-p+pppLScphlplL....+llspsscLSp.spslLcpLhcKlp.......spsp ................................MuplKp.h-shFKKs.p...sphpshtK+phphE.......hhssp.LSSpEctphcpLl..EcYsaucE.h.ppDltp....lphLouQlKpIp+QtVLLhGE+IhKVR-lL+s..ap-ssFSuWl.Lsaus+posYNhLsYYELF.sLPc.oL+hEhpplPhpAsYhLASRcGs.ccK.-lIcphpG.opuEllcllc+.hP.h...........ss-c+ppsLupphhphh.....+llpps.s.clSppp...LcpLhcKhp...Ksp............................. 0 5 7 10 +4859 PF03669 UPF0139 Uncharacterised protein family (UPF0139) Bateman A anon SWISS-PROT Family \N 20.90 20.90 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.48 0.72 -4.51 9 201 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 167 0 134 179 1 98.30 39 82.70 CHANGED hssssDPRRss+hpRYKPs..oss......-DhhsDYMNlLGMlFSMCGLMh+hKWCuWlAlhCSsISFANsRs.S-DsKQlhSSFMLSlSAVVMSYLQNPpPhoss ........................h..thsDPR.RssplhpYpsPssps..............-D.ssDYhslLuhlFuMsGlMh..+.hKaCuWhAlhhShhS.aANs...+s....opDs.KQ.h.....h.....SS.F..M.......hSl.AlVhoYLtsstsh......................................... 0 46 71 107 +4860 PF03686 UPF0146 Uncharacterised protein family (UPF0146) Bateman A anon SWISS-PROT Domain The function of this family of proteins is unknown. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.51 0.71 -4.31 6 77 2012-10-10 17:06:42 2003-04-07 12:59:11 8 1 77 1 56 119 15 127.40 30 93.74 CHANGED Ml-lAchIAcEst+G.KVVEVGIGhahcVActLpcpG.hDllAsDIscc...pA..pGlphhhDDlhsPslulYcuAchIYSIRPPPElhssll-lu+tVsAshhIpPLsG-.s...pphKLlNY+Gt.FYth-s ......................h....t.pts..+lVElGlGt.p.hcVAttLp.ct.G..h-.VhssDlptp.............ts....tGlp.....hhhDDlh......s....Ps...h.....plYc..sAclIYS..lRPPsELp.sllcl..A+clsuslllpsLus-t........t.h+lhsapt..hYh............................................................................... 0 10 32 47 +4861 PF03685 UPF0147 Uncharacterised protein family (UPF0147) Bateman A anon SWISS-PROT Family This family of small proteins have no known function. 21.10 21.10 21.20 27.30 21.00 19.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.87 0.72 -4.11 23 121 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 118 5 81 116 72 84.00 45 95.16 CHANGED shtss-cpl+pshthL.pcIlsDsoVPRNIRRAAs-uhctLpscppssuVRAAsAISlLD-ISpDPNMPhHsRThIWsllSpLEol+ ......h..pscpplcpslthL.ppIlpDooVPRNIRRAAs-uhctLpspsps.uVRAAsAIulL--ISpDPNMPhHsRThIWpllStLEol+. 0 23 47 65 +4863 PF03695 UPF0149 Uncharacterised protein family (UPF0149) Bateman A anon SWISS-PROT Domain The protein in this family are about 190 amino acids long. The function of these proteins is unknown. 
25.00 25.00 27.50 27.40 20.60 20.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.49 0.71 -4.39 169 2253 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1277 1 349 1168 204 173.10 26 84.88 CHANGED hppLpph..Lp.......ttt.h......shsplcGhLsullsu.....stt..lss.ppWl...shlhs.......................pstt.s..pphhphlh...............phhpphtppL......tp.ts.th...................p.hhspp..ps.......tpspslppWspGFltGh...sl.tppp...................pphssp.......hpthlps...lttlupht......ptt.................spcp..tppthtplhE.hh..sshhl......ap.htttt ...............................................p.lsph..Ls.........ppu.hlssAEhcGllouhlsu......spt...h.s...ppWL...shlas......................p.pu.t.h..pchhphlh...............phhsssuppL.....p-.ts..F......................p.h...hspp..-s..........pcsssltpWspGFhtGlul.htsc......................hspl..ss-................htpulcs.....lt.lup.t.......spcp..................spcp....htpuh-cll-hl+hAsLhlashah.............................................................................................. 0 58 143 260 +4864 PF03681 UPF0150 Uncharacterised protein family (UPF0150) Bateman A anon SWISS-PROT Domain This family of small proteins is uncharacterised. In Swiss:Q9A3L8 this domain is found next to a DNA binding helix-turn-helix domain Pfam:PF01402, which suggests that this is some kind of ligand binding domain. The structure of this domain suggests that these domains oligomerise and due to structural similarities may bind to RNA. The monomer adopts an alpha-beta-beta-beta-alpha fold and forms a homotetramer. Based on the properties and functions of structural homologues of the HB8 monomer, the protein is speculated to be involved in RNA metabolism, including RNA binding and cleavage [1]. 
26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.29 0.72 -4.33 144 2769 2012-10-02 16:06:15 2003-04-07 12:59:11 10 19 1473 7 747 2274 167 47.90 27 43.64 CHANGED pYssllct...p-css....ahsphP-ls....ush....opG-Th-EAhpphp-Alphhlps .................Ysshlp.....tccsu....ahsp..hPDl.s........ush....opG-.Th-EAhppsp-Althhlt............... 0 231 511 642 +4865 PF03692 CxxCxxCC UPF0153; FliB; Putative zinc- or iron-chelating domain Bateman A anon SWISS-PROT Family This family of proteins contains 8 conserved cysteines. It has in the past been annotated as being one of the complex of proteins of the flagellar Fli complex. However this was due to a mis-annotation of the original Salmonella LT2 Genbank entry of 'fliB'. With all its conserved cysteines it is possibly a domain that chelates iron or zinc ions. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.35 0.72 -11.12 0.72 -3.51 35 4094 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 2313 0 1272 3041 657 89.40 24 56.80 CHANGED hCps....C.GtCCh................htlptpDhtcl.thsth......................................tChhLc.tcst...pCpl.....Yp..pR....PpsC+hhPh....................tththhs.sCs .....................................Cps....C....Gt.CCh..........................................hp..l....t.p..-...ht.c.l....th..t...t...h..............................................................................................sC......hhL.s.....tcst...........pCpl.....Yp.....pR......PpsC+hh.h............................................................................................................ 0 397 784 1044 +4866 PF03672 UPF0154 Uncharacterised protein family (UPF0154) Bateman A anon SWISS-PROT Family This family contains a set of short bacterial proteins of unknown function. 
20.90 20.90 20.90 21.50 20.80 20.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.03 0.72 -4.35 5 1101 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1097 0 123 358 3 64.60 49 82.30 CHANGED LsIVLuLLlGlhLGaFISpKhMK+hLKKNPPINEstlRhMhtQMGRKPSEsQINQlM+uhpsQp ...................lllVlAL.l...sGh....lG...GFal.ARKhhpcal..pc..NPPlNE-MlRhMMhQMGQKPSp+KlpQhMptMp+Q.t.... 0 36 71 96 +4867 PF03693 RHH_2 Uncharacterised protein family (UPF0156) Bateman A anon SWISS-PROT Family This family of proteins are about 80 amino acids in length and their function is unknown. The proteins contain a conserved GRY motif. This family appears to be related to ribbon-helix-helix DNA-binding proteins. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.58 0.72 -3.81 5 1166 2012-10-02 18:44:02 2003-04-07 12:59:11 9 7 788 2 292 822 140 77.80 31 88.88 CHANGED MuKNTSVsLGEHFsuFIDuQVQuGRYGSASEVIRSALRLLE-pETKLcALRsALIEGEcSG-ucsFDhDuFlsE+cpcssp .......................h.olsL.s-chcpFIcshlp..SGcYsotSEVlR-u...L....R......L.L....c....c....+....E....s.....c....l....p....u....L...R.p....h.l..t...pG....h...p....S...G.......s....p...h....p..p.h.........pth.......................................... 0 68 163 215 +4868 PF04229 GrpB UPF0157; GrpB protein Kerrison ND, Finn RD, Eberhardt R anon COG2320 Domain This family has been suggested to belong to the nucleotidyltransferase superfamily [1]. It occurs at the C-terminus of dephospho-CoA kinase (CoaE) in a number of cases, where it plays a role in the proper folding of the enzyme [2]. 
20.10 20.10 20.40 24.40 19.70 19.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.88 0.71 -4.53 107 1502 2012-10-02 22:47:23 2003-04-07 12:59:11 9 16 1107 1 300 1132 38 162.00 27 79.16 CHANGED tttlplss.ass.pWsppapptppplpshl....usphl.......plcHIGSTuV......PGLsAKPlIDlhlsVpshpshc..ph.spsLpshGY.h.ht.c.sh.t..............+chFh+......................sp..+stplHlhtt..uss.phpcpLhFRDaLR..scspttppYtplKppLu...tphsp...shptYssuKs..salpclh..pcA .............................t...l.lhsass.pWtppapc.pptltshl....spthl.......................plcHIGSTul......sslsAKP.IIDIhlt..Vp.shp..p.h.s..ph..tctLpt..l.G....Yhh..t...c...shsp..................+h.h.ht+.........................ttttcsh+lHlhth..sst..phpppLhFRDaLp..spsphsppYsplKppLs......tphst.......shppYspsKssalpplhpc..................... 1 98 188 247 +4869 PF03682 UPF0158 Uncharacterised protein family (UPF0158) Bateman A anon SWISS-PROT Family \N 20.40 20.40 20.60 21.10 20.20 19.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.96 0.71 -4.62 3 182 2009-09-11 06:49:59 2003-04-07 12:59:11 8 7 173 0 63 158 12 143.10 24 66.17 CHANGED QNPLlLRlcRLM-AFAKSDDERDFYLDRlEGFllYIDLDKPQsELDAL.pELEENuDRYCLIPKLSFYEoKKIMEsFVNEKVYDIDTKEKLL-IlQSKcAREsFLEFLYDHcoEQEKWQQFYsERSRIRIIEWLRsNcFQFVFEEDLDhP+pLLEpLK+sLFs .............................................................................l.h.pl....A..ht.s...s...th.p..haLDh.pG..lh.hl....s....-......................s.....t..-h.....-....th..............p..-.........l.....-...p.............s......s...-.R.Yh.hlP.phs.......h..p.t.hplMcsFlpp.l..t.-.ch+ppL..hpsl.p.u.+s.u.acpF+chlh.-.a.p..hccWhpapscph+thhh-.WLcpptht...................................................................... 1 29 50 57 +4870 PF03690 UPF0160 Uncharacterised protein family (UPF0160) Bateman A anon SWISS-PROT Family This family of proteins contains a large number of metal binding residues. 
The patterns are suggestive of a phosphoesterase function. The conserved DHH motif may mean this family is related to Pfam:PF01368. 20.70 20.70 22.10 23.00 19.90 20.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.07 0.70 -5.01 69 570 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 459 0 317 545 50 288.60 36 89.24 CHANGED hpIuTHsGsFHsDEsLAshhL+h.hstat.............sAcllRoRDsphl.spsD..lVlDVGGhYDs...................pppRaDHHQ+sFsthh........p.thsp........+LSSAGLlapHFG+cllpphht.........................hscpp...lphlapplYpsFlcslDAlDNG..lsph...................t...ta..sshoLuuhluphN..PsWs-.t.......t.tsp-ptFtpAhphsup.hhptlcthstshlsA+slVtpAhpptht...sGcIlhL.sp.hsPW+caLaplEp-tth.................h..aVlascs....spWRlpsVsh.pssoFcsRtsLPcsW+GL+D.c-Lsph..........oGIs.GslFsHsuGFIGGscohEusLpMAch.uLp ................................hluTH.sGsFH............sDEsLAshhL+h..ls..tap..............sucl.lRT.RD..s...phl..sps-................lVlDVGG.YDs...................tppRaDHHQ+sFstsh..........tthts.......+LSSAGLlap.HaGcplltphht............................sptp..l..phlapplYp.sFlcslDAhDN.G.ls.h...................t....tat.hshsLushluphN.........PtWsp.t...........................tsp-ptFtpAhphstp.FhptlphhstsalsA+slVt.......pAh.p.p.phph.......p...sGcIlhL...sp..hsPWKc+LaplE..p-..ht...........................hhaVlascp....tspWRlpsVsh.p.so.Fp.sRhsL.Pc.sWRGL+D.cpLsph............oG..Is.GslFsHsuGF..IG.G...scohEuAlphAphuL...................................... 0 126 196 269 +4871 PF03687 UPF0164 Uncharacterised protein family (UPF0164) Bateman A anon SWISS-PROT Family This family of uncharacterised proteins are only found in Treponema pallidum. They contain a putative signal peptide so may be secreted proteins. 
25.90 25.90 26.30 26.20 24.40 25.80 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.91 0.70 -5.90 6 67 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 30 0 13 55 29 294.80 33 72.18 CHANGED sl....thhshat+suthssuhuhssslushAuusu.h.otpc.Kthu+.....Rsl.lPoGGRapsLssuFTALAsDASFFEANPAGSAshs+sELuhFHsstlssSHsETLSaVtpsGphGYGsShRsFaP-.shp..hu.p............KspGhlu....IhNhu+tF.utYRFKGlSlGuNlKsGaR......................supp.pHlsVsuDlGLphshsVAKsFuScEPNhalGLuh+NlGhoVKs........................................ssssssts.ssstssHsTsohltlGFAYRPlpaFLFulGlphthNVpslpsss..hhuhuFhh..hp.hsh.sshhhpG...tt.thouGuEhp.ssh+ls ................................h......hh.h..hhhshhsh...ssthsu........stt..p.pt.sc.....t.l.lsoGG..RhphLssuFTALAsDASFFEANPAGSAshscsElu.hFHsstlssSHh-Tluastptsph.G.YGsS...h+hFas..sh....h........................pthGhlu....IhNhu+th.ttaRFtGlSlGsNlKhGaR..................................pupt.pHlslsuDlGLphshsVuKsFuSpEPNhalGlshpNlGholps....................................................sstpsc.shsohlhhuhAYpPlphFLFuhGlph.hNlpsl.tt.......t.....p......p....hhuhuhhh.shphlsh.uuhhhpu....tphRhusGuEhphsphpl........................................................................................................................................... 0 8 12 12 +4872 PF03691 UPF0167 Uncharacterised protein family (UPF0167) Bateman A anon SWISS-PROT Family The proteins in this family are about 200 amino acids long and each contain 3 CXXC motifs. 
25.00 25.00 28.60 28.30 24.60 24.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.50 0.71 -4.86 11 334 2009-09-11 05:14:47 2003-04-07 12:59:11 9 1 317 0 27 155 5 175.40 52 89.45 CHANGED hp....LPpF+YHPc..PlsoGuhcps..sssCsCCspspshhYoGshYsh--lp..slCPWCIADGoAAc+a-GsFpDDhslpss................sspphlcElhcRTPGYsuWQQEhWLuHCsDhCAFlGhVGhsEltsL..psl-slhs-ht....sutchpcLhptLs+sGp.hsuYLFpClaCGpahhahDhu .............sh..psLPpF+YHPc..PLpTG.uF-pD..sVpCsCCcQps..slhYoGPhYsh.....-E.l-...aLCPWCIADGSAAcKFsGoFpDDssl-..ss-.......................hP-EhlcELlcRTPGYpGWQ.Q.EaWLuHCGDaCAFlGaV.G.hs-lcDh...Dt.....hssLccDhc......huh+.p.-ltcsLp+sGc...spGYLFRCLHCGKh+LauDFp........................................................................ 0 10 17 22 +4874 PF03666 NPR3 UPF0171; Nitrogen Permease regulator of amino acid transport activity 3 Marshall M anon SWISS-PROT Family This family, also known in yeasts as Rmd11, complexes with NPR2, Pfam:PF06218. This complex heterodimer is responsible for inactivating TORC1. an evolutionarily conserved protein complex that controls cell size via nutritional input signals, specifically, in response to amino acid starvation. 
27.50 27.50 27.60 28.20 27.40 27.20 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.54 0.70 -5.95 19 428 2012-10-01 20:21:22 2003-04-07 12:59:11 8 4 229 0 267 413 2 250.80 23 57.72 CHANGED GFsschLsplLsPt+phCNp+FElslDslhFlGhPlahtcsGpW+ppcppp...t.ttp.......................................ts..ppttpstp......................t.csshsMFplVFlhN.P.......hhEhs.+lccMacallp+lulsL+apQs+psYVt+EsctILpl+-...th.cpptthps..........hhppllppSSLA+slp-sacuIspscIAsLplss....hhhShQIPhpsch.pLPp.pl.Phh.uoaLoo...................................ts..p.p-th.p...............pthl..aauLLLLcDspsllp-lts.ssss.....hls...................chl+hhpP....shSlhplup..............................ssslsh.splcphAhHLlYWR+ARlI.PLss+ssYlVSP.Aslp..............................pL.pstptFpppFPshPoLPpFLshLS...tpP+sauslIP...S+-H+slYhphLuWLlRaGaVTQLpTFlalhlspcIKhc .....................................................................................................................................................................................................................................................................................h...h..l..phshhh...p.p.tal.pp.t.h....tp...................................................................................................................h...h......................................................................................................................................................................................h....shhhh..t.............h.................h..............................................hhp..ps.......sh..ht....................................h......h..hs.ahh.htpAhhl..lp...p...hahhss...................................................at..F..s...................L..hlt.hs.................s.......ht..hh.............................................h.hl..hhht.thh...................................... 
0 88 131 210 +4875 PF03665 UPF0172 Uncharacterised protein family (UPF0172) Howe K anon SWISS-PROT Family In Chlamydomonas reinhardtii the protein TLA1 (truncated light-harvesting chlorophyll antenna size) apparently regulates genes that define the chlorophyll-a antenna size in the photosynthetic apparatus [1]. This family was formerly known as UPF0172. 24.40 24.40 24.60 25.70 22.60 24.00 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.95 0.71 -4.82 22 258 2012-10-10 14:49:21 2003-04-07 12:59:11 8 3 170 0 158 241 5 183.70 34 92.45 CHANGED hsplplospAYsKhhLHuhKYPpsuVNGlLluc..........................sspssp............lhloDslPLFH..tplsLsPhLElALspl-sasppps.hsIlGYYpANpph.sDss.ssl.At+lu-+luc....phssAsllhlcNpcls.sscsssht..................lap...ppts+Wspsc.t..hhtptppspchlpph...lpsttapplsDFDsHL-chppDWhNppLsp .........................................phclospAYsKhhLHuuKYP.p.sAVNGlLlup.......................................................pp..p..ss..p............lhl.sDslPLFH..tpLsLsPMLEl.ALs.......l-s.a.......s.p....p.......p.......u......hsIsGYYpAN...pph..pD..ss....s..s.l..AtKlAs+Is-..........tas.sA.sllh.lDNp+hs..p.ts..sshh..................lhp......ppss+Wpt..pc........t.....h......hp.p..tps....pp..hsuph...lcsps..appLlDFDsHLDDlppDWhN.tl.............................. 0 63 85 119 +4876 PF02476 US2 US2 family Mian N, Bateman A anon Pfam-B_2256 (release 5.4) Family This is a family of unique short (US) region proteins from the herpesvirus strain. The US2 family have no known function. 
25.00 25.00 30.80 29.80 19.50 18.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.97 0.71 -3.65 15 202 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 32 0 0 136 0 150.10 66 66.65 CHANGED hLsSshsss.psaHLWVlGAADLCtPsl-plsss+...RLlssclsssWs.GtsW.lPs.......phtshhTss.........Ws.........Ph.sssps...ltss.sshchhYullss...................s.h.Psssspssspsssps.....spssCsss ....HLNSSLIIN.QPYHLWVLGAADLCKPVFDLIPGPK...RMVYAEIADEF..HKSW.QPPFVCGKLFETIPWTTVE..................HNHPLKLRAAGGEDTVVGECGFSKHSSNS..LV+PPTVKRVIYAVVDPARL.......REIPAPGRPLPRh......R.....PSEGGMRAPRRRSRA..PAPARSTAs..AAs.......................................................................... 0 0 0 0 +4878 PF03683 UPF0175 Uncharacterised protein family (UPF0175) Bateman A anon SWISS-PROT Family This family contains small proteins of unknown function. 26.40 26.40 26.50 26.50 26.30 26.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.23 0.72 -4.51 47 419 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 182 0 161 473 43 71.90 24 80.33 CHANGED plPctlh.shp.s.pth.pEl+htlAlpLYppstlShG+AAclAGl.o+hcFhchLuc+sls.hp..hs.c-LpcDlpss .....................................th...ht...t.t...pch+hthAlpLY...ppsclShupAAclAGh.sch-Fh.phLtccsls.hp...h.s..--lpp-lp..h.................. 0 47 129 154 +4879 PF03698 UPF0180 Uncharacterised protein family (UPF0180) Bateman A anon SWISS-PROT Family The members of this family are small uncharacterised proteins. 
22.30 22.30 22.60 27.90 22.10 22.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.42 0.72 -4.14 18 218 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 204 0 53 136 0 78.90 47 96.57 CHANGED hp+.IuVEpuLoslp-tL+p+GY-Vlplc....spp......chpssDssVVTGhDsNhhGIpDssTpu.sVIcAsGhTA-ElsppVEp+lp ......t+IGVEsoLo.cVppALpppGaEVVsLp.....scp.......DspuCDssV...VT...G.......pD........o.N...h..hGI...sD.ssh.cu.sVIsApGhTs-EIsppVEsR..t.................................... 0 25 42 46 +4880 PF03701 UPF0181 Uncharacterised protein family (UPF0181) Bateman A anon SWISS-PROT Family This family contains small proteins of about 50 amino acids of unknown function.\ The family includes YoaH Swiss:P76260. 22.20 22.20 22.70 22.20 21.10 22.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.51 0.72 -4.66 18 721 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 718 0 66 197 1 51.70 70 86.32 CHANGED Mh.sshPuLoHEpQQpAVE+IQcLMucGhSSGEAItlVApElR.Ep+ppcppst ....MF.AGLPSLTHEQQQKAVERIQELMAQGMSSGpAIAlVApELR.AsHoGE+I.VA...... 0 4 20 46 +4881 PF03670 UPF0184 Uncharacterised protein family (UPF0184) Bateman A anon SWISS-PROT Family \N 20.90 20.90 20.90 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.81 0.72 -3.81 3 137 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 118 0 63 90 1 73.80 36 65.06 CHANGED MSGPNGDsshSVEDGups--D-FGppEYEAINSMLDQINSsLDcLEERNDcLpu+L+ELLESNRQsRLEFppQLu.cAPp-uSs ..........................................................................................p...p..hAtlNSpLDQLNSsLD+LE-+...sDHLcupL+pLhp........................pt......................... 
0 17 22 38 +4882 PF04050 Upf2 Up-frameshift suppressor 2 Wood V, Finn RD anon Pfam-B_14721 (release 7.3); Family Transcripts harbouring premature signals for translation termination are recognised and rapidly degraded by eukaryotic cells through a pathway known as nonsense-mediated mRNA decay. In Saccharomyces cerevisiae, three trans-acting factors (Upf1 to Upf3) are required for nonsense-mediated mRNA decay [1]. 25.00 25.00 25.00 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.20 0.71 -4.49 16 283 2012-10-11 20:00:59 2003-04-07 12:59:11 9 11 232 2 203 287 0 170.80 27 16.56 CHANGED cussSss--Gh.....---shs-t-c-p-....So-Ec-sstsssppst.....co-uE-EplhVoRpp--hDPEsE..AEFDREF-KMMu..................ESh-SRKaE..++ssFDlPLPM+htscsss.......ssspsssE......tssssssTMsFoLhTK.KGNKQQTRsl-lPSDSohAhuM+sQQpA-pEEQQRIKpLVLN ................................................................................ttts.......ps........-ppt.......p.t...-...p-cp...........ppc.-p.p...p.pt..p....-..ttppp.......................psp.p-.cpp.hh......l...pt...t.t.......p..hs...s.p...t-....t-F.ppth.pKMht....................................E.u..h......p...p.RphE....+h..tt.....h..Dl....s.l.......Ph..phpsptpp...........................................ts..t-..................spstsssshsFslLo+...+GNK.QQ.........h..+plplPssSphAhshhpp.ppA...-p-E+p+lKpLsLp.................................. 0 60 102 165 +4883 PF01255 Prenyltransf UPF0015;UPP_synthetase; Putative undecaprenyl diphosphate synthase Finn RD, Bateman A anon Prosite Family Previously known as uncharacterized protein family UPF0015, a single member of this family Swiss:O82827 has been identified as an undecaprenyl diphosphate synthase [1]. 
20.60 20.60 20.90 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.40 0.70 -5.22 94 5931 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 4761 48 1691 4210 2997 213.10 40 85.11 CHANGED I.MDGNtRWA+p+....shs.pttGHptGspslccllchshch..GlchlTlYAFSsENa.pRspp.EVshLhpLhpphl.pc.hpphp...p...psl+l+hlGch.stL.sppltptlpcspptT.p....sss.shpLslAlsYGGRpEIh..........cAs+plh...............................................pth.tspls.pp...................................................lsc..ph....l...pptLhs..ssh.P............s...DLlIRTSGEpRLSNFLLWQsuYuE..laFscshWPDFsttchhpAlppYptR...........pR+FGt .............................................................................................IMDGNGRWA+p+....s.h.s....Rs....h....GH...........+tG...hc.sl.cchlphst.ch....GlchLTlYAFSo...ENW.pRP.pp.EVshLMpLhhphl.cp..l.tph.......p....c.....psl+lch.l.G.ch...s..p..L..sppltctlppu.....p...p..h.T...t......................s.Ns....u.......lpLsl.AhN....Y....GG.RtEIs..........pAs.+pls......................................................................................................pp..s...t..p...u.p....l.p....s...p..c..............................................................................................IsE....ch....l.spaLhs....ssh..P...........cs.DLlIRTSGE...p.Rl..SN.FLLWQhAY...uE...h.............aFo.-..sLW......PDFscpc.hhpA.....ltpapp.R.cRRFG.s.............................................................................. 1 575 1096 1447 +4884 PF00449 Urease_alpha urease; Urease alpha-subunit, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The N-terminal domain is a composite domain and plays a major trimer stabilising role by contacting the catalytic domain of the symmetry related alpha-subunit. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.56 0.71 -3.97 117 1905 2012-10-03 00:45:34 2003-04-07 12:59:11 15 17 1537 48 464 1638 555 117.10 58 20.98 CHANGED hclsRptYAshaGPT....sGD+lRLuDT-LhlEVE+DaT......sY............G-EspFGGGKVIRDGMGQuptsssst......sl..DhVITNAlIlDa.hGIlKADIGIK-G+IsuIGKAGNPDh.sGVs................lllGsuTElI .........................h.ploRptYAshaGPT....sGD+lRLuDTsLhlElE+Dho.............sY.................G-Esp.FGGGKsIRDGMGQuptsst-t.........shDhVITNAlIlDa..h.........G..I..lKADIGIKcG+IsuIGK.AG.NPDl...sGVs................lllGsuTElI.................. 0 139 288 388 +4885 PF00699 Urease_beta Urease beta subunit Bateman A, Griffiths-Jones SR anon Pfam-B_405 (release 2.1) Domain This subunit is known as alpha in Heliobacter. 25.00 25.00 36.20 30.30 19.70 18.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.02 0.72 -4.16 165 1820 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 1525 48 471 1322 409 99.30 54 51.53 CHANGED IPGElh.s...ssGc...IpLNsGR.p..slslpVsNoGDRPlQVGSHYHFhEsNsA.LpF.........DRptAhGhRLsIsAGTAVRFEP.Gpp+pVpLVshuGpRplaGFsuhltGtL ..................lPGElh....h...ts..s.-......IplNsG....+...t..shslpVtNoGDRPlQVGSHaHFaEsNsA.LpF.............DR....ptA.....hGhRLDIsAGTAVRFEP.Gp..p+pVpLVshuGpRplaGFpuhlsG.......................... 0 134 289 393 +4886 PF00547 Urease_gamma urease_gamma; Urease, gamma subunit Bateman A anon SCOP Domain Urease is a nickel-binding enzyme that catalyses the hydrolysis of urea to carbon dioxide and ammonia. 
23.90 23.90 24.10 24.70 23.80 23.80 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.08 0.72 -3.97 85 1774 2009-01-15 18:05:59 2003-04-07 12:59:11 13 11 1506 51 477 1161 409 96.50 60 54.59 CHANGED McLoP+Ep-KL.llasAupLAc+R+sRGLKLNaPEAlAlIostlhEG.ARDG.+..........oVA-LMshGpplLsc--VM-GVs-Mlp-lQVEATFPDGTKLVTVHsPI ..................McLTPREpDKL.hlhhAA.lAcRRpuRGLKLNaPEAlAlIostllE..........G.....ARDG.+................oVAELMphG..pplLs.................+-DVM-GVs-MIs-lQVEATFPDGTKLVTVHpPI.............. 0 139 295 399 +4887 PF01774 UreD UreD urease accessory protein Bashton M, Bateman A anon Pfam-B_1109 (release 4.2) Family UreD is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid [2]. UreD is involved in activation of the urease enzyme via the UreD-UreF-UreG-urease complex [1] and is required for urease nickel metallocenter assembly [3]. See also UreF Pfam:PF01730, UreG Pfam:PF01495. 20.40 20.40 22.20 22.00 18.20 18.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.25 0.70 -4.85 163 1645 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 1448 2 449 1350 498 205.80 23 73.12 CHANGED pshllpsu..GGlluG.Dplslslplsssu+shlTT.tuAsKlY+..........................u.sut...............................up.QphplplsssAtL-aLPp-sIhFssuphppphplcL...........sssAphlhhEhls...hGRs..u.tG..................................Epa........shsph.csphclhp.....ss..........c..l.hh.-phtL.psst.........tthss.ssh.........suhsshuollhhu...................ts.......p.th.hptl+..thh.......................t.th.hGs.ohh........sshlllRhLusssps...l+phh 
..............................hhllssuGGlluG.Dchplslpl.pss..upshlTo.puAoKlY+.............................s..sst.................................................up.QphplplsssuhL-alPpssIsap.sAchtppsplpL......................pssupllhh-hlshGRs..s.pG..................................Eta........phsthpsphclhh.......cs....................ch...l....hh..-phhL.sssp.............t.hst...shh.................................................tshshhuolhhls.................................................................p.thhptlp....thh.........................t.sh..th.uh.otl............sshlhlRhhu.pst.lpth.h................................................................................................... 0 123 268 370 +4888 PF05194 UreE_C UreE urease accessory protein, C-terminal domain Finn RD anon Pfam-B_6279 (release 6.1) Domain UreE is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid. The C-terminal region of members of this family contains a His rich Nickel binding site. 21.90 21.90 21.90 21.90 21.70 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.28 0.72 -3.88 99 1267 2009-09-11 21:12:50 2003-04-07 12:59:11 7 3 1165 35 255 871 383 91.80 29 53.94 CHANGED sEslhplpu....ss.hpLs.........+hAaHLGNRHlPh......pltss.....tlhlt...tD+VlccMLct...........L.G.....hplpphptPFpPEsGAY..................tt...ctHs..........................HsHs ........................Ecllhlps.....p.sh.hphu.........clAacLGNRHlPs............plpps..........clhl.........hDtll.cchLcp..................................L.G.................hss.pctct..Fpstttsh..............................................sHsH....................................s................................................................................ 0 59 142 201 +4889 PF01730 UreF UreF Bashton M, Bateman A anon Pfam-B_2037 (release 4.1) Family This family consists of the Urease accessory protein UreF. 
The urease enzyme (urea amidohydrolase) hydrolyses urea into ammonia and carbamic acid [2]. UreF is proposed to modulate the activation process of urease by eliminating the binding of nickel irons to noncarbamylated protein [1]. 20.50 20.50 20.50 23.20 19.70 20.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.52 0.71 -4.08 171 1651 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1481 11 435 1261 476 150.00 22 63.76 CHANGED hVpc..tsslpsWlpshLppshspsDsshLttsac.uhtt.........sDh......ttltplsphhhAtptotEhRtpspphGtuhhclhsp.......................h.tts.....ssh.................................................sasluauhsutthslshppsltuaLauhlpNhlsAAl+llPLGQssuQclLtpLps....h ..........................................................................h.lpcttshttalpthLp...phs.hs-t..hhlt.tsac...Ah..tt.............................sDh.......ttlhclsphhhAp.p..s.cEhRttspphGpph..hcl..htph.....................................htp.thtptp...........sps....................................................................shslshuhhut..............thuls.........hcpsltsahauhspshlpAAlRhlPLGQhsuQcllhplt..h.............................. 1 117 257 356 +4890 PF04115 Ureidogly_hydro Ureidoglycolate hydrolase Wood V, Finn RD anon Pfam-B_9183 (release 7.3); Family Ureidoglycolate hydrolase (EC:3.5.3.19) carried out the third step in the degradation of allantoin. 
20.60 20.60 20.60 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.42 0.71 -4.90 81 1067 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 957 8 269 684 700 164.00 40 89.59 CHANGED h.pp..lp..scPLTt-AFAPFG-VI-s.p................uss.......shhINpGpstRaH-lApl-s.s........su+shlSlFcu.pPpsLP...............................hplchlERHPhGSQAFlPlss.ps..................aLVVV.As..ss.............................................................ssss.sph+AFlssssQ....GVNYt+GsWHtsLhsLs....tsucFhVV......DRh.Gs.....us...Nh-Ehhhsp.shtlp .................................................................h..pLpl.PLopEAFusaGDVIEs..p..........................ttc...........hhhIN.sGhspRaH.DLAhl-hht...................psRslISlhRu.pP.ts.hP.................................................lslchLERHPhGoQAFlPhpu.cs...........................................FlVVV.A....s.s..............................................................-tP.c....uslRAFl..ss..GpQ....GVNYc+sVWHH.PLhuhp....pssDFlsl........DRu.us.........NC-..ths.......t.................................. 
0 68 139 207 +4891 PF01014 Uricase Uricase Bateman A, Griffiths-Jones SR anon Pfam-B_1333 (release 3.0) Domain \N 25.00 25.00 26.00 25.80 24.30 23.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.72 0.71 -4.09 122 880 2012-10-01 20:59:24 2003-04-07 12:59:11 13 3 380 220 499 906 5 136.40 24 88.46 CHANGED php.GhsslplLKss...........+hosh...pc+lhpssVssphph.....t..t.sh.hpucs....ssh..shsss..................pcsoltsaAt.......ssSlpphhhphupchL.sphsp.lpslplplssp+ahc...l........sh.pG...........................cu.lhtsscp.puhlps ..........................t.hp.GhsslpVlKss...........+.ssh...tccl.hshsVssphph..................p..htt.sh...hpucs......ssh...shscs..................p+sslhsaAt..s.....ssSlp..phhhplupchL..sph..sp..lppsclpls.s.tcahp..h.......................sh.tG...........................t.....scu.lhhsscp.hs.lp............................... 0 154 286 410 +4892 PF01208 URO-D Uroporphyrinogen decarboxylase (URO-D) Finn RD, Bateman A anon Prosite Domain \N 23.90 23.90 23.90 23.90 23.70 23.80 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.88 0.70 -5.52 173 4750 2012-10-01 21:20:02 2003-04-07 12:59:11 12 17 3576 28 1416 3843 3275 317.70 30 91.34 CHANGED ht.........pcthlpAhpG.cs.s-..........+sPlWhMRQAGRhlsEYpth+t..stsah-hs.psP-lsuElolpPhcpas..hDAuIlF.....uDI.ll.scAhGh.plphhp.sp......G..Phlt....pslp..........s....p-...lp..pL...........ph.ts......t..lshVh-ulphl+c....cl.....ss....clPLIGFsG...uPaTLAsYhltGtssp..shpph+phha......pcPchhcp.Llchls-sshpYlpsQl.cAGAp...slplFD....oauuhLuspp....accaslPah.p+lhstlcphh...............thPlIhaspGs.....s...hlpths..c....sG.s-...s.lul...................Dhps.......sl....stsp..........................................ph.....slQGNlDP.shLh....us.ctlcp.......cspchlc...........httttsaIhNLGHG...lsPpsss-slpthl-sl+ph 
.....................................................................................................................h..pphhlcAhht..p...ss...................hsPlWh...MRQ............AGRhh..........PEYpth+t...ths..hh.ph.s....pss...-..l..ssE...lT.l..pPlcpas....hD.A.A.IlF.............SDI..hs...p.u.h.Gh.sl...th...t..ut..........G......P..hh.p......pslp........s..........htD....lp....pL...................................t..ss..............clshVhculchl+c...........cl......tt..........................cl.P...L.I..G...F..s.G.........u..P..a..T......L.A.s.Y.hl..EGts.........S+sapph....+.t.h.h.a......pcP.p....h...h+t.L.Lc.pl.scssh...t...Y.Lp........s...Q......l....c......A......G...A.p.................u.l.l...FD.............oW...u..G....h....L...s....s.......ps..............a.ppFsh.sah.p+l.l.stlp..ppts............................PlllF.s.p.Gs................u.t....hlp.ths.....p........sG..sD.......s...lG...l.....................D.W..p..s.........s.l......cstc......................................plu..sphsl...Q.G......N.h..D...P......s.h.Lh....u....s........p...t...l.cp..................clppllp...........hststG.alh.NLG....H..G...l...h..s..p...ss..sE...plpthlctV+p.h.............................................................................. 
0 553 997 1240 +4893 PF01175 Urocanase Urocanase Finn RD, Bateman A anon Prosite Family \N 23.20 23.20 23.20 23.50 23.10 23.10 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.66 0.70 -6.47 57 1993 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 1787 16 520 1576 698 525.80 53 93.68 CHANGED .l+....As+G.....spLs..sKuWhpEAshRML.NNLDP-VAEcPc-LVVYGGhG+AARsWpsa-tIlcsLccLcsDETLLlQSGKPVGlF+THtsAPRVLIANSNLVPcWAsW-cFpcL-ptGLhMYGQMTAGSWIYIGoQGIlQGTYETFstsuRp+aG....G........sLpG+hhLTuGLGGMGGAQPLAssMsGusslslElDpsRIc+RlcptYLDchscsLD-AlthhccApppt.cslSlGLlGNAA-lhscLlcR.....Glh.....PDlVTDQTSAHDPlp.GYlPtGhol--hpchpp..p-P...pthhptucpShscHVcAMLthpptGs.sFDYGNNIRphAh-t.Gl..csA...........FcaPGFVPAYIRPL.FCcGhGPFRWlALSGDP-DIh+TDptlhELhP-sc+L.............ppWlchAc-+ltFQGLPARICWlGhG......-Rt+hGLtFN-MVtsGELpAPlVIGRDHLDsGSVASP.RETEuMtDGSDAluDWPlLNALlNsAuGAoWVSlHHGGGVGhGhS.HuGhVlVsDGT-tAscRlpRVLssDPuhGVhRHADAGY-tA.hpsA+-.ps........lclPhhp .........................................h.l+A.pGsplp..sK..uW.sEAshRMLhNNLDP-VAEpPc-LVVYGGhG+AARNWpsa-tIlcsLcpLpsDpTLLVQSGKPVGlF+THpsAPRVLIANSsLVPcWAsW-+Fp-L-ppGLh.MYGQMTAGSWIYIGoQ......GIVQGTY.........ETFspsu.RpHas........................G......sLpG+hhLTuGLGGMGG.AQPLAushAGusslslEsDpoRI-hRlcptYl.Dchsss.LD-ALshh..................pctpc.pt.cslSluLhGNAA-lh.cllc+........slp.....sD.llTD..QTSAHDPls.G.YlPtGhohE-hpphtp...p..D..P..pthhctucp..SMspHVcAMLshppt.Gs.sFDYGNNIRphAh-.t..Gl..csA...................FcFPGFVPAYIRPL.FCcGhGPFRWsALSGDP-DIh+TDttsp-.lhs.-.s.c+L............................apWlchAcE+ltFQGLPARIsWlGht...........................pRt+lGLAFNEMV+sGElp.APlVIGRDHLDsGSVASPNRETEuM+DGSDAVuDWslLNALlNoAuGAoWVSLHHGGGVGMGaS.HuGhVlVsDGo-cAscRlpRVLtsDPusG..VhRHuDAGY-hA.lcsApE.ps........lplPh..t............................... 
0 165 289 419 +4894 PF02083 Urotensin_II Urotensin II Mian N, Bateman A anon IPR001483 Family \N 18.60 18.60 19.30 19.30 16.80 15.80 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.31 0.73 -6.26 0.73 -4.32 8 70 2009-09-10 16:31:46 2003-04-07 12:59:11 10 1 37 0 27 66 0 11.70 67 10.48 CHANGED ssss-CFWKYCV .....tsps-CFWKYCV 0 2 5 12 +4895 PF02393 US22 US22 like Bashton M, Bateman A, Zhang D, Aravind L anon Pfam-B_1016 (release 5.2) Family US22 proteins have been found across many animal DNA viruses and some vertebrates [3]. The name sake of this family US22 Swiss:P09722 is an early nuclear protein that is secreted from cells [2]. The US22 family may have a role in virus replication and pathogenesis [1]. Domain analysis showed that US22 proteins\ usually contain two copies of conserved modules which is homologous to several other families like SMI1 and SYD (commonly called SUKH superfamily) [3]. Bacterial operon analysis revealed that all bacterial SUKH members function as immunity proteins against various toxins. Thus US22 family is predicted to counter diverse anti-viral responses by interacting with specific host proteins [3]. 
35.00 35.00 35.10 35.00 33.50 34.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.58 0.71 -4.10 36 718 2012-10-01 20:46:44 2003-04-07 12:59:11 11 4 53 0 8 635 0 131.60 19 44.51 CHANGED sttshtslpphsccppGpplslstst.....thhlhlsshpshh..............tttthpphttthl.spspphhllGhlsp..s............................................................................................................hsphllllsptGpVYsacs..............cplahlA.sslppFhctGlhphtthh ...................................t...s.ttlpphlpchpGpplsLthPt.....shhlhl.s..stpphh...................stphhpphhpt..hh.s..tsp....th.hslGslsthts....t......................................................................................................................................................................................psphl...llls.ptGpVasac.s.tp..............sp.lahlA.cslppah+.hGlhph....h.......... 0 3 3 8 +4896 PF00577 Usher Outer membrane usher protein Bateman A, Desvaux M, Eberhardt R anon MRC-LMB Genome group and Prosite Family In Gram-negative bacteria the biogenesis of fimbriae (or pili) requires a two- component assembly and transport system which is composed of a periplasmic chaperone and an outer membrane protein which has been termed a molecular 'usher' [1-3]. The usher protein is rather large (from 86 to 100 Kd) and seems to be mainly composed of membrane-spanning beta-sheets, a structure reminiscent of porins. Although the degree of sequence similarity of these proteins is not very high they share a number of characteristics. One of these is the presence of two pairs of cysteines, the first one located in the N-terminal part and the second at the C-terminal extremity that are probably involved in disulphide bonds. The best conserved region is located in the central part of these proteins [4-5]. 
18.90 18.90 19.70 19.00 18.60 17.90 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.58 0.70 -5.79 34 6892 2012-10-03 17:14:37 2003-04-07 12:59:11 15 21 937 10 466 4642 66 502.70 31 67.01 CHANGED ahshpsGlNlGsWRLRsss..oaspspsp...........ttpappsphaLpRulspL+ucLslG-shTsuclFDohsFpGspLsSD-sMlPss.RGaAPsl+GIApo.sA+VTlcQNGhlIYpphVPPGPFpIsDl.sssss...GDLpVplpEpDGshppasVPhoolshhhR.Gph+YslsuGchcss......sppppsshFhpuohtaGLstshTlYGGshhup.cYpuhuhGlGtslGshGAlShDsTpupuphsspp.....scpGpSa+hpYsKshstssTslplsuYRYSocsahohs-hl.sp........................phphsp+..sphplsloQsl.us.....usl.lsssppsYW..........tssssspphpsuassshts.lsholshShscsptpppt.....DptlulslSlPhsph.s..............hhuohshspspsupss.psGl.Gshh.cpphsYslptuhsssspps....sshshsapushuplsuuhsasps...pphshulSGuhluasp.Glshupths...sThsllcssG.suGstlss....uspTDhpGhuVlshhssYppNplslDsssLPssl-lppsstpVlPTcGAlVhspF ..................................................................................................hthpsGlNlGsWRlRsps..s.a.spss.sp.......................tpap..ptah....pR....sl.slp.upL.tlG-.s.h..o.s...u...s..l...F.D..o.hs.a.p.GspLtoD.cpMLPss.pGa.APh.l..pG..lA.p...o..sApVolpQNGhhIYpotVsP.GsFtIsDL..sssss.....................GDLpVslcEsDGp..ppa.plPa.uol.P.hh.R.Ghh+Ysl...ssG....ch+st...............sttp.p...ps.Fhpush...aGls......s.shT...........h..Y......G.Gh.......hu...p..pYpuhs..hG.h..Ghs..h.......h.G..A.lShD.sTpup.u......p...h....s.s..tp.............p.pG.....p.....Sh.....R.h..t...YsK...p..h....s...p...o...s..T...sh..p....l.s..u..YR....Y.S.o.p.s.a.hshs-hh.tp.t..............................................p.ths..p..+......sphphslsQ.sl..st........huolhl.o.h..s..p..p.........s.YW..............sss....s..pspph.phu..as.s.....s..h.....t....t.....hs..h..ol......uhs....hsp.....st.....pppt......................-phh...l...sl....Sl.Phsth......................hohs.h..s.p.s..p.p..u..t...s.s.p...p..h.ulsG...shh....s...s...p...h...sYs..lpt.uhsp..p..sspss.........ssushsap.us...h..up..h..ss.......u....h.....
.s....h.....s....p.....s..........p.........p..hs....h....uh......s..........Gul......l..s..a..st..G.ls.h..u..p..hs...........-Thsll.c.......A.....s.G...s..t..s..s........l......p.....sp........sstTs..hh...G...huVlshhosYppN...plslDss.s.l....s.s..s.s-l.pp.s.s.tp.lsP..s.c..GAlshspF............................................................................................................... 0 58 144 319 +4897 PF04871 Uso1_p115_C Uso1 / p115 like vesicle tethering protein, C terminal region Kerrison ND anon Pfam-B_6073 (release 7.6) Family Also known as General vesicular transport factor, Transcytosis associate protein (TAP) and Vesicle docking protein, this myosin-shaped molecule consists of an N-terminal globular head region, a coiled-coil tail which mediates dimerisation, and a short C-terminal acidic region [1]. p115 tethers COP1 vesicles to the Golgi by binding the coiled coil proteins giantin (on the vesicles) and GM130 (on the Golgi), via its C-terminal acidic region. It is required for intercisternal transport in the golgi stack. This family consists of the acidic C-terminus, which binds to the golgins giantin and GM130. p115 is thought to juxtapose two membranes by binding giantin with one acidic region, and GM130 with another [2]. 
22.20 22.20 22.30 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.88 0.71 -4.23 6 224 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 191 0 157 229 1 127.70 25 12.86 CHANGED tclpsEuptppshAAKhh-h-s+h.tph.suLtQtpppLcpE.....hKuLpt...s..t...tcsspphsplcslKpcLctE.........u.p-ucsEh-DLLlLLuDp-pKlp+hcu+Lp-LGh-V..--......tD-u.tsp--D--E .........................................................................t...............ttt....-.p.....ph...............s..t.L..p.t.p.pp..hcpc...................hctLp....c...t.....p.....t....thp.pp...h........pptpsp..h.s..t..........L.p.s.t+pc...L..c...t.E................................s.p..c..s..cp....E.-DLLlLLuD.-pKlpphKp+LK-LGp.pVp---........t.p..--p.-cppt....p............................. 0 37 75 124 +4898 PF04869 Uso1_p115_head Uso1 / p115 like vesicle tethering protein, head region Kerrison ND anon Pfam-B_6073 (release 7.6) Family Also known as General vesicular transport factor, Transcytosis associated protein (TAP) and Vesicle docking protein, this myosin-shaped molecule consists of an N-terminal globular head region, a coiled-coil tail which mediates dimerisation, and a short C-terminal acidic region [1]. p115 tethers COP1 vesicles to the Golgi by binding the coiled coil proteins giantin (on the vesicles) and GM130 (on the Golgi), via its C-terminal acidic region. It is required for intercisternal transport in the golgi stack.\ This family consists of part of the head region. The head region is highly conserved, but its function is unknown. It does not seem to be essential for vesicle tethering [1]. The N-terminal part of the head region, not within this family, contains context-detected Armadillo/beta-catenin-like repeats (Pfam:PF00514). 
25.00 25.00 27.70 25.80 20.00 20.70 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.77 0.70 -5.67 26 295 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 240 4 205 303 1 289.20 30 29.08 CHANGED GNtphQppFuplsV........s.h-...ss.........s..lsVl.sLLphhLt.ssh...psaDlRsAushClcuYhhsNpphptpFLpptIsuapsss..............................................s.psN....lhssLht..p.ph..sh-PYphWFuullLhHllh-sscs....KphhhpV......t..pGspssGE-slosIQs...........loslLlsslp.spD....................RlslGYLMLLssWLac-.sAVs-FLu-toslpsLls......sppssspsslVpGLsuhLLGlsYEFS.opsSPhsRtcLapLlhpplGp-sYhsKlppl+cpslapchp...psphshs.cp..tLP.........-laFDphFlcLhK-sasRlp+Al ................................................................................................................................st..Q-.Fuplps..................................s...ss.shsslssLLh.hlpp..p.............pshslRsAshhChps..ahhcNppsptpllps..h...lsuphsss..........................................................sthss..........lhssLh................ssDs...hps...Wh...AuVhLhHhl.-sspt....Kc.hhcV..................................phss.s.......G..ptslo...hlQp............hs.shL..p.....t....s.sc...............................................phplGhLhLLssWLhpss.AVscFLp..............psuslthLhtt............spp.s.p..pc.ll..pGLsAhLLGlsh.Fs.sp...s..s................hs+p.....plpplltpRlGp-pahp+lstlpcp.hapchp.........p.phstsp......................................phhFDppFschhKc..uhlh+ul............................................... 0 68 111 170 +4899 PF00582 Usp Universal stress protein family Bateman A, Griffiths-Jones SR, Kerk D, Studholme, DJ anon MRC-LMB Genome group Domain The universal stress protein UspA Swiss:P28242 [1] is a small cytoplasmic bacterial protein whose expression is enhanced when the cell is exposed to stress agents. UspA enhances the rate of cell survival during prolonged exposure to such conditions, and may provide a general "stress endurance" activity. 
The crystal structure of Haemophilus influenzae UspA [3] reveals an alpha/beta fold similar to that of the Methanococcus jannaschii MJ0577 protein, which binds ATP [2], though UspA lacks ATP-binding activity. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.68 0.71 -3.99 231 22843 2012-10-02 18:00:56 2003-04-07 12:59:11 21 106 4199 94 7308 17012 1965 137.10 18 62.93 CHANGED hhc+llVuhD..soppup.pAlphAhp...hupp..tstlhllaVhsstshtttsthhttttttthttttt..................................t.thhthphhhhtssssptlhphscptss-llVhGspu............hsshpc.hl...lGS.ssppllcpuss..PVll.l+ .........................................................................................................................................................pplllsl..D.........so.pp.....up......p....u....l..c..tAhp...........................hApp........s....u.....p....l...p.l....l....p..l.....h......s......s.........s.....h...t....h......s...t.....t..h....t......h.....t...h...t...t...t..h.t..t.t.t...t.tt.............................................................................h.t.t.h.s.t..h.....p..h..h...l.......t.....t.....u.....s.....s....t.....p....s....l.h..c....h.............s.........p.....p....t.......s..........s.........-.......l.lVhG.sps.............................ts.s..hpp..hh..............lG.S...su..ppl.l.p.....p...u..p..s...sVlll............................................................................... 0 2051 4696 6329 +4900 PF03253 UT Urea transporter Bateman A anon Pfam-B_3193 (release 6.5) Family Members of this family transport urea across membranes. The family includes a bacterial homologue Swiss:Q9S408. 
25.00 25.00 26.30 26.30 19.50 22.30 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.92 0.70 -5.53 26 784 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 618 6 188 566 13 275.00 32 84.53 CHANGED phl.....-hlLRGluQVhF.NNPlSGLlILhulhl.....psshhulsullGslhSTLoAhllstc+utIssGLaGaNGsLVGlhluhF.sh........hsh.hhhlhhsuhhssllsuu.LtslhspaclPshThPFslsshLh..lhust+hs.h.st..h........pPssstss.shsthsls..phlpulhhGlGQVahpsNsloGhlhLlulhlsS.lhslaAllGSslGhlsu.LhlusshsslhtGLaGaNslLsslAlGuhFhhhshpotLhulhsslhsshltsulu.hhtslGLPshThPFslsoh.hhLlsssphphhc ........................hlchlL+uhuQVhh.sNshoGLhlLlulhl.....ss.tl.uluuhlGollushhAh.......hls..h......s..c.............s.pl..psGLhGa...NusLsulslslFhst.............phhhlhh.shluohhsshlssA.lp.p.l.....h.p.....a.....clPshThPFllssW.hh....lhh..ssth.phh....sshhh..................hPt......tss.....h.s..ph.ph..................phlpulh.GhuQVahts.s.s.luGllh.llGlhIsShhsulhAlluShluhhhs..hhL.....uu....s...............hss.....It.tGLa..GaNslLsuIAlGshF.t..s.h..p.shlhslh....u.s.lho.sh.lp......huh......sshhtshGlP.shThPFllsoW.lhLhss...................................... 0 43 66 119 +4901 PF01099 Uteroglobin Uterglobin; Uteroglobin family Finn RD, Bateman A anon Prosite Domain Uteroglobin is a homodimer of two identical 70 amino acid polypeptides linked by two disulphide bridges. The precise role of uteroglobin has still to be elucidated [1]. 24.80 24.80 25.10 24.90 23.80 23.30 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.16 0.72 -4.17 32 271 2012-10-01 20:54:19 2003-04-07 12:59:11 12 1 46 12 118 280 0 65.00 24 72.61 CHANGED Cssltphlpthl.s..o.spYct.LppapssstshpAttplKpCsD.phopcs+tpltphhtpIhpS......hC ................Cshh.phlpthl.s..o.stYcthLppasss.tshpAhtplKpCsD.pls.c.s+tpltplhttlh................... 
0 9 9 22 +4902 PF03998 Utp11 Utp11 protein Bateman A, Wood V anon Pfam-B_6404 (release 7.3) Family This protein is found to be part of a large ribonucleoprotein complex containing the U3 snoRNA [1]. Depletion of the Utp proteins impedes production of the 18S rRNA, indicating that they are part of the active pre-rRNA processing complex. This large RNP complex has been termed the small subunit (SSU) processome [1]. 25.00 25.00 26.30 25.90 23.60 23.30 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.92 0.70 -4.55 50 346 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 301 0 243 331 3 230.20 32 92.06 CHANGED ++sH+ERuQPpsRp.+hG.lLEK+KDYphRAcDY+cKpppLKtL+cKAtp+NPDEFYatMhssKssc........Ghthtppt.....spthoh-pl..+LhKTQDhsYlcpptps-t+Klc..+hpppL..h..s.tsps................................................+HhlFsDscc-.pphp.tph........................possphlscpps+..p.ptlpt..h...........................t..pphpppptpphppLpp+hpRpcpLpplppchphp+clh..ppt..ppp+lhtsp..............................sssha+a+tpRKR ..............................p+pH+ERuQPttRp.+hG..lLEK+KDYphRAcDa+cKpppL+tL+cKAtp.+NPDEFYatMhss+sps...........Gh+htppp.................................pcpho.-.pl........+Lh+TQDhtYlchptpt-t+Kl-.+LpppL.....th.hsht...sps...................................................................................+HhhFsD.s.cc.......Ehc.p......hp.tp.hh...........................pstsphhsct.sc........phppltp...............................t.tthpphtcppppphppLpp+hpRtppLthhtpchp..hp+thh..........t.t.....phh+h..h.tt.t...............................................t.shahahtpRK+........................................................... 0 84 134 201 +4903 PF04003 Utp12 Dip2/Utp12 Family Wood V, Bateman A anon Pfam-B_10105 (release 7.3) Family This domain is found at the C-terminus of proteins containing WD40 repeats. These proteins are part of the U3 ribonucleoprotein the yeast protein is called Utp12 or DIP2 Swiss:Q12220 [1]. 
26.00 26.00 26.00 26.00 25.90 25.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.08 0.72 -4.15 96 902 2009-01-15 18:05:59 2003-04-07 12:59:11 7 30 298 0 660 885 5 107.30 20 13.59 CHANGED s-pphlppll.pslssspIctsltpLPhsal.pLLchlsphhp.pps.....+lphhhtWlphllptHup.lssp...........................plhspLpslpphlppphpplpclhshN.ttLphLtsph ...................................ppphltpsl...pslss.s.plcpslt...pLP.h...........s.....hl.p.LLphlsphhp..pp.s................clphh.ht..WlphlLphH.ushlssp...........................................p.hhstLpsLpphlppphpplp...clhshN.ttLphlht..h.......................................................... 0 207 353 543 +4904 PF04615 Utp14 Utp14 protein Bateman A, Wood V anon Pfam-B_5404 (release 7.4) Family This protein is found to be part of a large ribonucleoprotein complex containing the U3 snoRNA [1]. Depletion of the Utp proteins impedes production of the 18S rRNA, indicating that they are part of the active pre-rRNA processing complex. This large RNP complex has been termed the small subunit (SSU) processome [1]. 
21.20 21.20 23.80 21.60 19.70 19.70 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.68 0.70 -13.55 0.70 -6.37 39 482 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 291 0 309 487 11 532.10 22 84.48 CHANGED sS--...............................ps-pp.....--cchpcLhshlssLspcscp......................pp+pttsptptp.sEFsl.....sopst......KLslsDL.lsslp....ssphppstKtLsphppspp.........LcsPLsKcpQ-Rl-RpAAY-popcpls+WpssVppNRcAEpLhFPL....t..sstssshsphhsshpPpT-LEpcltslLpcSsLs.............s-cphsphE.-lphpchohEEh+tRRsELp+hRpLhaRcEtKAKRlKKIKSKsY++lc+Kc+t+...pp.pptLhcsss-tuc--hpch-cpRApERMoLKHKssSKWAKshhp.GhuphDp-sRpuhpE.LppsccLp..cKlpupp.sctspc..ssps.p-s.-cp........t.ps.pththppchpchtt.t..t............stlhshpFMpsuEscc.+cpscpEhctLp......cEhct.-stppc.t.................tppp.GRRpau.....ttpt.sppttppsppphcpthts-cppt.tsc.ptphppt.s........................tpspptpptpcppssscpNsahppspppspssppptpp.................tp.s.t.tpht.t...........ppcpKpppsstschhhspsp.............................sppp---tp.s..........hcpp-lIpc......AFAGDDVV.t-FppEKpcslcc-ssK.-lDhTLPGWGsWuGsGlppp...p..p...++hltKsctl.phcpRKDppLppVIINEK.ps.KKss+a.sspLPaPFco+pQYERSlRhPlG.-WsocpoaQchT+PRVlsKtG.lIcPhctPh 
...............................................................................................................tp...................................................................................th.t........h................................................................................................tl.ltph.h..ht..........t...t.....t.hp....h....t..t.ttt....................................lt.h..PL.p........pc.hpRthsht.spp.ht.p.W..hl.........ttc..........putp.l.Fs..................................................h....t..s..ts.hptpl...thh.tst........................................t..c......hthtt...hs..cEhh....+ptc...hthhRtl.....h.ph+u+R.pKIKSKta++lh++t.c.+.............................................pth.......t..ss.........t..s.p.h...t.-htRh.ERhsh+Hp..p....upWA+ph.....hst..c.p.sRttht-...h...thtccLp..p+h..................t.tpt....p.........t..............................................t...h.tt.tt.................................................tt..t..hhpts.ttt.....tt..t.t.t.h......................pp.pt....pt..tt..............................................p.tcp.....t..................t..........t...t..p....tt..............t.......................t.....................................................................................tt..........t..........tt.h.ttt........t...............................................................................ttptp....t...t..p.....t.t...............................................................tt.ptp.t......................................pppthltp......AF.....s...s.D..-l..t-F.p-Ktp...pt....pts...........p.......h.....s.........s..L.PGW.Gp.WsG.s....hp.p......................p+.hh+h..............R+Dtph.pVII.s.E....c..hs..h...............p...tth.sp.lPaPap.p.t.paEtshphPlG.passttshpthhtPpl............hh+.G.lItPhp................................................................................................................... 
0 127 183 260 +4905 PF03851 UvdE UV-endonuclease UvdE TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.10 20.10 20.30 20.20 19.90 19.80 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.82 0.70 -5.38 6 508 2012-10-03 05:58:16 2003-04-07 12:59:11 9 6 393 5 182 475 377 265.00 34 70.05 CHANGED hGYVshshtLtsspPshThotTphhphtccEt.............................cpLhclspsNl+shl+hLcaNhuasIphaRLSSslhPhAoHP..chGachhshhsppLpElGclss-ashRlohHPsQFTllsSs+ccVscsAlpDhsYHh+lLcuhtls-p....ulllIHlGGtatsKcssl-cF+cNhtcLPpslKpRlsLENDD+oYTsp-lLslCEchsIPhVhDaHHHsls.....hccssL-.sh.....RIhpTWp+pslp.KlHlSsPtsspshps+p-hhcuchlhsF.pph ..................................................................hGYss..hs..hhL.hs.s.sP...s.tho.hsp.htph.t.cp.c.t.............................c+LpclsppNLcshl+lL+aNhs.a..s..IphaRlS.S.pllPLAoHs........ht...a.....sa.....h.....t...........h...p...pthpclG....ch...spchshRlohHPs........QF.slL.sSsc.c.l..hpsulp-LpYHtchLc.shG.lspp.....shhslHlGGs.Y.G.s...Kctul....-R.FhcNa.p.p.L.....s.p.....pI...+c......plsL...EN.....DD...p..o......a...o.....hc-sL...lscc...h......s..I.PhVaDhHHHhhs..........................pcps.hc.hh...........plhp...TW.........p...........p..........p.sls...s.KhHhSsP+st..t...t.psHschhc.phhhsh...h.............................................................................................. 0 65 119 156 +4906 PF00580 UvrD-helicase UvrD/REP helicase N-terminal domain Bateman A anon MRC-LMB Genome group. Domain The Rep family helicases are composed of four structural domains. The Rep family function as dimers. REP helicases catalyse ATP dependent unwinding of double stranded DNA to single stranded DNA. Swiss:P23478, Swiss:P08394 have large insertions near to the carboxy-terminus relative to other members of the family. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.06 0.70 -5.13 36 18162 2012-10-05 12:31:08 2003-04-07 12:59:11 16 135 4762 24 4265 16492 6532 265.50 23 35.17 CHANGED LNspQppAlpt...hpushLllAGAGoGKT+llspRlsaLlpppt..lsPpp........ILslTFTNKAApEM+cRltp...................hltpt.........................hcthhluTFHohshclL+pphppls.......................hppsFplhDppD..................................phtllcclh......pt.hshstchhp............................................................................................................................................................thpthlsphKsphhpspphtp.................ts.htp.htphappYppphpppsh........................................................................................lDFsDLlhhshp..................................lhpp.............spplhpphpp+a+alLVDEaQDTNthQYpll+hLsspptp...........lhlVGDsDQSIYuaRGAclpNllphpc-as ..............................................................................................................................................................................................................................................Q.t.s..h.............tt.....s....h.....l..l...Au...........A.GoGKT.t...s.l...s...t....+........h....h.h.l...l..............tt.............h.t..s....t.p..........................................ll.slTFTp....t..AAt..E..h.c...pR...ltp..............................................................................................................hh.......................................................................................t.....t...h.hl......t....T.h.....H...u..h...s......p..h....l...p...t........h.......h.s............................................................................................................................h....t..h.p..h...pttp.....................................................................................................................t.h..h..t....thht.....
..........................................t......h............................................................................................................................................................................................................................................................................................................................................h...........h....h...p...t........p.....p...t...h..h..............t.h...t...............................................................t..............t...h...h...t....t....h......h.....p....t......a....p....p....t....h....p....t....t..s..h....................................................................................................................................................................................................................lD.F..s..D...l...l..h.h.s..h..p.......................................................................................hh.pp........................................t.l..h...t...t.h..p...p...p....a...p.hlhl.DEa.QD.o.s...h.Q.h.t.....l....l.....p.....h....l...h.....t...t...t...tp............................................lh..h..V.GDscQuIY...s....a...RGA.p..p...h.t.h.pp................................................................................................................................................................................................................................................................................................................................................................................................... 0 1443 2832 3657 +4907 PF02614 UxaC Glucuronate isomerase Bashton M, Bateman A, Eberhardt R anon COG1904 Domain This is a family of Glucuronate isomerases also known as D-glucuronate isomerase, uronic isomerase, uronate isomerase, or uronic acid isomerase, EC:5.3.1.12. This enzyme catalyses the reactions: D-glucuronate <=> D-fructuronate and D-galacturonate <=> D-tagaturonate. 
It is not however clear where the experimental evidence for this functional assignment came from and thus this family has no literature reference. 25.00 25.00 25.70 25.50 22.20 20.60 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.57 0.70 -5.87 42 1495 2012-10-03 00:45:34 2003-04-07 12:59:11 9 6 1344 79 293 1024 317 436.20 44 97.60 CHANGED Mp.hhsc.sFLLs.scs...AcpLa+chApshPIhDYHsHLsPp-Ih-s+hapsls-lWLh....GDHYKWRhMRtsGlsEphlTG.susshEKFhAaAcTl.hshuNPlYcWoHLEL+R............hF...Glst.hlspcsAspIWcpsNthLt.s....sshpscplhppuNVchlsTTDDPhDsLchHptls.t.....cpsh..hpVhPuaRPDcslplct.psassalcpLutssshplpshsshhpALcpRhcaFcphGschuDHuls.phhasc.ss..phpthhtph.hssp.hottp.tpapohlhhhLsphhpcpuashQlHlGuhRssNsthhppLGsDsGhDSlsc.s..spsLppLLsphsppstlPKhllYsLNPp.sshhluohhssFp..tt.h..plQhGuuWW.....FsDs.tsGMhcQhpphup.uLhosFlGMLTDSRSFLSY.sRHEYFRRlLCsllGchlE....pGphPs-tphlschVcDIsasNuccaF ..........................................................Mt..Fhs-.sFLLp.s-hA+cLYHcaAcc.PIhDaHCHLsPppIA-.s.hpFcNlsclWLt.............GDHYKWRhM.......Ros.......G.......V..........s..........E.....c.....h.....h........T.....G.......-..........u.....o.....D....hE....K.......FpA.W...ApTl....P...pslGNPL.YHWo.HLEL.+R...............sF.....GIst....lLuspoA-cIasp.sN-h.Ls.p....ssFosRu.lhpp.hNVchluTT..DD.PlDsL-aHtplut.........Dsuas...h+VlPoaRPDKu.hs.....I.-t.ssFsca.l.t.+Luclus.s.s.Ip...passhhp.ALpcRh-aF.st.p.G.C+.sSDHul-...slhasp.......s.........s........-s..clcuIhs+t..luG.p.s..l.......opc.Ehs.pF+oslLlhLut..YtccGWV.Q.hHhGAlRNNNt.chFchLGsDsGaDSIsDps..huptLs+LLsphspps.LPKTILYsLNPp.DNcsluohhGsFQ......stuh.....s.GKlQaGouWW.....FNDp.+-GMpR......QhppLu.phGL....LSpFVGMLTDSRSFLSY.sRHEYFR.RILCphlGchVp....sGEhPs.D..sh..L.uchVpsIsasNAppYF......................................... 0 103 204 246 +4908 PF03786 UxuA D-mannonate dehydratase (UxuA) Finn RD anon COG1312 Family UxuA (this family) and UxuB are required for hexuronate degradation. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.21 0.70 -5.89 10 1615 2012-10-03 05:58:16 2003-04-07 12:59:11 8 7 1348 10 330 1058 320 354.40 48 97.28 CHANGED MchsaRWaGstsDsVoLpcl+Ql.GVcGVVuALacIPs.......G-lWshEcIhchKcpIEsuGLslsVlESVPVHE-IKlGsssR-+YIENYKpTIRNLAcsGI+VlCYNFMPVhDWoRTDLphcLsDGSpALtF-cpclsshsPp..lh+p.sustsas.............................LPGhE.ph.Lsph+phhptY+cIDpEcLa-NLuYFLccIIPVAEEsGVKMAIHPDDPPaP.IaGLPRIVost-shc+llchsDSPsNGIThCoGShGspssNDls-MI+cFuc..RIaFuHlRNlKtp..sscsFhEouHhs..GulDMhslhKAhh-psacG..........hhRPDHG+plaG-p....spPGYuLhsRhhGluYlpGLa-Alpp ....................................Mc.TaR.WYGs.....sDPVoLpclRQh..G.soG.lVoAL.H.c.IPs.........................G.E..l.W..sh-EIhchKthl......E.......s...........s......G......L.pasV.........VE....S..V.P..l..H..E....-....IKtt.s....s....s.h-paItNYppTLRNLApsGIcsVC...YN..F..M..P.V.hDWTRTDLpa...h.sDGS.puLtFDphp..hs.....uh.-hp...ll..c.p...s.u.p.s-as..................................................LPGhE...pth...tL.......sph+ph..L.....thYc..sIscscLR-NhtaFLcsIlPVAEE...sGl+MAlHPDDPPhs...I..hGL.PR.Iloo..h..-Dh.phhlc.s.V..sS..uNGh.ThCTGSh....G...s...c...s..-N........D...L.ss.hl+p.FGs...RIaFsHlRss......p......t....p.............s........s....p....sFaEuuHLs.......GslDMapllKA....ll....-.p.p.act..........shRPDHG..+ph.hsDh..............tspP.GY....uhhuRhhGLu.lpGl..Alp.t................................ 
0 117 222 269 +4909 PF03223 V-ATPase_C V-ATPase subunit C Mifsud W anon Pfam-B_2945 (release 6.5) Family \N 23.10 23.10 23.90 29.00 23.00 23.00 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.06 0.70 -5.54 33 459 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 321 1 294 434 5 354.10 38 95.30 CHANGED ahllShPsptsstps.....hpph.hhs.thtsshssstcFslP.-hKlGT.................LDsLlshSD-LsKlDstlEuslpKltphlt-l...psppspltpshhssst................................................................................................................................................................................................................................................................................................................................................................................................................................................................slssYlpp....FpW...spuKYshcp.slppll...-hluppssplDsDl+s+hssYNssKusLpshpRKp...........................oGsLts+sLs-lV+t.....-cFl..cSEY..LsTlLVsVPKs.hp-a.psYEoLoch..............................VVP+Suphls.....pDsEasLasVsLF........................KKsh--FpppsRE+.KalVR.-FsYsEcthppt+pEhschtscccp.h............................................ssLlRhh+ssaS-sFhuWlHlKALRVFVESVLRYGLPssFtuhllpsst...Kst.c+l+phLtphasaLss....suhststcsc..hs.suL...........hsppEYaPYV 
...........................................................................ahLlShPsptpstps.......hpp.hpthp...thp.ss..hu.sshcFsIP.-h.K..lGT...................LDsLlshSD-LuKl.DshsEu.....llpKlsphltcl..h.-.ss..........psplpp..phhsN...s...h................................................................................................................................................................................................................................................................................................................................................................................................................................................................slspY.l.p.p....FpW...shuKYsh..c.p.sLppls.................-hlscp.....lspIDsDlKs+hstYN.slKssL.ps.l.pRKp...........................s..G..sLhsRsLs-lVc..................-c...h....V...-SEYLh..TlLV.lVPKtshp..-W.psYEoLssh.......................................VVPRSophls..........cDs-.hsLasVTLF............................+Kss--Fpp+AREp.KFh.lR.-FpYs.Ect....hcpp+cEhs+lts-c+c.h...................................................u..LlRhh+ssaS.....EsFhuWlHl.KAL.RVFVESVL....RYG....LPs..sFtuhllpssp.......................Ks....++l+phLpphataLsu....suhst..hps.....t.....hp.sul.............hspp-YhPaV............................. 0 99 156 232 +4910 PF03179 V-ATPase_G Vacuolar (H+)-ATPase G subunit Mifsud W anon Pfam-B_1274 (release 6.5) Family This family represents the eukaryotic vacuolar (H+)-ATPase (V-ATPase) G subunit. V-ATPases generate an acidic environment in several intracellular compartments. Correspondingly, they are found as membrane-attached proteins in several organelles. They are also found in the plasma membranes of some specialised cells. V-ATPases consist of peripheral (V1) and membrane integral (V0) heteromultimeric complexes. The G subunit is part of the V1 subunit, but is also thought to be strongly attached to the V0 complex. 
It may be involved in the coupling of ATP degradation to H+ translocation. 22.90 22.90 23.00 22.90 22.70 22.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.38 0.72 -3.74 34 591 2012-10-02 21:03:42 2003-04-07 12:59:11 10 9 372 2 345 587 15 99.90 33 78.74 CHANGED upssGIQpLLpAE+cApchVscARct+scRLKQAKpEApcEI-pYRtp+Ep-F+chcspphGs..pssstpcl-p-TppcIpplppssppp+cpVlphLLphVhsl+ ........................pppGIQpLLpAE+cAtchVsc.A..Rc...........p......+s......cR.......LKQ.AKcEA.....ptEI.-p..YRtp+EcE.Fc.p.hps.pt..........hGu.......ptshspcl-.p-Tptcl..pplppthppp...pcpVlptLlphVhsl..................................... 0 105 184 265 +4911 PF03224 V-ATPase_H_N V-ATPase_H; V-ATPase subunit H Mifsud W anon Pfam-B_2481 (release 6.5) Family The yeast Saccharomyces cerevisiae vacuolar H+-ATPase (V-ATPase) is a multisubunit complex responsible for acidifying organelles. It functions as an ATP dependent proton pump that transports protons across a lipid bilayer. This domain corresponds to the N terminal domain of the H subunit of V-ATPase. The N-terminal domain is required for the activation of the complex whereas the C-terminal domain is required for coupling ATP hydrolysis to proton translocation [3]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.92 0.70 -5.31 37 456 2012-10-11 20:00:59 2003-04-07 12:59:11 9 17 312 1 294 475 12 285.20 24 62.84 CHANGED shlsphtsplRtp.....lsWpuhhcuphlopcshphl+pl-ph..pp.+tp...hl.ppsss.thsphhl......sllppl...pcp-slpYlLsLlsD.lLsps...thtphhhshtp.tp.p....sapshLp.hhppp.Dthls.h...ushllspllstsshptsp..................chL....thhhsh..Lps...hhss................................................................................pssth...........................hslpsLptlLptcpaRthFhp..scslphLhs.lL...................................................................................sspsslQL.YpsllslWlLSFpsphstphhppp..ll.hLscll+pos..KEKlsRlsluslhNLl...spstpt.........................................hhthhl.sphlt.hlppLppR+a....sD--lh-DlphLp-hL ............................................................................................................................................................................................................................................................ht..ttplhtp.lsWpshhput..hlotpphphlpth-ph.ptp..+tp.........hl..pppss..ths.phhl........slhpph.........p+ppslpYlLshls-..hLpps.....ptsphhhp.tt.tp.t..............at.hL.t...hh.s..pt..-.hh..h......uupllsplhshstp..pt..........................psL....phhhsh..lps.......h.p.s...........................................................................................................................ss.sth..............................phsh.psLphh.L.+hpp.hR.hahp...........ssslp.slhs...lL...........................................................................................................................sspsshQl.YphlhslWhLoFpsthst.p.h..p.t..................ll.hlscllptss...KE.KlsRlhlushpNhl..ppstt..........................................hh..h.l.....spl.t..lp.Lpt.p.+.a....sDtDl.cDlp.L.-hL.............................................................................................................
.............. 0 106 168 246 +4912 PF01639 v110 Viral family 110 Bateman A anon Pfam-B_1518 (release 4.1) Family This family of viral proteins is known as the 110 family [1]. The function of members of this family is unknown. The family contains a central cysteine rich region with eight conserved cysteines. Some members of the family contains two copies of the cysteine rich region Swiss:P18560. 25.00 25.00 48.20 48.00 23.80 23.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -11.11 0.72 -4.49 10 107 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 11 0 0 103 0 107.00 40 82.88 CHANGED llsLLup......hsssptsspL.pTpsPPccELcYWCTYscsCcFCWDCQcGICKNKlhss..ssIhcN-.YlpsChVoRahsp..ChY-lst+I..hHsMsCSpP+sas.saclh ...............h.hhLLu.......hh.ththtpL.tTppPPccELpYWCTYscpCcFCW-CpcGICKNKlhcs.h.shIhcNc.altsCpVoRh...sp..ChY.hss.+h..hH.M-CSpPpsap................ 0 0 0 0 +4913 PF03402 V1R Vomeronasal organ pheromone receptor family, V1R Mifsud W anon Pfam-B_3057 (release 6.6) Family This family represents one of two known vomeronasal organ receptor families, the V1R family (after [4]). 
25.00 25.00 25.00 25.00 24.60 24.70 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.93 0.70 -4.99 2 2399 2012-10-03 04:04:29 2003-04-07 12:59:11 9 5 78 0 1464 2009 0 245.10 33 86.01 CHANGED KLlstp+s+.hDL.IuhLSLhpLhhLlhhuhIAsDhFhsWttWsshhCpSLlYLaRhhRGLuLssoCLLsVL.sIhLSsRSSCLsKFKHKssHHISsAhL.h.VLYM.hSSHlLVSIIsTPNLTopDFhaVTQ.CSlLPMSY.hpShFSTLhAIR-sFLISLMsLSohYMVALLhRH+KQspHLpuTSLSsKASPEQRATRoIhhLhSLFVlh.lh-plVhpSRhhahsssl.Y.hplhMsH.YATVSPFVFIsTEKHIlp.hcS ..........................................................h......p.pshD..l.Il.tHLsl..sNh.l....hLL..o.p.G...l......t..s.....h..h..h..s..h..p.........h...s.....-.h.s.C.K.h.lh.Yl.hRVu.R.GLulC...oTCLL.SlFQAlTI.S.P..ss.S.p.hup.l.K.s.+.h.s..+....h...l...h...s.s.hh.h...h.W.l...l..s....h.h...ls....h.....s..h..h..h...h..h...h...u...s...p....N....s.....o....s..s....s.....h.....h..h..s..h...p.....a..C.s..h....h...s...h....s....t..h....h..p......h..l..h..s...s..h.....h.s.h....+....DlhF.l.u.LM.shu.S..GYMVh.l.L.aRH+..+p.VpH...l..Hss.s.hSs..+..ssPE.sRAs+oll.hL.V.s.haV...hhY.s..h.s...s...l...h..h......h....h..h........h..p..p...p..s...h..h..h..p....h..p..h..h.....h.s.......s.a.sslsPhl.hl........................t.............................................. 0 7 20 850 +4914 PF02830 V4R V4R domain Bateman A anon [1] Family The V4R (vinyl 4 reductase) domain is a predicted small molecular binding domain, that may bind to hydrocarbons [1]. 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.03 0.72 -4.13 52 616 2012-10-02 19:02:47 2003-04-07 12:59:11 13 22 405 2 314 617 58 62.00 25 18.38 CHANGED hhhph-sshpttshstsscPlCahhtGhhsGhhs........phhs+p..lhhcEspCtupGss...pCpahsc. ......................phcss.hht.th.st..ss...cP.lC..a.hhsGhhsGhhs.........shhu...pp...lhspEs..pCt.ut.Gcs...pCpFhsp........ 
0 95 201 264 +4915 PF01496 V_ATPase_I V_ATPase_sub_a; V-type ATPase 116kDa subunit family Bashton M, Bateman A anon Pfam-B_446 (release 4.0) Family This family consists of the 116kDa V-type ATPase (vacuolar (H+)-ATPases) subunits, as well as V-type ATP synthase subunit i. The V-type ATPases family are proton pumps that acidify intracellular compartments in eukaryotic cells for example yeast central vacuoles, clathrin-coated and synaptic vesicles. They have important roles in membrane trafficking processes [1]. The 116kDa subunit (subunit a) in the V-type ATPase is part of the V0 functional domain responsible for proton transport. The a subunit is a transmembrane glycoprotein with multiple putative transmembrane helices it has a hydrophilic amino terminal and a hydrophobic carboxy terminal [1,2]. It has roles in proton transport and assembly of the V-type ATPase complex [1,2]. This subunit is encoded by two homologous gene in yeast VPH1 and STV1 [2]. 26.50 26.50 26.60 26.60 26.30 26.40 hmmbuild -o /dev/null HMM SEED 759 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -13.24 0.70 -6.68 16 3017 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 1209 4 1136 2604 291 408.80 17 89.37 CHANGED 
lstLt-lGlVphcDLNpcls.shQRchlp-h+Rts-h-+pLthlppplp+ttl.h...........ptthp.sh...p.........ph.chptphpclEscl+plpcshcpLccphppLpEhtphLcpspphhcpt......................cthp............htsl.hs.pthp.......tsphlcl....................hluGslspc+hsshcchLh+ss+G..hht.hplcpsh.-sp......hpsallhhpucphtp+lc+Is-shthphashs-pptt.p-hlpplppclp-lpphLcpspsplcphlsthtcplhsapphlpp-Kslacshshhshss..+slIhEuWsPtc-lsplppsLccssstsu...lsslhs.hcsp-pPPThh+ssKFspsFQsIs-sYG.lspYcElsPuhhhhlTFPFhFulMhGDhGaGllhhLhALhllhtcpphsstp.........aph+YIlllhGlFSlhhGhlYN-hFucshslFtSthths.......................hpt.sl....t.s......YPhulDshapsss..tl...s.hhhthSlllGllHhsaGlhluhhNthph+...p.hslhtshlspllalhsIhGhh.h.hh.ta.h..h...............sPslL.hhl.MFLhs....s........................lQshLlhhulhslPllllhtPlhlhtpthp...........................t.............................h...hG...h.htpsIasIEh..sLGsluphsSYlRLaALuLApupLSsVl.sMshthsh.hts......slhhhllhshhhllshsl.llMpuLSAhLHuLRLHaVEFhuKF.YpGsGhpFpPFuhp .........................................................................................................................................................................................................................h.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................h.ha..............h...h............................................................................................................................................................................Ph..t..........ht...hht...h...st..ths......s..h....a..h...............hauhhhsDhu.uh...h.h..h.h.s...h.h.........h......................................................................................................h.h.........sh.shhh.G.hh.h.s.thhu...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.t.hSa.Rlhs..h....hst.................h...h.........................................................hh....h....h.hhht.h...shlps.RL..............h....................hE..t...............t........................................................................................................................................................ 
0 428 699 955 +4916 PF02346 Vac_Fusion Chordopoxvirus fusion protein Bashton M, Bateman A anon Pfam-B_822 (release 5.2) Family This is a family of viral fusion proteins from the chordopoxviruses. Swiss:P26312 a 14-kDa Vaccinia Virus protein has been demonstrated to function as a viral fusion protein mediating cell fusion at endosmomal (low) pH [1]. 22.00 22.00 22.00 22.20 21.80 21.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.81 0.72 -4.54 11 335 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 61 0 0 153 1 42.00 54 24.57 CHANGED .plcslEcRLssLpcpapplhcsC+psscslcRLENHhETLR+uMlsLsKKIDVQTG .......................QRLTNLEKKITNVTTKFEQIEKCCKRNDEV............................... 0 0 0 0 +4917 PF02691 VacA Vacuolating cyotoxin Bashton M, Bateman A anon Pfam-B_436 (release 5.5) Family This family consists of Vacuolating cyotoxin proteins form Proteobacteria. These proteins are an important virulence determinate in H. pylori and induce cytoplasmic vacuolation in a variety of mammalian cell lines [1]. 
18.90 18.90 19.30 19.10 17.70 17.70 hmmbuild -o /dev/null HMM SEED 981 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.36 0.70 -13.81 0.70 -6.90 2 1164 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 43 1 2 1174 1 244.10 38 85.28 CHANGED AFFTTVIIPAIVGGIATGsAVGTVSGLLuWGLKQAEpANKsPDKPDKVWRIQAG+GFspFPpKpYDLYKSLLSSKIDGGWDWGNAApHYWlKsGQWNKLEVDMpsAVGTYpLSGLhNFTGGDLDVNMQKATLRLGQFNGNSFTSaKDuAsRTTRVsFsAKNILIDNFlEINNRVGSGAGRKASSTVLTLpuSEtITSpcNAEISLYDGATLNLsS..N.SVcL.GpVWMGRLQYVGAYLAPSYSTIsTSKVpGEhNFpHLsVGDpNAAQAGIIAspKTpIGTLDLWQSAGLsIIsPPEGGYcsKspssP.............QN.......NP.NssQKTElQPTQVIDGPFAGGKDTVVNI.+lNTKADGTl+sGGFKASLoTNAAHLpIGcGGVNLSNQASGRTLLVENLTGNITV-GsLRVNNQVGGhAlAGSSANFEFKAG.DTpNuTATFNNDIpLG+hVNL+VDAHTANFpG.I.hG.......NG...............GhN..TLDFSGVTsKVNINKLhTAuTNVslKNFsIpELlVpTps.ShGpYThFuEsIGspSRIssVpLpTGhpshaSGGVpFKuGcKLVIDEhYauPWNYFDARNlpsVEIs++hh.usPtN.hGpotLMFNNLTLspNAsMDYup..sLTIQGcFhNNQGThNhhVpsG+VATLNsGptAuMhFNN.lDSsTGFYKPLIKINsAQsLhKNpEHVLlKA+.IsYs.Vus.Gss....uhSNsNLpEQFKERLALYNNNNRMDTCVVR..NhsDIKACGMAIGNQSMVNNP-NYKYL.GKAWKNhGIsKTANsopIuV..LG.NSTPTpssssTTNLPTNTTNNARFASYALIKNAPFAHSATPNLVAINQHDFGTIESVFELANRSpDIDTLYANSGAQGRDLLQTLLIDSHDAGYARTMIDATSANEITpQLNsATTTLNNIASLEHKTSuLQTLSLSNAMILNSRLVNLSR+HTNpIDSFAKRLQALKDQRFA 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 1 2 2 +4918 PF03077 VacA2 VACA; Putative vacuolating cytotoxin Griffiths-Jones SR anon Pfam-B_2866 (release 6.4) Family This family contains a number of Helicobacter outer membrane proteins with multiple copies of this small conserved region. 19.50 19.50 20.00 35.50 17.70 18.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.89 0.72 -4.38 10 460 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 51 \N 19 481 0 59.90 41 7.82 CHANGED uhlGYIsGsFcAppI.YITGsltSGNuhuo..GGGAsLsFsuusslslssAslssppsssppS ....hlGYIsGsFpAppI.YITGsltSGNuhso..GGGAslsFsussslslssAshssppsss..S... 
1 11 16 19 +4919 PF04333 VacJ VacJ like lipoprotein Kerrison ND anon COG2853 Family VacJ is required for the intercellular spreading of Shigella flexneri. It is attached to the outer membrane by a lipid anchor [1]. 25.00 25.00 34.90 34.60 24.40 24.10 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.37 0.71 -4.86 10 1871 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1652 0 385 1266 1805 200.70 39 75.89 CHANGED sstsppDPLEuFNRshasFN.sslDpall+PlApGYpphVPcsV+sGlsNFhsNLsEPsohlNpLLQGcscpAhpshsRFhlNTThGlGGLlDlAotsG..Lpppscc.FGpTLG+YGVusGPYlhLPlhGPsTlRDssGsllDth..hPhhhhlss......shuhs+hulpsl-sRAphLss..-sLlcsS.DPYlhhRsAYhQp+pa+lp ......s...thsDPhEuaNRsM.asFN.ssLD...Yll+PVAh.uYp..shsPpPsRsG..........lsNFhsNL.p-PsshlNslLQGcstpuhtchsRFhlNT.hhGhGGlhDVA....uh.s.s......Lp+....pcpc..FGpTLGaaGV....ut....GPYlhLPhhGPtTlRDss...GphsDs...h.......P...h...h......h...hsh...................sh.u.h..u...t.....h...s...l.....ps.......l-...sRA.....pL.L.ss......Ds.L.h.c.s..u.s.DPYhhl..R-AYhQ++ch.h.t.................................................................. 0 94 207 299 +4920 PF04294 VanW VanW like protein Kerrison ND, Finn RD anon COG2720 Family Family members include vancomycin resistance protein W (VanW). Genes encoding members of this family have been found in vancomycin resistance gene clusters vanB [1] and vanG [2]. The function of VanW is unknown. 
21.70 21.70 22.20 23.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.95 104 1141 2009-01-15 18:05:59 2003-04-07 12:59:11 8 16 702 0 306 1013 69 131.50 36 30.04 CHANGED tsRspNlplAuptlsGsllhPGE........sFSFNphlG.potpp.GYppu.l...l.hsu.....chss....ul.GGGlCQlSoTLasAshhAsLpllERpsHSh.lsYs......Ph....G.....pDATlsas....h.lDh+F+NsostslhIcs....ths.ssp...lssplau ......................................s.sRspNlpluApplsGslltPGE........sFSFNphlG..p.o.tpp.GYp..puhl...l.h.sG.......chsp............uh.GGGlCQ.....lSoTLYsAs..ht.A....s..L...pl..lERpsHShslsYs..........Ph..........G.....pDATlshs.....h..lDl+F+Nssstslhlps......hhs..ssp..lsspla............... 0 143 249 290 +4921 PF02557 VanY D-alanyl-D-alanine carboxypeptidase Bashton M, Bateman A anon COGs Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.72 0.71 -4.31 63 2350 2012-10-02 01:02:30 2003-04-07 12:59:11 12 58 1623 4 473 2508 550 132.00 31 49.45 CHANGED shhlcpcsupuhpph.hpAApcc..Gl....pLh...........h.hSGaRShppQptla..pt.sp..................tpstptstphsA.PGtSEHptGhAlDlussp........h........ptshtpsttscW..............................LpcsAtca.GFhlp..............a.Ppsppp.pGlsYEP...W.......HhRY.lGh ........................................................t...lptpstpshpph...hpA.A....pp...p.....Gh.....plt...............h..sSGa......RSac.pQpp..la.....pthhp................................................ppuppt.stp....h...uA.hP..G.....t.....S.....E..........Hp.....s.....GLAhDlss...s.s..............................................pttht.p.sttspW.......................................................................L..t...c.pA......t...c...a...G.FllR.........................Y..psc...p..p...T.GhtaEP.......WHhRYlG................................................................................................................. 
0 157 322 407 +4922 PF04892 VanZ VanZ like family Finn RD anon Pfam-B_5529 (release 7.6) Family This family contains several examples of the VanZ protein, but also contains examples of phosphotransbutyrylases [1]. 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.73 0.71 -4.21 19 3861 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 2101 0 830 2978 740 136.10 18 66.89 CHANGED halhallhlhhhhhFt..h...hh............tshshsLhPhtt...........................shhphhhsllhahPhGhllshhhtphp.....shhpslshuh...hhu...LhhEshQhhhsht.......ssDIsDllhNTlGuhlG.hh....lhhhht+hh.+t .............................................................................................................................hh.h.......................................................hp..h......h.......................................................................hh.h.p..h.hhN..lh....ha....h...P...L.....G..h.....h.....h..h...h....h.h...t..p.ht....................................shh.t..s..l....h...h..u....h......hhu........l.....h...h.EhhQ.h.h.......h.s.ht.............ss-lsDl..hh.N....ol....Guh..l..G.hh.......lhh.hh.......t........................... 0 303 585 720 +4924 PF03490 Varsurf_PPLC Variant-surface-glycoprotein phospholipase C Griffiths-Jones SR anon PRODOM Family \N 20.70 20.70 20.70 21.50 20.60 20.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.66 0.72 -4.29 2 23 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 14 0 5 24 0 46.70 48 14.12 CHANGED FssstWpPQSWMpDhRS.ItchsIsQVhhVGuHsAuoaGlph.SPFGhDAP .......thtWpPQSWMcDLRS.It-huIsQlslsGoHNuuoYGIpptSPhuhDA...... 0 3 4 5 +4925 PF01992 vATP-synt_AC39 ATP synthase (C/AC39) subunit Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the AC39 subunit from vacuolar ATP synthase Swiss:P32366 [1], and the C subunit from archaebacterial ATP synthase [2]. 
The family also includes subunit C from the Sodium transporting ATP synthase from Enterococcus hirae Swiss:P43456 [3]. 22.20 22.20 22.20 22.60 21.80 22.10 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.84 0.70 -5.32 10 1321 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 1055 5 489 1086 162 301.60 22 94.11 CHANGED YlsARlRuh+u+LLs-pcascLlcscohEph+hhLEs.s-Yushlsslssh..ssptlE+ALppsLAcpachlhclusupl+phlchhLc+aDlcNIpsLI+uKhsstssEEllshhhPhG...sacphptls-usolEEll.ssLcGT.YtcsLpchLu..-h-.pshtlh.ptLhKtYat-hLchsh.....hpuc-pclhcEalchElDh+NlpshLRuKu.sGLosD-lp..lhlpGGpLtc.tLctLtpu-sh-tllutLEGTpYupslp-stpphtts............lpslEcsLcchllchhschuhhpPloVuslluYllpKEpEl+NL+sIA+sttpslcsEcIcc.ll ..........................................................................................................hpshlRshcstl.Lspppapplh.ps...c.s.h.-...............s.h.t.h.hL.ps....osYtt.h.l..sp......t......t...p.th-ttlpppLh.pp.a.phhhp....u..s.t.....tphlp.hhthpa..hcNlt....lllp....up.h....p..t..p....s..h.p..c.......l.h.........h...............s.........h..G...........h.....p.....p.....h.....t....t...l....h....h....s...p.s.....p....-lh...thl..h...........t..s.....h.....tt......hhp...phh........s..........p.............h....-.............p.......h.......s.l.p.h.l.h.s.h.l.h.c.........t..a.h.t..p..h.h.chsp...........t.s.tp..t-...lhpphl.t..hph...Dh.......pslhhhlpu.....ht....p.....ph....s...t.....s...t......ht..plh.sp.tG.p..l..s.c...t.h.h.tls.p..s.p..s..h-.phhsh...l...p...h..s....h.p..hl.hp.s.h.tt..t.................................pslE...c.h.hhpt..h.p..h.t.p.....uhh.tth...s.h...t..s..hh.ualhh+E.El+NlphIhpshtp.p.ht..ppIpphh.h..................................................................................................................................................... 
0 184 295 405 +4926 PF01991 vATP-synt_E ATP synthase (E/31 kDa) subunit Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the vacuolar ATP synthase E subunit [1], as well as the archaebacterial ATP synthase E subunit [2]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.23 0.71 -5.04 17 1053 2012-10-02 21:03:42 2003-04-07 12:59:11 13 6 818 14 504 1038 112 184.00 22 89.44 CHANGED hIppEAc-KAp-IpscA--Ehsh.Kschhcppctplcphhc+tcKQs-hp+phthSshh.puRh+lLps+--llpslhcpscccLtplsc-p..tYpphLcsLlhpulhcLt.EsplllpsRccDhpLlcsh.lsphtpchctpht.ps..hhs-s........................c.shGGVlltstcG+IclsNTl-uRL-hhhpphlspIpctLF ...................................................................I.p-ApccAp.cI.t...cAccEhph..htc.hh....pp..t....p..t...p..h...pp...h...hc...+..tc.+.p.s...........c..h..pp..p....h....p....h....Ssh.t........p.s....Rh.....c...lLp.s..+.p-lls........clhp.c....u...........pcc.L.t.p.l..........s......p.........s..........p.....................t...........Y.p.phLpsL..lh..pu....h....h....p.......lt.....c...s..p.h......h......l.h..s.pp...pDhp...l...l.c.ph..h..th..tt......htt......th......t............t.................h.....h.....s-p......................................p.ht.G..GlhlhstsupIplssTh-shlchhh.pphhspltthLF.............................................................................. 0 194 321 422 +4927 PF01505 Vault Major Vault Protein repeat Bateman A anon Bateman A Repeat The vault is a ubiquitous and highly conserved ribonucleoprotein particle of approximately 13 mDa of unknown function [1]. This family corresponds to a repeat found in the amino terminal half of the major vault protein. 
20.00 8.30 22.70 8.50 19.30 8.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -7.95 0.72 -3.90 85 662 2009-01-15 18:05:59 2003-04-07 12:59:11 13 12 77 227 332 619 5 43.50 31 23.60 CHANGED hhVLspspALtlcAhpsa....pD.tp.....s....................RhsG-cWllpu..P..tsYlPs ....................hllspspALpL+Alpsa.........pD..tp.......uhp..............................Rh..sG-cWLlpu...s......tsYlP..................... 1 173 214 257 +4928 PF01847 VHL von Hippel-Lindau disease tumour suppressor protein Bateman A anon Swiss-Prot Domain VHL forms a ternary complex with the elonginB Swiss:O44226 and elonginC Swiss:O13292 proteins. This complex binds Cul2, which then is involved in regulation of vascular endothelial growth factor Swiss:P15692 mRNA. 20.00 20.00 20.10 20.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.67 2 136 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 97 14 79 139 2 121.10 33 56.67 CHANGED RPRPVLRSVNSREPSQVIFCNRSPRVVLPlWLNFDGEPQPYPhLPPGTGRRIHSYRGHLWLFRDAGTHDGLLVNQTELFVPSLNVDGQPIFANITLPVYTLKERCLQVVRSLVKPENYRRLDIVRSLYEDLEDaPsVpKDlpRLoQE+ltpQ+ht- ..........................pS.tst..s.Vh.FsNp.osRsV..shWlsapGc..p.Y.ss..LtPGpth...clpTYhsH.WlFR..Ds.h..Tt..-..th.hV.pp..pc.............lahP.........................t.............t.....t............................s...............IphP..hh....oL+c....s.h..h.l.t...hh.t.....t.............................................................................................................. 
0 25 39 61 +4929 PF02209 VHP Villin headpiece domain SMART anon Alignment kindly provided by SMART Domain \N 21.70 21.70 23.60 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.70 0.72 -4.26 15 900 2009-01-15 18:05:59 2003-04-07 12:59:11 14 43 134 38 458 817 14 35.90 47 4.18 CHANGED YLo---FpplFuMo+pEFhcLPtWKpppLKKchtLF ...aLSsE.-F.......p........plF.GMohpEFspLPhWKpspLKKcttLF... 0 137 202 310 +4930 PF04702 Vicilin_N Vicilin N terminal region Kerrison ND anon DOMO:DM04811; Family This region is found in plant seed storage proteins, N-terminal to the Cupin domain (Pfam:PF00190). In Macadamia integrifolia (Swiss:Q9SPL4), this region is processed into peptides of approximately 50 amino acids containing a C-X-X-X-C-(10-12)X-C-X-X-X-C motif. These peptides exhibit antimicrobial activity in vitro [1]. 26.70 26.70 28.60 28.60 26.60 25.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.99 0.71 -12.08 0.71 -4.59 4 32 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 13 0 1 40 0 123.70 35 28.90 CHANGED sGR+s.-DDPppcYEpCpRRCc.-spGp+EQppCEcRCcpphcEcpppp....EDPQppYppCppcCpppE...R.h.pCpQpC.cpaEpp.ppp..............................pRQap-CQpRCppQEQtPccpQQC.RcCREQYpE.p.+Gcc-ph.....ctccpcoEEG .......................thppC.p.Cp..tpt..pQppCpppCcppht.Ecpppp....................c......DPpppYcpCppcCpppc....c.p.p.CpppCpcpaEpc.ppp......................sp+pac-CQp+CppQEQtscpppQC.pcCccpYpEp.h+t.tc..pp.....tpctct-E.t................................. 0 0 0 1 +4931 PF00559 Vif Retroviral Vif (Viral infectivity) protein Bateman A anon Swiss-Prot Family Human immunodeficiency virus type 1 (HIV-1) Vif is required for productive infection of T lymphocytes and macrophages. Virions produced in the absence of Vif have abnormal core morphology and those produced in primary T cells carry immature core proteins and low levels of mature capsid. 
25.00 25.00 28.30 28.30 21.70 21.70 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.46 0.71 -4.41 10 6238 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 93 4 0 6291 0 184.00 79 99.34 CHANGED pEKcWlVhlTW+VPc.RIc+W+SLVKYhhYhoKcLptWpY.hHHapssas.a.TsSRllIPL.uc.uc.LcVssYWpL.TPE+GWLSoYAVuIpW...hpcpYhT-VDPssADpLIHspYFsCFo-sAIR+AIRGc+llshCpFPcGHK.QVs...SLQYLALhAl...lpp+pp+scssss+phsccptsshphA+pc.tpspppsGp ............MENRWQVMIVWQV.DRMRIRTWpSLVK.HHMYl.S.KK.AKs...WhY.RHHYESpHP.+lSSEVHIPL..G.........D....A.....+........LVIpTYWGLHT...G.....E......RDWHL.G...Q.GVSIE.W............Rp+...RYSTQ.VDPsLADQLIHLaYFDCFS.-SA....IRpA.ILG+l..V..pP..RC.E.YQAGHN........K.VG......SLQYLALsAL..................ls.P..K.K..h.....K..P...PLPSV.p..K.L.TEDR.W..NKP.QKTKGHRGSHTMNGH.................................................... 0 0 0 0 +4932 PF01044 Vinculin Vinculin family Bateman A anon Pfam-B_1420 (release 3.0) Family \N 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 968 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.98 0.70 -13.84 0.70 -6.55 10 1199 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 103 65 681 1153 3 334.80 14 87.69 CHANGED 
.h+T+TlEplLEPlspQVopLV.h+p...uphctstshcLossVAuVppAssNhlchGc-hspposD..Lcc-MssAlpcVcssuchhcpAupphpcDPhSostRsphlcuARulLSusocLLlhhDpu-V+KIlcss+tVpDhLssscsspoh-DLsshhKsluPshs+lschlscRQQELs+.pHR-.LlsuhsslKchuPlLloAhKsalcp....P..slpEAtcNRsalsccMs-tlNpI.cVLQhTo...........oshp+shuuhuscLssAL-hLccs..........hI...lDsushsptRstPp.ccclcullSssutMADu.CshcsRtp......ths..........................................................................................................AEss.............t.................................................................................................................................................sscQALpsLloEhtpsAspps......chssLsssID+.........hptcsc................DL+cplR+AlsD+VSDsFhDToTPLhlLlEAA+u.....G+EcshcE+ApsFp-HAs+LspsApLusuhu..sNccsVchlptoAsQl-sLsPQVIsAA+ILhppPsSKsApEph-shKpQWtDpV+hLTstVD-hTsscDFLssSEsHIhcDls+ChhAlps.........t-scsLsssAuuIttRusRVlhVscsEhDN.EsshaTE+VppAschLcsslsPhhs-ttplAsNstcss....sspsac-s..chlsAsRhVtDAlp..........................................................sIpculLM..pcsPs-lsssophc.....pED..sssRssssscsss......csssssEoscE..tplh.EcKsplstQhpshhtstppLcpElsKWsspGNDIIuhAK+MshlMhEMo-hsRGcG..tTppDlIssAKcIA-Auscls+LA+plAcQCsDpph+psLLthhpRIshhspQLpIhSpVKAshhshus.........Elsspsh-.usppLlpsApNLMpSVhpTV+AA.sAShKhRoc.......uusplpWhhKsPhpp 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 189 244 434 +4933 PF02236 Viral_DNA_bi Vir_DNA_binding; Viral DNA-binding protein, all alpha domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_1651 (release 5.2) Domain This family represents a domain of the viral DNA- binding protein, a multi functional protein involved in DNA replication and transcription control. 23.50 23.50 23.70 38.00 21.60 23.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.80 0.72 -3.82 10 131 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 98 7 0 139 0 87.80 45 18.36 CHANGED PlVSAWp+uMElhstLhE+Y+VDsc...sh+hLP-pu..psa+KlspsaLNEc+hsl.LTFSSpKTFsslMGRFLtualpsaAGltsspas .........PlVSuWEKGM-sMssLME+Y+VDss.+sua+LMP-Qs..ElapKlC.poWlNEE+RGlpLTFoopKoFoshMGRFLpualhuauGIuppsWE....... 0 0 0 0 +4934 PF03728 Viral_DNA_Zn_bi Vir_DNA_Zn_bind; Viral DNA-binding protein, zinc binding domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_1651 (release 5.2) Domain This family represents the zinc binding domain of the viral DNA- binding protein, a multi functional protein involved in DNA replication and transcription control. Two copies of this domain are found at the C-terminus of many members of the family. 
25.00 25.00 64.00 39.20 21.80 17.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.66 0.71 -4.03 22 260 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 98 14 0 276 0 122.10 35 50.61 CHANGED stCpshh+tsst...G.c.hChhsshhlspspsl-hssso.-suhtAhhc.P.shsh.ssN.htRssht.......hpNsDh+lss.Dlhp...shphspp.hu.sas-s.....hhsphtapp.thpappshhPpsp .....tCss.p+s....ss.s......G..+.hChhsshhlspscsl-hDssS.-..s.u.hA..hcpP..hsh.spN.hhRNsht..................hsNsDh+hss.Dhhs...s.QhSp+.hu.hFsEs.........shsphtapp.hhtahpsshPsh............ 0 0 0 0 +4935 PF00426 VP4_haemagglut VP4; Outer Capsid protein VP4 (Hemagglutinin) Finn RD anon Pfam-B_161 (release 1.0) Family \N 19.80 19.80 19.80 20.70 19.40 19.60 hmmbuild -o /dev/null HMM SEED 776 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.36 0.70 -6.57 9 3505 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 491 35 0 2781 0 337.90 67 99.90 CHANGED MASLIYRQLLsNSYoV-LSDEIpsIGopKopNVTVNPGPFAQTsYAPVsWGsGElsDSTsVpPsLDGPY.....QPoohp.PssYWhLlsPsssGVVhEu...TssoshWhAslLlEPNVppssRpYslhGpshQlsVsNsSpo+WKFh-hhKsossusasphsoLhos.+LtuhhKps.splasYpG-oPsuspshhssss.hsslshshps-FYIIPcSQputCsEYIpNGLPPIQNTRNlVPluluSRsIh..RAQsNEDIVISKTSLWKEhQYNRDIlIRFKFuNoIIKuGGLGYKWSEISFKssNYQYoYpRDGEpVsAHTTCSVNGVN-FuYNGGSLPTDFsISRYEVIKENSYVYlDYWDDSQAFRNMVYVRSLAAN.LNsVhCoGGsYsFtLPVGsWPVMpGGAVoLphAGVTLSTQFTDFVSLNSLRFRF+LoVpEPsFoIoRTRsopLY..GLPAuNPNsspEYYEhAGRFSLISLVPSNDDYQTPIhNSVTVRQDLERQLs-LREEFNsLSQEIAhSQLIDLALLPLDMFSMFSGIKSTl-AsKSMATsVMKKFKpSpLAsSlSpLTcuLSDAASSlSRuoSlRSs.SssSsWTslSpplosssssspslSTQsSsIS++LRLKEhsTQT-G.MsFDDISAAVLKTKIDKSsplstssLP-IITEuSEKFIPsRuYRlIccDpVaEsos-G+aFAYKV-TFEElPFDl-KFA-LVTDSPVISAIIDFKTLKNLNDNYGITRcQAhNLLRSDP+VLRsFINQNNPII+NRIEQLIhQCRL 
................................................................................................................................................................................psYsVsL.DEIppIGSEK...o.QNV.TlNPGPFAQTpYAPVNWGHGEINDSTTVEPlLDGPY.....QPTTFsPPsDYWILIsS...NTsG...VV.YES...TNNSDFWTAVIAVE....PHVssssRQYslF..GENKQFNVc....NsS.sKWKFlEMF+u....SuQs-FhsR...RTL.TSDT+LsGlLKYG.GRlWTFHGETP+AT..TD.u..Ss.TuN...LssloIsIHo..........EFYIIPRSQES..KCsEYINNGLPPI..Q....NTRNVVPlSLSSR.SIpYp.....RAQ.....VN.E.DIsI..SKTSLWKEMQYNRDIhI+h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 0 0 0 +4936 PF03225 Viral_Hsp90 Vir_Hsp90; Viral heat shock protein Hsp90 homologue Mifsud W anon Pfam-B_2880 (release 6.5) Family \N 25.00 25.00 56.60 56.60 22.90 20.10 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.68 0.70 -6.21 17 124 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 41 0 0 124 0 433.40 32 96.48 CHANGED ssphhtchhphhatcs-hccphpchhshlhpp.tp.sp..ats..t...h.sasushhlpssplhsssssshEhhclhllahhhlp.shhc+osasscshh.sslshtpshtphc..ahshsl.......cshGCpaohpDlpp.....phss.st..hth.plaplsNShG+Llshp-lpstshhsF.plssspsssslssshssNchhhpClplacp...hssspusts+hphppchhcshlshLtspp..hpphscNP.LlhuhhhshhpphsspssuFccNhcslc.lpssahshlcclFthshphs..-scLhhs..lspspls.......pllsp.hhluphlc..pshhs.sss.ssLscclDthlsc.hl.p.ht...t.spshlhsuhLal.....aGpapTNtpRh.ppPpphphshpt......pshphphSslps..lhsplppphPch.......NlhRtWsstRuscAhtlF+shs..FpPhhasplPslhsYMpFDFaKtlshptloc-EhpshpsL+h.s...............................cs+oststpsppchcshlh ........................................................t.p..h.phhphhhtct.hpphhpp.hp.h.tp...................phtssh.hpttph..ss.ss.p..phhllh...h...th.phsshs.tshh..thp...p........hhshs........pp.GphashsD..Vcshhh....s.sss-h..sts-psasLSNSsG+LlsssElcshc...shsF....spshcs.sssssssphpsaLshCls...Lacc.....pshpssshhslhh.hasslhpsLsohs.hhcphscNP.LlsGhll-hhhc.pVahsoFcsNl-sl+hh.puphhsllhslashspspc...D-+lLhs...hsss-hsht..shlslhDshlllGs.lRpl-hhsp.ssshssLsp+lDhhlss.............sssc..Lpsshhas......a.uhapss.pRh.ppst.hph..t.......t.h.hphsslpt..hhs.lpp...ph.......shhRtahut+uphAhpla+phs..F.Ph...plPshhsah.hDFaKtls.shlotcEh.tLpplc..s...............................c..................th.......................................... 0 0 0 0 +4937 PF04530 Viral_Beta_CD Viral Beta C/D like family Waterfield DI, Finn RD anon Pfam-B_4973 (release 7.5) Family Family of ssRNA positive-strand viral proteins. Conserved region found in the Beta C and Beta D transcripts. 
23.40 23.20 23.70 54.40 23.30 23.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.85 0.71 -3.95 12 26 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 16 0 0 26 0 117.70 38 71.27 CHANGED h.Hs.sCsCspC..sspsp.hsus.p........pssss.sEpsspt.sh.sshhsppYhhllsslhll..h.hhlYlh....sssssssssYaYQDLNSVclchG..sPlDP-VItAIHHaQcaPFGpsPth IhHS.sCsCscCphssssohsssspp.........sssosVEpTsct.hh.hsshsspYllhlsslsll....hslYhh....sss.ssshshFaQDLNuVpIchuh.sPlDPcVItAlHHWQ+hPFGcsPt... 0 0 0 0 +4938 PF00729 Viral_coat Viral coat protein (S domain) Bateman A, Griffiths-Jones SR anon Pfam-B_870 (release 2.1) Domain \N 23.00 23.00 24.00 24.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.15 0.71 -5.01 13 577 2012-10-04 01:49:40 2003-04-07 12:59:11 13 2 91 43 0 572 4 198.00 26 66.94 CHANGED psGuIhA..PVAhuph.lpsppP+hp.pspG....SsploHpEllsslssoou..hphsss.ss.t.phplNPhNsulFsWLsslAsNa-pYpFsSlplpYlPhCsoTosGRVAhhFDcDupDs.PssRspLushsthspoAPWs................tssLslPsD.......stcRFssDouosDtKLlDlGQ..........lllATYG..uuussslGDlFlpYoVphhpPpsos .....................................................t.....hA..PVA.upp.ls..shsPt.h...poss........sh.sl.ppsEhlss.lpp.....oss......hshp.........saslsPhNs....t..hspl.slApsYsha+hTphclhYlPps....u......s.sss......Gp.lthsa.hD...huDsh.Pssps...p...h...uphushlsousWt.....................tssls..h..s.hD...........t..h.a.t..s......s......p.lh.s.st..............hhh.s.u........ts......hGplhhphtl.h.......s................................................................................. 0 0 0 0 +4939 PF00747 Viral_DNA_bp viral_DNA_bp; ssDNA binding protein Bateman A anon Pfam-B_490 (release 2.1) Family This protein is found in herpesviruses and is needed for replication. 
25.00 25.00 40.90 40.80 19.70 19.40 hmmbuild -o /dev/null HMM SEED 1122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.80 0.70 -13.80 0.70 -7.34 11 204 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 108 2 0 187 0 910.50 32 95.55 CHANGED Mpsc.....csssh..uPluPsuYlYhhtpsschh-hluhLShpsssSslslsPLLhsLTVEssFss....sV+o.hsshsG.ullhKlTo..FpPssahFHssctlss.sps..sLs+LC-cARp+FGhpuaps...ss+spTshtsLCsslGhsPspslhalVVspGFKEhlasGpLlsphttsspVplsss-ul+IPLYPhpLF...spphhs-..st.thshsctFl..psFYsssLSchLFhhVaTuhu.ALRhpsscsllcAuh+QhlpDsppssKLsPcKsapuasupphsu...........h-ps.........chMLsDshssELAhSasulah-ulY-.ssshsas-WPllpsu-spts+lsALssaph+LusHVuAhlFSsNSlLYhoclshhssscppssos....p.ohh+hhphssuhsusscps.pDu+tl.chsG.....ssupspcaospHLAhhCGhuPpLLu+hlaYLpRssshsssspups-lhpalss..soushCshCsGcsspoChpTshhRl+sRhPthspss++-PsVhsshSphYuDsDlLGNauph.sl-pc-s.scsu+ss............................................th..sol-RhhhhtphhcYscKt...phlssssGc..shslps+psFlsllsslppslDspVspFhpslhcsps...R-tltsuspuhslshsPaAsuFsPlhsahaaRolLuVlQsLAL.pspuahsDpPlpGpphscWhhppFQolhssFhsshhc+GFLss+slKlspssstsphhshpshtp.Gphstpsh-s+luRlSltsl+slRVKNRslapuups..shupsah++s.........sc++ps.lpGsLuFLLhpaHcpLFPss+hs......sL.FWpplppNphP.ssslhshE-hsshlpFlhphos-Ys-hsllDlsPsslhsaschhhpNplLphhGapsYhhohhshhsthopps.s..aPslLhpps..phuSstEash+spthpssuhssshTushoppshl+slhspRslVoluloIpKYsGhsGNcclFQhGplGahsGuGV-RNLsPss.u.....tshpFMR.p+allAT.hsshll++.sptssssa-s-ll+ptlhhIl..-utshssss.lhslscslusRsp..p.shDDhLahlDs.EhLAcSlhcphsplh-pGst-aSl-shpcVhcsstp..sth....suutsa-Fuuh 
............................p.s....sssGPhualYhh.htt..hc.luhLuhpstss.ssslhPLlhGLTVEtsF.h....ssts.hpshsu.ul.hKlos...ats.shhFHssptl..s.sps...slpchCctARpcFGhpsats....s.th.TshttlCttlshpspphhhalVlspuhKEhlahsphh...phtthtpVpls.ttpsh+lPlYs.pL.F................th..p..t.......hshtpth...hsFas.slschLa.hlhsshu.uLRhppspslhcuuhc.hhc-stpsshLssphpahshtspt.ps.............s....................phhlssshss-hAhuhtshh.tusa..-...t...shppWPlhpstps.t............shhpALsta.h+hush.luAhlFSsNSsLY.ocls.hssss.tstu....................t.shh+.h.h.sshhs.hs.ps.p-u+hh...thps...............hshtspcas.pHLAhhCuhsPtLluphhaYLpRsshhths..hptp..shhpalss....s..ss..CphCttpsptsChpTshhRlctRhP.hstt.+tt.hVhsshsp.asDsDhLGsauth.hhcpt-s....sscs..................................................sh-Rhhh.....phhcp......h.s..ss.......u.p....p..lp.....sttsFhthhsslpphl-pps.phhpslhctpp....R-tlttuspshslshsPYu.u.sPlhthhhhRohhsllQslAL.pstshhhspshpsp.hp....tpFQslhtthhhs..ptGFlss+phpsshsp.hshhhs..hhtp.s.........t.shhshpsclsRlol.s.+phRlKNRslapuups........shs.pshht+s...........sppchs.lpGsluFLLhpaHptLF.Psst..s......s..FWptl.pNphP...sthhshc-hps.ltalhpho.pYtthshlslsPsslhphsp.hhtNthLphhsa......p....pahhshhshhhthspps.s...s.hhhh.t....huu..sh-h..h.hphhh.phs.t.h.t.hsshhsppphlpslhptRshVslslulpKYtGhsG.s.pclFQhGphu.h..Gs...+slss.............htF..R.p+allAs.hss.hlhtt.st.h..tss.h.a-sslhcphhhhh...cuss.hshss.hhshhchlu.sRs.p......ph-Dhhhhl-s.phlupphhchhtpl.pps.s.taSh-shhphhct.tt..........st.h.s....h............... 0 0 0 0 +4940 PF01443 Viral_helicase1 Viral (Superfamily 1) RNA helicase Bateman A, Ahola T anon Prodom_1256 (release 99.1) Family Helicase activity for this family has been demonstrated [1] and NTPase activity [2]. This helicase has multiple roles at different stages of viral RNA replication, as dissected by mutational analysis [3]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.76 0.70 -4.84 72 2712 2012-10-05 12:31:08 2003-04-07 12:59:11 13 100 786 0 67 6146 639 226.40 20 13.31 CHANGED hllcGVsGsGKSohlpphlpp...........hpsssthhstth......spssphpspplcsh.hshht.....schlllDEahhh..t.l.h..h...spsphsllhGDshQhshpsp.....hsph.sph....sphhshphshhhsshshthtspp..........h.pststpssth.....thsspsh......slthtshspphhpshthp..............shssp-spGhpacsVollhpppss...........spphhhVALTRpppslhhhs ......................................................................................................................................................................................................h....GssGsGKo....p.......l...h...phht................................h...s.h...h.phh.......................................p.hp..s...h.t...h......p....s..........h...h............s.....h...ht.........................schl.hlDEsh...h...h...p....s...h......h...h....l....h......t........h......h.....p..s.....p..........p......l.h..h.h..G.Dsp.Q...h.......s..hh.....s....h........t...t.....h....h..h....t...h.h..s.ph...........................s...h.....t...h.......p....h.s..h....R.s...st..pl.st..hlpsh....................................................t..sppps.h................thsspsh...................hth.hhs.h..p..t.....h..t.th.tp......................................................................shTspp.sQG.....h.....T.....a.c..t..V..............h.l......h...........t....s........p.....s....t...s..........................sppc.hhVALTRtppplhh................................................................................................................................................ 
0 19 50 61 +4941 PF04521 Viral_P18 ssRNA positive strand viral 18kD cysteine rich protein Mifsud W anon Pfam-B_2612 (release 7.5) Family \N 23.10 23.10 23.20 105.30 22.60 23.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.90 0.71 -4.66 10 25 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 12 0 0 26 0 118.30 47 72.80 CHANGED hsshcsspKhRtplYppLGL..ssVpC+LsussGhsCGMPAAls....h-cucsc.....Lsh.DGaCGEKHcshshShAhR.splpshphcL-pLEc+cEsL+sphpthstst+ssps.s.sstKhs+hKs ..+slhCVSKYRtSVYKsLGL..ssVKCRLPuDCGVNCGMPAAFV....LEcGHPc.....Loh.DGaCGEKH+GYVlSGAWRpAQLRoLNtELDpLE+RtEpLKsQI+sLotst+sssA.sYsPpKls+hK.A.. 0 0 0 0 +4942 PF00998 RdRP_3 HCV_RdRP; Viral_RdRP; Viral RNA dependent RNA polymerase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_315 (release 3.0) Family This family includes viral RNA dependent RNA polymerase enzymes from hepatitis C virus and various plant viruses. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.55 0.70 -6.00 33 17276 2012-10-02 12:54:00 2003-04-07 12:59:11 18 48 263 228 1 13782 0 190.90 62 44.48 CHANGED hhshsushh...sppcphh.lst.hssshhhtsHNpshsNhpRulhERVahVp..tptp....lhs..........cPh.ssFcc.lpthtppltphlh...ptsslo.pchsp.YpGt+tphYp+AlpS.lshpslppc.DuhLcsFlKsEKhslssKsDssPR...lIpPRsPRYNVtlGpaLRhhE++hh+s......lspsatGsohhpGYospptuchlhptWppapsPsAlGhDsoRFDQHVShcALcaEHSlYpusatssp...pLtclLshQLhNpGsuhss-Gh.h+YphcGCRMSGDhNTShGNsllsshhspshh+ths..lcspLhNNGDDCVlIsEpsshcpspp...slpchatcaG.FsspsEcPsYclEplcF..CQspPlhsss.tashsRp.hsshuKDshuhsshpss..ssspsWlsulupsGhslsuGlPlhppFaphhhpsst......thstphhsspash.......shhthhuh.thshpstsssptsRhSaaL.uaGlsPppQhtlEsthsphplptp 
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................V...T...E...p.DI.R..sE...E.....tIYQsCsLtPpA.....RpAIcSLTERLY...lG...GPhhNS......K.Gp...sC.GYRRCRASGVLTTShGNTL...T........CYl.KA..pAA.......C.......R.....A..............A....t......Lp......D.......sT.......M....L..V.......CGDD...LVVI.sESsGspEDtt....sh....................................................................................................................................................................................................................................................................t.............................................................................................. 0 0 0 1 +4943 PF02407 Viral_Rep Putative viral replication protein Bashton M, Bateman A anon Pfam-B_1223 (release 5.2) Family This is a family of viral ORFs from various plant and animal ssDNA circoviruses. Published evidence to support the annotated function "viral replication associated protein" has not be found. 25.80 25.80 26.10 25.90 25.60 25.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.04 0.72 -3.98 19 1309 2012-10-02 18:54:06 2003-04-07 12:59:11 11 2 283 2 9 1241 181 77.10 47 29.31 CHANGED uppWsFTLNhsst.......hs.l.hs-clpYAlhtcEcu.p.GpcHLQGalphK++.ppLsplKplhsu.puHhEhsR.Gosc-scsYCpKE ..........................h..hW.sFTl...ot...................t....hl...h............s.hpY.hlhGcEtspp.tTPHLQGahph..ppK..p.p.....hs.plK.thhss....psHlEKAK.GoDppNccYCSKE......................... 
0 8 8 9 +4944 PF05101 VirB3 Type IV secretory pathway, VirB3-like protein Bateman A anon COG3702 Family This family includes the Type IV secretory pathway VirB3 protein, that is found associated with bacterial inner and outer membranes [2]. The family also includes the conjugal transfer protein TrbD family that contains a nucleotide binding motif and may provide energy for the export of DNA or the export of other Trb proteins [3]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.85 0.72 -4.00 24 967 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 663 0 249 799 41 88.50 20 43.83 CHANGED Muttt............sPla+uhsRPshl..hGsst-hlhhs.slhsslLhhsstshhsslhslslahhs....phhscsDsh.hptlah+.......phph+saa.....tsp ..................................ssla+uhTRPshl..hGls..hthhlhs....s.....hh...s.sh.....l......hl.....h....h...p......t......h.......h.......h.h.....l.l..s.h.s...l.ahht........hhss....ccDsh.Fhtlhhp.........p.p.t.ha...t..................... 0 42 134 187 +4945 PF04335 VirB8 VirB8 protein Mifsud W anon Pfam-B_1984 (release 7.3) Domain VirB8 is a bacterial virulence protein with cytoplasmic, transmembrane, and periplasmic regions. It is thought that it is a primary constituent of a DNA transporter. The periplasmic region interacts with VirB9, VirB10, and itself [1]. This family also includes the conjugal transfer protein family TrbF, a family of proteins known to be involved in conjugal transfer. The TrbF protein is thought to compose part of the pilus required for transfer [2]. This domain has a similar fold to the NTF2 protein. 
20.40 20.40 20.50 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.36 0.70 -4.75 104 1418 2012-10-03 02:27:23 2003-04-07 12:59:11 8 5 855 7 317 1203 73 202.90 19 86.07 CHANGED putph.p-chspsptpp+.phhhl.....uhsuhhluhhssh.ulsh..hssh..pp.h.P...allp.VDptsGtsphlssh....pph....pst.s-shhphhlspalp.sRps.hssssh.p..psappshhh..osssstpphpsah.....t.t...psPhshhupps.h...pVclpSl.shh....ss.....p......shp.Vcapcph..pptsssh....tsp.pasuhlshth...pssh.sppphhtNPLGhhVpsaphspE ..................................................................ph.tpphtthptpp+thhhh......uhhshh..lsh..hhsh.u...lsh....hhsh....pp.hhP...allp..lDp.t..Gp..sp.hls..h...pst........php..s.cshschhlspal....p.hRps.hs..sl..p.....pshpps.hhh..osspst.pphpshh..p.....pssh.pph..tppt.l........plplt.Sl...shh.......ss.............s..................shp..Vc...a..ppph...hptssp.............ttp.papuhlsa.h...p..s.h..sppph......h......hNPhGhhVssaphst...................................................................................................... 2 56 177 240 +4947 PF00286 Flexi_CP virus_P-coat; Virus_P-coat; Viral coat protein Finn RD anon Prosite Family Family includes coat proteins from Potexviruses and carlaviruses. 22.90 22.90 23.40 23.10 22.20 22.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.69 0.71 -4.48 17 1308 2009-01-15 18:05:59 2003-04-07 12:59:11 15 4 134 2 0 1218 0 129.90 41 54.26 CHANGED SNshAosE-lttItsshps.htlPssslstshhplshhCscsuSSshss.pGss.h.s.usshtsls.uhh+tpso.LRphCphYA.llWNhhLspNpPPAsWushGap.psKFAAFDFFDuVpssAAlpPspGlIRpPTptE ............................thAsstplttlhthhtt.hGlspppht.p.shhclshtCucsuSSthsshhGss........s.....sh.sp.s-l.s..ul.h.+..phs.T.LRpFCthYA......lVWNhhLtpspPPANWsttGFptssKFAAFDFFsuVsss.AAl....p..P.hpG...lhRtPTctE........ 
0 0 0 0 +4948 PF01347 Vitellogenin_N Lipoprotein amino terminal region Bateman A anon Pfam-B_1280 (release 3.0) Family This family contains regions from: Vitellogenin, Microsomal triglyceride transfer protein and apolipoprotein B-100. These proteins are all involved in lipid transport [1]. This family contains the LV1n chain from lipovitellin, that contains two structural domains. 21.00 21.00 21.10 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -12.79 0.70 -6.29 44 1005 2012-10-11 20:00:59 2003-04-07 12:59:11 17 42 272 1 433 992 1 434.80 17 32.52 CHANGED apss+pYpYpacupshsuL.p.husthuGhtlpuclplpsp.........spsphhhplppsphsphpthhsp.......cshhsssp..........lphphhshhppPhchphssGh.lsclhsspsssshtlNlh+GIlshLQls.hpsspth.pht....................EsslpGtCcThYslpp.sp.....................phlplsKo+shspCpp+hthphGhs.h.sphp..sppppps..lpposssphhlpsp.s..shhIppupsppphhlsPh.htp.ptupspu+ppLsLhphpsps...hs.ssshpshssLlYphssph.....................................................................................................................................................................................t...hPhhhhphssttt............plscplpp.l.ptstphscpcs..+.FspLlpllRssstcplpplhpphtsp...................phhphahDAlspuGTtsAlphlpphlps..cclpshEAApllsshspss.tsspchhchhhpLspssplppp.hLppouhLuaushlpphpss.....................psss.pchlp.ltpphp..pAlsctcp.chhlhl+uLGNhGpspslpshp.hL.upt.......plsthlRlthlhuLcpl..scppP+hlpsllhplahspsppsE..lRhsAhhhLhcopPshshlpplAphspp-s..shQVsuhlhStlc 
.........................................................................................................................p.Y.ath..ps.........t.h..........t....t...............s.shhhps.l.l.t.............................t.p...h.h.p..h......p..h...t.....h.....t...t.................................................................t...h......t..h....Phhh..th.............p..Gh...l.t.pl..hs..ts..s.h....h.Nlh+ulhshhQhs..hpt..tpt................................................Es..sh.G.Cps.Y.hp...........................................................................h.lh+..p.shppC...p.t.h......h....s.h.t...........................t....h....................h..h...p.t..t..................h.ps.s.p....h.h...............t.....h..h..s.p.pl.h....p...tt..........................sl...h.t.................................................................................................................................................................................................................................................................................................................................h.t....................th..hp...htp.................................tt........t.........h..hhp.hp.hp.pp.....l.thh....tph.............................................................................hhhpsls..hts..sh..hhhphh......tp.t....h...h..h...h...............s.s.....thhp...............h..........................................h......hhht.h.th.h...t.h..................................................................h.tth.h.......h..t...ht...........t...................ph.hhlhsl..t.Nh.....t...............t...h....h...hh.t................t.........ph.....hl.sht...........h.....p.hpthhh.lh.p...t.....c..hRhhA.h.hhhht..s.P.s...htth.h...h.....-...........ph.thh.s........................................................................................................................................ 
0 145 187 351 +4949 PF05090 VKG_Carbox Vitamin K-dependent gamma-carboxylase Moxon SJ anon Pfam-B_6307 (release 7.7) Family Using reduced vitamin K, oxygen, and carbon dioxide, gamma-glutamyl carboxylase post-translationally modifies certain glutamates by adding carbon dioxide to the gamma position of those amino acids. In vertebrates, the modification of glutamate residues of target proteins is facilitated by an interaction between a propeptide present on target proteins and the gamma-glutamyl carboxylase [1]. 20.40 20.40 20.60 20.40 20.30 20.10 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.66 0.70 -5.63 10 332 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 226 0 183 348 246 332.40 23 68.03 CHANGED ulFRllFGlLMhl-lhpctuhGalDp+aL-Pphsh+F.hF-alpPLPhshMYhlallMhluAlsIMLGh+YRlSslhFhLsahYIaLLDKToaNNH.YLhuLLuFhLlalsANR...YaSlDuhh.NsslRss.cVPhWNYshL+sQlFIVYFaAGltKLssDWlpG...uh.s+HWLhssaclhl.s-Llsh.lVHas....GLlaDLhlsFLLhac+TRhhuhhhlshFHlMNSpLFsIGMFPYlMLAsoslFFsssW.........s+th...h.phLphlh..............h.t..phhpppsshsp...hh.......ccthhp.tppPhltp+lushFslhahh.QLhLPapHFlhpGps.WTpthYtaSWcMMl+o+ss.asphplcDspTGcpsalsPpsF...pQp+phpspsDhlhQYAppLupphsppshs.....plpIYhDlaVSlNGRhpQRlhDPclDlhpscWssFppssWlLP 
.......................................................................................................................hRh.huhhh..h.....h.............................h...........s.....h.......h........t......s................h..pa..h....h.p.h..h...s.h....s......h.hh..hl.ahl..h.h.l...hu.h.hhhlGh.hh......R.h.u.h....h.h....h....h.........l.....a.h.h.l...hh.st.s...s.a....s...+......h...l.......h.tl.l.hhhh...h..h..hsss..p......hhSlD.....sh.....h....p.......p...........h.......t..........p.........t.......p........l........s...........h........W.................shhl.lp........hQlhllYhh..AGltK.l.p...s...-...WlpG...........shh..p.ha........hhs...h...p..h......l...........s...p........l..h.......s........h......h.lt...h........h......h..u...ll..h-L..s.h.sh.hL..h....h..c.t.....o......R...h..uhh..hshhFH.h.h.su.h..l...h..s..IG..h..Fs.a..hhl.us.s..l.F..h.s.sph...........p............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 1 77 128 162 +4950 PF04649 VlpA_repeat Mycoplasma hyorhinis VlpA repeat Kerrison ND anon DOMO:DM04353; Repeat This repeat is found in the extracellular (C-terminal) region of the variant surface antigen A (VlpA) of Mycoplasma hyorhinis. Mutations that change the number of repeats in the protein are involved in antigenic variation and immune evasion of this swine pathogen [1]. 
25.00 25.00 59.10 30.80 23.40 14.00 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.71 0.73 -5.60 0.73 -4.18 6 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 3 0 16 51 0 13.00 98 28.24 CHANGED KTENTQQSEA.GT KTENTQQSEAPGT 0 0 16 16 +4951 PF01660 Vmethyltransf Viral methyltransferase Bateman A anon Prosite Family This RNA methyltransferase domain [1] is found in a wide range of ssRNA viruses, including Hordei-, Tobra-, Tobamo-, Bromo-, Clostero- and Caliciviruses. This methyltransferase is involved in mRNA capping. Capping of mRNA enhances its stability. This usually occurs in the nucleus. Therefore, many viruses that replicate in the cytoplasm encode their own [1]. This is a specific guanine-7-methyltransferase domain involved in viral mRNA cap0 synthesis. Specificity for guanine 7 position is shown by NMR in [3] and in vivo role in cap synthesis [4]. Based on secondary structure prediction, the basic fold is believed to be similar to the common AdoMet-dependent methyltransferase fold [5]. A curious feature of this methyltransferase domain is that it together with flanking sequences seems to have guanylyltransferase activity coupled to the methyltransferase activity [5]. The domain is found throughout the so-called Alphavirus superfamily, (including alphaviruses and several other groups). It forms the defining, unique feature of this superfamily [2]. 
20.70 20.70 20.70 21.10 20.40 20.50 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.28 0.70 -5.79 122 1584 2009-01-15 18:05:59 2003-04-07 12:59:11 12 52 396 1 1 1647 0 318.10 19 22.63 CHANGED hsaslstpttphLpph.....GI.hsshus.sHsHsssKslEs.hhhphlhshl.sp....sssh..ltlKssKh.phlpptt.........hs...........hhN.hlss+DhtRY.....ss....................................................................p.......................tp..h.h........ttp....hsahtDsLaahshpplsshh.ppsphppLhuolVhP.sEhhh............................tpp..ShhPplYshphp.....................ps..................................................p.......hhahP-uptutuYppsh.sthpaLpsspl..h.tt..................shslphlpShsupHlhhIp+.spths.....................................................................sphtoFt...ctlhhs.....................................phhh.p.hs...p...........phslspshhpplhpYlcol+phshpss.....................................................hu+lpphhsc ................................hs..t.phl.ph.....tl.h.pspt.h....tsHshstshcp...h....hphh........tphs......tshh........l-lGus.hphhhpsp.....................Hs....ssshhss+DstRh.....hp.....phtth.t.ph.t......t...hp..t.....................................................................................hC.p.s................................................................................................................tsCp.hps............t......hshhhpslY.....hshcshsphhhpptht.hhhsshhhs.sphhh.............................tpt..shhs..p.h..s....htctp..................................................p.....hha....stusht..Yscshp.lhpahpsshh.....h..st...............................shhh-hlh.s..hsshh..hh...plst..ssthh.........................................................................p.hs.......ptl.....h..............................t.hl.thhp....t..............hh.ht.....hpph.phl.......uhpphhhhsu....................................h.........p.......................................................................................
............................ 0 0 0 1 +4952 PF00695 vMSA Major surface antigen from hepadnavirus Bateman A anon Pfam-B_168 (release 2.1) Family \N 29.10 29.10 29.30 29.20 29.00 29.00 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.32 0.70 -5.25 4 23454 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 96 4 0 17711 0 209.40 74 95.40 CHANGED hs.s.uKshsscch-Gs.lh.ph.Aup.lP....Gsh...GphsTh.H..lhs+s.psptlpTh.pshs.PtusupR.uhcpPTPhoPP.hshpPctspKsppuF+Qh.p-...tPpsssphsP..........s.hc.pss.sPllpstShl.oh.........sshltsPsLspc....sohuGlLusLlGL.VuFFLLTKILpIhppLDWWWhSLS.PtGp..CshQNotuQTSsHhssSCP.sCPGFhWhYLRhFIIFLLlLLlshshLhlh.s......................................................pShhhuKh.WE.sSAhFS.lS.LlP..Qh..hluLThhLhLIWMh...usplhshLo.hhsL.ALFFp .................................................................................................................................................................................................................................................-........................LT+ILsIPpSLDSWWTSLNFLGGsssC.GQNSQSPhSNHSPT..SCP.PhCPGYRWMCLRRFIIFLFILLL.CLIFLLVLLDYQGMLPVCP.LlP.....G...S....S....T.TSTG.....PC..+...TCTTP..AQGTSMaPSCCCTK.PoDGNCT.CIPIPSSWAFuKaLWEWASsRFSWLSLLVPFVQWFsGLS..PTVWL.shWMhWaWG.................................................. 0 0 0 0 +4953 PF03762 VOMI Vitelline membrane outer layer protein I (VOMI) Finn RD anon Pfam-B_3481 (release 7.0) Family VOMI binds tightly to ovomucin fibrils of the egg yolk membrane. The structure [1] that consists of three beta-sheets forming Greek key motifs, which are related by an internal pseudo three-fold symmetry. Furthermore, the structure of VOMI has strong similarity to the structure of the delta-endotoxin, as well as a carbohydrate-binding site in the top region of the common fold [2]. 
25.00 25.00 25.50 25.50 23.60 23.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.75 0.71 -11.48 0.71 -4.32 22 120 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 63 2 78 127 1 149.20 36 64.05 CHANGED hlpsspsssaGsWtt.ptCPsGpaspGhplKhEssp....s...........D-TulNulpLaCpphs....................................................................................................sppphlhSs-usaGpWp.phpaCPtsthllGFpL+s-.ppt..tDcsussNhthhCt..........spt..ltGss..sa..GsWsssph.......................................................................Cst.G.slCGlpoplEsspu..............................................................htD.DTuLNslplpCC ...................psssss.aGsWs..p...hCPsG.aAsGFplKsEssQ....sh..s.............................DDTALNuIcLaCspst......................thlpStp.GtaGp..Wo.p..aCP.s...s.hlhuFpLRlE..s.ps.......uDsTA...sNNlpFpC...........ssst.Lp...Gsu...hsa..Gpau.p.h.p........................................Cst......ulCGlpT+lEt.pG.....hh.DDTuLNsl+hh.CC..................... 
0 37 43 60 +4954 PF00434 VP7 Glycoprotein VP7 Finn RD anon Pfam-B_116 (release 1.0) Family \N 25.00 25.00 25.60 25.60 22.10 22.00 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.33 0.70 -5.52 8 4737 2012-10-01 19:08:57 2003-04-07 12:59:11 13 1 694 28 0 3775 0 272.50 76 99.95 CHANGED MYGIEYTTILhhLIShlLlsYILKolT+hMDaIIYRFLhVlVlluslss...AQNYGINLPITGSMDTAYsNSTQ-psFLTSTLCLYYPsEAsTEIsDsEWKsTLSQLFLTKGWPTGSVYFpEYuDIsoFSl-PQLYCDYNIVLh+YssslpLDhSELADLILNEWLCNPMDITLYYYQQTsEuNKWISMGoSCTlKVCPLNTQTLGIGCpTTsssTFEpVAooEKLVIpDVVDGVNHKlNlTssTCTIRNCpKLGPRENVAIIQVGGusILDITADPTTsPQspRMMRINWKKWWQVFYTVVDYINQIIQVMSKRSRSLDoAAFYYRV ......................IEYTTlLha.L.ISl.lLLNYILKS.lTphMDaIIYR.FL..LllV.l...l.sh..s+...uQNYGlNL.PITGSMDTsY..sNS.TQp.E..s...FLT..STLCLYYPTE.........A.........uTp.........IsDsEWKDTLSQLFLTKGWPTGSVYFKEYosIssF.SlDPQLYCDYNlVLM...KYD.psLELDMSELADLILNEWLCNPMDITLYYYQQosEuNKW..ISMGoSCTlKVCPLNTQTLGIGC.TTsssoFEpVAps.EKLsIsDVVDGlNHKlslTTs....TCTIRNCKKLGPRENVAlIQVGGuslLDITADP.TTsPQhERMMRlNWK+WWQlFY............................................. 0 0 0 0 +4955 PF00522 VPR VPR/VPX protein Finn RD anon Pfam-B_100 (release 1.0) Family \N 20.80 20.80 25.60 25.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.41 0.72 -4.17 12 5858 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 90 10 0 5609 0 89.30 80 93.70 CHANGED cEpsPts...pss.REsapEWlt-hlcElpcEAlpHFshcLLhtlhpahappatDp.st.....shchlpllQ+AlFlHa+pGCp.........cuRhGt .......................M.EQAPED...QGPQREPYNEW.TLELLEELK...s....E....A.VRHF.PRsWLHuLGQaIYE...TY..GD...TW...s...G.....VEAIIRILQQLLFIHFRIG.Cp..........HSRIG.IhtpRRsRNG...................... 2 0 0 0 +4956 PF03643 Vps26 Vacuolar protein sorting-associated protein 26 Finn RD anon Pfam-B_4396 (release 7.0) Family Vacuolar protein sorting-associated protein (Vps) 26 is one of around 50 proteins involved in protein trafficking. 
In particular, Vps26 assembles into a retromer complex with at least four other proteins Vps5, Vps17, Vps29 and Vps35 [1]. This family also contains Down syndrome critical region 3/A. 20.20 20.20 20.30 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.80 0.70 -5.62 8 653 2012-10-02 22:29:00 2003-04-07 12:59:11 10 9 323 8 411 631 10 247.70 41 79.71 CHANGED FGsss-I-IphsstcsRKhl-hct-......cGph-cthlahsGEoVoGpVslslKc.u+KlEHpGI+lEF............lGQIEhaYD+G..........................Npa-.FhsLs+ELAhPGELppspo.aPFEFspVEKs....YEoYtGsNV+LRYaLRVTlsR+h..oDlsKEhDlhV+.............sa...sshP.......................................-sN............ssIKMEVGIEDCLHIEFEYNKSKY....HLKDV.......IlGKIYFLLVR...IKIKaMElulI++EooGs.GPNsasEoETlsKaEIMDGAPVRGESIPlRlFLuGYDLTPThRslNpKFSVKYaLNLVLVDE-D .....................................................................................................h.h.h.t......tt..h.h............................pt.h.cth.la.h.sGEslsG.............t...V.sl.p.+p..sp+l...c.....HpGI+lph...........................lGpI..........Eh...a..h..-ps..........................p..h.-..Fls.hsp-.....L.sh.P....G.....c...l.........p....s..po.asFcF........p.......lcK...........Y.EoYpGhN..V....cl+Yhl+..sol.....Rph....s-l.s.+.p.h.-.h.h...Vp............................sh.....sh...P...........................................................-.hs.................................ss.I.KM.-.V.G...I..E.-..C..L.HI.EF.EYsK..ocY..........cL.+Dl...........IlG+I...YF..LLVR.....lK...IKpMElpll++Eos.......Gs....u...s.s.h...h.p.........E.....scTls+aE......IMDGuPs+G................EoIPIRlaLss...h..p..hoPThc.s.....lN.ppFSV+aaLNLVllD--............................................................................................ 
0 146 219 321 +4957 PF03997 VPS28 VPS28 protein Bateman A, Wood V anon Pfam-B_6317 (release 7.3) Family \N 20.30 20.30 20.40 20.50 19.40 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.28 0.71 -4.68 6 333 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 284 19 223 325 4 178.50 41 76.05 CHANGED huELYAIlpsl-pLEKAYl+DslSsoEYsusCpKLlsQaKsshpplpsp....thsSIEcFscKYRLcCPsAlcRIc...............cuhPITlcc....................DpussuKsIA-lVppFITsMDuLRLNhpAhDpLaPhLs-LhsohN+lSclP..DF-s+pKlpcWLh+LspMuASDELo-sQsRQhLFDLESAYsuF.phLp ..............hAELaulIhTl-tLEKA..Yl+Dsl.os.sE......YTssCs+LLsQYKshhpp.hpst.....................ph.ss.l......-p........Ftccac.........l.-CPtAhcR.l.+...............pGhPhTlc...c........................................................s..p..u..s..s..u.phIA-ssppFIThMDsL+Lph.hAhD............p...L.aPhLp-Lhpohs+l......op..........................DFE.............s+.......tKlhpWLhpLspMpAo-ELs-p.QsRphhFDl-pAYpuF.thL.................................................................. 0 74 120 184 +4958 PF04133 Vps55 Vacuolar protein sorting 55 Wood V, Finn RD anon Pfam-B_25168 (release 7.3); Family Vps55 is involved in the secretion of the Golgi form of the soluble vacuolar carboxypeptidase Y, but not the trafficking of the membrane-bound vacuolar alkaline phosphatase. Both Vps55 and obesity receptor gene-related protein are important for functioning membrane trafficking to the vacuole/lysosome of eukaryotic cells [1]. 
23.30 23.30 23.60 24.20 22.20 23.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.57 0.71 -4.36 27 382 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 262 0 242 344 0 116.20 37 87.97 CHANGED llhLShhluhGhlLlILuCAL..asNaaPLhsllhalluPlPshl.....spphtssssasspt..........h.-hupFlTGhlVlSuhuLPllLtHssl....IshsAhhholsGshllasoIlhahhhFsts--c ..............lluLSFhhAlGhhhllLuCAL..............apsaaPLhVlhhYlluPl.PhhI.....sp+h.s..s..ss..-.hsssu..........sh-huhF.lTshlVVSuhuLPllLAHu..s..l.........I.phsAhhhslsGshllasTIlsFhhhFtptpp.h.......................... 0 61 118 185 +4959 PF00558 Vpu Vpu protein Bateman A anon Swiss-Prot Domain The Vpu protein contains an N-terminal transmembrane spanning region and a C-terminal cytoplasmic region. The HIV-1 Vpu protein stimulates virus production by enhancing the release of viral particles from infected cells. The VPU protein binds specifically to CD4. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.92 0.72 -4.42 16 6152 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 58 8 0 5351 0 71.70 65 97.50 CHANGED Mhphph.....lullulhlslIl.sIllWsl....lahcY+ch++Q....+cIpcLlcRIRERAEDSGNES-G-pEE.LssLlc.....hGasNPhh...L .............M.sL..I.....h.u.I.V.uLlVAh.Il.AIVVWoI....VhIEYR.K.ll+Q....RKID.RLI-RIRE..RAEDSGNESEGD..pE..E.....LS.sL...VE........MGc.hs.hs..ls........................ 
0 0 0 0 +4960 PF03852 Vsr DNA mismatch endonuclease Vsr TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.60 20.60 20.60 20.90 20.40 20.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.39 0.72 -4.36 7 1075 2012-10-11 20:44:43 2003-04-07 12:59:11 10 8 981 3 248 738 161 73.40 49 47.87 CHANGED sDhhss...tpRptsMp.ps+o+sT+PEhtLtphLaulGhRYRhpctsl.G.....pPDlVhsph+lslFlcGCFWHG+sCtht .........................Dhhsp...tpRS+sMp...uIto+D.Tt..Eht...L.tphL..pupGl.p.aRlp....c...t....s....LP.G................+PDhVlsc.....Y+sVIFsHGCFWHtH.c.C...hh........... 0 76 158 204 +4961 PF00093 VWC vwc; von Willebrand factor type C domain Sonnhammer ELL anon Published_alignment Family The high cutoff was used to prevent overlap with Pfam:PF00094. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.35 0.72 -3.86 20 3816 2012-10-01 23:42:56 2003-04-07 12:59:11 13 320 140 1 1998 3212 3 59.00 33 11.54 CHANGED Ch.psGphYpss-sWpss.........Cp.pCsCps........uplhCcplh..Cs.......ssCsss..................s.GECCs..hC ..................Ch.hsG..p..hYp.sG..-pWp.ss.............Cp.pCpCps..................................GpltCpp....ht....C..s............hsC.sps............................h.hssGpCCP..hC.............................. 0 372 529 1087 +4962 PF02020 W2 IF5_eIF4_eIF2; eIF4-gamma/eIF5/eIF2-epsilon Bateman A anon [1] Family This domain of unknown function is found at the C-terminus of several translation initiation factors [1]. 
30.00 30.00 30.30 30.50 29.90 29.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.03 0.72 -4.03 77 1188 2012-10-11 20:01:00 2003-04-07 12:59:11 13 35 311 14 716 1099 4 83.00 29 12.25 CHANGED pctphshLtulpch...h..hc...........s.phh..sh.l......tllptLY.-tDllsE-sllpWhpc...t....stpspsp..lh.cpsptFlcW.LppAEEEsp--- ......................................ptplphLhulpphs....hc..........psphh..sh.l......pllpthY..-tDll.pE-sllpWhpc.....p..............h....s.tcspth.............hh..cp..s...psFlp.W.LcpAEEEsppp...................... 0 217 343 531 +4963 PF03716 WCCH WCCH_motif; WCCH motif Finn RD anon Pfam-B_3194 (release 7.0) Motif The WCCH motif is found in a retrotransposons and Gemini viruses. A specific function has not been associated to this motif [1]. 20.80 20.80 21.30 21.20 20.20 19.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.74 0.72 -6.95 0.72 -4.45 30 1359 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 739 0 2 1020 0 24.40 64 20.75 CHANGED PlhpsCsCPHCPRH.ppppshsppAa ......PlppPCCCPHCPRH.Kp.psMspQAH........... 0 1 1 1 +4964 PF00458 WHEP-TRS WHEP-TRS domain Finn RD anon Prosite Family \N 27.30 27.30 27.30 27.50 27.20 27.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.63 0.72 -4.29 65 996 2009-09-10 19:30:04 2003-04-07 12:59:11 15 80 141 15 582 981 8 55.30 39 11.86 CHANGED LhpplspQG-hVRcLKucKAsKs..p.lcsAVppLLsLKtpa...KphoGp-hpPs...s..s.u ................LhpplspQG-hVRcLK.......up....K....A.sKs....p.l-.........sAVppLLsLKtph....cptp.Gpchpsst.......s....................... 0 175 232 425 +4965 PF00110 wnt wnt family Sonnhammer ELL anon Prosite Family Wnt genes have been identified in vertebrates and invertebrates but not in plants, unicellular eukaryotes or prokaryotes. In humans, 19 WNT proteins are known. 
Because of their insolubility little is known about Wnt protein structure, but all have 23 or 24 Cys residues whose spacing is highly conserved. Signal transduction by Wnt proteins (including the Wnt/beta-catenin, the Wnt/Ca++, and the Wnt/polarity pathway) is mediated by receptors of the Frizzled and LDL-receptor-related protein (LRP) families [1]. 19.40 19.40 19.60 19.60 17.70 17.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.55 0.70 -5.23 104 10329 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 7256 1 979 7789 0 159.90 55 94.96 CHANGED hCsplsuLs....cQpplCpppsc........hhtulscGsphulpECQaQF+pcRWNCSshstt.................................shFsp.llppG..oREoAFlaAIouAGVsauls+ACopGpl.psCuCDppt....................pststtt......................................................................WcWGGCSDNl.caGhpFu+pFlDupE........ttpssRsl..MNLHNNcAGRpsVppphcpcCKCH...GVSGSCslKT....................CWp.plssFRplGshL+c+Ycs.AhcVphpppspttth...............................................................................................p.ph+.....s..........sppDLVYl-pSPsaCpps.ph.GshGTpGRpC.............Nco.Sp.........uh-u.CshhC..CGRGasopphphsc.cCpC+.Fp.....WCChVcCcpCppphphasC+ 
...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................+.T....................CWh.pL..P.s..F..R..s..VGDsLKDRFDG.A.SR.VMhsNsshp.t...ss.p.p.spsssptsst..................................................................................................+pRYpFQL+Pa.NP-HK............sP.............GsKDLVYLEsS..P.s..FCEK..NP+..............L.........G....I....G....THG.......R..tC..............................NDT...SI.................GVD..G..CDLMC..CGR.G.Y+T.p.p.h.hl.lE.RCs....................................................................................................... 1 210 287 607 +4966 PF01822 WSC WSC domain Bateman A anon [1] Family This domain may be involved in carbohydrate binding. 
20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.83 0.72 -4.05 162 2216 2009-09-11 06:24:10 2003-04-07 12:59:11 14 209 242 0 1812 2200 48 80.00 26 17.70 CHANGED YhGC......as...........-s................hhsss.thssssho...sptC.hshCt....tt...u.ash......sulp..up...pCaC..Gsshsstt.t.sspt..............Cs.hsCs.Gss...s......ph.CGG ................................................hGCap.....................-s.......................tt.thh...st...ths.ssshT.......sptC.hsh.Ct..........tt......u..as.a............uGlp...hup.......cCa..C.........G..s...s...hs...s..s.s...tt.s.stsp....................C....s....hs.Cs..Gss....s..........ph..CGG.................................................................. 0 824 1197 1552 +4967 PF02165 WT1 Wilm's tumour protein Mian N, Bateman A anon IPR000976 Family \N 19.60 19.60 21.80 20.50 16.90 16.80 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.16 0.70 -5.32 8 147 2009-09-10 17:23:11 2003-04-07 12:59:11 10 10 53 6 33 118 0 217.70 64 67.01 CHANGED MGSDVRDLouLL...PPVSSLsuuuGuC.ulPVuGusQWAPlLDFH.PGS.PYuSL..............usHSaIKQEPoWG.uuDPaE...DPHCGLuA.FTVHFSGQFTGT.GsCRaG.AFG...........................pPssuQuRMFPNuPYLPuClDSsPshRNQ.GYusVAhDGsPSYGHTPSHHsuQFoNHSFKHEDslS.QooluEQQYsVPPPVYGCHsPoDoCsuSQALLLRNsYNS.DNLYQMsSQLECVTWNQMNoLuSohK.................uHuuuYEoDspos..PhLhSCSuQYHIHTHGV..FRGlQDVRRVPGlsPslVRS.SEooEKR 
...................................................................................................s..HShIKQEPsWs.ss-Phc...-.ps.LuA.FTlHFSGQFTGo.usCRaG.sFG............................pss.t..s..RMFssssYLssCh-s..s.RNQ.GYusVsFDGsssYGHTP..oH.HssQFssHSFKHEDshu..QQsohG.-.QQYsVPPPVYGCHTPoDSCTGSQALLLRsPYsS..DNLYQMsSQLEChsWN.hN.LuuolK..................................................................sHuoGYES-spos..PhL..CuuQYRIHTHGV..FRGIQ.DVRRVPGlAPslVRS.SEosEKR................................................................. 0 2 5 15 +4968 PF03303 WTF WTF protein Mifsud W anon Pfam-B_4183 (release 6.5) Family This is a family of hypothetical Schizosaccharomyces pombe proteins. Their function is unknown. 25.00 25.00 64.20 62.40 22.10 21.60 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.78 0.70 -4.96 8 19 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1 0 19 27 0 238.50 41 80.87 CHANGED MKNNYTSLKSSlDEtDEhKTDHEIDLEKG.LPEYsSEEEusLPPYSDhu+l.......Ss.......sPN....................sHRcpcsScSsDNpos..LlKLLIShlulhVlNhsA.lCh.saKcuhFtcautu-hVLFGlhsh.lCslshIhLhYFYETWsKAV....KVTll..................shuhGLhshp+chhlhhahhah..IlChlLFsshppGpL.ls+uhltS.....sColSAtllhhl.sVsIPhaphcphhstLhpV.hlppshsls ..MKNNYssL+SslDEtsp.hKs......DpEIDLEKG.LPEYsSEEEuTLPPYSDhupl..........us.......sPN..............................s+Rcsc.ssco...scNuss..LlKLLIShhslhllNhsA.lChL.a+cuhFpsauhsphslFGhaCh.sCslshIhLhaFYETWTKAV....Khslh...................................................shuhGlhp.h+chhshhahhhh..lhphhLhshhppstLsL.puhltu.....pCSluAtlhhhl..lhl..ahhcphh.t.hps.hlppshh................................. 0 19 19 19 +4969 PF04932 Wzy_C O-Antigen ligase Moxon SJ, Bateman A anon Pfam-B_5033 (release 7.6) Family This group of bacterial proteins is involved in the synthesis of O-antigen, a lipopolysaccharide found in the outer membrane in gram-negative bacteria. This family includes O-antigen ligases such as E. coli RfaL [1]. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.45 81 1431 2012-10-01 22:04:45 2003-04-07 12:59:11 10 29 1205 0 366 4539 2057 165.80 18 32.83 CHANGED hshllh..hhslhhotSRuuh...luhhlshhhhhhhhhtthhtth................................................hhhhh....hlhshslsshh...............................................................................................................................tphhhplhstststs................................stRh.t.happuhphhtpp.Ph....hGhGh..................hht.h.......hh.hh.sthhtpsHNhalphhschG.lhGhlhhh ...................................................................................................................................................................................................................................................h...llh..hh.slh.h.o.tSRu..u.h.......l.u..h....h..l..s...h....h...l.....h....h.....h......h........h....h....t......h......t.....t...t..t...h...................................................................................................hh.hhh............hlhs..hhl.s.h.sh.......................................................................................................................................................................................................................................................h.p....p...h...h...t..p..l...h...p.tt.t..p..s..t.........................................................................................................shRh.t..ha....p....s.ul.p....h..h.....p...c....p...Pl......................hG..h.Gh..sth.....................................tthh.p.t.h.............h.h.....t...h..h..h.....t...h..h....t....p..s..HN.hL.phhsptG..ll.Gllhh.h............................................................................................................................. 
0 159 261 325 +4970 PF00739 X Trans-activation protein X Bateman A anon Pfam-B_458 (release 2.1) Family This protein is found in hepadnaviruses where it is indispensable for replication. 21.50 21.50 21.90 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.79 0.71 -4.27 5 5949 2009-09-16 23:40:58 2003-04-07 12:59:11 14 7 78 2 0 4549 0 103.30 87 89.74 CHANGED MAARLCCQLDPARDVLCLRPVGAESRGRPlPGPhGuLPuuuuSAVSoDHGAaLSLRGLPVCAFSuAGPCALRFTSA..RRMETTVNsHp.LsKsL+KRTLGLSuMSToDL.EAYFKDpLFT-WEELGEEhRLpIFVLGGCRHKLl ....................RhhCQLDs.sRDVLCLRPVuAESpGRPhsGshGslsssSsssVsssaGuHLSLRGLPVCAFSSAGPCALRFTSA..Rp..METTVN.....A...+....p........L....PKV....LHKR........T........LGLS.A.MSTT.DL...EAYFKD.ClF.KDWEELGEEIR......Lhl.FVLGGCRHKLV............... 0 0 0 0 +4971 PF00860 Xan_ur_permease xan_ur_permease; Permease family Bateman A anon Pfam-B_1593 (release 2.1) Family This family includes permeases for diverse substrates such as xanthine Swiss:P42086, uracil Swiss:P39766 and vitamin C Swiss:Q9UGH3. However many members of this family are functionally uncharacterised and may transport other substrates. Members of this family have ten predicted transmembrane helices. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.20 0.70 -5.98 20 15557 2012-10-03 01:44:59 2003-04-07 12:59:11 15 19 3998 1 2877 9790 2601 366.40 23 85.08 CHANGED tphlhhGLQahhuMauusllVPlllutu..........t..lhuhshlsuGluTllQsh....hhGhplslhhGsSFsals......hhhhshG.............shslsulhGullluullhhlluhhGhh.hLh+hhPPlVsGsllhlIGLuLs.luhsthuss....ssu....hsssphhtlulsslshslhhshh...tpshhpphulLlGlhsGalluhhhG..hlshss...............ltstsahths.sh.ausPh......hshslllshhslsllslhEslGshhuluplssppht....p.slp+uhhu-GluolluuLhGu.hPsToaupNlGllslT+lhSphlhhsAullhllhGlhsKluuLhssIPssV.lGGshllhaGhlhuuGlphLppschc..pscNlhIlulolslGlulsth.hh ............................................................................................t..tlhhGlpph.lsM..hhshl.l.......s.Ph.ll...uts........................................t...h.........lh..sss....h.........l..suGl....uoll.sh...............hssh...l.shh...Gh...uhs..hhs...................hhhls.hG.............................................s.h.sh.t....s...............h....h...uu.s....hltu....ll....h.h....l....ls....h....h.....t....h........h...........h..........l.h.c..h.l.P.....ssl.......p.s....s..l.......s....hsIG.....L.L.h................l......u.....l.....p.......s....s.......u......h......h......h.................................ps..........................h.....s....................s.............p........................h....................h...................h..........u............h...........l......h..............h........l......h....s......l....h...l...thh............................hps..h..h...h..h...h......u...l.L..lu..l.lssh...ll.uh...........h...hG........hl.......s..h.ss............................................................lt.ps.s..h....h....t..h....s..............h..t...h....s.hsh..........................h..s....h...s.....h...l....h...s....h...h.s.h...h...l.ls..hh-................sh..Gsl..h......u..l...u......p...h...ss....h.hpt.....................
.pl.p.+...u...lh.u...Duluohlu.uhh.Gs..sssT.s.a.s....Ess..........u.sluhs...........st..s...hhsus...l.hsl.lh.uh...h.s.hh.u..slhth.lP............s...........s.........l.....hu........u........s........sl.l...........h.h.Ghlh.hsu....lph.l.s.h..s..c..hs...pshs.hh.l.s.ulhls...huhuls.h...hh............................................................................................................................................................................ 0 745 1589 2296 +4972 PF04921 XAP5 XAP5, circadian clock regulator Bateman A anon Pfam-B_4702 (release 7.6) Family This protein is found in a wide range of eukaryotes. It is a nuclear protein and is suggested to be DNA binding [1,2]. In plants, this family is essential for correct circadian clock functioning by acting as a light-quality regulator coordinating the activities of blue and red light signalling pathways during plant growth - inhibiting growth in red light but promoting growth in blue light [3]. 25.00 25.00 29.20 28.20 23.60 23.10 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.56 0.70 -4.80 26 325 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 232 0 217 305 9 237.70 43 71.22 CHANGED KK+p+pppscLSFssD--E.........................................pppspspttp.cppp..pt.sh.............................p++lsKNPsVsTs.aL.D+sR-ccEpptR-pLRcEaltpQpt...............lKspEItIsFsYaDGosppt.sV+hKKGsoIhtFLp+spc.ht..........tc.....hpEh+psSsDsLMhVKpDlIlPHHYoFY-FIhsK..spGKoG.sLFsFDscc.................................shphhs-hpspc--opssKVV-RpWYERNKHIFPAS+WE......Y-PtKsasp.hhp 
....................................................................................................................t.tttththttLSFs.--cctt.....................................................................................................tppptttt.t..........t.....t...................p...............................p+KluKNPs..V-TS..FLPD+-REccEsphR...........EcLRpEW.tpQEplKsEEIpITFS...........YW........D.....GoGHR...+....sVc..............hKKGs......oltpFLp+shchLt..................p-..............FpEL+ss.uV-pLMalKEDLIIPH....HaoFY..-FIlsK...ARGKS.........G..PLF.sFDV+-..........................................D.lRh.l.sDAssEK-ES.......HAGKVV.RpWYE+NKHIFPASRWEs.....YDPpKcac+Ysh.p.......................... 0 80 119 170 +4973 PF02625 XdhC_CoxI DUF182; XdhC and CoxI family Mian N, Bateman A anon COG1975 Domain This domain is often found in association with an NAD-binding region, related to TrkA-N (Pfam:PF02254; personal obs:C. Yeats). XdhC is believed to be involved in the attachment of molybdenum to Xanthine Dehydrogenase ([1]). 23.80 23.80 23.80 23.90 23.70 23.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.98 0.72 -4.36 129 2575 2009-01-15 18:05:59 2003-04-07 12:59:11 11 15 1586 10 764 2086 952 70.30 30 22.01 CHANGED tphhppspss....sLuTllpspGSsPRpsGupM..llps-G...p.hhGolu.GGs....lEtplhppApphl..psup.....sphhpash ............t.thhtpspss....sLsTllcspGSsPR.ps.G.ApM.lVpsDG............p...hhGol.u.GGs.................lEtplhpc..A.t.....p.h.l.....tssp......sphhph..h...................... 0 243 509 653 +4974 PF03894 XFP D-xylulose 5-phosphate/D-fructose 6-phosphate phosphoketolase Wood V, Bateman A anon Wood V Family Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyeraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 [1]. This family is distantly related to transketolases e.g. Pfam:PF02779. 
21.20 21.20 21.70 22.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.19 0.71 -4.68 6 1182 2012-10-02 16:07:47 2003-04-07 12:59:11 10 11 939 16 425 1121 49 168.90 47 22.78 CHANGED spsLGpYlRDll+hN..spsFRlFGPDETtSNRLpusaEVTcRtWhpphhs.s.-...l.sctGpVsEhLSEHpCEGWLEGYLLTGRHGhFuSYEuFl+lVDSMlsQHuKWLchs.p-lsWR+cIuSLNllsoSsVW+QDHNGFoHQDPGhlsslLsKKs-...llRlYLPsDANoLLAVsD+Chpo ................................................p.lGpal+-lhchN.....scs.....FRlFGPDETtSNRLt.ula-sT...s+..tW....t...p.hh.s...t.s-..thhss..sGpVh.E.LSEHt.spGaLE....GYlLTGRHG..hFuoYEuFl+llDSMhsQ....HsKWL.chs.p-.......lsWRpslsSLNhlhTSpVa+QDHNGaoHQDPGhlshlhs.K.p.s-...llRlYLPsDANoLLulh-+sh+.................... 0 108 256 360 +4975 PF03469 XH XH domain Bateman A anon Bateman A Domain The XH (rice gene X Homology) domain is found in a family of plant proteins including gene X Swiss:Q9SBW2. The molecular function of these proteins is unknown. However these proteins usually contain an XS domain that is also found in the PTGS protein SGS3. This domain contains a conserved glutamate residue that may be functionally important. 22.20 22.20 25.60 33.50 22.10 22.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -4.49 12 174 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 21 0 109 154 0 119.60 41 25.99 CHANGED KRMGcLs.csFhcAspp+hs.....tp-sp.pAs.lpshWccplp-ssWpPa+h................ssph.tEllsE-DEcL+pLKp-hGE-VYstVppALhEhNEaNsSG+Y.ssELWNa+-sRKATLcEslshhhp...phpphKR+R ..................................................KRMGELD.+sFhpAspp+hs.............tc-sp.pAs.LsShWpppl.+s..ssWHPFKhlh..................................sssptpElls--DpKLppLKp-aG--lapsVtpALhElNEY..........N.s.SG..........RYsssELWNa+EsRKAT.lcEslpalhp...phct.+p+................ 
1 16 53 82 +4976 PF03468 XS XS domain Bateman A anon Bateman A Domain The XS (rice gene X and SGS3) domain is found in a family of plant proteins including gene X Swiss:Q9SBW2 and SGS3 Swiss:Q9LDX1. SGS3 is thought to be involved in post-transcriptional gene silencing (PTGS). This domain contains a conserved aspartate residue that may be functionally important. The XS domain has recently been predicted to possess an RRM-like RNA-binding domain [1] by fold recognition. 25.00 25.00 26.60 26.00 24.20 21.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.60 0.71 -4.15 12 211 2012-10-02 20:46:34 2003-04-07 12:59:11 9 16 28 2 131 198 0 117.00 28 20.35 CHANGED pc-haVWPWhGllsNsssphp.cs+..sG.uusphcpcluc..FsPhcVpsL.Wspp.GasGhuIVcFupsWsGFcsAhth-caF-tptpGK+DWtptp..........t.ptsclaGWsA+t-DYpusshl .........t.p-hhVWP.hsIlhNsstph..pt..ssp..hGhus....pclhcphsp...Fss.h.+.sp.sl....astp....GHpGhullcFspshsGappAhphccpFptpspG+c-Wtppp......................t..ttptlYGalApt-Dhp............................................ 0 16 80 114 +4977 PF04555 XhoI Restriction endonuclease XhoI Kerrison ND anon DOMO:DM04409; Family This family consists of type II restriction enzymes (EC:3.1.21.4) that recognise the double-stranded sequence CTCGAG and cleave after C-1. 
25.00 25.00 71.30 35.80 20.00 19.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.14 0.71 -4.90 6 48 2012-10-11 20:44:44 2003-04-07 12:59:11 8 2 42 0 13 50 2 174.20 43 80.90 CHANGED pKQh-oG+sDpGERuGVTuGKNMDGFlsLlhsllptNGLucA-Ia.ps+shLTLPGYFRPTKLWDlLVlp.....cGpLlAAVELKSQVGPSFGNNFNNRsEEAIGTAaDlWTAaREtAFGcp..RPFlGWLMLVEDsPcS+puVp-sSPHFPVF.EF+GASYlKRY-lLCpKLlhEpLYToAslIsSs+susttGcac-hSp .........................tsGctDtGpRuuVTuGKphDGFhtLlh-llptsGlscspla..ppthhsLPGYFRPoKpWDLLVlt.....cGpLlAAlEhKSplGsSFGNNFNNRsEEAlGoAhDlaTAa+EstaGpp..tPalGalhllEDsspSppsV+..s.p.p.PH.Fsl..-F................cGsSYhcRYclhCp+LltEpLYssusllhu.t.t...........s............. 0 4 8 12 +4978 PF00193 Xlink Extracellular link domain Sonnhammer ELL, Bateman A anon Swissprot_feature_table Domain \N 21.00 21.00 21.00 21.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.40 0.72 -3.90 12 1651 2012-10-02 16:37:33 2003-04-07 12:59:11 12 106 63 14 677 1338 95 93.40 40 15.67 CHANGED GhVFahpsss...ploFpEApptChppuAplATstQL.AAacs.Ga-pCsAGWLuDtoVRYPIspPRpsCuGsp...sGVRsh.......Gh..sspppYDsYCa ..............................GhVFahpssp....ploFpEAppsCt.pp.uApl...AossQLhuAact.G.h-p.CsAGWLuDu.oV..R.YP.Isp..P..R...t..tC.uGst............sGVRoh........Gh.....ssp.p.p.a.DsYCa..................................... 
1 60 115 287 +4979 PF00867 XPG_I XPG I-region Bateman A anon Pfam-B_776 (release 3.0) Family \N 22.50 22.50 22.50 22.60 22.40 22.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.27 0.72 -3.93 207 2052 2012-10-01 19:52:02 2003-04-07 12:59:11 13 37 569 21 1236 1927 247 87.50 31 14.55 CHANGED chh.Glsal.AP..hEAEAQsAhLs....ppGh......lsulho-DsDsLlF....Gss....ll+ph.....p.tth...................p.hph...tphhpt....ht..l.sp.cp....hlshslLs.....GsD ...........................t.hhGlsals....A..P..hEA-AQCA.hLt..............cp.Gh......Vsulh..op..DoDhLlF......Gss.....tllp.phs......t.t......................................................................................th.lphhph...p..cl.hpt..........hs...h.sp...pp......hlslslLhGsD.......................................................................................................................... 0 441 726 1045 +4980 PF00752 XPG_N XPG N-terminal domain Bateman A anon Pfam-B_491 (release 2.1) Family \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.15 0.72 -3.75 10 1710 2012-10-03 20:43:45 2003-04-07 12:59:11 12 35 531 21 1161 1748 220 97.30 29 15.40 CHANGED MGI+GLhslLpshu....RpsclEsapG..+pLAIDuShaLYQFLpuVRpptGsslps.....sSHLhshFpRhp+lhpaGI+PlaVFDGssP.sLKppslsKRppRRpcs .....................................MGlp.u.Lhphlpshs...................c..t.h..p..........l....c....p.hp..G.............pp..lAl..............D..u..........sha........l...a..p....h.....h.h..u...s...p...t.p.....u.p.shts.................................sa..l...h..s....hhhR.ht.p.L.l...p.h.sI.+P.l.a...........VF..DG..t..s.s...hKpppht.cRppp+t..t................................. 
1 407 670 969 +4981 PF01834 XRCC1_N XRCC1 N terminal domain Bateman A anon SWISS-PROT Domain \N 20.70 20.70 20.80 21.10 18.80 20.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.13 0.71 -4.74 3 182 2012-10-03 19:46:52 2003-04-07 12:59:11 11 9 89 13 107 163 1 131.20 46 23.24 CHANGED MPEISLRHVVSCSSQDSTHCAENLLKADTYRKWRAAKAGEKTISVVLQLEKEEQIHSVDIGNDGSAFVEVLVGSSAGGu..TAGEQDYEVLLVTSSFMSPSESRSGSNPNRVRMFGPDKLVRAAAEKRWDRVKIVCSQPYSKDSPYGLSFVRFHS .............Ms.lphpcVlSh.SSpD..s.pas.s-NLLp.s.-..s.......+...p.Whss.p.s.t.p.t..p..hpV.LQLE+..t...IptlDlG.NpGuAFlpl.VGpSuhs......-psa.sLLssoohMSPs-S..+sGpN.spVRhF...ssscllpssupcpWDRl+llCoQPas+c.saGLuFl+h+..................... 0 39 46 72 +4983 PF02162 XYPPX Rhodopsin_C; XYPPX repeat (two copies) Mian N, Bateman A anon IPR000216 Repeat This repeat is found in a wide variety of proteins and generally consists of the motif XYPPX where X can be any amino acid. The family includes annexin VII Swiss:P24639, the carboxy tail of certain rhodopsins Swiss:Q17094. This family also includes plaque matrix proteins, however this motif is embedded in a ten residue repeat in Swiss:Q25460. The molecular function of this repeat is unknown. It is also not clear is all the members of this family share a common evolutionary ancestor due to its short length and biased amino acid composition. 19.00 5.00 19.00 5.00 18.40 4.90 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -6.70 0.72 -4.32 7 228 2009-09-16 16:02:34 2003-04-07 12:59:11 12 13 64 0 33 227 0 14.50 85 15.07 CHANGED GY....PPQGYPPQuhPPQ .......uY....PP..Q.G.YP.P..QG.YPPt...... 0 20 27 32 +4984 PF04690 YABBY YABBY protein Mifsud W anon Pfam-B_5698 (release 7.5) Family YABBY proteins are a group of plant-specific transcription involved in the specification of abaxial polarity in lateral organs [1,2]. 
21.00 21.00 21.00 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.27 0.71 -4.01 6 396 2012-10-02 14:16:02 2003-04-07 12:59:11 8 5 153 0 111 371 33 129.80 45 77.58 CHANGED sslhsso-+lCYVpCsFCsTILAVSVP.sSLaplVTVRCGHCTsLLSlslshts....phLsu.sp................lhpshpspspshs..hhppcssosptss..h........Spsppc-hP+.....sRPPEKRQRVPSAYNRFIKEEIQRIKAsNP-ISHREAFSsAAKNWAHFP+IHFGL ..................................................................................................................................................................CsTlLA..VulP.hpp...lh..phVTV+CGHCsNL...lsht............s.................................................................t........t...t...........t..t.p.....t......................................p.tp........Pc.......hs+P.P..EK+p.RlP...SAYNRFhKEEIQRIKAsN....P-IsHREAFSsAAKNWAp......................................................... 0 20 72 95 +4985 PF03895 YadA_anchor YadA; YadA_C; YadA-like C-terminal region Bateman A, Sandt C anon Sandt C Family This region represents the C-terminal 120 amino acids of a family of surface-exposed bacterial proteins. YadA, an adhesin from Yersinia, was the first member of this family to be characterised. UspA2 from Moraxella was second. The Eib immunoglobulin-binding proteins from E. coli were third, followed by the DsrA proteins of Haemophilus ducreyi and others. These proteins are homologous at their C-terminal and have predicted signal sequences, but they diverge elsewhere. The C-terminal 9 amino acids, consisting of alternating hydrophobic amino acids ending in F or W, comprise a targeting motif for the outer membrane of the Gram negative cell envelope. This region is important for oligomerisation [1]. 
21.70 21.70 21.70 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.49 0.72 -3.97 110 2384 2012-10-03 10:38:27 2003-04-07 12:59:11 10 481 927 35 313 2186 103 77.90 30 8.25 CHANGED hc+csp...uGlAuAhAhuul..Ppssh..sGphsluhGsGsYpGpsAlAlGsuths.ssp..hhh+husu..........sopussususGsuapW ..........................cpchp...uGhAuAhAhuuL....sQs.t......sup..huhGsGsYp...GpsAlAl.Gsuhhs..ssp.......hhh+husoh..........sopu..chususGsuapa............... 0 79 157 241 +4986 PF02699 YajC DUF219; Preprotein translocase subunit Mian N, Bateman A anon COG1862 Family See [1]. 21.00 21.00 21.00 21.50 20.90 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.60 0.72 -4.31 176 4454 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 4168 1 876 2416 1646 83.30 32 76.43 CHANGED sh.hht.ll...h.l.lhhslhYFllIRPQp...K+tK....c+pphlssLp+GDcVlTsGGlhGplspl..s-shlhl-lus.......s..lclclp+su.Isplh .............s.h...ll..hl.l..lhhslh.Y...Fh..hl.....R.PQ.p.....K+tK.......c+.pp....hh.suLp+GDcVlTtGGlhGpls.cl..........ccs..hlslclss........s...sclphp+s.ulspl................... 1 300 579 737 +4987 PF04073 tRNA_edit YbaK; Aminoacyl-tRNA editing domain Finn RD, Eberhardt R anon manual; Family This domain is found either on its own or in association with the tRNA synthetase class II core domain (Pfam:PF00587). It is involved in the tRNA editing of mis-charged tRNAs including Cys-tRNA(Pro), Cys-tRNA(Cys), Ala-tRNA(Pro)[2-5]. The structure of this domain shows a novel fold [1]. 
20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.15 0.71 -4.27 91 9872 2009-01-15 18:05:59 2003-04-07 12:59:11 10 31 4219 22 2098 6622 1977 122.00 22 38.51 CHANGED p.sstoh.pchuphhs.........hsspph..sKslllcs...................tcsp...hhlslltuspclshpplpphhu....p.clphA..stccltphh.G....hthGslsPhuh.........ttl.plllDpslhshp.p..lhssuspsstplplsstphhch ................................................................ph..tph.sp.h.h.s....................hs......pph...hKoLlhps.................................................scsp.......hllsll..us.p.cls.........p.K.l....tph..h......G..............s..p..c......lphA......s.cc.l...p.p..h..s....G................hhhGuluP.lGh............................pptl..lllDps....ltp..h.....s...p.......lhsuAsp.pshplhhsstphhp.h....................................................................... 0 669 1310 1755 +4988 PF02392 Ycf4 Ycf4 Bashton M, Bateman A anon Pfam-B_1026 (release 5.2) Family This family consists of hypothetical Ycf4 proteins from various chloroplast genomes. It has been suggested that Ycf4 is involved in the assembly and/or stability of the photosystem I complex in chloroplasts [1]. 
25.00 25.00 30.00 29.50 20.20 22.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.08 0.71 -4.96 59 709 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 670 0 68 562 136 177.50 64 96.24 CHANGED p.sctlhpp.IhGSRRhSNYaWAhllhlGulGFLLsGlSSYltp...............sLlsh.........hsspp.......llFlPQGlVMsFYGluGlhlShYLWhsIhWsVGuGYNc........................................FsKppGh..................................................lpIFRWGFPGKNR+IplpaslcDIpuI+l-l.....pEGlsP+R.....slYL+lKGpp-IPLTRl..GpP.lsLsElEppAAELA+FLpVslEG .........................................................................R.ScplWIE.IsGSRKhSNFhWAhILFLGSLGFLLVGhSSYLG+.........NLIsl..........hs.S.QQ......................IlFhPQGIV.MuFYGIAGLFISSYLWCTIhWNVGSGYs+........................................FD+K-.GI..................................................VCIFRW..GF..PGhNRRIFLRFhhKDIQSIRlEl.....+E..G.l..s.RR.......lLYMEI+G.pt.s..IP.LTRT..s-N..lT.REIEQKAAELAhFLRVPIE.............. 0 16 41 59 +4989 PF01737 Ycf9 YCF9; YCF9 Bashton M, Bateman A anon Pfam-B_2211 (release 4.1) Family This family consists of the hypothetical protein product of the YCF9 gene from chloroplasts and cyanobacteria. These proteins have no known function. 20.90 20.90 21.70 22.10 20.50 20.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.82 0.72 -4.53 35 1482 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 1444 17 54 419 120 57.60 81 94.87 CHANGED lFQlslhALlhlSFlLVVGVPVlaASPpsWspoKsllasGuulWluLVhlVGlLNShVs ..........AFQLAVFALIATSSlLLISVPVVF....ASPDGWSSNK..N....VVFSGTSLWIGLVFLVGILNSLIS......... 2 11 34 46 +4990 PF03795 YCII YCII-related domain Yeats C anon Yeats C, Bateman A Domain The majority of proteins in this family consist of a single copy of this domain, though it is also found as a repeat (Swiss:Q9AJZ7). A strongly conserved histidine and a aspartate suggest that the domain has an enzymatic function. 
This family also now includes the family formerly known as the DGPF domain (COG3795). Although its function is unknown it is found fused to a sigma-70 factor family domain in Swiss:Q9A8M4. Suggesting that this domain plays a role in transcription initiation (Bateman A per. obs.). This domain is named after the most conserved motif in the alignment. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.94 0.72 -3.79 26 4283 2012-10-02 00:20:33 2003-04-07 12:59:11 9 20 2247 3 1546 3526 595 97.20 23 86.30 CHANGED Mhasllshspsssh.....ppptshhssHlsaLcpLtppGthlsu................GsthshDG..st.....GGhhlh-ssshspApphupsDPhsps..Gshpth.lh.......pap ...............................................................hahl.h.h...t..p..sst.................ppptshh..s.sHh...shl..p....p...l....t....cpG..p.llsu..............................Gs...h..s...s...s..D....u..........s.....t........t......hG.uhh.l.h..c.sp...o...h....-...p....A...p....thupp...DP..a..s..ts....G...h.h..p.....h.lh.h............................. 1 496 968 1287 +4991 PF02182 SAD_SRA G9a; YDG_SRA; SAD/SRA domain Iyer LM, Aravind L, SMART anon Alignment kindly provided by SMART Domain The domain goes by several names including SAD [1], SRA [2] and YDG [3]. It adopts a beta barrel, modified PUA-like, fold that is widely present in eukaryotic chromatin proteins and in bacteria [4]. Versions of this domain are known to bind hemi-methylated CpG dinucleotides and also other 5mC containing dinucleotides. The domain binds DNA by flipping out the methylated cytosine base from the DNA double helix [5].The conserved tyrosine and aspartate residues and a glycine rich patch are critical for recognition of the flipped out base [4][5]. Mammalian UHRF1 that contains this domain plays an important role in maintenance of methylation at CpG dinucleotides by recruiting DNMT1 to hemimethylated sites\ associated with replication forks [2]. 
The SAD/SRA domain has been combined with other domains involved in the ubiquitin pathway on multiple occasions and such proteins link recognition of DNA methylation to chromatin-protein ubiquitination [4]. The domain is also found in species that lack DNA methylation, such as certain apicomplexans, suggestive of other DNA-binding modes or functions [4]. A highly derived and distinct version of the domain is also found in fungi where it is fused to AlkB-type 2OGFeDO domains [6]. In bacteria, the domain is usually fused or associated with restriction endonucleases, many of which target methylated or\ hemi-methylated DNA [4]. 20.50 20.50 20.90 21.10 20.20 20.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.06 0.71 -4.92 34 554 2012-10-08 19:26:47 2003-04-07 12:59:11 12 47 169 32 371 572 41 149.00 37 23.53 CHANGED p+clGslPGlpVGDhFhaRhEhsllGlHtsphuGIDhhpsps..........hAsSIVsSGGY-.DDpDsuDsllYTGpGGps.t.tpp.....pspDQcLpcGNLALtpSh...............cptssVRVlRGhcptpp.........psthYhYDGLYpVpcaWh-pGpsG.htVa+a+LpRh...sGQ .......................................................................t.phhG.lPG..l.VGshahhRhphs.sGlHtshh....u....G..I.cttssps....................................AhSlVh....u......GG...Yc...DDh.D.pG-.hhYTGpG...Gps.hs.s.s+...................pspDQpL.....p.p..s.NhALthsh................c.pspPVRVlRuh+stpp...................hts.pts..hpYDG..lY+........VhchW...-h.....G..pp..G...hhVa+ahLhR.ss...................................... 1 105 204 288 +4992 PF04794 YdjC YdjC-like protein Waterfield DI, Finn RD anon Pfam-B_5925 (release 7.5) Family Family of YdjC-like proteins. This region is possibly involved in the the cleavage of cellobiose-phosphate [1]. 
24.00 24.00 24.30 24.20 21.80 23.90 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.77 0.70 -4.92 114 1651 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 1420 8 307 1052 228 240.90 32 92.26 CHANGED +cLIlsADDFGlotuVNtuIlcuacpGlloSsolMsssPuh.pcAsphs+.p..ts.....tls.lGLHlsLos..GhP.l....h...pthssLlst..............p.GhFhphh........hthh.tphshcpltpElpAQlcpFhs.hGhsssHlDuHpHsH.hhPslhsslhplu.pchsl.....sh.Rh......shp.h................................ht.hstthptphtptGlshss.h...............hhthlhphhpphsp.t..................ssElMs.HPuhh......sst.lhshssh.....stpRtpEhphLsuspltthlppp ....................h.hLIlNADDFGLocuhNhGIlcuhcsGl...loSTohhsNssuh..c....cA.....l...p....Lu+....c....hP.................sLs..lGhHhsLTh..Gc.....P..l............sthPuLsc...............s.G..hhtchh...............p......hh...........p....s.s...l..s.h--l...ppEltuQhp+Fhp..hGt.pPoHlDoH.+H....l....H....hhP.p...lhsllhc.....hA...tchul..........sh..Rh............stpsh................................h.h............lps.spthpstah...sp.slspst.............................hhphLpphhpp.htp............................shElMs.HPAal..............-p..lhp..p.S.ua........shsRhpELclL.sSsplpthltp.p................................................................................... 0 103 183 242 +4993 PF00399 PIR yeast_PIR; Yeast PIR protein repeat Finn RD anon Prosite Repeat \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.97 0.73 -6.38 0.73 -4.44 17 804 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 57 0 500 869 0 17.80 68 21.99 CHANGED suAlSQIsDGQlQAToss ...tuAVSQIuDGQIQATTpT...... 0 82 223 407 +4994 PF03366 YEATS YEATS family Mifsud W anon Pfam-B_2273 (release 6.6) Family We have named this family the YEATS family, after `YNK7', `ENL', `AF-9', and `TFIIF small subunit'. This family also contains the GAS41 protein. 
All these proteins are thought to have a transcription stimulatory activity 22.00 22.00 22.10 22.20 21.10 21.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.94 0.72 -4.26 63 798 2009-09-11 12:00:18 2003-04-07 12:59:11 11 14 314 7 532 728 2 83.10 35 19.08 CHANGED oHpWslal.........cshss.....................c.c..h.uphlcKVpFpLH.toFss.Ph.RslcpP....PFclsEpGWGEFpltIclaahs......ptp.hsl.HpLplpttshp .......................THpWp..VaV.........+shpp....................................................................................p..c...l.stalcKVhFpLH...Sass.Ph........R.s......l...ccP..........P...FclsEsGWGEF.l.IclaFts......pt+.lslhHpLpLt.pt..s........................... 0 160 267 417 +4995 PF03543 Peptidase_C58 YerHae_surfAg; SurfAg; Yersinia/Haemophilus virulence surface antigen Griffiths-Jones SR anon PRINTS Family \N 22.90 22.90 23.10 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.42 0.71 -4.83 14 182 2012-10-10 12:56:15 2003-04-07 12:59:11 9 21 106 1 40 172 1 170.80 19 15.90 CHANGED Rsoh...ucaGscsshhhsts..............ts........GlC.uLsAcW....lp.css..spshhspLh.uut.cGp...hphpphhshpphphc..tsttsptph.................phppshLpctGlpPptc.hsht................tsupsshsshlpsIhcsGspphhsh+h....sphuuHslAstsp.spp.lsFFDPNaGEFohsscpp.....tsshapp.hhphpptspphhpllshchp ............................................................................................................................................................................................................................................................................................thp..tpshstllptlh.cssu.pshhs.hh......hptsuHAhAs...h...s...........p...sp....p...h......sFFDPNh..GthpFsstc....pFtta.hpta.htp.h....................t.................................... 
0 12 23 31 +4996 PF03545 YopE Yers_vir_YopE; Yersinia virulence determinant (YopE) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 26.00 59.50 23.40 22.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.14 0.72 -4.03 8 184 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 159 8 7 96 0 69.90 55 15.70 CHANGED EpLt+-HspLAoGNGuLRSLsTuLpGIpcGSphpphps.AupLL-psluGIsLQQWGTsGGpAochVsSA ..poLpcNapchASGNGPLRSLhTsLQsLschscscpLp-hsosLhNhplGuhtFSQWGT...sGGsspcalscA............. 0 2 3 4 +4997 PF03887 YfbU YfbU domain Bateman A anon COG3013 Domain This presumed domain is about 160 residues long. It is found in archaebacteria and eubacteria. In Swiss:Q9EUM2 it is associated with a helix-turn-helix domain. This suggests that this may be a ligand binding domain. 25.00 25.00 31.00 29.70 24.70 24.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.07 0.71 -4.69 4 723 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 710 16 68 261 3 163.40 73 97.45 CHANGED MEMTssQRLILoNQYpLMshLDs......tNAp+Y+RLpsIlEpGYuLch+ELs.+EFusLsEpEC+pllDIhEMY+AlpsShssLsD......ppslsp+RloFhGFDu..spEu+hlsYVRFlVssE.GcYpcF.tsEHthNSQsPMhsKYpRML....ssW+sCP...+pYHLSssEIppIlNA ..................................................MEMTNAQRLILSNQYKMMTML.DP...........sNAER......YR.RLQ.TIIERGYGL..QM.R.ELD..RE.F.G.ELpE.ETCRTIIDIMEMYHALHVSWoNLpD..........pQuID...E......RRV......TFLGFDA..ATEARY..LGYVRF.MVNl.....E..G..RY...TH..FD..AGT..H.GFNAQTPMWEKYQRML....sVWHACP.........RQY....H.LSANEINQIINA......................... 0 11 24 46 +4998 PF02542 YgbB YgbB family Mian N, Bateman A anon COGs Family The ygbB protein is a putative enzyme of deoxy-xylulose pathway (terpenoid biosynthesis) [1]. 
25.00 25.00 26.20 26.40 24.80 22.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.78 0.71 -4.40 167 3444 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 3393 163 843 2387 2171 155.90 48 76.19 CHANGED hRlGhGaDVHph........................spG......ct.LhLGGVcIPa..p...........p.GLhGH..SD..uDVllHAlsDALLGAs...uhGDIGpaFPso.Dsp.....aKsssSthLLpcshphlpppGa.plsNlDsTl.luppPKluPa.hstM+pslAphLslshspVslKATTsEpLGFsGRtEGIAAhAls..Llhp.t ..........hRlGpGaDVHph..................................sts.cs...lllGGVcIPa..c......................p..GLluH..SD..uDVhlHAlsDALLGAs...uLGDIGchFPDT.Dsp.....aKGA-SttLLccshc.hlpp........p..G..a.pluNlDsTl.IAQtPKhs.......P+.lspM+tslAcsL.sh.........s.h.sp..VsVKATToE+.L.GFsGRtEGIA.u.pAlsLLhc.t............... 0 298 571 724 +4999 PF02325 YGGT YGGT family Bashton M, Bateman A anon Pfam-B_983 (release 5.2) Family This family consists of a repeat found in conserved hypothetical integral membrane proteins. The function of this region and the proteins which possess it is unknown. 21.70 21.70 21.70 21.80 21.40 21.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.62 0.72 -4.04 170 4945 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 3567 0 1081 2737 2206 77.70 26 80.00 CHANGED lphh..lsl.ahhllllps...llSWl........s.....sshs....phlhplT-PlLpPlR.....Rll.P...sl.Gu......l..DhSPllshlllphlphhl.hth .........................hllplahhllllph...lhSWhss........shp.....sshs.........phlhplo-Pll.t..Pl...R.....Rll..P....sh..Gs.........l..DhS..sllhhllLthlphhlh..h........................... 0 315 665 900 +5000 PF04945 YHS YHS domain Bateman A anon Bateman A Domain This short presumed domain is about 50 amino acid residues long. It often contains two cysteines that may be functionally important. This domain is found in copper transporting ATPases, some phenol hydroxylases and in a set of uncharacterised membrane proteins including Swiss:Q9CNI0. 
This domain is named after three of the most conserved amino acids it contains. The domain may be metal binding, possibly copper ions. This domain is duplicated in some copper transporting ATPases. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.33 0.72 -4.11 23 1534 2012-10-03 05:12:49 2003-04-07 12:59:11 8 45 1045 41 530 1375 156 45.40 29 10.53 CHANGED sp-Pssuhpl....sphphcYpG+tYaFsS-sCcttFcp-PEcYhstt .....................DPVsuhtl.....t..p.ut....h..p.....h.p...YpGp.pY..aFCSppCtppF.ctcPpcYl........... 0 154 311 444 +5001 PF03755 YicC_N YicC_N-term; YicC-like family, N-terminal region Finn RD anon Pfam-B_3743 (release 7.0) Family Family of bacterial proteins. Although poorly characterised, the members of this protein family have been demonstrated to play a role in stationary phase survival [1]. These proteins are not essential during stationary phase [1]. 25.00 25.00 25.10 28.20 24.20 24.40 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.01 0.71 -4.30 185 2490 2009-09-10 21:41:45 2003-04-07 12:59:11 8 3 2470 0 578 1769 632 155.00 32 53.65 CHANGED lpSMTGFu+spt......pss.t.............tphshEl+SVNpRaL-lphRLPcthp.slEstlRchlppp.lpRG+V-splphpts..st.sstslp..........l.Npslhpphhpthpp.lppph..th....t.sls....hsp....lLphsuVlp........pp.t-...-t.pth....lhpshcpAlsplhphRpcEGppLtt. ............lpSMTuauRtch.....psp..h................................tphshElRSVNpRaL-hhhRLP.cp.hc.slEsslRctlppp.LsRGKV-splphcts...st..spsplt.........l.NppLscpllpthpp...lptph..st.......spls....hsc......lLc...h.P.GVhs.............sp.t.p-h..-shpt.......lhsulcp...........ALcshhssRppEGptLpt...................... 
0 207 385 486 +5002 PF03853 YjeF_N YjeF-related protein N-terminus TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Domain YjeF-N domain is a novel version of the Rossmann fold with a set of catalytic residues and structural features that are different from the conventional dehydrogenases [1]. YjeF-N domain is fused to Ribokinases in bacteria (YjeF), where they may be phosphatases, and to divergent Sm and the FDF domain in eukaryotes (Dcp3p and FLJ21128) [1], where they may be involved in decapping and catalyze hydrolytic RNA-processing reactions [1]. 24.90 24.90 24.90 25.00 24.70 24.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.04 0.71 -4.65 82 3610 2012-10-10 17:06:42 2003-04-07 12:59:11 10 26 3275 37 1227 3081 755 164.50 29 35.13 CHANGED shhLMEpAupussc.hlpphh......................sptppllllsGsG......NNGGDGlssARhL..hptGhp.Vpl..........hhhtt.t.phssssppthphhpphs.......hphhp.tsttt...........................sclllDAlhGh.................Ghp.tslcs...t.....htpll.....p..tlNps.......psh.llulDlPSGl..sss.oGps...............s.s.......lp.AchTlohttsK.u ............................................h..tLMcpAGtu....ssp.hhtpth.........................................................................spsppl.lllsGs.G.............NNG..........GDGhlsARh..L...t.t...t..G.hp..Vsl......................hh.ht.s.....p.......hspp.....st.......t.sh.p.t..hp.p...h.u.................st..h...h...s.....t.....h............................................................................s-lllDAlhGh....................................G..lp.....tslcs........s.........hspll.............p..tlNpp.....................................sss..llAlDlP.S..Gl.....suc..oGts.........................uss........................lp.AshTlohtshK.s.................................. 
0 398 764 1032 +5003 PF03739 YjgP_YjgQ Predicted permease YjgP/YjgQ family Bateman A anon COG0795 Family Members of this family are predicted integral membrane proteins of unknown function.\ They are about 350 amino acids long and contain about 6 transmembrane regions. They are predicted to be permeases although there is no verification of this. 27.40 27.40 27.50 28.60 27.30 27.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -5.71 131 5380 2012-10-03 05:18:07 2003-04-07 12:59:11 9 5 2456 0 1233 3656 4737 347.90 20 93.70 CHANGED RYlhcphlhshlhshhslhhlhhhh.phlphlsph.hpts..ls................hhtllphhh.htlPthlh.hhlP..luhlluslhshupLspssElsulpuuGlShhclhhPhlhhulhlslhshhhsth.lhP...huppphpplhpphhppshtt...........hpsshahptss.................hhlalp..phssssp......hpsl..hlhch...........pp..tshtp..........................................................hl..tAcpuphp.........stthhLpsuphhphstt.....................................................................phphhphpphphphs.hpspphpthttp.......................................................................pphsh.........................tpL...................t.hp.h......pttshs..htphp.hchap+luhPlsslhhslluhs..huhtt..Rtsptts.......hhhulllhhh.aahltthspshutp...shl.ss...hl..usahssl.lhhhlu...hh......lhh+ 
...............................................................................+Ylh+phhts....hhh.s.hh.hLlsl.h.hh...h.....p......h.l.c....ls...ph...spus......hs....................h.t.h.s..l.t.hhh.hplPphl....hhlPluhhluslhsLup.LspcSElslhpAs.GhShhplhtss..hhhu.lhls.llshhlsta.lsP....hupppt.p......ph...h.s.p.tt.spssht.................th.p..p..u...h..at.p.pss...............................phla.lc....p..l..s.s..stt......................hpsl..hlhpht.......................ps....tpht..s.......................................................................lh..hAcpup.hp......................sphh.hLpsspph....ph..s.st...............................................................................phphpph...pp..h.p..ht...hs....hps..p..tl...s..s.h..t..h.t.....................................................................................................................................................................................................cphsh...........................................tpL............................................................................t..h..tttsps........ssphp..hp.hap+.lshPl.ss.hhhsLluls....h..u..h..ss....Rp.u.phhs............llsull..lh..hh..aahl.tp.hht.s.h.u..tt.....stl..ss................hl...uhhhssh..lhhh..lu..lh.Lh...................................................................................................... 0 387 789 1034 +5004 PF02326 YMF19 Plant ATP synthase F0 Bashton M, Bateman A anon Pfam-B_984 (release 5.2) Family This family corresponds to subunit 8 (YMF19) of the F0 complex of plant and algae mitochondrial F-ATPases (EC:3.6.1.34). 
21.80 21.80 21.80 22.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.24 0.72 -3.41 23 331 2012-10-02 21:03:42 2003-04-07 12:59:11 10 3 267 0 25 589 973 79.00 31 61.04 CHANGED PQLDphTahoQFFWLslh......FhshYl..hlh......pphLPtls+ILKlRpphhpp.h...splppppp...httcshltcs.h..................hshl.pol ...........PQLD..ph..T..YaoQaFWhhlh......hhshYl..hls..........t.hlstIsclLKlRpph.l.ppp.........................................................h.................................................................................................................... 0 9 15 18 +5005 PF01514 YscJ_FliF Secretory protein of YscJ/FliF family Bateman A anon Pfam-B_736 (release 4.0) Family This family includes proteins that are related to the YscJ lipoprotein, and the amino terminus of FliF, the flageller M-ring protein. The members of the YscJ family are thought to be involved in secretion of several proteins.\ The FliF protein ring is thought to be part of the export apparatus for flageller proteins, based on the similarity to YscJ proteins [2]. 
23.90 23.90 24.10 24.00 23.50 23.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.24 0.70 -11.00 0.70 -5.17 21 3384 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 2156 28 653 2296 453 200.40 28 45.09 CHANGED hpthpth.........hslhhhl..hlhsuspt.......tpLYsuLspp-uNcllshLtptsIsscppssssu....lhVspsphspAhhlLsspGL.P+pshsshtplFspsuLlsoshpEps+hhhAlppELucTlspIcGVhsARVHlsLPcc.sshscsspPsSASVhl+hpsshsls.s.lssI+pLVssSlsuLsh.....-ploVl.sstt.....spsspsh ............................................................hhh..............h....hhlsl..h..lhl.....hhh...s...t.ss..p...........hpsL.as.s..L..spp-..u...s..pllshLppt.sIs.achp...s........s.........us........s........lhV.sts.c.ht.p.....s.+h.h.Lsp.pG.L..P..p...s...s.t.s.uh....plhs..p.p.sh.u.sSphtE..psp....hppAlEtELs+.oI.p.sl.ss..VpsARVH.l........u...hPcp...sha.....s...c....p..p..p...s...s..oA.SVhlp.h.p.s...u.ts...Ls...tp.lsuI...hpLVuuuVsuLs.......psVollDpsu.phls...t.....stt.................................................. 0 201 389 515 +5006 PF04650 YSIRK_signal YSIRK type signal peptide Bateman A anon Pfam-B_3441 (release 7.5) Motif Many surface proteins found in Streptococcus, Staphylococcus, and related lineages share apparently homologous signal sequences. A motif resembling [YF]SIRKxxxGxxS[VIA] appears at the start of the transmembrane domain. The GxxS motif appears perfectly conserved, suggesting a specific function and not just homology. There is a strong correlation between proteins carrying this region at the N-terminus and those carrying the Gram-positive anchor domain with the LPXTG sortase processing site at the C-terminus. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.77 0.72 -6.95 0.72 -4.37 66 6800 2009-01-15 18:05:59 2003-04-07 12:59:11 12 627 727 0 247 4894 0 26.50 41 2.13 CHANGED tp+pp+YSIRKhs.lGsuSVhlusslhh ........p+pp+aSIRKho.lGsuSVlluohlhh...... 
1 29 57 149 +5007 PF02295 z-alpha Adenosine deaminase z-alpha domain Mian N, Bateman A anon Pfam-B_11136 (release 5.2) Domain This family consists of the N-terminus and thus the z-alpha domain of double-stranded RNA-specific adenosine deaminase (ADAR), an RNA- editing enzyme. The z-alpha domain is a Z-DNA binding domain, and binding of this region to B-DNA has been shown to be disfavoured by steric hindrance [1]. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.91 0.72 -4.20 8 330 2012-10-04 14:01:12 2003-04-07 12:59:11 12 22 95 39 79 339 3 62.40 29 16.39 CHANGED stss.cpplLsaLcplG.spssTAhALshplsh.K+-IN+hLYsLp+pGclp+psGsPPlWplssts ....................tp.pphlhphLpphu.tpsspslshshtLs.h..K+-lN+sLYcLp+pupV.h.p..ps.s.p.PPhWhls...t.......... 0 14 20 32 +5008 PF01559 Zein Zein seed storage protein Bateman A anon Pfam-B_181 (release 4.0) Family Zeins are seed storage proteins. They are unusually rich in glutamine, proline, alanine, and leucine residues and their sequences show a series of tandem repeats [1]. 
24.60 24.60 26.10 24.60 21.30 23.80 hmmbuild --amino -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.11 0.70 -5.12 9 325 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 7 0 22 337 0 134.90 41 97.79 CHANGED IIPQCSLAP.uAIIPQFLPPVTSMGFEHPAVQAYRLQQALAASlLQQPIAQLQQQSLAHLTIQTIAsQQ.......QQQQ.FLPALSpLAssNPVAYLQQQLLASNPLALANssAYQQQQQLQQFLPALSQLAMVNPAAYLQQQQLLSSSPLAVuNAsTYLQQQLLQQIVPALo.QLAVANPsAYLQQ..LLPFNQLsVoNSAAYLQQRQQL..lNPLsVANPLVAAFLQQQ..QLLPYNQhSLMNPALShQQPIVGGAIF .......................................................................................................................................................................................................................................................................................................hp.QlshhN.....suY..QQ....hLP..FsQLss.s..ssuaLpQ.Q......h......s.....ssh......p..h....h................................ 0 0 22 22 +5009 PF01754 zf-A20 A20-like zinc finger SMART anon SMART Family The A20 Zn-finger of bovine/human Rabex5/rabGEF1 is a Ubiquitin Binding Domain [5-6]. The zinc finger mediates self-association in A20. These fingers also mediate IL-1-induced NF-kappa B activation. 21.60 21.60 22.20 21.60 21.50 21.40 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -6.89 0.72 -4.35 53 982 2009-09-21 14:31:35 2003-04-07 12:59:11 11 20 182 26 514 896 0 25.10 43 10.59 CHANGED shhC.psGCGFaGsstspshCS+Ca+ .......hhC.tsuCGFaGsstspshCScCa+... 0 109 204 332 +5010 PF01428 zf-AN1 AN1-like Zinc finger Bateman A, SMART anon SMART Family Zinc finger at the C-terminus of An1 Swiss:Q91889, a ubiquitin-like protein in Xenopus laevis. The following pattern describes the zinc finger. C-X2-C-X(9-12)-C-X(1-2)-C-X4-C-X2-H-X5-H-X-C Where X can be any amino acid, and numbers in brackets indicate the number of residues. 
23.60 23.60 23.60 23.70 23.50 23.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.21 0.72 -4.08 181 1823 2009-01-15 18:05:59 2003-04-07 12:59:11 11 43 398 8 1124 1782 126 42.70 34 20.23 CHANGED Csh..Cpptsh..h....shp...Cp.CstpFCtpHRh.-sHsC........sthhptps .........................Csh..Ccppsh....h....shp.......Cc..CsphFC.t.pH.Rh.s......-sHsC.shshpt..s....................... 0 351 592 859 +5011 PF00096 zf-C2H2 Zinc finger, C2H2 type Bateman A, Boehm S, Sonnhammer ELL, Gago F anon Boehm S Domain The C2H2 zinc finger is the classical zinc finger domain. The two conserved cysteines and histidines co-ordinate a zinc ion. The following pattern describes the zinc finger. #-X-C-X(1-5)-C-X3-#-X5-#-X2-H-X(3-6)-[H/C] Where X can be any amino acid, and numbers in brackets indicate the number of residues. The positions marked # are those that are important for the stable fold of the zinc finger. The final position can be either his or cys. The C2H2 zinc finger is composed of two short beta strands followed by an alpha helix. The amino terminal part of the helix binds the major groove in DNA binding zinc fingers. The accepted consensus binding sequence for Sp1 is usually defined by the asymmetric hexanucleotide core GGGCGG but this sequence does not include, among others, the GAG (=CTC) repeat that constitutes a high-affinity site for Sp1 binding to the wt1 promoter [2]. 20.80 14.10 20.80 14.10 20.70 14.00 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.01 0.73 -7.19 0.73 -3.30 194 44392 2012-10-03 11:22:52 2003-04-07 12:59:11 21 6603 1241 52 26561 243772 896 23.40 34 6.87 CHANGED apCp...tCs...csFsppspLppHhpp...H ..............apCp.........tCu........KsF.s.p..p.s..s.L..ppHhch..H.............. 0 6055 9206 17573 +5012 PF00105 zf-C4 Zinc finger, C4 type (two domains) Sonnhammer ELL anon Prosite Domain In nearly all cases, this is the DNA binding domain of a nuclear hormone receptor. 
The alignment contains two Zinc finger domains that are too dissimilar to be aligned with each other. 21.10 18.00 21.10 18.90 21.00 17.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.87 0.72 -3.74 26 7638 2009-11-03 19:32:23 2003-04-07 12:59:11 13 91 562 105 3458 7043 2 66.30 47 14.76 CHANGED phCtVCGD+ASGhHYGlhoCEGCKGFF+Rolppshp..YsCttsps..ChIDKppRspCQhCRh+KClpVGMs+ ........................t.hChVC.G...D...c....u..o....G..h.....H.YG.......l.h...............o..CE.......G.........C.....K..............u.FF.....+.........R...........o.l....p...........p.........p.hp...........Y.p.....C....t...t.....s.......p...s.............C................I......s...........+................p.........p....R.....p..........p........Cp.h..CRh.pKClp.s.GMp........................................ 0 862 1129 2576 +5013 PF01396 zf-C4_Topoisom Topoisomerase DNA binding C4 zinc finger Bateman A anon Pfam-B_1854 (release 3.0) Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.19 0.72 -4.45 27 10157 2012-10-03 10:42:43 2003-04-07 12:59:11 14 102 3740 0 1974 7084 1633 39.50 35 11.65 CHANGED stpCPcCG.upllh+pu+hG...pFlGCoNaPcCcasp.hppp .................pCP..p..C......G....u.....p.....h.ll......+.p....u.....+hG.........hF.h..uCos..Y.....P....c....C.chsp.h...t................ 0 613 1165 1614 +5014 PF02928 zf-C5HC2 C5HC2 zinc finger Bateman A anon [1] Domain Predicted zinc finger with eight potential zinc ligand binding residues. This domain is found in Jumonji [1]. This domain may have a DNA binding function. 
21.00 21.00 22.10 21.50 20.80 20.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.10 0.72 -3.90 52 591 2009-01-15 18:05:59 2003-04-07 12:59:11 11 55 220 0 364 545 6 54.70 37 4.03 CHANGED CphC+shsaLStlsCp.sps.schsCLpHhpplC.sC...sssp...........psLhYRash--Lpshl .........CthC+ssCaLSulsCp.s.ps..s.......pllCLpHs..pcLC...sC...sspc...........ppL.hYRYol--L.shh........... 0 88 162 259 +5015 PF01807 zf-CHC2 CHC2 zinc finger Bateman A, Griffiths-Jones SR anon Pfam-B_755 (release 4.2) Domain This domain is principally involved in DNA binding in DNA primases. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.25 0.72 -4.44 19 5345 2012-10-03 10:42:43 2003-04-07 12:59:11 15 58 4422 3 1153 4188 2239 96.10 38 16.85 CHANGED lspphIspltpphDIV-llupY.VpLKKpGcs.ahuhCPFHsEKTPSFoVsspKpaY+CFGCGsuGssIsFlMchcplsFs-AlccLActhslclshpps ........................................h.spphIpplhs..p...s..s..I.l.-lls...p..h....V.....p....L....+....K....p.....G.p.....s.....a........h.u.h..C..P...F...H...s.....E...K.....T..P...S..Fs..V....s.....s...........p......K.....p....h...aHCF...G...C...G....t.u.G...s.s.lsF......l.hc.h.....-......p.l.......s.F.s..E.AVcpLApphslpls...t.................................. 0 413 797 991 +5016 PF05207 zf-CSL CSL zinc finger Wood V, Bateman A, Mistry J anon Pfam-B_12353 (release 7.7) Domain This is a zinc binding motif which contains four cysteine residues which chelate zinc [1]. This domain is often found associated with a Pfam:PF00226 domain. This domain is named after the conserved motif of the final cysteine. 
29.20 29.20 29.20 29.60 28.50 29.10 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.31 0.72 -4.46 63 560 2009-09-14 12:18:22 2003-04-07 12:59:11 8 13 302 5 396 552 5 58.50 38 39.82 CHANGED --lcl-Dhpa.cp...ppp...h..ahasC..CGDt..Fplotp-......................Lpcs..............-.lspCsoCSLhl+Vlas ....................................-clclEDhpa...-p.......spp.....h.....ahYPC..CGDp..Fploc--.......................................Lcps..........................-plss..CsoCSLhl+Vla.................... 0 126 209 321 +5017 PF05180 zf-DNL DNL zinc finger Wood V, Bateman A anon Pfam-B_9925 (release 7.7) Domain The domain is named after a short C-terminal motif of D(N/H)L. This domain is a novel zinc-finger protein essential for protein import into mitochondria [1]. 20.90 20.90 21.20 21.70 20.00 20.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.42 0.72 -4.26 36 372 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 277 1 263 362 4 65.70 43 36.22 CHANGED h.lsFTC........phCspRSs+phSKpAYp+GsVllpCPuCcN+HLIADpLuhFt.D..pch.....slE-lltpcG-pl ..............................h.lsFTC..........psC.s..pR.Ss.+phSKpAYc+Gs.VllpCsGCpsc.......HlIADpLshFp...-.......tch...........slE-lltt+Gcp........................ 0 85 151 222 +5018 PF02701 zf-Dof Dof domain, zinc finger Mian N, Bateman A anon Pfam-B_1250 (release 5.5) Family The Dof domain is a zinc finger DNA-binding domain, that shows resemblance to the Cys2 zinc finger [1]. 26.60 26.60 27.00 27.10 26.40 26.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.27 0.72 -4.05 12 858 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 68 0 436 848 5 61.10 65 20.51 CHANGED pssphltCPRCcSssTKFCYYNNYslsQPRaFCKsCRRYWTcGGoLRNVPVGGGsRKsKpsuo ...........p..pphLpCPR.Cs.S.h.s.TKFCYYNNY....slo....QPRHFCKs..C+RYWTcGGoLRNV...PVGG..GsRKsKpt..t.................. 
0 71 291 367 +5019 PF04770 ZF-HD_dimer ZF-HD protein dimerisation region Kerrison ND anon Pfam-B_2002 (release 7.6) Family This family of proteins has are plant transcription factors, and have been named ZF-HD for zinc finger homeodomain proteins, on the basis of similarity to proteins of known structure [1]. This region is thought to be involved in the formation of homo and heterodimers, and may form a zinc finger [1]. 19.40 19.40 22.20 22.50 18.80 18.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.51 0.72 -3.91 36 349 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 39 0 213 332 0 58.20 56 24.97 CHANGED sssspY+ECLKNHAAulGGHAlDGCGEFMsu.utp.ss........ssuL+CAACGCHRNFHR+Espsss ................t...s+Y+EChKNHAAulGGa.AlDGCGEFMsu.Gp-...Go.....................suL+CAACGCHRNFHR+Esp..s............. 0 27 141 179 +5020 PF01422 zf-NF-X1 NF-X1 type zinc finger Bateman A anon Bateman A Family This domain is presumed to be a zinc binding domain. The following pattern describes the zinc finger. C-X(1-6)-H-X-C-X3-C(H/C)-X(3-4)-(H/C)-X(1-10)-C Where X can be any amino acid, and numbers in brackets indicate the number of residues. Two position can be either his or cys. This family includes Swiss:P40798, Swiss:Q12986 and Swiss:P53971. The zinc fingers in Swiss:Q12986 bind to DNA [1]. 21.30 21.30 21.50 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.30 0.72 -3.96 21 469 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 145 0 254 497 0 21.40 56 4.43 CHANGED CG.....HpCpphCH..GsC.s....C.p ...........CG............HpCpphCHt.GsCtP.....C.p.......... 0 57 98 163 +5021 PF00645 zf-PARP Poly(ADP-ribose) polymerase and DNA-Ligase Zn-finger region Bateman A anon Prosite Domain Poly(ADP-ribose) polymerase is an important regulatory component of the cellular response to DNA damage. The amino-terminal region of Poly(ADP-ribose) polymerase consists of two PARP-type zinc fingers. 
This region acts as a DNA nick sensor. 21.10 21.10 21.50 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.07 0.72 -3.74 54 620 2009-09-11 00:32:30 2003-04-07 12:59:11 13 68 222 32 406 653 6 80.80 30 14.72 CHANGED EYAKSuRusC+...pCp...pcItKsplRluphlpssth..........hthhpWaHhsChhtcthp.............tshsslp..Gacp.....Lp...-DQcc....l+ctlp ...................................EYAKouRu.sCK....tCp....ppI...t.Ksp..l.Rluth....s.ssht.................uth.pWaH..h..sChhppttph.........................tshsplc.Gapp.....Lp...-Dpcplpctl.t................................................ 0 154 220 329 +5022 PF00641 zf-RanBP Zn-finger in Ran binding protein and others Bateman A anon Prosite Domain \N 23.80 17.00 23.80 17.00 23.70 16.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.50 0.72 -4.84 24 3729 2012-10-03 10:42:43 2003-04-07 12:59:11 13 208 314 29 2137 3538 147 29.60 34 7.44 CHANGED +tG.pWpC..ssCs.hpNpspss+ChtCpus+.s ............u.sWpC.......ss..Cs.hhNh.spp..s.p.C...t..Cpss+............ 0 545 976 1540 +5023 PF02135 zf-TAZ TAZ zinc finger De Guzman R, Mian N, Bateman A anon IPR000197 Family The TAZ2 domain of CBP binds to other transcription factors such as the p53 tumour suppressor protein, E1A oncoprotein, MyoD, and GATA-1. The zinc coordinating motif that is necessary for binding to target DNA sequences consists of HCCC. 
19.00 19.00 19.00 19.10 18.60 18.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.14 0.72 -10.63 0.72 -3.97 22 729 2009-01-15 18:05:59 2003-04-07 12:59:11 11 57 134 16 456 660 8 76.50 38 8.52 CHANGED hplpchL.hLlHApp..Cps.t...............spCsh..pCtph+pllpHhpsCpttp...hstChts+pllp....Hh+pCpctc.CsVshs ...............................t..lQppl.hLlHApp.....Cpp..................................ssC.s.l....PpCppMK....pllpHh.ppC............p....t...pp...................sut..Cs.s+p..llu............Hh...KpC....p...c......p....c....CsVshs................. 0 164 240 349 +5024 PF02953 zf-Tim10_DDP Tim10/DDP family zinc finger Bateman A anon Pfam-B_1207 (release 6.4) Domain Putative zinc binding domain with four conserved cysteine residues. This domain is found in the human disease protein Swiss:O60220. Members of this family such as Tim9 and Tim10 are involved in mitochondrial protein import [1].\ Members of this family seem to be localised to the mitochondrial intermembrane space [2]. 19.80 19.80 20.20 20.10 19.70 19.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.70 0.72 -4.68 113 1455 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 335 20 1012 1331 11 64.70 25 64.21 CHANGED .phlttEtphtphpphhs...clscpCacKClsp.......ssupLs....psEpsClspCss+ah-ssttlupphppp .............................h.tptphpph...t..phhs........pls..ct..CacKC.lss..........hssspLs..........spE.psClsp....Css+ahcssphlspphtp................. 
0 311 524 818 +5025 PF02176 zf-TRAF TRAF-type zinc finger Mian N, Bateman A anon IPR001293 Family \N 22.80 20.90 22.80 20.90 22.70 20.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.45 0.72 -3.59 10 1745 2012-10-02 00:06:50 2003-04-07 12:59:11 13 81 229 11 1175 1418 42 55.90 26 18.63 CHANGED Hhps.CPhhslsCsstCst+hl.Rcclp-HlctsCspsclsCpF...GCppphptpsLQcH ......................H.tp.C..h.h...ltC..s.....p...t..s..s.p.p.............l....Rp.....pl.pp..Hh.p............pC.s.p......p............h..p.Cpa..................Cpt..p.h....tpphtpH............. 0 568 732 927 +5026 PF02207 zf-UBR zf-UBR1; Putative zinc finger in N-recognin (UBR box) SMART anon Alignment kindly provided by SMART Family This region is found in E3 ubiquitin ligases that recognise N-recognins [1]. 21.20 21.20 21.20 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.67 0.72 -4.26 99 1193 2009-09-11 04:39:18 2003-04-07 12:59:11 15 56 293 33 848 1163 4 71.30 30 4.46 CHANGED phCst.htpt..........pshYpChsCsh........sss....sslChsCashssHp..sHchhhhhsp...tsuhCDCG...ssps.......hp...p....Cph+..p ......................p.hCshshtst..........pshYpC.p..TCsh...............ss.s.........s.slChsC.tts.sHp..sHchphhhos...tshhC..........DCGssph......hp....t.......Cp...t........................ 0 316 455 695 +5027 PF03470 zf-XS XS zinc finger domain Bateman A anon Bateman A Domain This domain is a putative nucleic acid binding zinc finger found in proteins that also contain an XS domain. 21.00 21.00 21.50 21.90 20.60 20.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.30 0.72 -3.91 12 126 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 22 0 79 124 0 41.60 45 7.07 CHANGED CPaC.scKKps..Yp.hpsLLpHAsGlGtustp...u+cKApHhALA CPaCssc+..cps..Yp.hp-LLpHAsGl.Gtustp.+.u+c+A.sHhALA........ 
0 11 47 67 +5028 PF01258 zf-dskA_traR zf_dskA_traR; Prokaryotic dksA/traR C4-type zinc finger Finn RD, Bateman A anon Prosite Family \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.99 0.72 -4.19 241 5127 2012-10-03 10:42:43 2003-04-07 12:59:11 12 11 2754 13 1228 3101 1711 36.80 38 28.18 CHANGED sshuh..Cp..pCG-tIsttR.hphh..s.ssthClsC...pppt-p ................paGh....C-..pCGp.IshtR.LcAh..P.ssphClcC.......pphtE.............. 0 410 784 1014 +5029 PF04071 zf-like DUF379; zf_like; Cysteine-rich small domain Kerrison ND, Finn RD anon COG2158 Family Probable metal-binding domain. 19.60 19.60 20.60 21.00 18.80 17.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.70 0.72 -4.28 24 168 2009-09-10 20:56:46 2003-04-07 12:59:11 7 9 158 0 79 154 6 85.40 33 50.46 CHANGED cphphhh..h...s.pcsCEYYPCH.apsQ...sChaCYCPhYPCtDpph.GcalpspsG.t.tlWuCpsCpllHcs-ssstllcphpphscphp....shc ..........t...aphasscsCcaaPCH..hpsp....NCLFCYCPLYsht-pss.G..p..ahh..sc.sG......lKsCosCtlPHct-s.hshlhpphtphht........pt............................ 0 23 57 68 +5030 PF04354 ZipA_C ZipA, C-terminal FtsZ-binding domain Mifsud W anon COG3115 Domain This family represents the ZipA C-terminal domain. ZipA is involved in septum formation in bacterial cell division. Its C-terminal domain binds FtsZ, a major component of the bacterial septal ring. The structure of this domain is an alpha-beta fold with three alpha helices and a beta sheet of six antiparallel beta strands. The major loops protruding from the beta sheet surface are thought to form a binding site for FtsZ [1]. 
20.60 20.60 20.90 20.70 20.40 19.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.44 0.71 -4.67 59 1250 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1235 12 217 840 288 128.60 41 38.95 CHANGED sppllllp.VhAppspthpGspLlpsl.shGhca.G-MsIFHRHtc......sususVlFSlANhhpPG..sF-.-s.hppFsTsGlshFMpLPs.u.suhpsFchMlpsApplAccL.sG.lLD-pRshhTt........pshcpa+p+l ...........................................................p.ptVIlhp.VsA+p.s.ptlsGphLlsulppsGFhF.G.-.MsI.FHR.H.hs......sGsGssLFSlANMspPG..oF.D.s.....-....hs..-Fo..T.sGlolFMp...lPuhG....-..sh..........pNF+LM.LpoApclA--l.GGsVLDDpRchhTsQpLccYpsp.................... 0 42 104 163 +5031 PF00172 Zn_clus Fungal Zn(2)-Cys(6) binuclear cluster domain Sonnhammer ELL anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.94 0.72 -4.04 29 13354 2009-09-16 22:05:51 2003-04-07 12:59:11 13 237 239 36 10553 14063 8 39.10 32 5.66 CHANGED puCppCRp+KlKCct...ppP........CtpChpts..hcCp.hspppppt ................uCtpCR..p....R..Kl..K....C..Dt...............ppP..............................C..sp..Ct.cts......hp..Ct..ht......t................... 
0 1813 5198 8893 +5032 PF00882 Zn_dep_PLPC Zinc dependent phospholipase C Bateman A anon Pfam-B_1401 (release 3.0) Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.45 0.71 -4.59 152 1056 2012-10-01 21:01:47 2003-04-07 12:59:11 13 12 595 25 245 1095 36 178.70 16 56.63 CHANGED sHhtl.spsht.....hl...h.t.................h.hthsshhhGuhhPDh........h...........................tths+p...................hhh......h.ptshltthspth.t............................hhhhuhhlGhhoHhlsDhhs..H...........................thhttp..hhpHsthEhthphhh..............................................h..httt.thth........h.thltpthtthhtthhht .......................................................Hhhlspphhp.........hh..t...................ltp.hpth.hG..shhPDh........h.........................th......hhpscp.....................................pth....pphtcah..ttshpphpp...................................................tphppts.ah.LGhhhHahsDhsp.......H.................................................h.phhshs....ttHsthEphh-phh.................................h..........................................th....................................................................................................................... 2 119 194 221 +5033 PF04228 Zn_peptidase Putative neutral zinc metallopeptidase Kerrison ND, Finn RD anon COG2321 Family Members of this family have a predicted zinc binding motif characteristic of neutral zinc metallopeptidases (Prosite:PDOC00129). 
20.10 20.10 20.10 20.20 20.00 19.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.06 0.70 -5.18 8 1671 2012-10-03 04:41:15 2003-04-07 12:59:11 8 2 1494 0 416 1190 90 272.60 44 93.05 CHANGED McWcutRpSsNlEDRRupsGGuu.hGGGG........hh.thGGtsGlllllllLlGhhaGlDlosllG..........Gpssuss.ss.Qsstspuss...tpcEtspFssslLAsTEDTWsplFp.ctGppYppPsLVLaSpss+SACGtApSAsGPFYCPuDpKVYLDhoFas-hpp+hGAuGDFAtAYVIAHEVGHHVQNLLGIhsKlcptpp.ssocApANpLSVRlELQADCFAGVWAppsptct.h.LEpGDlEEAhNAApAIGDDTLQ+pupGhVVPDSFTHGTSpQRhpWFcRGapSGcPupCDTF ......................................................................................................................................Mchps..+p.Ss.N..l-Dp.R...ss........G.G.....su................h.hh...u...G..h....h....G.l.h....h.l....l.l.l.l....l.u...h.h.....h....G........l...D........s...s..l.hs...............................G.p.s...st.p.............p...s...s..t....s..h.sss....................p-cssc.F...s..s....s...l...L...u...s...T...E...DsWs.p...hF.p...p..h...G.....p..s..Y.p.p.P..+.....L.V....h..a..p....G...t...s...p........T......u......C.G...s...u..p...S.....s..h....G.......P...F...YC.....P.u..Dp..p...........VYl....D....l.S.F.a.c.-.h..c.s.....+h....G..............A......s.....G..............D...F...A.......QuY.....V..I...AHEVGHHVQ...pLL.GI.s.+.lpph..pp...ph....o..p.s..p..sN..pL..SV..RhELQADCa..AG.V...W....u+ph.......p.......p.......p..........s.......l........L....E....s...G.D.l..EE..A.LsAApA.IGDD..cLQpp.up..GpVV.PDS.FTHGTSpQRhpWFp+Gac.o.G....-..spCsTF.................................................................. 0 123 271 358 +5034 PF04298 Zn_peptidase_2 Putative neutral zinc metallopeptidase Kerrison ND, Finn RD anon COG2738 Family Zinc metallopeptidase zinc binding regions have been predicted in some family members by a pattern match (Prosite:PS00142). 
20.20 20.20 20.50 35.60 20.10 19.70 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.47 0.70 -4.99 75 1143 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1121 0 242 727 493 219.40 45 96.58 CHANGED sshhlll.lsshlluhhAQh+VpSsFpKYScVpspsGhTGA-lA+clLcssGlh.D..VpVcpls.GpLTDHYDPpsKslRLS-sVYsusSlAAluVAAHEsGHAlQatpsYusLplRsslVPlsshuSsluhhlllhGllh........tstsLlhlGIlLFusuVLFpllTLPVEFsAS.pRAlthLpspG..llss.cEhp.uu+cVLsAAAhTYVAAAlsulhpLLhh...lhlhtsp .............................................................s..hhlhhlhhhl.lshhAQh+VpSsapKYSpV..psssGh..TGt-lAccILcssGlh..DVpVpp.ss..GpLTDHYDPpsKsVpLSpssYpusSlAusuVAAHEsGHAlQct.puYs.Lc.hRsuLVPVsNhGSsl..u...a.hll....hl..Gllh...........hsssllhlGIl.......Lh.u..huVL..F.plVTLPV...EFsAS.pRAhphLpstG..llsp..cE.ht.tA+KVLsAAAhTYVAAuhsulhpLlRl.lllhtt.p..................... 2 113 190 223 +5035 PF03854 zf-P11 ZnF_P11; P-11 zinc finger Griffiths-Jones SR anon PRODOM Family \N 25.60 25.60 26.20 66.20 25.20 25.50 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.96 0.72 -4.49 3 115 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 38 1 0 114 0 49.90 61 52.85 CHANGED GPhNCKSCWFcsKuLVcCsDHYLCL+CLsLLLSVS-RCPICKpPLPTKLR G.hNCKSCWFts+uLlcCsDHYLCL+CLslhLssSDhCsICtcPLPT+l.. 0 0 0 0 +5036 PF03367 zf-ZPR1 ZPR1; ZPR1 zinc-finger domain Mifsud W anon Pfam-B_1372 (release 6.6) Family The zinc-finger protein ZPR1 is ubiquitous among eukaryotes. It is indeed known to be an essential protein in yeast. In quiescent cells, ZPR1 is localised to the cytoplasm. But in proliferating cells treated with EGF or with other mitogens, ZPR1 accumulates in the nucleolus. ZPR1 interacts with the cytoplasmic domain of the inactive EGF receptor (EGFR) and is thought to inhibit the basal protein tyrosine kinase activity of EGFR. 
This interaction is disrupted when cells are treated with EGF, though by themselves, inactive EGFRs are not sufficient to sequester ZPR1 to the cytoplasm [1,2,3]. Upon stimulation by EGF, ZPR1 directly binds the eukaryotic translation elongation factor-1alpha (eEF-1alpha) to form ZPR1/eEF-1alpha complexes [1]. These move into the nucleus, localising particularly at the nucleolus. Indeed, the interaction between ZPR1 and eEF-1alpha has been shown to be essential for normal cellular proliferation [1], and ZPR1 is thought to be involved in pre-ribosomal RNA expression [2]. The ZPR1 domain consists of an elongation initiation factor 2-like zinc finger and a double-stranded beta helix with a helical hairpin insertion. ZPR1 binds preferentially to GDP-bound eEF1A but does not directly influence the kinetics of nucleotide exchange or GTP hydrolysis [4]. The alignment for this family shows a domain of which there are two copies in ZPR1 proteins. This family also includes several hypothetical archaeal proteins (from both Crenarchaeota and Euryarchaeota), which only contain one copy of the aligned region. This similarity between ZPR1 and archaeal proteins was not previously noted. 
20.30 20.30 20.30 20.40 20.00 19.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.00 0.71 -5.08 14 842 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 422 2 572 806 66 154.90 34 67.74 CHANGED cssCPsCstpspT+h..hhssIPaF+EVllMSh.C-+CGa+ssElpsuutlpspG.Rhpl+lps.cDLsRcVlKS-ouolpIPELslEI.Ps.sttGplTTlEGlLpcshctLpps....tpcutcspptpphpcalp+lcpl...hpspp.hTlIl-DPhGNSalps ......................p.o.C.sCt.p.s.s.p.T.+h.......hhspI.....PaF+ElllhohpC-pCGa+...ssElpsuGtlp............pGh+hsLc.l...p...s......p.....D....LsRp.l.lK...........S-ouslp............I..P.EL....-lEl.Ps.s...tG...phTTl..EGlLpphh-tLpppt..................ht..........c......s..s...........sp..........ttp+hppFhp..+.....Lcph....................h.p...s...c..h...s...aTlll-DPhGNSalp........................................................................... 0 199 320 463 +5037 PF00791 ZU5 ZU5 domain SMART anon Alignment kindly provided by SMART Family Domain present in ZO-1 and Unc5-like netrin receptors Domain of unknown function. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.07 0.72 -4.35 30 1349 2009-09-12 23:26:40 2003-04-07 12:59:11 15 421 101 19 679 1023 3 99.30 30 7.68 CHANGED suhpspGhhsstGGpLp.sspoGVSLlIPsGAIspGpphEhYlslsc....cpst.................................hPP..hcpup.oLLSPlVsCGPsGshhhpPVhLplPHCAshs.t..-c...hhlpLKopssp ...................................................................hsphhhsspGGphp..h.p..s..GlpllIPstshs.t.s.p.php...ht....lshhp....c.ss..................................................................................PP....ht.cs.EsL..hS.ll...ph.G.....PsG...h..p.........F...h..p............PVhlplP..Hhushp..t...-p.....lhL+pps..p...................................................................................................................... 
0 300 340 472 +5038 PF00569 ZZ Zinc finger, ZZ type SMART anon Alignment kindly provided by SMART Domain Zinc finger present in dystrophin, CBP/p300. ZZ in dystrophin binds calmodulin. Putative zinc finger; binding not yet shown. Four to six cysteine residues in its sequence are responsible for coordinating zinc ions, to reinforce the structure [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.72 0.72 -4.42 10 2746 2012-10-02 13:15:50 2003-04-07 12:59:11 12 308 324 4 1705 2687 25 44.50 31 4.44 CHANGED s+cpscCNhCppsshlGhRY+sL+s.sYDLCpuCFhoG+su+sH+M ..................h...thpCs..t..C......p...p........s....l..h.....G...h...R.a.+C..t..p..C...........s.....a..DLCpsC....a...t.....p.....t...htt.p.................... 0 562 809 1281 +5039 PF04358 DsrC DsrC like protein Kerrison ND anon COG2920 Family Family member Swiss:P45573 has been observed to co-purify with Desulfovibrio vulgaris dissimilatory sulfite reductase [1], and many members of this family are annotated as the third (gamma) subunit of dissimilatory sulphite reductase. However, this protein appears to be only loosely associated to the sulfite reductase, which suggests that DsrC may not be an integral part of the dissimilatory sulphite reductase. Members of this family are found in organisms such as E. coli and H. influenzae which do not contain dissimilatory sulphite reductases but can synthesise assimilatory sirohaem sulphite and nitrite reductases. It is speculated that DsrC may be involved in the assembly, folding or stabilisation of sirohaem proteins [2]. The strictly conserved cysteine in the C terminus suggests that DsrC may have a catalytic function in the metabolism of sulphur compounds [3]. 
24.90 24.90 25.40 38.20 24.60 24.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.35 0.72 -3.81 87 1238 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1122 8 258 699 173 107.10 55 95.17 CHANGED tlphsGpplcsDc-GaLhs.hs-Wsc-lAphlApp.E.........sIs.....LT-sHWcVlpalRcaYtcap...hu.P.slRhLsKshutchG.cKus.oca...LYpLFP.tGPAKpAsKlAGLPKPssCl ..............h...apG+pI-TDs-GYLp-..spWoEslAhhlAcp.E.........GIp...Los-HWEVl+FVR-.FYhE.F.s..TS....P..AIRMLVKAhup.KaGp.EKGN.S+Y...LY+LFP.cG.PA.KQATKIAGLPKPsKCl................ 0 84 153 208 +5041 PF04252 RNA_Me_trans DUF431; Predicted SAM-dependent RNA methyltransferase Kerrison ND, Finn RD anon COG2428 Family This family of proteins are predicted to be alpha/beta-knot SAM-dependent RNA methyltransferases [1]. 20.10 20.10 20.20 21.90 19.90 19.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.15 0.71 -4.91 3 169 2012-10-01 22:53:19 2003-04-07 12:59:11 8 4 163 0 133 172 0 195.20 42 90.41 CHANGED PtlVIEHLEEssSEWL...LLEYpplAcphGs+hllTuupPE...............LRasss+luGVss-hccthsLcRu+VILLDLpAsc-LcPEDAocsoYIVVGGILGDHPPRGRTKcLpTuhhsultsR+LGscQhSlDGAl+TApLIAEG.hRLEEIEFEDsPEl+l-c....sS.sElsL+YAlPKlsGKlLLopGLL-LlKK-luYp-EDLu .....................................................hallEHh-........thu......t......Ws...hLEYtpI..tc...-...s...G...s...c.....l..l..h.os.....sp..................................hh....h..s..h..tph..t..ul..tp......h...p.....p...h..s...h......c..+.u+..VCLLDPtApp-L............s.............P-Du....s.p.........F-hFlFGGIL....G..................Dc.PPR.DRTuELRpp.....Gh.uRRLGshQMTTDTAVRVT+lllcc..phs......l-cI.alDaPEl+hsc.....................pE.uTEM..PFRYV....hs.pp........G.......cPIhPcGMl-LIKcDssps.-...h...................................................................................... 
0 38 70 108 +5042 PF04359 DUF493 Protein of unknown function (DUF493) Kerrison ND, Bateman A anon COG2921 Domain This domain is likely to act in a regulatory capacity like Pfam:PF01842 domains. This domain has a remarkable property in that the C-terminal residue of every protein in the family lies up in the alignment. This suggests that the C-terminal residue plays some important functional role (Bateman A pers obs). 21.90 21.90 22.10 27.70 21.50 21.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.83 0.72 -3.61 166 1622 2012-10-02 00:29:19 2003-04-07 12:59:11 9 2 1588 3 338 823 238 83.00 41 87.90 CHANGED hcshl.-FPsp.ashKllGt....s................sss..hhpt..lhpllpphh.s...sstslph+.sSspGpYhSlol.slpspup-plcslYppLs.shttV+hlL ...............h.ppLl.EFPss.FsaKVhGp..A.....................ts-..ls-p..VlEVlp+Hs..P...s-assps+.sSS+GsYpSVol.olpAos.hEQl-slYc-Lu.p.hchV+hVL.................... 0 86 200 280 +5043 PF04205 FMN_bind FMN-binding domain Yeats C anon Yeats C Domain This conserved region includes the FMN-binding site of the NqrC protein [1] as well as the NosR and NirI regulatory proteins. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.82 0.72 -3.82 182 4808 2009-09-11 22:55:10 2003-04-07 12:59:11 9 81 2361 8 1029 3722 716 88.10 25 29.05 CHANGED Ghsu......sl.plhl.......sh.....ssp.spIpsl...plhp........p......................p...........E.......Tsuhu....t.h......................pphh......pphhspp....................................................s.tp.lDuloGATloSpultpulppulpth ..............................................................................................................................Ghtu..sl.plhVsl....stp..up....lt..ul....plhp.............p...........................................................p..........E.................TsGlu....spht...........................h.spah..........pphhupp..........................................................................................tts..ss..l..DsloGATlTopulhpulppsh................................. 0 408 712 888 +5044 PF04432 FrhB_FdhB_C Coenzyme F420 hydrogenase/dehydrogenase, beta subunit C terminus Kerrison ND anon DOMO:DM04087; Domain Coenzyme F420 hydrogenase (EC:1.12.99.1) reduces the low-potential two-electron acceptor coenzyme F420. This family contains the C termini of F420 hydrogenase and dehydrogenase beta subunits [1], [2]. The N terminus of Methanobacterium formicicum formate dehydrogenase beta chain (EC:1.2.1.2, Swiss:P06130) is also a member of this family [3]. This region is often found in association with the 4Fe-4S binding domain, fer4 (Pfam:PF00037). 
21.30 21.30 21.40 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.36 0.71 -4.83 123 800 2009-09-10 23:45:23 2003-04-07 12:59:11 8 53 463 0 371 792 228 167.20 20 41.60 CHANGED ptshc+lullGtPCplpAl+chpp........t................hhhhlGlhCspshshpsh..phltchhuls.ps.V.t+..................h-hpp....Gc.....ahlph........t.cus.......hhplslcch............t.hhpss.........CphCtDhssph.....ADlosGs...........hGs..........t.Ghoh.....lhlRocpGpcllctsh.ptshl-spshsst..........htKl .......................................t...hc+lhhlGpPCplpul+ph.pp...t...h...................hhhlGlhCtss.hshpth.pchlpt..h.s.hp.....ps...lhc...................hchcp...............up........hhlph..................p...ssp..............hhphs.hpch.................................t.hhpsu......C.h.t.C.ts.h..ssph.....ADlolGs..................hus..............tGhoh.lllpop+Gpplhptht.pt..hlpht..............thttt................................................................................................. 0 116 255 326 +5045 PF04609 MCR_C Methyl-coenzyme M reductase operon protein C Kerrison ND, Finn RD, Mian N anon DOMO:DM04514; Family Methyl coenzyme M reductase (MCR) catalyses the final step in methanogenesis. MCR is composed of three subunits, alpha (Pfam:PF02249), beta (Pfam:PF02241) and gamma (Pfam:PF02240) [1]. Genes encoding the beta (mcrB) and gamma (mcrG) subunits are separated by two open reading frames coding for two proteins C and D [2]. The function of proteins C and D (this family) is unknown. 
This family nowalso includes family MtrC_related, 25.00 25.00 29.70 29.00 20.00 20.20 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.69 0.70 -5.59 18 122 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 65 0 81 118 17 201.70 34 82.52 CHANGED M........hhapGGVa+csplh-hlEDl....GGallQ+p.ht.-lhh.hhlP.p-hc.lct.sc.hhGplscuPLsGoElAVVu.SLup+HLPHssCDIuEYLR+sGupoNMlGLARGsG..........+plutlsscEpclIpEHDLAVathGNFcsCIhphpttLh+tl-.l.P.lVVsGuP.-h.h..........YluslGRh.tRh+p.p-lttLcphs--lu+llscpRtslscDP..hoPshVh-hlpppl.sl.t.htPhPls.QhsGhRlKlPYDchs-tIcclc ........................................................................................................................+GplsEs..tts-lslVu.S.uR+Hls+ssC-IophLRctGhp..s..sh..lsLstGsG..............tshhslptcEhc.IpcHclAVhphGNhcspIl.Ktphlhc.lc.lP.llVstuP.-h.................................................t...............................................................................t................................................... 0 17 51 68 +5046 PF04607 RelA_SpoT Region found in RelA / SpoT proteins Kerrison ND anon DOMO:DM04456; Family This region of unknown function is found in RelA and SpoT of Escherichia coli, and their homologues in plants and in other eubacteria. RelA is a guanosine 3',5'-bis-pyrophosphate (ppGpp) synthetase (EC:2.7.6.5) while SpoT is thought to be a bifunctional enzyme catalysing both ppGpp synthesis and degradation (ppGpp 3'-pyrophosphohydrolase, (EC:3.1.7.2)) [1]. This region is often found in association with HD (Pfam:PF01966), a metal-dependent phosphohydrolase, TGS (Pfam:PF02824) which is a possible nucleotide-binding region, and the ACT regulatory domain (Pfam:PF01842). 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.55 0.71 -4.08 102 8755 2012-10-02 22:47:23 2003-04-07 12:59:11 12 55 4348 6 1707 5709 2333 115.20 41 20.29 CHANGED GRhKphaSIhcKhpc..+s.............h...thcclhDlhulRlls..pp.........hp-CYpslsll+....shac.hss.....chKDY.............Iu.pPKtNGYpSlHssl................hG.pshhlElQIRThtMcthAE.GluuHhpYKpsts .................................................................................................................GRsKclaSIacKM..pc...+p..................h...th.cclhDlhul...R...lls......pp................................lpDC.Yt.sL..u.h.lH.................sh..a..c...s.hss.........................chKDY.................................Iu...tPKs....N..G.YpS..l..HTsl..............................................hGsps..h..slElQI.RThpMcphAEh.GlAAHWtYKps.t..................................... 0 537 1087 1434 +5047 PF04226 Transgly_assoc Transglycosylase associated protein Kerrison ND, Finn RD anon COG2261 Family Bacterial protein, predicted to be an integral membrane protein. Some family members have been annotated as transglycosylase associated proteins, but no experimental evidence is provided. This family was annotated based on the information in Swiss:P76011. 25.20 25.20 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.45 0.72 -3.89 159 4300 2009-09-13 11:40:02 2003-04-07 12:59:11 8 3 2610 0 729 1905 66 48.10 36 57.58 CHANGED IllGllGAhlGshl..hshhGh....hts.htluuh..lsullGAllLLhlhp.hlpp ......lllGllGAhlGuhL..hs.h.aG.............sluuh.....lsullGAllllhlhphlh.......... 0 177 395 574 +5048 PF04264 YceI YceI-like domain Kerrison ND, Finn RD, Bateman A anon COG2353 Domain E. coli YceI is a base-induced periplasmic protein [1]. The recent structure of a member of this family shows that it binds to polyisoprenoid [2]. 
The structure consists of an extended, eight-stranded, antiparallel beta-barrel that resembles the lipocalin fold. 20.50 20.50 21.00 21.00 20.30 18.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.38 0.71 -4.28 135 3875 2009-01-15 18:05:59 2003-04-07 12:59:11 8 16 2238 16 1127 2995 1071 164.90 25 82.66 CHANGED sa....pl..............Ds..spoplsFpspc.....h.shsph.......G.pFs.p..hs....G....pl......sshsssth..shslchs..Sl..ss.....t....sthc..pc.lpst......paFcsppaPphsFpus.php.........h................ttt..hphp..GsLTl+GhT+s.lshss...plp.t.....................................................................ts.ps...............hshsup.spls.RscFGlshsthh.................luccV.plplplphp ................................................................................aplDs..sHoplsFphpH............h.shohh......GpFp.p....hs........G..slp.............s.ps..ssspl.........sloIsss...Slsoss......................spRD....pH.L+us.............DFhs.s..s..+aP..p.hoFpoo..plp......................t................pt..spls.....GsLTl+..GlT+s..l.slcs......phh.s................................................................................tu.ps............t.hsGhpus...sp...lp.RpDFGlshshsh.................luccV.plplplph.h................................................................... 0 378 777 993 +5049 PF04431 Pec_lyase_N pec_lyase_N; Pectate lyase, N terminus Kerrison ND anon DOMO:DM04067; Family This region is found N terminal to the pectate lyase domain (Pfam:PF00544) in some plant pectate lyase enzymes. 
20.70 20.70 20.70 22.00 18.50 20.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.76 0.72 -3.88 15 105 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 30 0 39 93 0 50.40 37 14.34 CHANGED sIu.EaDEYWpcRu--A+ptshpAYcPcP.sVTs+FNpcVtcuhp..........................ssNsTRRsL ......ls...-chWtp+tc.A+ptshtAYhsDPhslTscFNtcVhcuhp........................................................sRRt...................... 0 4 22 30 +5050 PF05223 MecA_N NTF2-like N-terminal transpeptidase domain Bateman A anon Bateman A Domain The structure of this domain from MecA is known [1] Swiss:Q53707 and is found to be similar to that found in NTF2 Pfam:PF02136. This domain seems unlikely to have an enzymatic function, and its role remains unknown. 21.80 21.80 21.80 21.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.52 0.71 -4.03 17 954 2012-10-03 02:27:23 2003-04-07 12:59:11 6 6 730 10 155 737 7 115.00 23 18.53 CHANGED sssppphppFlsAhsct-hsphhshsspss.........cht-thpslasulpscslcl.........tshpspcsscsphslsachphpsshuch...hshshphphscp-sp.Wp.lcWpsuhlaPphpcsp .............................................t.pscpshppalsulsc..pcasphhp..ssp.u....p...t.sp.-hs-+hpplYsul..t.sc.slpl....................pphph.p..cs.s.c..sp.tplsaphphcT.shGpl.......................hshsh.phsh......s+....cc....s.....p...W+..lcWssuhIaPshpcsp..................................... 0 55 117 141 +5051 PF05224 NDT80_PhoG NDT80 / PhoG like DNA-binding family Wood V, Bateman A, Glass NL, Moxon SJ anon Wood V Family This family includes the DNA-binding region of NDT80 [1] as well as PhoG and its homologues. The family contains Swiss:Q05534 or VIB-1. VIB-1 is thought to be a regulator of conidiation in Neurospora crassa and shares a region of similarity to PHOG, a possible phosphate nonrepressible acid phosphatase in Aspergillus nidulans. 
It has been found that vib-1 is not the structural gene for nonrepressible acid phosphatase, but rather may regulate nonrepressible acid phosphatase activity [2]. 19.40 19.40 19.70 19.90 16.80 19.30 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.42 0.71 -4.29 16 421 2012-10-03 00:25:27 2003-04-07 12:59:11 7 11 202 16 306 408 0 168.80 30 24.03 CHANGED palsY+RNaFplssshsh.t...sphhptshh..........................tsspphplphFtlclpAhcs.s.....spsIpLlQ.........tospccKs.thsPshsslhst..P...h.h.tsp.hs..t...................................splsphspacRLQFppuTssNt+ppt.....QpaFpLpVpLhs...............................sptsssphlhltphposslIVRGRS....Pusapsp ...........................................................asC.++NaFQlosph..s..h.....sphlp.......................................................sstshhslptFtlplpuhcstt........spslplhQ...............................o.pccp.t...hpPshh.sl....s..................................................................................................................................phsp.hohpRLpFp...puTANN.tR++u...........QpaFhLhVpLhA.....................................t.t.sp.s..phhhlutthSt.lIVRups....Pupaps.p................................................... 1 70 146 243 +5052 PF05225 HTH_psq helix-turn-helix, Psq domain Lehmann M, Bateman A anon Lehmann M Domain This DNA-binding motif is found in four copies in the pipsqueak protein of Drosophila melanogaster [1]. In pipsqueak this domain binds to GAGA sequence [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -8.10 0.72 -4.41 27 1317 2012-10-04 14:01:12 2003-04-07 12:59:11 11 67 254 1 1002 1636 41 43.90 28 10.93 CHANGED sp-pltpAlpslp.....sGt.hSlp+AuphaGlPpSTL..cthctthshpp ................ptplttAl.pslp...........sGp.hS.l.p..cAAphYslPpoTLh.cthpth.....t.............. 
0 324 585 881 +5053 PF05226 CHASE2 CHASE2 domain Ulrich L, Zhulin I anon Ulrich L, Zhulin I Domain CHASE2 is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in bacteria. Specifically, CHASE2 domains are found in histidine kinases, adenylate cyclases, serine/threonine kinases and predicted diguanylate cyclases/phosphodiesterases. Environmental factors that are recognised by CHASE2 domains are not known at this time [1]. 22.00 22.00 22.00 22.70 21.20 21.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.31 0.70 -5.19 160 950 2009-01-15 18:05:59 2003-04-07 12:59:11 6 44 512 0 409 1005 1318 318.30 20 45.69 CHANGED lslshhhlhhhhhhtts..............hp.....h....-hth.....aDhhhphp.....s...............sc..llIVsID-p.............Sl...........t......up....WP.WsRshhApLl.cpL.............tpt.tspsluhDl..lF.scP.s............................................sDptLupul..tps...ll...ls.h..............................................htttppts..sht.................................................................................................................................................stlttt..stshGhh.shhh..Ds.DG.....llR....+h.l...........hss.t.hhs....ululphh..............thhhsspshtht...t......................................................t...t..................hhlsa........tus....tt......sh.....plSh..t-l....L.....s..uphs.......................shl....cs.+lVLlGs.o....As.ult......DhhsTPh............t..................hsGVElcAshlsslLs....................sp.hlh.hsshs.hhhhlhhshhhulhh 
...............................................................................................................................hhh....hhhhh.h.hhhhh.h.......................hpt....h.....-hhh.....aDhhhphp.......................s..s..s.....sp.lllVsID-p...........................ol.................p........phGp..............WP.asR....s....hhApllppL.............tpt...tsp..sluhDl..hh..sps..p........................................................................................sDptLspsl...p..p..s..........ls...lshh......................ht.tt..ttt....h........................................................................................................................................................s.ltt......ssthGhh..sh.h......Ds..DG.......hlR.....ph.l..........................h.t..ss..p..hhs...........ululthh.....hhhhttp.s.hhh.t.t.t.......................................................................................hs...............s....th..p.......................hhlsa..........tus..............tt................sh.........plSh..t-l...L...........p.....splss................................................shl...cs+lVll...Gs..o......As..uht.........D.hhsTPh.........ts............................hsGVplpAphlsslls.........................tp...hl..h.....h...s..t.h..h..t...h..h.hh.h..h.hshhhhh..h.................................................................. 0 126 275 353 +5054 PF05227 CHASE3 CHASE3 domain Ulrich L, Zhulin I anon Ulrich L, Zhulin I Domain CHASE3 is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in bacteria. Specifically, CHASE3 domains are found in histidine kinases, adenylate cyclases, methyl-accepting chemotaxis proteins and predicted diguanylate cyclases/phosphodiesterases. Environmental factors that are recognised by CHASE3 domains are not known at this time [1]. 
25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.54 0.71 -4.49 180 1210 2012-10-02 01:04:29 2003-04-07 12:59:11 8 127 733 1 487 1312 72 135.90 19 20.96 CHANGED ssphlp+ohpllsph...pplhsthh-tEoG.RGYLLTucppa.LpPYpputsplpptlscLp..phs..s......-sP...tQppc....lpplpthhppth.shhcph...lshpcp.....t..shp...sshphltsspG+phhD.....plRphhs..ph.......tspE........ppllppRptp ..........................................t....lp+shplhsphpplht....tlhshEouhRGallo.s..c.p.s.....a...L.psYppu.t..t.p...hppplspLp..pL..s..t......csP........pQ.ppc....l.p.plpthhp...phh..p.hpth...lshtps..........t.t.p.....sshph..l.p.....s.....s.t.u+.thh-........plRphlsph.......psp.E.......pphhttRt..t............................................................... 0 132 288 404 +5055 PF05228 CHASE4 CHASE4 domain Ulrich L, Zhulin I anon Ulrich L, Zhulin I Domain CHASE4. This is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in prokaryotes. Specifically, CHASE4 domains are found in histidine kinases in Archaea and in predicted diguanylate cyclases/phosphodiesterases in Bacteria. Environmental factors that are recognized by CHASE4 domains are not known at this time [1]. 
20.80 20.80 20.80 21.30 20.70 20.00 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.50 0.71 -4.65 95 529 2009-01-15 18:05:59 2003-04-07 12:59:11 8 79 398 0 207 499 16 160.10 17 23.09 CHANGED ppslppphpplpphspDaAsWD-sYpalps..pst.t....alpsNls.sphh..pshpl-hhhllstsGphl.a..tpshshpsst..h.hst....................sltphlsphp.h....................ttttpsshsGllhhsssPhlluupsIhssssps.....sspGsLlhsRhlcsshlspl.pphshhslplth..s .................t..hlp.phpphtphhtDaAhWD-shp..alts......tst..s.............ahpsNls..sphh..pshth-hlh.lh-spuphl.a..t.pst.sh.p.pstt...p.lst....................shpp.hlpphpth...............................sptppshsshhh...hs.st......s.hl.lusss.lpssssps......sstuh.Ll.hs+hlss..phlppl.pphshhslph....st............................... 0 64 129 167 +5056 PF05229 SCPU Spore Coat Protein U domain Yeats C anon Yeats C Domain This domain is found in a bacterial family of spore coat proteins [1], as well as a family of secreted pili proteins involved in motility and biofilm formation ([2]). This family is distantly related to fimbrial proteins. 22.40 22.40 22.40 22.50 22.20 22.30 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.07 0.71 -4.09 127 1343 2012-10-02 17:35:21 2003-04-07 12:59:11 10 3 332 0 336 1103 36 142.70 23 81.23 CHANGED sssshsls..sslsssC...slsus.............slsFG..............ssssssssssss.usl...slsC....o......sssshslslssGtssssss............Rphts..uss......hlsYpLYpDuupoph..Wusssssshsss.................sussp....sls...lYGcl.............ssssss.uG....sYsDTlslT ..........................h...shsss...hslhssC....slsus.......................slsFG..............ss.s...s..s.ss.s..s.ss.s...usl....slp.C..........o.........sssshslslssGtssssss............Rphts.....suss......hls.YpL.Yp....-u.up..o.p.h........as.ssssts.sssst..................sussp............sls...lY.upls.....spsss...s..sG........oYsDTlsl.................................................... 
1 48 107 209 +5057 PF05230 MASE2 MASE2 domain Galperin M anon Galperin M Domain Predicted integral membrane sensory domain found in histidine kinases, diguanylate cyclases and other bacterial signaling proteins. 25.00 25.00 25.00 26.60 23.70 24.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.10 0.72 -4.01 21 590 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 566 0 64 308 10 89.60 59 24.31 CHANGED +RhYhsRhlGhslshhsluuslhstshsthlWlLLlhtuhlWPHlAaphApR..uppPhcsEptNLhlDuhhGGhWluhMtFssLPolslLsM .......RRlRLPRAVGLuGMF.LPIAosL..VopPssG..WWWLlLVGW..A.FVWPHLAWQlAuR...AlDPLp..pEIY.NLKsDAlLuGMWlulMGVNsLPSsAhLMh.......... 0 9 19 45 +5058 PF05231 MASE1 MASE1 Galperin M anon Galperin M Domain Predicted integral membrane sensory domain found in histidine kinases, diguanylate cyclases and other bacterial signaling proteins. This entry also includes members of the 8 transmembrane UhpB type (8TMR-UT) domain family [2]. 29.50 29.50 29.50 29.60 29.40 29.40 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.95 0.70 -5.40 33 2684 2009-01-15 18:05:59 2003-04-07 12:59:11 9 142 1051 0 372 1651 78 274.00 22 40.20 CHANGED hhlhhhhhhhlshhhsht.shshssssthslW.hPsGltluhhlhhshpthPsll......lushlhp........hhtthshhhshshshsssltshhushllchhhttttthpph.........................hhhh.sshhsshhsAssshshhhhhshhshs.hstshhhahlushsGsLllsPhshlhhphhhpphth........phh.p..ht.h.hh.hhhhhhhlhhhhshhh.hhh.s.........t...............hlhhssllasAhRaGhpGusl.hhhlhusllhhhththst.hsh..t......h.lp........hal..uhtshssl...hlushlpcpc 
......................................h.hl.hhhlhhlh.h.hh..h..h....p.ht.h.h...s..s.........hs..l..a..hPhulh.l.u.hhh...h....h....s....h.t..h.hPslh..............huhhltp.............h..hht....h....s.....h...h.t..s....h......s......hh.....h.h...th...l.....s.l..ls.s....sll..Rhh.h..p..p.s.hshh.......................h.hh.h....hhh..hhhsshshhh.....h..h...h..p......h..s.h...s........h..s.ht..h...h.h.shlup..hs.GsLhh.sP.ls..h.h...h...h...c...h..l.h..p.....p.hh........................................h.h.p..........h.p..p..thh.hhlhhhsl.s.hh...h......h...h...h....s...p...........phhh.................hlhl.....lhah..A...h+hGh..tts..hl.............h.ls.slhh.hhshh..h..st...s.h....................................h..h.................hL...hh.ph...hs...sh....hushhpc.................................................. 0 81 199 290 +5059 PF05232 BTP Bacterial Transmembrane Pair family Yeats C anon Yeats C Family This family represents a conserved pair of transmembrane helices. It appears to be found as two tandem repeats in a family of hypothetical proteins. 25.60 25.60 33.50 26.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.30 0.72 -4.23 181 1557 2009-09-10 14:54:21 2003-04-07 12:59:11 7 2 633 0 358 1134 42 67.90 31 89.83 CHANGED +ohtpRlhHAlhFEhshlhlslPlhAahhshulhpshllsluhshhhhlasalaNhhaD+lhs..h.tsp .......+ohtpRlhHAlsFEshhlhlssPlhAahhshol........hpsh..sLslshslhhhsashlaNhhaD+lhs.t...hhh.... 0 55 137 243 +5060 PF05233 PHB_acc PHB accumulation regulatory domain Yeats C anon Yeats C Domain The proteins this domain is found in are typically involved in regulating polymer accumulation in bacteria, particularly poly-beta-hydroxybutyrate (PHB) [1]. The N-terminal region is likely to be the DNA-binding domain (Pfam:PF07879) while this domain probably binds PHB (personal obs:C Yeats). 
21.40 21.40 21.60 26.40 19.70 20.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.99 0.72 -4.22 72 555 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 403 0 227 497 146 40.90 38 29.37 CHANGED hssshLpQlIphYGsshQuhhssYLEpShphFtctQpphpc ....hssshLsQlIphYGssMQGhMssYLEpShphFhchQpphpp......... 0 48 136 179 +5061 PF05234 UAF_Rrn10 UAF_rrn5; UAF complex subunit Rrn10 Wood V, Bateman A anon Wood V Family The protein Rrn10 has been identified as a component of the Upstream Activating Factor (UAF), an RNA polymerase I (pol I) specific transcription stimulatory factor [1] 20.80 20.80 20.90 21.10 20.10 20.30 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.44 0.71 -4.08 2 57 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 56 0 37 47 0 105.20 36 57.86 CHANGED Ms.N..sh.sh.Nllpt.GphshpAc-llt...D.uVPIP....cEl......D...........PDl.l+slp.aATphhLsph.+.hpshDEouLlsLGhLlpcWhcshlTsh.pE.tcp ..............................................................................................l.lPh..hp.h...............sphlP..D.D.L+slHYY.Ao.ph....s...L...s....+.....h.........chh..p...uhDETuLIsLGlLlEcWsc-hlst........t.tt................ 0 4 14 29 +5062 PF05235 CHAD CHAD domain Aravind L anon Aravind L Domain The CHAD domain is an alpha-helical domain functionally associated with the Pfam:PF01928 domains. It has conserved histidines that may chelate metals [1]. 
22.70 22.70 23.10 23.50 22.40 22.60 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -12.01 0.70 -4.78 186 1090 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 934 2 406 1065 129 229.00 20 53.54 CHANGED pt.hhtpplp......phhtp..pssl.tps.........-s..EslHphRVulRRLRohlpha.ps.hh.....pthtthp.pcl+tluptLGssRDh-Vhh........tpht..............sthttt....................................................................shttlhptlppp+..pps....tpphtttLputchpphltpl....pp.hl.tt........................................................................................t...htphssptlp....cthc+ltcthpp.t................................................p..sp.p.h.HclRhtsK+LRYshEhht....sh.............sttt..hp....phhcpl+plQ-hLGphpDhsl.ttph.lpph ..........................................................h..hhtpthtphhtt...pttl..pps.........ss....-ulHphRVuhRRLRohLpha...ps.hh......s...tt...tss....tlppcL+hLuphL.......G.hRDh-Vhh......ppht...............htth...............................................................................................shpt.lhpthppp.p.......pt..s....tp.p....h..h...p..t..L...p....o..tca..tp.Llh.sl....pt....hl..s.t..................................................................................................................httshpphsspplc.....+thccl.h.ct.httht...............................................t..............p..sp.t.hHplRhtsK+lRYshEhht.......h..........................................tt......hp...........phhpthctlQshLGphpDhssttthl...h......................................................................... 0 122 261 346 +5063 PF05236 TAF4 Transcription initiation factor TFIID component TAF4 family Wood V anon Wood V Family This region of similarity is found in Transcription initiation factor TFIID component TAF4 [1]. 
24.10 24.10 24.60 24.10 23.90 24.00 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.79 0.70 -5.11 26 410 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 239 2 271 367 0 237.20 28 39.18 CHANGED lNDVhusAGVsLpEEEthLhus.............ophhtshscpsccps......FLp.ttLtphlpcl.................................u..pcps...........hphss-lhpLlSpAscphLpsllpKhhllScHRpcuhK................................t.phthpS-VptpL+tls.hpKp-c-R+tp....cctthhL............Ec.pp....tcstt.s+t-s-chppRsANsTAthtlG.....s+KKaphhsusst..............Spss.....sht.tsstp.ssh......ptp.......hRtt.s..................Ish+DlLhsLEp-R.hs.p......phlhKuYt ....................................................................................lsDlhshuGVslpEEpt.lhts.................o..phhtp..hpp.p...pc.ps.....................F...L..s..Lpp+l.h.cl..............................................s...pcps..............lpphps-.llshlS......tAsppRLpsllcchhhluppRhps.h+.....................................................................sstp.ht..ssD.l+t.pL+hhpp...h-ph-cp....+..+pp......p.-p-.......hh...h+t....u.c..sp.........................stt.....-D..s-p.ph.Kp+sK.c....h.....ppt-.......tp.h.p.p+.s.ANhTAhtAlG............s+K..K...hph.sss.s...........................................stst...........s....s.tst.t..t.............................hc.t.p.....................................................lsl+DllhshEp-tth..p.......hh.....h................................................................................................................. 0 79 127 203 +5064 PF05237 MoeZ_MoeB MoeZ/MoeB domain Cortese M, Bateman A anon Cortese M Domain This putative domain is found in the MoeZ protein and the MoeB protein. The domain has two CXXC motifs that are only partly conserved. 
26.90 26.90 26.90 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.74 0.72 -4.39 41 4751 2009-01-15 18:05:59 2003-04-07 12:59:11 8 17 3320 13 1252 3468 1698 83.20 33 27.48 CHANGED spsPCYRClaPp....st.sssCspu..GVlGslsGhlGslQAhEAlKllsGh..ucslsu...pLLhaDuhsh.cacpl+lp.+cssCssCGsps ..........................t.tsPCYRCLa.sp............stss.h.o.C.s.pu...G..V........luPllGl.lGolQAhEAl.KlL.s....Gh.........ups.h.sG...................+Llha....D.....uhs....h..p.......a+p.l.p.lp...+s.s..sC.s..sCGt..t................................ 0 397 765 1050 +5065 PF05238 CENP-N CHL4; Kinetochore protein CHL4 like Wood V anon Wood V Family CHL4 is a protein involved in chromosome segregation [1]. It is a component of the central kinetochore which mediates the attachment of the centromere to the mitotic spindle [2]. CENP-N is one of the components that assembles onto the CENP-A-nucleosome-associated (NAC) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [4]. 
25.00 25.00 32.30 25.50 20.10 24.20 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.07 0.70 -5.42 29 193 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 158 0 124 186 0 374.10 25 94.03 CHANGED s.ss.slh+.hLs+Ls+ssLlsLshpWhppps.................................t.h..tt.lpplpp......hppLpptpsp........KRpll-+IlptsWppGlsLhQlA.lDhth.lh-+Psuh.pWsshplp..................tcsspslsphcsspFlspLppplpslhcsahhl...sRcssLPllhlRIhlhs.s..p.......................ts.psthsop+shYlsasssoPalapSh..sss.h.t...........................+llhpuls+ALotsppphtl...pssphss+s.LpuLhslpG.su....Rps..suhGuao.aA-ssh.........................-.sPL.ssh.pp....tp.................psppp.sppsspppp...........................+pcphspt+Fusospsh............l-phph+l.s..sup.s.t................................................hppspppsplplphsGsclhuGl+cLs......ctGllDspchPuWhT....G..Ecusosuhl+cG .....................................................s.ss.hlh+.tlh+lshspLhslhhtW...pp.......................................................ppLpphphp...........Kcpllp+llphs.ccu..lolhQlA.lDhha.h.p.+spph.h.....Wsshph.................................tpss.t.sl...sthc.ppFhpshppplp..hh+shhhh...hRctp..shlhlRIhh..............................tsthst.sph+shYlsa..spoPa.lahSh...sp....................................chhh.hlspALohsppphpl....thsLss+s.LcsLhslhh..t....tpp..tutusao.at-ts.h..................................c..sL.p......t..........................................ptt.h..ppp..tpc..........................................+hpphsptpFGssspst...............................................lchhp.h+lps.h.psp.sts..........................................................................st.ppp..phhlpFsGsclhtul+pLs.......................tGlhDs..hsshhTs..ccuhs.hhlcpt........................ 
0 22 49 90 +5066 PF05239 PRC PRC-barrel domain Aravind L anon Aravind L Domain The PRC-barrel is an all beta barrel domain found in photosystem reaction centre subunit H of the purple bacteria and RNA metabolism proteins of the RimM group. PRC-barrels are approximately 80 residues long, and found widely represented in bacteria, archaea and plants. This domain is also present at the carboxyl terminus of the pan-bacterial protein RimM, which is involved in ribosomal maturation and processing of 16S rRNA. A family of small proteins conserved in all known euryarchaea are composed entirely of a single stand-alone copy of the domain [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.33 0.72 -4.11 114 6817 2012-10-02 14:14:57 2003-04-07 12:59:11 11 26 3995 121 1960 4958 1373 76.30 21 44.59 CHANGED .p.hhhsclhGhpVhsps.G...cplGpl..p-lhl-ht.ssclhhlhlshst...............cphhlPhp......thchtpctlhlpshptth .........................................hhhp-lhGhp..V.hsts..G...................pp...l..G..p.l...p-l.......l....css....u.....s.....c...lhs.......l..t.h.s.t.t.........................................cchl.lPah.............h.pl.cls.s.ctl.hlp......h................................................... 0 622 1275 1636 +5067 PF05240 APOBEC_C APOBEC-like C-terminal domain Yeats C anon Pfam-B01590 Domain This domain is found at the C-termini of the Apolipoprotein B mRNA editing enzyme. 23.50 23.50 23.90 24.30 22.80 23.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.01 0.72 -4.56 45 86 2012-10-02 00:10:39 2003-04-07 12:59:11 9 5 33 4 13 662 0 54.00 44 23.25 CHANGED cspapcGL+pLppuG....splplMsap-FpaCWcsFVcpp.............tps.FpPWcpLpc...p.phLp ......s.apcuLphLppu.G....AplsIMsYs...EFpaCWcsFV-pp.............spP..Fp.PWcsLc-...pSptL.p................. 
0 7 8 9 +5068 PF05241 EBP Emopamil binding protein Moxon SJ anon Pfam-B_7320 (release 7.7) Family Emopamil binding protein (EBP) is as a gene that encodes a non-glycosylated type I integral membrane protein of endoplasmic reticulum and shows high level expression in epithelial tissues. The EBP protein has emopamil binding domains, including the sterol acceptor site and the catalytic centre, which show Delta8-Delta7 sterol isomerase activity. Human sterol isomerase, a homologue of mouse EBP, is suggested not only to play a role in cholesterol biosynthesis, but also to affect lipoprotein internalisation. In humans, mutations of EBP are known to cause the genetic disorder of X-linked dominant chondrodysplasia punctata (CDPX2). This syndrome of humans is lethal in most males, and affected females display asymmetric hyperkeratotic skin and skeletal abnormalities [1]. 31.00 31.00 31.20 31.30 30.80 30.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.48 0.71 -5.29 46 404 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 197 0 278 388 0 182.60 27 79.73 CHANGED llshhhsssssllssshhls......thh..phstsc+hshhWashsu.hlHhhhEGhFlhpptphss......................................spshh..........................upLWKEYupuDoRYhss.....DshllslEslTsllhGPLuhhlshtlhpp...................cshRahLQlllshupLYGshlYassphhcuh........shshsp.sp.hYaWhYalhhNslWlllPuhllhpohppls..tAh.sth.....pps .....................................................................h.....h.h.hhlh.hshhhs........hh.......ths.t..hc+hhhhWashs.................u.hl....HhhhE.G.Flh.phh.h.hs............................................................spshh...........................upLW...+...EY.u.pu...DuRYhss........DshllshEhl.Tsh..lhGPluhhhsahlhpp..................................c.shp.a.hl.plh.lu.l....u..plY.G.....shhYas..sphhsu..................s.shsp.s.p.hY.hW........hYhl.h..hNs.lWlllPhhllhpuhtplstuht.....t............................................ 
0 68 140 213 +5069 PF05242 GLYCAM-1 Glycosylation-dependent cell adhesion molecule 1 (GlyCAM-1) Moxon SJ anon Pfam-B_7429 (release 7.7) Family This family consists of the lactophorin precursors proteose peptone component 3 (PP3) and glycosylation-dependent cell adhesion molecule 1 (GlyCAM-1). GlyCAM-1 functions as a ligand for L-selectin, a saccharide-binding protein on the surface of circulating leukocytes, and mediates the trafficking of blood-born lymphocytes into secondary lymph nodes. In this context, sulphatation of the carbohydrates of GlyCAM-1 has been shown to be a critical structural requirement to be recognised by L-selectin. GlyCAM-1 is also expressed in pregnant and lactating mammary glands of mouse and in an unknown site in the lung, in the bovine uterus and rat cochlea [1]. 25.00 25.00 36.40 36.40 20.70 20.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.74 0.71 -4.24 4 14 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 11 0 4 22 0 122.40 48 88.94 CHANGED hLstsKDElahcoQPTDA..uAQhh....hSpcpsSScDLSKEPSI.RE-LlSKDsVVIcSs+.PQNQpsp........cuLRsuooQpEETTc.ssuuAoToEGKLscLoppl.KpLtpslcthlshlcslhssASclVKP ........LscscDEhHhcoQPTDA..uAQhh....hSc-plSscDLSKEsSI.+EELlSK-sVVIcSs+.spsQpsp........cslRsus.Q.EETTc.ssp....uAoToEGKLscLuppl.+pLtpshcthhshlcslhssAscls+P... 0 0 0 0 +5071 PF05244 Brucella_OMP2 Brucella outer membrane protein 2 Moxon SJ anon Pfam-B_7448 (release 7.7) Family This family consists of several outer membrane proteins (2a and 2b) from brucella bacteria. Brucellae are Gram-negative, facultative intracellular bacteria that can infect many species of animals and man [1]. 
19.20 19.20 19.40 21.90 18.30 19.10 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.70 0.70 -5.21 2 23 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 16 0 0 20 0 179.50 87 61.52 CHANGED MRSLQlEGsLhtu..LpTTPDSPIAAPLTVVLVRRRCGRICALQP.RE.VGDVDVATNLsSPFFDDRVIGNNTSDRTPAASIhQAANNVRHVsVDhV.....lsssVhATLFESDHSREAVSAuEGVtDLAGAVGASRDHVIVDDIsEVTGEGMEFtFlDTNhpTTchDIpcLp.GAuA........YAtI.....p.............................sIslhstlsVlAhh MRSLQlEGsLhtu..LpTTPDSPIAAPLsVVLlRRpsGRICALQPNRELVGDVDVATNLGSPFFDDRVIGNNTSDRTPAASILQAANNVRHVAVDVVVGRASVTTVVVATLFESDHSREAVSASEGVRDLAGAVGASRDHVIVDDIAEVTGEGMEFRFIDTNAQTTELDIRELHDGAAA.GFITIFTIYARIVRSIVEAQFGEGLEGAEFGFRTGGNAECETSALVPAIAVGTGVNVIAAL...... 0 0 0 0 +5073 PF05246 DUF735 Protein of unknown function (DUF735) Moxon SJ anon Pfam-B_7611 (release 7.7) Family This family consists of several uncharacterised Borrelia burgdorferi (Lyme disease spirochete) proteins of unknown function. 23.40 23.40 24.60 24.60 22.90 22.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.31 0.71 -4.57 2 138 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 28 0 10 101 0 180.50 68 99.27 CHANGED hcIPphhcNTpIcKFIpsEh-YtptlLpELKpL.pNF.oINVhpsIpS+YIAlhh.plhshFahKpph.pslssslsulIFAl+pIGTDESFhllFKsFLpsslEVoosE..sG.I.IpLhGsIKoshph.Iu..sKptp+LKKIhh+.....aAu.KKALs.N.hPKsYcpSlYtFIKhlIPIGRllK........IpsTcspph.Thsp .....MKIPNhhcNTEIcKFIhTEhEYAQtLLNELKpLNSNFlSINVhENIKSRYIAIWISQVLSIFYAKTQTLQSITSNINSVIFALRHIGTDESFRLIFKAFLNVDI-VT...TPE..AGVIDISLKGsIKTNFTTFISP..STKKGKR....LKKIllREKK.GYAASKKALVFNSLPKGYDHSIYAFIKtIIPIGRVLK........INspcGpNIITFNN......................... 0 6 6 6 +5074 PF05247 FlhD Flagellar transcriptional activator (FlhD) Moxon SJ anon Pfam-B_7623 (release 7.7) Family This family consists of several bacterial flagellar transcriptional activator (FlhD) proteins. FlhD combines with FlhC to form a regulatory complex in E. 
coli, this complex has been shown to be a global regulator involved in many cellular processes as well as a flagellar transcriptional activator [1]. 21.30 21.30 21.30 23.70 21.00 21.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.12 0.72 -4.03 29 772 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 693 6 122 317 10 100.70 60 90.55 CHANGED Ms.ss-lLp-I+-lNLSYLhLAQphlppD+A.sAhFRLGISc-hADlLtpLo...AQllKLAsoN.LlC+FRFDDpt.llshL.Tpp.....s+scslsphHAuILhAupsscpl ...............MpTSELLKHIYDINLSYLLLAQRLIspDKA.SAMFRL.GIsEE.MAssLuuLT..LPQMVKLAETNQLVCHFRFDsHQ.TIopL.TQ-............SRVD....DLQQIHTGIhLSoRLLp..s...................... 0 7 46 85 +5075 PF05248 Adeno_E3A Adenovirus E3A Moxon SJ anon Pfam-B_7497 (release 7.7) Family \N 25.20 25.20 25.40 37.00 21.60 24.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.78 0.71 -4.03 11 109 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 83 0 0 74 0 105.20 56 97.52 CHANGED MssssA-hsRL+.....HhcHCRR.+CFAR-slphsYFplPc-H.pG........PuHGVRlslpsshcS+hl+hhstRPlLsEpcpGsoplolhCICs.pPuhHpsLhstLCutYN+s .........pusuAELARLR.......HLDHCRRhRCFARE.utthIYFElPEEHPpG........PAHGVRITlEGsh-S+LlRhFoQ+PlLlE.R-pGsTTlTlYCICs.pPtLHEshCCpLCuEFNKs............... 0 0 0 0 +5077 PF05250 UPF0193 Uncharacterised protein family (UPF0193) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 
23.30 23.30 23.30 25.90 23.20 23.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.56 0.70 -4.76 5 117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 86 0 77 119 0 176.20 35 81.17 CHANGED hAupWPSsRVAKG..GlF..HosKApYTKETQDLIKVLMEESKLThLQR++Is-+LRsGEPLPlPcpP+hpQphs...phLs........Msst++NhKKRSLpsI.tSGAac.EhYh.PtcsK..sEKpKpKLQppMA.Gp+l.PDsGhRK+hPR++u-csh-hsEsDRhsELL-EINERsEWLsEMEALGQGKKYRslIpsQIAEKLRELccLDR++SpEs-huht.l ...............................................................................................................t.l..G..sha..ps....stYo.tTp-Ll+lhMcESKlo..hpp+pl.cph+pG.psLPh......p.....Ppsspp.........t..s...........................hh......t........h...+sh...p....hpss.sAYppp.a+..P.p.ss..+........hEKpKp+.LQs..hhA..G..............-.t...+.............................s.........h.tt....cp..t.....c..DhhpE..LlpEIpERtEFLs-MEuLGp.G+.pY.+shIhsEIup+l+chctl-pphp.p.p......h............. 0 29 37 56 +5078 PF05251 UPF0197 Uncharacterised protein family (UPF0197) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 23.20 23.20 23.20 23.90 22.90 22.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.62 0.72 -3.67 6 160 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 121 0 102 147 0 75.10 54 77.54 CHANGED l-pMsRYlSPVNPAVaPHLAsVLLuIGhFFsAWFFVaEVT..........SoKasRslaKELLISLsASlFLGFGlLFLLLhVGIYV ................MpRYsSPVNPAlaPpL.slVLLuIGhFFTAWFFl...YEVT......................So..KhsRslhKELLlullASlFhGFGs....LFLLLhVGIYV.................. 0 37 49 79 +5080 PF05253 zf-U11-48K UPF0224; U11-48K-like CHHC zinc finger Bateman A, Andreeva A anon Andreeva A Domain This zinc binding domain [1] has four conserved zinc chelating residues in a CHHC pattern. This domain is predicted to have an RNA-binding function [1]. 
21.10 21.10 21.60 21.30 20.60 21.00 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.14 0.72 -6.90 0.72 -4.20 84 695 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 184 2 441 662 1 26.70 35 12.17 CHANGED clhsCPasssHpl.tpclptHltp.Cpc .....hh.CPasssHpl.tsclppHltp.Cpp.... 0 121 173 310 +5081 PF05254 UPF0203 Uncharacterised protein family (UPF0203) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 21.50 21.50 22.00 22.30 21.20 20.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.60 0.72 -4.12 19 246 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 207 0 182 247 0 68.30 36 54.14 CHANGED Ms..ShutECs-hKccYDpCFNcWYuEKFL....KGctsp..stCschaccYppClppsLpc+sIp......ph.L-pu+cc ............Ms..Slu.pCs-lKccYDsCFNcWauEKFL...................K.G..pssp.......stCsplacpYppClpculcc+tIt......ph.httt+................................. 0 65 101 149 +5082 PF05255 UPF0220 Uncharacterised protein family (UPF0220) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 25.00 25.00 25.70 25.50 23.00 23.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.02 0.71 -5.00 24 399 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 275 0 246 338 0 149.70 34 93.88 CHANGED Mst..p..haRh.....ch.tsspt+phulYlAGsLFuhGaWhhlDAulaSpps.p.st...l.......HlsFlDWlPhlhSTlGhllVNSI-KupLp...s-uhup.............usssAhtARlhLFlGFuLhAGGluGolhVhllKallpth.shsslhhGlANllsNshIhlSsllLWhup.shEDEYs ...............................p...h+......ph..stptpshushsAGsL........FhhGaWlhlDAulh.sp..p......................................plsas..ahsGlhuoluhlhlNuVp.puplpu.-uast.....................GshsthtARlhLF.lGFhLhhGuLhuShhlLhh....talsp......................ths...sla.GlAshh.pNshlhhu.sllhhhup...hE-.a............................... 
0 76 123 193 +5083 PF05256 UPF0223 Uncharacterised protein family (UPF0223) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 25.00 25.00 31.60 31.50 24.90 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.91 0.72 -3.87 35 928 2009-09-10 20:58:59 2003-04-07 12:59:11 7 3 925 2 80 364 1 84.60 50 95.57 CHANGED hsYpYPLDh-WSTEEllsVlsFastVEpAYE..pGlcp-cLLssY+pFKplVPuKuEEKpls+-FEcsSGYSsY+sV+pAKcssct.hlph ...hpYpYPlDh.s.WosEEhhtVlsFaspVEpAYE..suVpsccllsuY+pFKplVPSKuEEKpls+EFEpsS.GYShY+uVQtAKppsct.plp.l............. 0 18 40 63 +5084 PF05257 CHAP AXE; CHAP domain Bateman A anon Pfam-B_2845 (release 7.7) Domain This domain corresponds to an amidase function. Many of these proteins are involved in cell wall metabolism of bacteria. This domain is found at the N-terminus of Swiss:P43675, where is functions as a glutathionylspermidine amidase EC:3.5.1.78 [1]. This domain is found to be the catalytic domain of PlyCA [4]. 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.95 0.71 -4.47 47 5579 2012-10-10 12:56:15 2003-04-07 12:59:11 11 106 1721 22 443 2917 78 122.10 25 30.57 CHANGED pthttpssshst.tsastsQCs.astphhtphss...........h.phhGNAts..........Wstss.thpua........ssstsPpsGslhsa..........tsssssYGHVAhVppV...tsssslpl.EpN..........h..tshshtshcph.st.......s.hsaIh ..........................................................tht........hsttphYstuQCT...ha..shphh..hus...............h.s.hh...G....s...Aps......................Wss.sA...ttpua......................psspsPp.s.Gu.lh.h.............tsssusYG..HVAhVppV...............s.s.s...s...l..p.luE.N..............a.....t.s...h...hhs.+sh.t........t.htaI.......................................................... 
0 112 217 369 +5085 PF05258 DUF721 Protein of unknown function (DUF721) Moxon SJ anon Pfam-B_7527 (release 7.7) Family This family contains several actinomycete proteins of unknown function. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.51 0.72 -3.77 167 2098 2012-10-01 19:58:36 2003-04-07 12:59:11 7 3 2068 0 633 1679 1349 88.60 18 55.78 CHANGED sls.pllpph.h..pp..ttht.pshthsplhp.tWppll...GsplAppspshpl.c...............su........s..LhltsssushtppL.phhptpllpclspth.s.....thlpclpl ...................................tthhpph.hpt....tthp.pshthtplhp.pWppll.............G.splu....spscshpl..c..............................su........s..LhltssssshuppL.ph.hpspllpplppth.s.........tslpplph................. 0 197 420 538 +5086 PF05259 Herpes_UL1 Herpesvirus glycoprotein L Moxon SJ anon Pfam-B_7535 (release 7.7) Family This family consists of several herpesvirus glycoprotein L or UL1 proteins. Glycoprotein L is known to form a complex with glycoprotein H but the function of this complex is poorly understood [1]. 19.60 19.60 20.10 20.90 17.50 16.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.40 0.72 -4.59 14 77 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 36 1 0 71 0 104.90 33 53.27 CHANGED shshtshhssslssILphsChs....sshhhsh.sspshspsl.sGIhl+scCssPEslLWacpsppAYWVNPalsspGhh..EDltcsthssss.........+stlhpsLssth .............................hhhtshhspplusILclsCls.s..cshsaph.sss.....shs.sl.sGIhl+hcCss.EslLW.cpstpAYWVNPalhltGhh..-Dlscsuhs.sss.........hctlhpuLssh.t......... 0 0 0 0 +5088 PF05261 Tra_M TraM protein, DNA-binding Moxon SJ anon Pfam-B_7584 (release 7.7) Family The TraM protein is an essential part of the DNA transfer machinery of the conjugative resistance plasmid R1 (IncFII). On the basis of mutational analyses, it was shown that the essential transfer protein TraM has at least two functions. 
First, a functional TraM protein was found to be required for normal levels of transfer gene expression. Second, experimental evidence was obtained that TraM stimulates efficient site-specific single-stranded DNA cleavage at the oriT, in vivo. Furthermore, a specific interaction of the cytoplasmic TraM protein with the membrane protein TraD was demonstrated, suggesting that the TraM protein creates a physical link between the relaxosomal nucleoprotein complex and the membrane-bound DNA transfer apparatus [1]. 25.00 25.00 34.30 33.90 24.00 20.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.28 0.71 -4.08 8 222 2012-10-02 15:10:05 2003-04-07 12:59:11 6 1 171 17 11 121 0 120.70 70 98.53 CHANGED MPKlQsYVSspVhEcINsIVEcR+tEGAc-pDVSFSSlSSMLLELGLRVYEhQhE+KESGFNQhEFNKlLLEsllKophhss+ILslpsLos+lsuss+F-apsllpsIcccVpEpM-hFFP-s-DE .MA+VplYlSs-lh-KINtIlE+RRpEGA+-+DlSaSusuSMLLELGLRVYEA.....QMERKESAFNQsE.FNKlLLECVVKTQSoVAKILGIESLSPHVSGNPKF.EYANMVEDIREKVSuEMERFFPcND-E........................................................ 0 0 0 6 +5089 PF05262 Borrelia_P83 Borrelia P83/100 protein Moxon SJ anon Pfam-B_6712 (release 7.7) Family This family consists of several Borrelia P83/P100 antigen proteins. 
25.00 25.00 25.20 25.20 24.60 24.20 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.78 0.70 -6.22 3 126 2009-09-13 12:24:55 2003-04-07 12:59:11 6 7 67 0 21 122 3 520.80 46 87.98 CHANGED ++ElDpV.NtTlEFlNYsGPHDsVDSA-sIRGIGEsLAuAL+uGsAGDp..uRYuVIHsVDPpsKcGhDADIFIIGupAcVDHIsNlRcIlAGYLpAAYGYccpDAcTIA+FlTIYNAVYRuDLDaFKp+YKpVVTKsLTKE+AGLARRYDQWPGKTQIVIPLocptaSGsl.SuVDTsSISDKKVVE+LREDs-KsV-hRRDMlDL..............................................KERESQEuAKRAQ.hKcEuDpKQc-ADKtppcADpAQcsADKQR+EscQKQQE.......AKNuP-PAcTousKEDK+V...........A......EppK+EhEKuQpEscKssEEApKAKDpt......A--LK+E..uKupEK.......hAEcKotEAQ+-Rc-VAu........................................DhQKhhsps+AcsTutu--....AI-SSsPsYGLKVVDsp+hLSELVLlDLKTEscLRoSulpTIRsRsLYpcsKsLVAIAsT.SGNAslpLVcIDsKoLEVlKESspcIAupSsLl+sup.hhsul..pDDssKWhlu+assK.L-..hhStsclLPaTsa .......................................................................................................................................................................................................................................cKL+DFV..N.hDLEFVNYcGPYDSosTYEQIlGIGEFLA+s..Lh....su..sS..NSsh....hGKYalsRaID-pDKKu.SsDlFSIGucSpLDSILNLRRILTGYLhcuFcY-+uSAELIAKsITIYNAVYRGDLsYYKthYIpsuLKsLoKENAGLSRVYSQWAGKTQIFIPLK+sILSGpl-SDIDlDoLVT-KVVsuLLo.ENE.u.GVsF.AR..DlTDIQsEh+csDQcKIDhE.sshcp.c......................................................................sp.ppphps.pcp.pctos-ppcKE.IESQlDAKK+Q..KEEL.D.....cK..shc....LDKAQ.......QK..L.D...tuc.-sLDlQ.RcsV.+EKlQEsIschN+-KNLP.KPG.DV.SSPK.V....DKQLp.........tphQtp.p.........cssDEsQKREIEK..QIEIK..KsDEE.LlK.sKDt+.......AhDLpp-.....SKuSSKp................................p.hsEcKch.-......uK+phcsluc..............................................................................hsLQcscsQs+sps..T..shsp-........ul-SusP..VF...LEVI..D.Ph.T..NL.GsLQLIDLsTGscLKcSsppGIpR.....YGlY.ER-+D........LV...................VIKh-.SGc....AKLQ....lLs.KhENLKVlSESsF.EIs+sS.SLYVD.S+MILVsV..cDsussW+LAKFSscsL
-cFlLSEscIhPFTSF................................. 0 11 14 16 +5090 PF05263 DUF722 Protein of unknown function (DUF722) Moxon SJ anon Pfam-B_6789 (release 7.7) Family This family contains several bacteriophage proteins of unknown function. 29.80 29.80 29.90 31.40 29.70 29.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.73 0.71 -4.01 7 87 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 63 0 15 69 1 127.80 31 92.67 CHANGED MADKLDRIIuDYlsG+LpA+IKuhE.Ralh+pKsD.NLGhRTA.sGsuc..uphLppEt.EsDcELh+L+cphphlshaacsLhtp.EKclIpL+apthtthoWYpVh.cLs......ls.ppA+phahpF+psIhph ..........................MAD+lDclluDYhsGhlpscIcth..c...chhh..+.ppsD..NlGhtsu.sssucsEscslhp...EsDccLt+Lcc.hphl-hhhpsLhs-....-KclI.pL+appt.tthTWhpVu.cLs......lscppA+phhhpF+p.l...h........................... 0 5 5 11 +5091 PF05264 CfAFP Choristoneura fumiferana antifreeze protein (CfAFP) Moxon SJ anon Pfam-B_6800 (release 7.7) Family This family consists of several antifreeze proteins from the insect Choristoneura fumiferana (Spruce budworm). Antifreeze proteins (AFPs) and antifreeze glycoproteins (AFGPs) are present in many organisms that must survive sub-zero temperatures. These proteins bind to seed ice crystals and inhibit their growth through an adsorption-inhibition mechanism [1]. 25.90 25.90 27.10 26.90 25.30 25.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.89 0.71 -4.40 4 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 4 17 0 49 3 59.70 45 103.26 CHANGED MKhhMLIMALA.IsTVSSDGoCsNTNSQlotNSpCV+SThTNCYIDNSpl...............................YsTTCTGSpYDGVaITSSTTTGTpISGPGCoISoCTITtGVsAPSAAC+ISGCTLpAN ..............................lo.NS.C.pSThTNC.lspSpl...........................pSpl.toTCTsSpas.GlhIToSToTsoph.................................... 
0 0 0 0 +5092 PF05265 DUF723 Protein of unknown function (DUF723) Moxon SJ, Finn RD anon Pfam-B_6852 (release 7.7) Family This family contains several uncharacterised proteins from Neisseria meningitidis. These proteins may have a role in DNA-binding. 20.90 20.90 21.10 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.12 0.72 -4.17 6 74 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 23 0 6 29 9 60.50 46 34.44 CHANGED huhTFppAtochs-+FPc..lpLlcFsGltcPsoIsCPhHGsVshSsa.puhI+SKaGCPcCu .....................huhoFppAtu+hpp+FPc...lsLl-FsGlhtPsolpCPhHGsVphusa.puhl+.SK.h.G..CPcCu. 1 6 6 6 +5093 PF05266 DUF724 Protein of unknown function (DUF724) Moxon SJ anon Pfam-B_6894 (release 7.7) Family This family contains several uncharacterised proteins found in Arabidopsis thaliana and other plants. This region is often found associated with Agenet domains and may contain coiled-coil. 25.10 25.10 25.20 25.10 24.80 25.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.22 0.71 -4.68 15 90 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 12 0 62 86 0 176.20 27 27.11 CHANGED hhlP....FsK+tshWKshEohEVFKplPQuPHFsPLh.cscE-hREhsAlGhMloFhsLL-cVpsLplD-shSplpslspsFsELEKHGFNVpsPpSRIsK...lLoL+scpocph-EhKshEKchs-c...........p..-sthschEccIlELpRpt.hhpctKEst-.......pEIuphcopAsplcQplpss-h-FposhuAPW ..............................PFsKpt.hWp.hE.sh-VFchlPQpPHFpPL..ph..thREhhAlGhMhoFssLl-plppLpl-D.s.ts.hpshhc.slscLEc.pGFsVpslpsRLsc....LLsl+scpsphhcct+ph-cphtcc.......t....pph-pphtcl-pclh.....cLccpt..htphppttp.................t-lsphpspsptlppphtshchcFpshhst.............................. 0 10 24 45 +5094 PF05267 DUF725 Protein of unknown function (DUF725) Moxon SJ anon Pfam-B_6905 (release 7.7) Family This family contains several Drosophila proteins of unknown function. 
20.80 20.80 21.10 22.90 19.90 19.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.77 0.71 -4.22 11 174 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 15 0 81 172 0 122.40 25 50.34 CHANGED sssshospCFshYlPhlNpluspasssYssClssAssptpslssphppsppslppSuppsCssh.psC.......sohssshshFpCaAssusssspshYsISuNAopuAsplpcphpslcspc.pCsNpTp+s .............shpCFshYhshhsplsspYstsYstChsstpssppplspphpppppplpssspphCssl.psC.......s..sh.sss.h.s.hFsCaussuspsspshaslusNAo..ptssplppphphlphpcttCsspupp.......... 0 11 12 50 +5095 PF05268 GP38 Phage tail fibre adhesin Gp38 Moxon SJ anon Pfam-B_7415 (release 7.7) Family This family contains several Gp38 proteins from T-even-like phages. Gp38, together with a second phage protein, gp57, catalyses the organisation of gp37 but is absent from the phage particle. Gp37 is responsible for receptor recognition [1]. 25.00 25.00 31.70 29.40 20.10 19.90 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.92 0.70 -4.96 8 76 2009-09-11 08:17:54 2003-04-07 12:59:11 6 1 63 0 0 45 2 168.10 45 71.99 CHANGED MAVVGlPGWIGpSAVsETGQRWMsuAApcLRlGlPsWMSsMAGRS+EIIHTLGADHNFNGQWFRDRCFEAGSAPIVFNITGDLVSYS+DVPLFFMYGDTPNEYVQLNItuGVsMYGRGGNG....sussusGosGGcsIQNDIGGRLRIsNsGAIAGGGGGGGG...su..suatsphssGGGGGRPFGsGG...suuthSGGsAS....louPGuGu...ussstasGGsGG-VGuuGGpuh...GtssppssGGAAGtAVhGSAPTWpNVGsIYGsRV .................................................sGWlGSSAVsETGpRWMuuAuspL+L.usPhaMSQMsG+Shp.h.holGt............................................................................................................................................................................................................................................................................................... 0 0 0 0 +5096 PF05269 Phage_CII Bacteriophage CII protein Moxon SJ anon Pfam-B_7453 (release 7.7) Family This family consists of several phage CII regulatory proteins. 
CII plays a key role in the lysis-lysogeny decision in bacteriophage lambda and related phages [1]. 21.30 21.30 21.30 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.11 0.72 -3.98 3 497 2012-10-04 14:01:12 2003-04-07 12:59:11 6 2 366 12 19 217 1 84.00 41 89.93 CHANGED MspAS....TR+EAsRIESuLLN+IAhLGQRKlA-AlGlcESQISRWKsDWIPKhSMLLAVLEWGVsDD..-lARLAKpVAclLTKKKRPsC.TERSE ...................M..ss....pp+..c..ssRhEosLL.pLuhlsQ+shAchlGspESplSR.......s.......-...ah...hhu...hL..hA...aGhss-........hu+h.h+hshs.hl.....T.pcKtPss.sE................................................... 0 0 4 12 +5097 PF05270 AbfB Alpha-L-arabinofuranosidase B (ABFB) Moxon SJ anon Pfam-B_7464 (release 7.7) Family This family consists of several fungal alpha-L-arabinofuranosidase B proteins. L-Arabinose is a constituent of plant-cell-wall poly-saccharides. It is found in a polymeric form in L-arabinan, in which the backbone is formed by 1,5-a- linked l-arabinose residues that can be branched via 1,2-a- and 1,3-a-linked l-arabinofuranose side chains. AbfB hydrolyses 1,5-a, 1,3-a and 1,2-a linkages in both oligosaccharides and polysaccharides, which contain terminal non-reducing l-arabinofuranoses in side chains [1]. 
21.30 21.30 21.60 21.90 21.20 21.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.95 0.71 -4.49 11 334 2012-10-02 19:42:32 2003-04-07 12:59:11 8 53 174 16 174 358 3 131.80 27 13.91 CHANGED lS.l+soossYssRYlsHsuoTlNTpVVSSuSussl+pp.......ASapVpsGLAsuu.....ChSFESsDsPGoYlRH..tNFpLhlsANDGoctFpcDATFCPpsGlsu..pGs.SlcSasYPsRYlRHYsNlLalsusuG...a-ssssFpsDsoFhlt ....................................................................................l......tt.....s.....s.hps.sss............p.........us.atl.s.s.G.....L...us.ss.....slShES..sshPGhaL.R.H......tsh.pltLs...t..s...-.u.o..shFppDAT.F..phpsG...hss..sGh..........ohcShs..hPs.p..alR..H.h.s.....h..lhlst..........ttst.aptcsoFhht............................... 0 38 89 139 +5098 PF05271 Tobravirus_2B Tobravirus 2B protein Moxon SJ anon Pfam-B_7517 (release 7.7) Family This family consists of several tobravirus 2B proteins. It is known that the 2B protein is required for transmission by both Paratrichodorus pachydermus and P. anemones nematodes [1]. 20.90 20.90 21.10 197.70 20.40 19.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.56 0.71 -4.40 4 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 3 0 0 13 0 117.70 63 47.08 CHANGED ssDWuohWPNDpLFlsDhhpLVWFDhhsDhVchpHFsuQsssDLSsIPKuFlSFlDNRlPMCINHKGhVYIRVt.su--sYYQKFG-LDVSsFsDshLPPDh-FsFsKVshssscpl. NGDWuoKWPND+LFlDDFGKLVWFDlLsDlVcIoHFVSQsPTDLSsIPKSFISFIDNRlPMCINH+GWVYIRVKh-u--VYYQKFGELDVScFGDShLPPDFEFsFsKVTssVD+pLV 1 0 0 0 +5099 PF05272 VirE Virulence-associated protein E Moxon SJ anon Pfam-B_6573 (release 7.7) Domain This family contains several bacterial virulence-associated protein E like proteins. These proteins contain a P-loop motif. 
21.50 21.50 21.50 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.18 0.71 -4.84 10 1163 2012-10-05 12:31:08 2003-04-07 12:59:11 6 20 659 0 186 1048 243 191.70 28 33.69 CHANGED YLcul..pWDGhPRl-ohFhD......aLGsEDspYsptso+hahhuuVARVa.....cP..GsKaDalllLpGsQGsGKSThlctL.GGc..WFoDo...lcshcsKDthptlpGsWIsEluELsuhs.Ku-lEsIKuFITRocDpaRssYG+pspsaPRpslhVGTTNccEaL+DsTGsRRFaPlpss.K..plsh....s-hhpt.h-QLaAEAhhhYcc .............................................................th..tWD.Gh.pR.lpt.....hhhp.........hls.......s....p....s..s..t.......h.s......p......h....++ahluhlA..+...sh.....ps....ss..+...h...c..h..s..ll.L..h.G.s.Q.G.hGKSTa....h.c.....t..L....s..sp.......a.a.oDs........l.p...t..h.....p..s..+....-.s..h.....p....t....l..pt.h....hllp....hs.E.hsu.hs...ppp..h.p.tlKshlo....cphsphRhsYs..c..p.s.pchsRpssFlGToN.p..p.-......a..L.p.DtT.Gs.RRFhslpVp.s....hch..........c.................hpQlaApAh.h...t................................................................. 0 95 149 176 +5100 PF05273 Pox_RNA_Pol_22 Poxvirus RNA polymerase 22 kDa subunit Moxon SJ anon Pfam-B_6584 (release 7.7) Family This family consists of several poxvirus DNA-dependent RNA polymerase 22 kDa subunits. 25.00 25.00 79.50 79.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.04 0.71 -4.65 10 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 43 0 0 34 0 182.10 69 99.44 CHANGED MNpHNV+YLAKILCLKsEIh+cPYAlISK-llp+Yss-lcYGDLVTlITVpHKlDss+TVFQVFNESSVsYoPlEcDYGEPIIITSaLQpGHNKFPlshLYIDlVASDlFP+FsRLos-ElsllsSlLQsGDsK..poLKLPKMLETElusKILYHKDhPLKlVRFa+NNMlTGlEluDRuVlsVl .MNQaNVKYLAKILCLKTEItRDPYAVIsRsVlhRYsTDIcYsDLVTlITV+HKIDohKTVFQVFNESSlsYoPV-DDYGEPIIITSYLQ+GHNKFPlNFLYIDVVhSDLFPoFVRLsssEssIVsSVLQsGDuK..coL+LPKMLETEIVsKILY+PsIPLKIVRFFRNNMlTGVEIADRSVloV.. 
0 0 0 0 +5101 PF05274 Baculo_E25 Occlusion-derived virus envelope protein E25 Moxon SJ anon Pfam-B_6633 (release 7.7) Family This family consists of several nucleopolyhedrovirus occlusion-derived virus envelope E25 proteins. 20.70 20.70 211.80 211.40 18.70 18.00 hmmbuild --amino -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.15 0.71 -4.68 17 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 53 0 0 53 0 183.90 52 84.15 CHANGED upS.uDSlQhs.p.GphsVK.hNss+lKslRlhaGD..NcISKlhVuEpPLoYs-IlDcGN+.VGsNsVFlGslp-shsss................sssssRsTuNFsIKQFKNhFIVFKsl-.ocIcpsssMlRYEu-uMVYsLIDuosoolP-.LRDV.....SYPIsVhTsNussQLhLKEWsYTQINDuGTLFlKNEKSFR ....spS.SDSlphssp..GphsVK.h.Nss+lKslRlhHGD.....N...KlSKlaVAE+PLoYs-Il-cGN+pVGsNsVFlGTl.-sussS...............s.sssssRsTuNFDIKQFKNhFIVFKsl-ssKIccsssMlRaEuDGMVYCLIDussoolP-..LR-V.....SYPIsVYTsNussQLhLKEWsYTQINDuuTLFlKNEKSFR..... 0 0 0 0 +5102 PF05275 CopB Copper resistance protein B precursor (CopB) Moxon SJ anon Pfam-B_6721 (release 7.7) Family This family consists of several bacterial copper resistance proteins. Copper is essential and serves as cofactor for more than 30 enzymes yet a surplus of copper is toxic and leads to radical formation and oxidation of biomolecules. Therefore, copper homeostasis is a key requisite for every organism. CopB serves to extrude copper when it approaches toxic levels [1]. 
20.80 20.80 22.10 20.80 20.60 19.80 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -10.93 0.70 -5.10 81 554 2012-10-03 17:14:37 2003-04-07 12:59:11 6 4 422 0 149 499 67 205.50 41 65.28 CHANGED ssthhthlhhDcLEhptsc..sssu.hsW-spuWhGsDhsRlhlKoEG........Ehppuc..h--u-.sphLau+AIuPaWDhQsGlRtD..htss.s....sRsaushGlQGLAPYaFEl-ushaluccGcsuhRlEuEY-lLLTQRLILQPclEsshhup-DsppulGuGLosh-hGLRLRYE.lsRpFAPYlGVsapppaGsTADhsRspGccsspsphVsGlRhWF ...................................s..th.hhlllDpLEhppsc..ss.ss..huW-spuW.lGuDhsRlal.+oE.G...........EpspGc...sEsu-...sphLau+ulu..PaWDl.uGl..RpD........hps...u..s.........sRsW..A.AlGlQG...LAPYhFEs-uosalupsGpsuhRLcuEYDlLLTpRLILQPphEsshhupcDsppuhG.sGLoss-hGLRLRYE.lp.RcFAPYlGVsasppaGpTu-hs+tpG-.p....s....ppspalAGlRhWF......................................... 0 40 89 124 +5103 PF05276 SH3BP5 SH3 domain-binding protein 5 (SH3BP5) Moxon SJ anon Pfam-B_6742 (release 7.7) Family This family consists of several eukaryotic SH3 domain-binding protein 5 or c-Jun N-terminal kinase (JNK)-interacting proteins (SH3BP5 or Sab). Sab binds to and serves as a substrate for JNK in vitro, and has been found to interact with the Src homology 3 (SH3) domain of Bruton's tyrosine kinase (Btk). Inspection of the sequence of Sab reveals the presence of two putative mitogen-activated protein kinase interaction motifs (KIMs) similar to that found in the JNK docking domain of the c-Jun transcription factor, and four potential serine-proline JNK phosphorylation sites in the C-terminal half of the molecule [1]. 
23.00 23.00 23.10 23.50 22.90 22.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.60 0.70 -5.05 8 259 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 95 0 164 239 2 194.20 41 52.73 CHANGED Es-E...lDPRIQtELE+LNpATD-IN+hElELE.......................................................................EA+spFRplLhEuscKLcsluKKLGssI-KARPYYEA+chA+pAQhEsQ+AAhcFpRAsplhsAAKEpVuLAEQpLhppsp..phDsAWQ..EMLNHATp+VhEAEpp+scuEspHpcps+thppApp+lppLEcch+RuIpKSRPYFEhKtpapppLEsQKtpls-LEtcVppAKssYosALRNLEpISE-IHppRpstu....ssuscpsus .....................................................................................................................ht.plp.ELEcLNpuo-pINphEh....pL-.......................................................................-A+.ppaRplL..Eush+Lpt.sp+lG.p.s.l.-cu+PYaEA+phA+pup.EsQpAs.papRAsphhtAA+Ehl.........lAEQtlht..........cpt.....ph...DssWQ.....EMLNHATp+V.-AEpp+hcuch.HpcssthhptA.t+hptLp+p.L++uIt...KS+PYFEhKup.......a...Lcp...KtpVppLptplstA.KtpYp.AL+NLEpIS-pIHtpRpt..................s........................... 0 51 64 122 +5104 PF05277 DUF726 Protein of unknown function (DUF726) Moxon SJ anon Pfam-B_6757 (release 7.7) Family This family consists of several uncharacterised eukaryotic proteins. 
22.60 22.60 22.80 23.00 22.40 22.30 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.19 0.70 -5.56 9 559 2012-10-03 11:45:05 2003-04-07 12:59:11 7 18 287 0 381 563 32 271.90 31 41.08 CHANGED ++hKRhhhlGLAsluGuhlIGlouGLtAPlluAGlushh......GssGsss...hLuus.uGsA.llsuhhushGAtlsuhtMp+RsuslcsFpFhPLpsspp............tslhVslss.h.hus.c-lptsWpsLs.s......s-hYuLtWEschLhphGpsl.slLhStshshshQpl.LttTl....................LsuLhuAlpWPhuLhKlu.llDNPWslshcRAhpAGchLA-sLhsRsh.GhRPlTLlGaSLGARlIa.CLhpLuc+.cthGllENVllhGsPssschc.WpphRoVVSGRhVNsYscsDWlLuaLaRssust..pluGhusls...hpslENlDsoslVpGHLsYpcphsplL+tlshc ..................................................................................................t....+hhhhGhAslsGuhllul..ouG.LhAPhluuuh...ushh.............Ghsuhsu.................lus.........h....u.t...s.......hhss.hshhG.uths.s.hhhtphh.t.tlppFthhslttstt....................................................plhlsl.h...h........t.p...p.shhhPa.p.hlt....................t-.aslhWEsphLhphGpsl.phl.h.st.h.h....s..s....hpph.lt.Tl................................................................................hssl.huul.hP.h....s..L.h.p.hu...h.lDNsa..slshs.RuttsG.thLA....c....s....Lh...p...+t......GpRP.lT...LlGaSLGuRlIa.CL.p...L...Ap..........c......t......shG....l..lps.Vhlh.G.uPhss..........p....t....pp....at..........h....+.pVV...u...GRhlNsY.....s..psD..alLu..................h...laR.........t.s.....t...h.........th....t..........lu.Gltslp..........t.......l-..Nhshoph.l.tu.Hh.pY.........hstlLpthth............................................................. 0 135 225 323 +5105 PF05278 PEARLI-4 Arabidopsis phospholipase-like protein (PEARLI 4) Moxon SJ anon Pfam-B_6763 (release 7.7) Family This family contains several phospholipase-like proteins from Arabidopsis thaliana which are homologous to PEARLI 4. 
27.90 27.90 28.30 28.00 27.80 27.80 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.63 0.70 -5.02 6 68 2009-09-11 21:08:31 2003-04-07 12:59:11 7 5 8 0 42 67 0 216.70 24 49.53 CHANGED s-ptslhLFP-lhhs.psp.sScs.........................................................pp.ppptppEssEL..........................-hpShhS-Sa..VSVGpY+VRuSVSoTLQuIlDKHGDIAusSKLQShuTRSYYLEsLAuVVhELpSTPL+pLocsRVtEMlAVVKDlESVKIcVGWLRoVLEElsEAscaastpEssssEKEspE+clhhcKpEMEtppp-Lsc+EKElKEhRc+lpEhsu+LG-LEMKRsRL-KphshhuSKVEKF.cGcohlcc ....................................................................................................................................................................................................................................................................................h..s-sh.....VpVstYpVctShsshLptIlcKHGDIuusscLpShphRShYLEsLssllp-L..p.poslc.pLocsclp-hhuslpDl-ssplcVuWL+s...tLsE...lh..-s..hchh.......s.phctschc+ctpc+c.lpttcpEhEt...pp-LtphEpcht-hptph........http.....hsphp....th......h..p........................................... 0 14 24 27 +5106 PF05279 Asp-B-Hydro_N Aspartyl beta-hydroxylase N-terminal region Moxon SJ anon Pfam-B_6767 (release 7.7) Family This family includes the N-terminal regions of the junctin, junctate and aspartyl beta-hydroxylase proteins. Junctate is an integral ER/SR membrane calcium binding protein, which comes from an alternatively spliced form of the same gene that generates aspartyl beta-hydroxylase and junctin [1]. Aspartyl beta-hydroxylase catalyses the post-translational hydroxylation of aspartic acid or asparagine residues contained within epidermal growth factor (EGF) domains of proteins [2]. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.86 0.70 -4.52 9 236 2009-01-15 18:05:59 2003-04-07 12:59:11 6 14 40 0 49 199 0 177.80 40 49.29 CHANGED NGR+GGlSGu.SFFTWFMVIALLGVWTSVAVVWFDLVDYEEVL...............GKL.GlYDADGDGDFDVDDAKVLLG....lK-+.hsc...................................t.s..cEt-shsc.Ettlshctphpplc-ElKEQlpplhcchVhsc.......pp.ct.stE.p.-tc+hhhtsDsD-phcs.tssts+EEhEt.h..Ech.p.t.....cEhsscpcsscspEsVp..cstch+tcsscVs.psh-cp.....................pt.h-pt....Ech.h.ssEcppcsP ............................................ts..ShhoWhhVIALLGVWTSVAVVaFDLVDYcpVl..........................................GKL.ulYD.ADGDGDFDl-DAKVLLs..............hppt....p.........................................................t......h...tt.....th.t.p.....tc.p.t....t....c....................................................t............pt.............................................................................................................................................................................................................................. 1 3 7 23 +5107 PF05280 FlhC Flagellar transcriptional activator (FlhC) Moxon SJ anon Pfam-B_6773 (release 7.7) Family This family consists of several bacterial flagellar transcriptional activator (FlhC) proteins. FlhC combines with FlhD to form a regulatory complex in E. coli, this complex has been shown to be a global regulator involved in many cellular processes as well as a flagellar transcriptional activator [1]. 
20.60 20.60 21.00 21.40 19.80 20.20 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.17 0.71 -4.80 23 831 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 727 2 135 359 15 173.70 64 90.40 CHANGED Mu..pKS....llsEucpIpLAhELIpLGARLQlLEoETsLSR-RLl+LYKEl+GsSPPKGMLPFSTDWFhTWpPNIHSSLFhNIYpaLhcpussssl-AllKAYRLYLEplst.pphEP.....l.......LSLTRAWTLVRFhDusMLphopCspCGGcFVsHua-.ppsaVCGLCpPPSRAGKo++suspt ..........................................................MuEKSIVQEA+DIQLAMELIsLGARLQMLES.ET.Q....LSRGRLI.KLYKE..L.R..G....SP...P.P.KG..M.LPF..S..T..DWFM..T..W.EQ.N.l..HASMFh.NAapFLL..Ko.....G..h.Cs..GV...DAVIKA..YRLYLE....Q..CPp.....s..c..-sP..L.......LALTRAWTLVRFVE......S......G.l......LpLSuCNCCGGsFI..T.H....A..H....Q....P.s.s.SF..sCSLCQ...P....PSRAVK+RKLSp.ss............... 1 10 53 95 +5108 PF05281 Secretogranin_V Neuroendocrine protein 7B2 precursor (Secretogranin V) Moxon SJ anon Pfam-B_6776 (release 7.7) Family The neuroendocrine protein 7B2 has a critical role in the proteolytic conversion and activation of proPC2, the enzyme responsible for the proteolytic conversion of many peptide hormone precursors. The 7B2 protein acts as an intracellular binding protein for proPC2, facilitates its maturation, and is required for its enzymatic activity. Processing of many important peptide precursors does not occur in 7B2 nulls. 7B2 null mice exhibit a unique form of Cushing's disease with many atypical symptoms, such as hypoglycemia [1]. 
25.00 25.00 44.70 38.80 20.10 22.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.66 0.70 -4.95 12 117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 88 0 64 114 0 200.70 45 85.95 CHANGED hh......tlL.......hhsl.........tsAh.shsP..phhDplS..cschthhhcush-.................p.t.u..c.Ea.tHpu..LhG.QplpGGAtEG.p.........shst.hossslP.......uYssPPNPCPlGhT.tsDG..ClEsh.sTA-FSRcaQspQchh.DsEH.hasss.............................................sphp+sLlh+K..h+stp................tpcppcps.NPaLQGp+Lc.lsAKKsssph. ......................................................hhhh.......h......th.............sssh..uhss.......cssDpVS..-s-l.....pRLh+.GshE...........................phGlup.csEYssHpu...LhGsQpIp.GG..A...pE............GhQ.................+Lu......P.....G......N........IPN..ls....sEhT.scsl.P...................uYssPPNPCPl....GhT.scDG...CLEshssTAEFSREa....Qhp..Q....chh.DsEH.as.sh............................................................uphsKpLlhcK...h+stt...................Rpccsl.NPYLQ...G..p+.Lc.lsAKKus.hh.p........................ 0 18 23 43 +5109 PF05282 AAR2 AAR2 protein Moxon SJ anon Pfam-B_6782 (release 7.7) Family This family consists of several eukaryotic AAR2-like proteins. The yeast protein AAR2 is involved in splicing pre-mRNA of the a1 cistron and other genes that are important for cell growth [1]. 
20.40 20.40 20.60 23.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.37 0.70 -5.53 41 321 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 253 2 227 311 4 325.70 23 83.00 CHANGED h............sslllhslP.ts.....hlGIDhhoassssp..F+GlKtlPs..G..hHFlahs.........................................................ppsshuhRhGhahhhp.........................tsclhlh+WspppEshh..ps...........pt....................pthptpspl.tphpph..LssYPh.........................................cshp.pWtsLT.shIs........pllp+lpshs........................................hhlsstpssppcpp...................................ttpptscpt.thtt..........................................................................psplpFs.l.....cpphppusss...........p-lTctshD+SahLppll................................pphpsp.ttlLGELQFuFlhhl.hhtsasuhpQW+pLlpLlssu.pphltp...ptph.....................ahpllclLhhQLt.............chsp-hhhD......hhppstFlt.hl........................htttltpphcplcshlppcashcl.........cs 
...........................................hh..ssslllhslP.to........hGIDh.sapsssp.....F+GlK.lPP..G...hHFl.ahu.........................................................t...tphu.Rh..Ghah.hp.............................tp.lhlhcWsttpEpl....t.....................pt....................................................ch.t.tsph.tphcph...LssYsh.........................................pphp..pWhpLo.shl.o........shl..p+lts.s...ht........................................hls.sttt.tptcpp.........................................t...tp...h..................................................................................................tpphpFs.l........pphh.tssss...........t-hTptshDp...ShhLppll...............................................................tphtt...s..tplLGELQauFlshl.hh....tshpuhppW+.pL.l.p.Ll...........hps.ptsh.h.p....ctth..........................ahphlp.lLhhQLt........................phsp-hhh-.................hsts.shltphlp..................................................................t.........h..t.....t.....tl...hpthtphpthlpt..phtWph..t...................................................................................... 0 74 122 184 +5110 PF05283 MGC-24 Multi-glycosylated core protein 24 (MGC-24) Moxon SJ anon Pfam-B_6825 (release 7.7) Family This family consists of several MGC-24 (or Cd164 antigen) proteins from eukaryotic organisms. MGC-24/CD164 is a sialomucin expressed in many normal and cancerous tissues. In humans, soluble and transmembrane forms of MGC-24 are produced by alternative splicing [1]. 
25.50 25.50 25.80 25.80 25.40 25.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.58 0.71 -4.50 4 165 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 61 0 82 152 0 116.60 30 77.58 CHANGED huRphh..hAAhCh.usLClLustps.sttssssths.hssssoshsssls.sT....psCE..phNsC.pClNsoh.ssohCsWhpCpsE..saCSutstV....usCp.tNoT-SCSs.sss.......sVsT.puTstPshps.uso.........sosppsTosssTNsTVTPssp.sRKSTFDAASFIGGIVLVLGlQAVIFFLYKFCKS ...................................................s...........................................................................................................................................................................spot.....ss..s................................................................................................s.tt....s.o...p...s...t...s.s...sT.s.oPs..s.........s...+...p....st......FDuuSFlGGIVLsLul.Alhahhh+Fh+u........................ 0 8 13 28 +5111 PF05284 DUF736 Protein of unknown function (DUF736) Moxon SJ anon Pfam-B_7619 (release 7.7) Family This family consists of several uncharacterised bacterial proteins of unknown function. 22.00 22.00 22.40 22.30 21.80 21.10 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.37 0.72 -4.21 66 445 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 185 0 238 454 42 101.00 34 87.91 CHANGED ssI.GpFs...psp.suapGplcTLolsscl.plVPsps..ss-...sAPDaRlhsu.........ss.ElGAuWp+pupc....up-YlSlpLDDPsF.stPlhAsLhp...sp-sctt..asLlWsR.Ppc .................sI.GsFs...pss..s.uasGplpTLsls.sch..pllPspp..ss-...pAPca..R..lhsu..........ssElGAAWp+pupc....Gc-YlSlpLDDPuF.suPl..hAsLhp....s-css.t...asLlWsRs........................... 0 36 145 190 +5112 PF05285 SDA1 SDA1 Moxon SJ anon Pfam-B_6906 (release 7.7) Family This family consists of several SDA1 protein homologues. SDA1 is a Saccharomyces cerevisiae protein which is involved in the control of the actin cytoskeleton. 
The protein is essential for cell viability and is localised in the nucleus [1]. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.31 0.70 -5.15 38 397 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 283 0 294 403 7 256.40 28 42.13 CHANGED Ms....EsLLQDLspYK.....sSK.-KuVhMAARuL...........luLYRE........VsP-hLp+KDRGK.sAuhulpssctp.........taGcpps.sssI.GlELLtca.......t..ct-pu...ss-s-sDsp.............W...............................ps...t-ss-ssDs..cGpWlsVpsD...p..........................................................oDsEDEcp............................t..t.t.....p.phpsccpsppsppptp-.........................................stcpphspluoo..RILTPADFtKLpELRtptul..sphhsttt......................pcpp--hlsuscIEuhsKht..Kps+EERlApspEGRp-RpcatS....+Ks+Kcsts.pSTTN+EKpR.+KNFhM.hl+K.ts+uKpKpSlp-+ppsLRsHlt+p.....K+tt .............................................................................................hscpLLpDLs..Y+.....pp+.sKsVhhAA+uL...........lpLaRp........lsPphLt++.RG+..sphthp.tt..t.................aGp.p..hs.l...GhE.l..Lt.t.............ttt.......tpptp.p-tt.............h..................................................................tt...ppp.ppDt........psth.h...s....spps......................................................................sDp-p.pt.......................................................................t....t.....t.tt....t............................................................ttttthttlsss..+lL.T.tD.ht+lp.hphptth...pth.ttt..............................................tptttchls.pplpt.hch.....Kts+.......-pRlttshtG+psRtcat................+tp+..t.h..tSpoN+cKt+.pKsh.M..h...h.tp...s+sKt....ph.S.hh.cpp..hhp..tt..+t.....p...t................................................................................. 
0 106 161 235 +5114 PF05287 PMG PMG protein Moxon SJ anon Pfam-B_7710 (release 7.7) Family This family consists of several mouse anagen-specific protein mKAP13 (PMG1 and PMG2). PMG1 and 2 contain characteristic repeats reminiscent of the keratin-associated proteins (KAPs). Both genes are expressed in growing hair follicles in skin as well as in sebaceous and eccrine sweat glands. Interestingly, expression is also detected in the mammary epithelium where it is limited to the onset of the pubertal growth phase and is independent of ovarian hormones. Their broad, developmentally controlled expression pattern, together with their unique amino acid composition, demonstrate that pmg-1 and pmg-2 constitute a novel KAP gene family participating in the differentiation of all epithelial cells forming the epidermal appendages [1]. 29.00 29.00 29.40 29.10 28.10 28.90 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.55 0.71 -4.47 15 251 2012-10-03 03:07:01 2003-04-07 12:59:11 7 10 30 0 129 302 2 146.40 33 89.87 CHANGED MSYsCsSGNaSS+ShtsphphPsoSsso..shPoslshssslCoPSopQhuSolhcsCQETCsEPhpCQssssp..............................sssCQsoCapPpsSslsuPCpoThuGsluFsSS..SCpshuh..........tSpsC.slG.sGSsuapslstsossh.shshhot..aC+Pshh.........uS+oh.Qs...............oCapPsCuSuh ............MsasssotshSopSh.....tshhhhPsssss.s...hssslshtsshh.P.S...oh...Q...hso.LhssC...QETChEPssCp.oss................................spsCQ..ss......C..pPp.....sh...h......ssP.Cps.sh..s.tsh..........u.asSs..........uCps.huh..........hopss.slu....ssS.....sshpslshh.sp.sh.s.s.hst......hCpP..h....................s.psh.ps................s.....h................................. 0 14 14 42 +5115 PF05288 Pox_A3L Poxvirus A3L Protein Moxon SJ anon Pfam-B_7718 (release 7.7) Family This family consists of several poxvirus A3L or A2_5L proteins. 
25.00 25.00 98.30 98.20 22.20 20.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.29 0.72 -4.24 12 91 2009-09-11 10:15:31 2003-04-07 12:59:11 6 1 41 0 0 44 0 66.90 66 95.24 CHANGED tYplsLcPP++C.SpC.sNLh-alp-Dccsl+hhLtSQPpKhplLKpFLshsRNKphhhKILDpEl+RVLs ..Yph.lp.PK+C.S.pChsNLhcalsEDuNs.I+hlL.SQPpKLKVLp-FLushRNKpFlYKILD-ElRRVLT. 0 0 0 0 +5116 PF05289 BLYB Borrelia hemolysin accessory protein Moxon SJ anon Pfam-B_7729 (release 7.7) Family This family consists of several borrelia hemolysin accessory proteins (BLYB). BLYB was thought to be an accessory protein, which was proposed to comprise a hemolysis system but it is now thought that BlyA and BlyB function instead as a prophage-encoded holin or holin-like system [1]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.27 0.72 -4.22 3 123 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 28 0 5 92 0 102.00 77 87.44 CHANGED MKLSKNNLELGLTSLSTLIDIFSKFED.EFDEsAHKGFFLVYELYSHYKLIYTANMERLESALTPsIscTLAPINEKINQCIDLVNSDEKNLKISNDLKFNcEGKP .......MKLSKDNlELGLTSLSoLIDIFSKFED.EFDEIAHKGFFLVYELYSHYKLIYTANMERLESALTPs.IstALAPLNEKINQCIDLVNSDEK.NLKISNDLKFNQEGKP.............. 1 4 4 4 +5117 PF05290 Baculo_IE-1 Baculovirus immediate-early protein (IE-0) Moxon SJ anon Pfam-B_7745 (release 7.7) Family The Autographa californica multinucleocapsid nuclear polyhedrosis virus (AcMNPV) ie-1 gene product (IE-1) is thought to play a central role in stimulating early viral transcription. IE-1 has been demonstrated to activate several early viral gene promoters and to negatively regulate the promoters of two other AcMNPV regulatory genes, ie-0 and ie-2. It is thought that that IE-1 negatively regulates the expression of certain genes by binding directly, or as part of a complex, to promoter regions containing a specific IE-1-binding motif (5'-ACBYGTAA-3') near their mRNA start sites [1]. 
33.10 33.10 33.10 87.10 32.30 33.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.13 0.71 -4.45 14 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 40 0 0 40 0 136.70 43 54.30 CHANGED lpalsspspapashaIFlPYlKQLppIlpLFhNDaCCsKlVKphtssLssLlscSt+hL+sI+hhNcRhQVlNVFh-s..lYpCNICpDTSsEE+FLKPNECCGYpICNhCYANLWKaso..lYPsCPVCKTSFKSSssssppt ...................t.s.a.hNhalFlPYlKQLptllch.FpND.a.CCtKllpshthtLspLlscstchlcpIcshN+plpVhNVFh-s..sLYECNIC+-sSs-E+FLKPsECCG.YsICNhCYusLWKass..haPhCPlCKTSFKssp........... 0 0 0 0 +5118 PF05291 Bystin Bystin Moxon SJ, Wood V anon Pfam-B_7767 (release 7.7) Family Trophinin and tastin form a cell adhesion molecule complex that potentially mediates an initial attachment of the blastocyst to uterine epithelial cells at the time of implantation. Trophinin and tastin bind to an intermediary cytoplasmic protein called bystin. Bystin may be involved in implantation and trophoblast invasion because bystin is found with trophinin and tastin in the cells at human implantation sites and also in the intermediate trophoblasts at invasion front in the placenta from early pregnancy [1]. This family also includes the yeast protein ENP1. ENP1 is an essential protein in Saccharomyces cerevisiae and is localised in the nucleus [2]. It is thought that ENP1 plays a direct role in the early steps of rRNA processing as enp1 defective yeast cannot synthesise 20S pre-rRNA and hence 18S rRNA, which leads to reduced formation of 40S ribosomal subunits [3]. 
28.00 28.00 28.20 30.20 27.90 27.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.76 0.70 -5.48 21 359 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 301 0 250 363 3 287.10 46 68.65 CHANGED Fhs..ps..............spoLuDhIhpKIpE+-sph..........ptthtptps..hP..thsP+Vl-lYcpVGplLS+Y+SGKLPKAFKllPslssWEplLhlTcPcsWTPpAhatATRlFsSshpsptAQ+FhphVLLsRVR--Ip..EsKKLNaHLYpALKKALYKPuAFFKGlLFPLspossCTLREAhIluSVlsKlSIPlLHSuAALh+ls-h.........-asGssShFI+lLL-KKYALPY+VlDulVhHFhRFps.p..................cpLPVlWHQuLLsFsQRYKsDlop-Q+-tLlcLl+t+sHh.tIoPEIRRELhsupsRssp.s.sth .............................................................t...phsLADhIhpKIp-+psp.................tt...h.t.t.....s.sh......s....plss+Vl-lYptVGplLo+Y+SGKLPK.sFKllPs..LpsWEp..lLhlTcP-p............WospAhYpAT.RIFsSshpsphAQ+......FhshVLLsRlR-DIt...EpK..............+LNhH.LY.........pALKK..ALaK.PuAFF.KGlLhPLs.poGsCTLREAhIluSlls+sSIPlLHS.uAAlh+ls-h...........-.asGss.olFl+lLL-KK...............YALPY+VlDulVhHFlRFcspp..................................cpLPVlWHQuLLsFsQRYKs......Dls.p-Q+-tLL-Llchp.s..H..pIsPEIRREL.sutsRs..................... 0 92 141 208 +5119 PF05292 MCD Malonyl-CoA decarboxylase (MCD) Moxon SJ anon Pfam-B_7770 (release 7.7) Family This family consists of several eukaryotic malonyl-CoA decarboxylase (MLYCD) proteins. Malonyl-CoA, in addition to being an intermediate in the de novo synthesis of fatty acids, is an inhibitor of carnitine palmitoyltransferase I, the enzyme that regulates the transfer of long-chain fatty acyl-CoA into mitochondria, where they are oxidised. After exercise, malonyl-CoA decarboxylase participates with acetyl-CoA carboxylase in regulating the concentration of malonyl-CoA in liver and adipose tissue, as well as in muscle. Malonyl-CoA decarboxylase is regulated by AMP-activated protein kinase (AMPK) [1]. 
19.00 19.00 22.20 19.60 17.90 17.80 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.18 0.70 -5.76 3 443 2009-01-15 18:05:59 2003-04-07 12:59:11 6 9 296 2 221 458 837 259.70 34 71.11 CHANGED MLsEKFGsscEKLspAIshY..sKs-pthIpl+suAssSRscLl+slNchPGGTu+Vs-MRpplLA......SKsooSth+sLDlshs-lhsSWFSLGhLcLE+LsWooPu.ILpKltEYEAVH.ltGhp-hR+RLuPLsRRCFuF.HEsLAcEPLVFVEVALs-oVAcoIt-lhccGt.phpt--tTTAlaYSIossQPGLuGIsLGNFLIK+VlTcL+KDlPuVoTFuTLSPIPGFtpWLl+pLpupScasp......................pE+sl.hlSD.Sppt.NthEssETLLuVcss-WsTcK+pLsslE+ILMhLCARYLLsEK+.cG+ALDSVANFHLpNGAcLERLNWhGDRScKGIpQSaGIMVNYlY+ ..........................................................................................t..........................................................................s.h.tlhpph.......s...t...uht.Ll.thRtclht.......................t......tlt.l-tthpph...hsp..WFshuhL.Lc.ls..W.p.o.PsplLpKlhtYE...........AVH.....l.p.s.W...Dl+pRlts.......RRCauFhHsthst..................-PLlhlcVAL.spthsssl..t....l.......l.....t...t....................t............t................................t........p..........p...s...s..s.AlFYSISssQ.GLtGlshGshLIKpVlppLp.........p.........-h......Pp.........lppFsTLSPlPGFhpWL.......t........................................................................................................................................................................................................................................................................t.h.p..hhthsAhYl..h..tt+p....p.....G....h..s.h....sPVApFHLtNGAhl.c..lNahuDhS.+GhppShGhMVNYhY...................................... 1 76 127 174 +5120 PF05293 ASFV_L11L African swine fever virus (ASFV) L11L protein Moxon SJ anon Pfam-B_7869 (release 7.7) Family L11L is an integral membrane protein of the African swine fever virus (ASFV) which is expressed late in the virus replication cycle. 
The protein is thought to be non-essential for growth in vitro and for virus virulence in domestic swine [1]. 25.00 25.00 159.30 159.20 20.30 20.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.77 0.72 -3.85 2 12 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 0 0 10 0 77.30 92 93.55 CHANGED MLEPlLVMAPIPLlLIFLYSYFKIKLHKLITIALFLGCLFFILRDFCFPPMLWp.LpNhT.shNshLGNpSF.VpCpp MLEPILVMAPIPLVLIFLYSYFKIKLHKLITIALFLGCLFFILRDFCFPPMLWTQLHNITSSIN.ILGNNSFQVKCNp. 0 0 0 0 +5121 PF05294 Toxin_5 toxin_5; Scorpion short toxin Moxon SJ anon Pfam-B_7892 (release 7.7) Family This family contains various secreted scorpion short toxins and seems to be unrelated to Pfam:PF00451. 25.00 25.00 26.80 26.50 23.20 22.60 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.27 0.72 -3.74 6 25 2012-10-01 23:31:40 2003-04-07 12:59:11 8 1 11 2 0 29 0 32.20 66 72.07 CHANGED CsPCFTTDPpMppKCpcCCGG+..GhChGPQCLC ChPCFTTDPsMupKCp-CCGGp..GKCaGPQCLC. 0 0 0 0 +5122 PF05295 Luciferase_N Luciferase; Luciferase/LBP N-terminal domain Moxon SJ, Bateman A anon Pfam-B_7906 (release 7.7) Domain This family consists of a presumed N-terminal domain that is conserved between dinoflagellate luciferase and luciferin binding proteins. Luciferase is involved in catalysing the light emitting reaction in bioluminescence and luciferin binding protein (LBP) is known to bind to luciferin (the substrate for luciferase) to stop it reacting with the enzyme and therefore switching off the bioluminescence function. The expression of these two proteins is controlled by a circadian clock at the translational level, with synthesis and degradation occurring on a daily basis [1]. However This domain is not the catalytic part of the protein. It has been suggested that this region may mediate an interaction between LBP and Luciferase or their association with the vacuolar membrane [2]. 
25.00 25.00 104.80 104.10 20.80 16.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.63 0.72 -4.18 13 14 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 8 0 0 14 0 81.60 58 8.43 CHANGED MAs..QLspFLsN-AKlDs+VluYMT+pLpL-SVSDFANYWTSsEYE+GVQDDIlupVusFpss.SKPsuKlQlARLRAAW+uAQ MAt..pLspFLss-AKlDs+VluYMT+pLpL-SVSDFANaWTosEYE+GVQDDIlupVssFpss.ScssuKlQlARLRAAW+uAQ. 0 0 0 0 +5123 PF05296 TAS2R Mammalian taste receptor protein (TAS2R) Moxon SJ anon Pfam-B_1498 (release 7.7) Family This family consists of several forms of mammalian taste receptor proteins (TAS2Rs). TAS2Rs are G protein-coupled receptors expressed in subsets of taste receptor cells of the tongue and palate epithelia and are organised in the genome in clusters. The proteins are genetically linked to loci that influence bitter perception in mice and humans [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.02 0.70 -5.53 20 1248 2012-10-03 04:04:29 2003-04-07 12:59:11 8 3 92 0 394 2541 0 286.50 29 96.76 CHANGED Mhssh.slh.hllhlsEhllGlluNuFIsLVNsh-Wl+p++lSslDhILhuLAISRIsLlhllllssahhlhhsshatsuthhphlshhWhhhNphSlWhAThLulFYhlKIAsFSHPlFLWLKhRlspVlsal...LLuolllsh..lsplhpshp...........phppNoThphchpchphhhshlhhp..lsshlPFllsLlShhLLIhSLh+Hp+pMppsuoGhRDPsTcAHspAlKsllSFllLahsYaluhllphhshhhscsplhhhhs.hhshhYPssHShILILGNsKLKpshhplLtph+ 
.......................................................................................................................hh..h.hlh.h.hthhlGhhuNuF.I..s.lV.Ns.h.-W..l..+..p..p.c.l.u..s.sDhILhsL.ulSR.lh..Lh...h..l.l.hl.....sh.h.hhh.h..ss..hh.s..h.p..h...h...hphh.t.h.h.W....hhhN.ph.olWhuos.LSl.F.Y..hl.K...I..A...s.F.o.....p...sh....F.L....a..L.K.....h...R....l....s..p..l....l.....s......h.l........L.L....G.....s..l...l...h.s..h....h..........h..h...h....l....s...h.....t..h.....p.........h....h...h..t......t.h....................php.p..N....h.....T...h......p...........h.........p......h....p........p....h.....t.....h....h..............h.h..hh.............ls.s...h....l....P....F..h...l........h....L.......l....S...h..h..L.......L.......lh...S.Lh..+...H...h+....p...M....p....h....p....s....p....u....s....p.....D....P....S....s....c.A.H.....l..+...........A....l+....s....l....l....S....FL....l....L..a....h.l.....a...a......l....u....h..h....l..s..h..h....s....h....h....h..............p...p...........p...h...h..h....h..h...s...t...h...l.....h.h...h..a.PusH.S.hlL.I..h.uNsK.L+pshhphh....h................................................. 0 26 90 163 +5124 PF05297 Herpes_LMP1 Herpesvirus latent membrane protein 1 (LMP1) Moxon SJ anon Pfam-B_5174 (release 7.7) Family This family consists of several latent membrane protein 1 or LMP1s mostly from Epstein-Barr virus. LMP1 of EBV is a 62-65 kDa plasma membrane protein possessing six membrane spanning regions, a short cytoplasmic N-terminus and a long cytoplasmic carboxy tail of 200 amino acids. EBV latent membrane protein 1 (LMP1) is essential for EBV-mediated transformation and has been associated with several cases of malignancies. 
EBV-like viruses in Cynomolgus monkeys (Macaca fascicularis) have been associated with high lymphoma rates in immunosuppressed monkeys [1] 26.00 26.00 26.30 29.00 25.20 24.10 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.39 0.70 -5.53 2 1117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 10 3 0 569 0 128.40 64 101.97 CHANGED ME+DLERGPPGP.RPPhGPPLSSSlGLALLLLLLALLFWLYIVMSsWTGGALLVLYSFALhLIIIILIIFI.RRDLLCPLGuLsLLLLMITLLLIALWNLHGQALYLGIVLFIFGCLLVLGLWIY.LEILWRLGATIWQLLAFhLAFFLslILLIIALYLQQNWWTLLVDLLWLLLFhAILIWMYYHG.RHoDEHHHDDSLPHPQQATsDSuHESDSNSNEGRHHLLVoGAGDGPPLCSQNLGAPGGGPDNGPQDPDNTDDNGPQDPDNTDDNGPQDPDNTDDNGPQDPDNTs...........DNGPHDPLPHNPSDSAGNDGGPPpLTEEVENKGGDRGPPSMTDGGGGcPHLPTLLLGTSGSGGDDDDPHGPVQLSYYD .................................................................................................................................................................................................................................................................................................NLtA.uGGP.sNuPQDPDNTDDNGPQsPDNTDD.....NuP.....QsPDNTD.........DNuP......QsPDN...................D...NGPHDPlPpsP.sDuAGNsuGPPpLTEEVENK.GGDpGP...PsMTDG..........G..........G..........G.....c..........s.H............................................................ 0 0 0 0 +5125 PF05298 Bombinin Bombinin Moxon SJ anon Pfam-B_5347 (release 7.7) Family This family consists of Bombinin and Maximin proteins from Bombina maxima (Chinese red belly toad). Two groups of antimicrobial peptides have been isolated from skin secretions of Bombina maxima. Peptides in the first group, named maximins 1, 2, 3, 4 and 5, are structurally related to bombinin-like peptides (BLPs). Unlike BLPs, sequence variations in maximins occurred all through the molecules. In addition to the potent antimicrobial activity, cytotoxicity against tumour cells and spermicidal action of maximins, maximin 3 possessed a significant anti-HIV activity. 
Maximins 1 and 3 have been found to be toxic to mice. Peptides in the second group, termed maximins H1, H2, H3 and H4, are homologous with bombinin H peptides [1]. 21.40 21.40 21.50 21.40 20.30 18.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.79 0.71 -4.72 3 225 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 4 1 0 219 0 135.80 84 98.03 CHANGED MNFKYIVAVSFLIASAYARSEENDEQSLSQRDVLEEESLREIRGIGsKlLGGlKTAlKGulK-LAS+alNGKRTAE-HEVMKRLEAVMRDLDSLDHPEEASERETRGFNQEEIANLFTKKEKRILGPVLSLVGuALGGLIK .....MNFKYIVAVSFLIAS.AYARSVpNDEQSLSQRDVLEE.ESLREIRGIGGKILuGlKTALKGAAKELApTYlptKRTAE-.HEVMKRLEAVMRDLDSLDaPEEAoERETRGFNQ-EIANLFTKKEKRILGPVLGLVGsALGGLlK.... 1 0 0 0 +5126 PF05299 Peptidase_M61 M61 glycyl aminopeptidase Studholme DJ anon Merops Family Glycyl aminopeptidase is an unusual peptidase in that it has a preference for substrates with an N-terminal glycine or alanine. These proteins are found in Bacteria and in Archaea. 21.30 21.30 21.70 21.60 20.90 20.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.59 0.71 -4.14 10 668 2012-10-03 04:41:15 2003-04-07 12:59:11 7 4 574 0 253 702 215 119.60 40 20.25 CHANGED .tsthsLlAHEahHuWNsKhhRPA-Lhs.sacpsstssLLWlaEGpTpYaGhllssRoGlhopcpsLctLAtshuphhs.psGRtapolp-oohDs.hlphtRspshsshspppsYYocGtllh ..................tphLuLsSHEYFHsWNVKpl+Psshts....aDhspEsaTphLWhaEGhTSYYDcLhLhRuGllotcpYLchLupslsph.p..ssGRh.hQolu-SSa-A.WhKhY...+...tD.p.Nus...Ns.h.l.SYYsKGuLl.u................................. 0 76 165 215 +5127 PF05300 DUF737 Protein of unknown function (DUF737) Moxon SJ anon Pfam-B_6933 (release 7.7) Family This family consists of several uncharacterised mammalian proteins of unknown function. 
23.20 23.20 23.60 23.80 21.70 23.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.18 0.71 -4.33 21 182 2009-09-10 21:09:58 2003-04-07 12:59:11 6 3 44 0 74 148 0 140.90 33 75.13 CHANGED hDEpEplpVl+GIRLSEsVlsRMKEsSsPsstpp..ss.ssus.............................................................sspppl++tssppht.uLcps+ttoppp.sth....hppt.hKRhcpEQhhlQ-EluRlhc+E+pAAp-pLspulLRE+susccERt+AppL.......ARpLE-+EtEL++pDsFYKEQLuRlEE+suEhYKlToEQaHcAAocsEu+hK ...........................DEp-plpllpGl+L.S-sVlpRM+-s.s..ss.t........................................................................................................................................................................................................................t..ttp.........pp..tt..pttp...............................cthtp............+...hppERtuu..........p....c.p....hptul.pc+hpspcEp.cuthh........A+pLpp+-t..lpp.-saY+EQltplEc+s.phY+.osppappAApchcsph+............................................................... 0 5 13 32 +5128 PF05301 Mec-17 DUF738; Touch receptor neuron protein Mec-17 Moxon SJ, Pollington JE anon Pfam-B_6943 (release 7.7) Family Mec-17 is the protein product of one of the 18 genes required for the development and function of the touch receptor neuron for gentle touch. Mec-17 is specifically required for maintaining the differentiation of the touch receptor [1]. This family is conserved to higher eukaryotes. 
21.20 21.20 21.20 21.40 20.90 21.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.44 0.71 -4.17 5 201 2012-10-02 22:59:21 2003-04-07 12:59:11 6 2 118 0 116 212 3 110.50 48 33.85 CHANGED SDc.QllYlhtDcsA..uu+utlhGLLKVGpKcLFLaDsptsppclEps.CILDFYVHEScQRsGpG+cLF-aMLpcEpsos+QCAlDRPSsKLLuFLuKHYGLc+sVPQuNNFVLaEGFFsc .................................ppphhYlht-sps...ssp..Gsl..lGhLKV.....GhK+LFlh..............Dp.ptt.p.p.E.hc....sh..ClLDFY......lHEShQRpGhG+cLFpaMLpcEpl.p.P....p..plA.lDRPS.KLLtFLpKHYsLpph..........lP.Qs.NNFVlF-sFFt........................ 0 50 61 93 +5129 PF05302 DUF720 Protein of unknown function (DUF720) Moxon SJ anon Pfam-B_6980 (release 7.7) Family This family consists of several uncharacterised Chlamydia proteins of unknown function. 25.00 25.00 122.80 122.30 23.50 20.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.79 0.71 -4.38 6 105 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 34 0 13 42 0 127.40 45 76.02 CHANGED lYFsI.shLh.SVtsspashuIhA..LQ-NTshQQphspEhhplphlpVPchpKpDs.........Nps.IQshQosNQpIoAsRQhIQppLSuApQpAQs.ppslNosss...QlLQsssALlpTLppl.olhANL ....hYFsI..hLhpSVtlsQpslulhAppLQ-NTstQQpLNpEpsplpassVPcstKpsp.........sps.IQsVQssNQsloAsRpsIQspLSuApQsuQlIpSslNTNsNIhQQlLQssoALlpThsplsSllANL 0 3 3 10 +5130 PF05303 DUF727 Protein of unknown function (DUF727) Moxon SJ anon Pfam-B_7004 (release 7.7) Family This family consists of several uncharacterised eukaryotic proteins of unknown function. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.33 0.72 -4.26 10 194 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 117 1 136 179 0 97.50 31 24.97 CHANGED shclEApAsVs-ltFuVspIsV.ScpLPpss-lsYlNVcThEuspYClELTppGaRlVSppaDplssc..............spltlsopYaETlYuLLDsISPsYREsFG...spLsQ+LccLp .........................................................EA.uhlp-l.htVtph.l...Sp.L.pss...DlhYlNlpThEsppaClclo.p.pGa+l...su.p..hDphssc.............................p.s.tha-..TlhsLLsplSPta+...csFu...ptLhp+Lpt.......................................... 0 46 70 111 +5131 PF05304 DUF728 Protein of unknown function (DUF728) Moxon SJ anon Pfam-B_7223 (release 7.7) Family This family consists of several uncharacterised tobravirus proteins of unknown function. 25.00 25.00 117.50 117.10 18.10 17.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.51 0.72 -3.86 3 35 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 8 0 0 27 0 132.70 84 97.52 CHANGED KCALssC..EVssQuNchTCSMKHANKYNRaLA-KauVKRKCECsNCGWFPAIpVpsDalEVYFCCGMKHLpKC+...............................ScNPKKccR....LNTPKRLFRDDVDFGLstLFu ....TCVLKGCVNEVTVLGHE.TCSIGHANKLRKQVADMVGVTRRCAENNCGWFVClIINDFTFDVYNCCGRSHLEKCRKRhEARNREIWKQIERlRAEcs.sTVKKS+NSKsSKKcFKEcc-FGTPKRFLRDDVPFGIDQLFA. 0 0 0 0 +5132 PF05305 DUF732 Protein of unknown function (DUF732) Moxon SJ anon Pfam-B_7356 (release 7.7) Family This family consists of several uncharacterised Mycobacterium tuberculosis and leprae proteins of unknown function. 
22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.66 0.72 -3.93 65 1009 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 197 0 174 553 1 102.60 25 72.21 CHANGED thhusssuhsu...........huhAAP.A......pA..........s..D.................s...sFLssLp.psGIs..as..ssstAlthG+tVCstlsp.Gt........shspllsplttp.ts.shs...........tpAuhFsshAhssYC....Pphhsth ..................................................................................hs...hhhhshhuh..........hshAuP..A..............pA.....s..................sh..D..................................s.....sFL...ssLp..pt..Gls.......as...........sssp..u...lshG+.tVCptl.ss..Gt.............shtpl......ss...t..l..tp....p.......s..shs..............ppAst.F.s....s.h.AhptY..C....Pphht..h....................................... 0 22 84 143 +5133 PF05306 DUF733 Protein of unknown function (DUF733) Moxon SJ anon Pfam-B_7392 (release 7.7) Family This family consists of several uncharacterised Drosophila melanogaster proteins of unknown function. 25.00 25.00 37.60 37.60 21.20 20.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.95 0.72 -3.70 14 116 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 0 65 113 0 94.30 33 72.45 CHANGED hpPsluYtLFhYRpELsR+pt..chhRlSpoKlhLTcELIupph.p...........sh.tpsSs--Lpt.............LsREl.....a+ccLp................cplc.Rhpchpthuhpp..tctp ...h.PoluYtLFhYRpELpR+ps..phhRlSpoKlpLTcpLIupsh.p..........................sl.ppCSs--Lps..................................LsREl.....FK+cLp..................cplc.Rh+chpphshpp....................................... 0 9 9 36 +5134 PF05307 Bundlin Bundlin Moxon SJ anon Pfam-B_6974 (release 7.7) Family This family consists of several bundlin proteins from E. coli. Bundlin is a type IV pilin protein that is the only known structural component of enteropathogenic Escherichia coli bundle-forming pili (BFP). 
BFP play a role in virulence, antigenicity, autoaggregation, and localised adherence to epithelial cells [1]. 21.10 21.10 21.30 21.10 20.70 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.14 0.72 -4.12 2 81 2012-10-03 10:38:27 2003-04-07 12:59:11 6 2 47 3 1 100 12 90.00 68 53.31 CHANGED MVSKIMNKKYEKGLSLIESAMVLALAATVTAGVMFYYQSASDSNKuQNAISEVMSATSAINGLYIGQTSYsGLNSNILLNTSAIPDNhKcstpshlT ..........MVSKIMNKK.YEKGLSLIESAMVLALAATVTAGVMF..YYQ..SASDSNKoQNAISEVMS..ATSAINGLYI...G...QT.S.YoG...LsSs...I...L.L..NT.S.AI.PDNY.KDTpNphl......................... 0 0 0 1 +5135 PF05308 Mito_fiss_reg DUF729; Mitochondrial fission regulator Moxon SJ, Eberhardt R anon Pfam-B_6919 (release 7.7) Family In eukaryotes, this family of proteins induces mitochondrial fission [1,2]. 26.30 26.30 26.30 26.30 26.10 26.20 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.76 0.70 -5.01 15 225 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 59 0 110 182 1 204.40 35 70.71 CHANGED hplsl.WpsKPYGSoRSIVR+IGTNLPLtPCPRspFQll..Phsschssssssp...VPShADVuWlAsDEsEoasRlRs-lpspppp.+.sshhshc....RpsSlPsLppcEsphps.....tthss-sAlpKIoALEsELutLRAQIA+IVshQEtpssosush............sss.....o...ssohs.sssp..P....s..........PPPPPPsP.PssuLpsSsSsl............shhpER+p..ppsssscTlspspsKp....phPsML-lLKDMN+VKLR .................................................................l.WpsKsaGus..RSlVR+IGopLsLh..CsRspFphh..........s.h...sphs.......ppss......VsohADlhWlAt...-.E...tc...s.sRh...R..s..p..h.....hsh.....h.p.hh.hp......Rp....SlPslptpc..p..h..............................thss..Alp+h.ssLpsELutLRuQIApIVshpptps.hsss.................................sssuh..ss.........s...................................ss...sl.pssSs.................................shhtEp+t..pptsts.ps....spspp......hssMhtlLKDhp..phKh.......................................................... 
0 16 23 52 +5136 PF05309 TraE TraE protein Moxon SJ anon Pfam-B_7677 (release 7.7) Family This family consists of several bacterial sex pilus assembly and synthesis proteins (TraE). Conjugal transfer of plasmids from donor to recipient cells is a complex process in which a cell-to-cell contact plays a key role. Many genes encoded by self-transmissible plasmids are required for various processes of conjugation, including pilus formation, stabilisation of mating pairs, conjugative DNA metabolism, surface exclusion and regulation of transfer gene expression [1]. The exact function of the TraE protein is unknown. 22.40 22.40 24.30 24.10 21.70 21.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.87 0.71 -5.15 17 480 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 351 0 62 301 17 177.20 31 92.14 CHANGED M-hphppsptphhhhth.hhsslhslhhhssllh...sathtscpcpsVhs.shstshslSssusDssYLc.hscshhhLpLNhoPcslDh.hpplLphscPuupspl+stLhcpsppl+ssslsstFhhsplcVsPpshpsclpGpL+TalGsptlss....-h+pYphpasacss.ltLssFtpl.ss...-p .................................................................pt.p.hhh...hhhthl...lsl.l...hsNll..........shpht.s.cpc.s.sl.s.P...sh..s..t..shsVSp.s.sAstsYLpphul.hhtLhLNVoPpsVDtp+psLLphlpPuspsph+...shLtccAc..pIKs.csVsosFh....s.pl.c.s.s.pt.spVplpGh..h+.shl..us.up....s....ph.+.pYhl.hch.p.s.uhhhLtth.p................................................................. 0 17 35 51 +5137 PF05310 Tenui_NS3 Tenuivirus_NS3; Tenuivirus movement protein Moxon SJ anon Pfam-B_7740 (release 7.7) Family This family of ssRNA negative-strand crop plant tenuivirus proteins appears to combine PV2 [1], NS2 [2], NS3, and PV3 proteins. Plant viruses encode specific proteins known as movement proteins (MPs) to control their spread through plasmodesmata (PD) in walls between cells as well as from leaf to leaf via vascular-dependent transport. 
During this movement process, the virally encoded MPs interact with viral genomes for transport from the viral replication sites to the PDs in the walls of infected cells along the cytoskeleton and/or endoplasmic reticulum (ER) network. The virus is then thought to move through the PDs in the form of MP-associated ribonucleoprotein complexes or as virions [2]. The NS3 protein appears to function as an RNA silencing suppressor [3]. 25.00 25.00 37.10 37.00 19.80 19.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.45 0.71 -4.39 9 133 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 12 4 0 134 0 172.50 48 84.62 CHANGED LLhpsshhplll........................phscH..ua.plhptssstEsh.hhlpsuIWlLoap+shssphhhshsshsusasphhlphcPps...spsKCWhCc....hspssL.hhh.s..lpGF.hssE.YhVshK-Hsu........Ehhh.sshKshY+sspKhcHcYllsost.Pl.sppa ..................h..ppshoph.hshcDh....p.....aclhppRhsphccH..pa.pLhptssDt.sht.hltshIWlhuac+shs-chRhsphhhsuohschhhplKPcs...spTpCWhC+....hpp-sLshtl.s..VpGFsssuEhYhVslpDHsG........c.schssh+shY+ssuKh+HKYllsost.Pl.St+a.. 0 0 0 0 +5138 PF05311 Baculo_PP31 Baculovirus 33KDa late protein (PP31) Moxon SJ anon Pfam-B_7777 (release 7.7) Family Autographa californica nuclear polyhedrosis virus (AcMNPV) pp31 is a nuclear phosphoprotein that accumulates in the virogenic stroma, which is the viral replication centre in the infected-cell nucleus, binds to DNA, and serves as a late expression factor [1]. 
25.00 25.00 28.20 27.10 21.70 21.20 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.01 0.70 -5.13 16 50 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 45 0 0 47 0 275.50 34 95.87 CHANGED hssshsplhsKh..-susaNKssh-hlpssINhhEKKKIsYplhshPlhsD.............DKKssKRsKKhISNNKYILFNSWY..TK.RpssWPsSasMWNlhKspspspsFVplFDahEKlGKsIss+pusss.................spssssccpssht..hs.s-l.cEsNc+RsKLYsEFYclLspTFpsssAPusS.IYD.................-+LTRshlppulphFKs.lhhcl.......................ppsttstsussshssos.s.t.............RKRKps................htKpstttp+pppppt.ssshtMssDss-DoQ.MSp .............................................s.ts.tplhsKh..EsSshNKoph-hltshINhhEKKKIsYplhshPshsD.............DK.KssK+sKK..lhoNNKYILFNSaY..TKl+pspWPsSpsMWNlhKspspspsFlcIFDa.h.EKlGKsIps+ppssus.................spssssccppsht.thshs-l.cEss-pRsKLYsEFYplLshTFpsss...A..PusS...IYD.................chLTRshlppuhptFKs.llhch..........................p.s..ssssshssoshstt.p...........RKRKps.................stpp.tttpppptpt..sss.sMssDpspDop.MS.................... 0 0 0 0 +5140 PF05313 Pox_P21 Poxvirus P21 membrane protein Moxon SJ anon Pfam-B_7803 (release 7.7) Family The P21 membrane protein of vaccinia virus, encoded by the A17L (or A18L) gene, has been reported to localise on the inner of the two membranes of the intracellular mature virus (IMV). It has also been shown that P21 acts as a membrane anchor for the externally located fusion protein P14 (A27L gene) [1]. 
20.70 20.70 20.70 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.38 0.71 -5.03 10 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 42 0 188.40 56 95.28 CHANGED MSYLSYYNMF-DFsAGAGVpDpELFTcEEEcSFLPKcss..suta....h.......shcs.aP.sILhpNDI+oLlGLILFVLAITTsPlIAlIMIulAShLlPhPSLVIAYCLuhQIh...NsssssslGMSIlCVshSl.lTlhlsSlS...+sshTIoYIILulLFClYAFNlo+hst.pspsss.....tCs+thpuGsKhst-tP ..........MSYLpYYN.MlDDFSAGAGVhD+-LFTEEpQpSFhPKDGGhhps-Y..............ushNca..uIhpNNDVRoLLGLILFVLALhSPPLISllMIhIuShLLPL.sSLVIsYCLshQhh...+sGsuNTlGMSIVCllAAl.IhMAlNshT..sSphhshISYIILhILFhsYVhNIpRpchh+...Shslo.....sCs+sapAGNKhss-hP...................... 0 0 0 0 +5141 PF05314 Baculo_ODV-E27 Baculovirus occlusion-derived virus envelope protein EC27 Moxon SJ anon Pfam-B_7811 (release 7.7) Family This family consists of several baculovirus occlusion-derived virus envelope proteins (EC27 or E27). The ODV-E27 protein has distinct functional characteristics compared to cellular and viral cyclins. Depending on the cdk protein, and perhaps other viral or cellular proteins yet to be described, the kinase-EC27 complex may have either cyclin B- or D-like activity [1]. 
25.00 25.00 52.70 52.00 18.60 18.30 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.87 0.70 -5.34 19 59 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 56 0 0 53 0 272.50 40 97.46 CHANGED h+s..ptsKl...RTVTEIlsucsKhpK-YDls-hshKN.sSLcSa-phplhLsluKYMAMlssLphoQsLltlF+s+sssccIlolVhsSLuFVHNRhsPhlspFs.cMcFVlscspchuIPGEPIlF.....pps-cpslhChlDRsoIl+hLE+phDsshphpp.sscc.pthKlhcshpsstp+++cs..t.shp.................tsshplsEs-sTQYlTLLlIhEHAYlHYalL+shshhpYhcoLlsHolhspc..sshtsshsNLLLSKF+FslE-.-p.+pssss......hs...lh ..........h.+s...sKl...RTVTEIlsucpKlpKcYDLu-F-hKNLsSLcSa-shcIKLhluKYMAMLssLphTQPLLplFRs+sss+cIsulVhuSLuFVHNRhpPhVspFst+MEFVls-stchsIPGEPIlF......ps-cp......s......llChlDRsSIl+hLp+pFDschplsppspcp.pth+lhKshssstp++pppp...t.....................sshplsEs-sTQYlTLLlIhEHAYlHYaIhKNashhpYscSLlDHolFssK.+sshssphsNLLLSKF+FslE-.-p.pppsss...thh........................... 0 0 0 0 +5142 PF05315 ICEA ICEA Protein Moxon SJ anon Pfam-B_2792 (release 7.7) Family This family consists of several ICEA proteins from Helicobacter pylori. Helicobacter pylori infection causes gastritis and peptic ulcer disease, and is classified as a definite carcinogen of gastric cancer. ICEA1 is speculated to be associated with peptic ulcer disease [1]. 
19.00 19.00 21.10 20.70 18.00 17.80 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.65 0.70 -5.05 2 70 2012-10-05 18:28:12 2003-04-07 12:59:11 6 1 40 0 4 69 19 165.40 58 93.91 CHANGED MphoKpELFLcLApPscpGlSRWVps.EFhGcYpsLpLGNGGSWCRpsSsLA+-ahlEFDKt.TsGNSIDtIRLNGaNpcphFpQ.I+pDIKshhpppsCsMhGVpGpSENTpIEIDHKDGRKsshRVSDlpTQph-DFQsLCKAsNDhKRQICKpCKEoshRasAppIsGNPYsFY.G-.pYs...GCVGCYQYDPlpYRKosscRIhsEuhphs...haphhYppcss .......................................................hpRWlts.EF.sthptLt.hu....Nst.....sWhRtsSshA+ca.lEFDKtp.o.GNSID+IRLNGapscpsF..NQsIRpDIKsaYpppsCsMpGspGpSENTpIElDHK..DGRKsD.RVS....D..hss.Qph.-DFQsLCKAsNDpKRQICKcCKEoGhRasAppI...GN....YsFY-G.........t.pYD..............GCVGCYQYDPlpYRKpsscRIhpEuhphs.................................................. 3 2 4 4 +5143 PF05316 VAR1 Yeast_VAR1; Mitochondrial ribosomal protein (VAR1) Moxon SJ anon Pfam-B_7802 (release 7.7) Family This family consists of the yeast mitochondrial ribosomal proteins VAR1. Mitochondria possess their own ribosomes responsible for the synthesis of a small number of proteins encoded by the mitochondrial genome. In yeast the two ribosomal RNAs and a single ribosomal protein, VAR1, are products of mitochondrial genes, and the remaining approximately 80 ribosomal proteins are encoded in the nucleus [1]. VAR1 along with 15S rRNA are necessary for the formation of mature 37S subunits [2]. 
22.10 22.10 22.40 22.40 20.50 22.00 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.70 0.70 -5.85 5 28 2009-09-11 14:04:11 2003-04-07 12:59:11 7 3 26 0 9 39 0 290.00 34 79.83 CHANGED pKhLLKNhLLKMNsNp.MN.shchsp+Ns.h.sKYlpEhNNKGNKLQ+lNNMNNWssQlYNYNKNNsINshlsDKLlNKLLYKLMslK..hINNN....sst..p+IIIsKPhaKHolNKlNI+FYY....YNsNhpshNNNNNpYYhNMINKLMNILN.....NpN.hNMN.NluNILShYYNKKVhIEsIKLpYlYNNN-IhNKYISlhDh-KYNNGLssc.Yp+lLNNhMPKhNspNIpMNYINNINNhNNlKY.NNMI..L......sSNN.l.........NIpNIYNshsINpIsM-LLMaKYLIGWSILaKGRLNKN..lSRosKspLLNGShsNKhYhKs...........................NINpNYKLNYIPNNHNIhNhNNVN..KNGKYNIKVKLNaI ......................................hK.hLh.hp.p..hp........p.............................s..sspL......QplNphNsap..hYpaNps..l..hh.sphhppLLhKhh..hh.....ss.............pIlhS+.shhpHshN+.lsI+FYY.......hp..s....p...s...Np.spYYhshhscl..hNhhs........Np..s.........ssLsNlLShYY.N.KcVpIpPI+LpY.YhNo-IhophI...h.shsp.h.s.psl.hp.a.+hLpshhPhhNsp......It....hsYlss..hs.s......hN...p.ph.NNhh........ssN........................slpNl.Y...pshslpp..hs....shLhhKYLsGholhhKG+h...pp..hsRo.p.h.lhpGoFpNhhh.hs...........................p.h.p.spYKLNYhssNhsh.s.s....p..lN..Ks...GKasIKlKLNhI........................... 0 2 6 8 +5144 PF05317 Thermopsin Thermopsin Moxon SJ anon Pfam-B_7819 (release 7.7) Family This family consists of several thermopsin proteins from archaebacteria. Thermopsin is a thermostable acid protease which is capable of hydrolysing the following bonds: Leu-Val, Leu-Tyr, Phe-Phe, Phe-Tyr, and Tyr-Thr. The specificity of thermopsin is therefore similar to that of pepsin, that is, it prefers large hydrophobic residues at both sides of the scissile bond [1]. 
25.00 25.00 52.70 36.10 20.20 20.20 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.80 0.70 -5.47 25 105 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 33 0 57 105 6 261.40 30 40.72 CHANGED hhhpsh.ls..hhhhhstss........PhGlusYul.........shslpTspVlGhhNIsSLp..uastsstt.......shuASLQLNslLp.sshhs.tshtaWlQNVl.......hF.Tsssp...hsalDNlWNhTu.shusloss.l....pGpGtl..........p.YYsYus......shshshPhohhLhlNsohsst.ushltFGYsl.psusl........YDsVslsss.....s.tsA.hhlsGhshss.............tGl.hhDsELVaGGsGsGpsssapshsupLuLaY.hsu..ssapshsssYsaGhDTuEoutslp .......................................................................................h.hs..........h.h.s.ss........PsGlssYGl............hslpTspVlGhlsIsslp....uh.shosst......sshuASLQLNshLp.hph.s.tphtaWlQNVl........F.ssssp...hphh-NlWNhTu.s.huslsss.l....pGpGhl.........ttt.YYsYss.........shhphshPhohhLhlss.ohs.st.sshlsFGYsh.psush......................hYDsVsl.ss........h.su.hhlsGh..s..hss.............hGh.hhDsELVhGGsGsG.p.sshhpph.suhLuLhY.h...ps....sshsshsshYsaG..hDTuEoussl.h................. 0 11 25 50 +5145 PF05318 Tombus_movement Tombusvirus movement protein Moxon SJ anon Pfam-B_4393 (release 7.7) Family This family consists of several Tombusvirus movement proteins. These proteins allow the virus to move from cell-to-cell and allow host-specific systemic spread [1]. 21.30 21.30 21.50 21.40 18.30 21.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.69 0.72 -3.35 6 85 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 24 0 0 86 0 64.50 39 91.02 CHANGED MDsp....ps.p.l.....tsGcpctuGp+GppK..s+RpVApcAl+....K..pussuosGGsWVhVADKlEVoIsFNF ......................s.p.........pV.......shstppthsus+GKpK....s++sVA+DAls....K..su.p.cussGusaVsVADK..IcV..sIpFNF........ 
0 0 0 0 +5147 PF05320 Pox_RNA_Pol_19 Poxvirus DNA-directed RNA polymerase 19 kDa subunit Moxon SJ anon Pfam-B_6945 (release 7.7) Family This family contains several DNA-directed RNA polymerase 19 kDa polypeptides. The Poxvirus DNA-directed RNA polymerase (EC: 2.7.7.6) catalyses DNA-template-directed extension of the 3'-end of an RNA strand by one nucleotide at a time. 25.00 25.00 211.80 211.60 24.80 24.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.95 0.71 -4.50 12 52 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 36 0 165.80 63 95.59 CHANGED M--SsDI..sa.S---p..pY-E----...p.tEuhsooDlsshKpSsh+h.puhSpsh--t....pss.+plos+IpsIK+RYTRRISLFElTGIlAESYNLLQRGRLPLls-LSD-Th.+pslL+lllcEIEEGsCPIVIEKNGELLSlsDFDpcGLpaHLDYIhsIWKpQpRh ....MADoDDII.DY...-SDD.s..EYE---E-..-E-uESLETSDlss..pSuYKI.ESASopIEDA....poshK+lus+ISALK+RYTRRISLFEIsGIIAESYNLLQRGRLPLVS-hSDETh.KQNhLHVlIpEIEEGoCPIVIEKNGELLSVsDFDK-GLKaHLDYIIcIWKhQpRY 0 0 0 0 +5148 PF05321 HHA Haemolysin expression modulating protein Moxon SJ anon Pfam-B_7025 (release 7.7) Family This family consists of haemolysin expression modulating protein (HHA) homologues. YmoA and Hha are highly similar bacterial proteins downregulating gene expression in Yersinia enterocolitica and Escherichia coli, respectively. 25.00 25.00 25.60 25.50 24.60 24.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.76 0.72 -3.83 14 1466 2009-09-11 00:35:18 2003-04-07 12:59:11 6 1 560 3 79 307 5 56.20 58 82.15 CHANGED hphR+Cooh-TLEKlh-+p+.p..Lsss.EhpsFpuAADHRLAELsM.sKLYD..KlPssVWp ......h+hR+hpol-oLE+lh-+s+Yp..LoDs.....ELtsFYSAADHRhAELsh.sKLYD..+lPpSVW+............ 0 4 16 48 +5149 PF05322 NinE NINE; NINE Protein Moxon SJ anon Pfam-B_7029 (release 7.7) Family This family consists of NINE proteins from several bacteriophages and from E. coli. 
25.00 25.00 30.30 30.20 23.80 23.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.15 0.72 -4.23 3 157 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 136 0 0 53 0 58.20 92 95.15 CHANGED MRRQRRSITDIICENCKYLPTKRSRNKhKPIPpESpVKTFsYluuLhDS+Ws...RaCs..R+TR .......MtRQRRSITDIICENCKYLPTKRSRNKRKPIPKESDVKTFNYTAHLWDIRWL...RHRA..RK........ 0 0 0 0 +5150 PF05323 Pox_A21 Poxvirus A21 Protein Moxon SJ anon Pfam-B_7034 (release 7.7) Family This family consists of several poxvirus A21 proteins. 25.00 25.00 29.10 28.90 18.50 17.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.51 0.72 -3.67 11 60 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 46 0 0 41 0 112.80 61 97.71 CHANGED MIoLFLlLCYFILIFNIIVPsIuEKLRpEacAas+Y+pl.ppcalCVDspLhsYsFssoGlpAphhlDsss.sPLPCS+hspscst..chlsC-..stsslhch+csCu+AYh-LFh MITLFLILCYFILIFNIIVPAISEKMRRE+AAYlsYK+L.sKsFICVDDRLFSYsFTTSGIKAKhAVDscs.lPIPCS+IN-VNsN...csLhCD..pDcsDIs.sFsRSChRAYuDLFF. 0 0 0 0 +5151 PF05324 Sperm_Ag_HE2 Sperm antigen HE2 Moxon SJ anon Pfam-B_7044 (release 7.7) Family This family consists of several variants of the human and chimpanzee sperm antigen proteins (HE2 and EP2 respectively). The EP2 gene codes for a family of androgen-dependent, epididymis-specific secretory proteins.The EP2 gene uses alternative promoters and differential splicing to produce a family of variant messages. The translated putative protein variants differ significantly from each other. Some of these putative proteins have similarity to beta-defensins, a family of antimicrobial peptides [1]. 
20.10 20.10 20.80 22.90 18.20 18.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.28 0.72 -4.06 4 65 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 19 0 13 61 0 68.50 59 59.33 CHANGED KphLhP.hsSLLLVALLFPG.SpApplNHpsTEuPRc.pEEusGQGsNcSpLLHHpVKRh.llPRpPPY.Es-P ..............+QRLLP.hsSLLLVALLFP.G.SpARHVNHSuTEu.pEL...REtAsGQGTNtSQLL+HsVKRt.l.PRTP.Y.t................ 0 2 2 2 +5152 PF05325 DUF730 Protein of unknown function (DUF730) Moxon SJ anon Pfam-B_7197 (release 7.7) Family This family consists of several uncharacterised Arabidopsis thaliana proteins of unknown function. 27.20 27.20 27.60 172.60 26.70 27.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.63 0.71 -4.31 3 10 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1 0 0 10 0 117.10 82 83.76 CHANGED MEIRMRMRYGENRRRDKGVPIECDCNAKVVVATS+DPVTSGKLYFSCPYEISDGPGRGCGFKRWWTVALCDEFDMIKEEpsEMKKDLEAANK+VEuQsEKIFLMEKKFETLEKKYESlNKYS .MEIRMRTRYGENRRRDKGVPIECDCNAKVVVATSLDPVTTGKLFFSCPYEISDGPGpGCGFKRWWTVALCDEFDMIKEEToEMKKDLEAANKRVESQsEKIFLMEKKFETLEKKYESLNKYL...... 0 0 0 0 +5153 PF05326 SVA Seminal vesicle autoantigen (SVA) Moxon SJ anon Pfam-B_7065 (release 7.7) Family This family consists of seminal vesicle autoantigen and prolactin-inducible (PIP) proteins. Seminal vesicle autoantigen (SVA) is specifically present in the seminal plasma of mice. This 19-kDa secretory glycoprotein suppresses the motility of spermatozoa by interacting with phospholipid. PIP, has several known functions. In saliva, this protein plays a role in host defence by binding to microorganisms such as Streptococcus. PIP is an aspartyl proteinase and it acts as a factor capable of suppressing T-cell apoptosis through its interaction with CD4 [1]. 
23.10 23.10 23.10 23.20 23.00 22.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.71 0.71 -4.34 11 63 2012-10-03 02:52:13 2003-04-07 12:59:11 6 1 32 1 27 173 0 111.50 44 84.82 CHANGED sLQhLapsossThLLlLCLpLtss....cuQ-N..pppslhhshclssss....spspEsTVpLsVpTpl+ECMVlKsYLhSNhslc.GuFNYpaTuCLCsp.PpsFaWDlhsscTsplsssVDll+EhsICPDD ......................thhhpsssshhhLllhL.Lths....puQ-s..sR+hlhhshplPpos....cts-ElTssLpVpT-L+ECMV.lKsY..L..h..S..s..h..s..l.-...GuF.N.Y...p..YT.uCLCs.s.P+TFYWDhh..ssc..T....spIsuVlD..llpEhsICP-D....................... 0 1 1 6 +5154 PF05327 RRN3 RNA polymerase I specific transcription initiation factor RRN3 Moxon SJ anon Pfam-B_7041 (release 7.7) Family This family consists of several eukaryotic proteins which are homologous to the yeast RRN3 protein. RRN3 is one of the RRN genes specifically required for the transcription of rDNA by RNA polymerase I (Pol I) in Saccharomyces cerevisiae [1]. 20.60 20.60 23.90 21.60 19.60 19.70 hmmbuild -o /dev/null HMM SEED 564 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.94 0.70 -6.40 33 404 2009-01-15 18:05:59 2003-04-07 12:59:11 6 12 278 2 283 410 5 426.80 23 78.29 CHANGED 
sstpsschotphhcp.....hV+cALpphp................pG...-sstacplpphh..........tshptt...-s.s.sphpplLpsLtssVstLDps.sssLVp.ulLsh.pWh.s+spsh.........lctYlpFLssL.............suup.spalstllshLlspFs..spt...ssp.........t....p..phhpphHthLppllchlPsusshL.shLsppFPat.scopcshhs.YlpNLL+ltpYs...spLptclhpLlh-+llclDV..........phps-h--l........-D-ppcth.tt.t.pstppth.-.-sss...............ttp.....s.ps-sp.....................................spphpsl+phsp+LDsllshlhsahcs............shsssphsp...............................s.slFcsLlshFpshILPTapo+asQFLlFahsphpsphs-.pFlspLhclsh........s.sspss.h..R.uAssYLuSalARA+alstpplphlhshLssaLspYltpp-sss...............tssshcc.atsFYussQAlhYlFCFRa+sLh.........................sps.........hsahsslcph.Lp+slhSKLNPLKhCsPsVVstFA+lup+hsl..sYsaoI..lEpNpR...................t+lsphhuts..................t....p.........L-uaFPFDPahL.pS+ch..lcs.YlpW..ptlsspp ...................................................................................................................t....................h..hh.t........................p.........ht.hh..h.................................t.......h..hl..h.t.h..lp....t...ht.l...lp..lhph..W....th.s..th...........hp..aht..hlh.L.................sss..s.aht.shthlhpthh............................................................t.hH..hlp.lh.phhP.s...hl.sh...l....ptaPhh.....ppstt......h....Yhp.Nlltl..hh...stlt..tlhthl...hp+hlplDl......................pht.hpph..........-ct...t.................................t.....p..pt.t..................................................................................................................t..t.hp.hhpplD.hh.hhhphhp...............h...p...t.................................................................................................................................................hap.LhthFpphlLss.atopasQa..lhFahsph.p.................t...............hhc...tFhthLhp.hh..........................................s...sp...ss.h.....R.sussYluSalARuthls...hl...............h.hhphLhpah...pt..a.htp.ptts..........................
......................................ssht.h..at.FYushQAlhYhhsFRhcplh.....................................................................pt............hth..pl.p......hpphlh..s..p..lNP..L+hC..slst.F...utlspthth....ha.hhsl..lEps.pp..........................................l..hhs.......................................................................L-saFPFDPh.L..st.p.h..lt..a..ap................................................................................................... 0 116 178 239 +5155 PF05328 CybS CybS Moxon SJ anon Pfam-B_7102 (release 7.7) Family This family consists of several eukaryotic succinate dehydrogenase [ubiquinone] cytochrome B small subunit, mitochondrial precursor (CybS) proteins. SDHD encodes the small subunit (cybS) of cytochrome b in succinate-ubiquinone oxidoreductase (mitochondrial complex II). Mitochondrial complex II is involved in the Krebs cycle and in the aerobic electron transport chain. It contains four proteins. The catalytic core consists of a flavoprotein and an iron-sulfur protein; these proteins are anchored to the mitochondrial inner membrane by the large subunit of cytochrome b (cybL) and cybS, which together comprise the heme-protein cytochrome b. Mutations in the SDHD gene can lead to hereditary paraganglioma, characterised by the development of benign, vascularised tumours in the head and neck [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.54 0.71 -4.54 48 384 2012-10-03 07:11:12 2003-04-07 12:59:11 7 5 272 30 229 486 257 123.80 29 70.69 CHANGED hsphphlPp....Pthl.GolN..-sh...PtssthcGShHWshERlluluLlPLssssahs.ss.........sslhDusLussllhHsHhGFpusIhDYlsp+haG.thtphAhhlLthuoslshhGlY.hEop-s.GlscslpplWp ...................................................................h...........................t.......st..ssttpuShHWshERllusuL...lP..Lhs...s..sahs..............................sss...hDshL...u...s....s.L...l....l...HsHh...Ghp.uslhDYl.....phhs...thp....phu.h.h.hL.h.h.h.o.sh.shhGla.hpp......p-s.GlscslpplWp....................................... 2 70 121 188 +5158 PF05331 DUF742 Protein of unknown function (DUF742) Moxon SJ anon Pfam-B_3675 (release 7.7) Family This family consists of several uncharacterised Streptomyces proteins as well as one from Mycobacterium tuberculosis. The function of these proteins is unknown. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.37 0.71 -4.48 14 710 2012-10-04 14:01:12 2003-04-07 12:59:11 6 6 167 0 269 736 6 113.50 37 80.96 CHANGED scssshVRPYslTsGRocsst...slsLholVsAtsstP.................sshtPEctpILcLCps...shoVAElAAtLcLPluVV+lLLuDLh-sGtlss+tPhs......sphPDpsLLccVlsGLRpL ....................................................................................................................s..tsshVRPYslTu.GR...Tcssh.........sLsL.sl.Vssp.s.s.ss.....t..t..........................................hsht.sE..+ppIl.p..LCpp....shS...VAEluApLcLPlGVsRVLluD.LsssGhlpl.+p.ssss..............s.....t..........ts..D.....h..sLLccVLsGLRpL.............................. 0 76 213 260 +5159 PF05332 DUF743 Protein of unknown function (DUF743) Moxon SJ anon Pfam-B_4046 (release 7.7) Family This family consists of several uncharacterised Calicivirus proteins of unknown function. 
27.70 27.70 27.70 120.10 27.60 27.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.30 0.72 -4.09 4 42 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 18 0 0 36 0 104.60 56 96.36 CHANGED NhGLsLlsolANAhhEGp+l-LsupuLuhpp+hh-sEpcaNhsRLsF-...........KppFppsh-L+lpGslhRhpthRAAGaRlNPYSNGpQlahDEsttApLpSYhsFYKs .NSILGLIDTVTNTIGKAQQIELDKsALGQQR-LALpRhsLDRQALsNQ...........VEQFNKlLEQRVpGPlQSVRLARAAGFRVDPYS.sNQ.aaD-thsA.hhSY+shaKs. 0 0 0 0 +5161 PF05334 DUF719 Protein of unknown function (DUF719) Moxon SJ anon Pfam-B_7667 (release 7.7) Family This family consists of several eukaryotic proteins of unknown function. 20.80 20.80 20.90 22.30 20.70 20.10 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.36 0.71 -4.25 6 168 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 81 0 95 154 0 158.60 48 33.17 CHANGED S..puu.WG....hWGuhupSlLSoAotslATl...hTpVppthpsslGlPsPpELutpsstEcAEpstpssspt-s......-sspGots.....hssuFG.hstlsssVpshGppVloGGLDsLEhIGKKTMslLtEsDPGhhpp+pLh....N+sssLSQVLREAKc+.EchpcshpQlphEppKt..hHathLFD-YpGL .......................................su..Wu....hWG..oW.G.K.S.lLSoAoATVups..............los.VhEK...A....t..soLtI.ss....sph.up.pst...ts.cp....t.............................................ptspssss............su.u.sh...G.hh.......Ssl.o..ssV...........Q.sT..GK....oV.loGGLDALEFIGKK..TMsVlAEuDPGFK+TKsLM............pRssoLSQlLREAKEK.Ec.pphup...plshEps......sHYGhLFDEaQGL................. 0 23 29 56 +5162 PF05335 DUF745 Protein of unknown function (DUF745) Moxon SJ anon Pfam-B_5169 (release 7.7) Family This family consists of several uncharacterised Drosophila melanogaster proteins of unknown function. 
22.40 22.40 22.40 22.60 22.30 22.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.37 0.71 -4.77 14 190 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 39 0 120 201 4 174.00 35 58.26 CHANGED sshs...htsss+pKuSsIApKAAp-AKsAsDuQsuAuctAupplKppLA-KAhtAA+AAEAALAGKQQll-QLEpElpEActVVpE.ppuLpsopssApuAttssppAppplpsLpshlcsApsslssh-psAssAQpElsEKsQLL-AA+pRV-hLt+QlpsA+tDa-pTKpAAhKAACAApEA+Q+A .........tttt.......hpspptussIAppAAppAKuAsssQ.uAuptAA.psKtpLAp+.AhpAApAApA.sLsGKptllcpLcpchpEActsVpcEpspLppu.pssspuAtp.......ssppAppp....lpsLpshlpsApsshsssppsAssApp-Lup.....+sphlts.A+pRl-tLpcQltsA+tDappT+pAAhKAssAA.EAppps............................. 0 34 39 84 +5163 PF05336 DUF718 Domain of unknown function (DUF718) Moxon SJ anon Pfam-B_7227 (release 7.7) Domain This family consists of several uncharacterised bacterial proteins of unknown function. 23.40 23.40 23.70 23.40 22.70 23.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.32 0.72 -4.23 61 1431 2012-10-02 00:20:33 2003-04-07 12:59:11 8 7 1252 8 330 844 242 104.20 38 92.26 CHANGED pRhuahhpLc..Psph-EYc+pHsp..l...WPEllptL+psGlpsYSIaLc.tppshLFuhhEhps.....-tshs...thupsslsp+WWshMuslhc..s.ssspPsp..........ssLpcVF+Lc .........................RhAalhplp..P-..t..hcEYp+RHst.....l....WPEl...ps...s...L..+p..p.GspsYuIaLc..pp..........p.....s.....hLFuhlEhcs................Et.c..h..s.....tl.A..so....s....lsQ+WWcaMs....clh....ssP.D..s.......o....Pss.................spLpEVFaL............................. 0 100 218 277 +5164 PF05337 CSF-1 Macrophage colony stimulating factor-1 (CSF-1) Moxon SJ anon Pfam-B_7649 (release 7.7) Family Colony stimulating factor 1 (CSF-1) is a homodimeric polypeptide growth factor whose primary function is to regulate the survival, proliferation, differentiation, and function of cells of the mononuclear phagocytic lineage. 
This lineage includes mononuclear phagocytic precursors, blood monocytes, tissue macrophages, osteoclasts, and microglia of the brain, all of which possess cell surface receptors for CSF-1. The protein has also been linked with male fertility [1] and mutations in the Csf-1 gene have been found to cause osteopetrosis and failure of tooth eruption [2]. 25.00 25.00 35.40 43.70 24.20 23.60 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.66 0.70 -5.06 9 118 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 29 2 35 134 0 226.10 38 95.29 CHANGED hTstGuut.sPSsshh..Gsc.lLssLLsoNhlsEEsStcsS............ahlGsGplps.pph.cuphpsSstlshphhDQc.lD.....lpcAaLhs.s.McsThchpsNTPpts..........chsSshscDapE.spAplpTh+.p........................PLt.LEthKs.........................shp-p+sLhEh-hs.hScssspshA+hsShs.........LhspGpspQc.uSssPQhPt.VFhlLsPSlILVLLsVGGLLFYph+hRSHpDPQtsDSSstpPEsSsL.TQD.DRQhELPV ......................................t.tPsssh...G...lLsshLsop..hhEEsStpsS............ph.GsGplQs..p.th.ss.hpsSs.lshphhsQp.hc.....lppshL.h.s.hcsThphpspTPptp............chtSs..hh+DhpE.spsplpohh.p........................PLt.LEth+.shp-p+s.h....-h-hs.........hScsssp......shAchsS.s.........Lhspup.ppc.uSssPp.s......LsPS....hhsluGLha............p-spts-us...Pp.t.p.PL.T...D.s..pthphPs.......................................................... 0 2 2 7 +5165 PF05338 DUF717 Protein of unknown function (DUF717) Moxon SJ anon Pfam-B_7144 (release 7.7) Family This family consists of several herpesvirus proteins of unknown function. 
21.30 21.30 26.00 65.80 21.10 20.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.65 0.72 -4.02 11 20 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 18 0 0 18 0 56.00 39 70.93 CHANGED sploEpDFppChpFFs+Pl.pllspsupuLsslchscossQpl-pLsLlLDLlGTE .sploEpDFp-CtpFFs+PLpcllsssucuLsslclscSssQplEpLsLLLDLlGTE 0 0 0 0 +5166 PF05339 DUF739 Protein of unknown function (DUF739) Moxon SJ anon Pfam-B_7696 (release 7.7) Family This family contains several bacteriophage proteins. Some of the proteins in this family have been labeled putative cro repressor proteins. 19.50 14.00 19.60 14.40 19.20 13.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.91 0.71 -4.37 11 78 2012-10-04 14:01:12 2003-04-07 12:59:11 6 1 77 2 9 39 3 67.70 43 73.40 CHANGED hsaD..........YScLpG+I...............lE+YGo..............................pasFApAls..lSE+olS.hKLNsKl..................................sWpss-ItKAh-.lLulsp-.............clspYFFph ...............................hsh..............atpLtG.l...............lEKYGo....................................pYsFAhAht...LSERolS.LKLNsKl...................................W..s...tshp.hlth..p......................h.................................. 2 4 9 9 +5167 PF05340 DUF740 Protein of unknown function (DUF740) Moxon SJ anon Pfam-B_7873 (release 7.7) Family This family consists of several uncharacterised plant proteins of unknown function. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 603 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.33 0.70 -6.15 6 176 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 19 0 119 155 0 210.00 18 72.48 CHANGED ssspssp.+RhSTSCcRHP-E+.FTGFCsSCLpERLSsL-ts.......SSSuptPso.Sssul+ulFt.s.sss..............u.hPELRRsKSFSs.+...........ssAusSsu.EPQRRSCDVRs.+soLhsLF.pDD-cplsSs......sss..p.....RcshVs-.lhEE-pEh..EcD--...t................Estcll-Epspc.................hp-EEtKsMKDa.hDL-Spp.+K.ssKc....utSFasAASVFSKKLQKW+pKQKhKKcc..sGsuuutss.....................u-huhGRRSCDTDP..............RFSlDuGRlShDDstaSFDEPRASWDGpLIG+oh......sP.....hPoMhSVlEDuP......lpRuDhph......Psss.lp.pcu-pD..h..PGGSsQTRDYYhD.sSSRRR+SLDR.SsShR+hu...ltElDElKsluNucsSs...........hsppppLRDpsh.s.sNhcsEotE.su.............p.sust.cKKs+RWuK.WsIaGLIaRKsss+.......cp-p.p+hs.ushVERSLSESW.s-LRsst......GsuPKMlR.........sNSslShRS..SG.pGG.............................G.puppp.phlpp+sKsupYSs.cssENGMLRFYLTPh+uptpuuGu ..................................................t..............C.hHP.p..hsGhCs.CLp-RL..lt...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................s......................................................................................... 0 11 65 90 +5168 PF05341 DUF708 Protein of unknown function (DUF708) Moxon SJ anon Pfam-B_7259 (release 7.7) Family This family consists of several uncharacterised nucleopolyhedrovirus proteins of unknown function. 21.30 21.30 21.50 41.70 20.20 19.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.33 0.72 -4.25 24 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 55 0 0 53 0 105.70 44 78.30 CHANGED phpWpllss......shlEVsPc-REpAWKDLll.sLpsoPp..oaRTtlp+AshEpFDYKpPIhY-lKp+pLhlss-plhpALs..hPp.sshsshslsshplhhs....FIhslLl ...lRWp..lLNs......DclEVsPEcREpAW+-LlIsllpsoPt..TaRThlpKAshENFDYppPIlYslKsKpLllss...EplppALN..RPt.tshsshNlsshplhLs..FIhslLL..... 0 0 0 0 +5169 PF05342 Peptidase_M26_N Peptidase_M26; M26 IgA1-specific Metallo-endopeptidase N-terminal region Studholme DJ anon Merops Family These peptidases, which cleave mammalian IgA, are found in Gram-positive bacteria. Often found associated with Pfam:PF00746, they may be attached to the cell wall. 
25.10 25.10 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.59 0.70 -5.40 19 698 2009-01-15 18:05:59 2003-04-07 12:59:11 9 66 278 0 17 423 8 240.30 39 13.51 CHANGED -pl-hhp-YThsTp......shs.s-sspsct.p-p.......hp.s.KKlEL+NlsslcLhp..-NGp.ppps.LsphPs..ssssYalKVpSss.K-shLPVsSIEEssc-GpslYKlTApsscLlQ.ct-sphp-sasaYltKttsc-sslYhsFpsLVcAMppN.sGTa+LGAsLsAspVphssss+SYlpGpFoGpLlGsp-GKcYAIaNLc+PLFssl.puuolcclsLKsVNIstp.....sclAolAppspsuopIcNV+VsG .......................................plchhptYplpTph.....sasp..upus...c.o.pt.c-.p...........hplshKKl..El+..NIssspLhp.........-sGp.p..phshL...sphPs..sspphalKVpSps.KsshLsVsS.IEEssh-GpplYKlsApApcLlQ..ctssph.pppYsaYlp.K..t.sc.-ssVYhsFppLVcAMpssPsGTF+LGAslsAspV..ssss+S..Y..lpspFpGpLh.o.s-G.KpYuIaNL...c.......+P.......LFssl.pu......uTVcslsLcsVsIsh...........-slAs.lApphpssopIcNV+VsG............................................. 0 2 4 8 +5170 PF05343 Peptidase_M42 M42 glutamyl aminopeptidase Studholme DJ anon Merops Family These peptidases are found in Archaea and Bacteria. The example in Lactococcus lactis, PepA, aids growth on milk [1]. Pyrococcus horikoshii contain a thermostable de-blocking aminopeptidase member of this family used commercially for N-terminal protein sequencing [2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.74 0.70 -5.64 69 4278 2012-10-02 19:46:12 2003-04-07 12:59:11 9 6 2184 65 699 4495 777 284.40 30 81.45 CHANGED Gsllup.ppG...su..+lhluAHMDElGhMVppIccsGhL+hsslGGhsspsl.up+lplhscpG...l.GVlGstsP..Hlhc.stccppshp....hc-lhIDlGsps+--AcchGlplG-hlsh...cschthlssph.lhu+AhDsRhGsslll-ll+pLpcpph...sslahsuoVQEEVGLRGApsustplpP-hslAlDsssu.uDsssssctp.....lGpGsslphhDss.....hlhc.plpchLh-hAccpsIPhQhc.hhssGGTDAuuhphs.ttGlPouslulssRYhHS.sEhhchcDlpsshcL .....................................................................................................................................................Gulhsp..h....pu....tpsu....+lhlsAHhDElGaMVppI....c....c.s....G..hlchps..l...G..G.....a.spsh.upplplpTcp....G....plsG.ll..u..s..hss.........Hhh.........p..s.........t.........tc.........p...p........p.....hp..-lhlDl...Gs..............p...o...c.-.E...s.c.p.h.GIcsG..D.h.....ls.........csp.h...t.h........h...s...sst...lhuK.A.....h.Ds.RhGss.hllclL..c.p.L.p..s..p..p..ls...........hsla.hs.soVpEE...V.G....l............RG....A....p........s....u......s....p....p.......................l......p.P...D......l...................h....l.u...l......Dsuss....sD...s......s......ss.php........hG.pGshl.t.h.h..Dss..........hlhp.p.l..p.c.a.l.h.s.l.A.....c.....c..p.....sIPa.........Q...h.........p....h....h............s...t...........G...G..TD.A...u...u....h...p.h.s...s..sGlP...s...s.slGl...ss..RYhHo....hphhchcDhtsshpL.................................................................................................... 0 268 469 596 +5171 PF05344 DUF746 Domain of Unknown Function (DUF746) Yeats C, Eberhardt R anon Yeats C Family This is a short conserved region found in some transposons. Structural modelling suggests this domain may bind nucleic acids [1]. 
23.70 23.70 23.80 31.10 23.00 23.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.76 0.72 -4.44 10 78 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 34 0 16 77 0 63.60 41 34.53 CHANGED c+hcthIRhLSpPlSlh-AA-tlGsscssltchVchFRpalLpLDPSGpaEsRlRLGsRPspsss ......c+hcLFlphLS.PlSshpAuctlGot.ssltchlphaRpalLpLDPSGphEtRlRLGs+ssphss...... 0 4 8 12 +5172 PF05345 He_PIG Putative Ig domain Yeats C anon Yeats C Family This alignment represents the conserved core region of ~90 residue repeat found in several haemagglutinins and other cell surface proteins. Sequence similarities to (Pfam:PF02494) and (Pfam:PF00801) suggest an Ig-like fold (personal obs:C. Yeats). So this family may be similar in function to the (Pfam:PF02639) and (Pfam:PF02638) domains. This domain is also found in the WisP family of proteins of Tropheryma whipplei ([1]). 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.37 0.72 -3.94 108 2176 2012-10-03 16:25:20 2003-04-07 12:59:11 7 369 532 0 988 2359 2162 49.40 32 9.08 CHANGED oaoh....................ssssLPsGLol...........sssoGsloGT....ssss...............G.sashslssossss ...................................................sh.....................ssuLP..sGLoh...........................ssu....TGslSGT.........Poss....................................G.sa.s.lsVosTDss...................................................... 0 490 754 893 +5173 PF05346 DUF747 Eukaryotic membrane protein family Wood V, Bateman A, Murphy T, Mistry J anon Pfam-B_13582 (release 7.8) Family This family is a family of eukaryotic membrane proteins. It was previously annotated as including a putative receptor for human cytomegalovirus gH [1] but this has has since been disputed [2]. Analysis of the mouse Tapt1 protein (transmembrane anterior posterior transformation 1) has shown it to be involved in patterning of the vertebrate axial skeleton. 
25.00 25.00 27.70 25.10 24.10 23.60 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.26 0.70 -5.39 36 347 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 271 0 241 343 1 312.30 35 47.39 CHANGED p.....ptlsttphsDllphhlllhsshlLp..........hl.DsS+hYHhIRuQusIKLYllaNlLElsD+LhsShGpDll-sLhpssh.........................................................................................................................................................................................................ttpthhchhshahlulhYlshHuhlLlhQsloLNVAlNSasNuLLoLLlSNpFsElKuoVF......KKF-+EsLFQlotuDllERFpLhlhLhll..ulRNhhph....................................h.tuap.....................tlhushlhVluSElh.VDWlKHuaIsKFNcl+splYpcahplLs.....tDhh............................t.hsts.hls....+RlGhsshshsslhl+hhh.hhphhh....................................................................................................hthlshhhlhlhsahhLlhhKllLuhhllpaupphhp.phc ...................................................................htstphsDllp.hhl.llhsh.hhh...........hl.DhSh.hYHhIR..uQ.usIKLYllaNhLEluD+LhuuhGQDll-sLa.ptt.............................................................................................................................................................................................................................................................................................................................................................................................ptp...p...thht.h..hhhh..ul....hY...s....hhHuh....hlhhQsh......oLNVAhN..Sa.s.............puLLTlhhSNpFsEIKu...........oVFKKF-KpNL...............F.Qlo.s....uDlhERFplhlhLhll..shRNh.ph.....................................................p.ah........................lhsshhhVlsoEhhVDhl.............KHAaIsKFNpI.p.sslYpcaht.Ls.........hDhh..............................................................s....sthsptls....RRhGh.slPlusLhlRsh.hpshp..........................................................
...................................................hhh.hhhhhhhah..hLlhhKll.uhhLlthupph.t............................................................................................. 0 83 142 206 +5174 PF05347 Complex1_LYR Complex 1 protein (LYR family) Wood V, Bateman A anon Pfam-B_15215 (release 7.8) Family Proteins in this family include an accessory subunit of the higher eukaryotic NADH dehydrogenase complex. In Saccharomyces cerevisiae, the Isd11 protein (Swiss:Q6Q560) has been shown to play a role in Fe/S cluster biogenesis in mitochondria [1][2]. We have named this family LYR after a highly conserved tripeptide motif close to the N-terminus of these proteins. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.93 0.72 -4.17 246 1703 2012-10-01 20:54:40 2003-04-07 12:59:11 10 21 313 0 1190 2012 12 60.50 22 42.65 CHANGED pp..lLpLYRplLR..pu.pp......hsshs..........+phh...ppplRspF+c..s+s...hp.-.ppl.....pth......lppupcpLph ...................pllpLYRphLR......ts..pp........................hst.ts............pphh.........ppt...lR..ppFcp....s+s......hs..-sppI..................pth..........................lppup..h..................................................... 0 377 651 958 +5175 PF05348 UMP1 Proteasome maturation factor UMP1 Wood V, Bateman A anon Pfam-B_18845 (release 7.8) Family UMP1 is a short-lived chaperone present in the precursor form of the 20S proteasome and absent in the mature complex. UMP1 is required for the correct assembly and enzymatic activation of the proteasome. 
UMP1 seems to be degraded by the proteasome upon its formation 21.50 21.50 22.20 21.50 21.40 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.42 0.71 -4.19 7 345 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 278 0 233 323 2 122.10 29 80.51 CHANGED shpshthtlsuhht.hcshphGhhsshsplhssHPLcss.cN.hptpQcphphphlRph.GlthPLKhsMEpplhpphpR.P.hlsSuphth-lLsGp.-slsFEDhhNsPppuEhhh...p.HthhEhpLGl ..........................................................................s................c......t....G...........s..ht....s.p..l.h....s..s..HPLEtp.ps.aptspcphphptlRplaGhttPl+htMEhc.llp.......p........sp.+.hP..............h.L...s.....S.......S..........s..l..thDlLp..Gp--slsaED.....lh.....s..ss.p..tp..-...ht............p.Ht.hEh+L.................... 0 71 119 180 +5176 PF05349 GATA-N GATA-type transcription activator, N-terminal Finn RD anon DOMO_DM03585 Family GATA transcription factors mediate cell differentiation in a diverse range of tissues. Mutation are often associated with certain congenital human disorders. The six classical vertebrate GATA proteins, GATA-1 to GATA-6, are highly homologous and have two tandem zinc fingers. The classical GATA transcription factors function transcription activators. In lower metazoans GATA proteins carry a single canonical zinc finger. This family represents the N-terminal domain of the family of GATA transcription activators. 
20.30 20.30 20.60 20.30 19.50 20.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.77 0.71 -4.10 16 190 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 48 0 70 154 0 155.00 39 41.19 CHANGED MYQo....LAluuspG.s..uYs.ss.GuFlHS.........s..AoSPVYVPToR.VsoMl.sLP.....YLQsstsuppu.................ps..huuHsuWuQs.uu-os.uassuS................sHsPsu.......Fsau.pSPPhuuuuu....RDsu.............YpusLhhss.u...R.-QYush..sRsluGSYsSs......YsAYhoP-lu...s.SW......suGPFDuSV.LHuLQuRs.ushsu.R+ssh..-hL ..................................................................................MYQo....LAhsus...u.s....uYt.ss...usF.hHu...........s..usSPVYVPosR.Vsuhh.sLs.....Y.Lpsstsu.ts.....................ts...supssWs.Qs.us-us.sass.....................ssst.......F.sas...os.....s..hss.usu......R-su...............apu...sh.....u....R.-QYut....RshsGoYsos..........Ys.....s.Yhu...s.slu...s.uW.......suGPF-.usV.LHuLtuRs.ss.ss.Rtssh............................................................... 1 4 12 30 +5177 PF05350 GSK-3_bind Glycogen synthase kinase-3 binding Finn RD anon Pfam-B_18811 (release 7.8) Family Glycogen synthase kinase-3 (GSK-3) sequentially phosphorylates four serine residues on glycogen synthase (GS), in the sequence SxxxSxxxSxxx-SxxxS(p), by recognising and phosphorylating the first serine in the sequence motif SxxxS(P) (where S(p) represents a phosphoserine). Interaction of GSK-3 with a peptide derived from GSK-3 binding protein (this family) prevents GSK-3 interaction with Axin. This interaction thereby inhibits the Axin-dependent phosphorylation of beta-catenin by GSK-3 [1]. 
25.00 25.00 26.70 32.10 22.90 24.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.62 0.70 -4.89 4 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 38 10 34 79 0 160.30 40 81.67 CHANGED MPCR+E.................SFLLLpQSVTlG.SsEVDpLVupIGEsLQLcsApcoPsSssts.G........hhAuhPss+uGs..................ssGuhRChhhcptpVR.GRuuPYsVs.sssGuSshs.p.h.................t.C+RGWhR..sssR+.........ts+stD-DDPHcLLQpLlLSGNLIKEAVRRLp.....huucsPsps.PGsh ...............................................MPst+c.............................salLLpQSVTls.StEVDtLVspIGEsLQLcssps.s.t.o.ss..G..............ush.sspst.............s..........tshtChhhcptthR.sRuuPY......sGsos................................+sW.R.......+c......................t.s+ss-.DDPH..cLLQpLlLSGNLIKEAVRRLp.t.................h............................................. 0 5 6 12 +5178 PF05351 GMP_PDE_delta GMP-PDE, delta subunit Finn RD anon Pfam-B_13828 (release 7.8) Family GMP-PDE delta subunit was originally identified as a fourth subunit of rod-specific cGMP phosphodiesterase (PDE)(EC:3.1.4.35). The precise function of PDE delta subunit in the rod specific GMP-PDE complex is unclear. In addition, PDE delta subunit is not confined to photoreceptor cells but is widely distributed in different tissues. PDE delta subunit is thought to be a specific soluble transport factor for certain prenylated proteins and Arl2-GTP a regulator of PDE-mediated transport [1]. 
25.00 25.00 30.20 30.10 18.20 18.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.85 0.71 -4.42 23 362 2009-09-11 15:38:04 2003-04-07 12:59:11 6 4 137 20 215 325 4 144.60 44 72.54 CHANGED pspsNlasI-Fs+FpIRDhETGpVLFEls+ssss...........................................tt..p...ssuRhl+YpFsPpa.....................L+l+oV....GAs.lpFoV.G-cPlpsFRMIERHYF+spLLKoFDF-FGFCIPpScNThEpIYEhPslupphhppMlpsPaETpSDSFYFV-s+LlMHNKA-YuYs ....................spt.lhtlchspFplRDh-oGpVLFcls+s.ss........................................................................tsGRhlcYpFssth...........................L+h+sV....u..sp.lpFos.uscslpsFRhlERHYF+sp..lL+.....sa-F-FGFsIPs.SpNThEplYEhP.shSpth.tphlps..psps-ohaFs.-spLlh+s+sch.Y....................................... 0 80 100 155 +5179 PF05352 Phage_connector Phage Connector (GP10) Finn RD anon Pfam-B_13828 (release 7.8) Family The head-tail connector of bacteriophage 29 is composed of 12 36 kDa subunits with 12 fold symmetry. It is the central component of a rotary motor that packages the genomic dsDNA into pre-formed proheads. This motor consists of the head-tail connector, surrounded by a 29-encoded, 174-base, RNA and a viral ATPase protein [1,2]. 
25.00 25.00 31.80 30.90 19.30 19.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.89 0.70 -5.19 4 23 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 22 39 0 24 2 271.50 30 88.42 CHANGED SYKo....Is-IQRp+....tNR..WFhaYhpYLhSLAYQhFEWEsLPsolDP.FLEKplHQhGaVuFYKDshhGYIAspGsLSGplshYNQPshap...ASSssYQKpFcLY.....Yc...Dh+Ecs.G......lVIYNNshthPTlshLELFAtcLAELKEhItVNQNAQKTPVlItAsDNN.LShKplYNpYEGNtPVIFspcphD..........oDsIcVFKTDAPYVlDKLsspKpsVWNEhMTFLGIpNANh-KKERhVsSEVpSNs-QIpuSuslaLKuRpEACchINEhYGLNlpVKhRh-IV ....................................................h...............................p..lpshhhplFpaEshPsslssh.LEp.l+Q.G.hVshh+Dthhs.hlhh.tshssthshYsp..s..shFp......uss.sa.pph...clhp....ap...chp.ps.s..............VVhhN....Nsh.hs.ssh-llEaYspcLA-lcto.hplNhpsp+sPhhItus-sN...plSlppLhsclpsGsPhl..hspcshs..........sDs..I-lh....s.ssssshls.cLppphpsphsEhhTaLGIpNssh-KKERhlscEApSNsphlpususIYhKuRpcAlchlNctYGLsIKsp................. 0 0 0 0 +5180 PF05353 Atracotoxin Delta Atracotoxin Finn RD anon Pfam-B_30981 (release 7.8) Family Delta atracotoxin produces potentially fatal neurotoxic symptoms in primates by slowing he inactivation of voltage-gated sodium channels [1]. The structure of atracotoxin comprises a core beta region containing a triple-stranded a thumb-like extension protruding from the beta region and a C-terminal helix. The beta region contains a cystine knot motif, a feature seen in other neurotoxic polypeptides [1]. 
25.00 25.00 35.00 76.10 20.80 19.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.72 0.72 -4.12 5 7 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 3 0 8 0 42.10 65 76.62 CHANGED CA+KRuWCuKTEDCCCPMKCIYAWYNQQSSCQsTISulFK.cC CA+KRsWCuKsEDCCCPMKCIYAWYNpQuSCQsTIoulaK.cC 0 0 0 0 +5181 PF05354 Phage_attach Phage Head-Tail Attachment Finn RD anon Pfam-B_59968 (release 7.8) Domain The phage head-tail attachment protein is required for the joining of phage heads and tails at the last step of morphogenesis [1]. 20.80 20.80 20.90 22.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.38 0.71 -4.53 6 548 2012-10-01 22:58:23 2003-04-07 12:59:11 6 2 289 2 4 217 26 112.70 74 98.42 CHANGED MuDFDNLFDsAlutADcsIlcsMGhsAslTSGpLpGuplsGVFDDPEsISaAuuGlRlEsSsPoLFVKTuslspL+RsDTLTIss-sFWVDRIoPDDGGSCaIhLsR.GpPPsssRRR ..............................MtDFpNhFDAAlAtsDcTIhthMGhS..AphT..SGtQsGu.lpGVFDDP.EsluaAGpGVRlEGSSPSLFVRTDsVRtlRRGDTLTIstE......sFWV.....DR.lSP.DD.GGSC.....aLWLs..R..G.PPAVNRRR..................... 0 0 0 1 +5182 PF05355 Apo-CII Apolipoprotein C-II Finn RD anon Pfam-B_6456 (release 7.8) Family Apolipoprotein C-II (ApoC-II) is the major activator of lipoprotein lipase, a key enzyme in the regulation of triglyceride levels in human serum [1]. 22.10 22.10 22.10 24.50 18.80 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.69 0.72 -4.11 6 40 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 29 4 19 42 0 73.00 46 75.96 CHANGED AphsQQDEssSPALLsphQESL.SYW-SAKAAAQcLYpKTYLPAVDEKIRDlYSKSTAAlSTYAGIFTDQlLShL+G- ............p.sQpDE.sSsuLLopVQESLhSYW-oAKssApsLYcKTYlsslDEKlRDhYSKoosAhoTYsGIhTDQlhplLtGc.................... 
1 1 2 6 +5183 PF05356 Phage_Coat_B Phage Coat protein B Finn RD anon Pfam-B_51500 (release 7.8) Family The major coat protein in the capsid of filamentous bacteriophage forms a helical assembly of about 7000 identical protomers, with each protomer comprised of 46 amino acid, after the cleavage of the signal peptide. Each protomer forms a slightly curved helix that combine to form a tubular structure that encapsulates the viral DNA [1]. 25.00 25.00 28.90 28.70 23.60 22.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.82 0.72 -4.29 2 14 2012-10-01 20:22:05 2003-04-07 12:59:11 6 1 13 15 2 14 0 80.70 65 99.56 CHANGED MKuMKppIAKFsPVtSFRNLCIAGoVTAAoShPsa.AuVIDTSuVEpAITDGpuDMpsIGGYIVGALVILAVAGLIYSMLRKA ..............MKQpIAKFsPVsSFRN..LCIAGoVT.AAoShPAF..A.....GVIDTSAVEuAIT-GQuDMcuIGGYIVGALVILAVAGLIYSMLRKA.... 0 1 1 2 +5184 PF05357 Phage_Coat_A Phage Coat Protein A Finn RF anon Pfam-B_7225 (release 7.8) Domain Infection of Escherichia coli by filamentous bacteriophages is mediated by the minor phage coat protein A and involves two distinct cellular receptors, the F' pilus and the periplasmic protein TolA. These two receptors are contacted in a sequential manner, such that binding of TolA by the extreme N-terminal domain is conditional on a primary interaction of the second coat protein A domain with the F' pilus [1]. 20.80 20.80 22.80 23.00 20.70 19.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.11 0.72 -4.39 3 41 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 15 21 0 89 0 63.30 45 24.64 CHANGED MKKllhAlshslPFYTH.........sATTsssCLuKPuhEsSho.NVWK-sco..RYANaEGCLashTGlVlss ....hpK...hhshslPhYoa............susspcsC.uK..............P....shEsShs.Nsap.sps..RYtNapGCLasATGVVVss.... 1 0 0 0 +5185 PF05358 DicB DicB protein Bateman A anon Bateman A Family DicB is part of the dic operon, which resides on cryptic prophage Kim. 
Under normal conditions, expression of dicB is actively repressed. When expression is induced, however, cell division rapidly ceases, and this division block is dependent on MinC with which it interacts [2]. 25.00 25.00 25.30 37.10 20.80 19.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.78 0.72 -4.49 3 416 2009-09-11 11:17:21 2003-04-07 12:59:11 6 1 215 0 1 106 0 61.40 74 95.88 CHANGED METLLPNVNTSEGCFEIGVllSNKsFTEDAINpRKhEp-LLN-lCIlSMLARL+Lh.KGp+Q METLLPNVNTSEGCFEIGVplSN.sFTEDAINpRKaEp-LLNclCIlSMLARL+Lh.KGptQ... 0 1 1 1 +5186 PF05359 DUF748 Domain of Unknown Function (DUF748) Yeats C anon Yeats C Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.73 0.71 -4.21 168 1412 2012-10-03 05:41:17 2003-04-07 12:59:11 6 20 431 0 618 1573 145 150.40 17 38.87 CHANGED htlplsplplp.s.upl..pasD.....pphs..shp..hplpsLshpl.sslu.ot.s.sp......sslp..l....puplspt...uslphpGpls..shs.hpsp..lplshc...slsLsshpPY.hsph.hshplppGpLsh....cLphp....hp.ps.pl.pspsplt..lc....pLp..ls-.....ps.ss.....pshh...sLs....l .........................................................................................lplsplplp.s..Gpl....papD...............tthp...shp......hplsslsh....sl...ssl..s..sh....s...sps.....................sslp....l...................pup.lsst...................uslshpGpls.........shs....php.........hplphp..slsLs.s....hp.sY....hsp.......h.....h.......sh....pl..p....p..GpLsh.......clphp........................hp.ps..pl..p.s...p...s...p.lt..ls....plp..ltp.....t..ps...................hh.......................................................... 1 137 349 518 +5187 PF05360 YiaAB yiaA/B two helix domain Yeats C anon Yeats C Domain This domain consists of two transmembrane helices and a conserved linking section. 
20.40 20.40 20.40 20.50 19.80 20.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.30 0.72 -4.42 19 1736 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 843 0 190 635 32 52.40 37 64.76 CHANGED ahshsasuhlluhshhhlGlasus.hpLstKGaahhshhhslhuslslpKssRD ..........ahhhuhhshllulshhllGLW.sus...h...hLuEKGYahsslhhulFushuhQKshRD...... 2 43 112 151 +5188 PF05361 PP1_inhibitor PKC-activated protein phosphatase-1 inhibitor Finn RD anon Pfam-B_69711 (release 7.8) Family Contractility of vascular smooth muscle depends on phosphorylation of myosin light chains, and is modulated by hormonal control of myosin phosphatase activity. Signaling pathways activate kinases such as PKC or Rho-dependent kinases that phosphorylate the myosin phosphatase inhibitor protein called CPI-17. Phosphorylation of CPI-17 at Thr-38 enhances its inhibitory potency 1000-fold, creating a molecular switch for regulating contraction [1]. 20.70 20.70 21.10 22.10 20.30 20.60 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.88 0.71 -4.51 8 231 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 78 4 125 200 0 108.80 38 79.73 CHANGED MAup+lG+Rhppp.pSss+upu..pustslQ+RpARVTVKYNRKELQRRLDVEKWIDspL-ELYpG+E--MPE.EVNID-LLDLpoDE-Ro++LpslLpuCsssTEsFIsELLt+L+GL+KQptLpppGlchPp.ph.sph............cs ............................t.........................................t......+.+..pu+lTVKYsR+cL...p+...RLslEcWI.ppLpcLYp.....s..pE.-.-.......hP-...E.....IDlD-LLDhpo--pRsp....+l.p.clL...hs....Ch...+P...T....E...s...FIp-LLs+l+Ghp+.s.......................tp................ 2 18 29 61 +5189 PF05362 Lon_C Lon protease (S16) C-terminal proteolytic domain Studholme DJ anon Merops Domain The Lon serine proteases must hydrolyse ATP to degrade protein substrates. In Escherichia coli, these proteases are involved in turnover of intracellular proteins, including abnormal proteins following heat-shock. 
The active site for protease activity resides in a C-terminal domain. The Lon proteases are classified as family S16 in Merops. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.21 0.71 -5.02 25 7706 2012-10-03 01:04:38 2003-04-07 12:59:11 8 39 4472 58 2231 8849 3459 174.80 32 25.91 CHANGED slsscsLccaLGh.+F+hspA-ccDpVGlVTGLAWTpVGG-lLsIEushhP..GKG.+LplTGpLGDVMKESApAAhSYlRS+ApchGIcscha-cpDIHlHVPEGAsPKDGPSAGlsMsTALVSsLTGhsV++DVAMTGEITLRGRVLPIGGLKEKlLAA+RuGIKpVIlPc-Nc+DL.c-lPcsl+csLclhsVcplD-VL+hALst .....................................................................................................................h........................................................................................G...hh.s..........................................G.....h....h.........lE.....s...........................................................t.........p...............s.........t..............h..............h...o...G..........h........t.............p.......................h.......p....c.....u........h..................s...........t.....h....hp....................t.......................................................................................................................................................................................h...........t................t.....................p...h....H.lp...h...s.t...u.uh....s..h..D...G......PS..A.G..lsh..s..s..A.l...l.............S...s...L.....o...........s.......t.............s...l......c....p....s......l....A.......h...T...G-Iol...pG.c.VhPI..G.G.lcEKl........l....A........A..p.......R.......u.......G.h.......c...t...l.....l.....l......P...............p....p...............N........t.+......-......l......p..............-..............l.........s....p......t.....l..........p....p....t.h.....p.....lh.s.V.p.p.lc-slphh..t..................................................... 
0 804 1468 1899 +5190 PF05363 Herpes_US12 Herpesvirus US12 family Finn RD anon Pfam-B_62991 (release 7.8) Family US12 a key factor in the evasion of cellular immune response against HSV-infected cells. Specific inhibition of the transporter associated with antigen processing (TAP) by US12 prevents peptide transport into the endoplasmic reticulum and subsequent loading of major histocompatibility complex (MHC) class I molecules [1]. US12 is comprised of three helices and is associated with cellular membranes [1]. 21.00 21.00 23.70 23.50 20.00 17.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -9.79 0.72 -3.47 2 19 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 1 0 19 0 80.40 53 95.56 CHANGED MSWALchsDhFLDs.Rss.RTYuDVpsEIpKRtREDREAARTAVpDPEhPLLpsPslhs-.As....ptohGsA+pppttsts.uP ......MuWALchsDsFLDs.Rss.RTYADVRcEIsKpuRED.REAARTAVtDPERPLLpsPulhP-hAs....sAohGsA+pppttshh.uP............ 0 0 0 0 +5191 PF05364 SecIII_SopE_N SecIII_SopE; Salmonella type III secretion SopE effector N-terminus Finn RD, Moxon SJ anon Pfam-B_18665 (release 7.8) Domain Salmonella typhimurium employs a type III secretion system to inject bacterial toxins into the host cell cytosol. These toxins transiently activate Rho family GTP-binding protein-dependent signaling cascades to induce cytoskeletal rearrangements. SopE, one of these toxins, can activate Cdc42 in a Dbl-like fashion via its C-terminal GEP domain Pfam:PF07487 [1]. This family represents the N-terminal region of SopE. The function of this domain is unknown. 19.10 19.10 21.80 45.70 18.80 18.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.40 0.72 -4.15 2 141 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 106 0 2 45 0 70.20 79 31.04 CHANGED TpITLSsQpaRIp+p-sp.lKEKoTEKs.hAKSIhAV+NpFIpLpopLS-RF..HppT-.ssTHFHRGsASEGR .TNITLSTQHYRIHRSDVEPVKEKTTEKDIFAKSITAVRNSFISLSTSLSDRFSLHhQTDIPTTHFHRGSASEGR........... 
0 0 0 1 +5192 PF05365 UCR_UQCRX_QCR9 Ubiquinol-cytochrome C reductase, UQCRX/QCR9 like Finn RD anon Pfam-B_18986 (release 7.8) Family The UQCRX/QCR9 protein is the 9/10 subunit of complex III, encoding a protein of about 7-kDa. Deletion of QCR9 results in the inability of cells to grow on grow on-fermentable carbon source n yeast [1]. 20.40 20.40 20.40 20.70 20.20 19.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.59 0.72 -4.33 33 239 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 211 52 166 258 1 53.30 36 49.35 CHANGED hsslYpslFRRNSsalssIhsGAFsFEh.AFDsusspla-shN+GKhWKDI+tcY. ......h.ptlYphlh+RsSsassslhsGAFhFEt.uaDtusspla-phN+GKhWKDI+tph...... 0 51 86 138 +5193 PF05366 Sarcolipin Sarcolipin Finn RD anon Pfam-B_33603 (release 7.8) Family Sarcolipin is a 31 amino acid integral membrane protein that regulates Ca-ATPase activity in skeletal muscle [1]. 26.40 26.40 26.80 65.70 20.60 26.30 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.43 0.72 -4.54 2 17 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 17 1 9 12 0 30.60 87 100.00 CHANGED MthsTpELFlNFTlVLITVlLMWLLVRSYQY MthsTRELFLNFTlVLITVILMWLLVRSYQY 0 1 1 1 +5194 PF05367 Phage_endo_I Phage endonuclease I Finn RD anon Pfam-B_22152 (release 7.8) Domain The bacteriophage endonuclease I is a nuclease that is selective for the structure of the four-way Holliday DNA junction [1]. 
25.00 25.00 25.60 25.40 22.60 21.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.73 0.71 -4.81 6 64 2012-10-11 20:44:44 2003-04-07 12:59:11 6 1 59 24 2 65 330 136.50 49 98.32 CHANGED MA..uahAKthppVtAaRSGLE-KsuKQLEuKGlKh-YEpahlPYVlPASsH+YTPDFlLPNGIhVETKGLa-o-DRKKHLLIREQHPELDIRhVFSSSRTKLYKGSPToYu-WCEKaGhpFADKLIPVtWL+EsspclPhshLKsKKGcK .........................................thtts.uaRSGLE-+lucpL-spGlpa-YEphclsYllPtp.sHpYTPDFlL.P.N..G..IhlEoKGha........-s-DR+KHLLI+EQaP-.L.DIRhVFSsSpo......KlYKGS.ToYu-WC-KaGhha.A.D.K....h.IPs-WL+E.ph.hs.shhh.ht........................ 0 0 0 1 +5195 PF05368 NmrA NmrA-like family Finn RD anon Pfam-B_25329 (release 7.8) Family NmrA is a negative transcriptional regulator involved in the post-translational modification of the transcription factor AreA. NmrA is part of a system controlling nitrogen metabolite repression in fungi [1]. This family only contains a few sequences as iteration results in significant matches to other Rossmann fold families. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.33 0.70 -5.11 25 4570 2012-10-10 17:06:42 2003-04-07 12:59:11 8 40 1637 75 2377 19410 5486 230.00 20 76.81 CHANGED ILVlGuTGh.GptlVcAulc.....sGapshALsRcs.................cschhpshpstGspll.pGD.ls...........................-+pSLscAlKtVDlVhssss........stpltpthpllcAhKcAG.VK+Fl.Sphuscsc..tspuh.PuhstF-pKtplc+hlcut......GIPaTaVhsshFss.Flsshst...ttsssspsphslhususscslhh...sEcDlGsaslpslcD.Pcph.pshhlphssshLohsElsslapKt...lGKs.........................................................V+Ysp ...................................................................................................lhlhG.A.T...G.p...h...G...t......t...l....l.c.......s.L.lp....................ss...h......p.....l...h.......s.....l.......s...R..ss............................................................p.p..........h...p.....t.....l.......t........t......t.......G.......s..........p........l.......h......p......u..D...h...s................................................................................s........s...u....L........t...p.....A.....h........p............G.......s.......-.......s......l........h...h..h...s..s...................................stch.....t.......p.....t......t.........s......l....l.......c.........A...........A....p........p...........A.......G.....V.....c.........+........h..........l..............h..............o..........s.........h.........s........t.........s........s.........t..............s...........................................................................h........s...........h........h.....s............t.........+......h....p.......h....c....c....h...l.....p.pp........................s.l...s...a....T...h..l......p........s.....u.......h.......a...h.............c................h.......h...s.....hh........................h.t....t............................h.......h.....h.h......s......s...s..s....t..hshh.....................sttD.h
...u....t....h....s....s.t........l....l....p.............p.....s.....t....p.......h....................s.....p....h...h.........h.....s...s..........p........t........h........o......h.p......c.l.sthhpch....h..G..pp...................................................................................................................................................................................................................................................................................................... 0 562 1309 1950 +5196 PF05369 MtmB Monomethylamine methyltransferase MtmB Finn RD anon Pfam-B_58618 (release 7.8) Family Monomethylamine methyltransferase of the archaebacterium Methanosarcina barkeri contains a novel amino acid, pyrrolysine, encoded by the termination codon UAG [1,2]. The structure reveals a homohexamer comprised of individual subunits with a TIM barrel fold [2]. 25.00 25.00 148.90 148.60 16.50 16.50 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.52 0.70 -6.32 3 22 2009-09-11 12:54:46 2003-04-07 12:59:11 7 1 14 0 15 29 4 418.70 52 99.42 CHANGED TFRKSFDCYDFYDRAKVGEKCTQDDWDLMKIPMKAMELKQKYGLDFKGEFVPTDKDMMEKLFQAGFEMLLECGIYCTDTHRIVKYTEDEIWDAINNVQKEFTLGTGRDAVNVRKRSVGDKRKPIVQGGPTGSPISEDVFMPVHMSYALEKEVDTIVNGVMTSVRGKuPIPKSPYEVLAAKTETRLIKQACAMAGRPGMGVXGPETSLSAQGNISADCsGGMpSTDSHEVSQLNELKIDLDAIAVIAHYKGNSDIIMDEQMPIFGGYAGGIEETTIVDVATHINAFVMSSASWHLDGPVHIRWGSTNTRETLTIAGWACATISEFTDMLSGNQYYPCAGPCTEMCLLEASAQSITDTASGREILSGVASAKGVVTDKTTGMEARMMGEVARATAGsEISEVNKILDKLVuLYEKNYASAPAGKTFQECYDVKTVTPTEEYMQVYDGARKKLEDLGLVF 
...................tKphssYDaaDRA+sGEKsopDDW..DlMpIPhKuhELKpKYsl-F.c.s.E.FlPTDcDMh-+LFpAGhEMLlECGIYCTDTcRllKYTEDEIh-AIsNs.KEasLGpG+DulsVhcRuhsDp...p...tPllQGGPTGuPlSEDlFhslHhSYAhE..VD.sI.VsGVhpolpG+sshPtSPaElhAu+oEsRLlKpAsthAGRPGMultGPETuloupGsl.uuDssGGhspsDSHEVSQLsELKIDl-AlshhAHYpssuslIMsEQMPIFGGYA.GGl.EETsIVDVATpINuhlMosAoaHLDGPVHlRWGSTsTREsLhlAGassusloc...Tch..lsGNQYYssAGPCTEMCLLEsuAQuITDTASGRElLSGVAuAKGVssD+oTGhEARhMGElARAsAGh-lo..ElNtIL-+LVuhYE...c...sassAPt...GKsFpECYDVpT...lsPT-EYhplYDtAtKcLc-lGL.............. 0 5 10 11 +5197 PF05370 DUF749 Domain of unknown function (DUF749) Finn RD anon Pfam-B_54547 (release 7.8) Family Archaeal domain of unknown function. This domain has been solved as part of a structural genomics project and comprises of segregated helical and anti-parallel beta sheet regions. 25.00 25.00 74.80 74.60 21.20 20.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.58 0.72 -4.04 5 27 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 27 2 19 26 2 87.80 44 89.91 CHANGED FlAsLlGIhTlKE.LsEEhcsFV+lKAulDK+ELcDDsclAIlNIcGTTSYaVlFLDsssSlEEI++ELEEs..GAclN+sScpIL++aL FlAsLluIhslcE.Lss.EhpsaVcl+Aul-cR.EL.ccsDplAILNIpGTsSYpVhFlDp.ssI-cIKc-Lcch..sAclNasSccIl+pal 0 4 9 15 +5198 PF05371 Phage_Coat_Gp8 Phage major coat protein, Gp8 Finn RD anon Pfam-B_31655 (release 7.8) Family Class I phage major coat protein Gp8 or B. The coat protein is largely alpha-helix with a slight curve [1]. 
19.80 19.80 19.80 21.50 18.70 19.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.63 0.72 -4.41 7 29 2012-10-01 20:22:05 2003-04-07 12:59:11 7 1 21 13 4 45 1 51.20 51 69.77 CHANGED AEssss...AptAhDSLpspAT-hIu.sWshVsslVsAsluIKLFKKFsSKA ......................ss.....AtsAhDuLtspAs-hIGhsWslVVslVGAsluI+LFKKFoSKA 0 0 3 3 +5199 PF05372 Delta_lysin Delta lysin family Finn RD anon Pfam-B_45919 (release 7.8) Family Delta-lysin is a 26 amino acid, hemolytic peptide toxin secreted by Staphylococcus aureus. It is thought that delta-toxin forms an amphipathic helix upon binding to lipid bilayers [1]. The precise mode of action of delta-lysis is unclear. 25.00 25.00 62.60 62.50 17.80 16.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.34 0.72 -6.77 0.72 -4.58 6 86 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 79 3 2 18 0 25.00 94 64.33 CHANGED MAuDIISTIuDFVKLIl-TVpKFTK MAQDIISTIGDLVKWIIDTVNKFTK 0 1 1 2 +5200 PF05373 Pro_3_hydrox_C L-proline 3-hydroxylase, C-terminal Finn RD anon Pfam-B_32425 (release 7.8) Domain Iron (II)/2-oxoglutarate (2-OG)-dependent oxygenases catalyse oxidative reactions in a range of metabolic processes. Proline 3-hydroxylase hydroxylates proline at position 3, the first of a 2-OG oxygenase catalysing oxidation of a free alpha-amino acid. The structure contains conserved motifs present in other 2-OG oxygenases including a jelly roll strand core and residues binding iron and 2-oxoglutarate, consistent with divergent evolution within the extended family. The structure differs significantly from many other 2-OG oxygenases in possessing a discrete C-terminal helical domain. 
25.00 25.00 26.90 26.30 20.50 18.40 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.21 0.72 -3.98 5 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 4 5 15 0 100.30 26 34.00 CHANGED spcEshtDsE..sssssPsVcsRKPaT-EscEuILu.lStlIu+tNFRDIVFlLSKlHFpYKVHssEsYDWLlEIuKRsGD-cLVEKAppl+RFhltcRAlGE ...................hshp..ssthpstlssRsths-pphEpLlu.lSplloctNaR-llhhLuKlHFp+-sssspsaDWLp-lA+RoGDsAllpKAcplRcahlpcRshsE......... 0 2 4 5 +5201 PF05374 Mu-conotoxin Mu-Conotoxin Finn RD anon Pfam-B_34209 (release 7.8) Family Mu-conotoxins are peptide inhibitors of voltage-sensitive sodium channels [1]. 19.30 19.30 20.50 20.40 18.20 14.80 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.30 0.72 -4.14 4 7 2012-10-01 22:06:18 2003-04-07 12:59:11 7 1 5 6 0 12 0 21.60 60 58.75 CHANGED RDCCTPPKKCKDRpCKPh+CCA ..R.CCssPKpC+sRpCKPp+CC. 0 0 0 0 +5202 PF05375 Pacifastin_I Pacifastin inhibitor (LCMII) Finn RD anon Pfam-B_35181 (release 7.8) Family Structures of members of this family show that they are comprised of a triple-stranded antiparallel beta-sheet connected by three disulfide bridges, which defines this as a novel family of serine protease inhibitors [1,2]. 26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -8.66 0.72 -4.44 36 190 2009-01-15 18:05:59 2003-04-07 12:59:11 8 32 46 12 121 244 4 37.20 36 11.86 CHANGED pC..sPGpsaK.p-..CNsCpCstsGh...ssCTh+uC.st.httsss .............sGpsa+..pD..CNsCpCs.sGp...hsCTpphCsst......s....... 0 66 91 112 +5204 PF05377 FlaC_arch Flagella accessory protein C (FlaC) Studholme DJ anon Manual Family Although archaeal flagella appear superficially similar to those of bacteria, they are quite distinct[1]. In several archaea, the flagellin genes are followed immediately by the flagellar accessory genes flaCDEFGHIJ. 
The gene products may have a role in translocation, secretion, or assembly of the flagellum. FlaC is a protein whose exact role is unknown but it has been shown to be membrane-associated (by immuno-blotting fractionated cells) [2]. 27.00 27.00 27.00 33.50 26.80 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.53 0.72 -4.07 10 60 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 54 0 41 64 2 55.90 39 17.60 CHANGED RlsELENcls+lsss...lsol++ENp-l+sslEcl-EsV....+-llsLYElVSp...plNP.Fl ..............RlsElEscls+lcss...lssl+pENcpl+csl-cl--sl....+clhsLYElVSp...pINP.Fl.......... 0 5 21 32 +5206 PF05378 Hydant_A_N Hydantoinase/oxoprolinase N-terminal region Yeats C anon Yeats C Family This family is found at the N-terminus of the Pfam:PF01968 family. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.01 0.71 -4.76 64 2308 2012-10-02 23:34:14 2003-04-07 12:59:11 8 31 1319 0 1145 2398 1126 178.40 28 21.44 CHANGED plGIDhGGTaTDslhhctsp......................thltshKlhoo......csshpGlppulpphttp.........hsplshlhhuTTluTNAlLEpcG............spsuLlsstGapDhlphshttp.th................ps.h.hc.hltlstRhss.c............................Gp.lpsl.D.ptlcphlpplpsptlpulAVshhaShtNPpHEhpltcllcchs ..................................lulDhGGTFTDslh...hcss....................................................stltshKl.os.........................cs...sspGlhpslpphh.tp..h.t..................hspls.......t..lthGT...........T...luTNAllER+G............s+s.u.Ll..s.o..p..G.a.c.D.h.lt.l.s..p......p..t...p..s..ph......as.....h...............................s...h.hhp..p.......hh..t.l.spRlsh.c.................................................................G.pll.p.sl...-.t.p.p.lcpt.l.p....p.l....t....s.........p..G..lc..ulAlshhaSa..t............p...s..p..H..Etpltclhpc..h......................................................................................................... 
0 363 745 976 +5207 PF05379 Peptidase_C23 Carlavirus endopeptidase Studholme DJ anon Manual Domain A peptidase involved in auto-proteolysis of a polyprotein from the plant pathogen blueberry scorch carlavirus (BBScV)[1]. Corresponds to Merops family C23. 25.00 25.00 33.70 43.10 24.50 22.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.85 0.72 -4.01 12 125 2012-10-10 12:56:15 2003-04-07 12:59:11 6 8 50 0 0 124 0 87.30 35 5.41 CHANGED .KNsCVIcAlupslpRpsscllpVLpcpsscclhc-ltpGtGlphhpLp.lFpsFDIsApVs.suchhhlNspGphptpFtlps-HhSa ....hNsCslpAlAsALsR+ps-VLslLscsss.sclh-pLtpGtGlshhhLtphFchFsIsAplshsGchhllNspG+lpuhFclps-Hlpa.. 0 0 0 0 +5208 PF05380 Peptidase_A17 Pao retrotransposon peptidase Studholme DJ anon Manual Family Corresponds to Merops family A17. These proteins are homologous to aspartic proteinases encoded by retroposons and retroviruses. 20.60 20.60 20.60 20.60 20.50 20.20 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.17 0.71 -4.38 8 650 2009-01-15 18:05:59 2003-04-07 12:59:11 8 62 40 0 609 764 6 127.10 25 12.22 CHANGED PTKRElLSplu+lFDPLGhluPlhl+uKlhhQElWpps......luWD-plPp-lppcWtsalcpLssLpQlRlPR.alshssspslpLHuFsDASpcAYuAAlYhRs.p.sssIpVsLlsAKTRVAPlKss.SIPRLELsAAlLLoRLssslpsphshtsschhsWT .................................................................o+Rtlhs.huphaD.PhGhhss..........hhl.hKhhhp.plhpt.t..........hsWc..p...ls..t..ph.......ppW.thhp..ph.......t.......p.t.h..p.hs...R...h..h.................t........t......p......h...p...lh...hFsDAS.p..tuausss....Ylhh..........t.t..t......s.p.h.lh..uKs+l.tP..h..c...t........ol..P.....+hEL.uh.hssph.....l.pth..t........................................................................ 0 393 436 605 +5209 PF05381 Peptidase_C21 Tymovirus endopeptidase Studholme DJ anon Merops A17 Family Corresponds to Merops family C21. 
The best-studied plant alpha-like virus proteolytic enzyme is the proteinase of turnip yellow mosaic virus (TYMV). The TYMV replicase protein undergoes auto-cleavage to yield two products. The auto-peptidase activity has been mapped to the central part of this polyprotein. 25.00 25.00 34.00 50.10 20.40 20.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.22 0.72 -3.93 13 44 2012-10-10 12:56:15 2003-04-07 12:59:11 7 4 32 0 0 51 0 105.50 35 5.58 CHANGED Ps.hsCLLoAlSspT+lSc-cLWcoLpslLPDS.LsNpElsoLGLSTDHLTALAahYshpsslaSscGslhaGlpsupppIsIoHTsGPP....uHFSs...hh.tssshsG .....PthsCLLsAlSstTslo.cpLWpsLsshLPDSLL.ssp-lsshGLSTDHhssLAahaphpsphhospt.lphGlpsAopphpIpHTsGss.......sHFsh...h........sss......... 0 0 0 0 +5210 PF05382 Amidase_5 Bacteriophage peptidoglycan hydrolase Studholme DJ, Garcia E anon Pfam-B_6845 (Pfam7.8) Family At least one of the members of this family, the Pal protein from the pneumococcal bacteriophage Dp-1 Swiss:O03979 has been shown to be a N-acetylmuramoyl-L-alanine amidase [1]. According to the known modular structure of this and other peptidoglycan hydrolases from the pneumococcal system, the active site should reside at the N-terminal domain whereas the C-terminal domain binds to the choline residues of the cell wall teichoic acids [2,3]. This family appears to be related to Pfam:PF00877. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.81 0.71 -4.58 8 235 2012-10-10 12:56:15 2003-04-07 12:59:11 8 30 204 0 32 361 31 134.80 28 32.91 CHANGED shEpuIAWMtARKG+.VoYSMs.hRsGPsSY.DCSSSVYhAL+uAGASsuGWhlNTEhhHsWLhcNGacLIucNtsWsApRGDIhI..WGt+.GuSuGAGGHsGMFIDusNIIHCNY...usNGIoVsNasppshhuGthhsYlY..Rhssuuoss .................................p..p.hltah.....t.p..c.sp..l.s..YS..M....R.G.....ss...u.......h....DCSuu..lhh.A..Lp.t.u.Gh......s..s.......u.h..h.s......s..Tpshhs.L..p.p..sG..a.ppl.s.....p...s...............h.......s....s....pcGDIhI..hGtp...GtSuG.s.sGHsslh...h....s..t....s..p...hIps...s..h..........s.s.s.............................................s.......................................................................... 1 13 19 26 +5211 PF05383 La La domain Bateman A anon Bateman A Domain This presumed domain is found at the N-terminus of La RNA-binding proteins as well as other proteins [1]. The function of this region is uncertain. 21.30 21.30 23.30 21.90 20.80 21.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.81 0.72 -4.17 33 1403 2009-01-15 18:05:59 2003-04-07 12:59:11 12 36 318 14 875 1319 11 58.80 37 10.47 CHANGED lh+QlEYYFSDpNLspDpFL+pphsc..-GaVPlpllusFp+l+pLo..............hphIlpAL+pS .........lt+QlEaYFS......cpNLs+.DtFL....hpph....c....p..........-......G....a.....VPlpllss.Fp+l+pLo...............sDhphlhcAL+pS............................... 0 260 430 669 +5212 PF05384 DegS Sensor protein DegS Moxon SJ anon DOMO:DM08518; Family This is small family of Bacillus DegS proteins. The DegS-DegU two-component regulatory system of Bacillus subtilis controls various processes that characterise the transition from the exponential to the stationary growth phase, including the induction of extracellular degradative enzymes, expression of late competence genes and down-regulation of the sigma D regulon [1]. 
The family also contains one sequence Swiss:Q8R9D3 from Thermoanaerobacter tengcongensis which is described as a sensory transduction histidine kinase. 24.20 24.20 24.60 26.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.73 0.71 -5.02 21 178 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 173 0 64 164 0 156.90 37 40.64 CHANGED DcIlpchlcslpcSKc-IFcIuEpuRpEapcLppELcplKpclsclIpcsDcLEhppRhARpRLsEVS+pFppYSEp-I+cAYEpAcplQlcLslhRpcEppLRcRRD-LEpRLhslpcTI-RAEpLluQluVsLsaLsuDLcpluctlE-hpp+QphG ..........DpIlpchlcsl-sSKppIFpIuEpuRpEhcpLpcELcplKppltclIcplDpLEhpp+puRpRLsEVS+sFpcaSEc-I+pAYEcAcclQlcLshh+p+EppLRpRRD-LERRLtslpchlERAEpLluQlsVlLsYLssDLppls.hl-shpp+Q.hG................. 0 32 53 57 +5213 PF05385 Adeno_E4 M_adenovirusE4; Mastadenovirus early E4 13 kDa protein Moxon SJ anon DOMO:DM07617; Family This family consists of human and simian mastadenovirus early E4 13 kDa proteins. Human adenovirus type 9 (Ad9) is unique in eliciting exclusively estrogen-dependent mammary tumours in rats and in not requiring viral E1 region transforming genes for tumorigenicity. E4 codes for an oncoprotein essential for tumourigenesis by Ad9 [1]. 25.00 25.00 31.00 31.00 23.30 19.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.51 0.72 -4.10 8 88 2009-09-11 08:31:06 2003-04-07 12:59:11 6 1 68 0 0 64 1 107.20 50 90.15 CHANGED MsLPsLPsPPVscDpuuCIuWLGLAauollDllRsI++cGlhIoPEAEplLsuLREWLaauapTERp+R+DRRRRslCpuRTaFCapKYEpVR+pLh.YDssppTlSl..ts ....MlLPsLPsPPl.c-ppuCIsWLGhAYsslsDVlRsIRh-Glhlos-ApcLLpsLREWLYauahTERp+R+..D.RRRRtlCsuRstFCapKY-pVRKplH.YsssppTlslt.s...... 0 0 0 0 +5214 PF05386 TEP1_N TEP1 N-terminal domain Bateman A anon Bateman A Domain This short sequence region is found in four copies at the N-terminus of the TEP1 telomerase component. The functional significance of the region is uncertain. 
However the conservation of two histidines and a cysteine suggests it is a potential zinc binding domain. 25.00 25.00 25.60 25.00 24.30 16.80 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.37 0.72 -4.49 8 102 2009-01-15 18:05:59 2003-04-07 12:59:11 6 16 20 0 38 89 0 29.80 72 4.25 CHANGED hEKlHGHlSsHPDILSLENRCLshLPDLQP ..MEK.HGHVSAHPDILSLENRCLATLPDLKs...... 0 4 4 4 +5215 PF05387 Chorion_3 Chorion family 3 Moxon SJ anon DOMO:DM07741; Family This family consists of several Drosophila chorion proteins S36 and S38. The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary [1]. 21.20 21.20 21.20 21.50 21.10 20.70 hmmbuild --amino -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.97 0.70 -4.96 4 75 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 25 0 19 73 0 201.60 45 88.03 CHANGED Mspa.alLhlsAssL..........ApAuYGusGuuG.GhGuuthuAuuuA......tsusGGst.SG..........GshpuusApl.spPspsu+lspsQuphtu.p.s.sYp.hhppusslupSls.sp................................NptpllhppP.sPIIV+pP..PsphlsstPPhVVKsAPVlh+h.PuVlhpp.hlp+hPpPlplpPVYVpVhKPst..Et.hVstspQsYtpsuaGsSphSt............GYGuusutuusAuuAuStusupA ...hsh..hlhAlAA.s.l.........sSANYGsstu.....tu...Gptht.h..ussss...tthsssAsGGstsuu...........p.hsupAth...ps.s.s-tAthhttsQuphtu.p.s.sYp.h.ppupslspSls.sp...............................pNptpllhppP.sPIIV+pP..PsthlsstPPhVV+ssPVlh+h.Pullhpp.hlp+hPp.lplpPVaVpVhKPst..Et.hss.st.sYsQ.s.......p.htt..................uautu.stu.tstsssst.....s............................................... 0 2 2 11 +5216 PF05388 Carbpep_Y_N Carboxypeptidase Y pro-peptide Moxon SJ anon DOMO:DM08350; Family This family is found at the N terminus of several carboxypeptidase Y proteins and contains a signal peptide and pro-peptide regions [1,2]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.68 0.71 -3.91 6 94 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 88 0 65 87 0 107.80 26 20.30 CHANGED M+LosSlL..suLAhshssApALul......QpPlu.......................h..ttpsll-psucslu......-sLcslsspl+shWsEMthpFPsplspLpapopPKhtlstKsss..WDF...........pVpssplsNY+L ............................................................................h....hs.st.Ahsh............Qp.ht.................................htpsh..hppht...ctht...........-.lc.sLos-s+slWsElthhaPcs....h.pphp...h...h...o...tPK..ts++scsc.WDa...........lV+su...cl............................... 0 13 34 58 +5217 PF05389 MecA Negative regulator of genetic competence (MecA) Moxon SJ anon DOMO:DM05333; Family This family contains several bacterial MecA proteins. The development of competence in Bacillus subtilis is regulated by growth conditions and several regulatory genes. In complex media competence development is poor, and there is little or no expression of late competence genes. Mec mutations permit competence development and late competence gene expression in complex media, bypassing the requirements for many of the competence regulatory genes. The mecA gene product acts negatively in the development of competence. Null mutations in mecA allow expression of a late competence gene comG, under conditions where it is not normally expressed, including in complex media and in cells mutant for several competence regulatory genes. Overexpression of MecA inhibits comG transcription [1,2,3]. 
21.10 21.10 22.10 21.50 20.70 20.10 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.32 0.70 -4.80 42 1309 2009-09-11 15:15:24 2003-04-07 12:59:11 7 2 1134 28 147 674 7 222.50 30 97.79 CHANGED MclERIN-sTlKlhIoh-DLp-RGhshpDlhhspc+sEchFashh-Elcp-csFps.sGslshQVhshp.pGl-lhlTKsp.....pt..t....................ptss-plpchlcpph..............................t-hcspl-.............................hllpFsshEDlIsLucplp..tslts...pLYpacspYYLtlhh...pphsptphcshluhhhEauptoshot..h.hLpEYGchlhppsAlppl+p .......................MchE+Is-sTlKlhlohpDLp-RGhshtDlh.h.s.pc+sE-FFashh-El.....c.--.s.Fhs..sG.sL.h...h..pVhshp.cGl-VhlTKuc............cp..php...s.................................sp.ts-phpchlcppl..............t..p.........................................................p..p.t.php............................................................................ttppsthp..hl..lpFscl-Dllshu..cplph...p...t...hps..pLYp...h..s..s...c..YYLslhh.............p..p..h.sp.hhsphhuhlhEa..u...p...s...os...h...Tp..........t...hL...pEYGchlhscsAlpplp.............................................................. 0 45 89 117 +5218 PF05390 KRE9 Yeast cell wall synthesis protein KRE9/KNH1 Moxon SJ anon DOMO:DM07408; Family This family contains several KRE9 and KNH1 proteins which are involved in encoding cell surface O glycoproteins, which are required for beta -1,6-glucan synthesis in yeast [1]. 20.50 20.50 21.60 21.60 19.70 19.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.54 0.72 -3.70 5 120 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 87 0 79 106 0 98.00 42 36.75 CHANGED IDSKSFTVPYTcQTGKoRaAPMQhQPGTKVTATTWSRKYATSAVTYYSThsuoP-QtTTlTPGWSYTISSuVNYATPAPMPSDNGGWYsPpKRlSLoTRKlNs..hR+l ..............tsaTVPYshQT.GhoRYAPMQhQP.sT+l.T..A.p.....Ta..ohp..asT.SuVohaoT.hhs..s.ss.tTTlTsuhoYolsSt.NhAoPAPhPs-...hY....h..................h.......................... 
1 11 35 63 +5219 PF05391 Lsm_interact Lsm interaction motif Bateman A anon [1] Motif This short motif is found at the C-terminus of Prp24 proteins and probably interacts with the Lsm proteins to promote U4/U6 formation [1]. 21.40 21.40 21.40 22.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.69 0.73 -6.64 0.73 -4.30 9 94 2009-01-15 18:05:59 2003-04-07 12:59:11 6 14 80 0 54 86 0 20.50 47 2.66 CHANGED sptsssst.hSNDDFRKhFL+ ..........sscss.hSNsDF+KhFL+.. 0 10 20 34 +5220 PF05392 COX7B Cytochrome C oxidase chain VIIB Moxon SJ anon DOMO:DM07697; Family \N 20.70 20.70 22.00 39.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.79 0.72 -4.40 8 75 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 43 51 35 92 0 75.90 63 98.05 CHANGED MFPLAKsALupLplpSIQQssARQ.uHpK+oP.DFHDKYGNulLASGusFClAsWoYssTQlGIEWNLSPVGRVTPKEW+-Q ...................MFPlu+sALspL.plRSIQQshARQ.uHpK+oP.DFHDKYGNuVLAuGAsFClusWsYssTQlGIEWNLSPVGRVTPKEW+cp... 1 2 3 9 +5221 PF05393 Hum_adeno_E3A Human adenovirus early E3A glycoprotein Moxon SJ anon DOMO:DM07622; Family This family consists of several early glycoproteins from human adenoviruses. 21.40 21.40 21.40 22.50 20.80 21.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.48 0.72 -4.32 3 49 2009-09-11 15:19:00 2003-04-07 12:59:11 6 2 27 0 1 39 0 97.20 47 42.85 CHANGED MTsTTNA.......sTATGLTSTpNhPQVSAFVNNWANLGMWWFSIALMFVCLIIMWLICCLKRKRARPPIY+PIIVLNPNNDGIHRLDGLKsCSFSLsV .........................t.ptss.s..ssh...............s..hTSLA.TNETsVshMasQsau....GL..D..I..T..FLVVCGIFILlVLLYFVCCKARcKSRRPIYRPVIG.......................ps.................. 0 0 0 1 +5222 PF05394 AvrB_AvrC Pseu_avirulence; Avirulence protein Moxon SJ anon DOMO:DM07798; Family This family consists of several avirulence proteins from Pseudomonas syringae and Xanthomonas campestris. 
20.00 20.00 21.80 21.50 18.60 18.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.07 0.70 -5.45 6 40 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 22 4 2 32 1 292.60 48 94.26 CHANGED MGCloSKtssluSsshpsutosSs............pt+ushhs.LpGPsptScLpsapQuLVGsARWPDct......N+sssPcphcYscShYppSRthGuSlAsGcIsSFp-LWpcAp-WRhSRhsps-sl...FuosRsPNoc..FVTPLtcPYc.lh-RhsN+pDucschhpDphFhs.csKsaR.sGpIsGEsIPLTplosusDccA..-RhpchhpcLcspuhpDh...ucPNhIsHTsAEYlPpIhcHlEsLYppAlDsuLSppcALchlAclHWWsApAhPDcRGSAAKAEhssRSIApA+Gl-LPPhRpGIVPDlEAMhhuccpFVcsY.uhFEps .....................................................................................................................................................................................................ssh....uS.sstsuuopSp............thpushhs.LpGPsphScLo.cpQSLVGsARWPDct......N+ssT.cphcYs+ShYpuSRhAGuuIASGpIsSFs-LWpcAppWRhSRhuus-so..sFAs.RhPNoc..FVTsLppPYc.ll-RhpN+sDuchchh-sphhttlcsKsaR.pGtIsGEsIPhTplssusDcsA..sRhcsh..pc.cppthpp.httucPshIsHTsAEYl.pIhcHlEsLYhpAlDsoLpc+cAhcllAclHWWsAsAsPDcRGSAAKAEhusRSIApA+Gl-.hPPhRpGIVPDlEAMhp.SccpFVtpYsshF-p....... 1 1 1 1 +5223 PF05395 DARPP-32 Protein phosphatase inhibitor 1/DARPP-32 Moxon SJ anon DOMO:DM07433; Family This family consists of several mammalian protein phosphatase inhibitor 1 (IPP-1) and dopamine- and cAMP-regulated neuronal phosphoprotein (DARPP-32) proteins. Protein phosphatase inhibitor-1 is involved in signal transduction and is an endogenous inhibitor of protein phosphatase-1 [1]. It has been demonstrated that DARPP-32, if phosphorylated, can inhibit protein-phosphatase-1 [2]. DARPP-32 has a key role in many neurotransmitter pathways throughout the brain and has been shown to be involved in controlling receptors, ion channels and other physiological factors including the brain's response to drugs of abuse, such as cocaine, opiates and nicotine. 
DARPP-32 is reciprocally regulated by the two neurotransmitters that are most often implicated in schizophrenia - dopamine and glutamate. Dopamine activates DARPP-32 through the D1 receptor pathway and disables DARPP-32 through the D2 receptor. Glutamate, acting through the N-methyl-d-aspartate receptor, renders DARPP-32 inactive [3]. A mutant form of DARPP-32 has been linked with gastric cancers [4]. 21.20 21.20 21.50 26.20 20.20 20.00 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.36 0.71 -4.10 12 144 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 40 0 69 136 0 129.30 42 89.78 CHANGED MEs.pu.+KIQFoVPh.tspLDPcAsE.IRRRRPTPATLhhsS-pSSP..DE-psPp.htpsshphs.ppRpp.hshhsPohKtlQhlsEpHLpp.ushpEp.sp.tpp..sp.c.hs.sctpstpTpsp.....pSc.st.s.ppDups-s................G.pGshcpoh.ps.spccpsup....-Ps ..............M-s.ps.+KIQFsVPh....s.plsPctsE.......p........IRRRRPTPAoLhhhs-poSP..--ct.sp.......................hhp.p.ph..ppRhp.sshssPohK...tlQhhhpp.HLtp..tp....p.E..........................................................................................................t................................................................... 0 3 6 21 +5224 PF05396 Phage_T7_Capsid Phage T7 capsid assembly protein Finn RD, Moxon SJ anon DOMO:DM08648; Family \N 24.00 24.00 24.20 24.20 23.70 23.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.59 0.71 -3.97 6 46 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 41 0 2 42 65 122.10 41 41.72 CHANGED spLSEtSYtcLttAGYS+uFVDSYI+GQEALV-QYVsuVlcYAGGpEpFsAIhsHLEusNPsAupSL-uAlps+DLATlKAIINLAGtShsKpFG+pPpRSlTpRAhP.ApPsAsp+...EGFuspu ..spLS--SYpcLAcAGYo+uFVDSYI+G.......Q...EA..........LVcpYVpuVh-YAGGcEpFstlhsHh.csp..NPssupuLssAlpspDlATlKAllNLustohscpFG+pspRsloppAhP.Ap..stpp...puFtsp............................................... 
0 0 1 2 +5225 PF05397 Med15_fungi GAL11; Mediator complex subunit 15 Moxon SJ anon DOMO:DM07536; Family GAL11 or MED15 is one of the up to 32 or so subunits of the Mediator complex which is found from fungi to humans [1]. The Mediator complex interacts with RNA polymerase II and other general transcription factors to form the RNA polymerase II holoenzyme [2], thereby affecting transcription through targeting of activators and repressors [3]. This family is found in fungi and the small metazoan starlet anemone. 21.60 21.60 21.60 22.80 21.10 20.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.33 0.71 -4.11 19 89 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 83 0 62 99 1 114.00 24 9.50 CHANGED psp.hpp....l+plhpElu+ss.tLpslo..Los-pKstlpppLppspphhupV-sllshhahlopsccss+pLlQMRhhhKc.h....-ts.pGhallsP-hl-Klh.phpKaachh+tpl ..................................sp..chl+plhp-ss+sshphpsls..los-cKstlpppL..ppspphhupl-s.llshhh.hlspsEcsl+pLlQM+hhlpc.h....-....hhhps.hahls.-hlsphh.phpKaaphhhtph.............. 0 16 32 54 +5226 PF05398 PufQ PufQ cytochrome subunit Moxon SJ anon DOMO:DM07133; Family This family consists of bacterial PufQ proteins. PufQ is required for bacteriochlorophyll biosynthesis serving a regulatory function in the formation of photosynthetic complexes [1]. 22.10 22.10 22.90 22.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.57 0.72 -3.94 11 24 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 24 0 8 20 59 74.20 43 95.60 CHANGED MoDhTossPhh....+s++sP+..sEFhlYFAlIFlsAlPhAhlsWhhsslRptolsp+GPlARAWspAchITPhIFuA .MoDhssssPsp....+s++ss+...sEFhlYFAlIhluAlPlAhlsWhhsslRptplsc+GPhA+AWt-ActITPhIFpA......... 0 1 4 6 +5227 PF05399 EVI2A Ecotropic viral integration site 2A protein (EVI2A) Moxon SJ anon DOMO:DM07597; Family This family contains several mammalian ecotropic viral integration site 2A (EVI2A) proteins. 
The function of this protein is unknown although it is thought to be a membrane protein and may function as an oncogene in retrovirus induced myeloid tumours [1,2]. 25.00 25.00 40.70 40.70 22.20 22.20 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.63 0.70 -5.10 3 38 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 30 0 21 34 1 200.30 59 94.37 CHANGED MEHKGH.YLHLAFLMTTVF.SLSsGT+sNYT+LWAsSsouWsSlsQNtT.SRN.PEsNNTNPlTPEVsaKuouTscPpTSsPVPLsSTSspchaTPSuspNSPsTup..tNTSKS+GETFKKEVCEENssNhAMLICLIIIAALFLICTFLFLSTVVLANKVSSLKRSKQVGKRQPRSNGDFLASsGLWPAESDTWKR.sKELTGSNLVMQS..oGVLTAsRERKcEEGTEKLT ..............McppG.p.YLHLAFLhTsla.SLSsGT+uNYTcLWssssosWs..sshQNto.u+s.sps.sTsPhTsp...ss.h+s..suTsh...s...p...o...u...t.....ss.LsspSp..........clhhPs.s.spNSssTsp..htsToKS+uElFKK-lCEENssphAMLICLIIIAVLFLICThLFLSTVVLANKVSSL+RSKQsGKRQPRSNGDFL.ASsGLWPAESDTWKR.sKpLTGssLhMQS..oGsLTAsRERKcEEuTEKLs................................. 0 1 2 7 +5228 PF05400 FliT Flagellar protein FliT Moxon SJ anon DOMO:DM07567; Family This family contains several bacterial flagellar FliT proteins. The flagellar proteins FlgN and FliT have been proposed to act as substrate specific export chaperones, facilitating incorporation of the enterobacterial hook-associated axial proteins (HAPs) FlgK/FlgL and FliD into the growing flagellum. In Salmonella typhimurium flgN and fliT mutants, the export of target HAPs is reduced, concomitant with loss of unincorporated flagellin into the surrounding medium [1]. 
22.30 22.30 22.40 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.73 0.72 -3.58 109 993 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 913 12 186 524 21 83.70 30 72.78 CHANGED hhpsuc.pscW.-plspLtpp.pppll.ptl.t..............tlsts.pppttphlcp....Ilsp-p......plpshhptphscLpptlpptppppphspsY ........................hLphAp.ctpW.-pLlt.Ehs.ahphl.ppltp................................s..sshopsh..pcphpshl+p....IL-N-p......pl+pLlQhRh-cLuphlspuppp+slhsuY................ 0 37 94 141 +5229 PF05401 NodS Nodulation protein S (NodS) Moxon SJ anon DOMO:DM07274; Family This family consists of nodulation S (NodS) proteins. The products of the rhizobial nodulation genes are involved in the biosynthesis of lipochitin oligosaccharides (LCOs), which are host-specific signal molecules required for nodule formation. NodS is an S-adenosyl-L-methionine (SAM)-dependent methyltransferase involved in N methylation of LCOs. NodS uses N-deacetylated chitooligosaccharides, the products of the NodBC proteins, as its methyl acceptors [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.23 0.71 -4.92 7 251 2012-10-10 17:06:42 2003-04-07 12:59:11 6 6 221 5 103 7241 2646 154.90 30 58.66 CHANGED opsssapLLpRELAA-DPWRLDuNsFEpcRHsQhLRLSLupGslusALEVGCAAGAFTE+LAPaCpRLTVIDVhPpAIsRsRpRhpc.uHISWlssDVpQFSosEhFDLIVVAEVLYYltDlsphRsAlcNLVpMLAPsGpLVFGSARDAsCpRWGHlAGAETVIuhLsEsLhEVERlcCpGsSssEDCLLu+FppP.uuS .....................................................................t.hht................s....sDP.W..thp....p....p....h..Y.E....p.....c...+....h...t.......t....h...h.....h.h...s.....L..s...p...t...c....a....p...p....s........h..El...GC..u.s.G....t....L.....o....t...p.....L.........A....t....R.....C........s.......c.......L..........l...s......h....D....l........u.....s......p........A.....l....p......h.........A......+.......p.........R.......l............s.........s..............h.......s.........c.........l......p.......h........h......p........s.........s..........l..............s.......p....................h..........P..........p........t........p.............F........D........L.I...V....l....S....E....l...h....Y....Y...L....s.....s........h....s....c....L......p.....t....h....l.p.....p....h...h.t....t...Ls.ss.G.....p..l....l.......................................................................................................................................thhht........................................................................... 0 19 51 77 +5230 PF05402 PqqD Coenzyme PQQ synthesis protein D (PqqD) Moxon SJ anon DOMO:DM07172; Family This family contains several bacterial coenzyme PQQ synthesis protein D (PqqD) sequences. This protein is required for coenzyme pyrrolo-quinoline-quinone (PQQ) biosynthesis [1,2]. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.23 0.72 -3.96 330 1200 2009-01-15 18:05:59 2003-04-07 12:59:11 7 19 972 1 415 1109 164 72.60 20 58.04 CHANGED s.phllls.ppth................h..............................pLstsushlac.hl.-..u..t.p...o..lpplsptLtpc.a.ssstp....h.ppDltpalppLtp...psllp ...............................................hlll..pthh.......h.......................pLspsushlhphl.-.....G..p..p..o..lspIsp.tLtpc..a..ssstp.......h.t...pDlhpalpplpp.pthl............ 0 140 270 343 +5231 PF05403 Plasmodium_HRP Plasmodium histidine-rich protein (HRPII/III) Moxon SJ anon DOMO:DM07479; Family This family consists of several histidine-rich protein II and III sequence from Plasmodium falciparum [1,2]. 45.80 45.80 46.30 45.90 45.60 45.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.65 0.70 -13.54 0.70 -4.76 3 1003 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 3 0 3 1004 1 117.00 75 96.41 CHANGED MVSFSKNKlLSAAVFASVLLLDNNNScFNNNLhSKNAKGLNsNKRLLHEoQA....HssDAHHAHHsADAHH.........AHHAADAHHAHHAADAHHAHHAADAHHAHHAADAHHAHHAA.AHHAHHAADAHH...AHHAAD...AHHAAD...AHHAAYAHH...AHHAADAHHAAs.........AHHAADAHHAADAHHAAa.......uHpAHHAA......DAHHAsDAH...HtsDAHHsu.........sAHHAADAHHAADAHHAAc .........................................................................AHH............AHHs..ADAHHAHHsADAHHAHHAADAHH...AHHAADAHH..A...HHAAc...AHH...AHHAAsAHH...AHHAAD...AHHAAc...AHHAA.AHH...AHHAADAHHAss...AHHAss..A...H...H...A................................................................................................................................................... 0 3 3 3 +5232 PF05404 TRAP-delta Translocon-associated protein, delta subunit precursor (TRAP-delta) Moxon SJ anon Pfam-B_7178 (release 7.7) Family This family consists of several eukaryotic translocon-associated protein, delta subunit precursors (TRAP-delta or SSR-delta). 
The exact function of this protein is unknown [1,2]. 25.00 25.00 26.90 26.60 19.30 18.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.86 0.71 -4.80 16 146 2009-09-11 06:46:05 2003-04-07 12:59:11 7 4 109 0 73 146 0 155.00 43 90.57 CHANGED hhth...hllslsluhsuupsCssPplp.sosYoToDAsIsophsFlsEFoLpCsN.GApsluLaA-Vs.GKlhPVsRssDsu+YQVSWs.EhKpApSGsYpV+haDEEuYuslRKAQRsGEDhsslKPLFTVsVcHpGu.apGPWlsoEllAsllshllhYhAaosKS+l.u ....................hhh.....hhhhhh..hhshss.u.csCtpPp.lo.sShYTToDAsIsocssFIsEhoLpCpN.sspshuLYA-Vs......GK......hPVsRupDs...s+YQ.VSWoh-aKpApuGsYpV+hFDEEuYutlRKAQ.R.ssEDls.slpPLFoVsVcH+.Gs.apGPWVsoElLAAhlulllhYhAFosKoplp................................. 0 28 32 54 +5233 PF05405 Mt_ATP-synt_B Mitochondrial ATP synthase B chain precursor (ATP-synt_B) Moxon SJ, Fenech M anon Pfam-B_7506 (release 7.7) Family The Fo sector of the ATP synthase is a membrane bound complex which mediates proton transport. It is composed of nine different polypeptide subunits (a, b, c, d, e, f, g F6, A6L) [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.62 0.71 -4.78 31 515 2012-10-02 21:03:42 2003-04-07 12:59:11 9 8 409 4 214 1035 712 150.90 25 68.55 CHANGED hhshhhSKclhlhN-EsllshsFluFlhhshKphusshsphhDschcplpsphpps+stphps.lpcpl-ptp.p.p.l.schspsLhpsp+-ssthptcstppptpstlspEl+s+L-ohlphpsshRph.QcclsptlhspVtpplss.sp...hpppsl...ppultplc .....................hhhhhSpclhlhN-Ehl.s.uh.s.h.lu.h..h..h.h.s.h.K...p.h.G.s..s.h.p...phhDtphpplppphp..phps..sphps..lppph-p.t+..phpth...lph.p.p.h.lh...-...s.pc.......p...s..h.t.h..p...hp...t..t....p...p.....p..t...h...ltpp.lc...t...pLc.hh.l..p.h....ps.p....h........+..pp.Q.c...c...h...sp...h...l....h....t...p....Vt.p.phts..p.........ppphl....tpsls.l.................................................................................... 
0 74 110 169 +5234 PF05406 WGR WGR domain Bateman A anon COG3831 Domain This domain is found in a variety of polyA polymerases as well as the E. coli molybdate metabolism regulator Swiss:P33345 and other proteins of unknown function. I have called this domain WGR after the most conserved central motif of the domain. The domain is found in isolation in proteins such as Swiss:Q9JN21 and is between 70 and 80 residues in length. I propose that this may be a nucleic acid binding domain. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.02 0.72 -4.02 109 1331 2009-01-15 18:05:59 2003-04-07 12:59:11 10 90 798 5 518 1209 72 76.20 28 11.75 CHANGED st....hhhphsshs...p..sssKFYtlpl..h......t........hplhtcWGR..........lGss.GQpphc...sFs..s.ppA.hptap+lhpcKp..ppG....a..tp....t.pht ...................ht...........h....s...p...pspKFatlpl.........................s.......hplahpWG+...........................l..Gsp..Gp...s...plc.................sFs..sttpA.hcthtK...lht-Ks..+cG.atp.p....s.......................................... 0 180 301 419 +5235 PF05407 Peptidase_C27 Rubella virus endopeptidase Studholme DJ anon Manual Family Corresponds to Merops family C27. Required for processing of the rubella virus replication protein. 
21.30 21.30 21.40 364.30 20.10 21.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.05 0.71 -4.53 5 69 2012-10-10 12:56:15 2003-04-07 12:59:11 7 2 10 0 0 71 1 166.00 98 11.55 CHANGED WRCRGWQGMPQVRCTPSNAHAALCRTGVPPRVSTRGGELDPNTCWLRAAANVAQAARACGAYTSAGCPKCAYGRALSEARTHEDFAALSQRWSASHADASPDGTGDPLDPLMETVGCACSRVWVGSEHEAPPDHLLVSLHRAPNGPWGVVLEVRARPEGGNPTGHF WRCRGWQGMPQVRCTPSNAHAALCRTGVPPRVSTRGGELDPNTCWLRAAANVAQAARACGAYTSAGCPKCAYGRALSEARTHEDFAALSQRWSASHADASPDGTGDPLDPLMETVGCACSRVWVGSEHEAPPDHLLVSLHRAPNGPWGVVLEVRARPEGGNPTGHF 0 0 0 0 +5236 PF05408 Peptidase_C28 Foot-and-mouth virus L-proteinase Studholme DJ anon Manual Family Corresponds to Merops family C28. Protein fold of the peptidase unit for members of this family resembles that of papain. The leader proteinase of foot and mouth disease virus (FMDV) cleaves itself from the growing polyprotein and also cleaves the host translation initiation factor 4GI (eIF4G), thus inhibiting 5'-cap dependent translation. 21.10 21.10 21.10 21.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.28 0.71 -4.82 4 452 2012-10-10 12:56:15 2003-04-07 12:59:11 6 8 73 14 0 458 0 200.20 80 11.04 CHANGED MsTocsahl...........lhtsF......hch+slF.s+hstphEhT..........uhPNpcsNCWLNuLhQLaphlD.s...........................LF-thYhsPtshTl-sIc.lp-.Ttl.-LpcGuPPhlVla.lppphpsslGTso+PpplClhsGs.hsLuDFaAulhhKs.pHAVFhhsTS-GWhsIDD-chYP.TPcscsVLs...........asPYD.EsLss-apcphtphLR .......................................MNTTDCFIA...........LlaAl......REIKsLFLoR.sp..GKMEFTLaNGEKKTFYSRPNNHDNCWLNTILQLFRYVDEP...........................FFDWVYpSPENLTL-AI+QLEElTGL...ELHEGGPPALV...IWNIKHL.LaTGIGTASRPSEVCM.VDGTDMCLADFHAGIFLKGQEHAVFACVTSNGWYAIDDEDFYPWTPDPS.DV.L..V...........FVPYDQEPLNGEWKAKVQ++LK....................... 0 0 0 0 +5237 PF05409 Peptidase_C30 Coronavirus endopeptidase C30 Studholme DJ anon Manual Family Corresponds to Merops family C30. 
These peptidases are involved in viral polyprotein processing in replication. 25.00 25.00 217.90 217.80 20.20 15.60 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.82 0.70 -5.34 5 594 2012-10-02 13:45:52 2003-04-07 12:59:11 8 34 208 139 0 625 0 291.30 52 5.41 CHANGED GLWLGDpVYCPRHVIASsTohSsIDY-stLSllRLHNFSIsSss.sFLGVVSApM+GulLlLKVsQsNscTPKYoF+oV+PGESFNILACY-GsssGVYGVNMRSNaTIRGSFIsGACGSVGYNLcNGoVcFVYMHQLELGoGsHVGSDL-GsMYGGYEDQPslQlEussshhTsNVVAFLYAAIIN......GssWWLpSooloVEcYNcWAtsNGFTslsuoDAFohLAAKTGVsVE+LLcuIpsLNsGFGGKQILGasSLsDEFTPsEVl+QMaGVNLQSG+V+sh.....spsslhlhuFa .GLWLsDsVYCPRHVIsosschssssYsclLsphssHsFpVt..s..sN...VpLsVlutpMpGslLhL..pVshuNscTPKYKFsplpPG-oFolLAsYsGsPsGlYtVsMRoNtTIKGSFLsGoCGSVGasl-t.ssVpFsYMHHLELssGsHsGTDLpGcFYGsahDppssQhsssDshhTsNVlAWLYAAl..IN......uspWFlp...ssosol-DFNhWAhsNuaosl....sss..s..s....lshLuAhTGluVpclLsuIpc.L.psGasG+sILGpssLEDEhTPt-VhpQhuGVpLQuphh+hl....hupphhhhh....h............ 0 0 0 0 +5238 PF05410 Peptidase_C31 Porcine arterivirus-type cysteine proteinase alpha Studholme DJ anon Manual Family Corresponds to Merops family C31. These peptidases are involved in viral polyprotein processing in replication. 25.00 25.00 188.90 186.70 23.90 18.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.45 0.72 -4.17 3 229 2012-10-10 12:56:15 2003-04-07 12:59:11 8 8 11 1 0 264 0 105.00 92 4.26 CHANGED L+WclPIuaPTVECoPuGsCWLSAIFPIARMTSGNHNFQQRLV+VAuVlYRDGpLTP+HL.+ELQVYERGCRWYPITGPVPGlALaANSlHVSDcPFPGATHVLTN LRWTLPRAFPTVECSPAGACWLSAIFPIARMTSGNLNFQQRMVRVAAEIYRAGQLTPsVL.KsLQVYERGCRWYPIVGPVPGVuVaANSLHVSDKPFPGATHVLTN. 0 0 0 0 +5239 PF05411 Peptidase_C32 Equine arteritis virus putative proteinase Studholme DJ anon Manual Family These proteins are characterised by a region that has been proposed to have peptidase activity involved in viral polyprotein processing in replication. 
22.10 22.10 23.10 24.50 21.40 22.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.62 0.71 -4.43 4 221 2012-10-10 12:56:15 2003-04-07 12:59:11 7 9 12 1 0 270 0 127.60 80 4.95 CHANGED sp+uC..L.t-.ss.-........G+CWhplF.sh.sh.stscEhchAspFGYQTptGVpGKYLtRRLQVpGLRAVsc..sGshlV.shts.pSWIRHlshAsEsV.ssF.clschcllPNsp....P.h.psp+hFRFGs+KaY ..........hQaGC..LPAD.TVPE........GNCWW+LF.DSLPPEVQhKEIRHANQFGYQTKHGVsGKYLQRRLQVNGLRAVTD.ocGPIVIQYFSVKESWIRHLKLsEEPSLPGFEDLLRIRVEPNTS....PLAsKDEKIFRFGSHKWY.............. 0 0 0 0 +5240 PF05412 Peptidase_C33 Equine arterivirus Nsp2-type cysteine proteinase Studholme DJ anon Manual Family Corresponds to Merops family C33. These peptidases are involved in viral polyprotein processing in replication. 25.00 25.00 29.30 39.20 19.90 17.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.63 0.72 -4.24 4 524 2012-10-10 12:56:15 2003-04-07 12:59:11 7 17 14 0 0 536 0 105.90 66 6.01 CHANGED sYSPPuDGuCGhHCISAIhNchhsssFsTpLsppsRsss-WhSDpDLhQhl.shRLPATl...GtCPSApYhLchsspHWTVThRpG.hAsutLu.ECVpGVC.utEssl .....YSPPAEGNCGWHCISAIANRMVNSsFETTLPERVRPSDDWATDEDLVNsIQIL.RLPAALDRNGACsSAKYVLKLEGEHWTVSVhPG.MSPoLLPLECVQGCC.EHKGGL.... 0 0 0 0 +5241 PF05413 Peptidase_C34 Putative closterovirus papain-like endopeptidase Studholme DJ anon Manual Family Corresponds to Merops family C34. Putative closterovirus papain-like endopeptidase from the apple chlorotic leaf spot closterovirus. 22.40 22.40 22.40 22.40 21.10 20.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.84 0.72 -3.93 5 13 2012-10-10 12:56:15 2003-04-07 12:59:11 6 2 6 0 0 13 0 92.00 74 4.88 CHANGED KlKFIKGKaDCLFsSlAclI+KKPEEVMhFlPHlLDRCISNKGCSLDDLRuICpuYEIKVECEGDCGLlElGSlGLPLGRLlLRGNHFoVsS .....KL+FIKGKFDCLFVSlAEIIHKKPEEVMMFlPHI....hDRClSN.+GCSLDDAKAICEKYEIKIECEGDCGLVECGo.GLSlGRMLLRGNHFoVAS. 
0 0 0 0 +5242 PF05414 DUF1717 Peptidase_C35; Viral domain of unknown function (DUF1717) Studholme DJ, Mistry J anon Manual Domain This domain is found in viral proteins of unknown function. 21.30 21.30 82.20 80.70 19.00 18.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.28 0.72 -3.56 2 9 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 5 0 0 10 0 83.60 67 3.87 CHANGED sLlphllspD.S--LhssIEp...D+slSpchlE........cVhIhhG.ShlhhsD.hpMtslpht.GhsG+haC............pIKsscst.pS..tp DLLNFLVNEDIS-ELL-CIEE...DKGLSHEMIE........EVLITKGLSMVYTSDFKEMAVLNRKYGlNGKMYC............TIKGNHCELSS....KE. 1 0 0 0 +5243 PF05415 Peptidase_C36 Beet necrotic yellow vein furovirus-type papain-like endopeptidase Studholme DJ anon Manual Family Corresponds to Merops family C36. This protease is involved in processing the viral polyprotein. 25.00 25.00 27.40 41.00 20.40 17.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.46 0.72 -4.14 2 8 2012-10-10 12:56:15 2003-04-07 12:59:11 6 2 4 0 0 10 0 103.90 76 4.92 CHANGED NLlSRPNNCLVVAISECLGVTLEKLDNLMQANssTlsKYHsWLpKKoPSTWpDCRhFADALKVSMaVKVLSDKPYDLsY.VDGAuSslTLaLhGKESDGHFlAA .NLVSRPNNCLVVAISECLGVTLEKLDNLMQANAVTLDKYHAWLSKKSPSTWQDCRMFADALKVSMYVKVLSDKPYDLTYEVDGAGSSVTLHLlGKESDGHFIAA... 0 0 0 0 +5244 PF05416 Peptidase_C37 Southampton virus-type processing peptidase Studholme DJ anon Manual Family Corresponds to Merops family C37. Norwalk-like viruses (NLVs), including the Southampton virus, cause acute non-bacterial gastroenteritis in humans. The NLV genome encodes three open reading frames (ORFs). ORF1 encodes a polyprotein, which is processed by the viral protease into six proteins. 
20.20 20.20 20.20 20.60 19.70 20.10 hmmbuild -o /dev/null HMM SEED 535 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.95 0.70 -6.21 5 408 2012-10-02 13:45:52 2003-04-07 12:59:11 7 5 351 10 0 397 1 490.50 74 30.25 CHANGED K.DFSHIKLpLAPQGGFD+pGNTPaGKGsMKslTtuuLlARAsALlHERpDEFQLQGs...sYDFDcNRVoAF+pMAADNGlGllcsMRlGs+LKGVTTlEELKsALKsacIusCQIlYpGusYolcSDGKGuVsV-K...l.spssQsssElusAL+RLRsARARsYsSCsQ-llTSIIQlAGoAFVVsRtVKRh.............................chhu+PshGasttVsctutCEo....E-D--saphpspctcsEG.KKGKNKKGRGKK..asAFSoRGLSDEEYDEYKKIREERGGKYSI-EYLpDR-RYEEELAEscAsE-DFs-EpEhcIRQRlFhh+sTRKpRKEERupLGLVTGSDIRKRKPIDWsPsGsLWADD-RpVDYNEKIsFEAPPSIWSRIVpFGSGWGFWVSPoLFIToTHVIPsGspEhFGVPIspIsIH+SGEFs+FRFPKsIRPDVTGMILEEGAPEGTVsSVLIKRsSGELlPLAVRMGTpASMKIQGRlVsGQoGMLLTGANAKuMDLGThPGDCGCPYlYKRGNDWVVhGVHsAATRSGNTVIC ...........s.sDFSHIKLsLAPQGGFDKNGNTPHGKGVMKTLTTGSLIARASGLLHERLDEFELQGPsLTTFNFDRNKVLAFRQLAAENKYGLhDTM+VG+QLKDV+TMPELKQALKsISIKKCQIVYuGsTYTLESDGKGsVKVDR...VQSso...VQTNNELuGALHHLRCARIRYYV+CVQEALYSIIQIAGAAFVTTRIIKRh.....................NIQ.-LWSKPQVEsTEEsTsKDGCPK.....PKD...s..E.EFVIS..SDDIKTEG.KKGKNKsGRGKK..HTAFSSKG..LSDEEYDEYKRIREERNGKYSIEEYLQDRDKYYEEVAIARATEEDFCEEEEAKIRQRIF..RPT..RKQRKEERASLGLVT.GSEIRKRNP-DFKPKGKLWAD...DDRSVDYNEKLSFE.APP.SIWS..RIVNFGSGWGFWVSPSLFITSTHVIPQGAKEFFGVPIKQIQVHKSGEFCRLRFPKPIRTDVTGMILEEGAPE.GTVsTLLIKRuTGE.LMPLAARMG.THATMKIQGRTVGGQMGMLLTGSNAKSMDLGTTPGDCGCPYIYKRGNDYVVIGVHTAAARGGNTVIC........................................................................................................ 0 0 0 0 +5245 PF05417 Peptidase_C41 Hepatitis E cysteine protease Studholme DJ anon Manual Family Corresponds to MEROPs family C41. This papain-like protease cleaves the viral polyprotein encoded by ORF1 of the hepatitis E virus (HEV). 
27.00 27.00 32.50 39.30 26.70 26.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.97 0.71 -4.69 4 183 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 14 0 0 193 0 150.30 78 9.88 CHANGED AQCRRWLSAGFHLDPRVLVFDESsPCRCRThlRKsAuKFCCFM+WLGQECTCFLQPAEGlVGDQGHDNEAYEGSDVDPAEsAhhDISGSYlVsGRpLQsLYpALslPuDlsARAuRLTATVcVopssGRL-CcThlGNKTFpToFVDGApLEsNGPEQYsL .AQCRRWLSAGFHLDPRVLVFDEusPCRCRoFL+KsAtKFCCFMRWLGQ-CTCFLpP.sEGhVG-pGaDNEAaEGS-VDPAE.AplslSGoYhVpGpQLpsLYpALsIPpDlAARAuRLTATVElususGRLpC+TlLGNKTFpTshsDGApLEANGPEQYVL............................ 0 0 0 0 +5246 PF05418 Apo-VLDL-II Apovitellenin I (Apo-VLDL-II) Moxon SJ anon DOMO:DM08067; Family This family consists of several avian apovitellenin I sequences. As part of the avian reproductive effort, large quantities of triglyceride-rich very-low-density lipoprotein (VLDL) particles are transported by receptor-mediated endocytosis into the female germ cells. Although the oocytes are surrounded by a layer of granulosa cells harbouring high levels of active lipoprotein lipase, non-lipolysed VLDL is transported into the yolk. This is because VLDL particles from laying chickens are protected from lipolysis by apolipoprotein (apo)-VLDL-II, a potent dimeric lipoprotein lipase inhibitor [1]. Apo-VLDL-II is produced in the liver and secreted into the blood stream when induced by estrogen production in female birds. 20.10 20.10 20.10 59.90 18.70 17.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.87 0.72 -3.80 3 14 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 11 0 5 15 0 75.10 74 79.88 CHANGED KSIFERD.RRDWLVIPDAVAAYIYEAVNKMSPRAGQFLlDlSQTTVVSGTRNFLIREToKLTILAEQMMEKIKoLWNTKVLGY KSIFERD.RRDWLVIPDAlAAYIYEoVNKMSPRVGQFLsDsuQTsVVsGTRsFLIRETsKLolLAEQLMEKIKsLWYTKVLGY.. 
1 0 0 2 +5247 PF05419 GUN4 GUN4-like Finn RD anon Manual Domain In Arabidopsis, GUN4 is required for the functioning of the plastid mediated repression of nuclear transcription that is involved in controlling the levels of magnesium- protoporphyrin IX. GUN4 binds the product and substrate of Mg-chelatase, an enzyme that produces Mg-Proto, and activates Mg-chelatase. GUN4 is thought to participates in plastid-to-nucleus signaling by regulating magnesium-protoporphyrin IX synthesis or trafficking. 21.30 21.30 22.60 22.50 20.70 21.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.88 0.71 -4.34 79 375 2012-10-11 20:01:00 2003-04-07 12:59:11 7 23 109 3 134 482 116 131.80 32 38.92 CHANGED LpphLtspc.......accADcpTtpl..........hlplAsc.....p...pcsalhhs-lpshPspDLpoIDpLWlpYSpG+FGFSVQ+cIa.pul..Gss....................acpFss+lGW.+......ps......sp.....Wh..p....Y.sphs.a.....sls..APpGHLP...............h.hhsthhst.shtt ..................LpphLtptpaccADpcTtpl..........hhplusc................p.......pc..salhh.p-lc..p...h.PspDLpsIDpLWhpaS.pG+..FGFSVQpcIa.ppl..Gsp.............................appFss+l..GW..+..........pp..................sp.......W.h..p......a..sphsa...............sls......AP.pGHLP......sh.h...........th.................................................... 0 20 91 130 +5248 PF05420 BCSC_C BCSC_N; Cellulose synthase operon protein C C-terminus (BCSC_C) Moxon SJ anon Pfam-B_10335 (release 8.0) Family This family contains the C-terminal regions of several bacterial cellulose synthase operon C (BCSC) proteins. BCSC is involved in cellulose synthesis although the exact function of this protein is unknown [1]. 
23.10 23.10 23.10 23.80 22.90 23.00 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.87 0.70 -5.32 29 686 2009-01-15 18:05:59 2003-04-07 12:59:11 6 129 625 0 98 549 12 338.60 55 29.89 CHANGED hSpLsshpsPlpschPh..hsG+hhh+sssVplsAGohsss...............hspFGsssstsssst........................sQsssGlululGacsc....sapuDIGTTPlG.FphsslVGGlpassclu.s..huaslssuRRslTsSlLSYAGs+DP.................t..........o...GppWGGVspsGsphshoaDtGps.Glau.shuaphlsGcNVpcNpc..hchhsGhYhpllppscpp.lolGlshhhhpYc+N.uhaTaGpGGYaSPQpYhululPlpastRpsp.ao..aclpuSluhpph+pcusshaPtpshhpsth..........................sshasusSssGhu..YslpAtsEYRlusphhlGuslslppupsYs.spuhlYlRYshssh .........................................................................Y.SDLKAHTTM.LQVD.APa..uDGRhFFRoDhVNMssGoFSTsu...........DGpacssWGTCTLpcCS.Gsc...............................................................................................SQuDoGASVAVGWcN-.......sWp..aDIGTTPM.G.FN..VVDVVGG.lSYScDIG....P...LGYTlNAHRRPISSSLLAFGGQKDu...........................s...........T..GpKWGGVRAsGsGlSLS..YD+GEANGVWA.SLSGDQLoGKNVEDNWR..VRWMTG...YYYKVIN.....pN.N....R.R.V...TlGLNNMIWHYDKDLSGYSLGQGGYYSPQEYLSFAlPVhW.RpRT.EN..WS..WELGuSsSWSHSRs+ThPRYPLhN..LIPsD.app...A...................scQossGG.SSpGFG..YTARALlERRVTuNWFVGsAlDIQQA.KDYsPSHhLLYVRYSAAG.W........................... 0 12 33 68 +5249 PF05421 DUF751 Protein of unknown function (DUF751) Moxon SJ anon Pfam-B_10849 (release 8.0) Family This family contains several plant, cyanobacterial and algal proteins of unknown function. The family is exclusively found in phototrophic organisms and may therefore play a role in photosynthesis (personal obs:Moxon SJ). 
21.40 21.40 23.00 23.00 21.00 21.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.82 0.72 -3.68 28 118 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 112 0 51 110 86 60.90 38 59.89 CHANGED FasNVhRYPRYhIohhLGlhhshhpPlhsLh+p.PloulsllhhlluslhhlshsL+AMlsh .FasNVsRYPRYhIohsLGlhhslhpPlhtLh.Ks.PloAlhlluhllushhhlhhsL+AMlG.......... 0 15 36 47 +5250 PF05422 SIN1 Stress-activated map kinase interacting protein 1 (SIN1) Moxon SJ anon Pfam-B_10677 (release 8.0) Family This family consists of several stress-activated map kinase interacting protein 1 (MAPKAP1 OR SIN1) sequences. The fission yeast Sty1/Spc1 mitogen-activated protein (MAP) kinase is a member of the eukaryotic stress-activated MAP kinase (SAPK) family. Sin1 interacts with Sty1/Spc1. Cells lacking Sin1 display many, but not all, of the phenotypes of cells lacking the Sty1/Spc1 MAP kinase including sterility, multiple stress sensitivity and a cell-cycle delay. Sin1 is phosphorylated after stress but this is not Sty1/Spc1-dependent [1]. 
19.40 19.40 21.80 21.00 19.00 19.00 hmmbuild -o /dev/null HMM SEED 523 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.56 0.70 -5.99 4 397 2009-09-11 11:45:00 2003-04-07 12:59:11 7 8 204 2 264 433 0 250.20 17 63.17 CHANGED +lpoDDs.......GhC-hllls.Dh..................phc+.p.ss.sGGussphptop.................DLu-sp-ss.scsh-I..a.-hsh++.cssohpc.E+Lc..............................hu+psuh+hKs.shp-...phpssE.cphFp+pEl.........................................s.ssKsPhSuhpShLpcpLtcsstQhsN.htEau+FsGpup...sso++IslalPhpss.-...+..PLplplhsoA+lpElIGalhhQYs.pth-s.Lps...slstYsLalsEDDGEl-pDFPPLDsscPluKFGFosLuLVp+hptsssoppcp.hsh..hs+t...I..sN.ch.h+.h..pu.p.pcts.c.pts........hcYh.ptssp..ssl-lcsolpsAtshphshlpcNuuRuc..............................................................c-.p.phhshpDhhpu.happFhl.hhc.htF..+p-hpLuIsG-hlcI......ptSspha.p.KssSIsus.ls.hCclhEcp....Alh+hhh.pNpsoppasFts.uAshst.lss ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..............................................................................................................................................................................
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 82 134 215 +5251 PF05423 Mycobact_memb Mycobacterium membrane protein Moxon SJ anon Pfam-B_10885 (release 8.0) Family This family contains several membrane proteins from Mycobacterium species. 20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.89 0.71 -4.44 7 764 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 115 0 144 501 0 124.40 29 72.34 CHANGED hlKRhWIPLVIllVVsVuGhoV.RlHuhFGScppsohsss..psscPFNPKpVpYEVFGPsGThAsIsYhDssucPp+lsussLPWShslsTTLsAVhuNlVAQGsusSIGCRIhVsGsVKsERlssplpAaTaClVKSu ...............................................................s.........................................................s............shsP..+.pVsYEVhG..s..s..G..s..hs..s..lsY.h.....D.....s..s.u..ps...pphps....ssLPW..S..hsl..s...s..........s.........s.u....s.s.s...s.l..s..A..p..u...s.....u..s..s...l.sCRIo.V.s......GtVp.s.cp..s....p.t..ts.s.C........................... 0 21 83 126 +5252 PF05424 Duffy_binding Duffy binding domain Moxon SJ, Bateman A anon Pfam-B_11112 (release 8.0) Family This domain is found in Plasmodium Duffy binding proteins. Plasmodium vivax and Plasmodium knowlesi merozoites invade human erythrocytes that express Duffy blood group surface determinants. 
The Duffy receptor family is localised in micronemes, an organelle found in all organisms of the phylum Apicomplexa [1]. This family is closely associated on PfEMP1 proteins with PFEMP, Pfam:PF03011. 25.50 25.50 25.80 25.50 25.20 25.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.43 0.71 -4.46 409 7405 2012-09-24 07:47:50 2003-04-07 12:59:11 6 68 16 19 63 7892 0 148.80 32 63.35 CHANGED GACAP.RRh+lCsp.NLE.pl.........s....s.p.......s.........h.........ps.............scsLLscVhh.uAKaEGpslhppa.s......pp.................s...............s......op...............lCTsLARSFADIGDIlRG+Dlapsssp...............pccl-psL+pIF...pcIhcplsst...............hpppYpt.s...s....sa.hpLREDWWssNRcp.............VW+AlTCsus...p.....s.s.p.YFpt..s..s.s...........ss.t.....s..s.....p.....p.cC.........pp..s..s.............s.......p.................sPT...hD....YVPQaLR ................................................................................................................................................................................................................................................................................................................................oh.hDIGDI.l..R....G...+........D......La.....huspt.......................+ccL.-cpL.....Kc.IF.p.p.Iapplppt...................hpt.Yt.s....s.......sa.hpLR..EDWWs.s.N.Rp.p..................VWc.AlTCsst....p....s.t....p...YF.+t.s..ss................tt.......p..s....p...p.pC......ts......t...........................t....p......................sP.T..h.D....YVP.QaLR........................................................................................... 0 56 57 61 +5253 PF05425 CopD Copper resistance protein D Finn RD anon Pfam-B_19002 (release 8.0) Family Copper sequestering activity displayed by some bacteria is determined by copper-binding protein products of the copper resistance operon (cop). CopD, together with CopC, perform copper uptake into the cytoplasm [1]. 
30.10 30.10 30.10 30.10 30.00 30.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.89 0.72 -3.83 210 1897 2012-10-01 21:57:53 2003-04-07 12:59:11 8 15 1406 0 513 1578 303 102.10 27 27.84 CHANGED ttshpRF....uphuhhuV...ssll....loGlh.h....hhh.........................h...sshhs..osYG.....hhLhsKluLhlshhslushpRh.....hlhPpl......................tsspsht.tlpphltlEhslulslls...lsusL .................h..shtRF....SthuhhuV.......ssll....loGlhs.u..hhh......hs.................h...sslhs...osaG......plLLhKhsLlhlhlsluhhs.Rh...............hlh.P+h................................................ttspsst.......h.hht.hs...t.l.E.hslus.llls...lsuh.............................................................. 0 124 301 422 +5254 PF05426 Alginate_lyase Alginate lyase Moxon SJ, Mistry J, Murzin A anon Pfam-B_11800 (release 8.0) Family This family contains several bacterial alginate lyase proteins. Alginate is a family of 1-4-linked copolymers of beta -D-mannuronic acid (M) and alpha -L-guluronic acid (G). It is produced by brown algae and by some bacteria belonging to the genera Azotobacter and Pseudomonas. Alginate lyases catalyse the depolymerisation of alginates by beta -elimination, generating a molecule containing 4-deoxy-L-erythro-hex-4-enepyranosyluronate at the nonreducing end [1]. This family adopts an all alpha fold [2]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild --amino -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.78 0.70 -5.04 59 980 2010-01-08 16:30:20 2003-04-07 12:59:11 7 39 535 10 303 910 259 247.60 17 47.74 CHANGED slss+hth.ssussppDhhuts.ah......sstps............sshshhp+-Gph.........tphtsstpthtphupss.....tsLuhsathstpttastt..ssch.l+sWhlsststhpsplphup.........................h..h..h..pttsshsspp.pthcp.....Whsphhchhhs.t...tupsctstp......NNHuhWhshpVsuhAhhhscpchhchsh.......pp.htpshh.sQls.sDG...shPpEltR.............s+uhpYssasLtslshhAph....upp.tG...DLat....stsutslt+uhcalhsh..st.s.ppa ..................................................................................................................................................................tsht.h.p.stts.....hthuLtahho..s...pp...p..Yupt.......utch.l.......ps..W...hh...s...t..s.t.h.s.s.phphs............................h.t.l.ht...............hh.sh.hh.......psh.ssh.s..s.p..p.hpth.pp............ahp.p....h.hp.......h.hp...........ttpt..htt......................sNa.shhtshthhuhu.lhh....s....cc....s....h.hppul..............................................pt...h....ht...t....t....h..........s.p.....l.......t..............s......G...............pphtR...............................tpshtYsha...sLtshshhuph....utp...s.......Dla......s.psttl.htuhphhh.h.......................................................................................... 1 102 191 258 +5255 PF05427 FIBP Acidic fibroblast growth factor binding (FIBP) Finn, RD anon Pfam-B_19083 (release 8.0) Family Acidic fibroblast growth factor (aFGF) intracellular binding protein (FIBP) is a protein found mainly in the nucleus that is thought to be involved in the intracellular function of aFGF [1]. 
20.60 20.60 20.70 22.80 20.10 19.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.27 0.70 -5.22 7 124 2009-09-10 22:14:15 2003-04-07 12:59:11 6 4 87 0 83 127 1 291.70 43 86.60 CHANGED ss-lDVFluNhTllD.ElYQLWl-GYoso-AVslhhppGhlcphGssuDlltSDshDHYRTaphlE+hLHtPsKLhpQ.hF.QlsPpppuhLIE+YYuhD-uhsREllGKKLSpts+KDLD-lupKTGlpLKSCRRQFDNhKRlFKsVEEh.GsLspNI+QpFlLs-cLAccYAAlVFhAs.RFETsK+KLQYLoFpDFhhCuptlhppWThsh.csts-..hDh-hDKEFL.DL+El+hLlsccc.l...KpLVshpL+sphh..sapEh-uNF+oh.RullsIAssLp+s+-lRshFlDLsEKhIEPh+tssWspppVphFhsphTpSVhsL-s.Rc....psLW-RYMtslpsCLh+MYHs .........................................................h.s-lDlFluNhTllD.-l..YpLWl-GaSss-AVsh.hhp.....pG....h..h..pp..........h.....s.Ast...s.....lltSDshDHYRTFphl.E.+hLcsPsKLh..p.QhhF.QlsPpppthLIE+YYshD-shsRElLGKKLSptp+KD.........LD-lupK........TslpLKSCRRQFDNhKRlFKsV.E-....h................G..........s..LspNI.ppcFlLSc..cLA+cYAslVFhAs.RFET..sK+..K..LQYLoFtDhhhCup.hhp.WThsh.....................p...-......hDh-hD+-FL.-L+-L+.h.Ll...sccc.l...hKpLVshtL+s......h..l........shp-h-sN.F+.sh.RullslAspLp+s+-lRshFl....DLsE+hl-PhRpstWshpplphFLsthst.....s.s..hpl..-h..+c..............psh....Wc....RYMtsl.p.Cl.phYc...................................................... 0 38 49 66 +5256 PF05428 CRF-BP Corticotropin-releasing factor binding protein (CRF-BP) Moxon SJ anon Pfam-B_11928 (release 8.0) Family This family consists of several eukaryotic corticotropin-releasing factor binding proteins (CRF-BP or CRH-BP). Corticotropin-releasing hormone (CRH) plays multiple roles in vertebrate species. In mammals, it is the major hypothalamic releasing factor for pituitary adrenocorticotropin secretion, and is a neurotransmitter or neuromodulator at other sites in the central nervous system. 
In non-mammalian vertebrates, CRH not only acts as a neurotransmitter and hypophysiotropin, it also acts as a potent thyrotropin-releasing factor, allowing CRH to regulate both the adrenal and thyroid axes, especially in development. CRH-BP is thought to play an inhibitory role in which it binds CRH and other CRH-like ligands and prevents the activation of CRH receptors. There is however evidence that CRH-BP may also exhibit diverse extra and intracellular roles in a cell specific fashion and at specific times in development [1]. 25.00 25.00 25.30 25.20 19.00 18.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.95 0.70 -5.59 7 97 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 73 0 52 94 0 264.40 44 90.03 CHANGED Mpsuucs.................phthl.L.lsshtupsRh.t.pssustshhhLhsuslpRE.p.-..a...hcChshhuh.G.aha.ust.t.sCuhalIuEPDpllpIchcpsslsCpsGshl...........................................................tlhDGW.LpGEhFPuppDH.LPh.cRhs-aCss......hss++hhRSSQNsAhl.aRl.s.GsuFohsVRhh.NshPCNllsQsspGhFTMss.tQtRNCSh.SllaPsslclssLplGt...pslthp..s.hsCtssGDhVElhGusGLDsupMhshuslChshss....sp.pIhCssosVRhVSoGpa.Npssh.hRphD..-h-hsp ..............................................................................................................t.p......t..aph.hcC.hphhuh.G..Fhahu.p.....ps.phhCuhahIuEP-phIplchc.lslsCps.Gshl...........................................................p.lhDGW.LpGEhFPSspDHsLPhp-RYhDaCss.................s.s++shRSSQNVAMlhFRltpsGsuFolol+p..N.hPCNllSQ..ossGtaThssstQ+RNCSa.SllYPsslplucLslGph............ps.lp..h...p....c......s.....hsCt..t...u..DaVElhGusGlDso.pMh.huslChshpss...sp.plsCssosVRhVSSG+a.NplohphR.l-..-lp...p............ 0 10 15 33 +5258 PF05430 Methyltransf_30 DUF752; S-adenosyl-L-methionine-dependent methyltransferase Moxon SJ, Eberhardt R anon Pfam-B_12088 (release 8.0) Family This family is a S-adenosyl-L-methionine (SAM)-dependent methyltransferase. 
It is often found in association with Pfam:PF01266, where it is responsible for catalysing the transfer of a methyl group from S-adenosyl-L-methionine to 5-aminomethyl-2-thiouridine to form 5-methylaminomethyl-2-thiouridine [1,2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.20 0.71 -4.45 17 1653 2012-10-10 17:06:42 2003-04-07 12:59:11 6 6 1635 15 373 1468 553 123.70 41 22.62 CHANGED schtphtctLhppash...h.Ghhclths.stssLcLhhGDs.pptlspls....tpsDAWFLDGFuPs+NP-MWs.plhptluchscssGshuTaouAuhVR+uLhpAGFp.lp+p.GFGpKREhlpushs ........................................................pht.huppL.tpa....Phs.......hsG.....h.....a.Rl..hhs......t...scl.....sL.....-LhaG..D.h.pc.h..l.spLs....................tplDAWFLD......G.FAP....u.K...N..P...-.....MWops.....LFsthA....+L.u.+...s.....G..u...TlA..TFTu......A..G..h.....VR..RuLppAGFs.h.p.K.....t.+.GF.G.pKREMLpGhh..................................... 0 84 217 310 +5259 PF05431 Toxin_10 Toxin_P42; Insecticidal Crystal Toxin, P42 Finn RD anon Pfam-B_19338 (release 8.0) Family Family of Bacillus insecticidal crystal toxins. Strains of Bacillus that have this insecticidal activity use a binary toxin comprised of two proteins, P51 and P42 (this family). Members of this family are highly conserved between strains of different serotypes and phage groups [1]. 
28.60 28.60 28.70 32.90 27.80 28.10 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.40 0.71 -4.67 7 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 11 0 3 42 0 196.30 38 50.73 CHANGED PphPp.sopLpshPcho...sIs.sGs.Pp.....lhG.TLIPCIhVsD.sI.shssphKpoPYYlLc+hQYWpphhuu..hlsstp+pohp.chGhspsppsohlsslshpIssD...thpF.....GstTssh+pQlsppLph.hSpssp.htchpcph-hsNsp.stshphstFlhtppapLhRhNGo.lps..hhshDpchhslpoYPs ......................PphPp..sppL.shPcho...sID..Gh.Ppps.pslhGtTLlPCIMV.sDssl....sp...ss+hKsoPYYllc+ppYW..pphhuu..lls....stpccohs.csGlscssQsoMhsslshpIGuD..hGh+F............GstTpsIKsQloc-LphphSpoop.htcpsEppEhsNss.shshpasuallsocapLpRssGoplps..hhshDpchhsltoYPp............. 0 2 2 3 +5260 PF05432 BSP_II Bone sialoprotein II (BSP-II) Moxon SJ anon Pfam-B_12103 (release 8.0) Family Bone sialoprotein (BSP) is a major structural protein of the bone matrix that is specifically expressed by fully-differentiated osteoblasts [1]. The expression of bone sialoprotein (BSP) is normally restricted to mineralised connective tissues of bones and teeth where it has been associated with mineral crystal formation. However, it has been found that ectopic expression of BSP occurs in various lesions, including oral and extraoral carcinomas, in which it has been associated with the formation of microcrystalline deposits and the metastasis of cancer cells to bone [2]. 
25.00 25.00 25.00 40.00 24.90 24.50 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.25 0.70 -5.13 4 53 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 33 0 23 49 0 263.80 57 94.51 CHANGED FSMKNapRRAKhEDSEENGVFKYRPRYYLYKHAYFYPsLKRFsVQuuSDSSEENGsGDSSEEEtEEE.sSNEEtN...NE-S-uNEDE-uEAENhTLSosThGYGu-sTsGTusIGLAAlpLP+KAGshtpKusKc-ESDE-EEEEE...EpEEpEAEV--NEQusNGTSTNST.EVssGNGoSGG....DNGEE.GEEESVTEAtsEGTTVA.......u.TTss..sGhpsTTP.pElhGTTsPPhuKsTTst..GEYEQT.GspEY-ssYEIY-uENGEP.RGDsYRAYEDEYSYYKGRGY-uYsGQDYY .........................FShKNhHRRsKlEDSEENGVFK..aRPRYYLYKHA.YFYPPLKRFsVQsu.SDSSEEsGDGs.S.pEEEtE.EE..TSNE.cEN.....NE-...Spt..NE..DE-..uEu..ENoTLSs...oT.uY.Gt.-sTsGsG.hGL.A.Al......plPKKu.Gt..htpK.us+p.cESD.E......-..EEEEE.....tENEcpEAE.VDENEQGlNGTSTNST.Es..-sGNGuuGG.........DsGEE..tEE.ESVTEAssEuT..Tss.......scTTssssuGapsTTPsp.ts...YtTTsP..PhtcsTTstYts.EYEpT...GsNEYDsGYEl.....Y-.sENGEP.RGDsYRAYEDEYS..Y...YKG+GYDuY-GQDYY........... 0 1 2 5 +5261 PF05433 Rick_17kDa_Anti Glycine zipper 2TM domain Moxon SJ, Bateman A anon Pfam-B_13382 (Rel 8.0) & Pfam-B_6 (Rel 24.0) Family This family includes a putative two transmembrane alpha-helical region that contains glycine zipper motifs [2]. This family includes several Rickettsia genus specific 17 kDa surface antigen proteins [1]. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild --amino -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -9.09 0.72 -4.29 174 3882 2012-10-03 03:18:43 2003-04-07 12:59:11 10 13 1511 0 736 2475 469 41.70 44 28.29 CHANGED hGshhGAssGulhGstl.ut......psthhusll.GAssGuhlGstlu ..............GsllGAlsGGllGspl.Gu.......GsGp.s..l.uTlu.GAlsGGlsGpplt................... 0 114 335 536 +5262 PF05434 Tmemb_9 TMEM9; TMEM9 Moxon SJ anon Pfam-B_12447 (release 8.0) Family This family contains several eukaryotic transmembrane proteins which are homologous to human transmembrane protein 9 Swiss:Q9P0T7. 
The TMEM9 gene encodes a 183 amino-acid protein that contains an N-terminal signal peptide, a single transmembrane region, three potential N-glycosylation sites and three conserved cys-rich domains in the N-terminus, but no known functional domains. The protein is highly conserved between species from Caenorhabditis elegans to man and belongs to a novel family of transmembrane proteins. The exact function of TMEM9 is unknown although it has been found to be widely expressed and localised to the late endosomes and lysosomes [1]. Members of this family contain Pfam:PF03128 repeats in their N-terminal region. 24.80 24.80 25.50 24.90 24.50 24.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.93 0.71 -4.46 6 196 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 89 0 107 165 0 131.20 46 75.14 CHANGED PY+chsG+Iapp.........NVo.KDCNCLHVV-P...hPV.G+DVEAYCLcCECKYEERSooTIKVsIIIYLSllGhLLLYMlFLhllDPLL.RKhssao..lpo-E-s-DspPhssAtsslstuRucsNVLNRVEaAQpRWK+QVQEQRKoVFDRHshL .......................................uplY.pp.........Nl.o..pc-......C...............sChcVV.cP....M.PV..GtD...lEAYCLhCECKYEERSosTIKVslIIaLollGhLLLYMlaLhLl-Pll...R+........sh.s.p.l..ps--...........-...........s..........tDt..ps.hss...............u...t....s........s..........sts...tu..ssVLp+V-htQpRWKhQVpEQR+slFDRHhhL........................................... 0 27 36 67 +5263 PF05435 Phi-29_GP3 Phi-29 DNA terminal protein GP3 Moxon SJ anon Pfam-B_14111 (release 8.0) Family This family consists of DNA terminal protein GP3 sequences from Phi-29 like bacteriophages. DNA terminal protein GP3 is linked to the 5' ends of both strands of the genome through a phosphodiester bond between the beta-hydroxyl group of a serine residue and the 5'-phosphate of the terminal deoxyadenylate. This protein is essential for DNA replication and is involved in the priming of DNA elongation [1]. 
25.00 25.00 449.40 449.30 21.70 20.60 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.70 0.70 -5.18 3 7 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 6 0 8 0 265.90 66 100.00 CHANGED MARsSRIRITcNDKAEYAKLVKNTKAKIoRTKKKYGlDLSuEIslPsLESFcTREQFNKWKc+AESFTNRANpcYQFVKNcYGVVASKAKIsEIt+NTKEAQRlVDEpl-chcDKEaISGGKsQG.TVGQRhplLSPucVTGIuRPcDFDFsKVRTYuRLRTLEEuMccRosPsYYE++MpQLQpNFI+.SVEGSFNS..tADELVE+LKKIPPDDFYELFLhasEISFEpFDSEGssVEASEuhLpKI+SYL-cYcRGDsDLSLKGF MARssRIRIpsNDKA.YA+LVKNTKAKIuRTKKKYGlDLosEI-lPsL-SFpTRtpFNKWKcptpSFTNRAN.pYQF.KNtYGlVASKAKIsEIt+NTKEsQRlVDEphcthcDK.ahuGGK.QG.TltQRhthhSPupVTGIsRPpDFsFscVRSYuRLRTLEEuMt.+ssPpYY-++MhQhp.NFIc..VEtSFNS..huDELlEcLKKIPPDDFaELaLhhsEISFE.FDSEGpsVEssEu.l.KIhSYL-pYcRGDhsLsLKGF 0 0 0 0 +5264 PF05436 MF_alpha_N Mating factor alpha precursor N-terminus Moxon SJ anon Pfam-B_12643 (release 8.0) Family This family contains the N-terminal regions of the Saccharomyces mating factor alpha precursor protein. All proteins in this family contain one or more copies Pfam:PF04648 further toward their C terminus. 21.90 21.90 25.30 25.50 20.00 18.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.85 0.72 -4.30 4 57 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 33 0 25 50 0 85.40 49 57.30 CHANGED MKF.ohhoAsllAAsShsAs..ts.o---ou.pIPAEAlIGYLDLtGDhDIAVlPFSNuTssGLLFlNTTIsshApKEpussLsKRE .........M+Fs.Slhos.hhhAAs.S.shAu..s.....t..spo--..-tA.....plPAEAlIGYLDLtGDpDlAlLPF..SNuTssGLLFlNTTIAptA....tc.E...ps...s.oLsKRE.............. 0 2 12 20 +5265 PF05437 AzlD Branched-chain amino acid transport protein (AzlD) Moxon SJ anon Pfam-B_14345 (release 8.0) Family This family consists of a number of bacterial and archaeal branched-chain amino acid transport proteins. AzlD is known to be involved in conferring resistance to 4-azaleucine although its exact role is uncertain [1]. 
22.70 22.70 22.70 22.90 22.60 22.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.19 0.72 -4.06 189 3465 2009-09-11 13:40:13 2003-04-07 12:59:11 7 2 2847 0 681 2161 1469 96.30 21 90.16 CHANGED hhlhIlshulsTalsRhlshhlhtp.hplsshlp...phLphlPs....ulLsALlssslh.....hsttth......ssspll...Aslsshllshhp+.s....hlhollsGhsshhllp ......................hlhllhh.u.l...sTalhRhlPhhlhpp...h....t....h.s....s......hht...........thLphlPh.......ullsulhhsslh.............hst.t.t.................thspll..uhhsss.hlth.h..p+.s....ll.lsllsGhlshhll................... 0 196 408 553 +5266 PF05438 TRH Thyrotropin-releasing hormone (TRH) Moxon SJ anon Pfam-B_14384 (release 8.0) Family This family consists of several thyrotropin-releasing hormone (TRH) proteins. Thyrotropin-Releasing Hormone (TRH; pyroGlu-His-Pro-NH2), originally isolated as a hypothalamic neuropeptide hormone, most likely acts also as a neuromodulator and/or neurotransmitter in the central nervous system (CNS). This interpretation is supported by the identification of a peptidase localised on the surface of neuronal cells which has been termed TRH-degrading ectoenzyme (TRH-DE) since it selectively inactivates TRH [1]. TRH has been used clinically for the treatment of spinocerebellar degeneration and disturbance of consciousness in humans [2]. 
27.80 27.80 27.80 28.40 25.30 27.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.17 0.70 -4.94 9 77 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 52 0 28 66 0 185.50 44 86.80 CHANGED shlllLhuL.....sVstu.u....hQtpshsucspsuhD..-lLp+AEp.LLLRShLpphp--pst..ss....Qs-WlpKRQHPGKR.pc-.......................cKRQHPG+R--p........................tp.ts.hsp.+RQHPGKR..ht.....h.-pP...hhhS-loKRQHPG+Ral.....KRQHPG+RphcEEst..............lEKRQHPGKRh...........t.C-shsssGC..uul.L ..................hhl.lhhulh.s.hslstups....h.tps.httpt...-...lLppsct.lhLpp.lpphptcptt...st......t.pWlsKRQHPGKR.p--...................cKRQHPG+R--p.......................................s.s..htst.ss.pKRQHPG+R............sP....hLt.sloKRQHPGRRhlss...KtQ+shccct...--Epc..tthhs................EKRQHPGKRhh.........ssPCss.sssup..sth.............. 0 4 6 13 +5267 PF05439 JTB Jumping translocation breakpoint protein (JTB) Moxon SJ anon Pfam-B_14502 (release 8.0) Family This family contains several jumping translocation breakpoint proteins or JTBs. Jumping translocation (JT) is an unbalanced translocation that comprises amplified chromosomal segments jumping to various telomeres. JTB, located at 1q21, has been found to fuse with the telomeric repeats of acceptor telomeres in a case of JT. hJTB (human JTB) encodes a trans-membrane protein that is highly conserved among divergent eukaryotic species. JT results in a hJTB truncation, which potentially produces an hJTB product devoid of the trans-membrane domain. hJTB is located in a gene-rich region at 1q21, called EDC (Epidermal Differentiation Complex) [1]. JTB has also been implicated in prostatic carcinomas [2]. 
20.90 20.90 21.00 20.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.77 0.71 -4.30 4 94 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 76 1 60 88 0 108.80 36 67.73 CHANGED A-uPs+cEKlssppsspsCWhsEcFsVspECuPCSsFcI..Kohs..cChpTGYhE+lsCSsScps.hRSC.RSALh.Ep+.FaKFEsssssluLl.hhlshhRpRQL-R+sh.+lc+Ql ...................................h.p..ss..tsCWhh.EcasVspECpPCosFph..+ohs...tCspT.GYhEplpCs..sSc...p....h+S......C.Rsuhh.Ep+hFWKFEusshslull.shlVhhRpR.L-R+shp+lc+Ql...................... 0 22 28 45 +5269 PF05440 MtrB Tetrahydromethanopterin S-methyltransferase subunit B Moxon SJ anon Pfam-B_15021 (release 8.0) Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 19.40 19.40 19.40 54.20 19.20 19.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.26 0.72 -3.95 5 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 53 0 38 51 5 96.10 35 92.20 CHANGED olV+IAPElsLVMDs-TGsVuctp.cDlIhlSM-PlaE+l-KLEulADDLlNSLDPSpPsLNSaPGREGVahhAGhaoNuFYGFllGLuIsuLLALIL .........hVplsPEhsLVhDspoGlluptt.c-llhlohsPl.-plscL-phAD-LhNSLDPcss.lpSaPGREGshhhAGhhoshaaGFhlGLhl.hsLlshh.h....... 1 8 24 32 +5271 PF05443 ROS_MUCR ROS/MUCR transcriptional regulator protein Moxon SJ anon Pfam-B_1733 (release 8.0) Family This family consists of several ROS/MUCR transcriptional regulator proteins. The ros chromosomal gene is present in octopine and nopaline strains of Agrobacterium tumefaciens as well as in Rhizobium meliloti. This gene encodes a 15.5-kDa protein that specifically represses the virC and virD operons in the virulence region of the Ti plasmid [1] and is necessary for succinoglycan production [2]. 
Sinorhizobium meliloti can produce two types of acidic exopolysaccharides, succinoglycan and galactoglucan, that are interchangeable for infection of alfalfa nodules. MucR from Sinorhizobium meliloti acts as a transcriptional repressor that blocks the expression of the exp genes responsible for galactoglucan production therefore allowing the exclusive production of succinoglycan [3]. 23.80 23.80 24.50 24.60 23.60 23.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.56 0.71 -4.38 127 729 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 320 1 355 661 38 123.90 40 78.76 CHANGED tsstsllcLTA-IVuAYVS..sNslsss-LPsLIpsVasuLs..............slss...ss....t..ts.....p..................c.pP.AV......s..l+KSl.pPDhllCLEDGK.taKoL.KRHLpocaulTP-pYRpKWGLPsDYPMVAPsYAtpRSpLAKphGLGp ..........................................................s....thlpLsu-IVuAaVu..s..Nslsss-LPsLItpVasuLt......................................plstst.........t..........p.............................................................................p.pP.AV.....s.l+KSl.psDhIlCLEDGK.paKoL.KRHLto+ashT.P-pYRpKWGLP.s-YPMVAPsYAtpRScLAKphGLGp....................................... 0 93 242 268 +5272 PF05444 DUF753 Protein of unknown function (DUF753) Moxon SJ anon Pfam-B_1957 (release 8.0) Family This family contains sequences with are repeated in several uncharacterised proteins from Drosophila melanogaster. 
21.00 21.00 21.20 21.20 20.40 20.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.89 0.71 -12.93 0.71 -4.33 80 560 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 25 0 372 539 0 130.60 24 85.02 CHANGED hpCsosp.s.ssC...tssshst.sppCpst.t......C.hotl........ssss.......htRGChscht........pssCsss.ts.....CpsC.su..s.sCNs.t.hstsR.hpCapCs....sssC....tssss.psphC.ha.....pts-p...Casph....ssshspRGChosh...............ppt..ssssppChh..Csss ............................h..Cpspp..s.sp..C.....ss.sh.hppCpst.t.......C.hohh..........ps.st............htRGChssl.t..t......tttCsss......t...sC.ttC...su....s.....tCNs..t.h.......stsp..hpChpCs......sssC.......ss....tst.C.hh.....t.tpt...Chh.h.....ts..h.hGC.sp..................................s..................... 0 48 79 267 +5273 PF05445 Pox_ser-thr_kin Poxvirus serine/threonine protein kinase Moxon SJ anon Pfam-B_1974 (release 8.0) Family \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.67 0.70 -5.69 9 74 2012-10-02 22:05:25 2003-04-07 12:59:11 6 1 50 0 0 102 4 427.40 67 96.40 CHANGED ssh-tpa..hpspcps-TslLG.......DsIYFcalhSQlDhppsWu.Psl+ll+YF+s.Fs+psls+Iscp-YlNPSaFQ.KDcRFYPhNDDFYHlSTGGYGIVF+h.DcYVVKFVaEP.sppapPh-sTuEaTIP+FLYNNLKG...DE+cLlVCAhAMGlNYKlsFLasLY+RVLahLLLlhpIhDsp.Lsl.....paS++hhLKhFs-.+Kss.cFVKLlSYFYPhVlpSNlNVI..N.FsahhaF.......FEHEKRusYhY-RGNIIIFPLA+CSA-Klo.phstchGFt...........SlscYlKFlFLQluLLYIKIYELPsCsNFlHlDLKPDNILlFDS..pcslpIphsspsaVFcE.l+ssLNDFDFSQVusIh.NKKIKsSl+lE.pNWYYDFHF....FsHTLh+sYPEIp.pD.tFspuLpEahh..CpsKsTC-KFRL+VShLHPISFLtcFlt+s.lFSsWIN 
............................................................................sss...EaQW..hSscchsDTVILG.......DslYFs.IhSQLDl+QsWA.PsVRLLpYFKN.FN+ETL.KIp-s-YINsSFFQQKDKRFYPlN.DDFYHISTGGYGIVFKI.DpYVVKFVFEs.sKhYS..PMETTAEaTVPKFLYNNLKG...DEKKLIVCAhAMGLNYKLTFLHoLYKRVLaMLLLLIQTMDGQcLSL.....+aSpKsFLKuFNE.RKDSIKFVKLLSaFYPuVIpSNINVI..NY..Fs+MFHF.......FEHEKRoNYp.Y-RGNIIIFPLAhaSA.DKVssEh..Al+LGFK................SLVpYIKFIFLQMALLYIKIYELPCCcNFLHsDLKPDNILLFDS...sEPIh.I.+lpc.ppFVFN...E.IKu.ALNDFD.....FSQVA.........uIl.NKKI.K.N......shKVE.HNWYYDFHF....FlHTLL..KTYPEIE.+DhEFosALEEFIM....C.oKosCDKaRLKVSILHPISFLE..KFIhRD.IFScWIN................................................... 1 0 0 0 +5276 PF05448 AXE1 Acetyl xylan esterase (AXE1) Moxon SJ anon Pfam-B_4814 (release 8.0) Family This family consists of several bacterial acetyl xylan esterase proteins. Acetyl xylan esterases are enzymes that hydrolyse the ester linkages of the acetyl groups in position 2 and/or 3 of the xylose moieties of natural acetylated xylan from hardwood. These enzymes are one of the accessory enzymes which are part of the xylanolytic system, together with xylanases, beta-xylosidases, alpha-arabinofuranosidases and methylglucuronidases; these are all required for the complete hydrolysis of xylan [1]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.90 0.70 -5.94 11 1046 2012-10-03 11:45:05 2003-04-07 12:59:11 7 10 743 111 195 2479 716 275.50 29 81.06 CHANGED MtlhDhsLp-LppY+upsstPcDFs-FWspsLcEhpplsschcLppsDaplstVcsY-LTFsGhsGu+IcuhhlhP+.sctphPsllcFHGYsus.sG-hp-hLpasusGaushuMDsRGQG.....GtSpD.sssssussh.GahhRGlh-..t-phaYRcVFLDssphl-llhShspVDccRluVhGuSQGGuLALAsAALss+.lKtlls.YPFLSDFcRsh-lsh.pcsYpEltpYF+.hssp+E+EpclFpTLuYhDlhNLAcRlKusVLhulGLhDplCPPST.FAAYN+lpsc.K-lclYs.auHEshssa.p-phhpaLpcl .............................................................................................................................htst....Pt.D.F-tFWpt.phpph...............p.....t..h........p......h...p..h..p.....h.....-..h....t....h...s........t.l..cs.a......c...l....s..a..p..u......h................t........s......s......p......lhuhl...h.h....P........c....................s.....p......t...t.........h......P..h...l.l....p........a..h...G............Y......h..s.................p.....t......h...................h....p.........h........l..............t..........a..........s....s..........t.....G.....a...u.s.lshDs......R..G..Qu..........................sho...pD......ts.........s...s..s....h.t......G...a..h.....h.......c...G....h....s..............-..p.h...a....a......+....c....l....a.........h......D.h.h.p....hl.-.h.l.t.s.h..s...p.....l...D...t.p......+........lushGs.SQG.G.ul.u...l.s.....s...u......u..........L..........s....s..........c.....l...pt......s...hs......h..P.....h...L......s...c....h..p..c.s...h.p....h..s.s........tt.s...Y....p..l..h...p...ah..p........s.....h.....p..t..p........t..phh.t.s..L..u.Yh.Ds..hph..A..p..+l.....p...s...s..h..h..sGh.DplC.......P.....o...aA....hYNtl.............s...s..............t......K...p...hh..l.h......huHtshtsh.....t.h......t.................................................................................................................... 
1 79 144 172 +5277 PF05449 DUF754 Protein of unknown function (DUF754) Yeats C anon Yeats C Domain This domain appears to be found in a group of prophage proteins. 25.00 25.00 27.70 29.70 23.30 21.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.71 0.72 -3.96 13 238 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 203 0 35 191 1 82.60 35 88.67 CHANGED llsAllCusIslRL.lsY.RRsGARHRhhsShlAYlLslssGstslpllhu.....phtssuhhcshltslLslhVhpARGNVA+ll+ss ...........hlNshlCssIslpL.hhY.RR.s.suRHRshhSaLAalLhlshussPlphlhG.....ph.sssahtlhlNllhsshVlpARGNVu+lhc..t... 0 3 9 20 +5278 PF05450 Nicastrin Nicastrin Moxon SJ anon Pfam-B_15452 (release 8.0) Family Nicastrin and presenilin are two major components of the gamma-secretase complex, which executes the intramembrane proteolysis of type I integral membrane proteins such as the amyloid precursor protein (APP) and Notch. Nicastrin is synthesised in fibroblasts and neurons as an endoglycosidase-H-sensitive glycosylated precursor protein (immature nicastrin) and is then modified by complex glycosylation in the Golgi apparatus and by sialylation in the trans-Golgi network (mature nicastrin) [1]. A region featured in this family has a fold similar to human transferrin receptor (TfR, Swiss:P02786) and a bacterial aminopeptidase (Swiss:P80561). It is implicated in the pathogenesis of Alzheimer's disease [4]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.59 0.70 -5.12 6 234 2012-10-02 19:46:12 2003-04-07 12:59:11 10 9 139 0 136 395 62 196.70 31 32.33 CHANGED +lllVosRhDosohFcslulGA.SslsuhlslLuAAcuL....p+h.ss.Ss.p+NVlFlhFNGEoaDYIGSpRFlY-h-pGpFP.........stsIu.ssIDhhLElGsluptpu.t...hahH....utpspolsspsh-hLpphpcShuuss.tlh.sssssstlPPoShpSFLRcssshsuVlLssasspa.N+aYHSIhDcs-Nls.......................ps....s..thplsssAollAtuLY .........................hlhsssphD..otoh.......F...........s.....l.....u.s..GAposluuhlshLusAcsL............pch......s...s.h.....s...p.h.........+...slhFs.hFpGE.sa......s.YlGSp....RhlaD...h.cp..s.phs........................................l....p.......h.......csI-...t....hlEls...pluht.ss.t........lahHss....s.t.ps...ps..h....tspl.ch...l......ssl.........ppu..h..shs.....s.h.hhh.......t...s...s...................lPP.o.S.hppFL+......t....p...........s......hs..u....ll.l.ssa..t.t.t.a..N.p.a.YpShhDs.p.tlt................................................................................................................................................................ 2 50 74 112 +5279 PF05451 Phytoreo_Pns Phytoreovirus nonstructural protein Pns10/11 Moxon SJ anon Pfam-B_15695 (release 8.0) Family This family consists of Phytoreovirus nonstructural proteins Pns10 and Pns11. Genome segment S11 of rice gall dwarf virus (RGDV), a member of Phytoreovirus encodes a putative protein of 40 kDa that exhibits approximately 37% homology at the amino acid level to the nonstructural proteins Pns10 of rice dwarf and wound tumour viruses, which are other members of Phytoreovirus [1]. 
19.70 19.70 20.00 20.40 19.50 18.10 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.08 0.70 -5.79 4 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 1 17 0 330.40 43 93.21 CHANGED MDssT-phlpLHh-llspcpGapIlpKaDAI+KLpL....stcuspsNISpAAhsplppaupppEAYlASDLs-R-l.ssshhKAlIFVPtSVlssGK..KDLlPYGVlssslIaVPETholLDslp.hlsp...++PlSshllhchh+slph-VlGSsYDoaYYCssScYGKNLIKhusshPs.PpplRLSlsDLshsAhppsHshssphl.phhs+hPsGFpPKsHlL+I.hssh-MEhhcphVpp.huccP.stFpYsDppNlLpRTThFSsscsFSahILWRGW.STYKEhLSQDQlopFhtt.GsuGshssahhTaPShF-EGslYlpYcah...TP....put+uts.hPshsph ............MDs-TpRllpLHs-llppcpGapIloKaDAI+Kl+L....sssussNNlSpAALsKLpcaAspuEAYlsSDLs-Rsl.ssslhKAlIFss+SVlosGK.lKDllPYGVhsusllalPETholLDsI..hlupp..++PhohsllhphsKslph-llGssYDoaYYCsuStYGKNLl+husthPshPsplRLSlGDLsYhAApShHshAspYl.+lFs+LPsGFoP+SHLh+I.hslL-M-pFKchVpphhA+ps.stFpY-DsKNlLpRsshFSsDHpYotlILWRGWuSTYtEhLSQ-QlophpupsGhAGDLGhaphTlsShF-EGtVYlpY+Fh...TPt........ptsSt+utslYPsL.th........ 2 0 1 1 +5280 PF05452 Clavanin Clavanin Moxon SJ anon Pfam-B_15887 (release 8.0) Family This family consists of clavanin proteins from the haemocytes of the invertebrate Styela clava, a solitary tunicate. The family is made up of four alpha-helical antimicrobial peptides, clavanins A, B, C and D. The tunicate peptides resemble magainins in size, primary sequence and antibacterial activity. Synthetic clavanin A displays comparable antimicrobial activity to magainins and cecropins. The presence of alpha-helical antimicrobial peptides in the haemocytes of a urochordate suggests that such peptides are primeval effectors of innate immunity in the vertebrate lineage [1]. 
20.90 20.90 21.20 28.70 20.70 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.81 0.72 -3.99 2 6 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1 0 0 7 0 70.50 82 100.00 CHANGED MKThILILLILGLGIsAKSLEEpKu-EEKhh+hlGplIHtlGphVathuhshGDDQQDNGKFYGaYAEDNGKHWYDTGDQ MKTTILILLILGLGINAKSLEERKSEEEKlF+hLG+IIHHVGNFVHGFSHVFGDDQQDNGKFYGaYAEDNGKHWYDTGDQ 0 0 0 0 +5281 PF05453 Toxin_6 toxin_6; BmTXKS1/BmP02 toxin family Moxon SJ, Lee SC anon Pfam-B_16009 (release 8.0) Family This family consists of toxin-like peptides that are isolated from the venom of Buthus martensii Karsch scorpion. The precursor consists of 60 amino acid residues, with a putative signal peptide of 28 residues and an extra residue, and a mature peptide of 31 residues with an amidated C-terminal. The peptides share close homology with other scorpion K+ channel toxins and should present a common three-dimensional fold - the Cysteine -Stabilised alphabeta (CSalphabeta) motif [3]. This family acts by blocking small conductance calcium activated potassium ion channels in their victim [1,2]. 25.00 25.00 41.20 40.90 19.20 19.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.59 0.72 -3.71 5 25 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 10 5 0 30 0 28.00 61 66.19 CHANGED VGCEECPhHCKGKpAKPTC-Nu..VCsCNl ..VuCE-CP.HCpsKsA+spCDNs..hCsCps. 0 0 0 0 +5282 PF05454 DAG1 Dystroglycan (Dystrophin-associated glycoprotein 1) Moxon SJ anon Pfam-B_15784 (release 8.0) Family Dystroglycan is one of the dystrophin-associated glycoproteins, which is encoded by a 5.5 kb transcript in human. The protein product is cleaved into two non-covalently associated subunits, [alpha] (N-terminal) and [beta] (C-terminal). In skeletal muscle the dystroglycan complex works as a transmembrane linkage between the extracellular matrix and the cytoskeleton. 
[alpha]-dystroglycan is extracellular and binds to merosin ([alpha]-2 laminin) in the basement membrane, while [beta]-dystroglycan is a transmembrane protein and binds to dystrophin, which is a large rod-like cytoskeletal protein, absent in Duchenne muscular dystrophy patients. Dystrophin binds to intracellular actin cables. In this way, the dystroglycan complex, which links the extracellular matrix to the intracellular actin cables, is thought to provide structural integrity in muscle tissues. The dystroglycan complex is also known to serve as an agrin receptor in muscle, where it may regulate agrin-induced acetylcholine receptor clustering at the neuromuscular junction. There is also evidence which suggests the function of dystroglycan as a part of the signal transduction pathway because it is shown that Grb2, a mediator of the Ras-related signal pathway, can interact with the cytoplasmic domain of dystroglycan. In general, aberrant expression of dystrophin-associated protein complex underlies the pathogenesis of Duchenne muscular dystrophy, Becker muscular dystrophy and severe childhood autosomal recessive muscular dystrophy. Interestingly, no genetic disease has been described for either [alpha]- or [beta]-dystroglycan. Dystroglycan is widely distributed in non-muscle tissues as well as in muscle tissues. During epithelial morphogenesis of kidney, the dystroglycan complex is shown to act as a receptor for the basement membrane. Dystroglycan expression in mouse brain and neural retina has also been reported. However, the physiological role of dystroglycan in non-muscle tissues has remained unclear [1]. 
22.00 22.00 22.10 22.00 21.60 21.90 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.91 0.70 -5.23 4 207 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 101 1 85 198 0 192.60 39 30.30 CHANGED shF+AhFsGDtptVhNDIpKKIhLVKKLAhuFGDRNoSoITL+sIT....+GSllV-WTNNTL...ppPCPc-Qlttlu++Ih-s-GpPRptFppulEP-a+hoNIoVssouSC+H..hpaIPh...c.IPotssss.sP-csscKSSpDDVYLHTVIPAVVVAAILLIAGIIAMICYR..KKRKGKLTl...EDQATFIKKGVPIIFADELDDSKPPPSSSMPLILpEEKsPLPPPEYPNpssPEThPLNQD.hGEYTPL+DEDPNAPPYQPPPPFosPMEGKGSRPKNMTPYRSPPPYVPP .......................s..Fphhhthc..t...h...s.D..lpKKhhLV++LA.huaGDpssooITlpsI..o......pGoh..lVt..a.Nsol.......hp..Cs.cptlt.hht.ph.l.h.p..p.p....h.p.....p.th....p...s...h..tsp.a.hhphshh..upCpp.....h..s.......h..s....................s.tp....hh.l..slllhhhhl.l.ss.Ihhhhhp......h++pGK........................................................................................................................................................................................................ 0 29 36 64 +5283 PF05455 GvpH GvpH Moxon SJ anon Pfam-B_16017 (release 8.0) Family This family consists of archaeal GvpH proteins which are thought to be involved in gas vesicle synthesis [1]. 
22.00 22.00 22.00 22.00 21.80 21.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.05 0.71 -4.64 4 26 2012-10-02 21:54:05 2003-04-07 12:59:11 6 1 22 0 14 71 7 161.80 22 91.78 CHANGED .ssDDp........SDp.S...GLLDQLcsLlEsLAEIE.pEsG+ppspGpIDRGsARIDYDY-VSIGLGscuRsspss-tPss-sscsE.......phEpohHlETRssDDG.-lVVlADLPGVsD-c.lDVsL-sDpsALpl+scDcVVcRlsLDcPsspITshplpNQVLEIRLstos-opG.u-ss ...............................................................................s......................thhptlttllphLs.pl.t.pc.t......tp....hp..tpu..th....s....p..t...p.hphs....YsaslphG...lssc.....s...c........t...tt..t.s.hsp.t.csc..............p..tp.shc......l-...s+t.......p-Du.-ll.VlADLPGVsc...-c.lcVsl-s-pssLsl..p...s...s...c......c...h.l.c...RltL.s.p......s.s...c.ht..ph..shpNulLEl+lpttp.........tt..................... 0 7 13 14 +5284 PF05456 eIF_4EBP EIF4EBP; Eukaryotic translation initiation factor 4E binding protein (EIF4EBP) Moxon SJ anon Pfam-B_5573 (release 8.0) Family This family consists of several eukaryotic translation initiation factor 4E binding proteins (EIF4EBP1 ,2 and 3). Translation initiation in eukaryotes is mediated by the cap structure (m7GpppN, where N is any nucleotide) present at the 5' end of all cellular mRNAs, except organellar. The cap is recognised by eukaryotic initiation factor 4F (eIF4F), which consists of three polypeptides, including eIF4E, the cap-binding protein subunit. The interaction of the cap with eIF4E facilitates the binding of the ribosome to the mRNA. eIF4E activity is regulated in part by translational repressors, 4E-BP1, 4E-BP2 and 4E-BP3 which bind to it and prevent its assembly into eIF4F [1]. 
20.20 20.20 22.60 20.80 20.00 19.20 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.40 18 197 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 97 16 104 192 1 107.00 47 76.84 CHANGED MSu....upQ...SpSpuIP.oR+VhlpDsuQLPc.sYSoTPGGTLaSTTPGGTRIIYDRpFLLshRNSPlA+TPPspLPsIPuVTh.sss.hs..........pspppsppopshpsc.......--sQFpMDI ...........................tp...o.opsIP....oRplhl..pDssQLPp.sYuoTPGGT.LaSTTPG...GT.RII.YDRKFLL-pRNSPlupTPP.ppLPsIPGVTsPss...spp............t.pp...ptsp.ptt..........t.....--sQF-MDI.................................................................... 0 30 41 66 +5286 PF05458 Siva Cd27 binding protein (Siva) Moxon SJ anon Pfam-B_5606 (release 8.0) Family Siva binds to the CD27 cytoplasmic tail. It has a DD homology region, a box-B-like ring finger, and a zinc finger-like domain. Overexpression of Siva in various cell lines induces apoptosis, suggesting an important role for Siva in the CD27-transduced apoptotic pathway [1]. Siva-1 binds to and inhibits BCL-X(L)-mediated protection against UV radiation-induced apoptosis. Indeed, the unique amphipathic helical region (SAH) present in Siva-1 is required for its binding to BCL-X(L) and sensitising cells to UV radiation. Natural complexes of Siva-1/BCL-X(L) are detected in HUT78 and murine thymocyte, suggesting a potential role for Siva-1 in regulating T cell homeostasis [2]. This family contains both Siva-1 and the shorter Siva-2 lacking the sequence coded by exon 2. It has been suggested that Siva-2 could regulate the function of Siva-1 [3]. 
21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.22 0.71 -4.70 5 98 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 64 0 54 105 0 146.10 34 86.71 CHANGED MPKRSCPFsDsAPLQLKVHVGQRELS+.GVsAERYoREIFEKTKQLLFpGAQAshD+lWsEGCuIsHpPEss+PG.TuAPc.AARGQMLIGPDGRLsRu.pAQASEAsPoG....sAshACSSCVRSVcuKusCSQCERAlCuQCl+s...CuuCuuLsColCTlsDau..DphEclLCoSCAhFEo ......................................................MsKR..Pat.ts.....QhKh+Vu.+plsp.GVhupph.pclaE..+ThpLLF.GApuh..htth.......tp....t....t......p..ps..t...s...th.p...hh.puQ.h.LlG.cG..cL...h..+s...spss.pss.ss...................ssh..s.CusC.h..R..s.ss.....s+...s.......sCupC-RslCspCsps...ChsCuslhCslCu..hs..shs...p...-pslC.sCt........................................... 0 15 20 37 +5287 PF05459 Herpes_UL69 Herpesvirus transcriptional regulator family Moxon SJ anon Pfam-B_15401 (release 8.0) Family This family includes UL69 and IE63 that are transcriptional regulator proteins. 25.00 25.00 32.70 32.60 20.70 17.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.40 0.70 -5.19 30 149 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 84 1 0 145 0 212.20 31 39.51 CHANGED tssttpploapcLlppsapLhppFptcshstphhpsl...Rctslpss.uLtshlAhsDEsLsWhKhphh+sLsl..pscD.IhsTusslhpsLhhKLpshh..pChLps........................................ps...thh+pLsph...................lpcps.pshps.lshslshlspaupsl.ttlpppshp......tltsh.spslpc.YpPGsshshlhpslpsHtppCssppCplthpthlsP..aspGhaFhss ............t...tttploa-pLhtpshtLhhpaphcshstphh+sh...Rphsltst.sLhphLA.sDETLuWhKhphh+s.LPl..pspDPlluTsuulhpsLhsKLpshh..tChLcs........................................ps.....lppLsch...............................hp+p..pshps.hphshshlsphuphl..slpphshp...............hltshsspthtt.YhPGsshshlhchLcsH.pcCps...chCcLphpphluP..YhaGhaFhs... 
0 0 0 0 +5288 PF05460 ORC6 ORC6; ORC6_1; Origin recognition complex subunit 6 (ORC6) Moxon SJ anon Pfam-B_16189 (release 8.0) Family This family consists of several eukaryotic origin recognition complex subunit 6 (ORC6) proteins. Despite differences in their structure and sequences among eukaryotic replicators, ORC is a conserved feature of replication initiation in all eukaryotes. ORC-related genes have been identified in organisms ranging from S. pombe to plants to humans. All DNA replication initiation is driven by a single conserved eukaryotic initiator complex termed he origin recognition complex (ORC). The ORC is a six protein complex. The function of ORC is reviewed in [1]. 20.60 20.60 20.90 20.80 20.40 20.20 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.19 0.70 -5.35 28 219 2009-09-11 10:55:20 2003-04-07 12:59:11 8 3 186 1 150 205 0 235.70 21 72.06 CHANGED Lpcll........sphuts.hspcllshuspLhplSp..pppssltsppEluRshlCh.lAup...............+hppth..-lsh.hlch.PlpP+sap+hhshhcpsLtsps..........................................................t.s.t...................................................................................................................................................................................t..sh-lhplsspahlssplhs..ph.phhptacpphht....................p.httt...t...........phtstpppclp.hhslh..pthh....Wht.h..............p.....t.................................thGsMh.pspsha.hopc 
.................................................tlh........sphu.s....pllptAppLhphSc....ppsuhhttppEhu+shhChclAsp.................ph.phsh........Dhs..hlphs..sls.+hYpphhphhcphLshss............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 25 71 114 +5289 PF05461 ApoL Apolipoprotein L Moxon SJ anon Pfam-B_16464 (release 8.0) Family Apo L belongs to the high density lipoprotein family that plays a central role in cholesterol transport. The cholesterol content of membranes is important in cellular processes such as modulating gene transcription and signal transduction both in the adult brain and during neurodevelopment. There are six apo L genes located in close proximity to each other on chromosome 22q12 in humans. 22q12 is a confirmed high-susceptibility locus for schizophrenia and close to the region associated with velocardiofacial syndrome that includes symptoms of schizophrenia [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.26 0.70 -5.17 21 365 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 72 0 177 313 0 241.10 27 74.16 CHANGED psSpppL..LLTpccs.WppFVspAcLsR--s-sLhcsLtpLht.hshcDpsphp+s.pt...................................+chFL+tFPplKpcLc-pIccL+ALADplD+VH+ssTIoNVVuuSsussSGlLolLGLsLAPhTAGsSLsLoAsGlGLGsAAuVTuloTullEpsspstAcscAsclsssshsphcshtchltp.ssphhshsp.............shhpslcsltpsl+Al+hs+uNPpLs..usApphhssGplSspsupQVpcshtGTA...LAMo+sARIhGuAsuGlhLhhDVhsLVc-SKcLp-GAKoEoAEcLRphApELEcKLpcLsplYcsL ........................................................................................h..............................................................................................................................................................................tp.hFlp.Fsph+tcl....pp..pIpcLcslA-pl-chH+tssl.uslsuuSsu.ss.uG.lhollGLuLAPhThGsS.LhLou.sG..hGlusAuulTul..sosls-tsppppsptpspcl.hts.tppph.c.h..tp..h.htt.h.....thhph........................phh....p.....hpth...pphp.shp..hc.....h....stst.....h.ht...t.h.....s..............t......s...........p.......l..........tps.htsss..........hthspssphhshshsuh.lh....h....Dlh.ls.p.hcLtp.....G.u+sp.Attl+t.AtthpttL..h.t..t............................................................................................................................................................................... 0 41 58 83 +5290 PF05462 Dicty_CAR Dict_CAR; Slime mold cyclic AMP receptor Moxon SJ anon Pfam-B_16614 (release 8.0) Family This family consists of cyclic AMP receptor (CAR) proteins from slime molds. CAR proteins are responsible for controlling development in Dictyostelium discoideum [1,2,3,4]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.17 0.70 -5.53 6 214 2012-10-03 04:04:29 2003-04-07 12:59:11 6 7 115 0 165 697 4 215.50 19 52.21 CHANGED hps.pEIcsoYulLLIADFoSIIGChlVLIGFWRLKLLRNHlTKVIoCFCsTSlhKDlhSTlLTLo....ssAsps.GFPCYLYAIVITYGSLACWLWTLCLAhSIYpLIVKREPEPE+aEKaYahlCWGLPLISTIlMLuKsslchVGNWCWIGspYsGYRFGLFYGPFFhIWulSAlLVGLTS+YTYpVI+NuVSDNKDKHhTYQFKLINYIIVFLlCWVFAVVNRIlNGLshFPsssNlLHTYLSVSHGFaASVTFIYNNPLMWRYaGuKllhlFThFGaFVEsQ+RLEKNKNNNNPSPhusS+uuo .......................................................................................ttt..........................h......Sh..hu...s...hh.ll.hs.a...hh...p.......h+....p.............................+..h.l.h.h......h..s...h...ss...h...h....s...h....h......s...h....h..........h...................t....t......s......sh.......h.C...hQuh.h.....l.phh....u..s..h........h.....Ws.h...s..hA....hs...l...a....h...........s..h...h...p....p......s...p..h....c..+....h...c..h.....h....Y.h..l...h.s....a.G..l..P.h...l..s..s.....h...l.....h....h..............h.........t.....p......t......h......t..............h.......G........s....W..CW..l....p.........s...........p..............h.........s...........s..........h......R.......h....s................h.......FY.s...P.l..a...h....l......h....h....h.........h...h......h...h..h....h..................................................................................................................................................................................................................................................................................................................................................................hhhhhh........................................................................................................................ 
0 74 115 151 +5291 PF05463 Sclerostin Sclerostin (SOST) Moxon SJ anon Pfam-B_16740 (release 8.0) Family This family contains several mammalian sclerostin (SOST) proteins. SOST is thought to suppress bone formation. Mutations of the SOST gene lead to sclerosteosis, a progressive sclerosing bone dysplasia with an autosomal recessive mode of inheritance. Radiologically, it is characterised by a generalised hyperostosis and sclerosis leading to a markedly thickened and sclerotic skull, with mandible, ribs, clavicles and all long bones also being affected. Due to narrowing of the foramina of the cranial nerves, facial nerve palsy, hearing loss and atrophy of the optic nerves can occur. Sclerosteosis is clinically and radiologically very similar to van Buchem disease, mainly differentiated by hand malformations and a large stature in sclerosteosis patients [1]. 20.60 20.60 21.40 20.70 20.50 19.70 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.33 0.71 -4.83 4 114 2012-10-02 16:54:34 2003-04-07 12:59:11 6 3 45 3 56 111 0 177.90 50 95.52 CHANGED hphsLl..LlClLh+uCh.........AFKNDATEll.uHs.tsh..cssNNuoLNpARNGGR+spss.hDpss.pQVGCRELRSTKYISDGQCTSlpPlKELVCAGECLPh.lLPNWIGGGa...YWSRRsuQEWRCVsD+TRTQRIQLQCpDGoT.RTYKlTlVTSCKCKRYTRQHNESSHs.pusSss+P........up+pRs+KRtuKsspppLo ..........................................hh..LhClLh+ush.........AFKNDATEIl.phs.ts................s.....ps......p....s..NsT..h.Np.AcNGGR..p....ps.....h-p.p.st.sphuCRELRST+YloDGpCpShpPlKELVCuGpClPh.lLPN....h.IGtGh.....W.Rpsu.p.-aR.Cl.sD+oRTQRlQLpC.sGss.RTYKlplVTuCKCKRaTRpHNpSph.p.....uhpsu+P........sp+..+..+t+suKsspt......................... 0 6 11 23 +5292 PF05464 Phi-29_GP4 Phi-29-like late genes activator (early protein GP4) Moxon SJ anon Pfam-B_16889 (release 8.0) Family This family consists of phi-29-like late genes activator (or early protein GP4). 
This protein is thought to be a positive regulator of late transcription and may function as a sigma like component of the host RNA polymerase [1]. 21.60 21.60 22.10 222.80 19.60 20.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.46 0.71 -4.24 3 7 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 8 0 8 0 125.40 73 99.32 CHANGED MP+TQRGIYHNLKESEYVlS..NGDVTFFFSSEMYhNKFLDGYQEaREKFNKKIsRlscT.PhNMDMLADITFYSpVEKRGFHAWLKGDNsTWQElHVYALRpMTKPsTLNWSRIpKPKLRERRKSMV MP+TtRGIYHNLKESEYVsS..NsDsTFFFSSEhYLNKFLDGYQcaRccFNKKIpRlssT.PhNMDMLADITFYSpVEKRGFHAWLKGsNsTWpElHVYALRhMTKPsT.sWSRIpKPKLtERRKSMV 1 0 0 0 +5293 PF05465 Halo_GVPC Halobacterial gas vesicle protein C (GVPC) repeat Moxon SJ, Bateman A anon Pfam-B_17013 (release 8.0) Repeat This family consists of Halobacterium gas vesicle protein C sequences which are thought to confer stability to the gas vesicle membranes [1,2]. 22.30 22.30 22.70 22.90 21.80 22.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.34 0.72 -3.95 29 50 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 7 0 20 45 0 32.00 32 48.53 CHANGED VssLhssIsshcppassspsuFcsYu--Fsss ..VssLhssIsshcpphssspsuF-uYu--Fss....... 0 10 14 20 +5294 PF05466 BASP1 Brain acid soluble protein 1 (BASP1 protein) Moxon SJ anon Pfam-B_16137 (release 8.0) Family This family consists of several brain acid soluble protein 1 (BASP1) or neuronal axonal membrane protein NAP-22. The BASP1 is a neuron enriched Ca(2+)-dependent calmodulin-binding protein of unknown function [1,2]. 
25.00 25.00 77.60 48.80 23.30 22.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.10 0.70 -4.54 6 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 28 0 28 49 0 175.80 58 98.29 CHANGED GGKLSKKKKGYNVNDEKAK-KDKKAEGAuTEEEGTPKEs..EsQAAsEoT.EVKE.spEEKP-KDApsotsKsEEKEGEK-AsssKE-A.KAEPEKoEuuA-AKsEPsKs......sEpE.AAAsuPAsuuEAP..KAuEsS.....stsAEuAAPup.--.SKEEGEAKKTEAPAu.AupEsKS-uAP.ASDSKPSSsEAAPSSKETsAATEAPSSTsKApsPAAPA-EsKsuEuP..AANSDQTVAVpE .........GGKLSKKKK.GYNVNDEKAKDKDKKAEGAusEEEsTPKEs..EsQsAu-ss.EsKE..tcpKs-pDups...KsE-KEu-K-ss.spE-s.KuEsEK.oEu..s-upsEP.ts......scpp.tAssuPusss-As..Ks.sEst.......su-s.Asst..p.u+EpGEsKKTEAPAs..hpEsKS-sAP.ASDSKPuSsEAAPSSKET.sA....TEAPSST.sKA..tP.u...uss-p...sEsP..ssNS-QoluVp-.................. 0 1 3 9 +5295 PF05467 Herpes_U47 Herpesvirus glycoprotein U47 Moxon SJ anon Pfam-B_17115 (release 8.0) Family \N 25.00 25.00 80.50 80.00 18.20 18.00 hmmbuild -o /dev/null HMM SEED 677 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.45 0.70 -6.68 2 53 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 7 0 0 25 0 119.00 85 96.88 CHANGED MLHISpLGLFLuLFAIVMHSsNLIKYTSDPLEAFKTVNRHNWSDEQREHFYDLRNLYToFCQpNLSLDCFTQILTNVFSWsIRD.QCKSAVNLSPLQNLPRsETKIVLSSTAANKSIlASSFSLFYLLFATLSTYTADPPCVELLPFKILGsQLFDIKLT-ESLpMAhSKFSNSNLTRSLTsFTsEIFFNYTSFVYFLLYNTTSCl.SNDQYFcpSPKPINVTTSFGRsIVNFcSILTTTP....SSTSASlTSPHIPSTNhPTPEPsPVTKN.TcLpTDTIKVTPNTPTITsQTTESIKKlVKRSDFPRPMYTPTDIPTLTIRhNATIKTEQNTE....NPKSPPKPTNFENTTIRIPcTFESsTVsTNsTQKlESTTF.TTIGIcEIssNhYSSPKNSIYLKSKSQQSTTKFTDsEHTTPILKFTTWQssARTYMSHNTEVQNMT...................................................DRFQRTTLKSSsE.PTlQTLSVTPKpKLPSNVTAKTEVplTNNALPSSNSSaSITcVTcEsKpsRMSASTHtEINHTEIsphTPILNAHT.EKSTTPQhsFTAET.LTTSSKsAILTWSNLL.TTPKEPLTNTSLR.TsHITTQLTTSNRTQSAKLTKAplSSQTTNIYPQTITtRST-V 
...........................................................................................................................................................................................EESLRMAMSKFSNSNLTRSLTSFTSKNFFNYTSFVYFLLYNTTSCVPSNDQYFKQSPKPINVTTSFGRA.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +5298 PF05470 eIF-3c_N eIF3c_N; Eukaryotic translation initiation factor 3 subunit 8 N-terminus Moxon SJ anon Pfam-B_17447 (release 8.0) Family The largest of the mammalian translation initiation factors, eIF3, consists of at least eight subunits ranging in mass from 35 to 170 kDa. eIF3 binds to the 40 S ribosome in an early step of translation initiation and promotes the binding of methionyl-tRNAi and mRNA [1]. 
20.70 20.70 20.80 20.90 20.30 20.60 hmmbuild -o /dev/null HMM SEED 595 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.99 0.70 -6.43 8 593 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 302 0 373 561 13 373.20 28 67.47 CHANGED +ahpssssp-usDEEDsKRVVKSAKDKRFEEhssslcpI+NAhpIsDhlslppsFDcLsKthpKsh....hp-sstsPshYI+sLstLEDaLNch.ssK-uKKphSpuNuKALNoh+QKlKKsN+..paEDcIs+aRcsPEp-s-cssE---s-ssGssspscs.ssshtsp.lst.....s.pu.................................................................................t....t.tst......hs...h..c.hhhpcscEITassVhKKLpEIhsARGKKsTsRp.......pplcpLpcLscIAcpssp....plcIhFslISApFDlNs.slSsaMPI-hWppslpshpolLDILltsssIhls.phsDpsE..Ep-spc..shsGslRVpGslluFlERlDsEFhKSLQsIDPHos-YVERLKDEsphhsLlpplQcYlEcpGcs.....+sss+lhl+RlE+lYYKscplhch.......................pptt..ss.hsshchssct.shs-sScsLMDtLsshIY.Ksss-Rl+sRAMLCcIYHHALpDcFhpARDLLLMSHLQ-NIpHhDluTQILaNRTMsQLGLCAFRtGhlpEuHpsLs-LhSouRlKELLuQGl.tsR.HE+TsEQE+lERpRQ.lPYHMHINLELLEsVYLsCAMLLEIPpMAAspaDA .....................................................ht.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h......................................................................................................................................................................................................................................................p..t..............h.............................................................................................................................................
............................................................ht.h....l.........t..t.....t.tuhL..h.a..uh.tpa..u+-hhh.uth.p.h..........-...p..............lhaNRhhsplGlsAF+.s.h.cspthL.-l......t.t+...........+-.ll.uQt........................t...s.pt...............ph.c+th..hPhH.Hlsh-ll-.h.hhsuhhl-ls..s....................................................................................................................................................................... 0 131 215 314 +5300 PF05472 Ter DNA replication terminus site-binding protein (Ter protein) Moxon SJ anon Pfam-B_17662 (release 8.0) Family This family contains several bacterial Ter proteins. The Ter protein specifically binds to DNA replication terminus sites on the host and plasmid genome and then blocks progress of the DNA replication fork [1]. 22.00 22.00 22.00 23.20 21.10 21.90 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.78 0.70 -5.29 23 673 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 604 5 60 325 12 282.80 59 95.91 CHANGED pclsspFcplppcLttLpptLsstphltA+VaslPsVt+spE+.cslspItVpphhGppAhshulctapclaIp.....pshSoKushRhPGllshsssstp...plhshIscINplKsshEplIsspss.spptRF..............EhVHpp.LPGLlTLpsYRplpslp...s.solRFuWtp......K+sl+sho+..cclLttLcKuhpssttssshs.cppWpttlspEhpplsp..LPpcspL+I+RPVKVQP.lAplaa.....psp.......pcphphssshPlIsl.....sspsPclspL.hsYDssshp....+h+spttshclLls..RLHlYh 
......................................................................RLNsTFRQhEQcLAshsspL.pQap...LLlARVFSLPpVpK-sE.....H..sPLs....pIcVp.QHLG.p-ApsLALRHaRHLFIQQ....................QSENRS.SKAAVRLPGVLCYQ..VDNhoQ.tsLhspIQ+INpLKT....TFEclVTVES....t.....LPoAARF...........................EWVHRH..L.P.G.LITLNAYRTLTlLp...sPATlRFGWAN......KH.I.IKNLpR..DEVLuQLEKSLpSP..............RS.VsPWT.REpWQpKLEREYQDIAA...........LPQpA+LKIKRPVK....VQP.IARVWY......KGp........QKQVQHACPTPLIALINpDNGAuVPD.lGEL.hNYDADNlQH...RaKPQAQPLRLIIP..RLHLYV................................................................ 0 6 18 36 +5301 PF05473 Herpes_UL45 UL45 protein Moxon SJ anon Pfam-B_17674 (release 8.0) & Pfam-B_16138 (release 10.0) Family This family consists several UL45 proteins. The herpes simplex virus UL45 gene encodes an 18 kDa virion envelope protein whose function remains unknown. It has been suggested that the 18 kDa UL45 gene product is required for efficient growth in the central nervous system at low doses and may play an important role under the conditions of a naturally acquired infection [1]. This family also contains several Varicellovirus UL45 or gene 15 proteins. The Equine herpesvirus 1 UL45 protein represents a type II membrane glycoprotein which has found to be non-essential for EHV-1 growth in vitro but deletion reduces the viruses' replication efficiency [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.35 0.71 -4.89 11 167 2012-10-02 16:37:33 2003-04-07 12:59:11 7 22 100 0 44 725 1 141.50 17 27.58 CHANGED osospsptPLsshcs...h.............suspspRs+p.s.sss.......................hhhshhslGlllTshlllushl....hslPhsuhpsusCPspWhGlssuClRssspss.....spppAsssCuthsupllspusAptLhsllsshusssus.h.....................hssDultsClpsstussh..p..CspsuhsIC.tsRslSshuphIhcsRpuLpL .................................................................................................................................................................................................................................hhhh...s.h..h......hsh.....h..l.....h........s...hl.....................h....t..........s.............t........s....h......t....t.......t..............CPpc.Whsa...s..s.sC.h.thss.sst............sappApshC.p.s..h..s.....u..p...l...h...s.......s......s..................h..........hh.t............................................................................................................................................................................................................................................................................. 0 11 14 33 +5302 PF05474 Semenogelin Semenogelin Moxon SJ anon Pfam-B_18147 (release 8.0) Family This family consists of several mammalian semenogelin (I and II) proteins. Freshly ejaculated human semen has the appearance of a loose gel in which the predominant structural protein components are the seminal vesicle secreted semenogelins (Sg) [1]. 
19.10 19.10 19.10 19.10 16.90 16.00 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -13.19 0.70 -6.22 4 125 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 26 0 7 129 0 273.00 43 97.46 CHANGED MKPIIhFVLSLLLILEKQAAVMGQKGGSKGQLsutSspFPatppu.pYSu.KDKQHsESKGShSIpHTaHVDspDHDpTRKSpQYDLNAQpKTTKScpa.tGpQELhNaKQEGR-+uKsKscFHhlVIHHKGGpspHGTQNPSQDpGNSTSGKGh.SQ.SNTcEpLhspGLuKEQsSsSGAQ+sRTQGGSQSShVLQTE-.Vs....NKQp.ETQNShpNKGpY.NVhEs+pcHSSKlQTSLasAppcRLQHGSKDlFo.........KNQ+QT+NhNQDQEHGQKAHKtShQsSST............................................................EERRLsauEpGIQKDsSKuShSpQTt-KhhsKSQpQlThPSQ-p..up+tNKhS.QSSusEERR.p.GEpGlQKsVspuph..pTE-KIHsKsQNQlTIPSQDQc........................................................................................................................SGQsAKGpSGQSsDREpDLLS+EQKsRHQctspGuhNlVIIEHEsspD++.sQ++spDpNslsT .............................................................................................S.h.lphhaHs..pstD................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 3 3 4 +5303 PF05475 Chlam_vir Chlamydia_vir; Chlamydia virulence protein PGP3-D Moxon SJ anon Pfam-B_18238 (release 8.0) Family This family consists of Chlamydia virulence proteins which are thought to be required for growth within mammalian cells [1]. 
25.00 25.00 31.10 31.10 22.20 21.10 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.63 0.70 -5.28 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 20 0 1 29 0 207.60 73 99.67 CHANGED MGNSGFYLpNTQNCVFADNIKVGQMTsPLKDQQlILGTTSTPTAAKhTAc-GIslTs-ossSTNASITlSLDuEAlhK....LIL-QIpD-LVcuIIcNITsSLIQEVIDKIpoDPSLuLoKAFKNFsITNKIQCNGLFTKcNIGTLLGGTEIGoFTVTPDNAsSMFLISADIIASRMEGsVVLALVKEGDSuPCAISYGYSSGlPNlCSLRTsVuNoGssPVTFSLRVGGMESGVVWVNALuNG-cILGlTsTSNISFLEVKPQTNG .........MGNSGFYL.sTpNCVFADNIKlGQMppPLpDQQlILGTpSTPsAAKhTAs-GIslTlSNssusNASIsluLDAEpshp....LIL-pltcpIlDuIhcsIssShlQ-llDKIpoDPSLuLhKAFpNFsITsKIQCNGLFTsSNItTLLGGTEIGKFTVTPcsusSMFLlSADIIASRMEGsVVLALV+EGDopPCAISYGYSSGlPNlCSLRTslpNoGhsPsTaSLRVGGh-SGVVWVNA..LuNGssILGhTsTSNlSFLEVh.QTNu. 0 0 0 1 +5304 PF05476 PET122 PET122 Moxon SJ anon Pfam-B_18328 (release 8.0) Family The nuclear PET122 gene of S. cerevisiae encodes a mitochondrial-localised protein that activates initiation of translation of the mitochondrial mRNA from the COX3 gene, which encodes subunit III of cytochrome c oxidase [1]. 22.30 22.30 22.50 22.60 22.10 21.80 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.86 0.70 -5.14 5 30 2009-09-10 15:37:49 2003-04-07 12:59:11 6 2 29 0 15 26 0 249.50 45 89.80 CHANGED MLlIsRRLVsT.-VR+RLYspCLNt-hsulLspLRQIsVscMDhSLLpLhLs+SspaGHl-oIsYlWYKYVhRH+.LhVEP+LLCDIuslALtQGKLFIPuQlLpHYQsh.YG+Gppt...sEhppaEYELpRIKVEuFAKGTMcsToFsEKWKVFLQ-MDpTLPuss.aclRDFPaLT+A......LhQTDEps......htphLFu-pclsVsNcoSLPLLLNhVLLQ.ssFsL-sRlRlFpcFhpoYRuLPlpDolEIlIK+s..psYh..cLhphl....RGhEIsT ...................s+Rhlos.DlRpRllLssLNtchs-ALshLRpp..ppsshDspLLpshLARAsthAHs-oIsYhWYpaVM.++.LsVcs+LLC.-MAslALap-+hFLPuQhLpHYpsh.hc++ss....sEp-hlEYELhRIKVEuFA+GTMcuTuhREKWKVFLQ-MD.o.....LPupsshRlRDFPphTKu......Lh.ppDEps......huhhLFsc.p.lsIKNcaSLPLLLshlLhp..s.hssps+.+lhtcFh.p...oY+uL..s.L.hDu.pllh+c......cGacl..................ss.................. 
0 1 6 12 +5305 PF05477 SURF2 Surfeit locus protein 2 (SURF2) Moxon SJ anon Pfam-B_18437 (release 8.0) Family Surfeit locus protein 2 is part of a group of at least six sequence unrelated genes (Surf-1 to Surf-6). The six Surfeit genes have been classified as housekeeping genes, being expressed in all tissue types tested and not containing a TATA box in their promoter region. The exact function of SURF2 is unknown [1]. 25.00 25.00 28.60 28.60 20.30 21.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.77 0.70 -5.24 8 115 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 75 0 69 99 2 210.00 30 81.53 CHANGED McElPpDV+uFLppHPsLcLs.-up...KVRChLTGHELPCpLsELQsYTcGKKYp......+Lspupss..FsYspaEPHIVsSTKs.pQLFCKLTLRHINKpPEHVLRHlpGKRYQ+ALppYE-CpcpGVcaVPApLhpKp+.+c...spcsuscpP.p+p-s.......hWtPsS....S-E-sutS-DSMoDLYPsphFs.Ks.sp.ps.t....t.-sFpTD.c-....--hthcspp.p...............pKRu...KKQsuuhpKKFKpsH+Kspphp.uSsK ..................................phtthLtppP..hph..sss.......+l+CtlTGHElPsp.hs-lpsYscuKKYp......+.L...p...s..p..p..hs.aspa-PHll..s.ss..c.p...ppLaCKLThcplNKpsEHlh+HlpG+Rap+tLpchEcspp..p..G....h.chh.....up...h..p........c.tp..p.pp.........pp.ss.st.....t.ppt......................hW.P.s......scpp....t.scDs.....hsDh...hss..t.h...tp.t.ttp....t.........................t.psh.....sc.....pp.......ccht......t...........................p........t.......t..tpc.p...............t..................................................................... 0 25 37 53 +5306 PF05478 Prominin Prominin Moxon SJ anon Pfam-B_18226 (release 8.0) Family The prominins are an emerging family of proteins that among the multispan membrane proteins display a novel topology. 
Mouse prominin and human prominin (mouse)-like 1 (PROML1) are predicted to contain five membrane spanning domains, with an N-terminal domain exposed to the extracellular space followed by four, alternating small cytoplasmic and large extracellular, loops and a cytoplasmic C-terminal domain [1]. The exact function of prominin is unknown although in humans defects in PROM1, the gene coding for prominin, cause retinal degeneration [2]. 31.00 31.00 31.30 31.10 30.80 30.80 hmmbuild -o /dev/null HMM SEED 807 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.42 0.70 -7.00 14 435 2009-01-15 18:05:59 2003-04-07 12:59:11 6 11 96 0 229 399 0 541.40 21 85.92 CHANGED luhststhssupst.....t.htas.hhssppapsps....hsstshsslashs+phls...sVpspP.hPp-hlht...lhpsshshtsphp.p.........clltaphGhllsul....IulLallLMPllGl...CFCCC.RCptsCGGchppp-pppssCc.RthhslhLllholhlhhGllhuFlsNphhppsltpospthppslcDlpsalsssspplctlhsp.YpphpspltppLsssu..phlsstlpsphcupllssLpplhp..............slptscctLpslssphpcL+ptuspLpssLpuhRpplpssLsp..Cpsp........Csslh.shs.ltls....hcasplPslsp.lpslpplscsshsshlpcGhsphpplstplpppspssl....shpptLsshssslcshupplstp.ltsshsphhpssppphcshts.lccYspYRahsulllsslllLllhh.h.hGLLsGlhG.c+pssPsccsCsussGGpFLMhuVuh.FLhshhlhllslhhFllGuNsppllCcsh...pspplh.p.hlDoPh.Lst....htluthL.........phshslpl.....uplhcsCcpNpslassL+Lpshaslschhslpp...hopc.lppphpsl+l...slssl....plLsspu+csLpshtsoslschsassalpplp+slopssLsshApcLctlAsshssss......................l+ssLppcApsl+slppshlsshpshlscLppslpsL-pp....ssphstslsplLsplptupthlsspspphltptocphhsphlshhppYlsaVhpplspclupCpPlushhDsuh.shhCshllD......PhNuFW....huluhCthhhlPslIhuVKLuphY++hc..ss.ss 
...................................................................................................h.....................................................................................................................................h.h...hhhh....hh.h....h.hhh.h.hhh.hPhhuh...hhshC..R........h.h......t.......t.......ts......s.p.........p.......pt.....+.hh....h...L.h.h.h..hh.h......hu.h.h.h.ahs..sp.hpt.t..ht...............h.psh.shp.hhpp...............s..ph.p.hlh...ht...p...ph..tlptht.....l..h..l.t....t...t..h...h..tth.t...................................sh.......h..hp.....ht...t.ht.t.lp..p.pl...htt..C.......................C......t...h..t......t.....s.............................p.t....h.........p.........thp....h...t........p.h...t...tt..t.hpth...ht.pht......h................h....t.htp.........t.h..t......p.h......h.....h...hh.t...............t........t...............h....p....ph....t....h.hh.shhh..hlhhlh.hh.h.huLhhG.hu..................ttsuu.hhhh..............uhhh.Fhh..hhhhlshhhhhhGssh.phhCtsh.........tptphh.p....hh...Dp.h.h................h..................................t....lph.....tphhptCpp..s.tshathhp.hpp...hslp...phh.p..hp....h..t..h..t..hpth..ph......shp...h........hlt.t.t..p..L.th..ts..htthp...hh.t.htt..s.........h....s...l..thttpl.pt..st...................................ht....th...t.s.thp..th.......p.h.............h......t......th..plt.lpt....................th..tht.h..lttht.hp..l.t...p.ht.hh..t.sp...thhtt..t.hppYhphh...tht.phutCtPhsthh.pps.....h..hCphhhs......sh...NhhW.....hslhh..shhhh.lP.lhhshph.phap..........s................................................................................................... 0 60 78 167 +5307 PF05479 PsaN Photosystem I reaction centre subunit N (PSAN or PSI-N) Moxon SJ anon Pfam-B_18582 (release 8.0) Family This family contains several Photosystem I reaction centre subunit N (PSI-N) proteins. The protein has no known function although it is localised in the thylakoid lumen [1]. 
PSI-N is a small extrinsic subunit at the lumen side and is very likely involved in the docking of plastocyanin [2]. 19.60 19.60 20.00 20.80 19.30 19.40 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.67 0.71 -4.19 3 69 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 37 4 31 62 1 113.70 42 77.64 CHANGED Vusup+A.sAlsQuA+PARAssVsVcuSssRRSALLGLAAlLuuTAAlAuSANAGVlDEYLEKSKsNKELNDKKRLATSGANFARAaTVQFGSCKFPENFTGCQDLAKQKKVPFIoEDLcIECEGKDKaKCGSNVFWKW .................................................t...............t......t..s....t...s.p.RtuhlhLuus.lhssA..u....s..u...s..ApAull--YLcKSKANKELNDKKRLATohANhARuaTVpFGoCpFP.NFhGCp-LAhpptV.als-DhtlECcG.+.pth..CuSp........................... 1 6 17 24 +5308 PF05480 Staph_haemo Staphylococcus haemolytic protein Moxon SJ anon Pfam-B_18620 (release 8.0) Family This family consists of several different short Staphylococcal proteins, it contains SLUSH A, B and C proteins as well as haemolysin and gonococcal growth inhibitor. Some strains of the coagulase-negative Staphylococcus lugdunensis produce a synergistic hemolytic activity (SLUSH), phenotypically similar to the delta-hemolysin of S. aureus [1]. Gonococcal growth inhibitor from Staphylococcus act on the cytoplasmic membrane of the gonococcal cell causing cytoplasmic leakage and, eventually, death [2]. 25.00 25.00 32.10 31.90 23.70 23.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -7.91 0.72 -4.27 16 511 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 209 0 20 79 0 43.20 59 96.71 CHANGED MstLs-AIsssVpAGlspDWuplGTSIlsIVtsGVshlu+l...FG ..MpGLh-AItsTVpAA...spDusKLGTSIVsIVtNGVGlLGKL...FG..... 0 2 2 20 +5309 PF05481 Myco_19_kDa Mycobacterium 19 kDa lipoprotein antigen Moxon SJ anon Pfam-B_19467 (release 8.0) Family Most of the antigens of Mycobacterium leprae and M. 
tuberculosis that have been identified are members of stress protein families, which are highly conserved throughout many diverse species. Of the M. leprae and M. tuberculosis antigens identified by monoclonal antibodies, all except the 18-kDa M. leprae antigen and the 19-kDa M. tuberculosis antigen are strongly cross-reactive between these two species and are coded within very similar genes [1,2]. 26.90 26.90 26.90 27.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.10 0.71 -4.41 3 187 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 93 0 41 162 0 147.20 38 96.88 CHANGED M+RpLpsAVAGssILsAGlSGCSSGNKS.TPSSScTToSouTTAS...PGAAAGs.KVlIDGKDQNVSGSVVCTTAuGsVNIAIGGAATGIAAVLoDGNPPpVKSVGLGNVNGVTLGYTSGTGQGNASATKNGNoYKITGTATGVDMANPhQPVNKSFEIEVTCs .....................................................h.sGC..us..t.....t....s...stts.sss.ssu...........ss.s.s..uG.....s...pV..slDGpsps..s..ss.sVs..Csssu..........G..p..hsI...u.I...G..s.....s..s....s...G....l.uAVlo.s.G.ssP.pV.cSVu..L....GsV..sG......h..oLua..sp..GsG..t..G.sAsss+DGspYpIoGTAsGs.DhuNPhp..slsKsF-IcVTC............... 0 2 19 33 +5310 PF05482 Serendipity_A Serendipity locus alpha protein (SRY-A) Moxon SJ anon Pfam-B_19519 (release 8.0) Family The Drosophila serendipity alpha (sry alpha) gene is specifically transcribed at the blastoderm stage, from nuclear cycle 11 to the onset of gastrulation, in all somatic nuclei [1]. SRY-A is required for the cellularisation of the embryo and is involved in the localisation of the actin filaments just prior to and during plasma membrane invagination [2]. 
19.00 19.00 60.30 59.50 18.80 18.30 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -13.02 0.70 -6.07 4 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 32 0 26 68 0 427.70 41 87.27 CHANGED MEpLLtQLslCsELIApG.oCssGpIuWLNEFCATFLDFAS-LKA+LPElAP+..h.EGGsNI-VETIFLCLTQVVTCITQLERTINIEus.uttsp..MTRhHFLDRIDWClRRlhsSLoQLc.pttssssssLEDHoFVELMDLALDHLEsaMEtLusposs..LhI.EEs-op-hhQLuSIVNHIVRHALAFANVALEuDKKALStLCETLLuECuTFhEsuuEhNPGHRKLEALSLERALYuLETaLNEALLHLIFsSll-LENssIp+LRcALQpp.-..Suhs-pLlSDFDTNMDRIQQIGVLAIAFSQDlKTKTIVRSCLASLESLDACIVPALQ..poo.usHHADILEHHFNpElLIFRNlIHEIIDSRSLINNYLDMLAES..IHlAsK..P+saL.LIVQMGuVlh-HFRLPVNYStL..S-.DGKRlHpDhlLIL+EC.AVVslus.VDPKRIVKRLKILYSVLAKLRDlID+s.h.+-.......s.s.......Sc+phTsATRThlR..+ShuKRQRSFV+QptsssVssPps..sSlusSlSp-uDLISFQLTEILRIs ...........................................................................................................................................................LNthCu.hhphsp.hp..h....................Ehhh.LCloQlhhClp.lEpshphpt....................o+.aFlDRlpaClp+lhh.h...........t.t.s...t....htc.sFlplhDhuLDhlt.h.ph..ppspt..............p.h.....s.hpLtshlspllppALAFANVAl.ppDK+ALouLCppllpECs..sFp..cp..s......t........c.sss.u....p....p....KLcAhoL..............EcALYtLEsalN-ALL+L...lFsshhDh-...pt...ulcKL+....sh...Lp........p.s.......sstsp..clIusFDsNhDRlpQIGlhAIAFupclKtKThlRSCLASlESLDssllPuLQ..........u..suushHu-lLppHFppEhhhF+sslpEIIDSpuLls.sh........L-hLs-p..I....cst....pp.....+phL......llQhu...tllh.cHFpL.lNh..psL....sc....p....G.pch....a......p...chlhlLpEC.p.A...l.....l...........h....ss....l-...Pp..............RIlKRhKILholLtKL+ssls..tt................................................................................................................................................................................................................ 
0 6 8 20 +5311 PF05483 SCP-1 Synaptonemal complex protein 1 (SCP-1) Moxon SJ anon Pfam-B_19633 (release 8.0) Family Synaptonemal complex protein 1 (SCP-1) is the major component of the transverse filaments of the synaptonemal complex. Synaptonemal complexes are structures that are formed between homologous chromosomes during meiotic prophase [1]. 19.50 19.50 19.50 20.10 19.30 19.40 hmmbuild -o /dev/null HMM SEED 786 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.95 0.70 -13.70 0.70 -6.76 2 74 2009-09-11 15:44:23 2003-04-07 12:59:11 7 4 40 0 35 67 0 520.20 42 79.88 CHANGED hGGDSsaFKohNKCTEsDFthPhshosLSKNtENIDoDPAhQKlshLPhLEQVuNS.sCHYQEGlpDSDhENSEshSRlYSKLYKEAEKIKKWKVShEuEL+QKEsKLQENRKIIEAQRKAIQELQFtNEKVSLKLEEtIQENKDLIKENNATRHhCNLLKETCARSAEKTpKYEYEREETRQVYhDLNNNIEKMIhAFEELRVQAENuRLEMHFKLKEDaEKIQHLEpEYpKElNsKEpQVSLLLIQ.TEKENKMKDLTFLLEESRDKsNQLEEKTKLQsENLKp..EKpcHLTpELEDIKhShQRShSTQKsLEEDLQIATKTIhQLTEEKEsQMEE.NKA+ssHShVVTEhcsTsCoLEELLRTEQQRLEpNEDQLKllTMELQKKSSELEEMTKhpNNKEVELEELKplLuEcppLL.EpKQhEKlAEELpGpEQELhhLLQsREKElHDLElQlTshpTSEpaY.KpVc-hKTELEpEKLKNhELTupsshL.LENKcLsQEsSDMsLELKppQEDI.NsKKQEERMLKQIEsLpEpEhpLRsELE.VRcEhhQptDEVKCKLDKSEEN.....................CNNL+KQlENKsK.IEELpQENKALKKKuoAEsKQLNsYEIKVNKLELELtSsKQKFtEhhssYQKEIE.KKISEEpLLtEVEKAKshsDEAVKLQKEIDhRCQHKIAEMVALMEKHKHQYDKIlEERDSELGLYKs+EQEQSSh+suLEhELSNl+sELlSlKKQLElE+EEKEKLKhE.pENTAhLp-KK 
..............................................................................................................p....s...plaSKLacEsEKIKpWKlph-u-lpQKEp+LQEN++hIEsQRKAIQELQ...FtNEplSlKLE-tIpENcDLhpcNNATRpLCNlLK-Tht+osEKhphaE.EREET+plah-.spNlp+MlhAFEpLRhQAEssp.EMp...KlKEph.phpcLcpchctEhp.KEcplulL.hp.p-KEschcclhh.LpEopcphspLpEtpp.ppE.LKpu.tcpctLhpcLp.hp.sh...pct.tsp.cslp....pLp.s...hcph.plhtpK...-sphEp.pph+..ps..ltphphshtpLpp.Lp.E.p.Rhpp....cp.p.p.h..-Lpp+sttltphhc.pp.pchp.lp..Lpp.Ltptp........pp....hpth.tphp.tpppl..h...l.....hptphtchp.....hts.ht.tpp....ph.phpt.phpp...t.h+p.pL..php.l..p.ptth..ph..h..p.p..................................................h......p.ph.......ph.....t...........................h......hc+......t....pp..tp..ct..hpppNchLKKp.stE.pp.p.h.phplspLp.Ehpshpp.htE.hpph.c-lc..pKph.t..tpL.tElcKh+hhssEulK.pcEh-h+CQpKIs-MlALMEKHKtpYD+hlEE+DuELt.hp.+EpEt.u.hctuLE.ELsphcs-l.pl+.pLc.ph.tc.K......................................................................... 0 6 8 17 +5312 PF05484 LRV_FeS LRV protein FeS4 cluster Bateman A anon Bateman A Family This Iron sulphur cluster is found at the N-terminus of some proteins containing Pfam:PF01816 repeats. 20.90 20.90 21.80 35.20 20.10 20.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.03 0.72 -4.39 4 74 2012-10-11 20:01:00 2003-04-07 12:59:11 6 6 68 1 40 71 0 55.80 44 21.33 CHANGED usEsl...s.hs-CRsCpFc.sLL.oGRCs.GcuCVss+puRpIDRFFRpNPpLAscY ........s..tslDWpGp.lcCssCsHc...slh.sp.G+..Cc.s+ACVpDRYARRIDRFFphNPsLAspY... 0 12 28 32 +5313 PF05485 THAP THAP domain Bateman A anon [1] Domain The THAP domain is a putative DNA-binding domain (DBD) and probably also binds a zinc ion. It features the conserved C2CH architecture (consensus sequence: Cys - 2-4 residues - Cys - 35-50 residues - Cys - 2 residues - His). 
Other universal features include the location of the domain at the N-termini of proteins, its size of about 90 residues, a C-terminal AVPTIF box and several other conserved residues. Orthologues of the human THAP domain have been identified in other vertebrates and probably worms and flies, but not in other eukaryotes or any prokaryotes [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.33 0.72 -3.81 139 1658 2009-01-15 18:05:59 2003-04-07 12:59:11 7 123 120 6 1163 1895 1 83.30 23 22.91 CHANGED ppCss......sC...........tpppppps.lphacFPp...cpp..hht.pWhpshptp................sppt.......tlCspHFpspsh.................ppppLpssA......VP.......olh..ttspt .........................Css..sC.......................tp..p.p..pp.s...lpha........p.F.....Pp................c..p.......hhp..pWhpshpppsh...............t..pppt..........hlCupHFp..sp.sh..........................tp.ptLp.sA.......lPolh.....s............................................ 0 346 432 912 +5314 PF05486 SRP9-21 SRP9; Signal recognition particle 9 kDa protein (SRP9) Moxon SJ anon Pfam-B_7787 (release 8.0) Family This family consists of several eukaryotic SRP9 proteins. SRP9 together with the Alu-homologous region of 7SL RNA and SRP14 comprise the "Alu domain" of SRP, which mediates pausing of synthesis of ribosome associated nascent polypeptides that have been engaged by the targeting domain of SRP [1]. This family also contains the homologous fungal SRP21 [2]. 
20.90 20.90 21.90 23.20 20.60 20.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.54 0.72 -4.24 33 286 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 247 3 199 271 1 80.00 33 58.17 CHANGED alpshppFlctSppLhpupPspTRhohKYpt........................................sssslslKsh-spsu.ClKY+TsKut-luRLlshhG.husshs ........hpoh--FhctuppLatusPp.psRhshKYpa.............................................................scupLslKspDsts..CLpY+Tspup-lp+l..p.h.hupLhphh.s............................. 0 59 99 155 +5316 PF05488 PAAR_motif PAAR motif Yeats C anon Yeats C Motif This motif is found usually in pairs in a family of bacterial membrane proteins. It is also found as a triplet of tandem repeats comprising the entire length in a another family of hypothetical proteins. 23.00 23.00 23.00 23.10 22.80 22.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.68 0.72 -3.99 88 2205 2009-01-15 18:05:59 2003-04-07 12:59:11 8 110 944 0 577 1932 242 74.40 27 19.69 CHANGED lhh.GDp.TspGGpV................l.su..usshhhsGpslAhhG..........D..hssCPts.......usssIlp..G..s.s..shhhsGpslAhpGcpssC...........Gs.....pLluu .......................................................................................................................p.sG..l....................h..sG..ssshhht..Gh..ss.AphG.........................................................................D......hs.sCspp................ssshIsp.....G.s..s....slh...l...s.GpPsA.phGc...ps.s.C...................Gu........hlh................................... 0 76 220 365 +5317 PF05489 Phage_tail_X Tail_X; Phage Tail Protein X Yeats C anon Yeats C Domain This domain is found in a family of phage tail proteins. Visual analysis suggests that it is related to Pfam:PF01476 (personal obs: C Yeats). The functional annotation of family members further confirms this hypothesis. 
20.60 20.60 20.60 20.60 20.50 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.81 0.72 -4.49 32 981 2012-10-01 23:00:54 2003-04-07 12:59:11 7 7 753 0 144 576 23 58.70 40 67.46 CHANGED MKsaAl.QGDTLDAIClRYYGRT.EGVVEsVLAANPGLAELGsVLPHGTAVELP...DVQTAPVA ...............................pshuh.pGDTl..DuLCaRaY..Gc...o..pG.ss...E.pVL.p..A..NPG.....L.....Aph....s....h..LPtGhtVplP...-lptt......................... 1 29 67 110 +5319 PF05491 RuvB_C Holliday junction DNA helicase ruvB C-terminus Moxon SJ anon Pfam-B_844 (release 8.0) Family The RuvB protein makes up part of the RuvABC revolvasome which catalyses the resolution of Holliday junctions that arise during genetic recombination and DNA repair. Branch migration is catalysed by the RuvB protein that is targeted to the Holliday junction by the structure specific RuvA protein [1]. This family consists of the C-terminal region of the RuvB protein which is thought to be helicase DNA-binding domain. 24.70 24.70 24.70 25.80 24.60 24.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.30 0.72 -4.35 126 4372 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 4328 12 944 2793 1004 75.80 54 22.41 CHANGED VDphGLDphDR+hLpsllcpasGGP...VGl-TlAAuluE-psTlE-VhEPYLlQpGalpRTPRGRlsTptAYpHLshs ...VDptGLDhhD++lLpslI-pFsGGP...VGL-TLA...AulGE.EpcTIEDVhEPYLIQpGFlpRTPRGR.lATspAYpHhGh.s..... 0 325 633 805 +5321 PF05493 ATP_synt_H ATP synthase subunit H Finn RD anon Pfam-B_3341 (release 8.0) Family ATP synthase subunit H is an extremely hydrophobic of approximately 9 kDa [1]. This subunit may be required for assembly of vacuolar ATPase [1]. 
25.00 25.00 28.40 28.40 24.90 24.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.34 0.72 -3.90 34 346 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 230 0 214 332 0 64.60 36 75.96 CHANGED hslllsolhashhuhhu.hhsPK............u.spsllpshllLou.ssCaLhWhlsaLAQhsPLluPphsspp ...................h.slllholha...shl...G..hhss....hhsPK........................GsspsllpshLl.lTu.lCCaLhWhlsaLAQhNPLluPplpp......................... 0 74 113 167 +5322 PF05494 Tol_Tol_Ttg2 Toluene tolerance, Ttg2 Finn RD anon Pfam-B_3575 (release 8.0) Family Toluene tolerance is mediated by increased cell membrane rigidity resulting from changes in fatty acid and phospholipid compositions, exclusion of toluene from the cell membrane, and removal of intracellular toluene by degradation [1]. Many proteins are involved in these processes. This family is a transporter which shows similarity to ABC transporters [2]. 21.50 21.50 21.80 25.20 21.30 21.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.92 0.71 -4.70 160 1882 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 1658 1 420 1153 1732 167.90 28 80.72 CHANGED lppssscllshlpsspsth.......pth.....hphlcphltshhDhpthuphslG.p.......a.............+.psospQ+.....ppFhptFcphLhpoYu.stlspas..s........pp..lpht..thpss.....ppshVpoplhpsss...pslplsaplpp.....tss.p..W+laDlhlp...G.lSllpshRspFsshlppp.G.l-sLlpp.......Lppcs ............................ppsspcshspL+sppsph.ptss...shlpslVcpplhPalshchhutllLGph...a.............+.pAoPtQp........ptahpuFcphLhpsYu.psLs..Yp..s........Qs......hplt....hs........stshsslcssllssss...sslpl-aphpc......psG.s..WpsYDhhsE...G..lShlsohp.spaushlcpp.G..lDuLhtpLcp..h...................... 0 114 234 328 +5323 PF05495 zf-CHY CHY zinc finger Wood V, Bateman A, Mistry J anon Pfam-B_5537 (release 7.8) Domain This family of domains are likely to bind to zinc ions. They contain many conserved cysteine and histidine residues. 
We have named this domain after the N-terminal motif CXHY. This domain can be found in isolation in some proteins, but is also often associated with Pfam:PF00097. One of the proteins in this family (Swiss:P36078) is a mitochondrial intermembrane space protein called Hot13. This protein is involved in the assembly of small TIM complexes [1]. 23.30 23.30 23.30 23.60 23.20 23.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.78 0.72 -3.82 76 971 2009-01-15 18:05:59 2003-04-07 12:59:11 7 24 596 2 499 817 15 75.40 32 23.00 CHANGED CpHYcps...stl+ssCC..scaYsCt................pCHc-tp.........sHphpRhs.p........tllCshCppt..shsph.............tss.....phhCshCp........ha. ........................CpHYcpp...stl+ssCC..s+..a......Y.sCh................pCH.sc..t.p......................................cH.sh...p+hshp...................tllCuhCppp.shsph...........................tps..............hCshCphh................................................................................................. 0 165 308 434 +5324 PF05496 RuvB_N Holliday junction DNA helicase ruvB N-terminus Moxon SJ anon Pfam-B_844 (release 8.0) Family The RuvB protein makes up part of the RuvABC revolvasome which catalyses the resolution of Holliday junctions that arise during genetic recombination and DNA repair. Branch migration is catalysed by the RuvB protein that is targeted to the Holliday junction by the structure specific RuvA protein [1]. This family contains the N-terminal region of the protein. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.40 0.70 -5.32 110 5721 2012-10-05 12:31:08 2003-04-07 12:59:11 7 23 4390 12 1243 22353 7697 210.90 46 58.50 CHANGED Rl.lssp....tttp.....-....pshEtuLRPppLs-alGQcclKcpLplaIpAA+pRsEuLDHVLLaGPPGLGKTTLApIIAsEhGVsl+hTSGPslE+sGDLAAlLTsLpssDVLFIDEIHRLspsVEElLYPAMEDaplDIlIGcGPuARol+l-LP.FTLVGATTRuGhLouPLRDRFGlhtRL-FYss--LppIlpRoAclLslslss-GAtEIA+RSRGTPRIANRLLRRVRDaApVcuc .............................................................................h..............-..p..h-..ps.L..R...P........p....p....L....c.....-....a.....l.....G.Q...p....c.l...+....p........p......l.....c........l....a........I.............c............A............A...........+........h............R................s............-................s...............L............D..................H..............l...........L..l....a.G.P.PGLGK...TTL..A.....p........I....I...A...s...........E.......h..........G........l........s............l..........+............p........T..........S...............G..........P...........s.......l............-...............+..............s..............G..............D..........L................A..............A...............l............L............o..........s..........L..........p......................t..............D.............V...........L........F...........I.D...........E.....I.....H......R.....L...s...................s...............l......E..........E...........l...........L....Y..P...A........M......E....D.......a.............p....l.......D......I.....h......I.....G......p.......G.......s.......u...A.....+...o...l........+......l....-........L............s...................F......T..L..l.G..A.....T....T.....R......u..G............L...o..sP..L....R..D..RFG..l..h...+h-.a..Y..p.....t..-....L...ptI..l.t.....R...o...A.t..h..h...t...h...t........h...pt...p.u.A....hE.lA....pR.uR..G.TP..R.I.AN....RLL+RVRDaApVpts........................
............................................................................................................................................... 0 440 849 1068 +5325 PF05497 Destabilase Destabilase Finn RD anon Pfam-B_4147 (release 8.0) Family Destabilase is an endo-epsilon(gamma-Glu)-Lys isopeptidase, which cleaves isopeptide bonds formed by transglutaminase (Factor XIIIa) between glutamine gamma-carboxamide and the epsilon-amino group of lysine [1]. 26.20 26.20 26.40 28.10 25.30 26.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.35 0.71 -4.01 28 193 2009-09-11 15:14:13 2003-04-07 12:59:11 7 4 73 4 107 201 1 109.90 35 68.13 CHANGED lossCLpCICcspS.GC.tsls.CchD.h.p.uCGhFpIppuYWhDsG+...PGss........apsCusDhsCusssVpsYMp+Yt....psCsss....CcsaARlHsGGPpGCc.....ps.shsYhpplppphs ......................sptCLtClCpstS.GCptshh.Cp.s.......sCGhFpIohsYWhDuGc......sscs.....tt..uapsCss..D.pCAspsVpsYMp+as..pcCssct..sCpDautlHphGshGCp.....s.p..h.s..h..tY.phhppCh.p................... 0 43 56 93 +5326 PF05498 RALF Rapid ALkalinization Factor (RALF) Finn RD anon Pfam-B_4453 (release 8.0) Family RALF, a 5-kDa ubiquitous polypeptide in plants, arrests root growth and development [1]. 21.30 21.30 21.30 22.00 20.30 20.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.46 0.72 -3.15 34 313 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 35 0 202 290 0 64.10 36 55.35 CHANGED tttt.shhsu.sspRhhttp......pYIS.YsALc+splPCs.pcGsoYY..sC.cssspANPYsRGCSsITRC+c .......................s.........s..tpRhlttt.........pYIS.YuA.Lp+s.plPCs..ppGs.SYY...NC.ps.s..........stANPYsR..GCotITRCRt.............. 
0 40 107 152 +5327 PF05499 DMAP1 DNA methyltransferase 1-associated protein 1 (DMAP1) Moxon SJ anon Pfam-B_38340 (release 8.0) Family DNA methylation can contribute to transcriptional silencing through several transcriptionally repressive complexes, which include methyl-CpG binding domain proteins (MBDs) and histone deacetylases (HDACs). The chief enzyme that maintains mammalian DNA methylation, DNMT1, can also establish a repressive transcription complex. The non-catalytic amino terminus of DNMT1 binds to HDAC2 and DMAP1 (for DNMT1 associated protein), and can mediate transcriptional repression. DMAP1 has intrinsic transcription repressive activity, and binds to the transcriptional co-repressor TSG101. DMAP1 is targeted to replication foci through interaction with the far N terminus of DNMT1 throughout S phase, whereas HDAC2 joins DNMT1 and DMAP1 only during late S phase, providing a platform for how histones may become deacetylated in heterochromatin following replication [1]. 25.00 25.00 25.00 25.30 24.50 24.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.72 6 129 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 103 0 88 119 0 152.70 44 34.72 CHANGED EEEhLlsELRKIEsRKKEREKKsQDLQKLIotAD..........oTuptR.KpEKKhhKK..Klspp.ts.+pspclsVsE..ouGIKFsDhKuuGVoLRSQ+MKLPusVGQKKlKAlEQhLpEhtVDhsPsPTE-IsphFNELRSDhVLLhELKpAhusCEaEhpoL+H+aEALs.GKslshP ...........................................EEphLltEL+KIEtRKKERE++sQDLQKLIotAD.......................sssp.R....+.tE++h.KK.......Kls.p.p.........+p.s....pt.ssss-.............suGIKFsD.h.+uuGVpLRSQ+M.....K.....LPu...ulGQKKlKulEQhL.pE.luV.-.............LsP.sPTE-lsp.FNELRSDlVLLhEL+pAhusCEaElp.LRHpaEAhs.utsh...t................. 0 29 39 66 +5329 PF05501 DUF755 Domain of unknown function (DUF755) Finn RD anon Pfam-B_4891 (release 8.0) Family This family is predominated by ORFs from Circoviridae. The function of this family remains to be determined. 
29.10 29.10 30.00 39.50 29.00 29.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.77 0.71 -4.06 21 74 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 29 0 0 75 0 125.50 27 50.26 CHANGED ps.cpshhsPIsSpKQYKLtsQppps.ptssTsGTo-sshLpppLLKECpKTupLhpL.ppl..............ppccpppsspspp+++K+p+psStppuKKhpsK++.......tpssSSSSSSssuoSSSppSS .........pph.tPlsShcpYK.hTppsps.pssSTsGTs-sssLtppLLKECppTppLh.phh.ppltpp.............ttpp....pp....ppps...s...ppppccpp.+p+phstp..pt++.ppp+++............pppsSSSSSSssssoosppo.s..................... 0 0 0 0 +5330 PF05502 Dynactin_p62 Dynactin p62 family Finn RD anon Pfam-B_4912 (release 8.0) Family Dynactin is a multi-subunit complex and a required cofactor for most, or all, of the cellular processes powered by the microtubule-based motor cytoplasmic dynein. p62 binds directly to the Arp1 subunit of dynactin [1,2]. 22.90 22.90 23.20 22.90 22.10 22.80 hmmbuild -o /dev/null HMM SEED 483 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.84 0.70 -5.73 11 399 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 215 0 284 414 0 252.30 19 75.82 CHANGED P.EpLlaCEDCHQIKCPRCloEEIssaYCPsCLFEoPSStlR.EtN....RCARNCFsCPhCsuPLuVssl-s...........ssuussppGPasLoCuYChWoohDlGlpF-KPssIphQLuK.....................hp....t....su+tht......-h+pshSoauoh-s............tpts...pp.sssLsh-s+FssLKuFYKsQlusssoussD.hth.chus....uFuuPs..uLsRhMslYsuuupl.shhs.spKKs+s+P.s.hR-ALssoEGLpl..ts.sEsshIpphss.......puasulASl-QhhhQ.sPss.........RFsE-LLPlPVLLRTKRSKRCcsC+HILlKPEhKspSsRFRI+LlAlsYIP..sl+...............................sL..ss...............s...s.sssslcsLpPh+shQllLTlKNhhF-PV+VTLuTPusT.PGRluoK...VTlLCPpF-IsuNpDsWpEALp............................hssus.c.uS+uushshst-+ssEuGKVW-+GRNWTolVLEVVPuoLs.......sttssp.phppccDs 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 99 143 231 +5331 PF05503 Pox_G7 Poxvirus G7-like Finn RD anon Pfam-B_4957 (release 8.0) Family \N 20.00 20.00 20.10 100.60 19.80 17.00 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.08 0.70 -5.85 11 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 42 0 0 50 0 356.90 62 98.52 CHANGED MtEt..KQS.....pIFsIloKullpSls+sh..shsssYIpuKAKpLhYssssoh+-ulINuIYspsEssIslsshscLhclLspL+ppusYVsNssEFWRLYNSLhRFTHspSFFssChPTIlsTLATLlTLlLuNcLLaAA-MVEulEsYLFs.upKs.uQELADLL-MKYGLINLVQYKIhPlllGppt......p..hhuu..uuss.ssassEl-+LhELPVKoshlsplYcFLsc+GlsToNNaAEYlAGLKIEElsp...........................tss.t...t.s..sht.ppptshtthc............lLcpApKYSKGHVLDGuVoSPlTtpt.lsspIP..............hosoDlcKFsILEYLYlMRVhANsIK+Ks..tpspsp.GIsLsINS.PFKoITlPus .........t.tEQ..RpS.....TIFDIVSKsIVpSVLRDI..SINoEYIESKAKQLCYC.suSKKESVINGIYNCCESNIcIhD+EpLLKILDNLRsHSsHVCNuoDFWRLYNSL+RFTHTTuFFNTC+PTIlATLsTLITLILSNKLLYAAEMVEYlENpLsS.SNKSMSQELA-LLEMKYALINLVQYRILPMIIGEs..........IIlAGhsGKpPhS-YSuEVERLMELPVKTDIVsssYDFLuRKGIcTSNNlAEYIAGLKIEElpKspphhs................shushANSshlKs+K...SIhPA..NIND+pIMEso+............hLDsuEKYSKGacsDGAVTSPLTuNsT.IooaIP..............ISASDMQKFTILEYLYIMRVMANNVKKKN...EuKNsGGVVhpINS.PFKlIslPp.s.... 0 0 0 0 +5332 PF05504 Spore_GerAC Spore germination B3/ GerAC like, C-terminal Finn RD anon Pfam-B_5052 (release 8.0) Family The GerAC protein of the Bacillus subtilis spore is required for the germination response to L-alanine. Members of this family are thought to be located in the inner spore membrane. Although the function of this family is unclear, they are likely to encode the components of the germination apparatus that respond directly to this germinant, mediating the spore's response [1]. 
21.30 21.30 21.40 21.40 20.60 20.90 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -10.82 0.71 -4.48 137 1584 2009-09-10 14:51:44 2003-04-07 12:59:11 6 5 321 1 349 1395 6 166.40 21 45.64 CHANGED lsGsAlF.+s..cKhlu.hLstp-spshphlpschps.Ghlslphsppt................................................hlsh.plhpsc.sclcsph.cssch.p...hpl..plphcupltEhp.sths..h.s....tchlpplEcthpcplccphpphl.cchQ.chpsDshGhGchh+p..phscha.p.h.ppcW..p...ch.aspspl.clpVclpIppsGhhp ....................................sGhAlh..+s...sKhhu.hLstc-shshphlp..sc.hp......t..uhhp.hp.h.spst................................................hlsh..ph.h.psc...sclcsph..pss..p..phpl...clphcupltEht..pshs....h.p.....cphcplcctlpcplccchpphl.c.phQ.chpsDshGhGchh+p..pp.chW.p.h...pcpW....p.....ph..as...cs.pl.plplclpIppsGh..p.................................. 4 178 289 304 +5333 PF05505 Ebola_NP Ebola nucleoprotein Moxon SJ anon Pfam-B_8475 (release 8.0) Family This family consists of Ebola and Marburg virus nucleoproteins. These proteins are responsible for encapsidation of genomic RNA. It has been found that nucleoprotein DNA vaccines can offer protection from the virus [1]. 
25.00 25.00 186.20 186.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.16 0.70 -6.46 4 104 2012-10-01 19:59:50 2003-04-07 12:59:11 7 1 21 0 0 58 0 391.30 83 98.86 CHANGED hDYHKILTAGLoVQQGIVRQ+lIsVY.VNsLEuICQhIIQAFEAGlDFQ-sADSFLLhLCLHHAYQGDa+LFLcSsAVpYLEGHGF+FEV+K+-sVpRL-ELLPssouGKNl+RTLAAhPEEETTEANAGQFLSFASLFLPKLVVGEKACLEKVQRQIQVHAEQGLIQYPTuWQSVGHMMVIFRLMRTNFLIKFLLIHQGMHMVAGHDANDuVISNSVAQARFSGLLIVKTVLDHILQKTDpGVRLHPLARTAKVKNEVsSFKAALSSLAKHGEYAPFARLLNLSGVNNLEHGLYPQLSAIALGVATAHGSTLAGVNVGEQYQQLREAATEAEKQLQQYAEoRELDsLGLDDQE+KILMsFHQKKNEISFQQTNAMVTLRKERLAKLTEAIT.........tASps+lus+asDDs-IPFPGPIpssspss..-DsPpDSRDTsIPssVlDP.Ds-.spYsuYp-sthssssDLsLFsLD-D-.DDo+shP........ptsp..Gpps.phpsh.+PP...PGspps.p.+tStshsssspppt..............PptspusRh..LoPlpEEs-s.DpsDs-spSLssLES-D-.....EQs.Dhot.....VAPPAPVY+s.tctctlspsppNs.spTsphtsp-uDh......sSpsppshEETYhHlL+oQGPF-AlNYYHhMpDEPlhFSTccGKEYhYPDSLEEsYPPWLoEKEtl-cENRalslcsQQFhWPVMSh+-KFhAl ..........................................................................................................................................................................................................................VuQsRFSGLLIVKTVLEFILQKTDSGVsLHPLVRTSKVKNEVASFKQALSNLARHGEYAPFARVLNLSGINNLEHGLYPQLSAIALGVATAHGSTLAGVNVGEQYQQLREAAp-AEhpLQ+htEppElpulu.DDpERKILcpFH.p................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +5334 PF05506 DUF756 Domain of unknown function (DUF756) Yeats C anon Yeats C Domain This domain is found, normally as a tandem repeat, at the C-terminus of bacterial phospholipase C proteins. 
24.40 24.40 24.80 24.40 24.20 24.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.08 0.72 -3.50 86 1055 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 296 0 344 1014 25 89.40 29 23.80 CHANGED su+AlPYp.clpsphcsuss.plpLph..sN...s......G..psuu.......sFpVhs....pt.t.....h.s........stP.............................cpYsVpu...........Gpp.lpspW..sh.ssssGhYDLtVhG....PsG..FhR+FsGc ......................................................................ttshshp.cl...ps.....p.hc.....ss....s.....u....slpLph...sN......s.........G......pt..us.......sap.Vhs.........tt.h.....t.t.................stP..................................................................t.p.a.sVpu...........Gpp...lps..p.W...sh...sss...suhYD..l..s..Vpu.....ssG....FhR+FsGc............... 0 78 159 274 +5335 PF05507 MAGP Microfibril-associated glycoprotein (MAGP) Moxon SJ anon Pfam-B_8462 (release 8.0) Family This family consists of several mammalian microfibril-associated glycoprotein (MAGP) 1 and 2 proteins. MAGP1 and 2 are components of elastic fibres. MAGP-1 has been proposed to bind a C-terminal region of tropoelastin, the soluble precursor of elastin. MAGP-2 was found to interact with fibrillin-1 and -2, as well as fibulin-1, another component of elastic fibres this suggests that MAGP-2 may be important in the assembly of microfibrils [1]. 
28.00 28.00 29.60 40.60 22.50 27.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.86 0.71 -4.36 4 107 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 40 0 49 96 0 120.30 53 77.79 CHANGED hhhGsKsLLhLsAhllsSshhPLGVsuQRGDDVp.spsETFTEDPNLVN....DPuTDET.........VLADItPSTDDLAu......s-KNsTsECRDEKFsCTRLYSVHRPlKQClHQlCFTSLRRMYllNpEICSRLVCKEHEAMKDE .......................................................hs.hhLLhh..h.lsp.t.h.lts.s.hsDpl...p.ps.h-sP.sh.h.s...............sPtsspp...............tl.t-...l...h..Pusss..ss........sscsssh....-CR-EpasCTRLYSVH+PsKQClpp.lCFhSLRRhYllNKEICsRhVCtccEhh+s-........... 0 3 7 16 +5336 PF05508 Ran-binding RanGTP-binding protein Wood V anon Pfam-B_37054 (release 8.0) Family The small Ras-like GTPase Ran plays an essential role in the transport of macromolecules in and out of the nucleus and has been implicated in spindle and nuclear envelope formation during mitosis in higher eukaryotes. The S. cerevisiae ORF YGL164c encoding a novel RanGTP-binding protein, termed Yrb30p was identified. The protein competes with yeast RanBP1 (Yrb1p) for binding to the GTP-bound form of yeast Ran (Gsp1p) and is, like Yrb1p, able to form trimeric complexes with RanGTP and some of the karyopherins [1]. 
25.00 25.00 39.90 25.40 22.10 24.00 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.79 0.70 -5.53 18 126 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 112 0 95 128 0 299.00 40 58.01 CHANGED MDplLu+susQAVoF....AIRSGIolASsYAl+pso+aLcplsps-p......pclcpl+p+LcoKIcIlSsAIDLIcLhuAR..........GNTsLEuslsLscsL+t-IspFsp+lsphspt..............spsspspccslppl.cthKcLLsRI--AlPLlNLulTTSGsslsosL.....ssslSPuRLLQASsalsp..............................usspasts.p............slQlGPoFsLohYslFhs.s.+.t.........................-u.pplsWKEsht+upl+lhRh....................................scpscYuYpLcIhEDasDGRYH---..............tshphplslppIs+lFFosSGKLLplpsp....soPVLlLKhsps.ts ........MDthLs...+hspQAhsa....AIRSGIulsusYAlpphophlp...................p............lstppt......pclppLptcLps+IpIlSsAIDhIcLhAAR..........GNTsL.........ESsltLs+sL+.-Ipphsp+lsphsp..p............spsppppctplptl.ptl+pLLtRI--slPLlNLulTTSGssLoosL.....PsslSPuRLLQASsalot..................................................usppas.s.pp...........slQlGPoFsLolYhlFhshs.p...t................................................-u.ppssWKEsh++A+l+lhRhs...................................uptsca.....uYplhIhEDhDDGRhHs.-t............h..ssh+chlslppIsKlFausoGKlLpltsp....ssPVLlLKhDhp.h.............................................. 0 18 46 79 +5337 PF05509 TraY TraY domain Moxon SJ, Bateman A anon Pfam-B_8963 (release 8.0) Domain This family consists of several enterobacterial TraY proteins. TraY is involved in bacterial conjugation where it is required for efficient nick formation in the F plasmid [1]. These proteins have a ribbon-helix-helix fold and are likely to be DNA-binding proteins. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.04 0.72 -4.19 28 257 2012-10-02 18:44:02 2003-04-07 12:59:11 6 3 175 0 28 195 6 48.00 45 68.00 CHANGED lhlpLcpcsNchL.putc+SGRSKppEAtlRLpDHLpcFschhpsphpt ..........hpLD--TNphLltApsRSGRoKThEsthRLpDHLp+FPDFYstEh..p................... 0 4 9 15 +5338 PF05510 Sarcoglycan_2 sarcoglycan_2; Sarcoglycan alpha/epsilon Moxon SJ anon Pfam-B_9181 (release 8.0) Family Sarcoglycans are a subcomplex of transmembrane proteins which are part of the dystrophin-glycoprotein complex. They are expressed in the skeletal, cardiac and smooth muscle. Although numerous studies have been conducted on the sarcoglycan subcomplex in skeletal and cardiac muscle, the manner of the distribution and localisation of these proteins along the nonjunctional sarcolemma is not clear [1]. This family contains alpha and epsilon members. 27.00 27.00 27.20 28.10 25.60 24.30 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.25 0.70 -6.16 5 222 2009-09-12 20:59:17 2003-04-07 12:59:11 8 3 82 0 95 170 0 320.00 44 91.77 CHANGED stlhLLs..hhulLstspSs+ht......TlploVGcLFsHpLEschF....sasscssopp...loY+soLcGYPDLPsWLRYpQcssY+sGFLYGSPTscsAGp.lsIEITAaNRcoFET+RpcLhLsIscsps..LPYQAEFpI+NhNl-chLsospl-sF+stl+s.LWptsPc-LpVlNl-SALDRGGRVPLPPp.pKEGVaV+VGSsspFSopL+El...VpP..pl.uCoQcpsPVtosa-shhopF+lDWCsFpLV-Los.........pssp-ppshp.stG-GhLacDsaauPPs-clscRDYhs-FlloLAlPuhIsLlLhllLuhIMCFpREGhhc+cpc.................ssclQhsHHSuIp.......+SThcLRpMAcsp-lshPLoTLsSacshsu.............-R.PP.spcpsDsTsMPhhpssp 
......................................................................h...........slh.t...tssp.........slhs.sGhlFVHsL-p.chF...........h.s.s.sp...........lTapssL.GaPDhPtWLRYhQRosapsGhLYGoPTs-shG+.plIElsAYNRcoF-TsRppLllp..Ihs.s...E...t....LPYQAEFhl+shsVEEhLs..S..pshscF.LsAlps.lWp....PtcLphlNITS..ALDRGGRVP....LPIp...shKEGVYlhVGusssFSsCL+.l.....sPtpph..pCs..p..t.tsllss.cph..hspFhlDWCplsL...V-hsh.........................ss.p.sh.................Gp.GlL...cs..s.....apPPp-s..s..Rs...ahsD..h..llTl.hlP...hlALlLhhlLualMCsRREGh..t.p..t....................................ss.tIQhlHHssIp.......tsTcELRpMu..tsRclsh.PLSTLPhFpshoG.............EhhPP.....ph-ssphPhh.sp..................................................... 0 20 27 57 +5339 PF05511 ATP-synt_F6 Mitochondrial ATP synthase coupling factor 6 Moxon SJ anon Pfam-B_9347 (release 8.0) Family Coupling factor 6 (F6) is a component of mitochondrial ATP synthase which is required for the interactions of the catalytic and proton-translocating segments [1]. 27.90 27.90 28.50 31.60 27.70 27.80 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.32 0.72 -4.10 17 159 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 102 5 80 157 0 92.10 42 71.49 CHANGED Mhh.....phsthhtssholphRRNlGlo.Ashhs..KthDPIQpLFlDKlREY..KppsoGG.hVDAuPEhp+-LpcElpKLpphYGGu...DMspFPpFKFp- ...............................................h.phhshth.+RN.luho.Ashhs...Kt....hDPIQ+LFlDKIREYpsKppu.uG.G...VDuuP-ap+-LpcELpKLpphYGsu.....DhspFPpFKFp-.......... 0 25 33 56 +5340 PF05512 AWPM-19 AWPM-19-like family Finn RD anon Pfam-B_6960 (release 8.0) Family Members of this family are 19 kDa membrane proteins. The levels of the plant protein AWPM-19 increase dramatically when there is an increase level of abscisic acid. The increase presence of this protein leads to greater tolerance of freezing [1]. 
19.90 19.90 20.70 20.50 19.60 19.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.77 0.71 -4.19 12 134 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 29 0 80 129 1 126.30 45 74.10 CHANGED hLNLsMYlllluluuWulN+tI.s.Ghp.tsuhu....h.Phah..GNsATGFFl.hFullAGVVGsASsLsGhpH....lRsWpspSLsuAAouuhlAWuLThLAMGLACKEIp.......lGt.RshRL......RshEAFhIILohTQLLYlhulHuGh .............................hLNhsMYlIlhuluuWulN+tI.s....uh..tsshs.....................GNsATsaFl.hFuLlAGVVGhAS.slsGhpH....lRs.WpscSLsuAuusuhlAWsLTlLAhGlACKEIp.......lut..Rst+L..................+slEAFhIILuhTQLhYllhlHuG.h........... 1 15 51 67 +5341 PF05513 TraA TraA Moxon SJ anon Pfam-B_9521 (release 8.0) Family Conjugative transfer of a bacteriocin plasmid, pPD1, of Enterococcus faecalis is induced in response to a peptide sex pheromone, cPD1, secreted from plasmid-free recipient cells. cPD1 is taken up by a pPD1 donor cell and binds to an intracellular receptor, TraA. Once a recipient cell acquires pPD1, it starts to produce an inhibitor of cPD1, termed iPD1, which functions as a TraA antagonist and blocks self-induction in donor cells. TraA transduces the signal of cPD1 to the mating response [1]. 21.30 21.30 21.30 21.40 20.60 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.58 0.71 -3.94 2 236 2009-09-11 07:37:51 2003-04-07 12:59:11 6 2 185 0 14 133 1 112.60 63 97.55 CHANGED MNhshuhtGhsAPVKp+ua.asphshhshhphtthsh.AAl..hhh.tlu.hApupDLhAuGpssVKATFGtDS.VhhhlllAEllVGssMYhhTKNlhhLhGhsll.VFhsVGhshlt ..........................MsslLSVQGASAPVcKK.SF.FSK.F.TR...........LNMLRLs.R.AVIPAAV.L.MMF...FPpL..A.......MAA..p...G.pDLMASG.NsTVKATFGKDSSVVKWVVLAEVLVGAVMYMMTKN.V.K.F.LuGFAIISVFIAVGMuVVG..................... 
0 0 1 10 +5342 PF05514 HR_lesion HR-like lesion-inducing Finn RD anon Pfam-B_6954 (release 8.0) Family Family of plant proteins that are associated with the hypersensitive response (HR) pathway of defence against plant pathogens [1]. 22.60 22.60 22.60 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.65 0.71 -4.35 10 108 2012-10-02 13:32:46 2003-04-07 12:59:11 6 3 32 0 56 176 63 133.20 48 77.07 CHANGED MuFlSFlGRVLFASlFlLSAaQEFsEF..GsDGGPAAKsLpPKaNhhssplosph....GhtVPpl-lKplVAAsIuLKGlGGLLFIaGSShGAYLLLlY.AhsTPllYDFYNYch-c.............................................................s-FspLFh+FsQsLALlGALLFFlGM ...................................................MuFhSFhGRVLFASlFlLS.AaQcFsEF..G.s..DG..G.PA.A..KtLp.P.K.h.sl.h.p.p.plo.spl....Gh.t...l..P.c.............l..-...lKp..llA.us..I.s..LKGlGGl...LF...l...hu..S.o..h..G..AaLLL.l..aLuh.h.TPI..l.YDFYN.Y.c.h.-.p............................................................scFspLhhcFhQNlALhG.A..LLFFlGM......................... 0 11 36 47 +5343 PF05515 Viral_NABP Viral nucleic acid binding Finn RD anon Pfam-B_6916 (release 8.0) Family This family is common to ssRNA positive-strand viruses and are commonly described as nucleic acid binding proteins (NABP). 
21.50 21.50 21.50 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -3.85 8 209 2012-10-01 20:03:16 2003-04-07 12:59:11 6 2 29 0 0 374 1 97.30 38 84.68 CHANGED MHPaDFN................................LLC.....................................CLH..............................FS+PsLPpDL+lhIastsssspKLsR+sppNKPFpGTSKCAtRRRAKRYNRCFDCGA..aLhcsHsCK..lFsSRApSDsLuVIHEGPAKLYAERoaR.NScAtQLItsDL.hhK ............................................................................................................................................................................................................................................a.p.GpS+sAtRRRA+RYsRCacCGu.......hhhss+.C+..hhsotups-sLtsI+cGss+LhuEp................................................. 0 0 0 0 +5345 PF05517 p25-alpha p25-alpha Finn RD anon Pfam-B_6873 (release 8.0) Family This family encodes a 25 kDa protein that is phosphorylated by a Ser/Thr-Pro kinase [1]. It has been described as a brain specific protein, but it is found in Tetrahymena thermophila. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.33 0.71 -3.97 24 380 2009-01-15 18:05:59 2003-04-07 12:59:11 7 17 149 3 285 336 10 138.70 30 57.31 CHANGED lcpsFcpFs.aG......pustp-Mcs+pFuKhhKDspll..spK.lTsTDsDllFuK..VKupss++.IsappF.ccuLsplApK+tt........pspplhpclh..tutuPthsu....spussVchhsDpopYTGsHKpths..soscsp..GhuGhtphsDco.........ptsshshph .............................................................pp.FptFs.aG..........pusspphssppasKhh+Ds..tlh............Ds+...lTsTDsDllFsK.......l....K......u..+..ssc......p...Is..app..F..pcAl.p.c.lApc+ht........................ph.phhtplh.....tstsPthsu.......................ttsssspphsDpopYTGsHKpphs..tsscsp..uhuGh.phh-tp..................t.s......h................................................ 
0 115 149 205 +5346 PF05518 Totivirus_coat Totivirus coat protein Moxon SJ anon Pfam-B_10221 (release 8.0) Family \N 25.00 25.00 34.70 34.60 19.90 19.90 hmmbuild -o /dev/null HMM SEED 759 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.20 0.70 -6.70 7 23 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 20 0 0 24 0 622.50 31 97.28 CHANGED LsuslssstuGshhss............ssaR+YcAtlphpusstGshpushssIaaElGt+ht........+sscshtssstpuhhl-suhs.sss.stsa.shsR+aps.......huspahhhslsullc+lutslAshslhssh.hsstuh+tspshpVsuLuohDu.psussuuV..alPpslc.shssclFslLh.sAsupGusVssDslpLDusTspPhlPslsssshttAhs.sLpllGu.MpuustGshaAhALTpGlHsVloVVuHoDEGGhhRDlLRpstap.PaGGlshuLp...pasulsshspsuhushsuhsDulALsTAAlVAHCDPh..hssshaPTlhssphsssst.ssu.........t...ttpltushs+FuphYhcuLuhhFultuss.thsspplhpAusphlstssp...RHLchsosAPaFWlEPTuLls.hshhGosAEttGYuuhshh.GssRopPsa-shchhGptsos.assaphchposRosshlshhss+PtsGLushcltphsssullLsG............ssssssstc+ttutssls-YlWtRGQoshPtPAEhhNhsuthslphRhhsas-Dtsss..pchPssc-hhcssVThpsst.hslssuspNstssts+RuRstuuptLsp....ARshGhssssph.Its.sPs.tss...st.shspsspsussspssss.sthsstshtssp.u.RGsPltssspHtshRAP.hsR...s.tGssshs.....s.ssss.s......................s.s..sssssss..hssssptsssstsp 
....LsuslssspuGphhss............spaR+YcAslphsushtGshpsp.ptIhaElGtths........pttphht...ttuh.l-suhshsss.utpF.GhA++aoN.......Fuspa.h.slsuhsERlu+slAshslhss.....hs.sshttstshhVsuLuThDuP.su.tsuV..aIPRhlssshssclFsVLhtAssGpGuslsoDhlcLDsssppPhlPtlsssshstAhstALRllGuNM.sustGslFuhAlsRGlHpsloVVuHoDEGGhhRslLRpstFtsPaGGlp.uLp....asuLPshsssu.sshuuaVDulALtTAAhVAHsDPhh.hsGshaPThhsustssss.stsst...ss...stt.ttQlhushstFs..YhcuLutlFGhtsss...sttphhsshshhLstss....RHLchtoluPaFWlEPToLls.hshhGosAEttGhuuhsh..ssstshPsa-phphhupssss.aushthchposRpsshhstatspP.suLuslclcQhDssullhsG............sssps.sltc+htAssPlosYLWsRGQSshPAPuEhlNhsushGhhlRhhsas.--hsss..pclPsscEhhpssVohcVstPtGLs.sGssNttsppA+RARo+AsttLupuhhRARsaGtsssspM.l.sssPshts......t.thtphsttsh..st..t.s.shh.sts..s.t.s.pssPh.ss.ppts.chP.hst.hss..us.us.hss.s.s..s.sss.s..................................................t................................................. 0 0 0 0 +5348 PF05520 Citrus_P18 Citrus tristeza virus P18 protein Moxon SJ anon Pfam-B_10309 (release 8.0) Family \N 25.00 25.00 249.60 249.50 17.30 17.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.04 0.71 -4.75 3 112 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1 0 0 101 0 151.10 95 100.00 CHANGED MSGSLGNSTHVDLLRSDSRFLSGWWSFIVNVGDIIVRFALHVPSEDMLNSFSAISNCTIIADGSALLKDNTVVDRLESMNPLAYLLKLAKTTTTICFTMSNKVLFGTTKSEPLSCLAITSDRVLFKVIMGTNVDDSRCGCSIWFYNNGTFQNGLTRCNNLVALFSAT .........THVDLLRSDSRFLSGWWSFIVNVGDIIVRFALHVPsEDMLNSFSAISNCTIIADGSALLKDNTVVDRLESMNPLAYLLKLAKTTTTICFTMSNKVLFGTTKSEPLSCLAITSDRVLFKVIMGTNVDDSRCGCSIWFYNNGTFQNGLTRCNNLV........ 
0 0 0 0 +5349 PF05521 Phage_H_T_join Phage head-tail joining protein Finn RD anon Pfam-B_7008 (release 8.0) Family \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.43 0.72 -3.70 131 1460 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 1110 2 227 1023 107 96.10 18 85.37 CHANGED s+Rlslp........ts.ps..p-....s..sGshh....ts.a........tsht..s.lWAplp...sh..supEh.htussstup...h...shplplRa.ps.....sl....ssshR....lh......h.s....schapIpu....lschs.ptp.hlplhs ..........................................p+lph...t....h....tt..ps....s....sGthh.......pp.a........ht.hh........p..saAplp.......sh.....sup-h.....hpu.....ssp.tsp...s........shplhlRh..pp.........cl..........os.sh+...lt...........a..p..............sphasIts...lpssst.ppp.hl.lh...................................................... 0 64 140 186 +5350 PF05522 Metallothio_6 Metallothionein; Metallothionein Moxon SJ anon Pfam-B_1360 (release 8.0) Family This family consists of metallothioneins from several worm and sea urchin species. Metallothioneins are low molecular weight, cysteine rich proteins known to be involved in heavy metal detoxification and homeostasis [1]. 20.90 20.90 27.80 23.90 17.70 19.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.22 0.72 -10.69 0.72 -3.70 7 30 2012-10-05 18:33:37 2003-04-07 12:59:11 6 1 19 2 5 34 0 66.20 52 80.30 CHANGED sssKssCs+-GppCsC.upppClpucCs.sscplCCu.....pCuNAuCKCusuCKCuuG...suCpcGsCss ......ssspssCs+EGppCsC.upCpChpu-Cs.ssccsCCu.......sKCGNAuCK.CGuuCKCuuG....suCscG.sCss.......... 0 5 5 5 +5351 PF05523 FdtA WxcM_C; WxcM-like, C-terminal Finn RD, Studholme DJ, Andreas P anon Pfam-B_6950 (release 8.0) Family This family includes FdtA (Swiss:Q6T1W8) from Aneurinibacillus thermoaerophilus, which has been characterised as a dtdp-6-deoxy-3,4-keto-hexulose isomerase [1]. It also includes WxcM (Swiss:Q93S92) from Xanthomonas campestris (pv. campestris) [2]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.58 0.71 -4.64 64 624 2012-10-10 13:59:34 2003-04-07 12:59:11 6 12 477 8 145 730 885 125.10 29 67.10 CHANGED hss.p..llclspl....sD..RG....sLoslEtt.pp..lPFcIKRlYYlacVPsstt.RGtHAH+chcQhllslsGShclh.........LDDGpp+..pphhLscPt.hGLalsshlW+phhsFSss.uVhLVLASchYDEsDYIR-YspF.lphhp .........................................thlph.th......tDt..+G....p.L.s...h.h...-t....p......l..P.F..p.lcRl.a.a.l.ass....su.hs...RG.tHAH.....+...p...hp.phhlsl.p.GS..h.p...lh.................................l..-..-...Gp..sp........pph......h..L...s...pss...pG.....L...h....lsshhW+phpsF.S..s.s..s..l.l.l..V.lA...schY-c..p-Y.IhsYp.aht...t........................................................ 0 52 109 131 +5352 PF05524 PEP-utilisers_N PEP-utilising enzyme, N-terminal Finn RD, Studholme DJ anon Pfam-B_69291 (release 8.0) Family \N 21.90 21.90 21.90 22.60 21.80 21.80 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.29 0.71 -4.32 186 5872 2009-01-15 18:05:59 2003-04-07 12:59:11 8 34 3547 22 1028 3717 559 122.50 27 19.06 CHANGED lpGlusSsGlAlGpshhhp....ttt..hphsppts.....tshpt.EhpR..hcpAlppsppcL.p..pltpp....h.tth....................s.....sspu.s.IhcuahhlLpDspLhpplpptIppsp.sAphAlppshpphtphhpshsDt.Yl+ERu ..................................pGlsuusGlAlGcu.hhltpsp..........hshpptsh..........pssst.EhpR.lpsAlppstppL.p.plpp+....h.tph..........................s....p-pu..u.IF-uHhhlLsDscL.hpplpph...I.pppp..sA-tAlppshcphsstapph.s..Dp..YL+ERA............. 0 310 592 817 +5353 PF05525 Branch_AA_trans Branched-chain amino acid transport protein Moxon SJ anon Pfam-B_1869 (release 8.0) Family This family consists of several bacterial branched-chain amino acid transport proteins which are responsible for the transport of leucine, isoleucine and valine via proton motive force [1]. 
22.30 22.30 22.60 22.50 21.50 22.20 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.49 0.70 -5.90 21 3775 2012-10-03 01:44:59 2003-04-07 12:59:11 8 4 2395 0 374 2332 41 418.00 37 96.77 CHANGED pp+-hlhlGhMLFulFFGAGNLIFPPhLGhtuGpshhhAhlGFllTuVGLPlLulluluhsGt.ulcslus+ls.hauhlassllYLuIGPhaAlPRTusluFEhulsPhls....ssslsLhlaollaFulshhlSLpPuKllDplGKhLTPlLLlhlhlLhl.tulhpPhGshsss......p.stY.pspshhpGFl-GY.TMDsLAulsFGllIlsul+spGhp......sp+phhphslpuGlIAulhLullYsuLualGAsSsshh..........sssssGuplLsphspthFGshGpllLulllslACLTTulGLlousu-aFsclhP.tlSY+thshlholhShllANhGLspIIplSlPVLshlYPluIsLllLshhpphhpt..phsYpsslhhohlhull-ulpuhhhh........shlsphhphLPLtshGLuWLlPulluhlluhllsphp .................................................................h..+chlhlGhMLFulFFGAGNLIFPPhLG.tAGpphahAhhGFllTuVGLPlLs.llAluh....s..ss.....ul.p.s.l....us....+ls.hauhl...as.sllY..LsIGPhFAl...PRTA.ol.oaElGlsPhhs................sp........sh.........s.........Lh.l.................FollF.FulshhloLpPuKllDplG+hLTPlhLlhlsllhl.tu.h.lp.P.h.......G..s.h..sss................s..ts.Y...ps..s..sFhpGFlpGY.TMDsLA.ulsFGlllVsul..+..pp......Gls....................ppppl.s+.hslh..uGllAulhLsllYhuL..salGupSs...shh...........stssNG.uhlLsphspphF.GshGp............llLuhllhlACLTTulGLlsAsu-aFpp.hh............P.....p....l.....S..Y.........+shlhlhs.lhShll.uNlGLsplIphSlPVLh.hlYPluIsLl.l..Ls.hh..p...p......hh...tp...p...p...h...s....a....t..h.s.hhhs......hlhu.lh-s....l.pss.s.h.........................shl.s.s.h..h.phLPLtphGluWllPulluhlluhlhs..h................................. 0 93 201 293 +5354 PF05526 R_equi_Vir Rhodococcus equi virulence-associated protein Moxon SJ anon Pfam-B_7324 (release 8.0) Family This family consists of several virulence-associated proteins from Rhodococcus equi. Rhodococcus equi is an important pulmonary pathogen of foals and is increasingly isolated from pneumonic infections and other infections in human immunodeficiency virus (HIV)-infected patients. 
Isolates from foals possess a large virulence plasmid, varying in size from 80 to 90 kb. Isolates lacking the plasmid are avirulent to foals. Little is known about the function of the plasmid apart from its encoding a virulence associated surface proteins [1]. 25.00 25.00 31.80 31.70 22.50 17.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.08 0.71 -4.63 8 29 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 8 22 1 171.60 37 98.50 CHANGED Mhchhh..su+sluustlsAsslh...APuGsAsApslssuusuus................susss.ththspssstshtstsssp-pQYsVHGsVsSAlVYQ+h+lsV-......sGcTF-GDAGGLohPGuuuhWGTLFTsDLQ+LYc-TVSFcYNAVGPYLNINFFDScGslLGHlQuGulSoVlG..IGGGoGuWc .................................................................stthhs.hhsshshsh...sssGhAsAp.lssss.ss.................................sp...p.t..hspsssps.ttpsssp-ppYsV+GslsSAlhYQ+h..plpss......sGKsFsG-AGGlohPGuushaGTLF..TsDLp+LYs-TVSFpa.NAlGP.YLNINFFDupGslLGHlQuGulSTVsGlGGGoGuWp............ 0 0 7 8 +5355 PF05527 DUF758 Domain of unknown function (DUF758) Finn RD anon Pfam-B_6320 (release 8.0) Family Family of eukaryotic proteins with unknown function, which are induced by tumour necrosis factor. 21.20 21.20 21.90 23.40 20.20 21.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.10 0.71 -4.68 14 241 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 96 1 145 222 0 178.10 50 91.04 CHANGED h-sFsSKsLALQAQKKILSKMAoKohAphFIDDTSSElLDELYRloKEaTpN+pEApKllKsLIKlslKlGVLaRNsQFssEELtlspcF+KKl+psAMTAlSFaEV-aTFD+sVLuslLpECR-LL+plVppHLTsKSHGRIsHVFNHFuDs-hLssLYsPpusaRspLpKICsGlNKhL-EGsl ............-sFsSKsLALQAQKKlLSK.M.A.o....Kol.A.phh.I.DDTSSEl...L.DELYRl.oK......EaT.p.....s.....KKEAcKllKsLIKlslK.lulLaRNsQFst-.ELslh-+.F++Kl+..phAMTslSFap........V-aTFD..+....sVL....uplLpEC+-lL+plVp+HLTsKSHGRIscVFsHFuDs-FLssLYs...P.....ss.a+s.pLp+IC-GlNKhLDEtsl.............. 
0 31 43 77 +5356 PF05528 Coronavirus_5 Coronavirus gene 5 protein Moxon SJ anon Pfam-B_7342 (release 8.0) Family Infectious bronchitis virus (IBV), a member of Coronaviridae family, has a single-stranded positive-sense RNA genome, which is 27 kb in length. Gene 5 contains two (5a and 5b) open reading frames. The function of the 5a and 5b proteins is unknown [1]. 25.00 25.00 28.70 30.50 21.30 16.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.95 0.72 -4.16 2 99 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 13 0 0 100 0 80.40 87 99.62 CHANGED MNNSK-NPFptAIARKARlYLREGLtCVYFLNcAGQAEsCPsCTSLV.pGphC-EHl.NNNLLSWpAV+.LE+QTPpRR.SN .MNNSKDNPFRGAIARKARIYLR.EGLDCVYFLNKAGQAEPCPACTSLVFQGKTCEEHIsNNNLLSWQAVRQLE+QTPQRQSSN............... 0 0 0 0 +5357 PF05529 Bap31 B-cell receptor-associated protein 31-like Finn RD anon Pfam-B_6449 (release 8.0) Family Bap31 is a polytopic integral protein of the endoplasmic reticulum membrane and a substrate of caspase-8. Bap31 is cleaved within its cytosolic domain, generating pro-apoptotic p20 Bap31 [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.22 0.71 -4.84 40 599 2009-09-11 14:14:48 2003-04-07 12:59:11 7 10 290 0 357 535 7 181.30 24 83.77 CHANGED MoLhaoLVFshLhsEhshhhlLslPlPpslR.+plhphhttshhspphphshhhhlshlllLFlDulpRlh+hssphpttpsspssss..........sphp..u++FauQRNhYloGFoLFLoLllsRshollpcLlphpcphcshppptpttsttt.................................tppsttssElpcLKcclpp.......ccpDlcsLKcQscsLp+EY- ..........................................................................Msl.ashl.hhhLh.sEh.slhhlL.hl.Ph..shR....pplhp............hh..t.h..h..st..hhp..hhh..............hhlhshl.l..l...L.h..l.Dulp.c.hh+hssphp...tptpstss...............................h..p.h.h+hF.huQRNhYl..s..GFsLF.Lsl........l..lpRlh..sl..l....p....plhphpsp.hcshpppspst.t..pttt...........................................ttttt.tt-.htpLppclpt.....................tptch...sh..+pQsctlpp-ap................................................................................... 0 105 188 282 +5359 PF05531 NPV_P10 Nucleopolyhedrovirus P10 protein Moxon SJ, Bateman A anon Pfam-B_7343 (release 8.0) & Pfam-B_6199 (release 10.0) Family This family consists of several nucleopolyhedrovirus P10 proteins which are thought to be involved in the morphogenesis of the polyhedra [1]. 29.80 29.80 29.80 29.80 29.70 29.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.65 0.72 -3.73 21 81 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 63 0 3 70 0 77.80 37 77.83 CHANGED MS..NILlhIRsDIpslssKVsuLQspV.......sslpsNlPshp.l.ttLDAQospLsslpo..............pVssIpsILs.................P-lPs ....MS..pNILhlIhscIpslssKVs.......uLQspV...........sslcssl....ss...l.tpL....Du.sspLsslps..............pVssIpslLs................hP-lP........................................................................... 
0 0 1 1 +5360 PF05532 CsbD CsbD-like Finn RD anon Pfam-B_6755 (release 8.0) Family CsbD is a bacterial general stress response protein. It's expression is mediated by sigma-B, an alternative sigma factor [1]. The role of CsbD in stress response is unclear. 40.00 40.00 40.00 40.00 39.70 39.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.42 0.72 -4.29 119 3357 2012-10-02 00:15:32 2003-04-07 12:59:11 7 3 2275 2 706 1779 71 53.50 36 76.85 CHANGED -clcGphcchpGcsKEshGchTsscphpsEGctcpstGcspcphucsK...-tscc .........thcuphcphKGplKEshG+lTsDcphp.tEGct-pssGKsp-thspsK-psct.................. 0 169 400 559 +5361 PF05533 Peptidase_C42 Beet yellows virus-type papain-like endopeptidase C42 Studholme DJ anon Merops Domain Members of the Closteroviridae and Potyviridae families of plant positive-strand RNA viruses encode one or two papain-like leader proteinases, belonging to Merops peptidase family C42. 25.00 25.00 27.40 37.70 20.60 19.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.20 0.72 -4.16 8 200 2012-10-10 12:56:15 2003-04-07 12:59:11 7 8 8 0 0 172 0 73.20 66 10.95 CHANGED tchtDGhCYlAHhthlCAahsRsFccpDa....sLGsaPTVucL+sRlh+paGccALplslRGtYoSRslFHCDYsuuaspsh+slsua..lGG .A.KlRDGQCYlRHVYDVALYFGRRV..DLSV+...RTLGhFPTVGALKAYLVREYGR-SLKVPMRGTYT................................................ 0 0 0 0 +5362 PF05534 HicB HicB family Moxon SJ anon Pfam-B_6090 (release 8.0) Family This family consists of several bacterial HicB related proteins. The function of HicB is unknown although it is thought to be involved in pilus formation. It has been speculated that HicB performs a function antagonistic to that of pili and yet is necessary for invasion of certain niches [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.41 0.72 -4.39 24 1200 2012-10-02 18:44:02 2003-04-07 12:59:11 7 4 929 0 259 814 64 48.00 32 43.10 CHANGED YlpsCpp.GhpPc+taS..GpFslRlsP-LHccluhtAtppslSLNpalppsL .......................t.p....t.....s..t.t.hp....t....p...FsLR.lsppLHccLshtAtppslSlNpalhphL........... 1 79 171 225 +5363 PF05535 Chromadorea_ALT Chromadorea ALT protein Moxon SJ anon Pfam-B_7314 (release 8.0) Family This family consists of several ALT protein homologues found in nematodes. Lymphatic filariasis is a major tropical disease caused by the mosquito borne nematodes Brugia and Wuchereria. About 120 million people are infected and at risk of lymphatic pathology such as acute lymphangitis and elephantiasis. Expression of alt-1 and alt-2 is initiated midway through development in the mosquito, peaking in the infective larva and declining sharply following entry into the host. ALT-1 and the closely related ALT-2 have been found to be strong candidates for a future vaccine against human filariasis [1]. 25.00 25.00 34.70 33.80 19.70 18.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.10 0.72 -3.89 6 43 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 13 0 24 44 0 75.70 56 34.20 CHANGED EalsKGcFVETDGKKKpCcoHpACYDQREPQuWChLKps.QuWTs+GCFC-pKh+uCVIER..psssKLEYoYCuPccsWpCu ..............ss+GcFVcTDG+cKpCpSHpsCYDQREPpuWChLpcN.QuWTs+GCFCDsKL+SCVIER..pN....s....G+L....EYuYCsPcpsWpCp....... 0 16 16 24 +5364 PF05536 Neurochondrin Neurochondrin Moxon SJ anon Pfam-B_7411 (release 8.0) Family This family contains several eukaryotic neurochondrin proteins. Neurochondrin induces hydroxyapatite resorptive activity in bone marrow cells resistant to bafilomycin A1, an inhibitor of macrophage- and osteoclast-mediated resorption. 
Expression of the gene is localised to chondrocyte, osteoblast, and osteocyte in the bone and to the hippocampus and Purkinje cell layer of cerebellum in the brain [1]. 19.10 19.10 19.10 19.10 19.00 19.00 hmmbuild -o /dev/null HMM SEED 543 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.72 0.70 -6.20 8 217 2012-10-11 20:01:00 2003-04-07 12:59:11 6 15 138 0 160 237 0 430.60 20 74.69 CHANGED psssssL-cCLpLL+up+.Dop+FAuLLLVTKhl+usDhsuts+chlF...-AVGhpFLcRLLpotp...ussss....sccsahsLulolLusFCs..-PElAscppVls+IPhls-slppsss......hshl--sYpsLsulu.uoPpGs+sLlstGslshLsptYss...puashEpAlplLhsLlsshcspshp.-c..thpsllsplucpFssh-sspKFEL...hclLsslLspp....l...lpS..upphhcpLptGlssILps+losupRssALpLAAsLhps...hGspWlhsssp..............ss+FlhLllsluslEVRhsLsE.............pspshtp+pcslosCauLlEhhIphhsc..pt-psll-.....csphhpLhssLpEshusVlcaLpcst-................................-p.+-s.......hllAuVRlLGuWLAE-ssu.h+pclppLLsFhlclt+csapp...hpt...................shDulRalLPuLCplosEccsp+lLhspGusplLsD......sl.............lphhcc.ps..s...suEh.....ul.hhCshhhNlllstssh.hpctusFsuLh+sLlp...........................................................sssusctsslhhSsuhpuhWtDl...sphW 
...........................................................................t......htph.thLpttp.DppphsuLhhlpphhp..s.tp..t.st.........lh.................culu...pF.p+LL.ot...............tt..............................t.hhtlulslLtsast.........phttp..ph.hstlPhl.phltttss..........................thhp-shphLhsls.up.tG.pthht...ssls.hlsphh.t............................psh....h-..shtlhhhl.ht..t..ht.......h....pt...h..tl.h.tt....lu..ht.t.psttthph.....................hphLs.hl......................................h..pl..hl.tll.ts.+.s...t................+..h.h.lhtthhph....hu.phh..t...................................t.ahhlllp.hhl-lph.h.p............................h.....tp.ls.sa.lhp.hlt...h.p........tt.....t..............................................h.phhp.hptshshhhthL.t..ph.........................................................p..t...pc..................lhu...lRhls.altpps.t.hp.th....tlhshhhph.tt....t........................................................................hphhLshhpthsh.......p.tstphhht...tt.thlhp..........h.....................................................t......................................................................................................................................................................................................................................................................... 1 53 94 131 +5365 PF05537 DUF759 Borrelia burgdorferi protein of unknown function (DUF759) Moxon SJ anon Pfam-B_7415 (release 8.0) Family This family consists of several uncharacterised proteins from the Lyme disease spirochete Borrelia burgdorferi. 
19.90 19.90 21.30 20.10 19.70 19.60 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.61 0.70 -5.71 2 148 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 27 0 12 135 0 319.00 50 87.42 CHANGED MSDKFTIKFKGILDHAATKKAIEQDISKMEKYLKPKKSSLGSTKDIVKNNLSDKKKELS+QSKFESLRERVEKYRLTQTKKLhKQGMGFEKARKEAF+RSLMSDRDKRRLEYKELAKESKAKSKMLAASQGKGLVAKIAIGSALGNlIuNAMSKVGGGLlGF....hKKuVE-soKpc+hQ.LNpshYGs.KE+-slLK.IGtMKGFcRsLEKE-FLppA.VhKGslR-Lc.LN-...pNlhNAschAAMh+SoGhhS.sEsuVpAVsplLtG-hoEhashLK..sthG-KYlEshK.th....QpGuplcLcscI..hh-hhKDhpShtlhthssshEphpssLAshEQTLpsLTssVLcPllslIs.hhsK....ItNFs.hpslINsIhNuIpShhsh...FsKl+uhLPphhGGsGs-s....p..scspsssNs .......................MSDKFTIKFKGlLDHAATKKAIEQDIoKMEKYLKPKKSSLG.STKDIVKNNLSDKKKELu+QSKFESLRERVEKYRLTQTKKLhKQGMGFEKARKEAF+RSLMSD+DKRRL.EYKELAKESKAKsKMlAASQGKGL..VAKIAIGSALGNlIuNAhSKVGGGhlGF....hKKuVEppoKpc+hp....LNpsha.........sc.c.........E+........ptlh......t.......hltt......hKGFERcLEKE-FLppuolhKG.slp...-Lp..LNp...pNlhpAschAAhh+SoGhhu.sEpAlpsVsphLpG-hsphaphhp....sthG..pKYhEshK.th....Qpuu.ph.chc.cl..hhchh.pDhpShtlhthusphpphpssLsphEQoLtslTssllpPllthlp.hhth.....h.tap....ptllt.hh....pu....l....pshhs.................................................................................................................................... 2 7 8 8 +5366 PF05538 Campylo_MOMP Campylobacter major outer membrane protein Moxon SJ anon Pfam-B_7418 (release 8.0) Family This family consists of Campylobacter major outer membrane proteins. The major outer membrane protein (MOMP), a putative porin and a multifunction surface protein of Campylobacter jejuni, may play an important role in the adaptation of the organism to various host environments [1]. 
19.90 19.90 21.00 20.90 18.50 18.50 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.48 0.70 -6.10 2 514 2012-10-03 17:14:37 2003-04-07 12:59:11 6 2 136 0 23 333 1 274.10 49 99.06 CHANGED MKLVKlSLVAALAAGAFSAANATPLEEAIKDlDVSGVLRYRY-ouN..pNhs.sSslss.KQDHKYRAQVNFSuAIuDNFKAFlQFDYNu.DGGhGsDsloNspcsLhVRQLYLTYTNEDVATSVIAGKQQLNhIWTDNuIDGLVGTGlKVVNNSIDGLTLAAFAhDSF.ttppssshlsQss.pphp............pssshtlD.stNlYGAAAlGSYDlAGGQFNPQLWLAYhspsAFhYAlDAAYSTTIFDGINWTlEGAYLGNSlDscLcD+hc.ANGNhFAL+GolEVNGWDASLGGLYYGcK-KsohssIEDQGNLGSLLAGEEIFYTsGSpLNGDhGRNIFGYVTuGYTFNETVRVGADFVYGGTKTp.hup..GGKKLEAVARVDYKYSPKLNFSAFYSYVNlD...sssESscHssVRLQALYKF ...............................F.S....s.AsPLEEAIKDlDVSGVlRYR.Y-os.........p.p........p..s....h.......s...........p.s....s.s.......ls.......s.KtpHpa+uphsFpu....AlsDN....Ftuhl...........ph...pY.s..s........-.......s......G..........h....G.........h.............s...t...........h...........p.......s...s...........p........p.t...h....VpphYLsYTspsh.s.TolhhGKQtlshhaT...Ds.s....sGTGl+VlNssIsGLTLA.uhAhDuh.tt...t.............................................................tslYususlGsa-.....hs.QL..WhAhhsp.s..u..h...a.......A...h.Dhshph.hhsshshtlputYhtsshD.sp.......htt.............................................s....suN......hauhphshph.shDhthGh.l..ha..t..pc.p....c....h..o.hsslEDpGph........hh.......sGt..plh....s..ptpt.h..s....ucN....ah..ahthGYoF.sc..hplGh-al.Gt.ps...............t..cc.......E...hssplsYtYS.KLsh.saYuh.hp..................................................................................................................................................................................................................................................................................................... 
0 6 14 23 +5367 PF05539 Pneumo_att_G Pneumovirinae attachment membrane glycoprotein G Moxon SJ anon Pfam-B_7428 (release 8.0) Family \N 25.00 25.00 83.60 83.60 21.40 20.50 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.71 0.70 -5.09 3 103 2009-09-11 11:15:31 2003-04-07 12:59:11 6 1 3 0 0 83 0 160.30 60 99.79 CHANGED MGSKLYhIpGsSuuQlslKpsL+lucKllLuIVLSALGLTsToTIALSISISVEQuVLc-C.cTYhusssohaSsosspsTTTsoATTT+DhRGLQTTRTRKhESCuaVQIuYGDMHDRSssVLGGlDCLGLLALCESGPICQRDsps-DsshCRCTlcu+uVSCCKcPKousTTSpTTScPscs.osPsaPSQcsocScPsoQGcQT....oTAspploSTsshhTpcTuTsosucPQspPsPSppG.osos+csuSTsSpcooT.sGsupcHTQRh+TPPos-NsRoshppsTP..TT.hacTt+PTP+PTs-hpsssp.spoSPsulQuNPTTQ.N.lsCcchDPscPp+ICYpVGoYNsulo+sCcI-VPLCSTYspsCMcTYYocPFNCWRRspRClCD-GsGLIEWCCTS ...............................ch.p+hlLulVLSAhGLThTsTIslolsl.VEQshLcpC.csY.utstshassppppsTos.stTssts.ttLQsststK.ESChaVQls.GDMasRS.sVL.......................................................................................................................................................................................................................................................................................................... 0 0 0 0 +5368 PF05540 Serpulina_VSP Serpulina hyodysenteriae variable surface protein Moxon SJ anon Pfam-B_7432 (release 8.0) Family This family consists of several variable surface proteins from Serpulina hyodysenteriae. 
22.90 22.90 23.10 44.80 17.40 22.80 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.34 0.70 -5.69 4 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 5 0 26 43 0 353.80 47 99.32 CHANGED MKKhLLsslAlLTIuSuSsFGMYGDpDsWIDFLTcGNQFRARMDQLGFVLGNsTIKGTFGFRoQslsTtLGsIl.oGNstNhs.LpsTISsGIGYTS-sFGIGlGYNYTY.........hssslGsHTPVLMlNALNNNLRIAIPVQIAVp+DshsKhs.....pspKDYLGISTDhQIRYYTGIDAFNtIRLYhKYGQsGYKss.....NsspEhFAQShGFEsRhYFLNTslG.NVTINPFIKVsYNTAL..+GsushVRAu-ohhss.....stsh.s.cP............th-.KaD+NPYDVpstAVLGlTANSDhVSLYVEPSLGYpApYhGKhto-p..h....KVpHsLhWGAYAELYIpPVQDLEWYFEMDlNNusS+.......p.sulPVsFuooTGITWYLPtL ............................................................MKKhhLhhhslLohu.sSlFGMYGsp.-sWIDFLscGNQhRARMDQLGFlLGNsTIKGTFGF+.upo....h..s.lGpIL..s.....s.spssht.LtsTISuGIGYTS-sFGIGlGYNYTa..........hu.sshssHTPVLhlNALNsNLRIslPVQIuVpssshsphs.........ppsYhGlS.T.D.sQIRYY..T..GIDAFNtIRlalKYGp.saKss.............................shs....p-hhApShGF-hRhYFLNTslG.NVTlNPFl+VsYsTAL...pGh...uphlts..h-..shhps...............h.th.sstssst................................ssh.sthaD+sPYclsltssLGlTANS..DlVSLYlEPuLGYpsph.h......G+h.ss.tst.........Klp..HtLuWuAYuELYIpPVpDLEWYFEMD...VNNussp.......p.sul......PVsFsuoTGITWYLPth............................................................ 0 7 7 7 +5369 PF05541 Spheroidin Entomopoxvirus spheroidin protein Moxon SJ anon Pfam-B_7488 (release 8.0) Family Entomopoxviruses (EPVs) are large (300-400 nm) oval-shaped viruses replicating in the cytoplasm of their insect host cells. At the end of their replicative cycle EPVs virions are occluded in a highly expressed protein called spheroidin. This protein forms large (5-20 mm long) oval-shaped occlusion bodies (OBs) called spherules. The infectious cycle of EPVs begins with the ingestion by the insect host of the spherules, their dissolution by the alkaline reducing conditions of the midgut fluid and the release of virions in the midgut lumen. 
The infective particles first replicate in midgut epithelial cells, then pass the gut barrier to colonise the internal tissues, mainly the fat body cells. Whilst spheroidin has been demonstrated to be non-essential for viral replication, it plays an essential role in the natural biological cycle of the virus in protecting virions from adverse environmental conditions (e.g. UV degradation) and thus improving transmission efficacy. In this respect, spheroidins are functionally similar to polyhedrins of baculoviruses or cypoviruses [1]. 25.00 25.00 61.80 61.20 21.70 21.30 hmmbuild -o /dev/null HMM SEED 944 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.53 0.70 -6.94 4 11 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 11 0 1 11 0 866.40 39 92.91 CHANGED sNlPlsscpIpKlsspKYEl+hhLKD-sppF-+h.l-hVVPLYDsss.houVTlESsssslEllELDpTHhRlhl+ssshcEhsa.hsFsssVsp-pVWKYlocLLLsNluhsssKhKLsNaplsLNsKHlphpclcpsLFIhFhDD.GhYGLIT+cNI.NssL.VsKDAoaIplFPQahYhphGRclYlNEKsThDVss-ssNlsLDhpKSVNIuVS...FlsI.YElssstQKcLLKsLlp+YGcFDVYNADTGLlYAKNLsIKs.sT.VIQV-+lPVpLKVKAYhKs.sG+sLChh+ITSST.sDPEYVsSpsAhLGsL.pVYKKFc...+ShLKlhhHscsosNVhPstsLhLELsDspsYshKsSssSRLsVGlYKlsKIYlcNscspIhLcpIcscacCs+pla+EhspL.+................cps+YTscs.FpIlsNsPchslalaG.IpNlsh+sKsshNL+LWGWIlcsDsSRal+hhsDGSlDLDLshKhspsDlsLhpAl+p+YhNslILEhANsY.ssslSLGNp+FpNIFDMc.cscoIspYTNFTKsRQDLNNhuClLGINIGspVNIpsLP..GWlss+EhcILp.Susscl+pFscuFCclsN+RFasMA+DllSLLFMCNYlNIEIsEulC-YPGYllLFARAlKVINDlLLlNGlspLAGYSISlPlcaGss-KTLPpp+pGGV-K+FKchFLKspL+-LM+Dp-FVQsPLYISTYF+ol.-sP.o-NYEKYLl-SusQSQ-lLQGLLNTpNo.DTNARVsSSVhG.asY......-.ssTuEacIu.............................sEALsKhsK.ho+.GNhGLlN+lsE....pC...s.cGh.-Nppl+sph.pp.FsCpPNNNsELIs+YGY+lhDLc+ItplhsshDs...sspcpshh.E-pt.h..s..h.h.a.....spssp....phs.ClppN.pp+ha.pcCscssoCs+......Rps........h...sGYc+sH 
..........NIslplh-lpplNDupapV+Fs...-shpFstK.hshVVP.YssD....hVslEouDsNl.l.-hspT+Y..lhapsptsEhshhFoFsssVDp-plhpYlopLlhsNLu.hssK.hlhNhslhlNGhh.shhthspshaIhhaspssshhLloppsIhNpoLlVsKDsoalpl.sQ+lhsactphlYlsEpsT.DVs.sVsNlVLDL+puVsluVS...FlShshEls-ssQKcll.SLlp+hGcFDlhNADTGhVYt+NLslKpssThsh.V-p.sV+lpsKshhh.hcs+slp.hplhSs...Ds-YVsscouohthL.olYhKhhptIKSLLKIhhpDccssshh.GhshslEhpDhNshshphossoRLslGlYplchlYLsspcDpIp.phIc.........aK-YVs.sca-..............p-G+YpshssFhIhuNpPhIslalhGcIhsss.cscsshpLpLhG..lDhcsShY...as-suLsL...a+pcsssloLhcAlKh+alNsIlhcsANhh.sGsIolGN+Ka.NIhDh+tDscTlNpYsNhhhu+pshNs.uslLulslss.VNIQ-LP..sWLss+....LusSusD.l+shlpuFss.ss+caashs+sIlSLLhhsNaINhpIc-oLCcYPGhIhLFARhhKhIN-hLhl.G..phAhYSloh.scauss-phLPas+ptGl.KcFh+pahKsshpsLM+Dcsa.QsPLhI.s.hpslpps.....ssNhtp.lss.......SAsps.sLlptLLsopNspDsss+.Vl.SshGuF..........sNhu-pcIs.............................ppslSpllchhp+sGshGLl.+.s................-NpsI+ph+spspF.sp.Ns.scLIphhuap..DhpthhchhsssD-lP.ssscpshh-EDpt.hh.s..hsh.as....spssp.......s.Chpspspp+hh.spCspsssCs+.......+ps........a...sGh.hsH.............................. 0 0 1 1 +5370 PF05542 DUF760 Protein of unknown function (DUF760) Moxon SJ anon Pfam-B_7508 (release 8.0) Family This family contains several uncharacterised plant proteins. 20.20 20.20 20.60 23.20 19.60 19.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.61 0.72 -3.80 54 380 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 117 0 216 365 110 97.80 30 44.47 CHANGED ssLhpYlpphpP.-tlsplsc.............................................ssSs-lhpshcps....lpullGsL..P...................................sttFpsplpss+-pLupLlhushMoGYaLRphEpRhpL-psLp ...............LhchlpplpP..-plpplsc.............................................ssSs-lh-hhcps....lpslLGhl...P......................................spphsshlpss+ppLup....Lhsu.uhMsGYaLRphE.RhpLEcsL................... 
2 48 141 188 +5371 PF05543 Peptidase_C47 Staphopain peptidase C47 Studholme DJ anon Merops Family Staphopains are one of four major families of proteinases secreted by the Gram-positive Staphylococcus aureus. These staphylococcal cysteine proteases are secreted as preproenzymes that are proteolytically cleaved to generate the mature enzyme. 21.60 21.60 21.80 24.20 21.40 21.50 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.85 4 395 2012-10-10 12:56:15 2003-04-07 12:59:11 8 3 221 9 4 118 0 172.80 60 44.73 CHANGED shpsQYVNpLKNFKIRETQGsNuWCAGYTMSALLNATYNTs+YNAEuVMRaLHPNLpGc-FQFTGLTPpEMl+aGpSQGRssQaLNRMsSYNEVDpLTpNNKGIAILGpRVESs.sGhHAGHAMAVVGNAKlNNGQ-VIlIWNPWDsGhMTQDAcSNlIPVSNGDHYpW.uSIYGY .........p..p.QY.NpLcNFKIREpQhsNuWCAGaoMuALLNAThNTspYpAcslMRhLaPplptQch.....s.u....hhP..p....pMIpa.GpoQ.GRs..ph.pths....o...YNpVDpLTKsNhGIhlLup.pVppp....Ns.HhGHAhAVVGN..AKlNs........QEh.....lIhWNPWDsth.hQDAcss..ll.lS..sccYpWYuShhGY............................... 0 2 2 4 +5372 PF05544 Pro_racemase Proline racemase Moxon SJ anon Pfam-B_7562 (release 8.0) Family This family consists of proline racemase (EC 5.1.1.4) proteins which catalyse the interconversion of L- and D-proline in bacteria [1]. This family also contains several similar eukaryotic proteins including Swiss:Q9NCP4 a sequence with B-cell mitogenic properties which has been characterised as a co-factor-independent proline racemase [2]. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.77 0.70 -5.60 14 1248 2012-10-03 03:02:41 2003-04-07 12:59:11 6 8 740 7 380 1522 561 313.20 35 95.24 CHANGED llDsHspGEssRllsGGssslsGsThhE+ppahtp-h..DtlRphlhhEPRGpshhpuslLhPPpcP-AshGhlhhEsssh.sMsGpsoIslsTsLlEsGllshp...pPtscl...L-sPuGlVcspscscsG...psppVplpNVPSFhathDstl-VsGl.GclpVDlAYGGsaaAlV-spphGhslssspAp-Lsshu.tlppAlscphth.HPcts-hstlsas.hsussspsps..suRNsVhhsstthDRSPCGTGoSARhAsLtA+GpLcsG-palpc.SlIGStFcG+l.thscluG+s............AIlPpIuG+AalTGhsphhlDPsDPaspGap ...............................lDsHssGEPsRll....h...u..G.hP..pl....G..tT.hhE+p..p.a.........h......tp.ch...........DtlRphLhhEP.RGassM.GulLs.P..Ps.......c....s....c.ADhGV.lFhcs......s......G.h.h.s.M..CGHuoIulsT.s.hl.E...p.........G..hls.s.p...........ps..t.....p.....hh..l.-.o..P.uG.l..V.ps.ph...p.h....c.su........cst...pVohp.N.V......Pu......Fh....h...p...p...sl.p....l.........-l...........s......s.........h.G......p.....lp.sD........lAaG..Gs.aY....ulV-....spp.h....G..h....c...l..ssp.s.....sppLhphuht....l+p..sl....pp....p....h..t....h.....H.Pp.t..slptlst..l.hh...ss.....s.....pptts...............ss+NsVhh............s.....s............s...t............l............DRSPCGTGT.SA+hAtLhA+GcLphG-.p.ah.pc.SI....l....G...S.h......F.....pGc......l....p..t.....s..p..l.u...s.h..............AllPplsGp.AaloGhsphhlD.s.p.DPhstGF...................................................................................................................................... 0 106 211 297 +5373 PF05545 FixQ Cbb3-type cytochrome oxidase component FixQ Moxon SJ anon Pfam-B_7570 (release 8.0) Family This family consists of several Cbb3-type cytochrome oxidase components (FixQ/CcoQ). FixQ is found in nitrogen fixing bacteria. Since nitrogen fixation is an energy-consuming process, effective symbioses depend on operation of a respiratory chain with a high affinity for O2, closely coupled to ATP production. 
This requirement is fulfilled by a special three-subunit terminal oxidase (cytochrome terminal oxidase cbb3), which was first identified in Bradyrhizobium japonicum as the product of the fixNOQP operon [1]. 22.80 22.80 22.90 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.19 0.72 -4.32 127 974 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 905 0 280 665 58 46.80 27 73.61 CHANGED h..............shhpuhhslhhhlhFlGllhWAa.pspp..+ppa--AAplPFc..-- ........................shhpuhhslhhhlhFlullhasa..pscp...+ppa--uAplshp.D-........... 0 64 169 223 +5374 PF05546 She9_MDM33 She9 / Mdm33 family Wood V anon Pfam-B_35269 (Release 8.0) Family Members of this family are mitochondrial inner membrane proteins with a role in inner mitochondrial membrane organisation and biogenesis [1]. 20.50 20.50 21.00 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.31 0.70 -4.78 17 144 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 138 0 112 152 5 193.70 44 44.52 CHANGED pphu..++hphhhDslQpslhsAopsLNDlTGYSuIEcLKpsIpphEpcLcps+ppl+psKttYspAIpcRSpSQREVNELLpRKcsWSPsDLERFTpLYRsDHsNpppEp-uppcLp-uEpcs-plpspLhpuILoRYHEEQIWSDKIRRsSTWGTahLMGlNllLFllhQLllEPWKR+RLVtuFE-KV+pAlct.tpppphthpphl ..............................s..hscphsphhDslQsslhsAsppLNDlTG..YSuIEpLKpplpt.Epclcps+ppl+pAKpsYpsAlspRusSQREVN-LLpRKcsWossDLER.FTpLYRsDHtNEptEtcupptLspAEpct-chtspLtpuILsRYHEEQlWSDKIRRhSTWGTauLMGlNllLFllhQlhlEPW+R+RLVpuFE-+Vppslcc.pt.........th....................... 0 32 65 98 +5375 PF05547 Peptidase_M6 Immune inhibitor A peptidase M6 Studholme DJ anon Merops Domain The insect pathogenic Gram-positive Bacillus thuringiensis secretes immune inhibitor A, a metallopeptidase, which specifically cleaves host antibacterial proteins. A homologue of immune inhibitor A, PrtV, has been identified in the Gram-negative human pathogen Vibrio cholerae [4]. 
23.70 23.70 23.70 23.70 23.50 23.60 hmmbuild -o /dev/null HMM SEED 646 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.02 0.70 -6.49 5 820 2012-10-03 04:41:15 2003-04-07 12:59:11 6 51 467 0 195 739 218 459.10 29 56.97 CHANGED susQssasGsVRoDKVLVLLVEFuDhs...........HNsIsKpss.....pMYp-DYspEHYQDMLFGccsYoh.cGcslpShKQYYEcQSGGSYoVDGpVocWl+VPtsAAcYGuN.usGsDNpuP+uARDLVKEALc.......sAs-pslDLoQFDQaDRYDhNGDGNhNEPDGlIDHLMIIHAGVGEEAGGGsLG-DAIWSHRapluscshulEGTpusVs....haGGchAAaDYTIpPEDGAlGVaAHEYGHDLGLPDEYDTpYTGsGEPVuaWSlMSSGSWuGKIuGTEPTuFSupsK-FhQKslGGNWhNh.plDhsKLssspG+sssLDQosTKSsRPshV+VsLPpKoVEsIKPApGcatYYSs+GDDL+NTLST.slDLTsuTsAcFcFKuWY-IEADYDFlcVch..VopDGspThh-csGcpssssstcsss.....sGK...WIDtsYDLSsatGKKVcLpF-YlTDGGLAMcGFhlD-luLTVDGcssFSDDAEG.TSpasLsGFT+suG..T+cpsHYYllEWRNHsGoDsGLtp.........h.+aucthuYssGLVVWYsDsSYADNWVGlHPG+GFLGVVDSHPcALVhspsGclAcs...RaQltDAAFShcKTsuhplso...uTsuTasssuLuussoFDDc+sYhspQlPDuGRKlPchGLKhcVluQAcDsSsGsV ....................................................................................................t.............phhslLh-asD.......................................sth....t.............h......p...a..taapphh...as.p.................................u...p...p...h..........oh+pYapppSsspass........s....G...t.l..t......t.Whps.s.tptu...YGss.......s.s....p.........s................s....................s.......t...sp-h.lp-Alt.....................hsh..t...sh.slupaD....ctYD.susGs.pp.sDG.hlDplhllH.AGhGp...ps.G..G....G.h......t....s......s..AIWuH+......t...h....s............t................h......s..h....p..G.....sp.......th.................hhs.s.th...t.s...h..-...Y.....s......h..p.............P...........s.......u........u......hGVhsHEaGH.s.L.G.LPD.Y..D.T.............s...G...t.s..........p...s............l.....t.WolMuuGS..W.....s..G.......ph....s......s.s.t..ssshss....pt.a.p.th.....s....W..hp................h......t..t........................................................................ts.....Gp........aaSs...pu.ss.h...p.s....p...hpp......h
....slst...s.s...p..A...p...h...phc...shaplE.........t.....p.......aDah.V....hs.sGt.......shhp................h....p...s......s....t..sts.........................................st........WhchphD.L..ota..tG.p.p.l.pltFpYhTD........uh......s.......h......p......GhhhDshtl...p......s...st..hh.sshc.....s......th...........h.........................G..a......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 83 136 171 +5376 PF05548 Peptidase_M11 Gametolysin peptidase M11 Studholme DJ anon Merops Family In the unicellular biflagellated alga, Chlamydomonas reinhardtii, gametolysin, a zinc-containing metallo-protease, is responsible for the degradation of the cell wall. Homologues of gametolysin have also been reported in the simple multicellular organism, Volvox. 
20.60 20.60 21.40 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.17 0.70 -5.50 6 219 2012-10-03 04:41:15 2003-04-07 12:59:11 6 11 37 0 180 218 11 252.00 23 45.09 CHANGED RLLVhILDYSoC...Ga..usolTE-plRslFLGPNpDGsGGlApKYspCSYsKFuLN.sTAFhsVt.VslsCoosVT................uoCS...........WWslSp............tADsAA+All.............GlsAF..uoFoHasYVLPPGlp..CuWAGLAll.P.G+psaLpoSuYGlpRWu......TlMQEAlHNYGLWHSWRNshEYEDaSTAM......G....RGsACPNAuEhSRlGWATP...........AsGG.Gslsuushsss....GostsasLPAThlTGDsNaLRV...lPsWLsshhNuTsAKNLYluhRVsKsG....DuALsupausKVpVHEVNATMDN.uhsspahpSDR+IpFluusssho+usLsA ....................................................................................................................h...h...................................s..htthh.t.CShsph.h....t.s.......hhh....l....s.Cts.............................................ps.s........................h...h.shtt.........................h.u.c...th.s...ps................G....sh.......ss.ap...+.hhalhP...s..t...........C..s....a..s..G.....hu.l....s....G..p............s.h...h.t.h.s..s....h......p..h.s............................slhpEhhHNh.G..LhHu...........h............p............s........s................h.............EY....s...D.os...sM..........G...........pu....t.......C......N.As.ph...........phGW..hss...............h.........tls..ssth..............u.hh.thtl.sst.....s....ts.hlhl............sh............................................................................hahthR.tt.s.........................D....h............................................................................s............................................................................................ 
0 70 174 179 +5377 PF05549 Allexi_40kDa Allexivirus 40kDa protein Moxon SJ anon Pfam-B_7591 (release 8.0) Family \N 21.20 21.20 21.20 22.10 21.10 21.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.85 0.70 -5.10 7 19 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 0 0 20 0 234.20 36 68.90 CHANGED pRTFFSNhshAL-ATpsLLsaVPPsRYslPssoLPLDELYGQLHALHpNSLEWLTHIsHssDpllshLNshs..u......psssLuclRsslppLsphlppltss.pphphphppsssoc.hpphpsl-TpLptLHt+lc.hsps.spssss.Pssossssss.sstsscs.LPhYQAtHPTt.CRoYGollasGss.+IPMDIhGRPASTAL+LplplsssspsTpVsYclhDDGhLLLS--lcTtHKLpH..SDsLALLHp+CPNFIYKI+scsLC ........pRsFFoNhshALsuTpsLlsaVPPsRYslPssoLPLDELaG.LHALHpNoLEWLTHIspss-plls.hssh...s......pss.Lscl+stlpsLsphlpplpps.pp...phpspp.tspp.hcplpulcspLttlp..h+lp.hspp.sps.st.Psso.spssso.sststps.LPsapApHPothCRoYGollasGhsh+IPMDlhGR.sSTAL+Lplphp.sspsTpVpaclhDsGhLLhS-plpT.H+Lp+..uDsLuLLHp+CPNFlY+h+spsLC...... 0 0 0 0 +5378 PF05550 Peptidase_C53 Pestivirus Npro endopeptidase C53 Studholme DJ, Finn RD anon Merops Domain Unique to pestiviruses, the N-terminal protein encoded by the bovine viral diarrhoea virus genome is a cysteine protease (Npro) responsible for a self-cleavage that releases the N terminus of the core protein. This unique protease is dispensable for viral replication, and its coding region can be replaced by a ubiquitin gene directly fused in frame to the core. 
20.70 20.70 21.10 23.00 20.60 19.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.16 0.71 -4.79 11 688 2009-09-11 10:39:17 2003-04-07 12:59:11 6 19 89 0 1 634 0 153.70 75 17.82 CHANGED MELlcFELLYKTuKQ+PlGVhEPVYDpsGcPLFGEhScIHPQSTLKLPHcRGcA-l.TsLKsLP+KGDCRSGNppGPVSGIYIKPGPVaYQDYpGPVYHRAPLELFsEoQhCEVTKRIGRVTGSDGKLYHlYVClDGCILLKpAoRspscVLKWl+NhLDCPLWVTSC ...........................................................MELhsNELLYKTYKQKPsGVEEPVYDpsGsPLFGE+.u.sIHPQSTLKLPHcRGct-VsTNLtSLP++GDCRSGNs+GPVSGIYlKPGPlFYQDYpGPVYHRAPLEhFcEus.MCEsTKRIGRVTGSDGKLYH..IYVClDGCIl.lK.As+s.p.clL+WlhNhLsCPLWVoSC.................... 0 0 1 1 +5379 PF05551 zf-His_Me_endon Naegl_SSU_RRNA; DUF1519; Zinc-binding loop region of homing endonuclease Moxon SJ, Coggill P anon Pfam-B_7681 (release 8.0) Domain This domain [1] is the short zinc-binding loops region of a number of much longer chain homing endonucleases. Such loops are probably stabilised by the zinc and may be viewed as small but separate domains. The common structural feature of these domains is that at least three zinc ligands lie very close to each other in the sequence and are not incorporated into regular secondary structural elements. The biological roles played by these small zinc-binding domains are presently unknown [2]. 
27.00 27.00 27.20 27.20 26.80 26.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.21 0.71 -4.17 11 70 2012-10-05 18:28:12 2003-04-07 12:59:11 6 2 41 18 21 82 4 122.00 30 40.93 CHANGED sCah.psspsst.psGY.plsh+ss..................tGpphYsH+l.shhAsscs.......t.h..h.tt.....uhplSHLCtNutChsPsHLllEspslNppRpsCp...tpsp.h.tst.hhpsC.H.....pPpC..lhshtthscshp ..........................................................................Ch..tsth....psGa.+hphhsp......................ttsp.YhHpl.shhAssptt...........phh.hl.psch.........uhplSHLC.........pNu.....tChpPsHLhlEs+s.NccRppCp............t+...h..ps...th.hhtsC.....H........pspC..hlshhhhsp........................................... 0 12 17 20 +5380 PF05552 TM_helix tm_helix; Conserved TM helix Yeats C anon Yeats C Family This alignment represents a conserved transmembrane helix as well as some flanking sequence. It is often found in association with Pfam:PF00924. 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.33 0.72 -4.30 143 2106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 1303 14 507 1422 120 52.30 28 24.15 CHANGED tpslsshhspll....salP........plluAlllLllGhllucllpphl.......splLpphsh.Dptls ....................p.hss..sh.ll......sahs........NlluAllIlllGhllA+hlsshV.......s+lhtpt.pl.Dttl....................................... 0 135 297 397 +5381 PF05553 DUF761 Cotton fibre expressed protein Moxon SJ anon Pfam-B_7657 (release 8.0) Family This family consists of several plant proteins of unknown function. Three of the sequences (from Gossypium hirsutum) in this family are described as cotton fibre expressed proteins [1]. The remaining sequences, found in Arabidopsis thaliana, are uncharacterised. 
19.00 19.00 19.00 19.00 18.90 18.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.82 0.72 -4.59 85 622 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 28 0 391 568 2 34.10 34 13.27 CHANGED sps-.lsp+AEcFIp+F+cph+.LQ+...........pShp........p..hpphls.+u ......pp-.lDp+A....-cFIp+Fpcp....h+.lQ+............Sh........t.............................. 1 33 211 313 +5382 PF05554 Novirhabdo_Nv Viral hemorrhagic septicemia virus non-virion protein Moxon SJ anon Pfam-B_7684 (release 8.0) Family This family consists of several viral hemorrhagic septicemia virus non-virion (Nv) proteins. The NV protein is a nonstructural protein absent from mature virions although it is present in infected cells. The function of this protein is unknown [1]. 25.00 25.00 228.90 228.80 20.30 17.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.54 0.71 -4.58 3 37 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 3 0 0 29 0 122.00 86 99.65 CHANGED MTTQSAHSTTSFSPLVLREMITHRLTFDPSNYLNCDLDRSDISsTDFFETTLPRILcDLRASTRLPYLHVLDMRISLLERTHYMFRNVPSSPATTGRhSDPELlIISHAEMtlLTsGSESTS MATQPALSTTSFSPLVLREMITHRLKFDPSNYLNCDLDRSDISThDFFETTLPRIL-DLRASTRLPYLHVLDMRISLLERTHYMFRNVPSSPATTGRLTDPGLlIISHAEVGlLTsGSGLTS. 0 0 0 0 +5383 PF05555 DUF762 Coxiella burnetii protein of unknown function (DUF762) Moxon SJ anon Pfam-B_7710 (release 8.0) Family This family consists several of several uncharacterised proteins from the bacterium Coxiella burnetii. Coxiella burnetii is the causative agent of the Q fever disease. 
25.00 25.00 202.60 202.40 21.30 20.60 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.65 0.70 -5.14 3 27 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 7 0 3 18 0 237.60 53 99.52 CHANGED M..saFhh.PKIDaQ-+......oNSQss.EcTLQEWLRGA..NssEcAslFIuSspppphPIhu-pp.sSuusLPKLcIc-IcsSIKhVcNpFKKaGLKDEsIlNYIL+aGGINGYsSLGpssLcclsplpGsspcsEN+.........+pTYhV+uKscVpYIEpFcllpls.hDRNsIGcF..luEVK.sollSKsG-IIHsCKKVplts.sApLchFKs+FGsQLphVETI+++LhElLsSLYpRlsplFN........EspNccsRssKpRhPGhu .........l.cIDhpph......psSpss.EcsLpEWLRGA..ss.EcAplFIsss.....PIhupp..sSuusL.KlcI.-hhssIh...s.FKh.GLKsE.IlNYILpaGGIsGYhoLGpsshcclshspGpspcsEN+.........+hoYhV+sKscVpYIEpFchhphs.hDRNpIGcF..lupVK.sollSKsG-I.HsCKKVplts.stpLp.F+p+FGsQLphVpph+h+LhElhsSLhpR....Fp........tthNctsRssppphPhhp. 0 3 3 3 +5384 PF05556 Calsarcin Calcineurin-binding protein (Calsarcin) Moxon SJ anon Pfam-B_7783 (release 8.0) Family This family consists of several mammalian calcineurin-binding proteins. The calcium- and calmodulin-dependent protein phosphatase calcineurin has been implicated in the transduction of signals that control the hypertrophy of cardiac muscle and slow fibre gene expression in skeletal muscle. Calsarcin-1 and calsarcin-2 are expressed in developing cardiac and skeletal muscle during embryogenesis, but calsarcin-1 is expressed specifically in adult cardiac and slow-twitch skeletal muscle, whereas calsarcin-2 is restricted to fast skeletal muscle. Calsarcins represent a novel family of sarcomeric proteins that link calcineurin with the contractile apparatus, thereby potentially coupling muscle activity to calcineurin activation [1]. Calsarcin-3, is expressed specifically in skeletal muscle and is enriched in fast-twitch muscle fibres. Like calsarcin-1 and calsarcin-2, calsarcin-3 interacts with calcineurin, and the Z-disc proteins alpha-actinin, gamma-filamin, and telethonin [2]. 
20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.97 0.70 -5.17 14 220 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 47 0 93 173 0 230.20 35 93.44 CHANGED Ms.tst..spp++ppustIhp-lst.stp-.t.......pLDLGKKlSlP+DlMLEELSLhsNRGS+hFKhRQ+RVEKFhaEs.......hsspspsphpphsss.ssp...............tph....ust...s.s...sst.ht.....sPpsluPG..usPhKtlP...............................................scKhppsslsKoYhSPW-cAhss-.-hltshtsphPtP.ttp.p.scYKSFNRsAhPFGGac+As+hhshphPc.hp.s...s..hssh.pslssRPSFNRTPhGWhspt....h............lshssEo--L ....................................s.pp+t.s.tlhh-lp................thsLGKKlSlP+DlMLEELSLhoNRGS+hFchRQ+Rs-KahaEs.............hpsps..h.s.....p..h......p..p.hs..ssp................................th......ut...t.......s.s.....ssss.tt..........sspshuPG..usshpthP...............................................scchppsslsKsYhSPWc..pAh.usc.phhtsht.ch.t....p.phscY+SFNRsAhPFGGhppusph..hp..hphPc........s...h.h.p.lstRPSFNRss.GWlsp............................................................................................................ 0 4 15 40 +5385 PF05557 MAD Mitotic checkpoint protein Moxon SJ anon Pfam-B_7761 (release 8.0) Family This family consists of several eukaryotic mitotic checkpoint (Mitotic arrest deficient or MAD) proteins. The mitotic spindle checkpoint monitors proper attachment of the bipolar spindle to the kinetochores of aligned sister chromatids and causes a cell cycle arrest in prometaphase when failures occur. Multiple components of the mitotic spindle checkpoint have been identified in yeast and higher eukaryotes. In S.cerevisiae, the existence of a Mad1-dependent complex containing Mad2, Mad3, Bub3 and Cdc20 has been demonstrated [1]. 
30.20 30.20 30.50 30.50 29.30 30.10 hmmbuild -o /dev/null HMM SEED 722 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.47 0.70 -13.42 0.70 -6.44 5 348 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 251 6 232 343 4 476.80 20 85.32 CHANGED DDlcssTTshhst.ts.SplRSths+FLussLcuot.sLusSss...............suuSLpKQsppShsppc.........cAEpI+oKupLIQlEpEltptELcHKRAplELE+cuossA-pYE+Esc+NpELps+lKsLcEpEsshcschpEspEct+th+pKl.........DtsopKLppEKpDptp-A+-sluslsuclSEhQhpA.st-splpsLEoEhp-L+EQLEppp++h.......sEu-cKlQuLpsttsppu-pss+IKcLEpcLpphEs-stlVKo.+pcLhplPcLE+ElppL+-ENc+L+oh+csstLLcEElp-Lco+LERtEch+-clssLELEpEKLpsELpSWcsLtQshsL..sLsTP-DlSp+lstLQpc-lpLsE+ssSlsSss+pLEsopQsLQcchppssupltEt+cKpEcp+shsRRLQ++lsLlTKERDthRAlLcSYDcEpT.ostSsph.p+L.csEDllQcVcsapuchEs...pL..c-sp--lulQKc+scsLcpElchL+pQhsss-p.h..spEtssu...LRh+l-oLEuEpuRLRpEpplLEMcht+hsLpGDYshucTKVLHhSpNPuucAcppp+sslE+LQAEs-+LKcll+tLEcsssps.sDsp.ssuSplouKElA-LKKQVESAEtKpQRLKEVFpsKIpEFRcACYpLhGY+ID.l........sssopYRLTShYAEcc-DsLlFcusuSosu.MpLLEosaStolschI-lalccpsSIPAFLSALTLELFsRpT 
..............................................................................................................................................................................................................................................................................................................................................................................t............................................................................................................t..............................................................t......t............p...........t.................t..p...........tt....h.....................t...................ht.....t...............tt................................................................................................t...p..t.hpt..tt.....hlppp..t.ptp......ht......p.................th...t.php...h.tph..t.h........h.t....t........s............................s....s...............t...l................p.......h.............t...p.p.h.h.htp.st.hp........p.......htt.......hp....ptht...ph...t...p.h.........t....t.h.p.........p....................t......t............t......................p..t...........h..t...cl...ppph.lh.cEhchh.+....t...ltt..h...p.t......p....t........t.......t...................................tt....p.hpt...........t............................h.....p.h.....t.....h........p..t......t............t....t......tt.............t.t....................h..p.tplptp..........pthpp..pht........hp...p...l..t..p..............................t...........tph+lLp.hp.sPh...t.....t....p.php.LptE...tp.L.h..t.l.......t..........t..........t..........................t.h..............h......................................t....................h.....tph.c.tp...l..t...p...h...p....h+..pR......LK...pla...pt+..p-F+..csshtlh.........Gaplc.h...........hsp........s...p.h+lpSha.......................t.....p......p...p.............l...h....p..h.........t.....t...s................h.......................................t...h.........ht..l.t....
t.phPshh.ush.shp.h...................................................................... 0 80 130 193 +5386 PF05558 DREPP DREPP plasma membrane polypeptide Moxon SJ anon Pfam-B_7798 (release 8.0) Family This family contains several plant plasma membrane proteins termed DREPPs as they are developmentally regulated plasma membrane polypeptides [1]. 23.20 23.20 27.30 25.70 23.10 23.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.45 0.70 -4.47 7 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 20 0 18 49 0 185.70 48 97.38 CHANGED MuYWKoKVLPKIKKlF.-KsG.sKKAAAAEhpKoFD-uKEthsKEFE-KKsELQPKVlEIYEAussEIKsLVKE..c.uGlKKpostVpKFl-ELsKIEFPGuKAVSEAsuKhGPuhlSGPlhalhEKVSTFls...pEpK.cEts............sAspspsptps........tscEK-lVlE.EcKKEEtAsPs.........ts.ss...-.t.tcc.spsssAsA..sEP.Ks .......MuYWKoKVLPKlKKlF.-Ksu.sKKA.AA.AEhhKoFDEuKEplsKEhEEKKTELpPKVVElYEAussElK.sLlK-..Ksuul.KKNSsuVpKFL-ELsKI-FPGuKsVSEAsuKhGsuhluGslhFlhEKVusFlP....cE.K..tcE...s..............sA.sptpptp.s................tspEKc.hsE..EtKccEtsss.s.................ttts.ss.s.s-tt.....tc...tt..tssss....stP.K............... 0 2 8 13 +5387 PF05559 DUF763 Protein of unknown function (DUF763) Moxon SJ anon Pfam-B_7805 (release 8.0) Family This family consists of several uncharacterised bacterial and archaeal proteins of unknown function. 
20.00 20.00 22.00 63.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.11 0.70 -5.48 30 177 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 170 0 106 193 49 316.90 44 82.83 CHANGED GsA-LPLHsG+VP.WLhpRMp+LupsIs-lllcEYGscclLcRLucPhWFQuFusllGMDWcSSGoTTsshGsLKcsLs..sc-.lGlhVsGGKG+putpTPcELptlu-+hsLDu...ppLspsSRLsAKVDssslQDGapLYpHsFllo-cGcWuVlQQGMNscp+hARRYHWhust..psFsppPHsuIsG.hppstlLNlss+cuccsRcshl-LlpE.sPs+.lhpphpphhshhp............................h.h.tt+.lhtpslshchhtpsLptshEhsPpcFc-lLhlpGlGPpTlRALuLVAElIYGsPssapDPs.......+auaAhGGKDGhPaPV ................G.ADLPLHsG+VP.WLhpRMp+LuthIschllccYGtcclLcRLucPhWFQuFusVhGMDWcSSGhTTsshGALKcuLs...sp-lGlhVsGGKG+pSRpTPpELhtlu-phulDu...ppLspsSRLsAKVDssAlQDGapLYhHuFlloccGcWsVlQQGMNscp+hARRYHWhup......th.ps..FlppPHsuIsG..hppsp.ll...NLss+cuttsRps.l-Llp-.sPs+.lhpph.ppltsh.t.......................................................................thhhPsc+plhtp...s...lsh+c.lt..tsLtts.hEtsPpsFc-LLhl.GlGs+TlRALALVAEllaGsPspapDPs.............+FSaAhGGKDGhPaPV.......... 0 43 68 83 +5388 PF05560 Bt_P21 Bacillus thuringiensis P21 molecular chaperone protein Moxon SJ anon Pfam-B_7820 (release 8.0) Family This family contains several Bacillus thuringiensis P21 proteins. These proteins are thought to be molecular chaperones and have mosquitocidal properties [1,2]. 
25.00 25.00 367.20 367.10 23.80 17.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.22 0.71 -4.90 2 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 0 0 11 0 182.00 93 99.96 CHANGED .sENtsFYKIFThcNNNhChNsTLLE+lFKNNl-EFDFSLVKpNLEHEKNCVITSTMNQTI.FENMNSpEMGpKsYSFhNQTVLNNKGNoSLEEQlSsIF.RCVYMpstKSSSYIK.LEQD.NplchhsSLlFIsPY+pNlh.IhPVsLpLTLlsKNVKpsS.pNlFSGDhHFNMVTMTaLT MTENGVFYKIFTTENNNFCINPTLLERVFKNNLDEFDFSLVKKNLEHEKNCVITSTMNQTISFENMNSTEMGHKTYSFLNQTVLNNKGNSSLEEQVSNIFYRCVYMEVGKSSSYIKPLEQDSNKIRYVCSLLFIVPYKNNITSIIPVNLQLTLLSKNVKQSSSTNIFSGDIHFNMVTMTYLT 0 0 0 0 +5389 PF05561 DUF764 Borrelia burgdorferi protein of unknown function (DUF764) Moxon SJ anon Pfam-B_7823 (release 8.0) Family This family consists of proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). 25.00 25.00 58.90 58.80 21.90 16.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.12 0.71 -4.85 2 127 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 28 0 12 99 0 175.50 65 97.09 CHANGED MIlsLsp.lpaLI+IhpsFKhYh..pphEh-IlNTYNHPYLpKhsTsosNllsLK.-uhEtLhs+s.+sts.hcph.EFplpFplYhlshVL.pt.hDu.pphhhlYthh.-FLHpphaKaphppp.ps-.h.hlsaYlh.hSNhpssGLlslushauNhsaShs.hF...VtsIplLKpE+ ...MIhTLDhlLNHLhpIFKGFKAYATENNFECDIINTYNHPYLSKITsuSSNIIALKFDGTEsLFDHNsRuGsFYENALEFSlNFQIYIIAIVLNApDFDANSRMLhLYuMLS-FLHN+.s.HKYTLtpp...QP-YlsKINFYIYPhSNMQTVGLINLGTKYSNHAYSASlAFNASVKsIEILKEE.h...... 0 8 8 8 +5390 PF05562 WCOR413 Cold acclimation protein WCOR413 Moxon SJ anon Pfam-B_7803 (release 8.0) Family This family consists of several WCOR413-like plant cold acclimation proteins. 
25.00 25.00 45.40 25.20 24.60 23.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.16 0.71 -4.76 15 123 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 37 0 50 124 0 164.80 42 87.45 CHANGED LsMKo................stsussllsSDhcELssAA......+KLAsH...AlpL.....uuLGFGso...hLpWlAshAAlYLLlLDRTNW+TNhLTuLLVPYIFhoLPollFshlRG-lGpWIAFlAlllRLFFP++FP-a....LELPuuLILLlVVAPslhAsphRso..hlGssIsLsIuCYLLpEHI+uSGGF.+sAFs+usGVSNolGIllLh .....................................................l.tchpthhhus......cphusp....shhh.....suh.shuss...hLpWlushAAlhLLllccstW+..TshhsuLLVPYlhhphPsslFshhRG-hGp..WlAFlAlllRLFFsppFPs.....LELPsu..hlLLllVAPphhssh.Rss..h..hGshlsLhIusYLl.pHlptuGGh.+puFspupslussluIhll.................... 0 11 35 43 +5391 PF05563 SpvD Sal_SpvD; Salmonella plasmid virulence protein SpvD Moxon SJ anon Pfam-B_7864 (release 8.0) Family This family consists of several SpvD plasmid virulence proteins from different Salmonella species. 25.00 25.00 150.00 149.90 18.60 17.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.49 0.70 -4.83 3 25 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 15 0 1 12 0 202.20 98 100.00 CHANGED MRVSGSASSQDIISRINSKNINNNDSNEVKRIKDALCIESKERILYPQNLSRDNLKQMARYVNNTYIHYSGNCVLLSACLHYNIHHRQDILSSKNTASPTVGLDSAIVDKIIFGHELNQSYCLNSIDEVEKEILNRYDIKRESSFIISAENYIAPIIGECRHDFNAVVICEYDKKPYVQFIDSWKTSNILPSLQEIKKHFSSSGEFYVRAYDEKHD MRVSGSASSQDIISRINSKNINNNDSNEVKRIKDALCIESKERILYPQNLSRDNLKQMARYVNNTYVHYSGNCVLLSACLHYNIHHRQDILSSKNTASPTVGLDSAIVDKIIFGHELNQSYCLNSIDEVEKEILNRYDIKRESSFIISAENYIAPIIGECRHDFNAVVICEYDKKPYVQFIDSWKTSNILPSLQEIKKHFSSSGEFYVRAYDEKHD 0 0 0 1 +5392 PF05564 Auxin_repressed Dormancy/auxin associated protein Moxon SJ anon Pfam-B_7941 (release 8.0) Family This family contains several plant dormancy-associated and auxin-repressed proteins the function of which are poorly understood [1]. 
25.00 25.00 25.70 25.00 24.00 22.40 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.94 0.71 -3.50 21 211 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 59 188 1 110.90 35 91.34 CHANGED LWDDlVAGPpP-p.GLGKLR.+hosps..lslpt.s-up................sus..hpRSlohss..................sPsoPsTP..oTPs..oP.ouR.p.-NVWRSVFpPGSN.uT+shGuphFDKPspPNSPTVYDWLYSs-TRS+HR .......................................................LWDDsVAGPpP-p.GLGKLRKh.ohps.....shp...stut..........................tps.thp+Slsh.p........................sPsSPu.ss....ooPs........oP..h....o..sc....c.pps.....WRshh.pss..p..t.tpt.t.usph..pp.s...tPpoPTVYDWh.......................................................................... 0 13 38 49 +5393 PF05565 Sipho_Gp157 Siphovirus Gp157 Moxon SJ anon Pfam-B_7948 (release 8.0) Family This family contains both viral and bacterial proteins which are related to the Gp157 protein of the Streptococcus thermophilus SFi bacteriophages. It is thought that bacteria possessing the gene coding for this protein have an increased resistance to the bacteriophage[1]. 28.60 28.60 28.90 28.60 27.50 28.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.88 0.71 -4.66 29 450 2009-11-04 11:20:49 2003-04-07 12:59:11 6 1 390 0 49 359 53 153.40 29 97.10 CHANGED hpLYELsspatplh.phhpp...hDs-tltDTL-u..lppsh-sKs-shsplI+sl.......-uDscslKtEtcRLt-++Kuh-sclcpLKsYLtptMptsshc+l+s..shholulpKstsuVpl..--stlPscYh.....ts.KlDKpsltc.....sLKsGcclsGApLcpsc.sLpIR .....................................pLY-Lsspatpl.................p...........h-s...-..h....l....t..D..T.L-u.......lpsphcs....Ks-shsphl+sh.......puchc.hhcpEh+RLp....p....++.c...shpspscpLKsY.......LtptMpts......s..hc.....+l.cs....shhplsl.p.Ks.s..S..l...pl.....hD....E.........sh.l....Ptcah.........ppp.+hDKptltc.....sL.K.s.GpclsG.AcLhp.sc..sLhI+........................................ 
0 14 28 36 +5394 PF05566 Pox_vIL-18BP Orthopoxvirus interleukin 18 binding protein Moxon SJ anon Pfam-B_7955 (release 8.0) Family Interleukin-18 (IL-18) is a proinflammatory cytokine that plays a key role in the activation of natural killer and T helper 1 cell responses principally by inducing interferon-gamma (IFN-gamma). Several poxvirus genes encode proteins with sequence similarity to IL-18BPs. It has been shown that vaccinia, ectromelia and cowpox viruses secrete from infected cells a soluble IL-18BP (vIL-18BP) that may modulate the host antiviral response. The expression of vIL-18BPs by distinct poxvirus genera that cause local or general viral dissemination, or persistent or acute infections in the host, emphasises the importance of IL-18 in response to viral infections [1]. 21.60 21.60 22.30 22.20 20.80 21.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.74 0.71 -4.12 2 55 2009-09-10 15:00:19 2003-04-07 12:59:11 7 1 18 1 0 49 0 123.30 86 99.62 CHANGED MRILFLIAFMYGCVHsYVNAsEhKCPNLsIVTSSGEFhCoGCVcaMPpFSYMYWLAKDM+SDE.sKFIEHLG-GIKEDETlpThDGtIsTLpKVLHVTDTNKFspYRFTCVLTTlsGVSKKNIWLK ..MRILFLIAFMYGCVHSYVNAVETKCPNLDI...VTSSGEF+CSGCVEHMPcFSYMYWLAKDMKSDEDTKFIEHLGD.GIKEDETVRTpDGsIsTLpKVLHVTDTNKFAHYRFTCVLTTlDGVSKKNIWLK........ 0 0 0 0 +5395 PF05567 Neisseria_PilC Neisseria PilC beta-propeller domain Moxon SJ, Bateman A anon Pfam-B_7966 (release 8.0) Domain This family consists of several PilC protein sequences from Neisseria gonorrhoeae and N. meningitidis. PilC is a phase-variable protein associated with pilus-mediated adherence of pathogenic Neisseria to target cells [1].\ This domain has been shown to adopt a beta-propeller structure [2]. 
21.10 21.10 21.70 21.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.18 0.70 -5.40 9 608 2012-10-05 17:30:42 2003-04-07 12:59:11 6 13 415 2 192 644 232 326.40 25 29.43 CHANGED LGDIVNSPIVAVGt.....YLA........................TSANDGMVHIFKps.GuDcRuYNLKLSYIPGTMPRKDIpN.....p-STLAKELRsFAEKGYVG..DRYGVDGGFVLR+l..-.stQcHhFMFGAMGhGGRGAYALDLoKhDuN.P..ssssLF.....DVKcs...spNGpNRVc..LGYTVGTPQIGKTHNGKYAAFLASGYAoKc.IsSssNKTALYVYDLEs.sGT..lI+KIEVPGGKG...GLSSPTLVDKDLDGTVDIAYAGDRGGNMYRFDL.....SsssPsp.........WSVRTIFcGT..KP.............ITSAPAlS+LKDKR..VVIFGTGSDLSEDD..Vsss-tQaIYGIFDsDT..GT..sospsGpGsGLLEQsL..spEsKTLFLoshK ...........................................................................................................................................hGDllpS.........h..hs..s.....................hhh...........................................suANDGMlHhF.................s.....s......s.............s.......t.....................p............p....t.hu.....al.......P..................................p...sl.ht....p.....L....t......t..h...s....p..t....s......a.......t...........cp..Y.hV....D.G...s........s..h..............t....c..s................th.s.s................s..........h+.....shlhGuhstG...............G....+.............u..............h..............aAL....DlT...........t...................s..s...........s...s.h...t...hh....................................p.h.p...s.s.................t....t.....s...s..p.......LG..oh.u...p.P...............l........s........+.............h.............p............s......G...........p...........a....s..........s.......l.h..GsG......Y..........s.........s.......t..s.............................................................s....p......s..........u..........L............al...l...c........h.................p............s.....................Gs..............h...........l........t......p....l.........s....s....s..s...u....ps................G.L.....u...s....s..........s...l..l.....D
.......p.............s....s..D.G...h.s.....D......h.....s...Y.......A...G........D.......h.....t.......Gsl.......WRF.D.l...............ss...s...ss.ss.................................h.s...h........t.................h.....t...s.........pP........................................Io.st...P....l............h............s...t..h.................h.l.h.h.GT.....G....p.........h....t..t.tD..........h...s..p...t.p.....thYul...h..........D..t.tt......................................................................ht............................................................................................................ 0 41 123 162 +5396 PF05568 ASFV_J13L African swine fever virus J13L protein Moxon SJ anon Pfam-B_7998 (release 8.0) Family This family consists of several African swine fever virus J13L proteins. 20.80 20.80 20.80 23.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.41 0.71 -4.76 2 103 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 12 0 1 95 2 169.90 80 100.57 CHANGED MDSEFFQPVYPRHYGECLSssssPSFFSTHMYTILIAIVVLIIIIIVLIYLFSSRKKKAAAAIEEEDIQFINPYQDQQWutVTPQPGhuKPAGAoTuSAGKPVhsRPsTN.....+PsTN+PVhDp.sMAsGGPtAASAsA........aPAE.YTTsTTQNTASQTMsA.ENLRQRsTYTHKDLENSL .............MDSEFFQPVYPRHYGECLSPVosPSFFSTHMYTILIAIVVLlIIIIVLIYLFSSRKKKAA.A.AIEEEDIQFINPYQDQQWAEVTPQPGTSKPAGATTuSs....GKPV.TGR......P.....ATNRPss.....s+PVT.....sN.....PV..........TDR.....LVMATGGPAA........AsAAA..................sAHPsEPYTTVTTQNTASQTMSAIENLRQRsTYTHKDLENSL................... 0 0 0 1 +5397 PF05569 Peptidase_M56 BlaR1 peptidase M56 Studholme DJ anon Merops Domain Production of beta-Lactamase and penicillin-binding protein 2a (which mediate staphylococcal resistance to beta-lactam antibiotics) is regulated by a signal-transducing integral membrane protein and a transcriptional repressor. The signal transducer is a fusion protein with penicillin-binding and zinc metalloprotease domains. 
The signal for protein expression is transmitted by site-specific proteolytic cleavage of both the transducer, which auto-activates, and the repressor, which is inactivated, unblocking gene transcription. Homologues to this peptidase domain, which corresponds to Merops family M56, are also found in a number of other bacterial genome sequences. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.98 0.70 -5.45 12 1807 2012-10-03 04:41:15 2003-04-07 12:59:11 6 87 857 0 421 2510 322 241.60 19 47.77 CHANGED hhlhthslssshslLllhllRhhl++hhushhsYtlWhlV.lthlhshh...............tsshthshspsPhstsssp.............shstsh.thshtslh.hL.hllWlsGsll.....htuhhhh+pp.hl+phs.hps....phLttstcpht...............sPhlhGhh+PpIllPssh..phsscEhchIlhHEhsHl+RtDhhhNhlsshhpslhWFNPllaluh+thchDpElACDtsVLst.ppcpR+pYucslLpshhusss..sshssph...sppsL+cRlhhl .....................................................................................................................................................................h...........................................................................................................................................................................................................................................................................................................................................h.................h...h....h....h........l.W.....l.h...h...h.....h..h..h.....h..h...h..h...h....s..hh.......h...t..................h......h..t....................h...........t....................t..hh............t........h..................p...ht..........................h..........l...h.........s....t....t....l.....t...s...P.....h..h......h.......u....h......h......c....s....h....l.l..l.P....p.t.............ph.....s..p.p.c.l....c.hIlhHEh......sHh.+.p.+D........h.l.hhh..l.....h.t.l....h..t.....h.l.h........W.F..NPh...l..a.hhh...pphppspEhtsD.ctVl........p..p.........t........p....p.tt...
.p.Yupsll....p....h..s...h..t.t...s.h..........................h..h...s....t..................tpp.lKcRlh............................................................................... 0 214 340 394 +5398 PF05570 DUF765 Circovirus protein of unknown function (DUF765) Moxon SJ anon Pfam-B_8063 (release 8.0) Family This family consists of several short (27-30aa) porcine and bovine circovirus ORF6 proteins of unknown function. 25.00 25.00 63.70 63.60 17.10 16.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.94 0.72 -7.27 0.72 -3.98 3 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 0 10 0 28.70 94 99.73 CHANGED MASSTPASPAPSDILSSlPQSERPPGRWT MASSTPASPAPSDILSRLPQSERPPGRWT 0 0 0 0 +5399 PF05571 DUF766 Protein of unknown function (DUF766) Moxon SJ anon Pfam-B_8021 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 25.00 25.00 26.10 25.70 24.00 24.00 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.29 0.70 -5.13 5 128 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 91 0 77 117 0 262.90 52 91.77 CHANGED ChGLYCGRTLLtsNSSp-h.....YS-CGACPRGpRoNsQphCsPCp-sLptYDWLYLGFMAMLPLllHhFFI-hstKs.pKpS+uplhpalSAllEsslAAllTlLlo-PlasL+IpSCcVphLSDWYThhYNPSPsYpTTV+CTQEAVYPLYTIVFVaYhFCLVhMhLLRPlLVsKI...LsVus+hK.ulYuALYFFPlLTlLHAVuGGLIYYuFPYIlLVlSLVuhAlHhSh.Kl-pohKsLl++.shp.++lllLhuHWLLLAYGllSL.ps.plcYDluLLs..LVPsPsLFYlFTlKFT-Pu .....C.GhYCG+sh..L.pss...............au-CG...sCPRGpRs.....Ns.pphCpPCs-sPphYDWLYLGFMAhLPLlLHWF..F.I-h...........hs...tK..pSpssLhpHloAlhEsshAAllTLLls-PlGsLhlpSCcVhh...LSDWYThLYNP.SPcYhsTl........HCTpEAVYPLYTIVFla..YAFCL.V.hMhLlR.P..lLl.pKl........Ls.+ss+....hK....SIYAALYFaPILTllpAVuGGLlYYuFPYIllllSllohAla.hSh..clcp..shc.............Lltp.........Kpl.llLhuHWLLaAYGIlSls..............php..p..hp....c...hshLs..LVPhPuLFYlhTs+FT-Ps................................................................ 
0 27 32 56 +5400 PF05572 Peptidase_M43 Peptidase_M46; Pregnancy-associated plasma protein-A Studholme DJ anon Merops Family Pregnancy-associated plasma protein A (PAPP-A) is a metallo-protease belonging to Merops family M43. It cleaves insulin-like growth factor (IGF) binding protein-4 (IGFBP-4), causing a dramatic reduction in its affinity for IGF-I and -II. Through this mechanism, PAPP-A is a regulator of IGF bioactivity in several systems, including the human ovary and the cardiovascular system. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.62 5 563 2012-10-03 04:41:15 2003-04-07 12:59:11 8 39 269 10 378 715 221 143.40 28 22.67 CHANGED sspscsItsatW...DNpKYMNVaIQs-L.ssGuTsNSGsAWYPcoGMos-slARVsFNGtYLus...ssoSosFuuoLTHEFGHFLGLcHTFcGG....CccGsup......DcssDTPspsutch....usspslhN......CsG-hlNspNaMDYNs...CpsMFTQsQVsRMsssL- ................................................................................ttshth..............................................................................................................................................................................................................h.........h.....................h.......................h..........s.....h.....u.....p.....Ths..HElGHaLGLaHs...F..ps.s...............Cs.s......ht.......................Dh.l..sD.T...P.sp...s...p..s..p...s...........ss..s..t.s.h..ss...........................C..s..s...t......s....h............h.......p....NaMD..YosD.....sC.h.....s..p.F..TssQhsRMpt...t.......................................................... 1 151 254 320 +5401 PF05573 NosL NosL Moxon SJ anon Pfam-B_8116 (release 8.0) Family NosL is one of the accessory proteins of the nos (nitrous oxide reductase) gene cluster. NosL is a monomeric protein of 18,540 MW that specifically and stoichiometrically binds Cu(I). 
The copper ion in NosL is ligated by a Cys residue, and one Met and one His are thought to serve as the other ligands. It is possible that NosL is a copper chaperone involved in metallo-centre assembly [1]. 20.20 20.20 20.20 20.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.84 0.71 -4.50 50 701 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 544 2 243 576 51 139.30 25 73.14 CHANGED hhhLuuCscppss..t.s.sshplsspspC+hCGMhls-aPGPKuQlhhput..ps....haFsss+Dhauahh.....pPEps.+plpAlaVpDMup..ssWppPss....ppaIDAc......pAaYVhGSsppGuMG.splssFuscssAptFAsca.GGpVlpFc-Is.shl ........................................................................h...h.uCt.tttt.......s.htl.p.p.p.s.hC.t.hCsMsl..h..-..hs..t.tuplh..h....pss....cs...........hhFss.ht.shhtahh......................s..c....p.........s......c....p...h.ptlaV.pDhss..............................tpaI-Ac..........cAhYV..hsos.....h.h...u....s....M...G..shlsFusc.p.sAcpFspp...p....G...G+......lls.ac-ls...................................... 0 66 185 221 +5403 PF05575 V_cholerae_RfbT Vibrio cholerae RfbT protein Moxon SJ anon Pfam-B_8029 (release 8.0) Family This family consists of several RfbT proteins from Vibrio cholerae. It has been found that genetic alteration of the rfbT gene is responsible for serotype conversion of Vibrio cholerae O1 [1] and determines the difference between the Ogawa and Inaba serotypes, in that the presence of rfbT is sufficient for Inaba-to-Ogawa serotype conversion [2]. 
27.90 27.90 27.90 28.00 27.80 27.80 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.81 0.70 -5.35 2 55 2012-10-10 17:06:42 2003-04-07 12:59:11 6 1 34 0 2 97 69 238.30 95 99.24 CHANGED MKHLIKNYVQKLIKTELDAIQSKSVHDNRNFIYNGEFLILESEFGhHCFPRVQLNHALSYKNPNFDLGMRHWIVNHCKHDTTYIDIGANVGTFCGIAARHITQGKIIAIEPLTEMENSIRMNVQLNNPLVEFHHFGCAIGENEGENIFEVYEFDNRVSSLYFpKNTDIADKVKNSQVLVRKLSSLDISPTNSVVIKIDAEGAEIEILNQIYEFTEKHNGIEYYICFEFAMGHIQRSNRTFDEIFNIINSKFGSKAYFIHPLSSAEHPEFNKATQDINGNICFKYVS ..................................................MKHLIKNYVQKLIKTELDAIQSKSVHDNRNFIYNGEFLILE...SEFGWHC.FPRVQLN.HALSYKNPNFDLGMRHW.....IVNHCKHDT..TYIDIGANVGTFC.GIAA.R.HI.T.Q.G.KIIAIEPLTEMENSIRMNVQLN..N..P.LVEFHHFGCAIGENEGENIF....EV.....YE...FDNR......VS...SL.YFQK.NTDIA.....DKV...K.N.S.QV.LV..R..KL.SSL..D..I.......S..P..TNS..V..VIKIDAEGAEIEILNQIYEFTEKHNGIEYYICFE....F...AMGHIQRSNRTFDEIFNIINSKFGSKAYFIHPLSSAEHPEFNKA.TQDINGNICFKYVS................................................................... 0 2 2 2 +5404 PF05576 Peptidase_S37 PS-10 peptidase S37 Studholme DJ anon Merops Family These serine proteases have been found in Streptomyces species. 
19.50 19.50 19.50 19.50 19.40 19.30 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.67 0.70 -6.03 2 98 2012-10-03 11:45:05 2003-04-07 12:59:11 6 1 87 0 31 179 5 421.20 41 91.66 CHANGED AEPKAVDIKDRLLSIPGMSLIEEKPYTGYRFFVLNYsQPVDHRHPSKGTFQQRITVLHKDVNRPTVFYTGGYNVSTNPSRREPTQIVDGNQVSMEYRYFTPSRPAPADWSKLDIWQAASDQHRIFKALKPLYS+NWISTGGSKGGMTATYYERFYPRDMDGVVAYVAPNDVVNKEDSAYDRFFARVGTDECRDKLNGVQREALVRRAPLEKKYAAYAAENGYTFDTIGSLDRAYEAVVLDYVWGFWQYSTLADCADIPADAKNATDDAIWGSVDAISGFSAYTDQGLETYTPYYYQAGTQLGAPTIHFPHIEKKYIRYGYQPPRNFVPRSIPMKFEPWAMRDVDTWVRHNARHMLFVYGENDPWGAERFRLGHGARDSYVLTAPGMNHGANVAGLVPDQKARATARILDWAGVAPAKVQENPSAARPLATFDARLDQRDVEREPALRP ................................................................sst......Dlpc+L.ulPGhohlc.c...h...........s..uY.R.h...aVlpa.sQPlDH...+....+....P...p.p.GoFpQR.....l...hl....h...H..........+.....s....h....s....R...P...T....V..hh.TpG.....Y.s......s....u.......h.....s...P..p....h...p........E........o.pL.....l.......s.....uN.p.....lslEYRaFs.SpP......p....P....t......D.........W....s.....h.......L...o...lh.....QuAsD......H...+..lh.p.A.h..K....p....l.Y....s...t....+.........W...luTGhSKGG.TuhaYRpFaPcDlDsoVsY.V.AP...s.h.h.s.t.EDu..ta....-.t.F.h.......p.p....VG.T............t......-..............CR...c+l..pshQ..h..EsLhR+ss.......Lls+a.cp..ausppshTFc.hlGsl-cuaEhsVL-YsFuFWQau.s.s.s.Cs.s...IPu..tsAoDcpLasalssISuh.s.hasD...p...uhtsYssaaYQAuspLGh.shch.sa.h.cth..l....c....h....Gh.p.......sR.a.l......Pcpl..s..M.c.....F-.shhpclcpWl+csup+MlFlYGpNDPWoApshphhc.G..t+.spa.VastPGusHs.AplusLsts.p+tpAhutltcW....................................................t......................................................................................................................................... 
0 13 22 29 +5405 PF05577 Peptidase_S28 Serine carboxypeptidase S28 Studholme DJ anon Merops Family These serine proteases include several eukaryotic enzymes such as lysosomal Pro-X carboxypeptidase, dipeptidyl-peptidase II, and thymus-specific serine peptidase. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.45 0.70 -5.91 12 1264 2012-10-03 11:45:05 2003-04-07 12:59:11 7 20 253 9 917 1384 70 339.30 23 80.92 CHANGED QpLDpFcsssscoapQRYahNspahpss..uPlFLhl...GGEushssthltss..thhphApcaGAhVh.LEHRFYGpShPhsshost.sl+a...LoSpQALtDlApFIpshs..phphtsss.WIsFGGSYuGsLuAWhRthaPcLlhGulASSuPlhAplDFh...EYhpVVtsSlpphus....p.ChsslppuFsplcpLhtospGppsl..ppthphssshspt...s-.pDht.hatslhu.atulVQYs..hDspss.shs.hslpphCphhh......ss......oss..s....hshhh.l.pphstt...shshtpsshsh.hhss...phpssu.....hts.sRtWhaQTCoEaGaaQosss....stthFusshPsshal.chChslFGsshspp.lptslttTN.h..YGG..p.suoNVlhsNGslDPWHsLG..htssssuollshlIpusuHCsDMhsspsuDsspLpsuRphl 
...................................................................................hDp..........s....................p.............tpa.....p...RY.h.h..s..t..p..a.a...p...............s............u.......P.l......hhhh..........usE...s.........s...h...t.........h....h.......t..ps..........hhh.p......hAt.....phtuhhlh...h..EH.................RaY...G....pS...h......P...........h............s..........s............s......h......t.....p..........lpa...................LospQA...............L..sDhs..hh....p...............t........h..........p.........t................p.......h..............................t..............t......s.....s..............hlhhGG...............SYuGh..............Lu...AWh...........Rhp........YPc..l..hh.................uulAo.Sus..l..............t......h.......h.......s..h...........ta...h.p..lt...psh....s...................p..C.ttl.ptsh.....p..l..p.p.h.h........t..p...........t..t................ttl...........tp.ht.....h..s..........h..p................p.........................h..................................t..h..h.......pa.............................................................ht.........h..Ct..h..........................t....................................................................h.......h............h.........................................t.............................................t.......................s....ttW..aQ.sC.s.E..h....s....h..........s..s...ss...................t...h...h...s..p....h..s.....h...p....h..h....p.C..............ph....a.............s........................................................t.....h....s..............a.G.u..............................t......s...s....p...lh....a...ss...............G..hDPW.....p........h.........u..........h..........p........t........................s.................p...............s..........h......h..h......t..s.............s.Hp....Dh............................................................................................................................................
........................................................................................... 0 328 536 770 +5406 PF05578 Peptidase_S31 Pestivirus NS3 polyprotein peptidase S31 Studholme DJ anon Merops Family These serine peptidases are involved in processing of the flavivirus polyprotein. 20.50 20.50 22.60 25.00 20.40 18.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.32 0.70 -5.13 4 226 2012-10-02 13:45:52 2003-04-07 12:59:11 7 34 30 0 0 244 0 175.10 90 7.31 CHANGED GPAVCKKITpHEKCHVNIhDKLTAFFGlMPRGTTPRAPVRFPTuLLKVRRGLETGWAYTHQGGISSVDHVTsGKDLLVCDSMGRTRVVCQSNNKhTDETEYGVKTDSGCP-GARCYVLNPEAVNISGoKGAhVHLQKTGGEFTCVTASGTPAFFDLKNLKGWSGLPIFEASSGRVVGRVKVGKNE-SKPTKLMSGIQTVSKNpADLTEMVK ..GPAVCKKITEHEKCHlsIhDKLTAFFGlMPRGTTPRAPVRFPTuLLKlRRGLETGWAYTHQGGISSVDHVTsGKDLLVCDSMGRTRVVCQSNNKhTDEoEYGVKTDSGCP-GARCYVhNPEAVNISGoKGAhVHLQKTGGEFTCVTASGTPAFFDLKNLKGWSGLPIFEASSGRVVGRVKVGKNE-SKPTKlMSGIQTVSKspsDLT-MVK...... 0 0 0 0 +5407 PF05579 Peptidase_S32 Equine arteritis virus serine endopeptidase S32 Studholme DJ anon Merops Family Serine peptidases involved in processing nidovirus polyprotein. 
24.70 24.70 24.70 25.10 24.60 24.20 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.86 0.70 -5.44 6 275 2012-10-02 13:45:52 2003-04-07 12:59:11 8 13 15 6 0 344 2 299.70 70 11.60 CHANGED LThlWVpFF.....LlsVshhAGVsulslLlshWhLuRhTslsGlVTPYDlHhlTuoPRGu.....uulAoAP-GTYLAAVRRuALTGRsthFlPoshGSVLEGuhRT+psutNsVsVhGSohGSGGVFTIsGpsVVVTAoHVLu.sspARVossGasppLsFKssGDYAhAcs..ssWpGsAPplchu.pshpGRAYWpssoGlE.GllGpssAFCFTsCGDSGSPVlsEDGpLlGVHTGSNKpGSGhVTTPsGcTluhuslKLSEhupHauGPtVPlusl+LPcplIsDVcuVPSDLsuLl-SlPslEG .................................................................................LTILWLVFF.....LISVNhPSGILAlVL...LVSLWLLGRYTNlAGLVTPYDIHHYTSGPRGV.....AALATAPDGTYLAAVRRAALTGRTMLFTPSQLGSLLEGAFRTpKPSLNTVNVVGSSMGSGGVFTI.DGKlKCVTA.AHVLT.GNSARVSGVGFN...QM.LDFDV..KGDFA.IADC..PNWQGsAPKsQFCpDGWTGRAYWLTSSG........V......EPGVI.........GsGFAF.C.F.TACGDSGSPVITEAGELVGVH.TGSNKQGGGIVTRPSGQFCNVcPIKLSELSEFFAGPKVPLGDVKIGSHIIKDssEVPSDLCALLAAKPELEG........................ 0 0 0 0 +5408 PF05580 Peptidase_S55 SpoIVB peptidase S55 Studholme DJ anon Merops Family The protein SpoIVB plays a key role in signalling in the final sigma-K checkpoint of Bacillus subtilis. 
21.70 21.70 21.90 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.18 0.70 -4.89 6 510 2012-10-02 13:45:52 2003-04-07 12:59:11 7 3 487 0 114 438 11 195.60 43 45.24 CHANGED hpc.Ku-upY+IGLWVRDSsAGIGTLTFY-PpoKKYGALGHsIoDsDTp+llslcsGpIlcSolsSIcKGspGsPGEl+GhFss-pcsIGslppNophGIFGshppc......hsNhpspsl.Vuhps-VK.GPAcILTsIDscplcpaDIEIVphs.QcuPssKuMVIKlTDs+LLccTGGIVQGMSGSPIIQNsKllGAVTHVFVNcPssGYGlaIEWMLc- ...............................t...tptpY+lGLalRDssuGlGThTFhcspstpaGALGHsIoD..D.Tt.p.l.lpsGplhposlhuIc+GppGpPGEhhu.h.a....p.p.p.h.lGsIptNo.hGIaGphppt............t...h......pcsh.lAhpppl+pG.s.A.p.I.l.Tslcs.p.clcpF-IEIhplh.pptsusKuMl.I.+lTDtcLLpcTG..GIVQGMSGSPI.lQ.sGKllGAVTH..V..F.V.N.D..PspGYG..la..IEhMLp...................... 0 64 95 101 +5409 PF05581 MCP_N Peptidase_S38; Vibrio chemotaxis protein N terminus Studholme DJ anon Merops Domain This domain is found at the N terminus of several methyl-accepting chemotaxis proteins from Vibrio species. 21.40 21.40 22.10 21.80 21.30 21.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.26 0.72 -3.87 2 277 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 155 2 39 173 3 101.50 42 16.53 CHANGED M+FSpKIVAASSsLLLsslALLShpQh.pVR-EIcShVpDSl.EhVcGVppThtpslsu+KulAQ.sTpllp.sPp..s.s+ollppP.lKsoFLhlGhGhEp ...MKFSHKIVsASShLLLsTlALLShpQhhplRsEIcshVpsSlpEhlcGVpsTlps.lsu+KuLAppsTpllphcPs..shlcsllppPhlKsoFLhlGhGhEp........ 0 8 13 28 +5410 PF05582 Peptidase_U57 YabG peptidase U57 Studholme DJ anon Merops Family YabG is a protease involved in the proteolysis and maturation of SpoIVA and YrbA proteins, conserved with the cortex and/or coat assembly by Bacillus subtilis. 
25.00 25.00 26.50 26.50 20.90 20.00 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.91 0.70 -5.44 25 309 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 285 0 73 206 2 268.90 49 98.65 CHANGED hclGDlVsRKSYspDIhF+II-Icp.tpstchslL+GlchRLlADAPhcDL.hlcpschpc..pc+phppchpcsl+plhpctp............hhcpcpphpsssshppptphFphPG+VLHlDGDs-YLchCLclYcpLulsshGhtlpE+-QPcclhsLlcca+PDIlVlTGHDAhh...KsctshpDLssYRsS+YFlcoV+pAR.+apPshDpLVIFAGACQSpaEullcAGANFASSPsRVhIHALDPValsEKIAaTsls-sVslp-llcNTITGtKGlGGlETRGKhRhGhPhp.h .............h+lGDlVsR+SaspDIhF+Il-Ict.........clAILpG.clRLlADAPh-DLhtlcpcchpcpt+p..cpphpcsh+hhppchh..............h.c..pcp.ph...p....s..st..s..hp..pp..paFphPG+VLHlDGDstYLcpCLclYpclGlss.GlpspEpE.scclhcLlcca+PDILVlTGHDuhh...Ks..c..t..shtDLsuYRpS+aFVpuV+psR.+h.PshDpLVIFAGACQSaaEALlcAGANFASSPuRl.IHALDPValstKIuaTsh.-hVslhDVlcNTITGtKGlGGlETRGhhRpGhPhp............................ 0 31 58 64 +5412 PF05584 Sulfolobus_pRN Sulfolobus plasmid regulatory protein Moxon SJ anon Pfam-B_8140 (release 8.0) Family This family consists of several plasmid regulatory proteins from the extreme thermophilic and acidophilic archaea Sulfolobus. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.16 0.72 -4.01 6 48 2012-10-04 14:01:12 2003-04-07 12:59:11 6 5 28 0 10 65 3 69.50 40 58.43 CHANGED hEKLTlophILlpLSt+.CtTLEcLpE+TussKphLLVhLTRhaK+GIIhRKWp+aGG+KaREYCLKhREEll .....................p+Lp.pptI.LlhLu+t...CtTLE-LcctTsls+spLLVhLo+Lt+cGlItRcWt+.h..u.G+KaR.c.YCLK..h................... 1 2 3 10 +5413 PF05585 DUF1758 Peptidase_A16; Putative peptidase (DUF1758) Studholme DJ anon Merops Family This is a family of nematode proteins of unknown function. However, it seems likely that these proteins act as aspartic peptidases. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.87 0.71 -4.69 3 313 2012-10-02 15:32:34 2003-04-07 12:59:11 7 33 35 0 292 363 0 145.90 19 16.24 CHANGED VsVpNApGs..hTuCRLLFDSGSELSYISERCINsLGLARTPSRILVoGISusKA-sTRGsoplsIpSRlSssT.LsVpAHVLuKITSSLERpsIDsSlLsVFNDLphADopF..uolAPIDILLGSDYlWusITGpKI+DstGsLIAISSIFGWVITSltus+usoAT .......................................................h..............htsLhDsGSphoalopp.htpt..LtL....t..t..pp....h...h.t.hh..u...t...p....p..s..p.t.tt.htph...p...l.p....h.p..h.p...s...t.t........hp....l...pshs..ls..p.....l.....s.s.pl........hsl....s..t..p.......h..p..th..t..slpl.s.-....th..ppt..tp..hDlLlGs...Dhh.hpll.ttp.h..hpt...tst.hhh.pohhGal.ls.............ssh........................................... 1 167 170 284 +5414 PF05586 Ant_C Anthrax receptor C-terminus region Yeats C anon Yeats C Family This region is found in the putatively cytoplasmic C-terminus of the anthrax receptor. 25.00 25.00 30.00 30.00 21.90 19.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.36 0.72 -3.53 10 141 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 37 0 79 109 0 91.80 65 19.15 CHANGED VRWG-KGSTEEGARLEKAKNAVVpMP-EEhE.P...cPsP+ssPs+pPs.pcKWYTPIKGRLDALWALLRRQYDRVSlMRPpsGD+G..RCINFoRVps ..................VRWG-KGSTEEGA+LEKAKNAVVKhP-pEhE.P..p..sps.p.s.p+pPs.ppKWYTPIK.G+LDALWALLRRtYDRVSLMRPQsGDcG..R.CINFsR.............. 0 2 10 38 +5415 PF05587 Anth_Ig Anthrax receptor extracellular domain Yeats C anon Yeats C Domain This region is found in the putatively extracellular N-terminal half of the anthrax receptor. It is probably part of the Ig superfamily and most closely related to Pfam:PF01833 (personal obs: C Yeats). 
25.00 25.00 25.40 36.80 22.90 20.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.33 0.72 -4.18 12 190 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 39 22 100 145 0 102.50 51 22.00 CHANGED lp+SChEILulcPSSVCss.........EsFpVVl+GsGFppu+p.ppVlCoFphNpohohsE+Ps.slcssalLCPAPhLpcsGpshplpVShNsGhSFISoulpITuopCosG ........................c+SChEILusEPSSlCsG.........EsFQVVl+GNGFppu+ss-pVLCoFplN-ohT.l..s.cKPh.sVccsalLCPA.P..lLccsGpp..hslpVShNsGhSFISSSlhITsTcCosG..................... 0 7 16 45 +5416 PF05588 Botulinum_HA-17 botulinum_HA-17; Clostridium botulinum HA-17 protein Moxon SJ anon Pfam-B_8286 (release 8.0) Family This family consists of several Clostridium botulinum hemagglutinin (HA) subcomponents. Clostridium botulinum type D strain 4947 produces two different sizes of progenitor toxins (M and L) as intact forms without proteolytic processing. The M toxin is composed of neurotoxin (NT) and nontoxic-nonhemagglutinin (NTNHA), whereas the L toxin is composed of the M toxin and hemagglutinin (HA) subcomponents (HA-70, HA-17, and HA-33) [1]. 20.80 20.80 21.10 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.97 0.71 -4.31 3 33 2012-10-02 19:42:32 2003-04-07 12:59:11 6 2 16 1 2 40 0 128.20 72 92.70 CHANGED MSAERTFLPNGNYKIKSIFSsSLYLTPlSGuLTFSNTSStNNQKWKLEYluEsNuFKISNVAEPNKYLoYNsaGFIsL....DSoSNcsYWhPIKIAlNTYIIsoLscVN.hDYAWDIYDsNsNIoDQPLLLLPNFDIpNSNQMFKLEKI ..............................MSsERTFLPNGNYpI.KSlFSs.S..L..Y..L..s.sSGuLoF.SNpSShsNQKWplEYhupspsF+hS..NVAEPNKYLuYss...aG...F.I.L.....sS.SN..pshW.PIK.IAlNoYIhhoLshVN..hDYAWsIYDsNpNIhsQPlL.LPNFDI.NSNQhhKLEKl...................................... 1 1 2 2 +5417 PF05589 DUF768 Protein of unknown function (DUF768) Moxon SJ anon Pfam-B_8463 (release 8.0) Family This family consists of several uncharacterised hypothetical proteins from Rhizobium loti. 
21.00 21.00 21.40 21.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.98 0.72 -4.04 7 32 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 0 19 32 1 62.80 33 72.41 CHANGED MSTRGIsFLccWIucplP-TstuD.llSls-LTpKLhADAKAhGItpp-I-E-suSlaphIlcAl ...MSp+u.pFLcpWIu-pl.sss.spsD..h....l....uhs-.LscchhADActtGIspp-.I.pE-s..GslhchIhpu................ 0 0 13 13 +5418 PF05590 DUF769 Xylella fastidiosa protein of unknown function (DUF769) Moxon SJ anon Pfam-B_8396 (release 8.0) Family This family consists of several uncharacterised hypothetical proteins of unknown function from Xylella fastidiosa, the organism that causes Pierce's disease in plants. 21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild --amino -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.86 0.70 -5.18 6 104 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 12 0 12 77 0 212.10 35 45.08 CHANGED PPIDSRTGKP.MMAGPWENRDLSLEYFQLDFLGQYTVKHAFINGINIDRCYPGAPPDHVQVVMMoh+sGScoPGIlsphpG+.PPpPhspoAAFIGhcKSNTosch.PcG.oVLRp.Ds.....VsthhKFssLCoAcFsGGN-..IpFGIR........SAsSpSIpuhlpstssLspAs.+......-hhusR.................sFlc.sPcTETRWGNsWTWa.+AalPoPl...GDGlEhWM.TPIGsoGYYlsVphNFhEutRQKNTEsYQRA.........RcLMDGlLQSVVIQKp .........................................................................................................................................................................p............G.pslpp.............phs.hspA.hsGGsp..l.aGhc........suhu.ultttlpsh.thhpts........thhspp.......................................hlc.ssppEpRassshohh.+t..ss.sl...G.sGlE...hhh.TPlGs.uG..hosshphh-utt.pssc.hphA.........ppLhsGl..p.lhpK.................... 0 0 11 12 +5419 PF05591 DUF770 Protein of unknown function (DUF770) Moxon SJ anon Pfam-B_8473 (release 8.0) Family This family consists of several proteins of unknown function from various bacterial species. 
20.70 20.70 21.00 21.00 19.70 20.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.99 0.71 -4.67 91 1558 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 971 0 276 902 48 153.90 37 85.07 CHANGED hh..scs.tsRVpIpY-lcssuupcclELPhshhVlGDhoG....tpspsslc-R+hlslD+-NFcsVMcphsPplshpVsNpL..ss....s....splsVcLsFcShcDFsP-slAcQVstL+cLLEhRptLssLpu.hsspsshcchlpclLpssshhcpLtsEhp.ht ..................h.hucs.tsRlpIpY-lcssGupcclEL..Phshh..VlGDhuu.........tp-p.......sslp-...R.....chlsID+.cNFssVMpphsPplsas.V....sN.s.....Lt....ss.....s..............splsVsLsFcShcDFsP-slA+p..Vs.pLppLLEhRptLssLps.hsspsshcph.....lpplLpssphhppLhpcht.hh............................... 0 50 118 197 +5420 PF05592 Bac_rhamnosid bac_rhamnosid; Bacterial alpha-L-rhamnosidase Moxon SJ anon Pfam-B_8527 (release 8.0) Family This family consists of bacterial rhamnosidase A and B enzymes. L-Rhamnose is abundant in biomass as a common constituent of glycolipids and glycosides, such as plant pigments, pectic polysaccharides, gums or biosurfactants. Some rhamnosides are important bioactive compounds. For example, terpenyl glycosides, the glycosidic precursor of aromatic terpenoids, act as important flavouring substances in grapes. Other rhamnosides act as cytotoxic rhamnosylated terpenoids, as signal substances in plants or play a role in the antigenicity of pathogenic bacteria [1]. 
35.60 35.60 35.70 36.10 35.30 35.40 hmmbuild -o /dev/null HMM SEED 509 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.52 0.70 -6.29 25 1344 2012-10-03 02:33:51 2003-04-07 12:59:11 6 54 458 3 498 1369 219 462.90 20 56.56 CHANGED lpcsssuphllDhGQNhsGalRlc.VcuspGsplpL+auEhLc............cGslt..TpsL.........RsAcsoDpYIh...........pGc.tt-........papPpFTa+GFRYVclsGh..sp.....hsss..s..........lsuhVlaSD...hccsGsFcsScshlNpLacNshWohRu.NFhulPTDCPQRDERlGWTGDspshu.sAsahaDspshhs+WLpDltssQps.s.................G....shPslsPssh.......ssssssssWusuhlllPWslYppYGDppllccpYsuMctal-alppcsss......hh.thssaphGDWL............sssspTstsll.AoAaascssphhuchAphlGpsp.....D.AccYpshuccl+pAFpscal.sssG..............plsssT.....QTAhsLsLhasLV..........Pcstp.ttssppLschlcpsst+lsTGFl..GTshLhpsLsc..sGcp-lAYclLhpcshPSWhY.lsp.GATTlWEpW-uhh.......tsstMsSaNHYu.hGulspWhapsluGlp.............sstPGa+clhlpPtPs.usLs.A........cusacosYG.cIcscWchc..sGp.hpLplplPsNosApVhL ................................................................................................................h......t.hlh.DhGpphsGhhph...p.....h.........p.....s..........t...u.......t......t......lp.hh..h....uE.....ht.......................................................t..u.p..h...........t.h........................................................h...h.h.......................t.Gt.....t...........................h....st....h..s..h...h.u....FR..Yl..p..lpsh...tt...................t...p..................lpu..h.hhhs.s............ht.....t..u..p..F.p..s..u.s..........hl.N........plap.shhs.......hp...........s.shh.....s..h.....sDC..PpR.ER..hsWhGDhth.s...s.sh.h....h..s..s..t..t..h...hpchlps.h.ts...tp....p.........s...............................G..........hh...s...t....h....h...Pt................................t.s....ssa.....s...........h...h......lhhsap...h.Yhhh.G..Dp....p...hl......pp....h.as.s...h..p...p....hl.....ca.hhp.ptss....................................h.ssh....t.h.u...DWh.................................sht.tt..s..s.t..th.....h..
..s..ss.h.h.h.hshph..hsph....A..p....hl.G..c.pt...................................c..sppapp...ht.p...p.l.pp...s...h...pp..p.....a..h.s..ppu.............................................tht.s......s.o............psu.......sh....ul.hh.sl.h..........................s.p.p....tt.......t.....t..h..hp......tL............h.p.h..........h........t.......t.........p............s..........h.....t......h.........s......s..G......h.h..............u.......s...al....h......p....s....L.........sp..............tGt......t....ch..s..........hph...h.......h........p....p........hss.....Wh..h.l...p......p......G....A...T...ThW.Et..a..ss..............................s.h.sShsHhh.hu..ussta.hhphlhGlp...............stpsGa.c.phhlpPt.............sslp.s...................puph.....o....s..h....G..t....lp.sp.Wph..........p.......s.........st.....h.php..lplP...s..spu.l.............................................................................................. 0 220 361 461 +5421 PF05593 RHS_repeat RHS Repeat Yeats C anon Yeats C Repeat RHS proteins contain extended repeat regions. These repeats often appear to be involved in ligand binding (e.g. [1]). Note that this model may not find all the repeats in a protein and that it covers two RHS repeats. 20.80 20.00 20.80 20.00 20.70 19.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.35 0.72 -3.69 203 21340 2009-01-15 18:05:59 2003-04-07 12:59:11 9 591 1319 0 3320 21290 603 38.60 28 15.02 CHANGED YD.spGpLhs......hs.t.......ssGpss..p.ap.YD.stGp...ls....ph.....ssssGp..s ..............................................YD.ttGpLsp..........................hT..s..........................ssGpps.........p.Yp.YD..stGp...........ls............ph.........psstG............................................... 0 1007 1995 2696 +5422 PF05594 Fil_haemagg haemagg_repeats; Haemagluttinin repeat Yeats C anon Yeats C Repeat This highly divergent repeat occurs in number of proteins implicated in cell aggregation [1]. 
The Pfam alignment probably contains three such repeats (personal obs: C Yeats). These are likely to have a beta-helical structure. 21.50 2.30 21.50 3.30 21.40 -999999.99 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.60 0.72 -3.64 157 6916 2012-10-02 14:50:22 2003-04-07 12:59:11 9 294 419 0 1577 9015 69 75.00 16 36.58 CHANGED sssGphplpss.............................................................ht.spsutlssssslslp..usshhs.ssush.stss......................................hplsusGsLsspu .................................................................................................................................................................................................................................h..s.spsG.....t..l.t.u.s...s.s..l.sls......u.s.sl.s...s..s....G...t..l.....u..s..s...s..h..t..h..p..s..tt...ttt....................................................htlts.t.sth...................................................................................................................................... 0 229 664 1231 +5423 PF05595 DUF771 Domain of unknown function (DUF771) Finn RD anon Pfam-B_7023 (release 8.0) Family Family of uncharacterised ORFs found in Bacteriophage and Lactococcus lactis. 22.90 22.90 23.40 23.40 22.80 22.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -10.12 0.72 -4.07 18 367 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 341 0 27 185 0 87.50 33 84.35 CHANGED -stlllscpphp-lpptshcs.h....hsls-lp++h.s.+upshlpcpllpsP+FcKplchp....GhlhaPp.spus+.ahhps+chpcal-cpFpEIh .........................................shllls+pEYpEL.hthshcshW...........hshs-Lc++l..p..tS.cphlpcplLhcP+acKclchp.....GhVhYPs..psts+...WpFpA++hpcFl--aFsEI....................... 0 8 18 22 +5424 PF05596 Taeniidae_ag Taeniidae antigen Moxon SJ anon Pfam-B_8569 (release 8.0) Family This family consists of several antigen proteins from Taenia and Echinococcus (tapeworm) species. 
20.80 20.80 21.70 25.90 19.60 18.60 hmmbuild --amino -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.99 0.72 -4.16 11 312 2009-09-11 08:34:58 2003-04-07 12:59:11 6 1 13 0 0 302 0 60.60 39 80.77 CHANGED --sc.sccsspslhKtluEl+c.FFpcDPLGp+lspLsK-hpphspthRpKlRpuLtEYl+sLhsE ..................csp..ppstpslhKtluEl+c.FFcpDPLGp+lspLs+-lstlspcl+tKlRtsLpcYl+sLhpE.... 1 0 0 0 +5425 PF05597 Phasin Poly(hydroxyalcanoate) granule associated protein (phasin) Moxon SJ anon Pfam-B_8339 (release 8.0) Family Polyhydroxyalkanoates (PHAs) are storage polyesters synthesised by various bacteria as intracellular carbon and energy reserve material. PHAs are accumulated as water-insoluble inclusions within the cells. This family consists of the phasins PhaF and PhaI which act as a transcriptional regulator of PHA biosynthesis genes. PhaF has been proposed to repress expression of the phaC1 gene and the phaIF operon [1]. 41.00 41.00 41.00 41.00 40.90 40.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.54 0.71 -4.39 31 264 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 196 0 80 242 32 129.00 33 71.12 CHANGED cKpsps.......pphsspl+cpAcQIWLAGLGAauKsppEGu+lF-sLV+-Gcplc++s+phs-cplsssp.......ppht....phssslps+AsspWD....+LEphF-cRVspALsRLGlPopc-lcALpp+lDpLstplccl.sppp ...........................hpppspphhtclcchu+pIWLAGLGAa...........u+.......s....ppE.......Gu...........+.......h.......F-sLVKcGEphEccspptssc..plspsp.......pphp.......sp.hsps+spstsphs....+lEpsFDcRVspuLsRLGlPo...+p-lcsLpp+l-pLptpl-plstt.t..................... 0 19 45 65 +5426 PF05598 DUF772 Transposase domain (DUF772) Moxon SJ, Bateman A anon Pfam-B_8195 (release 8.0) Domain This presumed domain is found at the N-terminus of many proteins found in transposons. 
22.60 22.60 22.60 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.29 0.72 -4.08 160 5949 2009-01-15 18:05:59 2003-04-07 12:59:11 6 34 1706 0 1322 5334 675 71.70 25 21.55 CHANGED GRP.shssphhl+llllphhh.sl.Sscpltctlpcshtaphh.sG......p.........tsPDtoTls..cFR.pthtppt.......hhcplhpplh ....................................GRs.uhs.t.hhl+ll....llth.hh.ul.Sscplpctlpp.slt..h..phF.st....ht..p..........thPc.hoT.ls..caR..pthtppt..........hhpplh.................................................................................. 0 433 891 1052 +5427 PF05599 Deltaretro_Tax Deltaretrovirus Tax protein Nicot S, Moxon SJ, Studholme DJ anon Pfam-B_8606 (release 8.0) Family This family consists of Rex/Tax proteins from human and simian T-cell leukaemia viruses. The exact function of these proteins is unknown. Tax is the viral transactivator; is it a nuclear phosphoprotein that interacts with CREB, coactivator CBP/p300 and PCAF to form a multiprotein complex, which activates viral LTR and stimulates virus expression. Tax is also involved in deregulated expression of numerous cellular genes leading to T-cell leukaemia. Rex is a nucleolar post transcriptional regulator that facilitates export to the cytoplasm of viral RNA not or incompletely spliced [personal communication, Dr. S Nicot]. 25.00 25.00 26.40 26.30 20.00 19.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -9.95 0.72 -3.68 2 33 2009-12-01 13:32:35 2003-04-07 12:59:11 6 2 8 0 0 27 0 57.10 74 73.91 CHANGED M.hhSPLsRhhTEsshhIPSLRVWRLCopRLVs+hhhshFtPPssupPotHLstAssHLGPHRWTRhRLSSslPYPSsPLLPHPENL ......................hhIPpLRVWRLCTtRLVPHLWGTMFuPPTSSRPTGHLSRASDHLGPHRWTphRLuSTl............................ 
0 0 0 0 +5428 PF05600 DUF773 Protein of unknown function (DUF773) Moxon SJ anon Pfam-B_8595 (release 8.0) Family This family contains several eukaryotic sequences which are thought to be CDK5 activator-binding proteins, however, the function of this family is unknown. 20.40 20.40 20.40 20.70 20.30 20.30 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.67 0.70 -5.72 14 218 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 140 0 135 217 5 387.50 32 91.65 CHANGED Mp-h.sLPIDIpouKLLDWLVDRRHCshcWQppVhsIRcKIssAlpDMP.Ep-cIhpLLuGuaIsYFHChcIl-ILKsTEusTKNlFGRYSSQRMKDWQEIlphYEKDNlYLAEsAplLlRNVsYEIPuLRKQlsKspQhtpEhpR+ct-hppsAAph+ccYhpsCKphGlpGpNl+pELlsll.pDLPuhhpclutss.us.LpcAl-hYpAFssasp.......p.sstplLPlLpaltc+G.NoTlYEa+pupsPhsVEcP..chp.........phs.-......sss--p....IDW..GD......t.s....ss.u..phtstpIDaGhsh-ss.s..........-........thsGIDWG..............D..upssshpIsl.-sGos.......t...uVA+GpDAhoLLEsspsRspFlDELhELcuFLsQRLsEhp..--ssllshothphuPsllttpTscplpsMLu-VcsllspLoshphQHLFhIpuSPRYV-Rls-pLcQKh+ptchLttptpths-KppEAtpppscLcP+l-hlhppTR-LQK.IEuDISKRYssRPVNLMG 
..........................................lsI.DItht+L.-WLlsR+hsshpWpt.lhtl+t+IptAht..s.......hP..pt.pt.........h.......hp.hL.......p..s............s.I....pYhc....s.h.cIl.-lLpto.-.t.s..o+.slFG.pau.up.hhc..-W....ppIl.thYEK-shaLsEhuphlhcsVsYEIPul+KQltKhp...Q..p-hp++pt-hptsttphtppahtt..Ccph..Gl.p.................G........p..s......l.+tEL.hths.ppLPs.h...t.clhtth...tp.............ltpulchYpshst.hh.....................t...ttt...h..Ls.hL..phl.pps....s.shh..p.hpt.tt.t..s....hpps..t.h..........................t...............t.tpst.......IDa..Gs...........s......t.s...t......s..tlDhu......t.t..............................t.stI.sas..............................................t...ph.sh.pts.................t...th.u.p...u...p....th...plL-.sphRsphls-LhELc.FL.......t..Rh.Eh....p...pp...........s...................s.lthp..t.t..s..l...s..........pph.thls.lpthls.lss.phppLh.lhtS.+al-plsp.Lppphpt.thhh.tht.h.t+t.ch..pp.tt.tsphphhhtpo+pLpp...........hEtplSph.aps.R.VplhG..................................................................................................................................................................................... 1 55 77 110 +5430 PF05602 CLPTM1 Cleft lip and palate transmembrane protein 1 (CLPTM1) Moxon SJ anon Pfam-B_8636 (release 8.0) Family This family consists of several eukaryotic cleft lip and palate transmembrane protein 1 sequences. Cleft lip with or without cleft palate is a common birth defect that is genetically complex. The nonsyndromic forms have been studied genetically using linkage and candidate-gene association studies with only partial success in defining the loci responsible for orofacial clefting. CLPTM1 encodes a transmembrane protein and has strong homology to two Caenorhabditis elegans genes, suggesting that CLPTM1 may belong to a new gene family [1]. This family also contains the human cisplatin resistance related protein CRR9p which is associated with CDDP-induced apoptosis [2]. 
19.70 19.70 20.30 19.70 19.10 19.60 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.54 0.70 -5.77 26 492 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 253 0 331 471 12 368.50 30 71.07 CHANGED tusltuhlpslhhhhshhhhhs.Fhs......sssssssssts....t.............................htshatpsp.lsltlYsSssshhsshts.s...Llhpccshshush.sp.........shslslPcp...l.ppNGoLahHhhls.psGh.h.st....satstphh.ahsts...LspYh.p+.hp+p+NLLuspc-p...p-pcpcts.............ssphhuaa+PNlTlsllsDpsshshsslPPsltpahplpssssp........YhPllalNpFW.L+cchh.lNpTs..ppLsLplshpsluhaKaphhsph-puhppttph.........Gtsts-..h.-plKchLLDT...N.YLLulThlVolLHhlFEaLAFKNDIpFW+p+.KshhGlSlRollhNsFhQhlIFLYLhDN..cTSaMILsspGhGlllEhWKlsKslclclph...........uhlPh........ltFpD+.t.hopsEp+Tc-YDplAh+YLhhlhhPLlsuYAlYSLlY ...................................................................................................................t.......h..hhhh.h.h...hht.hh.................................................................................................shh.t.t.............h..pl...l.aho.......p.t......tt..........................hhhp...ps.h.hssh..pt.....................thphp...hspp..................h..ppN.G..ol.ah...+haht...hsG................apt..thh....hh.t..........lsp....Yh.....p...tcphsLLsspppt............p...t...ppt...............................sp.hoaa+PplolsllsDths..hshs..plPss..ltp.ahp.h.th.stp......................................YhPllahspahp....htcchh...........l......Npos........ppLs.Lplshps.................luhh+aphh.h..p...hpp.sh.........tp.h.................................................................G...s.....tp-......h.DplKthhl-T...N.YLLulThhV.......ohlH...lF-h.LAFKN............DIpFWpp+.cs.h..GlSs+olhhpsa.phllhLYlhDp..pTshhlhhs.slGhhIEh.......WKlpKshplplp.............................uhhPh.....................hth.pt................o..........EppT.............cpYDp............hAh+YL.shlhhPLhhuhAlYSLlY.............................................. 
2 122 178 264 +5431 PF05603 DUF775 Protein of unknown function (DUF775) Moxon SJ anon Pfam-B_8676 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 20.70 20.70 20.90 24.20 20.50 19.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.46 0.71 -4.80 35 312 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 278 0 220 310 3 192.20 30 94.21 CHANGED MFGslsuG+sspssspplss......spalhslsss...........pshsalslFl..................Lssssh.PsshuAuVYaph.P......................tsssaphLGslsssKPSAIFKlst................................hsssps..............psts.lplGISlEPhspltpphsthppspss...................................sstplAp+IlpshaNaLuSFsss..................spshVPhpshccWacKFpp+lppD..PsFL .....................................................hFGslhsG+.s....o...shptlsp.........spalhs...lssh...........pshsHlsVFl..................L.s..s..s..sh..Psshuuu.VYhph..P..............................ssssa..phLG..hlsspKPSAIFKlst....................................ttsts............................tsssssplGISlEshpslttphsshpst.st...........................................shpphAp+llpNhaNahuSFsss..................sspthVPhpshppWap.pFppRlpps..PsFh............................... 1 73 120 179 +5432 PF05604 DUF776 Protein of unknown function (DUF776) Moxon SJ anon Pfam-B_8747 (release 8.0) Family This family consists of several highly related mouse and human proteins of unknown function. 
25.00 25.00 30.00 28.90 22.60 23.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.31 0.71 -4.06 4 60 2009-09-14 12:10:08 2003-04-07 12:59:11 6 2 38 0 33 51 0 171.40 52 62.78 CHANGED MEutuKDGEEEoLQoAFKKLRVDAEushsshps.cu.usRsusRouh-..ssKPK.hsS.K-oWHusoRKoSRGsVRTQRRRRSKSPlLHPPKFTaCSstA.PSs.....upLKH+..o.sEPsDstuscGpus....pusssSTALssssacsahhpPhtpss.cssppossc-tAspst........sutsu.sAsc.S ...............McSEAKDGEEESLQTAFKKLRVDAuGSlASLSVG.EGsulRAsVRoAsD..-sKPKssCuSKDSWHGSoRKoSRGAVRTQRRRRSKSPVLHPPKFhHCSThA.sS.s.....uQL.KHK......opsDssD.uuouhGhss.......sus...psS.suLss.sapthsh-Phtsolschsspsppcpt.u.sss.........sSpsoLtssp........................................................ 0 1 3 12 +5433 PF05605 zf-Di19 Di19; Drought induced 19 protein (Di19), zinc-binding Moxon SJ anon Pfam-B_8581 (release 8.0) Domain This family consists of several drought induced 19 (Di19) like proteins. Di19 has been found to be strongly expressed in both the roots and leaves of Arabidopsis thaliana during progressive drought [1]. This domain is a zinc-binding domain. 27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.33 0.72 -3.84 56 760 2012-10-03 11:22:52 2003-04-07 12:59:11 7 63 135 0 411 688 2 57.80 32 13.39 CHANGED spFsCPaCt.c.c.aDlssLCsHl--EHsh-u+.s.slCPlC.....u.......s.......+.......Vu..p..Dhlu......Hl...ThpHu ......tasCPY..Csc.t.hc.h.s.u.LtcHlppcHp.......p......s.sp.....s...s.......s...CP..lC........u.............t.............p.......hs..s..shhp......HlshcH.......................... 0 82 147 264 +5434 PF05606 DUF777 Borrelia burgdorferi protein of unknown function (DUF777) Moxon SJ anon Pfam-B_8755 (release 8.0) Family This family consists of several hypothetical proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). 
25.00 25.00 31.70 31.60 24.90 24.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.03 0.71 -5.10 3 129 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 27 0 9 109 0 173.30 65 98.02 CHANGED MocsYcIYRMNQRLYGHALAQEDlKNWIYSNIFIsRIGTVKEFKQQTQEAIVTIPEFEDLEIHTKNISNINLELSKGDsVLLLQSSINIFDKNDDIHFDKHHFYILSAISPKTLNLIsDTVKI+ANNpIEIANQoTSLKcILDsIISAINGIcIpGsusIDtoSL+......usTopINScINSLFK .........ppDYcIYRMNQRLYGpALuQEDlKNWIYSNIFIh+IGTVKEFKpQTQEAIVTIPEFEDLEIHTKNISNISLELSKGDsVLLLQSSlNIFDKNsDIHFDKHHFYILSAISPKTLNLISDTVKI+AN...........N.pIEIANQoTSLKpI..........LcsIVSAIsGIcl.....hG....ss....s...I-.sSL+......IATopINSsINSLFK............... 0 5 5 5 +5436 PF05608 DUF778 Protein of unknown function (DUF778) Moxon SJ anon Pfam-B_8777 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 22.70 22.70 24.10 23.70 22.20 21.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.79 0.71 -4.23 28 251 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 151 0 153 239 6 122.70 44 60.30 CHANGED lPllSWLhPaIGHhGICsSsGl.IhDFuGsYaV................................................................u...............hDshs..FGsP.................................................s+YapL...................................................................................ctpph..s.....................sspsaDsAlppusccF.pp+saNlhs.......sNCHSaVAtsLNth........................................................pa.......psptsWshlplshhlhlpG+a 
.....................................................................................................................................lPlloWhhPhIGHhG.ICsSsGV.IhDFAGsYaV................................................................S...............p.DshA.FGpP.................................................s+Yh.pL........................................................................................................................................................................c.p....ps...............................sspsWDsAlppuscca.pp+.......haNLhs............sNCHSaVAhsLNhh........................................................pY.......tssssWNhlpLshhhhltG+a.............................................................................................................................................................................................................................................................................................................................................. 0 65 91 127 +5437 PF05609 LAP1C Lamina-associated polypeptide 1C (LAP1C) Moxon SJ anon Pfam-B_8782 (release 8.0) Family This family contains rat LAP1C proteins and several uncharacterised highly related sequences from both mice and humans. LAP1s (lamina-associated polypeptide 1s) are type 2 integral membrane proteins with a single membrane-spanning region of the inner nuclear membrane [1]. LAP1s bind to both A- and B-type lamins and have a putative role in the membrane attachment and assembly of the nuclear lamina [2]. 
19.70 19.70 21.60 20.20 19.40 19.10 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.51 0.70 -5.82 3 144 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 46 0 66 138 0 318.60 39 82.94 CHANGED MKTRRoTRL.pQHSQQsslQsSPApouRGLRDApuLS+D+pEDEsSSQP-oSQTlSKKTVRSP-EAsVSEDPls+LpRPPLR.PR.-ATpVQpKssF.EEGET.EDDp-SSaSsVTpl+lRSRDScESuDKsuRAuuHas-ShWuLP+SpuDFTAH-pQPSlLoTGsQKsPQEWssQAuRhRTRMtpcsILKSEhGNQSPSTS+pQsssQPss-Sh..VK+KtWW.......LLlLVAALASGlaWFFSTPs...VETTAVQEFQNQMsQLcsKYQGQDEKLWKRGpTFLEKHLNSSpPRoQPAILLLTAARDAcEsLKCLSEQIADAYSSFRSVRAIRIDGAGKAsQDSDTVKLEVDQELSsGF+NGQNAAVVHRFESFPAGSTLIFYKYCDHENAAFKDVALVLTVLLEEETLEASLGLKElEEKVRDFLKVKFTNSDTPNSYNHMDPDKLNGLWSRISHLVLPVQPENALKcGuCL ....................................................................................................................................................................................................................................................................................................................................t.........................................................................................t..................................................................................................................ts.........h.............h.h.hh..h..l.h..t....a..h........s............spt.s.tlpt..............F........sphppLpppa.uQst.LWpRu.hhLp+H.LpsspP.spPAhllhTAup..cuccs.L+CLupplAs.......AYo.o.........pp.sp..s...l.p.IDGss.+shtDSDhVKh.lD.cLosGFcsGppAAVV.H+FEpLPsGSTLIFYKYCDHENAAFKDVALlLTVLL.E.E.c.s.Ltsslu.pEhEE+VRDhLhsKFosSssssSascMDsDKLsGLWSRISHLVLPVts.ptlctt.C................................................................ 0 16 20 30 +5438 PF05610 DUF779 Protein of unknown function (DUF779) Moxon SJ anon Pfam-B_8830 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
25.00 25.00 31.00 30.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.14 0.72 -3.97 40 460 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 440 0 184 402 460 96.70 60 72.22 CHANGED pLIccLpucHG.sLMFHQSGGCCDGSuPMCYPpGEFhlGspDVLLGcI.................sGs.....PFYlSpsQFEYW+HTpLlIDVVsGRG.uhFSLEuPcGhRFLoRSRl ...LlcpLpscHG.PlMFHQSGGCCDGSu.PMCYPpG-FhVGcsDVhLGpl...................................sGsPhalStsQ..a-hW...KHTpLlIDV...VP...GRG...G...hFSLEsPcGlRFLoRSRl.......... 0 51 127 159 +5439 PF05611 DUF780 Caenorhabditis elegans protein of unknown function (DUF780) Moxon SJ anon Pfam-B_8886 (release 8.0) Family This family consists of several short C. elegans proteins of unknown function. 25.00 25.00 29.90 29.10 21.80 17.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.61 0.72 -3.26 3 44 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 6 0 44 25 0 68.00 71 70.29 CHANGED MADKSAYMGAGGYGSGYMGSNASSSGYAREDYAQGGNGGSsNQQQQGSGGNTNPGGQVFKARTDQSCYLGP .....MuDKSAYMSAGGYSSGYMGSNASSSGYAREDYAoG.G.S.GG..u...s..sss.sp..GSG...Gs.sN.sGuQVFKARTDQSCYLGP............................ 0 10 27 44 +5440 PF05612 DUF781 Mouse protein of unknown function (DUF781) Moxon SJ anon Pfam-B_8891 (release 8.0) Family This family consists of uncharacterised mouse proteins of unknown function. 
25.00 25.00 41.60 40.30 19.70 23.50 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.18 0.70 -5.76 6 79 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 41 0 56 78 3 311.80 39 96.99 CHANGED MuslhohshlLlhsh.ussAhsop..ps.P..WccuPtpLSch.hpssshhlNPWVap-RMshY+ILlNuTshYhupaGs-NppNsLWGLPLQhuWQh+SGRLsDPT..t.osCG.EhsDshCISssSWWuslNYYLSlIPFLAAVcpGllususcQVpI.sPscssp+FCosYSDCssthP-sMs+WcpFFQhLhplpt..pusaucpDplLpahWsAHtpSLspuopuFsD+hsaYScsElpFppuWsphVDalAAspFsTohspohpFhpsLPPRlLpssD.psP.IsDFTspQNpsLlhlthlNcl..Npllhs.hlpLh+phhpochsR-hhpt.l..hh..P....oohlcllpchhss....ss .........................................hlhh.h..s...ths..sts.......-hhPPLWcpsPuphuDa.lcs....sthlIsPWsY.-..Rhu.hYKIL.lspTspYFspFuspNppNlLWGLPLQaGWQacT...GRLADPo..ptTsCGh-sGDphCISlcS.WWAs.hNYaLSslPFLAAV-oGlhGhuscpVplLP.Psc...sppcFCaslosCpouaPcsMs+WssFaQ........hl.........................p.s........t..s.shD.sL....L+ahWsAHsuoLp.....suhphFp-.R..hpa...YScsEssFscsWshsV-alAAspFsTsl.pohc.FQpu.LPsRhLhssD.hsPhIsDFoshQNtVLhhLphlppl...sp..hhG....h...............................................h............................ 0 9 14 23 +5441 PF05613 Herpes_U15 Human herpesvirus U15 protein Moxon SJ anon Pfam-B_8900 (release 8.0) Family \N 25.00 25.00 210.50 210.50 19.00 17.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.55 0.72 -3.71 2 6 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 8 0 109.30 77 80.20 CHANGED M-sW+RQRLQEhRELCPL.lLMoLSNhhSKlEhlYlKYLFpMDFsThYRaIhsuLTLohTVTKSlVIEhLFIlKRWp-IcQhFpLsl+KspDCaIVAQFsHIPlKRhlhh MDVWKRQRLQECRELCPLPlLMSLSNIlSKlEIIYVKYLFKMDFsTMYRFILPALTLSMTVTKSVVIEMLFILKRWEEIcQFFRLNIRKVNDCalVAQFTHIPVKRhllh. 0 0 0 0 +5442 PF05614 DUF782 Circovirus protein of unknown function (DUF782) Moxon SJ anon Pfam-B_8909 (release 8.0) Family This family consists of porcine and bovine circovirus proteins of unknown function. 
25.00 25.00 231.10 231.00 19.10 16.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.55 0.72 -3.86 2 77 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 75 0 104.00 95 100.00 CHANGED MVTIPPLVSRWFPVCGFRVCKISSPFAFsTPRWPHNDVYItLPITLLHFPAHFQKFSQPAEISDKRYRVLLCNGHQTPALpQGTHSuRQVTPLSLRSRSSTFpp MVTIPPLVSRWFPVCGFRVCKISSPFAFTTPRWPHNDVYIuLPITLLHFPAHFQKFSQPAEISDKRYRVLLCNGHQTPALQQGTHSSRQVTPLSLRSRSSTFpQ. 0 0 0 0 +5443 PF05615 THOC7 DUF783; Tho complex subunit 7 Moxon SJ anon Pfam-B_8919 (release 8.0) Family The Tho complex is involved in transcription elongation and mRNA export from the nucleus. 27.70 27.70 27.80 28.80 27.10 27.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.63 0.71 -3.91 24 286 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 245 0 202 276 0 141.00 28 49.99 CHANGED -plh+.pRLlh.......s--+.hpplh+phhphtp.sst...................................cssptttpplhtphsth-hshh+hphhhp.sscpEpcpYpp.pcclcpslp....sh+pp.......htpL+ppLtcA+chhcp+p....caDpLuctIssps...sRpEppc...pLp ..................................-plh+.pRLLh.......s--+.hpplhKp.hhph.ss.ss...p.................................................ccttphhpchhtphuphEhuht+t..phl.hs.hNt+EhEpYpphppcI..psplp........ts+cc...........ItchKppLppA+ph+pp+p........................EYDsLAchIsppP.....sRp-ptppl............................................................ 0 64 104 157 +5444 PF05616 Neisseria_TspB Neisseria meningitidis TspB protein Moxon SJ anon Pfam-B_8925 (release 8.0) Family This family consists of several Neisseria meningitidis TspB virulence factor proteins. 
19.90 19.90 20.40 19.90 19.80 19.50 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.62 0.70 -6.13 3 184 2009-09-11 07:48:12 2003-04-07 12:59:11 8 4 72 0 16 177 2 264.20 41 79.54 CHANGED ah.pF...ApPl+ISDsh.s.stptu+VpKFRPcuS+Fap...........S+lTEAuslEHIPTGAKA..RINAKITASVSRAGVLSGVGKLVRQGAKFGTRAVPYVGTALLAHDVYETFKEDIQARGYQYDPETDKFVKGYEYSNCLWYEDERRINRTYGCYGVDSSIMRLMSDYSRFPEVKELMESQMERLARPYWEhlRc..NRsDSh.F+NYNFsRCYFsWNGGsCsVuKG.DDuRoFISFSLtRNPKYKEEMDAKKLEEILSLKVDANPDKYIcATGYPGYSEKVEVAPGTKVNMGPVTDRNGNPVQVVATFGRDSQGNTTVDVQVIPRPDLTPGSAEAPcAQPLPEVSPAENPANNPsPcENPGTRPNPEPDPDLNPDANPDTDGQPGTpPDSPAVPDRPNGRHRKERKEGEDGGLLCDaFP-ILACsEMGEPS-NsFEDIuIPpsTs-cTWSscNhFPSSGVCPpPKTFHVF....GRQFusSYEPLCTlAE+lRFAVLluFIIMSAFlsFGSLu+E ..................................................................................................................................................huGsGKLsR.GAKh..uTRAVPYVGTALLAaDlYpTFKEDIpspGhpYDsETDKFsK.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................Cs.....................hpat.hC.h...ht.hlhh...h...h.............................................................................................................. 1 5 10 11 +5445 PF05617 Prolamin_like DUF784; Prolamin-like Moxon SJ, Bateman A anon Pfam-B_8935 (release 8.0) Domain Both DUF784 and DUF1278 members are found to be expressed in the plant embryo sac and are regulated by the Myb98 transcription factor. 
Computational analysis has revealed that they are homologous to the plant prolamin superfamily (Protease inhibitor-seed storage-LTP family, Pfam:PF00234) [1]. In contrast to the typical prolamin members that have eight conserved Cys residues forming four pairs of disulfide bonds, both DUF784 and DUF1278 domains only contain six conserved Cys residues that may form three pairs of disulfide bonds. These two domains may have potential functions in lipid transfer or protection during plant embryo sac development and reproduction [2]. This family has been merged with the DUF1278 family. 20.60 20.00 20.70 20.10 20.30 19.50 hmmbuild --amino -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.35 0.72 -3.85 75 362 2012-10-01 19:46:35 2003-04-07 12:59:11 6 3 20 0 247 334 0 67.40 25 51.37 CHANGED cChs.shhp.......hpsCss-Ihtshh...uc........tplussCC.pslhphscpCast.....hh.s..phshhss............lhshCs ................................hhsshhp.......sttCsh-Ihtslh...sp........sslussCC.pslhphscsCasp.......hh.s..thPhhhs..................lhshCs................ 0 90 128 152 +5446 PF05618 Zn_protease DUF785; Putative ATP-dependant zinc protease Moxon SJ anon Pfam-B_8936 (release 8.0) Family Proteins in this family are annotated as being ATP-dependant zinc proteases. 
21.80 21.80 22.30 22.30 20.50 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.67 0.71 -4.49 11 1019 2012-10-02 15:32:34 2003-04-07 12:59:11 6 4 489 2 289 840 428 135.10 33 62.74 CHANGED lhG...plhL.....p.htA+hDTGAhTuSLpAsslhsap+-GppWV+F.....hhspcsppuhshEh.h.t...l+ttts............ptR.VlclhltlGsplhptEhoL.........p-RpthpaPhLlG.RuhhthhuhVDPup+alps+p ...................................................llG.hEhltlsslshs....hcA+lDTGApTSSLpA.......hc.......Ic........Fc...+.s.G..p.......p.........W.V+F......................hp.p..s.s.......p.p.s.p....h......hE..t........l...hchtc.I.+sSsuc...........sppRhVlchplp..lG.s.tph.p..hEhTL.........sDRspMsa.lLLG.RphhcsthlVDsuppalt.............................. 0 70 152 236 +5447 PF05619 DUF787 Borrelia burgdorferi protein of unknown function (DUF787) Moxon SJ anon Pfam-B_9013 (release 8.0) Family This family consists of several hypothetical proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). 
25.00 25.00 63.60 63.50 19.30 18.70 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.20 0.70 -5.79 2 137 2009-09-10 23:38:07 2003-04-07 12:59:11 6 2 27 0 11 119 0 334.20 72 97.88 CHANGED MPpDTISVSLhpsRl.ssp.NYYNPLLlYKo.......sshs.hhLsLoVsNaEc.lcpLEhppGptpDp.tK...EcpL..LppAMusFF..up-uLKSsshalY.sp.cElKcaLKsphHsFVVhlNpttDs.......hpsDa-th+ps..FhVhSTK-pplhplhKsKspoEhcp.IAlYSsNc.NLHLKFhu.YLHQASIFHAVNPYGM.LsuTPlhDDolIspLRpupINFYSLLNETG.DGh.AFKEuVsLuGsSIDEhFThaYIKNEuIhELIRIWNKNsRtNSKLSALpLsGuhsNtYTuulEChh+chhpRGLIl.YKplplplsso.tLpLpLpVslpYN.ShNuVsLlITsQ-Is ....MPQDTISVSLlDSRIQAS+PNYYNPLLVYKTAKIKVNKDuAsaKhLsLTVNNYEKpIETLEK-NGNGpDQFGK...EKTL..LKTAMSsFFNSoEESLKSAsLFIYKDK..P..EELKpYLKsHRHoFVVLINTp.GDs..S..D..DGLplYKDDYsKFKtsSsFFVFSTKEQEIKELFK.DKuNoEK-RNIAVYSNN+DNLHLKFIStYLHQASIFHAVNPYGMsLuA.oPLl.DDTlIsKLRsAKINFYSLLNETGLDGlsAFKEGVDLuGsuIDEtFTYHYIKNEAIlELIRIWNKNNRQNSKLSALQLSGARDNAYTSAIECLLKRFlDRGLIlpYKsLpLTLSsTsQLKLELSVNITYNFSINuVuLVITTQDIV....... 0 6 6 6 +5448 PF05620 DUF788 Protein of unknown function (DUF788) Moxon SJ anon Pfam-B_9014 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 
20.70 20.70 20.70 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.14 0.71 -4.40 30 317 2009-09-15 15:32:55 2003-04-07 12:59:11 6 4 279 0 218 302 2 158.30 25 90.41 CHANGED MAspusKKpAppNpphLptlphhhlshsslallh..hhhhspss..shhs.......hhlhshsthhshatLcphu+Ppas.........tpupLlcsG.DLpttG.h..hEYhhDllYlohhsplhshl..osKhWal.aLllPsauhYKlhs.hl..........hs.hhutssttttpt.............pspSKRQpKhE+Rtp+ ..........................................................AscutKphhppNpptLphhhhhhlsssslahll...hhhhsp..ho.h.shhh.......hhhhshshhhs.hh.hhpphu+Ppas..................psGp...Lhc.....uG..DLs.htG....l.........sEahhDllhlThhsplhshh..oshhWhh.aLllPsauhY.hlas.hh......................htsh.hstsst.ttt................ttppK+pp+hc++................................................. 0 76 119 180 +5449 PF05621 TniB Bacterial TniB protein Moxon SJ anon Pfam-B_9028 (release 8.0) Family This family consists of several bacterial TniB NTP-binding proteins. TniB is a probable ATP-binding protein [1] which is involved in Tn5053 mercury resistance transposition [2]. 
20.10 20.10 20.10 20.10 20.00 19.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.89 0.70 -5.66 2 273 2012-10-05 12:31:08 2003-04-07 12:59:11 6 5 203 0 59 418 28 254.90 42 95.42 CHANGED MDEYPlIDLSHLLPAAQGLARLPADERIQRlRADRWIGYPRAVEALNRLEsLYAWPNKQRMPNLLLVGPTNNGKSMIVEKFRRsHPsuoDADQEHIPVLVVQMPSEPSVIRFYVALLAAMGAPLRPRPRLsEMEQLALALLRhVGVRMLVIDELHNVLAGNSVNRREFLNLLRFLGNELRIPLVGVGTR-AYLAIRSDDQLENRFEPMhLPsWEAN-DCCSLLASFAASLPLRRPSPIATLDMARYLLTRSEGTIGELAHLLhAAAlsAVESGEEAINHRTLSMAsYhGPSERRRQFERELM .............................................................................................................Hl...h...ht.hss..p.-...RI..phlc.tsRWI.GYs..pA....phLsp.Lps.L.hs.h.P.p.pt.R..M...P...s....L.LlVGsoNNGKo.....h...Il.c+.F....p.+...p...H..........s..........s........s....s....D....s....c...t....c.....t....h.............PVlhlp.h....P..spP.s.....h....+h...Y.sul.L...s...tht...............A..P...h.......+.s.....p.........s....p.....l.....s.c...h..c..p.s..l...t..L...lR....c.h....s.l+M..LlIDEl.Hs..l.L.u.G.ss..h..p.....p..R.....p.h....L...N....h...L+.aL..uN..E...L...p..IP..l....V....u..V..G....T..............c..............-A...h....h.......A....l....p....o.D.......s.....QLt...s...R......F.....-......s......h.....h......L..P...h...........W.....c.....h....s.....c........-.....h.....p.....p...L.....L...sSF.t..ts.L....P...L..+.+..s.S.s.l.s..s....h...-.h.AphlhshSp.GslG-ls+LLhsAAlhAlcoGcEtIstchlp.......................h................................................................................................................... 0 9 29 48 +5450 PF05622 HOOK HOOK protein Moxon SJ anon Pfam-B_8981 (release 8.0) Family This family consists of several HOOK1, 2 and 3 proteins from different eukaryotic organisms. The different members of the human gene family are HOOK1, HOOK2 and HOOK3. 
Different domains have been identified in the three human HOOK proteins, and it was demonstrated that the highly conserved NH2-domain mediates attachment to microtubules, whereas the central coiled-coil motif mediates homodimerisation and the more divergent C-terminal domains are involved in binding to specific organelles (organelle-binding domains). It has been demonstrated that endogenous HOOK3 binds to Golgi membranes [1], whereas both HOOK1 and HOOK2 are localised to discrete but unidentified cellular structures. In mice the Hook1 gene is predominantly expressed in the testis. Hook1 function is necessary for the correct positioning of microtubular structures within the haploid germ cell. Disruption of Hook1 function in mice causes abnormal sperm head shape and fragile attachment of the flagellum to the sperm head [2]. 35.00 35.00 35.00 35.10 34.70 34.70 hmmbuild -o /dev/null HMM SEED 713 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.52 0.70 -13.24 0.70 -6.72 9 557 2009-09-11 15:29:52 2003-04-07 12:59:11 7 9 169 1 312 507 1 523.20 26 58.15 CHANGED Ms...c.KhELC-SLlpWLQTFpluAPCpslp-LosGVAhAQsLpQIDPuaFsEuWLs+IKpD.VGcNWRLKsSNLKKILpulh-YYp-lLupplS-thlPDltpIuEpSDssELGRLLQLILGCAVNC-cKQ-aIQpIMsLEESVQ+sVMsAIQELho+cpssosus-shssh-pQL++hh.-LpEs.p-+-ELAQRCpEL-hQlhhL.EEKsuLhpENphLp-chsph-..........oh.sPsplsu++a.pLQpQLEQLQEEsaRLEuA+DDhRl+Cp-LEK-lhELQpRN-ELToLApEspuLKDElDlLRpuuDKsspLEupl-oY+KKLEDLsDLR+QVKhLEE+NshYhcpTlpLEEEL+KAsusRuQlEsYKRQVQ-LHsKLspESp+A-KhtFEh+pLcEKh-ALpKEKERLltERDoLRETs-ELRCuQhQtspLs.ts...ussu.ot-sLAAElhPsEhREpllRLQ+ENKhL+htQEGu.pp+.stLQphL--ANppppcLcoppRLsppRIhpLptQlE-LQKtLppQGu+s-s..sspLKpKL-tHhcpLpEsp-Elp+KpthlE-LpPctsps.spKIsELpsALppKDp-h+AME-RYK+YlEKA+pVIKTLDPK.p....AssEl.hL+p..........QLsE+DtclptLEp-h.cts+.hR-.EEpLIloAWYNhGhshQ+cAh-uRLsshus.........sGQSFLApQRpsTssRRshssp..s....souK 
...............................................................l.h.Wl.po.....h........t..............t.................t...p.....h....pLssGhhhtplh.pI.sst..hs.......p.....p.l.p.p.p...sssshpl...+...h...pNLphllptlhpY.......hp-h..Lt.p......l.............lPs...l...........hIu.....c......p....s....s.........t.......E......l...t..................+L.L..Ll...LGCA.V........pC.ppKp.........caIppI.t.L....-.ssQt...........sl....hst...I.Q...E.....lh.p...........p......p....p.......s.....s.......p.........s....t......t....s.......t.p........h...-......t..t..........................p........p......h....h.....p.......l......p...c..h..ht....c..p..-.p.h..........t...p.......p.....h...t....-...h...p..p..l.....L.p.p...E.+s....s.L...............t..p.....p........h..t...p.p.h..s..p..p................................................................p.t..s.p.p.h.h.u.h...c....hh...p....h..p.t.p....lcpLpp....E..........c....h.....p...t..t.......h.Dh....+h.....ch.....p...ph..........-....t..c.....l..tclp...p.c.sp.p...............Lhs.....A....c.p...spth+DElD.LRppup+....ss+LEsplpph+.c.+Lp-..hp.h+tpl..ctLcEpN.t...hhhpp...p.h.L......E-pL....c.......t.s.p.......s.......t....p.s...p...l.....c.......p.......hc.+.......p...h.............p.Lptc.h.pp.ptp.ctp.....p..h...h-hp...p....lp.......cchp.....t....l.....cp.....p....t...p....l..........p.h...t....p....h.......p......p.......h......c.pL...c.hs.................t....t.....t.tt..............................st.s.p...s......h......s.t.El...............................t..h..p..p....p..hh....+.........L......p....hE......N.....p....L................t.........p.....t..............................................t..............h.t..........p....lc.pt...p...........t...h.t.....p.lc...pp............p............p.p........p.......h.....l......p.t.p...h....tp...p...p...t.....t.....p..tt..........pt...........p...t.p.h...pt.......th....p........ph.........t....p.t............................h..pp....................................................................
.................................................................................................................................................................................................................................................................................................t............................................................................................................................................................................... 0 90 122 209 +5451 PF05623 DUF789 Protein of unknown function (DUF789) Moxon SJ anon Pfam-B_9113 (release 8.0) Family This family consists of several plant proteins of unknown function. 20.60 20.60 23.40 21.40 18.60 20.40 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.98 0.70 -4.94 19 248 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 38 0 149 236 2 246.60 32 51.85 CHANGED oNLERFLcusTPsVPsphhsp...sshps.psh.....pppphsaFtLsDLW-sFcEaSAYGhGVPl.......pLssu.-sVhQYYVPYLSAIQl.....ass.pphhs...................................Rp.s-su-st......S-u.Scsp...................thtpt.psushspst.....tsphGplhFpYhEpssPasR.PLs-KlspLupcaP...........tLpoL+SsDLuPuSWhSVAW......YPIY+IPss.slcDLsuCFLTYHoL..uoshpss.sp......................................t.spclsLPsFGLAoYKhpGslW..................ss..pts...p-....ppphtoLhpuADsWL+pl........pV.pHsDapFF 
.............................sl-pFltsso.P.l.......hsp........tthpt.pth...............t.pt..hF.LsDlW-tacEhSuY..Gh..u..Vsl.........hLsss...c..plh..QYYVPaLSAIQl.asp..tt..............................................pt..pps.ptt...t..t..sps.st.t..............................................ptt.p.pt.........pt.s..hFpYhEpp..Pa...tR.PLh-K.........lppLupphs.............tLhohcosDL.PsSWhuVAW......YPIY+IP.s....s.pcl.s.ssFLTaHsL..usshptt.........................................................................................t..tphsLsshGltoY+hp..u.phW.............................s...t..p.......pphh.tsL.psAssaL+th.......................ps..pH.DapaF............................................................. 1 25 93 119 +5452 PF05624 LSR LISCH7; Lipolysis stimulated receptor (LSR) Moxon SJ, Mistry J anon Pfam-B_9152 (release 8.0) Domain The lipolysis-stimulated receptor (LSR) is a lipoprotein receptor primarily expressed in the liver and activated by free fatty acids [1]. It is thought to be involved in the clearance of triglyceride-rich lipoproteins, and has been shown in mice to be critical for liver and embryonic development [2]. 25.00 25.00 30.60 30.00 22.50 22.10 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.42 0.72 -4.23 3 176 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 39 0 70 137 0 48.90 70 8.91 CHANGED cWLTVLlIllGALLLlLLlGICWCQCCPHsCCCYVRCPCCPsTCCCPE+ ....-WLhVllVlLGuhLlhLLlGI.C.WCQCCPHoCCCYVRCPCCP-pCCCPc..... 0 3 9 27 +5453 PF05625 PAXNEB PAXNEB protein Moxon SJ, Mistry J, Wood V anon Pfam-B_9269 (release 8.0) Family PAXNEB or PAX6 neighbour is found in several eukaryotic organisms. PAXNED is an RNA polymerase II Elongator protein subunit [2]. It is part of the HAP subcomplex of Elongator, which is a six-subunit component of the RNA polymerase II holoenzyme. The HAP subcomplex is required for Elongator structural integrity and histone acetyltransferase activity [2]. This protein family has a P-loop motif. 
However its sequence has degraded in many members of the family. 20.50 20.50 20.50 20.70 20.40 20.10 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.14 0.70 -5.62 30 316 2012-10-05 12:31:08 2003-04-07 12:59:11 6 5 262 3 225 365 7 338.60 25 89.19 CHANGED oSFp++tht......................p..p......hP.....GlRPS.spsupsoTSTGsssLDplLuGH.GLPlGohLLIEEsuoT-auulLL+YFuAEGlVp..............spclaVsu.h...sppaspcLPGhhpsssppcccs.......................sppps-cMKIAWRYpplschpsussssp........................sasHpFDLTK+L.sps.hsslsalshsss.........................................................................................s.....asullpplpshIppp.........sssslhRlsIPSLLsPshYsspsupsp.......llsFLHuLRuLlRt.ss.psssllTlPhsLas+....ssuLsphlcpLsDuVIcLpPFstp.........cttsss....tK..pGLl+lpKLPsLs-+u.shhscps....DaAFKLuR+K.FpIEpauLPP--s-ppsppsst.....................sstsscpsLDF ....................................................................................................................hs.GhRsS.s.hs.u.p...hsSTGhssLD.plL...uG............GLslG.s..lL...............l.E...E..........st.............hs.s......authLh.+hFhApGllp..............spplhlhu.h...........stthhpp...L.......Pu.h..tssptpp...p.p.s...............................................................s.pp..ppphKIAWRYpth.sphpss.sssp................................................tasH.taDlo+ph...p.h.....tsph.hhh....t................................................................................................................................................htpllpplpp.hlppt.................ttpplhRlsl.u.LhuPhh...h..s.s.p.....t..sp.t......................lhpFLasLRuLlR..........s.........s.h.....s..........sshlTl.PhpLh...........sssl......hthlpplsDsllpLpsFstp.................pttts.....hct.p...Gll+lc+lPhh...sshs...shhspst.........................-h.uF+lpR++.FhIc.hplP.s..sptttptttt........................................................................................................................
................................. 0 83 128 187 +5454 PF05626 DUF790 Protein of unknown function (DUF790) Moxon SJ anon Pfam-B_9309 (release 8.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 20.20 20.20 20.60 20.60 19.60 19.90 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.14 0.70 -5.94 13 146 2012-10-11 20:44:44 2003-04-07 12:59:11 6 3 123 0 80 152 24 374.90 31 88.07 CHANGED MLPpELLcs+.tpcGcIhPpaus....-cl-LAcpVIphF+pplGcphG-LpcclcplE.ptpNYKhVRGhA+lL.....cRp.spF-ssoslDPhclRchLFct....G.ssocpERpclLpc..........sAcch......sso.p-lE+ulaADhEEE+lLsch..............................PsIoP--Ll+pYNLSLlQTLlFcAhclsl....plsss...aKcllRtIKtLGLMYpl.s.............................................t.t.hplclsGPASLh+hoc+YGsulAKLlPtllpscpWpl+A-ll......pscRlYpFcLsSppshhh.chppt......................................................aDSslEcpFspchptlh.uhclhREP-llplGppsaIPDFhlp+.ssp+VYVEIVGFWT.EYL++KlEKl+csshshLlllscE.L....t.tphchsspcVIhF++.KIshscVhthL+ ......................................................MLsp-Llph+..hpstplhPhahs.....pphpl...ApclIphFpttlGps..pu-LpcplppLp..p.................t.............Da....+lhRGLu+lL.................ccp.spFE.h.hu.sl-PtclRpplFpt..........sss..h.......p.......p.....p......R..ptsLpp..........lAppL......tl...pclpcuhYADLc-pphLsp.h...............................s..s...sP-pLlcpYNLuLsQsllacAsclpl.....pst..ss....aKtlh+h....lKhhtL..Mahlpt.............................................tsp.shplpl-GPsSLh.ctop+YGhphA+hlPsll.pssc.....WplpAplt............p....ts.p.+shphpLss.p.s.shh.s..c..h.s.s..s..p....................................................aD.StlE.psFupca..pp....h.....t.....p..sWpl.REs-hlsls....s..p.VhIPDFt.l..........p..................+..s.......sp.......chhlEIlGFWssEYLc+Khppl.cps.p.t...s...Lll.sssE..L....ussph..pt....hs..t.pllhF+p.plp.psVhphl................................................ 
0 18 52 69 +5455 PF05627 AvrRpt-cleavage NOI; Cleavage site for pathogenic type III effector avirulence factor Avr Moxon SJ, Coggill P anon Pfam-B_9342 (release 8.0) Domain This domain is conserved in small families of otherwise unrelated proteins in both mono-cots and di-cots, suggesting that it has a conserved, plant-specific function. It is found both in the plant RIN4 (resistance R membrane-bound host-target protein) where it appears to contribute to the binding of the protein to both RCS (AvrRpt2 auto-cleavage site) and AvrB, the virulence factor from the infecting bacterium [1]. The cleavage site for the AvrRpt2 avirulence protein would appear to be the sequence motifs VPQFGDW and LPKFGEW, both of which are highly conserved within the domain [3]. 20.30 20.30 20.80 20.30 20.10 19.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -7.88 0.72 -4.91 22 371 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 42 2 166 352 0 36.90 43 27.04 CHANGED spcpspslPKFG-WDsss.PASA-GFTVIFsKsR--Kcos ......t...ptssVPKFG-W.D.sss...susupsaTslFsKsR-cKps.s... 1 22 94 132 +5456 PF05628 Borrelia_P13 Borrelia membrane protein P13 Moxon SJ anon Pfam-B_8766 (release 8.0) Family This family consists of P13 proteins from Borrelia species. P13 is a 13kDa integral membrane protein which is post-translationally processed at both ends and modified by an unknown mechanism [1]. 
25.00 25.00 27.00 27.00 19.50 23.20 hmmbuild --amino -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.79 0.71 -4.36 5 121 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 37 0 9 87 0 126.40 51 78.98 CHANGED TsLpYESpKsslL.APFLLNLFLoFGIGSFVQGDYIGGGAlLGoQVLGGILllTGhIlss....ssssssp.huIoGtlLhGIGsLTIAuSYITSIIIPFTFANRYNANL+K+LGIuLAGFEPNFDIGIN.....G...FQLSFKKSY .............hhhY-opKpssl.sPFLLNLFLsFGIGSFsQGDhlGGuhlLGF.....shlGhhL.hhsGhh..L.sh..............s.spcs...hhhGthlh...hhGssThsso+lsplIlPFTFANpYNcpLKppLslsLuGFEPsFDluhs.....G.....FpLSFKKSY....................... 0 5 7 7 +5457 PF05629 Nanovirus_C8 Nanovirus component 8 (C8) protein Moxon SJ anon Pfam-B_9354 (release 8.0) Family This family consists of a group of 17.4 kDa nanovirus proteins which are highly related to the faba bean necrotic yellows virus component 8 protein whose function is unknown [1]. 25.00 25.00 151.80 151.70 17.50 17.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.07 0.71 -4.42 3 31 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 9 0 0 30 0 151.80 70 100.00 CHANGED MADWFcSsLKTCTHVCDFscIuuDS...QQ-lhCCDSM+GKLp-PRKVLLVSCpVSFNGSFYGuNRNVRGQLQlSMp-DDGVsRPIGYVPIGGYLYHNDYGYYEGc+TFNLDIESDYLKPDEDasR+FpVSIlN-NGLD-pCDLKCYVVHSlRIKV M..DWAESQaKTCTHGCDWKsISSDSu-NRQYVPCVDSGsGR.KoPRKVLLRSIEssFNGS..FpGNNRNVRGFLYVSIRDDDGtMRPVLlVPFGGYGYHNDaYYFEGcupspCDI.uSDYlsPs.DWSRDMEVSISNSNNCN-.CDLKCYVVCSLRIKE 0 0 0 0 +5458 PF05630 NPP1 Necrosis inducing protein (NPP1) Moxon SJ anon Pfam-B_9369 (release 8.0) Family This family consists of several NPP1 like necrosis inducing proteins from oomycetes, fungi and bacteria. Infiltration of NPP1 into leaves of Arabidopsis thaliana plants result in transcript accumulation of pathogenesis-related (PR) genes, production of ROS and ethylene, callose apposition, and HR-like cell death [1]. 
21.00 21.00 21.10 21.10 20.30 20.90 hmmbuild --amino -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.56 0.70 -4.60 27 493 2009-01-15 18:05:59 2003-04-07 12:59:11 6 12 160 3 323 429 0 179.10 32 74.56 CHANGED sssshthcapPhlch.scGChPhPAlsssGssuGGLps........o.Guh................................suuC+-put..suQsYuRusp..N..G..hhAlMYuaYF.KDpshs..shGuHRHDWEaVVVWlss....ss.plhtVosSuHGsappt.s.sssph-Gs+sKlsYapshss.sHshchsssss-..........p.sLlsW.....sshss.ssRctL.......pssDFGsA.......slPh..KDusFtspLspA ...........................................................................s..phhhpapP.Lch..s.sGChsaPAVss.sG.s..s...uu.G..Lps.........o...Gs..s.............................................s.usCpsss....uQ.......l........Y..sRush..s....s......hhA..IM.YuWYFPKDp.s.....ss...............s..h....G.HRHDWEpl.l.V.Wlss......................ss..pl...h.u.l..u...sS...s.H.u..s....app.............t............s..........s............h.....s....G.....s..psp.ltYhtsh.h......sHthphsss..s........G.....p.....................htsLlhW..............-thss....suRsu.L.......p.s.s.s.FG.p.A........p.h.Ph..pDs.sF.tpLtt.................................................................. 0 86 163 303 +5459 PF05631 DUF791 Protein of unknown function (DUF791) Moxon SJ anon Pfam-B_9328 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.28 0.70 -5.76 3 263 2012-10-03 03:33:39 2003-04-07 12:59:11 9 6 145 0 180 646 62 301.10 33 72.54 CHANGED FYYLVFGGLAAVVAuLELuKouKDRlNToPuFNSFKNNYLLVYSlMMAGDWLQGPYVYYLYSoYGFuKGDIGpLFIAGFGSSMLFGTIVGSLADKQGRKRAClTYCIlYILSCITKHSPpYKVLMIGRlLGGIATSLLFSAFESWLIAEHNK.............................RGF-sQWLSlTFSKAlFLGNGLVAIISGLFANLLADsFGFGPVAPFDAAACFLAIGMAIILuTWSENYGDsSDsKDLlsQFKVAApAIASD.plhhLGtlQSLFEGSMYTFVFLWTPALS..PNDEEIPHGFIFATFMLASMLGSSlAuRLLuR+.LRVEuYMQIVFLlSAsoLhLPIVTshLVsPSpVKs-GLSLsuulQLLGF ......................................................................................................................................t....sF.pF..p.pal...hVY..L..h..h......su...D....W....LQ....GP...Y.l.Y....h.....L.Y.p.p..Y.u..hsct....p.....I.u.h.L...a...l..s.Ga..u.S..uh.l.h.....G.s.h..s.....G.....s.....L.....u.Dp.h.G.R+p.u....C.l.h..a..s.l.h.Y.........l..s....C.....l......o..............K..h.s.s..p.a..h.....l.L.h.....l.GRl.L..G.Gl....u.T.....o..LL..F....S...s.F.E......u...Wh..l..t..E...H.t.c...................................................................................+..s.a...s....t..p...h..L..sh..T.F.u.pu.s.......hh.N.ul..l..Alh.uG.l..hup....hl.s..s.........h.........h..u....h............t.............lu.PF..sA..h.shL...s...l.u..hs.l....lh.tsW....s..E....N...a.G..s.............................t......p...p..........s.....h...........................t..p.h.....t...t........u...h........p....s............l...h...s........D............p+....l....hhLGh.h..Qu.lFEu.uh..Yh..F..VFLWTPs..Ls.............spt....t...........t....l....P....h...Gh....lFus.FMhupM....lGSslh...hhhs........p.p....hp..s...t..hh..hshhh..sshsh.h...........................................................................................................................................................................................................................................
........................................................... 1 75 112 149 +5460 PF05632 DUF792 Borrelia burgdorferi protein of unknown function (DUF792) Moxon SJ anon Pfam-B_9387 (release 8.0) Family This family consists of several hypothetical proteins from the Lyme disease spirochete Borrelia burgdorferi. 25.00 25.00 36.90 36.70 23.10 22.30 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.46 0.70 -4.99 2 136 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 28 0 10 109 0 194.90 67 97.49 CHANGED hsIKNKN..NsNhEpKK.tEh.p-.......EIhpII+DlhTQlFsLFGADNFLlLFPR.Dh+GFGYVPQLFFIKPKTpLIoRTYNTSCSKRPsINYYDRKAEYVSYNsVMTGEpISLsGGILTShYKDMLSlhKhTVFGNhhhRFDuHLsKEQLANRlQAQVPFoIYSPTFGLKELAlITsLoFKDhPFIDEVEVSLShElVKTFcLEKYKG ........................................pph.tp...p.......EIo+II+DVlTQIFALFGADNFLVLFPRhDLKGFGYVPQLFFIKPKsELIoRTYNTSCSKRPV.INYYDRKAEYVSYNPVMTGEpISLNGGILTSLY.K-.MLSlLKMTVFGNohLRFDuHLsKEQLANRlQAQVPFSIYSPTFGLKELAlITSLoFKDTPFIDEVEVSLSlEIVKTFs..LEKYKG...... 0 6 6 6 +5461 PF05633 DUF793 Protein of unknown function (DUF793) Moxon SJ anon Pfam-B_9395 (release 8.0) Family This family consists of several plant proteins of unknown function. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.40 0.70 -5.71 7 188 2012-10-01 23:20:42 2003-04-07 12:59:11 6 6 24 0 111 233 0 230.20 22 80.18 CHANGED hPAT-aQu........hhShLSlRRsQhss......ssspppE.p....EL-sFQ++VAE+FhDL........usSs.........tslLSlpWltKLLDsFLsCppEF+uIlhstts..QISKsP.hDRLls-hh-RulKALDlCNAlhsGl-plRQap+hhEIsloALcpp........PLG-GplRRAK+ALhsLsIuh.s-....Ksuuuusss..ppNRShsRshs.........ht+RohG............ph+S.oWuVSRsWSAuKQlQAhsuNLssPRus-ssu...Ls.PVYhMoSVhlhVMWsLVAAlPCQ-R.GLtsHhs.sP+phpWAuslhSla-+Ih-E.K++-+Kp.usGLhcEhpphE+hu+.L.EhsDuhcFPhs--c.p.EVtpcVpEhhplCcthcsGL-PhpRpVREVFHRlVRSRoEhL-sL ................................................................................................................................................................................................h..hsh.ah..hh..h....ttht.hh...........h........-chh..hh-.sh+hLDlh.sht.tlt.l..pt..phhh.hsht.h....................................................................................................................................................................................shh.ht..hhhh.h.hhs.hh....s......................t......au.sh..l.p.l.t-.t...........tt............................h.htEht.hc.....h............hht............t................................................................t.....h...h........................................................................................................................................................ 0 21 78 93 +5462 PF05634 APO_RNA-bind DUF794; APO RNA-binding Moxon SJ, Eberhardt R, Barkan A anon Pfam-B_9606 (release 8.0) Family This domain contains conserved cysteine and histidine residues [1]. It resembles zinc fingers, and binds to zinc [2]. This domain functions as an RNA-binding domain [2]. 
25.00 25.00 33.30 25.70 19.80 17.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.32 0.70 -5.13 7 186 2009-01-15 18:05:59 2003-04-07 12:59:11 6 9 21 0 116 189 0 158.30 36 77.96 CHANGED Ls......ptht.ts.spppp.psp.sDlP.....+.c+KPaPhPhKclpcRAKEchp.tp.t.p+.L..PPcNGhlV.pLVPVAcpVhpARphLlpsLspLl+..VVPVpsC+aCsEVHVGshGH.h+oCcG.ssstRpu.HpWssGslpDVllPlEuYHLaDphs+.RIpH-pRFshPRlPAllELCIQAGV-lP-aPs+RRppPlh.htt.cllD ...........................................tht.............................................................................thh.lApcshpA.aptlhpGlp+Lhc...hhsVpsCtaCsEVHVGshGHph+....h....Ctuh.cpptRsGpHsWppusl-DllsPh.saHlhD.h..Gt.......lp..c-.hRh.Ys+hPAVV....ELClQAGspl.P.c.............................................. 0 10 68 94 +5463 PF05635 23S_rRNA_IVP Ribosomal_S23p; 23S rRNA-intervening sequence protein Moxon SJ, Eberhardt R anon Pfam-B_9648 (release 8.0) Family This family consists of bacterial proteins encoded within an intervening sequence present within some 23S rRNA genes [1-3]. It folds into an anti-parallel four-helix bundle and forms homopentamers [4]. 23.30 23.30 23.40 23.30 23.00 23.20 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.20 0.72 -4.15 122 1034 2009-09-11 08:57:14 2003-04-07 12:59:11 6 5 486 10 434 1051 267 104.30 26 82.27 CHANGED psac...-LclWp+uhcLshplYc.lopshP.p.cpauLpsQlcRuulSIsoNIAEGhuRp..osp-ahcFLtIApGSssElcopLhluhclsalsp.pph......ptlhpphpclp+hlsuh .................................hppl.happuhphshplap.hs.p...ph..s.p...p....c...p..a...s.L.ss.QlpRu..........usSlsuNIAE.Gh..s...+p..opp-ahphLpIA.......tuShtE.hph.L..ls.hc.hs...a..l.s.p..p.ph......pt.lhpphpplhphl.t.h.................................................................. 
0 178 349 412 +5464 PF05636 HIGH_NTase1 DUF795; HIGH Nucleotidyl Transferase Moxon SJ, Anantharaman V anon Pfam-B_9692 (release 8.0) Family This family consists of HIGH Nucleotidyl Transferases 19.90 19.90 19.90 19.90 19.80 19.60 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.40 0.70 -5.43 10 1483 2012-10-02 18:00:56 2003-04-07 12:59:11 6 2 1444 1 204 1046 11 364.90 35 97.14 CHANGED McssGIIsEYNPFHNGHhYHLppucc.spssltIAVMSGsFlQRGEPAIlsKWpRucMALpsGsDLVlELPssaSsQuA-aFApGAVcILspLu.l-sLsFGSEpsslccapclAclhs-+ttchcphl+p.L.scGhSYPpuppcsac.....phtG.c.thp...htoPNNILGlpYsKAlhptspsIpshoIKRpuAsYHDt-l...ccphASATuIRptl.......................tpclctlc+hVPssohcll....p.hsshsshscaFshLKYpllt.osccLcsIapVsEGl-sRlh+ulppupsac-hlcLlKTKRYThoRlpRlLTalLlNhcp.........psstssaI+lLGFocKGQpaLpphKKshs.....lPlIT+luptshct.....hph-l+AopIYpLs.....ppshspp-Fs+sPI ...........................................MphsGlIsEYNPFHNGHtYtlppu+p...t.s...s...l....h.I.s.lMSG..NF..lQ..RGEPAIlsKWsRAcMALpsG.sDLVlELPhhhulQuA-hFApGAV.plLstLu...l-.s...lsF.......G.o.E...ps.plps...hpplAchhp..cp....t...t..ph.p.ph.lcp.h.p.p.u.hSYPpthp.t.hh.p.........................p.h.h.t......p...hphs............tsPNsILGlpYhKAl...t.....h..s..p..s..IphhsIpRpu.....usaHst....ph...............ctp......hAS....ATuIRptl..........................................................p..p......s..t....l.cp.h....lP...psshthh........................s.....s...h...h..s......h..c...pa.......as..h....L........+.Y...pl...l..........t...otp..c......LpsIap..l..sE...Gl....-sRl.........pps....l....p..p.......u....p......o......hcc.....hlctl.KTKRYThsRlpRllhalLlshp...t...........................tppt.sp.hl+lLGhsc+Gp.paL+....plKpp.........lsl....l...o..+l.sp...p.......s.h..ph........................hthph+As..p..lYph........st.....ppsasp.s............................................................. 
2 79 138 173 +5465 PF05637 Glyco_transf_34 galactosyl transferase GMA12/MNN10 family Wood V anon Pfam-B_6000 (release 8.0) Family This family contains a number of glycosyltransferase enzymes that contain a DXD motif. This family includes a number of C. elegans homologues where the DXD is replaced by DXH. Some members of this family are included in glycosyltransferase family 34. 20.40 20.40 20.50 20.40 20.30 20.10 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.58 0.70 -4.86 14 639 2012-10-03 05:28:31 2003-04-07 12:59:11 7 9 211 4 419 634 57 209.70 22 55.28 CHANGED pssclVllhuSs.pss.....s..tp.hltpslcN+l-YAc+H..........GYshhahpssshhh...thsss....WuKhPhl+psMpcaPcAEWIWWlDpDAllhshphsl.chhh..phLpphhhpp...........sshpph.cpshs.cupshhhlhspDasGlNuGSFLlRNspauthllDhWs..............-Phhtppsts...pcEQsALsallttHsplhsphuhls.................+hlNuYp.ut.t...............................hs.c-GDhllHFsGCpstssCt ..............................................................................s....hlhlhh.p.t.h............h..p.hpN+hcYschH...................Gaph.h.h.....tshhp.........tphtt................Wt.Kls..hlRphh...h......taP..c...sc......ahaalDt-shhhs.shp.........................................s.pth.p...h................p..s.....s..lh......p...p....shtu...l..NsGsFllR..s........spWu....h...hL.-.h.Wh...........................h.hh......t.h.......tt-.QsAl.....hl....h...t.............p................h....htp..hs.hl..............................................tchh.p.t...a..t.................................................ht..pch.s.hlsphsGCp.t............................................................................................................................... 0 104 243 356 +5466 PF05638 DUF796 Protein of unknown function (DUF796) Moxon SJ anon Pfam-B_9698 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
21.80 21.80 21.80 21.80 21.30 21.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.75 0.71 -4.15 152 2770 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 1013 13 450 1429 75 135.10 28 72.81 CHANGED alpl.s...G.....Is.........G.pstpp..s.HccpIplhuasasls.shs.......sutssGpsshpslphsKhlD+uSPhLhpuhss.Gcplppsplphh+.....ssG............p.t.cYhplpLps.shlsslsssss.............tt.hshEslslsYspIphpap ............................................Ylpl.s..G.......Is..............G..pshtp....s..Hcsp..I.lhuapasls....s..........su.....tsuGps...shpshphsKhlD+uo.PhLhpuh....ss......Gcpltp.....splph.hR......sssG.............p....cYhphpLps.shlsslshphs......................ttphp.hEplslsYpcIphpa.h......................... 0 69 183 306 +5467 PF05639 Pup DUF797; Pup-like protein Moxon SJ, Bateman A anon Pfam-B_9797 (release 8.0) Family This family consists of several short bacterial proteins formely known as (DUF797). It was recently shown that Mycobacterium tuberculosis contains a small protein, Pup (Rv2111c), that is covalently conjugated to the e-NH2 groups of lysines on several target proteins (pupylation) such as the malonyl CoA acyl carrier protein (FabD) [2]. Pupylation of FabD was shown to result in its recruitment to the mycobacterial proteasome and subsequent degradation analogous to eukaryotic ubiquitin-conjugated proteins. Searches recovered Pup orthologs in all major actinobacteria lineages including the basal bifidobacteria and also sporadically in certain other bacterial lineages. [1] The Pup proteins were all between 50-90 residues in length and a multiple alignment shows that they all contain a conserved motif with a G [EQ] signature at the C-terminus. Thus, all of them are suitable for conjugation via the terminal glutamate or the deamidated glutamine (as shown in the case of the Mycobacterium Pup [1]). 
The conserved globular core of Pup is predicted to form a bihelical unit with the extreme C-terminal 6-7 residues forming a tail in the extended conformation. Thus, Pup is structurally unrelated to the ubiquitin fold and has convergently evolved the function of protein modifier. 20.10 20.10 20.10 20.10 19.50 19.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.44 0.72 -3.48 15 351 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 337 8 108 262 19 65.20 44 96.99 CHANGED Mus+csGGGQpcsscptpt.t.....ssssussptRpcclo-DVDDLLDEID-VLEpNAEEFVRuYVQKGGp ............................ttt...........ttpt.p.......ptt....stssssuppppc..ch.s.--l.DDLLDEIDsVLEpNAE-FVRuaVQKGGQ.. 1 32 78 100 +5468 PF05640 NKAIN DUF798; Na,K-Atpase Interacting protein Moxon SJ anon Pfam-B_9801 (release 8.0) Family NKAIN (Na,K-Atpase INteracting) proteins are a family of evolutionary conserved transmembrane proteins that localise to neurons, that are critical for neuronal function, and that interact with the beta subunits, beta1 in vertebrates and beta in Drosophila, of Na,K-ATPase. NKAINs have highly conserved trans-membrane domains but otherwise no other characterised domains. NKAINs may function as subunits of pore or channel structures in neurons or they may affect the function of other membrane proteins. They are likely to function within the membrane bilayer [1]. 
20.00 20.00 21.20 21.30 19.40 19.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.50 0.71 -5.36 13 270 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 79 0 133 229 0 164.50 53 73.51 CHANGED MGsCSG..RCoLlhlCsLQLlsALERQVFDFLGYQWAPILuNFlHIllVILGLFGTlQYRsRYllsYslWssLWVsWNlFIICFYLEV.GpLS+.............................DoDL.LTFNlShHRSWWhEpGPGClsp.ls..sushshcs++hloVsGCLLDYpYIEVlHSuLQIlLALhGFlaACYVsplhh-EEDSFDFhsu..................PhYho....s .......................................................ls.sLERQlFDFLGY...QWAPILuNFlH.IlhV...ILGlFGTlQYRsRYlhsYs.lWhsl.....WVsWNlFIICFYL.EV.GsLS+.............................DoDl...hTFNhShHRSWWhEpGPGChhp.s...ss...................hs.cspphloVsGClL-apYlElhHSu...lQIlLA.LhGFlaACYVsphhhEE..EDoh..ch.............................h........................................ 0 21 34 71 +5469 PF05641 Agenet Agenet domain Bateman A anon Pfam-B_2551 (release 8.0) Domain This domain is related to the TUDOR domain Pfam:PF00567 [1]. The function of the agenet domain is unknown. This family currently only matches one of the two Agenet domains in the FMR proteins [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.17 0.72 -3.82 34 609 2012-10-02 16:56:36 2003-04-07 12:59:11 7 25 73 4 302 580 6 67.00 32 12.42 CHANGED hppGstVEVpSccpsh..csuWatAhllpt.sct.....chpVcapshphpstts..............hp-plshpp.....l....RPsPP... ..................tcGscVEV..h..S.cs.ssh...suWahApVhhhpuc..............hahlcYsshs.ss..........................hsEhVshcc...................l..R.PssPs..t............................................... 0 52 141 206 +5470 PF05642 Sporozoite_P67 Sporozoite P67 surface antigen Moxon SJ anon Pfam-B_8657 (release 8.0) Family This family consists of several Theileria P67 surface antigens. 
A stage specific surface antigen of Theileria parva, p67, is the basis for the development of an anti-sporozoite vaccine for the control of East Coast fever (ECF) in cattle. The antigen has been shown to contain five distinct linear peptide sequences recognised by sporozoite-neutralising murine monoclonal antibodies [1]. 26.10 26.10 27.70 27.30 26.00 26.00 hmmbuild -o /dev/null HMM SEED 727 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -13.24 0.70 -6.67 3 31 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 10 0 10 32 0 320.00 20 85.01 CHANGED MpIlHFLLTIPVIFVSGGDKMPAGESPRTSKPSPLVTLESAlTQPSKDP...FKTVuALSKATKVWKSAVSlSDDSKTVPTPVSEPhITRSFQEP..................VSQESElQs-TEhNpuscGS+oDSE-DDD...............................................-pEEEDNKSTSSKsGKGSpKu.QPGVSoSSGSTTSsTssoTslSQTGLG..uSGSH....AQQDPuVu.sGV.........VGVPGLGVPGVGVPGsG...GsGshPGVGVstsGVuPGVGVG...........................G.GGV..........................PGVGl......................................tSsoSpEG--sDDpEc-t-s+shp.......................................................................PGVGlPGVtV...........GsoTSSoSTToPSoSTTTTTPoSSG.PSs.Gu.GoSsRNAVTRpTDSISGPIPSPG-PRAITGQM..........GEcEpFAVQFLGDFKPKPRRYEGscT-ssKLKKFIFEEVKSLVpTLINLKLAIANDFVEITEKLKKpNQNHVPKLKLLKGsQFDTKQKVANVLKGFNSLYFVlFMNLNLAKEVN+PEELAEhLWKLNTIPDKVu+EFELAlEKTKuSEKKsELEEAFKSIslGFKIAYYATNDILSSITNSVYsLIKLKNFGDDFVTEVRKSLQMVPHQKNLNGSSFIVKISEhMNKKGTEsQDQTSGuGS..............KGTEGGSLRG.DLTEEEVLKVLDELVKDVSEEcVGIGDLSDPSSRSPscKPAcLGPSLVIpNVPSDPoKVTPTQPSNLPQVPTo.....G.GNGT-Gs-QsPuusNG......................oGNGEGGKDLKEGEKKEGLFQKIKNKlLGSGFEVASlIIPMTTIIFSIVH 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ss....................... 0 1 5 5 +5471 PF05643 DUF799 Putative bacterial lipoprotein (DUF799) Moxon SJ anon Pfam-B_9829 (release 8.0) Family This family consists of several bacterial proteins of unknown function. Some of the family members are described as putative lipoproteins. 
20.20 20.20 21.60 20.30 18.90 18.70 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.47 0.70 -5.02 3 303 2012-10-01 20:48:06 2003-04-07 12:59:11 6 5 289 0 76 216 14 207.60 45 91.96 CHANGED MKPLILGLAAVLALSACQVQKAPDFDYTSFKESKPASILVVPPLNESPDVNGTWGMLASTAAPLSEAGYYVFPAAVVEETFKQNGLTNAADIHAVRPEKLHQIFGNDAVLYITVTEYGTSYQILDSVTTVSAKARLVDSRNGKELWSGSASIREGSNNSNSGLLGALVGAVVNQIANSLTDRGYQVSKTAAYNLLSPYSHNGILKGPRFVEEQPK .......................................................................................h.hh........h.....hs.s..sl..h..L..ouC.s....st.....s...p.h..D.Y.o..uF..KpSKP+S.ILVLPPlNcSsDVpAoauhLo.psTh.P.LA.E..uGYYVhPVA.lV-ETFK.QNGLTsAuDI..puls.PsKL+cIFGADAsLYloVspYGTsYpllsSsTpVoAsA+LVDLRoGchLWs..G..S..A...o.s..p...-........s.u..s..s..u...s....u....G.....L.lGhLVs....AsVpQIsso...loD.cuapluuhsuspLLSs..sp.s...G.lLhGPR........usth........................................................... 0 15 39 58 +5472 PF05644 Miff DUF800; Mitochondrial and peroxisomal fission factor Mff Moxon SJ, Eberhardt R anon Pfam-B_9868 (release 8.0) Family This protein has a role in mitochondrial and peroxisomal fission [1]. 
21.10 21.10 21.80 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.72 0.70 -4.74 9 222 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 83 0 105 189 1 189.30 36 94.98 CHANGED huE.scl..EhcYTcsISppMRVPc+lKssush.--p.hhspsss..........cph.MpVPERIVVhGpspch.shStPR-lpL.pSs....P..uhlplpTPPRVlTLo-+.hs.h-.Estssssss...........spE.+shsph+Rtpusucsu.hppsupls+sD..............uhh...................s......u....p.........................shsshssslEshosc.tls...sLRRQIlKLNRRL.hlEtENcpRtpREhlhYulslAaaLlpoahWLpR ...................................................................................................h..p.tasptIsppMRlPcpl+sss.....t.....p.......t........hpp.t...............tph...MpVP-RIllh.Gpstc................hs...P.t.-ltl.puh........P...t.ltlpsPPpllTLscp....hs.h.p.pt.....t..................p..h..s.phhc.ts....s........tptt...ptt.......................................................................................................................................................................ths...hts.hp...shs.c.plh....LRRQlhKLNRRl.hLE.pstpRtpREhlhYslsluahllpsahWhpR..................................................... 0 24 33 63 +5473 PF05645 RNA_pol_Rpc82 RNA polymerase III subunit RPC82 Moxon SJ anon Pfam-B_9884 (release 8.0) Family This family consists of several DNA-directed RNA polymerase III polypeptides which are related to the Saccharomyces cerevisiae RPC82 protein. RNA polymerase C (III) promotes the transcription of tRNA and 5S RNA genes. In Saccharomyces cerevisiae, the enzyme is composed of 15 subunits, ranging from 160 to about 10 kDa [1]. 
21.90 21.90 22.30 22.00 21.80 21.10 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.51 0.70 -4.85 29 304 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 256 2 191 294 1 231.10 19 43.22 CHANGED sthhcLhpppal.plsp....s.....t.h.tpthpph..ssshsphcpps-sc.phptphtshhppsp.sh........t...t.ht..suh+t.......................hcssls..hphNh-+FphphRsptLlphs........................cs+lGpsouplYcthL+hhcppssshpts.............................shshosp-lt+tl.spshslpts............................................................................................................phlspaLpLLusss.htFlpchus...GpahVsapclhppL .........................................................................................................h.pLhtttal.ps.......................t....h...tt........s.....t.ph....hpt.....p..h...h.....htt..htphhpt..sph.t..............pt.tt..hs.t.sput+ptt.........................................pts.h..-sslh..hplNa-+FthphRsptllshs..........................ps+hspsouplh...cshLchs..chpssp.ht.........................................ohshohspIhcpL...tphs..lsts............................................................................................................................................................................................lppaLplLspss....hhtc.....t.......s.ahl.hpphh.............................................................................................................. 0 53 97 151 +5475 PF05647 Epiglycanin_TR DUF801; Tandem-repeating region of mucin, epiglycanin-like Moxon SJ, Coggill P anon Pfam-B_1480 (release 8.0) Pfam-B_13922 (release 26.0) Repeat The unusual mucin, epiglycanin, is membrane-bound at the C-terminus but has a long region of this tandem-repeat at the N-terminus [1]. It was the first mucin identified to be associated with the malignant behaviour of carcinoma cells [2]. 
Mouse Muc21/epiglycanin is thought to be a highly glycosylated molecule, which makes it likely that its function is dependent on its glycoforms. Cells expressing Muc21 are significantly less adherent to each other and to extracellular matrix components than control cells, and this loss of adhesion is mediated by the TR portion of Muc21 [3]. This family also now contains the repeat that was the C. elegans protein of unknown function (DUF801). 27.00 14.00 27.00 14.00 26.70 13.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -9.97 0.72 -4.17 6 290 2009-09-10 21:16:35 2003-04-07 12:59:11 6 13 12 0 150 263 0 55.90 52 110.76 CHANGED sossuoSTsSuuASTssNsuSS.sTSuuTSTsosouS.SsTSSGoSTAsNsASosTTuuSuTtssTGTpTTo ......tstuST.sSs.usSTuTNopSS..sTS.SGsSTsTNScS.STTSSGuST....ATNStSSsTs....................... 0 114 114 114 +5476 PF05648 PEX11 Peroxisomal biogenesis factor 11 (PEX11) Moxon SJ anon Pfam-B_2629 (release 8.0) Family This family consists of several peroxisomal biogenesis factor 11 (PEX11) proteins from several eukaryotic species. The PEX11 peroxisomal membrane proteins promote peroxisome division in multiple eukaryotes. 
23.80 23.80 23.80 23.80 23.60 23.70 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.33 0.70 -5.08 61 883 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 300 0 540 829 5 206.20 18 85.16 CHANGED lsphlcalsposGRDKlhRslQYsu+hluhhh.pp...s..stphsshhcpl.....csphohsRKhhRhh+slstlpsutthh..sp.t......Dsll+hhsllppluhuhYhshDplsWlsphGlhpspstp....+hsphusthWhhuLlhullpslhplhthppphpphtt........................psscsspptp.................phtpcphshhhsllpshsDlhlslstL...uhl...phssshlGlsGhlSSllGlhsha ...........................................................................h..hhtp...ut-+hh............+h.hpah.sphh..h....................................t.t.............tt.httl.........................ptp...huh...sRp..............h...hRhh....p.........l....phhp...thh..th...........tp......................s..h..hphh...t.......h..h...p...p..hs.shYh...h...h....-pls....a.......l..s.....p..h....Gl.....hs......hpttt...................phtp...........h.ush....h....W.h.hullhulht....p..hhplhth.tpptp.phtt.......................................t........p.........................................................thhppp.h.hhhsllp..shsDhhhslstl......thh........ths.sshlGhhGhhSulhuhhp....................................................................... 0 168 289 432 +5477 PF05649 Peptidase_M13_N Peptidase family M13 Studholme DJ anon Swiss-Prot Family M13 peptidases are well-studied proteases found in a wide range of organisms including mammals and bacteria. In mammals they participate in processes such as cardiovascular development, blood-pressure regulation, nervous control of respiration, and regulation of the function of neuropeptides in the central nervous system. In bacteria they may be used for digestion of milk. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.46 0.70 -5.42 156 3507 2009-01-15 18:05:59 2003-04-07 12:59:11 8 35 1646 10 1445 3238 341 342.10 22 54.54 CHANGED s-DFYpassGsWlcpssIPs.cpsph..............us.FspLpc...pspppl+.............................pllcph...tt.t.............................................................................................................ssstp+htshYpushD.hssh...........cptGhpPlpshlpp.lsu.h................p............spts........................................................................ltphluph....hts..............shhsh.h.....VssDh..cs...s...sp....hlhlsp.......su.luL..PccsYYhppp..................................tphhpsYp.paltclhph......h......G..............................ht.s.............pspptAp...plhshEp.pLA............phphsp......tcpcc.pph................Ys.hsh.......s-Lpph.....h..ss......ls.............a.pphlpshhh.............................st........p...pllltp.....................ss...ahptlspllps...sshp.....slpsYlhaphlpshush..Lspshpptphpa.t......................sltGhtp..ts......+hcp.slsh.ss...st..lGpslGp............hYVcc..hFssc.u....Kpphpphlpslppuapppl...p.shsW.......Mssp....TKppAh.pK..............Lsshp.cI.GYP 
...................................................................................................................................pcDFapass.G..t.....W..hpp..t..lPs......spsph......................................us..Ftt.L.t.c...p..pp..hlp.....................................phlpp........................................................................................................................................................................................ss..pph.t....phYpts.....hc...pth.........................cth.G......h....p...P..l..h.......t.hlpp...lt..s.....h..........................................sh.s....................................................................................h.phl.s..p.ht..htht.s...................hhsh.h......lssD.....ps...s.......pt.........hlh.hst................su..lhL........PspsYYhp.pp.........................................tphhptah..phhtphhth......h.............u..........................................................h.t.p......................psp.p...hp...p.llphEp...plA.......................phhhsp.........tcp...p..-..t..th.................................Yp.hs.h.......s-.hpph............hst...........................................hs...............h.ttahpthh..............................p.s.....p.......plll.p..........................p.....ah....p....t...h..s..p..ll...sp.............p.s..hp..................hhps.a..hh.h.p.hl.p........thssh.....Ls.p.php..p.h.pht.a.t...............................sl..Gh.p......s............ph+t......shph.sp...........s....h...s.slGt........................................hYspp.....hF....s...p...u..................K...t....p.........h.pp....h.l.pplhp.......sa.pppl............p...p........s.W.......h..st.p..T+p..pAh.tK..............lst..ht.tl.GYP.................................................................................................. 
0 507 719 1169 +5478 PF05650 DUF802 Domain of unknown function (DUF802) Yeats C anon Yeats C Family This region is found as two or more repeats in a small number of hypothetical proteins. 22.60 22.60 23.10 24.10 22.50 22.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.57 0.72 -3.85 40 307 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 124 0 97 342 6 53.40 35 15.48 CHANGED losph-sosuslAs.......sWspALApQppuspuLsppLcsuLsphApsF-p+uuuLl ........losth-sosuslus.......sWpsALApQppuscsLupphpsuLsphusoF-p+SAuLl........... 0 13 31 67 +5479 PF05651 Diacid_rec Putative sugar diacid recognition Yeats C anon Yeats C Family This region is found in several proteins characterised as carbohydrate diacid regulators (e.g. Swiss:P36047). An HTH DNA-binding motif is found at the C-terminus of these proteins suggesting that this region includes the sugar recognition region. 21.40 21.40 21.40 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.50 0.71 -4.75 54 1274 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1153 0 179 746 9 132.30 47 35.86 CHANGED LsppLAppIVpRsMplIsaNlNVMsppGlIIuSGDspRlGphHEGAlhulspp+slpIspsssppLp.GV+.PGlNLPlhapscllGVlGITG-P-plptauELV+hsAELllEQuthhcphpWcpRtpEchlhpLl ..................LcsphAQcIVsRsMcIIs.p..N.INVMD.tcGhIIuSGDpcR.IGph.HEGAlLsLs.ps.....+.s.V-.I.-c.s.s.AppL+....G.V+.tGINLPLp.h.c.scllGVIGlTGE.P.-plRpYGELVpMsA.EhhlEQspLhc..lt.-pRh+EEhlhpLl............................. 0 51 97 134 +5480 PF05652 DcpS Scavenger mRNA decapping enzyme (DcpS) N-terminal Moxon SJ anon Pfam-B_9894 (release 8.0) Family This family consists of several scavenger mRNA decapping enzymes (DcpS) and is the N-terminal domain of these proteins. DcpS is a scavenger pyrophosphatase that hydrolyses the residual cap structure following 3' to 5' decay of an mRNA. 
The association of DcpS with 3' to 5' exonuclease exosome components suggests that these two activities are linked and there is a coupled exonucleolytic decay-dependent decapping pathway. 25.00 25.00 28.80 27.30 24.50 23.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.49 0.72 -3.68 20 268 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 220 18 181 258 0 107.40 33 32.37 CHANGED pphlscFchp+lLspsspsKsIsLLGpls.......s..csAIlllEKTsFph...-pl............................huhhss.................hpplcpltpNDIYtWhlshhtpcls............slKlsLIaPATppHI+KYspQ ...............................................t..lspFchp+lLsp...........ssp..sKpls.LLGp.ls...............s..csAllllEKssFth.....-ps.........................................hshhsu......................htclp..lhsNDIYthahshssp....p...hs..............................slKlslIaPAT-pHIcKYppQ............... 0 52 88 144 +5481 PF05653 Mg_trans_NIPA DUF803; Magnesium transporter NIPA Moxon SJ, Eberhardt R anon Pfam-B_9876 (release 8.0) Family NIPA (nonimprinted in Prader-Willi/Angelman syndrome) is a family of integral membrane proteins which function as magnesium transporters [1,2]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.95 0.70 -5.46 7 1396 2012-10-02 19:55:49 2003-04-07 12:59:11 9 14 342 0 935 1629 226 236.20 25 61.05 CHANGED hp.D.hhGlhLAlSSSlFIGuSFIlKKKGLh+hut.uuhRAupGGauYLpEhlWWuGhloMhlGEsANFAAYAFAPAsLVTPLGALSlllSAlluphhLpE+LphhGhlGChLsIlGSsllVlHAPpEppltSlpplhphhs-PuFllashhllssslhLlhhhtP+hGpppllVYIslCSlhGuLoVhSVKAlGIAIK.ohs.Gts.ltashsWlhlllhlhClshQlNYLN+ALDhFNsulVoPlYYVhFTT.slhsShIhFK-Wts.shsslhspLsGFhTllsGsFLLHsh+Dhs.sp ..............................................................................h.......Gh.lsh..u......shh...u..uh.....l..p......+...h..............u..h.h...p..t............t..............................t......s........h.....s.Y...h.....pp.h.....W..W.....h.G.h...h.........h..................h..........h.l.G..p....h.ss....F...s....AY....uF.......A....P.........s.lV..................sP..L..G.u.lu.l.....l...h..........s......slluthhL.p...E..plp........h..hs........h.......l........G.........s.h..lslh.G.s.s.l.l.....l....h...h.....u......s......p.p...p...t..h...........s...hp......ph.......t.h.h.h...p...........................s.....F.l..ha.h.h....h..........h........hh......h......h...h..........h...........hh......h......h...........t.................................t...................s.......t.........p...........p........h............h.....l........h..l.....ls.....u......h...................h.G..u....h....o....lh...ss....K.u.....lu.hhl.t.....h.t.....s............h..............h..a.h..h...h...h..hh.h..h..s.h..................hh..Qh..alNcuL...p.......hass..sh.......l.Ph...halhaT.....sl...hs.....u....hlhapt.a.t...s....p............hh..h..hhG..hhhh.h.Glhll.p.........t.......................................................................................................................................................... 
0 287 502 740 +5483 PF05655 AvrD Pseudomon_AvrD; Pseudomonas avirulence D protein (AvrD) Moxon SJ anon Pfam-B_9946 (release 8.0) Family This family consists of several avirulence D (AvrD) proteins primarily found in Pseudomonas syringae [1,2]. 25.00 25.00 59.70 33.70 18.80 18.80 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.06 0.70 -5.55 3 71 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 50 0 13 69 0 260.20 35 96.63 CHANGED MQDLSFSoIENHLGPAKDRFFGDGFKHVEYSARHVNLTESA..VcAoIoLSYPANWSKKNcSuELlPHLSSIDALTISlNLSQDILLNcFKSID.....HCWVRRISIKAGsKPEEDLR-INA..KITKESQGLDSpGDThLIFGGNVGTMTVQLEFIIPAAHEI-TlKDS..............oEKSCYSLHFKNRTQFIDDIIFYSPLN.AIS+LFVAaDsE.PNFLPGGIEANYPNIlNPVDSLVSHAQIAQALLYKLDGLTR-ESNTLWMRsLNIIAENPAKRpAATRLLVTELKRANIVSlKGcNWRVAEVAGHMNGIThSSSVAHLLPL .................................tsl-phLGstcpRaFGpGa+psphphpph....thstpt.....hpu.ho....l..sYPu.sWScKsss.-lhPHLSoIDAlhlusplup....tllh...p..ph....p.u.l-.......psa.l++loI+AGpcP....E.DLp..s..l..s..A..plsppt.s.ps..ss..t..hhFt.spl.usMplplph..hh...s.....t...cph.....t.shpss..............sp.cphao.ta+s.......+pphlpclhh..ss.hs..shu.p.lhh...t.stt.sph....Ghpu.s.a.s...shhs.lDsLlshAQluQsLlY+LDslsRspSsTLWMRplsl..h..spp.....Ph...t..ht..t.t..lhs....ph.csphl.phtsppWR...sph...s..........s..t...hphpsplAH.L......................................... 0 1 8 13 +5484 PF05656 DUF805 Protein of unknown function (DUF805) Moxon SJ anon Pfam-B_2800 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
26.80 26.80 26.90 26.80 26.50 26.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.91 0.71 -4.28 159 3368 2009-01-15 18:05:59 2003-04-07 12:59:11 9 20 1884 0 465 1828 801 123.10 28 77.08 CHANGED pahsFpGRusRp-aW.hhhLh.hllhhh......hh.hlshhh........................................thhshlhhlhhl...shhl.......Ps....lA...lsl..RRLHDhs.+SGW...................ahLl.................................................hhl...........P.hlGh............................................lh...hll....hh.h................h.u...p.s......sNp.aGssPts ......................................................................hhsFpGRspRp-.a........W..hhhL.h...h.l.h.h.h.l....lh..hlthhh......................................................hhh.sll...s.h.l.a.s.l....h...shl..........Ps...........hA.......lslRRLHDps..+SGa...................Whll.........................................................................................................hhl....P..hlsh..........................................................lh....llh....hh.s................hs.u...sss..tN+.aG.ss..h............................................................ 0 122 259 367 +5485 PF05657 DUF806 Protein of unknown function (DUF806) Moxon SJ anon Pfam-B_7291 (release 8.0) Family This family consists of several Siphovirus and Lactococcus proteins of unknown function. The viral sequences are thought to be tail component proteins. 25.00 25.00 26.30 26.10 24.40 24.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.51 0.71 -4.09 9 73 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 68 0 13 71 1 122.90 31 97.19 CHANGED hpPsppsppllpshshshlDpVYhssIPcEhlsNsspThVLlTEstssPspauNssFpuhshGVElQIFYphsls.pDhp..ssEIpLh+thpcscWplssS+sHhhDPDTs....QhpKshYhs+sh.I ...........................hPl.pltpllpshshshlD..plahsslPpEhh..cssspThlLl..TEssss.stauNssFpshshsVElQIFYppshs.pDhp.......phElpLh+hhpsscWhlss.u.+sHshDPDTp....Qlppshahs+sh........... 
0 4 7 10 +5486 PF05658 YadA_head Hep_Hag; Head domain of trimeric autotransporter adhesin Yeats C anon Yeats C Repeat This seven residue repeat makes up the majority sequence of a family of bacterial haemagglutinins and invasins. The representative alignment contains four repeats. 20.20 20.20 20.30 20.20 19.80 20.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.39 0.73 -7.61 0.73 -3.49 239 9521 2012-09-20 11:56:39 2003-04-07 12:59:11 9 620 916 22 1068 8054 662 27.80 37 11.89 CHANGED AsGpsusAlGssupA..sussosAlGssup .............AsGpsSlAlGssutA...sussSlAlGsso................ 0 242 496 794 +5487 PF05659 RPW8 Arabidopsis broad-spectrum mildew resistance protein RPW8 Moxon SJ anon Pfam-B_7373 (release 8.0) Family This family consists of several broad-spectrum mildew resistance proteins from Arabidopsis thaliana. Plant disease resistance (R) genes control the recognition of specific pathogens and activate subsequent defence responses. The Arabidopsis thaliana locus Resistance To Powdery Mildew 8 (RPW8) contains two naturally polymorphic, dominant R genes, RPW8.1 and RPW8.2, which individually control resistance to a broad range of powdery mildew pathogens. They induce localised, salicylic acid-dependent defences similar to those induced by R genes that control specific resistance. Apparently, broad-spectrum resistance mediated by RPW8 uses the same mechanisms as specific resistance [1,2]. 
24.00 24.00 24.10 25.10 23.60 23.90 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.75 0.71 -4.66 10 146 2009-09-21 12:32:58 2003-04-07 12:59:11 6 17 20 0 59 159 0 131.50 27 27.49 CHANGED PluElhs.........GAALGluLQlLa-Alp+AKD+ShTT+sILcRLcATIpcIoPllscIDKlScch--s..R+VlEcLK+LLEcAssLVEsYAELR.RRNllKKaRYpR+IKELEuuL+WhlDVDlQVsQWsDIKELhAKMSEMsTK.....LDcItsQP .....................................clhs.........GusLGhsht.Lh....cslhcspc+uhph+shlc+LpsTlcsIpPhlhc.Icphs..pchsps...pc.hccLpphLccuhpLlcphsclp.phshh.++hp.Y...tp+lcpl-tplthhhpsplplp.htDlpclhsthtt............................................................. 0 11 45 49 +5488 PF05660 DUF807 Coxiella burnetii protein of unknown function (DUF807) Moxon SJ anon Pfam-B_7114 (release 8.0) Family This family consists of several proteins of unknown function from Coxiella burnetii (the causative agent of a zoonotic disease called Q fever). 25.00 25.00 112.70 111.80 21.70 16.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.61 0.71 -4.52 4 23 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 2 11 0 138.40 57 59.30 CHANGED MLtApLhF.GphslGPI.PcSYFGNsGAI.sPcIc.KlATGcYTIThLSuRslppTQPulVHIslhD-sNtsFCsRlsKulpFTYcPpNo-.FPWKhhhpIAlpEt-hhtIcpKp....l.VTh+hhpcupGslVssGhhptcshhNV .......MLtApLhF.GphslGPI.PcSYFGNsGAI.sPcIp.KhATGcYTIThLouRslppTQPssVHIsh.D-sNtsFCsRlsKslpFTYcPpNSp.FPWKhhhpIAlpcGDGohhDsp.....VMlThthtNpsstAlhssshsRspAclNs 0 2 2 2 +5489 PF05661 DUF808 Protein of unknown function (DUF808) Moxon SJ anon Pfam-B_7112 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
25.00 25.00 35.80 26.10 22.10 22.10 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.88 0.70 -5.59 47 1348 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1301 0 257 880 93 282.80 53 95.12 CHANGED MAGuSLLsLLDDIAolLDDVAlMTKlAAK..................KTAGVLGDDLALNAQQVoG..VpA-RELPVVWAVAKGShhNKhILVPAALLISAalPW.........lITPLLMlGGhaLCFEGhEKlhHKahHp..........p.cppptpuhscss........hDLsAaEK-KIKGAIRTDFILSAEIIsIsLGoVustshhsQlhVLusIAllMTlGVYGLVAGIVKLDDhGLaL.p+sus.....htctlGtuLLssAPhLMKsLoVlGThAMFLVGGGILsHGlPslHHh...lc.hstth.......ussluslsPsllNullGllAGulllhll ...........hAhuSLLsLLDDI.......Aol.......L.......DDlulMuKlAAK..............KTAGVLGDDLALNAQQVoG..VpAsRELPVVWuVAKGSLlNK.l.I.L.V.PhALlISAFhPW.........hITPLLMlGGua.....LCFEGsEKllHhh.s+....cc...........pp...p...pph.t.ss.............tDshthEKcKlKGAIRTDFILSAEIlsIoL.GhV..u...p.........s..s....lhsQllVLusIAlllTlGVYGLVulIVKlDDlGhaLs...c...+uus........................hhpulG+GLlhhAPhLMKsLSlVGTlAMFLVGGGIl...V........H.G.....l........s.....s........L........H.....H.h.............lEchst.t..t................uuhluhll.ssllshllGhlhGulllh........................................ 0 55 142 209 +5490 PF05662 YadA_stalk HIM; Coiled stalk of trimeric autotransporter adhesin Yeats C anon Yeats C Motif This short motif is found in invasins and haemagglutinins, normally associated with (Pfam:PF05658). 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.52 0.73 -6.66 0.73 -3.98 75 6882 2009-01-15 18:05:59 2003-04-07 12:59:11 9 631 823 15 787 5827 118 22.40 61 6.44 CHANGED ppIoNVAsG...ssuTDAVNsuQL ...ppITNVAuG....ssuTDAVNsuQL.. 0 128 332 567 +5491 PF05663 DUF809 Protein of unknown function (DUF809) Moxon SJ anon Pfam-B_7264 (release 8.0) Family This family consists of several proteins of unknown function Raphanus sativus (Radish) and Brassica napus (Rape). 
25.00 25.00 107.20 104.70 20.90 19.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.18 0.71 -4.25 4 16 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 11 0 131.20 97 99.95 CHANGED MITFFEKLSTFCHNLTPTECKVSVISFFLLAFLLMAHIWLSWFSNNQHCLRTMRHLEKLKIPYEFQYGWLGVKITIKSNVPNDEVTKKVSPIIKGEIEGKEEKKEGKGEIEGKEEKKEGKGEIEGKEEKKEVENGPRK ...MITFFEKLSTFCHNLTPTECKVSVISFFLLAFLLMAHIWLSWFSNNQHCLRTMRHLEKLKIPYEFQYGWLGVKITIKSNVPNDEVTKKVSPIIKGEIEGKEEKKEGKGEIEGKEEKKEGKGEIEGKEEKKEVENGPRK.... 0 0 0 0 +5492 PF05664 DUF810 Protein of unknown function (DUF810) Moxon SJ anon Pfam-B_5709 (release 8.0) Family This family consists of several plant proteins of unknown function. 19.20 19.20 19.40 19.30 19.00 18.80 hmmbuild -o /dev/null HMM SEED 677 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -13.12 0.70 -6.25 10 230 2009-01-15 18:05:59 2003-04-07 12:59:11 6 14 75 0 160 222 1 430.80 22 48.43 CHANGED uLocsDLRETAYEIllAustuS.....tshhh...pthtppc..................................S+sKhhLuL+tpt....................................ttssupst+sc+shohsElMRhQMcloEphDsRlR+sLL+hssuplG+RhEsllLPLELLpplps.o-FsD..cEYcpWQ+RpL+lLEtGLlh+PslshscosppspcLRpll+s.....uhspslsouppoEsh+oLRpsshSLutR.upsGh...su-sCHWADGYP..LNl+LYptLLpusFD.hD-ssll-El-EllELlK+TWssLGIs.cslHslCaAWsLFpQaVhTu..E.tLLtsAhstLt-VutDtp.ssp-slYlK................hLsSsLSulhuWu-K+LhDYHcoF......scssl......sh-slVoluhssu+lLuEDsspth+cptt.........ss+-+l-pYIRSSl+sAFsc...........cuphtts+uspssttsLA.LAc-ss-LAhpEpphFSPlLppWHP.usGVuAupLHpsYGscL+paLuuhscLT.DsVpVl.uAscLEpsLVphh...usss.-DGGculh+.ch.PaElEshhusLVhsWlpt+l-clpphV-RshppEsWcPtSs.cpsaAsSAVElhRllcETl-tFFtL.IPh+sshLpsLscGlDpuhQpYsspshuuhGu+pshlPslPsLTRhpcsotlhshhKKchhssstscc+tspth..............shslspLCVRLNTLcalhocLcsLE+ 
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................................................................h............Pa..........ph....-phhhsh...lhpWlptp.pph..phlppshpp.....-.....p.......a.....p...sh........s.............t........p........p.taus.Ssl-lh+hhpp..............sl-th.h.tl........h...s.......c.......t...h...h....sLhpuls.....thht......Yhphh.....t.th....s...s....c..p..p.hh.P..h.s..LoR.ppt...h.....hh.+ct..............p................................................hh.p.hl+lNslpahhpplp.l-................................................................................................................. 0 27 89 125 +5494 PF05666 Fels1 Fels-1 Prophage Protein-like Yeats C anon Yeats C Family \N 25.00 25.00 47.40 26.90 19.70 18.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.27 0.72 -4.59 12 669 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 339 0 28 193 6 49.60 50 79.54 CHANGED sAhA..spssslhoPusGVlCDph..lCtD..........StGlShslTccYhGp+ .............u.ts.ssu..o.AsGVhCDsh..lChs..........S.stIs.phT+hhhhp+.................... 
0 4 10 19 +5495 PF05667 DUF812 Protein of unknown function (DUF812) Moxon SJ anon Pfam-B_7417 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 30.00 30.00 31.60 31.20 29.40 27.80 hmmbuild -o /dev/null HMM SEED 594 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.71 0.70 -6.01 10 190 2009-09-10 22:47:38 2003-04-07 12:59:11 6 4 106 0 126 188 1 373.50 26 88.19 CHANGED M..-ElDpIIlHoLRQlGs-lsE-lpuLcsF..........ToshlVcsVs+CLplIsPsl..sLP+oLP.s.....uMApRFpluopLApACp-lG..YRu-IGYQTFLYsNss-lRpllMFLlEpLPR-p.suscus...pPlu.............psstLccpIspsLppQLp...............hPWlPphsR.............h...s..shptFpsp.hsl..sspsp.tt.ssthpphapp.hl.slo......tslssolLpsssuplshsschch-a.utsh......E-hcpcKppplpp+ltshhppstt....s......suucpls-llpp.t.t.........s..hspppt..........spphuss.usssssshssEcstpppcps-L-uLpppIcclpsplpphpsch+phpuplpplp-phpstcppssclEpphKlKc+TstLLsDsEsNl....sKLpullsuoup+hhpLssQWEs+RsPLl-phcpL+thpppp.scoppphccIcslRpcIcclspElpsKsphappL.spaEshs+s.VsRouYTcRILEIIuNI+KQKp-IsKILsDTRsLQKEINsloupL-RTFsVTD-LlF+-AK..+DEtsK+AYKhLAsLHssCs-LlcsV--TGslpREIRDLEcQI-sEct+s 
......................................................................................................................................................t.....h.........p....l.th..........sst.hhphh.thl..lp........t........ths......h.P.........shst+hphs.tluphh....p..G..h...phua......p............p..hLYss...p.....h+tl..hhhLhppL.stt...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ht..h......h.t..t.ht..t.h......t.h..p...t.t.t...........p....tt.ht.h.tph..h.h.ps.t.....h......pLp.hhtttttph.plttpWp...thch....h.tphpthp...........ttp..h....tplp.hctphpth..chtt+pp.hppL.t.........phpphspt..h......s.......R..YhpRIhEhhtsl+KQct-I.+lh.-s+.lQpphs.httplpRpashs--hlappu+........p-...........h+.....psY+.Lstlct.httlhthl..hs.h.pphh....chp.pl........................................................................................... 0 48 64 98 +5497 PF05669 Med31 SOH1; SOH1 Moxon SJ anon Pfam-B_7443 (release 8.0) Family The family consists of Saccharomyces cerevisiae SOH1 homologues. SOH1 is responsible for the repression of temperature sensitive growth of the HPR1 mutant [1] and has been found to be a component of the RNA polymerase II transcription complex. SOH1 not only interacts with factors involved in DNA repair, but transcription as well. Thus, the SOH1 protein may serve to couple these two processes [2]. 
25.00 25.00 25.00 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.63 0.72 -4.29 34 308 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 274 4 217 302 0 103.10 44 60.92 CHANGED sshsRFplELEFVQsLANPpYlpaLA...pp.........pYa..................p-.pFlsYL+YL.pYW+cPcYu+alhYP.pCLahLc.lLpsppFRpslsssss....hphl....pphhhpattht.pthph .......t.phRFplELEFVQsLANPpYL.s.............aLA...Qp...................tah.................................c-psFlsYL+YL.pYW+..cP-Yu+aLpYP.tCLahL-.LL.Qp.-.pF...Rptlsssps......hch.l..tppthhtW.tht.....t......................................... 0 73 119 181 +5498 PF05670 DUF814 Domain of unknown function (DUF814) Bateman A anon Pfam-B_738 (Release 8.0) Domain This domain occurs in proteins that have been annotated as Fibronectin/fibrinogen binding protein by similarity. This annotation comes from Swiss:O34693 where the N-terminal region is involved in this activity [1]. Hence the activity of this C-terminal domain is unknown. This domain contains a conserved motif D/E-X-W/Y-X-H that may be functionally important. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.88 0.72 -4.10 39 2812 2009-09-13 15:36:20 2003-04-07 12:59:11 8 19 2349 0 939 2335 311 89.80 31 16.57 CHANGED ahhoss.ahllsG+ssppN-hL.sh+hhcppDlahHscthsuuHVll+spstps.p........slp-AAphAstaS+.h+tuspssssY...........sphcalpK ............h...sssshhIhVG+NshQN-.L..sh...K..h..Acp..s.D...l..W.......h....Hs+.cl.PGSH.Vl..l+s..s..t..s.s.s.p.............................slp-A....ApLAuh..aS...........c...ucpu......sp.lsVca...............sth+pl+K............................................... 0 362 602 811 +5499 PF05671 GETHR GETHR pentapeptide repeat (5 copies) Bateman A anon Pfam-B_8059 (release 8.0) Repeat This pentapeptide repeat is found mainly in C. elegans. 
The most conserved amino acid at each position leads to its name GETHR (Bateman A unpublished obs.). The family also includes a divergent repeat in a microneme protein Swiss:Q26588. The function of this repeat is unknown. 21.50 21.50 21.90 21.70 21.10 21.20 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.12 0.72 -4.80 3 9 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 3 0 9 25 4 23.90 70 46.64 CHANGED GETHRGETHRGETRRGETHRGET+R ....shRGET+RGET+RGET+RGET+p. 0 6 6 9 +5500 PF05672 MAP7 E-MAP-115; MAP7 (E-MAP-115) family Bateman A anon Pfam-B_8157 (release 8.0) Family The organisation of microtubules varies with the cell type and is presumably controlled by tissue-specific microtubule-associated proteins (MAPs). The 115-kDa epithelial MAP (E-MAP-115/MAP7) has been identified as a microtubule-stabilising protein predominantly expressed in cell lines of epithelial origin [1]. The binding of this microtubule associated protein is nucleotide independent [2]. 28.50 28.50 29.00 29.00 28.10 28.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.28 0.71 -11.64 0.71 -4.87 3 338 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 64 0 136 296 0 159.00 47 22.95 CHANGED APuussNuuAsuKPoAGTTDsEEATRLLAEKRRQAREQRE+EEQERREQEEpDRpcREELtpRsAEER.sRREEEARRpEEEcAREKEEp..............hpRpAEEc....thpEQEEQER..lQKQKEEAEARuREEAERhRLEREKHFQQpEQERLERKKRLEEIMKRTRKSEVS-phKKpDsKs ................................................sssssK.ssAGTTDsEEAs+lLAE.KRR.AREQ+E+..E...EpE.R.......h...c...pE.....Ep.-.........R......t....t.....+.......E.E..t......p+.h...tE..E...............R.t+......pE.-E.uR+.h.E.-....c.......pt.R.c..c.tcp................................t.t..tcEc...........tppEp...EE..pE+..........lp+QKEEAEu+..u+EEAE+.RhEREchhppp.EQE....RhERKK....RlEEIMKRTR+...o-s...stp.pt............................................................................. 
0 13 29 65 +5501 PF05673 DUF815 Protein of unknown function (DUF815) Moxon SJ anon Pfam-B_6403 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.51 0.70 -5.45 75 1159 2012-10-05 12:31:08 2003-04-07 12:59:11 8 7 1089 0 330 2454 848 243.70 43 76.12 CHANGED spshAapWc......s..uh.Lpsl.............ppscslpLspLhGl-cQ+ctLlcNTcpFlpGhPANNlLLaGuRGTGKSSLVKAllspasspG..........LRLIEls+-DLtsLPpllshl+....spP.p+FIlFsDDLSFEps-ssY......KsLKulL-GulpupPsNVllYATSNRRHLlPEhhsDsts........ssElHss-ulEEKlSLSDRFGLhLuFashsQ-pYLplVcpas.............pph....ulshst...........cpLcpcAlpWuhp+GuRSGRsAhQFhpclsGch .....................................................................................s.shAFhWp.........stpsh..LpPl.............tp..ss.t.lpLscLhGl-p.Q+.c....t......L.......h...c.......N...T.......c.........p.....F.....l..p...G.........h.........P.........A...........N..N.........s..L.L.aGuRGTGK.SSLlKA.ll.spats.pG.........................L...R..L.....l...E.....l....p..+.....c..D...L.........t...s.....L....s....c.......l..l....s....h....l...+...................p.p....s........+....F.....I.l.F...CD.......D....L.............S.......F..........Ec...s.-.s...sY...............K....s.....LK..u....l...L....-..G...u..l.p.s........p.P.s....N...VllY..AT.SN.RR.HLlsEphp-Nps.........ssElHsu-u.lEEKl.S..LSDR...FGLWLuFashsQccYLphVctah...........................pch......slshsp..............-pl+tcAl.pWuhpRGuRSGRsAhQFhpphtGp.h............................................................................................. 1 102 225 279 +5502 PF05674 DUF816 Baculovirus protein of unknown function (DUF816) Bateman A anon Pfam-B_7178 (release 8.0) Family This family includes proteins that are about 200 amino acids in length. The proteins are all from baculoviruses. 
This family includes ORF107 from Orgyia pseudotsugata multicapsid polyhedrosis virus (OpMNPV) and a variety of other numbered ORF proteins, such as ORF52 Swiss:Q91F03, ORF140 Swiss:Q9YMI8. The function of these proteins is unknown. 24.60 24.60 31.80 31.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.96 0.71 -4.54 14 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 0 57 0 161.60 54 71.62 CHANGED olDVD-Fu+QLIADKCssLIEsppMLPtslLullKpA+c-YhcsPoscNYpplK+LhsQTKYV--SI-YKNFNRtlhLIAhphhlNKu+-hFssY..Kshl-sshKRLppINPDlKSSP+AMLpHYpECL-....ph-sP+.tD-HHLloFuKEIhTKIFl-slc.ho.sNpuslshs ..lslDsFA+QLIsDKCSsLIEspshLPsNlLsIlKpARDcYFcsPosKNY-.lKKLh.pTKYhDDSIDYKsFNRRlLLIuhKhuLNKupsaFssY..KsllEsAlKRLspINPDlKSSP+AMLQHYpECLE....NlDsP+.sDEHHLlTFuKEIATKIFl-sl-.aSa.sNpSslph.p... 0 0 0 0 +5503 PF05675 DUF817 Protein of unknown function (DUF817) Moxon SJ anon Pfam-B_7331 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 25.90 25.90 49.70 49.70 25.80 25.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.60 0.70 -4.96 39 378 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 367 0 80 340 18 223.70 50 85.60 CHANGED phlhFshKQAhuCLFssllhhsLhlT.+ll..shssLsRYDhLLIhsLlhQhhhltotLEThDElKVIhlFHllGhsLElFKlp..hG..SWSYPEtuhhKlhGVPLaSGFMYASVuSYIspAWRha-lchtpaPshahsssLushIYhNFFTHHalhDhRWhLhshshllFhRTaVhFplpssph+MPLlLuFhLIuFFIWlAENIuTFauAWpYPsQtcuWphVcluKloSWaLLlIISh ...............phhhFshcQAhuCLFssllFlsLhlophh....shsslhRYDhLLIhsLlhQhhhlthtLEThDElKVIslFHllGhsLElaKsc..hG..SWuYP..-tuh.KlhGVPLaSGFMYAuVuSYIsQAWRhhclchppaPshhhshsLushIYhNFFTHHalhDhRWhLhshhhllFh+ThVhFpltst.p..h+MPLlLuFhLIuFFIWlAENIuTFaGAWpYPsQhpuWphVHluKloSWaLLVllSh.................. 
0 23 47 64 +5504 PF05676 NDUF_B7 NDUFB7; NADH-ubiquinone oxidoreductase B18 subunit (NDUFB7) Moxon SJ anon Pfam-B_7077 (release 8.0) Family This family consists of several NADH-ubiquinone oxidoreductase B18 subunit proteins from different eukaryotic organisms. Oxidative phosphorylation is the well-characterised process in which ATP, the principal carrier of chemical energy of individual cells, is produced due to a mitochondrial proton gradient formed by the transfer of electrons from NADH and FADH2 to molecular oxygen. The oxidative phosphorylation (OXPHOS) system is located in the mitochondrial inner membrane and consists of five multi-subunit enzyme complexes and two small electron carriers: coenzyme Q10 and cytochrome C. At least 70 structural proteins involved in the formation of the whole OXPHOS system are encoded by nuclear genes, whereas 13 structural proteins are encoded by the mitochondrial genome. Deficiency of NADH ubiquinone oxidoreductase, the first enzyme complex of the mitochondrial respiratory chain, is one of the most frequent causes of human mitochondrial encephalomyopathies [1]. 21.90 21.90 22.50 22.00 21.80 21.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.52 0.72 -4.56 14 256 2012-10-02 15:44:21 2003-04-07 12:59:11 8 6 227 0 183 245 2 65.10 43 53.76 CHANGED .MlATpEEMptA+LPLt.RDaCAHhLIsLp+CRp-pa.hs.atCccERHsY-pCpY-DalhRhKch- ...........hATpcEMppA+lPLthRDhCAHhLIsLp+C.R.pcp.a.hs...a...pCccERHsY-cCp.Yc-ahhRhtch-............ 0 62 102 150 +5505 PF05677 DUF818 Chlamydia CHLPS protein (DUF818) Moxon SJ anon Pfam-B_7510 (release 8.0) Family This family consists of several Chlamydia CHLPS proteins, the function of which are unknown. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.21 0.70 -5.72 5 50 2012-10-03 11:45:05 2003-04-07 12:59:11 7 2 27 0 22 183 59 298.60 28 74.67 CHANGED soILcocP+PplshFSSc+A+puaE+RctHPlLaKILslIhcIlKhLIGLIlFlPLGLaWVLQKlCQNsILPuuuhLhop.phChc....splL+psFlspLcshhppscVSSs+RVsIQpD-LlIDoLuI+lPsAcPcRWMLISLGNS-sLEshshlttcc-...SlpclAKtsGANILVFNYPGVMSSpGsloRENLuKAYQACVRYLRD+.sGPpA+QIIAYGYSLGulVQAtALsKElsDGSDuToWlVVKDRGPRSluAlAsQahGsIGshlAsLsGWsIcSsKpSc-LsCPEIFIYusDp.cssLIGD.GLFp+EsClAsPFL-sPpl.chsGsKIPIuEpsLpHpsPLccsTIpcLAssIlc+LS .....................................................................................................................................................hthhKhlh.hhh..P.slha.h..hs.t..lhst....h............pphhp..a.h.tthpt...h.ppt.h..s...phpRlsIp.t..D.plhlDsht.l.p.h.s.....p....A....+ss.R.W.hLhuhGNupsaEp..hh....s....lhplAKp...h.....p..uNlllFNYPGVhpS........pGps..s.p.c.s.LspuapAsV.+.YLcDp..pGspAcpIlsYGaSLGu...u..Vp...A..t.ALppp.hp..sp-shpahhlKDRu.pSlushA....pp..........hh...s.......l.s..thhhp....lh...sWphs....u.K.ucpL...s...-l..hh.......hth......s............t....p........l.tc.thh..p.shAt.hhp.........pK..l......c.t.l.................................................................................................. 0 12 12 20 +5506 PF05678 VQ VQ motif Bateman A anon Pfam-B_7960 (release 8.0) Motif This short motif is found in a variety of plant proteins. These proteins vary greatly in length and are mostly composed of low complexity regions. They all conserve a short motif FXhVQChTG, where X is any amino acid and h is a hydrophobic amino acid. The function of this motif is uncertain, however one protein in this family has been found to bind the SigA sigma factor Swiss:Q9LDH1. It would seem plausible that this motif is needed for this activity and that this whole family might be involved in modulating plastid sigma factors (Bateman A pers. obs.). 
20.40 20.40 20.60 20.90 19.90 20.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -7.23 0.72 -4.91 49 612 2009-09-11 20:58:54 2003-04-07 12:59:11 9 6 26 0 403 538 0 30.40 35 13.43 CHANGED thhsspllps-sssF+slVQcLTGtssssss .............hssphlp.s-.ss.sFRslVQcLT.Gtssss.......... 0 56 239 327 +5507 PF05679 CHGN Chondroitin N-acetylgalactosaminyltransferase Bateman A anon Pfam-B_8249 (release 8.0) Family \N 19.70 19.70 20.40 19.90 18.90 18.30 hmmbuild -o /dev/null HMM SEED 499 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.70 0.70 -5.79 13 643 2012-10-03 05:28:31 2003-04-07 12:59:11 11 14 100 0 386 631 1 400.30 22 63.56 CHANGED HlssCl+shhSs+cD.VplGRCVpchuGl.sCshsa..Qulhacsaphs........hcuastph+scph+sAlTlHPlpsPshhYRLHsYhtpl+lpchppcphtLcp-ltphoph.tsps.tpsphP....................LGlsPs...hpPpsRp-llpW-als..+phh.uscss.P+pplcusp+p-lsDllspsh-plNpptp....sl-F..+pLhhGYRRhDsh+GhDYhLDL........Lhh++tRG++............hsVpR+hal.+shoc...................................................lEll.sssasscu............oRlslllPLsu+..sshhh+FlcsaEpsCLcs..ppsstLhllhahcss-spphsp.......pplhppLcs+a..ssu+lsalslp......................ssshSpslAlDlAs+..+hsh-oLlhlssschsFps-FLsRsRhNTIpGhQlahPIsFspYcPch.........................tp...psstsss.hclspss..........GaFcchuaulsuhYpuDhlpuchthhsp...................chps.GlEslclh-hhlpss.p...................................l+VhRusEPuLVa........la+slsCD.ssL-ptphchChsoKsps....luSpcpL 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................th......a...h...........h...........t.......pp.........lp....t........p...t..tl....-...s.lt.tshc.lNpp.p..................hh.ph.....pc.l.hp..G..h...hRh-.h+......GhcYh.Lch.........h...hptp......p................................h..h+..+.h..l..hp.s.hu.t...............................................................................................................l.hl...s....h.....htp........................................shlplllP......ltsp..hc.hh.pFhps.atph..shps....pppshLhllhhhp...p.pt.h.tp............thh.tphp..p.ch......shtphphl..tlp............................t..thscstuLp.hu.sc........th...s...s.c.s..LhFhsDl.chhhsschLpps....Rhps.....l.GhpsaaPlhFppasPth...................................................t................tt....sh.ppts.............G.aacp.uaGhsshYpuDahph...tth.s.........................phptWGhED.h..c..lhcthlpss...........................................LclhRs...ulhp..............ha.+..h.C.........s......p..hs.t..hphCh.sth.t......uuh.................................................................. 0 115 143 250 +5508 PF05680 ATP-synt_E ATP synthase E chain Moxon SJ anon Pfam-B_6116 (release 8.0) Family This family consists of several ATP synthase E chain sequences which are components of the CF(0) subunit [1]. 
21.00 21.00 21.00 21.20 20.10 20.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.10 0.72 -4.03 17 264 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 228 0 172 254 0 78.20 28 86.96 CHANGED hss...SPhlpssRaSALshGlsYGhh+hppLp...cpcpct+chctppthltptKht.tK++pscpc..hpthspuop.....................shht ..........h......SshlplhRaSALshGlhYGhh+.ptLp...cptptp+chctcpphlppsKtthtKpht..t..................................st.t.................................. 0 50 90 135 +5509 PF05681 Fumerase Fumarate hydratase (Fumerase) Moxon SJ anon Pfam-B_2085 (release 8.0) Family This family consists of several bacterial fumarate hydratase proteins FumA and FumB. Fumarase, or fumarate hydratase (EC 4.2.1.2), is a component of the citric acid cycle. In facultative anaerobes such as Escherichia coli, fumarase also engages in the reductive pathway from oxaloacetate to succinate during anaerobic growth. Three fumarases, FumA, FumB, and FumC, have been reported in E. coli. fumA and fumB genes are homologous and encode products of identical sizes which form thermolabile dimers of Mr 120,000. FumA and FumB are class I enzymes and are members of the iron-dependent hydrolases, which include aconitase and malate hydratase. The active FumA contains a 4Fe-4S centre, and it can be inactivated upon oxidation to give a 3Fe-4S centre [1]. 
25.00 25.00 25.70 25.50 21.70 21.70 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.63 0.70 -5.32 170 3534 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2307 0 710 2256 603 274.30 41 60.95 CHANGED ltcsltchhpcsshhLssDhhpulpcAhcpEcush.u+tsltplLcNuclAspt..........phPlCQDTGhssl.FlclGp.pVphss...slp-slscGVccuYp..-ssLRhShVtc.Ph.pRpNTsc....N....sPAllchchl................sG...-plclphhsKGG..GSENhotl.....thL........sPup...ultcaVl-sltphGss..sCPPhhlGlGIGGTs-cAshLAK+.ALh+.lsptp..s..........chtcLEpElh-plNpLGIGsQGhGGpsTsLsV+lpphP...sHsAShP.VAlslsCtAs.R+sphpl .................................p.hstpAhpcsSahL.ssahppltp....hh.cc..c..........ust....sKhlhhphLcNuclAAcs..........hhPhCQDTGhsh.l.hsKhGpcVhhsG..........s.cpslscGVhpuYh..-ssLRhStss..s.shhcchNTGs..........N....h.PA..lch.h.V........................................................sG.....Dc..hchhshsKGG..GStNKohh................shL........sPu............slts.all-p.h.po....hGsu..ACPPh.hlulsIGGTutcsslhstK.A.h+..lDph..sstt.............u...+shcLEpELh-psppLGlGAQ.hGGphhshDl+lhphP....RHuAShP.VuhsluCu.As.Rph+hpl.................................... 0 239 456 591 +5511 PF05683 Fumerase_C Fumarase C-terminus Moxon SJ anon Pfam-B_2085 (release 8.0) Family This family consists of the C terminal region of several bacterial fumarate hydratase proteins (FumA and FumB). Fumarase, or fumarate hydratase (EC 4.2.1.2), is a component of the citric acid cycle. In facultative anaerobes such as Escherichia coli, fumarase also engages in the reductive pathway from oxaloacetate to succinate during anaerobic growth [1]. 
21.80 21.80 22.00 35.20 20.10 21.70 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.30 0.71 -5.19 72 3515 2009-09-10 16:06:45 2003-04-07 12:59:11 7 8 2299 1 713 2175 505 198.00 43 47.11 CHANGED GoGsspLpsPs...lcpaPcl.shcss...sss..............................+R..VsLss..lTc..--lppacsG-sLLLoGcllTGRDAAHK+lh-h.lspGEsLP..VDhcs+hIYYVGPV...DsltcEsVGPAGPTTuTRMDpaTcphLcpsGLhuMIGKuERGstsl-AI+cHtusYLhAlGG.AAaLlu+uIKpucVVuFt.-LGM.EAIacF-VcDM.PVoVAVDscGsulHppuPtp ..............................................................G.h.t.Lp........hp.hPt......t.........tps.................................hclsLsp.Phpc....ppL.sph.luspl.LoGsIls.uRD.h.AHt+ltEh...l....p.p.......G......c..s......LP...............l..+..s..+sI..YYAGPs.............css..s..shssGS......hG..PTTusRMD.s.a.sc.p.h..sp.sGshlMluKGsR.u.p.p.sp.-A.C.+KHsuhYLsulG.G.AAsLuppsIKcl.E.sV.tas.ELGM.EAIWcl-V-DF.PshlhlDs+GNshapph..p.................. 0 244 461 596 +5512 PF05684 DUF819 Protein of unknown function (DUF819) Moxon SJ anon Pfam-B_9034 (release 8.0) Family This family contains proteins of unknown function from archaeal, bacterial and plant species. 
20.70 20.70 21.60 21.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.95 0.70 -5.64 6 548 2012-10-02 17:06:44 2003-04-07 12:59:11 7 5 422 0 194 501 310 333.30 25 86.51 CHANGED uhLhuhuAhslWs...Ec+sKht..usVsuhllshLluhhhsslGLl...sspossasslhshLLPtsI.LLLhpsDLR+Il+.us+LLhhFLluSluhhlGsllAahlhp.h....lusDsW+hsAALhGSaIGGusNhlAhpssLsVssoshusulssDslhhulahhlLhhLsuht...pttshA-TsplpussschspspspEc+psss.schhhhlulShhlsuluthlushh.......h.hul.....susshsslhusslsLlhuFsshhsh..PuupcluplLlYshhsllGususlhslls.APhlhLauhhhlhsHlslhLulGKLF+l-LphlhlASpANIGGPsoAsshAsAhs.suLlssGlLhGlhGhulGTalGlhhG.hlpph .............................................................hlhh.hhuhhhhh.......p..p.....hh.h.t.......thlsuhl.hhhhhshlhsshs.lh........................................................t..s....ha..h.h.phllPhsl.lhhh..phDl+.........t.l.hp......u..s.+hlhhFhhu.shuh.hl.....G...s...l..uh...hl...h.t.h..................h....s.c........h........h.........p..h....h..us....lsuS.al.....GG.usNhsAht.p.......h..h...t......s..s..s.....s....h.....huhhlssDslhhslahhhL...hhhss...h...t....h...h...p..t...h..ht.s...c.t.pt.h......p....t.....h...t.....p....p...h............t.t........t....t...............t...p............................s..hhh.hl...u..h.u...hhhs.s..lup....h....hushh...............................h..................t.ha..hllhsh...sl.u..lh.h..s..h.....sp.....h...h.ph........s..Gu.pp.lushhlY.l.hlsslGh.th.s...lt..t.l...ls...s.Ph...hh...l.h.u..h..l..h..l..hlHhh.l.hl....h...lu.+.lh+...........h..............sl.....hh.lhlASp.ANl.GGsso.A.ss......hAs....Ahp..suLsssulLhullGYslGsahGhhhu.hl...h............................................. 0 70 128 167 +5513 PF05685 Uma2 DUF820; Putative restriction endonuclease Moxon SJ, Bateman A anon Pfam-B_7809 (release 8.0) & Pfam-B_8730 (release 14.0) Family This family consists of hypothetical proteins that are greatly expanded in cyanobacteria. The proteins are found sporadically in other bacteria. 
A small number of member proteins also contain Pfam:PF02861 domains that are involved in protein interactions. Solutions of several structures for members of this family show that it is likely to be acting as an endonuclease. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.28 0.71 -4.90 170 4083 2012-10-11 20:44:44 2003-04-07 12:59:11 7 12 450 7 1583 4539 272 163.40 17 81.91 CHANGED hp-a.hphs........ttth+hEh..hsGplhh..h...sshs.shHsp.hsstLhttl..t.h..hpt..ths..hshss.shshph....................tts.h.PDlshhptp.......hpphspt...................shsPslllElhSsssp...t-.htp.................KhthY.......t.thGlttaal......lD............spppp.......lplaphs..tt.......tap.hph......ttsphhph..sls.sht....lslp ..................................................................th..th.........t..t...p.h.Eh..hpGtlh.......h...........ss.s...Htt..h.ttlh.....thl.....tth.......hpt.....t.s......hhhhs..shtlth......s.............tpshhtPDlh.lhttp.........tph.pt...............................hstsPplllEll........S...s.u...sp...tDhtt.........................................K.h.t.h.Y.......t..phG.ltcYal......l-..............s.p...p...p.p...........lp.lapht....tt........tat....th........t...t..h....................t............................................................................................... 0 486 1227 1544 +5514 PF05686 Glyco_transf_90 DUF821; Glycosyl transferase family 90 Moxon SJ anon Pfam-B_6682 (Release 8.0) & Pfam-B_7101 (Release 8.0) Family This family of glycosyl transferases are specifically (mannosyl) glucuronoxylomannan/galactoxylomannan -beta 1,2-xylosyltransferases, EC:2.4.2.-. 
20.50 20.50 20.80 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.28 0.70 -5.78 19 1264 2012-10-03 16:42:30 2003-04-07 12:59:11 7 25 380 0 784 1222 143 256.40 18 53.19 CHANGED s..ps.sCPsYFRaIccDLcPWcps.GI..oR-hlcRu..+ppAthRlsIlsG+lYlcpaccshQoRDhFThWGllQLLRcYsG+lPDlELhFsCsDhPhl+ptsap.t.......sPPPLFpYCusspolDIVFPDWSFW.GWsEl......Nl+.WshhhtclpcusscspWpcRtPhAYW+GNssVsp..Rt-Ll+Cs.hos.p.hsAclhpp............................Dhs.t.c.uh+pssl.spCpa+YpI.l-GhAhShp.KYILuCDShsLhlps.Yh-FF.+sLhPhpHYhPlcsttc..spsl+.tlpWupspsptApcIucpGSpFlpccLpM-hVYDYMaHLLsEYAKLL+aKPplPpsusElss-shsC.hp.....Gh.+phh.pSh...sssps.PCph.PsPhp..tht.hhp+ptp.h+pVcphE .............................................................................................................................................................................t.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.......t...........t...........a.t.p.+....shh.h.....a+....G..........t.........t.............t.................................R.........t...h...h..t..h.......t..............t..........................h......t.s.thh.t.............................................................p.t.t.............hh...p........h...s....h.t-...t...h.......p.aK......Y...l..l..sl..-.........G...s.s...h.S.s..p...h...thl..htssSlsl.t......t.....s..h..........a........h......................-.....aa.....h...p...............t........L.....h.....P...h....h....H....Y..l..P...lp..p.......s.................hp-....l........p.........
..h....l...............p........a................h.........p..........p........p.......s.........p.....t...A...p...pIu.p.p.u..p.p.ahp............p.....lp..tt.h......Y.hhpl.h........pauph............................................................................................................tt..................................................................................... 0 263 474 641 +5515 PF05687 DUF822 DUF822; Peptidase_M15_2; Plant protein of unknown function (DUF822) Moxon SJ anon Pfam-B_7149 (release 8.0) Family This family consists of the N terminal regions of several plant proteins of unknown function. 25.00 25.00 26.20 25.70 21.30 21.00 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.18 0.71 -3.85 15 154 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 22 0 105 163 0 141.80 46 37.09 CHANGED uuusRpPTW+ERENNK+RERRRRAIAAKIauGLRApGNYsLPKHCDNNEVLKALCpEAGWlVEsDGTTYR...KGs+Ps...t.-hsGsSss..sSPsSShp..........SPhsSYpsSPsSSuFPSPoph-.hsht..........ssllPaLpshs.........ssSuhs .........ussRhPoW+ERENNK+.RERRRRAIAAKIauGLRtaGNYpLPKHCDNNEVLKALCpEAGWsVEsDGTTYR..........KGs+Ps.........phhGs.Sss.....soPpSShp..........SPhsS.pssst.ssphsSssp.s.......................h.s............sss.................................................................................................. 0 15 66 86 +5516 PF05688 DUF824 Salmonella repeat of unknown function (DUF824) Moxon SJ anon Pfam-B_2973 (release 8.0) Family This family consists of several repeated sequences of around 45 residues. 23.80 23.80 23.80 23.80 23.70 23.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -8.15 0.72 -4.32 25 1631 2009-01-15 18:05:59 2003-04-07 12:59:11 6 24 187 0 31 888 3 47.80 36 14.57 CHANGED Dush...s....AuKsKpGEslslsVTs+Du.sG.sPlssssFsLpRscuhsRpss .................stsh...s.......AuKsKtGEslslsVTs+Du..sG.sPlssssFslpRu.suhsRps...... 
0 0 0 13 +5517 PF05689 DUF823 Salmonella repeat of unknown function (DUF823) Moxon SJ anon Pfam-B_2973 (release 8.0) Repeat This family consists of a series of repeated sequences (of around 180 residues) which are found in Salmonella typhimurium and Salmonella typhi. Sequences from this family are almost always found with Pfam:PF05688. 21.00 21.00 21.20 21.40 20.90 20.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.47 0.71 -4.94 30 1826 2009-01-15 18:05:59 2003-04-07 12:59:11 6 36 210 0 42 1044 6 180.10 34 58.02 CHANGED GhTsAsGpsolsloQssuhGlKTslsAslssssssoss..hsVIFTVlTSPDospApMWGHMs-TlTs....sGhsF+RPhLsuEhsusss.......ohh-sNEsWuh.hs...sth.ssssst..Cs.hshlPshspLpuLYsspssst..lpTshGWPstts........YhSuspsssss.......apsls.LssGspsshsssss...shhoCl ..............................................GhTsusGssolslsQs.suhGl+Tslssshhs..s..s....ss..oss..hsVIFTVlTSPDos...pAphWGHMs-TlTs....sGhsFcRPh.LtuEhsusss........shh-sNEsWut.hs.....t.tssssst......Cs...hshlPs...hspLp....uLYss....h...s....ss.s....l.poshGWPspts...........Yhuu.spsssss.......atsls.Ls.sGsps.psssss.....thhoCl.................................... 0 1 2 22 +5518 PF05690 ThiG Thiazole biosynthesis protein ThiG Moxon SJ anon Pfam-B_1138 (release 8.0) Family This family consists of several bacterial thiazole biosynthesis protein G sequences. ThiG , together with ThiF and ThiH, is proposed to be involved in the synthesis of 4-methyl-5-(b-hydroxyethyl)thiazole (THZ) which is an intermediate in the thiazole production pathway [1]. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.39 0.70 -5.30 28 2805 2012-10-03 05:58:16 2003-04-07 12:59:11 9 11 2724 15 705 5198 2724 239.00 49 90.63 CHANGED pIus+sFsSRLllGTG+YsS.plhpcAlcASGspllTVAlRRhstss..stssllchlc.pshplLPNTAGCpoAcEAlpsA+LAREsh......sssalKLEVIuDs+pLhPDshpTlcAAEhLlc-GFtVLPYsssDsllA++Lc-sGCuAlMPLuuPIGSGhGlpNhhsLclll-ps.cVPlIVDAGlGsPS-AAtAMElGsDAVLlNTAlApApDPlsMAcAhthAV-AGRlAa.AGhh.p+c.hAsASSP ..................................................................................................................h.IuscpFsSRLhhGTGKaso.plhpcAlcASGupllTlAlRRhsh.....t..p.......................p.............sslLchl......ts.......lslLPNTuGs+oAcEAlhsAcLAREsh.......sosalKLEllsD.s.+..hLhPDs..lETlcA..AEhLl.c-GFh.VLPY...s..s.sDPl.l.s.+R...L..c.-.s..GC.u.....A....V....M....P...L...G...A...P...I.....G.........S.........s...p....G..........l..........t..........s...............t.........h..........L....c...l..........I......l...c.....pu.....s......V..P...V..l..V..D..A..G.IGsPS......c......A....At.AM.E.h.G.sD.AVLl...NT....A...I...A...t...A...pD...P...l....t..MAc...Ah..+.h.A.V.-....A.GRhA..a..AGh...hs..p..p.p....h.A.pASSP..................................................... 0 222 452 596 +5519 PF05691 Raffinose_syn Raffinose synthase or seed imbibition protein Sip1 Moxon SJ, Eberhardt R anon Pfam-B_3204 (release 8.0) Family This family consists of several raffinose synthase proteins, also known as seed imbibition (Sip1) proteins. Raffinose (O-alpha- D-galactopyranosyl- (1-->6)- O-alpha- D-glucopyranosyl-(1<-->2)- O-beta- D-fructofuranoside) is a widespread oligosaccharide in plant seeds and other tissues. Raffinose synthase (EC:2.4.1.82) is the key enzyme that channels sucrose into the raffinose oligosaccharide pathway [1]. 
Raffinose family oligosaccharides (RFOs) are ubiquitous in plant seeds and are thought to play critical roles in the acquisition of tolerance to desiccation and seed longevity. Raffinose synthases are alkaline alpha-galactosidases and are solely responsible for RFO breakdown in germinating maize seeds, whereas acidic galactosidases appear to have other functions [2].\ \ Glycoside hydrolase family 36 can be split into 11 families, GH36A to GH36K [3]. This family includes enzymes from GH36C. 20.00 20.00 20.10 20.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 747 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.28 0.70 -7.00 11 570 2012-10-03 05:44:19 2003-04-07 12:59:11 7 15 210 0 281 620 6 368.90 23 73.23 CHANGED hsls-upLhl+s.pslLocVP-NVo....uo.ssss....s..........ssspGuFlGFots.cscS+HlsolGpLcsh+FhSlFRFKlWWoTQWlGpsGpDlphETQalllEhs......p.s.......pt.csYlVlLPllEGsFRouLQsGcsDcVcIClESGSopVcsSoFpsllYlHsussPapLl+-Ah+Al+sHLsTF+hLEEKolPulVDKFGWCTWDAFYLsVsPpGVhcGlKsLu-GGsPP+FVIIDDGWQSIspDsc...c..DutNlVhuGpQMssRLppacENpKF+cYcss..............................................................................s..ppssGhKuFlcDLKpcFpul-pVYVWHALsGYWGGVRP..tssc.h-uplh.PpLSPGltuTMtDLAVDcls.tGlGLVpPcpAp-hY-ulHSYLussGlsGVKVDVhplLEhLuppaGGRV-LuKAYacALosSlsKNFsGNGsIASMpHCNDFhaLuTKQIulGRVGDDFWspDPsGDP.GsaWLQGlHMlHCAYNSLWMGpFIQPDWDMFQSsHPsAEaHAASRAIuGGPlYVSDslG..sHNFDLLKKLVLPDGSILRspaauLPTRDCLFcDPL+DGpolLKIWNhNKasGVlGsFNCQGAGWs.cs++s+sasEsspsloGol+scDlEWpsptss...phshst-asVYh.ppuccLllhs..ussl.lTLcshpFELholsPVpcls.ttslpFAPIGLlNMFNSGGAlp..slcassp...........................sVclcV+GsG+FtAYSSp+Ph+Ctl-Gs-lEFpYps-.GhVslpl 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hht...sMt.....s.............................................................hRsu.....D....ah.......................Hh...shhsshh.h.t.....h.h....sDaDMF..................o........p...........................ut......h.hu..+hhsGsslYloD...s....t..........th............s.......ht.lt.h..h..................................................................................................................................................................................................................................................................................................................................................................................................................................................h..................................................................................................................................
.......................................................................................... 0 71 176 235 +5520 PF05692 Myco_haema Mycoplasma haemagglutinin Moxon SJ anon Pfam-B_3547 (release 8.0) Family This family consists of several haemagglutinin sequences from Mycoplasma synoviae and Mycoplasma gallisepticum. The major plasma membrane proteins, pMGAs, of Mycoplasma gallisepticum are cell adhesin (hemagglutinin) molecules. It has been shown that the genetic determinants that code for the haemagglutinins are organised into a large family of genes and that only one of these genes is predominately expressed in any given strain [1,2,3]. 20.00 20.00 22.50 22.20 18.10 18.00 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.79 0.70 -5.77 31 148 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 6 0 49 106 0 382.60 42 66.32 CHANGED spAlS..pLps.KsNA-phAstFhKpsLsKspLous.............tssspp...QPuNYSFVGYSVDlss.............tppshPNWNFAQRpVWsussp.ss..........................................................stspssssLTDVSWIYSLu...GssuKYTLoFsYYGPo.TuYLYFPYKLVKsuDs.pVuLQYpLNssss.ptIsF............................................s..t..t.ttt...sspssssssphNssPTVs-INVAKVoLosLpFGpNTIEFSVPss......KVAPMIGN......MYLTSs....ssNpsKIYs-IFGN.s.sppsssouVTVDLLKGYSLAosaSTYltpFosL......sssssppssssYLlGaIGGss..sRsssss.........sNh.psPssssss....RTaTIYVNAPpsGsYaISGuYl........osssRsLpFsh....ssssss...NsVolsshspsNWoT....LuTFDT.............ssssssssossssc+TLsLpKGLNKIlluGs.....ssssAPaIGNLTFTLps 
..........t..shp..pLpp.KpNAsphss.F.KpsLs+splsus.............tspsptQPuNYSFVuYSVDlss..............tpt.PNWNFAQRpVWsssst.ht..........................................................sttpssssLTDVSWIYSLs...GsssKYTLsFsYYGPs.TuYLYFPYKLVKsuDp..VuLQYpLNssss.ptIpF..................................................................sstssths.sPTVssINlAKVsLosLpFGpNTIEFSVPts........KVAPMIGN......MYloSs....tsNpppIYssIFGN.s.sppssposloVDhLpGYSLAosaSTaltpassh.......sst.ppsp.hYLlGalGGss..sRsshss..........Nh.p.P.ssssp....RTaThYVNAPpsGpYaIpGsYh........osssRsLphps.....sstss...NslTls.h.tpsNasT....LtpFDT.............ttsssssss.ssss++TLsLpcGLNKIll.uGs.....sssssP.lGNLsFTLp................. 0 48 48 49 +5521 PF05693 Glycogen_syn Glycogen synthase Moxon SJ anon Pfam-B_2874 (release 8.0) Family This family consists of the eukaryotic glycogen synthase proteins GYS1, GYS2 and GYS3 [1,2]. Glycogen synthase (GS) is the enzyme responsible for the synthesis of -1,4-linked glucose chains in glycogen. It is the rate limiting enzyme in the synthesis of the polysaccharide, and its activity is highly regulated through phosphorylation at multiple sites and also by allosteric effectors, mainly glucose 6-phosphate (G6P) [3]. 
19.10 19.10 19.10 19.20 19.00 19.00 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.23 0.70 -6.28 7 829 2012-10-03 16:42:30 2003-04-07 12:59:11 8 5 520 24 319 797 26 398.40 37 87.51 CHANGED EVAWEVANKVGGIYTVl+SKA.Vos-EhGDpYshlGPhp-psh+sEVE.lEspsss........l+tslpuMps+GhplhaGRWLIEGsPpVlLFDluSuAatLscWKs-lW-tCpIGlPapDpEoNDAllhGahlAhFLtEFpspsp.........s.tVlAHFHEW.AGVGLhLsRt++lsluTlFTTHATLLGRYLCAGu.sDFYNNL-pFsVDpEAGKRpIYHRYClERAAsHsAHVFTTVScITulEAEHLLKRKPDllTPNGLNVhKFuAlHEFQNLHA.sK-KIp-FVRGHFYGHlDFDLDKTLYFFhAGRYEFsNKGuDhFIESLARLNahLKssss....csTVVAFlIhPApTNsFNVEoLKGQAVhKQLcDTlsplpcplG+RlF-pslp....Gp....lP.-hc-LLppsDhlhlKRslhAhpR..poLPPlsTHNMlDDusDPlLssIRRspLFNpptDRVKVlFHPEFLSSssPLlslDYE-FVRGCHLGVFPSYYEPWGYTPAECTVMGlPSlTTNLSGFGCFMpEplp..-spsYGIYIVDRRa+uh--SlpQLspaha-FsppSRRQRIIQRNRTERLSDLLDW+pLGhaYhcARphALp+saP-.a.h...sphhssst.........h+hsRPtSsPsSP..........osuphuS.psS-sEDs. .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................suhuh.h........hch.....th......lsT..lFTTHAThlGR.ls..uss..hshYs.L..as..........hDt.A.tc........h.........tl.p+ashE+tusH.scsFTTVSpITuhEspcLLc+ts..Dhlh.P.NGhp...s..hhst.h...ppFpsh+t.t+t...+l.phspu...c...h.hG....p.....................s...h...p...-.p.TL.hhhhuGRYEatNKGhDhFlEuLsRLNh......hp..tt........phsVlAFhhhPs.............................................................................................................................PhhTH.h....h.s...pD.lLs.l+phthhNp.tD+VKllFhPpa
Ls..ussslhsh-Y....-hl...h....Gsc...L...slasS...................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 100 174 261 +5522 PF05694 SBP56 56kDa selenium binding protein (SBP56) Moxon SJ anon Pfam-B_2816 (release 8.0) Family This family consists of several eukaryotic selenium binding proteins as well as three sequences from archaea. The exact function of this protein is unknown although it is thought that SBP56 participates in late stages of intra-Golgi protein transport [1]. The Lotus japonicus homologue of SBP56, LjSBP is thought to have more than one physiological role and can be implicated in controlling the oxidation/reduction status of target proteins, in vesicular Golgi transport [2]. 
19.70 19.70 19.70 19.70 19.60 19.50 hmmbuild -o /dev/null HMM SEED 461 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.55 0.70 -5.99 28 437 2012-10-05 17:30:42 2003-04-07 12:59:11 6 7 298 1 210 468 423 379.30 42 93.69 CHANGED hpssPs.YsoPttAh.puPtEclsYVssl.ssss..+.PDhLAsVDVDPpSsTYupVlpRl.hPshGDELHHhGWNAC.SS..Ca..scsst...ERRaLllPuLpSSRIYllDT+sDPRpPpltKlIEPpElhp+sGhopPHTsHChssG.IhlSsLG..ss-G..sGsG.GhhLLDp-TF-lhGpWEhsptsthhuYDFWap.tassMloSEWGsPsthccGhsspclhsG.pYG++LHhWDhsp++hhQolDLG.s-sthsLElR.hH-PscshGFVGss.....LSuSlapaa+.-c....sp.W.sscKVIslPuc.s-..u..LPsh......................PsLlTDI.lSLDDRFLYlSsWhpG-lRQYDlSDPtpP+LsGpVhlGGllp+u................shPsspsppLsGGPQMlplShDG+RlYlTNSLYSuWDcQFY.P-hl..GuaMlplDlDsct.GGLsls.sFaV...DFGpt....hhAHplRh.GGDsoSD.ah ........................................sPs.a.oP...Ah..p.....u..PtEplhYh.sl..........st......s...ts.....DhluslDlcPpS.ssYsp....llpph.h.s.hsDELHH.GWNsCSS..C.....scsth....pRphLllPuLhSuRlYllDst.-P....ptPplh........K...........l..l-sp-lht+sshuhPHT..sHChs..sG.lhlSsLG.....s.cG..su.G...GhlllDs-.................F...........plh.G.pW...........Eh......s...........t.......t.s...........................huYDFWap..t+ssMlSo.-Wu..sPs..hhp...p..Gh..s....tc.lts.s..hYG....pplahWDhptpchhQolcLG.t.p.t.hsLElRhhHsPsts.GFVGss.....Lsuslhpaa+....p.s................up..WphccsIplss..sp.sh.LP.h......................PsLlTDI.lSlDDRaLYhssWhpG-lRQYDloDPtpPhLsGplhlGG.l.cs.............................h....psppltG.GPQMlp.lS.hDG+RlYhTsSLausWDc..QFY.Pc.hh.........su.h..hl..........lc.ssst..GGhpls.sFhV...DFsp....t........hhsHphRh.GGDsooD.ah.................................................................................................................................... 1 60 99 164 +5523 PF05695 DUF825 Plant protein of unknown function (DUF825) Moxon SJ anon Pfam-B_8370 (release 8.0) Family This family consists of several plant proteins greater than 1000 residues in length. The function of this family is unknown. 
18.10 18.10 19.40 18.10 17.90 17.90 hmmbuild -o /dev/null HMM SEED 1390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.50 0.69 -14.39 0.69 -7.17 7 1423 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 537 0 50 1419 2 520.60 26 64.48 CHANGED MKcQp....hK.hIhEL+EI.+p.+s.pahhcSWTphNSVGohI+IFF+pERhlKLFDPRIhShLLSRNhpGSpSNpaFTIKGVlLhVluVLIYRhNp+.NMVERKshYLptlhPIPhN.ht.tNDT.EEuhGsoNhNhLIhs..LL.hPKGKpI.Es.hLsspEsThVLsITKht...............................pt.lAtI-..hKEKt.p.Lch........................................................................................................................................................................................................................pLKtS.s..h-hh-slp.EsSEYts.hNp+-...........................................................IpQ.cEcu..WcP.S.lphERpc............................c+.h.pphhPcEhc.cFlGN.T+SlRsaFSDRWS..ELaluS.....sPhE+.Th-pKLLKpp..lsFV..RcSEs....pEIlsLaKIITYL..QpTsSIauISSDPGhshl.Kpp.Dhsp......hpKN.hFshhphFa-ps+...hhcpsFcS..EERlpEhsDLFTLsITEPD.VYcptau...a....s.YtLsppphL..sclhNpRsc.KppSLhlL.slh...-ctc.ahcRIhpK.l..Isstp.....+.Khss.hsp.hhEuls..chIpshI.......puYlRslhNRFahhN+SsRNhc.tIppsQhGs-sLNpRThMKhpIppc..............................sYh.KWSsuocshQEaLEHFlSE...........QKscFpssFD.h.........................phphsp..IsWStshcK..KDh............s+..hh............hh.K.hhFhoKhlhhLuNsLshFh..VShGNhPIpRSEIpI.ELKGsscpLC.......NpLLESIGhpIVHLKKLKP..hLL--ass...SKFlIstuThu.Fh.NclP....hID.hpscpNp.cSFDNpDS.FShI.pDp-NWLNPsK.FppSSLIuSFYKAN+LRFhspsHHFpF.CpKRFPFsVE+s+.pNpsahYGQFLNlLhl+pKhFSLClGccKHsahp+sTISsIES........QVSNIFIPpDF...QSG-pp....YsL.KSFph.o+ssPhV+..RsIYSIh-hSsTPLTEsQIVNhERT.CQPh.D..hNLSDSEtpNhcphLN..FsSNMGLIHo.hS-c.L.SE.....KcKppsh..pl+ct..............................................scKtpMaphhQpDShFShLS.KWNLFpTYhP.FhTSsGYKYLs.lFLDsFS-lL.hhSpt.....VSIh........pDIh.......slSWcILQ.chshhph.lpoE............Ipppahp..NlhLucEhI+RN..sps..h.THLR.sNshEFhYSILhLlhVhGYLlhhalhhVSpAF.ELQp-hcplKSLM.PS.h..IELRKLlD+YP.SE.NSFhLKNlhlhshpplt.Sl.p.
Rthuhshph.........l.ulhtpppshNlshID.p.llshlsss.spIs.ptsshplSHsu ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 3 30 41 +5524 PF05696 DUF826 Protein of unknown function (DUF826) Moxon SJ anon Pfam-B_7303 (release 8.0) Family This family consists of several enterobacterial and siphoviral sequences of unknown function. 
25.00 25.00 26.50 28.10 21.50 18.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.73 0.72 -3.84 3 296 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 140 0 1 137 0 78.60 72 87.10 CHANGED MSEITSLVTAEAVKEVLRSEEVpSALKQKLRHNLEARLDAEVDAILDELLGs.AuP.sE..AG-oTAsD......s.ss-usEP.QP .MSEI..TSLVTAEAVK-VLRSEEVRSALKQKLR+NLEARLDAEVDAILDELLGss..AsPtsE......AGDsoApc...........usEP.pP................ 0 0 0 1 +5525 PF05697 Trigger_N Trigger; Bacterial trigger factor protein (TF) Moxon SJ anon Pfam-B_8447 (release 8.0) Family In the E. coli cytosol, a fraction of the newly synthesised proteins requires the activity of molecular chaperones for folding to the native state. The major chaperones implicated in this folding process are the ribosome-associated Trigger Factor (TF), and the DnaK and GroEL chaperones with their respective co-chaperones. Trigger Factor is an ATP-independent chaperone and displays chaperone and peptidyl-prolyl-cis-trans-isomerase (PPIase) activities in vitro. It is composed of at least three domains, an N-terminal domain which mediates association with the large ribosomal subunit, a central substrate binding and PPIase domain with homology to FKBP proteins, and a C-terminal domain of unknown function. The positioning of TF at the peptide exit channel, together with its ability to interact with nascent chains as short as 57 residues renders TF a prime candidate for being the first chaperone that binds to the nascent polypeptide chains [1]. This family represents the N-terminal region of the protein. 
21.30 21.30 21.50 22.20 21.20 21.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.66 0.71 -4.39 152 4472 2009-09-11 09:24:54 2003-04-07 12:59:11 8 7 4374 14 995 3073 2205 145.40 31 33.23 CHANGED Mpsphp.p.hsshpt.plplplsspclppplppthpchs+....psplsGFR....GK.VPh..pl.......lcppaGp.p.lhp-slpcllp...cshpculpc....p....plp.sl.upP...pl............p.hp....hppsps..hp...ashph-lhP-l...clss.hcsl...pl..pp.ts.pls-c-l-pplcplp ....................MplohE.p.hpshpt.plolslss-plcstlcpthpc.lsK....p.l..p.lsGFRK..........GK...V..Ph...sl.......lcp+..a....G..t.p..lhp.-slsplls....csatcA.lp-..........p.plp..sl..upP.....pl...............ss.tp......hc.p.Gcs.....ht....assphpVhPEl...c.L..ss...h...c.s...l...cV..cc.ss..-VoD.p-V-ptl-pl............................... 1 347 667 850 +5526 PF05698 Trigger_C Bacterial trigger factor protein (TF) C-terminus Moxon SJ anon Pfam-B_8447 (release 8.0) Family In the E. coli cytosol, a fraction of the newly synthesised proteins requires the activity of molecular chaperones for folding to the native state. The major chaperones implicated in this folding process are the ribosome-associated Trigger Factor (TF), and the DnaK and GroEL chaperones with their respective co-chaperones. Trigger Factor is an ATP-independent chaperone and displays chaperone and peptidyl-prolyl-cis-trans-isomerase (PPIase) activities in vitro. It is composed of at least three domains, an N-terminal domain which mediates association with the large ribosomal subunit, a central substrate binding and PPIase domain with homology to FKBP proteins, and a C-terminal domain of unknown function. The positioning of TF at the peptide exit channel, together with its ability to interact with nascent chains as short as 57 residues renders TF a prime candidate for being the first chaperone that binds to the nascent polypeptide chains [1]. This family represents the C-terminal region of the protein. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.40 55 4501 2012-10-02 13:36:56 2003-04-07 12:59:11 9 11 4242 11 991 3166 2160 155.90 23 35.84 CHANGED oL--L+ppl+cplp.cphppthppphccpllctLlcpscl...-lPpuhlcpEhsph....hpphtpph.t.tGhshpph.p..t...stcphccphccpAc++V+hsLllsclucpcplplo---lppplpchAp.pYs..h.pspplhphhtpptpht....tlcsslhccKsl-hllp ............................................................ol-cL+tcl+cplp.cph.cpt.hc.sp...h+ppsl-tllcs.s.cl....-lPpuh...lcpElcp....h..............hppht........pph...t...G...h......s....cp..........t..........st-p.....h.+c.phcc....p.Ac+RV+hs.Ll.....L.s....tl....s..cs..cp..l..c..ss-.--...lpp.t.......l.p......c.hAp....pYs...........ss..cp.......lh......p...h...h........pptph......tl+pslhpc+sl-hlh.t.................................................... 1 347 658 841 +5527 PF05699 Dimer_Tnp_hAT hATC; hAT family C-terminal dimerisation region Albrecht M, Bateman A anon Albrecht M Domain This dimerisation region is found at the C terminus of the transposases of elements belonging to the Activator superfamily (hAT element superfamily). The isolated dimerisation region forms extremely stable dimers in vitro [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.63 0.72 -4.36 49 2390 2009-09-11 20:57:40 2003-04-07 12:59:11 9 113 188 1 1830 2467 15 79.20 19 15.44 CHANGED -LcpYhp-.shhp.p.........hclLpaWp...tppscaPhLophApclLulPloosuu..EpsFSss..s+hlsct+splp.pslpuLlshcphl .....................................................................................s.h.t..aWt........tpp.tp.aPt....LtplApp....l..L.s.lPsoosss....ERsFSsh......tph.h.s..c.p.Rsplp.pphpt..lhhhp....................................... 
0 435 1044 1597 +5528 PF05700 BCAS2 Breast carcinoma amplified sequence 2 (BCAS2) Moxon SJ anon Pfam-B_7922 (release 8.0) Family This family consists of several eukaryotic sequences of unknown function. The mammalian members of this family are annotated as breast carcinoma amplified sequence 2 (BCAS2) proteins [1]. BCAS2 is a putative spliceosome associated protein [2]. 21.20 21.20 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.36 0.70 -4.88 18 311 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 249 0 219 303 7 197.40 30 87.94 CHANGED hss..p............hlDuLPYlDpthp.pssh..+pt....uspLlp-Eh+...phtsstshhpplP...tsshp.h.p.slhpsEh-Rhppt.....pP.httlDh.pRYclPtPsuspts....c.ptWppslpsutsthpHpshRhpNLELLscYGssuWchhN..............cpL-shlpphp+pLtphKcplp-lNtpRKhpQppsut+LppLEppWtpLlucsh-lEhAstpLE....tpltph+ccttp...............p ................................hss...............hhDuLPYhDpth..ssth......+pt.......sttLlppEhp.............thps.s..p.s......hhs......L..Ps.......s.s...............hs.h.p.shhppEh-.Rltsp..................pP...hthlsh..pRYc...hs...sP...ssspts...............shssWpp..slppuhsplpHtshR.hpNL.-.L...hpp.aGs.s...A..Wt...hhN................ppLpph.lpphpcpLpc.....h+..cplpplNhpRKttQ.p.suscLp.tLEppWhphVu+Nh-lEhsh.pLc.......ppl.ph+pp............tt................................ 0 72 117 173 +5529 PF05701 WEMBL DUF827; Weak chloroplast movement under blue light Moxon SJ anon Pfam-B_6516 (release 8.0) Family WEMBL consists of several plant proteins required for the chloroplast avoidance response under high intensity blue light. This avoidance response consists in the relocation of chloroplasts on the anticlinal side of exposed cells. Acts in association with PMI2 to maintain the velocity of chloroplast photo-relocation movement via the regulation of cp-actin filaments [1]. Thus several member-sequences are described as "myosin heavy chain-like". 
33.00 33.00 33.00 33.00 32.90 32.90 hmmbuild -o /dev/null HMM SEED 522 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -12.94 0.70 -6.04 36 300 2012-09-25 08:00:20 2003-04-07 12:59:11 6 10 31 0 200 315 0 319.10 18 60.30 CHANGED uAPFESVK-AVSKFG.Ghl........DWKtt..psh..ERRpt.hcpEL-psQcElschKcph-sAEssKspslpEL-pTK+llEELKhpLE+sppEcpQA+QDoELAclRs.cEhEpGluscsSss.....uKspLEsA+pRassAlsELcuVKpELcpl+c-assllsc+-sAhp+AcEAhsuuKcsE+pV--LThELlAsKEuLEpu+AAHhcAEEc+hssshu+-pDphsWcpELcQu-cELppLppcl..husp-LcuKLcpsoshLhsL+sELsuahp.uclsp.s..............p.....hp.t.lsus+cELEEsKtsl-KApsEsssL+ssusSL+uELEpEKusLssL.+p...+EuhAuhsluuLcsELp+scpElthsps+EpcsccthsE.lsppLQQAupEu-pA+ttAcsAp-Elp+sKEEAEpAKAussshEtRLpAshKElEAAKASEclAlsulcA.LpEpc.sstpsps.t....SssslTLolEEYtpLS++ucEuE-hAsp+VusAluplEtAK-oEs+oL-+Lcpsp+-h-p+KpALctAh .........................................................................................................................................................h.tth..h..........p.th..ph..sp..httht.tlt........................................................................................................................................................................................................................................................................................................................................................................h.....-Lpthp.t.......lpphtt-...ht...htt...lpt..cl..tp.ptthtphpp....p.....t.tt..h..tlp.ch.....thttpht.h..tp.tt......................t.h...hpphtt-hpth..p.......p.ph..h.tphp..p..htthp.ph....p......t....t..hh..................................................................................................................................................................... 0 30 112 160 +5530 PF05702 Herpes_UL49_5 Herpesvirus UL49.5 envelope/tegument protein Moxon SJ anon Pfam-B_7354 (release 8.0) Family UL49.5 protein consists of 98 amino acids with a calculated molecular mass of 10,155 Da. 
It contains putative signal peptide and transmembrane domains but lacks a consensus sequence for N glycosylation. UL49.5 protein is an O-glycosylated structural component of the viral envelope [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.25 0.72 -4.12 8 35 2012-10-01 21:33:21 2003-04-07 12:59:11 6 1 24 0 0 48 0 93.50 35 98.32 CHANGED M.sS.shhphshslslssllllulspusss-...................tthsltppt...cFWcuuCSA+GVsIstuouAoVlFYluLlAVllALLuhuYpACFRLFTuShhpccW ...................................................h.hsh.hhlshhlhhuhspus.ss....................t.shh.ptt..hsFWcAuCSA+GVsIstsossoVhFYluLlAVhVAlluhAY+ACFRlhTsshhppp...................... 0 0 0 0 +5531 PF05703 Auxin_canalis DUF828; Auxin canalisation Moxon SJ, Eberhardt R anon Pfam-B_7298 (release 8.0) Family This domain is frequently found at the N-terminus of proteins containing Pfam:PF08458 at the C-terminus. It is a component of the auto-regulatory loop which enables auxin canalisation by recruitment of the PIN1 auxin efflux protein to the cell membrane [1]. 
22.80 22.80 23.50 23.00 22.00 22.70 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.73 0.70 -4.55 11 176 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 19 0 114 162 0 205.20 37 58.36 CHANGED PpTPp-s.MEFLSRSWSlS.AoElS+ALts...................................t.t.ppshShsuspsspllh.t...............................stssusPlsPpc.ls..cssphh+ss...............................hTlG+Wh+c+ct..........K++-csRscsAplHAAVSVAuVAAAlAAVsuussss.s....................stpsuKhssAlASAupLlAo+ClEhAEthGAD+-plsoAVpSAVsl+SsGDlhTLTAAAATALRGAAsLKsRt.KEspp....AAlhPsE+ut ....................................................................s.ps.MEFLSRoWS.S.u.p.lspsh..................................................................tt......tt......hh...................................................sh...t..h.....p...h.h................................................................holu+Wh+cpct..............++K-csRhcpAplHAAVSVAulAAAlAAlsAuss.ts.........................s.tp.ss+h.shAlASAAsLlAupC.lEhAEthGAc+-plsusVpSAVss..posuDlhTLTAuAAT........uLRGAAsL+sRh.pc.h..........Ash.Ph-c.s...................... 0 12 62 89 +5532 PF05704 Caps_synth Capsular polysaccharide synthesis protein Moxon SJ anon Pfam-B_7575 (release 8.0) Family This family consists of several capsular polysaccharide proteins. Capsular polysaccharide (CPS) is a major virulence factor in Streptococcus pneumoniae [1]. 
24.50 24.50 24.50 24.50 24.40 24.30 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.88 0.70 -5.24 28 400 2012-10-03 05:28:31 2003-04-07 12:59:11 7 9 331 0 91 614 130 240.00 21 70.01 CHANGED +...........hh.h..tt..h.hppphhhphLccphpphl.pashppppp.pppp........IWhhWhQGh.-sA.PclV+pClpSl++pt..ssaplllLoccNlccYlslPchlhcKhcpGhl...shspaSDlLRlpLLspYGGlWlDATlaho..sslss.hh......cssFFsh....ppstppspshshupW.......................hhluu.ppsp.llshhcchhhpYa.pppsphhD..YFlhchhhpls.......hcp..ph.pphhch.h..hsNtp...ahLtthlpptascphapplppp.o.slaKLoYKhphsptpp....poaap+l ..............................................hhhh.....................................................p....p..tp..+........IWhhWh.Q.G...-sh..P.pllcp.C.hpShc+hh..ssaclll..ls.....c.....c.....N.....l.p..cY..l...s.....h..P..c..h...lh..p+h..p.pst..h.........sh..s....ah....SDllRlsLLtpYGGlWlD..uolhho...psl.sphhh..........ppshash..........pps.t....tt.......p..........h.h...p.....pa............................................hlsu..pps.s.....h.hp.hh.pch.h..h........t...aW..ppp.p.........Y.hh.h.phhh.hh.......hp.......................s.s.......h...h..p.......t.........h....t........phph.................................................................................................................................................................... 0 14 42 68 +5533 PF05705 DUF829 Eukaryotic protein of unknown function (DUF829) Moxon SJ anon Pfam-B_7638 (release 8.0) Family This family consists of several uncharacterised eukaryotic proteins. 
20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.43 0.70 -4.67 43 555 2012-10-03 11:45:05 2003-04-07 12:59:11 9 10 206 0 411 571 34 210.60 20 72.88 CHANGED slll..........................lhGWh.Gups+altKYsshYpc..tGhpslhhpssshphhh.s......ptlpsshcplhphhtspptpt...............sllhHsFSssGhhhhtshh....p...hppppthtphh..lpGhl....hDSuPutsph.htshpuhuts.hsps..............................ht.hhhhhhththhhhhhhhh.pssphhppt...hpshppssht.......ssp......LalYScsDtllshc-lEpahsptcp..pGhp.VpttpacsSsHVsHh+paP..cpYhspltcah .................................................................................llllhuWh.sup.+pltKYsph.Ypp........uhp.hlh.hpsshhphhh.t.......................................ptht.hhp...t.lhth......h..p.p.tt.....................................llhHsFSsu...Ghhhht.hhh.........................t...hp.ptp........t..p.hh..............l.....t....uhl....................hDSsP..s..ph....shtshshs....h.h.....................................................................h..hhhh.hh.hh..h.h.h....h...hh..h....h.h......s..h...h.h...tp............hp....ht........t.........................h.p......halYS..psD.lhshps...l-p.hhpttcp....................tuh....lp...a..tosHs..sH.hp..t..........p.Yhthh.ph.................................................. 0 122 232 349 +5534 PF05706 CDKN3 Cyclin-dependent kinase inhibitor 3 (CDKN3) Moxon SJ anon Pfam-B_5217 (release 8.0) Family This family consists of cyclin-dependent kinase inhibitor 3 or kinase associated phosphatase proteins from several mammalian species. The cyclin-dependent kinase (Cdk)-associated protein phosphatase (KAP) is a human dual specificity protein phosphatase that dephosphorylates Cdk2 on threonine 160 in a cyclin-dependent manner [1,2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.11 0.71 -4.80 2 187 2012-10-02 20:12:17 2003-04-07 12:59:11 7 6 136 7 85 382 124 132.60 34 62.73 CHANGED MKPP.SIQsSEFDSSDEEPl--EQTPIpISWLsLSRVNCSQFLGLCALPGCKFKDVRRNlQKDTEELKShGIQDlFVFCTRGELSKYRVPNLLDLYQQhGIlTHHHPIsDGGTPDIuSChEIMEELsTCLKN.RKTLIHCYGGLGRSCL.AACLLLYLSDoISPpQAI ............................................................................................................................................................................Rs.lppD.hppL+.s..G...hps.l.hs.hhspsELsp.....h..p..V....s..s...L.h..c...hhpp.tG..h.....h.h..a..HhPIsDs..s..s.P..-...h....s...p...t..h....p.....l.h...p..E....L...t..t.t...Lp.......s..sp....+..sl.l..HChGGhGRo.s.L...lA.ApLLl.........l.............................. 0 29 44 58 +5535 PF05707 Zot Zonular occludens toxin (Zot) Moxon SJ anon Pfam-B_3320 (release 8.0) Family This family consists of bacterial and viral proteins which are very similar to the Zonular occludens toxin (Zot). Zot is elaborated by bacteriophages present in toxigenic strains of Vibrio cholerae. 
Zot is a single polypeptide chain of 44.8 kDa, with the ability to reversibly alter intestinal epithelial tight junctions, allowing the passage of macromolecules through mucosal barriers [1,2] 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.55 0.71 -4.84 17 575 2012-10-05 12:31:08 2003-04-07 12:59:11 7 5 398 2 85 1421 205 183.60 20 52.20 CHANGED hlhllsGtsGuGKThhAVsh.lhs....slp...p....GRhlhT.NlstLs.lc......ph.p.....hp................................h....thht.W.ps....s..psullVlDEspp..hassRsh.st...........................................................shl.shapptRHhGhDllllTQshshlccplR.s.Lsphth+hp+t.shth.tpaphsh..shp.......thtphhp.......hphhphPKptFuhYc.Sss.ps .......................................................................................l.hhpGhsGuGKo.h...s..l...h....l.......................tht..........p............s..R....l...hs...s....lp...s.lp...hp............th..t.....................................................................................................................t....p.h..h..pa...tt............................ts.u...l...ll.lDE.s..pp..h..ass.cts.....pth...........................................................................................................................................................................................sphh..ph.h....s.pH........R..H......h.......G.......h.......Dl..h..l..h....TQ.sh...s...p....l.....c.p...l.R...p....hs...c...hthch.t..p.t....h.....h.G..h.tth.hphph.......hhpt................ptht.........tthhphspthathYp.Ssp.t....................................................................................................................................................................................................................... 
1 21 49 67 +5536 PF05708 DUF830 Orthopoxvirus protein of unknown function (DUF830) Moxon SJ anon Pfam-B_5425 (release 8.0) Family This family consists of several Orthopoxvirus proteins of unknown function. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.17 0.71 -4.24 33 2265 2012-10-10 12:56:15 2003-04-07 12:59:11 7 15 1518 6 287 1396 159 163.10 23 72.02 CHANGED plppGDllFhsss.sslsptlss.......SphsHsuIhhspss.....................s.hhVlEA.hsp.....sschssLpcFlpc......psphtVhRLpsh.....ttthppshphAhphlGpsYshsa..t.....................sc+hYCochVtcsY.pshGl.hsphphhtphhh................................sPpshhcssplptlh ..............................................................................................................hpsGDl.l.Ftp...s....t......u....sh.......sttIphh.op............u...huHluIh.ls..cs........................................................................hl..h.....Eu........hss..................sV..p..hs.s.L...p..p.ahpc...........spchs.lh.R.lssh..........s...pptpc.l.......s..p....p.......u.......p..h....l..s....p..s.....Y..s.hs..h..h.hp..........................................................................................s.sp.h.a....CSchVhcsa..tth.s...h..................h............................................................h............................................................................................................ 0 118 176 241 +5537 PF05709 Sipho_tail Phage tail protein Moxon SJ, Bateman A anon Pfam-B_5084 (release 8.0) & Pfam-B_10063 (release 10.0) Family This family consists of several Siphovirus and other phage tail component proteins as well as some bacterial proteins of unknown function. 
20.30 20.30 20.30 20.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.63 0.70 -5.24 71 1621 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 917 9 143 1105 17 296.70 14 91.64 CHANGED hhhlhshshtsss.....pshpphs.stsG..th.hhs...sphss+plslshhlhst.....shtc...........hpth.+ccltphl..tsccs.hhLhhssc........................................ss+hahshhssp...hsh...cpt..sshs.phslsFhs.spP.at.o...........h..............................................................................................................................................tspthshstsh.hh..................slhNtG...........sspspPh.....lclphp...ssssh.....hslhs.....................................................................................................................................................................................................................................................................................................................................hpsG-....pltls....ptssslhlsuhp.hhsthshs...........pphhtltsGtNp.lplps....ssshplph.pa+..hah 
...................................................................................................................................................................................h......h.....s......ph.phs..shsG......th....hhs.......t.p.hp.sh.plslthhhts........sht.s................hp..h.hcclhphh.......stc....p....h....hlh..h.ps.p.......................................ss..hh.ah.s..h..h..s....ss...hsh......ppt......hs.s.......phslphhs....pP.athu................................................................................................................................................................................................................t.tph.th.s.tsh....................................................slh.N.s.G...........shp..s.tsh..........lclpht......tss.sh.....ht.lhp..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hpsuc.....hlhlc.........pppp..pl..h..h...s....s....h......hhsth..s..ht.............................p.h...tlts.G..Np.lphp.........s.hp.hph..pa..ha.................................................................................................................................................................................................................................................................................................................................................................................................... 0 59 105 126 +5538 PF05710 Coiled Coiled coil Yeats C anon Yeats C Family This region is found in a group of Dictyostelium discoideum proteins. It is likely to form a coiled-coil. 
Some of the proteins are regulated by cyclic AMP and are expressed late in development ([1]). 21.40 21.40 21.40 21.60 21.20 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.63 0.72 -3.52 10 120 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 4 0 120 121 0 84.10 32 90.54 CHANGED MTIlASISSIGNsKSoSKSsluSFuSuo.S..hGSNSIuC....GuCGGuu...hshssssssuGlt......lGsslD..LsusssossGhsuuhhG...................us.....SCGC .....MTlhuSISSlu...ss...p..o.sS...K.S.s.l..u..S.huuu.o.S.....uSN.SlAC........GuCGGuu...........ss...ssGhh........hsh.sh...lsss..thstuh..hss..t.........................tC....................................... 0 102 120 120 +5539 PF05711 TylF Macrocin-O-methyltransferase (TylF) Moxon SJ anon Pfam-B_5055 (release 8.0) Domain This family consists of bacterial macrocin O-methyltransferase (TylF) proteins. TylF is responsible for the methylation of macrocin to produce tylosin. Tylosin is a macrolide antibiotic used in veterinary medicine to treat infections caused by Gram-positive bacteria and as an animal growth promoter in the swine industry. It is produced by several Streptomyces species. As with other macrolides, the antibiotic activity of tylosin is due to the inhibition of protein biosynthesis by a mechanism that involves the binding of tylosin to the ribosome, preventing the formation of the mRNA-aminoacyl-tRNA-ribosome complex [1]. The structure of one representative sequence from this family, NovP, shows it to be an S-adenosyl-l-methionine-dependent O-methyltransferase that catalyses the penultimate step in the biosynthesis of the aminocoumarin antibiotic novobiocin. Specifically, it methylates at 4-OH of the noviose moiety, and the resultant methoxy group is important for the potency of the mature antibiotic. 
It is likely that the key structural features of NovP are common to the rest of the family and include: a helical 'lid' region that gates access to the co-substrate binding pocket and an active centre that contains a 3-Asp putative metal binding site. A further conserved Asp probably acts as the general base that initiates the reaction by de-protonating the 4-OH group of the noviose unit [2]. 21.10 21.10 21.10 21.10 21.00 20.80 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.66 0.70 -5.30 16 452 2012-10-10 17:06:42 2003-04-07 12:59:11 6 12 353 11 134 489 715 213.40 27 70.32 CHANGED ssssppLYLDLl++lloNhlY-D.sh....................ssshlscssacscsRspGpDWPslAHTMlGh+RLcNLQcClEpVLtDGVPGDhlETGVWRGGACIhMRuVLtAaGlpDRsVWVADSFpGhPssssssHshDpthc.LHpaNclLuls..lEpVRpNFpRYGLLDDQVRFLPGWF+DTLPsAPl-cLAVLRLDGDLYESThDALssLYPKLSPGGaVIlDDYsl.PuCRpAVcDYRscaGIs-sIpcIDtsGVYWR+o ........................................................................................t................................................................................................................................h.t..h.t.p..sl..GshlEsGV.a+G...u...u...s...h........h...h...............t...sh...l.........t...s...h...s.........p.......s........R.......p..l..al.hDoFpG.h....Ps....s.sh....p.....p........h............t........c.....h........s..........................h........p......p.....h.....s................h.......t........s.....s...........h-p..V.....p......p...........s.....h..p...p...a.s..........h...h..s.....c......p.lc.....h.l..pGhF.p-.T....L.s....s.....s.....s.....h......c....p.......l.......All+lD....s.........D.h.YcS...Th.su.LptlY..s+..l........s...G.G.........hlll...DD.Y.....s....h...........s...s.....t....p....Al.p-...a...ht..p.ht...................................................................... 
0 54 98 113 +5540 PF05712 MRG MRG Moxon SJ, Mistry J, Wood V anon Pfam-B_5530 (release 8.0) Family This family consists of three different eukaryotic proteins (mortality factor 4 (MORF4/MRG15), male-specific lethal 3(MSL-3) and ESA1-associated factor 3(EAF3)). It is thought that the MRG family is involved in transcriptional regulation via histone acetylation [1][2]. It contains 2 chromo domains and a leucine zipper motif [3]. 20.50 20.50 21.50 22.70 19.10 20.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.31 0.71 -5.41 33 687 2009-01-15 18:05:59 2003-04-07 12:59:11 8 14 280 9 443 658 7 206.40 31 57.75 CHANGED ssst.cusc+ppst............p.th....hpcspl...plplPspLKphLVDDWEhlTKpp..+lltLPs.phsVspILccahpp......h...p.ppspsss..........p.p.............................hlcElls..Gl+hYFspsLu..phLLY+hER.QYtplhppp........................................................................................................................................................................sshs.SplYGA.HLLRLhlp...LPphlutos.hDppolshLhpplpphlpalspp...pchFsppsY 
......................................................................................................................................t.p.sppp............t..........ppcth...........hpc.pl...plplP-pLK.hLV..D.DW-hls+pp............pLh..tL.Ps....p.hsVssIL-cYhph...............hp...ppsssss................t.....................................................hlp.Ells..G...l+...pYFshhLs..s.LLY+..aERsQYt.clhtpp................................................................................................................................................................................................................................................................................................................................................................shs.SplYGA.HL....LRLF............V+...lsthL....uho......s....hspc.sl..t.......hLhp.hlp.pFL....paLscpt..sphFstp.................................................................. 2 114 174 307 +5541 PF05713 MobC Bacterial mobilisation protein (MobC) Moxon SJ anon Pfam-B_2832 (release 8.0) Family This family consists of several bacterial MobC-like, mobilisation proteins. MobC proteins belong to the group of relaxases. Together with MobA and MobB they bind to a single cis-active site of a mobilising plasmid, the origin of transfer (oriT) region [1]. The absence of MobC has several different effects on oriT DNA. Site- and strand-specific nicking by MobA protein is severely reduced, accounting for the lower frequency of mobilisation. The localised DNA strand separation required for this nicking is less affected, but becomes more sensitive to the level of active DNA gyrase in the cell. In addition, strand separation is not efficiently extended through the region containing the nick site. These effects suggest a model in which MobC acts as a molecular wedge for the relaxosome-induced melting of oriT DNA. The effect of MobC on strand separation may be partially complemented by the helical distortion induced by supercoiling. 
However, MobC extends the melted region through the nick site, thus providing the single-stranded substrate required for cleavage by MobA [2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.45 0.72 -3.99 27 1793 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 994 0 198 1402 105 47.80 25 39.04 CHANGED ll.......ppLutlGsNlNQIA+phNs.....thss.cph.h.httLh.plpppLpplpc ........................hpLsplGsNlNQIA+phNp.........tth..ttt..ph..................l..tl.t.......h........................................ 0 71 131 175 +5542 PF05714 Borrelia_lipo_1 Borrelia_lipo; Borrelia burgdorferi virulent strain associated lipoprotein Moxon SJ anon Pfam-B_7866 (release 8.0) Family This family consists of several virulent strain associated lipoproteins from the Lyme disease spirochete Borrelia burgdorferi. 20.80 20.80 20.80 20.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.37 0.70 -5.23 18 520 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 34 4 27 422 1 182.80 25 78.06 CHANGED ppppscchKNsLLsD..........L+NLIEpAsp-+-KYhK+hcEEPs-Q....YGIhu..FcpL.Wstu.spEslu-s.oc+ShpYR+hsYus.LNslcss.cLKchocIlh.........hutpsptlaNlhpplGtslDpllspLYsKKDsLsKL-Is.cLc+LKNshEKlLSlKphlScMlsQLLLDYpssps.I+TDssKLcsals.pl.pQh.cKpcEu-pLKspIhoI.tsl ..................................................pp.pp...p.h.hsp...........pp....h.c.s...t.....pchcp.h.h.K.....p.h.....c.tp..ppp......................h.thtt.......hchlth.........h................h-.h..tts.....pphph++hhYos.L.s.appp.clpphtpIlp..............t..thh....t.thlh....p.h....h.....hsl..php.....l-phlt.hl...p..p..p.Ds..L.p.php.p.plcpLhpph-p.LplKpphtctlp..phl.-......Ypp..N..p...stI+s...-.pt...Ltp.ahp...............p....tp..th..........h.................................. 
0 17 17 17 +5543 PF05715 zf-piccolo Zf_piccolo; Piccolo Zn-finger Yeats C anon Yeats C Domain This (predicted) Zinc finger is found in the bassoon and piccolo proteins (e.g. Swiss:Q9JKS6). There are eight conserved cysteines, suggesting that it coordinates two zinc ligands. 30.00 30.00 30.10 33.90 29.00 29.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.50 0.72 -3.96 12 212 2012-10-03 17:27:21 2003-04-07 12:59:11 8 9 36 0 101 168 0 59.80 62 3.12 CHANGED s+shCPLCpTp...luo.s.sNFNTCTpC+spVCNLCGFNPsPHLTElpEWLCLNCQhQRAL ......s.pshCPLCpTp....lu.o.p.-.s....PNaNTCTpC+spVCNLCGFNP.sPHLo.E.h.pEWLCLNCQhQRAL........ 0 4 17 41 +5544 PF05716 AKAP_110 A-kinase anchor protein 110 kDa (AKAP 110) Moxon SJ anon Pfam-B_5702 (release 8.0) Family This family consists of several mammalian protein kinase A anchoring protein 3 (PRKA3) or A-kinase anchor protein 110 kDa (AKAP 110) sequences. Agents that increase intracellular cAMP are potent stimulators of sperm motility. Anchoring inhibitor peptides, designed to disrupt the interaction of the cAMP-dependent protein kinase A (PKA) with A kinase-anchoring proteins (AKAPs), are potent inhibitors of sperm motility. PKA anchoring is a key biochemical mechanism controlling motility. AKAP110 shares compartments with both RI and RII isoforms of PKA and may function as a regulator of both motility- and head-associated functions such as capacitation and the acrosome reaction [1]. 
25.00 25.00 26.70 25.90 24.80 24.10 hmmbuild -o /dev/null HMM SEED 685 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.55 0.70 -13.31 0.70 -6.54 5 156 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 35 0 76 141 0 335.10 35 35.52 CHANGED oP+KSLSKIASELVNETVoACS+NhssDKAPGSGDRs.uo.QS.PsL+apSTLKIKESsKcGpGPDsRPGSKKSFFYKEVFESRNuGDA+EGGR.hPt-RKhFRs.-RPDDFosSIScGIMTYANSVVSDMMVSIMKTLKIQVKDTTI.ATIVLKKVLLKHAKEVVSDLIDSFMKNLHNVTGoLMTDTDFVSAVKRolFSHGSQKATDIMDAML+KLYSVlhAKK.PEplRKscDKSESYSLlSMKuGsGDPKsRN..LNFAoMKSEsKlREKspocs.ssKEcTCAETLGEHIIKEGLTlWHpoQQNpsKSsuhptu.....ppQhtss.-hshthP.D.sphs..tsPpsPEKsENFMs-SDSWAKDLIVSALLLIQYHLAQ.............................GGspDA+SFlEAAuoTNhsPssSPss+DEu+L+Ss.l.hs-.EpsEKKDLpSVlFNLIRNLLSETIFKs-csCEuKs+cp.lKE-+uspCERPl.......Sssss+hsED-E.TsGAlSGLTKMsssplDGpMNGQMVDHLM-SVMKLCLIIAKSCD..AuLAELGD-KSGDASR.oSAFP-NLYECLssKGTGoA.EAlLQNAYQAIHNEhRuhSuQPPEGCthPcVIVSNHNLTDTVQNKQLQAVLQWVAASELNVPILYFAGDDEGIQEKLLQLSAsAVEKGpSVGEVLQSVLRYEKERQLDEAVGNVTRLQLLDWLMuNL ........................................................................t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p..llV.N.s.psph..tpLpAsLQWlAASphslPhlYFhtsp-s.hEK..........h.plsthstcKuapVG-lhptVhpatK...c..p.pt..tp.sphp.LhDWLh.......................... 
0 4 8 28 +5545 PF05717 TnpB_IS66 Transposase_34; IS66 Orf2 like protein Moxon SJ anon Pfam-B_5707 (release 8.0) Family This protein is found in insertion sequences related to IS66. The function of these proteins is uncertain, but they are probably essential for transposition [2]. 22.10 22.10 22.30 22.30 21.80 22.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.29 0.72 -4.62 135 3276 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 1155 0 503 2167 362 95.80 41 89.63 CHANGED sss+laLsstssDhR+GhsGLuslVppslth-PauGslFlFps+..RtD+lKlLaWDusGhsLhhKRL..EcG.+Fh.W.P..sspt..slpLos..tQLphLLcGlDhppsp.t..sh .......................s..pla.lssuhsDMRpGhsGLuthVppsh.p...-P.a.S.G.p.lF.lFp.G+..+tcplK.lLahDus.G.hs.LhhKR.L..E..p.G..+Fh......W..P...sscps...hhtLo...tQlsh........LlcGlshpt.t.....hh.............................. 0 117 266 386 +5546 PF05718 Pox_int_trans Poxvirus intermediate transcription factor Moxon SJ anon Pfam-B_5843 (release 8.0) Family This family consists of several highly related Poxvirus sequences which are thought to be intermediate transcription factors. 
21.40 21.40 22.30 25.20 18.70 18.70 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.39 0.70 -5.94 12 61 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 44 0 375.30 68 99.35 CHANGED MccLFpaL+sIEDcYsRTIFNFHlh+ss-lusIYshh+-+IuspshFsclV.sscl+psIKKLlYCDIplTKHIINpssYsshssssppss..KhuQaFDIphssss..hStRTs-IF-pDKSSLlSYIKTTNKKpKlDYGEIKKTlHutspo..sYFSG++SD-YLSTTV+sspspPWIKoISKRhRlDIhspuIlT+GKSSILQTIEIlasNRTCVKIFKDSThHlILSKDKsEpuClsllsKLFpsYclLFtLlaslTtsptFtphtssuscllssssF-EKlshI+pht..c.YGlpNFKlGMFNLTastsIsaTVFPSLLct.sSKIKFFKGKKLNIVALpSLc-CtpYVp.ApslLctMpcRSphLsslsItosSV-pLKpLLh ....................MDsLFoFLH.EIED+YuRTIFNFHLISsDEI.GDIYGlMKERISuEshFDNIVhscDI+sAIK.KLVYCDIpLTK.HIINpssYPlaNcoups....KpspaFDINoDsus..ISpR.TVEIFEREKSSLVSYIKTTNKKRKVsYGEIK.KTVH..GGTsu..NYFSGKKSDEYLSTTVRSshsQPWIKTISKRMRVDIINHSIVTRGKSSILQTIEIIFTNRTCVKIFKDSTMHIILSKDKcE+GClchIDKLFhVYhsLFlLhcDIhpN-hFcEVAslss+VLouTshDEKLhlIK+hA..DsYGVsNFKIGMFNLTFIKul-HTVFPSLLD-.-SKIKFFKGKKLNIVALRSLEDClpYVocSEshl-hM+ERSsILNuIDIETtSVD+LK-LL.l............... 1 0 0 0 +5547 PF05719 GPP34 Golgi phosphoprotein 3 (GPP34) Moxon SJ anon Pfam-B_7957 (release 8.0) Family This family consists of several eukaryotic GPP34 like proteins. GPP34 localises to the Golgi complex and is conserved from yeast to humans. The cytosolic-ally exposed location of GPP34 predict a role for a novel coat protein in Golgi trafficking [1]. 
23.00 23.00 23.80 23.30 22.60 22.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.53 0.71 -4.25 88 612 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 392 9 331 621 2 216.50 28 82.18 CHANGED Ls--lhLLuhcsp.pGp....hhssshphu..LsuuhLhELsltGclpls..........ps+......l.tsh..sspsss......................-s..lLcps....lptlt......ptc.spp.....spsWlpt.ps............+sh.pctl...tpp...Lssc.Gllcpccp..phLshh...hh...pas.hsD...sstcpplpp+lpssl....sttss............................................sscsss.......LlsLhtus.slhpphh.ss...............................ppscptlpp...........lsptshhst.........................................................slcpt......lpphpsAs ......................................LhEEl.LLL.uLc-c...pGhh...hhsssluhu..LpGshL....l....ELuhcGRlplp...................pc+........................l..hlh.ssssTG......................-s..lLD-uLchl+............spt..s.o.........lpsWI.chhsG..............+sl.R-+l.scs....LV-+.GlLps.E+p....sh.L.h.h..shssaP..ls-...ss.h+pclhc+lpssl.....p.ssts.................................................shRshu.......LlsL.AaAu..sllcssh..ssh..p...c........................ppscphltp..........h.p.hs.............................................................................tht................................................................................. 0 118 192 272 +5548 PF05720 Dicty_CAD Cell-cell adhesion domain Yeats C anon Yeats C Family This family is based on a group of Dictyostelium discoideum proteins that are essential in early development ([1]). Swiss:P16642 and Swiss:P16643 are located on the cell surface and mediate cell-cell adhesion. 
19.00 19.00 20.50 19.10 18.50 17.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.52 0.72 -4.34 6 17 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 4 0 16 20 1 76.10 30 37.11 CHANGED IhN--GESTISGKuaPLPsPhIaPsPYhhRFhcYchEGpLWsNcEFclKSGKIEasGEEaDIPpSpsshhKh.D-ptshI.l ...........l.sptGcSoIpGhshshPs.h.aPsPahhph.h.p..YphEG.s.l.asppcFclpSuKlEhsGc.ca-lPsSpssh.c..-p...................... 0 15 16 16 +5549 PF05721 PhyH Phytanoyl-CoA dioxygenase (PhyH) Moxon SJ anon Pfam-B_5670 (release 8.0) Family This family is made up of several eukaryotic phytanoyl-CoA dioxygenase (PhyH) proteins, ectoine hydroxylases and a number of bacterial deoxygenases. PhyH is a peroxisomal enzyme catalysing the first step of phytanic acid alpha-oxidation. PhyH deficiency causes Refsum's disease (RD) which is an inherited neurological syndrome biochemically characterised by the accumulation of phytanic acid in plasma and tissues [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.38 0.70 -4.30 39 3239 2012-10-10 13:59:34 2003-04-07 12:59:11 8 56 1181 23 1723 3267 6124 205.00 16 65.62 CHANGED accsGalllcs....hlss.pplsthppphppl.............................tthtsstphphhhptpttsststhh.....ts.pthtthhcs.............hhsshscphlspssh.................htshh....hh+tsphGsss...shHpDhthh........ttP....tphls....hhlAl-Dhsh-NGshhhlPGSHph..t....hh...th.tt.............................tpthlsh.hctG-sllacsplhHuotsNpost...Rpu 
.................................................................................................................................................................................................pppGalllps....hhs...pph.t.t.h.p....p...t.h.t.p.h..............................................................................h.....t.t........t...h.....t...h...h..h.t....t...t....h.....h.t.....t.....t.hh....................t...hh.th.hhps.................................thhshh.pplhstphh....................................................h.pshh..........hh.p...s.t....Gsts............sh.H..........p...........D...h.t.h..h...............................hps..............phls...........hhl.sl.s..-.s...s.......c..s...G..........s...........hh..h..........l.....P...G.....S......H..p....h........h.................h..t.tth.tt........................................................................t..t..t....h.h....h..t..h....ps.Gsslhapsp.hh.H..s..u.s..tN..tott...R..................................................................... 0 633 1084 1476 +5550 PF05722 Ustilago_mating Ustilago B locus mating-type protein Moxon SJ anon Pfam-B_5804 (release 8.0) Family This family consists of several Ustilago mating-type proteins. The b locus of the phytopathogenic fungus Ustilago maydis encodes a multiallelic recognition function that controls the ability of the fungus to form a dikaryon and complete the sexual stage of the life cycle. The b locus has at least 25 alleles and any combination of two different alleles, brought together by mating between haploid cells, allows the fungus to cause disease and undergo sexual development within the plant [1]. 
25.00 25.00 37.50 35.50 20.10 19.60 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.85 0.70 -5.10 2 41 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 6 0 1 40 0 123.50 69 42.78 CHANGED Il+KFARpDRsRMKhLVpAKh.uSs.Ss.s.uTpsSLssNLDDlLp-NLG+.LTPsDKppFEDDWsSMISWIKYGVKEKVGDWVYDLsAAsKKo.P+sG.sRsVTTsApRpPARKTtstspsKs+pAp.RASpTPShDST...StLESTPELShCSTADsSFSoFsSshShupasPFQp..pl.QSPolpsRGsRKVKALPKRAupp.PsE...lsN....................................................GpIPFl.........sLSsAFs ....ILKKFAREDRSRMK+LVRAKLSSSNQSoPPS.................................................................................................................................................................................................................................................................................s.......... 0 1 1 1 +5552 PF05724 TPMT Thiopurine S-methyltransferase (TPMT) Moxon SJ anon Pfam-B_5821 (release 8.0) Family This family consists of thiopurine S-methyltransferase proteins from both eukaryotes and prokaryotes. Thiopurine S-methyltransferase (TPMT) is a cytosolic enzyme that catalyses S-methylation of aromatic and heterocyclic sulfhydryl compounds, including anticancer and immunosuppressive thiopurines [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.50 0.70 -4.90 12 1046 2012-10-10 17:06:42 2003-04-07 12:59:11 6 7 838 11 423 2913 640 189.50 24 87.81 CHANGED hstshWpppWtssphsacQppsssLLh+ahss..ps.sshRsLVPhCGpuhDhhWhAspt+.VlGl-lS-hAlpchhp-ts.....pPp..hpphssh+c.............DhFshpsppl..hDhlaDRushsAlsPchRspYupphhpLL.ssuc..hhLlTLpYs.scts.GPPF.Vst.....tEhctlhs.sshcltplcp.tDsLss..chtt.slpthtEplahl.p+p .........................................................................................thWp.pp...a...t...p...s...p.....h...s...a...c......t....p..........s.......s.....s....h......L...h....p....a.......h...............p...........p.....................t...........................t..........t................t.....t......c....lLl.P........h.CGc.u..h..D..h..h..a.L....A..s...p...G...........a..p.V.hGl-l..S..p..h..A..l...p...p...h...h...p.cp.th..................p.p.....ht..p..h......s......s..h...pt...................t....t..l...p..h..h..t.u..Da....F......s.......h...s..........s....t......t......h......t.....t.....h..chl.a.D..p.s...h....h....s..A..L......s.........p.....h.....R....t.....p.......Y..s......ppht....pl..ltssup.............h.L..l.....s........h.....t...a.......s........t.....s....p.......h...t.....G...P..P...a.s.l.s..t............t-l.pth...hs....h...h...p.l....h.................................t.................................................................................................................................. 0 151 249 342 +5553 PF05725 FNIP FNIP Repeat Yeats C anon Yeats C Repeat This repeat is approximately 22 residues long and is only found in Dictyostelium discoideum. It appears to be related to Pfam:PF00560 (personal obs:C Yeats). The alignment consists of two tandem repeats. It is termed the FNIP repeat after the pattern of conserved residues. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.03 0.72 -4.00 34 6356 2012-10-02 21:32:02 2003-04-07 12:59:11 7 134 22 0 5165 6328 244 42.60 32 40.91 CHANGED FN.Qs.lphsslP.............................pulppLph.ussFsp.lp..hssLPpSlppLphupp ..............................................................aN..p....l..t.......s...s...l....P...................................................................s..o..l......p...pL.pF.....G....p.......p....F....N...Q....s..lp.............s...s....l...P..s...S..lp.p.Lphu................... 0 2305 5161 5165 +5554 PF05726 Pirin_C Pirin C-terminal cupin domain Yeats C anon Yeats C Domain This region is found the C-terminal half of the Pirin protein. 21.30 21.30 21.30 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.24 0.72 -3.95 34 2522 2012-10-10 13:59:34 2003-04-07 12:59:11 8 12 1617 3 1004 2315 1370 103.60 28 34.49 CHANGED alDlpLpsGuchphs.hspsapshlYll...pGs.s...........plsupp.........lstpplslhu...sGsplhlpus...pss+....hlLluGcPlsEPlhhaGPFVMsop-EIcpAhp-acpG..+Fs ................................................................hDlplp.s.Gu.p..hsls....h.s...p....s....a.....p.shlYll...cGs..l.........................................pl..supp.......................lp.ss..p.hslhs.............sGs.p...l...p...l.p....As.............psuc..............llLl..u.G..c..Pl.sEPls.ha..GPFV.Msoc-EIppAhpDapsG.ch........................... 0 307 618 837 +5555 PF05727 UPF0228 Uncharacterised protein family (UPF0228) Bateman A anon SWISS-PROT Family This small family of proteins is currently restricted Methanosarcina species. Members of this family are about 200 residues in length, except for Swiss:Q8TMK1 that has two copies of this region. 
Although the function of this region is unknown the pattern of conservation suggests that this may be an enzyme, including multiple conserved aspartate and glutamate residues (Bateman A. pers. obs.). The most conserved motif in these proteins is NEL/MEXNE/D, where X can be any amino acid, which is found at the C-terminus of these proteins. 25.00 25.00 30.80 29.90 21.10 21.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.83 0.71 -4.39 13 26 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 5 0 26 25 0 120.50 39 64.74 CHANGED MThNYoI-Ysoshhst+YYIhlDKDch.DltcE.LpK-......csWT.s....sI+KGDYYlIsVSEQAIcDcsFLslLcK.NlQVKKhVaChIpF..sDGS.......+s.WIPEcDAlRIKNELEpNEsVhoVthDYlhs .................hshNYsI-YNsshhsscYYIhVDcDch.slhpE...LpKs......csWh.s........lKKGcY.YlI.lSEpslpDcsF............LslLcKpsLQlKKhVhChIpF..ucGo.......cs.hIsEcDAl+IKNELEpNEpllhVth-.l..s........ 0 9 13 13 +5556 PF05728 UPF0227 Uncharacterised protein family (UPF0227) Bateman A anon SWISS-PROT Family Despite being classed as uncharacterised proteins, the members of this family are almost certainly enzymes that are distantly related to the Pfam:PF00561. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.98 0.71 -4.57 9 1870 2012-10-03 11:45:05 2003-04-07 12:59:11 7 4 1161 0 305 1448 549 179.10 38 93.88 CHANGED hllYLHGFsS.SPtsHct.l.hpahspcshhlshss.p.tcp.pphlpclcphltphsscp...shlsGsuLGGYaAphlu.hpGl+p..VlhNPsltPhcshsshlsc............Epa+.hphKslpsht.............hcp.s+hhllhu+tDElLD...pcssuchpshhphVh-ss.sHtFpshucalpplhuFts ............................................................................llYL.HGF...sS..SP..........t......s.........t....c..........t.....s..l....h....h......p...a........lst...c.h................l........p...h.............s............p........................................p....c...h.........h....c..............l.........-.....p....h......l....t.....p...p......s.s..cp................hllGsSLGG...YaA.......p.h....l....u..........h.....h.......s.....l...tt..........V..l....l.............N...P..u...l..p...P...a......E......h...p....s...h........l..s...p....................................................................................E.......p..h..p......h..s.....h....K..s.....h.ph.......................................................cs.....D.phhllhppsD......E......l......LD.......pps...s...t.....h.....h.....t.....s.....h.....h.....p.........h.....V...h....-ss..sH.tF...p..s.hp.s...ahp.p.IhsFh.s.............................................................................................................................................................. 0 55 142 229 +5557 PF05729 NACHT NACHT domain Bateman A anon [1] Domain This NTPase domain is found in apoptosis proteins as well as those involved in MHC transcription activation [1]. This family is closely related to Pfam:PF00931. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.90 0.71 -4.59 26 5874 2012-10-05 12:31:09 2003-04-07 12:59:11 7 749 661 0 4173 9193 904 159.70 19 16.53 CHANGED cslllpGpAG.GKTsLhpplshtWApuph.p......hphlFahss+phspts.......uLs-lltsphspsssshsc..............h...lhphspRlL..............hllDGhDElts..s...........p.t...hLLtsLlpcplLspsplllTsRssuhtp...ltptlpps...............hhpltuFspsptpphh.pcaassp .....................................................................................hlhlpG.sGhGKo.s....l....s.........p.........p.........h..........h.........h.........p..........h........s........p........s......p........................pt................h..th.....h.....a...h...h..s...h..p....p...h..s..t......p.t.......................sl.t.p....h..l......h.....p........t......h....s....p....h....t....t......h.tp............................................................................h...h.....p.....p...............t.....p.....l..l....................................................................h..l.l..D.G...l..DE...hp...t..p......................................................................h...h..l...t..........p...l.......l........p......t..................h....s.....p....s........p......l....h.......l...o.......o...R..s..t...t..h..............h..t.....h....................................h.l.........h............t...............t................................................................................................................................................................... 0 1337 2441 3535 +5558 PF05730 CFEM CFEM domain Bateman A anon [1] Domain This fungal specific cysteine rich domain is found in some proteins with proposed roles in fungal pathogenesis [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.52 0.72 -4.15 114 969 2009-01-15 18:05:59 2003-04-07 12:59:11 6 17 154 0 757 948 0 66.30 25 23.45 CHANGED thsslPsCAhsChsp.sh......ss..Cs...hsDhs....ChC...stsshtsslssC.....lt..ssC.s.ss.......ps..............ssh....................shss.shCu ..................t..stlPsC.u......h..s.Chtp.sh....s......ss...Cs........hsDhs........Cl..C........sps....s....ht...s......s...ltsC.......lt.....ss..Cs..sp.......-t...........................ssh....................shsp.shCs............................ 0 152 421 636 +5559 PF05731 TROVE TROVE domain Bateman A anon Bateman A Domain This presumed domain is found in TEP1 and Ro60 proteins, that are RNA-binding components of Telomerase, Ro and Vault RNPs. This domain has been named TROVE, (after Telomerase, Ro and Vault). This domain is probably RNA-binding. 21.30 21.30 28.20 28.00 20.30 19.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.09 0.70 -5.70 9 567 2009-01-15 18:05:59 2003-04-07 12:59:11 6 42 303 6 249 524 40 237.30 18 39.07 CHANGED lpsspGshsapssspscltp........h....LshuspssoaYtstpc.spsphpcll+tltpt-sphllphhhhhppctphtchsshlhAlAlhtp.s.....+...................tshchhscVs+hPocLhphlpa..................ph..tsspptsshs+slR+ulschasphs....thtLsKY...c.pRsuhph+DlhRhsH.cssss................thsthhpahhct........................h.h...h.pphhptpssptlphLpua.......p.....t.hpchhh........................................shE+.los+ls............stpVWcsLlps..hPhhAhLRNLsslscsGV.spsps....hVhpRLsctctlc+SRhaPachLsAhcshspucup.+sp..............................................................hpW.ss.plhp....ALEpAhthu.hcNlsPhss 
.......................................................h...................................................................h..hht..h.......p.s..h..hp....h.hpt.t..h...h.p...hhhs...hh............................t...................................................................hh..hh.thhp.sp.hhthhth................................................................................................................hst..pthh.phh.p.h.....t.thhph.......p..tt.t.p.tc.....p..t.......................................................hh......h.........................................................................................................................................................................................................................................................Wttlh.p...hsh.th.hhpLtph.t.s..h..t....................lhthltp...t........lhpuh.hPhphh.A.p.h..t........................................................................................................................................................lt.uh.hs....h.............................................................................................................................................................................................. 0 97 149 203 +5560 PF05732 RepL Firmicute plasmid replication protein (RepL) Moxon SJ anon Pfam-B_5929 (release 8.0) Family This family consists of Firmicute RepL proteins which are involved in plasmid replication. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.05 0.71 -5.03 4 349 2012-10-04 14:01:12 2003-04-07 12:59:11 6 2 297 0 26 301 50 118.70 27 83.06 CHANGED hppRhtshhpGoEpaINpsoGElhEhphlhhcppshNFsKsalppllphLDLlGNpKs+ls.allcNLN.psNslItTsRclAccTGhSLpTVppThKhLp-uNhlK+ps.GVhhlNP-lLh+Gscpc+h.lLlpFpph-pEspchp.........................................EsuLhpht.hK .....................................................................................................................................................................h.-..l......l..G.....p..K.....c......h..cllp.a..lh..-.s...h-...ps....s.........h....l.....h.....h...s...h.s......-..l......p...cc..l.....s......h..S.+.sT.lhpshKhLc-pp.ll...p.....+.h......p...........p...........G......h.........Yh...lN........................................................................................................ht.................................................. 1 7 19 23 +5561 PF05733 Tenui_N Tenuivirus_N; Tenuivirus/Phlebovirus nucleocapsid protein Moxon SJ, Bateman A anon Pfam-B_5998 (release 8.0) & Pfam-B_19756 (release 10.0) Family This family consists of several Tenuivirus and Phlebovirus nucleocapsid proteins [1,2]. These are ssRNA viruses. 
25.00 25.00 25.10 26.50 18.10 24.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.34 0.70 -5.07 12 395 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 94 10 3 299 0 203.80 40 85.00 CHANGED -hpchh.chuspAlc.....csplhsalsphsYpGaDAt+ll.pl..Lpc+u...Gcsat+DlphhIVhphTRG.....sh.cKh...hsstGucpsppLls+Yslhp..Gssu.DsITLuRlAuhFsshohpslpslo..s+LsVsuoshssh.uhsh.hh.hhPpFhsL.ssohP....cshtphlhshHhLahh.ho+ph.ssh+tcp+s.....-llchhsp.hshshsup.lsspc....R+clltsFtl ..................phpchhs-hupptls.....hs-l.sastphtYtGaDstplI.tl...LK-pG.........G-shsK.DhphhIlhthsRG.....shhpKh...hsstGuppstsLls+YGlhpp..Gss.uhssITLuRlApshs.shoppssttlp..thhsVsusshshh.ups.shh.hhPpFhtL.s.ohs....cuhs+hLhssH.la.hphoKph.s.ch+ttp+s.....-hhp.hpt.hhhsspSt.h.pcp....++.h...h...................... 0 3 3 3 +5562 PF05734 DUF832 Herpesvirus protein of unknown function (DUF832) Moxon SJ anon Pfam-B_7683 (release 8.0) Family This family consists of several herpesvirus proteins of unknown function. 18.80 18.80 21.90 72.40 16.90 16.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.36 0.70 -5.15 13 32 2009-09-11 10:40:49 2003-04-07 12:59:11 6 1 24 0 0 29 0 222.20 27 50.08 CHANGED MphslPVhGlspppp....pcWppllssFtscsssspsls.L++hFc.......tcschuhLuSLllLhphltsspptpp+ts.Llpsh.ts+hlApplacclhspps.pps.h-phFt-C+sRLtLLLEpsCGChpChpsscuLpcsp.thhR.P+LpPHpppstutshLsplaNpslLssssslscapltsLh.sscphsshs.chpsEsshlusClhhCWLaalLppalps-lpslcpsl ..hp.slslhhlspcpp....ppapplhssFts..psssspslt.l+phap........pschuhLuuLlhLhphl.sstphtp+.s.Ll.shptshalspplah+lhspps.tps.hcphatcstsRLthllEpuCGC.pChpssctLpphp.thhcPP+lpPHpcpC.utshLstlhppslLssshslStasls-Lh.ssppa.thsssh...phEhshlsoCL.hCWlahllhcalpp-hthLcpsl. 0 0 0 0 +5563 PF05735 TSP_C TSPC; Thrombospondin C-terminal region Bateman A anon Pfam-B_1875 (release 8.0) Family This region is found at the C-terminus of thrombospondin and related proteins. 
25.00 25.00 25.40 26.50 18.50 23.90 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.24 0.71 -4.75 10 444 2009-01-15 18:05:59 2003-04-07 12:59:11 7 85 95 6 222 377 2 188.30 65 21.57 CHANGED QsDPsWVVhspGsEIhQslNSDPGLAVGccsasuVDF-GTFFIsssoDDDYlGFlFuYQssu+FYlVMWKKusQoYWpspPFcAsApsGlpLKlVcSsTGPGptLRNALW+TGsTssQV+LLW+DPhslGWKc+TAYRWpLpHRPsI.GhIRlphY-Gs+LlhDSGNIaDsTl+GGRLGVFCFSQEpIIWSNLpY+CNDsl.P .................QIDPNWVVh.sQGhElVQTh.N...S.D.PGLA.VGa................stFsuV.DFpGTFaV.....N..T.s..p...DDDYAGFlFGYQ......s......S......SpFYVVMWKQ....s.pQTYWpssPhRAhu.sG.lp.....LKs.VpSs..TGPG..EaLR.NALW.HTGsTss.....QVRhLW..+.D.P.+Nl..GWKDhTuYRWpLpH.RPps.GaI..R....VhhY.EGpplhAD..SGslhDp..T..htGG.RLGlFsFSQE.lhaSsL+YcCp-.............................................................................................. 0 43 61 129 +5564 PF05736 OprF OprF_membrane; OprF membrane domain Bateman A anon Pfam-B_4079 (release 8.0) Domain This domain represents the presumed membrane spanning region of the OprF proteins. This region is involved in channel formation and is thought to form an 8-stranded beta-barrel [3]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.14 0.71 -4.76 4 245 2012-10-03 17:14:37 2003-04-07 12:59:11 6 2 226 0 16 668 18 163.40 62 55.34 CHANGED MKLKNTLGhAIGSLlAATSFGALAQGQGAVEhEsFhKKpasDSV+phcs.G...GuSlGYFLTDDVpLsLoYDcsppsRusDsTGspKltGspouLcA.YHFGssG.DuLRPYVpuGhuHQSlsNl.ssGpsGRsQSThAssGAGsKaYFT-NhYARAGVEApYsLDpGch....-auAhVGlGVNFGGuu ......................................................................................TLGlsI.G.o.llAA....T..ShG..A..LA......Q...G.....Q.....G..A...V..E.hE....h....F...h....K..K.....a...D.....S...s.....+.....s........h.....c........s........G..........Gu...u.lGY.....FLTD....D....V..pL..t..L......u.....Y.s..c....s....H....s...s.......R...u.........D....s........s..t....s....p.....p.....I....K..G..s.s...T.....uL....DA..h..YH......F.ss......P.G...DsLRP.Y..VS.A..G.....F..u......a.....Q..S..l.......s....................ps....u.....+........s..G.....R...........D...p....S..T..F..........A......N.......l.......G....u..GsK.aY.FT-NFY..ARAGV.....E.A.pY..N...I.DpGcs...........E.W.uP..uVGlGVNFGGu.u.................................................................................... 0 2 4 11 +5565 PF05737 Collagen_bind Collagen binding domain Bateman A anon Pfam-B_5000 (release 8.0) Domain The domain fold is a jelly-roll, composed of two antiparallel beta-sheets and two short alpha-helices [1]. A groove on beta-sheet I exhibited the best surface complementarity to the collagen. This site partially overlaps with the peptide sequence previously shown to be critical for collagen binding. Recombinant proteins containing single amino acid mutations designed to disrupt the surface of the putative binding site exhibited significantly lower affinities for collagen. 
20.80 20.80 20.80 20.90 20.60 20.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.58 0.71 -3.99 15 1660 2012-10-02 17:35:21 2003-04-07 12:59:11 7 115 482 8 145 1485 0 134.50 19 22.34 CHANGED ssshaKpGhhDsssss...l+WplslNtshpsl..csAslpDhlssGQsLshsSlclhchshptstpssths............ptssstshtpsssssFplshss...slssuhpIsYpTpIT-tstp......papNpA.....pLssssl-shpss .............................t...h.Kh.G....hs.....spssp.....lpWp.lslN.......t..s..pppl....pss.slsD.s......h.s......s........s........p.......p....l....s..............s...S.l..p.......l.....hp.s..s.....hsts..t.sh.s.....ht....................s..t...t.hs...s.t.s.s.s.s.....s.a.s.lpatp...........thspuahlpYpT..pl...s...st...s..p................ph.pNpu......ph..st.p........s........................................................................................... 1 80 110 128 +5566 PF05738 Cna_B Cna protein B-type domain Bateman A anon Pfam-B_366 (release 8.0) Family This domain is found in Staphylococcus aureus collagen-binding surface protein. However, this region does not mediate collagen binding, the Pfam:PF05737 region carries out that function. The structure of the repetitive B-region has been solved [1] and forms a beta sandwich structure. It is thought that this region forms a stalk in Staphylococcus aureus collagen-binding protein that presents the ligand binding domain away from the bacterial cell surface. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.39 0.72 -4.21 135 13573 2012-10-02 19:08:27 2003-04-07 12:59:11 8 388 1204 80 1267 12494 454 73.40 23 22.10 CHANGED lpGsphpLhspssssht.......................hsTsssGphphssL......ssGs.YpltEsp.sPsGYphsssshthsht.spt.......ttlp ..........................................................................................ltGspa..pl..h...s..p...s...s.ph.htp..............................................................................hs.T.....s...p..s.....G.....p.....h..p...h...s.....s..L..............................tsG..p...Yplp..E.....s......p...u....P.....s.........G..Y.p.h...s...p...s.s.h...t.h.phtttt..........thh............................................... 0 652 998 1144 +5567 PF05739 SNARE SNARE domain Moxon SJ anon Pfam-B_6285 (release 8.0) Family Most if not all vesicular membrane fusion events in eukaryotic cells are believed to be mediated by a conserved fusion machinery, the SNARE [soluble N-ethylmaleimide-sensitive factor (NSF) attachment protein (SNAP) receptors] machinery. The SNARE domain is thought to act as a protein-protein interaction module in the assembly of a SNARE protein complex [1]. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.79 0.72 -4.28 105 4036 2012-10-03 05:55:03 2003-04-07 12:59:11 14 51 377 62 2609 3820 24 61.80 22 22.43 CHANGED cpccslpplppslt-Lcplhh-lus.lcpQschlDcI-ssl-pspsplcpuppc..l..pcshphpp ...................ppcpplpplppsltcLpplhh-lup.l..c.p.Qs.c......h.l..Dc.........I-psl-pspsplcpuppc..l...p+shp................. 0 824 1372 2046 +5569 PF05741 zf-nanos Nanos; Nanos RNA binding domain Moxon SJ anon Pfam-B_5908 (release 8.0) Family This family consists of several conserved novel zinc finger domains found in the eukaryotic proteins Nanos and Xcat-2. 
In Drosophila melanogaster, Nanos functions as a localised determinant of posterior pattern. Nanos RNA is localised to the posterior pole of the maturing egg cell and encodes a protein that emanates from this localised source. Nanos acts as a translational repressor and thereby establishes a gradient of the morphogen Hunchback [1]. Xcat-2 is found in the vegetal cortical region and is inherited by the vegetal blasomeres during development, and is degraded very early in development. The localised and maternally restricted expression of Xcat-2 RNA suggests a role for its protein in setting up regional differences in gene expression that occur early in development [2]. 21.30 21.30 21.40 23.40 20.90 18.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.26 0.72 -3.99 36 305 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 133 4 168 260 18 55.30 54 25.74 CHANGED CsFC+N.............NGEspplYpSHtL...............KDtcG+...VhCPlL.RsYsCPlCGAsGDpA....HTlKYCPhs ....CsFC+p...............NGEuctlYsSHtL...............Ks...cGp...VhCPlL.Rp.Y.sCPlCGAo.G-pA....HTlKYCPh.t... 0 34 47 129 +5570 PF05742 NRDE DUF833; NRDE protein Moxon SJ, Eberhardt R anon Pfam-B_6481 (release 8.0) Family In eukaryotes this family is predicted to play a role in protein secretion and Golgi organisation [1]. In plants this family includes Swiss:A9X6Y0, which is involved in water permeability in the cuticles of fruit [2]. Swiss:P54797 has been found to be expressed during early embryogenesis in mice [3]. This protein contains a conserved NRDE motif. 
20.70 20.70 20.80 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.92 0.70 -5.09 12 889 2010-07-12 16:41:27 2003-04-07 12:59:11 7 4 729 0 464 881 260 231.70 27 87.70 CHANGED MCllhhtapsps....pacLllAuNRDEaasRPoh.ht.Wt.sss.plLuGhDlcp.......uGTWlGlspcG...+huslsNlpps.cp..st..hSRGtLV.s-aLsu.ssss...tpahcpLt...ppupcasG......FNLlhs-hp............clphhoNpsst..h...tLssGhashSNu..cssW.Ksptu+phhtchlt..pssp...ptl.hppLhplhssp...........thhsDspl.ppus.h.php+..h..Louhalc....stpYGTRusTllhVcpcu.csphhERphtspss..........hppschph ............................................................MCllhh...thpsps............th.LllhuNRDEa.....hpR.P..o.t...s..h........t.h.W...............t.......s..............s..............s....p.......l..luGp.Dhps.............GGTWlGl.s..p.p..G.................+hAsLTN..............hp..p......s....t....t...........................s...s................t.SR.GtLl.t.....pa.....L.......p......u....s.ss......................pa.h.pplt.........t.p..s...p.t.....Y..s..G..............................FNLlhu-hp............................t...hhh.....hs.....s.p.st.........t...............................................................l......s.............sG.........h.a..........s..lSN.uh................h.........s.....s.....s........W....Kh.h.t............tpthhtph.lt.............tt..tt........................pthh..thht.sp..........................................................hss....t..t.l.....pp.uh.......thtp...............h.u.s..hh.....lt.................t...YGT.Rspohlh...lp.tp......t....ph.hhEt...h........................h........................................................................................................................................... 1 145 263 376 +5571 PF05743 UEV Tsg101; UEV domain Moxon SJ, Bateman A anon Pfam-B_6022 (release 8.0) Domain This family includes the eukaryotic tumour susceptibility gene 101 protein (TSG101). 
Altered transcripts of this gene have been detected in sporadic breast cancers and many other human malignancies. However, the involvement of this gene in neoplastic transformation and tumorigenesis is still elusive. TSG101 is required for normal cell function of embryonic and adult tissues but that this gene is not a tumour suppressor for sporadic forms of breast cancer [1]. This family is related to the ubiquitin conjugating enzymes. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.49 0.71 -4.43 23 474 2012-10-02 15:28:41 2003-04-07 12:59:11 8 17 278 17 281 627 11 113.90 38 28.07 CHANGED shc-lhsllptap....sLpspscsasa.sDGpsppLLsLpGTIPlpapGss.YNlPlhlWl.csYPh......ssPlsalcPTssM......sI+hsc.aVDspGcl.aLPYLppWsh...sSs.....LlsLlpphts....sFsccsPl ..........................................................hpchhphht.a.....sLp.h.hc..sasa..scGp..sp..Lls..LsG.T.I.PV.ac.Gss...YsIPl..slWl.csYPh...........................sPP.ls.a.V.p...PTssM........hI+...suc...aVDs....s.G.+l...YlPYL..p..p..Wpp..........spSs..............LlsLlphhhs....hFuc-PPl................................................. 0 94 152 221 +5572 PF05744 Benyvirus_P25 Benyvirus P25/P26 protein Moxon SJ anon Pfam-B_6153 (release 8.0) Family This family consists of P25 and P26 proteins from the beet necrotic yellow vein viruses. 
25.00 25.00 29.20 133.60 20.80 20.70 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.62 0.70 -5.03 3 211 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 182 0 213.60 72 99.80 CHANGED DlDphMPVFDlAY.DssHtPYll+RosHEVVhsVussGFICYPL.V..DLNDssslsshlYHN+lKTMRLpVsIpNNcusassFRAhsRFlVFSTPslSsWVNNGCpSLFSPFVGVNSsIDcpLL+RDS+GloVLaDRVY+VsRaT-hF.sVDFThNFRGPGNYsLsNusNaPsATTsDSIYVACVssWlsNsVFRLpSDSVuWVHSGLapGPVL-FGQsL.sAPDcD.DGVsDDG ....MGDILGAVYDLG.....HRPYLARRTVYEDRLILSTHGNICRAI....NLLTHDNRToLVYHNNTKRIRFRGLLCAhHsPYCGFRALCRVMLCSLPRLCDIPINGSRDFVADPTRLDSSVNELLV...SNGLVIHYDRVHcVPlHTDGFEVVDFTTVFRGPGNFLLPNATNFPRPTTTDQVYMVCLVNTV.NCVLRFESELTVWVHSGLYsGDVLDVDNNVIQAP....DGVDDs..... 0 0 0 0 +5573 PF05745 CRPA Chlamydia 15 kDa cysteine-rich outer membrane protein (CRPA) Moxon SJ anon Pfam-B_6389 (release 8.0) Family This family consists of several Chlamydia 15 kDa cysteine-rich outer membrane proteins which are associated with differentiation of reticulate bodies (RBs) into elementary bodies (EBs) [1]. 23.60 23.60 144.20 143.90 23.50 23.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.94 0.71 -4.41 5 37 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 35 0 4 18 0 131.10 62 84.93 CHANGED MS........ststsu..psllslVpshsup.ulspslpsposslTLlN.llGWs+h+hlpPlRsSKIlpSRAFQITLlVLGIlLVIAGLALhFlLpuQLGsNAFaL.lIPAlIGLVKLLlTSLsM.EcsCTPEKW+LCK+lLtToEDILDDGplNNSNKIF .....................M...........STVPVVQGAGSS.NSAQDISsposPLTLKp..............RISNLLSSTAFKVGLVVlGLLLVIA..sLlFLVSAASFVNAIYL.uIPAIlGCVNICVGILSM.EGaCSPERWhLCKKlLKTSEDIIDDGQINNSNKVF... 0 1 1 3 +5574 PF05746 DALR_1 tRNA-synt_1d_C; DALR anticodon binding domain Bateman A anon Pfam-B_196 (release 8.0) Domain This all alpha helical domain is the anticodon binding domain in Arginyl and glycyl tRNA synthetase. This domain is known as the DALR domain after characteristic conserved amino acids [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.55 0.71 -4.14 104 7484 2012-10-02 19:03:26 2003-04-07 12:59:11 10 22 4857 7 2023 5803 2941 115.90 26 19.29 CHANGED lQYAaARlpSlh+...+uspth.phttst.hh..............pc.pEhpLhtpLhpFscslppuscp...........hpPchlspYLhcLAstFspFY...p........ps....lls............scpp.h...........puRLtLspsstpsLcpuLpLLGlps.-+M .......................................................hpYAasRlpuIl+..........Kus..t..p..h......t.h...t.s..ss.hh................................pcs..pEhpL.h.ppltp..asc..hlp..ssspp................................tpPpt...lspY..Lh.c.Lussas.pFY..s...............ps..........hlls......................p-.s..p..t....................psRLs...Lhptstpslp..puh.t.h.l...Glp.s.ppM................................... 0 661 1256 1662 +5576 PF05748 Rubella_E1 Rubella membrane glycoprotein E1 Moxon SJ anon Pfam-B_6726 (release 8.0) Family Rubella virus (RV), the sole member of the genus Rubivirus within the family Togaviridae, is a small enveloped, positive strand RNA virus. The nucleocapsid consists of 40S genomic RNA and a single species of capsid protein which is enveloped within a host-derived lipid bilayer containing two viral glycoproteins, E1 (58 kDa) and E2 (42-46 kDa). In virus infected cells, RV matures by budding either at the plasma membrane, or at the internal membranes depending on the cell type and enters adjacent uninfected cells by a membrane fusion process in the endosome, directed by E1-E2 heterodimers. The heterodimer formation is crucial for E1 transport out of the endoplasmic reticulum to the Golgi and plasma membrane. In RV E1, a cysteine at position 82 is crucial for the E1-E2 heterodimer formation and cell surface expression of the two proteins. The E1 has been shown to be a type 1 membrane protein, rich in cysteine residues with extensive intramolecular disulfide bonds [1]. 
25.00 25.00 89.80 89.70 18.90 16.00 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.81 0.70 -5.89 2 838 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 11 0 0 460 0 312.50 94 88.37 CHANGED LTAVVLQGYNPPAYGEEAFTYLCTAPGCATQsPVPVRLAGVRFESKIVDGGCFAPWDLEATGACICEIPTDVSCEGLGAWVPAAPCARIWNGTQRACThWAVNAYSSGGYAQLASYFNPGGSYYKQYHPTAC-VEPAFGHSDAACWGFPTDTVMSVFALASYVQHPcKTVRVKFHTETRTVWQLSVAGVSCNVTTEHPFCNTPHGQLEVQVPPDPGDLVEYIMNYTGNQQSRWGLGSPNCHGPDWASPVCQRHSPDCSRLVGATPERPRLRLVDADDPLLRTAPGPGEVWVTPVIGSQARKCGLHIRAGPYGHATVEMPEWIHAHTTSDPWHPPGPLGLKFKTVRPVALPRsLAPPRNVRVTGCYQCGTPALVEGLAPGGGNCHLTVNGEDlGAhPPGKFVTAALLNTPPPYQVSCGGESDRAoARVIDPAAQSFTGVVYGTHTTAVSETRQTWAEWAAAHWWQLTLGAICAL.LAGLLACCAKCLYYLRGAIAPR ..............................................................................................................................................................................FHTETRTVWQLSVAGVSCNVTTEHPFCNTPHGQLEVQVPPDPGDLVEYIMNYTGNQQSRWGLGSPNCHGPDWASPVCQRHSPDCSRLVGATPERPRLRLVDADDPLLRTAPGPGEVWVTPVIGSQARKCGLHIRAGPYGHATVEMPEWIHAHTTSDPWHPPGPLGLKFKTV.RPVsLPRALAPPRNVRVTGCYQCGTPA.LV...EGLAPGGGNCHLTVNGEDVGAFPPGKFVTAALLNTPPPYQVSCGGE.................................................................................. 0 0 0 0 +5577 PF05749 Rubella_E2 Rubella membrane glycoprotein E2 Moxon SJ anon Pfam-B_6726 (release 8.0) Family Rubella virus (RV), the sole member of the genus Rubivirus within the family Togaviridae, is a small enveloped, positive strand RNA virus. The nucleocapsid consists of 40S genomic RNA and a single species of capsid protein which is enveloped within a host-derived lipid bilayer containing two viral glycoproteins, E1 (58 kDa) and E2 (42-46 kDa). In virus infected cells, RV matures by budding either at the plasma membrane, or at the internal membranes depending on the cell type and enters adjacent uninfected cells by a membrane fusion process in the endosome, directed by E1-E2 heterodimers. 
The heterodimer formation is crucial for E1 transport out of the endoplasmic reticulum to the Golgi and plasma membrane. In RV E1, a cysteine at position 82 is crucial for the E1-E2 heterodimer formation and cell surface expression of the two proteins [1]. 19.30 19.30 19.40 594.70 18.70 19.20 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -12.04 0.70 -5.15 3 68 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 10 0 0 85 0 267.00 97 26.88 CHANGED GLQPRADMAAPPsPPQPPRAHGQHYGHHHHQLPFLGHDGHHGGTLRVGQHHRNASDVLPGHWLQGGWGCYNLSDWHQGTHVCHTKHMDFWCVEHDRPPPATPTPLTTAANSTTAATPATAPAPCHAGLNDSCGGFLSGCGPMRLRHGADTRCGRLICGLSTTAQYPPTRFGCAMRWGLPPWELVVLTARPEDGWTCRGVPAHPGTRCPELVSPMGRATCSPASALWLATANALSLDHALAAFVLLVPWVLIFMVCRRACRRRGAAAA GLQPRADMAAPPsPPQPPRAHGQHYGHHHHQLPFLGHDGHHGGTLRVGQHHRNASDVLPGHWLQGGWGCYNLSDWHQGTHVCHTKHMDFWCVEHDRPPPATPTPLTTAANoTTAATPATAPAPCHAGLNDSCGGFLSGCGPMRLRHGADTRCGRLICGLSTTAQYPPTRFGCAMRWGLPPWELVVLTARPEDGWTCRGVPAHPGTRCPELVSPMGRATCSPASALWLATANALSLDHALAAhVLLVPWVLIFMVCRRACRRRGAAAA 2 0 0 0 +5578 PF05750 Rubella_Capsid Rubella capsid protein Moxon SJ anon Pfam-B_6726 (release 8.0) Family Rubella virus is an enveloped positive-strand RNA virus of the family Togaviridae. Virions are composed of three structural proteins: a capsid and two membrane-spanning glycoproteins, E2 and E1. During virus assembly, the capsid interacts with genomic RNA to form nucleocapsids. It has been discovered that capsid phosphorylation serves to negatively regulate binding of viral genomic RNA. This may delay the initiation of nucleocapsid assembly until sufficient amounts of virus glycoproteins accumulate at the budding site and/or prevent non-specific binding to cellular RNA when levels of genomic RNA are low. It follows that at a late stage in replication, the capsid may undergo dephosphorylation before nucleocapsid assembly occurs [1]. 
25.00 25.00 25.20 25.20 17.10 16.40 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -12.05 0.70 -5.02 3 110 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 10 0 0 117 0 208.20 97 34.49 CHANGED MASTTPITMEDLQKALEAQSRALRAELAAGASQSRRPRPPRQRDSSTSGDDSGRDSGGPRRRRGNRGRGQRKDWSRAPPPPEERQESRSQTPAPKPSRAPPQQPQPPRMQTGRGGSAPRPELGPPTNPFQAAVARGLRPPLHDPDTEAPTEACVTSWLWSEGEGAVFYRVDLHFTNLGTPPLDEDGRWDPALMYNPCGPEPPAHVVRAYNQPAGDVRGVWGKGERTYAEQDFRVGGTRWHRLLRMPVRGLDGDSAPLPPHTTERIETRSARHPWRIRFGAPQAFLAGLLLAAVAVGTARA MASTTPITMEDLQKALEAQSRALRAELAAGASQSRRPRPPRQRDSSTSGDDSGRDSGGPRRRRGNRGRGQRRDWSRAPPPPEERQESRSQTPAPKPSRAPPQQPQP................................................................................................................................................................................................... 0 0 0 0 +5579 PF05751 FixH FixH Moxon SJ anon Pfam-B_6803 (release 8.0) Family This family consists of several Rhizobium FixH like proteins. It has been suggested that suggested that the four proteins FixG, FixH, FixI, and FixS may participate in a membrane-bound complex coupling the FixI cation pump with a redox process catalysed by FixG [1]. 
21.90 21.90 22.40 22.20 21.70 21.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.71 0.71 -4.38 118 910 2012-10-01 19:30:51 2003-04-07 12:59:11 6 3 891 0 266 726 75 141.60 21 86.55 CHANGED +phhsa....hllshhusllssslshhhhAhsshs.uLVs-shYctupshstpl.....sctcttpsLGhp.splphss..........splplpl...tpGtP.htstplshplh+PTpsppDh.shhls...tsssG..hYpu...h.....hpGpWplcl................pssspta+hptclh .......................thasa.......hllhhh..hsllhsslshl.h...lA.h..p.s.....s...slVs...-sY....Y.cpu.pshs.t.ph.....sph.p.t..tp.p.hshp..sp.l.phps..............................sthplph....tpu.....ts......sp..tlp.lhhh+P...sp.....tp.....pD.......h...pl.lp........tsG.......haps.shpt...........hpG.pW.lcl................p..t.st.ah......h.......................................... 0 74 168 218 +5580 PF05752 Calici_MSP Calicivirus_MSP; Calicivirus minor structural protein Moxon SJ anon Pfam-B_6811 (release 8.0) Family This family consists of minor structural proteins largely from human calicivirus isolates. Human calicivirus causes gastroenteritis [1]. The function of this family is unknown. 21.20 21.20 21.20 72.00 20.50 20.40 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.07 0.71 -4.65 14 126 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 121 0 0 88 0 165.00 49 99.90 CHANGED MSWhsGALtsuGSLVDhAGTISsIVhQQRQls.h....pcQNcl.pcWhs+QEsLQcct.-lo+-LulpGPstRVpuAlsAGFsslsARRLAGSsERVhYGhLDRPIhptushpu.IppT+HLpshpuALosFKpGTs.aGpPsPPphthspPt.pssssplNL.GasPsSSsl ..........MSWLVGALQssGuLVDhAGTVSsIVYQpRQls.L....cpQNpLhpsWMsKQEtLQ+ctM-Loc-LulNGPAhRVpuAl-AGFDsVSARRLAGSuERVIaGhLDpPIhptsshsu.lppTpHLsslsuuLuTFKpGTs.FGp.PsP....Pp.h.psGsPt....pss.sP.plNl.GapPGSSss........ 
0 0 0 0 +5581 PF05753 TRAP_beta Translocon-associated protein beta (TRAPB) Moxon SJ anon Pfam-B_6857 (release 8.0) Family This family consists of several eukaryotic translocon-associated protein beta (TRAPB) or signal sequence receptor beta subunit (SSR-beta) proteins. The normal translocation of nascent polypeptides into the lumen of the endoplasmic reticulum (ER) is thought to be aided in part by a translocon-associated protein (TRAP) complex consisting of 4 protein subunits. The association of mature proteins with the ER and Golgi, or other intracellular locales, such as lysosomes, depends on the initial targeting of the nascent polypeptide to the ER membrane. A similar scenario must also exist for proteins destined for secretion [1]. 21.60 21.60 21.60 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.08 0.71 -5.12 22 254 2012-10-03 16:25:20 2003-04-07 12:59:11 9 7 174 0 139 284 8 166.30 32 74.51 CHANGED hh.hh.lhhhhs.shupp-s........sARlLspKplLscahVpsc-lsVpYsIaNlGsusAhcVplsD.suF.stcsFplVuG..phosph-plsPsssloHshllcP+p.GhFshouApVoY.....+ssccssphQhuho..ot.upssIlup+shs+pFost....hh-WhuFushshsshsl.....PhhlaasSKspYpth.pK ..............................hh....h.hhhl.hh.h.shsps-p............sAtllspKpl.Ls+.hhVtuc-lslpaslYNhGsu.sAh-Vpl.sD...sua....s......s-sF.......pl.Vu...G..........hshph.-.RlsP..u..u.sl....oH.shVlc.P.pp.hGhFshosAploY......hsppp.....st.l.t.huho........os.sthsIls.+th.s++Fs.t............hhcahshsshsh.shhh.....shhlh..SKpphtt....tt................................................................... 0 45 68 105 +5582 PF05754 DUF834 Domain of unknown function (DUF834) Bateman A anon Pfam-B_9258 (release 8.0) Domain This short presumed domain is found in a large number of hypothetical plant proteins. The domain is quite rich in conserved glycine residues.\ It occurs in some putative transposons but currently has no known function. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.25 0.72 -4.17 18 515 2009-01-15 18:05:59 2003-04-07 12:59:11 9 34 5 0 411 501 0 64.80 32 26.11 CHANGED psssssshpsTspsscpssuscpsGuss.RlDsDsGAPsVsGpstGADElscssA+shssss.ocuDstssG ........................................t...........s.sttptstscp..uG..GusR..l.Ds...ssGsPAV...s..spstusDclstssAcPptsss.p....p...tcs.tuss.................... 0 0 0 12 +5583 PF05755 REF Rubber elongation factor protein (REF) Moxon SJ anon Pfam-B_6903 (release 8.0) Family This family consists of the highly related rubber elongation factor (REF), small rubber particle protein (SRPP) and stress-related protein (SRP) sequences. REF and SRPP are released from the rubber particle membrane into the cytosol during osmotic lysis of the sedimentable organelles (lutoids). The exact function of this family is unknown [1]. 25.00 25.00 26.50 26.30 22.20 21.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.33 0.70 -4.59 10 99 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 31 0 41 94 0 182.90 39 85.39 CHANGED -pspsEEc+...LKYL-FVQsAAlYslssFSsLYtaAKDpSGPLKPGV-sVEusVKsVVuPVYpKFasVPh-lLKFlD+KV-soVp-LDR+VPPlVKpASuQAhohh..hs.hs.tlAsEVppsGlhtsAp.......shspshhs+......tLhspYEPhAEphAVpsW+tLNpLPLhPpVAplslPTAAahSEKYNcsVshsu-+GYsVAsYLPLlPTE+IuKVFtc-s ..................................t...ptpptc...LKaLtFVphAuhpshh....thuslYtaAKcpu.GPL.+sGVpsVEssV+sVluPVYp+FcslP.-lLtFlD+KVD-slpc....lDc+lPs.....hlKpssspAhshhptss.............s.tlsuEsppsGshtssp......................................lhscaEs.hA.tp.hulpsWptLNphPhhPpVsphslPsAAahoEKYNpslhshsc+GYthstYLPllPhEcIu+sFt...t................. 0 4 29 35 +5584 PF05756 S-antigen S-antigen protein Bateman A anon Pfam-B_7194 (release 8.0) Family S-antigens are heat stable proteins that are found in the blood of individuals infected with malaria [1]. 
25.00 25.00 92.00 91.90 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.27 0.72 -4.01 4 57 2009-09-15 09:48:53 2003-04-07 12:59:11 6 1 6 0 1 58 0 62.60 79 62.61 CHANGED MNRILSVoFYLFFlYLYIYKTYGKVKNTDcELSNIYGsKYYLRsGhhNpKNGKGpKYEDLpEEtEGENDDEEDSNSEESsNDEENtLIEGQspu .......VoFYLFFlYLYIYKTYGKVKNTDpELSNIYGTKYYLRsGhFNcKNGKGpKYEDLEEE.................................. 0 1 1 1 +5585 PF05757 PsbQ Oxygen evolving enhancer protein 3 (PsbQ) Moxon SJ anon Pfam-B_6905 (release 8.0) Family This family consists of the plant specific oxygen evolving enhancer protein 3 (PsbQ). Photosystem II (PSII)1 is a pigment-protein complex, which consists of at least 25 different protein subunits, at present denoted PsbA-Z according to the genes that encode them. PsbQ plays an important role in the lumenal oxygen-evolving activity of PSII from higher plants and green algae [1]. 25.30 25.30 25.30 25.50 24.90 25.20 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.07 0.71 -4.90 18 175 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 69 5 82 172 2 161.80 26 81.35 CHANGED phsssusspssstRusls....lRAst.........t.t..pssRRAllG.LlAsulsuuuhspAshAs.........sh.htlshssP.sushsGs.spspARDhpLs.lKcR......FaltsLuPs..EAAtRsKcSAp-I.lsl+shI-+Ks..WsaVpspLRL+uuY..LRaDLsTllSuKP.c-cKpsLh-Ls.scLFpsl-sLDaAs+pKsss-ApKaYtcThssLs-VLApLu ..............................................................................................ht.................................................................................................................ts+sh.....t.t......hh...hsst..tshtRh+psApcl.hslcs..h....I...-ccs...WshlpptLRh+uua..L+hDLpsl....Isu.p.P.c..cc.+pslpcLs.scLFsslscLDhAs..+pKssspspphYtpshpsLs-lluhh........................................................... 
0 20 52 70 +5586 PF05758 Ycf1 Ycf1 Bateman A anon Pfam-B_6040 (release 8.0) Family The chloroplast genomes of most higher plants contain two giant open reading frames designated ycf1 and ycf2. Although the function of Ycf1 is unknown, it is known to be an essential gene [1]. 29.20 29.20 29.40 29.40 28.40 29.10 hmmbuild -o /dev/null HMM SEED 929 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.64 0.70 -13.90 0.70 -6.83 44 1439 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 1082 0 33 1491 2 450.30 52 62.57 CHANGED SVVlVGLYYGFLTTFSIGPSYLFLLRAplh..........EEGTEKc.............lSATT.GFIsGQLhhFISIYYuPLHLALGRPHTITVLsLPYLLF+..FFasN.pK...cFh..sasp...sspNSh...RNhsIQslFLNNLIF.QLhNhhILPSShLsRLVNIYhFRCNNKhLFVTSSFlGWLIGHILFhKWlt.....LlLhWIc.........p.NpS.................Icp...........Ipss......KY..LVscltN.h...+I.....................................................................FSILLFIoClaYLGRhPs.PlhTKKL..pp...............................................schcct........ccppphch-p..ct.....ttscpcpchs.tpps......lh..ccpts.pp.........................................................................................................................p.hp.......pp........phhhF.cKPllTlLFDYcRWNRPhRYI...K.Ns+F-..ssV...RsEhSQYFFtTCpSDGKpRISFTYPPSLSTFh-MI.p++ls.....lhshcK.h...s--.....LYsp...WlhoNcpK+ssLssEFlNRIcsLD+u.....lh........................h-lLEp..RoRL..C..sscscpc.........hLsKh.....................YDPhLNGsaRGp...............I..cchhSss...............................l......hsc...s.hcs.hcphh.....lN+laslLh............sssapchEpc......s..........hcslspph...h..........l.p.s.p...............................................................................................................................................t.p....l........tph.........pppppphhphlhshlhss..pppph.p....cs...................................ltI.pEIp......KKVPRW...SYKLhs-......lEptct-...cp...sst-ppIRSRKsK.+V....llhss.......pp...tp...............................................................................................
..................................................................................pcptc..-...hslhRY.SQQSDFRRclIKGSMRAQRRKslIWchFQsslHSPL.......FLD+h-Kh...hhF.sF..slsphhc........................hla+..........sa..hp.Kppchch.s...pccphpc.............Kppc.ph.....pcpcRlp.IAEtWDsl.hAQhlRGhlLlsQShLRKYIlLPsLIIuKNIuRlLLFQhPEWsEDac-WsREhHlKCTYNGlpLSEs....EFPcsWLpDGIQIKILFPFpLKPWHcSKhp.........................t.p...hpcpt.c+c................................................................sF..CFLTVWGhET-lPF.GsP+KpP....SFFcPIh ......................................................................................................................................SVVVVGLYYGFLTT..FSIGPSYLFLLRA+VM..........EEGTEKc.............VSATT.GFIsGQLMMFISIYYAPLHLALGRPHTITVLsLPYLLFH..hF....hNN....pp.....p.hh.............s.GS............TT.RNSM..........RNL.SI...QCV.......FLNNLIF.QLFNH.F.I.LPSSsLARLVNIYMFRC.NNphLFlTSSFVGW.LIGHIL...F.MK.WhG.......LV.LhWIc.........pNs........................................IcSN........................................................KY..LVSELR.NsMs..RI...................................................................................FS..ILLFlTC.VYYLG..Rh.PS.PlhTKKL...KEs..........................................................................................................................................SchEEh.............ctcpcsc..lEss..Eh.......ctspp-.pct...tpp.........................t..........pp........h..............................................................................................................................................................................hp.t...p.p.cs....ch.thhcp...................ph.h.a...h.....-K....sh.lThhFsacRWNRPhRYI....t...................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 4 26 30 +5588 PF05760 IER Immediate early response protein (IER) Moxon SJ anon Pfam-B_6450 (release 8.0) Family This family consists of several eukaryotic immediate early response (IER) 2 and 5 proteins. The role of IER5 is unclear although it play an important role in mediating the cellular response to mitogenic signals [1,2]. 
Again, little is known about the function of IER2 although it is thought to play a role in mediating the cellular responses to a variety of extracellular signals [3,4]. 21.70 21.70 27.70 23.40 20.50 20.20 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.20 0.70 -4.49 10 171 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 48 0 78 166 0 176.10 27 97.89 CHANGED MEstl-AQpllSISLtKIaNSRsQRGGIKLHKNLLVShVLRsARQlYLS-+YutlYhttpttpstsssspp........................................................................................susstlpPPSutt........................................lsspspSP-....sss-P....u.hpsssucs.ssust..sssssGs.psspsosLDhsocVlTTVEsuhLpp............................................sCss..p.u.t.t.spsssspRKhpsuuhtS-uucss...........uhsPs.KRuRhE-.sspshu-sp-up.........puNloNLISIFGSuFSGLLS+pssusps...hstp..........hCsKpALusLusWoRAIVAF ...........................................................hth-ApplhslSlhKhhpSRhQRGGl+LH+sL.lohVhRsARp......l......Yhst........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 10 18 33 +5589 PF05761 5_nucleotid 5_nucleotidase; 5' nucleotidase family Bateman A anon Pfam-B_2948 (release 8.0) Family This family of eukaryotic proteins includes 5' nucleotidase enzymes, such as purine 5'-nucleotidase EC:3.1.3.5. 
27.00 27.00 27.40 27.30 24.90 24.70 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.40 0.70 -5.97 21 680 2012-10-03 04:19:28 2003-04-07 12:59:11 9 8 168 12 393 595 21 368.90 30 81.95 CHANGED lFsNRsLsLcsIchaGFDMDYTLAhYps.phEpLuF-hsh-+Lls.hGYPpslh.phpYDPpFslRGLhhDptpGNLlKlDtaGplt..ssaHGhc.Lss-Elt-hYssp.plphcpst..........pathlsTLFuLPEssLhAslV-ahp......................................sss.hphsappLacDV+cAlDpVHhcGpLKpclhpclp+Ylh+DsclshhLp+L+psGKKLFLlTNSsasYsDthMsYlhst..s..........sWRshFDlVIVsAcKPtFFs-sp.PhRplDscsGpLphsp................hssLc+G.........plYpGGsh.phpclhGh+.GsclLYlGDHIauDlhcoKKppGWRTshll.ELccElclhsscptthsp..LppLpslhsclpsphs...................................ctsshpt.slpphppchcplppshcphFN..aGuLF+s.GsppohFupQlpRYADlYsoplsNhLpYssthhF+us+s..hhPHEsss.ps ...................................lasspphphpplp...hhGFDhDYTLs.Yp..th.ctLhaphshchLl.p..htYP.pp..lh..p..h.p...a.-.s.s..Fs.h.R...GL...hhDh.hG.LlKlDt.at.l....hsh+G.h.p.l........tt.p.......-l.hchYssp..hl...ht.t.p..................phh..h.slFslP.EhhLhupll-hht..............................................................................p...t...p..h..h.s.....hphapD...V..p...s......Al......p...s....H.......h.......p..........G.............hh.......p.......t.......h.......h.......ps.p....+Yl...p.s.s.p.h...hhLpc........hpp.t.G.t.......p..lFLlTN.Ssapass..thMpa.....h.hs..................sWpshFDlllspAp...........KP.....tFFs...c...tp...sh+pl.s......pp..u.t....h...t....hs.p.................................................h.t...lp..pG................tlY...pt......G...sh.....t....h....h....c....l.............h........s.....h...c......G....p.....c......l..LY.hGDHlauDl..hpsph.t..puWRThhl..l.ELt..p..Elphhtppp...h.t...httLp...htph......................................................................................................................................p.tt....htt..h.tphp.h....t.h.p..as...hG.hh+s...p.ohF..pl.+auslYhuph.shh...a...hhahs........h.Hc.....s......................
.................................................. 0 140 203 293 +5590 PF05762 VWA_CoxE vwa_CoxE; VWA domain containing CoxE-like protein Bateman A anon Pfam-B_2956 (release 8.0) Family This family is annotated by SMART as containing a VWA (von Willebrand factor type A) domain. The exact function of this family is unknown. It is found as part of a CO oxidising (Cox) system operon is several bacteria [1]. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.30 0.70 -5.21 17 2445 2012-10-10 16:07:06 2003-04-07 12:59:11 9 3 1560 0 642 2460 867 184.40 23 42.62 CHANGED Ltctltt....hhhthstcR...ph.spt+st+lDh++TlRtslpputp..........hthhh+p..Rppcs.clVlLlDlSGSMptaoshahtlhpAlhtsht..cschFsFuTplsclTptl.pttDsspuLhcs..p..h...huGGTcIupuLtphtphhstps...tusVlllSD.....uh-psshstLtpplscLppput+llhLsPh..tthsuacsp.stulcAhhspV.sthtPscphpsl ................................................................................h.................h..h.....t..h..........t..tphDh..p..tTl..p.....t..s.h...t..p..hh...................................h.h...tp........+..p...s....p...s........p.lll..hlDsSG.SM.........s...........s...........a..........s.........t.......h..........t.......h......t.......l....h..p.....u.......s.....h..s...p.hp..........+s.cl.h..h...F.s....T...p......l.......s.......c..........h..........p.........t......h.......s.....................s..........p..h...l......h.hh.........p..........ht.......G.G.............T....c.......lu....p.sh.t....h....h..h......p..h...h......t....t..........t...............p...s...s.....h..lllSD........h.h..t..t....t............s....h...h...p...p...l.t.c...h....p.....t.+..hhh....hss.......thsth.................................h........................................................................................................... 
0 197 410 528 +5591 PF05763 DUF835 Protein of unknown function (DUF835) Bateman A anon Pfam-B_3039 (release 8.0) Family The members of this archaebacterial protein family are around 250-300 amino acid residues in length. The function of these proteins is not known. 28.10 28.10 28.80 37.30 26.90 28.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.79 0.71 -4.61 38 148 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 24 0 116 168 35 135.30 25 39.06 CHANGED G.uhhhsspp......hhthlcsh.p....sslsloRp...sPcpact.........lhWlT+sp...uc..s.....ulsPTpLthlh-hlhcalcp..tss....lVllDsLEYLlL.NGFculhKFLtsLKDhslhpsut.lIlhsc.cuhs-+-hslLc+Ehp ..............................p.....hh.hlpp.h.pth...ssLsloRp...sPcphp........th.hlWlopsp...sc....p........slsPopLt.hlhch.lhcalcp.ttps.....lVllDslEYLll.NG....FcslhKFLtsL+Dhs.lhpsuh.lllsl-.pslsc+phslLpRph........................ 0 19 27 73 +5592 PF05764 YL1 YL1 nuclear protein Bateman A anon Pfam-B_3088 (release 8.0) Family The proteins in this family are designated YL1 [1]. These proteins have been shown to be DNA-binding and may be a transcription factor [1]. 
29.00 29.00 32.00 29.70 28.90 28.90 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.92 0.70 -4.76 37 325 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 271 0 234 329 1 213.70 27 44.43 CHANGED htsRs+RusAGp+hppLLptE.-t--...pp...................hFpE---DpEaptppp-ppp................................................................Dsshsssp.s-...........cssss--t-uE+t.....ptcc..+++hhtpcthhc.hh...........h+h+h..............................phps.sssc.c.....tpppc.p.h.ohhss........tRpSsRpoThpppptsht+Lp-p-t+Rtph.t......t+++tptpp+thTQEE+LtEAccTEchNh+SLp+acchEtpKKc......pshpp+ph.pGPhlpahSst ................................................................sRspRssAGp+hpp.LLptE.c.p..D-h..t..t............................hFp.E..-..ps...D.c-aptppp-p-s..............................................Ds-hsps-.s-.............psssss-t-tc+...........................++.+..hhh.ppth..htsht............h+hph......................................................t.pt.tt.t.p............t.p..p.tp........h.s.ht..........................tRpShRposh...p.pt.th..+.h...pcppt.....pc..ht.......................tp+ct..t....tppc.hTQEEhLtEA.t..TEchNhcSL...ppapch.Etc+Kc........pshtp+t..h.tGPhlpahSh.s............................................................................. 0 76 128 195 +5594 PF05766 NinG Bacteriophage Lambda NinG protein Moxon SJ anon Pfam-B_7000 (release 8.0) Family NinG or Rap is involved in recombination. Rap (recombination adept with plasmid) increases lambda-by-plasmid recombination catalysed by Escherichia coli's RecBCD pathway [1]. 
22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.48 0.71 -4.55 19 590 2012-10-05 18:28:12 2003-04-07 12:59:11 7 4 389 0 37 440 99 170.90 48 96.91 CHANGED h..+ps+.+KCK..sCsshFsP.tpshQ.pVCSscCu......hthtp.....tppccpc+ptpptERpcl+tRKc+LK..s+u-ah+-sQtshNcalR.hRDh..shPCISC.......Gp...........hpsupacAGHYRosGAsspLRFp..NlHpQCshCNpahS.....GNlls..YRhsLlc+IGhEcVEhLE.usHpspKasI-EL+plpshY+uKh+cLc+......cuA ..........................................tp..cR+CK...CtphFhP.hht....sQ.hh...C.s..ppu............hth.tc.........ttpttcc..cp+...ppcpcp+c..ch+h+Kh...slK..s+u.hWl.+.p.AQpAhNtaIR..pR.....Dt.........s..hs....CISC.......Gs...........hp.u.up..aDAGHYRosuAsP....pLRFsEpNlHpQCs.sC.Np+.KS.......GNlVs...YR..lp..LIs...R...IGp...EAV-plE.....uNHst+RWTlEE..h+uIKucYppKLKcL+p.....uct........................................ 0 3 14 26 +5595 PF05767 Pox_A14 Poxvirus virion envelope protein A14 Moxon SJ anon Pfam-B_7009 (release 8.0) Family This family consists of several Poxvirus virion envelope protein A14 like sequences. A14 is a component of the virion membrane and has been found to be an H1 phosphatase substrate in vivo and in vitro. A14 is hyperphosphorylated on serine residues in the absence of H1 expression [1]. 25.00 25.00 25.00 25.80 23.60 23.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.19 0.72 -4.03 9 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 1 25 0 89.70 60 99.01 CHANGED MDhhthlpNhaSslllsGIlLLhsACIFAFl-FSKsspss-.sWRALSllsFIlGhllllGhllaohYsRaCt...sosshcssRh.Nso-IELs ..MDhMhhIuNYFS.GVLIuGIILLllSCIFAFIDFSKsT.u.P.Tc.TW+sLSIhuFILGIlITlGMLIYSMWG++CsP..sSsVIcs.....NpSDIplN.............. 0 0 0 0 +5596 PF05768 DUF836 Glutaredoxin-like domain (DUF836) Moxon SJ, Bateman A anon Pfam-B_7010 (release 8.0) & Pfam-B_2829 (release 14.0) Domain These proteins are related to the Pfam:PF00462 family. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -10.03 0.72 -3.72 48 1303 2012-10-03 14:45:55 2003-04-07 12:59:11 9 7 1259 7 450 2272 1309 77.50 25 77.65 CHANGED plhLau+ssCsLC-.......thpphLpt...hthptt.........hplphlDIs.....ssss..Lhp......................+Ys.clPVlthss............................tthhphtlst-pLtphLp ...............lhLa.s+t.sC.tLC-.......pspph.Lpt......l.t..t.chs.....................hplph.l...DIs.........psss........Lhp.........................cYs...........c...l.P.V.lhhss.............................tphhha.hctppLpt.l.h.................................................... 0 104 261 371 +5597 PF05769 DUF837 Protein of unknown function (DUF837) Moxon SJ anon Pfam-B_7035 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. One of the family members (Swiss:O02197) is a circulating cathodic antigen (CCA) found in Schistosoma mansoni (Blood fluke) [1]. 21.30 21.30 21.40 21.30 21.10 20.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.23 0.71 -4.75 9 231 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 103 0 133 212 1 152.70 36 73.36 CHANGED MosTl-plLhDAKslspRL+-+Dshu-sLl-potslp+pVcuM+........pYQE-hp.lpclu+.+sp....hhlpQENpQI+-LQQEN+EL+soLEEHQpALELIMuKYREphhphhhspKhs.sEhhhp...hpcphsp.lpsQh-+IsEMusVM+cAlplD-pphspppEplspLchENctLR-lLpIS+p ....................................................................shohpphlhDA+tLhpRL+-p-stu-sLlppstslpppl.t......uh+......................papE-h.p.p.l.pchuc...+sc.........hhlppE..Npp.l....c-LQpEN+...........ELphuLEEHQpALELIMsKYR....c...phh....plh.hsp+..h...t...s.t.hht..........h.p.....pp....p...sp.lptp....hc.pIpE....MutVMppAhp...hD-pp...t.p..pphl..pL.p.ENptLRchLtIsp................................................... 
1 38 46 78 +5598 PF05770 Ins134_P3_kin Inositol 1, 3, 4-trisphosphate 5/6-kinase Moxon SJ anon Pfam-B_7042 (release 8.0) Family This family consists of several inositol 1, 3, 4-trisphosphate 5/6-kinase proteins. Inositol 1,3,4-trisphosphate is at a branch point in inositol phosphate metabolism. It is dephosphorylated by specific phosphatases to either inositol 3,4-bisphosphate or inositol 1,3-bisphosphate. Alternatively, it is phosphorylated to inositol 1,3,4,6-tetrakisphosphate or inositol 1,3,4,5-tetrakisphosphate by inositol trisphosphate 5/6-kinase [1]. 20.20 20.20 20.20 21.70 20.00 20.00 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.04 0.70 -5.41 5 297 2012-10-10 13:17:03 2003-04-07 12:59:11 6 6 113 8 186 306 8 257.30 33 79.58 CHANGED usshp+RhlVGYALosKKlKSFlQPuLluLARpRGI-lVslD.oRPLsEQGPFDlIIHKLhD............cEW+cpL-EFR-AHP-VsVLDPPcAIc+LHNRpSMLQlVu-Lcl...uscscRlGlPcQlVlhpD.uuulsculspAGL+FPLIAKPLVADGTAKSHEMSLlYcQEGLptLcPPLVLQEFVNHGGVLFKVYVVGEsl+VV+RhSLPDVSpccL..ssucGsFsFspVSsssuou-......-sEl..Dh..ulAEhPPcslLc-LA+ALRRuLGL+LFNFDlIR-spstDRYhVIDINYFPGYuKMPuYEsVLTDFlho 
........................................................................................................................hhh..t+.tph.....th....s........ttp.sh...hl.ls...phslt..pQ..G..s.h.DlllHKlh.s..........................................................thh.pthp..cahp.t..HPps.hl.lDP..uIpp.........LhsR.p.hphl.t.c....lph...............t......tt.......p.....l.t.......s.P...h.....lp.....p.......s.......t......sh.....p.h...lt......ps..........uLphPh.lsKshV...Ac...Go.p..SHcMullas........p.cu.L.........p...t........l......p.....s.........P....h...........V..lQ.pFlNHsullaKVaVlG-ph.ps.VpR.SL....s..hs..t.t..p........sp.t..t....h.hh....p....tp.....l.Sp.ts..ss...........................tth....-............t..........hp.h..P...s.t....t....h.....l.....pp.....luptLRptLGlp.LFs.hDlI........h...............pt..t........s.............s..........p.a.hlIDlNhFP...G.............atthsta.t.hhphh..p......................................... 0 71 119 151 +5599 PF05771 Pox_A31 Poxvirus A31 protein Moxon SJ anon Pfam-B_7044 (release 8.0) Family \N 22.40 22.40 23.20 66.00 20.80 19.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.49 0.71 -4.28 6 59 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 30 0 0 49 0 112.70 75 84.00 CHANGED M.-IFphL+.hE+phFsshslslPKcKpshsaKshsFIFYpPKcsplp+YlssuslaHoDhlVhGKVlIsshKhLLhYMDLpYYGlohsGshY+LGpSIccLSLcppKlhs+hot Ms.SILNTLRFLEKTSFYNCNDSITKEKI.KIKHKGMSFVFYKPKHSTVVKYLSGGGIYHDDLVVLGKVTINDLKMMLFYMDLSYHGVTSSGsIYKLGSSIDRLSL.NRTIVTKVN.N...... 0 0 0 0 +5600 PF05772 NinB NinB protein Bateman A anon Pfam-B_4884 (release 8.0) Family The ninR region of phage lambda contains two recombination genes, orf (ninB) and rap (ninG), that have roles when the RecF and RecBCD recombination pathways of E. coli, respectively, operate on phage lambda. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.49 0.71 -4.49 13 563 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 435 2 37 310 28 118.00 46 85.64 CHANGED hLRspph+pNAlphIpplPsDscKPlVlpIp-tTRoLpQNu+hWAhLsDlScQV.WaG++LssEsWKslFsuuh....tpcp-slPGL-G.thVslGpSTu+MoVpcMu-LIEhlpAaGsppGV+aoD ..............................lRs.thppNAIpAl.pl.......Ds......ppPl..hlpIp..-.....p..RolcQNp+hW.A.hLsDlSR.QV.W....H.......Gc....hLssEs.WKslFTAuh..........t............p.....Q............cslPG......l........-G.GhV.....hlG.psTS+MpVuchsELIElhtuFGs.E+.G..V+aSD................................. 0 2 15 21 +5601 PF05773 RWD RWD domain Bateman A anon Pfam-B_3991 (release 8.0) Domain This domain was identified in WD40 repeat proteins and Ring finger domain proteins [1]. The function of this domain is unknown. GCN2 is the alpha-subunit of the only translation initiation factor (eIF2 alpha) kinase that appears in all eukaryotes. Its function requires an interaction with GCN1 via the domain at its N-terminus, which is termed the RWD domain [1] after three major RWD-containing proteins: RING finger-containing proteins, WD-repeat-containing proteins, and yeast DEAD (DEXD)-like helicases. The structure forms an alpha + beta sandwich fold consisting of two layers: a four-stranded antiparallel beta-sheet, and three side-by-side alpha-helices [2]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null --hand HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.71 0.71 -4.18 119 1878 2012-10-02 15:28:41 2003-04-07 12:59:11 17 79 325 8 1276 2204 23 119.30 20 19.67 CHANGED psp-.pppEl-uLpuIYs-ch....................................hpppsstphplpl........................................................ptt.sphs............................................................................slpLphphsp...sYP.ppsPt.lp...lpsstttt...........................................tphpplpppLpchhppth.....G.pshlaslhphlp-..hl ...............................................................................................................................................................................................................................................................ctpppElcsLpu..I.Y..scph..............................................................................htpt.s.s..p..h.p....lpl.................................................................tt.ttsthh..................................................................................................................................................................................................................................................sl..p..Lp..l..p.....hPp.......sY..P..p......psPh..lp.....lpsttthp...............................................tphp.p..l.tppLpphhppth.....G..psh.lasllphlp-..h............................................................................................... 0 383 606 961 +5602 PF05774 Herpes_heli_pri Herpesvirus helicase-primase complex component Moxon SJ anon Pfam-B_7045 (release 8.0) Family This family consists of several helicase-primase complex components from the Gammaherpesviruses. 
25.00 25.00 53.10 50.20 21.00 18.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.59 0.71 -4.15 8 24 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 19 0 0 23 0 125.00 30 37.95 CHANGED Wl-oLphslsLhh...ssps.psILcslLslhassRcsTsFWLLPpualpptsppPslPhDCluPp.halhTcsGshp.Whs-asLPsslsYpsYlppllclhphlp.t..sstpscph..hL-sapplLsLF ..WlssLshslslhh...ssho.tsllcpllslhappR+sosFWLlPpsaspthshp.PslPhDCluPp.halhTpsGshp.WhtshsLPsslsYthYlpphlclhphl.........ssppscphp.hL-tapplLsLF................ 0 0 0 0 +5603 PF05775 AfaD Enterobacteria AfaD invasin protein Moxon SJ anon Pfam-B_7107 (release 8.0) Family This family consists of several AfaD and related proteins from Escherichia coli and Salmonella bacteria. The afa gene clusters encode an afimbrial adhesive sheath produced by Escherichia coli. The adhesive sheath is composed of two proteins, AfaD and AfaE, which are independently exposed at the bacterial cell surface. AfaE is required for bacterial adhesion to HeLa cells and AfaD for the uptake of adherent bacteria into these cells [1]. 21.50 21.50 22.50 34.00 20.00 21.40 hmmbuild --amino -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.20 0.72 -4.10 10 168 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 136 11 1 79 0 107.60 55 72.60 CHANGED +cshuupL+DGt+lATGRIhCREsHTGFHlWhNuRQssG+PG+YIlQGp+DopHcLRVRluGpGWpssst-GtpGllppGpE-QshFDVhsDGNQplsPsEYllSVSGpCl ...sh..FRAGHVPDGMVLAQGaVTYHGSHSGFRVWSDEQKAGNTPsVLLLSGQQDPRHHIQVRLEGEGWQPDsVsG.RGAILRTAADNASFSVVVDGNQEVPADTWTLDFKA...CAL. 0 0 0 1 +5604 PF05776 Papilloma_E5A Papillomavirus E5A protein Moxon SJ anon Pfam-B_7244 (release 8.0) Family Human papillomaviruses (HPVs) are epitheliotropic viruses, and their life cycle is intimately linked to the stratification and differentiation state of the host epithelial tissues. 
The kinetics of E5a protein expression during the complete viral life cycle has been studied and the highest level was found to be coincidental with the onset of virion morphogenesis [1]. 25.00 25.00 130.60 130.50 22.80 20.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.22 0.72 -3.83 5 25 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 0 0 24 0 91.00 80 99.74 CHANGED M-VVPVDlTssTTSsSLLPLVIALTVCllSIILIIhISEFlVYTSVLVLTLLLYLLLWLLLTTPLQFYLLTLSLCFLPAFslHQYILQTQQ MEVVPVQIAAGTTSTLILPVlIAFsVCFVSIILIlaISDFlVYTSVLVLTLLLYLLLWLLLTTPLQFFLLTLhVCYhPAhYIHpYIVpTQQ. 0 0 0 0 +5605 PF05777 Acp26Ab Drosophila accessory gland-specific peptide 26Ab (Acp26Ab) Moxon SJ anon Pfam-B_7275 (release 8.0) Family This family consists of accessory gland-specific 26Ab peptides or male accessory gland secretory protein 355B from different Drosophila species. Drosophila males, like males of most other insects, transfer a group of specific proteins (Acp26Ab and Acp26Aa in Drosophila) to the females during mating. These proteins are produced primarily in the accessory gland and are likely to influence the female's reproduction [1]. 25.00 25.00 31.40 30.60 22.10 18.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.05 0.72 -3.54 4 17 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 8 0 2 28 0 88.50 77 86.89 CHANGED MNYFAVLCIFSCICLWQFSDAAPFISVQSSSQSRSQKVMNGMLRTLYDYSVQDSVNDATGHLIHTHKADFNSDVMSP-EIEpVRQQLNMA ..........MNYFAVLCIFSCICLWQFSDAAPFISVQSSSQSRSQKVMNGMLRTLYDYSVQDoVNDATGHLI+THKSDFNSDVMSPEEIE+VRQQLsMA.... 0 1 1 1 +5606 PF05778 Apo-CIII Apolipoprotein CIII (Apo-CIII) Moxon SJ anon Pfam-B_7283 (release 8.0) Family This family consists of several mammalian apolipoprotein CIII (Apo-CIII) sequences. Apolipoprotein C-III is a 79-residue glycoprotein. It is synthesised in the intestine and liver as part of the very low density lipoprotein (VLDL) and the high density lipoprotein (HDL) particles. 
Owing to its positive correlation with plasma triglyceride (Tg) levels, Apo-CIII is suggested to play a role in Tg metabolism and is therefore of interest regarding atherosclerosis. However, unlike other apolipoproteins such as Apo-AI, Apo E or CII for which many naturally occurring mutations are known, the structure-function relationships of apo C-III remains a subject of debate. One possibility is that apo C-III inhibits lipoprotein lipase (LPL) activity, as shown by in vitro experiments. Another suggestion, is that elevated levels of Apo-CIII displace other apolipoproteins at the lipoprotein surface, modifying their clearance from plasma [1]. 27.10 27.10 27.20 29.20 27.00 27.00 hmmbuild --amino -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.30 0.72 -4.59 6 42 2009-09-11 01:40:59 2003-04-07 12:59:11 7 2 27 1 16 35 0 65.00 61 62.42 CHANGED EsE-uSLLuhMQGYMcpATKTApDALoSVQESQVAQpARGWMT-SFSSLKDYWSoFKGKFTsFWDSsPcs ..........EuEDsSLLuhMQG....YMp+ATKTApDALo.SVQESQVAQQA.....R..sWhTDuFSSLKDYWSoh+sKaSt..FWD.sPE.s....... 0 1 1 1 +5609 PF05781 MRVI1 MRVI1 protein Moxon SJ anon Pfam-B_7407 (release 8.0) Family This family consists of mammalian MRVI1 proteins which are related to the lymphoid-restricted membrane protein (JAW1) and the IP3 receptor associated cGMP kinase substrates A and B (IRAGA and IRAGB). The function of MRVI1 is unknown although mutations in the Mrvi1 gene induces myeloid leukaemia by altering the expression of a gene important for myeloid cell growth and/or differentiation so it has been speculated that Mrvi1 is a tumour suppressor gene [1]. IRAG is very similar in sequence to MRVI1 and is an essential NO/cGKI-dependent regulator of IP3-induced calcium release. Activation of cGKI decreases IP3-stimulated elevations in intracellular calcium, induces smooth muscle relaxation and contributes to the antiproliferative and pro-apoptotic effects of NO/cGMP [2]. 
Jaw1 is a member of a class of proteins with COOH-terminal hydrophobic membrane anchors and is structurally similar to proteins involved in vesicle targeting and fusion. This suggests that the function and/or the structure of the ER in lymphocytes may be modified by lymphoid-restricted resident ER proteins [3]. 20.10 20.10 20.40 20.80 19.90 19.90 hmmbuild -o /dev/null HMM SEED 538 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.63 0.70 -6.11 3 180 2009-09-11 20:56:11 2003-04-07 12:59:11 7 5 50 0 88 165 0 389.00 34 73.00 CHANGED psEPcDGALDVsRsppCPuPTusPlPGssLpGCsRMN-DsSp-ENGVG+ssuESluQpREhhspsSS.PL.RtTSSo-GTlTSS-sGp-ILsMASsDLDcKsLCcKEE-sRAASPslc...........tQGTSLAc-sIuhpsSsuVuKslspLEAuEE.E.......ToEpccKESAuG-oVlSslPcsoVKpVNl+QSENTSA...NEKEVEAEFLRLSLGFKCDWFTLEKRVKLEERSRDLAEENLKKEITNCLKLLESLTPLCE-DNQAQEIlKKLEKSIslLSQCAARVASRAEMLGAINQESRVSKAVEVMIQHVENLKRMYAKEHAELEELKQlLLQN-.SRSssPsEDDsspptRSpSLSL.SKPSSLRRVTIASLPR..........................NlGNsGhVSGMENNDRFSRRSSSWRILGoKQSEHRPSLhRFISTYSWADAEEE..........Ks-lKs+DssEPpGEEsVERTRKPSLSE++usTpcWD+uolsSolASWVTaLQuSaRKAN...+ALWLouAhIlLlAALMSFLTGQLFQouVEAAPTQpGDSWsS.LEHILWPFTRLpH-GPPPV 
....................................................................................................................................................................................................................................................................................................................s.t.....t...h..t....t............................t.tthh..sps.hshth.ph.ptttE.c..........................s.t.ttc.....ttt.s........u.s..hs...s.s.l+..lphppu.ssSA......sEKEVEs.FlpLSLuF+sD.aTLEpRlp.tERpRsLsEENhcKEl.....p......Nhht.lp...........u.................ssL..cc..sptpEhhpKL.csIshLpphssRluSRAEhlGAlpQEpR.hS.KAsEVMhQaVENLKRh.YtK-HAEL.EhKp..lh.QNp....Ru.h....s.s.........-D....tssp...ph..+p.Shs...uKs...shRRVolAslP+............................................shtss...............s.s........s...h.s..t+sss.....t.s....s..........s.......c.hp...hsc.p--...........p...t......................................t...............p...h...hh...h...h..phs.......phlhh.h.hhh...llhshlhuhh.t...h....p.s....tts..................................................................................................................................................... 0 15 22 50 +5610 PF05782 ECM1 Extracellular matrix protein 1 (ECM1) Moxon SJ anon Pfam-B_7421 (release 8.0) Family This family consists of several eukaryotic extracellular matrix protein 1 (ECM1) sequences. ECM1 has been shown to regulate endochondral bone formation, stimulate the proliferation of endothelial cells and induce angiogenesis [1,2]. Mutations in the ECM1 gene can cause lipoid proteinosis, a disorder which causes generalised thickening of skin, mucosae and certain viscera. Classical features include beaded eyelid papules and laryngeal infiltration leading to hoarseness [3]. 
25.00 25.00 27.80 27.80 18.50 18.10 hmmbuild -o /dev/null HMM SEED 544 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.10 0.70 -5.79 3 78 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 33 0 30 80 0 339.00 43 97.22 CHANGED MGTsSRAALVLACLAVASAASEGGFKASGQRELsPE.LhpHLQEVGYAAPPSPPLSRuLPhDHPcTSQHuPP.FEGQSEVQPPPS.EshPl.pEch.sh..Ps.ccsGPslPQEAIPLQKELPPPQVPIE...............QKEsKPAPhuDQSPPEPESWNPAQHCQQGRsRGGWGHRLDGFPPGRPSPDNLcQICLPsRQHVVYGPWNLPQTGYSHLSRQGETLNlLETGYSRCCRCRS+TNRLDCAKLVWE-uhoRFCEAEFSVKTpPHhCCp+QGEARhSCFQEEAPpPHYQLRACPSHQPsIS.G.ELPFPPGlPTlDNIKNICHL+RFRSVPRNLPATDslQRpL.ALhpLEtEFQRCCRQGNNHTCTWKAWEDTLDGYCDREhAIKTHHHSCC+YPPSPsRDECFARRAPYPNYDRDILTLDLSRVTPNLMGHLCGNpRVLSKHKQIPGLIQNMTARCCDLPFPEQACCAEEEKLAFI--LCGPRRNhWRDPALCCcLSPGDEQINCFNTNYLRNVALVoGDTcNAKG.GEQGPTpGTNuSPTSEPKEE ...........................................................................Mtsh.hAAhlLhhlAlsusAptGu..s.tphp.hP................................................................t....................t................................................tt.ls.Qpc...........t............................................................hp..t......u..l..sFPPGRPSs-NLppIClspRp+llYGPasLP.poGaSHLsRQGcslNhLEhGYopCC+...sp...s...p...spp.....LpCsc.sWccshppFCctEaSsKTpsa.CCp..pGpt..RhtCFpptuPpspY................................................................................................................................................................................................................................................................................................................ 0 1 5 11 +5611 PF05783 DLIC Dynein light intermediate chain (DLIC) Moxon SJ anon Pfam-B_7447 (release 8.0) Family This family consists of several eukaryotic dynein light intermediate chain proteins. The light intermediate chains (LICs) of cytoplasmic dynein consist of multiple isoforms, which undergo post-translational modification to produce a large number of species. 
DLIC1 is known to be involved in assembly, organisation, and function of centrosomes and mitotic spindles when bound to pericentrin [1,2]. DLIC2 is a subunit of cytoplasmic dynein 2 that may play a role in maintaining Golgi organisation by binding cytoplasmic dynein 2 to its Golgi-associated cargo [3]. 19.70 11.00 19.90 11.00 19.60 10.90 hmmbuild -o /dev/null HMM SEED 472 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.64 0.70 -5.82 8 500 2012-10-05 12:31:09 2003-04-07 12:59:11 6 6 234 0 324 498 9 276.10 25 71.25 CHANGED -EGQNLWSuILSEVSTcucSKLPSGKsVLVhGEcGSGKTTLluKLQGsEcsKKGRGLEYLYlsV+DEDRDDhTRCsVWILDGDLYHKGLLKFAlstpslc-TLllhVlDMSRPWohM-SLpKWsSVLR-HIDKL+IPPEEh+-hEQ+ls+sFQ-YlEPt-sh.sGSPQRRsspssu.D--SllLPLu-slLTpNLGlPllVVCTKCDAhosLEKEHDY+DEHFDFIQuHIRRFCLQYGAuLIYTSVKE-....KNLDLLYKY..lVHKlYGFsFssPAlVVEKDAVFIPAGWDN-KKI.uILHENFpslKsEDsaEDhIsKPPVRKhVH-KEl..tAEDEQVFLhKQQSlLuKQPsTss.RssESPsRussGSPRsssRouPssVuSsuPhso.sKKsDPNlKsuusS.EGVLANFFNSLLoKKoGSPGusus......GGsssstsosstsos+KoGpKslLTDVQsELDRhSRKs-phls.ssu.Tps 
........................................................................................................................................h..h......G.t...sp.p.hh.......h....................................t...................................................................u.....LtYha.............h.p...h....h....c............t......t...p.................p.................s....s.....h......h....pha..L.s....s.s.....h....ts....Ll.p.....l.s...psl.....phhlhhhhDhop...P...W.hhcpL.pp.a.h.p.h....l....p.p.hl.......p..h.....p............pt...........h...p.....t.h.p.ph.p.hpp...................................................................................................................................t.......................s....h.....hhhhh..................tsc.h..............................h.tp.p......t.....hp....p..hDhl...hhRphhhth.......suu...Lha...h.s.........................ph....hh..p...............hhp.....h..............t.......................hhcpptlhlP.shDs.thI..thh...pth...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................pt........................................................................................................... 0 125 167 249 +5612 PF05784 Herpes_UL82_83 Betaherpesvirus UL82/83 protein N terminus Moxon SJ anon Pfam-B_7466 (release 8.0) Family This family represents the N terminal region of the Betaherpesvirus UL82 and UL83 proteins. As viruses are reliant upon their host cell to serve as proper environments for their replication, many have evolved mechanisms to alter intracellular conditions to suit their own needs. 
Human cytomegalovirus induces quiescent cells to enter the cell cycle and then arrests them in late G(1), before they enter the S phase, a cell cycle compartment that is presumably favourable for viral replication. The protein product of the human cytomegalovirus UL82 gene, pp71, can accelerate the movement of cells through the G(1) phase of the cell cycle. This activity would help infected cells reach the late G(1) arrest point sooner and thus may stimulate the infectious cycle. pp71 also induces DNA synthesis in quiescent cells, but a pp71 mutant protein that is unable to induce quiescent cells to enter the cell cycle still retains the ability to accelerate the G(1) phase. Thus, the mechanism through which pp71 accelerates G(1) cell cycle progression appears to be distinct from the one that it employs to induce quiescent cells to exit G(0) and subsequently enter the S phase [1]. 20.70 20.70 20.80 23.10 19.10 20.40 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.80 0.70 -5.70 18 118 2012-10-03 01:18:03 2003-04-07 12:59:11 6 1 26 1 0 132 0 340.30 31 63.36 CHANGED shs.luspllphlh.pscsslcPcEs+llcTGlsVpVsp.sollhlsp.sspspssht.pp...........LplKapshcsp.-hcs.lslsVpN.os+s...lssus..pPl.ulhVFALPLstVslssLplaps.............htppth.pssucsslpphstphWplRlsloplpWscppsphhtsshhasosFhhpspshPLptl.sssc.lsCSptssplp+sphhscs....hlhlaL......phpppsPPsplFlplul.hspp......................sclshp+NPcPaLp.pscNGFslhsP+slplp.sscpsplhlsssFcSsp.asulFhPpsIPGlSlsssshhsppslhlclpuhpc.slplcthpsLGtl+FFs+shlhh 
.........................................s.hu.luspllptlhspssssltPcET+lLpTG.lclpVsp.PSllhlsQhsscussspp.ppp..........Lplpasshs....sp.-hc..s...lsVsVpNsosRs...lsPup...csh.ulhVaALPLchVslsslsla+h............tscccp..+phssscAslctssp.....phWpsRlTVoslsWs+ppsphp......t......ss......h.hasouFlhsspslsLp..........p......l.s........sAcpLsCS.tsTpls+hpllscp....hlplal..........cp.hspss..P..sspLFhHlsl.pspc.......................tcl.shpRNPpP.ah..+s.a.c....cN.GFp..VlsP+shtl+.sGchsplhlcsuFpS..cp..ahuLhhPcsI.P.GLSIpsshlhsspplhlclpuhtc.slclcthpslGtlaFFcpchlh.t... 0 0 0 0 +5613 PF05785 CNF1 Rho-activating domain of cytotoxic necrotizing factor Moxon SJ anon Pfam-B_7489 (release 8.0) Domain This family consists of several bacterial cytotoxic necrotizing factor proteins as well as related dermonecrotic toxin (DNT) from Bordetella species. Cytotoxic necrotizing factor 1 (CNF1) causes necrosis of rabbit skin and re-organisation of the actin cytoskeleton in cultured cells [1]. Bordetella dermonecrotic toxin (DNT) stimulates the assembly of actin stress fibres and focal adhesions by deamidating or polyaminating Gln63 of the small GTPase Rho. DNT is an A-B toxin which is composed of an N-terminal receptor-binding (B) domain and a C-terminal enzymatically active (A) domain [2]. 
25.00 25.00 131.30 130.50 18.00 15.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.69 0.70 -5.50 10 55 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 44 2 6 44 0 278.90 37 34.56 CHANGED slpphushs...hppLspGsIslLKG+Glluu+cspp..............Plshhlshl+aDsss.h.t.........pupDpshaG.lhuhsGssltsopsu.....PuoslGpaasptsLosNlsll+VsNGsRGssGl+IsLs-lpcupPlllTuGsLSGCTolsApKcshlYtaHsGps.sstusWhTup-GVpplhcstpsLupsssPslssspNNs.LV-aLucsFDpulIsYsG+sccssut.t........cNVslFDYsts.hscpstsRlGsuhsLlo.ssssslsVpoLuEDhslN..ususchsVL.ss.....h.Ksh ..............phupps...hptLs+GsIslLKGRGsluup+pps..............sl.FhIphsRaDp................puhDphhas.hhuhsGhshhss.su.....suohhupaacchsLospssII+VsNusRGssGIKIsLc-VpcupPlIlTuGsLSGCTTlsApKcuYlYthHTGpo.pshusa.TussGVpphhcshchLstps..hspltshhssD.LVsYhScsF-culIsYsuppcpssupht........cNVslFsY..h.hpshs.hthGsuhTlls.pssGslslpoLuEshulN..usssphslLps................ 0 1 1 3 +5614 PF05786 Cnd2 Barren; Condensin complex subunit 2 Moxon SJ anon Pfam-B_7477 (release 8.0) Family This family consists of several Barren protein homologues from several eukaryotic organisms. In Drosophila Barren (barr) is required for sister-chromatid segregation in mitosis. barr encodes a novel protein that is present in proliferating cells and has homologues in yeast and human. Mitotic defects in barr embryos become apparent during cycle 16, resulting in a loss of PNS and CNS neurons. Centromeres move apart at the metaphase-anaphase transition and Cyclin B is degraded, but sister chromatids remain connected, resulting in chromatin bridging. Barren protein localises to chromatin throughout mitosis. Colocalisation and biochemical experiments indicate that Barren associates with Topoisomerase II throughout mitosis and alters the activity of Topoisomerase II. It has been suggested that this association is required for proper chromosomal segregation by facilitating the decatenation of chromatids at anaphase [1]. 
This family forms one of the three non-structural maintenance of chromosomes (SMC) subunits of the mitotic condensation complex along with Cnd1 and Cnd3 [4]. 21.70 21.70 22.10 21.70 20.80 21.60 hmmbuild -o /dev/null HMM SEED 725 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -13.54 0.70 -6.02 15 462 2009-12-23 17:17:02 2003-04-07 12:59:11 9 6 285 0 333 491 3 510.80 21 88.67 CHANGED psp.t+psstusptchsoshp........lshNDDptE+ppR...........................RRhosh-ttsopsuuss.u...........st.pshhshhps.pIhpsacpClKLuo-NKIsscNuWshsLIDahsDl...ppK-u-hsNFQhAusTLDAusKIYusRVDSVas-shKlLuGL......u..p..pcspcpss-G..ss-sc-s-utthtpcsspcKs.++K..RsstuTls.shsslslp+h-hchsVDPLFpKsuAsF..DEsuupGLhLNpL.ulDupuRl.....lFDSs-sst.............................ppupsschssssplDhosLt..................................hs.Lspl-cpsICPSLssap..hs-sst..s..Fsctsp-hcs-sht..ttt...............shs-ctp.tshuFDlsu-sts.....................stusuG-ttphcstpEsh.cspcspccsspht-s.tssD....hshsLst........s.hSYFs.sphppsWAGP-HWKaR+h+.................pscsssposut+pKKc.KcshclDFsssl-....-hhFp.ssusoslohsphpppscspsh.LPsDhHasscpLl+LaLKPps+hsp............hs.cpcssspsss-hD-h.asNpp.....................tsDssshhPsh.utssD.-DDstsFsDssshh..ps.st..t.sutttstt.t.shhshhtsplhusP....................++Vphl.VsYA+sAK+lDh++LKpshWphlscph.sssp.ts..................................ctp-psppssptpFoplhpsLtshhP.sphtcslSsShuFlCLLHLANE+sLpLpss-s........L-DlhIpps 
..........................................................................................................................................t......................................................................................................................................................................................................t.hpph.hch..scN.KIsttNsa....s....ht.LIDhhtp..h........................c........p....t....p....hN............F...ph.....A.........usoL-u....ssKIYs.RVDulhs-sh+llusL................................s..........t..t.............................t....t..t...t.................t.........................................t..t.........t.............t.........................t.........p...t.................t..T.l..phttlp..pph..-.........t.....h....DPhFtph..........sth..D.susttl.hhs.L...............t.pl.......hhsst........................................................................................................................h.............................................................t..l.....htth............................................................................................................................................................................................h.......................................................................................................................................................................................................................................Wt.u.......t.....aWpht..................................................................................p..p.t..............h.pa.........t......................h...h.t....................................................................................h..............................................................................hP......c.hth..t.h.ph.......hst..h.........................................................................t...........tt.............................................................................................
...............................t......................................................................................................................h....s...............................................h..t.....l.aschu..KphDh+tLKpthap.ht.........................................................................................................................h.hpplhpt.l.....p..hs..............hpp.......lS..hhFhslLHLANEptL.l.....t...........tp............................................................................................................................................................................................................................................... 0 132 200 290 +5615 PF05787 DUF839 Bacterial protein of unknown function (DUF839) Moxon SJ anon Pfam-B_7480 (release 8.0) Domain This family consists of several bacterial proteins of unknown function that contain a predicted beta-propeller repeats. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 524 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.90 0.70 -5.57 15 1732 2012-10-05 17:30:42 2003-04-07 12:59:11 8 25 946 0 506 2061 586 374.30 27 79.74 CHANGED hsauslssssD.....chlsVPcGYchcllspWGD.lhss.Pth......s..tptt.hGhNpDGMuhFsl.......................sps.....+s.LLlsNHEYsssslhh.......................tstshstcpVptthAAtGsTllpl.c...ttta.hs.suthNRR...........................lptso.h.l..sGPsuGppll+ot...........ts.spGsplhGThsNCAGGsTPWGTaLTCEENhss...hFus....................pphG.....sth...........pFs...............sccaGaVVElDPhcP..posPhK+TALGRFpHEusslh..csuR.sVlYhGDD...cpssahY+FVsscphpsts.....ttspsLh-sGsLpVA+hsss....................GsscWlsLs.s............................................................ps....st..htsts-lhlpsRhAA.ctlGAT+McRPEslt.ps..h.....cVYhshTNss.tp......................h.....ssuNsRscNtaGpIh+ah.sss.....phsstp.pW-LalhuGsstssp.........s.stsshhssPDNlshDstGcLWlsTDGssst.t.......................huts.G......h.sts..pup.chhh.ssPtsuEhsGssFoPDucThFVslQHPG 
..................................................................................................................................................h...............................................................................................................................................................................................................................................................................................................................................................................................................hhG.hh.sCus.u.o.PWsThlosEENhss............hF..s.st..tt............................t.p+.h...G.h....t...h....................................................................ca.shs..................spspcaG..allE..l..D....P..........hss......p....us.......s........h.......K+TA..LGRFpHEsst.h.h.............t.....s....sp......hlsYh.GDD..........pp.p.alYKFVuptthp.t.................tts.ph......L......s......pGsLYV..A+.a.s.ss........................................................GsspWl.Ls..s..............................................................................................hp...................tss.....h..s..tspl...h..l..s.RhAA.shhGA.....T........hcRsE.ht.p..st..................phahshop.t..........................................................................................Gt.l.h..t..h............t...................................t....h...............h......h.........s.................................................hssPDsh...h.h....s...............h.......l.h.l..pD.t............................................................................................................................................................................................................................................................ 
0 144 336 453 +5616 PF05788 Orbi_VP1 Orbivirus_VP1; Orbivirus RNA-dependent RNA polymerase (VP1) Moxon SJ anon Pfam-B_7493 (release 8.0) Family This family consists of the RNA-dependent RNA polymerase protein VP1 from the Orbiviruses. VP1 may have both enzymatic and structural roles in the virus life cycle [1]. 20.00 20.00 20.30 32.30 17.50 19.90 hmmbuild -o /dev/null HMM SEED 1301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.07 0.70 -14.19 0.70 -7.37 6 192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 69 0 1 156 0 507.60 51 99.54 CHANGED VQsApLl+RulpRlhpGI+hctscutshYYKaS-phR+hRcK+GspYK+-s-.hE+phchptcpLYGLPVl+EuSWc-lhs...hpphppssLclalcSlLchp-L-PEEEFLRNY+Vp......cphpshp-FVEpRA+sEhQlaGDlslKtWsuhLhElupphKHpPLGLpVMucFVpRFGpPFHQNsRDLSQlcDashsYooPLLFEMCssESllEFNhhhRM+EEsIppLEFGsp+lsPhcLlREhFllCLPHPKKINNhLRAPYSWFVKhWGVGucclhVLpStGGDDRNSK-VhYssF++hpNhYu.slh+SpFYpco+cpNhpKlcEtlpYSQ-LGsHshshPlF.+ML+sVYpTsFsPpchSHlILASahLSIQTITGYGRAWVKNsuoDhEKQhKPssuNhlsRVsDhT+N.FIQAYpEAcc+GE-IVpPEDMYTShLRLA+NTSSGFSTplpVhK+YGPshc.+cpchI+IsSRIKALVIFpcGHcIFssEpLcKKYNTVE.YQoKGSRDVPIKATRhIYSINLSVLlPQLIVTLPLNEYFu+lGGSTpP-s++lGGKIIVGDLEATGSRVhDAADTFRNSuD.sIhTIAIDYSEYDTHLT.aNFRpGMlpGIRcAMp+YpcLRYEGaTL-ElIEaGYGEGRVssTLWNGKR+V.hKhsh-cYhpLsEp-Rl...cGsFK.sPhGVhPl+slclsp+lplc.Dsc-hlLVuPsDGSDLAh..lsTHLSG.ENoTLlhNShHNLAIGTlIQEElp+cssslloFhSEQYVGDDTLFYTpLpspcscthDpllcTIF-TIt+sGHEASsSKThhsPFSVEKTQTHAKQGlYIPQDRMMlISSERRKDIEDVpGYlRSQVQThlTKlSRGFSH-LAplILMhKToFlGaRKhKRTIhEsGtYRDR+FDSscEDGFTLhhlRsPLsLYlPV-WNGaGAHPuALNlVMTEEhFlDSlhhuphc-hhtPlh+Ihsss.PsWNETpuDKRtlsTcTKMSFFSKMARPsVpoVLsssElhctVcpLPLG-FSPspIS+TMMHSALLKEspARoLLoPsYEl-YQKplNsWlp.psshphpup-hpISTsYsKlFcVhFEsshtpuhhh..FPD.NLSPp.Fa.QKhhlGsRhSsRsRhSY.VDRIDSILRuDVVMRG.......FITuNpIlslLEclGhsHSAuDLshlFolMNl-s+VAE+LupYIsp-KIRFDAhKL.K+GICGDEFSMSLDVCTQsMlDcal+aP+pLTKTEhDAVsLYsSQllMLRAApGlshp+hclsVosEcK++aKVRtsRFpoHlPKh+hlK+L.hshcRLSuRhlpNQFV 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................LaYscLpopctphhsphhpsIFsTVtKCGHEASsSKThhsPaSV................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 1 1 1 +5617 PF05789 Baculo_VP1054 Baculovirus VP1054 protein Moxon SJ anon Pfam-B_7511 (release 8.0) Family This family consists of several VP1054 proteins from the Baculoviruses. VP1054 is a virus structural protein required for nucleocapsid assembly [1]. 
20.40 20.40 49.70 49.00 20.20 19.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.26 29 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 57 0 0 56 0 332.90 39 96.61 CHANGED phspsso.+.ss.a+Pl+h.s+.pQC.hHPpRANC+lh+hhs..........c.........................pshh.HhThl.sshahshsscPYYphLlpspt...shtt.httahNAsphhshlplc.......sssEcahuI-EAGEpNhshl+hVlKslhchlss.s...............schYl.lhhcchalDhlYSph+sl.lLPQcMYslappp...................ptP..hsphhphholPps-c.........uhpSQ.IY+oFLlYNTVLTMhLKQpNPFN-..psKsIShIlRsLGpCP.....sN+sRlKsC-LpaGGs.sP..GHlMCPP+-MVK+Ia+YAKWshsPNNY+RYacLIsc.sshps......................t..shhLhDWhsFlssFpsYFh ....................hspCsS.K.sP.a+Pl+h.s+.sQChlHPp.RANC+lh+phs..........s..ps....................pshh.HhThl.sshahshsscPYYptLLpssp.....................phtt..RttalNAs.phhshVplc.......sssEcFhuIDEAGE+Nhssl+hVl+slhchhss.s...............schal.LhlDchalDllYStaRsl.lLPQchhslhpcp......................ptP..hsphhphhsVPso--.........uhpSQhIY+oFLlYNTVLTMlLKQsNPFNs..psKsISlIlRsLG+CP.....sNK-RlKCC-LpY...GGs.sP..GHlMCPPREMVK+IFHYAKWscsPNNY+RYa-LIsc.ss.ps.th..........s.tt..t.ts..sLhLhDW.NFls-FpsYFh............................................ 0 0 0 0 +5618 PF05790 C2-set CD2; Immunoglobulin C2-set domain Bateman A, Finn RD, Moxon SJ anon Bateman A Domain \N 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.70 0.72 -4.01 27 320 2012-10-03 02:52:13 2003-04-07 12:59:11 10 44 68 56 79 345 1 77.80 26 27.95 CHANGED losssss.ssLss-sLphs....s.hhp.plphcp.ppKs.cspphhhshp...p-sG.sapChsp......sts.pphpsshpl.shs .....hosssts.plLss-sLsho..h.t..s....s.hh.p.plp..hcs....stKs....h.....cspphhhshp..hp-sG.sapCpsp.........sps.pphpsphpl.s......... 
0 5 5 12 +5619 PF05791 Bacillus_HBL Bacillus haemolytic enterotoxin (HBL) Moxon SJ anon Pfam-B_7539 (release 8.0) Family This family consists of several Bacillus haemolytic enterotoxins (HblC, HblD, HblA, NheA, and NheB) which can cause food poisoning in humans [1]. 25.00 25.00 25.00 25.80 24.90 24.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.05 0.71 -4.92 13 739 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 144 1 36 537 0 178.80 32 47.98 CHANGED pNhSLu..spshpculcptGSpshlhssYuhhIlpQsslslsslo..sh.sph.tslhpcQcsA+sNAppWlDphKPpllpssQsIlsYsTpFpsYYssLlpsl.....-ppD+sshpsslscLpspIppNQcsscpllp-LpsF+splscDopsFpscs......splpsILpuss.AsIstLcpcIpshpssIpps.s .......................................phths..stthppshpphGuphhhhphYuhhllppPslshpslo..p..sth.tslhpcQc.A+tsAppWlsph.KPQLlc.TspsIlsYs.....T.....p.....F.....psYYsoLlpsl...............sppDpssLpc....uls.cL....p....s....p....IppNQppsppll....p-Lpca+s.plspDspsFpscs......splpuILtups.usIstLppcIpphpssIptp..p........ 0 14 21 23 +5620 PF05792 Candida_ALS Candida agglutinin-like (ALS) Moxon SJ anon Pfam-B_7578 (release 8.0) Repeat This family consists of several agglutinin-like proteins from different Candida species. ALS genes of Candida albicans encode a family of cell-surface glycoproteins with a three-domain structure. Each Als protein has a relatively conserved N-terminal domain, a central domain consisting of a tandemly repeated motif of variable number, and a serine-threonine-rich C-terminal domain that is relatively variable across the family. The ALS family exhibits several types of variability that indicate the importance of considering strain and allelic differences when studying ALS genes and their encoded proteins [1]. Fungal adhesins, which include sexual agglutinins, virulence factors, and flocculins, are surface proteins that mediate cell-cell and cell-environment interactions. 
It is possible that both the serine/threonine-rich domain and the cysteine residues in the C-terminal and DIPSY Pfam:PF11763 participate in anchoring the terminal domains inside the wall, so that only the inner part of Map4p, including the repeat region, is sticking out as a fold-back loop then able to act in adhesing [3]. 25.00 11.00 25.00 11.00 24.50 10.90 hmmbuild --amino -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.61 0.72 -3.82 189 1619 2010-01-07 11:49:09 2003-04-07 12:59:11 8 56 22 0 1025 1644 0 32.10 46 32.08 CHANGED sThT.phWoGohsoTsTh...oss.....PGtTsTVl..Vp.lP .................sTsT..pa..Wo....t.S.h.sTToTl...Tss.......PG.u.TcTVl...lc..P..... 0 289 393 1025 +5621 PF05793 TFIIF_alpha TFIIF-alpha; Transcription initiation factor IIF, alpha subunit (TFIIF-alpha) Moxon SJ anon Pfam-B_7586 (release 8.0) Family Transcription initiation factor IIF, alpha subunit (TFIIF-alpha) or RNA polymerase II-associating protein 74 (RAP74) is the large subunit of transcription factor IIF (TFIIF), which is essential for accurate initiation and stimulates elongation by RNA polymerase II [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 528 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -13.00 0.70 -6.13 8 332 2009-01-15 18:05:59 2003-04-07 12:59:11 7 11 190 9 210 310 2 299.90 25 69.07 CHANGED uut...suos.spslpEYsVRVspsscK.+YalMR.......FNupcsV-hspWs...tsphER-.s.+.hht.pphh.-hGtGSEas+stREEuRRK+aGh..+ca+.-sQPWlLchst.c-G..+pF+Gl+EGGsoEpAoYYlFs+ss-uu.FcAaPlspWYNFsPlApYKsLThEEAEEcappR+KThNtFslhhhKthpss.s.hu-csE-c-t....pKuGsG+u+.....L+I+DhpsD--.-ussS-pu-ED--Etts.+Kc......s+hAKN+Kp..sDpK+p+RsuD-...-sh-.DS-DGD-EG+EhDYhSDsuusus-.-..........................E+E-hhusEcssKt-.cQs-cs-EcE-EcsEc-ut.SKctKKsKK.psKpsch..c-Sp-s-sSD-SDsDs-susshhpspKpKc.t+-EslsSusSusssStPupPossstsopuKRKhs...........spsSpsPsSsssKKlKhEssPpSs.tKuhPso.St................ssossuu-hslTEEsVRRYLh.RKPhTTpELLsKF+sKpssLSoEcsV.....sshApILK.+IsPlpKshpsphahsL+p .........................................................t......................................................................................................................................................................................c.t..PWhlc......c.s...........tta.G..hcu.u.s...........ssahl.hh.......st.....t...hphhPhppa..YpFp.htpapsLohEEAEptht..++pp....s.t..h.hh..+th..t..tt.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................... 0 63 99 165 +5622 PF05794 Tcp11 T-complex protein 11 Moxon SJ anon Pfam-B_7604 (release 8.0) Family This family consists of several eukaryotic T-complex protein 11 (Tcp11) related sequences. Tcp11 is only expressed in fertile adult mammalian testes and is thought to be important in sperm function and fertility [1,2,3]. The family also contains the yeast Sok1 protein which is known to suppress cyclic AMP-dependent protein kinase mutants [4]. 20.40 20.40 21.80 20.70 20.00 20.30 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.33 0.70 -5.77 30 596 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 254 0 373 557 18 379.30 23 61.79 CHANGED pLtH-Ishs.shph+...Ps.su.cu+hKcphpcsaW-tlppplstt....................sPpasphlpLhppl+-hLhSLL...ss+h+spIsElLDh.-LlpQQhc.+GulD...hsphupalhslhtphCAPhRD-tlpchpppl...................pclVptLRtIFplL-hMKlDhANapIpshRPhLhcso.V-YE+ctFpphl...p+sssslssopcWhpcstscthpphpptst.............................................t.ss.ssh.llhpuhlcL..lhptptspsFPETLhhDpsRLtpL+splppLsslusslllhpshlssthpss.........sphhs+lppplhsLlpt......sss+hscslpsIu.plspp............htpshsppstsslssshpsslputLtphhpss.ssVhplhcp+lhphlhstlh...............................ssspcshsssssshs.ht.............................ElpplupphspLssashpVaGs.aYhch..........lpph 
............................................lhH-l.hs.phphc......s...su.............psch+chht.....psaWctlppplptt..............................................................sph.s..phltL...h..p..pl+-....hLh.s.hl...............................sch.+...sp.....IpEsLDh.-ll.pQphc...pG.shc........................httlspalhsh...htp.hCAPhRDpt.lpphtp.h.........................................t.phVp..hl....+tlh....plLchMK....h...........DhsNatl.....p.hp.PhL..hppu...lpYEpptFpchl........................ppts.......sL...shoppWhppshpph.ht.ph..pts..............................................................................................s..sh..hlh..puhlpL............h..s......t......ct..h.P.E.TlhhDptRltplptphpplh.hhusshLlhpshht.t..h..s..............................t.hp..plt.phhhs....llpt.......................p....p.pp......sh.sls..ph..t.......................................................tsh..t.t..t....s...sh....s..p.p......t....h...........s.l.....t.p...hp...p......s.slhplhpp+lhhhl.shlh.................................................................................t.ttt.....h.shsh.......................................................................clttlst.thhp.lhthp.tVaus..hY.th....h.................................................................................................................................... 1 87 166 269 +5623 PF05795 Plasmodium_Vir Plasmodium vivax Vir protein Moxon SJ anon Pfam-B_7631 (release 8.0) Family This family consists of several Vir proteins specific to Plasmodium vivax. The vir genes are present at about 600-1,000 copies per haploid genome and encode proteins that are immunovariant in natural infections, indicating that they may have a functional role in establishing chronic infection through antigenic variation [1]. 
25.50 25.50 25.80 25.70 25.20 25.40 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.73 0.70 -5.25 125 471 2012-10-01 19:45:41 2003-04-07 12:59:11 6 4 6 0 383 877 0 296.30 13 89.84 CHANGED Lsutch.........Ycchsp........p.tp.........p.hsshCp.phpph.........pplp....clCpKlhctLc..hlsptp.pppct............ChalpYWlY-pltphhp......tp..p.h..hhsclhplhpph.ppth...........p.pt..............................................Cp..hphh....hcp..hcccKpLaDYacsacsIcpphpspspp..C..pp.Yh....................pYlpphppLYcc..a....ppp..Cspttp.................shCscahcp..ppcY.sPpslLspL.pCt.t.......................................tpttt...t...............tttht...t..t..........t.t...t..............................................................................................tt.....tth.....ss.hhshh........hh.hh.hhh.p.............................................htp.hppthtppppthpp.httphth.h.t.tstt...t........................pph.ls....Ypss .......................................................................................................................................................................h....t......h.................th..........phh.p..phhp.hp.........htp.......ttp....................C..ahpYWl..hsplhphht.p.................hhpphhp..hhpph.p..pt.................p..pp..............................................C.p....h.ph........hsp...hcchKtLa-Yh.p.s.ap.pl...p..p.t.....h....p..stp.......pt....C....pp.ah..................................pYlpthtp..h.Ypp....h..........ppp.....Cspttp.................thC.pa...h.ph................pph..ps...p...plltpl....pstt.........................................................................................................................................................................................................................................................................................................................................................................................................
.....................................................................................................hh....h.h.p............................................................................................................................................................................................................................................................................................................................................................. 0 0 1 300 +5624 PF05796 Chordopox_G2 Chordopoxvirus protein G2 Moxon SJ anon Pfam-B_7672 (release 8.0) Family This family consists of several Chordopoxvirus isatin-beta-thiosemicarbazone dependent protein (protein G2) sequences. Inactivation of the gene coding for this protein renders the virus dependent upon isatin-beta-thiosemicarbazone (IBT) for growth [1]. 25.00 25.00 126.20 126.00 18.50 18.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.37 0.70 -5.21 12 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 48 0 213.00 60 96.15 CHANGED MsFR-LILFaLuKaLlTcsctuh+phlSLCRuFslchpcllscFhsp+hh+plopslppss.lLsElslsFPssll+-LlpLRLp+FsKslKtSh+LssshpGIsll.cspplhlhpsNDpLlsaLh+cYsPplYpYs.........stPsshsGu..............+lllCGhsploFasYhho.+IsoNpclcVlVT-pCIppLLp.tNtpLLcplF..c+usssls+sL+cIF....YSl MPFRDLILFNLSKFLLTEDEESLElVSSLCRGFEISY-DLIoYFPDRKYHKaIsKVF.EHsD.LoEELSMEFpDTTLRDLVYLRLYKYSKsIRPCYKLu-NhKGIVVI.KDRNIYIREANDDLIEYLLKEYTPQIYTYSs.........E+ssIsGS......................KLILCGFSpVTFMAYTTS.HITTNKKVDVlVoKKCIDcLlDPINYQILpNLF..DKGSGTINKILRKIFYSV... 0 0 0 0 +5625 PF05797 Rep_4 Yeast_TAF; Yeast trans-acting factor (REP1/REP2) Moxon SJ anon Pfam-B_7680 (release 8.0) Family This family consists of the yeast trans-acting factor B and C (REP1 and 2) proteins. The yeast plasmid stability system consists of two plasmid-coded proteins, Rep1 and Rep2, and a cis-acting locus, STB. 
The Rep proteins show both self- and cross-interactions in vivo and in vitro, and bind to the STB DNA with assistance from host factor(s). Within the yeast nucleus, the Rep1 and Rep2 proteins tightly associate with STB-containing plasmids into well organised plasmid foci that form a cohesive unit in partitioning. It is generally accepted that the protein-protein and DNA-protein interactions engendered by the Rep-STB system are central to plasmid partitioning. Point mutations in Rep1 that knock out interaction with Rep2 or with STB simultaneously block the ability of these Rep1 variants to support plasmid stability [1]. 25.00 25.00 28.40 27.90 21.70 21.50 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.07 0.70 -5.74 6 16 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 15 0 5 17 0 267.80 26 83.62 CHANGED pss+Elhphlp-shhhhshhslYsPppshllTs+GohplP-sYpcYPhLAlaYVpYhlpK.Pasll.s-L..-WP-PYVVlNTIh+RLpsH+ah.uNp.tsshs-cl+phIusul-IPpsp..csEhLsp.tKs+phstph..phh-s.ss+pclpcFFscLp-hsthushasshoKlclhlSCpuhh.upu.clphhApslR+lWltEhlhshsspcpc..tshDp..ssh-Dsp.....sptppsuus.p.scAp...........................uaWchlcsLpcp.p.pspp.pphchlAshlhuppcthtshpppRcpsRshLYhpl+thL.pphphpha+Gspshps.plKlSL+hpcp ......................................cth..hh.pshh.h...hla..tpp.l.T.pGhh.lPcpYtpa.hLAhhaltahhpp.sh.hh.ppl..-WPcPhlVhNTlhcRh.pH..h..tph..sthhppl+thlu.slphs.pt.....hht..tp.h.h..th..phhcs..sppclptFFphLpshsthtp.a.s.oKhhlhhSspshh.tps.phphhtpslRp.hltEhl.phhppcpp...hh-p..sst-Dsp.....sptp.su.stpsspup...........................thhp.lssLpcp.pspspp.sphchluthlhup.pt.tphphpppph+shlYh.l+thl.tsphplchYRGsphpps.slKlSL+.pp..................... 0 1 3 4 +5626 PF05798 Phage_FRD3 Bacteriophage FRD3 protein Moxon SJ anon Pfam-B_7781 (release 8.0) Family This family consists of bacteriophage FRD3 proteins. 
25.00 25.00 142.30 142.10 21.50 19.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.38 0.72 -3.66 4 35 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 31 0 0 15 0 75.30 80 99.28 CHANGED MAKVsIDlVDFEYhEElIRNRYPELSIsSlpDopFaolplslpGPLEcLppFMsNEYCDGMcsEDAcFYhGLIcp MAKVDIDIVDFEYIEEIIRNRYPELSITSlpDop.FhShQIVIEGPLEDLTRFMANEYCDGMDSEDAEFYMGLIEQ 0 0 0 0 +5628 PF05800 GvpO Gas vesicle synthesis protein GvpO Moxon SJ anon Pfam-B_8221 (release 8.0) Family This family consists of archaeal GvpO proteins which are required for gas vesicle synthesis [1]. The family also contains two related sequences from Streptomyces coelicolor. 21.00 21.00 21.00 21.60 19.00 20.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.01 0.72 -4.12 17 117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 92 0 50 129 1 98.40 33 85.36 CHANGED tppptt.ps.......sttssphhsht-shptAppshp-Lht+chEulsuspps-D.GWpl.VEVlEpptlPDTpDllupYElpLDssG-lhuYcRlcRYcRGchcp ...............................................tttt.................t..shhpshctAtpplscLhGcp...s-uVoulpps.......--...GWpltVEVlEh...p...RlP...c...Tssl.....LAp.....YEVpLDscGcLhuY+RlcRYpRGpsc.t... 0 17 39 48 +5629 PF05801 DUF840 Lagovirus protein of unknown function (DUF840) Moxon SJ anon Pfam-B_8265 (release 8.0) Family This family consists of several Lagovirus sequences of unknown function, largely from rabbit hemorrhagic disease virus. 
25.00 25.00 213.00 212.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.40 0.71 -3.95 2 29 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 9 0 0 26 0 113.10 93 97.04 CHANGED MSEFlGLsLAGAusLSsALLRpQELtLQKQALEsGllLKAcQLupLGFNP.EVKsllV.GpuhspNhRLSNMHNDuSVVNuYsVhNPsSNGIRpKIKShNNSVKIYNTTGESss MSEFIGLGLAGAGVLSNALLRRQELQLQKQALENGLVLKADQLGRLGFNPNEVKNVIV.GNuFSSNVRLSNMHNDASVVNAYNVYNPASNGIRKKIKSLNNSVKIYNTTGESSV 0 0 0 0 +5630 PF05802 EspB Enterobacterial EspB protein Moxon SJ anon Pfam-B_8424 (release 8.0) Family EspB is a type-III-secreted pore-forming protein of enteropathogenic Escherichia coli (EPEC) which is essential for EPEC pathogenesis [1]. EspB is also found in Citrobacter rodentium [2]. 21.60 21.60 21.70 21.70 20.70 21.50 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.07 0.70 -5.48 3 178 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 139 0 5 79 2 257.00 53 97.58 CHANGED MNTIDYNNQVhsVNSVSESTTGSuSuTAo.s.SIsSSLLTDGKVDISKLMLEIQKLLGKMVTlLQDYQQ+QLAQSYQIQQAVFESQNKAIEEKKAAATAALVGGAISSVLGILGSFAAINSATKGASDIAQKAoSASSKAlsAASEVATKALVKATESVADAAEEASSsMQQAMATATKAASRTSGVADDVAsSAQKASQlAE-AADAAQ....KASRlSRFhAAVDKITGSTAFVAVTSLAEGTKTLPTTISESVKSNHEINEQRAKSVENFQQGNLELYKQDVRRoQDDIASRLRDMToAARDLsDLQNRMGQSuRLAG ................................................................sNss.psTTsusu.s.ss...u.sSSLLTDG+VDls+LhLElQKLLtKMVslLQDYpQcQLuQSYpIQhAVFESQNKAI-E....K.KAA....ATA...ALlGG..hISSsL....GI.LGS.F.........A......Ah...N..s...AsKG..Au-lAp...p...su.S....sSSKu.......hs...AAS-....susKsLsKAoESlADss--AoSsMQpshsTss+AASRsSsVsDDlA....+ASphAEphADAAp....Ksuhhs+hhsuss+losoTsFlsVTSLAEGTKTLPTTlSESVKSsH-lsEQRhKSlENhQtuNL-hYKQ-VRRsQDDIsoRLRDhTossRDLh-l.NRMGQusRLAG.................................................... 0 0 1 2 +5631 PF05803 Chordopox_L2 Chordopoxvirus L2 protein Moxon SJ anon Pfam-B_8620 (release 8.0) Family This family consists of several Chordopoxvirus L2 proteins. 
20.80 20.80 22.80 22.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.00 0.72 -4.36 8 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 40 0 0 39 1 87.00 50 96.83 CHANGED AEVLhsKL+sIEp-NhhNEKllDCIIpE.IE+ppaalhRPhlRLllDllIllIVls.lhlRllKRNYplLLlhhhhYlhhchhtYhtl ...hEVIsDRLDDIVcQNlADEKFlDalI+t.LE+QsPuILRPllRLhIDlLLhVIVIhIFTlRLVsRNYphLLsLlslslsLslFhah..L...... 0 0 0 0 +5632 PF05804 KAP Kinesin-associated protein (KAP) Moxon SJ anon Pfam-B_8674 (release 8.0) Family This family consists of several eukaryotic kinesin-associated (KAP) proteins. Kinesins are intracellular multimeric transport motor proteins that move cellular cargo on microtubule tracks. It has been shown that the sea urchin KRP85/95 holoenzyme associates with a KAP115 non-motor protein, forming a heterotrimeric complex in vitro, called the Kinesin-II [1]. 19.40 19.40 19.40 19.50 19.30 19.30 hmmbuild -o /dev/null HMM SEED 708 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.38 0.70 -13.27 0.70 -6.61 3 298 2012-10-11 20:01:00 2003-04-07 12:59:11 7 19 178 0 190 401 17 448.30 28 74.69 CHANGED 
VKGGSIDVHPTEKALIVNYELEATILGEMGDPMLGERKECQKIIRLRSLNAsTDIAALAREVVEKCKLIHPSKLNEVEQLLYYLQNRKD.....................Spp+SGA+K+EpouKhK.................DPPPaEGsElDEVANINDIDEYIELLYEDlPEKVRGSALILQLARNPDNLEELppNETsLGALARVLREDWK+SVELATNIIYIFFCFSSFSQFHuLIsHYKIGALCMsVIDHELKRHEoWpEELsKKKK...........................ssEEcP.....................................ts...++DYEKSh.........................................KKY+GLlKKQEQLLRVAFYLLLNLAEDsKlELKMRNKNIV+MLVKALDRDNhELLILVVSFLKKLSIasENKNEMuEhsIVEKLsKLlPC-HEDLLNITLRLLLNLSFDTGLRsKMIplGhLPKLVtLLuNDNH+tIAlCVLYHlShDDKsKSMFTYTDCIPMlMKMllEsssERlDLELIALCINLAsNKRNAQLICEGNGL+hLM+RALKF+DPLLMKMIRNISQHDGPoKsQFIDYVGDLARIIops-DEEFVVECLGILANLTIPDLDYEhILpEFpLVPWIK-KLcPGAAEDDLVLEVVlhlGTVAsDDSCAALLAKSGII.SLIELLNAKQEDDEIVCQIIYVFYQMVFHEATREVIIKETQAPAYLIDLMHDKNsEIRKVCDNTLDIIAEaDEEWAKKIKLEKFRWHNSQWLEMVESQQhDDuEQhL.YGD-chE.....PYIpESDILDRPDL ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t........hs...h.hh.L..hNhA.s....p.c.ph..h.p..tt.hlth.....Ll.ch.l..p....p..p..s...h..-..lhh......slshlppLu..h.h...-.N.Ksph...........s..p...s..hltt.Ls+hh................s..p.p..........c........l..p....sh....t....hLhN.loas..s.th.Rp...phlp.sGhlP...hLs.thl.........s.....s..t..p..h..p..................h...s..
h.....p.....h....Lhpluh.D.-.p........+..............t.hh.s...T.-..s..l.........................l.hph.................l...hp.s.......p...cl..p......h.p........h......h.hhhN.Lu.s.p.c.........hs...c......s.....pt.L....t.Lh.c...h.h.......p......t...........s....l...l...h...p...h...lRNl...S.....H.....................p.........h.h..p.............a.........lt...s............ls.t....l.t......t........c..p..........c...p....h.hcsluhLtNLsh.....t.........ph..hh........................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 77 116 158 +5633 PF05805 L6_membrane L6 membrane protein Moxon SJ anon Pfam-B_7771 (release 8.0) Family This family consists of several eukaryotic L6 membrane proteins. L6, IL-TMP, and TM4SF5 are cell surface proteins predicted to have four transmembrane domains. Previous sequence analysis led to their assignment as members of the tetraspanin superfamily it has now been found that that they are not significantly related to genuine tetraspanins, but instead constitute their own L6 family [1]. Several members of this family have been implicated in human cancer [2,3]. 
21.80 21.80 64.80 59.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.36 0.71 -4.75 8 302 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 46 0 161 252 0 186.50 40 95.13 CHANGED MCosKCuRClGhSLlsLALhsIlANlL.LaFPNG-TpYsocs+LSpaVWaFuGIlGGGLhhLhPAhVhluhcpcsCCGCCupEshGKRsuMLoSlluullGllGuuYCllVuulGLs-GPhChs......s.................pstWsYsFss......Tp...GpYLhDsolW.SpCpEPpHlV.WNVoLFSILLsluuLEhlLCLIQVlNGlLGsLCGhCps ........MCotpCu+ClGhoLlsLulhsllsNlL.LhFPsGcsp..h..sp...ps......plop.VWahuGllGuGlhhlhPAhhhlshtp.pssst.C...httt..t.supR......h..sM...h.....tSllhuslGllGusYChllSuluLtpGPhChh......................................sttWtYsFps.......................pt....usYL....h...s....p...ohW.........s....pChEPtplV.WNloLFSlLLsluslphlLChlQllNGllGslCGsCp......................... 0 6 14 57 +5634 PF05806 Noggin Noggin Moxon SJ anon Pfam-B_7925 (release 8.0) Family This family consists of the eukaryotic Noggin proteins. Noggin is a glycoprotein that binds bone morphogenetic proteins (BMPs) selectively and, when added to osteoblasts, it opposes the effects of BMPs. It has been found that noggin arrests the differentiation of stromal cells, preventing cellular maturation [1]. 
19.60 19.60 19.60 19.80 19.10 19.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.83 0.70 -5.05 6 133 2012-10-02 16:54:34 2003-04-07 12:59:11 7 3 68 1 66 126 1 199.30 36 87.85 CHANGED hllushhLLl.LslhhctusCQHYYhLRPlPS-sLPll-L+EcPDPlaDP+E+DLNETpL+slLGs.pFDssFMSltsP.c-ptuGs--Ls-.-L....p.sGhMPtEIKsL-F.Ds..hGKK+KsSKKL+R+LQhWLWuYoFCPVlYsWpDLGsRFWPRalKsGSCaSKRSCSVPEGMsCKPsKSsHLTlLRWRClp............................................................R+suhKCAWIPlQYPVIo-CKCSC .................................................................s...............................................h.........p...Ds......hs....P+.pcL.spphL+phLGs.paDstaMuhs.P...t................t......h...s...u........sp...pl.s...t....p........tsthsp.-.l+....sL..-.h...c...........h.....Gp..+.....+..lu+....Kh+R+.......lp.WLWs.....o..a.....CPVhYsWpDLGsRFWPRYlKhGsC.h..o...c...+..S........CS..hPc..G.MsCKP.....u..cSsplolLRW+.C....p..................................................................................................................t..p..ss..ppCsWI.lpYPlIo-C+CSC.............................................................. 0 19 27 47 +5636 PF05808 Podoplanin Podoplanin Moxon SJ anon Pfam-B_8548 (release 8.0) Family This family consists of several mammalian podoplanin like proteins which are thought to control specifically the unique shape of podocytes [1]. 
29.00 29.00 29.20 29.60 28.90 28.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.03 0.71 -4.68 4 44 2009-09-11 20:54:06 2003-04-07 12:59:11 6 2 33 1 24 54 0 151.90 41 76.82 CHANGED MW+VPVLhaVLGSAhhWs.ApGuohsRs.......EDDlVTPGspDuhVTPGlEDplsTsGATct..ES.GhAPLVPsppEpsT+..hE-LPTstsosH-tcEppST........TTlpV.....VTSHSt-K.........su-ETpTTscKDGLuVVTLVGIIVGVLLAIGFlGGIIIVVh+KhSGRYSP .......................................................................................................p..hhhhl.Gs..h...h..u.ttushs.s........EDsh.TsGht.sshshsGh..EDphsTsusopc...pS.uh.os.LVso...sspos.Ts.h.ph..EDhsT.s-ST.sHupcpopSs........Tss.sV.....sTSH..Ss-K................sss-TpTT......l-.KDGLuTVTLVGIIVGVLLAIGFIGGIIlVVhRKMSGRYS.......... 0 4 6 8 +5638 PF05810 NinF NinF protein Moxon SJ anon Pfam-B_8528 (release 8.0) Family This family consists of several bacteriophage NinF proteins as well as related sequences from E. coli. 20.00 20.00 20.20 20.00 19.90 19.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.25 0.72 -4.17 4 198 2012-10-03 10:42:43 2003-04-07 12:59:11 7 3 178 0 2 84 1 53.30 71 91.73 CHANGED MlsP.QspuYEpESltRAL.CAsCuppLcs.ElHVCEcCsAE.Lh.pDsNusMtEE.DDE ............MlsP.QshpYppESV-RALTCANCGQKLHVLEVHVCEcCCAE.LM.SDPNSSMYEE.-D-.......... 0 0 0 1 +5639 PF05811 DUF842 Eukaryotic protein of unknown function (DUF842) Moxon SJ anon Pfam-B_7096 (release 8.0) Family This family consists of a number of conserved eukaryotic proteins of unknown function. The sequences carry three sets of CxxxC motifs, which might suggest a type of zinc-finger formation. 
23.80 23.80 24.00 23.80 22.60 23.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.28 0.71 -4.69 17 215 2009-09-11 00:15:37 2003-04-07 12:59:11 8 5 139 0 146 221 2 122.90 32 78.96 CHANGED cppppclpsAlpphlsslpcphlhp...........hQtphF+CuscChpDpssoh-plppClEpCpsPltcAQphlpsElupFQsRLpRChhsCpDchcsth..p.s...ctchsp...phEpCsspClDcplsh.LPshhpph+csL .....................................pttclptulpphlpslp+phl+p........................hQ...tthacCu.ApCCp....D.pps.......ohcpVppClE+CpsP.ltpA.QshlpsELppFQ...........sRL..pRCshpCpD+hcsp...hsss..s.p......c.phtp.............................ph-sCsspClDcahpl.lPshhcch+ps.............................. 0 47 72 111 +5640 PF05812 Herpes_BLRF2 Herpesvirus BLRF2 protein Moxon SJ anon Pfam-B_7251 (release 8.0) Family This family consists of several Herpesvirus BLRF2 proteins. 25.10 25.10 25.30 35.60 25.00 25.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.57 0.71 -4.29 15 36 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 23 6 0 31 0 118.10 37 90.26 CHANGED sshohE-LsscLs+LchENKsLK+plppuss............Ps-chLTssQKEslIsusls+LoupApcKIEt+VcppssslVT+pph-cslpslolRlcVShc-.tht..............sps+pRRu+S+o+ .s.shohE-Lstclp+LplENKsLK+pltpuss............ssDphLTsspKEuhIsussstLsutAt+KIEt+V+ppsstlVT+pphpssLtslolRl-VSh--stht............t.sps+pRRutS+oR..... 0 0 0 0 +5641 PF05813 Orthopox_F7 Orthopoxvirus F7 protein Moxon SJ anon Pfam-B_7318 (release 8.0) Family \N 25.00 25.00 36.50 36.50 24.60 18.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.93 0.72 -4.09 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 19 0 0 28 0 73.10 85 94.88 CHANGED MTLVMGSCCGRFCDAKNKNKNKKEDVEEGGEGYYDYKNLNDLDEFpTRVEFGPLYMINEEKSDINTLDIKRRYRHAIESVYF ..MTLVMGSCCGRFCD...AKNKN......K..KEDlEEttEGCYsYKNLNDLDEu.sRlEFGPLYMINEEKSDINTLDIKRRYRHsIESVYF........ 
0 0 0 0 +5642 PF05814 DUF843 Baculovirus protein of unknown function (DUF843) Moxon SJ anon Pfam-B_7353 (release 8.0) Family This family consists of several Baculovirus proteins of around 85 residues long with no known function. 22.50 22.50 22.60 22.50 22.10 22.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.81 0.72 -4.19 15 82 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 82 0 1 50 0 82.30 40 94.80 CHANGED MhlhhshlsLIlluFlhsKspshSpLllhlLlLFVlFlhlLplYYl..ps-Ss.stDL.TccsK+lKKKpcLpcAFDAILNKNsSS ...............M.IhhhlLuLlslGhlhs+hcuhssLllhlLlhhllFhllLpl..a..hs..KT-Ss.spcl...o.p.K.....sKpsKKKRplppthDAllNKNpSS........ 0 1 1 1 +5643 PF05815 DUF844 Baculovirus protein of unknown function (DUF844) Moxon SJ anon Pfam-B_7453 (release 8.0) Family This family consists of several Baculovirus sequences of between 350 and 380 residues long. The family has no known function. 25.00 25.00 29.60 29.50 20.60 19.70 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.99 0.70 -5.60 22 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 52 0 0 57 0 349.20 38 96.85 CHANGED MosltLFLcIEpLKNpl.DppMphsIWsKFFPLLu-s...solsLshspl.-FLsssAph.upsshsppNAAlsSQa..............sssssssssstss.tpslhNlass....sstsssssssshshppY+psspKllpYYo.usToSo-F+VpDlVtsMlYLu+oP+Y+PLapLLEssh.pc-h-ChPsloscphpsll-hLRsLhshsohplDapslclh+sohs+shN.PlsRaP+Vhlhpsts....lspDKcsol--Lll-Rh-tlppLcsQphlsu...ss+IPaCsDsphIscLl+h.hcsaslsRMaYNAsNoIFYsTMENYAssNCKFslsDYNpIF+shDpl+Ehspp......hppsttuDsLslhLusss .MSsltLFLcI-pLKspI.DppMphsIWP+hFPLLuDs...sslsLshspl.pFLsssApt.upsshsssNAAlASQa...........ssssssssssssssstpsllNlFss........th.sspssssshshppaRpsspKllpaYoLssToSo-F+VpDlVhsMlYLu+oP+Y+PLapLLEssh.p--h-ChPshossphppllDhLRsLL-hPootlDasslclh+sohs+shN.PlsRas+lhlhpsss....lspDK+sTlEELllERuctIppLcPQQalsu...sscIPaCcDscFIscLlKh.hDsasLsRMaYNAANSlFYTTMENYAluNCKFslcDYNpIFKl.hDslR-hssp......hhtp..sppsDsLNlaLusts............ 
0 0 0 0 +5644 PF05816 TelA Toxic anion resistance protein (TelA) Moxon SJ anon Pfam-B_7534 (release 8.0) Family This family consists of several prokaryotic TelA like proteins. TelA and KlA are associated with tellurite resistance [1] and plasmid fertility inhibition [2]. 30.00 30.00 30.50 30.70 29.10 28.60 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.91 0.70 -5.73 81 1233 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 1042 0 200 815 46 325.10 30 86.17 CHANGED pspphupplD.ts.tslhs.aGspsppphupaSpphLsp.V+sp..............-suslGchLspLh....pplcph-sschtt...ppp.shlu+..hhs.+httplpchhs+YpsspspIDpIhtpLpptcspLh+DsthL-phappshpaappLshYItAGchthpclcpph..lsthppcsp....tpt..hthpphs-hpphlptL-pRlpDLphsctlulQshPpIRLlQpsNppLscKIpouhssTlPlaKsplslAlsLtcQ+psscshpsls-sTN-LLppNA-hL+psoh-hs+pspcuslDlETLcps.psllsTl--shpIpp-upppRppsppcLpphpp-l+ppLhphc ..................................htphsppls..tsp.psllsaGsssQ..pphupaSp.phLsc.Vpsp.................-.lu.slGctLspLh.......pplcph.....ss....s-..l........ppc.shhp+....lFp...+...s...c..pslpchhs+YQslssplD+IshpLp+ppspLh+DhthL-pLY-pNppaacsLshaIhA.......Gctph.......pc.......lpsch..lPthppcup......sssQ..hshpplschpphl-cL-pRlpDLphu+plulQosPQIRhIQpsNpsLs-KIpoulhsTIPlaKsphslAlsLh+..Q+pussutctls-sTN-LLppNA-hLKpsslEsA+pspcuhl-l-T.......Lccotpsl...lpslpEohpIppcG+ccRptucpcLtphpp-lKpcLhp..p........................ 0 65 133 162 +5645 PF05817 Ribophorin_II Oligosaccharyltransferase subunit Ribophorin II Moxon SJ anon Pfam-B_7633 (release 8.0) Family This family contains eukaryotic Ribophorin II (RPN2) proteins. The mammalian oligosaccharyltransferase (OST) is a protein complex that effects the cotranslational N-glycosylation of newly synthesised polypeptides, and is composed of the following proteins: ribophorins I and II (RI and RII), OST48, and Dadl, N33/IAP, OST4, STT3. The family also includes the SWP1 protein from yeast. 
In yeast the oligosaccharyltransferase complex is composed 7 or 8 subunits, SWP1, being one of them [2][3]. 21.30 21.30 21.60 21.30 21.20 20.80 hmmbuild -o /dev/null HMM SEED 637 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.78 0.70 -6.32 9 364 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 255 0 205 337 3 381.10 26 91.52 CHANGED us.u.tshhhlllLsLhssshA.hoso+aLoss-hsRhppshsp....shsDLcsAYYulhsLp.LGhpssD..tppsCchlcsslssSS...h-slFYAupuppsLu.Ccls..lss-scshLhAulp-DuolsQ.IYauVuuLssh.....GLsl...supplhpALp.splSK..........E-olhAshhAhpsAutLuptAs.................Lsshl-clEDhssphDElsGshLQFEsGLssTALhVsusa+LuspluspsslptEQllpLsshhhS+pshpohp-sFsllpAhusLSsN+aalPlllhhsGsutl.pcpslL+lpVTsVLupPLotAsVplspA.sSpoopssllppsshs...hcssl...............apLNhhsspPssGhYshslpl...cu-hRhlhsp.spLKVpV..oscVuIsss-lullDpDp.uhusKpp+VsaPsKscsshsADSpQphsLpFQLhDssoGtslsPHQsFV+L+NpcTtQEllFVAEPDSpphYKFELDsutRtp-.FsphSGpYsl.LIVGDAslENshLWNVAcl.LKFs-......c.ssushpupshatPKPEIpHlFRpPEKRPPphVSssFTALlluPLllLhlLWhKLGsNlSNFshSsu....sIlFHlGhuAhLsLhhlaWh+LNMFpTLKYLulLGshTFLsGNRhLuphAs+p 
............................................................................................................................................................................................................................................................h.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.h.l.....s....t..s...t.....h.p.sHQ.sFlhl......t.......p...p..o.t.-h.a.s.h............p.p......t....t.....hp..hp...l.sh..tph.....p.h......s..s......hph...lll..Gs.ss...hp..s..shh.hpl.........spl.lph.st.............................t...s..s.s.h....t....s..has............p.EIpHlFRts..tKpPPthlS.sFs.......s.h.l.......lssh.hhLhhhW.hp.l..G.......s...N..l.s.p.h....shu......s....................sh.hFa.h.u....hu.uh.h....s....l....h...h...l...a...ahphshFpTL.hhhhlushshlsGs+hLtp.t............................................................... 0 69 112 167 +5646 PF05818 TraT Enterobacterial TraT complement resistance protein Moxon SJ anon Pfam-B_7686 (release 8.0) Family The traT gene is one of the F factor transfer genes and encodes an outer membrane protein which is involved in interactions between an Escherichia coli and its surroundings [1,2]. 
28.30 28.30 28.60 29.40 27.90 28.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.37 0.70 -4.92 15 363 2009-09-13 11:01:34 2003-04-07 12:59:11 7 3 289 0 41 252 5 196.60 62 87.08 CHANGED sL-VpTKMSpTIFL-PVsssc+pVYlpl+NTS...D+sl.slcspltssLpu+G..Y+llssP-cApYhlQsNVLps-Kts.ss.upshLpsGatG...AsuGAAlGsuluuhs..SuuuGuslGhGLAG....GLlGhsusAhV-DlsYpMlTDlQIpE+scssVhsspt....puslKpG......................sSuspsQ.TpocsoshpcY+TRVVosANKsNLchEEApPlLEcpLA+sIAGIF .....................NL-VpTpMS-TIWL-P....uuc+TVaLQIKNTS.....D.K-h...s...LpuKIssAlpAKG..YpVVouPDcAaYWIQANVLKADKMDLRE..uQG.aLspGYEG...AAsGAA.LGAGIT..uYN...SsSAGA.TLGVG..LAu..........GLlGMAA.D.....AMVEDlNYTMITDVQI.....uERT..+..s.sVpTDN.....VAALRQG...........................TSGuKlQ..TSTETGNpHKYQTRVVSsANKVNLKFEEA+PlLEDQLAKSIANIL............................................................................................ 0 9 21 33 +5647 PF05819 NolX NolX protein Moxon SJ anon Pfam-B_7801 (release 8.0) Family This family consists of Rhizobium NolX and Xanthomonas HrpF proteins. The interaction between the plant pathogen Xanthomonas campestris pv. vesicatoria and its host plants is controlled by hrp genes (hypersensitive reaction and pathogenicity), which encode a type III protein secretion system. Among type III-secreted proteins are avirulence proteins, effectors involved in the induction of plant defence reactions. HrpF is dispensable for protein secretion but required for AvrBs3 recognition in planta, is thought to function as a translocator of effector proteins into the host cell [1]. NolX, a soybean cultivar specificity protein, is secreted by a type III secretion system (TTSS) and shows homology to HrpF of the plant pathogen Xanthomonas campestris pv. vesicatoria. It is not known whether NolX functions at the bacterium-plant interface or acts inside the host cell. NolX is expressed in planta only during the early stages of nodule development [2]. 
25.00 25.00 238.20 32.40 20.40 20.00 hmmbuild -o /dev/null HMM SEED 624 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -12.97 0.70 -6.16 5 94 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 40 0 19 88 1 324.40 28 76.06 CHANGED psDSsLhSohDsL.hp.hapLhuulppssp............ssusuhsA.sssuusD.QusQP.p.tT........Fs.tcl+tscAPPshpGS.oVTWpGGTLosoELQIlusLNsHKDphslpatpLpDKINDPsTPPDLKSALQuLpKDPpLF.AIGSQGDG+hGGKIKutDLh-FucpH.QV............................lTWsuGTLspspLEIhShLspH+D.hPlcauplptKINDPuTPPph+tAlpuhpQsPt.hhAhss....................................................h.sPlpupplTWsGGoLopsELpIVAsLNRHKDhCPlpWpsLpsKspDPuhPPDLKAAlpuLQQDPcLFaAIGSQGD.G+CGGKITAKDLS+FS..cHHuQVApYs-pQAcuYsQNYIPSDSs-sscPSVMTENDAMRELYRYSDYLPKcLsh-sFKQIVDGDScTKKCPPQVIAAAQYFlsHP-EWKuL..huGsp-+VuKsDFLQ+ASSuMHLTpsELcTLcTINSHQ-sFFGDGp-lTRDKLAoMucD-SLDPAVR-AAoQLLuDPLLFGLLNNAITGYKT+HuFFsFGGGHTVDSGsISpKDFp+FYssMTuANKTVQpPKTHsAsSsApQcAVADMhMG+ADQPDIKusKKsGGAFp+ulc-hLKh.SKlhDhhSsulSALuuIPllG ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 1 8 10 +5648 PF05820 DUF845 Baculovirus protein of unknown function (DUF845) Moxon SJ anon Pfam-B_7739 (release 8.0) Family This family consists of several highly related Baculovirus proteins of unknown function. 21.20 21.20 24.30 23.20 18.50 15.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.61 0.71 -4.36 20 63 2009-09-11 05:36:17 2003-04-07 12:59:11 6 1 59 0 0 61 0 118.20 55 54.02 CHANGED lsRlKYDucLLl+YLFDstspt.............ssssNlIKICKV+V+KTsGolLAHYYA+IplSNGYsFEFHPGSQP+TFQslH...o-GplltlhlLCDECCKcEL+sFV-GENsFNlAF+NCEoILCKR ..........................sRlKYDu-LLl+YlFDshss..............spshNVIKlCKV+VKKTsGolLAHYYApIplSNGasFEFHPG..S.QP+TFQslH......oDGhlItlhlhCD-CCKcEL+palcGENsFNlAF+NCESILCKR..................... 0 0 0 0 +5649 PF05821 NDUF_B8 NDUFB8; NADH-ubiquinone oxidoreductase ASHI subunit (CI-ASHI or NDUFB8) Moxon SJ anon Pfam-B_7830 (release 8.0) Family This family consists of several eukaryotic NADH-ubiquinone oxidoreductase ASHI subunit (CI-ASHI) proteins. NADH:ubiquinone oxidoreductase (complex I) is an extremely complicated multiprotein complex located in the inner mitochondrial membrane. Its main function is the transport of electrons from NADH to ubiquinone, which is accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space. Human complex I appears to consist of 41 subunits [1]. 
20.40 20.40 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.26 0.71 -4.62 10 254 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 204 0 156 248 0 145.40 30 83.01 CHANGED +ulhsupplsp..sssulhhpusRsAuGh.sKDhhPGPYP+TsEERtAAAKKYshRsEDYpPY.PDDGhGY.GDYPKLP.shohccRDPYYsWDaP-LRRNWGEPlHhDhDhYscsRls....ou.sshsWpoMshalhuFlGhM...lhhaahsEsaPsY....pPVusKQYPasshhh..hsDspK.P..VsHYsF ................................................t..........................................................-Ypsh..sD..-shth.GDYPplP.shs.....pppRDPahsWDcsp.RRNaGEPlH...hD...hD...h...a...t..s..phs....pss..s...h..s......hth..hh..h...shluhh.....hhhhhh.hp...h.........pPshs+paPht............................................................................................ 0 51 78 121 +5650 PF05822 UMPH-1 Pyrimidine 5'-nucleotidase (UMPH-1) Moxon SJ anon Pfam-B_7840 (release 8.0) Family This family consists of several eukaryotic pyrimidine 5'-nucleotidase proteins. P5'N-1, also known as uridine monophosphate hydrolase-1 (UMPH-1), is a member of a large functional group of enzymes, characterised by the ability to dephosphorylate nucleic acids. P5'N-1 catalyses the dephosphorylation of pyrimidine nucleoside monophosphates to the corresponding nucleosides. Deficiencies in this proteins function can lead to several different disorders in humans [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.47 0.70 -5.06 11 291 2012-10-03 04:19:28 2003-04-07 12:59:11 7 4 132 17 172 406 7 214.30 40 78.27 CHANGED hTLS+ath.NGcRsPTsasIh-ss.p.ls--s+pchppLac+YaPIEIDPpholEEKhPaMlEWWsKoHsLLhpttlp+scIsclV+cscstLRDGhcphFcpLpphslPshIFSAGlGDllEpllRQA.sVhasNlKVVSNaMpFD-sGhLsGFpu.lIHTaNKNsosLc.sopYacpl.............csRsNIILLGDSlGDlsMADGVs......sspsILKIGFLND+VEcpl-+YhcuaDIVLlcDpThDVssuILphIh ....................................................................hTLo+ath.sG.p+s.sosasllcss.thlsp....-hppchhpLhppYaP..IElDPphohcEKhshMhEWasKuHsLl.hpttlp+tpltphVt..c.u....s..hhLR-..G..hcphFppLpppslPlhIFS.AGlGD.l.l.Epll+Q..................t......ss..h...a..............sN.l+lVSNa...MpFs..-s............G..hL.p.......G.......F....cu..p.L.I..HsaN.K....p.p..ss.l....p....ss...t..aa..p..p..l...............p.s.+...sN.....l.lLLG....DShGDlpM..uDGl........sh.pplL+IGFLN..-c...l..-......c.......hcpYh...csaDIVLlpD.p..ohclsptllptl......................................................................... 2 63 82 126 +5651 PF05823 Gp-FAR-1 Nematode fatty acid retinoid binding protein (Gp-FAR-1) Moxon SJ anon Pfam-B_7852 (release 8.0) Family Parasitic nematodes produce at least two structurally novel classes of small helix-rich retinol- and fatty-acid-binding proteins that have no counterparts in their plant or animal hosts and thus represent potential targets for new nematicides. Gp-FAR-1 is a member of the nematode-specific fatty-acid- and retinol-binding (FAR) family of proteins but localises to the surface of the organism, placing it in a strategic position for interaction with the host. Gp-FAR-1 functions as a broad-spectrum retinol- and fatty-acid-binding protein, and it is thought that it is involved in the evasion of primary host plant defence systems [1]. 
25.40 25.40 26.00 26.00 25.30 25.30 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.70 0.71 -4.26 11 100 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 26 1 68 82 0 146.00 26 76.31 CHANGED sIPpEaK-LIPpEVs-ahpslTsEEKssLKElh+s.apcaKsE-EhlsALKEKSPsLapKAcKLcshlKpKl-uL.ss-AKAFlpclIApuRpl+sphlsGpKPol-pLKphscshlscYKALos-AK--LpcpFPhlsphhpsEKhQslhsphLs ..........................tpaKphlPt-lhphhpslos--Kthl+...-lhps.....a......t.....p.....a..p.....sp--h.lssLKcKSPpLap+hpcl..pshlcpK.lssL..ss-u+tFlccl........ls..ps+.p.l.hsphhs...Gp.p..s......h....p.c...l....Kph......scphhspacuL..spp..sKp..-Lcps..FPtlsphhpscchp.................................. 0 33 42 68 +5652 PF05824 Pro-MCH Pro-melanin-concentrating hormone (Pro-MCH) Moxon SJ anon Pfam-B_7863 (release 8.0) Family This family consists of several mammalian pro-melanin-concentrating hormone (Pro-MCH) 1 and 2 proteins. Melanin-concentrating hormone (MCH) is a 19 amino acid cyclic peptide that was first isolated from the pituitary of teleost fish. It is produced from pro-MCH that encodes, in addition to MCH, NEI, and a putative peptide, NGE. In lower vertebrates, MCH acts to regulate skin colour by antagonising the melanin-dispersing actions of small alpha, Greek-melanocyte stimulating hormone (small alpha, Greek-MSH). In mammals, MCH serves as a neuropeptide and is found in many regions of the brain and especially the hypothalamus. It affects many types of behaviours such as appetite, sexual receptivity, aggression, and anxiety. MCH also stimulates the release of luteinising hormone [1]. 
20.60 20.60 20.80 20.90 20.00 20.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.92 0.72 -4.49 2 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 45 0 30 57 0 76.60 70 52.87 CHANGED h.SppstpKHNFLNHGLsLNLsIKPYLALcGSVAFPAENGVQsTESTQEKREhGDEENSAKFPlGRRDFDhhp...s+hYpsh.pl .........................+ssGSKHNFLNH.GLPLNLAlK..PYLALKGSVA.FPAENGVQNTESTQEKREhGDEENSAKFPIGRR..D..FDMLRCMLGRVYRPCWQV.......... 0 3 5 14 +5653 PF05825 PSP94 Beta-microseminoprotein (PSP-94) Moxon SJ anon Pfam-B_7865 (release 8.0) Domain This family consists of the mammalian specific protein beta-microseminoprotein. Prostatic secretory protein of 94 amino acids (PSP94), also called beta-microseminoprotein, is a small, nonglycosylated protein, rich in cysteine residues. It was first isolated as a major protein from human seminal plasma [1]. The exact function of this protein is unknown. 22.00 22.00 22.40 22.00 21.90 21.90 hmmbuild --amino -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.48 0.72 -3.71 6 122 2012-10-01 23:42:56 2003-04-07 12:59:11 6 1 52 7 63 145 0 86.20 31 74.93 CHANGED uC.lI.pchlPs-pocECTDLKGNKHPLNShW+TcNCEhCoCccspIoCCThsupPVGYD+++CQ+IFpKEsCpYSVVEKpsPuKTCsVsuWlh ............................................ttChD.h.c.Gp.h.+.slsopWpocsC.pCoC.h.c.s.G.lsC...Csts.tpPlsasp.ppCptlhcpcsCpaplVcKp-PppsCt................................... 0 8 10 18 +5654 PF05826 Phospholip_A2_2 Phospholip_A2; Phospholipase A2 Moxon SJ anon Pfam-B_7918 (release 8.0) Family This family consists of several phospholipase A2 like proteins mostly from insects [1]. 
20.20 20.20 20.20 20.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.62 0.72 -4.05 23 357 2009-09-11 05:31:23 2003-04-07 12:59:11 7 7 104 1 215 359 0 92.80 33 37.16 CHANGED lhPGTKWCGsGNhAss.YsDLGstpcsDpCCRsHDpCsphIsu...hps+aGL.pNsshaTh.+CcCDppFpsCLps.ssssh.........uphlGphYFslhps.CathppPp ...................................hPGThWCG.Gs...Ats..hp..-L.G..h...ttsDtCCR...p..H..D.p.Cs.p..h.Its...hps.caul.pNhpha.T.............hp.....HCsCDp.chtpCLpt...hssth.........uphlG.haFs.llph.CFtht..t.............................. 2 73 90 162 +5655 PF05827 ATP-synt_S1 Vacuolar ATP synthase subunit S1 (ATP6S1) Moxon SJ anon Pfam-B_8145 (release 8.0) Family This family consists of eukaryotic vacuolar ATP synthase subunit S1 proteins [1]. It also contains BIG1 ER integral membrane proteins which are involved in cell wall organisation and biogenesis [2]. 37.60 37.60 37.80 37.90 37.50 36.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.70 0.70 -5.09 31 258 2009-09-11 20:52:15 2003-04-07 12:59:11 7 4 174 0 154 240 0 286.90 23 83.89 CHANGED ssPhllhSspt....h...hssssuphpsssplhshhpphLusCsoctYlhlsQP.......GlpssDFsttps.........hspLpshhptussslthshl........s.lshppLtphlpcpCss.sthlsssstp..ph.th.....csRllhlphs.Lssst......ppRtphLpspDphltpllsplsSsp..YT..............llhso...t....................................httpssptslatt.hsshsts.......hch-hp..................p.........p..pshssp...................................................................spppsssph..hhtcaQ............................aaosGlahullsslhhlslLhhulphlhS 
...........................................................................................................................tsPhhhhSspt...............s.ppup.ltos.pl.s..hlp.hLp...s.spsshlhl.Qs.......tlphpDFsthts..........hspLp..ptlt....u.s..sslhhshl........s.hs.hspLtphlp.cphs....s..shhlshts.t...ph.t.........................stllhlchs.hs...ts..........ttpphLpt.s.D......phlsp.lls.pl.upp...YT................slhTuh...s...............................................................p.t..shh...hp..hp.s..............th-hp..........................................h.p...............p....t.tsp............................................................................................................................sspspss.h.......hhpcaQ................................................................................................FFoPG.IaMullsslhhlhIhhhGlphl.p............................................................................................................ 0 29 69 121 +5657 PF05829 Adeno_PX Adenovirus_PX; Adenovirus late L2 mu core protein (Protein X) Moxon SJ anon Pfam-B_8179 (release 8.0) Family This family consists of several Adenovirus late L2 mu core protein or Protein X sequences. 25.00 25.00 57.40 57.40 18.60 17.60 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.25 0.72 -4.46 18 117 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 86 0 0 63 0 44.30 69 55.74 CHANGED pppRp+thp+ch+GGF.LPALIPIIAAAIGAlPGIAusAltAupt ...............+RRRRAhpRRLpGGF.LPA.LIPIIAAAIGAIPGIASVAlQASp.h 0 0 0 0 +5658 PF05830 NodZ Nodulation protein Z (NodZ) Moxon SJ anon Pfam-B_8202 (release 8.0) Family The nodulation genes of Rhizobia are regulated by the nodD gene product in response to host-produced flavonoids and appear to encode enzymes involved in the production of a lipo-chitose signal molecule required for infection and nodule formation. 
NodZ is required for the addition of a 2-O-methylfucose residue to the terminal reducing N-acetylglucosamine of the nodulation signal. This substitution is essential for the biological activity of this molecule. Mutations in nodZ result in defective nodulation. nodZ represents a unique nodulation gene that is not under the control of NodD and yet is essential for the synthesis of an active nodulation signal [1]. 24.90 24.90 25.00 27.60 24.40 24.80 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.27 0.70 -5.85 6 136 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 108 5 13 135 8 169.20 59 89.55 CHANGED pp-RaVlSRRRTGFGDCLWSLAAAWcYApRTGRTLAIDWRGSCYL-pPFoNAFPVFFEPIpDIuGVpVICDDpINphSFPGPFFPsWWNKPSI-ClYRPDEQIFRERDELs-LFQAQ-Ds-ANTVVCDACLMWRCDE-AERpIFcSlpsRsEIpARIDAlYpEHFpGaSlIGVHVRHGNGEDIMDHAPYWADs-lAL+QVCsAIccAKALsHs+PV+VFLCTDSApVlDplSuhFPDLFslPKpFQAcQAGPLHSAsLGl-GGhSALlEMYLLuRCDTVIRFPPTSAFTRYARLhVPRVIEFDLscPuRLllIDcsupphsA ............................................................LAuAWpaAppTGRTLsIDWRGSCYLDpPFoNAFPVFFEPVp-IAGV.VICDDcINphSFPGPFFPsWWN....+PoIDClYRPDEQIFRERDELcpLFQupcDs-ANTVVCDACLMWRCDpEAERpIFRoIKPRsEIQARIDAIYcEHhts...lG.......................................................................................................................................................................................................................................... 0 6 10 12 +5659 PF05831 GAGE GAGE protein Moxon SJ anon Pfam-B_8207 (release 8.0) Family This family consists of several GAGE and XAGE proteins which are found exclusively in humans. The function of this family is unknown although they have been implicated in human cancers [1]. 
20.80 20.80 20.80 20.80 20.40 20.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.48 0.71 -3.94 7 212 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 21 0 48 231 1 97.00 41 61.49 CHANGED MuaRuRop.R.sRPRRslpssph.lGshl..E.Po-E....pPppEEPPTcSQD.sPuQERE.DpGAuthQssclEAD.QELspsKTGsEsGDGPDsptthLPp.EphKhPEuG.....-tpsQV ............MshRuRop.R.sp..s..R.t..ps.sp..lGshh.pp...o-E.....pspptEPPTcsQs.ssupctp.spGAst.sQs.sc....hEAs.QEh..shsKsGsEptDGPDlpttslPs.EpsKhsEsG.............. 1 30 30 30 +5660 PF05832 DUF846 Eukaryotic protein of unknown function (DUF846) Moxon SJ anon Pfam-B_8404 (release 8.0) Family This family consists of several of unknown function from a variety of eukaryotic organisms. 20.50 20.50 21.80 20.70 20.10 20.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.90 0.71 -4.48 59 469 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 293 0 302 430 4 135.80 41 65.60 CHANGED pssHP..h....shhhalhh+hsullhYlh...sshh.......ps.alhpF.llhllLluhDFWhlKNloGR.hLVGLRWWs-ss.......p..cG...............p.spWhFEotsss....t.h.ssh-.............uplFWhslhssshh...............Wllhslhslhphphha...lhlshluhsLshsNhhuah.+C ..................................................................h..tHPlsshFHlhF+suAllhYlh.sshF...........sss.FlhtF..lhhlLLLuhDFWsVKNloGR.lLVGLRWWNplc..............-.-G.................c.SpWlFESpcss.......pph.sts-..................u+lFWluLhssP..ll...............Wllhshh...sl...hph.p.hpW...LhlVhhulsLshsNlhGah+C..................................... 1 111 166 246 +5661 PF05833 FbpA Fibronectin-binding protein A N-terminus (FbpA) Moxon SJ anon Pfam-B_8577 (release 8.0) Family This family consists of the N-terminal region of the prokaryotic fibronectin-binding protein. Fibronectin binding is considered to be an important virulence factor in streptococcal infections. 
Fibronectin is a dimeric glycoprotein that is present in a soluble form in plasma and extracellular fluids; it is also present in a fibrillar form on cell surfaces. Both the soluble and cellular forms of fibronectin may be incorporated into the extracellular tissue matrix. While fibronectin has critical roles in eukaryotic cellular processes, such as adhesion, migration and differentiation, it is also a substrate for the attachment of bacteria. The binding of pathogenic Streptococcus pyogenes and Staphylococcus aureus to epithelial cells via fibronectin facilitates their internalisation and systemic spread within the host [1]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 455 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.56 0.70 -5.95 114 2820 2012-10-02 21:21:44 2003-04-07 12:59:11 6 21 2261 20 808 2533 427 354.70 22 68.70 CHANGED DGhhlpullpELppp......LhsuRlpKlhQPppp...-lhlslR.s.tpppht.LLlSupsshsRlplT..pt.shtsPtsPssFsMlLRKaLpGutlhpIcQ.hsh-Rllplplpsc................s-h.Gctthhp........LllElMG+HSNllLl-pps....pIl-ul++ls.p.sp.hRslhPGptYhhPPs.pphsPh..phpp.c.phhph....l............tttls+tLhppapGlSshhucEls....hcss.......p.....h...thpplhpthpphhpplp.............................thpP..phhh.t...........psassl.s...hp....t.......ppasoluphL-paYtp+sppc+lp..p..pppcLp+h...lpsplc+tpcKlpphpcpLppscpu.....-pa+hhGELLpshlatl.ppGhpp.lpl.saas...sp...........................tl....pIsL-sphoPspNAQcYac+YpKhKpuhptlppplcpscpElpYL-sl.splp.............pu.s.p-lp-I+cELhppGYl+.....cppp.cpc+ppp 
...............................................................................................sh.htthhpp.lp........lhst+lppl...p.........p.p.............pl.....hh.lp...t..........t.......t...........p..h.......L..ll.sh...........h.sRlpho........p.....t...h.....t...........s...........s.......s.s.........FshhLRKalpsuhlpplpQ.h.......t.-Rllphp.hppt..................................................................h.....hp.........lhlE.lh.G+.uNl.lLs..-........p...........p...........p....p........I.....l-sh..+.+.ls.p...........sp.......hR......lhs.u...Y............h........P........s.t.......t.........t.h.ssh.................phpt.t....thhph.....................h........................p...l......p......h....uh...u..hspch................h.................................................................................t.h.......t......t...h...t...t.hh.t..........................................................................................s...............................................a..h......................ht...........................atsh..sphl-taa..t....t....p.t..........p...p....p....c..lp.....p........ptppl....ch..........lppp...h.p...+.pp+lt.p.h.p..p...p.h.t.t.sppt...............cph.p..hu-ll.....ts.hh...tl...........p....s.........p.p...hpl.....sahp....tp....................................................................................................l..tI.L.s...t.h..ostpNA.ptYa....p....p....h.p....K..h....+p...t.h....p....hl....ppt..lp..ts.p.p.pl.t.a..hpp.h.t..lp................h...s...tl...ht.c.h........................................................................................................................... 0 311 543 709 +5662 PF05834 Lycopene_cycl Lycopene cyclase protein Moxon SJ anon Pfam-B_8336 (release 8.0) Family This family consists of lycopene beta and epsilon cyclase proteins. Carotenoids with cyclic end groups are essential components of the photosynthetic membranes in all plants, algae, and cyanobacteria. 
These lipid-soluble compounds protect against photo-oxidation, harvest light for photosynthesis, and dissipate excess light energy absorbed by the antenna pigments. The cyclisation of lycopene (psi, psi-carotene) is a key branch point in the pathway of carotenoid biosynthesis. Two types of cyclic end groups are found in higher plant carotenoids: the beta and epsilon rings. Carotenoids with two beta rings are ubiquitous, and those with one beta and one epsilon ring are common; however, carotenoids with two epsilon rings are rare [1,2]. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.23 0.70 -5.54 20 839 2012-10-10 17:06:42 2003-04-07 12:59:11 7 25 565 0 264 2841 3261 323.20 23 76.95 CHANGED DllllGuGsAGhtlAtcl.....u+suLpVsLl-suPsh.hssNa.sWsschpDLu......LtsslpasWs.sttVths-pspphhu...huYuplspctLcctlhc+hs.tsushhhsu+Vsplspsss.......hshssGp.....pItuRhVlDupG.ssss......suhpsuhQshhGlElcl-ps.aDssthllMDaR.....spQp......cs.sFlYshPhSssRlhlE-TphussssLsh-sL+p+lhshhcs.hGlplpclhc-EpuhIPl..GGslssh.pc.....shshGssAGhsHPuTGYSlstulutAs....slAp.hls.s..............shuhpshtshasp-RtcpcsFF.hhshhLhlphch-uppphhcpFacLPpthhptFhuu+LolsDhlhluht...h.slssssshh 
............................................................................................................DllllG.uG.sAGhslAtpl.......................s..p..h...u...l....p...V.....h.......h.....l...............-....................s...........p.......h................h........s......s......s.....h.......s...........W....s..........t..h..p..s.hs......................................l.t..s..h....l......p.....t...p.......W..........p.....s......s......h........l.....h.....h.........s....s.....t......p...h...t.....hs...................hs.Y.....s.........h...........l....s...........p.p..........t.........L..c..p.......t..h.......h.p......c...s.....h........t......t.u.......l.....p..h......h..p...u..p....V....t...p...l.htpss..........s.hl.h..s.s..s..Gt.......................pl...p.A..phVlDA...sG...hstp...................tsh..s...u....h......Q....h....h.......h.......G..h...h..c........s......p..t............s....a..s.........p....p.......h.h.h...M.D..hR............s..............................ths..s.F.h.....Y.....sh.....P..h.u.....s..s...p....h....h..l....E.....T.....hs..........s....p.s....h...s....h..pt.hpp..thhth.l..p........p....h..........sh..p....h.....p...l....c.........c.E....h...s.h.......lPh.....t..s....s.....h..pt...............hhth.Gssuuhs+Pso..G....Y....h....s....ts.tt.us........hlup..hh.ht..............................t.t....th..h.th...hsh.t..hh..h..p...t....h.h....hhs.......hhhth....tth....hh.p.ha..h...tl.....hh..tFhstt.....t.h.h.................h.................................................................................................................................................................................................................... 0 73 187 242 +5663 PF05835 Synaphin Synaphin protein Moxon SJ anon Pfam-B_8588 (release 8.0) Family This family consists of several eukaryotic synaphin 1 and 2 proteins. Synaphin/complexin is a cytosolic protein that preferentially binds to syntaxin within the SNARE complex. 
Synaphin promotes SNAREs to form precomplexes that oligomerise into higher order structures. A peptide from the central, syntaxin binding domain of synaphin competitively inhibits these two proteins from interacting and prevents SNARE complexes from oligomerising. It is thought that oligomerisation of SNARE complexes into a higher order structure creates a SNARE scaffold for efficient, regulated fusion of synaptic vesicles [1]. Synaphin promotes neuronal exocytosis by promoting interaction between the complementary syntaxin and synaptobrevin transmembrane regions that reside in opposing membranes prior to fusion [2]. 21.10 21.10 22.90 22.60 20.50 20.30 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.95 0.71 -4.02 8 251 2009-09-10 22:35:29 2003-04-07 12:59:11 7 3 90 11 158 207 1 124.40 40 88.74 CHANGED suFlsKQhlGsQLssVputLGsc.t-EGDs...sAp......EE-tEhpEAlREtEE+RKtKaRKMEpEREpMRQGIRDKYuIKKKEE......pp-spsh.....EGpLGRcKKoPcElAsEAsps----ttpuhhspslcpLss.plp-lhsK .................hsFhhKphlus.thKshst..h....l....GG-.......--t-s....pAp.................cc.EEhQctlt..pp...c.c.......ERcAcas+hEAER.EphRppI..RDK........YtlpKpEp.......c-sphphA........l..s.c.s..hc.....Ah.stpsp.-.E.E..E.p.p....s.l.s.lp.l.s.slpphhpK.............................................................. 0 32 47 91 +5664 PF05836 Chorion_S16 Chorion protein S16 Moxon SJ anon Pfam-B_8659 (release 8.0) Family This family consists of several examples of the fruit fly specific chorion protein S16. The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary [1]. 
25.00 25.00 40.00 39.50 23.30 20.20 hmmbuild --amino -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.64 0.72 -4.16 5 37 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 17 0 8 39 0 107.10 79 78.20 CHANGED GSGu.YuDVVKssETAEAQAAALTNAAGAAASAAKLDGADWYALNRYGWEQGRPLLuKPYGPLDsLYAAALPPRSFVAEIDPVFKKSsYGGuYG-+olTLNTGAKLAVuAl .....G.YGASYGDVVKAAETAEAQASALTNAAGAAASAAKLDGADWYALNRYGWEQGRPLLAKPYGPLDpLYAAA.LPPRSFVAEVDPVFKKSpYGGSYGp.pAaLpTsSKLuVVAI.......... 0 1 1 5 +5665 PF05837 CENP-H Centromere protein H (CENP-H) Moxon SJ anon Pfam-B_8705 (release 8.0) Domain This family consists of several eukaryotic centromere protein H (CENP-H) sequences. Macromolecular centromere-kinetochore complex plays a critical role in sister chromatid separation, but its complete protein composition as well as its precise dynamic function during mitosis has not yet been clearly determined. CENP-H contains a coiled-coil structure and a nuclear localisation signal. CENP-H is specifically and constitutively localised in kinetochores throughout the cell cycle. CENP-H may play a role in kinetochore organisation and function throughout the cell cycle [1]. This the C-terminus of the region, which is conserved from fungi to humans. 23.00 23.00 25.30 25.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.10 0.72 -3.86 16 119 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 108 0 88 130 1 106.20 27 46.14 CHANGED LShthhphppsppsspccLpslchcpLpl+ppsppllpElhphsccpcshcc....s.chpppLcplcuch+po+p+hcshpslhpulllGSGVNWAEDccLpslVLc.p ...........................................................................ls..h.p.tptptsh.ccL.tslchcplplpppspphhtclhplppcpcpppc.....................s.chppplcplcpphctp+pphpsl+plhp.ullluSGVsWAcDtpL+clVLc.......... 
0 18 34 60 +5666 PF05838 Glyco_hydro_108 DUF847; Glycosyl hydrolase 108 Moxon SJ, Bateman A, Eberhardt R anon Pfam-B_8737 (release 8.0) Domain This family acts as a lysozyme (N-acetylmuramidase), EC:3.2.1.17. It contains a conserved EGGY motif near the N-terminus, the glutamic acid within this motif is essential for catalytic activity [1]. In bacteria, it may activate the secretion of large proteins via the breaking and rearrangement of the peptidoglycan layer during secretion [2,3]. It is frequently found at the N-terminus of proteins containing a C-terminal Pfam:PF09374 domain. 21.20 21.20 21.40 21.60 20.10 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.63 0.72 -3.86 20 820 2012-10-03 00:09:25 2003-04-07 12:59:11 7 13 534 9 151 587 217 85.40 39 42.35 CHANGED phhsplls+EG............GYsscPcD.GG.......s.....TsaGITttThcshuh............su-l+sLTc-pAh.sIYctcYWp+h+hDpls...tuluhplFDsuVNpG ....................................hhstllucEG............G..YV....sc..PpD.GG............s.......TpaGIT.sThpAhuh......................................tu-h+s..L.T..c..spAh...pIacppYW..psphDpls...tsluhplhDsuVNtG............... 0 33 91 125 +5667 PF05839 Apc13p Apc13p protein Wood V anon Wood V Family The anaphase-promoting complex (APC) is a conserved multi-subunit ubiquitin ligase required for the degradation of key cell cycle regulators Members of this family are components of the anaphase-promoting complex homologous to Apc13p [1]. 
21.50 21.50 21.80 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.93 0.72 -4.01 7 186 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 177 0 130 157 0 81.00 31 56.67 CHANGED hDup...hphctsths.lhssapp...DpLP..-Dlh.lP.scL.shp..t--.hhPDpcAshGhp+totpp+..................EssWpDLuLt-Lhppushlss ..............................hDSp....hphpt.hhsll.-tWpp...Dp.LP.h-Dlh...lP...sp..h.................................P-tcts.Gh..s..+to.ppp.........................................E.tWpDLuLppLhpss....s........................................... 0 29 57 98 +5668 PF05840 Phage_GPA Bacteriophage replication gene A protein (GPA) Moxon SJ anon Pfam-B_8738 (release 8.0) Family This family consists of a group of bacteriophage replication gene A protein (GPA) like sequences from both viruses and bacteria. The members of this family are likely to be endonucleases [1,2,3]. 20.90 20.90 21.00 20.90 20.50 20.80 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.27 0.70 -5.75 11 1036 2012-10-02 18:54:06 2003-04-07 12:59:11 8 4 599 0 93 1026 15 329.10 36 50.68 CHANGED s-hcchAschAsthpthhpphs-.........p...h.tthhpsYphlAph.shshslp...Pth...........pt...tp.h.sslhRhhs-cWWhcpLpphththhEp..hshs.Vs+ptusYsSpcslp-hRtppptsh-alKuhslcsE-u..pphsLt-hhhtS.SNPthRRsEhMsRl+GhEchAcppGhhulFhTLT.APS+aHAhhcss....+.......W.s.As.Pp..-spcYLssla.shhRAchp+ttlphaGlRVsEPHHD..............GTsHWHhlhFhps..............ccpcplsplhpsa........Alc.DpcEht.cGhh.................psRh+uEhlcspKGoAsG.YlAKYIuKNIDuptltt........chsc-o.u+php-suts..VhuWsphaRIRQFpFaGhsshtsaRELR+hsst 
....................................................................phpphus.hus.h......hh.pthh....................t.............p..h..h.p.haphlut...shthp...h....Pha........................tt........p.h.sulhRhhsscWWhcpLhh.phphpEt.hh.A.hs.V............s+..c..............t......osYsSpcslpchRtp+pt.sh-ah+uh.l.s--u...phsL.shh.to.uNP.hRRsEhMsphtGhEhhActcG.h.ulFhTlT.sPS+aH.u....s..h....p.......s..G.....t............psp.................Ws.s...ut...s+....sspcYLs.sha..uhh....Rtthp...c..t..s.......l.........p....h.......aGhRlsEPHH.D..............GTsHWHhhhFh+s.........................................cp.hcpl.....ss.lhcca..........Alc-..D.p.cE..htpss.........................................................tsRF+uEhl......cs........p...+Gs.ssu.YIAKYIuKNlD...ut.u.lst.....................phsc-o....G+sh......p-...o...scp......shuWAphaRlRQFp.h.Gh.s.s.htsaRELRplst.t............................................................ 0 3 38 69 +5669 PF05841 Apc15p Apc15p protein Wood V anon Wood V Family The anaphase-promoting complex (APC) is a conserved multi-subunit ubiquitin ligase required for the degradation of key cell cycle regulators Members of this family are components of the anaphase-promoting complex homologous to Apc15p Swiss:O94688 [1]. 23.80 23.20 24.40 24.40 23.70 23.10 hmmbuild --amino -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -3.39 27 114 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 113 0 84 115 0 115.10 25 31.83 CHANGED sapLWhs...................ssps.pssppppsssttspp.................shhpsss.ss.......tsh..tt..tsssplusLhhcEcthctRcpsItshGaoaI+PhGlsKTMhth+..ccc......tEpEct.tsu ......................................hpLa.s......................s....pstpppt...p.........................h.p.st.ss..t.s...h..tttt......hppspLspLhh-EpthcpRctsIpshGasWl+PhGlsKTMhph+....EEc................tEpcEt.t........................ 
0 14 38 68 +5670 PF05842 Euplotes_phero Euplotes octocarinatus mating pheromone protein Moxon SJ anon Pfam-B_8825 (release 8.0) Family This family consists of several mating pheromone proteins from Euplotes octocarinatus. Cells of the ten mating types of the ciliate Euplotes octocarinatus communicate by pheromones before they enter conjugation. The pheromones induce homotypic pairing when applied to mating types that do not secrete the same pheromone(s). Heterotypic pairs (i.e., those between cells of different mating types) are formed only when both mating types in a mixture secrete a pheromone that the other does not. The genetics of mating types is based on four codominant mating type alleles, each allele determining production of a different pheromone. The pheromones not only induce pair formation but also attract cells [1]. 25.00 25.00 25.80 69.00 23.50 22.60 hmmbuild --amino -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.24 0.71 -3.81 6 8 2009-09-11 05:26:38 2003-04-07 12:59:11 6 1 1 0 0 9 0 134.00 47 88.96 CHANGED FKMTSKVNTKLQSQIQSKFQSKNKLASTFtTs.phK..s..-s.....hTGC.sTN..hC...u.sCusTsss.t.C.......sssGQNshDhha.hWhs.C..........hssYssCl..usssYthYSu...CG...C.sshs.ushtD....hhts.Chsa FKMTSKVNTKLQSQIQSKFQSKNKLASTFQTSSpLKstC..Dshpp..lTGC.sTN.ssC.hpu.sCSuTGsDp-hC.......ssVGQNllDhhFtpWus.C..........aNDYssClpaAspsYshYSusEhCGC.sshp.usapD....hh-uhCss.h 0 0 0 0 +5671 PF05843 Suf Suppressor of forked protein (Suf) Moxon SJ anon Pfam-B_8911 (release 8.0) Family This family consists of several eukaryotic suppressor of forked (Suf) like proteins. The Drosophila melanogaster Suppressor of forked [Su(f)] protein shares homology with the yeast RNA14 protein and the 77-kDa subunit of human cleavage stimulation factor, which are proteins involved in mRNA 3' end formation. This suggests a role for Su(f) in mRNA 3' end formation in Drosophila. 
The su(f) gene produces three transcripts; two of them are polyadenylated at the end of the transcription unit, and one is a truncated transcript, polyadenylated in intron 4. It is thought that su(f) plays a role in the regulation of poly(A) site utilisation and an important role of the GU-rich sequence for this regulation to occur [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.91 0.70 -4.67 30 583 2012-10-11 20:01:00 2003-04-07 12:59:11 9 42 284 15 447 699 16 211.40 22 22.92 CHANGED ohsalthM+shRRspG.......lpuuRplFpcAR.ccsclTaclYlAsALlEaassKDhshAtKIFElGhKhFss-spallcYL-aLIplNDssNsRsLFEpslsp..lssp....................................s+tlap+ahcYESpaG.-LsslhpLEcRhtphFP-.........-splphFscRYph.shsshpsp-ltt.tpphtsp.hh.............................................................spss.+Rslpp.s.p..................................................ststssss.sptt.pssstP............................lPpsIshLLshLPsspha..sus....hhssccLlcllp.psslP 
..........................................................................................................................................................................................t...................t.R...hhtps...h...........................p.....pl..hh..t.h..Ahh.....Eahht...pc.....p............AhplFEh..uL....+..p.....asc...p...s....c...h..hh.tYl.-....a.l...h...c.h..s...-.....t.....s..p.................s..R...slF.ERs.l.sp....lsscp.........................................................................p.lap+alpaE....pph....G...cht....slh....p..l..c.p...Rhh..p..h..h.p..................................t..h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 173 257 377 +5672 PF05844 YopD YopD protein Moxon SJ anon Pfam-B_8937 (release 8.0) Family This family consists of several bacterial YopD like proteins. Virulent Yersinia species harbour a common plasmid that encodes essential virulence determinants (Yersinia outer proteins [Yops]), which are regulated by the extracellular stimuli Ca2+ and temperature. YopD is thought to be a possible transmembrane protein and contains an amphipathic alpha-helix in its carboxy terminus [1]. 
25.00 25.00 27.20 27.20 21.70 21.10 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.89 0.70 -5.38 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 76 1 11 45 2 279.90 45 93.75 CHANGED MIsSDYsNusopsuslTEshsspsptsRossTsHEAAu.sppspuhppsslPtLs+PShshDsctVophsup.L-Soh-MMSLLFcLARpARE.Gl.QRDIENKhlIusQpAQVDEMRpGAKLMIAMAVVSGVMAusSAlhGuFShuKuuKsIKQ-KuLsuNIAGRppLIDsKh-thussGpp..sRttlG+lWpssQsu...DpsALpsLsKcF-pssu+tQlhNTVhQSlGQMuNSAVQVpQGcSQAcAKEDEVpATIuQsEKQKAEDsMSFNsNFMKDlLQLhQQYuQSQNQAWKAAFGVs ..................................................tshh......s.uup.sAhsoEsht.ps-h..+s.sTtppAtsltp..uuht+spsstL.pPppslss...shlpputu-.LsuoLolLhLLhclA+cAREhGl.QRDIENcAsIsAQKsQVsEMpsGApLMIAMAVVSGlhAusS..sVsuuhuhhKssKtlKQEps.LNsNIsGRcpLIDsKhpthustupp.ssRcplG+lW....pspQss...DpstLthhs+cFchpsupspshNushQ...slGQMANSA..lQVcQGhSQAcsKEcEVpAoIAtspKQKAE-sMsasssFMKDVLpLhpQYspScspAh+AAhGVV............................ 0 2 4 6 +5673 PF05845 PhnH Bacterial phosphonate metabolism protein (PhnH) Moxon SJ anon Pfam-B_9057 (release 8.0) Family This family consists of several bacterial PhnH sequences which are known to be involved in phosphonate metabolism [1,2]. 
25.00 25.00 31.70 31.70 19.20 18.40 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.83 73 670 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 649 1 125 414 89 188.20 48 96.50 CHANGED LtsGFs..........-sVtcuQ....psFRullcAhA+PGpltpl..........t...ss...Pssls.sAsuulhLTLhDt-TPl.......WLsss.hssssltsaLpFHsGuPlsspsppAsFAlhssspshs....sLspashGos-YPDcSsTLllplsuL.........ssG...........................slpLpGPGI..csptpluhs.LPsshhsth...............ptspsh..FPhGlDllLssGs.plhuLPRTT+lcs ................psuFh.PVpDAQ+oFRpLLKAMSEPGlIVsL.............................pphp.pu.......hpPLs.hAosuVLLTLsDsDTPV.......WLuss.ls.s-hVspsLRFHTsAPLVspPcpAsFAls....--uhust...........pLsuhusGTshsPEtuATLIlQVs.SL.........uGGc....................................hLRLoGsGI..t-cRhIAPp.LP-....shlccL...............spRsps..FPLGIDlILTCG-..+LlAlPRTT+VE.................... 0 22 65 91 +5674 PF05846 Chordopox_A15 Chordopoxvirus A15 protein Moxon SJ anon Pfam-B_9149 (release 8.0) Family This family consists of several Chordopoxvirus A15 like sequences. 21.10 21.10 25.20 24.60 18.70 18.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.05 0.72 -3.93 8 45 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 37 0 0 27 0 90.50 59 96.22 CHANGED MFVDDsTlIlYs+...WPoCLlssss+alsFPssN..ShTFcsh.hKIsp.shpSVLLlNPShlcLLKICVYl+RhpWcGcIhILFEpcNKPPPFRL .........................MFVDDNSLIIYST....WPSsLocooG+lIshPcN+..SaTFKEs.FKlD...........E.SlKSILLVNPSuIDLLKIpVYh+RIKWhGcIalLFEpENhPPPFRL.. 0 0 0 0 +5675 PF05847 Baculo_LEF-3 Nucleopolyhedrovirus late expression factor 3 (LEF-3) Moxon SJ, Mistry J, Carstens EB anon Pfam-B_9292 (release 8.0) Family This family consists of LEF-3 Nucleopolyhedrovirus late expression factor 3 (LEF-3) sequences which are known to be ssDNA-binding proteins [1]. 
Alkaline nuclease (AN) and LEF-3 may participate in homologous recombination of the baculovirus genome in a manner similar to that of exonuclease (Redalpha) and DNA-binding protein (Redbeta) of the Red-mediated homologous recombination system of bacteriophage lambda [2].\ LEF-3 is essential for transporting the putative baculovirus helicase protein P143 into the nucleus where they function together during viral DNA replication [3]. LEF-3 and other proteins have been shown to bind to closely linked sites on viral chromatin in vivo, suggesting that they may form part of the baculovirus replisome [3]. 25.00 25.00 44.40 74.10 19.30 18.90 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.20 0.70 -5.54 19 45 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 42 0 0 46 0 347.40 34 89.27 CHANGED pcctssss.uupsu..phKcshKpVsG.cLlsKshlSlsNchaY..hF+FLlDNpscsYYGstppFpsLh.spsYclsLsa.cp......+l.Isca.cpspsh-psls.VKp..hLptpDF-s--hVsVlAKL+hGFKhlss.ssYKhVFplNhtcstsssp...lhQVEChANhKplusshKs..pshpsts-LhcaahcspsphFsLhRVKC.Qposts...FhNashpshTplEhs.ppsspsh.s.sp.sphs.NISRuNK+lhptpls.phpsEp....pupc+FolpaphtDp.......schhKusaYlcspssppts...ppt...................splpKLphDlsQLsshlpcslhcshIYVssDssss...shNlL.GLTKaDhDs.ppYpsl .................t..ph.stss.s.sssp..ph+cphKpVsG.pLlsKshhSINNEsaY..hF+FLl-NtscsYYGssppFpshc.spsY-lsLsYscp......+lhIsca.cpspsh-phls.l+c..hlptp-F-s--sVoVlAKhKaGFKhlsu.shYKhVF.lshtss.tssss...ssQVEChuNhK+lssshKs..cslpsps-LhcahhcspsphFsLaRlKCQposss....aKNasltshTplEhs.ppsspsh.stpp.sshs.NISRuNK+lhptpls.plpsEp....pus-RFslpaphp-p..............schlKusaYlcsppspptp....ps...................splpKLpsDlNQLssLIpssll+VhIYVssDssss...NhNVL.GLTKa-lDs.spYpsl....... 0 0 0 0 +5676 PF05848 CtsR Firmicute transcriptional repressor of class III stress genes (CtsR) Moxon SJ anon Pfam-B_9312 (release 8.0) Family This family consists of several Firmicute transcriptional repressor of class III stress genes (CtsR) proteins. CtsR of L. 
monocytogenes negatively regulates the clpC, clpP and clpE genes belonging to the CtsR regulon [1]. 21.90 21.90 22.20 26.70 21.10 21.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.87 0.71 -4.25 48 1181 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1166 2 147 497 0 148.80 48 97.41 CHANGED Mp.NlSDlIEpYlKplLppop...hlEI+RuElAspFpCVPSQINYVIsTRFThc+GYlVESKRGGGGYIRIhKlphscpt.pllpplhptI.GssIoppsApslIppLhE-plIocREupLhhuslscpsLs.hthstcsplRAclL+uhLppLph .....+NhSDlIEpYIKplL-pS.s.........hlEIpRuplAspFpCVPSQINYVIpTRFT.p+GYlVESKRGGGGYIRIsK..lchp-.p.p...ph....lppl...l...p..hI.G.p......p.lSQ.ptupslIptLl-cplITcREupLlhus.ls......c......c.....sLu...........pcshlRAplL+plLpplp.h...................................................................................... 0 60 102 126 +5677 PF05849 L-fibroin Fibroin light chain (L-fibroin) Moxon SJ anon Pfam-B_9321 (release 8.0) Family This family consists of several moth fibroin light chain (L-fibroin) proteins. Fibroin of the silkworm, Bombyx mori, is secreted into the lumen of posterior silk gland (PSG) from the surrounding PSG cells as a molecular complex consisting of a heavy (H)-chain of approximately 350 kDa, a light (L)-chain of 25 kDa and a P25 of about 27 kDa. The H- and L-chains are disulfide-linked but P25 is associated with the H-L complex by non-covalent force [1]. 
25.00 25.00 46.80 45.70 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.45 0.70 -5.03 5 31 2009-09-10 22:28:38 2003-04-07 12:59:11 6 1 12 0 2 33 0 225.20 41 90.96 CHANGED PSVTIsQYS-NEIPPclDNGK..SSulI-RAFDlV.DGGDsNIYILNlQQILNDhAsQGDutSQApAVAQTlAIlu-LSuGIPGDACAAAsVlNAYosuVRSGNsuuhRpALus..aIp+luoNlsLIsQLspN............PsSlRYSsGPuGsCuGGGRSYsFEAAWDuVLusucsh.SSLlNEEYClAKRLYNAFNlRSNNlGAAITAsulssVspVspplhuolsslLRulANGGNAsGAAAsApstLsNAA ..................p-ls.ph-.Gp..spsllspAaplV.Dsu-hsIahLslpphlhD.hAsQsD.ssSQuhAluQThGIluELouslsGDuCutupllsSYsshhp.oG.NpAs..hppAlss..YssplspslstlshLhpN............PstlR.psus.st..uCuGG.GRuYsaEtsWD.hLusus.....t.sLlNEphChu+RLYsuhstRSNsluAAhsAussssspQlhctshspIsshLpssssGssssshstshpptlspu..................................... 0 1 2 2 +5679 PF05851 Lentivirus_VIF Lentivirus virion infectivity factor (VIF) Moxon SJ anon Pfam-B_9439 (release 8.0) Family This family consists of several feline specific Lentivirus virion infectivity factor (VIF) proteins. VIF is essential for productive FIV infection of host target cells in vitro [1]. 25.00 25.00 210.60 210.30 19.90 19.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.77 0.70 -5.69 4 25 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 0 0 26 0 235.20 54 97.13 CHANGED huDEDWQVSRpLFsVLQGGl+sAMLYIosl.schEpt+hK+shKKRLtchEstFIhpLR+AEGI+WSFHTRDYHlGaV+EhVAGoo.PsuLRLYVYISNPLWHppYRPsL.saNpEWPaVNhWIpstFMWDDIEsQpIhpuscsu.GWsPGMlGlVIKAFSCsE+Kh-hTPs.lIRGEIDPpcaCGDCWNLhClRNSPPsoLQRLAMLApG+.scSW+GCCNpRFlSPaRTPsDL.VlQspssaphLaphcL .........................................M.Ylpc...ppppEpphKpFKc+LuhpEh+WIR+LRhsEGILWSFHTREWHsshV+ELVAGTG...sLKLYCYISpPlW+.RYRPTl.cWNppWPYuNlWlT-tFMW-.IppppIhhsGcVostaPPGaIuLllKAYoCpp+K+DlThtcIIhG-hc.pKWCuDCWsLIllRNTPPhTLQRLAhLALGRKlhsWhCKssaRFhpsRhTPLDpcIl.sssspEsLWh...h........ 
0 0 0 0 +5680 PF05852 DUF848 Gammaherpesvirus protein of unknown function (DUF848) Moxon SJ anon Pfam-B_9475 (release 8.0) Family This family consists of several uncharacterised proteins from the Gammaherpesvirinae. 24.30 24.30 25.10 28.60 24.20 24.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.81 0.71 -4.35 13 27 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 22 0 0 24 0 140.60 32 96.32 CHANGED pssspK-hltpsLEuslNK+suVSlhDRFGpssuLFcpQappTpcul+stpph+cppclcshlsslcspIpp+p+ElutLpp.hsh+KlschEcLsD+lpEL+--lchEL-slpt......supp-shsupssplc-sIhpWRLEsLPcVPs .....ssspK-hltpsLEuslNK+suVSlhDRFGtssslFptQappspcul+shpph+cpppltshlsslcpplpppppElshLpt.hsh++lpchEtLp-clt-L+--lphcl-tlp.......ttpp-shssppsp.c-sIhpWRL-pLPpVs... 0 0 0 0 +5681 PF05853 DUF849 Prokaryotic protein of unknown function (DUF849) Moxon SJ anon Pfam-B_9059 (release 8.0) Family This family consists of several hypothetical prokaryotic proteins with no known function. 23.20 23.20 23.30 23.20 22.90 23.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.60 0.70 -5.51 131 1281 2012-10-02 01:07:48 2003-04-07 12:59:11 7 8 668 31 447 1226 1571 267.90 29 87.36 CHANGED +lIITsAlTGuhpT.spsP.tlPlTPcEIAcsAlsAhcAGAullHlHsR...pscsGp...P.op-schapchlstI+pt..s-.hllslT...TGGu................sshsh...c-Rlthltth......pP.EhsoLshGohNFu...........................-hlapNo.s.lcphhpphp.ptGl+PEhEsaDsGc.lhphtphlcc.Gll.c..sPhhlphVh..Glh....uGh.sucscslh...hhhpphsc......shp.W.........oshuhG.+pQhPhsshuhhhGGpV.RVGLEDslahs+GpLA..oNAphVc+ssplscphGtclATPsEARphLuL 
..................................................................................................................llITsAlsGuhto.spsP.tlPlTPcElupsAhpuhcAGAullHlH..sR.......sp.s.Gp.................s.ot-sshapchlptI+pt....s....s-.hl.lp.lo...oGuu.......................sths.......ppRh.thlpth........tP-hsoLshGohNas...........................shla.ss.s.lcphhpthp.pt.Gl+sEhEhaDhup..lht.h.tp.h.............hcc..Gll..p.....s..P.h.h.hphlh.......Gl..........sGh..sssspslh......h.h.h..pphsp.............sst.W......................................sshuhG..+pp..h..s..h..sshuhhhGG.p.l.RVGLEDslah...s...c.....G.p....hA.....oNuphVccshplhcth.utplAo.stEARphLtl................................................................. 0 95 238 346 +5682 PF05854 MC1 Non-histone chromosomal protein MC1 Moxon SJ anon Pfam-B_9146 (release 8.0) Family This family consists of archaeal chromosomal protein MC1 sequences which protect DNA against thermal denaturation [1]. 20.50 20.50 21.00 20.90 19.90 19.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.16 0.72 -4.02 5 61 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 47 2 35 62 51 93.70 51 83.19 CHANGED ssKRNFALRDcDGNEhGVFSGKQPRQAALKAANR....Gco-.cAs.s.IRLRERGTKKVHIFcGW+hcVcAPcs+PsWMPscIoKPNVKKpGIEKLE ...........t.sKRNFsLR-pcGpE..huVFoGppPRQAALKAApR........Gpss..tt..t....IRLRE..+GT.cKVHlacGWthp.sAPc..sp.P.s..WMP..s.......cI...sKspVpKpGlE+l-......................... 0 6 26 33 +5684 PF05856 ARPC4 ARP2/3 complex 20 kDa subunit (ARPC4) Moxon SJ anon Pfam-B_9272 (release 8.0) Family This family consists of several eukaryotic ARP2/3 complex 20 kDa subunit (P20-ARC) proteins. The Arp2/3 protein complex has been implicated in the control of actin polymerisation in cells. The human complex consists of seven subunits which include the actin related proteins Arp2 and Arp3 it has been suggested that the complex promotes actin assembly in lamellipodia and may participate in lamellipodial protrusion [1]. 
22.20 22.20 23.80 23.30 21.90 22.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.93 0.71 -4.87 21 366 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 294 15 220 305 4 158.30 66 89.93 CHANGED MusslpPYLsAVRpTLpAALCLpsFsSplVERHNKPEVEl........s.........sS..tEL........lLsPlhluRs-pE..+sLIEsSlNSlRlSltlKpu....DEl-cILs+KFs+FhtpRA-s...FhILRRKPlp...............GYDISFLITNh................HoEpMh+pKLVDFIIpFhp-lDKEIS-hKLslNuRARhsAcpaLppF ......................................MotoLRPYLssVRsoLpAALCL...........pNFuSQs..VERH....N+PEVEl........p...................oS...EL........LLpPlhIuRNE..........pE............+VLIEsSINSVRlSItlKQA....DEIE+ILsHKFhRFhh.RAEs...FhILRRKPlc...............GYDISFLITNa................HTEpMhKHKLVDFlIpFME..ElDKEISEMKL.lNARARhVAEpFLp.......................... 0 72 122 181 +5685 PF05857 TraX TraX protein Moxon SJ anon Pfam-B_9375 (release 8.0) Family This family consists of several bacterial TraX proteins. TraX is responsible for the amino-terminal acetylation of F-pilin subunits [1,2]. 
22.10 22.10 22.10 22.50 21.90 22.00 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.72 0.70 -4.70 27 1213 2012-10-02 17:00:17 2003-04-07 12:59:11 6 5 883 0 132 947 36 217.00 23 91.75 CHANGED sst-hLKhlAllhMlhDHls...hlhhsshsh................hhh......lGRlAFPlFshllAhNht+.......stsht+hhtRLhhFullupssahhhss...........shhshNllFThhlshhslhhlcptphhthh....................hhhslhhhlshhshs-YshsG.......................lhhslshahhhpp..........................s.hhshslhhhshshhh...........tphhAhhslsllslh...........ststhph.hh+hhFYhaYPsHLhlLhllth ..................................................h..tthlKhlAhl.hMllDHls.....hh....h....h..p.......h...............................hth......................lGRhuhPlFsahhs..sh.+.......T+sppcahhRLhhaul...ls...phs...h.hlhsh..............................s..h.h.t..tNlh.h.o....l..hluhhhlh.h....h....c....t...hp...t...h..tth.........................................hh.hhlhh.hh......h..h....h.s.....sau..hs.u..........................................ll.h...h...hlh...ahhhcp.....................................h..phh...h.hs.s....hhhhhh.shshh............................................stth.h.h.hh.sh.sh.lhhh......................t.tp.u.hp.t.h.h..+.hhFYhFYPsHLhllsllt.h............................................. 1 41 72 102 +5686 PF05858 BIV_Env Bovine immunodeficiency virus surface protein (SU) Moxon SJ, Bateman A anon Pfam-B_9413 (release 8.0) Family The bovine lentivirus also known as the bovine immunodeficiency-like virus (BIV) has conserved and hypervariable regions in the surface envelope gene [1]. This family corresponds to the SU surface protein. 
19.90 19.90 20.80 27.80 19.20 19.80 hmmbuild -o /dev/null HMM SEED 548 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.94 0.70 -6.55 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 9 0 0 73 0 176.00 30 83.22 CHANGED MDQDLDRsERGEtcGcS-EhR.LlQE-lDcGRLTs+EALpsWh..NNGEIHPWVLsGMhShGVGML..hGVYCplPssllWlLlhQLCIYhulGETSRcLDssSWpWVRuVhIluILGTLoMAGTsLA-ss.u.................ol..NIT..shp...DT-.....Ph......hLhhLhLssILGlLGlIlshRRSNstsILuARDslDWWLSANpEIP.KFshPIILISSPLAGhhGaaVMc+a.chhctGCQhCGSlS.MWGMLL.EIGRhLs+REWsVSRlhVILhISFSWGMahs+..VpAptpHlAMVTSPPGYRIVNDTScAPWFCFSsAPIPoCpSSpWGsKYapEKlNpTlVcQlhcptEtHoRAoWIE.PDLFEEVlYELALLSANuS...........hQV+ssNsTDlCsopNSopssspTM...ThLcLRtplSsTWlsNSSLQFsVHWPaVLlGhNsSp.ostsaNsssWIATNCMDPIpLNcSp.........c-L.KNa.sRsloCVsu..shophs.tpsTLCGaNTsCLpFGp+uhSTNSLlLCQ+Nsh......sNcpFaSLSHSFSKQASt+WILVKVPSYGFVVVNDTtsP .................................................................huhulGhl..hGlhh.lstshhhhLh.pl.slhhuhGEssRpl......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +5687 PF05859 Mis12 Mis12 protein Wood V anon Wood V Family Kinetochores are the chromosomal sites for spindle interaction and play a vital role in chromosome segregation. Fission yeast kinetochore protein Mis12, is required for correct spindle morphogenesis, determining metaphase spindle length [1]. 
Thirty-five to sixty percent extension of metaphase spindle length takes place in Mis12 mutants [1]. It has been shown that Mis12 genetically interacts with Mal2, another inner centromere core complex protein in S. pombe [3]. 25.00 25.00 25.20 26.80 24.60 23.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.85 0.71 -4.38 18 240 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 205 0 163 219 0 146.10 27 49.73 CHANGED hlT-ahGasPhohls-lINAls-hhacshsAhEpslhc+hph.....................Gschhs.........pEIcpGotKlcsLLcsplDctF-KhElalLRslhslP.stLL.cshh+.ht+psphphpp........ppptcs-hphcpt.hp-LEpphplptpLccp .............................................h.scahuasP......ohls-llNuls-h.lac.shsulEpsLhpp.st.........................................ssp.hs...............-IcpGs.pplcsllcsplD+tF-+aEhasLRslhsl......P...stL.....l....t.shh+.hpphp.phshpt.............................ppptch-.phpph...hpcLppptphpt.Lpp............................................................ 0 45 83 128 +5688 PF05860 Haemagg_act haemagglutination activity domain Yeats C anon Yeats C Domain This domain is suggested to be a carbohydrate- dependent haemagglutination activity site ([1]). It is found in a range of haemagglutinins and haemolysins. 19.60 19.60 19.90 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.44 0.71 -4.36 57 2008 2012-10-02 14:50:22 2003-04-07 12:59:11 8 357 848 3 480 2049 208 124.30 28 7.14 CHANGED spsssssspsssst....sssshslsshshsssuuhshpsappFsls......psuslh.s..p....................sssspsIlNcV..susssSpIpGhlc.s.t......spAslhlhNPNGIhhsusuplNsushhlsTsssshpts ...................................ss..............ht.......sss.sh..s...hls....Ish.s......ss.u.u.huhspa.p.pFsVs..........ppu...slh...N...p.............................sssupsILNcV......s..u....s..............s...sSp..lpGhlc.lhG.......tpA..pVhlsNP.....sGIsh.susuh.l.N.s.sphsloTupspht..s......................... 
0 60 245 377 +5689 PF05861 PhnI Bacterial phosphonate metabolism protein (PhnI) Moxon SJ anon Pfam-B_9004 (release 8.0) Family This family consists of several Proteobacterial phosphonate metabolism protein (PhnI) sequences. Bacteria that use phosphonates as a phosphorus source must be able to break the stable carbon-phosphorus bond. In Escherichia coli phosphonates are broken down by a C-P lyase that has a broad substrate specificity. The genes for phosphonate uptake and degradation in E. coli are organised in an operon of 14 genes, named phnC to phnP. Three gene products (PhnC, PhnD and PhnE) comprise a binding protein-dependent phosphonate transporter, which also transports phosphate, phosphite, and certain phosphate esters such as phosphoserine; two gene products (PhnF and PhnO) may have a role in gene regulation; and nine gene products (PhnG, PhnH, PhnI, PhnJ, PhnK, PhnL, PhnM, PhnN, and PhnP) probably comprise a membrane-associated C-P lyase enzyme complex [1]. 22.70 22.70 22.90 52.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.10 0.70 -5.93 46 678 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 652 0 125 469 126 346.70 67 97.64 CHANGED MYVAVKGGEcAIpsAapLLsppRRGD....suhspLolsQIppQLsLuVsRVMoEGSLYD.-LAALAlKQApGDhlEAlFLLRAYRTTLPRhuhotPl-TusMtlcRRISAsFKDlPGGQlLGPTaDYTHRLLDFsLhuE...........................................stsPps..................................tscshscs...hP...+Vh-hLppEGLlpsp.........................s....sss.....p.ssDlTREPLsFPssRutRLQsLARGDEGFLLALuYSTQRGYG.cs..HPFsGElRhGpVsVplss.EL.......GFslplG-lplTECp.hVNtFpGsts.....psPpFTRGYGLsFGpsERKAhuMALlDRAL...pstEhsEss..uPAQDpEFVLuHsDNV-AsGFVpHLKLPHYVDFQuEL-LlR+hRpctspt 
...........................MYVAVKGGEKAI-AAHtL.-scRRGD....sslPELSVuQIcQQLsLAVDRVMTEGulhDRELAALAlKQAuGD.VEAIFLLRAYRTTLs+lusScPlDTspMRLERRISAsYKDIPGGQLLGPTYDYTHRLLD.FoLLAs.........................GEsPshs..............................................................ss-uptpssP+VhuLLu+pGLhchE..............................p...DsG.......upPsDITRpP.saP....s....oRouRLQpLhRGDEGaLLALAYSTQRGYG.RN..H.PFAGEIRsGhl-VpI....sPEEL.......GFAVslGElhhTECE...M.VNtF.lssss.............EPP+FTRGYGLVFGhSERKAMA..MALVDRAL...pAsEaGEcs..s.uPAQDEEFVLuHuDNVEAuGFVSHLKLPHYVDFQAELELL+RLppEps.pu................ 0 22 66 91 +5690 PF05862 IceA2 Helicobacter pylori IceA2 protein Moxon SJ anon Pfam-B_9436 (release 8.0) Family This family consists of several Helicobacter pylori specific IceA2 proteins. The function of this family is unknown. 25.00 25.00 57.10 26.30 23.20 16.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.85 0.72 -4.26 2 35 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 20 0 0 32 0 51.90 64 104.66 CHANGED MAlVlKVVNGKIQEaENG.aKRTYsSNhlsspssGtlVAssTuKGKVEc.........h ......MAlVlKVVNGKIQEYENGsaKRTY.GSNsVsVpluGuIVAssTuKGKVEEYcNG............... 0 0 0 0 +5692 PF05864 Chordopox_RPO7 Chordopoxvirus DNA-directed RNA polymerase 7 kDa polypeptide (RPO7) Moxon SJ anon Pfam-B_9596 (release 8.0) Family This family consists of several Chordopoxvirus DNA-directed RNA polymerase 7 kDa polypeptide sequences. DNA-dependent RNA polymerase catalyses the transcription of DNA into RNA. 25.60 25.60 25.70 107.00 25.50 25.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.22 0.72 -3.88 7 45 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 40 0 0 20 0 63.00 78 99.96 CHANGED MVF.LVCSTCGRDLSEtRY+LlIcctpLKcVLtslpp.CCRLKLSTQIEP.RNLTVpPhLDIN MVFQLVCSTCG+DIScERY+LII++coLKcVLsoVKNpCCRLKLSTQIEPQRNLTVQPLLDIN... 
0 0 0 0 +5693 PF05865 Cypo_polyhedrin Cypovirus polyhedrin protein Moxon SJ anon Pfam-B_9652 (release 8.0) Family This family consists of several Cypovirus polyhedrin protein. Polyhedrin is known to form a crystalline matrix (polyhedra) in infected insect cells [1]. 25.00 25.00 499.30 499.10 21.50 17.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.58 0.70 -5.16 2 14 2009-09-11 12:22:05 2003-04-07 12:59:11 6 1 4 3 0 17 0 247.00 95 99.77 CHANGED MADVAGTSNRDFRGREQRLFNSEQYNYNNSLNGEVSVWVYAYYSDGSVLVINKNSQYKVGISETFKALKEYREGQ+NDSYDEYEVNQSIYYPNGGDA+KFHSNAKPRAIQIIFSPSVNVRTIKMAKGNuVSVPD-YLQRSHPWEATGIKYRKIKRDGEIVGYSHYFELPHEYNSISLAVSGVHKNPSSYNVGSAHNVMDVFQSCD.AL+FCNRYWAELELVNHYISPNAYPYLDINNHSYGVALSN+Q MADVAGTSNRDFRGREQRLFNSEQYNYNNSLNGEVSVWVYAYYSDGSVLVINKNSQYKVGISETFKALKEYREGQ+NDSYDEYEVNQSIYYPNGGDA+KFHSNAKPRAIQIIFSPSVNVRTIKMAKGNuVSVPD-YLQRSHPWEATGIKYRKIKRDGEIVGYSHYFELPHEYNSISLAVSGVHKNPSSYNVGSAHNVMDVFQSCDLALRFCNRYWAELELVNHYISPNAYPYLDINNHSYGVALSN+Q 0 0 0 0 +5694 PF05866 RusA Endodeoxyribonuclease RusA Moxon SJ anon Pfam-B_8996 (release 8.0) Family This family consists of several bacterial and phage Holliday junction resolvase (RusA) like proteins. The RusA protein of Escherichia coli is an endonuclease that can resolve Holliday intermediates and correct the defects in genetic recombination and DNA repair associated with inactivation of RuvAB or RuvC [1,2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.94 0.71 -3.70 96 2230 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 1256 7 191 1349 208 112.50 30 86.93 CHANGED ls.h..........P..psptp.......R.hs.......sthh.s.pcsppa+..ptlthhhtpthhth....h..p...........h..lplphhh.hscph....h...........................DlDNh...hKslhDuls..........th................lapDDsQ..lschpsp.phhspp.....s+ltlplppl .............................................................sh.sP...sssshh........R.hp..........tsphahs.ptspca.+.p.t.l.t.h...hlppphh...chs.....h..................................lplc.hhh...sscph........p.................................................DLDNl....hKAshDALs...................+u.........ulhh.DDpQ.....lschp.l..h...ht.....hs.s................s+ltlpIpc.......................................... 0 62 115 159 +5695 PF05867 DUF851 Protein of unknown function (DUF851) Moxon SJ, Coggill P anon Pfam-B_9669 (release 8.0), Jackhmmer:Q9N4S5 Family \N 25.00 25.00 25.50 25.50 21.80 21.20 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.63 0.70 -5.13 7 28 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 8 0 28 23 0 198.80 29 48.97 CHANGED splscspNlEVpsR-VD-sphERVhcKapcEK+pssphsts...lDEKSKlLMDRV.sKKPa.hK..pp-pG.hLhDE.SoFYppsss..KK+.csSh...-DoYsphPKLtDVpKhsspNVascp...GVPFWAVplEPsEEDhpss-.sIoVGoEHlEhY+sKplsLpTI.sTKLhLsElQPLs-LhKRD-lHFsPcLVFSNTlRSLlpspth-....tc.+p...........psKs--ptcst...+lpF-ppcscpa.hYpRsN ...........................pspplplpppplDptphc+lhcKhpppKtp.........s.........lD-Ks+lLhDRl.sKKPa..K.....ppppt.hh..h.D-.o..sFac.p...........pKh.............ptp....-DsYs.hPKLtsVhKhst.psla.pts....s.V.PFWAh..hhc.PsEEDh..............ps.s.-.ssIsVso-HlEhapp+clsLpo.h..ppp.lhhs.atPhs.LhcRDchaFpscllFSNTlRohlpht................p............pt...cptt......plph........h..Y.............................. 
0 11 15 28 +5696 PF05868 Rotavirus_VP7 Rotavirus major outer capsid protein VP7 Moxon SJ anon Pfam-B_9690 (release 8.0) Family This family consists of several Rotavirus major outer capsid protein VP7 sequences. The rotavirus capsid is composed of three concentric protein layers. Proteins VP4 and VP7 comprise the outer layer. VP4 forms spikes and is the viral attachment protein. VP7 is a glycoprotein and the major constituent of the outer protein layer [1]. 24.00 24.00 24.10 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.83 0.70 -5.21 3 112 2012-10-01 19:08:57 2003-04-07 12:59:11 6 1 9 0 0 90 0 217.90 65 99.92 CHANGED MshLLLLVlAAsAsAQLsIlPpocPEICVLaADDathDsNpFsGNFTNIF+oYNSVTLSFaoYcSosYDVIDIISKcDhSSCsILAIDVscuoMDFNTFLQSsNECoKYAAsKlHYlKLPRsEEWFuYSKNLSFCPLSDSLIGlYCDTQLssTYFslScuusYDVTDIPEFTEMGYVFHSND-FYIC+RIS-csWlNYHLFYR-YusSGTVS+pVNWGNVWSGFKTFAQVVYKILDIFFNsKRNlEPRA ...............................lhP.spPElClLaA.sDh..ps..spastNFTpIFcSYNsVTlShhsYsSsNYDV..IDI..LS+hDa..StCp...ILAIDVhcPpM.DFl...oFLQSsNpCSKYuupKIHY.KLsps-EWFVY..SKNLKFCPLSDsLIGhYCDTQlssTYFsLSss.p+.Y-VTDlPEFTphGYsFaS.s.-sFYlC+RloEctWlNhHhFY..R..c.ssSGsluptIsWsNVWoshpTFAQhlYKILDlFFN.spRshEPRA............................ 0 0 0 0 +5697 PF05869 Dam DNA N-6-adenine-methyltransferase (Dam) Moxon SJ anon Pfam-B_9691 (release 8.0) Family This family consists of several bacterial and phage DNA N-6-adenine-methyltransferase (Dam) like sequences [1]. 
27.10 27.10 27.20 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.41 0.71 -4.84 17 691 2009-01-15 18:05:59 2003-04-07 12:59:11 6 13 478 0 57 456 132 166.20 39 83.69 CHANGED .ptopp.cpstDpWpTPptlFhulsuhFG.....asLDssusccNAhCspaaTtE-NuLppDWspc......Gsh.FhNPPYS+............hp.alc+Ah-ppt+usphVhhlhsusspsh..Wacc....ADcltaIp...............GRluF..s...sstptpstssp...uuslhIac.h..htstthohls+ppLhtpGp.hht.h.tt ....................................s...upp.tc.spDpWpTP..lFtulsh.FG.....FhLDss..u...s....s.c...N...A....h.C..s..t..ahT........t......p...D.......sA......Lsp-.Wsp.+........................GAl.asNPPYSp.................................hc.alc+A..uptp..pt...t.....p..h.Vhh.lhsts.....sV.h..Whpc.h..h.............sDc...lt.h.Ip........................G.RIsF..P.....hsh-.Kpssot.....uuhlhlac.h....t.hhohls+stL.shGt.........t.......................................... 0 14 31 43 +5698 PF05870 PA_decarbox Phenolic acid decarboxylase (PAD) Moxon SJ anon Pfam-B_9737 (release 8.0) Family This family consists of several bacterial phenolic acid decarboxylase proteins. Phenolic acids, also called substituted cinnamic acids, are important lignin-related aromatic acids and natural constituents of plant cell walls. These acids (particularly ferulic, p-coumaric, and caffeic acids) bind the complex lignin polymer to the hemicellulose and cellulose in plants. The Phenolic acid decarboxylase (PAD) gene (pad) is transcriptionally regulated by p-coumaric, ferulic, or caffeic acid; these three acids are the three substrates of PAD [1]. 
24.60 24.60 24.80 29.40 23.80 24.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.05 0.71 -4.76 8 316 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 298 19 83 233 0 152.10 50 91.04 CHANGED LssFlGKHFIYTYDNGWcYElYlKN-+TIDYRIHSGhVGGRWVKDQcVaIV+LuculYKlSWTEPTGTDVSLshs.s-++lHGsIFFP+WVt-cPE+TVCFQNDHIsLMcpYREtGPTYPphVVsEFAoITalcDsG.sN-sVIAsAP.u-Ls--assshp ......................LpsFlGpHhIYTY....D........NG....WcYEhYlKN-pTlDYRIHuGhVuGRWV.KDQpspIVpl.s.culYKloWTEPTGTsVuLsh..s-+hlHGsIFFP..+..Wlh...-cPEhTVCaQN..-HlslMct.R-th.sYPphVlsEFAsITahtcsGhsN-pVIspsP.sthspDh.ts.h............................................... 0 16 40 61 +5699 PF05871 ESCRT-II DUF852; ESCRT-II complex subunit Moxon SJ, Wood V, Mistry J anon Pfam-B_9765 (release 8.0) Family This family of conserved eukaryotic proteins are subunits of the endosome associated complex ESCRT-II which recruits transport machinery for protein sorting at the multivesicular body (MVB) [1]. This protein complex transiently associates with the endosomal membrane and thereby initiates the formation of ESCRT-III, a membrane-associated protein complex that functions immediately downstream of ESCRT-II during sorting of MVB cargo. ESCRT-II in turn functions downstream of ESCRT-I, a protein complex that binds to ubiquitinated endosomal cargo [1]. 
21.00 21.00 22.60 22.40 18.50 18.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.63 0.71 -4.25 25 310 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 278 16 218 304 1 140.00 35 58.95 CHANGED .hYsFPPFFTlQPNssTRppQLssWssLlLsYC+ap+lapLs....l.......h-s...s.............s.sLFpNps..IpR........+Lu.ptlchllsplhcp....u...pAEal..s............ps+o.....................phhIhWRps-EWusllhpWlpcsGppsoVhTlYELspGDpTp.spEFHsL-ps .................YsFPPFFT......l..........QPs...h.....sTRppQlptWss.......LlLs.Ys....+.pp+hapls.........l.......-t......p......................................s.sLFpNpp....IpR............+Ls.-shphllctltcp.........G...........psEal...c............ps+s............................................................phhIhW+p..s-EWu.sllhpWlpcsG.........ppsoVhTlYELspG.-sTt.....spEaash-..s....................................................... 1 77 121 179 +5700 PF05872 DUF853 Bacterial protein of unknown function (DUF853) Moxon SJ anon Pfam-B_9798 (release 8.0) Family This family consists of several bacterial proteins of unknown function. Swiss:Q8YFZ2 is thought to be an ATPase. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.68 0.70 -6.15 9 1311 2012-10-05 12:31:09 2003-04-07 12:59:11 7 6 1266 0 310 1381 194 478.50 52 96.43 CHANGED pshhssssssp.lhlthphuNRHGLIsGATGTGKTVTLQhLAEuhSsAGVPVFhADVKGDLSGlutsGpss-KlttRhtphGls.sap.pAhPVhhWDlaGc.pGHPlRsTISEMGPLLLuRLLsLNDsQpGVLNlsF+lAD-cGLLLLDLKDLpAlLpal.uDpAcphsspYGNlosASlGAIQRsLLsLEpQGAcpFFGEPALclpDlMRTc.ssGRGsIslLuA-KLhp.sP+LYuTFLLWLLSELFEpLPEVGDsDKPKLVFFFDEAHLLFsDAPKsLl-+VEQVVRLIRSKGVGVYFVTQNPhDlP-sVLuQLGNRlQHALRAaTP+-QKAVKsAAcTFRsNPsFsstpsITpLGsGEALVSsL--KGsPshVpRshlhPPtSRlGPlospERstllpsSPltGhY-pslDR-SAYEhLst.+sutuscttppshsttpt..............................psuhhGshpshthus...pst.R..ushppslsEuhsKSssRolssQluRplVRGlLGoL....+R .......................................................................................................ts....tpsspplhLh...thANRHG..LIsGATGTGKTV.T.LQ.h....LAEuh...S.ch.G...VP.VFhAD.VKG..DL.o..GlA.p.s...G....s....s....s....-.K....l....h....t....R.hp..p.l.....Gls...-...apspA..PV...hhWDla....GE........pGHPVRAT...................l...........S..-hGPLLLuRLLsL.....N.....DsQpGVLsllF.+lAD...-......p...G.....L..........LLLD........hKDLRAll.......pal........u.......-..s..u..............+..............p........h.p.sp..YG...NlSsA......SlGAIQRuLL....sLEpQGA.s....pFF..G.E.....P.hL-l.pDhM...RTD....ss....G...+GlINlLuA..-+Lhp..tP.+LYu..sF.LLWhLS....EL.a.E.p.L.....P.....EsG.D..h-..K...PK.L..VF.FFDEAH.....L.L...F.......s......D....A......P.......p......s....L.....l.-.....+....l..E...Q....VV.RL.I...RSKGVGVaFVTQNP..tD.lP-sVLuQLGNRVQHALRAFTP+DQKAVKs..AApThRsNP.s..a.D.sppAIspLGsGEALlShL-tK.G.sPosV...-Rshlhs...PpS+hGPlotc.ERsslls.p.SPlh.G+Y-pslDR.ESAYE..hL.pp.p....h.p.ss....s.p...t.....t.p..s....s...s.s.p..t.p.t...........................................................t.ss..hh..s..s.h..p..s.h.hh.u....................s..ts+..........sttpps....l.s...p.......sh...s....K.S.s.sR............plsp........pI....l...RGlLGo
l........................................................................................................................ 0 80 193 258 +5701 PF05873 Mt_ATP-synt_D ATP synthase D chain, mitochondrial (ATP5H) Moxon SJ anon Pfam-B_9814 (release 8.0) Family This family consists of several ATP synthase D chain, mitochondrial (ATP5H) proteins. Subunit d has no extensive hydrophobic sequences, and is not apparently related to any subunit described in the simpler ATP synthases in bacteria and chloroplasts [1,2]. 21.30 21.30 21.30 21.30 20.60 21.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.00 0.71 -4.60 5 370 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 281 3 234 361 1 149.30 28 75.57 CHANGED AuR+lAppoI-WuuLuEplPsNQKAphsulKohsETapoRVusLPEcPPsIDWAYYKpNVs..+uGLVDuFcKKY-ALKlPaP.......E..DKYoupVDAEcKtssKsIupacspScsRIQEYcKcLEKlKshlPaDQMThEDasEsFP-otLD.l+K.saWP........HTPEEpl ............................................Ahp...t.lDWsthsp..p..l..s..s..p..p.t..sths.........uhKp+s.-.phppcl.t.t.L.s-pPssIDauaY+sslt...psulVDchEKp...apuh.K.l.s..hs..............h..sc..httl..-u.E...pp..shpssp..phh..ptsptclpphpcpLpplcs.h.h.PF-phTh--hspshP-ht............................................................... 0 67 113 179 +5702 PF05874 PBAN Pheromone biosynthesis activating neuropeptide (PBAN) Moxon SJ anon Pfam-B_9874 (release 8.0) Family This family consists of several moth pheromone biosynthesis activating neuropeptide (PBAN) sequences. Female moths produce and release species specific sex pheromones to attract males for mating. Pheromone biosynthesis is hormonally regulated by the Pheromone Biosynthesis Activating Neuropeptide (PBAN) which is biosynthesised in the subesophageal ganglion (SOG) [1]. 
21.50 21.50 22.40 22.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.91 0.71 -11.46 0.71 -4.95 10 47 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 27 0 11 55 0 136.30 57 89.94 CHANGED Ma.tophlslhlllsl..sVlAosND..lKD-G.DRGAHSDRuG.LWFGPRLGKRSL+lSoEDNRQAFa+LLEAADALKYYYDQLPY.Ep.QADEPET+VTKKVIFTPKLGRShu...h--+sa-NVEFTPRLGRRLoDDMPATPuDQEhYR.DPEQIDSRT+YFSPRLGR..TMsFSPRLGRELuY-haPsKlRVARSsNKTpST ...........................................................................................................................................KKlIFTPKLGRSlu....c.p.pa-slEFTPRLGRRLu-DMPA..TPuDQ..E..hYp.DPEph-SR.T+YFSPRLGR..TM..sF..SPRLGRELuY........................... 0 5 6 11 +5703 PF05875 Ceramidase aPHC; Ceramidase Moxon SJ anon Pfam-B_9877 (release 8.0) Family This family consists of several ceramidases. Ceramidases are enzymes involved in regulating cellular levels of ceramides, sphingoid bases, and their phosphates, EC:3.5.1.23. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.91 0.70 -5.26 41 599 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 310 0 390 567 261 225.10 22 87.94 CHANGED sGaWG.sTSslDWCE-NYslS...YlAEahNTloNhlFllhuhhuhhpsh+.................pchctpa..hlshlGhhlVGlGShhFHhTLpY...................hhQLLDELsMlassslshashhsphp...................shphphhluhhlhshushlThhahh.hppPslHpsuaullsshllhhshhhhpp..l.s..................................tsc+plhphhhhulslFlhGahhW.lDphhCshhpphRpth..hPhuhhhEhHGWWHlhTGlGsYhhllhhtaLcshhpsp..ppphphha..ht.....slPtlhh 
..........................ht..oS.hsa............CE.tsY.hs...hlA.EhhNT..loNhhahh.huhh..s.hh...hhp...........................pt.h.p...th....hlh.hh..h.h.h.lV.G.lGShhFHh.TLp..a...................h.hQ.h.h.DELshl.ashhhhhhh.hhshth.........................................p.p.h.hh.hhhl.hhhs...hhho.hhhhh.....p..s...hhp.p.h...s..ashhshhlhhhshhhh....th.......................................................................htt.phhth...hh...h....uhhh.....ahhuhhhW.hD..phh.Cphhpthp..h.............................hHuh.W.......Hlhhu..hu...sYhhh.hh..hhph.......t.t....h...h...................h....................................................................................................... 0 100 198 301 +5704 PF05876 Terminase_GpA Phage terminase large subunit (GpA) Moxon SJ anon Pfam-B_9892 (release 8.0) Family This family consists of several phage terminase large subunit proteins as well as related sequences from several bacterial species. The DNA packaging enzyme of bacteriophage lambda, terminase, is a heteromultimer composed of a small subunit, gpNu1, and a large subunit, gpA, products of the Nu1 and A genes, respectively. Terminase is involved in the site-specific binding and cutting of the DNA in the initial stages of packaging. It is now known that gpA is actively involved in late stages of packaging, including DNA translocation, and that this enzyme contains separate functional domains for its early and late packaging activities [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.87 0.70 -6.16 53 1654 2012-10-05 12:31:09 2003-04-07 12:59:11 7 10 711 0 162 1653 633 434.80 36 87.54 CHANGED h.lssc..u..........uupsGpW.......ctspsPYht-lh-slussp..................hccVshhpuAQlGtTp.hhhshluYhIcpsP.ushLhl.PTpstAccascs+lsPhlcsoPsLp........pplus....h.p+cssNolhpKcFs.G.GhlhhhGAsSsssL+spssRhlhhDElDta..stsl.....ssEGcPhsLAcpRspoF...sptKhlhsSTPshcs.....................sopIpptappSDpR.+aaVPCPHCGchp.....lp.app......l+a....sps..........pscsAtahC...cCushIp-ccKst...hh..tt.Gc.....Wlspss.........................................tsscpsuFalsuhYSPh...tsWsplspcalpA.......ps-spt...........LpsFhNTpLGcsW.cpc.u-..th-hppLhsRt..E.sa..............tup......VP........susl.hLTAGlDVQ.pc......RlElplhGWGpstEs.....WllD+tllh....G..............cPsstt..sattLsplLpcpat+s.sGsp.hslsshulDoGs...........................ttTpp.VYsas+pp.t...............ttpVhslKGsss..stshlppsptpsss..ttt.....ultLahVussshKsplh......spLphp...........t.ssGhlHFP.....p.....t.....hsppaacQLsAEphh..+htc.Gh..hhtWp+hp..pRNEALDshVYAhAA...thhhshp.....phcW 
......................................su.t.s..a.........thhPa.h...t.hsshusp...................hctVshhtsApsGho...hl.s...hhsY..hl....pc....ts.Lhh....TpttAcpa.+p+lt.hhcs.Pt..lh.........tl..ss...............pc.psNslh.Kp.F...s.h.hhhhGh.usp.hpppshchhhhs-hs..ta.....s.h.......ptEGssh.LuppRhps...........h.lh.SoPt.hch......................sstI..phhp.pusph..+a..ahsCP..H...CGc..........ht...ats........................lph.........................p......................tspsshh.C...............psss.hlttpphp............................p..uh.........altctp.............................................................sc.hsFa.h.s.husa...poW.pllhchhps..................hts.tt..........h+shlNTshG.sa....t..sc...p.ctphl.p+h......h.....................tt.s.VP...............st.s..h..hLsAslDsQ.hp.......Rh.h.lh..Ga....u.tt....Et.....allD+..l.h..t.........................c.sp........h.hhls.t.h.h.p.+.p.a.ht..sstp..htl.thshDsG.G...........................h.sp..sYph.+ppt................hh+lh.hKGsuh..ht.....t..l..h.shsh...........ps.....p.t..................sl.L.h.ltssshKc..l.......sthhhp...................pshs.hh.h+FP...............p..............hs...hppLshEp.....+..c..Gp.................W.pp.t....ttNEAhDhhVYA.Ah...hhh............................................................................................................................. 1 49 97 131 +5706 PF05878 Phyto_Pns9_10 Phytoreovirus nonstructural protein Pns9/Pns10 Moxon SJ anon Pfam-B_9947 (release 8.0) Family This family consists of the Phytoreovirus nonstructural proteins Pns9 and Pns10. The function of this family is unknown. 
20.30 20.30 20.50 20.80 20.20 20.20 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.75 0.70 -5.42 3 17 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 10 0 1 17 0 296.10 38 94.27 CHANGED uGKLQDGVAI+RIsDAIshFsNYohG-LlssRchsluTLHslR+NlGLAWPslLhNCalHTSSHhGVMKFlLDIAhShRFGDFTLLGusGssDPFsDlclIaTKoCpsLGhsDs-FLphsDsFuYhhsSFL-cEul+usVDMplGIHNIED+YVhRhESIhcFI+tYYTtSh-DV..lsWLEKL-uAcuGlLuspKSKcQMRuElsplRscIss+IpLYINoacNSap-HaRElAcpYsslWs..losGssAcEsps-AT.....sSuSpsTusuAELs-VsctuDsNEscLp...F+Rc-DAucsAsSchsSLSG-DutpG ..............................................................................GKLQDGlAI+RIsDAIphFppYphG-L.psps.tlspL+slRtslGLAWPslLhNCalHoSSHhGVh+FllDIAhoh+hGsFTlLGusGs.DPFsDlslIaoKoChsLshsDssFLp.s-pFu.hhsuFLptpulputV-MphGlHsIED+YVhRhpsI.chlphYastS.c-l...sWl...KL--ussuhh.tpho+tQMps.ltpsRshIss..cIppYlNpppsS.p-HhRplAppYsp.Ws..lsssssut.spssss...........ssts.sss.suphssss.hsc.s-s......hptp.-ttp.hss.hss..upsu..G.............................. 1 1 1 1 +5707 PF05879 RHD3 Root hair defective 3 GTP-binding protein (RHD3) Moxon SJ anon Pfam-B_9973 (release 8.0) Family This family consists of several eukaryotic root hair defective 3 like GTP-binding proteins. It has been speculated that the RHD3 protein is a member of a novel class of GTP-binding proteins that is widespread in eukaryotes and required for regulated cell enlargement [1]. The family also contains the homologous yeast synthetic enhancement of YOP1 (SEY1) protein which is involved in membrane trafficking [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 743 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.42 0.70 -6.61 8 439 2012-10-05 12:31:09 2003-04-07 12:59:11 7 16 244 0 317 440 4 457.00 25 63.95 CHANGED hGSQSoGKSTLLNHLFGTsFssMDA.spRpQTTKGIWlA+sssl...........cssILVMDVEGTDGRERGEDQD.FERKSALFALAsSEVlIlNMWEHQlGLYQGANMuLLKTVFEVNLpLFhpp.....ss+KoLLhFVIRD+lGsTPLENLpcoLpcDlp+IWsSluKPtuhEsoslsDFFDVpFsALsHK.hpp-pFpppVppLcpRFhpu.............hssGsFts-YH+clPADGFohYA-pIW-pIcsNKDLDLPTQQlLVApaRC-EIusEshpsFhssh....Ep..p.cEsspuuslssLGppLsslhpcslpcYDspASRYccuVYppKRppLcpKlss+lpssaQshLssL+pshl-sFcpulspulcuGp......sFucuVpsptpcslpcFccpscohsl.pssWss.cchhtKLs+DI-spsuplRstcLpcLsschE+plpspLS-sVphhhsssu..................+-sWDslhphFcppscsAlpthpsthsuF....-hs-s.pssphltsL+phSWshlcsKs+EEh..splLh+L+-RF--lFRYDsDGhPRlWpsc-DI-uIa+pARppoLpLlsVLohhRLuDss-sl.ps.hhuhpssssssusp..csl.ss.sslspsphcpltsptpILTcspppslhspFK+ps-hshs-A....KRStlsspTplPsahallLlVLGWNEFMslLRNPLahhlhhlsusssasLa.pLsLhG............PshphlpsshsplhslAcc+hcpshpsscsh+uutphcssusscspps 
..................................................hGsQSoGKSTLLNtlFsspFth.st...t.......t.............psT+G..hahsh............................................................hll.hDhEGhcu...E..p.u..c.p.ps....aE+.....p...uLFsl..uh..uclhllNha.........ts.s.hpslhp..hth.h.............................h.hs...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tt................................................................................................................................................ 1 98 205 280 +5708 PF05880 Fiji_64_capsid Fijivirus 64 kDa capsid protein Moxon SJ anon Pfam-B_9976 (release 8.0) Family This family consists of several Fijivirus 64 kDa capsid proteins. 
25.00 25.00 31.60 31.50 19.10 18.80 hmmbuild -o /dev/null HMM SEED 561 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.66 0.70 -6.17 5 168 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 0 162 0 486.90 86 99.98 CHANGED MsDI+LsIAPDLIHsGVPQRLSDTIILNDRPpITLLsHF-sLFcEsNIIKuP+suSspoTVNIYIKp-LL+RLHDRLuoV-ToTLPNIoQl....KEal+sFFpN-lQsIFpsLsNN-lsssFVGVTT+GLSLFAsAKsDAEQIERVQI-TLTEGNlTLKPhSADGlEVILDDsYINlVsKllGh-VpKLI-KCC+ElPAchGIlT..DEVKhhlpTGKLRlDGGYDYNCPSSoTDVT+YGsYDc.FScpMFscLspFaNlSLolVPVuALKslHlh-cELspLDuDKSLLEQsWoAlTSFlESaclKTK..............lKsDDsDchcLscLsssKsNpcusp....AolssoDKsh...........l-WYp+sF.sscT-KulsLsRsElhE.tAcsoSsllc+VKltFushhFEclsssupEKolhV...-TsuG-MTLDpYRuIu-VLNuIWKRGKDlAVcsFDYIKLGlEKA.oHLuslLhKKYNlTlDDIlNFI-KGPSYLAsLsKlsDWsLIAKlIIsSVLPsIIQuVYKoDPSsslMNSlLIo+AsNLI+sD+cRLpcKspucss.s.uNTsc+-sssKlllcKlso .....MADIRLDIAPDLIHNGVPQRLSDTIILNNRPTITLLSHFNsLFHEpNIVKSPHlASSQTTVNLYIRKHLLTRLHDRLQTVETSTLPNITQL....KEHIpSaFcNEHQPIFQTLTNNNLSDEFLGVTTFGLSLFATSKLDAEQIERVQIETLTEGNVTLKPFSADGLEVILDDSYIGIlGKIsGLEVHKLLDKCCREVPAQMGILT..DEVKLLhRoGKLRIDGGYDFNCPAS.TTDVTHYGGYDQ.aSRQMFE+LNLFaNISLSIIPVSALKTlHlFEKELSsLDADKSLLEQTWSuVuSFlETWpVKoK..............sKs-D.DEYELTuLSsLRpshDGsS....sSSPasDKKF...........I-WY.KTF..uKhEKGSSLR+sElE-KssouTSshsKpVKIHFsVQYFDEhKsNGHEKSVsV....TpKGEMo.L-.YRKIGElLSAIWKRGKuLAsPChDYIKLGVEKA.aHLAPVIMKKYNLTIDDIIpFI-hGPSYLAKLDKIDDWSLIuKLIITSVLPNIIQAVYKTDPSNNVMNSVIISRANNLLKuDRDRLlKKAhoANsSo.SN.SspEHsQKIVLNKVTR.............................. 0 0 0 0 +5709 PF05881 CNPase 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP or CNPase) Moxon SJ, Mazumder R anon Pfam-B_9997 (release 8.0) Family This family consists of the eukaryotic protein 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP). 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP) is one of the earliest myelin-related proteins expressed in differentiating oligodendrocytes and Schwann cells. CNP is abundant in the central nervous system and in oligodendrocytes. 
This protein is also found in mammalian photoreceptor cells, testis and lymphocytes. Although the biological function of CNP is unknown, it is thought to play a significant role in the formation of the myelin sheath, where it comprises 4% of total protein. CNP selectively cleaves 2',3'-cyclic nucleotides to produce 2'-nucleotides in vitro. Although physiologically relevant substrates with 2',3'-cyclic termini are still unknown, numerous cyclic phosphate containing RNAs occur transiently within eukaryotic cells. Other known protein families capable of hydrolysing 2',3'-cyclic nucleotides include tRNA ligases and plant cyclic phosphodiesterases. The catalytic domains from all these proteins contain two tetra-peptide motifs H-X-T/S-X, where X is usually a hydrophobic residue. Mutation of either histidine in CNP abolishes enzymatic activity [1]. CNPases belong to the 2H phosphoesterase superfamily. They share a common active site, characterised by two conserved histidines, with the bacterial tRNA-ligating enzyme LigT, vertebrate myelin-associated 2',3' phosphodiesterases, plant Arabidopsis thaliana CPDases and several bacterial and viral proteins [2]. 
25.00 25.00 25.80 30.30 17.40 17.20 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.56 0.70 -4.95 5 71 2012-10-03 21:31:48 2003-04-07 12:59:11 7 6 44 11 32 67 0 204.90 55 53.52 CHANGED LPLYFGWFLuKR-E-sLRKTup-FLEpLGNLKAFKK+LptFsuED..K+KlDLlpYFuK.sPslLHCTTKFCDYGKAAGAEEYAQQEVVKKSYoKuFTLoISALFVTPRTsGARVELTEpQLtLWPsDADKE......L.PsDsLPRGSRAHITLGCAA-VEsVQTGIDLLEFVKLQKAG+-GEpVGELsG...GKLoYauNGMWMLuLu+KIEV+oIFSGYYGK.GssVPppGuKKGtplhppCTIl ...........................LPLYFGWFLsKcupEp.l+psu.sFLcpLsshcAFKKc.l..pp.F..ss..ts..ccclDLhpYFs+.....sPGlLHCTTKFCDYG..KAsGA-EYAQp-sVKcS.Y.uKu....FpLolSALFVTP+TsGARVc....LoEpQL....LWPsDsDK............................lsss-sLPtGSRAHlTLGCAusVEsVQTGLDLLEIlp.pKuG...pcGEpVuEl...st..........................GpLh.hGpGRWhLsLs+phclcAlFoGYYGK.tcs.Vsspuspct.t.hppCoI.................... 0 8 10 14 +5711 PF05883 Baculo_RING DUF855; Baculovirus U-box/Ring-like domain Moxon SJ, Bateman A, Dlakic M anon Pfam-B_9633 (release 8.0) Domain This family consists of several Baculovirus proteins of around 130 residues in length. The function of this family is unknown, but it appears to be related to the U-box and ring finger domain by profile-profile comparison. 21.20 21.20 21.30 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.88 0.71 -4.33 25 57 2012-10-03 15:03:13 2003-04-07 12:59:11 6 1 56 0 0 68 5 133.80 38 95.21 CHANGED MLlTlph.pD+ctala+hFpchWsp.slECtICh-cIss..cGVVslT-suslNLEKMFHspChcRW..p...ppcpRDPFNRsl+YhFsFPPco.c-spuhL-ch+.uFI...GD-ctD+hasspapR.ssspp.hlDlELDFsphL .......................MllTlph.pD+ptaha+hFpchWs.ph.shECpICh-cIss..cGVVslo-sutLNL...EKMFHspClc...RW...p.....ppc....sRDPFNRsl+YaFsFPPcohcECpshL-ch+.sFI...GDcctDchapp.apR...lpstp.hlDlELDFpphh........................ 
0 0 0 0 +5712 PF05884 ZYG-11_interact DUF856; Interactor of ZYG-11 Moxon SJ anon Pfam-B_9445 (release 8.0) Family This family of proteins represents the protein product of the gene W03D8.9 which has been identified as an interactor of ZYG-11. ZYG-11 is the substrate-recognition subunit for a CUL-2 based complex that regulates cell division and embryonic development [1]. 25.00 25.00 25.50 41.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.88 0.70 -5.33 6 31 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 8 0 31 27 0 260.50 29 89.67 CHANGED hPsssp-G....Psusssss....uNtG.ssstp-AuIshVh+TYupaGpDlpQhsscGlRphspslpp.ossslpphpoohppcosc.............llssLh-ho.PtpluopsIlEhFuhSSlLLllAolSSllGGYlLAPlFGIlIsslGAAIhusLVlPhhusY..LNtcsGSluupRltLllhshsQGVLhGauhsasalsupPFsslTslluSFuYPllshp.sTuRsslLGsssGsSlhhHhslGhlpGuLossYFlLouhYTluAlsLIQ..IAhRsQocss.hphYshlLVuhhlsuKshVYGlFGssc .........................................................................t....................................................h.t-hpt..........psh+.h..shpp.sssphtphtsphp.pspp................llssl.-.hp..P.thss.slhchF.shoollLlshuluohlGuYlLuPlhslhhsthGAslluslllPshshY...Lst...............c...stuhst..p..Rh.LlhhuhsQGlLhGaulua.shls.u.pPhshlTsllhuFshslls..sp....ss.sRsslLuhssGsuhhhplshGhlpGuLoh.sYhlLosLYohuuhshlQ......l.Ah+.s.hss.s.hphYphlL.....Vs.hlhuKshsYulhGst.t............. 0 9 13 31 +5714 PF05886 Orthopox_F8 Orthopoxvirus F8 protein Moxon SJ anon Pfam-B_9539 (release 8.0) Family This family consists of several Orthopoxvirus F8 proteins. The function of this family is unknown. 
19.90 19.90 21.90 26.40 18.30 17.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.07 0.72 -4.14 2 53 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 30 0 0 35 0 62.00 73 96.53 CHANGED MEGSKRKH-SRRPQQEQEQ.RPRTPPSYEEIAKYGHSFNVKRFTNtEMCLKNDYPRIISYNPPPK ..............MEGSKRKH-SRR.QQEQEQ.RPRTPPSYEEIAKYGHSFN.VKRF...T....N.-E...MCLKNDYPRIISYNPPPK... 1 0 0 0 +5715 PF05887 Trypan_PARP Trypano_PARP; Procyclic acidic repetitive protein (PARP) Moxon SJ anon Pfam-B_9554 (release 8.0) Family This family consists of several Trypanosoma brucei procyclic acidic repetitive protein (PARP) like sequences. The procyclic acidic repetitive protein (parp) genes of Trypanosoma brucei encode a small family of abundant surface proteins whose expression is restricted to the procyclic form of the parasite. They are found at two unlinked loci, parpA and parpB; transcription of both loci is developmentally regulated [1]. 40.00 40.00 40.20 40.30 39.90 39.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.30 0.71 -4.37 2 71 2009-09-10 16:45:05 2003-04-07 12:59:11 6 12 35 4 18 70 31 104.10 51 41.22 CHANGED MAPRSLYLLAlLLFSANLFAGVGFAAAA-tstspslsKGGKGK...............-.-stPE...EstPE.ps..EstPE.ps..EstPE...........EsEPEPEP..........GAATLKSVALPFAlAAAALVAAF ...................................................................................................................tPc...sE..scPEP..E....PEPEPEPEPc..s...sEscPE...psEPEP.EPEPEP...E..PEPEPEPEPEP-Pssu..sh....................................... 0 6 17 17 +5717 PF05889 SLA_LP_auto_ag Soluble liver antigen/liver pancreas antigen (SLA/LP autoantigen) Moxon SJ anon Pfam-B_9614 (release 8.0) Family This family consists of several eukaryotic and archaeal proteins which are related to the human soluble liver antigen/liver pancreas antigen (SLA/LP autoantigen). Autoantibodies are a hallmark of autoimmune hepatitis, but most are not disease specific. 
Autoantibodies to soluble liver antigen (SLA) and to liver and pancreas antigen (LP) have been described as disease specific, occurring in about 30% of all patients with autoimmune hepatitis [1]. The function of SLA/LP is unknown, however, it has been suggested that the protein may function as a serine hydroxymethyltransferase and may be an important enzyme in the thus far poorly understood selenocysteine pathway [2]. The archaeal sequences Swiss:Q8TXK0 and Swiss:Q8TYR3 are annotated as being pyridoxal phosphate-dependent enzymes. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.11 0.70 -6.00 5 254 2012-10-02 18:26:03 2003-04-07 12:59:11 8 5 169 15 177 1048 87 327.70 33 79.28 CHANGED MDTsscsc..ssGlGEREuRVhocLspcslacFsHGlGRSGsLl-sQ.PKAsGuSlhs+LTNcLlpchL+hLGl+tlcssFVVPhATGMSLuLChouhR.++scAKhVIWPhhDcK...SslKAsppAGFchtlVEsll-GDpllTDlssVEctl.cchssEslLsVlSTsosFsPRsPDslcEIAcICu-YDVPHlVNuAYGIQspphhchlppAh+hGRlDAVVpSlDKNFhVPVGGuIIAAhc-salpEIScsYPGRASAsPstclLIoLLuLGssuYtcLh+cQKEsFshLcEpLccLuEchs..............EpllcsP-NPISSAhTlcsls.hpp...ts.spLuucLasR+VTGsRsVpsssshsT..........S+hppY.ssYlslsSAIGl+-EDV-pllcRL-csLc 
...............................................................................s.p..t....hthGEREsRhhs.lht...h.............th..hHGlGRSGsl...t.Q.PKAhGuolh.plss.hh.phh+.....hGh........t..h.tt...s.h..hlP...hs.T.G.M.s.l.s.L.sh.s.h.......p...t....+...................p.u...............c.....h.....llas...p....l.....D.pK............os.a.Ku....h..tA...G....h...p.s..h..l....l...p...s...h...h.......p......s......-.......t....l...h...s...s....l...p...s...l..c..ttl....pc...h..G.......p...................s....l..L..s.l.h.o..T.sosF....u....PR................h..s...D..p.............l.....tpluplCtphslPallN......sAYu...l.......Q....s..p....h..h.p........l.p.t.s.....p...h.............GRlDs.hVpShD.KNF........hV.Pl.G...Gul..l..A...u.....s...t..p...h.................l...p...t.....l.......u...p....Y........sGRA........oto........s.................l-........l........h.h....o.L..L.......t...L..Gp.s.u..h.tp.h.lt.pp.h..t..a..h.lpp..p..l..p..chupph.s.......................................c.p.lhps.s.cN.IS....hsh.sh....p....s....h..t.t....tt....................p.th...h.luuhL..apRtl.oG.s.R....l....phts...............t..........sY.lshssslGhptp-l..phhhpplcchh........................................................................................................................................................................................................ 0 68 98 148 +5718 PF05890 Ebp2 Eukaryotic rRNA processing protein EBP2 Moxon SJ, Mistry J, Wood V anon Pfam-B_9615 (release 8.0) Family This family consists of several Eukaryotic rRNA processing protein EBP2 sequences. Ebp2p is required for the maturation of 25S rRNA and 60S subunit assembly. Ebp2p may be one of the target proteins of Rrs1p for executing the signal to regulate ribosome biogenesis [1]. This family also plays a role in chromosome segregation [2]. 
18.80 18.80 19.30 21.20 17.50 15.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.03 0.70 -5.09 28 350 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 297 0 244 346 2 252.80 33 78.17 CHANGED --sDh-Lp-h..tsphcsshsl.....hpcp+hsINNstuLpssLccIph......t.lsasE+.sls.......us..psh-.plpDhp........................DDhpREluFY+Qu.sAVhpAhspLcchsVshpRPsDYFAEMlKoDEHMpKl+pKLlsEtsuhctSEcAR+tR-lKKFGKpVQstplQcRp+EK+-hl-cIKph+Kp+p..........tt.-phDht..........h--sst.....tsstp+s..............tt..+stss...tKRptKspKFGaGG+K+tpKpNstpSosDhsGFss+h...................+ut...h.................sspRPGKu+R ...............................p-..lpp.h....t.....c.sthsh.....h.tp+.hsl.Ns.ssuLpppLpclph............phsahE+hsls...............................us......pshs.ph.Dhp............................DDh..p..R.EhuFY+Qu.s..AshpuhspLcphs.lPhpRPsDYFAEMsKoDpHMp.Kl+p+..LhpcpsshctuEcs++.RphKKaGKpVQ.sp............pl..p..cR..pc-K+..chl-pl.+ph+Kt+p....................cthDhh..................pcttt.......tpptppt............................ttttpt.tss..........tKRp..tK.spKaGa....G.G+K.+..ht.KpNstt..Sts.D.hsuap.tpth..........................+ut.........................tttR.GKthR.......................................................................... 0 85 133 202 +5719 PF05891 Methyltransf_PK DUF858; Hydroxy-O-Methy; AdoMet dependent proline di-methyltransferase Moxon SJ anon Pfam-B_9735 (release 8.0) Family This protein is expressed in the tail neuron PVT and in uterine cells in C. elegans [worm-base]. In Saccharomyces cerevisiae this is AdoMet dependent proline di-methyltransferase. This enzyme catalyses the di-methylation of ribosomal proteins Rpl12 and Rps25 at N-terminal proline residues. The methyltransferases described here specifically recognise the N-terminal X-Pro-Lys sequence motif, and they may account for nearly all previously described eukaryotic protein N-terminal methylation reactions. 
A number of other yeast and human proteins also share the recognition motif and may be similarly modified [1]. As with other methyltransferases, this family carries the characteristic GxGxG motif. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.46 0.70 -5.04 7 406 2012-10-10 17:06:42 2003-04-07 12:59:11 7 5 288 3 286 1393 487 201.70 37 81.52 CHANGED ctcphYpcAlsYWpsVpsoV-GhLGGa...G...plsshDl.GScsFLppLhtc.hs...stppphhALDCGAGIGRlTKslLhchh...........scVDlVEPVppFlspucp.Lup......tpt+ssshassGLQ-aoP-....ts+YDlIWhQWClGHLsDp-LluFhpRCptuLpPsGhIVlKENhsppsh.hhDcpDsSlTRs-shh+plFccuGLpllupchQcGhPc....ELasV+MYALp ..........................................................................................YtcuhpYWp.p...l..suosc...Gh.L...G.....Ga......s......plsph...Dl..pu.Sp...pFLpc.l......h.t....t................................st...s....t.h....p....hALDCGA..GI.G.R....l......T...+p.l...L..h....h..h...........p..p..V......D..l...V..E...s.sp.pFl...p..ps..p..p......ltt.....................t.t.t...+.l....t...p...h..a..s..h.G.L.....Q..-..a..s...P.p........................t.p.Y.Dl.IW.h....Q.W.s..l.GH..L...T..D......p.c.......L..l..p.FLpR.C.+pu.L.........p...............s.....G......h..l....l..l...K...E.....N..h...s..p...........p.....s.....h.............hh.....D........p........p...D.....S......S..V..T....R.....s............p...h..h+p....lhpp..A.GLplltpchQpsh...Pc.........p..lhsVhhaAL............................................................. 0 89 151 227 +5720 PF05892 Tricho_coat Trichovirus coat protein Moxon SJ anon Pfam-B_9763 (release 8.0) Family This family consists of several coat proteins which are specific to the ssRNA positive-strand, no DNA stage viruses such as the Trichovirus and Vitivirus. 
25.00 25.00 27.50 26.90 19.00 18.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.38 0.71 -5.09 11 513 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 30 0 0 510 0 176.60 48 77.61 CHANGED hutptclRstl..........cshLhAths.spcs..ucsGhs+sh.......YLcolFG.IAlhGTSccTpah....................spVs.ltsphupc..hsshuphslpphVsph+saussss-GslpuhThRQlCEPFAppA+-sLlhhtphGsaopLhpKhscsGtKpPpVMFDFNsGLshptLo.tpctsVIpshNpRLFRTEGAKuVFsApuSssEpul-l .....................................................................ucot..hE........hLcolFuNIAl.GTS-pT.pFh....................-hsspVKshtspc.....sl.GphsLpElVshlKsFpsTSS-sslsshTFRQlCEsFAspARshLVcht.+GlaTNLaspMPcsGpK.PplMFDFspGLsM...hhhs.hspppVIssMs+RLhpTEhAKu..EAp.uSsppsLpl............ 0 0 0 0 +5721 PF05893 LuxC Acyl-CoA reductase (LuxC) Moxon SJ anon Pfam-B_9766 (release 8.0) Family This family consists of several bacterial Acyl-CoA reductase (LuxC) proteins. The channelling of fatty acids into the fatty aldehyde substrate for the bacterial bioluminescence reaction is catalysed by a fatty acid reductase multienzyme complex, which channels fatty acids through the thioesterase (LuxD), synthetase (LuxE) and reductase (LuxC) components [1]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.27 0.70 -5.75 13 358 2012-10-02 17:28:28 2003-04-07 13:39:30 9 4 316 0 112 2771 1191 335.20 21 69.60 CHANGED hsslslppllshlhpsupcWpssp.phh..h...htphhuYspphhp..hct...hhhhCp+suLhcll-p-L.up.chLD-al...pts...sY.+AhP+GlshHlluGNVPLlslhSllculLsKNssllKsSSSDPhhsssLlpohh-lDsst.........slscolSVlYa.cuscspLscplh..spADsVlAWGG--Alchlsp.ptsspschlcFGsKhShull-ssA.....slspAsculAcDIChaDQpAChSsQslalp...ssslcEFspcLuptLs+hscllP+up.shsEpAthotpct-.....shhsth..tVh.pspspsWhllhSptt........thhspPL.pRolhl+tlscl.-llphlpps.ps..QTlul.hshpuphthhsp.LuttGVpRIscsGhhshacsGtsHDGhhsLpRLV+a .....................................................................................................................h........................................................................................................t.....t.l................h........l.......t.............t...h...p..t...l....c..p..ah..........p..t...........t.........p...shsh.uh.sh.Hl.h.u.G.Nl.P.h.l.u..h.a.u.l.l.p.u.l..L.s.tNt.s.l.l..KhSu...s...-...sh..hhshl...h....p....ph....h.p...l..s.s.p.....................l.tp.p..l.s...l..l...a....ttt.........p.t..p..l....s.c..ph......s..p...s..D.....s..l...l.....u...a.G...G...s...p........s...l..c....h.hp....t....hh....s.s...p....s....p...h....l..t.....a....u.s.+.h.S...h..ulls.sps.......php.p.hh.p.t.hA.p.Dlh....h....ac..Q..uCtS...s.p.p.l.al.....t.............p.h.c.th.hp.........t.........l...ht.........t.........h........t....t...h....................h...h..s.....t....t....................s...........t....t....t..s...t..h...t....h.........t.............................h...tt....................h..................t.......t.h.............l....h.....p...........................ps...h......t......sh............l...h....l..s...ph.phh.........l.......t........QT.huh...h.....t.p.h......hs.......hh.hG..s.Rhs..G..........................aDG...h..l.phsp.............................
................................................................................................................................................... 1 42 80 95 +5722 PF05894 Podovirus_Gp16 Podovirus DNA encapsidation protein (Gp16) Moxon SJ anon Pfam-B_9825 (release 8.0) Family This family consists of several DNA encapsidation protein (Gp16) sequences from the phi-29-like viruses. Gene product 16 catalyses the in vivo and in vitro genome-encapsidation reaction [1]. 20.90 20.90 21.40 27.10 20.80 20.60 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.24 0.70 -5.11 4 17 2012-10-05 12:31:09 2003-04-07 13:56:03 7 1 16 0 0 18 2 330.60 31 93.00 CHANGED M-KKh.aap.pKlLSYstlhshlI..GARGlGKoYAhKph.lcchlppGcQFlYLRhYKoEltKspNpaFsDlt.paPspcFhVKG....p+hYlc..tchhGahIPLSshQu.KS.uYPNV.TIhFDEFlt..EKs.ssY.PNt....VcshlslhsTV.Rt+...-cVRslChuN...AVolhNPYFlhFth.Ps.NppapspsphllphlssccatsthRcsRFGphIsGhA..Yt-hSlDNpFsss......oclFV.++o+supasFuIhhsstphul.........WlDhppuhhahspu+sPcscpl.aALTscDL..sEsthLlhshpsshhLpphsSsa+KGhL+F-spVlRphhh-lht..hI .............................................................h.ph.s..h.hhh..Gt+slGKoashp.h.hpchhp.GtphlhlRh.csphtph.ts.ah.sc..l......a.s..hc.VKs............................+chhhD..tKhh..GahhsLSshpph+usuYPpVpsIlaDEFh...-.Kc...NhsYlPNE....sstL.slh-oVhRhR...-cl+sIsLuN...usollNPYFs...aash...s.sKp.hp..tps-sL.l.ph.s.scsapsEp.p....tpLhcGou..YuchSLDNcFhss......p.h.l.+...t.....ptspKhlasIhas.sthlG.V.........Wh..sh.phhhhlspspDPsppsl.h.hp.sDh..ptt...l...hsphhh+hLssthhst.LhFDs..hRhlu.p.................................................................................................................................. 0 0 0 0 +5723 PF05895 DUF859 Siphovirus protein of unknown function (DUF859) Moxon SJ anon Pfam-B_9891 (release 8.0) Family This family consists of several uncharacterised proteins from the Siphoviruses as well as one bacterial sequence Swiss:Q8K6J6. 
Some of the members of this family are described as putative minor structural proteins. 20.10 20.10 20.50 20.50 19.60 20.00 hmmbuild -o /dev/null HMM SEED 624 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -12.95 0.70 -6.26 11 110 2009-01-15 18:05:59 2003-04-07 14:01:24 7 5 86 0 4 104 3 484.90 28 85.00 CHANGED Ms-FaSNNDRGY+lpLhV-QVuQ.sssNNTSpVRh+LsLhNs....TsTFupYsCsuhVphpGQplsaSupPShlohspolpLIDpTlTlsHsuDGoKThuhsApFsGSGGaSPGTLsIuupsaTLosIPRuSoVS....VusshlGsslTIsIsRpSusFTHslpYp.aGspsGsIuo.slsTSsoWTssl.DhAspIPNSTSGpGTIhVcTYssGshIGop..osshphsl.PsSV+PohoGIoLoDsNosAppllsuss.FlQIhSNl+VsFNGAoGtYGSTI..pGYaAEI........VGtNQosspNGGsLG..hhNasGpsTlRApVoDSRGRpS-sh-spIolLcYFuPuLsFostRsGpsssplslhRpA+IAPLoVNGlQKNpMpLTFclusluossaosDsGsASGsWoshopLssos..AsLuGsYsusKSapVhGpLpDpFoSTsFphsVssEpVVh.......oh-+sG.lGIGKhhE+...GuLDVsGDIYA......ssp.IQQaQLT.NNGts........hpps...Nhl.-sGthhlssSu.sNPs...u...hh...hp.pss.p.hhQTFhuss..hhh...p.sh..ssWtsWpcaupscshsh............hpTsWp.sGhps..shat+sGsshThp.....hph.sst......cshslsshPpchhss...ahhslsu 
................................................................................h.h.h...p....s..up.shssNoSpVphphhl...ps.....shoas....h..s......s....h......l.lsGpphp....o...spssh...........s.s..pp.h.L.hstphT.lsHsuDGoK.ohs......h.Aph...ss..s...sGhp...s..sl.s...h...stshsLssIPRuS..o..lo..........ss.sshl.G.s.sholsIs..Rt..SosF...oHsl..pYp...au...ut.pts....l.us..sh.s.T....osoaT.sl....s......hA...splPsusSGssslhlp..TasGus...lGsp.....ohshshsl..Ps..o..hcPohs.u..l.olo-ssshstpllssss..FlQhhSplplsh...ssusG.YGSTI..puYpsEl........sststssssss.u....thG...hhshsGphTlpApVsDSRG+pSsshshplsVltYhsP.th.s.a.sshRsspsss........lp..lhhpspI.u.Plslsu..pKNhh.p.loapss.hss....s.hshsputAu....ass.....pphstss..hsh..u.G.....sassspSa.lhshlpDtFs.s....s.p.h...phs.lss..tpVlh.......shs...psG...lulG...Khh-p.............G..sl-s...G..sh.h......ts..h.....p.h.pLh......................s.h..sG.hh.....ss......spP......................................................................................................................................................................s............................................................................................................................................. 0 0 1 3 +5724 PF05896 NQRA Na(+)-translocating NADH-quinone reductase subunit A (NQRA) Moxon SJ anon Pfam-B_3622 (release 8.0) Family This family consists of several bacterial Na(+)-translocating NADH-quinone reductase subunit A (NQRA) proteins. The Na(+)-translocating NADH: ubiquinone oxidoreductase (Na(+)-NQR) generates an electrochemical Na(+) potential driven by aerobic respiration [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.35 0.70 -5.20 77 764 2012-10-02 20:27:15 2003-04-07 15:35:01 6 4 731 0 166 1388 819 253.40 47 56.78 CHANGED hI.pIKKGLDLPIsGsP.p........Q.tIp.susssppVAllGp-YhGh+.PoMhVc.GDpVKtGQsLFpDKKssGVhFTAPuSGpVssIsRGt+RVlpSlVI-l-..s...p........-p.sFppastt..pl...ssLsp-pVpppLlpSGLWTAlRTRPaS+lPss-u.sPpuIFVoAhDTsPLAADspllIspppps......FpsGLslLu+LTsG+.Valspsssss.lPhs.....shssVp.hcpFsGPHPAGLsGTHIHalcPl.uts+sVWplsYQDVIAIG+LFhTGcl .................................................IpIKKGLDLPIsGtP..p.........Q..sIp...su....slppVAl....lG...--YlGM+..PoMtV+tGDpVKKGQsL.F.E...DK.K.s..PGV.h...F...TuPsSGpVs...s....I.....s......R.....G....t...+....R.....V.....L.....Q....S......V...VIcl-..G...s........-pls..F..s.c..a.s..s.p...pL....usLsp-pV+...ppLlpSGL........W....T....A....l.R.T.R..P...F....S...K...l....P...s....s....-...u...pP....p.....u...I...F.V..o.AhDTNP.L.AADPpllIppp.p..cs...........FpsGL.s.l.L...o.+...L..T.c..u...K...V..al.C..p...s.....s...s..s.......s...lPtp.........shss...Vp.....hcpF.sGPHPAGLsGTHIHFl..pPV..uts+s.....VWpI...sYQD.VIAIG+LFhTGcL................................................................................................................... 1 45 95 138 +5727 PF05899 Cupin_3 DUF861; Protein of unknown function (DUF861) Moxon SJ anon Pfam-B_2000 (release 8.0) Family This family consists of several proteins which seem to be specific to plants and bacteria. The function of this family is unknown. 
23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.32 0.72 -4.59 33 1583 2012-10-10 13:59:34 2003-05-01 16:21:48 7 10 1032 14 492 2347 591 76.00 27 46.50 CHANGED tstsushptGlWpsoPGcaphphsp.pEhs....allpGclpls...........s-....sGps......hplpsGDhhhhPtGh......pusWclt-s.l+....KtY ...............................h.t.ptshhhshac....s.ss.G...p.............h....p....h....h....h....s...p....t...E..hh...........al..lpGplpl.s...........st.......sGcs......h.p.lssGDhhhhPsGh.........pssWcs.th.h+Kha....................................... 0 106 251 370 +5729 PF05901 Excalibur Excalibur calcium-binding domain Moxon SJ anon Reference 1 Domain Extracellular Ca2+-dependent nuclease YokF from Bacillus subtilis and several other surface-exposed proteins from diverse bacteria are encoded in the genomes in two paralogous forms that differ by a ~45 amino acid fragment, which comprises a novel conserved domain. Sequence analysis of this domain revealed a conserved DxDxDGxxCE motif, which is strikingly similar to the Ca2+-binding loop of the calmodulin-like EF-hand domains, suggesting an evolutionary relationship between them. Functions of many of the other proteins in which the novel domain, named Excalibur (extracellular calcium-binding region), is found, as well as a structural model of its conserved motif are consistent with the notion that the Excalibur domain binds calcium. This domain is but one more example of the diversity of structural contexts surrounding the EF-hand-like calcium-binding loop in bacteria. This loop is thus more widespread than hitherto recognised and the evolution of EF-hand-like domains is probably more complex than previously appreciated [1]. 
21.00 21.00 21.10 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.86 0.72 -3.61 68 1070 2009-01-15 18:05:59 2003-05-01 16:38:01 6 44 876 0 230 825 85 37.90 39 17.10 CHANGED shpsCsphpusstAthaht..................psshss.....cL...........DtDsDGluCE .............................................atsCpphpss.s.us.lhts.................................cPuYss...........+L................DRD..p...DGlACE..... 0 63 151 202 +5730 PF05902 4_1_CTD 4.1 protein C-terminal domain (CTD) Moxon SJ anon Reference 1 Domain At the C-terminus of all known 4.1 proteins is a sequence domain unique to these proteins, known as the C-terminal domain (CTD). Mammalian CTDs are associated with a growing number of protein-protein interactions, although such activities have yet to be associated with invertebrate CTDs. Mammalian CTDs are generally defined by sequence alignment as encoded by exons 18-21. Comparison of known vertebrate 4.1 proteins with invertebrate 4.1 proteins indicates that mammalian 4.1 exon 19 represents a vertebrate adaptation that extends the sequence of the CTD with a Ser/Thr-rich sequence. The CTD was first described as a 22/24-kDa domain by chymotryptic digestion of erythrocyte 4.1 (4.1R). CTD is thought to represent an independent folding structure which has gained function since the divergence of vertebrates from invertebrates [1]. 
25.00 25.00 26.60 26.60 23.90 18.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.67 0.71 -4.03 9 519 2009-01-15 18:05:59 2003-05-01 16:43:59 8 12 80 0 154 372 0 107.90 60 12.64 CHANGED ts-Isocchslstspspshs.pshph..ssss.-sslLlospTITuEohSTTTTTHlT....K........................TVKGGlSETRIEKRIVITGDsDlDHDQALAQAI+EAKEQHPDMSVT+VVVH+ETEl .......................................................................................p.....hp-sPllpTETKTI..TYE....us........ph...D.s.s.....s.....sh-...sGV..LhoAQT.ITSE.T....s.S....TT.TTTHIT.....K........................TVKGGISETR..I..EKRIVITGDuD.ID.HD.Q....A.....LAQA.IKEAKEQHPDMSVTKVVVHpETEl..................... 0 21 33 77 +5731 PF05903 Peptidase_C97 DUF862; PPPDE putative peptidase domain Bateman A anon L. Iyer Domain The PPPDE superfamily (after Permuted Papain fold Peptidases of DsRNA viruses and Eukaryotes), consists of predicted thiol peptidases with a circularly permuted papain-like fold. The inference of the likely DUB function of the PPPDE superfamily proteins is based on the fusions of the catalytic domain to Ub-binding PUG (PUB)/UBA domains and a novel alpha-helical Ub-associated domain (the PUL domain, after PLAP, Ufd3p and Lub1p) [1]. 
29.20 29.20 29.20 29.40 28.90 29.00 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.12 0.71 -4.43 41 818 2009-01-15 18:05:59 2003-05-02 09:40:48 9 22 288 2 549 790 22 135.60 31 40.10 CHANGED t.VhLpVYDLo...p....................shhLGh...........Gl..aHoul........laGp.Eahauu..........sGlh..................pspPtp.....p.G.p.pcsl......lGpTclsppthcpal...pplup..c.....apussYcLls+NCNcFosclsphLsGcpI.....PsalpcLsphshphshsptl...hshhh ...................................................h..VhL.VYDls......p............................hhhlGh................Gl..aH.oul..................laGh.EahaGu............sGlh.........................pspPtp............s.sh.phccsl.............hlGp..Tp..hstp..phpphl.....cplup...p.........apussYcLlp+NCNcFosplsp.h.L..s...G.......p.......t...I..............Ppa.lscLsphs.....hht.h........................................................ 0 198 333 456 +5732 PF05904 DUF863 Plant protein of unknown function (DUF863) Moxon SJ anon Pfam-B_7732 (release 8.0) Family This family consists of a number of hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
25.00 25.00 32.60 32.60 18.90 21.50 hmmbuild -o /dev/null HMM SEED 805 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.64 0.70 -6.27 7 79 2009-01-15 18:05:59 2003-05-02 10:12:57 6 3 15 0 56 66 0 465.10 22 72.35 CHANGED hhs..shSh...........ctuostpssl...shh.tpsspV.pspss........KhR++MlDLQLPADcYlDs-ptps.Gpp....................cphpssspp....p.ppssssss....lsl....psopGluDLsEP...............Vpsppspshu..hohDhhu+hsss.ucspsptlphs...............s............................EssptKsss+ssu....stp.ls.ssp..p.h.spu.p........P.....ps..phthhcERophshE....hppts.th.hps...l-tusssphPp.s..h.p.......s..h..hsH..uu....osuhsp+s...hssQppPhh........................phcosupsssuhssp..shspNthhpG.SssStpp.h.hsuhs.p..s.s.tt.sspt.hhp....................shp.s.ps...................tpppp.sslPWLp..t...p....................................t.hsLpss.........................ps..tssh..hclp.ssspppppIhh..h.tp..hs.....cp.s.......uhhhust.pclpp.h....hsLshN.ss-hsh........s...ptccstct...AssRshIDLN..ssoEDp.Epsshsup.th......psKh.h.IslEs..s.Es...-.p..........pcps........t.ssss.sEhh+hAAEuIVAI.....spcs.ssuSs.s-h..pp..LpWFs-hhtoptp-h.pp.-h..t...........sssphD.FEuhTLpLpETpt-EYhscPhsP..E..ph-c.os....s.s+PRRGpARRGRp+RDFQ+DILPGLsSLSRHEVoEDlQhhsGlh+usshsWp.SGhs+....hpussRGR.....hpss..sp..........sssssshsp..ss............DpulsGWGpsTRRsRRpRCP 
...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t....AAp.ll.h.............hp..sp.......l.hFsp.h..............p....t...h.....t.ptt.......sttshD.FEthTLpLpEhp.--hhshs.hs..t..p.cp.........p+.hpsphRRuRp.h.+DFQ+-ILPuLsSLSRpElsEDlphhtshhpupthphp.os.h.sc.ts..tsts.R.s+..................................................................................................................................... 0 6 31 43 +5734 PF05906 DUF865 Herpesvirus-7 repeat of unknown function (DUF865) Moxon SJ anon Pfam-B_9911 (release 8.0) Repeat This family consists of a series of 12 repeats of 35 amino acids in length which are found exclusively in Herpesvirus-7. The function of this family is unknown. 
20.10 20.10 24.10 87.20 17.70 16.30 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.70 0.72 -4.37 2 12 2009-01-15 18:05:59 2003-05-02 11:12:20 6 1 1 0 0 12 0 35.00 95 98.36 CHANGED MGSHPFRQEpPpPHNPLTFKPVKTTGTAVsFSAGF MuSHPFRQERPQPHNPLTFKPVKTTGTAVAFSAGF 1 0 0 0 +5735 PF05907 DUF866 Eukaryotic protein of unknown function (DUF866) Moxon SJ anon Pfam-B_8299 (release 8.0) Family This family consists of a number of hypothetical eukaryotic proteins of unknown function with an average length of around 165 residues. 22.90 22.90 23.20 23.10 22.10 22.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.17 0.71 -4.74 9 382 2009-01-15 18:05:59 2003-05-02 11:19:09 8 12 290 2 254 373 4 144.50 32 84.37 CHANGED MV+huLplpAsLENlpsLpPst....psa.aahKlKCsNCGElo-Khptlohs-pVsh.uG+usu.Nhs.KCKhCuREsSIsIlsushpshsh-DS..tchpsIssF-CRGhEPl-FtPpssWtApusEoGo.Fc-IDLpEt.-Ws-YDEKspsoVuIhphcppFp...hhK ..............................hhL.lpAp.L.cslsplpP.s.........psa...ahh+lpC.ssCtEhptphhhl...sh...-..ph..th....u...u+......G.p............A..shl.K........C.KhC.t.................+-sShs..l.....h...s...........s.....p....h..............p.....s....hs..h.c......ss.....................tchpsl...ltF-CRG.lE.s-FpP............p......s............t..ahspuh.oGshF..p....sl.cL..p........-t....-WhDYDEKstppVulhclphph....h.................................................................................... 0 80 135 205 +5736 PF05908 DUF867 Protein of unknown function (DUF867) Moxon SJ anon Pfam-B_7778 (release 8.0) Family This family consists of a number of bacterial and phage proteins with no known function and is present in Bacillus species and the Lambda-like viruses. 
27.10 27.10 27.60 30.10 26.30 26.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.19 0.71 -5.22 37 384 2009-01-15 18:05:59 2003-05-02 11:22:44 6 3 199 2 70 273 50 188.60 34 69.65 CHANGED Ypsap-Lppspp.tpDapIphpph.s.oplllhA.HGGuIEsGTSElscthu.....ssaShYlFEGl+tps.NpsLHlTSs+FD...EPhslphlpppchslolHGYtspp.......ppshlGGpDcthuctlscpLpptGFss....tstsscluGhpssNIsN+sp......sutGVQLElSsu.RcshFcshshppcsht.....spshhcalpul ....................atshs-Ltpppt.....DapIcsppp.s.SplhllA.HGGGIEsGTo...ElA.ctlA.................phssashYhFcGl+pp.......s...N..p.pLHlTSo+F...D....-Phhhchlc.p.p..phslSlHGhsusc.......phlhlGG.p.....Dcp.L.tptlscpLpp.t.....GasV..............tssP.pcluGhpssNlsN+sc......p.s..sGlQLELostlRcthFcshphspcshpp....s.hhh.pasp.................. 0 14 34 60 +5738 PF05910 DUF868 Plant protein of unknown function (DUF868) Moxon SJ anon Pfam-B_8013 (release 8.0) Family This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
20.60 20.60 22.40 21.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.14 0.70 -4.94 19 260 2009-01-15 18:05:59 2003-05-02 11:39:36 7 9 24 0 155 257 0 251.40 32 81.23 CHANGED tsuspssVTslYpscl..ss+.tl.lplTWo+shh.....ups.Lolslssss................................pshs+hsh.pPhhFt+++GS+....................sh.pssusp...l..-laWDLosAKF...uu...uPEPlsGFY.VAVVsssEllLLLG......Dhc+-sh++stss.......sushlu++E+la..........G.p+hasTKApF..t-sGppH-lsI-C.......sssG.....tp....s.plhlslDuKhVlQV++LpW+FRGNpTlhl.DGhsV-lhWDVHsWLF..........u..suss...p..........AVFhF+sp....tu.-p.....................phh.pttstsssp...p....................t.sFsLllhAaKh .........................................s...pshsoslYpspl........ttt.th.lsloW..s+shh......spu..Lslslp..sss.......................................p..h+hp.h..pP.hh.h..h+++.....GoK......................ph...pst.us.p......l..claWDLssA+F......uu...uPEPhss.aY.Vul.VsctEhsLlLG.........Dhpc..-uh.......++ptst......s.h...psshlu++E+lh...........G.p+hasT+ApF...t-.pGp.H.-lsI-s......................tttu............t-....s..phhlplDuch.llpV++LpWKFRGNpsl....hl..ssh.VcVhWDVHsWLF...................................s..ss.s.......p...........AlFhF+st........s.pt..................................................................p.................................................................tFsLhl.Ah+............................................................................................. 0 12 87 123 +5739 PF05911 DUF869 Plant protein of unknown function (DUF869) Moxon SJ anon Pfam-B_8094 (release 8.0) Family This family consists of a number of sequences found in Arabidopsis thaliana, Oryza sativa and Lycopersicon esculentum (Tomato). The function of this family is unknown. 
40.00 40.00 45.50 41.20 39.80 37.50 hmmbuild -o /dev/null HMM SEED 769 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.73 0.70 -13.58 0.70 -6.30 12 276 2009-01-15 18:05:59 2003-05-02 11:43:02 6 6 18 0 199 290 0 328.00 19 77.52 CHANGED KEsLltQHuKVAEEAlsGWEKA-sEshuLKppL-sss.tphshE-RsuHLDuALKECh+QlRps+EEpEpplpDshtppopph-phchpLEt+lt-hppclhcssuENssLoc.Lpt+pphl.clscp+s.h-sphpsLpspL-usEKE.soL+YEl+slpKELEIRscE+shSh+SA-sAsKQHLEslKKIuKLEAECQRLRshVRK+LPGPAAlApM+.EV-th..upshs-sRppts.u..p....................................pp...hpccs..hht+hhthE-EsKhL+-uLuc+ssELQhScNhhAcpsuKLp.h-sph.................................t.hsEpt..-DthSsupSWA.suLlSELpph.KpcKt.spuphtpssu.clpLMDDFLEMEKLAs...lsstussssppsspstsscppsthst..pppt..ps.................................p........t+hltplp..plLccIctphsphppupspsppp.p...................................sh.pp.pshpp..hs..pcLpshluph.....pchht.Lpp.hphhp-...tpsshpchlp-hshshphhlsp.psLp-...........ttplp..h.h...................................................................pchEphcspptphth.........p..p.ph.....pE.EpplpplpspLp.tspcSpshsEsplcshspp.culc.+t.clc.chp.hp.plppL-sELpcE+tspp-thsKsp-lcpclp+.ppptsttph.......cp-h+h+Q-pEIuuAAEKLAECQETIhsLG+QLpoLp.s.p-thlso 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t................................................................................ 0 29 121 163 +5740 PF05912 DUF870 Caenorhabditis elegans protein of unknown function (DUF870) Moxon SJ anon Pfam-B_8400 (release 8.0) Family This family consists of a number of hypothetical proteins which seem to be specific to Caenorhabditis elegans. The function of this family is unknown. 
27.10 27.10 27.20 27.50 26.60 27.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.63 0.71 -4.27 6 55 2009-01-15 18:05:59 2003-05-02 11:45:43 6 4 5 0 55 53 0 106.80 24 63.69 CHANGED pFpAtIcCslstsaWCG-LYllEcDshh....................................................................HDlLppcKFCTSEppKph+FTVsPtsDF.oscaEhsYhhNHNCTADGcshCVKPpcopcVssast.+oVcFsI-AtsNGcsppCcsP ............t..th.lpCs.........t.....hWCuplhlaE.Dh.h.h...........................cDll.t.p.p.pFCo..s.pt.p....c...p...a.ca..s.h...p...s......suD....h..o..................spYEhshhlpHNCos.sG..p.h....h..Cl.p...p...h.s.h.....t.h.................................................... 0 15 19 55 +5741 PF05913 DUF871 Bacterial protein of unknown function (DUF871) Moxon SJ anon Pfam-B_8510 (release 8.0) Family This family consists of several conserved hypothetical proteins from bacteria and archaea. The function of this family is unknown. 21.50 21.50 21.50 21.50 20.80 21.40 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.62 48 1290 2012-10-02 15:38:38 2003-05-02 11:49:29 6 2 773 2 132 806 4 344.50 34 98.71 CHANGED tLGlSlY.scs.thccscpYlchupphGFsclFTSLh.sc...sst.cphhpphpclhphApphshclhlDlsPplhppLsloapsLshhcc.hGlsGlRlDhGFospphAphops..slcIpLNhSs...........spchlcplhshpsshsplhusHNFYP+..TGLuhcaFhcpschh+chG..l.couAFlsup.ssppGP..lh-...GLPTLEpHRths.hspspcLhtsshIDclllGsshsScc-Lcpluph.pcphhpLclp...shs-h-tcll.cphHppRsDsuc.VlRSspoRh.....hht.ppsh.sp.ss.s.hp+GslsIDNptYG+YpGElQIshp-hssct+sNVVGclhc--l.LLchIcPhpcFphhtp 
.....................hlGlSlY.pcs....hpcsctYlchutchGFpplFTS.Lh.sp....ccp.cphhpcapcllshApphshclllDlsPslhcpLshoap......s......L.p...........a.pc.hGhhGlRlDhGho.s..pp.uthotp..slK.IpLNhSs...........spphl.cslhshps..shs...p..LhuCHNFYP+..TGLuh-aF.cpschh+chu..l.p.o.hAFlsup..s.sph.G..P...lpc...GLPTLEpHRph..p...hst..uccLht..oshl..D-VlIGsshsSccplcplup..h..pcp..hhpLclp....tths-hppphl.h.p.p.hH.........h.RsDhsc.llRSs......poRh...................hhp..ppsh.....sp..po...t.t.hp+GslsIDNptaG+YpGElQIslp-..hss.cs.+sNVVGpIhc--l.LLshlp.sh.ppFphh.p................ 0 42 78 105 +5742 PF05914 RIB43A RIB43A Moxon SJ anon Pfam-B_8571 (release 8.0) Family This family consists of several RIB43A-like eukaryotic proteins. Ciliary and flagellar microtubules contain a specialised set of protofilaments, termed ribbons, that are composed of tubulin and several associated proteins. RIB43A was first characterised in the unicellular biflagellate, Chlamydomonas reinhardtii although highly related sequences are present in several higher eukaryotes including humans. The function of this protein is unknown although the structure of RIB43A and its association with the specialised protofilament ribbons and with basal bodies is relevant to the proposed role of ribbons in forming and stabilising doublet and triplet microtubules and in organising their three-dimensional structure. Human RIB43A homologues could represent a structural requirement in centriole replication in dividing cells [1]. 
25.00 25.00 32.00 25.30 24.50 20.20 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.17 0.70 -5.83 13 202 2009-09-10 21:00:25 2003-05-02 12:53:47 7 4 109 0 131 193 4 312.80 31 95.74 CHANGED Ma+lcls...D.cEstAlE+RRstEpERpsRIFNu+sRshGVDlcALcpQVpE+KhpEpsE+p+-cuYsspphp.DclsthhccEpcpcp+pLs+clppFRppaQ+hEspREFDL..DP-sl+K.shPsRluD-Dsp...hGsSShQpFpGEDLppssR++hQpEQ.RpWlcpQhpE+ppActpc+pADplh-phthphDpRAtcLuphEcpsRptlppAspcaNcA.Accpptccppc+pp-p--NhAEIpNploSDhLTENPsVA....pSshuPaRVlstpWKGMoPcQlpsIRcsQ-pQhpEKccpRppEpth-tpWpppphphuRAhhhLEcpccRhp+ph+cpLsphNppLAtEQ..cAppcaLpcplYsNpPTspYapQFNToSR .........................-.....tltt++ph-.pRppRhhss+.RhhGlDhpsLptQlt-+c.pcthEp.tcptthstp.ht.cplhthh-pctpcpp+tlp+tl.ppap.pp.h.Q.p.cppREaDL..DPtthp....................K.thPsc.us..sDsp....u.SuhQh.....F.GEDLshtpRp+hQpcQ.RtWh.pQ.pEppptctpcctt.-.thhsptphph.-ppttcltph-ppsRpthttshtpaNps.....Atcptpccppc+pp-pcsshsEItp.lp.uD.hLoE.sPp..A....tst..hu...s.pR..ll..s...paKGMo.-Qhptlpp.hQppQhp-p...tph.cp..t-pth-tpWp.ppphp.s+shh.h-cpppc.ppph.pctls...p.N..pLApcQ..ptppp.h.t.lh.p.NtsstpaatQFspssR......................... 0 54 63 92 +5743 PF05915 DUF872 Eukaryotic protein of unknown function (DUF872) Moxon SJ anon Pfam-B_8741 (release 8.0) Family This family consists of several uncharacterised eukaryotic proteins. The function of this family is unknown. 
22.10 22.10 22.10 22.20 21.20 22.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.77 0.71 -4.22 13 281 2009-01-15 18:05:59 2003-05-02 12:57:19 7 3 132 0 171 286 2 107.10 29 77.02 CHANGED Y..psLss.D-D.............hcs...........pFspssss.p+ss...............h+sIhhAhhLLllGshLllhGhhlhss..t.ussspuhshhlhGhlhFlPGhYastlhahAh+Gh+GasF.plPsF ....................................................................................................................ht.........p.................................hKsIhlAhhLhllGs...h.LlllGshlhss....h.ht.....sssspuh.shhllGhLhFlPG.hYalh.lhYhA.h+Gh+GaoasplP.a.................... 0 52 86 126 +5744 PF05916 Sld5 DUF873; GINS complex protein Moxon SJ, Studholme DJ anon Pfam-B_8759 (release 8.0) Family The eukaryotic GINS complex is essential for the initiation and elongation phases of DNA replication [1-3]. It consists of four paralogous protein subunits (Sld5, Psf1, Psf2 and Psf3), all of which are included in this family. The GINS complex is conserved from yeast to humans, and has been shown in human to bind directly to DNA primase [4]. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.75 0.72 -3.76 176 1118 2010-01-07 10:55:00 2003-05-02 13:00:16 6 12 372 22 798 1076 14 113.40 15 50.89 CHANGED sclPhahut..Lhc....................pshspl..p..phhshc..hpthptp...............................tps.t..hplt...................................................................hthsph..........................p.+p.........................hlps..hh....ctRhtKlpphshph.......................................ttstthh..spLottEh ..............................................................................................................................................plPhahu....L..hp................................p...hh..pl.......phhphp...hpthpt-...............................tpp....hslp..................................................................................ahp.h.sph.h......................................................cch+p.............................................hlps...hh.ctRhpKlpphs.hph.............................................ttstthhptLs..E....................................................................... 0 230 409 648 +5745 PF05917 DUF874 Helicobacter pylori protein of unknown function (DUF874) Moxon SJ anon Pfam-B_8835 (release 8.0) Family This family consists of several hypothetical proteins specific to Helicobacter pylori. The function of this family is unknown. 
21.90 21.90 22.10 22.10 21.60 21.80 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.50 0.70 -5.61 2 170 2009-01-15 18:05:59 2003-05-02 13:02:31 6 4 43 0 11 184 0 203.70 45 100.99 CHANGED M.sl+sh........s.hAshhhph.pphKphpshpNlhhSlhGh..thhctl+s.lKKp.KKSs........hhCtpsKphDDhl......tp+.N..sWap.u.GlThhsuhLhssC.AsDpsKp.EltQtpKEAENARDRANKSGIELE.......QE+QKTpp.............KsEQE+QKTEQEKQK...........tspstIclEQppQKT.ppppch.ppQKDhlpcsEQNCQENHNQFFIKKLGIKuGIAIElEAECKTPKPsKTNQTPIQPKHLPNSKQPHSQRGSKAQEhIAYLQKELE.LPYSQKAIAKQVsFY+PSSIAYLELDPRDFpsTEEWQKENLKIRSKAQAKMLEMRsLKPDPQAHLsTSQSLLhVQKIFADVsKEIcsVANTEKKsEKAGYGYSKRM ...............................................................................................................................................................................Iclt.......QEpQKT.....................................................................................................................................pt.......p..................................................................................................................................................................................................................... 1 10 11 11 +5746 PF05918 API5 Apoptosis inhibitory protein 5 (API5) Moxon SJ anon Pfam-B_8916 (release 8.0) Family This family consists of apoptosis inhibitory protein 5 (API5) sequences from several organisms. Apoptosis or programmed cell death is a physiological form of cell death that occurs in embryonic development and organ formation. It is characterised by biochemical and morphological changes such as DNA fragmentation and cell volume shrinkage. API5 is an anti apoptosis gene located in human chromosome 11, whose expression prevents the programmed cell death that occurs upon the deprivation of growth factors [1,2]. 
22.20 22.20 22.30 22.40 21.90 22.10 hmmbuild -o /dev/null HMM SEED 556 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.78 0.70 -6.01 3 205 2012-10-11 20:01:00 2003-05-02 13:15:00 6 6 113 2 125 191 0 385.30 36 88.30 CHANGED pIEKLYEFsERLSESsDK.SQNV-DYEGIIKhSKTohKsKQLASQLIPRYFKFFPSLATEAFDAahDlhDDsDlGVRVQAIRGLPLFCKDTPDhlSKIlDVLVQLLNTEEPVERDAVHKALMSLlRQDTKASlTALFpHsusT.TTDEQIREKVL+FIRDKVlPLKGELLKPQcEMERHITDLIKKSLpDVTG-EF+MFMDFLsSLSIFGGKAP.ERMQELVEIIEGQADLNup...FphsDs-hl-RhIpChphAhPaFuRGAPSS+FLsYLNK+IlP..sFDpL.....PEERKLcLLKALAEMSPYTTAQ-.ARQlLPSIVpLLh.YMP........htcTs.phpFoasECLLYAhHpLu+KsPNATNSLCGYKIVTGQPSDRLGEDFSEhNKDFTERLThVEDLTKATMKKLTQGMoEHsKAMSsAKTDEEKuplKTK+.QsTTTGLRTCNNILAMTKsLatplPsFp+DhslsLSWhVssp....shlh++HtshoFh.............s.hspspohhGKRPA...NGuGNNV.uAKKuRsSN..QpQlVNKuuEGIS+sGu..SatGRGRsRGpGR+u..GGGRGRGp.sRGFW ......................................................................................t....lhp..thLsp.u.t..-....pp........p.tYp.IlpusK..u.s.sK.tKpLAuQhIs+FFppFPpLuppAlsA.hDLsE..D--...............h..t.lRhQAI+tLP.hsps......-.hs+luDlLsQ....LLt.......s.--ssEhphVppuLhslh+.DsKuoLsulFppl............p...s.-.....-.......hRE+....slpFlt..sKlhsh...phhp....pEhE..chlht..K.K.sL.p.D..VT..........u.pEF.hhM.phLtshp....ht.....shpuhpp.Llcll.p..QApLptt................hpss...Ds-...pl-Rhlp.Cht.A.hPhF..........sc.sspS...opFl.sahsc.p.llP...hstl...........................phpL..clLKhhAEhssa.......s.......s.s.-.....pp.l....lhphLhc.YMP.......................tt.p.p.t.....phpFoaVECLLashH.pLu++.hPs.ht.h....................u-phKDFp.R.....Lphhtc...hhp..shl+pL...p.uh...............pscstptl...K....scc..phphsuL+hssNI.slh+sLac.sP.ahss....ls.LSWh..tp..............................................................................................................................................................................pt........................................................................................................................................ 
0 46 63 92 +5747 PF05919 Mitovir_RNA_pol Mitovirus RNA-dependent RNA polymerase Moxon SJ anon Pfam-B_9906 (release 8.0) Family This family consists of several Mitovirus RNA-dependent RNA polymerase proteins. The family also contains fragment matches in the mitochondria of Arabidopsis thaliana. 25.00 25.00 27.70 27.30 18.60 18.60 hmmbuild -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.78 0.70 -5.55 14 122 2012-10-02 12:54:00 2003-05-02 13:27:16 6 2 72 0 14 89 0 240.50 44 75.64 CHANGED hphc.psphhsh.......+uGP.......suhShhoslhchhuhhtps..hphlphl...................h...h.hlsahpph..hhpsh...................hhsphtsh................lGKLulhc-stGKsRlhAhsDhhoQhlLpPLHshlFshL+plPp.DtTFsQp.shshhppchppt......aaShDLSAATDRhPlslQpclLshlhus.phupsWtsLLlsRsY..........tssss.lpYuVGQPMGAhSSWAhhsLoHHllVphuAh+s..ths.FscYlILGDDIVItscpVAppYhplMsc.LGVcIS.sKohlS..psoaEFAKRaht.stt-lSslslp..........uIlpslpp.hthhhs.lhchht+Ghp.......phls.h.ch.t..hlhh....tt.hhhsthhh....ht.......slphtpsllshpphhphltphh............t..hhs.thh................hshlhthlphshstphpslhhcphsphhhh.sphpsh.h.........hpp.pch.ph......L.tuh.hshh.plpchhsthhpptshsl.thhpshh...hshcsl 
...................................................................................................................................................................................................................................llAMhDahoQhhL+PlHstLFphL+clPQ.DRTFsQssh.ch.ssccs........haShDLoAATDRFPIslQcclLhhl.............auc..phAsuW.tsllVpcta............hp.ths.lpYsVGQPMGAaSSWshFTLSHHllVphsAhhs...thspFp-YllLGDDIVIpNcpVAKpYlplMsp.LGVslScuKohVS....c.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 4 8 14 +5748 PF05920 Homeobox_KN Coprinus_mating; Homeobox KN domain Moxon SJ anon Pfam-B_4610 (release 8.0) Family This is a homeobox transcription factor KN domain conserved from fungi to human and plants. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.08 0.72 -4.25 99 3978 2012-10-04 14:01:12 2003-05-09 13:11:53 6 46 463 5 1329 4786 10 40.00 54 12.00 CHANGED WLhpphpp..PYPoc...p.....pK..........p..pL......uppT..ulohp..........Q.........lss.....WFINAR+R ........................................WhtpHh..ca...PY.....PoEs.........................-K..............................h...tL......App.T..GLs..p.................................Q......................................lsN......W......F.INtR+R....................... 
0 325 626 968 +5750 PF05922 Inhibitor_I9 Subtilisin_N; Peptidase inhibitor I9 Yeats C anon Pfam-B_52 (release 8.0) Domain This family includes the proteinase B inhibitor from Saccharomyces cerevisiae and the activation peptides from peptidases of the subtilisin family. The subtilisin propeptides are known to function as molecular chaperones, assisting in the folding of the mature peptidase [1], but have also been shown to act as 'temporary inhibitors' [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.73 0.72 -3.41 160 3390 2009-01-15 18:05:59 2003-05-19 17:25:16 11 124 1056 23 1630 3258 93 81.50 20 11.78 CHANGED pYIVhhcpt.........................................hstsp......hssttphhp...............................................tsslhpsYct.......shsGauupLspppl.cplp.ppPsVthlp.Dphhplp ..........................................................................................................YIVh.hppt........................................................t.tp.................tthtphhp....................t...............................................tttsplhps...Ypp..............shsG......au.u.p.l......s.p....p.p.....l..cp.l........p......p..p..Ps....V.t.t....Vp.-phhph........................... 0 332 967 1374 +5751 PF05923 APC_crr APC cysteine-rich region Yeats C anon Yeats C Motif This short region is found repeated in the mid region of the adenomatous polyposis proteins (APCs). In the human protein many cancer-linked SNPs are found near the first three occurrences of the motif. These repeats bind beta-catenin [1]. 19.90 19.90 19.90 19.90 18.50 19.80 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.34 0.73 -6.92 0.73 -4.15 30 697 2009-09-11 05:34:41 2003-05-20 13:04:25 7 65 69 3 367 643 0 25.90 45 5.86 CHANGED s-ssppas.sEcTPhs.FS+so..SlSSLo ..........-s.ppas.lEsTPhs.FS+so...SLSSLo.. 
1 42 74 178 +5752 PF05924 SAMP SAMP Motif Yeats C anon Yeats C Motif This short region is found repeated in the mid region of the adenomatous polyposis proteins (APCs). This motif binds axin [1]. 19.60 19.60 19.70 20.20 19.50 19.40 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.28 0.73 -6.64 0.73 -4.65 11 249 2009-01-15 18:05:59 2003-05-20 13:43:35 6 48 46 2 126 209 0 20.40 55 2.10 CHANGED ---.ElLpECIsuAMP++pp+ ...s-D.-lLpECIsSAMP++pp+.. 0 7 18 54 +5753 PF05925 IpgD Enterobacterial virulence protein IpgD Moxon SJ anon Pfam-B_1987 (release 9.0) Family This family consists of several enterobacterial IpgD like virulence factor proteins. In the Gram-negative pathogen Shigella flexneri, the virulence factor IpgD is translocated directly into eukaryotic cells and acts as a potent inositol 4-phosphatase that specifically dephosphorylates phosphatidylinositol 4,5-bisphosphate [PtdIns(4,5)P(2)] into phosphatidylinositol 5-monophosphate [PtdIns(5)P] that then accumulates. Transformation of PtdIns(4,5)P(2) into PtdIns(5)P by IpgD is responsible for dramatic morphological changes of the host cell, leading to a decrease in membrane tether force associated with membrane blebbing and actin filament remodelling [1]. 
20.70 20.70 21.10 20.80 20.00 20.40 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.80 0.70 -6.29 2 189 2009-01-15 18:05:59 2003-05-20 14:51:26 7 3 152 1 10 130 1 503.80 70 94.95 CHANGED MpIpsh.hHpsShpot-uaKuhpcT...pGhpllShQthtsuc...........Rp.GsphhN..YLQpQ.TS.puht.LashcRshlhhtAhhL.GtpsshlpuMhpQh-hhKl.............ucs.K+Nl.EL...IutchptQ.Gl.sKct...htsth+ph.sp.LNNpsWpsIppslsaNG+pYs.p.hPAucMKIGsKsIFsptYpGKGlCshsT+p.+HhsNhWhSpV.Vc--GK-..lFsGIRHGVlSsYt.cKss..RtVuAcNKAcElhoAALaS+.ELLspALuGcsVsLKlVSsuLLTsoslhGtEtoMlcDQh+AhppL.op.Gc.h+LhIRNpDG.LQpVKlp.cVssFNhGVNELALKhGhGh+ssDphNuEulhpLLGsshh.puh.GGWsuEhltp.PsshpsV.hLApQIK-Ihppp.p+pDsGEPYKLuQRhAhLAapIsAVPsWNCKSGKDRTGM.DuEIKREIIphHpTt.hSt.sSh.sSttp+lFpplLhNSGN.t.pE.NTGssGNKVMKpLs.psLpLSYtcRlGD.pIWp.VKGhSShl ........................................................................................................MpIpsh.hHpsShpot-uaKuhpcT.hapGMQILSGQGKAPAKAPDsRPEIIVLREPGATWGN..YLQHQKTSNHSLHsLYNLQRDLL...TVAATVL...GKQDPVLTSMANQMELAKVKADRPATKQEEAAAKALKKNLIEL...IAART.QQQsGLP.AKEAHRFAAVAFRDAQVKQLN...N..QPWQTIKNTLTH..NG..H.....H...YT..NTQLPA.AEM.K....I..GAKD..IFPS.AYpG.KGVCSWDT+NIHHANNLWMS.........TVSV.H.E.DGKDKTLFCGIRHGVLSPYH.EKDPLLRpVGAENKAKEVLTAALFSKPEL..LN...+AL...p...GEA...VSLKLVSVGLLTA...SNI.....F......G...........K.EGTMVEDQM..RAWQSL.TQP.GKM.IHL.....KIRNKDGD....LQTVKIKPDVAAFNVGVNELAL.K.....L.G...F..GLKASD+YNAEALHQLLG..N.........DLRPEARPGGWVGEWLAQ.......Y.........P.D..N.....Y...E......V.....VNTL...ARQ...IKDIW.KNN.HHKDGGEPYKLAQR.LAML......AH......EI.......D....AVPAWNCKSGKDRTGMMDSEIK.REhISLHQ..THM..LSAPG.S.LPDSGG....QKIF.QK..V..L..LNS...GNLE.IQKQNT.GGA.GNKVMKNLS..PEVLNLSYQ.KRlGDENIWQSVKGISSLI.......................................... 0 5 6 8 +5754 PF05926 Phage_GPL Phage head completion protein (GPL) Moxon SJ anon Pfam-B_1860 (release 9.0) Family This family consists of several phage head completion protein (GPL) as well as related bacterial sequences. 
Members of this family allow the completion of filled heads by rendering newly packaged DNA in the heads resistant to DNase. The protein is thought to bind to DNA filled capsids [1]. 19.80 19.80 21.70 20.60 19.60 19.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.51 0.71 -4.35 33 714 2009-01-15 18:05:59 2003-05-20 15:03:08 6 3 529 0 73 507 5 136.30 41 87.08 CHANGED sIsNs.GFWPDlsltchRcthRl-uolsspRLcpAllsAhupVNs-Ltsa+spppuuGassLusVPu.spls.GcuhhltpYcRAVashA+AsLhE+YpshDsTss.GpcpAcclspohs-LhRDucaAIpclhGpsR.ssVELI ..................................................IpNs.sFWPDlslp-hRpthRl.sus.Vo..s..sRLppshhuAlucVNsELhpa..+ppppst..GassLA-..V.......PA......s.........p.l.............s..G........cShplhaYppAVashA+A.LhER.YpshDsTsp..G...s++u....-plspsts-LWRDs+WAIucltstP.R.shV-LI.............................. 1 8 29 51 +5755 PF05927 Penaeidin Penaeidin Moxon SJ anon Pfam-B_2675 (release 9.0) Family This family consists of several isoforms of the penaeidin protein which is specific to shrimps. Penaeidins, a unique family of antimicrobial peptides (AMPs) with both proline and cysteine-rich domains, were initially identified in the hemolymph of the Pacific white shrimp, Litopenaeus vannamei [1]. 25.00 25.00 25.50 26.30 20.30 20.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.98 0.72 -3.44 8 55 2009-01-15 18:05:59 2003-05-20 15:26:31 6 1 11 2 0 54 0 72.10 65 97.04 CHANGED MRLVVCLVFLASFALVCQGQuYKuGYTRPlPRP.......Pa.....G.tshtsh.slC.suC+tLohSpARuCCsRLGRCC+htKG ................................MRLVVCLVFLASFALVCQGQsY+GGYTRPlPRP...........Pa.....G..t.....Phhsh.suC.suC+..sIohSpAR.uCCpRhGRCCHlhKG.. 0 0 0 0 +5756 PF05928 Zea_mays_MuDR Zea mays MURB-like protein (MuDR) Moxon SJ anon Pfam-B_3145 (release 9.0) Family This family consists of several Zea mays specific MURB-like proteins. 
The transposition of Mu elements underlying Mutator activity in maize requires a transcriptionally active MuDR element. Despite variation in MuDR copy number and RNA levels in Mutator lines, transposition events are consistently late in plant development, and Mu excision frequencies are similar [1]. 20.80 20.80 21.20 23.40 18.10 19.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.37 0.70 -4.90 4 19 2009-01-15 18:05:59 2003-05-20 15:35:39 6 1 2 0 1 21 0 203.00 85 94.63 CHANGED MDhPPSKVVADAVEAARAAAVAASEARCAVFVAEKEAKAAVQFAAIAVDKVEAVKASSNVDLVDFKYHVNIKNSLRYAIQEMRRQoKLLHSVQKLCSTIPEVQGGKIGKVRGHLEHVCKELDKTSIVCEEDLETKNPTWDLYDNPSVDDEHPLDDDELGDGYSTEDPELWEMVF-DFKWEEIKANVSFEEHYRVINYRFEEINDRNM .......MDhPPSKVVADAVEAARAAAVAASEARCAVFVAEKEAKAAVQFAAIAVDKVE....AVKA......SSNV...DLVDFKYHVNIKNSLRYAIQEMRRQoKLLHSVQKLCSTIPEVQGGKI.GKVRGHLEHVCKELDKTSIVCEEDLETKNPTWDLYDNPS.V.DDEHPLDDDELGDGYSTEDPELWEMsFEDFKWEEIKANVSFEEH.RVINYRFEEINDRNM.............. 0 1 1 1 +5757 PF05929 Phage_GPO Phage capsid scaffolding protein (GPO) serine peptidase Moxon SJ anon Pfam-B_1730 (release 9.0) Family This family consists of several bacteriophage capsid scaffolding proteins (GPO) and some related bacterial sequences. GPO is thought to function in both the assembly of proheads and the cleavage of GPN [1]. The family is found to function as a serine peptidase, with a conserved Asp, His and Ser catalytic triad, as in subtilisin, and as represented in MEROPS:S73. The family includes SwissProt:P25478 from Enterobacteria phage P2 which cleaves itself and then becomes the scaffold protein upon which the bacteriophage prohead is built - a mechanism quite common amongst phages [2]. 
21.50 21.50 22.80 22.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.87 0.70 -5.15 5 734 2009-01-15 18:05:59 2003-05-20 16:35:53 6 3 536 0 77 566 6 255.80 42 93.46 CHANGED SKaFRIAVEGATTDGRsIpR-WI--MAAoYDPcVYGARINLEHIRuhLPDGsF+AYGDVTALKAEEI-.sG..sGKLALFAQIEPT-DLlslNKu+QKlYTSMElsPKFADTGKAYLVGLAVTDsPASLGTEhLoFu.toAKsNPLAsRKQNP-NLFTsAEEssLEFEE.....lsE..oVssuLhs+VKsLFs+K-ASD...DARFuDVpEAVEsVAEHVQs..utTEppLuEtE+Ahot.cQplsspt-cpspsFscLKsoL-+T-uhuQptRPsATGGGu..slLTDC ....................KhFRluVEGsT.sDGRpIptpalppMAcoYsP.p.V.YsApINlEH........h+..u..hh...P...s...u....F.p.paG-VsuLp.uE..-..Is-..ss...LtGK..hALaAclpPT....-c.Llph.s.+.puQKlaTShE..lpP..p.FAsTG+AYLVGLAsTDsPASLGTEh.LpFs.....tps.p........p.ss.......l...s...sp........+.....t..s.s...t..s....L...h.os..A....-s.t..l..E..h....p-................tp...p.t.s.sh.hs...+V.p..ul......h...s...+...pptuD....................DApF..s...cl..pc....A......Vp.h..VApp.p...p...h.s...th...pttLs-.c..pp.........t......cptl............p.....t......p....pthstL...pppLp...p.p-u....p.......R...pATG.usu....s.hss............................................................................................................................................................... 0 10 33 56 +5758 PF05930 Phage_AlpA AlpA; Prophage CP4-57 regulatory protein (AlpA) Moxon SJ anon Pfam-B_2048 (release 9.0) Family This family consists of several short bacterial and phage proteins which are related to the E. coli protein AlpA. AlpA suppress two phenotypes of a delta lon protease mutant, overproduction of capsular polysaccharide and sensitivity to UV light [1]. Several of the sequences in this family are thought to be DNA-binding proteins. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.25 0.72 -4.24 23 2885 2012-10-04 14:01:12 2003-05-20 16:46:44 7 8 1195 1 461 2198 442 49.80 31 65.72 CHANGED pRhlRhtEVhphsGluRosIYchhc......cs.cFPppl+LGsRuVuWhpuEl-pWl ............................hlchtplh.phs.G...l..o.cs.tl.Yc.h.lp.........cG..pFP..p.s.l.+..l...G...............R...s.....s..s.......W.h..poElctWl.................... 0 82 241 355 +5759 PF05931 AgrD Staphylococcal AgrD protein Moxon SJ anon Pfam-B_2868 (release 9.0) Family This family consists of several AgrD proteins from many Staphylococcus species. The agr locus was initially described in Staphylococcus aureus as an element controlling the production of exoproteins implicated in virulence. Its pattern of action has been shown to be complex, upregulating certain extracellular toxins and enzymes expressed post-exponentially and repressing some exponential-phase surface components. AgrD encodes the precursor of the autoinducing peptide (AIP).The AIP derived from AgrD by the action of AgrB interacts with AgrC in the membrane to activate AgrA, which upregulates transcription both from promoter P2, amplifying the response, and from P3, initiating the production of a novel effector: RNAIII. In S. aureus, delta-hemolysin is the only translation product of RNA III and is not involved in the regulatory functions of the transcript, which is therefore the primary agent for modulating the expression of other operons controlled by agr [1]. 20.50 20.50 20.70 20.80 19.60 19.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.30 0.72 -4.33 21 248 2009-01-15 18:05:59 2003-05-20 16:54:19 6 1 222 0 8 56 0 45.10 49 97.59 CHANGED hplhshlhchhstlFphIGslAuhssCsuaFDEPEVPcELTcLac .........ppLhNhFh-hlsslhcsIG.lAuhssCshlhDEsEVPcELTpLaE.... 
0 2 2 8 +5760 PF05932 CesT Tir chaperone protein (CesT) family Moxon SJ, Bateman A anon Pfam-B_2921 (release 9.0) Domain This family consists of a number of bacterial sequences which are highly similar to the Tir chaperone protein in E. Coli. In many Gram-negative bacteria, a key indicator of pathogenic potential is the possession of a specialised type III secretion system, which is utilised to deliver virulence effector proteins directly into the host cell cytosol. Many of the proteins secreted from such systems require small cytosolic chaperones to maintain the secreted substrates in a secretion-competent state. CesT serves a chaperone function for the enteropathogenic Escherichia coli (EPEC) translocated intimin receptor (Tir) protein, which confers upon EPEC the ability to alter host cell morphology following intimate bacterial attachment [1].\ This family also contains several DspF and related sequences from several plant pathogenic bacteria. The "disease-specific" (dsp) region next to the hrp gene cluster of Erwinia amylovora is required for pathogenicity but not for elicitation of the hypersensitive reaction. DspF and AvrF are small (16 kDa and 14 kDa) and acidic with predicted amphipathic alpha helices in their C termini; they resemble chaperones for virulence factors secreted by type III secretion systems of animal pathogens [2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.52 0.71 -4.26 52 1366 2012-10-01 22:01:34 2003-05-20 17:02:43 8 4 525 41 194 749 35 112.90 19 76.61 CHANGED pLlpphuppluh...slshscsusssLth..spthhhlhhsppsspLllts.luphssstt......hhpplLphNh.h.tphtsshlulc.pssplhLptphshpt...lsts.phpshlpthlstscphp ......................................................hlpphupplsh...slt...h..s..p..s.shhtlhl....sp.hhhh.h..h.s.t..p...sp..h...l.hlhshlst.s.sss.s..........hhtplLph.N.h.h.tppssstlu.hs.pstpllLhhphshsp...hssp.pltshlpshlpphcth............................ 
1 71 103 148 +5761 PF05933 Fun_ATP-synt_8 Fungal ATP synthase protein 8 (A6L) Moxon SJ anon Pfam-B_2993 (release 9.0) Family This family consists of fungus specific ATP synthase protein 8 (EC:3.6.3.14). The family may be related to the ATP synthase protein 8 found in other eukaryotes Pfam:PF00895. 24.20 24.20 24.30 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.34 0.72 -3.93 18 148 2012-10-02 21:03:42 2003-05-21 09:35:49 8 1 141 0 36 135 596 47.90 49 86.49 CHANGED MPQLlPFYFlNplsauFlllolLlYlhSpYILPphlRLalSRhhIs+L ..MPQLlPFYFhNQlsauFlllslLlYlhSpYILPphlRLaloRhaIsKL......... 0 12 25 31 +5762 PF05934 MCLC Mid-1-related chloride channel (MCLC) Moxon SJ anon Pfam-B_2711 (release 9.0) Family This family consists of several mid-1-related chloride channels. mid-1-related chloride channel (MCLC) proteins function as a chloride channel when incorporated in the planar lipid bilayer [1]. 25.00 25.00 26.20 25.40 24.00 23.90 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.21 0.70 -12.78 0.70 -6.21 2 95 2009-01-15 18:05:59 2003-05-21 09:46:09 6 3 52 0 42 97 0 343.90 40 92.71 CHANGED hpLhLh.sLhLsssYu.asD-WIDPoDMLNYDAASGpM+p.....................oQt.hthusEhc.sPDhoCusEhpEhhpKL-sLphpl-..EpK+hEc.cSpSpslF+RYLNKILIEAG+lGLPDEshschHYDAEllhphEhL.EIQpFLNstDWpsGALDDALSshLlpFKaHs.EpWKW+FEDSFGVDsYslhMllLClLClVhLlATElWTaltWaTQL+RlLIlShlhShGWNWMYLYKlAFAp+QAElAKhpshsp.CupKhsWotSla-Wh+uuhTapsDPCpcYachLlVsPhhhVPPTKALAlTFTsFlTEPLKHIGKGhGEFlpALhpEIPhhhplPVLIhhAlhlLuFhYGAGpuV....plhR+lsGPEpE.P.slcPpcppRpc.I-..........Dh+h.sthG...............pl.R-pDV..........spl.+t.slsDh-upppPsV.......shsscPp-TGtlhupsTsc..spEspp.tKsl..StpDppsNTEus.At.p..........................Rsp-PVspsht 
..............................................................................................................................................hlh...h..hh...s..t...sD-.WlDPtDMLNYDusotpM+p..............................t.......p.....s....t.......s.p.tp...h..pl..h..pht..ppp.pht.p..ps............pst..lF+RaLp+hL.-ht+hGhspt............YDsplhhphp.h.El.thlttt.t.hp..uslspuls..ph..Lh...ph+.ps....ptWpWpFEs.Fsl.-..shhhlh...lslLhlVhll.s..TplaohltW....ahQ.lpRl.hhlsFlhShhWNWhaLYKhAaAp+...pspl...s..Kh.t..hsp.Cs..cc.hsW....tslh..-ahptth.Th..ps.DsCpcYaEhLlVsPhh.VsP.oKALulThTsFlsEPL.KalGpGhuEFlcuLh+-lPh.hhp.lP.VLl...h..hslsl.lshhahss.th....................................................................................................................................................................................................................................................................................................................................................................................... 0 10 12 25 +5763 PF05935 Arylsulfotrans Arylsulfotransferase (ASST) Moxon SJ anon Pfam-B_3266 (release 9.0) Family This family consists of several bacterial Arylsulfotransferase proteins. Arylsulfotransferase (ASST) transfers a sulfate group from phenolic sulfate esters to a phenolic acceptor substrate [1]. 
24.20 24.20 24.60 24.40 24.10 24.10 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.83 0.70 -5.99 33 999 2012-10-05 17:30:42 2003-05-21 09:52:45 6 8 517 6 144 1072 349 451.30 30 83.36 CHANGED hlhlNPY...ssuPLoAllthsspphsslploVpsK.tpsuhsIsasss....ppthps+s..........lPVhGLYscatNpVplphsp..sGcs...hpcshplpssslssthsst..........pspshK.sssthtspLYhlss..................................s.pshsahlDspG-lRWahs.sthhstt............hcphpsGphhhsps...........pp.hhchDhhG+..hlpp.tcLsssa.............shpH-hhphsN.......GNhLltsup.p.h......s.......psscolcDhllElD.psGpllchWDlhclL..DPhRs..h...............................ptshsshsGss.su+sWhHlNultYDtpDDSlIlSuRHQsullKIs.....cspclpWIluspc..GWscp.......h.pphlLpPlct.G..h...............pusFcasasQHsshhlss....p.....lhlFDNGssRuhppsshs..............c..YSRuVcY+IDppshTVpQlWpYGKp+.G.chYSslsSss..-ahsc.sshhlhSusssh.............t.tt.........................lsEhchps...pcsth-hphpss.t.........sYR .......................................................................................................................................................................................h.l.hsPY..s.sPLoAhl....ts...h.ts.lplplhsp...tps..ltaphs.......h..sa...........lPlhG..........Lh.ph.Nplplphtp....uph......p.th...h.sssh..........................phps.p.sssthpstL.Yhhsp..................................t.hsshshhhDppGp..h..R..aa.h.s..sth.h.s.........................hp.p...h....p.G.t..hh.h.sts..........................pc.h.h.c..aD.h.h..Gp.....hlhp......hcL..s..pta....................................................p.h..p.Hs.h...pt.s.N...............G.s..hllt.s...u..p....shhp.........s.............................hphpT.lc.....D.t.I.l.....E.V.......D..p....s....G.....p..lV.c.....W...c.hh.clL......DshRs.s.hh....................................................................pp.hu..s..h...sG...su...ss+sW..hHlNSl.......s.......Y..........D...s......p......D...............D............u..IIlSuRHQsullpIs..........csp.pl.+..WIL.u..ssp.
..uW..ptt........h....tth..lLpPVctpG..l........th............tps..sFcaoa.sQHsuahlspt...............h.lhsF.DNG.ssR..uh..-p.Ps..h.s........................................hp..YSR..hVp.....Y...p...I....D.p..p..p..h..TV......p...plW.....p........Y...G........K..-........c....G................c.......a..Y...S.s.hsSss...ch.ss...sshhhatushs.h...............tp...s....................hlpEhp.tp.ppshhEhph.us.................................................................................................................................................... 0 48 86 119 +5764 PF05936 DUF876 Bacterial protein of unknown function (DUF876) Moxon SJ anon Pfam-B_3279 (release 9.0) Family This family consists of a series of hypothetical bacterial sequences of unknown function. 19.60 19.60 20.00 20.10 19.20 19.50 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.11 0.70 -6.03 124 1520 2009-01-15 18:05:59 2003-05-21 09:55:25 7 6 940 0 277 1100 65 410.80 30 94.64 CHANGED HFQQQ-Rah-thhctpspshssas..WGhppLp.lDpphLshG+lslppspGlhPDG.Thashstss.sh.PssLslss........................sstspl.VaLAL.Pltpssssp.ss.spt..............ssssRa..tspttcltDtpssssp.......................tplpluphslc...Lhhpp-..spssa...sslslARlhc..ps-GslhL.Dps.FlPPhLshtu.sshLtphlpclhshlptRucsLutRh.ssssp.s..usu-lscFhh...LpslNchtPhlpHl.hptsplHPEpLYppLhplsGcLsTFo........ps+psss.hPsYpHcshtssF.ssLhptLRphLssV..lpppshslsLp.pp...phuhhhuhlpDsplh.p..supFlLuV.+Ashssc.pLpppFs...ppsKlGus-clcpLVs.pLPGlsLpsLsssPp.plPh+sGhtYFpL.-.psushWpph.tpuuuhAlalsuph..s.sLcl-laAl+ 
....................................HFQQQpRah-ahhppphpshs.sa...WGhsplp.lsp-hLs.G+ltlppspGlhPDG.ThFsh...Psps..sL.Psslslps........................sptssh.laLAl.Plhpssttp.hs.spp..................pthuRa..p.ptt-l..pDhputtts........................tslpluphpl+...Lhhpp-.......sps..u.a...hslPls.Rlhch...pss...GsltL.Dcs.FIPPhlshpu..ush............L.....t.ph...lpclhshlptRupsLupRh..ss.ssp.t......usA-Vu-Fhl...LphLNRhpshlpHh.tp.hstlHPEpLYppLspLsG-LhTFo..........s+tsss.hssYpHcs.tssF.psLhstl+phLssV..L.spslslsLp...ph...p.uhhhAslp.-spLh..ppusalLAV.+AshPs-.pLpppFs...tpsKluus-clcpLVsspLPGlsLpsLsssPp.plPa+sGhpYFpL..D..pputhWpph.tpuuuhAh+luGsFs..sLphpLWAl+.............................................................................. 0 46 113 193 +5765 PF05937 EB1_binding EB-1 Binding Domain Yeats C anon Yeats C Family This region at the C-terminus of the APC proteins binds the microtubule-associating protein EB-1 [1]. At the C-terminus of the alignment is also a Pfam:PF00595 binding domain. A short motif in the middle of the region appears to be found in the APC2 proteins (e.g Swiss:O95996). 25.00 25.00 53.00 53.00 19.50 21.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.20 0.71 -3.89 2 49 2009-01-15 18:05:59 2003-05-21 10:46:52 6 13 34 8 26 49 0 167.40 79 6.09 CHANGED RSGRSPTGNoPPVIDsV.-pup.p.csuKDsps+pNsGNGsVPh....LENR.pSFIpVDu.DpKGT-.Ksh.NN....pETNEsolsERTsFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSsu-soouRPSQIPTPVsNsTKKRDSKT-oT-SSGoQSPKRHSGSYLVTSV .........................RSGRSPTGNTPPVIDSVSEKGNsshKDS...KDNQuKQN.VGNGS..sPhRThGLENRLNSFIQVDuPDQKGTEsKPGQ.sNPVPsuETsESSlsERTPFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSSADSTSARPSQIPTPVNNNTKKRDSKTDoTESSGTQSPKRHSGSYLVTSV....... 0 1 4 10 +5766 PF05938 Self-incomp_S1 Plant self-incompatibility protein S1 Moxon SJ anon Pfam-B_3292 (release 9.0) Family This family consists of a series of plant proteins which are related to the Papaver rhoeas S1 self-incompatibility protein. 
Self incompatibility (SI) is the single most important outbreeding device found in angiosperms and is a mechanism that regulates the acceptance or rejection of pollen. S1 is known to exhibit specific pollen-inhibitory properties [1]. 21.10 21.10 21.20 21.10 20.50 20.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.90 0.72 -3.95 45 443 2009-01-15 18:05:59 2003-05-21 10:49:09 6 15 25 0 271 427 0 103.40 25 60.77 CHANGED spVhIhNcL....s.....ssps.Lt..lHCpSp..-cDLGhchlps....sppasapF.....css..hh.tsThFhCphph....sstt.ptpFclYpsppc.t........tp.shWps+c...DGhahhppthsh................phphsWp ..........................h.tlhlhNph.....s.....ssps...lp..lHCpSp..-.c.D...LG.p...h...lt.......upph.p.a.pF.......c.s....hh..tsThF.h...Cshpa.......st...........hphpFcsapsp.c.-.ttt.............tp.ChWplpc....-G....lYhhppp.t.t.......................................................... 0 78 141 181 +5767 PF05939 Phage_min_tail Phage minor tail protein Moxon SJ anon Pfam-B_3296 (release 9.0) Family This family consists of a series of phage minor tail proteins and related sequences from several bacterial species. 21.10 21.10 21.20 22.10 20.90 20.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.41 0.72 -4.10 33 1252 2009-01-15 18:05:59 2003-05-21 10:57:39 8 7 595 0 60 582 108 107.20 47 95.89 CHANGED hETFsW..pspsss.ssssp.pV+pspFGDGYpQpsusGlNscppsasloasGscsphts.....lpsFLcRHuGs+uFhWTPPhsshslah..ssphpssshuushhsloATF-QsFp ...............hcTF+W..cl+ssM..pVsopPsVppV+FGDGYpQRtssGL..NspL..+TYs.V.T.hp....V.s+.p-.sps.......L-uFLscH.G.G.h+AFLWTP.P.a.s.h+p.l+V....sC..t..p..W..Ss....p......s.uh....hh.s..phoApFEQVV.............................. 1 5 23 41 +5768 PF05940 NnrS NnrS protein Moxon SJ anon Pfam-B_3395 (release 9.0) Family This family consists of several bacterial NnrS like proteins. NnrS is a putative heme-Cu protein (NnrS) and a member of the short-chain dehydrogenase family [1]. 
Expression of nnrS is dependent on the transcriptional regulator NnrR, which also regulates expression of genes required for the reduction of nitrite to nitrous oxide, including nirK and nor. NnrS is a haem- and copper-containing membrane protein. Genes encoding putative orthologues of NnrS are sometimes but not always found in bacteria encoding nitrite and/or nitric oxide reductase [2]. 23.50 23.50 31.80 26.30 23.40 23.40 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.47 0.70 -5.56 122 829 2009-09-11 14:42:48 2003-05-21 11:06:32 7 3 628 0 253 740 124 369.60 30 94.48 CHANGED t.s...lhp.hGFRPFFLhuulauslulsl..Wlhh...h.sGths.hs........hsslhWHsHEMLFGFusAllsGFLLTAlts.WTGp.slpGtsLh.......sLhsLWLsuRlsh.hhs..........hshhlshhlDhuFlshhuhhlu+.lhpu.+p.....hRNlhhlshLhlhshsNhhha.....hthh.tts.shtttshpsulhhlshllsl..lGGR............lIPhFTpshlt........sttst....shs.h.l-tsslhsshhshht.....hhhs...shl.suslhhsAuslphlRhh+.WpshtshppP.LLahLHluYhalslGhhlhu....huhh........shs.shusulHslslGulGshlLAMhoRsoLGHTGRs..L.tssts.hshuashlhhAAlhRlhsshhh...shhhhhltluushWshAFulashpYsPh..Lh....pPRh ..........................h..slhp.hGFRPFFLhuulaAslulhl..Wlhh.....h..sGth.s...h.........hsslhWHsHEMlFGFusAllsGFLLTAlts.WTGh.ss..spG.tsLs.......sLsslWLsuRlhh..hhs......................hs...hhl..ss...hl...-...shFhh..hhuhhh....up..ll.t.u.+s........hRNh..h...hlshlhhhsh.ss.hhha.....hthh....tts.h..h.....htshpsul..hh.h..s.lllsl..lGGRllPhFTpptls..........................................t..thh.................shhh...h-thsl.hshlhhhlh..................hhhh..........s.l...suslhhsA...us..h.p.hh.Rlhc.Wpsh.tshpcP..lLhhLHluYhalslGhlhhu..................huth.......s.h...s.ssulHhhslGulGshhLuhhsRsuLGH.TG.Rs...l...hss.s..hthAahhlhhAAllRshushh......shh....hhhltluuslWshAFslashpYsPhLhpsR............ 0 68 149 209 +5769 PF05941 Chordopox_A20R Chordopoxvirus A20R protein Moxon SJ anon Pfam-B_3744 (release 9.0) Family This family consists of several Chordopoxvirus A20R proteins. 
The A20R protein is required for DNA replication, is associated with the processive form of the viral DNA polymerase, and directly interacts with the viral proteins encoded by the D4R, D5R, and H5R open reading frames. A20R may contribute to the assembly or stability of the multiprotein DNA replication complex [1]. 25.00 25.00 68.10 67.80 17.90 16.50 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.98 0.70 -5.69 12 69 2009-01-15 18:05:59 2003-05-21 11:13:31 8 1 42 0 0 58 0 326.60 57 77.69 CHANGED MoopsDLppLKELL+L+cslclu-ppsp-+YNuLV-WAopsYW+lulp+lssscsSIscYYpss+scs.FtLcsGcYhFLshpFGssalYh+G.shhELGSG.sthpIscch+shh-tllsc.sDlcFLRFVhF+ppWllEDshSchps.PhshLchAup.Glpsssalplc.lccsshFs--DYsslcphhtsh...csFhhsulChl+-Gs.cRpllDFhphsaspVcsI-LE.lssNhalPplITtsGpplLV+DlpHLlcS+s+hsoFVsV++h+shhlLs-psstss.E.o+uEsLpRIlcph.Gs-aFlN.GpYlSKl.sshslpplos+.LGl.hsCssl- ...MTS.SuDLoNLKELLsLYKSL+FSDuAAlEKYNSLVEWuTsTYWKIGV..pKVAslETSISDYY-ElKN...KP.FsI-PGhYIFLPlYFGoVFIYSKG.NMhELGSG.NohpIPD-hRSACsKVLcusssI-FLRFVLhN.NRWIhEDAlSKYpS.PVNIFKlASEYGLNlspYLcI-.IEEDTlFs-EhYulIE+uFc.....DpF.thSIsYIKhG-h+RpVVDFFKaSFMYIESIKl-+IG......DNlFIPolITKSGKKILVKDVDHLIRSKVREcoFVpVKKKNoFoILhDhsGsGo.E.TRuEVI+RII-.oI.GRDYYVN.GKYhSKV.GsAuLKQLoNK.LsI.s-CsTV-..... 0 0 0 0 +5770 PF05942 PaREP1 Archaeal PaREP1/PaREP8 family Moxon SJ, Bateman A anon Pfam-B_2248 (release 9.0) & Pfam-B_9342 (release 9.0) Family This family consists of several archaeal PaREP1 and PaREP8 proteins the function of this family is unknown. 
21.00 21.00 21.00 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.45 0.71 -4.22 116 500 2012-10-01 22:14:54 2003-05-21 11:19:01 6 1 40 3 319 475 0 112.80 25 72.44 CHANGED s+LctAhh.hc.AtchLccG.htpAutKhapAhcthlpAlAtt.....h...hptcpphtttt.................hhptlsplhpphs.pplhhhhshAhs.LHtht..hh-sp..hshschpsppcshccll .............................hl..Ahh.hp.A.chLc....cG........hhpAutKhapAhcthlpALuht...t.h....hppctch.tpp................................hhptlsplhpc.lG.ttlh.hh.sh...Ahp.LHtht..hh-sphthschppptpshccl............................................................................... 0 102 136 238 +5771 PF05943 DUF877 Protein of unknown function (DUF877) Moxon SJ anon Pfam-B_2566 (release 9.0) Family This family consists of a number of uncharacterised bacterial proteins. The function of this family is unknown. 19.40 19.40 19.70 20.00 18.60 18.60 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.21 0.70 -5.99 95 1739 2009-01-15 18:05:59 2003-05-21 11:22:27 7 7 993 0 325 1173 101 394.70 43 83.71 CHANGED l-phIup..IDptlSpQ...lstIlHpscFQpLEuuWRGLcaLVppo-...sspslKIclLsloKcELhcDhc..cus-hsQSsLa+plYpp.EaGphGGcPaGsllGDYpFspssp......DlplLppluplAAuAHAPFluuuuPphFGh-sap-L.ssh+DlsplF.-ss-..YspW+uhR-oEDuRYlGLshPRhLhRhPYus.cssPlc...sFsacEp........ss..scccYLWuNAAauhAsplscuFscaGWsspIRGspuG....GtVps..LPsH........hapss.GshphKsPTEltIoDRREtELuc.GFIPLshtKsoDhAsFauupSlp....KPpha.........psppAssNu+LuupLPYlhssoRhAHYLK.VlhR-pIG..Sapp.pp-lEp.LNpWlppYVsssss.ssp-h+A+pPL+pAcVp......VpElsGpPGaYpsshhl+PHaQh-shsssLpLVucLs 
......................................pthIAc..lDpplScQlshIlHps-FQplESsWRGLchLVppT-.....hccslKlcl.L..sloKc-Lhc.Dhc..pus..-hsQ.SslaK+lYpp...EYG.p.hGGEPhusllusYtFspos.......DlpLLptlupVuAsAHuPFIuusuPphhth.cSapEL.ss.+DLsplF...-ssc.YspW+uhR-S-DoRYlGLThPRhLhRlPYu....s.cssPVc...sFsatEp.l....................ss.....sHpcYLWuNuAauhAsplscSFtcauWC.stIRGspuG......GsVc..sLPsH........hatot..GtlphKhPTElhIoDRREhELAc.GFIPLshpKsoDhAuFFu.ApSlQ.....KPt.a.............pst-AssNucLu.upLPYlFh..hsRhAHYlKsl.R-pIG..Sa+-.Rp-hEp.LNpWItpYVsspps.ssp-s+u++PLpsAc.V...Vp-.l.cG.p.PGaYpsthhlRPHFQh-GhshsLpLVucLs.............................................. 0 64 150 236 +5772 PF05944 Phage_term_smal Phage small terminase subunit Moxon SJ anon Pfam-B_2645 (release 9.0) Family This family consists of several phage small terminase subunit proteins as well as some related bacterial sequences [1]. 21.00 21.00 21.50 25.00 20.40 18.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.50 0.71 -4.49 36 711 2009-09-11 11:40:06 2003-05-21 11:27:24 7 4 530 0 70 559 9 130.40 42 55.90 CHANGED hL.hpLspDpppLKslpShpcKsphKRc.lLPpYhsalpGsL....suGpGtQD-llspshlWtlDsGDhcsALclApYAlcas.LshP.-pFpR.ohsshlA-ElsshAt....pshpsupsh-sthhpp...........sh....clssst..DhPDpl ...............LhpLttDpccLKulpSpptKAthK+E.LL..P..catsWl-GsL............pu....Gt.u.t..Q.D..-.Vlh.plMlWtlDsGDhssAL-IucaAl+as.LsMP.tpapR.sssshlsEElus.sA.....ps.t.t.su...p...s.h...D....s....s....hLhp...................sh.....-lssst..DMPDp............................. 0 8 30 51 +5774 PF05946 TcpA Toxin-coregulated pilus subunit TcpA Moxon SJ anon Pfam-B_3639 (release 9.0) Family This family consists of toxin-coregulated pilus subunit (TcpA) proteins from Vibrio cholerae and related sequences. 
The major virulence factors of toxigenic Vibrio cholerae are cholera toxin (CT), which is encoded by a lysogenic bacteriophage (CTXPhi), and toxin-coregulated pilus (TCP), an essential colonisation factor which is also the receptor for CTXPhi. The genes for the biosynthesis of TCP are part of a larger genetic element known as the TCP pathogenicity island [1]. 25.00 25.00 66.60 65.80 19.30 18.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.70 0.71 -4.26 14 136 2012-10-03 10:38:27 2003-05-21 12:21:49 7 3 59 7 5 90 0 125.20 64 58.82 CHANGED uuLVsLGKlSsDEA+NPFoGsshsIhuh.cNuAu....NKuFAIpVsGLTQsQC+oLlTsVGDhFsYVsVpsuuusAhutLsDFtss.usAs.sGsGllKSlussupsLsLs-I.HlppLCp....ssusFuVshG .sGLVSLGKlSADEAKNPFTGTsMsIFSFPRNuAA.....NKAFAIoVsGLTQAQCKTLVTSVGDMFPaIsVKpuAh.sAlADLuDFETosAsAA.....TGsGVIKSIAPuSsNLNLTsITHVEpLCs....GTusFoVAFG........ 0 1 3 5 +5775 PF05947 DUF879 Bacterial protein of unknown function (DUF879) Moxon SJ anon Pfam-B_3751 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
25.00 25.00 29.80 29.70 23.10 21.80 hmmbuild -o /dev/null HMM SEED 602 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.70 0.70 -6.16 134 1615 2009-01-15 18:05:59 2003-05-21 12:25:26 7 4 942 0 303 1242 93 573.10 31 98.90 CHANGED lLcYYpcELsaLRchutEFAppaPclAscLuhpt...spssDPaVERLlEGFAFLsARlppKLD--aPchTcuLLphLaPpYLpPlPShollphpPs.s...tlspuhplsRGotlpopssp.............tst....................CcFRTsp-lsLhPlplspuph....psssts...................puslclpL.....pssss..shupLs...........lcpLplaLsG.spthutpLachLhppshshhlp.sssp..............hsL.ssss...lp.sGFss--uLL..Phst.psFpGaRLLpEYFshP-+FhFhclsGL.......t..htts.........pphclhlhl.....tppsst.httplsspphpLaCoPllNLFp...ppu-PlplstppsE.Yhlhs.cpppstshElaSlcpVpuhpps..............tppp...............ah..Paauhpc...ttttst..pst.Yaph..RRcsphhsp..p.............salollcts........tpshp.......sh.ctLolclh.CTNRcLPpp.Lsh......sphshshtpu.ssstslpslpsPotPhts....hss.............phtWRLlSpLuLNaLSLh..........pusps.L+plLpLYshp.......ssstspcpl-ulhslpscssschls.....sshsRGlclpLslDpssF..sssshaLFusVLc+FauhasolNoFspLplhspppsc.hhpWssphGppsll 
.............................cYYpcELsaLRch.ut-FAptaP+lAthLuht.....tps.sDP.VERLlEuFAFLouRlppKlD--aPEhTcuLlphLaPpYLpPhPSh...ullphpPchp.....phspshh.lsRsotltopsst.............tst........................CpFpospDlpLhPlp.lppsph....pssstt.....................puslplph................pstss...hshu.pLs.................LscLphaLsG..-phhsspLathLtpphhshhlp.sssp..................hsL...spth.......lp.hGFss..p-uLL....P.hst...s....s....apG....YpLLpE..YFsaP-+FhFhclsGl.............................sthhtth..tspthplhlhh.......p.p.sh.hthplstsphpLaCsPlINLF....+cu...-...slplstp.psE.Y.lhs.sp.p.p.s...t.caE.laSVspVhuhpps..............sspp....................a.PF.uhp+...psthst.....pst..Yaph...R..pcpphhspthp.......................phalullcts..........tssh.........stcslSlpl.h.CTNRcL.Ppp.lt.......sshshshpss..ssstshpslptPotPhhP....hcs.............phtW+LlSpLu...hNaLoLh........................tssps.L+phLt.lasht.............psphspp..pl-....ulhplcppsls+hhs.............s.....hhsRGlphpLsl-ppsF...stsshaLFGpVLp+FhuhYsulNoFsplslhstpptc.hhpWs.+hGpp.................................................. 0 46 112 205 +5777 PF05949 DUF881 Bacterial protein of unknown function (DUF881) Moxon SJ anon Pfam-B_4053 (release 9.0) Family This family consists of a series of hypothetical bacterial proteins. One of the family members Swiss:Q45543 from Bacillus subtilis is thought to be involved in cell division and sporulation [1]. 
25.00 25.00 25.50 25.40 23.20 22.20 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.64 0.71 -4.75 103 1279 2012-10-02 00:20:33 2003-05-21 12:37:39 7 1 506 2 348 917 13 150.00 28 57.10 CHANGED ppclcphphhuGhssVpGPGlplTlsDsstt.............s.s.hhlpsp-.lhpllN-LhsuGAEAIuINs.........pRlsssotIcss..Gs..slhlss.phhss....P..asIpAIG-....sssL.psuls.sushhphhpth......ulplplpppc......plpl.suhsss....phpaAcss ................p..phpphthhuGhssVpGPGlplTlsDssts.................................t.s..hl.pspD..lhpllNpLhsuGAEAlsINs.....................pR.lsssotl+ss....Gs..slhlss.psh.u.s....P...YsIpAIG.c....spsL.psuls.....s..u..s...h.lpthcph......Gls.hp.lppp.c......plsl.suhsss..thpaAp.................. 0 139 265 314 +5778 PF05950 Orthopox_A36R Orthopoxvirus A36R protein Moxon SJ anon Pfam-B_4070 (release 9.0) Family This family consists of several Orthopoxvirus A36R proteins. The A36R protein is predicted to be a type Ib membrane protein [1]. 25.00 25.00 34.90 34.90 20.30 19.50 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.06 0.71 -4.30 3 57 2009-01-15 18:05:59 2003-05-21 12:45:40 6 1 22 0 0 43 0 129.00 92 68.68 CHANGED MMLVPLITVTVVAGTILVCYILYICRKKIRTVYNDNKIIMTKLKKIKSSNSSKSSKSTDSESDWEDHCSAMEQNNDVDNISRNEILDDDSFAGSLIWDNESNVMAPSTEHIYDSVAGSTLLINNDRNEQTIYQNTTVVIN-TETVEVLNEDTKQNPNY ..MhLVPLITVTVVAGTILVCYILYICRKKIRTVYNDNKIIMTKLKKIKSS..NSSK.SSKSTDsESDWEDHCSAMEQNNDVDNISRNEILDDDSFAGSLIWDNESNVhAP..STEHIYDSVAGSTLLINNDRNEQTIYQNTTVVINE.TETlEVLNEDTKQNPSY..... 0 0 0 0 +5779 PF05951 Peptidase_M15_2 DUF882; Bacterial protein of unknown function (DUF882) Moxon SJ anon Pfam-B_4115 (release 9.0) Family This family consists of a series of hypothetical bacterial proteins of unknown function. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -4.78 10 1137 2012-10-02 01:02:30 2003-05-21 12:54:22 8 4 1078 0 236 840 393 149.00 54 64.95 CHANGED puuucsRsL+LaplHTGEKsEhsYhcsG+YspcuLp+ls+lLRDaRRNEss+MDPRLFDLlaplhppoGocsh.IpVVSGYRSPATNuhLRoRS+GVAKKS.HMlG+AMDFaIPGVsLK+LR-sulchQsGGVGYYPpSGSsFVHhDVGsVRpW .....................................................hu.oscPRhLsLpNL.H.TGEsl.c.sc.Fa.c.G.c.s.Ylp-pLs+L..NHFhRDaR...s.N...cl.+sI.DPt.L...FDpLa..c....l...Q...s...hL...G..o..+...c...P.....lplI...SGYR..S.sTNsp.LR..u.+.....S...+..GVAK..+SYHh...+G..Q..AMDF..+I...............-..G......l..s........Lup....l.Rc..A......A..L......u...h....+..AG...G..V...G..Y......Y..PR..S....N...............FVHIDT.....G.PsRpW..................................... 0 56 120 171 +5780 PF05952 ComX Bacillus competence pheromone ComX Moxon SJ anon Pfam-B_4222 (release 9.0) Family Natural genetic competence in Bacillus subtilis is controlled by quorum-sensing (QS). The ComP- ComA two-component system detects the signalling molecule ComX, and this signal is transduced by a conserved phosphotransfer mechanism. ComX is synthesised as an inactive precursor and is then cleaved and modified by ComQ before export to the extracellular environment [1]. 19.80 19.80 19.80 20.50 19.70 19.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.60 0.72 -4.46 11 61 2009-09-11 08:33:53 2003-05-21 13:51:57 7 1 39 0 11 48 0 54.90 33 97.18 CHANGED M......Q-llsYLlcNPEVLcKltsG-AoLlGlsscpspsIlcuFpch.hhotts.h.W.s................t ......M...Q-llsYLlcNP-VlcclppGcASLlGlscc...phpuIlcuFpphphhp.hp...W...p............. 1 5 7 8 +5781 PF05953 Allatostatin Allatostatin Moxon SJ anon Pfam-B_4313 (release 9.0) Repeat This family consists of allatostatins, bombystatins, helicostatins, cydiastatins and schistostatin from several insect species. 
Allatostatins (ASTs) of the Tyr/Phe-Xaa-Phe-Gly Leu/Ile-NH2 family are a group of insect neuropeptides that inhibit juvenile hormone biosynthesis by the corpora allata [1]. 33.50 2.10 36.40 2.50 26.90 2.00 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.30 0.74 -5.77 0.74 -2.90 21 453 2012-10-01 21:03:17 2003-05-21 14:00:26 7 15 40 0 100 478 0 11.20 60 36.41 CHANGED KRssp..YuFGLG .........KRst.......YuFGLG. 0 52 66 88 +5782 PF05954 Phage_GPD Phage late control gene D protein (GPD) Moxon SJ anon Pfam-B_4333 (release 9.0) & Pfam-B_12199 (release 10.0) Family This family includes a number of phage late control gene D proteins and related bacterial sequences. This family also includes Bacteriophage Mu P proteins and related sequences. 23.50 23.50 23.70 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.12 0.70 -5.22 283 5310 2012-10-01 22:58:23 2003-05-21 14:04:18 6 63 1555 10 1086 5029 137 285.80 18 48.10 CHANGED pc.uls.p.hphplsh...h...................sssslthtt...hlGpslslpl..................................t.....hpGhl.................sph......t..............hus...cts.........Y...plslpshhthl........shp.p.cs+h.......a...ps.polscIlpplhs...............htLpt...t......hshhs.........assQap.EoDhsFlpRLhccpGlhah..hpp.tt...............p..pLllsDss.ss.....h............stsl.hhttsssss......tpslppap.htpphtsspspspsash...cpsptt.................h...t.............................................ss..hph..hpa.....................sst....................spup...phAch+h..-thpspspphpu..tussps.........l.tsGthhplsst............ts.hsp.......pallhplpaphp..ssh 
.................................................................................................tthp.p.hphplph..h...........t..........pt.sh.htt.......hhsp.hplpl..................................................................p...hpGhl.....................sth......p..........hsp................stt.......hh........a......plpl....ps..h..h..hhh.............shp...p..sp+h...................................a........ps....p...o....l.....pIlpplhp.............................p....htlpt....p................h..t.hc...............a...ss.Q...at...Eo..DhsFlp.RLh.....tc......t.........Glhhh..hcp..t......................p.pLlhscss..ps....................................sttl.shh.s...sttps....................shp.php...ht..t..p.......h..t.s.s...p...lt..hps.ash.......cpspht...........................................................h...t.......t..p...................tts..hth......hpa.........................tsp.............................tpup..thu.p..h.ph......-thppts..........t..phpu...........tu.st.t..............l..hsGthhplps...................psthsp.....pahlspspaphp.p..t................................................................................................................................................... 0 223 517 791 +5783 PF05955 Herpes_gp2 Eq_herpes_Gp2; Equine herpesvirus glycoprotein gp2 Moxon SJ anon Pfam-B_4360 (release 9.0) Family This family consists of a number of glycoprotein gp2 sequences from equine herpesviruses. 
25.00 25.00 318.80 318.00 20.60 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.61 0.70 -5.02 5 19 2009-01-15 18:05:59 2003-05-21 14:10:55 6 1 9 0 0 20 0 226.70 69 33.06 CHANGED KNFMEASCTVET..NSGLAIFWKIGKASVDAFNRGTTHT....RLMRNGVPVYALVSTLRlPWlNVIPLTKITCAACPTNLluGDGsDLNSCToKSTTIPCPGQQRTHIFFSAKGDRAVCITSELVSsPTITWSVGSDRLRNDGFTQTWYGIQPG.VCGILRSEVRIsRssWRhGtso+DYLCElsASD..AKTSDYKVLPNAH...STSNFALVAATTLTVTILCLLCCLYCMLTRPRASV KNFhcAoCTVET..N.GLuhFWKIGNASVDAFsRGTTHT....RlMRNGVPVYALVSTL+lPWlNVIPLTcITCAACsoNhltGstsDLsSCTsKSTTIPCPGQQRTHIFFStKGDRAVCITSELsS.PTITWSVGSsRL+NsGFoQTWYtIQPG.VCGILRSEV+IpRsoWRhGuso+DYLCElosSD..uKTSDYKVLPNAa...STSNFALVAATTLTVTILCLLCCLYCMLTRPRASV....... 0 0 0 0 +5784 PF05956 APC_basic APC basic domain Yeats C anon Yeats C Motif This region of the APC family of proteins is known as the basic domain. It contains a high proportion of positively charged amino acids and interacts with microtubules [1]. 25.00 25.00 71.10 38.20 18.60 18.30 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.47 0.70 -5.32 4 93 2009-01-15 18:05:59 2003-05-21 15:32:18 6 35 37 0 46 80 0 308.10 51 13.43 CHANGED slhRGRTVIYsPustspsps.......soP.pKpuPPspspsusssKsPS.uQpRS+SLHRsuK.SphusLS.PsRSuTPPuRhuKoPSSSSSQTSssSpPh.p.pshsTpsuG...slPG.....PusuL..VP..pSPuRshhA.........pp+KTQ+SPVRIPFMQpss+..PsPLu.....pshsEPusRGhsthpussGA.uu+LtLlRMuSshSSGSE.SDRSG...FhRQhTFIKESPu.hLRR+RsEhSSh-StusospsASPpRupsthPAVFLCSSRCpEL+su..............PcQu.s.t...pQppPtuRss.u.h......ssRRToSESPSRLPl.Russu+.........sETsKRaASLP+ISlhRRssSusSlhouSu- 
...............ul.RGRThIalPuspspopS.......ToP.pKpuPPh+s...s.u.....KsPS.uQptopS..RsuKsS.p.u-LS.ssRpso....ut.sKu.PSp..SuSpsSTPS+Ph.p.hspshQos...G.sph.PG.....Pssp.l.op...lP.ssS.PupAspp......ppsps+ho.pSPsR....Qs.sK...ssLu..so...shspsEsuS+G.hsthpsusGu.sp+lpLsRMoSs+SSGSE.SDRSt...hlRQ.TFIKEuPo.hLRR+hpE.uS..hE...S.husoops.sSPpRupsthPsl...Ssphs-hphu..............Pp.sss.....tptpPttRts.hu...........RopSESPSRLPl.Russh+..........EpsK+.uSLP+lSshRRssSuoSlhoAsu-............................................................... 0 2 7 18 +5785 PF05957 DUF883 Bacterial protein of unknown function (DUF883) Moxon SJ anon Pfam-B_4421 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 40.00 40.00 40.00 40.00 39.80 39.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.97 0.72 -3.59 59 2283 2012-10-02 00:15:32 2003-05-21 16:01:49 8 2 1022 0 347 911 46 93.30 38 87.31 CHANGED cplts-l.cpLhsshEcLLcpsustuspphppLRp+hpptLcps+splssstcs....stc+u+puscss-cY.......V+-pPWpulGluAu.lGhllGlLlu..RR ...........................t.clps-l.ppLs-oLE-VLcS.s....Gsp...ucpc....hpcl....Ru.+A....c....p.sL....ccs+tRls....psscs.......ltppu....+pAsspADcY...........V....+EcPWpu....lGlu.A.A.VGlllGlLLuRR...... 0 46 135 243 +5786 PF05958 tRNA_U5-meth_tr tRNA (Uracil-5-)-methyltransferase Moxon SJ anon Pfam-B_4661 (release 9.0) Family This family consists of (Uracil-5-)-methyltransferases EC:2.1.1.35 from bacteria, archaea and eukaryotes. A 5-methyluridine (m(5)U) residue at position 54 is a conserved feature of bacterial and eukaryotic tRNAs. The methylation of U54 is catalysed by the tRNA(m5U54)methyltransferase, which in Saccharomyces cerevisiae is encoded by the nonessential TRM2 gene. It is thought that tRNA modification enzymes might have a role in tRNA maturation not necessarily linked to their known catalytic activity [1]. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.10 0.70 -5.76 9 7304 2012-10-10 17:06:42 2003-05-21 16:10:13 6 37 4224 7 1556 6444 1014 275.40 29 64.81 CHANGED sYstQLp-Khs+LpshhAPFpuP-.EVFcSPspHYRMRAEFRlWHE..s--haYhMF-QtsK....p+lRl-pFPhASphINcLMstLlssh+ssssL++KLFQV-FLoTLSG-hlloLLYH+pLD-pWcpsApsLps.....thslslIGRu+tpKIslcpDYVsEpLsVsGRpahYRQlEsuFTQPNusVNpKMLEWAh-sspsppG.DLLELYCGNGNFoLALAppFc+VLATEIuKsSVsAAQaNIstNslDNlpIlRhSAEEFTpAhsthRpFpRL+..GIDLcSYphsTIFVDPPRAGlDs-ThcLVptY-RILYISCNPETLppNLppLpcTH+loRhALFDQFPYTHHMEsGVLLp++ ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..............p.................s.............s.........p.........u.FhQ...s..N...s........t.........s....p.......p.....h....h....p....h
....A....h....c......h.........h.............p.......h...........p...............t..........p.......c................l.....l......DhaCGh...Gs..h..u....L.....l.................A..................p.....p....s............p..................p........V..h...G..l.......El...s.p..u..ltsA..p..t..NA.......t.h.N...s...l...s....N...s....p.....a..h...t...h.t..u....p.p..h.h.......t.h.......t...............................................................t.....-.s..l...l..l.DPPR.sG..........h.s...p.......p.......h...h.....c.......h........l..........t..............p........h......p..........+......I...l.YlSCNP..s.T.L.A......R...D.................l..........p...............h.....L..............s..............p..........s..............Y............c............l........p........c...l......t.....................h........DhFPpTsHlE..s.lslLp+.h........................................................................... 0 504 946 1296 +5787 PF05959 DUF884 Nucleopolyhedrovirus protein of unknown function (DUF884) Moxon SJ anon Pfam-B_4679 (release 9.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of unknown function. 25.00 25.00 134.90 134.60 18.80 18.60 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.19 0.71 -4.98 27 58 2009-09-11 07:37:26 2003-05-21 16:12:33 6 1 54 0 0 55 0 184.70 33 92.00 CHANGED MslpLYpspsp...-s.loFphspshNSlhhahFphtss.ssssp............TRLVSGYEps+....sIshphsssss..............csuYllSChRsPplhhcLh..pp.astslshsVV+..spsp.............splWHVluV+KspEstph.p+lpulpVps...u.-ph..asKpLltlsGNlPusFlpuLsp.shsstp-lcslplhtPplpl .MslpLYpspsp...-s.IoFphssshNSlslahFphtsssssssp..............TRLVSGYEsu+....sIshphsssss..............csuYllSClRhPhlhpcLh..pp.aopPluhllV+..spsp.............splWHVluVRKspEhtss.p+lpulhssp..sG.-ph..asK-LlhlsGNlPusFlsuLp+.shsshp-lcslplhhPplpl....... 
0 0 0 0 +5788 PF05960 DUF885 Bacterial protein of unknown function (DUF885) Moxon SJ anon Pfam-B_4405 (release 9.0) Family This family consists of several hypothetical bacterial proteins several of which are putative membrane proteins. 22.10 22.10 22.10 22.30 21.80 22.00 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.00 0.70 -5.35 150 1292 2009-01-15 18:05:59 2003-05-21 16:37:19 6 10 669 6 493 1292 1570 510.40 24 89.45 CHANGED sP.hs.ohhGht.tt...........tphs....D..h.Sspshppphpthpphltp..........................L.pslchs....sLsspsplshplhph..hppthtthpap...............t.hslspht..........uhhstl.shh.st.p.hp.........shp-..............................................hcsalsRLpshsphhsp.hhpphcts..htpGhh.P...chhhctslsph.........................................pshls................tshp.......ps.hhsshtph.t............thstpppsplttpsppslppplhPAhpph....hsahp..sphhsss...........pstGhhphP..........................sGcthYpht...lcthTT.o.chos--lHplGlpEVsRlpuEMctl.hpph.....Gh...p........s.....................sltchhphLps-....pah..p.........................s...t..ppllsphpthhpp............hp.stls.chF...t..plPc.sshtVchlss.......hhpsssss.uhYts.ss....Dss...pPGhaalNh.hs..........hpphspasht....sLshHEuhPGHHhQluhspEh.ts.lPthRph..s.....hh.o.AasEGWALYuEp.Lu.cEh.GhY.p.....cP......................................................hsc.................hGpLp.hchaRAsRLVVDTGlH..sc...............tWoR-p..............................................................................................................Alsahtcso.shsp..................................................................................................ssspsEl...-RYhs.h..PGQAhuYKlGtlcIhcL.RpcAc...ppL...Gcc...FD.......l+pFH-hlLppGulPLslL-ppl...ppal 
.......................................................................................................................................................................G.tt.ts...........phs-..h.Sstshptphthhpphltp..........................L..pths.s........tL.s.s..p.pp..lshp.lhp....t..ph..p...thp.ttpht..............................hslstht.............uhhtt..l....th..h..s.....hshp..............s.pc.........................................................................................................................hp.shhtRLps.lsthhsp..hhtp..hcts....httGhh.s......phhhpth.ltph...........................................................................................pshhs..............t..............tshhh.sshtph.s.............hststp..splptph.tpslttthhsAapph................hsalpsphhsts...........pp.uh..ph.s..........................sGcphYphh...l+thss.s.sh.ss.........c-la.p.hGhpEltRlps-hppl.tcpl........sh......t...........s...................................shtphhphLcs...-....th........................................................tt......ptlhp.hp..t.hhcc................hp.stls..phF......plPc..t.h.lc.l.s..........hh.t..sstss..uh...Yhs...ss.....Dts.....cPG...hhahss...s..............hpphs.pa..p....h...........shshHEuhPGHHhQhuhstph..ts.lstaR+h..s............hh.o.uasEGWALYu.....Et.Lh.p-h..Ghh.p......DP.............................................................................hpp........................hGhLs.hphhR..AsRlVlDhGlHhp.......................................tWoppp..............................................................................................................Ahsahtpps.shsc.....ss....spsEl.....cRYhs...h....PGQAhuYplGphplhcL...RcpAc...pph...Gsp....FD...............l+tFHstlLp.GulPlslLcptlt..................................................................................... 0 191 354 434 +5789 PF05961 Chordopox_A13L Chordopoxvirus A13L protein Moxon SJ anon Pfam-B_5061 (release 9.0) Family This family consists of A13L proteins from the Chordopoxviruses. 
A13L or p8 is one of the three most abundant membrane proteins of the intracellular mature Vaccinia virus [1]. 24.10 24.10 29.60 35.80 24.00 24.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.37 0.72 -4.04 8 55 2009-01-15 18:05:59 2003-05-22 10:44:54 6 1 35 0 0 40 0 68.40 66 99.31 CHANGED MIuDllLlIICVsIIGLIVYGIYNKKoosppspPus...EcY.KhEslKTuYVD+LKsuHLsSFYKLF.Sup MIGILLLIGICVAVTVAILYuhYNKIKNsQNPsPus...lNsPPPEs+N.TKFVNNLEKDHISSLYNLV.SS.u 0 0 0 0 +5790 PF05962 HutD DUF886; HutD Moxon SJ, Rainey P anon Pfam-B_5160 (release 9.0) Family HutD from Pseudomonas fluorescens SBW25 is a component of the histidine uptake and utilisation operon. HutD is operonic with the well characterised repressor protein HutC. Genetic analysis using transcriptional fusions (lacZ) and deletion mutants shows that hutD is necessary to maintain fitness in environments replete with histidine. Evidence outlined by Zhang & Rainey (2007) suggests that HutD functions as a governor that sets an upper bound on the level of hut operon transcription [1]. The mechanistic basis is unknown, but in silico molecular docking studies based on the crystal structure of PA5104 (HutD from Pseudomonas aeruginosa) show that urocanate (the first breakdown product of histidine) docks with the active site of HutD. 
25.00 25.00 30.70 30.60 23.20 23.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.27 0.71 -4.56 106 985 2012-10-10 13:59:34 2003-05-22 10:47:25 6 6 936 6 178 684 43 176.70 33 90.44 CHANGED lchsch.ssPWKNGGGtTcEIsh.aP.........sGu......shcsFs...WRlSlAslupsGs..FShFsGl-RhlslLpGsGhpLphs...........ut.p....pth..........htPhp.shuFuG-sslsupLhsGs.spDFNlMsRcsthpuplphh....ss..............shph........susssl......lashsG.s....hp.......ls.s...........sphtLpst-sl................hhcs......................ptsl...lpsp....ut.lhhlpl ..............................chpchsss.W+NuuGpT+EIss..a..P..............................sut......c-Ft.....WRhSlAolussGs..FShFPGh-RhlolL-G.sthhL.pss.............................sp...s....cs................LpPhp.PauFuuDtslpucLhsGt.shDFNlMoRhsh.tpuplc..hh......pt.............................................shph...........supssl..........laslsG..s.......hp..lss............................phhh.sspssh....hhcs............pppl......hpst....uthhh............................................................................... 0 33 72 125 +5791 PF05963 Cytomega_US3 Cytomegalo_US3; Cytomegalovirus US3 protein Moxon SJ anon Pfam-B_7187 (release 9.0) Family US3 of human cytomegalovirus is an endoplasmic reticulum resident transmembrane glycoprotein that binds to major histocompatibility complex class I molecules and prevents their departure. The endoplasmic reticulum retention signal of the US3 protein is contained in the luminal domain of the protein [1]. 
25.80 25.80 26.10 39.90 20.10 25.70 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.37 0.71 -4.95 3 63 2009-01-15 18:05:59 2003-05-22 10:51:40 6 1 10 4 0 61 0 179.40 54 97.57 CHANGED MKLVhlLAuLAsLl.LshuDusPRPlssIsS....clTuh.sHFpVEENcC+lHMGplYFRG+lSGNFTc+HF...VsaGIVSpSYhDNLpVouEQaca.cAGsYFEWNVpGtsVsasVDpVDVpLSosWGDPKKWAoCVPQVRsDYoS.shhWYhQhuMhccoWuhhhhsVhhYsLShhhLshhhVhholp.......MRFF ............................MKPVLlLAILAVLh.LRLADSVPRPL-VVVS....EI+S..AHFRVEENQCWFHMGMLaaKGRMSGNFTc+HF...VsVGIVSQSYMDRLQVSGEQYHHDERGAYFEWNIGGaPVsHTVDMVDITLSTRWGDPKKYAACVPQVRhDYoSpoI.WYLQRuMRccsWsLlh..RTllsYhhulslLVLlsVtVSs+.......hRF............... 0 0 0 0 +5792 PF05964 FYRN F/Y-rich N-terminus Yeats C anon Pfam-B_1170 (release 8.0) Family This region is normally found in the trithorax/ALL1 family proteins. It is similar to SMART:SM00541. 21.00 21.00 21.00 21.50 20.80 20.90 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.57 0.72 -4.51 45 555 2009-01-15 18:05:59 2003-05-22 10:52:30 9 76 143 1 361 515 8 52.30 35 2.49 CHANGED huoLtlhsLGpllssp..sFHspphIaPlGYpusRlYhS....hpcspc+shYpCcIh- ...................GulhlpslGpllspp......t..acspptlaPlGYp..soRlYaS....hpcsp.++ChYpCpIh.t................ 0 123 177 264 +5793 PF05965 FYRC F/Y rich C-terminus Yeats C anon Pfam-B_1170 (release 8.0) Family This region is normally found in the trithorax/ALL1 family proteins. It is similar to SMART:SM00542. 
20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.72 0.72 -4.17 48 561 2009-01-15 18:05:59 2003-05-22 10:57:39 9 65 144 1 363 525 9 85.00 28 4.25 CHANGED sspPhFcls.s--......s...hshpusSssssWppllcplpp..h+pppt...........tslsG.-hFGLspsslhpllEsLPsscpCppY...phchtc .................................................t..tPhFcIp..s--.........s....hshpusSspssWp.pllctlpp.....hRppsp......................hth...tt...lsGtchFGlspsuVhpllEpLPGscpCppY.....pFch......................... 0 122 175 266 +5794 PF05966 Chordopox_A33R Chordopoxvirus A33R protein Moxon SJ anon Pfam-B_4799 (release 9.0) Family This family consists of several Chordopoxvirus A33R proteins. A33R plays a role in promoting Ab-resistant cell-to-cell spread of virus [1] and interacts with A36R to incorporate the protein into the outer membrane of intracellular enveloped virions (IEV) [2]. 22.50 22.50 22.50 22.80 22.40 22.40 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.34 0.71 -5.52 6 66 2012-10-02 16:37:33 2003-05-22 11:01:42 7 1 42 2 1 66 0 170.40 47 96.24 CHANGED hsspps.Dh.c.s-ptsuFhGSTIYGsKL+.KKphtKKs+sluIsLRIullsSllSLhsIsshLAlQhspCcssp-t.psspulsshs.hosptphstSth.....cpCpGIha-GtCYphssEspo.Fs-AspsCtscuusLPu.ssLhpc.........WlhDYL-GTWG--Gtsl........sKpps.-lpssDlSsEhRsYFCV+Shs ................................................................hhss..........-sc-ppTuhhuuTlYGcKlp.u.Kp...K++RsIulCIRI...ShVISLLSh.ITh.oA.hL.hl.c.L...NpChSss.Eushoctulsssu...u..o.spc.csuuSsop.......................cSC...s.G...LaY.pG.u.CYIhHS-hph.FsDApAsCsscuSoL..Ps..oDllsT.........WLhDYlE-TWGsDGNsI........sKoos...-h...p-uDlSpEsRKYFCVKoh...................... 0 1 1 1 +5796 PF05968 Bacillus_PapR Bacillus PapR protein Moxon SJ anon Pfam-B_4892 (release 9.0) Family This family consists of the Bacillus species specific PapR protein. 
The papR gene belongs to the PlcR regulon and is located 70 bp downstream from plcR. It encodes a 48-amino-acid peptide. Disruption of the papR gene abolishes expression of the PlcR regulon, resulting in a large decrease in haemolysis and virulence in insect larvae. A processed form of PapR activates the PlcR regulon by allowing PlcR to bind to its DNA target. This activating mechanism is strain specific [1]. 21.10 21.10 21.20 22.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.20 0.72 -3.90 3 118 2009-09-10 20:50:31 2003-05-22 11:30:16 6 2 101 0 3 32 1 47.80 83 93.33 CHANGED MKKLLIGSLLTLAMsWGISLGDTALEKsQlISHssQEVQLASDlPFEF ....MKKLLIGSLLTLAMAWGISLGDTALEKSQlISHNDQEVQLAuDlPFEY. 0 0 1 1 +5797 PF05969 PSII_Ycf12 DUF888; Photosystem II complex subunit Ycf12 Moxon SJ anon Pfam-B_4945 (release 9.0) Family Ycf12 has been identified as a core subunit in the photosystem II (PSII) complex [1-2]. PsbZ has been shown to be required for the association of PsbK and Ycf12 with PSII [2]. 19.60 19.60 19.70 19.70 19.40 19.40 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.97 0.72 -7.29 0.72 -4.32 40 232 2009-01-15 18:05:59 2003-05-22 11:34:02 6 2 219 13 31 179 2 32.40 54 71.40 CHANGED M..NhElIsQLssLsLIllAGPlVIsLLuhRpGN.L ......M..NlElluQLssLuLIlluGPlVIsLLAhR+GNL...... 1 11 26 31 +5798 PF05970 PIF1 DUF889; PIF1-like helicase Moxon SJ anon Pfam-B_4988 (release 9.0) Family This family includes homologues of the PIF1 helicase, which inhibits telomerase activity and is cell cycle regulated [1][2]. This family includes a large number of largely uncharacterised plant proteins. This family includes a P-loop motif that is involved in nucleotide binding. 
27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.09 0.70 -5.51 13 2557 2012-10-05 12:31:09 2003-05-22 11:41:20 9 118 754 0 1533 3113 626 193.20 19 32.75 CHANGED pLssEQ+pla-pIlpulhs.spGthFFVsGaGGTGKTaLWpslhstlRS..cscIVLsVASSGlAuLLL.GGRTAHSRFtIPlshsEtSpC..sI+puoclA-LlpcsuLIlWDEAPMssRpCFEALDRoLRDIlppps....sKPFGGKsVVlGGDFRQILPVlp+GuRspIlsAslssShLWpc.s+.lLpLT+NMRLtssthstpctcclpcFupWlLslGsGclssssstt.......shIplPpDLLlppss.cslcsllsclYsc..llpshss.sahppRAILsPsN-sVsclNsallspLsG-E+pYLSsDols.ssps..c.-hlYPsEFLNSlsssulPsHhL+LKlGsPVMLLRNLs.ohGLCNGTRLhlTpL ..........................................................................................................................................................................................h..h..h........G......uGsGKo....hhhp..............h...............t...........h..........................................h..h.hh....us.o....u.....h.u..u...........h..............t..........G.........................T.h..H....p..h......h.....t......h.................s...........................t......................................................................................................................................................................t.............h............t.....h.p.....hllhDE.....hsMh.....t.....t................h..t....h......l.......p.............h...p.h..................................t...F..G..G..h.......ll..h..GDhhQl........P.............l...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 583 1068 1392 +5799 PF05971 Methyltransf_10 DUF890; Protein of unknown function (DUF890) Moxon SJ anon Pfam-B_5064 (release 9.0) Family This family consists of several conserved hypothetical proteins from both eukaryotes and prokaryotes. The function of this family is unknown. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.91 0.70 -5.27 4 1276 2012-10-10 17:06:42 2003-05-22 12:58:10 7 11 1113 3 405 2121 363 263.00 45 81.71 CHANGED tA.psuLHPRNRH+u+Y.DFstLhpssPELcpaVhlNPtGc.SlsFADPhAVKALN+ALLtpaYuVs.WDIP-GaLCPPlPGRADYIHalADLLucusss.hss..thRuLDIGsGANCIYPLlGspEYGWpFlGo-lDs.ulspAptIlpuNspLsstIclR+Q.ppptIFsGlIttsE.pYDhThCNPPFHAShs-AptGopRKhpNLtp.s..........s.s.s.LNFGGQtsELaCEGGEssFlt+MhcESptFApQVhWFooLlSKusNLssLpcpLcplGAsclphhEMAQGQK.SRFlAWoFhs 
.............................................................................p...+ssLHPRN+H.ps.cY..Dh.stLsps.P-.L.ppalh.......h....s......s....t..G.........c.S.l........DFusP..AVKtLN+AL.LtcaYuls................WD..IP...t...saLCP.PlP...G..........RADY..IHa.l.....A.....D......L.....L.........u........p.......s.........s......s.......s...lst......................psp.hLDI..G..sGA.NC..I....YP.L.l..........G.s..p.....c..Y........G........W..p..F.s...G.o...................-lsst........ulssAp....tI.l..p..sN...ss......L..sp.t......I..c..l..R....p........Q........p.....p....s...t.........t.................I...F..........s....G.l....l...p...p.....s.....E.......pa...........D...hThC.NPPFH..sS...t...t...t...A.....p...t......G...o...p...R......Khp..s..Lth................................................t......LNF....G.Gpp...p...E.L.W....C-..GGElsFlp+......M.I..pES..t..t........a..t...c..........p...........V..hWFToLlS..+..tp..NL...sslhctL...pc..........h......G......s..s....p.....l....hhhE..M..uQ...G.......Q.K.t.S.RhlAWoFh............................................................................................ 0 123 207 320 +5800 PF05972 APC_15aa APC 15 residue motif Yeats C anon Yeats C Motif This motif, known as the 15 aa repeat, is found in the APC protein family. They are involved in binding beta-catenin [1] along with the Pfam:PF05923 repeats. Many human cancer mutations map to the region around these motifs, and may be involved in disrupting their binding of beta-catenin. 19.40 19.40 20.10 19.50 18.90 19.00 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.92 0.73 -6.36 0.73 -3.62 9 272 2009-01-15 18:05:59 2003-05-22 13:01:02 6 30 58 2 142 273 0 16.00 53 1.96 CHANGED c.-QPhDYShKYuEcp .-.DpPhsYSlKYu--p...... 
0 13 22 62 +5801 PF05973 Gp49 DUF891; Phage derived protein Gp49-like (DUF891) Moxon SJ, Bateman A anon Pfam-B_5075 (release 9.0) & Pfam-B_6067 (release 14.0) Family This family consists of hypothetical bacterial proteins of unknown function as well as phage Gp49 proteins. 23.90 23.90 23.90 24.00 23.80 23.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.05 0.72 -3.97 226 2071 2012-10-03 00:18:00 2003-05-22 13:01:19 9 4 1184 0 551 1798 191 86.10 24 77.55 CHANGED calpsL....cp.........ht...sclhtplppl.ptG........hhsshp.....pGlhElRl..chss..saRlhashctsp....hl.llLpuhsKpo...........pp-IchAcphhc ......................................hpth...spp.........ht...tplhhtlphltptG.........shspphp........pslhElRh....phss...hhR.lh.as.hchsp..........hl..llLpuh.pKpo.......p....p.h.pp-IchAcphh...................... 0 143 347 476 +5802 PF05974 DUF892 Domain of unknown function (DUF892) Moxon SJ anon Pfam-B_5115 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. 23.70 23.70 23.70 23.80 23.60 23.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.80 0.71 -4.42 111 1538 2012-10-01 21:25:29 2003-05-22 13:04:39 7 2 728 12 337 830 49 154.30 41 91.00 CHANGED slc-lalctL+DlYsAEcQhhcsLschuctAps.scL+suhcpHlpETcsQlcRL-plhcth.GtpspuhpCcuhpGlltEupp.lhcp.hpcps.lpDAsllsuuQthEHYEIAuYGoLhshAcpLGh........s.-ssplLppsLpEEpssDctLsplA.p.....shssttus ...................................p-hahchLpDsauhEKQhpphLtphA...........pths.....N....sc....L.p....tt....hcpHLpET+sQIppl-pll-pp.s.lphp......shK..suMpulhtpupp...lh.t...s..pcp..V+suh...uu.thE+aEIA.s......YsoLhshAcplG.........s.cuhtlLcphLpEEKthsphLpphh.......psspphh....................................................... 
0 95 191 252 +5803 PF05975 EcsB Bacterial ABC transporter protein EcsB Moxon SJ anon Pfam-B_4764 (release 9.0) Family This family consists of several bacterial ABC transporter proteins which are homologous to the EcsB protein of Bacillus subtilis. EcsB is thought to encode a hydrophobic protein with six membrane-spanning helices in a pattern found in other hydrophobic components of ABC transporters [1]. 25.90 25.90 26.10 30.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.26 0.70 -6.00 52 1578 2009-01-15 18:05:59 2003-05-22 13:16:12 7 2 1026 0 136 965 1 282.60 25 96.02 CHANGED hppLapcRhpphhpcph+Yl+alhNsHhllhLlh.llGhhuhhYsphLpph..sssh..hh...hllsllhhhllh.hGpltThlccADplFLLshEpc.hpsYlppuhhaShlh.hhh.sllhllhhPlhh..thshshhthlhhhlhhlhhKhh.hhhphphhhhp.............phh.hhhhhhlshhhhhhhh..........hhhhhhhhhlhhhhhhhh.th.ptphhsWcthIphEpc+ttphh+hhshFT-V.plppps+..RRpaL.Dhll...+hlttpp.ppsahaLahRuFlRus-ahulhlRLshluhlhlhh........lsp...alshllshlhlYllshQLhslappactphhhpLYPlsppp+hpuhppllphllhl.sllhslhhhl.thphhhsllhlhssllh ..........ttla.pRhtthhpp..hY.phhhNshhhhhhlh.hhuhhhh.Ysphlpph......s.ph....hh.....h.h.hs.lh.hhhh...ht.ltohhctsDhlFLLshEpp.hp.ahptthhhohhh.hhh.hlh.hhlhhPlhh.....ths.h..hhhhhl.....h.hhhhhhh..hhhph..hh....................................................................................................th...hs.Wphhlt.Epp+h..hhphhshF...T...s...V.t.lp.p.p.st....+RtaL..chll...phhth.....ttha..LahRsalRss-hhslhhRLhhluhlhhhh..........l........hlshhlshlh.YllhhQhhshapt..ht.ht..hh.plaPlstt.t.p.tsh.p.hl.hhh.h.hhl.h...lhhh.hhh..t.hhhhhhhhhh.hh........................................................... 0 25 62 96 +5805 PF05977 MFS_3 DUF894; Transmembrane secretion effector Moxon SJ, Eberhardt R anon Pfam-B_4880 (release 9.0) Family This is a family of transport proteins. 
Members of this family include a protein responsible for the secretion of the ferric chelator, enterobactin [1], and a protein involved in antibiotic resistance [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 524 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.56 0.70 -6.39 10 4536 2012-10-03 03:33:39 2003-05-22 13:26:29 8 16 2237 0 1344 14705 3116 396.30 20 94.58 CHANGED sSuaAPLRpssFRslWlAolluNlGshhQsVAuuWLMTolSsSPlMVALVQAAuTLPlFLLulsAGAlADsaDRR+lMlsuQslhAsuSshLslLutlGhloPWhLLuhTFLhGsGuALssPAWQAoVs-lVsRcDVPAAVuLNSlGaNlsRSlGPAlGGlllAuaGsAssFAlsslSYhslIsslhpWKhcsssssLPpEsltsAl+AGLRaothSssl+sVLsRuslFGLuASAlhALLPLVARDpLuGsAhsYGlLLGuhGsGAlsGAlhss+LRcRlsu-pLlthAsluhAhssLsLALosshWlsslsLhluGAuWlsALoslssuVQhusPcWVhGRsLSlYhslhhGGhAAGSalWGsVAEshulssALlsuAsuLLluuhluhhhtLPptcs.DhsPtsc..aspPhVAhsLcPRsGPlllpIEYpIsc-ctssFLcsMtEhR+lRpRsGAcsWsLtRDLpsPppWlEpFhssoWh-aLRppcRlTpADttVpp+lpAhHsGspPPplp+hl...cRPs ...........................................................................................................h.................s....a.t.h....h...h...h...u..p...h...l.o......h....u..........h........h.......h.......t...l........A....h.....s....h........h.......l........h........p.......h.......o........t.......o.......s.......h..h.....l................u....l.....l..s...h.....h...p....h....l.........P...h.....h.....l....h.......u.....h.......h.....u.......G.s.....l.......A.....D............R....a.s.....+..+....+....l........h.....l.....h...s.....p.......h....h....t..s.....l.......s....s....h....h......L.......s........l.......h........s.................h.......h.......s.....h.......h....s.......l..........h.......h.......l....h....h....h....s....h....l....h....G....h....h...s....u....h....t......s....P...u....h...p....u...h....l...P...p..l...V..s....c......c......p..............L...s......p....A....s....u...l......s...............s...h......s...........h...............p.............h....u.........p...l.....l...G..........P....s.......l..u
.........G.......l..........l...........l..............u.............h.................h..............G............s.........s.............h..................s..........a...............h..........l.......s.........u........h............s........a......h....h.......s......h.....h......s.........l..........h.....................t...........l...........t............h...............t..............h.............................................t........s..............h..............s..............p..........p................p................h.......h............t........s..................l...........h.............t.......G.............h......c.............a.....l.....h......p.....p..............h.....l........h........h......h..........h........l.....h.......s.....h....l....h.....s........h.........h..........s.......s.....s......h.....h......s........l.......h............P....h...........h...u......p.........p......h.......h........t..............h.........u........s....t..........t..............h..........G.......h........l........h...u....s....h.....u....l.....G..u..l..l..G...u...l.....h...h....s......t......h........t........p.....p.......h...t...........t.......t......h.......h........h.......h.........u.........s........h.........s.......h.......u.........l........s.....h.......h....h.........h..............u.....................l............s.................s..........s.............h..............h.........l...........s.........h...........h.......s.......l........h.....l....h..........G.......h.......s...h......h.......h......s....h......s....s.........h........p.............s.......h.....l.............Q....t.....t.....s...P..s....p......h......h....G....R......l.......u......l....h...t....h....h....h..h.....u.......u.......s........l....G...s....h...h...h....G...h....l....u....p....h....h.......u.....h...h........h....s.....h.....h.....h.....u.....u....h...s...h..l..h...s..s....h..h..h..h..........................................................................
.........................................................................................................................hhthh..................................................................................................................................................................................................... 0 468 932 1185 +5806 PF05978 UNC-93 DUF895; UNC-93_Ce; Ion channel regulatory protein UNC-93 Moxon SJ, Pollington J anon Pfam-B_4965 (release 9.0) Family This family of proteins is a component of a multi-subunit protein complex which is involved in the coordination of muscle contraction. UNC-93 is most likely an ion channel regulatory protein [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.74 0.71 -4.73 11 630 2012-10-03 03:33:39 2003-05-22 13:29:58 11 10 174 0 479 1149 24 133.40 22 29.75 CHANGED LGhuahhlFoAasopuFItEuVIcSlp-ps.ss.IssaAGYYuhAllYhsFohusLhsPslVshlosKWuhlluuhsashF.hGFLahNpaahYhoSAllGhGuuhlWsGpGsYLoppso+cThE+Noul.WAlhpsSLlhGGlhlhhh.aphpsstuh .........................................................................................................t.t...G..hhu..hullY..ss.h....s.lus.h.h....s.P....s.l.l....ph.l..G....s.K...h....ol..hl....u....s.h.s....Y..s..h.....a............h.s.....u........h.....a.....s......p.......h............a..............h....h....h....s.u.u.sl.l..G.h...uuu...............h.lW.su....p.u....s...a..........l.......o......p...........h.....s......p..........t...........t.....p..........h........t........p..........................sh.......h.h..............................................tt.................................................................. 1 173 248 405 +5807 PF05979 DUF896 Bacterial protein of unknown function (DUF896) Moxon SJ, Bateman A anon Pfam-B_5209 (release 9.0) Domain In B. 
subtilis, one small SOS response operon under the control of LexA, the yneA operon, is comprised of three genes: yneA, yneB, and ynzC [1]. This family consists of several short, hypothetical bacterial proteins of unknown function. These proteins are mainly found in gram-positive firmicutes. Structures show that the N-terminus is composed of two alpha helices forming a helix-loop-helix motif. The structure of ynzC from B. subtilis forms a trimeric complex [2]. Structural modelling suggests this domain may bind nucleic acids [3]. This family is also known as UPF0291. 21.50 21.50 21.50 22.70 21.40 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.07 0.72 -4.38 39 1638 2009-01-15 18:05:59 2003-05-22 13:33:07 7 4 1273 5 166 633 4 62.90 48 80.57 CHANGED cchlcRINELA+KpK.spGLTscEptEQppLRpcYLcsFRsuh+splcslpVlDtp.GpDVTP-KlK ........chlcRINELA+KcK.s.pGLTt-EptEQppLRc-YlcsaRpsh+ppl-slK.l..l..D.c.GpDVTP-Kl+............ 1 59 108 136 +5808 PF05980 Toxin_7 toxin_7; Toxin 7 Moxon SJ anon Pfam-B_5254 (release 9.0) Family This family consists of several short spider neurotoxin proteins including many from the Funnel-web spider. 27.00 27.00 27.20 29.50 26.80 25.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.16 0.72 -3.91 30 91 2012-10-01 22:06:18 2003-05-22 13:43:09 7 2 7 4 0 96 0 33.90 49 50.57 CHANGED GCLs+N+aCsshoGP+CCSGLpCKplSIhcohCl .uCLhhsQaCNALus.+CCssapCKhls.h-uhCl. 0 0 0 0 +5809 PF05981 CreA CreA protein Moxon SJ anon Pfam-B_5258 (release 9.0) Family This family consists of several bacterial CreA proteins, the function of which is unknown. 
25.00 25.00 52.10 51.90 23.20 19.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.69 0.71 -4.61 61 1188 2009-01-15 18:05:59 2003-05-22 13:50:15 7 3 1169 0 227 627 37 130.60 57 80.30 CHANGED lGpVussa.hlGsD..Ill-AhcDPcVpGVTCalSpscsGslcthh..shhEDsS-uSIACRQsG.PIphs.....cp.c....pGEpVFpcppSllFKsL+VsRhaDtcpsoLlYLsYSc+ll-GSsKpSlSsVPlhssps .................IGsVsTVF+hhGsDc+IVVEAFDDPcVcsVTCYlSRAKTGGIKGsL....GLAEDsSDAuISCpQV...G..P.Ipls.....D+lK.....pGEVVF++RoSL.lF...KoLQVVRhYDsKRNsLsYLuYSDKl......l-......GSPKNulSAVPlhshtt.................... 0 42 100 159 +5810 PF05982 DUF897 Domain of unknown function (DUF897) Finn RD anon Pfam-B_8040 (release 9.0) Family Family of bacterial proteins with unknown function 21.40 21.40 23.80 37.70 21.30 21.30 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.99 0.70 -5.64 7 322 2012-10-02 17:06:44 2003-05-22 13:58:16 7 2 288 0 114 310 521 322.00 38 95.31 CHANGED PslhFFhhGhlhuhh+S-LtlPtslspsLohhLLhsIGh+GGhtlpp...pslst.hlhshssulhLuhLlshhshhlLp+hsplcphDAhAhAuhYGSlSusTaAsAlohLpc.Ghsa-uahsAhlslMEhPAllsulhl...........................................................slhttp.s....sttshspsp.......t........tpllcEuhhssulslLlGGlhIGlloG.puh.hlp...sFh.slFpGlLslFLL.MGhpAuc+lt-Lpphu.hhllaullsPllhuhlulhlG.......hhs..GhssGshllhAVLuASASYIAuPAshRhulPcANPolYlusSLulTFPhNlhlGIPLa.hthAplhh.t ..........PhlLFFhhGhlusll+S-LclPtslhchLolYLLlAIGh+GGhpLsp...sslss.llh.hlsulhLuhllPlluahlLpthsphspsDuAulAuaYGSlSAsTFs...suluhLp...s....u..lsa-uahsshlAlMEhPAIlluLhL..............................................................hph..t.tp..s............t..t.....................h........................htpll+EshhssullLLlGGllIGhl...s...G.....p...Ghp.lp......sFhsshFpGlLslFLL-MGhsAuc+Lpc.L.p.p.s.G.h.hlsFullhPlltuhlGlhlu......hhh...shusGssllhulLsASASYIAsPAAhRhAlPEAsPolhlsuSLGlTFPaNlhlGIPLY.htl.uph...h........... 
0 27 78 100 +5811 PF05983 Med7 MED7; MED7 protein Moxon SJ anon Pfam-B_5278 (release 9.0) Family This family consists of several eukaryotic proteins which are homologues of the yeast MED7 protein. Activation of gene transcription in metazoans is a multi-step process that is triggered by factors that recognise transcriptional enhancer sites in DNA. These factors work with co-activators such as MED7 to direct transcriptional initiation by the RNA polymerase II apparatus [1]. 25.00 25.00 25.10 25.00 24.90 24.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.84 0.71 -4.46 33 334 2009-01-15 18:05:59 2003-05-22 14:01:54 6 7 289 7 234 313 1 170.70 35 71.90 CHANGED tsouhaP.PPP.ahchasppp............................................-lphhhPPs.....P..ppssYpsFGp.aphc-.............tlPs.L-spGlppLYsps...........................................ts+tpEL+KLs+SLLlsFLE.......................Ll.....slluhsP....pphcp..Klc-lphlhlNhHHLLNc.YRPHQuREoLIhlhcpQlcc++pplcplcptt-clcphlp ................................................................................suh.P.PPs.ah.+.aTscN.........................................................................ph.t.hh..PP............sP...hpssYphFGspapsc-...................hl..s.LEs..p.GlcpLaPtp..................................................................................hD++pEL+KLs+SlLlNFL-.......................Ll.......................slLhpsP.........................pphcc.....Kl-Dlph.LFlNhHHLlNc.aRPHQAREoLhhhhE.Qhcp+pppscchpcph-clpchl.p.......................................... 0 79 128 191 +5812 PF05984 Cytomega_UL20A Cytomegalovirus UL20A protein Moxon SJ anon Pfam-B_5345 (release 9.0) Family This family consists of several Cytomegalovirus UL20A proteins. UL20A is thought to be a glycoprotein [1]. 
25.00 25.00 156.60 156.50 24.20 21.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.32 0.72 -3.81 2 14 2009-01-15 18:05:59 2003-05-22 14:13:27 7 1 6 0 0 20 0 101.60 84 97.33 CHANGED MuRRlhlLuLLAVoLsVALAAP.QKpKRSVpsEpPusotsGsshT.ptplp.opsGphsDssE-...-YDVLI...-Go...pP..ppssEp.psptKEspp MARRLWILSLLAVTLTVALAAPSQKSKRSVTVEQPSTST..NuD.NTTPSKNVTLSQGGSTTDGsEDYS.G.-YDVLITDsDGopHQQP.QEKTDEHKpEHTKENEK 0 0 0 0 +5813 PF05985 EutC Ethanolamine ammonia-lyase light chain (EutC) Moxon SJ anon Pfam-B_5363 (release 9.0) Family This family consists of several bacterial ethanolamine ammonia-lyase light chain (EutC) EC:4.3.1.7 sequences. Ethanolamine ammonia-lyase is a bacterial enzyme that catalyses the adenosylcobalamin-dependent conversion of certain vicinal amino alcohols to oxo compounds and ammonia [1]. 23.00 23.00 23.10 29.80 22.80 22.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.61 0.70 -5.29 41 1134 2009-01-15 18:05:59 2003-05-22 14:16:49 6 4 1071 12 212 698 20 234.30 47 80.75 CHANGED ssssts.sasc.L+phTsARIuLGRuGsu....lsTpplLcFphsHApARDAVasslDh..ttltttlps.........hsh.lplpSpAsDRspYLpRPDLGRcLsscuhtpLpspsstt..............sDlsIVlADGLSutAl..pppuhslLtsLhstLts....shpluPh.llspQuRVAluD-IGEhL........sA+hlllLIGERPG..LoosDSLGhYlTat.P+sG.psDA...........cRNCISNI.+.ssGLs.spAut+lshLlppuhchphSGVsLK .....................s.hcs-shpcLRp.TsARl.shGRAGsp.....hTpslL+FhADHupA+DuVhptls.........c-hlcs......................shlpV+SpssDKshYLsRPDhGRR.LssEuhcsL+spssts..............sDVplVluDGLSosAI..psNhtplLssLhsuLct......tGlpluss.hhV+huRVtltDpIGElL........GA+lVllLlGERPG..Lusu-SLSsYh......sYs.P...+.....su.psEA............cRsCISNI.H..puGhP.....ssEAAuhlspLh+chLcpKtSGlsL.................... 0 58 119 168 +5814 PF05986 ADAM_spacer1 ADAM-TS Spacer 1 Yeats C anon Pfam-B_1693 (release 8.0) Family This family represents the Spacer-1 region from the ADAM-TS family of metalloproteinases [1]. 
20.80 20.80 20.80 21.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.37 0.71 -10.35 0.71 -4.22 23 1464 2009-01-15 18:05:59 2003-05-22 14:39:08 9 158 90 0 835 1201 1 112.10 30 10.54 CHANGED cslpGsFscpp.t.tGYhcllpIPsGAspIpIpEhphSs.....saLAl+sss..GcahLNGpatlshspp.hphsGoh.hcYsp..sss....t.EpLp..uhGPlpEsLhltlLsp..tspssul+YcahlP ..........................hlpGsaspst.......hG...Y...p.llpIPsGApp.Iplpph.phus...................................saL...A.l....+sps....GcahLNG...s.as...l...s...hs..tp...h......ph.s.G.o........h..hcYpp....sss..............thEplp..uh..G..P..hp-sLhl.......lLht..............t.....p.....sss...lcYpahl............................... 0 119 178 440 +5815 PF05987 DUF898 Bacterial protein of unknown function (DUF898) Moxon SJ anon Pfam-B_5347 (release 9.0) Family This family consists of several bacterial proteins of unknown function. Some of the family members are described as putative membrane proteins. 28.00 28.00 28.80 28.00 27.90 27.70 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.10 0.70 -5.55 83 1019 2009-01-15 18:05:59 2003-05-22 14:40:54 8 9 864 0 134 586 125 248.30 35 91.26 CHANGED .pshphpFsGsu.uEaFtIWlVNhLLollTLGIYosWAKVRp+RYFYuNTplsGpsF-YpusshpllhGhL.lulslllhahlssphs........shhu..............hshhlhhhhhhPall.h...+uhRFphtpTsaRslRFsFpGshtpuhhshlhhslLs.sl....oLGlhh..........................................PhhhtphpcahhspspaGsppFshc...ssstshatshhh......shhlhlhhhhhhshhhh.hhs.hh.ht........................................hshhhhhhhshhlhhhhhhs...ahpsthtNhhhspspl........sthpF..pSslpstphhhlhlsN..........................................................hllhllTLG...................................LhhPastlR.hhR...ahscshsltu.s.slsplhusp.ppp..suAhu-t...huDsh 
...........s..pphhFpGsu.tpaFhlhllshLLTIITLGIYssWuhh+hc+ahhtshplssp.htathpshtlhhshl.hhh.......................................................................h.hhh.hhhhh.hhhh....tshpaph..othpsl+h.htsthhthhh.h.hhh.hhhhh.......shhh..hh................................................................shh.......h.......h.hpt..h....G...ht.p.......htht.hhhhhhh.......hhhhh.hhhhhhhhh...........................................................................................................................hhh..hhhhhhhhh.........hh.st..phhhsthth........t.h..ph...tsphp.hthhhhhh.............................................................hh..hohG........................................lhhshh.hh.hhph.hpth.l.....sh.th.......tt................................................. 0 42 86 108 +5816 PF05988 DUF899 Bacterial protein of unknown function (DUF899) Moxon SJ anon Pfam-B_5437 (release 9.0) Family This family consists of several uncharacterised bacterial proteins of unknown function. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.55 0.70 -4.90 51 520 2012-10-03 14:45:55 2003-05-22 14:44:00 7 1 316 0 226 988 197 222.60 38 92.43 CHANGED h..sp.slsoRpcWhtARppLLscEK-hTRppDplAspRRtLPh.Vcl-cs....YpF.....-sspGp.soLsDLFs.GRsQLlVYHFMF..uP...............sa.ppuCsuCShluDphsGthsHLpp+DlsLssVSRAPlscltsa+cRMGWp.hPWhSShsuD...FNhDFsVshstt.............................................................p-hPGh.SsFhRcs....GpVaHTYSo...........huRGh-slhusaphLDhTPhGRsEs.................htWhR+HD.......c...Y 
......................................................slsspppWhsARctLLscEKt..hTRttDtlAA...pRRtLPh.VcV-.cs....Y.pF..........-uspG.t..soLsDL..Fp..G.RsQ.LlVY+..aMa.....uP..................sh..ctuCsuCShhs.Dph....G.tl.sHLpp+DsohssVSR............A............Plscltsa+pRMGWp..hsWhSot......s.us...FstDass.ht..t..........................................................p-hsGh..os.Fh.R............-s.....sp.VaHT..Y.S.o...........huRGh-tlhssashLDhsPhGRpEs................h.WhphpDt....................................................... 1 55 114 164 +5817 PF05989 Chordopox_A35R Chordopoxvirus A35R protein Moxon SJ anon Pfam-B_5472 (release 9.0) Family This family consists of several Chordopoxvirus sequences homologous to the Vaccinia virus A35R protein. The function of this family is unknown. 18.80 18.80 19.10 19.30 17.70 18.70 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.10 0.71 -4.66 9 54 2009-01-15 18:05:59 2003-05-22 14:47:44 7 1 39 0 0 44 1 157.80 49 96.24 CHANGED .shhsllTshGVlpIpscph.+sh.sDLGI.sl..l-hlGPYtluolplp.lssshlpppslpcCYlutNGhllpCScpspLshPlpplasuaho+sshllChDc..h.+Lhl-sphQPFYlhoShslhcu..cIlEVYNLYpcGDYphIlNPSssFLphLscpsshCLoDpsGWlIlDu.Ksc ...MDtshlITshGlLpIsD.....Tlh-DLsI.oI..hDhIGPYlIGsIchspIs.lc-IchuDMpcCYFuhpGpllspspsphhphslhpIpsAYho+ssh.l.IsCDh..chhL.lcspaQP...FalhsulslhsA..pIlEsYNLYptGDYphIlNPSsshhhpL..ppshCloDGsGWhIlDs.Ksp..................................... 1 0 0 0 +5818 PF05990 DUF900 Alpha/beta hydrolase of unknown function (DUF900) Moxon SJ, Bateman A anon Pfam-B_5475 (release 9.0) Family This family consists of several hypothetical proteins of unknown function mostly found in Rhizobium species. Members of this family have an alpha/beta hydrolase fold. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.76 0.70 -5.04 13 508 2012-10-03 11:45:05 2003-05-22 14:51:33 7 13 338 0 213 832 107 214.60 24 44.33 CHANGED spsutpshlsscltpsts+clllFVHGaNssF--AlhRhAQIsaDhs.......................hsu..ssllFoWPSpup...lhs.............YsYD+E..SssaSRsALEchL+hLAps........sscclpllAHSMGsaLshEuLRQluhcst..................shss..KlspVlLAAPDlDl.DVFspQhp...........shG+.s........chTlasSp-DRALp.lSphlsGsssRlGshsss.....pptltstGl.....slVDloclcuuDh.lNHspFussPpll....pLlusR ................................................................................ts..........................t.ccl...h..l...FlHGa.N...s....s....a...p.......-.....u........h..h+....h.A...Q.l...s...p...s.......t................................hps.....ssll....Fo..W...P...S.p..u..s.......h..hs.......................................Yh..h..D.+c......us..p..h....o..pss.......L.t....c......hL....p.t.l.....s..ps..........................................s...scclpllAHSMGshlshc...u...Lc..phshpst...................................................................th.t....+lss.l.lLs.u.P..Dl.Dh..-..s..Fp.ppht..................th.s.p.t.........pholasSpcDpALt.hSp..h..l........t....u..s....h.....R..lGt.hss............h.......tl.............hlDh...sph..ts.........t.....h.t..Hs.haht....h............................................................................................................................................ 0 53 129 169 +5819 PF05991 NYN_YacP DUF901; YacP-like NYN domain Moxon SJ, Bateman A anon Pfam-B_5522 (release 9.0) Domain This family consists of bacterial proteins related to YacP. This family is uncharacterised functionally, but it has been suggested that these proteins are nucleases due to them containing a NYN domain. NYN (for N4BP1, YacP-like Nuclease) domains were discovered by Anantharaman and Aravind [1]. 
Based on gene neighborhoods it was suggested that the bacterial YacP proteins interact with the Ribonuclease III and TrmH methylase in a processome complex that catalyzes the maturation of rRNA and tRNA [1]. 24.60 24.60 24.90 25.40 24.50 24.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.22 0.71 -4.50 69 1318 2012-10-03 20:43:45 2003-05-22 14:58:10 6 12 1288 0 277 838 38 157.20 34 60.56 CHANGED LlVDGYNlIsu.....WscLcclt.cts........L-sARppLl-hLssYpu.aps.hclllVFDAphltG.tpchpph....tslpVlaTc-scTADsaIE+hstpltp.t.....ppVhVsTSDtspQhhlhupGAhRhSuc-hhp-lppsppplp.p........chpp....tpp..ppphhp.......ptlspchhcphcchRc .........................LlVDGYNhIus......WspLpplt.....cts....................................L-...pARppLlctl......u.s.Ys..u..hpu..hc.......lllVFD.AhhstGh..p...p.p.h.ph.....slcVlF..T..+E..s..E...TADsaIE+hstc.hts.t.............ppltVsTSDhsEQhs.l.a.upG.AhRhSu+ELhpclppscp.plp.p..............phpp....hpp....pp.hht..............h.spc.htphcchh......................................................................... 0 115 208 250 +5820 PF05992 SbmA_BacA SbmA/BacA-like family Finn RD anon Pfam-B_8139 (release 9.0) Family The Rhizobium meliloti bacA gene encodes a function that is essential for bacterial differentiation into bacteroids within plant cells in the symbiosis between R. meliloti and alfalfa. An Escherichia coli homolog of BacA, SbmA, is implicated in the uptake of microcins and bleomycin. This family is likely to be a subfamily of the ABC transporter family. 
20.00 20.00 20.40 20.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.07 0.70 -5.23 10 817 2012-10-02 13:23:42 2003-05-22 14:58:56 7 3 763 0 82 1485 1772 298.70 62 77.94 CHANGED s+cWQhWSlLGTulILFshaFsVQluVtlNsWYGPFYDLIQcALusPsulThs-FYpulhsFhtIAhluVsluVlNsFFVSHYlFRWRTAMN-YYhu+WspL.RHIEGASQRVQEDTMRFuplhEsLGlullculMTLIAFLPlLasLSspVshLPllG-lPauLVhAAllWSLhGTlhLhlVG..IKLPGLEFsNQ+VEAAYRKELVYGEDc.ssRApPtTLcELFssVR+NYFRLYFHYhYFNluRlhYLQsDs.lhshllLsPoIVAGplTLGlMsQIoNsFG+VRGSFQYLlNSWoTIlELhSIYKRLRuFEupIp ...............................................................................................PH+WQ.h.W.S.IL.G.o.u..L..I..I.Fl..T.WF.h..V.p.V.uVAl..NAW...Yu....PF...Y.DLIQ...s...A...L.......o.......o...P.......c....p.......V...o.......l....pp....F......Y..p.....p....l.u.............VFLG.I..All.AV.lI....u.V.L.....N............sF...F............VS.H.YV...FR.W....R.....T......A.....M...........N...........E...........a....Y.........M......u.......p.......W.............p..........p..........L.........R..............H..........I..........E........G.......A.........A...........Q............RVQEDTMRF...A............o............T.......L............E.......s............h............G.....V.....S.......F....I.sAlMTLIAFLPVLlTL..S..s...H..V..s..c...........L...........P...I...l...G....a....I....P..au....LVhA..AIlWSL..h...G.TGL..L.A.V...V.G..IKLP.G.LEF+NQ.R....V.E.....A.AY..R.........K..E.L........V........Y........G.E........D........D........A........s.........R........A.........s.........P.......s.....T........V.R..E.....L.....F...o....u...VR.........+.......N..Y...F...R...L.YF..H.Y.......h.......Y....F.......N....IA.......RILY.......LQ....V..D.N.....l.......FuLalLFPS.IVA...GsITLG.LMoQITNVFGQVRG...uFQYLINSW.....TTl.......VE.....LhSIYKRLRuFE+pL.c.............................................................. 
1 16 35 59 +5821 PF05993 Reovirus_M2 Reovirus major virion structural protein Mu-1/Mu-1C (M2) Moxon SJ anon Pfam-B_5584 (release 9.0) Family This family consists of several Reovirus major virion structural protein Mu-1/Mu-1C (M2) sequences. This family is family is thought to play a role in host cell membrane penetration [1]. 25.00 25.00 61.20 54.00 17.70 17.50 hmmbuild -o /dev/null HMM SEED 674 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -13.03 0.70 -6.71 2 56 2009-01-15 18:05:59 2003-05-22 15:04:45 7 2 25 36 0 67 0 614.20 48 98.52 CHANGED GNspo.VpThNlTGDGNsFpPou-hoSTAsPulsLpPGhLNPsGh.WhslG..TSVso.suL...........................AlVs.p.t.hshVsEthhtsFoKA.hslph.p.hhpth+shsss.hhSshhThsssYVGsoAtpALsN..pps.VhhsThQsM.lp.oItAth+slthWp.cLcsA.ThLssslshGpsSCshpSllthhcD.LP.DsLhhpYPpEhhsshh+R.sulp....Dsp.sss.scslstVAuuh.AsoupsssL.tpSphhpQAhshhsut-.-lItu.sPlPssVFs.s.tPtsYpsthlK.--ApW...Isch.sosh.hpshs.oGssatlpL..GsTRVlshsthhs.hhsLDluGp.h..-Ts.D.ssphsuFllhQoplPhp.hTsAupIsthTVVshsth.A.s.uhss..QshhshhpLthsaE.Eplsth.P.h..YhLuTFl.sushosoNhopPslhDuLLThoPl.htEsThKGtVVstlVPAplhus.TsEplsuuLsNDAhhhh.s+hsKlAp........A.KhDDsAsss.hS..uV....QhtlSQltps.ss....P..lL.+hApRAhphFlusPShhlp.uhPVLopps.h.ALspGVtTuLRTtSLutGVpsAspKLpuppSlpshpQuFhDpluohaPu. 
...........GNAoSlVQshNlpGDGNhFsPSAEToSoAVPSLSLsPGhLNPGGhsWhhIssphssosPuuLRhMTotDlsphu..shsNSoGhlPopuhh.sss.s.c.EsLsVlT-HAIspFsKhpMAhEhsR-aLDtptVsstSsphpsalsYVDCaVGVSARQAhsN.FpppVPVITpoR.s..ahs.ShQshLpsLs.WEhDlRphhTlLPTssshGcloCcM+SVVphlD-QLsDsSLCRhYPcpAAsAlA+RNGGI+...Whssspsps.u.AsNslAASshushAsssPLsEKSstsEpuMcLVsssslDIlsSpsPlsuSVauhsscPpsYNlRTL+l-EAhWLR.h.tosshshphp.hssppuTphHh.L...ptGopVlNL-QhusMhF.lsluGKsYK.cssa...DPss++lsllVhQSKIPFEtWTsASQIsuIs...VutVplaA.t-SShss...ppIIupTSLuYhFERETlsssssElNhYLLsTa..sss.osss.s..P..DsWDulhTloPLouGpVTlKGssV-pVVPuDLlGuYTPEuLsAALPNDAuhlhts+AsKlAc........AIKh-DDussDEsSPhSsPIQG.LAlpQ..L-T..s..t..G.sRhhpP...uhLp+lASRAhphFlGDPpoILpQusPVLpDsslWsuhsQGV+sulRTKSLSAGV+oshsploAspSlQsW+QGFLsKlpshF............... 0 0 0 0 +5822 PF05994 FragX_IP Cytoplasmic Fragile-X interacting family Finn RD anon Pfam-B_8072 (release 9.0) Family CYFIP1/2 (Cytoplasmic fragile X mental retardation interacting protein) like proteins for a highly conserved protein family [1]. The function of CYFIPs is unclear, but CYFIP interaction with fragile X mental retardation interacting protein (FMRP) involves the domain of FMRP which also mediating homo- and heteromerization [1]. 
20.50 20.50 20.60 21.30 20.40 20.40 hmmbuild -o /dev/null HMM SEED 820 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.27 0.70 -6.76 9 299 2009-01-15 18:05:59 2003-05-22 15:10:11 6 10 123 1 179 271 13 647.30 51 67.18 CHANGED scscclhsls.puLplLSphsshVhEh.huaKlhsPss.p...sNctsstt............spcYERAsRYNYoctE+huLlpllAhIKuL...pplhtctEollsptlpcsI.spLQ-FspppLcEsl++ss+....KK-hlpsllpsl+shhsDa.suhc.tpcsthpsc+c........pst.clpl...........L.hlRo.LpSLlssc.thsc+p...hppphcupplpsIctFhpcShaastLLshspsLppssDLSpLWa+EaaL-ls.........................+RlQFPIchShPaILocalLpsp-.PuLh-hlhYPLslYNDuAthALhph+pQaLYDEIcAEsslCh-phsaclu-psFshh+phuushhhDKphhsphpp.th.h....pP..........susRapsLLpQp+lpLLGpsIDlstLluQRlNtthhcpL-tAIsphEupslpulltl-tLl-l.+lTHpLLucah.slssF-shhppAppsso..u.pu+IhlHl.....a.cLshDhlPsYsapuss.R.......h..ttt.tRccss..t.ha.husKslstuasshhpthssFlus.HF+slsRlLshpulsllhpplLc.....hlpsshtsalpshhphhspls+l..PhhshGs.ushshFptplpshlpas-l+.clhpshRplGNslhFsthl-pAL.hpcpstshhpAhshps.hsps...hpcsp-..s.....hppLcphauhLp.hs.l.................s-lhh..phpsshulhc.lLpchcsFl.s.........htss..ssulhsl-oss...pFttlWSslQFlaChPhs.ss-h..........ss.phFG-GlhauusslIsLhsQpc+F-slshChHllplpc....ssscDch.+hlslp+hl-phppaplhssphFtlhsp 
......................................................................................................................................................pthh-lsLpGlQLLSpWoutlhEh..hoWKLlHPocch.....pNc.cCPsp................AEEYERATRYNYosEEKhAhlElIAMI....KGL...pslMsRhEolhspAIRpslYutlQDFsQhsLc-PLRpAh+....pKKshl................sll.slRcshsDW.t..u.hc..s....s-ssh+ucK-s..........psshplplsphss..................................................................................tssssQLYMlRThlESLlu-+.sss....K...+...s.............h.+p.pl-..u...hl.tl-pFacpSaaasaLlshS.csLppssDLSQLWaREFaLEhT........................t+RIQFPIEMShPWILTDHILc....op-.................suhhEhVLYsLDLYNDSA.aALstFp+QFLYDElEAE..V...NLCFD...Q..FVaKLu..-pIFsaYK....hAuShLLDK...+h+.s.-sps.uh.h....hP.....................ssRY-.o.LL+QRHVQ..................LLGRSIDLN+LIoQRlssshh+ul-.AIsRFEupDL.o.uIV...EL-hLL-lNRhoH+LLscah.sLDsF-uMhcEANHNVo...............u....PaGRITLHV.....FhELsaDFLPNYCYNuuTpRFV............................+s.hshs....pt.....pR-K...sp..sp......a..laGSK...........LNhAapphhs.YpsFlGsPHhpslsRLLGYpGlAllhcpLL+.h....c.llps...s...l...h...pal...csL.h.psMPK......C+L..Pph-YGSsGlLpaaptpLpslhpYs-lKo.hhpshRElGNsllFshLlEpuL.s.cEssDllpAAP.FpsllPcsh.......hKc...s..p..c.p.sp...........hp+L-spauslp..hss.lp+hus.p.............................................................phthA+-uDLLT+ERLCCGLShFEslLsRl+saLpcs.....................lWp..G.s..P.sN.GVhclD-Cs...EFHRLWSAhQFVYClPss.spEh.........................os.EphFG-GLpWAGC.lIsLLGQQ+RFchhDFsYHlL+Vp+.......................Du+Dc...lc..sl....L...c...+....hs-RlR+aplLNsplhslL................................................... 0 74 97 142 +5823 PF05995 CDO_I Cysteine dioxygenase type I Finn RD anon Pfam-B_8006 (release 9.0) Family Cysteine dioxygenase type I (EC:1.13.11.20) converts cysteine to cysteinesulphinic acid and is the rate-limiting step in sulphate production. 
21.40 21.40 21.90 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.97 0.71 -5.10 8 920 2012-10-10 13:59:34 2003-05-22 15:12:25 7 10 728 11 390 865 131 155.50 22 77.31 CHANGED Mp.........pssshpssossshlspLtphhpppthsVsphttLhpshpsshs-Wt...hhhhaDtspYTRstl.tGssch-lhLlsWsPGpuoslHDHusStGshplLcGpLpEotashsct...............h....shhspphhhpsGsshhpspttlH+l...........tNsSsscsAVSLHlYhPPhsshsha-pp ......................................................................................................................................................................l...th................ph......h........s.......p.p...Y..tpphl...ps.........s..s.............c..hs.l........h..l..ls..W.sPGps..o..slHD..Hsssash.hs........lLp.G.p..l..p..............E..p.tap.hspp.....................................h......h.s..p.p.h.h...h.t.st..s..s...h.....h........s..t.....ts.l.Hcl.......................tss.s..t..sp...s.ulSlHlY...........................s........................................................... 0 107 212 322 +5824 PF05996 Fe_bilin_red Ferredoxin-dependent bilin reductase Moxon SJ anon Pfam-B_5667 (release 9.0) Family This family consists of several different but closely related proteins which include phycocyanobilin:ferredoxin oxidoreductase EC:1.3.7.5 (PcyA), 15,16-dihydrobiliverdin:ferredoxin oxidoreductase EC:1.3.7.2 (PebA) and phycoerythrobilin:ferredoxin oxidoreductase EC:1.3.7.3 (PebB). Phytobilins are linear tetrapyrrole precursors of the light-harvesting prosthetic groups of the phytochrome photoreceptors of plants and the phycobiliprotein photosynthetic antennae of cyanobacteria, red algae, and cryptomonads. It is known that that phytobilins are synthesised from heme via the intermediary of biliverdin IX alpha (BV), which is reduced subsequently by ferredoxin-dependent bilin reductases with different double-bond specificities [1]. 
25.70 25.70 26.20 26.30 25.30 25.60 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.30 0.70 -4.88 62 239 2009-09-10 22:50:27 2003-05-22 15:14:14 7 4 127 41 111 271 579 213.70 26 84.15 CHANGED hpplppthpph...slpPh.slssshpthpuphstp....phhIpshhapstthR+l+lptscsGsuLplLssVhaPs.pa.DLPlFGsDl..Vshsst.llA.llDlpPl...ppc.ta.pcahpsLtslh.........ph.shhspspplPt-up.aFSPhhlasR.....ssspp.csthhsthpcYLplahphhppApsh......sss.p..................htclhpuQppYspaptcpDPuRtlLp+hFGppWu-cYlcphLF- ........................................................t...h.t.htth.thtsh..lsttht..hpuphttt....phhlpshsaps.phR.+.h+hthhcsGpshplLpsVhaPps.........pa.D.......LPlFGsDl..lshsst..l.hs..hlDlpPl.......tpptp.h.....ppYhp...tLtsLh..........p..t...h..h.s..pspclst.up.hFS.shslasR..........ssstp.....tppthhsthpcY....Lphahp.hhpp.up..........sstp..................................................................................htphhpuQppYsphptc.pD.s+tlLp+hFGppWu-calpphLFs............... 0 31 72 102 +5825 PF05997 Nop52 Nucleolar protein,Nop52 Finn RD anon Pfam-B_8003 (release 9.0) Family Nop52 believed to be involved in the generation of 28S rRNA [1]. 
19.70 19.70 19.90 19.90 19.40 19.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.44 0.70 -4.91 35 436 2009-01-15 18:05:59 2003-05-22 15:24:39 7 5 303 0 284 427 4 187.10 27 44.25 CHANGED tphths+pLAus-cpsR-+ulc.pLppalsppspp..........sch-hhKlWKGLFYshWhsDKPlhQpcLAsclu.pLlpshpssp..................sthtFhpuFhpoMs+EWssID+aRlDKahhLhR+hlppthphLpcppWct...phlpchhpllhcp...hltsps......hspGlthHlh-lal.....-ELtclhtt........................................lsspslhtllcPFhpl.htpspschLhp.....plpppla..cpl ..........................................t....hhppLAus-phhRppulc.tLppaltsppth..........................st.-hhKlWKGLaYshWh.pD+P..lhQppLupplu.pLlt.sh.stt...................................s.hhFhpsFapohs+EWt.uIDphRlDKahhLlRhhltt............shp..h.lpp............p..................t.Wpt...............phlpthhphh.pt.....hh.sps...................stGlthHhh-lal.....-ELtcsht.t.............................................................................httt...hhlpPahph.htps.sphlhp.phtptlh...h................................................................................... 0 93 146 224 +5827 PF05999 Herpes_U5 Herpesvirus U5-like family Finn RD anon Pfam-B_8027 (release 9.0) Family This family of Herpesvirus includes U4, U5 and UL27. 
25.00 25.00 37.00 36.00 18.60 17.00 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.62 0.70 -5.89 14 74 2009-01-15 18:05:59 2003-05-22 15:37:14 6 2 28 0 0 71 0 432.70 41 71.31 CHANGED lcsLpFh--hsthaptlTKhsAlGpYussplhshs.p+sTlutahRhLsEhhstLahphcthh...p.clcsEshtpLtshLsshhspthcssh.spstl..FacscaFtphpssLh.hatlLCuCsEC...R.phFhhatthsht.......++s......stsl+lpshsth..ss.............hcLPhh.HLsttpshsLpsplu+DLGh.Sh..IppslEcp+.LPI.shs.pluhs.pc+slLplhoNIVhhLFllppl+phIhpELshhhchasctltcLphsh........................................Etth..hLssssshpshphhs.......hhc+l...+...hthlpssusp.phhpphp-slchuFpls.atphs.......hhhHhhhltp.....................................ssh-...LstssNLl.aF...tst..p.h.hl.t...slp-s.Ls....s........T+...hhsG...........shshhhpGhpaFt.......................chc.+ththKhhslc+ht.chhh ...................lc-hpalPEhScuuKpIuhAlATGQYsVsoLlsY+.shGTho+YLRpLCshs--LahRL-Gsl....sLhL-sE-+ElItRhLPsslC+tLslcY+sp+sAh.FF+soFhsRsEuAL+clYusFCpCG-.s...R..cthspststsus............................ccus........hS.usssSpascL..Ecs-.LRLppstp.................LGsh+LPAIRHLTAs-ssRVpsuVuRDLGF.uc......WSpoLscca.FLL.PsGh.usus.Pc+GYAhYLASNsVLsLplIRlLRssIR+EasAslRhLoG-Vp+LlRhh.........................................cspu..ALlRpuhuQs.sppR.......tLpRh...+..h.c-lcRhphspssFlcsFCDaL-lspRIPDY+ulS.phpREhLhLHsF+LRR.hhs................................ssosE...ss+spRLlha..l++Gcs.spDtstL.phuosLSDsELS....N........s+...p+As.....s.........lVuhsssulch...........................csHhp+h-RLaVRRhRs+cV.s................................. 0 0 0 0 +5829 PF06001 DUF902 Domain of Unknown Function (DUF902) Yeats C, Bateman A anon Pfam-B_3539 (release 8.0) Domain This domain of unknown function is found in several transcriptional co-activators including the CREB-binding protein, which is an acetyltransferase that acetylates histones, giving a specific tag for transcriptional activation. This short domain is found to the C-terminus of bromodomains. 
The 40 residue domain contains four conserved cysteines suggesting that it may be stabilised by a zinc ion. In CREB this domain is to the N-terminus of another zinc binding PHD domain. 25.00 25.00 28.50 28.50 23.70 23.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.39 0.72 -3.82 8 198 2009-01-15 18:05:59 2003-05-22 15:46:14 8 23 88 31 105 157 1 41.80 80 1.84 CHANGED PVMpthGYCCG+KhsFsPQsLhCYGKpLCTIsRDpsYasYps ....PVMQSLGYCCG.RKhEFSPQTLCCYGKQLCTIPRDAsYYSYQN...... 0 30 41 68 +5830 PF06002 CST-I Alpha-2,3-sialyltransferase (CST-I) Moxon SJ anon Pfam-B_6887 (release 9.0) Family This family consists of several alpha-2,3-sialyltransferase (CST-I) proteins largely found in Campylobacter jejuni. 23.10 23.10 23.10 24.80 22.00 23.00 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.95 0.70 -5.14 7 193 2009-01-15 18:05:59 2003-05-22 15:52:33 7 2 103 15 9 148 3 226.00 41 89.60 CHANGED .KslIIAGNGPSLKsIDYphLPK-aDVFRCNQFYFED+YaLG+cIKuVFFNPsVFhpQYaThppLlpNsEYpl-pIhCSsh.Nhshl-spp.hc......hptaFsssp.Gap.hlKpLctF.s.alKY+ElY.spRITSGVYMCslAlALGYK-IYLsGIDFYtsstshYsF-spppNlhphhsshpppcsp..hHShphDlpALphLpKhYtlplYuLsPsS.LspahsLus...shs.sFhl.cK.psYhpDILlssp.s..ph.t.h.hK+.+ltpNlh.hL.h+DlLch.p-lK+hhKEK ..................K.lIIAGNGPSlKpIDYSh.LPpDaDVFRCNQFYFED+YaLGKchKtVF..aNs.hh.pQhhThhpLlpNpEYchE...Ihhosh.Nhtphct..c.hhc...........................h.paasssp.uas.hhppLct.F.s.ahpapch..a....sp+hToGlYMhssAlAhGYK-IYLsGID..FY........pphs.ahFcpppppl.p....ht.ppptp..hHoh..p...hDlpAL.hhpcp..YtlplYslsPpS.Lspah.LuP........p.s.tFh...cK...shhp.-hlhss....tph.........tp......p....h...p...h..................................................................................... 0 3 7 9 +5831 PF06003 SMN Survival motor neuron protein (SMN) Moxon SJ anon Pfam-B_7026 (release 9.0) Family This family consists of several eukaryotic survival motor neuron (SMN) proteins. 
The Survival of Motor Neurons (SMN) protein, the product of the spinal muscular atrophy-determining gene, is part of a large macromolecular complex (SMN complex) that functions in the assembly of spliceosomal small nuclear ribonucleoproteins (snRNPs). The SMN complex functions as a specificity factor essential for the efficient assembly of Sm proteins on U snRNAs and likely protects cells from illicit, and potentially deleterious, non-specific binding of Sm proteins to RNAs [1]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.80 0.70 -5.28 3 460 2012-10-02 16:56:36 2003-05-22 15:59:04 7 15 193 8 279 592 1 149.70 19 54.01 CHANGED GQSDDSDIWDDTALIKAYDKAVASFKNALKNEDCopPuEs-EpNPGsKRKNNKKNRSRKKCNAAPLKcW+VGDSCNAVWSEDGNlYoATIoSIDtKRGTCVVsYTGYGNcEEQNLADLLsPsoD...s-pt.pEsNlNETEaSTDESDRSS+SHcs+s......pNpspu+sS.WNsRFPPsPPPsPPGF....GRHGEKhc.shPPFLSGWPPPFPsGPPMIPPPPPMSPDusEDDEALGSMLISWYMSGYHTGYYLGLKQGRMEAAluKcuHpK ....................................................................................................................................................................................................................................................t......t..W.p..sG-pC.Ah.a.tpDG..p...h..Y.ApIttl..s...p...tt..ssh...lhatsY.sNt..E.......hts...lh...........tt.............t....t......t........................................................................................................................................................................................................................................................................................................................................................................ptt..................................................................................................... 
2 91 127 207 +5832 PF06004 DUF903 Bacterial protein of unknown function (DUF903) Moxon SJ anon Pfam-B_7037 (release 9.0) Family This family consists of several small bacterial proteins several of which are classified as putative lipoproteins. The function of this family is unknown. 21.10 21.10 21.60 28.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.13 0.72 -4.44 30 1612 2009-01-15 18:05:59 2003-05-22 16:01:35 7 1 599 28 138 407 4 49.80 54 68.54 CHANGED shVlsppDGppIlTpsKPchDc-oGhhpYcchs.GcctpIN+DpVpplpEh ..sYVMpTpDGRpIlTDGKPplDsDTGhlSY+Dtp.GNppQINRsDVpphlEh.......... 0 10 29 85 +5833 PF06005 DUF904 Protein of unknown function (DUF904) Moxon SJ anon Pfam-B_7038 (release 9.0) Family This family consists of several bacterial and archaeal hypothetical proteins of unknown function. 30.00 30.00 30.10 30.00 29.90 29.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.42 0.72 -3.68 35 833 2009-01-15 18:05:59 2003-05-22 16:04:09 7 2 821 4 109 271 20 75.10 62 94.17 CHANGED MshElL-cLEpKlppAl-TIsLLphEl-ELK-cptt.t.......ptpppL.pENpcL+................pEpssWpcRLcuLLGKlcpl .....MSLEVFEKLEAKVQQAIDTITLLQMEIEELKEKNssLspEs........QphpcpREpLE+ENppLK................cpQsuWQERLpALLG+MEEV.............. 0 15 45 82 +5834 PF06006 DUF905 Bacterial protein of unknown function (DUF905) Moxon SJ anon Pfam-B_7072 (release 9.0) Family This family consists of several short hypothetical Enterobacteria proteins of unknown function. Structural analysis of the surface features of the protein YvyC has revealed a single cluster of highly conserved residues on the surface. Additionally, these residues fall into two groups which lie within the two largest of the three cavities identified over the surface. 
The conclusion from this is that these two cavities with, Leu 58, Glu 75, Ile 82, and Glu 83 and Pro 86, conserved, are likely to be important for the molecular function and reflect the cavities found on the surface of the FlaG proteins in Pfam:PF03646. 22.30 22.30 22.30 22.50 22.10 22.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.46 0.72 -4.60 10 520 2012-10-01 21:18:35 2003-05-22 16:06:47 7 3 290 1 14 219 2 65.50 62 87.11 CHANGED LhsLP-GPFTRcQApA...VuAsYpNVhIED.DQGsHFRLVVR...ss-GphVWRsWNFEPsAGchLN+YIpopGIh+ ...........h.hLP.-GsFoRcQA.A...VsstYRNVFIED.DQGsHFRLVlR....s-GphhWRsWNFEssAGhhhNphlts.GIl+...................... 0 2 4 9 +5835 PF06007 PhnJ Phosphonate metabolism protein PhnJ Moxon SJ anon Pfam-B_7179 (release 9.0) Family This family consists of several bacterial phosphonate metabolism (PhnJ) sequences. The exact role that PhnJ plays in phosphonate utilisation is unknown. 25.00 25.00 54.90 54.80 24.20 17.80 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.78 0.70 -5.62 32 691 2009-01-15 18:05:59 2003-05-22 16:13:10 6 3 653 0 123 424 122 272.60 76 95.83 CHANGED psssYNFAaLDEpoKR.IRRulLKAlAIPGYQVPFuSREMPhshGWGTGGlQlThulIGtsDsLKVIDQGuDDosNAVsIR+FhppTs.GVsTTc+Ts-ATlIQTRHRIPEpPLpcsQILVhQVPhPEPLRhlEPpEscs+pMHA.t-YuhhaVKLYEDIs+aGcIspoacYPVhVNsRYlMcPSPIP+FDsPKhcpssAL.LFGAGREKRIYAVPPYTpVcsLsF-DaPFclppaps.......sCuhCGuocSaLDElhhcDpGs+ha...CSDoDYCpp+htptpt .........sh.sGYNFAYLDEQTKRMIRRAILKAVAIPGYQVPFGGREMPMPYGWGTGGIQLTASlIGcsDVLKVIDQGADDTTNAVSIRpFFpRVo.GVsTTERTs-ATlIQTRHRIPETPLoEDQIllaQVPIPEPLRFIEPRETETRpMHALEEYGlMpVKLYEDIARFGHIATTYAYPVKVNGRYVMDPSPIPKFDNPKMcMhPALQLFGAGREKRIYAVPPaT+VESLDFDDHPFoVQpWD-.......PCAlCGSscSYLDEV.Vl.....DDsGsRMFV..CSDTDYCcQppptt.pt.............. 0 22 65 90 +5836 PF06008 Laminin_I Laminin Domain I Yeats C anon Pfam-B_1925 (release 8.0) Family coiled-coil structure. 
It has been suggested that the domains I and II from laminin A, B1 and B2 may come together to form a triple helical coiled-coil structure [1]. 30.50 30.50 30.70 30.60 30.20 30.40 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.71 0.70 -5.07 9 327 2009-01-15 18:05:59 2003-05-22 16:14:52 9 117 70 0 165 325 1 235.80 22 9.88 CHANGED p.......lhuls.hTGsh.tsh+l.tphcshpppLpphhtthstpppplpsh-pt.lpsLtp-s-sLtccsspshspupplppsscpThppApsLtttIcpltpslpplhpphtthsp...pt.pssspslppthtpup+hLpplRtR.s.hpp.ppsA-t-hctAptLLsclpphhpp.pt-spuLhpslpcpLscaps+LpDhcphLccAtspsp-ApcLsttsptshp.thpcpppclpcppphhscpLssupshLtpssthLpthspsh ....................................t...p....l.sls..sush....t.hch.hphps....hpp.pLpphhsthp.......tp.plp.h-pp..hpsL..p-hc..pLtp+s........p..ts.ttcupp.h....tpssppshppAppL.p.lpplhpp....lp...t..l.pp.h..thst....tpphss..tplpphhtpspphlp-hRp+...s..httppp.A-tEhptAp....t..LLpplpphhtp.ptc....sp....sl..hps...lpcpLscaps+LpDhpphLpcA..hsps+pAp.....plsttNptphp.thpcphpplpptppphpphLppups.Ltpsp.hhp.hpp..h............................... 0 21 33 81 +5837 PF06009 Laminin_II Laminin Domain II Yeats C anon Pfam-B_1925 (release 8.0) Family It has been suggested that the domains I and II from laminin A, B1 and B2 may come together to form a triple helical coiled-coil structure [1]. 
27.90 27.90 27.90 28.20 27.80 27.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.75 0.71 -4.42 8 340 2009-12-23 11:36:11 2003-05-22 16:21:42 7 125 84 1 171 348 0 130.80 28 5.22 CHANGED uKphAscAscosspVLctlpslspsltphppsluplsssht.hpt.hp..stspsltsAssuV+sLpcpAspLl-+Lcslcphcss.t.....LScNlucIKcLIuQARctAspIKVuspFcscoslcl+s.pcssshsshTsl ........................................................................KthsppAscpstpVhptl..pslp.pslpp.h.....pps...hsphpss..............t...........s.hspsl...s..sA.ss....s..VpsLp...p..phsp.Lhc.KL.csl.......c....php....s............locsIscI+EL.IsQARct.As.p..lpVuhpF.s..Gpsslpl+s..shsshpshTs.............................. 0 24 38 93 +5839 PF06011 TRP DUF907; Transient receptor potential (TRP) ion channel Moxon SJ, Mistry J, Wood V anon Pfam-B_5564 (release 9.0) Family This family of proteins are transient receptor potential (TRP) ion channels.\ \ They are essential for cellular viability and are involved in cell growth and cell wall synthesis [1]. The genes for these proteins are homologous to polycystic kidney disease related ion channel genes [1]. 
28.50 28.50 28.60 28.50 28.30 28.40 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.48 0.70 -5.91 75 554 2009-01-15 18:05:59 2003-05-22 16:36:52 7 15 147 0 432 554 9 419.00 23 50.82 CHANGED ulsassAsluGlullsouhsushut..............................u.......ssAscl......ussshslFtYaQshuhsGMhuVs.hPslhtuaspNFtWShGlIphsFhQphhsha....hpuTGG............ssoshhsshtsh........sh.s........................................................R..............th.h...............................................................t.s...ss..hhhp.......GlpRluhhssIcposhahTu.hhhallhlhhlslhlhhhKhhlclh...................h.+tthh.psschtpa......Rppah...slltG...slhRllh.lhas.lsllshapFspsD..............................................................................................................S..suslllAslhlllhhullsas................................................sh+llhhu+cphphhpss..uhhLYsD.......pslp+aGalYspa+ss.taaahlsslsYhhl+ulhIuhsQs..sGhsQslslhllEhlhllslhhhRPahsKpo.NhhNI.sIsllphlsslhhlhFsshas.stsspulhG.llhhllpAshslhLhlhl...llssllslhp+sP.cs+hps..........................................hpDsR....sSFhppts 
.......................................................................................................................h.ltasssslsshullsoshhu.hhu................................................s....sshsph.................usshhs...lhtahQthshhGhhuls.hPshht........sa.spshtW.Sh.G.ll...phs...h...hpph..hs.ha...tsoGG.....................sss..h.h.s..tsh.........................................................p........................................................................................t..............hhh.........Glpphu..hhslptsshh..hTu.hhhah.lhlhhhhhhlhhh+hhl.chh......................................................................................h..+hthh..tpph.ta......Rp.pah...sh.htG...slhRllh..lha..hs..lhshap..h...st...ts......................................................................................................................S........sushllAsl.hl.hh.hhshh.s.ah..............................................................sh.phhhhs+.p.ht..ts.......shhLasD................thlp+aGhlY.spa+ss.taaahlshlhahh.l+uhhluhsQs................sGhs..Qslslh.llEhhhllhl.hhh+Pa...hs...+po.Nhhs.l.hlsslph.lsshhhlhF.ss..h...s..sthsp.....sh.....hG.hlh...hllpu...shslhL..hlhh.........hlpsh.ht..lhpc.ps.chphp....................................t.............................................................................................. 
0 105 229 366 +5840 PF06012 DUF908 Domain of Unknown Function (DUF908) Yeats C anon Pfam-B_6534 (release 8.0) Family \N 25.00 25.00 26.40 30.30 21.60 24.50 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.77 0.70 -5.13 25 318 2009-09-11 20:50:55 2003-05-22 17:01:12 7 40 239 0 225 312 1 287.90 26 8.83 CHANGED sccLllslLcFophLLEpCsNRslYsSh-.+LssLLsosshcllhusLcllhthupRh..tpt.....st.h..p.hppp.th.hp+lhplAtsastssh..........................................................sstscthuLschh...........tccph.pphsplphpYYhss.......................................................t....+pstttts............................t.ttpsssspsuhphhclsppphpppslp-lhcpthpp..lP............pchpa-hhp+lRhs+uhsssph.....RppllshRlhAIuslshlh...spststs+lhph-s.......hhppLs-Llphs...s.....plspplpshulpuLpulup..++sphss........llssLusslsHGlLhpllRphlspl ......................................................................cpLlhtlLpFotlLlEpsh.sRplYuSh-....+LssLLsosshpllhusLpllhhhupR............p.h.tphhtpphp..hp+.l.pLApsa.............................................................................sspppshuLspts......................tph.tphs.slphpaYsps............................................................................................................................................ptp..ts..spshphhcl.p.ph.h..s.p.s.......tplhpphhp....lP............pppphtlhp+lRlApuhsstpp.....R.phlphRLhAlshLsh.t.....s.pp.hspll.ts.................lh...ppLs-..l.lphs...s...............................ph.hp...l..pshuLpsLsulsp..........cp...s+hss...............llsshus..s..s.HGhL.hllRpslpt........................................................................................................................................ 
0 71 121 188 +5841 PF06013 WXG100 DUF909; Proteins of 100 residues with WXG Moxon SJ, Studholme DJ anon Pfam-B_7198 (release 9.0) Family ESAT-6 is a small protein appears to be of fundamental importance in virulence and protective immunity in Mycobacterium tuberculosis. Homologues have been detected in other Gram-positive bacterial species. It may represent a novel secretion system potentially driven by the Pfam:PF01580 domains in the YukA-like proteins [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.81 0.72 -3.98 120 3120 2012-10-01 21:44:22 2003-05-23 10:31:35 7 26 880 30 526 1700 48 84.80 18 66.58 CHANGED plplsspplppsAsphpptssplpshlp.plpsph.stl.tu..sWp.GsuupuFpsthp.phptsh.pphhptLpplsppLppsuppapps-p ....................hphs.ttlp.st.u.sphps.t.upp.lcsh..hp.plpsph..psl..tu....sWp...Gt.u.sssap.s....t.h....s.p....hppsh.pplh...p.hLpplspplppsusshtpt-t................... 0 169 352 458 +5842 PF06014 DUF910 Bacterial protein of unknown function (DUF910) Moxon SJ anon Pfam-B_7253 (release 9.0) Family This family consists of several short bacterial proteins of unknown function. 25.00 25.00 31.00 30.90 19.30 17.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.87 0.72 -4.05 26 915 2009-01-15 18:05:59 2003-05-23 10:36:21 6 1 911 3 89 305 0 61.60 50 83.25 CHANGED MKTLYDVQQLLKpFGhhVYlGcRhaDIELMtlELccLYcusLlD+..csYhpAcllLp+E+phE ...M+ThYDVQQLLKpFGhllYhGcRlaDIELMplELpcLYcutLlD+..p-YLpAchlL++EHchE...... 0 19 44 67 +5843 PF06015 Chordopox_A30L Chordopoxvirus A30L protein Moxon SJ anon Pfam-B_7254 (release 9.0) Family This family consists of several short Chordopoxvirus proteins which are homologous to the A30L protein of Vaccinia virus. 
The vaccinia virus A30L protein is required for the association of electron-dense, granular, proteinaceous material with the concave surfaces of crescent membranes, an early step in viral morphogenesis. A30L is known to interact with the G7L protein and it has been shown that the stability of each is dependent on its association with the other [1]. 25.00 25.00 36.40 36.30 18.30 18.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.49 0.72 -4.37 10 57 2009-09-11 08:38:21 2003-05-23 10:41:39 7 1 46 0 0 32 0 72.70 57 98.13 CHANGED -EDINEuNF.HLLosLSNssp...DsEFuATLSsl+ElIStINhKlLuINKKSKKNsRss-p...hsaVs+..REssRY .hEDlNEANFsHLLhNLSNNKD..lDspauuTLSll+ELlStINhKIhsINKKSKKNo..+o.lEp....lpasuu..REhsRh.............. 0 0 0 0 +5844 PF06016 Reovirus_L2 Reovirus core-spike protein lambda-2 (L2) Moxon SJ anon Pfam-B_7350 (release 9.0) Family This family consists of several Reovirus core-spike protein lambda-2 (L2) sequences. The reovirus L2 genome segment encodes the core spike protein lambda-2, which mediates enzymatic reactions in 5' capping of the viral plus-strand transcripts [1]. 
18.00 18.00 19.80 19.80 16.30 16.20 hmmbuild -o /dev/null HMM SEED 1289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.02 0.70 -14.02 0.70 -7.60 4 56 2009-01-15 18:05:59 2003-05-23 11:11:48 6 2 22 3 0 58 0 1162.20 43 99.73 CHANGED MAsVaGlpLsspLsosTsR+hhp.hpaDpLlosLpsssh.tp.aRuLc.psss.shoslQha.PLNuas.ssthltpshsacsWccaIp-+hpALss.LhRtYPlss.usRhlNPllhsAlsuuFLsspshsthLshLFls...cssIsslhssshohopch.sstcslhTPAGpKYlpLpuYsso.sssPshFuKclssYApsaYasshssp.phhapasuusslL.HFDpPT.G.HhLlPshuss.uhsohllstsshlLlESsL-phphNApAssuosVsRlDQsYHslhusp.sspsoLthRLssLSLLAlpGhQhss.lPspPThusVuuFluRLhu.Gcspphl..Rsc.lhlh.-SPhshsssst..YlphpssphshoIGshshlsDsspPltaLPQYc.ATs.shspupDuhpcssh.PLhspauhhhoGsAhhphhDhstcss.VassspLsphPssYFst-cthpRsLFSphRuhuDRShlKDsAslpahushlsPsssp.lLssuhSMAYlGASusHussDpPlIIcslhuGolPGVPhPpplpQFGYDVspGoIhDlshshPTGTFtFVYSDVDQVpDussDlsAosRtshuhLshshphTssGGshVVKlNFPTpshWpplFpphusphoolaLlKPhlsNslElFLl.FusR...ssuuLpsosulhhFLlshatR.psLscshsplP.hGs...lDDGsoshGlsslcl.sPshSshstshtlsshuhhsuhsssppShhsh.-SpGspssTIhu+RT.hSppRhsRLt.VPhlhssolshQpRhhssss.pLFsspusssTplh.Luhhhshhssussh.sls+hLDLGTGPEsRILSLlPsshsVThsDsRPsAps.usas.shTsalphDYLssuhhsustsDslTsIhSLGAAsAuAuhsLhsulpQLIphlssusspplaLQLNsPLsssuSl..sllEIDppsppYhFsshtRsEPYus.sAL.phlcslhPussloWhThSsohcWhcYslt.uoolo.sDIshAhphS+hsPIh+IDhsphPhch.PsshhVGtpsslplsuhsspsshpsphsuVplhossssNhsuhhuslul.assspscasLshsPspPGIhsh..llshsss.lshGShsIssPssolslsaP..upLDFT.uGsDsclssssaYcLulF..........lhhDGpaphsNP-+.t.hsoAussRVlpalhDluDsalLhYlCDVossslGchIthPLs-l.phsaPsNsslhhShPasuststlp.sGs.hssLss.shVLPsuhtlhshSTultsuhPTahVPsGsYshVhl 
...........MAplhGlRLusoLSuPs.cphsppaThc-hhSsLchss...cPW+sL+sptss.slsAVpLhhPLpGhl.sh.hhthshsaspaEpahp.hLpsLt.plLRhYPIusYpschlNshlsNAlVuAFLSN.sahchLshLhls...cs..lpDlhssGhslppah..hctssl.ssAGpKa.lQ.hpsYs.s...ssDPsLFuKpLpsYuhsaYh.shpsh.pahhpH.SsussLlHaD+PoNG.HhLlso.oshs.Assh.lsAhsulLLESCLpQ.thNsps.supPVsR.spshh.h.us.a.ttpsoLpYpLhsLS.hhhNGYQhsc.hPtp.ssthluuhlupLhs.uts.Tsl.Pp.thhplhhDSP.shsGtst.salp+.tht.Ghplusl..hpssspsVtWhPQas.spu.ssD.u.s.lu+hTpLPLRscYushWsGsAL..asshsRppt.Vh..p.hsQhPssYFssD-pauRShFS.hRtltDRSLlKDTAsLhah.p.lsssps+chl.supohsYhGASusHus.sQP.lIcPhhpGplsGV..P.SV+QhGa-VspGsIsDlthPhsoGsa.FVYSDVDQV.sGcsDLshSSphhpS.Ls.hh+hThsGGShVlKhNFPTphVWpaI.ppl.P.hoShhLhKPhVoNNlELahl.Fu.+....puuhpsousVhhFhhsphtRYcsLps..pplPShG....lDDuhoVoslphlsl..suhSshpptttluh.ulhsslGst+hSlt.Y.-SatsplhsIh...oPtStphhsRLtYlPhl.Ppol-VQtRshhsusP.lFs..hsss.sp...LohhYshtloussa.-...schhLDLGTGPEA+.LphlPss.PVThsDhRPhs.PSGCWs.shTsFLphDYLssshlhustuDlVoClLSLGAAsAstshThctuhpQLlp.h.scusspslhlQlNCPhs.sstsl.+thLElspTNppYhF.phGR.EPa.shsuL.cIscshhPshslphhshs.sLpWhchAlhpssoloSssIhlA.hh.+ahPlhhhchcthshph.ssshhVGpsholsl.sapspssapshhssshhhohpsss.suh.stVos..ssspsphsLshs.ussGIho...llts.ss.lSLGShVl-uPDsslo.haP..ApLDholuGTDl-lplssaYclhh.F..........sh.DspapIhp.Dp.......us...sohshNhhhDhuDhahhhhlpDVpspslGhaI.+.L..Lso.shPsstchFLShP.-...hhVp.sGs..s.h.sutshs.PpsW.slssohsh.suhPoahVPPGc.YsLs......... 
1 0 0 0 +5845 PF06017 Myosin_TH1 M_tail; Myosin_tail_2; Myosin tail Yeats C anon Pfam-B_12631 (release 8.0) Family \N 31.10 31.10 31.10 31.10 30.90 31.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.14 0.71 -5.01 27 904 2009-09-13 17:10:37 2003-05-23 11:23:47 8 48 257 0 561 839 2 186.30 23 19.42 CHANGED h+htAsclhpsKKccptpSl....RtFhGDYluhcp.psphpphhtspt........t........pllFust....VsKhsR....psKsppRtllLTspslYllt............sthphtlK++lslsplpulSlSshpDshhllHh......psptD.llpssaK.........oEhlohLpcphpp.tt..pLplp.husslphph+p........st.pshphthstssss...hhcsspsthhphsss .......................................+..Asplhts.....+KpphttSl.....R.FhG...DYLshpp.....s.sp...htp..h.htppt.............................................plhFush.......VpKhsR............ptKsppRhllL.......TspslYll.............................sttphpl.Kp.pl.s.lss...........lpulS.l...Ss.hpDshhllHl...................................ppcsDhl..l.ps.s.ht............hEhlohL.hpt.hpp..tt.....plp.lp.hus.phphphpt.............tt.t..hphth.s.ts.....t...............htt.tp.........t....................................................................... 0 191 263 401 +5846 PF06018 CodY CodY GAF-like domain Moxon SJ, Bateman A anon Pfam-B_7573 (release 9.0) Domain This domain is a GAF-like domain found at the N-terminus of several bacterial GTP-sensing transcriptional pleiotropic repressor CodY proteins. Presumably this domain is involved in GTP binding. CodY has been found to repress the dipeptide transport operon (dpp) of Bacillus subtilis in nutrient-rich conditions [1]. The CodY protein also has a repressor effect on many genes in Lactococcus lactis during growth in milk [2]. 
29.50 29.50 29.90 29.60 28.60 29.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.92 0.71 -4.99 17 1124 2012-10-02 14:34:25 2003-05-23 11:26:54 9 5 1084 9 136 463 0 176.60 45 67.49 CHANGED sLLpKTR+lNslLQ+u....sspslsFp-huppLu-VIcsNlallS++G+lLGYuhp...tphps-Rhcp.hhp-+pFP--YspsLhplt-TpuNlslssphosFPhEscp.FtsGlTTIVPIhGuGcRLGTLlLsR.scpFsDDDLlLuEYuuTVVGhEIL+t+s-ElEEEARp+AsVQMAIso ..........................pLLpKTR+lNslLQ+s.........htptlsapcluppLu-llcsNsaIlup+G+lLGYshp....ph.p..s.-Rlcp...hh.p...p...+...p...FP-.-.Yspt.lhplh-TcuNlslc..ss.ho.lF.PsE.s.+-hFs.s.u.l.TTIs.PIhGu.GcRLGTLllhRs..cccFsD-DLlLuEYuuTVVGhplLptpp-E..lEcEsRc+sAVpMAINo...... 0 61 93 115 +5847 PF06019 Phage_30_8 Phage GP30.8 protein Moxon SJ anon Pfam-B_7692 (release 9.0) Family This family consists of several GP30.8 proteins from the T4-like phages. The function of this family is unknown. 25.00 25.00 36.40 51.50 21.00 20.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.60 0.71 -4.42 2 31 2009-09-11 08:57:46 2003-05-23 11:33:00 6 1 30 0 0 23 0 112.20 68 99.49 CHANGED MpTINLNATlKs+sasG....hhsEh.WhlhuVpGDll.spTs-G.usDFsapIplcNFFTG.IYcLNoslhGpI.....EppEhG......WY..sARpRAEpLIEKhKthGhlD.t+WphlK .........MKTINLNAsVKTKCFNG....KYcETMWFLMAVEGDIIEVETTEGMGTDFTFTIQVHNFFTGWIYELNTVIVGKI.....EQNELGE.....WYYVTARQRAERLIEKMKKVGKLDMpHWKVVK.. 0 0 0 0 +5848 PF06020 Roughex Drosophila roughex protein Moxon SJ anon Pfam-B_7712 (release 9.0) Family This family consists of several roughex (RUX) proteins specific to Drosophila species. Roughex can influence the intracellular distribution of cyclin A and is therefore defined as a distinct and specialised cell cycle inhibitor for cyclin A-dependent kinase activity [1]. Rux is though to regulate the metaphase to anaphase transition during development [2]. 
25.00 25.00 25.10 30.20 21.80 21.50 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.03 0.70 -5.76 3 54 2009-01-15 18:05:59 2003-05-23 11:40:35 6 2 16 0 8 55 0 283.40 60 98.36 CHANGED MNAPEEHKcTPLEVIHEFIKGVDDGTIRRDLGEDCILSYYSRNVRGAKAITGFLRTQLTpRYKHEsFEEAAplAKGDELLLQARFGRSFDuERRRIYEEKERsGTTsLHLHAESDDEEVNEEFSTTLITPPRPSSYNLNoLKYVEACGLLNRRDEHlYGGLDLGESCAVHLTLGYRSTaLPGGQVSGFEICLAVYDRGLTSLNRSTLlPPPhuISFuRRANARCNPTTDDEuDoEEDSPPPTuRRGVRRTLFTEENTQEEE..........DuDPDPIPEVE....QEQPAPQQAEETAREAVNIPVDLPTPsETTNsSSYTPRKRhQTTNGNEVPPKRTPGPQRMRF ...................................cTPLEVIHEFIpGVDDGoIRRDLuEDCILSaYSRNVRGAKAlTGFLRsQLThRYKH-sFEEAtplthGDELLLpARFGRSFDhtRRRIYEEKERsu.sT.....LHlH.sE.SDD...Eps...Nc....EFSoo......LITPPRPSSY.N.L..pSLKYVEuCGLLN+.Rs.EH.VYGGLDhG.EoCAVHLTLGYRpT.LPGGpVSGFEICLAVYDRGL.psLpRSTLss.P.huhS...hsRRup...hRCNsTTDDEuDsEED.PPPTuRRsVRRTLFTEENTQcEE..........................D.D.s.Pl.E.p............................QpQP..APQ..Q.s....ppsup...............s....VDlsTP.chTshsShTsRKR.Qt............................................................................ 0 1 1 4 +5849 PF06021 Gly_acyl_tr_N Glycine_acyl_tr; Aralkyl acyl-CoA:amino acid N-acyltransferase Moxon SJ anon Pfam-B_7828 (release 9.0) Family This family consists of several mammalian specific aralkyl acyl-CoA:amino acid N-acyltransferase (glycine N-acyltransferase) proteins EC:2.3.1.13. 
25.00 25.00 32.30 25.90 22.50 23.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.39 0.71 -4.91 10 198 2009-01-15 18:05:59 2003-05-23 11:49:27 6 4 36 0 101 166 0 181.00 41 67.51 CHANGED Ml.hLQuuQhLQMLEKSLRKSLPESLKVYGTVFHINcGNPFNLKALVDKWPDFpTVVlRPQEQEMTDDLDHYTNTYplYSKDPcpCQEFLuosEVINWKQHLQIQSSQSuLsEsIpsLAAoKSsKVK+opsILYhssETtK+LsPSLh-sKp.LssssG+P+...ulcQchFKLSoLDVoHAuLVN+aWpFGGNERSpRFIERCIpsFPo ................................Mh.LpssphL.hLcp.LcpplPE..SL.K.VYGsl.hplN+GN..PFph-llVDpWPDFpsVlsRPQ.cp-MsDDhDaYTNsYplaoKD.pphpchLtps-VINWcQthQI.Q..u...Qp.sL..s.-sl.pplA..ss.Kplp..V.chp.pt..hLhhh.phh.p......t..s.s..s.........csp........t.tp..p........t.ppt.h.+.ho.LsloaAsLVNchWphGGNE+ShRaIpchIpsFPo...................................... 0 4 8 26 +5850 PF06022 Cir_Bir_Yir Plasmodium variant antigen protein Cir/Yir/Bir Moxon SJ anon Pfam-B_8754 (release 9.0) Family This family consists of several Cir, Yir and Bir proteins from the Plasmodium species P.chabaudi, P.yoelii and P.berghei. 
20.40 20.40 20.50 20.50 20.10 19.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.06 0.70 -5.17 119 1443 2012-10-01 19:45:41 2003-05-23 12:01:26 6 3 6 0 1275 1447 0 235.70 29 83.60 CHANGED M..scp....h..C......ppFptlpphhsDc.Lspssp...YpFpspt..hppYC.ss.s.............Cs..................sD..l-KIsAGsLaLhsp.hats.s.hp..p.s+sphs.lltYIhIWLSYhLsh.csppphs..slp-FYspaIpsssc.Ypp..............pIsss.psYs..sYK-lI-++ptlhs..hshpplSKFY-AFKhLCsMYsph.....st.psssCsphhppAscFVcKYcc.LNp.......sss..scsosYspl...LSoLSsDYsNhKpcCss.....ptph..ssLPshcp.......................................................p.ssSSSSIssKL....lslL.IFs.AIulFLGIu...YK .....................................C....t.h..l.p.hssp...t.ptp...hph.p.t......hptY.Css..t................pC.p...................ss..hc+l.suuhlaLhpt.hhtp..t...t.............tpt..p..hh.Yhh.lWLu...YhL.s.pp.p..p..t..h.s.....slppFYspaIps...s...p...p...Ypp................lpts......pt...hp......s..ac.-.lI-c+pph.hs..hs..h....p...lScaY-sFK.LCphYsph.....s.t...pss...p.spp......h..p..p...Ap..c.Fs....cc.Ycc.....L.pp.......s......s.....sc....s....s.....sY.ppl...LssLSsDYssh.....Kp.ppss...........h....sslsp..h.pt...............................................................................hspsssh.h..spL....h.lh.Ihs.uhshhlGl.YK................................................................................................................................................................................................................................................. 1 0 903 1275 +5851 PF06023 DUF911 Archaeal protein of unknown function (DUF911) Moxon SJ anon Pfam-B_8782 (release 9.0) Family This family consists of several archaeal proteins of unknown function. 
20.20 20.20 20.50 20.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.06 0.70 -5.20 8 70 2012-10-11 20:44:44 2003-05-23 12:12:43 7 2 49 0 43 95 1 261.40 34 98.55 CHANGED Mhhphp.h..+hL.+Rl+th.tscsVsEELRGWpWspPPVpPpsh.ltLolSDls.sYCsTtRDVYL+hVLshRGc.sstlhhGpsIHcsatpulc-l+phhhuucs.s.sh.p.hhtpth....h..phhc....huctlacalsphhpuchscl+...ut.sttsc.Sluhhs..lPhhsEasVDGSsLGLSshlpVDA..hhLs..lVlEhKsGpapcpHcLALAGYALAlESthElPVDaGhLlhl.shNssl.+hpsclhhIu-sLRpEFLEtRDcsh-lltsssDPGluhcCsssCPFhchCp....t ............................................................................................h..+.L.+chh.sh.ttsslsEELRGWsappPPlpPhth.htLulS-lu.tYCsTtRDlYLR.+.Vhthpsc....s.t.shhhGphlHclhtp.shp...php+hl.t.s....h............t..h...............h....h..t....th..............hspph.p.................hsctlhchh..sh..plhuc.hpc.hh.......st....h...........Shs....s......lP..l.h......sEht....VD.Go.LGLS.t..L.ps.......DA.h.h.hs........ll.lEh....K.h..Gp.....h..p...c..h.HcLuLAGYALAlEuthElPlDhGhllYl.sh.s...tsh...chphcshhIussLRpcFl-tRDchh-hltpttD.PG.hs......p...Cs.tsCPFhphCpt............................................... 1 19 29 35 +5852 PF06024 DUF912 Nucleopolyhedrovirus protein of unknown function (DUF912) Moxon SJ anon Pfam-B_8809 (release 9.0) Family This family consists of several Nucleopolyhedrovirus proteins of unknown function. 26.00 26.00 26.60 26.00 25.90 25.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.59 0.72 -3.96 24 60 2009-01-15 18:05:59 2003-05-23 12:15:59 7 2 52 0 9 56 1 102.10 28 58.08 CHANGED LDVPY-RLGspp.+V-YIPLKLALsD...............sssssssststss..s..spsshsptsss..........pphshhpllllulluhlslhlLL...YsIYYFVILR-...+pp.......ss....t.pPsal ............................................LDlPY-RLuspp.pV-YIPLKLAlsD...............stsspssspppsss.s..hs.pss.hsstsss..........tspts.hhsllllulluhhslhllL...Ys...IY...YFVILR-+pppts............................ 
0 8 8 9 +5853 PF06025 DUF913 Domain of Unknown Function (DUF913) Yeats C, Sammut SJ anon Pfam-B_6534 (release 8.0) Family Members of this family are found in various ubiquitin protein ligases. 24.70 24.70 26.30 25.10 18.90 23.60 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.09 0.70 -5.71 11 332 2009-01-15 18:05:59 2003-05-23 12:19:24 7 42 240 0 227 319 4 324.60 25 10.30 CHANGED hSRslchLDshlcuspsAhuhh.osu+GhDslsshIcaEsppshc...........ssptuuuh.spppssshs.......ap...IsYhppphL+hhh+Fls+hhp......uuGhscsLRNLlD..spLlsSL+hlhcpAclFGupVaSsAssllSsFIHNEPTSaullsEAGLocuFL-Als.............................................................ssIhsusEAlosIPsAhuAICLNuSGLcLFpo.p.sAlcsaFcIFpSPpHlKsLpcs.......-hssuLGoshDELlRHpPuLKsslhsulIchlscluhLs+shshspshuu+h.......................................thhssshststthsts.hhp.hsso.sspspsss......................s..thshs-hlhsVuRFLtuhhpNpusCp.FIcpsGlEhlLslhsLssLPhDF ..................................................hshphl-hhht........hshh..s.tpGhshh..hlphElphshp..............................................................p.s..ts..s.p.hp..h..................hp.......h..pptthlKshLph.lp+hhp..................ssshsctlRp....lhD..usL.puL+pIlpN.sc.haGssla.hAsslloshlapEP.o.huslp-sGLs..psh.Lculh...................................................................................................................psl....sop-slsslPsshuAlCLNspGLp.hhp..p..pshc.phh.clhhSspal.s...hppp........................-hssslGsuhDELhRHpPsL+sshhssl.lphlpclsthsps..t..t....h.s.......................................................................................................................................t..................................................................................................................................h...l..hhp.l.s......p...s....t...thp.t.Fl.t.ttGl..llp.hhth.ths........................................................................................ 
0 72 122 187 +5854 PF06026 Rib_5-P_isom_A Ribose 5-phosphate isomerase A (phosphoriboisomerase A) Moxon SJ anon Pfam-B_5144 (release 9.0) Family This family consists of several ribose 5-phosphate isomerase A or phosphoriboisomerase A (EC:5.3.1.6) from bacteria, eukaryotes and archaea. 26.00 26.00 26.20 28.80 25.70 25.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.95 0.71 -4.95 58 3557 2012-10-04 00:26:15 2003-05-23 13:17:25 9 10 3267 42 923 2338 895 171.00 42 74.26 CHANGED lsuVsTSppopthspphGI..sltslsph...splDlslDGADElD.sp.hshIKGGGuALhREKIVAssuccFllllDpoKhVp.pLG..pa..PLPVEVlPhuhphlh+plpp....hGs..ps..plRhs.t......tshlTDNGNaIlDspht...I.pD..PttlpcplpplsGVVEsGLFssh.A-hlllGspcGsp ......................uVsoSptospphcpl.GI.....l.h...s..l.s-V........s.p...lDlhlDGAD....El....s......sp......hphI....KG.....G......G......u..A......LhRE.KIVAssuc+.aI.sIs.DpS.K.h..Vc..hLG...pF.............PLPVEVlPhutstVtRpltc........................hGs..pP...phR.s.t...........................hlTDNGNhIlDlphh.......pI........c...................Phtlpptls.tlsGVV-sGLFssh..A-..h..lllGst-Gs.................................... 0 249 520 743 +5855 PF06027 DUF914 Eukaryotic protein of unknown function (DUF914) Moxon SJ anon Pfam-B_7017 (release 9.0) Family This family consists of several hypothetical proteins of unknown function. Some of the sequences in this family are annotated as being putative membrane proteins. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.09 0.70 -5.60 5 558 2012-10-02 19:55:49 2003-05-23 13:27:53 7 12 225 0 378 1427 836 252.40 26 71.49 CHANGED pclpch.hTK+hLIuluLGQlLSLLlTusAhTSuYLAc+.tlNsPshQTFLsYsLLsLVYTshhlaRpGscphh.hIKRKWWKYhLLALlDVEANYLVVKAYQYTolTSlpLLDCWAIPsVllLSWhFLKsRYRlh+IlGVVlCIlGVVhlVsSDVlAGcRu..uGSNPllGDhLVLuGATLYAVSNVsEEalVKsLspsEllGhlGLFGAIISuIQluIFE++-LtuIHWos-huLLalGFALsMFLLYSLhPILIKsoSAThaNLSLLTSDhWSLlIGlFlFHYKVsWLYhLAFsTIhlGLIlYSspppcpsEsscsplpshpccG.ss-sscTsc .......................................................................................................h............................................................................................................................................................................................................................................................h.....h.h.a....h...h.h....u.h...h.D..l..p.u.N.a..h....h.s..h.Ah..p....Y....T.o.l..s.S.....h.p.LLsshs.Is.....h.s.h.l..l..S.hhhL...t.h...Ra.phhphlG.lhls.l.h.G...lsh.l....s......h..u..D.......h........h......s............s.........p............p.......t..................s.....u...........s............s...............l.....h...GD.....l.l...s...lhu.A.s......l.....Yu...l.ssV..h........c.E....h...h......V........p...p.....h...s...h.....h...c...h....l..u..........h...h.....G...l..F..G...h......l......l....s....u.....h....t...h.......h.......h........h......c.....h......t......t.......h......t..h.............h.....s................t.......h......h................h...h...h..h......s.....a...s..h....s..h....h....h.h...a......h.h..s...h...h........h.....h..h...s.s.u..s.......h..s....l..u..l....L.....o.....s..sh...a....ul.......hht.h.hh...a.t...............h.p.h...h.a....l.u..h....hhh..h...hG....hh..h.......................................h..................................................................................................................
................... 1 129 205 295 +5856 PF06028 DUF915 Alpha/beta hydrolase of unknown function (DUF915) Moxon SJ, Bateman A anon Pfam-B_7094 (release 9.0) Family This family consists of several bacterial proteins of unknown function. Members of this family have an alpha/beta hydrolase fold. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.57 0.70 -5.29 13 1150 2012-10-03 11:45:05 2003-05-23 13:34:06 6 5 577 4 145 1113 409 233.40 31 85.76 CHANGED psp.psstpphhPTIaIHG.uGospShstMlsplhscts.sspcsLs..hsVsp-GplphpGplsKcsppPhIclsFccNcs..uohccpupWlcplhscLpppYphpphNhVGHS.GGhuhshYhhcYusccshPplpKhVsIuusFNslps.sts.shh.hhppt.spspTshaptlhpp.ppplssslcVLsluGshpssppoDGhVshsSShoh+alhtpsucsYpcphhsGcpAsHSpLHENspVschltpFLapp ............................................................t......h.pp.hP.T.lalHGau.....uo.t...........pu.ptMlpphp.cps..s......s..........ppllp...........spV..spsGpl.p..h.p..Gp..........l.s..ps.s..ppPll.pl.t..Fcs.N+p...........sshppp........ut........a.....hppslpt.L..p.p............p.....Y.p...h...p..p..hNhV..GHShGsl...sh..s.hYh.............hp..a....s....p.........c......p...s..l....P........plp.KhVsI.....u.....u.sa...Ns...h.....................s..........s.t..........................p...................................................t.t.u....P.....s.....p...h...s......ph...........apph..hth.......c...p.hhs.p.slpVLs..IhG......s..................p..........s..........s....pSD.GtVs.sSutul+Ylltsp.sp.sYpEhph..p...G..p..s..ApHSpLH.-.N.t.pVs.phlhpFLat................................................................................................................... 
0 31 73 109 +5857 PF06029 AlkA_N AlkA N-terminal domain Yeats C anon Pfam-B_13157 (release 8.0) Domain \N 26.00 26.00 26.10 26.30 25.50 25.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.48 0.71 -4.03 90 1096 2012-10-02 11:58:57 2003-05-23 13:41:40 6 11 1039 41 244 792 63 115.30 42 29.96 CHANGED slpL..sYRPPacWsthLsFhutRAlsGlEtl...........................ss..s...........YtRolplss......s.....p......................G.hlplphs.........stp......ptlplpl....sh...sthpsLt.slluRlRRlaDLDADPtsIsstL..pshhsshlstpPGLRlPGuaD .......h.hpL..sapPPaDWshhLuFLAuRAVsGVEpV...........................s-s.......h.......Y....tRols..lsp...........t........p.................................Gllssps.s....................................t.p...........cs..Lclslss............shhsshs....pslA+.h.pRLFDLcssPptlsusL..........us.Lsss..cPGLRlPGshD....................... 0 58 130 199 +5858 PF06030 DUF916 Bacterial protein of unknown function (DUF916) Moxon SJ anon Pfam-B_7106 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.00 21.00 21.00 21.00 20.70 20.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.63 0.71 -4.32 23 922 2012-10-03 16:25:20 2003-05-23 13:51:56 7 3 256 0 77 620 6 120.50 33 35.26 CHANGED FuVpsllPcNQssps..oYFDLphcPspppsLplclpN.soccplplclssssAhTNssGhlsYspss.sphD..poLcashschlchscp.....lslsspps+plslslpMPscsacGllhGGlahp-c ........................FsVpshlP.-N.Ql..ccp..poYFDLthpPsp..pQp..lplplpN.poc.cclslpls..lssAs.T.NsNGll-Yspsp..tchD.....poLp......hsls-llph..scp........l.plsscpppslshp.lphPp....c.sF.sGlllGGlhhpp...................... 0 27 62 67 +5859 PF06031 SERTA SERTA motif Bateman A anon Pfam-B_7533 (Release 9.0) Motif This family consists of a novel motif designated as SERTA (for SEI-1, RBT1, and TARA), corresponding to the largest conserved region among TRIP-Br proteins [1]. 
The function of this motif is uncertain, but the CDK4-interacting segment of p34SEI-1 (amino acid residues 44-161) includes most of the SERTA motif [2]. 25.00 25.00 30.30 29.20 24.10 23.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.63 0.72 -4.66 15 271 2009-01-15 18:05:59 2003-05-23 13:59:28 8 2 72 0 175 259 0 37.50 46 11.67 CHANGED lLslSLcKLpphcs.sEssLRRSVLIsNTLRRlQsElc ...lLslSLhKLp.phct...sEPsLpRSVLIsNTLR+IppEh..... 0 23 39 93 +5860 PF06032 DUF917 Protein of unknown function (DUF917) Moxon SJ anon Pfam-B_7195 (release 9.0) Family This family consists of hypothetical bacterial and archaeal proteins of unknown function. 26.20 26.20 26.50 29.80 25.60 26.10 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.88 0.70 -5.90 51 579 2009-01-15 18:05:59 2003-05-23 13:59:58 7 16 394 2 281 514 91 333.10 33 69.65 CHANGED l..otpDl-sluhGuulLGoGGGGsPahGphhstptlpps....tplcllsh.--l....sD-shllssuhhG..uP.s.......lshE+......ls.sGs.EhhpAlctlpc.hhGpc.lsulhshEhGGsNulhshh..sAuthGlPllDuDuMGRAaPphpMsThtlt.Ghss.....sP.hsls-tcG.............sssll.ss........hss..hhsEclsRshss..phGutsshuthPhsuppl+ctu.lhsTlShuhcIG+Alppuctppt..shlpsllchh...sG.......phLF.pGKls-lc...Rc.spsGFshGpsplpGhpt................pspphpl.FQNEhLlAtps.............spslsssPDLIslLDt-supPlsopp...........l+YGh+VtVlulPssshhpotp.Gl-hsG....PpsFG..h.shsatPlp 
.............................lstpsl-sluhGuulLGoGGGG..sPahuthhshptl.cps.....tslcl....lss.--l......s--shhhs......suhhG..A..P..s.............VhhE+........hs..pGs..Ehhcshctltc...h.........h...s...........pc.hsuhhshEh............GGsNuhhslh..sAA..ph......G...lP..llDuDsMG.RAaPph......pMsThtlt..Ghss.........sP.hslsDtpG....................shslh....p.s.............hss...ths.EplsRshss........phG..........u.t.shhuhhPhsGppl.........+phu..lhsslohuhclG+sltt.sptppt.....phhpsllchs...sG..............hhLF.pG.K....lh.-Vp........Rc.s......psG.FshGpshlpuhst...............................tspphtl.FpNEpLhAtcs........................spslAhsPDLIshlDhco.....upslsopp.............................l.+YGh+.VtVlulsssshWpotc.GlchsG....PphFG...h..chca.sl................................ 0 113 191 243 +5861 PF06033 DUF918 Nucleopolyhedrovirus protein of unknown function (DUF918) Moxon SJ anon Pfam-B_7213 (release 9.0) Family This family consists of several Nucleopolyhedrovirus proteins with no known function. 25.00 25.00 179.60 179.50 19.30 17.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.68 18 39 2009-01-15 18:05:59 2003-05-23 14:01:47 6 1 36 0 0 38 0 152.40 46 91.30 CHANGED LEFDslslDLRHVsF..stsuts-.......tEYIlFLNVKRAhYpNFplssDhSLETLAlalYpssphslsG.sphs+sssas-h....lsaNpsD+spSllI-Lss-..ARlVVAKplpssEpYHQRlSGalDFE+R....Hpps....shlc.ssstRstLDREhEIKLhp LEFDslslDLRHVpF..stsst..ss.......pEYIIFLNVK+AhYpNFplssDhSLEoLAhalYcpsphslsG.sphp+ssshs-h....lsaNcpD+NpSllI-Lsp-..ARllVAKplpssEpYHQRVSGalDFEpRHsps.........hlc.ssppRstlDREhEIKLhp. 0 0 0 0 +5862 PF06034 DUF919 Nucleopolyhedrovirus protein of unknown function (DUF919) Moxon SJ anon Pfam-B_7250 (release 9.0) Family This family consists of several short Nucleopolyhedrovirus proteins of unknown function. 
21.00 21.00 21.80 21.00 19.80 19.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.95 0.72 -4.35 19 53 2009-01-15 18:05:59 2003-05-23 14:04:59 6 1 52 0 0 48 3 60.20 35 44.30 CHANGED spspsLppQLscIsptK+plsIc.pHaE+l++lTKsspElpcl-p+lhchR.cFLsaGlppF ......p.pppsLcpQLscIsptK+p.lslp.pHaE+l++ITKsspElppl-p+LhchR.pFLpausppF.............. 0 0 0 0 +5863 PF06035 Peptidase_C93 DUF920; BTLCP; Bacterial transglutaminase-like cysteine proteinase BTLCP Moxon SJ, Sammut SJ, Eberhardt R anon Pfam-B_7277 (release 9.0) Family Members of this family are predicted to be bacterial transglutaminase-like cysteine proteinases. They contain a conserved Cys-His-Asp catalytic triad. Their structure is predicted to be similar to that of Salmonella typhimurium N-hydroxyarylamine O-acetyltransferase Swiss:Q00267, in Pfam:PF00797, however they lack the sub-domain which is important for arylamine recognition [1]. 20.50 20.50 21.00 21.50 19.80 20.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.25 0.71 -4.84 28 779 2012-10-10 12:56:15 2003-05-23 14:06:43 6 1 470 5 245 627 58 155.80 33 71.69 CHANGED MssGuhTStPlGHYEFCpcpPsECssss.....tsssslpLTsphWpplhcVNtsVNpsIpPhTDh-laG.hcEhWuYP..sstGDCEDYsLhKR+hLhc.sGhPsusLLlTVVRpssG-GHAVLTVRTD+GDFlLDNLsscVhsWs-TsYpaLKRQSpscsGcWVslpcscss.hVuS .......................................................................................................s...t.t....h....p...pLtpVNphhNpplp.hs.DhclaG.....pcDYWuhP....h....s.......s....sG....DCEDaslhKhhpLhp.hGlssspLhl..o.h....V+.....s.............p.....s.......p.......u.HhV.Ls..h...t..T..s..p..u..-.......hl.LDNlssplhshs.p...................................tt............................................ 0 47 124 172 +5865 PF06037 DUF922 Bacterial protein of unknown function (DUF922) Moxon SJ anon Pfam-B_7397 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
20.40 20.40 20.40 21.40 20.20 20.30 hmmbuild --amino -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.14 0.71 -4.91 18 220 2009-09-11 05:34:11 2003-05-23 14:39:37 6 3 125 0 60 151 15 158.70 36 77.91 CHANGED aYuIsGpTus-L.csLuppGPhht.pupRa.GtTphchshc.hsYtpp.sGtCslssscspLplphpLP+hp..ppsss-lphhW-shhuslc+HEcsHucIA+shs+clEpslhuL..tscssCpplcthlsphssclhpcacppppcFD+lEtsstsphpthlL ................................hYsIsGpTss-L.culuppGPhlt......tp..+h.utTphphshc.hcascp.sssCpVtsspspl+hphoLP+hp..pphuPtlthhW..DshhssI+RHEcsHschA+shsp-lERphhuL...scscCpplRtslsKhhschhpspcpppppFD+VEhsNhsNh.phlL................................. 0 9 28 39 +5867 PF06039 Mqo Malate:quinone oxidoreductase (Mqo) Moxon SJ anon Pfam-B_7465 (release 9.0) Family This family consists of several bacterial Malate:quinone oxidoreductase (Mqo) proteins (EC:1.1.99.16). Mqo takes part in the citric acid cycle. It oxidises L-malate to oxaloacetate and donates electrons to ubiquinone-1 and other artificial acceptors or, via the electron transfer chain, to oxygen. NAD is not an acceptor and the natural direct acceptor for the enzyme is most likely a quinone. The enzyme is therefore called malate:quinone oxidoreductase, abbreviated to Mqo. Mqo is a peripheral membrane protein and can be released from the membrane by addition of chelators [1]. 
19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.68 0.70 -6.32 17 1946 2012-10-10 17:06:42 2003-05-23 14:49:47 10 3 1474 0 309 1213 419 469.70 48 94.87 CHANGED ppspsDVlLIGuGIMSATLGshL+ELcPsWpIplhERL-ssutESSNsWNNAGTGHSALCELNYTsEtsDGoIDIsKAlcINEpFplS+QFWuaLVcpGhlpsPcsFINslPHhSFVhG--NVsaL+KRapALpppsLFcsMcaS-D.-plpcWhPLMM-GRsssp.lAAThhstGTDVNFGtlT+pLhppLppp.shplpas+-VpDl+Rss-GsWpVslpshpsuppp.slpu+FVFIGAGGuALsLLQpSGI.EuKsauGFPVuGpFLtssNP-llcpHpAKVYGKAsVGuPPMSVPHLDTRhl-GK+sLLFGPFAsFSsKFLKpGShhDLhpSl+ssNlhsMLusGhcphsLsKYLlsQlhhSp-cRhssLRcahPpA+s-DWcLhsAGQRVQlIKcstcpG.GhLpFGTElVsucDGolAALLGASPGASTAsslML-lLc+sFs-+hp..pWpsKlK-hlPSYGhcLsscspLhcclpt.oucsLpL ......................................................pppcsDVlLIGuGIMSATLGohL+ELpPpWsIplhERL-ssAt.ESSNsWNNAGTGHuALsELNY...T..s.p.ps.DGo...l.-IpKAlpINEpFplS+QFWuahVcpGhlp.sP.csFIpsl.PHMSFVhG-cNVpFL+pRapALppp.sLFcsMca..o-..Dh..p..pl+cWhP..Lh...M...cG....R....c....s....s....p........l.A.A.T.th.-.tG...TDVNFGtlTRpLhp....p.L.....p.....p.....c......s..sp..lp..h..spEVpslc....+.....p.....s.....-.s.....p......WpVpl..p.s....h......p.s..G....p.t..p...phcu+FVFIGAGGu.AL.LLQKoGIPEuKchuGFP..VuGpFLhsp.N.P-llpp.HpAKVYGKAslGAPPMSVPHLDT.R.h.....l.D.GK+sLLFGPFAs..Fo...sK.....F.....LK...s......G...S...h..h...DL..hp..S..l+...s.....s....N....l.....h.....s.Ml......s......s.G..lc...NhsL....s.KYLlsQl.....h.....h..o.....c......-c.....R...h..psL+caaPp.A+.sED..W.c..Lh..p..A.G.QRVQlIKcst.cp...G....GsLpFGTEVVsupD.G.olAALLGASPGASTA.s.slMLclLc+..s..Fs-chp..pWps+l.KphlPSYG.h.p.Lsp.c.th.hcchhp.TscsLtL........................................................................................................................................... 0 65 172 257 +5868 PF06040 Adeno_E3 Adenovirus_E3; Adenovirus E3 protein Moxon SJ anon Pfam-B_7475 (release 9.0) Family This family consists of several Adenovirus E3 proteins. 
The E3 protein does not seem to be essential for virus replication in cultured cells suggesting that the protein may function in virus-host interactions [1]. 25.00 25.00 29.90 27.40 24.20 20.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.92 0.71 -4.18 4 61 2009-01-15 18:05:59 2003-05-23 14:54:17 6 1 40 0 0 51 0 125.00 48 70.68 CHANGED IKsEhphoau.....L.LhQPhL.sh.Qhhpt..p+TF.ll.soosSshP......LP.TNp.pophppRapRsLhpoNTTh.+TGGELRG.PTs....sPW.VsGLlsLGlVAGGL.LlLCYLYhPChoYLVVLCCWFKKWG.. ...............h.......phs.....LsLapPhl.GTYpC.pGPCpHTFsLVssTssSThs.......PETsp..tph.......l..s.oNTs..+TGGEL+s.P.T-t..hsPaEVVGallLGVVhGGhlhlLs.hYLPCas.lhlhhCWh++hG.......... 0 0 0 0 +5869 PF06041 DUF924 Bacterial protein of unknown function (DUF924) Moxon SJ anon Pfam-B_7600 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 20.80 20.80 20.90 21.10 20.40 20.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.34 0.71 -4.55 154 813 2009-01-15 18:05:59 2003-05-23 15:02:49 6 15 724 2 309 757 800 176.50 40 82.71 CHANGED lLcFWF.............ptstsp......p.W.FtpssshDppl+pRFts...hhp.tAspGc...Ls.......pWtss.s............pGtLAhlILLDQFsRNlaR...soscAFAs.DshALslAppAlspGhD..p..plss.......pRhFhYhPhhHSE...sls.QcpulpLapph.....ss....................tp..............................slcaApcH+clIcRFGRFPHRNslLGRpSTs-EhtFLpp.sGt 
................................................................lLcFWF........................pttt.p.......t...W.F.......s......p.......s.....s......s...hDtpl+p+F..tshhp.tAtpuc.....Lt................................pW..pps..s.............pGtLAhlIlLDQFsRNhaR.......so..sc..AFA..s....DshALslApp.A.l.sp.G..h.D..p....pLsst.....................pRhFhYhP..ahHS...E...shs..Qcpu...lpLappl.......s.s.....................................t.t.......................................s.hcaAhcH+sIIcRFGRaPHRNslLGRtSTsEEhtFLppsG................................... 0 92 175 252 +5870 PF06042 DUF925 Bacterial protein of unknown function (DUF925) Moxon SJ anon Pfam-B_7663 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. This family was recently identified as belonging to the nucleotidyltransferase superfamily [1]. 25.00 25.00 25.60 25.40 23.10 23.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.08 0.71 -4.67 40 786 2012-10-02 22:47:23 2003-05-23 15:05:00 6 3 739 1 139 566 26 158.90 40 86.99 CHANGED L..ctscp....LsL.P-hhLuAGhlRshVWstLcshss..ssls.....DlDllYFDst...-hohct-tplpppLp.phhP...thsW....-V+NQARMHlhpss........sPYpSopDAlupasEpsTAlGlcls.............tssplclhAPaGL--lashplpPN.st....c.phslappRl.tKpWpppWPpLplht ..............LphhcpLtL.schalAAGhVRNhlWshLpspss.hss.s.....DlDVIaFDs....cho.cpphtlE.p+L.ppphP..................phpW..plKNQuhMHh+ssc........sPYsSopDAhS+aPEpsTAlGlRLs..............................cc.sphELhAPaGL-DlhshpV+Ps.P+....ctchplYppRltpKsWpp+WPpLph..t.................. 
0 33 66 101 +5871 PF06043 Reo_P9 Reovirus P9-like family Finn RD anon Pfam-B_8265 (release 9.0) Family \N 25.00 25.00 219.50 219.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.86 0.70 -5.37 4 51 2009-01-15 18:05:59 2003-05-27 10:08:25 6 1 7 2 0 34 0 262.00 57 99.91 CHANGED MAD.pRRsFGuYpIpEIThppsQsshNssp....QssSsTpsphusp+hPlLDDGIacLlshLlcGssF-copYsGF-YSHLPsLERsFNTASsYVscpaplhhEphsLctY-hppsISlpuP-FolsLEa.hKscspuppp..EN-...shcslss+lls...........LlslhsR-pE-.hsppl.EGEsAllslhKMalpGFLhaLGcN.ssYD+QLsIEKYRPLLluIlGYEahhuhcs.pKclN+laYpLATFsNYPFslLRapLpSllssPs.IcpcItK-GLFK.IsosshhG.sppoV..hhRGIssSpSFLN.K+YRphRoRhsuNVcplIpsDhSplchss .......sFGuYpIpElhhppsQsNhNsNo....pNopsTpsphuhp+hPlhsDGlatLhs.LLcGssF-KohYpGaDYSHLPNLEpsFNTASsYVstpYcIshsEhpLcsYshscohSVh.P-FohsLEa.lKscppoDpss..cENE............phKPpT++IVs.............pLlsLhNR-p.E.hsEpl.cGEhAlIslFKLYIpGFLhHLs.N.s.......................................................................................................................................... 0 0 0 0 +5872 PF06044 DRP Dam-replacing family Finn RD anon Pfam-B_8314 (release 9.0) Family Dam-replacing protein (DRP) is an restriction endonuclease that is flanked by pseudo-transposable small repeat elements. The replacement of Dam-methylase by DRP allows phase variation through slippage-like mechanisms in several pathogenic isolates of Neisseria meningitidis [1]. 
22.70 22.70 23.10 26.80 22.20 22.60 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.84 0.70 -5.39 6 296 2012-10-11 20:44:44 2003-05-27 10:14:51 7 5 224 2 16 88 13 170.80 60 96.61 CHANGED MsLaFNppLuKsYKSpSQIlRVLSEsWVt+puYCPNCGsp.lspFsNNpPVADFYCspCpE-YELKSKK...uplushIsDGAYpTMIERIpSDsNPNFFFLTY.sc-acVsNFllIPKHFFTP-hII+RKPLussARRAGWIGCNIsLsplPEuGKIFLVKDpQlh-s-pVhcpapKsLFLRppsh.pSRGWhL-IhpCIDKls.spFoLsplYpFEscL+h+aPpNNaIKDKIRQQLQlLRDKGhIEFlGRG+YRKl ......................................................................MpLaFshpLscp.ppsop+hRlhoEsWl.+puYCPsCGspPhp+F....tNN+PVADhaCspCpEpaELKSKp.......tshu..s..sls..DGAYtTMhcRlpuDsNPNFFF...............................................................psuplhp.p.VhcpappsLFLRppsh..pp+GWhltlhpCID.pl..ppFoLppMYcF.E.scLp..FspNNHIK-KIRQQLQILRDpphIEFhG.RGhY+K.......... 0 7 14 15 +5873 PF06045 Rhamnogal_lyase Rhamnogalacturonate lyase family Finn RD anon Pfam-B_8355 (release 9.0) Family Rhamnogalacturonate lyase (EC:4.2.2.-) degrades the rhamnogalacturonan I (RG-I) backbone of pectin [1]. This family contains mainly members from plants, but also contains the plant pathogen Erwinia chrysanthemi. 
19.90 19.90 20.10 20.00 19.60 19.50 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.30 0.70 -5.04 13 168 2012-10-02 23:57:29 2003-05-27 10:28:12 6 17 36 0 104 163 0 162.80 35 28.74 CHANGED M...................................................p.p......VllDNGllpVTlSsPsGhlTGl+.YsGl.cNlLch..scppsRG.............YWDlVWs.sGpps.....hh-h..................lcGocFcVIsps-EQlElSFsRTas.S.csssl.....PLNlDKR...aIMh+GsSGhYoYuIhE+lpsWPulslsphRlsFKLspcK.........................................FcYMAluDsRQRhMPhssDRsssRG..psLAYpEAVhLlcPp-spa+...GEV .........................h.............t.............lhlcNGllplohopPtGhloulp..YsGh...pNllc......spt.....pspG.................YaDhsWp....uttt..........hth..................................................................................................hpssphpll.p....spp.lElSFhp.a..s.p.sp.h..............slsl-h..+........hlhhcusSGhYsYuIh-+.tthP.shsl.sphRlsFKLppcp.........................................FpYMAluDspQR.MP.sp.DR...ps..t.LuY.EAVhLspP.psph+...GpV.............................................. 0 12 62 85 +5874 PF06046 Sec6 Exocyst complex component Sec6 Finn RD anon Pfam-B_8361 (release 9.0) Family Sec6 is a component of the multiprotein exocyst complex. Sec6 interacts with Sec8, Sec10 and Exo70.These exocyst proteins localise to regions of active exocytosis-at the growing ends of interphase cells and in the medial region of cells undergoing cytokinesis-in an F-actin-dependent and exocytosis- independent manner [1]. 
30.10 30.10 30.50 30.50 30.00 30.00 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.79 0.70 -6.43 38 580 2012-10-03 17:31:52 2003-05-27 10:42:54 8 14 271 2 371 521 5 457.30 20 72.59 CHANGED FpplstlhctFcctlhtlhtshlphscps.sshlVcll+Il-tEEppDpchtthpphtt......................................................tpsRsa+c+hhcslcpulpppap.....pstpthssctth.thL-plt.alhs-LhhVpptlsshaPs.caplhchahp.hYHptlpshLt................pl....sss-hsutcllslLsW.scpYts..hhsphthh..................tsplpPllsssth.ppLhccYhphltpplp-WhsplhcsEpppahp....spsP-.....................ct-G.............hatspsshphaphlppQlplA...usohpscllthslcphsphlpphppshhphlc-chcp..............................................sshh-aLlAhsNsphtst-..........hhsshppcatshls.....p.ht.thsslhsshsclsspshptllchlat.D..Lpshhscla..o....ppWhss.........pshcplssTlp-Yh.sDhpp.lp.sh.hphhhpphhcpllhpYlptlhp.......+phthp.spctpphsc+lppDspthhphFpphh........thhhsphphlphlhthl.....hps.s.l........................hh.hpslhspY.Dhshs......alpulLpsRsDhs+uphppllpphpphhtshp............slhscl 
.............................................................................................................tht.h.p.htptl..hh.p.h.hst....tt...thlhphhhllp...E-t..ctth.t.t..............................................................................s+ta+phhhphlp..tshptpht.................t....t.....t..........h..............Lp.ht.hhhp-L.h.stphh....hhP..capl............hphhhp.hYHpthtphlt................pl.......t.t...ht..stphht.........l.ltW..ph..Y.t...hhtp.tht...................................tpltshl....p.......ppL.ppahp..h.tphppa.htphhp.t-hp.t..a.p.....tt.Pp....................................p.pG......................hh..o..h.s.h..l.h.phl....p...pplphu.....................stshptphh..hs.lpth....thlpp..h.ppth..ph........hccphtt................................................................................thhhphhlAhhN.sp.thhp...........................h.ph...ppph...........t.................................th..tth.sshspl....tpp.......shphllp.la..D..lpshh......tcl.h...s.......pcWhss.................pshctlhsT.hpcah.t-hpp..lp....s.h..hph.hhtphtcpllhc.Ylptlhp....................................t+h.hp......tpp.....ppp.hsp+hh...p-hptlhthFpphs.........................p.h.p.hl.t..hhthl..............hps.s.l..........................h.tht..shhp.pa.sDh..p.p......altslL.thR....s.shspp.hptlht..htt................................................................................................................ 0 97 158 257 +5875 PF06047 SynMuv_product DUF926; Ras-induced vulval development antagonist Finn RD anon Pfam-B_8083 (release 9.0) Family This family is from synthetic multi-vulval genes which encode chromatin-associated proteins involved in transcriptional repression. This protein has a role in antagonising Ras-induced vulval development [1]. 
25.00 25.00 40.30 40.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.34 0.72 -4.30 10 233 2009-01-15 18:05:59 2003-05-27 12:38:40 6 4 173 0 162 228 3 102.50 61 26.07 CHANGED ppshsYGuALLPGEGsAMApYlpcGKRIPRRGEIGLTSEpIpsFEclGYVMSGSRH+RMsAVRlRKENQVYSAEEKRALAhhstEERtKREscllupF+ElIc++ ......................s..hsYGpALLPGEGuAMApYVpp..G..K..RIPRRGEIGLTS-EIusFEssGYVMSGSRH..RRMp..AVRlRKENQlYSA-EKRALA.FNpEERpKREs+llusF+-hlpc.......................... 0 68 90 125 +5876 PF06048 DUF927 Domain of unknown function (DUF927) Finn RD anon Pfam-B_8364 (release 9.0) Family Family of bacterial proteins of unknown function. The C-terminal half of this family contains a P-loop motif. 20.60 20.60 20.60 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.85 0.70 -5.14 39 800 2012-10-05 12:31:09 2003-05-27 14:04:52 6 16 569 0 136 598 113 272.40 28 41.09 CHANGED FpLs.....psGlhhhthscppps.......hhlsuPltlhAcscDs..p...stsauhllp.aps.DGph.+phshstplLtus.uschhtpLlshGhs.hsspt....hptLspaLpph.....pst.pspssspsGWp....ts.........sFlhscpsh..ussstp.lhh....pstpht..................tthptpGTlcsWpcpluphstGNshLhhul...usAhuusLL...phhs.hpusshHhhGsSSsGKoTshplAuSVaGsPs........thh+SWpuTsNuLEuhAutpsDshLsLDElupscs+-..sssl.lYhluNGpGKtRuspsGss..+sspp 
........................................................................................................ptpGhhh.t...tppt.s...............hhlssPltlhsphpD...t.....sss.t.hllp.......sspp..pphs.hst.shl....spcthhpLhph..Gls.lssppt......ppL.spalpt......ssh.hsphsspsGWh.....tu...................................sal.h.s.s.p.l.....hss...s..tp......lhh.......sspsht.......................th.p....spG...Th.csW..pppltp..sp.G...N.h..l.h..hul....ssu.LuusLL......p.hs...hpuh..hhc...h.hGp....SSsGKo.TshplAs.S.VaGsPs..........hhpoWp.uTpNuLEuhAutps.sh.lslDE...l....u....p..s........ss..+p.........sssh.hYsl....ssGpGKtRushsGps..+s.p.t............................................. 1 49 87 118 +5877 PF06049 LSPR Coagulation Factor V LSPD Repeat Yeats C anon Yeats C Repeat These repeats are found in coagulation factor V (five). The name LSPD derives from the conserved residues in the middle of the repeat.They occur in the B domain, which is cleaved prior to activation of the protein. It has been suggested that domain B bring domains A and C together for activation ([1]). 30.00 1.60 33.50 1.60 29.60 1.50 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.07 0.77 -5.52 0.77 -3.41 33 291 2009-09-16 13:35:40 2003-05-27 14:42:21 7 11 8 0 203 308 3 9.00 64 11.44 CHANGED oLSPDLsQp .TLSPDLuQT.... 0 0 0 0 +5878 PF06050 HGD-D 2-hydroxyglutaryl-CoA dehydratase, D-component Finn RD anon Pfam-B_8369 (release 9.0) Family Degradation of glutamate via the hydroxyglutarate pathway involves the syn-elimination of water from 2-hydroxyglutaryl-CoA. This anaerobic process is catalysed by 2-hydroxyglutaryl-CoA dehydratase, an enzyme with two components (A and D) that reversibly associate during reaction cycles. This component contains one non-reducible [4Fe-4S]2+ cluster and a reduced riboflavin 5'-monophosphate [1]. 
28.40 28.40 28.60 28.40 28.20 28.30 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.25 0.70 -5.63 163 2269 2009-01-15 18:05:59 2003-05-27 14:48:43 8 13 1163 10 497 1880 94 306.50 21 44.28 CHANGED hshhsPpElltAu.sh.l...sht..l...........tht.phhh.sstppslssshCshl+s.hu...............................sshh.hschl..l..ssssC-spp+hhp..hhsphts.................hhhhchPpptpp............t......shphatcphc.c..........lhttlcphpupplsp-tl...tpslchhsptcct.hpchhcht...tthPsslss.tphh.hlhptthh.hs.tthhphhptlhschptth.cts.h.........tptthRlhlsG.sPhh.sshph..phlE.psGshllspphsh......sh+hhtshl.ts....................s.luc+hh.p.hss..sh........pp+hctltchhccaps-GllhhshphCcshs..............htthhlcchlcct.slPhlhl-sDh....pssshuplpsRlcAFhE.l ................................................................................................................phhh.pclhhAA.sh.s........sht....L.........................p.h...p.p..ht.pslstshsshl+p.hs..........................................................................................sshh..h.sshh..l..spshss.sph+hhp...hhu.tht.........................................hhhhphs.p.pt....................t...........shthhp..pchp.c........................................lh.plp.hhsp.......l..scpsl.....t.slt..l.hsptpct.lpphhchs...........tt.P.s.lss...tphh.t........hl...hsupat...h..........pphphhhs.pht..tth..ctsph.........hpsh.+h..h.hss.hPhh..s.ph.......phlE...ps....Gh.....shp....sh............hh+.h.h.thh.ts...................................l..hpphh....t.hsh................ctcht.hpphhccap....csl..hthhhpt.sp.sht.....................h.p.h.lcch.lcp...s.lPhh.l-s-.......psss.sGplhs+hpshhp.............................................................................................................................................. 0 271 417 461 +5879 PF06051 DUF928 Domain of Unknown Function (DUF928) Finn RD anon Pfam-B_8442 (release 9.0) Family Family of uncharacterised bacterial protein. 
21.00 21.00 21.10 22.30 20.70 20.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.42 0.71 -4.70 25 177 2009-01-15 18:05:59 2003-05-27 15:01:16 7 5 39 0 58 178 1 184.20 25 72.24 CHANGED pptGGu...........oRGs......Cs.........tsptsLTALlP...p......ss......hGlTssppPThah....YlPtssssts...pFs....LtD-p.scplYpss..hslss.psGIlulsLP..ssss.sLphGcsY+Whhs..lhC..sspssuss......hVcGhl....pRVphssslppplppss.shcpsslYAcsGlWYDsLssLA.pL+pspPpsstltpcWppLLc..SlsL.pplustPll ..................................t.ptuuuoRss........C.............tsp.s.lssLlP...tsp................................hGhTsstpPThah....YlPtssspph........................pFs........Lh..-pp......p...p..lYpps................hs.l...s...s..p.......s..GIlslsLP......tsss..sL.......cs.s...ppYpWths..lhC....sspsp.s..ss...............hVpGhl....pR..lp.hs......sslpppl.........pp.........ss...s..........hppstlYAp....pGlWaDsLspLu.plppppPps.thtttWppLLp.....ulsL..ptlsp.sh....................... 0 3 38 58 +5880 PF06052 3-HAO 3-hydroxyanthranilic acid dioxygenase Finn RD anon Pfam-B_8515 (release 9.0) Family In eukaryotes 3-hydroxyanthranilic acid dioxygenase (EC:1.13.11.6) is part of the kynurenine pathway for the degradation of tryptophan and the biosynthesis of nicotinic acid [1].The prokaryotic homolog is involved in the 2-nitrobenzoate degradation pathway [2]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.95 0.71 -4.79 5 387 2012-10-10 13:59:34 2003-05-27 15:14:34 7 5 349 8 226 543 220 145.40 44 74.95 CHANGED MhslssINlcpWVcEN+GsFpPPVCNKCMH.p-QLKVMaVGGPNpRKDYHIEEGEEaFaQLKGDMVLKVlEcGssR....DllI+pGEhFLLPuRVPHSPQRFANTVGLVVER-RhssEpDGLRaYVusoscsLaEcWFYhpDLGTQLsPlIc-Fa ..................................................slNl.pWl-ENpcLL.pPPVs.Nhpla......p.s...s..ahVMlVGGP..N.tRp.DYH......l.s.p.s................E.a.FYQhcGsMhL+.lh.-..c.....G.....ph..+.....D.l..I+EG-hFLLP.................uplPHSPtR.h.u.sT...lG.LVlERc.R..p.s.p.h.DslcWYC...s..p......C.......s......c............h......laEt.F.h...p..D..lsTplhshhpcF................................. 0 68 127 184 +5881 PF06053 DUF929 Domain of unknown function (DUF929) Finn RD anon Pfam-B_8458 (release 9.0) Family Family of proteins from the archaeon Sulfolobus, with undetermined function. 25.00 25.00 32.80 32.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.87 0.70 -5.29 18 134 2009-01-15 18:05:59 2003-05-27 15:16:42 6 2 92 0 25 86 8 229.60 41 83.04 CHANGED p+llhlslllllll.lllhlhhsph...............l..t.........spsshs....phhKlSspshussGpstlhaloW.GCPhGAssSWsLYlALp+aGsl..slphphSss........scshsshPGLlFhsa.s.....p..lpFpshY....lYsphlsto.hNGpsl....sphlthG.p.lppp.....lPstlYpllppYpsps..........s.s......Hls.sIlIoGstGoYhhsGs...ls...Pshl......Ssh.......sssYVlpp.Lcsss.......IhsuAshIpphIpc 
..........................+lhYlslulLAls.lIIhuslltlppopps.........................................................hps.pstlucAlsSIspssYspVusGo.....plsKl..u.N.p..s....s..pcG..KVclaYVGuEaCPaCAhERhPLssALSRFGsF.SuL+splSSP.......tp+phuNIPTlTFcNY................................cYoSpY........Vsh...-uhEhu.DRpGc.I..................us.....LPpstpslasc..Ysstp...........................uIPFS.hhGshssssP...Sh....Phhh..............pc.....sPptVlcs.LsNPNStpAp.tIluuANLhTApICp....................................................................................................................................... 0 5 10 23 +5882 PF06054 CoiA Competence protein CoiA-like family Finn RD anon Pfam-B_8535 (release 9.0) Family Many of the members of this family are described as transcription factors. CoiA falls within a competence-specific operon in Streptococcus. CoiA is an uncharacterised protein. 20.30 20.30 20.40 20.50 19.80 20.20 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.47 0.70 -5.57 14 1177 2012-10-11 20:44:44 2003-05-27 15:38:20 6 7 1110 0 142 756 21 285.00 25 86.93 CHANGED M...L.sAhsppGphlslhpttp.p.......cppaaCPsCtuplhl.....KpGphhhsHFAHcshpsCphhtEsEStpHLtsKthLYpalpp.pc.p.................VplEhYlsElpQhsDlhlNpp...........................................lAlElQCSplshpclpcRTpuYpptGhpVhWllG....pplhhpc............................ploplppphlahsp.shshahach-ht..cph+h+ahhhpcltG...................................................+hahtpcphsht.sphhphhp........................................hsa.pp.h.ph.h.h..pht.hIppplhhtp.h.hp.pc..........chYp................pGpsllphhhtsha......................................htshp.h...p....p...pthhphpp.slchahcsa......................hhhhpcptpt.hpplhsshahhp..h 
........................................M...h.hAhstptph.l.h..hp.....t.............cptahCPs....CtppVhL.....+.pG...p....hhsH...FAHc.p.h.p.s.C..p....h..h.........E..s.........ES..cHhtsKthLhp..hhpp..ps.p.........................................Vp...l.Ehh..L.......sc...l.p....Q...hsDl.h.l..sp.p...............................................hAl.El..QC.Ssls.......cplhcRopuYpphGhpVhWl..LG.............pph..hhcp................................................................................plp.h..p.h.thh...hhs....ph..thaha.ph..p....p.hhhha.hh.p......ts............................................................................................................................phah.t.......pth.....tph.plhp....................................................................................p.....h..h...h................h..l.p.p.h.........................hh...................t..h........h.............................................................................................................................................h.......................................................................................................................................................................................................................... 0 36 78 111 +5883 PF06055 ExoD Exopolysaccharide synthesis, ExoD Finn RD anon Pfam-B_8604 (release 9.0) Family Among the bacterial genes required for nodule invasion are the exo genes. These genes are involved in the production of an extracellular polysaccharide. Mutations in the exoD result in altered exopolysaccharide production and defects in nodule invasion [1]. 
22.10 22.10 22.70 47.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.96 0.71 -5.18 82 395 2009-01-15 18:05:59 2003-05-27 15:49:10 7 1 292 0 170 394 61 182.00 27 87.18 CHANGED pslsshLcplt....................psppsph..olt-llcshGc+uFusl..lllhuLhhlhPls..lPGhs.olhulhlhlluhQhlhGRcphWLPphltcRplspcplppslpphpshsph.l-+hh+s.RLphlssssh.hplhsuhslhlsshh.hL.h..lPFssslP.uhAlslhululhsRDGllhllGhlhshsshshhshlh .......................t..hsphLpphh................t...ptpsspl..olt-llstlucRu..FGhl..lllhuLPshlPls...PGho.slhGlslllluhQhhhGpc.psWLPphltc+slpppplpthlpphtshlpp.lE+hh+P.Rlthlssshs.tpl.hGlhlhlhulhlhLP....lP.h.oNhlP.uhulhlhulGllpRDGlhhlhGhlhshhshshhhhh.h............... 0 48 110 136 +5884 PF06056 Terminase_5 Putative ATPase subunit of terminase (gpP-like) Bateman A anon Pfam-B_7152 (release 9.0) Family This family of proteins are annotated as ATPase subunits of phage terminase after [1]. Terminases are viral proteins that are involved in packaging viral DNA into the capsid. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.65 0.72 -4.41 16 859 2012-10-04 14:01:12 2003-05-27 16:53:51 7 16 638 0 101 1339 101 56.20 37 11.50 CHANGED -sRcpAphLYapGapss-IActLshp.stTVauWpcR-pWcshssl.pplptulcsRlsp ..................-sRppAt..hL.Y..a....Q...G....a....ps.s.cIAchLsh...........p..ssTVauWKc.RDp.WcchsP..l..pphp.shtuRhhp........................... 0 19 53 78 +5885 PF06057 VirJ Bacterial virulence protein (VirJ) Moxon SJ anon Pfam-B_7524 (release 9.0) Family This family consists of several bacterial VirJ virulence proteins. VirJ is thought to be involved in the type IV secretion system. It is thought that the substrate proteins localised to the periplasm may associate with the pilus in a manner that is mediated by VirJ, and suggest a two-step process for type IV secretion in Agrobacterium [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.15 0.71 -4.52 12 339 2012-10-03 11:45:05 2003-05-28 10:33:45 6 5 263 0 92 319 18 180.10 39 47.26 CHANGED pDphAlhhSGDGGWR-lD+clustLQppGlPVVGlDSLRYFWsERoPppsAsDLsRlIchYpc+WpsppVlLlGYSFGADllPtsaNcLPssp+spVp.huLLuLu+psDachplpGWLGhss-Gtt.s.ss-ltplssshl.ClYGp--cD.ssCPsLctpss-sluhsGGHHFDtDYtsLAcpllsuhcsRh .........................DhlslhhSGDGGW.R.-lDKsluptLpppGlsVVGlD..........S..L..RYF.W......s....c+....oP.....pph....AsDLsclhppYptcWttp...cl...hL...lGYSFGADlhPhsascLsss.psplphlsLLuhu.ppusFplclpGWLGhssct....sssPtlspls.s..s.hlhClYGp-EcD...shCs..s.......pt.p.shchlpLPGGHHFDpc.Y.slAptllpthpt.t................................................................ 0 19 38 64 +5886 PF06058 DCP1 Dcp1-like decapping family Finn RD anon Pfam-B_8271 (release 9.0) Family An essential step in mRNA turnover is decapping. In yeast, two proteins have been identified that are essential for decapping, Dcp1 (this family) and Dcp2 (Pfam:PF05026). The precise role of these proteins in the decapping reaction have not been established. Evidence suggests that the Dcp1 may enhance the function of Dcp2 [1]. 20.60 20.60 23.00 20.90 20.50 20.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.34 0.71 -4.07 9 430 2012-10-04 00:02:25 2003-05-28 11:45:17 8 7 272 7 284 392 3 112.70 32 29.81 CHANGED hspttppl...sLssLpphDPYIpcIlDhuuaVAlYpFssptscWpKs-lEGThFlYpRuspPhauahIhNRhshcshlEslspshchplpssFLlYR....ssp..pIhulWFYspp-sp+IhpLhppll ..............................................t...tppl...sLssLpRh.DP...h.IppIlph.uupsslYpFs.p......s.s....p.WpK.p-..l.EGoLF.l.....h.....p....R.......s.........s....p........P..........p.......a...shhlhNR...hs...h..........c.Nhscs.l..........sp..s....h.-...hp..lpt..s....a..lla+.........................ssp....pl.hulWFaspp.-pppl.thh................................................... 
0 76 137 220 +5887 PF06059 DUF930 Domain of Unknown Function (DUF930) Finn RD anon Pfam-B_8283 (release 9.0) Family Family of bacterial proteins with undetermined function. All bacteria in this family are from the Rhizobiales order. 25.00 25.00 32.90 45.10 19.90 19.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.23 0.72 -3.77 20 145 2009-01-15 18:05:59 2003-05-28 11:50:12 7 1 106 0 50 101 3 99.30 44 51.86 CHANGED ltpLssppRlpQlCssEAhcplt+spssapPDplsuashssshhsGsslcAsGAAFRSpspWYclsF+Ccsss-shsVhSFsaplGctIP+ucWschtL..hs ....Lc+L-PssRLEQtCDlEAME+Is+-pstapsD+VlAYuaucPhhptNolcAsGAAFRS+pcWY+LuapCcsDs-phslpSFuYtIGsclPp-pWc++hLh.P.. 0 7 23 32 +5888 PF06060 Mesothelin Pre-pro-megakaryocyte potentiating factor precursor (Mesothelin) Moxon SJ anon Pfam-B_8552 (release 9.0) Family This family consists of several mammalian pre-pro-megakaryocyte potentiating factor precursor (MPF) or mesothelin proteins. Mesothelin is a glycosylphosphatidylinositol-linked glycoprotein highly expressed in mesothelial cells, mesotheliomas, and ovarian cancer, but the biological function of the protein is not known [1,2]. 
17.50 17.50 17.60 17.50 17.20 16.80 hmmbuild -o /dev/null HMM SEED 625 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.30 0.70 -13.09 0.70 -6.47 3 103 2009-01-15 18:05:59 2003-05-28 12:04:28 7 2 42 0 65 83 0 409.20 29 80.15 CHANGED MALPTARPLLGSCGSPICSRSFLLLLLSLGWlPhLQTQTT+TSQEAALLHAV.sGssDFASLPTGLFLGLsC-EVSGLSME+AKELAMAVRQKNIsLpsHQLRCLARRLP+HLTsEELDALPLDLLLFLNPAMFPGQQACAHFFSLISKANVDVLPRRSLERQRLLsuALKCQGVYGFQVSEADVRALGGLACDLPG+FVA+SSEVLLPWLAuCsGPLDQuQpKAVREVLRSGRsPYGPPSTWSVSTLDALQGLLsVLDESIV+SIPKDVlAEWLQ+ISRDPSRLGSKhTVlHPRFRRDsEQKACPPGKEPpcVDEsLIFYQNWELEACVDGTLLAsQMDLVNEIPFTYEQLSIFKHKLDKTYPQGYPESLIQQLGHFFRYVSPEDIRQWNVTSPDTVKTLLKVSKGQKMs........AQVIALVACYLRGGGQLDEDhVKALssIPLSYLCDFSPQDLHSVPSSVMWLVGPQDLDKCSQRHLGlLYQKACSAFQNVSGLEYFEKI+TFLGGASsEDLRALSQHNVSMDIATFKKLQVDALVGLSVAEVQKLLGPHIsDLKTEEDKSPVRDWLFRQ+QKDLDoLGLGLQGGIPNGYLVLDFNVREAFSSsAsLLGPGFVLsWIPALLsALRLS .....................................................................................................................................................................................................................h..h...hh..h...................C..hh..h.t.thp.h..t..s....t.t.l..tuhtC......t....h.t....................lsttph.h.........LGsLsC....sh.sst.l......t....us.....lL..tL.tC....ls.sQt.uhp..hl.ttts.hG.Ps.Ws.tslptLt.L..h........ht..........h...h.th.........p.........th........hh......h.h......t......t.............t.................t.....h.........................t........h..............h..p........t.t......tt.............h......s.............s..t..l...pp.p...h..h.h...Ypp......p.hcs........ClssthLtsp.hs.l...th.sh.shpp.plhKt..KLsphY..Pp..G.hPEs.lphls.hhhhh.o..p-ItpWslTS.-TlhuLLt.s....s.......p...............spstsllt+a..lttt.G.p.lstshh....h..h.s.t.hLC.hs.pplpsl...sp.hh.h.st.s..clssCs..pphslLas..KAc.A...F......t.....s.......s..t..s.....t.Ya.hhpsaL................G................G.As...........h.....p-LptLuptNlsM...DhsTFhpLp.p.l...LsltpVptLLG.pl.sLpt.cpps.lptWh.p..pptLst.LGls...................................
........................................................ 0 4 14 26 +5889 PF06061 Baculo_ME53 Baculoviridae ME53 Finn RD anon Pfam-B_8086 (release 9.0) Family ME53 is one of the major early-transcribed genes. The ME53 protein is reported to contain a putative zinc finger motif [1]. 25.00 25.00 121.70 66.10 23.40 20.20 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.36 0.70 -5.45 21 63 2009-01-15 18:05:59 2003-05-28 12:06:23 6 2 54 0 0 62 0 311.00 28 87.23 CHANGED +DLRh+FLSpps+plhpull+FAoNYVpGhhpspshh.hsp.t.hphc......hsppopCs.tCtt+F+csschh....Lasllcp.h...s............ssp.s+FchsCpcCtppht.....phssaplYPplsLtslccLsctGFhppYlFPlchchppppcphhhhst....cshacslcpIlpc.KcsNEpIhpIsLp..ThGtllhpEs.psshlc+hps.h.t...ss-ls..hhsssSphhphlpspt.ps..hTYFhEVht+sapph.hs..alsahs+sstth.....CphC+.sKhY..cps....PVLaCS+CGFTsshaFp.....p...hsslhap.csVp..spp...hpsptlhYYDlshat ......................pDLRh+FLSppppplhpuhlcFAssYlpGhhp.pph..hth.t....p.......hhppopCp.pCptpF+pspp.........hLasllcp.h................sspss+F+hsCtcCtpphph........hslhELYPslsLtsVc+LsphGFlp+YlFslshshphpp+phhlhsh....pclapslppIlpp.KpsNEpIppIpLp..ThucllhcEshpshhlcptpp.......ss.lp..hh.tsSphhphlpstp..s..hTYahcVhh+happh.hs...alsaashsspt......CphCp.sKhY..psp.....PVLaCSpCGFTsshaFp.........h.pphtYh.ppVp....hpp.....h+sp..tlhYYDhphh................................ 0 0 0 0 +5890 PF06062 UPF0231 Uncharacterised protein family (UPF0231) Finn RD anon Pfam-B_9027 (release 9.0) Family Family of uncharacterised Proteobacteria proteins. 
25.00 25.00 25.00 25.00 24.50 24.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.54 0.71 -4.23 38 786 2009-01-15 18:05:59 2003-05-28 12:11:05 6 3 779 0 93 332 7 116.70 58 95.91 CHANGED M-YEFp+s.lsGphhAchSMsHEslGpWhsEElupshpplsplhstIpplcsuptt..-hpLhGpEhoLhlss-ElhVpAN.....sLth-..p-.EhE-shphYDpESh.AhCGLEDFpphLpuWpsFlp ............MDYEFLRD.lTGsV+VRhSMGHEVVGHWFNEEVKcN.LuLLDEVEpAA+slKGSER...sWQpsGHEYTLWhDGEEVMV.RAN........QL-Fu...GD...EM..EE.G.MsYY...DEESL...SLCGlEDFLpVlsAYRsFl.................... 0 8 29 63 +5892 PF06064 Gam Host-nuclease inhibitor protein Gam Finn RD anon Pfam-B_9063 (release 9.0) Family The Gam protein inhibits RecBCD nuclease and is found in both bacteria and bacteriophage [1]. 25.00 25.00 34.10 34.10 20.20 18.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.33 0.72 -3.89 2 349 2009-01-15 18:05:59 2003-05-28 12:30:04 6 1 243 4 3 150 1 94.40 91 97.43 CHANGED MNAYhh.DRlEAQsWsRHYQQlAREEKEuELADDhEKGLs.HhhESLChDcL.RHGAsKKuIoRAFDDDVEFQERhuEalRYMsEThu+HQlDI-SE. .........MNAYYIQDRLEAQSWARHYQQIAREEKEAELADDMEKGLPQHLFESLCIDHLQRHGASKKAITRAFDDDVEFQERMAEHIRYMVETIAHHQVDIDSEV............... 0 0 0 1 +5894 PF06066 SepZ SepZ Finn RD anon Pfam-B_9064 (release 9.0) Family SepZ is a component of the type III secretion system use in bacteria. SepZ is a gene within the enterocyte effacement locus. SepZ mutants exhibit reduced invasion efficiency and lack of tyrosine phosphorylation of Hp90 [1]. 
22.70 22.70 22.80 72.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.19 0.72 -4.00 4 76 2009-01-15 18:05:59 2003-05-28 13:21:08 6 1 53 0 1 38 0 96.70 75 99.39 CHANGED M-AANLSPSGsVLPLAsoINGNssVDEsTGVMpPENGssRslRlhAGlALusoALAAlGsGIAuhCs-suSpp.hLGLGIAuGVLGGlTulGGuLAMKYA .M-AANLSPSGuVLPLAATINGNsSVDEpTGVMpPENGsSRslRIlAGlALuTTALAALGTGIAhACopsuSop.aLGLGIAoGVLGGVTAlGGGLAMKYA 0 0 0 1 +5895 PF06067 DUF932 Domain of unknown function (DUF932) Finn RD, Pollington J, Bateman A anon Pfam-B_9083 (release 9.0) & Pfam-B_002957 (release 23.0) Family Family of prokaryotic proteins with unknown function. Contains a number of highly conserved polar residues that could suggest an enzymatic activity. 20.10 20.10 20.40 20.10 20.00 19.60 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.48 0.70 -5.06 43 1425 2009-01-15 18:05:59 2003-05-28 13:23:55 6 4 644 0 189 1175 404 205.70 51 80.09 CHANGED sLushSsRYphlpspplL.....psLpccu.t...shhpotutl+s.GR+has..hhRhtcsspl.......ssthss.llLhsSHDGosuhphhhsshRlVCsNTLshu..usssu..sl+V.H..psssshclt-utpplt.h..pta-phptphcthtphplsps-tpsahpsslshp..................s.tpstpssstshsplhphhcscsp........psohWushNuVsEals+tths.Rss...............cp+tspultGh..sspLpptta ...............................................................HpSRSc.+YsYIPTIslL........csLpcEG.FQ.....PFFACQoRVRD....GRRtaoK.HMLRLR.R.supI.........sGpclPEIILLNSHDGoSSYQMlPGhFRhVCpN...GhVC.....GpshG........ElRVPH..+Gsl..V...t.c..VI.E.G..AYEVl.............u.....lFD+lp-pt-AMp.....slhL..ssst..ppshApAALshR...............................Ys......-c+pPV..Tssp.IL..o.P.RRpEDh.............tpDLWosaQplQENh.lKG.Glo..GRSA.........pG........++h+TRAlpuIDsDl+LN+ALW............................................................................................................... 
0 37 102 151 +5896 PF06068 TIP49 TIP49 C-terminus Moxon SJ anon Pfam-B_9170 (release 9.0) Family This family consists of the C-terminal region of several eukaryotic and archaeal RuvB-like 1 (Pontin or TIP49a) and RuvB-like 2 (Reptin or TIP49b) proteins. The N-terminal domain contains the Pfam:PF00004 domain. In zebrafish, the liebeskummer (lik) mutation, causes development of hyperplastic embryonic hearts. lik encodes Reptin, a component of a DNA-stimulated ATPase complex. Beta-catenin and Pontin, a DNA-stimulated ATPase that is often part of complexes with Reptin, are in the same genetic pathways. The Reptin/Pontin ratio serves to regulate heart growth during development, at least in part via the beta-catenin pathway [1]. TBP-interacting protein 49 (TIP49) was originally identified as a TBP-binding protein, and two related proteins are encoded by individual genes, tip49a and b. Although the function of this gene family has not been elucidated, they are supposed to play a critical role in nuclear events because they interact with various kinds of nuclear factors and have DNA helicase activities.TIP49a has been suggested to act as an autoantigen in some patients with autoimmune diseases [2]. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.24 0.70 -5.75 16 859 2012-10-05 12:31:09 2003-05-28 13:25:51 8 13 383 10 602 1792 868 352.50 52 84.15 CHANGED RIusHSHI+GLG......LD-sh................................................................pscpsup.........GhVGQhpARcAAGVIlchI+ptKhuGRulLlAGsPGTGKTAlAluIucELG.pcsPFsslsGSElYSsEhpKTEsLhQAFR+uIGlRIKEppEVhEGEVV-lph....-pPhs...spshptsplsLKTschppphcLusclhEsLpKE+VpsGDVIhI-psoGplp+lGRS.stup-aDl..psscaV.hPcGElpK+KElVpsVTLHDlDV.hNuRs...QshlulF....pscpsEIpsclRppINchVscalEpGpAEIVPGVLFIDEVHMLDIECFoaLNRALESshuPIllhATNRGhspIRGT.DhhSPHGIPhDLLDRlLIIsThPYsccEI+pILcIRspEEclpls--Ah-hLscIGpcoSLRYulpLlssApll .....................................................................................................................RluuHSHI+GLGLc.p.s.h............................................................................................p..sc...s.up.......................Gh.VGQ..pA.RcAu.Gll.l.....c.hI......+p.......tK.......h...A..G.......R...A...l...L...lAGsPG......TGKTAlAhuluQp...LG...scsPFsshsGSEl.aShEhpK....................TE.s.Lh.ps.FR+uI.GlRIKEp.pElh....EGEVsElp..............-p.....shs.........spsh.ppshl..sL.KT..s.chpphhcLssphh-ulpKE+VpsGDVIhI-tsoGtlp+lGR.S...s..h..u..p-..aDh...............tsp.c.aV.hPcG-lpK+KElV.psVTLH-lDV.h.NuRs................Qsh.luhh....pscpsEI.psclRppINp....h..Vscah-pGhAElVPG....VLF.IDEVHMLDIE...CFo..aLNRALES.sh...AP.I.V..Ih.A..oNRGhspI...RG............T.....s............h..pSPHGIPlD.LLDRllI.Ip..T.sYstcElc.pIlp...........IRupp.Eslplsp-.A.lshLs.clGt...coSLRYAlQLlosApl.l............................................................. 0 228 347 506 +5897 PF06069 PerC PerC transcriptional activator Finn RD anon Pfam-B_9117 (release 9.0) Family PerC is a transcriptional activator of EaeA/BfpA expression in enteropathogenic bacteria [1]. 
22.50 22.50 22.80 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.02 0.72 -3.95 4 532 2009-01-15 18:05:59 2003-05-28 13:31:51 6 3 323 0 17 287 0 72.20 37 68.03 CHANGED M.l+D+lAcpLEtKGhYRRAA-RWucVMl.lp.oDtcR+hss.+Rs.ClpKup+sPls.s.Nho-l+pAVs+sapcMGlshsscclFRpYpss ...........h.....phApcLEu+GhaRRAAshWhtshtpsc.o-spRp.hhtRRppCLpKuu.......+......shs.......................................s..................... 0 1 1 8 +5898 PF06070 Herpes_UL32 Herpesvirus large structural phosphoprotein UL32 Finn RD anon Pfam-B_9125 (release 9.0) Family The large phosphorylated protein (UL32-like) of herpes viruses is the polypeptide most frequently reactive in immuno-blotting analyses with antisera when compared with other viral proteins [1]. 25.00 25.00 55.60 55.40 19.40 19.00 hmmbuild -o /dev/null HMM SEED 839 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.42 0.70 -13.57 0.70 -6.08 9 59 2009-01-15 18:05:59 2003-05-28 14:01:11 6 2 25 0 0 58 0 674.30 31 98.07 CHANGED 
Ms.....LsFltLs++sVspLspFLcsL.pcssVDLcpHP+llpcC...stcpLpRcosLaNpLlLWLtYYcpLph+pPDhpsLhc-hchptsslschspthshhp.spuhscLsslspssFts-hpscscll.pshhs...LA+h...tttp.lshG......hsFlNL+sc-spclccNLpssppNMhphpslcls..pptNssLVsslNKLlYLG+lllslppSWpcLpctCLs+Ipphp+pLl+pl+pshuFsusYspNlLc+sV.-GpospslLchLtEDasIacsu...............LEasD................................................................................................................................shpSpp-cc--.sssp..shh.................................................................................s.ptssh.sp-Esssccc.t.tu.tst....sppp..soh..Dlpsuspsst.sss.tsspothh.E...........................Pttustss.s.ph.u.....s.p.Phhuss..ssphsP.p..........s...thshtsshh..........hss.s.hs.s..shstshsshsshchpspsth.sp.p.PsspcsosssophssspR....tsps......p.sspspEshss..pctpsoDspsVhphtph..t..h.......sspssp+ucpphhht.phssstssss......spspPshs.h..sssssshsphhp.s.....h.ss..ph+shtsh.spshss.sosl+P.shssts...sssptlssspsssssttsthPuutus.........................p..p..............st.thpss.utph.....................hpssshscl.tthsthshtssssus..hp...........s.hopthsuss..........................................stsshsthtsspss--sltsllp+hptctpp........................pc 
.........................Mu.LtFltLp+psVstlhpFLppL.ph.slsLptHP+lltcC...ttppLpRcohLaNchhLWLtYaccLph+pPD...hpslLpchc.ptstlsphs..hshP.hs..thcuhsclslh-cs...atsDhh.cs-l.l.+ul.s...LA+h...ph.p.hspuh.....huFlpL+sp-Vpclt-NlcsstpNhhhhcslclp.shp.ssshV.shN+LlYhG+LhhslppSWppLpchCLsRIpphp+hLltth+.p..uFspsYspNlLc+sV.sGsoA.sLLchL.EDFtlahsu..................LcasD.t.......................................................................................................................hhhpSt.-scs.Dss-..............................................................................................ttssh..t-..Esssptt..........................D.tss.tph...........................................P.hu.tssss..t.ssh.s.sstppshssst..shphpP..................h..ssh...........hss.t.................phphsspsps.sp.p.Psp.pssss.ou.psstt.....shu....tsctsspcpEt......c.s.puoDstpVhphtc..............ssphspp....up.phhhthphsp..Ttss......sppcP.ht.hh.ss.ssshsphhtss..............php.tp.h.sss.ssssohl...shssth...sp.p.l.ssttsps.t.st.Pputsp.........................p..p.s.....p.......ss..hhsp.usph..........................hpsh..shpth...psstts...............s.hotthtut.............................................................................t............................................................................................................. 0 0 0 0 +5899 PF06071 YchF-GTPase_C DUF933; Protein of unknown function (DUF933) Moxon SJ, Studholme DJ anon Pfam-B_10000 (release 9.0) Domain This domain is found at the C terminus of the YchF GTP-binding protein (Swiss:O13998) and is possibly related to the ubiquitin-like and MoaD/ThiS superfamilies. 
20.10 20.10 20.70 20.50 19.70 19.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.65 0.72 -4.19 44 5012 2012-10-03 10:59:06 2003-05-28 14:04:36 8 16 4777 7 1296 3379 2253 82.60 63 22.65 CHANGED LtTaFTsGscEVRAWTI+pGspAPpAAGlIHoDFE+GFI+AElluY-Dhlph.s...oEstuKEsGKhRhEGK-YlVpDGDllpF+FN ..........LtTYFTAGsc.EVRAWTI+pGspAPQAA.GlIHTDFE.+GFIRAEslu.Y........-Dhlp.h....s......uEpu..............A..KEAGKhRhEGK-YlVpDGDlh.pFRFN.................. 0 463 849 1106 +5900 PF06072 Herpes_US9 Alphaherpesvirus tegument protein US9 Moxon SJ anon Pfam-B_7164 (release 9.0) Family This family consists of several US9 and related proteins from the Alphaherpesviruses. The function of the US9 protein is unknown although in Bovine herpesvirus 5 Us9 is essential for the anterograde spread of the virus from the olfactory mucosa to the bulb [1]. 22.40 22.40 22.40 44.00 22.20 22.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.01 0.72 -4.14 10 46 2009-01-15 18:05:59 2003-05-28 14:28:17 6 1 28 0 0 46 0 57.90 49 47.07 CHANGED CYYSESDNETAs-FL+RlG++Q.spR+..RRRpChlsluhlhsslllCslSuhLGulluhhl CYYSESDsETAs-FLhRhGRpQ.shR+..RRRRshhsluhlhsslllsslSuhlGulluhhh.. 0 0 0 0 +5901 PF06073 DUF934 Bacterial protein of unknown function (DUF934) Moxon SJ anon Pfam-B_8947 (release 9.0) Family This family consists of several bacterial proteins of unknown function. One of the members of this family Swiss:Q8YEW3 is thought to be an oxidoreductase. 
25.00 25.00 25.20 25.10 23.70 23.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.45 0.72 -4.36 75 595 2009-01-15 18:05:59 2003-05-28 15:08:50 7 3 527 0 191 481 128 109.70 38 66.70 CHANGED GVhlss---sc.pL....ts..pLspl..sllAlsFPsFsDGRuaSpApLLRcRhGapGELRAlGDVLhDQlhaMpRsGFDuFslcs....stshpsAhc.shscFosh....YQsuss.....pstsha..pRp .....................uVhlssc--sp...sL....ts......cLspl..slIulpFPsFsDGRuaShAcLLRpRhGapGE..LRAhGDVLhDQlhahpRsGFDuaslcs....cpshc....c....shc....sh....scassh....YQsust....t..s............................. 0 42 107 148 +5902 PF06074 DUF935 Protein of unknown function (DUF935) Moxon SJ anon Pfam-B_10021 (release 9.0) Family This family consists of several bacterial proteins of unknown function as well as the Bacteriophage Mu gp29 protein Swiss:Q9T1W5. 20.30 20.30 22.30 21.70 19.90 20.10 hmmbuild -o /dev/null HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.43 0.70 -5.79 54 588 2009-01-15 18:05:59 2003-05-28 15:19:29 7 5 459 0 91 595 77 428.60 21 92.12 CHANGED M.sp...........s.ht.....t.ttphpsph..tthshhtt.h...psupGlo.sschssILcsstpGs.lps.t-Lh..h.t+DsHltuplppRKtAl.....hul-Wplpssss.sstcp+hA-hlc-hlpchs...........hccllh-hhDAlhhGaushEl.hW..tpps...shahspslph+s.caFphsppst..........cl+hpsss.......Gp...L.shpalh+pp+ucss..shttGLh+hlhWsahFKphul+.aspFhEhYGhPhtlGKYss.uAspp-+ss....LLcAltslspsAuulIPcsMplEhhcAus....uusssapphlcas-pplSKAlLGQThT..op....sssuspAtuplHs-VRcDlhpuDsc.lspTlsptLltslhtlNh...tsstphPphph.ppEstDhpt.......hu-shppLssh.Ghchst...pahp-chsls.sptspsshshsstss............stth.........................................stttt.t................pstlDshhsph............hsphpts.hcshlpslhshlppusoh--hhppLh.phh.sphsssphtchlucuhhsAclhGphss 
..................................................................................................................................................................................................................................................................................................hh....p...hhpt.tts..........plh.....p...DspltuthtpR+tul.....hshphpl.ssps...........t.tchlpchh..pph...................hp.ch.lhphh.-ulhhGaushEl...a..............thps.........th.hh.ttlhh+s....p...h.F...t...hs..p.s.t...................h.hphp.ss....................G..........l..s...hta.lhth.hp.upts.....t.uLhphhha.hhhKphulc.ahpFhEh..a..G..h..Phh...lGKhss...ss.....spp-hpp..............L...hpu...ltp...h..s.ps..u..u.ul.......lPp.............s..............p.............l..-....hhpsss......sssssa..p..ph.hcat-pplu+slLGpsl..T..op....spsuohAhupsHp-.....Vpp-lhpuDsc.lttslsptll............h.......hhtlNh.........sspthP.ph.htt...p...-s.tD.....hpt............hupth.p.L.s.s.h.G.h.p.lst.....pal....pcph.slststtt.p.thht....s......................................................................................................................tp..hpthhtth...........htt..ht.hht.h......th...ht....t.ts..tth.......t......tl.............t.h..phs.tth.......p.ht.hh..uph.G.................................................................................................................................................. 0 39 71 83 +5903 PF06075 DUF936 Plant protein of unknown function (DUF936) Moxon SJ anon Pfam-B_10047 (release 9.0) Family This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
20.10 20.10 21.60 20.80 19.30 19.20 hmmbuild -o /dev/null HMM SEED 580 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -13.03 0.70 -5.59 13 173 2009-09-11 12:47:55 2003-05-28 15:22:28 7 4 21 0 122 161 0 420.20 24 96.01 CHANGED LsPGVLhKL.Lp..sMsos.lK..ssGEHRSu......LLQVhsIVPA.Luuu...-LaPspG.FalKlSDSoHSsYVSLss.-csDLILSsKlQL.GQFIaV-+...L-suoPVPll+GlRPlP.GR+Ps.VGsPcDLhusc..s.hpt.........................................................shppp+tssssRphshssss+p......................shsuSPssht.h.hshp..................phsptpsuhhhsup..hptsSPsstppsss........t...................Guuph+Kpssshst............stpsps+s.stppssttpsssspsPs+sp............ptth.tthpchshsssspssspospppss...t...................p.hphsstslsWsSLPssLs+LGK........EshcpR-sAthuAlcALcEAoAsEsll+sLptFoELop..suKt-sPtssl-pFLphappltpsphh.hpSlststsssstsp.......................pppsuhp.lpu...AlsTshsshslh...........................................ppp.sshpph..sth.pptppspsstpt..............ptsGls-oscLAppLppEupsWFLcFlEcuL-pG.hctp.............ptpssuclut......hLSQLKpVscWLDplsssppp..................-pl-pLR+KlYthLLpHVposAuulss 
....................................................................l.sGlL.+l.lp...t.pss..p......sphRss......lLQVhtllPh.lstt.......p.h...tp.G.FhlplSDu.+uhYsp.s..tpt-hll.ssp.pl.Gphhalp+...l..p....u.PVPhhhsh+sls.tRt...h..hGs..Ptshh.........................................................................................................................................................................................................................................................................................h................t.............................................................................................................................................................................................................................................................t.........slss..l...up.....................thhp.+p.A..huhpu.p-A.hsp.llp.hp.auplp...ssp.tss..hhp.tFhth.t.h.p..h................................................................t......tt.t.....................................................................................................................................................................................................tthtt.hpLt..l..-.ttWFhtalEthL-.t..th......................ttpht.............hhps.pWlp..................................................................s............................................................... 0 14 78 104 +5904 PF06076 Orthopox_F14 Orthopoxvirus F14 protein Moxon SJ anon Pfam-B_10072 (release 9.0) Family This family consists of several short Orthopoxvirus F14 proteins. The function of this protein is unknown. 
21.90 21.90 22.40 125.40 21.00 17.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.50 0.72 -3.92 2 33 2009-01-15 18:05:59 2003-05-28 15:26:25 7 1 19 \N 0 20 0 72.90 86 100.00 CHANGED MKH+lYSEGLpISsDhNSIIuQ.ST.DhDIEIDEDsIhELLNILTELGCDVDFDEsFSDIsDDlLESLhEQDh MKHRlYSEGLuISsDLNSIIuQQS....ThDhDIEIDEDDIMELLNILTELGCDVDFDENFSDIADDILESLIEQDl 0 0 0 0 +5906 PF06078 DUF937 Bacterial protein of unknown function (DUF937) Moxon SJ anon Pfam-B_7321 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 24.40 24.40 24.40 24.80 24.30 23.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.90 0.71 -3.94 89 984 2009-09-10 15:56:24 2003-05-28 15:43:14 6 6 888 1 262 664 43 125.00 30 71.04 CHANGED l-plhu....t..htt............................hsthlGh.....................................................ssspsssuh..ssh..lssll.......uuh......ttt.sts..................................sGhsu........................Lhstlppssh......................................ssthtshlu..su.s.sh.............usshlsplhsu....psshsplupp...oGlst..spltphLshhhPhllshLssp .................................................h.........................................................................................................................................................hstl..l.........Guh.....tt........t..ts...................................GGlps........................LlppL.ps.u..G.L....................................Gshl...s...o..Wlu....ptsNpsl...............uscp.Lpssl.G.....ssslssLup.+.........hGlss.spssshLuphLPpllDtLoPp............ 0 53 152 207 +5907 PF06079 Apyrase SHAPY; Apyrase Moxon SJ anon Pfam-B_7593 (release 9.0) Family This family consists of several eukaryotic apyrase proteins (EC:3.6.1.5). 
The salivary apyrases of blood-feeding arthropods are nucleotide hydrolysing enzymes implicated in the inhibition of host platelet aggregation through the hydrolysis of extracellular adenosine diphosphate. [1]. 20.30 20.30 20.50 35.50 19.70 19.40 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.87 0.70 -5.26 20 238 2009-01-15 18:05:59 2003-05-28 15:48:53 6 4 129 8 104 237 2 249.00 43 77.24 CHANGED LDpsStspcct..soWtShl++GpLphsssppslolcW-pt.pltlpS+huhKGRGMELSELlsFNGKLYolDDRTGllYcIpcs..cllPWVILsDGDGsssK...............GFKuEWsTVK-ppLYVGShGKEWTTosGphhNpNPhWVKsIshcG-VpphNWhspYcplR.sAhGIpsP.GYlIHEussWSsptppWaFLPRRsSpEpYsEptDE+pGsNhllousEsFpclcslcls..shsPs+GFSSFKFlPsTcDplIlALKopE.....csGp.sATYlosFsl-G+lLLsEp+I.sDsKYEGlEFl ................................................DptSt..t.p.tp..poahShl+hG.Lphs.......s...............s.t.plslch-.t...phhlpo.....phu.pGRGMELSELlhFNGKLYohDD+TGllac..l..ct......s.........c.........hlPWVILs-GDGs.s.p.K.....................................GFKuEWh...TVK.--cLYVGuh.....G+EWs.sspGphlspsshWVKhl.shpG.cVpphNWsspYptl+.p.AhGIp.P.GY.hhHEus.WSthhppWhFLPR+sSpp..Ys-t.DEp.h.GsN.hll.ss.t.sFtp.lp.s.pls.....h..st+GFSuF...........KFlPs....opDplIlALKo.E..........tsp..htoalhshsl.p.G.......phlh.-........p.l.ts.KaEGltFh.......... 1 48 57 86 +5908 PF06080 DUF938 Protein of unknown function (DUF938) Moxon SJ anon Pfam-B_8833 (release 9.0) Family This family consists of several hypothetical proteins from both prokaryotes and eukaryotes. The function of this family is unknown. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.24 0.71 -4.78 6 529 2012-10-10 17:06:42 2003-05-28 15:51:31 7 5 465 0 233 728 387 188.80 39 90.05 CHANGED +.aAPAspRN+-PILuVLpphlssssp...lLEIASGTGQHAsaFAsthPshpWQsSD.spphht.SItA....atcptulsNltsPlpLDlots.hsspt.h..........ssslDulhshNhlHISPWsss.GLFsGAGclLsptulLhlYGPYspsGchTusSNtsFDpoLRsRsspWGlRDlc-lhuLAtppGLpLpchlsMPANN+sllFp+ ..................................husAs-RN+pP..ILp......V.L...pp...h..l....sss.sp..........VLEluSGTGQHusaF..At.th..........P......t..l....p.WQ.P.S.D.hstpt....ht...........SItA.............ah..t.p......s.....s.l..s.N.....l.t.s.P.l..t.L...D..ls.ts....a.sht..................................................shDulhsh..N.h.l.HIs..sas.s...s.pu.LFtuA..u............p.hLt.s........s..Gh.L........hl.YGPaphs.GphT.ut.SNttFD.t.pL+p+.s.P.t.h..Gl..RDhc.s.lttLAttpGLt......LtchltMPA..NNhhllap+........................................................................................ 0 62 122 184 +5909 PF06081 DUF939 Bacterial protein of unknown function (DUF939) Moxon SJ anon Pfam-B_8873 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.76 0.71 -4.31 33 2302 2012-10-02 19:04:43 2003-05-28 15:53:42 6 4 1115 0 242 2189 63 142.20 28 47.03 CHANGED h+IGhRTlKTulussLAlhlAphL..sLs.hssAGIlsILslQsTppcSlpsuhpRhhuslluhlhuslhhtlhGa.sslshGlhlllhIslslpL+lp.cGlssusVlllplhsppshsahh.hhschhLlhlGlslAhLlNlaM ...............................plGhRTlKTulAshLu.h....l....l....s.t.h.L.............sl.s.....h....h.h....Au....l.s.A.l.l.u.l....p..s..oh.p..p.S..l.p.hu.hsRlhush...lG..shlu...l....l.....h......h...h.....l............h.......G........p.......s.......h....h.......s....h......u.....l....h...........l.....h......l......h....I.....s...l......s......h.......t.......h.......p........l........p........t...........G........l..........s..........s..........u..........s.....l....h........l....l..........t...........l.....h..........s..........t..........p..........s......t..........s..........h..........h....h......h....hsc.h.lh.hhIGlhh.AhllNhhh.................................................................. 1 87 155 210 +5910 PF06082 DUF940 Bacterial putative lipoprotein (DUF940) Moxon SJ anon Pfam-B_9128 (release 9.0) Family This family consists of hypothetical bacterial proteins several of which are described as putative lipoproteins. 
21.10 21.10 25.80 24.90 20.20 20.20 hmmbuild -o /dev/null HMM SEED 658 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.83 0.70 -5.97 38 1147 2009-09-11 20:50:12 2003-05-28 16:06:17 6 5 706 0 120 707 920 573.90 47 92.49 CHANGED sS.ssaGss.GLlpMPoARMts-Gphuhshsts.........sp..........Yp+asholQlhPWlEsohRYopl......psh.as.........s.h..SGsppapD+uhDhKhRLh..cEota.lP.plulGlcDhuGTG............lFsGEYllAoKph...........Gsh-hTlGlGWGhlGspsslsssh......h..spRs.t...htspGGplshspaF+Gssuh.FGGlEYQsPaps.LpLKlEY-ussYpp-hssp.....shppcSs.aNaGshY+hschhclpLuYp+G............sTlshuholptNh.....sshpss.hhsssssshpsp......ssp..............................................................tshpss...........................hpphhpphhtpsGaphtplt.hcss..slslthc..p.s+YcspscAhtRsApllusshPss.lcpa+ll.pptshPhsps..............plct.tthpshtphphh..s.sphtstsphsps...h.sps......thhppth.ca....sauluPhlsQShGsP-shahaslGlpssusYphssshhlsGslthslh..sNaDchph.............................sus.sLPRVRT.hRpYsppssspLspLpLsahtphupshYspshuGYLEpMauGVGuElLY+P.hsusaAlGl-hNal+QRDh...-shhGh.............t-hcs..........................hT.......G...HsosYap.s.....pshhhplssG+YLAsDhGsTlslu+pFcsGlhlGAaAThT...DlSu-EaGEGSF.sKGhalSIPhDhhhscPopspushshtPloRDGGQhLspphpLY-hTcsp 
...........................................................SQSDFGGs.G.LLQhPoARMA.EGEh.SlNY+DN...............DQ.................YRaYSsSVpLFPWLEsTlRY..T..DV...RT+pYS........phEsF...SGDQoYKDKuFDhKLRLW...EE...u...YW...lP.QVAlGtRD.I...uGTG.....................LFD.uE....YlVASKth...........GPF.DF.T..LGhuW.GYhGsuGN.lpNPhCp.....hSD+aC.Rs............pp.AGslsh....SphF+GP.A.Sl.FG...GlEYQ.....T....PWpP...LRLKLEY-GNsYppDFA.G.........+L.QtS.+..FNVGAlYR.s.s.sW.ADlNL.SYERG............N.ThMFGhTL...RTNF.....NDLRPs..hpDss+PtYpPt.....Ppsth.......................................................................phos......VAsQLThLKYNAG..htsPpIQ..h+s...c.....TLYhoGpQ..hKY.RDo+Eul.RA.NRI.lhNsLPp..........G.l...........cpIplT.pp.R..s.hs.VTT.....................ETDV.ASL+p...pLsGps....t...........hp.tp.p.h.s.......puht...............................pGahIccsRa.....saphsPs..LsQSl.GGPEsFYMaQLGlhuoAchWhTDHLLh..sGulFsNlh..NN.YDKFp...o.........................PtDS.pLP.RVRT+lR-YV..pNDVYlNNLQANYFtcLG...N.GFYGQVYGGYLETMauGsGuElLYRP.lDusWAhGlDsN.YVK.QRDW...csM.M+F..............T.DYSs..........................hT..........G...alTA...Y.WsPs.....hhpsVLhKhSVGQYLAtDKGuTl-luKRFDSGVsVGsaAslo...sl..St--..Y.GE.Gs.F.o..KGhYlSlPhDLho.sPsRsRAsluWoPLTRDGGQ.LuR.KapLYsMTu-........................................................................................... 0 24 69 97 +5911 PF06083 IL17 Interleukin-17 Finn RD anon Pfam-B_9152 (release 9.0) Family IL-17 is a potent proinflammatory cytokine produced by activated memory T cells [1]. The IL-17 family is thought to represent a distinct signaling system that appears to have been highly conserved across vertebrate evolution [1]. 
21.00 21.00 21.70 22.70 20.90 20.30 hmmbuild --amino -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.09 0.72 -3.92 20 312 2009-01-15 18:05:59 2003-05-28 16:46:01 6 4 67 10 176 271 0 81.80 39 48.52 CHANGED pRSlS..PWsYpls.......cDssRYPphlsEAcCLCpGClsup.GcEshsh.sSVPlhppllVLRRp............spssstuachchhpVsVGCTC.Vh ......pRSlSPWsY.pls.......pDs.sRaPp.ls-ApCh.CpG..C..l.......s....s......Gp...c.s..tsh..sSlPlhpphhVLRRp.............spssthsachchhtVsVGCTC.V................ 0 39 51 81 +5912 PF06084 Cytomega_TRL10 Cytomegalovirus TRL10 protein Moxon SJ anon Pfam-B_8875 (release 9.0) Family This family consists of several Cytomegalovirus TRL10 proteins. TRL10 represents a structural component of the virus particle and like the other HCMV envelope glycoproteins, is present in a disulfide-linked complex [1]. 20.20 20.20 21.20 260.60 19.20 19.20 hmmbuild --amino -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.96 0.71 -4.41 5 22 2009-09-11 05:08:22 2003-05-28 16:48:09 6 1 6 0 0 21 0 149.10 91 87.75 CHANGED VCAETTVATNCLVKTENTHLTCKCSPN.......sTSsTGNGSKCHAhCKC.RVTEP...ITMLGAYSAWGAGSFVATLIVLLVVFFVIYAREEEKNN.TGTEVDQCLAYRSLTRKKLEQHAAKKQNIYERIPYRPSRQ.KDNSPLIEPTGTDDEEDEDDsV .VCAEsTVsTNCLVKoENTHLTCKCsPN.......soSNTsNGSKCHAMCKC.RVTEP...ITMLGAYSAWGAGSFVATLIVLLVVFFVIYAREEEKNN.TGTEVDQCLAYRSLTRKKLEQHAAKKQNIYERIPYRPSRQ.pDNSPLIEPTGTDDEEDEDDDV 1 0 0 0 +5913 PF06085 Rz1 Lipoprotein Rz1 precursor Moxon SJ anon Pfam-B_8925 (release 9.0) Family This family consists of several bacteria and phage lipoprotein Rz1 precursors. Rz1 is a proline-rich lipoprotein from bacteriophage lambda which is known to have fusogenic properties. Rz1-induced liposome fusion is thought to be mediated primarily by the generation of local perturbation in the bilayer lipid membrane and to a lesser extent by electrostatic forces [1]. 
This family Rz1 and the Rz protein Rz (Pfam:PF03245) represent a unique example of two genes located in different reading frames in the same nucleotide sequence, which encode different proteins that are both required in the same physiological pathway [2]. 27.90 27.90 28.50 32.30 26.60 27.80 hmmbuild --amino -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.23 0.72 -4.54 7 97 2009-01-15 18:05:59 2003-05-28 17:12:01 6 2 60 0 3 60 0 40.80 57 61.59 CHANGED CsSpPss...C...lKPPsPPAWhMpPssDhpp.LNtIhSsScpp ...........CtSsPPV......C...scPPsPPAWhMpPssDhQp.LNuIISsSEst... 0 2 2 3 +5914 PF06086 Pox_A30L_A26L Orthopoxvirus A26L/A30L protein Moxon SJ anon Pfam-B_8938 (release 9.0) Family This family consists of several Orthopoxvirus A26L and A30L proteins. The Vaccinia A30L gene is regulated by a late promoter and encodes a protein of approximately 9 kDa. It is thought that the A30L protein is needed for vaccinia virus morphogenesis, specifically the association of the dense viroplasm with viral membranes [1]. 21.00 21.00 21.00 22.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.75 0.70 -4.88 3 96 2009-01-15 18:05:59 2003-05-28 17:20:21 7 2 32 0 0 88 0 208.20 60 42.94 CHANGED MANIINLWNGIVPTVQDVNVASITAFKSMIDETWDKKIEANTCISRKHRNIIHEVIRDFMKAYPKMDENRKSPLGAPMQWLTQYYILKNEYYKTMLAYDNGSLNTKFKTLNIYMITNVGQYILYIVFCIISGKNHDGTPYIYDSEITSNDKNLINDRIKYACKQILHGQLTMALRIRNKFMFIGSPMYLWFNVNGSHVYHEIYDGNVGFHNKEIGRLLYA ...........MANIINLWN...GI.VPhVQDVNVASITAFKSMIDETWDKKIEANTCISRKHRNIIHEVIRDFMKAYP.KM.DEN.........+KSP.LGAPMQWLTQYYILKNEYaKTMLAYDN.....G...SL.NT....KFKTL.NI.Y......M..ITNVGQYILYIV.FCIIS.GK.NHDGTPY...I...YD...SE.ITSN.DKNLIN-RIKYACKQILHGQLThALRIRNKFMFIGSPMYLWFNVNGSpVYH-IYDtNsGFHN+EIGRLLYA........................................ 
1 0 0 0 +5915 PF06087 Tyr-DNA_phospho Tyrosyl-DNA phosphodiesterase Finn RD anon Pfam-B_8155 (release 9.0) Family Covalent intermediates between topoisomerase I and DNA can become dead-end complexes that lead to cell death. Tyrosyl-DNA phosphodiesterase can hydrolyse the bond between topoisomerase I and DNA [1]. 22.00 22.00 23.00 22.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.78 0.70 -5.86 27 520 2012-10-02 13:01:53 2003-05-29 09:34:23 7 27 273 43 378 517 1 347.80 22 66.87 CHANGED Phplhhoslhshpsp.......tsssslsLc-lLus...plppsh.a....sFhhDlsaLlsphs.phhp...plhhlpG...ppt.phhphttt.....th.NlphltsthstsF...GsHHoKMhlhhac-s....h...+lVl.osNhh.tDWs.....hsQshWhSshl........spsssst.........stscF+p-LhpYLppYt......tsl....hpplccaDFSsl...sstLlsSsPGpap.........ph..p..aGaspLtplLpcpss.s......tpspphsl.lsQsSSIGolsss.t.ahpsphhssLhhss...............................tt.ppppsphpllaPoscpl+sShs.GasuGuslha+hp....p+ppp.hl+..shhp+Wtup............sptRspshPHlKsYh+hs..........pphpslpWhllTSANLSpsAWGs......htptssphplpuaEhGVLh......Ppths..................h.shshpts..t.....................hth.hPac.lPhpsYs 
............................................................................................................p..slplp.-llt.....l..sh......sahhDhtalhtth....th..h.............lhh.lh..s.........t...t.t..th..t.......................h..plphh.s.h......h...........ushHoKhhllh....htpt..........h...+lll.ouNhh........DWtp..............hpps..........lahpsh...Phh.............tttt................................p.stFpt...cL.htaL.tht........t.h.................hp.l.p.p.h.Dauts...pshhltSsPGhat.....................................p......hGhhtLtphlpphs............................p.....h..l.......hQh...SSlGs.....h............tthhp..sh..............................................................................hpllaPo.pplpp.Sht..G...h.....u...u.s.s.l...........hp..........p.............t.......hlc.......h........h..t.................................................ttRptsh..s.HhK.....hhhths.............................................t...pph.sWhhls..SuNLSpsAW.Gt................................p.ttsph.lpsaEhGVlh........Pt.ht......................................................................................................................h.hPh..hs............................................................................. 0 116 204 316 +5916 PF06088 TLP-20 Nucleopolyhedrovirus telokin-like protein-20 (TLP20) Moxon SJ anon Pfam-B_7657 (release 9.0) Family This family consists of several Nucleopolyhedrovirus telokin-like protein-20 (TLP20) sequences. The function of this family is unknown but TLP20 is known to shares some antigenic similarities to the smooth muscle protein telokin although the amino acid sequence shows no homologies to telokin [1]. 
23.50 23.50 23.60 26.50 22.30 23.40 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.04 0.71 -4.58 21 50 2009-01-15 18:05:59 2003-05-29 09:38:37 6 1 47 1 0 53 0 192.40 31 97.19 CHANGED MAossssTsD.IsVpsplsc-.tccslLsFhlc-EY+LpKhulGAaslpllsop.LssLtpstp..sslssG-YsllaNhsps.spplsslLhslKssslKKusslF+lhhhs................................................ophhhsuss............sppsspsps---s..................sss-shshsts................ssssssK+QKLDcptps ......MAossssTsD.IuVaspl-cE...tscs..lLoFlVc-EYHLKKLuVGAYslsIl-oph.LssLtp.ptp......ssluCGsYllhaNhscp......sstlNsILFNh+.sshLKKusslF+lhahs.............................................................................................tptpsphhtss.s...........sppsspsps---s.......................psss-ssshsts.........................t.stssssK+QKlDcs...................................................................................... 0 0 0 0 +5917 PF06089 Asparaginase_II L-asparaginase II Moxon SJ anon Pfam-B_7673 (release 9.0) Family This family consists of several bacterial L-asparaginase II proteins. L-asparaginase (EC:3.5.1.1) catalyses the hydrolysis of L-asparagine to L-aspartate and ammonium. Rhizobium etli possesses two asparaginases: asparaginase I, which is thermostable and constitutive, and asparaginase II, which is thermolabile, induced by asparagine and repressed by the carbon source [1]. 
25.00 25.00 34.20 34.10 24.50 17.90 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.82 0.70 -5.75 87 514 2009-01-15 18:05:59 2003-05-29 09:45:46 7 4 476 0 211 516 948 313.30 37 94.11 CHANGED hlclhRGu...................llEShHpGpsVVsDscGp......llhthGsssps.sasRSuhKPhQALsllpoGAspth.........................................slsscc.LALsCASHsGpshHsctstshLtchGLs-ssL......pCuschPhst.st.thhtsstpPstlppNCSGKHAGhLssspthGhshcsYlpscHPlQptltpslp-lsuhss..sph..uhDGCusPsaAhsLpslApuaAphAsu...t.t...s.t.pttsht+lhcAMttaPphVAGpGRhsTpLMcshsGplluKsGAEGVhssul..........sstGlGlAlKlpDG.usRAttssssplLtplsh..........l....sss.hstltphhtsslhst.pGh.sGpl+ss .................VclhRGs...................hlEShHpGpssVlDs.cGp......llhshGDscps.sasRSuhKPhQALsllco....Gsspth.........................................shssp-LAlsCASHsGE.tHlptstshLtcs..G.....LspssL......pCsschPhpt.sh...thhps.stp...PoslppNCSGKHAGhLAs.stt...........t...........G...........h......s......h...............csYlp.sHPlQ....ptltpsltclsGhsh..sth...GhDGCusPsaAlsLpsLA+uaAphAsu...........t..s........s.t.tttshpRlhcAMtsaPthVAGs.sRhsTtLMctssGtlluKsGAEGVhshul..............sc...G...hGlAlKlpDG..u.sRAttsssstlLpp.Lsh..........h...s.s...t.ttLtpht..p..s.....hhst....tGh.lGplp.s................................................ 0 69 144 187 +5918 PF06090 Ins_P5_2-kin DUF941; Inositol-pentakisphosphate 2-kinase Finn RD anon Pfam-B_9098 (release 9.0) Family This is a family of inositol-pentakisphosphate 2-kinases (EC 2.7.1.158) (also known as inositol 1,3,4,5,6-pentakisphosphate 2-kinase, Ins(1,3,4,5,6)P5 2-kinase) and InsP5 2-kinase).\ This enzyme phosphorylates Ins(1,3,4,5,6)P5 to form Ins(1,2,3,4,5,6)P6 (also known as InsP6 or phytate). InsP6 is involved in many processes such as mRNA export, nonhomologous end-joining, endocytosis and ion channel regulation. 
22.70 22.70 22.80 23.30 21.70 21.30 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.34 0.70 -5.39 38 330 2009-09-11 15:15:53 2003-05-29 09:47:06 7 11 270 17 238 346 0 342.40 19 82.54 CHANGED YhuEGsA...Nllhphssss..............................hht+lLRlcKhtts................................ttpppchhcahc..phl+..PLhu....p.lsstplVtls..pphlppl..spplp.........................Rst.R.th...hlssscshuLLlsDlsshst....................................................sslslEIKPKWha...............................p....hpsphCRhCshpthK..................t.tthsptppaCPL...DLh.....usshpclhpAlcs.hsp......spssh+h.htt..........................................................lschLhp...............................................sslLp+LhphQc........................................................ts.pthshpth.shspphtluMTh+DCohhIphp.pt............................................h.hpsplhDLDhK..shpKhppah.ch-pcL ...........................................................................................................................................ahuEGssNllh...thtt..t.......................................................hlLRhhKh..s................................................p..pahp....phhp....hhs..................phlhs..p..lltls......hphlptl..st.lt................................R...+hth..........hs.htpshuhhlsshs..hs...............................................................................................................................................tsshslElK..PKhhh........................................................................................t..t.hptphCRhChh...phhK......................tt..tt...hpp..ppaCPL-Lh.....................uss.hpchhtAlpthhtp.......................spssh+h..tt..............................................................................................hthltphLhp..................................................................................................................................................
............................................sslLtcLhp.h.Qt.......................................................................................................................................................................................................t...t.p..ths.h.tt...p..h......pp..hhluhTh+DColhlphp.tt..............................................................................................................................................hpsplhDLDhK..shtph.hah.ph-tp...................................................................................................................... 0 82 128 193 +5920 PF06092 DUF943 Enterobacterial putative membrane protein (DUF943) Moxon SJ anon Pfam-B_7843 (release 9.0) Family This family consists of several hypothetical putative membrane proteins from Escherichia coli, Yersinia pestis and Salmonella typhi. 25.00 25.00 25.10 28.50 23.20 24.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.11 0.71 -4.93 16 442 2009-01-15 18:05:59 2003-05-29 09:58:29 7 2 256 0 34 201 0 148.80 46 96.19 CHANGED MKp+pptllh...ullLhuslh..YhhW.thRPVEI................lsVHpcss......stIlV+sFPlTD+GKIsWWhcNKshLKpKYsIPcss.cGhaolhhWDFG-GYc...........cpupsDhhCF-DhK.opcNCI-K.Nhlhslcphp.ssphhFshssspY.hp.........csGclsK..htp- .................................MKtc.pKllh...sLhLlssl....Y.hW.phRPVpI................lhsapDss.....sshllVcchPhTDpsKINWaLcppsclKspaslPcsu.csaasI...DhGsGap...........EtshpDLhCFsshK.os-NCIsK.NhLMsIpch..cNshha.hh....DtspYQLs.........ppsKIp+shp.s............................ 0 5 16 23 +5921 PF06093 Spt4 Spt4/RpoE2 zinc finger Moxon SJ, Kerrison ND, Finn RD, Bateman A anon Pfam-B_7906 (release 9.0) & COG2093 Domain This family consists of several eukaryotic transcription elongation Spt4 proteins as well as archaebacterial RpoE2 [2]. 
Three transcription-elongation factors Spt4, Spt5, and Spt6 are conserved among eukaryotes and are essential for transcription via the modulation of chromatin structure. Spt4 and Spt5 are tightly associated in a complex, while the physical association of the Spt4-Spt5 complex with Spt6 is considerably weaker. It has been demonstrated that Spt4, Spt5, and Spt6 play roles in transcription elongation in both yeast and humans including a role in activation by Tat. It is known that Spt4, Spt5, and Spt6 are general transcription-elongation factors, controlling transcription both positively and negatively in important regulatory and developmental roles [1]. RpoE2 is one of 13 subunits in the archaeal RNA polymerase. These proteins contain a C4-type zinc finger, and the structure has been solved in [3]. The structure reveals that Spt4-Spt5 binding is governed by an acid-dipole interaction between Spt5 and Spt4, and the complex binds to and travels along the elongating RNA polymerase. The Spt4-Spt5 complex is likely to be an ancient, core component of the transcription elongation machinery. 25.00 25.00 27.10 30.20 24.40 23.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.85 0.72 -4.10 37 482 2012-10-03 10:42:43 2003-05-29 10:20:38 8 1 446 10 332 435 72 70.90 37 68.11 CHANGED hRAChhCpllpohspF..pGCsNC..hhtht.ts.pplt-sTSpsFcGllulhcPpc.SWlA+a.c.lsphhPGhYAlpVs ...........RAChhCpllp..ohsp....F.....p.....GCsNC..hh.t.....ts.t.....s.csTSssFcGllsl..hcPpc.SWVA+a.p....l....sthhsGhYAlpVp.............. 0 105 189 271 +5922 PF06094 AIG2 AIG2-like family Finn RD anon Pfam-B_9771 (release 9.0) Family AIG2 is an Arabidopsis proteins that exhibit RPS2- and avrRpt2-dependent induction early after infection with Pseudomonas syringae pv maculicola strain ES4326 carrying avrRpt2 [1]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.73 0.72 -3.52 90 2098 2012-10-02 16:39:48 2003-05-29 10:21:23 7 27 1701 10 779 2120 898 101.30 28 64.81 CHANGED lFVY...GTLpps.psppthlt.........tsphhspsspht.......hhhhthtsYPsll............ssst................................tVpGp.lapl...sp....pt..LptLDphEt.......pss.hYcRtplplt.............hssstt............pAalYhhp ..................................................................lFVY...GoL..ppt...pss.p.phht....................ssph.h.u..p.h..t..hht..........h..h.h..h..s...h.s.t..Y...P..uhls.......................sss........................................................pVpGE..V..Y.p.l.......ss..............sp......LspLDthcs.................psstYpR.p..h.lp.s.........................................hs.......................pAahYlh........................................................................... 0 261 470 649 +5924 PF06096 Baculo_8kDa Baculo_8Kda; Baculoviridae 8.2 KDa protein Finn RD anon Pfam-B_8370 (release 9.0) Family Family of proteins from various Baculoviruses with undetermined function. 25.00 25.00 36.30 35.80 19.70 19.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.29 0.72 -4.09 15 32 2009-01-15 18:05:59 2003-05-29 10:31:12 6 2 30 0 0 25 0 64.50 44 78.56 CHANGED ssYslppFYNssR+PLKsTTLHsGNIspssYEsVhhhR..+LhC+Esh..Puc+-hpFps.+saNKEN ......s.slppFYNssRpsLKsTTLHDGNlstpsYcsVh.sR...+LhCpEsh..suc+-hKhpp.psaNKEN........................... 0 0 0 0 +5925 PF06097 DUF945 Bacterial protein of unknown function (DUF945) Moxon SJ anon Pfam-B_9171 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
22.40 22.40 22.50 24.10 22.30 22.30 hmmbuild -o /dev/null HMM SEED 460 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.37 0.70 -5.71 50 1334 2009-09-11 15:55:27 2003-05-29 11:00:00 6 4 964 0 145 797 18 449.70 33 95.70 CHANGED MKK...hhhGlllulsslhsuusaasGpphEsphpptlsphNt.........sssl..plphhsacRGhFSSpsphplpssssthpt.............shplshssplpHGPhshsp...lpphphhsthh.sp.......sthhp.hhthhtspsPhphssplsasGssssslslsshsh....ptssssls...auGsphphshs.pshpphshpsshsslplsh...........sttplplsslphs.sstptsphs......hhlGspshplcplslssp...............................spphhplpshsht.psphsp........ss...splsuphshslsslphssp.slGssplshplsplDupuhpphhphYpspst..................s...hpptthpplhsslstLLpssPplslssLohcs.spGchshsls..lsLsss..sptttts.......tplhp..shphssphslstshlschht.......................................ph.t..ts.........................t.uppplpthsshhhh.plhs.cssslpsshphssGplsl.NGpch...Pltph ..........................................................MpK....sAsGVIlALull..WsG.GsWYT.GpplE.splpch....lpphNtth+.........hst.psslploYpNacRG....h..FoSphQhhl..p..sut.s..lc..............................sGQpllhspsVcHGPhPlst...Lt+hN.....llPuhAuhpspL.........sNN-hspsLF.ths..K.s.cs.Phphss..Rh.u....a..u.G.ss.so........slslt..PhpY......pptstKlu.....F.usup.hphsuD...pshp.s...l...s.lpGcs.pshhlshs....................................ptsplohpslphD.upuphspht.......hhlG.pp.chplpp.hsIssc...............................Gcchs..l-uhp..hpscoch..sp...........Dt....ctlNspLsYpLs..s..Lph.....p.........s....p.....p....L......GS....Gcl....oLtssplDupA...h+QF.ppYsttspt.h....................tsthups..hhQpt.s....sp.t.......hhstLPhLhcucPslslsPluWKN..upG.E......shNLs..l.ltDP.....upsptss...............sp.hc.....lKoLshcl..plPh.shsoEhhpQhs..............................................................................h..EGh.pt-pAp................................+hAcpQlp.u...h...shGQM..hpLhT..l...p..sN..slshp.L.......pYss..Gplsh.NGQcM...s.E-F........................................................................................
........... 0 32 63 112 +5926 PF06098 Radial_spoke_3 Radial spoke protein 3 Moxon SJ anon Pfam-B_9453 (release 9.0) Family This family consists of several radial spoke protein 3 (RSP3) sequences. Eukaryotic cilia and flagella present in diverse types of cells perform motile, sensory, and developmental functions in organisms from protists to humans. They are centred by precisely organised, microtubule-based structures, the axonemes. The axoneme consists of two central singlet microtubules, called the central pair, and nine outer doublet microtubules. These structures are well-conserved during evolution. The outer doublet microtubules, each composed of A and B sub-fibres, are connected to each other by nexin links, while the central pair is held at the centre of the axoneme by radial spokes. The radial spokes are T-shaped structures extending from the A-tubule of each outer doublet microtubule to the centre of the axoneme. Radial spoke protein 3 (RSP3), is present at the proximal end of the spoke stalk and helps in anchoring the radial spoke to the outer doublet. It is thought that radial spokes regulate the activity of inner arm dynein through protein phosphorylation and dephosphorylation [1]. 
23.80 23.80 23.80 24.10 22.50 23.70 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.73 0.70 -5.41 14 179 2009-01-15 18:05:59 2003-05-29 11:14:06 6 7 117 0 124 177 8 265.80 39 61.68 CHANGED hsY........uNIMaDRRVhRGNTYutp...h.up.-.........pppppuppcuhs+K+t+phhss+.........oPsss-GRcphsVQTE.YLEELpD+l...Es-h-sQTDsaL-RPsoPhaVPtKoGtDstTQI..-.....GELFDFDhEVcPIL-VLVGKTlEQALlEVhcEEELtplRppQcpa-phRsAELsEsQRLEtpE+RppcEKERRhpQc+pthcc-+EspcKlsARsFApsYLu-llssVhssLcspGaahD.lcp-lEpsFhPWLhpEVtppltpp....h.u+slLDplIc-lsp.ph.thhptpp .........................................................................................................htNlMaD+RVhRGsTau.....h.....s..p...p.h.....................ppp.pptp++shs+..Kptpp...h...p.p.........TP.PlcGRpHhslQTE.hY..LE..ElsD+h...Eh-h..-sQTD.sFL.D+PsTPh........FlP.........sKoGhDsuTQI...-.....G-.LFDFD.hEVc.PlLEVLVGKTlEQuLlEVhEEEELssL+tpQptacphRssELsEsQRLEtp.E+Rpp-EKERRhpQphphhcpcpEsppKlsApshAptY.lusLlssVhssLc.spGaFhDslcc..-lEhsFhPWLhp-Vtpphppt....hhuRp....lL.......-.llpplhppp........t................................................ 0 65 76 101 +5927 PF06099 Phenol_hyd_sub Phenol hydroxylase subunit Moxon SJ anon Pfam-B_10062 (release 9.0) Family This family consists of several bacterial phenol hydroxylase subunit proteins which are part of a multicomponent phenol hydroxylase. Some bacteria can utilise phenol or some of its methylated derivatives as their sole source of carbon and energy. The first step in this process is the conversion of phenol into catechol. Catechol is then further metabolised via the meta-cleavage pathway into TCA cycle intermediates [1]. 
25.00 25.00 32.80 32.30 17.90 15.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.70 0.72 -4.14 23 83 2009-01-15 18:05:59 2003-05-29 11:20:04 6 1 69 0 24 79 5 58.60 42 70.33 CHANGED pstP..sh-hp.p+YVRVpucpssuFVEF-FuI.GpPELhVELlLPpsAFctFCtsNpVhcLss ......p..s..h-.h.p+aVRVpucpsssFVEF-FuI.GcPELhVELlLPpsAFcpFCptppVhphs........ 0 1 13 22 +5928 PF06100 Strep_67kDa_ant Streptococcal 67 kDa myosin-cross-reactive antigen like family Finn RD anon Pfam-B_9995 (release 9.0) Family Members of this family are thought to have structural features in common with the beta chain of the class II antigens, as well as myosin, and may play an important role in the pathogenesis[1]. 23.00 23.00 23.00 23.00 22.90 22.80 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.63 0.70 -6.15 8 1128 2012-10-10 17:06:42 2003-05-29 11:20:18 6 6 924 0 207 793 40 447.70 47 85.31 CHANGED -sKSAYlIGoGLAuLAAAsFLIRDGQMsGc+IHILEELslsGGSLDGhphschGYVsRGGREM.ENHFECLWDLFRSIPSLEhsssSVLDEFYWLNK-DPNaSpCRlIcKpGpRlDsDGcFTLopKuhKEIhcLhhTsEEcLsDhpIc-lFS--FFsSNFWlYWpTMFAFEsWHSAhEMRRYlMRFIHHIuGLsDFSALKFTKYNQYESLVLPhlpYLKsHuVsFcYsspVpNIpVDhosuKKlA+pIhhp.+sGcccoIcLT.sDLVFVTNGSITESSTYGssspPAPhTcEL..GGSWpLW+NLApQSs-FG+P-KFCpclPppsWhhSATlTscsc+I.sYIE+ls+RDPhuGKsVTGGIlTlpDSsWLhSaTIsRQPpFKcQscN-lllWlYuLYSDpcGDYIKKPhcECTGpEIspEWLYHLGVPp-cI--LA.pcssNTIPVYMPYIToYFMPRshGDRPhVVPcsSsNLAFIGNFAET....sRDTVFTTEYSVRTAMEAVY 
....................p+pAYllGuGlAuLAAAsaLIRDupMtGppIpI..l..E.ch.......s..hs....GGS.h.....D.....G....t...............t...............................s.............p..cGalsRGGREM...-.pHF..EshWDLa+oIPS...LEhs.s.hSVLDEa..hh..l..s....c...c....D...P..s..h.u...p..sR...................ll....c..p..p..G...p...c.....h...s...s..s.uphsLspps.h-lhcLhhss...E...-...cLtshpIp-hF..s.-..-..FFpoNFWhhWpT.MFAFE.WpSAhEh+RYhhRFlaal..sGlschouL+FT+YNQYEShllPhlpaLcspGVpFpass....pVpslp..h..c..h..s...s..s.....p......K......l.....A.p....p...I.hhp....p...s.......G.........p...p..............c...pIpLs.sD..L..VFlTNGSlTEuoohG.Dp.so.......P.....A.....s...............s......p..-h.....uuuWpLW+plAp.....p..s......s..p..FGpP-hFsp.....shscpsW..hShThTs..p...s.p.p.l.h.shI.cplsppcP...hsG...p.hsTGGIlThpDS..sWhhShslsRQPpF+sQs.c.sphssWhYuLascp.Gsal+Kshh-CTGpEIhpEhLYHL.......Gl....s.......p.......p...p......h......cclu..t.......p.tssslPshMPYITuhFMsRshGDRP.VlPcsShNLAFlGpFsEo.....cDsVFTsEYSVRTAM.AVY............................................................ 0 53 115 164 +5929 PF06101 DUF946 Plant protein of unknown function (DUF946) Moxon SJ anon Pfam-B_10066 (release 9.0) Family This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
19.80 19.80 20.00 19.80 19.50 19.70 hmmbuild -o /dev/null HMM SEED 536 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.84 0.70 -6.51 15 441 2009-01-15 18:05:59 2003-05-29 11:22:46 6 38 192 0 316 436 14 234.70 18 27.46 CHANGED s.spsFsLPusLPsWPpGpG.FAsGpIsL.GcLEVspl....osFp+VWsstputscstGsTFacPs.ulPcGFasLGaYsQPNs+PLpGaVLVA+Dluus................ssL+pPlDYTLVhsossht.spps.........sGYFWLP.PP-GY+AlGhlVTssspKPsL..DcVRCVRuDLT-pCEspshI..hs...ssu............hslassRPscRGhtupGVtVGTFhCpsts.ssc...sssIuCLKNlcss.LpuMPNlcQIcALIcpYuPhlYFHPcEpYLPSSVsWFFpNGALLa+KGcps...s.PI-ssGSNLPpGGsNDG..pYWlDLPsD-ps.+EpVK+GsLcSuclYVHVKPshGGTFTDIsMWlFhPFNGPAplKluhh.slsLu+IGEHVGDWEHFTLRISNFoGELWplYFSQHSGGpWVDAsDLEFhp........suNKPVVYSS+HGHASFP+PGhaLQGss..GlGIRNDsA+SchhlDooh+YcIVAAEYLGsulV..EPsWLpYMRcWGPplsYsscsElcplhphLPh.hLRhsh-sllpplPhELhGEEGPTGPKEKsNWpGDE ............................................................................................................................h.h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hGpHlGDWEH.hlRh........u................hahSt..H..t...t.Gt..h....h......t..h....p....h.............................+shhasu.tsHu.as..G..................................................................................................
..................................................................................................................................................................................................................... 2 86 186 267 +5930 PF06102 DUF947 Domain of unknown function (DUF947) Finn RD anon Pfam-B_9959 (release 9.0) Family Family of eukaryotic proteins with unknown function. 23.00 23.00 24.60 24.40 22.50 22.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.33 0.71 -4.43 30 317 2009-01-15 18:05:59 2003-05-29 11:27:36 7 9 285 0 227 311 3 168.00 32 54.68 CHANGED ++psK+uPhEhSSK+.VsphRplhts..........p+s..hhRDPRF-....shuGp.............hs.shh++sYsFL.-DhRppElpclccpLccsK.....ss.......................cE+-cL+pplpshcschcshpp.+cpEpcllp-a++pE+-tlp.pGKp..PaaL..KKS.....-h+Khlhtc..Kacph..............cs+plcK.....hlE++RKKhsuKE+K ..................................+tsKctPhEhSoK+...Vsp..hRplhts............ppp.....hhRDPRFD.........shsGp.............hs..p...h.hcKsYsFL.s-h.RppEhpp...L.+cpL+cp+.........ss...........................................cc+cclcptlpphcsphpspcp.+cpcpclhpch++cc+ct..hc..pGKp..PaaLKKS..........Eh++hhLtp....+acph..............p.stpl-+hl-++RKKpsuK-++........................... 1 78 128 189 +5931 PF06103 DUF948 Bacterial protein of unknown function (DUF948) Moxon SJ anon Pfam-B_10104 (release 9.0) Family This family consists of bacterial sequences several of which are thought to be general stress proteins. 
28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.80 0.72 -4.12 42 1305 2009-01-15 18:05:59 2003-05-29 11:30:03 6 7 1253 0 178 574 36 89.90 36 61.21 CHANGED IuslIhAlAFllLVlalhhsLtpl....pps...lppsspTlpslppplssltppospLLsKsNpLh-DlstKspslsPlhpAluDlupSVpcLNsu ....................lAulIsAlAFllLllall....hsLhpl..................p+s.......lDcsscTlc.......sls.......spV.......sslh...ccoscLLsKsNtLh...-D....lss.....KlsslsslhsAVsslupSVpsLNs.................................... 0 49 111 147 +5933 PF06105 Aph-1 Aph-1 protein Moxon SJ anon Pfam-B_7927 (release 9.0) Family This family consists of several eukaryotic Aph-1 proteins.Gamma-secretase catalyses the intramembrane proteolysis of Notch, beta-amyloid precursor protein, and other substrates as part of a new signaling paradigm and as a key step in the pathogenesis of Alzheimer's disease. It is thought that the presenilin heterodimer comprises the catalytic site and that a highly glycosylated form of nicastrin associates with it. Aph-1 and Pen-2, two membrane proteins genetically linked to gamma-secretase, associate directly with presenilin and nicastrin in the active protease complex. Co-expression of all four proteins leads to marked increases in presenilin heterodimers, full glycosylation of nicastrin, and enhanced gamma-secretase activity [1]. 
25.00 25.00 26.90 26.00 20.50 23.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.64 0.70 -5.12 14 233 2009-09-11 12:57:32 2003-05-29 11:43:55 7 3 130 0 130 218 1 207.00 40 91.12 CHANGED TlssFFGCsFlAFGPulALFhhTIApDPlRVIlLlAuuFFWLVSLLlSSllWa.lhhslpsp.........lhFGlhhSVlhQEsFRahaa+LL+...............Ks-cGLpul......scpsphs...................................................................hsp+hlAYVu.GLGaGlhSGsFuhlNlLuDuhGPGTlGlcu...........sSphaFlsSAhhoLshlLLHTFWuVlaFsuC-pppahplshV.....VsoHLhlSsLTLLN..shapsoLlssYhlhllhulhAahssGGsltsh........p+hlssc ....................hhhhaGCshlAFGPshuLFhholAtcPl+.lIhLluuu...FFWLlSLLluSllWh.hhh.lpsp............lhh...uhhhSVhhQEhFRh.haa+ll+.............................Ksp......cGLt.sl.....stp.t.p..s..........................................................................shc.hAYVu.GLGFGlhS.....ulF.shl.NlL.....ucuhG..PGshGl..cu...............ss..aFLsSAhhohsllLLHsFWullhFsuh....-c...pp.....a.........htlhhV..........lhsHLhsShhThl.N......hhtssll..s.ahlhl.huhhAhhhsGushtsh.......................... 0 44 63 93 +5934 PF06106 DUF950 Staphylococcus protein of unknown function (DUF950) Moxon SJ anon Pfam-B_8992 (release 9.0) Family This family consists of several hypothetical proteins from different Staphylococcus species. The function of this family is unknown. 25.00 25.00 42.70 42.20 22.20 18.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.40 0.72 -3.99 3 163 2009-01-15 18:05:59 2003-05-29 11:48:11 6 1 123 1 4 53 0 108.80 74 96.00 CHANGED MTLoQQLKpYITcLFQLs+sETWcCEulEElAEDILPE+YIcsSPLuHKILpTaTYYNDELHElSIYPFLhYhsKQLIAIGYLD.NFDMDFLYLsDT+psIIDERYLLpcGG ...KThTQELKQYIT+LFQLSNNEsWECEALEEAAENI.LPERFINcSPLsHLTLETYTYYNDELHELSIYPFLMYANNQLISlGYLD.HFDMDFLYLTDTpNsIIDERHLL+pGG.... 
0 0 0 4 +5935 PF06107 DUF951 Bacterial protein of unknown function (DUF951) Moxon SJ, Eberhardt R anon Pfam-B_8994 (release 9.0) Family This family consists of several short hypothetical bacterial proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 25.20 38.50 21.90 24.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.75 0.72 -4.45 42 1332 2009-01-15 18:05:59 2003-05-29 11:51:50 6 4 1322 0 177 594 6 58.70 54 86.09 CHANGED caplGDIVcMKKsHPCGo.......scWEllRhGADI+IK.ChsCu+hVMlsRtcFEK+lKKllp .........YplsDlVEMKKsHsCGs.......N+WcIlRhGADI+IK.CpsCsH.VMhsRp-F-+KlKKll......... 0 75 122 152 +5936 PF06108 DUF952 Protein of unknown function (DUF952) Moxon SJ anon Pfam-B_8995 (release 9.0) Family This family consists of several hypothetical bacterial and plant proteins of unknown function. 21.60 21.60 23.00 21.80 20.40 20.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.10 0.72 -4.12 102 618 2009-01-15 18:05:59 2003-05-29 11:55:22 7 18 597 3 250 535 577 93.40 34 69.98 CHANGED IsstspWptuptsGpatsuslDht...-GFIHhSTspQ......lt.pTAs+aFuu...pssLlLL......slDsstLus..s..............L+WEs.................uc.uGtLFP........HLYG......sLslsAVhtsps ...................hstppWptApt..s.Gphtsusl-h.....-GFIH.h.S.o.s.p.Q..................................lt.tsA.s+aFsu.........ps-LlLL......tlDsstLss..s..................l+aEs............................uc.suthFP...............HLYG......sLslsAVlts..s........................ 0 78 169 211 +5937 PF06109 HlyE Haemolysin E (HlyE) Moxon SJ anon Pfam-B_9001 (release 9.0) Family This family consists of several enterobacterial haemolysin (HlyE) proteins.Hemolysin E (HlyE) is a novel pore-forming toxin of Escherichia coli, Salmonella typhi, and Shigella flexneri. HlyE is unrelated to the well characterised pore-forming E. 
coli hemolysins of the RTX family, haemolysin A (HlyA), and the enterohaemolysin encoded by the plasmid borne ehxA gene of E. coli 0157. However, it is evident that expression of HlyE in the absence of the RTX toxins is sufficient to give a hemolytic phenotype in E. coli. HlyE is a protein of 34 kDa that is expressed during anaerobic growth of E. coli. Anaerobic expression is controlled by the transcription factor, FNR, such that, upon ingestion and entry into the anaerobic mammalian intestine, HlyE is produced and may then contribute to the colonisation of the host [1]. 25.00 25.00 26.40 26.20 22.20 21.70 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.91 0.70 -5.41 2 433 2009-01-15 18:05:59 2003-05-29 12:19:47 8 1 323 25 1 123 0 198.90 81 97.36 CHANGED hsADpTVEsVKsAI-TADtALDLYNKhLDQVIPWpTFs-TlKELSRFKpEYSQuASsLVG-IKoLLMsSQD+YFEATQsVYEWCGVsTQLLsAYl.LFsEYsEKKASAQKsILIKVLDDGIhKLpcAQpSLhsSSQSFNsASGKLlALDSQLsNDFsEKSsYFQuQVDKIRKEAYAGAAAGVVutPFGLIISYSIAAGVVEGKLIPtLKpKLKSVpsFFpoLusTVKpANpDIDtAK.KLpsEIusIG-lKTETETTRFaVDYDDLMLp.Lp-uApKhI.oCNEYQKRHGKKs...l.. ..hA-pTVEVVKsAIETADGALDLYNKYLDQVIPWpTFDETIKELSRFKQEYSQAASVLVGsIKsLLMDSQDKYFEATQTVYEWCGVsTQLLuAYIhLFDEYNEKKASAQKDILI+lLDDGlsKLNEAQKSLLsSSQSFNNASGKLLALDSQLTNDFSEKSSa.............................................hpN+LKusQsFFTTLSNTVKQANKDIDAAKLKLTTEIAAIGEIKTETETTRFYVDYDDLMLSLLKtAAKKMINTCNEYQpRHGKKTLhEVP............... 0 1 1 1 +5938 PF06110 DUF953 Eukaryotic protein of unknown function (DUF953) Moxon SJ anon Pfam-B_9087 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
23.10 23.10 23.10 23.50 23.00 22.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.57 0.71 -4.66 6 278 2012-10-03 14:45:55 2003-05-29 12:22:26 6 11 184 2 189 321 7 113.10 28 71.30 CHANGED sspGa-EFpcslcp..........tppsKslashFoGuKD.ssGcSWCPDCVpAEPVIc-uLKc.sst...-shFlhspVG-RshW+DPsssFRpssphKlTulPTLL+asss.p+..Ls-cpstpssLVEhhFsE ........................................................t......th.phhpt.............pp.s.p..s..l..alhFh.usc.D..ssGp.SWCPDCVpAEPs....lpp.s...h.....pp....hsp.......sshhlhsp.V..G.p+.s.h.W.+.c....s..s.NsFRp...phplpslPTLl+a...p..s.............s....t+.........L.tt.ph.t.phlt..................................................... 0 69 105 157 +5940 PF06112 Herpes_capsid Gammaherpesvirus capsid protein Moxon SJ anon Pfam-B_9200 (release 9.0) Family This family consists of several Gammaherpesvirus capsid proteins. The exact function of this family is unknown. 25.00 25.00 25.80 25.30 21.10 20.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.93 0.71 -4.16 8 23 2009-01-15 18:05:59 2003-05-29 12:31:16 6 1 19 0 1 19 0 156.30 31 92.09 CHANGED MuptRl+cPhVQGRLEc-aPsc.LlschssLsQsNMoss-Yshs+RsYLVFLIApapYDpYlcpppGIpRK+H.......LpuL+upt...............pssstpsSuhSussuuuouls..................lSuoSso.uhoSuPuSLsuu....................suhosSsusussossssppKK ..Ms.hRl+cPhlQGRLEpDaPspPLlschpsLsQsNhosspYthspRsYLVFLhAQapY-tYlppptGlpR+cH.............lpshRspt.........................t..hsuuhSussuussusss.su..............lusouho..ohoSussSLtuu....................ootssususutsp....t.................................................................. 0 0 1 1 +5941 PF06113 BRE Brain and reproductive organ-expressed protein (BRE) Moxon SJ anon Pfam-B_9280 (release 9.0) Family This family consists of several eukaryotic brain and reproductive organ-expressed (BRE) proteins. 
BRE is a putative stress-modulating gene, found able to down-regulate TNF-alpha-induced-NF-kappaB activation upon over expression. A total of six isoforms are produced by alternative splicing predominantly at either end of the gene.Compared to normal cells, immortalised human cell lines uniformly express higher levels of BRE. Peripheral blood monocytes respond to LPS by down-regulating the expression of all the BRE isoforms.It is thought that the function of BRE and its isoforms is to regulate peroxisomal activities [1]. 20.70 20.70 24.70 24.50 18.60 17.40 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.17 0.70 -5.40 2 136 2009-01-15 18:05:59 2003-05-29 12:45:02 7 3 79 0 78 128 0 247.10 45 81.89 CHANGED MSPEVALpRISP.LpPhlsSVVhNG+VGLDuTNCLRlTDLKoGCTSLTPGPsCDRFKLHIPYAGETLKWDIIFNApYPELPPDFIFGEDA-FLP-PStL.pLspWssuNsECLL.lVKELlQQYHpaQCpRLRESSRLhFEYpoLLE-PpYGcNMEIYAGKKNsWTGEFSARFLLKLPVDFSNIPsYLLK...............................................................................................Vlpaps-hhp+lslh................................F ...........................................................................................................................sDRFpLhIPY..sh-.tl+WDlIFsuphPphsPDFIF.G.......-.D......sc.FhP........-......s......s...t......l..t...s.....LspW...........ssssPcsLL..hllpELl...ppY+paQppRl.t..E..s..s..RL...hFEhpTLlpc.th.s...phcl.hs..shpps....ht.s.hl.lphs...lD..hs...p.l.P....h..h....c......p...sttsh..sh...l.s.sa.pssp.ss..ts.hP..cL.hL..SPp.l-.cuLhusssl+lPsas.s..G.hCLh-Yl.PplpphLpppV.p.slpshchRRcaItAhhshaGp.s.lEhDs..hhpKhohLh...tsFs..FLVH.ltlP.....h..FP+pQPslhhQSs.H................................................................ 0 26 38 59 +5942 PF06114 DUF955 Domain of unknown function (DUF955) Finn RD, Yeats C anon ADDA_4938 Family Family of bacterial and viral proteins with undetermined function. 
A conserved H-E-X-X-H motif is suggestive of a catalytic active site and shows similarity to Pfam:PF01435. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.45 0.71 -4.39 77 5058 2012-10-03 04:41:15 2003-05-29 13:02:17 8 46 2529 3 1113 3932 890 119.90 16 37.22 CHANGED scphGlplhhhs............hsttsttthth.t.......................sthlhlssp.hsttcptasluHELuHhhhpspt........................ht.tttpthEhpspth....AsthLlPpphhhtthtpthp.hp...........................lsptaplohphhthclp ..............................................................................h...........................................hh.hh.........................pthI.h.l.s....p....p....h....s....t..s.....p....p....t..a..sluHEL..u.Hhh.hp...ptt.....................................................t..t..t..p..p...p....t....h..E..h.....p....A....s.tF............AsthL..h..P..p........h..h....t....h....h.........t....h.h....th.................................................ltphh.tlohthh..t...................................................................... 1 408 776 964 +5943 PF06115 DUF956 Domain of unknown function (DUF956) Finn RD anon Pfam-B_9146 (release 9.0) Family Family of bacterial sequences with undetermined function. 25.00 25.00 49.50 49.30 24.70 18.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.50 0.71 -4.49 22 736 2009-01-15 18:05:59 2003-05-29 13:05:51 6 1 702 0 71 320 4 116.70 50 94.62 CHANGED MsQSlNTKV-LsssGTuYhGhu.cYGKIhlGDcuFEF.YN-pNVccaIQIPWpplshVhAsVh..htGK.hIsR.FpIhTc+tGpFhFuSK-otclL+hlRcalss-+lV+u.olhpsI++tF+ ..............MsQSlNppV-LsssuTSahG..lu..chGKhhlGDpuhEF.Ys-pNVccaIQIPWsclstlhAsVh......G+.hIsR.FplhTc.K.GpFhFASKDstplL+hhRcalssD+lV+..o.hhpsIppth................. 
0 19 38 53 +5944 PF06116 RinB Transcriptional activator RinB Moxon SJ anon Pfam-B_9294 (release 9.0) Family This family consists of several Staphylococcus aureus bacteriophage RinB proteins and related sequences from their host. The int gene of staphylococcal bacteriophage phi 11 is the only viral gene responsible for the integrative recombination of phi 11. rinA and rinB, are both required to activate expression of the int gene [1]. 21.80 21.80 26.00 25.70 21.70 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.52 0.72 -4.35 6 399 2009-01-15 18:05:59 2003-05-29 13:10:11 7 2 218 0 7 163 0 48.60 80 91.72 CHANGED MIK+ILKIhFhluMYEluKYlTcELhlhLTuNDDVE.sPpDFs..sDHhHLN..th. ...........MIKQILRLLFLLAMYELGKYVTEQVYIMMTANDDVE.APSDa.............lhtt. 0 3 3 7 +5945 PF06117 DUF957 Enterobacterial protein of unknown function (DUF957) Moxon SJ anon Pfam-B_9300 (release 9.0) Family This family consists of several hypothetical proteins from Escherichia coli, Salmonella typhi, Shigella flexneri and Proteus vulgaris. The function of this family is unknown. 25.00 25.00 27.50 27.00 24.30 23.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.07 0.72 -4.43 5 548 2009-09-11 11:38:57 2003-05-29 13:13:47 6 2 255 0 4 177 0 63.60 84 81.52 CHANGED MphLTshoALDVLIuWLpDNIDpGS-IIFDNDEDNTDSAuLLPslE+ARpslRcLcuLu..pl+usR ........MKSLTTETALDILIAWLQDNIDCESGIIFDNDEDKTDSAALLPCIEQAREDlRTLRpLQ..LLcQNR..................... 0 0 0 2 +5947 PF06119 NIDO DUF958; Nidogen-like Yeats C, Myerscough N anon Pfam-B_1159 (release 8.0) Family This is a nidogen-like domain (NIDO) domain and is an extracellular domain found in nidogen and hypothetical proteins of unknown function [1]. 
25.00 25.00 25.00 25.60 24.80 24.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.30 0.72 -3.28 27 552 2009-01-15 18:05:59 2003-05-29 13:30:38 9 172 103 0 395 611 5 85.80 32 7.07 CHANGED sNTFQslLs...oDuspoasl.FhYsp..lpWsssp.....t...t...th.ApsGFsuG-s........sphasl.Pusc..tslhsLhps.oNsGhsGhWhF+l.sst ...............NTFQsVLs......oc.us.....t....oasl.F.Ysp..l.....pWsssp.........t..........t.h.s......A....psG.Fss.Gsu..................sphasl.PuSpp......psl......h....s.....ltps....oN...s...u..hs.GhWhF+lst.t............................................... 0 136 197 302 +5948 PF06120 Phage_HK97_TLTM Tail length tape measure protein Moxon SJ anon Pfam-B_10088 (release 9.0) Family This family consists of the tail length tape measure protein from bacteriophage HK97 and related sequences from Escherichia coli O157:H7. 23.80 23.80 23.80 28.00 23.50 23.70 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.95 0.70 -5.08 2 304 2009-01-15 18:05:59 2003-05-29 14:09:55 6 6 199 0 11 409 2 245.40 55 27.07 CHANGED QpsLNSsTuVhshlhsGAhG...LlGGlPGllMLGAGAWYshYQpQEQARpSAhpYAsTIEplRsph+pMS.sphosN.upsp.uLctQNphIppQcpKltplpstl..YpthLAssuso.ssahhN......-AspchAs.sstLAVEptRLpQM.sKppphQpllpslpcptl....ppssE.stshpSLh.M......asRL....Npl..uRQuhAs.P.+.....lPt...sppQppAlEKupRpLELStLpG.sKthtphtasAsDLsLsss......RpthlshtlEohRp.pA..Npsp+KGs ...............................QptLNssTuV...GoRLhoGALG......LVGGlPGLl....ML....G..A.u.AW.YTL..Y...QNQEQARESARQYAhTIDEIspKssuMSLPEhoDNEu+TRtALsEQNRLI-EQtS+l+uLpcK...It..s.Yp...h.sLss.su.hss.s.Ghh..l...s...s...tsVT-sLApATcp.LA...VEQsRL....sQMQp..KupS.IQ-lL..A.GLE-RRV.A....LI...RQQA.AEQN...+..sYQShLl.M..N.GQaT.E.....FNRLLGL.GNELLQQ..RQ.GLVNVPLR..................................LPQAT....LD..DKQQoALspoERE.LAL.SRLKGE.tKERsRLGYAADDLGF.VG.-....sYQpARQpYIsNuL-AWRNNps..NKPKu.................................................................................. 
0 0 1 9 +5949 PF06121 DUF959 Domain of Unknown Function (DUF959) Yeats C anon Pfam-B_25471 (release 8.0) Domain This N-terminal domain is not expressed in the 'Short' isoform of Collagen A [1]. 25.00 25.00 27.00 27.00 24.90 24.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.57 0.71 -4.67 3 37 2009-01-15 18:05:59 2003-05-29 14:10:13 9 19 24 0 16 34 0 173.50 48 14.16 CHANGED LLpCpLsuAcA-ssSLSs.p..sWLWhPpT-sS.sAoolucPQuSoPVQSTE....sTTTHVVPRsGpTEpuTTPASSE.PsEl.....lE-GcQpss.GssssTPTVs.shhssAuSPDh..........sEENIAGVGAEILNVAcGIRSFVQLW.EDoVsscS...ApT.VPDTsl.PhVLAoP..lSSsPQsssTTLW.SSuIPSSPuApTTEAGT...LuuPTpLP .........LL.CsLsuApAchhsLs......WLW.sppsss.htsslscPpss.sVQsTt....ssTTHVsPps..G.TE.tTss.uSsc.P.E.......Etupt.........oPo.......sAtSPDh..........pEENIAGVGAKILNVAQGIRSFVQLW...cDoss.scS...ApT.sssoss.PhsLPsP......SSsPQpssTTLh.SpshsSSPsspTTEAGT...LssPT.................................. 0 1 2 7 +5950 PF06122 TraH Conjugative relaxosome accessory transposon protein Moxon SJ, Coggill P anon Pfam-B_10166 (release 9.0) Family The TraH protein is thought to be a relaxosome accessory component, also necessary for transfer but not for H-pilus synthesis within the conjugative transposon [2] [3]. 
25.00 25.00 28.50 28.40 24.40 24.30 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.28 0.70 -5.64 41 471 2009-01-15 18:05:59 2003-05-29 14:14:35 6 3 341 0 63 366 37 338.70 34 76.51 CHANGED hsshF.sshsh.uNsTsPuuapuQstGaaoGGulhsRssh..cshpLhslshPshpAGCGGIDhFhGuFSFINu-plVphh+sIsuNAs....uaAFpLALpslsPplspshppLpphsptlNphshsSCphApulVsshh..spt..sss.ppphsps...hu..spsuhhsDahsu.pptstsssppss...............hstsssssttpp.hhhstNlsWpulp+sshh...........tsspphtEhlMSlsGTlIh.......sssussh.hst.hssssshlss......LlsGss...t.......hclapCs...ss.....spCl.......s.pshslstt.............pulpsplpphl..............puIhsKlts.c.....ssLos....scpsFl.ss..oslPlhchlphtssh.thshusp.hpphu-hlAh-lhhphLpph ....................MsphF.sph...tuNsTpPusapuQstGh.hsGGSlhsRspl..ps..hpLlShshPshsAG...CGG.IDhahGSFSFINu-Qlhphl+pIhuNAs....GYhFpLALpshsPchpssh-hLQchspplNphshsSCphAQulVsshh......spt....pss.pppssps...lu.......spsshhuD..asuu.pptss.suuppss......................hspAssp..pccthhhshNlhWpALp+sphh................................tuspcLtEhlMoloGollh.................sssuphs.h.ss.hs.ss..ps...hlps....................hhc.GGs.............................sclYpCs..ss..........spCL.......sssssolspt......................................puLpsplpphL.........................suI.sKhss..c.....ssL.ss....pEKuFl.ss..oslP....lhphl..hss.....ususshh.phs-hIuh-hhhpalp-............................................................................................... 0 18 33 50 +5951 PF06123 CreD Inner membrane protein CreD Moxon SJ anon Pfam-B_10187 (release 9.0) Family This family consists of several bacterial CreD or Cet inner membrane proteins. Dominant mutations of the cet gene of Escherichia coli result in tolerance to colicin E2 and increased amounts of an inner membrane protein with an Mr of 42,000. The cet gene is shown to be in the same operon as the phoM gene, which is required in a phoR background for expression of the structural gene for alkaline phosphatase, phoA. 
Although the Cet protein is not required for phoA expression, it has been suggested that the Cet protein has an enhancing effect on the transcription of phoA [1]. 19.70 19.70 20.00 19.80 18.10 19.40 hmmbuild -o /dev/null HMM SEED 430 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.13 0.70 -5.94 48 880 2009-01-15 18:05:59 2003-05-29 14:19:36 7 2 798 0 101 509 60 380.70 47 94.10 CHANGED pslhhKhhhluhLhLLLlIPLhhlpslIpERpphpppslscIupShuupQplsGPllslPap.cphpppp....stpthpphpp.....................hhhlLP-pLslsuphpsc.RpRGIYps.VYpupsplpupFshs...t.p.phhstpplh.........hspshLsluloDh+GIpsssplplsG.p.slsh.........pPGs.............thsthsp..Gl+sslsthshpps.........tlsashsLpLpGoppLullPlGcsoplsLpSsWPHPSFsGsaLP..spRplossGFpApWpsothupshsphh..........ttss.sshsttu.............huVshlpPV-tYpps-RAsKYulLFIsLTFhuFFlhEllpphtlHPlQYlLVGLALslFYLLLLSlSEHlGFshAYlluususlhLluhYlsslL+uh+puhsauuhLssLYulLYslLptEDaALLhGSllLFhlLuslMhlTR+lDWYs ...............................................................................s.sLhaKhssLhshhlLLLIPlhhlcplIsERucYRscV.ssIppSoSGsQ+llGPllslPhoEhhpsp-......ppKpsppp+sh....................hhahLPEsLhVcGs.ss.Et.R+hGIYpupVapu-hsl+AcFcls.........ph..p-l.s..t..sslt.........hucPalVluluDsRGIsslps..plsG......p.sL.sl............................EPGs.......................................ultpsspGl......HhP..Lsps.shtpp.............................sLplsh.sLsLsGTuslSlVPsG+sochsLsSNWPHPSFhGsFLP...scRclo..ts....GFpApWpoShh...AsN.hsppa......................sss..pph.sh...shsu......................FoVulhsPsDpYQlo-RAsKYAILhIsLTFhuFFlFEsLosp...RlHPhQYLLVGLuLVh..FYLLLLuLSEHlGFshAallASLhsslh.ulYLpuVL+uh+puhlF...shuLlhL.GlhauL.LpStDsALLlGoslLhlsLuuhMalTRplDWYt.......................................... 0 29 52 73 +5952 PF06124 DUF960 Staphylococcal protein of unknown function (DUF960) Moxon SJ anon Pfam-B_10198 (release 9.0) Family This family consists of several hypothetical proteins from several species of Staphylococcus. The function of this family is unknown. 
25.00 25.00 25.80 25.70 24.10 23.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.94 0.72 -3.74 11 633 2009-09-11 15:24:04 2003-05-29 14:22:46 6 1 565 4 34 211 0 93.80 41 90.95 CHANGED sRYAShGlVoSLPs-lIDshWhII....DppLKsVhsL-sllpFpLlNspGplol+FSQcpssshlshDhshhasPha..Pp+VallDpss+ET....IlLPcE .......tRaAShGlsoSLPs-lIDohWhII....Dc.hLKsVhcL-plLpFpL.h..s.s.pG.....p....l...o.h+FSppp.sst..l-....aDasp..a..cspa..Pt+VhVlDpDspET......ILLPEE........ 0 8 15 25 +5953 PF06125 DUF961 Bacterial protein of unknown function (DUF961) Moxon SJ anon Pfam-B_10221 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 26.20 26.00 24.40 23.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.18 0.72 -3.84 12 903 2009-01-15 18:05:59 2003-05-29 14:24:23 6 1 354 1 46 274 8 100.40 42 88.16 CHANGED .hchlVs-hpcTFGsLcFuu.sc.lhtps.sGphs....hpRoYsLhSslQ.uc.I.VolPApAG.KcFs.ptcVcLlNPhlsshuhthhp.G..hssahlcADDlVh ..................hphlV.Dh-pTFGpLcFuu.cc.Vh.ps........tsGssos...hpRoYsLhSssQ.uc.I.VslPAp..ss.K..cF....sYptcVcLlNPhhs.shuhthhp.G.....ssahl+ADDlVh..................................... 0 22 33 35 +5954 PF06126 Herpes_LAMP2 Herpesvirus Latent membrane protein 2 Finn RD anon Pfam-B_9147 (release 9.0) Family Family of Kaposi's sarcoma-associated herpesvirus (HHV8) latent membrane protein. 
20.80 20.80 21.20 139.90 19.90 20.70 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.76 0.70 -6.09 2 12 2009-01-15 18:05:59 2003-05-29 14:30:16 6 1 3 0 0 12 0 457.10 70 99.80 CHANGED ph+hahWshWhhALlsChhClolh.ClhT....NoI.hhusl.Vhh.LFsohhNhYsQ...Spp.+a.uS..lG...IluCIsht.WshoTpsolohsClhhhulLSllTAhLuLhtphpssKhpl-pGlLChshhhVLIhsMhlph.NsWpps.hFhPl.hhL.l.FlahFATspssslKLsusV.hICuGllhuhPs.hChoHoChushhuhslSsIalGhTGlhhTh++pWhssp+GlhoFLLLQGGVLsT.ThThtlLhIpppp.sN.cGp.hLLhsChhhLaChasWQSFpKASLssGhLaLhhAWopsGsCVpLVhLhssGhTpGlhohlICl.slhSThQulLVhYLY+Ep+lVuhNsh.ppRh.IYT.cps.Htps.......NHLuppl...PPLPsh.h..sRl.SpsTD......RspsopshsplEhQplpp-..pshsYASILssss..spcsSsp.sQSGhS..pVsssushplD...sshQPsD-lYEEVLFPps ....hhhFFWNLWLWALLVCFWCITLV.CVTT....NSIDTMASLLVMCILFVSAINKYTQAISSNNPKWPSSWHLG...IIACIVLKLWNLSTTNSVTYACLITTAILSLVTAFLoLIKHCTACKLQLEHGILhTSTFAVLhTNMLVHMSNTWQSSWIFFPISFTLSLPFLYAFATVKTGNIKLVSSVSFICAGLVMGYPVSCCKTHTCTATAAGLSLSSIYLGFTGIISTLHKSWAPPKRGILTFLLLQGGVLTTQTLTTELLAITSTT.GNIKGHEILLLVCLIFLWCLYVWQSFNKASLVTGhLHLIAAWSHTGGCVQLVMLLPSGLTRGILTMIICISTLFSTLQGLLVFYLYKEKKVVAVNSYRQRRRRIYTRDQNLHHND.......NHLGNNVISPPPLPPFFRQPVRLPSHVTD......RGRGSQPLNEVELQEVNRDPPNVFGYASILVSGAEESREPSPQPDQSGMSILRVDGGSAFRIDTAQAATQPTDDLYEEVLFPRN............. 0 0 0 0 +5955 PF06127 DUF962 Protein of unknown function (DUF962) Moxon SJ anon Pfam-B_10320 (release 9.0) Family This family consists of several eukaryotic and prokaryotic proteins of unknown function. The yeast protein Swiss:P25338 has been found to be non-essential for cell growth. 
24.00 24.00 24.00 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.21 0.72 -4.09 66 1112 2009-01-15 18:05:59 2003-05-29 14:30:38 6 4 665 0 460 999 358 125.90 25 82.98 CHANGED apoFt-FaPFYLspHpssssRtLHalGoslllhhlhhs.lhssp......................................................................h...hhllsh.lsGYuFAWlGHFhaEKN+PATFcaPLaSlhuDahMahphlsG+l ......................................................................................................................................................tshtcahshY....hs.Hps.hNh.tlHhlulshll...h...slhhh....lsph.s................................................................................................................................................h.hhsl.u..lhl...lGa..s...h.Qa..l.....GHh.aEtp+PAh.hcs.l...uLhhs.hhlht.lhh..h.............................................. 0 141 254 370 +5956 PF06128 Shigella_OspC Shigella flexneri OspC protein Moxon SJ anon Pfam-B_10333 (release 9.0) Family This family consists of the Shigella flexneri specific protein OspC. The function of this family is unknown but it is thought that Osp proteins may be involved in post invasion events related to virulence. Since bacterial pathogens adapt to multiple environments during the course of infecting a host, it has been proposed that Shigella evolved a mechanism to take advantage of a unique intracellular cue, which is mediated through MxiE, to express proteins when the organism reaches the eukaryotic cytosol [1]. 
23.60 23.60 23.60 35.20 23.50 23.30 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.81 0.70 -4.91 3 72 2009-01-15 18:05:59 2003-05-29 14:34:55 6 1 24 0 2 67 1 245.20 77 66.94 CHANGED VEISNH.QcK+PLN+KHHTVDFGANAYIIDHDSP...YGYMTLTDHFDNAIPPVFYHEHQS.FLDsFsEVscEVSRYVHGuKG+pDVPIFNTKDMKLGlGLHLIDFIRKSKDQuFKEFCYsKNlsP.VuLDRIINFVFQsEYHIPRMlST-NFKKlKLR-ISLEEAVcASNYEEINspVTsKKhAlQALaaSIsNpK-DVALYLLSNFcFT+QDVAShc+.......sLY.DlEYLLScHGASsKVLEYFIN+GLVDVNsKFcKsNSGDTMLDNAlKYcNuEMIchLLKaGAh.D++a .......VEISsH.QcphPLNppHHTVDFGANAYIIDHDSP...YGYMTLTDHFDN......AIPPVFYHEHQS.FLDpFpEVs-EVSRYVHGspGppDVPIFNTKDM+LGlGLaLIDFIRKScDQuF+EFCYsKNlsP.VsLDRIINFVFQ.EYHIPRMlST-NFKKl+lR-ISLE-AlpASNYEEINppVTsKKhAhQALhhSl..sNtKtDhALYlLSpFphT+QDVhphc+.......pLY.DlEYLLStcsushKVLEYFINpGLVDVNp+FpKsNSGDsMLDNAhK.csucMIchLLK.GAh.sp+a............................................. 0 2 2 2 +5957 PF06129 Chordopox_G3 Chordopoxvirus G3 protein Moxon SJ anon Pfam-B_10417 (release 9.0) Family This family consists of several Chordopoxvirus specific G3 proteins. The function of this family is unknown. 22.30 22.30 22.40 22.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.26 0.72 -3.91 12 50 2009-01-15 18:05:59 2003-05-29 14:37:30 7 1 42 0 0 33 2 107.80 52 98.97 CHANGED usLl.lhFFllFLllsYahsahPTNKhpluVpphs.-ttlhKptssshss.hhsThlFscs-phlssplpshYcupputVslhpsscKhsFpLshccDVRsLLPILLLSK .u.sLLYLllFllFlsluYYFoYYPTNKLQhAVhEpscENAII+QRN--l..Po.oL-TsIFTcssolsSupIpLYYNSshG+llhuhN.uKK+TFNLhcDsDIRTLLPILLLSK.. 0 0 0 0 +5958 PF06130 PduL Propanediol utilisation protein PduL Moxon SJ, Bateman A, Finn RD anon Pfam-B_10447 (release 9.0) Domain This family consists of several bacterial propanediol utilisation protein (PduL) sequences. The exact role of this protein in propanediol utilisation is unknown. Sequences containing this domain usually have two tandem copies (Bateman A, pers. obs.). 
25.00 25.00 26.90 30.00 19.20 18.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.26 0.72 -4.16 125 1579 2009-01-15 18:05:59 2003-05-29 14:40:30 7 10 646 \N 291 1010 41 75.90 35 69.09 CHANGED phsVhlusRHlHhopcDscthhs.............psGphsshcs...sGs+.shhcsVhlhuss+hpsphclsts-ApuhGlp .........s..slhVupRHIHho.pDsctLhs.............psGphsshch...sGs+.shhcsVhlhss.s+htsphcIsts-ApshGl..... 0 156 224 254 +5959 PF06131 DUF963 Schizosaccharomyces pombe repeat of unknown function (DUF963) Moxon SJ anon Pfam-B_10581 (release 9.0) Repeat This family consists of a series of repeated sequences from one hypothetical protein (Swiss:Q96WV6) found in Schizosaccharomyces pombe. The function of this family is unknown. 21.00 21.00 21.30 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.47 0.72 -4.68 8 150 2009-01-15 18:05:59 2003-05-29 14:45:17 6 5 8 0 146 151 2 35.90 79 70.47 CHANGED ITSSoslNSSTPITSSTslNSSTPIsSSSlLNoSTP .....ITSSoVLNSSTPITSSTVlNoSTPITSSoVLNSSTP....... 0 144 144 146 +5961 PF06133 DUF964 Protein of unknown function (DUF964) Moxon SJ anon Pfam-B_10600 (release 9.0) Family This family consists of several relatively short bacterial and archaeal hypothetical sequences. The function of this family is unknown. 22.10 22.10 22.40 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.31 0.72 -4.03 233 2444 2009-01-15 18:05:59 2003-05-29 14:52:23 6 3 1220 12 316 1032 15 107.70 22 85.69 CHANGED slhDpAppLsctlppo-EapshcpucptlpsspcupplhpcFpphQpph.pphpphGc..s..ccsppchpphtpplptpshlppahpuppplsplls-lsphIspsls-h .........sIhDpAppLsctlpps-phpshcpucptl.psspcsp..plhpcapphQpph..pphp.phGc.hs.....ccstpchpphtpplptpsllppahpsppphspllp-lsphItpsls-................. 
0 99 196 262 +5962 PF06134 RhaA L-rhamnose isomerase (RhaA) Moxon SJ anon Pfam-B_10641 (release 9.0) Family This family consists of several bacterial L-rhamnose isomerase proteins (EC:5.3.1.14). 19.60 19.60 19.60 19.80 19.40 19.40 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.41 0.70 -5.57 5 861 2012-10-03 05:58:16 2003-05-29 14:57:12 6 2 844 20 102 524 40 402.70 65 97.99 CHANGED MslKpsYEpAKpcYppWGIDVEEAL+pLKQlPISIHCWQGDDVsGFElscGELSGGIDVTGNYPGKApTPEELRpDLEKALSLIPGKHRVNLHAIYAETDtEsVERDEIEP+HFENWV+WAKcpGLGLDFNPTLFSHPKAcDGLTLAHPDc-IR-FWI-HCIASRKIuEYFGKELGTPuLTNIWIPDGYKDIPSDRLTPRKRLcESLDcIFuEEIDEpYNlDAVESKLFGIGSESYVVGSHEFYMGYALoNcKLCLLDTGHFHPTEsVSNKISSMLLYoDcLALHVSRPVRWDSDHVVlLDDELREIALEIVRNDALD+VpIGLDFFDASINRIAAWTIGTRNMIKALLaALLpPsucLKcLQEEGDYTcRLAlhEEhKTYPFGAIW-hYCEpMsVPVGE-WLcEVKtYEKEVLLKR ..............................................................................................plEQAaElAKpRaAAlGlDVEcAL+pL-+lPVSMHCWQGDDVsGFEN.P-.GuL.T.G..GIQATGNYPGKARNusELRuDLEpAh.............p.LIPGsKRL.NLHAIYL...E.oD...p.s..VsRDpIcPEHFcsWVEWAKtNpL.GL...D...F...NPoCFSHPh.Su.D.GF...TL...SHsDcpIRQFWI-HCKASRRlSAYFGEpLGTPSVMNIWIPDGMKDI..T.VDRLA.P.R.Q.RL.ls.ALDEVlSE.K.lcPAHpIDAVESKLFGIG...AES..YTV..G..S..NEFYM..GY....A....sS....R......p.s...s.L..CLDAGHFHPTEVISDKISAshLYVPpL.L.LHVSRPVRW.DSD..HV..V..L...LDD...ETQAIAsEIV...R.c.....c.L..h...D.........R...V...H...IG..L..D.FF.DASIN...RIAAWVIGTRNMpKALLRALLEPTspLRcLEssGDY.T.u.RL.AL.LEEpKSLPWpAVW-hYCQRpcsPsGu-WL-sVRsYEKclLSpR................................................................................................. 1 34 68 84 +5963 PF06135 DUF965 Bacterial protein of unknown function (DUF965) Moxon SJ anon Pfam-B_10661 (release 9.0) Family This family consists of several hypothetical bacterial proteins. The function of the family is unknown. 
25.00 25.00 27.30 39.10 24.40 17.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.63 0.72 -3.93 34 1369 2009-01-15 18:05:59 2003-05-29 15:32:34 7 1 1357 0 173 496 5 78.80 59 90.15 CHANGED hDcTMpFchscpc.pppl+-lLpsVYpALcEKGYNPINQIVGYLLSGDPAYIspap-ARsLIR+lERDEIlEELV+sYLp ................DcTh+Fc.h.c-sp..cp.clp..-sLpsVYpuLpEK.GYNPINQIVGYLLSGDPAYIPRaNsARN.IR+hERDEIlEELV+hYLc.. 0 76 119 147 +5964 PF06136 DUF966 Domain of unknown function (DUF966) Finn RD anon Pfam-B_8637 (release 9.0) Family Family of plant proteins with unknown function. 22.40 22.40 22.70 42.50 20.70 20.10 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.35 0.70 -5.02 15 135 2009-01-15 18:05:59 2003-05-29 15:43:41 8 4 19 0 91 127 0 321.00 29 72.73 CHANGED VsVVYYLoR.NGp..LEHPHFlEVhluSpsG.......LYLR........DVIsRLssLRG+GMAuhYSWSsKRSYK..NGFVWaDLu-.DDlIhPs...sspEYVLKGS...............Ellcss.p....................s..sp.thps......p...spp.p....ssshsshs++pstsh...........................................uShshsEY+lhKsp-.tt.......tt..huuDASTQT--shptpp.h...................................sspscsspls+p-h.SPssos......tuss-oLEsLh+A-uphhpu.phhppppht..................+h+AoulLMQLISCGuhSsKc.........tpuhthspph+sphspu.hsp...tts................ph....tphphE-KEYFSGSLlE..TKppp........thsuLKRSSSYNs-Rusph 
....................VtVVYYLsR.sup..LE.HPHahEVshsu..p.s...........LhLR...................................DVhpRLssLRG+GMsshY......SWSsK...RsYK..NGaVWpDLs-.-DlIhPs....pu.s.EYVLKGS...............Elhptsss..................................................................p.t...pp........t.......t.t........tss.ss...p.t.pptts..............................................................................................................................................................ss.s...tchps..cspt..................hstssuTpTccptpt.t.t.........................................................ttptptsplspcph...ssssss.......tsps....pshc.slhpscs...p.hh.ts.p..p..p...t.........t......................+h+sus.lLh.QLIo.CG.uhuspp..........tshh.h....hp.ph..............................................h....ht.th.cp-hFSGullp.pt..t...........s.L+pSsuhs..h................................................................................................................................................................................................................................................................................................................. 0 17 54 74 +5966 PF06138 Chordopox_E11 Chordopoxvirus E11 protein Moxon SJ anon Pfam-B_10685 (release 9.0) Family This family consists of several Chordopoxvirus E11 proteins. The E11 gene of vaccinia virus encodes a 15-kDa polypeptide. Mutations in the E11 gene makes the virus temperature-sensitive due to either the fact that virus infectivity requires a threshold level of active E11 protein or that E11 function is conditionally essential [1]. 
21.00 21.00 22.80 51.10 19.20 18.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.54 0.71 -4.05 8 48 2009-01-15 18:05:59 2003-05-29 16:57:42 7 1 34 0 0 33 0 127.00 59 97.10 CHANGED MELVNIFLESDsGRVKLth-.ssptCtpp.tsphh+AlchFlslL+KYIcV-cSTFYLVIKDpDIFYFKhDKGplo.l-NEFaTFscsLhFlc.sa..scITGIpFllTDTMslpIhP+sshtVlupSsNp+aY MELVNIFLETDuGRVKhsIcNs-chCsop.......hIs+FlElLucYI+l-pSpFYLVVKD.KDIFYFKCDRGSISlVsNEFYV.FDEsLLFVc.Da..opVTGVEFIVT-TMPs+IlPKssaAVISsssN+KFY...... 1 0 0 0 +5967 PF06139 BphX BphX-like Finn RD anon Pfam-B_8664 (release 9.0) Family Family of bacterial proteins located in the phenyl dioxygenase (bph) operon. The function of this family is unknown. 24.70 24.70 24.90 177.70 23.50 24.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.80 0.71 -4.58 7 27 2009-01-15 18:05:59 2003-05-29 16:59:10 7 2 26 0 4 18 1 135.70 63 97.89 CHANGED MK..........psRsFLlAlGlFYLhNLlGTLPFtshuLhshMYPGVthpsutPhFpLLpDAWhVVGLQLuAIGlVALWGARDPhRYh.AllPVVIsTElVsGlWDhYSlsWu+.AlhFuLsTLllHslWIsWuLhshRuspppt.......t ....MKpuRlFLIAlGlFYlhNLlGTLPFuohGLFuhMYPGV-LcsGsPlFsLLpDAWAVVGLQLGAIGsVALWGARDPhRYh.AVlPVVIATEVVDGLWDFYSIlWSHEAhWFGLsTLlIHslWIsWuLaAWRAhtpp.sh. 0 1 3 4 +5968 PF06140 Ifi-6-16 Interferon-induced 6-16 family Finn RD anon Pfam-B_9299 (release 9.0) Family \N 25.20 25.20 25.20 25.30 24.80 25.10 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -4.20 27 261 2009-01-15 18:05:59 2003-05-29 17:08:26 8 6 81 1 139 237 0 77.60 48 56.27 CHANGED AslG.Gsh...AVsAsPllLuAlGFTuuGIAAuSlAApMMSuAAlANGGGVAAGSlVAsLQSsGAAG..LSssusslluusGuslGuhl .....................hhG.ush...sV...sus....Ph....s....Lu.....AlGFTuuGIAAuSlAApMMSsuAlA.NGGGVuAGSLVAsLQSlGAsG.....luh.s.u...p.h.hlussGushsh..h...................... 
0 52 68 93 +5969 PF06141 Phage_tail_U Phage minor tail protein U Finn RD anon Pfam-B_9209 (release 9.0) Family Tail fibre component U of bacteriophage. 25.00 25.00 26.90 26.50 20.40 19.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.62 0.71 -4.13 8 798 2009-01-15 18:05:59 2003-05-29 17:25:50 6 2 357 23 15 274 3 127.30 64 99.43 CHANGED MpKHopIRpAVLsALcpphusssoaFDG+PuFl-.Ep-LPAVAVaLTDApYTGphlDpDsWQAsLHltVFL+AQAPDSELDhWMEp+IaPALp-VsGLssLIsTMsspGYDYQRDDEMAhWuuADLoYpITYsh ........M.KHT-IRAAVL-ALc.cp.....cs.u......ATh.FDGRPulh.........D....EpDhP.AVAVYLT.-ApY.TGEplDs.DTWpApLHIEVFL.A.Qs.PDSELDtW.......ME..S+I...YPAhs...sIPALS..sL..IsoMls..pGY-YRRDD-huhWuSAD.LTYsITYEM............ 0 0 3 8 +5971 PF06143 Baculo_11_kDa Baculovirus 11 kDa family Finn RD anon Pfam-B_9424 (release 9.0) Family Family of uncharacterised Baculovirus proteins that are all about 11 kDa in size. 22.20 22.20 22.20 22.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.88 0.72 -4.62 16 47 2009-01-15 18:05:59 2003-05-29 17:40:57 6 1 45 0 0 42 0 85.70 38 88.72 CHANGED Mppssuplsst.s...............sSll-tDQLsQlVoRNRoFl+DFlLVICuhlVFVhIlLFllLlhsI.psh-h.ptp+.phppshLuNhDh+ ...............Mp.pstthhs...s...............sSshstDQLpQlV..sRN+oFl+-FlL.VlCuh.llFVhlllFlhLl..hsI.pshEh.....psp+.phppshLtNhDhR...................... 0 0 0 0 +5972 PF06144 DNA_pol3_delta DNA polymerase III, delta subunit Finn RD anon Pfam-B_9452 (release 9.0) Family DNA polymerase III, delta subunit (EC 2.7.7.7) is required for, along with delta' subunit, the assembly of the processivity factor beta(2) onto primed DNA in the DNA polymerase III holoenzyme-catalysed reaction [1]. The delta subunit is also known as HolA. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.62 0.71 -4.75 24 4182 2012-10-05 12:31:09 2003-05-29 17:51:43 8 10 4065 11 910 3675 1752 168.00 20 49.90 CHANGED YllhGp-.hLlpcsppplhptshppshp-hshhph-hsps.hp..lhpphpohshFus++llhlp.s.........psthstp.hppLpphlpphsscslLllhus..KLscchc...hhKhLpp...puphlcsps.....ctpplhpalpphspphslplspcuhphLhthhpsshttltpplp+Ltl ........................................................................llhGs-.hL..hpctt.s...t...lh.pt.....h......h......t....p.....s..h......p............-....h.p.....h..h...p..h...-......h......p...p...s.........h..p.............l....h.....s.......p.s.ps.h...s.h...F....u...s..c......+...llllc.s..........................ppt.spp...t...h....c..t....L.h.p....h..h....p....p.........s.....s....s...sl...L...ll..ht...s....+....lsc.ppc......................hhctLp..p.........pu.h..h..l.p...s......t.p...........ct.p.pl....pa.lp......pc.h....p.p.t....s...l.....p.....l.....s.....s.....s.....A.......h.p.......hL....h......p.......t....h.......s.......s.......s....lhtltpplp+Lt........................................................................................... 1 308 602 775 +5973 PF06145 Corona_NS1 Coronavirus nonstructural protein NS1 Finn RD anon Pfam-B_9242 (release 9.0) Family Bovine coronavirus NS1 encodes a 4.9 kDa protein [1]. 25.00 25.00 29.10 29.00 18.20 16.90 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.90 0.72 -7.33 0.72 -4.64 2 37 2009-09-11 06:11:25 2003-05-29 17:53:54 6 2 34 0 0 20 0 28.80 90 77.53 CHANGED MphKFVFDLLsPDDILHP.NHVp.IIRPI MpTKFVFDLLAPDDILHPSNHVNLI.IRPI.. 0 0 0 0 +5974 PF06146 PsiE Phosphate-starvation-inducible E Finn RD anon Pfam-B_8639 (release 9.0) Family Phosphate-starvation-inducible E (PsiE) expression is under direct positive and negative control by PhoB and cAMP-CRP, respectively [1]. The function of PsiE remains to be determined. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.88 0.72 -3.74 96 1842 2009-01-15 18:05:59 2003-05-29 17:55:37 7 5 1500 0 409 1034 1208 68.20 32 49.00 CHANGED lsslLhlllhlElhphlhtYhcpp.cl..tlphllhhAlhAlsRtlIl.htsthps..h.....htlussllhLuh ..............thlhhFlhFEhluh.l.l.p.Y.hpss...Hh..PlRahlhIuIoAllRhlI.ls.cc.s.shss.................lhhuuulLlLs................................. 0 88 221 323 +5975 PF06147 DUF968 Protein of unknown function (DUF968) Finn RD anon Pfam-B_9463 (release 9.0) Family Family of uncharacterised prophage proteins that are also found in bacteria and eukaryotes. 30.10 30.10 30.20 30.20 29.80 30.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.32 0.71 -4.41 23 1469 2009-09-11 20:49:19 2003-05-29 18:00:05 6 7 701 0 60 1205 78 190.80 45 67.20 CHANGED sspcLp.shsppplssh...slptplphsDs+plohtphph.aAltNDl......tphh...PpsshR+hhph.hphhpuh.....-sclshup.s.so...pphtpclh..................chllshsh-s-sPhsahppsthpp...cpalha.ltspsCslCG....+tssDhHHl....IG+Gps+hthctaDhallsLCRcHHpElHp.sspsF-cKYthph..lhlpcpls+tLsls ...............................................................................s.s-pLp.phsppNhssWhlsllccshshspp+clolsELsW.WA.lpNpl.................sDsL..............PEushR+uL....t...l.t..c.tlpohh....RESDIlPu.c.psA.T...sIl.pp+sK.....................cpVlultVDPESPt........Sahp+.......sKh+.Rh.........ccYTRW..VKoQsCss.C.....G.......KP.AD.......DPHHl...........I.GHG..p.GGMG.T.....Ku.aDlFsL.PLCR....cHHs......E......LHA..s...shuFE..-KaGSQl...LlhRFls+Ahsh.......................................... 0 11 27 47 +5976 PF06148 COG2 COG (conserved oligomeric Golgi) complex component, COG2 Finn RD anon Pfam-B_9559 (release 9.0) Family The COG complex comprises eight proteins COG1-8. The COG complex plays critical roles in Golgi structure and function [1]. 
The proposed function of the complex is to mediate the initial physical contact between transport vesicles and their membrane targets. A comparable role in tethering vesicles has been suggested for at least six additional large multisubunit complexes, including the exocyst, a complex that mediates trafficking to the plasma membrane. COG2 structure reveals a six-helix bundle with few conserved surface features but a general resemblance to recently determined crystal structures of four different exocyst subunits. These bundles inCOG2 may act as platforms for interaction with other trafficing proteins including SNAREs (soluble N-ethylmaleimide factor attachment protein receptors) and Rabs [2]. 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.47 0.71 -4.28 19 316 2012-10-03 17:31:52 2003-05-30 09:15:08 6 10 269 1 222 448 1 134.30 27 23.60 CHANGED LsFscspFht................................................ssFssspFlpp.ppptssL-pLpp-LctY.phLcppllcLlNpDYtD.FVsLSssLhGh-cplppl.................cssLtphpcclpuh+splptthpclpsplpc+pplcpp+....phLchLlphtpsls+lEcll ...............................................tpttFht..................................................ssF.D.s...-pFls..........p...s..........c.....+..ts..sL-s...L...+...s...-...Lcth...h....phLpspll-LINcDYsD.Fls.......LSs...sLsG..h.-.....c....t.....lppl..................ps..sL...t...p...h...+.c..c...l..ts.l..+s.p.....l.ppthptlpptlpcpcplcppc.......thlptllpl.hcplpclEph................................................................................................................................ 0 73 126 185 +5977 PF06149 DUF969 Protein of unknown function (DUF969) Finn RD anon Pfam-B_9723 (release 9.0) Family Family of uncharacterised bacterial membrane proteins. 
24.20 24.20 24.30 29.80 24.10 24.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.23 0.70 -5.09 29 934 2009-01-15 18:05:59 2003-05-30 09:23:58 7 2 917 0 119 446 13 214.90 46 92.60 CHANGED hhsLlGlsllllGFhL+aNslLVVhlAullTGLsAGhshhclLsslGpuFlssRhlolhhLl.LPVIGLLERaGL+E+ApshIs+l+uATsGRlLhlYLllRploAAlGL.uLGGHsQhVRPLlAPMAEuAAcsphG....cLscpp+-+l+AhuAAsDNlGhFFGpslFlAhGulLLhpuhLcph.Ghp.l-shplulauIPTAlsAhlIauhRhhhhD+pLt+ ....hlhLlGIsllVlGFhL+hsslLlVhlAGllTuLluGhuhs......clLphlGcsFlssRslslFllh.LPllGLLERaGLK-pAtsLIpKlKuhTsG+lLhlYhhlRplsAAhul.slGGHsQhVRPLls..PMAEuAAc.sphG................cLscc.c-clKAhAAAs-NhGhFFGpslFlAsGulLLlpuhlcph....Gh.c..lp....shplAlhuIPsAlhAllltulphhLhD++Lt+.......... 0 26 59 93 +5978 PF06150 ChaB ChaB Bateman A anon Pfam-B_7743 (release 9.0) Domain This family of proteins contain a conserved 60 residue region. This protein is known as ChaB in E. coli and is found next to ChaA which is a cation transporter protein. ChaB may be regulate ChaA function in some way. 20.60 20.60 22.50 22.20 19.50 18.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.76 0.72 -4.17 56 786 2009-01-15 18:05:59 2003-05-30 09:37:16 7 2 707 1 126 393 17 63.10 55 67.80 CHANGED LPtslp.pLPt+ApcIahcsFspuhcpa..ts...........EpsAt+lAWsAV++cYhK.hssc...W..ls+s ..........LP-oV+pVLPuHAQDIY+EAFNSAW-QY..KDttcRR.scsSREETAHKVAWA.AVK+-YtK..s-DDK...W+KKp............. 1 32 76 101 +5979 PF06151 Trehalose_recp Trehalose receptor Finn RD anon Pfam-B_9846 (release 9.0) Family In Drosophila, taste is perceived by gustatory neurons located in sensilla distributed on several different appendages throughout the body of the animal. This family represents the taste receptor sensitive to trehalose [1,2]. 
20.30 20.30 20.40 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.33 0.70 -6.12 8 405 2012-10-01 21:54:26 2003-05-30 09:38:08 8 5 31 0 147 529 0 255.80 24 92.58 CHANGED Mppot..hcK.........................................pahp.soFHcAluPVLhlAQhFulMPVsGlpup.cPccl+FpWpSlphhaoLlhhlhshschuhuhphVhssulshcolssLlFhlssllsalsFlpLARpWPpIhRpWstVEphhhpssYp.ht+tshu++lpllullllssuLsEHhLhhsSuhh.sshphppCc..hs..shpsYhhpppsplFhlhsYosahshhhcahNsshTFlWNFhDIFlMhluhGLutRFpQLspRlcpht+psMspsaWpclRpcalsLscLlchlDcAlSsllLlShuNNlYFICsQlL+SFpshs.shhctlYFWFSLlaLluRThhlhLsASSIsDEu+csLpsLRpVPocuWCsEVpRFuEpltoDpVALSGh+FFhLTR+LlhuMAGTllTYELVLlQhppsscltp....C .........................................................................................................................................................................................................................hhhuphhshhPl.sl......ttl.pF.thhs..hhhsh.......h.h....hh........h.h.h.h....h.h..h.p......shph........p............sh.h.h.hhh..h.hh.hu.pW.s.lhh.htth-...h.......ht.......ththtpp...l.hhhhhh....h.hu.........................................................................................................................................................................htph...h.......................................................................h.......+..h.th..hh..hp..t.ht..h..............................................................................................................h.........h.a......h...h....s..h...hhl.R..........h........h...h...s.u.l.......t..................h.........h........................a....t...........p........h......h........h...h.........hsu.t.a.hp...h........................................................ 0 46 60 120 +5980 PF06152 Phage_min_cap2 Phage minor capsid protein 2 Finn RD anon Pfam-B_9879 (release 9.0) Family Family of related phage minor capsid proteins. 
21.30 21.30 21.60 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.08 0.70 -5.47 9 238 2009-01-15 18:05:59 2003-05-30 09:44:04 6 7 205 0 35 224 6 309.60 23 78.56 CHANGED M.....tl..sspph.h.st.hsDlYstLpt-lhsphlcplKsptsh......upshhpWQhpKLsclthLsppslchlu+hoGhspctlpphlcssGhpshpsh-ptlscs.................lppt.........hpshtshl.phLsuYtpQshscL.NhlNpTlLpo......s.tsYpsIIpcT.................sstllsGhKT.ppAlcpsltcahcpGlsu.hlDKuG++WohEuYsRsVl+TTstpsaNchcpc+hc-YGlclshhSpHsuAR.tCuPlQG+ll..ssputsppp.ssK...Y.ul.s....huYGsPuGhhGlNCpHphhPaIsGVsp.....spp.phD.c-stcshphppcQRhhERpI+phKcphhhtcphsDcchhthtpptVRphQstl+thlpss ...........................................................................................................................................................h.phapthp.plh..hhcplht................tp...Wph.phpphthhpp..p.hphlsph.s.stptlpphhp...pthhphhpp..hpp.h...t.........................................httt.....................t......t..pphl...psh....p...ps.hpsh...p.hspsh.tp...............htphYpph.lpcs.................h.hp.h.s.Ghh..oh.ppA.lppsl.hp......hhppGl.s..hhD.p...u.....G.....+...paph-s...Yschslposstpshsphptpthp-h...Gh-..hh.lStHssAR........s........t.........C......u.......hQGclh.........hht.............s.c....a.s.l.s...........hthu...p....ssG.........hhGhNC+Hhhhsah..Glsp...............pp...hs.......c......p......s.....p.............hp...hpppQ..Rh....hERpIRptKcchh....h.tc.th....t.c.......tp................hpthpttlp..thptthpphlpt........................................................ 0 15 29 32 +5981 PF06153 DUF970 Protein of unknown function (DUF970) Finn RD anon Pfam-B_9915 (release 9.0) Family Family of uncharacterised bacterial proteins. 
21.10 21.10 21.10 21.10 21.00 20.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.30 0.72 -4.09 11 826 2012-10-01 21:59:08 2003-05-30 09:47:45 6 1 766 3 149 414 5 106.90 52 98.97 CHANGED MKLlIAIVQDpDuscLhcuLs-psFcsTKLAoTGGFLKuGNTThlIGlED-+V-cllolIK-sCpsR-plVsshuPhusssDsYlPaPVEVpVGGATVFVlsVEpFhph ................MKlIlAIVQDpDSscLucpLlcss.a.RATKLATTG....GF....L+uGN........TTF.llGl-D-RVD-lLslIcp.sCpsRcQ..hV.o.s......ss.h...s...s.o..s-...u.a....l..PY.PV.EVpVGGATVFVhPV-tFcph.......................... 0 68 112 135 +5982 PF06154 YagB_YeeU_YfjZ YagB/YeeU/YfjZ family Bateman A anon Pfam-B_7771 (release 9.0) Family This family of proteins includes three proteins from E. coli YagB, YeeU and YfjZ. The function of these proteins is unknown. They are about 120 amino acids in length. 25.00 25.00 27.00 27.00 24.10 23.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.23 0.72 -3.80 10 822 2009-09-10 17:55:17 2003-05-30 09:48:58 6 2 339 6 28 413 9 90.00 81 87.89 CHANGED sssPpWGLpRslTPpFGARLVQEGNRLHYLADRAGlsGpFS-spsp+L-cAFPphlKQLEhMLtSGELsPRpQHCVTLYpsGLTCEADTLGSaGYVYIAIYPTpt .....................PphsARLVQEGN+LHYLADRAGIRGtFSDADAYHLDQAFPLLMKQLELMLTSGELNPRHQHTVTLYAKGLTCEADTLGSCGYVYLAVYPTPc................... 0 5 7 15 +5983 PF06155 DUF971 Protein of unknown function (DUF971) Moxon SJ anon Pfam-B_10230 (release 9.0) Family This family consists of several short bacterial proteins and one sequence (Swiss:Q8RZ62) from Oryza sativa. The function of this family is unknown. 
21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.14 0.72 -3.57 109 1053 2009-09-11 11:45:35 2003-05-30 09:50:28 7 18 818 6 442 927 860 84.50 30 39.78 CHANGED oplplpp.ts.+hLplsasDGp...................p...........hplssEhLRVtSPSAEsp..............GHtsspp..............hh.suKpsVsIpsl-PVGsYAl+lsFsDGH-oGlYoWsYLh ..........................................................................l.hpp..tp.+hLplpass..Gp...................p..hplstchLRshsPuA-sp..............s+ss..spc.....................hh..ss+.psVp..lt..s.l...c...s.l.G.p...YA..l.plsF....s..D...G.Hso.......GlasWsaLh............................................ 0 150 254 350 +5984 PF06156 DUF972 Protein of unknown function (DUF972) Moxon SJ anon Pfam-B_10235 (release 9.0) Family This family consists of several hypothetical bacterial sequences. The function of this family is unknown. 30.00 30.00 30.00 30.30 29.80 28.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.72 0.72 -3.70 43 1035 2009-01-15 18:05:59 2003-05-30 09:54:09 8 1 1029 0 114 334 1 105.50 41 94.11 CHANGED M-Kp-la-plsphEppltphhpplspLKppltpllEENspLclENp+LRc+Lpchpt.........................ppppsppt........hucuh-NLt+LYpEGFHICs..haYGp+R.p-E-.ClFCLs ................................MDK+-lF-slsphcppltphhp-lpplKppltpLlEENssLclENpcLRc+Lscl-t...................................tcs.hc..............htcu+-NLtplYpEGFHlCs...aYGp+R..p-E-ChFClp.................... 0 36 66 90 +5985 PF06157 DUF973 Protein of unknown function (DUF973) Moxon SJ anon Pfam-B_7947 (release 9.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 
21.30 21.30 21.40 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.91 0.70 -5.44 8 109 2009-01-15 18:05:59 2003-05-30 09:57:02 6 3 37 0 58 97 7 227.70 21 91.95 CHANGED sElpGlpKLRsGsLahlluslluhIshIlhls......................s.h.hhs.h.shltsulhllIlsll...lslluhl+l+SGFsILs.usu+DlGhGtTGshLlllGhllllIGsl...lslh..................lhuh.llaIGsILslIGtILlGluhY+lGchYsssllKlGGILllIsI............lsFIGaILsYlGLscVhsph...............tPhsshps.usplpQVGhGsL+uNGhAplTlYSphpssIlSApI-GTNhpso..pIsPthLpsGpNsIpIpFs.sssshlsGohYhlsLsls...Nu.slplsllYQP ................................................................h.ult.l+pG.lhhhlh.ll.hlh..hhhh..................................................hh..h...h...hhh.hthl......lhl...ls.h.h.h..h.ppGF..Lt.pht...shthG.....hus.l.hl.luhlhhl..huhl...hsl.......................................h......lh...hlu..llh..hlGhlhl.s.h.shhplGpha....p.pshhphuGIlhhlsh.......................................lshlGhI.lhYh...ulspl..ht................................................................................................o.....tl.p..l...sh...h.s..........s..l..G.N.l.h.h........h..tt.Y.l.l..t...s.....h.h.h............................................ 1 14 23 48 +5987 PF06159 DUF974 Protein of unknown function (DUF974) Finn RD anon Pfam-B_9042 (release 9.0) Family Family of uncharacterised eukaryotic proteins. 
25.00 25.00 25.80 26.20 24.00 23.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.76 0.70 -4.92 19 285 2009-01-15 18:05:59 2003-05-30 10:02:37 8 7 233 0 202 302 2 228.30 33 55.68 CHANGED LsLPpuFGslYLGETFushlsssN......................sospsVpsVslcAEhQTsop......................+lsLtsssss..ssst.........................................lpsspslppllpa-lKE.GsHlLsCoVsYpps.........uGc....p+tFRKhapF.s.psPLsV+TKhtshts..........................chhLEAQlENho..psslhL-pVpL.-ssptapssslsh-sshss..ssph.t..........................................sh.LpPs-.scQhlFhlp.cst................sppstthcspsslGpLsIsWRoshG-+GpLpTupLtpp ..................................................LsLPtsFGslalGE..TFushlslpN.......................ssspsl.pslhl.cA-lQTsop......................+lsLs..ss.s.ss.......ssp.........................................Lpssss.lscllca-lK.......EhGsHlL....sssVoYsst..........................sGc..........................phhFRKhapF...s.hpP.L.sV+TKhhshpst....................................................chaLEAQlpNho..sushh..h-.................pVp..L.-ss..a..pssplN.....sptsp...s..p...............................................sh..L..p.Pt-..spQalaplc.ctt...........................htps.hhhcuh.s.sl.G.cLsIsW+o.shGE+GpLpTupLtp.h.............................................................................................. 0 69 106 160 +5988 PF06160 EzrA Septation ring formation regulator, EzrA Finn RD anon Pfam-B_9703 (release 9.0) Family During the bacterial cell cycle, the tubulin-like cell-division protein FtsZ polymerises into a ring structure that establishes the location of the nascent division site. EzrA modulates the frequency and position of FtsZ ring formation [1]. 
32.70 32.70 32.80 32.70 32.40 32.60 hmmbuild -o /dev/null HMM SEED 560 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.75 0.70 -6.33 37 1051 2009-09-13 07:39:20 2003-05-30 10:10:28 7 3 1032 0 101 562 3 545.60 34 97.35 CHANGED lllulllllllhhhsshhh+++hhcpIccLEpcKpclhshslp-Elpcl+pLpLsGpopppFccWcpcas-lsssphsclEchlh-AEshscpa+Fh+A+ptlsplcshlsthEpplcpIpptlscLhps-ccNptplpphcctYcpl+KslLspsapaGsuhstlEcpLppl-pcFppFspLspsGDalcAcclLpplccchtpLpphhccIPsllpclpsshPsQLp-LcsGYcchhppsYhhsc.hslcpclpplppplpps.stL.ppL-l-cspppsppIpccIDpLY-lhE+ElpA+phVcpptpplsphlp+spppsppLttEl-+lppsYtLscsElpps+phppclpplppphpphtcplspppssYStlp-phcph.cpLcpIccpQtclpcplpsLcc-EtpA+cplppacpclpplcRhlc+pNLPGlPpsalphhhpsssclcplsccLsph.lNh-plsctLp.spsslcpLcccop-llpsAsLsEpllQYuNRYRspppplppuhpcApplFcp.acYppul-hlupALEplEPGuhcRlpcsY.p ..............................................................................................hlluIllll.l.l..shslth..h.l..R....++.pppp.l-tLE-RKpE.lhsLPls-ElcplKph.p.Lh.Gp.o.pst.FccWppcWs-lospph...uclEppla-AEsh.scpF+Fh+Apptlsphpptls.hEpslppIhpsLs-Lhcp-ccNstclpcsh-hYc-hp+pVhsspcpaGpAhsplEcpLcslpscFspF.sLsssGD.lcAppllsshccchhtLpphh-cIPsLlpchppplPsQLpDLc.t.GY+cLhc.ps..Yphs.c.hcl-pclpplppplcpspttl.ppL.-L-pAptcstplp-cIDslYDlhE+ElcA+chV-pppshlschLp+h+cpNppLtpElpclppsY.hLs-..s-hppl.R..php..s-lpplppsh....pclt.pphpcp.shsYS.lp-pLcpl.cplpsIcccQhcl.......pcpLpplcc-EhpAccplpphpsclcpl+Rhhc+pNLPGlPpsalp.hhhpss...pplccl.ppLppp.INl.cplschlphsssshpsL-ccopcllpsAsLsEpL.lQYuNRYRppcp.....plpcuhscA.clFcp.acYctul-hhupALEplEPGlsp+ltppa............................................................. 0 24 53 77 +5989 PF06161 DUF975 Protein of unknown function (DUF975) Finn RD anon Pfam-B_8494 (release 9.0) Family Family of uncharacterised bacterial proteins. 
29.20 29.20 29.30 29.40 29.10 29.10 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.88 0.70 -4.63 12 1830 2009-01-15 18:05:59 2003-05-30 10:32:24 6 8 936 0 225 1227 109 171.80 22 89.60 CHANGED M........oppElKphA+ppL+spWGsslLlhllhhl...........hhhshhlshhtshs.p.h..............llhhllsshlphushhshlcls+p..ppsphppshssF...cpFhphlls.lLhslhhhLaull.hh.h.l.h...................................hhhh..hl.llh.ullhs..htYS.s.alLh-p.c....hGshcAlscStphMKGhKWchFlLpLSFlGW.............................hlLshloh....................Glsh........laLl.........PYhpTsphhFYcsltttp .............................................................................................................h......th+..t...h...p................hh......h........................................................................................................................................................................hh..h..h....h....h..u.........hh..lth...ct....tp...t....h..t.........h...hh......t.h.........hh.h.....l.h..h.h.h.h..h......................................................................................................................l....h.....h....hh.hhhh.....htat.....s.a.lh...hp.t.p.........hs.....phhptShthMpG.+hph.hhL..Lphls..W.................................................hhLs..h..h....sh..................................u.ls.........................................hhl........................PYh.....hs.h.aYttl............................................................. 0 77 151 182 +5990 PF06162 DUF976 Caenorhabditis elegans protein of unknown function (DUF976) Moxon SJ anon Pfam-B_10032 (release 9.0) Family This family consists of several hypothetical Caenorhabditis elegans proteins of unknown function. 
24.40 24.40 24.40 24.40 23.30 22.90 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.94 0.71 -4.73 6 29 2012-10-01 19:48:29 2003-05-30 10:42:21 7 3 6 0 27 54 0 151.70 37 68.88 CHANGED MLSMCN+aPhF-ssc..RhVh.phacllTVFDss.puppPSsAVIVa-ELsKus.ssphLsh.KMEpSYtKVDplspchsspph+aAIHLuSHS.KNsIQIhpoAaSsGYTpcDKcGplPEGsKVKCsGsETsh+TpVsCEcVVK-VNEah-ps+pKFG-Lclpshpc .................................................hCp.....................sl.VTuFs..s..s.h.Et..p..sPS.ssV.l......DELhKps....ssphl.h..K..h.phuY-cVsc+l....PEL.h.p.p..p.s.chslHLu.sHslcN..sIhhpppAFusGYsppDh.sG.h.lPE.GN.+..s.....p.s.....s.....sp-.....p...sh+oplcC-pLVccVsE+hsLDGp.K.aGGLpVcpS-............................... 0 12 13 27 +5991 PF06163 DUF977 Bacterial protein of unknown function (DUF977) Moxon SJ anon Pfam-B_10135 (release 9.0) Family This family consists of several hypothetical bacterial proteins from Escherichia coli and Salmonella typhi. The function of this family is unknown. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.73 0.71 -4.39 3 656 2012-10-04 14:01:12 2003-05-30 10:44:53 6 3 360 0 19 337 25 123.10 59 90.83 CHANGED MAKPFTQEEREcIKuRIIGLVRcsGRhTlsQLEstTGAsRaosc+hLR-lLAsGDlYpsG+hGlFsSEQAh+sWppAtcKh..........sD.sLIhp.PDGEIRRYDSpQNI...ICsECRKSEVMQ....RVLAFYQGNFQ ..................MAKsFTpEEREcIKupllELVRpSGRcTlRQL..EsKTGATRahhphLuR-LVASGDVYp.SGh..G.lFPSEQAh+DW..ppARcKh...........sDPsLIhpLPDGEIRRYDR+.NI....ICpECRcSEsMQ....RVLAFYpGsh....................... 0 3 9 16 +5993 PF06165 Glyco_transf_36 Glycosyltransferase family 36 Finn RD anon Pfam-B_9110 (release 9.0) Family The glycosyltransferase family 36 includes cellobiose phosphorylase (EC:2.4.1.20), cellodextrin phosphorylase (EC:2.4.1.49), chitobiose phosphorylase (EC:2.4.1.-). Many members of this family contain two copies of this domain. 
20.60 20.60 20.90 20.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.03 0.72 -4.21 116 1154 2010-01-08 16:32:43 2003-05-30 10:48:30 6 29 637 23 350 1064 71 109.20 27 9.38 CHANGED DssGpahYlRD.poGchWSsohpPs....p.sss...tpYp..spaG.uhupFpppp......sslpsplphh.Vsh-c....ssElp+lpLpNpuscsRplplToYsEh.VLusstsDpup.thophhspo ............................................DpsGcalYl..R...Dtp..........o...G.........c...hWSsohpPs...................p..pss....t.pYc.........sca....GhuaopFppp.t......sslpsphphh.Vs.hcc.....ssElpclplpN...pos..csRp.lplhuYsEh.sL.u.s.h.p.s-ssphhhsph............................ 0 112 228 276 +5994 PF06166 DUF979 Protein of unknown function (DUF979) Moxon SJ anon Pfam-B_10323 (release 9.0) Family This family consists of several putative bacterial membrane proteins. The function of this family is unclear. 25.00 25.00 63.00 63.00 21.50 21.30 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -11.96 0.70 -5.46 30 939 2009-01-15 18:05:59 2003-05-30 10:50:52 7 3 918 0 123 458 15 303.60 46 97.51 CHANGED L-hlYhLhGllhlhhAhhohpD+sp..P+RaGoulFWulhulsFlhGs.................hlPshssGslVlshullAuhttlshGphppsssp...ptcptAp+LGN+LFlPALhlPllollsuhh.hspl................uoLlulGlGsllAlllAhhhT+sp.stpslpEucRLl-ulGWAulLPQhLAsLGslFssAGVGcslupllushlPtssthluVlsYslGMALFThIMGNAFAAFsVhTAGIGlPhllsphGGNPAlhuAlGMhuGaCGTLhTPMAANFNlVPAALLEl+D.+.uVIKsQlPsAlsLLllsIhLMYaLsF ...............-hhYhlhGllhlhsAhhshcD+ss..PpRhGouhFWslhulsFlhGs.................hlPs...hshGhlVllhullAhhptVphGphc.phsc...p...ctptpAp+LtNKlFlPALhlsllsllhuhh.hspl.................................sollululusllAhlsuhhh..T..+sp.PppslpEusRhlpplGhuuILPQlLAsLGslFssAGVGcllu+llusllPs-shFluVhsYslGMsLFThIMGNAFAAFsVlTAGlGlPhllth.GuNPAlhuAluMhuGYCGTLhTPMAANFNlVPAALLEh+D.cNGVIKAQhPsALslLlhpIhLMYhLsF................................ 
0 26 61 94 +5995 PF06167 Peptidase_M90 DUF980; MtfA; Glucose-regulated metallo-peptidase M90 Finn RD anon Pfam-B_8651 (release 9.0) Family MtfA (earlier known as YeeI) is a transcription factor A that binds Mlc (make large colonies), itself a repressor of glucose and hence a protein important in regulation of the phosphoenolpyruvate:glucose-phosphotransferase (ptsG) system, the major glucose transporter in E.coli. Mlc is a repressor of ptsG, and MtfA is found to bind and inactivate Mlc with high affinity [1]. The membrane-bound protein EIICBGlc encoded by the ptsG gene is the major glucose transporter in Escherichia coli. MtfA is found to be a glucose-regulated peptidase [3], whose activity is regulated by binding to Mlc available in the cytoplasm, which in turn has been released from EIICBGlc during times when no glucose is taken up. A physiologically relevant target for this peptidase is not yet known. 20.90 20.90 21.30 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.39 0.70 -5.00 74 1002 2012-10-04 10:46:43 2003-05-30 10:52:23 7 4 957 2 233 631 88 237.40 45 88.21 CHANGED Mh.......h..............htht+cppltppshs....shWpphlpp.lPhhptLsss-ptcL+phsplFLscKpapGspGlplTDph+lsIAAQACL.lLpls.....hsaYsshppIllYPssFlsppptt.....Dcs..GlVHchcpshsGEuWpp.GPVlLSWpslhtus............st-G....aNVVIHEFAHKLDhhsGs.AsGhPsL....tss.h.......shppWspshppsacphppphptspp...................shlDsYuAssPAEFFAVsoEsFFppPptltppaPplYptLspFY+ .................................................hhhh..................phpcthp..p.pslP......WpcsLs..lPlLssLotpEpt+L.hslAptFLppK+ls.s.....lpGhE.LsshhpspIAhhhCLPlLcLG.....l-Wh-GF+EVLlYPusFVVcccac.....D-hGlVH...stchlpSGpSW.pQ.GPllLsW.DlQcoh............ssuG....aNLlIHEhAHKLDhc..NGc.AsGlP.h.l.........sh.p........-lsuWc+.sLc.s.Ahsslpccl-hss-t.................tusIDsYAAocPAEhFAVlSEYF......FoAP...-LhtscFPuLapphspFYp......................... 
1 73 138 190 +5996 PF06168 DUF981 Protein of unknown function (DUF981) Finn RD anon Pfam-B_8691 (release 9.0) Family Family of uncharacterised proteins found in bacteria and archaea. 28.50 28.50 29.00 29.80 28.30 28.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.34 0.71 -4.52 13 68 2009-01-15 18:05:59 2003-05-30 10:55:04 6 1 57 0 29 64 5 187.60 30 95.22 CHANGED Mu.........FIDsLslhLhsLuhshllhAhhhlpshlshc.......cculcsuhhPhh.sLGlhhhloGlauphTW......PLPuSY...NILFhDsahlhGlhlluhululapshc...............LcshuhhuLhlGLhsIhYGssshtasL.Tt-P...............l.AhLuLYhlsGLAulL..ssslhlDphctpt.............lhhllthlhLlluullAhaIGhpAlhuHL..s.sas ..........M...FlDsLslhLhhlshshllhAhhhhtshhsht...............................cpshscshhPhh.slGlhhllhGlauphTW......PLPu...SY....NILFhDsahlhGlslluhululhh.s..hc..........................Lpshuhh..ulhhGLhsIhhGsuhhsauL.TpcP................h.uhLuhahhhGLAulh..ssslhhpp.p.t..............hhhhlhhlhLhluulluhhhGhtuhhuHlst..s....... 0 13 18 24 +5997 PF06169 DUF982 Protein of unknown function (DUF982) Moxon SJ, Eberhardt R anon Pfam-B_10431 (release 9.0) Family This family consists of several hypothetical proteins from Rhizobium meliloti, Rhizobium loti and Agrobacterium tumefaciens. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 22.20 22.20 22.20 25.10 21.30 21.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.49 0.72 -4.24 51 308 2009-01-15 18:05:59 2003-05-30 10:56:52 7 2 50 1 157 327 1 74.20 29 77.43 CHANGED ppPVtlthss.tthp.tlpost-AhchL.pcWPh.p..cG.tapsAl+sCtsAlsGphssppARcAFlsAAccAsl.hh.s .......t.pPVtlthss..s.thp.plpost-AhchL..pcW.Pt..p..cG.tapsAlcsCtsAlsG..ptsspsARcAFlsAAccAslhh..t.......... 0 7 49 64 +5998 PF06170 DUF983 Protein of unknown function (DUF983) Moxon SJ anon Pfam-B_10629 (release 9.0) Family This family consists of several bacterial proteins of unknown function. 
21.30 21.30 21.30 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -9.95 0.72 -3.87 62 469 2009-01-15 18:05:59 2003-05-30 10:58:27 7 4 239 0 180 411 137 84.30 38 61.56 CHANGED FcuaLKlsspCssCG.-hsatcusDGPAahsIlllGalllshhlhl-hsapPshWlphsl..ahPlsllhsLhLLpslKGhllulQas .....FcuFLplsspCssCG.Dasatc....u.sDG.PAahlIl..ll.GallVshhl...hlEhsh.s.s.shWlahsl..ahPhsllh.oLhlLpslKGhlluhQa............ 0 50 108 132 +6000 PF06172 Cupin_5 DUF985; Cupin superfamily (DUF985) Finn RD, Bateman A anon Pfam-B_9217 (release 9.0) Family Family of uncharacterised proteins found in bacteria and eukaryotes that belongs to the Cupin superfamily. 25.00 25.00 25.60 25.20 24.60 24.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.87 0.71 -4.27 19 1200 2012-10-10 13:59:34 2003-05-30 11:06:00 6 9 1116 34 350 843 354 132.00 40 81.62 CHANGED ApplIcpLsLpsHP.EGGaa+ETaRussphs.s..............RshsTuIYFLLsp..sshSpaHRl.cu-EhWHaauGssLplhhhsssGphps.....hpLGhDl.ttGpps......QhlVPsGsWhuu....................ps.ssaoLVGCsVAPGFcFcsFELhc ...................................pphIcpLpLtsHP.EGGaa+cTh+ustphst..............................................RuhhTuIYFL.Lps.........ssh............S+..aHRl.su.DElWa....aa.uGs.sLplahl.s......s......-...G..p...h.ps............................hpLGhDl...tpGp.p.......................QhsVPtGshhuu..............................................pssssau...LVuCh........VuPGF-FccFELh............................ 0 101 206 283 +6001 PF06173 DUF986 Protein of unknown function (DUF986) Moxon SJ anon Pfam-B_10711 (release 9.0) Family This family consists of several bacterial putative membrane proteins of unknown function. 
22.20 22.20 22.30 22.50 21.40 22.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.99 0.71 -4.39 12 608 2009-01-15 18:05:59 2003-05-30 11:09:38 7 1 599 0 47 234 2 144.70 70 97.43 CHANGED TslhLllhIshhLhYAhYDpFlMsh.hKGKThLpVpLK++s+lDulIFlsLIuILl.YpNhts..pGs.lToaLLssLsLlulYluaIRtP+llFKppGFFauNlFIpYu+I+pMNLSEDGlLVI-L.pp+RLLlplpplcDLEKIhphhsphp ....TDLVLILFIAALLAaAIYDQFIMPR.RNGPTLLuIsLLRRGRlDSVIFVGLIs...ILI.YNNVTs..HGAhlTTWLLSALALMGF.YIFWI..RsPKIIFKQ+GFFFANVWIEYuRIKsMNLSE....DG...VLVMQL.EQRRLLIRVRNIDDLEKIYKLLlss........................ 0 4 14 32 +6002 PF06174 DUF987 Protein of unknown function (DUF987) Finn RD anon Pfam-B_9329 (release 9.0) Family Family of bacterial proteins that are related to the hypothetical protein yeeT. 25.00 25.00 36.80 36.70 24.50 23.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.15 0.72 -4.46 3 583 2009-01-15 18:05:59 2003-05-30 11:12:23 6 2 284 0 11 152 0 65.30 91 89.63 CHANGED M+IIo+tcAMcIaRQHPuSRLFRYCTGKYQWHGSAsHYTGRDVsDIoGVLAVYAERR+DusGPYlc ...............MKIITRGEAMRIHpQ...HPASRL...FPFCTGKYRWHGSAEAYTGREVQDI.PGVLAVFAERRKDSFGPYVR..... 0 4 4 8 +6003 PF06175 MiaE tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE) Moxon SJ anon Pfam-B_10761 (release 9.0) Family This family consists of several bacterial tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE) proteins. The modified nucleoside 2-methylthio-N-6-isopentenyl adenosine (ms2i6A) is present at position 37 (3' of the anticodon) of tRNAs that read codons beginning with U except tRNA(I,V Ser) in Escherichia coli. Salmonella typhimurium 2-methylthio-cis-ribozeatin (ms2io6A) is found in tRNA, probably in the corresponding species that have ms2i6A in E. coli. The miaE gene is absent in E. coli, a finding consistent with the absence of the hydroxylated derivative of ms2i6A in this species [1]. 
20.70 20.70 21.20 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.73 0.70 -5.00 6 1063 2012-10-01 21:25:29 2003-05-30 11:15:23 6 3 665 4 298 910 594 146.00 32 95.80 CHANGED phLsPlhpFLpCsTPpsWlEpAht.tsLDllLLDHptCEhKAAtoAh.Llt+Ys..................................................PhspcLlschstLh+EEL+HFcQVhplhctRsIshsslsAuRYApuLhutVRTpEPppLlD+LlVGAaIEARSCERFAtLAPaL.DsELAKFYsSLL+SEARHapDYLpLApphuuc.DlScRlphhutlEAELIpoPDsEFRFHS ...................................L....Ts.tWl..s...tp...hL.DHh.CE.KAutsAh.hhhpa.................................................................tL.t.h..lhpEEh.Ha..Vhthh.t+sh.h.......ts.Ystthht.h..p....p...........p..........hh.hD.Llhuuhl.EARSpERhthls..............h.....-......pl.tpaYhtLh.SEupHat.ah.hA.ph.........t..........................ltt+ht.hh.hEtp....ll....t.......t.......phH........................... 0 81 175 256 +6004 PF06176 WaaY Lipopolysaccharide core biosynthesis protein (WaaY) Moxon SJ anon Pfam-B_10767 (release 9.0) Family This family consists of several bacterial lipopolysaccharide core biosynthesis proteins (WaaY or RfaY). The waaY, waaQ, and waaP genes are located in the central operon of the waa (formerly rfa) locus on the chromosome of Escherichia coli. This locus contains genes whose products are involved in the assembly of the core region of the lipopolysaccharide molecule. WaaY is the enzyme that phosphorylates HepII in this system [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.50 0.70 -5.14 4 512 2012-10-02 22:05:25 2003-05-30 11:21:56 6 5 478 0 34 372 53 198.00 52 81.44 CHANGED MIhpp+I+shsVFhK-NssKYhplhsDFLoYshpslKVFRsIDDTKVhLIDTcYG+hlLKVFuPKsKpsERFhKShlKGDYYEpLhhpTDRVRsEGlpulNDFYLLAERKTLpas+sYIMLIEYIEGVELsDhP-IsE-lKscIppSIcpLHpHGMVSGDPH+GNFIlppstlRIIDLSGK+soApRKAKDRIDLERHYGIKNElKDhGYYhLIY+KKlRphlR+lKGK .................................................................................................................................MI.p.ph.pshphahccss.hY.plhp-hLshphphlKVhRs.I-.DTKV.L..IsT..th.G.hlhKVauP.Kh.Kh.sERF..h.KS....h....l.K.t...DYYc....pL.hhp.TD.RV...Rs.....EGh.pslN.Da.a.L...LAE.+.K....T....L..p...a....s+h..Y.......lMlIE.........YIEGlcL..s-...h...................-......I.......s......-......-.......l....K......s........c.......l......p.....p.S.....I.....c.......c.....L.....HpH.....GM.....VSGDPH+G.N.F.I.l...p..p..s..t.....lR......lID..L......S..GK..+..s..o..t....h..K......A+..D.R.l..sh..E..R..H.h..sItNp.....l....+D.h.GahhlIa+.pK.l+p..h..l+clKsK.............................................................. 1 8 19 25 +6005 PF06177 QueT DUF988; QueT transporter Moxon SJ, Rodionov D, Bateman A anon Pfam-B_10800 (release 9.0) Family This family includes the queT gene encoding a hypothetical integral membrane protein with 5 predicted transmembrane regions. The queT genes in Firmicutes are often preceded by the PreQ1 (7-aminomethyl-7-deazaguanine) riboswitches of two distinct classes [1-2], suggesting involvement of the QueT transporters in uptake of a queuosine biosynthetic intermediate. 
21.30 21.30 21.40 21.30 21.00 21.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.86 0.71 -4.24 45 1266 2012-10-03 02:46:00 2003-05-30 11:23:59 6 1 1098 0 152 637 4 152.20 33 91.44 CHANGED slstsAllAAlYllLTlh..lsslSaGslQFRlSEhh.NhLsh...as+.+YIhGlslGshIuNlh.Ss.h....Gll..DllhGshuTLlsshlshhltchhpph.hh.......phhlsslhhslsh.hhIAh.pLshhhp.......l....PF...........hhT....a.holulGEhlsh.llGssllhhlsKplc ..................lsphAll.AAlYllLTlh....lsslS..a.G.shQFRlSEhh..N.hL..sh...ash...KYlhGlslGshluNla....os..h.......Ghl.DllhGshsTllslsls..hhltt...phtt...h..............phhhsulhholsh.h.h..l.Ah...Lsh.hhp...................l......P..F.............ah..o....a....hosulGEhhsh.llGs.lhhhlsK+l................................ 0 69 110 132 +6006 PF06178 KdgM Oligogalacturonate-specific porin protein (KdgM) Moxon SJ anon Pfam-B_10852 (release 9.0) Family This family consists of several bacterial proteins which are homologous to the oligogalacturonate-specific porin protein KdgM (Swiss:Q934G3) from Erwinia chrysanthemi. The phytopathogenic Gram-negative bacteria Erwinia chrysanthemi secretes pectinases, which are able to degrade the pectic polymers of plant cell walls, and uses the degradation products as a carbon source for growth. KdgM is a major outer membrane protein, whose synthesis is strongly induced in the presence of pectic derivatives. KdgM behaves like a voltage-dependent porin that is slightly selective for anions and that exhibits fast block in the presence of trigalacturonate. In contrast to most porins, KdgM seems to be monomeric [1]. 
20.40 20.40 20.50 21.20 20.20 19.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.56 0.70 -4.84 7 1133 2012-10-03 17:14:37 2003-05-30 11:29:46 8 1 545 2 73 394 5 209.70 33 93.83 CHANGED lusss.A.shl-hRctYppsoct..cshltlupphspGhGh.l-s......sss.pspt.s-httshNEls.hsh.aKssDphslpPGh.lpstsssSsYpPYL+spYphssshslslRYRapapphous.hcs-pss.ct.chssahsYplhc...paphsap.p.ahcsspap...ussccppaEhssphtY+h.sppWpPYlEls.lshsss....sscR.QsphRVGlpYpF ........................................................................sh.s.A..shl-.Rct....Y....t........sS..ct....cshlt...lu...ph.s.GhGh.l-u.........................sssh.p.sp..+.h..s.-httsssElp.hsYhaKh..s.D.pholpPGh.l.h............c.................ss.Ss..uo.tYt...PYl+lsashs..s..shshulRYRYsa...psh....s.....s...s...s...h............s............u............-.........h...........ss...p....ssa..ch........ssYhsYplss.....cF.sasapsp.h.ahc..s..s.cap...............huNscKa.sa.EhshshpY..+h....s....p...p....apPYhEl.s.....lsppss............sD..cR...Qspa..RlGlpY.F................................................................ 0 3 17 46 +6007 PF06179 Med22 SURF5; Surfeit locus protein 5 subunit 22 of Mediator complex Moxon SJ anon Pfam-B_10889 (release 9.0) Family This family consists of several eukaryotic Surfeit locus protein 5 (SURF5) sequences. The human Surfeit locus has been mapped on chromosome 9q34.1. The locus includes six tightly clustered housekeeping genes (Surf1-6), and the gene organisation is similar in human, mouse and chicken Surfeit locus. The Med22 subunit of Mediator complex is part of the essential core head region [2] [3]. 
21.20 21.20 21.20 21.20 20.30 20.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.44 0.72 -4.03 21 283 2009-09-11 07:34:40 2003-05-30 11:36:54 7 11 233 15 196 266 1 107.50 27 61.35 CHANGED LL........p+lcpslpslhspFpcllchAp.............................spssphstssp-shthpscssplVRAs-sLLpLscslKEhhIL................ssh.slscshtppppphctcp.p.sp.ltpLh-chhsslhph ..................................................................p+lcsslpslhpsFppllphAp.............................spssphspspp-saphcsc..ussl...lRAuEsLhpLspclKphhlL................schsslsEs.hpp..pppphctpp..tphpp.l.plhcch..phh......................................... 1 61 104 155 +6008 PF06180 CbiK Cobalt chelatase (CbiK) Moxon SJ anon Pfam-B_10975 (release 9.0) Family This family consists of several bacterial cobalt chelatase (CbiK) proteins (EC:4.99.1.-). 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.52 0.70 -5.30 8 718 2012-10-01 23:23:09 2003-05-30 11:41:05 6 9 587 5 138 730 108 247.40 37 87.61 CHANGED KAILlVoFGToYp-s+chTI-pIE+clscpFPDY-lh+AFTSshI..I+KLKcR-s............lplsTPtpALp+LtcpGYc-VIVQsLHlIPGcEYEclhchV.........pcapssFccl..clGRPlLth.......s-DYcphlcsl+cphs.h..+sEslVaMGHGosHtususYupLsplhcs.psasslaVusVEuYPsl-sVlcclccpshcc.....VpLhPLMlVAGDHApNDMAuD-p-SWKsthcssGhcV.ps......hlcGLGEhcchpsIalcHI+sslp 
............................................................pAlLlVSFGToap-opctsIsuhEcclttsa.P-........hclhcAFTSphI...lc+L.cp+cs...............l.p...l.ssPhpALpcLtppGYpc..VhlQsLHlIsGtE.Y-.c.lh.p.p.V......................p.t...h.....c...s..t.....F.p......c.l....plGp.PL.Ls................p.tp.Da.........pplhpA.l................p.........p....p.........hs.....................h............t...p..........scslVhMGHGo.....p...H...........A...........u..s....Y....u.s.L...c...p.hhps......t.s.h...............p...sa...l....GsV..........E..u...a.P..p...l.....-.p.ll...p.pL..+....p....p......s..l..c.p..................VpLhPhMlVAGDHApND......M............A.........u....-...........-..........t.D..................SWK..st..h..ptt.G..h..p.s...p.s....................hL.p.G.LGE.sslpphalpHlcpAh.t................................................................ 0 71 113 129 +6009 PF06181 DUF989 Protein of unknown function (DUF989) Moxon SJ anon Pfam-B_11062 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. The haem-binding domain towards the C-terminus has been merged into Cytochrome_C, Pfam:PF00034. 
22.40 22.40 24.10 23.10 22.30 22.30 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.09 0.70 -5.34 60 485 2009-01-15 18:05:59 2003-05-30 14:07:51 6 4 442 0 164 459 1793 262.90 44 72.00 CHANGED Mtu....al...h-WlsLhlRWlHVIsuIAWIGuSFYFlhLDtuLpcsss....hpcGlpGEhWtVHGGGFYHlpKYhlAPspMP.Ec.LHWFKWEuYsTWLSGhALLsllYahsAplYLI...DPs.....hh-L.oshtAlululusLslGWllYDhLC+..SsLGc.......psslLullLallllssuauhsplFoGRAAaLHlGAhhuTIMsuNVFhlIhPsQRtlVAslcuGcsPDPp...hGtpAK.RSsHNNYhTLPVlFlMlSNHYPhsauspaNW.lIlullhlhGshIRHaFNt+Hss.....p.ppshashsusshlhslhhhluss ...............................uhhh-Wlshhl.RWlHVlsuIAWIGuSFYFlhLD..u.Lpp.ss....h.pGs.G-.WtVHGGGFYphpKYhlAPspMP..-cLpWFKWEuYsTWlSGhsLLhllYhhssshYLlD.s.....h.h.sh....u.sh..Altlulu.LshGWllYshlCc........LGp...........psthLhlll.hhhllshuashs.p.....l..FoG.R.AA..a..LtlGAhhuTIMsuNVFhlIhPsQ+tlVu....s...lhs.....GcpPDPt......hG...ctuK.R...ShHNsYhTLPVlFhM.....l.S.N.HYs.hsauspaNW.l.lhsllhlhGshIRpaFsh+..Htt.....p.t..hhshhs.sshhhhhh.hhht.s............................................. 0 34 89 125 +6010 PF06182 ABC2_membrane_6 DUF990; ABC-2 family transporter protein Moxon SJ, Eberhardt R anon Pfam-B_11079 (release 9.0) Family This family acts as the transmembrane domain (TMD) of ABC transporters [1,2]. The family includes proteins responsible for the transport of herbicides [1]. 
22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.43 0.70 -4.81 12 2005 2012-10-03 10:13:34 2003-05-30 14:10:04 6 3 813 0 438 1383 277 223.40 23 87.05 CHANGED llhhssshhhhhllapp..sssluGWsht-hlhlhGh....hhlhpslhshhhtsshtclscpVccGshDhhLLKPlshhh.lhhcphs.htlhcl.......lhGhhLlsYsshhlslshTshpllhhllsllsGslIhhulhhhhushuFWhhcs.thh.tlhhul...hshupaPhshasthl+hhhsFllPhshhshhPAphhLG+hsh....hhhlhs.hlhullhhslothhWptGlcpYpSsG ......................................................h....hhshhhhtslapp......hs.lp....G......aohpphhhhhhh.........hlsph.l.tp.h.h.h...s...h.h.hhsc..l.+....cGplshhLlRPls........h....hphl...h....pp...h....s.........p...h....l.h.h.l..............hlu..h..l..l.l..h..h..h..h..........t.......h.......s...l..s......h.....s..h...h...p....h.......l..l....a.l....l..sl...hhu..h.l.l.hhtl...plhhu..........hhu.Fah.pp...h...t...u...h...h...h...l...h...hsh...hs.h.uh...h..Plsha.....s.....p....h....lp..hl..l..sa...lPFuh.hsahPsthh...l..s.c.h.ss................hh.s..l.h.h.....hh...hsllh.hslu.hhlWcpulcpaputG..................................................... 0 170 299 381 +6011 PF06183 DinI DinI-like family Bateman A anon Pfam-B_3085 (release 9.0) Family This family of short proteins includes DNA-damage-inducible protein I (DinI) and related proteins. The SOS response, a set of cellular phenomena exhibited by eubacteria, is initiated by various causes that include DNA damage-induced replication arrest, and is positively regulated by the co- protease activity of RecA. Escherichia coli DinI, a LexA-regulated SOS gene product, shuts off the initiation of the SOS response when overexpressed in vivo. Biochemical and genetic studies indicated that DinI physically interacts with RecA to inhibit its co-protease activity [1]. The structure of DinI is known [2]. 
20.30 20.30 20.70 20.40 19.90 18.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.06 0.72 -3.95 11 1722 2009-09-11 10:20:43 2003-05-30 14:12:56 8 2 573 1 94 517 4 64.00 40 80.24 CHANGED lPsGul-ALpsELp+RlpppaP-..stVpV+tuSusuLSlsGsscc-..KchlpplLpEhWEs.DsWhs ..................hssGAl-ALtsELo.+RlpttaP-..spV.pV+.h.s.uu.ssLolssspc--..KpplpchLpEsWEsADpWhh........ 1 1 15 53 +6012 PF06184 Potex_coat Potexvirus coat protein Moxon SJ anon Pfam-B_11093 (release 9.0) Family This family consists of several Potexvirus coat proteins. 25.00 25.00 164.00 163.80 19.80 17.20 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.98 0.71 -4.85 2 45 2009-01-15 18:05:59 2003-05-30 14:13:12 6 1 3 5 0 45 0 148.20 92 83.07 CHANGED hstRpNRRttSRsutho...DhhYsoLT.tuTToapRpsFPsLtsMGDRsFQVluhpI.ssSAuPhhYpARLYsPtDsDsVtuTGlQh.GTsPRThRhpshsGQNsWF.GNTppspsIlAIDGLhoppusssPpNsVhVphsaRlAPSELQSu MVGRRNRRQRSRVSQMT...DIMYGSLTLGSTTTWTRKNFPGLANMGDRPFQVISAKIVVSSASPMLYQARLYSPHDDDNVGSTGLQMSGTTPRTHRMRALPGQNTWFSGNTSSTQVIVAIDGLKTKToDVTPQNAVAVQISYRVAPSELQSA.... 0 0 0 0 +6013 PF06185 YecM DUF991; YecM protein Moxon SJ, Bateman A anon Pfam-B_11108 (release 9.0) Family This family consists of several bacterial YecM proteins of unknown function. 
25.00 25.00 44.80 43.70 18.50 18.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.33 0.71 -5.07 6 775 2012-10-02 15:00:03 2003-05-30 14:16:58 7 2 766 1 89 341 6 181.20 59 95.31 CHANGED spp.shpplhpcLssF.pKIppLuchLsLDLothphDHIALRVNspQoAchhppha.cpGcllS-NhINGRsIhlIcL-pPLphusapI-slELPaPup.KpYPpEGWEHIElVlPupsps...hspchhthhstls.papt.ppsslpVKlSSPKuEGERLsNPTIAhpa.....sssCIKlHPauIKcIlcSE ..........................p.h-ELpDluuDLPRFppAlccLupRLGL......sl.osLpADHISLRCHQNsTAERWR+GFpQCG.ELLS.ENhINGRPICLFKLcEPV.........pVu.......HWp.........hsllELPWPGE.KRYPHEGWEHIEIVLPG..-PET....LsAR..A.LA..LL......oD.............-....G...........L...................o..PGIsVKTSSPKGE+ERLPNPTLAVTD......G+sTIKFHPaSIcpIVASE......................................... 0 11 29 61 +6014 PF06186 DUF992 Protein of unknown function (DUF992) Moxon SJ anon Pfam-B_11128 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.20 21.20 21.60 34.60 21.10 21.10 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.75 0.71 -4.86 29 199 2009-01-15 18:05:59 2003-05-30 14:18:40 6 1 134 0 76 154 10 145.90 43 86.49 CHANGED sussuhssssssssspst.lclGsLpCps..suusGallGSsppLsCsFpsp..utss-tYsGslp.+hGLDlGhTppothsWuVhAP..ssphspGsLuGsYsGsuAsAolGlGlGANlLVG.GospuluLQPlSlpuQsGlNlAsGlsplpLp .........ss.hhshssss.s.usp.spst.sclGhLsCcl..usulGhllGSspclsClF...+spt.sss.-pYsGsIp.KlGlDlGhTspupLuWuVaAP..ssshstGuLsGpYsGAoAsAulGlGhGuNlLsG.GStpoIuLQPlSVQGQpGLNlAsGlssLpLp....... 0 11 33 47 +6015 PF06187 DUF993 Protein of unknown function (DUF993) Moxon SJ anon Pfam-B_11260 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
24.00 24.00 26.20 27.00 21.40 23.70 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.30 0.70 -5.93 21 227 2012-10-03 05:58:16 2003-05-30 14:21:26 6 2 203 1 82 200 28 375.10 58 98.45 CHANGED ltLPssDGslcsYpLtusshh...tsssuPh.....huRlAaAAAHVVADPhusscPh.ussulDW-uTLAFR+aLWsLGLGVAEAMDTAQRGMGLDWssApELIcRShuEApuh........uup.....lAsGsGTDHLs....PusstsL-sVlsAYcEQlssVEtsGG+lILMASRALAtsA+uPDDYtcVYu+lLspsccPVILHWLG-MFDPALpGYWGSs-hcsAh-TsLslItspusKVDGIKlSLLDps+ElshRcRLPcGVRhYTGDDFNYsELItGDsptaScALLGIFDAIAPAAStALpsLspGDsspa+slL-PTVPLSR+IFcAPTpaYKTGlVFLAWLNGaQsHFpMlGGhpSuRulsHhA-lFRLADpAGLLsDP-lAstRM+tlLult.Gl .........................lpLPsscuslpsYpLtus.ss...ts...t..ssPt.....hsRlAaAAAHVVuDPhsDssPh.sssAlDW-AThAFR+HLWuLGLGVAEAMDTAQRGMGLsWsuApELIRRShAEA+ss......GAc...........lAsGsGTDHLs......................suts...tsL--VlcAYEEQluhlEptGGRhILMASRALApsA+uPDDYh+VYu+lLuQsccPVlLHWLG-MFDPuLcGYWG.Sp..ch-sAhcTslslIpspssKVDGIKISLLDtc+E...lslRpRLP.......c....GV+hYTGDDFNYsELIt......GDs......p............taSHALLGIFDAlAPtASsAlssLssGDsppF+ull-PTVPLSR+IFcAPTpaYKTGlVFLAWLNGHQsHFsMluGhQSsRulhHhA-lFRLADpAslLscP-LAspRM+pLLAlaGl............. 0 21 52 68 +6016 PF06188 HrpE HrpE/YscL/FliH and V-type ATPase subunit E Moxon SJ anon Pfam-B_11055 (release 9.0) Family This is a prokaryotic family that contains proteins of the FliH and HrpE/YscL family.\ These proteins are involved in type III secretion, which is the process that drives flagellar biosynthesis and mediates bacterial-eukaryotic interactions [1-2]. This family also V-type ATPase subunit E. This subunit appears to form a tight interaction with subunit G in the F0 complex [3]. Subunits E and G may act together as stators to prevent certain subunits from rotating with the central rotary element [3]. Pfam:PF01991 also contains V-type ATPase subunit E proteins. 
23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.19 0.71 -4.55 10 187 2012-10-02 21:03:42 2003-05-30 14:28:14 7 1 172 0 31 328 31 164.80 25 79.76 CHANGED MLs+RpIsLs.ususL.pPllRREpLAsshpAcslLp-ARpQA-plLspAcpcA-thpp....pApApFWppAsuhLpslQpQREplpppsloss-pLLspA.LppLLDETspupRhpALLRQLlsuQhsEusATLhCHPsptssVAcWLsspu.phtWpLpsDsoLssDoL+LsTApGsFslsWsshpctLl .......................................................................................s...llptt.Ls.p.phpApsl.LppA.+p...QApp...llppAcpc..A.ptlhp......pu..t...p....pA....tp.h....h.......p...............p.....ss...s.h....Lt...t....hp..tp.t....-s.L....p.p....tl........h.p....t.spc....llp..p.u.L...p...plL.c.c.s...s.......t.t......p......h..p.t.L...l.pp...L.....h....tt...................h....l...h..t..s...............................................................................................h............................................................................................................... 0 10 19 26 +6017 PF06189 5-nucleotidase 5'-nucleotidase Moxon SJ anon Pfam-B_10008 (release 9.0) Family This family consists of both eukaryotic and prokaryotic 5'-nucleotidase sequences (EC:3.1.3.5). 
20.20 20.20 20.20 30.30 19.00 20.10 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.69 0.70 -5.58 43 426 2009-01-15 18:05:59 2003-05-30 14:40:48 7 3 318 0 174 428 360 259.30 45 76.67 CHANGED cYQh-+EspsLpPGsAFPhV++LLulNpthtp.........p.hVEVVLLSRNss-TGLRVFsSIpHYGLs.IoRAsFouGcSPapYlsAasssLFLSustpDVppAlcsGhsAApllsssspssp..............ssp.LRI......................AFDGDAVLFSDEoEplappp.G...LcAFpcpEpppsppPLssGPa+sFLtsLpplQpp...........h....ss..hpsPIRTALVTARuAPAH.ERVI+TLRsWslclDEAhFLGGlsKuphLcsapscIFFDDQpsHl-uAu..ptlsouHVP.aGluNp ........................................pYQhp+Esp.LpPGsAFshV+..................tLLslNpp..htp...........p.hV..EVVLlS..RN.sspoG.l..RlhNSIpHYuLs..IsRusFTuGcuPhs.YlcAatssLFLSussc-VpcAlpt.GhuAAs..lh.ssst...p.t.stt..............ssp.LRl......................AFDGDAVLFSDEoEplappp.G.........LcsFhpaEptptspPLstGPh+sFLpsLscLQpp...h..........sp....psPIRTuLVTARSAPuc.tRsl+TLRpWGlclD.EAhFLGGhsKushLcthpPcIFFDDQhhHl-uAp..ttssuuHVPaGlsp................. 0 31 73 120 +6019 PF06191 DUF995 Protein of unknown function (DUF995) Finn RD anon Pfam-B_11307 (release 9.0) Family Family of uncharacterised Proteobacteria proteins. 19.50 19.50 19.60 19.80 19.40 18.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.36 0.71 -4.82 14 170 2009-01-15 18:05:59 2003-05-30 14:50:04 7 4 93 0 32 83 2 145.20 43 80.92 CHANGED sshlLssulsususstusssss....sApshousElaplYpsKoWhWcs...GAGYFsscsRpFpAWopcssutSaupGRWhlossGpLChcAsW+spsGsusupT.....CFsHRpts.GsIYQ+R-PDGsWYVF++ssspssDEatKLhcuDhVu .............................................sslLLhussuhAsss.tus...sspct..huspsMoAhcLhpLYts+oWpWsD...GuG..Y...aosK..........c+...pFsAhscpssu+SaupGRWhlTDsG+lChKAtW..psttGs.....s....s.Ap.T.....CFtHRht.D.GslYQ....+.+ps..s.GpWYVFRHsPhQcsDEApKLlpuDhl............................................... 
1 2 14 19 +6021 PF06193 Orthopox_A5L Orthopoxvirus A5L protein-like Moxon SJ anon Pfam-B_10342 (release 9.0) Family This family includes several Orthopoxvirus A5L proteins. The vaccinia virus WR A5L open reading frame (corresponding to open reading frame A4L in vaccinia virus Copenhagen) encodes an immunodominant late protein found in the core of the vaccinia virion. The A5 protein appears to be required for the immature virion to form the brick-shaped intracellular mature virion [1]. 22.40 22.40 22.60 22.40 21.30 22.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.23 0.71 -4.31 13 77 2009-01-15 18:05:59 2003-05-30 14:53:24 6 1 37 0 0 67 3 234.50 51 91.58 CHANGED MDFhsKaSptLspoupsKss...hhhp-Ehssss++spsl-htLKSpEphYQ+QLREQLA+cNMhtssctsI...............................................Ppp.tTNooSshsNl....puDss.spsoSll...p..sulpDIlpcas......................................................................................SVcc-hppLQsEos-LVosltsAREhThcAIspIh.pLsKtFp..phpc ......................MDFFNKFSQGLAESSTPKSS...IYYSEEKD.sDpKKDEAIEIGLKSQESYYQRQLREQLARD.NMhsASRQPIQPLQPTIHITP....................................L.ssosAPTPKPRQ.QTNTSSDMSNLFDWLSsDss.s.sSSLLPALTP.NuVQDIISKFNKDQKTTTsPSTQPSQTLPTTTCTQQSDGsISCTTPTVTP.QP...shsssssTPTssssssssptp.NPGAsSQQNLDsMuLKDLMSsVE+DM+QLQAETNDLVTNVhD.AREY...TRRAIDQIL.QLVKGFE..RFQK....................................... 0 0 0 0 +6022 PF06194 Phage_Orf51 Phage Conserved Open Reading Frame 51 Finn RD anon Pfam-B_11352 (release 9.0) Family Family of conserved bacteriophage open reading frames. 
21.10 21.10 21.10 50.20 21.00 20.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.97 0.72 -3.88 3 378 2009-01-15 18:05:59 2003-05-30 14:54:32 6 1 199 0 3 179 0 75.10 89 98.57 CHANGED MTD.SssKEYLNQFFGSKRYLYQDNERVAHIHVVNGTYYFHGHIVPGWQGVKKTFDTAEELEsYlKp.HGL+aEEuKQLoLF ....MTD.sARKEYLNQFFGSKRYLYQDNERVAHIHVVNGTYYFHGHIVPGWQGVKKTFDTAEELE.hYIKQ.HGLEYEEQKQLTLF.. 1 3 3 3 +6023 PF06195 DUF996 Protein of unknown function (DUF996) Finn RD anon Pfam-B_11375 (release 9.0) Family Family of uncharacterised bacterial and archaeal proteins. 26.00 26.00 26.30 26.90 25.30 25.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.86 0.71 -4.18 16 81 2009-01-15 18:05:59 2003-05-30 14:56:10 8 2 55 0 50 89 2 129.20 25 69.88 CHANGED llullGhVLlLlulttlScths-cclFppaLhuhIhslluhllhhshlh.....huhhhhps.hsshhs.h.hhsh..lhshLushlllallhlluu.......YFh++AactluphoGlshFcpAuhlhaIGAlLhllh.lGhlIllluhI ................lullGhlLlLlulttlucthscpclFpphLhuhIlsll.uhllhhlhhh.................hthhhh.h......h.....h.........h.sh..............hsshlsshllha....lhhllus..........hah++uachluphoGhshFpsuuhhhhhGAlLhlll.lGhllhhluhI................ 0 15 24 34 +6024 PF06196 DUF997 Protein of unknown function (DUF997) Finn RD anon Pfam-B_11382 (release 9.0) Family Family of predicted bacterial membrane protein with unknown function. 20.70 20.70 21.00 22.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.92 0.72 -4.47 24 837 2009-01-15 18:05:59 2003-05-30 14:58:27 7 1 834 0 88 313 6 75.50 55 89.98 CHANGED pQApKEAhaAluLsllYhlhWhlhAYh.susss.......hhGFPhWF.hSCIhhPllFhllsahhVKhlF+DlsL-csp.ps ....l.QAHKEARWA..LuLTL.lYLAsWllsAYLP.G.s.u.sG............hTGhPcWFEhACILsPLlFIhLCWhMVKFIa.RDIsLED-D..ut........ 1 16 39 63 +6025 PF06197 DUF998 Protein of unknown function (DUF998) Finn RD anon Pfam-B_11425 (release 9.0) Family Family of conserved archaeal proteins. 
27.00 27.00 27.00 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.46 0.71 -4.98 111 925 2012-10-01 21:22:51 2003-05-30 15:01:43 8 6 680 0 301 768 89 184.90 17 80.56 CHANGED llusslhhhs..hlhsthhhsua.shhppslS-Lus..........sstsalh..ssuhlhhulhhls..hulhhhh.....ttshttphushlls...lhulu.hhhsG..lastsss.....................hH.hhsuhlsalthslusl....lhu..................hhhthhphhtsluhhhhhsshhhhhhs..................hhGlhpRhhshsh.hsWlhhhuht ...................................h.hsshhhhhs...hhhtt.hh..ts..s.h..s.....hppslStLusht...................ushthha..p.hshllsGh.hhlh..........hshhlhtt................ttph...h.tthss..sh..ls...................hhGlu..hlhsG....lash.sss........................thlH..shs.u....h...l....sh...hshhlshl...........lhs.................h.htt..ht.h..hph.hh.h.lshlhhhhsslhhhst...........................h.h.hGlhpRl.shhhh..hhahhhhs..h............................................ 0 104 188 251 +6026 PF06198 DUF999 Protein of unknown function (DUF999) Finn RD anon Pfam-B_11426 (release 9.0) Family Family of conserved Schizosaccharomyces pombe proteins with unknown function. 25.00 25.00 258.50 257.50 19.30 18.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.88 0.71 -4.41 3 9 2009-01-15 18:05:59 2003-05-30 15:35:05 6 1 1 0 9 8 0 141.20 83 55.62 CHANGED MVEDVCNVDLEQGLDLCKPEKVNKQSQRSRQSRQSLFTNTIKPQKDKMNIKTNKIKEFLNDLFTEFSKFHNSYYPDGRISTRSKhRWPLLIIWSIIIVFAIDKKFEVK-FLSIW.....INENRFYSEIWGPIAIYVCLFVLLLLuLI MVcDspNVDLEpGL-LCKPEKVNK.........QsLFTNhIKPQKDKhNIKTsKIK.FLNsLFTEFSKFHsShYPDGRISTRSphRWPLLIIWsIlIVFAlDKpFEVpcFLSIW.....INENRFYSEIWsPIAIYlCLhVLhLLuLI 0 9 9 9 +6027 PF06199 Phage_tail_2 Phage major tail protein 2 Finn RD anon Pfam-B_11427 (release 9.0) Family Characterised members are major tail proteins from various phage, including lactococcal temperate bacteriophage TP901-1. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.45 0.71 -4.10 63 727 2009-01-15 18:05:59 2003-05-30 15:44:01 6 8 629 0 118 479 169 135.50 20 78.71 CHANGED stpG+Dllltlphsss...s....tatplushpspslshsu-sl-so...op-......s.sthtphlsssGsppsolSusGl.ht.s..s...ssppplcptahsuphsc........apl.................h...hsshu.............p..hpGsah...loul-hu.ustsspsoaslshsusGtlsh ....................................................................pGcDhllhhphsst..t......shhphuh.p.spp.ls.hs.t-ossss....opD.............................thltssGs.hps.o.lSssul..hp.s....c.........stsptlccshhssthh-.........acl.........................t...hssts.................p....hpu.ah.......lsuhphs.ust-shsphphphsssGthp..................................................................... 0 35 76 96 +6028 PF06200 tify Zim; tify domain Bateman A anon Pfam-B_3326 (release 9.0) Domain This short possible domain is found in a variety of plant transcription factors that contain GATA domains as well as other motifs. Although previously known as the Zim domain this is now called the tify domain after its most conserved amino acids. TIFY proteins can be further classified into two groups depending on the presence (group I) or absence (group II) of a C2C2-GATA domain. Functional annotation of these proteins is still poor, but several screens revealed a link between TIFY proteins of group II and jasmonic acid-related stress response. 20.90 20.90 22.90 21.50 20.00 20.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.17 0.72 -7.26 0.72 -4.69 48 482 2009-09-14 12:07:42 2003-05-30 15:52:11 9 12 39 0 250 488 0 35.50 41 13.34 CHANGED ssssssQLTIFYuGpVhVF-slss-..KAptlhplAup ...........stssQLTIFYsGpVhVF.Dslss-..KA.pslhhLAu............... 3 42 154 208 +6029 PF06201 PITH DUF1000; Thioredox_dimer; PITH domain Yeats C anon ADDA_10869 Domain This family was formerly known as DUF1000. 
The full-length, Txnl1, protein which is a probable component of the 26S proteasome, uses its C-terminal, PITH, domain to associate specifically with the 26S proteasome. PITH derives from proteasome-interacting thioredoxin domain. 23.70 23.70 25.60 25.00 22.10 22.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.96 0.71 -4.44 56 613 2012-10-03 19:46:52 2003-05-30 16:00:39 8 18 298 2 434 619 10 146.30 31 56.41 CHANGED sLhspIDhsplpsLNp.....sss....susttslhcshppc.t...............hlpS.Ds.......D.-QLllpIP..Fss.slKl+Slhl.p..u....................ssspsPpplKlFhN..ppslsF-ssp.shpssQslc.................Lsp.......shpshhphs....L+hs+FpsVpsLolal..........psN.u...s-pT+ItaIslhGps ......................................LhshI-hsplpsL..Nc.......................ssp..........tuht...slhcshp.p......................alcS..D..s........D.EQLllplP..Fss.sVKl+.Slhlpus.........................ssspsP+plKlFh..N..tpshsF-sup...shp....Ps...Qslp..............................Lsp..............shpssh.phs....L+.hs.+Fp.sVps......Lo.lFh................psN.G........t-pT+ltalslhGp................................... 0 153 247 362 +6030 PF06202 GDE_C Amylo-alpha-1,6-glucosidase Bateman A anon Pfam-B_3607 (release 9.0) Family This family includes human glycogen branching enzyme Swiss:P35573. This enzyme contains a number of distinct catalytic activities. It has been shown for the yeast homologue Swiss:O93808 that mutations in this region disrupt the enzymes Amylo-alpha-1,6-glucosidase (EC:3.2.1.33). 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.53 0.70 -5.74 14 1196 2012-10-03 02:33:51 2003-05-30 16:24:27 9 30 973 0 546 1693 78 369.30 22 41.39 CHANGED huospFlscp..........................t.ts.ollAGa.WFu....c.WGRDohIuLsGlhLlsGRa--A+sllhsFuphh+.......+GLIPNthss.sspsh....YNosDAoLWalpulpcYhchssD.hthL....pchaP......................slppIlpsahpGsca....................sltlD.pcuLlhsGs...shTWMD.......AtlsGh.slTPRsGpslEINALWYsAL....+hhpphup.hltc.............tpppYpplApplpssFc+tF...hs.................................................................................scpshlhDslsssp.....pDhplRPN.lhAloL.....s...slhss-pt.thlchspcpLLsPaGL+TLsPcD.sYpGhYsG.sp.sRDtA......YHpGTlWsWllGhalcAhh+hspp...ppt...........ls.hpslLp.h.tal.pushuslsEla-....u-ssasspGChsQAWSVuEllR ...............................................................................................................................................................h.................................hhAGhPaFs.....p..aGRD.s.h.I....u.h.......u..l..h..Lh...s..t......p.....p.............u.c.t.h.l.....h......t.h.s.t..h...t..........................G.h...l.......sp..h..t........t........sp.........t..st...................Y......s....s....s..D..ushaalhs..ltpY...hch....s.s...-....hphl.......pch.h.P..................................................................................................h.lp.p.h...l...p....h...h.......h.....p.....u.....h.p.............................................................p.h.thc.......p.s.....u..h.....l....h....t....s....s....t.....h.........s....ts..W...h...D..................................st....p.s.h....shs.P....c..t..s....t......s....l.E.......l..pu.h.hY.s.A.l.......ph.hu..p.l.s.p..t.h.h.t.t........................................................hstp...h..p..phApplc.p...sF..pc.tF....hs.................................................................................tc..t.s.hh.t...p...h..l.cst.p...........t
s.h...p.l.R...s...N.......h.....h.....s....h..s.....l...........................s.........sl.h..s.......c...p.s...t...p...s...l....c..h...h.....p...p...p...L..h..s...s.....h....Gl....R....T..L..ss.................p..................p..h..s..................Y.p...s........................hu......................YHpGshWs...a.huhhhpuhh+.hsh...............................................................hp..h.t..t.hht......h........t..h......t.t......t.........ht...tls.E...l...hs.........ts.s.......s.....h.......h.......s..hus...sQAWusuth.............................................................................................................................................................................................. 0 183 334 452 +6031 PF06203 CCT CCT motif Bateman A anon Pfam-B_314 (release 9.0) Motif This short motif is found in a number of plant proteins. It is rich in basic amino acids and has been called a CCT motif after Co, Col and Toc1 [1]. The CCT motif is about 45 amino acids long and contains a putative nuclear localisation signal within the second half of the CCT motif [1]. Toc1 mutants have been identified in this region. 20.60 20.60 20.60 20.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.14 0.72 -4.33 42 1411 2012-10-01 19:54:00 2003-05-30 16:45:03 9 19 143 0 549 1397 13 44.30 54 10.78 CHANGED Rcspl..hRY+-KR+pRpFsKpIRYtsRKthA-pRsRlKGRFs+psp .............RcApl..hRYREKRKp...R........p..F.....c..K..+....I.....R........Yt....oRKt........hAEpRPRl+.GRFs+ps.s....... 
0 133 347 463 +6032 PF06204 CBM_X Putative carbohydrate binding domain Finn RD anon Pfam-B_9110 (release 9.0) Family \N 21.10 21.10 21.10 21.40 20.20 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -9.20 0.72 -4.49 57 1056 2009-01-15 18:05:59 2003-05-30 17:12:43 6 25 595 23 316 973 50 63.10 32 5.20 CHANGED sshGsFssss+EYl........hhlhss.pT.....PsPWlNlluNspaGhhlSpsGuGYoahpsu+phclT.WpsDsl ........................thGhFsppspEYl................h.sp.ssT.................PtPWsNlLuNs.paushlSssGuG...Yo.a.tss+phplT+acsDss....... 0 99 204 247 +6033 PF06205 GT36_AF Glycosyltransferase 36 associated family Finn RD anon Pfam-B_9110 (release 9.0) Family \N 21.00 21.00 21.00 21.50 20.80 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.82 0.72 -4.24 111 1114 2009-09-11 09:10:52 2003-06-02 09:55:43 6 25 623 23 331 1025 61 89.50 26 7.62 CHANGED upstas...phspthAah.sss.....tstsuassDRscFlGRstslssPtAl....tpstLSsosGsslDPsuulptplpLtPGcptclsFhlGhupspcp .......................................p..p.thtp.hsah..sss...............tpssua-sDRppFlGp..s..psh...........ss......PtAl................ttspLos.os..us...shDPsuulp.t.p.l.p.LtPGcptclsFllGhupspp.t..... 0 103 213 259 +6034 PF06206 CpeT DUF1001; CpeT/CpcT family (DUF1001) Moxon SJ, Bateman A anon Pfam-B_11004 (release 9.0) Family This family consists of proteins of proteins belonging to the CpeT/CpcT family. These proteins are around 200 amino acids in length. The proteins contain a conserved motif PYR in the amino terminal half of the protein that may be functionally important. The species distribution of the family is interesting. So far it is restricted to cyanobacteria, cryptomonads and plants. It has been shown that CpcT encodes a bilin lyase responsible for attachment of phycocyanobilin to the beta subunit of phycocyanin [1]. 
20.30 20.30 20.40 20.80 19.90 20.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.94 0.71 -4.61 51 207 2009-01-15 18:05:59 2003-06-02 10:29:07 6 2 125 0 80 226 183 162.10 27 83.45 CHANGED hphschLuGcaoNppQAhcs.......PshaA+IplhaR..PLs.....h.hhpuhuhasEQsYshsstpPYRp+lh+lh.p....sstlhlcNaslc-spcatGAupc.sc..hLpplss-sLphh.pGCs..hhhpppss.t....apGplEPGppChl.R.cGptTYLsSphcl...spp.phhShDRGaDscTccplWGShsG.PacFp+ .......htlsphhsGcasNppQAhps.........P..sh..aspl.clhhc..Pls............h..s......shshahEQ.saths..ppPYR.Rlhclh.s......pspl.lpsYtlcc..p..pahusspc..p........hhptls..p.....plt.h...sCs..hhh.phpsp..s.............apGpspPGptChs.p...pGptoalsophpl....ppp..phhohD+GhD.psspplWGuhtG.sacFp+.......................................... 2 18 50 71 +6035 PF06207 DUF1002 Protein of unknown function (DUF1002) Moxon SJ, Bateman A anon Pfam-B_10868 (release 9.0) Family This protein family has no known function. Its members are about 300 amino acids in length. It has so far been detected in Firmicute bacteria and some archaebacteria. 23.80 23.80 25.30 24.90 23.60 22.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.30 0.70 -5.03 47 783 2009-01-15 18:05:59 2003-06-02 10:30:15 6 2 723 0 105 508 7 227.60 38 70.76 CHANGED LGtsLoppQ..KpphLchh.....s.stsssphlpVTsp-.pcYLsshssp.tphuopshSSuhlphpspG.......pGlpVcs......pNIThVTspMYtNAhlTAGlpcAplhVsAPhtVoGouALTGlhKAaE...us.GpplspcpKpsAs-Elsssupl.......s-shup-csssllsclKc-lscpp..p.....ocp-lppIVpphhpphsls....Lossphspllslhtclpshslsh...sphps....QLsp.lp .................................LGsuLo-sQ..KppsLctL.....sssc..st...plhTlTssshsKhhs.sss....ss....uhphaSSuhIpphsss.......sGlpVcI.s....cNITpVTpsMYpNAAlTuGlccAcIpVuA...Phs..VoGpuALsGlYtuhE....ss.GtplsppsKclApEELpshSslsttp.......pscss.h.s.s.sKLssAlA-IKptlAKtp.ps.....hTc-Dl+..pIVcc...s....lcshsls......lTssQls.Ilshhhp.lppssl.p....ssshsppLscl.................................... 
0 32 63 82 +6036 PF06208 BDV_G Borna disease virus G protein Moxon SJ anon Pfam-B_10516 (release 9.0) Family This family consists of Borna disease virus G glycoprotein sequences. Borna disease virus (BDV) infection produces a variety of clinical diseases, from behavioural illnesses to classical fatal encephalitis [1]. G protein is important for viral entry into the host cell [2,3]. 25.00 25.00 149.00 148.90 20.50 19.90 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.66 0.70 -5.58 2 36 2009-01-15 18:05:59 2003-06-02 11:26:44 6 1 6 0 0 36 0 431.60 79 99.96 CHANGED MQ.SMSFLhGFGTLVLALSApTFDLQGLSCNTDSTPGLIDLEIRRLCHTPTENVlSCEVSYLNHTTISLPAVHTSCLKYHCKTYWGFFGSYSADRlINRYTGTV+GCLNNSAPEDPFECNWFYCCSAITsEICRCSIpNVTVAVQTFPPFMYCSFADCSTVSQQELESGpAhLSDGSTLTYsPY..pSEsVNpTLNGTILCNSSSKIlSFDEFRRSYSLANGSYQSsSINlTChNYTSSCRPRLhRRRRDTQQIEYLlHKLRPTLKDAWEDCEILQSLLLGVFGTGIASASQFLRGWLNHPsIIGYIVNGVGVVWQCHRVNlTFMsWNESTYYPPVDYNGRKYaLNDEGRLQTNTPEARPGLKRVMWFGRYFLGTVGSGVKP+RIRYNKTSHDYHLEEFEASLNMTPQhsIASGHETDPINHAYGTQADLLPYTRSSNhTSTDTGSGWVHIGLPSFAFLNPLGWLRDLLAWAAWLGGVLYLIsLCVSLPASFARRRRLGRWQE .........MphSMS.LIuFGTLVLuLSApTaslQuLpCNTDSTPuLIDLEIRRLCHssTENVISCEVSYhNHTohpLsAsHsSChKYHCKTYWGFFGSYSsD+lIsRaTGssctClNsS.sEDPF.CNWaYCCSAIss-lCRCSIoNspVuVpoFPPFMYCSFADCSTVSpp-LpsGpAhLSDGShLhasPY.Lpp-VVNpThNGTIhCNuSSKlVSFDtFRRSYuLtNsSYpspSlNlTCsNhoSpCpsR.hRRRRDhpplpYLsHKLRPhLtDAWEDCEILQSLlLGsFGoGhuuASQFLRtWLNHsDIlGYIVNGlGVlWQCpRVNVoFhsWNESTYYPPVDhNGp+haLNDEuRLQTsoPEAhPGLKRVMWaGRhaLGTVsSG.+P+Rl+YN+oSHDYHL-EF-hShNhTPphulAoGHETsPINHAaGTQusLLPYsRSSNlTSTDTGSGWVHIGLPSFAFLNPhGWLRDlhuWAAWLGGlLYLlpLshSLPs.hARRRRLGRWpE. 0 0 0 0 +6037 PF06209 COBRA1 Cofactor of BRCA1 (COBRA1) Moxon SJ anon Pfam-B_11228 (release 9.0) Family This family consists of several cofactor of BRCA1 (COBRA1) like proteins. It is thought that COBRA1 along with BRCA1 is involved in chromatin unfolding. 
COBRA1 is recruited to the chromosome site by the first BRCT repeat of BRCA1, and is itself sufficient to induce chromatin unfolding. BRCA1 mutations that enhance chromatin unfolding also increase its affinity for, and recruitment of, COBRA1. It is thought that that reorganisation of higher levels of chromatin structure is an important regulated step in BRCA1-mediated nuclear functions [1]. 18.70 18.70 19.20 19.70 17.70 18.60 hmmbuild -o /dev/null HMM SEED 475 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.56 0.70 -5.89 6 144 2009-01-15 18:05:59 2003-06-02 11:29:37 8 8 89 0 96 136 2 349.50 39 69.02 CHANGED MPSlQPVVMsVhKHLPKVPEKKLKlVMuDKELY+sCAVEVKRQIWQDNQALFGDEVSPLLKQYIlEKEssLFSo-lSlLHsFFSPSPKTRRQGEVVQKLTpMIGKNVKLYDMVLQFLRTLFLRTRNVHYCTLRAELLMSLHDLDlsEICoVDPCHKFTWCLDACIREKFVDuKRARELQGFLDGVKKGQEQVLGDLSMILCDPFAINTLAhSslRpLQELlup-uLPRDSsDLhLLLRhLuLGpGAWDMIDSQVFKEP+lD...sEllT+FLPhlMSlhVDDpsFNl-sKLPs-EKpsshh....PssLP-sFp...................................+FlpENRVACElGLYYsLHIsKQRNKNALhRLLPuLV-shsDhAFsDIFLHLLTupLsLLuDEFusEDFCoulFDsFLLTuhopKENVHRHsLRLLhHLHpKVsPs+lEsLhKuLEPspQpsEsl+ELYspLt-Klps...p+ssPssp.-ssuh-.sLtuVsssssh 
.......................plpPVVMtlh+ph...c...ls-+hL......chlhtDc-LYpsssh-VKRQIWpsNpuLFuDEVuPLLppYIhE.......KEthL.hs....phs...p..........FF.t..s...PKsR.R..Q..up..l.l.+LspMlG.psl+LYDhlLQF....LR...TLFl+T+NlH....YCTLRAELLMuLHDh-lt....-IhshDPCHK.........FsWCLDACIR-+.VD.KRsRELQuFL-.sl.K+Gp.EpVLGDLSMlLsDPaAhNhLshSsl+.Lpc..Ll.s.p-.sLPR-s...Lh.LLLRhLuLG...uA..WpMI.-oQsF.KE.P+h-...hpllspFLPhhhuhh...l-D.h.hp.l.pt....+L..ss-c+tsh.h....ssshs.-....sh....................................................tal.....pppthup..luhaYhl+h......h......p.+.....sp.....s..l..+lL.shLsp..shs.c..hu.h.t.D..FL..H..lhs.Lh.....h..s-...ch...s....cp.hs.p.lh-tF.hhts.s.ppssp+H......hL+hlhhla.+..l....shhtt.l.ct.hpP.tt.....t........p..t.h.thh......................................h...................................................................................................................... 0 47 56 76 +6038 PF06210 DUF1003 Protein of unknown function (DUF1003) Moxon SJ anon Pfam-B_10814 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 24.60 24.60 27.40 24.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.48 0.72 -4.13 71 827 2009-01-15 18:05:59 2003-06-02 11:31:43 6 4 766 0 249 630 135 107.60 40 53.07 CHANGED sGShtFllhasshlslWlslNshhhht..ht.................................FDPYP..FILLNLhLSh.AAhtAPlIhhuQNRQss+DRlpucpDhplNl+uEhElphLtcclstlttp.tphhtctpphpt ............hGohpFllhhslllssWlllNh..hshht....hp.................................FDPYP..FILLN...LhhSs.AAhtAPlIhhuQNRQ-cR.D..RlpsppDhcl....shpuct-hchLtcclstlthp.t-hhsc...hp.p................................. 0 81 162 212 +6039 PF06211 BAMBI BMP and activin membrane-bound inhibitor (BAMBI) N-terminal domain Moxon SJ anon Pfam-B_11792 (release 9.0) Domain This family consists of several eukaryotic BMP and activin membrane-bound inhibitor (BAMBI) proteins. 
Members of the transforming growth factor-beta (TGF-beta) superfamily, including TGF-beta, bone morphogenetic proteins (BMPs), activins and nodals, are vital for regulating growth and differentiation. BAMBI is related to TGF-beta-family type I receptors but lacks an intracellular kinase domain. BAMBI is co-expressed with the ventralising morphogen BMP4 during Xenopus embryogenesis and requires BMP signalling for its expression. The protein stably associates with TGF-beta-family receptors and inhibits BMP and activin as well as TGF-beta signalling [1]. 19.00 19.00 20.20 19.70 18.30 17.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.62 0.72 -4.05 4 68 2012-10-03 01:43:02 2003-06-02 12:43:19 7 2 50 0 36 72 0 100.80 68 38.57 CHANGED puSLlSlWLQLELCAMAlLLTKGEIRCYCDAPHCVATGYMCKSELNACFT+lLDPQNoNSPLTHGClDSLhsos-sCpucsscs+otsosP...LECCHEDMCNYRGLHD ..............sshh.lWLQLELCAMAl.LL..TK.GE...IRCY.CDAsHCVATG...YMCKSELs.ACFSRLL..DP..Q.N....oNS.P...LT.HGCLDSl....AS....T..u.DlCpA+Q..ApNHS...G....o.shP.........oLECCHEDMCNYRGLHD...... 0 7 11 20 +6040 PF06212 GRIM-19 GRIM-19 protein Moxon SJ anon Pfam-B_10760 (release 9.0) Family This family consists of several eukaryotic gene associated with retinoic-interferon-induced mortality 19 (GRIM-19) proteins. GRIM-19, was reported to encode a small protein primarily distributed in the nucleus and was able to promote cell death induced by IFN-ß and RA. A bovine homologue of GRIM-19 was co-purified with mitochondrial NADH:ubiquinone oxidoreductase (complex I) in bovine heart. Therefore, its exact cellular localisation and function are unclear. It has now been discovered that GRIM-19 is a specific interacting protein which negatively regulates Stat3 activity [1]. 
25.00 25.00 25.20 27.90 20.70 24.60 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.63 0.71 -4.44 11 296 2009-01-15 18:05:59 2003-06-02 13:27:30 7 4 250 0 189 281 4 119.70 37 86.33 CHANGED suu.sK.QDhPPPGGYtslsa+RshPKpshSGauMFAshhGhhsaGhathhpts+Ec+R.pIE.hsARhAlhPlLpAE+DRcaL+.L++Nh-EEAclMKDVPGWKV....GEsVF+T...-+WVsPhhpEhYshpsh- .................s.....h.QDhPPsGGY.tslpY+Rs.lP.p.+G.hs.........uhshhsshhuhhsaGaaplhps.s+cp.c.c......L.thEchtuRlsLhPlLQAEcDRc.hl.+phppshpcEtp.l..McsV...sWcs........................sp....pVapo...s+ah.Ph...........t....................................... 1 68 106 154 +6041 PF06213 CobT Cobalamin biosynthesis protein CobT Moxon SJ anon Pfam-B_10956 (release 9.0) Family This family consists of several bacterial cobalamin biosynthesis (CobT) proteins. CobT is involved in the transformation of precorrin-3 into cobyrinic acid [1]. 23.00 23.00 23.10 23.70 22.90 22.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.76 0.70 -5.27 6 469 2009-09-11 12:48:28 2003-06-02 13:36:36 7 7 445 0 122 386 1322 275.30 25 44.87 CHANGED +soss.sssEPFKRAlssClRuIAGst-lEVuFus-RPuLsusR......ARLP-lP++sopp-hAlsRGLuDSMAL+hApHDP+lHu+htPpGt.sARAlF-AVEQARVEAIGApsMsGVAcNL...ouML-DKYpKushscl..ss+uDAPlEEALuLhVRE+LTGcssPpu...........AcplV-LWRsalEpKAutDlcpLuusl-DQpAFARlVRDMLuu.....................h-hAEEhGD-pspsDpE-s..D.-DsPpp-pp--pus-EppGs-s...u.uc-...u-uos--tEpu-hEuu-uos-DhsD-sDt 
.......................................................................................................t.........pshKpAlstss+AlutchplcVsa.....ss....s....ps...t..h....supp.........hcL..P....p..h.s.cc.hs..tp.ch..s..hs..RGhuDuhAL+ltapDsplHtpht.Ppss....ARsl......a-AlEpsRlEulGucths.GsupNL...sshl..p...p..c.hpcs.p.hs..ph..pscs.s..usL.ttu...l..uhh...l...R....p+loGps.s.Pss.....................stpll-....L....hRph.....l..pp....puusp....Ls.sLt.......ps..l.p......DQt.......u...FA.......c.hs...+c..hlss....................................h..p.h..s.....p..c....u..p.c..p..p...s...-....p..pps......p...ppps.p....p.p.......p...p.pp.t...u..s.p..p.p...ps.ssp..........s.s.pp.........spss..p.p.....p..t....p..p...tt..pt..s..p.t.t....................................................................................... 1 33 78 95 +6042 PF06214 SLAM Signaling lymphocytic activation molecule (SLAM) protein Moxon SJ anon Pfam-B_11112 (release 9.0) Family This family consists of several mammalian signaling lymphocytic activation molecule (SLAM) proteins. Optimal T cell activation and expansion require engagement of the TCR plus co-stimulatory signals delivered through accessory molecules. SLAM, a 70-kDa co-stimulatory molecule belonging to the Ig superfamily, is defined as a human cell surface molecule that mediates CD28-independent proliferation of human T cells and IFN-gamma production by human Th1 and Th2 clones [1]. SLAM has also been recognised as a receptor for measles virus [2]. 25.00 25.00 25.30 79.80 23.00 21.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.65 0.71 -4.19 4 58 2009-01-15 18:05:59 2003-06-02 13:48:11 6 1 36 6 14 52 0 122.10 64 38.47 CHANGED MDPKGhLShphLLhLSLAhcLShGTGtulMsCPc.ILtpLGSslhLPLssE.pIsKSMNKSIHILVTMAcSPtsolcKKIVSLDLscGusPRaLpDGY+FaLENLoL+ILcSR+EsEGWYhhoLEENV ....................MDPKG.LLSLshlLFLSLAFchShGTGtshMNCPc.IltpLGSslLLPLTpE.tIsKSMNKSIHIlVTMAcS.tsSVc+KIVSLD.sEuGsPRYLcDtY+FaLENLoLtILESRKEcEGWYhMTLEcNl.. 
0 1 1 1 +6043 PF06215 ISAV_HA Infectious salmon anaemia virus haemagglutinin Moxon SJ anon Pfam-B_11275 (release 9.0) Family This family consists of several infectious salmon anaemia virus haemagglutinin proteins. Infectious salmon anaemia virus (ISAV), an orthomyxovirus-like virus, is an important fish pathogen in marine aquaculture [1]. 19.60 19.60 20.80 22.90 18.90 18.60 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.43 0.70 -6.16 3 464 2009-01-15 18:05:59 2003-06-02 13:54:06 6 2 3 0 0 388 0 318.40 88 99.80 CHANGED MARFIILFLLLAPVYSRLCLRNYPDTTWIGDSRSDQSRVNPQSLDLVTEFKGVLQAKNGNGLLKQMSGRFPSDWYTPTTKYRILYLGTNDCTDGPTDMIIPTSMTLDNAARELYLGACRGDVRVTPTFVGAAIVGLVGRTDAlTGFSVKVLTFSSPTIVVVGLNGMSGIYKVCIAATSGNVGGVsLINGCGYFNTPLRFDNFQGQIYVSDTFEVRGTKNKCVLLRSSS-sPLCSHIMRNVELDEYVDTPNTGGVYPSDGFDSLHGSASVRTFLTDALTCPDIDWSRIDAASCEYDSCPKMVKDFDQTSLGNTDTLIMREVALHKEMIuKLQRNITDVKsRVDAIPP...NIFISMGVAGFGIALFLAGWKACIWIAAFMYKSRGRIPPSNLSVA .......................RLCLRNaPDTTWlGDSRSDQSRVNPQSLDLVTEFKGVLQAKNGNGLLKQMSGRFPSDWYTPTTKYRILYL....GTNDCTDGPTDMIIPTSMTLDNAARELYLGACRGDVRVTPTFVGAAIVGLVGRTDAITGFSVKVLTFsSPTIVVVGLNGMSGIYKVCIAATSGNVG..G..VTLINGCGYF...NTPLRFDNFQGQIYVSDTFEVRGTKNKCVLLRSSSDTPLCSHIMRN.VELDEYVDTPNT.GGVYPSDGF.DS.LHGSAS...VRTF..LTDAL...TCPDIDWSRIDAASCEYDSCPKMVKDFDQTSLGNTDTLIMREVALHKEMISKLQRNITDVKh....R.V..-Ah.s....................NIF.....IS.MGVAGFGIALFLAGWKACIWIAAFMYKSRGR........................ 0 0 0 0 +6044 PF06216 RTBV_P46 Rice tungro bacilliform virus P46 protein Moxon SJ anon Pfam-B_11281 (release 9.0) Family This family consists of several Rice tungro bacilliform virus P46 proteins. The function of this family is unknown. 
25.60 25.60 25.60 793.10 24.80 25.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.33 0.70 -5.62 2 12 2009-01-15 18:05:59 2003-06-02 13:59:52 6 1 2 0 0 13 0 388.60 77 100.00 CHANGED hNhEhPYSIHhlsKN+VPIYcQGNLFHoEhouRLSHlSpGLlDHLFTF.SDNTERVRKLHllu-alY.hE.EhpoYp.Eh.pLp-QVSpLp.pspp.+tpltp.+-hlEGLREPlKKPIYTTQDKEtLRsFFCcERSMEYIYaHIKRLAppSYYSHLNKLQKDhE.hRGhYhSFLTNVKFLVLhEsGhWTs.sIEsp.hspS.L.loQccGEc.hpKGll.hN.E.EuG.Y.LostFluDLYAHGFIKQINFTsKlPEGlP.lItEKl.pYKFPGuNTlLIE+EIP+WsFs.MKRpTphRTplYIhpsaRsFYGaSPl+sYEPITPEEFGh-YYSWEphhE-Dct-VVY.ShsT+..KVs+E..aAWPpcDuDhMSCoTShtEEahHRht.A hNhEhPYSIHhlsKN+VPIYcQGNLFHoEhouRLSHlSpGLlDHLFTF.SDNTERVRKLHllu-aLY.hE.EhpoYp.Eh.pLp-QVShLp+pspp.Rtpltp.K-hIEGLREPlKKPIYTTQDKEtLRsFFCcERSMEYIYaHIKRLAppSYYSHLNKLQKDhE.hRGhYhSFLTNVKFLVLhEsGhWTs.sIEop.hspS.L.loQccGEc.lpKGll.hN.E.EuG.Y.LostFluDLYAHGFIKQINFTsKlPEGlP.lItEKl.pYKFPGuNTlLIEREIP+WsFs.MKRpTphRTplYIhpsaRsFYGaSPl+sYEPITPEEFGh-YYSWEphh-EDEu-VVY.ShsT+I.KVsRE..aAWPpcDuDhMSCTTShE-EahHRh-... 0 0 0 0 +6045 PF06217 GAGA_bind DUF1004; GAGA binding protein-like family Moxon SJ, Bateman A anon Pfam-B_10604 (release 9.0) Domain This family includes gbp a protein from Soybean that binds to GAGA element dinucleotide repeat DNA [1]. It seems likely that the this domain mediates DNA binding. This putative domain contains several conserved cysteines and a histidine suggesting this may be a zinc-binding DNA interaction domain. 
26.10 26.10 26.20 34.20 23.80 26.00 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.28 0.70 -4.45 10 153 2009-09-12 21:59:08 2003-06-02 14:01:58 7 3 38 0 65 172 2 273.70 39 97.75 CHANGED MDscGh....Rphu.aac..............Ps..sshK.phuhQLMSols..-RDst............+-pNl.httppuuh.........tpRD.........................Mua..pDsWlsp.........+Dsthhsh..hpss............s..hthlsso+shcphQ...pp......pht-sshs.h.c.c...s.PlspssssspcstpspttppspsPKssKsc..KsK+tsssspcpsstss....c...............................sKss+Ksh-l.sINtVuhDhSslPsPlCSCTGsspQCYKWGsGGWQSACCTTslShYPLPhsTpRRGARluGRKMStGAFpKlLp+LAuEG.aDLSNPlDLKsHWAKHGTNKFVTIR ..........................................................MDsss.......Rphs.....ac...............................hh.......hplMu..hs..-Rsst..............pcts..h..h.ttptuh.........ttRD.............................hsh..R-shlsp............pcpthhsh..hpps........................................ht.h.....ts.pph.pt.p..................................ht.tsshs.......c..p.......t.s..l.p.p...ss..t.tp...s.t..s.K...p.p....ppsp.psK.s.psp....+sp+....sthspcpsp.ps....c...................................................s+ssh..Ks...-...l.sl..Nt....lshD..SshPsPVCSCTGsspQCY+WGsGGWQSACCTTslShYPLPhssp+RtARluGRKMStGAFpKlLp+LAuEG.aslusPlDLKsaWAKHGTN+alTI+............ 0 8 43 52 +6046 PF06218 NPR2 Nitrogen permease regulator 2 Finn RD anon Pfam-B_11335 (release 9.0) Family This family of regulators are involved in post-translational control of nitrogen permease. 
27.50 27.50 29.50 28.60 26.90 27.30 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.37 0.70 -5.78 16 363 2012-10-01 20:21:22 2003-06-02 14:04:55 6 7 244 0 279 366 2 293.80 27 83.34 CHANGED GF.P.IpsIFYohFHPTpGoKlhaQsPsssl.................tsshhsFDol+sYlIPKPpLCNKLlTlp..hscY+llGYPVsI.psscYuRNuF.FNhsFVFsh-.scossYEssl+RLG+hhplLEEQophLSct-p..s.h....................................................................st.pIpsLlpplapDLNNYuEChIPlDsuNoIsIKLFPlhPsPss.lpsapVPIhslpLpsllD.sNWD.TMhKIlPaINGlNSlt+IucLADs-hpLs+pCIQHLlYYsCltllDIFQFSNIYAPTsplpsFlpss.shucECQsYVhssssp.............................st.t..s......................................................................plPopssLFpLYpSLsQGpTlK-Whtpphpt....l..ssIDlRRhIpFGll+ulIYRlapYPl....hsshs.......sptcchhph.tp......................hDclssctshs.tslpct..............pslhc .......................................................h.....l.slhhs.F...................G...shl.hp.P.t.l......................................................................................................Fsshp.alIs+.thptp..h..lphp.............thpl..l..uhP..l.h..t..p....p.......Yt..RN.....t........h.F..NhshVht.p....p...............s.............h.....c......hlp+.huthhh..hE.p.sthlSp......t...........................................................................................................................................t......l..l....h..lhp-L.....N..t.........t....shl.l......sp....s..slplKlh........h.........sPs...h...p......pVPl....h..p...h..t.......th..t....pWD.Th.pllsaIsGhppltpIu...upsp.tLsh.s..lppLhaYtsl...........l...hshh.aush.....Yh..p.....h.....tp.h...h.t........s................h.p...tC.tal.....................................................................................................................................................................................................................................l..hthh.tth..p.............u......shtphh................lD.R+hl.FGl.cthlhph.p
ash...................................................................................................................................................................................... 0 108 156 235 +6047 PF06219 DUF1005 Protein of unknown function (DUF1005) Finn RD anon Pfam-B_11366 (release 9.0) Family Family of plant proteins with undetermined function. 20.40 20.40 32.60 24.80 19.70 19.40 hmmbuild -o /dev/null HMM SEED 460 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.60 0.70 -5.82 9 119 2009-01-15 18:05:59 2003-06-02 14:07:22 7 3 18 0 76 127 0 316.70 42 96.87 CHANGED MDPCsFVRlhVGsLuL+h.......ssssspsssussc.ps...sChCcI+l+sFPhQhsslPLlshs-ups.......ssspolAAsFaLscu-lctlhp+spah..........usLclsVYs........GR+GssCGV..suut+LlG+hplslDLcuu...-uKsslhHNGWlslGppptpp.ts....sAELHLpV+sEPDPRFVFQFDGEPECSPQVlQlQGshKQPlFoCKFupcpsu..ps.p............t.W..ouhso-p..-ptu+ERKGWplTlHDLSGSPVAhASMlTPFVsSPGoDRVoRSNPGAWLILRPsGst...........oWcPWGRLEAWRERG....hpDoLGYRF-Ll.-G....usulshAEooIS.uc+GGcFsIDh.tt......ssh.s.ttphphuuhustuuus..tss.t....................GFVMSupVpGEGKsSKPhVcVuspHVTCsEDAAhFVALAAAVDLShDACRLFs++LR+ELpp ...................................MDPpsFlRl.lstLul+hP..............................t..t...s...sC..Cpl+l.th....P......p......s.hPhh...t.............ts.ussF.Lststlphh.t..s...................Lpl.las........G..u......t..t.....CGh......ttphlGhhpl.ls..ths....ucs..hhhasGWhsluptt............tsphpl.V+hEsDPRalFpFss-sthuP.lhQl.....p......G......s.....h+QPhFoCK...Fuhcp.s................................t.....tt.t+E.RKGW.lhlHDLSGSsVAhA.MsTPFVsSs.GsspVuRSNPGuWLIlRP.uss.............oWpPWGRLEsWREc.G...........D.slsh+hcLl...s.......sssl.hu-ssls.sppGGpFsIDh..............................................................t..GFVMustV.pGEu.+sS..+.Ph.VpluhtHVsChtDAAhFhALuAAlDLSh-AC+.Fpp+hR+chp......................... 
0 10 47 64 +6048 PF06220 zf-U1 zf-U1; U1_C; U1 zinc finger Moxon SJ anon Pfam-B_10606 (release 9.0) Domain This family consists of several U1 small nuclear ribonucleoprotein C (U1-C) proteins. The U1 small nuclear ribonucleoprotein (U1 snRNP) binds to the pre-mRNA 5' splice site (ss) at early stages of spliceosome assembly. Recruitment of U1 to a class of weak 5' ss is promoted by binding of the protein TIA-1 to uridine-rich sequences immediately downstream from the 5' ss. Binding of TIA-1 in the vicinity of a 5' ss helps to stabilise U1 snRNP recruitment, at least in part, via a direct interaction with U1-C, thus providing one molecular mechanism for the function of this splicing regulator [1]. This domain is probably a zinc-binding. It is found in multiple copies in some members of the family. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.08 0.72 -4.39 11 661 2012-10-03 11:22:52 2003-06-02 14:07:45 7 17 296 1 456 687 10 36.70 41 15.13 CHANGED MP+YYCDYCcsaLTHDo.SVRKtHsuG+pHtpslpcYY .........+.aaCDYCcsalscDosSlRKpHpsGppHpcNVpca...... 0 152 235 353 +6049 PF06221 zf-C2HC5 Zf-C2HC5; Putative zinc finger motif, C2HC5-type Finn RD anon Pfam-B_11300 (release 9.0) Domain This zinc finger appears to be common in activating signal cointegrator 1/thyroid receptor interacting protein 4. 24.30 24.30 25.20 25.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.67 0.72 -4.21 33 300 2009-01-15 18:05:59 2003-06-02 14:20:43 8 6 263 0 212 293 1 53.90 44 10.63 CHANGED s+ptCsCtAppHsL......ssNCLsCGKIlCspEG.......sPCsFCG.sslhs.....sppppplhpt .....s.p+.CsC.ApcH.L......hsNCLsCG+IlCppEG.......GPCh.FCG..s.lho.....pp-.p.h...p............ 
0 72 119 178 +6050 PF06222 Phage_TAC Phage tail assembly chaperone Finn RD anon Pfam-B_11379 (release 9.0) Family \N 25.00 25.00 39.10 39.10 22.10 19.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.56 0.71 -4.59 6 219 2009-01-15 18:05:59 2003-06-02 14:27:42 6 2 157 2 7 135 0 121.60 75 96.51 CHANGED hspsLRslALsshuGaRHKT.VsVPEW-GspVsLREPSuEAWhhWpEll+...G.ucDD-s.loVsEKhpR.....NhpADVpLFlDVLhDpshQ...RVFS-DDppQVttlYGPVHuRLLppALEL....lsssE-A+K. .......MtKDLKTLALARLSGFRHKT.VKVPEWtNVSVVLREPSAEAWYLWQEVLN.........G.DGE.DDDT.LSVVAKTRR.....NLEADVTLFCDVLCDTDLQ...RVFoPDDREQVLAVYGPVHARLLRQALEL....IADAESARKK................................... 0 0 1 5 +6051 PF06223 Phage_tail_T Minor tail protein T Finn RD anon Pfam-B_8084 (release 9.0) Family Minor tail protein T is located at the distal end and is involved in the assembly of the initiator complex for tail polymerisation [1]. 25.00 25.00 26.20 25.00 24.10 23.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.37 0.72 -3.61 4 777 2009-01-15 18:05:59 2003-06-02 14:39:43 7 2 329 0 11 264 0 96.40 61 80.47 CHANGED +LAREFRRPDWRpMLuEMSATELGEWu-aFsppSFSDsLLDAcFAoLKuLlsuLVoGsscsA.tDFSLLscpEuh.E+TD-ELMpLGEGIoGGVRYGPDSpPG ..........htRsDWRtMLAGMoSTEYADW++FYpTHYFpDs.LDMHFS...GLpYsVLSLFF.u..DPDM.HP....uDF..SLL..s...c.c..E....u.....p.E....h..E....D..-h..LMQKAAGLAGGVRF.GsDG............. 0 0 1 5 +6052 PF06224 HTH_42 DUF1006; Winged helix DNA-binding domain Finn RD, Bateman A, Eberhardt R anon Pfam-B_8424 (release 9.0) Family This family contains two copies of a winged helix domain. 
24.00 24.00 24.00 24.40 23.80 23.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -12.14 0.70 -5.10 119 1653 2012-10-04 14:01:12 \N 7 6 1193 0 494 1415 196 321.90 24 76.36 CHANGED lccLsslQhss....spstahsLauRls.sa...psstLpph.......hpc+plhchashcuo..ll.shc-h.hhphthtshhttt....t...............hphhstphpph.hstlhptl..tspuPloss-lttthttp...............tht....sspthhp.hlhhpG.lshsshp.uh.........phashs-ch...........................................................................................................................................ppps.....hctLlpchlpuhG.sAoss-lttaht..ls.t.....ps+tslt..thhtttcL.hthplc.....t.t....hashs...........s.....sthsssss.......h........plLssFDshlhs+pc..ppclh.s..hta+hcla.sss.sph.........shslLhsGpllGphchcsc..cps......usLplps...h...hscs.......................tts...slttphpchupa ..............................................................................................................lp+huhlQhss....s+usalsLauRls..sa........s.p..hL-cu..........htct..clhch.Wupcus..hh.Ptp.Dahh.hc.t..thhs.tphs......hhh...........................tshh.p.ptpt..htp...llptl...psc.G...P...........l..pus-hppthttt.....................th.h.php.........st+p.........h.....L-..tL........h..ts.........G.cl.........hsst.+.p..shp.........................+sYcL..sc+l...............h....Pp........tps..............................................................................pscu.........hthhlcphhcuhG..ss.......t.p....plssa..hp....Lphs.......shps.h..hp.....thht.t.t.pl..lsVp.l-.................t.ht....hahps..................................................c............tth.stt.ss.........s.......tlLuPFDsl.lhs+c+....tppLa.s.hpa+hEs.a.sssspp.............shslLhpGpllGph-h+hc..+pt..........shLplhs........hhhpt.....................htt...sltpthpphAta.................................................................................................. 
1 183 350 445 +6054 PF06226 DUF1007 Protein of unknown function (DUF1007) Finn RD anon Pfam-B_8585 (release 9.0) Family Family of conserved bacterial proteins with unknown function. 21.00 21.00 22.00 21.90 20.80 20.20 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.44 0.70 -4.59 46 668 2009-01-15 18:05:59 2003-06-02 14:57:21 8 5 643 0 141 463 41 197.50 28 83.76 CHANGED pthtlhhuslssh.........sAtAHPHsal-hpspllhssss..lsul+htWpFD-haSuhll...thcpspcuphsp..pcLpplupsshpsltphpYFThlh.ssGcpl+as.p.....................Ps........chthshcs....s........plhltFslsLsp.Phshp.tt......lslpsYDPoYalshsasp.c..sslplssts.tsCphplhpPsssss...........tpuLshspts......ssshGthFAp+lplpC ..................................tt...hhhshhshh.......uhAHPHsFlchpspllhcspt..lsulchpWphD-hhSuslLh........cs.tsut.ss.cthpcLutpl.h.ssl.hs..p+Y...FTplh....t..s...Gp..p..lKhp..p.....................Ps..............phthshcs.....p................................plllpFsl.sLsc.P.sls..sps......hshssaDPoYYlshpYsp...c.......sslshs..s......ttCphplhpPpsstt.h.........tht...toL.sptt.ts.......s.slGp.FApplplpC............................................... 0 29 75 108 +6055 PF06227 Poxvirus Orthopox_N1; dsDNA Poxvirus Moxon SJ anon Pfam-B_10619 (release 9.0) Family This is a family of dsDNA viruses, with no RNA stage, Poxvirus proteins. 
21.10 21.10 21.20 21.80 20.00 21.00 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.21 0.71 -4.71 37 475 2009-01-15 18:05:59 2003-06-02 15:01:29 7 1 40 23 1 377 0 156.50 20 92.13 CHANGED M...............................................sp.s.............................................................hphsahhtsphs.tpcslpsllpcYhhaRsh....hhtppphG+lFpcLhpaDp.Ahpha...G.slp.....phhpshh..s.......hpshphhhphphshp.......hhpsptlIGlhuhluchhs.........thhslh.hhhphls.pphpp .............................h......................................................................................................pashtcphs...p.sslpsllp-Ylhapsp.....t.p.shGpla+plhoacp.Ahcha...u.slp...............plhpph..................hpshs.al+hhhshs.......h.pspshIGlsuhlschhs.........phhpshhhhhph.l.s...h..t........................ 0 0 0 1 +6056 PF06228 ChuX_HutX DUF1008; Haem utilisation ChuX/HutX Moxon SJ, Bateman A, Eberhardt R anon Pfam-B_10620 (release 9.0) Domain This family is found within haem utilisation operons. It has a similar structure to that of Pfam:PF05171. Pfam:PF05171 usually occurs as a duplicated domain, but this domain occurs as a single domain and forms a dimer. The organisation of the dimer is very similar to that of the duplicated Pfam:PF05171 domains [1,2]. It binds haem via conserved histidines [2]. 
25.00 25.00 25.00 25.90 24.60 24.20 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.47 0.71 -4.80 38 458 2012-10-01 19:49:10 2003-06-02 15:03:01 8 6 445 9 79 1005 54 137.40 45 67.28 CHANGED lApcLsloEh-VlpALPtchsshhsu..sphpplLpslspWGp.VTsIl+stsuIaEh+ushPpGc.hu+GYaNL......htppst..LcGHL+h-shstluhVs+PFh.Gpc...otSltFFsppGcslFKlYLGRDc.pRpLhspQlptFcsLt ..........................lAcphsso.h-VVc.sLP.pp..........h.s.h.s.sG........s+hsslh.-.s.l.s.c.WGs..VTTlV+os.ssIhEhpGt.lPp....G......p......h......u....+.......G....Ya...NL.........hu+cG....lpGHlKh-NsspIALlp+sFM..Gh-............otSltFFsppGp.shFKIaL.GR..D.-...+RpLhs-QVstF+sL........................................ 0 14 41 60 +6057 PF06229 FRG1 FRG1-like family Finn RD anon Pfam-B_8590 (release 9.0) Family The human FRG1 gene maps to human chromosome 4q35 and has been identified as a candidate for facioscapulohumeral muscular dystrophy. Currently, the function of FRG1 is unknown [1]. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.13 0.71 -4.84 13 253 2012-10-02 19:42:32 2003-06-02 15:04:21 7 11 189 1 166 289 1 170.60 35 65.39 CHANGED uSYlcAhssGhhThGtPHcs.spsPsPcElFsAs+l.u-p+luhKoGaGKYLuls..scGlloupu-AIuspEQaps...........lFpss+sAhhu..............ssssFlSssp.tp..h.upS+psupsEhlclRucssp.......shpsphclthpsRhcs+.pt.p.lKthQ....chpc+cL......RLsc--s+cLK+ARc-GshHEslLDhRsKhKsD+Y 
...................................................lpshspG..shutPHp............................p.asAs+.l...u.sp...p...lu.hKo.u..a..G+YLu..hs..pp..G..l..lsupo-AlushEpaps.....................................lh.p....s....s..p..hAhhu.............................................ssssFlsssp..st....h.upuc...su....up....p-..hlclRusApc..................phppp..cl...p.s.+....t....ps..+tp..php.s..Kthp..........phpc+cL...................+l..scc-s..+hLK+A++-GphHEtLLDhRsK.hKtD+a........................................................ 0 58 83 127 +6058 PF06230 DUF1009 Protein of unknown function (DUF1009) Finn RD anon Pfam-B_8705 (release 9.0) Family Family of uncharacterised bacterial proteins. 25.00 25.00 57.40 31.60 21.60 24.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.07 0.70 -11.13 0.70 -4.97 83 491 2009-01-15 18:05:59 2003-06-02 15:08:55 6 4 486 0 172 434 1229 213.30 35 75.92 CHANGED lGpllchL+ppGlscllhsGsl.pRPth.sslc......Dhpslpllsthh..uh.ptGDDuLL+ulhphhEp.pGhplluuc-lhscLLs.spGsLsptpPspppppDlthGhplspslGshDlGQusVVpsGtVlAlEuhEGTDshLtR..................supLst...........ttsuVLlKssKPsQDhRhDLPslGspTlcssscAGlsGlulEAGpslllDpptslptA-ctGlhlhu ..............Gcllchh+ppslscllhhGsV.pR.P.th..ps...l.p.......Dhtshplls.h....tl..spGDDsLLpullshlEp.pGhcVlusp-lhssLls..s.sGsloctpPssp-ppDIphGhchAptLu.tlDlGQusVVpsGtVlAlEu.hEGTDthlpR...................................sucLtp................puuVLVKhsKPtQDhRhDlPsIG.pTlcshtcuGhsGlAlEAG+slll-.p.pcslthAsctGlFlh........ 0 74 121 142 +6059 PF06231 DUF1010 Protein of unknown function (DUF1010) Finn RD anon Pfam-B_11467 (release 9.0) Family Family of plasmid encoded proteins with unknown function. 
25.00 25.00 27.80 28.00 16.80 15.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.90 0.72 -3.98 8 110 2009-01-15 18:05:59 2003-06-02 15:14:07 6 3 46 0 6 54 0 88.70 68 92.27 CHANGED MpAtoYSSAhPCS..................+GFpsFLASSACsASATSYaSCSAAPLpWPSAFSWAAPhhKAGRSlLAFGSN...sAlK.....P......TRlhRuAYLuR .........................QTAFSFSuslQphhChFSG......LRLhuLRtFsVFLASSPCVASASSYtFCSAVPPRW+SsFSpLAPVAKh+LSVLASGSN...ISVK.....P......TRILRSAYLuR................................ 0 0 4 5 +6060 PF06232 ATS3 Embryo-specific protein 3, (ATS3) Finn RD anon Pfam-B_11504 (release 9.0) Family Family of plant seed-specific proteins. 20.90 20.90 21.00 21.10 20.70 20.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.82 0.71 -4.54 15 103 2012-10-02 11:40:54 2003-06-02 15:18:41 6 3 23 0 59 169 0 120.20 37 65.84 CHANGED shptsppscptus...CsYsVpIcTSCsSPthTpDpISlsFGDAhGNpVYssRL............cDPsu..u.......sF-+CooDoFp.lpGsCht.pIChLYLhRsGsDGWhPEoVpl...Ys....ps.pPV..TFhaNphlPc.sVWYGaNhC ...........h.h.........tstp..CsYslhIpTSCsSst.hTpDpl.Slt.F.GDu..GNplass+L................csPss....t.......sFEcCusDoFp.l..pGsCh....tIC..alYL...h..R..s...G...p...D...GWhP-hVpl.....as.......tsp......p.ss......sF.as.p..lPp.s.sWYGashC............................................ 0 4 43 53 +6061 PF06233 Usg Usg-like family Finn RD anon Pfam-B_11528 (release 9.0) Family Family of bacterial proteins, referred to as Usg. Usg is found in the same operon as trpF, trpB, and trpA and is expressed in a coupled transcription-translation system [1]. 
20.70 20.70 20.90 24.80 20.00 19.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.74 0.72 -4.12 27 204 2009-01-15 18:05:59 2003-06-02 15:31:43 7 1 184 0 72 150 47 81.30 60 86.78 CHANGED h.LpGYGLTTAEIhY+hPDHPplLQoaVWQ-YDLAPcFPtLhcFL-FWpcElEGsLHSVRhsHp+LIpPuEaRsVsG.hpLH .........h.QLtGYGLTTAcIhY+MPDHPpLLQoaVWQDYDLAP-FPtLpcFl-FWQcclEGPLHSVpasHp+LIusuEWRsVpGEhhLH..... 0 14 39 48 +6062 PF06234 TmoB Toluene-4-monooxygenase system protein B (TmoB) Moxon SJ, Iyer LM, Burroughs AM, Aravind L anon Pfam-B_10626 (release 9.0) Family This family consists of several Toluene-4-monooxygenase system protein B (TmoB) sequences. Pseudomonas mendocina KR1 metabolises toluene as a carbon source. The initial step of the pathway is hydroxylation of toluene to form p-cresol by a multicomponent toluene-4-monooxygenase (T4MO) system [1]. TmoB adopts a ubiquitin fold [2]. Although TmoB is a component of the T4MO system, its precise role remains unclear. 25.00 25.00 43.20 43.00 21.50 20.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.77 0.72 -4.12 10 40 2012-10-03 10:59:06 2003-06-02 15:32:22 7 1 34 35 21 39 1 84.00 43 95.51 CHANGED MALFPlhusFEtDFVlQLVuVDT-cTMDpVAttsAHH...........SVGRRVss...pPGpllRVR+pGssphhPRssplu-uGlpPhEslEllF-c ...MAlFPlhusFptDFVlpLVsVDocsTMDpVAstsAaH...........oVGRRVts...pPGtslRVR+pGst...phhPRshplA-uGlpPh-slElhap.s.. 0 10 13 18 +6063 PF06235 NAD4L NADH dehydrogenase subunit 4L (NAD4L) Moxon SJ anon Pfam-B_10667 (release 9.0) Family This family consists of NADH dehydrogenase subunit 4L (NAD4L) proteins from the mitochondria of several parasitic flatworms. 
22.20 22.20 22.40 28.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -9.98 0.72 -4.20 12 62 2009-01-15 18:05:59 2003-06-02 15:37:12 6 1 44 0 0 69 0 85.30 41 99.60 CHANGED MlolhLlhssllllShhLshs+hhNhLIlLENaNVLlLLhCLlhoht-s.+hlFIshMsl.slEl.lhLlVlspshcpuShh.hluh ....MlslhLlh.ssllllShhLshtRhlNhLIlLEsFNVLlLLhsL.Lh.oht-s.+hlFlslMslhTlElslhLlVlsplhcpushh..ssh... 0 0 0 0 +6064 PF06236 MelC1 Tyrosinase co-factor MelC1 Moxon SJ anon Pfam-B_10673 (release 9.0) Family This family consists of several tyrosinase co-factor MELC1 proteins from a number of Streptomyces species. The melanin operon (melC) of Streptomyces antibioticus contains two genes, melC1 and melC2 (apotyrosinase). It is thought that MelC1 forms a transient binary complex with the downstream apotyrosinase MelC2 to facilitate the incorporation of copper ion and the secretion of tyrosinase indicating that MelC1 is a chaperone for the apotyrosinase MelC2 [1]. 21.40 21.40 25.70 38.40 20.70 20.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.70 0.71 -4.14 9 48 2009-01-15 18:05:59 2003-06-02 15:43:53 6 1 34 23 15 62 1 126.10 37 87.36 CHANGED lssAAslA.AuushuusspAssAsstuttst..ss.s.........sshssuFDEsY+GRRIpGtssuuGtt..............tauGuatVhlDGh.LHlM+sADGoWlSlVsHYcssPTPhAAARAAVDELp.GA+LhPhPss ..............................................h.hh............h.shhs..h.ssuusstustss.tsssst........sss.sssF.DEsY+GRRIpGsss....s...uustt.............ttuss..a....pVplDGR.LHVMRp....ADG....oWlSsVsHY-sa..sTP......LtAARAAVDELt..GApLtsh...s.... 0 3 9 15 +6065 PF06237 DUF1011 Protein of unknown function (DUF1011) Finn RD anon Pfam-B_11463 (release 9.0) Family Family of uncharacterised eukaryotic proteins. 
25.00 25.00 35.70 29.30 24.00 22.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.29 0.72 -4.13 17 198 2009-01-15 18:05:59 2003-06-02 15:47:15 7 3 89 0 126 188 1 100.00 46 22.74 CHANGED phsssphsalhhlluhlNuhoNGlLPSVQSYSCLPYGshAYHLussLSslANPlACFlAhFl.hRSlshlusLol.lush..FuuYlhshAuhSPsP.LhssshG ...........................h..tsphsaLhsLlAhsNALTNGVLPSVQSYSCLPYGphAYHLussLuuhANPLACFLAM.hl....sR..SLshLusLol.lGsh..huuYlMshAslSPCP.LhspssG......................... 0 35 46 86 +6066 PF06238 Borrelia_lipo_2 Borrelia_lipopr; Borrelia burgdorferi BBR25 lipoprotein Moxon SJ anon Pfam-B_15000 (release 9.0) Family This family consists of a number of lipoproteins from the Lyme disease spirochete Borrelia burgdorferi [1]. 25.00 25.00 26.80 26.60 22.40 21.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.62 0.71 -4.02 2 99 2009-09-11 01:58:06 2003-06-02 15:48:34 6 2 26 0 6 66 0 103.50 81 97.31 CHANGED MQNNTIGLGLNLLSSLTNIAKTDTNIDHNYINTFSKVIDFFYKTYISTLKSMETAESTKIFEEIpDILKYNIEIIEAIShDKsK+IITSLKApRNKIMKEYIKhLKRuENA .....................................MQNNTIGLGLNLLSSLTNIAKTDTNID..HNYINTFSKVIDFFYKTYISTLKSMETAESTKIFEEIQDILKYNIEIIEA....ISTDKSKRIITSLKApRNKIMKEYIKILKRGENA............................ 0 4 4 4 +6067 PF06239 ECSIT Evolutionarily conserved signalling intermediate in Toll pathway Finn RD anon Pfam-B_9306 (release 9.0) Family Activation of NF-kappaB as a consequence of signaling through the Toll and IL-1 receptors is a major element of innate immune responses. ECSIT plays an important role in signalling to NF-kappaB, functioning as the intermediate in the signaling pathways between TRAF-6 and MEKK-1 [1]. 
24.30 24.30 24.30 35.60 24.20 24.20 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.64 0.70 -5.49 11 101 2009-01-15 18:05:59 2003-06-02 15:57:31 6 5 84 0 66 104 0 212.70 39 51.63 CHANGED psptpssusppscps...hhsps.pstpp.hpt...lsshcslFcps..pp+sKssFlchlchFpc+s.V+RRGHVEFIYsAL++MpEaGVE+DLpVYptLL-VFPKthFhPpNlFQphFhHYPRQQpCuIclLEQMEsaGVhPstEschlLlplFG++SaPhpKahRMhYWhPKFKplNPaPlPpclPs....Dsl...-LApluLcRMss.DLpuclTVaQ.hs.spsu.s..c..csaIVuh ....................................p................................tp..p.tps...Ls.hcphFpps..tp+sKssFlpslchFtp+s.l++RGHV-FIYsAL++MtEaGVE+DLtVYptLLDVFP.K.thFh........PpNhhQphFh....HYP+QQpCulslLEQMEsa...G........VhPspEhchlLlplFGc+uaPhpKahRhhYWhP+FKN..lNPaPlP.cslPp....Dsl...-LAphuLc+hss.Dlpuclolap.hs.tsc-shs.....psaIVu........................ 0 24 29 49 +6068 PF06240 COXG CoxG; Carbon monoxide dehydrogenase subunit G (CoxG) Finn RD anon Pfam-B_9339 (release 9.0) Family The CO dehydrogenase structural genes coxMSL are flanked by nine accessory genes arranged as the cox gene cluster. The cox genes are specifically and coordinately transcribed under chemolithoautotrophic conditions in the presence of CO as carbon and energy source [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.47 0.71 -4.51 15 764 2012-10-02 19:24:03 2003-06-02 16:18:11 8 6 531 3 309 754 612 139.90 26 60.58 CHANGED Gphclsss.ppVachLsDP-plssClPGspSlpttu.sEacsplplplGsl+usa....suclphsplcsssphh.lpscGpGttAGhuhssshhhhspssst.oclsasscsclGGhlAplGu+llcussc+llschhpslusclu ...............................p.plssshppVWssL...sD.sch.ltsClP....G.s.p.p......l....p..t....t.....u...ts...p.....a.p..u....s.....lp.l.cl..G..P...l..pupa................pGpl..pls..c...h...c....ss..p....p..hs...lsuc.G.pu..u..t.u.u.......h..st....u.....s....s.hs..h..h..h........p..s....s........s.....s...T.......plsass...csp....l.u........G.+.l...AplG..s..+.l..l.sssu....c.+lhspFhsshstth.t...................................................................... 0 93 183 250 +6069 PF06241 DUF1012 Protein of unknown function (DUF1012) Finn RD anon Pfam-B_9320 (release 9.0) Family Family of uncharacterised proteins found in both eukaryotes and bacteria. 32.90 32.90 32.90 33.00 32.20 32.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.31 0.71 -4.73 2 84 2009-09-11 06:58:58 2003-06-02 16:22:51 7 5 32 0 50 86 2 169.20 58 22.88 CHANGED ARAIIILPTKGDRYEVDTDAFLSVLALpPIQKMESIPTIVEVSSsNhYDLLKSISGLKVEPVENsTSKLFVQCSRQKDLIKIYRHLLNYSKNVFNLCSFPNLsGhKYRQLRLGFQEVVVCGlLRDGKVNFHPNDsEELMETDKLLFIAPL...KKphLYTDMKhENhTVsp..DTRKQVaEcK+SRLpKIIhRPpKSLSKGSDShKGP .........ARAIIVLAp.ctNADQSDARALRsVLSLsGVKEGL.+GHlVVEhSDlDNEsLVKLVGG-lVET..VVAHDVIGRLMIQCARQPGLAQIWEDILGF-NsEFYlKRWPpLDGhpFcDVLlSFPDAlPCGl...K...p...s.G.KIllNP-DsYV.Lp.....E......GDElLVIAE................................................................................................................DDDo....................................................... 
0 6 32 41 +6070 PF06242 DUF1013 Protein of unknown function (DUF1013) Finn RD anon Pfam-B_9390 (release 9.0) Family Family of uncharacterised proteins found in Proteobacteria. 20.60 20.60 20.70 106.40 20.20 19.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.65 0.71 -4.19 35 348 2009-01-15 18:05:59 2003-06-02 16:32:47 6 1 348 0 115 277 1365 141.00 57 63.33 CHANGED LEVpuIADG-VApGIpGhDPlssGQLTp-EI-+scpDPsh+L+lt.sp..shhscpK....KGsRYTPVS+RQDRPDAIhWll+NHPElsDuQIsKLlGTTKsTIpuIR-RoHWNusNIcPpDPVsLGLCoQh-LDttVp+..Aucct ..LEV+uIADGEsApGIKGhDPlssGQLTpEEIp+uEcDPsa+LKLp.sp...stl.s..c.pK.+..KGPRYTPlS+RQDRPsAIhWLl+NHPELpDuQIu+LlGTTKsTIppIRsRTHWNsuNlpPhDPVsLGLCSQlDLDhtVp+Aucpp..... 0 32 69 82 +6071 PF06243 PaaB Phenylacetic acid degradation B Finn RD anon Pfam-B_9426 (release 9.0) Family Phenylacetic acid degradation protein B (PaaB) is thought to be part of a multicomponent oxygenase involved in phenylacetyl-CoA hydroxylation [1]. 20.70 20.70 21.00 21.90 19.30 19.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.12 0.72 -3.85 43 641 2009-01-15 18:05:59 2003-06-02 16:40:32 6 3 601 2 212 450 72 92.20 56 92.12 CHANGED ppWPlaEVFlRucpGLsHpHsGSLHAsDschALptARDlYTRRpEGlSIWVVtuspI................................................................................sASsPs-..............+s.hFcPupDKsYRHsoaY.....clP..........................-s......lp+h .......................WPLaEVFVRuKpGLsH...+...HVGSLHAuDscMALcsARDlYTRRs.EGsS.IWVVtuupI.............................................................................sAS..s..P-E+uthF-PApsKlYRHPTFY......plP...DtltH................................... 0 56 132 180 +6072 PF06244 DUF1014 Protein of unknown function (DUF1014) Moxon SJ, Coggill PC anon Pfam-B_11009 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
21.30 21.30 22.60 22.50 21.20 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.66 0.71 -3.90 14 313 2012-10-02 14:16:02 2003-06-02 16:51:30 7 4 282 0 212 300 4 148.50 32 66.20 CHANGED T+AQItcphpc-pptpct.pptppt.......-h.hEc..shNhph.-pspsE.............................................ARol--AIusLSlsc.....shD+HPE+RhKAAapAaEEspLPcLKpEpPshRLSQhKQlLhKEWpKSP-NPhNQ+ ........................................................................................................................t............t...t..t...t.pttp..t.ttth.....t..hp...c....ht...s.....p....t.....t....tt..-...............................................................................................Aps.lDsAls....sL.olssp.............thDRHPERRh.KAAYpAFEEpcLPclKp..E..p..P..uLRh.....sQhKphlhKcapKSP-NPhNQ........ 0 75 120 176 +6073 PF06245 DUF1015 Protein of unknown function (DUF1015) Finn RD anon Pfam-B_9451 (release 9.0) Family Family of proteins with unknown function found in archaea and bacteria. 
19.70 19.70 19.80 20.20 19.60 19.50 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.30 0.70 -5.97 97 830 2009-01-15 18:05:59 2003-06-02 16:53:50 6 6 778 0 268 769 822 388.30 33 97.45 CHANGED plpPF+ulRsstchstpluu.....PYD.VlsspEtcphhptsPaohl+lphPchshs.ths..pp..YppAtcphpcahppGlLhpDppPshYlYcpphs.s.........ps...ppGllussslc-Yp.....pshI++HEtThts+cpDRlphlcsssANsuPlFlhYpc..spplspllpphh.p....ppPhh-assp.sGlpHplWhlsDtphlpplpphF.tthp................slYIA..DGHHRhsoAhphtpcpcppss.ths.upE.saNahhshL..hscspLpIhshpRlV+sL.suhs.cpalpplpph.Fplpphs.....................................................t.htssppppauhY.lss.paYtLph+p....t...t.p..ss.........lppLDVslLpchlLpslLGI..ps.Rs-p+IsaltGhcslh.....h.ctVcsGp.hplAFhlhPsslcplhsluDsGplMPsKSTaFtPKLtSG..Lhl+pl ...................................................................l+PF+ulR.Pst..p..h.st.......plA.u.....PYD.VlsscEs.cphhp.ssshShl+l.psEhshs..shs.hsp..............Yp+AscshppahpcGhlhpDpcsshYlYc.phs.u................+sQpGlVssssl--Yt.............ssh..I++HEhTht-+EtDRhchlcsssAphsPlFlsYcs.....ppt....l...p.pllppht.p..........ppPhY-Fss.....DGhpH.phWhls-p..phlpplsptF.t.p.h.s...................slYIA...DGHHRsAoAsp....lttc+cpp......t............s....t....ht..sp...............c....taN.............aFLull..hscspLpIhsYNRVV+..cL.....pshs.cpF.L.pp.lppt..F..p.lcphs.........................................................p.hp..P.s..p..hpsauhY.lss..p.aY..pLss+t...t..th.sp.........cs............................................lpsLDVsl.Lpsh..lLs.lLuI.....pD.Rs..-..pRI-FluGl+ultt............ctVssup.hplAFslaPsshcplh.slADsGplMPPKSTaFEPKLcSGLhl+ph.............................................. 1 143 226 256 +6074 PF06246 Isy1 Isy1-like splicing family Finn RD anon Pfam-B_9462 (release 9.0) Family Isy1 protein is important in the optimisation of splicing [1]. 
20.20 20.20 20.30 21.10 19.50 19.50 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.73 0.70 -4.98 31 342 2009-01-15 18:05:59 2003-06-02 17:00:10 7 8 291 1 245 340 3 234.50 38 85.85 CHANGED MARNsEKApSMLsRapptppsphthhct....+RPphsscsssLspAE+WRppll+EIo+KVscIQssuLu-apIRDLNDEINKLh+EKppWE.+Ip.pLGGPsYp+hus.p..hhDscGp..clsG....scGY+YFGtAK-LPGV+ELFE.ppppt.........tps++sRtc........hh+plcssYY.Ga.tDE......-DshLlthEtphEcphhpp.h.p......................t..thtttp.pphht.h.t..t.................................................................th.lPopp-lpp.llc+++pcLLp......Kh ............................................MARNsEKA.ohLsR.apptptt-tth.................hc.........RRPhhsocspslspsE+WRt.pll+.EIo+KVspIQs.............suL......u.-apIRDLNDEINKLhREKtpWEhpIc.pLGGPsY....t+.hus+...hhD...p.-G+...ElPG..............s+GY+YFGtAK-LPGV+ELFEpps............................ccs+tc...............h.h+pl.Ds.pYY.GY..hD-.......-DuhLl.h.Et.phEcphhtph..h................................................................................tttp...........p...h.....t..tt.......................................................................................................hhth.lPo.p-lpp.llc+++.cLlpc......................................................................................................................................... 2 89 138 204 +6075 PF06247 Plasmod_Pvs28 Plasmodium ookinete surface protein Pvs28 Moxon SJ anon Pfam-B_11346 (release 9.0) Family This family consists of several ookinete surface protein (Pvs28) from several species of Plasmodium. Pvs25 and Pvs28 are expressed on the surface of ookinetes. These proteins are potential candidates for vaccine and induce antibodies that block the infectivity of Plasmodium vivax in immunised animals [1]. 
26.00 26.00 26.20 27.30 25.00 25.90 hmmbuild --amino -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.65 0.71 -12.07 0.71 -4.73 11 164 2009-01-15 18:05:59 2003-06-02 17:02:55 6 2 22 5 12 166 0 197.10 58 88.67 CHANGED KVTVDTlCKNGhLIQMSNHaECKCspGaVLhsENTCEcpV.cCcc.-slsKsCG-YuhChspus.sp.psh+CsChsGYhlspslChPscCpshsCG.sGKCIlDssNssps.sCSCsIGKl....pNsKCTtsGcTcCsLKC.KtsEcCKhsstaYcClscsuustsusutsp................................................sss.......SlhsGhSlhsILsLhllal...hl ...KVTs-T.CKNGalVQMSNHFECKCN-GaVhh.sENTCEEKp.-Csp.pslNKsCG-YuhCssst.ssppcsh+CGCI.GYTlhp-..lCsPs+Cps...l.CG.pGKCILDssNsNso.hCSCsIGps...--sKKCsKsGcTcCoLKC.KsNEcCKpspsaYKClsKsuustsuuutss..................................................s.us........SlhNG.uVhsIL.h...hah..s............................................. 0 3 5 10 +6076 PF06248 Zw10 Centromere/kinetochore Zw10 Finn RD anon Pfam-B_9476 (release 9.0) Family Zw10 and rough deal proteins are both required for correct metaphase check-pointing during mitosis [1,2]. These proteins bind to the centromere/kinetochore [2]. 
19.30 19.30 19.60 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 593 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.86 0.70 -6.36 12 279 2012-10-03 17:31:52 2003-06-02 17:10:26 8 11 205 0 185 344 1 435.20 19 61.30 CHANGED h......................tpsstlph-.Dhptpls+lpp+lcchpscVpshIsccYs..-FhsshpusppllppsctLsc-l.DlLps.lps-lhtsLpsuss-hsphpppLccsthslpllppL..chcsth.....cphspALpsKsYltsAchLpch+shLctlps.cspp.............LchhpuLphEhplptppLl.pLsccap+Ll.aphsop.......pshhpspLpLsptpsp.........LssVl.Ahsh.GpLppplcthsphLlc.llcPLIspPshhsss-pppc.s.....lhLsappp.......pophc+sssppVap+lhhllcsLpphhhslssppp..........lhpllGchlhcclschlIccCLshuIP..ssuschpp.appllppsp-FEptLK-htals.s-s.osL.cYAcslssaFssK+CpcVlssARpLMpp-hashVh..........lsPp........................................................................t..t.cstppluptohshPsChlScSsp.cLMcLhhpsLpEs.ssusttsuspLhholcslhpLahssVPphHcc.LtplPQhuAlaHNNCMYluHaLhshuaphph.....h...hsoFsDlVsshRphGscsFhsQhphQ+upLh-tLsutp.Fpph.ssccphos .................................................................................h..t.....phptpl................p.ht....p.....ph........ph...........hpp...s.......pth...htp....spthtpcl...........th..h.pt....h....t...p................................h.tt..plpss...scht.lpp..plp..p.p.hhph...lp.p....l.....tphpphl............p.ph..ptshtp...t.phhtuh..p......hlpphp.thl..p......................p......................thhp.lp.c.....ht..pppl..tLtp.apphl.h.p.......................phphtt.t.t.tt...............lp.phl.uht.h.t.l..t....th..tp...htp.lhp.hlpP.hl........s...h..............t....................l...ph..............................tt.t..s.ttlhtpl...h.lhph.lpp.l.................................................h.p.huphhh.......p..h.phllpphL..ulP...ps.pphtp.ap.pl.lp.pstp.Fppt.Ltp..........hthh............t.................p...........tL.paspph..hahp+hp.phl.ts.Rplh..t......t.............................................................................................................................................
................................................................................................................................................................................................t...................................................................h...............hh.ha...............t.....sh....h..Nsh.ah...h...............................................................................................h......hp.....t...h..............h........................................................................................................................................................................................ 0 64 100 147 +6077 PF06249 EutQ Ethanolamine utilisation protein EutQ Moxon SJ anon Pfam-B_11530 (release 9.0) Family The eut operon of Salmonella typhimurium encodes proteins involved in the cobalamin-dependent degradation of ethanolamine. The role of EutQ in this process is unclear [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.69 0.71 -4.67 4 785 2012-10-10 13:59:34 2003-06-02 17:11:56 7 4 744 3 116 676 368 139.80 48 71.45 CHANGED hoKELlEpLl+pVlcEKhu.tpst.p.s.h+phD.SGIhulKL.sspsscpsRhDTGNP+DVVhTpDLhTL-ESsRLGAGlMEMK-TTFsWTLsYDElDYVIEG+L-llIDGRplsAcAG-lIhIPKGSpIcFSVss.A+FlYVsYPAsWpS ................................................................................................hscpLltpLhcK.VhpEK.uhp.........t..p...s.h.pphs...GhuulKll-usol+hs.RFDs.up.Pc...sVuhT.DL..VT..t.-..-...u...s.s..MuAG.FM.ph.....-...s..u...h..F..P..W..T..L..s..Y......D...E..IDh..VL..E..G..-..L...cV..c..c.-...G.....c..T...h.l..A+..AGDVhFIPKGS..o..I..cF..u..T..s..u..s..s.+..FlY.VsaPANWQS........................ 0 41 76 99 +6078 PF06250 DUF1016 Protein of unknown function (DUF1016) Finn RD anon Pfam-B_9571 (release 9.0) Family Family of uncharacterised proteins found in viruses, archaea and bacteria. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.79 0.70 -5.46 7 1895 2012-10-11 20:44:44 2003-06-02 17:15:21 6 10 943 0 343 1531 191 214.50 28 90.17 CHANGED lht-I+phI-puRpcshpuVNupLshhYWpIG+hIlEtpptGp.+AtYGtpllppLu+cLsp-as..+GFospNL+pMRpFhhhasc..hhtsLhtpLoW.....oahplL.plcst.pRsFYhcpshcptWSsRpLptQIsohhYERphlS+c.pcshpp....h.stpphpsc.sl+DPYlL-FLuLt-p..a.E+-LEpullp+LppFLLELGsGFuFVuRQ+RlplDsccFaIDLLFYphtL+Chlhl-LKlGcFphp.hGQMphYLpah-ch.+pPs-sPsIGlILCssKscs.lcYsLptpppslhsuEYKh.LsscEpLtpcL.pt ...............................................................................h...h.ttp.thh.thN..hh.hha..lGp.l.t........pttaGtthlttlu..tlt.th....pG..hs.psl..hh.ah..a.......................................................h.......h......p............t....h.....................t...........p...................l..........s...........h....................p.h......h...t....h.t.p.....tt...a.Y...s.htttWo.t..L...htt..hat+.....s...........................................................s....hh+s.h.h-hht...h.....t........h...Etphcttlhtph.pahhEhGtsF.a.spp..h..h.t.t......p.hhhDh........lhaph.hpthlh.h-lK.t.ht.t.huphthYhthhpt........t.-.t.slGlllCtttt....hph.h.......t.hhhupa.........h.............t.h.......t.......................................................................... 0 95 224 306 +6079 PF06251 Caps_synth_GfcC DUF1017; Capsule biosynthesis GfcC Finn RD, Sammut SJ, Bateman A, Eberhardt R anon Pfam-B_9574 (release 9.0) Family Many bacteria are covered in a layer of surface-associated polysaccharide called the capsule. These capsules can be divided into four groups depending upon the organisation of genes responsible for capsule assembly, the assembly pathway and regulation [1]. This family plays a role in group 4 capsule biosynthesis [2]. These proteins have a beta-grasp fold [3]. Two beta-grasp domains, D2 and D3, are arranged in tandem. 
There is a C-terminal amphipathic helix which packs against D3. A helical hairpin insert in D2 binds to D3 and constrains its position, a conserved arginine residue at the end of this hairpin is essential for structural integrity [4]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.48 0.70 -5.19 17 1054 2012-10-03 10:59:06 2003-06-02 17:25:22 6 19 714 4 140 613 338 216.40 40 55.92 CHANGED tsshsVplhtssp...phplsassshRLtplVtps.............pshshaW.uAsLh-..tpupscpppQplLtpLuphup.....pssschtushppLtpQLptlplstRlhhsLD.DhlRlscptNP.L.pGpYhLhlsP+sspVolhGhV..p.......psGshsapsttsspsYLpptsLlssA-pS.salIpPDGpspptPlAYWNcpah-hsPGuhlaVsF..t.osh.pphs..sLNppIlpLLpNRhsh ..........................................................................................................................h.stssVpla..Gp...ppsholsssEplhpLVsQP...................hhsp...WWP.uA.l.ls..-p......A......p......u..t.......Ah..p.......pp.....Q....tlh.s.p...LA.p..htA...................-uss..D..s..A..A......s.I.......ps......l.....Rp.....Q.....lps..Lc....l......T......G...R.......h..l..p.L.D.P....D..hVR..V...s..E....p....u....N.P.PL...p.............GsYTLals..spP.s.TV...TLhGhl..S.........psGp.P.ap.sGR.sVssYL....p....s....p.s...h.L.u.G.AD.+s.........h.s.aVlhP-Gc.o..p..p........A..P..V...A..h....W...N..K...R..H..l..E.....P...PGShlalGh..usplh....stp.s..sLNspIlpsLTQRlP.t............................................ 0 33 78 110 +6080 PF06252 DUF1018 Protein of unknown function (DUF1018) Moxon SJ anon Pfam-B_11125 (release 9.0) Family This family consists of several bacterial and phage proteins of unknown function. 
27.00 27.00 28.90 27.70 26.40 26.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.61 0.71 -3.59 54 451 2009-01-15 18:05:59 2003-06-02 17:38:09 7 3 363 0 82 413 9 116.80 26 75.27 CHANGED hDD-sYRshLtp.....hsG+pSspchs.spLppVlcthcp.pG..F............hppstt...................p.ts...t..hs.......pss.hcKIcAlWhphtptsshtp.sct.ALssal+Rhh...........tlsslpaL..p.sppsppllpuLKp...Wpp.Rt ......................................hDD-sYRshLtp.....h.s.GKs..Ss+chshspLp...pVlpthcp..pG..F...................p....pttt...................p.ss.............t.shs...........pts.hp...KIpAlWtphstth...........s..p.....uLs....tal++hh..............sls.p.....lpaL..s..sppsppllpsLKp...hppR................................................................ 0 30 58 73 +6081 PF06253 MTTB Trimethylamine methyltransferase (MTTB) Moxon SJ anon Pfam-B_11132 (release 9.0) Family This family consists of several trimethylamine methyltransferase (MTTB) (EC:2.1.1.-) proteins from numerous Rhizobium and Methanosarcina species. 
25.00 25.00 25.80 25.70 19.40 17.60 hmmbuild -o /dev/null HMM SEED 505 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.52 0.70 -6.74 8 451 2009-09-10 22:07:33 2003-06-02 17:43:21 6 2 119 2 177 465 1554 462.60 30 97.03 CHANGED tRpuGRtuR+A.Russuspphsh.plspshsshplLs--plEtIHcsuh+lLEElGI-shs-.pAl-lacpAGAcVt.cstRVRh-+ulltEhlposPusFTlHARNPp+slplGGpslsFussuGuPpVpDl-+GR.R.GsLtDapNhs+LApthsslHhhGsslCEPhDlsss.pRHL-slauplphSDKsFhsospu+tRApDulEMhtIstGt..-phcssP.sshoIINsNSPhhaDtsMsculhshActGQsllloPFsLuGAMuPVTlAGALsQpsAEALAGlALsQLlRPGAPVlYGsFsSsVDM+SGAPuFGTPEsupushsuGQLARRYsLPaRTuuu.suSphsDAQAAYEoshuhauAlLuGuNllhHuAGWLEGGLssSYEKhllDh-hLtthhcalpulslsE-sLuhDAItEV...GPGGHFhGspHThc+YpTAFYpPhluDapsFEpWsEpGupDttpRAsclWKchLA-YEtPhlDsAlcEsLc....-alA+R+schG ...........................h...............................................hphLsc-plptI...apsuhclLE-lGlphhss...pAlclh+puG.A.p...V............ps......p..pV...+hscsllcchl.t.pAPp..pFTh.puRss...c..c.slplG..G..p..p.hhau..ss.hus.........P.hl.hD.h.c.s....s+...R........uoh....pDhtshs+Ls.phhsslchs.u.s.ss.stP..pD..ls.....ss..ppcl.s..h..hhs..p..l.phocKs........hh.....ss......s......s..s.....pps......pc.....s.......l..-M........sp.....lhhGt.......-...hc..pps.....hh...h.s.h.hs.ss..SPLpaspshhpslhthuctsQssllss.hshuGAhuPsolAGslsQs.AEsLAGlshsQllpPGsPsla.Gsasosh.DM.+..o.G.usshGoPE.shhshusuQl.A.+.h.a.sLP.h+suuu.hssuKhsDsQAuaEsshohhhuhhu.GsNhlhHuuGhL-uhlssShEKhlhDs-hlshhp..+hh.cG..l..ph..s.........-..c..s..l..uhD.sI.c.c.V.......G..P.s.GHaLuptHT...ppaps.s.a.ahP.tlh.Dpp..shcp...Wp.tt.G.t.hshhp+AptthcchLss....a...p......P...l....DsslcctLp....thl....................................................................... 0 84 140 150 +6082 PF06254 DUF1019 Protein of unknown function (DUF1019) Finn RD anon Pfam-B_9681 (release 9.0) Family Family of uncharacterised proteins found in Proteobacteria. 
19.60 19.60 21.50 20.70 19.20 18.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.17 0.72 -3.82 12 911 2009-01-15 18:05:59 2003-06-02 17:45:42 6 1 343 4 26 400 0 86.30 43 56.64 CHANGED phs-........slspNRQKlF.RWLcs..DopttR-plptLhPAIhusL...PhEh+sRlps.sshh.hlspht.KEhs-Appulhh......hh+-ls-tIssh .............................hPc..ALupNpQKIF.RWlcp..DT.DthEKhpALlPAI.cAh..............P.h..Lhs+...hpstpShha........+Els-s+-pl...........l+-sD-hlts................................................................................ 0 2 7 12 +6083 PF06255 DUF1020 Protein of unknown function (DUF1020) Moxon SJ anon Pfam-B_11136 (release 9.0) Family This family consists of several MafB proteins from Neisseria meningitidis and Neisseria gonorrhoeae. The function of this family is unknown. 25.00 25.00 46.30 41.60 24.20 24.10 hmmbuild --amino -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.92 0.70 -5.23 15 288 2009-01-15 18:05:59 2003-06-02 17:46:46 8 13 49 0 16 255 0 227.60 44 62.34 CHANGED DuFIRDshQRQHYEPGGKYHLFGssRGSVu-RoGpIslhpspoHplGNLLI...QQAuIcGsIGYpsRFSGHGHEVHSPFDNHAS+SsSDEuGuVD-GFTVYRLHW-GaEHHPADGYDGPQGGsYPtPsGARDIYSYalKGTApsIKLNhTDsRShuQRlhDcasNAGSsFosRADEAsK+haEasPcLDRhGNuMEslNGlAsGAlNPFluAGEulGlGDIlpGsspulshAA.......M+sLGsLSsEuKhAsIssLuDsAthcKsutsAl+cWAspNPNAApTlEAllNVh.AAhslhth.................K+spLsssAhPhGKuAVSssFutu....YpsP.tuRs ............DshlppshptpHaEPsGKYHLFGs.RGpltpRshtlthhp..s..huslh....ppsshpG.lGY.s+FSsHGaE.HuPFDpHsucSsSc.tGssstG..FolYplphpG.EhHPtDuYDGPpGusYP.PtGARD.YoY.lpGsupph+hN...........ssptshtpRh.cp.tshuu.h.ptss-u.+hhhc.ssphsh.uNtM-.lpGls.tGAhNPhlsuhphlGhGshh.uh..s.s.ss.......hpslu.LssEsphAhhs.Lts.A.h.cpshpuscpWhptpPNhstTspsshslh.AA........................KhspLs.sths......sKsss....................h............................................ 
0 5 5 5 +6084 PF06256 Nucleo_LEF-12 Nucleopolyhedrovirus LEF-12 protein Moxon SJ anon Pfam-B_11198 (release 9.0) Family This family consists of several Nucleopolyhedrovirus late expression factor-12 (LEF-12) proteins. The function of this family is unknown [1,2]. 25.00 25.00 141.00 121.10 20.30 20.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.15 0.71 -4.51 12 26 2009-09-11 05:10:50 2003-06-02 17:51:17 6 1 25 0 0 23 0 176.70 38 87.59 CHANGED spcpFppRLsYVssIsshMpcTLchMshpGthocsDAsTLClADDTAAWlCGRscsss.FsSFRl+hut..F.ppsspsLp+FhFEESLtQ+h.....tshs+YTYMNYsl.hcs.lAIKLsVhht..chcsD........sLsYFVshtsupspshs.........................csssshphpLs.......hEchlEEhlspussp ........pppFspRLpaVssIsshh+csl-hMsppsthopsDAsoLClADDTAAWlCGRlssss.FVSFRl+hst..F..c+sspsLc+FtFEESLtQph.....pshs+YsYMNYol..pshlAlKLlVahc..shpss........sLsYFlpststpspppsp.h.+....................cpss.hphsLsh......hEphlE-shsptlV........................ 0 0 0 0 +6085 PF06257 DUF1021 Protein of unknown function (DUF1021) Moxon SJ anon Pfam-B_11556 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 26.30 30.20 20.80 19.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.51 0.72 -4.29 44 1201 2009-01-15 18:05:59 2003-06-03 09:26:38 6 1 1190 2 152 402 3 76.60 46 88.55 CHANGED hspsLssIKpsl-s+lGc+lpl+AssGRKKhhc+pGlLccTYPulFVVc..LDpp......psshcRVSYSYoDlLTcsVplpa ...............sppltcIKppl-sHlGpplhlpApsGRK+p..hpRpGlLtETYPSlFlV-..L-ps......psshp+VSYoYoDlLTEslplpa.... 0 56 98 125 +6086 PF06258 Mito_fiss_Elm1 DUF1022; Mitochondrial fission ELM1 Moxon SJ, Eberhardt R anon Pfam-B_10595 (release 9.0) Family In plants, this family is involved in mitochondrial fission. It binds to dynamin-related proteins and plays a role in their relocation from the cytosol to mitochondrial fission sites [1]. Its function in bacteria is unknown. 
21.80 21.80 21.90 23.40 21.20 20.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.77 0.70 -5.48 31 342 2012-10-03 16:42:30 2003-06-03 09:28:41 6 2 271 0 131 321 1802 296.30 25 87.47 CHANGED sGpcsQshGLucALuh................................shphpplthpp...................................h.th..h.ssshs...shPcLlIuuG+pTthhshhl++htsst.hsVtlhcP+hs.hppFDLllsPpHD......................pl.sssNV.....lhohGulptlssttltpstt.....ht...+splslLlGG.soppaphssp.htplssplpslhppts.h.plhlTsSRRTPssstshLpphht.....hstl.la..stss....sNPahshLuhADslhVTsDSsSMlsEAsuoG.sPVhlhsls..s.pps.+hschhspLhppGhl+sasuh..hpt..ht.htPLs-s.....s+sApplhpchth ......................................................................................................sGppsQshuLsptLsh..................................................................................................hp..h..hplphpt............................................................thh........s.hh......sht...t....h...th.htt..hpt.....PslllusGpps.t.hshhl+....+....hh..ts.hhslplh.....c......P.phs....hstFDhllsPpHD.............................................th.tssNVlhhhGulptlssttlttst.th.....t...h.....h.................psh.l.slllGG.ssp..p.apaspp.htplhptl.pplhpsps..hslhlosSRRTPtps.tshlpptht........t..s..pl.la......stss.....N.P.ah.u.hLuhA-tlllTsDSlSMloEAsuoG.pPVhlh..shs.....t...pps...+hpp.hhppLhpp.thhp..h.s.t...h.t.........h.s..lp-stchAt.lht....t.................................................................... 0 45 94 109 +6087 PF06259 Abhydrolase_8 DUF1023; Alpha/beta hydrolase Finn RD, Bateman A anon Pfam-B_9720 (release 9.0) Family Members of this family are predicted to have an alpha/beta hydrolase fold. They contain a predicted Ser-His-Asp catalytic triad, in which the serine is likely to act as a nucleophile [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.30 0.71 -4.78 6 556 2012-10-03 11:45:05 2003-06-03 09:31:10 7 6 239 0 146 545 20 174.30 31 36.72 CHANGED DPtspshAApssGDl-sA-+VSVsVPGlsoss...ppohsshhttstthttcshstpptAu.ss..ssAsIAWhGYpsP........ussuhsssTsspAcsGAscLsthlcsLpAso..sstcloLFsHSYGSlVsuLAhcs.....ss.VsDlVlhGSPGhcAsssApLGs-.u+sasMpus-DaIppl.Pplus .....................................................................................tpshAslulG.ss.D.s.A.cc.VuVh.VPGssosl...................pssh..s..s...h.p.p.s.ts...h..t..........t....p............h.....hp..tp.t..s.u....ss...........ssAsIAWhGYcs.P.......................ss.s.h.hp..s.s.ss.s.h.A.....csG.AspL....sp.Fhpu...L................p.....u............s.......p.....ss.......s...s..c.............lTllGHSYG.S.l.lsu.h.A.hpp.................s.h.t.ss-.l.V.h..h..G...S.P..G..h..............s....................s...s........s..........s......u.......p......L.sh..s...............u.+s..as.hp.u..s..s.D.lt.......s.............................................. 0 60 119 141 +6088 PF06260 DUF1024 Protein of unknown function (DUF1024) Moxon SJ anon Pfam-B_11584 (release 9.0) Family This family consists of several hypothetical Staphylococcus aureus and Staphylococcus aureus phage phi proteins. The function of this family is unknown. 21.20 21.20 22.30 22.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.91 0.72 -3.83 3 370 2009-09-11 11:00:23 2003-06-03 09:32:37 7 1 210 0 4 184 9 79.50 82 96.81 CHANGED MNNREQIEQSVISASAYNGNDTEGLLKEIEDVYKKAQAFDEILEGlsNAhQ-ulKEsItLDEAlGIMsuQVlYEYEEEQENE .......MNNREQIEQSVISASAYNGNDTEGLLKEIEDVYKKAQAFDEILEGlsNAhQculKEsItLDEAVGIMsGQVVYKYEEEQEN-................................ 
0 3 3 4 +6089 PF06261 LktC Actinobacillus actinomycetemcomitans leukotoxin activator LktC Moxon SJ anon Pfam-B_11552 (release 9.0) Family This family consists of several Actinobacillus actinomycetemcomitans leukotoxin activator (LktC) proteins. Actinobacillus actinomycetemcomitans is a Gram-negative bacterium that has been implicated in the etiology of several forms of periodontitis, especially localised juvenile periodontitis. LktC along with LktB and LktD are thought to be required for activation and localisation of the leukotoxin [1]. 25.00 25.00 27.10 140.60 22.50 20.80 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.99 0.71 -4.50 2 18 2009-01-15 18:05:59 2003-06-03 09:42:17 6 1 11 0 0 10 0 139.60 91 99.45 CHANGED MSuTEYAPFYLRFIQFPsNEV.LhEYWpLVQN.VpKlp+ITVhhAQIlGhhuEKsIWKYQuTFNDGhLEsEVAK.-VSLTLRNSALLVASAIVIHFKSNFTNLLILSQITQYppahPh.KKSKY.PLYhSCLLRRRLTEFKITLLPLPWG MSGTEYAPFYLRFIQFPSNEVLLYEYWKLVQNFVQKVSKITVRLAQIVGILGEKTIWKYQSTFNDGMLEGEsAKQEVSLTLRNSALLVASAIVIHFKSNFTNLLILSQITQYCRHRPKPKKSKYFPLYLSCLLRRRLTEFKITLLPLPWG...... 0 0 0 0 +6090 PF06262 DUF1025 Possibl zinc metallo-peptidase Finn RD anon Pfam-B_9726 (release 9.0) Family This is possibly a family of bacterial zinc metallo-peptidases. Although they carry the HExxHxxGxxD motif, they are missing a final methionine which would class them as Met-zincins. 20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.09 0.72 -4.20 78 911 2009-10-19 15:30:34 2003-06-03 09:47:56 6 14 671 2 290 712 69 87.30 28 61.27 CHANGED hssVslhV-Dh..Pstt.hsphtht...pLLGLYcGlsLo-+ush...uthP..DpIhLaRcPlLchh......s-cs-..Ls-.VpcsllHEIuHHFGlSD--lct..lsh ..................................................tl.h.ltt...s.....................................lhG.ap.Ghslsp...ps........ssthP..s+IslYRcPl.phs.........ssc--..Ls-..hVpcsllHEluHaaG...ls--clctht......... 
0 93 203 253 +6093 PF06265 DUF1027 Protein of unknown function (DUF1027) Moxon SJ anon Pfam-B_11526 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 81.90 81.60 23.40 22.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.93 0.72 -4.11 32 1012 2009-01-15 18:05:59 2003-06-03 10:12:50 6 1 1008 1 97 433 0 85.50 54 52.69 CHANGED lplsshpacllcNh+-uF-tEthtpRaS-lLsKYDYIVGDWGh-QLRL+GFYcDsspps.hDp+IosLpDYLhEYCNFGCAYFVLc .....lplsshtacLlcNa+-uFc.EpFtpRaS-l.LsKYDYIVGDWGh-QLRL+GFYcDsspcs...hsp+ISsLpDYLhEYCNFGCAYFVLc......... 0 20 49 73 +6094 PF06266 HrpF HrpF protein Moxon SJ anon Pfam-B_11646 (release 9.0) Family The species Pseudomonas syringae encompasses plant pathogens with differing host specificities and corresponding pathovar designations. P. syringae requires the Hrp (type III protein secretion) system, encoded by a 25-kb cluster of hrp and hrc genes, in order to elicit the hypersensitive response (HR) in nonhosts or to be pathogenic in hosts. The exact function of HrpF is unknown but the protein is needed for pathogenicity [1]. 21.70 21.70 22.60 23.50 21.30 20.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.39 0.72 -3.85 7 64 2009-01-15 18:05:59 2003-06-03 10:41:15 7 1 59 0 13 36 0 73.40 46 98.66 CHANGED M.ShsuLQRRLDsshpcApsplD-AshpuuEu.ho.sD.aAFhEAuhphusAShAssQpLpsKHGLsKAlIstIp .....MhShpuLQppLDoshsRAposlDDhsLsuupG.hot-DhpAFsEASpphusAS.AAsQphpstHtlsKslI-uhp.. 0 0 3 8 +6095 PF06267 DUF1028 Family of unknown function (DUF1028) Finn RD, Bateman A anon Pfam-B_9747 (release 9.0) Family Family of bacterial and archaeal proteins with unknown function. Some members are associated with a C-terminal peptidoglycan binding domain. So perhaps this could be an enzyme involved in peptidoglycan metabolism. 
25.20 25.20 40.70 32.70 22.70 22.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.13 0.71 -4.87 78 406 2009-01-15 18:05:59 2003-06-03 10:49:56 7 10 357 2 175 408 353 190.90 39 75.84 CHANGED TFSIlA+sspTGphGlAluSs.lAV....Guh.lsas+AGVGAVATQu.hssPth.GspuLchLppG.hsAppslpplhssD..sttchRQlsllDspG....psAsaTGspshsh...tuphsGps..hustGNhLssspVlpAMspuFpsss..................st...Lu-RLlsALcAGpsAGGDtR....u..hpSAALhVss....ptsa..........shlDLRVDhpsc..P...ls-L ...................TFSIlA+..ss.......c...oG.......p..hGlAluSp.hAV.....GAh.sPascAGVGAVATQs.h...ssPsh..Gs...puL..chLc.p......G.....hssppuLctllssD.............sttphRQlsllDspG...............psAsaTGpcs...hs..h.............tuth..sG..c....s.................hsstGNhLuutpVlpAMspuFpsss.....................ut....Lu-RLlsALpA...G.sAG.G-tp......s...hpSAA..LhVls...ptsa...........sh.lDLRVD.pps...PlscL................. 0 57 107 144 +6096 PF06268 Fascin Fascin domain Moxon SJ, Bateman A, Finn RD anon Pfam-B_11660 (release 9.0) Domain This family consists of several eukaryotic fascin or singed proteins. The fascins are a structurally unique and evolutionarily conserved group of actin cross-linking proteins. Fascins function in the organisation of two major forms of actin-based structures: dynamic, cortical cell protrusions and cytoplasmic microfilament bundles. The cortical structures, which include filopodia, spikes, lamellipodial ribs, oocyte microvilli and the dendrites of dendritic cells, have roles in cell-matrix adhesion, cell interactions and cell migration, whereas the cytoplasmic actin bundles appear to participate in cell architecture [1].\ Dictyostelium hisactophilin, another actin-binding protein, is a submembranous pH sensor that signals slight changes of the H+ concentration to actin by inducing actin polymerisation and binding to microfilaments only at pH values below seven [1]. Members of this family are histidine rich, typically contain the repeated motif of HHXH [3]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.38 0.72 -3.97 23 817 2012-10-02 19:42:32 2003-06-03 12:37:00 8 24 166 30 411 843 24 107.60 21 63.95 CHANGED ssta.ssc+hsscLsAstts.psh...........plapLphpss...shs..L+.opsG+YLshtscGpltspsp....tssstFplchp....u+hshhptspG+YLs..ussupLpu.pusssupsELasl ............................................................................................thh.sspp..t.h.p.lsustt..p.tph...........ph.apLp..hssp..................shth...l.....+..o.p..sG+..Ylsh.ts.cG.s..ltssss.........ssssh...F.p.lphp...........sph...s..h..h..ptssG+YLs...hss.sG...pLt.u.pupssu..tsEhath......................... 0 103 156 257 +6097 PF06269 DUF1029 Protein of unknown function (DUF1029) Moxon SJ anon Pfam-B_11672 (release 9.0) Family This family consists of several short Chordopoxvirus proteins of unknown function. 25.00 25.00 63.20 62.70 17.20 16.10 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.75 0.72 -3.96 11 43 2009-01-15 18:05:59 2003-06-03 12:41:20 7 1 37 0 0 21 0 53.00 63 99.48 CHANGED MITNYEPlILlsIlshullsNhplSpKsKlDIIFhlpollFhWFlFHFlHSVh MISNYEPLlLLsIhCssLLhNFslSSKTKIDIIFslQoIVFhWFIFHFVHSsI.......... 0 0 0 0 +6098 PF06270 DUF1030 Protein of unknown function (DUF1030) Moxon SJ anon Pfam-B_11673 (release 9.0) Family This family consists of several short Circovirus proteins of unknown function. 
25.00 25.00 128.20 127.60 16.20 14.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.85 0.72 -4.43 2 10 2009-09-11 20:47:49 2003-06-03 12:43:03 6 1 6 0 0 7 0 53.00 95 88.33 CHANGED MVFIIHLGFKWGVFKIKFSELYIHGYTDIVVLVVaTVFERSAEAYhVaISpuL MVFIIHLGFKWGVFKIKFSELYIHGYTDIVVLVVFTVFERSAEAYVVHISRGL 0 0 0 0 +6099 PF06271 RDD RDD family Bateman A anon Pfam-B_1111 (release 9.0) Family This family of proteins contain three highly conserved amino acids: one arginine and two aspartates, hence the name of RDD family. This region contains two predicted transmembrane regions. The arginine occurs at the N terminus of the first helix and the first aspartate occurs in the middle of this helix. The molecular function of this region is unknown. However this region may be involved in transport of an as yet unknown set of ligands (Bateman A pers. obs.). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.86 0.71 -4.28 114 4557 2009-09-14 12:06:15 2003-06-03 12:48:13 7 44 2499 0 1260 3598 868 135.70 17 61.94 CHANGED hushhpR.hhAhll.Dhllhhhlhhlhshhhhh.............................................hhhhhhhhhhhhl.........hhhhahhhhhth..tGpT.lGKhlhsl+lls..............psupth....sht..pshlRthh....................................................h.hhhhhhhhlshlhhhh......spc+..p..slHDhlu.s .............................................................sshhpR.hhAhll.Dhhl..lh.h.l.h.h.h.h...hhhhhh..................................................................................................................h.h.h.h.h.h..h..h.h.h...h.hl............h.h.h.hah..h..h..hhth......tGpTlG+thhtl+lls............................tsu.p.p.l.......shh.......p.sl...l.Rthh.........................................................h.hh..h.h..h..h.h..h..l..sh..lhshh...........s.tc+.....p.shtDhhu.................................................................... 
0 414 844 1088 +6101 PF06273 eIF-4B Plant_eIF4B; Plant specific eukaryotic initiation factor 4B Moxon SJ anon Pfam-B_11679 (release 9.0) Family This family consists of several plant specific eukaryotic initiation factor 4B proteins. 28.00 28.00 28.40 28.20 27.90 27.90 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -13.00 0.70 -5.74 6 148 2009-01-15 18:05:59 2003-06-03 12:55:18 6 5 33 0 89 145 2 231.40 23 78.73 CHANGED MS...KPWGGl...GAWAh-AERAE-EptppA......sA.......tAsAsotsFPSL+EAAsupu....p+KhKphoLSEFphssYsts.........potGLTpp-hh.LPTGPRQRSt-Ehp.uRLGGGFpSY...t.........................GGGRpsYuGGFDsDpRs.tsRsSDhspsS..................................RAD-lDDWu+sKK..PhPSh-ptcpu.RYsu.......................hGGGGGG............uht.......Ghts.stusDsDpWuRuc..ss.........hscsG..................cERPRLlLpP.Kt-ssuo.s.Pss..tpS+PuPFGAARPRE-lLAEKGLDW+Kh-o-IEtK.....TSRPoSupSSRPuSApSspstSsh.p....Gs-sss+sRPKVNPFGDAKPREVlLpE+GKDWRKIDhELEH...RcV-RP...ETEEEK.LKEEIppL+tcLccppuh..p.ttstp-.......ptls-hIpphE+pL-hLshELDsKVRF........G.+.hpRPGSGAuRsusaspps.scupsh-sp..ht.M-RPRSRGssDshs+PsD-.RpuFQGS+..ERG 
......................................................................................................................................................................................................................................................................................t....................................pu-..tpW...+............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 19 56 75 +6103 PF06275 DUF1031 Protein of unknown function (DUF1031) Moxon SJ anon Pfam-B_11618 (release 9.0) Family This family consists of several Lactococcus lactis bacteriophage and Lactococcus lactis proteins of unknown function. 19.90 19.90 19.90 119.10 19.30 19.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.66 0.72 -3.94 2 29 2009-01-15 18:05:59 2003-06-03 13:04:35 6 1 24 0 3 21 0 79.00 63 97.37 CHANGED Msthph.shhKL.sLAhpsphs.satp.SVhIosRTKtsHELsplYhDIpshaNpuKpMpWspLaphh.KpLTcsh.l.. .MlKTNFlTLKKLaGLA+NNNasss+K-LSVKISGRTKaNHELSQLYLDICNKYNHSKQMKWp-LYKIL.cELTcshtI-.. 1 1 1 1 +6104 PF06276 FhuF Ferric iron reductase FhuF-like transporter Moxon SJ, Bateman A anon Pfam-B_11690 (release 9.0) Family This family consists of several bacterial ferric iron reductase protein (FhuF) sequences.\ FhuF is involved in the reduction of ferric iron in cytoplasmic ferrioxamine B [1]. This family also includes the IucA and IucC proteins. 
20.80 20.80 20.90 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.83 0.71 -4.28 156 3190 2009-01-15 18:05:59 2003-06-03 13:08:12 7 11 1404 17 525 1984 20 162.10 18 31.46 CHANGED ptWhppYhphhlsPllthh..hpaGlsh.......psHhpNhllhhpst.GhPsp..hhh+Dhtsthhh.st.......h.............t.tt..hh.....p.hhpphhhslhhsplt.lhstLsp........stlscpthWphltphlpthhpphsphttt............hhtthhhtsslstcs.......................Rhtstsptts ....................................................................................tWhppYhphhl..Pllthh...hpaGluL..........-sHh.QNhllth.......cp.........G.....hssp....hhl.+Dhpst.hh.hhpt......................................................................t.ps...hhht....p.hhpphhtsl.....l.spLttlltslsp.......t..ssls...EphlWphltphlpphhsp.hsphssp....................hthhc.h.h.t..p.hshct....................th.......t.................................................................... 0 119 287 418 +6105 PF06277 EutA Ethanolamine utilisation protein EutA Moxon SJ anon Pfam-B_11716 (release 9.0) Family This family consists of several bacterial EutA ethanolamine utilisation proteins. The EutA protein is thought to protect the lyase (EutBC) from inhibition by CNB12 [1]. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 473 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.58 0.70 -5.89 19 699 2012-10-02 23:34:14 2003-06-03 13:13:58 6 2 662 0 49 1989 1083 442.30 60 97.76 CHANGED cp.lhSVGIDIGToTTQLlFS+LplcNhAushsVPRlpIl-KEllY+SslhFTPLhspstIDsctl+pllcppYcpAGIs.psIcTGAVIITGETARKENAppllpsLSuhAGDFVVATAGPcLESIIAG+GuGAtphSc-+pssVlNlDIGGGToNlAlFcpGclhsTuCLclGGRLI+hDs.stclsYIuPthcpllcchulp..lplGspsshpplpplsctMsphLhpslshp.hoshhphLhhsc............slss..stplcs.....loFSGGVA-tlYpptpt....-.FcaGDlGhlLGpAl+psshhp..phplhpssETIRATVlGAGsHTsplSGSTIhhs.p-.hLPl+NlPVl+hst--ct.s..pslspulppplshFcl..csppptlAlAhsGptsssatplpslApulhpuhpphhppphPLllVl-pDhuKVLGpsLhshLshptsllCIDulplpsGDYIDIGpPlhsGpVlPVllKTLlFs ..................................................................................................................................+.pLLSVGIDIGTTTTQVIFS+..LELs.N......cAuVSpVPRaEhIcR-IsapSPVaF..T...PlccQutlcpscLKsLIL..c..QYp.uAGI.sP.EuVDSGA.lIITGESAKp..+..NARssVhALSpSLGDFV..VASAG.P.H.L...ES.V.I.A..G.+GA.G..AQ.o...L..S..E..Q..R..h..C.R.VLNIDIGGGTuNYALFD.AGKlSGTACLNVGG.....R.....L..........LE.....TD......u....QGR....V.......V...Y.....A.....H..p..P..G..Q...h...I....l.....D..E.....s...F...G...u.......G....T...-....u....+....u....L...o....u.u....Q..L......s......Q..V........s....R..R.....M......A.....p.....L........IV.....EVIsG...s.......L...S....PL....AQ...u.L...M..Q..T.s............LLPA....sls...P.El.....ITL.S.GGVGECYRpQPA.......DPFCFuDIGPLLATALH-HPR..LR......EMNVQFPAQTVRATVIGAGAHTL..SLSGSTIWLE..sV.pLPLRNLPVslPh--.......................pDLssAWpQALh...Q.LDL..-PcTDsYVLALPAoL.PVRYAAlLTVIsALlcFVA.Ra..PNP+.PLLVVAtQDFGKALGMLLRPQL.......Q.L.PLAVIDEVhVRuGDYIDIGTPLFGGSVVPVTVKSLAFP.......................................................................................................................................................................... 
1 24 36 43 +6106 PF06278 DUF1032 Protein of unknown function (DUF1032) Moxon SJ anon Pfam-B_11729 (release 9.0) Family This family consists of several conserved eukaryotic proteins of unknown function. 23.70 23.70 25.10 24.10 22.30 23.30 hmmbuild -o /dev/null HMM SEED 565 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.85 0.70 -6.03 4 230 2009-09-11 13:33:15 2003-06-03 13:15:43 6 10 116 0 163 219 1 251.70 20 56.13 CHANGED MNFhEAALLIQGSAClYSKKVEYLYSLVYQALDFISsK++s+QhousspDGspcsss..APppsEp-FhuLDDlssscs....NVDLKcDphssslhIlPLhP.ALVsP-EhEKpssPLhSppGElLASRKDFRhNTChPpPpGsFhL-.lGhs.hpslp.h........httsQ+-.scuEppPMEl..S.susPs.s.phSpE...tsEussssGGh....-E-A-sGAE........thtlEssE.hppcpushpsRtYhLR-R.ptssEssupLpETPDPWpsLDPF-Sh.EsKPF+KGKsYoVPsGl-E.hsupKRKRKGssKLQDFtpWa.sAah-ts-ucRuRRKGPTFADMEVLYWKHlKEpLcs.RphpRRcusp.....paLsctEptlhPlEE-RlEDslE....s.GsADDa.-sE.....-hsE.stus.spEsA..Dl-s.sh.spLpYEELV+RNV-LFlssS..........QKasQETtLSpRlR-WEDsIpPhLQEQEpHssFDIHpYGDplsupaSplspWpsFApLVAGp.saEVCR.MLASLQLANDYTVEloQpPGL-suVDTMuLRLLTpQRAHpRFpTYsAPShsQ. 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lttWp..h...L.......p..p..apht..sp.hht.h.........................................................................h......................................hsp.h.s.L.h.................................................................................................................................................................................................... 0 66 97 134 +6107 PF06279 DUF1033 Protein of unknown function (DUF1033) Moxon SJ anon Pfam-B_11732 (release 9.0) Family This family consists of several hypothetical bacterial proteins. Many of the sequences in this family are annotated as putative DNA binding proteins but the function of this family is unknown. 
25.00 25.00 62.50 62.30 23.20 21.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.84 0.71 -4.29 16 683 2009-01-15 18:05:59 2003-06-03 13:18:02 6 1 679 0 40 200 0 112.80 45 97.96 CHANGED MYpVlchhGDaEPWWFh-sWp-DIlppppF-sap-AhpaYpphW.chcppaspapo+pshhsAFWNpsDpcWCE-C-EsLQpY+SlhLLpspcsh.s..phh.thtppst....t..Cplpt ..MapVlcMaGDaEPWWFh-sWpEDIVppppF-sa.DAhKaYpphW.chcppas.Ypo+pshhssFWsspDp+WCE-C-E.LQpY+SLhLLpsp.pVl.s.ppht.hhphpst.....t...Cph..h............... 0 6 15 30 +6108 PF06280 DUF1034 Fn3-like domain (DUF1034) Yeats C, Moxon SJ anon ADDA_1269 Domain This family consists of several domains of unknown function which are present in several bacterial and plant peptidases. This domain is found in conjunction with Pfam:PF00082, Pfam:PF02225 and is often found with Pfam:PF00746. This domain has a structure similar to an Fn3 domain [1]. 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.68 0.71 -3.66 47 863 2012-10-03 16:25:20 2003-06-03 13:19:24 7 76 562 3 234 844 9 117.40 23 8.11 CHANGED lsLpph.ssp..hsFslsl+NhuspshoYplsst.....shTch.....s..ppGhhhtstts.t....................shshsssslTVsAsuotslslslss.sthtt....................pshalEGalphpsscss..............plolPYh .....................................................lsLtpl.scp...hpaslpl+N.hus...p..s..hTaplssp........shT-t.............s...pps.p.h..stp..t.hs..................................shshs.tsslTlsAsuo.tslssslsssp.s.hsp.......................sh.alE..G.FlpFpuspst..............slolPah...................................................... 0 57 142 195 +6109 PF06281 DUF1035 Protein of unknown function (DUF1035) Moxon SJ anon Pfam-B_11733 (release 9.0) Family This family consists of several Sulfolobus and Sulfolobus virus proteins of unknown function. 
25.00 25.00 46.10 43.70 21.30 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.67 0.72 -4.19 8 29 2009-09-11 12:40:51 2003-06-03 13:21:24 7 1 15 0 1 33 0 63.20 65 61.94 CHANGED VLFsPIlSpVN.LTTstoaTTh...SGTl..TposhlsNPQYVGSSNATlVsLVPlFYLLVLIIVPAVluY+lYKpE VLhssIhSpVNsLT...........SGTs..........PphsGo.NATLlsLVPLFYlLVLIIVPAVlAYKIYK..c.. 0 0 0 1 +6110 PF06282 DUF1036 Protein of unknown function (DUF1036) Moxon SJ anon Pfam-B_11760 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.10 21.10 21.50 21.40 20.70 18.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.72 0.71 -3.77 23 269 2009-01-15 18:05:59 2003-06-03 13:24:19 6 4 205 0 101 224 15 113.00 37 65.94 CHANGED sApA-aclCNpTsshVslAlGY+sss..uWsoEGWWplssssCpsl.lpGsLssRaYYlYApDss....tts....pWsGchphCsp-.scFsI..tGsp-ChsRGapcsGFtElDTup.ppsWTlpLT- ...........................ApADh+lCNsTpshVulAlGY..+s.ts......uWsTEGWWplss.usCcTl...lcGs.L.s..uRYYYlYAcDsp.............tuu........pWsGp..s.phClt....-...pcF.pI...pGsp...D...ChuRGap+sGFtEhDTuc.ptsWhlpLo............................................ 0 25 55 70 +6111 PF06283 ThuA DUF1037; Trehalose utilisation Moxon SJ anon Pfam-B_11803 (release 9.0) Family This family consists of several bacterial ThuA like proteins. ThuA appears to be involved in utilisation of trehalose [1]. The thuA and thuB genes form part of the trehalose/sucrose transport operon thuEFGKAB, which is located on the pSymB megaplasmid. The thuA and thuB genes are induced in vitro by trehalose but not by sucrose and the extent of its induction depends on the concentration of trehalose available in the medium [2]. 
31.90 31.90 32.10 31.90 30.90 31.80 hmmbuild -o /dev/null --hand HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.44 0.70 -4.63 91 1197 2012-10-03 00:28:14 2003-06-03 13:27:10 6 87 601 10 469 1171 683 219.50 23 52.37 CHANGED +lllascs.tthpp....h..h..............shtssltphhpt....stthpssshp-..tpshsc.....pp..........LsphDlllhhsst...t..............sc.phtpslpcaVt.sG.tGllslHsuthsth............p..........apc.lhG................................................................................................................................................uphphHs...........stpphplh..lhsssHPlspGl...s.pt.....................aph.p-EhYshph.hPpss....pVLhoh..............................tsGcsashuashph..............................GtGRlFYpshGH......p..........sh.sa.pssshpphlhsulpWAs ....................................................................................hlhtt....tt..pt....p...h..............shtttltp.hhp...............ttsh..p..s..s..s..t...p..s....p..s...hsp............................pp..........L.s..p.hDl.llhhsst........tpt.........l.........................ss..pth....pt.....l....tph....lp.sG.tGllslH.u.s....hs..sh............................p..................app..lhG..................................................................................................................................................u..t.h....pt....Hs...............shpphplh........lh...s..s.....s..HPlsp..Gl.......pp.................................aph..p-E.hYs..hph...sPp...t..............hpl..Lhshp..................................................tsup.s..aP.....huWsh.ph..............................GtGRlFYsshGH.........................p................st..sa.p.ssshpphlhsulpWu................................................................ 0 213 376 431 +6112 PF06284 Cytomega_UL84 Cytomegalo_UL84; Cytomegalovirus UL84 protein Moxon SJ anon Pfam-B_11948 (release 9.0) Family This family consists of several Cytomegalovirus UL84 proteins. 
The open reading frame UL84 of human cytomegalovirus encodes a multifunctional regulatory protein which is required for viral DNA replication and binds with high affinity to the immediate-early transactivator IE2-p86 [1]. 25.00 25.00 74.40 32.90 18.10 17.80 hmmbuild -o /dev/null HMM SEED 530 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.70 0.70 -6.09 4 46 2012-10-03 01:18:03 2003-06-03 13:30:03 6 2 15 0 0 42 0 419.40 53 97.45 CHANGED MPRAD.TLRNtsR.uRsRKus.............................................................................h.sD-TILTLTDpHclKpPLs++GTY+LIQLHlDhpPp-LQHPFQILLSsPLQL.....................................Es.ss+pDscERGlLCsVuNtDSDIhPshSLFPApsG.CpIlRulIDpQLTQMuIVRLSLNlFAL+IhsPLh+pLPLRRKsspHsALHDCluLHLP-LTFps...............Dts.sscss.TPpLTlpV+pALCWHpsEGGISGPRGLTSRISVRLSEuThpshGPulFGpL.hDs.-SP.DLsLSSLhLYQDslLRFNVThcsuppphPSsP.VSFRLRLRRcTVpRPFFSDsPLPYFlPR+ps..ssGL-VplPY-LoLKsSHhlRIYRRFYGPaLGLFlPHNRQsLpMPVTlWLPRTWLEIoVlssssH...................................lP+soVLGpLYFISSKHTLNRGpLSALTaQVKSuLHspP.......pQLSlLGASluLQDLlPMRlusP..-PpsppQQpssTppspPVTlAMVCsch ......................MPRsD.sLRNtsR.sRsR+s.............................................................................h.tsspTILTLTDpHcl+pPhs+cGTY+LIQLHlDhpPp-LpcPFQILLSTsLQL.....................................Es.ss+pDscERGlLCsVuNpDSDlhPuhSlFPApsG.CpI.lRulIDpQLTcMuIVRLSLNlFAL+llsPLl+pLPLRRK..sscH..TALHDCluLHLP-LTFcs....................s-ts.TspLTlp.V+p.ALCWH..psEGGISGPRGLTSRISsRLS-sThpsLGP....ulFGpL.hDs.-SP.DLsLSSLhLYQDshLRFNV.....T..ssppphPusP.VuFRLRL.RRpTVpRPFFSDsPLPYFlPs+ps..stGL-VplPY-L..oL..KsSHpLRIYRRFYGPaLGlFVPHNR.QsLpMPVTlWLPRoWLEloVlssspp..................................phPRsslLGcLYFISSKHoLNRGpLSAhsHQVKSo.LHups.......p.QLSlLGASIuLpDLlPMRlusP..pPps..pp.ppssTp.spPVTlAMVCsp.h................................................ 
0 0 0 0 +6114 PF06286 Coleoptericin Coleoptericin Moxon SJ anon Pfam-B_11996 (release 9.0) Family This family consists of several insect Coleoptericin, Acaloleptin, Holotricin and Rhinocerosin proteins which are all known to be antibacterial proteins [1]. 25.00 25.00 30.70 30.60 23.50 18.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.90 0.71 -4.29 3 16 2009-09-10 22:13:49 2003-06-03 13:36:34 6 2 8 0 2 17 0 111.00 48 95.23 CHANGED MMKLYIlFGLIAlSAAYVVPEtYYEP.YP-sAua-utRlEP..lSPAEL+aspslsDEsEl-.P.hYIRERRSLQPGAPNFPMPGSQLPTSITSNVEKQGPNTAATINAQHKTDRYDVGATWSKVIRGPGRSKPNWSIGGTYRW ......................................................................................................................................RRSLQPGAPNhs..spp.P.h.plossluR.p.s.s.NTpusINlQ+KG-saDhsAGWSKVlRGPsKuKPTWHVGGTYRW. 0 2 2 2 +6115 PF06287 DUF1039 Protein of unknown function (DUF1039) Moxon SJ anon Pfam-B_12079 (release 9.0) Family This family consists of several hypothetical bacterial proteins from Escherichia coli and Citrobacter rodentium. The function of this family is unknown. 25.00 25.00 26.70 26.90 20.10 18.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.07 0.72 -4.08 5 254 2009-01-15 18:05:59 2003-06-03 13:44:08 6 1 241 0 11 57 0 65.90 55 72.96 CHANGED lAAVNHuLhupAHAILNALPplIPDccsptlCEAlLLFGLN-pscAs+tLAs.hssE-AQsLRuLls ..hAGVNHuLhups+AhLsALs.IlPDKKlpLlC.AlLLhGLN-.hcAsKlLuD.hD..-A.hLphLh....... 0 2 5 8 +6116 PF06288 DUF1040 Protein of unknown function (DUF1040) Moxon SJ anon Pfam-B_12122 (release 9.0) Family This family consists of several bacterial YihD proteins of unknown function [1]. 
20.80 20.80 21.20 21.30 19.10 20.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.66 0.72 -3.75 15 736 2009-09-11 13:49:48 2003-06-03 13:47:46 8 2 731 1 66 206 2 85.40 78 96.16 CHANGED MKCHRlNELlELLpPtWp.K-P-LsLlphLpKLApEuGapGsLs-LTDDVLIYHLKMRso-KsEhIPGLKKDaEDDFKTALL+ARGl ........MKCKRLNEVIELLQPAWQ.KEPDLNLlQFLQKLAKESGFDGcLsDLTDDILIYHLKMRDSAKDAsIPGLQKDYEEDF..KTALLRARGV......... 0 5 21 45 +6117 PF06289 FlbD Flagellar protein (FlbD) Moxon SJ anon Pfam-B_12586 (release 9.0) Family This family consists of several bacterial FlbD flagellar proteins. The exact function of this family is unknown [1]. 20.60 20.60 20.60 21.40 20.50 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.66 0.72 -4.60 55 409 2009-01-15 18:05:59 2003-06-03 15:07:04 6 1 404 0 147 323 3 59.70 43 81.93 CHANGED MIpLTRLNGc..pFhLNu-hIEpIEssPDTsITLssG+KalVpEss-EVlc+llpa+pcIht ....MIplT+L..NGp.....tFhLNschIEplEp..sPD.TsITLssG+Khl.........VcEos-EVlc+lhpa++clh.h...... 0 75 120 133 +6118 PF06290 PsiB Plasmid SOS inhibition protein (PsiB) Moxon SJ anon Pfam-B_12789 (release 9.0) Family This family consists of several plasmid SOS inhibition protein (PsiB) sequences [1]. 20.60 20.60 21.80 26.90 19.50 18.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.74 0.71 -4.77 5 311 2009-01-15 18:05:59 2003-06-03 15:14:08 6 3 206 4 8 165 0 137.20 79 96.56 CHANGED MKTlLTLsVLpTMoupEhEDaRsAGEDhRRELSHAVMRDLTsPuGWsVNAEYRSEFGGFFPVQlRFTPuH.-+FcLAVCSPGDLSPhWllVFlosGGQPFSVVpshcsasPEtIoHTLuLlAsLDAsGYShASIIolLutEGuQ .....MKTELTLNVLQTMNAQEYEDIRAAGSD.RRELTHAVMRELDAPsNWsMNGEYGSEFGGFFPVQVRFTPAH.ERFHLALCSPGDVSQlWh.LVL.......VNuGGcPFAVVQVQ+tFuPEAlSHoLALAAoLDAQGYSVNDIIHILMAEGGQ............... 
0 1 1 5 +6119 PF06291 Lambda_Bor Bor protein Moxon SJ anon Pfam-B_12850 (release 9.0) Family This family consists of several Bacteriophage lambda Bor and Escherichia coli Iss proteins. Expression of bor significantly increases the survival of the Escherichia coli host cell in animal serum. This property is a well known bacterial virulence determinant indeed, bor and its adjacent sequences are highly homologous to the iss serum resistance locus of the plasmid ColV2-K94, which confers virulence in animals. It has been suggested that lysogeny may generally have a role in bacterial survival in animal hosts, and perhaps in pathogenesis [1]. 21.90 21.90 23.10 22.20 21.00 20.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.97 0.72 -10.15 0.72 -4.06 2 335 2012-10-01 23:27:00 2003-06-03 15:22:13 6 3 252 0 24 139 10 94.10 75 96.65 CHANGED MKKMLhusALAhLITGCAQQTFTVtNK.TAVsPKETITHHFFVSsIGQ+Khl.tsKhsGGsENVVKTETQQTFVNuL.GFIThGIYTPhEsRVYCSQ ....................................................MKKMLhus.AL.A.h.LI.TGCAQQTFT..Vt....N..K.PT...A...V..s..PK...E.T..I..TH.HFF..V..S..GI.GQ.cKTVDAAKICGG.A.E.N.VVK.TETQQTFV.NGLLGFI..T..LG.IYTPLEARVYCSQ........... 0 5 10 14 +6120 PF06292 DUF1041 Domain of Unknown Function (DUF1041) Yeats C, Moxon SJ anon ADDA_1192 Domain This family consists of several eukaryotic domains of unknown function. Members of this family are often found in tandem repeats and co-occur with Pfam:PF00168, Pfam:PF00130 and Pfam:PF00169 domains. 
20.70 20.70 20.70 21.00 20.20 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.36 0.72 -3.87 13 481 2009-01-15 18:05:59 2003-06-03 16:54:33 12 20 88 0 229 460 0 98.60 43 7.23 CHANGED psVV+cClcsshpssYphlhspsp..hsp..t.....................tphs..ppssss+pL-.hh+LhpLslsllppsccpYu.shpt.Fs..hslhp.pAEshWoLFusDMctALEtp .....................................ppVV+cClctsh.ssYphlhp.s+..hspt.t..........................t..c.hs......cp.sssh+pL-.h.+LhpLh..lpllpps....cppas..............s..........h.........pt.Fs...slht.pAEshWsLFA.DMchALEtp.............. 0 50 65 143 +6121 PF06293 Kdo Lipopolysaccharide kinase (Kdo/WaaP) family Studholme DJ anon Krupa A, Srinivasan N Family These lipopolysaccharide kinases are related to protein kinases Pfam:PF00069. This family includes waaP (rfaP) gene product is required for the addition of phosphate to O-4 of the first heptose residue of the lipopolysaccharide (LPS) inner core region. It has previously been shown that WaaP is necessary for resistance to hydrophobic and polycationic antimicrobials in E. coli and that it is required for virulence in invasive strains of S. enterica [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.23 0.70 -5.26 14 2201 2012-10-02 22:05:25 2003-06-03 18:59:37 9 20 1403 0 513 8874 747 187.20 22 64.55 CHANGED spaWttpucslspspGRtssWFs.tph....pssh++YaRGshhu+ls+D.atas.hspspuhtEFpllccLcctGlsVP+slAushh+huht..apA.lLoEclssspDLsshltp.......tphppthhctlGphItchHpsslsHsDLss+sILlctpt....t..chhlIDh-+sthph..ss+WppcsLspLhRSh........slthscpDattlhpuY ..................................................................................................................................................t.................h.ht..hhhtt..h....t....phhp.s...h...h......h....s......h.........t.......sh..+Eap.hl..cc...L.p...c.....h...G....l...s..s.....s....p..s....l..u....h...t....t...t.....t....h.....s....s.........h....p...u....h...l....l..T..E.......c.l.......s....s.....s....h...s...lps.h..h.t..p........................psp.h..p...p..........t......l...l........c....t...l.u....p..h....l....t.ch...H.t.s...s.lpHs...Dhhl..pplL.l.+tst.............................thchh...lI..Dh...c..+...up......h.....+..h........t....spc...h..p....p...c..D.L..h.tL.h...................h...t..t...h..h....................................................................... 0 135 275 414 +6122 PF06294 DUF1042 Domain of Unknown Function (DUF1042) Yeats C, Coggill PC anon ADDA_2448 Family Spef is a region of sperm flagellar proteins. It probably exerts a role in spermatogenesis in that the protein is expressed predominantly in adult tissue. It is present in the tails of developing and epididymal sperm internal to the fibrous sheath and around the dense outer fibres of the sperm flagellum [1]. The amino-terminal domain (residues 1-110) shows a possible calponin homology (CH) domain; however Spef does not bind actin directly under in vitro conditions, so the function of the amino-terminal calponin-like domain is unclear [1]. 
Transcription aberrations leading to a truncated protein result in immotile sperm [2]. 25.00 25.00 25.50 25.80 24.70 24.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.90 0.71 -4.64 6 395 2012-10-03 10:10:54 2003-06-04 11:42:13 6 13 119 1 270 374 4 139.10 24 23.64 CHANGED LspWLpp...lspolss+shsRsFSsGhLlAEll++aaPp.lDhpsassusSlphKLsNWupl.pKsLsKLshplscslhcplhsspPGssEhLLapLhthL...p+pcppulsshptpshp.posh+h.slpppshp-ulhphhpp....u..p.lcpstslt ..............................lhpWlpp....lsloh...+sh.p....R.D.F.SsG.hLl.AEllp.pa.h.Pphl-hcsass.u.sShptKlsNWspL.................c.............+s.......Lp+...L...s..h.p..ls.p.....p.hhcsl..h.psp.s.Gsspt....lL.hpLhttl..........ppp...p...t.t....t......................................................................ttt..................................................................... 0 107 137 206 +6123 PF06295 DUF1043 Protein of unknown function (DUF1043) Moxon SJ anon Pfam-B_12007 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 28.80 28.80 29.10 28.90 27.80 28.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.71 0.71 -4.50 5 884 2010-01-13 14:53:39 2003-06-04 14:20:45 7 2 879 0 118 373 38 126.10 54 92.06 CHANGED YAlIGLlVGlIIGhlIsRLTssplKpQpslcKELEsuKtEL-pQRpELpDHFAcSAELL-TLu+DYpKLYQHMAKoSssLLPpLstpsNPFspptt.ps-cSNspE.....lppQPRDYupGAoGLl+spcKc ........................YALIGLVV.GI.IIGA.V...A.MR.F......G.NRK.....L......RQ....Q..p....uLQ.hELE.....K.sKsEL-EYRpELVoHFA+SAELLDTMA+DYRQLYQHMAKSSosL.L..P...E..l.s.s........c.....s.NP.F+.sRLs......E.....S...EA..S....NDQu............PV.ph.PRDYSEG.AS.GLLRsttK.c........................................................... 0 20 48 87 +6124 PF06296 DUF1044 Protein of unknown function (DUF1044) Moxon SJ anon Pfam-B_12045 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.46 0.71 -4.27 36 879 2012-10-03 00:18:00 2003-06-04 14:22:33 7 2 693 0 167 538 38 103.50 27 90.35 CHANGED MRlFKs+hFs+aA++ct.lsDpsLtcAlcchtpGhlDADLGGGVhKKRlucs.GpGKpGGaRollha+t..uc+hFFlaGFAKs-p.u....NIoscEltsh+plApthhshocppLspLlpsttlhEl ............................................................................p..t....h.t....h.......t.h.........t.s...h....t.......s....L...G..G..G.....lhK..hR..l..s..ps..tp........GKpGGhR....sI......h.h.h....ps.....s.p.p.ha.a..h.h.hYuKs-h.s.....sIospEhtsh+phuphh.......................................... 0 34 101 130 +6125 PF06297 PET PET Domain Yeats C, Moxon SJ anon ADDA_3209 Domain This domain is suggested to be involved in protein-protein interactions [1]. The family is found in conjunction with Pfam:PF00412. 25.00 25.00 25.80 25.40 24.50 24.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.37 0.72 -4.38 18 570 2009-01-15 18:05:59 2003-06-04 14:23:54 9 11 111 0 287 491 0 98.40 44 17.57 CHANGED phshpssssoccDssssp..pYsWsPPGL.s.chlppYhphLP--KhPhlGStGt+aRp+QLlaQLPsaDp-sphCcuL.s-cEt+phc.FspppKcEuL.GhGsV+hhs ...............h...hpp.shuccsssh.sh...pYtWsPPGl..p.c....sppYhp.h.L....Pc-KhPhssS.Gt....pa..R..h+QLhpQLPsHDp-sphC.ps.L...s.......-c.E.t.+Ehc.Fsp.ppKcEAL.GhGsV+...s....................... 0 56 76 170 +6126 PF06298 PsbY Photosystem II protein Y (PsbY) Moxon SJ anon Pfam-B_12212 (release 9.0) Family This family consists of several bacterial and plant photosystem II protein Y (PsbY) sequences. PsbY is a manganese-binding protein that has an L-arginine metabolising enzyme activity [1]. 
20.40 20.40 23.30 20.90 20.00 20.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.59 0.72 -4.43 22 176 2009-01-15 18:05:59 2003-06-04 14:34:32 6 4 112 0 72 159 18 35.00 45 49.48 CHANGED MDhRlllVlhPIllAuuWAsFNIG+sAlpQlpp.hhp ........D.RlllVlhPll.AhuWsLFNIhpsALsQlp+.h..p..... 0 16 49 63 +6127 PF06299 DUF1045 Protein of unknown function (DUF1045) Moxon SJ anon Pfam-B_12127 (release 9.0) Family This family consists of several hypothetical proteins from Agrobacterium, Rhizobium and Brucella species. The function of this family is unknown. 24.30 22.60 25.60 23.80 24.20 22.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.87 0.71 -4.56 43 281 2012-10-03 21:31:48 2003-06-04 14:48:08 7 3 253 0 93 273 102 158.20 40 65.60 CHANGED TLKAPFpLAsGtstspLhsAspsFAtphsPhslstLtlsplGsFlA..LlPssss..ssLppL...AussVppFDsF...RAPhots-luRR+sstLospQcphLpcWGYPYVh--FRFHMTLTGpL...stsppstlhshLpstasslh..psltlsslALFsEsssGssFpl ..............TlKAPF+LAssto..pscLhAAhtpFAustsPhslstLplpp...l........u....uFhA....LsPspss......stLppLAsssVpsFDtFRAPho.-.A.E.l..ARRcsptLotcQcp.LtcWGYPYVh--FRFHMTLTG.l...stpcps...t..lppsLcphass.h.ls.psltlsslALFsEsptGuPFp.............................. 0 13 48 65 +6128 PF06300 Tsp45I Tsp45I type II restriction enzyme Moxon SJ anon Pfam-B_12151 (release 9.0) Family This family consists of several type II restriction enzymes. 
25.00 25.00 25.30 25.00 21.40 20.60 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.78 0.70 -4.94 3 36 2012-10-11 20:44:44 2003-06-04 14:53:36 7 2 22 0 3 35 5 222.00 67 99.39 CHANGED Ms..NhWTcLSIEYANQRSYLDDLFQVYsTIP-SlRoIsEKlWSNVEcAFK....RKDN....LuLIcELLN....LDLFPIKDSYlA...YLK+DKo.....ALE........RNPRTINRICGRLYEMGLNKIaEKsSEPKETNRQIGPMFKcWh+cKSLG................VEP....VDLusFIANE-DAIL..cASDclMtDFAKEaLGYpHc.....KGLDFlARFN+o.....YlIGEAKFLTDFGGHQNAQFNDAIuTIEAc.sl+AIKVAILDGVLYIcupNKMaKulsohYK-aNIMSALVLR-F...LYQ.......L ...................................................M....NhWTchSIEhANQRsYLDcLFpVYPhIP-ulREIDuKlWSNlEYHFK....pKDN....LuLIsELLN....LDLFPIKDSYhA...YLKRDKS.....ALE........RNPRTINRICGRLYEMGLNKIFEKCSEPKETNRQIGPMFKcWlNNKSLG................VEP....VDLNDFIANEsDAIL..+ASDslMuEFAKoHLNY+Hc.....KGLDFlARFNKK.....YIIGEAKFLTDFGGHQNAQFNDAISTI-sP....NlKAIK.VA.ILDGVLYIcSNNKMpKhLs.TpY+NapIMSALVLR-F...LYQI.............................. 0 2 2 3 +6129 PF06301 Lambda_Kil Bacteriophage lambda Kil protein Moxon SJ anon Pfam-B_12201 (release 9.0) Family This family consists of several Bacteriophage lambda Kil protein like sequences from both phages and bacteria. Induction of a lambda prophage causes the death of the host cell even in the absence of phage replication and lytic functions due to expression of the lambda kil gene [1]. 23.70 23.70 23.90 23.80 23.40 21.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -7.95 0.72 -4.53 3 292 2009-01-15 18:05:59 2003-06-04 15:00:54 6 2 219 0 6 103 1 42.10 66 73.92 CHANGED MDQoLMAIQoKFsIAsFIGDcKMaREAV-AaKcWI.h.hhRpK ....MDQsLMAIQoKFsIAsFIGDEKMaREAV-AaKcWI.l.hhRpp........ 0 0 0 1 +6131 PF06303 MatP DUF1047; Organiser of macrodomain of Terminus of chromosome Moxon SJ anon Pfam-B_12243 (release 9.0) Family This family, many of whose members are YcbG, organises the macrodomain Ter of the chromosome of bacteria such as E coli. 
In these bacteria, insulated macrodomains influence the segregation of sister chromatids and the mobility of chromosomal DNA. Organisation of the Terminus region (Ter) into a macrodomain relies on the presence of a 13 bp motif called matS repeated 23 times in the 800-kb-long domain. MatS sites are the main targets in the E. coli chromosome of YcbG or MatP (macrodomain Ter protein). MatP accumulates in the cell as a discrete focus that co-localises with the Ter macrodomain. The effects of MatP inactivation reveal its role as the main organiser of the Ter macrodomain: in the absence of MatP, DNA is less compacted, the mobility of markers is increased, and segregation of the Ter macrodomain occurs early in the cell cycle. A specific organisational system is required in the Terminus region for bacterial chromosome management during the cell cycle. 20.00 20.00 21.70 21.10 19.50 19.00 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.02 0.71 -4.29 13 737 2009-01-15 18:05:59 2003-06-04 15:36:37 7 1 719 0 67 231 1 145.90 71 97.72 CHANGED MKYQQLENLEsGWKWpYLlKKHREGEsIT+alEpStA-suVppLlplEppPscV.cWIcpcMsPcLcN+hKQuIRA+RKRHFNAE+QHTRKKSIDL-YtVWpRLSthupchGsTLSETIshLl-EAE+KEpYucphSuLKpDL+cLLs .................................MKYQQLENLESGWKWKYLVKKHREGELITRYIEuSAAQEAVchLLuL.....ENEPVhVssWI-cHMNP-LlNRMKQTIRARRKRHFNA.EHQHTRKKSID..LEFhVWQRLAGLAQRRGpTLSETIVQLIEDAEpKEKYAspMSSLKQDLQALL.G.............. 0 5 19 47 +6132 PF06304 DUF1048 Protein of unknown function (DUF1048) Moxon SJ anon Pfam-B_12247 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
29.60 29.60 29.60 30.10 29.10 29.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.21 0.72 -3.71 16 339 2012-10-01 22:34:14 2003-06-04 15:38:59 6 1 278 4 47 204 1 101.10 38 82.38 CHANGED ppsl-pK+Ea+tacsRscsLPcDYptsaccIp+YlaphG.sschtshhtIhssll-LFEcuAA-G+sVt-llG-DVAuFsDpLls-sts..atDKaR-pLNcsls ...................t.Khls-KKEY+thh.tRl.tALPpDYphlaccIppYhWp.Fu..stsGtsM....hplhp-llDLFEpuAA-Gpplh-lsG-DVusFADpLlssst...s...as.cK.+ccLNcpl............... 0 25 39 44 +6133 PF06305 DUF1049 Protein of unknown function (DUF1049) Moxon SJ anon Pfam-B_12262 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild --amino -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.13 0.72 -4.50 84 2233 2009-09-13 05:33:55 2003-06-04 15:41:11 6 3 2122 0 510 1349 240 65.60 24 60.14 CHANGED hpNsphVslsalhupsp..hsLulllhhsFshGhllGh.Lhshhhh..h+h+hphpphp+......plpphppplsp.p ...........................pNs.p.Vshsa..l.h......s...p.sp....h.........sLs...l..l..lh..sshs..hGhllGh.llshhhh.......h+.h+h....pl.tchp+.......clpphppp.....t................................................... 0 138 316 417 +6134 PF06306 CgtA Beta-1,4-N-acetylgalactosaminyltransferase (CgtA) Moxon SJ anon Pfam-B_12320 (release 9.0) Family This family consists of several beta-1,4-N-acetylgalactosaminyltransferase proteins from Campylobacter jejuni [1]. 
20.20 20.20 20.30 21.00 19.30 20.00 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.27 0.70 -5.23 2 149 2012-10-03 05:28:31 2003-06-04 15:48:29 6 3 96 0 12 116 1 256.00 46 86.71 CHANGED MLFQSYFVKIICLFIPFRKIRHKIKKTFLLKNIQRDKIDSYLPKKTLlQINKYNNEDLIKLNKAIIGtGHKGYFNYDEKSKDPKSPLNPWAFIRVKNEAITLKASLESILPAIQRGVIGYNDCTDGSEEIILEFCKQYPSFIPIKYPYEIQIQNPKSEENKLYSYYNYVASFIPKDEWLIKIDVDHhYDAKKLYKSFYIP+ppYcVlSYSRlDhha.p-pFalh+sppGpILKtPGDCLhIpNhNLhWKElLI-..sssaKhsTtps..pNh+ShE.LKh+pRIhFpTELNNYHFPFlKpaRtpDhhphNWlSl--FhK.YhpplpppI-.pMlphcTLKKlahpLh ......................................................................................................................t..........HpGaFsaDpcupssKSPLNPWAFIRVKNEulTLcsSLcShLPAIQRGVIGYNDC....sDGS.cEl...ILEFCKpYPS......FIPl.p.Y.....P...........Y...............El.....p.l..pN.....s.p.......S...-......c.N..p.LYs.......YYNYs..hSFIP......psEWlIKIDsDHhYDAKK..L..YK..SFYhs.cpph..csl.YsRlsFhh.stp...h..al.pstp.ht..hh.t..GDphll.s.s..a.E...................psh..shE.Lt.h+p.hhhh..p.sEL.paHFPhhK..Rptsh.ph.shlsl-..-F.hK..atc....ppIp.pMlpcchl.phh.p..h................................................................................................................... 0 6 8 9 +6135 PF06307 Herpes_IR6 Herpesvirus IR6 protein Moxon SJ anon Pfam-B_12322 (release 9.0) Family This family consists of several Herpesvirus IR6 proteins. The equine herpesvirus 1 (EHV-1) IR6 protein forms typical rod-like structures in infected cells, influences virus growth at elevated temperatures, and determines the virulence of EHV-1 Rac strains [1]. 
25.00 25.00 105.90 105.80 24.10 17.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.50 0.70 -5.00 4 38 2009-01-15 18:05:59 2003-06-04 15:55:52 7 1 12 0 0 18 0 133.80 66 83.56 CHANGED spustchh.Ns.sthlGhsYFRRCRpEhNEGaaApVPsGYFPVtPSphPshV.Vcu......hsGEsluFpslPsP+h-hRFaKQLpDGTFVRLPF.YP-EhYEsE.pPhtsRaYlpADscssSS....ssPSsLhEphFscVPsult.EthspWsGPK+lPlPscRYVLKhGaE..hp.pVTEDAFp.lsTphLRL-htst..................ss...sAt.s.ptspVpts .ssssu.....sTGPGCEGGLGGWRLFKACRHEQEDGLYAMLPPDYFPVVPSSKPLLVKVPAPGASPDRTGGAVHFECVPAPRRPLQFFRQLYDGT........................................................................................................................................................ 0 0 0 0 +6136 PF06308 ErmC 23S rRNA methylase leader peptide (ErmC) Moxon SJ anon Pfam-B_12332 (release 9.0) Family This family consists of several very short bacterial 23S rRNA methylase leader peptide (ErmC) sequences. ermC confers resistance to macrolide-lincosamide streptogramin B antibiotics by specifying a ribosomal RNA methylase, which results in decreased ribosomal affinity for these antibiotics. ermC expression is induced by exposure to erythromycin [1]. 25.00 25.00 53.40 53.40 17.10 17.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.96 0.72 -7.11 0.72 -3.90 3 48 2009-01-15 18:05:59 2003-06-04 16:01:11 6 1 29 0 4 15 0 27.50 96 98.65 CHANGED MLVFQMR....NVDKTSTVLKQTKNSDYADK MLVFQMR....NVDKTSTVLKQTKNSDYADK 0 1 1 1 +6137 PF06311 NumbF NUMB domain Yeats C, Bateman A anon ADDA_13736 Family This presumed domain is found in the Numb family of proteins adjacent to the PTB domain.. 
19.90 19.90 20.60 20.60 19.70 19.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.25 0.72 -3.88 8 177 2009-01-15 18:05:59 2003-06-05 14:09:23 7 4 74 0 80 195 0 86.40 63 15.01 CHANGED sPaAIPRRHAPs-.LhRQGSFRGFsulupp.......SPFKRQLSLRlN-LPSThQRpp................-hpspsPlsEhEst..uts......D.SISuLCsQIosuFS ................PaAIPRRHAPlEQ..LsRQGSFRGFPALSQK.......SPFKRQLS....LRlNELPSThQR+T.........................DF.lKss.V..P..ElE......G-u............-.SISuLCoQIosuFu............................................ 0 16 22 47 +6138 PF06312 Neurexophilin Neurexophilin Moxon SJ anon Pfam-B_12369 (release 9.0) Family This family consists of mammalian neurexophilin proteins. Mammalian brains contain four different neurexophilin proteins. Neurexophilins form a family of related glycoproteins that are proteolytically processed after synthesis and bind to alpha-neurexins. The structure and characteristics of neurexophilins indicate that they function as neuropeptides that may signal via alpha-neurexins [1]. 
20.70 20.70 21.00 20.90 20.20 20.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.63 0.70 -5.10 6 508 2012-10-03 16:25:20 2003-06-05 14:41:06 7 8 43 0 338 454 0 177.90 29 47.06 CHANGED Rlht+cssssssshtp.oshtss.........p-aWDhLus.o-lpt........s.sRsKR+Pll.pTGKhKKhFGWGDF.aSNIKTVKLNLLITGKIVDHGNGTFSVYFRHNSTGpGNVSVSLVPPSKlVEF-lpQQhhlEsK-SKs.FNCRlEaEKlD+upKTuLCsaDPSKlCaQ-QTQS+VSWhCSKPFKllC.lYIuFYSsDY..+LVQKVCPDYNYHS-TPYhsSG .....................................................................thht............................................................................................................t...Kha..G.GDF.pup.l.p.o.....s....p.....L....p.....h....h..ss..G+l.s.D.asNGTapV.Fph.hsGps.sl..SVpLV.PScuVph....................................p........ph......h...c.t.p...s+s....Fps.p.h..c...........t.p.....s.p..c.s...p..c..s....s......l...s...................s.......s......s.......p......h...Cphppsps...ptsWhC.KP..h..+..l...C...hl.s..ah.p.st....................................................................................................... 0 100 120 202 +6139 PF06309 Torsin Torsin Moxon SJ anon Pfam-B_12047 (release 9.0) Family This family consists of several eukaryotic torsin proteins. Torsion dystonia is an autosomal dominant movement disorder characterised by involuntary, repetitive muscle contractions and twisted postures. The most severe early-onset form of dystonia has been linked to mutations in the human DYT1 (TOR1A) gene encoding a protein termed torsinA. While causative genetic alterations have been identified, the function of torsin proteins and the molecular mechanism underlying dystonia remain unknown. Phylogenetic analysis of the torsin protein family indicates these proteins share distant sequence similarity with the large and diverse family of (Pfam:PF00004) proteins. 
It has been suggested that torsins play a role in effectively managing protein folding and that possible breakdown in a neuroprotective mechanism that is, in part, mediated by torsins may be responsible for the neuronal dysfunction associated with dystonia [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.65 0.71 -4.25 8 383 2012-10-05 12:31:09 2003-06-05 14:44:56 6 6 102 0 205 556 182 119.00 39 36.68 CHANGED ChFt-CCs...-scIshshpuL-pDLsspLaGQHLspclVV+olKualpss.pPpKPLVLSFHGaoGTGKNaVucIIAcNhYRsGL+SsaV+hFlAThcFP+sppl-pYKhcLpsplppohptCpRSlFIF .........................................C.h.th.Ct......pp.....s..hp...........uLcp-Lpp+LaGQHLApc........l..l........lpAl.p.u.a.......l...s.s.......p.PcK..P.L.s.L.S.hHGaoGTGK....N....aVupllA..csl...a.p.p.G.......h.c.SsaV+...hFlu.s.......hH....FP.Ht..p..plp..h..Y.......+............cpLpphlpsslstCsp.SlFlF.................................................. 0 48 64 114 +6140 PF06313 ACP53EA Drosophila ACP53EA protein Moxon SJ anon Pfam-B_12718 (release 9.0) Family This family consists of several Drosophila ACP53EA accessory gland (seminal) proteins. 25.00 25.00 34.90 30.30 19.40 22.50 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.30 0.72 -4.05 6 87 2009-09-10 23:46:03 2003-06-05 14:46:14 6 2 13 0 18 93 0 90.20 30 76.51 CHANGED asKhhcCscVus-ulupLlcpsIPsVppLtpCsDY...pPtpsKspShltal+lsYpFhKKhlhccPcCLlhhlspussLl+Pahcpl-oLpClsE ....hp+hLcCscluhcususLhpcsIPsl+pLtpCssF...pP..pst..sh.shhthlclsYpFl++.lh.sp.cCLlsslpchhshlpPalppl.shpCh..... 0 4 4 11 +6141 PF06314 ADC Acetoacetate decarboxylase (ADC) Moxon SJ anon Pfam-B_12720 (release 9.0) Family This family consists of several acetoacetate decarboxylase (ADC) proteins (EC:4.1.1.4). 
29.80 29.80 29.80 29.80 29.60 29.60 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.49 0.70 -5.02 72 670 2012-10-01 19:21:02 2003-06-05 14:46:38 6 12 525 20 280 644 110 225.10 20 74.15 CHANGED shPl.hss....shs............st-hlhlpYcTDscslppllPt.sLclsp.......slVth.hhchssso.uhh..........sYpEsu.sl.Vp.ap................Gp....p.GsYshhhalD...scsslsuGRElhGaPKKlup.plpp.............s.ssslsGslch.suhplspsshshctps.s..tshtthpt...........s.hhh+hlPs.hsu.tsp...lscLVphthsshsht......tsasGs.usLpL.tsp.shssls..........pLsl.hclluuhahhsshs......lshsphlhc............L .....................................................................h...............................stphhh..hh..apss.ct...l..p...pl.l......P.........t....s.....Lchsp...............slVt...h..hh..c..hs.css..u..................sYsEhuh...s.l.....Vp..ap.........................sc...............t.usa..shthhlc...sp.slsuGREl.aGaPKphAs.hs.lpp.........................................pssphtusl...sh...su.....htlsphshshtt...s......h..htt.....................hhhchlsp....ss....................hsphl.h...hpsh.th.................thh.Gs..uplpl.......t..............t.h.t.ls..........pLsl..hch.l...tu.hh..................................................................................................................................... 0 83 180 237 +6142 PF06315 AceK Isocitrate dehydrogenase kinase/phosphatase (AceK) Moxon SJ anon Pfam-B_13100 (release 9.0) Family This family consists of several bacterial isocitrate dehydrogenase kinase/phosphatase (AceK) proteins (EC:2.7.1.116) [1]. 
20.80 20.80 20.80 27.90 20.60 20.70 hmmbuild -o /dev/null HMM SEED 562 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.86 0.70 -6.23 39 830 2009-01-15 18:05:59 2003-06-05 14:47:17 6 3 792 6 133 519 192 541.90 62 96.03 CHANGED lAcsILpGF-paathFpplTttAcpRFEpuDWpulQpAs+cRIphYDc+VppssppLcp....hsspths..............tshWtplKptYhsLlhs+.p.ElAETFFNSVhs+lac+phhps-hlFVpsshs....csc..sh.pshh+sYhss.....ssLppslpcllp-ashphPa-DLpRDlphlhptlppphss.......hthssshplclLpslFYRNKuAYlVG+lhs..ssp.....hPFll......PlLpscsG...tLhlDslLhspsclullFSFuRuYFMV-sphPuuhVpFL+sLhPpKspuELYoulGhtKpGKT.FYR-hlpHL.cpS..sDpFllAPGIKGMVMhVFTLPSasaVFKlIKD+FussKpho+....tpVpsKYpLVKpHDRVGRMADT.EasslsFP+pRFSs-LLpELpphsPSplc...hpGD....pllI+HLYlERRMsPLNlYLcpAs-p.........p....lcpAlpE....YGpAIKpLAAANIFPGDMLhKNFGVTRHGRVVFYDYDEIpYlT-sNF.RplPtPcp.E-E...........huuEsWYSVuPpDV..F.PEc.FshFlhussplRchFhchHu-Lh-ssaWpshQppI+pGpltDVaPYcpptR ......................IAQTILQGFDAQYGRFLEVTuGAQQRFEQADWHAVQQAMKsRIHLYDHHVGLVVEQLRsI...Ts.u.p.s.sD............................ssFLhRVKcHYTcLLPDYPRFEIAESFFNSVYCRLFcHRsLoP-+LFIFSSQP.......ERRaRol.PRPLu+cFaPc.....cGhcshLhRlloDLPLRL.WpNpuRDIcYIlppLpEsLGsc........pLscs+lQVANELFYRNKAAWLVGKLls....ssu..s....LPFLLPI......Hps...-........cG...................cLFlDTCLTosAEASIVFGFARSYFMVYAPLPAALVEWLREILPGKoTAELYhAIGCQKHuKTEsYREYLsaL.p.ts...............sEpF..ItAPGIRGMVMLVFTLPGFDRVFKlIKD+FAPQKEhot....A+V+ACYQLVKEHDRVGRMADTQEFENFVL-K++IuPALht.LhpEuspKIsDLG-......pIVIRHLYIERRMlPLNIaLEQs-sQ.................Q....LRDAIEE....YGNAI+QLAAANIFPGDMLFKNFGVTRHGRVVFYDYDEICYMTEVNF.RDIPPPRYPEDE............LASEPWYSVuPsDV..F.PEE.FRH.aLCuDPRltsLFEEMHADLFcA-YWRuLQsRI+-GHVEDVYAYRR+QR............................................ 0 19 60 96 +6143 PF06316 Ail_Lom Enterobacterial Ail/Lom protein Moxon SJ anon Pfam-B_12325 (release 9.0) Family This family consists of several bacterial and phage Ail/Lom-like proteins. The Yersinia enterocolitica Ail protein is a known virulence factor. 
Proteins in this family are predicted to consist of eight transmembrane beta-sheets and four cell surface-exposed loops. It is thought that Ail directly promotes invasion and loop 2 contains an active site, perhaps a receptor-binding domain [1]. The phage protein Lom is expressed during lysogeny, and encode host-cell envelope proteins. Lom is found in the bacterial outer membrane, and is homologous to virulence proteins of two other enterobacterial genera. It has been suggested that lysogeny may generally have a role in bacterial survival in animal hosts, and perhaps in pathogenesis [2]. 20.80 20.80 21.70 20.80 20.00 20.70 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.13 0.71 -4.80 2 1349 2012-10-03 17:14:37 2003-06-05 14:47:45 6 3 430 0 22 1298 2 177.20 65 97.07 CHANGED MRKlCAsILSAslhLssuGsPA.AuEHQSTLSAGYLpspTDhPGSDDLNGINVKYRYEFTDTLGLITSFSYANAEsEQKTHYsDTRWHED.VRNRWFSVMAGPSVRVNEWFSAYAhAGVuYuRVSoFuGDYhplTssctKpp-hLTtSDsuRhSpTuLsauAGVQhNPTEslsVDlAYEuSGpGDWRT-uFIVGsGY+F .....................................MRKlhAsILSA.s.lh.L.ss.u.G...s..P.A..h..A.u..E..c..Q..uTLSAGYL..psp........T...s...s.......P.......G.......S......D......s.........L....p....GINVKYRYE.F.T.D....s..LGhlTSFSY.A....ssc........s...c..Q..h.T....+.Y............S.....D........T..R.......W.......H.E....D.SVR.N...R......WFSVMAGPSVRVNEWFSAYAM..AGVAY..SRVSTFSGDYLRVTDNKGKTHDVLTGSDDu..R..HSNT..SLAWGA.GVQFNPT.ESVsIDlAYEGS.......G...S....G....D.....W.R.T...D....GFIVGVGYKF.................................................. 1 2 6 10 +6144 PF06317 Arena_RNA_pol Arenavirus RNA polymerase Moxon SJ anon Pfam-B_12490 (release 9.0) Family This family consists of several Arenavirus RNA polymerase proteins (EC:2.7.7.48) [1]. 
32.40 32.40 32.60 39.20 31.70 32.30 hmmbuild -o /dev/null HMM SEED 2207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -14.22 0.69 -15.07 0.69 -7.99 14 233 2009-01-15 18:05:59 2003-06-05 14:48:14 6 1 46 2 0 207 0 1086.60 42 99.88 CHANGED M-EplsElKDLlpKals-c-cLucQKhsFLuQscs+hlLhEGLKLLShhlElDSCctsuCpHNocpKoV-sILp-pGllCPoLPhVlPDGaKLsGssLILLEsFVRssPpsFEpKacpDhsKLhsLKcDLppsGITLlPllDGRosY.schhPDWssERhRaLLhcLLchuppssch.hEEpEYpRLscSLSsspsps.GlEslpsLK-sphsa.-+Lhc.hatGlNsclstscl+tclhclaphFRpcl.cpGhhcspFh+o-+ppLlppLsp..hh....hstttpsslsph.hshs+hhsllt.Lhhp.lcptpptpp.ssp.ph......p.ahplLShhNKlKuLKVhNTRRpsLLhLDlIhLshhhclhctpstthpp.....hhlGpthhuVNDRLlsl.tsh+.lcKhlppp...................spahphsp+hLp+uppsL.hsslohhhhtsD..h-hhhplu.phh.c....th+PshpYppspppthssstpphc..........t-php.L+tLSSlSLuLVNSMKTSuos+lh.NEtshtRY.tsVRC+EsYhQcF.hhcuhthhLlYQKTGEpS+CYSl.hsc......hsEhsSFYuDPKRFFLPIFSp-VLhphs-pMlSWL-hss-....lpslcsLL+hLlLsILssPSKRsQpaLQGLRYFlMAaVs-hHHh-LhsKL+.-hlTsoEhllh+LhssLhphlLspuscs...hop+FKFlLNlSYLCHLITKETPDRLTDQIKCFEKFlEPKlcFsslhVNPspsu..TtEpE-thlcuhc+FhSKchpsps-...hKpPGVS+-llShhlShFNsGpLhhpu-LKp...DPhpsShouTALDLuSNKSVVlsKhs-.GphlhpYDapKLVSssVsplsEpFppKGKYpLs.c-h-atI.+sLosLV.spppspssp-chs...........-pLsE.....-tschFcpl+psVphsls+hcpspphp...st...tpppshss...Lcplhssc....thh+hI+uElShH.VcDFDsslLst-hYcElCsuVY.sssh+spYFhpcshc.CPLshLhKNLoT+sYp-pEaFpCFKhlLlQhshsphhG+YcHhp+ppLsFp.-sh+Lc--VRISpRESNSEAlu+tLshohhTsAuL+NLCFYS-ESPTpaTSlussoG+LKFuLSYKEQVGuNRELYlGDLpTKhhTRLlEDYhEAlspphchoCLNsEKEFE+AIhsMKhsVs.GpluhShDHSKWGPhMSPhLFLshLpsLchc..cstsclc.p-sV.slLpWHlHKlVEVPFNVlpAhhpualKRpLGLMspsupoloE-FFaphFp.pGhVPSHISSVLDMGQGILHNoSDhYGLIoEpFINYsLchlasshssuYTSSDDQIoLhcpshhch.-...p-s-EaLplLEFHpaLSspLNKFVSPKSVhGoFsAEFKSRFFVWG-EVPLLTKFVAAALHNlKCKpPpQLAETIDTIhDQsVANGVslclVstIpcRT.pLlcYSsYP.sPFLhhpcpDVKDWVDGoRGYRlQRpIEslhs-s..sphIRsssRKlas+lKpGclaEEahlshlupsss-uLpshhphhus-p.phc-.LpapWLNLsuatsLRhVLRpKlh.oushsL-cEclPTLlKTlQSKLSpNFTRGApKlLuEAINKSAFQSSlASGFIGhCKohGSKCVRssptt.lYIKclhsplpsppt..s.hss.thslhhCppsLcp..
.......lspa.RPlLWDYFslsLoNAhELGsWVhupsph.....sphshhhsNPs.aaslKPpss+.LED+lGhNHllaSlRR.YPplFE-HLhPFMSDLsss+hpWssRlKFLDLCVALDhssEsLullSHllKhKR-EhYIVhps-LuhuHsRpspslspEhslSspclCpNFhhQlhF-ShlpPhVhTTpsLtSFhWFschhpLP-.-uttcLG.hosFl.clhppslpRsMahcDLthGYsh.shchsphhls.ssplhs.sLsspcphpslp-hhthhus.cshslclslolcap+pSpchchcRphshphphps.hphssl............hplcslshhhSGulssHhlLDshpLlhspPhFpGcps..hslsshhhcs.lsts.sptshhpplclshssahh-L.scasacllGP-sp.sPLVlccGtlhcssc+lushpsplpspslhhhlssL..-cp-tltshLssLa.ahtsss.p..t..lchsppshthhh-tacphLscsLcshs-Wh-FtsaulCaSKShsslMIpsspGshRLKGhhCR.LhtsspshE .............l..h+.hh.p.h.p..th..Q+..hLsQ.p...lhhEGhKLLS.hlEl-Ss.tpsC.hN..ph.lp.lL.c.tlhsPsLs.lhPDGaph.spsllLLEsFVRss..pFEpKa.tDh.KL.plptDl...sl.LlPllDGRssa.sp.hs-Ws..+hR.hlhplhp..p...t..hE-.EY.RLh.SLt.h.sp..Gh-sh..l.c.t..a..pl.t.hh.sl.sphp...hh.tl.p.h..ap..l..tt.....h...p...hlt.h.............................h..p..p...hh..l......................................h..hhS.hNKlKu.+lhNTRR..hh.hDhlhh...h..................hh.t.h..sNDRhlsh..h.p.h.phh.tp......................h...h..hh.p.................l.......p...t.......................P.h.Yp..t...................................hphLSplsLuhhNSMKTu.ss+hh.Np.t..pa.t.V.hpEsa.Q.h....t....LhYQKTGEpo+CYul...................SFYsDPKRaF.slhSt.Vl.thh..MhsWlt...........ht..hhhlhl.hls.PoKR.Qh.lQshRYhlMAhssph...pL.pKl....ho.s-..h..h...lh..lht..t.p....hsthFKahLNlSYhCHhlTKETPD.......................................................................................................................................................................................................................................................................................................................................................................................................................................EuluctLsho.hhsuhL+NLCFYSp-SPppasuhusssGpLKFuLSYKEQVGuNRELYlGDLpTKhhTRLlEDa.Euhs..hphoCLNs-pEFppAlhsMp.sVp.u.hshSh................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +6146 PF06319 DUF1052 Protein of unknown function (DUF1052) Moxon SJ anon Pfam-B_12539 (release 9.0) Family This family consists of several bacterial proteins of unknown function. 22.00 22.00 22.70 22.30 21.40 20.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.96 7 250 2012-10-11 20:44:44 2003-06-05 14:49:30 7 1 247 1 81 215 98 150.30 46 88.32 CHANGED MsIlp.hs.sPLhDGRQSEpAhhVRRGsQRLLhphtausLPELsLASGRRADLluLotKGEIWIlEIKSSIEDFRVDRKWP-YRhHCDRLFFATHssVPt-IFPEECGLlLSDGYGAchlREAPEH+lAsATRKulhLpFuRsAAtRLhhAEhsshcs ..............................................+.stpAh....hltRGVpRLhh.s.hG.huslsElsLs..sG..R.RADllAlutK....GEI....WIVEIKSSlpDa.+uD...+..KWs-YRsaCDRhFF.As...cs...s.l...P...p......-......l...h......P-.-..sGLI..........lu..........D.....u....Y.....G.......A.........-..IlR-APEH+.LAuApRKulhL+FARsAAtRLhthp.....ss............................................. 
1 22 51 61 +6147 PF06320 GCN5L1 GCN5-like protein 1 (GCN5L1) Moxon SJ anon Pfam-B_12526 (release 9.0) Family This family consists of several eukaryotic GCN5-like protein 1 (GCN5L1) sequences. The function of this family is unknown [1,2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.65 0.71 -4.28 10 239 2009-09-11 00:23:16 2003-06-05 14:49:52 8 9 196 0 159 223 4 113.00 30 62.23 CHANGED hlKEHpp+Qupp+phQE+h++EAIsuAssLTpuLVDslNsGVAQAYlNQK+L-sEAKpLpssuusFuKQTsQWLpllEsFNsuLKEIGDVENWuRsIEsDM+sIsssLEhsYcspp.sps....S ......................................................pt.p...-ct+p-Al.tus.s....s.lspsLs....c.p.L....sstVsp...ha...hNp++L-pEs+p.Lpt.pssphsKQ.osp.Wlp.hs-shspuLK......El.GDlpNWAc.lEhDhpslsp...sLchs.ctt......s......................... 0 44 78 122 +6148 PF06321 P_gingi_FimA Major fimbrial subunit protein (FimA) Moxon SJ anon Pfam-B_13339 (release 9.0) Family This family consists of several Porphyromonas gingivalis major fimbrial subunit protein (FimA) sequences. Fimbriae of Porphyromonas gingivalis, a periodontopathogen, play an important role in its adhesion to and invasion of host cells. The fimA genes encoding fimbrillin (FimA), a subunit protein of fimbriae, have been classified into five types, types I to V, based on nucleotide sequences. It has been found that type II FimA can bind to epithelial cells most efficiently through specific host receptors [1]. Human dental plaque is a multispecies microbial biofilm that is associated with two common oral diseases, dental caries and periodontal disease. There is an inter-species contact-dependent communication system between P. gingivalis and S. cristatus that involces the Arc-A enzyme [2]. 
30.50 30.20 30.50 30.20 30.40 30.10 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -12.17 0.70 -5.20 32 270 2012-10-02 11:46:57 2003-06-05 14:50:11 6 5 62 6 21 280 1 299.40 14 54.52 CHANGED hsuCop--sst.ssss..............Aplolsl.......tpstssssssssp....sEstIpsLslhlFs..tssth.h........scspsshpss.............h............tst....sG.stplhllAN...................................t.h.hsspshs...cht..shhtplsts.sts....tssLsMouthtss..shhsup..................ph..h.............................sssts..............p..t.ssslpLpRssA+lslsthp..........hphssp.pphtshhh..............hpplalhps...........+pp.s.h......................................................phaGs.shsp...........hssst.......................ahhu........phsshs...sthsss.s.........hsstshh......sssh .............................hhuCsp-p.ps.s..sssst..........t.....splolsl..........tsstssp.sssssp.................sEs..p....IsslplhVFs.....tss..phh..h..................s.psspshtss...........hht...............pss.....sG.ppplhllAN..........................ts.h.ssspsls......chp.....shhsphssp...stp.............t.s...slhMouthtss.....shhssp...........................................................................................p..ttsss..lplpRssA+l..sl....hp...........................h.hs...t..ts..hhshh................................ppl.ahhth................ppp.t.h..............................................................................t.hss.th............................t..............................a..t........................................................................................................................................................................................................................................................................... 0 2 16 21 +6149 PF06322 Phage_NinH Phage NinH protein Moxon SJ anon Pfam-B_12973 (release 9.0) Family This family consists of several phage NinH proteins. The function of this family is unknown. 
26.00 26.00 26.50 27.40 24.50 25.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.03 0.72 -4.35 4 231 2009-01-15 18:05:59 2003-06-05 14:50:30 6 2 200 0 2 53 0 62.60 66 96.08 CHANGED MTaoVKTIPDMLlEsYGNQTEVARhLsCsRsTVRKYstDKEGKhHAIVNGVLMVHRGWsctpDt ..MTaTlKTIPDMLlEsYGNQTEVARhLsCsRsTVRKYhtDKEu+tHAIVNGVLMV.HpGhpt.hD.h................. 0 0 0 1 +6150 PF06323 Phage_antiter_Q Phage antitermination protein Q Moxon SJ anon Pfam-B_12730 (release 9.0) Family This family consists of several phage antitermination protein Q and related bacterial sequences. Phage 82 gene Q encodes a phage-specific positive regulator of late gene expression, thought, by analogy to the corresponding gene of phage lambda, to be a transcription antiterminator [1]. 25.00 25.00 29.00 28.90 19.40 18.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.52 0.70 -4.97 9 404 2009-01-15 18:05:59 2003-06-05 14:50:50 6 1 228 0 15 190 0 214.30 70 99.44 CHANGED Mssp..LpalRpQLhsATADLSGuTKGQL.AahEpAph.DTspYpRK...+lRltDcsTGchITlsNPPlsGKQShAKGuuIsLVp.VEaSTuSWRRAVhSL-EcppAWLLWsYS-shsacaQVsIspWuWpcF...psphss++luuKThERL+pLlWLAAQsVK....sELtGc-h..YphpE.LAsLVGVopcNWScsast+WhsM+chFhpLDppALLsss+sRSpQKss..ppslAcls ..........................MNsQY.LQaVREQLhlATADLSGATKGQLEAWQEpA.F.DTGpY+RK.....K.RhhD...EVTG.....K....MIThDNPPIsGKQShAKGoSIsLVSPVEFSTSSWRRAVLSL-EHpKAWLLWCYStSlpWEaQlsITQWAWsEF...psp.usRKIAGKT.-RLKpLIWLAAQsVK....uE.LhGtEs..YEYQ-.LA.LlGVToKNWScTFTc+WVAMKHIFhpLDS-ALLhVhRTRSKQKAsF.pQslAKl............. 0 0 3 10 +6151 PF06324 Pigment_DH Pigment-dispersing hormone (PDH) Moxon SJ anon Pfam-B_12230 (release 9.0) Family This family consists of several eukaryotic pigment-dispersing hormone (PDH) proteins. The pigment-dispersing hormone (PDH) is produced in the eyestalks of Crustacea where it induces light-adapting movements of pigment in the compound eye and regulates the pigment dispersion in the chromatophores [1]. 
20.30 20.30 21.30 22.30 20.10 20.00 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.90 0.72 -6.13 0.72 -4.59 7 54 2009-01-15 18:05:59 2003-06-05 14:51:14 6 1 44 0 18 66 0 17.80 76 21.14 CHANGED NSELINSLLGlPKlMN-A NSELINSLLGLPKsMN-A.. 0 3 5 15 +6152 PF06325 PrmA Ribosomal protein L11 methyltransferase (PrmA) Moxon SJ anon Pfam-B_12272 (release 9.0) Family This family consists of several Ribosomal protein L11 methyltransferase (EC:2.1.1.-) sequences. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.03 0.70 -5.29 31 4673 2012-10-10 17:06:42 2003-06-05 14:51:37 8 31 3837 31 1275 19899 6047 251.40 29 79.81 CHANGED sWhElplpssp-stEs....lsshL....h-.h....G.uhuVsl.......p.Dup..........t.-psl..a..E.......sh....spt..l..pt............stlh.Ah.......astcs..-.htt.......lls.....t.....lp.t.....ht......hhh.....pts.......phpl.....ppls-pDWspsWpcha+Ph+huc+lhIsPoW.c-h...Pp....sstlslcLDPGhAFGTGTHPTTphCLphLEph.............lpsG..c....sV..l.DlGsGSGILuIAAh+LGApclhuhDlDshAVcuup-NhchN......plss..pltl...h...L.........t-...hs........p.........tph...DVlVANILAsslhpLssplhthlKssGhllhSGIlpcptctV.t.c.sh.p.p.uFpl.shhpcp-Wsslsup+p 
...........................................................................................................................................................................................h......h......t.......s.....s.h...h........p.............................................................................t........................................l...u.h.......h.......t...t...............t................hh.t.........l.t..t.............................t..th.....................................t.h.p.h.......ptlt-.pDW.ptW.h.p...a.c......Ph..ph........s.....c..p...h.....h.I.h.PoW..pph.....t.............sth..l.lDP.Gh...A.F.G........TGp..Hs.T.T.t....hs..l..p...h....L.....-..p.h............................................h..s...u.....c.........sV...l...D..l..G..s..G.S..G..I......L..u....I...A........A.......t.........+.......L..............G.....A.........p........c..........l.....h.....u......h....D.....l......D....s............h.......A.......l.....p....s......u.........c.....-......N.....s.....c....h.....N............................s..l.ss.......pl...p.l.........h...........h...........................t...-.....h.................................p..............tph.D......l....l......l......A......N........I.......L........A........s.................l......h.............h.........h......................h..........t....h...h.......t.........s....G.h...h.h..h.......S...G......l...l...t...p...p........h...p.......l..h..p..th....t.t...th....t.l.........h....h..t.p...tt.Wstlhh.............................................................................................................. 0 424 747 1006 +6153 PF06326 Vesiculo_matrix Vesiculovirus matrix protein Moxon SJ anon Pfam-B_13088 (release 9.0) Family This family consists of several Vesiculovirus matrix proteins. 
The matrix (M) protein of vesicular stomatitis virus (VSV) expressed in the absence of other viral components causes many of the cytopathic effects of VSV, including an inhibition of host gene expression and the induction of cell rounding. It has been shown that M protein also induces apoptosis in the absence of other viral components. It is thought that the activation of apoptotic pathways causes the inhibition of host gene expression and cell rounding by M protein [1]. 20.80 20.80 21.40 39.40 20.30 18.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.41 0.70 -4.90 11 133 2009-01-15 18:05:59 2003-06-05 14:51:58 7 1 41 2 0 94 0 202.50 45 96.55 CHANGED M.ppLpKhht.tt.t..s+pppsp......h...tPPuYp-.............PoAPl....hGh..-th-ph-..thph.phhhcsplplpoppPhcohp-shpslstW.cpYsG.hsp+PFa+hlhlhsuppL+tss...sssspscYsuphpGpshlhHplu.hPPhhph.csaspsaphtttptslslshplphssopcpssslhchh.....shpcpschpphhphaGL.hpppssspalh .................................................................................ap....htEPTAPo............LGIIQsKCKRAD........WLIKSHLTITTNYEIKEWuTWDRAISDILDLYDGNPVFKPILLFVYYVLAYNARKIP....GPoNGVRYGAYFDELTTV..WHAI...PELMNQEsDYSYNHRVlHRKIQYVISFKIQMSSTKRRTSPIESFIE..VTsEGLKHTPQFTTILDRARFVYSLTGGRYVIH.................. 1 0 0 0 +6154 PF06327 DUF1053 Domain of Unknown Function (DUF1053) Yeats C anon ADDA_1330 Family This domain is found in Adenylate cyclases. 
21.10 21.10 21.10 21.10 20.90 20.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.41 0.72 -3.67 12 508 2009-01-15 18:05:59 2003-06-05 15:02:07 9 11 76 0 281 378 0 96.70 28 8.88 CHANGED schhpsWss-+PFtplpptsp.stt.t.............................................ulshtpsph........ppstppsts.--El...D-hltpsI-thSspch...+p-clppahLhFp-suhE+cYspht-shF ................................................................pWs.st.pPF.ph.tt.pp.hs..pt.p.h..................................................................sls.tppp.............cstppp.tps--.El...s-hlhpuI....DupSsc+h...........+u-clpp..hsLhFpc.psLE+c...Yppts.shF.................................................. 0 40 60 162 +6155 PF06328 Lep_receptor_Ig Ig-like C2-type domain Yeats C anon ADDA_13736 Domain This domain is a ligand-binding immunoglobulin-like domain [1]. The two cysteine residues form a disulphide bridge. 20.30 20.30 20.30 21.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.97 0.72 -4.11 12 304 2012-10-03 02:52:13 2003-06-05 15:36:22 6 14 53 4 105 260 0 86.50 28 10.89 CHANGED hsc.sshhhsss.llplGSshohsCllK..ppChp..psssphIlW+hp...h.Pppphphlscps..SplThsshtshps.......sLhsChlspuspcph ...........h....h..hhPsp.ll.plGSslohpCllK..ppst......ssup...pIl.Wphsht.tlPpppYphlscps..SpVThss.hsttps.......slhs..Ch.t.tp..p...................... 0 6 14 37 +6157 PF06330 TRI5 Trichodiene synthase (TRI5) Moxon SJ anon Pfam-B_13220 (release 9.0) Family This family consists of several fungal trichodiene synthase proteins (EC:4.2.3.6). TRI5 encodes the enzyme trichodiene synthase, which has been shown to catalyse the first step in the trichothecene pathways of Fusarium and Trichothecium species [1,2]. 
23.80 23.80 24.40 24.60 22.30 23.70 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.26 0.70 -5.81 3 159 2009-09-10 20:52:56 2003-06-05 17:43:25 6 2 67 40 56 165 0 252.40 45 91.90 CHANGED M-sFPTEYFLsTuVRLLEsI+YRDSNYTREERIENL+YAaNKAApHFAQPRQQpILK.VsPKRLpASLRTIVGMVVYSWuKVSKEsMADLSIHYTYTLVLDDS.SDDPHssMLoFFDDLQAGREQKHPWWsLVNEHFPNVLRHFGPFCSLNLIRSThDFFEGCWIEQYNFpGFPGSDDYPuFLRRMNGLGHCVGGSLWPKELFDEQKpFLEIToAVAQMENWMVWVNDLMSFYKEFDD.RDQsSLVKNaVTCDEITLDQALEKLTsDTLHSScQMl-VFuDKDPclM-TIECFMHGYlTWHLCDaRYRL+EIYE+sccQETcDAtKFRKFF-QAAcVGAV-sEEWAaPsVAaclEVRAspcu-VK-uQ.pAlLoSlEss ....................................................................................................................................................................as+lo+.EhhsslSIahTYsllLDDS...p-DP..h..ssMhsYa.sDLpuGc.pQ.t.H.P.WW.tLVNEHFP.NVLRHFGPFCSLNLIRSTlDFFpGC........W....I..E.........Q...a....N...F..tG..a...P...G.......Sp...D..YPpFLRRhsGLGcshusslWPKc.FsEp...ph....FlE......h.s......sAls.phpsahsalNDlhSFYKE...s.tp-phshl.pshshspthoh.puLc.cl.sp-slpu...s.c.phhtl..h...s.ptss.phhtshptahpGahhhHh..t.RY+h.-h.......................................................................................................... 0 17 28 48 +6158 PF06331 Tbf5 REX1; Transcription factor TFIIH complex subunit Tfb5 Studholme DJ, Wood V anon SWISS-PROT Family This family is a component of the general transcription and DNA repair factor IIH. TFB5 has been shown to be required for efficient recruitment of TFIIH to a promoter [3]. 20.30 20.30 20.80 30.30 20.00 19.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.13 0.72 -4.20 30 254 2009-01-15 18:05:59 2003-06-05 18:43:11 7 1 225 6 186 224 3 67.20 35 85.84 CHANGED MspAh+G.lLlcCDPulKplllplDs.pphs....aIlE-LD.-THLllp...sshlphlKpclschhspspass.p ..............MspshK..G.VLlpC.DPuhKphlLplDpppshs..........FIlc..DL..D.-sHlhlp...sphlphLcp+lschh-pssa....t.............. 
0 57 94 151 +6160 PF06333 Med13_C TRAP240; TRAP_240kDa; Med13; Mediator complex subunit 13 C-terminal Studholme DJ, Wood V anon [1] Family Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med13 is part of the ancillary kinase module, together with Med12, CDK8 and CycC, which in yeast is implicated in transcriptional repression, though most of this activity is likely attributable to the CDK8 kinase. The large Med12 and Med13 proteins are required for specific developmental processes in Drosophila, zebrafish, and Caenorhabditis elegans but their biochemical functions are not understood [4]. 
19.10 19.10 19.80 19.10 18.60 18.60 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.60 0.70 -5.94 23 356 2009-01-15 18:05:59 2003-06-06 10:13:06 7 8 224 0 227 344 0 402.80 26 25.44 CHANGED Vlallsshspss..........uhlshsttFhsL.....pthpspsph.ts-lhLpIlPhchlhss....pphhl.s.schtsLuhslYspC....Ps.tt.tph......sss.hshlscshPcpIsF+lhss..........................pssshpcsshLHlAYs+ShDpcWlsAAWSDspGphppTpoasls.pspshpp........shpplps-IWphohsL...........hspstu+hplllsRlss.lsc-ELtpWppLssp.t.......................spplsLsllsVssssclh...........................................sss.s...s......htspspooPssssts...........shhossphtsusss.tss...................................s.................tss.t.ts-thlh....Dhsp-saullhspsLspssslpch..AltSGYLl+ps...................sss..cshhphcVNllpss..........psshcp..............lL+plLppYRsLusLspshGlpctpt..........................uhlPWHlsslt+ ...............................................................................llahl.s.s..s.................t..p..shhhhtLhpta.phh.p.t.h.....t..tsph.lQllPhp.hlhp.....pp.hh...pphtplAhpsaspC....................s.t.t...pshh............hsh.hhh....hs...ph.Pp.l.......plhss........................................sps.p.c...ssLaluYs....h..S.hDp.RWlhAuhTD.hGchhcTthhs.lshs.sp............ttpp.....................shp...shpclWc.hshsl............hphs..sh.WRlVIsRl.........G.....t.lscsElc..sWshLlspts...........................................................................................ss.loh...slls.h-spsshhh...........................................................................................p.ss.s...hst...............ssthsTPtss.s.ps..............hshsoss.h.tsssss..s.............................................................................................................................................s.ts.......hs..s..s....t....h..........t..tp....ps...t..hh...p...ct...pp.............t..lhp.....sLu.GYhlpps.........................................................................
..................ssh..cs.h..thp..ls....hpss.................t.h.p..................................lL+.lLptYpsLuhLshs...s.ht.ctp............................ss.lPhHh.sh.................................................................................................................................................................................................................................................... 2 55 103 176 +6161 PF06334 Orthopox_A47 Orthopoxvirus A47 protein Moxon SJ anon Pfam-B_13263 (release 9.0) Family This family consists of several Orthopoxvirus A47 proteins. The function of this family is unknown. 25.00 25.00 81.80 81.70 19.90 24.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.80 0.70 -5.27 2 47 2009-01-15 18:05:59 2003-06-06 10:35:14 6 1 18 0 0 40 1 220.20 90 98.68 CHANGED MGNKNIKPSKENRLSIL.KD+MDSFKRGS.....FREKS+ATIQRFSSLRREHlKVDHPDKFLELKRtIYtIIQKSSSIDVDKRTKLMSNIKTMMINPFMIEGLMTSLEsLDPDNKMSYSSVMILGEFDIINISDNcAAFEFINSLLKSLLL.........EYSISNDLLYsHINALEYIIKNTFNVPERQLILRsQYLTPIFSDLLKYAGLTIKSNILMWNKpFIKPVSDLYTShpLLHCVT. ..........................................MGNKNIKPSKENRLSIL.KD+MDSFKRGSWATSSFREKS+ATIQRFSSLRR...EHlKVDHPDKFLELKREIYtIIQKSSSIDVDKRTKLMSNIKTMMINPFMIEGLMTSLEsLDPDNKMSYSSVMILG.EFDIINISDNEAAFEFINSLLKSLLLLNTpQLKLLEYSISNDLLYsHINALEYIIKNTFNVPERQLILRGQYLTPIFSDLLKYAGLTI.KSNILMWNKpFIKPVSDLYTShRLLHCVTV.... 0 0 0 0 +6162 PF06335 DUF1054 Protein of unknown function (DUF1054) Moxon SJ anon Pfam-B_13269 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
21.10 21.10 21.40 22.20 20.80 20.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.27 0.71 -4.64 22 540 2009-01-15 18:05:59 2003-06-06 10:37:23 7 2 532 2 56 275 3 191.60 48 95.26 CHANGED FopcDFcsFplcGL-sRMpslpppIcPKhptlGpphushLos.sG.cEhasHlAKHARRoVNPPsDTWlAFussKRGYKhhPHFQlGLW-s+lFlWhAlIaEs...pKtphupthhcphshlpp.lPscahlShDHscscsh.hsphs...LpphhcRhpsVKKuEhLlG+pls+c-slh.hss-chhphlcpsacpLLPlY .....FpsKDFcsFsV-GLDtRMpAlpp.I+PpLcpLGEhFusahospT.G.-sFasHVAKHARRoVNPPcDTWVAFussKRGYKMLPHFQIGlaccplFlhFulhaEsc..sKsphA+s.F.cch.pt.lpp....LPcDahl...SlDHhKPsthhl+.-hspcpLpcslpRhtsVKKuEFhlu+tIsPp-tph.posctFlAhlEpTacphLPhY........................................................................... 0 21 35 48 +6163 PF06336 Corona_5a Coronavirus 5a protein Moxon SJ anon Pfam-B_13277 (release 9.0) Family This family consists of several Coronavirus 5a proteins. The function of this family is unknown [1]. 25.00 25.00 28.10 28.10 21.60 20.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.14 0.72 -4.10 3 85 2009-01-15 18:05:59 2003-06-06 10:42:25 6 1 12 0 0 84 0 63.90 84 99.93 CHANGED MKWLTSFGRAVISCYKuLLLTQLRVLDRLILDHGP+RoLTCARRVLLVQLDLVYRLAYTPTQSLV .MKWLTSFGRAhISCYKuLLLTQLRVLDRLIL-HGP+RsLTCuRRVLLhQLDLVYRLAYTPTQSLV. 0 0 0 0 +6164 PF06337 DUSP DUF1055; DUSP domain Yeats C, Bateman A anon ADDA_8548 Domain The DUSP (domain present in ubiquitin-specific protease) domain is found at the N-terminus of Ubiquitin-specific proteases. The structure of this domain has been solved [1]. Its tripod-like structure consists of a 3-fold alpha-helical bundle supporting a triple-stranded anti-parallel beta-sheet [1]. 
29.10 29.10 29.20 29.10 28.60 29.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.54 0.72 -3.60 139 806 2009-09-10 16:33:45 2003-06-06 10:51:54 7 46 214 12 544 743 3 98.80 29 10.54 CHANGED spthYllst.p.........Whppappalpts...ptt.....................................................................................................................s...........sI...s.....Nsslhpspt............................plc....pltpstD.ahll.........sp.psWphlhph.Y.u...uustlhppsht ..............................................................................................t.hYllst...p.........Wacp.Wcpa.V...thp..s.s.................................................................................................................................................................PG...........PI...D.....NosLhpsp.s.s...............................pL+.t....pLhpstD.Yhll............PpcsWphLhsh.Y.G.....Gs.slsR.h..p.................................... 0 219 288 414 +6165 PF06338 ComK ComK protein Moxon SJ anon Pfam-B_13324 (release 9.0) Family This family consists of several bacterial ComK proteins. The ComK protein of Bacillus subtilis positively regulates the transcription of several late competence genes as well as comK itself. It has been found that ClpX plays an important role in the regulation of ComK at the post-transcriptional level [1]. 
19.10 19.10 19.10 19.30 18.70 18.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.78 0.71 -4.82 19 719 2009-09-11 05:13:32 2003-06-06 10:58:32 6 2 443 0 58 336 0 150.30 29 87.47 CHANGED M......ppph..c...........YhIsptTMslhPhhpstpph..ocllEh-ssahs..h+PhcIIc+SC.+aaGSSYtGRKcuTpclhGlopKsPIhlsPppslYaFPTpSsppt-ChWluapaIcphcphctpcs.hlhFtNGpolpLslShpShcsQhtRouhL+hchpcchctptp .......................................................................p.h.lptts.MhlhPhh..st.th..ocllchcpshh...hps.clIc+SC.+haGsoa...G.++ttopclhs...I..spK.sPIhl..sPh......h.hhFPTc...S...p.....p.sps...lWlshpa.Icshc...t...l..cs..ppp....hlpF.N.s..po.lplcl..ShpslppQhpcohhlhh.hp.p.p...st....................................... 0 15 32 49 +6166 PF06339 Ectoine_synth Ectoine synthase Moxon SJ anon Pfam-B_14542 (release 9.0) Family This family consists of several bacterial ectoine synthase proteins. The ectABC genes encode the diaminobutyric acid acetyltransferase (EctA), the diaminobutyric acid aminotransferase (EctB), and the ectoine synthase (EctC). Together these proteins constitute the ectoine biosynthetic pathway [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.50 0.71 -4.21 60 443 2012-10-10 13:59:34 2003-06-06 11:03:08 7 3 411 0 130 434 87 124.80 50 95.48 CHANGED MIVRsL.p-ltsT-Rc..Vpup..sWpSpRlLLccDsMGFSFH.ThIhAGoEpchaYpNHlEuVYClpGcGclEslssGcsatIcPGshYsLscHD+HhLRApp..-h+hsCVFNPPlTGpEVHDEsGsYsh.s ..............................MIVRsl.c-hppT..-Rc..Vsup.......sWcSpRhlLtcD.sh..GFSFH.TsIaAGT-oc....lH.Y.pN.H.l...E.uVYClpGEGElE..s...l..........t.....s....G....c..sasIp.P.GolYlLDpHDcHhLR..utp..-hphsCVF..NPPlTGpElH.DpsGsYsl.t.............. 
0 41 87 113 +6167 PF06340 TcpF Vibrio cholerae toxin co-regulated pilus biosynthesis protein F Moxon SJ anon Pfam-B_13058 (release 9.0) Family This family consists of several Vibrio cholerae toxin co-regulated pilus biosynthesis protein F (TcpF) sequences. TcpF is known to be a secreted virulence protein but its exact function is unknown [1]. 25.00 25.00 432.60 432.40 18.40 18.20 hmmbuild --amino -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.97 0.70 -5.31 5 44 2009-01-15 18:05:59 2003-06-06 11:25:21 6 1 39 2 1 18 0 316.70 87 93.87 CHANGED FNDNYSSTSTVYATSsEA.......TDSRGSE..HLRYPYLECIKLGMSRDaL-N.CVTVSFPTsS-hFYGAYPucEE...GKpRTKEDFQARLLSGDYsuLEKLYlDaYLAQTTYDWcIPTRDQIEoLVNLAK-GKLoosLNsEYlcGRFLTKsD.GcYsIVsVGsV.ADsoPVKLPAlVSKRGLMGToslVNAlPsEIaPNIK.....lYpsTlSpLcPGssFcAshEYDssELpKHGYSs....lsDVpucVLVGVPuuc.GVIYsPVYpEos+sYahSsNhPGKclYslSTNDlHNGapWSssMFSNuKY+TQlVLTKuDGSGV+LYSKAYSEpF FNDNYSSTSTVYATSNEA.......TDSRGSE..HLRYPYLECIKIGMSRDYLEN.CVKVSFPTSQDMFYDAYPSTESD.GAKTRTKEDFSARLLAGDYDSLQKLYIDFYLAQTTFDWEIPTRDQIETLVNYANEGKLSTALNQEYITGRFLTKEN.GRYDIVNVGGV.PDNTPVKLPAIVSKRGLMGTTSVVNAIPNEIYPHIK.....VYEGTLSRLKPGGAMIAVLEYDVSELSKHGYTN....LWDVQFKVLVGVPHAETGVIYDPVYEETVKPYQPSGNLTGKKLYNVSTNDMHNGYKWSNTMFSNSNYKTQILLTKGDGSGVKLYSKAYSENF 0 1 1 1 +6168 PF06341 DUF1056 Protein of unknown function (DUF1056) Moxon SJ anon Pfam-B_13260 (release 9.0) Family This family consists of several putative head-tail joining bacteriophage proteins. 21.80 21.80 23.20 23.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.07 0.72 -3.94 6 36 2009-09-11 05:07:47 2003-06-06 11:28:42 6 1 35 0 6 37 0 63.10 37 99.65 CHANGED M.......IFKphFshIWclFDVLhFILAhIslslTsFhhshshGsIololshhLhGhlSElloc..pKGGD ............M.IhKphFphIWp.hFD.lLhFILuhIslslssFhhshshGslululshlLsGhlSEhlsp...Kt......... 
1 2 3 5 +6169 PF06342 DUF1057 Alpha/beta hydrolase of unknown function (DUF1057) Moxon SJ, Bateman A anon Pfam-B_13294 (release 9.0) Family This family consists of several Caenorhabditis elegans specific proteins of unknown function. Members of this family have an alpha/beta hydrolase fold. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.87 0.70 -5.60 6 46 2012-10-03 11:45:05 2003-06-06 11:38:02 7 2 10 0 44 763 581 261.50 31 89.71 CHANGED EPKhhpKLVKFpscpt+hV-lpAVYEDohsSGSshGTVVuFHGSPGSHNDFKYlRsph-chsIRFIGlNYPGFc.TsuY.sQpasNhERpsYocALL-cL-lc..GKlIhhGHSRGCENALQTAssh......ssHGlVMlNPsGhRhHKGIpPh.RhpolsalaphLPp..hlusuh......hathhculGhKVpcGEEAsuAhRuhhshuLEcQlt.I-KLpcps.sKphIsFGGKDaLlEEEIVhEsLc+apGLsHFsh.ccpIoEc-htKIhcSFsssQpGsSVFlAcDsHFQNKopA-LlAEss+uhFD ....................................................................................................hph......tp.hplp.As.YpDs..hs...s..G..os..h....G...T...V.l.u.h.HG.o.P...G.S..H.p.D.F.K.Y..lp...s..h..L....-.p..h...s..lR.hIs...lNaPGF....t......o.s...............s.h....p................p..p...a.....s..N....ER...p..s.a.s.....p...u...l....lp.pL.s..lp.....s+.l..lhlGHSRGsE.NA...Lph..u..sth...................s..h..G...hs....hl.N...s...sG....h+...H..+..u...Ip.P..h.....+.h..t.h.......ls.h...lhp...h..l.p....hhhsth......................hht.hh.chl..Gh..+...l..p...s..G...c.....A......hsuh+s..h.ph..shcpp.h....l..cchpcps..h...........+hhlsauupDaLlEccI..Ehht..ta..p.shpHa..h.ppp..s-...c.p...h....c...Ihpsa.st.s.pptsol.htp-sHa.pKppAchlucsstthh.......................................................................................................................................................................... 0 21 26 44 +6171 PF06344 Parecho_VpG VPG_P3B; Parechovirus Genome-linked protein Yeats C anon ADDA_6741 Domain This family is of the Parechovirus genome-linked protein Vpg type P3B. 
25.00 25.00 37.30 36.00 17.80 16.50 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.28 0.72 -6.56 0.72 -3.98 4 49 2009-01-15 18:05:59 2003-06-06 12:02:01 6 2 11 0 0 50 0 20.00 88 0.92 CHANGED RAYNPTLPVAKPKGTFPVSQ RAYNPTLPVsKPKGTFPVoQ 0 0 0 0 +6172 PF06345 Drf_DAD DRF Autoregulatory Domain Yeats C anon ADDA_2536 Motif This motif is found in Diaphanous-related formins. It binds the N-terminal GTPase-binding domain; this link is broken when GTP-bound Rho binds to the GBD and activates the protein. The addition of DAD to mammalian cells induces actin filament formation, stabilises microtubules, and activates serum-response mediated transcription ([1]). 22.10 22.10 23.30 22.70 19.80 19.10 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.16 0.73 -6.14 0.73 -3.85 3 216 2009-01-15 18:05:59 2003-06-06 13:25:51 6 12 65 11 89 181 0 15.00 91 1.47 CHANGED GVMDSLLEALQSGAA GVMDSLLEALQSGAA 0 13 19 50 +6173 PF06346 Drf_FH1 Formin Homology Region 1 Yeats C anon ADDA_2536 Family This region is found in some of the Diaphanous related formins (Drfs) ([1]). It consists of low complexity repeats of around 12 residues. 40.00 40.00 40.50 40.40 39.90 38.60 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -12.57 0.71 -12.86 0.71 -4.72 4 113 2009-01-15 18:05:59 2003-06-06 13:37:54 7 15 26 0 46 81 1 139.50 65 14.81 CHANGED PPPPPLP..uGssIPPsP...................PLPsGssIPPPPPLPGGssIPPPPPLPGsAu.IPPPPPLP.GssuIPPPPPLP.GusuIPPPPPLP.GussI.PPPPPLPGGuGI.PPPPPLPGtsul.PPPPPLPsGsGl.PPPPPhPG..AsGIPPPP..PGhGhPPPPP.FGhGVPssPsLP ............................................PP.sPPL.s..GssIP.sP...PPLP...GsuI.PP.PPP....LPG.su.IPP.PPPLP.Gs..uIPPPPPLP.G..s.uIPP.PPPLP.G.s.uI.PP.PPPLPGus..GI...P...P.P....PPLPGts.Gl..PPP....PPLPsGsGl...P.P.P..PPhPG..usG.IPPPP...PshGhPPP.P....t.......................................... 
0 6 6 10 +6174 PF06347 SH3_4 Bacterial SH3 domain Moxon SJ, Bateman A anon Pfam-B_13248 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. These are composed of SH3-like domains. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.55 0.72 -4.42 29 1011 2012-10-02 18:48:24 2003-06-06 13:57:49 8 42 560 0 333 3517 2117 55.40 27 37.89 CHANGED cssplslRpuPsssupVs.shlcsGlssplt.pspspWp+lp.ssGtpGWltpstLhG .............................sstlslRsuPu.tsutlh..sh....l..c.t.G.h..s.l.p.l......t....p.....p.....p.....s.......s.....W....pcl.....c...s.....s....G.....h......p.....GWltpshL.G.................. 1 110 232 271 +6175 PF06348 DUF1059 Protein of unknown function (DUF1059) Moxon SJ anon Pfam-B_13303 (release 9.0) Family This family consists of several short hypothetical archaeal proteins of unknown function. 20.50 20.50 20.80 20.50 20.20 20.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.71 0.72 -4.15 17 231 2009-01-15 18:05:59 2003-06-06 14:01:29 6 2 177 0 99 216 25 56.20 26 83.59 CHANGED hK.pFpCuslGhsCuachp.AsoE-EllctlttHA+hsHGhsclPp-hlscl+psI+cs ..................h.Ctt..h..s..hs..Cshtlp.Aco-sEllctsspHAtssH.s..hs..s.s.-hhppl+t.h....................... 0 21 46 78 +6177 PF06350 HSL_N HSL; Hormone-sensitive lipase (HSL) N-terminus Moxon SJ anon Pfam-B_13329 (release 9.0) Family This family consists of several mammalian hormone-sensitive lipase (HSL) proteins (EC:3.1.1.-). Hormone-sensitive lipase, a key enzyme in fatty acid mobilisation, overall energy homeostasis, and possibly steroidogenesis, is acutely controlled through reversible phosphorylation by catecholamines and insulin [1]. 
25.00 25.00 25.10 29.80 17.70 24.40 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.84 0.70 -5.34 6 166 2009-01-15 18:05:59 2003-06-06 14:21:03 7 6 104 0 86 151 0 251.30 43 37.71 CHANGED hDpptlFsoLhslCc-NhuaFu.....uspsEsupRLpGsasulp-HhcsscPLVppltohhHcaDFDppTPGNGYRShlaVVcsCltHhl+huRYlsuNRsoIFFRpua.ltElEAYsphLspLpshLhasQpLhphs.ssGcLF.tsEu+ssEchlpchsTlpphCFYGRCLGFQFssSIRPhLphluIGMsuFuEsYhppp..usIuhAsSShaTSGKYhlDPELRupch.pIoQNhcVcFhKuFW.lsEoElhpoLsShsuopVKVNRllolPPEslpLPhspsscho................VsIssPpAHhGsG.PVpsRLIStshRcG..u .................................................................hhtsL..htlsp-NhsaFs...........ts.schupRl.sshhtl.p-phhtlcshltplh.thsp...haDh....D.pT..PuNGYRSll.psspsClhHhlp.hs.+hlts.sRpolFFR..t.sa.htElEAY.hthLsp.Lp.........shh....hs..ppLh.ths....psGt...L..F....s-p..........thstphlpchsolcpsCFYGRCLGF..Q.Fssul+shLphluluhsoauEp..Ytppp....suluhsspSlhooG+ahlsPELRutch.cIhQshclcFhKuFWslsE....plh..........pplsshsusslpVschlplPsp.shphPhts....s.phs.......................VsIssP..AHhGsu.PV.hRLlShchRpG..u............................................. 0 32 39 65 +6178 PF06351 Allene_ox_cyc Allene oxide cyclase Moxon SJ anon Pfam-B_13374 (release 9.0) Family This family consists of several plant specific allene oxide cyclase proteins (EC:5.3.99.6). The allene oxide cyclase (AOC)-catalysed step in jasmonate (JA) biosynthesis is important in the wound response of tomato [1]. 
21.60 21.60 22.20 22.20 18.90 21.00 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.04 0.71 -4.82 10 96 2009-09-12 22:21:42 2003-06-06 14:28:10 6 2 48 19 35 100 1 168.90 62 71.60 CHANGED -su+PuKVQELpVYEINERDRGSPAYLRLSpKsV.......NSLGDLVPFSNKLYoGsLcKRlGITAGIClLIQHpsEKKGDRYEAIYSFYFGDYGHISVQGsYLTY.EDTYLAVTGGSGIFEGsYGQVKLpQlVFPFKLFYTFYLKGIp.DLPpELLussVsPSPoVEPoPAAKACEPHAVlsNF ..............................................................................p.upPsKVQELpVYEINERDRsSPAYL+.L.Sp.Kps...........NuLGDLVPFoNK............LYoGsL...pKRlGITAGlClLIQHhPEK...........pG...........DRYE.AIYSFYFGDYGHISVQGsYLTY..EDo..YL..AVTGGSGIFEGsYGQVKLpQlVFPFKLFYTFYLKGIs.DLPtELlspsV.PSPsVEPsPtAKAsEPpAslsNF................... 0 10 25 30 +6180 PF06353 DUF1062 Protein of unknown function (DUF1062) Moxon SJ anon Pfam-B_13377 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.80 21.80 22.00 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -3.97 15 241 2009-01-15 18:05:59 2003-06-09 11:21:01 7 1 234 0 34 162 2 140.90 45 72.15 CHANGED lNAp+KhLDlWLIYKCspCDpTWNhslFpRhslccIsPtLL-uLpsNDtshlctaAashtsL+RNstcls.t.s-acltcchhsssssphs...lclplphshPhsl+LspLLtppLtLSRoclcpLh-pGhIpssstt...sctL+pc .....................VNuQKKlLDVW.IYKCo+CDhTWNloLFSRlsVucIs+-LasRLhuNDsuslphaAaDsulLKRNsAELu.upPD..F+Ip-R.hll.ul.s.uapp......lsVsVRlupsFpVpLLSlLK+QLhLSsAElKRhlEsGpIoGlohK..hu+KL+..s....................... 1 14 20 25 +6182 PF06355 Aegerolysin Aegerolysin Moxon SJ, Bateman A, Macek P anon Pfam-B_13415 (release 9.0) Family This family consists of several bacterial and eukaryotic Aegerolysin-like proteins. It has been found that aegerolysin and ostreolysin are expressed during formation of primordia and fruiting bodies. It has been suggested that these haemolysins play an important role in initial phase of fungal fruiting. 
The bacterial members of this family are expressed during sporulation [1]. Ostreolysin was found cytolytic to various erythrocytes and tumour cells [2]. It forms transmembrane pores 4 nm in diameter. The activity is inhibited by total membrane lipids, and modulated by lysophosphatides. The potential use of aegerolysins is reviewed [8] with special emphasis on their properties which would allow thier use in therapeutics. 20.60 20.60 21.10 21.80 20.10 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.63 0.71 -4.28 18 123 2009-01-15 18:05:59 2003-06-09 11:40:34 8 1 67 0 67 114 0 120.00 26 84.40 CHANGED uQWVplcIhsphsptslpI+NspLshG+aacsss+cpploss-lsthhIts.ssptplsusGRpss.oGTEGshDlh...Du..-p+IsplaWDsPauu.+sNpaphpcpsscYtlEhushs.psuGslGsVslcl....s+p .......................................phlt..hplhs.phs.tt.sltlcNspLph.....GKaa...p.ss.......cspclossplsthshts.ssph...plsusGRpsssoGTpGshDlh........ss..s..p+l..spl..hWDs.Pasu..sNpaph...ps...ts.....sp...Ytlthsuhs..p.sshGplslpl...t........................ 0 11 37 56 +6183 PF06356 DUF1064 Protein of unknown function (DUF1064) Moxon SJ anon Pfam-B_13437 (release 9.0) Family This family consists of several phage and bacterial proteins of unknown function. 24.60 24.60 25.00 24.80 24.10 24.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.66 0.71 -4.10 5 417 2012-10-11 20:44:44 2003-06-09 11:42:11 6 1 361 0 39 303 62 112.80 39 85.21 CHANGED +SKYNAKKVEaDGIVFDSKsEspYYQpLcsshsusph-+l-lQPKFELhsK.FRKsGpl.RsIcYlADFslRp-u.cllEVIDVKGMlT...c-a+IKAKLFch+Yhp......hKs.KhpGpsahps ..................................hsKYsuKK.lp.h.cGIsFDSKsEscYYph.Lcp....p..pss....p.h..s...cl-lQP+FEL.s............p....h...p...........p.p....................R...sIpYlADFslap.ps...t............hl....c.VlDVKG...hhT.....cshplKtKlFchp....Y.p......h.h.h..p........................................ 
0 13 26 29 +6184 PF06357 Omega-toxin Omega-atracotox; Omega-atracotoxin Moxon SJ anon Pfam-B_14633 (release 9.0) Family This family consists of several Hadronyche versuta (Blue mountains funnel-web spider) specific omega-atracotoxin proteins. Omega-Atracotoxin-Hv1a is an insect-specific neurotoxin whose phylogenetic specificity derives from its ability to antagonise insect, but not vertebrate, voltage-gated calcium channels. Two spatially proximal residues, Asn(27) and Arg(35), form a contiguous molecular surface that is essential for toxin activity. It has been proposed that this surface of the beta-hairpin is a key site for interaction of the toxin with insect calcium channels [1]. 21.70 21.70 23.10 22.20 20.80 20.50 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.12 0.72 -4.37 2 17 2012-10-01 22:06:18 2003-06-09 11:58:20 6 1 5 2 0 20 0 37.10 79 71.75 CHANGED SssCIPSGQPCPYsc.CCStSCTaKpNENGNoVpRCD ..SsTCIPSGQPC.PYNENCCSpSCTaKp.NENGNTVKRCD. 0 0 0 0 +6185 PF06358 DUF1065 Protein of unknown function (DUF1065) Moxon SJ anon Pfam-B_14830 (release 9.0) Family This family consists of several Benyvirus proteins of unknown function. 25.00 25.00 50.00 49.90 20.30 18.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.58 0.72 -4.33 2 32 2009-01-15 18:05:59 2003-06-09 12:33:53 6 1 5 0 0 18 0 99.80 96 94.41 CHANGED MVLVVKVDhSsIVLYIVAGsVVVShLYSPFFSN-VKAuuYAGAlF.suGCIMDRNSFAQFGuCDIPKaVA-SIo+VAhKEhDADIhschssVss+hVsLhEslh.lh.RlF MVLVVKVDLSNIVLYIVAGCVVVSMLYSPFFSNDVKASSYAGAVFKGSGCIMDRNSFAQFGSCDIPKHVAESITKVATKEHDADIMVKRG-VTVRVVTLTETIFIILSRLF 0 0 0 0 +6187 PF06360 E_raikovi_mat Euplotes raikovi mating pheromone Moxon SJ anon Pfam-B_14708 (release 9.0) Family This family consists of several Euplotes raikovi mating pheromone proteins. 
Diffusible polypeptide pheromones, which distinguish otherwise morphologically identical vegetative cell types from one another, are produced by some species of ciliates. In the marine sand-dwelling protozoan ciliate Euplotes raikovi, pheromone molecules promote the vegetative reproduction (mitogenic proliferation or growth) of the same cells from which they originate. As, understandably, such autocrine pheromone activity is primary to that of targeting and inducing a foreign cell to mate (paracrine functions), this finding provides an example of how the original function of a molecule can be obscured during evolution by the acquisition of a new one [1]. 25.00 25.00 51.10 50.70 17.60 17.20 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.55 0.72 -3.74 6 6 2009-01-15 18:05:59 2003-06-09 13:02:30 6 1 1 6 0 12 0 35.30 37 62.72 CHANGED DhC-pAsAQCshThCp.hCt...sp.hC.lsVhss..Cs DhC-pAsAQCshThCp.hCt...sp.hC.lsVhss..Cs 0 0 0 0 +6188 PF06361 RTBV_P12 Rice tungro bacilliform virus P12 protein Moxon SJ anon Pfam-B_14960 (release 9.0) Family This family consists of several Rice tungro bacilliform virus P12 proteins. The function of this family is unknown [1]. 25.00 25.00 25.50 67.40 23.60 18.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.42 0.72 -3.82 3 11 2009-01-15 18:05:59 2003-06-09 13:05:48 6 1 2 0 0 11 0 100.10 90 100.00 CHANGED MSADYPTFKEALEKFKNLESDTAAKDKFNWVFTLENIKTTADVNLASKGLVQLYALQEIDKKINNLTAQVSKLPTTSGSSS.AGAIVPAGSNTQGQYKAPPKKGIKRKYPA MSADYPTFKEALEKFKNLESDTAuKDKFNWVFTLENIKTTADVNLASKGLVQLYALQEIDKKINNLTAQVSKLPTTSGSSS.AGAIVPAGSNTQGQYKAPPKKGIKRKYPA 0 0 0 0 +6189 PF06362 DUF1067 Protein of unknown function (DUF1067) Moxon SJ anon Pfam-B_15074 (release 9.0) Family This family consists of several hypothetical Mycobacterium leprae specific proteins. The function of this family is unknown. 
25.00 25.00 25.40 213.20 23.90 22.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.32 0.72 -3.43 2 7 2009-01-15 18:05:59 2003-06-09 13:07:25 6 1 1 0 0 7 0 97.00 97 76.46 CHANGED MTTPTPQGHDMHTKTPLPRGANNYPHTHACIDIAFSTAQVPSPWHHQHVDQAASTTDMLTCAALIVSTAAKHTKPHRKQAVSHPsTKTPQHSppR.p MTTPTPQGHDMHTKTPLPRGANNYPHTHACIDIAFSTAQVPSPWHHQHVDQAAsTTDMLTCAALIVSTAAKHTKPHRKQAVSHPPTKTPQHSKTRQQ 0 0 0 0 +6190 PF06363 Picorna_P3A Picornaviridae P3A protein Yeats C anon ADDA_6741 Family This family consists of the P3A protein of picornaviridae. P3A has been identified as a genome-linked protein (VPg) and is involved in replication ([1]). 21.10 21.10 21.10 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.36 0.72 -4.01 4 57 2009-01-15 18:05:59 2003-06-09 14:59:20 6 4 18 0 0 59 0 99.70 76 4.63 CHANGED phhs.lcE.hphphspLIEthEshhtPpsSsFtCFAsph.s.K.+.pAscKVhsWsps+hpphhsFV.RNKuWhTlhSsloShlSILhLVhhhaKKEcpc-E ..........DAIPYIDEYLNIEMSTLIEQMEAFIEPRPSVFKCFAoKl.usps+.KAuKEVV-WFSsKIKSMLSFVERNKAWLTVVSAVTSAISILLLVTKIFKKE-SKDE......... 0 0 0 0 +6191 PF06364 DUF1068 Protein of unknown function (DUF1068) Moxon SJ anon Pfam-B_14602 (release 9.0) Family This family consists of several hypothetical plant proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
25.00 25.00 35.60 33.60 21.80 21.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.29 0.71 -4.66 3 84 2009-01-15 18:05:59 2003-06-09 15:51:09 7 5 22 0 54 84 0 152.60 49 70.17 CHANGED MA+HTAALKlGLALLGLSMAGYILGPPLYWHLTEALAtVSAoSCPACsCDCSohPLLTIPptLSNuSFsDCAK+DPEV.....NEDTEKNYAELLTEELKQREAEShEKHKRADsGLLEAKKlTSSYQKEADKCNSGMETCEEAREKAEcALVEQKKLTShWEERARQKGWK-GuTKPsVK ..............................hhlhuls.shhlhGPsLYW+............h..........tp......sh...s........................t.........ts.......s.....CssC.....CDC.....s.s..sl.hpls........ph..pDCu+pDP-l.....scEhEKsas-LLsEELKLpEt.ApEppc+ss.hsLlEAK+lASQYQKEA-KCNuGhETCEEARE+AEusLscp+KLTuLWEpRARQhGWccs.......t......... 0 6 37 46 +6192 PF06365 CD34_antigen CD34/Podocalyxin family Moxon SJ anon Pfam-B_14609 (release 9.0) & Pfam-B_17463 (release 8.0) Family This family consists of several mammalian CD34 antigen proteins. The CD34 antigen is a human leukocyte membrane protein expressed specifically by lymphohematopoietic progenitor cells. CD34 is a phosphoprotein. Activation of protein kinase C (PKC) has been found to enhance CD34 phosphorylation [1]. This family contains several eukaryotic podocalyxin proteins. Podocalyxin is a major membrane protein of the glomerular epithelium and is thought to be involved in maintenance of the architecture of the foot processes and filtration slits characteristic of this unique epithelium by virtue of its high negative charge. Podocalyxin functions as an anti-adhesin that maintains an open filtration pathway between neighbouring foot processes in the glomerular epithelium by charge repulsion [2]. 
24.40 24.40 24.60 24.70 24.30 24.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.41 0.71 -4.67 8 185 2009-01-15 18:05:59 2003-06-09 16:06:09 7 2 43 0 85 157 0 179.50 35 45.08 CHANGED lLsLsEsosCEcatpsscEpLlc.llCcshpushs..tstColpLApspssppsllhslsscsplssp...hphL+c+ps-LcchGIp-hshpcpusccshpc+..hs.hlIslVshuu.lLLlhslhshYssapRcS.+hspQRLsEELphVENGhHDNPTL-Vh.EsuSEMQEK+.ssLNGEhs-....SWhsP...hss.sKcDl.-....EEDTHL ................................................................hLphscs..C..t.hppp.pstp.Lhp.llCpt.ttphss...tts.Cpl.Lupsp.p.ppllh...slsscptlssp....hphLccp.ppcL.c.clGIpsh..shts.sstpshpp+......hlIsLVs.G..hLLslhhhshYhhh.pRRS.p.stpRL...sE-h.h.sE.NG.psssslpsh..tspsEhQcK..sslNtth.t-.........p...hhs......sp.h+pch.......tDTcL................ 0 3 7 29 +6193 PF06366 FlhE Flagellar protein FlhE Moxon SJ anon Pfam-B_14631 (release 9.0) Family This family consists of several Enterobacterial FlhE flagellar proteins. The exact function of this family is unknown [1]. 25.00 25.00 31.20 30.90 24.80 23.00 hmmbuild --amino -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.23 0.72 -4.18 13 533 2009-01-15 18:05:59 2003-06-09 16:11:06 8 1 520 0 49 181 1 97.60 72 81.20 CHANGED GssLspRG.sssSsPLpssssls..sGhlToVsWRYplsussPsGLpl+LCsto..RCVtLsG.usGpTcuFsG.sAspshpFlaclsGpGpL.PsLpVpSNQVlVNY...R ...............GlTLsaRG.ShSSuPLosp.pPs.......u.G.lMTLVAWRYQLhG.P...TPuGLRVRLCSQS..RCVEL-G.QSGTTsAFuGlsAsEPLRFlWEVPGGGR.L.IPsLKVppNpVIVNYR.......... 0 3 15 29 +6194 PF06367 Drf_FH3 Diaphanous FH3 Domain Yeats C anon ADDA_2536 Domain This region is found in the Formin-like and and diaphanous proteins [1,2]. 
20.00 20.00 20.10 20.00 19.90 19.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.06 0.71 -4.91 37 1087 2012-10-11 20:01:00 2003-06-09 16:19:41 11 51 230 17 649 1003 4 187.70 26 17.15 CHANGED suappllcAhhshtthtt..psRF.psllsshcp...................pspplchpsusMthINtllsss...p.-.........lphRlHLRsEhhthGLpcllschcph....cs-pLppplphFcppctcDhp-lh..........p+hc........shpl-hccssplhchl...hpplpcopupshh.lSlLQHlLLh.......psctttt.phhcll-phlsplslppph.t.........ppctslphslppL.......lc ..........................................uachlLpAhsphtphpt..ppRF..psllpthcp........................................................ppsslphtlAsMphINsllpus......-..-..............lpaRlHLR.EF....hphGL.....p....p.hLp..c.L.+ph................cs-p.Lp....hQlp.hap-p.tttD.tpLh..............................................cchc.........................shph...s.h.....c.......ps..p...p...l...hphl...........hpp.l.pc.o.ps.pshh...lShLp+hLhh...............pc...ttt.phhpl.l-phlpplsh.pph.p...........p.php..ht............................................................................................................................................................ 1 194 270 470 +6195 PF06368 Met_asp_mut_E Methylaspartate mutase E chain (MutE) Moxon SJ anon Pfam-B_14693 (release 9.0) Family This family consists of several methylaspartate mutase E chain proteins (EC:5.4.99.1). Glutamate mutase catalyses the first step in the fermentation of glutamate by Clostridium tetanomorphum. This is an unusual isomerisation in which L-glutamate is converted to threo-beta-methyl L-aspartate [1]. 
25.00 25.00 72.00 44.00 21.70 21.00 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.49 0.70 -6.30 5 223 2009-09-11 06:01:28 2003-06-09 16:20:39 6 3 212 6 51 172 5 425.00 53 90.69 CHANGED lPs+KRFu+tLc+Asp-GKlLsQPRAGVuLLDEHIcLLcsLpcEGuADLLPoTIDSYTRpN+YEcAtlGLc+St-uspShLNGFPlVNHGVcuCR+llcuV...-hPlQVRHGTPDARLLAElslAuGFoSaEGGGISYNIPYoKuVsL-col-sWQYCDRLlGhYEEpGl+INREPFGP.LTGTLVPPSlSpAluIlEGLLAsEQGVKsITVGYGQsGNlsQDIAAl+AL+ElupEYLssauasDlsloTVFHQWMGGFPcDEuKAaulIuhuuslAuhSGsTKVIVKoPcEAsGIPTusuNAsGL+sT+phLsMl-cQ+ls.h-sl-pEpulIKcEs+uILcKlFELGcGDlA+GTV+AFEsGVLDIPFuPScsNAGKhhPsRDssGslRlL-hGsVPlsc-lcphH+c+lcERAchEGRElSFQMVlDDIhAVScGRLIGRP .............................lPcpKpFuhtLhcAcpcG+TLsQPRAGVALh-EHIcLLcsLp-E..sDLLPoTIDuYTRlNRY-EAtsGIccSl-uGpShLNGhPlVNHGVsuCR+lsEsl...ptPlQlRHGTPDARLLAEIuhAuGFTSaEGGGISYNIPYuKcVoLE+SIccWQYsDRLhGh.YE.Ep.GlcINREPFGP.LTGTLlPPhISpulAIIEGLLAlEQGVKSITVGYGQsGsLsQDlAAIpuLRELucEYhppaGa...sDhcloTVFHQWMG..GFPcDEuKAFulISaGAulAuhuGATKVIsKoPHEAhGIPTstANhpGL+so+Qh.L.s.Mls-.QchP...s.ssl.-hE.h.ElIKpEsRAlLsKVaELG..sG.DlA+GTVhAFEAGVLDVPFAPuttNA.GKllPsRDNsGAIRlL-sGslPlsc-Ih-hH+chlt.ERA+hEGRpsoFQMVlDDI.AlS+u+LlGRP......................... 0 21 37 46 +6196 PF06369 Anemone_cytotox Sea anemone cytotoxic protein Moxon SJ anon Pfam-B_14701 (release 9.0) Family Sea anemones are a rich source of cytotoxic proteins. Cytolysins comprise a group of more than 30 highly basic proteins with molecular masses of about 20 kDa. Cytolysins isolated from the sea anemone, Heteractis magnifica, include magnificalysin I (HMg I), magnificalysin II (HMg II) and Heteractis magnifica toxin (HMgtxn). These are highly homologous at their N-terminals. HMg I and II have molecular masses of approximately 19 kDa, and pI values of 9.4 and 10.0, respectively. 
Cytolysins isolated from other sea anemones Actinia tenebrosa (Tenebrosin-C, TN-C), Actinia equina (Equinatoxin, EqT) and Stichodactyla helianthus (ShC) exhibit pore-forming, haemolytic, cytotoxic, and heart stimulatory activities [1]. 25.00 25.00 25.00 25.20 20.90 24.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.21 0.71 -4.86 3 46 2009-01-15 18:05:59 2003-06-09 16:28:07 7 1 24 19 21 55 0 153.10 36 87.33 CHANGED -VAGAVIDGASLoF-ILcKVL-ELGKVcRKIAVGVDNESGtTWTALNTYFRSGTSDVlLPacVPNoKALLYoGRKS+GPVATGAVGVLAYhMSsGNTLAVMFSVPFDYNWYSNWWNVKIYcGKRRADQ+MYEELYYN.NPaRGDNGWapRNLGY.GLKhRGFMTSuG-AKLpIHISK ...............................................................................hpRplslthpN.oshph.hs.t...sYh.SGtsp..ls..l...tpshhhshpKspGs.sATGuVGVlsYt..h......ss......u......p......T......lAlMFSVPaDYNLYSNWasVtl..ap.sp+..p..sDpphYcphY.s.........s..s.....h....cs.c.s.sht..pt..........uh.....ul.......c.......hcuhMss.GpAhlplcl............................................ 0 3 8 17 +6197 PF06370 DUF1069 Protein of unknown function (DUF1069) Moxon SJ anon Pfam-B_14815 (release 9.0) Family This family consists of several Maize streak virus 21.7 kDa proteins. The function of this family is unknown. 
25.00 25.00 28.20 204.90 20.50 18.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.24 0.70 -4.96 2 13 2009-12-01 13:51:58 2003-06-09 16:30:04 6 1 13 0 0 7 0 197.60 99 100.00 CHANGED MGSKCKAIAGGNDHVQSSSLDGTNSSVRYILPLRSHSQPTRELVEVDVALARLPTsIRWRNVRTNPPVGLHVEQPTPFHHEAMTQLPAHLPCRRPGFEGVRVGKYSLRGRSLSAGSGVIHQPHHTGSGVGVPAGCRVSNEVVVDGNLVRQSLAGVAVAlVRSSGIGVDEVTYASGGDRYHGGPGMLECLDLEGWPIGLAASPLQPS MGSKCKAIAGGNDHVQSSSLDGTNSSVRYILPLRSHSQPTRELVEVDVALARLPTCIRWRNVRTNPPVGLHVEQPTPFHHEAMTQLPAHLPCRRPGFEGVRVGKYSLRGRSLSAGSGVIHQPHHTGSGVGVPAGCRVSNEVVVDGNLVRQSLAGVAVALVRSSGIGVDEVTYASGGDRYHGGPGMLECLDLEGWPIGLAASPLQPS 0 0 0 0 +6198 PF06371 Drf_GBD Diaphanous GTPase-binding Domain Yeats C anon ADDA_2536 Domain This domain is bound to by GTP-attached Rho proteins, leading to activation of the Drf protein. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.08 0.71 -4.94 20 1367 2012-10-11 20:01:00 2003-06-09 16:33:03 8 48 240 17 827 1229 2 151.50 21 17.18 CHANGED htps.s-cplpphFtcllc-h...sLsccc+...cshhshshpcKWphlhpcppsphpt.............................pppspptSPcaYlcpLpssshsp..............ppLcSLpVuLpopPluWVppFhphpGhssLhslLpphppcpsps............p.shcpcacll+CLKAlMNNpaGhcpsLs..pppslhhlupSLsosp.pTpphsh-lLosLCl 
.............................................................................................................................................th............sls...p.t......t...h...t.h..s..hK.hphh.p...................................................................................................................................tt.......st.hh.p.h.t............................................phlppL..c...ht......L......p...........s......p....h.........s.........WlppF...........t..........p..Gh....shLhp....hL.tt.......h............t.........................................................................p....h.hlhC.l.+AlM.......N...p.p..................G.h.......p...h..h.ht............p.p.s.......l....ls.u.l.....s.....tp.........t...hhh...s..h..clLshlC........................................... 0 248 348 591 +6199 PF06372 Gemin6 Gemin6 protein Moxon SJ anon Pfam-B_14816 (release 9.0) Family This family consists of several mammalian Gemin6 proteins. The exact function of Gemin6 is unknown but it has been found to form part of the Pfam:PF06003 complex. The SMN complex plays a key role in the biogenesis of spliceosomal small nuclear ribonucleoproteins (snRNPs) and other ribonucleoprotein particles [1]. 25.50 25.50 25.90 25.80 25.10 25.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.24 0.71 -4.93 2 70 2009-01-15 18:05:59 2003-06-09 16:41:22 7 1 63 2 51 78 0 152.90 40 95.14 CHANGED hspWhchS..papsas.K.V+llhs-.+phcGWlhssDPVStsllLsp.hE.GthSsoslhGHulpphEh.pEh-aph...EKL.asatph-spGa...DLEc++oslh+WLEKN+lsVT.p...pchlCVhGVLsI-PPYs.EsCpSSN.IILpRIQcLIQsh.sspp ............................tW..hsPhpapsYlhKpV+Vpss.-tpcacGWlhTsDPVS......usl.VLl.....shhE.c.u.p..h..o.l.pslhGHAVpsVEsls..-.u...-.....p.....ps+....E+L...chFh..st...-.s..p.u..hS......E-...LccRKs.....sL+cWLccN+IPlsE....pucstpoL.sVA.GVLTI-PPYsPEsCsSoNpIILuRlQsLIpsh................ 
0 15 18 34 +6200 PF06373 CART Cocaine and amphetamine regulated transcript protein (CART) Moxon SJ anon Pfam-B_15325 (release 9.0) Family This family consists of several cocaine and amphetamine regulated transcript type I protein (CART) sequences. Cocaine and amphetamine regulated transcript (CART) peptide has been shown to be an anorectic peptide that inhibits both normal and starvation-induced feeding and completely blocks the feeding response induced by neuropeptide Y and regulated by leptin in the hypothalamus. The C-terminal part containing the three disulfide bridges is the biologically active part of the molecule affecting food intake. The solution structure of the active part of CART has a fold equivalent to other functionally distinct small proteins. CART consists mainly of turns and loops spanned by a compact framework composed by a few small stretches of antiparallel beta-sheet common to cystine knots [1]. 21.10 21.10 21.60 22.40 19.80 18.20 hmmbuild --amino -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.56 0.72 -4.02 9 78 2009-09-10 15:42:36 2003-06-09 16:46:50 6 2 44 1 33 82 0 70.40 66 65.26 CHANGED sss.spEK-L...ltALpEVLEKLQsKRIs.hEKKhGpVPhCDlGEQCAlRKGuRIGKLCDCPRGosCNaFLLKCL ......................p..p.ppEKpL...l-ALQEVLcK.....LcoKRI..PlaEKKaGQVPhCDsGEQCAVRKGARIGKLCDCPRGosCN.FLLKCL.......... 1 1 7 15 +6201 PF06374 NDUF_C2 NDUFC2; NADH-ubiquinone oxidoreductase subunit b14.5b (NDUFC2) Moxon SJ anon Pfam-B_15334 (release 9.0) Family This family consists of several NADH-ubiquinone oxidoreductase subunit b14.5b proteins (EC:1.6.5.3). 
21.70 21.70 21.90 24.10 21.30 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.68 0.71 -4.09 5 104 2009-01-15 18:05:59 2003-06-09 16:51:15 6 2 82 0 56 110 0 105.50 39 95.41 CHANGED uRsss-PLcFLPD....EARSLPPPKLsDPRLlasGlLGYCoGLhDNhlRRRPVhhAGLHRQLLFlTuFVFAGYYalKRcNYhYAVRD+-MFuYIKLHPEDFPEKDKKTYGEVLEsFHPVR ....................................s.........hLss....csp.LPP..Ptlhs.st..lahGh.hGassu..l..lsNhl....p....RRPhh.uGlHRplLasoshhhhGYalsKhpshh....aAt+DpphhpYlcLHPEDFs...c...p.-+..Kpau..-lhE.ahPlR........ 0 14 19 38 +6202 PF06375 BLVR Bovine leukaemia virus receptor (BLVR) Moxon SJ anon Pfam-B_14559 (release 9.0) Family This family consists of several bovine specific leukaemia virus receptors which are thought to function as transmembrane proteins, although their exact function is unknown [1]. 26.40 26.40 26.90 26.90 25.00 25.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.33 0.71 -4.39 16 148 2009-01-15 18:05:59 2003-06-10 10:48:18 6 9 89 2 77 143 0 137.40 48 14.42 CHANGED .opEElc+RREARphEQsNNPaYLKuuspspsutss.........htsh-cIPlspI-Lsl....PLc.....lsG.......hhtSDKYlptppppp..........pcKscK........KcKK++++.....pKcupph....sss---D......tPlchVNpshtE.MPEGAp.SD...pcccstcssDPHRALDIDL- ....................................................h.sEEELtRRREAR+pEQANNPaYlKSSPospKphpss..............sslE+IP..VspIDLoV....PLK..............VPG.......hshSDpYlKh--cR+.........tp.cKcK+..........K+K+pccc.....cKttp+hpts.toES-EDh.....sPsp.Vshss.tE.MPEsAhsSD.....-...-cpDPNDPa+ALDIDLD.................. 0 26 31 58 +6203 PF06376 DUF1070 Protein of unknown function (DUF1070) Moxon SJ anon Pfam-B_14060 (release 9.0) Family This family consists of several short hypothetical plant proteins of unknown function. 
25.00 25.00 30.30 30.30 21.20 15.90 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.51 0.72 -4.09 15 109 2009-01-15 18:05:59 2003-06-10 10:50:05 7 2 18 0 64 105 0 34.80 61 50.00 CHANGED Asus...t..s.DGpslDQGIAYlLMhsALslTYLhH ............APAP.....AP.oSDGpuIDQGIAYlLMllALlLTYLlH.. 0 4 35 50 +6204 PF06377 Adipokin_hormo Adipokinetic hormone Moxon SJ anon Pfam-B_14600 (release 9.0) Family This family consists of several insect adipokinetic hormone as well as the related crustacean red pigment concentrating hormone. Flight activity of insects comprises one of the most intense biochemical processes known in nature, and therefore provides an attractive model system to study the hormonal regulation of metabolism during physical exercise. In long-distance flying insects, such as the migratory locust, both carbohydrate and lipid reserves are utilised as fuels for sustained flight activity. The mobilization of these energy stores in Locusta migratoria is mediated by three structurally related adipokinetic hormones (AKHs), which are all capable of stimulating the release of both carbohydrates and lipids from the fat body [1]. 22.90 22.90 22.90 24.20 22.50 22.00 hmmbuild --amino -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.44 0.72 -3.82 15 60 2009-01-15 18:05:59 2003-06-10 11:50:57 6 4 44 0 22 63 0 52.90 42 44.60 CHANGED QLNFoPsW..GKRuususu................................tssC+s.ss-slhhIY+lIQsEAp+llpCpc .QLsFSPsW..GKRSssssu................................tssC+s...ss...-sLhtIY+hlQsEAp+hlpCpp........ 0 8 10 19 +6205 PF06378 DUF1071 Protein of unknown function (DUF1071) Moxon SJ anon Pfam-B_14587 (release 9.0) Family This family consists of several hypothetical bacterial and phage proteins of unknown function. 
20.50 20.50 26.80 27.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.05 0.71 -5.10 11 191 2009-01-15 18:05:59 2003-06-10 12:28:58 6 1 183 0 11 110 95 162.20 42 77.30 CHANGED pohFEpLusIcVsc+lEK....KssLoYLSWAaAWsplKKthPsATapl+cFst.th.t.sh.........sahcophGahVpVsVTl....cslocpphLPVhDaRNKsl.................tK..PosFDINpolhRCLVKAlAhaGLGLYIYuGEDLP.........ch.pp.pp.pshppp.tpppsthsp ...................................olFEpLsshsVN-+hEp..............K...s..sLsYLSWuaAapElKKlpPssohcltEas..s.p.s.sY..........ahsstpGa.hVpVuVTV....csh..TcspaLP..VhDaRNK.ul...............................tK..sTsFDINKuhpRChVKA.lAh.H.GLGLYIYsG.EDL.Ppss...p.tclp-c.pph.s.pQphtc.tsh..ppt.......................... 0 3 8 9 +6206 PF06379 RhaT L-rhamnose-proton symport protein (RhaT) Moxon SJ anon Pfam-B_14617 (release 9.0) Family This family consists of several bacterial L-rhamnose-proton symport protein (RhaT) sequences [1,2]. 19.00 19.00 19.70 19.70 18.90 17.70 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.36 0.70 -5.46 2 639 2012-10-02 19:55:49 2003-06-10 12:32:38 7 2 582 0 70 330 44 313.70 70 99.30 CHANGED MsNAIhhGIhWHLlGAASAACFYAPFKQVKpWSWETMWSlGGlVSWLILPWTlShLLLPDFWtYYGpFsluTLLPVFLFGAMWGIGNINYGLTMRYLGMSMGIGIAIGITLIlGTLMTPIlpGpFDVLltT.GGRMTLLGVFVALIGVuIVohAG.LKERtMGIpAEEFNLKKGLlLAVMCGIFSAGMSFAMsAAKPMHEAAuALGlssLYVALPSYVlIMGGGAllNLuaCFIRLAplpNLSlKADFSlA+PLlIoNILhSALuGLMWYLQFFFYAWGHA+IPtQYDYMSWMLHMSFYVLCGGlVGLlLKEWKsus++PVAVLslGClVIIlAANIVGLGMAu 
.....................................................MspAIhhGIhWHLIGAASAACFYAPFK+VKcWSWETMWSV.GGIVSWlILPWsISALL.LP......s...FWA...YYu.pFsl.S...TLLPV...FLFGAMWGIGNINYGLTMRYLGMSMGIGIAIGITLIVGTLMTPI......I........N.......G........N..F...D.V.L.Is..T.E.G..G..R...M.TLLGVhVALIGVGI.VTRA..GQLK.....E...R.K.M.........G.....I.....K.......A......E......E......F.....NLK......K.....GLlLAVMCGIFSAGMSFAMNAAKP.M....HE.AA.A....A.L.......G.V.........D....P.....LYVALPSYVlIMGGGAllNL...GFC..FIRLAKVK..........sL.....S..l...KADFS.L.....AK....P....LI..IpNlLLSALGGLMWYLQFFFYAWGHARIP..AQ..YDYh..S..WMLHMSFYVLCGGlVGLVLKEWpNAGRRPVsVL..SLGCVV..IIlAANIVGlGMA........................................................................ 0 23 47 59 +6207 PF06380 DUF1072 Protein of unknown function (DUF1072) Moxon SJ anon Pfam-B_14592 (release 9.0) Family This family consists of several Barley yellow dwarf virus proteins of unknown function. 25.00 25.00 35.80 35.60 19.70 19.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.17 0.72 -4.29 8 42 2009-01-15 18:05:59 2003-06-10 12:58:32 6 1 7 0 0 41 0 38.70 54 93.28 CHANGED MDDLHVIAVClLAhTVLoG....lGAVhGCChGChpss.sssps MDDLHVIAVCl..LAhTVLoG....luAVlGCChGChps..su....... 0 0 0 0 +6208 PF06381 DUF1073 Protein of unknown function (DUF1073) Moxon SJ anon Pfam-B_14928 (release 9.0) Family This family consists of several hypothetical bacterial proteins. The function of this family is unknown. 
26.40 26.40 26.40 28.10 25.00 24.50 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.80 0.70 -6.00 25 555 2009-01-15 18:05:59 2003-06-10 13:00:25 6 4 357 0 63 498 42 335.50 22 77.28 CHANGED shsphphtuhYpsshls+phl-hsA-DhsRpshplputst....................cphpplcsphcclplppplp-slpasRLaGuuhllhtl..........cspphspPl..s.-plttuuhctlsslcthhlpss...hlspsshussaGcPphapl.s.st..............stcIHpSRllhh..........................ththPh..pht+.s.phaGpSllpp.lh-tlpshDsotsusupL.......lactplssl+ssshpplhsssp...splhcclshhpphcu.pulhllDs.......p-chpphssshu.GLc-llsphhptluusucIPhs+LhGpsPsGhNuoG-uDhcsYYDplpuhQ-pclpsslcpLhchlhhSthu........spslpa..cFsPLhpho-p-+A-ltpppu-uspthlss .................................................................................................................h........thY.p.shhhtphl-..u-Dhh+psh..lputt.....................cphsthp.t.thpcl...p..lpptlhpslh....t...R..haGh.uhlhlhs..........pttshppPl.......p.c..th...hthchl.h.h...ph.h....lsssh..hhp.csh...u........sshs...p.sp.a.l.........................uhcIHcSRllhh..........................th.h.h.....p.s...hh...G.shlpp..lh-tL.pshssshsu.ssph....................lhpttlp..hhpsp........p.....hpphhs..tt..........t...................psltp.pl.phlpphp.sNp.G.hhhhss..........p-ph-shp.h......s.hu.sLc-h.l....s.httpluussphPhs+Laupp.spGhsusGcuDh..pNYYDhlpu....lQE....p.....hhps..sh....p+...lhchht..hs..............th.h..cFssLh.hoppppsphthphsphhpth...h............................................................................... 0 13 33 46 +6209 PF06382 DUF1074 Protein of unknown function (DUF1074) Moxon SJ anon Pfam-B_14962 (release 9.0) Family This family consists of several proteins which appear to be specific to Drosophila melanogaster. The function of this family is unknown. 
21.30 21.30 21.40 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.46 0.71 -4.31 3 76 2012-10-02 14:16:02 2003-06-10 13:06:12 6 4 15 0 41 84 4 103.50 23 66.99 CHANGED Mussplptsp.hhst.lSluup...D.sspthsphhsh.IR.Rs.phstshhphtp.++htthGRKRGRKEYCPPIYKRQKVARVTNNGYLNFMTEYKKRFYGLSPQDMVHYAAKQWTQLSMAEKEAFKSKKPSTITLKSPAQYVACEMKSDVAGGQQSSCQRQSPSARLRESERRSSRSKTLCRSA ................................................t................................................................................................................s.................+p..p..p...u.....l..o.s....s.uYlNFlRpa....++....+....a..s....s..Lps.p-llppAA+tWspLoptcKpta...cp.................................................................hh...t............................................ 0 7 7 30 +6211 PF06384 ICAT Beta-catenin-interacting protein ICAT Moxon SJ anon Pfam-B_15027 (release 9.0) Family This family consists of several eukaryotic beta-catenin-interacting (ICAT) proteins. Beta-catenin is a multifunctional protein involved in both cell adhesion and transcriptional activation. Transcription mediated by the beta-catenin/Tcf complex is involved in embryological development and is upregulated in various cancers. ICAT selectively inhibits beta-catenin/Tcf binding in vivo, without disrupting beta-catenin/cadherin interactions [1]. 25.00 25.00 28.30 26.60 24.50 20.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.51 0.72 -4.20 8 132 2009-01-15 18:05:59 2003-06-10 13:28:22 6 4 77 3 83 108 0 76.50 47 44.90 CHANGED MsRDltlGKlucDlYsQQKVEILhAL+KLGEpLoP-EEAFLsspAuAshSQFEKVop.slGuGcK..lhAhAuSplEsspK .............MsR-hhsGKhsc-hYhQQKVElLhALRKLGppLTssEEtFLps...A..Gssh..SQ.hpplsp..s...lspGsc..lhAhup.pscctp.................... 0 22 28 52 +6212 PF06385 Baculo_LEF-11 Baculovirus LEF-11 protein Moxon SJ anon Pfam-B_15073 (release 9.0) Family This family consists of several Baculovirus LEF-11 proteins. 
The exact function of this family is unknown although it has been shown that LEF-11 is required for viral DNA replication during the infection cycle [1]. 21.20 21.20 27.30 27.00 21.10 18.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.05 0.72 -3.89 24 60 2009-01-15 18:05:59 2003-06-10 13:31:41 7 1 58 0 0 54 1 94.50 40 79.17 CHANGED CLTRS-VYAllREsINp+K+sh-scNVsAHl.-.ssFss.ppYIRssls+hhIhpucppp...+plshHtpRlpplFsLp.poLcpEYpts.hs+h.tspp CLTRSEVYALlREsINp+K+shpscN.VsAHl.-..ssFps.ppYIRtNls+.hhllsucppp...+plshHtcRlsplFsLp..p...oLcpEYpps.ls+httt.p...... 0 0 0 0 +6213 PF06386 GvpL_GvpF Gas vesicle synthesis protein GvpL/GvpF Moxon SJ anon Pfam-B_15376 (release 9.0) Family This family consists of several bacterial and archaeal gas vesicle synthesis protein (GvpL/GvpF) sequences. The exact function of this family is unknown. 23.60 23.60 24.20 24.20 22.70 22.70 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.62 0.70 -4.79 68 377 2009-01-15 18:05:59 2003-06-10 13:37:45 6 6 145 0 171 417 19 222.00 22 91.36 CHANGED h.sp.ulYlYuIl...sssssh............t...............hsh.Glssss..Vhs....l..phssluAlVSssstsp...pstccsl...........hsHppVlcplhcp.s..slLPh+FGslh.ps.......tcs..lpphL.psttcphpptLsclpGphEhulKlhhsp.t..hhpplhtps.plpthptphttt..........s..stsh..pph...phsphlpptlppcppphspplhctLpslutpsp.pcs....................................hs-chllNsAaLVspsctspFsptlcplspphss...lslchoGPhPPYsFs..shph 
......................................hYlYulh.........tt...................................th..Glssts...lhh...........l.......thssluAlVu.p.ss.tt....pstccpl................hsHppVlptlhtt..s...slLPh+FGslh..ts........pcs.lpphL.ptptpphtptLpplpG+sEhslKshhsp......tth..tts.th....tt..ttth.ttt..................s...sts.a.tph....phtphhtpthttptpphspplhptLpshu.psthpts.............................................spp.lhshuaLlsps......pt......s.t.Ftptlppltpphs...t......hplchsGPhsPYsFst............................. 0 67 130 164 +6214 PF06387 Calcyon D1 dopamine receptor-interacting protein (calcyon) Moxon SJ anon Pfam-B_15400 (release 9.0) Family This family consists of several D1 dopamine receptor-interacting (calcyon) proteins. D1/D5 dopamine receptors in the basal ganglia, hippocampus, and cerebral cortex modulate motor, reward, and cognitive behaviour. D1-like dopamine receptors likely modulate neocortical and hippocampal neuronal excitability and synaptic function via Ca(2+) as well as cAMP-dependent signaling [1]. Defective calcyon proteins have been implicated in both attention-deficit/hyperactivity disorder (ADHD) [2] and schizophrenia [3]. 25.00 25.00 38.30 38.20 22.10 21.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.03 0.71 -4.92 3 132 2009-01-15 18:05:59 2003-06-10 13:45:12 6 2 41 0 72 117 0 169.40 55 98.17 CHANGED MVKLGsNFuEKGuKpPulEDG..FDTVPLITPLDVNQLQhusPDKVVVKT+TEYQP-p+sKGKtRsPpIAEFTVs.p.t.scRh.solLVuFALAFLuCVVFLVVYKAapYD+.oCPDGFVLKHKpCIPtoLEuYYoEQDPSuRc+FYTVIuHYsLAKQSsTRuluPWhSuhuttK................................sh+EsEsPcKuu ............MVKLGsNhu-KssKtPss.....EDG..FpTlPLITPL-VspLQhPsP-..KVl.V.K.T+TEYps-pK.KGKhRsPKIAEFTls...t.sc.+h.solLlhhALAFLsClVFLVVYKsapYD+.uCP-GFVhK......pppCIPtuL-uYYopQ....D.ssuR....p+....FYTVIs..HYslAKQohoRuluPWhos.hut.p................................s.pEscsspp..u................................... 
0 6 9 25 +6215 PF06388 DUF1075 Protein of unknown function (DUF1075) Moxon SJ anon Pfam-B_14186 (release 9.0) Family This family consists of several eukaryotic proteins of unknown function. 25.00 25.00 25.20 27.90 20.40 24.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.86 0.71 -4.54 12 137 2009-01-15 18:05:59 2003-06-11 11:03:00 6 1 80 0 83 122 0 130.30 34 90.73 CHANGED thGphhR.h......Ruhuos.phpc..............tt.stpsPtus...pph.ppsst.++Psph-K+hLlWoGRaKo.-EIPphVS.EMl-suRNKhRVKlsYlMIuLTlluChh.MllSGK+AscR+ESlsphNL-h+s+apEt........uhpupuc ...................................................................t..................................................tp......tsts....spt.h.......pclsh...++Posa-KKlLlWoGRFKpt--IPph..lS...EMl-sA+NKhR...VKlsYlMIuLTllGChh.hlhpGK+AspRp..E...olsphNlc++tch+..Ect..............t....................................... 0 17 23 49 +6216 PF06389 Filo_VP24 Filovirus_VP24; Filovirus membrane-associated protein VP24 Moxon SJ anon Pfam-B_15734 (release 9.0) Family This family consists of several membrane-associated protein VP24 sequences from a variety of Ebola and Marburg viruses. The VP24 protein of Ebola virus is believed to be a secondary matrix protein and minor component of virions. VP24 possesses structural features commonly associated with viral matrix proteins and that VP24 may have a role in virus assembly and budding [1]. 
25.00 25.00 333.70 333.60 20.20 19.30 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.75 0.70 -5.16 3 32 2009-01-15 18:05:59 2003-06-11 11:17:39 6 1 20 4 0 28 0 252.00 62 100.00 CHANGED MAKATGRYNLVsPK+-hEKGVl..FSDLCNFLlTPTVQGWKVYWAGlEFDVsQKGMTLLsRLKoNDFAPAWAMTRNLFPHLFKNPpSsIQoPIWALRVILAAGLpDQLLDHSLIEPLoGALsLISDWLLTToToHFNlRTppVKDQLShRMLSLIRSNIlNFINKL-TLHVVNY+GLLSSIEIGTooasIIITRTNMGFLVEVQEPDKSAM..-o++PGPVKFSLLHESsLKPaopscpSuhpSLIMEFNSpLAI MAchosRYNL..PppshEKuls..Lsshspalhp.oltGWpVhWush.FclsppGMsLLH+LKoN.hsPtWp.TRNLFsHLFpNPpSTI.pPhhALRllLusuLpDQ.LpQSLI.shpuhlphlS-WLLhpsTothplpsphlt..Lo.cMhpllhuslhpFhNKL.sLHVVN.pGh.SSIEIthosppIIITRsNMGFLVEVpc.Dhpsh..pohhstsVhFuLltEusL+taoQhppup..sLh..hNSplAI 1 0 0 0 +6217 PF06390 NESP55 Neuroendocrine-specific golgi protein P55 (NESP55) Moxon SJ anon Pfam-B_16185 (release 9.0) Family This family consists of several mammalian neuroendocrine-specific golgi protein P55 (NESP55) sequences. NESP55 is a novel member of the chromogranin family and is a soluble, acidic, heat-stable secretory protein that is expressed exclusively in endocrine and nervous tissues, although less widely than chromogranins [1]. 
22.60 22.60 22.80 22.70 22.30 22.50 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.00 2 34 2009-01-15 18:05:59 2003-06-11 11:26:26 7 6 21 0 16 42 3 183.70 46 67.10 CHANGED MDRRSRsQ.hRRARHNYNDLCPPIGRRAATALLWLSCSIALLRALAoSssRAQQR.AAQRRoFLNAHHRS.....AAQVhPEs..sESDHEcp-hEPpL..PEC.EYpp--a-hE..SETEsES-IESETEh....ETEs-TAPTTEPETEPEDE.G...P+tsTFpQSLTpRLpAL+LpSsDASPpRA.PoTQEsESsppGEEPp....DpDPRDPEEp.E.+cEEppQ.+RCKs++Ps.RRD.SPESPs++GsIPIRRH ....................................................................................................................................................s.sp...spSDp-pp..phc.pL..sEs..-ap.p-h-hE..oETEsES-lEoET-..h....ETEs-Tt.PtTEPETE..PED-pG....P...+.tsTF......sQ..SLTpRLpAL+lp.SsDsp.pps.PosQpspssppGEE.p..P....pD.+sPEEp..-.+.p.....pQ.+RCKs+.+.s..RRD.SPESPs++GPIPIRRH........................................................................... 1 1 4 7 +6219 PF06391 MAT1 CDK-activating kinase assembly factor MAT1 Finn RD anon Pfam-B_16773 (release 9.0) Family MAT1 is an assembly/targeting factor for cyclin-dependent kinase-activating kinase (CAK), which interacts with the transcription factor TFIIH [1]. The domain found to the N-terminal side of this domain is a C3HC4 RING finger [1]. 
25.00 25.00 27.00 27.00 19.90 19.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.40 0.71 -4.73 6 318 2009-01-15 18:05:59 2003-07-03 16:31:22 8 7 278 1 239 314 1 168.20 31 55.13 CHANGED LRKspFRsQlFEDspl-KEV-IRK+lhKIaNKpp-DFs.uL+EYNDYLEElEpIlaNLspslDl-pTccKlctYcKpNK-sIt+N+t+hssEQc.LEptLchE+cpcpc+R.thpp-Epppcht.KcpsKppllD-LpoSshssshlls.p+Ksps.hphEtthcK.cph+psshssuh+pu...phshsslp+lcEthasapPlp ...............LR+spF+hQhF-D.tV-KEV-IR+RVhpl.aNKp--DF....s.oLc.-.YNDYLE..c.......l..E-I..laN.Lss...s..l..D...l..t...p..T...ctclp..pYpp-Np.p.I.t.pNp.t.+.h..........pp.-ppt........hp.ph.ph.Epp.t...p..p+..+...tp...tptpph..t.tt+pthlspLt..........t.u.....p..........s...t.hlt...t.ttt.....t.p.................................................................................h............................................................ 0 76 130 197 +6220 PF06392 Asr Acid shock protein repeat Finn RD anon Pfam-B_20230 (release 9.0) Repeat The Asr protein is synthesised as a precursor and the cleavage is essential for moderate to high acid tolerance [1]. 21.00 21.00 21.10 21.00 20.10 20.90 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.33 0.72 -6.55 0.72 -3.57 8 470 2009-01-15 18:05:59 2003-07-03 16:34:35 6 3 450 0 18 119 1 19.30 85 18.79 CHANGED HHKKQH....KsAsEQKAQAAKK ...HH.KKQH......KAAPAQKAQAAKK. 0 1 3 8 +6221 PF06393 BID BH3 interacting domain (BID) Finn RD anon Pfam-B_16321 (release 9.0) Domain BID is a member of the BCL-2 superfamily of proteins are key regulators of programmed cell death, hence this family is related to Pfam:PF00452 . BID is a pro-apoptotic member of the Bcl-2 superfamily and as such posses the ability to target intracellular membranes and contains the BH3 death domain. 
The activity of BID is regulated by a Caspase 8-mediated cleavage event, exposing the BH3 domain and significantly changing the surface charge and hydrophobicity, which causes a change of cellular localisation [1]. 25.00 25.00 27.20 48.50 24.30 19.80 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.28 0.71 -4.95 7 65 2009-01-15 18:05:59 2003-07-03 16:42:05 6 2 36 5 27 64 0 163.70 51 94.68 CHANGED M-.cV.sNGs....-phpphLlhuFLp...psscspFpcELpsLupE..LPs...th.tp..sELQTDGNRsu+..hpsthEs.DuEspE-llRpIAtpLAphGDcL-+p..I+PtlVsuLssphhNpoLsEE-hppsLAsslppLhpohPsDhEpEKshLllsMLLsKKlAspsPSLL+cVF+TTVsFIsQNhhsYlcpLsRp..- ......................ts-phTsLLlauFLp...ssssspFpcELcsLGpE.......Lss......hpt......-ELQTDGNRsS+.....hhtchEs.DSEsQ.......E-..llppIAtpLAplGDph-+u..I.PtLVssLAhQhhNsshSEEDRpppLAsAlEplh..Qsh..Pt..DhEpEKshLlLuhLLAKKVAsHoPSLLRcVF+TTVNFINQNLhsYlRsLsRN................... 0 1 2 7 +6222 PF06394 Pepsin-I3 Pepsin inhibitor-3-like repeated domain Finn RD anon Pfam-B_13438 (release 9.0) Domain Pepsin inhibitor-3 consisting of two domains, each comprising an antiparallel beta-sheet flanked by an alpha-helix. In the enzyme-inhibitor complex, the N-terminal beta-strand of PI-3 pairs with one strand of the active site flap region of pepsin [1]. The two domains are tandem repeats of sequence, and has therefore been termed repeated domain. 20.60 20.60 21.00 21.20 20.40 20.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.49 0.72 -4.32 13 63 2009-09-11 00:18:44 2003-07-03 16:51:00 8 2 18 2 35 64 0 70.30 29 53.54 CHANGED P+ss+p.oFsThssT.sthhhsGChVpsN+lYlsstalRDLTssEtpELpsFcpchssYpstlppplppphpsLhus ....................p..sFss.hsso.s.hhhsGChVpsN+lYssuhhlR-LTspEhpELppappchstYpp.lppthppphpsh............................ 0 14 18 35 +6223 PF06395 CDC24 CDC24 Calponin Wood V, Studholme D anon Pfam-B_32837 (release 8.0) Domain Is a calponin homology domain. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.67 0.72 -3.92 17 130 2012-10-03 10:10:54 2003-07-03 16:58:00 6 10 124 0 99 201 1 89.40 42 9.83 CHANGED DPVTpLWphFppGsPLChlFNslpP..cp.LsV.......ssss-h+hCKtulYcFlhusKpcLsass--hFsISDlaussTsshlKVlpVlsplL .........DPVspLWphhRpGhPLhhlaNulp.P.....p.p...L.s.l........................ssss-t.+..tsKtu.lacFl.AChp....c...L.s....a.ssp......-hFhIoDL.a...u.s..s.ToGFVKVlpsVsplL................. 0 26 55 87 +6224 PF06396 AGTRAP Angiotensin II, type I receptor-associated protein (AGTRAP) Moxon SJ anon Pfam-B_15509 (release 9.0) Family This family consists of several angiotensin II, type I receptor-associated protein (AGTRAP) sequences. AGTRAP is known to interact specifically with the carboxyl-terminal cytoplasmic region of the angiotensin II type 1 (AT(1)) receptor to regulate different aspects of AT(1) receptor physiology [1,2]. The function of this family is unclear. 25.00 25.00 26.50 26.00 22.90 22.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.10 0.71 -4.55 6 84 2009-01-15 18:05:59 2003-07-03 17:24:07 6 4 70 0 53 93 0 144.00 36 83.59 CHANGED MELPAVNLKlIlhVHWLLToauCls..hsGuYuasNFsILAhGVWAlAQRDSIDAIpMFLsGLluTIFhDllhIuIaaspss.......ltDhhRFSuGMAIlsLlLKPlSChhlYHMaRERGGph.htpGFlGsSp-R.SSYQsIDu.-sPADPhss.ps.u.ss.RGY .......................t.PhlplKsIhhlHalLsoWuhl...s....hsuuYtasNFslLshulWAltp+..DS.l-Alph.hLs.hhshoIhhDIlplulaa.sp.hs............h..tsh.+FusuhuIlsLlL+.PlSshhlY+hapcR....GGph.......h....s.h..t...ssppp.puYpsIDp........P.tt................................................................... 0 17 20 34 +6225 PF06397 Desulfoferrod_N Desulfoferrodoxin, N-terminal domain Finn RD anon Pfam-B_11142 (release 9.0) Domain Most members of this family are small (approximately 36 amino acids) proteins that from homodimeric complexes. 
Each subunit contains a high-spin iron atom tetrahedrally bound to four cysteinyl sulphur atoms This family has a similar fold to the rubredoxin metal binding domain [1]. It is also found as the N-terminal domain of desulfoferrodoxin, see (Pfam:PF01880). 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.95 0.72 -4.56 8 428 2012-10-03 19:45:42 2003-07-04 09:46:30 7 6 397 31 132 306 13 35.50 48 24.00 CHANGED scphpVYKCplCGNIVEVLcsGGGpLVCCGcPMcLh .......h...hchYKCppCGNl.V.E.VhpsG.GG.p..LsCCGc.Mch..... 0 65 116 129 +6226 PF06398 Pex24p Integral peroxisomal membrane peroxin Wood V, Studholme DJ anon Pfam-B_56111 (release 8.0) Family Peroxisomes play diverse roles in the cell, compartmentalising many activities related to lipid metabolism and functioning in the decomposition of toxic hydrogen peroxide. Sequence similarity was identified between two hypothetical proteins and the peroxin integral membrane protein Pex24p [1]. 
24.50 24.50 24.60 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.55 0.70 -5.66 54 690 2012-10-01 21:10:52 2003-07-04 09:52:51 6 64 241 0 464 811 3 261.70 22 35.45 CHANGED s.hosslLsssssplsppLuhhasal...scllpllTWossshhhohLllhsahslh.Yh..............thl.h.hshllhslhh.uhlaha....ts.h..............................................................................tp.pppPoLc......................................................-llhsLp...............slps+sshllpshstls.hh.........hpspsloshlFhhlhloslhlhls........hhllsh..+hlhlhsG.shlloaHsphpcshtphh...............hlsuhchp.............................................................hp....phh.shhpp..chh......h.ps.ps.p......................hph.laE.QR+h.luh...uWos.hhos-chshs.......sc...thsp.Ps..........................h-chp.Pp.....................sWcWs-..........pcWclDhsspthlp............sppc................GWlY..........................................ss...appsph.pDuhscasRRRRWhR ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hph.tlaENQR....h.....h...s...........sW.s..s........t....hh...s...-.ch.....sao..............st...ttpp.................................................................hcsh.p..h.Pp......................tWcW.s.............
.................cWplDh...s..t.hh..............................................................................ptpt......................................GWhYs...............................................................sh..........t..p.p....t...tcph.p.....ph.sRRR+WhR.................................................................................................... 0 129 242 373 +6227 PF06399 GFRP GTP cyclohydrolase I feedback regulatory protein (GFRP) Finn RD anon Pfam-B_63435 (release 9.0) Domain Tetrahydrobiopterin, the cofactor required for hydroxylation of aromatic amino acids regulates its own synthesis in via feedback inhibition of GTP cyclohydrolase I. This mechanism is mediated by the regulatory subunit called GTP cyclohydrolase I feedback regulatory protein (GFRP) [1]. 25.00 25.00 28.20 32.70 23.90 23.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.80 0.72 -4.39 4 69 2009-01-15 18:05:59 2003-07-04 10:35:58 8 1 53 35 39 61 0 80.00 67 93.65 CHANGED PYlLISTQIRhEsGPThVGDEaSDPpLMshLsAcKhpsLGNNFpEYaVc-PPRlVLsKL-+lGYRVlSMTGVGQTLVWCLHKE ......PYlLISTQIRhEsGPTMVGDEpSDPcLMpaLGApKpssLGNNF.EYaVsDPPRlVLDKLE+pGFRVlSMTGVGQ..TLVWCLHKE......... 0 10 13 22 +6228 PF06400 Alpha-2-MRAP_N Alpha-2-macroglobulin RAP, N-terminal domain Finn RD anon Pfam-B_44514 (release 9.0) Domain The alpha-2-macroglobulin receptor-associated protein (RAP) is a intracellular glycoprotein that binds to the 2-macroglobulin receptor and other members of the low density lipoprotein receptor family. The protein inhibits binding of all currently known ligands of these receptors [1]. The N-terminal domain is predominately alpha helical [1]. Two different studies have provided conflicted domain boundaries [2,3]. 
27.30 27.30 27.40 44.40 26.40 27.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.51 0.71 -4.44 10 108 2009-01-15 18:05:59 2003-07-04 12:32:49 6 3 73 7 51 104 0 113.90 50 33.65 CHANGED lsLl.hlllhhsshssuspuuKYSREsN.........EshssscRcsscEFRMsKLNQlWEKAQRL+LoslKLs-LHoDLKIQEKDELsWKK.LKs-GhDcDGEKEA+LRRsLslIho+YGL....DG+KDscplc .....................h....h.hhhh..sstttusKYS+EtN..........p...s.ssh+.p.....tp.FRMtKLNQlWEKApR.h....+..L.osV+Lu.-LHuDLKIQE+DElsWKK.LKs-GLDc....DGEKEA+LpRsLs...VILu+YGL....DG++Dsp.l.p.............. 0 12 15 32 +6229 PF06401 Alpha-2-MRAP_C Alpha-2-macroglobulin RAP, C-terminal domain Finn RD anon Pfam-B_44514 (release 9.0) Domain The alpha-2-macroglobulin receptor-associated protein (RAP) is a intracellular glycoprotein that binds to the 2-macroglobulin receptor and other members of the low density lipoprotein receptor family. The protein inhibits binding of all currently known ligands of these receptors [1]. Two different studies have provided conflicted domain boundaries [2,3]. 21.80 21.80 22.00 21.90 21.20 21.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.78 0.70 -4.48 11 128 2009-09-10 22:25:31 2003-07-04 12:54:44 6 3 87 4 66 123 1 200.30 44 62.32 CHANGED -hhcDP+L-KLWpKA+TSGKFScEEL-sLhREFpHHK-KIcEYpsLL-olu+sE..................................EhacNlIsPp-.sshKppsLps+Hs-LK-+hRsIspGaDRLR+loHpGasotpEFpEPRV.-LW-hApsuNFTccEL-Sh+EEL+HFEsKlEKHpHYQcQLElSHpKLKHhtthGDp-Hls+spE+ashLE-+hKEhGYKVKKHhQDLouRIS..th+HNEL ................................hhcD.+LpKLW.pKAcsSGKFSsEELcpLhcEF.HHc-KlcEYpsLL-slu+s-...............................-hpc.NsIs.sp........-....sthKt.pslps+...cs..-L.K-+hRsIppGhDRL.+.+.lopp.Ga.s.stpEFpEPRVh-LWclA.p.su.N.FTpcEL-.Sh.+.....EEL+HFEuKl-KHpHapcpLclu+pKh+....tthGDtE+lscppE+ashLE-+hK..cl.uhKV+KhhpDLps+I...hh+HsEL.................................. 
0 16 21 44 +6231 PF06403 Lamprin Lamprin Moxon SJ anon Pfam-B_15493 (release 9.0) Family This family consists of several lamprin proteins from the Sea lamprey Petromyzon marinus. Lamprin, an insoluble non-collagen, non-elastin protein, is the major connective tissue component of the fibrillar extracellular matrix of lamprey annular cartilage. Although not generally homologous to any other protein, soluble lamprins contain a tandemly repeated peptide sequence (GGLGY) which is present in both silkmoth chorion proteins and spider dragline silk. Strong homologies to this repeat sequence are also present in several mammalian and avian elastins. It is thought that these proteins share a structural motif which promotes self-aggregation and fibril formation in proteins through interdigitation of hydrophobic side chains in beta-sheet/beta-turn structures, a motif that has been preserved in recognisable form over several hundred million years of evolution [1]. 25.00 25.00 27.60 27.40 23.40 22.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.23 0.71 -4.43 2 8 2009-01-15 18:05:59 2003-07-07 09:25:55 6 1 2 0 1 7 0 122.90 78 98.60 CHANGED MAAshQALLVlALLHLATATPVlsKppVSThSTGaLGHPVGGLGYGGLGYGGLGhuGLGVAGLGYGGLGYPGAALGGsYTHHAA...................LGGLGYPLGIGAGVVAPHVVpuKlAAPLAPVVAAI ...........................MAAshQALLVlALLHLATATPVlsKppVSThSTGaLGHPV.GGLGYGGLGYG..GLGhuGLGVAGL.GYGGLGYPGAALGGsYTHHAA...................LGGLGYPLGIGAGVVAPHVVpuKlAAPLAPVVAAI......... 0 1 1 1 +6232 PF06404 PSK Phytosulfokine precursor protein (PSK) Moxon SJ anon Pfam-B_16071 (release 9.0) Family This family consists of several plant specific phytosulfokine precursor proteins. Phytosulfokines, are active as either a pentapeptide or a C-terminally truncated tetrapeptide. These compounds were first isolated because of their ability to stimulate cell division in somatic embryo cultures of Asparagus officinalis [1]. 
25.00 25.00 38.10 34.70 21.10 19.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.46 0.72 -3.21 39 143 2009-01-15 18:05:59 2003-07-07 09:27:33 7 2 32 0 69 132 0 80.10 35 86.39 CHANGED lLLL.h.s.sh...stAARs....tPs.tspp.tt..........stsppscttt...................................sCc...t.ts-EE.C.LhRRsLs.AHhDYIYTQc+ps ...........................................h.LlL.h...ph....stAAR........Ps.tttp..t....................ttttsptt...................................thh..ttE.............................sC-...t.ts-.EE...C.L.hRRsLs.AHlDYIYTQc+p.... 0 8 37 52 +6233 PF06405 RCC_reductase Red chlorophyll catabolite reductase (RCC reductase) Moxon SJ anon Pfam-B_15577 (release 9.0) Family This family consists of several red chlorophyll catabolite reductase (RCC reductase) proteins. Red chlorophyll catabolite (RCC) reductase (RCCR) and pheophorbide (Pheide) a oxygenase (PaO) catalyse the key reaction of chlorophyll catabolism, porphyrin macrocycle cleavage of Pheide a to a primary fluorescent catabolite (pFCC) [1]. 
24.60 24.60 25.30 25.30 24.10 24.50 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.55 0.70 -5.44 8 86 2012-09-25 10:41:40 2003-07-07 09:28:14 6 2 40 10 34 91 13 202.40 42 84.35 CHANGED AHRElMlsLsuplEsRLGup..LLPsslPsDVp.FcN..tuGsApGSLclRSGspuSs.....................................IDFhLtSWlHCclP..tGGAlNITSLslaLNuSTDAPHFlhEhIQuSPTSLlllLDLlPRKDLsLHPDYLccYYEsTtLD.+pRppltcL.PcspPYhSsSLalRulhSPTAlhsoI-ssp..utttplEEIlpsclussAK-VLplWL-pCs..ssscE...ls-sERpthtKRDpllRpKoIElDLsuslP.RhFGt-VAsRVltsIRcAF .................................t.h..h.t...tpht....hLPsslPsDVp.aps...s..Gs...AtuSLclRsGttuS......................................IDFhltSWlHsclP..tuuulsITol.saLNuSTcAPpFlhEhIQuossSLlllLDL.sRKDLsLpP-YLccYYpsTtLD...ppRppl.cl..PpspPYhSsSLalRSshSPTAlhhplts.tt.......tth-pllp..ltshu.thh.thWlpths..tttt......h.t.tpt..h.tRD..hppt.hEhs.s.phs.phFs.thsspllt.h.t......................................... 0 4 18 25 +6234 PF06406 StbA StbA protein Moxon SJ anon Pfam-B_12747 (release 9.0) Family This family consists of several bacterial StbA plasmid stability proteins [1]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.76 0.70 -5.49 7 732 2012-10-02 23:34:14 2003-07-07 09:29:06 6 2 457 34 70 604 104 269.10 33 89.60 CHANGED M+lhlDDGSTNIKLAWh.E.cGcl+shlSsNSFK.-Wsssh...sst...pshNY.lDG.c+YoaD.lSssulhTTcspYQYSDVNllAlHHALhpSGLtPQsV-lsVTLPloEaaDpssQsNhtNIpRKKsNlhR.lp..lps.u-sFsI+pVsVhPESlPAuFpsLtt..lsphES.LIlDLGGTTLDluplhGphsGISclasssplGVSllTcuVhpsLsh.ssocsSpahADclIppRpDpsaLpphIpstschstlhpslpct.cpLtp+VhpslupFsshs+.VhlVGGGApLltsAlKpthth.st+hhhsssPQFsLV.uhhth .......................................................................................................................................l.hDDGopshKl.h..p..ttt.hh...lo.N....S.Fp.t.at......h....t.....tshNY...l.s..s.....pasa..c.h...o.....p.......l.Ts..ph..t..a...QYsp..hNhl..AlpHALh.p..o.Gl.....t.s.p....Vs..lsVT.LPlo...-a.hs...p.ps.Q.s.tp.I..pR...KptNlh...+..lp.........hpt.....up.....s.F...s...I...c...sV...pVh..PE...Sl.PA..s..ap.h.Ltp...........hs.....t....h....-....p.....h...L.....I.......lDlGGTTLD..l..u..h..l....t..u..p..hs..s.....l.o...p..s...h..s..ss...p...lG........V.S.hhsculhps.h........tt....s....s.............h...c...s..S.....p...hh...A.spl.............Ip..p.R......p-....t...........s...a..l..p...p.h.....Ip...s...t.......s.p...hs...........tlh.s.s...I....p....pt.....cpLtp...c.Vhp....s....l......s.....p....F.....p...s.......h.s+....V.......hllGGGA..tl..l.....ts.ul.+pthth....ttp.hhh.tssQhsLs.uhh............................................................................................................................................................ 3 21 40 57 +6235 PF06407 BDV_P40 Borna disease virus P40 protein Moxon SJ anon Pfam-B_15995 (release 9.0) Family This family consists of several Borna disease virus P40 proteins. Borna disease (BD) is a persistent viral infection of the central nervous system caused by the single-negative-strand, nonsegmented RNA Borna disease virus (BDV). 
P40 is known to be a nucleoprotein [1]. 25.00 25.00 53.20 53.20 18.70 17.70 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.29 0.70 -5.60 3 171 2009-09-10 21:38:57 2003-07-07 09:32:44 6 1 17 2 2 111 0 184.30 67 98.87 CHANGED MPPKRRLVDDADAMEDQDLYEPPASLPKLPGKFLQYTVGGSDPHPGIGHEKDIRQNAVALLDQSRRDMFHTVTPSLVFLCLLIPGLHAAFVHGGVPRESYLSTPVTRGEQTVVKTAKFYGEKTTQRDLTELEISSIFSHCCSLLIGVVIGSSSKIKAGAEQIKKRFKTMMAALNRPSHGETATLLQMFNPHEAIDWINGQPWVGSFVLSLLTTDFESPGKEFMDQIKLVASYAQMTTYTTIKEYLAECMDATLTIPVVAYEIRDFLEVSAKLKEEHADLFPFLGAIRHPDAIKLAPRSFPNLASAAFYWSKKENPTMAGYRASTIQPGASVKETQLARYRRREISRGEDGAELSGEISAIMKMIGVTGLN ..........................................................................................................................................................................................................................PHEAIDWINuQPWVGSFVLuLLTTDFESPGKEFMDQIKLVAuaAQMTTYTTIKEYLsECMDATLTIPsVAhEI+-FL-sosKLKtEHuDhF.aLGAIRHsDAIKLAPRsFPNLASAAFYWSKK............................................................................................. 0 2 2 2 +6237 PF06409 NPIP Nuclear pore complex interacting protein (NPIP) Moxon SJ anon Pfam-B_16418 (release 9.0) Family This family consists of a series of primate specific nuclear pore complex interacting protein (NPIP) sequences. The function of this family is unknown but is well conserved from African apes to humans [1]. 
27.00 27.00 27.30 27.40 26.20 26.20 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.83 0.70 -5.65 5 342 2009-09-13 05:25:39 2003-07-07 09:37:51 6 5 26 0 83 271 0 107.00 34 36.57 CHANGED VINTLuDHcHsGscFpGs......PWlhIIIsFLRpYKhsIhLCTohLsVSFLKTIFhScNGHDGSTDVQQRAWRSNRpRQcG.................NKIGLKDVITLWRHVETKVRAKI+KhKVTTKIN+HDKINGKRKTAKcH..LRKLSMKECEHAEKERQVSEAEENGKLDMKEIHTYKKMFQRAQELRRRAEDYHKCKIPPSARKPLCNWVRMAAA..EHRHSSGLPYWPYLTAETLKNRMGHQPPPPTQQHSIsDNSLSLKTPP..ECLLpPL....PPSsDDNLKssP .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 69 69 71 +6239 PF06411 HdeA HdeA/HdeB family Finn RD, Bateman A anon Pfam-B_63431 (release 9.0) Domain HdeA (hns-dependent expression protein A) is a single domain alpha-helical protein localised in the periplasmic space. HdeA is involved in acid resistance essential for infectivity of enteric bacterial pathogens. Functional studies demonstrate that HdeA is activated by a dimer-to-monomer transition at acidic pH, leading to suppression of aggregation by acid-denatured proteins. The gene encoding HdeA was initially identified as part of an operon regulated by the nucleoid protein H-NS [1,2]. This family also contains HdeB [3]. 
21.90 21.90 21.90 22.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.26 0.72 -4.02 33 942 2009-09-11 09:58:26 2003-07-07 12:09:41 6 2 576 13 43 189 4 94.30 39 85.95 CHANGED htlshhhhshsssshs.........ssu.ss.spss..........ssppMTCcEFlsLsspthssVshWhhshsp.phKssDhV..Dhppl-sl.sPpllchCKcsPppplh-h ...............................................hht...uhIhhGhlhhssl...........sNAtsA.tAs-..........sspshTCp-Fl....s....LssphhssVshWshs.ss.phKstD.sV..Dlpth-ss.sPtll-hC+psPQsslhc................... 0 8 19 28 +6240 PF06412 TraD Conjugal transfer protein TraD Moxon SJ anon Pfam-B_11863 (release 9.0) Family This family contains bacterial TraD conjugal transfer proteins [1]. Mutations in the TraD gene result in loss of transfer [2]. 21.90 21.90 22.30 22.00 21.60 21.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.93 0.72 -4.38 53 460 2009-01-15 18:05:59 2003-07-07 13:19:58 6 4 282 0 117 388 14 64.10 29 63.06 CHANGED hpsttR+ccTRchIpLGGLVlKAG.......Lst.-+uhlhGALl.hucphcsstt......pphapttGpphFptc .................p..pR+tcTRchItLGuLVsKAs.......lcsh-+phlhGhLlshuchhptspt.......ttapttGpphhpt................ 0 18 57 85 +6241 PF06413 Neugrin Neugrin Moxon SJ anon Pfam-B_11274 (release 9.0) Family This family consists of several mouse and human neugrin proteins. Neugrin and m-neugrin are mainly expressed in neurons in the nervous system, and are thought to play an important role in the process of neuronal differentiation [1]. 
21.80 21.80 22.60 21.90 21.70 21.40 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.45 0.70 -4.53 6 219 2012-10-04 14:01:12 2003-07-07 13:23:26 6 6 192 0 152 221 0 144.00 29 49.44 CHANGED MEssGAP.RpLTW-AhEQIRYL+cEFPEpWoVPRLAEGFsVSTDVIRRVLKSKFlPolEpKLKQDtKV...........hK+hu.s..h..L.usususKhLsuGp...........SlSsuLLhPGcEsuS.tspsHShALKshcpp.ppsssstppps+sKplQsLc-h.shVssssshGc.tp....ssoDspttccthsGsL.SDpcLEELptsEhGc...ss+VVQRGREFFDSNGNFLYRI ...............tth.....sP.+.+Lo.-sh-tIRhL+pp.P-paosspLA-pFplSPEsIR.RILKS.KWp.P.o.......t...-....p.c......c..ptt................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 40 74 118 +6242 PF06414 Zeta_toxin Zeta toxin Moxon SJ anon Pfam-B_12374 (release 9.0) Family This family consists of several bacterial zeta toxin proteins. Zeta toxin is thought to be part of a postregulational killing system in bacteria. It relies on antitoxin/toxin systems that secure stable inheritance of low and medium copy number plasmids during cell division and kill cells that have lost the plasmid [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.02 0.71 -5.22 40 1247 2012-10-05 12:31:09 2003-07-07 13:29:24 7 17 949 8 252 6029 2517 168.70 21 56.57 CHANGED hsphhss........pt...sp.cpPhullluGQPGAGKopltcthhpph......psshlpl-sD-hRpha................PpYpth.p...t..s.psuphsptt....ushhs-plhspAhcp+hslll-uThpss-hspc.hhcpL+.....c.tGYplplhhlsss.chShttstpRappp...t...............G.Rhlstctactsa.....psh.cslpplcppt..hhsthh.lh...........spss.splYcsphs ..........................................p................ppPh.h.hl.l.uGtsGAGK.oolh....p..h.h.hp.p.h.............tsshl....h...I.........s.....u.....D..p.....h......+.p.....h..............................................................s.p..a.....t......................p.....s..p....h...s..p........................u..s.....p....h..s....p...p....h...l....p...p......h.....h...p...p.....t...h......s....h.l....h..E....s....T.....h....p......s..h....s.......h.........p......p..hhphh+.................................p...tG...Y.p...l.p...l..h.h...l.s..s........p......t...lu.h....p....s.h.Rhtp...............................................................u.+hs.sp..p.thp...hh.....................tt.h..psh.t.th............p.h...lh...........p............t................................................................................................................................................ 0 67 159 208 +6243 PF06415 iPGM_N BPG-independent PGAM N-terminus (iPGM_N) Moxon SJ anon Pfam-B_1338 (release 10.0) Domain This family represents the N-terminal region of the 2,3-bisphosphoglycerate-independent phosphoglycerate mutase (or phosphoglyceromutase or BPG-independent PGAM) protein (EC:5.4.2.1). The family is found in conjunction with Pfam:PF01676 (located in the C-terminal region of the protein). 
21.20 21.20 21.20 21.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.45 0.70 -5.01 35 2910 2012-10-03 05:58:16 2003-07-07 13:45:27 8 9 2783 9 666 2087 892 216.40 43 44.36 CHANGED IspuIc-GcFapN.slhpshscscpsssslHlhGLlSsGGVHSHhcHLhALlclAtc+Glc+ValHsFhDGRDVsPpSutsalcclpshhpc.huhG...cIATluGRYY.AMDR.DpRW-RlcpAYcshs...Gcup.phpsulpslpsu...YspshoDEFlhPolIss...t.suslcDsDuVIFaNFRsDRARQlocshsppc.FcsFpRpph...pl.paVshTpY-sslss.slAF ......................IspuIc-GpFh.p.N.tsLhsAlcps+p.....s.s.p..s.lHlhGLlSsGGV.......HSH.cHlhAh.l-lAtcp.G..s..c..+.lYlHAFLDGRDssP+SAtshlcchppthsc......lGhG......clAolsGRYY.AMDR.DpR....W.-RVc..cAYchls..............u.........c.G..p......................p.s.s.o............Alpulpsu...............Y....s......c.......s................s.......DEFVpPsllts....................susl.....pD.GDulIFhNFRsDRA..Rplocshss.........t...........-...F............c.......s............Fp...............Rpph..................sl...pa.l.shTpYs.sslps..hha............................... 0 260 470 585 +6244 PF06416 DUF1076 Protein of unknown function (DUF1076) Moxon SJ anon Pfam-B_2653 (release 9.0) Family This family consists of several hypothetical bacterial proteins exclusive to Escherichia coli and Salmonella typhi. The function of this family is unknown. 25.30 25.30 25.50 39.00 22.20 25.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.57 0.71 -4.38 12 614 2009-01-15 18:05:59 2003-07-07 14:06:50 7 1 106 2 3 296 0 106.30 47 63.59 CHANGED M.pspupspppc...............s....lpsKIspssFsVsspchpCspptlpCPITLshPEcGVFV+NutsSplCoLaDpsAhocLlpcsuhHPLSREPlossMIlu+-cChFDts+tsFsIh ..........................p...pp...............tt.p.L.sKIppCsFsVs.pchpCspphlpCPITLs.PEcGVFl+NSpsSplCoLYDpsAhscLlccshsHPLSREPIosSMIVp+-pChFDsp+tsFllh........ 
0 0 0 3 +6245 PF06417 DUF1077 Protein of unknown function (DUF1077) Moxon SJ anon Pfam-B_6645 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 25.00 25.00 43.10 38.10 22.10 21.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.61 0.71 -4.80 30 340 2009-01-15 18:05:59 2003-07-07 15:38:58 7 7 291 0 233 300 6 118.90 39 66.13 CHANGED sppspppppppLthKKAW-lAluPhKslPMNlFMhYMoGNSlpIFPI.MhlhMhlhsPlKulho.ssssFcslcss....................plhhs+llYllhplhshul.........ulaKhpsMGLLPsssSDW.....Luappspph ..............ts...ptpppptLhhKKuW.-lAluPhK.plPMNhFMMYM.oG.NolpIFsI.MM.VhMhhhpPlpulhu.ssssFchhcss............s.............................phhh.+llYlhhplhslul.........ulaKhpuMGLLPTptSDWLua.t...t............................ 0 85 134 195 +6246 PF06418 CTP_synth_N CTP synthase N-terminus Moxon SJ anon Pfam-B_226 (release 10.0) Family This family consists of the N-terminal region of the CTP synthase protein (EC:6.3.4.2). This family is found in conjunction with Pfam:PF00117 located in the C-terminal region of the protein. CTP synthase catalyses the synthesis of CTP from UTP by amination of the pyrimidine ring at the 4-position [1]. 
22.30 22.30 24.40 23.50 21.80 21.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.89 0.70 -5.31 98 5282 2012-10-05 12:31:09 2003-07-07 15:48:15 9 16 4818 13 1383 3730 3009 264.10 58 50.53 CHANGED sKaIFVTGGVlSSLGKGlsuAS.lGtLLcsRGhcVohhKlDPYlNVDPGTMSPaQHGEVFVT-DG.............uETDLDLGHYERFlshsho..+psNlToG+.......IYpsVIpKER+G-YLGpTVQVIPHlTsEIKppIp.....ps.sp.............t...sDllIlEIGGTVGDIEShPFlEAlRQhph-h.GpcNshalHlTLVPalpsuGEhKTKPTQHSV+-LRslGIQPDlllsRo-cs.lscsh+cKIALFCsVspcsVIsshDlp.sIYcVPLhLccpGlsphlhccL.......pLs...tps.sh..scW ...........................................TKYIFVTGGVVSSLGKGIsAAS..........LutLLcsRG...Lp............VT.h.K....l....................D....PYINVDP...GTMS......P...a......QHGEVFVT-DG.............AETDLDLGHYERFl.c.hs.....ho..+psNlTTG+..........IYp..........s.V..l.cKERRG.D.YLGs..............T.V..Q.......VI.PHITspIK-+lhcsuct...............................t...sDVlIsE.........I.........GGTV.............GDIESLPFLEAlRQhph-l.........G.+..-sshalHlTLVPYl....t..uuGEh.......KTKPTQH.SVKE..............LRu.....lGIQPDl....LlsR..o..-.c.s....ls.p.sh+pKIAL.FCsVs.t.cuVIpshDsc.oIYclPhhL.ppQGlDphlsc+LpLs....s.psshspW.............................. 0 458 871 1151 +6247 PF06419 COG6 Conserved oligomeric complex COG6 Wood V, Studholme D anon Pfam-B_10345 (release 8.0) Domain COG6 is a component of the conserved oligomeric golgi complex, which is composed of eight different subunits and is required for normal golgi morphology and localisation. 
22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -13.04 0.70 -6.39 29 369 2012-10-03 17:31:52 2003-07-07 15:53:51 6 10 276 0 265 379 2 501.50 27 82.22 CHANGED DuLptLsphshpNThpsRRpLRh-lc+cllcsNsphlc-FtpVscpLcclssslsplspsscphppplssspppTpsllp-sssLhpp.ccplchKpplLpuFps+FhLsptEhthLssstp......sls-cFFplLs+scpI+cDCchLL..us-..spphGLclM-psspplpsAhp+La+ah...Qcca+sL.....sh-ssph...sssl+cAlphLt-RPsLFpssLDpaspuRcpsLs-sFhsALTtsss.............ust.....+PIEhsAHDPlRYlGDMLAWlHSssVsE+EsLcuLFthpscph.cs.p..hp....................ctls-lls+.lpuluRsL+.RlEpllps.....cccslhhYclsNLLpFYpshFs+l.lsssS.....LlpslpsLpchuhppahshhpsplsslpss........ht.sssD..LtPPcaLt-hLppLpslhcs..hpsShss.............sscp............sphphllppsl-PhlphsppsuttLsshpp..............slahlNsLhhhcoslssasasp........c+lp.lpsplsphtspLhphQhphllppoGLsslhshlp.h.........................p.p.htslssh.hhp.psL.tpssppLscFLPoA.LhDhpssL.ppLpSsphspslsccusctFsc.aphlcthlhsstpt.......................hh.+sstclt.slL 
..................................................t.Lst.hhtsohpsRRpLRtpl-cc.lt.stphlptFttlt...c......................p................Lcpltpplpthspsspp............hppplp.ss....p....tpTts.lltpsspLptp...ppplph+pplhpsFhpcFp.....LotpEhthLpss...................sl....sppFFpsLt+sppI+pcsc.hLL..ttp.....................ppphGl-l.M-phshh....ptuhp+Lh+Wh..........................ptchcsl...................s.-ssph.................sshlppuhp.hL.pc.R.P.sLa.p.sl-phupsRcpsl.ctFhpALTtuss..........................................st.............+PIEh..pu.HDPlRYlG..........DMLAWlHpshssE+EhlpsLh.....h.tst.p...................................................................................pthppllsc.hpuls+sl+.RlEpll.s................................p....s.lhhaplssLLtFYp.h....hpth..lttps...............L..hpsltphp..s.phahs.hp......phttl.tp.................................ss.D..L.Ps...l.phlt.Lhtlhts...hpsohh....................ttpp.......................................tph..llpthl-Phlphsp.t..u..tt...hts.p...................................................thahlNsh.hhpsslt.aphsp.............phhp.lpttlpt.hppLhp.php.llppsGLt.hhphlp.........................................tshsp...hp..tl..tth..phstaLs.ts.....hh..pl.thl.ssthhpplhppshp.hst.at.l.thlht..p....................................hh.hs..plt.h.............................................................................. 1 104 152 220 +6248 PF06420 Mgm101p Mitochondrial genome maintenance MGM101 Wood V, Studholme DJ anon Pfam-B_35151 (release 8.0) Family The mgm101 gene was identified as essential for maintenance of the mitochondrial genome in Saccharomyces cerevisiae [1]. Based on its DNA-binding activity, and experimental work with a temperature-sensitive mgm101 mutant, it has been proposed that the mgm101 gene product performs an essential function in the repair of oxidatively damaged mitochondrial DNA [2]. 
27.00 27.00 36.30 35.80 18.80 18.70 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.17 0.71 -4.93 27 148 2009-01-15 18:05:59 2003-07-07 15:57:53 7 2 140 0 116 150 1 168.10 61 59.67 CHANGED lDWspSaaGLuppPFocEsuclLhsPlsspDlEIKPDGllYLPEIKYRRILN+AFGPGGWGLAPRG-olVs.....s+hVoREYALlspGRLVSlARGEQsY.F.sscuIPTAoEGCKSNALMRCCKDLGIASELWDPpFIRpFKpcaspEsasEHl...sTKKKKKlWh+K...ppphsYPaK .....................................lDWopSaaGLuspPFscEssclLltPlsscDlEI.KPDGllYLPEIKYRRILN+AFGPGGWGLsPRuEolV.o......sKhVTREYALlspGR..........LVSlARGEQ-YF...u.cG...IPT..AoEGCKSNALMRCCKDLGIASELWDPpFIRcFKtpaspEsFVEHV...sTK+K+KlWhRK...-cplpYPaK............................ 0 40 69 100 +6249 PF06421 LepA_C GTP-binding protein LepA C-terminus Moxon SJ anon Pfam-B_425 (release 10.0) Family This family consists of the C-terminal region of several pro- and eukaryotic GTP-binding LepA proteins [1]. 22.00 22.00 23.00 23.70 21.70 21.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.35 0.72 -4.14 45 4879 2009-01-15 18:05:59 2003-07-07 15:58:45 7 24 4651 11 1211 3357 2420 106.40 65 17.86 CHANGED lNG-hVDALShIVH+spAhp+GRplspKLK-lIPRQ.F-lsIQAuIGu..KIIARETIKAlRKs....VlAKCYGGDloRK+KLLEKQK-GKKRMKplGsVElPQEAFlulL+h ................lNG-pVDALShIVH+DpA.pRG+tls-KLK-LIPRQ.F-lsIQAAIGs...+IIARp.....TlKAlRKs....VL.....A.....K....CY..GGD.l.SRK+KLLEKQKcGKK..R..M..K..p..l.Gs...VElPQEAFlAlL+....................... 0 417 786 1029 +6250 PF06422 PDR_CDR CDR ABC transporter Gauthier C, Studholme DJ anon Pfam-B_1005 (release 8.0) Family Corresponds to a region of the PDR/CDR subgroup of ABC transporters comprising extracellular loop 3, transmembrane segment 6 and linker region. 
20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.33 0.72 -4.36 167 1227 2012-10-03 10:13:34 2003-07-07 15:59:49 7 44 184 0 900 1245 0 97.70 26 7.84 CHANGED shlPs.Gs..sYt.shs.ssppsCu.ssGuhsGps.....hVsG-s.YlptuasYphuHhWR.....NaGIllAFhlhFhslhllssEhhpsspupG-sLlFpRuph.pt......................tttt.....sDtE ..................................hlPt..G..s...sYt...shs.....tppsCs..hsGuhs...Gp..s......hVsGc.s.YlptsasY............phuH..hWR......................NaGIlhuFhlh.Fhhhh.lls.s..Ehh....p...h....tts...t.u.p.h.lla.+sph.th....................ptt............................. 0 239 480 779 +6251 PF06423 GWT1 GWT1 Wood V, Studholme DJ anon Pfam-B_15982 (release 8.0) Family Glycosylphosphatidylinositol (GPI) is a conserved post-translational modification to anchor cell surface proteins to plasma membrane in eukaryotes. GWT1 is involved in GPI anchor biosynthesis; it is required for inositol acylation in yeast [1-2]. 29.60 29.60 29.60 29.60 29.50 29.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.07 42 299 2009-01-15 18:05:59 2003-07-07 16:01:32 7 11 251 0 222 304 3 145.70 30 28.81 CHANGED tNREGlhShh.GYlAIFLhGpsh.Ghhllspphsh.......................................ttthhphlhpLhhhshlhhhlhhls.......thshslSRRlANhsYVlWVsuhNsthLhhasll-phhhsshh.....................................sspllpAhNpNGLslFLlANlLT..GhVN .........................................................tNREGlhShh.GYluIaLhGh...sh.Ghhlhspptp...........................................................................................................pphhphh..hpLhh.huhhhhhhhhlsp........shs.slSRRhANhsYll.WlhAhshh...hLhh...hh.lh-t...l...hh.sh.................................................................................sstllpAhNpNsLhhFLlANllT..GlVN................................. 
0 72 114 179 +6252 PF06424 PRP1_N PRP1 splicing factor, N-terminal Wood V, Studholme DJ anon Pfam-B_6467 (release 8.0) Domain This domain is specific to the N-terminal part of the prp1 splicing factor, which is involved in mRNA splicing (and possibly also poly(A)+ RNA nuclear export and cell cycle progression). This domain is specific to the N terminus of the RNA splicing factor encoded by prp1 [1]. It is involved in mRNA splicing and possibly also poly(A)and RNA nuclear export and cell cycle progression. 20.90 20.90 20.90 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.50 0.71 -3.83 33 327 2009-01-15 18:05:59 2003-07-07 16:04:43 7 42 271 0 240 327 4 141.10 45 16.43 CHANGED sPsGYVuGlGRGATGFoTRuDhGsu+t.........................................pppp-cDsschs-............stst.uLFupst.h..Dc-DcEADcIYppIDc+MccRR+pc+Ep+pcp-hpchctpp.......PplppQFuDLKRsLusVo---WtsIPEsGDhTtK ..................................AP.sYVsGlGRGATGFTTRSDl..GPAR-uss..t.t...............................................tttppp.ppp-c-.-cchp-...............s.st..uLF.uss...Y...DcDD-EADtIYp.tlDc+..MDcRRKc+REtRp+pEhEcactpp..........PKIppQFuDLK.............RpLusVo--EWtsIPElGDhpt+............................ 0 85 135 200 +6254 PF06426 SATase_N Serine acetyltransferase, N-terminal Wilbrey A, Studholme DJ anon Pfam-B_1192 (release 8.0) Domain The N-terminal domain of serine acetyltransferase has a sequence that is conserved in plants [2] and bacteria [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.29 0.72 -3.78 111 1759 2009-01-15 18:05:59 2003-07-07 16:08:33 9 11 1568 37 424 1080 487 92.10 42 34.01 CHANGED lWppl+pEAcpssppEPlLuualauoILpHsolcsALuapLusKLuss.phs..s.hLp-lhpcAhts.cP............plspusp..sDlhAlh-RDPACpp.ahpPlLaaKGFpAlQu ...........................................lWppI+tEActhsppEPhLuuFhauol.LpHpsLtsALuahLAs+....Lsss......h...s...u.......htlc-lhcc.shts...cP.................phhtsst..sDIpAVhpRDPAscp.aspPLLYhKGFHAlQs................................ 0 116 251 340 +6255 PF06427 UDP-g_GGTase UDP-glucose:Glycoprotein Glucosyltransferase Studholme D, Wood V anon Pfam-B_4648 (release 8.0) Family The N-terminal region of this group of proteins is required for correct folding of the ER UDP-Glc: glucosyltransferase. 21.20 21.20 22.60 22.50 20.90 20.60 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.20 0.70 -5.03 20 379 2009-01-15 18:05:59 2003-07-07 16:09:48 6 11 262 0 258 355 5 194.50 35 14.04 CHANGED SDhhhphsuhh...ush.ppcs.....Rhsh.phls.spposlpl..s.tppcs........shhclsAllDPlo+puQKlssllphLschl..slsl+lahNPps....phs-lPlKsFYRaVlp.sp.pF.sssGthss.PtAhFsslPsspLLThsl-sP-uWlVpshcu..paDLDNIhLpclsss..........................VsApYcLEplLlEGaspDhssssPP.RGLQLpLs..opssshhs..........DTIVMAN ....................................................................t...........Rhph.phhp...spao.slpl..s.....ps.cs.............hhcllAllDPlocp.AQ+hsslLh.lLpplh..sspl................+...lahN.sps.......cls.-..hP..l.K.p........FYRaVL-.sp..pF.ss.s...s....phs..t....PhApFh.slPpssLLTlshpsPpuWhVp.....shco..saDL.DNIhLpplpss............................VpA.YELEalLlEGHshD.......h....ss.s...p......P....P.RGLQhhLG...Tp..pp.Phhs..............DTIVMAN.............................. 
0 86 135 206 +6256 PF06428 Sec2p GDP/GTP exchange factor Sec2p Wood V, Studholme D anon Pfam-B_10665 (release 8.0) Family In Saccharomyces cerevisiae, Sec2p is a GDP/GTP exchange factor for Sec4p, which is required for vesicular transport at the post-Golgi stage of yeast secretion [1]. 22.30 22.30 22.60 22.90 22.10 22.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.33 0.72 -4.16 20 379 2009-01-15 18:05:59 2003-07-07 16:11:44 6 8 194 24 259 345 1 101.60 31 20.52 CHANGED scltccccp+tpsEpppspLppElE-LTASLF-EANcMV....usA+tEppshchKNcpLccQL+Ep-sll-sLQtQLpsLKplhhs.....................hpspp.....spppptsst ........................p.pLp.ccppppchpph+pplppELE-LTAuLF.............EEApcMV....tpAp............h+p...sp.......hE+pLcEscscl-sLQtplttLKplh.p.....................................pt........................................................... 0 71 121 196 +6257 PF06429 Flg_bbr_C DUF1078; Flagellar basal body rod FlgEFG protein C-terminal Moxon SJ anon Pfam-B_807 (release 10.0) Domain This family consists of a number of C-terminal domains of unknown function. This domain seems to be specific to flagellar basal-body rod and flagellar hook proteins in which Pfam:PF00460 is often present at the extreme N terminus. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.41 0.72 -4.06 463 11734 2009-01-15 18:05:59 2003-07-07 16:15:12 8 19 2105 7 2780 7802 1705 79.40 24 23.35 CHANGED ssLp...phussL.a...ttss.............Gs..s...hs.s........ssss..............p...l..tpGhL..EsSNVsslcEhsshIsspRsYEhssKhlpsuDphhpp.sspl .........................................................................................t...........h........................t..........................s.ss.....................tl....tpGhl....E....tSN.V.sl.spEhsshIpsQRuYpu.Ns+sls.o.s.cphhpphlp....................... 
0 883 1749 2237 +6258 PF06430 L_lactis_RepB_C Lactococcus lactis RepB C-terminus Moxon SJ anon Pfam-B_717 (release 10.0) Family This family consists of the C-terminal region of RepB proteins from Lactococcus lactis (See Pfam:PF01051). 20.30 20.30 22.60 47.70 19.90 18.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.65 0.71 -4.06 13 139 2009-01-15 18:05:59 2003-07-07 16:23:41 7 3 51 0 9 140 2 119.00 64 33.07 CHANGED KRhADDNSYKL-..DcsY.EDKspKEpsEcpLhhcAMcS.YTKLLhEpFLLSshDhTDsulMAGLQKNVYPLYDELK-LRGLNGVK-HLSYVuSKQEsYSKRNlAKYLKKAIEQYLPTVKRQDL ...............KRhADDNSYKL-..DpsY.csKtpKpcsEctLhh.pAMcS+YT+LLhEshLLSsh-hpDTslMuGLQcpVYPLYDELK-LRGLNGVKDHLSYVuSKpEsY..S..K+N.lAKYLKKAIEQYLPTVKRQDL....... 0 1 1 7 +6259 PF06431 Polyoma_lg_T_C Polyomavirus large T antigen C-terminus Moxon SJ anon Pfam-B_214 (release 10.0) Family \N 22.40 22.40 23.00 24.00 21.10 22.30 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.37 0.70 -5.82 3 1776 2012-10-05 12:31:09 2003-07-07 16:44:43 6 5 48 37 0 501 0 116.30 75 64.60 CHANGED TKQVSWKLVTEYAlETKCEDVFLLLGMYLEFQYNsEpCKKCpKKDQPsHFKYHEKHYANAtIFADSKNQKSICQQAVDTVLAKKRVDSLHMTREEMLTERFNaLLDKMDLIFGAHGNAVLEQYMAGVAWLHCLLPKMDSVIYDFLKCIVaNIPKKRYWLFKGPIDSGKTTLAAALLDLCGGKALNVNLPLERLNFELGVAIDQFMVVFEDVKGTGAESRDLPSGHGINNLDsLRDYLDGSVKVNLEKKHLNKRTQIFPPGIVTMNEYSVPKTLQARFVRQIDFRPKsYLRKSLpsSEFLLEKRILQSGMTLLLLLIWFRPVADFAsuIQSRIVEWKERLDpEISMYTFS+MKaNVuMG+sILDasREEDSEsEDSGHGSSTESQSQCsSQVS..-sSGsDoQSQsS......aHlCKGFQCFc+P 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................SSTESQSQC.SQVS..EASGADTQEHCT....................YHICKGFQCFKKP..................................... 0 0 0 0 +6260 PF06432 GPI2 Phosphatidylinositol N-acetylglucosaminyltransferase Wood V, Studholme DJ anon Pfam-B_33496 (release 8.0) Family Glycosylphosphatidylinositol (GPI) represents an important anchoring molecule for cell surface proteins. The first step in its synthesis is the transfer of N-acetylglucosamine (GlcNAc) from UDP-N-acetylglucosamine to phosphatidylinositol (PI). This step involves products of three or four genes in both yeast (GPI1, GPI2 and GPI3) and mammals (GPI1, PIG A, PIG H and PIG C), respectively. 
23.90 23.90 32.70 24.00 23.80 23.50 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.95 0.70 -5.06 37 324 2009-01-15 18:05:59 2003-07-07 17:06:08 6 9 277 0 237 310 4 271.90 29 80.42 CHANGED W+KlLY..hcQsYPDNYo..D.poFLppLppNssltpa.........sahpllh-shslsQplssVslhhllFshlhppp..............................................lss.slhhhssshshhualh...hthht.t.s...................................................................................................................hhpsh+ohlllhhhlhhLSPlL+oLTcohSoDoIauloshlhlhalhhaDY..u.............htsh.stt...................htsslShNhulhuullLASRLsoshpVFshllhulplF...sLhPhhppp.l+ths.hhphhhshhhshhshhsh.............hhhshhhhlhahhhllhlshlsPh.......ahltlQpa.............KspIpGPWD ..............................................................................................WcKlLa...+Q.sa..PDNYs..D.tsFLcpLpcNhphp.Y.........................sah.tllhcssslsQplsslslahhha.hhhhptt........................................................................................l.sPh..lhhhsshh.shlGalh...hthh..t.t......................................................................................................................................................h.phhtsl+.osl...lh..hhhhhsLS..PlL...+oLTcohSoDoIaAho.hhhhlhp....lh.haDYu...........................................ht...st............................sslShNsAl.huossLASRLs..........oshcsFshhhhu.lplF...sLhPhhppp.l+...th....s....t.ht.hh..lshhh.sh.hshhsl....................h.hshhhhhh.hh..h..h..h..h..hh...hh..sPh.......hhlt.hQpa.............KppltGPWD................................... 0 79 127 193 +6261 PF06433 Me-amine-dh_H Me-amine-deh_H; Methylamine dehydrogenase heavy chain (MADH) Finn RD anon Pfam-B_20644 (release 9.0) Domain Methylamine dehydrogenase (EC:1.4.99.3) a periplasmic quinoprotein found in several methyltrophic bacteria. 
Induced when grown on methylamine as a carbon source MADH catalyses the oxidative deamination of amines to there corresponding aldehydes. MADH is a hetero- tetramer, comprised of two heavy chains (H) and two light chains (L). The H-chain forms a beta-propeller like structure [1]. 19.50 19.50 19.50 19.50 19.40 19.20 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.11 0.70 -5.81 5 83 2012-10-05 17:30:42 2003-07-08 09:40:06 6 2 76 105 32 104 19 332.60 33 86.65 CHANGED DARRVYVhDPuHFAAlTQhYsIDGcouRlLGMTDGGFLPNPVVASDGSFFApASTVYSRIARGKRTDYVEVlDPpTapPIADIELP-uPRFLVGTYsWMsuLTPDNKsLLFYQFSPuPAVGVVDLEGKuFcRMlDVPDCYHIFPouNsoFFMHCRDGSLt+Vuaus-GsoK.hKsTEVFHsEDEYLINHPAYSs+SGRLVWPTYTGKIFQADLSupcAcFLsPIEAFTEAEKADsWRPGGWQQVAYHRApDRIYLLuDQR-cW+HKsASRFVFVlDAcTGKRLsKIELGHEIDSIuVSQDAKPhLYALSsGsKTLaIFDAsTGKELuSVDQLGRGPQlIhTuD ............................................tt....hhl.D....t.h....s.+.laV...hD..uDs....t+..l..LG.lsuuasss.hs.l.o.s..Dt+phYlAsTaasRss+GpRTDllphaDssTLphsuEI.lPst.+t...s.s.s.p.tsh.uhosDG+hhaVhNhTPusSVoVlDhsu+KV.l.p.pl-sPGCshl..Y..P..s..u..s..s.s.Fo.uLCtDGshhsVsLDssGK.s.s...pp.p..o..s..hF...s.sc.D.s.lFppssh...st..hsuphhF.soYsGpVhsschous.s...s.....p....h....t..t.shp......l........h.s-u.c.+............t...........t......sWRPGGaQ.hAlpt.....spp+hY.V.L.M.H.pG.s.p...toHKDsGTpVWVhDhpo+cRlsRhsL.t.p...t....ss...SlsV..op.D-.cPllaulss.ss........uslhVhDAt.o..G.c.h+shc.tlGps........hh.................................. 0 7 17 24 +6262 PF06434 Aconitase_2_N Aconitate hydratase 2 N-terminus Moxon SJ anon Pfam-B_2605 (release 10.0) Family This family represents the N-terminal region of several bacterial Aconitate hydratase 2 proteins and is found in conjunction with Pfam:PF00330. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.27 0.71 -4.91 94 1563 2012-10-01 19:37:30 2003-07-08 12:39:19 8 6 1505 2 323 1212 908 208.10 67 25.23 CHANGED loVFKV.sGETNTDDLSPAs-AaSRPDIPLHAhAMLcschss............slppItcLK.pKGhP....lAYVGDVVGTGSSRKSAsNSV.LWahGcDIPaVPNKRsGGlllGGp.IAPIFFNTsEDSGAL........................PIE.sDVsplphGD..VIsIh..PacGclpc.....tsG...................................................................................................................c...llupFpLpPsslhDEVRAGGRIPLIIGRuLTsKARpsLGL...ssSslFtpP......ppPss.su+Ga ...................................................................................lTVFKV.sGETNTDDLSPAP.DAWS..RPDIPLHALAMLKNsR-GItP-...sG.hGPl+pIEtLp.pKGaP....lAYV.GDVVGTGSSRK.SATNSV.LWFhG-DI.PaVPNKRuG..Gl..sLGGK.IAPIFFNTMEDAG..A..L........................PI..E...l..D..V.Ss.LsMGD..VI..D..lY..PYcGclpp.....tsG.........................................................................................................................E....llAsFcLKo-VLl.DEV.RAGGRIPLIIGRGLTsKAREuLGL...st..S..-lFRpscs.sA-.Ss+Ga........................................................... 1 74 178 263 +6263 PF06435 DUF1079 Repeat of unknown function (DUF1079) Moxon SJ anon Pfam-B_1911 (release 10.0) Repeat This family consists of several repeats of 31 residues in length and seems to be exclusive to Moraxella catarrhalis UspA proteins. The UspA1 and UspA2 proteins of Moraxella catarrhalis are structurally related and are exposed on the bacterial cell surface where can function adhesins [1]. This family is commonly found with the Pfam:PF03895 family. 21.00 21.00 23.40 21.80 20.50 19.40 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.45 0.72 -3.94 4 220 2009-01-15 18:05:59 2003-07-08 12:51:55 6 19 12 4 9 218 0 29.60 52 16.20 CHANGED lushhc.tcsascpQoEsIcuLsKASutNTs lssh.ELtpQ.sp+po-.IcsLsKASutNTp.... 
0 0 9 9 +6264 PF06436 Pneumovirus_M2 Pneumovirus matrix protein 2 (M2) Moxon SJ anon Pfam-B_2318 (release 10.0) Family This family consists of several Pneumovirus matrix glycoprotein M2 sequences. This family functions as a transcription processivity factor that is essential for virus replication [1]. 25.00 25.00 134.20 134.00 22.00 17.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.90 0.71 -5.00 4 75 2009-01-15 18:05:59 2003-07-08 13:06:16 6 2 17 1 0 72 0 156.70 58 82.92 CHANGED pWPsHsLLVRpNaMLNplL+shDRo.DsLS.ISGAuc.DRTp-asLGsssVlpsYIsu.sNITKpSAChuh.pllppLppsDlKphRDpcVssS.+ltlaNhVlSYI-.s.KNstphIp.LKRLPt-hLKKhhKhhI-lpsulphpNssssslpDttNs pWPs+sLLlRpNaMLNplL+shD+S.DsLS.ISGAuc.DRTp-asLGsssVlpuYIss.sNITKpSAChuhppllppLppsDl+phRDNc.ssS.+VtlaNhVlSYlE.s.KsstphIp.LKRLPt-hLKKhhK.hlDlptuls.pNsppsslpDos..s... 1 0 0 0 +6265 PF06437 ISN1 IMP-specific 5'-nucleotidase Studholme D, Wood V anon Pfam-B_43910 (release 8.0) Family The Saccharomyces cerevisiae ISN1 (YOR155c) gene encodes an IMP-specific 5'-nucleotidase, which catalyses degradation of IMP to inosine as part of the purine salvage pathway. 
25.00 25.00 84.50 84.40 23.60 19.20 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.37 0.70 -5.76 18 157 2009-01-15 18:05:59 2003-07-08 13:32:53 6 4 137 0 127 156 1 373.80 47 89.08 CHANGED MoSRYRVEY......tLKsHR+DpFI-W.............IKGLLAV.PFVLHut..t...................t....s.tthspcs+cRYuEIFpDVEpLIsc+Ithspp.sp.................spSRL+hLVPSIGsFFT.LPLp-AFhhpDc+RsISpRRhVuPSFNDlRhILNTAQlhuLsctt....................pLcLlTFDGDVTLY-DGtSLssssPVIsRLl.................cLLppslplGIVTAAGYsc...Ap+YhpRLpGLLcAlpsss.sLsssQKpNLlVMGGESNYLF+assss...stsLp.lscccWlLPcMpsWsppDIsplLDhAEpsLpchhppLsLP....ssIlRK-RAVGIlP..........pssh+lhR.EpLEEhVLslQppLEp...........................stsu..pcl.FCAFNGGsDVWVDIGDKuLGVpsLQ+aa...........t..ssIpsocTLHVGDQFhSsG.ANDFKARLuusTsWIAuPpETVplL ........................................................................p.....................pst..................................................lcuLLAs.PFVLau..t..........................p...hstcscppYs-IhcDlEthIp-+.Ithppt.st...................................t.S+LphLVPolG.sFFT.LPLpcAFhhpDppRhISpRRaVsPSFNDlRhILNoAQlhulsc.t.............................tLcLlTFDGDVTLY-DGtsLp..sssPlIs+ll.................cLLcpshplGIVTAAGYsp...As+Y.pRL+GLL-Alpss....sLsspQKpsLhlMGGESNYLFcasts...........htLp.......lsc...pcW.....h......hs...cMtsW.....s.c.....p..DIpplLDhAEpsLpphhppLsLP....ssIlRKcRAVGIlP...............................ssh+h.R.EpLEEhVLslQppLEh..............................................st.su..pclPFCAFNGGs...........DVaVDIGsKuhGVpshQpaa..............ssIpsscTLHVGDQFhssG.uNDFKAR..............susThWIAsPtETVplL......................... 0 35 73 108 +6266 PF06438 HasA Heme-binding protein A (HasA) Finn RD anon Pfam-B_27216 (release 9.0) Domain Free iron is limited in vertebrate hosts, thus an alternative to siderophores has been developed by pathogenic bacteria to access host iron bound in protein complexes. 
HasA is a secreted hemophore that has the ability to obtain iron from hemoglobin. Once bound to HasA, the heme is shuttled to the receptor HasR, which releases the heme into the bacterium [1]. 26.30 26.30 27.30 32.80 25.50 26.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.36 0.71 -4.92 7 96 2009-01-15 18:05:59 2003-07-08 15:17:58 7 2 67 21 10 66 4 204.00 39 99.08 CHANGED MohSlsYsusauuaolusYLs-WuusFGDlsHtsups..sssNTGGFssGs..hsGoQYAlpSosss.tAFIAsGs....L+Yoh..sPu........HTLaGpLDoluhGcsL...uGGsu.us..asLss.-VoFssLsLsu.hupG+s......G.sVHpVlYGLMpGsouuLtsslsslLt..shGlSlNsTFDpluAAsss.s...husss.VshVGVQ-sspDhhLAA ..........................MosoIpY.sopausaoloSYLpcWusp....FGD..lsptsups...........ps....pGuFssus..........FsG..o..QYAl..s...Sop.......u..s.s..tu.h.......I....ApGD.......L+Yoh....Pp..............HThaG.p..l.DoLphGcsL...........s.su.su.sG....hpL-phclsFs.sLDlsu-h-su+shhp.........shpG.shHKulYGLM+GNssP.hL-hl........c....................ApGIslsTshKDluIASQhtsss..huDA..P.hlDTVGVh-.s.p-hLLAA......................... 0 1 3 5 +6267 PF06439 DUF1080 Domain of Unknown Function (DUF1080) Yeats C, Eberhardt R anon ADDA_10682 Family This family has structural similarity to an endo-1,3-1,4-beta glucanase belonging to glycoside hydrolase family 16. However, the structure surrounding the active site differs from that of the endo-1,3-1,4-beta glucanase. 
26.30 26.30 26.30 26.40 26.20 26.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.51 0.71 -4.40 189 1312 2012-10-02 19:29:29 2003-07-08 16:01:14 6 119 323 12 583 1442 1367 194.00 21 46.62 CHANGED sh...tsLF..s........Gc..sL.sG....Wp...sh..................................................................h.D.G.........................hlh.........................ssptsshlhoc...p.pas.sFpLcl-a.+.................ss..ss..............NSGlhh+sp........................................h..uh.EhQlhs......sttt..........................ptsGulY............................................................st...spsshssscWNphcI.hspusp............lp.h.hlNGhhlhchsphpst.t..............................ppGhIuLQ.sHu..........s............lp..FRNIpl+ .................................................................................................................................................hh.LF.s.Gp..sh.su....Wp...thttt...............................................................................................................lp-.G.........................hlh.........................tssssu..lh...o.....p.....c...pa.s.....sF.pLcl-a.+h...............st..su.............................NS.G....lhhpsp......................................h..uh.EhQlhs.....sttt.........................ttsGu.lY.................................................................st..spsshs...sG.cWNp....hcI..hs.p.usp..............lp..h.hl..N.G..h..hl..hchsphss.h................................................tpGhIuLQ..sHu............s.............tlt...FRNlplc....................................... 0 323 505 562 +6268 PF06440 DNA_pol3_theta DNA polymerase III, theta subunit Finn RD anon Pfam-B_27631 (release 9.0) Domain DNA polymerase III (EC 2.7.7.7) is comprised of three tightly associated subunits, alpha, epsilon and theta. This family contains the theta subunit. 
The structure of the theta subunit shows that the N-terminal two thirds is comprised of three helices while the C-terminal third is disordered [1]. The function of the theta subunit is poorly understood, but the interaction of the theta subunit with the epsilon subunit is thought to enhance the 3' to 5' exonucleolytic proofreading activity of epsilon [2]. 32.30 32.30 32.40 36.10 30.60 32.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.41 0.72 -4.39 11 627 2009-09-13 11:27:20 2003-07-08 16:11:56 6 1 540 7 57 179 3 74.00 76 95.14 CHANGED M..saNLApLScE-hDKlNVDLAASGVAaKERhNMPVlsE.VEREQPEHLRpYFRcRLtaYRptSppLs+hs.....-P+h .......pNLAcL-QsEMDKVNVDLAAAGVA...FKERYNMPVIAE.sVEREQPEHLRuWFRERLIAHRLASVsLSRLPYEPK........... 0 2 12 37 +6269 PF06441 EHN Epoxide hydrolase N terminus Yeats C anon ADDA_11591 Family This family represents the N-terminal region of the eukaryotic epoxide hydrolase protein. Epoxide hydrolases (EC:3.3.2.3) comprise a group of functionally related enzymes that catalyse the addition of water to oxirane compounds (epoxides), thereby usually generating vicinal trans-diols. EHs have been found in all types of living organisms, including mammals, invertebrates, plants, fungi and bacteria. In animals, the major interest in EH is directed towards their detoxification capacity for epoxides since they are important safeguards against the cytotoxic and genotoxic potential of oxirane derivatives that are often reactive electrophiles because of the high tension of the three-membered ring system and the strong polarization of the C--O bonds. This is of significant relevance because epoxides are frequent intermediary metabolites which arise during the biotransformation of foreign compounds [1]. This family is often found in conjunction with Pfam:PF00561. 
22.50 22.50 22.80 22.80 22.40 22.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.64 0.71 -4.15 143 1051 2012-10-03 11:45:05 2003-07-08 17:03:29 7 12 454 6 597 1123 575 110.20 35 27.04 CHANGED pPF....plplscspLs-L+pRLptoRa...Pspts.sss............hp.hGsshshlccLscYW............................ts.sa..D................WRptEscLN...paPpFpTpI........-..................GlcIHFlHh+S......pps.sAl.PLllsHGWPGShhEFhclI ...................................pPFclplsppplsDL+pRLptoRh.....sst.....sss................ap.hG.hshshlcclscYW...................................ps..sY..D..................WRptE..ttLN...p.aPpFpTpI.......-..........................GlsIHFlHh+o.............................tps.sAl..P...L.l.lsHGW......PG..ShhEFhclI................................................................................ 0 184 337 490 +6270 PF06442 DHFR_2 DHFR; R67 dihydrofolate reductase Finn RD anon Pfam-B_27527 (release 9.0) Family R67 dihydrofolate reductase is a plasmid encoded enzyme that provides resistance to the antibacterial drug trimethoprim. The R67 dihydrofolate reductase does not share significant similarity to the chromosomal encoded dihydrofolate reductase [1]. 25.00 25.00 27.20 92.50 22.80 22.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.64 0.72 -4.35 3 33 2009-01-15 18:05:59 2003-07-08 17:07:36 6 1 13 8 0 18 0 78.00 81 96.15 CHANGED MscSSNEVSsPVAGpFAFPSNATFGLGDRVRKKSGAAWQGQIVGWYCTsLTPEGYAVESESHPGSVQIYPVAALERVA ....................M-pusNEVSsPVAGpFAFPSNATFGhGDRVRKKSGAAWQGQIVGWYCTpLTPEGYAVESEuHPGSVQIYPVAALERls.. 0 0 0 0 +6271 PF06443 SEF14_adhesin SEF14-like adhesin Finn RD anon Pfam-B_36358 (release 9.0) Family Family of enterotoxigenic bacterial adhesins. 
21.90 21.90 22.10 22.70 21.60 21.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.03 0.71 -4.60 2 40 2009-01-15 18:05:59 2003-07-08 17:32:33 6 1 19 7 0 37 0 129.40 63 98.20 CHANGED +St.ulhlhALhsCGuApAAshVGshAsVpAslshuAQNThsAsWoQDsuhoGsuVsAhQKlGTLsIphTGsHsuV.luGcssu.SGGlhTlPFhsstGQslFRGRhput.hps.ssshlst.u.GW+lsuoQ-shNlslpsF.stsslPAGpaTATFYlQQYQs ................................................ssTluAQNTTSANWSQDPGFTGPAVAAGQKVGTLSIT.ATGPHNSVSIAGK.GASVSGGVATVPFVDGQGQPVFR.GRIQGANIN..DQANTGIDGLA.GWRVASSQEThNlsl.............................. 0 0 0 0 +6272 PF06444 NADH_dehy_S2_C NADH dehydrogenase subunit 2 C-terminus Moxon SJ anon Pfam-B_1662 (release 10.0) Family This family consists of the C-terminal region specific to the eukaryotic NADH dehydrogenase subunit 2 protein and is found in conjunction with Pfam:PF00361. 21.50 21.50 21.60 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.06 0.72 -3.66 136 25512 2009-09-10 16:36:32 2003-07-08 17:37:30 6 3 12030 0 41 25653 0 54.00 43 16.22 CHANGED LaFYLRlsYshsLThsPsssssth.pW..Rhps.pp....shhls.hhhsholhhL.PlsPhlh.s .....LFFYLRLuYssTlTlsPs.ossphp.pW....+hpp..sp.......shh.lu...hh...sshShhLL.Pl.o.Phlh..................................... 0 5 8 18 +6273 PF06445 GyrI-like AraC_E_bind; GyrI-like small molecule binding domain Finn RD, Anantharaman V anon Pfam-B_36124 (release 9.0) Domain This family contains the small molecule binding domain of a number of different bacterial transcription activators [1].\ This family also contains DNA gyrase inhibitors. The GyrI superfamily contains a diad of the SHS2 module, adapted for small-molecule binding [3]. The GyrI superfamily includes a family of secreted forms that is found only in animals and the bacterial pathogen Leptospira [3]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.75 0.71 -4.19 88 6066 2012-10-02 11:08:51 2003-07-08 18:33:54 10 24 2274 26 1074 4279 124 155.40 16 64.70 CHANGED hphclhphsshplhulcppsshsp............lsphh.pplhthttppth.................tssshhulhhspst......spspphchthshthst........................ptstshphhplsuu.paAshpap.Gsh......ssltpshpplastalspssh....thtsssshEhYhss.........ttpphhs.-lhlPlp ......................................................................................................................................phplhph.s.php.lh...u....h.....p.t..p...h.s.p.t...............lsphh..p..ph..ht.hhhppth......................ssth.hul..h.h.s.ps.........t..t..sp....p...h..p.h..p..h..s.h.ss.sp........................................psspshp..h.pl.s..u.G...pYA.sh.p.hp...Gsh...........psht.c..shtt.lap..alsp.p.sh......................phtss.s...p....h....E..h..Y..hs..s.t..........pstphph..-lalPl.................................................................. 0 387 718 907 +6274 PF06446 Hepcidin Hepcidin Finn RD anon Pfam-B_41732 (release 9.0) Domain Hepcidin is a antibacterial and antifungal protein expressed in the liver and is also a signaling molecule in iron metabolism. The hepcidin protein is cysteine-rich and forms a distorted beta-sheet with an unusual disulphide bond found at the turn of the hairpin [1]. 21.90 21.90 26.30 25.80 19.90 19.70 hmmbuild --amino -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.71 0.72 -3.56 20 197 2009-01-15 18:05:59 2003-07-09 10:43:59 7 1 91 5 26 181 0 55.70 42 64.69 CHANGED chEEhtspDsssAttpptss-shth...hRpKRps.phshCtFCCsCC..+hpG..CGhCC+F .....................pLEEsh.upDs.ssA.stpEhsh-Shhhs.t.Rp..KR.p......t..hC+.FCCsCC..phps..CGhCC+F......... 0 1 3 13 +6276 PF06448 DUF1081 Domain of Unknown Function (DUF1081) Yeats C anon ADDA_12201 Family This region is found in Apolipophorin proteins. 
25.00 25.00 25.00 25.20 21.90 23.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.51 0.71 -4.21 22 170 2009-01-15 18:05:59 2003-07-09 11:20:46 6 14 99 0 78 163 0 116.10 27 3.77 CHANGED sschppp...sCh.shlc.hhGlphChsh..shssl.ps......puh..........PLusPshhcl.lcts-.phctYphpushctpp....ss.....cpl+htlcs.Gupss+-upsslpas+cpcsh............................louclptsshsuc .............+.shssCp..shhs....GLphCssh..sas..sssss.................sus.........shaPLoGssphpl.Lc.ss.phcpYshphsachpc....ts........................cshchshps..G.........sp..pcsshshp..hspppts.............................hp.pl....h...t.................................................................... 0 21 28 55 +6277 PF06449 DUF1082 Mitochondrial domain of unknown function (DUF1082) Moxon SJ anon Pfam-B_2173 (release 10.0) Family This family consists of the C-terminal region of several plant mitochondria specific proteins. The function of this family is unknown. This family is found in conjunction with Pfam:PF02326. 25.00 25.00 44.80 43.80 23.00 20.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.51 0.72 -4.08 5 109 2009-01-15 18:05:59 2003-07-09 11:38:49 6 3 77 0 7 82 0 49.70 77 30.50 CHANGED DLLGK+RKIThISCFGEISGSRGMERNILYLISKSSYST.ss..oGWtIT...C+N DLLGKR.R.KITLISCFGEISGSRGMERNIhYLISKSSYST..........sGhtITC+N........ 0 1 4 6 +6278 PF06450 NhaB Bacterial Na+/H+ antiporter B (NhaB) Moxon SJ anon Pfam-B_5993 (release 10.0) Family This family consists of several bacterial Na+/H+ antiporter B (NhaB) proteins. The exact function of this family is unknown [1,2]. 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 515 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -12.95 0.70 -5.88 4 838 2012-10-02 15:12:49 2003-07-09 11:48:14 7 2 810 0 104 725 503 497.90 75 99.01 CHANGED M.hohupAFh+NFLGpSP-WYKlAIlsFLIINPIlFFhISPFVAGWLLVAEFIFTLAMALKCYPLQPGGLLAIEAlhIGMTSAppV+cElhANlEVLLLLlFMVAGIYFMKQLLLFlFTKlLlpIRSKhLLSLuFChAAAFLSAFLDALTVlAVlISVAVGFYuIYHKVASGpsh.tDpDhosDs+IpE....hp+s.LEpFRuFLRSLhMHAGVGTALGGVhTMVGEPQNLIIAcQAsWpFGEFhlRMSPVTlPVFICGLLTChLVEKh+lFGYGtpLPDpV+cILsDaDcppccpRTpQDKhKLhVQAlIuVWLIsuLALHLAuVGLIGLSVIILATSFTGVTDEHulGKAFpEuLPFTALLsVFFSVVAVIIDQpLFuPlIpaVLssE-p.TQLuLFYlhNGLLS.lSDNVFVGTVYINEsKAALhsGhIThcQF-LLAVAINTGTNLPSVATPNGQAAFLFLLTSALAPLIRLSYGRMVaMALPYTlVLoIVGLhuIpFh.LtssTtaFhshGhIhs ......................................MclShGpAhh+NFLGpSPDWYKLAlllFLIlNPllF.hl.uPFlAGWLLVAEFIFTLAMALKCYPL.PGGLLAIEAVhIGMTSs....cH.................V+EElAANLEVLLLLMFMVAGIYFMKQLLLFIFT+LLLuIRSKhlLSLuFClAAAFLSAFLDALTVVAVVISVAVGFYGIYH+VASu+s....-.-..sDhpDDS.+I..-.c....c.h+ssLEQFRuFLRS...LMMHAGVGTALGGVMTMVGEPQNLIIAK.sAG......Wc..FG-F..FL.RMu.P.VT.V..P..V..L.I..C..G..L.L.TChLVEK..h..R..hF.G..YGp.pL..PE..+.V.R.cV..Lp.pFDcpu.R+.p.R.T..+..Q.D...K..l.+L.......I.V..Q..A..l..I..GV...W.L.V...s....A.L.A.LH....L......A..E.VGL....I.G.LS..V....I.IL.ATu...hT..G.V.TD.EHA.I.........GKA.F.......p.....Eu.L.PF.T..A.L..L..TV..FF.S.VVAVIIDQpL.....Fu.PII..Q.FVL.Q.....A...S..-.H....uQ..L....o...L..FY.l......F..NG..L...LSSISDNV.FVGT.l...Y..I...NEAKAAhEs.........G....sI.ohc......Q.a.EL......LAVAINTGTNLPSVATPNGQAAFLF..LLTSAL...APLIRLSYGRMVW..M..ALPYTlVLTLVGLLsVEFh.LsPsTEWhhphGWIu.o..................................................................................................................................................... 0 21 42 77 +6279 PF06451 Moricin Moricin Finn RD anon Pfam-B_56760 (release 9.0) Domain Moricin is a antibacterial peptide that is highly basic. 
The structure of moricin reveals that it is comprised of a long alpha-helix. The N-terminus of the helix is amphipathic, and the C-terminus of the helix is predominately hydrophobic. The amphipathic N-terminal segment of the alpha- helix is mainly responsible for the increase in permeability of the bacterial membrane which kills the bacteria [1]. 25.00 25.00 25.50 25.10 19.30 19.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.84 0.72 -4.46 2 23 2009-09-10 20:06:58 2003-07-09 11:52:21 6 1 10 3 6 26 0 39.20 59 54.24 CHANGED uKIPItAIKpsGKAlGKGLRAlNIASTApDVasFhKPKKR+ ..uKIPltAIKKuGKAIGKGLRAINIAuTAHDVaoahKPKK++.... 0 3 6 6 +6280 PF06452 DUF1083 Domain of unknown function (DUF1083) Moxon SJ anon Pfam-B_2203 (release 10.0) Domain This family consists of several domains of unknown function exclusively found in bacterial xylanase proteins (usually at the C-terminus) although it is tandemly repeated in a number of family members such as Swiss:P38535. This family is always found in conjunction with Pfam:PF00331 and usually with either Pfam:PF02018 or Pfam:PF00395. The function of this family is unknown. 
20.80 20.80 20.90 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.34 0.71 -4.41 35 615 2009-01-15 18:05:59 2003-07-09 11:58:29 6 91 276 3 254 667 374 179.60 17 25.09 CHANGED I.DGps.DssWssAp...lshsphh....tus.....sssoush+sLWD.-csLYlLscVsDsl.lscsssss....a-pDulEIFlDpsNsK...sstYpssDhQY+lsasNpso....hssss...........hssshpousphss.sGY.....llEstIsh............psl.....sssssph...lGFDltlND.ssssGsRpuhhsWsDsosss......apssusFGslpLttp ................................................lDG.h..-t.W.ppst....hs.....h.h................ts.t..........ssh..psp..s+.hha..D..c....psLYlh..s.......p....l...p....Ds.....p.....h.....p.....p......tttss.............ha...p...s.Ds.lE.l.a.l...D....ssssp.......psta.....p.......h......p.....s......s....t..h.........t....h.....t.h..s...s..h..t.s........hphtt..................................................hs.sth..p...s..t.sp.........h.........s.........s...sua..........hhEhtIPh.....................psl........t..t.ts.ph.....huhs...h..........h.p...............................................................t........................................................................................................................................ 0 130 216 243 +6281 PF06453 LT-IIB Type II heat-labile enterotoxin , B subunit (LT-IIB) Finn RD anon Pfam-B_61882 (release 9.0) Domain Family of B subunits from the type II heat-labile enterotoxin. The B subunits form a pentameric ring, which interacts with one A subunit. Thus, the structural arrangement of type I and type II heat-labile enterotoxins are very similar [1]. 
25.00 25.00 139.10 139.00 20.60 19.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.54 0.71 -4.44 2 7 2009-01-15 18:05:59 2003-07-09 12:17:58 6 1 2 15 0 7 0 121.70 64 99.77 CHANGED MS.KKIItAFVlMsullSsQsaAGsSpaF+s.CNpTTAslVtGVpLpKYIuDlNsNTcGhYVVSsTGGVWhIstu+DYPDNhhouEhRKhAMAAlLSsh+VNhCApsuSSPNhIWAhEL-tE MsFKK.IuhhhlhhsIsSl.sYAGVScpFKDpCspTTAclVpuVQLsKhhSDlNpso+GIYlsSSTGtsWaIPGGp.YPDNaLSsEMRKIAMAAVLSssRVNlCASpA.oPNHlWAIELttE.. 0 0 0 0 +6282 PF06454 DUF1084 Protein of unknown function (DUF1084) Moxon SJ anon Pfam-B_12888 (release 10.0) Family This family consists of several hypothetical plant specific proteins of unknown function. 25.00 25.00 25.20 25.20 24.90 24.20 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.83 0.70 -5.37 10 304 2009-01-15 18:05:59 2003-07-09 12:39:16 6 6 72 0 206 287 1 218.40 26 53.16 CHANGED husosssult....ssWWDplNE.SshWQDsIFauLuulYGLVSsVALlQLIRIph.RVPEYG..WTTQKVFHLhNFlVNGVRAl.lFuF+.....+pV.plpPcllp..tlLLDlPGLsFFoTYsLLVLFWAEIYaQARu...LsTD+LRsuFaolNuVlYhIQIsIWlhlWhpPss....sl.hluKhFhAslShhAALGFLLYGGRLFlMLRRFPIESKGR+KKL+EVGhVTsICFoCFLIRClh....hslsAFDscAcLDVLsHPlLNhlYYllVEILPSuLVLFILRKLPPKRspsQYHPIp 
....................................................................................hhh.........................................h.lshhhhhluhhuhhQ.l.h.......ph.......h....+.........p.hs......hs...hp+lhhhh.......h...l..s......s......h................h.Rsh...hFhhh.....................ph.....ht.........t..hhp......hlLhshPsh...ha.aosasll.llaW...sclha.....p.u.ps........h..t...p..t....l...p.........hhhl.Nsh..lY...h.h....tl..hl...al...h......h..h.hp...t.....................hh.hh..th......h.h....u....sl.....h...h.h.uh....u.FhhYGhpLahh..l.........p............p...........h.........p......t..p.t.....p......p..p...c..h............c..lshlshl....s.hsFhh+shh.......hh......h......s.....h.................t....h...t..........h.....t.....l...h...........hhhahhsEhlPs.h.lLahh............................................................. 0 75 157 185 +6283 PF06455 NADH5_C NADH dehydrogenase subunit 5 C-terminus Moxon SJ anon Pfam-B_3060 (release 10.0) Family This family represents the C-terminal region of several NADH dehydrogenase subunit 5 proteins and is found in conjunction with Pfam:PF00361 and Pfam:PF00662. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.01 0.71 -4.90 78 9824 2009-01-15 18:05:59 2003-07-09 12:48:11 6 10 5550 0 166 9403 1036 165.20 32 35.29 CHANGED YShRllaashhGpsphsshsslNEss.hllsslhtLuhGSIhuGhhlohhlhP..psshhshPhhlKhhALhVollGllhuhplsshohp.hp.s......shppF.shhhFhP.lspphhsphsLhhGpplspp.hDpuWhEhhGspGlhph.hshophhpshQpu..lKhYLhhFllslllhhhhh ...........................................................................YShRlh...a..ashhG.p.....p...a.....o....l..p...s....l..N...-.p....s....h.h..lp..sh.htLhhhSIh.............uG...........................h...........l...............s..................................I...............h.................P......p................s......................h............h................s...................h.................P............h............................L...K......h......h.AL....h..V....o....l....l.G.h..h.l......u.............h.......-...........l...s....p...h....o.......................p.....h....h.....................p.....p.................t..................s...............h.hF..shh....hahPsl.ph.hhshh.s.Lhh.u..plhpp..hD.sW...E.h..h.Gsp.u.h..ph..h..p..o.hh.hp...Q.p.s..hK...h..Y...hh.hhlhhhhhhh........................................... 0 50 91 119 +6284 PF06456 Arfaptin Arfaptin-like domain Finn RD anon Pfam-B_5314 (release 7.5) Domain Arfaptin interacts with ARF1, a small GTPase involved in vesicle budding at the Golgi complex and immature secretory granules. The structure of arfaptin shows that upon binding to a small GTPase, arfaptin forms an elongated, crescent-shaped dimer of three-helix coiled-coils [1]. The N-terminal region of ICA69 is similar to arfaptin [2]. 
32.00 32.00 33.60 33.40 31.20 31.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.31 0.70 -5.01 11 535 2012-10-03 12:17:00 2003-07-09 12:49:09 8 12 97 10 285 472 0 209.60 33 55.43 CHANGED lhpKlc.......ppa+po+QlhpcphG+tpcp+h.ssDsEL-splElL+sspcpYtsllchscshpptlhplspsp+tLGchFppluh..Kspphscthstsu-sh+hluKpt.sLhsslphhlsclsTahs+sIsDThhTlcphEssRhEYcuhphclK-hspELsPpsstphshaRpsQsph.........ppsKc+a-KL+sDVh.Kl-LL-ps+spslsppLttapsslutaappsuctL ..........................................p+hp.......ppahpT+Qhh.cphG+tpc......ssD..s-.L-sp..lE.........lh+sspcphpsllch..sct...hppp.hhplsppp.ptLGchhpshu....co.thtcthshsucs.phhs+p.t...sLh.ss.lshhhp.slsThhp+sIpDThhTlpphEp..s.RhEYc.uhhh.hc-.h...s...hsPps........t..tp..h.+h..ctsQtph.........................pt.t+.p.pa-Kh+tDVh.KlchLtts+sphhpppLhha...p.ssl.taattstp.h...................................... 0 83 103 191 +6285 PF06457 Ectatomin Ectatomin Finn RD anon Pfam-B_63420 (release 9.0) Domain Ectatomin is a toxic component from the Ectatomma tuberculatum ant venom. It is comprised of two subunits, A and B, which are homologous. The structure of ectatomin reveals that each subunit is comprised of two helices and a connecting hinge region, the forms a hairpin structure that is stabilised by disulphide bridges. The two hinges are connected by a disulphide bond [1]. 25.00 25.00 82.10 82.00 18.30 17.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.84 0.72 -4.25 2 2 2009-09-11 06:57:38 2003-07-09 13:43:24 6 1 1 2 0 2 0 34.00 47 95.77 CHANGED hsphlh.TlCPTlcshAKKCpGsIAThIK+cCsK hsphlh.TlCPTlcshAKKCpGsIAThIK+cCsK 0 0 0 0 +6286 PF06458 MucBP DUF1085; MucBP domain Galperin M, Moxon SJ, Bateman A anon Pfam-B_4243 (release 10.0) & Galperin M Domain The MucBP (MUCin-Binding Protein) domain is found in a wide variety of bacterial proteins. 
The domain is found in bacterial peptidoglycan bound proteins and is often found in conjunction with Pfam:PF00746 and Pfam:PF00560. 22.60 22.60 22.60 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.82 0.72 -3.39 115 1918 2012-10-02 15:23:12 2003-07-09 13:51:03 7 136 317 12 279 1856 8 103.00 26 27.90 CHANGED hspssshTlpYlct.s......Gspls.ss...........................-hlT..G..hh...........................................scsa...ssss...sI..sGY...............................sht..sh..sss.ss.hshssss.sV ..................................................h.sKslTRTIpYhts.s..............Gpp.....s......t....t.....s.....h.....s.Q.s..l........sao...................Ro.............ssh.DpVT........Gplsh..............................ssWss...............tsssa....stl...ssP......sl......sGY................ssshsssstpsV..................................ss.ssp...sh....phsVs..Yptpst...................................................... 0 49 98 163 +6287 PF06459 RR_TM4-6 Ryanodine Receptor TM 4-6 Yeats C anon ADDA_12111 Family This region covers TM regions 4-6 of the ryanodine receptor 1 family. 
25.10 25.10 26.30 26.30 25.00 24.70 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.99 0.70 -4.50 21 301 2009-01-15 18:05:59 2003-07-09 14:40:33 7 42 92 0 144 243 0 247.80 36 6.13 CHANGED sMPDPTQstl+G.....-h.ptccsth.ptssstp.h.......ustcEschhs-.....hhGlthcKEGu..h...hssssGLuDhuph.s.tssospsssht+p.........................ttttpsps-sphsDhcsGEK.....pp.sc.ppp...ppsh.t.httppp+spctcc..thhhphatt.............lphhppKhlNYLARNFYNLRaLALFlAFAINFILLFYKVosp.ss---c.t.........................................tsthss..----..-csh.hahLp......EooGYM.tPsLphLAllHTlISFhClIGYYCLKVPLVI ...............................................................................................................................hP-PTt.tl+s...........ph.pt...t...t..h..pt....t.t.h.......t.tt-t-hhs-.........hhGht.......c+EGu..p......htspsGLuDh...p..s...t.s..s..ttsshtct.........................................................t.ttptc..ps..cscc....A..-hEsGEK......ttt..ctp..t.............sp..p.p....tptpsp+h....tcs..ps.h.sphatt..................lp.happK..hLs..YLARNFYNLRaLALFlAFAINFILLFYK.Vosp.sstc.tpt.t............................................s.sshsp...-c--...--shlaahLp......ESo.GYM.pPsLRhLAllHTlISFhClIGYYCLK.....VPLVI.................. 0 20 31 85 +6288 PF06460 NSP13 Coronavirus NSP13 Yeats C anon ADDA_12678 Family This family covers the NSP13 region of the coronavirus polyprotein. This protein has the predicted function of an mRNA cap-1 methyltransferase function ([1]). 
20.10 20.10 20.10 20.30 20.00 19.90 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.66 0.70 -5.55 8 426 2012-10-10 17:06:42 2003-07-09 15:53:49 7 37 250 4 0 450 56 280.30 61 5.15 CHANGED uu-WpsGYuMPsLYKhQchsLE+CNLaNYGtslsLPsGIMhNVAKYTQLCQYLNoTTLsVP+NMRVLHLGAGSDKGVAPGoAVLRpWL..........PpssILVDNDlsDYVSDAchSlhGDCsTlhh-sKaDLlISDMY....DsppKslst-NsSK-GFFoYl.suhI+-KLALGGSlAIKlTEaSW.NtcLYcLhp+FuaWThFCTuVNsSSSEuFLIGlNYLGc.ss+spIDGssMHANYlFWRNoshhphSt.SlhDhSKFshKhpuTsVVsLKpppls-hVhuLlcpGKLLlRsssphlhhusphVsss ....................................................ussWpPGYuMPsLYKhQphsLEhCsL.NYGtslsLPsGIMMNVAKYTQLCQYL.NTs.TLsVPaNMRVlHhGAGSDKGVAPGo..sVL+QWL............PsGolL.VDNDlsDaVSDAcsolhGDCsolhhpsKaDLlISD.MY....DspTK.plsttNsS.K-GFFTYl.sshI+pKLALGGSlAlKIT...EaSW.NtcL.Y.cLht.c.FuaWThFC.T.sV.N.AS...SS.EuFLIGlNYLGc..sK.pIDGpsMHANYIFWRNoshhphSuYSlFDhuKFs.LKL+uTsVlsLK-sQlNDhVhuLLc+G+LLlR-ss+hlssuD.LVN........................................... 1 0 0 0 +6289 PF06461 DUF1086 Domain of Unknown Function (DUF1086) Yeats C anon ADDA_2403 Domain This family consists of several eukaryotic domains of unknown function which are present in chromodomain helicase DNA binding proteins. This domain is often found in conjunction with Pfam:PF00176, Pfam:PF00271, Pfam:PF06465, Pfam:PF00385 and Pfam:PF00628. 25.00 25.00 30.70 29.20 19.80 19.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.89 0.71 -4.41 4 315 2009-01-15 18:05:59 2003-07-09 16:22:00 6 49 98 0 143 249 0 148.60 65 9.03 CHANGED .ssup........pctspsspRP.R++tR-sp-c..sPLhtt.GtplcVLGFNtpQRchFlpslMRaGh...s.hp.p..V.cL+tKo.c.FKtYu.LFh+HlsEsssDNSsoF........uDGVP+EGLsppcVLsRIulM.Ll+cKVQ.hEcasGc.shPphh. 
.........tsssE-s--p.......cs..t..sRR..sR+.p..L.R..s-+DKP...LPPLLARVGG.NIE.VLGFNsRQRKAFLNAlMRaGMP..............PQDAF.soQWL......VRDLRGKSEKEF.K..AYVSLFMRHLCEPG...A...DGuETF........ADGVPRE....GLSRQ.HVLTRIGVMSLlRKKVQEFEHlNGcaShP-L..h................ 0 29 51 89 +6290 PF06462 Hyd_WA Propeller Yeats C anon ADDA_3361 Family Probable beta-propeller. 20.70 20.70 20.70 20.90 20.60 20.30 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.05 0.72 -7.38 0.72 -4.30 73 1060 2009-01-15 18:05:59 2003-07-10 09:19:19 7 43 91 0 606 1024 6 32.70 33 16.01 CHANGED stVWAls.pc.........GplhhRp.......GlopssPpGss..Wppls ........tVWAls.pp........................GplhhRp.......Glos.p.s.PpGss..Wppl........ 0 133 188 369 +6291 PF06463 Mob_synth_C Molybdenum Cofactor Synthesis C Yeats C anon ADDA_4938 Domain This region contains two iron-sulphur (3Fe-4S) binding sites. Mutations in this region of Swiss:O14940 cause MOCOD (Molybdenum Co-Factor Deficiency) type A. 24.00 24.00 24.60 24.10 23.60 23.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.86 0.71 -4.45 66 3853 2009-01-15 18:05:59 2003-07-10 14:48:39 8 19 3317 8 1051 2921 1184 127.60 32 37.19 CHANGED hplRFIEhMshGpspph......cphlshpplhstlpppath..hsst.tps...susAchapl...........ssst.up.....................lGhIsshops.FCssCNRlRLTu-GpL+sCLatpps.h-LRshL+sss.stt....LtphlppultpKtttath ..............................................plRFIEhMshG.p......s.s.....th...tt...........pphlotpplh..sp.l...p.p...c.h...l......tt.hp...tpp.............suPAphaph................s.sht...uc.............................lGlIsshocs.FCuoCNRlRlou-GpLh.hC......LFupp....s..hsLRshL....Rss.......s....pppt.............Ltpt.lpps.l.ppK.ppa..h.............................................. 0 333 659 885 +6292 PF06464 DMAP_binding DMAP1-binding Domain Yeats C anon ADDA_4672 Domain This domain binds DMAP1, a transcriptional co-repressor. 
27.00 27.00 27.00 27.70 26.80 26.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.26 0.72 -3.47 20 421 2009-01-15 18:05:59 2003-07-10 16:02:57 6 20 154 0 223 376 0 97.80 28 7.19 CHANGED ssssLPt-VRc+Lp-L-h-hspGslTpKGYpKK+s+LLppFL.................................tscsppphtsppsth+cc.chpp-lapps.VpAhLsKptppchuhshsscccssh ...................s.......clpt.pLtpL-.-.hp.pGDITpKGYpK++spLLt...tal......................................................................t........t...........tst.......p.ph.phtst.psp..php..........pcha.htt.lpuhls+...p.c.........p................................................................................................... 0 35 62 128 +6293 PF06465 DUF1087 Domain of Unknown Function (DUF1087) Yeats C, Sammut SJ anon ADDA_2403 Domain Members of this family are found in various chromatin remodelling factors and transposases. Their exact function is, as yet, unknown. 20.80 20.80 20.80 21.10 20.40 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.18 0.72 -4.05 14 335 2009-01-15 18:05:59 2003-07-10 16:07:05 8 54 109 0 159 267 0 64.50 60 3.87 CHANGED pthttcp-ps.s-shtt..ps.cssaW.cLL+++YEpppt-ctppLGKtKRsRKQVsYs-t.sslps .....................c..Etcc.EElEREIIKQE..E.s..V.D...PDYWEKLL...........RHHYE..QQQEDLARsLG.KGKRlRKQV..N...YNDu.uQED............ 0 34 65 106 +6294 PF06466 PCAF_N PCAF (P300/CBP-associated factor) N-terminal domain Yeats C anon ADDA_4771 Domain This region is spliced out of Swiss:Q92830 isoform 2. It is predicted to be of a mixed alpha/beta fold - though predominantly helical. 
19.70 19.70 43.40 23.00 19.20 18.00 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.75 0.70 -5.05 3 179 2009-09-11 14:37:09 2003-07-14 11:28:16 6 8 84 0 111 162 0 215.60 57 31.42 CHANGED RIuQRKAQVRuLPRAKKLEKLGVYSACKAEEoCKCNGWKNPpPsss.PRtDLQQs.sssLoEpCR..SCcHuLAuHVSHLENVSE-EMNRLLGhVlDVENLFMSVHKEEDsDTKQVYFYLFKLLRKCILQRs+PVVEGSLG.cPPFEKPsIEQGVLNFVQYKFSHLuo+ERQTMhELAKMFLNpLNYW+LEoPSQRRtRSssEDlSsYKlNYTRWLCYCHVPQFCDSLPRYETT+VFGRTLLRSVFTlsRRQLLEKs ...........................t.s..pKl.KLuhaSuCp..upp.CKCsGWK....sPpssst.....s+..............hDlp..p....h.....sshs-.CR..oCpHsL.s.sHluHL.-slSE-EhNRLLGhVlDVEpLFhsVH...KEE..D.s.D.TKQVYFYLF.K.LLRKsILp.hs+PVlEGsLt....pPPFE+PsIp..p..uV.NFV.YKFSHL.s.s+EpQTh.hELuKMFL.plNaW+LEsPoph...RtR..s...s.-.Dh....usYKlNYTRWLCYCpVPphCDSLP+YETopVFGRoLL+SlFsshRRQLL-p........................... 3 27 34 74 +6295 PF06467 zf-FCS zf_MYM; zf-MYM; MYM-type Zinc finger with FCS sequence motif Yeats C anon ADDA_4806, Iyer L Domain MYM-type zinc fingers were identified in MYM family proteins [1]. Human protein Swiss:Q14202 is involved in a chromosomal translocation and may be responsible for X-linked retardation in XQ13.1 [2]. Swiss:Q9UBW7 is also involved in disease. In myeloproliferative disorders it is fused to FGF receptor 1 [3]; in atypical myeloproliferative disorders it is rearranged [4]. Members of the family generally are involved in development. This Zn-finger domain functions as a transcriptional trans-activator of late vaccinia viral genes, and orthologues are also found in all nucleocytoplasmic large DNA viruses, NCLDV. This domain is also found fused to the C termini of recombinases from certain prokaryotic transposons [5]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.75 0.72 -4.32 56 1780 2012-10-03 05:12:49 2003-07-14 11:43:49 9 35 200 2 719 1712 47 41.30 25 16.45 CHANGED ttssspspCphCpp.h..stpt......hphp......uphppFCSpsChspa ..........t...hhthpCshCpp.h.tptpp.........hphp......GphcpFCSpsChspa...... 0 60 95 259 +6296 PF06468 Spond_N Spondin_N Yeats C anon ADDA_5023 Family This conserved region is found at the in the N-terminal half of several Spondin proteins. Spondins are involved in patterning axonal growth trajectory through either inhibiting or promoting adhesion of embryonic nerve cells ([1]). 20.90 20.90 20.90 21.60 20.60 20.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.17 0.71 -4.62 17 420 2009-01-15 18:05:59 2003-07-14 12:07:36 8 35 159 3 258 396 23 167.40 30 35.50 CHANGED tA+YclsFpGhWSppoHPKcaPhh..ss+aSsllGuoHossYphWp.GphASsGl+phAEtGsshtL-pElct..tup+l.....................................polhpshu...hsuh.sssGpssuphcVDs.pHphlShlshlsPSPDWhVGVsul-LCpss.sWh-phsl-LaPaDAGTDSGhTapSsNtsTlPt-hlppITop.PscPtuPFYsPcupphsP .....................................Yph.h.t.Ws.t.aspp.aP.................spaSsll....G.suHsss.a.p.haphG.........ph..A.SsGl+phAEtGsst.tLt...p.-hpt..........t.sp..t..h.........................................................olhpshu............t.h....s.s.s.u..p....s...ps..ph...p.l-p...p+.ph.lShh...shlsPSPDWhV...G...l...s...u...hcLCp....ts..sW..hpp.hs...hsL...hP..aDAGTDuG.o....a...uss..p.P.t.h....hp..s.....p..ssah....................................................... 0 79 111 198 +6297 PF06469 DUF1088 Domain of Unknown Function (DUF1088) Yeats C anon ADDA_5036 Family This family is found in the neurobeachins. The function of this region is not known. 
20.80 20.80 20.80 20.80 20.70 19.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.18 0.71 -4.84 6 183 2009-01-15 18:05:59 2003-07-14 13:31:26 6 14 83 0 96 151 0 165.00 61 7.11 CHANGED EGRLLuHAMKDHlVRVANEAEFILNRQRAEDVHKHA-FESpCAQYsADRREEE+MCDHLIsAAKaRDHVTAsQLlQKIlNILTsKHGAWGs.u.sSpsp-FWRLDYWEDDLRRRRRFVRNPaGSoHsEATLKAuhEa...........sssE-plhph+cshpSQshsspN..scsEL.....lL-uD ..............EGRLLsHAMKDHlVRVANEAEFILNRQRAEDVHKHAEFE..............Sp..CAQYuAD+REEEK..MCDHLIoA....AK+RDHVTAsQ.Lh.QK........IlNILTNKHGAW............G...s...........s......u......t.............S..........p..........h.....+-FWRLDYWEDDLRRRRRFVRNPhGSTHsEAoLKu.AlEa................................uss.E.-tl.hKuKpsh+SQs..lssQN..sEsElhL-u............................................ 0 26 35 61 +6298 PF06470 SMC_hinge SMC proteins Flexible Hinge Domain Yeats C anon ADDA_5420 Domain This family represents the hinge region of the SMC (Structural Maintenance of Chromosomes) family of proteins. The hinge region is responsible for formation of the DNA interacting dimer. It is also possible that the precise structure of it is an essential determinant of the specificity of the DNA-protein interaction ([1]). 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.20 0.71 -4.14 102 3948 2009-01-15 18:05:59 2003-07-14 13:45:41 8 39 2700 16 1605 3541 451 116.80 25 9.96 CHANGED hpu.lhGhls-llpl..sppac..t.AlpssLGsplp.sllVcstpsApphlphlc.pt........p.....hG+s.shlsLsplpspthp......................................sst.llphlph.s...phptsl..phllussllscsl-pA..tpls .................................................h..GlhGtlu-Llpl...cp...cap....t..AlEsuLG.s..........s.hp.pll.V..-..s..p..c..s..Appslph.L+.pp................................p......hGRs..TFl...PL.s..p..l.psps.h.s..tt.tt.........................................................................shssh.hh.-.ll.p....h..-.........tphps.sh....p....lLG.ssllscslcpAppl.............................................................................. 0 578 968 1353 +6299 PF06471 NSP11 NSP11 Yeats C anon ADDA_6050 Family This region of coronavirus polyproteins encodes the NSP11 protein. 
25.00 25.00 37.30 36.70 18.30 17.80 hmmbuild -o /dev/null HMM SEED 594 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.02 0.70 -6.32 11 410 2009-01-15 18:05:59 2003-07-14 13:59:22 7 36 245 13 0 420 0 562.60 60 10.13 CHANGED scLQu..p..ssGLFKDCuKs.pslpPAaAsTalSlsDcaKss-sLAVplss..sssloYp+lIShMGF+hDlslsGYpsLFlTRDtAlRpVRuWlGFDVEGAHAstsNlGTNlPLQlGFSTGVsFVVpPpGhlsTcpGsshcsVsAKAPPGEQF+HLlPLM+KGpPWsVVR+RIVQMluDhLssLSDhllFVhWAtGhELTThRYFVKlGhEppCp.Cu+RATCYsSssss...YuCa...+HulGsDYVYNPahlDIQQWGYsGsLosNHDthCsVH+sAHVASuDAIMTRCLAIHDCFsKsV-WslpYPhIuNEppINpuCRhVQphll+AAlpsh+ssslaDIGNPKGI+CVsp.-scWpaYDppPlsp...sVKpLcYsYtsHtp.FtDGLChFWNCNVDpYPpNulVCRFDTRshSpLNL.GCNGGSLYVNKHAFHTPAaD+pAFt+LKPhPFFYYsDosCEshp.......cpV..sYVPL+ossCITRCNlGGAVCpKHAs.Y+pYlEuYNhhssAGFolWVs+sFDsYNLWpTFop...LQSLENlAYNVVKpGpFsGlsGELPVAIlNDKVal+ssssDshlFsNpToLPTNVAFELaAKRplphpPsls ........t.LQs.....psTGLFKDCSKphs.GlHPuaAsTahulsspaKss-s.LAVslss.hspshTYp+LIShMGFKhsh.....slsGYpshFITR-EAIRpVRuWlGFDVEGsHAst-slGTNlPLQlGFSTGlsFVVpspGhVsTcpGspFphVsAKuPPGEQFpHLIPLMp+GpPWpVVRhRIVQMluDpLpsLSDpVVFVhWAtG.hELTohRYFVKIG.EpsCs.Cs+RATCasSposs...YuC..W...+H....ul...G...hDYlYNPhhlDlQQWGYoGsLp.NHD.aCsVHtsAHVASuDAIMTRCLAla-CFsKcVsWslpYPhIuNE.plNsuCRhlQ+hhl+AAlhs.+hsllaDIGNPKuI+CVtp.-lpa+FYDtpPlss...sV+pL.YsYtsH+DpFtDGLChFWNCNVDpYPsNulVCRFDTRsLSsLNLPGCNGGSLYVNKHAFHTPsFD+sAFppLKshPFFYYsDSPCEhhs......sspl..DYVPL+SssCITRCNlGGAVC+KHAp.YRpYl-uYNhhsoAGFolWlh+sFDsYNLWpTFop...LQSLENVAYNlVppGHFsGhsGEhPsuIls-+Vhs+ssshDstIFpNpToLPTNVAFELaAKRsI+.hPpl...... 0 0 0 0 +6300 PF06472 ABC_membrane_2 Ald_N; ABC transporter transmembrane region 2 Yeats C anon ADDA_6479 Family This domain covers the transmembrane of a small family of ABC transporters and shares sequence similarity with Pfam:PF00664. Mutations in this domain in Swiss:P28288 are believed responsible for Zellweger Syndrome-2 [1]; mutations in Swiss:P33897 are responsible for recessive X-linked adrenoleukodystrophy [2]. 
A Saccharomyces cerevisiae homolog is involved in the import of long-chain fatty acids [3]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.51 0.70 -5.28 26 2221 2012-10-02 13:23:42 2003-07-14 14:08:31 10 15 1284 0 895 2380 1271 271.60 24 45.53 CHANGED hphsssFhpplhpLl+lhl.PphhspcshhLhslshhLlhRThlslhluphsuplhpsllp..............pshctFhhtlh.pahhlulssohlsshlcYhpscLtLpaRppLocalaspYLpsp.saY+huslDs....plsNsDQplTpDlppFs...........s.ssusLaoslsKPllDlhlasapL....hpssGttus..............hhlhsYlhhuus.ll+tlssPhucLssccp+hEGcaRahHsRLlsNuEEIAFYpGpchE+pplpppapsLlpphptllph+hhhshh-shlhKYhhsslGallsulPhF .................................................h........phh..l.hp.hh..s..p.t..hpt.............hh.h.....l..............h..h..l..hh..h........h.l...h..ts.h........l.s....lhls..p.hs.....s.phh...s.u.L....p....................................................................p.sh..ptFh..p..hlh....h............a.......h...........h.............lhh...hh.....s..h.h.s................s.....h..............h.......pa........l.p....p.h.L....tl.pWR....phLTcth...hsp.ah...............s............s....p...s........YY+l..p...h..hsp.........................t.h.-.N.P................DQ....RIspDlpths............s.sshsLh........h........s...l.lpsllsll...hFshh.L....aph..uG.shsh.......................................hhhh.l...ashhu..sh...l.h.......p............h.lu..p...................lhpLshppp............+hEusa.........R...hshs+lpcp.......uEpIAh........ap..G...c...p.h...............Ec...p...............pl...pppFps.lhpsh.....pp.......l.......hptp..hh.......h.......shhpshh.....phhhshhshllhus.h................................................................................... 0 255 477 704 +6301 PF06473 FGF-BP1 FGF binding protein 1 (FGF-BP1) Moxon SJ anon Pfam-B_14221 (release 10.0) Family This family consists of several mammalian FGF binding protein 1. 
Fibroblast growth factors (FGFs) play important roles during fetal and embryonic development [1]. Fibroblast growth factor-binding protein (FGF-BP) 1 is a secreted protein that can bind fibroblast growth factors (FGFs) 1 and 2 [2]. 25.00 25.00 41.80 25.60 20.00 21.60 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.86 0.70 -4.76 9 118 2009-01-15 18:05:59 2003-07-14 14:09:56 7 2 40 0 70 108 0 192.60 24 96.26 CHANGED lphLohL.LLlsthhhspstK..psKstptussppt.p...s...............pG+FsTK-pssCoWtl...hpts.pslsL+VcCpptsps.....asCtasGpPppC.tapscsptYWKQlsppLR+p+phCpssp.lLKoRlC+Kssspuph.................+hlopsh.ssptscpchppssspcpstsptpss.cpp........t.ttshssphPch.s.....psss.tppp+hAhEaCsEoWpSLCsFFlshhpG ..........................................shl.hLhs.hhhsthtp......t.cttttpt....stt..............................pG+F.op.c....p.t....sCshth.............pt.t.tthpLplpCp....p.....spp.......a.C.atGpPp.C.tatsp.phYW+QlhttL++.+.phCpsst.sL+splCp.+.ts...puph...................phhspsh.st...t....sppp.......tt...t...st.....t...........t................t.............t...t...hh.s.......s.ph...............pss.s...p..ptchs.paChcpapSlCsFFlshhps............................... 0 3 11 31 +6302 PF06474 MLTD_N MltD lipid attachment motif Yeats C, Bateman A anon ADDA_7289 Motif This short motif is a lipid attachment site. 25.00 25.00 25.10 25.30 23.40 24.20 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.45 0.72 -3.76 11 113 2012-10-01 23:27:00 2003-07-14 14:17:28 7 3 100 0 15 67 2 26.30 54 5.12 CHANGED Mp...................hsphuhlhh.sLLsGCQo ......................htVphShVhA.hLLVGCQS 0 3 3 9 +6303 PF06475 Glycolipid_bind DUF1089; Putative glycolipid-binding Moxon SJ anon Pfam-B_14397 (release 10.0) Family This family has a novel fold known as a spiral beta-roll, consisting of a 15-stranded beta sheet wrapped around a single alpha helix. It forms dimers. 
It has some structural similarity to the E. coli lipoprotein localisation factors LolA, Swiss:P61316 and LolB, Swiss:P61320. Its structure suggests that it may have a role in glycolipid binding. Its genomic context supports a role in glycolipid metabolism [1]. 25.00 25.00 28.10 26.80 22.00 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.00 0.71 -5.13 30 291 2009-01-15 18:05:59 2003-07-14 14:32:26 6 2 259 2 90 250 11 175.50 34 93.19 CHANGED tslpWcsh...-ssGhEplpl....ppsusultssuhlh.sppsups.hulpYclpsDssWpo+phplsshhutt.tplpltp-tcGpWhh.sGp...shssl-GslDlDluhoPFTNTLPIRRLsLst..GpstplsssalphPshp.lshspQpYopl..ss....phY+Ycusstu.......AclsVDccGhVlDYPsLacRl .........................t..lpWpsh....-.hsthEplpl....ph.ssptlpsputll..utpssps.auhpYclt.sDtshts+chslpshhutt..ppLpltpD..tcG...t.....Whs..tssp...............thsshs.GslDlDlshoPFsNsLPIRRLGLtp......ucst..slsVlYVslP.....-.....hs.VssspQsYosh.ss..............pth+acossss.......sslsVDs-GhVlDYPsLhcRh........ 0 24 50 68 +6304 PF06476 DUF1090 Protein of unknown function (DUF1090) Moxon SJ anon Pfam-B_14862 (release 10.0) Family This family consists of several bacterial proteins of unknown function and is known as YqjC in E. coli. 21.10 21.10 21.10 21.20 21.00 20.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.50 0.71 -4.42 40 769 2009-09-11 09:53:20 2003-07-14 14:36:26 7 2 736 0 83 307 8 111.90 51 89.98 CHANGED slsshlsssshus...ttt..hsGCssKtpsIppQIphA+taGNpp+lsGLcpALppVpscCoDsuLtp-+ppKltc.tcpcVsERpp-LpcAppc.......Gcs.-KIsK+pcK..LuEAppEL...pcA ..............lu..AlsLhsl.S.uuohA.....sohCQcKEQsIp.KEISYAcKHpNQsRI-GLpKALSEVRANCoD...opL+A-HpKKIA.......c.pK-.......EVAERQpDLsEAKpK.......GDA.DKIsKRc+K..LAEAQ-ELKK.l...................................... 0 4 19 50 +6305 PF06477 DUF1091 Protein of unknown function (DUF1091) Moxon SJ, Bateman A anon Pfam-B_14929 (release 10.0) Domain This is a family of uncharacterised proteins. 
Based on its distant similarity to Pfam:PF02221 and conserved pattern of cysteine residues it is possible that these domains are also lipid binding. 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.23 0.72 -3.55 534 1212 2012-10-01 19:31:57 2003-07-14 14:38:56 8 13 19 0 714 1209 0 83.10 21 48.20 CHANGED lphplhp+t.....ssacs.h.lashs.hDhCca...l...................p........hhphhaphhp..phS.Nh.scs....CPa...............stp....h..hlc.s.......hhhst......phlP.........l..P.pGp..Yhl .....................................hhhp........ssac.h..las.hs.h-hCch...l..pp.....p.........p...s......hhphh..aphhp.....ph.o..N....h..scs.......CPh.........................pss..h....hlc.s.....hhhst..................phlP....h.PpGpYhh.................................... 0 114 175 487 +6306 PF06478 Corona_RPol_N Coronavirus RPol N-terminus Yeats C anon ADDA_7507 Family This family covers the N-terminal region of the coronavirus RNA-directed RNA Polymerase. 
25.00 25.00 84.40 84.40 18.70 18.00 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.25 0.70 -5.76 11 440 2009-01-15 18:05:59 2003-07-14 15:04:52 8 32 218 0 0 416 0 326.60 64 6.25 CHANGED G.SSAARLEPC.NGT-sDhVhRAFDI....NKcVAslGKaLKsNCsRFppl.Dcc.......DuaFVVKRsTcSsh-HEQohYshLKsCsAVAcHDFFsa+cs+shhsNluRpcLTKYTMMDLsYALRpFDEpNC-lLKEILVhhGsCcpsY...F..-sKsWYDPVEN.DIaRVYApLGplVspAhLKsVtFCDsMVcpGlVGVLTLDNQDLNGsFYDFGDFlpshPGhGVPlssSYYSYMMPlhuMTNCLAuEsFhcuDl.upsaKsaDLLcYDFTEaKhsLFsKYFKYWs.pYHPNCsDCtDDpCllHCANFNsLFSTTIPsTuFGPLsRKlFlDGVPhVsTsGYHFKpLGlVaNpDVshHssRLS .............SusARL.PCusGsssDVshRAFDIhN..ppsAGhupaLKsNCsRFQc......l....Dc.....c.....ss.L...DuaFVVKRsThosYp+EpshYphl....K........s........s........ssVApHDFFpFchstshlspIsRpcLTKYTMhDLsYALRHFDcpsC-sLKEILVpYus...Cc-sY....F.ppKDWYD.VENPcIhpVYt+LG.lVppALLpsVpFsDshV-tGlVGVLTLDNQDLNGpaYDFGDFlpsAPGsGVslsDSYYSYhMPllsMTcsLssEpahc.Dl.tpsa+paDLLpYDFT-cKhpLFsKYFKYWs.psYHPNCh-C.DDRCIlHCANFNlLFShllPsTuFGPLVRKlFVDGVPFVVosGYHaKELGVVhN.DVshHp.RLS.. 0 0 0 0 +6307 PF06479 Ribonuc_2-5A Ribonuclease 2-5A Yeats C anon ADDA_8069 Family This domain is a endoribonuclease [1]. Specifically it cleaves an intron from Hac1 mRNA in humans, which causes it to be much more efficiently translated. 
22.50 22.50 22.50 22.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.90 0.71 -4.29 32 447 2009-01-15 18:05:59 2003-07-14 15:15:02 7 36 256 47 292 416 4 126.20 36 13.94 CHANGED c+LpFLpDVSD+hEhcsRcs.SshLphLEs.sutpVl..p.scWpp+lspshhssLs+a.R+..............YpssoltDLLRslRNKpcHYcEh.sccl+chlGslP-sahpYFspRFPcLLltsYpslt...phppcchFppYaps ...................................................................pcLpFlp.DVSD+hEhE.s...p................c..............u....s.......l.l.p...tLEp..s.......uptVl.......t..scWpppl.sts.l.h..s.sL.t....+.a...Rp.........................Ypuso.ltDLLRAlRNKpcHYcEl....P.........tcl..........p.p..............t............l...........Gs...l....P-..s.......a..hp...........YFssRFPpLLhp.sYpshp....thpp.-phFp.Ya................................ 0 100 156 224 +6308 PF06480 FtsH_ext FtsH Extracellular Yeats C anon ADDA_8169 Family This domain is found in the FtsH family of proteins. FtsH is the only membrane-bound ATP-dependent protease universally conserved in prokaryotes ([1]). It only efficiently degrades proteins that have a low thermodynamic stability - e.g. it lacks robust unfoldase activity. This feature may be key and implies that this could be a criterion for degrading a protein. In Oenococcus oeni FtsH is involved in protection against environmental stress ([2]), and shows increased expression under heat or osmotic stress. These two lines of evidence suggest that it is a fundamental prokaryotic self-protection mechanism that checks if proteins are correctly folded (personal obs: Yeats C). The precise function of this N-terminal region is unclear. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.29 0.72 -3.88 164 4553 2009-01-15 18:05:59 2003-07-14 15:26:04 10 9 3836 1 1188 3236 1943 99.30 18 15.13 CHANGED hhlahllhlllhhlhhhh....................ttt....................pspplsa..opF.........h.phlp...psplppltlp...........tpph....................................................h.s.ttpsspthtshhhss....sth.pp.............ltpthtpt.slphssp.t ..................................................hlalllhlllhslhp.h.................................sssps.....................................sspplsY..SpF....................l.pplp.......pGpV.......c.plplp...........spphs...............................................................................................................s..pppss.......sp.....h.....ps....h...h.s.ss...........t.....pp..............L.sthhpt.slp............................................................................................. 0 356 719 981 +6309 PF06481 COX_ARM COX Aromatic Rich Motif Yeats C anon ADDA_8118 Motif COX2 (Cytochrome O ubiquinol OXidase 2) is a major component of the respiratory complex during vegetative growth. It transfers electrons from a quinol to the binuclear centre of the catalytic subunit 1. The function of this region is not known. 20.90 20.90 21.40 20.90 20.50 20.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.97 0.72 -4.29 98 1360 2009-01-15 18:05:59 2003-07-14 15:39:44 9 4 1276 4 243 775 21 47.10 36 14.66 CHANGED K....so.spsLs.tssYtpLu...pPSEppPVpaauoVpssLFpsllspahtttp ........KtS.spsLs.hssapcLA.........tP..SE.....h.......s....VpYFSsVcPsLFpslls+ahstt...... 0 42 105 169 +6310 PF06482 Endostatin Coll_NC10; Collagenase NC10 and Endostatin Yeats C anon ADDA_828 Domain NC10 stands for Non-helical region 10 and is taken from Swiss:P39059. 
A mutation in this region in Swiss:P39060 is associated with an increased risk of prostrate cancer. This domain is cleaved from the precursor and forms endostatin. Endostatin is a key tumour suppressor and has been used highly successfully to treat cancer. It is a potent angiogenesis inhibitor ([1]). Endostatin also binds a zinc ion near the N-terminus; this is likely to be of structural rather than functional importance according to ([2]). 23.20 23.20 23.70 23.90 23.10 22.80 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.15 0.70 -5.03 11 375 2009-01-15 18:05:59 2003-07-14 15:52:32 6 57 98 17 154 356 3 193.10 29 28.00 CHANGED GsusGssshpohpsMlspu+phPEGsLlalh-cp-LYlRVRpGa+plhLsshsPlssss................................PPsshhh........................................s...t.hhspP.phsphp...................ts.hhppsPtPssssss...cscpp.tPsLHLlALNsPhSGsMRGIRGADFpCFQQARssGLtGTFRAFLSSRLQDLYSIVRRADRtslPIVNL+D-VLFsSW-ulFoGutu.hpsusRIaSFDGRDVLpDssWPQKhVWHGSsscG+Rhs-oYCEsWRTsspusTG.ASSLt.uG+LL-QputSCpssaIVLCIENSFMTptpK .............................................................................s.............................................................................................................................................................................................................................................................L+hhALNtP..s.G.sh......p.............AD....h.CapQucthGh..hs....T.a+AFLSS......+lQsL.plVp.s-R..thPllNh+splLF.sWpshFs.s.p.......t.u..h.....p..t..laSFsG+slhtcstWP.K...lWHGS.s...tG...R........ppaCcsW+stt....h.u.uu.l.............s............t..hl.t.Q..p.........t..CttthhlLClE............................................................. 0 36 49 101 +6311 PF06483 ChiC Chitinase C Yeats C anon ADDA_8334 Family This ~170 aa region is found at the C-terminus of Pfam:PF00704. 
20.50 20.50 22.50 20.50 18.90 20.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.05 0.71 -4.54 20 239 2009-01-15 18:05:59 2003-07-14 16:00:39 6 10 178 0 35 173 3 178.60 51 18.04 CHANGED EYhMGsThTohhY-KFssA.oPYG.sp.tushshPspulDlslshosFtlGDsNYPIsPclplTNNSssslPGGochpFDlsTSsssphpD....QSGhGlpVlsSGps.sGsNlGGL-s-FHRVuhoL.....PuWpoLAPGuoh-lshsYYLPlSs.PSNasVshsGppYulpt-aPpLP...hss.suuGs......G ..EYthGsTMTphhY-KFpsA.oPYG.s+...hussshPspslDlsVslsuFplGDpN.YPINPKlsFTNNoslsIPGG..ocFpFDlPsSusDssKD....QSGuGLKVIsSGHT.pusNlG.GLcGshHRVAhoL.....PuWcoLPAGuoY-lDMVYYLPlSG.PuNaoVplssppYuhph-..PsLPss-.hsssGss.ss........... 1 10 16 28 +6312 PF06484 Ten_N Teneurin Intracellular Region Yeats C anon ADDA_8545 Family This family is found in the intracellular N-terminal region of the Teneurin family of proteins. These proteins are 'pair-rule' genes and are involved in tissue patterning, specifically probably neural patterning. The intracellular domain is cleaved in response to homophilic interaction of the extracellular domain, and translocates to the nucleus. Here it probably carries out to some transcriptional regulatory activity ([1]). The length of this region and the conservation suggests that there may be two structural domains here (personal obs:C Yeats). 
20.20 20.20 27.90 38.40 19.40 18.80 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.21 0.70 -5.34 4 409 2009-09-11 06:34:01 2003-07-14 16:08:25 7 28 37 0 154 368 0 172.60 36 11.44 CHANGED RSLTp.RpDTE+RYTSSSADSEDuKls..KSYSSSETLKAaDpDSRhsYGsRVKDhVH+EsDEFSRQGssFoL+-LGhGEssPsHhusYRoDMGLPHpsYSlSsuSDADTETDGlMSPEHAVRLWGRS.TKSGRSSCLSSRANSNLTLTDTEHENTEN..........................GPPLHCSSASS.SPl-QhP....PPPS.AANpsQttLLGsS.....uApsupDS-SE-EFuPNSFLVKosSGNlhsPttAsup..ssaQNHSRLRTPPLPLsHsHoPS..HHsASINSLNRuNYTpR.SN...PSPAPTDpSsssEsPsu.Q-SlpsQDNWLLNSNlPLETR.................................HFLFKPG.GTSPLaCTTSPGYPLTSSTVYSPPPRPLPRNTFSRPAFsLKKPYKaCNWK ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................shp.t............p....................st.........s.hppsWlLsSNhsLEo..R..........................................................pFLFK.u.Gooshhssss......sY.shsosoVYosPsR.LPRsohs.R.hFphpKs.+hCsW+................................... 1 7 22 53 +6313 PF06485 DUF1092 Protein of unknown function (DUF1092) Moxon SJ anon Pfam-B_14522 (release 10.0) Family This family consists of several hypothetical proteins of unknown function all from photosynthetic organisms including plants and cyanobacteria. 
20.10 20.10 20.50 31.90 18.10 18.00 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.74 0.70 -5.44 37 156 2009-01-15 18:05:59 2003-07-14 16:19:01 6 2 107 0 74 169 167 260.20 36 87.46 CHANGED WELDFYSRPll-pcGKKhWELLIssssps........FcaschCPuscVNShWLpsALpcAltt........uhttPp+lRsaRspMpohlp+AspplGlpshPSRRTauLhcWLpcRpcplYPpp.GY.s..hssss.sh.tssPhPLP-ulpGDp...WsauuLshuslp-ht.-W...slsFss....lhPls....hsLss-........t.lPGlhlFSppRuLsLAuWluGLEPspLphpss......pLlLEuG.sD+Wlluslp.sspspttupsapps+ppupGLpFlulQssPpspsFsGFWlLc- ...........WELDFYSRPllDtpGKKhWELlIC-ssts........hpasphCPssplNShWLppAlppshtp........uh.hPpplRhFRspMpshIp+Aspcl.....ulpshsSRRThuLhpWLp-R.pplYspp.Gapt.....ts.ssls..hsp.ssPhsLP-sLhG-p...Ws...FlsLshuslp...-...h....-h...sls.Fup....hhPls....hsLsss........shIPGlhlaSs.RuhsLAuWhuGLE.stLphpss.............tLlLEsGhs-+Wllushp..sspstt....tApsaEpsKptupGLHFLulQs.sssupsasGFWLLpp........... 0 17 55 70 +6314 PF06486 DUF1093 Protein of unknown function (DUF1093) Moxon SJ anon Pfam-B_15034 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.50 21.50 21.50 21.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.50 0.72 -3.62 59 1369 2009-01-15 18:05:59 2003-07-14 16:22:07 6 2 510 4 84 672 2 79.20 26 65.84 CHANGED sshhttcshYspIsss...ucppspp.............tpYpYphpuaccsGcc+plpa...sus.....+pL+psuYLKlphpspp..........VpsacEVpcc- .............................shhttcshYsplsps........upchsp..............pthpYphsuasccGcccplpasus........ppL+.pssYLKlhh....ps+c............Vpsa-Elpcc.................................. 1 23 44 63 +6315 PF06487 SAP18 Sin3 associated polypeptide p18 (SAP18) Moxon SJ anon Pfam-B_15078 (release 10.0) Family This family consists of several eukaryotic Sin3 associated polypeptide p18 (SAP18) sequences. 
SAP18 is known to be a component of the Sin3-containing complex which is responsible for the repression of transcription via the modification of histone polypeptides [1]. SAP18 is also present in the ASAP complex which is thought to be involved in the regulation of splicing during the execution of programmed cell death [2]. 20.80 20.80 20.90 21.60 19.50 18.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.62 0.71 -4.31 26 277 2009-01-15 18:05:59 2003-07-14 16:37:32 7 8 228 5 202 279 0 126.60 42 57.63 CHANGED t.t.lDRcpTCPhLLRlF.hppspaasls-as............................splP.tsELQIYTWhssTLRELspLl+-.....s..ssR++GTphsFtllaPDt+p.........upYls+-lGoshs..Gt+ss..................................D-sKTLpst+FpIGDalDluI ...............................s...lDREKTCPh..LLRVF.hp.sGpHHph..s.-Fs.....................................t....uslP.psELQIYTW.......hDuTL+ELosLl+-............shstsR++GT+hsFslVaPDh+p..................stahh+-lGsshs....Gpcss........................................................................D-s+TLtsh+FpIGDYlDlAI.................................. 1 70 107 158 +6316 PF06488 L_lac_phage_MSP Lactococcus lactis bacteriophage major structural protein Moxon SJ anon Pfam-B_13945 (release 10.0) Family This family consists of several Lactococcus lactis bacteriophage major structural proteins. 
20.20 20.20 20.20 31.10 19.70 20.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.74 0.70 -5.36 2 78 2009-01-15 18:05:59 2003-07-14 17:14:50 6 6 55 0 7 54 0 214.50 62 72.90 CHANGED MKLDYNSREIFFGNEALIVADMsKGSsGKP.FoNHKIVTGLVSVGSMEDQAETNSYPADDVPDHGVKKGATLLQGEMVFIQTDQALKEDILGQQRTtNGLGWSPTGNWKTKCVQYLIKGRKRDKlTGEFVDGYRVVVYPHLTPTAEATKESETDSVDGVDPIQWTLAVQAT-SDIYLNGDKKVPuIEYEIWGEQAKDFsKKMESGLFIhQPDT.LAGtlTLVAPslsNVpTtTKGNNDGTIVlPsTLKsSKGpsIKVTuVIKDs+GpVATNspLAPsVYIVTFSA-GYtDVpAGVuVTs+s .........................................................................................................................................................................................................................................................................................................LAGsl..TLVAPshs.s.sTTus.KGssDuThslPsTLKDScGusVtVTSVIpsupGpssTN.GpLusGsYhVTaSA-GY-DVTtulsVTD..... 0 2 2 2 +6317 PF06489 Orthopox_A49R Orthopoxvirus A49R protein Moxon SJ anon Pfam-B_14072 (release 10.0) Family This family consists of several Orthopoxvirus A49R proteins. The function of this family is unknown. 25.00 25.00 79.10 78.80 19.20 18.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.21 0.71 -4.26 2 38 2009-09-11 15:19:44 2003-07-14 17:19:49 6 1 18 0 0 29 0 147.10 88 99.93 CHANGED MDEuYYSGNLESVLGYVSDMHTcLASIoQLVIuKIETIsNDILNNsIVNFIMCRSNLNN........lYhh.c.pIY.......................aY+..................................................SpDlpERh. .....................MDEAYYSGNLESVLGYVSDMHTELASISQLVIAKIETIDNDILNpDIVNFIMCRSNLDNPFISFLDTVYTIIDQEIYQsELINSLDDNEIIDCIVNKFMSFYKDNLENIVDAIITLKYIMNNPDFKTTYAEVLGSRIADIDIKQVIRcNILQLSNDIRERYL........... 
0 0 0 0 +6318 PF06490 FleQ Flagellar regulatory protein FleQ Studholme DJ anon Pfam-B_13480 (release 9.0) Domain This domain is found at the N terminus of a subset of sigma54-dependent transcriptional activators that are involved in regulation of flagellar motility e.g. FleQ in Pseudomonas aeruginosa. It is clearly related to Pfam:PF00072, but lacks the conserved aspartate residue that undergoes phosphorylation in the classic two-component system response regulator (Pfam:PF00072). 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.16 0.72 -3.90 47 334 2012-10-01 22:20:39 2003-07-14 17:20:29 6 6 327 0 79 856 142 112.00 30 23.81 CHANGED +lLll-ssspRppsLssILcFlGEp....sphhsssph.....tstthpsphpuhslhhssss......ptlpsltpthsthPlLllGcpsts....phss......llGpl-hPlsYspLs-hL++sQh ..............+lLlI.-DD.upRRpsLssIL...c.FlGEp....spsh.s.u.p.ph.....sphshs.ssh.ps.h...h..l..h.st.ssp......thtphLp.pl..h..s...t..s..s..a.lPlLlhscps.s......chss..........................hlG.p...L-hPhsYspLp-.uL++sp................................................. 0 17 34 59 +6319 PF06491 Disulph_isomer DUF1094; Disulphide isomerase Moxon SJ anon Pfam-B_14101 (release 10.0) Family This family of proteins has disulphide isomerase activity, EC:5.3.4.1. It has a similar fold to thioredoxin, with an alpha-beta-alpha-beta-alpha-beta-beta-alpha topology. It has a conserved CGC motif in the loop immediately downstream of the first beta strand. This motif is essential for activity [1]. 
25.00 25.00 31.10 31.10 19.30 17.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.67 0.71 -4.32 28 820 2009-01-15 18:05:59 2003-07-14 17:22:22 6 1 476 4 121 346 200 135.90 57 94.58 CHANGED Y.cplVpPMRpELopuGFcELpTsE-V-pshp...p-GTTLVllNSVCGCAAGlARPAAstAlpp.-++PD+LVTVFAGQDKEATs+sR-.YF.hsaPPSSPShALhKDG-lVahIcR+cIEG+ssptIsppLtsAF-caC .....YMpplVpQhRsElspuGappLpTuEsVcchhp...pcGTTLVhlNSVCGCAuGlARPAAspulph..-+pP-+LVTVFAGQDKEATs+hRE.YF.tshsPSSPSaALlKssclVchlcRHpIEG+-h.slhtpLpssF-c.C............ 0 44 86 107 +6321 PF06493 DUF1096 Protein of unknown function (DUF1096) Vella Briffa B anon Pfam-B_15011 (release 10.0) Family This family represents the N-terminal region of several proteins found in C. elegans. The family is often found with Pfam:PF02363. 25.00 25.00 26.20 26.20 18.20 18.20 hmmbuild --amino -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.74 0.72 -4.01 10 23 2009-01-15 18:05:59 2003-07-15 10:07:56 6 4 5 0 23 16 0 53.70 49 14.66 CHANGED pusslR-KRQ.uCGCA..ssQPoCuCQpusps.............QpSCSC.ps.QP.........ouCuCA ..t.usslR-KRQsCuCAP.spQPpCuCQ.psshs..s............tQ.sCoC.psssP.Q.......suCsCA................ 0 6 12 23 +6323 PF06495 Transformer Fruit fly transformer protein Moxon SJ anon Pfam-B_13780 (release 10.0) Family This family consists of transformer proteins from several Drosophila species and also from Ceratitis capitata (Mediterranean fruit fly). The transformer locus (tra) produces an RNA processing protein that alternatively splices the doublesex pre-mRNA in the sex determination hierarchy of Drosophila melanogaster [1]. 
26.10 26.10 26.60 26.10 25.20 26.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.69 0.71 -4.40 4 112 2009-01-15 18:05:59 2003-07-15 14:07:55 6 5 25 0 9 111 0 154.90 62 79.37 CHANGED MDADSSupp.RDoR..................RcuRpKE.KlPYFADElREpDRlRpLRpRtpppTR..............RoRSRSRSpSu-RssppRR+RpRSpsRp+Sto.........Rp+osSS....pRRRRpRS.pR.hs.sP+IIsh.V.VPstDa....YG.........hSuM.tuhsYth.PRP....PPa......PPhPFRYRt.sPFhstPRF .............MDADSS.stp.R.D.TR.........................RRPRQRE....KMPYFADELRERDRVRNL.RKLKTT..QKR.TP..TPPPRERRSRpARoRSRSRTHSsEQSRCpRR.RSR.....SYV.....R.QRS..GS.........RH.Qo.SS.Ss.....sRRR+SRSRSR...RSRTPRIITVPVPVPAAEY.uYAYs..........h......................................................................................................... 0 2 3 6 +6324 PF06496 DUF1097 Protein of unknown function (DUF1097) Moxon SJ anon Pfam-B_15055 (release 10.0) Family This family consists of several bacterial putative membrane proteins. 22.30 22.30 22.30 23.20 22.20 22.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.94 0.71 -4.38 53 1004 2009-11-03 13:31:05 2003-07-15 14:16:43 6 2 730 0 102 428 24 140.50 53 85.64 CHANGED AlosGlLuulWshl...AsshsLs.sW.....sGFlussoaFAt.tsGhpGhhtohsoshoGlhWAhlhltuuuhh.sh......shhuhlhsulsshhMshtAph.phL.uFlPGsFlGssuTFAs..................ssshhhllsuLllGslhGashphhuthLhp ...................................AlTTGILSGlWuWV.....AsuLG...Ll.oW.....AGFLGCTuYFAsPpGG...hKGLhhShsT.hSGhVWAhlIIhuS..uhhspl........plluYll.TullAF..lMClQ...A....+p...h....LL.SF...lP.GTFIGsCATFAu..........................................tGsW...p..lVLPSL....hlG.hlFGYhMcsoGlaLu.t............ 1 23 50 72 +6325 PF06497 DUF1098 Protein of unknown function (DUF1098) Moxon SJ anon Pfam-B_15446 (release 10.0) Family This family consists of several hypothetical Baculovirus proteins of unknown function. 
21.30 21.30 21.50 40.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.19 0.72 -3.93 22 51 2009-01-15 18:05:59 2003-07-15 14:19:53 6 1 51 0 0 48 0 97.70 30 85.45 CHANGED ppp......ppsppsspsss............sssplLpuLsp...posAphIlsDsStsKpssLppLuppStsAK+llcuIps.sp-slpl.ss.cslslLcllsDIasNph ..............p......ppstps.ss.sp...........hssschLpsLNp...pTsAshIlsDso.sKppuLphLuppSssAKplL...-slps.ssssl+L.sshcslslLcllusIaDNph.. 0 0 0 0 +6328 PF06500 DUF1100 Alpha/beta hydrolase of unknown function (DUF1100) Moxon SJ, Bateman A anon Pfam-B_15719 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. Members of this family have an alpha/beta hydrolase fold. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 411 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.38 0.70 -5.90 5 738 2012-10-03 11:45:05 2003-07-15 15:59:26 6 3 717 10 104 985 155 396.10 61 97.14 CHANGED tsSKNLSETLFpsHKQAKETSoLTQYMPoSp..slLDsl-pcoupuWYRpLRRLQWlWQGlDPlEhE-VLARIASSKHSRTcD-WLDTVMGYRSGNWsYEWs+lGMhHQ++AsEcssE.sAu-phFsAALhYSIAGYPHLKuDNLAlQAQVLAN+AYpEAAK+osYTlKQLEFPapc.uKIoGaLHLPs.TDuPaPVVLVSAGLDSLQTDMWRLFRDYLAP+DIAMLTIDMPSVGaSS+WPLTEDSSpLHQAVLNpLsslPWVDHaRVGLlGFRFGGNAMVRLuFLEs-KlKACVsLGAPVHDLFoSPcKLQpMPKMYLDVLASRLGKusVDlcSLuGQMsAWSLKVQGFLSGRRTKTPILAhuLEGDPVSPYSDNQLVAhFSssGKAKKIsSKTIocGYEQSLDLAIcWLEDELp 
..................................................................................................MoQANLSETLFKPRF..KHsETSTLV...RR...h..s+..Gup..sslQ..S..A...LDG.c.olsHWYRMINRLMWIWRGlDPpEIL-VQARIVMSDAERTDD-LaDTVIGYRG...GN...WI.YEW..A....pQA....M.s...W....Q...Q...KA....s...t....E....p....D....s....hoGR.a.W.LH.AAsLYsIA..A..Y......P...H.....L.....K.....G.....D...-....L....A....E....Q..A....Q....A....L.....u.....N.......R...A....Y.E....E...AA....Q..R..L....P.Go...h..Rp..h.EF.s.....l.........P......G.....Gu......P..........I..T..G....F......L......H.....M...P........K.......G...............D.....G......P...F....P....T.V.L.M..C.GG....L.....D...u..h.....Q....T...D.......Y.......Y......s...L....Y....E...+...Y...FA.PRGIAMLT..ID.........M.........P....S...........V..G......F.......S........S.....K.......W....K.....L.....T.....Q.....D....S....S..l..L.....H.....Q.....+.....V.....L...+....A....L....P....N..V......P.......W......V...DH...TR..VAA.FGF.R.F.GA.NV.A.V....R.LA..YL.E.u.s...R......L...KAV.A..C......L...GP...V...V...H...s..L...LS....D.........p....pQ.....p....p.....V.....P...E.....M.....Y....L.DV...L..ASRL.GM.H...D..A..S......D-.A.......L..R.VE.L.N...RY.SLKVQGLL..G.RR...CPTPM.LSGaWKNDP.FSPEED.SRLIToS.SuDGKLlEIPFsP..V.Y+..NFD.+uLpEIocWIE+RLC.................................................................................................................................. 1 16 42 73 +6329 PF06501 Herpes_U55 Human herpesvirus U55 protein Moxon SJ anon Pfam-B_15779 (release 10.0) Family This family consists of several human herpesvirus U55 proteins. The function of this family is unknown. 
20.40 20.40 20.40 21.40 20.10 20.20 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.53 0.70 -5.91 3 7 2012-10-03 01:18:03 2003-07-15 16:04:15 6 1 5 0 0 51 0 426.60 42 95.40 CHANGED oS-Ls+llQlslDaN-.lQuc.Nao.lhlcCllS+T+PVLLLITDGTEScsEDVhFsu+sLcccpsIKI+lhPlsRoI.Ps+VplFulPIapIoSSLlIpD.shh..KENhDPpcaEQaGlhspsIuTTNlIs.lpsVpNcslElolTlFNIsWccSsYQsplccosG++LpTlaoVFSlNTspCPYWpshF+..SshPlC+VpMISEPsVSVYKIEFssPlLpVFLRshsLsspNsRFsVspEChLRLsF.ScPshsoVoLNlsMPYFKICuDtKslEVFFP-cMoLssNcsKcIsLRGTFpNhsAVGLFIPcposVlp.aPFlWpPpEoF+l+VoC-RospVTEHDIIG+VYFIo++lFR+sF+PsusuDhKSclEtspNoscsFclaFLGNcFFussLP-LTLHPhhshcYEclQusuNIpp.s.NcsPShpRlRl .............................................................................ospLs+llQlshDhNc.l.sp.sap.lhlcChlSpT+PslLhITDGTEScsEDVhFssphLcpppsIKI+lhPlsRol.Ps+lplFulPIah.osuL.IpD.shh..KENhDPhhaEQaGlhshsIuTTNlIs.hpsVpNcslEholhlhNlsWpcSsYQsphhcp.G++LhhlaolFSlsTppCsYWpshFc..ShhPlC+VpMISEPslSlYKIEFssPhLplFLRshsLsppppRFslspEChLRLsF.S.PshsoVoLNhsMPYFK.IC.uDtK.slEVFFPscMoLs.NpsKcIsLRGpFpNhph.VGlFIP.spopVhp.aPFlW.spEsh+l+loC..-+.ospVTEH..DhlG+laFlo.++lFR+.sF..+shu.suDhKShlpt..spNo.scsFclaFhGNsF.u...L.P-LTLH.Ph.....cYEch.upuN.ph.s.scp.Shh+hRl..................... 0 0 0 0 +6330 PF06502 Equine_IAV_S2 Equine infectious anaemia virus S2 protein Moxon SJ anon Pfam-B_15780 (release 10.0) Family This family consists of several equine infectious anaemia virus S2 proteins. The function of this family is unknown. 18.90 18.90 20.80 22.30 17.00 15.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.31 0.72 -4.11 2 41 2009-01-15 18:05:59 2003-07-15 16:07:45 6 2 2 0 0 41 0 65.30 75 83.19 CHANGED MGlFGKGVTWSA.HShG.SQGE.QPL.PNpQpp.ohR+p.ha..N.IVIhhsl+ptWQppcpQ-TKK .....MGLFGKGVTWSALHSMGV.SQGEYQPLSPNKQNQQTH+KtIhWYINPIVIMhAIKpKWQRQETQDTKK.... 
0 0 0 0 +6331 PF06503 DUF1101 Protein of unknown function (DUF1101) Moxon SJ anon Pfam-B_15836 (release 10.0) Family This family consists of several hypothetical Fijivirus proteins of unknown function. 25.00 25.00 77.70 77.60 18.50 18.00 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.07 0.70 -5.64 5 109 2009-01-15 18:05:59 2003-07-15 16:10:46 6 1 9 0 0 40 0 317.00 82 99.67 CHANGED M-RuoREHsKFSKANT+sEs+pMRhYKDDSsD-lsYSEIsVGlooooP+MuLSDYFSuVSloF-sEtRl-ElcPhlYuDLpF.p-pYspDVDLNLLlWQLLSuNQDS+ALCVNlLRMlsTluhGNAaIsc.GpY+Y.spsTs-pTss-DlDuLRlluRlAKIlIKsshsKsD.L+ssQcpLIpYapG+ua+SloLoWDSKSlLsolHGYS.TSEslLDaYIRpKL.DLFKuLpssNLVYGGNYpLVYQlLFYYYIlTNGRaSoGFosR+-S..IKoYslPNDsPusCNso.PRKPoLSLMaIRAlLlIsLIKDYSPlKplPlYlppLElEcPhpNoshlTDuGIRoEs-shssosslsts..LPsFSSsuu ..........................pEMRIYKDDTAsGLCFSEINVGCTSooPKMuLSDYFSSVSCSFDGEMRhPDlPL+hYGDLHF.H-QFTNDVDLDLLCWQLLSSNQDSRALCVNILRMlTuLSLGNAFISE.GRYHY.AlDTTEpTSAEDsDALRhLuRlAKIVIKNslcppD.lshAQQsLIYYYFGsSapGIHLNWDS+SSQ.SlHGYS.TSEsCLDHYIRMKI.DLFpGlRsKN.VYGGNYQLVYQALFYYYllTNGRFSSGFsVRKDS..IpSYFlPN-sPSsCNVo.PR.KPSLSLMFIRAlLlhhLIKDYSs.......................................................s.................................... 0 0 0 0 +6332 PF06504 RepC Replication protein C (RepC) Moxon SJ anon Pfam-B_15903 (release 10.0) Family This family consists of several bacterial replication protein C (RepC) sequences. 
22.80 22.80 23.30 23.50 22.50 22.70 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.98 0.70 -5.46 5 105 2012-10-04 14:01:12 2003-07-15 16:17:02 6 1 87 0 15 72 6 265.80 61 93.70 CHANGED tscsAMsaDLTHARHDPAHCLAPGLFRSLKRGERKRLKLDVTYsYG-DcplRFsGPEPLGADDMRlLQGLVAlAG.....P+GIlLoPEPcSEuG+QLRLFLEs+WDAlEpDAMVVKGSaRpLASElGYATDGGusFKAIRESIERLWsVSVIVp+GuKRQGFRLLSEYASDEs-GRLFVALNPRIAEAIlGcRPHTRI-MAEVRALQTDPARLlHQRLCGWIDPGKSGRVELDTLCGYVWPD-ANuEAMKKRRQTARKALsELAAVGWTVNEYAKGKWEIoRPKPsu ..................................................................................................h.....pasLoHsRHDPAHCLAPGLFRuL...KR.....GER...K.RsKLDVT.Y.cYG-G.ccIEFsGPEPLGADDLRILQGLVAMAG......PsGLVLuPE.PpTEuG+.QLRLFL.....EP..K.WEAVst..D.AMVV.KGSYRALA+EIGhts.DuGssh..KtIp-CIERLWcVSI...IA..Q.....sG.R..K..RQ.....GFR...LLSEYASDEs.DG..RLYVALNPLIApAVMGG..uQHVRIsMcEVRALco-sARLlHQRLCGWID.PGKo....G+sslDTLCGYVWP.sE.A.su...uTMR...K....RR.Qp.lRcALs.ELs........A........L.GWTVsEaAtGKa-IsRPKssu.............. 0 4 9 12 +6333 PF06505 XylR_N Activator of aromatic catabolism Studholme D anon Pfam-B_2890 (release 9.0) Family This domain is found at the N terminus of a subset of sigma54-dependent transcriptional activators in several proteobacteria, including activators of phenol degradation such as XylR. It is found adjacent to Pfam:PF02830. 20.80 20.80 21.00 21.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.09 0.72 -4.60 16 223 2012-10-02 19:02:47 2003-07-15 17:16:43 6 10 157 0 87 229 15 101.30 39 18.53 CHANGED DLspplpFusp-GcIWLs-QRMLLlHsuuLuuLR+ELlpslGh-+ARGhhhRhGatuGtRDAclsRchRssusthshFhAGPQLHsLEGhV+Vpslph-....hDhp .........DLpppLpFsspsGpIWLs-pRMlLl+ssuhu.s.LR+ELIcsLGh-pAR.......GlhhRhGYtu....Gh+DAcls.+chh.ss...s...s.hphFhsGPpLHsLcGhV.+VpslphchD..p......................... 
1 17 50 76 +6334 PF06506 PrpR_N Propionate catabolism activator Studholme D anon Pfam-B_10794 (release 9.0) Domain This domain is found at the N terminus of several sigma54- dependent transcriptional activators including PrpR, which activates catabolism of propionate. 23.10 23.10 23.10 23.10 23.00 22.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.60 0.71 -4.87 47 943 2009-01-15 18:05:59 2003-07-15 18:17:36 6 25 691 6 118 633 15 172.70 37 30.19 CHANGED plstchst.hclp..lhpsshp-ulphhcph.tt.ctsDlllutGu.sushl+ppl.slPVV.lpsouaDllpALtpA+....chs.....sclulVsappsh..u....hpphpphl.sl..slpthsapstc-scstltplpppG..hplllGsulsschA.pphGlpulLlhS.cpolcpAhccAhclsphtcpc...tp+ ..........................s.l.hca-p.ssls...l.phsh-.cAls......hlcct..ts..ccsDsIIusGu.suuhLKs...+l..s.....l...PVl.I+sSGaDl....LpALsc.At.....c..hs........usIGlV.sa..ppsl..su....lhsapcth..sl.....clcpts.a...h....o.....c...E.-......uc....splp.cL+....usG..h-slVGuu.l.l...s.D.lA...cc..t...Ghs..u.lhl..hS..ssoVRpAhp-Alchschpppt....t.............. 0 31 67 92 +6335 PF06507 Auxin_resp Auxin response factor Studholme D anon Pfam-B_2015 (release 9.0) Family A conserved region of auxin-responsive transcription factors. 21.10 21.10 21.70 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.66 0.72 -4.12 32 687 2009-01-15 18:05:59 2003-07-15 18:33:44 8 14 62 0 312 710 0 77.80 45 10.75 CHANGED AupuhsstohFplhYpPRsosS-FlVshp+ahcuhp.p.aslGMRFKMtFEsEDusc++a...tGslsGls-hDPh+WssScWRsLp ...........................Au+AsussohFplaYpP....R........s.S.s.uEFllshs+ahc.uht..ppholGMRF+M..t.F.Es...E-.u.uc..+...........Ra..........hGTIsGlsD...h...D.P.h+WssScWRsLp.......................... 0 40 196 261 +6336 PF06508 QueC ExsB; Queuosine biosynthesis protein QueC Studholme D, Eberhardt R anon Pfam-B_715 (release 9.0) Domain This family of proteins participate in the biosynthesis of 7-carboxy-7-deazaguanine. 
They catalyse the conversion of 7-deaza-7-carboxyguanine to preQ0 [1-3]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.56 0.70 -5.05 33 3226 2012-10-02 18:00:56 2003-07-15 18:54:36 8 14 2983 7 768 3867 2443 199.30 40 86.40 CHANGED +AlVlhSGG.DSTTsLhhApcph..hEVhsloFsYGQR.HptEl-sAcclucths.........l.c+cllDlshLpplu...sSuLTcssht.lsc....ph.t.....cslPsTaVPuRNhlFLSlAuuaA-slGupsIhhGVsppDaSGYPDCRsEFlcuhppslsLu......tsptlpI.psPLh.LsKu-IhcLutcLG......lshchThSCYpG......tcu.uCGcCsuChLRpcGapchs ........................................................................................................................................+AlVlaSGG.D.ST....T....C.L....h..h....A....h......p...p..a.......p..c....V..p.......sl..o....F.....s...Y....G...Q...R.....Hc..........t.......E...l...-...s.........A....c...p...l.....Apc.l.G.............l........p....H..+..l.....l....D..l..s....h...L.....s.p.....l.u...........s.suL.....T.c....cs.h.t...lsp...............tt.................ssl..P.s.Ta....V.P..u.RNhl...F....LohA.u.sh......A....ph....su...c......t......l...h..h...G.....V....s...p.....s..D.....a..S........G........Y...P.......D..C..R.....sp.......F..l....c..uh...p...t.sl.s.Lu............hspsh..p.l...cTPL..ha..l.sKA-hh...tLu.p...p.h.G..............hsh.pp...ThoC..YpG....................utuCGcCsuCpLRtpGhpphh.................................................................................................................................................... 0 218 457 625 +6338 PF06510 DUF1102 Protein of unknown function (DUF1102) Moxon SJ anon Pfam-B_16043 (release 10.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 
22.20 22.20 22.60 24.40 21.70 22.10 hmmbuild --amino -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.95 0.71 -4.39 9 41 2009-01-15 18:05:59 2003-07-16 09:38:25 6 2 17 0 35 48 0 144.00 39 74.21 CHANGED sIVoDDsELIDLTPlQPYuYl..ssGKLslDISssNP.NYPGY.GcGlSPsSpYsF-EhFsVSNcLWEs...shPIsVpIp.Spsstlphauu-h-s..ussG..........sslsFoVthsssVslGM.Fsssscs.G.s.ptplsIcAhthusE ......................sIVsDDsELIDL.sPlQPYAYl..ssGcLsIDlSssNP.NY.P.G...............Y............GpGlSPsSpYsF-EVFpVSNcLWEs............h.sI.....sVpIs.Ss...ssplphausshss..ssss...........sslsFsl..G-sVplGM.hsssGt.s.G.shptplsIpAhthtsp................................................ 0 5 14 25 +6339 PF06511 IpaD Invasion plasmid antigen IpaD Moxon SJ anon Pfam-B_16150 (release 10.0) Family This family consists of several invasion plasmid antigen IpaD proteins. Entry of Shigella flexneri into epithelial cells and lysis of the phagosome involve the IpaB, IpaC, and IpaD proteins, which are secreted by type III secretion machinery. 
25.00 25.00 26.00 25.60 24.60 24.20 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.21 0.70 -5.44 3 264 2009-01-15 18:05:59 2003-07-16 15:13:00 6 2 198 33 19 191 0 261.50 40 88.15 CHANGED tNhouSssPulsAuRupssosGpuAEpVpAsVcoTTu..........a-TpcsIppSsAAhKtppuQQTLpcTPstEl-EsssppTluspphsusLNuLAKSGaulSAEQ+EsL+Ssh.....SAPspAchuGuPM.....AtstpsISDuELWDMISssIucIsDsYLGVYENVVusYTDFYQAFSDILSpMAGWISPGG.DGNoVKLNVDSLKuALooLKKcYo......NKcslLFPAQo.suGhpTuSEuEAcKWlKELGLPD......SCV.KAusGGYVVlVDMTPIssMlsDLsuLGSGoELELDNAKYQAWQSGFKAQEENLKNTLQTLTQKYSNANSLFDNLVKVLSSTISSCLETAKSFL ......................................................................................................................................................................................................h...p..tp.Lshlu+..pt..s..lstptp.p..Lp..............SAPp...p.s..hss..h..............lSctElWshlupsIssIs-..sYLtVYEslVusYTpaYQsFS.-.lLSp.h.uGWloPGc.DGNolKLs.VsSLKstlppL..lsKYs...............sp.La.....Pups.......h....s..sopt-A..p.pWlpELshs...............upl.ptpsuG......YVV......hlshs...PlppMlpsl....sul......G...u...s.......u....t..l........hss...Ac.YQAWpuuFpuQc-NhpsslQoLspKYSpANShaDNLlKVLSusISo.h-oAKsaL................................ 0 3 6 11 +6340 PF06512 Na_trans_assoc Sodium ion transport-associated Vella Briffa B anon Pfam-B_16808 (release 10.0) Family Members of this family contain a region found exclusively in eukaryotic sodium channels or their subunits, many of which are voltage-gated. Members very often also contain between one and four copies of Pfam:PF00520 and, less often, one copy of Pfam:PF00612. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.87 0.70 -4.39 62 1005 2009-01-15 18:05:59 2003-07-16 16:03:13 8 31 158 0 361 1059 0 206.90 31 13.21 CHANGED DsEhNNLQlAlsRIpRuhsalKpsltshhp.thht+h....pphst...........................cttsp.hph.hsphhtst...................pth.t........h.tsuhpph.....pppp.tshhh...Nsshsl........sVPI..........AssESDh-......p............---tsSppS..................................t--scc......chpt.....................sSpSEsSTlDhcss.....tEt............-th..sc.t--.h-....P-cCFs-sCh++aPsht.lDhspshhphWWsLR+TCapIVEHsaFETFIIFMI ..........................................................................................................................................................................................................................................DsEhNNLQlAlsRIp+ulsalKpplh....phhp....thht.p.p......tthtt.......................................p..pp..p...htp...h.tst..................................................p.ptt..................tht.suhp..t.h...................pp.p..hshht.....ssshsl....................sVPI..........A.suESDhEs.p......................................s--hsSpss............................................................................................................-tspp...............................................................ssSsS-......sSTl..Dht.s...t-..................................................................p.h......-.tp....-.........P-sCFs.-.............sC....h...........p......+a...C..tp.ls..h.ppuhG+.hWWsLR+TCapIVEHsWFEoFIlFMI....................................................... 0 36 56 189 +6341 PF06513 DUF1103 Repeat of unknown function (DUF1103) Moxon SJ anon Pfam-B_16075 (release 10.0) Repeat This family consists of several repeats of around 30 residues in length which are found specifically in mature-parasite-infected erythrocyte surface antigen proteins from Plasmodium falciparum. 
This family often found in conjunction with Pfam:PF00226. 25.20 25.20 26.50 25.30 24.70 25.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.38 0.70 -4.94 2 22 2009-01-15 18:05:59 2003-07-16 16:04:55 6 6 2 0 3 23 1 120.30 56 36.58 CHANGED KVLGEGDKEDVKEKNDGKKDKVIGSEKTQKEIKEKVEKRVKcKCKKKVKKGIKENDTEGNDKVKGPEIIIEEVKEEIKKQVEDGIKENDTEGNDKVKGPEIITEEVKEEIKKQVE-GIKENDTEGNDKVKG.EIITEEVKEEIKKQVEEGIKENDTEupDKlhG.EIITEEVK........EGlKENDTEsKDKVIGQEIITEEVKKEIEpQEEK ..........................................................................................................................................cl.....-p.pc...........hcc..pc..slKEpDTEsKDKVIGQEIIhEEVKcEh.....ctl+c.....NcsEsKDcVIsQEIlsE-Vpct.......t........ 0 3 3 3 +6342 PF06514 PsbU Photosystem II 12 kDa extrinsic protein (PsbU) Moxon SJ, Bateman A anon Pfam-B_13782 (release 10.0) Family This family consists of several photosystem II 12 kDa extrinsic protein (PsbU) proteins from cyanobacteria and algae. PsbU is an extrinsic protein of the photosystem II complex of cyanobacteria and red algae. PsbU is known to stabilise the oxygen-evolving machinery of the photosystem II complex against heat-induced inactivation [1]. This family appears to be related to the Helix-hairpin-helix domain. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.15 0.72 -4.14 24 89 2012-10-03 02:11:09 2003-07-16 16:26:02 6 2 78 17 32 170 35 91.40 44 59.54 CHANGED stsDtchst.hG..pKlDLNNusVRsFpphPGMYPTlAuKIlpsuP...YcsV-DlLslssLo-cQKpllccah-sFsVscPpssh.tG.DRlNsGlY ................sDtKlsschG...pKlDLNNuslRsFpphPGhYP............oLAuKIl....p....s...u...P....YcsV-DVLslsGLo-pQKpllcp.hc..pFsVTsPp.th..pG.DRlNsGhY.......................... 
0 10 23 30 +6343 PF06515 BDV_P10 Borna disease virus P10 protein Moxon SJ anon Pfam-B_16237 (release 10.0) Family This family consists of several Borna disease virus P10 (or X) proteins. Borna disease virus (BDV) is unique among the non-segmented negative-strand RNA viruses of animals and man because it transcribes and replicates its genome in the nucleus of the infected cell. It has been suggested that the p10 protein plays a role in viral RNA synthesis or ribonucleoprotein transport [1]. 25.00 25.00 27.40 61.40 23.90 17.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -9.98 0.72 -4.15 3 40 2009-09-11 07:51:39 2003-07-16 16:31:46 6 1 7 0 0 39 0 81.20 67 99.97 CHANGED MSSDLRLTLLELVRRLNGNATIESGRLPGGRRRSPDTTTGTVGVTKTTEDPKECTDPTSRPAPEGPQEEPLHDLRPRPANRKGAAIE ...MSSDLcLTLLELlRRLNGsuTlESGRLsGGRRRSPDTTTGoIGVTKTpEssKEChDPTuRsAPcusQEEPhHDLRPRstsRKGAslE 0 0 0 0 +6344 PF06516 NUP Purine nucleoside permease (NUP) Moxon SJ anon Pfam-B_15961 (release 10.0) Family This family consists of several purine nucleoside permease from both bacteria and fungi [1]. 
21.10 21.10 21.10 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.80 0.70 -5.60 31 328 2012-10-01 20:25:13 2003-07-16 16:35:58 6 4 268 0 136 310 52 309.80 42 85.63 CHANGED lssKVhIloMFE.........sEhpsWh-t.....sh.........s+sIslPGhs..assl+Cssc.ulCtlsTG.GphNAAuolsALshsspFDLo+TYaLlAGIAGlsPptuTlGSssWA+YsVpssLpaEIDsREl.....PssWsoGYhshsspp..PsphPsssYs..........TEVFpLNstLpshAhpLo+ssp..LsD.ossspshRtpYssts........AppsPtVlpsDshoucsaapGshLs-hhpsasclhTsGsGsYsoTupEDsAThcALtRhupsGhlDhsRlhlLRTuSNFspPssGto..uhcpLhpts....tGGhssAl-NlYpsGsslVcsIlspWsp.acsGlss ...................................ssKVhlloMFt............sEupsWl-p........hth.........scp.l.slPGLSs.YPslcCssp.slChlsTGhG.sNAAuolhALshSs+FDLp+T..........YFLlAGIAG.l-PppGTlGSAsWA.+.YsV-huLpa..-l...DsR.Eh.....Pt....sW..s.sGah..u...lss.......p.p..Ps..ppPs..h.s.at...............TEVFpLNspLtshAhuLo+s.lp..LtD.ospupAhRt+Ystts..........AspsPpVhpCDThousTaapGshLu-thpsWoKlhTcGpGsYCoTtpEDNAThpALpRuApsu+lDhsRlhlLRTuSsFDRP.hsGpo..uh-sLhshs.......GGFs.Ah-NhYpsGsslVpsIlspWsp.acpul.s........................................................................... 0 35 73 113 +6345 PF06517 Orthopox_A43R Orthopoxvirus A43R protein Moxon SJ anon Pfam-B_16577 (release 10.0) Family This family consists of several Orthopoxvirus A43R proteins. The function of this family is unknown. 20.90 20.90 21.70 53.10 18.20 20.10 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.45 0.71 -4.75 2 47 2009-01-15 18:05:59 2003-07-16 16:39:31 6 1 19 0 0 35 0 194.30 91 99.60 CHANGED MMhKW.ISILThSIhPVLuYoSSIFRhH.sp-lELCYG+L.FD+l.N.VNIKY.P.aIPYRYNFINpTLTVDEhcc.NVhFT+u.FLKa+YuS.LssuLlVuLuspLKYNDlpC.VNVSChIKNLhTpTSTILTSKHhTYSL+RSpC.hIIGYDSIIWYKD.IsD+......YNGIYDFTAICMLIASTlIVhlYhhK+IKMN. 
......MMMKWIISILTMSIMPVLAYSSSIFRFH..SEDVELCYGpLYFDRIYN.VVNIKY.P..HIPYRYNFINRThoVDELDD.NVFFTHGYFLKHKYGS.LNPSLIVSLSGNLKYNDIQCSVNVSCLIKNLATSTSTILTS.KHKTYSL.HRSpCIsIIGYDSIIWYKD.INDK......YNDIYDFTAICMLIASTLIVTIYVFKKIKMNS....... 1 0 0 0 +6346 PF06518 DUF1104 Protein of unknown function (DUF1104) Moxon SJ anon Pfam-B_16082 (release 10.0) Family This family consists of several hypothetical proteins of unknown function which appear to be found largely in Helicobacter pylori. 25.00 25.00 27.50 28.70 24.80 24.10 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.06 0.72 -3.85 12 166 2009-01-15 18:05:59 2003-07-16 16:41:54 6 1 63 1 18 153 0 89.50 44 61.91 CHANGED ADFSKposcELhphuuslssp-hsDhthElcKRhtchpht-u+pa+pph+pthpcphsphohc-tpca+ptl+cthpc.+l-shSscEtcchGl ....pDFSphsDc-LlchAGpV.ssp-llDY+hElpKRhctMst--pK.p.F+tph+chAcKNlupMS.cDac+h+c-l+cslcc.+hKsho.cEh+t.GL............... 0 4 10 18 +6347 PF06519 TolA TolA C-terminal Moxon SJ anon Pfam-B_16081 (release 10.0) Family This family consists of several bacterial TolA proteins as well as two eukaryotic proteins of unknown function. Tol proteins are involved in the translocation of group A colicins. Colicins are bacterial protein toxins, which are active against Escherichia coli and other related species (See Pfam:PF01024). TolA is anchored to the cytoplasmic membrane by a single membrane spanning segment near the N-terminus, leaving most of the protein exposed to the periplasm [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.19 0.72 -4.01 9 916 2012-10-03 21:09:15 2003-07-17 14:43:38 6 1 772 6 109 684 85 92.70 45 28.50 CHANGED ssstpuGssGs-lspYuu.IpptIQp+hhcsssatGKsCslcI+LuPDGh...lhslps.uGDsslCpAAluA.A+ssKlPhsPos.sVYEphKshsLsh ....................................sstpsussu.u-l.ssYAupIpsAI..p..u....+....h.h.-....s....s.s.a.t.....G.K...p..CsL..+I+..L..A..PD.Gh....L..l....s...l...p...s..c....G...G...DP..A....LCpAAluA...AKhAK....lP..+P..P....Sp..sVYEh.aKsusLcF.................... 0 13 34 72 +6349 PF06521 PAR1 PAR1 protein Moxon SJ anon Pfam-B_16232 (release 10.0) Family This family consists of several plant specific PAR1 proteins from Nicotiana tabacum and Arabidopsis thaliana. The function of this family is unknown. 25.00 25.00 26.00 64.70 22.40 20.90 hmmbuild --amino -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.18 0.71 -4.65 4 68 2009-09-11 00:11:32 2003-07-17 16:32:16 6 1 14 0 38 70 0 144.60 49 87.47 CHANGED sIsCENLscsoCuFAISSoGKRCVLEKph+RSGEEsYTCRTSEIEA-KlpNaIETDECIpACGVDRpsLGISSDuLLEupFTpKLCSstChcaCPNIVDLYFNLAAGEGVYLP+LC-pQtGpuRRuMuEI+SSGl...............lAPuPtSpl...cssNhhhsPAhuPh ...lhCEpLst-sCAFuVSSSGpRCVLE+thhpsGpht.YpCpTSElh.s-..+.lppaIETDpClcACGVDRtoVG..I..SSDuLh.Es..pFopKLCSspCappCPNIVDLYhNLAAGEGlaLPcLCpspcsss.RRt.h.h-...hhSsu................hAsts.................................................................... 0 3 32 35 +6350 PF06522 B12D NADH-ubiquinone reductase complex 1 MLRQ subunit Moxon SJ anon Pfam-B_16238 (release 10.0) Family The MLRQ subunit of mitochondrial NADH-ubiquinone reductase complex I is nuclear [1] and is found in plants [2], insects, fungi and higher metazoans [3]. It appears to act within the membrane and, in mammals, is highly expressed in muscle and neural tissue, indicative of a role in ATP generation [3]. 
21.50 21.50 21.50 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.32 0.72 -4.28 55 447 2009-01-15 18:05:59 2003-07-17 16:38:21 6 4 210 0 280 434 1 69.80 27 72.13 CHANGED hp+P.plhPLhsslususshssh.lsR.hhtNP-VphsKcsp...........s-.hpcas.pp.pKahpspt.......-hhshhppt.sch ....................hcP.plhPLhs...hlususshAshhhhRhhhtsP-Vp.hs+psp....................s-shpcht.pt.hKhht.p........ph....p............................................. 0 82 142 207 +6351 PF06523 DUF1106 Protein of unknown function (DUF1106) Moxon SJ anon Pfam-B_16281 (release 10.0) Family This family consists of several hypothetical bacterial proteins found in Escherichia coli and Citrobacter rodentium. The function of this family is unknown. 21.10 21.10 21.10 80.30 21.00 17.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.02 0.72 -4.16 2 25 2009-09-10 17:00:18 2003-07-17 16:40:58 6 1 18 0 1 16 0 89.10 89 70.05 CHANGED MASLWK+LFY.sGpRRRYFEptEHSFSIlCGRLRGIVlThKCSpGIIYLSIKVsPNNppHlhLYpKK-YlFDKLKElFPDEAIEFoIEYEN MASLWKRLFYSSGRRRRYFEEGEHSFSILCGRLRGIVLTIKCSNGIIYLSIKVSPNNRNHVFLYHKKDYVFDKLKEIFPDEAIEFTIEYEN 0 0 0 1 +6352 PF06524 NOA36 NOA36 protein Moxon SJ anon Pfam-B_16330 (release 10.0) Family This family consists of several NOA36 proteins which contain 29 highly conserved cysteine residues. The function of this protein is unknown. 
25.00 25.00 39.60 39.50 23.00 22.60 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.43 0.70 -5.17 3 242 2009-01-15 18:05:59 2003-07-17 16:49:12 7 2 207 0 71 226 0 192.60 70 97.66 CHANGED MPKKKTGsRKKAEKQRERcKEIRuS..sVDLA+HPCNAsMECDKCpR+QKoRAFCYFCNSVQKLPICAQCGKpKCMMKoGDCVlKHAGVYoTGLuMVGAICDFCEAWVCHGRKCLoTHACsCPLpsApClECERGVWEHGGRIFpCSFCpsFLCEDDQFEHQASCQVLEuENFKChSCNRLGQYSCLRCKsCaC-DHVRRKGFKYDKs.KslPCPKCGY-ToETKDLSMSTRSaKFGRQTpGtcS.DD-pGYGuYapNhuSscYGDstussYGYcuDDDE--.SusDYDEEpDtDDDDsE-sD-..TopNcG-c-sDstA ................................................................................p..+..+QKsRAFCYFCpulQRLPhCApC..GKsKCMhKo.GDCVlKHsGVaoTG...............L.uMVGAICDFCEAWVCHGRKCLooHACoCPLtDA........sClECERGVW-HGGRlF+CuFCpsFL............................................................................................................................................................................................................................................................... 0 24 28 49 +6353 PF06525 SoxE Sulfocyanin (SoxE) Moxon SJ anon Pfam-B_16349 (release 10.0) Family This family consists of several archaeal sulfocyanin (or blue copper protein) sequences from a number of Sulfolobus species. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.56 0.71 -5.03 6 66 2012-10-02 17:41:00 2003-07-17 16:56:28 6 2 39 0 29 130 19 176.80 30 82.35 CHANGED IssllVlIlllGlulY............................s.sphshlSosooosoToooos...............PutlsLPYsSsNKTVFIhLsVpoous..tFNaNGTSsGphKIYIPAGWsVhVpFhNpESLPHNLslVQNsTshPNssslSuDGKIlhhVGsosSNYtssGISSGpoAsGlhssluAGhYWlACGIsGHAcSGMWssLlVSsNVTsPYslh ..........................................................................................................llhllhhuh.hh...............................s..t.p..o.s..sosps.os.tsot..................................sst.....hs.sss...p.+TV.lhlh.shss.us........hNasGoup...G....ph.....plhl..PA......GhsV......hlphhN.p.p.u.l..s.....Hshhl..l....s...s..s.t.h.P...s.....ss...slu.t...G+..I.....l.h..h.l.Gs...o.s...us..h..s...p.G.l....u.........Gpss...s.s.h...h.....sls...AGhYhlsCsIs.GHAtsGMWs.llVSsshp.P....t.......................................................................... 0 7 14 26 +6354 PF06526 DUF1107 Protein of unknown function (DUF1107) Moxon SJ anon Pfam-B_16434 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 25.00 25.00 47.30 47.20 19.90 16.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.83 0.72 -4.16 21 718 2009-01-15 18:05:59 2003-07-17 16:58:32 7 1 677 1 82 194 5 63.70 70 91.41 CHANGED hRhFcpYpPpplA+aVKsLF+GplaIpGlGtFcFDpG+lLl..Ppps-ppphpshpEVNppIppLp .MKIFQRYNPLQVAKYVKILFRGRLYIKDVGAFEFDKGKILI..PKV+DKhHLSVMSEVNRQVMRLQ... 0 6 23 51 +6355 PF06527 TniQ TniQ Moxon SJ anon Pfam-B_16755 (release 10.0) Family This family consists of several bacterial TniQ proteins. TniQ along with TniA and B is involved in the transposition of the mercury-resistance transposon Tn5053 which carries the mer operon. 
It has been suggested that the tni genes are involved in the dissemination of integrons [1]. 25.10 25.10 25.10 25.10 24.90 25.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.10 0.71 -3.96 43 576 2009-01-15 18:05:59 2003-07-17 17:04:04 6 10 398 0 177 503 23 138.90 19 29.75 CHANGED hLshp.lsh.hssEshsSahuRLAttp..shssh...................psahtc.hG..hshpslhpsp........sslstlAphuGhss...spLtthsh.................................spstspt....hpl..tuchlspphl...ptsp.hRhCPtCL.tpDhtt.......stttshhRhtWplsslpsCspHpshL ......................................................................................................................hh....h.sEsltSaltRhAhtp.......thts.h.............................t.phhtp.....hs......hs...ht..t.hhtst................t.ltt.l..u..t....h.....s..s.h.s.s.....spL.p.ph..sh.......................................hphtpt...............hph.......htth.h.sp..phh...........t.t...h+....h....CPhCL...............ptsah+.htWpl..s..hht.sCspHpshL................................................................................................................................... 0 27 90 129 +6356 PF06528 Phage_P2_GpE Phage P2 GpE Moxon SJ anon Pfam-B_15359 (release 10.0) Family This family consists of several phage and bacterial proteins which are closely related to the GpE tail protein from Phage P2. 20.90 20.90 20.90 22.30 20.50 20.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.97 0.72 -4.51 17 525 2009-01-15 18:05:59 2003-07-17 17:09:05 7 2 406 0 55 217 1 39.10 54 85.46 CHANGED MADIAslFHWsPu-htsMoLsELhcWRE+AhhRSG.sss- ...MADIAsIFHWsPu-h.sMolsEllsWR-+..AhtRSGsscp......... 0 7 24 42 +6357 PF06529 Vert_IL3-reg_TF Vertebrate interleukin-3 regulated transcription factor Vella Briffa B anon Pfam-B_16154 (release 10.0) Family This family includes vertebrate transcription factors, some of which are regulated by IL-3/adenovirus E4 promoter binding protein [1]. 
Others were found to strongly repress transcription in a DNA-binding-site-dependent manner [2]. 25.00 25.00 84.80 69.30 22.60 20.10 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.06 0.70 -5.47 2 48 2009-01-15 18:05:59 2003-07-17 17:13:27 6 2 37 0 24 43 0 293.00 70 72.99 CHANGED SSsuYAQEIQKLSsSTsVYFQDYQoSKsslsSFVDEHEPuhVuuSCISVIKHSPQSShSDhSEhsSVEHTQtS.hQusCRSPENKF.lIKQEPlELE...REsR--RGoYpsSIY.sYMGooFshYSHSPPLLQVptSoSNSPRTSEsD-GVVGKSSDGEDEQQVPKGPIHSPVEhppVHAT.VKVPEVNsSALPHKLRIKAKAMQVKVEAhDs-a-uhQKLSSPhDh.uKRHF-LEKHGspshsHSShsPFSVQVTNIQDWSLKsEhWHpKELssKhQsuhKTGVVElKDssYpVSEuENLYLKQGIANLSAEVsSLKRhIsTQ.ISASDSt ......................................SSsAYAQEIQKLSsSTAVYFQDYpoSKus.h.suFlDEHEPohVuSSCISVIKHSPQSSLSDlSEsS.SlEHoQtS..shQusCRSPEsKFQhIKQEPhELEsasREsRDDRGoYpuSIYQNYhGsoF.s.GYSHSPPLLQVNRSSSNSPRTSETDDGVVGKSSDGEDEQQVPKGPIHSPVELKpVHATlVKVPEVNS..SALPHKLRIKAKAMQIKVEAhDsEa-uTQKLSSPlDMouKRHF-LEKHsssshVHSS.LoPFSVQVTNIQDWSLKsEHWH.pKE...LsuKhQsuh........KT.GV..........VEhKDsuYpVS-sENLYLKQGIANLSAEVsSLKR.LIsTQ.ISASDS.... 0 1 3 6 +6358 PF06530 Phage_antitermQ Phage antitermination protein Q Moxon SJ anon Pfam-B_3979 (release 10.0) Family This family consists of several phage antitermination protein Q and related bacterial sequences. Antiterminator proteins control gene expression by recognising control signals near the promoter and preventing transcriptional termination which would otherwise occur at sites that may be a long way downstream [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.71 0.71 -4.56 4 887 2012-10-04 14:01:12 2003-07-17 17:17:27 7 1 490 0 56 475 2 118.00 44 91.35 CHANGED hRDIp.V..........LERWGAWAtssptslpaSPIAAGF.KullPh+spsRP.CsDDDGhIIssshspLpp.psschhsLLlsYYl+G.St+AIA++pthScspItK+Lp+AEGhI-GsLSlhsVRL-hDthlctt ...................................................hRDIp.V..........LERWGAWAs..s...s.p...p.c.lsa.Ss......IAAGF.....KG.L.l.....P.....p...+....s.K..o.R..P.p.C..s.DDD...uh.l.IsuChAR.L.p.+....psp...ch.a-....LLlsY....YV.h.GhohhulAc+......+.....psS-.shItK..cLp+AEGhl-G.hL.hhLsl+LEMD..h..p...................................... 0 7 18 33 +6359 PF06531 DUF1108 Protein of unknown function (DUF1108) Moxon SJ anon Pfam-B_16830 (release 10.0) Family This family consists of several bacterial proteins from Staphylococcus aureus as well as a number of phage proteins. The function of this family is unknown. 25.00 25.00 32.70 32.70 22.50 22.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.83 0.72 -3.83 4 313 2009-01-15 18:05:59 2003-07-18 11:23:49 6 1 192 0 4 97 0 84.90 65 99.43 CHANGED MYYKIG-lppKlIsVsGFDFKLtVhKpchuIpIpVhDhpsssIcuhhVsDENDLYhAhDlhpQuI.EWIEpNTDEQD+LINLVM+W .MYYchG-lppKlIsVsGFDFKLtlhKpchuIpIpVhDhpsssIcuhhVsDENDLYhAhDlhpQuI.EWIEENTDEQDRLINLVMKW.... 0 2 2 4 +6360 PF06532 DUF1109 Protein of unknown function (DUF1109) Moxon SJ anon Pfam-B_17952 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
21.60 21.60 21.90 21.90 21.30 21.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.82 0.71 -11.38 0.71 -4.90 54 293 2009-01-15 18:05:59 2003-07-18 11:29:26 6 3 227 0 122 289 32 192.70 31 93.87 CHANGED LusshpPlpthshspphuhuhhsuhssuhllhhhh..hGlRsDlspshtsshFahKhuhshsLulsAhhssh+LuRPs.uptttthhhlslshshlhlsuhhplhss.sssthh.shlhGpshh..hChhsIsllulPshsuhhhulRp.hAPoc.thAGAsuGLsAGuluAhsYuhaCs-sussFlulWYsluhhlsuhlGAllGsRlLRW .............................................LssphpPVpphshtphhhhshlhuhs..sussl.hhh..h..h.G.h.R.s...Dlsts.h.t.s.shF......hhKhshs..hhlussuhhshhtLuRPt..ptshh.hh.hl.sls..hsslhh.uuhhphhts....sst.sh..shlhG.tshh.......hC.hhIsllSlPsh.suhl.hulRp.hAPs+PslAGhsAGLsuGuhushl.Yu.haCs-suhsFlulWYslulshssslGAlhG.hhLRW......................... 0 29 59 86 +6361 PF06533 DUF1110 Protein of unknown function (DUF1110) Moxon SJ anon Pfam-B_18243 (release 10.0) Family This family consists of hypothetical proteins specific to Oryza sativa. One sequence (Swiss:Q943P1) appears to be tandemly repeated. 24.10 24.10 24.20 24.40 24.00 24.00 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.21 0.71 -4.48 6 38 2009-01-15 18:05:59 2003-07-18 11:34:57 7 3 6 0 21 23 0 156.90 35 84.03 CHANGED MAAE..AWRuRFRcRVsEAAp...RhEsVcEsLAsAhsHL...susMlAuD..tAAAARsRIQLAhGtLt-ASpsLA.AMSlMpuAcLLsh+.Gs.hs..h.httIupLGDp....YLAE+sAshKL+tAtcDAc-AastVDtCRGHLDAlLLLLDH.s+LPuVsshI-pERh.AAsuDLpAAIGpschGsEhAVsARQDVSG ...............c...sW+shF.pRVs.sst...phctlpt.Ltss..tl.....s.h.s..........Asssts+h....ttL.pASppLu.AhuhMtuAcLLAh+..Gsu.....sstt.hs....slspL....sDp....hh....s.ppAhh+LptAtpcAccAastl-tCRGHLsAlhhLLc+....thssVpshlptEhh.sAss.LpsA............................................................. 0 0 1 13 +6362 PF06534 RGM_C Repulsive guidance molecule (RGM) C-terminus Moxon SJ anon Pfam-B_18263 (release 10.0) Family This family consists of several mammalian and one bird sequence from Gallus gallus (Chicken). 
This family represents the C-terminal region of several sequences but in others it represents the full protein. All of the mammalian proteins are hypothetical and have no known function but Swiss:Q8JG54 from the chicken is annotated as being a repulsive guidance molecule (RGM). RGM is a GPI-linked axon guidance molecule of the retinotectal system. RGM is repulsive for a subset of axons, those from the temporal half of the retina. Temporal retinal axons invade the anterior optic tectum in a superficial layer, and encounter RGM expressed in a gradient with increasing concentration along the anterior-posterior axis. Temporal axons are able to receive posterior-dependent information by sensing gradients or concentrations of guidance cues. Thus, RGM is likely to provide positional information for temporal axons invading the optic tectum in the stratum opticum [1]. 21.60 21.60 21.60 21.60 21.10 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.40 0.71 -5.09 23 234 2009-01-15 18:05:59 2003-07-18 12:07:50 8 6 64 0 115 193 1 170.90 49 43.56 CHANGED Cs-QKlYpApoD-..LPuAFsDGopsGG-............psspoL...........cIhEpssG...pHVEIpA+YIGTTIlVRQlGpYLThAlRhPE-lspshp.-ppDlp..LClpGCPtsppIDhpphhtps.....................t.st......shshc....sAps+C+-p........hsVpDhYFpSCVFDLLTTGD...ssFThAAhsAhcDsctLcPspp..+h+l .................CsDQKVYQApsD-..LPAAFsDGSpsGGD...........ttsusSL...........pIsE+ssG...pHVEIpA+YIGTTIlVRQlG+YLTFAlRMPE-lspuh-...-ppsLp......LClpGCPhsppI-hpthptps.......................s......s..sash-....oAps+C+Ep........LPVEDlYFQSCVFDLLTTGD...sNFThAAasAL.EDl+hL+sspc+hH........................................................... 1 17 28 67 +6363 PF06535 RGM_N Repulsive guidance molecule (RGM) N-terminus Moxon SJ anon Pfam-B_18263 (release 10.0) Family This family consists of the N-terminal region of several mammalian and one bird sequence from Gallus gallus (Chicken). 
All of the mammalian proteins are hypothetical and have no known function but Swiss:Q8JG54 from the chicken is annotated as being a repulsive guidance molecule (RGM). RGM is a GPI-linked axon guidance molecule of the retinotectal system. RGM is repulsive for a subset of axons, those from the temporal half of the retina. Temporal retinal axons invade the anterior optic tectum in a superficial layer, and encounter RGM expressed in a gradient with increasing concentration along the anterior-posterior axis. Temporal axons are able to receive posterior-dependent information by sensing gradients or concentrations of guidance cues. Thus, RGM is likely to provide positional information for temporal axons invading the optic tectum in the stratum opticum [1]. 24.00 24.00 24.30 24.00 21.50 23.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.31 0.71 -4.23 14 210 2009-01-15 18:05:59 2003-07-18 12:35:16 7 4 61 0 111 164 0 164.30 57 41.22 CHANGED C+lp.+Csupaspsp.p.s..htt................sstaCpuLRsYshChpRTARsCRGs..LsYHSulptlpcLhppaNCScsusTsps....................s.tsss.csCsY....................sp......apaCGLFGDPHLRTFpccFQTC+VpGAWPLIDNpYLsVQVTNsPVs.GSs....ATAToKlTlIhKs ...............................................C+I..+Csu-ahusTut.ps.hh.s.s......................ss-aCpALRuYAhCT+RT.A.RsCR.Gc..LsYHSAVhGIcDLMsQ+NCS+-GPTSps.......................................s..ptpsss-.C..pYcpp.......................httcsssPs......YhHCGLFGDPHLRTFpDcFQTCK......VpGAWPLIDNsYLsVQVTNsPV.lsGSu....ATATsK...lTIIFKs....... 0 14 25 63 +6364 PF06536 Av_adeno_fibre Avian adenovirus fibre Vella Briffa B anon Pfam-B_16053 (release 10.0) Family This family contains avian adenovirus fibre proteins, which have been linked to variations in virulence [1]. Avian adenoviruses possess penton capsomers that consist of a pentameric base associated with two fibres [2]. 
24.90 24.90 25.00 82.70 18.50 24.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.38 0.72 -4.29 6 42 2009-01-15 18:05:59 2003-07-18 14:02:22 6 1 6 6 0 36 0 96.10 66 20.54 CHANGED YhsSGsusLppaTA+u.NSSs.uFsCAYYLQQW.opGLlhoSLYLKLDpsphushPostsspNA+aFTFWVuua.pphN.StIpssTloPSTsphss TFVSGSsSLsoYNAshVNSSupsFSCAYYLQQWNlQGLLhTSLYLKLDSsTMGsRPGDhsSsNAKWFTFWVSAYLQQCNPSGIQAGTVSPSTATLo........... 0 0 0 0 +6365 PF06537 DUF1111 Protein of unknown function (DUF1111) Moxon SJ anon Pfam-B_16636 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 499 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.50 0.70 -6.22 6 817 2012-10-03 10:02:11 2003-07-18 14:21:28 6 16 490 0 304 843 222 260.40 28 76.15 CHANGED YTcloA.GGcTT.TFDASsSG.HGFSTPAsNLssspLA.HLpGDtpFETuFTTAPNuEH......PELDGLGPVFNNADCNSCHQRDGRNSTPpluuGpsRVKLGS-....AGIFLRIStAsspsChpG...oAsNNYCAPIsVPsFGuQLFH......RGVLpARsDWQpN.FhGQADVYLSYEhpoVoYs..........DGopVoLKKPlFpVENPYDAPGEoctSsNlTSsLLQsDVL......................MGWRNGMPVFGLGLLEAIuEAsILAhVDEsDoNpDGISGRANaVFDAlKAQuGDspPVSLGRFGWKANTPSVRVQSLGALRGDhGITNPLF..P-ESItGTuLHDSYLTRTGFVDTGss.sGtPEASAEFSDs....VVFYAETLAVPARRNVssspVREGA+LFsQlNCouCHsPoFsTKsSG-..lGGhPMs-uLKGQTIYPFoDMLLHDMGEGLADuRPDFLAoGsEWRTRPLWGIGLTQTVNPQAGFLHDGRAATLEEAILWHGGEAptSppsFMuLops-RuQLlsFLMSL 
............................................................................t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...............h.h..a.t.l...usP..t.c................t................h................t................t.....t.s......GtthF..p......htC..CHhsp.h.T......................................................................p............l.P.aoDhllHDhG........t..h..........sD...........t.............h..............p...................s.....p....ut...........a.RTsPLWG....hG....h..............................t.....................s...t...t....h.L.HDGRA.csh.EA..lhWH.......u.....G.............-ut......utp.h.t.hs.tpRttlltFlpu....................................................................................... 0 99 179 252 +6368 PF06540 GMAP Galanin message associated peptide (GMAP) Moxon SJ anon Pfam-B_16759 (release 10.0) Family This family consists of several galanin message associated peptides. In rat preprogalanin, galanin is C-terminally flanked by a 60 amino acid long peptide: galanin message-associated peptide (GMAP). GMAP sequences in different species show high degree of homology, but the biological function of this family is unknown [1]. 
20.40 20.40 20.40 35.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.01 0.72 -4.25 6 53 2009-01-15 18:05:59 2003-07-18 14:33:41 6 4 37 0 24 47 0 56.90 53 47.48 CHANGED GKRELpPE.--h+PGuhDRsluEsNlVRTIlEFLoFLHLKEAGALDpLPslP.AtSuEDspcS ...GKRELpsE.--h+sG....uhc.Rsls.-sNIVRTIIEFLoaLHLK.Es...GAL-pLP....sh..SsE-ht.................. 1 1 3 7 +6369 PF06541 DUF1113 Protein of unknown function (DUF1113) Moxon SJ anon Pfam-B_17933 (release 10.0) Family This family consists of several bacterial proteins of unknown function. 19.70 19.70 20.10 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.08 0.71 -4.47 52 996 2009-01-15 18:05:59 2003-07-18 14:35:25 6 5 523 0 214 836 24 148.40 27 59.94 CHANGED hhhFhlYSFlGWlhEsl...asu.lpc..++al.NRGFLhGPhCPlYGhGulhll...hhLtthpp..........shhhlFlhuhllsoslEYlsuallEplFpt+.aWDYSchhhN.........lpGRlCLhhSlhWGlhullhlchlpPhltphlphlPhhht.hhlshllhlhhlhDhlholh ..........................................................hhFhlauhlGWhhEsh....a...ss....lpp.......tc.ah....tcGh..Lh.G.P....hs...P...lY.GhGslh..lh.......hh..lp.hpp.....................shhhl.alhu...hllso...slEYlouhhlEt...hF...p.hp...hWDYSs.h...hN.........lpG..p...lsL.hulhW.Glhull....h...lphlpPhl..ph.h.t...h.l....h.hh...hhshhhhhhhhhDhhhoh.h........................ 2 87 144 175 +6370 PF06542 PHA-1 DUF1114; Regulator protein PHA-1 Vella Briffa B, Sammut SJ, Pollington J anon Pfam-B_16084 (release 10.0) Family This family represents the protein product of the gene pha-1 which coordinates with lin-35 Rb during animal development. The protein is expressed during embryonic development and functions in the cytoplasm. PHA-1 acts in a parallel pathway with UBC-18 to regulate the activity of a common cellular target [1]. 
21.50 21.50 23.00 22.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.75 0.70 -5.75 8 118 2009-01-15 18:05:59 2003-07-18 15:08:16 6 3 5 0 117 123 0 344.80 28 74.95 CHANGED VN+oFNhullcpIRp-acpVll+s....hcp..Dhtpc.........pp.spulhlNsp+lphpphp...pahRFLKpss+I+Vp+lhlc-h.....cphshps-l..HpsIhcsLlsssh..pplcEFhGhssIC.pGChtCtcIutsCpsYGPlQhsh...hhttc+HF-tLpls-hhlthlh..........lps.p.s.......thh-phIssploCD+LplhLsshh..pthsh......lsR-llDhlltpWpVKoVcl........tah............hptphshlphppchhs..hpTpPtupa.sL-pV-Islphu...spshtp.h+ppt....Sl.sF-NhIuNV+RIFPTpclplcLPppl..h.hs.ssh-cFlpsllcMs.+-sp....RNS+Ish+La....sppl..ph...........lsshhpcspsh+...........................p.l..h..........p.ph.pupsaph ................................................lNKshN..hLphlRppapphplch..............................ps..hlaI..Nh++lp.ppl..........................sa...F...FLspsstV+V....cclhhcpl.......................t..htpph....HchIhppLIG.sst..................pplpplIGh-...-l.C..tG....C..pC....plApc..Ch.-.YGPlphps..hpphp..sppa+cLploDtLhcpIAp..........lppspppp......shppLsphIhs..sISCDpLslhl..s.E....p..h............pps..........................hPREVl-hll+KWsVKSlcl............phhpp............tWhphs...hFTplc...h.sc.......h........ppppsch....+hp+VpVsls.S..hsp.sh.hp....ht........ts.apNhIsNlRRhF.scplohp.hs+hh.....h...slcchhpsllphhph-pp....ppLpl.shpha................................................................................................hp....h....h........................................................................................................ 0 15 16 117 +6371 PF06543 Lac_bphage_repr Lactococcus bacteriophage repressor Vella Briffa B anon Pfam-B_16088 (release 10.0) Family This family represents the C-terminus of Lactococcus bacteriophage repressor proteins. 
20.10 20.10 20.10 20.40 19.50 20.00 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.43 0.72 -4.43 4 31 2009-09-11 15:56:52 2003-07-18 15:42:07 7 3 23 0 5 22 0 49.90 55 35.06 CHANGED cpppcslDLADlV........DDsKVDWDcWVSFDG+PLoDEVKcAMKthhGKcLpD .....ppEslDLAcLV........DDsK.VDWD+WVSFDG+PLTD-VKpAhKhlhGK+LpD... 0 2 2 4 +6372 PF06544 DUF1115 Protein of unknown function (DUF1115) Vella Briffa B, Sammut SJ anon Pfam-B_16104 (release 10.0) Family This family represents the C-terminus of hypothetical eukaryotic proteins of unknown function. 27.60 27.60 27.80 27.80 27.50 27.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.61 0.71 -4.46 37 568 2009-01-15 18:05:59 2003-07-18 15:54:08 7 14 302 0 389 542 2 126.60 26 28.30 CHANGED hhsa+I....ts.p+Rh+lhpsA+phpLoGhsl.hs..psullsVEGspcshccah+h.l++lpW......tt.tchptppppsp.h................................p...hhhps..tp+pF.pta.p.t..............s.s.sphhchLpc.......ps.stphaphsh ..............................................hsa+l...h.t.s.s..p+RhKlppsAppLtLTGhsl.h..........t......c......ss............lllVEG..spcu...hccat+Lhl+RlpW...................pp.ptppp-s.tp.t...........................................................h.hhhc..u..t.....p..pR..tF...ppa..p.p.................p.s.sthpchLpc........ps.hpchaphh.................................................... 0 135 200 296 +6373 PF06545 DUF1116 Protein of unknown function (DUF1116) Vella Briffa B anon Pfam-B_16143 (release 10.0) Family This family contains hypothetical bacterial proteins of unknown function. 
20.10 20.10 21.00 20.50 18.90 18.00 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.28 0.70 -5.25 21 940 2009-01-15 18:05:59 2003-07-18 16:05:16 6 5 543 4 68 423 39 213.70 60 44.22 CHANGED lphtPCHcasAVGPMAGlhSsSMPlalVcs.pstGscuassLNEGlG.KlLRaGAhsp-VlpRLcWhccsLuPsLppAlt.....tpGsl-LpslhApALpMGDEsHsRNtAuosLLl+pLsPhll....p.sshspppht-VhpFlussDtFFLNLsMAAsKshh-AAp...slssSolVTsMuRNGscFGIRluGh.GscWFTuPAssspGhhFsGaut-DAssDlGDSA ...........lsF.PCHchsAVGPMuGlTSASM.MhlVcN....tT.GNcAYsNhsEGlG.KVlRFGA.spsVlsRLpWMRDVLuPhLpsAlt.....httsIDLpsMhAQulpMGDEhHpRNhAuosLLhpALsPtIh.........p..hsasp.....pphtEVh-FlusoD.FFLslhMAhCKAAMDAut...tIctuolVTsMsRNGspFGlRVSGL.G.spWFTuPsp.p.s.pGhhF.sG.as.EDussDhGDSA........................... 1 30 46 57 +6374 PF06546 Vert_HS_TF Vertebrate heat shock transcription factor Vella Briffa B anon Pfam-B_16244 (release 10.0) Family This family represents the C-terminal region of vertebrate heat shock transcription factors. Heat shock transcription factors regulate the expression of heat shock proteins - a set of proteins that protect the cell from damage caused by stress and aid the cell's recovery after the removal of stress [1]. This C-terminal region is found with the N-terminal Pfam:PF00447, and may contain a three-stranded coiled-coil trimerisation domain and a CE2 regulatory region, the latter of which is involved in sustained heat shock response [1]. 
20.50 20.50 21.00 20.70 20.00 19.90 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -12.22 0.70 -4.99 6 211 2009-01-15 18:05:59 2003-07-18 17:17:15 6 3 60 0 80 199 0 220.80 29 52.29 CHANGED uaSuoshhuPDssspsGPIISDlT......ELspuSPssosssSl-sp...sS.PllhIKE.....EPsSPupSPc.sE..P.t.phssGssh.ssT.hSPsshls..SILpEs-Ps............................s.phCLSVACLDK..................................sELpDHLDoIDssL-sLQsMLSu+uFSlDoosLhDLFSsSlsh..sDhslPDhcsSLASIpchLSstc.....stspEupsupscosKQLlpYTA.PLhhh............suus-s.uSsDhPh.h.ELt-uShhop..t..E.PT.uLLs.p.pPhsc-PshS ............................................................................................................................sthht.ct..hpss.II.DlT.........................E..h...s.P....ss....sh......-..p.............t..P.lh..lhE.....................E.sss.....pu....sp.pp...........s................sps....sp....s.h...s.sss.ls..S.Lppp.psss...s.........................................hs..slshL..s+...............................................................................................................sEL.DaLDuhDssL-shQshLsuctaolDss.hL.hDlFssuh........h..............sshtu.pp.hhps.........h..p...s...p..p......s....sppLlpYT.u.P.hhh....................s.s.....ss...s-hPh.h.EL......tss.h..p..............Po.uhL..............t.................................................................. 0 6 11 38 +6375 PF06547 DUF1117 Protein of unknown function (DUF1117) Vella Briffa B anon Pfam-B_16251 (release 10.0) Family This family represents the C-terminus of a number of hypothetical plant proteins. 
25.00 25.00 33.40 33.20 21.60 19.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -11.02 0.71 -3.99 7 55 2009-01-15 18:05:59 2003-07-21 17:18:57 7 5 15 0 37 55 0 107.60 53 28.91 CHANGED sVG.LTIWRLPGGGFAVGRF..uGGpRuGERpLPVVYTEM.DGGhNss...suPRRISW....uSR.....ssRSpER.tG.ltRhFRNhhuhFuphpsupSsSut.tStssp..p.....pspp..olhSpSsRRR ..sVG.LTIWRLPGGGFAVGRF..uGGtRsuERc..L..PVVYTEM.DGGFNsu.........uuPRR..I.SW.....ssp............su+upps...tGhltRhFRNhFusFGR..hpsss.S.s.Sup..t.ut........................................................... 0 3 22 31 +6376 PF06548 Kinesin-related Kinesin-related Vella Briffa B anon Pfam-B_16517 (release 10.0) Family This family represents a region within kinesin-related proteins from higher plants. Many family members also contain the Pfam:PF00225 domain. Kinesins are ATP-driven microtubule motor proteins that produce directed force [1]. Some family members are associated with the phragmoplast, a structure composed mainly of microtubules that executes cytokinesis in higher plants [2]. 
24.00 24.00 24.60 24.40 22.50 21.90 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.78 0.70 -5.94 10 95 2012-10-05 12:31:09 2003-07-22 10:00:52 6 7 24 0 51 91 0 320.50 36 43.48 CHANGED uPT-SLAASLQRGLcII-.HppssAsRRSoVuhSFcphshpPs.tsssKlsuuVQo.Pc-ptspt...s.hLCssC+pptsssus.p-.ssh.p..h..hsss.............KuspKVlttAl+REppLEphCs-QAuKIpQL.............oullsQh+csp..c.pp......spt...............lL+pt.p.scsEslppphEsKclpEEhc.shpph.h-luE+EsLLcEIp-LKsQLp...hssotoosphpuSLLthohQlRp......sh..ttsscstpcsL-cER.+WTEuEScWISLTEELRl-LEusRphAEKhchELcpEK+CoEELcDALpRAMhGHARhlEQYsELQEKYs-LLp+HRplh-GIsDVKKAAAKAGhKG.sGoRFAsALAAELSALRlEREKERchLKcENKuL+hQLRDTAEAVQAAGELLVRLREAEEAsolAcERsstsEpEs-KLKKQlEKLK+KH-pElsThKQ...hLAES+LP .........................................................................t..p.pLAuSlp+Glplh-.apts..h.ppu.hthsht.ht.p.........phssth.................hh...sp......t................................................................pt...h.t.s..cph.hct.C.cQsscI.pL..............................................................................pth.pc......tph...hs-+E.L.tEItpL+tpLp.............p.t.t...........h....p.p.............................ps.....t......h.E.EScWlsLs--LRh-l-sp+.hhtc.p.ELp.EKhsstElp-AhphAh.GHuRhlEpYs-LpEha.tLhthHphh.tGlt-VKptAu+A.uh+G.tt.tFhtuhusElosl+hp..+E.......ppNctLp.QLpDTAEAVpAAGELLVRL+EAEcAhs.upcphh.hppEspchhpph-cLK++ac.Eh.shpphL.-uph............... 0 7 26 35 +6377 PF06549 DUF1118 Protein of unknown function (DUF1118) Moxon SJ anon Pfam-B_17963 (release 10.0) Family This family consists of several hypothetical plant proteins of unknown function. 
22.50 22.50 22.60 22.90 22.40 22.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.54 0.71 -3.95 5 63 2009-01-15 18:05:59 2003-07-22 11:33:40 7 3 28 0 33 64 1 105.80 48 53.76 CHANGED +LEK+KVLSsVEKoGLLS+AEcLGlTLSSlE+LGLLSKAEDLGLLSLlENsAuhSPuALASlALPLLlAAIAAVVLVPDDSssLVAlQAVlAuALVlGGouLFVGSVVLuGLQESD ..................plEchKlLopsEKAGLLStAEchGloLSslE+.LGLLSKAE-L..GlLShs..ps.u....oPus..Lho..lA.LsLLlAussslhlVP-Dssh.VslQsV.lAhshslGuuuhasuSsllusLQpu........ 0 13 24 29 +6378 PF06550 DUF1119 Protein of unknown function (DUF1119) Moxon SJ anon Pfam-B_17985 (release 10.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.18 0.70 -5.27 5 64 2012-10-02 13:41:03 2003-07-22 11:36:36 6 2 60 0 45 154 100 273.30 36 93.22 CHANGED suLFVlVQlLALLLusPhpsuEhQ..AaEsPTpslNSIa.YIVhlLlFThFVLIsIK+cKKWlIptlIYlslVu.VlaYVFhlLLoll.slAGhlNllSlllAIuLshLLYtYPEWYVIDlAGshlAsGlSALaGISFGVlPAVVLLIlLAVYDAISVYKTKHMIoLAcGVMDL+LPILFVIPcspsYSFls-oF-su--tEA..........tssaFIGLGDAVMPSILVVSAAlFl-o..l.uGlpYssLsAMuGTLlGhlVLhhhVhRGRPpAGLPaLNoGAIAGFLIGsLhuGl ....................................................................h..hhlhlQlhALhLs.....shtt.sshp..shEsPpsssNslh.Ylshl.Llh.TuhhLhhh+..hshphllphll....hhshhh..hhhYVa..tsll.s...hh..............h...h..................hhuhlhu.l...u.ls.sh.L...h....h.YP.E....WYV.ID.hsGllhu..A.G.su.ulFGISh.u.ll.P....s.llLLslLAVYDAISVYtT....cH........MlsLAcG.....Vh.-h....+lPllhVlPpphsY..Sahc.....t.....s.....htt.tt.pt.t.t...........................................................tsAaFlGlGDslhPolLVs........SAsh..F.....h..s.s.....s......l.............h...l...s....l..P..u...L.s.AhlG...ol.hGh.h.l.L.h.h.h.V.h.+.G+spAGLPhLNuGAIhGallGslhs........................................................................... 
0 10 37 42 +6379 PF06551 DUF1120 Protein of unknown function (DUF1120) Moxon SJ anon Pfam-B_17948 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild --amino -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.42 0.71 -3.78 7 319 2012-10-02 17:35:21 2003-07-22 11:42:21 7 1 164 0 52 239 31 98.50 32 43.39 CHANGED LKVTGplssuACTPpLoGGGsVDaGphpsssLpss....chspLGhKsholsIoCsusstlAhsupDsRtsT.......t..tsshsh....shs..Gtt..s.ssttFGLGpossGtKIGAau .......LKVpGpls.suuCTsp..l.s.s.GG.llDaGpls...supL.ps.T......p.s.s.pL...u.p.+.s.hol.oIsC.s.u..sTh...luaossDNRssS.......................................GlG.................................................................................... 1 7 17 40 +6380 PF06552 TOM20_plant Plant specific mitochondrial import receptor subunit TOM20 Moxon SJ anon Pfam-B_17991 (release 10.0) Family This family consists of several plant specific mitochondrial import receptor subunit TOM20 (translocase of outer membrane 20 kDa subunit) proteins. Most mitochondrial proteins are encoded by the nuclear genome, and are synthesised in the cytosol. TOM20 is a general import receptor that binds to mitochondrial pre-sequences in the early step of protein import into the mitochondria [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.21 0.71 -4.70 6 100 2012-10-11 20:01:00 2003-07-22 11:48:36 7 11 46 1 51 128 5 147.30 31 55.26 CHANGED FDRLlhFEHsRKsAEsTYtpNPLDADNLTRWGGALLELSQFQsls-uKpMIpDAISKLEEALlIsPtKH-AlWClGNAaTSauFLosD.sEA+.pF-hAopaFQ.AhsppPsNplY+KSLEhssKAPcLHhthhppt.hp..hGst........ssussSsKohKpKKoS-hKYDlhGWVILAsGVVAWluFAK ..............................................hF-.spctuptthtpsP.D..s-s...LspWGtALlELup...h....pps.....-u....h.p.hlp-AlsKhccALtlsPppa-AlaslGsA........houhu.......h.hp....s-....p....tpA....pphFcc....AsphFppAhs.cP...ssphYppuLph...................................................................................................................................................... 0 16 36 43 +6381 PF06553 BNIP3 BNIP3 Moxon SJ anon Pfam-B_18014 (release 10.0) Family This family consists of several mammalian specific BCL2/adenovirus E1B 19-kDa protein-interacting protein 3 or BNIP3 sequences. BNIP3 belongs to the Bcl-2 homology 3 (BH3)-only family, a Bcl-2-related family possessing an atypical Bcl-2 homology 3 (BH3) domain, which regulates PCD from mitochondrial sites by selective Bcl-2/Bcl-XL interactions. BNIP3 family members contain a C-terminal transmembrane domain that is required for their mitochondrial localisation, homodimerisation, as well as regulation of their pro-apoptotic activities. BNIP3-mediated apoptosis has been reported to be independent of caspase activation and cytochrome c release and is characterised by early plasma membrane and mitochondrial damage, prior to the appearance of chromatin condensation or DNA fragmentation [1]. 
26.00 26.00 55.60 29.30 25.90 25.90 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.48 0.71 -4.93 14 210 2009-01-15 18:05:59 2003-07-22 11:56:35 7 2 85 6 101 187 0 181.60 45 95.99 CHANGED MSssttss................EsuLpsSWVELphsssssspss...............................st.p.lPssSp..sG-hE+lLL-AQ+Epup.SSR....sSSps-SP.ps.oP..pssth.hssp..spps...ppp-cshct..t......KssDWlhDWSSRPENlPPK-FhFcH........PK+.........osshShRpotshKc.ulFSs-hLhlhlPollLSHlLulGlGlaIGKRLsh.spsoh ..................................................ttLpuSWVELphssssss.ss.......................................................................................................s...t.p.lPuosu.hsGD.hEKILLDAQHESup.SS+.......sSS+CD..S..........P.ts.ps..pp..sst...hssc..psp+s..soQSEE-shEtc+-h-.........hpKsuDWlhDWSSR..P..ENlPP.K..EF.hF+H.....................P..KR.........osoLSMRpous..M...KKGGlFSuEFLKlFlPSLlLSHl.LA.lGLGlYIG+RLssso.s..h............................ 0 17 28 61 +6382 PF06554 Olfactory_mark Olfactory marker protein Moxon SJ anon Pfam-B_18049 (release 10.0) Family This family consists of several olfactory marker proteins. Expression of the olfactory marker protein (OMP) is highly restricted to mature olfactory receptor neurons in virtually all vertebrate species from fish to man. 25.00 25.00 76.60 76.50 19.10 17.70 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.74 0.71 -4.68 4 40 2009-01-15 18:05:59 2003-07-22 12:01:40 7 1 27 7 9 43 0 138.60 65 94.58 CHANGED hELsFs.DhQLTchMRLRVQSLQQ+GpK+QDGE+LL+ssEpVYRLDF.ScQcLpFspWNVsLpuPG+lsITGTSQhWTPDLTpLMTRQLLEPsulFWRps...-s-slcC.EADAQEFGERIAELAKlRKVMYFLIsFt-GssPtslcCSlsFpp ........h-hshs.D.pLTc.MRLRVpSLpQRGpKRQDGE+LLpPsEuVYRLDF.ppQ.+.LpFpRWsVsLcpPGKVTITGTSQhWTPDLTNLMTRQLL-PsAlFWRc-...-s-shchpEADA.EFGERluELAKlRKVMYFLhsFt-GscPsslcsSlsFp... 
0 1 4 8 +6384 PF06556 ASFV_p27 IAP-like protein p27 C-terminus Moxon SJ anon Pfam-B_18169 (release 10.0) Family This family represents the C-terminal region of the African swine fever virus IAP-like protein p27. This family is found in conjunction with Pfam:PF00653. It has been suggested that the family may be a host range gene involved in aspects of infection in the arthropod host, ticks of the genus Ornithodoros [1]. 25.00 25.00 26.80 26.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.89 0.71 -4.25 2 20 2009-01-15 18:05:59 2003-07-22 12:41:11 6 2 16 0 0 9 0 129.00 90 57.50 CHANGED ATLGIIGLKKMIDSYNDYasNEV.VKHKNRVYTHKRLEDMGFSKsFMpFILANAFhPPYRKYIHKIILN-RYFTFKFsAaLLSFHKVNLDNQhTYCMTCGIE.IpKDENFCNACKsLNYKHYKhLNFSVKL ...........ATLGIIGLKKMIDSYNDYYNNEVFVKHKNRVYTH.KRLEDMGFSKPFMRFILANAFIPPYRKYIHKIILNERYFTFKFAAHLLSFHKVNLDNQTTYCMTCGIEPIKKDENFCNACKTLNYKHYKTLNFSVKL........... 0 0 0 0 +6385 PF06557 DUF1122 Protein of unknown function (DUF1122) Moxon SJ anon Pfam-B_18183 (release 10.0) Family This family consists of several hypothetical archaeal and bacterial proteins of unknown function. 25.00 25.00 33.90 65.80 24.60 23.60 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.16 0.71 -5.10 6 37 2009-01-15 18:05:59 2003-07-22 12:43:05 6 1 37 3 20 29 0 155.50 43 86.76 CHANGED lG.pl...pLcu.clhQs+hpE.psFplhl...ss+..pLspshhFpGRt.YYhPWIEI.shsP.LR........ptslEscLacFlhshLssuG+lFVpYhcD+EThptL.+GhsPAsTcLGFpLLKtGFTWFKsWYaPEGh.EGG.KlQApKPLoc-ccpRpLppLhcElK.phltph.sStlt ....hhhht.......s.sl+ps+hpE.hshpLhl....ssc..plspsphFpGRs.YYsPWlEl.sh.Phhp.........spslEcphacllhpaLsPGu+LFVpYlcDpETtptL.+GhsPs-T.LGhpLLpsGFTWFKDWYFPEGG.EGs.KLQusKPlst..th+pLp.lht-lc..p.h.........t.......... 
0 7 10 16 +6386 PF06558 SecM Secretion monitor precursor protein (SecM) Moxon SJ anon Pfam-B_18197 (release 10.0) Family This family consists of several bacterial Secretion monitor precursor (SecM) proteins. SecM is known to regulate SecA expression. The eubacterial protein secretion machinery consists of a number of soluble and membrane associated components. One critical element is SecA ATPase, which acts as a molecular motor to promote protein secretion at translocation sites that consist of SecYE, the SecA receptor, and SecG and SecDFyajC proteins, which regulate SecA membrane cycling [1]. 25.00 25.00 28.50 28.50 24.80 24.70 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.94 0.71 -4.53 10 529 2009-01-15 18:05:59 2003-07-22 12:49:01 7 1 522 0 41 182 1 145.30 72 84.14 CHANGED MVAASLGLPsLssuupsss..Pupuoops+psSspssFssLALLcsssRRP..oFoVDYWHQHAIRTVIRHLSFAh.APQslPsupEs.......sPLQsQHLALLDTLsALLTQEupPPsllR...phspssFhP...usaplulWIuQlQGIRAGPptLS ..............MVAASLGLPALS.NAAEsNA...PAKATopNHpsS.........AKVNFuQLA.....LLEAssRRP.......Na.SVDYWHQHAIRTVIRHLSFAM...AP..QTLP..VAEEo........hPLQ.AQHLAL...LDTLSALLTQEG..TPsptsh....Rl-aAaFsP..QAp.FSoPlW.ISQAQGIRAGPQRL.... 0 1 9 24 +6387 PF06559 DCD 2'-deoxycytidine 5'-triphosphate deaminase (DCD) Moxon SJ anon Pfam-B_18211 (release 10.0) Family This family consists of several bacterial 2'-deoxycytidine 5'-triphosphate deaminase proteins (EC:3.5.4.13). 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.94 0.70 -6.07 17 532 2012-10-03 01:18:03 2003-07-22 12:52:18 6 3 521 4 109 933 819 232.00 47 87.95 CHANGED hstsh.....sGILP-csItshhusGtIssshshsssQIQPASLDLRLGspAaRVRASFLPGsuRoVs-RL.--hthHclDLocGAVLETGCVYlVPLhEuLuLPsslsAsANPKSSTGRLDlFTRlITDputtFDplsAGYsGPLYsEISPRTFslLVRsGsRLsQlRFRpGpshLs-s-LttlHtppsLssu..tsh.ssGlulSVDLtuptss.LVGYRAK+HoGVlDlD+hGsacst-FWEPlhspsup...LILDPGtFYILsS+EAVplPPshAAEMsPasshVGEFRVHYAGFFDPGFGpssAGGsGSRuVLEVRuHEsPFlLEHGQsVGRLVYE+MsstPssLYGtsluSNYQuQGLKLSKHFtu ..............................................................................p...........LsDpDIc.Ahl-.pG..t.luh...s.s.t.s...s...-pIpsAolDlRLGs..+...s..a.R.s....+....Au....F......L..s.....G.P.p.c.....p.V.us.t.L..-+..l....h..........c...E......I...s....L..s...-..G....t..s..L..c.s......Gplh.lss.......hLESlsLP.u-lsuhhsu+SShuRLsLhs+VhAc..............................................................................................................................................................................................................................................................................................................................h............................................... 0 27 64 85 +6388 PF06560 GPI Glucose-6-phosphate isomerase (GPI) Moxon SJ anon Pfam-B_18250 (release 10.0) Family This family consists of several bacterial and archaeal glucose-6-phosphate isomerase (GPI) proteins (EC:5.3.1.9). 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.26 0.71 -5.26 6 215 2012-10-10 13:59:34 2003-07-22 12:56:42 6 3 164 25 77 297 86 161.90 26 66.57 CHANGED VR+hpDhtulhhDEpsapth.E.hsc-ssY-Vp-htps-c-u-......LpaslThl.PGplGcEahhTpGHaHsp.....ss+PElY.slcGpushLLQp.P-s..........-ltVltscttsslhVPPhauHpolNsG-ssLlhsshhsuDsupcY-sIs+tuGhphhllcsGt..h.ppssss.c .......................................................h.tt..thh.s.............t.sphhYp.l.h.thht.ppctp......L.aslThl.hPGplGp.Eh...hhTpG.HaHsh.........hsps....ElY.slpGcuhhl.L.Qc...-s............csh..s..lpsptGshlhl..PPsauHp....oINs.G.cp.....s.Ls..hushhsps..huhc..Ytslpptthht.hh..h.h.ppGt............................................. 0 22 51 67 +6391 PF06563 DUF1125 Protein of unknown function (DUF1125) Moxon SJ anon Pfam-B_18065 (release 10.0) Family This family consists of several short Lactococcus lactis and bacteriophage proteins. The function of this family is unknown. 25.00 25.00 28.70 28.30 24.10 23.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.84 0.72 -4.27 2 19 2009-01-15 18:05:59 2003-07-22 13:02:46 6 1 16 0 3 7 0 54.60 66 81.59 CHANGED MTV.lKsphsphIlpFchGsDIEsFSsuFLa+KIKhhcIKNt.-L.hhLEDTKND ..MTV.lKsphsphIlpFchGsDIEsFSsuFLa+KIKhhcIKNt.-L.hhLEDTKND 0 1 1 1 +6392 PF06564 YhjQ YhjQ protein Moxon SJ anon Pfam-B_18260 (release 10.0) Family This family consists of several bacterial YhjQ proteins. The function of this family is unknown. However, the family does contain a P-loop sequence motif suggesting a nucleotide binding function. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.51 0.70 -5.01 9 638 2012-10-05 12:31:09 2003-07-22 13:44:36 7 3 570 0 80 5749 2006 228.30 57 95.90 CHANGED MslLuLQGlRGGlGTTSlsAALuWALQhLGEsVLVIDhSPDNLLRlpFNlDacppcGWARA.LDGpsWp-uuhRYsstLDLLPFGpLsssEhEs.pt.hpsshutaspslptLpttupYcWlLlDLPt...GhSslTRphlphsDpsLsllpsDANCHlRLHQQsLPsusclLlNchphuSQLQpDLaQLWL.QoppcLLPl.llHRDEAhuEsLAuKQPLGEYRsDSLAAEElhTLANWCLlpht......puss .................................................................................MAlLGLQGV.RGGVGTT..T...lTA.A.L..A....W.u.L..Q.h....L..G.......E....N.V..LV..l..D.....A.......s...P.....D.....N......L.........L.........R....L........S......F........N.......V........D.......F.......s..........H.....p.......p........G........W.......A........R..........u.....h......L.........D.......G......p.......D...........W.......R..........D.........A.....G......L.........R.........Y.......T......S...........Q....................L.....D.....L....L........P...............F.....G........Q..........L....o.......h......p.....E......p.........E.......N..............s........Qp...........W....Q.......p......c.......L.......u.......-........I.........s...o....u....L.........Q.....p..L........K......A.............S.......G...R......Y..p..W..I..Ll..D.L.Pt........s.uS...l.T...+.Q.L..........l..S.L.C...DH.oL..A....l...l.......p............V............D..............A.............N............C.............H............I..........R.........L............H...........Q............Q..........A.......L...........P..........s..........G.........A.........H..........I........L.....I........N.....s...........h.......R.......I....G.........S....Q....l...........Q.D...D......l.....Y....Q....l....W..L....Q...S.......Q.....R....R.........L......L.....P.....h......l.......I.H......RDE.A.M.A.ECLA.u.K.Q.Pl..GEYRSD.uL..AAEElLTLANWCLLp.uG..hKo.......................................................................
................................................................................................................................ 0 5 16 51 +6393 PF06565 DUF1126 Repeat of unknown function (DUF1126) Moxon SJ anon Pfam-B_18695 (release 10.0) Family This family consists of several eukaryote specific repeats of around 35 residues in length. The function of this family is unknown. 21.20 21.20 21.90 21.20 18.70 19.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.06 0.72 -4.47 100 901 2009-01-15 18:05:59 2003-07-22 13:48:32 7 10 125 2 576 907 19 33.10 40 13.93 CHANGED llpYaLsDDTlplhE.shscNS.Gh.stGpaLcRp+ .......hlpYaLpDDTlplhE.sh..hcNS..Gh.stGph.l+Rp+.... 0 273 335 466 +6394 PF06566 Chon_Sulph_att Chondroitin sulphate attachment domain Vella Briffa B anon Pfam-B_16515 (release 10.0) Domain This family represents the chondroitin sulphate attachment domain of vertebrate neural transmembrane proteoglycans that contain EGF modules. Evidence has been accumulated to support the idea that neural proteoglycans are involved in various cellular events including mitogenesis, differentiation, axonal outgrowth and synaptogenesis [1]. This domain contains several potential sites of chondroitin sulphate attachment, as well as potential sites of N-linked glycosylation [2]. 
25.00 25.00 71.60 70.70 22.40 21.40 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.84 0.70 -4.74 3 37 2009-09-11 06:23:37 2003-07-22 13:53:27 6 4 24 0 17 45 0 225.00 74 43.01 CHANGED lPsPEAGSAl..........EAEssV+SVPAWEsRANDTREcAGsPAAG-DET....ShpEsGSEpAsVGPGVGPEEuLEASAAVTuTAWLEA-SPG..............LGGVTAEAG.SGDoQuLPATLPTPDEALGoSosSsAlPEATEA........SsP......PSPuPGDKPSLlPELPKESPlEVWLNLGGSTPDPp.............uPEPTaPhQGTLEPpPASDIIDIDYFEGLDGEGRGsDhG+FPGSPGTSE+HPDsGGETPSWSLLDLYDDFTPFDESDFYPTTSFY....DDLEE ............................................-suSAh..........EA-t.h+us.shEs+ANsTp-tsu.PsAG---s....ohpt.G..up....u...........hGPEEs.LpASAAVTuTAWLEs-oPG..............LGGsT.sEsG.SGDsQuLPATL.sPcEsLspSshPPAhPEATEA........SsP......PSPTPG.....DK.SP.....usELP....KESPLEVWLNLGGSTPDPp.............GPEPTaPFQGTLEPQPASDIIDIDYFEGLDGEGRGADLGSFPGSPGTSEpHPD..T-GETPSWSLLDLYDDFTPFDESDFYPTTSFY....DDL-E........ 0 1 1 3 +6395 PF06567 Neural_ProG_Cyt Neural chondroitin sulphate proteoglycan cytoplasmic domain Vella Briffa B anon Pfam-B_16515 (release 10.0) Family This family represents the C-terminal cytoplasmic domain of vertebrate neural chondroitin sulphate proteoglycans that contain EGF modules. Evidence has been accumulated to support the idea that neural proteoglycans are involved in various cellular events including mitogenesis, differentiation, axonal outgrowth and synaptogenesis [1]. This domain contains a number of potential sites of phosphorylation by protein kinase C [2]. 
29.40 29.40 36.20 37.20 21.80 29.30 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.78 0.71 -3.98 2 68 2009-01-15 18:05:59 2003-07-22 13:57:31 6 5 29 0 34 83 0 77.70 59 21.61 CHANGED LYLLKTENoKLR+T.KaRTPSELHNDNFSLSTIAEGSHPN...........................DDPuAPHKlQDPLKstLK-EEshsI..ShuPc.EGuKG-.sshGVsCLpNNLs ...............LalLKTENsKL.R+psKaRs.sSEhHNDNFSLSTIAEGSHPN.................................pD-ssh...................p.php-shK.......s...K--tshsl..ShsPc.Est+.................................................. 0 1 4 10 +6396 PF06568 DUF1127 Domain of unknown function (DUF1127) Moxon SJ anon Pfam-B_18606 (release 10.0) Domain This family is found in several hypothetical bacterial proteins. In some cases it represents it represents the C-terminal region whereas in others it represents the whole sequence. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -7.85 0.72 -4.54 170 1368 2009-01-15 18:05:59 2003-07-22 13:57:49 6 3 640 0 307 914 205 40.10 32 58.60 CHANGED ts..lhpthtpa....pptR....pop.ppLspLo-+pLpDlGl.sR..u-lpp ..........................h.tlhpshpp.W...........pct+..............pst.p....tLpphoDcpLcDIGl.sR...p-l............ 0 40 125 194 +6397 PF06569 DUF1128 Protein of unknown function (DUF1128) Moxon SJ anon Pfam-B_18651 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 20.60 20.60 21.40 21.40 20.10 19.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.27 0.72 -4.25 15 417 2009-01-15 18:05:59 2003-07-22 14:07:06 6 1 417 0 40 132 0 68.40 50 95.73 CHANGED hSppopEslphMI-pI+cKLplVNhullcs-cFssspa-DLc-IY-hVM+K-oFSPSEMQAIs-ELGsLRK ........ht..spE....MVtuI+EKLphVNtGllcs-cacsss.E-LpDIY-aV.p.REphSPSEhpAIA-cLGpLR+.... 
1 11 23 32 +6398 PF06570 DUF1129 Protein of unknown function (DUF1129) Moxon SJ anon Pfam-B_18737 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 29.10 29.10 29.20 29.10 28.90 29.00 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.24 0.70 -5.01 23 957 2012-10-01 22:34:14 2003-07-22 14:10:58 6 1 920 0 81 444 0 203.50 30 88.26 CHANGED cLTKKNp-alaphp+pL.hpsuho---hpthLp-hlPcll-sQKpGhTARpLaG.sPophspplhps.pptscssp............hhhh.................LDsuLhhlulhslhhGlhshFuptstssh.....GllTLllsuhsuGhshhhhh+alh......hc+scRsshhKthhhlshshllWhslashsuh.LPsslNPsLsshlhllIGulAhulRaal++KYsIpuu .......................Lop+Nppalh.h..ppph..htssh..o-p-.hctllpcllspllpsQp+GhoApsLaG.sPsphAcuhtpptt.h.pcpsc...........s.hh.hh..............................hDssLh...l...h.....u.l...h...u....llp.u.l....h....s..aF.sp...t.supsh........Gl..l...ol.lh.hu....l.l.uG.hsh.h.h..h.ahalhth.......shspSp+..s..sha..+tl.hlll.h..shh.ha.h.l.lFh.s...suh...L.s..ss.lN..s.h..L..s.s..hshh.Ilu...slhhsl.+.aal+++hsl.s....................................................................................................... 0 15 37 60 +6400 PF06572 DUF1131 Protein of unknown function (DUF1131) Moxon SJ anon Pfam-B_18811 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
20.60 20.60 23.00 21.10 18.20 17.00 hmmbuild --amino -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.93 0.71 -4.78 8 552 2009-09-14 12:02:21 2003-07-22 14:28:26 7 2 547 2 43 181 4 169.20 76 88.35 CHANGED SuhsWSuhuPaNWF...GS.ShEVT-QGVGsITAuTPLsEsAIscALsGDYRLRSGMcTssGplVpaFpAhKscpltlVIsG-.cGsVsRIDVhDscIsTssGVKIGTPFSDLYSKAFGsCppusuD.-ssuVECtAspSpHlSYlFoGcWuGPEsLMPsDDsLKsWcVSKIIW+p ................SSVNWSAANPWNWF...G..S..STcVSEQGVGcLTASTPLpEpAIADALDGDYRLRSG.MKTsNGN.VVRF.FEsM..KG.D..N..VAM.V.ING-..QGT.lSRIDVLDSDIPussGVKIGTP.FSDLYSK.A.F.G.NCQKAcG.D.Ds.puVECKAEGSQHISY.FS.G.EWSGPEGLMPSDDTLKN.W.KVSKIIWRR................... 0 4 12 26 +6401 PF06573 Churchill Churchill protein Moxon SJ, Bateman A anon Pfam-B_19061 (release 10.0) Family This family consists of several eukaryotic Churchill proteins. This protein contains a novel zinc binding region that mediates FGF signaling during neural development (unpublished obs Sheng G and Stern C). 25.00 25.00 36.20 35.90 18.20 17.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.71 0.71 -4.07 7 78 2009-01-15 18:05:59 2003-07-22 14:37:27 6 3 49 1 36 61 1 99.20 68 61.12 CHANGED MCssCVpcEYPDRGshCLEsGSaLhNFVGCupCspRDFVLIsN+.ssp--DsEEIlTY-HhCKNCcHVIAcHEYTFoVsDDaQEYTMLCLLCG+AEDolSlLPDDPRQ.sPLF ....MCGsCVcKEYPsRGNTCLENGSFLLNFsGCAsCsKRDFhLIoNK.Slc.EED..GEEIVTYD.HlCKNCHHVIARHEYTFSlhDEaQEYsMhChLCGp.u-DohSlhP-DPRp.t.L........................ 0 10 12 19 +6402 PF06574 FAD_syn Flavokinase; FAD synthetase Moxon SJ, Mistry J, Eddy S anon Pfam-B_18632 (release 10.0) Family This family corresponds to the N terminal domain of the bifunctional enzyme riboflavin kinase / FAD synthetase. These enzymes have both ATP:riboflavin 5'-phospho transferase and ATP:FMN-adenylyltransferase activity [1]. They catalyse the 5'-phosphorylation of riboflavin to FMN and the adenylylation of FMN to FAD [1]. 
This domain is thought to have the flavin mononucleotide (FMN) adenylyltransferase activity [2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.74 0.71 -4.60 62 4652 2012-10-02 18:00:56 2003-07-22 15:00:27 7 15 4320 17 995 3311 2386 148.80 31 49.28 CHANGED h.tt.pssllslGsFDGVHhGHppllspshphA....pppsl.ssVhTF-P+Ppphhp............pptshc.LoshccKhchlpphG.lDhlhhlsFsppFuplouccFlpphLlppLps+tlllGhDFpFG+pppGshphLpphupths.apVphlssh..phsstclSSTt ....................................pssslsl.GsFDGVHhGHptllpphpc.hA.....................c..p..p..s..l............s.s..VhTFcPpPtp..hht.......................ppsshp....Lsshc-Khchl....tp..h..G......lD......hlhh.l..pFs...ppF...u...p...los.ppFlpphLh.tpL.p.s.+.hlllG.DFpFGp...p.+.p..Gs.hphLpph..ut.....t.....h....s..Fp.lph...ss.ph.....p...p.s.....+.l.SST........................................................................................................ 0 342 672 856 +6403 PF06575 DUF1132 Protein of unknown function (DUF1132) Moxon SJ anon Pfam-B_19091 (release 10.0) Family This family consists of several hypothetical proteins from Neisseria meningitidis. The function of this family is unknown. 25.00 25.00 83.70 83.50 22.20 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.39 0.72 -3.70 4 69 2009-01-15 18:05:59 2003-07-22 15:24:27 7 1 39 0 3 29 0 94.10 64 89.32 CHANGED hALYKYQPSSKYFGQSMAlIAppEF.EFs+ssKpasllEsFSaFLN+RlsHNIWKIYFSDESshaI+.....cS.csG+pl+cF.asEhsDs.ssFsslFs LALYKYQPSSKYFGQSMAlIAQpEF.EFs+.sKp..sVI-CFSaFhN+RlpHsIWhI.FSDpSphhI+......cS.csG+placF.asEhsDs.ssFsslF......... 0 3 3 3 +6404 PF06576 DUF1133 Protein of unknown function (DUF1133) Moxon SJ anon Pfam-B_19184 (release 10.0) Family This family consists of a number of hypothetical proteins from Escherichia coli O157:H7 and Salmonella typhi. The function of this family is unknown. 
21.70 21.70 21.70 21.70 21.60 21.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.19 0.71 -5.09 4 284 2012-10-04 14:01:12 2003-07-22 15:29:15 6 3 180 0 2 194 0 164.10 76 95.54 CHANGED MI.PopsGKSGEhlRLpTLESlWIQGKLRMWGRWSYIGGGpuGNMFNpLLuStKlTKTAlNEALRRMKKuGIcKPELEAFhREhlsuKpKShLAaCoDsEuLpIDtVIupVLsc..asGLhullhpRYchR.hSKRpMAEpLpspHP-WshhTCcRRI-sWLulAEphLYsPMsDuFG .......................MIYPp.TGKS.GEHLRLpTLEuVWIQGKLRMWGRW....SYIG.GG....KoG.NMFNQLLsSKKLT.K...TAINEALRRMKKuGl-KPELEAFLR-MIsGKQK.SW..LuHCT.D.uEALhIDRV....IuEV.LAc..aPGLlslL+Q............RYcGRGMoK.............RKMAELL.N-..............s.H.P.............EW............shp............TCE+RIttWLtVAEahLYhPMh.uF.................................... 0 0 0 1 +6405 PF06577 DUF1134 Protein of unknown function (DUF1134) Moxon SJ anon Pfam-B_19217 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 27.80 27.80 28.00 27.80 27.30 27.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.96 0.71 -4.74 26 198 2009-01-15 18:05:59 2003-07-22 15:31:14 7 1 193 0 77 155 20 160.70 59 75.36 CHANGED ssoYsp-EllsuucsFFGpsotGLApllEcsFpchGpPNGYIlGcEuSGAhlsGLRYGEGsLapKssGc.++lYWQGPSlGaDhGGsuuRshhLVYNLssspsLYpRFsGlsGSAYlVuGhGhshLppssllLlPIRoGVGhRLGlNlGYLKaTpp.TWsPF .................ssoYot-ElVcuGHcFFGssStGLAsslE+AFppaG.h.PNGYILGEEGSGAFluGLpY.GEGTLYTKNA.G.c....++laWQGPSLGaDaGGpGoRsMhLVYNLsslpsLYpRauGVsGSAYlluGlGhssL+pssIVLVPIRo.GlGARLGlNlGYLKhotpPTWNPF............ 0 20 46 58 +6406 PF06578 YscK YOP proteins translocation protein K (YscK) Moxon SJ anon Pfam-B_19248 (release 10.0) Family This family consists of several YscK proteins. The function of this protein is unknown but it belongs to an operon involved in the secretion of Yop proteins across bacterial membranes. 
20.60 20.60 20.60 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.59 0.71 -4.93 9 84 2009-09-10 17:11:24 2003-07-22 15:37:43 7 1 78 \N 13 49 2 201.50 43 94.20 CHANGED sulTsaQhRFCPAuYlccualPsphhhlLs.LPpWRspPslNthLL--huL....-suachPssLGuLALhPputLppLLthLGulLHGpAlRpslLupslpplhsllGp-GtRhLlpQh-lLIGsWPsGWQ+PLPpplc-shhEpsuLpF...WL.....uAsp-hs.sWtpRLsLRLs.ssssssW.lupppRsLApsLChKIAKQVsPQChHLLK ........................................hlTsaQLRFCPAtYlH.spLPuhh......h.p..lLshLPpWRc...s..u..tLNuhLL-cau.L....Dssac..pPpsL..GuLsLhPQutLE..tLLshLGhlLHG.pAlRpslLusslp.p.LhsllGp-shR.ll.Qt-LLIG.sW...PstW.QRsLPsph-pth....h.psGLsF...WL.....AAhpstspsWs+RLsL..RLshsss.stsW.l....sEppRsLApsLChKlsKQVhPpC.HLhK............................................................................... 1 3 5 6 +6407 PF06579 Ly-6_related Caenorhabditis elegans ly-6-related protein Moxon SJ anon Pfam-B_19267 (release 10.0) Family This family consists of several Caenorhabditis elegans specific ly-6-related HOT and ODR proteins. These proteins are involved in the olfactory system. Odr-2 mutants are known to be defective in the ability to chemotax to odorants that are recognised by the two AWC olfactory neurons. Odr-2 encodes a membrane-associated protein related to the Ly-6 superfamily of GPI-linked signaling proteins [1]. 21.00 21.00 22.20 21.30 20.20 19.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.94 0.71 -3.93 4 68 2009-01-15 18:05:59 2003-07-22 15:46:10 7 1 9 0 59 59 0 113.00 30 62.49 CHANGED CMS.hYpshapa..hp+hYpcPtsFospCcDsph-so.h.os.C..pohCVTlhps.cVhuG.hht+tYhRGChsslhh+GaNpo..+Tluhhpp.p.C+shotopLF.sstpc-..t.uclplCSChGshCNhS ........................CMS..Ypsha.....lpphYhhP+sFTcpCpc.pht.tu.hsss.C..pohCVolhEs.sl..............h.ut....h........htptaIRGChsclhhpGhNps..tThth.hpppp.Cpphptp..pLa...th.tp....pplplCoChsshCN................. 
2 21 30 59 +6408 PF06580 His_kinase Histidine kinase Vella Briffa B anon Pfam-B_794 (release 10.0) Family This family represents a region within bacterial histidine kinase enzymes. Two-component signal transduction systems such as those mediated by histidine kinase are integral parts of bacterial cellular regulatory processes, and are used to regulate the expression of genes involved in virulence [1]. Members of this family often contain Pfam:PF02518 and/or Pfam:PF00672. 21.30 21.30 21.60 22.70 20.80 20.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.71 0.72 -4.06 82 7029 2009-01-15 18:05:59 2003-07-22 15:49:28 8 139 2576 0 1632 5611 368 82.50 32 15.92 CHANGED clchLpuQlsPHFLaNoLssIpthsphs..sppstphlhpLuchlR..hsl...psppphloLp-ElphlcsYltlpph..Rascclph..ph ...................clcsLpuQINPHFLFNoLssI..p..t..hh..ph..s...s......cc..spphlhpLSphhR..............hsL......pps............p...........ch.....lsLpcElphlcsYltlp.ph..Ras.c.+lphp.................... 0 783 1309 1483 +6409 PF06581 p31comet DUF1135; Mad1 and Cdc20-bound-Mad2 binding Moxon SJ anon Pfam-B_19462 (release 10.0) Family This family is involved in the cell-cycle surveillance mechanism called the spindle checkpoint. This mechanism monitors the proper bipolar attachment of sister chromatids to spindle microtubules and ensures the fidelity of chromosome segregation during mitosis. A key player in mitosis is Mad2, and Mad2 exhibits an unusual two-state behaviour. A Mad1-Mad2 core complex recruits cytosolic Mad2 to kinetochores through Mad2 dimerisation and converts Mad2 to a conformer amenable to Cdc20 binding. p31comet inactivates the checkpoint by binding to Mad1- or Cdc20-bound Mad2 in such a way as to stop Mad2 activation and to promote the dissociation of the Mad2-Cdc20 complex [1]. 
24.10 24.10 24.20 25.50 23.80 24.00 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.64 0.70 -5.52 3 64 2009-12-03 16:30:54 2003-07-22 16:00:29 7 2 48 2 39 60 0 238.90 56 86.91 CHANGED SsAAAPsL-WYEKPEETHAsEV-.LETVhPPAQEPSNsuEPFCPRD.LVPVVFPGPVSQEsCCQFTCELLKHILYQRpQLPLPYEQLKHFYRK.VPQAEDss+KKsWhATEARNRKCQQALAELESVLSHLcDFFARTLVP+VLILLGGNALSPKEFYELDLSRLAPFGVDQGLNTAACLRRLFRAIFLADPFSELQTPPLMGTIVMVQGHRDCGEDWFQPKLNYRVPSRGHKLTVTLSCGRPSVPAMASEDYIWFQAPVTLKGFHE ................................................Lchh-cst-spsspl-.L-p...sspcs.ps.E.hpPcD.hVPVVFPGPVSQEGCCpFTCELLKHIhYQRQQLPLPYEQLKaFYRK...oP.....QA..E-hh+KK..spsssEso....sRKCQQsLuELESVL...SHL.E.shFAR.TL.VPRVLILLGGsALSPKEFYELDL..S.pLuPhS...h...-pSL.oTuAC........LR+LFRAlFhADsFSEL.QuPPLMGT...lVMsQ.GHRDCGEDWFRPKLNYRVPoRGHKLTVTL..S.....C....G...c.Pu.....l...............s......h.......A..h..EDYIWFQAPVTLKGF+E.............................. 0 7 9 20 +6410 PF06582 DUF1136 Repeat of unknown function (DUF1136) Moxon SJ anon Pfam-B_19544 (release 10.0) Repeat This family consists of several eukaryote specific repeats of unknown function. This repeat seems to always be found with Pfam:PF00047. 20.30 20.30 20.70 20.30 19.70 20.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -7.48 0.72 -4.17 78 617 2009-01-15 18:05:59 2003-07-22 16:05:55 7 56 46 0 370 869 0 29.10 36 5.14 CHANGED lhh-oQcPp.....uL..c+IppLEsspp...+...pps ....llh-oQHPp.....uL...c+IppLEsspt...t+....-........... 0 135 170 323 +6411 PF06583 Neogenin_C Neogenin_C-term; Neogenin C-terminus Vella Briffa B anon Pfam-B_16188 (release 10.0) Family This family represents the C-terminus of eukaryotic neogenin precursor proteins, which contains several potential phosphorylation sites [1]. 
Neogenin is a member of the N-CAM family of cell adhesion molecules (and therefore contains multiple copies of Pfam:PF00047 and Pfam:PF00041) and is closely related to the DCC tumour suppressor gene product - these proteins may play an integral role in regulating differentiation programmes and/or cell migration events within many adult and embryonic tissues [2]. 25.00 25.00 29.10 27.50 20.30 21.60 hmmbuild --amino -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.40 0.70 -4.86 10 213 2009-01-15 18:05:59 2003-07-22 16:44:25 7 31 76 1 91 194 0 242.30 41 21.17 CHANGED KDLKPPDLWIHHEchELKslDKSscssPsh.TDTPI.posQDlsPlssohpopsp..s+psSYpGtEoE-uhS.......SLAuRRuhRsK..MMhPhDoQPsps.VlSAhPl.oLDsspa....G.LsSPspuY..HtphShhshshssho...shst..pspusposssTPpsshlPsusupst......................................................................................................................s.ssEEssupSlPTA+lRP......oHPLKSFAVPulPsss..u.hpPtlP.oPlhsQ....................................suso.hstppVKTASlGphG+sRSP.hP...VoVPoAP-lhEsu.+hhEDo-s.......................................sYEsDELScEMAsLEGLMKDLNAITuu ...............................................................................................................................................................................................KDl+PPDLWIHHEchEhKsh-Kus.ssss.h....p-oPl..ps.s.QDl.ssss.pu.pop.p..p+ps...S.....apGt-sE-s..S........oLuuRRuhRsK..h......MhPhDuQs.s....ps...h......t.sh.sh..-pspa.....................Ptpta..h...s.......s....hho.....ht.............sp.ph.ss.pss..hhssspsp....................................................................................................................................stppssupolsouphRP......sHPL+SFusP...hlPssh...s..tstl...oPhhsp........................................tu.s.h.hp.VKTASlG.hhG...+.sRsP.hs....VsVPsAP-l..E..ps...+hh-Dsts.........................................saps--LopEMApLEGLMKpLNAITss....................... 
0 20 28 56 +6412 PF06584 DIRP DIRP Studholme DJ, Guo, JH anon Pfam-B_2017 (release 9.0) Family DIRP (Domain in Rb-related Pathway) is postulated to be involved in the Rb-related pathway, which is encoded by multiple eukaryotic genomes and is present in proteins including lin-9 of Caenorhabditis elegans, aly of fruit fly and mustard weed. Studies of lin-9 and aly of fruit fly proteins containing DIRP suggest that this domain might be involved in development. Aly, lin-9, act in parallel to, or downstream of, activation of MAPK by the RTK-Ras signalling pathway. 21.90 21.90 22.20 37.30 20.70 19.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.40 0.72 -4.19 14 225 2009-01-15 18:05:59 2003-07-22 17:01:08 8 4 120 0 127 239 1 104.50 44 15.67 CHANGED FYSslD+shFp.cs-FtphL.cphs.h+sp+LTRsEWphIRpphGKPRRhSttFlpEERpcLpphRcplRph...pthp.pt...phshhcsL.....Pp-..lsh.PLslGp+VhAhh.t ..............FYSsID+....PhFp..c...s-Ft.hLtEp................h...P.L+.op...+LTRsEWshIRR.M....GK.....P.RR.hSsuFhcEERpcLcppRpplRtL.......Qpp+hss..........su.hcsL..................Pcc..lPh.PLslGs+VoApL+.s................................. 0 44 67 99 +6413 PF06585 JHBP Haemolymph juvenile hormone binding protein (JHBP) Moxon SJ anon Pfam-B_19686 (release 10.0) Family This family consists of several insect-specific haemolymph juvenile hormone binding proteins (JHBP). Juvenile hormone regulates embryogenesis, maintains the status quo of larval development and stimulates reproductive maturation in the adult insect. JH is transported from the sites of its synthesis to target tissues by a haemolymph carrier called juvenile hormone-binding protein (JHBP). JHBP protects the JH molecules from hydrolysis by non-specific esterases present in the insect haemolymph [1]. The crystal structure of the JHBP from Galleria mellonella shows an unusual fold consisting of a long alpha-helix wrapped in a much curved antiparallel beta-sheet. 
The folding pattern for this structure closely resembles that found in some tandem-repeat mammalian lipid-binding and bactericidal permeability-increasing proteins, with a similar organisation of the major cavity and a disulfide bond linking the long helix and the beta-sheet. It would appear that JHBP forms two cavities, only one of which, the one near the N- and C-termini, binds the hormone; binding induces a conformational change, of unknown significance [1]. This family now includes DUF233, Pfam:PF03027. 27.40 27.40 27.70 27.50 26.40 27.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.35 0.70 -5.56 92 924 2009-01-15 18:05:59 2003-07-23 12:02:26 6 11 65 12 627 1020 0 226.30 17 89.17 CHANGED Mphhhhhh.........hlhhhhhshtttt....................................st.hptCphs.........hspCltpshpp.hh.phss.G..lPch...slssl-Plplsplplpt.....tshshphshpshplhGhss....hplpch....phchp.phphphphthP.plphpG.cYphpGp.llh.......lslpGpGphphshpshphphphphphht..t...ssp..pahpl..pphchphc.l.sphphphpNLFss.sptLspshsphlNpshptlhpphps...slpcshsphhhshhsclhpph.............Phcclh ...............................................................................hhhhh.h....................................................s..ht...Ctht...............hspClhpshpt.hh..htp.G........lPph..slssl-P.hh...l........s.p...hphp......ttsshthphsh.pshplh.Gh.ss............hplpph.............phshp..ph...p.....h..p..h..phphP..p.lph.p...u.p...Y.phpGp..lhh............lslp.upGp.hphsh.ps.hphphp.hphphhp...t.....................ts..pahpl.....pph.phphp..l.tph.p..hph..p.sl.hs.t...sp...l.s.p..hhsphlN....pshptl....h....pphps.....ltpthsp.h.hhs.hhs.phhpphshpph........................................................................ 0 184 250 520 +6414 PF06586 TraK TraK protein Moxon SJ anon Pfam-B_19687 (release 10.0) Family This family consists of several TraK proteins from Escherichia coli, Salmonella typhi and Salmonella typhimurium. 
TraK is known to be essential for pilus assembly but its exact role in this process is unknown [1]. 26.00 26.00 26.00 26.10 24.50 25.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.35 0.70 -5.23 26 481 2009-01-15 18:05:59 2003-07-23 12:08:45 6 7 366 0 71 334 15 227.20 29 80.82 CHANGED M...........hhsllhusslhussh..........tstuspslshss............suphslslSssshNRlslssD..+Is.........slsssputh.....p.pc....psssGslhlp.......shsstshThalpTccGhs..aSltlsPcsssu.colhls..............shstsppcs...tsaEssssY.pptlsplspuhhs....Gphss.Gaphtslspps.......................h......hstsLphp.ttsasGsphpuhhac..lcNtsspslslpEpcFhp.....sust....Alhlspt..pLtPGppsplal .............................................................................h.hhs.........h.....................huspslshss.......................................GsphslslSsss.NhhslP.s-..clh..........................................slss..s.sth.......t.pp.........psss.G.sllls..........ossppP.hohhlp..sc..p..G..h..s...hSlphlP.+.c.ssu.+slpLs...........................ssht.sssccA......ssW..E.s.u.....s.PY...pshlhsl.pultt.......GclPs.Gat.h.ss.pcs..........................ht.........ssssl..shh..tsthas..Gsc.hphhhat...VcNts.pslplpEpsFap.....sush......AVh.hpp..tpLhsGtchclYV.................................................................. 0 19 37 55 +6415 PF06587 DUF1137 Protein of unknown function (DUF1137) Moxon SJ anon Pfam-B_20097 (release 10.0) Family This family consists of several hypothetical proteins specific to Chlamydia species. The function of this family is unknown. 
20.10 20.10 21.60 179.40 17.00 19.40 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.87 0.71 -4.57 3 37 2009-01-15 18:05:59 2003-07-23 12:11:45 6 1 36 0 4 15 4 159.60 70 98.01 CHANGED MTKFLFaGLFCSLuLLllACsThVAIIKVDsICDVSCMNK.HFpcAPPFLKIKKLGV+KQIsSPE+QFFaC+IDKSCMELHFSsoSYuCKElLS+LSGHIpTQshEKtMpFRGNGGLLNYQDsSLsVYDCRFpVDPl..asoPDAE-Eh....AsGGMKTLSLSLL MTKFLFHGIWClVVLlLCAClTALAVVKMGcFTNPTLVHQDssTPAPPFLKIKKLGVRKRIISPEKQhFYCTIDKSCMELHFSNTSLHCRELLSHLTGsLQTETsERAMFFRGTGGLLNYKDYSLSVYNCCFSINs....ssP-uscEh....AEGGMKVLSLSLL. 0 1 1 3 +6416 PF06588 Muskelin_N Muskelin N-terminus Moxon SJ anon Pfam-B_20299 (release 10.0) Family This family represents the N-terminal region of muskelin and is found in conjunction with several Pfam:PF01344 repeats. Muskelin is an intracellular, kelch repeat protein that is needed in cell-spreading responses to the matrix adhesion molecule, thrombospondin-1 [1]. 19.60 19.60 19.70 19.70 19.40 19.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.37 0.71 -4.63 4 139 2012-10-03 19:46:52 2003-07-23 12:22:06 6 16 96 0 85 129 2 172.00 54 26.47 CHANGED .pchLsYpIapaSSYSusYlPpNILlDsPpDtsSRWSspT......Ns..QYllLKLc+sAlVpsITFGKacKsHVCNlKKF+VaGGh-pcpMh.LLpuGLKNDsshETFsLphKp.p...p.hsspYlKIVPLhuWGssFNaSIWYVEL+GlDDs.hlpsph+.hshhhEtcul+hCL+aFRppGah-hapsLpcpoplplEHs ..................................phLsatlaphS.SaSss.YlP.c.......NILVDpPsDQoSR.WSopo...........................N...PP.QaLlLKLc+PAIVpsITFGKYEKoHVCNlKKFKVaGGh..s.-..ENM.............sELL.puGLKND..stETFsLKHcl-..............p.....phFPsRaIKI...VPLh.....SWGP.SFNFSIWYVELpGI-DPc.lVp....sslphYspaREpEAI.RL....CLKHFRQ.psYh-AFcuLQ+pTpltLEHP......................................... 
2 40 44 63 +6417 PF06589 CRA Circumsporozoite-related antigen (CRA) Moxon SJ anon Pfam-B_19386 (release 10.0) Family This family consists of several circumsporozoite-related antigen (CRA) or exported protein-1 (EXP1) sequences found specifically in Plasmodium species. The function of this family is unknown. 22.20 22.20 22.90 25.00 21.40 21.80 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.81 0.71 -4.91 3 21 2009-01-15 18:05:59 2003-07-23 12:28:07 6 1 8 0 7 21 1 134.10 69 98.50 CHANGED MKILSllFL.LFslILsN-ALG-N...sNGchGSGNspKKpsKptSGEPLIDVHDLISDMVRKEEELVclTK+KSsYKLAToVLAoALGVVSAVLLGGAGLVhYNTEKGRHPFpIGuSKsGDuA.scssS.sssEPoP...uP....p-ssPsAophcDssLsSGsEu ....MKILSVFFLALFF.IIFNKESLAEK......TNKGTGSGVSSKK.....KNKKGSGEPLIDV....HD.....LISDMIKKEEELVEVNKRKSKYKLATSVLAGLLGVVSTVLLGGVGLVLYNTEKGRHPFKIGSSDPAtst..................................................... 0 2 3 6 +6418 PF06590 PerB PerB protein Moxon SJ anon Pfam-B_19494 (release 10.0) Family This family consists of several PerB or BfpV proteins found specifically in Escherichia coli. PerB is thought to play a role in regulating the expression of BfpA [1]. 25.00 25.00 127.20 127.10 21.90 19.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.89 0.71 -3.85 2 14 2009-09-11 14:43:35 2003-07-23 12:35:24 6 1 4 0 0 12 0 102.70 94 100.00 CHANGED MKNNLREEKEVVhDGChNVLSLPStWKAITPKKNNsTSEIIVFFIPPKASYHIILKYspTKHCELFFSDHITGEQDlIYSQsAFFSHVINHIIALVDVLNKKSYASNVIKFLITMEGGGDILSESKRAP MKNNLREEKEVVFDGCMNVLSLPSGWKAITPKKNNsTSEIIVLFIPPKASYHIILKYNKTKHCELFFS................................................................ 0 0 0 0 +6419 PF06591 Phage_T4_Ndd T4-like phage nuclear disruption protein (Ndd) Moxon SJ anon Pfam-B_19553 (release 10.0) Family This family consists of several nuclear disruption (Ndd) proteins from T4-like phages. 
Early in a bacteriophage T4 infection, the phage ndd gene causes the rapid destruction of the structure of the Escherichia coli nucleoid. The targets of Ndd action may be the chromosomal sequences that determine the structure of the nucleoid [1]. 25.00 25.00 26.10 25.30 21.00 20.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.00 0.71 -4.64 2 31 2009-01-15 18:05:59 2003-07-23 13:29:45 6 1 28 0 0 26 0 148.50 58 98.82 CHANGED .KYhThpDL.ssGupVlusl+sGEa..Go..pK-hhS+.GFYFhV.up.DhR......VuARFaVGpQRSKQGhsAlLSHIRQtRSQLARThusNNl.YsVhalsAppMKPLTTGaGKGQLALAFTRNHpSEYQTLpEMNRhLADNF+FlLQuY ..............................pYMTlpDLpsAGATsIGslK........sGEa.hhGsP.pKD.....I.LocPGFYFlV.uchsu.....ssVuARFYVG.NQRSKQGFsuVLSHIRpRRSQLARTIA..sNs..lsY.sVaYlPASKMKPLTTGFG..KGQLALAFTRNHpS-YQTLEEMNRMLADNFKFlLQAY.... 0 0 0 0 +6420 PF06592 DUF1138 Protein of unknown function (DUF1138) Moxon SJ anon Pfam-B_19518 (release 10.0) Family This family consists of several hypothetical short plant proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 25.00 25.00 25.50 25.50 20.30 19.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.38 0.72 -3.97 5 64 2009-01-15 18:05:59 2003-07-23 13:30:54 8 1 30 0 35 50 0 72.60 62 80.72 CHANGED uKYIIGuLsGSFAlAYVCD+aIADcKlFG..............GTTP+TVosKEWGpATDEKFQAWPRTAGPPVVMNPISRQNFI.VK ...........sKYIluuLlGSFAlAYlCDhhlu-KKlFG..............GTTP+TVosKEWapsTDc.KFQ.AWPRT.AGP..PVVMNPISRQNFI.VK........ 0 4 18 27 +6421 PF06593 RBDV_coat Raspberry bushy dwarf virus coat protein Moxon SJ anon Pfam-B_19630 (release 10.0) Family This family consists of several Raspberry bushy dwarf virus coat proteins. 
25.00 25.00 25.20 405.60 23.80 17.10 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.60 0.70 -5.17 2 18 2009-01-15 18:05:59 2003-07-23 13:33:28 6 1 3 0 0 17 0 272.80 94 100.00 CHANGED MuKKAVPP.VKAQYELYNRKLNRAIKVSGsQKKLDASFVGFSEuSNPtTGKPHADMSMSAKVpRVNTWLKNFDREYW-NQFASKPlPRPAKQVLKGSSSKSQQRDEGEVVFTRKDSQKSVRTVSYWVCTPEKSMKPLKYKEDENVVEVTFNDLsAQKAGDKLVSILLEINVVGGAVDDKGRVAVLEKDAAVTVDYLLGSPYEAINLVSGLNKINFRSMTDVVDSIPSLLNERKVCVFQNDDSSSFYIRKWANFLQEVSAVLPVGTGKSSTIVLT MSKKAVPPIVKAQYELYNRKLNRAIKVSGsQKKLDASFVGFSESSNPETGKPHADMSMSAKVKRVNTWLKNFDREYWDNQFASKPlPRPAKQVLKGSSSK......SQQRDEGEVVFTRKDSQKSVRTVSYWVCTPEKSMKPLKYKEDENVVEVTFNDLsAQKAGDKLVSILLEINVVGGAVDDKGRVAVLEKDAAVTVDYLLGSPYEAINLVSGLNKINFRSMTDVVDSIPSLLNERKVCVFQNDDSSSFYIRKWANFLQEVSAVLPVGTGKSSTIVLT. 0 0 0 0 +6422 PF06594 HCBP_related HCBP_repeat; Haemolysin-type calcium binding protein related domain Moxon SJ anon Pfam-B_20041 (release 10.0) Domain This family consists of a number of bacteria specific domains which are found in haemolysin-type calcium binding proteins. This family is found in conjunction with Pfam:PF00353 and is often found in multiple copies. 21.40 21.40 21.60 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.93 0.72 -4.32 116 953 2009-01-15 18:05:59 2003-07-23 13:37:28 6 179 153 0 316 1046 112 43.60 28 9.86 CHANGED D..Lllplsso......sDplplpshasss........shtl-plpFADGTsWstssl ......................DLllplt..ss........sDplplpsaFpss...............shpl-plpF.uD.G..oshshsp.............. 0 121 261 274 +6423 PF06595 BDV_P24 Borna disease virus P24 protein Moxon SJ anon Pfam-B_20092 (release 10.0) Family This family consists of several Borna disease virus (BDV) P24 proteins. The function of this family is unknown. 
25.00 25.00 36.00 36.00 19.70 19.50 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.41 0.71 -4.55 3 112 2009-01-15 18:05:59 2003-07-23 13:42:16 6 1 11 0 0 86 0 143.90 73 99.99 CHANGED MATRPuSLV-SLEDEEDPQTLRRERSGSP.RPRK.VPRNALTQPVDQLL+DLRKNPSMISDPDQR.TGREQLSNDELIKKLVTEL...AENSMIEAEEVRGTLGDISARIEAGFESLSALQVETIQTAQRCDHSDSIRILGENIKILDRSMKTMMETMKLMMEKVDLLYASTAVGTS.APMLPSHPAPPRIYPpLPuuQssD...phDIIP ..................................................LLcpl+KNPSMISD.DQR.TGREQLSNDELIKpLVTEL...AENSMIEAEplRGoLuDIuARlEuGFESLSuLQVETIQssQ+sDaSDSI+lLGENIKILDRSMKTMMETMKLMMEKlDLLYuosAlGss.APMhPSHPuPs+lYPpL........................... 0 0 0 0 +6424 PF06596 PsbX Photosystem II reaction centre X protein (PsbX) Moxon SJ anon Pfam-B_20149 (release 10.0) Family This family consists of several photosystem II reaction centre X protein (PsbX) sequences from both prokaryotes and eukaryotes. 25.00 25.00 31.00 31.00 20.30 19.70 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.75 0.72 -4.28 22 156 2009-01-15 18:05:59 2003-07-23 13:45:41 6 2 118 15 61 149 93 38.30 48 46.53 CHANGED MTPSLu.NFl.SLlhGuhlVVlPlssALlhlSQpD+lpRs ....hTPSLp.NFLhSlluGu.lVllsIssAllhVSphD.VcRp. 0 17 43 56 +6425 PF06597 Clostridium_P47 Clostridium P-47 protein Moxon SJ anon Pfam-B_20156 (release 10.0) Family This family consists of several P-47 proteins from various Clostridium species as well as two related sequences from Pseudomonas putida. The function of this family is unknown. 
19.30 19.30 19.30 19.30 18.30 17.50 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.56 0.70 -5.97 12 113 2010-01-04 16:41:40 2003-07-23 13:50:17 6 2 34 0 16 91 2 408.90 26 80.93 CHANGED ThGWDhVauhshphVNct.hphppts.spF..S..hucttplpGsFssWpIhsG......Gsup.lplthPltp...Gsht.hpssshslsGhssslplcLsah.....P.....t.s..otsssphpl.s..hpss.tpps.............t.....tss..........h.pshhptllhphl......pNhcphsalFuolNls..h.pt.phpWhpPs.hpYAYts.sss.psuhLGlLuhsssRsh...u.pLppplDsshlsssupsGhLIScplFlcsllLPsLspsF.ssustssFclhspssp.......hpltsstplsltslpsuuhhYpPhlspFslp.lpsshlphphps+s-ls.Glshhhpshshpshphhhpsssppplsap.spsP...phopcsahsshslIsthlhuslsthlhts................Itutltstls...................s.hs.hlphsss.lpWss...shhslosssLssshphpG .............................................................ThGWDhVhusshcslNct..hch...ts.......tcF.....sh..pspthph....p..GpFssWplhsG......GsGpplRlKhPIKs.....uhhp......h.....ps....sshshs..s.s..s....h..lplc.Lsah.....................s....p..p.s.....s....ts....s...phph.h.h.psppsp.......pcs.....................t...............p.hhptlhtphh........pNhpphshIFusl.ls...t...chpWhpPsthpYuas......s..ss...........s...........s......ssuhLulLshlss+sh...o.p.ppslDss.hl....s.p....so..p.s.....uhlIScphFlcphlLPs.L.spth.pthssssFplhsp.sp.......htlpNsp....pl...sht..slp...s...s...s...h.hh...P...hl...s..p..hplp..lps..shlhlphp.......st.s..cls...Ghpthhphhp..p...hph.htsssppphsac...hpps....phsppst.s.hphl..th.lhuslsthhhts....................ht..s.h.h.l...............................s..h......l.as....p..php.h.h.s..................................................................................................................... 0 1 7 12 +6426 PF06598 Chlorovi_GP_rpt Chlorovirus glycoprotein repeat Moxon SJ anon Pfam-B_19883 (release 10.0) Repeat This family consists of s number of repeats found in Chlorovirus glycoproteins. The function of this family is unknown. 
21.00 21.00 24.40 22.00 20.40 20.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.64 0.72 -4.10 36 321 2009-01-15 18:05:59 2003-07-23 13:53:39 6 11 7 0 0 321 19 34.10 45 25.53 CHANGED aFlGNGohLoulsss....lsustshDIhGNVsus.uNV .aFhGNGotLTGlsss....lsushshD...I.h.GNVhus.uNV.... 0 0 0 0 +6427 PF06599 DUF1139 Protein of unknown function (DUF1139) Moxon SJ anon Pfam-B_20355 (release 10.0) Family This family consists of several hypothetical Fijivirus proteins of unknown function. 25.00 25.00 26.10 25.90 21.20 21.20 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.05 0.70 -5.25 3 19 2009-01-15 18:05:59 2003-07-23 13:56:02 6 1 7 0 1 19 0 275.40 53 99.24 CHANGED M-psps-EYuSY.pStslEFDPpcPpINLVNsDFDEsDYsDL-VlLLosDasclpslAl.RIKNAP-YTsEIFEclDslssFs-LFDoEIlEcWsDhDsFhDLRls-sEs-FEhlSShLT+HhQollsspPslLWoolspLAKpSVIQus-D.FsllNYWchMNRRWEhIs-ELRlsFlFRAFcLKusQhtpVS+ILSsSLhFPGLNLIGK+ShIPMhoNaSIPEYLDHWFPTDDYcSDNYLpFIRFsElssscWKKIVVQaYLRpVFS..+VRTKl.......LIAssDVDaWYoLFMRTLIF+SMl+TKsLIKslLNa ...................................MsYshuscYuSh.+SssLEFDPpDPElNLlNQ-FDEsDYpDL-VN.LS-DLSslNLlAT.RIKNuP-YTsEIF-ShDsP.PFApLl-pEIu-EWCDhsNFhDLRlV-sEs-FEFVSSHITcHLLIlLNSNPNlLWTussLLsKlSLlQcssN.F-llNYWcAMsRRW-lIsD-LKhGFVFRAFsLKuNQFElloKLLSDSLhasGIslIGK.SMlPMlTlHSIs-YlDHWF.T-sapSDNFhSFI+htpITVPKWKKlVVQFYLRQlFS..RsRTpV.......LhApsDlDaWYslFM+TLlFKSMh+TKphlKplLN........ 0 0 0 1 +6428 PF06600 DUF1140 Protein of unknown function (DUF1140) Moxon SJ anon Pfam-B_20379 (release 10.0) Family This family consists of several short, hypothetical phage and bacterial proteins. The function of this family is unknown. 
25.00 25.00 25.30 74.20 22.00 21.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.29 0.72 -3.91 3 56 2009-01-15 18:05:59 2003-07-23 13:57:53 6 1 51 0 5 32 0 100.70 57 95.67 CHANGED MTsEDIVpsYpclILKcIFKEIpp.hKoKERA-ls+pKlAEoGhuVRTSRHWKAsuNlEFYI+EhpctLcQLtELDRpa+WScKLHQDRapFVoKYscVLEEY..RptN ......-llppYpshlLKhIhhcIpp.hKpKE+A-lst.chAEsGs..sVRTStaWKusuNhEFYhpEh.cthsthtElDR.hpWSc+LHQ-phKFVpKY.clhEcY..RpuN..... 0 2 4 4 +6429 PF06601 Orthopox_F6 Orthopoxvirus F6 protein Moxon SJ anon Pfam-B_20433 (release 10.0) Family This family consists of several Orthopoxvirus F6L proteins the function of which are unknown. 21.30 21.30 21.30 38.70 21.00 19.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.53 0.72 -4.05 2 33 2009-01-15 18:05:59 2003-07-23 14:00:38 7 2 20 0 0 17 0 71.80 88 96.03 CHANGED MSKILTFVKNKIIDLIpNDQIKYSRVIhIEESDSLLsVDEVaANHGFDCVEMIDENI.NENlEQYKT-SFhp MSKILTFVKNKIIDLI.....NND.QIKYSRVIMIEESDSLLPVDEVHANHGFDCVEMIDENI.NENlEQYKT-SFhp...... 0 0 0 0 +6430 PF06602 Myotub-related Myotubularin-like phosphatase domain Vella Briffa B, Bateman A anon Pfam-B_795 (release 10.0) Domain This family represents the phosphatase domain within eukaryotic myotubularin-related proteins. Myotubularin is a dual-specific lipid phosphatase that dephosphorylates phosphatidylinositol 3-phosphate and phosphatidylinositol (3,5)-bi-phosphate [1]. Mutations in gene encoding myotubularin-related proteins have been associated with disease [2]. 
21.40 21.40 21.40 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.10 0.70 -5.68 24 1877 2012-10-02 20:12:17 2003-07-23 14:09:10 9 82 275 6 1093 1689 23 293.90 29 40.75 CHANGED pLaAFtapsttst.t.p......ua........................phhchhpEacRh..........................................................Gls..spt........WRloslNcsYplCsoYPttllVPpsloDp...tLtpsupFRuppRlPVloahH.cstAsIsRsSQP....LlGh..spRstc............DE+ll.psl..................................................htspspscchhIhD.........................sRsphsAhAN+ApGuGhEspstY..................pschhFhsI...tNIHshRpShp+l...................hcss.....hptss.spalosL-sotWLpHlpslLpuushlsptl...c.cpsSVLVHCSDGWDRTsQlsSLApLhLDPYYRTlcGFpsLlEKEWluFGH+FucRsGHh................................................................tspsspphSPlFlQFLDCVaQlhcQFPsuFEFsEtFLlplhcHhaSCpFGTFLsN.sE+ERtcp ...........................................................................................................................................................................................................................................................th...............................p..t-hp.R...................................................................shs.....................W..+l....o.th...Np...p.a....p..............l...........s.oYPth...........lhVPt..th...s....-p........l...t..s.uthh...............pttRhP....................s..ls.........a...................h......p....t..s.t.....s.sl..h.R.....s..u...p.............su......t.ps.p.............................................--ph.l....thh.................................................................................................................................................................................................p...hl...hD........................................................................................t+s....h.t.t..............t....h.h.u..c...............h.................................................
......................................hp...h...ht.h...........sl...+.....hp....p....ohp.pl...................................................hphs......................s.......tp.ah.....p..tlc.........po..t....W.L..p.h.....lp........hLptuh.l..sp.hl...............p...tt...sVl.lp........p....p.-G.hD.h..T...s.p.l...su.LsplhL......D.P.a...a........RT....l..cGF.............s......L...l-+-WlshGH.FtpR..hs.ph...........................................................................................................................................................................................................................t.t..p.pp.uP.l.......F........h.FlD.sVaQ...............l..h.pQ.......aP.ttFE....Fsp.ha........L..hh.lh.ph...........hus.auoFLhs.sttpR...h............................................................................................................... 0 353 476 753 +6431 PF06603 UpxZ DUF1141; UpxZ family of transcription anti-terminator antagonists Moxon SJ anon Pfam-B_19606 (release 10.0) Family The UpxZ family of proteins acts to inhibit transcription of heterologous capsular polysaccharide loci in Bacteroides species by interfering with the action of the UpxY family of transcription anti-terminators. As antagonists of polysaccharide locus-specific UpxY transcription anti-terminators, the UpxZ proteins exert a hierarchical level of regulation, insuring that only one of the multiple phase-variable capsular polysaccharide loci per cell characteristic of this genus is transcribed at a time. 
21.20 21.20 22.40 21.80 20.30 20.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.29 0.72 -4.02 13 133 2009-01-15 18:05:59 2003-07-23 14:15:26 6 1 25 0 19 121 0 104.90 51 69.86 CHANGED plpsLQpsAHELLYLGhDGuPIYoDcFspLNpEVhppsssLasp..+GsTsEEEAsLCLALLMGYNATlYs.pGDKEp+hQslL-RsasVL-pLPASLLKspLLThCYG ..............hpsLpphAH-LlYLGhDGuPIYoDchspLNpEVacpsssLYsp..+GsTsEEEAslCLALLMGYNAohYs.pG-KEp+lQplLcRsaslL-pLPASLLKspLLThCYG....... 0 5 11 19 +6433 PF06605 Prophage_tail DUF1142; Prophage endopeptidase tail Vella Briffa B anon Pfam-B_16284 (release 10.0) Family This family is of prophage tail proteins that are probably acting as endopeptidases. 27.70 27.70 28.00 27.70 27.40 27.60 hmmbuild -o /dev/null --hand HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.75 0.70 -5.51 23 808 2009-09-16 13:04:28 2003-07-23 14:19:08 6 22 565 1 80 610 5 344.80 18 55.62 CHANGED hs.plNsptsloFolhpscpNtpsashlsp..cphlha.....sspcYhIpphstps.Gpsht.hplpApH.lh.-lpsphlhpph..........sGs.........................holcshhc..hshpsoG..aoYpltss.hsshphp...shGs.pstL-lhppllcpa.GsEapsss+plp.lh..cplG.ppsshhlRataNhsslshphDssslhTtI+GYGc......................................................................................................................................................................................................................................................................................................................tppsstsphh.....h.hcYpSP.A.ch..aGh......+husslpD-RhTstssLcpthKpplp..csPchSlslsh.phpp...hhth.pcstlGDhlhlls-s...lGlshclRlVuhpp.hshssph...clshush......ppshsclhsplspsh+thp 
......................................................................................................................................h...t.Nttt.lshsh....st..p.........phl.p....t.hl.h.......ptp.p......Y..hI.h.hs.p...tu.pt....lplpApc..hh.c.hp.p.p.hh.....sph.................sss........................................................hohpphhp......h..s...p...tos.....h.p.a.p.hh..ss...hp.p.h.p.hp.........phsp..pstl..-hh......p......phlppa....s..t.-....h.........h......h.........s.........s.....p..........p....l.......t....hh..p.p.....h....u.pc......ss..h..h..lphthNh..pslp.hphsspplhTtI..h..sa...G.p...........................................................................................................................................................................................................................................................................................................................................................................h..s.s.t..p..st......s.tht............h.h...p.......a.p.s....s.s...ch....aGh.............................h.u............s..h...p.-.pc...h...p..s...t...c..s.......Lh....ch..s.cp...plp........sp....Pp..hoh...sls..hh.l.t...........hsh.pphphGDsVh.lhpcp....hG..hs.s..plRllchpp.....pshhst....h....pplslush.................pcshh.p.h.p.thtp......p................................................................................................................................................................................................................. 0 36 62 77 +6435 PF06607 Prokineticin Prokineticin Moxon SJ anon Pfam-B_19802 (release 10.0) Family This family consists of several prokineticin proteins and related BM8 sequences. The suprachiasmatic nucleus (SCN) controls the circadian rhythm of physiological and behavioural processes in mammals. It has been shown that prokineticin 2 (PK2), a cysteine-rich secreted protein, functions as an output molecule from the SCN circadian clock. 
PK2 messenger RNA is rhythmically expressed in the SCN, and the phase of PK2 rhythm is responsive to light entrainment. Molecular and genetic studies have revealed that PK2 is a gene that is controlled by a circadian clock [1]. 21.30 21.30 21.50 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.47 0.72 -3.89 7 187 2009-01-15 18:05:59 2003-07-23 14:30:20 6 5 59 2 84 172 0 94.80 39 70.82 CHANGED M+.phhphslLL....Lll........ssucuAVITGAC-+DhQCGsGhCCAVSlWlRulRhCTPhGppG--CHPhSHK.....................VPa.GKRhHHTCPCLPsLsCs+hsss+Y+Ch .................................................................hhh....hhh...........s.....hh.l....oGuC-.+DspC....Gs.G....h.CCA...h..S.hWh+.u..l..phCsPhGppG-pCH..Pho+K........................l..P..a.....s.......c..R.ha..ppCPChPsLhCpph..................... 0 26 29 41 +6436 PF06608 DUF1143 Protein of unknown function (DUF1143) Moxon SJ anon Pfam-B_19953 (release 10.0) Family This family consists of several hypothetical mammalian proteins (from mouse and human). The function of this family is unknown. 20.40 20.40 20.40 20.40 20.20 18.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.21 2 50 2009-01-15 18:05:59 2003-07-23 14:32:26 6 2 35 0 31 49 0 132.60 55 82.63 CHANGED hAspC.hspphhLQRQNLsCaLpNPHhGSlI.ADGHGEVWTDWNsMSKFhQYGWRCTTNENuYSNRTLhGNWNQERYDL+NIVpPKPLPSQFGHhFETTYDssYspKhP.STHRFKREPHhFPGHQPELDPP+YKCTtKSTYMssYScs .................................h.hahtNPph.GSllpAsGHGEVWTDaNshSKFhQYGWRCTTNEssY.SN+TLhGNWNQERYDl+slV.QPKPLP..SQ.FuHYFETTYD..oSY.N....s.K...hP.S..........T..H...RF......K....REPHhFPGHQPELDP..PpaKs..T..t.+SshM.sYs........................................................ 0 10 13 17 +6437 PF06609 TRI12 Fungal trichothecene efflux pump (TRI12) Moxon SJ anon Pfam-B_19969 (release 10.0) Family This family consists of several fungal specific trichothecene efflux pump proteins. 
Many of the genes involved in trichothecene toxin biosynthesis in Fusarium sporotrichioides are present within a gene cluster.It has been suggested that TRI12 may play a role in F. sporotrichioides self-protection against trichothecenes [1]. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 599 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.97 0.70 -6.33 2 214 2012-10-03 03:33:39 2003-07-23 14:37:16 8 8 86 0 151 7876 315 457.40 22 86.41 CHANGED MTssV.EcGlDLESQPDDRhRApALAToAsELP-GYYpSPRllASFAuFShNVsATYFVLQASASALPNILQDlGQSENpuLFSTLWThGQAVSILhMGRLTDRFGRRPFVIhTHIlGLVGAIVGCTAsKFNTLLAAMThLGVAAGPAGuSPLFlGELMSNKTKFLGLLhVShPslshs.huPYhGQRLuIQGsWRWIFYIYIIhSsIAVhLIllWYaPPSFtQLHGKKspKR-ELAKlDWIGIhLV.sGsSLFLLGVSWGGpPNsPWNSuKlIGLhoSGhGoLVlFALYEVaGKP.pPhlPPuLFKDTRGFVCILlISSIMGuMpLsLsIhYPQQVlNIFGSSLKNWpETAWMoATASFGThAGVhlLGslFHLlRHIRWQILVGAhWLTAFLGAMSSlNRDNKNuAIALShhoGFVVuWAQDITMLhVQFITTDEsLGVAFuVVAAuRPFhGSIFTAAFISlYoNpYP+EluoHLoSAhRGTshPQuSFsSLLEAApoGRh-AVpALPGMTspIuuVVSpAMADSYTASYANVYYFAMALGVIPIIASLCMRDhDpYLTDHVPHQlYDRKpAcKDVL-GsS-o.sSPhIhShs-hc. 
......................................................................................................................................h...............................................................................................................................................................................................................................................................................................................h.....h.....s......h..........h..................h.....s...........u...h.........h..............h....h....G.p.......l..o..D..h..h.G.R.+............h..h....l...h..s....t....h...l..h.l..l..G...........I...l...s..s..s......A........p...s.........h..........s.....t....h...l..u...u.......s...h............h.......G...h........u....s......G.......h......t..........h....h..........h........h......h.......u.....-.....h...........h...........s................+......h......+...........h.........h.......s......h.....h........h......h.........h.....s...................l.....h..................................h.....u.....s......h.......h.........u.........p..........h.........h.....s.........h.....t.........s.........s..........W.....R..a........h......h.....h..h...h....h..........l.....h...s.......s......l...u.....h....l....h......h......h..............h..........a..........Y..............a............P..............P..........s.....................h...................p..........h............p..........s........p.......t..........h.......p.........p.............t................p.....................l.......t............c........l........D.....a........l........G...h..h...L......h..h..s...G.l.s....L..FL.l....G....l....................s......a..G..G..............t......................s......W..s.S...u.+.l..l..u...........l.l.....h....G..h.......s..h..L..l.s..F....s..l...a....E.......h....a.......s...t.................p....p.............P.....h......h.....P.......p...L...h.p.......p............R....s......h...s....h....h....l....l.....l......s....h..
.l.......G......h..........h...h..s....h.......h....h...a..P....p....t.h.....h...s...l..a..s.....p...s...............................h....s..h...h.......s...h.....s...........h....s.h......u....h.h..h.u..s....h....l..............h....h......l....h.....p.......h....h....t...c...h......+......h...........h...l.......h..u...s......h...h..h...s...s...h...h..G...s....h....u...s..h..p.....p..s........t...h...h....h....u.h........h...h.....s....s.........h..s...h...u.h....h...............s........h....h......s....p...h......h....s....s......t.....p.................l..u...h..s.......u..l..h....h.......s..........R.........h....h....G....u....l....h.h.s....h....a...........s..l.h........s.......t.h...............h...s....t.........l....s.............s...h...h................s...h.s...........t...........h............h...h..t......h.......h.............s....................h........l........s.u.h.s.......t.......h...t...hh.......u....h.t........uas.u...h.......s..ahh..s.hs.h.uhl..shh.sshhh.....s...t...............................................hh............................................................................................................................................................................................................................................................. 0 41 82 125 +6438 PF06610 DUF1144 Protein of unknown function (DUF1144) Moxon SJ anon Pfam-B_20026 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
25.00 25.00 32.00 32.00 21.40 19.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.76 0.71 -4.43 14 617 2009-09-10 16:34:19 2003-07-23 14:39:04 8 1 613 0 40 182 3 138.80 78 95.31 CHANGED Mh..pss.phRsAsADTFAMVVaCFlsGMhIElhlSGMoFEQSLuSRLLSIPVNIhIAWPYGhaRDahlRput+luss+.ah+sluDllAYVoFQSPVYAsILhsVGAshcQIlTAVoSNhVVShhMGlsYGYFLDhCRRhFRVs ....MF...SPQSRLR.HAVADTFAMVVYCSVVNMhIElFLSGMSFEQShhSRLVAIPVNILIAWPYGhYRDLhMRsARKlSPo.G.WhKNLADlLAYVTFQSPVYlA.ILLsVGADWHQIhAAVSSNIVVSMLMGAVYGYFLDYCRRLFKVS............ 0 4 10 25 +6439 PF06611 DUF1145 Protein of unknown function (DUF1145) Moxon SJ anon Pfam-B_20029 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 20.60 20.60 21.60 20.90 18.50 18.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.76 0.72 -4.34 33 759 2009-01-15 18:05:59 2003-07-23 14:41:03 7 1 737 0 88 256 38 60.30 54 67.24 CHANGED hllsLGKhlhLhhWhhllhNLlhPFssslslhlslhhshhllMHslQlllhpush.tc..s ......MLINlGRLLMLsVWuF.LILNL..VpPFP+PLN..IFVNVALlFhlLMHGhQLALLKSTlPKDu......... 0 8 25 56 +6440 PF06612 DUF1146 Protein of unknown function (DUF1146) Moxon SJ anon Pfam-B_20141 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 28.30 27.60 24.20 23.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.15 0.72 -4.05 27 920 2009-01-15 18:05:59 2003-07-23 14:47:08 6 1 916 0 83 368 1 47.40 37 64.26 CHANGED LhFIsluaWALpul.+h-phh+ps..+shQsplLhlhloIulGhhVSsFFL ......llhIhluaauLpul.+h-phhKps..pstpl+lLhlFluIslGahVSsFhl.......... 0 18 43 62 +6441 PF06613 KorB_C KorB C-terminal beta-barrel domain Bateman A anon Pfam-B_20369 (release 10.0) Domain This family consists of several KorB transcriptional repressor proteins. 
The korB gene is a major regulatory element in the replication and maintenance of broad host-range plasmid RK2. It negatively controls the replication gene trfA, the host-lethal determinants kilA and kilB, and the korA-korB operon [1]. This beta-barrel domain is found at the C-terminus of KorB. 25.00 25.00 26.50 25.50 24.40 23.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.74 0.72 -4.52 4 67 2012-10-01 19:11:18 2003-07-23 15:26:40 6 3 57 6 10 53 6 59.80 61 17.01 CHANGED uDPD+hKKsll.VEHDsRsAtllLNRRPsstGhhal+YEDsGt-hEs-hGslKlshLhEu ....sDPDKLKKAIlQVcHD...sR.PARLlLNRRPsu-GaAWLKYEDDGpEFEADLusVp.LVALlEG. 0 1 5 8 +6442 PF06614 Neuromodulin Neuromodulin Moxon SJ anon Pfam-B_20438 (release 10.0) Family This family consists of several neuromodulin (Axonal membrane protein GAP-43) sequences and is found in conjunction with Pfam:PF00612. GAP-43 is a neuronal calmodulin-binding phosphoprotein that is concentrated in growth cones and pre-synaptic terminals [1]. 25.00 25.00 51.40 51.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.32 0.71 -4.33 3 57 2009-01-15 18:05:59 2003-07-23 16:13:59 6 3 36 0 20 47 0 160.20 64 70.86 CHANGED sKKDEuPuu-uVENKcGEAoTATEAosA-ouKsDEPoKDG.........SoPoEEKKGpGuuDsuoEQPAPQAsss...SEEKsASA.ETESATKASTDNSPS.KAD-APsKEEsKKADVP.hlTshAsTTPAAEDATAKAssQPppETuESSQsEEKpDAVEETKPoESAQQEEuKEEEuKADQENA ...........................pKKDEuP.s.A-GVEKK.GEGsss.TEAAPAsGsKs-E.s.uKAG.........EoPSEEK.KGEG...DsuoEQsAPQAsAs...SEEKA..GSA...ETESATKASTDNSPSSKAEDAPAKEEPKQADVP.AVTsA..AATTPAAEDAAAKATAQPPTETuESSQuEEch.-AV-ETKPpESApQ-EsKtEEscADQEpA................... 0 1 1 5 +6443 PF06615 DUF1147 Protein of unknown function (DUF1147) Moxon SJ anon Pfam-B_20563 (release 10.0) Family This family consists of several short Circovirus proteins of unknown function. 
25.00 25.00 32.70 53.20 20.20 19.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.99 0.72 -4.14 3 8 2009-01-15 18:05:59 2003-07-23 16:21:12 6 1 6 0 0 6 0 54.50 98 100.00 CHANGED MYTSLWGHLGVVKANGLLILQTRKPHTGNHLETSGGMVTMVKKWLLLMTFMAGCRGMIY MYTSLWGHLGVVKANGLLILQTRKPHTGNHLETSGGMVTMVKKWLLLMTFMAGCRGMIY 0 0 0 0 +6444 PF06616 BsuBI_PstI_RE BsuBI/PstI restriction endonuclease C-terminus Vella Briffa B anon Pfam-B_16289 (release 10.0) Family This family represents the C-terminus of bacterial enzymes similar to type II restriction endonucleases BsuBI and PstI (EC:3.1.21.4). The enzymes of the BsuBI restriction/modification (R/M) system recognise the target sequence 5'CTGCAG and are functionally identical with those of the PstI R/M system [1]. 25.00 25.00 30.50 30.40 23.20 22.80 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.64 0.70 -5.62 22 135 2012-10-11 20:44:44 2003-07-23 16:25:22 6 3 130 2 34 117 7 280.90 40 82.08 CHANGED EutpIhhp.uhstp.ps....EpSALs..............s-upp.hhshsshhshtcpthu+.YAsNTREolRcpTl+phl.ushAl........s-psshPssSs+ssYplpsphhsL.........LcuhsoshhpcpLsthhhpRhsLlpphA.spttspIsVphPsGcphpLuPG.pS.LhKsllEpFAPRFhss.stllalu-o......GsKhshhDcpLhpp.LGlslcuctchPDllLh..tcsah......lhlEsVsScGPlstcR+ppLtpLhpsu...pssllaVTAF.sRut..hp+hls-lAWcT.VWhAspPsHLI+h.sGtphltPap .....................Atpllhpluhsttpps....-puAlshLAlhslp.tptW....ssA......ps.....hlshp.s.lhsahccta.........s.+..........YAsNoREThR+pohHQFhtAslsl.ss-cPs+ss.NSPpssYplpsthlsl.........L+sas......ost....ac..ppLs..saltp+tsLhppaApp+ptsplPVplssGpphpLSPGtpspLh+ulIE-FAPRFsss.uhllYlGDT......upKhshh-tthLtt.LGlslss.HsK...hPDVVLa.ps+sWL......lLlEuVTScGPVss+R+tELtcLhtsu...suGll.aVTAF.sRst...h...p+alu-lAWETEVWlA-tPsHhIHa.NGs+FLGP.......... 
0 9 24 27 +6445 PF06617 M-inducer_phosp M-phase inducer phosphatase Vella Briffa B anon Pfam-B_16267 (release 10.0) Family This family represents a region within eukaryotic M-phase inducer phosphatases (EC:3.1.3.48), which also contain the Pfam:PF00581 domain. These proteins are involved in the control of mitosis [1]. 21.70 21.70 22.70 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.95 0.70 -5.32 8 226 2009-01-15 18:05:59 2003-07-23 16:31:08 8 5 44 1 76 243 0 186.80 32 45.42 CHANGED LDSPushDsptspE.................hslpphpShsQ+LLGsSPAh+p.osSsuLDp..hp.tD..th.sssENKENEua.FKhP.h+.h....hpuhh.htpt+-hhsQRpsSAP.hMh.Ss.p+h-l...E-ushhhLuoPpo..ssspsss....cED......DGFlDhL-uEpLc--.tsPsu........MtsLhoAPLVhphpc.ptt...hhsRs.tLaRSPShPsSlsRPhLKRlERspDc-sPspsKRR+Ssos..ptc.tcsppP+p...pl...hp.t.SLCchsIEsl .........................hDpsu.hDsp...p.....................htt.psh..cLhtpSPs.h..opspu.ct........s.....th..s..spsKEN...-s..hK...........................pp.hhpp..ut..........o.s......p..p.....tt.p.h.hh..p.....ssstt.p........................tED......pGF..h-hhps-.l....cs.-....st.........h..LhssPhh.............t.....................s....+s....tLacSPSh.sp....sh...Rs...hLKR..-+.p-ps..P.psK++ps...s.............p....ps.p.p....h............S.sp.pIpph............................................................. 0 4 7 25 +6446 PF06618 DUF1148 Protein of unknown function (DUF1148) Moxon SJ anon Pfam-B_20595 (release 10.0) Family This family consists of several Maize streak virus proteins of unknown function. 
25.00 25.00 248.80 248.60 21.10 17.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.68 0.71 -4.13 2 13 2009-01-15 18:05:59 2003-07-23 16:45:23 6 1 13 0 0 6 0 114.00 99 100.00 CHANGED MRSHTPPEGTVIMVVPtCWSVWIWRDGLSALLPALFSPAEEGFFLETRLFQFESSPRFLLDVDMADCLSRAWNKWPGIALTWWCWYIRDRVAPIHSLSTKYpLLRGSALASLQN MRSHTPPEGTVIMVVPACWSVWIWRDGLSALLPALFSPAEEGFFLETRLFQFESSPRFLLDVDMADCLSRAWNKWPGIALTWWCWYIRDRVAPIHSLSTKYQLLRGSALASLQN. 0 0 0 0 +6447 PF06619 DUF1149 Protein of unknown function (DUF1149) Moxon SJ anon Pfam-B_20513 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 32.30 32.00 21.00 17.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.73 0.71 -4.21 12 561 2009-01-15 18:05:59 2003-07-23 16:47:41 6 1 557 5 40 153 0 123.10 52 97.03 CHANGED MplhR-pEFVspYHaDsRNhtWEcENGsPETslcVsFQLlcp-cttp.......sTsllslLpFhIVhDc..FVISGhISQhs+lhsRllscPsEhsQ-ElcpLutPLl-hlcRLTYEVTEIALDcPGlsLEF ............................Mpl+R-.pEFVspYHaDsRNhtWEpENGsPETcl-VsFQLlp..c..-pENp.......sTullllLsFhIVFDc...FVISGsISQlNHIpsRlls-sSElsQEEVEpLuRPhlshlpRLTYEVTEIALDhPGlNLEF........ 0 5 14 24 +6448 PF06620 DUF1150 Protein of unknown function (DUF1150) Moxon SJ anon Pfam-B_20612 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 26.10 36.90 24.30 17.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.35 0.72 -4.16 48 249 2009-01-15 18:05:59 2003-07-23 17:04:49 6 1 246 0 83 175 52 74.80 38 89.73 CHANGED sphs.t...thussplsYVRslpss-Lspch........splsssppLaAlHuA-GpplALscDRchAFshAppp-hpPVoVH ........h..ho.pphApLGtGclAYVRplcs--lspphPt.....hs.lss.u.hpLaALauAsGpPIsLsDsRpsAhtsAtpc-LpsVolH... 
0 21 51 61 +6449 PF06621 SIM_C Single-minded protein C-terminus Moxon SJ anon Pfam-B_21144 (release 10.0) Family This family represents the C-terminal region of the eukaryotic single-minded (SIM) protein. Drosophila single-minded acts as a positive master gene regulator in central nervous system midline formation. There are two homologues in mammals: SIM1 and SIM2, which are members of the basic-helix-loop-helix PAS family of transcription factors. SIM1 and SIM2 are novel heterodimerisation partners for ARNT in vitro, and they may function both as positive and negative transcriptional regulators in vivo, during embryogenesis and in the adult organism [1]. SIM2 is thought to contribute to some specific Down syndrome phenotypes [2]. This family is found in conjunction with a Pfam:PF00989 domain and associated Pfam:PF00785 motif. 21.50 21.50 21.50 21.50 21.40 21.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.76 0.70 -4.83 5 112 2009-09-10 21:39:22 2003-07-24 10:21:55 7 9 43 0 54 89 0 243.70 41 40.29 CHANGED SPosTu.......sEsRKuuKSRso+sKoKsRTSPYP..QYSuFpsDRSESDQDSsWGGSPLTDoASPQLh-ss-csuo....SCsYRpYSDPtSLCYG.FPL..DcHsLScc+sHhHocsC-...uusCEuuRYFLGTPQuGREsWWcsARSlLPLsKSSPENtcua.ElousHtA.lHslcuRGHWDEDSAVSSsPDu..uGSsS-SGDRa+s-pa+SSPpEPSKhETLIRATQQMIK......EEEsRLQh+KhPs-hsLAspsuLuKuap.......spasQushsusVCRusu.s.......................s..spSPssLSRLSS..PpP..............................Dclo+Ss 
.................oshsT........p-sRKhsKs+.op.K..oK.R....Ts.PYP..QYSuFph-+..Es.s.p.upWtuS..PhsssAuPQ..p.t...scsus......hhh.paS...ShpYG.FsL..Dpphhsp.c+..hh.sp....hsp...Gu.CEsuRaFLuT..su.tEs.Wt.upshlP..op.usscsh.t...sshs+hs..s.phcu...uth.t...cc..s.s..s..Ps.....ssut.cptstht.t.http..........p..s.t......shhts....phht...........sph...h.................................................................................................................................................................................................................................................................. 0 2 6 20 +6450 PF06622 SepQ SepQ protein Moxon SJ anon Pfam-B_21060 (release 10.0) Family This family consists of several enterobacterial SepQ proteins from Escherichia coli and Citrobacter rodentium. The function of this family is unclear. 22.70 22.70 22.90 23.70 22.40 22.60 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.06 0.70 -5.53 2 115 2009-01-15 18:05:59 2003-07-24 10:27:28 6 2 105 0 1 46 0 275.20 84 98.26 CHANGED Mp.LNSQ.NMKINDFYLPLLsVIGhGRLYITsEGHACHAYFREVSGNGhRFTLshSGYEGpFWlSEEQhhQWCpELFPYS-SRLIPEDhIKLMlLWshpssLPEsDsSVDDVQFThLNKDlYPVIENNNGcNRLNVIILchTVQSLQYLIN-NWQhVPHSNTlFFDGYIAPGWTDYPlTcLolGDSLRLYHVDDSpERcCWLVINNPLATVKLsDNNL.lsDVQAAD.LChISNEsVMsRIYCsIGTIpVDIHhLRNhKKDDhIsSsGYHLFGGsRLIRNNThIAYGoIlKINEDFY.slSlVCD .................Mp.LsSQLNMKINDFYLPLLPVIGTGRLYITScGHACHAYFREVSGHGIRFTLTYSGYEGRFWISEEQFIQWCQELFPYSESRLIPEDhIKLMILWV...MQTALPE.GDVSVDD...VQFTMLNKDlYPVIENNNGENRLNVIILETTVQSLQYLINDNW......QFVPHSNsLFFDGYIsPGWTD...YPVTEL...pVGDSLRLYHVDDSpERcCWlV.INsPLATVpLsDNN.L..lsDV..AADLLsslSNEsVMsRIYCsIGTlHlDIHMLRNsKKDDIIsSsGYHLFGGCpLIRNNTTIAYGSIVKINEDFYFTVSlVCD..................................................... 1 0 0 1 +6451 PF06623 MHC_I_C MHC_I C-terminus Moxon SJ anon Pfam-B_21327 (release 10.0) Family This family represents the C-terminal region of the MHC class I antigen. 
The family is found in conjunction with Pfam:PF00129 and Pfam:PF00047. 20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.72 0.72 -6.86 0.72 -4.47 14 3480 2009-01-15 18:05:59 2003-07-24 10:35:40 6 6 87 0 199 3309 0 27.80 76 7.99 CHANGED NoGGKGGsYs.AsupDSuQuSDVSLsssK .........S.SGtKG....GSYSQ........A.ASSDSAQGSDVSLTA............. 0 71 71 72 +6452 PF06624 RAMP4 Ribosome associated membrane protein RAMP4 Moxon SJ anon Pfam-B_20959 (release 10.0) Family This family consists of several ribosome associated membrane protein RAMP4 (or SERP1) sequences. Stabilisation of membrane proteins in response to stress involves the concerted action of a rescue unit in the ER membrane comprised of SERP1/RAMP4, other components of the translocon, and molecular chaperones in the ER [1]. 21.00 21.00 21.00 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.85 0.72 -4.43 16 291 2009-01-15 18:05:59 2003-07-24 10:42:24 7 7 191 0 175 245 1 60.90 43 70.61 CHANGED MuusQRM+htNcpaspNlspRGNVPKSh+sp.-cKaPVGPhLLuLFlFVVCGSAlFQIIppIph ..............................sppRh.+.h.uNcK.a.s.K..N.l.T...p.R...GsVscop...+p.....p...p...p...chPV.GPhLLu...lFlFVVsGS...ulFQIIpshp............ 0 57 96 143 +6453 PF06625 DUF1151 Protein of unknown function (DUF1151) Moxon SJ anon Pfam-B_21020 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
25.00 25.00 31.80 31.00 19.70 19.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.79 0.71 -4.38 9 174 2009-01-15 18:05:59 2003-07-24 10:44:28 6 3 74 0 98 151 0 114.10 50 72.76 CHANGED hucPshl-ssssELIhP+KLlNPstsSts+Q-LHRELLhNpKR.GLslppKPELQ+VhEKRKccpllcppc..EEpptp+.....o-LEpELh+RpQ+L...EphEhcptp.pEEpcp.....PEFl+V+upLR+ .................AcP-Yh-tsss.ELI+P.+KLlNPVKsSRsHQ-LHRELLMNpKR...GLuhpsKPELQ+VhE+R+RsQl..lKp+c...EEtpt++.........osLEtE.Lh+RQQ+L...-QLE.pptt.p..EEpcs........PEFl+V+tNLRR................... 0 15 29 60 +6454 PF06626 DUF1152 Protein of unknown function (DUF1152) Moxon SJ anon Pfam-B_21029 (release 10.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 20.60 20.60 20.70 21.80 20.50 20.30 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.90 0.70 -5.30 14 77 2009-01-15 18:05:59 2003-07-24 10:46:02 7 3 71 0 40 74 6 273.70 29 79.54 CHANGED LllulGGGGDVVuAhslsthltp...G.csslGslsWERhVhDPhPGPlshsplpp.sptlsctlhlssscoashRGGRhhhsQsspsucsLspp.V..ahlD.hcGsptlscuLpchhth.thDtllGVDsGGDlLApGsE-sLtSPLADulsLAsLs+lc....puhLtVhG.GuDGELsh-YllpRlu-lA+pGGhluhhGlscpssclLccllchssTEAStlslt.Ah+GcaG.hpIRsGsRpVhlsshuslhFhhDPpslhphsphA+ll.csotol-EAsctLp.phGlhTElchEcsL..Apt .......................................hhhuhGGGGDlhsuhhhhthh.t...t....pshluslsW-Rhh..DPhPG...Ph.hpphpt..h..lsttlhhls.pshs.t...ssh.hhsphsp..hhp..hht.....hhhlsspsGs.....psltculcphhpphthDtllhVDsGGDlLucGsEpsLtS...PL...tDuloLAuls.plpt......sshlsl.hG.GsD..G..E..LstshlLppluclspt..sGhLuhhul..sp.........p..s......s......p......h......l......cp....ll.......p...p.sso....E.A....Stl...s.......ht...Ah+Gh.aGph.....pl..Rsus..p..plhlsslsulhahhD.pt.l.hpts.hhphl..psotslc-Apphlp.thsh.oEhphE..h...t..................... 
1 19 28 35 +6455 PF06627 DUF1153 Protein of unknown function (DUF1153) Moxon SJ anon Pfam-B_21038 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 23.50 23.50 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.98 0.72 -4.12 22 282 2012-10-04 14:01:12 2003-07-24 10:47:34 6 1 259 2 92 204 26 84.70 58 93.15 CHANGED Mah+KhpGP+tVsLPDGoshTpADLPPssT+RWVApRKAuVVcAVttGLlot-EAhcRYsLS-EEFtuWtpAlscHG.suL+sT.plQcYR ...................psp.+hVIGPDGoPLTlADLPPssT+RWVlRRKAEVVAAVRGGLLSl-EACpRYsLTsEEF.hSWQpuI-cHGhsGLRsTRIQpYR.............. 0 24 58 71 +6456 PF06628 Catalase-rel Catalase-related immune-responsive Vella Briffa B, Coggill P anon Pfam-B_16304 (release 10.0) Family This family represents a small conserved region within catalase enzymes (EC:1.11.1.6). All members also contain the Catalase family, Pfam:PF00199 domain. Catalase decomposes hydrogen peroxide into water and oxygen, serving to protect cells from its toxic effects [1]. This domain carries the immune-responsive amphipathic octa-peptide that is recognised by T cells [2]. 20.90 20.90 20.90 21.50 20.50 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.86 0.72 -4.11 343 4316 2009-01-15 18:05:59 2003-07-24 10:55:15 7 11 2870 287 1207 3539 63 67.50 25 12.26 CHANGED chctsp..D.pasQssthap.sh..sssE+p+llsshstpL....upsspt....lppRhls.hhtpsDsshuppVA.cuLs ..................p.tpt.-.pasQstthap.sh....sst....-....+p+.l.....hsshutpL....spls.ct......IppR.hls.hhtcs.DsshuptVAcsLt........... 0 326 674 986 +6457 PF06629 MipA MltA-interacting protein MipA Moxon SJ anon Pfam-B_8359 (release 9.0) Family This family consists of several bacterial MltA-interacting protein (MipA) like sequences. 
As well as interacting with the membrane-bound lytic transglycosylase MltA, MipA is known to bind to PBP1B, a bifunctional murein transglycosylase/transpeptidase. MipA is considered to be a structural protein mediating the assembly of MltA to PBP1B into a complex [1]. 21.00 21.00 21.00 22.80 20.90 20.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.07 0.70 -4.94 16 1671 2012-10-03 17:14:37 2003-07-24 11:34:39 7 3 1070 0 339 1085 89 223.70 28 84.76 CHANGED phslGs...uushssc..h.uu.cchpltshPhlshptts....hhhcssshuhtlhsssshpluhssthshstt.....csssptt........ptlss.................+cushssGlthth.hs.hplpsphp..tsl.ssscGhpsslshshshplu..phplssuhulsatsppasptYaGVssppuAtSsLsp.YsusuG.hsssluhsspYhls-phshtstsshoRLtusstcSPlVccchphshhhu.......hsYpF ........................................................................................................................................t.holGu...G..s.uhspp....acs....c..p.s..s.h...s..l..P..l..l.s..Ycu-s...............Fah.c.s.......h.....s........h.....G..a..h.L....h.....p..s..s.....s.....s..p.....lul..s.uh....a....ssh.th...........................cs.sc..ss.ctth..........................ptL-c.........................................Rc.u..oh..h.uG.l.u...h......t...a.....h...p......p.....a.G....h....l.c..s.s..hs.........sDs..h....s...s..u...s......G.......h....s.......h.......c.......h.u.hh...a.p.h.p....hu............sh......s...l.oPulGl..pasocs..hscYYYG.VoppE.u...s......+.........S........G.....l.....tu...Y.....s...s...s.s..u...as...sh..lp.lous.Y....s..hs....tc.....W.....s.....lhusupYo..+L.s.s-.l.s.D.SPh...V-...+....sh...sh...hh...ssG...loYcF.................................... 0 55 144 243 +6458 PF06630 Exonuc_VIII Enterobacterial exodeoxyribonuclease VIII Moxon SJ anon Pfam-B_11449 (release 9.0) Family This family consists of several Enterobacterial exodeoxyribonuclease VIII proteins. 
25.00 25.00 28.90 28.40 24.60 24.60 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.14 0.71 -4.95 4 316 2009-01-15 18:05:59 2003-07-24 11:35:34 6 4 227 0 12 263 0 174.40 44 23.76 CHANGED huhh.hhhscKA+Kp.G..suhhWsoschEuss.ApLshhllcuGhc.sDahKsVtsNhPVVN-LPPEGphDhTFCp+YpLu.cDuhTahhIPGs..PuosAtDps....sssssTp...sGEDhTE....EEN..h.lS........stpLPlRahs.H.....hTasupDth.hhHlsRApc..tlTsLthspcsShlpsL .......................................................shhs.hhscKs+Kp.G.KsuhhWsosch.Ssu.uphshhhscuGhc.....cDahKsVtsNhPVVNDLPPEG.hDhpFCsRYphu.cDuhThhhIs.hs..ss.s.ss.t-ps.......................ssss.NTs....hsuEDhoE............hE-s...h.lS.....................stpLPlRahs.H.....hT.asupDth.thHlspspc.ssVTALthspcsShLpsL................................................................................................ 0 1 2 7 +6459 PF06631 DUF1154 Protein of unknown function (DUF1154) Vella Briffa B anon Pfam-B_16329 (release 10.0) Family This family represents a small conserved region of unknown function within eukaryotic phospholipase C (EC:3.1.4.3). All members also contain Pfam:PF00387 and Pfam:PF00388. 19.80 19.80 19.90 19.90 19.30 19.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.33 0.72 -4.30 11 207 2009-01-15 18:05:59 2003-07-24 11:38:13 6 15 89 0 109 225 1 45.30 44 4.24 CHANGED +c..shsPlslEsL+ppKuahKlhKKQQKEL-oLKKKHsKE+suhQK ........h......ltshslE-LKQpKualKlh...KKQpKELcsL+KK.HtKcpsshp....... 0 23 31 68 +6460 PF06632 XRCC4 DNA double-strand break repair and V(D)J recombination protein XRCC4 Moxon SJ anon Pfam-B_21077 (release 10.0) Family This family consists of several eukaryotic DNA double-strand break repair and V(D)J recombination protein XRCC4 sequences. In the non-homologous end joining pathway of DNA double-strand break repair, the ligation step is catalysed by a complex of XRCC4 and DNA ligase IV. 
It is thought that XRCC4 and ligase IV are essential for alignment-based gap filling, as well as for final ligation of the breaks [1]. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.14 0.70 -5.59 5 165 2010-01-05 15:47:52 2003-07-24 11:41:38 7 2 131 30 88 172 0 255.40 26 82.27 CHANGED MERKVSRIsLoSEPslsaFLQVuWEKTLGSGFVITLTDGHSAWTGcVSESDISQEADDMAME+-KYVDELRKALVuuAGPA.....ssYsFsF.....SKEopaFSaEKsLKDVSFRLGSFcL-KVsNPAEVIRELICYCLDTIAEpQAKNEHLQKENERLLRDWNDlQGRFEKCVsuKEALEsDLYpRFILVLNEKKAKIRSL+.chLsElQEhEK..slKpKpETo.......ssSDposDcDulYD...GSTDEEsEuPsss.........................StpsPAsls+DDSLlSS.DlsDIAPSRKRRQRMQKNLGTEPKhAoQEpQhQEKEKsssusPposKK-tsSAEsMSLETLRNS.SP-DLFD ...............................................................................................................................................h.-s.s..sa.sphp.t.lt......ttt.t....tp...aht.htphh...t.ss............apht...............sp.p...ph.h.hc...K.ph..p..s.l....ph....RL..G.s.h.pLppstss.sp.hlp.........-.lhsas...lsshsc.ps..csp...c...L..pp....cs...-...+...L........p...-h...s...clp...sphE+hlssKcthEs-Lap+FlhVLNEKKsKIRsLp.ch...L.....sp.....sp...p..pc.....shppc..t.pss.................hps..c.sss.cc....s............D......t.o...s.c-c.p.pt......................................................................................................................................................................................................................................................................................... 0 20 38 62 +6461 PF06633 DUF1155 Protein of unknown function (DUF1155) Moxon SJ anon Pfam-B_21101 (release 10.0) Family This family consists of several Cucumber mosaic virus ORF IIB proteins. The function of this family is unknown. 
25.00 25.00 86.00 85.80 20.70 17.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.28 0.72 -4.69 2 5 2009-09-11 08:20:59 2003-07-24 11:44:03 6 1 1 0 0 8 0 41.00 79 91.52 CHANGED MSATLSTTLSFEPPLSLLAEPGTWFADTMDFc+pp.lthhpp MSATLSTTLSFEPPLSLLAEPGTWFADTMDFRKKHSVRWYpp. 0 0 0 0 +6462 PF06634 DUF1156 Protein of unknown function (DUF1156) Vella Briffa B, Eberhardt R anon Pfam-B_16387 (release 10.0) Family This family represents a conserved region within hypothetical prokaryotic and archaeal proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 21.20 21.20 24.20 22.70 20.50 19.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.25 0.72 -4.22 43 252 2009-01-15 18:05:59 2003-07-24 11:47:17 7 11 214 0 122 263 47 69.50 31 7.82 CHANGED sslPlcpIsttut+EKph..ppG.phosLHhWW.ARRPLuusRAllhAuLls.....................t.th-tpphhpll .......shPlptlsttut+E+ph..ptu.p.psLHhWW.ARRPLussRAllhAsLls................................t.chp.h....................................... 2 53 89 106 +6463 PF06635 NolV Nodulation protein NolV Moxon SJ anon Pfam-B_21143 (release 10.0) Family This family consists of several nodulation protein NolV sequences from different Rhizobium species [1]. The function of this family is unclear. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.41 0.71 -5.16 2 48 2012-10-02 21:03:42 2003-07-24 11:47:37 7 1 43 0 13 247 7 191.10 32 93.17 CHANGED MTADh.ssPAAPphRsLGPLIPAupLEIWcsAhpAhAAAERH.Q+VRuWARhAYpREhApG+sEGhpAGAEEMAtLIuQAssElApRKAVLEppLPQLVhEIlp-LLGAFDsGchLV.sVRHAIEppYpssEVCLHV.PhpsDhLApEFtsaDG.-GRP+lRIcsDPsLSscpCVLWSEaGNVDLGLsAQhRALRLGFG.LoEcuE. 
..................................................t..h....................ll......As.pht.hh-Ah.thlsAA.+ccA..ppl+......spA..p.p......t.......aE...p.p.R..A....c....Gac-G.h...p.p...G........sc........c....h.......A..t.......L....l.......A..p....s.......s.....u....c....s.......s........c..h.h.......A.......s...L.......E...p....p....lsp.......L..V...lph....lRpl....L.G...t..h..D.....s..p...E..hll.+.s.lppAl...st.h.tps.p.p.lsL+VsP.s.c.V....-...hL....p.p.ph....s..p..h..s...s........s.h....p.l.pI...sDspLusspClLtophGsV-hGL-sQLcALRhuh..........t....................................................... 0 3 6 9 +6464 PF06636 DUF1157 Protein of unknown function (DUF1157) Moxon SJ anon Pfam-B_21250 (release 10.0) Family This family consists of several uncharacterised proteins from Melanoplus sanguinipes entomopoxvirus (MsEPV). The function of this family is unknown. 22.70 22.70 23.20 475.70 18.10 22.60 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.27 0.70 -5.86 5 5 2009-01-15 18:05:59 2003-07-24 11:49:41 6 1 1 0 0 5 0 367.40 38 96.28 CHANGED IYFV..IVGIIFlIaaIIasl....NpYlshhpNs.NaNhos.hhppls......lspc.hl-FhIIpGSh.-NlIlsRshDGlQslKlIPTFVFLoNalps-lIsYGGF.NsoLsFPsNNIhN+S.sWIYGGWFIluNGNGDaICV+SNN.sNI+RaNcscsllKaL..sYNsucsN.....DsIIloNIlYKhc.EIlLINIscIussNs.ss-YYsILGNILlYIKsNaINDKpFIIsGpsGLNSKYIpLAI-TIF.lDslISSCYDGuITYVcNN.lasQSSFIlIDKcLCPYGVRFGlRY.LENshspaNLlLYATIYNcNpcspplEYFNDcLSINIFESIKSpsNYsDcYVNW-cIDISsLssNFE.sslsLlDDss.oYo+cpL-sIlNK IYFV..IVGIIFlIaaIIasl....NpYlshhpNs.NaNhos.hhppls......lspc.hl-FhIIpGSh.-NlIlsRshDGlQslKlIPTFVFLoNalps-lIsYGGF.NsoLsFPsNNIhN+S.sWIYGGWFIluNGNGDaICV+SNN.sNI+RaNcscsllKaL..sYNsucsN.....DsIIloNIlYKhc.EIlLINIscIussNs.ss-YYsILGNILlYIKsNaINDKpFIIsGpsGLNSKYIpLAI-TIF.lDslISSCYDGuITYVcNN.lasQSSFIlIDKcLCPYGVRFGlRY.LENshspaNLlLYATIYNcNpcspplEYFNDcLSINIFESIKSpsNYsDcYVNW-cIDISsLssNFE.sslsLlDDss.oYo+cpL-sIlNK... 
0 0 0 0 +6465 PF06637 PV-1 PV-1 protein (PLVAP) Moxon SJ anon Pfam-B_21397 (release 10.0) Family This family consists of several PV-1 (PLVAP) proteins which seem to be specific to mammals. PV-1 is a novel protein component of the endothelial fenestral and stomatal diaphragms [1]. The function of this family is unknown. 22.10 22.10 22.30 22.10 20.70 21.70 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.58 0.70 -5.91 2 56 2009-01-15 18:05:59 2003-07-24 11:54:25 6 2 31 0 29 44 0 320.80 46 79.83 CHANGED MGLuM-+.usYARsGsppRGCWYYLRYFFLFVSLIQFLIILGLVLFMlYGNVHsoTESsLpATEhRA-uLYSQllGLoASQuNLoKpLNhohhsK-slMQ.hLssRR-h-RINASFRQCQGDhlhYhN.pRahAAIILSEKQCp-QhK-hNKoC-ALLFhLspKVKTLEhElAKEKslCoKDKESlLhsKR.sEEQL.tCsKsREhQpQEpQlscEpLpKVQuLClPLDp-KFphDlhshWRDSlI.RoL-sLsY..aa.LhsEhASlRRsC-phPulMooKlEELARuLRAsIERVsRENu-LpRQKLEhppuhpAuQEA+t+stpEAQAREspLpAECuRQTQLALEEKAsLRtpRDNLt+ELEt+KREhEQLRhElslR.SALDTClKsKS.PhhPs.Rs.GPsPNP.PIDPASLEEFK++ILESQR.Pss.PsA.sSG .......................M-+.usY.R....sG.....s...ps+GCWYYLRYFFLFVSLIQFLIILGLVLFMVYGNs.HsoT.E.upLps..TEpRA-sLhupllsLpuppsNLoK-LNlTspuK-ul.QhllssRR.......DL-RINASF+Q.CQ...s-hh.a.sp.+ahsAIlhSEctCtp..php-hNpoCpuhhhhLtpKsK..sl..EhEh.ppKhlCsK-K-u.hhhsKp.s.EpQhttC.sctpt.p.QEhQlsptpLppVpshChslDppKh..ph.plWR-Sll.+sLssh.....sa..ahs..s-.....h.tlp+.CcphPtlMtsKlppLAptL+hsItpVstENtclpRQK.thppshtuspctttt..pEsttp......phpt-ss+QspLALEEK.ssLpcc+-tL.+pLEt+c+phtthphplslp.tsL-sCl+sK.......................................................................................................................................................................................................................................................... 0 1 5 12 +6466 PF06638 Strabismus Strabismus protein Moxon SJ anon Pfam-B_9849 (release 9.0) Family This family consists of several strabismus (STB) or Van Gogh-like (VANGL) proteins 1 and 2. The exact function of this family is unknown. 
It is thought, however that STB1 gene and STB2 may be potent tumour suppressor gene candidates [1]. 25.00 25.00 28.40 25.90 21.10 20.80 hmmbuild -o /dev/null HMM SEED 505 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.80 0.70 -6.19 10 386 2009-01-15 18:05:59 2003-07-24 11:55:16 6 3 160 0 106 275 0 335.00 52 94.50 CHANGED H+-R.pR-RH+s.pS+.....cspSRu-KSVsIss....P.spPhhsus.............sstpht.-sQDDNWGETTTAVTG.TSEcSISpEDlsplsK-hEDss.shsCcRYLGhulushLGLLAhlTPlAFllLPplhW..............R-cLcsCGssCEGLalSLAFKLLILLIuoWALFhRps+AslPRlFlFRALLllLlhLhs.hSYWLFYGVRlL.cu+........-csY+GIVpYAVSLVDALLFIHYLAlVLLElRHLQPtFslKVlRSTDGESRaYslGpLSIQRAAVWlLEpYY+DFPVYNPsL.sls.....KpRtuK+huGFKVYsV...DGs.ssss.suQSRAhlAAAARRRDsSHNEhYYEEAEaERRVRKR+ARLVsAVEEAFTHIKRh.........psE-p.....t.Psp.MDP+EAAQAIFPSMARALQKYLRoTRQQshHTMESILpHLAFCITHsMTPKAFLEpYLssGPTlQYs+-pttscpWTLVSEEsVTsuL+cGssFpL+psDFSLlVTV++IPal+LoEEaVDPKSHKFVL+LQSETSV .....................................................................................................................................................TTA.........VT.G..TSE.tS.ShED.lu...p.hsh-.............pDo...s...G...h...s.C...pRYhGsTV.A....h.sL.uh..luFloPlAh....llLPpl.h.a..................h.p.L.sCsstCcGlhlSlAFKLllLhIGhWAl......F..h.R.....p.p.uslPRlFVFRAhhLVllhlhs.hSYWLFYhVpI....-up................sh.sYp....ul.V........pYssShsDsLlFIHYluVlLhEl.Rp....L..pPhahlKlVRSsDGESR.YslGpLSIQRAAlalLpp.YYp-Fsla................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 31 38 66 +6467 PF06639 BAP Basal layer antifungal peptide (BAP) Moxon SJ anon Pfam-B_21444 (release 10.0) Family This family consists of several basal layer antifungal peptide (BAP) sequences specific to Zea mays. The BAP2 peptide exhibits potent broad-range activity against a range of filamentous fungi, including several plant pathogens [1]. 25.00 25.00 26.10 25.80 20.90 20.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.54 0.72 -4.27 3 15 2009-01-15 18:05:59 2003-07-24 13:29:27 6 1 4 0 4 14 0 74.00 36 76.60 CHANGED MlLLASFVuHA+IISGETKEsSNTRSMTMT.TRuuuplIluDNKsuLCYLDuthLEYlC++Tp+CY+oLKcCLEaC .............................MlLLAS..hVhHAphlsGps+EsoNstShTMT..T..puusphllu.....-scsulCYL....cuh.hhYsCc+Tp+CY+sls-CLt+C 0 0 2 3 +6468 PF06640 P_C P_prot_C-term; P protein C-terminus Vella Briffa B anon Pfam-B_16385 (release 10.0) Family This family represents the C-terminus of plant P proteins. The maize P gene is a transcriptional regulator of genes encoding enzymes for flavonoid biosynthesis in the pathway leading to the production of a red phlobaphene pigment [1], and P proteins are homologous to the DNA-binding domain of myb-like transcription factors [2]. All members of this family contain the Pfam:PF00249 domain. 
20.00 20.00 20.00 28.60 18.80 19.80 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.74 0.70 -4.34 5 26 2009-01-15 18:05:59 2003-07-24 13:58:00 6 4 9 0 7 24 0 179.70 52 62.13 CHANGED AIAIDMSKLQSA-+RRGGRTPGRSPKoSu............o+oKQsDsDpPGsEAtuss.............uAASSPR..HSDtAR..SsVVDP-..PNQPN.SSSGSTGss.EtssSuEDATGPWlLDPIELGDL.W.EAES...EMDALMsIGssupDuAslEGL-Al..sspAQVDDLF..DMDWDGFAAHLWGGPEQp.DHuA..plQQAAEPpssAu..........................tAuAAuAsuCoPDE+cLEAFEoWLLSDSF ............AlsIDhSKLQSA-+RRGGRTP..GpsPKuus............p+sKpsDsspPttcAt.uss.............uuASSPR..pSD.......VVsPs..sNQPN.SSSGSsGss.-tssSpEDAoGPWsL.-Pl.E.hGDL.W.EA-S...........EMDALhshGssu.DusslpGltsl..sspAQsDDLh..DMDWDGFAAcLWGsP..t...p+uu...lppAuEP.....................................sshusssD-..LEuFtoWLLSDSh........ 1 0 4 7 +6471 PF06643 DUF1158 Protein of unknown function (DUF1158) Moxon SJ anon Pfam-B_21508 (release 10.0) Family This family consists of several enterobacterial YbdJ proteins. The function of this family is unknown 25.00 25.00 31.80 83.20 18.30 18.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.92 0.72 -3.66 3 460 2009-01-15 18:05:59 2003-07-24 14:09:29 6 1 452 0 20 95 2 79.10 88 99.95 CHANGED MKHPLETLlTAAGILLLALLSCLLLPAPSLGLTLAQKLVshFHLMDLNQLYTlLFCLWFLLLGAlEYYVIRFVWRRWFSLER .....MKHPLETLhTAAGILLMAFLSCLLLPAPALGLTLAQKLVohFHLMDLSQLYTLLFCLWFLVLGAIEYFVLRFIWRRWFSLAD... 0 1 1 11 +6472 PF06644 ATP11 ATP11 protein Moxon SJ, Hammonds G anon Pfam-B_21093 (release 10.0) Family This family consists of several eukaryotic ATP11 proteins. In Saccharomyces cerevisiae, expression of functional F1-ATPase requires two proteins encoded by the ATP11 and ATP12 genes [1]. Atp11p is a molecular chaperone of the mitochondrial matrix that participates in the biogenesis pathway to form F1, the catalytic unit of the ATP synthase [2]. 
22.00 22.00 22.40 23.10 20.20 21.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.85 0.70 -4.90 22 313 2009-01-15 18:05:59 2003-07-24 14:22:25 6 7 259 1 214 313 1 231.70 28 77.06 CHANGED hptppsshhc+YcsKlpphupppshpslp....cLcpthcp.............................ppt.tshsppspphpshct..s..s......st.ts...........KsLsShlclE+lcshstcE..lctlWcthat.....pcsslsAslP.h-pYcthhspA+psPhFlLPLPRp.................pGhEhaalQWp......................ssphlFTsLtpYKl+t-hAtPahslpaah-Lsp-KslVLMpGplpscst.............losp-AQhLh.slQpFY.............sttspp+hpLLcsFs+ts.pcFchpcllp.h ...............................................................t....t.h.t+YtpKl.phtpp..s......p....tlct.....................................................................................t...t..t.........t...................................s.....................................KsLsslhclchlcp.h.ss..cE...lptlWpthat......spps.lsAsls.sppapthhspAppsPhFllPLPRp............................pGhEhahhQat.....................................ssplhFTsLtpYp...h+s.-hAtsphslpaas..-.Lt..............c............pK.........GlVLMpGph...ssh..................lssp-AphLs.plQhFY....................tptsppphpLlcpFspts.ppFchppllpp.h...................................... 0 68 118 178 +6473 PF06645 SPC12 Microsomal signal peptidase 12 kDa subunit (SPC12) Moxon SJ anon Pfam-B_21331 (release 10.0) Family This family consists of several microsomal signal peptidase 12 kDa subunit proteins. Translocation of polypeptide chains across the endoplasmic reticulum (ER) membrane is triggered by signal sequences. Subsequently, signal recognition particle interacts with its membrane receptor and the ribosome-bound nascent chain is targeted to the ER where it is transferred into a protein-conducting channel. At some point, a second signal sequence recognition event takes place in the membrane and translocation of the nascent chain through the membrane occurs. 
The signal sequence of most secretory and membrane proteins is cleaved off at this stage. Cleavage occurs by the signal peptidase complex (SPC) as soon as the lumenal domain of the translocating polypeptide is large enough to expose its cleavage site to the enzyme. The signal peptidase complex is possibly also involved in proteolytic events in the ER membrane other than the processing of the signal sequence, for example the further digestion of the cleaved signal peptide or the degradation of membrane proteins. Mammalian signal peptidase is as a complex of five different polypeptide chains. This family represents the 12 kDa subunit (SPC12). 21.40 21.40 21.40 22.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.54 0.72 -4.17 30 303 2009-01-15 18:05:59 2003-07-24 14:34:31 8 4 253 0 216 311 3 76.70 32 70.93 CHANGED MDFpGQ+hAEplhplllslsulluhllGYhhQphuholalhhsGhslosllslPsWPh.Yp+pPlpWtpstsp.........sscp ..........hDapGQ+hAEplhphllhhs.............ul........l...uFlhGYhhpshphslalhhuGhslosLl..slPsWPh.Y.p..+pPlcWhsst.t......sptt........... 0 73 122 179 +6474 PF06646 Mycoplasma_p37 High affinity transport system protein p37 Moxon SJ anon Pfam-B_21350 (release 10.0) Family This family consists of several high affinity transport system protein p37 sequences which are specific to Mycoplasma species. The p37 gene is part of an operon encoding two additional proteins which are highly similar to components of the periplasmic binding-protein-dependent transport systems of Gram-negative bacteria.It has been suggested that p37 is part of a homologous, high-affinity transport system in M. hyorhinis, a Gram-positive bacterium [1]. 
20.10 20.10 20.20 22.00 19.60 17.70 hmmbuild --amino -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.08 0.70 -5.41 6 52 2012-10-03 15:33:52 2003-07-24 15:07:28 6 1 48 5 17 60 2 353.50 34 84.94 CHANGED sKFhcsaSpphs.KLccs........cVslohsh.lDDutoplsslposps..DFAFlsSpulso...Ns.pchsshlQTLTsuFKFDpshD.aYsDGs....LcphAcchsplFscs....P.YpsWsDEsQ............cWsGs+YpFlY..-P.scLlsFYRGMILIsGo-pplspIKcAWNpKNWssFpNaGIh.pGposSuG+ahL.-pLl+KHFs.tp.s..sLtpDhsssPsKYpssp...GR-IGpsssh+IsFDDtsSFAWTcNcpsups...YsPp.............pNsKlEILohT-PhlYDIGlFsKslspchtshIuEsFIpLAKsspDhYGPslGYNGY+hIsDFsKEVlclhpKAhG ............................................tFhc.hpppFN.cL.Kptp.tp+ph.cVphslps.spDp.pshlspL.psscs.....Dluhssssphls..........pspppsls..tl......QTt.ThtFpassss..sthYpDGs.p.pDsLRp......hAcctsclasch....s...YssWp..-pp...p............pasGs+Yp.hY....c.s...sc....lsp.aYRGhIhIs..G...s-..psppcIhKAW-sKcW-sFhpaGIl.au.cssSuGKY+hp.sLl++HFs...pphs......slpp....Dhp.pp.....spYhstt...........uspl..Gp..p..s....c.........h.......+......IuFDDE.G.S.a..u..WTcscpsupp.....apssc...................................................p.NsplchLTlTsPhsYD...lslh...c.p...u...lscpplcLloculhsLupsppso.YGshsGYN..tYp..hIps.p...hhp.htt................................ 0 9 15 16 +6476 PF06648 DUF1160 Protein of unknown function (DUF1160) Moxon SJ anon Pfam-B_21501 (release 10.0) Family This family consists of several hypothetical Baculovirus proteins of unknown function. 
21.40 21.40 21.40 22.60 19.90 21.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.56 0.71 -4.34 14 51 2009-01-15 18:05:59 2003-07-24 15:13:58 6 1 49 0 0 44 1 123.50 30 93.34 CHANGED FlsplhpshshssKVAhVstpLcpaLp-ht..pD-pFppKhhpllcMFlspcIsl-slhsllsuVDuh.cLTcpQI-YLssplatNppllpIlpsFl-tp+Ls---Is-lupFLVpElssAhhYp ..................................FhsplhpohshssKVuhVphplppaLp-hp...pDcsFppKhppllpMFlppclss-slhslhsssDsl.cLocpQl-YLhsplhpN..........splhpIlpp..al......c........tpc........L........sc--.lstlupFLVpchspA....t............................................ 0 0 0 0 +6477 PF06649 DUF1161 Protein of unknown function (DUF1161) Moxon SJ anon Pfam-B_21545 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 25.00 25.00 37.10 43.10 18.70 17.70 hmmbuild --amino -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.46 0.72 -4.04 24 623 2009-09-11 10:51:00 2003-07-24 15:16:02 7 1 568 0 68 233 4 53.30 70 55.37 CHANGED CEclKu-IptKI.ANGV..ouaTLEIVsN-pss..........ssupVVGpC-ssT+KIlYpR .CERlpSDIsQRIINNGVPtouFTLoIVPNDQVD.Q................PDSQVVGHCANDTHKILYTR. 0 5 17 44 +6478 PF06650 DUF1162 Protein of unknown function (DUF1162) Vella Briffa B anon Pfam-B_16458 (release 10.0) Family This family represents a conserved region within several hypothetical eukaryotic proteins. Family members might be vacuolar protein sorting related-proteins. 
20.30 20.30 20.30 20.30 18.70 20.20 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.96 0.70 -5.19 39 689 2009-01-15 18:05:59 2003-07-24 15:23:28 7 58 271 0 493 739 4 250.30 20 8.17 CHANGED hplslauPYhllN+TuhsLthcscshttpstsp.s.............................chttPhhFSFsc.s...tpps+shl+lsc.op........WSp.....shShDulGsstplth.sssp.p...........................................tth....lGlslppGpGcYth...TKlVTluPRallpN+hs.slpltE.ss.........p.......ph....................h.pltssp..................................hhPhahh.ppstp.ppLs....l..ph.t.....ssp....WSusFtlscl.ushal+l..........................................hp..t...sstpthl+l-lhhcsuThFlphsstppp.hPaplcNh...o..............................cpp.hhaaQpss...p.................thp.hthplsPpshhsYAWD.Psutp .............................................................................................................................................................................................................l.lhsPYhhlNcT.s..h...L.hp.t.p..t............t.........p..................................................................p...shha.......satt..p...........ptsts.lpl....sp.ut...........................................WSp..........h.uh......-....shu.......s.h.t.tlhh......t.tpt.......................................................................lG.lslp...upt....p....h........T+lVoh...s......P+allp.Nc..s....s.h.pl.phtp.ss............p......ph.......................................................h.plts.sp...............................................................................................p.h..sh.ah.......pt.s.tp....ph.lp..........l.....ph.t.......spp........................WS.ts.Ftls.c....ss.....h..hlpl........................................................................................................................................................................................................................................................................................
.........tpp.t...h...lpl-l.t...h.....p.....t................s.................o....h................h......l.php....p..p..p..t..........P.hhl.Nh...o..............................p...l.ahQt..........................................................h.l.Ptp....h.asWs.Phh..t.............................................................................................................. 0 190 271 399 +6479 PF06651 DUF1163 Protein of unknown function (DUF1163) Vella Briffa B anon Pfam-B_16509 (release 10.0) Family This family represents the C-terminus of hypothetical Arabidopsis thaliana proteins of unknown function. 19.40 19.40 22.40 75.80 18.60 17.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.34 0.72 -4.43 6 20 2009-01-15 18:05:59 2003-07-24 15:30:57 6 1 2 0 17 20 1 68.50 55 39.95 CHANGED hsGlIsKsIMcDIKERp-lRFGSRLhLPDCRcsTsGpMsYsCDEspLRFEPGSppKAThFsc.aPpChhl .hsGlIsKcIMcDIKE++EV+FGSRlhLTDCRcsToGsMsYsCDEssLRFEPGSEhKAT.hFGs.aPpCh.h....... 0 6 6 6 +6480 PF06652 Methuselah_N Methuselah N-terminus Moxon SJ anon Pfam-B_3337 (release 10.0) Domain This family represents the N-terminal region of the Drosophila specific Methuselah protein. Drosophila Methuselah (Mth) mutants have a 35% increase in average lifespan and increased resistance to several forms of stress, including heat, starvation, and oxidative damage. The protein affected by this mutation is related to G protein-coupled receptors of the secretin receptor family. Mth, like secretin receptor family members, has a large N-terminal ectodomain, which may constitute the ligand binding site [1]. This family is found in conjunction with Pfam:PF00002. 
21.50 21.50 21.60 21.70 21.30 20.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.64 0.71 -4.72 10 146 2009-01-15 18:05:59 2003-07-24 15:38:27 7 8 23 6 75 182 0 156.10 28 34.30 CHANGED CsaaDTVsIScup+L.NGSYlYEsllIPA+LTGcYDF+llsDGs+hpVp.cHlRGCVCKLKPCIRFCCP+cplhss..upChsshs-..pLschc...Pal.slTLsDGSlsph.....+hps-hlVpp-..pP.C-cMasLsc.pphhDcaoL...........FENGolhR+hD.phhLsKpEYCLpPhpa...sssSl..cIhP+sC .......................................CsahDTVslot....th..........suSY.apslllPsph..h..u...Ysa.hh...ts..p..s..pH.l.RGC....sCpl+sClRhCC.P.tphhst.....spC.sshpp....hhthp..shl.slT.h.s.sso..s.ph.....chhpchh.lppc...hP.Cp..ch..a....h...lsp........pasL...........a.ENGolh+p.D....p.thlspp-YClt.h.h.....sp...sh....lhsh.C......................................................... 0 14 17 58 +6481 PF06653 Claudin_3 DUF1164; Tight junction protein, Claudin-like Moxon SJ, Coggill P anon Pfam-B_21553 (release 10.0) Family This is a family of probable membrane tight junction, Claudin-like, proteins. 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.11 0.71 -4.52 24 100 2012-10-03 00:20:40 2003-07-24 15:41:17 6 4 7 0 96 107 0 156.80 19 88.16 CHANGED Mth...................lhhlshh..lhluhlhshlulFo.sWhs.psh.................shGllPa.s.....s......ahshuuhhhalohshhhhhh..lhhhhshtplp+pGastphRphFhhIuhhshllslLplsAhlLhulshsphttph...s.................tLGYSua.lslsoAllshsss...uLuhtl................ucpp.h...........p ............................lh.lshh..lhhuhl.LshlulF.o.....ssWhs.p..................................thGl.l.Pa.s...tt.s................Whss.suhhhhl...shshhhhhl...lhhhhhhhplhppshst.p...h.+..hhhhhlshhuhlhslhhhl.uhhlhu..ssh..sph.t..h.....................tlGa.ShW.lslsu...sl.lshhsh...hluhhh...........t...................................... 
1 31 34 96 +6484 PF06656 Tenui_PVC2 Tenuivirus PVC2 protein Moxon SJ anon Pfam-B_15006 (release 10.0) Family This family consists of several Tenuivirus PVC2 proteins from Rice grassy stunt virus, Maize stripe virus and Rice hoja blanca virus. The function of this family is unknown. 25.00 25.00 1054.90 1054.70 16.90 16.60 hmmbuild -o /dev/null HMM SEED 785 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.32 0.70 -6.75 5 33 2009-01-15 18:05:59 2003-07-24 16:25:15 6 1 6 0 0 34 0 784.10 74 94.19 CHANGED lplSCstsssPC+psphLNGYYIEcGuICYN+AsINLaET..CFoGKYDp+lPVH..FSc...FGGpRYlcCDDEIlscso.LVGFpQscaTSKsLPINscNu-LVSYspuc.cuFhGlVYVEslcYC...spsscscpIlNcusuLph.slCpDGlLhuusSEC-lsVuETEFclPSCussKLPlYDDpIcVCpNssC+NVoCTsSslClsYDRMDFlhRlKNY-CS+oY+YalYllILllIIllChssIsIINILlhLKP...VFWLlKKllaulsGLCHR+PslpcsElDMupVRVV-Ds--GLLlsE-S+APNSNVs-plhpKAR+h-NGLIYIPYILhhLsLlhoous..CpDLVSSLSNIEVCsupsCcasSKlpLTLhNTPQDFCFKopSDVYKIRlspIoV+CLSRPLYYTNSYKRsIu+-DWKCFEG.cCosDsSpSIWDKS-S.LHYDYC.VsDFHlFSYC..PFYH.YNWKRIpYcPTS+LACoV+KCs-sQFEIsGYlo.KNGpVl+ElSGaoSsY-usIloIoLLSYNot+LP+EYVECDuKAYcRsuNDLGSFDKELhGSIQCPT+-DAhpLTsKCpTKlpul.EDpssIpY-EsDGlspLscTtoEPL+sVlVSpsGISLDThDlaPVTLoIpopccIoSIlTS+IShNsTsCcIKGVERKlKKTlI+VcosoKllLSDlLsCcDLAsCSLTFNs-....c+uECaTTSY+ssuoGssIpC+FlYSGDo...IhCKYsVSPl-IsVVSPplDloSF-uVKcSoQNWssFlh-hIRDNPKLTIVASILPIGhlLKTlKu 
.VKVPCSARAPPCKLTYELNGYFIENGLICYNRASVNYFET..CYTGNYDYKLPLHPSFSK...FGGHVYLSCDDAILQNVS.LVGIQQTEYTSSPLLITNSNSEKISYSNLK.TGFLGIVYAVETRACIQPDQAKKPEEIINHGVAI..KPSCTDGVLYYINSACEVNVSDQTFSIPSCESVKLPTYDDTIEVCDKGGCQNVTCHPGEICDKYERMDMIMRIKNYQCSHIYRYSLYSIILFFVIVIVFTLITIMNILFFLKP...AFWLLKKVLYSMVGLCHRRPVVDEVSVDMSTVRVVDEAEEGLLVVEDSIAPNTNVSDKVKRKGRKVENGLIFIPYlLMILLLVCSAES..CQDLVSSISNIERCTNNSCDFISKMKLTLLNTPQDFCFKTSTDVYKIRFNSVRVMCLSVPLYYTNSFKRVISREEWKCFEGEGCRTDGTHSIWGESTS.LSFDYC.VTDFHIFSYC..PAYH.YNWKRIEYEPTSSRACTIMKCMDTKFEIVGYIQ.KNGHVLKELGGITSKYDSPLVSISLSNYNSARMPREYAECDGKAYLRTANDLGSFDKELLGNIQCPTKEDAVVLSSKCKTKILSN.EDLPVIRYlERDGVDMLEHVKSEPLKDVLVSSSGISLGTLDLFPVELNLQFKEAITSIITSKISLNGTSCKITGIERKFKKTTVSIESSNKVYLSDILACEGLAVCPMILNNI....KKGTCITTTYYSVTVGSMIKCKFIYSGDT...LMCKYDVSPLEITVISPSLDVSSFEAVKTSTTNWMELLAGIVKDNPKLSLVASIIPIGLILKTIR.S 0 0 0 0 +6485 PF06657 Cep57_MT_bd DUF1167; Centrosome microtubule-binding domain of Cep57 Moxon SJ anon Pfam-B_9878 (release 9.0) Domain This C-terminal region of Cep57 binds, nucleates and bundles microtubules. The N-terminal part, family Cep57_CLD, Pfam:PF14073, is the centrosome localisation domain Cep57 [1]. 24.40 24.40 25.10 31.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.41 0.72 -4.03 29 210 2009-01-15 18:05:59 2003-07-24 16:32:35 8 7 119 0 127 209 0 76.60 34 11.72 CHANGED pcsohpsutsstpuL...upllptLpDEhsHhphchpcLtt.hpplDsohspcpRcpLtpclcpLlcclEtKu-QIhpLhcl .....................s.psSpuss-sL...u-lLpsLQDEhspMsh..-HppL...h+plpc...o..spch+csLcpELEpLlc+MEtKucQIsKL++h..... 0 27 44 77 +6486 PF06658 DUF1168 Protein of unknown function (DUF1168) Moxon SJ anon Pfam-B_9807 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
22.10 22.10 24.30 24.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.20 0.71 -4.67 36 257 2009-01-15 18:05:59 2003-07-24 16:56:40 7 5 228 0 194 254 3 141.50 35 69.22 CHANGED Pc+thtlPctscphs.t.....ssPEhVpNV.GSSAGAGSGEFHVY+ptRRREYpRlchh-cpsc+-ppscpappKpcEpcctsEEKTtK+RtKRpK+KpKpppt+ptcpts................ttppspppspssssssccs...........t...tsthtsc ...............................p+.h.lPpt.pp.hs.p......ssPEhVpNV.GSSAGAGSGEFHVYRphRRREYpR.chM-ppsc+cctcpEFpp+hccpcptsEE+TsKpRtKRpK+Kp++ttt+ptptpp..............................tttt.tttttpt.ppttpttptt.......................ssttt.t................................................................................... 0 68 105 155 +6489 PF06661 VirE3 VirE3 Vella Briffa B anon Pfam-B_16550 (release 10.0) Family This family represents a conserved region within Agrobacterium tumefaciens VirE3. Agrobacterium tumefaciens (a plant pathogen) has a tumour-inducing (Ti) plasmid of which part, the transfer (T)-region, is transferred to plant cells during the infection process. Vir proteins mediate the processing of the T-region and the transfer of a single-stranded (ss) DNA copy of this region, the T-strand, into the recipient cells. VirE3 is a translocated effector protein, but its specific role has not been established [1]. 
19.50 19.50 22.90 24.00 18.50 17.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.95 0.70 -5.31 4 15 2009-01-15 18:05:59 2003-07-25 11:30:11 6 1 9 0 3 14 0 270.50 55 49.60 CHANGED psRLEsP.+K+KYsuDhpllsKLDsGFRGEIuYKlhGNcpLRlDpspELT+E+Gll+KTKpVLKRsssTGplYLuhHERKoWsSVooHpYupDGoLRtKHVKYKDGRFEE+WERDEsGsLlRTRYhNRGRLssthh+PlSEEhuAPYcSGs-pRLYRcLTRQsGS++ETFERDDKGNLELlupKRhGFS+pohKutDRpTSpTpIRKLGGAFSKSYRSLLDKEGNElGRDllSHRRLaNKRSAlYD-uTGQLKShKHTFGKIYKuEupYLsAclKcVSKKILGVTVtR+LssLSEcEhpAp+LRshEsutH+QAWQc ...........ARLEsPppK+KYsuDMpllDKL-sGFRGEIuYKhhGNcpLRlDsspELT+E+Gll+KT+cVLK.RsspTGsVYLuhaE+KoWtSV..oSH.YupDGoLRsKHVKYKDGRFEEKWERDEsGtLhRTpYhNR.sRL....FpPlSEclusPYcSGs-NRLaRcLTRppGS+pETFERD-KGNLELIGpKRhGFS+sSsKu.DRpTSpTpIRKLGGAFSKSYRSLLDpEGNElGRDI.SHRRLhNKRSAlYD-uoGQLpSsKHTFGKIYKSEotYLsAclKcVSKKILGVTVtR+LssLScpEh-AQ+LRstEustH+pAWQc......... 1 0 2 2 +6490 PF06662 C5-epim_C C5-epim_C-term; D-glucuronyl C5-epimerase C-terminus Vella Briffa B anon Pfam-B_16571 (release 10.0) Family This family represents the C-terminus of D-glucuronyl C5-epimerase (EC:5.1.3.-). Glucuronyl C5-epimerases catalyse the conversion of D-glucuronic acid (GlcUA) to L-iduronic acid (IdceA) units during the biosynthesis of glycosaminoglycans [1]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.19 0.71 -5.01 18 182 2012-10-03 02:33:51 2003-07-25 13:05:58 8 7 150 0 112 191 17 180.00 42 36.94 CHANGED Q-.ppGuWshshshphhps.t..LssGWhSAMuQGpAlSlLsRAYphTp.DpcYLsuAt+Als.aplsspcGGlhsshhsh......hsWYEEYs.....ToPsoaVLNGFIaoLlGLYDhs.............stphsscApplFppGlcSLKthLshaDs.GshohYDLpahs.hspsPsluthpYHshHlp.LphLhsIs.s..-.happhhc+Wpu ................................D.ppGGWsh.VpRplh.psht..LpsGWhSAMAQG.........pAlSsLs.RAYhhTp.DcpYLpuAh+Ahtsa+hs...upp..sGVhuh.Fhsp.............asWYEEYP.....TsPsSaVLNGFhYSLlGLYDLpp..........sstphuc-A.ppLappGhcSLKthL.PLYDT..G....SsohYD...LRHhh......lshuPNlAphcYHshHlpQL.hL.sls.p...s.hh..tphsc+Wp......................................... 1 36 52 86 +6491 PF06663 DUF1170 Protein of unknown function (DUF1170) Vella Briffa B anon Pfam-B_16582 (release 10.0) Family This family represents a conserved region of unknown function within MAGUIN, a neuronal membrane-associated guanylate kinase-interacting protein. This region is situated between the Pfam:PF00595 and Pfam:PF00169 domains [1]. All family members also contain an N-terminal Pfam:PF00536 domain. 
21.90 21.90 23.60 24.40 17.10 16.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.12 0.71 -4.37 6 166 2009-01-15 18:05:59 2003-07-25 15:36:33 8 13 39 0 95 132 0 172.60 47 23.89 CHANGED huss..oK+-ssAlpDLYIPPPPutPYoPRDEpGshss-sts+.ph.shsVsKGSESPNSFLDQEsR+R.FslsEpDplsashph-psh.hss+hR-sTPTYG+LRPISMPsEhNWhu-s-D.uKh+RpuR.pEsSLhRYhS....s-+Is..sEE.......ahhuRsusccphhR............................u++KScpusosu...chSLLsSh ............................h.sossK+-psAl.DLYIPPPPu..PY......PR.DEpGshsspshpp..ph.shPstKGSESPNSFLDQE...R+R.Fsl.s-.pDpl..sYph-psh..stth.REpTPoY...G.K.R..PlSMPs-hNWhGth-s.s+.+t.cuR...t...EsuLhRYhS....NE+Is..tEE.......a.hhpRsspccphh+................................................sKKKutpuso.o...c.SLLsS.............................................................................................................. 0 2 8 50 +6492 PF06664 MIG-14_Wnt-bd DUF1171; MIG-14_Ce; Wnt-binding factor required for Wnt secretion Vella Briffa B, Pollington JE anon Pfam-B_16593 (release 10.0) Family MIG-14 is a Wnt-binding factor. Newly synthesised EGL-20/Wnt binds to MIG-14 in the Golgi, targetting the Wnt to the cell membrane for secretion. AP-2-mediated endocytosis and retromer retrieval at the sorting endosome would recycle MIG-14 to the Golgi, where it can bind to EGL-20/Wnt for next cycle of secretion [1]. 
20.90 20.90 21.10 21.10 20.60 20.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.80 0.70 -5.59 15 334 2009-09-13 15:31:17 2003-07-25 16:08:38 7 4 113 0 203 313 0 260.00 30 52.85 CHANGED tpC-plslhclGhLcaspYllslphss.p.h.............plp.slphphhphNssFTpl.lhh+hlFhlhohlhhshFsppl+plshpsWhlEQKhhshLhslLllhNsPhahhslhhsssahllLsslhQulFhshLhhFWLlhhcplhl...pspRcsl+s.YhsKllhlsllhlshllhshhp+sppLcDPhaShhssshshpshhsahhlsslh...YlLaLlahIl+sasplcspth..h.....................................................Ra+alhlhslhshshsllhhhhs...........hsph.thtpo..SApFhshYulhNlYlasltalYuPScp .......................................pCs.l.hhclG.lsap.Yhlslph.s.pph...................plp..slp...h..h...hhp..NsuFop.l.lhh+hhhhsh.s.hllh.....sh.....a..h.+..pl..............p.hhs.h.s...hhlE..Khh.hLh.hhhhhN.....Ph..h.ohhh....s.....h.h.....hh..h.sslhQuhF.hshLL.FWl.hhht.....thhh............ptp+.p.plp.......h..Yh......p.....lhhlshh.hshhhhshh....p....pss.p.LpsP.hYphhss.shsh.p.hh.....hhFhhlsshh......Yhla..Lhahlhpshpplpth.....l............................................................................RhKFLhhhohh..shsholhhhhhp..h.th.....................t..hs.......h.sh..s........uu.FhuhYGhhNhYlaslhhlYuPut..................................................... 0 98 111 160 +6494 PF06666 DUF1173 Protein of unknown function (DUF1173) Vella Briffa B anon Pfam-B_16904 (release 10.0) Family This family contains a group of hypothetical bacterial proteins that contain three conserved cysteine residues towards the N-terminal. The function of these proteins is unknown. 
25.00 25.00 31.70 30.70 18.30 23.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.20 0.70 -5.63 17 141 2009-01-15 18:05:59 2003-07-28 10:16:29 6 2 109 0 50 143 15 350.90 28 91.03 CHANGED patlsspshst........sssplQshLspAappttR...PhCLC..p..tssltMYlA+hs.sp.allKRMPtoGtcHss..uCsSYEss.phSGLGtlhGsAlp-.scsGpssL+lsFuLo+tssRss......Psupsst..ss.slcsssp+loLRuLLHaLW-cAtLscWsPshsGKRsWuhVRphLLpAAcphps+uhsLu-hLaVPEsFps-pKstlupR...RpstLushth..t.Gpp+hhlllGElKphssuR....Gc+lsl+Hh.PshP..hhlcschacRlt+pFssELchWcuspst.....+LlsIAphs....hsssGhspl--luLMhVoEpWIPhEostEt.LlctLssp+RRFlKsLRashssstPhssslLhDspspssuhalVsutsspca..ctslcchl.scsthss..WhWcsup ....................................tt.s.th.pthLtpAa.t..ppt.p......shChC.....p..pss.....h.hhl..tph.......s...sp...ahltRhPsoGtp...H..ss..sCs.ac......sssch.SGhu..........t.hhusslpt.spsGphsl+LshuLscts.spts............stststt....t..ss.pss..tpphoLhulLchLWppAtLspWtPshs.G...+R.shsh.V+ttLlpAApphtsp.tt....s.LschLal.t.aps....pp+ptlttc............ptthluphth....ts.pchhlllu.lpchsss+h.....upp...lsl+ph.ss...hs........hhh..spshac+lp+RFstE...LthWpssp............+llslut.hs............hstsuhspltcluLMhVo.cp.WIPh-osaEhhlhcpLstppRpFlKsLRashsts.........tshsshhLhDst..t......s.sshh.V.u.hs.s..pa...cpthcphh...spphhss..W.hWcst.t.................. 0 8 21 39 +6495 PF06667 PspB Phage shock protein B Moxon SJ anon Pfam-B_21806 (release 10.0) Family This family consists of several bacterial phage shock protein B (PspB) sequences. The phage shock protein (psp) operon is induced in response to heat, ethanol, osmotic shock and infection by filamentous bacteriophages [1]. Expression of the operon requires the alternative sigma factor sigma54 and the transcriptional activator PspF. In addition, PspA plays a negative regulatory role, and the integral-membrane proteins PspB and PspC play a positive one [2]. 
24.30 24.30 24.40 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.59 0.72 -4.04 36 768 2009-01-15 18:05:59 2003-07-28 11:43:46 7 3 748 0 106 251 47 74.20 63 97.85 CHANGED Ms...h.hlhsPlllFhlhVAPlWLlLHY+oKppsupGLopc......-pppLppLhppA-+Mp-RlpTLEpILDAEsPsWRp+ .....................................MS..ALFLAIPLTIFVLFVLPIWLWLHYS...N.R.u.s.p...up.L..S.Qu......E..Q..QRLtQLsD-AcRMRE..RIQALEsIL.DAEHPNWR-.............. 1 20 44 74 +6496 PF06668 ITI_HC_C ITI_HC_C-term; Inter-alpha-trypsin inhibitor heavy chain C-terminus Vella Briffa B anon Pfam-B_16674 (release 10.0) Family This family represents the C-terminal region of inter-alpha-trypsin inhibitor heavy chains. Inter-alpha-trypsin inhibitors are glycoproteins with a high inhibitory activity against trypsin, built up from different combinations of four polypeptides: bikunin and the three heavy chains that belong to this family (HC1, HC2, HC3). The heavy chains do not have any protease inhibitory properties but have the capacity to interact in vitro and in vivo with hyaluronic acid, which promotes the stability of the extra-cellular matrix [1]. All family members contain the Pfam:PF00092 domain. 
21.50 21.50 22.50 21.50 21.20 20.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.07 0.71 -4.88 29 404 2009-01-15 18:05:59 2003-07-28 12:08:42 7 13 48 0 193 292 0 177.50 33 20.30 CHANGED soGlsVNGQlIGsct..s.tp+pcTYFupluIshpp.chp.lElTspplsl.cuppp.shsWp-osslspsulplols+ppslsVolsssloFsllLH.phhKtpPhppDaLGFYlhsoc+hSupsHGLLGQFhpt.-hclhchhsGsDspK...............s-AohpVKucplsVTRthpKDYppc....GppVsCWFV+N .........................................................oGlpVNGpllGs.t....tppppTYFuplslhhpp........c..hp...lElo..scp.Isl....p.G....spp..shsWpcosh.l..t.p.....ss...lplo...ls.....+pps..lsl...olscslsFsl.llH.phh+.p............sh..........pp.-aLGhYlhso.cphSspsHGLL.G...........Q..........F...hpt.chclhs..sutssp+...............scA..shhVKupplsVT......Rt.p+DYpps.....Gp..plsCWFl+N........................................ 0 19 28 87 +6497 PF06669 X_fast-SP_rel Xylella fastidiosa surface protein related Moxon SJ anon Pfam-B_21796 (release 10.0) Repeat This family consists of several Xylella fastidiosa surface protein specific repeats which are found in found in conjunction with Pfam:PF05662, Pfam:PF05658 and Pfam:PF03895. 22.60 22.60 23.60 22.80 21.70 22.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.36 0.72 -4.04 2 73 2009-01-15 18:05:59 2003-07-28 12:12:23 6 31 27 0 9 81 0 68.30 60 6.35 CHANGED NAVQsQASQPVTFoGNEGuVKRoLGQuVVISGESSTAGTYSGGNLKSVVDEAAGtIHLQLADSPKFGNVl .........NAVQsQASQPVTFoGNEG.o.VKRoLGQuVVISGESST.A.G.TYSGGNLKSVVDEAAGuIHLQLADSPKFGNVl................ 0 3 8 8 +6498 PF06670 Etmic-2 Microneme protein Etmic-2 Moxon SJ anon Pfam-B_22009 (release 10.0) Family This family consists of several Microneme protein Etmic-2 sequences from Eimeria tenella. Etmic-2 is a 50 kDa acidic protein, which is found within the microneme organelles of Eimeria tenella sporozoites and merozoites [1]. 
21.90 21.90 238.40 64.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.40 0.70 -5.75 2 8 2009-01-15 18:05:59 2003-07-28 12:17:37 6 2 2 0 0 10 0 302.50 73 99.96 CHANGED MARALSLVALGLLFSLPPSSAVRTRVPGEDSFSPESGVLSGTDAPERRP..............lVPGLsEGNCGRLTVRNGLSVDETIKVTSAGWTKSERDFIVSLVADETRKVVQLRESEGASGASGPGPAPAEKPPSGQGSAEEAPKGEGGQEKPSVPLIAVRIHGSGGDKGESAPQSAVLLYGNDESEPTEVPLETAAGPTTPLMVLITQQNPKEVEVRVLAWIST.......................DATTGKGSWKENSVVVGSSLSGRDLTVNLSDCGPSSLRVYGSASADLVTVKEGMCEADDPELIALTRPHTSAASPLPAEEGDVAQDAQQSAGAQQEAEsQEVGEPQQEAAAAEQGSSAAESDTQQSS ...MARALSLVALGLLFSLPPSSAVRTRVPGEDSFSPESGVLSGTDAPERRP..............lVPGLVEGNCGRLTVRNGLSVDETIKVTSAGWTKSERDFIVSLVADETRKVVQLRESEGASGASGPGPAPAEKPPSGQGSAEEAPKGEGG.QEKPSVPLIAVRIHGSGGDKGESAPQSAVLLYGNDESEPTEVPLETAAGPTTPLMVLITQQNPKEVEVRVLAWIST.......................DATTGKGSWKENSVVVGSSLSGRDLTVNLSDCGPSSLRVYGSASADLVTVKEGMCEADDPELIALTRPHTSAASPLPAEEGDVAQDAQQSAGAQQEAEAQEVGEPQQEAsAAEQGSSAAESDTQQSS......... 0 0 0 0 +6499 PF06671 DUF1174 Repeat of unknown function (DUF1174) Moxon SJ anon Pfam-B_22180 (release 10.0) Repeat This family consists of a number of Caenorhabditis elegans specific repeats of around 36 residues in length which are found in two hypothetical proteins. This family is found in conjunction with Pfam:PF00024. 19.70 10.10 220.50 10.10 15.30 10.00 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.33 0.72 -4.31 36 528 2009-01-15 18:05:59 2003-07-28 12:22:07 6 7 5 0 394 512 0 23.10 67 46.28 CHANGED SGEETTTuAVTEASG...EETTTu...AVTEu .....sctsTs..uA.......VTEASG...EE.oTTu...AsTEu........ 0 67 130 394 +6500 PF06672 DUF1175 Protein of unknown function (DUF1175) Moxon SJ anon Pfam-B_21722 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 210 residues in length. The function of this family is unknown. 
20.60 20.60 20.60 20.60 20.50 20.30 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.42 0.70 -5.00 13 358 2012-10-10 12:56:15 2003-07-28 13:11:22 6 3 342 0 32 141 11 193.80 79 93.81 CHANGED .hhhthpspssstcucs...thLssppStsFRsWFVRIAppQlRps.ss+W..cp+DCAGLVRFAspEAL+tHDucWhcssGhs........s+Yh.PchsL.usphhhtpphp.....pssGp..husassAhsLlppNo+Flu+D...lspApPGDL.lFFcQtDsp...HLMIahGc.........allYHTGshsc.s-stlRsVolppLhpa.DscWpPhssNPsFlGlYRhsFLs ............................tlLhLlsalsCsV.............AHS...EMLNVEQSGLFRAWFVRIAQEQLRQG.PSPRW..YQQDCAGLVRFAANEs.LKVHDSKWLKSNGhS..................SQYLPPEMTLTPpQRQLAQNWN.....QGNGK..TGPYVTAINLIQYNSQFIGQD...INQALPGDM.IFFD.QGDAQ....HL.MVWMGR.........YVIYHTGS........A...TK.T..........DNGM.RAVSLQQLMTWKDTRWIPNDSNPNFIGIYRLNFL.A...................... 0 10 15 25 +6501 PF06673 L_lactis_ph-MCP Lactococcus lactis bacteriophage major capsid protein Moxon SJ anon Pfam-B_21754 (release 10.0) Family This family consists of several Lactococcus lactis bacteriophage major capsid proteins. 
19.40 19.40 20.10 19.60 18.00 18.80 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.11 0.70 -5.67 2 7 2012-10-03 06:22:39 2003-07-28 13:15:30 6 1 7 0 0 9 0 346.70 95 81.31 CHANGED MGsNETQEhMKQAIEAGVKVRELEsKVEELNKEREELKKEREAsIPSEKPpDsERKFMRELGsKMsEMPEQGFLREFANuusLNVVNSLGSITSKYARKSGIYDGAMKARFQGLTLAEDGVDDTFIpGTFKAGTDKNKuQTAoKRSLRPQMAEAYLQMDKATVRGVNDSGALSEYVMSEMVNRVIQKVEaNMILGSsDGSNGFYGLKTATDGWTKQIEYTDLF-GITDAVAECSISDAITIVMSPQTFAELRKAKGTDGHSRFNELATKEQIAQSFGAVNLETRVWMPKDEVAVYNHDEYVLIGDLNVENYNDFDLRYNVEQWLSETLVGGSIRGKNRSAYLpKKuS ................................MGANETQEIMKQAIEAGVKVRELEAKVEELNKEREELKK.EREASIPSEKPQDsERKFMRELGDKMsEMPEQGFLREFANuuDLNVVNS..LGSITSKYARKSGIYDGAMKARFQGLTLAEDGVDDTFIpGT.FKAGTDKNKuQ.TATKRSLRPQMAEAYLQ..MDKATVRG..VNDSG.ALSEYVMSEMVNRVIQKVEYNMILGSADGSNGFYGLKTATDGWTKQIEYTDLFEGITD..AVAECSISDAITIVMSPQTFAELRKhKGoDGHuRFNELATKEQIAQSFGAVNLETRVWMPKDEVAVYNHDEYVLIGDLNhENYNDFDLRYNVEQWLSETLVGGSIRGKNRSAYLKKKuS.......... 0 0 0 0 +6502 PF06674 DUF1176 Protein of unknown function (DUF1176) Moxon SJ anon Pfam-B_21791 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 340 residues in length. Members of this family contain six highly conserved cysteine residues. The function of this family is unknown. 
34.80 34.80 35.00 35.10 34.50 34.70 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.28 0.70 -5.33 26 714 2009-09-11 20:41:29 2003-07-28 13:20:13 6 5 643 0 80 389 14 321.60 47 92.18 CHANGED lshsshsss.....shthpacDWpVsCDNspsCpAsuhsscp.s.........lslhlpRpAGssssl.phclph.up.tts.........stthpLhlDGcshuhhssphptt.t..................phsssplsAlLpslppupplslt..sust.......hplShsGhsAuLLhhD-hQuRlGTssAllcK..GspPssuVssAsshPsl..ssssssst.....shstpptpthtpssh..........hpsspsp..tttssh..pst................lstLssspsLlhhsC.hsGAYN.ps.shWllscp.............tshpsphlshp..................sssas..suplsshtKG...RGluD.CGstspWha....DGpp..FhLsptsppupCcshtuus.....sW.hsahsp ............................................................hs..hphlWAA......PuQ+sFuDWQVTCNNQNFCVARNsG-HpG..........LVMTLSRSAGA+TDA.lLRI-t...GGltsPcAp.....cutIAPRLL.LDGcPLu.h..sus+W+loPahLh...............TsDsATITAFLQhIQ-.up.AITL+..sGsQ........TlSLuGLKAALLFIDAQQKRVGSETAWI+K..GsEPPLSVPPAPALKpV..Al.lNPTPs.....PLS.E.ERsDLLDYus............WRhNGhc..CS..LDPLRREsp.................VoALTDDKA..LhhlsC..EAG...AYN.TIDLAWlVSRK.............Ksh.so.RsVRLRL.......PFssutEos-........hELMNAsFDEKoRE..L..V..TLAKG...RGLuD.CGIQsRWRa....DGpR....F.RLVRYAsEPoCDsWHGPD....AWPTLWIT.R....................................... 0 9 26 58 +6503 PF06675 DUF1177 Protein of unknown function (DUF1177) Moxon SJ anon Pfam-B_21818 (release 10.0) Family This family consists of several hypothetical archaeal and and bacterial proteins of around 300 residues in length. The function of this family is unknown. 
25.00 25.00 63.10 62.90 19.80 19.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.76 0.70 -5.81 22 133 2009-01-15 18:05:59 2003-07-28 13:23:24 6 2 116 0 50 126 4 262.00 54 88.23 CHANGED clpplss......pGsTDFl+lhlsGptG+..GGsAPTlGIlGRLGGIGARPpphGLVSDADGAlsAlAsALKLhcMpc+GDhLsGDVllsTHICssAsshPHcPVsFMsSPVshspMN+pEVcPph-AILSIDTTKGNRllpa+GFAIoPTlKEGYIL+lS--LlcIhphsTG+.shshPlTpQDITPYsNslYHlNSIlQPs.lsTsuPVVGVAITucssVPGsuTGAoc.sDl-pAuRFslEVAKtaspGpscFYDppEatcLpphYGshspLQphGcp ...................p.hpplpup.....pGsTDFl+lhIPGppG+.tGGsAPTLGIlGRLGGIGARPphlGhVSDuDGAlsAlAsAhKLhcMpp+GDhLsGDVllsTHICPcAPTpPHcPVsFMsSPl-htsMNcpEVs.sphDAILSlDTTKGNRllN++GaAlSPTVK-GYIL+lS--LLclhphsTGc.shsaPlTpQDITPYGNGlaHlNSIlQPu.sATsAPVVGVAITopssVPGCuTGAoH.sDlstAsRFslElAKtFGpGpspFYDppEappLhphYGshs+LQshGp.t.... 0 12 25 39 +6504 PF06676 DUF1178 Protein of unknown function (DUF1178) Moxon SJ anon Pfam-B_21872 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. 26.50 26.50 26.90 30.90 24.20 26.40 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.99 0.71 -4.00 79 438 2009-09-13 03:22:43 2003-07-28 13:25:38 6 3 420 0 154 372 1244 143.00 40 98.30 CHANGED MIhasLpCs.psHpFEuWFtSussF-sQtspGLVsCPhCGSspVpKslMAPplssuppt.....................tssPssssssts............................pttltclpccltpso-.VGccFAcEARchHh.GEsscRsIaGcAos-EActLlEEGIsVhPL..P.....hsscpp...s ...........MIhasLpCs.pu.HpFEGWFtSus-F-pQtpptLVsCPsCGospVpKtlMAPplstupsptt...............................................s.sh.t.t....................................................ttl.tthtpht+pVhpss-.VG-+FA-EAR+IHa.GE.s..tRuIhGpAos-EscsLhEEGI-lhPL..P...h.st...t.................... 
0 37 91 118 +6505 PF06677 Auto_anti-p27 Sjogren's syndrome/scleroderma autoantigen 1 (Autoantigen p27) Moxon SJ anon Pfam-B_21881 (release 10.0) Family This family consists of several Sjogren's syndrome/scleroderma autoantigen 1 (Autoantigen p27) sequences. It is thought that the potential association of anti-p27 with anti-centromere antibodies suggests that autoantigen p27 might play a role in mitosis [1]. 23.20 23.20 23.20 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.23 0.72 -4.19 14 282 2012-10-03 10:42:43 2003-07-28 13:32:39 7 6 238 0 193 271 18 40.90 37 20.80 CHANGED cl.cphuchLhpGApMLscpCspC.GsPLFc..KsGclaCPsCp ....t.h.phhuchLLpGhpMLscpCs..p..C....G..sPLh.......c.......+.p.G.c.h....h...CssCp....... 0 83 124 160 +6506 PF06678 DUF1179 Protein of unknown function (DUF1179) Moxon SJ anon Pfam-B_21899 (release 10.0) Family This family consists of several hypothetical Caenorhabditis elegans proteins of around 106 residues in length. The function of the family is unknown. 25.00 25.00 27.10 26.00 24.00 23.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.54 0.72 -3.72 4 16 2009-09-10 23:27:31 2003-07-28 13:37:01 6 1 4 0 15 15 0 97.00 34 92.33 CHANGED MaSFshIhcsIsL.FGs.FLLlssllpCp..SKKKF+sDt+scLV.....uPhppssscpspt.tsPsuQsPPs+hPhEpospht.....-DTLANV+SlPPcpp.t.ct.chKpKp .......................h.hlhpplhL.....hlLhhsll.pCp..pKKKFcsDt+ApLV.....sPss.sssc....ssps.psPsuQ.........sPPs+pPlEposp.t.....EDTLANV+SlPPEKS...pp.chKh..t....... 0 7 7 15 +6507 PF06679 DUF1180 Protein of unknown function (DUF1180) Moxon SJ anon Pfam-B_21907 (release 10.0) Family This family consists of several hypothetical mammalian proteins of around 190 residues in length. The function of this family is unknown. 
24.80 24.80 24.80 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.14 0.71 -4.35 7 117 2009-01-15 18:05:59 2003-07-28 13:39:38 7 1 66 0 80 126 0 144.30 30 90.58 CHANGED M......uhhs.h.ssshLLhLLs.L...sulhpcAssAss.....pPtspp.s..PPuPosu.......................Psuh..hts.tGssussSsGusLss..sss..spPhhpRALhVhsshSAhlllYFllRshRhR+Rp+KTRKYGVlcTshtshEhsPL.EpDDEDDD.TlFDhp..RR .......................................................................................................h.t......................................................................................ssst.......................................sst..................tstt...s.p...s.s.s.ssslhsp.s..s.......h......-h...s.h.hp...pul..hVlhuhouhlllhhll....R.shRh+....+.+t.......+K..oR........+YGllsss...-shEhs..PL...-pD...DE--DpTlFDsph........................................................ 0 15 19 43 +6508 PF06680 DUF1181 Protein of unknown function (DUF1181) Moxon SJ anon Pfam-B_21912 (release 10.0) Family This family consists of several hypothetical proteins of around 120 residues in length which are found specifically in Trypanosoma brucei. The function of this family is unknown. 25.00 25.00 167.70 167.00 17.70 16.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.69 0.71 -4.43 2 14 2009-01-15 18:05:59 2003-07-28 13:41:54 6 1 3 0 10 14 0 112.40 87 82.70 CHANGED MFQLVSssEVNKAYVSYPGGVSMCLRFPFCHCVWAHAMTLIEISGHYHRWVARGtSEDWDYSNSFVVVCsVLLENIAssEREGKCHLTFHAATSMH+sYMLVALpGKsVKAKVSFRF+EV ....MFQLVSsAEVNKAaVSYPGGVSMCLRFPFCHCVWAHAMTLIEISGHYH+WVARGESEDWDYSNSFVVVCsVLLENIAsNEREGKCHLTFHAATSMH+sYMLVALpGKVVKAKVSFRFKEV. 0 0 10 10 +6509 PF06681 DUF1182 Protein of unknown function (DUF1182) Moxon SJ anon Pfam-B_21992 (release 10.0) Family This family consists of several hypothetical proteins of around 360 residues in length and seems to be specific to Caenorhabditis elegans. The function of this family is unknown. 
25.00 25.00 26.60 26.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.66 0.70 -5.14 2 21 2012-10-03 04:04:29 2003-07-28 13:44:35 8 1 6 0 20 18 0 203.70 36 63.80 CHANGED h.N.......................NhsLsslTsTPhTYRDRIhhEF.lpuTshlFshhLsIFhhhR.tlhhohKsTIhFVTLGoFlLslPLhhhQhahVh.L.uhhpPhYTlhVCohlKshsSSTTSshQVLPhAVulYRYhhVVhpt+.ssWFVlsVH.IlohIFhlhAhLNaPhGE.ppND.ChsLRFSpuMEhVRI..TLhhNhhAlhlNhsIhpFVK+a- .................h...................................s.ThTshpaRDRIhhEahlpssshlhshhLsIF.lhh+.plhhphK..............sTIhFVTlGoFlLslPLhlhQsahVhhL.ss+pPpY.o.lhlCohlKshsSuTT.SstQVLPhs................VulYRYhlVVhct+hs.saFVlsVHsllohlFhlhAhLNaPhG-.ppNDhChhLRFSpuMEhVRI......TLhhN...hhAlllNh..sIhpFVK+a-...................................................................... 0 9 10 20 +6510 PF06682 DUF1183 Protein of unknown function (DUF1183) Moxon SJ anon Pfam-B_22014 (release 10.0) Family This family consists of several eukaryotic proteins of around 360 residues in length. The function of this family is unknown. 
30.80 30.50 40.90 36.50 29.70 30.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.27 0.70 -4.82 14 202 2009-01-15 18:05:59 2003-07-28 13:50:05 7 6 157 0 140 197 2 268.30 35 85.91 CHANGED h.hLlaLLlh.......stssps..hsspc.....tlLLpcVpsLTLhtsRhTouRRsuPlPQLKClGGSu.tCptapPcV..lQCtNpG..aDG.DVQWpCcA.sLspsa+hGps-VsCEGYcts-DtYlL+GSCGlEYpLpLTEtGccKhspth.pph.sthps.........................ps.ppt.t.....sshlhhlhhlhllAahlYthhhps.t.pttshsspsGhss...................tsss.tsts..uPPPPh.p........sshsss.s.sstssss.u...........tstts......tPGFW..TGhusGu..hhGYLhGppps..........pphstshss...........ta....ss.t..sptoupussusssou...T+ouSGaGuT+RR .......................h........................t...........ts.....tlLLpcVpsLTLp.t.s.+hTouRRssslPQLpCl........Gs..ot.hC...s..h...apP.cV..hpCpNpG...............aDs...DVQWpCpA......s......Ls...tpa+h..G..p.ssVsCEGYcts-DsYlL+GSCGlEYp.......LthTchGt.p+htp.h.t.t.th........................................................stt.............hshlhhl.hhlhslsahl...Yphhhts..t.t..........s.ttsshst.......................................hhtt..s..s..s..sPP..PPh............................t.ss.......st.............................tt.ts......tPGFW..oGhusGu....hhGYhhGpppt.................................t...s..s...............h...........st.....................s..s......s.t..ustsss.....p+puoGaGuTpRR...................................................................... 0 55 76 110 +6511 PF06683 DUF1184 Protein of unknown function (DUF1184) Vella Briffa B anon Pfam-B_16725 (release 10.0) Family This family contains a number of hypothetical proteins of unknown function from Arabidopsis thaliana. 
25.00 25.00 26.90 39.90 22.70 23.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.26 0.71 -4.67 4 36 2009-01-15 18:05:59 2003-07-28 14:53:55 6 2 2 0 29 36 0 122.20 37 85.12 CHANGED R..pNK+.LppSRYSPY.....luThcp........EpQKEEAIRLGVELSLFVAEAMFLLSDsL.................................aVhETYIKPKNGVY..................FshGlt.LspIV.IL.....pssplVKspsFc+hNQELKKLEE+LRSsK-sSEANGFsREtI+SsILpLWKSLFEso..hh.s.KshsLEhF ................................................................................pp.K-EslpLGV-LSlalAEuMhlLsDsl.........................................................................................................h.thhpp..hppl-pplts.p.h.......R-.hc.ph..hWcp.h.................................................. 0 9 9 9 +6512 PF06684 AA_synth DUF1185; Amino acid synthesis Moxon SJ, Eberhardt R anon Pfam-B_22398 (release 10.0) Family This family of proteins is structurally similar to proteins with the Bacillus chorismate mutase-like (BCM-like) fold. This structure, combined with its genomic context, suggest that it has a role in amino acid synthesis [1]. 20.00 20.00 24.00 35.60 19.80 17.10 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.00 0.71 -4.91 55 331 2009-01-15 18:05:59 2003-07-28 15:01:51 6 3 227 4 131 326 890 173.60 46 89.66 CHANGED IRKhlshlE-hht.EGGtss......spPl+psussAVlc.NPaAGR.aVEDLpP.lhchut.LGthLspcllssLGs.scplEuYGKuAlVGssGElEHu.ALhHs.hhGtshR-sl...............................ssupullPuop+hGusGstlslPltHpssualRSHFsohchslsDAPpsDElllsLuhosGGRsHs......RlG ......................IRKhlshlE-ohh.EGG.+ss......spP.l.+hsussAVl+.NPaAG+...aVEDLpP.lhshus.LGphLsccllsslGs.uctlEuYGKAAlVGhsGElEHAuAllHs.+.....hGsthRcAl................................s...uKuhls.sspRGusGsslplPlhaKssthhRSHa.olphplsDAPtsDEIlVslususGGRsHsRlG.................. 
0 20 56 86 +6513 PF06685 DUF1186 Protein of unknown function (DUF1186) Moxon SJ anon Pfam-B_22662 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 250 residues in length and is found in several Chlamydia and Anabaena species. The function of this family is unknown. 19.80 19.80 19.90 20.10 19.60 18.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.33 0.70 -5.21 7 84 2009-01-15 18:05:59 2003-07-28 15:06:29 6 3 79 0 27 61 0 224.90 33 79.09 CHANGED M..ptll.ph.h.pthh.c.tlcthl.p+pthhP.Ll.hLcph.pph.pllsctsa.hHlYAhaLLAQFREppAaPLIl+hhShst-..lhhclsGDllTEDLuRILASVssGclphIppLIEspsls.YVRuAAIpuLlsLVuptplSREplIpYFtpLhpt+Lc+csSa..lWsSLVsssssLhPtELh.pIp+AapssLl-shFIshEDVEp.lshpp.-psLpcLhps..apLIpDslt-hEpWhp.....shEs ..................................hppllppLhh.pp.shlPc-AlctAl..pppplsP.LLplL-cAhcc...ss....-lhs-ssh.....psh...l.aAhaLLAQF.REoRAhPLll+Lh.uhpp-...h.cslhG.DslTEDLu+ILASVs-.-hshlpcL...I-ssplssYV+sAAluuLstLlshpplsR-pslpaatpLl...shtL.ccp..sh......shssLl.s....shssLhstELh..IpcsFp...tsLl..-...h.l.sh.........-cV.p.ht.p...-t.ht.h..t......lp.................s.................................................................................................. 1 8 18 21 +6514 PF06686 SpoIIIAC Stage III sporulation protein AC/AD protein family Moxon SJ, TIGRFAMs, Coggill P, Bateman A anon Pfam-B_22771 (release 10.0), TIGRFAMs Family This family consists of several bacterial stage III sporulation protein AC (SpoIIIAC) and SpoIIIAD sequences. The exact function of this family is unknown. SpoIIIAD is the an uncharacterised protein which is part of the spoIIIA operon that acts at sporulation stage III as part of a cascade of events leading to endospore formation. The operon is regulated by sigmaG [1]. 
21.00 21.00 21.20 21.50 19.00 20.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.60 0.72 -4.17 87 1045 2009-01-15 18:05:59 2003-07-28 15:12:29 6 3 398 0 251 634 10 57.50 29 83.45 CHANGED hIhKIsGlullsthlstllKcus.sphAhhlslsutllllhhslshlsslhpslpplh ....hlhKIsGluhlsthhsplhK..csG.ppshAthlpLsGtllILhhslshlsslhcslpslh....................... 0 135 210 225 +6515 PF06687 SUR7 SUR7/PalI family Moxon SJ, Coggill PC, Bateman A anon Pfam-B_22775 (release 10.0) Family This family consists of several fungal-specific SUR7 proteins. Its activity regulates expression of RVS161, a homologue of human endophilin, suggesting a function for both in endocytosis [1,2]. The protein carries four transmembrane domains and is thus likely to act as an anchoring protein for the eisosome to the plasma membrane. Eisosomes are the immobile protein complexes, that include the proteins Pil1 and Lsp1, which co-localise with sites of protein and lipid endocytosis at the plasma membrane. SUR7 protein may play a role in sporulation [2]. This family also includes PalI which is part of a pH signal transduction cascade. Based on the similarity of PalI to the yeast Rim9 meiotic signal transduction component it has been suggested that PalI might be a membrane sensor for ambient pH [4]. 
24.10 24.10 24.10 24.30 24.00 24.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.57 0.70 -4.77 99 777 2012-10-03 00:20:40 2003-07-28 15:22:48 7 5 142 0 570 851 0 230.30 18 62.52 CHANGED hthlshll...hhsuhlhhlhssluss..t.........lpphahhph.sstth...................................................................................................................................t........................hpauhaGaCts.......................................t.pstpCop.spsuashssh..p.ht.p......................................thslPsshps......t......h...hphhhhhhlluhh...hohlshlh.hl.h...........................................uhhtt............................................................hhshlshlhshluh..lhshlusslssshalh..hht.hpp...........husphhshsWhslussllshlh .......................................................................................................................................................h..hhshhhhhsuhlhhl.....l.shluss......s.......lpshhhhph..shtt.h................................................................................................................................................................................................................................................................................................................................................................................................................................t.h..ph..................hphulauaCts.........................................t.tt.ss.C.....op..spss.a.s.hssh......p.http.......................................thsl...P.s..shps............p...h..h.....hph.....hh........shhhluhh....hshl...shlh.sh..h................................shhhp........................................................................hhsh...h...s..........hlhshluh.lhshlushls....ss.h.as.h...httthppt...............htsphGsp..h..h.sh..hWhuh...shsllshh.h..................................... 
0 135 305 487 +6516 PF06688 DUF1187 Protein of unknown function (DUF1187) Moxon SJ anon Pfam-B_22781 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of around 62 residues in length. Members of this family are found in Escherichia coli and Salmonella typhi. The function of this family is unknown. 20.70 20.70 22.20 40.90 19.00 18.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.16 0.72 -3.85 2 125 2009-09-11 12:17:23 2003-07-28 15:25:53 6 1 113 0 2 55 0 58.80 68 86.50 CHANGED YpIsATIcK.GGsPspWT+YSc..hTtpECcK.hStcKEAGhshp.cV+l.sF.Cccl.sp .YKITATIEKEGGTPTNWTRYSKSKLTKSECEKMLSGKKEAGVS.REQK.VKLINFNCEKL.SS............. 0 1 1 1 +6517 PF06689 zf-C4_ClpX ClpX C4-type zinc finger Bateman A anon Pfam-B_465 (release 10.0) Domain The ClpX heat shock protein of Escherichia coli is a member of the universally conserved Hsp100 family of proteins, and possesses a putative zinc finger motif of the C4 type. This presumed zinc binding domain is found at the N-terminus of the ClpX protein. ClpX is an ATPase which functions both as a substrate specificity component of the ClpXP protease and as a molecular chaperone. The molecular function of this domain is now known. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.41 0.72 -4.44 130 4410 2009-01-15 18:05:59 2003-07-28 15:29:21 8 8 4219 9 955 2727 1965 40.10 59 9.68 CHANGED phpCSFCGKspscVc+LI.AGs.s...saICcEClclspcIlpc- .........h.lhCSFCGKoQc...-...V...+KL......I.....A..G.P.u.........VaIC....DECl-LCs-IIcEE....... 0 326 634 816 +6518 PF06690 DUF1188 Protein of unknown function (DUF1188) Moxon SJ anon Pfam-B_22902 (release 10.0) Family This family consists of several hypothetical archaeal proteins of around 260 residues in length which seem to be specific to Methanobacterium, Methanococcus and Methanopyrus species. The function of this family is unknown. 
26.60 26.60 27.20 60.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.64 0.70 -5.24 8 32 2009-01-15 18:05:59 2003-07-28 15:31:43 6 2 30 0 21 29 2 244.00 40 96.65 CHANGED hchGITEoVKTlpS+l+ltDIlp-IucKKAsAIptaLEsEcFc..pAlIFGuYLoGualApsLsKcs.EVhlVDIpPaL+cll..spsI+Fh.......chhsshss.s.DLlVDLTGLGGlsP-hLS+LsPcVLIVEDPpGsh.DtsIpphsNTpERLs...pusK+GlL+TactuhsoKTSGTMTLoV-slp-uss-lpElDGVLYAlPsL+aaEtlLF+.E+DhcpFLsclspPAlTVSSLc-...h-sDElLpcNluhIpShVcE ................chGITEpVKThcSch+lhDIlpcIscKKupAlp.aL-utchc.......psllhGuYLoGsalAptLpc.t.pcVhllDIpPal+pll..spsl..cFh.........chhps..ph..s.DLllDhTGlGGl..ss-hLuchs.scVhIVEDPpush.DptIhchsNT.cRls....sutc+GlL+Ta..uhhSKTSGTMTLTl-slhcusp-lpcl-GVLYulssLcaaEtllF+.E+DhcKFlsplstPAlTlSolsc......-s-cllppNlscIpShl....................................... 0 6 11 17 +6519 PF06691 DUF1189 Protein of unknown function (DUF1189) Moxon SJ anon Pfam-B_22923 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 260 residues in length. The function of this family is unknown. 
20.70 20.70 20.80 21.00 20.50 20.30 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.49 0.70 -5.06 12 358 2009-01-15 18:05:59 2003-07-28 15:44:05 6 2 333 0 41 223 1 238.10 27 93.78 CHANGED MNlFpphhKSlYSP+DIAthRFQuIGKoIlYlhLLollsslPssYahsoslppuhsshppslpcDlPsFpIpsGcL....ps-tppsIphppsshsIlFDsosohpscp....ltsppsululLK-chll.sssGpsQphsYshhss.slsKcDlhshlsphculh..llslhslllalhssAhpFIpVolLAhIGhllpshtp+pLsY+plWploAYSlTLsTVFFsIMcsLphsVPsshhlsahVshIlLaLslKElP ...............................phhp.hhh.uh.a.sspchtph+..ht.thhKsIlYlhlLshlhslP........hshhhh....sslp........pslphspptIsc......clP.D.FpIc.NGpL.......p.s.c.tp.ps..h..h..p.ps...........sh....l...hs....FD.P.s..up.hsppp............l...s....pt.p....u....l...hhL...p.Dchll...hs..........s.......G.....t.o...p.o........h.........sYs.........p...hh...h...........slsp...psl.p....s...hl.s..t..h.....csh.....h........h.hlh.l.hh.llla...lh.plhhhF...l....s....l..h.l...lA.h.l..G...t.h.....h.u.th........p..p....pl.sa..tp..shplssYuhTlPslhhhIhph......h..p.....hh..hssuhhlh..hhlshlhha.lsh+p...................................................... 1 16 31 35 +6520 PF06692 MNSV_P7B Melon necrotic spot virus P7B protein Moxon SJ anon Pfam-B_22334 (release 10.0) Family This family consists of several Melon necrotic spot virus (MNSV) P7B proteins. The function of this family is unknown. 20.80 20.80 22.60 22.10 19.40 19.40 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -8.99 0.72 -4.19 2 21 2009-01-15 18:05:59 2003-07-28 15:45:34 6 2 3 0 0 21 0 60.50 83 73.55 CHANGED MACY+CDSSPGDYSGALLILFISFVhFhITSLSPQGNTYVHHFDNSSlKTQYVGISTNGDG .........MAChRCDSSPGDYSGALLILFISFVFFYITSLSPQGNTYVHHFDNSSVKTQYVGISTNGDG... 
0 0 0 0 +6521 PF06693 DUF1190 Protein of unknown function (DUF1190) Moxon SJ anon Pfam-B_22972 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 212 residues in length and is known as YjfM in Escherichia coli. The function of this family is unknown. 21.80 21.80 22.40 22.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.35 0.71 -4.19 26 1138 2009-01-15 18:05:59 2003-07-28 16:26:18 6 2 707 0 103 463 18 162.00 45 76.05 CHANGED cscps...sslapsss-Chpsssshst.....pCpsAYppAhscttcsAP+YsoptsCcu-FGtspCst.......................spsppu...............Gt..ahPhMuGahhuph...hsssst.h....ssQPlapots.....t....s.tspahsusGpshssuhpss...pshssscs........shtspssso+sl..SRGGFG..posuupouhGu ................................sDtssohYpsssDC.ssssst.us...........................CssuaNNAhpEt.psAPKasTp-sC.ucFG.tpC...............................................pspppS...................WhPlhuG.ahhuRl.............hts..s.t..........spQslasSts.su............suhshapssoGc..hstutsG.........+p...h.sscT........................uhus..tss.....T..s.....oTV....oRGGFG.cSsutpupht............................................................. 0 15 40 66 +6522 PF06694 Plant_NMP1 Plant nuclear matrix protein 1 (NMP1) Moxon SJ anon Pfam-B_22388 (release 10.0) Family This family consists of several plant specific nuclear matrix protein 1 (NMP1) sequences. Nuclear Matrix Protein 1 is a ubiquitously expressed 36 kDa protein, which has no homologues in animals and fungi, but is highly conserved among flowering and non-flowering plants. NMP1 is located both in the cytoplasm and nucleus and that the nuclear fraction is associated with the nuclear matrix. NMP1 is a candidate for a plant-specific structural protein with a function both in the nucleus and cytoplasm [1]. 
25.00 25.00 25.50 25.50 20.90 24.80 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -11.89 0.70 -5.50 2 45 2009-01-15 18:05:59 2003-07-28 16:31:52 6 4 27 0 22 47 0 238.30 52 89.86 CHANGED MAuKQMEEIQ+KLuhLpYPRANAPAQSLLFAGhERYtLLEWLFF+LLGD+SPFoQQNhQGDulDRDEEssRIQaLAEIApFLGITsosDsEAIQGRGSYE-RhEhL+LIVDLVEAShYADNPEWSVDcQltKDlQLlDuIAEKQuQIFSEECKLFPADVQIQSIYPLPDIu-LEhpLS-.op+h.sLQpMVp-LASKasYNPsE-Ys-sEhKLRtaLpSFL-Ts+oFNTIYTKEI+PWTHMMEVPQLHGFGPAANRLLEAYphLhKFLGNL+sLRDSasAhAAGS..osusEPSSVT+IIS-CEoALThLN+sLuILSsSlAREp ...........................................M-.lppKL..LtYsRupssuQpLLaA.GhERYtLL-WLFF+LLGD+SP.FoQQshQG-.uh.RDEEssRlQhL..hcIAph............LGlssphD.-sIpGcsoacpphthlp.I......l-lV...-ushhu-N...EaS..........lD-QhtKDlpLlDuIu..E+Qu...lFS-..ECKLFPsD.V..Q.I.............Q.....S..h..slPDls-LE.phuc.s+h.......h.....sLQphVp-LAuKas.Y.NP.sE-as.EsE.p.L+tpLpoFL-os+sFNhIYoKEI+PWTHhMElPQLHGhGPAANRLLEuYphLLKFLuNL+sLRDSasAhuhGS...........s......ustPSSVo+IlS-CEsALThLNcsLuILSsSlAR-..................................... 0 6 17 20 +6523 PF06695 Sm_multidrug_ex sm_multidrug_ex; Putative small multi-drug export protein Vella Briffa B anon Pfam-B_16747 (release 10.0) Family This family contains a small number of putative small multi-drug export proteins. 26.10 26.10 26.20 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.52 0.71 -3.95 36 377 2009-01-15 18:05:59 2003-07-28 16:38:41 6 1 337 0 147 343 71 120.40 33 70.77 CHANGED ELRGAIPhul.shGls.hpuhllullGN.lLPl..PhlLhhlchlhpahpc.hthhpt....lhshlhc+scc.pspplc+a...u..alGLhlFVAIPLPGTGsWTGuLsAhlLslch+cuhhAlhlGlllAull ....................ELRGAlPhul..sh....G.....ls....h.tuhllu...l.lGN...hlPl.............Phlhhhhcplh..pa........htc....p..h..htt..........hhs.h...hhc+u...cc...tsppl..c+h...u....ahGLhLFVuIPL.PGTGAWTGsLhAslLshch+puhhAlhlGlllAulI........................................................ 
0 64 112 132 +6524 PF06696 Strep_SA_rep Streptococcal surface antigen repeat Moxon SJ anon Pfam-B_22674 (release 10.0) Repeat This family consists of a number of ~25 residue long repeats found commonly in Streptococcal surface antigens although one copy is present in the HPSR2-heavy chain potential motor protein of Giardia lamblia (Swiss:Q24984). This family is often found in conjunction with Pfam:PF00746. 21.70 21.70 21.70 21.70 21.40 21.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.60 0.72 -6.88 0.72 -4.10 32 550 2009-01-15 18:05:59 2003-07-28 16:42:00 6 17 71 12 39 523 0 24.90 51 8.42 CHANGED AsYpAtLupYps-LAplQKsNu-tc ..AsYpApLupYps-LA+VQKsNA-sc... 0 3 11 25 +6525 PF06697 DUF1191 Protein of unknown function (DUF1191) Vella Briffa B anon Pfam-B_16754 (release 10.0) Family This family contains hypothetical plant proteins of unknown function. 26.40 26.40 26.40 35.50 25.90 26.30 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.99 0.70 -5.59 11 98 2009-01-15 18:05:59 2003-07-28 16:46:46 7 4 18 0 70 84 0 258.40 34 85.36 CHANGED po..pttslpsuphLDshlpDauh...Ruhp.+h+TGllYpl.sLPuNL..SGIchussRlRuGSLRRpGs.cas...-FslPsGlsVpPhscRlllVhpNLG.NaSs.lY...Ysls.G..YcllSPVLGLLsYsAsspusssp......pls.lhus.tsPIplsF..s.hsssstss.......usshCssFs.sG...sss.hss..ss.sCtsp.c.pG...HaulVl.ssps.tss..st..................ttp.ttW+.....hhsuhssGshlLGlL..llstls+hK+...+p+hc-MERcA.psE..sLc.suhVG+oRA.PsAssoRTpP .....................t.......tsscsLDthlp-hAh......+uh.....p..+TGhlass...plP.uNL......oGl..clu..slRlRuGSLhppGs...pat....pFtlPstlhs.......pPhs.c.RlslVapsLG...NaSs.hY...Y.sls..G..YphluPVlGLhsYsusshssss.......pls.lhus..tsPIhlpF..sshpssst..s........ssspClsFshsG......psp...hp..shsssssChsp.p.pG..HaulVV.psps..ss.sss..........................................ptp.phWh.....hls....u....h....ssGhh.h...LslL...s..hl.lh.hhs..+h++.cp+hppMERpA.psEsLp.hshVGco+A.PsAssTRTpP.......... 
0 8 45 56 +6526 PF06698 DUF1192 Protein of unknown function (DUF1192) Moxon SJ anon Pfam-B_22780 (release 10.0) Family This family consists of several short, hypothetical, bacterial proteins of around 60 residues in length. The function of this family is unknown. 24.30 24.30 24.40 24.40 23.80 24.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.86 0.72 -4.16 27 170 2009-01-15 18:05:59 2003-07-28 16:48:47 6 1 169 0 63 127 143 58.50 47 83.82 CHANGED DD.-hP+p+ss...p.lupD.LohLSV-ELppRIuhLpuEIsRlcsthspKpAs+sAA-ulF+ ...................D-.Dhs+.+su.......t.lupD.LulLSVsELcpRIALLpuEItRLcA-ht+KssoRuAAEALF+.... 0 17 37 47 +6527 PF06699 PIG-F GPI biosynthesis protein family Pig-F Wood V, Studholme DJ, Finn RD anon Pfam-B_8602 (release 8.0) Family PIG-F is involved in glycosylphosphatidylinositol (GPI) anchor biosynthesis [1-3]. 21.70 21.70 21.80 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.67 0.71 -4.36 25 274 2009-01-15 18:05:59 2003-07-28 17:00:18 6 13 240 0 190 266 2 180.90 27 70.52 CHANGED sallshhhlhlhhh..hhhh.hht...sshssLth.slhshslhpllYhhlp...hsssspp...............spp.................pshhhtlhull...lslllosPhhhhlllLaGAPl.lpplhcTaLhulHhShLshhPlhslhusshps..WpclFshpt.hs...hht.....sslssllGuWlGulPIPLDWDRPWQpWPIollsGAalGhhlGh.hlushh ...................................................................................................................h.........h.h.......h....ht....s.h.hhh....h..hhh..hha.hhlp....sststt.......................................................................................thh.thhphh...lhhlhus.hhthlhlLaGAPl..h.ph..hhc....Thlh.....uhhhohLsh..h.Ph.....hh.....lhGs...........s.hps..................Whpl.h....sh..t...t.shs...............hht................sslusllGA.WlGAhPIPLDWDRsWQtWPlshshGuhhGhhlGhhlu...h............................. 
0 53 97 153 +6529 PF06701 MIB_HERC2 Mib_herc2 Artzt K, Studholme DJ anon Pfam-B_6026 (release 8.0) Domain Named "mib/herc2 domain" in [1]. Usually the protein also contains an E3 ligase domain (either Ring or Hect). 19.40 19.40 19.50 19.80 19.30 19.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.46 0.72 -4.01 9 640 2009-01-15 18:05:59 2003-07-28 18:16:04 8 155 98 2 410 595 5 64.20 47 5.03 CHANGED GsRVVRGsDW+Ws-QDGs.uthGpVhp.......tupcuhlsVpWDsGspNsYRhGhcGpaDL+ls-ss .....GsRVVRGsDW.....c..W..sc.QDGGpG...p......hGpVsp.......................................t..s..sc........uhlsVpW.D.sG.s.p.ss..YRhGhc.Gt..aDL+lhs..................... 1 148 173 300 +6530 PF06702 DUF1193 Protein of unknown function (DUF1193) Vella Briffa B anon Pfam-B_16766 (release 10.0) Family This family represents the C-terminus of several hypothetical eukaryotic proteins of unknown function. Family members contain two conserved motifs: DRHHYE and QCC, as well as a number of conserved cysteine residues. 
23.60 23.60 23.60 23.60 23.50 23.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.56 0.70 -4.93 19 305 2009-01-15 18:05:59 2003-07-29 09:31:20 7 7 92 0 209 297 0 201.40 42 44.38 CHANGED Fhhssu..............sNhCFaGcC.YYCcs-aAlCG..ps-hlEGSlshaLPs..hhshppaRsPWpRoYpcscp.AcWEssssYCss.VKpp.PYDpGtRLLDlIDhulFD...................FLIu.........Nh.......DRHHYEoFpchsst.......shl.............................................laLDNu+uFG+ss+DEhSIL..APLhQC.ChlR+STh.RLpllp..s.ttLocshcEuhtpD....lsPlLscsHlpAl-RRLthlhpslcpChcppG..tpVl.pD ............................................................hosusNhCFaucC..YYCpppcslCG..ps..c..hEGSlshaLPs....hh.htphRpPWtRoYpc.tph.A....pWEh.....s...sYCpt...V+ph.P...Y..s..pu..RLLDlh...DhslFD...................F.LhG....................NhDRHHYE..sF..pt..s.st................shl...................................................................laLDNu+G.F...Gp.spD..EhSI.L...uP.....L.QC.C............h...........IR+STh.+LphLtt...s.....htLopl..hccuh.t.pD......lsP...l...L...s-....sHLpALDRRLthlLpsVcpChcp......G...ttVh............................... 0 51 67 138 +6531 PF06703 SPC25 Microsomal signal peptidase 25 kDa subunit (SPC25) Moxon SJ anon Pfam-B_22374 (release 10.0) Family This family consists of several microsomal signal peptidase 25 kDa subunit proteins. Translocation of polypeptide chains across the endoplasmic reticulum (ER) membrane is triggered by signal sequences. Subsequently, signal recognition particle interacts with its membrane receptor and the ribosome-bound nascent chain is targeted to the ER where it is transferred into a protein-conducting channel. At some point, a second signal sequence recognition event takes place in the membrane and translocation of the nascent chain through the membrane occurs. The signal sequence of most secretory and membrane proteins is cleaved off at this stage. 
Cleavage occurs by the signal peptidase complex (SPC) as soon as the lumenal domain of the translocating polypeptide is large enough to expose its cleavage site to the enzyme. The signal peptidase complex is possibly also involved in proteolytic events in the ER membrane other than the processing of the signal sequence, for example the further digestion of the cleaved signal peptide or the degradation of membrane proteins. Mammalian signal peptidase is as a complex of five different polypeptide chains. This family represents the 25 kDa subunit (SPC25). 22.80 22.80 23.30 23.40 22.70 22.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.98 0.71 -4.66 32 365 2009-09-10 23:37:10 2003-07-29 10:05:20 6 7 290 0 231 339 3 149.90 27 76.60 CHANGED slKpshD-ulsshltph....G..........appsapLhDs+LhlGhsulslAshuhhhDh..hhsa.po+.shhhhsVshYhlLsslLhhashhhEKshlahuppct.....sscplpls...........ophpK.....asPhYpl..plshpcspstt.....p..hphpts.hscaFstsGhhhtshapphlsp.lssl ..............................tlKpslDDulpphLhpt.....t.....................atEsatLhDsRLhlshhushlAhhuhhaDh..hhsFspo+....shlhhsVh............s...........Y...Fl...h.ulLslasha.h..E.Kshhhhuhpct.............ssct.....hplu.............Sphc.+.............asshYpL..plshtsspstt..........pp..hphsps.hspa...........FcpsGhlh.shapp.lsphhp..h....................................... 0 84 131 191 +6533 PF06705 SF-assemblin SF-assemblin/beta giardin Moxon SJ anon Pfam-B_22934 (release 10.0) Family This family consists of several eukaryotic SF-assemblin and related beta giardin proteins. During mitosis the SF-assemblin-based cytoskeleton is reorganised; it divides in prophase and is reduced to two dot-like structures at each spindle pole in metaphase. During anaphase, the two dots present at each pole are connected again. In telophase there is an asymmetrical outgrowth of new fibres. 
It has been suggested that SF-assemblin is involved in re-establishing the microtubular root system characteristic of interphase cells after mitosis [1]. 30.00 30.00 30.30 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.61 0.70 -5.23 6 409 2009-01-15 18:05:59 2003-07-29 10:19:12 6 3 47 0 84 409 2 191.20 61 89.87 CHANGED ssousKLEHVsE+Fuuhas-lEpEKQsRRlsEuoRhpllpEuls+LEKulEAElKRRAESDKQlQsHFEuEl+sLpERptpQlsDhpsulKsAl-uLup+lpDLHoll+-ER-pRRsDIEHLAsSLVsKVNECVuAlDEERspRhpcpshhhK+luEDlhplpp+lDTEKssR-u-lSuLRoE.lH-sluNRNlsDEQFcshVLDElsulKuALshEREERlAEDDEIVQAlNDYT+ALQ-GL+lVss .........................................................................hcsR...RVDDDTRVKMIKDAIAHLD....RLIQTESRKRQuSFEDIRE...EVKKSADNMY.LTIKEEIDT.MAANFR.KSLAE.......MGDTLNNVETNLQNQIAI..HNDAI...AALRKEALK.SLNDLETGIATENAERKKMYDQLNEKVAEGFARISAAIE...KETIARERAVSAAT..TE.Ls.ssh............................................................................................. 0 59 69 80 +6534 PF06706 CTV_P6 Citrus tristeza virus 6-kDa protein Moxon SJ anon Pfam-B_22973 (release 10.0) Family This family consists of several Citrus tristeza virus (CTV) 6-kDa, 51 residue long hydrophobic (P6) proteins. The function of this family is unknown. 25.00 25.00 30.10 30.00 20.00 18.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.69 0.72 -4.48 2 15 2009-09-14 12:01:29 2003-07-29 10:22:59 6 1 2 \N 0 16 0 50.70 86 98.70 CHANGED MDCVIQGFLTFLVGIAVFsAFAtLIIIVITIYRCThKPVRsASPYGTHATl MDCVIQGFLTFLVGIAVF..SAFAGLIIIVITIYRCTTKPVRNsSPYGTHATV................. 0 0 0 0 +6535 PF06707 DUF1194 Protein of unknown function (DUF1194) Moxon SJ anon Pfam-B_22986 (release 10.0) Family This family consists of several hypothetical Rhizobiales specific proteins of around 270 residues in length. The function of this family is unknown. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.22 0.70 -5.35 35 226 2012-10-10 16:07:06 2003-07-29 10:26:08 6 6 125 0 81 566 155 199.90 36 75.78 CHANGED ss.DltLlLAVDVStSlDtpEhplQR-GaAsALssP-VhpAlhu.GshGclAlshhEWuGsspQplllsWolIcsspsApshAspltssPpttspt.TuIusAlsa.usshhspsshtuhRRVIDlSGDGssN....pG.sPs..tAR-tshstGl.sINGLsIhssss.s..........sLssYYpssVIuGPGAFVlsspsac-FscAlRRKLlhEluuhs ...........................................................................................s.DltLlLAlDVStSlDt...sEh..pl..Q..pcGh.A...t.A.L.p.s..sp.lh.pA.l.hs...u....G.plAlshhEWuu.spQ........plllsWphIss...tsupshAspl..tst.......s.....p.t.....tsph..TulusAlth.us.s.l.h..s.....ps.s....h.......uhR..+....VIDlSGDG..sN..........pG...sPs......tuRctsh..s.....p.G..l...sIN..GL...sIhspss...........................sLs.t.YYpspVIuGP......G.....AFVhsspshp.-FscAlRcKLlhElus.s.................................................. 1 15 38 51 +6536 PF06708 DUF1195 Protein of unknown function (DUF1195) Moxon SJ anon Pfam-B_22828 (release 10.0) Family This family consists of several plant specific hypothetical proteins of around 160 residues in length. The function of this family is unknown. 25.00 25.00 78.30 78.00 21.70 22.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.11 0.71 -4.80 7 56 2009-01-15 18:05:59 2003-07-29 10:29:54 6 2 19 0 25 53 0 141.80 53 85.58 CHANGED M+tsct...hPsoTsosssp....hsupppsuttu..hhG+GRYKhWALAAIhLLAhWSMhTGoVoLRWSuGsl...sphs-DlshPhhDDLDVLEMEEREKlV++MWDVYTpot..pl+LPRFWQEAFEAAYEELsSDsssVp-AAlSEIA+MS.lRplpl-sss.cSs .................................s..............thts....utsu..lhGKGRYKhWALAAIhLLAhWSMhsuoVoLRWS.u..GsL...sphssDlssPlh.DDLDsLEMEEREKlV++MWDVYTpop...clRLPRFWQEAFEAAYEELsuDs.ssVR-AAlSEIA+MS.l+plpl-.ss.p..p................. 
0 3 14 20 +6537 PF06709 DUF1196 Protein of unknown function (DUF1196) Moxon SJ anon Pfam-B_23162 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 51 residues in length which seem to be specific to Vibrio cholerae. The function of this family is unknown. 21.70 21.70 24.90 24.70 19.40 18.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.76 0.72 -4.03 2 41 2009-01-15 18:05:59 2003-07-29 13:17:12 6 1 18 0 4 14 0 48.50 92 96.46 CHANGED MTVPLEAFVMCVFLMPTLPFKGVAKGIYAKQHSIKsaHIHKTKMLHlDIFR MTVPLEAFVMCVFLMPTLPFKGVA.KGIYAKQHSIKsHHIHKTKMLHIDIFR. 0 4 4 4 +6539 PF06711 DUF1198 Protein of unknown function (DUF1198) Moxon SJ anon Pfam-B_23016 (release 10.0) Family This family consists of several bacterial proteins of around 150 residues in length which are specific to Escherichia coli, Salmonella species and Yersinia pestis. The function of this family is unknown. 25.00 25.00 42.00 41.80 20.80 19.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.85 0.71 -4.39 5 511 2009-01-15 18:05:59 2003-07-29 14:21:16 6 1 506 0 25 130 1 147.00 77 97.65 CHANGED MlWIILATLlVVFIVGFRVLTSDTRRAI+RLSERLsIcPVPIESMIDQMGKTAGsEFIRYLcRPsEuHLQNAAQVLLIWQssIVDuSDcNlphW+RLLpKARLAAPLT-sQlRLALGFhRELDPDAaELssFQpRYNphFpPE-GVaW ...MIWIMLATLAVVFVVGFRVLTSGuRKAIRRLS-RLNIDVVPVESMlDQMGKoAGcEFLRYLHRPDESHLQNAAQVLLIWQlVIVDGSEQNLhpWHRlLQKARLAAPITDAQVRLALGFLREhEP-MQ-INAFQhRYNAFFQPtEGVHW................. 1 1 5 14 +6540 PF06712 DUF1199 Protein of unknown function (DUF1199) Moxon SJ anon Pfam-B_23160 (release 10.0) Family This family consists of several hypothetical Feline immunodeficiency virus (FIV) proteins. Members of this family are typically around 67 residues long and are often annotated as ORF3 proteins. The function of this family is unknown. 
25.00 25.00 111.00 110.70 21.60 18.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.53 0.72 -4.15 5 6 2009-01-15 18:05:59 2003-07-29 14:25:47 6 1 2 0 0 4 0 51.80 73 79.54 CHANGED MLaRNScsVPAuIYRSNNIFsNNQuSGSMETSTISSPSRRIRNNFLGLLGTR ML.RNuchVPs.IYRsNNIFssNQsSGSMETSTISSPSRRIRNNFLGLLGTR 0 0 0 0 +6541 PF06713 bPH_4 DUF1200; Bacterial PH domain Moxon SJ, Bateman A anon Pfam-B_23245 (release 10.0) Domain This family consists of several hypothetical proteins specific to Oceanobacillus and Bacillus species. Members of this family are typically around 130 residues in length. The function of this family is unknown. Members of this family have a PH domain like structure [1]. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.36 0.72 -4.11 46 441 2012-10-04 00:02:25 2003-07-29 14:29:16 6 2 256 0 64 351 8 72.70 29 53.89 CHANGED TpYpl..psspLhlpsG.hh+pc.IslpcIppIp.sps.hs....usAhohcplpIpYs..........thsp.lhISPcccccFlptLpcps .............+Ypl..psssLhl+tG.hh+pc...Islc-I+pI..cpstp.h........u.ths.hcpLEIp.Ys..........tacs.hhl..PpccccFlshlccps............................................ 0 24 53 57 +6542 PF06714 Gp5_OB Gp5 N-terminal OB domain Bateman A, Mesyanzhinov VV anon [1] Domain This domain is found at the N terminus of the Gp5 baseplate protein of bacteriophage T4. This domain binds to the Gp27 protein [1]. This domain has the common OB fold [1]. 
25.00 25.00 27.10 25.80 24.70 23.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.17 8 60 2009-09-11 05:11:09 2003-07-29 14:46:33 6 7 59 3 0 62 590 132.00 37 21.49 CHANGED GlHPsQKs+u-shGlsTE-LLWMoshpslTSAAlSGIGpSPTGlVEGTaVaGaFLDKapQsGllLGTYsGIYp-KPssscGFsDPsGpYP...RYlGNDVNlLARGGhp.clu.sp..soshI...........QDtNoslAlsPD-pPhsEI.sDssPc ........G.Hs..+.ps-spGlPTEcLPWholl.PsooAuhS.G.lGtSsTGll.pGotVhGaaLD.ch.pps.slllGoh...sGhhpppssppcGFsDPsGpYP...hhlGsDsshLstGs.....phh.st..ps..h...........pstNhshuh.sst.s.st...sspP.......................................................................... 0 0 0 0 +6543 PF06715 Gp5_C Gp5 C-terminal repeat (3 copies) Bateman A, Mesyanzhinov VV anon [1] Repeat This repeat composes the C-terminal part of the bacteriophage T4 baseplate protein Gp5. This region of the protein forms a needle like projection from the baseplate that is presumed to puncture the bacterial cell membrane. Structurally three copies of the repeated region trimerise to form a beta solenoid type structure [1]. This family also includes repeats from bacterial Vgr proteins. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.33 0.74 -7.26 0.74 -3.66 89 445 2009-01-15 18:05:59 2003-07-29 16:39:43 7 22 205 27 84 435 64 24.00 35 5.73 CHANGED usto.hsVtuNpohsVsu.spopsls .......tspopsVtsNRTpsVss.NcT.ols.......... 0 16 36 60 +6544 PF06716 DUF1201 Protein of unknown function (DUF1201) Moxon SJ anon Pfam-B_23269 (release 10.0) Family This family consists of several Sugar beet yellow virus (SBYV) putative membrane-binding proteins of around 54 residues in length. The function of this family is unknown. 
20.90 20.90 22.30 22.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.85 0.72 -4.15 2 5 2009-01-15 18:05:59 2003-07-29 16:55:40 6 1 3 0 0 5 0 51.60 68 96.63 CHANGED MDCVLRSYLLLAFGFhICLFLFCLVVFIWFVYKQILFRsTs.SNEARaN+STVV MDCVLRSYLLLAFGFLICLFLFCLVVFIWFVYKQILFRNTPPSNEARFNRSTVV......... 2 0 0 0 +6545 PF06717 DUF1202 Protein of unknown function (DUF1202) Moxon SJ anon Pfam-B_23300 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 335 residues in length. Members of this family are found exclusively in Escherichia coli and Salmonella species and are often referred to as YggM proteins. The function of this family is unknown. 21.10 21.10 21.30 22.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.83 0.70 -5.15 4 434 2009-01-15 18:05:59 2003-07-29 16:59:04 6 1 410 0 5 129 3 303.60 78 92.48 CHANGED hLhoG.shADspsPTENILK-QFpKQYHGILKLDuITLKNLDucGNQATWSAEGDlSSu-DLYThVGpLADY.llEpTWTKDKPVKFSAMLTSKGTPASGWoVsFYShQhAASD+GRslDDIKTNsKYLIVNS-DFNYRFuplcuuhssQKsSIsuLcc-lpALDKphlsApKtADAYWGKsAsGKphTRt-AFKKlpppRD-FNKpNDSpsFAhKY-KEVYQPAlsAC+KQSEcCYEls....IQQKRDhDIpEQRRQsFLKSpcLsRKlQsDWITLEKGQYPLshKVpclppppssIhMKIcDINpu...aK+ ..........................................MLMTGNuWADGEPPTENILKDQFKKQYHGILKLDuITLKNLDAKGNQATWSAEGDVSSSDDLYTWVGQLADYELLEQTWTKDKPVKFSAMLTSKGTPASGWoVNFYSFQAAASDRGRVVDDIKTNN.KYLIVNSEDFNYRFSQLEoALNsQKNSIPALEK-VKALDKQMVAAQKAADAYWGKDANGK...QMTR..E-AFKKI.HQQRD-FNKQNDSEAFAVKYDKEVYQPAIAA.CHKQSEECYEVP........IQQKRDFDINEQRRQTFLQSQKLSRKLQDDWlTLEKGQYPLTMKVSEINSKKVAILMKIDDINQANERWK.K............................................................... 0 2 2 3 +6546 PF06718 DUF1203 Protein of unknown function (DUF1203) Moxon SJ anon Pfam-B_23313 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 155 residues in length. 
Family members are present in Rhizobium, Agrobacterium and Streptomyces species. 20.60 20.60 21.40 21.00 20.50 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.45 0.71 -4.28 20 173 2009-01-15 18:05:59 2003-07-29 17:06:05 6 1 171 0 60 169 7 117.00 36 73.61 CHANGED GhPCRpsL+cuctG-plLLLuYpPa........ssssPYsEsGPIFlptpsspttsu.-slP...........sl.sutshslRGYsu-s+Il..sucllsss-.......hsuthcclLscs-VAalHVRsAcpGCYtsRl-R ..GhPCRhsL+cupsG.Ep.llL.lsYp.h........ssssPYptsGPIFl+ttssssh....s..s....t....lP...........hl.psch...hslRuYsucshlV..sucVspuss.............lcptlc.clFs.ss..-V..sYlHl+sAc.GCatsclcR............ 0 10 36 47 +6547 PF06719 AraC_N AraC_N-term; AraC-type transcriptional regulator N-terminus Vella Briffa B anon Pfam-B_16798 (release 10.0) Family This family represents the N-terminus of bacterial ARAC-type transcriptional regulators. In E. coli, these regulate the L-arabinose operon through sensing the presence of arabinose, and when the sugar is present, transmitting this information from the arabinose-binding domains to the protein's DNA-binding domains [1]. This family might represent the N-terminal arm of the protein, which binds to the C-terminal DNA binding domains to hold them in a state where the protein prefers to loop and remain non-activating [2]. All family members contain the Pfam:PF00165 domain. 
24.20 24.20 24.30 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.65 0.71 -4.92 170 1635 2012-10-10 13:59:34 2003-07-29 17:09:29 8 4 1088 0 429 1324 58 152.60 33 50.71 CHANGED Ts..lsulplhR.sspsst.h.sslYcPulsllsQGpKpshlG.ccsapYcstcYLlsolslPhpsplhpASs-pPhLultlclDhshls-Llhchs.ts.tstsstt.................ulsssphsssLh-AhhRLlcLLcp.P.p-hslLuPhlhREIhYRLLpGspGstL ....................................lsslplhp.sp.ps.h.s.hs..ss.h.Y.cPul.sllhQGpK.hsh..l..u..c.c.s..a....p..YDsscYLlloVsLPhpscs...Aos..-tPhhulpLslDht.lt-Llhchspstt.hps.shst.................Glssuslsp..pllsAs.RLL.c..l.h..c..p.P..hD..t.p.l.Lus.IhREIlY+lLpGspGstL........................... 0 101 207 325 +6548 PF06720 Phi-29_GP16_7 Bacteriophage phi-29 early protein GP16.7 Moxon SJ anon Pfam-B_23362 (release 10.0) Family This family consists of several bacteriophage phi-29 early protein GP16.7 sequences of around 130 residues in length. The function of this family is unknown. 25.00 25.00 47.60 46.80 21.40 20.60 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.70 0.71 -4.48 2 6 2009-09-10 16:38:04 2003-07-29 17:11:43 6 1 5 10 0 7 0 129.30 60 99.11 CHANGED MEAILMIGVlsLCVIFLLSGRNNKKhQEARELEDYLEDLNpRlsQRTQILSELNEVIoNRSlDKoVNhSACElAVLDLYEQSNIRIPSDIIEDhVNQRLQoEQ-VLNYIETQRTYWKLENQKKLYRGSLK .......................MEAILMIGVlsLCVIFLLSGRNNKKKQEsRELEDY..LEDLNpRlsQRTQILSELNEVIoNRSlDKoVNhSACElAVLDLYEQSNIRIPSDIIEDlV.NQRLQoEQEVLNYIETQRTYWKLENQKKLYRGSLK.. 0 0 0 0 +6549 PF06721 DUF1204 Protein of unknown function (DUF1204) Vella Briffa B anon Pfam-B_16832 (release 10.0) Family This family represents the C-terminus of a number of Arabidopsis thaliana hypothetical proteins of unknown function. Family members contain a conserved DFD motif. 
21.00 21.00 21.00 25.10 20.50 20.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.41 0.70 -5.11 2 9 2009-09-11 06:55:55 2003-07-29 17:14:31 6 2 2 0 4 16 0 169.10 40 38.05 CHANGED c+.s.Lhc+.lcShShEAsphKsthtsLAhp+phhup...-hsRsp.Dhcc.+cKhsELtsRhhSEhKRLRpRR.cYAp..pppAL.+hAs.Fpu..........DhlthpPKFh-aNQVsGNlthL-tLVEuGElEhKSs-hM.RLlADt-tLcAEVcuFtIT-l.csDFDVhTLFtcl..-p+.....sus.s.TEuEc.t-spsEstuQtt.chsG.h.scsthAss.- ...................................................................................................................p.KhscLtsRhhuE.KRLRppR.chAc..p..pptL.+htp.hptchptl+hcl.DcphthpPKFh-h........NQVsGslthL-tLl-sGphEhKSs-hM.RLlADt-tLcAEVcuFtIT-l.csDFDVhTLFtcl..-p+.....sus.s.TcuEc.t-spsEstuQtt.chsG.h.scsthAss.-.................. 1 4 4 4 +6550 PF06722 DUF1205 Protein of unknown function (DUF1205) Vella Briffa B anon Pfam-B_16780 (release 10.0) Family This family represents a conserved region of unknown function within bacterial glycosyl transferases. Many family members contain Pfam:PF03033. 20.70 20.70 20.70 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.38 0.72 -3.75 33 513 2012-10-03 16:42:30 2003-07-29 17:22:06 7 8 340 18 67 391 0 95.10 41 24.87 CHANGED lDssPsShch...s...su..hssl..shRYVPYNG...uVlPs..WLhps....ssRs.RVslTLGsottp.h.tsst.sslscllsulucL.DsElVsTlsspptttL.uslPsNVR .................................IDssPPShpl........s..........su.....pPsl...sMRYV....PYN..Gu....AVh.s.Whccs.........scR.t.R.lhloLGTspsh...st.........Ghc.......................hluhlh-..u..su....-l.DAEl.llpls.ss.sps.sL..psLPsNVR............. 0 21 47 59 +6551 PF06723 MreB_Mbl MreB/Mbl protein Moxon SJ anon Pfam-B_471 (release 10.0) Family This family consists of bacterial MreB and Mbl proteins as well as two related archaeal sequences. MreB is known to be a rod shape-determining protein in bacteria and goes to make up the bacterial cytoskeleton. 
Genes coding for MreB/Mbl are only found in elongated bacteria, not in coccoid forms. It has been speculated that constituents of the eukaryotic cytoskeleton (tubulin, actin) may have evolved from prokaryotic precursor proteins closely related to today's bacterial proteins FtsZ and MreB/Mbl [1]. 56.40 56.40 56.40 56.40 56.30 56.30 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.76 0.70 -5.84 31 4314 2012-10-02 23:34:14 2003-07-30 09:48:34 8 4 3147 5 973 7141 4347 321.20 54 95.81 CHANGED usDluIDLGTANTLV..YV+G+GIVLsEPSVVAlcps.....sppllAVGpEAKpMlG+TPusItAlRPh+DGVIADa-hsEpMLKaFIp+lps..ppth.hpP+lllClPoGlTsVE+RAlp-uuppAGA+cVhllEEPhAAAIGAGLPVpEPsGsMVlDIGGGTTElAVISLGGIVhSpSlRVAGDchDEuIlpYlR+pasLlIGEpTAEpIKhEIGoAh.spcsc...ph-l+GRDlloGLP+slplsupElp-ALp-slstIl-u.l+psLEpTPPELuuDIh-+GIlLTGGGALL+GLDchlpccTslPVhlA--PLsCVAhGoGcsL-phctlpp .............................................................................................................s.pDluIDLG.TA.N..T..L..l....YV...+...G...+.........G...I........V.........L...........s.........E.............P.......SV..VA.lcpc............................sppl..h....AV..G.c......-.....A......K....p....M....l....G.....R....T................P..G....N....I................t.......A.......I......R.........P..............h...............K.............D.......G.......V..........I..............A........D..........F..............p.........l.......T........E.......p..........M....L..p..........a......F.I....c.p.lps..........pp...h.h......s......pP...Rl...llCVPsGsTpVE.+R.AlcE...uA..h..t..A..G..A..R..-.Va.LIEEP..h..AA..A....I..G...A.G....L.....P........V....p....E.....P....s....G....s.....M..VV...DIGGG.TT.EVA..V..I.....S.....L...........G...........G.............l....V.....h.....S.....p.....S.....l.....R......l....uGD......+.h......D....-....A....I.....l....s....Y....l.............R....+....p......Y.............s...........l.........L.......I......G...............E.............p.............T.
............A.........E.......c...I........K...h..........c.....I...G.....o......A..h...s...s.....s....c..sc..............ph-...V.....R............G............R............s..........L............s............s............G.............l............P...........+...........s.......h...........s...........l....s....S........s....E.........l...........h....E.A....L.........p.E.sl..s.........t.........I....V.s.A....V.+.s.sLE..p...s......P.........P.....E.....L......A......u.....D.I..h.-.........+........G.........l........VL.TGG.GALL+......sLDclL....t-E.T.....u.......l.....P.V.h.l.A-..-.PLsCVAhGsGcuL-.h-h..pt................................................................................................................................. 0 366 693 848 +6552 PF06724 DUF1206 Domain of Unknown Function (DUF1206) Yeats C anon Yeats C Family This region consists of two a pair of transmembrane helices and occurs three times in each of the family member proteins. 22.30 22.30 23.20 22.90 22.20 22.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.23 0.72 -4.22 149 831 2009-01-15 18:05:59 2003-07-30 09:49:12 6 3 248 0 368 915 30 72.50 25 76.68 CHANGED Rh......GhsupGllahhlGhhs........ltsAh..st......supssutsuultpLhsp.PaGthLLsllulGlluaulaphhcAhhcph ............RhGhsupGllahhluhhs..........lth...Aht...s.......supssstsuultpL....hsp.P.hGphLLhllulGlluhulaphhpAhhtt.......... 0 107 231 321 +6553 PF06725 3D 3D domain Bateman A anon Bateman A Domain This short presumed domain contains three conserved aspartate residues, hence the name 3D. It has been shown to be part of the catalytic double psi beta barrel domain of MltA [1]. 
23.80 23.80 24.00 24.20 23.70 23.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.45 0.72 -4.00 41 3329 2012-10-01 21:39:58 2003-07-30 11:02:56 6 38 2021 13 572 2394 113 70.00 40 19.85 CHANGED polAVDsslIPlGohlalpu.................hthslAtDTGuAIKGs.+lDlahuosscAsp.hG..p+psplalLt .................h.slAVD.plIP..lGohl..alps........................................ht..hhl.AtDTG.G..AI...K....Gs..+lDlah.Ghs...s........cAsp....h.G...++ps.c.ValL............................. 0 203 382 471 +6554 PF06726 BC10 Bladder cancer-related protein BC10 Wood V, Moxon SJ, Coggill PC anon Wood V Family This family consists of a series of short proteins of around 90 residues in length. The human protein Swiss:O60629 or BC10 has been implicated in bladder cancer where the transcription of the gene coding for this protein is nearly completely abolished in highly invasive transitional cell carcinomas (TCCs) [1]. The protein is a small globular protein containing two transmembrane helices, and it is a multiply edited transcript. All the editing sites are found in either the 5'-UTR or the N-terminal section of the protein, which is predicted to be outside the membrane. The three coding edits are all non-synonymous and predicted to encode exposed residues [2]. The function of this family is unknown. 19.60 19.60 20.20 19.80 18.60 17.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.15 0.72 -3.95 11 187 2009-01-15 18:05:59 2003-07-30 12:38:41 7 1 168 0 112 155 0 61.80 41 52.91 CHANGED MaCL+ahlPlLllPh...s.uhhpAsshFhh.laLluhhlcppPCsaCullhhhhhhs.s..psshssh .....MaCLp.hhL.PlL.lIPh...p..hh.upshFhh...hall.....uhhl.++PCshCullhLhhhhh.s....ssWus........... 
2 23 43 82 +6555 PF06727 DUF1207 Protein of unknown function (DUF1207) Moxon SJ anon Pfam-B_23454 (release 10.0) Family This family consists of a number of hypothetical bacterial proteins of around 410 residues in length which seem to be specific to Chlamydia species. The function of this family is unknown. 20.30 20.30 25.20 22.50 15.80 18.40 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.18 0.70 -5.63 8 77 2009-09-11 12:14:12 2003-07-30 13:56:56 6 1 75 0 30 57 13 311.30 36 83.55 CHANGED +hlcssh.sll.csss..loLPsDplhups..spIsphlp-lPhlTuVEIsE...............pp..sshsptsstsss..cpps.sscs.sshtslWLPpGp.LFpPLlADPRtsp.SAuaR..ascplsGs...+lGuVuFG-slPhhR.sslu+apssh-lGLQuGVFSsFDLDsPSosLlNoDFhVuhhhShtsupaShRhRlYH.SSHLGDEFLLp+..sshsRhNLShEulDLhlSachtP..lRVYGGsGaIhp+-so.sl+PahhchGlELR.uPatl....s.LhucPlFAhch+saEppcashD.ShhhGhEau+FpshG...RKlphllEYapGaS.pGQFhREpscYhGhthpYtF ....................................................................................................................................................................p.t..sh...shhpslaLPp.ss.LFsPLlADPRpsp.SAuhR.....hs-ps.hGp................pluussFGuchhhhR..h-h..upaps..sh-lG....lQG.uVFSsFsL-p.....spu...........sh.VNo.DFhlus.hh.sas.........h....s...........ca...ShRhRlaHlSSHLGD........EF.....l.......Lsp..........Ps.h.p...R........h.N.hScEulDhhsShch........sPp....lRlYGGhGYll...s+...-.o.stcPhhhpuGlElR.....Pash.........ssLcupPlaAh.c...h+....haE-pcashD.o..hhsGhEau.....+hps.lG...RKlRshlEYapGaSpcGQFh+-pssYhGhtlpYsF......... 1 12 16 27 +6556 PF06728 PIG-U GPI transamidase subunit PIG-U Wood V, Moxon SJ anon Pfam-B_7677 (release 9.0) Family Many eukaryotic proteins are anchored to the cell surface via glycosylphosphatidylinositol (GPI), which is posttranslationally attached to the carboxyl-terminus by GPI transamidase. The mammalian GPI transamidase is a complex of at least four subunits, GPI8, GAA1, PIG-S, and PIG-T. 
PIG-U is thought to represent a fifth subunit in this complex and may be involved in the recognition of either the GPI attachment signal or the lipid portion of GPI [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.54 0.70 -5.60 64 358 2012-10-03 03:08:05 2003-07-30 14:05:33 8 7 283 0 247 623 14 321.40 29 81.80 CHANGED hsuhhlRlhl...hhsshs.phLss+lEl...................STPlsSa+plpE.GhaLhppu.l...........sPYsGul.hH..........psPLlLhlhshl..........................h.............hsllahlhDllsAhhLhth...........................................................thppstpph.tt..t........................hsshhlushYLhNPhslhoslutSTsshsNhhlhhslhsss.p.........sp...............hhhuslslAhuoa.lohYshhLlhPllhhhhp........................................................t.phh.hhhp...hhhlahsslhsLhh.hSah.lsu.......................sas...FlpssYshhlph...p-Ls...PNl..GLaWYFFsEhF-pF+sFFlhlFplhs.hhallP.LolRLpcp...........PhhhhhlhlulhulFK..sYPolu...............DsuhaLuLlslapp.l......................hthh+ash..lss.sshlhshlLuPlhaaLWIhtGoGNANF.FauloLla 
.....................................................................................h..shhhRhhl....hhssh.....lstpsEh...................soPlsS..a+p.............l........E.GhhLhp.s...h...........sPYsGsh.hH.........................psPll.l.hhthl............................................................htllahhhDhlsAhhLhhh................................t............................................ptht.....t.........................................................shhluhhYLhNPhsl.h.oC.lu.tS.osshsshhlhhhlhtsh.p...............sp................................shh.uhhhl.uhuoa.hShYPhhLhsPhlhhhhp............................................................................................t.hh...h..hht............hhhhh.h.s.slh.........hlhh...hShh..hhs.......................sap......al.tsYGhhl.h......DLs......PNl...G...La....W...Y....F....F.h....E.....hF....-.p....F....p.........F..F.....lhl...Ft.lph....hha.hhP....Lsl...+h..+cp..................Ph.h.h..hhhhlulhulh...K..sYPolu.....................D...suh...a...huhlslatp...l.........................h..h.h.+..h.....hhh...hhhlh..s.hLhPhhaaLWIhtGouNuNF.aauhoLsa......................................................................................................................................................... 0 91 140 206 +6557 PF06729 CENP-R NRIF3; Kinetochore component, CENP-R Moxon SJ anon Pfam-B_23665 (release 10.0) Family This family consists of mammalian kinetochore sub-complex proteins CENP-R, also referred to as nuclear receptor co-activator NRIF3 proteins. NRIF3 exhibits a distinct receptor specificity in interacting with and potentiating the activity of only TRs and RXRs but not other examined nuclear receptors. NRIF3 as a co-regulator that possesses both transactivation and transrepression domains and/or functions. Collectively, the NRIF3 family of co-regulators may play dual roles in mediating both positive and negative regulatory effects on gene expression [1]. 
CENP-R is one of the 15 components that make up the constitutive centromere associated complex (CCAN) part of the kinetochore. A sub-complex of CCAN, consisting of CENP-P/O/R/Q/U self-assembles on kinetochores with varying stoichiometry and undergoes a pre-mitotic maturation step. Kinetochore assembly is a cell cycle regulated multi-step process. The initial step occurs during interphase and involves loading of the 15-subunit constitutive centromere associated complex (CCAN). Kinetochores are multi-protein megadalton assemblies that are required for attachment of microtubules to centromeres and, in turn, the segregation of chromosomes in mitosis [2,3]. 27.00 27.00 32.30 32.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.65 0.71 -4.51 11 37 2012-10-08 12:26:51 2003-07-30 14:33:12 7 3 28 0 15 54 0 117.50 55 78.39 CHANGED +K+slssYSPTTGTpQhSPaSSPTSsptQ-h+NGPSNGp...cp.sc.slscRtpspspD.DtFMsLhScV-pS.EcIhElhpNLoSlQALEGSRELENLIGlSpuSChLppEhQKT+cLMoKspK.cLhcKpputlPs+-h ....+KKslhsYSPTTGTpQhS.FuSPoSsccQcHRNG.SNtp..pcp.sc.sLocptpspTc-.DtFhhLhSKVEp..EcIhchhpNLoSlQALEGSRELENlIGlSpsSp.LptEhpKTptLMops.c.pLhc+.pstlPt+t...... 0 1 1 5 +6558 PF06730 FAM92 DUF1208; FAM92 protein Moxon SJ, Eberhardt R anon Pfam-B_23546 (release 10.0) Family This family of proteins has a role in embryogenesis. During embryogenesis it is essential for ectoderm and axial mesoderm development [1]. It may regulate cell proliferation and apoptosis [2]. 
23.60 23.60 23.60 23.90 23.40 23.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -5.01 3 180 2012-10-03 12:17:00 2003-07-30 14:52:42 6 5 78 0 107 155 0 185.00 52 69.77 CHANGED MhRRs....L-sRDAQTKQLQ-AVoNVEKHFGELCQIFAAYVRKTARLRDKADLLVNEINuYAuTETPNLKL..GLKsFADEFAKLQDYRQAEVERLEAKVVEPLKsYGTIVKMKRDDLKATLTARNREAKQLoQLERTRQRNPSDRHVISQAETELQRAsMDAoRTSRHLEETIsNFE+QKlKDIKsIlSEFITIEMLFHGKALEVaTAAYQNIQNIDEDEDLE ..................................R-sQs+.lpsslsNsEKHFG-hCplFAu.YsRKTARLRDKuD.......LVppl.tsYAsoEs.P.pL+h..GL+sFA-...chAKl.QDYRQAE........VE.RLEuKV.VpPLKtYG..sllK.pR--lKtshpARs+EhKQhppLE+hRQ+sPS.D.Rp.h...I.s..p...AEo-Lp+AshDsoRoo+pLEEs.......lssFE+QKl+DlKpIhu-FlpIEMhFHuKALEVhosAaQslpplD.-cDL.............................. 0 23 29 51 +6560 PF06732 Pescadillo_N Pescadillo N-terminus Moxon SJ, Wood V anon Pfam-B_77615 (release 9.0) Family This family represents the N-terminal region of Pescadillo. Pescadillo protein localises to distinct substructures of the interphase nucleus including nucleoli, the site of ribosome biogenesis. During mitosis pescadillo closely associates with the periphery of metaphase chromosomes and by late anaphase is associated with nucleolus-derived foci and prenucleolar bodies. Blastomeres in mouse embryos lacking pescadillo arrest at morula stages of development, the nucleoli fail to differentiate and accumulation of ribosomes is inhibited. It has been proposed that in mammalian cells pescadillo is essential for ribosome biogenesis and nucleologenesis and that disruption to its function results in cell cycle arrest [1]. This family is often found in conjunction with a Pfam:PF00533 domain. 
25.00 25.00 32.10 32.10 23.40 21.60 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.89 0.70 -5.15 9 361 2009-01-15 18:05:59 2003-07-30 15:27:31 6 7 309 0 244 367 12 258.90 46 46.38 CHANGED KpKhcpGsAspYlTRppAl+KLQLoLsDFRRLCIlKGIYP+EPK+.KKKsNKGSot.+saYhtKDIpaLhHEPIlpKFR-YKlFh+Klp+Ah.u+t-hpssK+Lhss..+PsYpLDHIIKERYPTFhDALRDlDDALSMlFLFSshPsop+..........lpsphlppC++LssEahpYVhco+uLRKsFlSIKGlYYQAElhGpclTWllPa..tFspshsoDVDF+VMhTFlEFYpsLLuFVNFKLYpsLsLhYPP+l-sphcppttst.uhcht.susphshths.stpttcsh ..........KKhppGpApsYITRspAl+KLQlSLsDFRRLCIhK...........GIYP.REP+p......KKK.ss.KuuosspTaYahKDIpaLhHEPllpKFR-hKsah+Kl.p+Ah.u+s-hssscRLccN........+PpapLDHIlKERYPTFlDALRDLDDsLohlFLFushPs.ssp...................l.sph.lphCpRLshEa.tY.lhts+uLRK....sFlSIKGlYYQApl.......Gp.....p..lhWlsPa..pF..spp.hs..sDVDaRl..MhTFlEFYpT..LLuFVNF+LYps...l..sLh.Y.PPpl-tptpptt..t.thtsh.......h..........................ttt.t............................................. 0 91 139 204 +6561 PF06733 DEAD_2 DEAD_2 Vella Briffa B anon Pfam-B_1713 (release 10.0) Family This represents a conserved region within a number of RAD3-like DNA-binding helicases that are seemingly ubiquitous - members include proteins of eukaryotic, bacterial and archaeal origin. RAD3 is involved in nucleotide excision repair, and forms part of the transcription factor TFIIH in yeast [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.63 0.71 -4.86 33 3488 2012-10-05 12:31:09 2003-07-30 16:01:32 10 48 1979 5 1199 2928 287 131.00 24 17.76 CHANGED YsSRTHoQlpQslcEL++l.thh................hpsls.LuSRcplClpsclpc.tt..tpsls...thCpphtpph...........................C.aapsh.....ph.t....pphhhsslhslE-Lhc......hGcptphCPYass.RchlppA-lllhsYpYLlssphRpth.......lphcsslVIlDEAHNl.csstp...hhShclopppL ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s......h.t..........C........Pa...........h.......h......u....Rc....t......h..............p...AD........VV..lss...ap.h.l.....h.s..s.h..h..t..ctsh....................s.c.s...p......l.l.......l.........hDEAH..pLs-hupp..hhuhsls.................................................... 1 423 690 993 +6562 PF06734 UL97 UL97 Vella Briffa B anon Pfam-B_1747 (release 10.0) Family This family represents a conserved region within viral UL97 phosphotransferases. UL97 participates in the phosphorylation of the nucleoside analog ganciclovir (GCV) to produce GCV-monophosphate [1]. 
25.00 25.00 43.90 42.60 19.40 19.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.24 0.71 -4.92 8 208 2012-10-02 22:05:25 2003-07-30 16:47:50 7 1 30 0 0 154 0 170.10 60 38.99 CHANGED NlLIsss.cssppIh+AVLCDYSLSEsHPp.aNcRCVVVFQpT+TlRhlPsSpa+Ls-hYHPAFRPlsLQKllll-P+ApFPss.uspRaCss-LCALGpVlsFCLlRlLDcRGhccVRtssEstLFphAspACcAhtpcclsuhuDAChhlLAcQLuYhusLLG.--ss-hhs+hhcFlcsps-ps .NVLIcVNPHsPSEIlRAALCDYSLSEPYP-.YN-RCVsVFQETGTARRIPNCoHRLRECYHPAFRPhPLQKLlls.-.P+upFPss.uhpRaChS-LsALGpVluFCLhRlLD.......+RGhcEVRhsoEshLFthAstACRALtptplscsuDAChLlLAtQhuYsAsLLG.-cssslhu+hhcFlEs+hsp......... 0 0 0 0 +6564 PF06736 DUF1211 Protein of unknown function (DUF1211) Vella Briffa B anon Pfam-B_2594 (release 10.0) Family This family represents a conserved region within a number of hypothetical proteins of unknown function found in eukaryotes, bacteria and archaea. These may possibly be integral membrane proteins. 25.00 25.00 29.00 25.60 24.90 24.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.05 0.72 -3.84 95 961 2009-01-15 18:05:59 2003-07-30 17:11:10 6 3 572 0 286 777 47 86.70 33 41.94 CHANGED pRl-AFoDuVhAIhhTllVL.............-l...........................phPp...........sthhpsLh.shhsphhsYllSFlhluhhWhsHHphaphlp..plsptllhhNhhhLhhls..LlP ............................pRl-AFoDuVlAIlhTllVL.............-l...........................phPp...................ssshpuLh..phhsp.hhsYllSFlhluhhWhsHHpl.Fp.hlc..clstpl.hhhNhh..hLhhlohlP................................... 0 97 171 217 +6565 PF06737 Transglycosylas Transglycosylase-like domain Bateman A anon Bateman A Domain This family of proteins are very likely to act as transglycosylase enzymes related to Pfam:PF00062 and Pfam:PF01464. These other families are weakly matched by this family, and include the known active site residues. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.69 0.72 -3.68 18 1273 2012-10-03 00:09:25 2003-07-30 17:12:09 9 31 590 2 302 1024 36 75.60 45 27.18 CHANGED sAssssWDtlApCESGGNW.ulNTGNGaYGGL.QFstuTWpuhGGspYus.sAstAoRppQIulA-+lhssQGhuAWPsC ..............s..ssssWDt..lApCESGGN.......W.ul.......N.....T.G.....NG.a.....a.GGl.QFstuTWpuhG.G..st.a.t..s....pss....tA........o+........p....pQ....IslAp+lh.......s.s.QG...hG...AWPsC............................................ 0 77 214 286 +6566 PF06738 DUF1212 Protein of unknown function (DUF1212) Vella Briffa B anon Pfam-B_1646 (release 10.0) Family This family represents a conserved region within a number of hypothetical membrane proteins of unknown function found in eukaryotes, bacteria and archaea. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.05 0.71 -4.84 181 3852 2012-10-02 11:53:07 2003-07-30 17:20:57 7 11 2765 0 655 2941 60 188.90 23 54.74 CHANGED hGchLlpsGupshRVEcshpclu.pshGl..p.spshhsssulhloh.....ttsspshophppl.pspslshp+lsplpplscpltp..splsl.c-s...ppcLcpIpppt.....Yshhhhs.luhuluuu..uhshL.hGGs..hhchhhuhlsuhl......shhlphh.hs+tphsthhhp.hhuuhlsuh...........................luhhhh........th..shsh.shshh.hhuulhhLlPG ....................................................sGphLLpsGucshRV--shpRlu....p.s.h....G.h....sp..spshlsssuIhhoh...................sspsh.ophpcl..p..s.......pslshp+lsplpplscp..htp.............pp..l..sl..cps.......pp...cLppIptt..p.....Y.s.h.h.hh.s..lusu.lu.s.u..sFshL...h..G..G..s.....hhs.slhshluusl........................uhhlpth.l..s.+h..t..h.h...h.ls.....hl........uuhlssh...........................lu.hhhh......................ph..sh...s....t..s...s.hh...hhuulhhLVPG........................................................................................................... 
0 207 396 545 +6567 PF06739 SBBP Beta-propeller repeat Yeats C anon Yeats C Repeat This family is related to Pfam:PF00400 and is likely to also form a beta-propeller. SBBP stands for Seven Bladed Beta Propeller. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -7.54 0.72 -4.17 52 656 2012-10-05 17:30:42 2003-07-31 09:37:18 6 61 93 0 325 796 561 36.30 30 14.83 CHANGED tahp..lG.......ssssps..spuIuhDppGN.lYlsGpTsu..sat ...............asp..lG............ss.ss..s......upu....lulD.ss.GN.lYlsGtTsu..sh......... 0 218 271 288 +6568 PF06740 DUF1213 Protein of unknown function (DUF1213) Vella Briffa B anon Pfam-B_2177 (release 10.0) Family This family represents a short conserved repeat within Drosophila melanogaster proteins of unknown function. Approximately 50 copies of this repeat are present in each protein. 21.30 21.30 22.20 21.30 19.90 21.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.32 0.72 -3.84 29 411 2009-01-15 18:05:59 2003-07-31 11:07:59 7 12 12 0 314 459 0 29.70 64 20.54 CHANGED KEsSRPESVsESlKD-s........tcs..cSRhESl ...KEsSRPtSVAESVKDEs............EKo.ttSRRESl.................. 0 53 53 203 +6569 PF06741 LsmAD Ataxin-2-like; Ataxin-2_N; LsmAD domain Vella Briffa B, Bateman A anon Pfam-B_2543 (release 10.0) Domain This domain is found associated with Lsm domain [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.41 0.72 -3.88 26 396 2009-01-15 18:05:59 2003-07-31 13:05:57 8 12 263 0 244 409 5 70.30 42 7.87 CHANGED FG.VpSTaDEcL..YTT+L-+uss.pa+ppppcA-+lA+EIEsp.sotshHlt-ERuh..h.s-sshDEE-pYSuVpRs ....aGVpoTYDpsL..YTs.L-+sss.pa+cREtcAsclA+EI...Euo..............sp.Rst.....htscsshs..EE-KYSuVpRp............. 
0 77 130 192 +6570 PF06742 DUF1214 Protein of unknown function (DUF1214) Vella Briffa B, Finn RD anon Pfam-B_2721 (release 10.0) Domain This family represents the C-terminal region of several hypothetical proteins of unknown function. Family members are mostly bacterial, but a few are also found in eukaryotes and archaea. 25.00 15.00 25.00 15.10 24.90 14.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -11.29 0.72 -3.56 331 1606 2012-10-01 20:00:45 2003-07-31 13:11:51 6 13 655 9 542 1441 270 108.30 21 30.86 CHANGED ss.t-uhY.hh....st.hDssuph.....lsGst....t.apl......phs.s.thPss........t.FWSloh..Y....stt.......hhts....s....schs.............ls..s..............................ssDG.ohslhlu.s......ss.s.........................N..WL.................t.s................ahlhhRhYt..P.pp .............................................................ss....tshY..h....s..hDssGp......LsGsp.......t.htl.....phs.s.thPss...........t.FWol..o.h....Ys.pt.h.....hhts.......s.........hsp.hs......lss.....................h..ssDG..olslhlusps..Pss.s...........N.Wl.....h.s.s.......s..hhlhhRhYh..s...tt............................. 0 109 283 416 +6571 PF06743 FAST_1 FAST_Leu-rich; FAST kinase-like protein, subdomain 1 Vella Briffa B, Fenech M anon Pfam-B_2858 (release 10.0) Family This family represents a conserved region of eukaryotic Fas-activated serine/threonine (FAST) kinases (EC:2.7.1.-) that contains several conserved leucine residues. FAST kinase is rapidly activated during Fas-mediated apoptosis, when it phosphorylates TIA-1, a nuclear RNA-binding protein that has been implicated as an effector of apoptosis [1]. Note that many family members are hypothetical proteins. This region is often found immediately N-terminal to the FAST kinase-like protein, subdomain 2. 
24.80 24.80 24.80 25.30 24.70 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.09 0.72 -4.26 23 389 2009-09-10 21:09:11 2003-07-31 14:39:19 10 9 79 0 206 364 2 70.70 27 10.78 CHANGED lssllhsFupLN..YcPsst-pFaspltptLpsphsths..PptllsllaSLshlppaPhshlsplhsssFlscL .............ltpllhsFupLN..Y....p...P...s.pp.....c.p.Fh.....spltptLtpc..hsths......Ptt....llsllaSLshlpphP..phlptlhsspFlpph..... 0 32 49 104 +6572 PF06744 DUF1215 Protein of unknown function (DUF1215) Vella Briffa B anon Pfam-B_2952 (release 10.0) Family This family represents a conserved region situated towards the C-terminal end of several hypothetical bacterial proteins of unknown function. A few members resemble the ImcF protein, which has been proposed [1] to be involved in Vibrio cholerae cell surface reorganisation that results in increased adherence to epithelial cells line and increased conjugation frequency. 21.00 21.00 21.20 21.00 20.90 20.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.34 0.71 -4.48 111 1351 2009-01-15 18:05:59 2003-07-31 14:55:03 7 9 894 0 251 1140 28 123.70 27 10.91 CHANGED lpphss........phPsslpshhsplsspuhphlhpsuts.pLsppWpspVhp.appsluGRYPFs.suspDlulsDFschFuss.GhlcpFhpppLtshlc.ssss.Wphcshss..sh.....slssshLpthppAppI ........................................................up........phPsPlpshlsp....lssp........uhphlht.tutp..lspp...Wpss..Vspsap..psluuRYPFs..sospDsols-FpcaFuss.GhlDsFapppLtshl-..ssss..ah.csssst...sh.....slpss.hLsthppAppI...................................... 0 35 96 172 +6573 PF06745 KaiC KaiC Vella Briffa B anon Pfam-B_2234 (release 10.0) Family This family represents a conserved region within bacterial and archaeal proteins, most of which are hypothetical. More than one copy is sometimes found in each protein. 
This family includes KaiC, which is one of the Kai proteins among which direct protein-protein association may be a critical process in the generation of circadian rhythms in cyanobacteria [1]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.26 0.70 -5.24 26 1953 2012-10-05 12:31:09 2003-07-31 15:32:50 8 20 758 154 980 16013 6155 206.50 22 73.85 CHANGED lpTGI.GhD-lh.p.............GGlPcspslLloGssGTGKTlhuhQFLhpGhhchGEsGlaVolEEss....psl+cshc.phGWDlpchEccGhlsllDuhsssht............ptslcpLhppLppsl+chsA..+RllIDSlosL..hhppssh.sRphlhpLhphl.cthGsTulhsophs......stpshus.GVEc.slDGlIhLchcchst............chhRolhIhKMRuTsHshpcaPF-Ioc.pGIhlhsttt ......................................................oGl.tlDphl..t............................GG..l...ts..phhL.l....t.G.ss...GoG.K......T..hh....u......h.............p.........F.....l.........h...........p...............u.....h........p........p.............................u.......-............t..............s......l......a...l....s..h.c.....E..s........................tp..l.......h...p.....p..........h.........p.......s..h.......G.......h.........s........h........p.......t........h...................t..........p......t.............l......h....h.h..p.....h..st..............................................ph.p.t..h....h....p..t......l....t..p....t....l....c....p...h....p.s...........p..h.....l..l......l..D.....S....l........ss.........l............................h..........h............t.............s....................t...........t.........h.......+........p..............l........h.....p.......l.......h.....p....h.l.......p......p....h....s......h......o......s.......l...h..s.s...p.h.t................t.t..t..h..s..t..t..l......p.p.......l..s..Ds.l.lhLc....h.....tt..............................phtR....h.lplhKhR.s......s.t..t.t.t......t...h.h.aplsp..tGl.lh...h..........................................................................................
................................................................................................................. 0 262 574 792 +6574 PF06746 DUF1216 Protein of unknown function (DUF1216) Vella Briffa B anon Pfam-B_3048 (release 10.0) Family This family represents a conserved region, within Arabidopsis thaliana proteins, of unknown function. Family members sometimes contain more than one copy.It has been reported that this domain will be found in other Brassicaceae. 19.70 19.70 20.40 19.80 19.30 19.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.93 0.71 -4.39 14 50 2009-01-15 18:05:59 2003-07-31 15:42:00 6 3 5 0 38 49 0 129.60 28 39.42 CHANGED FFEpLKuhMph.ssh....t.pssKshps-MKupAstLhcAMuuhp....stSuchppplppph.phhKTLp.hp.h.tphh.t..tctcpptohphThpQppElcpslsKWppllopFVcosspspup..ShspSS.........ssssssSS ...........................FFppLKuaMphhsss....t.ssuK.sh.sch+upustLhcAMuuhp.uph.upSuc.ppplppshhphhKTl..h...h.t.h.....hc.cppustphThpQppElhpshsKWppslspFVcosspspup..S.s...uS...t...........s.t......tt........................................................................................................................ 0 21 21 21 +6575 PF06747 CHCH CHCH domain Westerman BA, Poutsma A, Steegers E, Oudejans CBM, Bateman A anon Westerman BA, Poutsma A, Steegers E, Oudejans CBM Domain we have identified a conserved motif in the LOC118487 protein that we have called the CHCH motif. Alignment of this protein with related members showed the presence of three subgroups of proteins, which are called the S (Small), N (N-terminal extended) and C (C-terminal extended) subgroups. All three sub-groups of proteins have in common that they contain a predicted conserved [coiled coil 1]-[helix 1]-[coiled coil 2]-[helix 2] domain (CHCH domain). Within each helix of the CHCH domain, there are two cysteines present in a C-X9-C motif. The N-group contains an additional double helix domain, and each helix contains the C-X9-C motif. 
This family contains a number of characterised proteins: Cox19 protein - a nuclear gene of Saccharomyces cerevisiae, codes for an 11-kDa protein (Cox19p) required for expression of cytochrome oxidase. Because cox19 mutants are able to synthesise the mitochondrial and nuclear gene products of cytochrome oxidase, Cox19p probably functions post-translationally during assembly of the enzyme. Cox19p is present in the cytoplasm and mitochondria, where it exists as a soluble intermembrane protein. This dual location is similar to what was previously reported for Cox17p, a low molecular weight copper protein thought to be required for maturation of the CuA centre of subunit 2 of cytochrome oxidase. Cox19p have four conserved potential metal ligands, these are three cysteines and one histidine. Mrp10 - belongs to the class of yeast mitochondrial ribosomal proteins that are essential for translation [2]. Eukaryotic NADH-ubiquinone oxidoreductase 19 kDa (NDUFA8) subunit [3]. The CHCH domain was previously called DUF657 [4]. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.01 0.72 -3.96 53 994 2012-10-02 15:44:21 2003-07-31 16:10:01 8 11 296 4 623 930 5 35.40 25 22.76 CHANGED Ctt-hpphhpChppssp...p..hspCpphhpthpp.Chpp ..........CspchcphhsChppssp....p...hppCpct.pshpp.Chh..... 0 169 274 457 +6576 PF06748 DUF1217 Protein of unknown function (DUF1217) Vella Briffa B anon Pfam-B_3199 (release 10.0) Family This family represents a conserved region that is found within bacterial proteins, most of which are hypothetical. Some members contain multiple copies. 
22.80 22.80 24.60 23.00 21.20 22.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.89 0.71 -4.14 53 429 2009-01-15 18:05:59 2003-07-31 16:22:50 7 6 178 2 178 422 24 141.70 27 69.03 CHANGED AaGL--.hhs+Ahl+KlLps..shs-ssoaAN+Ls.........Dp.RYpchusAFsFss.t....Gsh................................................sspssstt......................................................................................................................psssspYhppph-ppsGspspulRlALYFpRps.........sslso..................................shclLuss.............sLpphhpTuhGLs.sshushc...l-tQhthlcctl.......phschsD.Pppht ..................................................AaGL-s.thscuhl+KlLps..shsD......s....uhsNpLs.........Dp.RYtphApuFsFssp.Gsh.........................................................................................................................ss..Qopsth.....................................................................................................................pshhstYh........p....p.....s....h..-.p....psus......p.......sp....ss.c...........A.....h.....Yapcph...............sslss..................................s.-lLuDp.............tLhshhhsuhGls.sphst.t....................................p.............................................. 0 36 78 108 +6577 PF06749 DUF1218 Protein of unknown function (DUF1218) Vella Briffa B anon Pfam-B_3286 (release 10.0) Family This family contains hypothetical plant proteins of unknown function. Family members contain a number of conserved cysteine residues. 
21.70 21.70 21.70 22.60 21.60 21.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.12 0.72 -10.31 0.72 -3.53 39 580 2009-01-15 18:05:59 2003-07-31 16:26:16 7 5 148 0 209 417 0 76.60 39 56.25 CHANGED sAhlhLhhuQllssssstC....hCs...ups.......hhs.ttsp+shulhhhlloWlshslAhshLlsGustNuhpth........................s.sChhl+cGlFuuuAs.......lsLhsslh ...........................................psllhulo+C....hCh.....G+s..............lsP....uusRuhsllhFlsSW.lTF.........llAEuCllAGutp.NAhHs.............................................................................................. 0 27 118 166 +6578 PF06750 DiS_P_DiS Bacterial Peptidase A24 N-terminal domain Yeats C anon Yeats C Domain This family is found at the N-terminus of the pre-pilin peptidases (Pfam:PF01478). It's function has not been specifically determined; however some of the family have been characterised as bifunctional ([2]), and this domain may contain the N-methylation activity (EC:2.1.1.-). It consists of an intracellular region between a pair of transmembrane. This region contains an invariant proline and two almost fully conserved disulphide bridges - hence the name DiS-P-DiS. The cysteines have been shown to be essential to the overall function of the enzyme in [1], but their role was incorrectly ascribed. 23.00 23.00 23.20 23.80 21.90 22.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.27 0.72 -4.19 156 2675 2009-01-15 18:05:59 2003-08-01 09:33:18 8 8 2521 0 532 1905 721 94.00 36 36.27 CHANGED llG...hhlGSFLNVVlaRlP......tth.......................................slhhP............t.SpCPpCppplpha-sIPllSaLhL+G+.C+tCpstISh+YPllEllsullh...hhshhthuh ............................lhGhhlGSFLsVlIhRlP.......ht.h..............................................................slh.h.P.p.S+CspCpppl+hh-.IPllSa.L.h.L+G+.C......RpCp.spIuhpYsLlElls.u.llh.lhhhhhh..h.................... 
1 185 349 455 +6579 PF06751 EutB Ethanolamine ammonia lyase large subunit (EutB) Moxon SJ anon Pfam-B_6325 (release 10.0) Family This family consists of several bacterial ethanolamine ammonia lyase large subunit (EutB) proteins (EC:4.3.1.7). Ethanolamine ammonia-lyase is a bacterial enzyme that catalyses the adenosylcobalamin-dependent conversion of certain vicinal amino alcohols to oxo compounds and ammonia. The enzyme is a heterodimer composed of subunits of Mr approximately 55,000 (EutB) and 35,000 (EutC) [1]. 25.00 25.00 31.30 31.10 21.50 21.50 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.43 0.70 -6.15 12 1141 2009-01-15 18:05:59 2003-08-01 09:56:23 6 3 1070 18 217 709 31 435.60 63 95.67 CHANGED sYpFcsL+-VhAKAs.t+SGDpLAGlAAsSupERVAA+hlLuclsLpchhpssllPYEpDEVTRlI.Dshstttas.lpphTVu-hR-WLLscpss...ssplptlu.GLTsEMlAAVoKLMpN.DLIhsApKhcVss+hpoTIGlpGphSsRLQPNHPTDDPpGIsASlL-GLhaGsGDAVIGlNPssDossulsclLchh-clhp+acIPTQsCVLuHVTTphEAIcpGsPlDLlFQSIAGoEtuNpuFGlslAlLcEApphuLshpp.ssGsNVMYFETGQGStLSusAHaG.....VDQ.ThEARsYulA++acPhLVNTVVGFIGPEYLYDuKQlIRAGLEDHFhGKLhGlPMGsDlCYTNHhcADQsDh-NLhsLLssAGsNalMGlPsuDDVMLNYQTTuaHDshhlRclhGL+PhsEFEpWLpchGhhtt.sG+Ls.tuGssuhF .......sYpFcslK-VLAKAs.hRSGDhLAGVAAsSupERVAAK.VLu-hs..............ls-............lhNsPVIsY..E..-DpVTRLI.DshscsAas..p..I+paTlu-LREalLS-c.To.....s.sc..lthlpcGLTsEhVAAVuKlMsNtDLIhuA+Kh.V..lp+hpTTIGl....PGshSsRLQPNcspDDlpuIAAplh-GLsaGsGDAVIGlNPVTDslcsl...s+lLchltslIp+asIP..T..Q..u..CVLAHV..TTQ..IEAI.cR.G.A.PssLlFQSIuGoEKuN.csFGlsL.............A.hL.cEA+tsuhphs.R..............h.......sGp.....NshYFETGQGSALSAsApaG.....sDQ.ThEARsYGlAR.......+a-...............P..............FL.............VNTVVGFIGPEYLYss+QIIRAGLEDHFhGKLhGlsMGCDsCYTNHA-ADQND.-NLhhLLusAGCNaIMGlPhuDDl.........MLNYQTTuFHDshslRplLsLRPuPEFEcWLEpMGIh...s..s.G.+LoppuG-ss................ 
0 60 124 172 +6580 PF06752 E_Pc_C E_Pc_C-term; Enhancer of Polycomb C-terminus Vella Briffa B anon Pfam-B_16811 (release 10.0) Family This family represents the C-terminus of eukaryotic enhancer of polycomb proteins, which have roles in heterochromatin formation [1]. This family contains several conserved motifs. 19.70 19.70 19.80 20.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.56 0.70 -4.85 5 124 2009-09-11 20:40:39 2003-08-01 10:02:32 7 4 42 0 55 110 0 215.70 53 29.54 CHANGED uFTAEQYQQHQQQLVLMQKQQLtQLQQpQQsssSosssss......................pulVSKTLDSASAQFAASALVTo-.QLLuhKSKE-sVhtsGVNGVVsuSGsYKuLHl................TsSAlsph..s..SuuSossuPosslsoSsupoossHtlsAhu...........sAosQsLhGNNlpLoVPosluTVssVoPlssRHlsRTLusVPsSALKLAAsANs..pVPKV.TuSSolD.luRENHEsEKPALNuLAENTVAMEVT .......................uhTtEQaQpHQQQLs.MQ+QQ..LAQlQQp....Qts....ppSSppTp.p..............................shpshhSKTLDSASApFAASAlVou...hhu.thK-pssht..slNGVl.sS.G.......s.KsLa.................TshAL..............uSo.h.sus.h.posuHoshsH.l.sAhs...........ouosQsL.hNN..pLTsssplssVssluPlNs+h.sRT.u.s.s.P.oALKLA..ss..Ass...pVPKV..ssSSu..lsolsRENHEsE+.uLNsIA-sTVAMEVT...................................... 0 3 8 22 +6581 PF06753 Bradykinin Bradykinin Moxon SJ anon Pfam-B_7085 (release 10.0) Family This family consists of several bradykinin sequences. The skins of anuran amphibians, in addition to mucus glands, contain highly specialised poison glands, which, in reaction to stress or attack, exude a complex noxious cocktail of biologically active molecules. These secretions often contain a plethora of peptides among which bradykinin or structural variants have been identified [1]. 
20.70 20.70 20.90 20.90 20.40 20.40 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.55 0.72 -6.57 0.72 -4.36 2 35 2009-12-03 16:03:04 2003-08-01 10:06:19 7 5 8 0 0 37 0 16.50 85 34.95 CHANGED LQRRPsGFTPFRGKFHSQS s...RPPGFSPFRGKFHSQS.. 0 0 0 0 +6582 PF06754 PhnG Phosphonate metabolism protein PhnG Moxon SJ anon Pfam-B_6667 (release 10.0) Family This family consists of several bacterial phosphonate metabolism protein PhnG sequences. In Escherichia coli, the phn operon encodes proteins responsible for the uptake and breakdown of phosphonates. The exact function of PhnG is unknown, however it is thought likely that along with six other proteins PhnG makes up the the C-P (carbon-phosphorus) lyase [1]. 25.00 25.00 26.20 26.20 23.70 22.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.69 0.71 -4.58 50 665 2009-01-15 18:05:59 2003-08-01 10:41:11 7 3 645 0 125 413 78 145.30 51 96.70 CHANGED sshssRptWMulLA+A.....ssscLsshhsslsh....PsaphLRsPEsGhVMVRGRhGGoGusFNLGEhTVTRssV+Lss.....Gp..lGauYlhGRD+p+AElAAlhDALhQpsstt.......splpppllsPLttttsspctp+AAcsAATKVDFFTM..VRGED ...........................s.pAsRQ+WMuVLA+o.....pss....ELsA+hpA.L.sl....sssYclIRAsEoGLV.lpuRMGGTGcp.FhhG-sTlTRAuVRLsD..............................GT....l..GYSaVLGRDKpHAEtsALlD.ALhQps...c.ph.......psLpcsL..IsPLcAcphsRhAs...RpAE...lsAoRVDFFTM..VRG-s.................... 0 22 66 91 +6583 PF06755 DUF1219 Protein of unknown function (DUF1219) Moxon SJ anon Pfam-B_4928 (release 10.0) Family This family consists of several hypothetical proteins which seem to be specific to the Enterobacteria Escherichia coli and Shigella flexneri. Family members are often known as YeeV proteins and are around 125 residues in length. The function of this family is unknown. 
25.00 25.00 28.30 25.80 23.60 23.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.38 0.71 -4.57 8 829 2009-01-15 18:05:59 2003-08-01 10:48:13 7 4 345 0 38 369 4 110.90 74 90.28 CHANGED MpT.PssssptApsCPSPVsIWQpLLoaLL-QHYGLTLNDTPFuDEsVIpEHI-AGISLsDAVNFLVEKYsLVRIDRcGFSsppQSPhloulDILRAR+ATGLhppps..sl.........tt+ ..................MKTLPsohsRtASpCPSPV.sIWQpLLoRLLDQHYGLTLNDTPFADERVIEQHIEAGISLCDAVNFLVEKYALVRTDQPGFSsss.pSQLINSIDILRARRATGLMTRcNYRTVN.sIThG+ast..................... 0 3 8 21 +6584 PF06756 S19 S19_C-term; Chorion protein S19 C-terminal Vella Briffa B anon Pfam-B_16839 (release 10.0) Family This family represents the C-terminal region of eukaryotic chorion protein S19. In Drosophilidae, the S19 gene is known to form part of an autosomal cluster that also contains s16, s15 and s18 [1]. Note that members of this family contain a conserved PVA motif, and many contain Pfam:PF03964. 25.00 25.00 110.80 110.00 21.50 19.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.76 0.72 -4.12 6 18 2009-01-15 18:05:59 2003-08-01 11:00:52 6 2 14 0 8 21 0 76.80 66 38.58 CHANGED uuss.uGsYGGt....huPRaTVQPAGsTLLYPGQNSY+sYsSPsEYSKVlLPVRAAuPVAKLYlPEN...sYGupsGY ..............tuh..uGNYGtR.tGhhuPRWTVQPAGATLLYPGQNsYRsYVSPPEYSKVlLPVRsAuPVAKLYlPEN...pYGsQhs.......................... 0 1 1 4 +6585 PF06757 Ins_allergen_rp Insect allergen related repeat, nitrile-specifier detoxification Moxon SJ anon Pfam-B_5947 (release 10.0) Family This family exemplifies a case of novel gene evolution. The case in point is the arms-race between plants and their infective insective herbivores in the area of the glucosinolate-myrosinase system. Brassicas have developed the glucosinolate-myrosinase system as chemical defence mechanism against the insects, and consequently the insects have adapted to produce a detoxifying molecule, nitrile-specifier protein (NSP). 
NSP is present in the small white butterfly Pieris rapae. NSP is structurally different from and has no amino acid homology to any known detoxifying enzymes, and it appears to have arisen by a process of domain and gene duplication of a sequence of unknown function that is widespread in insect species and referred to as insect-allergen-repeat protein. Thus this family is found either as a single domain or as a multiple repeat-domain [3]. 21.60 21.60 21.60 22.70 21.00 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.95 0.71 -4.79 38 335 2009-01-15 18:05:59 2003-08-01 11:05:02 8 5 49 0 131 369 0 174.00 23 79.51 CHANGED stsLpsDhp-Fl.sLlPhcp....ltplstcYhhsDschppslpYLpos-Fpplapplhs.hs-lpshlpalc..spGlslhs.......hlNplsshl...sls.l..........pPp.hh............pstulsuhlc-llulLPhcclpALap-Khps.os-Fpthactlpo.-Fpplhppht....ssp-lppllpcL+.cpGl...Dlcpllchl ....................................shptchp-Fh.slls..hcp....l.plhtpYhhpDschppslpal.pss.p...F..tphhpplts.hP-hp.slhsalp...ppslslst...........hl..cplpph.l.....th.ph......................s.s..hp......................pspshpsals-slslLP..hsplpuL..apcK.hps.sstFpphhcslpop-acplhpslh....pscphpslhppLp.ppul...clchlhp..h.................... 0 31 38 108 +6586 PF06758 DUF1220 Repeat of unknown function (DUF1220) Moxon SJ anon Pfam-B_6292 (release 10.0) Domain \N 20.30 20.30 20.60 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -9.07 0.72 -4.17 15 961 2009-10-27 09:18:29 2003-08-01 11:14:26 8 17 26 0 206 961 0 64.40 48 40.55 CHANGED EE..-QpPssPRLs.....tEL.-sEE.EVLQDSLDcChoTsSshh-ls-SsQPYcSsFaohEEpcVuhALsVD ...................EE..-QsPssPRLS....................pELh-scEs.EVLQD...SLDcCYSTPSshhEhsDSsQP.YpSs.ha.o...LEEpcVuhuLDls................ 
0 156 156 156 +6588 PF06760 DUF1221 Protein of unknown function (DUF1221) Vella Briffa B anon Pfam-B_16837 (release 10.0) Family This is a family of plant proteins, most of which are hypothetical and of unknown function. All members contain the Pfam:PF00069 domain, suggesting that they may possess kinase activity. 25.00 25.00 91.50 32.80 24.10 23.30 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.45 0.70 -5.09 5 29 2009-01-15 18:05:59 2003-08-01 13:54:25 6 3 13 0 22 30 0 205.70 50 30.18 CHANGED +DDIp.INQRQCsLLLDlauLAacoVAsElRuNLRF-EKpTKWKsLEQPLRELHRVFREGEAYVRpC..LDPK.cWWAKAIshapNTDCVEaHIHNLLsCluVVlEAIEsAGElSGhDPDEluR+RLVaSRKYDK-WpDPKLFpWRFGKpYLVo+DlCuRhDpAW+EDRWlLlptLQEKKsSuSs...sLoKpE+RLADLLhKsL.ss.p..sGKLaPSSlLLGSKD .......pD-lp.IN.RQCsLLh-hhshAapslutElRtpL+h-E+t.TKW+sLEpPLRELaRlh+-GEhYVRpC..L.-s+..s......WWu+Ahshppsp-CVEaHlHNLLhChslVlEAIEsAG.El.oG..D.-Eht.R.+RLlhucKYD.+-h.DP+LFp.a+FGKpYLloc-lssRh-tAW+EDRWlLhphlcE++tsuu........loKpE+RLADlLhtth..............GK.LaPuSlLlso....... 0 2 11 17 +6589 PF06761 IcmF-related ImcF-related; Intracellular multiplication and human macrophage-killing Vella Briffa B anon Pfam-B_3476 (release 10.0) Family This family represents a conserved region within several bacterial proteins that resemble IcmF, which has been proposed [1] to be involved in Vibrio cholerae cell surface reorganisation, resulting in increased adherence to epithelial cells and increased conjugation frequency. Note that many family members are hypothetical proteins. 
20.20 20.20 20.30 21.30 19.50 20.10 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.89 0.70 -5.13 95 1440 2009-09-11 20:39:52 2003-08-01 16:13:50 7 11 892 0 268 1260 33 297.20 22 27.04 CHANGED llssLsslpphstths....sssshhh.chGLapusplssssppsYpptLpphllPtlhpplpppLpssh.....................psscthYpsL+sYLMLs.cts...c.hcsshlpsahtp....pWp.pphsss....ht....pptLttHLstlh.............pts.tshshsssLlppARphLsph....shspRlYpplcpph...spthschsLsctsGss..........sstlF...........pppsspshtt.s..lPGlaTtpGacphFhsplsphspphtp-p.WVL.Gpt....t...sthsssshp........pLtpclpphYhsDYsspWcshLsslclt......shss........lspuhphLpsLuus.sSPLhpLlpslscpTpL .............................................................hLs.lpp.h.t.tht..h..ctsshhh.chGLYpuppltststpsYhphL...pph...hLPslhppltptLpss.....................................................................ssscpthssL+sYlMLs...c......p........p..hs...s.thlp...pahtp..............pWp..p..phssp..............ht....pptlhtHlpthh..............................pts.....h.p.sp...plltps+p..Ltph....shtpRlYpp.lttp.h......st.............t.s....shsLtchl.Gss..........hstlF..............tptss.............lPshaT+pGapphh.tphpp.hsp.tttt-s.WVLsptt.............p...sh.sps.shp.................plppplpphYhsDYsspWpshlsslplp........shss........lspshp.hLphlsss.suPlhpLhpsltcpTp..................................................................... 0 40 100 180 +6590 PF06762 LMF1 DUF1222; Lipase maturation factor Vella Briffa B, Eberhardt R anon Pfam-B_3454 (release 10.0) Family This family of transmembrane proteins includes the lipase maturation factor, LMF1. Lipoprotein lipase and hepatic lipase require LMF1 to fold into their active states [1,2]. The precise role of LMF1 in lipase folding has yet to be determined [3]. 
29.30 29.30 29.90 32.70 28.90 29.20 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.42 0.70 -5.67 25 402 2012-10-02 13:32:46 2003-08-01 16:39:26 9 7 240 0 234 358 72 338.00 35 69.50 CHANGED auFtW-uLLLEsGFLulFLuPs................psssstlslaLhRWLlFRlhFGuGllKl.R........ssssW+-LTuLpYHaETQPhPsPlSaahHpLPchh++hpssusahsElllPaLlFsP.p.lphhuuslhlshQlhllloGNauFhNaLTllL...................shuslD.......DshLshl.hst......................................................................................................................................p.ss.t..........................................................................sss.sshhhhhlslhlslhlhsLSh........hslhshlstc......Qh..............................................................................................................................................................................Mppsh.ssa+lVNoYGuFuslT..+pR.ElllEGot-st.......ssWc.....................................EYEF+sKPGslpR........hPphluP.YH.RLDWhMWFAAhsstp...............................................tssWhhsFlt+LLps-ps.......sLsLLtcs....PF.sspP.......P.palRAphY..cY+Fosh.pchtpsGt........WWpRphlt.pahPslsLss 
..................................................................................................................................................................h.FtW-shLLEsGFLslhl.us........................ths...ss.hlshh.hhRWLlFRl.hu.....u.GllKh.p........ts.sWh..sLTsh....aHaETQPhPsPhuaahH.p.h...Ph..h++hpshus...ahhpl..slPhh.hFhP..............p..ht..........hhuhhh.llhQl..hllhoGNasahNhLTlll...................shuhhD.......st.h...thh..hs..................................................................................................................................................................................................................................................................................................................................................................................................................................................h...s.hhht.hls.hhhthh..lhhlsh............ss..hshhu.p................p...................................................................................................................................................................................................................................................................h..ptsh.s.hplsNoYGh.Ftphs..................ptR.ElllEGotss..t................s.Wp.....................................-YEFhhKPG.s.pR........hP....huP.ap.RLD.W.MWFAAh.ssh........................................................ttsWhhthh.+LLp...sc.t.......sl.pLltp..s.....PF..p.pP..........P.palRuphYpYpFop....tt...tptt.......................WWhRphh.t.ahPsht...p....................................................................................................... 0 97 133 191 +6591 PF06763 Minor_tail_Z Prophage minor tail protein Z (GPZ) Moxon SJ anon Pfam-B_6085 (release 10.0) Family This family consists of several prophage minor tail protein Z like sequences from Escherichia coli, Salmonella typhimurium and Lambda-like bacteriophages. 
20.00 20.00 20.10 20.00 19.80 19.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.25 0.71 -4.65 8 914 2012-10-01 22:58:23 2003-08-04 11:55:40 6 3 408 0 33 448 7 176.00 56 97.65 CHANGED lKGLEpAIcNLsulD+phVPpASAhAlNRVAppAlStosppVA+ET........ulPhKLV+pRsRLp+Aosc.KspA+I+VNRGNLPAIKLGoApVRLS+R.....G.tu.h.htGSVL+lGpaRFpcAFIQQLsNGRWHVMcRlsG..............KsRYPIDVVKIPlAuPLTpAF-pphcRlhcp-hPKpLtaALppQLRLhLpR ...................................................................................lKGL-pAlcNLspls..+pAVPtAoAhAINRVAssAIupousQVARET........tV..RKLV+cRsR..L...p.+Aosc....pspAR.IhV.N..RG...sLPsI.K..LGps.....hhs.pR..............G......ptss.Sl..Lh.l.Gp+.R.h.sAFI.Q..p...L.t.NG....R.WHVMQRlsG......................KNRYP.I.DV....V.K...IP.hAs..P..L.spA..Fcpsh-...RI...h+EcLPKpLuYALppQLRhslKR....................................................................................................... 0 6 13 21 +6592 PF06764 DUF1223 Protein of unknown function (DUF1223) Moxon SJ anon Pfam-B_6655 (release 10.0) Family This family consists of several hypothetical proteins of around 250 residues in length which are found in both plants and bacteria. The function of this family is unknown. 
26.80 26.80 27.00 30.40 25.70 26.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.31 0.71 -4.50 72 362 2009-01-15 18:05:59 2003-08-04 12:34:50 6 3 322 1 146 334 61 196.70 33 76.90 CHANGED VVELFTSQGCSSCPPADphLuc...Lupc.............s.sVlsLuhHVDYWDYlGW+DsFupstaTpRQ+uYuptt..stptlYTPQhllsGppphsGsctspltstlptttst....htlslphs..ts.t.plplpssssssht.........hplhlshass.ttssplp+GENtG+plsasNlVpshptl...utWsG.ts.shphsh....shsstts............tssVhlQp ...................VVELaTSQGCoSCP.PADphluclusc..............s.sVlsLuaHVDYWDY.lGW+DsaAsppaTpRQpuYs....p..th..stctlYTPQhllsGpsp........hsGsstssltstlsssttp.....sslslphp..tps...s..plslslssst.tst................tshclhlshaps.shssp.lpcGENtG+slsapplVpshppl...uhWsG....tshph.p.lsh..s...tt.ts...........usslhlQ.......................................... 0 36 85 111 +6594 PF06766 Hydrophobin_2 Fungal hydrophobin Vella Briffa B anon Pfam-B_3587 (release 10.0) Family This is a family of fungal hydrophobins that seems to be restricted to ascomycetes. These are small, moderately hydrophobic extracellular proteins that have eight cysteine residues arranged in a strictly conserved motif. Hydrophobins are generally found on the outer surface of conidia and of the hyphal wall, and may be involved in mediating contact and communication between the fungus and its environment [1]. Note that some family members contain multiple copies. 21.20 21.20 21.40 26.10 20.90 20.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.96 0.72 -4.29 45 136 2009-01-15 18:05:59 2003-08-04 13:34:33 6 3 55 24 75 116 0 64.80 45 56.32 CHANGED hCP.uG.Laus.PQCCuosVLGlAsLDCpsPsssssssssFpshCAut.GppspCCslP.lsGQulLCpsP ..............Cs.uG..Laus.PQCCuTDVLG.lAsLDCtsPspssssussFpshCAsh.Gp.p.ApCCslP.lh....GQulLCpss............. 
1 9 33 59 +6595 PF06767 Sif Sif protein Moxon SJ anon Pfam-B_7884 (release 10.0) Family This family consists of several SifA and SifB and SseJ proteins which seem to be specific to the Salmonella species. SifA, SifB and SseJ have been demonstrated to localise to the Salmonella-containing vacuole (SCV) and to Salmonella-induced filaments (Sifs). Trafficking of SseJ and SifB away from the SCV requires the SPI-2 effector SifA. SseJ trafficking away from the SCV along Sifs is unnecessary for its virulence function [1]. 27.60 27.60 28.20 28.40 26.90 27.50 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.16 0.70 -5.67 4 347 2009-01-15 18:05:59 2003-08-04 13:38:12 6 5 122 2 7 127 0 249.90 46 72.77 CHANGED MPITIGNGaLKSEIhIsPPupT+EsWWKlLWE+lKDaFFSTG+AKADShlHEMLFuDsPPTRERLs-IFFELKALACASH+DRFQVYNPHEsDuTIIaRIhDENtcsELLRITQsTDTFSCclMGcsYFLh+-pPslLKSaPQMThTINKRYSElV-.sLPSTLCLpLAGsPhLSVPLcNI.tYLYSEhcKsNLDEWKsQEKssYLAsKIpSGIEKsh+hLpHANISESTQQpAFLETMoMCGLKslETsPP.THIPI.KhVcEVLLADKtFpsFLsoDsssSQSMLAEIIEsISDpVF+ALFRhDPQAIQKMAEEQLTTLHlRuppQpGs.LCCFL ...................MPITIG.pGaLKSEIh...opssp...soKES...a...a...pl.LWEKIK...DFFFoTt+AcADpCI+ELh.a.spp.sPTspRL..p-lFhcL+ELASsSCR-pFpl.s.casDspI..Ihp..h.D.pN.sEN...LpIh.ppDta.h-lMsphhhhhcs.psshLK.a....sphshhhpphhs....E.lsh.hPppL.Ls.uGu.h...hsVsLcNIcthLhs.hcKGpLstWKtQE+hshluu+Ip.GItps......ssIs-uhppph.hhcsht.ssLKphth.ssasp.slpphV.psLhtschh.shL.pss...Sts..hLs-lhEhlu.pVapulFp.s.p...................................pp....................... 0 0 0 3 +6597 PF06769 Plasmid_Txe DUF1224; Plasmid encoded toxin Txe Moxon SJ, Mistry J anon Pfam-B_7662 (release 10.0) Family The plasmid encoded Axe-Txe proteins in Enterococcus faecium act as an antitoxin-toxin pair. When the plasmid is lost, the antitoxin (Axe) is degraded relatively quickly by host enzymes. This allows the toxin to interact with its intracellular target, thus killing the cell or impeding cell growth [1]. 
This family contains many hypothetical proteins. This domain forms complexes with Axe antitoxins containing Pfam:PF02604. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.69 0.72 -4.16 13 1544 2012-10-03 00:18:00 2003-08-04 13:57:46 8 6 1142 20 262 983 101 78.40 41 89.94 CHANGED FTp-uW-DYhaWQpsD+Khlc+IN+LIc-spRs.PFcGIGKPEPLKtcLoGaWSRRIscEHRLVYhls....DsplsllusRYHY ...................................as.puapD.Yh.aW..p.p.p.D.+.+.h.l.K+Isc....LIc-.lp...R.s......P..a.p......G...h..G.KPEsL...K.t.s.Ls..G.h...WS.RRIs.cc.HRLVY.p..V..s.....-..c...p...l..h..I..h..us+h.HY..................................... 0 87 185 230 +6598 PF06770 Arif-1 Actin-rearrangement-inducing factor (Arif-1) Moxon SJ anon Pfam-B_6086 (release 10.0) Family This family consists of several Nucleopolyhedrovirus actin-rearrangement-inducing factor (Arif-1) proteins. In response to Autographa californica multicapsid nuclear polyhedrosis virus (AcMNPV) infection, a sequential rearrangement of the actin cytoskeleton occurs this is induced by Arif-1 [1]. Arif-1 is tyrosine phosphorylated and is located at the plasma membrane as a component of the actin rearrangement-inducing complex [2]. 
21.10 21.10 22.30 22.00 20.70 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.80 0.71 -11.43 0.71 -4.94 17 37 2009-01-15 18:05:59 2003-08-04 14:04:58 6 1 34 0 1 39 0 193.90 30 63.07 CHANGED hsplhhhV...........hulhGslsp+aAlLl-hE.sspuVhNhShl.shlaGshlhhssshshhthh......tchp.........pshYhpshlsllshhshlphhlFlsh.s.lhc.GHlPsLDshhRcYDpcSlCWsGls......htDsNul..........sp..NChhh........shh..hhCltCRhEhhpcEPThhpppphslhhhslhllllpsashal.hcchpp .........hhhh......h....hshhshlhulhGhhsscaAlLl-hE.sspuVhNhSsh.hhsaGhhlhhsshhsl..hh...............sphp......pshYhthhhhlhsslshlphhlalsh.shlhc.GHlPsLDVhhRcYDh-uhCWsGIV.....hhDsNsI.........hsp..NChhh........shhhhCstCRt.hhpsEsThhpppphslhhhlhhlhslpsasLah.hpchh.t.......... 2 1 1 1 +6599 PF06771 Desmo_N 1111; Desmo_N-term; Viral Desmoplakin N-terminus Vella Briffa B anon Pfam-B_3693 (release 10.0) Family This family represents the N-terminus of viral desmoplakin. Desmoplakin is a component of mature desmosomes, which are the main adhesive junctions in epithelia and cardiac muscle. Desmoplakin is also essential for the maturation of adherens junctions [1]. Note that many family members are hypothetical. 25.00 25.00 74.70 73.50 23.40 18.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.99 0.72 -4.06 20 59 2009-01-15 18:05:59 2003-08-04 14:20:02 6 1 52 0 0 60 0 86.30 44 11.83 CHANGED +Y+GsDVs....spTVpNLL+TIsohSpps+s..ssss-hlp+IRsIIlhaRPsLpp.p.scLs.sscLllcuLps..sss.......+pI...THNaNYKYDYN ....+YtssDVs....spTVpsLLpTINoMSpRsKs..sss-chLp+IRsIIlhaRPpLps.+.hDLp.lPpLlhpuLts..sss.......ppI...THNaNYKYDYN... 0 0 0 0 +6600 PF06772 LtrA Bacterial low temperature requirement A protein (LtrA) Moxon SJ anon Pfam-B_8368 (release 10.0) Family This family consists of several bacteria specific low temperature requirement A (LtrA) protein sequences which have been found to be essential for growth at low temperatures in Listeria monocytogenes [1]. 
21.60 21.60 21.70 21.90 21.30 21.50 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.01 0.70 -5.54 9 969 2009-01-15 18:05:59 2003-08-04 14:56:32 6 11 713 0 309 814 19 321.40 22 81.25 CHANGED pc+so.hELFFDLlFVhAlsQlo+tLhpshshstus.....phllhhhulWWsWhppoahsNhhss-ph.sRlhhhs.MhhsllLusulstsF.....psps.hhFsluYshMplupul.aLhAs+.ttshusppshhplushlhluulhWlsuuLh...tstRhhlahlulsl-hhsPh.......hstpshcph.lchtHlAERhuLhsIIsLGEol....lslssshhphshshtshhhhh.uFlsslshWhlYFtpstptss++tpssu.......hhasYuHl.IsuuIhlhuVu.-hslppshscshhtthh.....huusllFLhGhhhhptshtttthss+hlshshL......hLhsshLsshhlpshssllhlsVAhh.hss ....................................................................................pps.shhELFFDLlFV..h....Al....s....p..hsph...lhpt.st.h.h...........pallhhhhlWhhWhtpohasNta..sp.s.....s...h...h..hh...h...h...hhp...M...hh..hl...hh....u...s...u......l.....s.........s..a................................ppph......hh.F...h....l.........s....h..s..h.h..pls.h.sh.p.a......l...h.t....h...p......hs..s.t..c....t...s..h.....h..t....hs..h..h.h...h.l.s..s.h..h....h..h........hu..h....h...h..s.........tth....p.......h....h...h..h...h..l...u..l...h...l.p...h..h.s.sh.....................................ht..p..h..h....t...h.h......s...l...p.h....Hls..ERhuLhsIIhh....G...Esl...................l....u.ls...s....s.....h....h.......t........h.....s.....h...............h.....h.....h.....h.....h.......th.....h.....l.....hhs......lahh...Y.a.....s.....p...p.h..h.s..c..p.h.p.p..ts...............hhh.hY.s.H.h.lh.........hu.l..hh...hs.su.h.t...h...h...........t.......t........h......t.....t....h............h............h...h........................hhu.hh...l.a..hh....u..hh...hhh....h.......h..t...h...........p..hh....hh...........hhhh...h.l..............h......h...........................h.................................................................................................................... 
0 85 174 256 +6601 PF06773 Bim_N Bim protein N-terminus Moxon SJ anon Pfam-B_8427 (release 10.0) Family This family represents the N-terminal region of several mammal specific Bim proteins. The Bim protein is one of the BH3-only proteins, members of the Bcl-2 family that have only one of the Bcl-2 homology regions, BH3. BH3-only proteins are essential initiators of apoptotic cell death [1]. 19.10 19.10 19.90 44.30 18.40 16.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.97 0.72 -4.21 2 63 2009-01-15 18:05:59 2003-08-04 15:02:05 6 2 30 0 17 67 0 37.00 90 23.84 CHANGED QPSslSs-Csp.EGGQLQsspR..phRP...GAPTSLpo.. QPSDVSSECDR.EGtQLQPAE...R..PPQLRP...GAPTSLQTEP. 0 1 1 4 +6603 PF06775 Seipin DUF1226; Putative adipose-regulatory protein (Seipin) Moxon SJ, Coggill P anon Pfam-B_8703 (release 10.0) Family Seipin is a protein of approximately 400 residues, in humans, which is the product of a gene homologous to the murine guanine nucleotide-binding protein (G protein) gamma-3 linked gene. This gene is implicated in the regulation of body fat distribution and insulin resistance and particularly in the auto-immune disease Berardinelli-Seip congenital lipodystrophy type 2. Seipin has no similarity with other known proteins or consensus motifs that might predict its function, but it is predicted to contain two transmembrane domains at residues 28-49 and 237-258, in human, and a third transmembrane domain might be present at residues 155-173. Seipin may also be implicated in Silver spastic paraplegia syndrome and distal hereditary motor neuropathy type V [1]. 
19.20 19.20 19.70 19.20 19.10 18.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.08 0.71 -4.80 30 347 2009-01-15 18:05:59 2003-08-04 15:05:20 9 9 225 0 212 337 0 188.80 26 50.23 CHANGED shhhllhlu....lhhY..........ssaYYsalPp..lshpcslahpassss...................PhAplsls.........................tphlhs....sQsYclslcLchP-SshNh..plGhFhVslsh...............................hopsups..................ls.............posRsshLpYcSshlchhpolhhhPhalhGhp.......cEpQplpVphhp..ca.cs............tsPssu.................................................lplplps+p........hQlYsA.pLplpAchsGlRalhYsa.lhShlVhsssha .................................................................s..hhlhhhu....hhha.h.hY.htahPp...lth.pslahp....appsp..............................PhApssls......................................................................................................................tphlhh........sQsY..clslpLph.PcSshNt......plGh...Fhl.shph...........................................................................hstssph..........................................ls............posRssh........LpYc..Sshlphhps.hhh...s.....hhlhGht.......pppp.hlplphhp....ph...p...t..........................ssss.................................................hhlplpspt................hpl.YsA.plplpAchsGl.R........hlhapa.hhoh.hlhh...................................................................................................... 1 72 124 175 +6604 PF06776 IalB Invasion associated locus B (IalB) protein Moxon SJ anon Pfam-B_3703 (release 10.0) Family This family consists of several invasion associated locus B (IalB) proteins and related sequences. IalB is known to be a major virulence factor in Bartonella bacilliformis where it was shown to have a direct role in human erythrocyte parasitism. IalB is upregulated in response to environmental cues signaling vector-to-host transmission. 
Such environmental cues would include, but not be limited to, temperature, pH, oxidative stress, and haemin limitation. It is also thought that IalB would aide B. bacilliformis survival under stress-inducing environmental conditions [1]. The role of this protein in other bacterial species is unknown. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.30 84 752 2009-01-15 18:05:59 2003-08-04 15:18:53 7 2 278 12 212 542 263 127.70 25 66.93 CHANGED ausWsltCtpstt..spp............CplhQ.ltsp.psphlhphslhphss.....t.sssh..hplhlPhGhhLssGlslplD......ssp.stp..hsashChs.....sGChApls.lssshlsth+...pGspsslshhssss........ps.lslslo.L....pGFssAhcsls ............................................................................................tsWphhCsptt.....spp...............CthhQplhs....p......p.s...ph..s.hs..h.slhphts................t.tphhhhl.hhPhGhhlssGltlplD.................stp.hhp.....htas.hChs.....suChApss.lss.cllssl+...pG.p.s.hslphhssss........ps..lshsls.L......pG.FutAhssh................ 0 35 108 144 +6605 PF06777 DUF1227 Protein of unknown function (DUF1227) Vella Briffa B anon Pfam-B_3660 (release 10.0) Family This family represents a conserved region within a number of eukaryotic DNA repair helicases (EC:3.6.1.-). 
20.30 20.30 21.20 22.70 19.10 18.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.68 0.71 -4.50 30 339 2009-01-15 18:05:59 2003-08-04 15:21:32 6 14 283 0 228 329 6 141.90 50 19.11 CHANGED pplpclKpsDpp+Lp-EYc+LVcGLcpus..scp.t-.hhss....PVLP--...lL.........cEAV.PGNIR+AEHFluFL+RhlEYLKsRl.......+lp+VlsE.......................oPhoFLp+lcc.................pstI-pKsLRFCuERLpsLlpTLcIsclc-assLphlAsFATLVuTYpcG ..............................p.plpchKpsDtp+LpsEYp+LV-GL......+pAs....s...Rp....pDthhuN..............PlLP-D...lL.............pE.AV.PGNIRpAEHFluFL+RhlEYLKs.Rh.........+VpHVlpE..................................oPsuFLpclpp.................hshI-+KPLRFCuERLpSLl+TLE.....ls....-.lpD.assLp.lAsFATLVuTYp+G................................................................................ 0 80 129 194 +6606 PF06778 Chlor_dismutase Chlorite dismutase Vella Briffa B anon Pfam-B_3770 (release 10.0) Family This family contains chlorite dismutase enzymes of bacterial and archaeal origin. This enzyme catalyses the disproportionation of chlorite into chloride and oxygen [1]. Note that many family members are hypothetical proteins. 
25.00 25.00 26.10 25.80 23.90 23.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.27 0.71 -4.60 15 1117 2012-10-02 00:20:33 2003-08-04 15:31:48 7 6 979 83 232 766 77 187.80 35 69.25 CHANGED --RpuhlsEhpsh.hpph...pcptcus.tslYslsGh+AD..lhlahhppsh-cLpplpscFp+op.luchshssaShlSlhc.SsYhs..............psclpupLhsclP...t+YlshYPMsKsh....sWYhLPhE-RcclMc-HGhhu+sassc..V+phhosShGlsDaEWsVsa-ucDltpapclVpchRhsEApp+hsc.tsPFhlG ........................................................................................pRtthhp-htphhpph.........tpp.ppss.puhYslsG.+AD..lhlahht......shcp.Lpph.scF.p+op....lu..ca.h.P..saShVulhc.upYht..................................sal..ApLhs.chP.....sca..lshYPhsKpt.....sWYhLs.EERp+hht...-...H...GhhG.Rp.Yss......V+p.hsuShuh..s...D...aE...Whlsh.u..-..-.lh..p..hhclhh-hRhsEspt.+.ht-..ssFasG................. 0 88 169 216 +6607 PF06779 DUF1228 Protein of unknown function (DUF1228) Vella Briffa B anon Pfam-B_5646 (release 10.0) Family This family represents the N-terminus of several putative bacterial membrane proteins, which may be sugar transporters. Note that many family members are hypothetical proteins. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.84 0.72 -3.96 12 952 2012-10-03 03:33:39 2003-08-04 16:37:05 9 2 879 0 193 2042 466 85.10 42 22.01 CHANGED sluMGlGRFsaTPlLPsMht-tthohusuualAoANYhGYLsGAlhAuhuhtt..up.RhhlhuGhhAoslLhhuMuhhsuhhshhl ................................luMGlGRFhYT.....P..hLPlMh....A.....-....s.....t..h.....o..h..sphua.lASuNY.sGYLsG.u.Ll.hu.hu...thc.........sc.h.+...h.L...h...s...uh...l.A...o..u..l..L.h..Lu.M.A.h.h.sshhlh...................................... 
0 27 80 135 +6608 PF06780 Erp_C Erp_C-term; Erp protein C-terminus Vella Briffa B anon Pfam-B_4561 (release 10.0) Family This family represents the C-terminus of bacterial Erp proteins that seem to be specific to Borrelia burgdorferi (a causative agent of Lyme disease). Borrelia Erp proteins are particularly heterogeneous, which might enable them to interact with a wide variety of host components [1]. 22.80 22.80 22.90 28.60 22.50 22.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.10 0.71 -4.35 12 100 2009-12-15 09:40:32 2003-08-04 17:05:26 6 2 23 0 5 82 0 136.90 47 40.69 CHANGED I+sLscKIDcINpDIDuIpspoo.........Vuuc-VhDKITGPlYDcFTDss...suIYpsW..ssLEp-.E-ptLt+LLcELp-sRssLRoKLNEuNp.h....htpsEPpLK-sVsVS-IKpDLEKLKSpLE-VKcYLcspsNFEEIKtYlpsS ......IcslscKIDcIsp-IDuIptpos............VuucEVcDKlTGPlYDchTDus......sSIYssW....ssLE.-.E-ptL..tcLl.cELp-sRssLRoKlpEu...h.....hpscsphK.-sV...pVu-IKpDLEKLKStLEcVKcYLcspsNFE-IKthlps...... 0 5 5 5 +6609 PF06781 UPF0233 Uncharacterised protein family (UPF0233) Moxon SJ anon SWISS-PROT Family \N 22.10 22.10 22.20 23.40 22.00 22.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.39 0.72 -4.36 23 435 2009-01-15 18:05:59 2003-08-05 10:43:42 7 2 428 0 115 274 249 89.00 38 89.48 CHANGED MP+SKsR.p+ssp....sspssscsssphp..tssPsWasslMhuLMllGLlWllVaYlossp........hslssLG..sWNlslGFGlhllGhLMThRWR .................................................................................MPcSKlR+pssh........psss...s..pR.ss.+.hps...sssssWasslhluLhLlGLlWLlVaYluusp............l..hhssLG...sW.NhsIGFulhhsGhlhohtW+...... 
0 36 85 106 +6610 PF06782 UPF0236 Uncharacterised protein family (UPF0236) Moxon SJ anon SWISS-PROT Family \N 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 470 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.49 0.70 -6.07 4 519 2012-10-03 01:22:09 2003-08-05 10:45:30 6 3 191 0 142 510 9 310.10 18 93.00 CHANGED MK.....pll...s.s.htp.h.h.EpLhcsshphpEL.pplhcplsKlhtElltthLEElDphltEs.RcKp+ahlccKc.p+sLhThhGsloFcRsYYhs+...E-G+a..saLLDcsLthscppRlocslchcsVEtAVp.sSYR+uucpltp.ss..hlS+pAl+phlhEss........ph..p.spp++VRhLYIEADt.ahuhQ...............ctputps+LlhIHEG...cp.susc.cLlNp+aahs...EuscDhWtclpcal.ppYch-shcc.lhINGDuAsWI+EGhp.ahs+upa.LDRFHLs+.lh+shS+pP+h+Ecsp....+tlpcsDccGlhtllpchhsstc-EpccccIpchh+hIcs..puI..R.YRc.........pGlpuhuAttplschaSuRlpphshuWScpGL+tMh+lhshphsGsshtclchsc+p....s.hthspcpItpApc+lppphs-plppth.slppG+hs.Iapsh.uL+.uthl .........................................................................................................................................................................................................................h............................................................hh..hlpth...Dt..l.h........pp.............c...t...tt.aph......ppp...........+sl...hhhG.plpap.R..p..h.a..hpt.....ptt..p..........a..lDp.hth.t.h.tphs...h....hhp..u......sac.pss...h.............lot.tlhphl..ht.......................................t..h...........l...hlE...sDt.hl.........................t....t....t..................t...h......hh..p................................t..h.t............h................t............th......th.t.hh..pta.t.....t......lhh.uDuu..........h.....h.p.....t.h.....h.t...........h..........pt........a.....h.D.aHh.p.l........phh..t.................t....h.....t.hh......phl........p.....t.h...h.p.p....h...t................t...........t.....t.........p.h..t.hhphl.p..........h..................................h...hu.....t.....p...p..h.h..s...Rhptt.h.Wp.tGhp.h.thh...h....p..t....................................
............................................................hh.............................................................. 0 43 92 104 +6611 PF06783 UPF0239 Uncharacterised protein family (UPF0239) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 47.30 46.50 24.60 24.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.76 0.72 -4.06 2 75 2009-09-10 19:00:48 2003-08-05 10:46:39 6 1 65 0 48 74 0 82.80 48 94.00 CHANGED .u.shshS.schs-.shhp.LlRYGLalGAlFQhlCl.Ahll......ps.s.sSpPcouEVTc+.ttsV.oh.++.+K.pKKKR. ..........MuuplshSPPElPEPTahEsLLRYGLFlGAIFQLlClLAIIl..P........h.sK.Sc...c.s...-...s.....p.sS...-s...+...ou.Ess+.K...sK...s...s.ssphp.++sKKEsKKKR............. 0 7 11 30 +6612 PF06784 UPF0240 Uncharacterised protein family (UPF0240) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 25.70 25.30 24.70 24.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.07 0.71 -4.59 8 106 2009-01-15 18:05:59 2003-08-05 10:47:43 6 1 87 0 68 102 0 162.80 34 90.81 CHANGED MG...uhluRtlRpFNlENRAc+hIS+pKPsPAPKHPSTpcsLccplopaP-lhcEls+KDspLLopLK-VYVsSpDshs.......pscuspspspsK.h+Ls+....-apashh-scslPKGKlolhEALpLlNNH+hpP-pWTAcKIApEY+LcppsVssLL+YFhTFEV+I.Pscs..K+slpo+ .........................................................MG...uhlsRhh+sFNlE...sRAcR.I.u+.K.....P..psAP+asos..p.h....pcp.hp.pPchtccls.pKDs.pL.hsh...LKcV.YVsSpDPss...........t.t..s.css.t..sp..p.p..+.hpl.s+................shpashh..p...pplP+...G+lolhpALp.......lLs..sHp......hpPppWT..A-KIApEY....pLc.....ccVpslLcYF.hsFpVhl...Pspp...pp................................ 
0 20 25 45 +6613 PF06785 UPF0242 Uncharacterised protein family (UPF0242) Moxon SJ anon SWISS-PROT Family \N 24.30 24.30 24.60 25.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.34 0.70 -5.79 3 41 2009-01-15 18:05:59 2003-08-05 10:49:24 6 1 40 0 7 21 0 372.60 60 90.66 CHANGED MLhVKKhlHsC.uRYapYLhPVVAlLLPLVCaPFLSpSQKhYGYFVFolISSLGWFFAIGRRE+QLKTAAGQLLQTKIRKLTEpDEGLRpIRESlEERQpESsQL+lQNQKLlNQLhHlRGVFlKTKG-hQKLEsLltHL+EENQCLQlQLDALlQECsEKpEEsQELNRELAETLAYQQsLNDEYQATFoEQHNMLDKRQlYIGKLEuKVQDLMCEIRNLLQLESsIsENLPu+sl.AsSp-lstQLlSELKKIAFKsENIEAASSLTASRYlRTDoSVHNYSLECRQLFDuLREENLGMLFVYAPQSQRAVFANALFKTWTGYGlEDFLKh-SDVVISGhsQWccDL+o.SRpERSGKIVIKTKu+GplPFYYCLssLNKGPLspHVLGVLYPl+t-sLpu ..........................hhhsKphhH.h.uRYapYL.PllAlLLPlssaPFLS-pQhhYu.FlFsllSSLGWhFAIGhRE+QL+sAAGQLLpsKIRKlTEpDEGL+pIREolEcRQpEoppL+lpNpKLlpQLhpsRp.VFhpsKG+hp+hEpLs++L+EENQpLQlQL-Ahs+EpsEK..EsQEL.pELpEsLAYQQpLpDEYQATFsEQHsMLDKRQsYIGpLEuKVQDLMCElRNLLQLE....utpp.NLPucss.....uSp-Vs.uQLl..Eh+KIVF+VEshEAAsSLTA.RYhRoDs......SsHNY.....SLsCRQLFDuLREENLGMLFlYAPhuQRlLFANuLFpsWTGYGlEDFLpc-SDVVlEGhuQWEcDLh.op.S.RsERSGKIVIKTKsaGshPFYYClssLsKGPFspHlLGVLYPA+hshhp............................ 0 3 4 6 +6614 PF06786 UPF0253 Uncharacterised protein family (UPF0253) Moxon SJ anon SWISS-PROT Family \N 21.60 21.60 21.70 46.10 21.40 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.12 0.72 -3.86 8 619 2009-01-15 18:05:59 2003-08-05 10:50:18 7 1 618 0 44 123 2 66.00 80 98.68 CHANGED MphY..CEhlRchYupIGSGDtGYlPcAIsCAlKsLN-IAuD-uLPpcVREcAAaAAANLLlSDacDc ....MEKY..C.ELlRKRYAEIASGDLGYVPDALGCVLKVLNEhAAD-ALSEuVREKAAYAAANLLVSDYVNE.. 
1 3 11 26 +6615 PF06787 UPF0254 Uncharacterised protein family (UPF0254) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 109.70 109.30 22.50 20.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.92 0.71 -4.71 11 29 2009-01-15 18:05:59 2003-08-05 10:51:15 6 1 29 0 20 29 1 161.60 46 94.29 CHANGED MIpVATAECFTHGKIGpcIHthApGY............chh.cashsh.p....hsVhVlAuhFIPolpGlcoLLshc.sPcP..chshc.hsKsYsEEpDhcVAphMAcAlKchhssDIuIGTTAGIG+GuIsIlo-cpphlhoSDlauDLhps..EpIhpRQcsGIp+ulchFhplLp .MIoVATAECFTHG+IGhcIHthAsGY.....................ch...ch.hs..t.........tslpVluuhFIPolpulcolLslc..PEP..Dhshc.hsKsYsE-pDhcVAhlMAculKchhssDIuIGTTAGlGRGuIsIlT-csphlhTSDVau.DLlps..-sIhcRQcsGIcKulchhlclLp. 1 5 10 16 +6616 PF06788 UPF0257 Uncharacterised protein family (UPF0257) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 26.00 25.10 24.50 22.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.59 0.70 -5.13 5 467 2009-01-15 18:05:59 2003-08-05 10:53:10 8 1 451 0 19 170 1 225.00 80 98.48 CHANGED VKKhlLlshLsllLsGCDNscuLlSFTPEMASFSNEFDFDPLRGPVKDFTQTLMNEpGEVoKRVoGTLSQEGCFDTLELHDLENNTtLALVLDANYYRDAEThEKRVRLQGKCQLAELPSAGVoWETDDNGFVVSAoGKEMcVpYRYDuEGYPLGKTTKosDpTLSVsATPSsDPRKKLDYTAVSLLN-+plGNVKQSCEYDsYANPVsCpLlIVDESVKPAVpR+YTIKNoIDYY ..........................hKh.LL.sLLshhLsGCDpscsh.SFTPEMASFSNEFDFDPLRGPVKDFTQTLMDEQGEVTKRVSGTLSEEGCFDSLELLDLEN...NT...l....VA.....LVL.DAN..YY..RDAETL.EKRVRL..QGKCQ.L.A.E.LPS.AG....V.S.W.ETDD.NGFV...IK...A..SSKQ...MQ.M...E...YRYDD.QGYPLGKT..TKSND..K..T.L..S.V.S.A..TP.STDPI.KKLDYTAVTLLNNQRVGN....VKQSC.EYDuHANPVDCQLIIV.D.EGVKPAVERVYTIKNTIDYY................. 
0 1 2 10 +6617 PF06789 UPF0258 Uncharacterised protein family (UPF0258) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 65.00 64.80 21.00 18.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.92 0.71 -4.18 3 82 2009-01-15 18:05:59 2003-08-05 10:54:07 7 1 37 0 53 66 0 142.50 52 26.69 CHANGED KDW......p+ccKcupsQtDpPGosusshpsKuusLVEQVFssHlaPQ.......uLoS+hKlNPLass.RhsEht-sGRGRPSWTl-DY...........A+spGccuRlTPL.DLQTQESL.......NPNNLEYWMEDIYTPGYDALLRRKEApLRRs+VCKlhALIsAAVsTlILVlVIPICTl+S ..........................................................Qh-....tt..hp......pps..h.l-pshssc.hPs.........SLpotMKsNPLYsDhRLsEhhE.++spPSWTIEEY.....................s+pu.pps+.sth.sLpsp.p........NPNsLcaWh-DlYTPGYDoLLK+KEsch+RuKlC+lhuLIhhussslILVllVsIsTh........ 0 2 6 19 +6618 PF06790 UPF0259 Uncharacterised protein family (UPF0259) Moxon SJ anon SWISS-PROT Family \N 27.90 27.90 27.90 28.20 27.80 27.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.69 0.70 -4.93 6 639 2009-09-11 20:38:19 2003-08-05 10:55:36 6 2 623 0 78 301 78 229.20 59 95.61 CHANGED MsITspsLh+DTapFhpspIhsIlhlSlLuAFIollIshlhpPsstpLhslhp...shchpuopS.lh-llpsMol-QpplLL+hSlsppFSuLIGsshLlGulITlIshlSptK+h.SlhpuIshsh.hlP+LhlLhFlTThllQlGhhLhllPGIhluIlLSLSPIILshc+hslhsSI+hShpIoapNl+lluPullhWlssKhlLlhlhSpFslls.pluhlIhNhhhN.llouILIIYLFRhYML....LRs .....................................................MSITApSVYRDoGNFFRNQFh....TIL..LlSLLCAFIT..VVLGH.s.FSPS..DAQlAQLs-....G.sl..u..GS..uG....LF-.L....VQNMoPEQQQILLpASAAS....TFSGLIG...NAILAGGlILl..IQLVSAG....pRV..SALRAIGA.SAP..l.LPKLFILIFLTTLLVQIGIMLlVVPGIIMAIlLALAPV.MLVp-.KM.Gl.FAuMRSSMRLs....W.A.N.MR.L.V.A.....PAV..l.....u.WLLAKTLL.L.LFA.o..S.FA.s...L..T..Pp...l.G..A.V.LANTLSN.LISAlLL.IYLFRLYML..IR.Q.......................................................... 
0 10 28 53 +6619 PF06791 TMP_2 Prophage tail length tape measure protein Vella Briffa B anon Pfam-B_3868 (release 10.0) Family This family represents a conserved region located towards the N-terminal end of prophage tail length tape measure protein (TMP). TMP is important for assembly of phage tails and involved in tail length determination. Mutated forms TMP cause tail fibres to be shortened [1]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.17 0.70 -4.71 30 1212 2009-01-15 18:05:59 2003-08-05 10:57:24 8 8 525 0 82 1164 33 176.50 41 24.61 CHANGED pptttshp+uGhSscphstAhRtlPAQhTDIssuLuuGpsshhlhhQQGGQlKDhFGGhGsAh+ulsshlhullsPhsl...uAA........usuuLuhAaYpGupEscpascALllT......GstAGsTuupLt...........shAcplus.ssuThusAupsLspL.....suoGphsupphptlupAssphp.csTGpul--hlppFs+LucDPlcAshcLscphpFLTsupYpQIpu ..........................................................................................................s....hthppAGhSstphthshp.......h....lstQhsDlsspLAsGps.hhlhhQQGuQ.......lts..ua....G....G..........h.........h.....htsh......uGhl...sh....Ms....sh.s.......hss................................AsuuLu..h.A...a.Y.p.G.......p....pp....h....ptFN+sLhLo..............G.s.uGh....Tu.sphh......................................shucuhpu..sGhT......p.psutsL.....u..tl..................................VtuGsh.sut.pht..lspusspht..pusG...p.Vscs.hptFt+L.psDPssuhhuhspph+.lTApQlt.lt.p.............................................................................................................................. 
0 6 36 56 +6620 PF06792 UPF0261 Uncharacterised protein family (UPF0261) Moxon SJ anon SWISS-PROT Family \N 18.70 18.70 19.10 19.10 18.00 17.50 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.16 0.70 -6.00 49 512 2009-09-11 06:27:46 2003-08-05 11:40:03 6 9 446 0 153 391 143 339.40 39 90.09 CHANGED tpplhllGThDTKscELpalpshlcs..........tGhsslhlDlu..shsssshss.....-lotp-VApstssshpslhss...tD.RspAhpsMucuhsthltphhcptplsGllulGGosGTuLsssuM+.uLPlGlPKlhVSTlA.SG..slssYlGssDIsMhhSVsD.luGLNpIo+plLuNAAsAlsGMs.shptsp.........................spKPhlGlTMFGVTT.ssVstlpstL-sp...GaEslVFHATGsGG+AMEcLscpGhlsuVlDlTTTElsDhlhGG.lhsAGs-...RhsAAu+sGlP.VlSsGAlDMVN..FGuhcTlPc+aps.....RhhatHNsplTLhRTos-Estpluchlup+LNp...spGsVthllPhtGlSslDt.GtsFaDs-ucsALhssLcpslps..shplhclstHINDstFAcssltthtchhs...p .............................plhlhuThDTKspEhhaltph.ltt...........tGhpshhlDluhh.....t.......ts.........slstppVAtttst..sh...ttlhst........D.+spAhthMu.uhpthl.ph.p...........lsulluhGGosGTslhssshp..tLPlG.hPKlhVSThA..SG..ssssYl..GssDlsMhhSVsD.luG...LNplSRtVLuNAusAluGhst.t.h..p..pst...........................ppKP.hlGlTMFGlTT.PslpthpttLppp.....sY-slVFHATG.s.GG+uMEpLh.pp.thhsuVlD....hTToElsD.lhGG..Vhssssc...RhpshucstlP.lhusGAlDMVs..Fs.s.po...l.Ptpats...............R.hatHN..tlolhRoos-EstthuchlutKLst.........spu...lthhlP.tGhSthDt.Gtsaa-s....cA.tAhhp..tlctsl.t..........phplhchshpINcstFApthht.h.phh.p......................... 
0 39 99 125 +6621 PF06793 UPF0262 Uncharacterised protein family (UPF0262) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 26.20 26.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.84 0.71 -4.51 24 243 2009-01-15 18:05:59 2003-08-05 11:41:08 7 2 240 0 86 194 178 157.40 58 97.24 CHANGED hspsp.tR....LhclcLDEtolupssPDlEHERsVAIaDLlE-NpFs.Ps...Gst....sGPYpL+Lult-sRLlF-IpsEsss.lhsalLSLsPFRRllKDYFhIC-SYYpAIRouoPspIEAIDMGRRGlHNEGSplLp-RLpGKl-lDaDTARRLFTLICsLHh+G ...........................................................................................s...psRLscVpL.DE.oIuRuoPDl.EHERuVAIFDLlEEN.sFp.PsGcpt.......sG...PY+L+LSlh-sRLlFsIpp...EsG....ssl.ssHlLSLoPFRRlV+DYFhICESYYpAIRoA.oPSpIEAIDMGRRGlHNEGSpsLp-RLcGKl-lDFDTARRLFTLlCVLHW+G... 0 25 55 66 +6622 PF06794 UPF0270 Uncharacterised protein family (UPF0270) Moxon SJ anon SWISS-PROT Family \N 20.10 20.10 20.70 20.90 19.70 19.00 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.03 0.72 -4.22 53 891 2009-01-15 18:05:59 2003-08-05 11:42:09 7 2 881 1 129 338 38 69.50 64 94.00 CHANGED MI.IPap...pLss-TLpNLIEcFl...LREGTDYG-pEhSLppKlpplpppLcpGcsVlVaSELcE.....olsIhsKpph ..........Ml.IPWQ....-LuPETL-NLIESFV...LREGTDYGE+.Eco...LEQKVA-V++QLpsGEAVLVWSELHE.....TVNIMP+spF............. 0 19 51 95 +6623 PF06795 Erythrovirus_X Erythrovirus X protein Moxon SJ anon Pfam-B_9167 (release 10.0) Family This family consists of several Erythrovirus X proteins which seem to be found exclusively in human parvovirus and human erythrovirus. The function of this family is unknown. 
21.00 21.00 21.10 182.00 20.50 20.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.69 0.72 -3.96 8 19 2009-01-15 18:05:59 2003-08-05 12:00:56 6 1 4 0 0 16 0 80.90 95 98.78 CHANGED MDSYLTTPMPYHPVAVhQNLEEKMQYYLVKTYTSLGKLAYNYPVLTMLGLAMSYKLGPRKVLhTVLQGFMTLGIANWLSWE MDSYLTTPMPYHPVAVhQNLEEKMQYYLVKTYTSLGKLAYNYPVLTMLGLAMSYKLGPRKVLhTVLQGFMTLGIANWLSWE 0 0 0 0 +6624 PF06796 NapE Periplasmic nitrate reductase protein NapE Moxon SJ anon Pfam-B_9066 (release 10.0) Family This family consists of several bacterial periplasmic nitrate reductase NapE proteins. Seven genes, napKEFDABC, encoding the periplasmic nitrate reductase system were cloned from the denitrifying phototrophic bacterium Rhodobacter sphaeroides f. sp. denitrificans IL106. NapE is thought to be a transmembrane protein [1]. 25.00 25.00 30.30 29.70 20.70 18.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.69 0.72 -4.66 42 268 2009-01-15 18:05:59 2003-08-05 12:06:26 6 1 240 0 73 165 7 54.70 49 93.40 CHANGED Mupssts......tpp+ppEl+sFlFLsshLaPlLoVshVuuYGFhlWhhQl.lhGP.PGt ..............Msp.s..ps....spc+uhEh+uFLFlsVlLFPlLSVuhVGGYGFlVWhhQh.lhGPPG....... 0 13 25 52 +6625 PF06797 DUF1229 Protein of unknown function (DUF1229) Moxon SJ anon Pfam-B_9402 (release 10.0) Family This family consists of several hypothetical proteins of around 415 residues in length which seem to be specific to the bacterium Leptospira interrogans. 
25.00 25.00 400.70 400.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.67 0.70 -5.63 2 57 2009-01-15 18:05:59 2003-08-05 12:32:44 6 1 38 0 1 51 0 393.60 94 99.59 CHANGED MSTHFSLKSASVITDYLFKFRIFSLPAICWICSTLIGFGTVNGRLSLFVIGLSFIISIFLLKNIKWNISSTFSFLLVISFLLAYFFFYKTPNMPQHLDGKLNPILYVFKAFPTLFSFFIIhALPSLKQKKLFFIGIALGMFVFAIINSIATLVYLEPPYYGKAYHFFYKMEYNSPGITILASMLPIVLFCFNGYLLKIDKKLKWQNlFFIFVFLISLFISFLFSARTLFFLIIANIIILVLIRLWKIYSIPNKGIYYKFIIGFLILFVSCSSIYFFLKETYIGQRIMNGIYSEKLNHHVDYWNTlKKDFFIYPKITIGSEYTFWYHNIFFDSHKTSGPITALILYIYSVFhFLIuLKKSLKRDYRSFRYFHFYICFIPYLMTTIPWESSESQMVALFAGLGALITTVDDQTPEM ....MSTHFSLKSA.SVITDYLFKFRIFSLPAICWICSTLIGFGTVNGRLSLFVIGLSFIISIFLLKNIKWNISSTFSFLLVISFLLAYFFFYKTPNMPQHLDGKLNPILYVFKAFPTLFSFFIIFALPSLKQKKLFFIGIALGMFVFAIINSIATLVYLEPPYYGKAYHFFYKMEYNSPGITILASMLPIVLFCFNGYLLKIDKKLpWQNVFFIFVFLISLFISFLFSARTFFFLIIANIIILVLIRLWKIYSIPNKGIYYKFIIGFLILFVSCSSIYFFLKETYIGQRIMNGIYSEKLNHHVDYWNTIKKDFFIYPKITIGSEYTFWYHNIFFDSHKTSGPITALILYIYSVFIFLIALKKSLKRDYRSFRYFHFYICFIPYLMTTIPWESSESQMVALFAGLGALITTVDDQTPEM............... 0 1 1 1 +6626 PF06798 PrkA PrkA serine protein kinase C-terminal domain Vella Briffa B, Bateman A anon Pfam-B_3917 (release 10.0) Family This is a family of PrkA bacterial and archaeal serine kinases approximately 630 residues long. This family corresponds to the C-terminal domain [1]. 
25.00 25.00 25.50 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.61 0.70 -5.09 43 1353 2009-01-15 18:05:59 2003-08-05 12:42:19 7 5 1291 0 335 779 130 252.60 58 39.39 CHANGED lpKh+lYsGEsl..hs.psc.slpEl+cpusp...........EGMsGlSsRalhctlupslsp...psppsslNPlplhppLEpslcp....tplspEs.cc+Ylc...hlc.hl+pEYpEhltcElp+Aah..uhcEthppLhspYl-plcAalpcp.......+l+Ds.TGc.h-PD..Echh+uIEEplGI.spptpcsFRpElhsal..uphsppG.cphsa.....ssaE+L+....cslE+KLaushc-hlplhohsup..hsDcEppc+hcphlpRh.pchGYs-psApcllca ................................................................hSKMRVYDGEoLK..DTDPKAK..ShQEYRDhAGV.......D...............EGMsGLS...TRFAFKILS+VFNF........D+s....EVAANPVHLhYVLEQQIER....EQFPpEp.tERYL-.....FlKsaLhP+YsEFIGKEIQTAYL.ESYSEYGQNIFDRYVsYADF.WIQDQ.......EYRDP.-TGplhDR-u...........LNsEL.EKIE..K.........P.......A.........G.........I...SN...........P....K.........DFRNE.IVNFVL.........RARAsNsG...+NPsW.......TSYEKLR...........sVIEKKM...F.S.N.T.E-.LLP.VI....S....FNAK...........sS....s--QKKH-DFVsRM..hE+GYTcKQVRLLsEW...................................................................................................................... 0 95 195 270 +6627 PF06799 DUF1230 Protein of unknown function (DUF1230) Moxon SJ anon Pfam-B_9232 (release 10.0) Family This family consists of several hypothetical plant and photosynthetic bacterial proteins of around 160 residues in length. The function of this family is unknown although looking at the species distribution the protein may play a part in photosynthesis. 
25.00 25.00 28.90 53.10 20.90 20.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.92 0.71 -4.33 30 130 2009-01-15 18:05:59 2003-08-05 12:58:07 6 1 106 0 65 129 126 141.00 42 70.98 CHANGED CPVP.-QpPlNEYppLpsSWhFoWsshsttsah+pLhhhWllshhlss.slAuuSashpcsshchllsusluuhllshLlllRlYLGWsYVtcRLhSpsVpYEESGWYDGQhW.KP.-hlt+D+LlupYpV+PlLsRLpp...ohshhs .....CPVPhEQpPlNEYppLppShhFuWsshs..hhsa..ht+L...........shl.W.h.hs.hhlsu....Pl.Au.uSas..spppsl+hlLuussuuhhllsLlllRlYLGWsYVtcRLhStsV.YEESGWYDGQhWhKPsElLsRDRLluoYpV+PlLpRLppThsh.s........ 0 17 44 58 +6628 PF06800 Sugar_transport Sugar transport protein Vella Briffa B anon Pfam-B_4126 (release 10.0) Family This is a family of bacterial sugar transporters approximately 300 residues long. Members include glucose uptake proteins [1], ribose transport proteins, and several putative and hypothetical membrane proteins probably involved in sugar transport across bacterial membranes. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.90 0.70 -5.31 17 1288 2012-10-02 19:55:49 2003-08-05 13:02:27 7 5 732 0 128 848 304 260.80 40 91.36 CHANGED hsllssKl.GGpPhpQhhGhTlGALlFulllhlhppsshs...phhlhullSGhhWulGQhhQF+uhchlGVS+sMPISTGhQLVGsoLhGVlsFtEWssshphllGhlAlllIllGhhLTuhpccpct..pp..pshp+ulhhLllSolGYhhYsll.sph......hslsGhsslLPQAIGMlluullhsh........chhhpKhohhNllsGlhWuhGNlhhlhSst..hsGlATuFSlSQhuVllSTlGGIhhLsEKKT++EhlhlhhGllLllluulhh 
.............................................................................sllssKh.GGpPhpQhlGsTlGA.L.lFu...l...l...l.h..l...h.....s..p.....s..s.....h...s...........sslllu....llS.Gh.h.Wu.hGQhtQh.+uh.......p...hlGV..SpsMPl.ST.G..hQL...Vu..soLaGVlhh...t..E...Ws..s.h.......t.....p....h....l.l.Ghl.A.l.l.l..l.l.l.Gs.h.h.T.u.hp-cppt...........ts.......stsh.++u.l.h....h.L....l.l.S....ol.....GY..h...s.Ysll.sph..................hs...l.....s..........G........h.s.A.lL.P...Q..u.l..G.........M..l...l.u.......u...llhsh.................pch..h.h...p....K...h...o...h.h...N...ll...s.Glh.Wu...h.GN....L.h....h.llStt...tsGl..A.Tu..F.sLSQh.u.Vl.lSTLGGIhhLsE.....+..K.....T.....+...+...p...h....h...h...l....l...l...Gl..l..LIll.uull.h........................................................................ 1 41 76 103 +6630 PF06802 DUF1231 Protein of unknown function (DUF1231) Moxon SJ anon Pfam-B_9856 (release 10.0) Family This family consists of several Orthopoxvirus specific proteins predominantly of around 340 residues in length. This family contains both B17 and B15 proteins, the function of which are unknown. 
25.00 25.00 105.70 105.60 18.30 17.60 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.28 0.70 -5.56 3 51 2009-01-15 18:05:59 2003-08-05 13:11:15 6 1 18 0 0 39 0 300.00 94 99.99 CHANGED MSRKFMQVYEYDREQYLDEFIEDRYNDSFITSPEYYSAEKYMCRYTTLNHNCVNVRRCALDSKLLHDIITNCKIYNNIELVRATKFVYYLDLIKCNWVSKVGDSVLYPVIFITHTSTRNLDKVSVKTYKGVKVKKLNRCADHAIVINPFVKFKLTLPNKTSHAKVLVTFCKLKTDITPVEAPLPGNVLVYTFPDINKRIPGYIHVNIEGCIDGMIYINSSKFuCVLKLHRSMYRIPPFPIDICSCCSQYTNDDIEIPIHDLIKDVsIFKNKETVYYLKLNNKTIARFTYFNNIDTAITQEH-YVKIALGIVCKLMINNMHSIVGVNHSNTFVNCLLEDNV ..MSRKFMQVYEYDREQYLDEFIEDRYNDSFITSPEYYSAEKYMCRYTTLNHNCVNVRRCALDSKLLHDIITNCKIYNNIELVRATKFVYYLDLIKCNWVSKVGDSVLYPVIFITHTSTRNLDKVSVKTYKGVKVKKLNRCADHAIVINPFVKFKLTLPNKTSHAKVLVTFCKLRTDIT.IEAPLPGNVLVYTFPDINKRIPGYIHlNIEGCIDGMIYINSSKFACVLKLHRSMYRIPPFPIDICSCCSQYTNDDIEIPIHDLIKDVsIFKNKEhVYYLKLNNKTIARFTYFNNIDTAITQEHEYVKIA.LGIVCKLMINNMHSIVGVNHSNTFVNCLLEDNV. 0 0 0 0 +6631 PF06803 DUF1232 Protein of unknown function (DUF1232) Vella Briffa B anon Pfam-B_4265 (release 10.0) Family This family represents a conserved region of approximately 60 residues within a number of hypothetical bacterial and archaeal proteins of unknown function. 20.80 20.80 20.80 20.80 20.70 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.68 0.72 -4.51 172 1698 2009-01-15 18:05:59 2003-08-05 13:26:27 7 20 1207 0 565 1384 202 40.10 37 27.25 CHANGED htst.hh.lhuuLhYhlsPlDlIPD..hlsslGhlDDhsllshs .............h.hs+hh.lhuuLsYhl.P....lDlIPD..hlsslGalDDlsllsh........ 0 189 354 460 +6632 PF06804 Lipoprotein_18 NlpB/DapX lipoprotein Moxon SJ anon Pfam-B_10405 (release 10.0) Family This family consists of a number of bacterial lipoproteins often known as NlpB or DapX. This lipoprotein is detected in outer membrane vesicles in Escherichia coli and appears to be nonessential [1]. 
20.00 20.00 20.20 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.65 0.70 -5.67 6 1056 2009-01-15 18:05:59 2003-08-05 14:05:53 6 3 1001 10 191 616 78 267.20 36 77.58 CHANGED sYLcossLcshpsP....tsuhPph.ssYsIPQsshpGulGKpVDIRPPpQsLpLIsGARs-p..sGEsoLhLlcp-cts.clWppVtchLp-+pIPlpppsDut..lpTDWVsWsptDE-hphtuRYpIShhEsssppuhpVsLlsh+csstlpssshos+pRYNshMhNllouphDpshR-pApcpApchVppIslshGoDcoGhPllIsRuPYNVlWQRLPslLPKhGFsIc-RspSQGTlpsKYtussDc.WpplGsKs....-LKoGsYph.lGDLGNRoSlshTDssG+slspu.hcuLVsVLpAlls .....................................................................................................................................................sYLpu.ssht.lp.sP...sGhh.h.Ph.......tss.........sYsI..P...s...s...p..s...p...Gu...lG...c.sl..D..I...R...P.P.......t..Q......s..LsL..l...s...Gu...R.sph.....sG-..su...l...l....l..c.....s.....s.....p.....s..s.....plW....spVhph....l....pt....+..s.hsl..pp..cs.Dus.....lpT.............D.......W..............l...........p.........W.s..............c........h.........D.............E................D.................p.......h.............c.u..R..Y..p..I....o...ht......p..shp..t..ulp..V....pLl....s.hc....p.........u..t.t........s....s...s...ss.....s..h...QR.....Y..............s...s...t...MhN.h.lsst...LDpst...s.c..s..........s..............ss.....t.......t.....s....c.......t..s...............s..s.....h..........sl....p...........u....u..D...c.....o......G....h......P.h...Ll.lRuPFshlWpRLPsAL-+lG.hcVsDpsRSQGshtV....p..Y........ps....h........s.......-.......u.........t......W......p.c........L..G...u.pc................ss.L..s..u.G..c........Y+l.pV..G...D...L....s..N..R..o.S.l.ph.lDscG+sLopupscslVuVhpAsh............................................................................ 
0 31 84 141 +6633 PF06805 Lambda_tail_I Bacteriophage lambda tail assembly protein I Moxon SJ, Iyer LM, Burroughs AM, Aravind L anon Pfam-B_7725 (release 10.0) Family This family consists of tail assembly proteins from lambdoid and T1 phages and related prophages, e.g. the tail assembly protein I (TAPI). Members of this family contain a core ubiquitin fold domain [1]. The exact function of TAPI is not clear but it is not incorporated into the mature tail. Gene neighborhoods reveal that TAPI co-occurs with genes encoding the host-specificity protein TapJ, and TapK, which contains a JAB metallopeptidase fused to an NlpC/P60 peptidase. It is proposed that the TAPI protein is processed by the peptidase domains of TapK [1]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.60 0.72 -4.16 15 1136 2012-10-03 10:59:06 2003-08-05 14:19:07 7 4 504 0 59 868 33 71.30 56 35.56 CHANGED shsplpLtGslt......p+aG+htthsVpThuEulpALusplsuacphh.pth.....atlhhst+N.uccsl.t....ttshsGssI+IlPhstGu ...............................h.................................p..T..u..A..EAIRALu.hQl.PuFRppls..............................-GWYQVRIuG....cDsutstL.ApLp..Es..LssGuVIHIVPRlAGA.................................. 0 6 22 42 +6634 PF06806 DUF1233 Putative excisionase (DUF1233) Moxon SJ anon Pfam-B_9240 (release 10.0) Family This family consists of several putative phage excisionase proteins of around 80 residues in length. 21.30 21.30 21.40 21.60 21.10 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.33 0.72 -4.43 13 472 2009-01-15 18:05:59 2003-08-05 14:24:08 7 1 322 1 19 108 0 65.40 48 88.75 CHANGED VI.....ls.PscWVsEclLhAlTGLptusIp+ARcpuWhpG+EY++VuPDGpPcss.upshYNhtpIspWIcp....pP .........l..sscWlsEp.LhshpGl..pttsl.p.+h.Rcp.saMpG+EYKHlu.s-GpPtcp..S.ChYNhccIspWIEpQt..su............... 
0 1 5 12 +6635 PF06807 Clp1 Pre-mRNA cleavage complex II protein Clp1 Moxon SJ anon Pfam-B_9787 (release 10.0) Family This family consists of several pre-mRNA cleavage complex II Clp1 (or HeaB) proteins. Six different protein factors are required in vitro for 3' end formation of mammalian pre-mRNAs by endonucleolytic cleavage and polyadenylation. Clp1 is a subunit of cleavage complex IIA, which is required for cleavage, but not for polyadenylation of pre-mRNA [1]. 28.90 28.90 28.90 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.34 0.71 -4.54 16 510 2009-09-11 15:53:51 2003-08-05 14:32:10 9 17 273 2 375 523 8 194.00 21 39.16 CHANGED GWVpGhGhclLlchlcsasss....hVlVl..sp.E+LhscLppthhsp......................lpllclsKutGVs...........................pRspchR+phR-ppIhpYFh.G.hpssh...hPashsspFsDl................pha+l.....hssssL.lshpspsssh+l......thslhppllAlSh............tpts-sll.......ssslsGFhhlptVDlp+chhslLoPsstp.L.PsshLlhus.lpa.cs ................................................................................................................................GWhps.GhphLhchlpthpss....hllhl.....s....p....c+..hh.s.pLp..p.ph..spp..............................................lp.llt..l.scs..sus.s......................................................................pp.s..pp..h.t...pp.hRc.plp..pYFa....u.........h...p...ts..h....................Phs...ts.s..h..ssl..................................................................p.acl........shssss.....l.....h.....h.p.t....p....s...s..tcl...............................sl.t.p.llulsh................................................................st..t.t.p..s..hh......................p.ssshGashlpsl...............D......h..p..+p...hhpl.LsPh..sh.....L.....s.p...hLl.hst.h...................................................................................... 
0 144 216 312 +6636 PF06808 DctM DctM-like transporters Vella Briffa B, Bateman A anon Pfam-B_4075 (release 10.0) Family This family contains a diverse range of predicted transporter proteins. Including the DctM subunit of the bacterial and archaeal TRAP C4-dicarboxylate transport (Dct) system permease. In general, C4-dicarboxylate transport systems allow C4-dicarboxylates like succinate, fumarate, and malate to be taken up. TRAP C4-dicarboxylate carriers are secondary carriers that use an electrochemical H+ gradient as the driving force for transport. DctM is an integral membrane protein that is one of the constituents of TRAP carriers [1]. Note that many family members are hypothetical proteins. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.27 0.70 -5.92 18 7152 2012-10-02 15:12:49 2003-08-05 14:32:48 7 20 1871 0 2023 7864 11933 404.70 26 85.53 CHANGED lshhhlLhhhuhPluhulhlsul..hhhhhhsss...........sh..lhspphhs.......u....lsshsLhAlPhFlLhGslhppsGhuccllchssullG+h+GGhuhsslhuuslhuulSGSulAsssslGuhhlPhM++pGYssshAuAlhAuuushu.llPPshshllauh..............hsslSluslFhAGllPGll....................................hhlslhhsshhhAp.+psh.s............................tscsphtphhpuhhpuh...........................................uLhlsllllhulhhGh.....aTsTEAuuhulshu..Lhlullh..........h+chshcclhcs........Lhpsucss....usVhhllAuuulhuahlohsslshtlushlhulss..................st..hhhLllsslhhl..llGhhl-ssAsllIlsPlhhPlhtphGlDP................lahGlhllhshslGhhoPPVGhslaluuuIu..............p.sshhpsh+tllPalhshhssLlLlshlPtl 
..................................................................................................................................................................hhhlhh.h..h....hu.h..P.l.u.h...u.l.shsuh.......hh..hh..h..h..ssh..............................................................sh...h.h..spp.h.hs.............................s....h.s.s.h.s...L.h.A....lP..hF...l...L...h.....G.....tl...hppuGlucclls.hs.tt.h.........hG..+....h.......G....G..Lu.....hs....s..l.l...u...shlh...uu.l.S...G....SusAsssuhG....sl....hlP....hM........h.+.t...G.Ystp.h.u.sulh...suuu..sl.G.....l..IP.....PS.hs..h.ll.auh............................hss..hS....l......u......pL........F....h....A..G...l....l...P....G..l..l..................................................................................................................................................h...s..h...s...h...h....h....h....s...h....h....h.....up......+.t...s...h..s.s................................................................................................t....p..h....s....h.....t........p....h......h......p....s.....h...h....p....u....h............................................................................................................................................................................................................s.L...h..l....l....l..l...l..G....u..l....h.s....Gl..........................hT.P...T...E....A.....u....u........l....u......s...h.hu........l.h..l.uh.h................................................................h+p.h..s...h....p..p...l..h.ps.................................................................lhp.s....s.pso........uh.l.....hhl..lus.............us....h.h.u.h....h.h..s..h.t.t.l..s.t.h.l...s....p.h.l.h....s.l.s..s.............................................s.........hh.h.L..l..l..l....l..h....h.l....llG.h..hh.....-h...s....s.....h..hl.l..h.s....P..lh.hP.l.h.....h....p....h......G.......l.D.s................................................................................
........la.F..Glhhhh.s....ht.lu.h....l....TP...PlG.hs.l.F.s.s.s.u..lu.....................................p..hpht.p.lh.+.s....l.l.....P..a...h.h...s...h.h.h..s.Lhl.ls.hhPt..................................................................................................................................................... 0 616 1374 1743 +6637 PF06809 NPDC1 Neural proliferation differentiation control-1 protein (NPDC1) Moxon SJ anon Pfam-B_10407 (release 10.0) Family This family consists of several neural proliferation differentiation control-1 (NPDC1) proteins. NPDC1 plays a role in the control of neural cell proliferation and differentiation. It has been suggested that NPDC1 may be involved in the development of several secretion glands. This family also contains the C-terminal region of the C. elegans protein CAB-1 (Swiss:Q93249) which is known to interact with AEX-3 [2]. 19.20 19.20 19.30 27.80 18.70 19.10 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.26 0.70 -5.65 2 114 2009-01-15 18:05:59 2003-08-05 15:07:05 6 6 79 0 62 116 0 192.70 37 65.07 CHANGED MATPlPPPSPRHLRLLRLLLSGLlLGAALpGAsAt+PDsssCPGSLDCALKRRA+CPPGAHACGPCLQsFQEDQpGhCVPR..hppsPu.....sphEscI...........shLhQELA..cKEuGHS...s.PL.cstQ+L.EP.ATLGFSthGQtLE.GLPST.GTsoPhPHTSLuS.sSSsPVpMSPLEPpGtpGsGLsLVLILAFClAuuAALuVAuLCWCRLQREIRLTQKADY.ATAKuPsSPusPRISPGDQRLApSAEMYHYQHQRQQMLCLERHKEPPKEL-oASSDEENEDGDFTVYECPGLAPTGEMEVRNPLFDHusLSAPlPuPpS.PsL. 
...................................................................................................................................................................................................................................................................................................................................................................................................................................h....hlhshhh.s.ss..h.Al.h....s.lshh....p.h..ppp.+.hstc.sDa...s.shus.s..uss.t.s.t......s.s.G.Dp+LApSApMYHYQHQ+QQhlshE.......ppp...p............sp..t..hp..s.s.pSD--NE-....GDaTVYECPGLAP....T..GEMEV+NPLF..cts........................................................ 0 21 27 45 +6638 PF06810 Phage_GP20 Phage minor structural protein GP20 Moxon SJ anon Pfam-B_8431 (release 10.0) Family This family consists of several phage minor structural protein GP20 sequences of around 180 residues in length. The function of this family is unknown. 31.00 31.00 31.50 31.20 30.40 30.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.97 0.71 -4.78 21 296 2009-01-15 18:05:59 2003-08-05 17:04:18 6 1 261 0 41 246 4 154.40 25 79.64 CHANGED pcplcplht.cupslt....Ks+hscs...pp....chcsl+pQlspRDpQlp-Lpc.pspDs--Lppcl-cLcppNc....-appclpphphssAlchALschsu+sscslhuhlspDplcLcc-s.lhGL--QIcsL+EScsYLFsppp....pPssuh.......sssssu .........................................................................................p.hptl.......t.tspplp......K...s...chsph.......pp.......phcslcpp.......lppt-cplccL+c.....p.....stss--L....ppplccLppcs...cp....ttchppchpphph..ssAlchALps..h.....pA...p...s...s...cs.lh.uh...l.D..h-...plclsc-u..lpGL--tlcsLK..cSc..s..aLFtppp...t..pPt.t...........sss............................................ 
0 23 36 39 +6640 PF06812 ImpA-rel_N ImpA-rel_N-term; ImpA-related N-terminal Vella Briffa B anon Pfam-B_4308 (release 10.0) Family This family represents a conserved region located towards the N-terminal end of ImpA and related proteins. ImpA is an inner membrane protein, which has been suggested to be involved with proteins that are exported and associated with colony variations in Actinobacillus actinomycetemcomitans [1]. Note that many family members are hypothetical proteins. 20.60 20.60 21.00 20.60 19.80 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.68 0.72 -4.20 143 1962 2009-01-15 18:05:59 2003-08-05 17:19:59 7 7 932 0 298 1333 37 61.00 28 14.43 CHANGED DWstlpchutpLLtpcoKDL+lssaLspAhh.ctpGhsGlspGlpLLssllppaW.-slaPths ..........sWstlpchuhpLLtspuKDl+lssahhhuht.+ptGhsGhspGlpLLsthlpp.aW.ss.haPp..h................. 0 43 112 200 +6641 PF06813 Nodulin-like Nodulin-like Vella Briffa B anon Pfam-B_4440 (release 10.0) Family This family represents a conserved region within plant nodulin-like proteins. 
24.70 24.70 24.80 24.90 24.60 24.60 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.63 0.70 -5.12 15 586 2012-10-03 03:33:39 2003-08-05 17:21:06 8 13 122 0 360 786 6 218.90 26 41.90 CHANGED +WhsllAulWlQusuGssasFu.sYSusLKSsLGhsQppLNhLuVApDLGcslGhlSGlssphhPsWsVLhlGAshshlGYGlhWLsVopplstLPhWhlhlhlsluuNSpsWhNTAsLVoCl+NFPtsRGlVlGlLKGYsGLSuAlaTplYpulhss....cssshLLllAllPslVsLsslahlRsp......spssppps-sthFhhhhhlulhlAsYLlshsllpphhshopuhhhshsulhl.lLLlsPLslPl ......................................................Wh.h.hhuuhhlt.hhsG.ss.YhFu........hYSstlK.s.t.h.s......h.sQppls..h.luh....hp.s.lG.t.s..h.G....h.h...u...Ghl......h.p......h......h.............s................s..hhllhlGu.h.hshh.G.Y...h...h...h..........a..L..s..........l...s......t......t...........h......s...................s........h.h.h........h.s...l.h.h.h...l.u..ss..u....t.s..ah...................s.Tu...sll.......osh....pNF..P..p..s..R..G.sslulh+uah.G.L....SuAlhop.lhp.s..hats...........s.s.ss...h...l.L...hlAhh....ss.h.....l....s.l..h.h.h.......h......h.l..+...h.............................ttt....tpt...hh....h......h........h.h...h...slh.hu..h.aLhhh..hht.t.........h.sp...h..h..hhh.h..hhlh.Phhls................................................................................................... 0 73 240 310 +6642 PF06814 Lung_7-TM_R Lung seven transmembrane receptor Vella Briffa B anon Pfam-B_4367 (release 10.0) Family This family represents a conserved region with eukaryotic lung seven transmembrane receptors and related proteins. 
25.20 25.20 25.20 25.30 25.10 24.90 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.75 0.70 -5.46 15 836 2012-10-03 04:04:29 2003-08-05 17:23:36 8 8 290 0 523 853 12 251.00 26 55.43 CHANGED lscoGhYslhhh.sp.p......t.psshshpsplsa+NspGYLsAp-hPLh.hYshMslsYslhullWhahhh+ah+DllplQhaIsullhluhsEhsFaah-YthhNspGhs...spshslas.lhuuhKtolstlLlLllShGYGlV+PpLGshhp+lhhlulhhhlhuslhhlhppsuths-tp....thlhhhlPl..uhl.hhhlhWIFpSLsc.ThcpL+h.+RshsKLplYR+Ftsslshullhohsahhhp.lhh...pthsshpptW+.tWlhs.saW.cllshslLllIshLWRPopNs. ......................................................s.Y.lh..h..h.................................s..h....p...h.ph.p.h....p.h...ps........s.....h..u......a.Lsus....phP.h.............hYhh..hslhYh...lh.u.h................hWhh.h.h........h..p...h..h..p.............s......l..h.plphh.h...su...l...lh...hthlphh...hh.hh.pa...p..hs..............p.pG............................sps....hhlhh.lhphh+tshhhhllllluhGauh.lK.s..h......L...u.......p........t.............p....+...l.hh.lsh....h.lhusl....h.........l..........l...........h..........................s......................s..s.t...................................h.lhhhlsl............shh...hh....hh.W..............h....p..p.............L...pp...oh.p...p.h....+..................h....tp...p............hs..KLp.....la...............R+.ah.h.ll.h.hlh.....h..ohh...hhhh..h......................h...pWp....W.h....shh.ph.hhhhh....hh.hhh......hha..RPstss............................................................................................................................. 0 155 276 408 +6643 PF06815 RVT_connect rvt_connect; Reverse transcriptase connection domain Bateman A anon Bateman A Domain This domain is known as the connection domain. This domain lies between the thumb and palm domains [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.26 0.72 -4.14 47 20194 2009-01-15 18:05:59 2003-08-05 17:58:29 8 56 614 372 0 19630 0 90.20 85 13.88 CHANGED sYYcPpKsLhAclpKhGpsQWsYplhQ..pp+..sLKsGKau+t+ssHsNshcpLApslpKIucEuIVIWG+lP.pFcLPlp+Es..WEpWW.s-YWQsoWIP-hEFlsT ..............................................................VYYDPSKDL.....IAEIQKQGQGQWTYQIYQ...EPFK...NLKT.GKYARhRuAHTNDVKQLTEAVQKIuTESIVIWGK.TP.KF+LPIQKET..WE....................................... 2 0 0 0 +6644 PF06816 NOD NOD; NOD1; NOTCH protein Guo J, Studholme DJ anon Guo J Family NOTCH signalling plays a fundamental role during a great number of developmental processes in multicellular animals [1-2]. NOD and NODP represent a region present in many NOTCH proteins and NOTCH homologs in multiple species such as NOTCH2 and NOTCH3, LIN12, SC1 and TAN1. Role of NOD domain remains to be elucidated. 19.40 19.40 19.60 19.40 19.00 19.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.64 0.72 -4.88 26 289 2009-09-11 05:54:01 2003-08-06 11:46:38 8 241 92 8 132 222 0 56.10 43 2.77 CHANGED PtpLApGsLlllVh.lsP-ph..ppsussFLRcLSplL+TsVph+pDspGpsMlaPahGp ....PtpLApGsLVlVVL.hsP-pL..hpsut.sFLRp.LSplL+Tslph+pDupGp.MlaPYaG...... 0 23 35 81 +6645 PF06817 RVT_thumb rvt_thumb; Reverse transcriptase thumb domain Bateman A anon Bateman A Domain This domain is known as the thumb domain. It is composed of a four helix bundle [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.30 0.72 -4.40 41 66982 2009-01-15 18:05:59 2003-08-06 12:13:26 9 77 671 372 39 66594 2 64.00 90 13.78 CHANGED KWplQpIpLPcp-..phTVNDIQKLVGcLNWAu.Ql.YsG.I+sKpLCKLlRG.sKsLs-hVthTcEAchElpcN+ .......................KWTVQPIhLPEKD......SW.TVNDIQKLVGKLNWA......S.QI..YsG..IK..V+..QLCKLLRG.sK.ALTEVlP.LTcEAELELAEN................. 0 13 13 14 +6646 PF06818 Fez1 Fez1 Vella Briffa B anon Pfam-B_4593 (release 10.0) Family This family represents the eukaryotic Fez1 protein. Fez1 contains a leucine-zipper region with similarity to the DNA-binding domain of the cAMP-responsive activating-transcription factor 5 [1]. There is evidence that Fez1 inhibits cancer cell growth through regulation of mitosis, and that its alterations result in abnormal cell growth [2]. Note that some family members contain more than one copy of this region. 26.40 26.40 26.90 27.60 26.30 26.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.61 0.71 -4.30 8 300 2009-01-15 18:05:59 2003-08-06 14:17:55 10 3 66 0 170 232 1 152.80 38 31.58 CHANGED TKWEVCQKSGEISLLKQQLKESQAELuQKuuEIluLRuQLREuRupLpspEtphpcLc-uh+T+sLELElCEsELQR++sEAELLREKss+l-tElscL+-Ahuup..........t..s..h...............................ESDE...AKupcts......................tsshpsLRppl-RLRAELptERp+tEcQusuFEcER+lWQEEKEKVIRYQKQLQpsYlpMYpRNppLE+cLpp 
...............................................................................................................................opWEVC...QKuGE..ISLLKQQL+-uQsElspKhuElluL+s..p.L+..-sRuphptp-tph.tL....pt.uh..........ps.ct.phc..tptph..................................p.......................................................................................................................tpt...st..t............................................................................................h.th...p.tphppLpsEL....ERpttcp.t.sFptERhsWptEK-+VlpYQ+pL.Q.sYlpMYpRNptLEptlp....................................................... 0 17 33 82 +6647 PF06819 Arc_PepC Archaeal Peptidase A24 C-terminal Domain Yeats C anon Yeats C Domain This region is of unknown function but is found in some archaeal Pfam:PF01478. It is predicted to be of mixed alpha/beta secondary structure by JPred. 21.20 21.20 21.20 62.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.18 0.72 -4.14 10 35 2009-01-15 18:05:59 2003-08-06 14:21:48 6 2 35 0 27 37 0 111.70 35 30.52 CHANGED Vh.phllhlhplpLIhtllpALTsttls.ccKpV-ELKEGDILt-hIhlpssG.Vhh-pushhcRhKphLcsEpscslp....ccllsssuEGLocEpIEcLKKLssEGKlpsEhpV .hl..phhlhlhtlpLl.hllhuLpsttls.-c+pl-ELKEGDILt-hIhhcsst.Vhh-psshhcRlKphlpstphps......tchllhssuEGLocEsIEhLK+LhpEGKlpsch.V..... 0 4 7 18 +6648 PF06820 Phage_fiber_C Tail_fib_C-term; Putative prophage tail fibre C-terminus Vella Briffa B anon Pfam-B_5030 (release 10.0) Family This family represents the C-terminus of a prophage tail fibre protein found mostly in E. coli. All family members contain a conserved RLGP motif. 
25.00 25.00 26.60 25.90 22.70 21.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.07 0.72 -3.84 2 1275 2009-01-15 18:05:59 2003-08-06 14:42:52 7 14 118 0 2 871 0 62.30 64 34.03 CHANGED hphRLGPAsIlEosppGhhPtpD.tlITtlshL.stDppplQshhp.LQlhhuDG.WpsltGhc .........hphRL.GPAsIlEospNGhhP-pD.tlITtlshL.stDtpQlQslhR.LQlhhuDGsWpsltGhc................. 0 0 0 2 +6649 PF06821 Ser_hydrolase DUF1234; Serine hydrolase Vella Briffa B, Eberhardt R anon Pfam-B_4941 (release 10.0) Domain Members of this family have serine hydrolase activity. They contain a conserved serine hydrolase motif, GXSXG/A, where the serine is a putative nucleophile [1].\ This family has an alpha-beta hydrolase fold [2,3]. Eukaryotic members of this family have a conserved LXCXE motif, which binds to retinoblastomas. This motif is absent from prokaryotic members of this family [3]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.11 0.71 -4.66 24 1243 2012-10-03 11:45:05 2003-08-06 14:56:12 8 3 948 5 341 1631 220 168.30 26 89.49 CHANGED hLIVPGapsSus.sHWQoaWpcphssu.pRVcQt-W...ppPshs-WlstLppslsttsts.......sllVAHSLGClsss+hs.....hpttc..pVtGALLVAPsDs.............p...tstthtsFuslPpp.LsFPo.....llVAScNDPasshppApphApsWGuthlslGpuGHINs-SGaGsWPpGhhlLsp 
......................................................................................................................................hlIl.G..h..p..u..s..ss...sH..W...sh.h....pc.p.l..st....t.......h........c...V.....p...........h.......sa...........pp.P...s....h....s....p........W.....h.........t..t....lp...p.......tl..s....t...t....p...p.s..........s.hlVuHSLGslssl+ah..............ttpp...pl.t.G.h.l..LVA.s.h.c..t............................................................t.....t..s.....h.h..p..s.h...s.....s....h..s...p...p..t..h..s.h.ss....................hl.....l..u.S.cNDsa.ss..h..p.p.u.p....pl..A.p.sh..s.....u.p..hhtl..spuG.Hhss.psGassastshth...p.......................................................................................................................................... 2 59 167 254 +6650 PF06822 DUF1235 Protein of unknown function (DUF1235) Vella Briffa B anon Pfam-B_4988 (release 10.0) Family This family contains a number of viral proteins of unknown function. 25.00 25.00 39.30 39.20 20.70 18.30 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.75 0.70 -5.42 11 73 2009-01-15 18:05:59 2003-08-06 15:05:26 7 1 38 0 0 65 0 226.30 54 91.54 CHANGED hIPaFs.s+I......+splhcLss.s.......hhpYht..HsphIlpEIc+al...s-plhssshlslphacpppshp...pspspshS+lllClpuAp+GGslllpsphss.p+pllp.sssphllLSPLucYsVopVp+GshlllslclsIPSMc....lhhhstpsl+asNslplLhPhpts-lsFsl+plhDhpsscllCEQllINpcWYTlls..sssp+lhlPSh..ChGpoh..chsaspss.cc-hlpp..llshpsPF-.hlhPp+sIYsuh.lt...E+llYG+l ...............hhPhFshSKI......pshl...sNss.......pphYhcsEHp+IIocEIsRpM...DEsVLLTN.ILSVEVVNcNEMY....HLIPHRLSpIILCISSl..GGCVISIDNDlN.sKNILTFPIDHAVIISPLuchsV..VsKGsshllllcsDIPShR....llsoh.sssIhYsNsLsLls.lshS..VFlIRploDhh.s++lC-QIFhNs+WYolIs....hssKpasIPSs..ClGhos...spYlssoh-pDhlh+..lhNl-hPFD.hla.+hpsYsSlsl+...EpILYG+I. 
0 0 0 0 +6651 PF06823 DUF1236 Protein of unknown function (DUF1236) Vella Briffa B anon Pfam-B_5056 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function. Some family members contain more than one copy of the region represented by this family. 20.60 20.60 20.60 21.70 20.30 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.19 0.72 -4.25 57 317 2009-01-15 18:05:59 2003-08-06 15:12:02 7 9 104 0 164 322 7 64.40 33 37.89 CHANGED YVppp.l.....tshsh..ptclsVGsslPcs..Vplpsl..Psp.....s...sYpYslVN-.chVlV-PpoRcIVpllc ..............................hlppp.s...shth...phclsVGsslPcs...Vp..lpsl...Psp......s......sYpYslVs-.chVlV-PpT+clVpVlp... 0 22 74 102 +6652 PF06824 DUF1237 Protein of unknown function (DUF1237) Vella Briffa B anon Pfam-B_4981 (release 10.0) Domain This family contains a number of hypothetical proteins of about 450 residues in length. Their function is unknown, and most are bacterial. However, structurally this family is part of the 6 hairpin glycosidase superfamily, suggesting a glycosyl hydrolase function. 
20.10 20.10 20.70 20.60 19.30 19.70 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.29 0.70 -5.87 51 1064 2012-10-03 02:33:51 2003-08-06 15:25:53 6 9 860 15 263 760 58 389.20 45 85.81 CHANGED D.-LupLF.cNsaPNTL.........DTTlca...............................................ttspspTFVlTGDIsAhWLRDSosQltsYlslsccDt...........pLppLltGsIspQschllhsPYsNAFpss.......sstput.t.sDps.........p..t................s.hVaEpKYElDSLsh.lpLutpaacpT.G-s........shhssp.WhpAlcpllclhcpppp.................................tpsh.ssYsFpRpTshuo-TLshsGhGsPls..TGLlpSuFRPSDDAolasahIP.uNhahsstLcphucl...ht.tthtsc...................LuppspphupclcpuIpcaGllp..........H.scaGclaAaEVDGaGuthhMDDANlPSLLuLP.hLGals.hcDs........................................................................................lYpNTR+hlLSpp.NPYahcGpthcGIGGPHlGhphsWPMSlllpuhTo.............sD-pEItpsLphlhsosuGhGlhHEShcssss..pcaTRsWFAWANohFuchllc ...........................................t..chsphF.ppsassTL.........-TTVch..................................................c-ssTFVlTGD.IPAMWLRDSoAQl.pPYL......hlAccDs....................pLpphItGllpRQhphlhhDPYANAFN.s.........sht....G.........ap..s.......DcT..............chs..........................s.hlWERKYElDSLCYPlpLAYhhW+pT...Gco......s.Fsp.....p..ahpAhcpILclaphEQc......................................t...t.p.SPYpF...R.....sTs.....p.......tpDTLspsGhGs....sssh..TG.MsWSuFRPSDDACpYuYLlP.SNhFAVVsLthlpEI....hps.lt....h.p.c..p.....................................lstcspcLtcEIppGIcpauhsp.............p..sph.......tcl.YAaEVDGhGs..t..l.MDDu.N.VP.SLL.uhP.YLGass..h-D.........................................................................................l..YQsTR+hlLSpc.......NPYaap..Gp.....hupGIGSsH....................os......h...pYl.WPluLuhp...GhTo.............pD....csEhcphLchLlsoDu.GTGhMHESFc..sssP.....spaTRpW.................FuWANhhFsELllc.................................................................................... 
2 95 173 226 +6653 PF06825 HSBP1 Heat shock factor binding protein 1 Wood V, Studholme D anon Pfam-B_20266 (release 10.0) Family Heat shock factor binding protein 1 (HSBP1) appears to be a negative regulator of the heat shock response [1]. 24.00 24.00 24.00 24.00 23.90 23.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.72 0.72 -4.39 20 305 2009-01-15 18:05:59 2003-08-06 17:00:52 7 4 201 2 193 288 2 50.70 47 56.10 CHANGED sp-LosaVpsLLpQhQs+FpsMScpIls+ID-M.......................upRID-LEpols-L..hspsGs- ..........h.p-LTshVpsLLpQhQs+FQsMS-p...IlsR......lD-M.......................usRID-LE+sIsDLhspsGh.......... 0 60 94 143 +6654 PF06826 Asp-Al_Ex Predicted Permease Membrane Region Yeats C anon Yeats C Family This family represents five transmembrane helices that are normally found flanking (five either side) a pair of Pfam:PF02080 domains. This suggests that the paired regions form a ten helical structure, probably forming the pore, whereas the Pfam:PF02080) binds a ligand for export or regulation of the pore. Swiss:Q8L3K8 is described as a aspartate-alanine antiporter ([1]). In conjunction with Swiss:Q8L3K9 it forms a 'proton motive metabolic cycle catalysed by an aspartate-alanine exchange'. The general conservation of domain architecture in this family suggests that they are functional orthologues. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.21 0.71 -4.76 34 4439 2012-10-02 17:06:44 2003-08-06 17:46:47 7 8 1308 0 585 2491 77 168.70 31 61.72 CHANGED slslslslGhhlGplph......hulsLGsusGsLlsuLllGpht........hshshsthlpslGLslFlhslGlpuGssFhsulpps.Glphhhhulllsllshllshllu+..lhchshshssGhhuGuhTsosulusus-t.sp.shss............luYussYshGhlhhllhs.tllhh ....................lhlslslGlhlGpl.pl..........tulpLGhuhG.....sL.....hsulllGp....h.....t..h.hh..................hs.sh..p...h.lpp...hGlhLFlhsVGlpuGs.sFhs.s.ltps..G....h....phh.hhulllsh.lshlls.hll.u...+...lh........c....h....s....h....s..hhhG..hhuGuh.Tss.PuLuhusst........sp...s...s.hsu................................................................luYAhsYsluhlhhllsspllh.h................................. 0 157 335 485 +6655 PF06827 zf-FPG_IleRS Zinc finger found in FPG and IleRS Bateman A anon Bateman A Domain This zinc binding domain is found at the C-terminus of isoleucyl tRNA synthetase and the enzyme Formamidopyrimidine-DNA glycosylase EC:3.2.2.23. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.82 0.72 -4.26 143 6847 2012-10-03 10:42:43 2003-08-06 17:58:27 9 22 3732 62 1464 4839 1582 29.50 34 5.63 CHANGED uphC.R..Chph.hpchth..stctshhCs+Cpph ..........Gc.CtR..Ct..s.....h..lpclsh.....ss+sshhCscCQp..... 0 441 915 1228 +6658 PF06830 Root_cap Root cap Vella Briffa B anon Pfam-B_5867 (release 10.0) Family The cells at the periphery of the root cap are continuously sloughed off from the root into the mucilage, and are thought to be programmed to die [1].This family represents a conserved region approximately 60 residues in length within plant root cap proteins, which may be involved in the process. 
20.80 20.80 22.50 22.40 18.60 18.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.73 0.72 -4.13 18 153 2009-09-10 23:47:36 2003-08-07 11:46:05 6 5 23 0 81 151 0 55.20 54 17.43 CHANGED uFKFasLSscVcGVLGQTYRsDYVNp.lcluusMPVMGGsccYtTSuLFusDCsVuRF ......sFKFa.sL..ospVcGVLGQTYRssY.ls....lcluss...MPlM.GGtscYts.SsLFusDCtVuRF...... 0 9 43 70 +6659 PF06831 H2TH Formamidopyrimidine-DNA glycosylase H2TH domain Bateman A anon Prosite Domain Formamidopyrimidine-DNA glycosylase (Fpg) is a DNA repair enzyme that excises oxidised purines from damaged DNA. This family is the central domain containing the DNA-binding helix-two turn-helix domain [1]. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.00 0.72 -4.23 19 5357 2012-10-02 21:21:44 2003-08-07 11:51:56 9 25 3675 70 1277 3734 2235 91.20 32 32.59 CHANGED LGP-PL.........p..sh...............t.htptltc...p....+.....+slKshLLDQplluGlGNIYsDEsLFtutlcPpphAssLstpchphlhpslpplLpcAlphstsshph.s ...............................................LGP-PL.............sssh.ss.......................p.h..l..tp.t.l.tp......+......p.................ps.I..K.shLL..DQ.p.ll.....u...GlGNI.......YssEsLap...Atlp.Pp+.....ussL......o......t......t............c......h......p......t......LhpsltplltpulptGGoo....th...................................... 0 400 799 1061 +6660 PF06832 BiPBP_C Penicillin-Binding Protein C-terminus Family Yeats C anon Yeats C Family This conserved region of approximately 90 residues is found in a sub-group of bacterial Penicillin-Binding Proteins (PBPs). A variable length loop region separates this region from the transpeptidase unit (Pfam:PF00905). It is predicted by PROF to be an all beta fold. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.98 0.72 -4.06 111 1158 2012-10-03 16:25:20 2003-08-07 12:26:00 7 11 1137 0 250 947 46 88.70 30 12.10 CHANGED ststts..tts.ss.pIhhPscGuhltls.s..........t........ttplslcstuu.ps.........hhWhl.......sGp.l.spstptcphthts....tsGhapLol..lDssGpssp..lph ..............................ss.s.....ssp.hsLplsusp..-.Gu.hlphhsst.........s................pssLs.lpssGG.su........chaWaL...................NGcsl........s..pp.....s.c.slshph....tptGc..a....pLhV..hD-sGphss..Vph............... 0 69 147 199 +6661 PF06833 MdcE Malonate decarboxylase gamma subunit (MdcE) Moxon SJ anon Pfam-B_10907 (release 10.0) Family This family consists of several bacterial malonate decarboxylase gamma subunit proteins. Malonate decarboxylase of Klebsiella pneumoniae consists of four different subunits and catalyses the conversion of malonate plus H+ to acetate and CO2. The catalysis proceeds via acetyl and malonyl thioester residues with the phosphribosyl-dephospho-CoA prosthetic group of the acyl carrier protein (ACP) subunit. MdcD and E together probably function as malonyl-S-ACP decarboxylase [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.39 0.70 -5.16 9 323 2012-10-02 13:07:06 2003-08-07 12:47:24 6 2 311 0 94 1349 480 245.50 37 82.37 CHANGED shhsuLhss...tpuhsssstVlcGphst.sst.lsVlussN.......putlGl.EuhuLAtsV.-sl....ppts+pPIlsllDssSQthuRR-EhLGIppsLAthupuhshARhuGHslIGLlhGcAhSGAFLAaGhtAscLlALs..Gs.l+sMshsuhARVTphoVEtLcsLAtosPshA.sl-NYsphGhlpthhssppsps.uss.hs.lpp..tt..tDt........pRhsGtpRtupphspclhccsh .....................................................................hhsuL...h.st.......hpuh..s.s..ss...p....lh..sG..p..h..st...ss+hluVV...ss...sss.h..................RstpGEVG..L.......E..G..as..L..A.......ps..V..-s.l....stcpss..KRPIlsllDVsSQAYGRREEhLGIapALAsuucuYApA.Rh.AG..H..PlIGLlsG+Ah...SG...A.F...L...A...H...G...h.Q...AsRLlALss..Gshl+sMuKtuA.A......R.V..........T.h..R.........o..........V..........-.......pL......E.....p....L....Ap.o.lPshAYs.lcNY........sp.L........GhlpplLs..s......s..s.c.s.s.s.s..st..l.s..h.lppslsp.shtDh................pchtutpRtupthspchhpt..................................................................................... 0 12 40 65 +6662 PF06834 TraU TraU protein Moxon SJ anon Pfam-B_10708 (release 10.0) Family This family consists of several bacterial TraU proteins. TraU appears to be more essential to conjugal DNA transfer than to assembly of pilus filaments [1]. 
25.00 25.00 29.50 25.80 23.40 24.10 hmmbuild --amino -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.02 0.70 -5.29 58 720 2009-01-15 18:05:59 2003-08-07 12:50:46 6 3 485 0 138 610 39 276.00 32 88.84 CHANGED uphlss..hsssshsCl.shplsGhsh.....................hhhC.ss.sshh+sulpluaapPstlVsshpssts.Phltstplshu.....................shsp.tttsssppspssh....sFhpschhuaPhhhhh.shlss...........................................................................................................hsC.ssssh....halSplDsh.Wpssh..thl.....PEAll.Gt.htl..uussuss...................saGslYPhsGh.sspssshpuuullupRhsshlp...R..pshlat..shus................................uhshh..hstPlhc...pppa+aQhlhPhssss......Ctsasposhh.t...spphssss.ssauahlWRhhsCCtt ......................u.thhsslsshthsCl.shpluulph....................hhhC..ss...s.hhh+hultluaapP.tllsshpsPGs.s.lt..hthhshs.............................tstt.s.ssptppcpssh....sFhpschataPh.h.h..hl.shhss.............................................................................................................................hsC.tsush...........lhYLSElDPh.Wpss...h....sthlt............PEAllhus..hs.uAssu-s....................................................has.ts.Gs..hYPhsGa..s.....spssshpuusllupRhsthlp....R...Qshlhp..shut.s......................................uhCh....hssPlh.....+ppa+aQhltPhssss..............................C.pshs.c...oshhht.........sps.s.....s...s.....t....psauahlWRhhsCC........................................................... 0 30 71 111 +6663 PF06835 LptC DUF1239; Lipopolysaccharide-assembly, LptC-related Moxon SJ anon Pfam-B_11065 (release 10.0) Family This family consists of several related groups of proteins one of which is the LptC family. LptC is involved in lipopolysaccharide-assembly on the outer membrane of Gram-negative organisms. 
The lipopolysaccharide component of the outer bacterial membrane is transported form its source of origin to the outer membrane by a set of proteins constituting a transport machinery that is made up of LptA, LptB, LptC, LptD, LptE. LptC is located on the inner membrane side of the intermembrane space. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.79 0.71 -4.87 64 2204 2012-10-01 21:43:16 2003-08-07 12:52:59 8 16 2061 1 556 1696 1443 166.70 19 80.55 CHANGED hllhhllsuhh.hhhhptpsst.........sthpspps-ahspshpsppaspsGp.hpaclpuschc+as.ssst.........shhspPslhhapssp....................WplpuppuplsptsphlpLhssVtlpphtspsp..............hpplpTsphplphpsphhpo-psVplpsssh..thsGhGhcuslcspphpLhspV+...sp.a- ..........................................................................................h..hhh.lhsh...hhh...tt.................ptpp.ssh...h.......pp..hph.h........h.s....p.Gt...h.....pap.lhu.p...c.h.p.hhs...sps.h..........sh.h.p.p...P...h...h..pha.spsp..............................ssh.p.lpAccu..p.lss.....s.p..hlh...L.hu.s.Vplpsh...ttp..............................htplpTc.p...hplshp...s..p...p.l..p..o...-..c......Vp..l..h...s...ss..h......ph......s.G.h.t.hc..ssh..ps..p.phplh.p.pVps.............................................................................................. 1 161 349 464 +6664 PF06836 DUF1240 Protein of unknown function (DUF1240) Moxon SJ anon Pfam-B_11130 (release 10.0) Family This family consists of a number of hypothetical putative membrane proteins which seem to be specific to Yersinia pestis. The function of this family is unknown. 
22.00 22.00 22.00 22.00 21.50 21.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.17 0.72 -3.77 16 192 2009-01-15 18:05:59 2003-08-07 12:54:50 7 2 47 0 51 115 0 92.90 45 71.30 CHANGED DcIpFSusVhIhhFSsPLlhYhhshulashIpN+hPKapcphs+hLshlAlhShllSFPlSFYVcYKLKupuYlVCsRISWMSPNsYVKDlpLCc .............DhIhFShtlsl.hlhusPLLhYh.hhuh.ahhI........hN+.........s........KhN...s.hlsphLs...hlAl.luhllSh.hShYlsYpL+ptGYlsC.sRh...S.WMS..P....NpYVKDlpLCc............ 1 0 22 26 +6665 PF06837 Fijivirus_P9-2 Fijivirus P9-2 protein Moxon SJ anon Pfam-B_11357 (release 10.0) Family This family consists of several Fijivirus specific P9-2 proteins from Rice black streaked dwarf virus (RBSDV) and Fiji disease virus. The function of this family is unknown. 25.00 25.00 30.60 189.80 24.20 22.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.39 0.70 -5.20 3 20 2009-01-15 18:05:59 2003-08-07 12:58:00 6 1 6 0 0 20 0 206.50 68 99.93 CHANGED MDs.ppSV.hDoYTFpCPFEL...AKIclcuhpNsMp-VoNFsslF-hshSDSElDD+VDsLElcVEcsssPLl+RtYGKVG+IlahIlSFLFFGIFKLVLKhFYHLF+CVhCNPLoRsllSIIFTIlFYhhLhVsIYLLaaFFGDsIIpslNsLNp.cSsNFlNSTQshsuKVEEpVtKIIQsspLlFuppDpNslQpN-KussVsNGsTsNaTLFN MNP.QSSVNlDTYTFNCPFEL...AKIQIEShKPIMQDFSNFD-IFERsLSDSEIDDRVEpLElDVEuKVDPlVRR+YGKlGHIIlMIISFVFFGIFKLTLKMFYHLFRCVCCNPLIRGIhSIlFTILFYhLlhVsIYhVYaFFGDQI.tsYNoLsphcsSshINSTp.....VEEKVcNIIH-GSLFFGohDpsTGplpElEpQVsNGGTVNYTLFN. 0 0 0 0 +6666 PF06838 Met_gamma_lyase Alum_res; Methionine gamma-lyase Vella Briffa B, Haft D anon Pfam-B_5307 (release 10.0) Family This is a putative pyridoxal 5'-phosphate-dependent methionine gamma-lyase enzyme involved in methionine catabolism. 
24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 405 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.14 0.70 -6.17 14 999 2012-10-02 18:26:03 2003-08-07 13:00:36 6 5 922 20 186 1314 419 385.50 52 96.17 CHANGED h.plhcpAEtslhshFcplDchschNQh+VLpAFpccRlS-pHFssoTGYGYDDlGRDTL-pVYAcVFGAEuALVRPQhVSGTHAIusALFGlLRPGDELLhlsGpPYDTLEEVIGl+Gp.stGSL+-FGIsY+pVsLptpGclDapslppulp..spTKlltIQRSpGYuhRPShoIs-ItchIphVKplNPslIlFVDNCYGEFlEppEPscVGADLhAGSLIKNPGGGIApsGGYlAGKp-hlEtuuYRLTuPGIGpEsGAohsphp.haQGLFLAPHVVuEAlKGAhhsAtlh-clGasspPpasspRoDlIQulpFsscEKhIAFCpuIQtuSPIsualpP.PusMPGYEDcVIMAAGTFIQGSSIELSADGPlRtPYhuYlQGGLTasHVKlAlhtAlsplh ....................................................................................................h.tlhpclEppltshaccl-ch.s.hNQhKVLpAFpcp+lo..-.pcht.s.o.T.G.YG.Ys...Dh...G...R...Dp....LEcl...YAp..lFtu.E.s.AL.VRPQllSGTHAIuhALhulL...RP...GDELlh...lT...G.pPYD.TL..pEVIG.l............p.Gp..uh....GS.L.t.E.aG.lsYcp..lsLp..-t..Gpl.Dh-sltpsl.p..p.p.T.KllsIQRS+GYu.pRsShsls.............cIcchIs..hl..K.plp...P....s.lllFVDNCYGE.FlEppEPscl.G.AD....LhAGSLIKNP.GG.Gl.A..s.GG.Y.I.s..G.+.c.-.Ll.Et.s...u..Y.RLTuPGIGpEsG..A..oL.s..s.h..p..p..h..YQ........Gh....FLAP....+.V...VupAlKGAlFs..A..t..h..hE.chGh.p.ss...Ppast...R.oDlIQsVpFpstEphIuFCpuIQtuSPlsuahsP.PshMPGYED-VIMAAGTFIQGSSIELSADGPIRsPYtsYlQG.GLTYpHsKlAlhpAlpphh................................................................................................................... 0 77 135 161 +6667 PF06839 zf-GRF GRF zinc finger Bateman A anon Bateman A Domain This presumed zinc binding domain is found in a variety of DNA-binding proteins. It seems likely that this domain is involved in nucleic acid binding. It is named GRF after three conserved residues in the centre of the alignment of the domain. This zinc finger may be related to Pfam:PF01396. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.49 0.72 -4.03 14 1335 2012-10-03 10:42:43 2003-08-07 13:01:28 7 90 280 0 771 1259 16 45.30 32 10.05 CHANGED shCs.CG.phshhhssp.sG.NpGRpFYpCPhsc......tCsFFpWsDps ............hCp.C....u..p..s..h..h.hs.sp.p.s..u..sNp....GRp.........FasC..stspt..................ttC..s.FFpWt-........................ 0 155 290 523 +6668 PF06840 DUF1241 Protein of unknown function (DUF1241) Moxon SJ anon Pfam-B_11380 (release 10.0) Family This family consists of several programmed cell death 10 protein (PDCD10 or TFAR15) sequences. The function of this family is unknown. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.92 0.71 -4.97 6 159 2009-01-15 18:05:59 2003-08-07 13:03:01 6 3 106 19 95 131 2 130.10 42 55.91 CHANGED hs--ss.lsuh........sL.llhcPlhscLE+.cchs.....AsQpL+sAhhKuEppsPGhohDllssIlc+t..-lsVNhsEolLRhtutss-....EYphsRpEscFpELs+KAtsLKpILS+IPDEIsDR+sFLETIK-IASAIKcLLDAVNpVachlPs....hosKpAlEc .............s.......hsSh........sL.slhhPlFscLE+..hshu......AAQoLRuAhhKAEppsPGlTpDllhpIlc+t..slplNhsEolL.Rhtusts-....Eahl.pRsE.tFQ-Lsc+uhuLKpILS+IPDEIsDRhpFLpTIK-IASAIKcLLDsVNplhp...hh.......ps+pAlEp................. 0 34 42 70 +6669 PF06841 Phage_T4_gp19 T4_Gp19; T4-like virus tail tube protein gp19 Moxon SJ anon Pfam-B_11507 (release 10.0) Family This family consists of several tail tube protein gp19 sequences from the T4-like viruses [1,2]. This famiyl also contains bacterial members which suggest lateral transfer of genes. 
19.20 19.20 19.30 19.20 18.70 19.10 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.57 0.71 -4.53 97 689 2009-01-15 18:05:59 2003-08-07 13:07:47 7 4 303 0 272 645 2092 139.90 19 81.40 CHANGED ssFpFtVp.h..............ssht..................suFpcloGLshchpshpa+c..Guss......hhh....+hPGthcassloL+RGls...p......spp..lhpWhp......pshpsth........................++slslpLhscsu.p...................l.h.sWplhpAaPs+h...........su.sslsA....susplAlEolcLsa-thp ...........................................................h.F.lp.h..................ss.........................hth..p.pssuhp......hp......hps.hphtp...Gsts............hhh......phs.G...p.hp.a.s.s.l.oLcpslsp........spt.hh.pWhp.............pstssth..p.....................................ccshslpl....h.spss..p.........................................sl.h..paplhsuaP.sch...........ss.ssLsu....sssp.l.shpslplsacth................ 1 94 202 251 +6670 PF06842 DUF1242 Protein of unknown function (DUF1242) Moxon SJ anon Pfam-B_11544 (release 10.0) Family This family consists of a number of eukaryotic proteins of around 72 residues in length. The function of this family is unknown. 25.00 25.00 25.60 28.20 24.70 23.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.70 0.72 -4.45 32 317 2009-01-15 18:05:59 2003-08-07 13:10:49 7 9 230 0 201 288 0 35.50 49 38.14 CHANGED LLhVlLLlICTCoYl+thhPullDp....s+.sGhhGhFW .....LLsVlLLlICTCsYl+phhPullDp..............s+.sGhhGlFW.......... 0 63 105 161 +6672 PF06844 DUF1244 Protein of unknown function (DUF1244) Moxon SJ anon Pfam-B_11743 (release 10.0) Family This family consists of several short bacterial proteins of around 100 residues in length. The function of this family is unknown. 
21.20 21.20 22.70 21.80 20.00 18.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.31 0.72 -4.15 69 402 2009-01-15 18:05:59 2003-08-07 13:15:05 6 6 397 4 128 343 734 67.20 61 61.82 CHANGED VQNIDLMNLAGFCRNCLS+WYp-AAp-pGlplsc-pAREhlYGMPYs-WKupaQscAosEQhAuFcts ...........VQNIDLMsLAGFCRNCLS+WYpsAA-cpG.l-lshD-AREtVYGMPYsEWKupaQscAosEQhAAFct.p............................ 0 37 73 98 +6675 PF06847 Arc_PepC_II Archaeal Peptidase A24 C-terminus Type II Yeats C anon Yeats C Domain This region is of unknown function but is found in some archaeal Pfam:PF01478. It is predicted to be of mixed alpha/beta secondary structure by Prof. 26.20 26.20 28.10 26.90 26.00 19.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.23 0.72 -3.56 21 99 2009-01-15 18:05:59 2003-08-07 13:22:34 6 2 90 2 63 95 11 90.10 24 34.07 CHANGED hhKchlhhhhGh+hclp................p....hc.cpthhLsptp-.tt.h...hh.hpsh.psssphptplpcatcc....cp..lWVTPGlPFllsIshGalluhlhGDh.l .............................................................................h......h.h.u..hpht.................t....ht.t.hh.hhptct......................h...h.htshtp.sspphpcthpphscc........cp..lWVoPGlPallslhlGallullhGDhh..... 0 15 39 53 +6676 PF06848 Disaggr_repeat Disaggregatase related repeat Moxon SJ, Mistry J, Adindla S anon Pfam-B_11958 (release 10.0) Repeat This family consists of several repeats which seem to be specific to the Methanosarcina archaea species and are often found in multiple copies in disaggregatase proteins. Members of this family are also found in single copies in several hypothetical proteins. This repeat is also known as DNRLRE repeat and is predicted form a mainly beta-strand structure with two alpha-helices [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. It is found in some cell surface proteins. 
27.60 27.60 28.30 27.70 27.30 27.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.13 0.71 -4.86 16 49 2009-01-15 18:05:59 2003-08-07 13:26:16 6 15 13 0 31 50 4 168.90 48 32.12 CHANGED DNRLREuSP-sVap-osaIDlGuhsssG+YRDlhhF-LScYsssspIss....ATLSLYWYYPsspsRPEDTVlElYRPAuuWNPsYVSWNpRDcslsWpNsGGDWYDKNGVLQGSTPYATlTlKGSsLPDNRYYELDVTDLVKEYVSGKYENTGFLIKARoEssNYIAFYSs-sssEsQcPKLsl ....................DNRLREuSP-sVap.sosaIDVGuhsss.+Y...RDlhhFDLSpas..s.ssclss....AsLSLYWYYPs..up..sRscDTllElYRPAou.....WsssYVoWNp+DpslAWpNsGGDWYD+sGlhQGsTPYAolTl+GSpLP.D.N+YYElDVT-LVpEYlSG+YEN...TGFLIKuRsE.ss.NYIA........FYSs-sssEsQhP+Lpl............................................................................... 0 13 14 14 +6677 PF06849 DUF1246 Protein of unknown function (DUF1246) Vella Briffa B anon Pfam-B_5448 (release 10.0) Family This family represents the N-terminus of a number of hypothetical archaeal proteins of unknown function. 25.00 25.00 113.00 112.20 21.60 20.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.55 0.71 -4.35 43 187 2009-01-15 18:05:59 2003-08-07 13:58:34 7 1 109 19 117 182 116 124.90 42 35.14 CHANGED IuTluSHSALpIhcGAKcEGF+TlslCpcuR.-phYpcFs.............hsD-hlll-cap-lh..chtpcLhcpNuIllPHuSalsYlGh-plEs.htVPhFGNRplLRWEs-Rstcpp...LLccAGI+hP+tac .IuTluSHSALpIhcGAKcEGFcTlslsp.+sR.cthYpcF..............hhD-hlll-pas-ll..chtccLhcpNuIllPptSFlsYl....Gh-plEs.hpVPhFGNRplLRWE.-Rstcpp...LLccAGlchP+ha... 0 29 58 87 +6678 PF06850 PHB_depo_C PHB_depo_C-term; PHB de-polymerase C-terminus Vella Briffa B anon Pfam-B_5697 (release 10.0) Family This family represents the C-terminus of bacterial poly(3-hydroxybutyrate) (PHB) de-polymerase. This degrades PHB granules to oligomers and monomers of 3-hydroxy-butyric acid. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.16 0.71 -5.10 11 544 2012-10-03 11:45:05 2003-08-07 14:31:15 6 2 381 0 215 658 140 200.80 49 46.95 CHANGED GGPIDsRtsPTsVNpLApc+sh-WFcpNlIhpVPhsYPGtGRcVYPGFLQLuGFlSMNhDRHlpAHhDhatcLV+GDG-pA-+HpcFYDEYLAVMDhoAEFYLpTVcpVFpcatLPpGchtpcGphVDhssIpcsALhTVEGENDDISGlGQTpAApcLCsuIPcs+KtaahQsuVGHYGVFsGpRaRppIhPplt-FIcchs ....................................................................GGPI.DsRtsPTsVNpLAp...p+shsWFcpNlIppVPhsaPGtGR+V..YPGFLQhsuFhuMN.-RH.h.puHh-happL.lcGDs.s.sA-tHRcFYDEYh.A.VhDhsAE...aYL-TlcpVFQcatLsp.Gphph...c.....G......c.....h.....VcPssI.+csALhTlEGEpDDIoGsGQTcAApcLCoulPssp+p+ahtsssGHYG.lFsGpR.WRppIhPtlccFIpp.s.............................. 0 42 108 152 +6679 PF06851 DUF1247 Protein of unknown function (DUF1247) Vella Briffa B anon Pfam-B_5762 (release 10.0) Family This family contains a number of hypothetical viral proteins of unknown function approximately 200 residues long. 25.00 25.00 29.90 194.50 19.10 18.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.89 0.71 -4.55 17 44 2009-01-15 18:05:59 2003-08-07 14:34:31 6 1 39 0 0 43 0 148.50 51 72.01 CHANGED LGDVlQpMGRp.shLLtc..KKD--FcIspph-LS-ps+-YLNhLQpEKLapCRLCYp+sDph..RC-FH++YlFspstchphD-YVpFLNS-MGllSaVELYYsYLus...ss.W+hsAphhL+cLTsFpSlp-LLsaYNYphspDsDsssaEhMD LGDVlQpMGRp.phLLpc..KKD.--FcIsEph-LS-ps+-YLNhLQpEKLapCRLCYp+scsh..RC-FHKKYlFccshchtsD-YVpFLNS-MGlISaVELYYoYLus....ss.W+hsAphhL+-LTuFpSlpcLLsaYNYshssDsDpssaEhMD. 0 0 0 0 +6680 PF06852 DUF1248 Protein of unknown function (DUF1248) Vella Briffa B anon Pfam-B_5811 (release 10.0) Family This family represents a conserved region within a number of proteins of unknown function that seem to be specific to C. elegans. Note that some family members contain more than one copy of this region. 
21.80 21.80 21.90 22.70 21.40 21.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.10 0.71 -5.00 10 63 2012-10-02 22:59:21 2003-08-07 14:38:07 7 2 5 0 60 63 1 176.10 26 53.84 CHANGED p-slDllhNPs-chlDsaMKhaGNpRhsFK+EDIupW+cSFsDtY+htlhsLKGTs+lItosHslpF+PLsss.c.psahalGhuWIcP-YRG+ssh+lh-shsppct+s.psDNhlApssphupsFW+KhpG...+sDhGH...plhYlSaYchpDhplP-cL-hsG...IsVKNAREVPc+DIlcYDpol ..........................h.tsl-llhNPspchhDpah+...hh.Gp..p.R.hsF+psDlphWppuFcc.YphhhsshKs...............T..........s.....cllussphhpapslp...s.c.tshhhhGhhahsP-YRupsh.h.+.lhsphhhc...hpp.s.ssN.ssu.p.sss...phtp..happhhG...tpchuc....hhYhShYchs-lhlPcs.LshsG...lhlKss.p-VstcDllpYDpsl................................................................. 0 18 22 60 +6681 PF06853 DUF1249 Protein of unknown function (DUF1249) Moxon SJ anon Pfam-B_11475 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. 25.00 25.00 31.30 31.00 19.20 19.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.54 0.71 -4.48 50 853 2009-01-15 18:05:59 2003-08-07 15:20:06 7 1 849 0 120 359 264 119.90 59 83.45 CHANGED h+Lh+LLPstcp..sp...shphphs......................shph......plcllEso+YToh.............................................lclspptsts..........hhssPplpVRlYHDA+hAEVlsspphp+lcshYsYPNtpMaphDEKhQlNtFLu-WLpaCLcpG.....pph.tsl .......................QLRRLLP+sDu..sGE.....oVuYQVu............................sspY....RLTIlESTRYTTL.............................................VpIc...QTsPulo.........aWSLPSMoVRLYHDAhVAEVCSSQQ.Ia.RFKARYDY.P.NKKLHQRDEKHQINQFLADWLRaCLAHGAhA..lss....... 0 23 49 86 +6682 PF06854 Phage_Gp15 Bacteriophage Gp15 protein Moxon SJ anon Pfam-B_11759 (release 10.0) Family This family consists of bacteriophage Gp15 proteins and related bacterial sequences. 
The function of this family is unknown 19.90 19.90 20.90 20.70 18.70 19.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.38 0.71 -4.75 9 188 2009-01-15 18:05:59 2003-08-07 15:23:09 6 1 166 0 19 164 6 160.00 27 87.85 CHANGED pLs-slhchapacsp-YplDLSFcsVL+la-lhcDcplosspKsplslclLh...........ttphhph.cctsplhlcIhppaIsh-p..c-tlphDlcGN.MP.......ptp..pc+hhsappDA-aIaASFhQsYpIsLlcppsK......................LpWhcFpALLsuLs-sThhppIIpIRphEhsp..ptstcERpplhKLKstYpL ..................................................................................................L...h.p.h.htsttatlshsFppslphhplhp.Dp.plst..+h.hhlphhh.......................sh..tp.t...hhh.....hhp...phl.ht....t......c...ss.h.....................t....ppthhshp.Duch..IYuuFhptY.sIcL.hcp.p..sc......................hpWhcFpALhpuLs.-c...T.hppIltIRsh-.sp....ttspc...c+cphpchpphYtL.......................................................... 1 12 17 19 +6683 PF06855 DUF1250 Protein of unknown function (DUF1250) Moxon SJ anon Pfam-B_11942 (release 10.0) Family This family consists of several short hypothetical bacterial proteins of around 70 residues in length. Members of this family seem to all belong to the order Bacillales or Lactobacillales. The function of this family is unknown. 21.10 21.10 21.50 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.19 0.72 -4.44 47 1637 2009-01-15 18:05:59 2003-08-07 15:27:40 7 2 984 4 102 381 1 46.20 35 64.48 CHANGED hAspsapDpsFPKpspcacclSsYLEh.susah.shssFDcuaptY. ..LA-hhhcDtuFPKpscc.....acplpsYlcp.ss.....shp.shsshDchaphY.................... 0 20 45 79 +6684 PF06856 DUF1251 Protein of unknown function (DUF1251) Moxon SJ anon Pfam-B_12000 (release 10.0) Family This family consists of the N-terminal region of several hypothetical Nucleopolyhedrovirus proteins of unknown function. 
19.80 19.80 20.30 23.70 18.00 17.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.72 0.71 -4.61 20 50 2009-09-10 22:02:51 2003-08-07 15:30:53 6 1 41 0 0 50 1 118.40 34 54.34 CHANGED chslssKs+sshh+a.................KVplplss..sscalQATFsstpcpVslVN.psppc.IlFDGFsc..-DEutTsPFlV.GsLhul....pspp.hthpVRchscAhEs.pTlL+lFlNEAhl..psshss ................hslss+s+pshh+a.h...............+Vhlpl-u...sspalQATFpst..pcpVslVN.psppp.lhFDGFsc...-DEupThP..Fll.usLpsl.....pssp..shcV+-hscAhEp.sThL+lFlNEAhl..tsph.s............. 1 0 0 0 +6685 PF06857 ACP MdcD; Malonate decarboxylase delta subunit (MdcD) Moxon SJ anon Pfam-B_12010 (release 10.0) Family This family consists of several bacterial malonate decarboxylase delta subunit (MdcD) proteins. Malonate decarboxylase of Klebsiella pneumoniae consists of four different subunits and catalyses the conversion of malonate plus H+ to acetate and CO2. The catalysis proceeds via acetyl and malonyl thioester residues with the phosphribosyl-dephospho-CoA prosthetic group of the acyl carrier protein (ACP) subunit. MdcC is the (apo) ACP subunit [1]. The family also contains the CitD family of citrate lyase acyl carrier proteins. 25.10 25.10 26.20 26.00 22.90 22.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.80 0.72 -4.07 40 1378 2009-01-15 18:05:59 2003-08-07 15:34:27 6 3 1149 0 198 624 11 86.00 40 84.14 CHANGED cIpcsAlAGTLESSDl.VplsPu.s.sslplplpSs..Vt+QFGppIcpllpcsLpphuVpssplplsDKGALDCVl+ARlpsAltRAsc .........cIppsAlAGTLESuDlhIpltPhps..p......s...........l..clplsSS..VpcQFGctI+pslh-sLs..+hsVpusplslcDKGALDCll+ARlpsultRAu.t.......... 0 44 96 143 +6686 PF06858 NOG1 Nucleolar GTP-binding protein 1 (NOG1) Vella Briffa B anon Pfam-B_5853 (release 10.0) Family This family represents a conserved region of approximately 60 residues in length within nucleolar GTP-binding protein 1 (NOG1). In S. 
cerevisiae, the NOG1 gene has been shown to be essential for cell viability, suggesting that NOG1 may play an important role in nucleolar functions [1]. Family members include eukaryotic, bacterial and archaeal proteins. 20.90 20.90 20.90 22.30 20.70 20.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.76 0.72 -4.09 25 503 2012-10-05 12:31:09 2003-08-07 15:51:30 9 15 386 1 340 554 80 57.60 51 10.12 CHANGED sIEhQAIsALsHLpuslLFlhDhScpCGY...olcpQhcLacpI+slF.spPlllVlNKhD ............sIEMQuITAL.AHLRus.lLYhhDlSEQCGa...olcpQl.pL.Fcs.I+PLF....s....N.K...P....lllVhNKsD........... 0 115 195 278 +6687 PF06859 Bin3 Bicoid-interacting protein 3 (Bin3) Vella Briffa B anon Pfam-B_5564 (release 10.0) Family This family represents a conserved region of approximately 120 residues within eukaryotic Bicoid-interacting protein 3 (Bin3). Bin3, which shows similarity to a number of protein methyltransferases that modify RNA-binding proteins, interacts with Bicoid, which itself directs pattern formation in the early Drosophila embryo. The interaction might allow Bicoid to switch between its dual roles in transcription and translation [1]. Note that family members contain a conserved HLN motif. 21.00 21.00 21.20 21.10 20.90 20.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.38 0.72 -4.07 5 294 2012-10-10 17:06:42 2003-08-07 16:17:37 7 11 155 6 192 292 6 98.80 41 22.77 CHANGED cFDVILCLSVTKWVHLNWGD-GL++hFRRIYppL+PGGlLILEPQuWDSYcKR+KlSEslppNYpsIcl+PDcFpcaLlsscVGFpohE.LsulssusSKGF.cRPIhlFpK ............paDllLCLSlTKWlHLNaGDpGLpphFp+laphL+s...........G..GhLllEPQsWpsYp...+t....c....p........h...s............c.......p...h....h....pp....a.plp......h...hPp...p..F.t....phLht....l.GFt................ht................s.....h.pR.l.hh............................................................ 
0 71 95 146 +6689 PF06861 BALF1 BALF1 protein Moxon SJ anon Pfam-B_12069 (release 10.0) Family This family consists of several BALF1 proteins which seem to be specific to the Lymphocryptoviruses. BALF1, inhibits the antiapoptotic activity of EBV BHRF1 and of KSBcl-2 [1]. 25.00 25.00 27.50 27.10 21.10 20.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.05 0.71 -4.86 2 16 2009-01-15 18:05:59 2003-08-07 16:34:37 6 1 13 0 0 16 0 171.80 39 88.56 CHANGED M.....TDsVF.cs......APs.ss-DchscushLhhRsMhAsahpD..pGLsh.thlhhRLIKt.hKK-cKhaA-lsspsus.sshHuHlphlhohhRAlY-DHhD.W.RlRslhshsVsaAhRNh.sDpEsAuhlLsuhAcaLsLYRRhWhuRhGGh.huLRRtFPlpWhhhtls..hhp. .............................................clucoS.lhh+shaAVho.pD..c-.LslsttVLscLlKtSl++sh+las-.Lssps..uchuuccs+lphlhsllRtsYsDphD.apRLpssLsYsslahshsh.sDpcssuhVhsslA+ahshaRphWhuRlGGhspuLR+pFPspWshspLp.aLpp.h...... 0 0 0 0 +6690 PF06862 DUF1253 Protein of unknown function (DUF1253) Vella Briffa B anon Pfam-B_6227 (release 10.0) Family This family represents the C-terminal portion (approximately 500 residues) of several hypothetical eukaryotic proteins of unknown function. 
19.10 19.10 19.10 19.10 19.00 19.00 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.50 0.70 -6.08 36 368 2012-10-05 12:31:09 2003-08-08 10:01:07 7 4 289 0 255 543 41 379.60 36 61.37 CHANGED KoRs+llKNsp+Lpcptp................t-.-hRDQGFTRPKVLIllPhRssAhchVph.Llplhss...............pphcN+p+FpcpF.......ppptss............ppK.....PcD....................applFpGNsD.DhF+l.....GlKh..o+Ksl+LY......SsFYsSDIIlASPLGLchllps...........................................................sp......KKc-hDFLSSIEllllDpschl.MQNWpHltplhcalNphPpc.t+s.sDFSRlRhWhlsspA+haRQTllhosa.sPphNSlhsppstNhpG+l+hpshhpt.....................................uslsp.........lslpl+QhFpR..hc........usSlhsssDsRFcaFsssllPplh........sshtsssLIalPSYhDalRlRNYh+..........ppsloFusIsEYosppclsRuRphFhpG+tplLLhTERhHaa+RYplKGl+pllFYu.Pp.PpFYsEll.phlstost...............hc.s.ssspslYSKaDuhpLERIVGscRAsphlpup.pc...sapF ..........................................................................shll+sstpltt.t.t.....................p.-hRDQGhTRPKV.LlllPhRpsshchVph.lhp..lh.s.......................p.ps+pRF.ppas...........tts...............................+.....PpD..........................apt.lF..t.G..N...D...DhF+l.....Glph............s++o.......l+la......utF..YsSDIllASPLGL+hhltt................................................................ttt...........+cp-hDF.L.S....SIEllllDpA-hh.hMQ.NW-Hl.....hlh.p.plNh.Ppc.....tHs..sDhuRl.R..WhLss.u+ahRQTllhosh..s.sp.hsulhs.p....h...sh.....NhtGplphps..t........................................................usltp.........lhl.pl....QhFpR..hc.........sps.hh.p..s.D..s..RFpaF...sp.....pl..l.Pphh...........ss..stsLIa..lPSYhDaVR...lRNa.hp..........ppplsFstIsE......YoptpplsRARphFhpG.c..tphLLhTERhHaa+RYp.l+...G..l+pllaYthPp.PtFYsElh.sh.lttstt.....................................tthsspsla.o+aDshpLpplVGspRstphhpsp..tssa.F............................................................ 
2 101 152 218 +6691 PF06863 DUF1254 Protein of unknown function (DUF1254) Vella Briffa B anon Pfam-B_5911 (release 10.0) Domain This family represents a conserved region about 130 residues long within hypothetical proteins of unknown function. Family members include eukaryotic, bacterial and archaeal proteins. 22.20 22.20 22.20 22.30 22.10 21.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.93 0.71 -4.31 340 950 2012-10-01 20:00:45 2003-08-08 10:59:30 7 11 469 9 332 860 184 127.90 25 28.84 CHANGED hNphhphp.thhs.tp..pslls...sNsDTlYohuhlDL.pssPlllplP.s.h.s.Rah.shtlhDhappsh.....................h.....st........ststuGp.ah..l.ssPsa...........................................ts.p.......................hh...cusTshshllhRshs......ps.ssD..hss.l.....pt.lQcthp..lts ..............................sph.hhp.th.sssp...pslss.sNsDTlYohualDL.sssPlllplP.s..h.t.RahshthhDhappsh................sshst..........stspuGp..al.l.ssPsa........................pG.h...........hh.hhcssTphshlhhRshh......ps.spD...hts.s.pt.lpcthplh................................................................ 0 80 165 252 +6692 PF06864 PAP_PilO Pilin accessory protein (PilO) Moxon SJ anon Pfam-B_12430 (release 10.0) Family This family consists of several enterobacterial PilO proteins. The function of PilO is unknown although it has been suggested that it is a cytoplasmic protein in the absence of other Pil proteins, but PilO protein is translocated to the outer membrane in the presence of other Pil proteins. Alternatively, PilO protein may form a complex with other Pil protein(s). PilO has been predicted to function as a component of the pilin transport apparatus and thin-pilus basal body [1]. This family does not seem to be related to Pfam:PF04350. 
22.50 22.50 22.70 25.40 20.70 22.40 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.40 0.70 -6.12 9 257 2009-01-15 18:05:59 2003-08-08 11:01:29 7 1 201 0 33 208 3 375.10 26 91.58 CHANGED salAGLcWpshsp.tp+ssRphu+s.........tsAshhluhphtspptpuphphsuhls....h.httpphaSLAhhhLshhtssGYGIapLspt....pahFlAsssGl.ushuDlVGotsplhpAhppFLshN.sPpp............uWpshusspassshpohstsLSs.....pthRts+LstVtps.phhhhullhlLhuhhh.ua.happ..scsthhststphtActph..ptss.s.....ls.PWAshP.hssFLptChslhpslPVolAGWRhshucCss-G....lRhpYpthsGuTlscFupRlp-la..sppPsFsLs-GuppGslhlPhshp.sppshpsEslPssusQl.+hsSahQphplplslsElpss.shst.Dtps....hP...W+EYsFphpTplsPctlhs..phs-sGlRhsSlshpLps.GpFpYphcGpl..YAp ................................................................................................................ahssLpWp.hst....t....p......pp.....t+.t..........shphhh........ttp..tphhh.suhlt....................thpth..aSLAhhhts...hp...s.....h..hu.........l..apl...s-t.......chhalAsh.sG..t....sh..u..D.lsGs.pp-l.t..pth....ph.ahshs...t.s.t..............tW.p.lh......p....s...c.hs..s..s...p....p..h...t.t..Lss.....................tp..+hs.....pLs.....l....t.p...ptph..hhs..ul....l...h.h..l..s...s..s.u.hs.s.a...a..p.p.p..t...c...s..h..h...s...t...stph.t.Actph.t....pp.s..p.s.s........ls..P.Wss.PshssFl.cuCss...h...h.pt.P...lult.G.W+hstupCssps.....hphhYpp..tsu...uTh..t..sF.t....pscpla......sshPshsl.sG.p..usl.shsl.....s.shs.ht.D-sls.ssp..l.clhoh...hQphpl..p......t.l..s.....E...h....s.........s......s....hss.stps.............P..pW+paphsh.p........osls.P.p.tlh...hhp....t.sGlRlpplthplpt..uphpap.pGplYup.......................................................................................................................... 0 10 18 24 +6693 PF06865 DUF1255 Protein of unknown function (DUF1255) Moxon SJ anon Pfam-B_12498 (release 10.0) Domain This family consists of several conserved hypothetical bacterial proteins of around 95 residues in length. 
The function of this family is unknown 25.00 25.00 38.90 38.10 21.90 21.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.04 0.72 -3.72 85 1071 2012-10-10 13:59:34 2003-08-08 11:06:28 6 2 1050 2 221 522 128 93.60 55 95.56 CHANGED hhcsNhYFDGpVpShuhphs-hscpTlGVMhPG-.....YpFuTutsEhMplluGshpVpLPGps.....cWpsasuG-sFpVsANSsFpl+Vp...psosYlCpYh .....MlpuNpYFsGKVKSIGFspussG+ASVGVMs.GE.....YTFuTupPEcMTVlSGALpVhLP.sss.....-WpsapAGpsFsVPGpScFcLpVs...E.sTuYLC+Yl................ 0 53 122 175 +6694 PF06866 DUF1256 Protein of unknown function (DUF1256) Moxon SJ anon Pfam-B_12377 (release 10.0) Family This family consists of several uncharacterised bacterial proteins which seem to be specific to the orders Clostridia and Bacillales. Family members are typically around 180 residues in length. The function of this family is unknown. These proteins are related to peptidase family M63 and so may be peptidases. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.94 0.71 -5.05 13 439 2012-10-01 20:58:29 2003-08-08 11:11:13 6 1 326 0 117 354 6 163.40 45 84.31 CHANGED hlphs-tpAhhpltshLppal.......scclVllCIGTDRssGDuLGPLVGphLKphphs.hpVYGTLc-PVHAlNlccplccIcKcapsPaIIAIDACLGcs..pslGcI.lcstPlcPGpuVsKpLPsVGDlSIsGIVNlos.hEah...lLpssRLphVhchAcsIupu .......................h..hpp..u.tplssh...Lhshl..h........ppsl.lllC..IGTDRSTGDuLGP......LV.Gop.L.......cp.....h...t.h...s..p...h...pV.aGTL...-c.....P....VH..........A..........h..........N...Lc.............-...plppIpp....pas.ssaIIAlDA.CLGch..ps..l..G.p.IplupGPl+PGuuVsKcLPsVGDlpIsGIVNls..Gh.hE..ah...VLQNTRLslVMcMA-lIup.u......................... 1 58 95 104 +6696 PF06868 DUF1257 Protein of unknown function (DUF1257) Vella Briffa B anon Pfam-B_5975 (release 10.0) Family This family contains hypothetical proteins of unknown function that are approximately 120 residues long. 
Family members include eukaryotic and bacterial proteins. 21.00 21.00 21.90 21.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.24 0.72 -4.14 40 175 2009-01-15 18:05:59 2003-08-08 11:32:11 6 1 109 0 65 168 172 102.10 35 80.65 CHANGED pALpDLGhphcp.sp.ptVRGY..cGQT.hpA-lsl...p.ssuhDIGFpWNusp..YELVsDLphWp..Qslsl-cFLsploQ+YAhpolLspospp.....GFplsEpppspDGSIcLV .........................uLpDLGl...shcp...tp..tsVRGY..pGQp..hpA-lll....p.s....ssYDlGFphNuss..Y-Llu.D..hWp....pphshppFl..splsQ+YAhpplLscsppp.....GapssppppttsGshpl................................ 0 19 47 63 +6697 PF06869 DUF1258 Protein of unknown function (DUF1258) Vella Briffa B anon Pfam-B_6065 (release 10.0) Family This family represents a conserved region approximately 260 residues long within a number of hypothetical proteins of unknown function that seem to be specific to C. elegans. Note that this family contains a number of conserved cysteine and histidine residues. 26.50 26.50 27.30 26.90 26.30 25.40 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.70 0.70 -5.30 3 40 2009-09-13 14:59:23 2003-08-08 11:37:06 7 5 6 0 40 38 0 189.30 29 31.56 CHANGED AEEARKLWpYsKNNFSTHsYCNtCGK.VLuspcKCNhCssuPVATFVRIGuFSQI+ELVEoYlD-ILEIREQLKsGRNl-HNLuSPFFS+aWcsESpNHL+LSTVlSIDGV+ISGNKKKLWPVSLlLVDLPoGLMQKSTNlILEGIVECSENPSTsLWNALIPhIhSDVEuHoGRV+NITFoC+ITTCSADQPAKRAFFGF+uHSSuhSCFFCLSPETLYK+GGssRKEpRPGaLTIlDScNGcNGFosKsSKIV.HVl ............................................................t.Ch....tp.t.t.......s..............s....t............t.hhpa..hRhshhtQl.pllptah.cIhpl+cpLppupphpHsLsu.ahp.chht..p.E..pt.ptL.plohl.hulDGlpl...tG.s.p....pKlWPlohhllDLPsu.MQ+ssslllpulhEsppsPSThlWNtlhshlhsDhptt.ttlts.hphphhIho.hsuDQ.....P................A.....+R..shauh+uHpup.SC.ashs.tThhKhts................................................................... 
0 4 13 40 +6698 PF06870 RNA_pol_I_A49 A49-like RNA polymerase I associated factor Finn RD anon Pfam-B_20222 (release 10.0) Family Saccharomyces cerevisiae A49 is a specific subunit associated with RNA polymerase I (Pol I) in eukaryotes. Pol I maintains transcription activities in A49 deletion mutants. However, such mutants are deficient in transcription activity at low temperatures. Deletion analysis of the fusion yeast homolog indicate that only the C-terminal two thirds are required for function. Transcript analysis has demonstrated that A49 is maximising transcription of ribosomal DNA [1]. 20.10 20.10 20.20 22.00 19.00 20.00 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.15 0.70 -5.89 24 337 2009-01-15 18:05:59 2003-08-08 12:05:22 7 10 258 10 242 330 1 321.60 20 82.87 CHANGED psspsscFsLYKcc..........p..pschllpGE.s-+LcYsGps.ssss.ps......cYhVGlaD.popplclapA.hhsh...........cssscsccshct.thcp.sss+shppR.suLGEAFGT+K......uK+AlsshccN+lc........u-pLpcsthcls-sltpsstshsspp-h.p.sssss..+PhP.sNl-AsslE-lYsl-sIIspcEashlc.lsshltppcspccLp.hhP..sp.upaVtc+L............................................tpLtptpsh.....c+lplLaYlSlLlslhpp.........RplppKpsLhp+ht.....sP-lllsslLc+FThsp.....pstphphhI..ssppc-KLlsYllslhLHlc.sFhV-lssLu+-Lslcss+lhpla+slGsplKt.s.sphpthulscussssaKlAoL+lPh..+hPchpcct...+R 
.............................................t..............................ptphlltup.ptplpahupp.t..t.t...............pah...lulhs.pstphp.lh.s...thh.h........................................t...ttp....t..........t.....t.ttp.htt+....ptLhpsF.GopK......t+.+sl...ps.hp..Nt....ls...............................sp....t....h.....p.....t.....s....t..thh........p....s...h............p.s...p...t..s.......s..........pph.t......t.pts.....p.lP.hshsAs..psp-lY.hcs................ll..s..t....t.....h.p..h..lt........t..h......tt.tt...tptl..........t...spalh....ppl...............................................pt.l.........t...........tp......t...........pp......hph...lhalph..Llthhtt.......................................+ph..+pt..hhtt..............................hPchl.ptlhppFss........................p....hh...stp.tshlhsahhslsLhl............s....sa...t.s...-...hssLtp..-L..p..hp...pphhphh+tlGsplpt..................................tt.tthc..hupLtlPh....hPt..tptt................................................................... 2 81 127 193 +6699 PF06871 TraH_2 DUF1259; TraH_2 Moxon SJ anon Pfam-B_13298 (release 10.0) Domain This family consists of several TraH proteins which seem to be specific to Agrobacterium and Rhizobium species. This protein is thought to be involved in conjugal transfer but its function is unknown. This family does not appear to be related to Pfam:PF06122. 
26.30 26.30 27.30 31.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.22 0.70 -4.88 8 33 2012-10-03 00:09:25 2003-08-08 12:48:21 6 1 21 0 11 38 0 194.50 55 98.47 CHANGED MLDAALIKcCADPSLKPAIVEQFlsuAGSsDPLAVTV+SGsRllLVPKsTTsDEAMAlIRQalGpslVRVGLTQaPAGVGVp-Au-LKPDLVDACENlRhGTAhFAKVhRIVsKWYGNPTucDVhPQlF-DAlhAWKTG.FEGluVFpA-DPGsussspssstss-cs.pscsouc.ssustssspsups.s-ss+AGIRIDLStIGu .MlDAALIcpCADPuLKPAIVEQFlttAGSsDPLAVTV+SGsRllLVPKspTsDEAhuLlRp.lG+s.lVRVGlTQaPAG.lGlh-AupLK.sDLVDsCcNlRhGTALFAKVhRIVsKWYGNPTsp-VhPQlF-DAIhAWpTGhFEGsuVF+A-DPGsss....hspssstpptps.tppsssp.tsus..s..sstsups.sDsspAGIRIDLStIGu......................................................... 0 1 4 8 +6700 PF06872 EspG EspG protein Moxon SJ anon Pfam-B_13549 (release 10.0) Family This family consists of several EspG like proteins from Citrobacter rodentium and Escherichia coli. EspG is secreted by the type III secretory system and is translocated into host epithelial cells. EspG is homologous with Shigella flexneri protein VirA and can rescue invasion in a Shigella virA mutant, indicating that these proteins are functionally equivalent in Shigella. EspG plays an accessory but as yet undefined role in EPEC virulence that may involve intestinal colonisation [1]. 
25.00 25.00 69.10 69.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.04 0.70 -6.12 4 134 2009-01-15 18:05:59 2003-08-08 12:57:48 6 1 111 10 1 57 0 370.00 72 98.16 CHANGED cShhhNuhpNsSA.hsL-uslcVsSshcpsWs-hohAE+LLKlLohGlasPcYotsERpphpcLLolLcPlhPtsNEhG+VtApFSDGSSLRISVTpSE.IEsplpTP-s-Kl.olhLEuNEQNpLLpSL.PlshHMPYIpsH+uLsph-lsstpuM+pLlsFsuKLSTolIPasspTcPLSGsTPFsSlahDThRGL.....GNoKlSlNGV-IPtcAQtLLpstLGLKDTtSSPspNlIppGIshcpAppIlpcSpssp-.....QKAhlsshLCpPEhsoAICSAFYQSFsVPAhhLpH.RIphASpa.upRSLshP..............NAsl...sIuISpSSsGulaVoSpsGshIMAPc-RsNtlGhhT.RTSYEVP.Gs+Cp.sEhsRslpP..+YuuSEsY.pN .......MINGLNN..sSASLVLDAAh+VNSsFKKsWs-MSCAEKLLKVLSFGLWNPTYoRSERQoFQELLTVLEPVhPhPNELGRVpApFSDGSSLRISVTNSELlEAEIRTs-NEKI.olLLESNEQNRLLQSL.PIshHMPYIQVHRALSEMDLTDssSMRNLLuFTSKLSTTLI.PHNsQTDPLSGPTPFSSIFMDThRGL.....GNAKLSLNGVDIPssAQKLLRDALGLKDTHSSPsRNVIspGISRHcAEQIARESSGSDc.....QKAEVVEFLCHPEAATAICSAFYQSFNVPALoLTHERISpASEYNuERSLDsP..............NACI...NISISQ.SSDGsIYVoSHTGlLIMAPEDRPNEhGMLTNRTSYEVPQGVKCpIDEMVpsLQP..RYuASETYLpN........ 0 0 0 1 +6701 PF06873 SerH Cell surface immobilisation antigen SerH Moxon SJ anon Pfam-B_13151 (release 10.0) Family This family consists of several cell surface immobilisation antigen SerH proteins which seem to be specific to Tetrahymena thermophila. The SerH locus of Tetrahymena thermophila is one of several paralogous loci with genes encoding variants of the major cell surface protein known as the immobilisation antigen (i-ag) [1]. 
22.00 22.00 23.60 22.10 21.50 21.20 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.07 0.70 -13.87 0.70 -5.85 11 200 2009-01-15 18:05:59 2003-08-08 13:01:24 6 9 2 0 192 207 0 156.00 17 99.89 CHANGED MpsKsLlICLll.......pphhlSVhSshsGssVsCss..ssssCssosssssPshtG.....CSW...........sGsssssCtIsDCsClsss...ssoGLTDhFCpSCtuss......ssaANsAGoACVuoSuSCssspps.....sWssuDCsLCsPsTPAhsut.......uCsACSu.....hoSuaTDusCsACuo...............suoshspslFANoAGouCVAuSASCsSsSRuss...AWTsuDCthCsPso......Phhsus.p.sssoSCsusossToGhTDupCNuCu...................ssuSssspslFANsAGSuCVAoSAoCsouspuss...sWTsuDChhCsPsTPuh...hu..ssooClA......Csuhoos.WTDAsCsuCh.sASsssps...............lFAsusGSuCVAuohSCNtosRuSNpWTDuDCALCNG............................Tuss.uNQYASuDGSSCQuTp......sSuThSuphhlShLLlhSuLLI ....................s..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 192 192 192 +6702 PF06874 FBPase_2 Firmicut_FBPase; Firmicute fructose-1,6-bisphosphatase Moxon SJ anon Pfam-B_13194 (release 10.0) Family This family consists of several bacterial fructose-1,6-bisphosphatase proteins (EC:3.1.3.11) which seem to be specific to phylum Firmicutes. Fructose-1,6-bisphosphatase (FBPase) is a well known enzyme involved in gluconeogenesis [1]. This family does not seem to be structurally related to Pfam:PF00316. 
21.50 21.50 21.60 21.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 641 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.95 0.70 -6.48 33 868 2012-10-02 19:15:56 2003-08-08 13:13:26 6 1 816 0 91 645 36 621.00 51 98.14 CHANGED h+YLcLLScpaPTlspsuTEIINLpAILNLPKGTEHFlSDlHGEYEAFpHVL+NGSGsl+cKIcDlFu.ssLscsEKppLATLIYYPc-KL-ll+p..ppc.slc-WY+hT.....LhRL....Icls+hsuSKYTRSKVRKALPc-FuYIIEEL.Lacpsctss.KcpYYspIIpoIIclscA--FIlAluplIQRLVVDHLHIVGDIYDRGPts-hIMDpLh...pYHSlDIQWGNHDlLWMGAAuGscsClANVlRIusRYuNLshlEDuYGINLhPLAsFAhchYp.csPC.sFpPKh...st.phsppEhphls+hHKAIuIIQFKLEuplI+R+PEFcM-cRhLLc+Isa-cuTIsL.sG+pYtLpDTsFPTlDPpsPYcLTpEEc-ll-+LhtSFhsSEKLp+HhcFLhsKGuhYLhYNuNLLaHGCIPLsEDGshcphpI..tGcpYpG+pLLDhh-phlRcuah.pcspppcchupDhhWYLWsGtsSPLFGKccMTTFERYFIpDKpTHpEpKNPYYpLR-cEclCcpILcEFGLss....cpuHIINGHsPVKsppGEsPIKANGKllVIDGGFSKAYQppTGIAGYTLlYNSYGhpLVoHpPFpSpccAIpctpDIlSsphllEpsspR+pVpDTDlGpcLppQIpDLctLLtAY+pG ................................+YLcLLuppasohpchuTEIINLEAILpLPKGTEHFlSDlHGEYEAFpHVLRNGSGsl+pKIp-lFt.spLsppEhs-LssLlYYPE-KLpLlcp....ppp..p..h.psWYhhTlp+L....Iclh+hsSSKYTRSKVRKALPcpasYIIEEL.Lacss.c.h..ps.Kc.sYYppIlpplIplppA--a...IluLuhhIQRLllDHLHlVGDIYDRGPtPDpIMDpLh...sY.H..S..lDIQW..GNHDlLWhGAhuGScsClANllRIsARYsNL-llEDuY.GINLRPLhsaAtchYp...s-s...tFpPKht..tpp..phsppE.p.ls+hHpAIulIQFKLEstlIcRRP-FcM-cRllL-KIsa-pssIs.l.pGp.......pYsLp....DosF.Tl.....s.....s....csPhcLstEEc-lhs+LhhSFppSEKLp+HhpFLhpKGShYLsYNuNLLhHGCIPlsEsGphcshpl..p.G.c.p.Y.sG+pLLDhh-hhlRcua.....s...pt........p....p....pc....chusDhlWYLWsGchSsLFGKctMoTFERYFIs..D......KtoH+EcKNPYYp.L..R..-.ct..p......hsc+ILcEFGLss....-puHIINGHTPVKphcGEsPIKAsGKhlVIDGGFSKAYQppTGIAGYTLlYNSaGhQLVuHpsFsupEcslpps.tDhhSh+pll-p.phpRphl+DTshGccLppQIptLchLhcsh..t...................................................................... 
0 39 65 82 +6703 PF06875 PRF Plethodontid receptivity factor PRF Moxon SJ anon Pfam-B_13241 (release 10.0) Family This family consists of several plethodontid receptivity factor (PRF) proteins which seem to be specific to Plethodon jordani (Jordan's salamander). PRF is a courtship pheromone produced by males increase female receptivity [1]. 19.40 19.40 19.50 19.70 19.20 18.90 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.30 0.70 -5.12 2 260 2012-10-02 01:28:15 2003-08-08 13:18:57 6 1 58 0 45 239 0 204.80 59 94.64 CHANGED RSTSLLTFLVVSLSTATSLAMAEINDVADLSSDTIVhFSEVQKFAEDIQSSADSLLPTYLSFQGAPLSDPDYQLPHIKVsNLPTAAMDYDTFh+QTDETRLpNNLYFYSAIVEFLKEAMTEQEDLNPAELuLKAKFEEAMANSNTLISKISDIMTQMGMSVTITLPKPLVVPFcGSAYFpKKLRGGVVCKEYKERVhLTKRDF.hLAcKYQG.L ............................................RSTuLLTFLVVSlSoATSLsM......A......-hs......DVA-LSpDTIVLFSEsQKFAEclQSsADSLLsTYLSFQGAPLSDPDYp....LP+.IK.V.s.NLPTAsM.DYDTFhpQTDEsRLpNNLYFYSAIVEFL+tAMTEQE.DLNPAELuLKAKFEEAMANSNTLISKISsIMTQMGMS...V.....TITLP..c..PL.V..VP..F...c.G.SAhFpKKLRGGVVCKEYKERVhLTKRDFphLAcKYQG................................................................. 2 1 4 13 +6704 PF06876 SCRL Plant self-incompatibility response (SCRL) protein Moxon SJ anon Pfam-B_13253 (release 10.0) Family This family consists of several Plant self-incompatibility response (SCRL) proteins. The male component of the self-incompatibility response in Brassica has been shown to be encoded by the S locus cysteine-rich gene (SCR). SCR is related, at the sequence level, to the pollen coat protein (PCP) gene family whose members encode small, cysteine-rich proteins located in the proteo-lipidic surface layer (tryphine) of Brassica pollen grains [1]. 
21.80 21.80 21.80 22.00 21.50 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.14 0.72 -4.01 43 154 2012-10-01 23:31:40 2003-08-08 13:22:27 7 1 12 1 49 160 0 65.60 27 86.40 CHANGED Fll....SHsQ.-VEAshh........C..tppsasG.pCussGscp............Chpphpp.....hcpcshpCpCs......tthpsp+hCsC..ph ......................hp.-VEA.shhpt......C...tp..tt.hs.G.pCu..ssGs.p.p............Chpthpp.......hpppshpCpCp.........t.pppphCpCp.h.............. 0 25 26 28 +6705 PF06877 RraB DUF1260; Regulator of ribonuclease activity B Moxon SJ, Bateman A, Eberhardt R anon Pfam-B_13601 (release 10.0) Domain This family of proteins regulate mRNA abundance by binding to RNaseE and inhibiting its endonucleolytic activity [1-2]. A subset of these proteins are predicted to function as immunity proteins [3]. 23.30 23.30 23.30 24.80 23.10 22.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.79 0.72 -3.38 67 1287 2012-09-25 12:41:39 2003-08-08 13:24:37 6 5 1101 1 189 635 25 103.70 42 67.55 CHANGED h.pppspcllptLhcsGs.-.st.a.lEaahhhsc.cphcchutchhptGac.lps.tp.............-c.ss..shathphstphhhshctIsptpppl.plApchssp.YDGWGs .............Qc-ETR.IIcpLLEDGS.DP-uLYsIEHHlss.cDh-sLEKAAV....-AFKhGYE.Vs-sEEh.........EsE-Gc....hlhssD.lhs.Ess...Lss-hIDuQV-pLhsLAEKassp....YDGWGT................... 0 32 79 135 +6706 PF06878 Pkip-1 Pkip-1 protein Moxon SJ anon Pfam-B_13784 (release 10.0) Family This family consists of several Pkip-1 proteins which seem to be specific to Nucleopolyhedroviruses. The function of this family is unknown although it has been found that Pkip-1 is not essential for virus replication in cell culture or by in vivo intrahaemocoelic injection [1]. 
25.00 25.00 116.10 115.90 24.30 19.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.69 0.71 -4.36 19 40 2009-09-14 12:00:35 2003-08-08 13:28:33 6 1 39 0 0 37 0 162.40 33 93.32 CHANGED stIcphpsK.NslhcQa-pKVhsahpKss.s................-sttp-hhhLuAplaG.pEQL.uLpp..ssscpc+.h-FlsDl.s-L.Dhssp-lcphhptsc.s......ahhpKYpssph.pthppsac.p..pFlKhhcpFlsKRps.h.............hp........sss....sshL-ELVhLKsslIKHLCshEpLs ....pIcplpsKcsslpcpY-pKVhsah+Kss..................-stts-hhhluAplaGhcEQLhuLpp..stsccc+.l-FlsDl.s-L.Dhss--l-plhttps.s......hlspKYpssp.l.....scslppsa-pptppFlKllcpFlsKRss.a.............++........ssssplL-ELVhLKsslIKHLCshEpLh. 0 0 0 0 +6708 PF06880 DUF1262 Protein of unknown function (DUF1262) Vella Briffa B anon Pfam-B_6733 (release 10.0) Family This family represents a conserved region within a number of proteins of unknown function that seem to be specific to Arabidopsis thaliana. Note that some family members contain more than one copy of this region. 19.80 19.80 20.60 41.10 18.60 18.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.33 0.72 -3.69 9 75 2009-01-15 18:05:59 2003-08-11 13:15:06 6 3 13 0 52 73 0 99.20 45 26.52 CHANGED EGPsSGlLVlp....DEcutsc..pCaGhCh..csplpGLPFPQNptLoVca.........psGpGcsphsa..tDsVVFlPVlsQPLSSNRYYsl+tsG+HuGcssAso.+EEDtVoCCFC ..............EGPNSGhLVIp....DE-upsp...s.....C..aGhsh..c.s.pl.c.sLPFPQNppLsVpa.............................ptstsppp.sh..h-sVlFIPVLsQPLSSNRYYslct.pGKH.pGpspssu..+E-Dhss..CCF................. 0 10 25 36 +6709 PF06881 Elongin_A RNA polymerase II transcription factor SIII (Elongin) subunit A Vella Briffa B anon Pfam-B_6598 (release 10.0) Family This family represents a conserved region within RNA polymerase II transcription factor SIII (Elongin) subunit A. 
In mammals, the Elongin complex activates elongation by RNA polymerase II by suppressing transient pausing of the polymerase at many sites within transcription units. Elongin is a heterotrimer composed of A, B, and C subunits of 110, 18, and 15 kilodaltons, respectively. Subunit A has been shown to function as the transcriptionally active component of Elongin [1]. 25.30 25.30 25.30 25.50 23.90 25.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.16 0.72 -3.67 21 318 2009-01-15 18:05:59 2003-08-11 13:51:23 6 5 229 0 221 299 0 106.30 27 22.28 CHANGED lp-lGslPaplLcPlLc+s.os-QLhclEcts..PtLhp-oD-LWpcahpRDF....................ccph.p-.-.......sWR-hYh+hp-cpcp+h....cpLppplppspsp+spt.+p.....hhlpsh ..................lt-lGs.lPaplLc.PlLp+s...os-QLhclEc..ps.....stLhp-...oD...cLWppahp+DF...........................................cp.p..t.spc.h-........................oW+-hYh+hpcpp-pcl........ptLppshpstptpK.pt.pt...h.....h....................... 0 66 111 176 +6710 PF06882 DUF1263 Protein of unknown function (DUF1263) Vella Briffa B anon Pfam-B_6668 (release 10.0) Family This family represents a conserved region located towards the C-terminus of a number proteins of unknown function that seem to be specific to Oryza sativa. 21.60 21.60 24.30 23.80 21.10 18.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.36 0.72 -4.43 8 155 2009-09-10 17:20:14 2003-08-11 14:02:11 7 12 3 0 132 154 0 86.80 58 53.41 CHANGED MG+..sp.cSAGSRCNV..tutLSADDhTGVRPVTDRSFLSTTRQFShLHVCPSSYNDFLAMVuMKPGhYLsGTDVPoPGssTPAPARDECLEALIlPTGhGEACsRPPlAT .....................................................................................................o.hsDFLAMluMKPGM.LsGTDlPTPGVsTPAPARDEs.EALIIPTGRGEACsRPPVAT........................... 
0 0 0 0 +6711 PF06883 RNA_pol_Rpa2_4 RNA polymerase I, Rpa2 specific domain Finn RD anon Pfam-B_4721 (release 10.0) Domain This domain is found between domain 3 (Pfam:PF04565) and domain 5 (Pfam:PF04565), but shows no homology to domain 4 of Rpb2. The external domains in multisubunit RNA polymerase (those most distant from the active site) are known to demonstrate more sequence variability [1]. 21.30 21.30 22.40 26.50 21.20 20.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.91 0.72 -4.01 31 319 2009-01-15 18:05:59 2003-08-11 14:20:06 7 28 270 0 214 329 3 58.50 41 5.23 CHANGED lGalssctApplsssLRhhKlt..........s.ppplP..............pLEIuaVPso...ps..GpYP.GLYlFossuRMhR ...........lGass...c.tuttlucsLRhhK..Vp...................u..ppplP..............pLEIuaVP.o.....ps..GpYP.GLYLFossuRMhR............ 0 77 123 181 +6712 PF06884 DUF1264 Protein of unknown function (DUF1264) Vella Briffa B anon Pfam-B_6839 (release 10.0) Family This family contains a number of bacterial and eukaryotic proteins of unknown function that are approximately 200 residues long. Some family members are annotated as putative lipoproteins. 
25.00 25.00 34.70 27.90 18.80 19.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.12 0.71 -4.81 28 339 2009-01-15 18:05:59 2003-08-11 15:00:49 6 7 284 0 133 261 2 167.10 51 67.11 CHANGED shsPlcplstHLsu..........FHhYucDPsR.plEApHYCs.+lsE.......DhtQCllYDussssA+LlGlEYlISt+LFpTLPs-E++LWHoHsaEV+SG.Llh............................Ps.....lPps....AE+stMcclhshYGKTaHhWQs....DRGDsLPLG.PpLM..huFop-uplc........tLlcpRDc+h.Glss...ct+RcpRt-.lpts ...........................p..sPlctIssaLsu..........FHhYusD......hst....QhEAHHYso.hLNE...............DlhQslIYDus.sps.ARL........hGVEYIISE+LFcT.LP.sEEKKLWHSHpYEV.......KSG.Lls.................................PG.........lPps........s-+shMpcllsTYGKTWHTWps.....DRsc.......sLPhGhPtLM..MuFT..sD..GQlcs.......sL.lt-RDcRh...GlDTpth..+cpRp-.ls......................... 0 41 76 110 +6714 PF06886 TPX2 Targeting protein for Xklp2 (TPX2) Vella Briffa B anon Pfam-B_6863 (release 10.0) Family This family represents a conserved region approximately 60 residues long within the eukaryotic targeting protein for Xklp2 (TPX2). Xklp2 is a kinesin-like protein localised on centrosomes throughout the cell cycle and on spindle pole microtubules during metaphase. In Xenopus, it has been shown that Xklp2 protein is required for centrosome separation and maintenance of spindle bi-polarity [1]. TPX2 is a microtubule-associated protein that mediates the binding of the C-terminal domain of Xklp2 to microtubules. It is phosphorylated during mitosis in a microtubule-dependent way [2]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.73 0.72 -3.90 20 364 2009-01-15 18:05:59 2003-08-11 16:21:19 6 9 95 0 225 353 0 56.10 38 11.62 CHANGED h+o-cRAccRpEF.pKlcEKppthctppppsEtppcpEEEptl+QLRKpLV..aKApPh ........h+s-cRAccRp..E.....F.p+lcEKppthE..tp+pptctppc..............EppEp-.l+pLR.K.p.Ls..aKApPh...... 
0 58 137 180 +6715 PF06887 DUF1265 Protein of unknown function (DUF1265) Vella Briffa B, Pollington JE anon Pfam-B_7101 (release 10.0) Family This family represents a conserved region approximately 50 residues long within a number of proteins of unknown function that seem to be restricted to C. elegans. The GO annotation for this protein indicate that its a protein involved in nematode larval development and has a positive regulation on growth rate. 25.00 25.00 59.40 58.40 18.10 18.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.35 0.72 -4.30 7 20 2009-09-11 00:32:59 2003-08-11 17:07:06 9 1 4 0 17 22 0 47.90 52 15.25 CHANGED EELhpNhEDlhYVppLLllAcsu+hpslpssC.ATll.aHhpDFhR.h .EELhKNaEDlhYVCNhLIlA-Du+FsslpsCClATllhYHFsDFhR.... 0 10 13 17 +6716 PF06888 Put_Phosphatase Putative Phosphatase Vella Briffa B anon Pfam-B_7115 (release 10.0) Family This family contains a number of putative eukaryotic acid phosphatases. Some family members represent the products of the PSI14 phosphatase family in Lycopersicon esculentum (Tomato) [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.64 0.70 -5.20 9 337 2012-10-03 04:19:28 2003-08-11 17:25:44 7 8 166 0 194 898 85 208.20 31 78.97 CHANGED hlllFDFD+TIIDtsSDshVVcthssppl.ppLpsoh....WNphMsRhhp.LpsQG.hs.t-l+psl+slPlsPthlchl+th..ps.th-LhIlSDANpFFI-phLcttulpshFsc.IhTNPushDu.pGpLpltPYHs.....Ho.Cs..hCPsNhCKGhVl-chhspt..pcGhthcRllYlGDGssDaCPsl+LppsDhshPR+GaPhachlsc....ssthl+ApVh.WssGt-lpchLhtll ........................................................................lllF.DF...DpTIl-..p.....s..........S..D.........s..h.........l......l.....p.......t..h.........s..t.....p.........p.....h...........p...p.....L...t....t...p.....h....p.....................W..s..p..h..M...........s....c.hhp.............L...........t.p..p.......u.....h....p............p.......c........l..t....p.....s.........l..................p.p.....lP....h...s..s..th....h.p.hl..ph.h..............t...t.........s.............h-..lh..llSDu.NshaI-phL....ctt.u...........l..p.p.....h..Fs..c..lhoNP.u.t.h.......s..t.....pG......p.....L....p....l....p......P...aHs.............................Hs..Cs.....hC....P....s..N...h...CK..t.......tl..l.p.....c.hhpp........................t..u.h................hp.+..llYlGDGssDh....CPshp.LtttD.hsh.sR......+..s......a....s..h..chltp...........p...hp..u.plh.Wpsu.-l.p.h................................................................................................... 3 60 104 151 +6717 PF06889 DUF1266 Protein of unknown function (DUF1266) Moxon SJ anon Pfam-B_13878 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 235 residues in length. Members of this family seem to be found exclusively in the Enterobacteria Salmonella typhimurium and Escherichia coli. The function of this family is unknown. 
21.80 21.80 22.20 21.90 20.70 21.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.90 0.71 -4.46 66 1230 2009-01-15 18:05:59 2003-08-12 09:14:11 6 6 645 0 86 576 3 174.40 29 66.70 CHANGED pptLppsWGIsc+c....shhpplptlh..spGcp...............................................................hthhpphtp.htthttttspphththphhpth..................htttuhhAWDhuRhshlsRh...uhhsGalsccEshphhhpsuppspptasSWc-ahtuYhhGpthWtssssppphtttth...............pLhpssp.........uPat.................t.ls........Wp .............................................................................................................h..sLpspWGIpsp-shhphl.phh......sssHu........................................................sphts.htphhcps.....p.hpchhsths.-p.sp.....thsp..alutp..............................suttsIhAWDhsRMuaLoRh...ushNsalsEE-uhal..psth+A..pchacoWccYhsuYhhGRhYW.....ps.sps.c..pphhht...chht.h................plhtssc....paat...........sLPWp.................................................................. 1 32 58 75 +6718 PF06890 Phage_Mu_Gp45 Bacteriophage Mu Gp45 protein Moxon SJ anon Pfam-B_10848 (release 10.0) Family This family consists of Bacteriophage Mu Gp45 related proteins from both phages and bacteria. The function of this family is unknown although it has been suggested that family members may be involved in baseplate assembly. 
30.10 30.10 30.30 30.10 30.00 30.00 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.98 0.71 -4.79 8 376 2009-09-13 01:58:40 2003-08-12 09:25:51 7 2 328 0 52 300 4 162.70 32 86.85 CHANGED uRAVVoulssspKpQslQl+lhAGEspDcVE+LpsYGaoSsP.sGAEAllshsGGcRSHuVsVVVsDRRaR.pGLpsG-VuLYccEGcplpLT+sGclI.sus.......Kolplp.......uuppspF-oP.sphTGslcsssD.u..............Gsshus.cpstshsh.GHtH+-susGus ...........................RuslshlssutphQsl.Qlp.h.husEstsslE+hp.YGF.oS.s.s...sGuEulllhlG....GcRS+uVllslpc.ccYRhpG.LpsGEsAlYs.c.pG.p..p.lpLp.+.tGhl.lcsss..............................cs...lplp...............AsspsphpsP...lps.oGp.lpspss.s...............................utshss....t.h....shs.........sHpHppsstt..s........................................... 0 12 26 42 +6719 PF06891 P2_Phage_GpR P2 phage tail completion protein R (GpR) Moxon SJ anon Pfam-B_10918 (release 10.0) Family This family consists of P2 phage tail completion protein R (GpR) like sequences. GpR is thought to be a tail completion protein which is essential for stable head joining [1]. 25.00 25.00 27.00 26.80 24.40 24.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.44 0.71 -4.37 48 720 2009-01-15 18:05:59 2003-08-12 09:29:35 6 5 527 0 86 497 3 130.00 37 85.41 CHANGED M..........hK.ppL+shLhssl..sphtsss-p....lcsal-sGplhsssp..u.uh.........chpYshslhlpcass...csshlhsslhsWLpspps-...p.c..tcppshpFcl-l.scsss......DlpIplp.LsEclhltcct......sGtlph....pttt.s ..........M...KspSLRpALscul...shhpsNP-p....LplFV-sGslhsTut..ohSa................-aRYslslhlpDasG...-.shLhsPlLsWLc-NQPDlh..ssp..h+ppshsFps-I..cs-ss......DlslsL..LTERVllsps.......pGs..thpth.................... 
0 8 34 60 +6720 PF06892 Phage_CP76 Phage regulatory protein CII (CP76) Moxon SJ anon Pfam-B_13444 (release 10.0) Family This family consists of several phage regulatory protein CII (CP76) sequences which are thought to be DNA binding proteins which are involved in the establishment of lysogeny [1]. 21.30 21.30 21.70 21.50 20.60 20.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.70 0.71 -4.92 15 350 2009-01-15 18:05:59 2003-08-12 09:37:11 6 2 313 0 50 250 8 153.50 44 91.22 CHANGED -apsSKps....+FDpACpsFAspaNlscLAccsGhss.QhLRNKLNPpQPHpLTssELltITchT.sDpTllsuhLhplsslsss.Pssps...uc....sl.ppslcsoupsG-luptAlph.uscRlTpspKcpllppApuuIppLuLlhtslEsRhQ.AsPshuhssDslsu .....................DaphSKHs....HFDpACRuFAlcH..N..h..s...pL..Ac+sG..Mss....QsLRNK..L..N..P.p.Q.P.H.p.L.TssElhhLTDlT..EDuTL......lDGhLAQIp..CLP.s.V....P...lNEs.....uc....tpLs.chVhsAT....A-lGclAus...AV...S..s-.hhTsutR...+shlsslNushRhhuLhAhslpuRlQ..usPshsuuVDslo.u....................................................... 0 7 19 38 +6722 PF06894 Phage_lambd_GpG Bacteriophage lambda minor tail protein (GpG) Moxon SJ anon Pfam-B_11957 (release 10.0) Family This family consists of Bacteriophage lambda minor tail protein G and related sequences. The role of GpG in tail assembly is not known [1]. 19.20 19.20 20.40 20.20 19.00 18.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.77 0.71 -4.51 7 773 2009-01-15 18:05:59 2003-08-12 09:47:44 6 2 336 0 10 332 0 122.70 54 90.90 CHANGED MFLKoEpFpasGsSVsLSELSALQRIEaLpalppcstph-sp......uccpssh.phslchsAaLVuhSLWcu.........s.sp-spplpppVhtsWsh-AluputphVLhLSGM......psssppsspss ...........MFLKTEpFEYNGVSVTLSELSALQRIEaLAhlppcAEQ.t...Eos.........uscpl...sl.....cc...hlcTuAaLVAMSLWHsHs.Ks.p.sShsEsVppIcQEVlTTWPs-Alup..ApssVLpLSGM..hshpsssssppstc..t.................................................................................................. 
0 0 1 4 +6724 PF06896 DUF1268 Protein of unknown function (DUF1268) Moxon SJ anon Pfam-B_11819 (release 10.0) Family This family consists of several bacterial and phage proteins of around 115 residues in length. The function of this family is unknown. 21.90 21.90 22.40 22.10 21.70 21.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.44 0.71 -3.74 4 65 2009-01-15 18:05:59 2003-08-12 09:56:36 6 1 61 0 12 57 1 118.20 23 90.05 CHANGED Mc...IKlsclpc+oaEVKTS.+NlcKMacaQLshActp-pIusups.p.sclshsh.......LcshltFloslLsLsKcEh-KL.t-LEhpchhclsshlVthh.GhoD-pI-pu.sccsDs..uc ...............................................l.h....p..l....p+shpVpsospslc+hpch....Qlthhctp...s.plp..csp.t.h....phhch...phph.............lcchhcFlpplLsLsccph-Kl.-c.l.-hpchtchsshlsh+lp..GhsDcplchs.tpp.....p................ 0 3 6 10 +6725 PF06897 DUF1269 Protein of unknown function (DUF1269) Moxon SJ anon Pfam-B_14034 (release 10.0) Family This family consists of several bacterial and archaeal proteins of around 200 residues in length. The function of this family is unknown. The family carries a repeated glycine-zipper sequence- motif, GxxxGxxxG, where the x following the G is frequently found to be an alanine. As glycine-zippers occur in membrane proteins, this family is likely to be found spanning a membrane. 22.40 22.40 22.40 22.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.01 0.72 -3.99 40 415 2012-10-03 03:18:43 2003-08-12 10:27:11 7 3 359 0 124 303 40 101.70 31 38.74 CHANGED AluGuhWGhLlGllFhsPLlGh....AlGAuuGAluGu.Lo..DhGIsDsFl+-lucsLpPGooALFlLl+csssDKVlpclptas..GcllpTSLSc-cEppLpcALspu .......................................htGuhhGhLlG.llh..hs..slhGh.......................AlGAusGAluGs...hs...D...hG...Ic...D...s...h...h...cclupsLss.Gouulhlls.cc.ts.-cVhs.s.lp.shs..upllppsls..tt..l.t.....t.............................. 
0 53 82 106 +6726 PF06898 YqfD Putative stage IV sporulation protein YqfD Moxon SJ anon Pfam-B_13823 (release 10.0) Family This family consists of several putative bacterial stage IV sporulation (SpoIV) proteins. YqfD of Bacillus subtilis (Swiss:P54469) is known to be essential for efficient sporulation although its exact function is unknown [1]. 25.00 25.00 26.40 29.80 24.60 23.50 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.05 0.70 -5.76 8 428 2012-10-01 23:48:22 2003-08-12 12:30:40 6 2 409 0 89 349 7 353.90 27 95.03 CHANGED KhpappFh+GhVplclpGhsIE+FLNhshppsI.lhsl++hsspslshplsLpDhKKl+plsK+spCKlphlcR+GhPFllhRh+++hulllGhllFllllahLSshlWcIDIpsscshsEa-lRppLs-hGVKpGshpFsl-.lpKlp+cLhpshssIhWlGVclcGTol+lcVVEKppP...-.tpps-PpNlVAKKcGhIpRlaVpcGpslVKlsDhVKKGDlLVSG.lGpEspcpt............VsAcG-VhAcTWYEsplcV.LpsphpshTGcshssaalphtuhplsl...shpcpEFccacp.pcp+sh.hh.aphPhphs+pphYEspppptchsK-EAVccutKhuccclpcplucsuclhscKVh+cclEsGKl+LplhhpV.EsIu ...............................................hh.Ghlplclp.G.hshE+FlN.shpptlhlas.l........p+hs.ps.hhhph.lp-h+cl+slh++sps+lpllp+hG.hPFhht+hh+.ppsh.hlGhllFhhhlhhhSshlWpI...-I..p...Gs..p....s..ophtlhptL...c.c.h.Gl+.Ghhphpls.spclpcplpp.ph.s.s.lsWlulclcGTphplcls...E...+p.pP.................phtp.pppP...........p...........slVApKculIpchhsppGpslVphsDhVcKGplLlS..G.h.h......s..p..c..tp...t........................V..p....AcGpVhuc..T..aYpt.ps..plslppp...hp...hh.TGpphpp.hhlphsstpl..hl.........s..h..tp....ppac.ph.c.p.p...pp.ph.p.h.h.t.h.h...l.P...lthtpphhhEhcphptph...o....c...cpAhphutchspc.plppp.l....s.pp.s.pIls.......c.......p...l.h......p..h...c..s..splchplhhps.EsI............................................... 0 47 74 79 +6727 PF06899 WzyE WzyE protein Moxon SJ anon Pfam-B_13849 (release 10.0) Family This family consists of several WzyE proteins which appear to be specific to Enterobacteria. Members of this family are described as putative ECA polymerases this has been found to be incorrect [1]. 
The function of this family is unknown. 25.00 25.00 29.40 28.60 22.90 22.70 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.74 0.70 -5.79 3 583 2009-09-11 10:39:43 2003-08-12 12:41:03 6 2 559 0 43 231 0 431.40 85 99.60 CHANGED MTLuQFGGLFVVYLLullFIATLTYpEFRRVRFNFNVFFSLLYLLTFYFGFPLTClLVFRFGVuVVPVEhLLpALLSATuFYGIYYVTYKTRLRpRsus.PRsPlFTMNRVETNLTWVLLALIALuTVGIFFMQNGFLLFRLcSYSQIFSSDVSGVALKRFFYFFIPAMLVVYFL+QDpRAWlFFLlSTVAFGILTYlIVGGTRANIIIAFALFLFIGIlRGWITLWMLAAAGVlGIVGMFWLALKRYGLNVSGDEAFYTFLYLTRDTFSPWENLALLLQNYD+IDFQGLAPIVRDFYVFIPSWLWP-RPSLVLNTANYFTWEVLsNHSGLAISPTLIGSLVVMGGVLFIPLGAIVVGLIIKWFDWLYEpGKAEoNRYKAAILQSFCFGAVFNMIVLAREGLDSFVSRVVFFCVIFGACLllAKLLYWLF-oAGLI++...RTsuLshsNAts ............MSLhQFSGLhVVWLLsTLFIATLTWFEFRRVRFNFNVFFSL.LFLLTFFFGFPLTSV...LVFRFDVGVAPPEILLQALLSAuCFYA.VYYVTYKTRL...R..............KR.V........u.......D...VP.....R..RPL.F..TMNRVETNLTWVILMGIALVSVGIFFM.HN.GFLLFRLsSYSQIFSSE.VSGVALKRFFYFFIPAMLVVYFLRQ...DSKA.WLFFLVSTVAFGLLTYMIVGGTRANIIIAFAIFLFIGIIRGWISLWMLAAAGVLGIVGMFWLALKRYGh...NVSGDEAFYT.....FLYLTRDTFSPW....ENLALLLQNY..DN..IDFQGLAPIVRDFYVFIPSWLWPGRPShVLNSANYFTWEVLNNHSGLA.............I..SPTLIGSLVVMGGALFIPLGAIVVGLIIKWFDWLYELGNREsNRYKAAILHSFCFGAIFNMIVLAREGLDS..FVSRVV.FFlVVFGACLhlAKLLYWLF-SAGLIHK...RTpS..psQVEG............... 0 1 11 27 +6728 PF06900 DUF1270 Protein of unknown function (DUF1270) Moxon SJ anon Pfam-B_13907 (release 10.0) Family This family consists of several hypothetical Staphylococcus aureus and phage proteins of 53 residues in length. The function of this family is unknown. 20.60 20.60 20.60 23.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.89 0.72 -3.82 2 250 2009-01-15 18:05:59 2003-08-12 12:43:48 6 1 175 0 2 69 0 52.40 89 98.91 CHANGED MSNIYKSYLVAVLCFTVLAIVLMPFLYFTTAWSlAGFASIAhhIFaKEYFYtc .MSshYKSYLlAVLCFTVLAIVL.M.PF.LYFTTAWSIAGFASIATFIFYKEYFYEE... 
1 2 2 2 +6729 PF06901 FrpC RTX iron-regulated protein FrpC Moxon SJ anon Pfam-B_14005 (release 10.0) Family This family consists of several RTX iron-regulated FrpC proteins which appear to be found exclusively in Neisseria meningitidis. FrpC has been shown to be related to the RTX family of bacterial cytotoxins. FrpC is found in the meningococcal outer membrane. The function of this family is unknown although it is thought to be a virulence factor [1]. 25.00 25.00 162.40 162.10 19.50 19.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.83 0.70 -4.88 2 63 2009-01-15 18:05:59 2003-08-12 12:52:41 7 1 29 0 4 51 0 244.80 90 99.31 CHANGED MRPYATTIYQLFILFIGSVFTMTSCEPVN..............EQTSFNNPEPMTGFEHTVTFDFQGTKMVIPYGYLARYTQDNATKWLSDTPGQDAYSINLIEISVYYKKTDQGWVLEPYNQQNKAHFIQFLRDGLDSVDDIVIRKDACSLSTTMGERLLTYGVKKMPSAYPEYEAYEDKRHIPENPYFHEFYYIKKGENPAIITHhNNRlNQsEEDsYSTSVGSCINGFTVQYYPFIREKQQLTQQELVGYHQQVEQLVQSFVNNSsKK ....................................MTSCEPVN..............EQTSFNNPEPMTGFEHTVTFDFQGTKMVIPYGYLARYTQDNATKWLSDTPGQDAYSINLIEISVYYKKTDQGWVLEPYNQQNKAHFIQFLRDGLDSVDDIVIRKDA.CSLSTT.MGERLLTYGVKKMPSAYPEYEAYEDKRHIPENPYFHEFYYIKKGENPAIITHRN..+....phtEssYSTSVGSCINGFTVcYYPFIRE......K......QQLTQQELVGYHQQVEQLVQSFVNNsSKK.... 0 4 4 4 +6730 PF06902 Fer4_19 DUF1271; Divergent 4Fe-4S mono-cluster Moxon SJ anon Pfam-B_13906 (release 10.0) Domain Members of this family contain three highly conserved cysteine residues. This family includes proteins containing divergent domains which are most likely to bind to iron-sulfur clusters. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.32 0.72 -4.15 22 670 2012-10-03 08:56:42 2003-08-12 13:05:51 6 12 636 0 116 1472 82 63.60 50 63.97 CHANGED YpGtplslhhstslCtHuupCl+...stPpVFc.tc+.PWl.....pP-suss....cplhphlspCPSGALsahcc ...........YoG-cIDVaaN.ssIC.p.HSGNCVR...Gss.c..lFsl....c...R...K....P....W...I.................hP..D...p.s.sl..............s.s...l.l.cV..I.-.o.CPSGAL+YpcK............................................ 1 45 79 101 +6731 PF06903 VirK VirK protein Moxon SJ anon Pfam-B_13955 (release 10.0) Family This family consists of several bacterial VirK proteins of around 145 residues in length. The function of this family is unknown [1]. 20.50 20.50 30.20 39.70 19.70 16.40 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.11 0.72 -4.26 14 134 2009-01-15 18:05:59 2003-08-12 13:13:00 7 1 124 0 27 101 2 97.70 37 70.18 CHANGED spsLsshsplhp.ALssGKsVslslDLspCpsp...sssssssps+GGhpIcuahIssDsoluFuDpHFTlssc...G+PlhpFlRYplps-Gssphssh....shshPsap .........s.sLsshsslhpALssGcsVslslDLupCpsc....tss.ssps+GGhplcuahIpsDsoluFSDsHFTlssc...scPIppFhRYplpssGsspFsshhhshssh........... 1 6 10 17 +6732 PF06904 Extensin-like_C Extensin-like protein C-terminus Vella Briffa B anon Pfam-B_6925 (release 10.0) Family This family represents the C-terminus (approx. 120 residues) of a number of bacterial extensin-like proteins. Extensins are cell wall glycoproteins normally associated with plants, where they strengthen the cell wall in response to mechanical stress [1]. Note that many family members of this family are hypothetical. 
23.50 23.50 23.60 24.90 22.80 23.40 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.40 0.71 -4.50 81 626 2009-01-15 18:05:59 2003-08-12 13:17:00 7 3 464 0 171 507 50 171.40 35 62.79 CHANGED sshChssLsttG....sph..sslssts....tsuCultssV+lpt.....suslslsssshh..sCshAtuhstWhcpslpPAApphhGp..slsplcphuoYuCRshsst...........u..u+lSEHApusAlDluuFpLsDGpcIoVhcsWp..s......spctpaL+pl+cuAC.thFsTVLGPch.sshHpDHFHhDh......uGht....hCR ............................................s..Ch.th.Lp...tts....h.h..pp.....h.sshp...ssupCslspslclps.......husltls.s..u.h.hh..sCshAhshAhalcpslpPsApphh.tp..plspIcphGoYuCRsh.pp.............s...u+hSEHApu..............sAlDluuFpLuD..G......+c.IsVh.cs.W.t...p...............tcpt...s....aL+...slps.......uuC.....phFssVLG.Psh....Nu.sHt...sHFHlDh...........suhthC+.................. 0 33 86 125 +6733 PF06905 FAIM1 FAIM; Fas apoptotic inhibitory molecule (FAIM1) Moxon SJ anon Pfam-B_13985 (release 10.0) Family This family consists of several fas apoptotic inhibitory molecule (FAIM1) proteins. FAIM expression is upregulated in B cells by anti-Ig treatment that induces Fas-resistance, and overexpression of FAIM diminishes sensitivity to Fas-mediated apoptosis of B and non-B cell lines. FAIM1 is highly evolutionarily conserved and is widely expressed in murine tissues, suggesting that FAIM plays an important role in cellular physiology [1]. 
20.70 20.70 21.30 20.80 20.30 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.25 0.71 -5.10 7 135 2009-09-11 08:59:28 2003-08-12 13:19:24 8 6 84 3 95 143 0 165.10 43 80.59 CHANGED Mo...................DlVAhWDVsLuDGlH+IpFEHGTTSGKRVlaVsG+EllR+-WMFKLVGcETF.lGstpsKATIpI-AlSG..FuYEYoL-IsGKSLcKahEsRsKso+sWlhplDG.....t-hRlVL-KDTM-Va.....sNGpph-TtGEFs-sGo-T+FohusapChIpuhSSGpKRpGIlHpLllDGhc......lsps ..........................-lVAhWsVsLs.DslH+IEFEHGTToGKRllhVDG+.EhlR..+....-.WMF.KL.V...GcETF..plG...ps........+ssI...pIDAl.....uG.....Fs.YEYoLclsGKSLc+.......ahEspoK.phpoW.l.hpl.s.G................p.chRlVL-K..-T.h-l...W.........................sNGpph.Eo.su...EFV-sGT-.T+Fplu..sp.s.shIpAh.SSGp++pGIlHsLhlssptls.h.................................... 0 22 27 74 +6734 PF06906 DUF1272 Protein of unknown function (DUF1272) Moxon SJ anon Pfam-B_14128 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 80 residues in length. This family contains a number of conserved cysteine residues and its function is unknown. 23.10 23.10 23.20 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.32 0.72 -4.12 35 286 2009-01-15 18:05:59 2003-08-12 13:22:27 6 1 282 0 95 231 22 55.90 62 72.70 CHANGED M.LELRPNCEsCD+DLPP-u.-AhICoFECTFCssCs-shh.pslCPNCGGpLltRPhR ...M.LELRPNCEpCDpcLPsDS.-AhICo.aECTFCAsCs-ph..pshCPNCGGELVcRPhR.. 0 14 45 64 +6735 PF06907 Latexin Latexin Moxon SJ anon Pfam-B_14203 (release 10.0) Family This family consists of several animal specific latexin proteins. Latexin is a carboxypeptidase A inhibitor and is expressed in a cell type-specific manner in both central and peripheral nervous systems in the rat [1]. 
25.00 25.00 26.10 26.10 18.90 18.10 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.50 0.70 -5.18 4 94 2009-01-15 18:05:59 2003-08-12 13:28:15 7 2 41 3 42 84 0 188.10 41 88.21 CHANGED slsss+thhphAApsALHahNa+pGSPStLhlLtpVQcupuhh.PpcGpKhalhFSTEpY.......cGE.phGpCoAcVha..hsppPtPsVNsTCs+hhsKpphQEEDaphYcphppLKpPLcu.sIPDsaGpIsssh+.lWcLAalGSSYVMWcpoTpsoaYhLsQloSV+QhppsDDsI-FDaTVLLHEluTQEIIPC+haLVWaPG+PlKVKYpC..-ppu.EE ......................................hsssph.htpAAtss.phhNhptGoPptlhhlttVppup.hp........PtpGpKaplpFosEch..........p.tp.hhspCoAcVha....tspcs..tP.t.lphThpt.ht.Kp..pc-D.thYpph+p.h+pPLpu.sIP.D..saGplsPphpPlhcLAhlusuY..lhWppoTEpphYhhsplpoV+Qh.pps.D.DhI-hDYslLLH-h.soQ.EIIPhphpllWhPthshK.VKaps...hps.............. 0 2 5 17 +6736 PF06908 DUF1273 Protein of unknown function (DUF1273) Moxon SJ anon Pfam-B_14270 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 21.60 21.60 22.10 22.60 21.00 20.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.02 0.71 -4.61 41 1168 2012-10-01 21:16:48 2003-08-12 13:44:34 6 5 1090 1 113 625 5 170.30 37 95.22 CHANGED M...pplhVTGY+uaELGI.Fp-cDPclplIKpAlc+pLhphlE..-.G.l-WlIhuGpLGhEhWusEVsh-L+p-.YPpl+lAllhPFpspuppWsEsNQt+hpslhppsDFscslsc.psYp.........sPtQh+phspFhlc+TDuslllYDpEpcGpsKYhhchhcchpppp.sYslphlsh--Lp-hs...p- .....................................M.pslhVTGY+uFE.Lu.l.Fp-.c.cP.clp.hIKpsl++cL...pphl.-..-..G..l-.WllhsG.pLG.hEhWssEVshEL..+..p-..Y.s....lpl..AslhPF..psau.ppWNE.s...NQt+hsphhppsDaVc.lhp.p.Yp............sPtQh+phspFhL-....po-sslLhYD......cE..p.c......up..s.cY.hhpthppht......sY.hchloF-cLpphhp.t.................................................................. 
0 35 66 91 +6738 PF06910 MEA1 Male enhanced antigen 1 (MEA1) Moxon SJ anon Pfam-B_14358 (release 10.0) Family This family consists of several mammalian male enhanced antigen 1 (MEA1) proteins. The Mea-1 gene is found to be localised in primary and secondary spermatocytes and spermatids, but the protein products are detected only in spermatids. Intensive transcription of Mea-1 gene and specific localisation of the gene product suggest that Mea-1 may play a important role in the late stage of spermatogenesis [1]. 19.00 19.00 19.40 19.40 18.80 18.30 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -4.60 2 97 2009-01-15 18:05:59 2003-08-12 13:52:09 6 2 84 0 51 84 1 122.60 50 88.78 CHANGED MAsVVLGGDTMGPERIFPNQTE-LG.HQGPoEGTGDWSSEEPEEEQEETGuGPAGYSYQPLNQDPEQEEVELAPVG-G.DssADIQDRIQALGLHLPDPPLESEDEDEEGAsALssHSSIPMDPEHVELVKRTMAGVSLPAPGVPAWApEISDAQWEDVVQKALQARQASPAWK .....................................................................................................................................................................................Y.PLs.............t.t.............................t.......t............t.t.....hth..h.h....sp...ss...ps..tDE.-E....E...GAsAhs...s..+S..SIPMDPE.HV..E.l+psMAslsLPs.ulPsWAptls-tpW.cc..l.ptlptpp..................... 1 10 15 30 +6739 PF06911 Senescence Senescence-associated protein Vella Briffa B anon Pfam-B_7525 (release 10.0) Family This family contains a number of plant senescence-associated proteins of approximately 450 residues in length. In Hemerocallis, petals have a genetically based program that leads to senescence and cell death approximately 24 hours after the flower opens, and it is believed that senescence proteins produced around that time have a role in this program [1]. This family extends to the higher vertebrates where the full-length protein is often a Spartin, associated with mitochondrial membranes and transportation along microtubules [2]. 
21.60 21.60 21.80 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.68 0.71 -4.58 59 334 2009-01-15 18:05:59 2003-08-12 13:54:44 7 9 197 0 227 312 3 172.70 26 36.16 CHANGED slspG......uspls+GlhtsushsuptlppGuphhpp+.............h...ps.....spp.......s..............h.....plsPps...............ppplc....cl+phopsstplo....pthlssVspsustlusslspphst..............................tthtshhs..lhhuolpuhuplh....DulEpuu+slhpssussosphVpH+YGppAGpls..pcshsssusssh..sshsstpltp+Alh .............................IhsGuuhlupGllpsu-.hsuptlppGush.hpp+............................h.......pP.......ppc.......P...................................sphsPss...............pcplc.................ts+phottusplo....pt.h..lssVsps.usshutplusphtt.............................................................t.....tp..t.hpsths....lhsuSlpu...Fuslh....sulEpAu+plhsssossssphVpH+YGppAupso..ppshsustsssh...sshsl.tlth+Al...................................... 0 86 137 184 +6740 PF06912 DUF1275 Protein of unknown function (DUF1275) Moxon SJ anon Pfam-B_13896 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown although a few members are thought to be membrane proteins. 
25.40 25.40 25.50 25.50 25.20 25.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.30 0.70 -5.13 176 2258 2009-01-15 18:05:59 2003-08-12 13:56:48 6 14 1641 0 642 1872 52 202.00 21 85.04 CHANGED htluhhLu.hsuGhlDAhualsh.tthFsut.hTGNhshluhslu..tssht.shth....lhslhuFlhGshhushl............httttthtpththsLhlpsslLhhsuh...lshthss.........................hhshhhluhshGhQsushp..plssssl.sTThhTGsls....shuhsls...phlhtp...st....................hpphhhhhshlsuFhhGullGuhhhphhu.h..hulhhssslhhhlsh.hh..h ...............................h..hshhLs.hsuGh.....lD.Ahsa....lsh.......t.............hasut..T..G..........Nlshluhsls....pssh.t...t....shth.........................lhslhsFhhGshhsshl............................hpphp..h..tph.hhhslhhp..sll.lhh.suh...lstths.............................................hhslhhluashGlQssshp...pl.ss.h.......s.h.sosh.hTGslpphs..hsls..........phhhsp.......pp.......................hpc.hhhhhsllhuFhlGu.lhG.u.hh...h..t..h.hs..h...ts.lhhsshhhhhhhh...h...................... 0 157 349 499 +6744 PF06916 DUF1279 Protein of unknown function (DUF1279) Vella Briffa B anon Pfam-B_7316 (release 10.0) Family This family represents the C-terminus (approx. 120 residues) of a number of eukaryotic proteins of unknown function. 
21.80 21.80 22.20 22.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.12 0.72 -3.68 31 454 2009-01-15 18:05:59 2003-08-12 15:35:11 8 12 287 0 318 442 4 100.30 29 39.87 CHANGED p+hKplh+cYGhlulusalslShhshuhhYlhVpu....GlD.lsshlpplshs............................................................pshsp.h.ts..shuphslAYul+Khh.tPlRlslTluhTPh ................................................phKphh+cYGhsslslalslShlshshhY......hhlps.....Gl.-...lsshhphlshs......................................................................pshsp.t...h.tsu...thuphslAYulaKhh.hPlRhslTluhTP.................................................................................... 0 108 170 257 +6745 PF06917 Pectate_lyase_2 Periplasmic pectate lyase Moxon SJ anon Pfam-B_14500 (release 10.0) Family This family consists of several Enterobacterial periplasmic pectate lyase proteins (EC:4.2.2.2). A major virulence determinant of the plant-pathogenic enterobacterium Erwinia chrysanthemi is the production of pectate lyase enzymes that degrade plant cell walls [1]. 
21.30 21.30 21.50 36.40 20.70 20.90 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.86 0.70 -6.26 5 122 2009-01-15 18:05:59 2003-08-12 15:38:44 7 1 69 3 19 88 5 490.50 53 98.49 CHANGED AolhusuupAQDs-RLouVKpYlDNVLsKAuDpY..t+PTPLLADGVDPRTGKQlEWIFPDGRsAVLSNFSAQQNLMRVLlGLSsLTG-s+Y+pRAEcIVRYaFs+YQD-o.GLLYWGGHRFVDLKTLQPEGPSEKEMVHELKNAYPYYDLMFuVDs-ATARFI+GFWNAHVYDW+sLETSRHG-YGK...........sMGuLWQScFEQQPPFFATKGLSFLNAGNDLIYSAS...LLY+aspDsGALsWuKRLAcQYVLPRDsKTGLGVYQFTQPLKR-.EPsDDuDTHSKYGDRAQRQFGPEFGssALEGNMLLKGRTSTLYSENALMQLpLuKcLGssGs-LLcWTlDGLKAFA+YAYNsssNTFRPMlANGpDLSsYsLsRDGYYGKKGTVLKPY.PAsNEFLLSYARAYsLssDs-LW+VARGIApsQGLG-lGsssGu-lKlNMpTsNs-PYALFALlDLYQAoQss-YLcLA-+lGDNllcpRalcGFFlAssc+pYAcVDsIEPYALLALEAAlRNKPsuVAPFLNGAGFTEGuYRhsDGosRVSTRDNELFhLpsGEhLpPNsKK ....................h..........h.hshLosl+pYVD.V.shupsph...pPoPLLADGhDshTtQ.h.W.FPD..G.+csslSNFuuQQNhhRshsuLS.lotDPpYpppAc..spYahQp.h..cpS.GLhYWGGHRFlsLcTLpsEGPupK-pVHELKpthPYYDLhholDp-tThpFlpGFWpAHV.DWchL-huRHGpYuK.......................hssl......uph.p.P...tTKGLoFlNAGsDLIYuA....hhhcYptD..................tuAhsWuK+LhcQYVLsRsscTGLsVYQFops.pRp..PsDDsp...TpShaGDRApRQFGPEF..G.hAhEuNhhh+.chps......Lh.-NsLh.Lplh+p..t.ss-lLpWslDGLKsah+aAYs.psNThRPhhssGQDhSsYsLsRDGYYGtKGoVlpPa..hss-aLlshsRAat....l.spD..Lhclhtslh..c.tLu-ls.........ps.t+t......hph.phppssusPYhLhALl-LhptsQssphhoLA.plGDslh+p+Yh.cGhFhtSsp+pYhclDs..PhALLsL.AAhpNK.tAls.FlssuGahcGsY+hs.GpuRh.hc.s.la........................ 0 1 8 12 +6746 PF06918 DUF1280 Protein of unknown function (DUF1280) Vella Briffa B anon Pfam-B_7457 (release 10.0) Family This family represents a conserved region approximately 200 residues long within a number of proteins of unknown function that seem to be specific to C. elegans. 
25.00 25.00 25.40 27.30 24.80 24.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.25 0.70 -5.15 7 110 2009-01-15 18:05:59 2003-08-12 15:40:44 9 4 9 0 110 87 0 176.60 23 29.56 CHANGED phhs+pshpp.Rh-phLcthpp...sss-hspFlpphl+hlsppsp.sa+..hoscEThhhht+hcLScsph+phKphh+phlGh-lLsShcplppL+ppLus.spYKlop.hssp....G+hlphhplh....cl.csltcRlEpLstpspLl.Ds..tscIhlslhuDpGutpTKlClsItNsppPNSshpllhluhasssDststlpcahsslh..Lschppl ..............................................................................................................h.......h...ph..hht.htp................t....p.p.hh.thh.phhtpp........php..ho..pshh.......hh.phphopt.hp.hKphhpp....hhh-.h.....sshpphtphpp.hss.p....aphp..........spth.h.....th.h.....slpphltpRLppLttpspLh..hcp....tspIhlsluGDKGuspsKlsl.ltNlppPNsspsllhluhapusDshpslpp.htslhpphNpl............................ 0 27 56 110 +6747 PF06919 Phage_T4_Gp30_7 Phage Gp30.7 protein Moxon SJ anon Pfam-B_14625 (release 10.0) Family This family consists of several phage Gp30.7 proteins of 121 residues in length. Family members seem to be exclusively from the T4-like viruses. The function of this family is unknown. 29.60 29.60 30.20 69.90 22.80 29.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.67 0.71 -4.12 2 33 2009-01-15 18:05:59 2003-08-12 15:45:57 6 1 32 0 0 18 0 121.30 86 99.55 CHANGED MNYINFERKYVSNGIAGS.-sICLWKHpNGoVCEIEQhMTPNYVYMRFENGITVSITMcGSNFKIALDDDFRQRDLGTHPCWNGsNRKLLVKTWIRHILSNRAKPEHLEAIFDVVLNEFDI ....MNYINFERKYVSN...GIAGSIDTICLWKHQNGSVCEIEQYM..TPNYVYMRFENGITVSITMEGSNFKIALDDDFRQRDLGTHPCWNGsNRKLLVKTWIRHILSNRAKPEHLEAIFDVVLNEFDI.. 0 0 0 0 +6748 PF06920 Ded_cyto Dedicator of cytokinesis Vella Briffa B anon Pfam-B_7154 (release 10.0) Family This family represents a conserved region approximately 200 residues long within a number of eukaryotic dedicator of cytokinesis proteins. 
These are potential guanine nucleotide exchange factors, which activate some small GTPases by exchanging bound GDP for free GTP. 20.60 20.60 20.60 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.90 0.71 -4.97 25 1240 2009-01-15 18:05:59 2003-08-12 15:48:52 8 27 253 8 667 1086 2 172.80 32 10.33 CHANGED T.F-+spslp+Fha-sP..Fohs..G+.sp..G....slccQa+R+TILossssFPhl++RlpV..hpcppl-l............oPIEVAI-plppKstELstthspp...........shphLQhhLQGslsssVNtGPhchAcsFLsp..h....sth.tcphp+L+pshc.........cFhptsppALclpcpLhttc.tp..hpccL-psa.pphpppLps ...........................................F.+s.plp+FhappP........Fp.hs.........s+...tc..........s..................phpc.ahc+TlLT..Ts.p..s...........FPhlh+....RhpV......hp.p..p.p.h.pl....................................oPlEsAI-phppKsp...-...L...pp.h..h....pptt...................................sshp......LphhLpGsV......ss.s...V.N....u.......G.......s....hpY...........tc..s.....Fhscp.h.....................c..stc..+lp...pL+phhp.......................p.h...t..hhtpuLpl.pcc.h.l.t.t.-...pt........hpccLpppa.pphtpplp.t........................................................................................ 0 199 276 456 +6750 PF06922 CTV_P13 Citrus tristeza virus P13 protein Moxon SJ anon Pfam-B_14711 (release 10.0) Family This family consists of several Citrus tristeza virus (CTV) P13 13-kDa proteins. Citrus tristeza virus (CTV), a member of the closterovirus group, is one of the more complex single-stranded RNA viruses [1]. The function of this family is unknown. 
25.00 25.00 184.90 184.70 20.00 18.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.56 0.71 -4.24 3 58 2009-09-11 02:40:01 2003-08-12 16:01:31 6 1 1 0 0 53 0 114.90 92 100.00 CHANGED MSIRRVWLKVMAVITVLWYGKEPSISEGYNALMNDDFKFIDTHFSNVSYAKKCYDLANFDLDFLRIVIIPLSGGTVNESRADRTNVSEIVESHVSDRDRM+ILLRNKRIQIPSLLPCDN MSIRRVWLKVhAVITVLWYGKEPSISEGYNALMNDDFKFIDTHFSNVSYAKKCY-LANFDLDFLRIVIIPLSGGTVNESRADRsNVSEIVESHlSDRDRMsILLRNKRIQIPSLLPC.... 0 0 0 0 +6751 PF06923 GutM Glucitol operon activator protein (GutM) Moxon SJ anon Pfam-B_14714 (release 10.0) Family This family consists of several glucitol operon activator (GutM) proteins. Expression of the glucitol (gut) operon in Escherichia coli is regulated by an unusual, complex system which consists of an activator (encoded by the gutM gene) and a repressor (encoded by the gutR gene) in addition to the cAMP-CRP complex (CRP, cAMP receptor protein). Synthesis of the mRNA, which initiates at the promoter specific to the gutR gene, occurs within the gutM gene. Expressional control of the gut operon appears to occur as a consequence of the antagonistic action of the products of the autogenously regulated gutM and gutR genes [1]. 25.00 25.00 30.20 29.90 19.40 19.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.12 0.72 -4.41 26 859 2009-01-15 18:05:59 2003-08-12 16:06:25 6 1 822 0 71 301 39 109.20 43 81.79 CHANGED hhhL..IlhhshAallQhlLGahQl+pFN+sapplp.....ppG..+VulG+ppG+h+sGsllllulD.cpspllcuphMpGlTVFARh+slsthpGhsltplp...shlhtps+hsppAl .........................l..sLlslAlIAas..sQluLGhaQIp+FN+sacpLp.....QpG..+VulG..Rs.uG...RF..K.s.pslVhlAlD.-ppRls-shhM+GlTVFAR.pclsshsGhclt-lp....scllhscD.LsQpAl........................... 
0 23 39 56 +6752 PF06924 DUF1281 Protein of unknown function (DUF1281) Moxon SJ anon Pfam-B_14730 (release 10.0) Family This family consists of several hypothetical enterobacterial proteins of around 170 residues in length. Members of this family are found in Escherichia coli, Salmonella typhimurium and Shigella species. The function of this family is unknown. 25.00 25.00 35.20 35.10 23.20 22.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.61 0.71 -4.76 4 277 2009-01-15 18:05:59 2003-08-12 16:09:33 6 1 198 1 17 220 2 126.30 69 46.55 CHANGED LhITGpss.lDtlppWssGchlPhYR+AlpQSI+LFLAGCAGlLpP..TcshcYs.aPsLlutGsGtsSspNlAFp+WLsLLppsVsLDtpssp.I-RLYhQSGluphKWEslPssA+cphsslhspphuDWFGls ...........MaFSGEPAQIAEIKRLASG.AVTPhYRRATNEGIQLFLAGSAGLLQs..TEslcaEPCPGLTAA.G......R...G.sVSPENI.AFTRWLTHLQsGVLLDEQNCLMLHE...LWLQSGTGpRRWEsLPDDsR-oITshFTsKRGDWCshW..................... 0 0 4 13 +6753 PF06925 MGDG_synth Monogalactosyldiacylglycerol (MGDG) synthase Vella Briffa B anon Pfam-B_8075 (release 10.0) Family This family represents a conserved region of approximately 180 residues within plant and bacterial monogalactosyldiacylglycerol (MGDG) synthase (EC:2.4.1.46). In Arabidopsis, there are two types of MGDG synthase which differ in their N-terminal portion: type A and type B [1]. 
20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.30 0.71 -4.35 12 972 2012-10-03 16:42:30 2003-08-12 16:37:42 6 10 725 0 253 763 37 160.40 26 40.98 CHANGED HppuAculp........pthptt.h.pphplhhhDshp.hssh.hphhhps.Yhhhl+h.....u...slathhahhsp.ch.....pshhsthtthhsp.cltshlpchpPDllIsTHPh.tth.l.hL.+p+phh.pshhs.sllTDats.H.hWh+.tlDthalssp-htcchlp.pGlspsplhshGIPl ......................................................................................HhpsApulh........pthpp...........sp.....hplhhhDh..ht.t.t.p.P.h..h..ss.lhpph..Y.hp..h.ch...................h.....ph.at.h.hah..h.s..p.......c.hh........ppph.h.......h....h....p...h....hhh..p....+...lh.p...ll.........p.c......tcPD..l..Il....sTaP.h.........h.l.t...l...+.......p.......c...th....h........s.......l.P.ls...sVlT.D....a.......h.H..phW..lp.st..s.-.tYa.Vuoc.......cs.+p.phlp..hG.lssspltsoGIPl........................................................ 0 109 193 226 +6754 PF06926 Rep_Org_C Putative replisome organiser protein C-terminus Vella Briffa B anon Pfam-B_6517 (release 10.0) Family This family represents the C-terminus (approximately 100 residues) of a putative replisome organiser protein in Lactococcus bacteriophages [1]. 21.40 21.40 22.30 47.80 20.30 20.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.12 0.72 -3.89 3 25 2009-01-15 18:05:59 2003-08-12 16:56:25 6 3 20 0 5 17 0 91.90 62 35.97 CHANGED NSLLS+FLDoFINFSSKNISKRAMApsEFlKLPSFQK-QAVIGAcNYIQsYKNEHPDDcTG+YSVNuYsFL-NsMFMsYQEKVKADTGYDEDLGF ..NSLLS-YLDoFIpFSSKNIuK.RAMApsEFhKLsSEpKcQAVIGAcNYhpWYKpEsP-DcTtKFSlNuYsFL..-sshFcsaQpKVKsc...cEsLGh........... 0 3 3 3 +6757 PF06929 Rotavirus_VP3 Rotavirus VP3 protein Moxon SJ anon Pfam-B_14798 (release 10.0) Family This family consists of several Rotavirus specific VP3 proteins. 
VP3 is known to be a viral guanylyltransferase and is thought to possess methyltransferase activity and therefore VP3 is a predicted multifunctional capping enzyme [1]. 18.10 18.10 52.30 52.30 17.00 16.20 hmmbuild -o /dev/null HMM SEED 684 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.31 0.70 -6.64 3 582 2009-01-15 18:05:59 2003-08-13 11:02:47 6 3 240 0 0 463 0 551.40 85 83.42 CHANGED MKVLAL.RRulspsYADTQVYsHDssKDYYENAFLISN.lTTHNILYLD.YSI....KslEILNKSGIAALIhIshD+LtILI+SNFTYDYphcIVYLHDYSYYsNNE.IRTDQaWLTpTNIEEYLLPGWKLTYVGpsGpETRGHYsYSFhCQNTATDDDIIY-YIYS.....NulDF.........pNFlLptlpcRMTTAVPFcRLSNRIFR-+LFS...K..KssINIGPRNESMFThLc..YPpIpNYSANualVSDLI+LsQE+WlGK+ISQFDIGQFKNMhNVLTsIYYYYNhYKuKPpIYMIGSAPSYWIYDl++Y.sFsIETWDPLDTPaSocHHKchFsIsDlcKLKDNSILYIDIRSDRssADWRcWRKpVEEETIpNLEIuYKYLcsGKs+lCClKMTAMDlELPhouhLLHaPTTcI+SEaYLLlDpahLpcpKRFVPKGlhYSFINNIhTDNVFISssYKVKspN-.aVVALYuLSN-hNsR-KVIcasNsQKpolITVRlNNTFcsEh+lsFKosYDaTFLPSDFosc.sTIlTSYDGYlGlFGLSISL-SKuTGNNHLFIlsusccYsplDoFuoHhGISRRSHSlRFSESATThSGYIFRDloNGKFNLIsTNlENuVSGHVYNALlYYRYNYoFDLlRWIpLHupDcVcIcGG+YYEHAPsELlYACcSAhVFApLQsDLTll+YSNclpcYIpsp ...........................MKVLAL.RHSVAQVYADTQsYlHDDSKDEYENAFLISN.LTTHNILYLN.YSl.....KTLcILNKSGIAAVElQS.DELFs.LIRCNFTYDYENNIlYLHDYSYYTNNE.IRTDQHWITKTDI.DYLLPGWKLTYVGYNGKsTRGHYNFSFhCQNAATDDDIIIEYIYS..............NELDF.........QNFLLRKIKERMTTSLPIARLSNRVFRDKLFPSlVshacKVIN.VGPRNESMFTFLN...FP.TIKQFSN..GAYlVKH.TIKLKQE+WLGKRVSQFDIGQYKNMLNVlTTIYYYYNLYaSKPIIYMLGSAPSYWIaDlKQY.SDFpFETWDPL..DTPYSo.h......HHKELFa.pD.VsKLKDNSlLYIDIRTDRtNhDWKEWRKlVEpQTlsNLNIAYKYLuTGKAKVCCVKhTAMDLELPIoAKLLHHPTTElRSEFYhIlDlWDhhs.I.KRFIPKGVhYuaINNlhTENVFIQ....PF...KLKs.p.s-...YIVALYALSNDFNsR...pDVIpLINpQKpuLI....TVR........hNNTFKDEPKVsFKNIY......DWTFLPTDFEhp.-SIITSYDGCLGhFGLSISLuSKPTGNNHLFIlsGTDKY.KLDQaANHMuISRRSHQIRFSESATSYSGYIFRDLSNN.NF..NLIGTNVENSVSGHV......YNALIYYRYNYsFDLKRWIYLH.ShGKsulEGGRYYEHAPIELIYACRSA+EFAhLQDDLTVLRYuNEIEtYINK.V........................................................ 
2 0 0 0 +6758 PF06930 DUF1282 Protein of unknown function (DUF1282) Moxon SJ anon Pfam-B_14843 (release 10.0) Family This family consists of several hypothetical proteins of around 200 residues in length. The function of this family is unknown although a number of family members are thought to be putative membrane proteins. 23.40 23.40 23.60 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.39 0.71 -4.60 19 937 2012-10-01 22:34:14 2003-08-13 11:06:06 7 8 874 0 113 703 106 178.10 39 84.19 CHANGED llspPupp.applcpcccshthhhlhhllhhhlhs.ssshhhssh..hhthhst.............s.phohts....uhhhulhshhhh..hhhlhuhh...hphhschas..upss...hppshhhuuYsssPhhluuhsshh.slhhsh.............................shhlhhsaslhLlahGl.hhh..sl.cppcuhhhssh.....hllhsl .................................................................LaoHPscE.hpsIp..p.Es.E.ol.u+.a....Ys.a..HV..LLhA...u....IP...VlC.....Aa..I..G.T..T.QlGW.s.hG-G.s......................l.pLohho...........ulslAVlhYslhL.....AGVAlMGtl.....I.aW.M.A.RsYs..p+PS.....ls.+ChlFAGYs...AT...PL.F...L.u.GL..V.AL..YPllWLsu..........................................................llGslAL.h.YosYLLYLGlPoFh....sI....s+.-EGl.FSSSsl....ulGl.....Llltlhhh.h................................. 0 19 46 85 +6759 PF06931 Adeno_E4_ORF3 Mastadenovirus E4 ORF3 protein Moxon SJ anon Pfam-B_14868 (release 10.0) Family This family consists of several Mastadenovirus E4 ORF3 proteins. Early proteins E4 ORF3 and E4 ORF6 have complementary functions during viral infection. Both proteins facilitate efficient viral DNA replication, late protein expression, and prevention of concatenation of viral genomes. A unique function of E4 ORF3 is the reorganisation of nuclear structures known as PML oncogenic domains (PODs). 
The function of these domains is unclear, but PODs have been implicated in a number of important cellular processes, including transcriptional regulation, apoptosis, transformation, and response to interferon [1]. 25.00 25.00 37.60 37.60 17.70 17.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.53 0.71 -4.02 7 84 2009-09-11 07:31:22 2003-08-13 11:13:02 6 1 67 0 0 45 0 111.80 67 97.19 CHANGED CLRMsVEGALpcLFsMpGhsLpp.hhcIIptW+sENYLGhVQsCuhMIEEh-ss.uFulllFL-VRV.sLLEAsVpHLENRlhFDLAVhFHQcSGG-RCHLRDL+FplLtsRLE .....CLRMpVEGALpELFpMsGlDLppphscIIQGWKNENYLGMVQ-CshM.I..-El-su.uFsllLaLDVRVEsLLEATVEHLENRlsFDLAVhFHQHSGG-RCHLRDLHFpVLRDRLE................ 0 0 0 0 +6760 PF06932 DUF1283 Protein of unknown function (DUF1283) Moxon SJ anon Pfam-B_15035 (release 10.0) Family This family consists of several hypothetical proteins of around 115 residues in length which seem to be specific to Enterobacteria. The function of the family is unknown. 25.00 25.00 94.50 31.80 22.40 17.80 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.88 0.72 -3.96 9 515 2009-01-15 18:05:59 2003-08-13 11:31:22 6 2 510 0 40 138 1 84.80 85 74.80 CHANGED sTs+lVl-SGDoAhS+psAp.sKEQWNDT+uLRpKVNpRsEKEaDKhDtAhDu+DpCppSsNlNAYWEPNT.RCLDRRTGRs.IsP ............ETsKLVIESG.DSAQSRQcAAMEKEQWNDTRsLRQKVNKRsEK.......EWDKADAAFDNRDKCEQSANINAYWE....PNTLRCLDRRTGRV.ITP...... 0 1 8 23 +6761 PF06933 SSP160 Special lobe-specific silk protein SSP160 Moxon SJ anon Pfam-B_14947 (release 10.0) Family This family consists of several special lobe-specific silk protein SSP160 sequences which appear to be specific to Chironomus (Midge) species [1,2]. 
25.00 25.00 25.80 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 756 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.58 0.70 -6.55 2 5 2009-01-15 18:05:59 2003-08-13 11:37:41 6 2 3 0 1 5 0 618.40 65 77.63 CHANGED MNIKVILVCALVAIFFAQVAEGGPIANFVGFlISLLFSLFEVMlSVVFDVKSFTSLSssTsNuThPuFAopVGGGRFosIhpuoFNhIAMISANlQAIQSGSGSsSusSSSSssSTosSsoTTSssoTTooNSTT.......SSNSTSSGLTouASVVSLIDThAWVYQDSSVGIAYLMVSILALFYGQSVSAPPYsDLGIPALPANsSGsGVPQSVQI+AAIsYlNlTI.FIshTGQQFEDLQGPVsTDCGCPNTTSVAPLVhEWEAIhAALpAhAsuoASuNSTosSsSTosoTTsSsSTTTTNSTToTNSTSSSNSoTIAGsIDIuANhTlALQsLQALLhQEuTCAPsLAANAKKSGVR-huPCKsu..uSuCA+SGp+KVKRKARLEKMRAKsRRAVGNR.KGSMKKRVRSRAKKFGcAAKSGVRRYRKNIKaVYIPPVMASLNAYAALMASLS-SIS.QS-SALNSTDpACNSTussTDtAVIsATssVTDMFsNFTAMVlNNTVAaPNCTQaADMAL.MlSQIN-QIIuCGSQSDsApSSIYhNVTIsIVAMAQEYNNFASMSDKCTRSFANSWLWMYIKWVFYRMGMTSGlPNFLACQTKApSSLTAhLASFNATlsATIuAASANsSpVQSSEAuCIESSLu-AusILtMaEsAYQNCssPGSV..PstssTTTooTTTTTTTTTTAAPTTTTTKAANAPFTYPLCsL.MossCshGhssCTYPhISSAGCCPSGKTLNTGLGGRGCCK .........MNIKVILVCALVAIFFAQVAEGGPIANFVGFlISLLFSLFEVMlSVVFDVKSFTSLSsPTsNuThPuFAopVGGGRFosIhpuoFNhIAMISANlQAIQSGSGSsSusSSSSssSTosSsoTTSssoTTooNSTT.......SSNSTSSGLTouASVVSLIDThAWVYQDSSVGIAYLMVSILALFYGQSVSAPPYsDLGIPALPANsSGsGVPQSVQI+AAIsYlNlTI.FIshTGQQFEDLQGPVsTDCGCPNTTSVAPLVhEWEAIhAALpAhAsuoASuNSTosSsSTosoTTsSsSTTTTNSTToTNSTSSSNSoTIAGsIDIuANLTlALQNLQALLhQEuTCAPsLAANAKKSGVR-huPCKsu..uSuCA+SGp+KVKRKARLEKMRAKCRRAVGNR.KGSMKKRVRSRAKKFGKAAKSGVRRYRKNIKaVYIPPVMASLNAYAALMASLS-SIS.QS-SALNSTDpACNSTussTDtAVIsATssVTDMFsNFTAMVlNNTVAaPNCTQaADMAL.MlSQIN-QIIuCGSQSDsApSSIYhNVTIsIVAMAQEYNNFASMSDKCTRSFANSWLWMYIKWVFYRMGMTSGlPNFLACQTKApSSLTAFLASFNATlsATIuAASANsSSVQSSEAuCIESSLuDAAsILtMaEsAYQNCoAPGSV..Ps-T.sTTTTTTTTTTTTTTTAAPTTTTTKAANAPFTYPLCsL.MossCshGhssCTYPhISSAGCCPSGKTLNTGLGGRGCCK....... 
0 1 1 1 +6762 PF06934 CTI Fatty acid cis/trans isomerase (CTI) Moxon SJ anon Pfam-B_14967 (release 10.0) Family This family consists of several fatty acid cis/trans isomerase proteins which appear to be found exclusively in bacteria of the orders Vibrionales and Pseudomonadales. Cis/trans isomerase (CTI) catalyses the cis-trans isomerisation of esterified fatty acids in phospholipids, mainly cis-oleic acid (C(16:1,9)) and cis-vaccenic acid (C(18:1,11)), in response to solvents. The CTI protein has been shown to be involved in solvent resistance in Pseudomonas putida [1]. 25.00 25.00 26.10 34.10 23.60 23.40 hmmbuild -o /dev/null HMM SEED 694 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.53 0.70 -13.15 0.70 -6.28 33 247 2009-01-15 18:05:59 2003-08-13 11:44:16 6 2 227 0 54 225 50 647.60 51 88.45 CHANGED AssPTRLF.lDApssppWRp+GFasVLs...ps.tushpAuLhh+MLpLKpppPLsssshL.s-shDhulsRsppCPs..cEassYtpspPptGMPaGhPuLossEassLtpWLppGA.....shstps.s.ospEtppIspWEsFLNpsuhKppLsARYlYEHLFLAHLYF..........tssp...sscFFcLVRSpTPsGpPlchIuTRRPaDDPGsc...........hYYRLhPlpuoIVcKTHhsYsLssp+LpRhppLFhss-a.pVspLPuYpsphuANPFpoFpsIPscARYpFMLDpApahlhsFIKGPVCRGQlALNVIpD+FWVhFhcP-ps.hsssssahpppsshLtLPupt-ssss.hhs.WhpYuc...ppscYhcs+pphhschh.ts....hshshlWsGs.s.NcNAhLTlFRHFDSASVhKGLlGphPKTsWllDYPLLERIaYhLVAGFDVYGNVuHQL.TRLYMDhLRhEGEsNFLpLLPtcpRcshhssWYpssu.plpp.alpts..hshspPoulpapos.sPKpEhhpplhp.+hsslts.sp.sls...sst..............hppscph...Lsplsuhtus.ulsh...LPE....lohlhl................csssG..cpp.lYSLl+NpAHsNluhLhuEchchpPppDoLTlh.GllGSYPNhhapl.pps-lssFVstlpshcsppD.a.ppll-+aGlRRosPpFWpa.Dplpphh+pspPlEuGlLDhNRaENc 
...................................................................................................................AssPTRLa..DApsTppWRptGFaPVLs...ps.tuNhpAulhARhL..KcppPLPtpspL..cshDhSlsR.pppCPo..pEh-patpspPphGMPaGhPsLospEYsTLhpWLppGA......shspth....shospEts.lspaEshLNpsutKppLsARYlYEHLFLuHLYF..............s.tp.ps+FFpLVRSpTPPGp..............PlchIsTRRPaD.DPGs-..........+lYYRLhP.QsTIVcKTHhPasLspp+ltph+phFlssDY.pV.spLPuYpPphuANPhpsFhslPscARapFhLDNAp.hlhuFIKGPVCRGQlALNVIpD+FWVhFhDP-+sph.spsspFhtppsstLpLPupp-sssh.slo.W.l..pYut...ppscYhcA+s-hhsphhtsG....lohshlWcGs...s.NsN.AsLTlFRHFDSASVlpGLlG-sPKTsWlhDYsLLERIHYLLVAGFDVYGNhGHQLhTRhaMDaLRhEGEsNFltLLPtDhRcp.husWYQspu.phptal.pts.shsh-pPTulpa.oc.sPKpELhtpLtc.pltsl.u.sc.sIs...pst.............hhtpsEts...Lpplsphtut...GLhs.......lPp.......lsMLhl................cspp.G....ctp...laTLl+NpAHoNlutLhsEptpt...pPtpDsLTll.GVlGSYPshhhsl.pp.splsphsptltshcs-.pDY..tLl-+aulRRSsPpFWsauDtlppah+pspPlEhGlLDhNRaEN......... 0 11 26 42 +6763 PF06935 DUF1284 Protein of unknown function (DUF1284) Moxon SJ anon Pfam-B_14822 (release 10.0) Family This family consists of several hypothetical bacterial and archaeal proteins of around 130 residues in length. The function of this family is unknown, although it is thought that they may be iron-sulphur binding proteins. 25.00 25.00 25.30 26.60 24.00 22.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.83 0.72 -3.85 30 215 2009-01-15 18:05:59 2003-08-13 11:48:14 6 2 214 0 76 160 6 105.20 33 74.01 CHANGED GYSssFlcNhscIhpcl.t...ppstpIplVsssDcICtsCPsptcst.Cp....sps...pVpchDc....pVlchL...slcsGphhshschhppltcphps.sclcclCpsCcWhphsh ......GYSssFVcNhctIst.+L.....spspsIplVsusD-ICsshsst.s.csp...Cp............s-s......VpthDc......tVhchL......uLc........sGc...hph.s.p.hht.p.h.hcchhs.sslcchCpsCpW.sls......... 
0 25 52 62 +6764 PF06936 Selenoprotein_S Selenoprotein S (SelS) Moxon SJ anon Pfam-B_15061 (release 10.0) Family This family consists of several mammalian selenoprotein S (SelS) sequences. SelS is a plasma membrane protein and is present in a variety of tissues and cell types [1]. The function of this family is unknown. 21.30 21.30 21.50 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.48 0.71 -4.76 5 99 2009-01-15 18:05:59 2003-08-13 12:01:14 6 4 75 1 58 110 0 156.90 34 90.36 CHANGED M-.hcD-sshsNc.lPshEspuL..sFLpsoVGphLSpYGWYLLlGCVllYlLIQ+Lst+h.RulpsRpoptshospDP-pVVRRQEAlEAARhRMQE-LDA+AscFKEKQ+QLEEEKRRQKIEhWDRMQEGKSYK.sSu+lsp-uosEsSoSSul.KPKo...EKKPLRuoGYNPLTGsGGGSCSWRPGRRGPSSGG ..............................h...........................h..sls.hLusYGWYll.hsslllYllhp+.lptc.h...+......shp.p...pp....p..t.s...t..s.s....h-P-sVs++QEAlt....AARh+.MQEELsAp..s..c....ca....+...EK....+phEEEK......R+pcI......chW-shpp..G+u.h+..st.p.....tppss..tsu......suosh.K.K....cp+sL.RsusY..NPLsGp.uGussuaRPs+Rs.sstGG..................................................... 1 25 28 42 +6765 PF06937 EURL EURL protein Moxon SJ anon Pfam-B_14914 (release 10.0) Family This family consists of several animal EURL proteins. EURL is preferentially expressed in chick retinal precursor cells as well as in the anterior epithelial cells of the lens at early stages of development. EURL transcripts are found primarily in the peripheral dorsal retina, i.e., the most undifferentiated part of the dorsal retina. EURL transcripts are also detected in the lens at stage 18 and remain abundant in the proliferating epithelial cells of the lens until at least day 11. The distribution pattern of EURL in the developing retina and lens suggest a role before the events leading to cell determination and differentiation [1]. 
22.00 22.00 22.20 23.80 20.60 21.90 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.99 0.70 -4.98 6 69 2009-01-15 18:05:59 2003-08-13 12:32:25 6 2 45 0 39 56 0 244.50 59 93.19 CHANGED MpEEEQFVsIDLNDDNICSVCKLGTDp-TLSFCHlCFELsIEGVP+SsLLHTKSLRGH+DCFEKaHLIANQDCsRSKhS+SsYEtVKsIlSKKINWIVQYAQNKDlso-oEsSKsoQH.LhsaRHQs-+KLLPQ.DSQVPRYSuKWh.EuNuuuhSshuQplLEp+csp-FsLGhLppssst......LWs.sp.spsQKpEpssSus.sp.QRpasHaSREEL.NpMoltEL+QLstpLLpQIQ-VFEELottVQEKDSLuSELHVRHlAIEQLLKNCSKLPCLQhGRAGhKu ...............................................................................MNEEEQFVNIDLNDDNICSVCKLGTD+ETLSFC.HlCFELNIE...GVPKSsLLHT+SLRGHKDCFEKaHLIANQcCP............RSKLSKST.YEEVKoILSKKINWIVQYAQNKDlDSDSECSK...ssQH.pL...hsFRH+s-cKLLPQFDSQVP+YSAKWl.-GssuulSsps.QpIL...EppcssDFtLuhLpsssus..........LWs.sp.s.psQ+pEcs.us..stsssppp.p.+YSREEL.ssMolsElcQLsscLhpQIQ.-VFE-LsttVQEKDSLuSpLHVRHlAIEQLlKNhSKLP....pLQh.GRs.Gh+.................................................................... 0 7 9 19 +6766 PF06938 DUF1285 Protein of unknown function (DUF1285) Moxon SJ anon Pfam-B_15060 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. The structures revealed a conserved core with domain duplication and a superficial similarity for the C-terminal domain to pleckstrin homology-like folds. The conservation of the domain interface indicates a potential binding site that is likely to involve a nucleotide-based ligand, with genome-context and gene-fusion analyses additionally supporting a role for this family in signal transduction, possibly during oxidative stress. 
25.00 25.00 32.20 31.20 24.90 24.30 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.93 0.71 -4.23 81 468 2009-01-15 18:05:59 2003-08-13 12:35:09 6 2 462 3 150 424 462 143.40 43 74.16 CHANGED hcItpDGoWaYpGoPIsR.sLV+LFSolL++-s-....caaLVTPVEKlsIpVEDAPFlAV-hchp.t.puptp.sLpFpTNluDhlhsG.s-HPLRlp...............................hssps..sEs.sPYlpVRss......L-AhlsRslaYcLl-luppps...stshhulhSuGthFs ...hcIptDGoWaY.GoPIsRtsLV+LFo.olL.++-sD.....caaLVTP.VEKlsIcV-DAPFlAV-hchp.s.ps....psp.s.LpFpT.NlsDhlpsss-H.PLRht...............................hpsts.....sp..pPYltVRss......LcAhlsRslaYcLVph.upttc..........pG.thhulhSuGthF...................................................................... 0 44 85 116 +6767 PF06939 DUF1286 Protein of unknown function (DUF1286) Moxon SJ anon Pfam-B_15105 (release 10.0) Family This family consists of several hypothetical archaeal proteins of around 120 residues in length. All members of this family seem to be Sulfolobus species specific. The function of this family is unknown. 25.00 25.00 32.50 45.10 24.30 23.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.83 0.71 -3.90 8 53 2009-01-15 18:05:59 2003-08-13 12:41:15 6 1 24 0 15 48 0 109.80 56 66.48 CHANGED MKLpTHYVFohGLLTLlsShhh.......sFYhSLllSullSllGNoLIDRLGHpEIpspYGh.IssRTPLTHThPRSlhWGLlsulP.lIhhLtahYGa...........phlhhlLlsGllsGPSHMLL ...........M+L+THYlFSsGLLTLLsShlh.....p.FYhuLllSullSVlGNoLIDRlGH+EItT+YGh.IPVRTPLTHTlPRSVlWGlloslP.hIlLLhaaYGh...........ppllLllLLsGVlVGPSHMLL. 0 4 6 14 +6768 PF06940 DUF1287 Domain of unknown function (DUF1287) Moxon SJ anon Pfam-B_15116 (release 10.0) Domain This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. This family is related to Pfam:PF00877. 
25.00 25.00 38.30 38.20 22.00 19.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.24 0.71 -4.83 25 504 2012-10-10 12:56:15 2003-08-13 12:43:34 6 2 497 0 44 231 32 160.30 69 78.88 CHANGED llsuAcpplsssltYDsuYhplsYPsGDVPtspGVCTDVVlRAhRp.hslDLQchVHEDM+pNFusYP..+hWGLp+PDsNIDHRRVPNLpsaFpRputsLslotss.......p-YpsGDIVoWtL.sss.LsHIGIVSD+cpssGh.PhllHNIGsGsp..EEDhLFpa....cI...sGHYRa ............IADGARpQIGsTLFYDPAYhpLoYPGGDVPpERGVCSDVVIRALRS.QcVDLQKLVHEDMAKNFApYP..QKWpLKRPDSNIDHRRVPNLETWFoRHsKT.....+PhSKNs.......SDYQAGDIVSWRLDN....G....LAHIGVVSDs.hA..R.D.GT.PLVIHNIGAGAQ...EEDVLFuW....+M...VGHYRY...... 0 17 30 38 +6769 PF06941 NT5C 5' nucleotidase, deoxy (Pyrimidine), cytosolic type C protein (NT5C) Moxon SJ anon Pfam-B_14848 (release 10.0) Family This family consists of several 5' nucleotidase, deoxy (Pyrimidine), cytosolic type C (NT5C) proteins. 5'(3')-Deoxyribonucleotidase is a ubiquitous enzyme in mammalian cells whose physiological function is not known [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.28 0.71 -4.79 6 770 2012-10-03 04:19:28 2003-08-13 13:06:10 7 7 677 18 215 585 488 168.80 25 85.77 CHANGED R.lhlhVDhDGVhADFhGthlphasccFtcs..lh.E-hpu.ashcpaG.hpPspt-plapa.hpp.uFFtsLEPlPGAh-sL+clusht.c.VhIsssshh+Y.H..ult-KhpWl-pHhshl.htsllls+-KslVtuDlhIDDpPcNltphps.sh.pILFsssaN+Hlc...sphRssSWp-sachI..lpuKth .....................................................................p....lslDhDtVLsDh....hst....hl....ch...hs..........at....t..p........l..p.........h.c..t..h....p...s.....h......h...c.t.......................-..ht.....t.h......h..hph...h.cp..s.s..a...F..p.......p...l...c.s...h..P.s.A....E...sl.+cLs.c...p.........h...........l.a.I.so.sshch.p..............sh.p..-........K...h.c..W.Ltc.a....hs........h..........l....s.............p.....c..h...l.....h..........s.....t.........c.......K.......s..........l.........l.........p......u...........D........h....L........IDD..pPcp..h.......t................h..t..s..t.s.......lha.ss......saN..p...p...........h.RlpsW..p-h.t..........ph.................................................................................... 0 84 130 168 +6770 PF06942 GlpM GlpM protein Moxon SJ anon Pfam-B_15323 (release 10.0) Family This family consists of several bacterial GlpM membrane proteins. GlpM is a hydrophobic protein containing 109 amino acids. It is thought that GlpM may play a role in alginate biosynthesis in Pseudomonas aeruginosa [1]. 
25.00 25.00 25.10 27.50 22.20 24.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.27 0.72 -3.79 14 669 2012-10-01 21:19:26 2003-08-13 13:30:26 7 1 664 0 57 200 0 106.30 68 95.42 CHANGED lKsLlGAhsVLlIulLSKoKsaYIAGLVPLFPTFALIAHYIVGoERuh-sLRsThlFGlaSlIPYhsYLhulYYFohhhpLstsLssAslsWllsAslLlhsWs+hp ....IKAALGALVVLLIGlLuKTKNYYIAGLIPLFPTFALIAHYIVASERGIEALRsTIlFSMWSIIPYFlYLloLWYFoGhMRLPsAhlGuVsCWGlSAWlLIhCWIKhH......... 1 7 19 38 +6771 PF06943 zf-LSD1 LSD1 zinc finger Moxon SJ anon Pfam-B_15249 (release 10.0) Domain This family consists of several plant specific LSD1 zinc finger domains. Arabidopsis lsd1 mutants are hyper-responsive to cell death initiators and fail to limit the extent of cell death. Superoxide is a necessary and sufficient signal for cell death propagation. LSD1 monitors a superoxide-dependent signal and negatively regulates a plant cell death pathway. LSD1 protein contains three zinc finger domains, defined by CxxCxRxxLMYxxGASxVxCxxC. It has been suggested that LSD1 defines a zinc finger protein subclass and that LSD1 regulates transcription, via either repression of a pro-death pathway or activation of an anti-death pathway, in response to signals emanating from cells undergoing pathogen-induced hypersensitive cell death [1]. 20.20 20.20 20.30 20.20 19.70 20.00 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.87 0.72 -6.91 0.72 -4.33 23 365 2009-09-10 17:13:47 2003-08-13 14:14:43 7 6 52 0 186 373 2 25.00 54 21.25 CHANGED Cs....GCRslLtYPhGAsuVRCuhCpsV .......Cu....uCRslLhYPhGAs..SV+CulCpsV.. 0 32 115 160 +6773 PF06945 DUF1289 Protein of unknown function (DUF1289) Moxon SJ, Eberhardt R anon Pfam-B_15170 (release 10.0) Family This family consists of a number of hypothetical bacterial proteins. The aligned region spans around 56 residues and contains 4 highly conserved cysteine residues towards the N-terminus. The function of this family is unknown. 
Structural modelling suggests this domain may bind nucleic acids [1]. 20.20 20.20 20.30 20.50 20.00 19.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.81 0.72 -4.49 164 1514 2009-01-15 18:05:59 2003-08-13 14:58:21 8 8 1024 0 413 1099 1383 49.30 37 55.23 CHANGED pSPClslCphst.sslChGChRoh-EItpWpphsssc..+ptlhp..plstRtst .............SPClu..lCph-t.........pshCpGChRot-ElhsWpphocsc..+ptVhp.hhppRh..h.................. 0 68 178 296 +6774 PF06946 Phage_holin_5 Phage holin Moxon SJ anon Pfam-B_15309 (release 10.0) Family This family consists of several Listeria bacteriophage holin proteins and related bacterial sequences. Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the build up of a holin oligomer which causes the lysis [1]. 22.40 22.40 22.90 22.60 22.30 22.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.13 0.72 -3.82 2 44 2009-09-10 23:38:34 2003-08-13 15:04:36 6 1 41 0 4 35 2 88.70 41 94.32 CHANGED MEFGKELLVYMTFLVVVTPVFVQAIKKTELlPSKWLPTVSILlGAILGALAT.LDGSGSLATMIWAGALAGAGGTGLFEQFTNRuKKYGcDD. .............................plLhahohLsllssl..h..VQuIKK.T.cl..lPsKalPsVSlllGslLGAlAs.lss.ss.sLsshlWA...Gu....lAGhuuTGLFEthTp..R.......................... 0 1 1 4 +6775 PF06947 DUF1290 Protein of unknown function (DUF1290) Moxon SJ anon Pfam-B_15248 (release 10.0) Family This family consists of several bacterial small basic proteins of around 100 residues in length. The function of this family is unknown. 
24.40 24.40 39.60 39.60 24.30 24.30 hmmbuild --amino -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.95 0.72 -4.08 36 498 2009-01-15 18:05:59 2003-08-13 15:07:14 7 1 497 0 137 265 5 88.80 52 78.09 CHANGED slPstausYLulAllAALDolFGulRAtl-cpFDsclFloGFFuNslLAuhLsaLGDpLGV.s.LahAuVlVFGlRlFsNhAhIRRhLlc .......lPsshpPYLsIAllAALDAlFGulRAhLccpFcs+VFVsuFhuNVllAALlValGDpLGV..s.L.sAslVVhGlRIFsNsAuIRRhlh.t....... 0 58 106 124 +6777 PF06949 DUF1292 Protein of unknown function (DUF1292) Moxon SJ anon Pfam-B_15310 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown. 21.30 21.30 21.30 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.77 0.72 -3.63 79 1823 2009-01-15 18:05:59 2003-08-13 15:12:29 6 4 1459 0 278 887 96 76.20 32 73.45 CHANGED EpGsEhphpllhsh-s.-..s.ppYlllhPht......pp.-p....t...-lhhhph.tp...--s...spttLhsl..EsD.-Ea-hVtcshpshh.sE .............EpGNEp..ha-llh.sh-s.E.....s.KpYllLhPss..............tp----.............ph...El.shph.ts...--s.....sp...u.cLtsI...EsD..-EW-hlpElasohh-E....................... 0 123 207 245 +6778 PF06950 DUF1293 Protein of unknown function (DUF1293) Moxon SJ anon Pfam-B_15399 (release 10.0) Family This family consists of several bacterial and phage proteins of around 115 residues in length. The function of this family is unknown. 25.00 25.00 25.00 36.00 24.50 15.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.31 0.71 -4.19 4 44 2009-01-15 18:05:59 2003-08-13 15:18:40 6 1 39 0 3 27 0 108.10 62 98.04 CHANGED MA+s.VFVLGIsIhWNstpG-p.ApLNlSRPL+pVNuEKFKRRTlGEsGDVNPpaDQPLhID+cYAh+LE+oGAhVPRREY-lcltlNP-DPLAGuIVsELIPVDsElKKHFpASLK .........MAp....lhGhsIph.stpG....so.ApLNV.RPlcpVNsEKFtphslG.ssDl.P..cQPLhI-.sYAhhL.cTtAhVPpREYplRht.NP-DPLt.slssELIPhDp-lKKaFptohK.......... 
0 0 0 1 +6779 PF06951 PLA2G12 Group XII secretory phospholipase A2 precursor (PLA2G12) Moxon SJ anon Pfam-B_15422 (release 10.0) Family This family consists of several group XII secretory phospholipase A2 precursor (PLA2G12) (EC:3.1.1.4) proteins. Group XII and group V PLA(2)s are thought to participate in helper T cell immune response through release of immediate second signals and generation of downstream eicosanoids [1]. 32.50 32.50 32.90 37.40 28.00 32.30 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.55 0.71 -4.65 8 176 2009-01-15 18:05:59 2003-08-13 15:23:26 6 5 97 0 98 157 1 169.50 40 81.12 CHANGED sLuL.hL........ssutusc............EscssDWhhsLpoIRsGl+plcsYhsuAL-LlGGcDGlCQYKCpsGps.....PlPRPGY+sP.PNGCGS.hFGl+....hDl.GIPu..MTKCCNQHDhCYDTCGpsKpcCDpcFphCLcsICsDlpKoLGhspsVc.ACEosVchLFsoVhpLGCKPYL-SQRuAClCphEEKsEL .............................hhhhhhh.................................pt.hsDh..sltslRsuhcplptYhsuhl-Lh.G...G.psGhC.p.Y.+....Cp.......Gpt.....PhP..R....uY+sstPNGCG...S.hhGlp....hsh.GIP..u..hTKCCNpHDhCYDTC........Gs.sKtcCDtcFphCLppIC..s..cl...p+.o..L......G..............hsppVp.......A.C-osschlFssVh.p.LGC+Pa..hsSQ.RsAChCt.E-+t-L........................... 0 25 39 62 +6780 PF06952 PsiA PsiA protein Moxon SJ anon Pfam-B_15432 (release 10.0) Family This family consists of several Enterobacterial PsiA proteins. The function of PsiA is unknown although it is thought that it may affect the generation of an SOS signal in Escherichia coli [1]. 
25.00 25.00 27.70 27.70 23.30 23.30 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.62 0.70 -5.30 7 330 2009-01-15 18:05:59 2003-08-13 15:30:40 6 2 215 0 9 171 0 217.40 69 97.79 CHANGED M.spSpALVsLpstpQAAhpAlhhsEppRcpGspLsuhPas+sFhRlL.sGSuRIssoshRpIsuhhhsscp+hssL.Qh.tALDhLIpStGEhCPLPLohDVtucLFPpVhatcs-RRhp+pslshpRp.RpcuRcsEQphhhhQNLLuQA.sELsFpSPETVsoWYsRWSDc.h-tp-LtshFWpWpsRFsSLsuh-hhphps-PLatVMaEl.hhsREssctlRthERW.VPNKLts .........MSARSpALVPLSsEQQAAhpAVApTEcRR+QGsTLu..taPYAuAFFRCL.NGSRRISLoDLRFFhPuLTsEEhHGNRL.QWLhAlDhLIEopGEVClLPLPuDAucRLFPSVhFRhpERpR+KosLsh..QKYSRQpsREAEQ+thtYQsLlAQApIELAFHSPETVGSWauRWSDc.Vs.....EHDLETlFWpWspRFPSLuuh-RapWQ-.PhWpVIhEAuhAuR-AsctVRphERWMVPNKLc.p.................................................... 0 1 1 6 +6781 PF06953 ArsD Arsenical resistance operon trans-acting repressor ArsD Moxon SJ anon Pfam-B_15383 (release 10.0) Domain This family consists of several bacterial arsenical resistance operon trans-acting repressor ArsD proteins. ArsD is a trans-acting repressor of the arsRDABC operon that confers resistance to arsenicals and antimonials in Escherichia coli. It possesses two pairs of vicinal cysteine residues, Cys(12)-Cys(13) and Cys(112)-Cys(113), that potentially form separate binding sites for the metalloids that trigger dissociation of ArsD from the operon. However, as a homodimer it has four vicinal cysteine pairs [1]. 
20.70 20.70 20.70 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.58 0.71 -3.95 38 439 2012-10-03 14:45:55 2003-08-13 15:32:12 6 7 386 8 103 319 11 115.90 37 90.33 CHANGED Mp+..lplFDPAhCCSTGVCGs-VDppLlpFuADlcWl.KppGlplcRaNLuppPhuFscNssVpshLcpuGt-sLPllLVDGpllhsGcYPoRpEL.....spahGlshspstts...........................ssuC.Cuusst....CC ..........................................................MpplplF-PAhCCSTGVCGsplD.tLlphusshphl...+.p..p.........Glp.lpRaNLuppP.tFspNptVpphLpppGt-sLPlhLVDGclshsGpYPopcEl.....ucahulsh.p.........................................sssC....Cu.st.......CC........................................ 0 45 72 86 +6782 PF06954 Resistin Resistin Moxon SJ anon Pfam-B_15476 (release 10.0) Family This family consists of several mammalian resistin proteins. Resistin is a 12.5-kDa cysteine-rich secreted polypeptide first reported from rodent adipocytes. It belongs to a multigene family termed RELMs or FIZZ proteins. Plasma resistin levels are significantly increased in both genetically susceptible and high-fat-diet-induced obese mice. Immunoneutralisation of resistin improves hyperglycemia and insulin resistance in high-fat-diet-induced obese mice, while administration of recombinant resistin impairs glucose tolerance and insulin action in normal mice. It has been demonstrated that increases in circulating resistin levels markedly stimulate glucose production in the presence of fixed physiological insulin levels, whereas insulin suppressed resistin expression. It has been suggested that resistin could be a link between obesity and type 2 diabetes [1]. 
25.00 25.00 25.30 25.90 23.70 24.80 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.20 0.72 -10.72 0.72 -3.71 11 72 2009-01-15 18:05:59 2003-08-13 16:39:31 6 2 28 12 30 67 0 82.10 54 76.16 CHANGED ssopCol-pll-cKIK-sLuphp..Pssl.....pKp.LSCsSVpopG+LASCPuGhsVTGCuCGauCGSWDIppcsTCHC.QCu.sl..DWTsARCCpL ..................sto.ssl-phlpcKIp-hhssl......psl........pph.LpCpSVpSpGcLAoCPsGhsVTGCuCG.uCGSWDlR.s.-TTCHC..QCu....sh..DWTsARCC+l... 0 6 6 7 +6783 PF06955 XET_C Xyloglucan endo-transglycosylase (XET) C-terminus Vella Briffa B anon Pfam-B_20045 (release 10.0) Family This family represents the C-terminus (approximately 60 residues) of plant xyloglucan endo-transglycosylase (XET). Xyloglucan is the predominant hemicellulose in the cell walls of most dicotyledons. With cellulose, it forms a network that strengthens the cell wall. XET catalyses the splitting of xyloglucan chains and the linking of the newly generated reducing end to the non-reducing end of another xyloglucan chain, thereby loosening the cell wall [1]. Note that all family members contain the Pfam:PF00722 domain. 20.50 20.50 22.20 21.10 19.20 17.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.04 0.72 -4.26 134 896 2009-01-15 18:05:59 2003-08-13 17:05:34 7 5 108 13 411 924 0 52.80 34 18.85 CHANGED C................sssssW.hs.pp......Ls..spptpph+WVpppaMlYsYCsDppRaPts..hPs.EC ..............................ttsttW.hs..pp........Ls.sppppphcWVpppaMlYsYCsDpp.R.aPt......s...hPs.EC........ 0 61 251 337 +6784 PF06956 RtcR Regulator of RNA terminal phosphate cyclase Studholme D anon Pfam-B_17814 (release 10.0) Family RtcR is a sigma54-dependent enhancer binding protein [1] that activates transcription of the rtcBA operon. The product of the rtcA gene is an RNA 3'-terminal phosphate cyclase [2]. This domain is found at the N terminus of the RtcR sequence. 
RtcR, and other sigma54-dependent activators, contain Pfam:PF00158 in the central region of the protein sequence. 22.70 22.70 22.80 23.00 21.20 22.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.19 0.71 -4.95 17 470 2009-01-15 18:05:59 2003-08-13 17:23:00 6 3 455 0 52 277 17 182.20 74 34.98 CHANGED h++pVslGFLGosLD.....tGpuspRWp+WRPoVuLCQp.D.L.lDRLELLH..ss.................RtpsLA-pltpDIspVSPcTEVchp.hplcsPWDFcEVYusLaDFscuYsFDsEpE-YLlHITTGTHVAQIChFLLsEuRalPA+LlQTSPsRc...ptppssGohslIDLDLSRYDpIAsRFspcpp-sluFLKSG .......................MRKTVAFGFVGTVLD....YAGRGSQRWSKWRPTLCLCQQ.ES.LVIDRLELLH..Ds.................RSRSLFETLKRDIASV.SPETEVVuVEIELHNPWDFEEV...YACLHDFARGYp..FQPEKEDYLIHITTGTHVAQ.ICWFLLAEARYLPARLIQSSPPRK.......KEp.P.+....usG-VTIIDLDLSRYNAIASRFAEERQQTLDFLKSG............... 0 14 22 37 +6785 PF06957 COPI_C Coatomer (COPI) alpha subunit C-terminus Vella Briffa B anon Pfam-B_20121 (release 10.0) Family This family represents the C-terminus (approximately 500 residues) of the eukaryotic coatomer alpha subunit. Coatomer (COPI) is a large cytosolic protein complex which forms a coat around vesicles budding from the Golgi apparatus. Such coatomer-coated vesicles have been proposed to play a role in many distinct steps of intracellular transport [1]. Note that many family members also contain the Pfam:PF04053 domain. 
19.70 19.70 21.30 20.60 19.50 19.10 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.43 0.70 -5.94 5 402 2009-01-15 18:05:59 2003-08-13 18:19:08 6 18 285 10 268 405 8 362.60 35 33.49 CHANGED KGhFEGuLhohuttuAsu+tsh.lss-csA.hs.tshus-DWG.EDsD.lslDt....DuhspucDGLu-s--GE.sc-s-EEGGWDV.E-DLsLPPELDsPK.suGsA-SuhFVsPspGhuVSQpWsNsSsLAuDHlAAGSF-TAMRLLHDQLGVlNFuPaKsLFLcsYAuSRooaRAhusL.Pu..lslaPpRNWSEou.KN..usPAluaKLsQLscRLQuuYQhTTsGKFsEAVEKF+SILlSIPLlVVDoKpEVAEAcQLIsIsREYllGLpMElcRK-LPK-sL-QQKR.sELAAYFTHCcLQPVHhILsLRSAlNlFFKhKNaKTAAoFARRLLELuP+P-VApQsRKVLQACE+NsTDusQLNYDh+NPFVVCGuoYVPIYRG+P-VoCPYCGAsFsPshEGplCTVC-VApIGKDulGLRISPLQsR .....................................................................................................................................................................................................................................h....s.........p........t....tt...p.....s..........t.tt.....s............t......t.t--...s..u..G.W..Dh.......--.......D..lpls...-.........h...ph....s.t.....................s...s.t......p.s..s.hss....sstG.......s......sp..h...Wsp.N.S.tLssDHltAGuF-oAhpL.LpcQlGllsFtPhK.hFhphatsu+ohh.uhssh.Ps....lh.s.h.pRshp..-s..s....pp.....shPslsh.plsp.Lh.p.c...Lp.t.u.Yph.hssuKhp-AlctF+sILholsll.s.Vcs...cpE..hsEsppllplsREY..llu.Lph..ElcR+p..l..s.......p.....s...sh.......c.......p..........+.....R.hE..LAAYFT+spLQssHhhLsLpsAhshhaKhKNatoA............usFAc...RLL....-.....h...........u.....s......p......s......c.....h.............A.p...p.....sRKlhs.ts.E.+..s..s..p..D..shpl.s......a..D............a......s.......s...F......slCuu.oas...P..I..Y.c......Gp.ss.......hpCPhsGupYp.s.pa+..GplCplsplsplGtss.GL+l................................................................... 1 98 152 226 +6786 PF06958 Pyocin_S S-type Pyocin Vella Briffa B anon Pfam-B_20020 (release 10.0) Family This family represents a conserved region approximately 180 residues long within bacterial S-type pyocins. 
Pyocins are polypeptide toxins produced by, and active against, bacteria. S-type pyocins cause cell death by DNA breakdown due to endonuclease activity [1]. 22.50 22.50 23.00 22.50 21.70 21.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.85 0.71 -3.82 39 258 2012-10-01 19:36:59 2003-08-14 10:59:20 7 25 167 8 54 270 1 134.20 31 24.52 CHANGED hpphttstsslshsVRhph...pstsGph..p........lhsl+T..s.tsssspVsVhps.thsppsshap........sssssshslhaoPssss.....tssssosss........................sss.sssshsssslststsp...spshPs.ptt-hcD...hIlsFP..tsSGltPlYlh ...........................phAtstuossh.VRhph...ps.sGp...p........lhul+T..utss.shspV.Vtth.phstpostYc......hh.scs.sss.slhWTPssss.....hpssspTuss........................sss.....hsshhstslsssssp...stshPhs-.c-acD...aIlla.P..hsSGl.PlYlh................. 0 3 12 30 +6787 PF06959 RecQ5 RecQ helicase protein-like 5 (RecQ5) Vella Briffa B anon Pfam-B_20083 (release 10.0) Family This family represents a conserved region approximately 200 residues long within eukaryotic RecQ helicase protein-like 5 (RecQ5). The RecQ helicases have been implicated in DNA repair and recombination, and RecQ5 may have an important role in DNA metabolism [1]. 25.00 25.00 26.00 26.30 19.60 23.70 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.18 0.70 -4.69 2 54 2009-01-15 18:05:59 2003-08-14 11:51:20 6 4 30 0 22 45 0 185.70 53 21.94 CHANGED KSCuAtAE.sEPs-YDIPPsSHVYSLKPKRlGAGFsKGsCsFQTATELhtpo+.pcQAPps.htGtpEPPuhsCsL.DEDtScPhPG.ptcs.GuSsphGsPSPEKKsKuSStGS..AKuRASKKQQLLATAA+KDSQsIsRFhCpRsESPsL.ASsPcuEsAsPSCtsV......PtKhT.pEsGAtGH.sAs.QTEt..RERP ..................SCuApucssEPsEYD.IPPASpVYSLKPKRVGAGFPKGSssFQTATELhcpop..p-QAPps..psGcpEPPSpsCsL.DEDpScPLPGPp..sEsPGuSApsGsPSPEKK.......sKuSosGSslAKuRASKKQQLLAsAA+KDSQsIuRFFC.p.R.s.cS.Psh.s.SuPcA.Es.As..PS.Ct.GVpus..hsPEKho.cED.GA..tG+.sAssQTEt..REt.................................. 
0 1 1 6 +6789 PF06961 DUF1294 Protein of unknown function (DUF1294) Vella Briffa B anon Pfam-B_3405 (release 10.0) Family This family includes a number of hypothetical bacterial and archaeal proteins of unknown function. 25.00 25.00 27.20 25.50 24.50 23.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.64 0.72 -4.01 144 1488 2009-01-15 18:05:59 2003-08-14 13:39:17 8 8 1458 0 268 969 50 54.40 44 47.80 CHANGED sllsFhlYuhDKptAp....cspW........RlsEpoLhlluLl.GGhsGAhlutphhRHKTpKttF ....NllsFhlYuhDKppAc.....+.ptW....................RlPEpsLhlhulh.GGshGAhlutphF+HKTpKhhF......... 0 88 162 221 +6790 PF06962 rRNA_methylase Putative rRNA methylase Vella Briffa B anon Pfam-B_3461 (release 10.0) Family This family contains a number of putative rRNA methylases. Note that many family members are hypothetical proteins. 20.70 20.70 20.70 21.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.81 0.71 -4.48 17 1285 2012-10-10 17:06:42 2003-08-14 13:50:07 7 7 1268 14 179 706 33 135.50 43 72.68 CHANGED +VYuFDIQcpAlppTpp+lcphh...p.....lcLlhsuHEplpcal.p....sl+uulFNLGYLPuuDKpIsTpscoTlpAIpclLsllp.sGlIslVlYaGH-tGchEKcAVhpFsppLsQpcapVhpaphlNQhNsPPhllhIEK ..............pVYuFDIQcpAlcpTpp+Lcptt...h.s.....................spLlhsGHE.s.lc...p...alsc.................lcA...uIFNLGYLP.......u..u..D..Ks.....llT+.PpTT.lpAlppllpl...LshsGhIslllYaGHpsGphE+-AVLcasppL.sQppap.V.hpYphlNQhN.sPPallhIEK....................... 1 74 125 155 +6791 PF06963 FPN1 Ferroportin1 (FPN1) Vella Briffa B anon Pfam-B_3588 (release 10.0) Family This family represents a conserved region approximately 100 residues long within eukaryotic Ferroportin1 (FPN1), a protein that may play a role in iron export from the cell [1]. This family may represent a number of transmembrane regions in Ferroportin1. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.28 0.70 -6.10 19 342 2012-10-03 03:33:39 2003-08-14 15:26:02 7 7 156 0 257 446 7 355.50 26 83.29 CHANGED ppslhhLYluHhLusWssRMacFussLalsplassoLLhsuVYuLVcohSslllushlGphlD+.sRl+..slRpsllhQplshslusuhhhhlhhhsp.h...............shlhhslasssslluslspLAssAsolslERDWlVVlu.st.sssLsphNushRpIDLhsclluPllsuhlhuhhShphushhlhshNlsshhlEahhlhpVYptsPtLtcppctppscsptspp...............................tt.t.phthhpphtphhcps.hssWptYhpQsVFLAuhuLuLLYhT.VLSFGshMssaLhpp.GhsshhluhhRususlsGlsGTashPhltp+lGhlRoGlaulhhQhsCLhssVsuhhhssss.................................................................stlsshhLhsGVAhSRlGLWuFDLuVppllQ-...................slsEucRshluusp.uLQslh-LLpashsIhhspPcpFthhshISausl ....................................................................t.....hla.h.uh.huth......ssRhapFuhslh.hhtl.a..s......t...sLh.sulhuhh.shshhhhushlGphlD.p..s.Rht...shph.l....................hhpphshhh.......sshhhhhh..h.h...............................th.hh..h...hhhh.hhhhusltpLsshs.tl....slpRDW.................l...........lsls....t....t........p.....tttL.st.....hNuhh+pl................D.h.splhuPhhhu..lhs...h.........u..........................h............h..shhhh.shshhShhhchhhh.tl.Yp.ss.t....Lt.ptt.t.tt.......................................................................................................hht.th..t....htth......htsa......Yhp.ps.sh.....h.....suhuhu....hLYhT..VL......u...........a.ss.hhssa....hhpp...Ghss.hluhh...huhuulhulh.uThhhshl..p.+h.G..h.l.+sGh....hu.hhhQ.....h.hLhhslh.sh...hh.s..................................................................................hh.sh..hhhh.ul.h.huR..............h.............GLW.aDLsh....tplh...Qp...................tl...tspRshhsuspsuhp.hh-lhthhh..sl.hhspPptFthhhhlSh.hV......................................................................................... 
0 80 137 210 +6792 PF06964 Alpha-L-AF_C Alpha-L-arabinofuranosidase C-terminus Vella Briffa B anon Pfam-B_3625 (release 10.0) Family This family represents the C-terminus (approximately 200 residues) of bacterial and eukaryotic alpha-L-arabinofuranosidase (EC:3.2.1.55). This catalyses the hydrolysis of nonreducing terminal alpha-L-arabinofuranosidic linkages in L-arabinose-containing polysaccharides [1]. 28.50 28.50 28.70 29.00 28.30 28.20 hmmbuild -o /dev/null --hand HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.15 0.71 -4.50 145 1287 2009-01-15 18:05:59 2003-08-14 16:17:07 7 28 613 70 462 1241 79 201.20 22 33.87 CHANGED DEWusWh...........................tss..thhtpt.ohcDALstushLssht+puDpV+hAshAQllNlls...............................slhhpss.t.phhhsssYashphaupats.ussLssplpss................................................................................................................................................................................tstsshshl..sssAsh...spsstp....lhltllNh.s.t.tshslplslp...............................Ghps......pssptphL...suss.hpspNoh....ppsppVtPp............pt....p....ssph.shsLPsh......S ............................................................................................................................sEassh.............................ts.....thhtpt.shpsALstushLhsht+p.uD...h...VchAsh..A.Llssls....................................sllhhss..t..phhtssoYashphaupp..ts.upsLssplps.............................................................................................................................................................................................................tthpssshlsss.Ash......spcstp.....lhl...t..l.lNhssp...........shslslslp...............................Ghps......pssptphL...s...u..s..s....hpsp.Noh....ppPpp.........VtPp..............ppth.p.ht..s.sph.phplsshS........................................... 
0 167 325 398 +6793 PF06965 Na_H_antiport_1 Na+/H+ antiporter 1 Vella Briffa B anon Pfam-B_1828 (release 10.0) Family This family contains a number of bacterial Na+/H+ antiporter 1 proteins. These are integral membrane proteins that catalyse the exchange of H+ for Na+ in a manner that is highly dependent on the pH [1]. 19.80 19.80 20.50 20.40 19.10 19.10 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.40 0.70 -5.68 17 2392 2012-10-02 17:06:44 2003-08-14 16:48:31 7 9 1869 3 527 1704 1782 354.40 44 91.60 CHANGED lpcFlcsEuuGGllLhhuAslAlllANS.sLupsYhuhlcs.htht...h.hsholtpWINDGLMAlFFLllGLElKREhltGpLushppshLPslAAlGGMllPALIYhhlNhssPtshpGWAIPsATDIAFALGVLuLLGpRVPsSLKlFLhuLAIlDDLGAllIIAlFYossLshssLhhAhhslhlLhsLNRhsVppLhsYlllGllLWhslhpSGVHATlAGVhluhhIPhcttpu.....................................p..pSPlc+LEHtL+PWVuFhllPlFAFANAGVSluGhsls.l.ssPlsLGlslGLhlGKslGlFhhualAl+LsLAcLPcGhsWsplhuVulLCGIGFTMSlFIupLAFss....thhs.sKlGlLhGSlhuAllGhhhL .....................h.pcahpp-ssGGllLlhuullAhlhANS..s...........h.u.t.hY.pshh......c...h..............l......t......h....t............h......s......t........h.t......l......s...hslhh.WINDuLMAlFFhllGLElKRE..lh..p.GpLssh.cpAshPhlAAlGGMllP.....ALlYh..............h..............h..............N...h.......u..s.............s.............................s....h.pGWAIPsATDIAFA.LGlL.u.L.LG.s+lPhuLKlFLhuLAIlDDLGAIllIAlFY...T..s...s...L...u...h.hsLsl.Ahhs....ls..l.L..........h.l............L........N..........h........h........t...........l........p........p........h...hh....Y.l.ll..G.l.l.L.....WhslLcSGVHATLAGVlluhhIPhctp..p.s...........................................................................................puP.....h.c+lEHsL+PaVAalI..lP..lFAFANAGVslp.u.h...s...l.................s.sl......ss.............lsL....GIhhGLhlGKslGIhhh.saLul+ht.l...ApL.PpGhsap.plhululLsGIGFTMSlFIusLAFss........tst..h..hs.huKlGILh..GShhuAllGahhL.................................. 
1 161 342 452 +6794 PF06966 DUF1295 Protein of unknown function (DUF1295) Vella Briffa B anon Pfam-B_3514 (release 10.0) Family This family contains a number of bacterial and eukaryotic proteins of unknown function that are approximately 300 residues long. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.79 0.70 -5.07 12 924 2012-10-01 22:51:20 2003-08-14 16:53:11 7 9 605 0 506 2016 1927 217.80 24 75.13 CHANGED hallsthptpsshlDhhWusshslluhhshhh......stu.hshRphllhhLsslWulRLuhalhhR.shptG..EDhRasphRcphstt.........phhh.hahhQulhlahluLPlhlstustss......sh.tshshlGhslalhGhhhEshuDtQL.sF.................KtsPtN+G+.hhssGLWpaoRHPNYFGEhlhWWulalluhss..hpsh..hslhuPlhhThLLlaVSGlPlhEtph.+phttt.ta+pYpcpT ......................................................................................................................................ahht.h.tp....t.hh.Dhhhuh...s...s.h.hshhh.hhh.......................tts....s.hp.ph...l...l...h.hh..s...hlWuhRLsh...a.hh..h.R...h.......h....t.........s.....-D..Ra........t.....th+..p..p..hs.........................h.thhh..ha..h..hQ..u.....l.h.l..h...h..l.u...l.Ph....h..h....s.........s.........t.....ss...................sh....s..h...h.s....h.l..u..h...s..l.......a...l....h.....G..h...h....h.....E..sl...u.....D......h....Q...h...h.p.F................................................+..p.....s....s...t.....s.............c...........G...........c.....hh..........s.....s.........G.L.W.pa..oRHP....N...YFG......Eh..hh..W...h....u...l......h..l.h.u..h....s.s...................ht......s..........h.........................h..s..l...........h....u...P.l.....h.....h.hh.l...L.h..hh..o...G.hsh.hEp....h..c.ph...............s..tYppYppps.................................................. 
1 210 340 440 +6795 PF06967 Mo-nitro_C Mo-dependent nitrogenase C-terminus Vella Briffa B anon Pfam-B_6998 (release 10.0) Family This family represents the C-terminus (approximately 80 residues) of a number of bacterial Mo-dependent nitrogenases. These are involved in nitrogen fixation in cyanobacteria [1]. Note that many family members are hypothetical proteins. 19.30 19.30 20.30 47.80 16.70 15.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.92 0.72 -3.83 13 139 2009-01-15 18:05:59 2003-08-14 17:19:46 6 5 53 0 56 140 8 83.00 57 56.74 CHANGED -lLpPlRpWLDulcIpst+lAHhlC+LIPsQCPFERDlsLFGRhlhHIPPLCKLNPLYEplVuLRFRALsYLADECGEDIopYs ........lLpPlRpWLDsl-lcssclA+h..lC+lIPuQCPFERDIpLFG+plhHIPPLCKLNPLY-plVuLRFRALsYLAD-CGEDlotYh........ 0 5 41 56 +6796 PF06968 BATS Biotin and Thiamin Synthesis associated domain Finn RD anon Pfam-B_5417 (release 10.0) Domain Biotin synthase (BioB), EC:2.8.1.6 , catalyses the last step of the biotin biosynthetic pathway. The reaction consists in the introduction of a sulphur atom into dethiobiotin. BioB functions as a homodimer [1]. Thiamin synthesis if a complex process involving at least six gene products (ThiFSGH, ThiI and ThiJ). Two of the proteins required for the biosynthesis of the thiazole moiety of thiamine (vitamin B(1)) are ThiG and ThiH (this family) and form a heterodimer[2]. Both of these reactions are thought of involve the binding of co-factors, and both function as dimers [1,2]. This domain therefore may be involved in co-factor binding or dimerisation (Finn, RD personal observation). 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.92 0.72 -4.14 127 5028 2009-01-15 18:05:59 2003-08-15 13:00:51 8 10 3397 6 1177 3436 382 97.40 30 26.94 CHANGED slNh..LpPh.pG......os.h.........ts.....t.lssp-hl+hlAhhRlhhPp.stlpluuG+pthhcc.h.s...lshhuuusuhhsGsY..........................hssss.cshscshphlpchG.h......ps ...................PINh....Lhss.pG.....Tsh............ts..spsls.sh-hl+hlA.shR.lhhPp.s.tlRlSuGR.p.hh.cp..hts.....hshhu.uuNu...l...h...hGsh........................................................................Lhosspps.cpDhphlpchGhp................................................................ 0 421 778 1014 +6797 PF06969 HemN_C HemN C-terminal domain Finn RD, Bateman A anon Pfam-B_833 (release 10.0) Domain Members of this family are all oxygen-independent coproporphyrinogen-III oxidases (HemN). This enzyme catalyses the oxygen-independent conversion of coproporphyrinogen-III to protoporphyrinogen-IX [1], one of the last steps in haem biosynthesis. The function of this domain is unclear, but comparison to other proteins containing a radical SAM domain (Pfam:PF04055) suggest it may be a substrate binding domain. 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.95 0.72 -3.91 87 5381 2012-10-04 14:01:12 2003-08-15 13:44:13 11 4 3710 1 1096 3872 632 66.00 23 16.25 CHANGED lstp-thpEhlhhsLRh..ptGlshpphppphs.t.....h.t.h.tthlpphtpp..GLl....phss..stlplTppGhh ..........................stc-phcE.hhhsLRh..ppulshsphp.ppa.Ghs..........htphh...tp....lp...p...hhpp......GLl...............p.h.s.s.....stlplTpcGhh............................. 
0 314 660 891 +6798 PF06970 RepA_N Replication initiator protein A (RepA) N-terminus Vella Briffa B anon Pfam-B_1808 (release 10.0) Family This of family of predicted proteins represents the N-terminus (approximately 80 residues) of replication initiator protein A (RepA), a DNA replication initiator in plasmids [1]. Most proteins in this family are bacterial, but archaeal and eukaryotic members are also included [2]. 30.00 30.00 32.70 31.90 29.90 29.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.60 0.72 -4.19 55 1268 2012-10-04 14:01:12 2003-08-15 15:21:35 6 6 685 0 75 856 25 73.30 38 29.74 CHANGED ppFaplPKsLhpsppY.cpLSs-AKlhYulLhDRhpLSl........+Ns....WlDcc.GplYhlaosc-Ltp..hLssuc.pKll+lKKEL ..................paaplPK.hLhp.s............pp.a..cplSs-AKlhYulLhDRhpLSh............+Ns......W..lDc-..GplYlla.ospcLhc..hLsp.uc.pKlh+lhcEL............. 0 29 43 58 +6799 PF06971 Put_DNA-bind_N Put_DNA-bind_C; Putative DNA-binding protein N-terminus Vella Briffa B anon Pfam-B_3389 (release 10.0) Family This family represents the N-terminus (approximately 50 residues) of a number of putative bacterial DNA-binding proteins. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.25 0.72 -4.20 17 1817 2012-10-04 14:01:12 2003-08-15 15:45:00 8 3 1640 24 349 1010 155 49.40 47 22.82 CHANGED ppIPcAshKRLPhYaRhLctLhppslcRlSScpLu-tlslsuupIR+DFS ........pIP.c.A.ThKRLPlYYRhlppLps..pG.l.c+lSSpplu-AltlcSAoIRRDFS. 0 159 268 317 +6800 PF06972 DUF1296 Protein of unknown function (DUF1296) Vella Briffa B, Eberhardt R anon Pfam-B_4035 (release 10.0) Family This family represents a conserved region approximately 60 residues long within a number of plant proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 
21.60 21.60 21.70 26.00 21.40 21.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.06 0.72 -4.01 7 106 2012-10-01 23:03:33 2003-08-15 16:05:18 6 3 19 0 70 100 0 57.30 55 7.87 CHANGED lPsth+phlQsl+ElVss..S-t-IauhL+EssMDPsEssp+LLpQDsFHEVKSKR-KKKE ....lPsusRKhlQulKEIVss.ho-t-IYAsL+ECsMDPNEssp+LLsQ.D.sFHEVKpKR-KKKE........... 0 13 40 54 +6801 PF06973 DUF1297 Domain of unknown function (DUF1297) Vella Briffa B anon Pfam-B_3819 (release 10.0) Domain This family represents the C-terminus (approximately 200 residues) of a number of archaeal proteins of unknown function. One member is annotated as being a possible carboligase enzyme. 23.80 23.80 24.90 31.30 23.50 23.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.09 0.71 -4.98 40 191 2012-10-10 13:17:03 2003-08-15 16:23:52 7 2 111 19 120 188 129 185.00 43 52.81 CHANGED EapcKhcplhcpGlIspcslcpspIEEYllGshashpaFYSPlpcc........l...EllGlDpRhEoNlDGhhRlPAppQl.ch....slpPpallsGphPsslRESLL.pVF-hG-+aVcAo+clhsPGlIGPFsLQoll...sc-...L-hlVF-lSsRIsGGTNlahs.GSPYStLhascshShGRRIA.......hEI+pAlcpscL-cllT ...............-a.cKhpphhcpGllppcslppshIpEYllGs.h.hpaFYSPlpsc.................l...ElhGhDpRaEoslDGlhR.lPAppQl.cl....plpPpallsGphPsslRESLL.cla-hG-+aVcuscclhs..P.GlIGPFsLpoll......sccL-llVF-lSsRIsuGTNhahs.GSPYShLhascPhShGRRIA.......hEI+pAlcpspL-cllT.............. 0 30 60 89 +6802 PF06974 DUF1298 Protein of unknown function (DUF1298) Vella Briffa B anon Pfam-B_4362 (release 10.0) Family This family represents the C-terminus (approximately 170 residues) of a number of hypothetical plant proteins of unknown function. 
20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.68 0.71 -4.42 14 691 2009-01-15 18:05:59 2003-08-15 16:32:11 8 13 255 0 271 581 59 145.90 22 32.32 CHANGED KsSKCR..WGNahGhlllPFsl.uLcsDPL-Yl+pAKuhhDRKKpShEAhhoYhhh+hllKhFGhKsusslhpRhhsNTThsaSNllGPsEEISFhGHPIsYlAsSsYGpPpALsIHa.SYssKhhIsluVDsslIPDPH+LCDDhEESL+hhKsA ............................................................................s.........GN.hu..h..h..h..h..s.h......h....t......s...sP....l.phlpt.......h..p..t..th...p...t......t.+...p.....p...h...p......t......h.........h...h..........h.....t....h...h....h....h..h...h...u.......t...h...h....t....t...l.....h....t...t...h...............s......h........s.l..s...lSNV.PGPp-.....lhhsG.u......l..s..t..h.hs...s.s.h.s..t.s.p...........u....L..sl..olh..SYssp.lshulhssp..ssls...-spclssthtcuhppl.................... 1 71 176 236 +6803 PF06975 DUF1299 Protein of unknown function (DUF1299) Vella Briffa B anon Pfam-B_3488 (release 10.0) Family This family represents a conserved region approximately 50 residues long within a number of proteins of unknown function that seem to be specific to Arabidopsis thaliana. Note that many family members contain multiple copies of this region. 25.00 25.00 45.20 45.20 19.90 17.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.39 0.72 -4.10 4 21 2009-01-15 18:05:59 2003-08-15 16:40:27 6 4 1 0 0 21 0 46.50 84 17.30 CHANGED HDEas-stDQEAYlILSDDEsNGTAPTEKESQPpKEETTEVPKEEsV HDEHDETEDQEAYVILSDDEDNGTAPTEKESQPQKEETTEVP+Epph 0 0 0 0 +6805 PF06977 SdiA-regulated SdiA-regulated Vella Briffa B anon Pfam-B_2520 (release 10.0) Family This family represents a conserved region approximately within a number of hypothetical bacterial proteins that may be regulated by SdiA, a member of the LuxR family of transcriptional regulators [1]. Some family members contain the Pfam:PF01436 repeat. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.59 0.70 -5.43 24 899 2012-10-05 17:30:42 2003-08-15 17:25:23 6 9 524 1 91 519 74 223.90 40 78.35 CHANGED stulhLssY+ssl-.u+slsGlccs.lSuLTYsPppcoLFuVsNp.sspLlELShcG-lLR+lPLtG.FsDsEulpalusGphslsDERpppLhhlplsspTpslshs-...hpphsLGhs..tssNKGFEGlAaDstspRLhluKERsPhtlaclpu.......ss.sltsssptsh.t..pplhl+D...LSuLpaDtpoGHLLlLSDESRhllElD.tpGc.lShhoLhtGhsGLccslPQAEGlAhDccGsLYlVSE.PNLFYhFc ..............................................................................................................................................................................p.sh.hpsYcssls.tK.lsGlps.s.lSuLTass....pspp.LFushNp..P.........u..........t..l..V..ch....o.p.p.G.c.l........l.+p.hP.L........sh........h.........p........DhEsIEa.l.GsN.p...as.l.u.....-..ER..c.......h.....t..lhhhpl......................o.......t..s..p..l...t.......hh.cl.p.lshp....pspNpGFEGLAastpscpha.h.h......K..E...+.....pP.....lp....lhcspt.................................ssspLphucstslp........tthplp.D.......l......SGhp..a..s.tp.pspLllL.ScES+hl..hEls..pGc.hls.....hsLs+G..p..GLpc..sI.Q.uEGIAhD.s.p.s.slYIVS.E.PN.hFY+Fo.......................................................... 0 20 42 75 +6806 PF06978 POP1 Ribonucleases P/MRP protein subunit POP1 Vella Briffa B anon Pfam-B_7848 (release 10.0) Family This family represents a conserved region approximately 150 residues long located towards the N-terminus of the POP1 subunit that is common to both the RNase MRP and RNase P ribonucleoproteins (EC:3.1.26.5) [1]. These RNA-containing enzymes generate mature tRNA molecules by cleaving their 5' ends. 
22.20 22.20 24.00 24.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.44 0.71 -4.63 38 354 2009-01-15 18:05:59 2003-08-18 13:52:12 6 10 265 0 265 358 4 151.60 31 21.10 CHANGED FlpuRphEIcslppuhppuKtstsoRsFQplPRpLRRRsASHNl+RlP+RLRsRAt+EMhpsss.....................ttphhh+hcst++l.ph.tp...............................................................................................ltp.s.spt+ap+RQcp......p..pWL.THlWHAKRh+Msc...............hWGaplPh..sPTpKsaRssaR.....tspcsslsaDhSYhss....ltlpGs ...............................................................................................................................................................................................................................................FhtuRthElpth.tuhpppp................sp..hsF.Qp..lPRphRRRshSHNs+RlP+.RhRthshpch.pstt.......................................h.p.c.ttt.................................................................................................................................................................................ht...p....ca..t+Rp.pp........p....hWL.THlWHAKRh+Mhc...............hWGatls...ps..stKsaRssaR..................t.pptslh.DhSYhssltlps.t..................................................................................................................................... 0 104 157 222 +6807 PF06979 DUF1301 Protein of unknown function (DUF1301) Vella Briffa B anon Pfam-B_8295 (release 10.0) Family This family contains a number of eukaryotic proteins of unknown function that are approximately 160 residues long. 
21.70 21.70 23.90 23.70 20.20 20.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.76 0.71 -4.01 7 128 2009-01-15 18:05:59 2003-08-18 14:35:39 7 4 104 0 91 127 1 128.50 34 59.20 CHANGED uVKhFShSTSlsulsh.P.lLLcpuhthsu..hshtlhhhGlhGFFTFlTPlLLHhlTKtYVhcLaa-ssp-pYTAsThshhLpcppThF+.cDVslP-lsthFToFhscsKuLhVsPsLFPsPpcY.+lMGYDK .......................................VKhFShSTShhulshhPhl...hhpss..ths.u....lsh..p.shhsuh.luhFohh...TPhlLH.alT..KsYVh+L..aa.p...ss............o.D..........oYpAhThs.hlhpcppp...........sF+.sDV....p....lP.-.....ss.+hF...TTFhAcs+shhVssthF.ss.pca.+lMGY-............................ 1 28 43 70 +6808 PF06980 DUF1302 Protein of unknown function (DUF1302) Vella Briffa B anon Pfam-B_7023 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function that are approximately 600 residues long. Most family members seem to be from Pseudomonas. 19.90 19.90 20.70 20.10 19.50 19.70 hmmbuild --amino -o /dev/null HMM SEED 554 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.66 0.70 -5.94 64 345 2012-10-03 17:14:37 2003-08-18 15:53:48 6 3 208 0 139 351 210 566.70 31 94.05 CHANGED 
hsFshG.-...lpssaDoolShGsuhRspstspshh...........................................tssstssssDDGsLNac+.G-shSphlKuhp-LpL+assh.......GhFlRGphaYDhthp-sstp....................................s.ss.pscpttspsscLLDAaVatsasl..GcpP.sslRlGcQVlsWGEShF...ltsG...I.NshNPlDls.thptPGuElKEsllPsshl.suhuLocsLSlEuaYQ..acWcpotlsssGTaFSssD.hhucGsps..h............................h.hhss.ss.sh...............................shhh.css-tcuccsGQaGlslRahsttL..-TEaGhYahNYHs+hPhlussssshsshs......................................................................spYhhpY.sEDIcLaGhSFsTslG......ssuluGElSYRsstPl.tlsssplh...........huuhssts...................................shss...Gt.hp.......G...Ytct-shQhploslphhst..........shGAspholluEluhsalhul...ststhphs.t.ssht..............................................t..tstspshsTpsuaGY+lthphsYsslas.GlsLsPslsauHDlpG..hoPs...sssF.sEGp+ululGlshsYp...spapsslsYssFaG.............Gshs......................shsDRDaluholshoF ............................................................................pFshG..-...lpupaDoolohGsuhRspstspsll................................ststs.stssDDGslNFc+..G-shSphhKulp-LcLpatsh.......GhFlRGphaYDhthpDpspp.....................................hhs...s.ss.ppct.hpupsscLLDAFVatsasl..GstP.hslRlGcQVlsWGEShF...hts...G...I.suhNPlDls...thptPG.uEl.KEsl.................lPlshl.huhsLT-sLSlE...uaYQ..hcWcpohlsssGTaFSssD.....hh.u.cGspthhh................................................hss..tt......................................shhh.c..us-p.cscc..u..GQaGlsl+ahsptL..-TEaGhYahNYHu+..tPh...luspsushsshs..s..................t................................................................spahhpY.PED.I+LaGhSFsT..s..lu......ss.uluGElSYR.stPl.tlssspll...............hushssts...........t........................ss.s.........GtthpGahRt-shQhphohh.phas...........shGA-pholluEluhs+ltsl...shsphphs..tsss.ht........s..s........................................shsp..tspssshsTpsuaGYclthphsYssVas.GlslpPslsau+DlpG.hoPs.....h.ssF...tEGpKulolGlshsYp...spapsslsYssFaG.......uphs..................
.................shsDRDaluhohphsF................................ 0 33 60 105 +6811 PF06983 3-dmu-9_3-mt 3-demethylubiquinone-9 3-methyltransferase Vella Briffa B anon Pfam-B_6583 (release 10.0) Family This family represents a conserved region approximately 100 residues long within a number of bacterial and archaeal 3-demethylubiquinone-9 3-methyltransferases (EC:2.1.1.64). Note that some family members contain more than one copy of this region, and that many members are hypothetical proteins. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.46 0.71 -3.75 25 2984 2012-10-02 15:00:03 2003-08-18 16:55:30 8 7 1713 10 686 1992 166 128.00 27 87.56 CHANGED sKIossLWF...D.spAEEAspFYsSlFsNScltslschspst.su.....tGsVhsspFslsGpsFhuLNGG..PpFpFscAlSFhlsCcsQcElD+hWstLstsGu........ppu.tCGWl+D+aGlSWQll ..........................................................sPaLhF....s....up...ucEAhsaYpp.....s.F.............s........s........u.....p..l.....h.....p.h.......p........p....h......s....p......s.....t.....st................ps.....p.....l...h.....au.....p......h......p.l..s.G.p...s...h..h....s............D.......u..s............s.s...h.....p.......h..........s.......s....u.......h.....S.......l....h...l..s..............s.........c..........s..p.....p.....E......s....c......c...h...aspLsssGpl.h.............pthhts.taG.hlpD+FGlsW.l.......................... 0 219 460 593 +6812 PF06984 MRP-L47 Mitochondrial 39-S ribosomal protein L47 (MRP-L47) Vella Briffa B anon Pfam-B_6890 (release 10.0) Family This family represents the N-terminal region (approximately 8 residues) of the eukaryotic mitochondrial 39-S ribosomal protein L47 (MRP-L47). Mitochondrial ribosomal proteins (MRPs) are the counterparts of the cytoplasmic ribosomal proteins, in that they fulfil similar functions in protein biosynthesis. However, they are distinct in number, features and primary structure [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.02 0.72 -4.08 8 327 2012-10-02 11:59:50 2003-08-18 17:11:16 8 5 288 0 229 380 11 87.70 41 34.85 CHANGED LhEFF-s.....c+sh..sEpc.s.KsG...+uWssppLRpKShsDLHpLWYshLKE+NMLhThcpchpcpphs...hPuPERlcKV+pSMcslcpVlpER .....................................L.pFFss.............pps.h....s.p..p.c...sp.tG.......RuWss.pELRtKSh-DLHpLWaVhLKE.RNhLhTh...cpctp...+ppht..............hss.s-.R.h....c.+...Vc.........poMppIcpVlpER...................... 0 77 126 190 +6813 PF06985 HET Heterokaryon incompatibility protein (HET) Vella Briffa B anon Pfam-B_8200 (release 10.0) Family This family represents a conserved region approximately 150 residues long within various heterokaryon incompatibility proteins that seem to be restricted to ascomycete fungi. Genetic differences in specific het genes prevent a viable heterokaryotic fungal cell from being formed by the fusion of filaments from two different wild-type strains [1]. Many family members also contain the Pfam:PF00400 repeat and the Pfam:PF05729 domain. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.32 0.71 -4.02 164 2623 2009-09-13 05:28:11 2003-08-19 10:54:52 6 149 90 0 2311 2726 2 139.30 21 21.73 CHANGED YtsLSYsW....Gsst.........................tttl......................hplsps.lpsAlphs.+.p.........h.....uh.cY.....lWlDslCIsQ....s.ss......-hspplspMtplYppAthslshhusss.............................pt.....................h.shp.h.....pthtt.tht.............................................................................t..hhpp....ltp......s.WasRuWslQE .....................................................................YhsLSasW.........Gssp................................................................................p...t.h...h..t...........................h.t.l...s...p....s.......l.p.s.Alphh...+.p...............h..............sh..ca..............................lWlDs......l...C.I....s.Q................s....st.............t.......-.........hs.ppls.t...M.tp..lYpp......Athsls.hhu..sss................................................................s..tt...................hh.p...............t..............................................................................................................................................t............h..p.......t..h..h....pRhWshQE................................................................................................................. 1 357 1215 1922 +6814 PF06986 TraN Type-1V conjugative transfer system mating pair stabilisation Vella Briffa B anon Pfam-B_8400 (release 10.0) Family TraN is a large cysteine-rich outer membrane protein involved in the mating-pair stabilisation (adhesin) component of the F-type conjugative plasmid transfer system. TraN is believed to interact with the core type IV secretion system apparatus through the TraV protein [1][2][3]. 
21.60 17.10 23.00 17.90 20.30 16.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.24 0.70 -5.02 17 647 2009-01-15 18:05:59 2003-08-19 13:20:02 6 8 342 0 93 486 42 199.50 26 41.21 CHANGED stCphspc.pCh...Essts+h..lsGlsl...........sh..sCWctptsYpCss...............................ss..TCstlpsp.......sCphspppCh.p...pptuhChptphsapC.pps..ssss...phhCusssaClsGsChpsppctss..-FscusutLuAlspAsp-hsss..........shplFpGputpCp+thhG.hssCCss.pGaG......slu...............ltp...CsstEcsLuptcppshsh.lGoYC..ucK..llusChp+KcsYCsFpSKLu+llQpQG+p.QLGhuaGosKpssCpu............lTs .........................................................................h.................................h...............................th.....Ch.t..p..p..h.s.......................................s..sCt.h.tp.......tCt..h..p.ppCh.................tttu.Chh...hYp...Ct.p..............tt........p......t.....s.ss..t..t.....t.............pt..thtp.hs.hthh.t.ht...thts.................h.hFtGp...........C..p.h.....h..h.s....h........sCCpp..tsh.s.........t........................................................................lht.....CsppEhtlsttc.tt....ph.sh.lGpaC.sp+.....hlu...sCl.pc..+cs.aCpFpSpLupIlppps+......Q....L....t....h.....s....aGss.p....ps....pCpuho.s................................................................................. 0 21 47 72 +6816 PF06988 NifT NifT/FixU protein Moxon SJ anon Pfam-B_5696 (release 10.0) Family This family consists of several NifT and FixU bacterial proteins. The function of NifT is unknown although it is thought that the protein may be involved in biosynthesis of the FeMo cofactor of nitrogenase although perturbation of nifT expression in K. pneumoniae has only a limited effect on nitrogen fixation [1]. 
20.80 20.80 20.90 21.90 19.80 17.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.03 0.72 -4.50 36 178 2009-01-15 18:05:59 2003-08-19 13:33:27 6 3 167 1 86 157 12 63.60 47 82.34 CHANGED M.KVMlRcssp.ssLssYVPKKDLEEslVsh-..pschWGGhlsLuNGW..pLtlPshsssspLPlTV-A M.KVMlR+sss..sLosYVPKKDLEEslVp.hE..psshWGGhlpLuNGW..pLtlPphsscspLPlTl-A... 1 22 52 68 +6817 PF06989 BAALC_N BAALC N-terminus Moxon SJ anon Pfam-B_5793 (release 10.0) Family This family represents the N-terminal region of the mammalian BAALC proteins.\ BAALC (brain and acute leukaemia, cytoplasmic), that is highly conserved among mammals but evidently absent from lower organisms. Two isoforms are specifically expressed in neuroectoderm-derived tissues, but not in tumours or cancer cell lines of non-neural tissue origin. It has been shown that blasts from a subset of patients with acute leukaemia greatly overexpress eight different BAALC transcripts, resulting in five protein isoforms. Among patients with acute myeloid leukaemia, those overexpressing BAALC show distinctly poor prognosis, pointing to a key role of the BAALC products in leukaemia. It has been suggested that BAALC is a gene implicated in both neuroectodermal and hematopoietic cell functions [1]. 25.00 25.00 51.00 51.00 20.00 19.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.52 0.72 -3.64 2 32 2009-01-15 18:05:59 2003-08-19 13:40:07 7 1 21 0 15 28 0 51.80 76 40.27 CHANGED MGCGGSRADAIEPRYYESWTRETESTWLTYTDSDA.PSsAAsDSGPEAGGLpA MGCGGSRADA.IEPRYYESWTRETESTWLThTDSDuP..PSu.A..As.........DSGsEAGG................. 0 1 3 7 +6818 PF06990 Gal-3-0_sulfotr Galactose-3-O-sulfotransferase Moxon SJ anon Pfam-B_6301 (release 10.0) Family This family consists of several mammalian galactose-3-O-sulfotransferase proteins. Gal-3-O-sulfotransferase is thought to play a critical role in 3'-sulfation of N-acetyllactosamine in both O- and N-glycans [1]. 
20.20 20.20 20.20 20.30 20.10 20.00 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.61 0.70 -5.81 3 598 2012-10-05 12:31:09 2003-08-19 13:49:40 6 17 61 0 434 689 22 279.80 24 77.98 CHANGED MhPhLttLQ+ts+hhht+tLlLslLhhooFLLLlaphA..shHucLt.sTP.FuuSCsP.t..............EuPPRsN...IsFLKTHKTASSTVLNILFRFAE+HNLoVALP.sGsRscFsYPphFuA+aVc.....ussupp.FNIlCNHLRFchsEVc+LMPssTlYlTILR-PAthFESSFpYYssYsPAFR+lPuus.L-AFLpuP-sYYcsucH.pAhYA+NsLaFDLGhDN-hsPps-puYVpu+ItEVERRF+LVLIAEYFDESLVLLRRLLsW-LDDVLYFKLNARuASsVuRLSuEstu...RARoWNALDu+LY-HFNATFWR+V.AchGRcRlcREVccLRcARcRLtolClsDGsAL+sAAQIRDcpLQPWQPSGKsDILGYNL+sGluspssplCpRLlMPElQYhscLhApQ .............................................................................................................................................................................................................................................................................................................................................................................s....h..pp.........lhFl..KoH.KouooT..l..sl....l.h.R..............au..p..cpL.ph..s.........hP.....t.......t.........t..h....t.a.......P............h..t..t.p....h...l..........................t.............as...l....h.s.p......H..h..........c.......a..p..........h...t..t...l.t.pl.h....s.s..s....s...h...........a.hoIl....Rc.Ph.t........hcS.F..t..a..a.........t............................................h...........h....h............t........h..................s.......................ltt....F..l..p.....p..P.............t....a....h......p..............t......t.....................t...........h.........h........h...+......N.......h.....h....a....D.h.G...............................................t........t.............t................h.p......t...h.lt.tlp.p....tFpLVhlhEaa-ESl..lLL+chhs...W......phcDll........h...h....t.......h....s...................p....t............t...p..............................t.......h..s........t...t..................ph
..p...tas.h...Dhh...LYpaFNt.........ohWp+l...tt......G...t......h.tpht....hpth.tt..h..t.C....................................................................................................................................................................... 0 290 302 342 +6819 PF06991 Prp19_bind MFAP1_C; Splicing factor, Prp19-binding domain Vella Briffa B anon Pfam-B_8343 (release 10.0) Family This family represents the C-terminus (approximately 300 residues) of proteins that are involved as binding partners for Prp19 as part of the nuclear pore complex.\ The family in Drosophila is necessary for pre-mRNA splicing, and the human protein has been found in purifications of the spliceosome. In the past this family was thought, erroneously, to be associated with microfibrillin. 20.90 20.90 21.50 21.30 19.90 19.90 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.94 0.70 -5.12 5 330 2009-01-15 18:05:59 2003-08-19 13:57:47 6 8 269 \N 245 315 2 231.40 35 59.81 CHANGED EtElhptE-EccupEEEEE.ESEpEE-TDSEDDME.....PRLKPlFlRK+DRs...TlQE.+E+EttKpctL.EtEAKRtAEERK+pTtKIVEEslKKELEtcKs......p.cAsls..sV-TDDE.N-ppEYEAWKLRELKRlKRDREEREshEREKtEIE+hRNMTEEERRspLRpNPKVlTNKssKGKYKFLQKYYHRGAFFh.DE--E........VhKRDFSpATtED+FDKTILPKVMQVKN..FGRuGRTKYTHLV-EDTTcasSsWsusss.NpKF...s++AGGhRssF-+PsuKKR 
..........................................................tct.p...p..tp..p.p.-p...psp..E..p.......o..-sE--...................hl.+P.lFl.pKppR.............o.htp...ppt.t.........c..p...pp.h...c.t..p....t....c...c.h....h.cp..R..+...p.s.p.....llc-ph.c+-httppt................t.........ss........hp......sss.T.Ds....ssc........t.........EYpAWKlRELcR...l.KR-R-..t.......h.-..th-+E+tElERhRshT-..EERpt.......-.......c..t..t..s.c..............p..p.c...t.....KG...Kh.....tFh.QKYaH+..GAFa...Dp.tc.p..........l.h+R..D...hs.s.s.p.h.-Dp.h.s+phLPchhQV+s...hG+pGR.T.KYpcLhspDTs.....pa..s......s.....h........................................................................................ 1 81 132 197 +6820 PF06992 Phage_lambda_P Replication protein P Moxon SJ anon Pfam-B_6611 (release 10.0) Family This family consists of several Bacteriophage lambda replication protein P like proteins. The bacteriophage lambda P protein promoters replication of the phage chromosome by recruiting a key component of the cellular replication machinery to the viral origin. Specifically, P protein delivers one or more molecules of Escherichia coli DnaB helicase to a nucleoprotein structure formed by the lambda O initiator at the lambda replication origin [1]. 
21.10 21.10 21.10 21.50 20.60 19.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.42 0.70 -5.08 2 541 2009-01-15 18:05:59 2003-08-19 13:59:08 6 2 340 0 27 376 18 200.50 54 94.62 CHANGED MKNIAAQMVNFDREQMRRIsNNMPEQYDEKPQVQQVAQIINGVFSQLLATFPASLANRDQNElNEIRRQWVLAFRENGIToMEQVNAGMRVARRQNRPFLPSPGQFVAWCREEASVhAGLPNVSELVDMVYEYCRKRGLYPDAESYPWKSNAHYWLVTNLYQNMRANALTDAELRRKAADELspMTARINRGEsIPEPVKQLPVMGGRPLNRsQALAKIAEIKAKFGLKGASV ....................................MKNIAAQMlNFDpEQMRRIANshPEQY.D-.+s...QV..p.Q.V....A..p.IINuVFSQLhAsFPA.S.l..AN..R-.QsE.l.NE..l..RRQWVLAFpENGITTMEQVsAGMRVA..RRQ..sRPFLPSPGQFVAWC+E-Au.V.hsGLPsluELlDMlaE...YCR+RG...L...Y.P.Du.E.sYPW..........+S.NA.aYWLVTNLYp.sMRAsuLTDuELRR+AuDELspMss.RIsRG...EsI.P.EPV.KQLP.shu.s.RPl..spsp.AL.u+IAEl+AKhGLKGu......................................................................................................... 0 3 8 15 +6821 PF06993 DUF1304 Protein of unknown function (DUF1304) Moxon SJ anon Pfam-B_7246 (release 10.0) Domain This family consists of several hypothetical bacterial proteins of around 120 residues in length. The function of this family is unknown. 23.80 23.80 25.20 25.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.63 0.71 -4.36 73 1298 2012-10-01 21:33:42 2003-08-19 14:01:57 7 7 1246 0 231 804 18 108.50 39 89.68 CHANGED ssLVAl.hHlYIhhLEhFhaspstst+.sFshs..-hhptsps..LutNQGlYNGFLAsGLlhGlhhsss......shtlhhhhLhsVllAulaGuhTu.s+.+I........hhlQulPAllALlhlhl ..................hlVAl.EHhYIhhLEhhthsS..ct.ss+.sF.shsh.-phpppslp.LhtNQGlYNGhLulhL...lau...l..ahups..........hplshhhllhllsAAlYGul.Tu..s+..pI..........lhpQGh.sAlLALlull................................ 
0 65 139 189 +6822 PF06994 Involucrin2 Involucrin Vella Briffa B anon Pfam-B_8443 (release 10.0) Family This family represents a conserved region approximately 60 residues long, multiple copies of which are found within eukaryotic involucrin, and which is rich in glutamine and glutamic acid residues. Involucrin forms part of the insoluble cornified cell envelope (a specialised protective barrier) of stratified squamous epithelia [1]. Members of this family seem to be restricted to mammals. 20.90 20.90 25.30 20.90 19.00 20.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.13 0.72 -4.00 38 508 2009-01-15 18:05:59 2003-08-19 14:22:41 6 23 16 0 38 498 0 41.10 53 104.60 CHANGED Q-..spE.ELHLG+QQp.......csQE.....E..LaLGc..p.pQcspE.ELH..LGc ....................QE..s.EPELpLGKQQp......Qcs..E....PE..LpLGK..QQQQEspEPELpLsc..... 0 0 0 0 +6823 PF06995 Phage_P2_GpU Phage P2 GpU Moxon SJ anon Pfam-B_7670 (release 10.0) Family This family consists of several bacterial and phage proteins of around 130 residues in length which seem to be related to the bacteriophage P2 GpU protein (Swiss:O64315) which is thought to be involved in tail assembly [1]. 25.00 25.00 25.00 25.00 24.60 24.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.26 0.71 -4.72 55 1047 2009-01-15 18:05:59 2003-08-19 14:26:17 6 5 735 0 148 714 14 118.70 37 69.33 CHANGED G.FhFsl.pTssapphpRptsaRasspsRlGt+PuhQalGsup-pIoLsGllaPchpG........GthsLptL+thAspGpshsLlpG...sG.pl.hGhaVIpsls-TpohahssGssR+l-FolsLc+hs-s ..............................GhaVFtl...pT...lsa..QplppppsaRassssRlsc+..su.hQalGssp-pIsLuGlL..hP.El.s.G...........G.choLp.tLcphAc.pG.+u.asLl...-.....G.....sG...pI...aG..h..aVIpslspTp.opFhtsGts++I-FoLoLcRsD-................ 
0 28 71 113 +6824 PF06996 DUF1305 Protein of unknown function (DUF1305) Moxon SJ anon Pfam-B_9388 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 300 residues in length. The function of this family is unknown although one member (Swiss:Q93IT4) from Salmonella enterica is thought to be involved in virulence [1]. 20.30 20.30 20.60 20.30 19.50 19.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.71 0.70 -5.39 121 1497 2009-09-11 01:13:09 2003-08-19 14:44:15 6 5 955 0 274 1048 48 297.70 28 87.63 CHANGED h...tpstpa...sFaphl+.ll-..............p..htspt....s......hG....psttstp-slRhpspsuLuFssu-l...ssl.....................tt..psss....hp..lpsshhGLhGssu..PLP.haaoEhl..hpR.............ppp-sshtsFlDlFpHRhhsLaYRuWpchphs.sshcp..ss.........s.......-tasphLhuLsG.......hu...t...........htt.............................................ptshs......sst.hLtauGhlsppsRospuLp....slLsphFs..ssVplcpahspWlplstsppspLG............tt...sspLGtsshlGpclhDtpu+hclpluPlshppatpaLP...........sG.p.....thtp.....LtpllchalGtphsa-lpLhlptcplssspL..Gsst......pLGhsuaLup ..........................................................................................t..pshpasFaphhc.hLc................p....htstp...............hu..tttpstp-tlRhpsssuluFsss-ltsl.................................................tp...pssst.........hclpstahGLhGspuPLP..ha.YhEhl..hp+.............tcpcsshtsFhDlFsHRhhshha+sWpchp.hh.hsacp....ss............p..................-tauthlhuLsG.......ls..tp......pt.....................................................ch.sls........ppt...hLuauuhLspt..sR..oscslp....slluphF......s...sslpl..ppahh+hlslssspp..spLG............tt...sshL.GtssllGpplhDhps+hplplssLshppahpFLP................sG.p........ph.s..Ltthlchals.phsa-lpL.tl.t..pp..ss.s.hpL..........Gssp................pLGaouaLG.......................... 
0 44 112 192 +6826 PF06998 DUF1307 Protein of unknown function (DUF1307) Moxon SJ anon Pfam-B_10058 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. Some family members are described as putative lipoproteins but the function of the family is unknown. 21.70 21.70 21.70 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.56 0.71 -4.08 14 1049 2009-01-15 18:05:59 2003-08-19 15:49:10 6 3 820 1 25 350 1 121.10 40 78.69 CHANGED EocTY..pus.hsGh.clplTYoY.KG.DcVlKQoscsplpYssLGlspcEt.....A.....cchlps.scpYpslcGlp.clcYpDctshEplslDYpKsDhccl.ppLsGht.s...s.csuKsIShccotchLcppGaKc ...................................................................EoKpa...sus.ls.Gp.-ltlT..YsY..KG.DKVLKQoocspIp.Y..ssl.G.ssscEp........A...........tKhLc.s..l...us...p.Y.KsI..tG..VccKlsYp.Ds..h..A..E.plsIDhpKlDhcsL......ptluGh...phs.......s..cssK..sI....SMtph.ppllctsGaKE.......................... 0 7 9 18 +6827 PF06999 Suc_Fer-like Sucrase/ferredoxin-like Vella Briffa B anon Pfam-B_8856 (release 10.0) Family This family contains a number of bacterial and eukaryotic proteins approximately 400 residues long that resemble ferredoxin and appear to have sucrolytic activity [1]. 
29.50 29.50 29.50 29.70 29.40 29.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.82 0.70 -4.34 28 603 2009-09-13 18:23:08 2003-08-19 16:01:51 7 12 371 0 360 589 30 219.30 22 64.87 CHANGED sLhGTupshppalllpp.........stsWsp..pt..tt.......slhpt......lsshlppptpshthth.lshpsscps................tspssp................................lllhsthlhht.....hcthpp-hltslLshshs.......................t...th..httspc..ps.hlhVCoHusRD+pCulhG.slhpchcpphsspsh.p...................................Vh.sSHlGGH+FAssllla..................spGhaaG+lsPcplssllcp....hscsphlhchhRGht .......................................................................................................................L.Gssts.htpal.lltp...........tsWsp....chhptp..........th.tt........................lt.t.h.hp.t..t.h.t...t.h..t.........t.h...hhh...psst.....................................................t.t.p...sp...............................................................................................................shlhsthhhh..h..........htt.h....p.h.h..p.t.hls..hsht.............................................................................t.............t.stt...psshlLlCoHup....R..D....t+..CGl.t.G.s..ltpphpcpltt....t....sh...........................................................................................lh.sSHlG...GH+aAuNllla................................................................................spGhaaG+lpP...c.s.sp.sllct..........hhpG.cl..l......hhRG.............................................. 0 98 225 312 +6828 PF07000 DUF1308 Protein of unknown function (DUF1308) Moxon SJ anon Pfam-B_13288 (release 10.0) Family This family consists of several hypothetical eukaryotic sequences of around 400 residues in length. The function of this family is unknown. 
25.00 25.00 26.30 25.60 19.30 24.50 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.13 0.70 -5.81 24 258 2009-01-15 18:05:59 2003-08-19 16:33:44 6 6 206 0 175 260 1 339.80 24 77.84 CHANGED +LpsplpuEL+hLp+lpsthssh.....................................................h.pplpSoNLsaaculhpthcp.ppslsult+sF......................shpc.sts.........plhVDlVussGpp..WVKV.stsscpLhhphttps..............................p.sshsllc.ApshlpAupts.sta+p.Pclhhlh........................................hss..ssl.hcpl+phG.....lsl...............................hhtstststss.....h.......tsshtph.s........s..htshssplNlDsTsLlAhVSslSa.sssph.............hpc+hltcQhchE.+pcPlLsp.L.sthcu+cL.........................................................................................................................lsscpAscchp-Il-TlGGssE+pRuppLhtpl.hl.s.....................................Pst...phppLht.ttlphpslplFuhssthchshhTsNtth.lcuApppGhhhsVhp+ss+sLo .......................................................................................................................................h.p.lttEhp.Lpph.t....................................................tth.oo...Nhsahpslhp....hpp...........pslsul.p.F................................................thtt..............tlhVDlVs..s..sG..tp..WlKshshps.ctLh...hhtpu............................................................p.s..phs.llcpApchlpAup........tp....h..tapp.....P+..lhhhh...................................................................................................ssh.s.h..hcplc.phG........lp.....................................................................t.ptst.....................t.s..p..t..h..t...s....................p...hsphspp...l..NLDhTsLlshVSsloa.sssph....................................................hppchlp..cQh.....p..E........pp.....p..............lhsp.L.s..hhp....s..+.pL.........................................................................................................................
hssppAhpphppIlphlGsssE+pRsp.Lht.p.......s..................................................................ph.pl........p.p....ph..a.shs.thph.hhss....s...t....th....lpts..p..uh.....hh...s................................................................................................................................................ 0 49 91 138 +6829 PF07001 BAT2_N BAT2 N-terminus Vella Briffa B anon Pfam-B_9101 (release 10.0) Family This family represents the N-terminus (approximately 200 residues) of the proline-rich protein BAT2. BAT2 is similar to other proteins with large proline-rich domains, such as some nuclear proteins, collagens, elastin, and synapsin [1]. 25.00 25.00 25.20 25.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.38 0.71 -3.95 10 243 2009-01-15 18:05:59 2003-08-19 16:36:34 6 1 87 \N 112 219 0 174.00 54 10.12 CHANGED MS-+uGQssKuKDGK.KYuoLSLFcTY...KGKS.LEsQK..ssVssRHGLQSLGK.VuluRRMPPPANLPSLKAENKGNDPNVslVPKDGTGWASKQ-Qs-scsoss.sspsPEopPsss.ttsss.....pP.ss..sss.ls.uus+SWA.sSsppGupG-Gspt..plssap+cEFPoLQ.AAG-QDKusK-psssDpu ...........................................................................MS-+uGpssKuKDGK.KYuoLsLFspY....KGKS..lE..s..QK..ssVss.R.H.GLQSLGK.....VuhuRR.MPPPANLPSLKAE..NKGND.PNVsl.VPK..D.G.T.GW..AoKQ..-...Qp...-s....c..s...s.....s....s...t..ss...psPEs.P.s..t.ttssss.................ps.sust..s.sspVs..susKS.....WA...ssp...tu...up...s.s...G..h..t...lspa......ppEFPoLp.....AAG-Q-KssKEpts....s...................................................... 0 25 38 65 +6830 PF07002 Copine Copine Vella Briffa B anon Pfam-B_9705 (release 10.0) Family This family represents a conserved region approximately 180 residues long within eukaryotic copines. Copines are Ca(2+)-dependent phospholipid-binding proteins that are thought to be involved in membrane-trafficking, and may also be involved in cell division and growth [1]. 
20.70 20.70 20.70 21.40 20.60 20.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.90 0.71 -4.45 18 1157 2012-10-10 16:07:06 2003-08-19 16:56:31 11 19 130 0 676 958 9 139.10 46 27.26 CHANGED SLHaIusp.psNsYppAlptlGpslpsYDsDchhP...AaGFGu..phs..-hsVoHs...Fslshssps.spCsGlpGllssY+pslPplpLhGPTsFuPIIstsschApt....psuupYaVLLIloDG..lT............shppTh-AIVpAScLPLSIllVGVG ..........................................SLHalsPh...t....NtY......pAlhuVG.pllQcY..Do...D.....Kh......FP.................A.aGF..GA....pl..Ps...c.h.........p.V.SHp..............Fsl..............Nh...s...sp.s.....s...C.p..G.l......pG.l...l.puYppsLs.p....lpL......h........GP.T........s.......Fu.PlIscsAch.A......t................p...s...u......s.....p....Y.al.................LLIlTDGs...lo...........................Dh...p..pT.pcAIVp....AS.p..L.PhSIIIVGVG............................................................... 0 224 337 496 +6832 PF07004 SHIPPO-rpt DUF1309; Sperm-tail PG-rich repeat Vella Briffa B anon Pfam-B_8734 (release 10.0) Repeat This family represents a short conserved region carrying a PGP motif that is repeated in eukaryotic proteins of sperm-tails. Shippo orthologues from some species may include up to 40 Pro-Gly-Pro repeats. 20.00 3.00 20.00 3.50 19.90 -999999.99 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.70 0.73 -8.34 0.73 -2.80 323 5278 2009-01-15 18:05:59 2003-08-19 17:13:05 7 51 129 0 3778 5557 23 31.00 24 51.20 CHANGED sPGPG..s.Y...sspp...................................ttssh...shus+pptt ......................................................sPGPG....s.Y...........sspp....................................................................................t.sth......sht.t....h.................................................................... 
2 2124 2616 3195 +6833 PF07005 DUF1537 Hop; PF07005; Protein of unknown function, DUF1537 Vella Briffa B anon Pfam-B_8609 (release 10.0) Family This conserved region is found in proteins of unknown function in a range of Proteobacteria as well as the Gram-positive Oceanobacillus iheyensis. 20.90 20.90 20.90 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.29 0.70 -4.67 131 1723 2009-01-15 18:05:59 2003-08-20 09:55:19 6 19 1153 4 415 1360 256 211.80 26 49.09 CHANGED Qoptp.....luhlshssl....tG..st..slpsplsphtspuh.phslsDAhsspDLttlupAstphs..........LhsGuuG..luhuLst.....thtttshhst.tst.s.......................................ss...sllluGSsSssTppQlsth...psshss....lplDsttlh.....t.......ts.h.....ppshshstsphsps...ss.llassssspsspsspt.phthtttu.h............lppsLuplstpl.h...........pt.u..lppllluGG-TSusVsps.Lulsulclss .......................................................Qst..ss.lshpsl......tpG..st....slpttLspl....p.....pp..Gh..phsVlDAls-pcLphhu.pAltp.ts..............LlsGuuG..luhuL.up.....p.hsptstttt...t..............................................sstsllluG...SsSphTppQlsth....c..p..t..s..ss...........hplD..stphl.......................p...htshh.......ppl..hphshsthspt....ts...llhso..t..s..s..p....s.htslpp.....th.t.....t.....p..thupt...........................lpphhuplstpl.h...........tp.s......lst.h.llsGG-TSu.sVsps.LGlpuhclt....................................... 1 108 236 339 +6834 PF07006 DUF1310 Protein of unknown function (DUF1310) Moxon SJ anon Pfam-B_10153 (release 10.0) Family This family consists of several hypothetical proteins of around 125 residues in length. Members of this family seem to be specific to Listeria and Streptococcus species. The function of this family is unknown. 
30.10 30.10 30.20 30.30 29.50 30.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.40 0.71 -4.60 18 373 2009-01-15 18:05:59 2003-08-20 10:41:32 6 2 106 0 22 303 0 116.40 36 85.44 CHANGED K..+WllllhhhLlslhhlGuthYhcccpc....pc..EMhpllpS-EAKcVaEctLKslDscAhTccGlI+oYcIDcpSIcpNPMGGI.VcLlINcDscLhlpasLp+ss.pGcLcuuuhshSscLscLL .....................hlhhhhlslhsh.h.hl.G.st.hhhs.p...cp........pp...EMhpllpScEsK.plhEctL+plDscAhTc...c...GhIpSYcIDcpSlcpNPMG.G.I.lpllIN.sD.clhlphslp+.....ps.....Gc.lpssu..hs..hStcLscLL.................................. 0 5 6 17 +6835 PF07007 DUF1311 Protein of unknown function (DUF1311) Moxon SJ anon Pfam-B_10506 (release 10.0) Family This family consists of several bacterial proteins of around 120 residues in length. Members of this family contain four highly conserved cysteine residues. The function of this family is unknown. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.54 0.72 -3.89 182 2203 2009-09-11 12:01:10 2003-08-20 10:44:19 7 31 1231 2 507 1711 82 90.10 22 47.59 CHANGED sC....psst..sssphth.Cs..stp..hpthDpcLspsY...pphhpphs.............tttt........ptLc...p....uQ+sWlph.R-tpCthttt..........t...........tsCh..hphsppRhtpL ..........................................................C....ttst..st.hphhh..Ct...s.tp..hpthDtcLspsY...pphhpphp............t.tp....................stLp.p......uQ..psWlph..R-t.pCt.h...t.............................hsChhphsppRhttL............................................ 0 105 233 375 +6837 PF07009 DUF1312 Protein of unknown function (DUF1312) Moxon SJ anon Pfam-B_10829 (release 10.0) Family This family consists of several bacterial proteins of around 120 residues in length. The function of this family is unknown. 
29.60 29.60 31.60 30.10 29.30 28.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.37 0.71 -4.26 40 812 2009-01-15 18:05:59 2003-08-20 10:52:32 6 4 615 16 142 554 21 112.80 29 80.84 CHANGED ll.llllllluhhslhhh.............t.psspsthAlIpl-G+lhpplsLsc.ptspphpl....psspthNllclcsspl+hp-us...CPDplCV+pGhIs+sGps.IlCLPp+llIcIp...uspp.pp ............................................................................lIhhlllhu.hhslhhh......................tttppsssthAl.l.pl.....-G..c..hc.phsLsp..t..ppphpl...................sspsthN.hl-.l.cssclRlp-us...CPDplCV+pGaIs+.sG.......po..IlCLPc+lllclcuspt..s.............. 0 74 124 133 +6838 PF07010 Endomucin Endomucin Moxon SJ anon Pfam-B_10834 (release 10.0) Family This family consists of several mammalian endomucin proteins. Endomucin is an early endothelial-specific antigen that is also expressed on putative hematopoietic progenitor cells. 20.90 20.90 20.90 23.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.73 0.70 -5.05 3 39 2009-01-15 18:05:59 2003-08-20 11:05:11 7 2 26 0 17 52 0 160.40 53 87.87 CHANGED MRLLQVTlLFFLLSNSLC+SEsuK-stNsSLs...ETSTTKASlTTPshVSl.pNsNKPTsGTPPKGTTsS-lsKTSLMoTlsSLTTPKHEltTTTcGVlKNESSThKlTVsNsTlSNAVSTLsSSQNKTENQSSIRTTEI.ssTslL.sDApPKpTuT.oSASLTTA+ThSQlQ..DTEDGKIAoToSTTPSYSSIILPVVIALIVITLLVFTLVGLYRICWKRDPGTPENGNDQPQSDKESVKLLTVKTISHESGEHSAQGKTKN .........................................................................................................................................................................................................s.....................................s.s...........t.tp.sss.t.s......ShSu.II.LPVVIALIVITLSVFlLVGLYRhCWKsDPG.s.EsGs.-QPQSDKESVKLLTVKTISHEoG....................... 
0 1 1 4 +6839 PF07011 DUF1313 Protein of unknown function (DUF1313) Moxon SJ anon Pfam-B_10989 (release 10.0) Family This family consists of several hypothetical plant proteins of around 100 residues in length. The function of this family is unknown. 20.40 20.40 20.50 60.40 20.30 18.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.98 0.72 -4.32 12 107 2009-01-15 18:05:59 2003-08-20 11:07:00 6 2 42 0 55 103 1 87.10 52 69.78 CHANGED stutDuclhpshppSFsQVQulLDQNRhLIpclNpNHpSRhsDsLsRNVuLIRELNsNIp+VlslYuDLSssFupsh..ctupp....ussosss ...........s.tsDsKlhpsFp+SFsQVQslLDQNRlLIsEINQNHES+hPDNLoRNVuLIRELNsNIpRVVsLYuDLSsoFs.+oh.csssp....upss..s................ 0 9 37 47 +6840 PF07012 Curlin_rpt Curlin associated repeat Moxon SJ anon Pfam-B_10299 (release 10.0) Repeat This family consists of several bacterial repeats of around 30 residues in length. These repeats are often found in multiple copies in the curlin proteins CsgA and CsgB. Curli fibres are thin aggregative surface fibres, connected with adhesion, which bind laminin, fibronectin, plasminogen, human contact phase proteins, and major histocompatibility complex (MHC) class I molecules. Curli fibres are coded for by the csg gene cluster, which is comprised of two divergently transcribed operons. One operon encodes the csgB, csgA, and csgC genes, while the other encodes csgD, csgE, csgF, and csgG. The assembly of the fibres is unique and involves extracellular self-assembly of the curlin subunit (CsgA), dependent on a specific nucleator protein (CsgB). CsgD is a transcriptional activator essential for expression of the two curli fibre operons, and CsgG is an outer membrane lipoprotein involved in extracellular stabilisation of CsgA and CsgB [1]. 
20.70 20.70 21.10 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.46 0.72 -4.23 22 2923 2009-01-15 18:05:59 2003-08-20 11:21:28 7 10 529 0 240 818 103 32.30 40 50.36 CHANGED GssNssslp.QtGssN......tuhlsQh.GssNpsplsQtG ..............sssp.hplp.Q.t.GssN............sAh.ls..Qp..Gs.ss.cssloQhG.......... 0 40 67 153 +6841 PF07013 DUF1314 Protein of unknown function (DUF1314) Moxon SJ anon Pfam-B_10999 (release 10.0) Family This family consists of several Alphaherpesvirus proteins of around 200 residues in length. The function of this family is unknown. 25.00 25.00 90.20 89.90 22.40 16.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.14 0.71 -4.88 5 24 2009-01-15 18:05:59 2003-08-20 11:24:38 6 1 18 0 0 20 0 170.40 41 75.45 CHANGED c+sLLcuLuGRslDLPGGG-.LtIssssGps...hs+FspuGouRsshl+aIGRAhTsGssRcFlIatu+-..GsVYGYEsuTGLHhLAcSLHDFLsp+GLSQRDLhVlcGshhcuclssLsh..phh+soS-ssplssp.oschstsTsssuctspSoo+Rs...oLuslss....shpsp+lls+GS scsLLcsLuGRslDLPGGs-.ltIssssGps...ht+appsGsp+hshs+hIGRAhs.GssRcFllhhs+s..uslaGYEsuTGLHhLApSLH-FLpppGLSpRDLhlhcushhshphpslsh....hpsoops.plshp.sss..hsTtsssptppooocRs...sLsslss.....hpspphls+GS............................. 0 0 0 0 +6842 PF07014 Hs1pro-1_C Hs1pro-1; Hs1pro-1 protein C-terminus Moxon SJ, Vella Briffa B anon Pfam-B_11205 (release 10.0) Family This family represents the C-terminus (approximately 270 residues) of a number of plant Hs1pro-1 proteins, which are believed to confer nematode resistance [1]. 
25.00 25.00 68.20 63.10 23.00 22.60 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.88 0.70 -5.06 5 41 2009-09-11 08:44:19 2003-08-20 12:04:16 7 3 20 0 24 44 8 248.10 56 59.96 CHANGED MpRCsYTLGLGEPNLAGKPsLcYDAVCRPsELHALK.......csPYsD+I-NpENQsLaTIHQILESWIauSspLLsRIssRIEcc+FEKAAsDCYlLERIWKLLAEIEDLHLLMDP-DFL+LKcQLpIKSou.cs-AFCFRSKGLVEVsKMSKDLR+KVPsVLuVEVDPsGGPRlQEAAMKLY..cRKoEaE...KIHLLQAMQAVEuAsKRFFFGY+QLVAAMMGSAEANANRslh..SsESsDSLoQlFLEPTYFPSLDAAKTFLG-FWs+c ......MptCsaTLGLGEPNLAGKPsLcYDtVC+PpElHuLK.......posa..hh..c.NpENcsLaThHQIlESWlpsuppLLpRlscpIcs+saEtAApDCallERlWKLLs-lEDLHLlMDPDDFLRLKpQL........ul....+......oss..p...s........u.....uFCFRS+tLlcls+hs+DL+++VPplLuVEVDPpGGPRlQEAAM+LY........pc+sthE...............KlaLLQAhQAlEsAh+pFFauY+QllssVMGouEupustshh..ss-uhDsLoQhFLEPsYFPSLDAAKTFLupaWsp.p............... 0 8 17 20 +6843 PF07015 VirC1 VirC1 protein Moxon SJ anon Pfam-B_11309 (release 10.0) Family This family consists of several bacterial VirC1 proteins. In Agrobacterium tumefaciens, a cis-active 24-base-pair sequence adjacent to the right border of the T-DNA, called overdrive, stimulates tumour formation by increasing the level of T-DNA processing. It is thought that the virC operon which enhances T-DNA processing probably does so because the VirC1 protein interacts with overdrive. It has now been shown that the virC1 gene product binds to overdrive but not to the right border of T-DNA [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.35 0.70 -5.28 4 74 2012-10-05 12:31:09 2003-08-20 12:08:44 6 2 60 0 21 1997 816 212.80 28 93.83 CHANGED MpLlThCSFKGGAGKTTALMGLCuuLAucG+RVALFEADENRPLo+W+ENAhppusWDstCElasADELPLLEsAYEpAEspGFDYALADT+GGuSELNNTIIASSsLLLIPTMLTPLDlDEALuTYRYllELL.luEsLsIPsAILRQRVPssRLToSQphh.-MLEpLPlhDsPMaERDAFAAMK-RGMLHlsltNhupsPoMRLhhRNlctAh--lshluchlpcsLEu ........................M.llshsS.KGGuGKTT..u.h..hhLu..s.tl....A.....p.....p..G.....p....p..Vs.....llDAD....s..N.........p....s...l.s.p....W.....t..p..t....s.......t..t...s.......s...h......s......s........h...p......l....h..s..s..s...c......s...h..l...c..th....h...p....p...s...p..t..p.s.h...s.al.l.lDh...c.G....s..u...o...t..h..s.s...h...s..Iu.p..u.D.lllI...Psthoth..Dh...c....pAhp.sh.phlt..c.......h...psh..p..h...p....I...P..h....s.V..l.h...o..+..s...s....s....t....t..h.pp...t.p..c.h.h.p.-h.....h......p...p......l.....P.......l...h...p...s.......hh-.R....sAa..pu..h.hp.h.G..h.............................................................tt................................................................................................................................................... 0 7 17 19 +6844 PF07016 CRAM_rpt Cysteine-rich acidic integral membrane protein precursor Moxon SJ anon Pfam-B_11042 (release 10.0) Repeat This family consists of several 24 residue repeats from the Trypanosoma brucei cysteine-rich, acidic integral membrane protein precursor (CRAM). CRAM is concentrated in the flagellar pocket, an invagination of the cell surface of the trypanosome where endocytosis has been documented [1]. 19.10 19.10 27.40 19.60 16.00 15.40 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -7.89 0.72 -4.60 2 132 2009-01-15 18:05:59 2003-08-20 12:15:35 6 4 4 0 60 150 0 23.90 95 108.27 CHANGED .sDDCNITGDCNETDDCsITGDCN .ETDDCNITGDCNETDDCNITGDCN.... 
0 2 60 60 +6845 PF07017 PagP Antimicrobial peptide resistance and lipid A acylation protein PagP Moxon SJ anon Pfam-B_11014 (release 10.0) Family This family consists of several bacterial antimicrobial peptide resistance and lipid A acylation (PagP) proteins. The bacterial outer membrane enzyme PagP transfers a palmitate chain from a phospholipid to lipid A. In a number of pathogenic Gram-negative bacteria, PagP confers resistance to certain cationic antimicrobial peptides produced during the host innate immune response. 21.90 21.90 22.90 24.40 21.40 18.60 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.96 0.71 -4.89 10 610 2009-09-10 16:46:51 2003-08-20 13:10:46 6 3 581 4 62 268 7 143.80 76 77.22 CHANGED pNVupTWsps.spDLYlPslTWHNRasYDcEKIDsYNE+PWGuGYGhSRYDEcGsWHGLYhMAFpDSHN+aEPIsGYGapKhWpPsp..pDa+hGlGaTAulTARc-a.sYIPIPslLPLASluY+plohQuTYIPG....TYNNGNVLFuWhR ..........................NlApTWpQPEHYDLYlPAITWHARFAYDKEKT..DRYNERPWGuGFGhSRWDEKGNWHGLY..AMAFKDSaNKWEPIuGYGWEpTWRPL.s.D.-NF+LGLGFTAGV..TAR....DN.W.NYIPlPVLLPLASlGYGPsTFQMT...YIPG....TYN.NGNVYFAWMR................................ 0 4 23 43 +6847 PF07019 Rab5ip Rab5-interacting protein (Rab5ip) Moxon SJ anon Pfam-B_11031 (release 10.0) Family This family consists of several Rab5-interacting protein (RIP5 or Rab5ip ) sequences. The ras-related GTPase rab5 is rate-limiting for homotypic early endosome fusion. Rab5ip represents a novel rab5 interacting protein that may function on endocytic vesicles as a receptor for rab5-GDP and participate in the activation of rab5 [1]. 
21.40 21.40 21.60 21.50 21.10 21.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.67 0.72 -3.85 38 446 2009-01-15 18:05:59 2003-08-20 13:25:37 7 12 273 0 301 409 2 82.00 27 60.80 CHANGED pslhasRphh.........ullhGlsuGlLsLpGhh.Ghlhahlsshhhshhhhs..phtpss.........cpaatshpclh.hpGlhsulss..........Fl....lsWshh..........Y ............sl.ahRphh.........ullhGlsuGlLsLpGhh.GFlha..hlhshhlsh.lhhs...pht.p.s.................................cpha.t.u.hhplh.ppG..hhs..uhhs........................Fl....lhWhhhY............................................... 0 96 159 244 +6848 PF07020 Orthopox_C10L Orthopoxvirus C10L protein Moxon SJ anon Pfam-B_12732 (release 10.0) Family This family consists of several Orthopoxvirus C10L proteins. C10L viral protein can play an important role in vaccinia virus evasion of the host immune system. It may consist in the blockade of IL-1 receptors by the C10L protein, a homologue of the IL-1 Ra [1]. 25.00 25.00 34.80 32.80 22.10 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.31 0.72 -3.45 4 34 2009-01-15 18:05:59 2003-08-20 13:30:38 6 1 19 \N 0 28 0 62.90 80 98.48 CHANGED MSucGt....SpGGhapsFhps.sGusKts+ohoSGGG........uMWGG......................GuSuGlpuGVpGGVNGGVNuGsuKI ...MSSKGG.....SGGMWSVFIHGHDGSNKGSKTYTSGGG...........GMWGG.......................SSS..GVKSGVpGGVNGGVKSGTGKI.... 0 0 0 0 +6849 PF07021 MetW Methionine biosynthesis protein MetW Moxon SJ anon Pfam-B_11086 (release 10.0) Family This family consists of several bacterial and one archaeal methionine biosynthesis MetW proteins. Biosynthesis of methionine from homoserine in Pseudomonas putida takes place in three steps. The first step is the acylation of homoserine to yield an acyl-L-homoserine. 
This reaction is catalysed by the products of the metXW genes and is equivalent to the first step in enterobacteria, gram-positive bacteria and fungi, except that in these microorganisms the reaction is catalysed by a single polypeptide (the product of the metA gene in Escherichia coli and the met5 gene product in Neurospora crassa). In Pseudomonas putida, as in gram-positive bacteria and certain fungi, the second and third steps are a direct sulfhydrylation that converts the O-acyl-L-homoserine into homocysteine and further methylation to yield methionine. The latter reaction can be mediated by either of the two methionine synthetases present in the cells [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.11 0.71 -4.93 6 667 2012-10-10 17:06:42 2003-08-20 13:50:55 7 7 656 0 204 5525 4101 186.90 43 86.71 CHANGED RsDachItchI.PGSRVLDlGCGDGsLLpLLp-pKpVcGpGlElspsGVscClA+GLsVlQGDhDpsLspasD+uFDaVlLSQTLQATRNPcpVL-EhLRIG++sIVSFPNFGHW+sRhpLhs+GRMPVTcsLPYsWYsTPNIHFCTltDFhpLCc-lshpl-cpsAlcttut.h...hspah.NahGchulFhl 
.............................................................R.Dh.p..l..Itc.h..l......s...s...S...R...V...L.DL..G.C.G........D.G....p....L....L....p....h.......L.....t........c......p...........+.........p......s........p....G...........h....G.....l.........E....l..........D.........p........s.......s........l..........h.........s......s.........l......u..........+..........G.........l......s........V.............I....p..........t......D........L......-.........c.....G.............L.......s......p.................F..........s......D.....p......S...F...D...h.V.l..L...S....Q.......T...L....Q...A...l....c.....p...P....-.....t......l.....L.....c....E....h....h....R...l.......G....+.....p.....s..I.....V.....o.F.....P..N......F.....G...a........W...+.....s..R.h.....p....l..h..........p......G.+.M...P......V.....o..cp....L......P.Yp.WYs..T............P..N...I.+.......h...s.TlcDFE.tLstch.s..l..cl.l..c...R.hs.l.stpp..............hsphhPNLhuphAlahl.................................................................................................................................... 0 67 135 169 +6850 PF07022 Phage_CI_repr Bacteriophage CI repressor helix-turn-helix domain Moxon SJ, Bateman A anon Pfam-B_11145 (release 10.0) Domain This family consists of several phage CI repressor proteins and related bacterial sequences. The CI repressor is known to function as a transcriptional switch, determining whether transcription is lytic or lysogenic [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -9.03 0.72 -3.89 14 693 2012-10-04 14:01:12 2003-08-20 13:57:29 8 9 476 2 136 1419 82 63.90 28 33.13 CHANGED sslcRlhcAYGFpscppLusaLulu+SThu......................................................................................................shhtR-phP.u-hllpCuLETGsSLpWLssGcGphh .......................hlcRlhpsh...sh..p..s......p..p..pLuch.LGlupuTlu......................................................................sh.h.t..R...s..s..h..P...s-.h.ll..p.h.u.h...c..s..G.lslpW.LhsGpGt................................................... 0 33 82 113 +6851 PF07023 DUF1315 Protein of unknown function (DUF1315) Moxon SJ anon Pfam-B_11170 (release 10.0) Family This family consists of several bacterial proteins of around 90 residues in length. The function of this family is unknown. 25.00 25.00 32.80 32.80 20.30 19.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.27 0.72 -4.10 26 846 2012-10-03 10:59:06 2003-08-20 14:00:57 7 1 840 0 114 362 43 88.00 57 95.55 CHANGED h-pllpshTPElYppLppAVElGKWPDGpsLTsEQ+-sshQAVMhYps+Hshssccho....GplshKoKpphpt.......................pslthhp ....l.--lIsuMTPEVYQRLsTAVELGKWPDGVALTpEQKENsLQlVMLWQARHNs-AQHMTIsTsGpMVMKSKQpLKccFuhs........................scshshhc..................... 0 17 40 80 +6852 PF07024 ImpE ImpE protein Moxon SJ anon Pfam-B_11208 (release 10.0) Family This family consists of several bacterial proteins including ImpE (Swiss:Q93EC9) from Rhizobium leguminosarum. It has been suggested that the imp locus is involved in the secretion to the environment of proteins, including periplasmic RbsB protein, that cause blocking of infection specifically in pea plants [1]. The exact function of this family is unknown. 
25.00 25.00 38.40 77.50 23.60 18.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.53 0.71 -4.16 36 372 2009-09-10 16:41:21 2003-08-20 14:24:06 8 4 347 1 71 256 11 123.30 43 45.34 CHANGED sussFsWlsDuDsRLGPshEll.ssGpYhWlPaupIpslchpsPusLhDllWpPspls....ltsGststualPsRYsuotps........................ssAh+LGRcTcWp-.hups.shhGhGQ+hahssss-huLh-lpplph ........sssFsWluDSDoRLGPVhEll.suGsYhWlPFupIpSLchspPspLpDLlWpPspls....LhsGssttualPsRYsuScpu..................................sDulRLuRcTsWp.........-....sucs.....sshulGQKsWlT.s...pG-huLhDltphpF...... 0 13 30 52 +6854 PF07026 DUF1317 Protein of unknown function (DUF1317) Moxon SJ anon Pfam-B_12646 (release 10.0) Family This family consists of several hypothetical bacterial and phage proteins of around 60 residues in length. The function of this family is unknown. 20.60 20.60 20.90 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.92 0.72 -4.09 2 125 2009-09-11 12:57:57 2003-08-20 15:10:15 6 1 114 0 4 77 0 57.50 79 96.27 CHANGED MKHsHDsIpVGtlhhsYSsh+pGWlhPGhplh+NPhpA.phAEEhNNhhtulpsc...Ls ...........MpHPHDNIRVGuITFVYSVTKRGWVFPGLSVIRNPLKAQRLAEcINNKRsAVCTKHL.LS.................. 0 1 2 3 +6855 PF07027 DUF1318 Protein of unknown function (DUF1318) Moxon SJ anon Pfam-B_11321 (release 10.0) Family This family consists of several bacterial proteins of around 100 residues in length and is often known as YdbL. The function of this family is unknown. 
20.80 20.80 21.00 20.80 20.70 20.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.04 0.72 -3.75 54 739 2009-01-15 18:05:59 2003-08-20 15:21:49 7 2 731 0 142 388 33 94.30 54 81.96 CHANGED hhuhsu......hAh.sLspA+upGlVGEphsGYLulVt..sss-s.psLVpsINtpR+ApYpclApcN.slol-pVuphAupKhl.p+stsGpalps.sGpWh.+K .....................................L.hosss.hAL..TL-EARoQGRVGEThsGYL......s.....s......l+.............s......D......AE.....T..ppLVp-INttR+AuYQQLAc.........p......N.sl...o....l..--...lAK...lAGQ.KLV.tRA+PGEYVQGINGKWlRK......... 0 43 80 112 +6856 PF07028 DUF1319 Protein of unknown function (DUF1319) Vella Briffa B anon Pfam-B_10434 (release 10.0) Family This family contains a number of viral proteins of unknown function approximately 200 residues long. Family members seem to be restricted to badnaviruses. 28.10 28.10 28.20 59.30 26.30 28.00 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.64 0.71 -3.94 11 50 2009-01-15 18:05:59 2003-08-20 15:23:18 6 1 29 0 0 50 0 127.20 41 80.65 CHANGED Spps...pL-YLDLAopsKsS..sp-LAHNLplsscRlsLts+V...........................plpplppI.+p.ctpLcclcpclcpLpc-LpsLcp-alpR+PLSKp-Vc-LVlcIoEQPKhIEcQoEtLocELpccV-cl ............t...pL-YLDL.tots...Klo..Np-LuHNLplsh.RhsLss+V...................................plcphppI.cp....lcchp.pclctLppcLpsLpp-alc++PLoKp-VccLVlcIuEQPKhIEcQs.tLoc-Lppclccl................. 0 0 0 0 +6857 PF07029 CryBP1 CryBP1 protein Moxon SJ anon Pfam-B_11415 (release 10.0) Family This family consists of several CryBP1 like proteins from Bacillus thuringiensis and Paenibacillus popilliae. Members of this family are thought to be involved in the overall toxicity of the bacteria to their hosts [1,2]. 
19.50 19.50 20.80 20.30 19.20 18.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -4.49 5 45 2009-01-15 18:05:59 2003-08-20 15:29:38 6 1 21 0 4 42 0 155.90 37 85.48 CHANGED pElPcplo.ctppKlPFCCVVSIP+GFchVusscPKLVYsLssLSllKETCRKsVpVD-CGpAEVDLHVLKVKGCIPFIuNlEVcPIsscpsCoosPHccpISLSCp-oVCVDHVLKCSVssLPDacIDCcaVsVsDLplpPl+EssCQFVKIoGpFsFaYl ........................thp.ctpp+lPFCCllSlPpGFplhss....s.p.KlVYslssLuhlKETC+KslpV--CGpsElDLpVLKlKGCIsF.lsNh.plc..Phsppphsoo.s....s.........+sppIsLSCpsoVsVDplL..KhSVs..p..LPchpIsspplplpDlplpsh....pEssp...phl+loGtFpFh........................................................... 0 3 4 4 +6858 PF07030 DUF1320 Protein of unknown function (DUF1320) Moxon SJ anon Pfam-B_13638 (release 10.0) Family This family consists of both hypothetical bacterial and phage proteins of around 145 residues in length. The function of this family is unknown. 20.70 20.70 21.20 20.80 20.60 20.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.61 0.71 -4.41 48 429 2009-01-15 18:05:59 2003-08-20 15:31:54 7 1 362 0 75 343 19 130.70 24 89.87 CHANGED YsThsDLhsphuppplhp.................L..sscs.........ssths.shlppAls-AsstIDuaLtuR.Ys...LPL..ssVPslLpphssslAcYhLhspcst.....s-t......lpccYc.pAl+hLcplusGclsLGlss.........ssssssssststhpuspp ................................Yso.pD.lh.tphspp...tlhp..........................L.....ospp...........ssthsps.hlppAls-.Asu.IDuYLtsR....Yp...............l.P.l..............sss.Psl..LpphssslAh.Yp.L.tspps.........s-p........lcccYc.pAlchLcplusGcl..sLulss........ttts..tt.s...h.....tt.................................... 0 32 58 67 +6860 PF07032 DUF1322 Protein of unknown function (DUF1322) Moxon SJ anon Pfam-B_13233 (release 10.0) Family This family consists of several hypothetical 9.4 kDa Borrelia burgdorferi (Lyme disease spirochete) proteins of around 78 residues in length. The function of this family is unknown. 
19.60 19.60 20.00 21.40 19.10 19.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.73 0.72 -3.78 2 124 2009-01-15 18:05:59 2003-08-20 15:53:41 6 2 29 0 8 73 1 72.70 69 85.59 CHANGED MsK............sEhtppYFphlD.l..cssKYYFPllhsICoYcDVKKh.YcELLEVNRlAslKLpKEhYEhhLuh ..............MpKhN+DIDKAIASLNEoRKKYFNLLDEI..KNDK..YYFPVIMNICSYcsVKKLPYDELLEVNRlA-lKLEKE.LYELILSK............... 3 5 5 5 +6861 PF07033 Orthopox_B11R Orthopoxvirus B11R protein Moxon SJ anon Pfam-B_13245 (release 10.0) Family This family consists of several Orthopoxvirus B11R proteins of around 70 residues in length. The function of this family is unknown. 25.00 25.00 141.50 141.50 20.80 19.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.36 0.72 -4.50 2 31 2009-09-11 15:28:16 2003-08-20 15:58:21 6 1 12 0 0 30 0 71.10 94 81.57 CHANGED DTsssNVEDIMNEIDREKEEILKNVEhENNKNIpKNHPsEYIREALVINTSSsSDSIDKEVIECISpsVGI ..DTDV.TNVEDIhNEIDREKEEILKNVEIENNKNINKNHPSEYIREALVINTSSNSDSIDKEVIECISHDVGI 0 0 0 0 +6862 PF07034 ORC3_N Origin recognition complex (ORC) subunit 3 N-terminus Vella Briffa B anon Pfam-B_10452 (release 10.0) Family This family represents the N-terminus (approximately 300 residues) of subunit 3 of the eukaryotic origin recognition complex (ORC). Origin recognition complex (ORC) is composed of six subunits that are essential for cell viability. They collectively bind to the autonomously replicating sequence (ARS) in a sequence-specific manner and lead to the chromatin loading of other replication factors that are essential for initiation of DNA replication [1]. 
22.30 22.30 22.30 22.80 20.60 22.20 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.95 0.70 -5.47 13 289 2009-01-15 18:05:59 2003-08-20 16:27:21 6 7 223 0 199 294 2 270.20 24 40.30 CHANGED K..R............Kputspcsh..hpssspsscchhsphRacsapphWpplcsph-clQcphNt+lh-sLlcFlccstpstptp...h.tp.............pclPTAALls...............GlNhsDH.hpFpsLoppL+sssssaVshLpS+DCsu........lKthlpplltQL......hcsssslctc-p-...........ht.p+sphslssLsoWYp..................pppphsssp.pp................................PlVVIlcDhEsFsspVLQDFIlIhSpalpclPllLVFGlATossulHplLPapVSShLslclFQo.ussppLssVlDclLLosphPF+LSuKVhplLpsIFLYHDFSlpuFI+Gl............KhuhlEHFhspPLSsL ..................................................................................................................t.p..................................p.thphhp.hWtt.pt.ph..p.........pl.pphptphhppl.pFlppt..t...........................................tplPsuhlhh......................GsNh.ssp.hhhppL..tp..tl.p...p.....p........s........s.....h...l...s.L.p.ut-sss.........................................l.+thlppllpph....................................hs.p..s..p.t..p.....................................t..ph...th....s...hp.Lt..saat........................................p..pht..................................................................................................................................................................lVllhcDhE.uFs.splLpDhlhl...h.....S..p..a........h..p........c...........l...........Phh....llhGl..A..T..os..ph..hc...phLsps.sss..h.......Lph....p..h....F...p......sspphlsplhcpllh....s....s.p....h....s..........h...p..l..utplhp.hLhs.hah...pshSlpsFlpul............phshhpHFaspPLShL.................................... 
0 59 105 159 +6863 PF07035 Mic1 Colon cancer-associated protein Mic1-like Vella Briffa B anon Pfam-B_10041 (release 10.0) Family This family represents the C-terminus (approximately 160 residues) of a number of proteins that resemble colon cancer-associated protein Mic1. 20.80 20.80 21.20 23.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.69 0.71 -4.89 5 183 2009-01-15 18:05:59 2003-08-20 17:08:55 7 2 131 0 123 177 0 150.30 34 25.00 CHANGED chhtccsYlluIlMEYLRSLsctpIssptpLacMlIcpLApuscFscLppFVoY+lLp-SKsLAhhLLShuspsosl+QLGlDML+RluhAHDhIlEVLLpcGplL-ALRaA+chtslssVpusKFLEAApposDsQphaAIhRFFoE+p.+h.hh.uFssshcTsE ............................................h..pallullhEYlRSLsp...hp.Is....sp.ch.LacllIptLlppp.t.ahhLcphlpY+VlsDSKslA..phL..LShp..........s..h............a..........ss.s..pQLuLDML+RLs.up-tIlElLLsctplLtALRah+sh.....tt...hss.........l...ss..p+.........FL-AAhpspDs.lhaslh+FFpp..........F..................................... 0 47 65 97 +6865 PF07037 DUF1323 Putative transcription regulator (DUF1323) Moxon SJ, Bateman A anon Pfam-B_12705 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 120 residues in length. This family appears to have an HTH domain and is therefore likely to act as a transcriptional regulator. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.52 0.71 -3.94 10 925 2012-10-04 14:01:12 2003-08-21 10:48:22 6 1 468 0 36 265 4 117.00 62 94.57 CHANGED MTPEELAchoGYoRQTIN+WVRKcGW+TpPhPGVpGG+ARLlHlsppVREFIpsss..........RlsEssAtY.sssscusl.s.llsslcpMTssEQcQLssLLsREGIsGLLpRLGIR-s ......................MTsEELAchhG..h..uR..QTlN+Wl..Rc.cGWp...Tp.hPGV..pG.G.+.AR...Ll..H.VspQVREaIpNs...........cs.EshAEh..Asuu.cAPh.t.llohscpMTssEQcphsphLsREGI..pGhLpRLGIc-S.................................................................................... 
0 3 3 21 +6866 PF07038 DUF1324 Protein of unknown function (DUF1324) Moxon SJ anon Pfam-B_12735 (release 10.0) Family This family consists of several Circovirus proteins of around 60 residues in length. The function of this family is unknown. 25.00 25.00 140.50 140.30 19.30 18.10 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.11 0.72 -3.94 2 9 2009-01-15 18:05:59 2003-08-21 11:20:11 6 1 6 0 0 7 0 59.00 97 94.82 CHANGED MpCTLVFQSRFCIFPLTFKSSASPRKFLTNVTGCC.ATVTRlPLSNKVLTAVDRSLRCP MTCTLVFQSRFCIFPLTFKSSASPRKFLTNVTGCCSATVTRLPLSNKVLTAVDRSLRCP 0 0 0 0 +6867 PF07039 DUF1325 SGF29 tudor-like domain Moxon SJ, Bateman A anon Pfam-B_13304 (release 10.0) Domain This domain is found in the yeast protein SAGA-associated factor 29. This domain is related to members of the Tudor domain superfamily such as Pfam:PF05641. The SAGA complex is involved in RNA polymerase II-dependent transcriptional regulation. The membership of the tudor domain superfamily suggests this domain may bind to RNA. 25.30 25.30 25.40 30.20 25.10 25.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.91 0.71 -4.53 20 328 2012-10-02 16:56:36 2003-08-21 11:28:18 6 4 260 14 233 311 5 129.80 33 39.46 CHANGED phG-pVAAphp.......psstc.........p-WIlucVlphsscs..p+YEVhDsDs-...ppt...taphot+clIPLPp.......s.cssstspFPpGopVLAlYPp.....TTsFY+AhVhss.................sp....ptsspYpLcF-D-p.sst....hpVsp+aVlshsp ....................................h..GspVAh+sp..........ttstc................ppWIlscVlp.hstps...p+......YEVpD.-s-..............ppp.......paphotpplIPl.Pp.......t.pssshs.F.stGppVLAlYPp.......TTsFY+AhVhus.............................................t........pt.p..s...s...YplpF-s-ptsst.....hpVspRaVlsh..p.......................... 
0 86 129 193 +6868 PF07040 DUF1326 Protein of unknown function (DUF1326) Moxon SJ anon Pfam-B_11597 (release 10.0) Family This family consists of several hypothetical bacterial proteins which seem to be found exclusively in Rhizobium and Ralstonia species. Members of this family are typically around 210 residues in length and contain 5 highly conserved cysteine residues at their N-terminus. The function of this family is unknown. 25.00 25.00 29.30 29.30 24.80 24.70 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.14 0.71 -4.52 34 220 2009-01-15 18:05:59 2003-08-21 11:31:31 6 2 187 0 80 216 231 181.30 31 86.66 CHANGED htsCsCshhCPC.hs......t.PTp..GtCcuhhuhcI-cGpasslcLsGLsluhlhchPGs.ht-G.....shcssh....alD-cAo-tQhcALhpIhoGps....Gu.hushusllu.chhusthsPIcacscspt......ps..lplsshhphth-shhpPhsGsstcs.hslssssa..........hphucsspsph..pshGh.sachss..ppu ....................h.tsCsCshhCPC.hu......tsPTp..G....pCcuhhua+I-cGpas-lcLsGLsluhls....ph.P....Gs....lt...cG.....shcssh....alD-RAossQtcALhpIaoGps....GGhhuhFuph.lu.chhuschAPIca-scscp......ts....lcls.sh.hp....s.hsPhsGsstcs.hslssssap..h.st...hthucss.psph...cshGh.shshsspp............................................................................................ 0 23 49 63 +6869 PF07041 DUF1327 Protein of unknown function (DUF1327) Moxon SJ anon Pfam-B_11630 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 115 residues in length which seem to be specific to Escherichia coli. The function of this family is unknown. 
25.00 25.00 25.50 25.30 24.30 23.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.48 0.71 -4.17 3 518 2009-01-15 18:05:59 2003-08-21 11:33:54 6 1 251 0 5 248 0 105.10 69 80.18 CHANGED MTQDYELVVKGVRNFENKVTVTVALcDKc+FDGEIFDLDISLDRVEGAALEFYEAAARRSlRQVFLDVAAGLCEGDEpLPEKRPlILEAQsVhITY+GKLPGhITGSLKTPPs .....MppcYELlVKGIpNa.sKlTVTVALc.tth.sh.l.DlsISLDRsEGAsLEFYEAtA++ps+QhFhDVAAGLCEGDt.LPEKRPlILEAQsVhITY+GKLPGhITGSLKhPP.......................... 0 1 1 1 +6870 PF07042 TrfA TrfA protein Moxon SJ anon Pfam-B_12321 (release 10.0) Family This family consists of several bacterial TrfA proteins. The trfA operon of broad-host-range IncP plasmids is essential to activate the origin of vegetative replication in diverse species. The trfA operon encodes two ORFs. The first ORF is highly conserved and encodes a putative single-stranded DNA binding protein (Ssb). The second, trfA, contains two translational starts as in the IncP alpha plasmids, generating related polypeptides of 406 (TrfA1) and 282 (TrfA2) amino acids. TrfA2 is very similar to the IncP alpha product, whereas the N-terminal region of TrfA1 shows very little similarity to the equivalent region of IncP alpha TrfA1. This region has been implicated in the ability of IncP alpha plasmids to replicate efficiently in Pseudomonas aeruginosa [1]. 
20.20 20.20 20.60 20.20 20.10 19.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.93 0.70 -5.59 2 183 2012-10-04 14:01:12 2003-08-21 11:42:26 6 2 103 0 39 169 15 194.10 48 81.52 CHANGED K++TAGt-LAcpVSEAKQsALLKHTKQQIK-MQLSLFDlAPWssohtAhPNDhuRSAlFTsRNKKlPR.ALQspsIaHVNKDVcITYTGlELRADDDELVatQVLEYAptTslGEPloFTFYELCQDLsWSINGRYYT+AEECLoRLQAoAMtFoSpRlG+LESVSLl+RFRVLDRGK+TSRCQV.IDtEIVVLFAGDHYTKFlWEKYRKLSPs.stRMFDYFuoH+EPYPLKLETFRLMCGSDSTRsKKWREQVGEAC-ELRtsGLVEpAWVNDDLVHCKR ......................................................................................................................s..hhRsslFs..........................................th.l.apG.pL......pt........la.....t.h..h.p...tpt..hs.....t.....h.h...s.hthhptlsht.......st...t.ht.h.tsltR..ht.s.th...t....h.....pt.t..t.t.......................hh.t........................p....t............t............h....h..ht.p.hh.hat.t.ho....h...ht.hR+Lo..P.s.....A..RR.h..F..DY...auoH...+..-.P...Y.PlKLETFRLMCGS-S.s..R.sKKWREQ.l.ucAC-EL+csG.L.V-s.AW....l.s..s........................................................................................................................................... 1 7 19 31 +6871 PF07043 DUF1328 Protein of unknown function (DUF1328) Moxon SJ anon Pfam-B_12535 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 50 residues in length. The function of this family is unknown. 21.00 21.00 21.70 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.05 0.72 -3.97 105 1343 2009-01-15 18:05:59 2003-08-21 11:48:34 8 1 1124 0 357 687 48 39.40 58 70.82 CHANGED llFhlIAllAulhGFuGlAuuAAuIAKILFalFLllhll .....lIFLVIALIAAsLGFGGl.A.GsAAGhAKIlFhVhllLFlV.................. 
0 81 194 276 +6872 PF07044 DUF1329 Protein of unknown function (DUF1329) Moxon SJ anon Pfam-B_12608 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 475 residues in length. The majority of family members are from Pseudomonas species but the family also contains sequences from Shewanella oneidensis and Thauera aromatica. 23.10 23.10 23.20 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.11 0.70 -5.52 104 761 2009-01-15 18:05:59 2003-08-21 11:53:27 6 4 350 2 275 746 454 269.20 22 78.32 CHANGED sIoApN...hspYp..........cpLosGQhAhlKpYPs.a+hsVYsT+RosshPptlhcss+pNAspscLsss..Gsu.lp...shh.su..lPFP.l.....P..psGhEllWNahhRa+...Gt.shppthsphsspssGshs..hsphppphhhhhhh.stsstp..........sshhhhhh.phpuPu+luGsslLl+-slDp.htp.sRpAWhYssG....QRRVRRAPshAYDsPt.sus-GhtTsDshchFNGusDRY.-WKLlGK+..EhalPYNsYcl.........tsssh+as-llpssHlNP-hhRYELH...RVWVVEATLKsGtRHlYuKRsaYlDEDoWphs.hsDtYDs+GpLWRsspsah........h.hY-ssssh.ssspshaDLpuG.RYhs.shss...cpst.shph......s.ph.pp....spFossAL.Rp ...............................................................................................................................................................................sas.............ttu.......phh.p..h.t.......................h.....sGp...........t....thh......t.t......................tshh.hhh.hhptPscltGsshLshphhsp.....p.pptW.hY.su.....RRVRRhsshs..hcsPhhus.ssh.....hDsh..ph.apst.p.c..a.paphhGpp...hh............................................................php.+...csahlpup.p.....pch...as++hhalDp......Dsa.hh.hh-.YDtp.GpL..a+h..h..........h...p....h.h..h...th.hhhsl.st.+hhh.thtt....tt......h...........h.......t.apstsl.t............................... 0 91 161 228 +6873 PF07045 DUF1330 Protein of unknown function (DUF1330) Moxon SJ anon Pfam-B_12466 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. 
The function of this family is unknown. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.76 0.72 -4.11 136 1006 2012-10-02 00:20:33 2003-08-21 11:56:04 6 4 649 38 383 984 2554 64.20 30 62.05 CHANGED sptap.cYtptsssshtpaGGc.hLs+...uGphpslEG..hp.schVllcFPohcsApsaYpSs-YQ.tht ....................................ptat.pYtptsss.s.h.p.p..a..GG.c.....hl..s+....G...u..ph........csl...E...G.......t.sh...s+s..VllEFPohpsAhshYpSs-YQts...... 0 92 214 298 +6874 PF07046 CRA_rpt Cytoplasmic repetitive antigen (CRA) like repeat Moxon SJ anon Pfam-B_12798 (release 10.0) Repeat This family consists of several repeats of around 42 residues in length. These repeated sequences are found in multiple copies in Trypanosoma cruzi antigens, Swiss:Q26907 contains 23 copies of this repeat. 21.40 21.40 22.60 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.60 0.72 -3.97 5 164 2009-01-15 18:05:59 2003-08-21 13:07:06 6 8 12 0 5 162 0 41.10 84 82.01 CHANGED EAEKRKAAEAAKsAEAEKQRAAEATKVAEAEKQKAAEATKVA .....EAEKQKAAEATKVAEAEKQKAAEATK..VAEAEKQKAAEATKVA..... 0 0 0 5 +6875 PF07047 OPA3 Optic atrophy 3 protein (OPA3) Moxon SJ anon Pfam-B_12863 (release 10.0) Family This family consists of several optic atrophy 3 (OPA3) proteins. OPA3 deficiency causes type III 3-methylglutaconic aciduria (MGA) in humans. This disease manifests with early bilateral optic atrophy, spasticity, extrapyramidal dysfunction, ataxia, and cognitive deficits, but normal longevity [1]. 
25.70 25.70 26.70 26.70 24.60 23.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.53 0.71 -4.38 19 343 2009-01-15 18:05:59 2003-08-21 13:16:55 7 6 247 0 243 347 2 139.30 29 65.35 CHANGED PlhKLusLul+plSKPlAshlKppApps.hFRsh.......lhls.AQhhHhh-hphph+hhGh.tpss.......................................pIpPLNE-tAlplGu-LLGEhhIFs.....................VuuuhllhEapRpupp...-s+KEEthppclppLcpchpcLphtl.-phppp ...................................PhhKLssLhl+plSKPl.....A........st.l.....KppA+.........p.......p.h.F.Rph........h.lshAQhhH..........th-h+h+.h.th.hs..h..t.psh...........................................................................................pl.+PLsEppAl-h.........GAcllu.EhhlFs.....................VuuulllhE.hhRpppp....ptp+c..-....php....p....chppL...ppphpplt.t.p.....t................................................................. 0 76 126 198 +6876 PF07048 DUF1331 Protein of unknown function (DUF1331) Moxon SJ anon Pfam-B_12865 (release 10.0) Family This family consists of several Circovirus proteins of around 35 residues in length. Members of this family are described as ORF-10 proteins and their function is unknown. 25.00 25.00 93.80 93.80 24.60 17.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.82 0.72 -4.49 3 11 2009-01-15 18:05:59 2003-08-21 13:19:41 6 1 6 0 0 8 0 35.00 96 95.77 CHANGED MSTAQEGVLTVVALTVYPKVRERRVLKMPFFLLQR MSTAQEGVLTVVALTVYPKVRERRVLKMPFFLLQR 0 0 0 0 +6879 PF07051 OCIA Ovarian carcinoma immunoreactive antigen (OCIA) Moxon SJ anon Pfam-B_13720 (release 10.0) Family This family consists of several ovarian carcinoma immunoreactive antigen (OCIA) and related eukaryotic sequences. The function of this family is unknown [1,2]. 
25.00 25.00 25.70 26.20 24.40 24.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.38 0.72 -4.66 13 172 2009-01-15 18:05:59 2003-08-21 13:37:10 6 3 78 0 83 165 0 101.90 43 49.93 CHANGED usu..ptsstpts.ptss.........tsuhsYhhop-Et+Vh+ECNcESFahRuLPlusluhllTtulVppGaLpsssRFGulPKVslAsllGYhlGKlSYhpsCtEKhhcL.NS.LG- ......................tt.s...........................huhtah.opEEp+lhpECpcESFWhRulPhussSMLlTQuLlpp.GhLuupP+aGSl.PKlhhAslhGYhsGKlSYhpsCQEKF.cpLc.N.S.LGp............................ 0 20 28 45 +6880 PF07052 Hep_59 Hepatocellular carcinoma-associated antigen 59 Vella Briffa B anon Pfam-B_10544 (release 10.0) Family This family represents a conserved region approximately 100 residues long within mammalian hepatocellular carcinoma-associated antigen 59 and similar proteins. Family members are found in a variety of eukaryotes, mainly as hypothetical proteins. 22.70 22.70 23.10 23.10 19.00 18.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.47 0.72 -3.66 46 244 2009-09-14 11:59:59 2003-08-21 14:00:37 6 3 211 0 181 244 2 100.10 35 33.04 CHANGED cpFs..spTspps.....Dp+M...hpYIEpcLt+........R+t..............ttppssstspstsp.ps..t.............Lhc.hPspLp.............tptt..ttphh..uuIsE...VDLGh-s....Kl+NIEcTEcA++cLh .......................pFsscTspp-....cDtc...M........hcYIEsELtK...................R+shs..................tppp.pppsp.p.stpc.............tLac.lP-plc....................tppspp.hSsphh.....uGIsE...VDLGl-u......Kl+.NItpTEcAKtcL........................ 0 62 94 139 +6882 PF07054 Pericardin_rpt Pericardin like repeat Moxon SJ anon Pfam-B_13724 (release 10.0) Repeat This family consists of several repeated sequences of around 34 residues in length. This repeat is found in multiple copies in the Drosophila pericardin and other extracellular matrix proteins. 
21.20 21.20 22.50 21.20 20.00 20.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -7.95 0.72 -4.05 5 240 2009-01-15 18:05:59 2003-08-21 14:29:02 6 13 10 0 103 238 0 33.30 73 44.13 CHANGED QPGYGSQPGVGuQTGAGQPGYGoQPGIGGQTGAG .....QPGYG..oQPGl.G.GQ.T..G....u..G..QPGYG..oQPGlGGQoGhG.... 1 40 40 83 +6883 PF07055 Eno-Rase_FAD_bd scADH; Enoyl reductase FAD binding domain Vella Briffa B, Coggill P anon Pfam-B_10602 (release 10.0) Family This family carries the region of the enzyme trans-2-enoyl-CoA reductase, at the very C-terminus, that binds to FAD. The activity was characterised in Euglena where an unusual fatty acid synthesis path-way in mitochondria performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. The full enzyme catalyses the reduction of enoyl-CoA to acyl-CoA. The conserved region is seen as the motif FGFxxxxxDY [1]. 25.00 25.00 27.10 33.60 23.00 22.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.93 0.72 -4.03 65 659 2012-10-10 17:06:42 2003-08-21 15:13:17 7 3 532 5 160 538 83 64.90 50 16.50 CHANGED tlD-csRlRhDDhELcs-VQspVppLWsplTs-Nl.h-loDapGYKpEFLpLFGFsl-GVDY-ADV ......lDscsRlRhDDaELcsDlQpplpcLWsQlTs-Nl..ppLoDasGYKpEFlpLFGFsl-GVDY-ADV............. 0 41 82 120 +6884 PF07056 DUF1335 Protein of unknown function (DUF1335) Vella Briffa B anon Pfam-B_10636 (release 10.0) Family This family represents a conserved region approximately 130 residues long within a number of proteins of unknown function that seem to be specific to the white spot syndrome virus (WSSV). 
25.00 25.00 142.30 141.60 19.40 18.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.64 0.71 -4.41 5 17 2009-01-15 18:05:59 2003-08-21 15:24:03 6 2 2 0 0 12 0 128.90 46 28.59 CHANGED DKsFpFSPLYRhIoc+LSsAsh+cc-phIVoTDFLlGLGFSscNVo+pLKuMEpsh.ppGhppohVPVsDICHRppYKGchIsNPIsuSaSspCLIVPLshLGtlFSpssaPSutsl-sYhusLssAVllY .DKsFpFSPLYRhlpcpLSpAsh+cp-hhIloTDFLlGhGaospsVs+pL+sMEphh.ppshtpohVsVh-ICHRhpYKGthIsNPIhtSaSspCLIVPlshLGhlFupssaPSutslcsYhusLh.AlllY 0 0 0 0 +6885 PF07057 TraI DNA helicase TraI Vella Briffa B anon Pfam-B_10638 (release 10.0) Family This family represents a conserved region approximately 130 residues long within the bacterial DNA helicase TraI (EC:3.6.1.-). TraI is a bifunctional protein that catalyses the unwinding of duplex DNA as well as acts as a sequence-specific DNA trans-esterase, providing the site- and strand-specific nick required to initiate DNA transfer [1]. 20.60 20.60 20.60 22.90 20.30 19.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.45 0.71 -4.13 8 282 2009-01-15 18:05:59 2003-08-21 15:48:57 6 11 224 2 13 255 4 121.40 78 9.48 CHANGED VuLSRhKpHVQVYTDN+psWssAlsp..usptuTAHDlLEP+sDRpVtsAsRLhuTAppLccTAhGRAVL+puGLtpspohA+FIuPG+KYPpPaVALPsaD+NGKpAGlhLssLsscDss.hRsltGE ...VALSRMKQHVQVYTDNRQGWTD.AINN..AVQKGTAHDVhEPKsDREVMNA-RLFSTARELRDVAAGRAVLRQAGLAGGDSPARFIAPGRKYPQPYVALPAFDRNGKSAGIWLNPLTTDDGNGLRGFSGE............ 0 1 1 9 +6886 PF07058 Myosin_HC-like Myosin II heavy chain-like Vella Briffa B anon Pfam-B_10658 (release 10.0) Family This family represents a conserved region within a number of myosin II heavy chain-like proteins that seem to be specific to Arabidopsis thaliana. 
19.60 19.60 37.30 20.70 19.30 19.10 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.01 0.70 -5.36 3 95 2009-01-15 18:05:59 2003-08-21 15:55:57 6 5 21 0 62 100 1 292.60 55 56.63 CHANGED MVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLoQSVRELEEAVLAGGAAANAVRDYQRKFQEMNEERKTLERELARAKVoANRVATVVANEWKDuNDKVMPVKQWLEERRFLQGEMQQLRDKLAIoDRAAKSEAQLKEKFpLRLKVLEEoLKGPoSSuoRsTo.uRSpSNGPoRRQSLGGAEs..KFTSNGuLSKKsPSSQLRtSLTusuooVLKHAKGTS+SFDGGTRSlDRSKlLlNGP.uNasLNcKuoEsopcuE.....p.sSE+KsEpEDscAsoEDSVPGVLYDLLQKEVIoLRKAucEKDQSL+DKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKE ...........................hVDDlQN+NQELhKQIEICQEENKILDKhHRQKVuEVEKLoQTV+ELEEAVLAGGAAANAVRDYQRphpEhNEE++TL-RELARAKVoANRVAs.VVAN..EWKDsNDK.......VM....PVKQWLEE...RRh..............LQGEMQpLRDKLAluERsA+uEAQLK...........-KhpLRLKVLE-uL+ssss.s.s.sps.....s......s+o.....s.........su...s.RR....pS.......lGus...-shsph.s.N..Ghh.++ps..s.p.RsSl.s....sss.....l.l..+.puc.........t..s..S.....+ShDG...u..s+ul-.......p.t....K.hh..hsG..............h.....h.p.........p.tsctspttp................................p.ssp.p.t.....p.....phss....s.....pspD.VsGhLYDhLQKEVlsLRKusc-KDpoL+DKD-AIEMLuKKV-TLTKAMEVEuKKhRRElAuhEKE.............................................................................. 0 14 40 51 +6887 PF07059 DUF1336 Protein of unknown function (DUF1336) Vella Briffa B anon Pfam-B_10173 (release 10.0) Family This family represents the C-terminus (approximately 250 residues) of a number of hypothetical plant proteins of unknown function. 
20.70 20.70 22.70 21.00 20.30 19.00 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.49 0.70 -4.94 20 403 2009-09-14 23:05:42 2003-08-21 16:08:14 7 20 43 0 274 397 12 194.50 33 37.25 CHANGED Wo.ssssssFplRGcoYhp.DKpKhPAssh.hhp.lGlDhFpusc.+............hccIupphst.stpstpp...tls.......alhllNlQlPs..............u-shSlVlY.....Fthpcshpc.........tsLlcRFlctD.....DuF+.......ppRlKllssllp.G....sWhl+psV..hspKPsLlGptsppsah....cGsN......YhEIDlDlus.ShlA+sllchhhuhlsslllDluhhIQupstEELPEplLsslRLNcl- ....................................Ws..sssssFhlR.GpsYhp.....c...........+p..............Khs.usp..hhp.lusDhhpssp...+..................................hcclstp.........t........s...thhtp................th.........hhhllNlQ.l.Ps...............................spthShVhY.............ahhpp.h.t.........................pllp+Flp..sD........csF+..............................spRhKllst..lsp.u............................shhl+.phV....sppssLl.s.p..thp.pp.ah............pGss........................Yh.ElDlDlts..ShlAptsl.phhhs.hlpslslDhuhllpu...ppt-ELPEp..lLGssRlspl-........................ 1 101 204 245 +6889 PF07061 Swi5 DUF1337; Swi5 Wood V, Studholme DJ anon Wood V Family Swi5 is involved in meiotic DNA repair synthesis and meiotic joint molecule formation [1][2][3]. It is known to interact with Swi2, Rhp51 and Swi6 [1]. 20.60 20.60 20.90 32.70 20.50 18.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.87 0.72 -4.20 20 177 2009-01-15 18:05:59 2003-08-21 16:39:21 6 8 155 0 119 162 1 83.60 31 40.18 CHANGED phpshpsphssLcp.pht.....phcpplu-hpup............psPpppVc+HI+LLHEYN-lKDlGQtLlGhIADtRGl+tt-lacEaGlshs- ................................................................p.......phtplcp.p.t............thpp..plpph.tsc..............................t..tstptlccHIphLHEYN-lKDlGQtLlGhlA.sh+GVphp-lap-..aslshp.................... 
0 39 62 92 +6890 PF07062 Clc-like Clc-like Vella Briffa B anon Pfam-B_11218 (release 10.0) Family This family contains a number of Clc-like proteins that are approximately 250 residues long. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.46 0.70 -5.08 5 78 2012-10-03 00:20:40 2003-08-21 16:52:45 7 3 13 0 70 76 0 179.30 26 62.97 CHANGED plllhsSLlLolVuhsLouAAlhTPSWQVVDlREaRuhHQHGLWLDCsRtpp...Hllp..s.hs-sPLHCsYKFDhDuh.sYppsl-shD....psussGEsc+HpFaGWH+ulLhlhlhShlhAuLSlhSGlCAsCssupA....lhaoIhlslAuLluslu-uIFFlsAaRVDsRFl.pGlVGTYEQ+lGYAFYLplsGshlallAllhAsLsoYhoFlsuc .........................................t....hhshlhhlluhhL.hhuhhoPuWQ.l....sphp-hpt..hppGLWhsChp..p...................................t..........hpChY+Fs....ss...ht....t..c..............tssttsc.p.pcpF........a.....s.W+huslhhhhhuhh..huhlul...hh...uhCu.sh.shu....lhhslhh.hhsslhuhhu.hlFhhhuph....h.......-s+al....uhsspYE......pc..hGhuaYlthhush.h.hhuhlhuhhhsh......p................... 0 26 34 69 +6891 PF07063 DUF1338 Domain of unknown function (DUF1338) Vella Briffa B, Bateman A anon Pfam-B_10864 (release 10.0) Domain This domain is found in a variety of bacterial and fungal hypothetical proteins of unknown function. The structure of this domain has been solved by structural genomics. The structure implies a zinc-binding function, so it is a putative metal hydrolase (information derived from TOPSAN for PDB:3iuz). 
20.90 20.90 22.30 21.60 20.70 20.50 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.84 0.70 -5.36 94 1224 2009-11-20 14:21:51 2003-08-21 16:56:52 8 5 1137 6 261 849 170 336.20 41 87.14 CHANGED tpphhpuL.phYhp.csPpstslh.cLlsp....sspplhp..........................DHhAlRoh........sssshslssls+hFtshGatssGh........Ych.sstp...lpuptFc.s...............csst........s+lFlScLcl-plo...sphpphlppll..........hsttshphl............pth.........................thlsthLts..................hpW..pt...sshssYppLtpESchsAWlss.pGhphNHhTspl.............................-Icplppthpp.....pGhshp....ut............................................................................................lcGsPp.....hhLcQoShhA.p.slpF............t-s.....sptphsusFhEhtpR....................................................................................................st........t....p.hha-sFlssSAstIFcSTh ................................................................ppFupAhSsMYppE.VPpYuTLl.-LVuc.......lNhsllptp.thtt..................sthtRls.ERHGAIR..........lGostELusLpRhFAhhGMhPVuY........YDL.spAus...PV+STAFRPl...............DsssLth.....sPFRV.FT.ShL+lEh...lp..pttLRppstclL.....tpRplFospshpLl............cph-tp.............GG.hsttpAppFltps.Lcs..................F+W..cpps.VsppsYcsLppEpclhADVlsh.GsH.INHLTPRs............................LDIDpVpshMsc.....pGlp.K.....sh............................................................................................IEGPPcpp.sslLLRQTSF+ALcEsVhF.............ssp.......hpGshsuRFGElEpRGhALT.cGRthY-thht...........................................................................h.........t......tshsps........hhttGhh.tpPlsYEDFLPsSAAGIFpSNL.......................................................................... 0 52 116 189 +6892 PF07064 RIC1 DUF1339; RIC1 Vella Briffa B, Wood V anon Pfam-B_11581 (release 10.0) Family RIC1 has been identified in yeast as a Golgi protein involved in retrograde transport to the cis-Golgi network. 
It forms a heterodimer with Rgp1 and functions as a guanyl-nucleotide exchange factor [1]. 20.90 20.90 24.10 22.50 18.40 16.50 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.61 0.70 -4.94 21 304 2009-01-15 18:05:59 2003-08-21 17:06:16 8 12 258 0 221 323 0 244.90 30 21.67 CHANGED sLc-uLWhasG.pthp.........sWhsh.s.......t......tp.....lsc.l.ls..l-hYPLulLhsculllGsps-hh............pppsssashhchphcoplaL..lLpthL...........hps...........hstpAhplspphppLsYFsHsLElLLHpVL--Esss..............s.psslLPsVluFLpp...aPpaL-lVVpCsRKTElppWphLFshlGsPp-LFccCLppppLcTAuuYLllL...................pshE.stus......spppsl+LlphAlcppcW-Lst-LsRFLtulDsststh.p ...............................................................................................................l.psLWh.tG.tthh................lWh.sh.............................t.t..p................h..t.l.ls....hphYPLulLhppu..l..llGspschh................................php...hth.....shhphphp.o....plaLpplLcphL....................tps...........hs.pAltlu...pp...h.....p.p..LsaF.sHsLElLL.H.p.VL--.Esssp..................................................................sh.cslL...splhphlpp................F.s...a............LclVVpCsRKTE....hthWp.h.LFs.hl.u.s.Pp...-LFE.......cCLp.tppLc.TAuuYLllL............................................pshE...ssss..............sp...ppus.pLlpt....A........hp.......p........sc........W-Lst-LhRFLtulspstp....p.................................... 1 73 119 183 +6893 PF07065 D123 D123 Vella Briffa B anon Pfam-B_10915 (release 10.0) Family This family contains a number of eukaryotic D123 proteins approximately 330 residues long. It has been shown that mutated variants of D123 exhibit temperature-dependent differences in their degradation rate [1]. D123 proteins are regulators of eIF2, the central regulator of translational initiation [2]. 
19.30 19.30 19.30 19.30 18.70 18.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -12.13 0.70 -5.34 23 384 2009-01-15 18:05:59 2003-08-26 10:47:36 9 6 285 0 263 392 8 253.20 27 73.76 CHANGED WYsh...F+ph.Th+ohlI.PLPc..shlcYLtpDG..lhlspppss...........psssssphs-..pt.sDspstp..........ssscFPEhp..pplp-AIppLGGtVhPKLNWSAP+DApWIsssso...lpCpsssDlaLLLKuS-alsHDLsc..satt.s.D............cpsttt........saELVLRcWh.clpPuhEFRCFV......KspcLluloQRDh.sYY-aL..tc....c-plpstIppFa....c-pl...h.pF....sscsaVhDVYlsp.....s+lhlIDhNPauc.pTDsLLFoWsELpshtt..........ptpsp.-hRll..........sstsphts.ppasp.plPhDhl-sops ....................................................................................................................................................W......att..s.pshhl..ls......thhpYL.tDs..hhls...t.........................................pttt.p..p....t...tppttt.................................pas-h......p..tplppsI.pp...hG...Gt.......V....hPKLNWSuP+DAhWh....s.so.......................hpCpshs-laLLL+uSs.hls+DL..pp...satt.s.s...................................................t....h..............th.LlLRcah..sl..p..PuhEFRCFV................+pppLl.uloQR-h...p...aa....s.al....t...........ppplhptIpp...Fa.................pppl...........t..pa.......psalhDl.Yhsp......................t+lhllDhNPau...h....T..csLLFsWpE.l.t.t........................................................h.ph...........................................s............................................................................. 1 101 163 227 +6894 PF07066 DUF3882 Phage_Lacto_M3; Lactococcus phage M3 protein Moxon SJ anon Pfam-B_13997 (release 10.0) Family This family consists of several Lactococcus phage middle-3 (M3) proteins of around 160 residues in length. The function of this family is unknown. 
22.10 22.10 22.20 22.30 21.80 22.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.10 0.71 -4.38 2 63 2012-10-03 01:22:09 2003-08-26 11:41:59 6 1 61 0 2 51 62 148.30 62 96.44 CHANGED MpKhLAIDFSTus.....TGYAFRp..ssphhVGSI.Ahs.pKsshERsh.IssuIp-lI-casLhsYalhIEpPIhshppKtsIoLhpsNG.FlGshcshhN.GYs.lsNSKWCuYHLIpGKpt.RK.pSlElLKuhslVssNshsDshADAasILhYsEp .............................MSKALAIDFSTSN.....TGYAFRNP.LTNE..YVV.GSI.A.GGKSKDPL.E.RAKlIADGITEl....IEHYNLFD...YF.....IYIE.E.PIITFKSKGNISLIRANGSF.LGV..M.RNR...HN.IGYVDls.NSKWCGYHL...IKG.KSthRKs...QS.IE.IL.KS.a.sIVP..cc.....cIN.DD.ADAFCILLYVES.................................................................. 0 1 2 2 +6895 PF07067 DUF1340 Protein of unknown function (DUF1340) Moxon SJ anon Pfam-B_14083 (release 10.0) Family This family consists of several hypothetical Streptococcus thermophilus bacteriophage proteins of around 235 residues in length. The function of this family is unknown. 25.00 25.00 263.00 262.80 24.40 23.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.42 0.70 -5.20 3 13 2009-01-15 18:05:59 2003-08-26 11:44:03 6 1 13 0 0 14 0 235.10 71 100.00 CHANGED MSGKYcYAGLTKELHQRLVlEFsALKEcHsRTFTKHIMETKQCNRppARKYFQRFDNVIKERSKLSPuTLDDMREYLT-GLVNDLQEYLSEHYSAsSsSCKPDTsKTNAGLTEELF+QYRcEIcsLRAAHPNsFAsYIMEVKGCSpQQA+TIpTAINTIYTEIGILTPRKVIQLEGLLSRELFGKIAKYVFNKYEWPESLDSEVDRIYLEYRTKGDLGLEKESVKRALYKAIYMGL .MK.+YEYAGLTKELHQRLTlEFDAL+EcH+R.TLTKYIMETKpCsRhpARKYFQRFDNVlKERSKLSPuTLDDMREYLTDGLsNDLQEYL.KpYosRsspC+PDsDKsNAGLscELFhQYRcEIppLRAsHPNshssYIM-VKGCopQpAsoIpTAINTlYTElGILTPRKVIQLEGLLSRELFGKIAKYVFNKYEWPESLDSEVDRIYLEYRTKG-LGp-KESVKRsLYKAIuMGL 0 0 0 0 +6896 PF07068 Gp23 Major capsid protein Gp23 Vella Briffa B anon Pfam-B_12245 (release 10.0) Family This family contains a number of major capsid Gp23 proteins approximately 500 residues long, from T4-like bacteriophages. 
19.20 19.20 20.40 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 493 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.80 0.70 -5.97 4 1586 2009-01-15 18:05:59 2003-08-26 11:48:55 6 2 183 1 0 1357 2718 119.50 36 80.33 CHANGED Ll-KWKELLEGEG..LPEIAN.SKQAIIAKIFENQEKDFpsuPEYKD-KIAQAFGSFLTEAEIGGDHGYsApNIAAGQTSGAVTQIGPuVMGMVRRAIPNLIAFDICGVQPMNSPTGQVFALRAVYGKDPlAAsAKEAFHPMYuPDAMFSGQGAA.KtFsslsAostTssGDIYp.HFFpETGosY...LQAohtVTIsutAssuthhDAEIhKQMEAGsLVEIAEGMATSIAELQEGFNGSTDNPWNEMGFRIDKQVIEAKSRQLKAAYSIELAQDLRAVHGMDADAELSGILATEIMLEINREVVDWINYSAQVGKSGMTphsGSKAGVFDFQDPIDIRGARWAGESFKALLFQIDKEAVEIARQTGRGEGNFIIASRNVVNVLASVDTGISYAAQGLAoG.FsTDTTKSVFAGVLGGKYRVYIDQYAKQDYFTVGYKGuNEMDAGIYYAPYVALTPLRGSDPKNFQPVMGFKTRYGIGVNPFAESohQAPuuRIQSGMP ........................................................................................................................tt.........................................................................................................................................................................................................................................................................................................................................................................................................................................th.......s........s....s....s..s...s.a.....scMuFoI-KhoVsA+oRAL.KAEYohE..LAQDLKAlHGLDA.EsELuNILo................................................................................................................................................................................................................................................................................................................................................................................................ 
2 0 0 0 +6897 PF07069 PRRSV_2b Porcine reproductive and respiratory syndrome virus 2b Moxon SJ anon Pfam-B_13261 (release 10.0) Family This family consists of several Porcine reproductive and respiratory syndrome virus (PRRSV) ORF2b proteins. The function of this family is unknown however it is known that large amounts of 2b protein are present in the virion and it is thought that this protein may be an integral component of the virion [1]. 25.00 25.00 113.80 113.70 22.30 22.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.60 0.72 -3.69 2 55 2009-01-15 18:05:59 2003-08-26 11:57:46 6 1 6 0 0 55 0 72.30 84 100.00 CHANGED MGSMQSLFDKIGQLFVDAFTEFLVSIVDIIIFLAILhGFTIAGWLVVFCI+LVCSAlLRsRPslHsEQLQKI. .MGuMQSLF-KIGQLFVDAFTEFLVSIVDIIIFLAILFGFTIAGWLVVFCIRLVCSAlLRuRsAlHsEQLQKIL. 0 0 0 0 +6898 PF07070 Spo0M SpoOM protein Moxon SJ anon Pfam-B_13263 (release 10.0) Family This family consists of several bacterial SpoOM proteins which are thought to control sporulation in Bacillus subtilis.Spo0M exerts certain negative effects on sporulation and its gene expression is controlled by sigmaH [1]. 20.40 20.40 20.50 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.36 0.70 -5.00 9 513 2012-10-02 22:29:00 2003-08-26 12:03:17 6 1 383 0 138 438 6 209.40 36 81.77 CHANGED FKKhLAShGIGuAKVDTlLppsslhPGtslpGpV+lhGGsssQ-I-pIpLcLss+Y.s.EscDsc...........sppshshspaplstuFsIpsGEc+shPFslslPapTPlTh.....t.chpVhlcTsLDIAtAlDssDhDhlsVcPpPhh-ulLpAl-pLGh+l+ps-sEps+h...GtpLPFVQEhEasPs.G.Y+t.hcElEllFhtssssL-llhElD++tpG.pGhLu- .......................hKKhLAShGlGuApVDTlLpp.....tshhsGpplpGpl+lpGGsssQ.pI-pIplpLssphhh..E.s..s..Dpc.....................ht.p.s.hsltphplst.s.FslpsuE.p+plPFphplPh-.TPlTh.......s.ttpValcTsL...DIstAl.DPsDcDhlpVpP.PhhpsllpAlp.p.LGF+h+p..s-sEpu.h....htpp.l.P.F.hQEhEFhPs.utatGthcElElshl...h.s.s.c.tlc.llhElDR+......sGhh..................... 
0 31 86 118 +6899 PF07071 DUF1341 Protein of unknown function (DUF1341) Moxon SJ anon Pfam-B_14024 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 220 residues in length. The function of this family is unknown. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.50 0.70 -4.81 13 679 2012-10-03 05:58:16 2003-08-26 12:09:50 6 3 511 18 56 391 4 207.80 59 88.40 CHANGED Y+sRVCLNVLAuSl-NAc-IY-AAEGHVLlGVLSKNYsoV-sAlsDM+cYsphl-NAlSVGL.GAGDPpQStMVucIScplQPQHlNQVFTGVGsSRALL...GQs-TllNGLVSPTGpsGhVcISTGPlSSp.ptcuIVPlETAIAhL+DMGGoSlKFFPMGGLppc-EYpAVAc.ACAccuFaL.EPTGGIDL-NFEpIlpIAL-AGVc+VIPHlYSSIIDpt ..................................................................Y+sRVsLNVLAtshpNA+-IY-AAEGHlllGVLSKsYsoV-pAVs-MKcYtAtls.u.lSVG.L.GAGDPsQ.thVucIutphpPpHVNQVFTGsG...h..oRuhL....Gtp-T..hlNuLVSPTGpsGhVpISTGPhSSp..ussuhVslETAIthlpDMGusSlKaFPMsGLpph-EapAVAp.ACA+psFhL.EPTGGIDL-NFtpILp...IAL-AGVp+lIPHlYSSIIDp................. 1 9 21 39 +6900 PF07072 DUF1342 Protein of unknown function (DUF1342) Moxon SJ anon Pfam-B_14075 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 250 residues in length. Members of this family are often known as YacF after the Escherichia coli protein Swiss:P36680. The function of this family is unknown. 
20.80 20.80 21.10 23.50 20.60 19.80 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.19 0.70 -4.68 63 930 2009-01-15 18:05:59 2003-08-26 12:13:27 6 1 924 2 168 466 123 206.70 50 83.59 CHANGED LRLEtLhpplpthhstsct..hppchshpsLF-ll-lh-RsDl+o-LlK-LE+QpppLpsapshPslDpptLppllpclcpstssLhsss.+hGptL+-scaLsulRQRhuIPGGsCsFDLPuhHhWLpps.cpRpp-lppWhssltPLtpulsllLcLlRpouphpp.hApsGhaQpsh..pu...pLLRlcls.s..shaPpISGpKhthuIR ..............LRlEaLlpQLshslshsDp..ssslpFFRsl.-LLDVhE.Ru-.lRoELLKEL-RQppcLp.sWhsVPGVDQspl-uLlppLcssuusLhoAP.RlG.QhLREDRlluhVRQRLSIPGGCCSFDLPoLHhWLH.L.P.psQRcsplpsWluoLsPLspALohlLcLIRpSusFR.+Q.suhNGFYQcNus.-A...cLLRLpLsL..-..pplYPpISGHKsRFAIR...................... 0 29 82 126 +6901 PF07073 ROF Modulator of Rho-dependent transcription termination (ROF) Moxon SJ anon Pfam-B_13280 (release 10.0) Family This family consists of several bacterial modulator of Rho-dependent transcription termination (ROF) proteins. ROF binds transcription termination factor Rho and inhibits Rho-dependent termination in vivo [1]. 20.80 20.80 20.90 22.10 19.50 20.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.46 0.72 -3.97 31 733 2009-09-11 04:47:18 2003-08-26 12:43:39 7 1 727 1 80 248 6 74.20 59 89.79 CHANGED .IsCcpYDYlElACh++h.lpLpL+sG-plpGpAh-htppt.....cKpEaLh.....................lcpp.uspptlcLDpIsuhsshp.sPcFuplhl ..PINCDDYDsLELAC.HHLhLTLpLKD...G.EpLQAK...AsDLlpR......KNVEYLl.....................lEs.u.....GEopELRLDKIsSFS....HPEIGTVVV....................... 1 9 26 52 +6902 PF07074 TRAP-gamma Translocon-associated protein, gamma subunit (TRAP-gamma) Moxon SJ anon Pfam-B_13437 (release 10.0) Family This family consists of several eukaryotic translocon-associated protein, gamma subunit (TRAP-gamma) sequences. 
The translocation site (translocon), at which nascent polypeptides pass through the endoplasmic reticulum membrane, contains a component previously called 'signal sequence receptor' that is now renamed as 'translocon-associated protein' (TRAP). The TRAP complex is comprised of four membrane proteins alpha, beta, gamma and delta which are present in a stoichiometric relation, and are genuine neighbours in intact microsomes. The gamma subunit is predicted to span the membrane four times [1]. 21.00 21.00 21.40 21.80 20.80 20.60 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.04 0.71 -4.73 9 147 2009-01-15 18:05:59 2003-08-26 12:49:58 7 3 109 0 80 132 1 154.60 62 87.79 CHANGED EEELLLQDFSRNVSTKSSALFYGNAFIVSAIPIWLFWRlHpM-lhsShllFslhThsSTaLlAhAYKNsKF.LKHKIAh+RE-AVoREVspcL..uDDKKhoRKEKDERILWKKNEVADYEATTFSIFYNNALFLslllhuSFalL+ohsPohNYllShusAuGLlALLSTG ...................EE-LLLQDFSRNlSsK..SoALFa.GNAhIV.SAlPIWLaWRIapMDLh...p..S.ullaslhTLlSTYLlAFAYKNlKFlLKHKlAtKREDAVo+EVoRKL...ADs+K.MSR.KEKDE.............R...............ILWKKNEVADYEATTFSIFYNNsLFLslVIluSFFlLKNFsPsVNYIlSlusuSGllALLSTu................ 0 30 36 60 +6903 PF07075 DUF1343 Protein of unknown function (DUF1343) Moxon SJ anon Pfam-B_13635 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 400 residues in length. The function of this family is unknown. 
25.00 25.00 25.10 25.00 19.30 19.20 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.30 0.70 -5.61 109 613 2009-01-15 18:05:59 2003-08-26 12:52:12 6 11 514 0 245 629 524 355.00 36 82.47 CHANGED VGLlsNpouls................t............shpp.........................slDhLhpt.sl...plptlFuPEHGh+Gs...spAGt.plssshDscTGlPlhSLY...................Gps...+.......................+PosphLc..s......lDlllFDIQDVGsRaYTYIsThthsMEAs.....................................uctshphlVLDRPNPhsGthl-GPlL-.ta............cSFVGhaP.lPhtHGhTlGELAphhsschhl....................................s....scLpVlshcsWpR.........shhasp.ssh.al.PSPNhPssposhlYPuhslhEG.T...........slStGRGTspPFp....lhGAPa..lc.s......tlhtthpt.t...l.GshapshtFpPp......hp+..apGchCtGlplp.l......s.pthcshpsuhhhlphlpc.hY............spphth.t..........th.......h.h-hLsGsspl+ptI.........ps.Gtshcc....ItppappclppFpphRpp.YLL..Y ...................lGLloN.Tuls.................p....p.h.pp.......................hl.DhL.hp....p......sl.....plsslFuPEHGhRGs..............spAGt...plss.thDs...p..TGlPlhSLY......................Gcs...+...........................+PsschLp..s........lDlllFDIQDVGsRaYTYIsThthsMEAs.....................................Ac..tsh.....p......hlVLDRPNP.sG..t..h..l-GPl.L.-..pa............pSFVGhhs.lPhhHGhTlGELAphhNsE.hhl..............................................................s....scLpVlshp.........sWpR.............sh.hast.tth..a....l.PSPNlPs.poshlYPuhsl.hEG..T...........sl.S.GRGTshPFp....llGAPh..lc...s.............thhtth.pt.t............lsGhtF.ps.htFpPt.........hs+....apuphCtGlplp.l...........s.pthp.s.h.c.hsh.h.hlphltc...hY..............spphtht....................th........thhDhLsGssplRptl.........pt..Gtsh..pc....lpptWppslppFpp.hRppYLLY...................................................................... 
0 119 188 224 +6904 PF07076 DUF1344 Protein of unknown function (DUF1344) Moxon SJ anon Pfam-B_13761 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of around 80 residues in length. Members of this family are found in Rhizobium, Agrobacterium and Brucella species. The function of this family is unknown. 23.10 23.10 23.10 24.90 23.00 23.00 hmmbuild --amino -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.68 0.72 -4.40 9 104 2009-01-15 18:05:59 2003-08-26 12:55:15 6 1 91 0 25 57 3 61.00 56 67.66 CHANGED .As-sEGpIpplDpcuholTLDDGKTYpLPtEhch-uLcsGhKVlVhYspss.GcphlsDlp ....hApDsEGpITcIsKDocTITLDDGcTYKLPuEFD...luAlsPGMKVLIhYDlVD.tpRhITDIQ.................. 0 1 11 14 +6905 PF07077 DUF1345 Protein of unknown function (DUF1345) Moxon SJ anon Pfam-B_13768 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 230 residues in length. The function of this family is unknown. 24.00 24.00 24.10 24.20 23.80 23.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.08 0.71 -4.76 56 475 2009-01-15 18:05:59 2003-08-26 12:57:27 6 1 441 0 171 432 34 171.60 32 81.43 CHANGED hshshs..hstplLlGWsssshlYLslshhhhhp.tsspclRcpApppDcstsllhhlsslus.......huSlsAIshtLssu+p.t...spshchslshsolhhoWhhlpshFulHYA+hYY...........ttstptsGLpFP...s............pppPcYaDFlYFSFsIGhTsQsSDVslso+phRRlsLhHullSFhFNoslLAl ..............................................................................hh....t..hthlluWsshhhlYLlhhhhhhhp.h.ss.ppl+phAtppDpst...hhlhhlshhAs.......lsSlsulhh.Ls...ssp..p.......sphhphslshh...olhhu.WhhltshFulHYA+hYY...........ttstttsuLpFP....t..................ptpPsYhDFlYFSasluh...ssQToDVslsopphR+ssLhpullSFhFNssllAh................... 
0 40 93 132 +6906 PF07078 FYTT DUF1346; Forty-two-three protein Moxon SJ anon Pfam-B_13991 (release 10.0) Family This family consists of several mammalian proteins of around 320 residues in length called 40-2-3 proteins. The function of this family is unknown. 19.50 19.50 20.00 20.50 18.50 19.40 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.09 0.70 -5.12 2 70 2009-09-14 11:59:21 2003-08-26 13:00:14 6 4 40 0 32 87 0 230.00 53 92.94 CHANGED RFuTRLhGATATPPP.PPKARSNENLDKIDMSLDDIIKLNRKEGKKQNFPRLNRRLQQSusRQFRMRVRWGIQQNSGFGKsSLSRRGRVhPGKRRPYGVITGLAARKATGIRKGISPMNRPPLSDKNIERYFPALKRKssLLRQNEVQRK.VAsLKRPNQLNRKNNIPsNFTRsGNKLSHQKDTRQATFLFRRGLKVQsQLNoEQLlDDVVAKRTRQWRTSTTNGGILTVSIDNPGAVQCPVTQKPRLTRTAVPSFLTKR-QSDlKKVPKGVPLQFDINSVGKQTGMTLNERFGILKEQRAsLTFsKGGSRFVTVG ...................................................................thshoLDDIIKLN+KE...t..p......+.........p.phsp..hp..R+.h.ppss....sp.......php...h.R.s.RW.GlQQ.pu.G.h..G+..stl.s..R....Rs+h..hP...GKRRs.GVITGLAARK.ssul+KGlSPhNRsPLSc.K.....shp..p.h....PhlpR...pss..R...ps-hQR...+............hs.lp+stthpR.p...............h....R.s........h..ptQ+-sR..QApFLF+R.G........LK.VQsQlp.tphhs...ss.pR...TRQW..RoSsssuGILTVSIDNPsAhp.....p..P.s.+s.RL...sRsshPsFlhK+-ps-.K.KlPKGVPLQFDI..NSVGK.QTuMTLNERFtILK-QRsshs..s+.GSRFVTVG................................ 0 1 3 14 +6907 PF07079 DUF1347 Protein of unknown function (DUF1347) Moxon SJ anon Pfam-B_14317 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 610 residues in length. Members of this family are highly conserved and seem to be specific to Chlamydia species. The function of this family is unknown. 
24.00 24.00 24.30 75.50 23.30 23.90 hmmbuild -o /dev/null HMM SEED 550 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -12.73 0.70 -6.30 6 40 2009-09-14 11:58:54 2003-08-26 13:26:39 6 2 37 0 7 25 0 547.60 57 85.61 CHANGED spQQ+.tLLCFQGFlLQKQ+phpQSEcIFSKIYcEhpsu.FlhKEElLGGRILNAFFL-NI-hM-hllspLcQpsupSsYLsLFcsLlsYKQKpacpAlpsLShWpsplcpocssLLDlNIppLhSDFlL-sIpAHSLIEhGcFuEGRsILNRIIcKlLKREssWsu-sYDphVLMLSRSYFLELppSppscIYPDYYEMILFYpKKl+ulDQpuYccFlPp-ELhShlM-HlFVVPc-+LsPLMQllchWE+aYhsPNYsLVlcsLlc+Fhosspplt+lCpuIss.c.IEpLKc+LI-sFuclLStpVpplpTscApQsLuLLKILDsslShSEKLllSscsLpcIlupDDtpaTpL+cYLsLWEpIQSYDlDRQQLVHYLhhuAKpLW+pGssD-KALNLL+LILpFTsYDIECENlVhLFlKQsY+QsLStHuhsRLLKLEDFIo-sGLTsIsluEtEIANFLADAEaLaupG-Y+KCYlYShWLTKlAPSs.sYRLLGLCLhENKpYpEAW-hLppLP.NcchaDSKVQKALlLCQKHlsKDhtso ........pQpphYlLCsQGFoLQhQ+KFQESEcIFSRIaccpsSuPFlLpcELLpGRILNAYFLNNLsLMscplsELE+hsG.spsaLhhFKALaAY+sKpYchAl-sLSpWhu+VcpTcshhLDTNl.cLFSsalLEcIuA-SLIts+RauEGRlllNplhNKlFsREasWssDhYNRlVLMLGpSYLLELpEuspuDLhP-YYEhILFYpKph+uhDttAYcpFhPEs.LVsTIMQHlFVlPEspLPhaMphLhMW.ENpYVHPcYSLVlEphcstllp.Du.csp+ICpAIAcS.K.Ic+LKE+LI-hFu-.LshsVpQspTlpAcQYLALLKhLDPcsS.hu+KL..LLSpKplhNhVCpDDuQYo+LKDYLhLWEEt-.tDVDRQQLVHYLhauAK+LW+tGQ.-EtsLcLLKtILhFopp-h..tC.NpshchVKahYsQALuh+shT+LlhlENFl-EVGLPphlsS-AE.IANpLADApYLFu+GDY+hChlYSSWLs+VAP.SscAhpLLGLsLhEpK-YsEALEshpcLP..sE-hasSpVpKA.lLC.Kalu+ppcpp............... 0 1 2 6 +6908 PF07080 DUF1348 Protein of unknown function (DUF1348) Moxon SJ anon Pfam-B_14137 (release 10.0) Family This family consists of several highly conserved hypothetical proteins of around 150 residues in length. The function of this family is unknown. 
21.10 21.10 21.70 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.74 0.71 -4.39 7 617 2012-10-03 02:27:23 2003-08-26 13:50:36 6 5 563 4 278 595 44 138.20 62 88.57 CHANGED RPPLPPFTtETAhpKVRhAEDuWNSRDPs+VuLAYTpDShWRNRuEFhpGR-tI.tFLoRKWp+Eh-YRLIKELWAFsGNRIAVRFtYEW+DDuGpWaRSYGNENWEFDEpGLMp+RcASINDlPIuEu-R+a+W.sLGcRPsD ................................RPPlPPF.T.t.EoAhpKVRhAE.DuWNoRDPp+VuLAYTsDSt.W.R....NRuEF.lsGRpcIhsFLpRKWp+El-YRLIKELWAFs..sNR.............IAVRFAYE.W....H......D...D.u.G....p.WFRSYGNENWEFs.c.s....GLMp+RaASINDlPIpEu-RhF+W...P.Gt.RPDD........................... 0 67 157 220 +6909 PF07081 DUF1349 Protein of unknown function (DUF1349) Moxon SJ anon Pfam-B_14150 (release 10.0) Family This family consists of several hypothetical bacterial proteins but contains one sequence (Swiss:P40893) from Saccharomyces cerevisiae. Members of this family are typically around 200 residues in length. The function of this family is unknown. 
28.00 28.00 28.80 28.40 27.90 27.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.08 0.71 -4.93 37 701 2009-01-15 18:05:59 2003-08-26 13:54:08 6 18 553 4 274 639 31 174.40 25 72.45 CHANGED pWhNcPtpaphss.ctlplsTssp.TDFWppTaYGF..ppcsuphlhhpspsc.F.ohpl+lps.sapshaDQuGLhlhlD-c......sWlKsulEa.sDGhspluoVVTp.shSDWuss.h...ssscphahRloRpssshplph.ShDGppaphlRlsh.a.................sspshplGhhuCSPp.psGhpspFs-hplsss..psh ...........................................hh.p.P.t.hphp......p...ptlplps.tsp.TDhWpc.....o..h..Y.u..F.........ptp.s......u.........h..h......h.....hp.h.....t..........s...........s..F..phplplph...sh..p...p..........haD..QuGLhlhh...sp.p...............................pWlKs..ulEa.....s-uhs...p.lu..oV.l.Ts.....s...hSD......Wu.sssh.................sss..p.....p..h..h.h...RlsR....p....t..s..s.......h....tl...h...u.h...-.............u...........p.p.ap..h...R.lsh..ht.........................tsp..shp...lGhhsss.P.......p......ppG......hp...sp..Fpphplp.......h......................................................................................... 0 73 157 225 +6910 PF07082 DUF1350 Protein of unknown function (DUF1350) Moxon SJ anon Pfam-B_14167 (release 10.0) Family This family consists of several hypothetical proteins from both cyanobacteria and plants. Members of this family are typically around 250 residues in length. The function of this family is unknown but the species distribution indicates that the family may be involved in photosynthesis. 
21.70 21.70 21.80 24.50 20.40 21.60 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.63 0.70 -5.31 3 176 2012-10-03 11:45:05 2003-08-26 13:57:17 6 3 104 0 94 183 162 222.70 31 76.11 CHANGED EW+EIRGNWVLVPppPlGlIHFLGGAFVATAPpLTYRWLLEcLGcAGYVVIATPFVNTFDHtAIApSVLN+FEhsLERLp+pGulssuhLPlYGLGHSMGCKLHLLIGSLY-VERAGNILlAFNNYPAKQAIPa........lDpFsTul.............ulEFTPSPpETN+LIQEsYsVRRNLLIKFsNDDIDQTAuLRsILps+FuDMVTApsLPGNHLTPLGQDlKWQTGuEFSPLDALGQWlKQSLapDLspLp+slLcWLNP .................................tts.ahhhP.......p....Ph...ullcFlGGuFlus.s.PploY+hhLEpLu.p.p.G.a.hllAsPa.ss..s.....FDH.thAppl...ht.p....Fc.p.......s...h.p..tL.....p........p.........................h................t........L...P......h......a.......u............lGHShGshlHLL..l..........s.s...........h...h....ss.....p......+...s.......us..lhhuFNN..hssppulPh...........................hp..ph..s........h......................................................................t...EFsPoPpE.....T.pl.lpp...Y...t...hpps..LLl..+FpsDpl.D.po..L..ph.Lp..t.......c......s......s..h.......h..p......hh.Ls.GsHhtP........htt..................................................................................................................................................................................................................................... 0 29 65 86 +6911 PF07083 DUF1351 Protein of unknown function (DUF1351) Moxon SJ anon Pfam-B_14178 (release 10.0) Family This family consists of several bacterial and phage proteins of around 230 residues in length. The function of this family is unknown. 
21.60 21.60 21.60 22.50 21.30 21.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.18 0.70 -4.71 14 177 2009-09-10 23:43:51 2003-08-26 14:05:05 6 2 167 0 23 155 3 211.50 23 68.34 CHANGED p-lp..lphpPApIp.hshEphcpplsplspcY.pshlVTs-shppcKpphAcLpKltKslsccRhclc+phspPhs-F-thhK.plhp.lcsslspIspslK-h--+p+p.+hcpl+thlschsschtl-tp.hcph.....hpssahssshshKK.lhcplcphlptchpchpphpsscpsIppt.........stp.....slssssYlchLcp.psls-lhsphcsD....h-hp+pp ....................php..lphpsutIp..sh-plcptlsphlscY..psh..h...sT...s..-slp-sKp...sRAcLN....Kltptl-spRKclK+phscPhc-F-pplK.c.hh...t..lc.pshspIspslKpaE-pp+ptRhcplpthlschstpht..lsh.p.hc.h.........hpspahsc.....sh......shKK..hhcc....lss...hh....tt...hpchpphcpspphlpph.............stt.............hshss.sal.phlcp...shs-lht.hcps....h.h................................................................... 0 7 16 21 +6912 PF07084 Spot_14 Thyroid hormone-inducible hepatic protein Spot 14 Moxon SJ anon Pfam-B_14186 (release 10.0) Family This family consists of several thyroid hormone-inducible hepatic protein (Spot 14 or S14) sequences. Mainly expressed in tissues that synthesise triglycerides, the mRNA coding for Spot 14 has been shown to be increased in rat liver by insulin, dietary carbohydrates, glucose in hepatocyte culture medium, as well as thyroid hormone. In contrast, dietary fats and polyunsaturated fatty acids, have been shown to decrease the amount of Spot 14 mRNA, while an elevated level of cAMP acts as a dominant negative factor. In addition, liver-specific factors or chromatin organisation of the gene have been shown to contribute to the regulation of its expression [1]. Spot 14 protein is thought to be required for induction of hepatic lipogenesis [2]. 
19.80 19.80 25.40 21.00 18.80 18.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.01 0.71 -4.36 10 191 2009-01-15 18:05:59 2003-08-26 14:17:21 7 4 83 1 106 186 0 134.70 30 90.42 CHANGED MQls.DohspKpSLhNAMNRFluAVNNMDQTVMVPSLLRDVP....Lsppstc...............phpssss......ts.h.stpsDMYsaYlLLKSIRNDlEWGlL.+p......u..Esst+cc...ssssutsscEs...s-tDLEpQFHYHLpGLHoVLSKLT+KAspLTNRYKcEIGhushup .........................................p....KpslhssMpRahusVpsM-pTlMlPSLL.RDl.l.tptt.p......................................................t.ttDhYphY.hLKu.I+s-l-aG..............lh....pt.......................t...p.t.t..ppp..................p...s...tt.pt...ttp.-L....E...tpF+hHlpGLaplLscLTppAp.LTp+Ypp.hG......t............................................... 0 22 32 69 +6913 PF07085 DRTGG DRTGG domain Bateman A anon Bateman A Domain This presumed domain is about 120 amino acids in length. It is found associated with CBS domains Pfam:PF00571, as well as the CbiA domain Pfam:PF01656. The function of this domain is unknown. It is named the DRTGG domain after some of the most conserved residues. This domain may be very distantly related to a pair of CBS domains. There are no significant sequence similarities, but its length and association with CBS domains supports this idea (Bateman A, pers. obs.). 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.00 0.72 -4.43 171 2967 2012-10-03 03:17:47 2003-08-26 14:50:23 7 34 2600 6 655 1966 228 107.60 30 20.44 CHANGED -lsctLsucllsusptttcp.lpchhluAMshpshlphlct.....sslllssGDR..sDl.luAltss.......huulllT.Guhpsssplhclscptt....lPllssshDTassuphlsph ................................................................-lschlsupllsss.-th...p+c..lpphsl.u..Ahs...l.p...s...h..lc..a...hcs.............GsLlls.uDR....s-.l.hlsAhhss..............huulLlT....G...G...ac....s....s...sclhcLscctt..............lPllpss...hsTapsuthlpp.................................. 0 237 457 587 +6914 PF07086 DUF1352 Protein of unknown function (DUF1352) Moxon SJ anon Pfam-B_14369 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 190 residues in length. The function of this family is unknown. 22.20 22.20 23.00 22.70 22.10 22.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.35 0.71 -4.66 10 145 2009-01-15 18:05:59 2003-08-26 14:54:03 7 3 112 0 88 133 0 171.20 36 94.47 CHANGED MASRuGPRAuGTDGSDFpHRERVAuHYQMS.....sshKSElKKLIhlHhLIWlLlsApluVupL........tLlS+cpVuhPYQWEYPYLLSllPSllGLhuhP+NpISYLVlSMIStGLFuluPLlYGshthFPtupcLa+HGKAhchhhshoAls.............lMYllhVlAlQVHuWQlYYSKKLLDuWhssTpcKK++ ...........MASRu.G.sRsuGTDGSDFpaRp+VAs+YQhS..........sphKuclKhhl.....hhHhllahlhhA+.lssshL................hh.lpc.pls.Ph.W.E...Y..s.ahl..SllsohlGlhuh.RNplshLhh.h.l.uhhlhul...hPllau.h...hhsts.phhppscu.ch.hs.shs..............lhYhhhhl...uhQlHsa.plY.auhpLlpuWhststtc....................................................................... 0 29 41 64 +6915 PF07087 DUF1353 Protein of unknown function (DUF1353) Moxon SJ anon Pfam-B_14433 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 100 residues in length. The function of this family is unknown. 
22.50 22.50 22.50 22.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.30 0.72 -4.52 35 353 2009-01-15 18:05:59 2003-08-26 14:55:52 6 3 263 0 55 251 186 91.10 36 62.82 CHANGED pacLhps..........hhhpssu.....hhhhVPtG..ap.TDhASIP+hhhslh.sPa.ucYhpAAllHDaLhspt........................ths+cpADplFhcuhtshG..Vsth+thhhahAV ..........................................aclhps..........Yh.p.ps....hl.VPtG.....Fh.TDhAS....lPRl......F.W......s.lh..PPh....GcYhpAullHDaLhcps..........................hps++pADhlFh-uMphhG..Vs+aKthlhYhAV..................................... 0 14 33 45 +6916 PF07088 GvpD GvpD gas vesicle protein Moxon SJ anon Pfam-B_14302 (release 10.0) Family This family consists of several archaeal GvpD gas vesicle proteins. GvpD is thought to be involved in the regulation of gas vesicle formation [1,2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 484 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.68 0.70 -6.18 5 21 2012-10-05 12:31:09 2003-08-26 15:10:38 6 1 15 0 11 437 87 367.10 29 79.27 CHANGED lcRFFsG-sG+TLLINGAPGTGKTLFTIRGLDVLcR-uDVLYVSTRVDQ-TVaEMY......FcsHuSLD..KTulL.............DLhQDPFtLPhDVDVPFEpLsL-SLLEWVDtIsAsuc+LTIAFDSWcLlYEYLAsRHDsPPD..IcTVTNQLVsLARsuGlRLlLVoETAssSsLEYIVDGVVTL..pVK-D-RGRTRRsLRLEKLRGVRIGNRLQP...FTLADGQFpuITPVELlTscTssspuTWEPts.NssA+FSTGIGDLD..+ILSGGaNR...GSVVHLDLGsDLSRDAWSVLsLPAIRNFLupEMGVAVVPP+EGSPGLLHNDL...NsVLo+uVFDTaCHVFETYAGPo+ut........stpaLsshhTso.SDAlsP.......................stlssp-........asoPlEGGpLcYDPYhEhlEplRcQS-GPLLHVISMDTAapAFETRLGDFANYVA.....LHNDusILITKsGTtLRTRADRVADMHFRLEp.SG-AIsLYGENPLTPLLGIGlccSpsIPKIpLTEMV 
..............................................................ppFFptcsGpoLLI.pGtPGTGKThF..o..lc.h.L...s...s...l.p..c..c..t.s..s.h.Yl.S.....T.R.V.s.p.-.sl.a.cha...............h...ct.t..ls............ht..ll...........................c.h.p..p....D.....h..t.....h.........h...t....h...s....l.....Ph.............h...s......l....-.s....l...h...p.a.l....c.p...l.s.s....t...s..c.p...s.....hl.s.lDS...W.....h..lh-h.L.u.s...c.a.ssspc.......l...cslp..s...pLs...........h.....h...c.....c.t.s.....s.+..L...l...l....V......h....E....s....s...c......p.......p.....s........L.........-.....Y.ls.DGVVoL...........phc......ps...p..G.R.hh...R.LpL-KLRGlpIpppha....FTL.t..s.G..pFp..s..hss.s.p.h.h.spp....h.hcPhs...ss.pscaSTGhtDhD..pIhsGGhph...GShlhL-hspsls.pha.hls.h.shtNhlp.thtshll...................................................................................................................................................................................................................................................................................................................................................................................................................... 0 6 9 10 +6918 PF07090 DUF1355 Protein of unknown function (DUF1355) Moxon SJ anon Pfam-B_14563 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 250 residues in length. The function of this family is unknown. THe structure of this domain was solved by the Midwest Center for Structural Genomics (MCSG). The structure has been classified as part of the Class-I Glutamine amidotransferase superfamily. 
24.40 24.40 24.50 25.00 24.20 24.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.27 0.71 -4.90 13 830 2012-10-03 00:28:14 2003-08-26 15:19:49 6 10 622 13 194 554 141 168.00 35 34.26 CHANGED +VLllG.ESWstphpHhKGFDpFsol+acpGAchLLpslcsushclcaMPAHp.utpsFPhsh...EpLssYDsllLSDIGuNThLL.stsahc.p.sPstLcLl+-YVspGGuLlMlGGYhSFpGIpu+Apa+pTPlt-VLPVs..........sLshDDR.......VEhPpGhtspshu..cHPlspGLus......-W...P.lLGaN...cl ............................................................+VLhlu.tp.H.t.hp.+.s..h.............K.-.tu.ssh.lh.slhp.s.s...c..D..hhPscp...s.hsFPpsh.....-pL..spaDsIllscluusshL...................sstLc.hI.t.cY.V.p.p.G.Gu.LLMlGG..SF......upusappTs.Lu.-VLPVp.............h.s.ss..cp............lE...pP.....tsp..hps.h..u...cHPlspshs........................ca...s.hhth........................................................................ 0 79 129 155 +6919 PF07091 FmrO Ribosomal RNA methyltransferase (FmrO) Moxon SJ anon Pfam-B_14605 (release 10.0) Family This family consists of several bacterial ribosomal RNA methyltransferase (aminoglycoside-resistance methyltransferase) proteins [1,2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.60 0.70 -5.39 14 272 2012-10-10 17:06:42 2003-08-26 15:26:17 6 1 225 6 8 986 235 174.10 50 65.63 CHANGED uutKYRslsPssVcRlspcth.spptshtpshKtsKp+LHplhGAal..tts.htthL+pltp.....sssssD.puhpsh.t............t....hhuhHuST+ERLPh..LccFYsplFutlss.PsoVhDlACGLNPLAlPWhshssss.sYhu.DIDpshh-hlsshLshlssttpsplp..Dllss.ssssssDlsLlLKslPsLEpQctGuuhcLlctlsu.hlVVSFPT+oLGGRs+GMtpsYstthEuhsstcsap.hpchphusE.Llal ...............................................................................pphsshcp.cccuphh..........................................................KuLQhl...oApCVKQVEVIRA..R....................R....LLcGpAST...o.....GY.F-N........IEH..C.I....DE....E..FG..p...s..o..l...N..D...K...LLLlGS..G....A..Y..PMTLIQV..A.K.E.T..GA..SV....IGIDIDsQA.V.D.LGR..R..I.....VN.V...L...A........P...N...E.......D..I.sIo........DQc.V.u.....p..L..K.D..I.pD.V.T...a........................................................................................................llhppshsh......................................................... 0 6 7 8 +6920 PF07092 DUF1356 Protein of unknown function (DUF1356) Moxon SJ anon Pfam-B_14617 (release 10.0) Family This family consists of several hypothetical mammalian proteins of around 250 residues in length. The function of this family is unknown. 
22.00 22.00 22.00 22.00 21.80 21.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.18 8 192 2009-09-13 19:50:27 2003-08-26 15:34:03 7 5 60 0 95 173 0 197.30 43 86.84 CHANGED pED..............ps-chLsccspptsIuQFPYVEFTGRDSlTCPTCQGTGcIPptQ.NpLVALIPYSDQRL+PpRTKLYVhLSVlLCLLsSGLVlFFLFPRSVhVsssGlKSVpVsFscpsphVhLslTuoLNIoNsNFYsVpVsslouQVQahKsVIGptphoNlohItPLup+QlsaTVpsclusp.oYhYhaCTlsoIKVHNIVlaMQsoVphSYhuHspQsolEoYcYVDCGuNoT ..............................t...................t.t..spttppt.huphPh.chsG.p.sul...TCPTCQGoGcIPp..tpcspLVALIPauDQRL+PpRTKLYV.hhu....VhlCLLhuuLs..l......FFLFPRSl.Vp.s..G..lpss..hVs..as...........tpp....l.hLslTshLNIoNsNaYsltVps...lospV......a......p.sVlGphph.ssh...Is.Ph.s.p..h.asl.s..h...ttt.....o..ha...hCTh.pIpVHplllhhp..solph.oYh.uH.pQ.s.ppapYVDCttNso......................................................................... 0 19 29 50 +6921 PF07093 SGT1 SGT1 protein Moxon SJ anon Pfam-B_14698 (release 10.0) Family This family consists of several eukaryotic SGT1 proteins. Human SGT1 or hSGT1 is known to suppress GCR2 and is highly expressed in the muscle and heart. The function of this family is unknown although it has been speculated that SGT1 may be functionally analogous to the Gcr2p protein of Saccharomyces cerevisiae which is known to be a regulatory factor of glycolytic gene expression [1]. 
27.00 27.00 28.50 27.10 26.80 26.80 hmmbuild -o /dev/null --hand HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.94 0.70 -6.13 33 327 2009-01-15 18:05:59 2003-08-26 15:46:34 6 13 251 0 233 314 3 469.20 23 84.08 CHANGED sVcYpla............t.tsspsphpplpphhtclhscls.hh.....ps.YIWQ.+-sFpLphpptp....................sssaltGpTpa.GDsl-D.EWaIVaLLhclo+pa..sslhs+VhD.sDGEFLLIEAAchLPcWLsP-supNR.VaIppGcLpIIs............t.spt.spslolppAlphlp.spsp.thhtSsplpsslppR.lcsY..Pcp.lppsl...HcAplhlPtplAtlL+p..cPpLlusAVpAFYhR..DPl.sl+.....sspshphF.....................Pcsh.VpsoVpFT+s.L..YAQLhpQpFs.Psph........................hthPssscs..tactt-.......LGhKL............................usGFEhLhspsctptpssp...........ss.thpshLcsLpcsGhhp.s.l.sspchpphhp.s............................................................................t...t...spDDDsWLs.ls.--L-p.Lpp....+ttpppthp.spppp.....s.....................................sLpclspphpsFlsc.tusa-Gs......-h.................................................................................................................................................................................................spsspcpslsh........D....t-pFhc.hhcchLshsss-.ss..s..............................sshtc.csp-s-sps--.............t.p............stpplpphhppM-pELttoshhp.....................stttpt.pstsp............................tptsstsp.t.....pslDlD 
..........................................................................t.lpp.h.phht....h.............tYlWp.ppshp..lphh.tp........................................h...hlh..G.hhpa.GD.slcD.EWhlValLhplocpa....splhh.+...l...D.sDGE..FLLIEA..Aph.LP..pWl.p..P.-.s..........s.pNR.V.alppGpLtlls................................................sps..sl.pAlph..lt..spst..thht.........sttlptth.tR.lpt..a.....Ppp...hpp.sh.........HpshshlP.tt.l.u.tlLcp..pPphlu.AlpuF.h.R..Dsh..sh+......................tspt.h..hF........................................................s.p.ph.lhsslphT+s.h..YApLh..pp..pa....ss.....................................hs..tp......thtthp.................lGhKl...........................................................spGhEhlhp...p.t.pptt...........................................s..ht.hhppL.pps..t....l.sst.ph.phhp....................................................................................................pDs-pWhp.hs.pph.-p.Lpt.............t.tt.t..t.tt..t....................................................thtphspphptFlpp....us...hcGs.......c..h...........................................................................................................................................................................................................p.p.pt.hph...................................-.......ppF.p..hhcph.hs.......tt....tt...........................................................................t..t....p.tt............................................................................................................p.tthpph.tth-tELtttth.p............................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................... 0 80 131 195 +6922 PF07094 DUF1357 Protein of unknown function (DUF1357) Moxon SJ anon Pfam-B_14833 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 225 residues in length. Members of this family appear to be specific Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 21.90 21.90 22.20 40.60 21.70 21.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.43 0.70 -4.90 3 130 2010-01-12 09:55:36 2003-08-26 16:05:37 6 3 27 0 10 107 0 192.30 66 94.63 CHANGED MpsKE-pEDSpclsSpsspV..psDsllISApEFEEY++aK-psNscSKtoH+DLSINERlo+ELAEVpEREulpcpLLhEApRINEIDTLAscaLSNHFNKEsLLAKGYSLKEIlpAQuRELIRKYVssEQIKAIAKVssl-HIDGchLEQLlcLAKVNIKQRKNAEsNa+olsplRsNh.VKhcE+sSLpNSNFhPIN+TELscAM.Npacph+IQFYpNpK+ ................hhEcEEKEDLpsQsK--pQl..KuDTKVISspEFEEYh+hKEQu.N......s.K.....s.K.EosRDLSINERITKELAEVEERERlEKQLLLEAERINEIDTLAKAHLSsHFNKEsLLAKGYTLKD.IMQAQRRELVRKFVPIEQIKAIAKsSDISHIDGEILEQLVSLAKVNIKLRKNAsSsSSSVDuIKGNIhhKSEERsSLLDSNFVPINFTEFVQAISNTYKQRRhQFYENLKR........... 0 5 5 5 +6923 PF07095 IgaA Intracellular growth attenuator protein IgaA Moxon SJ anon Pfam-B_14923 (release 10.0) Family This family consists of several bacterial intracellular growth attenuator (IgaA) proteins. IgaA is involved in negative control of bacterial proliferation within fibroblasts. IgaA is homologous to the E. coli YrfF and P. mirabilis UmoB proteins. Whereas the biological function of YrfF is currently unknown, UmoB has been shown elsewhere to act as a positive regulator of FlhDC, the master regulator of flagella and swarming. FlhDC has been shown to repress cell division during P. mirabilis swarming, suggesting that UmoB could repress cell division via FlhDC. This biological function, if maintained in S. 
enterica, could sustain a putative negative control of cell division and growth exerted by IgaA in intracellular bacteria [1]. 25.00 25.00 26.10 31.00 23.70 21.20 hmmbuild -o /dev/null HMM SEED 706 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.52 0.70 -13.02 0.70 -6.78 10 600 2009-09-10 22:06:59 2003-08-26 16:21:33 6 1 546 0 41 315 4 641.80 75 99.32 CHANGED MSTIVllLAhlLAClllsGhhlth+tR.R+.tlshh.uFucsTsRKLTu-ERuAIEsYL.pp.sp.......hhsoPssouuoussspLsLsspSDsVYslT+AITRYGlooD-PNKWRYYLDSlEVHLPPFWEQYIs--NsVELI+TsSlPLVISLNGHoLp-ahp-ts.u.sLppsuss.....pASIRpEESEplELLNIRKET.EE+ALs+PsGl+EAsLIsAuFLLaFFuLloPsVhlPWLsusAlLLluhGLWsLFpPPucssLRElHCLRGTPKRWGLFGEssQGQIsNISLGIIDLIYPsHWQPYIApDLGpKTDIDIYLNRQVVRQGRaLSLHDEVKNFPLQ+WhRNhllsAGSLLVLlLLlhaVPLslPlKLoloWL+GAQTlElToVspLEcAtLRlGDTL+spGTGMCYl.sPsp..hSsppsosFhPFDCSuIYWNsAsPLPlPES-Tl-KAoALlpoVN+QLHPp.-s-s+VNPsLuSAIQKSGMlLLDDFuDIVLKTQDLCss-.sDClRLKNALVNLGNsKDWsoLVKRAcuGKL-GlNVLLRPVSAEoLEsLVsooTusFlhRETs+AApuLNSPPPGGFLItSDEGKQLVspPhPsl......sLaDYsuhEQWpELQRLusMLLHTPFpAEGIlTslsTDANGTpHIsLHS.PDpsoLWRYlGTTLLLLshlsshlaNuVhslRRhp+sRpRhp-IQ+YY-sCFNspLhPss....c 
...................MSTIlIFLAALLACSLLAGWhh+V+SR.R+.pLPWssAFsDAQTRKLTPEERSAVENYL..-.sLoQ........lhQVPG...P..TG..A..SA.APISLs.LNAESNNVhhLTHAITRYGIoTDD.PNKWRYYLDSVEVHLPPFWEQYINDENsVELIhTDoLPLVISLNGHT.L.Q..EYM.QEoR..GYALQs.ssST.........................QASIRGEE.SEQIELLNIRKETHEEYALSRPpGLR..EALLIVASFLhFFFCLIT.PDV..FVPWL.sGGAlLLLuAGLWGLFA.PPu..K..............SuLREIHCLRGTPRRWGL.FGENsQEQIN.NISLG.IIDLlYPAHWQPYIuQDLGQQTDIDIYLD.R.HVVRQGRaLSLHDEVKNFPLQHWLRSTlIA.uGSLLVLFM..LLFWIPLDMPLKFTLSWMKGAQTIEATSVKQLscAGVRVGDTL+lSGTGMCNI+ouuT..W.S..u.po..NSPFhPFDCSQIIWN....DApuLPLPES-LVNKAsALopAVNRQLHPK.PED...-....SRV......S..ASLR......SAIQKSG.MVLLDDFGDIVLKTADLC.......SAc.....DDCVRLKNA.....LV.N.L....G.NSKD....WsALVK..RANAGKLD..GV..NVLLRPVSAESL-NLVsTSTAPFIo+ETARAAQSLNSPAPGGFLIsSDEGS-hVDQPWPSs......sLYD.YPsQEQWsAFQ+LAQMLMc...TPFsAEGIVTpIaTDANGTQHIuLHsIPD+SGLWRYLuTTLLLLsMlsSAlYNGlQAaRRYQRHRTRMhcIQtYYESCLNPpLhss.p................... 0 1 9 24 +6924 PF07096 DUF1358 Protein of unknown function (DUF1358) Moxon SJ anon Pfam-B_14731 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 125 residues in length. The function of this family is unknown. 20.90 20.90 20.90 21.50 20.50 20.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.51 0.71 -4.44 5 99 2009-01-15 18:05:59 2003-08-26 16:24:23 6 4 80 0 65 112 0 110.40 41 68.73 CHANGED MtothspsGpossu..psssu-cERKFRl+GGAFLGsVAusuAlAGFS+TLuhAKKuDPcaF.sKGlpuolAL.EoGoSLALRALGWGTLYAaLGTGsICFGlWKLoGA+sMpEFRpKMGoIFPRI .............................................tststcc+hFhlpuuh..FLGsVuusuhLuGFspTLuhAKKpsPcaF.s+Gsh..ustuL.EoGuoLALRALGWGoLYAhsGsGllsaulWKhhGl+shp-FR.KMtshhPtl............... 0 23 29 44 +6925 PF07097 DUF1359 Protein of unknown function (DUF1359) Moxon SJ anon Pfam-B_14784 (release 10.0) Family This family consists of several hypothetical bacterial and phage proteins of around 100 residues in length. 
Members of this family seem to be found exclusively in Lactococcus lactis and the bacteriophages that infect this species. The function of this family is unknown. 25.00 25.00 133.60 133.50 20.50 19.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.30 0.72 -3.97 2 9 2009-09-11 08:58:17 2003-08-26 16:27:01 6 1 6 0 3 4 0 98.10 68 94.24 CHANGED M.pEhpl..pcpItphppKhsRLpplIHtl+pQ....cll.DchpsscIppssKFthpLs...uhhcsshpIsVGTLIsLLcpNIEsNTslhsELstcLGI-lc MsQEITlDFSEQIAKsQTKIsRLKchIHcVRcQ....KIVLDDlKNNHhs+DTKhELNLG...GVLKCSVKINVGTLIPLLEQNIEDNTsLIpELAKELGIDIK 0 3 3 3 +6926 PF07098 DUF1360 Protein of unknown function (DUF1360) Moxon SJ anon Pfam-B_14863 (release 10.0) Family This family consists of several bacterial proteins of around 115 residues in length. Members of this family are found in Bacillus species and Streptomyces coelicolor, the function of the family is unknown. 20.90 20.90 21.10 21.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.37 0.72 -4.31 21 265 2009-01-15 18:05:59 2003-08-26 16:44:47 6 1 226 0 60 183 34 103.80 37 75.40 CHANGED llslAoaRLTRLIVaDpITsalRpPFhcptcph..p.GpsEshhss+u..stlRphlGELLSCYWCTGlWsAshlhhualhhPchsp.llhlLuIAGuAul.lEshlu+ ...........................lhsLAsaRLTRLIVaD+I.TuhLRpPFhcc.......hc.......hs.......-........s...Gsspshpt.s+u....pulR.phlGEL..LoCaWCsGlWlushlh..ss..h...sal..P....phupsl..lhlLAlAGuuul.lEshhu.h...................... 0 16 41 49 +6927 PF07099 DUF1361 Protein of unknown function (DUF1361) Moxon SJ anon Pfam-B_14870 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown although some members are annotated as being putative integral membrane proteins. 
25.00 25.00 26.30 26.30 21.30 23.70 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.28 0.71 -4.43 31 569 2009-01-15 18:05:59 2003-08-26 16:47:59 6 3 546 0 84 337 14 166.00 36 79.45 CHANGED lWNlFLAhlPhtluhhlphh+sp........hlhhlhsllWLlFhPNA.YllTDllHLp................hhtshsht.ahthhhlhsushhulhhGhhShhhlhphhpph........h.thhlhhslhhLsuhGIYlGRFhRhNSW-llspPpsllpplls.l.....ptcthhFllhhshl.lhlh .................................hhNlFLAYIPhELu...lLL....phhK.p....................h.lahlh.uhlallhhPNs.YhlTDLlHLp............htashhh.shshspWhhFsh..Llhulhhulhlshhsh..hpl....hp..h...hppp...........hh...phlllssLhaLsuhGIYIGRF.........hRLpShalhs.pPhpllpclhpsl.....shc.ph...hFlhhhshhQhhl.h.......................... 0 25 59 75 +6928 PF07100 ASRT DUF1362; Anabaena sensory rhodopsin transducer Moxon SJ anon Pfam-B_14972 (release 10.0) Family The family of bacterial Anabaena sensory rhodopsin transducers are likely to bind sugars or related metabolites. The entire protein is comprised of a single globular domain with an eight-stranded beta-sandwich fold. There are a few characteristics which define this beta-sandwich fold as being distinct from other so-named folds, and these are: 1) a well conserved tryptophan, usually following a polar residue, present at the start of the first strand; this tryptophan appears to be central to a hydrophobic interaction required to hold the two beta-sheets of the sandwich together, and 2) a nearly absolutely conserved asparagine located at the end of the second beta-strand, that hydrogen bonds with the backbone carbonyls of the residues 2 and 4 positions downstream from it, thereby stabilising the characteristic tight turn between strands 2 and 3 of the structure. 
25.00 25.00 28.30 30.40 20.70 20.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.55 0.71 -4.37 25 78 2009-09-25 12:58:53 2003-08-26 16:50:45 6 2 67 25 33 77 31 117.60 44 90.17 CHANGED slGc+pWsIs-GYIPspSsGsp.thsSHEslClLNsuDc-A+lplTlYFsDR-PlGPaclsVsuRRThHlRhN..-L.-PpsIPpsssYAsllES-VPVVVQaoRLDoR.QuphALhoTlAYss ..t.lGcppWsIs-GYIPspSsGs.............hsSHEslClLNsuDpsAclclolaFpDR-PlssaclsVsARRTpHlRhs..-LtcsEslPpsssYAhllcSDlPlVVQaoRLDop.QuphALhoThAYs.s..... 0 12 23 28 +6929 PF07101 DUF1363 Protein of unknown function (DUF1363) Moxon SJ anon Pfam-B_14992 (release 10.0) Family This family consists of several Trypanosoma brucei putative variant specific antigen proteins of around 80 residues in length. 19.50 19.50 23.20 85.50 17.70 17.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.05 2 14 2009-01-15 18:05:59 2003-08-27 09:26:29 6 1 4 0 0 11 1 102.00 73 95.07 CHANGED MSRHGNIDIGCGAGNTMDATFRSCTPHESFYYLSINHDLKAREAQNNNTNSDTICFSTHLHKRSNRRLDRRCEYIFGICSIKGNSAARRKKFLpTPLCQRYlNNCLKYMHSICHYQTRPGRTSS ..................MS.cGNIDIGCGAGNTMDAsFRSCT.HESaYYLSINHDLKAREAQNNNTNSDThpFSTpLHKRSNR+LDRRCEYIFGhCSIKGNSAARRKKFLKTPLCQRYLNN................................ 2 0 0 0 +6930 PF07102 DUF1364 Protein of unknown function (DUF1364) Moxon SJ anon Pfam-B_14821 (release 10.0) Family This family consists of several bacterial and phage proteins of around 95 residues in length. The function of this family is unknown. 
29.50 29.50 29.50 29.50 29.10 29.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.38 0.72 -4.11 13 531 2009-01-15 18:05:59 2003-08-27 09:33:10 7 4 387 1 29 247 9 91.50 59 93.90 CHANGED pscLRcAARGcpCplRIPGlCNtNPETTVLAH...hRhsuhpGsGhKscDhhusaACSuCHDtIDuRs+t...hspEphchhthcGlhRTtthLhccGhl ........................MusLRKtARGRECQVRIsGl.....CNG...NP..EToVL.AH.......hRhAG.hCGT..G..hKPsDLluAhACSuCHDEIDRRT+h.......lDs..c-s+hhtlEGlhRTQsIhlKEGhl............................ 0 3 9 19 +6931 PF07103 DUF1365 Protein of unknown function (DUF1365) Moxon SJ anon Pfam-B_14846 (release 10.0) Family This family consists of several bacterial and plant proteins of around 250 residues in length. The function of this family is unknown. 25.00 25.00 26.80 26.50 24.50 24.10 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.76 0.70 -5.33 104 812 2009-01-15 18:05:59 2003-08-27 09:37:11 6 11 766 0 319 782 1520 234.30 30 80.99 CHANGED uslahGpVhHpRhpPhpHpFpYplhhhhlDLD..-ls.................th...phst.hauhs+h.shhuF+cpDa.........................ushpts......ltshlpph...tGhp.s..GclhLLspsRhhGasFNPlShYaCacpssp.....L.psllAEVsNT.as-RHsYllsh...............tstphpscKsFHVSPFhshchpYca+hs.s......sc.cltl.....plp.p......................pt.................hhsAolshpRpP......LosssLhcshlphPhhol+llsuIaWQAL+L.alKtlPhhs.+Pss.pp 
......................................................................tlh.GplhHpRh....t..P...h..p..HpFpY..phhhhhlDLD........cls...............................................................pl......th.h..hhu.....p....t..h...sh...hp..FcppD.ah..................................................................s.sh.pts.....ltsp.lpph...........pGhp.s...GclhhLspsRhh.Ga.h.FNPlohaasa........cp.ssp.....................L.phllAEVsNT.asERHsYllss.................................tthphphsK.sFHVSPF.sh.p.t.pYp...a+hp.s................sc..pltl.........plphp....................................pt..................tthhsAols.....hp.tps........lo.s.t....s...l.....hp.....hhhphPhhs.h+lhh.....tIaWpA..l+L.ah.K..t.s.Phhs+Pt...p............................................................... 0 91 197 265 +6932 PF07104 DUF1366 Protein of unknown function (DUF1366) Moxon SJ, Sammut SJ anon Pfam-B_14849 (release 10.0) Family This family consists of several hypothetical Streptococcus thermophilus bacteriophage proteins of around 130 residues in length. One of the sequences in this family, from phage Sfi11 (Swiss:O80186) is known as Gp149. The function of this family is unknown. 21.60 21.60 21.60 22.00 21.40 21.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.45 0.71 -4.47 7 78 2009-01-15 18:05:59 2003-08-27 09:44:18 6 1 46 0 9 64 0 113.10 35 67.89 CHANGED M.hEatup..ph.sstustusKVlLcspD...Gu.lPlhLPsEhhDhopsElLpps.clIYQc.aPp+AEsEKFs-Ls.......t..th.ptp.sKhEphhphuosTL.slIsphhtccshsD-sl ..............h...plsuKYPphDuoGulsuT+VIlss-D...GuhIs.h..l.pDhhspssoEllctsLEpahcppas-hAhuEthpKlD..............-..cKh.pppssK...stcs...spsA......................shh............................. 0 0 6 8 +6933 PF07105 DUF1367 Protein of unknown function (DUF1367) Moxon SJ anon Pfam-B_14892 (release 10.0) Family This family consists of several highly conserved, hypothetical phage proteins of around 200 residues in length. The function of this family is unknown. 
Some proteins are annotated as IrsA (intracellular response to stress). 25.00 25.00 27.30 25.20 18.40 24.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.24 0.71 -4.90 9 383 2009-01-15 18:05:59 2003-08-27 09:48:02 6 3 265 0 27 249 3 156.50 44 98.30 CHANGED MA.phphIKpusGhLlPATP-sp-hLp.+hKhGsVlhu-F....KpVRNPtFHRKFFALLsLGFEYWEPsGGslospEpcLlpGaspaLstasGpc..ssLt-hAspYLtplutcR.ssslulpKSF-AFRcWVTVEAGaYclhphPDGolt+cP+SISFAuMD-hEFppLY+usLsVLWpaIL..sRpFsopp-sEsAAsQLhsFs ...................MA.c.lphlKp.usGlLlPATscst-h.......Lp.plKlGt.hlh.A-.F....+ps..RN...AFH++FF.t.LLpLGF-YWpPsGGslostEpcLlpGaspaLst...sGpc....ssL.....psA-pYLpplAppR..st..s..h..ulhKSF-AaRtWVTlpAGaYs.th.hP.DGoht++s+SIuFusMD.-sEFpplYKusLsVLWpaIL...RpFpo.ppsENsAuQLhpFA.............................. 0 0 7 17 +6934 PF07106 TBPIP Tat binding protein 1(TBP-1)-interacting protein (TBPIP) Moxon SJ anon Pfam-B_14830 (release 10.0) Family This family consists of several eukaryotic TBP-1 interacting protein (TBPIP) sequences. TBP-1 has been demonstrated to interact with the human immunodeficiency virus type 1 (HIV-1) viral protein Tat, then modulate the essential replication process of HIV. In addition, TBP-1 has been shown to be a component of the 26S proteasome, a basic multiprotein complex that degrades ubiquitinated proteins in an ATP-dependent fashion. Human TBPIP interacts with human TBP-1 then modulates the inhibitory action of human TBP-1 on HIV-Tat-mediated transactivation [1]. 
30.00 30.00 30.20 30.00 29.90 29.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.19 0.71 -4.71 12 266 2012-10-04 14:01:12 2003-08-27 11:17:55 8 8 222 0 184 281 2 151.20 26 63.87 CHANGED -ApshlhcYlpcpNRPaSsp-lhsNLpp..tluKssVpKsL-pLspps+IhpK.YGKt.KIYhssQpphc.ssst-lpclchc.lppLppclptlppphpplppcl+pLppsLsst-lhcplppl+cclpphcc+Lcslcp.shp...lo.--hpplhpppphhpptapKRK+hhp ...............................thlhpahpppNRPasspslhsNLpp...tlsK.stl...KsL-pLspp......sclhtK...h.....GKp..+lY..hspQs........ph.p............ss........st-.lpthctp....lt....pLppplpplppphpphc.scLppLpuphost-.hppplppLcp.-....hpphpp+Lpplc......s...sssh........ls.s..--tp......pl....cphpphpppapcRK+hh............................................ 0 74 108 153 +6935 PF07107 WI12 Wound-induced protein WI12 Moxon SJ anon Pfam-B_15477 (release 10.0) Domain This family consists of several plant wound-induced protein sequences related to WI12 from Mesembryanthemum crystallinum (Swiss:Q9XES3). Wounding, methyl jasmonate, and pathogen infection is known to induce local WI12 expression. WI12 expression is also thought to be developmentally controlled in the placenta and developing seeds. WI12 preferentially accumulates in the cell wall and it has been suggested that it plays a role in the reinforcement of cell wall composition after wounding and during plant development [1]. This family seems partly related to the NTF2-like superfamily. 
21.50 21.50 21.50 22.30 21.40 20.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.66 0.72 -4.17 7 77 2012-10-03 02:27:23 2003-08-27 11:26:04 6 4 22 0 43 83 0 103.80 44 66.72 CHANGED MRLLTG.us.usuSFpFpPpSVsuF...GssVlAEG.sDsspplhWVHAWTVs...sG..lITplREYhNTsLTVTRlu.sssuptst...................pspslWpSphssRAtKslPuLlLAl ....................................M+lLTG...ts.........p.....p..s.......u..........FpF.Ppulsuh.......Gs.sVlAEG......t.p..stp.........h...YWVHAWTVs...sG..lITQlREYFNTsLTVTcls..s.t.......................................ttstslWpSph.....s..-.ht+SlPGLVLAI........................ 1 5 30 38 +6936 PF07108 PipA PipA protein Moxon SJ anon Pfam-B_15507 (release 10.0) Family This family consists of several Salmonella PipA (pathogenicity island-encoded protein A) and related phage sequences. PipA is thought to contribute to enteric but not to systemic salmonellosis [1]. The family carries a highly conserved HEXXH sequence motif along with several highly conserved glutamic acid residues which might be indicative of the family being a metallo-peptidase. 25.00 25.00 28.80 109.90 24.30 23.20 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.27 0.71 -4.94 4 164 2009-01-15 18:05:59 2003-08-27 11:31:57 6 1 116 0 4 64 0 174.20 86 93.04 CHANGED sssFPDlPEHussPS.lRLuaDplATNS-h+LcP.EplsEYhISGsGGIDPDhEIDDDhYsECapcLSpILpsAYTQStTFRRLMNYAYDpELaDlEpRWLLGAGEsFuTTVTsE-LpsSpGRKVIsLNLD-ssD.sshPEpYESs-GP.p.FDTpRSFhHEIVHALTpLpDcE-NHPRGPVVEYTNIILKEMGppSPPRItY ........................................phVEYLISGAGGIDPDTEIDDDTYDECYDELSSVLQNAYTQSETFRRLMNYAYEKELHDVEQRWLLGAGEAFETTVAQEHFKLSEGRKVICLNLDDSD..DSYTEHYESNEG..QLFDTKRSFIHEVVHALTHLQDKEENHPRGPVVEYTNIILKEMGHPSPPRMsY.. 
0 0 0 3 +6937 PF07109 Mg-por_mtran_C Magnesium-protoporphyrin IX methyltransferase C-terminus Vella Briffa B anon Pfam-B_12015 (release 10.0) Family This family represents the C-terminus (approximately 100 residues) of bacterial and eukaryotic Magnesium-protoporphyrin IX methyltransferase (EC:2.1.1.11). This converts magnesium-protoporphyrin IX to magnesium-protoporphyrin IX methylester using S-adenosyl-L-methionine as a cofactor [1]. 19.70 19.70 23.30 22.40 17.40 15.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.11 0.72 -3.94 15 218 2012-10-10 17:06:42 2003-08-27 11:38:22 6 12 204 0 98 221 233 97.50 38 40.22 CHANGED shDsLIHYsspDssthLu+LuShspppllhoFAP+TshLshh+pIGcLFPtusRosthh.Hu.pslp+tlss....tGapls+pthlostFYhSphLEhs. ..ChDVLIHYPppcssphls+LuuhscppllhoFAP+TshLslh+tlGclFPtss+s..Tphh.hpEpsltctl..tp.....sG..a..pltRpphssssFYhSpllEhh.................. 0 26 59 80 +6938 PF07110 EthD EthD domain Moxon SJ, Bateman A anon Pfam-B_15539 (release 10.0) Domain This family consists of several bacterial sequences which are related to the EthD protein of Rhodococcus ruber (Swiss:Q93EX2). In Rhodococcus ruber, EthD is thought to be involved in the degradation of ethyl tert-butyl ether (ETBE). EthD synthesis is induced by ETBE but it's exact function is unknown, it is however thought to be essential to the ETBE degradation system. 
20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.11 0.72 -3.05 57 827 2012-10-02 00:20:33 2003-08-27 11:42:09 6 16 516 4 432 846 223 86.60 19 56.80 CHANGED suho.ccFpcaapphHuslstplhs.......h.ta....................h..ttthhhhtsstsshsaDuhsphha...p..shcshhshhpsPthtt....ltsDc.tpFsDts .............................................hs.ttat.caapppHs.s.l.stph.u.................................................h.hhttths.t.s..hsss...sssa....cuhschhF......c...shcsh.t...s....u...hs...ss...phpt...........hhsD...pFh...t......................... 0 94 256 355 +6939 PF07111 HCR Alpha helical coiled-coil rod protein (HCR) Moxon SJ anon Pfam-B_15548 (release 10.0) Family This family consists of several mammalian alpha helical coiled-coil rod HCR proteins. The function of HCR is unknown but it has been implicated in psoriasis in humans and is thought to affect keratinocyte proliferation [1]. 20.70 20.70 20.80 20.80 19.40 20.60 hmmbuild -o /dev/null HMM SEED 739 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.47 0.70 -13.35 0.70 -6.53 3 174 2009-01-15 18:05:59 2003-08-27 11:50:30 7 5 41 0 36 159 0 342.80 49 90.71 CHANGED MAPTWASDlPLVQuPupQDVLERRLDsQRspVTMWGpDsu.uDuQ-PGRRGRSh-LEtSQALSQQAELISRQLQELRRLEEEVR.LRETSLQQKMRLEAQAhELEALAhAEKAGRAEAEGLRAALAGAEhVRKNLEEGuQ+ELEElQRLHQEQLSSLTQAHpEALSSLsSKAEGLEKSLsSLETRRAGEAKpLAtAQ+EADhLRcQLSKTQEELEAQVTLVEsLRKYVGEQVluEs+SQsWELERpELL-TlKHLQEDRAuLQATVELLQVRVQSLTHILALQEEELTRKlQPlDsLEPEFsRKCRSLLsRWREKVFALMVQLKAQELpHuDSTsQL+tQVAELQEcVTSQSQEQAILQRSLQDKsAEVEVERMuoKuLQhELSRAQEARRRhQQQlASAEEQL+LVVsAVsSSQttLpoTMA+VEpAlAQLPSLSNRLSYAVRKVHTI+GLlARKVALAQLRQESSPP...APu.ssDLSlELcQLREERNRLDAELQLSARLIQQEVGRAREQGEsERQRLuEVAQQLERELQcoQESLASlGtQLEAARpGQQESTEEAASLRQELTQQQElYGQALQEKVAEVETRLREQLSDTERRLNEARREHAKAVVSLRQIQR+AAQEKERNQELRRLQEEARKEEGQRLoRRLQELERDKNLML..............QRLLsVLPSulsKK....sSPRPlEsSuStSlPAAsPsRESlKGSLTVLLDsLQGLSEAIS+-EslC.tDN.cs+oSsNPPsoP 
.......................................................................................oht...htsSpu...LSQQAElIsRQLQElp.LEtEl...LREsS.lp.Qph+LEsQA.....EL-..tLt...t-p...sup....sE..sEtL+ttlutut..+............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 9 14 20 +6940 PF07112 DUF1368 Protein of unknown function (DUF1368) Moxon SJ anon Pfam-B_14994 (release 10.0) Family This family consists of several proteins with seem to be specific to red algae plasmids. Members of this family are typically around 415 residues in length. The function of this family is unknown. 
25.00 25.00 33.20 33.00 18.90 18.60 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.40 0.70 -5.87 5 9 2009-01-15 18:05:59 2003-08-27 11:53:47 6 1 4 0 0 10 0 326.00 37 95.41 CHANGED EVcR..IoclllcS.chSKQYFLYKScslV.SsSsSEDDWpYCLLILKFlsPpsslKsIopILEaFL+cDRYRDKFsccauYLsTTIpKVIlpSSppNLlGssYspso...+-lIQ.sulDShaspss.I+sS.CosssVLKIsNFhShFaLcpsKYSsplhDaclshososNsolKISMhGGLLNYFDLMlFLuILYsY....KlsopLssssI.......lcINFSs.l-hMLpsNGo.sR+KYlNSLcKLSKVHL-s..p.h.shlsshpsspcphhsFSGsLLoFEpLSssp.Tpssl.hLSpPll..+hhcS.ssYSlVNWsSFlsLssoplRLlYFYFCLNVKsSpY.FTpFolcpLlccLYsusshuSohRhh+SclRKhLhhlh-sppshlDF-FpLVhs.spSpplISuIKVRRs+lhl.R .................clhp..I.p.l.pS.p..+pYhlYh.pshh.p.SsSEDDWpaClLlLpalpP.sshchhsplLchFL+pDRaRsKhppp.sYLppTItKsI..u.p.pllGs.h..p....tp.lQ..ulsp.hsp.s.lp.S.sphpsVLKl.NhhShhaLppsphSs.h.chthshs.spssslKlS..hGlLNaFDLhlFluILYta........K..s.h..ssl.......lsINhSs...hshhhpssGp..RtKalNSL.KLS+Vplcs.......h.shh.tsptphhphoupLLsFptlphpp.T.hpl.hLSpPll..+hhp.hsNYSlVNWtSFs.LsssplRLlYFYFCLpVKsSpY.FTpF.lcpllpcLYhusshpoohR.hp.phRp.L..h.p....h.-hphpl.hp..pp..hIp.IKVRR.phh..h.. 1 0 0 0 +6942 PF07114 DUF1370 Protein of unknown function (DUF1370) Moxon SJ anon Pfam-B_15274 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 200 residues in length. Members of this family seem to be specific to mammals and their function is unknown. 
25.00 25.00 25.50 25.30 24.00 23.60 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.14 0.71 -5.08 6 118 2009-01-15 18:05:59 2003-08-27 12:38:11 6 1 72 0 70 120 0 175.20 33 88.12 CHANGED su+h+cshIIEhIs+KFcpLPEs-RsLhpaGolYlGhNAuhuGllANSLFRRlL+VoputlsouLPMAsLPFLTTsloYpshVosPL.oGDLsCETCslhRuuLlGlVsGGlYPlhLAlPVNGGLAARYposPLPpKGNIlpaWhslSpPVhKKMuFPLlLQshFGsalGS+HYplh.KsLphs.P- ..........................................hh.hlt+php.L.ct.-..pph..aGsshlGssAuhsGlhuNslFR+tLpV.p.p.u.tlpohLPh.uslPFLoTs...lsY+hhVsp.P.Lho...usl.s.CtsChhhRuuLlGllhGslYPshLAhshNutLAs...+..Ypos......LP....pK...G.....p....ll....haWhplspPlh+tMhhsllhQshFuhalu.....ppatlh.chhph...................................................... 0 10 14 35 +6943 PF07115 DUF1371 Protein of unknown function (DUF1371) Moxon SJ anon Pfam-B_15275 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 110 residues in length. The function of this family is unknown but members seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). 24.90 24.90 25.10 25.00 24.50 24.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.51 0.72 -3.90 3 117 2009-01-15 18:05:59 2003-08-27 12:40:45 6 1 31 0 10 64 2 106.60 68 99.47 CHANGED MDLRIGNNFELVFNNDFSLVDGIEEQKQRLFIFLKTLRGSLSYAPNWGLDYhLLLKLLKINNL-AVKNYFaEISKELNLDLINlSsoIQD+KlHISFFFsG.DVLNMEFcL ...........MDLRlGNNFELV.FNNDLSL.VDGI-EQKQRhhIFLKTLRGSLSYAPpWGLDYhLLLKLLKINNLcAVKNYFaE.ISKEL.N.LDLINISsoIQD...pKspISFFFoG.DlLNMEFsL............. 0 6 6 6 +6944 PF07116 DUF1372 Protein of unknown function (DUF1372) Moxon SJ anon Pfam-B_15278 (release 10.0) Family This family consists of several Streptococcus bacteriophage sequences and related proteins from Streptococcus species. Members of this family are typically around 100 residues in length and their function is unknown. 
25.00 25.00 36.00 35.80 18.50 17.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.53 0.72 -4.07 7 50 2009-01-15 18:05:59 2003-08-27 12:44:28 6 1 46 0 3 42 0 88.30 43 95.42 CHANGED Mp........c....s.htlAshlllluhlhshshlhhhhs...s.+.spsllIapVDNuss.MaGKlTsKphltthYTlsstAYGKFLVTKEQYcsIpVGDDIPsYLKG ..........................................hh...................hhshhllluhshshhthhhhht.......c..phcsllIYKsDNsGuElaGKVs-KphlGcLYTlThpsYGhFlVTKEQY-plcVGD-l........ 0 0 0 0 +6945 PF07117 DUF1373 Protein of unknown function (DUF1373) Moxon SJ anon Pfam-B_15084 (release 10.0) Family This family consists of several hypothetical proteins which seem to be specific to Oryzias latipes (Japanese ricefish). Members of this family are typically around 200 residues in length. The function of this family is unknown. 25.00 25.00 25.30 28.00 22.50 24.00 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.64 0.70 -4.60 3 35 2009-01-15 18:05:59 2003-08-27 12:58:46 6 1 1 0 19 10 0 190.70 68 97.25 CHANGED M.RVLWIsCLLIGSIoCLPQGGss..ssu....hPPYoGQ..ScPSYE+PSGQ.SGYSosPGYYSuGTpTuGGS..GSPPMWYSASYPEQEPAKPTYQRPAQSSGYGSYGuVDSSYSGSGSQQSGSQGAQSGAPGSQHQVEQESWSSSSDDEDEPEFTPVSEEDQVYASKTRSRYNQKRLLFSQFRYTPTEPRVPQEPVFPYPSKSHQGKGSAKGSR ......................RhLWlSCLLIGoIoChP.QtGhs...s...........hh....YoGQ...tPSYE+PStQuS...G...YSS..GhYSuuT...NTAGuS..uosPMWYSASYPEQEPAKPTYQRPA.QSSGYGS.ss...stt.SYSGSGSQQSGSQGuQSGsPGSQHQVEQESWSSSS..D..DEEEPEFTPVSEEDQVYA.KSRSRYNQKRLLFSQFRYTPTEP.RVPQEPVFPYPSK.SHQGKGSAKGSR..... 0 0 0 19 +6946 PF07118 DUF1374 Protein of unknown function (DUF1374) Moxon SJ anon Pfam-B_15191 (release 10.0) Family This family consists of several hypothetical Sulfolobus virus proteins of around 100 residues in length. The function of this family is unknown. 
20.70 20.70 21.40 21.30 20.00 19.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.94 0.72 -3.88 15 27 2009-09-11 20:35:31 2003-08-27 13:01:18 6 1 6 7 0 32 0 87.10 29 91.45 CHANGED cFc-LK.sllchFFc-pplpElsLcFpcpl.lTEpEacELIpsschhpphpsp....sIhsDhYpYhEspNphhKL.IcYa+cs-.KIhIhEIchWR- .....hcplc.pllphFFcspplpEhslpFcp.lclsEp-acpLlt..s.hpthtst......tlhh-.a.Yhp.ssthlKl.lpYh+css.KIhlhEIphaRc. 0 0 0 0 +6947 PF07119 DUF1375 Protein of unknown function (DUF1375) Moxon SJ anon Pfam-B_15247 (release 10.0) Family This family consists of several hypothetical, putative lipoproteins of around 80 residues in length. Members of this family seem to be specific to the Class Gammaproteobacteria. The function of this family is unknown. 20.30 20.30 22.60 22.30 19.50 17.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.65 0.72 -4.15 41 1264 2009-01-15 18:05:59 2003-08-27 13:05:19 7 3 710 0 129 455 12 66.60 48 72.28 CHANGED L..lssh....hLsGCuolhohs.ssppsh................hYsGsphshphhpt.................shhhhslshlDLPhShllDTLLLPashh ..................................................h.h.....h...hLuGCuSlhS+ThstpGp.....................YPGsphssp.hu.....................................+.lsILDlPFShVhDTLLLPhDl.a.... 0 10 27 78 +6948 PF07120 DUF1376 Protein of unknown function (DUF1376) Moxon SJ anon Pfam-B_15380 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 95 residues in length. The function of this family is unknown. 
21.20 21.20 21.30 21.20 20.90 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.77 0.72 -3.85 24 221 2009-01-15 18:05:59 2003-08-27 13:08:51 6 4 156 0 60 198 23 86.00 31 30.39 CHANGED sa.aphaluDalp-..TtaLostEcGsYhhLLstha.....ps.ppsls.sDcthLsRlstspscc.tsshshllspF...ptpcspatpcRh-cEltph .............sa.hphaIuDal.s-..Th+L.S..s.pE.+GsYhhL.hhpYa.....ps..scPlP..ppspLA+lsph..oscca...tsshs.lhc.F...htpsstahp.tRh-c-lst.h......................................... 0 9 29 42 +6950 PF07122 VLPT Variable length PCR target protein (VLPT) Moxon SJ anon Pfam-B_15500 (release 10.0) Repeat This family consists of a number of 29 residue repeats which seem to be specific to the Ehrlichia chaffeensis variable length PCR target (VLPT) protein. Ehrlichia chaffeensis is a tick-transmitted rickettsial agent and is responsible for human monocytic ehrlichiosis (HME). The function of this family is unknown [1]. 20.20 20.20 23.80 24.30 19.90 20.00 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.11 0.72 -4.28 4 120 2009-01-15 18:05:59 2003-08-27 13:17:43 6 4 3 0 4 113 3 29.60 65 67.36 CHANGED DLQQSSsSDLHts.pVEL.sPSKEtVQLEs .....DLQQSSsSDLHtS.pVEL.sPSKEtVQLEs 0 4 4 4 +6951 PF07123 PsbW PsbW_2; Photosystem II reaction centre W protein (PsbW) Moxon SJ anon Pfam-B_15117 (release 10.0) Family This family consists of several plant specific photosystem II reaction centre W (PsbW) proteins. PsbW is a nuclear-encoded protein located in the thylakoid membrane of the chloroplast. PsbW is a core component of photosystem II but not photosystem I [1]. This family does not appear to be related to Pfam:PF03912. 
25.40 25.40 26.20 29.20 22.70 25.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.90 0.71 -4.11 9 66 2009-09-11 00:41:56 2003-08-27 13:39:30 7 1 36 0 30 63 0 117.30 50 97.19 CHANGED MsuluAsus....splsu+uhstssthhssh.s.sshslPuhps++...hhsshppc..hhpsh.....s.tsuhhussuu.hs....utPAhALVD-Rhss-GTGLPLGlssshLGaILhGVFshIWuLYhlhs+s....lp..cD-DS.GLuL ...............................................................shGLPthtttt..tlpCshppc.t..t......huh....uuuhhAAsuuhhs....usP.AhALVDERhSTEGTGLsLGLSNNLLGWILlGVFGLIWuLYhlYoSs....L-....ED--S.GLSL........... 0 7 21 28 +6952 PF07124 Phytoreo_P8 Phytoreovirus outer capsid protein P8 Moxon SJ anon Pfam-B_15606 (release 10.0) Family This family consists of several Phytoreovirus outer capsid protein P8 sequences [1]. 21.20 21.20 556.10 556.00 20.40 19.80 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.40 0.70 -5.82 4 19 2009-01-15 18:05:59 2003-08-27 13:57:44 6 1 8 13 0 19 0 423.20 64 100.00 CHANGED MSRQsWlETSALlEsISEYlV+s.GDTFpGLThsDloTLSNLhsQLSlusVGFLND.RTPLQsMSNpFVsFISTTDRCuaMLp.sWFDSDltPsV.TDNFIssYIKsRhSsPlSDslRQlNNLSLpPpts.KlhSpQNAlhKALD.PYuoPl-PpcLhRuoAst.sGNhspRRuLsTsLstGAps.sFhVuE+c+IlFGpRS.NslsAupYpINVPsaaSsLsVTsARlYFTNSFlGsTIsNVpVNA.NGsssVAsIpVPTDsNshsVDSDulVSFSLuGGsINVsTuVshTGFAIAIEG-FphQMNRsQSYYThsSITh.sslsIDDFGlosaLEsFR.RLhACGQsEIFS-uMNpLT.sLIsNYhssstssshlAFsSPWYRhSERhtTILoF.ptslsLppR+LhVRHLWVIhSFIAVFGRYYssN MSRQAWl-TSALlEsISEYss+CoaDTFpGLThsDhssLSNLhsQlSVuSVGalsDPRsPLQsMSspFVsFISTsDRpuYMLpKsWFsSDltPsV.oDsFIATYIKPRhphshSDVLRQlNNhALQP.tsPKLIsRQ.uVhKuhDIPYSTPIpPpDlhRSsAss.sGNVuphthLuTP.l..AQNsTFhVuEpc+IlFGhRShssIssGNaQIsVPPWhSsLsVssARlYFTNSFhGsTItsVpssAVsGsDsssTlTVPTDsNshlVsSDSVVSLSLuGGsINVThuVshTGasIAIEGcFsM.hNtS.uYYTLoSlTh.sss.IDDFGLSAFLpPFhhpLRAsGQsEIFSpuMNsLTpsLIppYMsAstAss.IAFsSPWaRFSERARTILsh.tsllshssRKLIIRHLWVIhShIAVFGRYYpsN. 
1 0 0 0 +6953 PF07125 DUF1378 Protein of unknown function (DUF1378) Moxon SJ anon Pfam-B_15650 (release 10.0) Family This family consists of hypothetical bacterial and phage proteins of around 59 residues in length. Bacterial members of this family seem to be specific to Enterobacteria. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 25.90 44.70 21.50 17.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.86 0.72 -4.02 3 166 2009-01-15 18:05:59 2003-08-27 14:01:00 6 1 103 0 3 53 0 58.30 75 98.88 CHANGED MTFVpplLLYFCTVVCsLYLVSGGYKVIRNYIRRKIDcAAAEKISASQSAGSKPEEPLI .MTFlpplhLYFCTVVCsLYLlSGGY+AhRDaWRRQIDKRAAEKI.SASQSAGSKPEEPLI.................................. 0 0 0 3 +6954 PF07126 DUF1379 Protein of unknown function (DUF1379) Moxon SJ anon Pfam-B_15837 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 25.00 25.00 31.00 30.90 18.70 18.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.63 0.71 -4.84 33 709 2009-01-15 18:05:59 2003-08-27 14:03:06 7 1 704 0 77 265 9 154.00 63 85.88 CHANGED hLcLuscMlFposaptKhLlssAhspstFsV-DAuhYhpahEpl.sp.lslocsppsplsLNAsAApRFhKPhMPKSWaFpsp..stsh.PppGclhp..LpsstppuphlVlEsu-pAoLChLls.pphtLsssKslt.h-sIKVMpDRltPhpst...hph .....MLDLANGMLFRSRFuRKMLTPDA..FsPsGFCVDDAALYFSFEEKC.RD.hsLSKEQ+A.ELVLNALVAIRaLKPQMPKSWHFVuH...GchWsPhsGDAAsVaLSDTtEQVNLLVVEs..GENAALCLLAQ.PsVVlAG.RsMQLGDAIKIMNDRLKPQls.....hssh.. 0 6 22 48 +6955 PF07127 Nodulin_late Late nodulin protein Moxon SJ anon Pfam-B_15657 (release 10.0) Family This family consists of several plant specific late nodulin sequences which are homologous to the Pisum sativum (Garden pea) ENOD3 protein. 
ENOD3 is expressed in the late stages of root nodule formation and contains two pairs of cysteine residues towards the C-terminus which may be involved in metal-binding [1]. 29.90 29.90 30.10 30.00 29.80 29.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.15 0.72 -3.84 92 434 2009-01-15 18:05:59 2003-08-27 14:53:18 6 9 8 0 2 491 0 55.00 33 72.20 CHANGED MucllK..FVYsh....IlFlSLFLl.stsss.........hhtCpsDsDCPph.......hs.hhh+Cl.sthCp ...............MscllK..F.VYsh....Il..FlSL.FL.l.sp.ssp.................hhtCpsDs.DCPph.........h.hs..hhh+Cl.p.thC..................... 0 1 1 1 +6956 PF07128 DUF1380 Protein of unknown function (DUF1380) Moxon SJ anon Pfam-B_15699 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 140 residues in length. Members of this family seem to be specific to Enterobacteria. The function of this family is unknown. 21.40 21.40 21.70 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.76 0.71 -4.36 3 549 2009-09-11 02:05:09 2003-08-27 14:58:00 7 2 244 0 12 281 0 130.40 49 93.64 CHANGED MYGTCETLCRhLsEQYPAETPLNLIIWSPADIEALADGMEYSlSEHDlRAVLARMDsIPEEQRLESGVSAGAVM-LI-QVKENsptVTVPADLLETLLpTAEQALW+REWTARD+NLPVPESVTRRLADsAKVRALLKs ..........MYsTscplhR.t.LsschPsspslhhVlho.t-lptl.Ap...DhSLoDcElcTVhtRL-Dh.E...ctts-suI.sp...ssVh.Elhpp.hp...c.s.......RQVTVPA.hLtplhthAtp.h........................................................................... 0 1 3 9 +6957 PF07129 DUF1381 Protein of unknown function (DUF1381) Moxon SJ anon Pfam-B_15743 (release 10.0) Family This family consists of several hypothetical Staphylococcus aureus and Staphylococcus aureus bacteriophage proteins of around 65 residues in length. The function of this family is unknown. 
25.00 25.00 34.70 42.70 24.90 19.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.95 0.72 -4.56 6 367 2009-01-15 18:05:59 2003-08-27 15:00:33 6 2 205 0 9 129 0 43.80 83 63.12 CHANGED pQYLITpFpDSTGpsHscls+AR-NpohTlVEAESKEEAlcKYc .TQYLVTTFKDSTGRpHTHIT+AKsNQpFTVVEAESKEEAKEKYE..... 0 3 3 9 +6958 PF07130 YebG YebG protein Moxon SJ anon Pfam-B_15760 (release 10.0) Family This family consists of several bacterial YebG proteins of around 75 residues in length. The exact function of this protein is unknown but it is thought to be involved in the SOS response. The induction of the yebG gene occurs as cell enter into the stationary growth phase and is dependent on is dependent on cyclic AMP and H-NS [1]. 27.50 27.50 28.10 40.00 27.20 27.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.47 0.72 -4.29 32 761 2009-01-15 18:05:59 2003-08-27 15:05:55 7 1 755 5 92 274 16 74.10 64 75.84 CHANGED MAVhspYVV.R-GhE.......KMTFsSKKEADAYDKMLDlADsLsshLppusltl-EsptEpLuhaLAppK-sltphLKus .....MAVEVKYVV.l.R-GEE.......KMoFTSKKEADAYDKMLDhAD......lLssWLs..pSPl..th.--pQREuLSLaLAEpK-lLusILKs.u.......... 0 16 31 64 +6959 PF07131 DUF1382 Protein of unknown function (DUF1382) Moxon SJ, Eberhardt R anon Pfam-B_15770 (release 10.0) Family This family consists of several hypothetical Escherichia coli and bacteriophage lambda-like proteins of around 60 residues in length. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 34.00 33.80 22.60 22.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.82 0.72 -3.87 2 320 2009-01-15 18:05:59 2003-08-27 15:09:15 6 2 199 0 0 80 1 48.90 76 93.40 CHANGED Mp+ASPs-LRpslEhAp.LAp.GlRFVPIPs.TDtEFtTLus.hupKlE.hAAcAEhpEpp ....................................PIPVETDEEFHTLAsSLSQKLEMMsAKAEA-ERD.......... 
0 0 0 0 +6961 PF07133 Merozoite_SPAM Merozoite surface protein (SPAM) Moxon SJ anon Pfam-B_15860 (release 10.0) Family This family consists of several Plasmodium falciparum SPAM (secreted polymorphic antigen associated with merozoites) proteins. Variation among SPAM alleles is the result of deletions and amino acid substitutions in non-repetitive sequences within and flanking the alanine heptad-repeat domain. Heptad repeats in which the a and d position contain hydrophobic residues generate amphipathic alpha-helices which give rise to helical bundles or coiled-coil structures in proteins. SPAM is an example of a P. falciparum antigen in which a repetitive sequence has features characteristic of a well-defined structural element [1,2]. 24.30 24.30 24.30 24.30 24.20 23.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.24 0.71 -4.37 4 426 2009-09-10 23:03:07 2003-08-27 15:43:46 6 3 6 0 17 335 0 156.00 42 29.44 CHANGED GWEFGGGs..sp.tssEcKKpc.lLEplploSWDKEsIsKENEDVh-EhpE-s---EEc.p.............EElEEsE-.-sEpEllE-c.pEEE...c-E-sscpc-.EKcspN-lss.........p.pD.pAQsLISpp.KcN-cs.KKoAEsllpsLhuLlpspNplDuTl+cLhpEhhcaFsNp ................................pKcc.hLEhlploScDcEsIsKcNED.V+EEhEEptE-p.EE.--p...................E.ElEpp...p-EE.T-E.EssEEcp.EEpE.....p...cc-.EE..sc.c....c..c.....p.p..EK.c..s.Ncpps................p.t....o.cpD.cAppLI.........Sps.KsNs-s.KcsAEoIVKoLhuLhpttst...s................... 0 6 6 15 +6962 PF07134 DUF1383 Protein of unknown function (DUF1383) Moxon SJ anon Pfam-B_15868 (release 10.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 375 residues in length. The function of this family is unknown. 
25.00 25.00 48.50 48.90 18.00 17.00 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.24 0.70 -5.52 18 40 2009-01-15 18:05:59 2003-08-27 15:58:01 6 1 36 0 0 42 0 313.60 31 85.79 CHANGED -plhpQlap.tslPYIoKKtlNDpLpsplhtp............................sstpFaccsacsVhssh..sshhVlpGGAAhAsHlsst..psh...L...psLDh-hYht......................sshpp..alpLspLpccLp.ssspshhcslspllpslch..h..........................psslllhKsY.NtAlch.s....ssVphcLsc+...lKsshophN...--ahLVRFShNVcM..hS..tsu.lc.Yps.p.hlpphshhsFsVaFlslplh.....+pPh..s-......hphhslFsss.....VhVpslcpllsDQlpCLLaslF.s+sphKlcpRhspIpuLhsphspp.shssshpscc...hhtl.+ppspshohpplKclLhhhGPtLGs+tL .......................................................................-plhppLa..tslPYIoKK.INDpLpcplltp............................sstpFa+cshcsshtsh......sshhVlpGGAAlAsHlspp.pst......LpslDh-hY..............................sshpp..hltLppLp.cpLp.pssppaasplcplhtslph.............................psplllhKsY.NtAhchs....splphcLNc+...lKsshoplN.......--ahLVRauhNVcM..pS....tsshh.aps.pshhpshshhsaslaFlslplh.....+pshshsc.....shphhslFGhs.....VhVpslcpllsDQlcCLLhslF.Np.paKlcpRls+lpsLhs.hsp..shso.shpppp...hhpl.+pps.pphohpplKplLhhhGPtLGs+tl....... 0 0 0 0 +6964 PF07136 DUF1385 Protein of unknown function (DUF1385) Vella Briffa B anon Pfam-B_12671 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 300 residues in length. Some family members are predicted to be metal-dependent. 
22.10 22.10 26.10 24.40 19.60 18.70 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.67 0.70 -5.29 57 654 2009-01-15 18:05:59 2003-08-28 10:03:17 6 3 631 0 173 542 224 241.30 41 75.66 CHANGED lPhlRGhhsLh-ohslGh+sLsaSAshhtt-......t................................p-c..hsphthhhol....llShshulsLFhllPshlus...hh..p.h....h...sphh.hsllEGllRlslFl...sYlhhIShh.cDIcRVFpYHGAEHKsIpsYEsGt.LTVE...NVp+aSplHPRCGTSFLhhVhllSIll....Fshlsh...........tshhhRll..RllLlPlVuGloYEll+hsu+pps..ls+lluhP....GLhLQp.LTT+EPDDsQlEVAItulctsl.s.cp .........................................................................lPFlRGlsull-uhshGhKpLsaSuphhtt-.....pp....................................................................c-c.hsph..thslsl.........llShlhuhslFhllPshluphh.....p.h........hs...sphhhsllEGll+lh.lhl...sY...Ih.hIShh..-I+RVFpYHGAEHKsIpsYE.sshtL.TVE...NVpK.oRLH.R...CGTS.Fl.lhlh..llulhl....a.h..l..lsh...............ssl...hhRllsRllLlPVVsGlSYEl.l+hsu+hcs..sll+lLuhP....GLhLQh.L.TT+EPcDcQlEVAIsuhctllt..p........ 0 90 146 160 +6965 PF07137 VDE Violaxanthin de-epoxidase (VDE) Vella Briffa B anon Pfam-B_12679 (release 10.0) Family This family represents a conserved region approximately 150 residues long within plant violaxanthin de-epoxidase (VDE). In higher plants, violaxanthin de-epoxidase forms part of a conserved system that dissipates excess energy as heat in the light-harvesting complexes of photosystem II (PSII), thus protecting them from photo-inhibitory damage [1]. 
21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.45 0.71 -4.47 7 98 2012-10-03 08:47:39 2003-08-29 13:30:32 6 4 50 4 43 95 4 180.50 60 51.01 CHANGED EFNECAVSRKKCVPpKSDVGEFPVPDPSsLVpNFNMtDFsGKWaIoSGLNPTFDAFDCQLHEFHhEss.KLVGNLoWRI+TPDuGFFTRoAVQ+FVQDPsQPulLYNHDNEYLHYQDDWYILSSKIENKPDDYIFVYYRGRNDAWDGYGGAVlYTRSssLPESIlPELp+AApSVGRDFssFIRTDNTCGPEPPLVERl .................................................EFN-CAVS+KKCV......PpKuDl..G-FPVPsPssLVcsFNhsDF..s..GKWYIoSGLNPTFDs.FDCQLHEF+.s.E.s.s...K..LluNLoWRI.c.....TP...D...uGFFTRoAlQ.+FVQ..DP.s.pPuI.......LY........NH..DNE..a...LHYQDDWY....IlS.SKl....-Nc.DD...YlFVYYRG+N.DAWDGYGGAVlYTRStsl.PcSIlPELc+AAKpVGhDFsp.FlcTDN...TCGPEPsLhtRl............................ 0 20 35 40 +6966 PF07138 DUF1386 Protein of unknown function (DUF1386) Moxon SJ anon Pfam-B_16196 (release 10.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 350 residues in length. The function of this family is unknown. 
21.40 21.40 23.00 95.40 20.20 19.90 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.09 0.70 -5.31 8 20 2009-01-15 18:05:59 2003-08-29 13:55:16 6 1 18 0 0 19 0 319.90 47 97.50 CHANGED MSLSSKLLVYAYYG.sYNhsHc+YGESYHLYRIVcE+Los.....oYVsssSCVRRDIsTARpL..ssGtlsF--ARphLDls-sAspLosWYpsG-.ooGlCu-VQpVLspIDpasPL-+RVppGu.........sIauLDs.hs-Isp-hsssLQslIGR.FhHFsRsssLsHVA-VFDPs..l+ssGWWYpKFCVLTYMHRlhssuVPsELhsRLpcAVsKaI+Ps...........DcuNCA.AlAsVYGRFCGIGR-HFu+HKsssh+ILFQYMRuchT.ssERassFuVIKDFGRpCKETYpDL+spADsLYIpuoTD+pKNALFDLLCCsNAs-IDsDCYDYIV...spFYs ...........................................................MSLuuKLlVYsYYu.pYNtsHchYGESYHLYRIVpEaLoc.....SYVsshSClcRDlssARRL..psGshsFD-AhphlDss-oscpLSpWasT.G-..opGlsssVppVLppIDshsPlshRVppG.t.........pIFuL-s.........pEIsp-h....sDsLQhllGR.FhaFhRsspLh+lAsVFsPs..tcs...sGWWYsKFCVlTYhHRIhtpuVP...sELh.sRLpcAVpKaI+.p..........t-phNCscslA-lYGRFCGIG+EHFu+HKhsCh+ILFQYlR.GcsT.p--cFsCapVIKDFGRpCK-sYcsL+shhDhLahauhoDKcKNuLFDLLCshsspEIDlDCa.YIh...cpFh.... 0 0 0 0 +6967 PF07139 DUF1387 Protein of unknown function (DUF1387) Vella Briffa B anon Pfam-B_10471 (release 10.0) Family This family represents a conserved region approximately 300 residues long within a number of hypothetical proteins of unknown function that seem to be restricted to mammals. 
25.00 25.00 25.60 25.20 24.90 24.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.10 0.70 -4.81 6 164 2009-01-15 18:05:59 2003-08-29 15:07:34 6 4 51 0 64 117 0 236.20 51 55.81 CHANGED WoVTGKK.KNtKKKKsKsKscusspsA....t+..shs.ppssPss-.cs.sNGaHsNGuth.DoESlDSLSEtL-ohSLDA+E...scssh.-hsspsssshpsG.sch.ppK..hpssPcuopss.......p.stt..sp.pp..t.ss.hhpp............hu+KhusNIE+SVKDLQRCTsSLuRYRVllKEEMDSSIKKhKQTFAELpSCLMDREVALLAEMDKVKuEAM-IL-sRQK+AEELK+LTDhAupMoEEQLsELRADIKHFVSERKYDE-LG+AsRFosDlEsLKpSIpoaGpVsHPKssYSsRot.CS ....................................................................................................................WshTGKK.KN.K+K+sKsK.tttspsu....p+..sh..p.tss..p......NG....+hNGs.p.DopSsD..ShsEt....ul.sRE......c...uh...-....spsh...p.hlppu...t.h....p.+..h......tt....ps.pt...............p.s..tt.t.s.p..s.....thss.h.ps..........................................thsKKh....G.sNIEKSVKDLQRCTVSLsRYRVhlKEEhDuSlKKhKtuFAELpsClhD+EVuLhAEMDKVKtEAMEILhuRQKKAE.LK+hTDlAspMuE.QLsELRA-IKHFVSERKYDE-LG+sARFoCDlEpLKtpI..hGpl..oHPKNsYSsRo..C.................................................. 0 9 14 26 +6968 PF07140 IFNGR1 Interferon gamma receptor (IFNGR1) Moxon SJ anon Pfam-B_15930 (release 10.0) Family This family consists of several eukaryotic and viral interferon gamma receptor proteins. Molecular interactions among cytokines and cytokine receptors in eukaryotes form the basis of many cell-signaling pathways relevant to immune function. Human interferon-gamma (IFN-gamma) signals through a multimeric receptor complex consisting of two different but structurally related transmembrane chains: the high-affinity receptor-binding subunit (IFN-gammaRalpha) and a species specific accessory factor (AF-1 or IFN-gammaRbeta). The vaccinia viral interferon gamma receptor has been shown to be secreted from infected cells during early infection [2]. 
The structure has been halved such that the N-terminus of this family is now represented by Tissue_fac Pfam:PF01108. 20.70 20.70 25.70 24.80 19.30 18.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.26 0.71 -4.45 12 134 2012-10-03 16:25:20 2003-09-03 09:36:26 6 2 72 6 18 111 0 134.40 36 41.50 CHANGED YsptshCshhhhYsshhpascopph...pYslc-p.CspstCplshsop...pplCVos.Gspps..h...pT-tSp-VCls.h.....................ssth.VsshhlKphsDlpphpptls+slpp+hcppop.t+..phY.shlsshhtthlp..............................- .......................YccpshC.hhhlYss.hphscScEsh......pa.slc.-..DCspTtCplshsso...pchCVoApGsocsah....hT.hS.pEVCl...........................s.phsVhsCh..I+phpslpp.hp.ths+sl.hphhpp....top..........................tthh........................................................... 0 1 1 3 +6969 PF07141 Phage_term_sma Putative bacteriophage terminase small subunit Moxon SJ anon Pfam-B_15957 (release 10.0) Family This family consists of several putative Lactococcus bacteriophage terminase small subunit proteins. The exact function of this family is unknown. 22.80 22.80 23.10 28.50 19.60 22.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.07 0.71 -4.71 2 45 2009-01-15 18:05:59 2003-09-03 09:51:03 6 1 44 0 0 19 0 172.70 91 98.00 CHANGED MQTQpGGRPTILPKMYEEPLFSQIIDKIESGCNDREIYTSLHCSAKTFRKWRDDNIKAYDEAKGIARGNLLELAESALASKLTVRTLKETETIYDADGNVEKVKVKEKELDKDSLVAMMVAKAGNPELYNPTEWRRLQQEESSAHDLKAKIEELDDYKLSKYcTPcIEVPcGFE ...MQTQNGGRPTILPKMYEEPLFSQIIDKIESGC.ND.REIYTSLHCSAKTFRKWRDDN...IKAYDEAKuIARGNLLELAESALASKLTVRTLKETETIYDAD.GN.....VEKVK.VKEKELDKDSLVAMMVAKAGNPELYNPTEWRRLQQEESSAHDLKAKIEELDDYKLSKYETPKIcsPEGFE................................................ 0 0 0 0 +6970 PF07142 DUF1388 Repeat of unknown function (DUF1388) Moxon SJ anon Pfam-B_16000 (release 10.0) Repeat This family consists of several repeats of around 29 residues in length. 
Members of this family are found in the variable surface lipoproteins in Mycoplasma bovis and in mammalian neurofilament triplet H (NefH or NF-H) proteins. This repeat contains several Lys-Ser-Pro (KSP) motifs and in NefH these are thought to function as the main target for neurofilament directed protein kinases in vivo [1]. 25.30 25.30 25.30 25.30 25.20 25.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.43 0.72 -4.23 28 466 2009-01-15 18:05:59 2003-09-03 10:27:34 7 28 45 0 52 575 2 29.10 50 24.82 CHANGED PspsKoPuEsKoPs-sKoP..uEsKoPscpK ...............PscsKSPscsKSPscsKSP..scsKSPsctK...... 0 5 5 7 +6971 PF07143 CrtC Hydroxyneurosporene synthase (CrtC) Moxon SJ anon Pfam-B_16004 (release 10.0) Family This family consists of several purple photosynthetic bacterial hydroxyneurosporene synthase (CrtC) proteins. The enzyme catalyses the conversion of various acyclic carotenes including 1-hydroxy derivatives. This broad substrate specificity reflects the participation of CrtC in 1'-HO-spheroidene and in spirilloxanthin biosynthesis [1].\ This family also contains the members of the old Pfam family DUF2006. Structural characterisation of DUF2006 family member Swiss:Q82US3 has revealed a lipocalin-like fold with domain duplication. 
26.20 26.20 26.50 26.30 25.60 26.10 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.18 0.70 -5.13 69 733 2009-01-15 18:05:59 2003-09-03 10:38:50 6 6 601 2 267 685 396 285.70 23 77.79 CHANGED hEWWYlsAphps....GpthssphshhtsshsP...................t.sstphhhspsAlh..........ssppphhhcchuRuu..lup........sssshpsh.......h-saphpups..................hslplstsutpht.........lclplpspss.hlh.sptGhoh+s.sts.....p.Auahau..tPhhplpGslsh.sut..php.........lp.......GpuWhD+pauo.......psLsps.suWDWhsh.....plss..Gps....LhhaplRppss.........................satsushhhss.Gps....h....s.ttlphps..hthsss.suhphPhtWplch.....ssps....lslphpslhpsu.h....ssshsYa......EGsltlptshs..........uhuahch.pGh ................................................................................................................................EWWYhsu.lp........stt....G.p.huhQhshhR.sht.....................................t.ssa.ts.plhh....uphul.s..........sttt...hhh..tp+huRsu.........hu.....Ash...........sspshphh.......lc...s.Wphputs.........................hshplph....ssp..shs...........................................lpLpl...ps...ptP....lhpGppG.hshKs..s.s........t.ASaaao........hPhhp.l...p...Gslsl..sup.......shp..........................Vp..................G.p.u.WhD+...EW.......uo..................phL..s...sst...p.GW.DWhsl.....pLss....Gpt......Lhha.p.lRppss.........................sh.htushh...ss..s....Gps......l.....tspplphps.........h.....phs..........s...........sst.p..hPh........t..Wplpl....sshs.........lslplpsl..tst.h........shth..sYW........EGs...lt.lp..G.sh.p..............uhGahch.oGY............................................................................................................ 0 67 142 211 +6973 PF07145 PAM2 Ataxin-2; Ataxin-2_C; Ataxin-2 C-terminal region Albrecht M, Studholme DJ, Vella Briffa B anon [1] Motif The PABP-interacting motif PAM2 has been identified in various eukaryotic proteins as an important binding site for Pfam:PF00658. 
It has been found in a wide range of eukaryotic proteins [1]. Strikingly, this motif appears to occur solely outside of globular domains [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.47 0.73 -6.31 0.73 -4.08 53 834 2009-01-15 18:05:59 2003-09-03 11:18:39 10 35 156 12 410 759 0 17.60 49 3.57 CHANGED stpSpLNPsApEFVPsst .....hpSpLNPNApEFlPss.... 0 62 129 229 +6974 PF07146 DUF1389 Protein of unknown function (DUF1389) Moxon SJ anon Pfam-B_16027 (release 10.0) Family This family consists of several hypothetical bacterial proteins which seem to be specific to Chlamydia pneumoniae. Members of this family are typically around 400 residues in length. The function of this family is unknown. 21.50 21.50 21.50 30.10 20.90 21.40 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.86 0.70 -5.37 15 75 2009-01-15 18:05:59 2003-09-03 11:21:53 6 2 13 0 20 46 1 296.70 27 70.81 CHANGED shslslslSlllsuluhslhlhshhpthh.............ptpsIPpG...........................FpplI+cpYPcslaclVhpppLolpElRhllsuL...........ppshsphspcL+pKlcsF.Gl-pLpsshcstcL.............ssL-clLlcNCPLYWLp+F..IclGs+phs.........pssp........hpsssYWlu+lGh.......usstsTIFs.psalluplloc-EYphLhsHA+NsTWspsc..VpslppRl.hshlspthtttpcpspsplsp.....hhpcth.shLLtLCpHGlSW-QLQLlcplsscphsFLstl...-pup.hsstlpphshuhhs.t........alsEss.p.F-PsluLhTacEa+ ........................lshhluhhhssluhslh.hshht.hh...............p.IPpu...........................applIpppaPpslhshlhpppLolpElRhllpsl.............psshsp....hsppL+p+lpsF.GlcplpsshpuhsL.............ssL-slLlppCPLYalp+F..IphGs+pl..........ptpt........hp.ssYWlschGh.......ssstsTIFp.hsalluphlSccEY.hLhp+Acss...s...Wsptp..VsulppRl..t.htphhh...ptcpshsphhs.......hhpc..tphLLhlCpHGlSWEQLQLl+pls.hcshtaLstl...-pus.hsttlpphhhshhs...........ahsppspp.a-spluLhTasEh........ 
0 0 0 18 +6975 PF07147 PDCD9 Mitochondrial 28S ribosomal protein S30 (PDCD9) Moxon SJ anon Pfam-B_16045 (release 10.0) Family This family consists of several eukaryotic mitochondrial 28S ribosomal protein S30 (or programmed cell death protein 9 PDCD9) sequences. The exact function of this family is unknown although it is known to be a component of the mitochondrial ribosome and a component in cellular apoptotic signaling pathways [1]. 25.00 25.00 28.20 25.70 24.20 24.00 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.37 0.70 -5.70 14 222 2009-01-15 18:05:59 2003-09-03 11:37:19 7 2 90 0 141 204 0 322.20 25 75.44 CHANGED Mushpt...hp.l.............ttt....s.p.pp.ss.....sssshYPPIh.opshps+us+p+phcpa...................httlptssslcEKl.hhsthph................+ahsa......PpshshNu.caaQahTKTsh.suLP...s.t...............................................ptp..shschhhutlcshhpthhLQcpaappccc..........cppchsst.hLspLspslhshLupp.stLspsplDhsPplphaWsRGtph.s.........+shp+th.cshcaQlDD.............+PthplRhpppLs.ahs.-tp......hstclPshc.........acPptLsLapcpacsth.ssuhhs...ssssaGhspFphhschhpR...........cp....s.tsQhEs.h+ApuIhShFAWhsAQA.hYpGFhstsDlTcPhsoQsVITDG+haSFaCYQLNTLuLsspsst.sNsRpNhsWGTpuh.LY-p...............l.pcsclhGaN-.....sslppllpFhlNps 
.......................................................................................................................................................................................................................................................................................................................................................................................p....hps...h.hhT+Thh........t.L.P.............................................................t...t.hp.h........h...t.................................p......hltpl...hht.h....p...........s.Ltt.t..h.s.p....hthhW.htt...................................hQhps....................pst.tlphtp.Ls.hhs.p............hsh.l..sh..............................s..shls.l.ppp.h.p...h..suhh...........s....hsas.as..ahhh..s.t.....hR...................................................tt.ht..s.h+ApsIhtsFuhshA....pA..th..G...........hps..ps...ls.pPhlsQuV.hTDG+hFpFhsaQLNTlsLss.....ss....s....hKNlsWss.p..sh.LYpp....................................l..pss...t...hhshps.....cshhphlthhh............................................................. 0 45 57 101 +6976 PF07148 MalM Maltose operon periplasmic protein precursor (MalM) Moxon SJ anon Pfam-B_16111 (release 10.0) Family This family consists of several maltose operon periplasmic protein precursor (MalM) sequences. The function of this family is unknown [1]. 19.50 19.50 20.60 19.60 18.10 19.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.33 0.71 -4.71 36 702 2009-01-15 18:05:59 2003-09-03 11:41:22 7 2 687 0 64 315 6 135.60 62 45.04 CHANGED sstpShhAAaplP..uspGshplpLoShl..spslFsPsVllLDpphpssphhssspFpYp.sshhsssRlpuplpls....s.uppphYlllYToppDLstoTpl.cP..AKshAcuhGpshPtlsDssstHossGplclplp ..............................sGIoGPVAAYSVP..ANI..GELTL...TLT.....SEV....NKQs....SVFAPNVLILDQNMTPSAFFPSSYF..TYQcPGVM.SADRLEGVMRLT...PALGQQKLYVLVFTTEKDLQQTTpLLD.P..AKAYAK.GVGN...ulPDIPDPVARHToDGllKLKVK.............................. 
1 8 22 46 +6977 PF07149 Pes-10 Pes-10 Moxon SJ anon Pfam-B_16134 (release 10.0) Family This family consists of several Caenorhabditis elegans pes-10 and related proteins. Members of this family are typically around 400 residues in length. The function of this family is unknown. 21.10 21.10 22.50 22.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.19 0.70 -5.69 5 17 2009-01-15 18:05:59 2003-09-03 13:21:49 6 2 5 0 17 15 0 278.90 21 92.35 CHANGED MNcTEYphHLLARTI+TGcDEhITPlIpQLs-hpVSMDILEKHNlPsLVscaAPaNpsApSLu+slLsWKN-clppEKPplLKcFsctst-c+aP-pFV....L+LLTuLMsF-DL-LV+ssFcILsph-LoL--aEcaGIaE+AtpFQtQhcEADELIsKl-..hLpsEhLEEsEpEpptsclhssh-cc...................u-sESGlFT--E..........cccspcp.llhEICMlaLApCIKoGNs-VISuAIpFsGsacaPLALYRKYDIQhLIYsaGs+s-DAcpLhDaIEclEclElssE+lEAFK+Fl+..sshEp.csVTDSVMolLpGFL........E-sDDahVcsTLcFFLsMPISL-QFcps+VEssLcNLEptssupLullLKhKIp-L+p ...........................Msthph.h.hLA.hltst..p.t.Io....hhp.......L.s.h....hchh-ptNhPhllt.....p.ss....A.ph...hh.hK..phtpE...hhptFht....hh........tt.h...............lpLhh.h........t..p..hhp.sh.hl.t..h.ht.h....h...h.....ph.-t..lh.p..............t.......-..............................................................................................-l.hh.hht.lpstspthls..............hulph...htt.phsLplhpKY-IttLlh.ths.p.pptA.p....Lh.pIpphpp.tht.pp.phh..hhp........t...sshht.h..ah..................................................................................................................... 0 6 7 17 +6978 PF07150 DUF1390 Protein of unknown function (DUF1390) Moxon SJ anon Pfam-B_16182 (release 10.0) Family This family consists of several Paramecium bursaria chlorella virus 1 (PBCV-1) proteins of around 250 residues in length. The function of this family is unknown. 
20.00 20.00 20.20 20.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.53 0.70 -4.99 25 54 2009-01-15 18:05:59 2003-09-03 13:27:10 6 1 10 0 3 52 2 190.20 31 82.66 CHANGED csaphslYpCuCGYcTsssuNAoKHKKT.uCsHchpscpc.cFVLcEDh...phtsspshsstshsssss.ssshsphlsp.psIsIsLslP-poshssIh-slps.phhpElcsu.-PppIPAlLF+aTRGht.....utpphI+Y-sDKslVpcKDP.sG+-sspcLKKY+scYls-sssla-cshclsahPpplpcsh+-hppPphssG+KKsc.lsuA-slKhsAoGsHhhYKhPtE ...chaphplYhC.s..CGYcThspusAs+HpKo..pCtcc.hhpppp.pFlhcc-h...t...stsh......t.sshtt...s.pp.lp...phshsLhlP-tshhpslhchlps.ph.p-lpst.pspphPullachT+u.t.....tt.thlphcsDK..lhctc..sGp.htpslpKhtpcah.cssshhppsh.l.h.sp.hpp.hc-hppsphs.G+K+sp.lsuu-sl+.hAsGsHhhYKhPhE................................ 0 1 1 3 +6979 PF07151 DUF1391 Protein of unknown function (DUF1391) Moxon SJ anon Pfam-B_16216 (release 10.0) Family This family consists of several Enterobacterial proteins of around 50 residues in length. Members of this family are found in Escherichia coli and Salmonella typhi where they are often known as YdfA. The function of this family is unknown. 25.00 25.00 32.20 76.00 22.70 21.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.54 0.72 -4.47 4 588 2009-01-15 18:05:59 2003-09-03 13:43:20 7 1 261 0 3 201 0 48.70 87 78.29 CHANGED pplDLGNNESlVhGVFPNpDGTFTAMTYT+SKTFKTEuGA+RWLtRpss ..DTIDLGNNESLVCGVFPNQDGTFTAMTYTKSKTFKTEsGARRWLtRNo.s........ 0 2 2 3 +6980 PF07152 YaeQ YaeQ protein Moxon SJ anon Pfam-B_16245 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length which are often known as YaeQ. YaeQ is homologous to RfaH, a specialised transcription elongation protein. YaeQ is known to compensate for loss of RfaH function [1]. 
25.00 25.00 28.00 26.10 22.10 21.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.87 0.71 -4.94 64 1039 2012-10-11 20:44:44 2003-09-03 13:49:14 7 2 1018 5 215 584 86 173.00 52 96.12 CHANGED MAL+uTIYKsplpluDhDRsaYpshpLTlApHPSET-ERMMlRLLAaALpAs..E.p....LpFTK.GLS.ssDEPDLWpKsLs.scIplWIElGpPDEcRl+KAss+AccVhlasY.upssplWWpptps+lsphcNLsVhplspsthpsLupLspRsMpLploIp-Gplalosspp..s.lpls.ph ...........MALKATIYKAslNlADhDRsha.hDtuLTLARHPSETpERMMlRLLAahhaAc.....ER.........LpFT+.GLs.s-DEP-hWh+s.c.huI-LWIELGlPDE+RI+K..ACs...pA..tc...Vs.LasY..suRAAplWWpQspuKhspasN....LoVahLD--plupLushA-RTMsLQsTIQ.DGslWLSDscs.s.lElphps................. 0 39 107 166 +6981 PF07153 Marek_SORF3 Marek's disease-like virus SORF3 protein Moxon SJ anon Pfam-B_16263 (release 10.0) Family This family consists of several SORF3 proteins from the Marek's disease-like viruses. Members of this family are around 350 residues in length. The function of this family is unknown. 21.90 21.90 22.40 28.80 15.70 21.80 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.12 0.70 -5.77 3 18 2009-01-15 18:05:59 2003-09-03 13:53:59 6 2 8 0 0 14 0 263.20 39 91.13 CHANGED MSRusAslFDDMDIPRGRFGKPPRKITclNFWslLsDELTsGIVQCMESRERLALlHTsSsN-Go.hSFDIHKDMWCQMVLWSAYRFFSTM-+MFSI-oIoNFs-TDVsEoGpW+IaCRTWDlRDAsKMKhlGPFLPALFSFHLENWTTMLSIGIsKGYDRHNsRshFMshPShKNVLlGAlEVARaAVVLoLPICEYRTPhGLPDDpIGNAIKLCCAQMQANRLEcTGIopDutpKsNDuSEEELYYRslH-LVKouREHCcSsE......sshDlsPTI+cpp.psIphphsSsh.hGhtshuR.hNsGch+Yp+.plhRNhPlRVPRSRLuNSKILQTFRcshsRSsha...l ............sRs.AhhFsshDhPRGRFhpP.h+.sshsaW.hhsDEhspGIhQChEuRERlu.lp..p.spG...paD.phDMWsQhslWSsY+hhthhp+.FSlcplh.hscpslstsu.ath.hpsWDlRDusKh+hlGPhL.AhFShHlENWTshLSIuhstG.asppsphs.hMsh.us+.sslhsuhEVAR.hlVLsLPls-YRsP.GLPDDshGNAI+sCCApMQtpRLpcsths.D....hpsssEEEhYYRslpc....lIpstRcas.ssp.......................................................................................h.............................. 
0 0 0 0 +6982 PF07154 DUF1392 Protein of unknown function (DUF1392) Moxon SJ anon Pfam-B_16270 (release 10.0) Family This family consists of several hypothetical cyanobacterial proteins of around 150 residues in length which seem to be specific to Anabaena species. The function of this family is unknown. 25.00 25.00 172.30 172.20 17.70 17.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.88 0.71 -4.35 7 15 2009-01-15 18:05:59 2003-09-03 13:56:02 6 1 3 0 11 15 0 152.00 43 98.45 CHANGED Mss.IspLEpCWYlSPPWGpphsPltlsLhE+VYLpos+ohGYCCGVpWppDtW.Ysl.scpshlpsscppIIupGphpshslpKPtFtLG-hV.hcFsscusKpRlILGltLlpsuWhYhVEhhSPsL..............stttsh.pRhuhVp-cDLVpV MhstIssLEoCWYlSPPWGpplPPltlsLlE+VYLpos+ohGYCCGVpWpc-sWhYsIhsss-IlasocspIIuTGplpsholpKPsFtLG-hV.lcFts-uPKpRlILGl.LlcpsWhYtVEhtSPsL..............opsssh.sRhuhVs-tDLVcV...................... 0 0 6 11 +6983 PF07155 ECF-ribofla_trS DUF1393; ECF-type riboflavin transporter, S component Moxon SJ, Eberhardt R anon Pfam-B_16301 (release 10.0) Family This family is the substrate-binding component (S component) of the energy coupling-factor (ECF)-type riboflavin transporter. It is a transmembrane protein which binds riboflavin, and is responsible for riboflavin-uptake by cells [1,2]. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.18 0.71 -4.53 62 2634 2012-10-03 02:46:00 2003-09-03 14:01:05 7 8 1367 0 389 2679 75 161.70 23 88.03 CHANGED p..ss+plVhhulhsAlhhllsh.h..lpIPss..so....lplspuhlhlhAhlFGshhGhlsGhlGtsLhDhhsG.Ys.hWhsa.llscGlhGhlsGhhspph..php.......hphhs..llhhslhhlhuhhluhsll..................uhtshltulhuslh.pslsuhlluhhLh..tsht+ .............................t..ps+plshhulhs..Alsl.l.lsh....h.......hp...I.PhP................lp..l.s.s.u.h..l..h..lhu..hla..G...s.h..hGh.......l.......l.......Gh.......l.......u...t.h...lt.....D.....h............h....s.G.....Ys....hW...h....s.......a.......p......lls.....p....G.........l..h.....G.....h....l.........s.........G.....h.....h..t.p.p..h.......ph............................p...h.hs.........hl....h....h.s....l....h..t..l....l.u....h..l...s..tslh..........................................t.ss..h.t...s.....h.h...t.....s......l....h.u.....s....lh....ps.l...s....shl....luhlLh..sht......................................................................... 0 159 250 313 +6984 PF07156 Prenylcys_lyase Prenylcysteine lyase Vella Briffa B anon Pfam-B_12448 (release 10.0) Family This family contains prenylcysteine lyases (EC:1.8.3.5) that are approximately 500 residues long. Prenylcysteine lyase is a FAD-dependent thioether oxidase that degrades a variety of prenylcysteines, producing free cysteine, an isoprenoid aldehyde and hydrogen peroxide as products of the reaction [1]. It has been noted that this enzyme has considerable homology with ClP55, a 55 kDa protein that is associated with chloride ion pumps [2]. 
21.30 21.30 21.30 21.40 19.80 21.10 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.08 0.70 -5.57 5 325 2012-10-10 17:06:42 2003-09-03 14:06:43 9 14 182 0 225 329 7 298.00 29 68.63 CHANGED FEEosWalINllKLVWRYGlSsLRM+MWVE-VLDKFMRIY+YQuHcYAFSuVEcLL+ulGtsDalshlppTLpEsLpcAGlSppFlNEhVusVhRlNYGQSsDINAFAGAVSLuuAsuuLWSVEGGNKlVCSGLLptSKuNL.IsuoVpSI.....T+aosNspchYpVsYKssstspSDFYDIVVVATPL..DcshSNIT.FtNFcPPI--hpspYQpTVTTlV+GcLNooYFGs+PhDpFsLusILTTDDuslFhssluIl.....sShRcK.....sscGstVWKlFSRcsLo+sQLccLFpSYshsV+KsW.tAYP+YcsPpppPsFILHD.+LYYLNuIEsAASsMEhSAIAA+NlALLAYcRWNscpDh..IDQcsLhc+LKTEL ...................................................hhshh.+lhW+Y..G.h.t...h+.................hp.hhc...phlpc..Fh.+.l...Y..c..............t...............h.sFps.l.pphh.sh...........G........hhthhptohtc....L.....p.s.G...h.......s.p...pFhp.-lltsshRlNYG.Q.s...s.lsuh.............s..Ghho.hs..s.u.t.s.uhhuVcG.GN.h.l...hpthlp.....t.op........u........sl...l.p.s.p.Vtpl.......................p.ss...s..st........h.tl...........h....p.........s......t.......t.....t......t......t.t..p........h..a....Dh....VllAsPh...p.t.....lt.........s..hp....s.h...p....................a.phhsT.h.h..p...u......l...s..s...phFsh...s..p..hs....t...slhTs..........t.......s.........p........h............h...s.........shh..........................p......................pt.thlaKlFS.p.lp...pp...............lppl.............F......................t...........h........................................s....................p.......h.W............u.......YP..h..h..t...s...........p...h....................s.hhL...ts.....t............l.....aYhsuhE........hh.SsMEhsuluucN.sAh.Lhh.pph...................................................................... 
1 73 112 170 +6985 PF07157 DNA_circ_N DNA circularisation protein N-terminus Vella Briffa B anon Pfam-B_12343 (release 10.0) Family This family represents the N-terminus (approximately 100 residues) of a number of phage DNA circularisation proteins. 19.90 19.90 19.90 20.10 19.10 19.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.94 0.72 -3.85 9 365 2012-10-10 17:06:42 2003-09-03 14:09:03 7 3 318 0 54 329 4 93.30 41 21.62 CHANGED h+cthtcASFRGVPFhlEc-p.tssGRRl.hHEYPhRDpshsEDhG+phpphplouhllGcDhhspR-+...Ll-AL-psGsGpLVHPhaGphpVpl ...................................WpcpLtcASF...RGVPFhV.psp..sssGRRlthHEYPhR..Dp.sasEDLG+tspphslsAhll.......Gc.......D.......hhspR-.c...LlsALc........psGsGpLVHPhaGchpVtl................. 0 13 25 43 +6986 PF07158 MatC_N Dicarboxylate carrier protein MatC N-terminus Moxon SJ anon Pfam-B_16346 (release 10.0) Family This family represents the N-terminal region of the bacterial dicarboxylate carrier protein MatC. The MatC protein is an integral membrane protein that could function as a malonate carrier [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.64 0.71 -4.39 5 578 2012-10-02 15:12:49 2003-09-03 14:11:11 6 4 502 0 89 495 47 146.70 43 38.33 CHANGED MssELloILlLllsFlIATspsINMGALAFAuAFllGolllGLcscElLAGFPuDLFLTLVuVTYLFAIApsNGTlDWLV+sAVRtVRGRluAIPWVMFllAuLLTAhGAluPAAVAIlAPVALoFAs+YRIcPLhMGLMVlHGAQAGG .........................................................h...pllhl.Al.sluIuIG.a..h.pKIN.IG...llA..Is..Fu.Y...lIush.h..M.........G.l.sP.........K..-..l..l....p.......h..W...P..s..u..l..F...FsIhu..V..SL..F....as.h...A.psN.GTL-hLAp+llYRs.Rs.+..Ps.h.L....P.hll...alhusll.oALGAG..aas..s..hA.l...hs....P..lAlhlCpKh..s.hsP..L...lGAhsl.saGApuGu......................... 
1 27 59 69 +6987 PF07159 DUF1394 Protein of unknown function (DUF1394) Moxon SJ anon Pfam-B_16260 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 320 residues in length. The function of this family is unknown. 20.10 20.10 21.10 20.20 19.30 18.40 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.84 0.70 -5.30 7 319 2009-01-15 18:05:59 2003-09-03 14:55:21 7 7 109 0 197 239 1 249.30 46 56.05 CHANGED tsphFLDFENAQPTEsE+ElappVstVLpcu-ulLt-LpuY+GAupEIR-AIpNPss.phQEcAWpuVsPLVsKLKcFYEFSlcLEpsL.pLLtuLs......uss.sPppHLE+cQALAKQFAEILHFTL+FDELKMpNPAIQNDFSYYRRTlSRpRlsNh..-sEscVsNEhANRMSLFYAcsTPMLKsLScAToKFVSENKoLPlENTTDCLSTMAsVC+sMLEsPcapSRFps.EETlhFChRVMVGVIILYDHVHPVGAFsKoSpIDhKuCIKVLK-QPssosEGLLNALRYTTKHLND-oTsKpI+ ..............................t.phFlDFE.....sApP.o-t.Epplaspl.shVLp-.upslLt...................cLpsY.+.GA.uppI............R..c....A..I..p..s..P..sc..........plQ..E+AWsuVsPLVsK..LKcFYcFSh.+...L.-pslpsLLttLs......ss.h.s.s..ppaLE.cpQALsK....phAEILcFsLpFDELK...MpsPAIQNDFSYYRRsl...S..R....+...l.....s..........s..................-....-.spl.....ssEhAN+...MS.LFa..ApuTPM.LK..sLSpAT...cFVs.-..sp.s.sl-ss.o-sLuThs.s.VCthMlEsscahsphps.p-oh.hshRVMVGhlIL..YDalcs.GAFs+t.u.plch+....ssl+lL...p.t.ps...................tspsLLssL+..aoopHhs.p.sTsttl............................................................................................... 1 66 85 147 +6988 PF07160 DUF1395 Protein of unknown function (DUF1395) Moxon SJ anon Pfam-B_16376 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 250 residues in length. The function of this family is unknown. 
25.40 25.40 26.30 36.90 25.10 25.30 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.54 0.70 -4.87 6 100 2009-01-15 18:05:59 2003-09-03 14:57:27 7 3 84 10 70 109 0 220.70 30 87.04 CHANGED +ls.l+Ell.hRshh.sPshcsTLSsIssslsulchLLpchcp.lpppcps.sptKcLhpsohcpppch.tlctpsPsphs.csp.ssppssspsLlpcst....t.........cEpu.sppP.+ps...+.h.aITs-EFcSlPsYM+GRLTL-plNthlschssalsuKs+IlupspKpLocssRElhpcaR-lEsKsst+..G+aFFlEsDIKshssLKLDpoFtshlssLRHlpRlpEsRsGsLT .........................................................................................hpc.l.............ss.hps..Lptlspp...lhslpphLpphchplp.pc.p.t.sphKcLhpshppp...c.cl..pclppp.lP...spl.....P...................ts....ps.s..psh.h.p..t.st.ts.pt.....................hpsp.p.t...spcs.+....ct..c........lt......ph.a......ITs-EFsulPpYM+GRLT...h-plNssl.p.-l.Nps.lhuKY+ILppP.+...K..s..hs.ss.s+p.......hhpca.h.-p.E..s..K-op...G.paFhlEsDI+ch.s......slK.lD.+p..h+.slLslLRHhpRlpElRsu................... 0 18 30 50 +6989 PF07161 DUF1396 Protein of unknown function (DUF1396) Moxon SJ anon Pfam-B_16343 (release 10.0) Family This family consists of several putative lipoproteins from Mycobacterium species. The function of this family is unknown. 
24.00 24.00 24.00 24.00 23.90 23.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.03 0.71 -4.64 17 288 2009-01-15 18:05:59 2003-09-03 14:59:51 8 1 134 7 53 132 1 191.50 36 78.19 CHANGED Dsstl..lpcuspsT+slpSsHlsloVsGp.....lssL.slpol-GDlTssPtss..ApGssplhht..uttl.sscFllsDuslYssls..ssasshGsu..tslYDsuhl..LsP-pGlusVLu.shssspspGcEslsGpsTs+l....oGslsAssVstIsPs..hsus.tslPsTlWIs--..................sstpLspstl.chssGss....VplTlScWscsVsV ...................................................................................................-At.L..lcpuscAT+slpSs..HlslsVsGc..............lssL.....slpo...l..-uDlo.s..sPt.s...ApGssplhht......sssh...sscFlVh.s.splYsc...Ls........ss........aosh..Gsu........tsl...Yssuhl......LDPs.pG...........L.........uslLu.slp.ssps.pG..p-sID.Gl...sT.s.+l...............oGsls.Ass............lst..ls...Ps...............hsp......s...t.lPsTVWIsps..................Gs.pp.LVphpl...-hspGs...........VplThScWGc.Vsl.............................................. 0 18 43 50 +6990 PF07162 B9-C2 B9; Ciliary basal body-associated, B9 protein Vella Briffa B, Coggill P anon Pfam-B_12595 (release 10.0) Family The B9-C2 domain is found in proteins associated with the ciliary basal body. B9 domains were identified as a specific family of C2 domains [1]. There are three sub-families represented by this family, notably, Mks1-Xbx7, Stumpy-Tza1 and Tza2 groups of proteins. Mutations in human Mks1 result in the developmental disorder Mechler-Gruber syndrome [2]; mutations in mouse Stumpy lead to perinatal hydrocephalus and severe polycystic kidney disease [3]. All the three distinct types of B9-C2 proteins cooperatively localise to the basal body or centrosome of cilia. 
20.80 20.80 20.90 21.10 20.70 20.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.95 0.71 -4.47 45 444 2012-10-10 12:23:49 2003-09-03 15:04:25 6 10 128 0 283 416 10 153.90 27 49.58 CHANGED clhlhGpI.uAp...sapps.sL..............as+atlhtGs.sWp...........hlsGhp...................pGtTQhshsss...schsh...............asaPh-lphpsps.pG..................................WP....pLhlplau................hDshG+.ppltGYGhstl......Php.sG.pHplplshW+.P.......hsohhppLpphhlGusPpLpc..shlts..........spsRhtl+oco.sGpVplplsllh .............................................................lhl.Gpl.sAp...thp..s..sL..............as+atl.hGt.....s.......Wp............hsG.p.....................pGhoQhsps....pt......s.phsh...................asaPl-lphtsps..G..................................WP......plhhpVhu.....................D..a.sp.phltGYGhshl......P.......h.......s..PG......p..........Hph........ph...s......hapP..............................ss..h..hpphtp.....hhhGttsphtc..phltt....................t.sR.h.hp.sts..tG..lphphphh.h....................................................................... 0 122 153 228 +6991 PF07163 Pex26 Pex26 protein Moxon SJ anon Pfam-B_16379 (release 10.0) Family This family consists of Pex26 and related mammalian proteins. Pex26 is a type II peroxisomal membrane protein which recruits Pex6-Pex1 complexes to peroxisomes [1]. Mutations in Pex26 can lead to human disorders [2]. 
25.00 25.00 41.60 29.00 22.30 20.50 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.93 0.70 -5.52 3 66 2009-09-11 08:51:28 2003-09-03 15:14:33 7 2 40 0 27 66 0 266.40 54 93.40 CHANGED MKSDSSTSAAPLRGLuGPLRSSEPsLAlPAsuPAVcLLEEAuDLLVVHLDFHAALETCERAWQSLssaA.AEEP.uGTplEVKCSLCVVGIQALAEMDRWREVLSWVLQYYQVPEKLPPKVLELCILLYSKMpEPGAVLDVVuAWLQDPuNQulP-YGoLAELHVhRlLLPhG+LSEAEELl...VGSAAFoEEQRl-ALQAIHsARQQpspppoQEHSsSp.......EsQKlspEGSlSpKLLSLlMLLRRLWuSAVSHlhSlPFRKuLLAALILCLLIlRFDPAuPSSLPFLY+LsQLFpph.pAshGRLYhLAsRS ...................................M+SDsSsSuAsh+GhsGsLRSSEPstssPshusAV...sLLEEAADLLVVHLDF+AAL-TCERuhpSLss.s.sEEs.suoslEVKCSLCVVGIQALAEMsRWpEVLSWVLQYYQVPEKLPPKVLELCILLYSKhpEPuAhL-VsuuWLpDPuNQsLPE.YsuLAEhHl.+VLLPLGploEAEELl...VGSsAFsEEpRhssLpslcpuRQQ......ppppcs..uo-.............Espc.sp-...G...uhspK.h..Lul.hLlpphhssuso...+hhShPF++uhLAALlLsLLllRhDP.AuPSSlsal.+Lh...pLhp.h.tuh.u.h.............................................. 0 4 6 13 +6993 PF07165 DUF1397 Protein of unknown function (DUF1397) Moxon SJ anon Pfam-B_16395 (release 10.0) Family This family consists of several insect specific proteins. Swiss:Q25513 is annotated as being a haemolymph glycoprotein precursor. The function of this family is unknown [1]. 
24.90 24.90 24.90 27.40 22.10 24.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.74 0.70 -5.14 17 111 2009-01-15 18:05:59 2003-09-03 15:42:10 6 3 35 0 77 116 0 191.90 28 72.67 CHANGED -l..p.s..php..-lpchlcspCpcss.....GsDp...happlEpusps.hscClpGllDhsslppEIccApPsG-LDpVFsKYCt+tspuhpClsshpstlpPCL-p--ppphsshhshhppLLsFlCaK-GDpIALFIAEpGPEChpppp-sltpChspshsua.......sshp.up.hshsc..hu.pQCs-hsphpsChlccLEpCpssTPuNlsEShF+alR+toPC ........................................t..phtphpphhppcC.css........usst....hhpplppuhhp.hspClpshhs..hsthptEhpp.spPpGs.LDsVFpK.........YC..+hs.p.uhpClpsFsstlpsCLsp.-Eppt...s..hhhplhpplLsFlCh+sGDpIAl..FlucpGsEC.....hppp.pcslt.pChs..pshsth................psh.phs..p........thsp......hs.cpCs-ltphcsCllpc.LE.p.Css.s.suNlh-uhFchltptosC.................. 0 23 31 67 +6994 PF07166 DUF1398 Protein of unknown function (DUF1398) Moxon SJ anon Pfam-B_16404 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 130 residues in length. Members of this family seem to be found exclusively in Escherichia coli and Salmonella species. The function of this family is unknown. 20.80 20.80 21.00 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.59 0.71 -4.23 6 734 2009-01-15 18:05:59 2003-09-03 15:46:05 6 1 607 1 25 177 1 121.40 41 91.93 CHANGED QlllFpchF-pVRpDhNaphFYSELKRHNVSHYIYYLAT-Nl+IVhcNDssVLlKGt+pllpV+ho+NppLIcsost+ahStEloFpcYpcsLAsAGVFRWITslcEpKRaYYohDNoLLapEsl ...........................................hhhpchF-pVRp-.NFshFa...p-LKcpsluaYIYalATsNl+Ilpcs-sslll+us+s..llpVssopNpshIcpshttHhsGcpoFcpYCssLApAGVF+WlsDlppppRpYa.shDNpLLahEsl........... 0 11 15 22 +6995 PF07167 PhaC_N Poly-beta-hydroxybutyrate polymerase (PhaC) N-terminus Moxon SJ anon Pfam-B_16456 (release 10.0) Family This family represents the N-terminal region of the bacterial poly-beta-hydroxybutyrate polymerase (PhaC). 
Polyhydroxyalkanoic acids (PHAs) are carbon and energy reserve polymers produced in some bacteria when carbon sources are plentiful and another nutrient, such as nitrogen, phosphate, oxygen, or sulfur, becomes limiting. PHAs composed of monomeric units ranging from 3 to 14 carbons exist in nature. When the carbon source is exhausted, PHA is utilised by the bacterium. PhaC links D-(-)-3-hydroxybutyrl-CoA to an existing PHA molecule by the formation of an ester bond [1]. This family appears to be a partial segment of an alpha/beta hydrolase domain. 20.50 17.00 20.50 19.00 20.40 16.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.98 0.71 -4.83 37 1217 2012-10-03 11:45:05 2003-09-03 16:00:08 8 8 704 0 326 1286 296 145.90 45 28.33 CHANGED .pD+RFusssWppNPhachltQsYLlsu+hlpphl-ps-..lDscs+pRscFhscQhssAhuPSNFlhoNPpsl+cslposGpSLlcGhppLhcDltputu..plppoDpsuFpVG+NlAsTsGpVVacN-LhpLIQYcPtTEpVac+PLLlVPPhINKaYILDLpPpNShlcas .......................................tD+RFts.sWptpPhaphhhptYLh.tchhtphhpshp...ls.ppcp+htFhhpph.hsAhuPo.NhhhhNPt.hhcphhpotGpsLhpGhtphhpD...l...tps.t......h.p..ss.t.psFpl..G...c.....sl..A...s...TsG.pVVacNclhpLIQYp.P...h..T.......c....p....V..h.t.p..PlLIVPPhINKaYILDLpPcNShl+ah................... 1 81 187 258 +6996 PF07168 Ureide_permease FAE_3-kCoA_syn1; Ureide_perm; Ureide permease Vella Briffa B anon Pfam-B_11634 (release 10.0) Family Heterocyclic nitrogen compounds may serve as nitrogen sources or nitrogen transport compounds in plants that are not able to fix nitrogen. This family represents ureide permease, a transporter of a wide spectrum of oxo derivatives of heterocyclic nitrogen compounds, including allantoin, uric acid and xanthine; it has 10 putative transmembrane domains with a large cytosolic central domain containing a 'Walker A' motif. Ureide permease is likely to transport other purine degradation products when nitrogen sources are low. 
Transport is dependent on glucose and a proton gradient [1]. The family is found in bacteria, plants and yeast. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.11 0.70 -5.76 4 165 2012-10-02 19:55:49 2003-09-03 16:15:55 6 4 88 0 57 194 24 229.70 39 66.75 CHANGED MlLuLhFLGTWPAlLTLLERRGRLPQHTYLDYolTNLLAAllIAhohGEIGtspPscPsFhTQLsQDNWPSVLFAMAGGllLSlGNLuoQYAaAFVGLSVTEVITASITVVIGTTlNYFLDs+IN+AEILFPGVuCFLIAVFLGSAVHuSNAuDsKpKL......pshps.pp.tolpshEhcsh.....ppp-LEpGc..s-pscsGTAsFllELEc+RAIKVFGKSphIGLslThhAGlCFSLFSPAFNLATNDQW+TLc+GVP+LsVYTAFFYFSISsFllulILNlpFLY+PllGLP+SSlKAYlsD.sGRtWAlLAGhLCGFGNGLQFMGGQAAGYAAADAVQ .............................................................hlohlhhGoWsshhpLhp++tRh.ph.hYhDYols.lLsullhAhThGphG....ps.sFhspL..s..Q..sshsSlhhAhhGGllhsluNlhhphAhAhsGhSVs.slu..sulslVl.GsslNYhh...s.sph..scs....lLFsGVushllAl.hl.s..uh.sat............................................................................................................................................................................................................................................................................................................................................................................. 0 18 39 47 +6999 PF07171 MlrC_C MlrC C-terminus Vella Briffa B anon Pfam-B_6316 (release 10.0) Family This family represents the C-terminus (approximately 200 residues) of the product of a bacterial gene cluster that is involved in the degradation of the cyanobacterial toxin microcystin LR. Many members of this family are hypothetical proteins. 
25.00 25.00 30.90 30.80 20.10 19.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.84 0.71 -4.71 15 567 2009-01-15 18:05:59 2003-09-03 17:08:38 7 6 358 1 215 590 412 175.50 31 35.98 CHANGED llADstDNPGGGusGDsTtlLcullc.Rshpss......ulusIa.DP.AVptstAAGtGAslsL+lGGKhustuupPlcscspVp+Lscsshps....htss.hslGssAsl+lt..G..lclIlsosRsQsa-.shFpslGl-PtspclLVVKSusHF+AsFtPlAppllhstuPuuhssDsspLsa++l ........lluDstDNPGuGusuDsThlLctLl.c...p.s..h..pss...............shuslh.DP.t...Asp..tshsA....G...h..........GApl.slplGu...+.....s....s....s....h....s....u....t...Pl..c.spspVtt..lscsthhs.........hts..t.h.p.hGssA.slcls......G.....lc....lllsop+p.phh-.shFpthGl.-Ppp.tc..llVlKSs.aa+u......sFts..lA..s....t.ll.hstuP..Gshs.p.Dhsplsap+................. 0 35 109 166 +7000 PF07172 GRP Glycine rich protein family Bateman A anon Pfam-B_15819 (release 10.0) Family This family of proteins includes several glycine rich proteins as well as two nodulins 16 and 24. The family also contains proteins that are induced in response to various stresses. 30.00 30.00 30.50 30.20 29.80 29.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -11.19 0.72 -3.51 15 154 2009-09-11 20:33:22 2003-09-04 11:05:36 6 3 52 0 60 154 0 95.50 33 73.37 CHANGED MA.SKA.hlLLGLhhsssLLluSEVuAtp.......usp.KsEscssVpssph.....................ttGGYsGGGGsthsGGGh.sGGGsass........................GttGh.tt....GGGYpGGG ...........Mu.SKs..hlLLuL.hhAslLLluS-VuAtc..........ttt..ps.p.sp..ss.....Vpssphhu..t.t....................ttGG.....as..GuG..Gt...t..........G.....G...G.....h.........ss.s.G..u.asG..............u.G..tGh..st.......G...tGh....t................................................. 
0 12 34 47 +7001 PF07173 DUF1399 Protein of unknown function (DUF1399) Vella Briffa B anon Pfam-B_13062 (release 10.0) Family This family represents a conserved region approximately 150 residues long within a number of hypothetical plant proteins of unknown function. 21.20 21.20 21.20 21.20 21.10 20.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.83 0.71 -3.88 16 332 2009-01-15 18:05:59 2003-09-04 13:51:26 7 9 120 0 256 357 6 114.40 26 21.11 CHANGED pYpp.Ccphas+l.l...sptultsphppp.uh.pscclWpphaPpEsachphsstos.......tsstplspDLlusVhRQppFst+hs.......oPahp-s.aLppAhtRYptFlpllpp.tspc...hhLVPThDIDLhWHTHQLashsYhsD .........................................................................................................................................................................hshDLssulhRQtt.F.htphs..................................pshht.p..s..lpcultRYp.t......F.ltlhpp....psp............thlVPTLDlDLhWHTHQL.sP.htYhp.................... 1 86 161 234 +7002 PF07174 FAP Fibronectin-attachment protein (FAP) Vella Briffa B anon Pfam-B_16585 (release 10.0) Family This family contains bacterial fibronectin-attachment proteins (FAP). Family members are rich in alanine and proline, are approximately 300 long, and seem to be restricted to mycobacteria. These proteins contain a fibronectin-binding motif that allows mycobacteria to bind to fibronectin in the extracellular matrix [1]. 
22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.20 0.70 -5.12 8 90 2009-01-15 18:05:59 2003-09-04 14:53:17 6 4 83 0 15 77 0 271.90 62 85.80 CHANGED McQsDspupRR+GL.soLAlAAVoGAoA....sulALPAsAsADPs.PsPs........sPPususAsPAPssP..sPuPsssPs................PuDPNAssP........P..ssADPNAPsPPPsDPNAPsP........ssPpsGRlDNAsGGFSYVVPAGWh.SDAopLsYGpALLSKhssEss.PspsPP..NDTpVlLGRLD.KLaAuAEsDNsKAAsRLuSDMGEFFMPaPGTRlNQpTlPLc.ANGhsGsASYYEVKFoDsNKPNGQIWsGVVGsPsAsusscG..sPpRWFVVWLGTANNPVDKuAAhsLApSIRPWsPPP.PPPsssPus ...................................McQVDss.TRRKGhhAsLAIA.AhuuASh....VTlAl...P.A.T.A.sADPE....PsPs.............ssPPS.TAsAP.......P.A.P.As.PsAPP.sPuAss.sPt............spPuDPNAAPP..........P.....AD.PNAPPP.PslsPNAPt.P..............sRIDNsVGGF.SFsLPAGWV...ESD.AuHLDY....GSALLSKsTG-PPhPsQPPPVANDTRIVLGRLDQKLYASAEAssoKAAsRLGSDMGEFaMPYPGTRINQETlsLD.ANGsoGSASYYEV...KFSDsSKPNGQ.IWTGVIGSPsAsussuG.....sPQRWFVVWLGTANNPVDKGAAKALAESIRPhssPP.PsPAPAPut.................................................. 0 2 8 12 +7003 PF07175 Osteoregulin Osteoregulin Vella Briffa B anon Pfam-B_16589 (release 10.0) Family This family represents a conserved region approximately 180 residues long within osteoregulin, a bone-remodelling protein expressed highly in osteocytes within trabecular and cortical bone. A conserved RGD motif is found towards the C-terminal end of this region, and this is potentially involved in integrin recognition [1]. 
22.10 22.10 22.60 38.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.87 0.71 -4.42 3 78 2009-01-15 18:05:59 2003-09-04 16:19:44 6 1 41 0 14 72 0 174.70 55 34.65 CHANGED csNED+SSu..................GNQ-NIHpsLtASVYP-PTVsKGTEDGcDAlLHLhDQ-RYGAALLRNITQPVKSLVTGsELpuEcNKEK+PQSVLSVIPADVNsAKsaSKDpKNQQRDLLsQsSPVKS...KHT+RsRRSTHYLTHLPQIKKIPSDFEGSGSPDLLVRGDNDVPPFS .........................................p..sEsptSs..................oNKENsHssL+MSIYPcSTGN+GsEDGDDAlSKLHDQEEYGAALIRNNMQH.lM.uPVTshc...L..LGEENKEsKPRNVLsKIP..Auh..NYAK.ApSKDK.Ks.QRDuQuQpsPVKS...KST...H+..hQHNhDYLKpLsKVKKIPSDFEGSG..YsDLQpRGDNDlSPFS............. 0 1 1 1 +7004 PF07176 DUF1400 Alpha/beta hydrolase of unknown function (DUF1400) Vella Briffa B, Bateman A anon Pfam-B_16606 (release 10.0) Family This family contains a number of hypothetical proteins of unknown function that seem to be specific to cyanobacteria. Members of this family have an alpha/beta hydrolase fold. 23.00 23.00 23.20 24.10 22.40 22.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.45 0.71 -4.22 10 255 2012-10-03 11:45:05 2003-09-04 16:53:21 6 4 69 0 99 279 187 126.00 27 30.55 CHANGED AAEplsLpYuPFpc.olsVp-LppFAcTGplsssL+hahphl..sspphppLRthLspRhplsPVtlupll....sSPlGcplLsplupllpssscpssthALRuAllpuAssPs.GlollsllpsYPopslcls ...............AAEplhlpa.us.hph..o...lsl.......psLcpaAc.s.......G.p....h.s......s...p..Lphahphh...stpp..hpplRphLppplpl..........ssshhsphL....po.hGctlLpp.l.upl...l...p...st.s...s.......p..s...uh....AlRu..All.pu.At............p..sp..slollshLcsaPspslpls............ 0 6 65 95 +7005 PF07177 Neuralized Neuralized Vella Briffa B anon Pfam-B_16611 (release 10.0) Family This family contains a conserved region approximately 60 residues long within eukaryotic neuralized and neuralized-like proteins. 
Neuralized belongs to a group of ubiquitin ligases and is required in a subset of Notch pathway-mediated cell fate decisions during development of the Drosophila nervous system [1]. Some family members contain multiple copies of this region. 20.50 20.50 20.70 20.60 18.80 20.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.18 0.72 -4.10 6 930 2009-01-15 18:05:59 2003-09-04 17:04:25 7 17 92 2 581 865 1 68.60 35 23.45 CHANGED PLp.FHs.s+GuNlplscstplA+R.cuSFCculsFSsRPltIsEpltl+lt+hppsWsGuLRlGhTspDP ............t.hp.FH..h+Gpplp.lss.ss.p....sAp....R......p...s..s.Fsp...G.lVaSsRPL+ssEhhpl+l..sch....s.s.p..Wu....G.ulclGlTshsP.................. 1 163 205 374 +7006 PF07178 TraL TraL protein Moxon SJ anon Pfam-B_16378 (release 10.0) Family This family consists of several bacterial TraL proteins. TraL is a predicted peripheral membrane protein which is thought to be involved in bacterial sex pilus assembly [1]. The exact function of this family is unclear. 20.80 20.80 20.80 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.11 0.72 -3.97 22 417 2009-01-15 18:05:59 2003-09-05 09:50:48 6 2 336 0 58 226 12 91.80 35 93.44 CHANGED cpYphPcpLscsp+lhhaslDEhls.hllhashGhhss+...hlhGlhlushhahul++hKtGcussalhchhYWaLPsshht..h+tsPsSphRhals .........achPcpLssts+hhhhslDEllP.shlslshG.lho.u+.......hL..hGl..ss..ulllahsh++.hKcG+usuaLhchlYWahPsshht....h+slPsSphRpal.................................... 1 15 31 46 +7007 PF07179 SseB SseB protein N-terminal domain Moxon SJ, Bateman A anon Pfam-B_16678 (release 10.0) Domain This family consists of several SseB proteins which appear to be found exclusively in Enterobacteria. SseB is known to enhance serine-sensitivity in Escherichia coli [1] and is part of the Salmonella pathogenicity island 2 (SPI-2) translocon [2]. This entry contains the presumed N-terminal domain of SseB. 
19.90 19.90 20.30 20.40 19.80 19.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.52 0.71 -4.30 133 1525 2009-01-15 18:05:59 2003-09-05 10:51:25 7 20 1195 0 280 1063 109 116.10 25 48.96 CHANGED Lcpslp...th..tps........ttstpthhptL.....hp.....uclhlPl.spsss.................................tt...tsphphh.hlp....tt-Gpp.....hlssFTohcplpphts.........pttsh.hshshpslhphh.......tps.tulllNPtss.t....thhlstptlttl .................................................thLtts.tsc.......tpp.thhcsL........hc.....uplaVPsssstsp...................................p...ssslsl..phc.........pp-.Gps.....hlPhFToh-tLpphsp................cppsh..hsh...sscsLhphhh.......ts.p...slhLNstss.s........sh.h.sctlt............................ 1 77 194 247 +7008 PF07180 DUF1401 Protein of unknown function (DUF1401) Moxon SJ anon Pfam-B_16789 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 135 residues in length. Members of this family appear to be found exclusively in the Enterobacteria Escherichia coli, Citrobacter rodentium and Salmonella typhi. The function of this family is unknown. 25.00 25.00 25.00 26.40 24.10 24.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.96 0.71 -4.43 2 589 2009-01-15 18:05:59 2003-09-05 10:56:52 6 1 423 0 13 126 0 109.00 50 76.67 CHANGED hsupsppsshhIPsSV+pYsGEPLYIlVuhWChLQppWlpRNpIAEAFtIshRRASalIsYlpp+pp+ls.hsRa.ohsN.+h+RhEIalhcV..pssP.pp..psGP....KR...RVGNG.hu.uN.lWNch........Ih++Kc-p ...............h.t..................Ys-+PLYLLIA-WMMAENRWVhAREIShpFDIEHsKAlNTLoYIL.S.E.V..sEIsCEVKM.....................................................................................................sp..uttspppphlhhVs................................. 
0 1 4 10 +7009 PF07181 VirC2 VirC2 protein Moxon SJ anon Pfam-B_16860 (release 10.0) Family This family consists of several VirC2 proteins which seem to be found exclusively in Agrobacterium species and Rhizobium etli. VirC2 is known to be involved in virulence in Agrobacterium species but its exact function is unclear [1,2]. 25.00 25.00 57.70 57.50 19.60 15.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.28 0.71 -4.91 5 16 2009-01-15 18:05:59 2003-09-05 11:12:19 6 1 9 1 4 13 0 179.10 54 99.72 CHANGED MGIRKPALSVuEARRLAAARPEIVHPsssluSQ-sAsspLPE+AG+EDRpssPssAKRpcSsDRQSMLTVDALSSosuPEKlQVFLSARPPAPEVSKIYDNLILQYSsSKSLQMILRRALuDFEsMLADGSFSsAPKSYPIPpss.EKsVIVQTSRMFPVSLLEVARNHFDPLGLETARAFGHKLATAALASFFAcEKso...+sp MGIRKPALSVuEAR.RLAuARPE...Il+Ps.slso.psssssphPEcucpc-RpstPssAcRptssDpQshL.....TVDALSossuPEKlQVFLSARPPAPtVSchYDsLlhQYSsSKSLQMILRRALsDFEsMLtDGSFptAspSYPIsp.s.pK.llVQTSRMFPVsLlElARsHFDPLGLETuRAFG+KLATAALASFFstEKss...t... 0 1 3 3 +7010 PF07182 DUF1402 Protein of unknown function (DUF1402) Moxon SJ anon Pfam-B_16561 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 310 residues in length. Members of this family seem to be found exclusively in Agrobacterium, Rhizobium and Brucella species. The function of this family is unknown. 
20.10 20.10 21.00 24.80 17.80 17.50 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.80 0.70 -5.54 9 105 2009-01-15 18:05:59 2003-09-05 11:14:38 6 2 93 0 23 69 1 300.30 69 93.33 CHANGED shhsssssApAhplVPsGNRsAEQPsIPGASuRRT+AspToa-tKYcKVhsLLpsD+sLhuKI+psAuAYGIDPIHIVGAIVGEHTYNVDAYDRLQoYYVKAhSYuupsFpFuYcGEslspFlpRPpFupCtt...hp-SYsLWSCREsVWsssFRGKoVsGpuaPssRFuAsFFQPFYAGQTFGLGQlNPLTALploDhVsRlSGa.KLstscspuVY+sIMDPDlSLsYlAAhIRcSIDAY+pIAshDIStNPGlTATLYNVGsPcpRAssLtspNp....uGtt.hLPpENYYGWLVNDKL-EL+uLL ......uh.hluSpuAcAlTVVPPGNRNAEQPsIPGASA+RT+thSTTYE+KY.QKIYsLL++DuSLRuKIRSTAAAYGIDPIHIIGAIVGEHTYNVDsYDRLQTYYVKAhSYlspulSFGYsGESIuQFlcRPEFAsCh+...hKDSYSLWoCREsVWNu-FRGKoVGGKAYPNNRFSAVFFQPFYAGQTFGLGQINPLTALQMSDMV.NRlSGLPKLDADDuNAVYKTIMDPDLTLPYIAAoL+pSIssYRpIADFDISKNPGITATLYNsGGopARAcsLAsENu+R+AAGpEPhLPpENYYGWLVNsKLDELKALF.. 1 3 10 14 +7011 PF07183 DUF1403 Protein of unknown function (DUF1403) Moxon SJ anon Pfam-B_16581 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 320 residues in length. Members of this family are mainly found in Rhizobium and Agrobacterium species. The function of this family is unknown. 
21.80 21.80 21.80 24.40 20.00 21.40 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.88 0.70 -5.05 19 66 2009-09-10 21:27:44 2003-09-05 11:24:43 6 2 36 0 25 69 7 256.30 41 97.24 CHANGED phs...ssusss.sh.PplPuWltstss-............s.p-sAFtuGAALssLDsllR..ppssWtGsWRpRLALcuAssss+htGRsE-EsuLRDAlhLppsGD..cPGPAGclahAWRcLsppsss...s...lhtlsshLG.....culuslsspltsh....hpuspsuPl.suAtlhuslhshtPpAElluhhLADslLAppLsWs+slPLLusph......t.shRsh.sstsclR.scsshtpAlphAlssuAspAlR.AsElsRRAs+LhAVAPKLRuKGAssslchlLs-DAlsu..........S.....shstLSchAARRLF-RLhsLGAVRELSGRsoFRlYGL ..........s.............s.hPuWshstttp..............s.pDsAFhuGuALssLc.llp.....tp.s.tuhhRpRLALpuAtsssth.GRsEctssLRDAhhLhtsGD...sGPAGphhhuaRphstpsss...........ht.ls......culsslssth.th....hpsttsssh.tuA.hhtthht...s...pu.........-hhuhhLADhsLAptLsWs+.VPLLusth..................ttsclp.tttsh.hAsphAlhpushpAlc.us-lsRRAt+LhAVAPKLRuKGAstslphhLscDAlss.........u.....shs.hschAARRLh-RLhpLGAVRELoGRsoFRlYGl.............. 0 1 16 22 +7012 PF07184 CTV_P33 Citrus tristeza virus P33 protein Moxon SJ anon Pfam-B_16614 (release 10.0) Family This family consists of several Citrus tristeza virus (CTV) P33 proteins. The function of P33 is unclear although it is known that the protein is not needed for virion formation [1]. 
25.00 25.00 247.30 247.20 17.40 16.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.00 0.70 -5.59 2 230 2009-09-10 15:37:21 2003-09-05 11:29:21 6 1 1 0 0 186 0 201.30 88 100.00 CHANGED MFAFASENQDlLEEKIFRRRThHRKYhDDVVRDFTIDlGYDhVDRDPTVLADYhSLYFFLLNND.lGPLAASIhVSPPVsGTHKIRAHlDNQPNpEGNVTYlKTVDKSRFsIhIKAhPAsMRGaYShRAFLuuDVsStpsEFICSFVGSRFLCCsTQTISENLsKVCSSSF.FRsVScTAhNEFSVshDDVEDVKYVRKRAEGF.HCADPYPPRCYRssNLGDsSGVQSRTIEEEGYRTDTsGNVAVolPNTPLVNAVSPYVAEYNENsRSRISLIRRVCCYAVCVLVVSLLIMSGLLsIIhI MFAFASENQDlLEEKIFRRRThHRKYhDDVV+DFTIDlGYDssDRDPTVLADYFSLYFFLLNNDNlGPLAASIlVSPPVsGTaKIRAHVDNQPNpEGNVTYVKTlDKSRFsI+IKAlPAsMRGYYSFRAFLouDVASERSEFICSFVGSRFLCCCTQTISENLuKVCSSSFFFRAVSETATNEFSVsTDDVEDVK................................................................................................................ 0 0 0 0 +7013 PF07185 DUF1404 Protein of unknown function (DUF1404) Moxon SJ anon Pfam-B_16616 (release 10.0) Family This family consists of several archaeal proteins of around 180 residues in length. Members of this family seem to be found exclusively in Sulfolobus tokodaii and Sulfolobus solfataricus. The function of this family is unknown. 27.00 27.00 27.50 27.50 26.90 26.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.02 0.71 -4.68 14 66 2009-01-15 18:05:59 2003-09-05 11:40:19 6 1 19 0 23 53 0 153.20 32 84.69 CHANGED hhslhLllhslNPaoEph.hhsshlaMhuHYuLahuGhLlGhhhh+t.....shhh..hl..............lGhhhslhWHlPhaFsluus.h.hRllp-loLllGGlLhGuShpthshhhK.lsLhuLWMhuDohLuIlhllusshYosh.h.aSPYssppLshsGlhMFlhMsllhsallh+h.l.Ksl ...........................................hhhhhh.lNPhs..h.....h.hMhsHY.lhhuGhhluhhhh+t...............s.l...hh................lGhh.shhWHhPhhFslusthhhhRllpclohhlGGlllGSulptlphshK.lhLhuLWMhGDolLullLhlusshYos....h.sYsspph.hsuhhMFlhMslhhhhllhch.hpt..................................... 
0 3 4 18 +7015 PF07187 DUF1405 Protein of unknown function (DUF1405) Moxon SJ anon Pfam-B_16845 (release 10.0) Family This family consists of several bacterial and related archaeal protein of around 180 residues in length. The function of this family is unknown. 25.00 25.00 41.90 41.90 21.70 21.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.09 0.71 -4.52 31 478 2009-01-15 18:05:59 2003-09-05 13:01:54 6 1 459 0 77 287 1 164.10 48 83.05 CHANGED TlaG.ahWY............t....QLspTPhhha.FVPDSPsAoLFhslsLluhL...............hs+sh.....shlcALAhVsLlKYGlWslshplhhhht..s..sshshhthhLllSHhuMulpuhLah.ah.+hphhtlslAslWhhhNDslDYhhh..hPhhsh.ht.a.hspluhhs.............hhlulhslhlshaLshp ....hIYG.YIWY............u...pQL....scT....P....h.....hFhlFVPDSPTAhLFhllulhhhL...............hpKp..........ulI-ALAaVTLhKYGlWAVlMNllhhhp......pGsIs.hpGhhLhhSHshMAVQAlhahPha..+hshhtlsVAhlWshhNDhIDYhahQ.hPhYsh...lppa..lhpIG..hho..................hhLSl.huLhlhh.hs.+.......................................... 0 20 52 69 +7016 PF07188 KSHV_K8 Kaposi's sarcoma-associated herpesvirus (KSHV) K8 protein Moxon SJ anon Pfam-B_16868 (release 10.0) Family This family consists of Kaposi's sarcoma-associated herpesvirus (KSHV) K8 proteins. KSHV is a human Gammaherpesvirus related to Epstein-Barr virus (EBV) and herpesvirus saimiri. KSHV open reading frame K8 encodes a basic region-leucine zipper protein of 237 aa that homodimerises. K8 interacts and co-localises with human Pfam:PF04855, a cellular chromatin-remodelling factor, both in vivo and in vitro. K8 is thought to function as a transcriptional activator under specific conditions and its transactivation activity requires its interaction with the cellular chromatin remodelling factor hSNF5 [1]. 
25.00 25.00 440.70 60.80 18.00 17.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.69 0.70 -5.13 2 10 2009-01-15 18:05:59 2003-09-05 13:08:26 6 2 3 0 0 9 0 181.40 89 86.96 CHANGED MPRMKDIPTKSSPGTDNSEKDEAVIEEDLSLNGQPFFTDNTDGGENEVSWTSSLLSTYVGCQPPAIPVCETVIDLTAPSQSGAPGDEHLPCSLNAETKFHIPDPSWTLSHTPPRGPHISQQLPTRRSKRRLHRKFEEERLCTKAKQGAGRPVPASVVK.................................................AEVCDQSHSPTRKQGRYGRVSSKAYTRQLQQ .MPRMKDIPTKSSPGTDNSEKDEAVIEEDLSLNGQPFFTDNTDGGENEVSWTSSLLSTYVGCQPPAIPVCETVIDLTAPSQSGAPGDEHLPCSLNAETKFHIPDPSWTLSHTPPRGPHISQQLPTRRSKRRLHRKFEEERLCTKAKQGAGRPVPASVVKVGNITPHYGEELTRGDAVPAAPITPP.PRVQRPAQPTHVLFSPVFVSLKAEVCDQSHSPTRKQGRYGRVSSKAYTRQLQQ. 0 0 0 0 +7017 PF07189 SF3b10 Splicing factor 3B subunit 10 (SF3b10) Moxon SJ anon Pfam-B_16870 (release 10.0) Family This family consists of several eukaryotic splicing factor 3B subunit 10 (SF3b10) proteins. SF3b10 is a 10 kDa subunit of the splicing factor SF3b. SF3b associates with the splicing factor SF3a and a 12S RNA unit to form the U2 small nuclear ribonucleoproteins complex. SF3b10 and SF3b14b are also thought to facilitate the interaction of U2 with the branch site [1]. 18.50 18.50 20.10 20.70 18.30 17.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.50 0.72 -4.15 19 296 2009-01-15 18:05:59 2003-09-05 13:40:43 6 5 270 0 214 245 2 77.40 49 82.45 CHANGED D+hphpsQLEpLQu+YlGoGHsDTT+.aEWhsNlpRDohuSalGH.shLsYhulu.cNEshu+lRhphL.........................p+MltPssss...Pt .......D+hphpsQLE+LQuKYlGTGHAD.TTK.aEWhsN.+RDoauSahGH.slLsYhAlA...EN..E.u.huR.l.Raphh.........................cKMlpPsGsPP............... 0 75 116 176 +7018 PF07190 DUF1406 Protein of unknown function (DUF1406) Moxon SJ anon Pfam-B_16883 (release 10.0) Family This family consists of several Orthopoxvirus proteins of around 185 resides in length. Members of this family seem to be exclusive to Vaccinia, Camelpox and Cowpox viruses. 
Some family members are annotated as being C8 proteins but their function is unknown. 20.20 20.20 20.40 20.50 19.80 20.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.40 0.71 -4.55 4 194 2009-01-15 18:05:59 2003-09-05 13:49:14 6 3 31 3 0 180 0 168.20 33 67.41 CHANGED M.sI+hIshLhlIhs...lushp.ss.YpP............FNKLsIsL-IYsh-sl.ssYTssN......hl.hpchhIol.op.Csshhso.-l.sspDhp.lthhhhDp.phQpps+hCplshchpCph.pcP..lt..p....phShpu-.....hpChpslch.Ishlp....DpphL+.paTL+Iss...............thsl ......................................................P............FNhlsVclslYsV.N..sShTps.s.......sps.s.sIsTpEhTITl.pcs..CsPsFcs..sh.aolhsplshsuaFps-.sc.hQspsKhCTlshcl+Csp....psp.....s......lpphpt...c...spphs.puE.....GcCLsulcl.I.Y.N...Tssp.hc.p.p.l.........t.hhst.h........... 2 0 0 0 +7019 PF07191 zinc-ribbons_6 DUF1407; zinc-ribbons Moxon SJ anon Pfam-B_16889 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of around 70 residues in length. Members of this family have 8 highly conserved cysteine residues, which form two zinc ribbon domains. 25.00 25.00 25.00 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.94 0.72 -4.19 20 619 2012-10-03 10:42:43 2003-09-05 13:53:40 7 1 612 2 48 200 3 69.30 60 95.42 CHANGED -hhCPsCpppL-hsu...tphHCspCppcaphpAhCP-CpppLEcLpACGAssYFCp.pCNpLhSKpRVcFphp .............ElpCPpCpps.L-pss............spA+CsoCsc.....s.....hphpAlCPDC+QPLQVLKACGAVDYFCQ..pGHGLISKKRVcFVl.u.... 0 8 17 32 +7020 PF07192 SNURF SNURF/RPN4 protein Moxon SJ anon Pfam-B_16890 (release 10.0) Family This family consists of several mammalian SNRPN upstream reading frame (SNURF) proteins. SNURF or RPF4 is a RING-finger protein and a coregulator of androgen receptor-dependent transcription. It has been suggested that SNURF is involved in the regulation of processes required for late steps of spermatid maturation [1,2]. 
25.00 25.00 74.40 74.10 23.10 16.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.37 0.72 -4.12 2 30 2009-01-15 18:05:59 2003-09-05 14:02:14 6 1 21 0 11 31 0 67.40 87 89.83 CHANGED MERuRDRLHLRRTTEQHVPElEVQVKRRRTASLsNQECHlY.RRSQQQQlPVVDFQAELhQAFLAETPRGG .........RDRLHLRRTTEQHVPEVEVQVKRRRTASLSNQECQLYPRRSQQ..QQVPVVDFQAELRQAFLAETPRGG 0 2 2 2 +7021 PF07193 DUF1408 Protein of unknown function (DUF1408) Moxon SJ anon Pfam-B_16879 (release 10.0) Family This family consists of several hypothetical Lactococcus lactis and related phage proteins of around 75 residues in length. The function of this family is unknown. 21.50 21.50 21.90 144.50 19.70 20.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.57 0.72 -4.45 2 18 2009-01-15 18:05:59 2003-09-05 15:35:39 6 1 18 0 1 7 1 73.90 76 97.15 CHANGED McTpIhNGRcVhhlPTslG.haaDL.KREshG.Vl.pTacR.DGohYhhpRp+scpE....KAAMLNpClSDWth METTIINGRKVRllPTsVGQIYHDLIKRENRGVVVFETWcRPDGSLYMTSRKKNKQELAADKAAMLNECISDWKK 0 1 1 1 +7022 PF07194 P2 P2 response regulator binding domain Finn RD anon Pfam-B_7970 (release 10.0) Family The response regulators for CheA bind to the P2 domain, which is found between Pfam:PF01627 and Pfam:PF02895 as either one or two copies. Highly flexible linkers connect P2 to the rest of CheA and impart remarkable mobility to the P2 domain. This feature is thought to enhance the inter CheA dimer phosphotransfer reactions within the signalling complex, thereby amplifying the phosphorylation signal [1]. 
21.30 21.30 22.00 21.50 21.20 21.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.50 0.72 -4.13 80 704 2009-01-15 18:05:59 2003-09-08 09:47:26 6 18 549 1 219 650 12 84.00 26 13.32 CHANGED psYplcVplscsshlKusRAahlhcsLc-hG.-llcohPshE-l.Es-p....F..cpphplhlloppst-clcphlt.sluEl......cpVplpphp .......................haclplplpc.sshhKulRAhhlhcsLcc..hG.cll+ohP...s..hEcl..Es-p........F..tppFplhhho.p.p..s..t..-clcphl...pluEl......ccVplpp..s........................ 0 88 155 181 +7023 PF07195 FliD_C Flagellar hook-associated protein 2 C-terminus Yeats C anon Yeats C Family The flagellar hook-associated protein 2 (HAP2 or FliD) forms the distal end of the flagella, and plays a role in mucin specific adhesion of the bacteria [2]. This alignment covers the C-terminal region of this family of proteins. 25.40 25.40 25.40 25.50 24.80 25.20 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.66 0.70 -5.07 35 2213 2009-09-13 17:36:45 2003-09-08 11:04:28 7 11 1857 0 507 1693 494 236.90 25 45.96 CHANGED ApsAplslsG....h.slpssoN.plssslsGVolsL................ppsopssp.......slsl.spDssshppslpsFVcuYNplhsplsshos.....................t.tt.pppspsGsLtGDusl+slpspL+shhts....t.sss...........hpsLsclGIosspt.p...........GpLpl...DcsKLcpslp..pssssltph.....................................................................FsG.........................................ssGlspp..............lpstlpshs.....pss.Ghlp...................scpssLscphpplspphpshsc+hcshpp+hppp..Fsth-phhsphss 
................................................................................................................................................ApsApls..l.sG...............l..pl.pp.soN..plsss...l.pG.l.T.lsL............................pp.s....s.ttst...........slsl...spDs.s....s.sppslpsaVs.uYNsLlsshsshop................................................t..pps..tpsGsL.hG.D.u.s.lps.l...p.splcshlss..........s.t..sss..................hpoLsplG.Iohsps...............................GpLp.l.....................Ds..sKLpp..A..Lp..p..s..s.su.ltph...........................................................................................................................................................................................................................................................................................................................................Fs.u.....................................................................................................................................p.sGl.ssp.......................ltshlss..hh......................sss...Gh..lp.....................................stps...u...lspplpplscphpshspp.hcsh.s+..hptp.....Fsth-shhsphp........................................................................................................... 0 175 326 420 +7024 PF07196 Flagellin_IN Flagellin hook IN motif Yeats C anon Yeats C Motif The function of this region is not clear, but it is found in many flagellar hook proteins, including FliD homologues ([1]). It is normally repeated, but is also apparently seen as a singleton. A conserved IN is seen at the centre of the motif. The diversity of these motifs makes it likely that some members of the family are not identified. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.66 0.72 -3.98 132 2915 2009-09-13 07:46:12 2003-09-08 15:29:45 8 27 1152 5 475 2258 345 56.80 25 13.44 CHANGED slslphssttt..........lshsss..sshss.lsstIN...........sspsGVpAoh.......ssp.......upLsl...puss ..................................lslptssttts................l.shpss..sslts.l....sstIN............ss..ss.G.V.pAol.............sps..........upL.lput....................... 0 140 264 375 +7025 PF07197 DUF1409 Protein of unknown function (DUF1409) Vella Briffa B anon Pfam-B_16557 (release 10.0) Family This family represents a short conserved region (approximately 50 residues long), sometimes repeated, within a number of hypothetical Oryza sativa proteins of unknown function. 21.10 21.10 21.20 21.20 20.50 21.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.47 0.72 -3.90 51 427 2009-01-15 18:05:59 2003-09-09 17:00:33 7 14 7 0 308 422 0 46.30 47 12.38 CHANGED RLEsppl-.L........AcLcEh...pApls.Eh......tclhs.sthlE...pp+....hK.Lcp ..........................RLEApplD.L..........ApLc.Es.............pApls.Eh.....................tclhs.sthlEppp....h+Lcp............. 0 1 1 7 +7026 PF07198 DUF1410 Protein of unknown function (DUF1410) Vella Briffa B, Bateman A anon Pfam-B_13132 (release 10.0) Domain This family represents a conserved domain approximately 100 residues long, multiple copies of which are found within hypothetical Ureaplasma parvum proteins of unknown function, as well as related species. 
21.00 15.50 21.10 15.50 20.90 15.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.27 0.72 -3.66 67 980 2009-01-15 18:05:59 2003-09-09 17:11:28 6 16 37 0 55 835 4 68.20 20 15.61 CHANGED spphtstplpl........phpDpsppphpl.....................phphsppppslt.....hssLpss.....p..p..Yplsclshsspphphs ...................................................hhNpplph........hhpDps.sppapl...............................pspl.spssphhh.....h..hss...Lsss.....p.....pYplsclhhsspp................................... 0 51 55 55 +7027 PF07199 DUF1411 Protein of unknown function (DUF1411) Vella Briffa B anon Pfam-B_16764 (release 10.0) Family This family represents a conserved region approximately 150 residues long that is sometimes repeated within some Babesia bovis proteins of unknown function. 20.70 20.70 20.90 21.20 19.80 19.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.10 0.71 -4.86 6 33 2009-01-15 18:05:59 2003-09-09 17:23:38 6 3 15 0 12 26 0 151.80 31 67.59 CHANGED MSGHLVCKSGFGLGKVAKLMLASVVVLSAFSsNVWA.....scAEsspcPh.tph-GGhhot...............h--hpKL..pshsDlspT-NTEKTsLuDlSVKLGPhc+sM..hlKcVDVDMphLN+RIollLp+lTsaGP-SPsFGlSENIVKsLNK+GoIElPKcLAQpLCphcoGKhppYEWppFpDtFuoalA-hlss .......................................h..............................................sp.s..tphpshh.oh...............h.pss...--hpKh..p.h..lpp.sc.T...EKo.l.h.lh.lp.ts.hcpshphplp.sssVp.h.hspccLoolLt.tlsphsPto.hhulScNllchLsKpp.p...-h.pclAptLsplcothhth.E..................h................... 0 12 12 12 +7028 PF07200 Mod_r Modifier of rudimentary (Mod(r)) protein Vella Briffa B, Wood V, Mistry J anon Pfam-B_16631 (release 10.0) Domain This family represents a conserved region approximately 150 residues long within a number of eukaryotic proteins that show homology with Drosophila melanogaster Modifier of rudimentary (Mod(r)) proteins. 
The N-terminal half of Mod(r) proteins is acidic, whereas the C-terminal half is basic [1], and both of these regions are represented in this family. Members of this family include the Vps37 subunit of the endosomal sorting complex ESCRT-I, a complex involved in recruiting transport machinery for protein sorting at the multivesicular body (MVB). The yeast ESCRT-I complex consists of three proteins (Vps23, Vps28 and Vps37). The mammalian homologue of Vps37 interacts with Tsg101 (Pfam: PF05743) through its mod(r) domain and its function is essential for lysosomal sorting of EGF receptors [2]. 27.00 27.00 27.00 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.97 0.71 -4.29 17 469 2009-01-15 18:05:59 2003-09-10 11:27:18 8 9 245 5 303 465 1 139.30 27 52.13 CHANGED lpphShs-LpcLlpDc-thpc.hhtphsp..lpslppp+-phhspscpLAccNLthpspLcph+splp.pchppLpplppphpphhpchcch.upphSspslhphLQhsspcs-E-oEph..sccFLcGchsl-sFlppahphRphhHhR+hp.EKL ..................................pthohppLpclhp.sp...ph...lpp.hh...p....hsp....lpp.h....ptp..p-thls.sNcpL..A...............c..pNL..th.p..spLpp.hR...splp.pphpplpplppphpppp.p.ch..cc........h..pshosps.Lhth..LpststctEc.....-o-sh......s-pF..L.-...........G........c........h....s........l..p.sF.....lppa.phRphhHhR+.p.-+...................................... 0 82 131 214 +7029 PF07201 HrpJ HrpJ-like domain Vella Briffa B, Moxon SJ, Bashton M, Bateman A anon Pfam-B_16649 & Pfam-B_11026(release 10.0) & Pfam-B_1285(release 5.4) Domain This family represents a conserved region approximately 200 residues long within a number of bacterial hypersensitivity response secretion protein HrpJ and similar proteins. HrpJ forms part of a type III secretion system through which, in phytopathogenic bacterial species, virulence factors are thought to be delivered to plant cells [1]. This family also includes the InvE invasion protein from Salmonella. 
This protein is involved in host parasite interactions and mutations in the InvE gene render Salmonella typhimurium non-invasive [2]. InvE S. typhimurium mutants fail to elicit a rapid Ca2+ increase in cultured cells, an important event in the infection procedure and internalisation of S. typhimurium into epithelial cells [2]. This family includes bacterial SepL and SsaL proteins. SepL plays an essential role in the infection process of enterohemorrhagic Escherichia coli and is thought to be responsible for the secretion of EspA, EspD, and EspB [3]. SsaL of Salmonella typhimurium is thought to be a component of the type III secretion system [4]. 22.30 22.30 25.40 23.00 22.20 22.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.85 0.71 -4.23 36 767 2009-01-15 18:05:59 2003-09-10 11:43:52 6 4 513 10 60 405 7 160.10 23 46.32 CHANGED s...........hhpphEctutsLspRch...........tptppppphpphhchl-ch.-tpt-p...phcpLpptLtt.....hssssplhshlpthhsDsucthlsL....pthlpp....ps.spthtptlppslppLpp..cputplpuGlNs...Ahtuttaut.tsstphptLRsLYppsl...tsp.tshsshhpslhppa .......................................................tt.h.psh-chuhuLsphtt....................+tp.tcp.pphtpthp+llcpptps...........chcplhthh................tss.tph.s.hpthhs-suptllsL....pthLpp......cslpppl+cplpphLpcltt...pcshpl..Ghp......AlhutLhut...s......stshptLRphYcphl....psp.tshsphapclhsp.................. 0 16 26 41 +7030 PF07202 Tcp10_C T-complex protein 10 C-terminus Vella Briffa B anon Pfam-B_13039 (release 10.0) Family This family represents the C-terminus (approximately 180 residues) of eukaryotic T-complex protein 10. The T-complex is involved in spermatogenesis in mice [1]. 
21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.42 0.71 -4.73 8 224 2009-01-15 18:05:59 2003-09-10 11:56:30 8 9 126 0 134 206 9 150.70 39 21.12 CHANGED KlEplhssGpRlIhFPNGT+KEISADGcoVpVpFFNGDIKcs.hs-tpVlYYYu-sQTTHTTYPDGLEVLpFPNsQpEK+asDGoKEIpFPDsTlKhLpscGp...EEslaPDGThlplp+sG-KhIpFsNGQ+EIHTspaKRREYPDGTVKTVYssGpQET+YosGRlRlKDc-GplIhDoc ..............................................................Ehh.htsGpp.lhasNGshKcl...t..s......Dt...ts.......shlpF.hNGDlKph..hs-...t.p.l..lYaYA..s.sp..ThHTTa..P.............-.Gl.EllpF.ss.sQ...........hE+..+aP.DG..p.....K.EIhFPDsol...K.l.scGp...E.EohaPDGTh.hphp.h..sGs.Khl..h.sN...G.p.c...El.+T.tt.hK.+............+ca.PDGThKhlas.s.GppEoph.sGph+hc...G............................................ 0 63 74 99 +7031 PF07203 DUF1412 Protein of unknown function (DUF1412) Moxon SJ anon Pfam-B_16907 (release 10.0) Family This family consists of several Caenorhabditis elegans proteins of around 70-75 residues in length. The function of this family is unknown. 25.00 25.00 25.80 34.00 20.80 16.00 hmmbuild --amino -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.70 0.72 -4.05 7 36 2009-01-15 18:05:59 2003-09-10 12:39:10 6 1 5 0 36 28 0 51.90 69 68.96 CHANGED GII.R-RR..psYassNGVVNNhVoDshhGGPTSLGWAQVPHlhSPMFSPVFG+ ................GlI.RcRR..psYWhoNG.VV.NNhVSDNlsGGPTSLGWAQVPHhaSPMFSPVFGK.. 0 9 17 36 +7032 PF07204 Orthoreo_P10 Orthoreovirus membrane fusion protein p10 Moxon SJ anon Pfam-B_16940 (release 10.0) Family This family consists of several Orthoreovirus membrane fusion protein p10 sequences. p10 is thought to be a multifunctional protein that plays a key role in virus-host interaction [1]. 
21.20 21.20 21.30 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.43 0.72 -4.25 3 41 2009-01-15 18:05:59 2003-09-10 12:45:47 6 3 23 0 3 35 1 88.50 50 66.91 CHANGED MhpMsSGSCNGATSVFGNVHCQAAQNTAGGDLQATSSlIAYWPYLAtGGGlIlIIII.luLlYCC+AKVKsDAsRSVF+RELlALoSGKsNAsPPuYD.V ..........................sGsCsGhsulFGsVHCQuupNoAGGDLQATSslhs.YWPYLAuG..G.GhlLIlIl.luLlYCC+uKhKscusRssa+RELVALopuphpshssshp..................... 0 1 1 3 +7033 PF07205 DUF1413 Domain of unknown function (DUF1413) Moxon SJ, Bateman A anon Pfam-B_16942 (release 10.0) Domain This family consists of several hypothetical bacterial proteins which seem to be specific to firmicute species. Members of this family are typically around 100 residues in length. The function of this family is unknown. 25.00 25.00 25.10 26.30 23.70 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.09 0.72 -4.44 28 386 2009-11-04 10:27:19 2003-09-10 12:53:35 6 2 261 0 18 117 1 70.20 40 75.51 CHANGED hccthptlcplsssssFph+..DLasc.......pWsphspsp+p.plG+hFhphVppsstht..h.hh.s+cssspphYpK ........h+ctlhtlps.sctosFsF+..DLFS+.......pWlshShA-RQ.cst+tFta.VKphscV...h.hs.ut.csu..thplYp............ 0 8 13 18 +7034 PF07206 Baculo_LEF-10 Baculovirus late expression factor 10 (LEF-10) Moxon SJ anon Pfam-B_16893 (release 10.0) Family This family consists of several Baculovirus specific late expression factor 10 (LEF-10) sequences. LEF-10 is thought to be a late expressed structural protein although its exact function is unknown [1]. 
19.70 19.70 28.70 62.20 19.60 19.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.35 0.72 -4.33 21 48 2009-01-15 18:05:59 2003-09-10 13:04:46 6 1 47 0 0 45 0 71.10 41 93.58 CHANGED opsh.ts..DllssIL+cNLpLlDNsYlILNVlDpcss..p.......lcshClGEIsuhQs.cpssp.cuhSsoSsoSELpS .....s..s..ts.sDllssILKcNLpLlsNsYIILNVlDpcss...p.........l+shClGEIsuhQT.csssp.-shSsSSsoSELpS... 0 0 0 0 +7035 PF07207 Lir1 Light regulated protein Lir1 Moxon SJ anon Pfam-B_16937 (release 10.0) Family This family consists of several plant specific light regulated Lir1 proteins.\ Lir1 mRNA accumulates in the light, reaching maximum and minimum steady-state levels at the end of the light and dark period, respectively. Plants germinated in the dark have very low levels of lir1 mRNA, whereas plants germinated in continuous light express lir1 at an intermediate but constant level. It is thought that lir1 expression is controlled by light and a circadian clock. The exact function of this family is unclear [1]. 25.00 25.00 39.60 39.50 20.40 19.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.12 0.71 -3.96 5 52 2009-01-15 18:05:59 2003-09-10 13:11:41 6 1 31 0 16 46 0 122.90 41 93.23 CHANGED MQsAuShslolsus.soosoupShshhhPpphps.pAsRpsphRl+Auuuuss.DsuTVDYsSsh.SVFPAEACDTlGGEACsu-MYPEVKLcs-ussss..sAsoEsVDREYLEY.s-PKTVFPAEACDDLGGEFC-P- .........................................................s..p.....h.h............t.pthphts...u.s.sss-suTVDYs.Ssh.SVFPAEAC-slGGEACp.u.-MYPEsKLpspu.tsss......psssEsl-REYLpY.s-PKTVF.sEACDDLGG-FC-s......... 0 1 9 13 +7036 PF07208 DUF1414 Protein of unknown function (DUF1414) Moxon SJ anon Pfam-B_16906 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 70 residues in length. Members of this family are often referred to as YejL. The function of this family is unknown. 
25.00 25.00 48.30 47.80 20.30 19.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.81 0.72 -4.25 27 785 2009-01-15 18:05:59 2003-09-10 13:16:00 6 1 783 13 90 230 8 44.00 72 58.77 CHANGED HcAPsDLSLMlLGNhlTNllsppV.sssQRtslA-pFucALtpSl ..HKAPTDLSLMVLGNMVTNLINTSl.APAQRQAIApSFAcALQSSI. 0 8 28 61 +7037 PF07209 DUF1415 Protein of unknown function (DUF1415) Moxon SJ anon Pfam-B_16932 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 20.90 20.90 23.20 22.60 19.50 19.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.19 0.71 -4.53 56 495 2009-01-15 18:05:59 2003-09-10 13:18:37 7 3 477 0 142 398 77 170.90 45 88.00 CHANGED pptlls.pTcpWlccsVlGLNLCPFA+p.ht+spIRasVo-AsshcslLpsLhpELphLttssst-l-.TTLllhPps..LpDFhDYpDhlDhA-sLLtphsh-GlhQlASFHP-YpFsGsssDDspNaTNRSPYPhLHLlREsSl-+AltsaP-.sEsIPERNIphhccLGt-....tappL .........................p..tlltpTcpWLpcsVIGLNLCPFA+tshh.+....pp...........lRhhVS-Aps.-s...lLp..-LhpELptL.tt...ssspcl-.TTLllh.Pph....hpD.FhDYND.hl.-hA.-sll..pp..s..hEGl.....hQlAoFHP-YpFs...Go...-....sD.DhpNaTNRSPYPhLHLlRE-Sl-+Alps...a.P.D....s-sI.E+NIsplccLGt-thpp.h.................. 0 41 83 116 +7038 PF07210 DUF1416 Protein of unknown function (DUF1416) Moxon SJ anon Pfam-B_16939 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 100 residues in length. Members of this family appear to be Actinomycete specific. The function of this family is unknown. 
22.10 22.10 22.10 22.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.84 0.72 -4.37 9 209 2012-10-02 19:08:27 2003-09-10 13:24:12 7 3 183 0 68 190 15 85.90 65 81.51 CHANGED sulDltKEsVIpGpVh.psGpPVssAYVRLLDusGEFTAEVsoSAoGpFRFFAAPGoWTVRALsssusu.-tpVsAcsuslppV-lsV .........AuVDlEKETVITG+Vl..suDGpsVGGAFVRLLDSosEF.TA.EVVASATGDFR....FFA.AP..GoWTLRALSsuGNG...Dus.VpPsGuGlHE.VDlpl........................ 0 23 53 63 +7040 PF07212 Hyaluronidase_1 Hyaluronidase; Hyaluronidase protein (HylP) Moxon SJ anon Pfam-B_16578 (release 10.0) Family This family consists of several phage associated hyaluronidase proteins (EC:3.2.1.35) which seem to be specific to Streptococcus pyogenes and Streptococcus pyogenes bacteriophages. The substrate of hyaluronidase is hyaluronic acid, a sugar polymer composed of alternating N-acetylglucosamine and glucuronic acid residues. Hyaluronic acid is found in the ground substance of human connective tissue and the vitreous of the eye and also is the sole component of the capsule of group A streptococci. The capsule has been shown to be an important virulence factor of this organism by virtue of its ability to resist phagocytosis. Production by S. pyogenes of both a hyaluronic acid capsule and hyaluronidase enzymatic activity capable of destroying the capsule is an interesting, yet-unexplained, phenomenon [1]. 
25.00 25.00 37.30 56.60 19.60 19.60 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.75 0.70 -5.40 2 54 2009-01-15 18:05:59 2003-09-10 13:38:03 6 2 22 8 6 52 2 239.80 70 77.07 CHANGED NKPslsthApK.ETspKlpp....KADKpsVYhKAESK.ELDKKLsLpGGlhTGQL+hKPsuslthSSSTGGAlNIDhSpS+GAuhVhYoNpDToDGPLM.LRosK-TFsQSs.FVDY+GpTNAVNIsMRQPoTPNFSSALNITSuNEsGSAMQlRGsEcALGTLKITHENPSlcAsYDKNAAALSIDIVKK..osGtGTAAQGIYINSTSGTTGKLLRIRNhNcDKFYVpPDGGFauhtsS.lDGNLplKsPhuN-HAATKtYVDtcltcLKtLlstK .......................................NKPslsuhApKpETssKIscL.S.KADKssVYhKAESKhELDKKLsLpGGlhTGQLpFKPN.SsIc.SSSsGGAINIDMSKScGAuhVhYoNpDToDGPLM.LRosK-TFsQSA.FVDYpGpTNAVNIsMRQPoTPNFSSALNITSuNEsGSAMQlRGlEKALGTLKITHENPslcApYDcNAAALSIDIVKK...psGpGTAAQGIYIN.STSG.TT.GKLLRIRNh.s-.DKFYVpsDGGFauttsSplDGNLplKsPTuscHAATKsYVDpclpcLKtLl.cK............................. 0 1 5 6 +7041 PF07213 DAP10 DAP10 membrane protein Moxon SJ anon Pfam-B_16910 (release 10.0) Family This family consists of several mammalian DAP10 membrane proteins. In activated mouse natural killer (NK) cells, the NKG2D receptor associates with two intracellular adaptors, DAP10 and DAP12, which trigger phosphatidyl inositol 3 kinase (PI3K) and Syk family protein tyrosine kinases, respectively. It has been suggested that the DAP10-PI3K pathway is sufficient to initiate NKG2D-mediated killing of target cells [1]. 20.80 20.80 20.90 21.50 20.60 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.76 0.72 -4.23 3 40 2009-01-15 18:05:59 2003-09-10 13:48:29 6 2 25 0 16 39 0 71.20 48 94.25 CHANGED MlPPGHILFLLLLPVAAAQTTPGSCSGCGPLSLPLLAGLVAADAVsSLLIVGVVFVCARLRSRPAQEDGKVYINMPGRG ...............M...GtlLhL.L.L...LsV...uA..sQ..............sos..............G..SCSGCGsLSLPlLAGLVAADAVhoLLIVssVFhCAR...RppPsp.ccs+VYlNMPsR............... 
0 2 3 6 +7042 PF07214 DUF1418 Protein of unknown function (DUF1418) Moxon SJ anon Pfam-B_16971 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 100 residues in length. Members of this family are often described as YbjC. In E. coli the ybjC gene is located downstream of nfsA (which encodes the major oxygen-insensitive nitroreductase). It is thought that nfsA and ybjC form an operon an its promoter is a class I SoxS-dependent promoter [1]. The function of this family is unknown. 20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.39 0.72 -4.36 7 515 2009-01-15 18:05:59 2003-09-10 13:56:04 7 1 507 0 26 140 0 94.00 67 98.88 CHANGED MRolGsLP+sVLILEhLGMlLLslAhLSlNpYLoLPushuoPpAullMIFlGlsLMlPAAlslhWRlAphhuP.L.....hs+PPp.sp....otc-KpsDusH .....MRuIGcL..PKuVLILEaIGMhLLAVAL.LS.ls-.L.SLPcPFupPp..............lt............ILMIFLGVLLMLPAAVlllhpVAKtLAPQL.....MsRPPphSc......S-REKcNDuNH................ 0 1 5 16 +7043 PF07215 DUF1419 Protein of unknown function (DUF1419) Moxon SJ anon Pfam-B_16972 (release 10.0) Family This family consists of several bacterial proteins of around 110 residues in length. Members of this family seem to be specific to Agrobacterium species and to Rhizobium loti. The function of this family is unknown. 21.60 21.60 21.70 22.30 21.50 21.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.60 0.72 -4.20 15 46 2009-01-15 18:05:59 2003-09-10 13:58:33 6 1 32 0 27 48 1 105.70 53 56.29 CHANGED Ms..sPlRKVFpGVAcRcQMFRhFDRHuQRPsRacuDsusLYsGEWFEIspspHDYMhEILPPLWMRu-MFAMREFLTGslTSVFFsL+IDGRhRaFHGYCDLSD+sSPEcM .................p..sshRKlapGVAsRpQMFchFDRHsQcPsphcsDsusLYsGEWFEIucspHDYMhEILPPLWhR....G....sMFAMREFLTGolTSVFFsLpI.DGphRaFHGYCDLuD+uSsEcM.............. 
0 2 15 20 +7044 PF07216 LcrG LcrG protein Moxon SJ anon Pfam-B_16974 (release 10.0) Family This family consists of several bacterial LcrG proteins. Yersiniae are equipped with the Yop virulon, an apparatus that allows extracellular bacteria to deliver toxic Yop proteins inside the host cell cytosol in order to sabotage the communication networks of the host cell or even to cause cell death. LcrG is a component of the Yop virulon involved in the regulation of secretion of the Yops [1]. 21.70 21.70 23.00 24.80 21.20 19.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.10 0.72 -3.96 4 75 2009-01-15 18:05:59 2003-09-10 14:19:17 7 1 70 0 9 31 0 95.30 52 99.54 CHANGED MKps..s-aoc.....TlppAELAItDSDcRscLLpEMhtGLGluspAsplLFtu...ssE.hpsAEpELLsElpRpRppQPp.QshpG+RsRRPThMRGhlI .....................MKss..s-asc.....TlcpAELAItDS-cR.ucLLpEMhtuL.GLsPpAsplLFuG..tssE..h+sAEcELL-El+RpRppQPQ...ps..tcG+RPRRPTMMRGhlI 0 1 3 4 +7045 PF07217 Het-C Heterokaryon incompatibility protein Het-C Moxon SJ anon Pfam-B_16951 (release 10.0) Family In filamentous fungi, het loci (for heterokaryon incompatibility) are believed to regulate self/nonself-recognition during vegetative growth. As filamentous fungi grow, hyphal fusion occurs within an individual colony to form a network. Hyphal fusion can occur also between different individuals to form a heterokaryon, in which genetically distinct nuclei occupy a common cytoplasm. However, heterokaryotic cells are viable only if the individuals involved have identical alleles at all het loci [1]. 
20.60 20.60 21.60 21.40 20.30 19.80 hmmbuild -o /dev/null HMM SEED 606 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.11 0.70 -6.32 20 311 2009-01-15 18:05:59 2003-09-10 14:28:57 6 5 127 0 157 385 0 398.00 44 72.79 CHANGED Mushh....ssLLlhhll.LllLPspstAFGAGNIASISpVEG+NWRHGDIEDhLtTlAhh+.......G+KaoshhVKRVYFGNWLRDYSQAlDVGTLK.uVsA-TIRILVWVLuFhoFGYATuEFEVTsERLGsYRPEEHIDNPKsYADspDARcYDsRLRGPVcp.tEL-IDPcTGMKNYIANE....sGGW.uTSAuYlRhohuRsIHaGRhYo.....cpGp-pDLpEALRhLGQuLHsLED.FuAHoNYsELALhEhGa+s......VFPHsGssTplpl.pG++..VYPLVTGTFGuVDFlHSVLGEAoDHhTQSElsEl-tALssAcssssss......................s.uslpuLLuKlP..........G.....sGDshsscA--LcppSpAQptpNpp..t............................................t.s...lsshpP-h.......ssh...cshtQIYPILEFRDclV+uIsshI...EKIPGLpsLlEpIoEsLTlFVhoLLAPFl+PIIsploppLpsGSSuVIsoStpcQaEsWssspCoDPTHSMLSKDHFSNlLNEPAG+VASsIlpYVVPRllhAW-ssslsscpVl--hLp.VFHHPAlRs.....psEh+RpMFcsVcpWhpspscctpc.LpchLSuEGV+sG+N+......psGspspuHupGpshshsts..s................uputsuuuhhpphppp 
...........................................................................t................................................................................................a.......................................................................................................................................................................thcs..l.......................hthD..ouhKtYl.....t.........t.......h..TSsshl+h.ht+sIchGRhYs.....tpspctDLhEALRhLGpuLHsLE.D.FsAHSNYsELuLh-hGp+p.......VF.PHVGssTplpl......p.....G+p................VaP....LVTGTFGuVDFlHSVhGE..hsD+h...o.....QsElpphp.tl.tupt.tt.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 47 88 130 +7046 PF07218 RAP1 Rhoptry-associated protein 1 (RAP-1) Moxon SJ anon Pfam-B_16981 (release 10.0) Family This family consists of several rhoptry-associated protein 1 (RAP-1) sequences which appear to be specific to Plasmodium falciparum [1]. 
20.50 20.50 251.30 106.30 20.40 20.40 hmmbuild -o /dev/null HMM SEED 782 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.51 0.70 -6.44 2 109 2009-01-15 18:05:59 2003-09-10 14:37:53 6 2 16 0 7 110 1 637.70 65 99.73 CHANGED M.hhluSLlllFasLapNVusGIslNG-pphspph.sp-FN.DDhN.Wh.lsctpFLNoapcphSs.SFlEsKuSh-sG.......SpsscuSppGpG+...............DShsDhpFhss.s.s.KsupPpuspspu.tSSSsupopuSu.SssKSuS.aGtSs.S-.SshcSuptSu.....VGucE.t...........pEhY......FshpashpphpcphsILKN-hshVtpcEth.hDEphcchcpc+Kcth.KhhtEuD.E..hhsEp.pFlcpphp-ppltGuFocFhSpLNPFKK-.h..KpElShhTa..Is.plssKE..h+sLGlutpYp.Y.pohLYsCPNss.hFDohEsLptcl.c.+c+Euhhsphh-ppKECLKNhGlhDhELsDspsKhGssIGShGEhHlRLYEhENDLhKapPslDYhTLADsYKL.KNcl.pLppVNFCLLNPKTLE-FLKKKEIh-LM.G-D.ItYcEpFspaMp.SIsCHlESLIY-Dl-uSQDhthVLK.sKSKLallpsGLoYKu+KLl.KlasEIQKNP-.laEKLTWIY-NhYhlKR.aThhAhcsVC.pYlpHs..phYTpLp.hhshIl-.sRhYuuCFKNlhlYNAlISGIHEphKphhKLhPRpshL.DhHFpulhcKE.K..KhhpTsalh..aDPoVtuYAhhpl-RhsMVolINsaFEAKKKtLohhlupMKhDhholpNE-.KIPNDKuANSKLss+LhphaKAEI+caFKEMR.pYshLIphRY+uHhKKNYhhaKRL- .............................SLlllFasLapNVusGIslNG-pphupph.sp-FNhDDhN.Wh.lsccpFLNoapcphSs.SFLEsKSSh-sG.......SssscuSppGpG+...............DS+sDhpFhAsss.s.KTSpPpuspsuu.+SSSsup...o+uSS.SN........s....KSuS.aGcSspS-.SshcSuppSu.....VGucE.s...........cEhYs.....FsYKashsphpcpIsILKNEhshVtpcEth.hDEphcchcpc+Kcth.Khht-oD..E..hhsEp..pFlcpphp-ppltGuFocFhSpLNPFKK-.h..KpElShhTa..Is.plspKE..h+sLGluhpYp.Y.pohLYsCPNss.hFDohEsLptcl.c.cc+Euhhsphh-p.pKECLKNhGlhDhELsDspsKhGssIGShGEaHlRLYEhENDLhKapPslDYlTLADsYKL.KNcl.pLppVNFCLLNPKTLE-FLKKKEIh-LM.G-D.ItYcEpFspaMp.SIsCHlES...LIY-Dl-u..........SQDht...h....V...L.K.sKSKLallpsGLoYKu+KLl.KlasEIQKNP-.laEKLTWIY-NhYhlKR.YThaAhcsVC.pYlpHs..phYopLp.hhshIl-.sRhYuuCFKNlhlYNAlISGIHEphKphhKLhPRpshL.DhHFpulhcKE.K..KhhpTsalh..aDPoVtuYAhhpL-RhsMVolINsaFEAKKKtLohhlupMKhDhhSLpNE-.KIPNDKuANSKLss+LhphaKAEI+caFKEMR.pYshLIphRY+uHhKKNYhhaKRL..... 
0 1 2 5 +7047 PF07219 HemY_N HemY protein N-terminus Vella Briffa B anon Pfam-B_16745 (release 10.0) Family This family represents the N-terminus (approximately 150 residues) of bacterial HemY porphyrin biosynthesis proteins. This is a membrane protein involved in a late step of protoheme IX synthesis [1]. 28.20 28.20 28.80 30.00 27.00 26.60 hmmbuild --amino -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.08 0.72 -4.50 19 1471 2012-10-11 20:01:00 2003-09-10 15:05:05 8 28 1455 0 312 955 302 108.30 35 26.03 CHANGED GhVhIphsuhph-sSlhtslhhllhhhsslhl.......lhhllpplhpsstpsptahcpp+RcRuppulppGlhshupGchstAc+hht+u.uch..s-pssLthLhuApAAptpGc ...............GYVlIp.hss.a.pI..Es.Slssh.llhlllshlllah.......l.hll+plhpsss+spsa.assRKc+RAcpthppuLlcLuEGDa....ppuEKhhs+...s...Acp.............u-p...P.........slshL.lAAcAApppGD...................................... 0 64 156 231 +7048 PF07220 DUF1420 Protein of unknown function (DUF1420) Moxon SJ anon Pfam-B_17056 (release 10.0) Family This family consists of several hypothetical putative lipoproteins which seem to be found specifically in the bacterium Leptospira interrogans. Members of this family are typically around 670 resides in length and their function is unknown. 
18.10 18.10 18.40 18.40 17.80 17.30 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.19 0.70 -6.16 2 13 2012-10-03 03:08:05 2003-09-10 15:58:07 6 1 9 0 3 11 68 573.50 57 99.04 CHANGED MKFGLDANlsYPPLSVlYSIhLIhGCDFLGFYILKLFEspLGclKNTWhRWQAPLhGALLLSV.LYPLAL.uhTsRhhMcSsAhhLslLGhhNlshFlKphppNhs.lsh.p.Nl.+Slpap..h-h....cp.K+sF.LFK.hh.hhsKFsLIshNchLNlFIhLLhluYGhLALCPlTNADSLDYHIGVAIEILNQGKMPsF.GWFHGRLAGSGEVLNALGLAIGAEQFGSLLQFsGLLuIYGILuFYSFhEK..tsDG.VWRcIIIIAFLSSPVLVFLVSSsKPQLLQlGMTSFAlsLLLEIhSKhKTDKNKL.hF.LICILIMSsTQAKFSFFLSAFLIGLhSlh.LGSIRLahYGlLIulFFhlLI.FPulFWKIKNasSohIDsllhPLPG.saPGVppFEssLRNYpDSsL.FPLSLlhPNpFGVlTTlIG.GLFLlIFVKPIss.psFlLShhIIlFVILG.LhGQpASRFFLEPFlWhLISLIslsshth.Nl+Fs+psluhhllLQAshThsIl.lGIYQLhPGVFSIp.REKVMsQYuNGYsLMKWsG.sLPK-AVLLSQHRSlALSERKTLSLDWh.FVsFsShlASPYL+pIKDENVTHILhhG-hSKsoPF.GCIGNsIGKTKSp.shRNPhs.pDaFTsILsEFp.spLsQCuN.lL ..........................MKFGLDANlsYPPLSVlYSIhLIhGCDFLGFYILKLFEspLGclKNTWhRWQAPLlGALLLSVlLYPLAL.uhTsRhhM+SsAhhLslLGhANIshFlKphppNhsSlsYhp..Nlh+SlpapS.h-h....cp.K+sF.LFK.hh.hhsKFsLIspNchLNlFIhLLhluYGhLALCPITNADSLDYHIGVAIEILNQGKMPsF.GWFHGRLAGSGEVLNALGLAIGAEQFGSLLQFsGLLuIYGILuFYSFAEKF.psDG.VWRcIIIIAFLSSPVLVFLVSSPKPQLLQlGMTSFAlsLLLEIFSKhKTDKNKLhhFS.LICILIMSsTQAKFSFFLSAFLIGLhSlhpLGSIRLFFYGlLIulFFFlLI.FPulFWKIKNasSoaIDsllpPLPG.saPGVppFEssLRNYpDSsL.FPLSLlaPNpFGVlTTlIG.GLFLlIFVK.PIVsp+sFlLSlhIIlFVILG.LhGQKASRFFLEPFlWhLISLIsLsShup.Nl+Fs+psluhhllLQAshThsII.lGIYQLFPGVFSIplREKVMSQYANGYoLMKWsGhsLPK-A.VLLSQHRSIALSERKTLSLDWh..FVDFsStlASPYLKpIKDENVTHILhaG-TSKsoPFpGCIGNsIG..KTKSppVsRNPFN+pDaFTsILsEFp.DpLPQCuN.lL................ 2 1 2 2 +7049 PF07221 GlcNAc_2-epim N-acylglucosamine 2-epimerase (GlcNAc 2-epimerase) Vella Briffa B anon Pfam-B_17012 (release 10.0) Family This family contains a number of eukaryotic and bacterial N-acylglucosamine 2-epimerase (GlcNAc 2-epimerase) enzymes (EC:5.3.1.8) approximately 500 residues long. 
This converts N-acyl-D-glucosamine to N-acyl-D-mannosamine. 19.90 9.10 19.90 9.10 19.80 9.00 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.40 0.70 -5.43 20 1825 2012-10-03 02:33:51 2003-09-10 16:14:30 6 21 1288 23 552 1700 239 297.20 21 69.11 CHANGED Gh.Fp...p.LctcGpshsssc..tchhspsRplasFAhAtt.h.G.hsG.hh.s.h...ssHGlcaLppshR..DsppG...Gaahsl..s.ssu.s.hDssccsYsaA...FllhAhus.A...htuGts...cA..cshh...s....cshsll......-p+hhctctsh.ht-phstsashh...........G....pNsp..MHhhEAh.LAhh-sos..-tp..aL........spAtplschhhpchhs.....tsshhlhEaFchpWss..shst-...t.hcshhhpPGHthEWuhLl....lp....l.t......ttptsschl.spAcpLattulspuhshctu.GlsYh.hshpupsss....ps+h..hWspsEsltAA...stLtpt.oG-..pc...YhphhpphhshhtpahhD..t.hGtWacplstcsp..lspsl.uuposhYHhhsA ..................................................................................................................h.at.h.hs.t.p.G.p..h.t...t....thhhpsR..............lasau...huht.....h........s......p...st....hh...p..h...............scp.uh.c....hh.p.t.t.h.t.....Dt..phG..............G..a..a.htl..............s.stt...s......hc...t.......p.....+..p......hY.....sps..............F.....s...lh..uhup...s..........h.t.s.G.p.s....ps..p..ph..h...p...pshphl...................pchh.hs...t.t...t....s.h...hh.pp.h.s.t.shp..............................ps........................tN.s.t.....M.Hhh..E.uh...l.....h....h.h.c....s..pt.............-tt.......ah............................pph...pl....hph...hh....p.phhp...........ttp.h...h.l..Ehh....t..........ps...thp.............................s..h............hp..P........G.H.h...E...huhLh..........hp....ht.....................t.tt.....thh....tt....A.......t.l.......h....t...h...s..h...p...t...u..h..........s....p........t...u...Gh....h....hh....h..s.h...p...s.p..shs...................pp+h....hWs.s....Esh..hsh.............h.h..h...h....ph........o...uc........tp.............h..ph.hpph...hp.a.hh.p.ah..h...........D....p............G.t.W.a.t...l...stpsp...st.t...t.........s......
......................................................................................................................... 0 172 349 448 +7050 PF07222 PBP_sp32 Proacrosin binding protein sp32 Moxon SJ anon Pfam-B_17278 (release 10.0) Family This family consists of several mammalian specific proacrosin binding protein sp32 sequences. sp32 is a sperm specific protein which is known to bind with with 55- and 53-kDa proacrosins and the 49-kDa acrosin intermediate. The exact function of sp32 is unclear, it is thought however that the binding of sp32 to proacrosin may be involved in packaging the acrosin zymogen into the acrosomal matrix [1]. 20.70 20.70 98.50 21.10 18.80 19.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.79 0.70 -5.19 3 56 2009-01-15 18:05:59 2003-09-10 16:20:49 7 3 28 0 25 44 0 194.90 54 48.61 CHANGED Mh+sAAGFLLMLLcVLLLPLoPsoAE-STPASTPGSPLSsTEYERFFALLTPTWKAETTCRLRATHGCRNPTLVQLDQYENHGLVPDGAVCSNLPYASWFESFCQFA+YRCSNHVYYAKRVhCSQPVSILSPNTLKElEuSAEVsPTTMToPIusHsTlTE+QoFQPWPERLsNNVEELLQSSLSLGGp-Qssu...t+PKQEQhlp++QE.lQEHKpEEuQcQEEQEEEE..cEEEAKQEEGQGT-- .........................................p.Asu.l..LLcVLLLshs.PssAp-o...u.sTPGSPLSsoEYERFFuLLTPTWKAETTCRLRATHGCRNPTLVQLDQYENHGLVPDGAVCSsLPYASWFESFCQFsQYRCSNHlYYAKR..VhCSQ..PlSlLSPsoLKE....l-sssEVs.soo.hT.sP.hss+hpsTE.pQs.psWs-RLpsNlc...cLLpuuhSLsGpcp..t..........tpc.....+tQt.http..pttp..p-Qtc............................................................. 0 1 1 11 +7051 PF07223 DUF1421 Protein of unknown function (DUF1421) Vella Briffa B anon Pfam-B_17006 (release 10.0) Family This family represents a conserved region approximately 350 residues long within a number of plant proteins of unknown function. 
29.00 29.00 29.30 29.30 26.90 28.40 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.77 0.70 -4.67 9 104 2009-12-15 14:25:35 2003-09-10 16:29:41 6 4 24 0 56 102 0 285.30 31 67.71 CHANGED L+EVQsuVQlL+DKQEIsETQhpLuKLQls+tp..ppscspu.ps-sptpsssP..ht.p..t.hP..phshAhP......u.........lPss.us.Ps.p.psQ..........Psp.Qh..........................................................P.phPsp.lp.....s.P...........ttpPaasPP...u.QsptsPsQQ.....hp..PP.........................Q.tPsPtsPppppY....QsPPQhsQhpQ..............sPsPQ...hP.sssh.Pc....PY....tuhPPs...Psshs.p.P.usssP...hasss...sthYsssuuhPso.s..pt..PPP..t...............YshSGsPsp.u........usssatPptsup...utusuY...PphssAp....LPQuhshuSuP.shtt.....SspShpptPlsDll-KVsoMGFsRDQVcuslp+lTEsGQsVDhNslL ............lpEVptuVQlLpDKQElsEsphpLuKLQhsptp...pptp..ts.......h.p..ss.s..............h.........s.h.s...........s..............ss.........sp...................................t.p...........................................................s.p.s........................................hhs.s....t..p.......ssQQ......p...s..............t..t.s.P....s..pph...........Q..s..sQ.......Q......................pss...p...h...ss..............P.......Pa......s.sss......s.s.........s.s...s.....h..t.s.........h.ts.....sh.................sst....t................s........................a..su....s......s....s.......................hts.t.st.....shtss.Y....sp...ssAp....hPpt.....ssss.................ustshpphshs-hl-KlssMGas+-pV.puslp+hsEsGQslDhNslL.......... 0 8 34 46 +7052 PF07224 Chlorophyllase Chlorophyllase Moxon SJ anon Pfam-B_17130 (release 10.0) Family This family consists of several plant specific Chlorophyllase proteins (EC:3.1.1.14). Chlorophyllase (Chlase) is the first enzyme involved in chlorophyll (Chl) degradation and catalyses the hydrolysis of ester bond to yield chlorophyllide and phytol [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.96 0.70 -5.63 4 48 2012-10-03 11:45:05 2003-09-10 16:40:43 6 3 28 0 29 968 169 233.40 35 75.56 CHANGED sSssohssFpcGpapssplsl-............ssp.ssssPPKPLlIsoPsEtGsYPVlLFlHGhhLpNp.YSplhsHIASHGFIVVAPQLaplhP...PuspsElcsAApVhsWhs.sLpthLP...ssVpuNlphhAlsGHSRGGKTAFAlAL...Gau.....plpFSAlIGlDPVAGTuKsppTsPplLTYcPsSFsLs.IPVsVIGoGLG..thsshhssCAPsslNHcEFapECK.uspuHFVAsDYGHMDMLDDsh.uhhuh.huthhCKNGpc.+ssMRRhlGGIVVAFLpsahcsDsp-hhtIlts.ShuPhpL..usEhcsssshh .................................................................h......ssFp.G.ht..sp.hplt.............................ss...sP+......sL..h.l...h..sP...s..........t..G...s..a..PV.llFhH......G......h....h.....l.......h....N.....p......a.....Y...s.....p....l...ht....H....l...u.S.......H.G...aI.l..l...A........P.........Q........l....h........p....h....hs..................s...s..s................p.......-.....l.....c.........t........s...u..............p...........l......h...........s..........W......l.....s.....t......s..........L....t...t........h..L.P..........................t...V..p....s......s........l...p..+........l...slsGHS+GG.+.sA.F..A.lA...L.......shu....................plp.h...u...s..L.l.G.lD..P....V....sG.....hs.K.....t...p.h......P.l.......L.s....h...sp.SF..sh..s....hP........s.hV..I..GoG..L..G..........h....s.s.......CAP...tthNH..tpFa.cCp....st.a...Fl.stcYGHhDhLDD.....s.....h.u.....hhC.p..p.u.p..t...+p.MRchsuGlhVuFLps.h..sp....t.th..Ih....t........................th..................................................................................................................................................................... 0 4 20 26 +7053 PF07225 NDUF_B4 NDUFB4; NADH-ubiquinone oxidoreductase B15 subunit (NDUFB4) Moxon SJ anon Pfam-B_17132 (release 10.0) Family This family consists of several NADH-ubiquinone oxidoreductase B15 subunit proteins (EC:1.6.5.3). 
20.70 20.70 20.90 21.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.49 0.71 -4.32 10 149 2009-01-15 18:05:59 2003-09-10 16:45:59 7 1 114 0 79 140 1 106.10 34 93.10 CHANGED cYKsuPLuoLPpTLDPsEY.slSPEpRRApsERLAlRApLKRcYLLQhNDP+....R+ulIED.......PALsRWsaARshNlYPsFRPTPKTSLLGllaGhGPLlFWYaVFKTDRD++-chIpcG+L....DRsFslSh ...................................a.thS.-tpch..pRhthRspL+ppYhhphssPp.....ct..ull..D.......PALhRa.aAps.h.shY.pFRPTP...Ksu.hlG.h.h.h.uls.Pl..hhhhhshKo-RDtpcp.hppGph...c........................................................... 0 20 32 55 +7054 PF07226 DUF1422 Protein of unknown function (DUF1422) Moxon SJ anon Pfam-B_17087 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 120 residues in length. The function of this family is unknown. 20.80 20.80 20.80 23.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.37 0.71 -4.36 14 660 2009-01-15 18:05:59 2003-09-10 16:54:00 6 2 654 0 56 195 3 115.90 70 97.14 CHANGED Mspps.....ppE+tTLlLALlAGLshNushusLhoS.VsFSlFPlIALVLulYsLaQcYLppshspshPhlusAsFhlGlhhYSAllRApYPplGSNFhPhllulsLlFWIGhKLthhpppt .........M..KQu.sQD+GTLLLALlAGLSINGTFA...ALFSSIV.PFSVFPlISLVLTVYCLHQRYLNRTMPVGLPGLAAACFILGVLLYSTVVRAEY.PDIGSNFFPAVLSVIhVFWIGtKhRNRKQ.-s................ 0 3 15 34 +7055 PF07227 DUF1423 Protein of unknown function (DUF1423) Vella Briffa B anon Pfam-B_17028 (release 10.0) Family This family represents a conserved region approximately 500 residues long within a number of Arabidopsis thaliana proteins of unknown function. 
25.20 25.20 25.40 25.40 24.70 25.10 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.80 0.70 -5.82 10 189 2009-01-15 18:05:59 2003-09-10 16:56:15 6 7 34 0 96 177 0 301.30 29 51.54 CHANGED AREpV-llA-+M+chP-EaLEcLKpsLRuILEG..sup++-EhhhLQ+hVQoRSDLTscTLs+AHRVQLEILVAI+TGIQsFLHsslolSQssLlEIFLYKRCRNIACpStLPAD-CcCcICoN+cGFCsLCMClICsKFDFuVNTCRWIGCDlCSHWTHTDCAIR-u.IssGsSsK.uusGsuEMhF+CpACs+TSELhGWVKDVFQpCAPsWctEoLl+ELDaVs+IF+uSEDsRG+pLFhKs-ELl-KLKuplA-s.sAs+hILtFFQEl-.Duu+ShEss.-suRhI..APQ-As..N+IApVVpEulcpMphVu-EKhRhhKKARhul-sC-+ELc-KA+clupLchERp+KK.QlDELEoIVRLKQAEA-MFQLKAsEAR+EAERLpRIslAKo-KsEEEYASpYLKh+LsEAEAEKpaLaEKIKhQpcSp+s.pu.....GcssphhMhSKI+-LL .........................................................h......................................................................................................C+Nh..sCps..l.sspth.Cp.lCsppsuFCptChCslC.p+.F.D.sh.s.....ssp.WlsC........D........h......CuHhsHs-CAl+cthht.G.shp....ushu......h-h.FpChuCs+so-l...h.G.aVKclhphs.t.shph-sLh+cLphsp+lhpuScc.+...u+pLh.hscphht+Lc.st............................................................................................................................................................................................................................................................................................................................................................................................................................ 0 14 63 81 +7056 PF07228 SpoIIE Stage II sporulation protein E (SpoIIE) Vella Briffa B anon Pfam-B_17063 (release 10.0) Family This family contains a number of bacterial stage II sporulation E proteins (EC:3.1.3.16). These are required for formation of a normal polar septum during sporulation. The N-terminal region is hydrophobic and is expected to contain up to 12 membrane-spanning segments [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.18 0.71 -4.60 173 6344 2012-10-03 01:39:20 2003-09-11 09:29:08 7 400 2137 23 2480 8434 1928 192.30 20 35.38 CHANGED sssphslsluDVhG....+GlsA..Ahhhshlpssl+shs.tpshs..............................Pspllpclsphl.tphpss........................hasTshhsh........h-.tstplphssAGHss.sllhps.ts........sshhhh.tssGhs..lGl.ss..................hshpstphpLtsGshLllaTDGlh..........Est.....shssth....phl..phhtt............spphsctlhpplhp.....................hs..phpDDhsllll+hp .................................................................................................................................................................t..sphtlsluD.sh....G.........+G...l...s..A....A..h.h.h.....s..h...l.....p........s....h.....h.....c...s..h....s...p..p..s..hs...............................................................................ssp.ll...p.p....l.s.......ph.l.......t.p.hpps..................................................................hh..s.T...h..hhsh......................h-........p....s........t.......p........l....p..h.....s..s.AG.H..ss..s.h.l..h.cs...t............................ps.h.hh.h....t.s....s..u...hs...........l.G..l.....ss.....................................................hph..p..p..t...p....h...p....l...p..s...G....-.......h.l..llhTDGlh.........................................Esh.................s.ptp.h....h..t...p..phh..................phltt...............t.s......spp..h..hpt.l.h.ptltp.........................................htt.t...t.hpDDhsllhhch.t..................................................................................................................... 0 977 1887 2293 +7057 PF07229 VirE2 VirE2 Moxon SJ anon Pfam-B_17380 (release 10.0) Family This family consists of several VirE2 proteins which seem to be specific to Agrobacterium tumefaciens and Rhizobium etli. 
VirE2 is known to interact, via its C terminus, with VirD4. Agrobacterium tumefaciens transfers oncogenic DNA and effector proteins to plant cells during the course of infection. Substrate translocation across the bacterial cell envelope is mediated by a type IV secretion (TFS) system composed of the VirB proteins, as well as VirD4, a member of a large family of inner membrane proteins implicated in the coupling of DNA transfer intermediates to the secretion machine. VirE2 is therefore thought to be a protein substrate of a type IV secretion system which is recruited to a member of the coupling protein superfamily [1]. 25.00 25.00 35.30 34.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.89 0.70 -6.11 4 10 2009-01-15 18:05:59 2003-09-11 09:47:11 7 1 7 1 4 10 0 533.60 58 97.82 CHANGED MDPpupssutN.s.ph.tshtsts.ss.Ktp++cslsSoohs-IpMTsupEThE....GoPs+T-sluspLDt-.lDSpSStsSuSscGNQuElppELSuLFupMs.LPGpDRRsDEYILVRQTGpDtFsGTsKGsL-HLPTKAEFNAuCRLYRDGAGNYYPPPLAF-RIsVPspLpspht.hEs+-psK.pFQYKL-VWN+AHAEMGITGTcIFYQTDKNIKLD+NYKLRPEDRYlQT.+YGRRElQKRYpHpFQAGSLLPDIhIKTPcNDlHFVYR.AGDpYANKpFpEFE+hIKp+YGS-TEIKLKSKSGIMHDSKYLESWERGSADIRFAEFAGENRAHN+phPtATVNMGpQPDGQGGhTRDRaVSV-aLhQshPNSPWuQALK+GELWDRVQlLARDGNRYhoPpRLEYSDPEHFsplMsRVGLPtSMGRQSaAsSlKFEpFssQAAVIVAsGspLRDI+DLSsEcL.QQloppcVLlADRNEpGQRTGTYTSlsEYcRLhh+LPsDAApLLuEPsD+YSRDFVRP-Psh.PIsDSR..RsYEsRsRuQoVNuL 
...............................................................................................MDPpu.ssucNls.stttshpsss.scsKRpK+cslsSoThsDIpMTsupEThE...pGSPT+TEslus+LDts.lDSpSStsSuSsHGNQuElppELSuLFuNMu.LPGpDRRsDEYILVRQTGQDtFsGTsKGNL-HLPTKAEFNAuCRLYRDGAGNYYPPPLAF-+IsVPtQLpspht.hEsKEpsK.RFQYKL-VWN+AHAEMGITGTcIFYQTDKNIKLDRNYKLRPEDRYlQTEKYGRREIQKRYcHpFQAGSLLPDILIKTPcNDIHFVYRFAGDsYANKpFpEFE+sIKp+YGS-TEIKLKSKSGIMHDSKYLESWE.RGSADIRFAEFAGENRAHN+QFPsATVNMGpQPDGQGGhTRDRHVSVDaLhQsAPNSPWuQALK+GELWD....RVQlLARDGNRYhSPsRLEYSDPEHFTpLMNRVGLPsSMGRQSHAsSlKFEpFDuQAAVIVAsGPsLRDI+DLSsEKL.QQLop....KDVLlADRNEKGQRTGTYTSVAEYERLhh+LPsDAApLLuEPoDKYSRDFVRPEPAh.PISDSR..RsYESRPRuQoVNSL........................................................................ 0 1 3 3 +7058 PF07230 Peptidase_S80 Phage_T4_Gp20; Bacteriophage T4-like capsid assembly protein (Gp20) Moxon SJ anon Pfam-B_17388 (release 10.0) Family This family consists of several bacteriophage T4-like capsid assembly (or portal) proteins. The exact mechanism by which the double-stranded (ds) DNA bacteriophages incorporate the portal protein at a unique vertex of the icosahedral capsid is unknown. In phage T4, there is evidence that this vertex, constituted by 12 subunits of gp20, acts as an initiator for the assembly of the major capsid protein and the scaffolding proteins into a prolate icosahedron of precise dimensions. The regulation of portal protein gene expression is an important regulator of prohead assembly in bacteriophage T4 [1]. This family represents the protease responsible for the proteolysis of head proteins, a critical step in the morphogenesis of many tailed phages, Cleavage facilitates the conversion of the prohead to the mature capsid. All these cleavages are carried out by action at consensus S/A/G-X-E recognition sequences at 39 cleavage sites. 
Evidence of multiple processing sites in nine phiKZ proteins appears to represent a built-in mechanism by which the phage ensures that the majority of the propeptide regions are removed, and emphasizes the essential nature of processing in phiKZ-head morphogenesis [2]. The family is classified by MEROPS as a serine peptidase. 25.00 25.00 49.00 48.90 18.50 17.60 hmmbuild -o /dev/null HMM SEED 501 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.58 0.70 -6.18 11 1179 2009-01-15 18:05:59 2003-09-11 09:57:38 6 2 283 0 0 965 2471 203.10 65 98.50 CHANGED M........lcLFuhhhc.-p.phpcchpscstShssPcssDGAp-lps......uuasuhh.ph.Gs...thpspp-LIppYRplstpPEVDsAls-IVNEAIV.-.tsccsVpl-L.ssochSpslKc+IpEEFccll.clLsF-++upchFR+WYVDuRIaaHKlIDhcpPpcGItELRhlDPpplchlREhhpp..s..sGsplh+uhc.............EaalYssps.phs...hsuth.tuss.tl..KIs+DAIsYuHSGLhD..tscphlluYLH+AIKPsNQL+MlEDAhVIYRloRAPERRlFYIDVGNhPppKApQYlpsVMpphKN+lVYDusTGclKNpppthSMhEDYWL.RR-GtpsTEVoTLPGuQshG-h-DVcYFp+KLYcALplPhSRhss-s..uGhslGt.usEITRDEL+FoKFltpLpp+FpplFpD.LKTpLILKsIIT.-EW--.t-pIphsFppDsYFsElK-hEILppRlNhlspl-P..alGKYaSp-Ylh+cIL+hTD--Icp.ccQIEpE ..........................................................................................................................................................................................................................................................................................................................................................................................................................NLPKsKAEQYLRDVMsRYRNKLVYDAsTGE...lR.DD+KaMoMLEDFWLPRR.EGGRGTEIoTLPGGQNLGElsDl-YFpKKLY+uLNVP.SRl-s..-s...G..F.N..L.GR.S..oEIhRDElKFsKFVuRLRKRFSplFsDhLKTQLlLKsllT.sEDW-p.M.p.-.HIQYDFlhDNaFsELKEsElhsERLshlst......................................................... 
0 0 0 0 +7059 PF07231 Hs1pro-1_N Nematode_res_N; Hs1pro-1 N-terminus Vella Briffa B anon Pfam-B_17124 (release 10.0) Family This family represents the N-terminus (approximately 180 residues) of plant Hs1pro-1, which is believed to confer resistance to nematodes [1]. 25.00 25.00 105.10 104.40 22.10 21.20 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.27 0.71 -4.69 5 36 2009-01-15 18:05:59 2003-09-11 10:30:34 7 2 19 0 22 37 0 176.50 48 40.19 CHANGED MVDLDWKpKMVsS..DhPsKSPKLShpc.....slSlPp.hplPslSo-ISsAAPulCSAYEhYLRLPELRpLWSSp-FPpWsNEPILKPALQALEITFRFISsVLSDsRPYINRREWNRRLESLsTcQIEIIAsLCEDEEp...hRGouPsAsLSuG......................s.o.sppaSEhSLLPRLATWpKSE-VAQRILhoVE ..........................................................................hss.sl.p..ssssshstAYE.YLRLPELppLWpupsFPsWssEsllKPALQALEITFRhlSsVLSDPRPYhsRREWsRRLESLAspQlEllAhLCEs--p.....ttpAPhAcLSuu......................s.shspphSEsSLLPRLAoWpKScslAp+IhauIE. 0 8 16 19 +7060 PF07232 DUF1424 Putative rep protein (DUF1424) Moxon SJ, Bateman A anon Pfam-B_17284 (release 10.0) Family This family consists of several archaeal proteins of around 320 residues in length. Members of this family seem to be found exclusively in Halobacterium and Haloferax species. The function of this family is unknown. This protein is probably a rep protein due to conservation of functional motifs. 
20.20 20.20 23.00 22.10 17.70 16.90 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.03 0.70 -5.68 2 13 2012-10-02 18:54:06 2003-09-11 10:49:46 6 1 10 0 1 12 0 291.40 36 69.10 CHANGED MApps....G.+LRcElThDTSRuV+AsShupAl-pFpuWYsDQRsTQhVVEpt.hGEpVGFchPNRFTPEYREMLYAKAQSLERGLREcWGsLLHTuMVTLTASoT---Gt.RP.l-HhcDLLpSWpAVhcALt+sLEsREaEYLAIlEPpp......uGYsHIHLGVFV+GPVVAEQFpsVLDAHl+Ns.sAGc-AHp.Vh--stDEsAVplRRSu+PsRpsGlENLGAYLAAYMAGEYGsEs.tMPtpVRAFYAsMWAoGpQWFRPSNGAQchMQPEp.D-t-SlEEWEMVGIAPEGDLtD.IIEVDPppPRsDPYRRLRTPPPGG ............................................................t.+AsoWucAlspFpsaasDtRssphshEsp..G....-psuhphP..sRFsP-YpchhYAKupuLER....GL....R..cc.......aG....phL......HTuMVTLTASSssps..G+.tPPl-H.......hc-Llc..SWcA......VR+AL.tRV......L-......u+......EW-YhsIhEPHp................uGYsHlHlGVaVcGs.....V.....VAEpFpPVlDuHVcNC.sAGc-AHp.h..h--..sts-csVpV+Rsup.sts..sGVE.......NLGuYLAAYMAG.YssEshEhPhphpAahAshWAouRQhhc.SsuApEhMps-.............................................................t.............. 0 0 0 1 +7061 PF07233 DUF1425 Protein of unknown function (DUF1425) Moxon SJ anon Pfam-B_17314 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 125 residues in length. Several members of this family are described as putative lipoproteins and are often known as YcfL. The function of this family is unknown. 25.70 25.70 25.70 26.80 24.40 25.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.96 0.72 -4.42 37 877 2009-01-15 18:05:59 2003-09-11 10:56:45 7 1 872 2 101 375 20 92.40 42 74.46 CHANGED lhhssssLusplslsslpssps.sshhpussplpsphpsshplpYRFhWYDspGhplpsp.ssW+slhlpGppphplpulAssspAsca+lhl+p .....................l.Vh-solLuAGlosEpPslo.sS..DhpssAoupL.h...NcppcPls.VpYRFYWYDscGL....E..h....+...P.L........E.t...sR.o..lsIPAcssVoLhusAshhsA+csRLYLa.L. 
0 11 35 69 +7062 PF07234 DUF1426 Protein of unknown function (DUF1426) Moxon SJ anon Pfam-B_17431 (release 10.0) Family This family consists of several Banana bunchy top virus proteins of around 120 residues in length. Swiss:Q9IGU4 is annotated a movement protein whereas most other family members are hypothetical. The function of this family is unknown. 25.00 25.00 189.00 188.90 20.20 19.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.56 0.71 -4.36 3 31 2009-01-15 18:05:59 2003-09-11 11:09:30 6 1 3 0 0 31 0 116.50 82 98.88 CHANGED MALTTERVKLFFEWFLFFGAIFIAITILYILLVLLFEVPKYIKpLVRYLVEYLTRRRVWMQRTQLTEATGDuElsRulVEDRRDQQPuVlPasP+VIPP.QsR...R-DQAhRusAGPMF MALTTERVKLFFEWFLFhuAIFIAITILYILLVLLFEVPKYIKpLVRYLVEYLTRRRVWMQRTQLoEATGDVElGRGIVEDRRDQQPAVIPpuSQV.PS.QsR...RDDQGRRGNsGPMF..... 0 0 0 0 +7063 PF07235 DUF1427 Protein of unknown function (DUF1427) Moxon SJ anon Pfam-B_17474 (release 10.0) Family This family consists of several bacterial proteins of around 100 residues in length. The function of this family is unknown. 21.10 21.10 21.10 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.17 0.72 -3.91 32 341 2009-01-15 18:05:59 2003-09-11 11:15:13 6 2 240 0 124 290 13 72.20 43 92.05 CHANGED KsYllSLuAGlLVGllYuLlsVRSPAPPlIALVGLLGhLsGEQllPls+phlsutthtst.httth..ph........ttttt.............ttppp.s .....h.YllSLusGlLVGllYullpV+SPAPPll.A.LlGLLGhLlGEQllPhs+phhtt....................................t........................... 0 19 46 82 +7064 PF07236 Phytoreo_S7 Phytoreovirus S7 protein Moxon SJ anon Pfam-B_17475 (release 10.0) Family This family consists of several Phytoreovirus S7 proteins which are thought to be viral core proteins [1]. 
19.80 19.80 20.30 20.80 17.10 19.70 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.56 0.70 -6.18 4 20 2009-01-15 18:05:59 2003-09-11 11:20:55 6 2 11 1 0 21 3 470.80 42 87.70 CHANGED hsAIVCluLLoE+sVLoRsLsDpsKcFYpulpGphh.tscsl..Tc+huppSl.lR+lVPosTlILDaKsEsFI+pN.shSL.DIsGSPSNTAPKTsFpSIMPSLSsLFssPFlQGAFRHslISoh.GptTsLLILVVGPPSGF.cp.sVuSuuSsV-ssoNAcIDLsclVulNosMhppTpLVSAuulpAMulsDVhl+CsSLDplLlshAlKYF+sYVsshpsuuhspATpIhLNosFcELFslpsscpthlts-shtsspsphRGlVLPhGHGp..ssLpp+HP-lFI-.-ulFoc-E+ucLsch+hp..-sDsaEE...hFsphVpKalppGcYGN+VIluH+sspLsssGlpIlGhaplss.pslcpplp...ShKsclDhs+pNWctlpspshVsssTlppLHctIL.Dhhssospull.ss...uscspEtIslpFhNGFPpcKaullpLEKsGIpVVu..phsDss.lVlsNsPshVSRutKuptp......psRu+hDAtplslDTDclSpsphIphl+oh .........MoAIVsVsLLSEpsVLsRsLsDhVKshYptlpGssssst-sI..pt+hospol.lp+hlPhsTVhl-ahDpcaIptNPshShM-IlGSsuNTAPKTTFQSlhPSLSALFGVsFIQGAFhHpVISop.GsphShLlLVlGPPusFh+psSVuuuSSlVpV-SsucIDLsDsVtINuhMlpsTKLVSASulpAhuls-V..KCsSLDshlIhpAlpYF+pYs.ttphGohDsATplhLNhPhcElFSsc..oupuc.Lpsc..sa+-.scupltGlVLPsGHGK..oTLsScYP-lFlplpt.hsc-cppth.....s...suDshEp...happhIp.pplscGcYus+V.lLucssccL.ssGl+llGsa+lss...spVccpls...shKschDphcpsWcshSG.shVoscslshLHDKlL.DllospsKulhhscp..........ph+scppIslhFKNGFP.t+aShspLpKpGIsVVp..-Ls-ss.lllsNtPppssRps.tpsK......sphpphcus.+VslDTDslop-phIsKlKs.L........................................ 2 0 0 0 +7065 PF07237 DUF1428 Protein of unknown function (DUF1428) Moxon SJ anon Pfam-B_17402 (release 10.0) Family This family consists of several hypothetical bacterial and one archaeal sequence of around 120 residues in length. The function of this family is unknown. 
21.00 21.00 21.20 21.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.23 0.72 -4.12 72 547 2009-01-15 18:05:59 2003-09-11 11:24:00 6 3 516 2 124 281 36 102.90 66 86.44 CHANGED sYlDGFVlAVPsuN+-sYpchAppAutla+.EaGAlchVEsWGD................DVPpG.........clTsF.pAVpscssEsVVFSWlhWPsKtsRDsuhp+hM....sDPRhps..s..sMP.FDGK ...pYVDGFVVAVPA-KKDAYREMAuKA.APLFK.EFGAL..RlV..ECWAs................DVPDG.........KVT.DFRMAVKAEEsEpVVFSWIEYPSKEVRDAANpK....MM....u.DPRMKE..hG-..sMP..FDGK............. 0 37 76 97 +7066 PF07238 PilZ PilZ domain Bateman A, Galperin M anon Pfam-B_17421 (release 10.0) Domain PilZ is a c-di-GMP binding domain [3] which is found C terminal to Pfam:PF07317. Proteins which contain PilZ are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias [5]. This domain forms a beta barrel structure. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.29 0.72 -4.00 191 5848 2009-01-15 18:05:59 2003-09-11 11:30:13 9 113 1860 15 1946 4774 350 102.40 16 32.82 CHANGED ppRpt..........Rhphphss..................hhttsspth.........p.....sthhDlShuGh..tlph.....tp..thphsp...........l........plph.thsstth................................htupVhphptp..............thGl...pF.......ths.pptppltphlht ................................................................hRp........R.ht.ht.h.s......................................hhht.s.s..phh..............................p..........spl....hDlS.tuGh..tlth....sp..........thphGp..............tl............plpl....phsptsh.............................................h.........htu.pl..h...phptp........................tlGl..pF.......ths...t..t..l.th...h.............................................................. 
0 680 1242 1622 +7067 PF07239 OpcA Outer membrane protein OpcA Moxon SJ anon Pfam-B_17433 (release 10.0) Family This family consists of several Neisseria species specific OpcA outer membrane proteins. Opc (formerly called 5C) is one of the major outer membrane proteins and has been shown to play an important role in meningococcal adhesion and invasion of both epithelial and endothelial cells [1]. 25.00 25.00 25.20 25.00 23.90 24.90 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.73 0.70 -5.46 2 56 2012-10-03 17:14:37 2003-09-11 11:44:44 6 1 36 2 2 42 0 202.30 53 89.45 CHANGED QELQTANEFTVHTDLSSISSTRAFLKEKHKAAKHISVRADIPFDANQGIRLEAGFGRoKKNIINLETDENKLGKTKNVKLPTGVPENRIDLYTGYTYTQTLSDSLNFRVGAGLGhpSSKDSIKTTKHTLHSSRQSWLAKVHADLLSQLGNGWYINPWSEVKFDLNSRYKLNTGVTNLKKDINQKTNGWGFGLGANIGKKLGESASIEAGPFYKQRTYKESGEFSVTTKSGDVSLTIPKTSIREY .....................................................sEaTV+TDlSp.o.ppA.LKEKHKstKpIuhRADhPFD.hppGhRhEsuauRsKKshhslpp.spp.hG........psV.EpRhDlYsGYTYTQsLS-uhphRsGhGLGaEp.KDu.ltspKtT.l.....popRpuahsKsHADLhSpLGsGWYlNPWuEVKhDLsu+hKhNssVsslppDINtK.TpGWGhGlGANIGKpLs-SsuIEAGPFYKpRsaKcSGEF.slst................................. 0 1 1 1 +7068 PF07240 Turandot Stress-inducible humoral factor Turandot Moxon SJ anon Pfam-B_17438 (release 10.0) Family This family consists of several Drosophila species specific Turandot proteins. The Turandot A (TotA) gene encodes a humoral factor, which is secreted from the fat body and accumulates in the body fluids. TotA is strongly induced upon bacterial challenge, as well as by other types of stress such as high temperature, mechanical pressure, dehydration, UV irradiation, and oxidative agents. It is also up-regulated during metamorphosis and at high age. Flies that over-express TotA show prolonged survival and retain normal activity at otherwise lethal temperatures. 
Although TotA is only induced by severe stress, it responds to a much wider range of stimuli than heat shock genes such as hsp70 or immune genes such as Cecropin A1 [1]. 20.70 20.70 24.50 52.70 20.60 17.90 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.88 0.72 -4.49 9 49 2009-01-15 18:05:59 2003-09-11 11:50:11 6 1 7 0 12 57 0 82.40 32 60.05 CHANGED susDcsoKh+sh.pLlshYc+assph.LsspcRtphcchlpcacscp....hVDGVPuQGGhhstlhttll..hu.tlssuhhcthhE ...usD-sTKhcsl.cLlshYc+assph.Lssp-+ppl-chlpcaccpp....lVDGVPuQGGhhtplltphls.suptlssshhp..................... 0 8 8 12 +7070 PF07242 DUF1430 Protein of unknown function (DUF1430) Vella Briffa B anon Pfam-B_17033 (release 10.0) Family This family represents the C-terminus (approximately 120 residues) of a number of hypothetical bacterial proteins of unknown function. These are possibly membrane proteins involved in immunity. 28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.35 0.72 -3.94 19 961 2012-10-03 05:18:07 2003-09-11 13:29:22 6 2 507 0 38 409 3 99.70 36 16.07 CHANGED Ilhlhhs..lhlhhphshhYFcpa++clhIK+laGhshhcpappalhhplhshlhshshhhlhpp..s.llhhhslllhlhphllhlhhp.+hpp+thhpll.KG .................Ilsluou..ILLF.ohNhLYFEpFRRplhIKRluGhphhEhHtp..YL...luQhulh.lh.Ghllohllp+......-hh.luhLslhlFhhpuLLpLhhQ.pKEp+hshtllKG................. 0 13 21 31 +7071 PF07243 Phlebovirus_G1 Phlebovirus glycoprotein G1 Moxon SJ anon Pfam-B_17508 (release 10.0) Family This family consists of several Phlebovirus glycoprotein G1 sequences. Members of the Bunyaviridae family acquire an envelope by budding through the lipid bilayer of the Golgi complex. The budding compartment is thought to be determined by the accumulation of the two heterodimeric membrane glycoproteins G1 and G2 in the Golgi [1]. 
25.00 25.00 44.80 44.00 21.40 21.10 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.14 0.70 -6.35 4 348 2009-01-15 18:05:59 2003-09-11 13:41:35 6 4 57 0 0 288 0 338.10 54 53.03 CHANGED sHlpNRPGpGtat..hssp-DusC+.lsYGssCpuFDahLchs+aPFFpSahcH+ohLEAhtDsI.Itppss.oCsl..sot+sstCh+EtthhKt+CPsshsusaYlsspGclssVKCcpNhELoEDCs.CRphptpu......pKh.hPLQDhhCQpsps-hossKphhKGlCcIGlhshKcCc.phoosaEsVsFhlhKs+tKlYl-phcl+sc-slpp-sFlCY....th.ss-ssuss+ttLKpVcVspCKtVssSpsKhCoGDpsFCS+YsCpsphs-saC.hAsGSGslplplsGVWlpPhClGYERVlVcREh.ts..s.pcsCDTCloEChcptllV+STGFcIouAVACSHGuChSspQpPSTplhlPYPGhupSlGGDIGlHhoH-ssssSsHhVsHCPP+DsChsHuCllCsHGllNYQCHTsLSAhllshllh.hlhhhLhllppsLhll+lhPthLlsPlsWlshFhtWhh+ph+h+stssIsRlNccIGWh-puths.pcscchp++..tPhsR.psshLhlL.LlhossS .............................................................................................................................................................................MASVKCP.P.K.YELTEDCNFCRQMTGAS.....LKKGSYPLQDLFCQSSEDDGSKLKTKMKGVCEVGVQALKKCDGQLSTAHEVVPFAVFKNSKKVYLDKLDLKTEENLLPDSFVCFEHKGQYKGT..M.D..S..GQTKRELKSFDISQCPKIGGHGSKKCTGDAAFCSAYECT....AQ.spshC.hhptsu.l.lphsGhhhhPhChGapchhsphp..t...pp.t.CssChhcC..ttlhlhohs.cl..AssCupt.C...................................................................................................................................................................................................................................... 0 0 0 0 +7072 PF07244 Surf_Ag_VNR Surface antigen variable number repeat Yeats C, Fenech M anon Yeats C Family This family is found primarily in bacterial surface antigens, normally as variable number repeats at the N-terminus. The C-terminus of these proteins is normally represented by Pfam:PF01103. The alignment centres on a -GY- or -GF- motif. Some members of this family are found in the mitochondria. It is predicted to have a mixed alpha/beta secondary structure. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.81 0.72 -3.56 395 14038 2012-10-01 23:48:22 2003-09-11 13:48:50 10 36 2604 32 3439 10633 6054 78.00 20 34.08 CHANGED hhlp..plph.p.G.........t..........p.thpsp...tlp....p..h.....p.....h....ptG.............shasppplpp....shppLp.......spGa.....hspV.......ps..psp....pt....tt.......h..slshplpEs ................................................hlpplph..p..G..........s........................p.th.psp........hlp.........p..l....t..............l...........ctG......................................................chasp.pp.lsp...........shc.pLp..........spGa..............h.s.sV..........ps....php.......s..............sp............l..slhhplpEt.................................. 0 1092 2111 2811 +7073 PF07245 Phlebovirus_G2 Phlebovirus glycoprotein G2 Moxon SJ anon Pfam-B_17508 (release 10.0) Family This family consists of several Phlebovirus glycoprotein G2 sequences. Members of the Bunyaviridae family acquire an envelope by budding through the lipid bilayer of the Golgi complex. The budding compartment is thought to be determined by the accumulation of the two heterodimeric membrane glycoproteins G1 and G2 in the Golgi [1]. 
25.00 25.00 27.30 25.60 24.10 24.50 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.62 0.70 -6.23 4 259 2009-01-15 18:05:59 2003-09-11 13:53:40 6 21 64 0 61 265 0 373.20 38 33.86 CHANGED CS-pllAsSKlspCsscGSsThCplSGTlhl+AGsIGuEoClhLKGsc-sppKFloIKTISSELsCREGpSaWTo.YsPpCLSSRRCHLVGEChus+C.pW+sspsStEFoGlscsplMpEN+CFEQCGGlGCGCFNVNPSCLaVHuhL+Ss++EAl+VFsClDWsHRlsLElTsasGchphVsLsu.oTpFhsWGSloLuLDuEGIoGTNSaSFlcSuuGuFAllDEsaS..PRcGFLGEIRCsSEuuAloAHcSClhAPsLIcYKPMhDplECTTsLIDPFAlF.RGSLPQTRNGpTFouSh-K+TVQAFTsGtV+AsloLshDsaEVsF.sptssCsAoFlNlTGCYSCstGARVClplpustssshascscDpSlsllhslpssspD.CpVLHFopP.Vc.-hhYSCsup.+shlIKGTLlAhsPaDDR++pGGsSsVVNPKsGsWsh.sWhSGLhsWLGGPLKThLhILGalALuIlhhllllllstpuVsph...slKKKs .........................................................................CSE.l...AsS+lhpCsp.....pGsp..s..pCphoGosll+hGslGuEuCL.h.lKG....p....-s.p.pp..h......lpIK.TlSSE..L..sC+EGpSaWTspasspChSoRRC+hh...G-Chss+C.papsNphS.sEashhtcs.pshttstCh-pCGGhuC.G.CFsssPSCLFh+tahpssp...tphhclapC.sW..s.....phh..l..p..l...T......s.....t...p...ht.......h.....h.....h..s.ssp......hphs.o.l...s.l........s....t.l.....s.s.....ssh.s...hp................t.........th.....ulhsts.s........sh.GhlG.ElpCsocpssh..p..sCh.hs.slhthp..hsth...C...s...phhs..shh...h.s..t..lP.th..h..ss..h.p..attphtppsh...psh.s.st.lpsthpl..h.c.s.hpl.h.t...pCput.h.h.ploGCYSC.tGup...h...phphpu..pt..p..s...h...h..pt.ttph.........s..ssps.........h.h..s..s.....lp.ph.hsCs.t...p..h.lpG.L...h..t................t.....p.................s..t.htth...h..t...........h.h....lh...h.hhhh.................................................................................................................................. 0 3 13 61 +7074 PF07246 Phlebovirus_NSM Phlebovirus nonstructural protein NS-M Moxon SJ anon Pfam-B_17508 (release 10.0) Family This family consists of several Phlebovirus nonstructural NS-M proteins which represent the N-terminal region of the M polyprotein precursor. 
The function of this family is unknown. 23.90 23.90 24.00 30.70 22.50 23.80 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.72 0.70 -5.23 3 151 2009-01-15 18:05:59 2003-09-11 13:59:48 6 2 22 0 0 160 1 102.90 40 14.73 CHANGED IasLLsVL.TlulLVsuslpLcophSSSRs-TCFSsoTsPEhIEtYWth-StKc-.MPph-L+CRhs.-uDhKhhop.shIStI+EVpTSssEL+hSCGsps+SLGtlITsDGLNN.hhGshIIsCsTs.a.psIssG.t.sRl..DapplKppA...-EK-thIph.+TK-sE-.....D.+sQlhhpEIpQlKNplpK+RN-....lhRGQE+RDAKplscEoMARlusL+pc.h.LT..........DElpphKsphp.hRp+.h..pTVlPA.........LhosALLS..suPlsA ..............................................................................................................................................................................................................................................................................................................................................................................................................................hpshss...................................... 0 0 0 0 +7075 PF07247 AATase Alcohol acetyltransferase Vella Briffa B anon Pfam-B_17349 (release 10.0) Family This family contains a number of alcohol acetyltransferase (EC:2.3.1.84) enzymes approximately 500 residues long found in both bacteria and metazoa. These catalyse the esterification of isoamyl alcohol by acetyl coenzyme A [1]. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.56 0.70 -5.95 9 349 2012-10-02 12:01:53 2003-09-11 14:19:53 7 8 173 0 229 370 1 388.30 14 80.46 CHANGED +hGhlEpYahtcstpphaoNFsVhuphNptls+p...LshALRplll+pshLspslh.th..p..thhsussahspP.........l.cplchsDVl.................catc.s.phsshh.phlsphhhshssss.PhW+Llllsstpst.....ppal.alssHshhDGlSuspFacDLhcpLsphcsssh..................-hlhcY.....scDhs....KLPtPIpptl-h..hsshhslshhhhss.lhpth.hpch..............ppshhsshsssptutpshp.hlphossclppI+spsKpp...csTlTsalpusahluLtch.h.......shhp.sphshchtlPhssR+alP-ctE...papYGshVuuspa..hIpshshppsp.pph.......WsLsphappplspuhpstcphpshGs...ph.llchhNlcchhpsch.ppp..RusshlSNlGhhs......pssptaplpDhhFuQssu.ht.sFsLsVlSTssGGhNhslohhccs.pppsp.......ccFhshhcphl .............................................................................................................................................................................................................h..sht.hh.p....h................................................l....lphpt...hl.....................................t........t............t.........h...p...h...p...t...h.........h.....h.................p....P.W+.lhl..l...pt..............................h.l..hhhpHshhDGhSuhhFacshh.ptLp.t.h.pt............................p......lhp...................ts...................l....P.....s..hp.hhth...........hh........h.............hh...p.....h......h...htp........................................sh...h....tht..h..t......s...p..s.hhh.h..l...pls..s...p..p..hpplhpts+pp....tsTlTuhlpuhhhhultp...........hhtt.tt.h.t...hpht....h.shsh.Rpa..h..sp.t............t...................h.s.s..hl........s..t.....p......h............h......th.p..h..................pth.............................Wphspphp.p.plp..p..t.h.p.p..t.......t....hh..............l.....h.p..h.h..s..h............t.hh..t.ph.....t.p.......ttsh.lSNl.Ghhp.................................ttt.h............ltph.hFsps.......ht...h.
.hshh.s.h..t.h.h.hs................................h............................................................................................................................................................................ 0 50 118 198 +7076 PF07248 DUF1431 Protein of unknown function (DUF1431) Vella Briffa B anon Pfam-B_17470 (release 10.0) Family This family contains a number of Drosophila melanogaster proteins of unknown function. These contain several conserved cysteine residues. 21.10 21.10 21.20 21.20 20.90 20.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.00 0.71 -11.84 0.71 -4.25 17 156 2009-01-15 18:05:59 2003-09-11 14:24:22 7 2 17 0 90 156 0 139.20 29 58.24 CHANGED Cpps..Cst.h..phDhpaY+PSDKttRcYQpTWsEC...s.hhhK.K+lCt.t.hhsPphtRRptptpstss..t...............s.h.ph....tt.CP+lphP.sC+suRtPPpCchs.Rt.osC.pKhps.YPSFSECp+ttlsphs..PhECpCLctsshCphapth++p ...............................................C.....h..phD.haY+sSDKttRcYQpTWsEC....s.hh..h+.KchCs.tthh....s.h...RRp..t..p.h.tss.....................t....h.........pt.Cs.+.hths.sC+su.+hsspCphh.+t.sp.C.pKhts.aPSFSECp+..h....h...shECtChp.hs.shCphht.ht......................................... 0 15 16 57 +7077 PF07249 Cerato-platanin Cerato-platanin Vella Briffa B anon Pfam-B_17594 (release 10.0) Family This family contains a number of fungal cerato-platanin phytotoxic proteins approximately 150 residues long. Cerato-platanin contains four cysteine residues that form two disulphide bonds [1]. 
21.30 21.30 21.50 21.30 21.20 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.53 0.71 -4.10 5 199 2012-10-01 21:39:58 2003-09-11 16:37:23 7 4 96 2 128 194 0 112.80 36 70.21 CHANGED lSVSYDsGYDDAsRSMssVSCSDGsNGLlTKY.WPTQGuVPGFPRIGGlsuIAGWNSPsCGTCaKlTY.NGNTIaVlAIDuAuuGFNIupoAMscLTNGpAspLGRVD.AsYpQVusSsC..Gl ......................................ploYDssYD.su...st.SlssVuCScGsNGL..hs+h..apT.........GplPsF.......PhIGGs....slsGW.N..S.s.s..CG.s..CacloY.....s......G.....c..oIhllAlDp.........u.s.u.....G...FNl...uhsAhstL.TsG.p.....A.....s.....p.....h.....Gp..ls......sshppVssopCu............................ 0 40 73 109 +7078 PF07250 Glyoxal_oxid_N Glyoxal oxidase N-terminus Vella Briffa B anon Pfam-B_17519 (release 10.0) Family This family represents the N-terminus (approximately 300 residues) of a number of plant and fungal glyoxal oxidase enzymes. Glyoxal oxidase catalyses the oxidation of aldehydes to carboxylic acids, coupled with reduction of dioxygen to hydrogen peroxide. It is an essential component of the extracellular lignin degradation pathways of the wood-rot fungus Phanerochaete chrysosporium [1]. 
24.30 24.30 24.30 24.30 23.90 24.10 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.93 0.70 -5.30 8 414 2009-09-11 10:08:30 2003-09-11 16:40:41 6 32 162 0 296 444 14 222.20 27 32.88 CHANGED MHhtLl.+ss+VlhaDRTsaGsSpluLPsGt.CR.ssPp.DtstKhDC.oAHSlLaDVsTNslRPLslpTDTWCSSGulpssGoLV.......QTGG.t-...G-+slRhFoPCs....ssoCDWlEhsss..Lus+RWYuTNpILPDG+hIllGGRc..........sFNYEFaPc.sp.sspsssl.FLt-TcD.spENNLYPFVaLLPD...GNLFIFANsRSIlaDacpN..pVVKEaPpIPGG.sR..NYPSoGSSlLLPL.....hpssslssEVLVCGG .............................................................................................................................................hh.u..as........s.s.th.c....s...l.t....l...h....o.ssaCuuG.s.hl.s.s..G.phl............................psG....G.......t.s......................Gt.p.s.l...R.h....hs.P..ss........t.ss..s.WhE.sst............Lpps..RWYsosthLs.D..G.p.l.lllG.....Gpp.................................sss..h.E.hh.Pp........t......sst...s..hth....h..ht...h....pc......st..........tNLYPalalLPs...GplF.....lhus....p....p.uhlh.D.tst....ph.hpp..hP...s...lP.......u......s...s......R...sYP.sGuushLPl.............s..t.....splllCGG............................................................. 0 131 220 266 +7080 PF07252 DUF1433 Protein of unknown function (DUF1433) Vella Briffa B anon Pfam-B_17690 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 100 residues in length. 
25.70 25.70 25.90 25.90 25.60 25.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.17 0.72 -3.78 17 1163 2009-01-15 18:05:59 2003-09-11 17:04:15 6 2 257 1 16 477 0 88.50 47 68.38 CHANGED aacpQcpRIslYlKYNlpc...hKSlpFTphcpoPMGs.hIcGYINssKchcFpAhIustcs...pFpsshshstE.Ls+LlK..c..tK...SVsEI..Kc ...................YaccQpcRIsLYhK.a.Nhpc...hKol..+F.T.s.h.c.puPMGu.hsIcGYINcN.K.c.cFsAahsspcN..aQF.ss.sl.h.hocc..LucLlK...cp...hK....Ss--I.Kcc............................... 0 5 6 13 +7081 PF07253 Gypsy Gypsy protein Moxon SJ anon Pfam-B_17444 (release 10.0) Family This family consists of several Gypsy/Env proteins from Drosophila and Ceratitis fruit fly species. Gypsy is an endogenous retrovirus of Drosophila melanogaster. Phylogenetic studies suggest that occasional horizontal transfer events of gypsy occur between Drosophila species. Gypsy possesses infective properties associated with the products of the envelope gene that might be at the origin of these interspecies transfers [1]. This family contains many members with full-length matches; however, it also includes a number of very short sequences and short matches of sequences with other unrelated domains on them, which cannot be excluded. These matches may represent remnants of once-functional genes. 
20.30 20.30 20.70 20.50 19.20 20.00 hmmbuild -o /dev/null HMM SEED 472 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.66 0.70 -6.17 4 136 2009-11-18 11:40:33 2003-09-12 09:50:16 6 8 26 0 22 137 1 259.90 27 82.28 CHANGED s+ITDaS+ANYIPVhDGcVLVa-ppshl+HSuNlSEYtshID.ET-plp-SF.PpSHM+KLLpVDssHLRshLslLplHH..RlARSLDFLGTALKVVAGTPDssDh.+I+hTEspLV-uNspQIhINocTQpQIN+LTDTINKllpupKuD.lDTsHLaEsL.sRNRllspEIQNLhLTITLAKhsIVNPsILDHsDLcollcp....sTPI...hpllEsucI+VLQSpNhIHhlItaP+lphpC.pKVslhsVsHpcTlLRLc-sslAECc+-hauVps.CohTs+sTFCcpu.....pp-oCsppLHAGssApCH.sQsSHL+tIp.VD-GlllIN-tsApVpsDsssclhlpGTaLITF.pcSATINtopFhNhcpslsKtPGllpSPlLNI.u+c.lLSlPhLHRhs.pNLhpI+shpc-Vsutt.sphhhshGsllssGlhhuhslhLshRt++uotphp+slcphs.sE-GppLctGlVpN ....................................................................................................................................................................................................................................................................................................................................................................................................................htu..upCp..h..tp....h..lptu.hh.hhst..h.h...tt..p..ltGoallpa.ppph.lstp.a.s...............................sh..lp..p.t................................................................................................................................. 0 0 0 14 +7082 PF07254 DUF1434 Protein of unknown function (DUF1434) Moxon SJ anon Pfam-B_17586 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 135 residues in length. Members of this family all appear to be Enterobacterial proteins. The function of this family is unknown. 
20.70 20.70 21.10 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.78 0.71 -4.34 9 557 2009-01-15 18:05:59 2003-09-12 09:52:45 7 2 555 0 49 179 2 124.40 72 96.04 CHANGED MsLWpsDLRVSWRoQhhSLLlHGllshllLLsPWP...u.YsslWLlLLoLVVF-ClRSQRRIpupQGElpLLsDspLpWptpEWpIl+pPWhl+.GhlLpLppsss.p+Rp+LWLAADSMspsEWRcLRpLLL....pp ..................MVLWQSDLRVSWRAQWlSLLIH.G.L.V.AA.V..I..LLMPW.P....LS..YT..PLWM.l....L...LSLVVFD....CVRSQ.R.R....I.N..uRQGEI+LLMD...GRLRW..Q.G...QE.......WoIVK...sPW.hlKSGMhLRLR.s-u.G.........+.........R......Q....HLWLAADSMDEAEWR-LRRILLQQ........ 2 4 14 30 +7083 PF07255 Benyvirus_14KDa Benyvirus 14KDa protein Moxon SJ anon Pfam-B_17595 (release 10.0) Family This family consists of several Benyvirus specific 14KDa proteins of around 125 residues in length. Members of this family contain 9 conserved cysteine residues. The function of this family is unknown. 29.30 29.30 30.10 213.00 27.00 29.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.80 0.71 -4.25 2 7 2009-01-15 18:05:59 2003-09-12 09:58:37 6 1 3 0 0 7 0 122.60 80 96.84 CHANGED MthssSlsVaVtcsIT......psschFSlKhupWpLFTstVaVpYRths-cEpslKDssRLHFphpCVpCspKlphKtpN+sH.calppGalRhsRNFSIlGsCucC...h-Shs.tDEhcpslV MGMVDSLCVFVGRVITEGSESVEGVERFSIKFSEWKLFTTAVYVEYRQLGEKECSLKDVGRLHFNMSCVKCCpKLKCKKQNKNHSKHVQNGYLRKVRNFSILGVCGDC...CESFTLADEKHHVIV 0 0 0 0 +7084 PF07256 DUF1435 Protein of unknown function (DUF1435) Moxon SJ anon Pfam-B_17631 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 80 residues in length. The function of this family is unknown. 
22.10 22.10 22.40 24.10 22.00 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.58 0.72 -4.00 7 529 2009-01-15 18:05:59 2003-09-12 10:16:13 7 1 520 0 37 208 0 77.10 68 86.83 CHANGED MLpRsLuSGWGVLLPGsllssLuahsLSh-thRlLIVluhLhTssMLYHppLRHalLLPSClALluGlhLhhhshp.G .....MLQRTLGSG.WGVLLPGlLIsGLhaADLSsDQWRIlIlhGLlLTslMLYHKQLRHYlLLPSCLALIuGlMLhlMNlNQG...... 0 1 9 21 +7086 PF07258 HCaRG HCaRG protein Moxon SJ anon Pfam-B_17801 (release 10.0) Family This family consists of several mammalian HCaRG(hypertension-related, calcium-regulated gene) proteins. HCaRG is negatively regulated by extracellular calcium concentration, and its basal mRNA levels are higher in hypertensive animals. HCaRG is a nuclear protein potentially involved in the control of cell proliferation [1]. 20.70 20.70 20.80 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.74 0.71 -4.73 74 1021 2009-01-15 18:05:59 2003-09-12 10:30:12 9 19 136 1 629 1011 16 157.10 16 82.18 CHANGED lshlsplspp.t.......htplhp....hshptlpttpstp................................htphtp.ht.....hstp..................phpt.lhpulthllppuspt...shsppphpppL.ppht......lsp-.ptpslsphatppppplpptltppsht.hsp.Lpsl..........pWRlclpluSsph..pc...htpPhhhl.pLplpss.......tp.pslshEhshsplppllpplcphppphpsh .................................................................................h........s............h..hhp.hhp..l.h..t.t.t.p.................................h..thtt..ht.......h.s..t......................phpt.hltshth.lhpp..s.spt........shst..pphp.ppL..t.p..ht....................h.sp..-.tt.......phl.s.p.h.at...........p.p.p.pp.lp.pt.h....h.p..pt.......ht...hsp..ltsh..........pW..+ls.hp.h.u.o.sph...pp...........htpPhhhl.pLplpps.......................tp.p.th.thphsh..tphpphhtplpphtt.hpt.......................................................... 
0 238 284 439 +7087 PF07259 ProSAAS ProSAAS precursor Moxon SJ anon Pfam-B_17813 (release 10.0) Family This family consists of several mammalian proSAAS precursor proteins. ProSAAS mRNA is expressed primarily in brain and other neuroendocrine tissues (pituitary, adrenal, pancreas); within brain, the mRNA is broadly distributed among neurons. ProSAAS is thought to be an endogenous inhibitor of prohormone convertase 1 [1] may function as a neuropeptide [2]. N-terminal fragments of proSAAS in intracellular Pick Bodies (PBs) may cause a functional disturbance of neurons in Pick's disease [3]. 21.70 21.70 21.80 82.20 20.90 21.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.28 0.71 -4.76 2 25 2009-01-15 18:05:59 2003-09-12 11:08:19 7 2 17 0 12 20 0 165.00 77 74.38 CHANGED MAGSPLLCGPRAGGVGILVLLLLGLLRLPPTLSARPVKEPRSLSAtSAPLVETSTPLRLRRAVPRGEAAGAV.thu............tRututsttucDppARVL.tphLR.....uWtS.........PRsp..PLAPDDDPDAPAAQLARALLRARLDPAALuAQLVPAPAsAPRPRPPVYDDGPphP..cctGscp.Dh ......MAGSPLLtGPRAGGVGLLVLLLLGLhR.PPsLsARPVKEPRuLSAASsPLAETusPRRFRRAVPRGEA.AGAVQELARALAHLLEAERQERARAEAQEAEDQQARVL.AQLLR......sWGu.........PRsSDPsLu..DDDPDAPAAQLARALL.RARLDPAALAAQLVPAP..A..A..Ah..R...PRPPVYDDGPsGPDsE-AGDETPDV.. 0 1 1 2 +7088 PF07260 ANKH Progressive ankylosis protein (ANKH) Moxon SJ anon Pfam-B_17517 (release 10.0) Family This family consists of several progressive ankylosis protein (ANK or ANKH) sequences. The ANK protein spans the outer cell membrane and shuttles inorganic pyrophosphate (PPi), a major inhibitor of physiologic and pathologic calcification, bone mineralisation and bone resorption [1]. Mutations in ANK are thought to give rise to Craniometaphyseal dysplasia (CMD) which is a rare skeletal disorder characterised by progressive thickening and increased mineral density of craniofacial bones and abnormally developed metaphyses in long bones [2]. 
20.70 20.70 21.10 20.70 20.20 20.60 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.25 0.70 -5.45 2 82 2009-01-15 18:05:59 2003-09-12 11:17:30 6 2 49 0 46 68 9 277.50 65 67.24 CHANGED Mhch.S.TsYWPLIRFLlPLuITNIAIDFGEQALNRGIAuVKEDtlEMLASYGLAYSLMKFFTGPMSDFKNVGLVFVNSK+-t.KAlLCMsVAGlIAhlhHsLIAYoDLGYYIINKLHHVD-SVGuKTRKAFLYLAhFPhhDAMAWhHAGILLKHKYShLVGsASISDVlAQlVFVAILLpSpLEChEPLLIPILuLYhGALVRhTslsLGYYpNlHDhhPESous-.GGDATIKKMLSFWWPLALILATQRISRPIVNLFVSRDLtGSoAuTEu.................hT-hpuh.PhF..................TtIcphphsshuLSLoL.....Wo ...........................llpFhlPLuloslshshucQALNRGIAuVKEDAVEMLASYGLAYSLMKFFTGPMSDFKNVGLVFVNS.K..RD.RsKAVLCM.V..VAGslAuVhHsLI.AYoDLGYYIINKLH.HVD-SVGsKTR+AFLYLAAFPhhDAMAWhHAGILL.KH.KYShLVGsASISDVlAQ.....lVFV.AILLHSpL.......ECtEPLLIPI..LSLYMGALVRhThls.L..GYYpNIHDhIP-pSGsEhGGDATI+KMLSFWWPLALILATQRISRPIVNLFVSRDLtGSSAATEAVAl.LTATYPVGHMPYGWLTElRAVYPAFDKNNPSNKlhsssssVTtsHIK+FTFsChALSLoLCFlhFWT............................................................................. 1 12 17 25 +7089 PF07261 DnaB_2 Replication initiation and membrane attachment Moxon SJ anon Pfam-B_17543 (release 10.0) Family This family consists of several bacterial replication initiation and membrane attachment (DnaB) proteins, as well as DnaD which is a component of the PriA primosome. The PriA primosome functions to recruit the replication fork helicase onto the DNA [2]. The DnaB protein is essential for both replication initiation and membrane attachment of the origin region of the chromosome and plasmid pUB110 in Bacillus subtilis. It is known that there are two different classes (DnaBI and DnaBII) in the DnaB mutants; DnaBI is essential for both chromosome and pUB110 replication, whereas DnaBII is necessary only for chromosome replication [1]. DnaD has been merged into this family. 
This family also includes Ftn6, a cyanobacterial-specific divisome component possibly playing a role at the interface between DNA replication and cell division [3]. Ftn6 possesses a conserved domain localised within the N-terminus of the proteins. This domain, named FND, exhibits sequence and structure similarities with the DnaD-like domains Pfam:PF04271 now merged into Pfam:PF07261. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.39 0.72 -4.38 110 3374 2010-01-07 15:44:22 2003-09-12 11:29:14 6 17 1459 3 391 2143 21 75.80 23 26.80 CHANGED lhphhcpp..hup.LSsh-hcplppalpp...phss-llphAlcpAl.tp..schs..h+YlcpILtsWpcpslpTlcpspphtcp .........................hphhppp....ht..t...los.h-h....cplpphlpp........phss-....llphAlchuh...p.......schs.........hpYlp.p.ILpsWpc.p.s..lpThcpspth...t............... 2 151 271 328 +7090 PF07262 DUF1436 Protein of unknown function (DUF1436) Moxon SJ anon Pfam-B_17809 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 160 residues in length. The function of this family is unknown. 25.00 25.00 25.10 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.63 0.71 -4.75 9 103 2009-09-11 13:43:21 2003-09-12 11:34:26 6 1 77 1 17 82 1 145.10 44 86.54 CHANGED pch.sthphTscFlSlpoYSGhshhshDshtssplLsPDlsspslGcsILpALScSRhl..s.--.t.sFhDhEptp.pYcsWlpplMtpYuY+o++uLFKsMh.suIpllNs.IpIpPo+H-KLEuWosst.psu-hVhl..slDsSPEElGuuL+.AFp+C ........................h.s.h.hstchh.lpohuthshhs.D..hsspl...Ls.DsDspsLGpslLQALusSRTh............cs.Epp.-FFcpEch+p.cYc-WlspLhsphGYKT+RALFKNMhsssIh..lpNuslcIoPS+HsKLEAWsuh....-uDsVlL..slD..sSPEEIGAuL+LALSRC.... 0 3 8 12 +7091 PF07263 DMP1 Dentin matrix protein 1 (DMP1) Moxon SJ anon Pfam-B_17812 (release 10.0) Family This family consists of several mammalian dentin matrix protein 1 (DMP1) sequences. 
The dentin matrix acidic phosphoprotein 1 (DMP1) gene has been mapped to human chromosome 4q21 [1]. DMP1 is a bone and teeth specific protein initially identified from mineralised dentin. DMP1 is primarily localised in the nuclear compartment of undifferentiated osteoblasts. In the nucleus, DMP1 acts as a transcriptional component for activation of osteoblast-specific genes like osteocalcin. During the early phase of osteoblast maturation, Ca(2+) surges into the nucleus from the cytoplasm, triggering the phosphorylation of DMP1 by a nuclear isoform of casein kinase II. This phosphorylated DMP1 is then exported out into the extracellular matrix, where it regulates nucleation of hydroxyapatite. DMP1 is a unique molecule that initiates osteoblast differentiation by transcription in the nucleus and orchestrates mineralised matrix formation extracellularly, at later stages of osteoblast maturation [2]. The DMP1 gene has been found to be ectopically expressed in lung cancer although the reason for this is unknown [3]. 
19.20 19.20 19.70 19.60 18.60 19.00 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.11 0.70 -5.77 4 486 2009-01-15 18:05:59 2003-09-12 11:45:14 6 2 401 0 21 468 0 337.60 57 98.59 CHANGED MKToILLMFLWGLSCALPVARYQNTEScSSEEhKGcLAQoPsPPLESSESSEESKlSSEEQANEDPSDSTESEEtLGhDcpQY.aRPAGGLS+SsGptG-DKDDDED-SGDDTFGDDDuGPGPEEtp.GGsSRLsSDEDSsDTTQSpEDSsPQG-sSAQDTTSESR-LDsEDEsDSRPEGGDSTtDSESEEpWVGGGSEGESSHGDGSEFDDEGMQSDDPtohRSERGNSRhSoAGlKS+ESKGcDcEpASTQDSs-SpultasSRKhFRKSRlSEEDsRGEL-DsNoh.EVpSDSTEs.....stLoQS+EcS+uESQp-ScENpS.E-SQcVQDPSSESSQEssLPSQENSSESQEEsluESRGDNPDNsTSau.EDQEDS-SSEEDSLcp.SsSESpSpEEQADSESpESLp.SEESPESsE-pNSSSQE.GLQupSuSsESpSQ......-ShSEEDD.SDSQDSSRSKE-SNSTESsSSSEE-uQsKNhElESRKLTVDAYHNKPIGDQDDNDCQDGY .................................................................................................................................................................tt.oRhts.........s-DSuDsTpSp...E-Sssp.E-......us.......p....D..s......s..S......-......S+sh.-pE....................S-SEEaWVGGGS.E.G.-.SS.HG.D..............GSEF...DD..EGMQSDDPD....ol+S..ERupSRMs..S..Au.l..+SKE...S..psp.s.c.c.p.ussQD.S.s-.SQSV-psuR.KhFRKSRIS..t..EDDpu-hssSsoh..E.spSDSTEso............ppus.u.....Q...S+Es.SKS..-SQEDSp..E.s.QSQED..SQs.pDs...SSES.S....QEsDh.PSQE.....sS..SESQE..E..lh.ucSRGDNP..D.s...s...o.s...p...u......ED.Q-D..S.-...SS...EEDSLspsSsS.ESpScEEQAD.SE...SsES..L.p.SEES.ESoE..-ENSS.SQE.GLQSpSuS.s.E..SQ..S...........p......................p.....S.S.E....E.-..s..ps......Q-...s.s....c...S..........ts....S.sp-......p...-h-s.................................. 0 1 1 5 +7092 PF07264 EI24 Etoposide-induced protein 2.4 (EI24) Vella Briffa B anon Pfam-B_17540 (release 10.0) Family This family contains a number of eukaryotic etoposide-induced 2.4 (EI24) proteins approximately 350 residues long as well as bacterial CysZ proteins (formerly known as DUF540). In cells treated with the cytotoxic drug etoposide, EI24 is induced by p53 [1]. 
It has been suggested to play an important role in negative cell growth control [2]. 31.50 31.50 31.50 31.50 31.40 31.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.81 0.70 -4.91 191 1863 2009-01-15 18:05:59 2003-09-12 11:56:43 6 6 1751 2 510 1310 173 215.70 27 82.70 CHANGED suhttulp...tht................psphhthllhslhlsll....lhsshhhh..hhthltshh................................hshl.sh...............................hhhthhhhlhhhll...hhhl...hsslsshlsuhF.......hshluctl................................ptc.phsp.......hsthtshspul...hpplttlhhhlhhh..lhsl..hL...hh.lP..sl...s.hh..llahhlsu.....................ahhuh-ahshs.........h......shpctc.thhcpptsthhuhGhhhsh..............hhhlPllNllh.Pl .............................................................................................................................................................uhhhhhpshthh...............pPtltph.lllPlllsll....Lhsu....hhah....hh.stlssa...ls.slhshhs.........................sWlphh...........................allh.lsslshlll..........hsah...hss..lushI.uusF.......ss.hlAEpl.........p.............................................sst..phss........shhshhtsls...+hh...tc.ph..p.p....hhh.h.l...sts....llhl....lL....ha.lP....sl...Gthlssllaalhsu.................................ahlulp.Yt-as............hspp.........hshcch+.shl+.p.p+h.t..shtF.G.slsuL........................hshlPllNLhh.Pl................................................ 0 150 292 412 +7093 PF07265 TAP35_44 Tapetum specific protein TAP35/TAP44 Moxon SJ anon Pfam-B_17587 (release 10.0) Family This family consists of several plant tapetum specific proteins. Members of this family are found in Arabidopsis thaliana, Brassica napus and Sinapis alba. Members of this family may be involved in sporopollenin formation and/or deposition [1]. 
25.00 25.00 163.30 163.10 18.70 17.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.68 0.71 -4.34 5 15 2009-01-15 18:05:59 2003-09-12 11:59:58 6 1 7 0 5 15 0 121.30 65 99.95 CHANGED MSpISKVSSLCLLLLshFFLSSQPALSLRuPKhQ.SEPsS.................PEolhsDSSSsMsKI.DpssAKSMIAGFFSHKFPLpGWPF.KYPPFoM...VNPNIP..TNPS.....GAQEEoEKLPSSPSKGNKDGGNA .MS.IS+VSSLCLLLLV.hFFLoSpPALSLRuPK.Q.SEsAS.................PQThhDDSSs.Ms+I.D..pAKSMIAGFFSHKFPlpGWPFPKYPPFoM...VNPNlP..TsPS.....GAQEESEKlPSSPSKsN+DGtNA 0 2 2 2 +7095 PF07267 Nucleo_P87 Nucleopolyhedrovirus capsid protein P87 Moxon SJ anon Pfam-B_17842 (release 10.0) Family This family consists of several Nucleopolyhedrovirus capsid protein P87 sequences. P87 is expressed late in infection and concentrated in infected cell nuclei [1]. 25.00 25.00 29.20 29.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 654 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.36 0.70 -13.24 0.70 -5.84 6 50 2009-01-15 18:05:59 2003-09-12 13:14:04 6 4 38 0 0 51 0 343.30 28 61.12 CHANGED MD-p.NSLLIA+LAGpILTRDhsAVssIhHoPE+SLsQKLDsLpshlpuhssus.s.ss......suclphNShllsQshllRapsLphAVsFL+Ruspl..slspphLspIQssLppYcsYVspsshDpsl.l-saLNpAEssYtEIR...pscIh.FlK...................phpt.t.ppscups.sspsssu.....sPh......................................+.ssphhpthtN...pouhochspplpuu...hh.hth.h.th.p.pDpchphha.....................................tsh.pps.....EhPlIsNEpDFD+hsI-QLs...DYI++Nhss.phsas.sHsS...VcDVRpFAKslWRtps................ssspTPh..........ph.oPtpTPh.h.....s-p..uhQoP..sPt............................s.ps...hph.p................s.hPspssssss.ss.p+++RRRsssL..hs.....................S-E-E..............s-.cEsDaEp-RKRRREEDKNFLRLKALELSKYAGVNERMEKIVQVT+AMQcTYDYCNCKNTIsGTPsAssFspLL+RLNTYNLuHVEMTVNFYELLYPLTLYNDESN.......RIluYIFAAuNYFQNCAKNFspMRs-FNpaGPFsQIDSMVMFVIKFNFLCDLQsFFGpIDsLPsLuQPNhplHsVLlMRDKIVKLAFNuLQYsTssKo-NRRDPKHLQRLIMLMNADFNII 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sst..satshlpplpphNLohlp.sVpFYcLL.PLshYs...s...sp...s.......pllh...aIhtussYF.NsAcNFs.hRtshps.t.htphDphshFhI+aNFLh.hRpFhspl......s.shss..N.+IhNVLhhhDplVpptasplpaph...........................t.......................... 0 0 0 0 +7096 PF07268 EppA_BapA Exported protein precursor (EppA/BapA) Moxon SJ anon Pfam-B_17945 (release 10.0) Family This family consists of a number of exported protein precursor (EppA and BapA) sequences which seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). bapA gene sequences are quite stable but the encoded proteins do not provoke a strong immune response in most individuals. Conversely, EppA proteins are much more antigenic but are more variable in sequence. It is thought that BapA and EppA play important roles during the Borrelia burgdorferi infectious cycle [1]. 
24.30 24.30 24.80 25.70 24.20 24.20 hmmbuild --amino -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.58 0.71 -4.50 7 89 2009-01-15 18:05:59 2003-09-12 13:19:13 6 2 21 0 2 84 0 119.70 57 82.41 CHANGED sIccNYs+AKKsFSKEDasLIpKRLDNYsFpsEYsKShhFuh.APcIRGsLRKIGIKEpuVhLDALDllGYLIKsKlhtphhhl....h.sI.pLIpGhPsuIFNaL.lQLsSDKIDYsEKYG-cAR-pFcpSYpKsKIssVK .......IccNYscsK+uFScEDFsLIppRLDNYsFc..sEY-KShhhuc.sPcIRGpLRKItIKEpuVhLDALDlltYLIK.Khhp...lhh.........sI.pLItGYPsu.....IFsYL.IQ.LsSDKIDYAEKYG-cA..Rp..pFccuYp+DKhssVK............................ 0 2 2 2 +7098 PF07270 DUF1438 Protein of unknown function (DUF1438) Moxon SJ anon Pfam-B_18024 (release 10.0) Family This family consists of several hypothetical proteins of around 170 residues in length which appear to be mouse specific. The function of this family is unknown. 25.00 25.00 115.70 50.70 21.80 16.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.06 0.71 -4.33 5 19 2009-01-15 18:05:59 2003-09-12 13:30:23 6 2 3 0 4 16 0 126.80 58 89.06 CHANGED MhpSFsGFVKDTSDTEEHALPSAQsLPALSTRCSKSETLCFuKEpSHCSEDGWIlDWDLY.h.CVFESVDYLRSY+RLNCAMKKGTEVFQSESQR-PQVSPGDVDNspDKDTEEPDQPhPSLLREKGLELETCDGGDCPDQDPASDSsRHLG ...hp.h.uFV+soScottHALPoAQVLPA.sstCshspsLpFutE.SHC.EDGWIspWs.ash.CV.tShDYLR.hppLNsAMK+GTElFQStoQRcPph..G.h.....psstcPDQP.PuL..LRcKGL-LtTC-GGDCPDQDPuSDSsRpLG. 0 0 0 1 +7099 PF07271 Cytadhesin_P30 Cytadhesin P30/P32 Moxon SJ anon Pfam-B_18052 (release 10.0) Family This family consists of several Mycoplasma species specific Cytadhesin P32 and P30 proteins. P30 has been found to be membrane associated and localised on the tip organelle. It is thought that it is important in cytadherence and virulence [1]. 
20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.35 0.70 -5.13 3 109 2009-01-15 18:05:59 2003-09-12 13:35:59 6 8 24 0 13 105 16 133.40 46 68.80 CHANGED McLss.hRhKKLFllutLLhsolLFSuLIlLATul.LVpcNsoE........psLsVVLHpsED.TssIQGRsITEQPWFIPTVAGuFGFSALAIILGLAIGLPIVKRKEKRLLEEKERQEQlAEQLQRIS.-QpEQQAlE.pPstEspsQupsQPAs.sVsssP.....QPQVQ.sFGPp.QQRls..RsGFP.QPN.Mu.RPGFN....QMPPH....PGMsPN....RPGFNP.P....GMsPRPGF........PH....PsMsPNMp..RPGF.pPQP...........GtFs+PGsPhsPNMt.RPGFsPN.GMsPsPuMtsPRuGhPP .......................................................................................................................................................................................................................................................................................................PNpMGMRPGFN.....QMPPQ.....MGGMP..PNph..hRPGF.NQMPPp.........sGM.PPRP...NF..............PNQ..M..PNMp..+PGh.pspP...........Gh.................................................st..t........................ 0 3 6 12 +7100 PF07272 Orthoreo_P17 Orthoreovirus P17 protein Moxon SJ anon Pfam-B_18125 (release 10.0) Family This family consists of several Orthoreovirus P17 proteins. P17 is specified be ORF2 of the S1 gene and represents a nonstructural protein which associate with cell membranes [1]. 25.00 25.00 53.90 53.90 23.20 22.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.94 0.71 -4.40 6 41 2009-01-15 18:05:59 2003-09-12 13:42:50 6 1 25 0 0 29 0 135.80 57 98.51 CHANGED MpthR+poFsVpcF-...FsP.hlpphhpPshoAlstsDsstYhNI-lspoHPhhssLssLLup..PssVHVpLlRRauL.SoLssICEaDCsLltlssh.h..sosspsS+lVVHaDstoQSsAAKRSRslDslhDFEhEYK.WRFus.h MQhLRHTTFcVpRFs...FsPlslpEhAhPSFTAITusDPopYFNIELPpoHPLhSpLPsLLSp..PCcVHVpLIRRFALaSTLSSICEYDCALLhsspAIh.LssusppSplllHWDGtSQSlAAKRuRphDTllDFE+-YK.WRFsu..l.. 
0 0 0 0 +7101 PF07273 DUF1439 Protein of unknown function (DUF1439) Moxon SJ anon Pfam-B_18280 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 190 residues in length. Several members of this family are annotated as being putative lipoproteins and are often known as YceB. The function of this family is unknown. 20.70 20.70 21.10 20.70 20.50 20.30 hmmbuild --amino -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.69 0.71 -4.49 38 831 2009-01-15 18:05:59 2003-09-12 13:49:14 7 2 797 2 138 426 24 148.20 48 78.82 CHANGED loEpElppa....Lscch.phpcphGhsGlhssplplsslslpIG.pp.ssclslsusupsplssh.hsshssplplplcutPhYDp-csAlYL+slcLlchslpspphpsslps.lhs.lhphlsphLsspPVYpLcpsc.hspulh+phspslcVcsGc ......................................IoEQEINQu.....LsK+N.NFsK.cIGl.....P....GlsDA+IsLoNLsSQIGR-E..PNKVo..LTGDAplDhsSL.FGsQcAshKLpLKAlPsFD+....EKGAIaL.....+-MEVVDsoVp..PE....KM.....QoV.....hQo.LlPYLNQu.L.+.sYFNQpPAYVL+EDsSpuEAhAKKLAKGIEVKPGc.................................. 0 19 54 94 +7102 PF07274 DUF1440 Protein of unknown function (DUF1440) Vella Briffa B anon Pfam-B_17831 (release 10.0) Family This family contains a number of bacterial proteins of unknown function approximately 180 residues long. These are possibly integral membrane proteins. 30.90 30.90 31.10 30.90 30.40 30.70 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.92 0.71 -4.17 27 965 2009-01-15 18:05:59 2003-09-12 13:51:36 7 1 896 0 73 342 0 139.50 50 77.98 CHANGED llSuhVKhGWEs.hPPRTP...............................pRstsNPPt....thL.p.lGlssp.spssYhausp.plsasuhllHauFSlsFA..lhYs.lluchaPplphhpGssaG.lhlalshHhllhPhhthsPss.........hc.PhpEHhSEhlGHIlWhWoI-.....ll .....................llSuhVKhGhEVshPPRoP.................................pRsthNPP.....hhL.-.LGlss...spssYTFusp.hhsaVu.lsHhhFSIVFA..lsYC.llAEhaPKlpLWQGhlhG.lllhlhhHhIhhPlhshsPsl.........a-hPatEalSElhGHlVWhWoIEll.............. 
0 11 30 56 +7103 PF07275 ArdA Antirestriction protein (ArdA) Moxon SJ anon Pfam-B_17857 (release 10.0) Family This family consists of several bacterial antirestriction (ArdA) proteins.\ ArdA functions in bacterial conjugation to allow an unmodified plasmid to evade restriction in the recipient bacterium and yet acquire cognate modification [1]. 21.00 21.00 21.10 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.27 0.71 -4.00 38 992 2009-01-15 18:05:59 2003-09-12 13:53:08 6 11 631 4 85 617 101 159.80 30 62.10 CHANGED clYVAsLupYNpGpLhGpW..lssstDt--ltt.hcclhhs......tp.........................................EEaAIHDaEshs...thplsEYssl-clsclsph.l...........p-hs.-st.tth.thh....saaus.p-hsp..cslcsh.ahuphcs.p......DhApthl--ss......hptlP.pp..L...ppY..hDYEAauRD....lplssshh..hsppGhhpst ...................................................................tlYlushu+Y.N.pG..plsGtW...hohP.h...Dh..--hp...c.clsls.......-p.........................................cE..ah...Ip..Da...E.hP.........hpl.s......Ea..s.slt.clN..clhch..l..............p-ls..cthtpt.hpthl......................shhu..s...h...p-h.p...h..csltph....h.......s-s.c.s.p....................DlAphhl-..-sG.......hl.s-lP....p.s....l...psY....hDY-AYGRD....Lthsuphh..hspcGhh...h........................... 0 37 63 74 +7104 PF07276 PSGP Apopolysialoglycoprotein (PSGP) Moxon SJ anon Pfam-B_17916 (release 10.0) Repeat This family represents a series of 13 reside repeats found in the apopolysialoglycoprotein of Oncorhynchus mykiss (Rainbow trout) and Oncorhynchus masou (Cherry salmon). Polysialoglycoprotein (PSGP) of unfertilised eggs of rainbow trout consists of tandem repeats of a glycotridecapeptide, Asp-Asp-Ala-Thr*-Ser*-Glu-Ala-Ala-Thr*-Gly-Pro-Ser- Gly (* denotes the attachment site of a polysialoglycan chain). In response to egg activation, PSGP is discharged by exocytosis into the space between the vitelline envelope and the plasma membrane, i.e. 
the perivitelline space, where the 200-kDa PSGP molecules undergo rapid and dramatic depolymerisation by proteolysis into glycotridecapeptides [1]. 17.30 17.30 18.20 22.10 14.70 17.00 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.96 0.74 -6.04 0.74 -3.39 3 74 2009-01-15 18:05:59 2003-09-12 14:03:55 6 3 4 0 0 72 0 13.00 96 81.03 CHANGED DDATSEAATGPSG DDATSEAATGPSu 0 0 0 0 +7105 PF07277 SapC SapC Vella Briffa B anon Pfam-B_17881 (release 10.0) Family This family contains a number of bacterial SapC proteins approximately 250 residues long. In Campylobacter fetus, SapC forms part of a paracrystalline surface layer (S-layer) that confers serum resistance [1]. 25.00 25.00 35.10 34.80 19.70 17.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.20 0.70 -5.04 83 327 2009-09-11 20:32:24 2003-09-12 15:17:17 6 4 209 0 133 355 145 216.40 28 86.79 CHANGED L.sspcHtsl+lpstps.hsaupshphlPlhssEFtpAsspaPIlFs.csspst...ahslAlhGlcpscNLFls.-GpWpus..YlPthlRRaPFhl..............tpspsssphslslDtsustl..s....ps-G..psLF.....spsGpsopaLpphhphLpphppstptTptFhptLtchsLLpshslslsh......tsupppslsGhaslsEc+LppLss-s................lhcLp+sGhLthIYspLhSLsplppL ........................Lss.pHtsl+lp..stps.htausshphlP....lhssEaspsstpaPIlFt.....csspt.........atslAlhGlpts.cN..L.Fl......s..sGpWpus..YlPshlcRaPFhl...............ts..tppsphslslD.sustl......s...........ps...cG....psLF.....tppG.psopaLpphhphLpphppshptTptFhptLtchsLLpshplslph........tsGpptplsGhaslsEc+Lpt.Lssps................lhpLpc....pGhLthlYstLhSltplppL................ 0 36 85 107 +7106 PF07278 DUF1441 Protein of unknown function (DUF1441) Moxon SJ anon Pfam-B_17966 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 160 residues in length. The function of this family is unknown. However, it appears to be distantly related to other HTH families so may act as a transcriptional regulator. 
26.50 26.50 27.60 27.80 25.50 26.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.77 0.71 -4.57 9 388 2012-10-04 14:01:12 2003-09-12 15:26:14 6 4 271 0 19 155 8 132.20 50 88.03 CHANGED cLNIsQlAtloGLHRQTVssRLtsls.PAsGospphKLYtLsDllsshhst.hsssssp.hDPs.....-RKAWaQSEp-RLKhE+EptpLlPssEVtppauslsKAlVQVLETlPDlLERDsuLoPstLscVQpllD-lR-plsptlh-sss- .........................................................................LNIsQLAAloGlHRQTVsuRLpsls.sAsGptpphKLYhLsDllsthht..hP..s.ssst..tc..hDPp.....-RKAWYQSE+ERLKhEpEptQLlPso-VcppaulhsKAlVQVLETlPDhLERDpGLpsstlscVQpllD-lR-plthtlh-sss-......................... 0 1 7 12 +7107 PF07279 DUF1442 Protein of unknown function (DUF1442) Moxon SJ anon Pfam-B_18012 (release 10.0) Family This family consists of several hypothetical Arabidopsis thaliana proteins of around 225 residues in length. The function of this family is unknown. 20.80 20.80 20.90 21.30 20.10 20.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.56 0.70 -5.24 4 79 2012-10-10 17:06:42 2003-09-12 15:28:21 6 3 17 0 57 104 0 202.80 33 86.19 CHANGED MKLVWSPETAScAYIDTVKSCcshcssusAEhluAhAAGWNs+LIVETWScGDsIAoSVGLsVAupHTsGRHlCIVPspcScotYltAMpttsoo.shsEslV....uEpsEcsMcclpGVDFLVVD.+p+EFs.ssL+.Athus+GAVlVC+Nuhp+uhssFpWpclL.Rsp+VVRoVhLPVuuGLEIsHVuAssuGsuts....+pRWI+HlDpRSGEEHlF++ ..................................................................................WSs-sA.cAYlpTlc..................s.....p.............p...h....p..............p....s..s.....s...........A...........EhlSAhAAG.NA+LlVpshup..u.........u..s...sToluLAsAA+..p...TsGRh..lCl.lPspps.hpthtpshtph.u.h.p....phsEhl...l..........G-t...scplh..tp.h.p.s.lDFllVDs+...p.c-ah..clL+....h..sc.hu..t..p.G.....A.VlVspN...u....hp..p..t.h.s.s.h..p.a.p.......tsh......pt.pph.V...+oshLPlG.pGl-ls+lusst......s.......spt..........tp.t....................p+Wlh+lDppoG-.ahh+........................ 
0 9 38 50 +7108 PF07280 DUF1443 Protein of unknown function (DUF1443) Moxon SJ anon Pfam-B_18027 (release 10.0) Family This family consists of several Baculovirus proteins of around 55 residues in length. The function of this family is unknown. 25.00 25.00 56.40 56.20 22.20 19.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.00 0.72 -4.39 23 52 2009-01-15 18:05:59 2003-09-12 15:30:27 6 2 52 0 0 46 0 43.00 38 74.87 CHANGED llllllFllslhlLhhL+LN+tQhpchLaYQYpYIPcsLlshV .....hhllllFlhslhhLhhL+lN+tplpchLaYQYpYIPcsLlshV 0 0 0 0 +7109 PF07281 INSIG Insulin-induced protein (INSIG) Vella Briffa B anon Pfam-B_17905 (release 10.0) Family This family contains a number of eukaryotic Insulin-induced proteins (INSIG-1 and INSIG-2) approximately 200 residues long. INSIG-1 and INSIG-2 are found in the endoplasmic reticulum and bind the sterol-sensing domain of SREBP cleavage-activating protein (SCAP), preventing it from escorting SREBPs to the Golgi. Their combined action permits feedback regulation of cholesterol synthesis over a wide range of sterol concentrations [1,2]. 
25.20 25.20 25.50 25.30 24.80 25.10 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.75 0.71 -11.36 0.71 -4.97 9 298 2009-01-15 18:05:59 2003-09-12 15:33:33 7 4 190 0 172 256 0 182.60 36 62.95 CHANGED psl+uslLFuFGVlaullshpL.......+cNahlhPVcLcsp+a.suW.h.uhaGhuusslG.lhPhlDoa..hGcs.................cchshpWsuVsRsVuAFVGIuaAh++LsapSoTQuSlTLAhssslLWYLhDRo+sGFhLSTlluluGssllhsLts.................ssh...hGlhphhoQ-.hhVRsWlsulLFsAsVsFGNIGRQLA .....................................................................h...pshlLF.h.GhhhuhllphL..................ppp.h..h.l.....h.s..........c....l.....s...h...h...ss..sW.........h.h...s.......h..h...G..s..u..usllGhLhPhlDph....hs-s.............................................................pp.h....pppWsuV.hRslusF.....V......GIsaA....t.........Kl.......s.........ass..s....l...QlS........LTLAhLslsL...WahFDR.S+o....GhhLu.lsl.uhlusl..hh.hls....................................................................tu.lh.phso..-h.lhlt...sWlsslhFsuslshGNIGRpLA.......................................................... 0 35 71 119 +7110 PF07282 OrfB_Zn_ribbon Transposase_35; Putative transposase DNA-binding domain Bateman A anon Pfam-B_4755 (release 10.0) Family This putative domain is found at the C-terminus of a large number of transposase proteins. This domain contains four conserved cysteines suggestive of a zinc binding domain. Given the need for transposases to bind DNA as well as the large number of DNA-binding zinc fingers we hypothesise this domain is DNA-binding. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.25 0.72 -4.32 72 6607 2012-10-03 10:42:43 2003-09-12 16:00:36 6 25 1414 0 1912 6323 252 69.10 32 19.70 CHANGED atphtphlcYKAphh.GhplhhV...ssta.TSppCstCGphtpp.........hss+hapCsp..CGhp.hcRDhNAuhNIhpc .....................ahphpphLpYKsp........h.h.G.t.p.l.l..t.V........s.ta...o..Sp.....p........C.......s......s.....CG..phtpp.....................tls.pR.p.a.....p.....Csp........C...G...hp....tc........RDh........NA.A.hNIht............................... 0 559 1281 1646 +7111 PF07283 TrbH Conjugal transfer protein TrbH Vella Briffa B anon Pfam-B_17942 (release 10.0) Family This family contains TrbH, a bacterial conjugal transfer protein approximately 150 residues long. This contains a putative membrane lipoprotein lipid attachment site [1]. 20.40 20.40 20.50 23.50 20.30 19.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.46 0.71 -4.25 14 125 2009-09-10 15:49:04 2003-09-12 16:08:24 6 2 104 0 26 94 9 114.40 33 73.22 CHANGED ussssptlAsDsVpQLstlYPPApTplpL.....pQtssDsFGtALlpsLRt+GYAVhEhssssttspu....................slsLpYVlDphssosLYRLTl.lGsQoloRsY.lspsushhPAusWs.RKE .......s...stptlAsDhlppLsphYsP.A.+..T.phpL.....p.p.p.sss.sFGpALlpsLRpcGYAllp.ssssstst................................l.LpYll-phss.ps..LhRlo..lsspplsRsY.hspssshhPuu.hs.h................................................... 0 3 13 21 +7112 PF07284 BCHF 2-vinyl bacteriochlorophyllide hydratase (BCHF) Vella Briffa B anon Pfam-B_17961 (release 10.0) Family This family contains the bacterial enzyme 2-vinyl bacteriochlorophyllide hydratase (EC:4.2.1.-) (approximately 150 residues long). This is involved in the light-independent bacteriochlorophyll biosynthesis pathway by adding water across the 2-vinyl group [1]. 
25.00 25.00 25.70 45.20 21.70 21.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.92 0.71 -4.33 26 115 2009-01-15 18:05:59 2003-09-12 16:40:04 6 1 97 0 54 112 133 138.10 55 81.21 CHANGED YTPEQhuRRsuSlWThVQuILAPlQFllFLlSlsLVlhYhssG..pGahhsolSIlhKTlhLhhIhlTGAlWEK.VFG+alFAstFFWEDVsShlshAhHshYhlhha.sthsspshhhlAlsAYsoYVlNAhQFLl+.lphu .YTPEp+tRRDuosWThVQGILAPlQFlVFLlSLsLVlpYLsTG..pGahhAThSlllKThlLasIMlTGuIWEKsVFGpYLFAPuFFWEDVhShlVlALHouYlsslh.shhsspt.MhlALAAYuoYllNAuQFLlK.LRhA..... 0 10 20 37 +7113 PF07285 DUF1444 Protein of unknown function (DUF1444) Vella Briffa B anon Pfam-B_18053 (release 10.0) Family This family contains several hypothetical bacterial proteins of unknown function that are approximately 250 residues long. 22.40 22.40 22.40 22.40 22.20 22.30 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.47 0.70 -5.06 18 530 2009-01-15 18:05:59 2003-09-12 16:42:35 6 4 507 0 70 307 1 250.70 46 91.43 CHANGED Msscphp-hLccRLs.psshsapaDRcc-sLRIcc+pstKGlTlsLssllAKaccpt-cAl-EllYYVpEulpsM..ppcsp.hpsp-ppIaPVIRuTSFPpco+-Gp..sLlac-HTAET+IYYALDLG+oYRLIDEphLcc.shTcccI+EhAhFNlRsLssshKpDcVAG...NhFYFlsoNDGYDASRILNcuhLp-hpppspGchsluVPHQDVLI.lADIpN-sGYDVLAQhoMcFFspGpVPITuLSFlY-pGcLEPIFILuKs+.+c ............................................................................................................................MsshpM+-cLcc+Lp..+.chcFpas+.....Ec.-sLRI.pp..c.sp.KG.ls..lp..LsullAK.YEsp...K....E..c.h....l-E.ls..........Y.YVpEAlttM...tccs......p.sp....spIhPVIR...uT.....S......Fsc....p..s..........+......pGh..............sF.l.h.c-.....HTAE.TtlYYAlDh..G.K.o.........Y.RLIDEphLpch.pLT.c.p.p.lREhulFNl.Rp....L....ss.p..h...p...p.DpV.pG.........NhFY..FlNoNDG.Y.DAS..RIL.N.suh....Lpc.hct..p.....h..p.G.-.M..l.VAV.PHQDVLI.I.ADI...cNcsGYDlhA...ph...TMcFF...sc...GhVPITSLSFsYcpG+lEPIFILuKNphp.................................................................. 
0 24 45 61 +7114 PF07286 DUF1445 Protein of unknown function (DUF1445) Vella Briffa B anon Pfam-B_18180 (release 10.0) Family This family represents a conserved region approximately 150 residues long within a number of hypothetical bacterial and eukaryotic proteins of unknown function. 25.00 25.00 26.20 28.30 21.20 24.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.74 0.71 -4.53 77 578 2009-01-15 18:05:59 2003-09-12 16:55:20 7 5 524 3 217 535 202 139.80 52 46.66 CHANGED VuFllGCSFoFEpALlpuG..lslRHl-pspNVPMY+TNlsspPAGhFsGs.hVVSMRPhsssclh+AlpITuRaPss.HGAPVHlG.DP.ptlGIpDlspPDaGDsV.slcsGE............lPVFWACGVTPQsslhsu+...sshsITHuP...GaMllTD ...................VuFllGCSFSFEpALhcsG..lslRHlp..p.spNVPMY+TslsCpsAGh.Fp.....Gs.hVVSMRPlss.spshcAhpITu+aPsV.HGAPVHlG..cP..p.....tl..GIp...Dls+P...DaGDsV.pl..c.sG.E.............................lPVFWACGVTPQuslhsu+.....sshsITHAP...GaMhlTD................ 0 58 114 173 +7115 PF07287 DUF1446 Protein of unknown function (DUF1446) Moxon SJ anon Pfam-B_17949 (release 10.0) Family This family consists of several bacterial and plant proteins of around 400 residues in length. The function of this family is unknown. 
20.10 20.10 20.10 20.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.86 0.70 -5.98 79 939 2009-01-15 18:05:59 2003-09-15 10:38:20 6 15 655 0 436 933 748 308.70 30 63.05 CHANGED IGsuuGahG...DRhpsshcls....p......tGslDaLsh-hL....................AEhThulht.......ps+hccPs.tG................Yssthhp..phcssLshshc......+Gl+lloNtGuhNPpu.....sActltplApchGLs.l+VAhVp.....GDDlhspltt.....tst..p.hpsut.sh.t...t...hl............SANAYLGAhuIscALctG.ADlVlTGRVsDsulhluPhhacFGWs.hsDaDcLAtuslAGHLlECGuQsTGG.Fuc..ac-l....s-.htpl..........GFPIAElssDGsslITKhs.GoGGhVohsTVpEQLLYElpcPtsYlsPDVsuDasslplp.....phGsDR..................V+V..sGspGps....Pss.hKVslshhsGahspsphshsG.ssht+Aclstchlpc+lttt.........sclch-LlG ...................................................................................................lussuuh.u...Dp..sh.thh.....p...........s..lshlhh.-hL....................uEhsh.hht..........tp...t.pst.....G........................Yt.......hhp..thc.....hLshs....hc......p.sl+l.lsN.....s.....G...uh..sP..tu........hsp.tlpplupc..hGls...h+lAhlt...GDslhshhtphh..........t.......h...s....h.........p.............h..........................sssAY...lGu.sIhcALct....G..A..D..lVlsGRssDsulhhu.s.hh.htasWt.....hs..D......aD.pL..utuhhsGHllECusps.oG.G.ast.........ht.h...............t.h.p.GaPl.uEltsc....GphlloKh.....s..oGGhlo.sTlstQhLYElpsP.tYh.sPD.......Vsschsplp.hp.....ph....u.s.c.R..................Vpl..sGsc....G.p..s.s.....P.s.s...hK.lsh..shhsGapsts.hshsG.t...h.t+u.chhtc.lpptltth.............................................................. 0 119 255 370 +7116 PF07288 DUF1447 Protein of unknown function (DUF1447) Moxon SJ anon Pfam-B_18163 (release 10.0) Family This family consists of several bacterial proteins of around 70 residues in length. The function of this family is unknown. 
25.00 25.00 25.50 25.30 24.90 24.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.34 0.72 -4.19 33 1005 2009-01-15 18:05:59 2003-09-15 11:09:40 6 1 979 0 87 267 1 70.70 51 96.82 CHANGED IYKVaYQcsppcsPhREpTcoLYl......EA-ochcsRphl-cp..sYNIEFIp.LsGpaL-YEKpsssFclsEa .....IaKVFYQEs+ccsPhREsTcoLYl......Eu-uchpsRphlc-p..sYNIEFIphLsspaL-YEK.E.s..u.s.FplsEh...... 0 18 42 65 +7117 PF07289 DUF1448 Protein of unknown function (DUF1448) Moxon SJ anon Pfam-B_18223 (release 10.0) Family This family consists of several eukaryotic proteins of around 375 residues in length. The function of this family is unknown. It appears that this family includes a divergent GRAM domain. 20.40 20.40 30.00 27.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.04 0.70 -5.52 13 172 2012-10-04 00:02:25 2003-09-15 11:11:37 6 9 121 0 110 157 4 282.90 47 87.44 CHANGED thWpDR-l+FD.lsspplphp..sGEtllshlssVEDTKGNsGcpGpLhVTNLRlIWaupsps+hNLSIGassItslss+.ps..pS+lRG.sopuLaIhu+..ssssRFEFlFT.............................ssstpssp........lFsolhsVa+sYpooplYR-LKLRuAllp.sGpLplLPpEplhs+lsGVhNlSu-QGsL.GoFhlTNlRlVWaAshN-sFNlSlPYLQIpulRlR-S.KaGhALVIETsppuGuYVLGFRlDPh-+LschhKElsSL+psastpPlaGlphphc....-ssttt.............psp..................s.p.pp--sE.l-schcsD...thsuYhAs..sspt.....sp-Rc....PlYssELGLAlE+l+sGhoLp-LWslh ...........s.hWpDR-l+FD..lstpthphR..sGEhll-plsslEDTKGNsG.....-pGpLhVTNLRllWaSh.shs+.hNLSlGasslhslss+.hs....pSK.........lRG.topA......LYIhs+........hs..soRFEFlFT.............................slsstss..........lFsolhsVa.+AYcoo+hYR-lKLRu.A.llp.stpLhlLPpEplas+lpGVWNLSo..D...............QGNL.GoFhlTNlRlVWaAshN-sFNlSlPYLQlp..ul+lR-S.KFG.ALVlcosp.puG.GYVLGF+lDPhE+LpphhKElsSLHpsastpPlFGVpaphc....-t..t.h.......ct................................s.p.h.--hE.l-p.p.pp-......shsuYaAs..ssp........ttpRt.......Psas.cLGlAhEtlhpGholpsLWpl.................................... 
1 50 59 90 +7118 PF07290 DUF1449 Protein of unknown function (DUF1449) Moxon SJ anon Pfam-B_18269 (release 10.0) Family This family consists of several bacterial proteins of around 210 residues in length. The function of this family is unknown. 21.60 21.60 22.10 22.10 21.00 20.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.25 0.71 -4.98 3 565 2012-10-02 01:11:51 2003-09-15 11:14:22 6 1 525 0 47 238 9 190.70 56 95.09 CHANGED FA-YNoPYLFAIAFVllIGlLElloLIsGHhLSGALDAHLDHYDulSoGsluQALHYLNIGRVPALVVLCLLAGaFGLhGILIQHuuIMVWQuPLSNLllVPlSlllSVhAVHYouKIlAPWIPRDESSAlsEEEaIGuMAlITGHsAsuGsPCEGKlTDpFGQIHYLLLEPE.cGK.FcKGDKVLIVCR....LSATRYLAEpsPa ..............................................................FA-YNoPYLFAIuFVlh..IGl..LEhhuLIhGHh.......LS.......GA...L.........D....A....H..L........D...H.........Y.........D...u......l......o...o..G..........h...u...Q.....A......L.H....YLNIGRlPALVVL...CL...LA...GaF.G.LhG..ILlQH...u.s.lhl...WQ...uP...LSNL....h......l.....V....Pl.Sll..h..olhAVHYoGKllAPWl.PR..Dc..SS...A...loEEEaIGuM.AlI...TG.Hp...A...suGsP.CEGKlTDpFGQIHYLLL.E.PE..cG.K..F.pKGDKVLIlCR.....LSATRYLAEps.a........................................... 1 10 23 35 +7119 PF07291 MauE Methylamine utilisation protein MauE Moxon SJ anon Pfam-B_18306 (release 10.0) Family This family consists of several bacterial methylamine utilisation MauE proteins. Synthesis of enzymes involved in methylamine oxidation via methylamine dehydrogenase (MADH) is encoded by genes present in the mau cluster. MauE and MauD are specifically involved in the processing, transport, and/or maturation of the beta-subunit and that the absence of each of these proteins leads to production of a non-functional beta-subunit which becomes rapidly degraded [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.17 0.71 -4.66 5 457 2012-10-02 13:32:46 2003-09-15 11:20:18 6 7 374 0 184 485 112 150.00 21 71.32 CHANGED pslLs-PsVshhlRlFLALLLAAAAIPKLRHsEEFaGVVRNFRLLP-aLuRPVAhVLPllELulAVGLllPsLAslAAslAAALhlVFulAIAINVuRGRTpIDCGCFRNGhK.QRISWhhVhRNluLAuhALulAAlLPsAssuSlp-uATGLAAAuhLhLLYhuASLLuGLPAu+susclsKG ...................................................hhh..........hhhphhlu.hl.hlhuuhtK...lt...sh...t....s...F...t.p..t.l...t...s..Y.p...l..L..s....p...t.h..s...t....h.A....hhlPhhElhs.Gl...hLl.h.......u..h...h.........t...h.h..u...u..h..l.uh..h..lh..h..l.Fs..s.ul.....s.h.s.l.h.p..s..t.h.pl...c...CGC.....h........G.........s..s........st............l.u.....h.....h.........l.h..c.sh...h...h.h.hh...s....h....................................................................................................hhhh.................................................... 1 82 146 168 +7120 PF07292 NID Nmi/IFP 35 domain (NID) Vella Briffa B anon Pfam-B_17864 (release 10.0) Domain This family represents a domain of approximately 90 residues that is tandemly repeated within interferon-induced 35 kDa protein (IFP 35) and the homologous N-myc-interactor (Nmi). This domain mediates Nmi-Nmi protein interactions and subcellular localisation [1]. 25.00 25.00 25.10 25.90 24.90 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.97 0.72 -3.94 4 209 2009-01-15 18:05:59 2003-09-15 11:26:57 8 4 39 0 112 182 0 86.40 37 52.28 CHANGED ALITFEc.cVAppVlp.pcHpVphE-spL+VpspPl.LssssphQVpsplSph+l.VTGhPs..cLSEEphhDKLEl.FuKoRNGGG-V- ..............AlITFtcpt...VAppllphpcapltl.........p.psp.hpVplpPh..sphpchQl..psplSp..+p.lL..VoslPs..tlsE-pl.p.DhLElaFpKspN..GGGEVE.......... 
0 4 13 43 +7121 PF07293 DUF1450 Protein of unknown function (DUF1450) Moxon SJ anon Pfam-B_18439 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 80 residues in length. Members of this family contain four highly conserved cysteine residues. The function of this family is unknown. 20.90 20.90 20.90 21.50 20.70 20.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.65 0.72 -4.00 14 826 2009-01-15 18:05:59 2003-09-15 11:43:57 6 1 437 0 89 264 0 73.70 40 85.85 CHANGED MpPllEFClSNLApGoptAhEtLE+DPNlDVlEYGCLoYCGhCupuhFALVNGEhVpG-oPE-LVcNIYpaIEENs...hF ...............................................p.h...hpcL.E..p.D..Ps.s..-llEhGC.oYCG.spcpsFAhVN..scsVtu.T.EELlpplhpplcc............... 0 29 56 73 +7122 PF07294 Fibroin_P25 Fibroin P25 Moxon SJ anon Pfam-B_18451 (release 10.0) Family This family consists of several insect fibroin P25 proteins. Silk fibroin produced by the silkworm Bombyx mori consists of a heavy chain, a light chain, and a glycoprotein, P25. The heavy and light chains are linked by a disulfide bond, and P25 associates with disulfide-linked heavy and light chains by non-covalent interactions. P25 is plays an important role in maintaining integrity of the complex [1]. 25.00 25.00 45.70 39.70 19.40 19.10 hmmbuild --amino -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.05 0.71 -4.83 5 11 2009-01-15 18:05:59 2003-09-15 11:48:57 6 1 8 0 5 8 0 184.30 46 86.22 CHANGED NIVRPCaLDDLKCIuDNLAANS+CpsNVRGpIPSpYsIPsF+FEsPFFNATYIDaNLIsRN+DpCRVSEFFFNl+ocsuVLolDCPNLsaESsRTllQHtSLpEDoVYSYaINGTYPLIRLTTNLssusclNLCSuaTFADVs.uLPIF+IDPND+pTANaLSRDLoLLNIYERETFaaRustLARaFINSLICDFGCp .pIhRPChLsDhcCIpDpLAANSpCpss.tGplPopYpIPhFpF-sPaFNATYlDaNLlsRNpDpC+VSEFahNh+ocpsVLolDCP.LsaESsRTlhQHtS.hpEDshhSaaIpGoYPLIRLTTsh.pusphsLCSuaTFADls.uLPIF+IsPpDp.TApaLS+DLoLLpIYEREphhh+ts.LhR.FlsphlCDFGCp................. 
0 4 5 5 +7123 PF07295 DUF1451 Protein of unknown function (DUF1451) Moxon SJ anon Pfam-B_18524 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 160 residues in length. Members of this family contain four highly conserved cysteine resides toward the C-terminal region of the protein. The function of this family is unknown. 24.10 24.10 24.50 24.20 23.80 23.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.00 0.71 -4.42 31 784 2009-01-15 18:05:59 2003-09-15 11:51:52 6 4 781 0 106 305 19 140.40 57 90.58 CHANGED lhppl....p-s.pps.cpLpchlcpucchhptts-hTc-ElsLlspaL+RDLccatcphp-.......t.hpushhthlcpoLWptLupITD+TplEWtELhpDhcHpG..hYpoGEllGhGtLsCppCG+phphppsshlssCscCspptFpRpsh ...............................L.VASL....oE.RL+NGERDIDsLVEpARcRVhpsGELTRsEl-plocAVRRDLEEFAhSYEE................pppDSVFhRVIKESLWQELADITDKTQLEW....REVFQDLsHHG.....VYHSGEVV.GLGNLVCEKCHaHLsl.Y.T.P.-.V.LshCPKCGHDQFpRRP.F....... 0 23 49 77 +7124 PF07296 TraP TraP protein Moxon SJ anon Pfam-B_18635 (release 10.0) Family This family consists of several bacterial conjugative transfer TraP proteins from Escherichia coli and Salmonella typhimurium. TraP appears to play a minor role in conjugation and may interact with TraB, which varies in sequence along with TraP, in order to stabilise the proposed transmembrane complex formed by the tra operon products [1]. 
25.00 25.00 52.10 53.30 21.50 20.70 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.34 0.71 -5.35 3 150 2009-01-15 18:05:59 2003-09-15 11:56:11 6 2 115 0 4 90 0 182.40 72 98.69 CHANGED MANNhlop.ts+AshYsVAuVLRWLaWsVKYAVIaPLATMALlVlLVFRTGsTTPGQpLVKEIspVRQpAPuG.FPVpDCssP....usoVsSPlPspLQEsCsh+ITDAA-YAA-IDQSLoQslhhLWuhLALlYTulAVlhG+pPVR+Ghl+p.tVVsuDosLss.thI.tcsEVhT.csshsGQI++s.....sD+pu+sEGDKNEHT .........MuNNhSuRQAsHAsRYVVApVLRhLFWCLKYsVILPLATMALMALFVLWKDNTTPGKLLVKEIsFVRQTAPAGQFPVuECh.............hsoSD......S...ss.u...plp.-IC+YRAADAADYVRETDRSLMQLVTALWATLALMYsSlAAhTGKYPV.RPGKMK.ClRVVTADE+L.........KEVYTEDASLPGKIRKC.VYhPDDRTNRNNGDKNEHA........................ 0 0 0 2 +7125 PF07297 DPM2 Dolichol phosphate-mannose biosynthesis regulatory protein (DPM2) Moxon SJ anon Pfam-B_18649 (release 10.0) Family This family consists of several eukaryotic dolichol phosphate-mannose biosynthesis regulatory (DPM2) proteins. Biosynthesis of glycosylphosphatidylinositol and N-glycan precursor is dependent upon a mannosyl donor, dolichol phosphate-mannose (DPM). DPM2, an 84 amino acid membrane protein expressed in the endoplasmic reticulum (ER), makes a complex with DPM1 that is essential for the ER localisation and stable expression of DPM1. Moreover, DPM2 enhances binding of dolichol phosphate, a substrate of DPM synthase. Biosynthesis of DPM in mammalian cells is regulated by DPM2 [1]. 25.00 25.00 28.20 28.00 21.00 20.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.87 0.72 -3.70 12 227 2009-09-13 06:50:04 2003-09-15 12:00:32 7 4 200 0 152 198 0 74.20 39 80.80 CHANGED tsD+lVGhuhlshShhlFsYYThWlllLPFlDssHslHpaFLPRsaAlhlPllhsllhhhhlGsFluhVMlKop+..KKu .........h..DphlGhhhlhsuhhlFhYYThWsl.lhPF..l....D..s...s..H.slppaF.P..RtaAlhIPlhhhllhhshVGsFluhVhl+spp..Kc..................... 
0 36 75 120 +7126 PF07298 NnrU NnrU protein Moxon SJ anon Pfam-B_18406 (release 10.0) Family This family consists of several plant and bacterial NnrU proteins. NnrU is thought to be involved in the reduction of nitric oxide. The exact function of NnrU is unclear. It is thought however that NnrU and perhaps NnrT are required for expression of both nirK and nor [1]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.58 0.71 -4.92 102 551 2012-10-01 22:51:20 2003-09-15 12:54:25 6 2 458 0 223 592 305 193.30 25 85.69 CHANGED lLlhulhlFhusHhh.h.ssshRsthh.utlGctsa+uhaullSlsulslllhuattAchs....LWsss..shhttlsslLhhlAhlllsuuhh.sp..................IhthsRHPhLhuhtlWAlAHLLsNG-lssllLFGuhhsaAlhshhhhc+R........sts.hs.hhsssuhhs..................shhslshGlllahslhh.hHshL.hGlsP ........................................................................................................hllh.LhlFhs.tHSlhh..tsshR.thhh..thl..G....t..t..t...a+uhY.ullSlsu.lsl..l..lht..atttphs......lWss.......shhphls.hh.Lhh..lu.al..hlhsuhh.hpt.......................................lhphsRHPh.h.lG..h..h..l..W.....A.....h.....uH.hlss.Gs....h....hsl.lhhsuh.hhatlhshh..hcp+.........ttt..ht.......sshh.......................hhhhhhulhhhhhhhh..hH.hl..t............................................................. 0 59 136 172 +7127 PF07299 FBP Fibronectin-binding protein (FBP) Moxon SJ anon Pfam-B_18450 (release 10.0) Family This family consists of several bacterial fibronectin-binding proteins which are thought to be involved in virulence in Listeria species [1,2]. 
22.70 22.70 22.80 23.30 21.70 22.60 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.37 0.70 -5.00 15 400 2009-01-15 18:05:59 2003-09-15 13:05:05 6 1 377 6 70 289 14 187.30 36 93.28 CHANGED FIcsaQYNaIKpplppLlsuasosNDcsslpsl+uhst-KIhslFsclssEpcpllcslhslp.s+pcA-+aLpcLp.YVlPFppsospplpKLF+KsKKLKlPshcphDh+phoYLGWNDtGop+KaIlhhp.csKLhGlpGslss..pspKGlCoICs...pco-VuLFhupsK.uss.sGTYs++GNYICtDSppCNcplTslspLccFlccl+ ................................hIps.QaNhIKpph..LhpsatosNDttslpslpuhs.-KI.tlF.p.t.............t...p.t.llsplhshp.sptcAEphL.plp.hVhPF.tsos.plpKlF...........KsKK.....LKl.P.s.hpphD.h+choYlGWpD.h.u.o.p.R.K.allshp..cs......+hlGl....p....Gohss.......ph....pKu...l....ColCp...sp...s...pVu..hFhuppK....usu....sG.s.asppGNYICpDu.tCNpphpphphLppFhc.lp................. 0 26 55 66 +7129 PF07301 DUF1453 Protein of unknown function (DUF1453) Moxon SJ anon Pfam-B_18607 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. Members of this family seem to be found exclusively in the Order Bacillales. 21.60 21.60 21.60 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.00 0.71 -4.34 20 613 2009-01-15 18:05:59 2003-09-15 13:44:42 6 1 432 0 59 304 1 143.10 50 92.91 CHANGED M........hslhSolhAlhMuhhlhhl.RhKAuc+Ps..osKKIILPPl.....F.MSTGALMFl..hPhFRlosh-llEAlhlGhl.FSlhLItTS+FElR.sscIYlKRSKAFsaILlGLLllRlshKhhluspIDhupLuGMFalLAFuMIVPWRIAMalpY+Kl ............h...hlhS.llAlhMushlhhl.RhKA..tp..pPl..stKK.IlLPP.h......F.MuTGA.LMal.......hP.hFRL.Ts.h..EhLEAhhlGLl.FShhLIhTS+FEl+..sscIYhKRSKAFshILIuLL.llRhshKhalS.....s..p..lD.GpLuGMFFLLAFsMIVPWRlAMhhpa+KL............................. 0 17 34 45 +7130 PF07302 AroM AroM protein Moxon SJ anon Pfam-B_18608 (release 10.0) Family This family consists of several bacterial and archaeal AroM proteins. 
In Escherichia coli the aroM gene is cotranscribed with aroL [1]. The function of this family is unknown. 23.10 23.10 23.20 29.60 22.90 22.70 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.31 0.70 -5.01 15 531 2012-10-03 04:39:14 2003-09-15 13:50:37 6 2 525 0 48 185 10 220.50 70 98.21 CHANGED +lAllTIGQuPRs-VlPplpcals-shclschGhLDuLScpEI.schAPpss-.csLlThLsDGppVplS+pKlpctLQpsIptL-ppGh-lIlLhCTGpFs.sLss+ps.LLEPp+IlssLVsullss.pplGllVPlt-Qhs..hptpKWphltpsshhssAoPah.uopsclhpAuppLttpGADllVLDClGYsppp+chlpcthslPVlLupsLlARl....suELls .........................................S.LAILTIGlVPMpEVLPLLTEYIDE.-.pIoHHSLLGK.....L...S...REEV..MAE...YAPEsGE.DslLT.LLNDN...QLAH....VSR+KVERDLQuVVEVLDNQGYDVIlLMSTAsIu.SMTARNoIhLEPsRILPPLVuSI..V-c..HQVGVIVPVEEhLs...sQAQKWQl.....L.Q.+sPVaSLuN.PlH....sS....E...Q....clIDAG+ELLscGADVIMLDCLGFHQ.RHRDlLQKpLDVPVLLSNVLIARLAAELL.V........... 0 6 18 33 +7131 PF07303 Occludin_ELL Occludin homology domain Vella Briffa B, Bateman A anon Pfam-B_18556 (release 10.0) Domain This domain represents a conserved region approximately 100 residues long within eukaryotic occludin proteins and the RNA polymerase II elongation factor ELL. Occludin is an integral membrane protein that localises to tight junctions [1], while ELL is an elongation factor that can increase the catalytic rate of RNA polymerase II transcription by suppressing transient pausing by polymerase at multiple sites along the DNA [2]. This shared domain is thought to mediate protein interactions [3]. 
25.30 25.30 25.80 26.10 25.20 25.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.51 0.72 -3.28 18 504 2009-01-15 18:05:59 2003-09-15 14:23:59 8 8 105 3 247 398 4 98.70 38 19.03 CHANGED YPP.IsSscpRpcYKp-Fss-apEY+pLpAEl-slsc.+hspL-ppL.cpLsc...sSpcYpslt....cEYp+.lK..KpsssYppKKpRCcYL+sKLsHIKphls-YDc .................YssIpSt-QRppYKp-Fss-YsEY+pLpAclpslsc.+FpcL-spl.cpLsp.........uopEYp...............plp.plhpEYp+h+............KpsPsYppcKpRCcYL+sKLuHIKphI.t-YDp............... 0 29 55 137 +7132 PF07304 SRA1 Steroid receptor RNA activator (SRA1) Moxon SJ anon Pfam-B_18506 (release 10.0) Family This family consists of several hypothetical mammalian steroid receptor RNA activator proteins. SRA-RNAs likely to encode stable proteins are widely expressed in breast cancer cell lines. SRA-RNA is a steroid receptor co-activator which acts as a functional RNA and is classified as belonging to the growing family of functional non-coding RNAs. 20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.97 0.71 -4.50 4 239 2009-01-15 18:05:59 2003-09-15 14:28:43 6 16 207 1 163 255 0 157.30 24 18.02 CHANGED hsPPs.........PsSstsssPP.GpG...PsSup.......lpPssscP-....s-tshcsshhsLcpslpsspsShcpplssDIoRRLplLp-.WtGGKLShsV++RMshLspELpstpWDuADcIHhSLMVDHVsEVSQWMVGVKRLIAEsRpLs.c.ht..pputu............QsS .....................................................................................................................ssssss..............P........t..s...s..ss...s.t...s.............s....Ps.s.s..................................ht.s..s...st...s.p............spt....l....p..ll.p.s....L.pp....sh.p.......t........s....s....s.s.....h....pK...p........h.......c...Dsp+RLslLF-+.L.s.s.s.c.Lo.ps.s.h.cph.tpLspuLps+c.acsAppIHhslh.s..s..+s...s..EsupWhs..........GVKRLIsht...................ss.............. 
0 41 81 130 +7133 PF07305 DUF1454 Protein of unknown function (DUF1454) Moxon SJ anon Pfam-B_18833 (release 10.0) Family This family consists of several Enterobacterial sequences of around 200 residues in length which are often known as YiiQ proteins. The function of this family is unknown. 25.60 25.60 25.70 75.50 24.30 25.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.31 0.71 -5.22 4 549 2009-09-10 15:53:58 2003-09-15 15:29:10 7 1 541 0 42 161 0 197.90 76 99.52 CHANGED M...K.hshLahhhhhuLslsssA.cApT.s-T.TssAPYLLsGAPoFD.SISQFRE+FNusNPoLPLNEFRuIsopsD+sNLTRAASKINENLYASTALERGTLKIKSlQhTWLPIQG.PEQKAA+sKA.EYMuAllRsFsPphSpsQSpp+LppLLssGKsKRYaocsEGAlRYlVADsGEKGLTFAlEPIKLALSEoLEG.NK ..............................MKPGCTLFhLLsSAL..Tlo...s.T.A...H.AQ..o....s-o.sTTAPYLLAGAPTFDLSISQFREcFNsQNPoLPLNEFRAIDSSsDKAN.LTRAASKINENLYASTALERGTLKIKSlQhTWLP...IQG....PE.Q...............KA............A........KAKA.EY......MAAlIRTlsPLhTKs...QS...QK...KLQsLLTA.GKsKRYYsET.EGAlRYVVADNGEKGLTFAVEPIKLALSEoLEGhN........ 0 1 10 25 +7134 PF07306 DUF1455 Protein of unknown function (DUF1455) Moxon SJ anon Pfam-B_19038 (release 10.0) Family This family consists of several hypothetical putative outer membrane proteins which appear to be specific to Anaplasma marginale and Anaplasma ovis. 25.00 25.00 241.90 241.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.57 0.71 -4.09 2 8 2009-01-15 18:05:59 2003-09-15 15:31:47 6 1 4 0 1 7 0 130.00 89 99.62 CHANGED MSCRAF.L+GlLhFsLLPGSLAT.ARP.LLtVsstsuhtQTsGGG..hsttASsThG+LssAsAsSAspS.tl.uSsVhEsGh.sphhSPAQ..ELhusALQPS.TPSSWVFGRTAhSGVRuFLERTVFLVF MSCRAFSLKGLLAFTLLPGSLATAARPSLLRVGGEASGQQTSGGGFHAAGGASATRGRLTSASAVSAPQSFGVLGSTVWEDGFLPSVFSPAQ..ELLSAALQPSPTPSSWVFGRTAISGVRGFLERTVFLVF. 
0 0 0 1 +7135 PF07307 HEPPP_synt_1 Heptaprenyl diphosphate synthase (HEPPP synthase) subunit 1 Vella Briffa B anon Pfam-B_18592 (release 10.0) Family This family contains subunit 1 of bacterial heptaprenyl diphosphate synthase (HEPPP synthase) (EC:2.5.1.30) (approximately 230 residues long). The enzyme consists of two subunits, both of which are required for catalysis of heptaprenyl diphosphate synthesis [1]. 29.40 29.40 29.70 29.40 29.30 29.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.18 0.70 -4.88 15 387 2009-09-13 14:07:00 2003-09-15 15:36:15 6 2 383 0 41 188 0 175.40 39 77.92 CHANGED HsYLtcaIstPhlDEDKLhLLashhs-us..lpppct-cYllTsMLVQlALDTH-cVos.tst.sspspKsRQLTVLAGDYYSGLYYpLLScscDIslIRsLApuI+EINEpKIpLYp+pspsl-plhpSVspIESALlp+lu-+Fthsp.WpphsscaLlh+RL.pEpcha.pttsS.lhcsltph....cstsshcslhp-thcclpcthpphlcp .............................................................................................................................olLhGDhhSuhaYpLLAEhsDlsh.ptlucAIhEINEhK.sL.a....pp.A.h...s...s...h...E.I.pulVpIEohh.hhT...h...sHFtl...................................................................................s........................................................... 0 13 27 33 +7136 PF07308 DUF1456 Protein of unknown function (DUF1456) Moxon SJ, Yeats C anon Yeats C Domain This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. 21.70 21.70 22.00 21.80 21.20 21.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.99 0.72 -3.94 117 1896 2009-01-15 18:05:59 2003-09-15 15:37:37 8 2 957 0 262 965 68 68.40 41 85.24 CHANGED NNslL++LRhALslpcschlclhthus..hplo+s-luuhh..+K.........c-cc......sYp.pCsDphLpsFLsGLhhppRG ....NN.lL++lRhAhslps..s..DllcILshss..hclott-lsuhhRK.................c-cc...............sap.cCsDphLptFLpGLhhcpRG........... 
0 60 125 200 +7137 PF07309 FlaF Flagellar protein FlaF Moxon SJ anon Pfam-B_19331 (release 10.0) Family This family consists of several bacterial FlaF flagellar proteins. FlaF and FlaG are trans-acting, regulatory factors that modulate flagellin synthesis during flagellum biogenesis [1]. 20.60 20.60 21.40 21.60 20.40 19.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.48 0.71 -4.21 50 272 2009-01-15 18:05:59 2003-09-15 15:41:24 6 2 230 0 103 219 28 112.40 32 93.77 CHANGED M........stpAYupstp.sstosRphEtpsLs+ssppLppup.....spsstshpth......cALhhNR+LWohhtsDlups-NsLPpcLRAsllsluhFlh+copclh..ttp.s..slpsLI-INpsIhsGL ...................ht.tYtcshp..sstss+-pEttlhs+uhshLpsAp......tpsspshpsl......-Alhasp+LWohlhsDLs.ss-NsLPp-LRAsllSlulaVh+cspclh..psps..pshpsLI-IspsItcGL................... 1 30 63 77 +7138 PF07310 PAS_5 DUF1457; PAS domain Vella Briffa B anon Pfam-B_18761 (release 10.0) Domain This family contains a number of hypothetical bacterial proteins of unknown function approximately 200 residues long. This region is is distantly similar to other PAS domains. 20.60 20.60 20.60 21.40 20.50 20.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.50 0.71 -4.62 10 302 2012-10-04 01:10:46 2003-09-15 15:59:54 8 1 218 0 104 274 26 135.90 26 68.99 CHANGED M+psSotplFuYWsclRsucs....sP+RuDI-Pucl+sLLuDsFlLps-usGshsFRLAGTRLCsLhGc-L+spsFsuLas.ssRpclscllssVhccsssslsslsutspsGss.lchElLLLPLpscsssssphLGlLs .............................................shhtlhsYWpp.l.pt..s.cs..........hPtRps.lDPt.cl.t.p...hLssl..F...lL..E......p..p.....s.t.....u..p...h.ph.RL....A..GT+lssla....Gp.-l+...Ghph.ss...l..astp..s.........p.t..t.l.t..chlps.Vhppt.ssshhths......u.h....shsu....pt..lph-h...lLLP.Lt.sssst..phlGsh.s............................................................. 
0 44 69 80 +7139 PF07311 Dodecin DUF1458; Dodecin Moxon SJ, Anantharaman V anon Pfam-B_18876 (release 10.0) Family Dodecin is a flavin-binding protein [1],found in several bacteria and few archaea and represents a stand-alone version of the SHS2 domain [2]. It most closely resembles the SHS2 domains of FtsA and Rpb7p, and represents a single domain small-molecule binding form[1]. 21.00 21.00 21.10 27.40 20.00 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.73 0.72 -3.94 80 611 2012-10-02 11:08:51 2003-09-15 16:03:34 7 2 529 137 255 516 39 65.40 41 90.96 CHANGED +lYKhlEllGoSspSh--AlpsAlscAu+Tl+slcWhEVtEh+uclcs.G+VscaQVslKVuFcl-s ......plYKllEllGoSspSh--AIpsAls+Au.....cTl+slcWFEVs-hRGclcs.GclsaaQVslKVGF+l-........ 0 69 170 227 +7140 PF07312 DUF1459 Protein of unknown function (DUF1459) Moxon SJ anon Pfam-B_18877 (release 10.0) Family This family consists of several hypothetical Caenorhabditis elegans proteins of around 85 residues in length. The function of this family is unknown. 20.80 20.80 21.00 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.06 0.72 -4.06 2 33 2009-01-15 18:05:59 2003-09-15 16:05:31 6 1 5 0 33 26 0 81.00 47 98.45 CHANGED MFQKohIVhhhALFCISSsQVlYoPEVVuSPYYYuuuPVA.SAYPYAYAYGAsAYPTAaYGWGSNKGQQA.uSA.PTQKLTNNQ .........MFpKshh.sl..hls.h.F.sl.uSsQllao.PphVsu..PYYYAu.ussu..AYP.sYu..Y.u..AAAYPos.auWGSNKsp.p..u..s.u.sA...PTpp..LsNN.................. 0 8 16 33 +7141 PF07313 DUF1460 Protein of unknown function (DUF1460) Moxon SJ anon Pfam-B_18925 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 260 residues in length. The function of this family is unknown. 
21.30 21.30 21.40 21.40 21.10 20.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.34 0.70 -5.14 25 430 2012-10-10 12:56:15 2003-09-15 16:07:39 7 5 420 3 68 330 60 209.60 41 72.81 CHANGED tlSptFLGTPYtAspLhGussss..EpLVlcFcGlDCFTalDYVtALp+uss......pssFlcsLhchRYtsGcl.sFhsR+HFFo.DWstssspt..scDlTsplSs...........thlohsKpLNp............Ks-GspalsGLslhcRsl.........sYIPustl........sppVlspL+oGDaIGIYoph........sGLDVTHsGlhltsssu....shhRNASStpss.pVVDsPFh-YlpspP...GI .....................................................................p.lSptFLGTPYpA....sTLhtsssts..EtLVlNFsGlDCFTalDYV.ALupuss...........ppsFhcsLtphRYts.Gcl...uYhsR+HFFo.DWhsssspp..AcDlT.s...plSs..............phlshsKpLNp..................................................................K.s-GuEalsuLulh.Rpl.............sYIPu..ctI.........................spp..V.h..sp..L+sGDhIGlYosh.................................................sGL.DV...oHlGIs..lp.ccsp.........lhhRNASSlttp...+.VV..D..pP..Fh.-Yh+spP.....GIl....................... 0 15 36 58 +7142 PF07314 DUF1461 Protein of unknown function (DUF1461) Vella Briffa B anon Pfam-B_18854 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 200 residues long. These are possibly integral membrane proteins. 
24.00 24.00 24.20 24.70 23.60 23.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.36 0.71 -4.54 40 875 2009-01-15 18:05:59 2003-09-15 16:09:30 6 1 849 0 104 563 21 180.00 35 82.83 CHANGED h+.......p.h....hslhhhlhlluhulhhslhht..h.hY.hplchhplschss............hshsplhpNaspLhsYLssshppt.Lphssh.sSssG..........hhHFt-VKpLFhhshhlh.lhsllhhhhhhhhhhpcpphhhhh.pshhl..hh......llPlllhhhh.hlsF-paFshFHplhF.sNshWlFDPspDPlIphLPEpFF ............................................................................................hhht........hhhhs.hlhll.ululhlTIhht..a.h.Y.h-IpaLsls.p..h.l.h..........................ls.ps.lhpNaph.L..hsYLssPapph.LphPcF..sSssG..........................ltHFt-VKpLFhls.hVh...l.l...sl...sh..h..ha..l..p..hl..hK...+.ph.....l...th.hh......+.....shhh..hh.............llPl..h.lulhh..hlsF-pFFslFHpllF.ssDsWLFDPspDPlIhlLPEpFF............ 0 37 66 84 +7143 PF07315 DUF1462 Protein of unknown function (DUF1462) Moxon SJ anon Pfam-B_19094 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 100 residues in length. The function of this family is unknown. 25.00 25.00 32.90 109.50 24.00 23.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.14 0.72 -4.02 18 399 2009-01-15 18:05:59 2003-09-15 16:17:21 6 1 399 1 35 144 0 94.40 59 87.33 CHANGED sVYGA-slCASCVNhPSSK-TaEWLpAALpRKYPsp..sFchpYIDIp.pPs-s..-cpp.caup+Ih-DEhFYPLVllsDElVuEGNPpLKsIapphE .hVYGA-VICASCVNAPoSK-TY-WLQshLtRKYPs...sFpasYIDIpc-s-s..-.cchpFhERI.pDELFYPLlshNDEhVA-G..phKpIhchI-. 0 9 20 29 +7144 PF07316 DUF1463 Protein of unknown function (DUF1463) Moxon SJ anon Pfam-B_19113 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 140 residues in length. Members of this family seem to be found exclusively in Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 
25.00 25.00 25.50 57.20 18.90 17.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -4.59 2 115 2009-09-10 14:48:10 2003-09-15 16:19:18 6 1 27 0 9 71 0 135.40 73 98.69 CHANGED hpaYsLc.laFShsss.lcoGpLEhooEPsshAhhSoED+shPl.ShRDP+TlsalFslEVohGShDYhLLTcLSsEQFYp.sV.KpcKhhcLsFNDphuhKIISN.AhFsE.PoRpYSA-s-pVpFpI+AINCphpKss .....MQFYDLREVYFSIGG.sQLHSGKLELTSEPTTRAVlSoEDKGhPVISLRDPKTITYlFNIEVTLGSaDYILLTELSDEQFYNMDVpKpDKMLDLsFNDRIATKIISNYAIFTEEPSRSYSAEAEKVoFEIRAINCQKoKPN..... 0 5 5 5 +7145 PF07317 YcgR Flagellar regulator YcgR Moxon SJ anon Pfam-B_19142 (release 10.0) Domain This domain is found N terminal to Pfam:PF07238. Proteins which contain YcgR domains are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias [3]. 21.10 21.10 21.30 21.10 20.80 20.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.02 0.72 -4.30 7 656 2009-01-15 18:05:59 2003-09-15 16:24:08 7 2 640 2 95 297 19 105.70 50 43.27 CHANGED pFlhps.htIsshLR-LpKpps.lplp.tspGp..hlS+ILslsPppt.hlhDaGutEp-NptsLputplshlspspGsKlEFsssplppscapshPAFpstlPppLahl ..........QFLKpNPLAlLGVLRDLpKsslPLRlS.Ws.sGQ...hISKILslsP-K..Lll...DaGSQsc-Nh..AVL+Apc.lsIsA.ETQGAKVEFTl.p.Q.L.pp.u.EY.p.LPAFIT.s.PsoLWFV........... 0 9 42 68 +7146 PF07318 DUF1464 Protein of unknown function (DUF1464) Moxon SJ anon Pfam-B_19143 (release 10.0) Family This family consists of several hypothetical archaeal proteins of around 350 residues in length. The function of this family is unknown. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.07 0.70 -5.63 11 65 2012-10-02 23:34:14 2003-09-15 16:32:55 7 1 62 0 43 226 2 317.30 32 93.36 CHANGED GIDPGTsSaDlsul--..GpVhhctslPTspVtcsPthllchlp-s.....ss-llAuPSGYGLPlh+sp-ls-.....c-IhLhTLsssuctu.....hG......LRshlp.h.uu+sl....ssahlPGVIHLsoVPsaRKlN+IDhGTADKlAosshulhp.sc.....Ycsh....sFILVElGtuFoAslAVpsGpIVDGhGGTh.hsG..ahuuGhhDGElAYLhu....plsKphlFpGGht.hss.......................sapthhE.lhKslsshhuoh.csc.....IIlSGRhtphs-htcclct+htch...........h..t..t.sKEuApGuAlIAsuluGGha+cll-hLtl.cSuGTslDal+L ..............................................................................................................................................................GlDPGTc..Shslhhl--..Gplhh..h.......plsop.V....t.cs..shh.l......lchlpch...............ps-hlshPSGaGl.P..ltp.h.pclsc.......c-lh.l..hTlhcs.tphs........hG......Lpchlp.h..tppph....s..sahI.PuVI..c.LsoVP.t...aRKhNpID...hGTA.DKlAs..sshuh.t..pthtl.apps......sFIllEhGhsasuslsVcsG+IVD...G...h..G...G.T.h......h....s......G...h...h...s.G......s..l...D....u....E...l..........A.Yhl...s..............chs..K...ppl......F.....p.....u.....G.h..hst.................t.............t...t..........shcthhE.ll+tVs...sh.h.s.s.h...chp.....Il.l.SG+h.t..p.....-htcchcpphtth..........h............sKE.AA..GuAhIAsulsGGha+chlphl...cupGosl-alp.............................................................................................................................. 1 13 21 34 +7147 PF07319 DnaI_N Primosomal protein DnaI N-terminus Vella Briffa B anon Pfam-B_18931 (release 10.0) Family This family represents the N-terminus (approximately 120 residues) of bacterial primosomal DnaI proteins, although one family member appears to be of viral origin. DnaI is one of the components of the Bacillus subtilis replication restart primosome, and is required for the DnaB75-dependent loading of the DnaC helicase [1]. 
20.20 20.20 20.30 21.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.23 0.72 -3.74 37 1017 2009-01-15 18:05:59 2003-09-15 16:51:55 6 3 1013 1 99 468 1 91.90 35 30.28 CHANGED MEslscslpc.hhpppshppchpplhppllsDs-VpuFlpp+ptcLopchIp+uhsKLaEalppppchpt....tsssslhpGYpPpLslspthIDVsY ........................MEsltphlpc...ps..p.c.....a.....t.....pchpc.l.h.pclhpDPDVpsFlppct.cLTsp.Ip+SlsKl.EYlsp+c+att....sDss.lscGYpPpLslspshlDlpY............... 0 22 49 74 +7149 PF07321 YscO Type III secretion protein YscO Vella Briffa B anon Pfam-B_19036 (release 10.0) Family This family contains the bacterial type III secretion protein YscO, which is approximately 150 residues long. YscO has been shown to be required for high-level expression and secretion of the anti-host proteins V antigen and Yops in Yersinia pestis [1]. 27.50 27.50 29.10 28.80 27.40 27.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.92 0.71 -4.39 12 104 2009-09-13 06:09:03 2003-09-15 17:16:14 7 1 98 0 19 64 2 149.40 38 69.50 CHANGED M....lppLhcIKphRt-cApptltpQp.tlssA+tcpppAppshpDa+.WRhpEEpRLauphptphlth+-l-chp....ppluhLR-ppApLtpplscttpplctEpptLppppptlppsp+ppEKhsELtcppps-ttt.pphpEEhE.EEFtp.......ph .....M.lcpLhclKplRh-RAE+AlppQphplpsAttcppcApps.pDY+.WRhcEEpRLFsptpspslsp+-LEpap....ppluhLRE+EApLEpcsAchtcpLcpERccLppspctlppA++pppKFhELtcppps-ptsppchpEEtEtEEFhphp....... 0 7 9 14 +7150 PF07322 Seadorna_Vp10 Seadornavirus Vp10 Moxon SJ anon Pfam-B_18930 (release 10.0) Family This family consists of several Seadornavirus Vp10 proteins found in the Banna and Kadipiro viruses. Members of this family are typically around 240 residues in length. The function of this family is unknown. 
25.00 25.00 42.20 41.80 20.90 20.60 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.74 0.70 -5.11 2 12 2009-09-21 14:23:59 2003-09-16 09:38:08 6 1 4 0 0 12 0 228.30 59 92.13 CHANGED MDVLSKuSLKELLAHLE+TPLEEAlSY+IGTlPYQNVLIsRNEYYNQ.YPDsTSLIDGVuREGQRNVNGLIMSIISYVVSGSGHYIPNIGahLLRRSILDILTKHDTGLsTNNlNYslIARNLTVSKMNCEQRKRMLICFKLLAYKDGN.NDYEhYLNQNIsLKQIAPNFIPGDMRTVhpNpDpLuIVGIPAYRLTQSTELSIRDDNAKSYKlGYVDWYNSNSFLRERs-FNLIpLKDRD. ......LSKSSLKELLAHLE+TPLEEAISYKIG.TlPYQNVLISRsEaYNQLYPDsTSLIDGVuREGpRNl.GLIMSIISYVVSGSGHYIPNlGhhLLRRSILDlLT++DTGLsTNNlNYslIARsLTVsKMNCEQRKRMLICFKLLAYKDGN.sDY-sYLNQNloLKQIAPsFIPsDMRTVlSNsDpLSIVGIPsYRLTQSTELSIRDDNAKSYKlGYVDWYNSNuFLRERN-FNLhpLKDRs.h...................... 0 0 0 0 +7151 PF07323 DUF1465 Protein of unknown function (DUF1465) Moxon SJ anon Pfam-B_19346 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 25.00 25.00 28.70 72.80 22.20 20.50 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.81 0.71 -4.66 26 190 2009-01-15 18:05:59 2003-09-16 09:55:55 7 1 189 2 72 161 18 157.20 49 90.36 CHANGED lphucphstSphFpsLapEGMsLVEETAuYLD.....GtGRspu+sLsRpu....ulsYAsESMRLTTRLMQlASWLLlpRAspcGEMotpQsppEKs+lphssss.....sssshscLP.thpcLltRSpcLptRltRLDcplhst.ssssttt......sPVssQlshLcs.....AF .......h.phuc+hshSssFcslYtEGMsLVEEsAuYLD.........GcGRp-A+sLs.RsA............uhhYAuESMRLTTRLMQlASWLLLQRAs+pGEMTcsQssuEKs+V+Lcsss..hspsusuas-LP.sht-LlcRShRLQsRVp+lDcclast.s.shpts.....tNPVstQlshL+sAF......... 0 20 42 52 +7152 PF07324 DGCR6 DiGeorge syndrome critical region 6 (DGCR6) protein Vella Briffa B anon Pfam-B_19101 (release 10.0) Family This family contains DiGeorge syndrome critical region 6 (DGCR6) proteins (approximately 200 residues long) of a number of vertebrates. 
DGCR6 is a candidate for involvement in the DiGeorge syndrome pathology by playing a role in neural crest cell migration into the third and fourth pharyngeal pouches, the structures from which derive the organs affected in DiGeorge syndrome [1]. Also found in this family is the Drosophila melanogaster gonadal protein gdl. 23.30 23.30 23.90 24.90 23.20 23.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.26 0.71 -4.83 8 130 2009-01-15 18:05:59 2003-09-16 10:42:51 6 3 87 0 75 129 1 160.30 45 79.11 CHANGED Mpca.usspt.........hs-pspQ.......QERHYaLLS-LQsLVK-LPSuaQQRlSYosLoDLAhALLDGTVFEIVQGLLEIQHLTEKNLYNQRhKLcsEH+sL+Q-Lt+KHK-ApQ..sC+sHNLulLKssQp+EhEulEpRl+-EQphMDcKIVLELDQKVhDQQSTLEKAGVPGFYlTsNPQElplQMNLLELIhKLQQhp..ssK ..................................................................................Qp+hY.Llp.tLpphhpcL.PpphQpRl.uYshLosLA.sLlss.olF-IVpuLhElQHlTE+pLhppRhplpscap..........h.tpth.hpKhp-spp......t..sH..tLsllpttpp+chc...t...hp.phc-E.p.hDpKIllELDp..KVsDQQSTLEKAGVsGFYVTsNPpElplQMpLL-hI.h+Lppht.................... 0 26 32 53 +7153 PF07325 Curto_V2 Curtovirus V2 protein Moxon SJ anon Pfam-B_19350 (release 10.0) Family This family consists of several Curtovirus V2 proteins. The exact function of V2 is unclear but it is known that the protein is required for a successful host infection process [1]. 25.00 25.00 147.90 147.80 21.90 20.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.48 2 21 2009-01-15 18:05:59 2003-09-16 10:53:44 6 1 17 \N 0 27 0 122.70 76 100.00 CHANGED MGPFpVsQFPcNYPAhLAVSTSCFhRYNKWCILGI+pElEuLTLEEGEsFLtFQKEVKKLL+hKssFpRKCpLYEcIYKKYl.ssPEcKGp.spshsEEEED.ataEcIPMEEsCspcpssElcDV MGPFRVDQFPDNYPAFLAVSTSCFLRYN+WCILGIHQEI.EsLTLEEGEVFLQFQKEVKKLLRhKVNF+RKCuLYEEIYKKYVtNVsEKKGE.SSKCVA.EEEE.DhY-..aEEIPMEEsCsKcQc.EVcDV.. 
0 0 0 0 +7154 PF07326 DUF1466 Protein of unknown function (DUF1466) Moxon SJ anon Pfam-B_19433 (release 10.0) Family This family consists of several hypothetical mammalian proteins of around 240 residues in length. 25.00 25.00 31.50 31.00 18.80 18.70 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.57 0.70 -5.06 2 48 2009-01-15 18:05:59 2003-09-16 11:36:18 6 2 29 0 22 46 0 208.10 54 95.22 CHANGED MASpWQsMtTSVRRRSL.+pEQLEcpc.hpsssuH.ET..GsLGSLCRQFQRRLPLRAVsLsLtsGPSWKRLEoPEPtQQGL..AARSAKSALGAhSQRIQESCQuGTKWLhETQVKsRR.KRGAQKspGSPs.SLSQKsTRLs.....+ustDsh.ttHhRLSspMGsHsH.hpR.RREAAhRSPhSSTEPLCSPSESDSDLEPsGAGIQHLQKLSQcLDcAIhAEEptph.....hp ................................MASpWQuhtsSV..+RRSLpcpEQLE-pcthpPs..suH.ETSsGALGSLCRQFQRRLPLRAVsLN.LssGPSWKRLEoPEPtQQGLQAAARSAKsALGAhSQRIQESCQ.SGTKWLVE...T..QVK..ARR...R.+R..GAQKsuuoPs+S....L........Sp+...ST..RLou..s..sss...p...ust.....s.....shppp......t+pLSs....hG.scApP..hRRSRR.-AAh.RSPYSS....oEPLC......SP.p...ESDSDLEPVGuGIQ+LQKLSQcLD-AIhsEEptph.s............................. 0 1 2 6 +7155 PF07327 Neuroparsin Neuroparsin Moxon SJ anon Pfam-B_19487 (release 10.0) Family This family consists of several locust specific neuroparsin proteins. Neuroparsins are produced by the A1 type of protocerebral median neurosecretory cells of the PI-CC system and display pleiotropic activities: inhibition of the effect of juvenile hormone, stimulation of fluid reabsorption of isolated recta, induction of an increase in hemolymph lipid and trehalose levels, and neurotrophic effects [1]. 
21.40 21.40 22.40 26.20 20.50 20.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.59 0.72 -3.79 4 14 2009-09-11 14:17:53 2003-09-16 11:54:31 6 1 11 0 5 23 0 102.50 37 87.08 CHANGED MKPAAALAAATLLIAVILFHRAEANPISRSCEGANCVVDLTRCEYGEVTDFFGRKVCAKGPG-+Css....atpCGsGhcCpsthCoGCSl+TLQCa.h-uhs.Spp .........................................hhhlhhhlhLh.pps.tt..p....Rpsc..sstCs...s...Dh.s+CcYG..Vp.DhCGpcsCAKGPG-+CGG.....phaGhCG-GLhC.sCs..+CsGCSlcolpCa........................... 0 2 3 5 +7156 PF07328 VirD1 T-DNA border endonuclease VirD1 Moxon SJ anon Pfam-B_19558 (release 10.0) Family This family consists of several T-DNA border endonuclease VirD1 proteins which appear to be found exclusively in Agrobacterium species. Agrobacterium, a plant pathogen, is capable to stably transform the plant cell with a segment of its own DNA called T-DNA (transferred DNA). This process depends, among others, on the specialised bacterial virulence proteins VirD1 and VirD2 that excise the T-DNA from its adjacent sequences. VirD1 is thought to interact with VirD2 in this process [1]. 20.30 20.30 20.90 48.20 19.70 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -4.40 2 14 2009-01-15 18:05:59 2003-09-16 11:58:56 6 1 8 0 5 13 1 144.40 61 98.06 CHANGED MSptoRsTSS-hAlNQ+cslpVEGFKVVSsRLRSAEYEoFSaQARLLGLSDSMAIRVAVRRIGGFLEIDAcTRc+MEAILQSIGhLSSNluhLLSAYAEsPp.DLEAlRsERIAFGcuFAsLDGLLRSILSVSRRRIDGCSLLKsAL ........MSpts+sTSSDh.lsp+cuspl...EGFKVVSsRLRSAEYEoFScQARLLGLSDSMAIRVAVRRIGGFLEIDA-TRpcMEAILpSIGsLSoNIusLLsAYAEsPpsDLEAlpAERhAFGcuFAcLDGLLRSILSVSRRRIDGCShL+-AL.. 1 1 4 4 +7158 PF07330 DUF1467 Protein of unknown function (DUF1467) Moxon SJ anon Pfam-B_19588 (release 10.0) Family This family consists of several bacterial proteins of around 90 residues in length. The function of this family is unknown. 
23.00 23.00 23.40 33.30 22.40 22.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.86 0.72 -4.20 55 237 2009-09-11 13:34:12 2003-09-16 12:52:07 7 1 236 0 85 195 673 83.90 36 91.74 CHANGED MulsoulslYhllWahshFslLPhtl.+oQsEs.ucll.....sGTcsGAPsphpht+KslhTTllusllaulhhhlhhsGhlolccls....phh .....MslhoulAlYhllWWhsLFslLPhGl.RTQsEp.s-ls......GTssuAPsps+ltRthlhTTllusllaslhhhlhhsuhlslccls...h......... 0 22 51 62 +7159 PF07331 TctB DUF1468; Tripartite tricarboxylate transporter TctB family Moxon SJ anon Pfam-B_19347 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. This family was formerly known as DUF1468. 27.00 27.00 27.00 27.20 26.90 26.70 hmmbuild --amino -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.82 0.71 -4.53 199 1745 2009-10-20 16:26:54 2003-09-16 13:02:42 6 4 965 0 571 1522 1240 141.90 21 86.17 CHANGED tt-hhsullh..hslGhhhhhtu.hshshssst.....thGPuhFPhhluhlLhllGhhlhlpuhht.............................ttstthhsths........h+........slhhllsul..lhaslll....................p..........slGhllush............lhh.hhsshshs.........pt....phhtsl...llul......slsshs..ahlF...shhLslsL.P ........................................................................t..phhhuh.l.h...lhlu.hh.hh.htu....hph....ththsh........shG.Pt.h.aPh.hluslh.hl.h....ul.h.ll.lp.shh............................................tpssshhtp.hs...................h.p..............p.l.hhhl...shh......lhas.h.h.h....................p.........hlG..Fhluos................lhh.hsh.h.h.h.hu......................tp..........ph.hhsh.....lhuh....shslhh...ahlF...sthLslsLP........... 0 158 355 476 +7160 PF07332 DUF1469 Protein of unknown function (DUF1469) Moxon SJ, Finn RD, Sammut SJ, Bateman A anon Pfam-B_19352 (release 10.0) & COG5393 Domain This family consists of several hypothetical bacterial proteins of around 140 residues in length. 
The function of this family is unknown. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.46 0.71 -4.38 97 1760 2009-01-15 18:05:59 2003-09-16 13:06:22 6 4 1530 0 487 1143 145 115.40 24 80.40 CHANGED lspLlschssplspLlcsElcLA+sElpccsppsutusuhlsuAullshhulhhLhhslshuL..shh...........s.h...hAhLl....VuslhhllAullshhGhpclc...tshsPpcThcplccDhphlcpp .................................h....hpclss.hsphlcsclcLAtsE...lpc...cttp.hhp...hlhhhuhshlhuhhuLhs....L.hhhl.h.h.ul..phh..h....................sAhlh.ssslhlll.Ahlhulhshpp.h+...psph.ppThcpltpDhphlct.t........................................... 0 139 307 413 +7161 PF07333 SLR1-BP S locus-related glycoprotein 1 binding pollen coat protein (SLR1-BP) Moxon SJ anon Pfam-B_19392 (release 10.0) Family This family consists of a number of cysteine rich SLR1 binding pollen coat like proteins. Adhesion of pollen grains to the stigmatic surface is a critical step during sexual reproduction in plants. In Brassica, S locus-related glycoprotein 1 (SLR1), a stigma-specific protein belonging to the S gene family of proteins, has been shown to be involved in this step. SLR1-BP specifically binds SLR1 with high affinity. The SLR1-BP gene is specifically expressed in pollen at late stages of development and is a member of the class A pollen coat protein (PCP) family, which includes PCP-A1, an SLG (S locus glycoprotein)-binding protein [1]. 22.70 22.70 22.80 22.90 22.60 22.60 hmmbuild --amino -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.59 0.72 -3.74 37 134 2012-10-01 23:31:40 2003-09-16 13:35:05 7 1 17 0 112 138 0 55.50 27 69.59 CHANGED sppppupp.....Cpphl.........sspCssspCpshChpp....h+Gs...GpChs........ttphtChCtY.C ........t.....ttth....Cpphl.........sssCs..hspCpstCtpc....apGs...GpChs...............ttthpChCpY.C....... 
0 43 57 62 +7162 PF07334 IFP_35_N Interferon-induced 35 kDa protein (IFP 35) N-terminus Vella Briffa B anon Pfam-B_17864 (release 10.0) Family This family represents the N-terminus of interferon-induced 35 kDa protein (IFP 35) (approximately 80 residues long), which contains a leucine zipper motif in an alpha helical configuration [1]. This family also includes N-myc-interactor (Nmi), a homologous interferon-induced protein. 22.90 22.90 23.00 25.10 22.80 22.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.45 0.72 -4.01 6 78 2009-01-15 18:05:59 2003-09-16 13:47:53 8 3 29 0 36 73 0 71.60 41 25.89 CHANGED LltEIpcENhpLKcEIQKLEsELQpssRE.QI+EDlPcsKlKFoSsEsP....EsssQhuslSpShQss.KlsYELQKGQ .........lptlpcEphpLKpclQ........ELQp....tp+...-..l+c......DlP..csKlpFsssEsP....tpspQhpslupShpsss+lsY.LpcGp..... 0 2 3 9 +7163 PF07335 Glyco_hydro_75 Chitosanase; Fungal chitosanase of glycosyl hydrolase group 75 Moxon SJ anon Pfam-B_19431 (release 10.0) Family This family consists of several fungal chitosanase proteins. Chitin, xylan, 6-O-sulphated chitosan and O-carboxymethyl chitin are indigestible by chitosanase [1]. EC:3.2.1.132. The mechanism is likely to be inverting, and the probable catalytic neutrophile base is Asp, with the probable catalytic proton donor being Glu. (see the Chitosanase web-page from CAZY). 
21.00 21.00 29.00 26.50 20.80 20.50 hmmbuild --amino -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.23 0.71 -4.34 43 185 2009-01-15 18:05:59 2003-09-16 13:51:45 6 12 121 0 107 196 0 160.70 34 51.35 CHANGED MDlDCDGss........................hpC................p..sss.shQspTuFp...............................................h....shpsLsApthPYlVls................sshas.pspG..lpstslsAVls..ss+lhYGlhGDo..........sus....shlGEASlulAcsh..sp.......shsGssG..psss...DVh.......................Y.................IsFsGsc..ss.Pstth....spshpshppsl..pshG-cLlspl .......................................................................................MDlDCDGts........................................................sspC....................................s..sss.shQspTuFp...................................................t.shpsLsAshhPYVVhss...................sshasspptG..lcshSVsAVVs....ss+l.............hYGl..hGDT..........NGs.............shhGEASluhApsC.....sp.........sh.sGssG.....psss....DVl.......................Y...................IsFsGsc...uV.Putst....spshtp.tpol..tthGsphlt..h................................................................. 0 19 51 85 +7164 PF07336 DUF1470 Protein of unknown function (DUF1470) Moxon SJ anon Pfam-B_19432 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. Members of this family are found in Streptomyces, Rhizobium, Ralstonia, Agrobacterium and Bradyrhizobium species. The function of this family is unknown. 
23.40 23.40 23.40 23.50 23.20 23.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.68 0.71 -3.82 101 865 2009-01-15 18:05:59 2003-09-16 13:55:16 6 4 373 1 337 872 14 127.80 18 66.25 CHANGED usphuLDhlNTsshts.......stsh-tLsss..ssltsWlttpshhsssssstttt...............httst...sLR-...slpplhput...tsspt...........slstlNphltpss.stspLst........t..ththphttsssss..tshlss...lAt.shspllsssph ...............................................phsl-hlNThh..h.t...........tt.hDhLsss........pslttWh.....p....tt..s.hsss..ttssttth..................................................tth+.......plR-...slctlhput..........sstt.................slsh.lNphltpss..stsplst......................ht.ht.....h.h.ssss..sh...........sth..lus...lAt..shhpllst...h........................................ 0 130 251 303 +7165 PF07337 CagY_M DC-EC Repeat Yeats C anon Yeats C Repeat This repeat is found in the CagY proteins - part of the CAG pathogenicity island - and involved in delivery of the protein CagA into host cells ([1]). It forms part of a surface needle structure, and this repeat may form an alpha-helical rod structure ([1]). A conserved -DC- and -EC- can be seen in regularly spaced in the alignment. 20.70 20.70 21.50 20.70 20.10 20.00 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.49 0.72 -4.46 13 2220 2009-01-15 18:05:59 2003-09-16 13:58:32 6 49 41 0 30 2236 1 32.50 51 48.99 CHANGED +AahDClspA+sEpE+p...EC.KLLocpt+chLpc ..+AYhDClspA+sEpE++...EC.KLLosEt+KhLpt......... 0 29 30 30 +7166 PF07338 DUF1471 Protein of unknown function (DUF1471) Moxon SJ anon Pfam-B_19452 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 90 residues in length. Some members of this family are annotated as ydgH precursors and contain two copies of this region, one at the N-terminus and the other at the C-terminus. The function of this family is unknown. 
21.40 21.40 21.70 21.90 21.30 21.00 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.39 0.72 -4.32 81 6384 2009-01-15 18:05:59 2003-09-16 14:09:38 8 3 551 5 450 1630 15 55.80 31 59.41 CHANGED slpshGslossut....so.s-lpptluc+A-ppGAphYhIhptps....ssphcuoAtlY+ ..............hp.hG.slSsout....uo.s-hcptluc+AccpGAssYpIsphpp.......sss.....hcuoA.lYK.......... 1 15 79 263 +7167 PF07339 DUF1472 Protein of unknown function (DUF1472) Moxon SJ anon Pfam-B_19493 (release 10.0) Family This family consists of several Enterobacterial proteins of around 125 residues in length and contains 6 highly conserved cysteine residues. The function of this family is unknown. 19.60 19.60 19.90 20.30 18.90 18.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.56 0.72 -3.70 2 74 2009-01-15 18:05:59 2003-09-16 14:16:46 7 3 48 0 0 59 0 78.20 61 47.89 CHANGED AWPCGFSshWP.pRVRAVPCLHLSRAGtDARVRFAAAVTRSLLPVCRDFPVVHPLRFRGLTLQLPsAVCVRLRLPLRPVHPRLIARLLWRHGTARCRthC- .....................................AWPCGFSVMWP.pRVRAVPCLHLSRAGhDARVRFAAAVTRSLLPVCR..DFPVV+PLRFRGLTLQLPsAVCVRLRLPLR.....PhhP.tL........................................... 0 0 0 0 +7168 PF07340 Herpes_IE1 Cytomegalovirus IE1 protein Bateman A anon Pfam-B_22587 (release 10.0) Family Expression from a human cytomegalovirus early promoter (E1.7) has been shown to be activated in trans by the IE2 gene product. Although the IE1 gene product alone had no effect on this early viral promoter, maximal early promoter activity was detected when both IE1 and IE2 gene products were present [1]. The IE1 protein from cytomegalovirus is also known as UL123. 
19.60 19.60 19.60 19.60 19.30 18.60 hmmbuild -o /dev/null HMM SEED 392 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.20 0.70 -6.02 2 150 2009-01-15 18:05:59 2003-09-16 14:24:06 6 2 14 0 0 143 0 212.90 60 59.56 CHANGED MESSu.KRKMDssNPDEGPSSKlPRPETPVoKAssFLpoMlpKEVNSQLsLGDPLFP-luE-sLKoFEcVTc-CsENPtKDlLtELVKQIKVRVDhVRp+lKpHMLpKYTQh-EKFTuAFN.MGGCLQsALDILDKVpEPFE-MKCIGlTMQsMYENYlVsE-pR-hWhtClK-LHDVuKsAAsKLGsALpAKApAKK-ELpRKMhYhsh+plEFFTKNSAFPKTTNGsStAhAALQsh.QCSP-ElhsaAQ+IhKhLDEERDKVLhHIDpIFMDILTTCVETMsNEYKVTSDAsMMTMYGuISLLoEFCRVLSCYlLEEoSVMlA+pP.ITK.-llSsMpRRIpEICM+VFAQYlLGsDPLRVCSPSV-DLRAIAEESDE-EAIsAashAT .................................ssPt.GsS.s........K.s+.-s..h..p.pAstaLpphLttEhp..s.lsLGDPLF...s.hs.pp..hcohEplhppt.pss..........................................................................TMQsMYENYIVPEDK..REMWMACIKELasVoKGAANKLGGALpAKARAKKDELcRKMhYMCYRNlEFFTKNSAFPKTTNGCSQAMAALQNLPQ.C.S.P.DEIMs........................................................................................................................................................................................... 2 0 0 0 +7169 PF07341 DUF1473 Protein of unknown function (DUF1473) Moxon SJ anon Pfam-B_19856 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. Members of this family seem to be found exclusively in Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 
25.00 25.00 38.10 38.00 21.20 18.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.99 0.71 -4.69 2 128 2009-01-15 18:05:59 2003-09-16 14:28:22 6 1 27 0 8 89 0 145.10 72 98.82 CHANGED MRYKhKILT+sKTacYsLKslPhYEWDpVLGF.sppDt.l.KLN-lphL+EITpLMIS.tFLDEFY.ILsppRca.phYK.hLshIlahsQashFph.pshKKPuLVYlppapspsGDalpaDYIsEpapY-hlhTS.pS.s.Np............pEhVsc ...MRYKMKILTKNKTYEYPL+VLPVYEWD+VLGF.NQSDA.lhKLNEVKYLREITSLMISPKFLDEFYlI.LDpNREFISYYKDYLVAIIYTAQFNTFHlDNDLKKPALVYLSEYENNVGDFVsFDYIN.ENF-YEKVsTSLoSso.NSp-................Lhsh......... 0 5 5 5 +7170 PF07342 DUF1474 Protein of unknown function (DUF1474) Moxon SJ anon Pfam-B_19882 (release 10.0) Family This family consists of several bacterial proteins of around 100 residues in length. Members of this family seem to be found exclusively in Staphylococcus aureus. The function of this family is unknown. 21.40 21.40 21.50 56.10 21.30 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.40 0.72 -3.78 5 167 2009-01-15 18:05:59 2003-09-16 15:10:26 6 1 114 0 3 77 1 96.30 55 95.33 CHANGED MNWEIKNLhsDLEVLKEKFEDLKDsHGWHFEEhYsHEPNHsLNKDEhI+EGsSYHERRIHN-QMhDLhHlYhcpFDcIlcKFcEIEKASS-.sFG-cSDDA .MNWEIKsLMCDlEllK-KlpDlsspHuWFsE-hFs...Ncl.oKcEhIsauhSYhEHRIpN-phh-LhplYLK-FspLIpKF+EIEKASS-..sFu-.SDDA................ 0 0 0 3 +7171 PF07343 DUF1475 Protein of unknown function (DUF1475) Moxon SJ anon Pfam-B_19887 (release 10.0) Family This family consists of several hypothetical plant proteins of around 250 residues in length. Members of this family seem to be found exclusively in Arabidopsis thaliana. The function of this family is unknown. 
21.10 21.10 21.50 21.20 20.30 19.00 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.06 0.70 -5.03 2 48 2009-01-15 18:05:59 2003-09-16 15:13:07 6 2 25 0 21 46 101 187.80 39 93.19 CHANGED MAssu.lssh+sVhsshhsLMLuTLVYThlTDG.Ph..R.-lhTPWhVsTllDFYlNlssIusWllYKEssW.uShhWslLLhhFGSlsTCsYlhlpLhclpsptsSpDPh.hLhLR....pGsh.ccKsShVlhuRhlFuhLGshMhusllYTshT.G.PF+h-LLhPWMsshLlsFYIsVhslSVWVsaKESshI.shlWlsLLIshGSlsTsuhIVlQLFplS.hDPlYhVL...Lps+sK.lNu..Gph. ........................................................h.hh+slhsshhhlMlu.sLlYThhT...DG.Ph..R.-lh..T..PWhssTllDFYlNlhslusWlhYKEssa.uuhhWhlhLhhhGShsTssYlh.phhclpsttsspsPh.hlhlR....pss..ppKp...VhhuRhlFuhLGhlMhuslsYTsh..TsG.PF+...h-.....LL..sPWMssoLlDFYIsVhslSVWVsaKESohIsshlWlsLLIshGSlsTsuY.IllQLhplS.tDPlhhVL....................tt..................... 1 9 15 18 +7172 PF07344 Amastin Amastin surface glycoprotein Vella Briffa B anon Pfam-B_19245 (release 10.0) Family This family contains the eukaryotic surface glycoprotein amastin (approximately 180 residues long).In Trypanosoma cruzi, amastin is particularly abundant during the amastigote stage. 
23.60 23.60 23.80 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.28 0.71 -4.61 76 430 2012-10-03 00:20:40 2003-09-16 15:13:54 6 2 19 0 67 443 0 156.20 30 75.97 CHANGED FlAFl........hVLVuT.PlDM..F.Rhpst.......s..psClTLWG..hKpsCpsspYshpssth.atsC....ssRhppF+sApAFAlISIhlauAAhlhGhl....hLhCC..............................................shhRhlCLsLNllGslTlslVWAsMsssYth........-sthC.s..thp..................pth.paGu..................GFuLhlsAWlL-llNIlhL ..............................................hlAFlFVlluT.Pls........F.+sctp.........ss........psClTLWG........hK........pcC.p.s.sp.Y..phshsph.at......pC........s..shhphFRh..ApAFuIISIhlhhuAhlhGhh.....hhh.s.h..............................................psh+..h..hs..hh...LslluhlTs..slVWssMsshYp.............ss.C.s.......thp..........................pth.paGs....................GFsLhVhu...WsLphlshhh..................................................... 0 54 59 67 +7173 PF07345 DUF1476 Domain of unknown function (DUF1476) Moxon SJ anon Pfam-B_19680 (release 10.0) Domain This family consists of several hypothetical bacterial proteins of around 100 residues in length. Members of this family are found in Bradyrhizobium, Rhizobium, Brucella and Caulobacter species. The function of this family is unknown. 25.00 25.00 25.70 25.60 23.20 22.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.29 0.72 -3.78 53 246 2009-01-15 18:05:59 2003-09-16 15:17:52 6 2 219 2 87 209 311 101.80 49 93.76 CHANGED M.ToFDDREcAFEsKFAHDpEhpFKApARRNKLLGLWAA-hLGhoGs-A-AYAppVVpADFEEuGD-DVhRKltuDLss.tuht.s-spIRs+MschhspA.ctQl ........................M.TshDDREcAFEpKFAhDpEhcFKAcARRNKLLGLWAA.Ep.LGhsss-A.-AYA+-VVtADFEE....AG....DEDVhRKVpuDLsu.tGlsho-ppIRt+MtphhtpAhpQl.............. 
0 28 56 65 +7174 PF07346 DUF1477 Protein of unknown function (DUF1477) Moxon SJ anon Pfam-B_19762 (release 10.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 100 resides in length. The function of this family is unknown. 21.60 21.60 84.20 83.90 21.20 19.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.71 0.71 -4.06 19 41 2009-01-15 18:05:59 2003-09-16 15:20:19 6 1 40 0 0 37 0 112.00 31 96.21 CHANGED M.......................pt.t.phpsutsp...........sltshss......su+phFYphshs....alpph.sssphshsTltslhDtIIphEpslFs+ShVLNhllsFLlspSDGss.lQstl.splLsaLLpKYh ...................h...........th.pstsp...........slsslhs.....psu+phFYplshu....alpphh.hsspssl.TlpshhDtIIphEcslFs+ShlLNhlVsFLlspSDGss.lQstl.splLsaLLpKY................ 0 0 0 0 +7175 PF07347 CI-B14_5a NADH:ubiquinone oxidoreductase subunit B14.5a (Complex I-B14.5a) Vella Briffa B anon Pfam-B_19436 (release 10.0) Family This family contains the eukaryotic NADH:ubiquinone oxidoreductase subunit B14.5a (Complex I-B14.5a) (EC:1.6.5.3). This is approximately 100 residues long, and forms part of a multiprotein complex that resides on the inner mitochondrial membrane. The main function of the complex is the transport of electrons from NADH to ubiquinone, accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space [1]. 25.00 25.00 27.60 27.30 19.70 17.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.30 0.72 -4.22 9 115 2009-09-11 15:22:33 2003-09-16 15:42:13 7 1 89 0 77 113 0 95.00 41 80.28 CHANGED o.hlQ+lRsFLLGR..caphsLRapDtlucRTQPsPpLPcGPuHKLSuNYYspRDuRREVsPPlsl.hpspKtLhAtpsuuht......p+hssPG.psasW- ...........o.hlpplRsah.G+...ptph..tLRap-..lupR..TQPPPpLPsGPuH..KLSsNYYhTRDuRREshPPhll..hss...p.Kt.Ls..us...ps....s.....tp..tss...s...p+ssoPu..hhtW.......................................... 
0 25 31 55 +7176 PF07348 Syd Syd protein (SUKH-2) Vella Briffa B, Zhang D, Aravind L anon Pfam-B_19909 (release 10.0) Family This family contains a number of bacterial Syd proteins approximately 180 residues long. It has been suggested that Syd is loosely associated with the cytoplasmic surface of the cytoplasmic membrane, and that interaction with SecY may be involved in this membrane association [1]. Operon analysis showed that Syd protein may function as immunity protein in bacterial toxin systems [2]. 25.00 25.00 26.00 25.50 24.60 24.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.71 0.71 -4.94 43 745 2012-10-01 20:46:44 2003-09-16 16:03:31 7 2 729 2 81 305 25 173.90 60 95.43 CHANGED lppALpsFhppYhptappppuphPtsth..s....hsSPCl...........psp-stlhWpPl..........+s.sschsslEcAL-lpLHssIpsFasshauuchtupap......stplpLLQlWsc-DFp+LQcNllGHLlMp++LKpssTlFIussss.-tpllolsNhoGpVhLEphGppp+chLAssLspFLspLpP .......T.AQAL+sFTsRYCDAWpEcHtSaPlSEELYG....VPSPCIl..........uoo-DAVaWQPQ.........PF.s.u.E.pNlNAVERAhDIslQPsIHsFYTT....QFAGD.MpAQFu......Dh+LTLLQsWSEDDFcRVQENLIGHLVTQKRLKLsPTLFIAThEp.EL-VISVCNLSGEVhpETL.......GT+pRThLAusLAEFLsQLcP.......... 0 9 25 51 +7177 PF07349 DUF1478 Protein of unknown function (DUF1478) Moxon SJ anon Pfam-B_20105 (release 10.0) Family This family consists of several hypothetical Sapovirus proteins of around 165 residues in length. The function of this family is unknown. 25.00 25.00 25.10 25.10 20.20 18.40 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -10.94 0.71 -4.86 9 46 2009-01-15 18:05:59 2003-09-16 16:04:07 6 2 46 0 0 37 0 144.80 55 98.42 CHANGED MA.Ps.pspcplsth.hhLT......RLspLsRPHPhLLhLIRNNPMGP+sAW+WLL.LsQSN.MSLpQYuTALQShVLLLGhhsCPRELhhDLa+FIPTLh+Th.IsptCG.uGsssLRssapSLuLssLLusSLLLShHpGLh.R.SshpsCCLTLpLMLA.hsQFh. 
.........................MAPs.pspphhsth.hhLT......RLAQhlRPHPhLLhLIRsNPMGPHsAWpWLL.LVQSN.MSL+QYATALQSFVLLLGTTsCPRELh.hDLaRFIPTLT+T.hhshtCG.GGsssLRsshpSLsLushLusSLLLS...thh...ssh.....h........................... 0 0 0 0 +7178 PF07350 DUF1479 Protein of unknown function (DUF1479) Moxon SJ anon Pfam-B_20226 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins, of around 420 residues in length. Members of this family are often known as YbiU. The function of this family is unknown. 19.60 19.60 19.70 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.38 0.70 -5.99 29 709 2012-10-10 13:59:34 2003-09-16 16:07:15 7 6 609 3 194 467 337 392.90 54 91.46 CHANGED ssL.Ps....papplKppLh...sp.....ctlhtoWc+lltslpcclcplpptGss..slPplsFsDItssphs........sphttpl+cRGssVIRuVhsccpAhpWpp-ltcYlcpN...........sp.htshs..sscPplYplYWSpsQlcARtHPphhtsppFhs.pLWp........st..sspshhshcpslsYADRlRhR.PGss.........................................................................phuLusHlDuGSlERWp-csY..pplYcplFc..GcWE.caDPa-A...spRssupppha.......tususCShFRoFQGWhALSshtPspGTLplhPlsc.uhAYhlLRPhFssss........................................................................sschsGAhPGpu.phssph.HPcLpL.cshlsIPclpsGDhVaWHCDllHuV-stHpGpssSsV......................hYIPusPhstpNstYltcQRcuFlpGpsPPDFstts.................EssatGRss.pc..lsph.....GhpuhGl 
............................c....AAIRphKpuLRAplG.D....sQtlFspLscsIAspVsE...Issl...+ApGps...................VhPhlsauDI..t.sGp.lo........spp+tpIKRRGCAVI+GhFPREQALu..WcpshlDYLD+N+FDEh...............h+ustDs.aFGoLu..AS+PpIYslYWSpAQhpARQSccM.s.t.AQsFLN.R.LWp..................hE...pDGKpa.........FsPDhsllYsDRIRR.RPP.GoT.........................................................................SpGLGAHsDSG.uLERWLhPuY...p+VassVFs...Gs..hp....p............YDPWcA...AHRT-VEEash...............csospCSVFRTFQ..................GWTALSD....h.hP........G......pGhLHVlPIPc..AM.AYlLLRP...LhDDVP.........................................................................-D-LCGsAPG....Rs....L....sl....SE.p....W..HPL..Lh.cALoSIPpLEAGDoVWWHCD................VIH....S.Vs......sVc.NtpG.auNV.........................................................MYIPAA.PhCEK.NlAYs++.+sAhpcGsSPsDFPp-D....a.............EosacGRh.TltD..Lshc.....GKRALGh............................................... 0 48 99 165 +7179 PF07351 DUF1480 Protein of unknown function (DUF1480) Moxon SJ anon Pfam-B_20253 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 80 residues in length. The function of this family is unknown. 25.00 25.00 43.00 42.80 18.70 18.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.65 0.72 -3.99 6 520 2009-09-11 08:41:35 2003-09-16 16:08:45 8 2 509 0 27 99 0 76.60 83 99.24 CHANGED MsKTsV+IuuFEVDDApLSSss.cs.-pTlSIPCKSDPDLCMQLDGWDEpTSIPAlLDGKcpLLY+pHYD+ppDAWVMRls .........KTSVRIGAFEIDDuELHGES.PG..-RTLTIPCKSD...PDLCMQLDAWDAETSIPAlLNGEHSVLYRT+YDQQSDAWIMRL.A... 0 1 6 17 +7180 PF07352 Phage_Mu_Gam Bacteriophage Mu Gam like protein Moxon SJ anon Pfam-B_19455 (release 10.0) Family This family consists of bacterial and phage Gam proteins. The gam gene of bacteriophage Mu encodes a protein which protects linear double stranded DNA from exonuclease degradation in vitro and in vivo [1]. 
24.70 24.70 25.20 28.10 23.90 24.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.71 0.71 -4.68 27 345 2009-01-15 18:05:59 2003-09-16 16:14:02 7 1 306 2 48 280 5 146.00 27 85.80 CHANGED sh-plshsl+clu-lppchpclcsthscclscIc-thpsph...........cslpsclphlpptlpsascsp+sEhs...ctKohphshGplpaRtppsssth....sh-sllctL.+ph....Gh.pca...........I+scE-lsKcslhppsc.............sspslsGlplppt.-sFtlcs ..................................sh-plshsl+clu-lppcht+lps.......th.......scplscIpcphssph...........csLppclchlppslpsascsp+cEhs...........cpKohshshGclsaRhpssssph.p......sh-sllchL.+ph...........GL...pca............I+s.KE..ElsK-Alhpp.c..............sstslsG.lplhpt.-sFhlcs................................................................................... 0 27 44 47 +7181 PF07353 Uroplakin_II Uroplakin II Vella Briffa B anon Pfam-B_19993 (release 10.0) Family This family contains uroplakin II, which is approximately 180 residues long and seems to be restricted to mammals. Uroplakin II is an integral membrane protein, and is one of the components of the apical plaques of mammalian urothelium formed by the asymmetric unit membrane - this is believed to play a role in strengthening the urothelial apical surface to prevent the cells from rupturing during bladder distension [1]. 
20.40 21.30 20.40 21.30 19.30 21.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.31 0.71 -4.94 4 38 2009-01-15 18:05:59 2003-09-16 16:33:11 7 1 28 0 18 34 0 160.30 65 91.72 CHANGED MASsLPVpTLPLILILLAVLuPGAA.DFNISSLSGLLSPALTESLLVALPPCHLTGGNATLMVRRANDSKVV+SSFVVPPCRGRRELVSVVDSGSGFTVTRLSAYQVTNLsPGTKYYISYLVpKGsSTESSREIPMSTLPR+NMESIGLGMARTGGMVVITVLLSVAMFLLVlGhIlALALGARK ..................................Mss.hPl.TLP.hhLlLLslLuPGuA....-FNISSL...SGLLSPALTESLLVALPPCHLTGGNATLhVRR..ANDSKVVpSuFVVPPCRGRRELVSVVDSGuGFTVTRLSAYQVTNLsPGTKYYlSYhVpKG...suTESSp....ElsMSTLPR+phEoIsL.GM.ARTGGMVVITVLLSVAMFLLVlGhIlALALGs+K.................................................. 0 1 2 4 +7182 PF07354 Sp38 Zona-pellucida-binding protein (Sp38) Vella Briffa B anon Pfam-B_19996 (release 10.0) Family This family contains a number of zona-pellucida-binding proteins that seem to be restricted to mammals. These are sperm proteins that bind to the 90-kDa family of zona pellucida glycoproteins in a calcium-dependent manner [1]. These represent some of the specific molecules that mediate the first steps of gamete interaction, allowing fertilisation to occur [2]. 
21.20 21.20 21.20 23.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -11.82 0.70 -5.75 5 94 2009-01-15 18:05:59 2003-09-16 16:59:07 7 5 32 0 42 97 0 231.60 49 65.01 CHANGED MDh+LuscElVDPsYpWpGPsG+sLoGNSplNITsTGpLlLpcFpESLSGlYTCTLSYKllcApTQEEssl+cpY+FhVYAYREPcYsYQhoVRFTA+uCsutYNssFhRtLKKIL-sLISDLSCcVptPSY+CHSVKhP++GL.sELFlsFQVNPFAPGWcshCsu.StDCEDoTN+plpKA+DRIE-FFRpQsYIL+HpFps.LPsIHYVEGSLQVVhIDsCRPGFG+NplpHssCAuCCVVCSPGTYSPDsuloCpsCsSuLl..YGAKoCP .............hs.cLpppEllDPoa.WhGPptKhlo........tNsphpITpTGp.LhhpsF.EshSGlYTChLpYKs....T.EEhhhphphcah..laAYREPcY.YQhssRapst.sCtuhaN..F.+hLhpILspLl.DLSCcl...p.cCH.pVch.+tGL.pELFhsF..pVs.shssthtst.sst.shsCE....s.pp..l.p.A+shIEcFFppQs.....lht+php...lPthaal-t...ohQhVhlspChPGaGhN.hhHspCspCC.VlCSPuoasPcsslpC.pCs..osh...h..YGAKsC............ 0 4 5 12 +7183 PF07355 GRDB Glycine/sarcosine/betaine reductase selenoprotein B (GRDB) Vella Briffa B anon Pfam-B_19711 (release 10.0) Family This family represents a conserved region approximately 350 residues long within the selenoprotein B component of the bacterial glycine, sarcosine and betaine reductase complexes. 
19.40 19.40 20.00 19.70 19.20 19.30 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.07 0.70 -5.52 12 440 2009-01-15 18:05:59 2003-09-16 17:22:43 7 2 262 0 72 365 148 241.10 30 90.78 CHANGED Mt.hKlVHYINQFFAGlGGE-KADhtPclt-u..sGsuhtLsthlcscAElltTVICGDSYas..ENh-cAppplLchlcphpPDlhlAGPAFNAGRYGsACGsIsKhVp-cLsIPulTuMYhENPGs-haKKslYllpTusSAAuMRculPthAcLAhKlhKGE.cIGsPpcEGYhsRGIRhNaFtE-R..GucRAV-MLlKKLpGEpFpTEaPMPsFDRVsPssAl+DlSKAKIALVTSGGIVPKGNPD+IESSSAS+YGcYDIsGhccLostsaETAHGGYDPsaANtDPsRVlPVDVLR-hEKEGhIGcLHchFYoTVGNGTuVAsuKpaupEhstcLhpsGVDAVILTST ...................................................................+hlhhlsp..uthGu--pAph..thcpt..hGsuh.h....h.t.t.....hclhsTlhCGDpah...pp.-p.s.tphhthhpphpsDhhlsGPuhphspaG.hsutlst...pthslPsl..suM..E.N.s.u..h.phahp..p..h.Ilp.h.tps.uhshppsh.thst.hs.thhp.tc.thh.....................................................................h..t.htphplAlhTsuG.l.h.....h.t...t.......cp.....s.st.pat.h.h.......t..............t.ph.s.Hu..GaD.s.s.s.tD.shhhPlDhh+chtpcGhI.ttlh.hah..shh.Gs..G.s.s.....t.p.t.p.ph.u.tlst.LhtttVDuVlhsu.............. 0 38 58 65 +7184 PF07356 DUF1481 Protein of unknown function (DUF1481) Moxon SJ anon Pfam-B_20042 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 230 residues in length. Members of this family are often referred to as YjaH and are found in the Orders Vibrionales and Enterobacteriales. The function of this family is unknown. 
25.00 25.00 25.80 25.30 19.40 19.40 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.25 0.71 -4.85 17 658 2009-01-15 18:05:59 2003-09-17 11:01:20 7 3 648 0 52 244 1 185.70 59 83.59 CHANGED uuGhstscusshhWhp-ctsps..thhusasshtssuphpocYRWppspL+plpRps.......stsP.pl+lRFspcG-ssa.QhclsGch..LosDQlshYphcAcpllptocsLcpsplhLhQG+Wp..spolpTCpG.phhp.-h-pph.sal.pR...uohcs.lAaLtuscs........ppLLLlsss-a.ChhpPp ...................ASGFAtDpGAVRIWRKDosDpV..HLLuVFSPW+SG.sTTTpEYRWQGDsLoLIplNlY......SKPPhsIRARFDc+G-LSFMQREssGcKQQLSNDQIsLYRYRA-QIRQhSDALRpGRVlLRQGRWH.h-pTVTTCEG.pTlKPDLDSpAIuHIERRQs+SSV-VSVAWLEAPEG........SQLLLVANSDF.C+WQPp...... 0 3 12 32 +7185 PF07357 DRAT Dinitrogenase reductase ADP-ribosyltransferase (DRAT) Moxon SJ anon Pfam-B_20108 (release 10.0) Family This family consists of several bacterial dinitrogenase reductase ADP-ribosyltransferase (DRAT) proteins. Members of this family seem to be specific to Rhodospirillum, Rhodobacter and Azospirillum species. Dinitrogenase reductase ADP-ribosyl transferase (DRAT) carries out the transfer of the ADP-ribose from NAD to the Arg-101 residue of one subunit of the dinitrogenase reductase homodimer, resulting in inactivation of that enzyme. Dinitrogenase reductase-activating glycohydrolase (DRAG) removes the ADP-ribose group attached to dinitrogenase reductase, thus restoring nitrogenase activity. The DRAT-DRAG system negatively regulates nitrogenase activity in response to exogenous NH4+ or energy limitation in the form of a shift to darkness or to anaerobic conditions [1]. 
25.00 25.00 69.20 41.30 21.50 16.00 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.73 0.70 -5.20 13 92 2009-01-15 18:05:59 2003-09-17 11:15:03 6 3 79 0 43 88 13 252.50 44 92.94 CHANGED GHSTNLVGlPs-hLuSssFN-cP.hPLHIuGVREMNssLFEMLupApsLt-AG-AFhsYMsAhFGlDsEQptsc.....sssG+..RRFRuSaLRLL+GWGaDSNGsEGAVLKGWVESRFGLFPTFHKpsIs+huosuWtsYVEEKMuSRFHNNuIalQLDLLYEFCQWALsR....assPGc.oHlsLYRGVNsFcEHplltRlD+RpsVlRLNNLsSFSSDR-lAsCFGDpILTs+VPlsKVlFFNsLLPuaPLKGEGEYLVIGG-YRVssSh.l .......thNhsslPshlluShtFNcpP...hsLcIsGVRchpssLFchLsttss.t-tupsFpcYMsshFsLt..p.pt.........stsuc...+Rh+..uSYLRLL+GWsaDSNusEGAVLKGWVESRFGLhPoFH+t.lsphsopAahpYhp-+hsupa+sNuI.sQLDLLYEaCQatltR.....th..Pup..pHlpLYRGsNchsEHpllt.....ch.scR.ptllRLNNLsSFoo-R-hAspFGDhlLcspVPlsKllFFssLLPs.hL+GEuEaLVIGGcYcVcht.h.... 0 16 31 35 +7186 PF07358 DUF1482 Protein of unknown function (DUF1482) Moxon SJ anon Pfam-B_20128 (release 10.0) Family This family consists of several Enterobacterial proteins of around 60 residues in length. The function of this family is unknown. 25.00 25.00 28.10 27.70 23.50 16.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.75 0.72 -4.38 12 929 2009-09-11 12:49:17 2003-09-17 11:20:43 6 1 454 0 29 221 0 57.20 57 86.88 CHANGED hFALVLhVshlsGsspDlllulYsopppChsAtsEQ+lp.GsCaPl-chIc..s..hPAu ..hFALVLhVhhlsGtspDIlVs.VYsTcQpClhuhs-Q+Ip..GsCaPl-chIc...s...hPAt.... 0 2 5 16 +7187 PF07359 LEAP-2 Liver-expressed antimicrobial peptide 2 precursor (LEAP-2) Moxon SJ anon Pfam-B_20235 (release 10.0) Family This family consists of several mammalian liver-expressed antimicrobial peptide 2 (LEAP-2) sequences. LEAP-2 is a cysteine-rich, and cationic protein. LEAP-2 contains a core structure with two disulfide bonds formed by cysteine residues in relative 1-3 and 2-4 positions. LEAP-2 is synthesised as a 77-residue precursor, which is predominantly expressed in the liver and highly conserved among mammals. 
The largest native LEAP-2 form of 40 amino acid residues is generated from the precursor at a putative cleavage site for a furin-like endoprotease. In contrast to smaller LEAP-2 variants, this peptide exhibits dose-dependent antimicrobial activity against selected microbial model organisms [1]. The exact function of this family is unclear. 21.20 21.20 24.50 23.20 19.60 19.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.85 0.72 -4.32 2 67 2009-01-15 18:05:59 2003-09-17 11:26:34 6 2 48 1 28 57 0 75.00 52 66.92 CHANGED MhpLKLFAVLhhCLLLLuQVsuSPlPp.SSAKRp.RRMTPFWRuVSLRPIGASCRDDSECITRLCRKRRCSLSVAQE ........................lhAhLh.lhLLLls.Ql.suS..P.l........P.......-...........l..........SS.........u.........+...RR.+...RMTPFWRuVSLRPlGASCRDsSEClT+LCR+t+CShs............................................ 0 1 2 8 +7189 PF07361 Cytochrom_B562 Cytochrome_b562; Cytochrome b562 Vella Briffa B anon Pfam-B_18074 (release 10.0) Family This family contains the bacterial cytochrome b562. This forms a four-helix bundle that non-covalently binds a single heme prosthetic group. [1]. 27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.37 0.72 -3.61 25 795 2009-01-15 18:05:59 2003-09-17 11:35:06 6 2 709 109 71 320 3 102.50 48 80.57 CHANGED s-LcssMpphstshppstcAssspphcpulschcsts.cupptp.Pschcs....p.thpsYpcGhcpLlsplDpApthsppGcLc-AKpuhpclpslRpcYHcKa+ ..........tDL--sM-sLscNh....Kll...p...K.A.DsAsplKsA...Lo+MRuA..AlDAQKus.PPKLEsK....usDSPE....M.KDFRHGFDlLlGQIDcALK.LAsEGclc.EAp...AAAcpLKsTRNsYHcKYR.............. 0 6 22 46 +7190 PF07362 CcdA Post-segregation antitoxin CcdA Moxon SJ anon Pfam-B_20349 (release 10.0) Family This family consists of several Enterobacterial post-segregation antitoxin CcdA proteins. The F plasmid-carried bacterial toxin, the CcdB protein, is known to act on DNA gyrase in two different ways. 
CcdB poisons the gyrase-DNA complex, blocking the passage of polymerases and leading to double-strand breakage of the DNA. Alternatively, in cells that overexpress CcdB, the A subunit of DNA gyrase (GyrA) has been found as an inactive complex with CcdB. Both poisoning and inactivation can be prevented and reversed in the presence of the F plasmid-encoded antidote, the CcdA protein [1]. 21.40 21.40 21.40 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.24 0.72 -4.11 25 640 2012-10-02 18:44:02 2003-09-17 11:44:48 7 4 509 9 114 324 32 67.90 35 86.53 CHANGED sKKssNlolsu-LlppA+shsINlStslEpuLpcpl+c.......pcscpWpcENccAIcshNchl-ppGhFuD-aRpF ...................ppploVTl-s-LhppA+s....h....s..l.NlSuhlssulppEl+c........pt.s..c..R..W.p..t..E.Np..cu.httlsphh-tpG.auD-.Rsa....................................... 0 27 64 88 +7191 PF07363 DUF1484 Protein of unknown function (DUF1484) Moxon SJ anon Pfam-B_20389 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 110 residues in length. Members of this family appear to be found exclusively in Ralstonia solanacearum. The function of this family is unknown. 21.20 21.20 23.10 22.40 20.90 19.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.32 0.72 -3.71 9 55 2009-01-15 18:05:59 2003-09-17 11:47:25 6 1 19 0 24 52 0 101.50 40 86.61 CHANGED cpppuPQhLAhupp+pLlAQLApphuphs+RstsphAusltpLcssuppIccsTE-uCucLLsVSAGLtGILpLLDLpSDRus-scuLHCLLsPLKpQLDsALsclQcML .................................t..p.sPphLAhsppppLhupLstpss..s++stt.httslpQLcssut.Ipposc-uCApLLsVSuGLsGILpLL-lpS-+uh.-.C+sLHCLLsPLKtpLDpALs-lQcML...... 0 4 13 16 +7192 PF07364 DUF1485 Protein of unknown function (DUF1485) Moxon SJ anon Pfam-B_20495 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 300 residues in length. 
Members of this family all appear to be in the Phylum Proteobacteria. The function of this family is unknown. 22.00 22.00 23.00 23.00 21.80 20.00 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.80 0.70 -5.22 60 567 2009-01-15 18:05:59 2003-09-17 11:50:56 7 6 358 1 215 587 505 283.50 31 58.09 CHANGED MRlhlAultpETNTFuPs.oshssFp........thhpGsshhpshps.ssssluualchAct...............pGh.-ll....sslhAtApPuGhVscsAaEplps-ILssl+uu....hshDulhLsLHGAMVu-shDDsEG-LLpRlRsllGsclPlussLDLHuNlTppMlppADsllua+pYPHlDhhEsGpcsscllhchl..pGch+PshuhtchPhlhsh...hsTsppPh+shhcthtphEsc......llulSlhhGFshADhs-sGssllshu-.....DtstAcpsAcclupplashRscahs..phhsh-pulscAhs ........................................M+IhlAuhtpETNTFuPs..sshssFt.........httGsch...ht..t....ph....hssshsuhhchs.t.t..................................p.G.h..pll....sslhAtAtPuG....hVscpAaEplpscllstl...css....h..lDulhLsLHGAMls-sh-..Ds..EG-..LLpRlRtllGs....slPlusshDhHuNlotchlppsDllsua+paPHsDh.hEouc+....us.clL.hchL.......c.......u.......c......h.......+..........P.s.huhhclPhlhsh.t..hsTs.t-Ph.+uhhs....tlttlEtc..G...llusSl....hhGFs..h..AD..hPcsGssllls..ss..........Dt.stupthAccLupthhstRppath...shhshcpulspAh.t............................................ 0 35 109 165 +7193 PF07365 Toxin_8 Alpha conotoxin precursor Moxon SJ anon Pfam-B_20562 (release 10.0) Family This family consists of several alpha conotoxin precursor proteins from a number of Conus species. The alpha-conotoxins are small peptide neurotoxins from the venom of fish-hunting cone snails which block nicotinic acetylcholine receptors (nAChRs) [1]. 
21.60 21.60 21.60 22.80 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.86 0.72 -4.00 22 253 2009-01-15 18:05:59 2003-09-17 12:37:11 7 1 43 19 0 244 0 41.20 39 81.76 CHANGED TTVVSFTSD.RASDGRNAAAppKsScLluhs.h+.tCCup......PsCtsppsthC.G ......................s....stpsAApp+sscLhshs..h..p..sC..Cup......PsCtssps.hC............ 0 0 0 0 +7194 PF07366 SnoaL DUF1486; SnoaL-like polyketide cyclase Moxon SJ, Bateman A anon Pfam-B_20348 (release 10.0), Pfam-B_4335 (release 18.0) Domain This family includes SnoaL [1] a polyketide cyclase involved in nogalamycin biosynthesis. This family was formerly known as DUF1486. The proteins in this family adopt a distorted alpha-beta barrel fold [1]. Structural data together with site-directed mutagenesis experiments have shown that SnoaL has a different mechanism to that of the classical aldolase for catalysing intramolecular aldol condensation [1]. 28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.19 0.71 -4.55 29 1134 2012-10-03 02:27:23 2003-09-17 12:44:11 7 18 662 23 389 2430 1653 122.50 20 66.56 CHANGED thhhphatshh....sptch....cthschlsschhtps........sshhGhcuhtthhpthhp.uhPDlph...plcphls-u.D+VssRhphpGoap.GthhG.....hss..oG+plphpthslh+lpc.GKIsEpWshhDhhulhpQL ...........................................................................t...hphh.thh....s...pt.ph.....s.h.h.s.p....h.h.s....s....c..h..h.......p...............................sth.G...h.....p..u...h.t....p.h.h........p.....t....h..h..p.....s.......h......P.....D....hph............pl..c..........p...h...l......s.........p.....u.....-........p..........V.....s......s.....+..h....p.....h.pG..oa.....p....G.....h....h....s....................h.s.s..oG+.plph...p...t.hsh..h+..h..c.s....G..+Is..-pWt.hhDthshhtQl....................................... 
0 95 233 320 +7195 PF07367 FB_lectin Fungal fruit body lectin Moxon SJ anon Pfam-B_20370 (release 10.0) Family This family consists of several fungal fruit body lectin proteins. Fruit body lectins are thought to have insecticidal activity [1,2] and may also function in capturing nematodes [3]. 25.00 25.00 36.50 42.20 19.40 23.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.86 0.71 -4.57 14 44 2009-01-15 18:05:59 2003-09-17 12:52:52 6 4 31 32 22 53 0 129.80 40 82.04 CHANGED MSYTI.....plRlhpss..sshhplVE+TsWhYANGGTWo-sc..Gth.lLoMGGSGTSGhLRFp.ssuG-hFhVslGVHNYKhWCDllssLps.-sTulplpPcYY........susRhpsp.tQhushpspstcG+slplpaphs-GNsLpAsls .................MsYsIplclhpsp..sshhplVEpssWpaupGGTWo-ts..Gth.sLoMGGSGTSGhLRFp.sssGEtFhVslGVHNYKpWCDIlssLss.stTultlhPcYY........s..sstRhp.hp.pQhsphphp..s...pGcslthpYpht-GssL.ssl.h............................. 0 7 13 20 +7196 PF07368 DUF1487 Protein of unknown function (DUF1487) Moxon SJ anon Pfam-B_20425 (release 10.0) Family This family consists of several uncharacterised proteins from Drosophila melanogaster. The function of this family is unknown. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.26 0.70 -5.11 7 111 2012-10-02 17:28:28 2003-09-17 12:55:20 6 3 19 0 51 291 82 213.40 36 71.11 CHANGED sWpuPphMllhc-GDlssAhahLlculppPFAsssVAolhVpEoIt-EhlcRl+sph+PLspclupHPsYlpslpclcp..hpschIhu...........psh.ssASPllVh.DhsHpahGs.tPTGllThHTFRshpEssplht+E.sLsFsuVslWsE+lussY-Llstls.sshahlNChsssLp.Ihp.atsppspVllt+saHYEoLhlsGchKhIVaPlushh ...........W.uPpLMllF-sGDlso.Ah.c.h..Ll..p.S..L...p...s..P.F...u..ss..uVA..o..VLlpESIt-pFlpplts...ch........+......P.......L........s......p........p...........V...u....p....H.....P...s.....Y...l....+....o..L......p.....p..lcp.......Lps.csltu....................c.s.h..sp..u...S.....PllVh..D.h...s...H......p...a..L.G....s....GP...T..G.....VlTl.HTFRTspEAsplt.p+E..sLsas.u.VS.lWsE+lussY-LlstLs.sshahlNChsssLsPIhp.atsppspVhltcsYHYEoLhlssch+lIVFPlus....................................................................................................................................... 0 9 10 32 +7197 PF07369 DUF1488 Protein of unknown function (DUF1488) Moxon SJ anon Pfam-B_20604 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 85 residues in length. The function of this family is unknown. 21.20 21.20 21.50 21.20 21.10 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.62 0.72 -4.14 70 961 2009-01-15 18:05:59 2003-09-17 13:14:29 6 1 762 1 202 509 12 78.20 37 90.60 CHANGED sIpFssp...saDssppslpFsuhssGtplpCtlospsLpch.st...tssp.....ppphlssFcptRtcIEchAcphlpp....sststlhLp .................tItFPsp..tpastsppslhFsAhlsGhplsCAIos-uLt.+F.......suss.....scp..hLuuFcpaRaDlEEtAEsLIpc.p..scpGhlhL......................... 
0 18 60 126 +7198 PF07370 DUF1489 Protein of unknown function (DUF1489) Moxon SJ anon Pfam-B_20654 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. Members of this family seem to be founds exclusively in the Class Alphaproteobacteria. The function of this family is unknown. 25.00 25.00 32.20 31.80 18.30 17.50 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.79 0.71 -4.65 54 250 2009-01-15 18:05:59 2003-09-17 13:18:14 6 1 248 0 84 197 185 137.40 50 96.12 CHANGED LlKLsVGs-Sl--LpsW.sp+....ttts.ss.shHlTRMhPKRtsElLs.GGSlYWVIKGhltsRQpllsl-phsssDGIpRCslVL-PcllcspspP+RPFQGWRYLpspDAPsDLs.tucsst.ssLPscLpppLs-LGll ..........LlKLsVGs-Sl-DLtsWhspR........pst...uh..s..spth.HsTRMhPKRt-ElLs.GGSLYWVIKGplpsRQ+llcIcshscu-GIsRCpLVL-Pcll.spspP+RsFQGWRYLpsp-APtDLs....sucuut...ssLPscL+pELscLGLL... 0 23 54 64 +7199 PF07371 DUF1490 Protein of unknown function (DUF1490) Moxon SJ anon Pfam-B_20678 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. Members of the family seem to be found exclusively in Mycobacterium species. The function of this family is unknown. 21.00 21.00 21.30 21.70 20.80 20.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.85 0.72 -4.03 8 169 2009-01-15 18:05:59 2003-09-17 13:37:13 7 1 75 \N 23 55 1 90.00 60 95.79 CHANGED MVhaGLLAKAusoVVTGlVGVuAYEsLRKAluKAPLRpsAVoustLGLRGoRKAE....EAAESARLKlADVMAEARERIGEEsPsPAluDs.c- ........................MshashLAKAssTVlTGLVGVsAYEsLRKAluKAPLRpuuVossAhGLRGTR+AE....EAAESARLpVADVlAEAR..ERIGEEuPsPAlucsc........... 0 8 13 19 +7200 PF07372 DUF1491 Protein of unknown function (DUF1491) Moxon SJ anon Pfam-B_20742 (release 10.0) Family This family consists of several bacterial proteins of around 115 residues in length. 
Members of this family seem to be found exclusively in the Class Alphaproteobacteria. The function of this family is unknown. 25.00 25.00 42.00 42.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.27 0.72 -4.11 53 260 2009-01-15 18:05:59 2003-09-17 13:39:21 7 2 258 1 89 215 166 106.20 37 92.50 CHANGED LsuchWVsAhlRRlpstGhsAhVhp+GDcsAGAVlVK.lssh-GpApLappuhph........sGsRtWhhh....ssssEs-lDstlsRppcFDPDLWllElED.+pGc+hL-ps ......LpochWVuAhlRRlpssGs..Ahlt++Gss-AGAlhlK.lss..h.sGpssLau.usps............sG-Rhahph......pssc-tsscstlpRch+FDPDLWlVElED.cpupchh..t....... 0 25 58 68 +7201 PF07373 CAMP_factor CAMP factor (Cfa) Moxon SJ anon Pfam-B_20762 (release 10.0) Family This family consists of several bacterial CAMP factor (Cfa) proteins which seem to be specific to Streptococcus species. The CAMP reaction is a synergistic lysis of erythrocytes by the interaction of an extracellular protein (CAMP factor) produced by some streptococcal species with the Staphylococcus aureus sphingomyelinase C (beta-toxin) [1]. 23.30 23.30 24.20 24.10 23.00 21.30 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.55 0.70 -4.85 9 517 2009-09-11 06:24:37 2003-09-17 13:44:42 6 3 137 \N 15 141 1 226.20 41 81.27 CHANGED spsos.ts...pp..tstptcphhptLNtchspLpsh.pcslpGo-.....htcplschlcssccLKsulcs.sc...shYDhsSIssRVEhlsssl-sIphuTpsLpsKVppAHl-hGhuITKhlIhllsPhuoscplcsplsslKth.tKV.sYPDLpPTDtATlYsKsKLsKtIWpsRhsRDppVLshKshcVYptLNKAIT+AsGVphNPpsTVtpVDptlpsLpsAhQTALc ............................................................................ss...........tpshsss-AcptlptlNuRIspLpcs.QKss.uSp............ht-pIpcLLc.s.....AhcL+ssl-sls+...G..sl...shhDh.soIssRVcLlssssDTIpsAspTLQsKVpsAHs-lGLpIs+AlllhlsPsSTssQLp-EhAAlKstls+lpsYPDLpPsDsATlYsKspLs+tIhQlR.....hsRsppl...lshKstsshctLN+AIo+AsuVphNstsTVupVDpAlppLcAAYQsALp................................ 
0 13 14 15 +7202 PF07374 DUF1492 Protein of unknown function (DUF1492) Moxon SJ anon Pfam-B_20776 (release 10.0) Family This family consists of several hypothetical, highly conserved Streptococcal and related phage proteins of around 100 residues in length. The function of this family is unknown. It appears to be distantly related to Pfam:PF08281. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.28 0.72 -3.90 6 341 2012-10-04 14:01:12 2003-09-17 13:47:25 6 2 248 0 28 243 4 92.40 22 68.36 CHANGED LLSSPKWpsDKVQGGp++KhDDVYl-Llsh....KEsIEpcTsEAIp++lELp+hIspLcNscSRolLpMVYIsKhssaplhDcLshS+oTYY+hh+.Ah+ELs ...........................................h.t.............................t....h.hh.h.....p......h.p..ph..tphhp....pp.h...c..lp...p.hIs.c.Lt......s..spp...R..p..lLp.hhYl..s..c..hs..h....p..lsccls.hS.csohYcl+pcAlpcL...................... 0 11 19 22 +7204 PF07376 Prosystemin Prosystemin Moxon SJ anon Pfam-B_20835 (release 10.0) Family This family consists of several plant specific prosystemin proteins. Prosystemin is the precursor protein of the 18 amino acid wound signal systemin which activates systemic defence in plant leaves against insect herbivores [1]. 
25.00 25.00 49.00 283.70 24.40 18.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.91 0.71 -11.58 0.71 -4.92 3 8 2009-01-15 18:05:59 2003-09-17 13:55:29 6 1 7 0 0 9 0 197.50 82 99.87 CHANGED METPSYDIKNKGDDVQE..KTKL+HEKGGDE+tKIIEpETPSQDIpNKs........DDAQphPKVEHEEGGstKEK.lEKETlSpCIIKhEGDDAQEKlpVEYEEEEh.KEKIVEKETPSQDIuNKGDDAQEKPK.......................................................................VEHEEDGDEKETPSQDI.KIEGEDAQEIPKVECEERE...KIVIRVDLAVHSTPPSKRDPPKMQTDNNKL .......MtTPSYDI.KNKGDDhQEE.KVKLHHEKGGDEKEKIIEKETPSQDINNKDTISSYVLRDDsQEIPKhEHEEGG.sKEKIVEKETISQhIIKIEGD.DAQEKLKVEYEEEEYEKEKIVEKETPSQDINNKGDDAQEKPKVEHEE.GD-KETPSQDIIKhEGEGALEITKVVCE......KIIVRtDLAVpSpPPSKRDPPKMQTDNNKL 0 0 0 0 +7205 PF07377 DUF1493 Protein of unknown function (DUF1493) Moxon SJ anon Pfam-B_20460 (release 10.0) Domain This family consists of several bacterial proteins of around 115 residues in length. Members of this family seem to be found exclusively in Salmonella and Yersinia species and several have been described as being putative cytoplasmic proteins. The function of this family is unknown. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.53 0.72 -3.97 18 511 2012-10-02 01:16:24 2003-09-17 13:59:56 7 2 255 0 69 887 434 104.30 38 92.53 CHANGED M..sslppclh-hl+cchGshh.....tchpLT.so-LcpDLplstsDsc-LhpcFhccFsV-husFphppYF.P..pssh..hhs.hc+p......csc.l..PlTluMLhcSA+AG+WLYD .......................................-slppplhcLlRpph.s..h..Y.lh....KphpLo..-oDLpp.D.Lplst--sp-LMscFFccF.....NV-tusFphpsYF.P.........p...Ph........h..p....F++p.............-...s...shTIuMLl-SA+AG+WLY............................................................... 0 5 17 48 +7206 PF07378 FlbT Flagellar protein FlbT Moxon SJ anon Pfam-B_20574 (release 10.0) Family This family consists of several FlbT proteins. FlbT is a post-transcriptional regulator of flagellin. 
FlbT is associated with the 5' untranslated region (UTR) of fljK (25 kDa flagellin) mRNA and that this association requires a predicted loop structure in the transcript. Mutations within this loop abolish FlbT association and result in increased mRNA stability. It is therefore thought that FlbT promotes the degradation of flagellin mRNA by associating with the 5' UTR [1]. 19.50 19.50 20.80 25.10 19.30 19.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.38 0.71 -4.28 49 266 2009-01-15 18:05:59 2003-09-17 14:07:26 6 2 220 0 105 208 18 123.60 35 80.56 CHANGED L+lpL+PtERlllNGAVlcNuDR+spl.l.ss.sslLRp+Dllps--AsTPl+clYaslQhhhhs.sss.pphps.hhptlppLh.hhssscststltpssctlhsuchYcALKtlRsLlshEpcllu .......L+lsL+ssERlhINGAVlc.u.D.R.+ssLplhNc.AshLhEpclLpPE-AsTPlRplYFssQhMLlt.ssttcp.stshahphlptlhthhpss-hhstLctlsphVtsGchacALKslRsLhshEtclh.s................. 0 27 57 72 +7207 PF07379 DUF1494 Protein of unknown function (DUF1494) Moxon SJ anon Pfam-B_20601 (release 10.0) Family This family consists of several bacterial proteins of around 175 residues in length. Members of this family seem to be found exclusively in Chlamydia species. The function of this family is unknown. 23.00 23.00 24.30 24.30 22.70 22.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.11 0.71 -4.34 6 41 2009-01-15 18:05:59 2003-09-17 14:09:44 6 1 37 0 8 18 0 165.00 56 93.80 CHANGED pKRuFLLhElLVShTLlALLhGsLGFWpR+hasSpKccE+lY+TFLpEshAYKpLRTlF.hoTSpIE-hPGhLhShlFDRGVYRDP-LAGtVtGSLaYcppptRL-L.IpS.Rpcu+.ETh.LhcpV.pV-hVshR.....ppt.uchP-RVhhsl+R+sst.s.RsLoY.FAlG+ ..pKRuFL.L.ELLIuhsLIuLLLGoLGaWpR+IahSpKcKE+VY+hFLpES+sY+hLRss................F....hSThuh....Esp.t.hlFS..FDRGVYsDP+LAGsVpGoLHYDshsQclpLhlsS.R-cu+.Eph.LhoHVhph-hpshR......pu.sEhP-RlhLTlsR+ssAhPPRTLoY.FAVG+.............................. 
0 2 3 7 +7208 PF07380 Pneumo_M2 Pneumovirus M2 protein Moxon SJ anon Pfam-B_20478 (release 10.0) Family This family consists of several Pneumovirus M2 proteins. The M2-1 protein of respiratory syncytial virus (RSV) is a transcription processivity factor that is essential for virus replication [1]. 25.00 25.00 73.20 72.80 22.90 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.11 0.72 -3.99 3 21 2009-01-15 18:05:59 2003-09-17 14:14:51 6 1 7 0 0 21 0 84.50 67 96.47 CHANGED MTpPKIMILPDKYPCSISSILIoSEscVssaNHKNlLpFNQNphsNHMYS.NphFDEIHWTSQELIDslQpFLQHLGIsEDIYTIYILV MshPKIMILPDKYPCSIoSILIoScscVshaNpKNsL.FNQNp.sNHhYs.Np.FsEIHWTSQ-LIDssQpFLQHLGIs-DIYTIYILV........ 0 0 0 0 +7209 PF07381 DUF1495 Winged helix DNA-binding domain (DUF1495) Moxon SJ, Bateman A anon Pfam-B_20509 (release 10.0) Domain This family consists of several hypothetical archaeal proteins of around 110 residues in length. The structure of this domain possesses a winged helix DNA-binding domain suggesting these proteins are bacterial transcription factors. 21.60 21.60 21.60 22.90 21.40 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.84 0.72 -4.31 11 58 2012-10-04 14:01:12 2003-09-17 14:17:45 6 2 37 0 41 59 0 86.40 40 74.89 CHANGED Ih+SLp+SclR+KILhaLhchhPpshYls-loRcV+ossoNVpGuLcGhGpRYsu-pSLlpLGLV.....pptcsGhK..hYplTc....hG+pls-hL ...............llRSLp+SclR++lLhaLhch...aPpssYluEIuRtltuDsoNVhGuLtGhG.sRYsucpSLltLGLV....pphpps.GhK...hY+lT-....hG+pls-h................ 0 11 13 30 +7210 PF07382 HC2 Histone H1-like nucleoprotein HC2 Vella Briffa B anon Pfam-B_20179 (release 10.0) Family This family contains the bacterial histone H1-like nucleoprotein HC2 (approximately 200 residues long), which seems to be found mostly in Chlamydia. HC2 functions in DNA condensation, although it has been suggested that it also has other roles [1]. 
40.00 40.00 40.10 40.00 39.90 39.90 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.98 0.71 -12.70 0.71 -4.44 8 284 2009-01-15 18:05:59 2003-09-17 14:20:42 6 26 180 0 92 272 95 148.50 44 66.11 CHANGED MlssQK..K+SuKKTAuK..AVRKP.......AKKAAAKKss....sRKsAAKKssARKTssKKsVAtKpsutKtss......+KssAp+hsAtK....................hss+KsVAKKssAKKssAKKssA+KTVAKK.ssA+KsAAKKAs.AKKAsARKsA.A+KAlAK+Asu.............C+KpH+HoAACKRVsoSuAst.tsGuKo+lps.....AHuWRpQLhKhsoR ............................................................tc...s.s...s+.ss..s+K.ss.......AK.K..s.s..s.KKss.......s+K.s.A.sKK..ss...A+Ks.s.s.tKts...A.t..+.p....s..s.sKK.s.....s......AKK...........s.s....s..+K.ss.A.KK.s.s.A.+.K...........ssAKKs.s.A.K.K.s.s.AKK.su..AK.........Ks.s.A+K.s..sAKK.......s.s.A.+Kss.AK....Kss..A.+K.s.s.A.+....K..ss.s.cK..s.s...Ahcss..............................hc.s..s.................................................................................................................. 0 19 45 71 +7211 PF07383 DUF1496 Protein of unknown function (DUF1496) Moxon SJ anon Pfam-B_20554 (release 10.0) Family This family consists of several bacterial proteins of around 90 residues in length. Members of this family seem to be found exclusively in the Orders Vibrionales and Enterobacteriales. The function of this family is unknown. 20.20 20.20 20.30 22.10 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.95 0.72 -4.71 21 610 2009-01-15 18:05:59 2003-09-17 14:33:02 7 1 598 0 48 194 3 57.70 63 62.58 CHANGED pshssuspsspp.......RsCaYpDpsYSpGAVIcl-GllLpCst-schpoNusLhWhplpp ...................EVaSouGQSuQPC.....hQCCVYQDQNYSEGAVIKsE.GlLLQ.CQR......D-KTlSTNPLlWRRVK.P............. 0 3 12 29 +7212 PF07384 DUF1497 Protein of unknown function (DUF1497) Moxon SJ anon Pfam-B_20585 (release 10.0) Family This family consists of several phage and bacterial proteins of around 59 residues in length. 
Members of this family seem to be found exclusively in Lactococcus lactis and the bacteriophages that infect this organism. The function of this family is unknown. 20.00 20.00 22.50 107.80 19.90 19.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.70 0.72 -3.80 2 15 2009-01-15 18:05:59 2003-09-17 14:35:51 6 2 14 0 1 8 0 59.10 92 83.98 CHANGED MGYYDT+NEARRISKLASQNISSEQsKKEFELDpQSKFNQEMQAEFHE+IKKLGtKNGS MGYYDTKNEARRISKLASQNISSEQNKKEFELDSQSKFNQEMQAEFHERIKKLGEKNGS... 1 1 1 1 +7213 PF07385 DUF1498 Protein of unknown function (DUF1498) Moxon SJ anon Pfam-B_20610 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 225 residues in length. The function of this family is unknown. 24.40 24.40 24.40 25.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.38 0.70 -5.21 5 356 2012-10-10 13:59:34 2003-09-17 14:43:15 7 1 341 4 50 222 100 221.20 51 98.34 CHANGED MKRSpIN-IIcEu+AFhpoFsahLPPFAYaSP--WKQR-ssuhpEVhDuRLGWDITDFGpscFAchGLTLFTlRNGuucsh..uhsKsYAEKIMhlRcsQlTPMHaHs+KtEDIINRGGGsLVlELapuDc.sstlD-co-ITVuVDGpppThsAGupLKLcPGQSICLsPGLYHuFWAEuGs..VLlGEVSoVNDDLTDNhFLEPlGRFSsIEEDEPPlHLLCSDYcpa ........................MKRSpINpllccscthhppaphpLPPFAhaoPpcWpp.hst..s.....shpElhDhpLGWDITDFGpscFsphGLoLFTlRNGs..hpsh..sYsKsYAEKlhhl+csQloPMHFH......hp.K.....tEDIINRGG.GsLllcLasus.....ssphs............p...c......oslTVslDGpppThsAGspL+LpPGESIsLsPGlYHsFW.u..E..s..Gh.........VLlGEVSpVNDDppDNhFh.c.P.luRFssIEEDEsshhLLCs-Ysp.............. 0 10 26 40 +7214 PF07386 DUF1499 Protein of unknown function (DUF1499) Moxon SJ anon Pfam-B_20643 (release 10.0) Family This family consists of several hypothetical bacterial and plant proteins of around 125 residues in length. The function of this family is unknown. 
22.60 22.60 22.80 23.50 21.80 22.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.51 0.71 -3.93 120 649 2009-01-15 18:05:59 2003-09-17 14:46:02 6 5 562 0 243 562 331 121.10 25 61.46 CHANGED tclssssssPsshps.ths............................................pt................tlpslt...............s.sspps........hstlhpll.......pphst.......shl.....ptsps..hlcApspotlhGFsDDlplpls.....ss.ts....hlclRSsSRlGtSDLGsNtcRlcpltstLp ....................................................................................................................h..hsss.tpPsph.s.t.......................................................pt..tltshh...........hp.sssps........hptlpph.h...........pshsh......................shl......h...ppsss......hlcspspStlh..uFs.....DDl..p...l.......plp.........sstt...........hlclRStSR.......h.....G.........h.u.DhGsNtcRlcphhptL.............................. 0 79 162 206 +7215 PF07387 Seadorna_VP7 Seadornavirus VP7 Moxon SJ anon Pfam-B_20657 (release 10.0) Family This family consists of several Seadornavirus specific VP7 proteins of around 305 residues in length. The function of this family is unknown. However, it appears to be distantly related to protein kinases. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.02 0.70 -5.59 5 11 2012-10-02 22:05:25 2003-09-17 14:48:06 6 1 4 0 0 107 14 302.00 56 97.28 CHANGED hscGQsplh+.tssFcVRFRpcGKlaclPLPNuoT+c......sIlGTIKYFTElMGLslVsNuh+LshsNVuDhp+asGNuTLstlKocl.GsLFLKKlsSLPlslssshY.aNK...YcVFARlHGIl+L+....ND.NNYcYGIILE+CYcIRlhssNhIlAGLKuLMDhHpEssssLHGDCNPpNLMCDKhGhLKLVDPusLlopsVsalN.ppYcuLTs-uEVsuFVhSCLplVucLRslcp--I...FIpchYLsLcosssDsNlpsGsRLTuL.stL-VsS.....DhlStlsMhPFluLLNslcYYslsDllShlpp-lDsEs-V ......................................................................hppGphsIhR...Gs.FclRhRphs+sasMPLPNATosc......NFLDCIKFITEpVGFDYVSsGFKLs.ANVsDFQHLNGNSTLllGKTcI.GPLILKKlRSLPCC..NDsLF.+NK...aRILARM..HGILRLK....NDhNuaKYGVI..LE+C.YKP.pI..N..FSNFl.T.A...I.sD.Lps..FHSu.s.sa.hLHGDsNP-NIMSDusGYLKLVDPV..C.L....L...E.NQVN.MVN.I-YEuLTQ.-AEKKV..F..lKSLLpLVE+QLSAsh-EI........YVsLcEsNPSFN.LpsGh+LTDLLsslDlaNsspWK.h.lNH+PMhPplulLNDLTYYchsDVpshlT-sLsDEDDV............................................................................ 1 0 0 0 +7216 PF07388 A-2_8-polyST Alpha-2,8-polysialyltransferase (POLYST) Vella Briffa B anon Pfam-B_20294 (release 10.0) Family This family contains the bacterial enzyme alpha-2,8-polysialyltransferase (EC:2.4.99.-) (approximately 500 residues long). This catalyses the polycondensation of alpha-2,8-linked sialic acid required for the synthesis of polysialic acid (PSA) [1]. 
22.70 22.70 23.10 26.90 22.40 22.60 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.65 0.70 -6.12 3 54 2009-01-15 18:05:59 2003-09-17 14:48:52 6 1 43 0 2 27 0 403.30 53 97.46 CHANGED M.....LKKIRKALFpPKKFFQDS.WFsTS.......LFsLpsP+sNLFIISsLGQLNQAcSLIKhQKL+NNLLVILYToKNhKMPKLIpcSsNKcLFSShYLFELPRcPNslSPKKlLYIYRuYKKILpsIQPAHLYhMSFAGHYShLISLAKKpNITTHLIEEGTGTY.....APLLESFoY+PTKhEphaIGNNLplKGYhD.KFDILHVsFPEYAKKIFNAKKYaRFFAHuGGISoSssIANLQ+KYRIS+NDYIFVSQRYPISDELYYKoIVETLNpISLpIcGKIFIKLHPKEMtNKYIMSLFLNMVcINPR.LVVINEPPFLIEPLIYLTsPKGIIGLASSSLVYTPLLSPoTQCLSIGpIIIcLIp+pupQENTshIcEHLEIlKpFDFIKILSDlpssIsss.FKTEEThEhLLKSAEaAYKsKNaaQAIFYWQLASpNslolLGaKuLWYYNAL.pVKQ.YKMcY.-I.YIDpISlsFHDKDKhhWppIKp.aa...KRIhpsc .......LKKl+KhhhpPhtFFpDS.WhhsS..........Lp..ppNlFlISNLGQLppspShlph.phpsNhLllLhTpKNhcMPKllhpphNKpLFpShhlh.lPppPNsho.KKllahYpsYKhll...pstchYhhSastHYuhhl.lhKKpNIpspLI-EGTGTY............sPllp...Yp...........hIhNsl.l.htY.D.+F-.lassFPphhKKhFNApKasc.FttAsulp.sspIsNl.+KYpIo+sDhIassQ+Y.IpcsLahcSllpIL.pIsh..pu+IFIK.HPKE..pp.l.ul.hshhc..hpsR.lllIsEPsFLIEPlIhhsp.KtlIGLsSSSLlYsPLlS.psQshSIu.LhIpLhpp.p....cshphlp.Hh.-IlKpFs.lpILsD...sIos................................................................................................................................... 0 1 1 1 +7217 PF07389 DUF1500 Protein of unknown function (DUF1500) Moxon SJ anon Pfam-B_20659 (release 10.0) Family This family consists of several Orthopoxvirus specific proteins of around 100 residues in length. The function of this family is unknown. 
25.00 25.00 35.50 34.80 17.20 15.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.45 0.72 -3.99 2 38 2009-01-15 18:05:59 2003-09-17 14:49:50 7 1 17 0 0 32 0 86.80 73 58.82 CHANGED MSSSVDVDIYDAVRsFLLRaYYsKRFIVYGhSNAILHNIYRLFTRCAVIPFD.IVRhMPNESpVpQWVhDTLNGIhMNE+DVuVsVGTGlLFMEMFF..h ......................MSSSVDVDIYDAVRsFLLRHYYsKRFIVYGRSNAILHNIYRLFTRCAVIPFDDIVRT.MPNES....RVKQWVhDTLNGIMMNE+DloVsVGTGlhFMEMFF...DY.. 0 0 0 0 +7218 PF07390 P30 Mycoplasma P30 protein Moxon SJ anon Pfam-B_20617 (release 10.0) Family This family consists of several P30 proteins which seem to be specific to Mycoplasma agalactiae. P30 is a 30-kDa immunodominant antigen and is known to be a transmembrane protein [1]. 18.70 18.70 19.20 19.90 18.30 18.10 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.59 0.70 -4.88 2 12 2009-01-15 18:05:59 2003-09-17 14:56:20 6 2 6 0 1 14 0 189.00 57 98.69 CHANGED MKLKQLLNLGTALTATFSIPFVAAKCSEDDKKEKVTKPKNEPTKPVDNSKTNDNSNEMVGETNLSNSINSSNSSTQNHFGAETNAKESPALNDLYSENPATPI.p.EKGIKESSEGSKNEGDKVIAGKEAIYKDIDFDISKVKITIDKKDLKDEDLISPKKGSHKQLFFNTYKDKTKVSGKLEKDQKPWGGIAIGSVTGLPKNYSIANADSPLYISKKDKKGTAKPNGFVNVEKDGGNLKIKFRFFKFNKGsNSTVSTKVYEAIIS .....MKhK.LLsLGTsLTATFSIPFVAAKCuEsDKKEKhpKPhpEPsKPs-NocTsDNosEhs.G.psN.s.sS.N.SsNut.......................Nssss..p.EKtIKEoS-u.Kp-G-KV.stpcshYKDIDFDhSKlKIhIsKKDlKDEDLIssKpGspKQlFFsThp.tTplpGKh..tphPWtGltIGoVTGLPcsYSIuss-sPla...+s+KGphpssGFVNVEK-GspLKIKFRFFKaNKGsssTVSTpVYEAIIS................................. 0 0 1 1 +7219 PF07391 NPR NPR nonapeptide repeat (2 copies) Bateman A anon Marshall M Repeat This nine residue repeat which I have called NPR after NonaPeptide Repeat. It is found in two malarial proteins and has the consensus EEhhEEhhP where h stands for a hydrophobic amino acid. 
25.00 0.00 115.20 0.60 17.30 -999999.99 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.70 0.73 -6.93 0.73 -3.28 73 570 2009-09-16 13:04:07 2003-09-17 15:05:43 6 2 2 0 555 770 0 16.90 67 98.77 CHANGED EllEEllPEEllEEVlP ........ElVEEVlPEElVEEVlP... 0 555 555 555 +7220 PF07392 P19Arf_N Cyclin-dependent kinase inhibitor 2a p19Arf N-terminus Vella Briffa B anon Pfam-B_20449 (release 10.0) Family This family represents the N-terminus (approximately 50 residues) of cyclin-dependent kinase inhibitor 2a p19Arf, which seems to be restricted to mammals. This is a tumour-suppressor protein that has been shown to inhibit the growth of human tumour cells lacking functional p53 by inducing a transient G2 arrest and subsequently apoptosis [1]. 25.00 25.00 45.60 45.60 19.00 18.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.45 0.72 -3.80 6 33 2009-01-15 18:05:59 2003-09-17 15:41:38 7 1 25 1 7 29 0 50.20 71 51.22 CHANGED RFlVTVRI..RRAsRPP+VRlFVVphsRuupctoAssspAsVAhVLhLhRppR ..RFLVTlRI..RRACGPPRVRVFVVHIPRhAGEWAAPGA.AAVALVLMLlRSQR 0 1 1 1 +7221 PF07393 Sec10 Exocyst complex component Sec10 Vella Briffa B anon Pfam-B_20545 (release 10.0) Family This family contains the Sec10 component (approximately 650 residues long) of the eukaryotic exocyst complex, which specifically affects the synthesis and delivery of secretory and basolateral plasma membrane proteins [1]. 
21.80 21.80 22.10 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 711 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.36 0.70 -12.95 0.70 -6.35 26 605 2012-10-02 15:56:29 2003-09-17 16:31:31 6 17 278 0 437 610 1 516.80 19 80.29 CHANGED lppsh-sFspLDpplss................luptssclGcpLcphspp+pp........tlcut.Llpaapchhspup..............................t.LppL.cp.ssspthhcsAplhppLhtlupcl..........................................sssttppspppI-pas-phEpcLLcpFssuY+cp-...hppMpchApl...LppFNGGs....sslphFlsp+phFh.pppph......p.sh......h..l.s..sspt..hh.pshpslhsclpsslppEupIIpcVF....sssppVhphFlp+lappplpphlphlLccspsh...............................SpLuYLRsLpshauhstpLsccLpsa.hts.p...........................................hsshL-pp..hpclF.sal...spYh-pEp+sLcphhtshltcFs.ph....ptp..................htph.st..t.h.........................ssp.t.........spsp.sphsphtpsphp.sppp..........................................................t....phpttcshholshshshl+tstEulsRshpLsp.....sscsscsshslhslLLphlhcpYlcsuL-.ss.........................hthsstc.tppss.ssLh.aLpslphsspIlpLhsthhps.ll.Phlsssssh+pphhptppphhpphEtplsthlpcslcslhsplphlLu.cQKKsDFpPpssshs.......................h..psTpsCtplsshL.spltpthstsls..up........................................................................NLcsFLoElGhclaphLlcHh++apVss.sGGlhlppDlspYpshlcsa....plsplsppFchL+pLusLalV.pP-sLpplsptsth...........................sthshp.lppalppRsDapphthssth 
................................................................................................................................................................................................h.........................................................................p.t...............t..Lh.hh.th.t......................................................h......tt...........ptu.hh...p..pL...h.u...pp...................................................................th....h..t.ltth.p.hEpthlppFpt.......t........................ptt..............p....tth.tphuth.....L.thp.tht...............t.s.hth....alp..pp...............................................................................................p.t.hhpth..h.hpp..t..ltthF..............s..........lh.h.hhpp.lht........lt.hhp..hhp....p.t.................................................................................................s.......a..lp.l.......h...t........h...t.h..ppl.............................................................................................t..hpp.......h.pha.....ah....t.Yht.E.t.hpp..t..h.pa............................................................................................................................................................................................................................................................................................................................................................................t.pt.h.s.p.hshphlp.ht...tuhtRsh.h..................................tph..tth.tlh...........hllp.h..tal.t.uh-.....................................t.tt........t..t...t.t...sh...ah.hlp.ss.lhthhp.........hpp.lh...h.h.....sp.ph..............h.......ttp......pth.p.h-.thsthlp...t.............slsshht..hph.hht.tQt..tpD..apP..ttt...........................................................................o.ss.tlsthl.p.t....hp..hhtshs....tp..........................................................................................s.hp.sh.tE.......lu.phhphlh.cHh.pp.th.ss
..Guhh.hh..........pD...h...st...Yhphh.p.p.h.......................t.......l...haphLptlsplhll...ts.p.thpphh.p...t................................t.hp.p.hhtahphRtDh........t........................................................................... 0 164 262 375 +7222 PF07394 DUF1501 Protein of unknown function (DUF1501) Vella Briffa B, Studholme DJ anon Pfam-B_20578 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 400 residues long. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 392 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.32 0.70 -5.63 51 1229 2012-10-03 20:55:17 2003-09-17 16:41:31 7 11 455 0 491 1424 2193 366.90 22 81.41 CHANGED AKplIhLahsGGPSpl-hFD.KPphpphpspsh......................................thttpGpsGh.lu-hhPch..uphsDc....lsll+Shhscths...Hs.AthhhpTGtt................shPohGuWhsauLGsts..psLPsalshst.......tth.tsuspsausGFLsutapuhsh.psssss..............lhslsssss.lstppppptlshlsphspphh......ppsscsphts+ltpaEhAh+....Mpspss-shDlssEsppshchYGhss...t..tt...................FuppCLhARRLlEcGVRFlplhps.......G..........WDpHs.....slppshspts...tplDpshAALlpDLcp+GhL--TLVlasuEFGRTPh.........pss.........tGRDHpspuFohahAGGGl+uGhsaGpTD.....-huhssscss..hplpDlaAThL+hhGlDcpclsathtGc.h+Lss...pucllc-ll 
.......................................................................................................................................................................................................................................+tllhlahtGGssthshhs..ps.p..h..hh.ttpsh....................................................u....h.h.s.p...h..hsth..........sphscc..............hslh.u.h.....s..phhs..........cs..u.th.hhts..G.......................h..tsh..h...h..t...h.sstp...............shPshhhht.......................t..sh..ssp..ht....s..G.a...Lsp..hh....ps...h.h..tstsss.............................................hhsl.ss.st...s...h.s...h....t...t...t.tpt..h.s.h..h..t...t....h...st...............tt.ts...s.p.h...t..s.h.ht..t...h.p..h.uhp......................hp......s...t....h.p...s.h..c.l.s.p...-.s..p...t..h..h.p.h.h.G.st.....................................................................hu.t..p..s..l...h...A......p.c......h....h...c..p..G....s..+..h..l.plt..hs..............G..............WDs..Hs.................s.hs......t.....h...sp.hh..................tplDpuluALh....pD...Lp......p...........p......G......h............h.......-.......c......T......lVlhhuEFGRTst........................tN.us....................tG...p.....D......H.spuh....s....h.......h....l....sG...G.......ul....+............G.......G.h.shG..p.s.s........p.h....u...h....t..........t..........................c...lhss.hh.h..hshs.p.t.h.h.h.t.h.up.htl.......st........................................................................................................................... 1 299 398 453 +7223 PF07395 Mig-14 Mig-14 Vella Briffa B anon Pfam-B_20642 (release 10.0) Family This family contains a number of bacterial mig-14 proteins (approximately 270 residues long). In Salmonella, mig-14 contributes to resistance to antimicrobial peptides, although the mechanism is not fully understood [1]. 
19.70 19.70 20.20 19.90 19.50 19.60 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.55 0.70 -5.38 5 242 2012-10-02 22:59:21 2003-09-17 16:48:01 6 3 223 0 34 157 10 250.40 50 88.21 CHANGED HPLVV-pLu-huuIPVRYLuWEpuGElKAAIssWGRpLALSKDsLKRtGKKuLFDLGNAEIILPsAuDs+ssLRH+sRYLSALNcspIssL+...AQKEQLAhARsPEDFSKKFRYNQRRElRLlEEAGGsVRslu-FSuuElAulYl-LFpRRWG..FPATGA-chAcVlEpLR-LLhGSVLaLNspPIAIQlVYRuEAPpWVSlEYlNGGV..DPETRcFSPGSVLSaLNTQuAWEDARuhsKsLRFSFG..RADREYKDRWCsPsPVaQ..s ..........................................................................................................................................................HPDlV-ahhc+ash+F+F.F.+.acccscIKGA.......YFlsNDppl...uh.hsRRsFPLusDEILlPh...APDl..R..saLPcRTp+LSAL+pspIpNAha...+lspK+QsC...llK.......E...s..FSsKFc+sRRpEhp+FlcpGGoV+sV..u-hS.ucE...LspIa...l-LFc.....pRaGt..p.sshs.A-pLAs.FFSpL+c....LL.....FGalLal-shPCAhDlVLKuESphsVYaDssNGul..cs..EsRsLSPGSlLMWLNlscA+caCpcppKp.LhFShG..+P..-..hEYKchWssPh.sGp.s............................ 0 1 6 22 +7224 PF07396 Porin_O_P Phosphate-selective porin O and P Vella Briffa B anon Pfam-B_20808 (release 10.0) Family This family represents a conserved region approximately 400 residues long within the bacterial phosphate-selective porins O and P. These are anion-specific porins, the binding site of which has a higher affinity for phosphate than chloride ions. Porin O has a higher affinity for polyphosphates, while porin P has a higher affinity for orthophosphate [1]. In P. aeruginosa, porin O was found to be expressed only under phosphate-starvation conditions during the stationary growth phase [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.16 0.70 -5.59 34 1073 2012-10-03 17:14:37 2003-09-17 17:09:15 6 6 497 3 306 1050 285 310.00 14 76.92 CHANGED hplthp.hpush.phs...tps.tptpsusF..ph+ch+LplpGchsc.......chsYphp.clspssssts..s.ht..............h-hsalsach...scp..hslphG+.ts.aGthEh.sssslthhchu.....-hhs............hh..sshhtGlsssaphssspp.lphplh............ss..pssshsctassp...............thttstsshshshsasushh.sphhphchuhshtppActpssp.hlshGsthshs.......phth.hchhhu..slptphhhsphhp...........ttthpshpapuhhsphsYphsschpha.....................spGhaEsssthcthshttt.............hthp+shsahsGlpaaPh..tpsL+halsYltpchchpptsp..s....ssstlslth.Yph ..............................................................................................h...............ht.......tt...t..t....p....p.s..s.h......ph+c.h+lthp.G..p...hh.p........h.tap..h..p..hc...h......sps..s..s..............................................................lp-Aalpa..ph.........pp.....hplphGphphs..a.....uhp.pht.osp....h.h..h..pps..................h.p..........................................t.h........s..h...t.h..ths...h..t...h..p..h..t..tt...t..h.......h....t..h.shh.............................ss....t..s...s.st.s.....t..h.s..s..p....................................................hth.t.h.....th..............t...............................................................t....ht..........h...t.......t.......t.s.t.....t....t..................h........s........t.t......................hhshph..hhht....ht..h..p.schh..hhthtt..............................h.....s..h....p..h......u.h..asphsahh..h..s....p.....h..........................................h.......h.c...hhh..ths..hphtt................................................ttp.p...th.thGhpaa........th.....ch.hsa.....h....t..p.........................t.....h...h...................................................................................................................................
......................................................................................................... 0 111 228 276 +7225 PF07397 DUF1502 Repeat of unknown function (DUF1502) Moxon SJ anon Pfam-B_20836 (release 10.0) Repeat This family consists of a number of repeats of around 34 residues in length. Members of this family seem to be found exclusively in three hypothetical Murid herpesvirus 4 proteins. The function of this family is unknown. 19.00 19.00 19.90 19.40 18.10 17.10 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.02 0.72 -4.32 2 25 2009-01-15 18:05:59 2003-09-18 10:17:58 6 3 3 0 1 25 0 32.90 84 31.78 CHANGED RSEGAGCPARGAGPPAWGAts.ppsutsRssut. ..RSEGAGCPARGAG.PPAWG..AGPPRRDGGNRGDGAP....... 0 0 1 1 +7226 PF07398 MDMPI_C MDMPI C-terminal domain Moxon SJ, Bateman A anon Pfam-B_20685 (release 10.0) Domain This domain is found at the C-terminus of the mycothiol maleylpyruvate isomerase enzyme (MDMPI). The structure of this protein has been solved [1]. This domain appears weakly similar to Pfam:PF08608. 27.80 27.80 27.80 27.80 27.70 27.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -10.04 0.72 -3.41 68 478 2012-10-02 14:08:01 2003-09-18 11:25:45 6 2 187 0 169 384 19 92.60 21 37.19 CHANGED chsscsls...chlsthssphht...........thsh.hps.tshphpsss..........................................ssVpGssssllhh..hsGRhssss......lpssG-tshhst ..........................................lssDtls.....chLthh.sspht.....................hhhthpsstslpl+so-ss.....................s...st............................sssslpGsutsLlhhhsGRhsssst......lpssGDtslht.t.............. 0 61 131 160 +7227 PF07399 DUF1504 Protein of unknown function (DUF1504) Moxon SJ anon Pfam-B_20945 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 440 residues in length. The function of this family is unknown. 
25.00 25.00 58.90 58.80 18.90 18.80 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.56 0.70 -5.82 4 138 2012-10-02 15:12:49 2003-09-18 11:28:01 6 2 121 0 52 121 122 416.10 45 97.30 CHANGED Mhhss...ulplsuAlLFhsAllHTFhT.ahpRLscop.++..............utLa+lLucVElVFshWAlsLhhhhhhTEGhphuhtYhDSRNas.shFlhAIhllhtoRPIlaFuttVlphlA+.lshsushuaaaTlhhhsPLLusFlpEsuAMhlAAhhLpcphas.osS.+htYAThGlLFsNISIGGhhosFuu.slLhlhsthpW-pSFhLoHFuWKAlLAIhlssshhhhLFRKEh++hPcT.ss.......tssus-+VPshIIslpllh.VGsVlhu+tsslFhG.LFLFaLGFt+hh.hYQD.lhLpcshhVuLFhAGLVlhGs.QpWWl.slhhGMos.uhhhsuhhLohFhDNAhlsYLsp.lsshoDsa+YhlVAGuhouGGLTlluNhPN.sGhhILRspF.suoIH.ltLhLuALsPolIuhhsFhhL ...............................slplhuAlLFslAllHTFhs.aFp+Lu....+c....t.c........+..................................................................uuLaHLLuEVElVFGhWAhsLhhhhhh.....h..pG.hpt...........uhsYh-.S.R.NaTEPLFVhslMllAuSRPILphspphlptlA+.....lsh....psshuhaaslhshsPLhGSFITEPAAMTlAAllLscphaph.ssss+hpYuTLGlLFVNlSIGGsLTuaAAPPVLMVAspapWssuFMhspFGWKAslAlhlssslhhhlhR+c.h..t.p....h..s...ps..hts............t.hpcp.....lP.hhlhhlHl.lhLlG.l.Vl.h.A.HaPsl.FlG.LFLFFLGFspAhttYQs..LhL+cuLhVGFFLAGLVlhGGLQpWWLpslltuhsshslahGAhsLTAhsDNAAlTYLuS.L.lsuho....................Ds....a+YhlVAGAVoGGGLTVIANAPNPAGhuIL+spFscsulushtLhLuALsPTlluhhsFhhh........... 0 18 38 46 +7228 PF07400 IL11 Interleukin 11 Vella Briffa B anon Pfam-B_20854 (release 10.0) Family This family contains interleukin 11 (approximately 200 residues long). This is a secreted protein that stimulates megakaryocytopoiesis, resulting in increased production of platelets, as well as activating osteoclasts, inhibiting epithelial cell proliferation and apoptosis, and inhibiting macrophage mediator production. These functions may be particularly important in mediating the hematopoietic, osseous and mucosal protective effects of interleukin 11 [1]. Family members seem to be restricted to mammals. 
25.00 25.00 27.70 27.00 20.30 19.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.28 0.71 -4.78 3 55 2012-10-02 01:28:15 2003-09-18 11:38:20 6 2 31 0 23 43 0 175.50 46 79.67 CHANGED MNCVCRLVLVVLSLWPDTAVAPGPPPGSPRVSPDPRAELDSTVLLTRSLLADTRQLAAQLRDKFPADGDHNLDSLPTLAMSAGALGALQLPGVLTRLRADLLSYLRHVQWLRRAGGSSLKTLEPELGTLQARLDRLLRRLQLLMSRLALPQsPPDPPAPPLAPPSSAWGGIRAAHAILGGLHLTLDWAVRGLLLLKTRL .................................................................................h-hDphs..T+pLLtsT+pLstph..+D.+.FsuD.u.-...Hs.LDSLPsLu.hS.AusLuu......LQLsusLopL+uDLhSYh+HVpWL+RAuss.SL+oh-PELGslpu+Lc+LL++LQLL.MuRL..sLPQssPssPusPLsPPuSs.WsslpuuHtILttL+LhhDWAsRuLlhLKs+........................ 0 1 4 11 +7229 PF07401 Lenti_VIF_2 Bovine Lentivirus VIF protein Moxon SJ anon Pfam-B_21067 (release 10.0) Family This family consists of several Lentivirus viral infectivity factor (VIF) proteins. VIF is known to be essential for ability of cell-free virus preparation to infect cells [1]. Members of this family are specific to Bovine immunodeficiency virus (BIV) and Jembrana disease virus which also infects cattle. 22.00 22.00 22.60 23.20 21.90 21.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.38 0.71 -5.18 2 22 2009-01-15 18:05:59 2003-09-18 11:45:31 7 1 15 0 0 22 0 133.50 40 59.32 CHANGED MERTlQSshGRRRGSSsRt+tpsslISsPuYAlaPsPpaRYPRWEFVhps.YS.TAplpKtclllTYpYAlWtR.WpIpTGFhD.uhhMTPAGTHTTtElscLDLFWlRYs.C.Hc.P.Wh-hLhGphs.+hSCRRshQAuVlopT..HoLQRLAuLpLssNtsLCWYPlGplscsoPLWhpFooGKEPTIQQLSGHP ...................................t.t.p...hhhh.hhtt.t.hctcch.ah.........................................................................................................aa....ltlCSCpKc+acI..REFhlG+HR.WDLCKSChQGEIV+pTcP+SLQRLALL+lscsHVFQlMPLWRARRs......................sh.h.h.ppsh......... 
0 0 0 0 +7230 PF07402 Herpes_U26 Human herpesvirus U26 protein Moxon SJ anon Pfam-B_21150 (release 10.0) Family This family consists of several Human herpesvirus U26 proteins of around 300 residues in length. The function of this family is unknown. 25.00 25.00 502.10 501.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.95 0.70 -5.12 2 6 2009-01-15 18:05:59 2003-09-18 11:51:38 6 1 6 0 0 7 0 293.00 61 99.15 CHANGED Mhplh.SF.hGLshGsVIPG..sF+hh.t+o.htQlsllhhhuhuhLLTa+Rhs.h.YpPhsDhKIlhLSLhs.phss.h.hVlh.hLhFSEhRLRhIlsRChhIhPoaS.AsahGhhlulhhK.p.hscYplLlTsh.lhPhsspYtahlpSptFhhsLQ+Y+PIhKssu.hphslKsllhahlpFLhLhhllWhGKhaLsh.pspHLFFLsVlpsshFhhplaphshCullslLuGlhhphChapllFEhFlGLGaSulhhplSpsltc+shasGDLLNhFaC.sshshaF MRRLTDSFILGLAKGAVIPGLYsFRMTEGRSsLtQIGVlITVAISFLLTFKRFDPRFYKPIGDFKIVFLSLMAsKLPShLSAVVMICLIFSEMRLRMILSRCVhIMPSYSPAVFTGhMVSLFFKSQMFDDYSVLlTsA.LLPhTlRYGWMIRSSGFLluLQKYRPILKSTSFREVDLKsLVKFTVEFLLLFTlLWIGKhFLSMPKSNHLFFLTVVNNVFFKLNVFKAAACAlVAILSGLMMNVCLYRIIFEAFlGLGFSSIMLsLSSDLKDRSFYAGDLLNGFFCLVVCCMYF 0 0 0 0 +7231 PF07403 DUF1505 Protein of unknown function (DUF1505) Moxon SJ anon Pfam-B_21179 (release 10.0) Family This family consists of several uncharacterised Caenorhabditis elegans proteins of around 115 resides in length. Members of this family contain 6 highly conserved cysteine residues. The function of this family is unknown. 25.00 25.00 69.40 67.10 19.40 19.20 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.08 0.71 -4.14 7 20 2009-01-15 18:05:59 2003-09-18 11:54:03 6 1 4 0 20 20 0 113.80 34 82.02 CHANGED Ms.hh.oVllhuh....slAhsssssSp..opsaspsppCh....Sscspsh.....pCphosshphppsSsCshSpYhhppsp...tssp..ss.NtVsQCopTPCsus-KlssDCtsAFu.-+lupIp .......Mp.hh.sVLlluh....hlAhsss..ssSphpospa.sp.ucpCt..p..sssspsh.....pCphsuTWchppsssCslSpYhKKshs......ssss....-PhNGVAQCTKTPCsuoEplssDCssAFu.-+lupI..... 
0 7 7 20 +7232 PF07404 TEBP_beta Telomere-binding protein beta subunit (TEBP beta) Moxon SJ anon Pfam-B_20928 (release 10.0) Family This family consists of several telomere-binding protein beta subunits which appear to be specific to the family Oxytrichidae. Telomeres are specialised protein-DNA complexes that compose the ends of eukaryotic chromosomes. Telomeres protect chromosome termini from degradation and recombination and act together with telomerase to ensure complete genome replication. TEBP beta forms a complex with TEBP alpha and this complex is able to recognise and bind ssDNA to form a sequence-specific, telomeric nucleoprotein complex that caps the very 3' ends of chromosomes [1]. 20.70 20.70 20.90 424.90 18.50 20.60 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.33 0.70 -5.33 3 15 2012-10-03 20:18:03 2003-09-18 12:45:53 6 1 5 14 0 19 0 279.10 75 97.26 CHANGED MSKGQpA.QQQSAFKQLYTELFNNGGDFSKVSsNLKKPLKCYVKESYPHFLVTDGYFFVsPYFTKEAVsEFHuKFPNVNIVDLHDKVIVINNWSLELRRVNSAEVFTSYANLEARLIVHSFKPNLQERLNPTRYPVNLFRDDEFKTTIQHFRHQALQQSIAKNlKQENLPDISKVoGADAAGKKuKVDAGIVKASASKGDEFSDFSFKEGNTATLKIQDIFVQEKGKDALsKluDcsDVt......KVKGGAKGKuKAAoKSA.+GKKsSAKKuDouuSADVRKSVDKIVKYTPNKPSSRKETPQKSQSAPAAGKSSAKKTTTGSKKslPANPSPSGKKSTKTTDQMTMAQFKKYLDWHEKKKGGKTSSGGKVLGKRSAGKASATSG MSKuQ.s.QQQSAFKQLaTEhFN.GGDFuKVSpsLKKPLKsYVKESYPHFLVTDGYFFVpPaFTKEAVsEFHpKFPNVNIVDLHDKVIVINsWSLELRRVNSAEVFTSYANLEARLlVHSFKPNLQERLNPTRYPVNLaRDDEFKTTIQHFRHQALQQSlAKNlKQENlPDIuKVoGuD...KKuKVDAGIVRASsSKGDEFuDFSFKEGsTAsl+IQDIFVQEKGKDALp+ltsttps.......KV+GGA+GKtKAAoKSA..sKKssAtKt.s.tuADVRKSVDKIVKYTPNKPSSRKETPQKSQSsPA.GKSSAK+ThTGuKpplPANPSPSGKKSTKTTDQMTMAQFK+YL-WHEKKpsGKTSSGGKVLGKRSAGKASATSG. 0 0 0 0 +7233 PF07405 DUF1506 Protein of unknown function (DUF1506) Moxon SJ anon Pfam-B_20962 (release 10.0) Family This family consists of several bacterial proteins of around 130 residues in length. 
Members of this family seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 25.00 25.00 27.50 27.00 23.40 23.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.71 0.71 -4.15 2 112 2009-01-15 18:05:59 2003-09-18 12:48:34 6 1 27 0 8 63 0 123.40 63 98.52 CHANGED MNGVRKRLSDMSFRMINVFKDPpPL+FYKGTVVKLENDSSYQRVFDKNKYTEFAGVIIDI+PQELAlLYDSDMSDIQGYSKLYTYQDLNYELKDRISIuDLlYFEIFSIDSSIGYFTLVLKEFIWTN ...................................................MsGVRKRLuDMShRMINVFK.D.PpPL+FYKGsVVKLENDuSYQRlFDKNKYTEFtGVIIDI+PQELAhLYDSDhSDIQGYSKLYTYQDLNYELKDRISIuDL..VYFEI...F...SIDSS....IGYFTLVLKEFIWT......................... 0 4 5 5 +7234 PF07406 NICE-3 NICE-3 protein Moxon SJ anon Pfam-B_21003 (release 10.0) Family This family consists of several eukaryotic NICE-3 and related proteins. The gene coding for NICE-3 is part of the epidermal differentiation complex (EDC) which comprises a large number of genes that are of crucial importance for the maturation of the human epidermis [1]. The function of NICE-3 is unknown. 20.20 20.20 20.60 22.30 19.10 20.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.05 0.71 -4.79 6 138 2009-01-15 18:05:59 2003-09-18 12:56:57 6 5 89 0 77 153 0 165.80 46 65.51 CHANGED huSphp.LSGVslVLlhuaGsLlhllLFIFsKRQIMRFsl+SRRGPHVPlGHsAPKsL+cEIEtpLs+lQ+IpaEP+LLu......sDcs+alp...tssps..hhsYhYRMKAlDsl+-h-h.lpchutu.ophsGcslRuaLh.Lpspps.hpGscptLIDplhDhYE+ARHssstFGcsEYh+YpphLpcLsss .............................................p.LSGVsllllhuhGsLsh.....llLFIFsKRQIMRFsh+.S.RRGPH.VPlGps.AsKs.L+cE.I-hRLsclQc.Ip.aEPpLLs......s.-.c.s+hh.....ps..ppp....t.sYhY..RM.KAlDsl+.s.E.hshpptspp.pphhGcshRuaLhpL+s....p.u.sPhcGs..ppsLIcplhDsYEpARaGss..sFGpsEYhcYpctLpcLss.s..................................... 
1 23 28 50 +7235 PF07407 Seadorna_VP6 Seadornavirus VP6 protein Moxon SJ anon Pfam-B_21021 (release 10.0) Family This family consists of several VP6 proteins from the Banna virus as well as a related protein VP5 from the Kadipiro virus. Members of this family are typically of around 420 residues in length. The function of this family is unknown. 19.60 19.60 19.60 19.70 19.50 19.20 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.36 0.70 -5.72 2 17 2009-01-15 18:05:59 2003-09-18 13:01:11 6 1 9 0 2 13 0 319.60 46 81.38 CHANGED MMIALIQMKNMKCTLKVEETAENHELEGVSFDELsALREENAKLKpENEhLKTKlHRLESDWTTSDIVEKlELMDsQF-RIGKIMDKMREPMLFKRD-IELHGDLLARVEGLLRIKNERSEVEF-KDIQCIVGRYFSDEsKQRNLEKMIKoFEYDDIADTIALRLTaFIQDPGLRSIVYAMCKAAVLNQNYLNIEVQEIVDVTRQKYTHNARDDIDFYPMFTFDANVPEGVFDHIYKKHYLsPQSAALVHTLSHLDVNVDGsGIAhYHIGSATRFAECSVVYVDGRAYKPIRVMAEYAIFPTLPHEYKGRVEGLLLLHGGLAPITLVRVYHDVNVGGLVTGSIAASVSTLlRNCMLYSFDlYFTPNGlCINAVGNNNhVNIIDIsCCGRAFGKAPLDQGsWNRNKFMGHKHG+GSKCKQY .....................................................................phsALRpENu+LKpENEtL+sKlcRL..Eu-hppScllEKhpL.h.-s..pa-+lsKIMDK..h......Rc..shL......Fc+D-lcLcusLLARl-sLhRlK.cpsp........psllttYFs.DtscpcsL.chl+o.pas.hhcsht.Rlhh.IpsssLhshshthCthhhh.s.Nh.NlpspplhclstpKYpHsh.ss...Y.hhs..hplspthhDHIYtKH.ls.pshslhHhhSHLsVNVsGsslAhYHlGsusRhsEsSVl.l....-s..RhaKPl+s.u.YAIF.pLPpEhchRVpGLL.hHhGlsPITLVRsht-lp....shhloGslusSlosLh+shhL.ph-lhFs.pGlhlpssGspshsph...sCCtpAFu..................................................... 3 0 1 2 +7236 PF07408 DUF1507 Protein of unknown function (DUF1507) Moxon SJ anon Pfam-B_21047 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown. 
25.00 25.00 63.00 62.80 18.70 17.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.97 0.72 -4.24 14 527 2009-01-15 18:05:59 2003-09-18 13:03:12 6 1 527 4 46 155 0 89.70 58 95.66 CHANGED sc.ths.pccAhtLLpuDA-KIh+LI+VQhDsLThPQCPLYEEVLDTQMFGLSRElDFAVRLGLI-cc-GKplLscLE+ELStLH-A.hpc ......p...hsh+ptAlp.LpcDA-+IL+LIKVQhDNLTlPpCPLYEEVLDTQMFGLo+EVDFAV+LGLl-cE-GKplhhcLE+ELSpLHEAFTp....... 0 12 29 38 +7237 PF07409 GP46 Phage protein GP46 Vella Briffa B anon Pfam-B_20855 (release 10.0) Family This family contains GP46 phage proteins (approximately 120 residues long). 22.50 22.50 22.70 23.70 22.00 22.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.53 0.71 -4.77 25 364 2009-01-15 18:05:59 2003-09-18 13:04:22 7 1 322 0 50 265 9 111.60 37 81.26 CHANGED ssLpsAVlISLFTDRRA.............tssD...s.sss-pRGWWGDoasss.....plGSRLWLLpRpKLTsc..shpcAcsYAcEALpWhl-DG....hApulsVpup...psphspLsLpVplshs......DGshhshsasstW ..................................sLppAVlISLFT.RcA..............tssD..............ss....s..hG.WWGDoa.s...........plGSRL.WLLp.RpK.hTsp...lhppActYAcEAL...pWll-DG....hsspIsVpsp.....pst....spLsLslslhps......cGs.h.h.h.pat............................... 0 11 23 39 +7238 PF07410 Phage_Gp111 Streptococcus thermophilus bacteriophage Gp111 protein Moxon SJ anon Pfam-B_20904 (release 10.0) Family This family consists of several Streptococcus thermophilus bacteriophage Gp111 proteins of around 110 residues in length. The function of this family is unknown. 
25.00 25.00 26.40 25.50 23.60 23.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.43 0.72 -3.89 2 14 2009-01-15 18:05:59 2003-09-18 13:07:44 6 1 14 0 1 15 0 98.30 43 76.44 CHANGED MKpAWpIAKEAstKhGhKAhEahuEuLKMAWo.AK......tchpslcE+hhchG..cYo......hh.sLD.....pFNEl..hthphs.Yt.chhADhDs.thYhhhpKshcst ..MppAWcIAK-AscKFGGKAhEYluEuLKMAWutAKst...ssolschpslcp+h.pKsG..cYo......hh.s.LsauK-hp.............................................................................................................. 0 0 0 1 +7239 PF07411 DUF1508 Domain of unknown function (DUF1508) Moxon SJ anon Pfam-B_20853 (release 10.0) Domain This family represents a series of bacterial domains of unknown function of around 50 residues in length. Members of this family are often found as tandem repeats and in some cases represent the whole protein. All member proteins are described as being hypothetical. 20.00 20.00 20.00 20.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.23 0.72 -4.53 103 1558 2009-09-14 11:58:01 2003-09-18 13:16:35 7 9 909 14 301 816 131 48.60 40 69.96 CHANGED spsGcahFpLKAsNGplIu.oSEsYso+suscsGIpSV+pNusssshh-h ...........ussupahFs.LK.A.uNsplIs..oSEh.Yso+sSscpGIsSV+sNusppphh-.............. 0 71 186 255 +7240 PF07412 Geminin Geminin Vella Briffa B anon Pfam-B_20861 (release 10.0) Family This family contains the eukaryotic protein geminin (approximately 200 residues long). Geminin inhibits DNA replication by preventing the incorporation of MCM complex into prereplication complex, and is degraded during the mitotic phase of the cell cycle. It has been proposed that geminin inhibits DNA replication during S, G2, and M phases and that geminin destruction at the metaphase-anaphase transition permits replication in the succeeding cell cycle [1]. 
21.10 21.10 21.20 21.50 20.70 21.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.43 0.71 -4.63 7 153 2009-01-15 18:05:59 2003-09-18 13:18:27 7 2 84 11 87 154 1 156.00 30 67.08 CHANGED MssSMKQpp..EpspENlKs........ussPRRTLKhIQPSAsGsLVGRpsE.sKulsKRKhWsspLsSpsspsts.Vt.Eps..ENcs..tslopEAaDLMIKEsPoupYWKEVAEcRRKALYEsLpENEKLHKEIEtKDpEIARLKpENcELtElApHVQYMA-MIERLoGcs.DNLEsLcs.ph......-sEptts-.p..--o- ..................................................................................................................................................................................................+pp..hs.s............t.....t....t.......pp....t...........p......tt...........pptp...........tth..pu....DLh.pp......pPsp......pYWK-lAEcRR+ALh-uLpENc+..LHc...clE...pKppE..Iu...pLK...cENccLtE...l.ApcspahAphl-cL...t...................................................t.................................... 0 21 27 52 +7241 PF07413 Herpes_UL37_2 Herpes_UL37; Betaherpesvirus immediate-early glycoprotein UL37 Moxon SJ anon Pfam-B_21151 (release 10.0) Family This family consists of several Betaherpesvirus immediate-early glycoprotein UL37 sequences. The human cytomegalovirus (HCMV) UL37 immediate-early regulatory protein is a type I integral membrane N-glycoprotein which traffics through the ER and the Golgi network [1]. 
25.00 25.00 34.60 34.50 18.90 17.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.88 0.70 -5.61 6 101 2009-01-15 18:05:59 2003-09-18 13:33:48 6 1 24 0 0 73 0 276.10 58 79.43 CHANGED sshssslcCpYchshhpc+psps..tChhsChhNtsLlasGsClus+ssl.lNhsh.spuh+c..........t+sshL+lGlpYYhcGhhlRsllucst......psNsoplsGsltC....W.csssoGGplTLNhosp......RhlhsscsthptspWcustpcos.........sVhplLscpsphDhhFLpchCP+Lscc....h.+h+sspsp.pssVpscopsspslhssWspuWusWoKYuEht.h..hhsthshhhpscsRshuch....shlGlFhlsuGohslLsLFCsLShhpRRcll+Dh ..............GhGVSVRCTYHGTDlNlTSNsTSMNCpLNCTpNpTQIYNGPCsGsEu+LPLNVTF.pQSRRp..........WHSVMLpFGFQYHLEGWFPLRlLNESR......-hNVTEVpGElAC...FpNDTNlTsGQLsLNhTG+S..YVLRAlA+TS..PFESpV+W.EE.T.NsTsssos.....pNTsTVMphLspYAESDYIFLQDMCPRFL+R.....olKLoKNppppNsTFT...Gs...N.hToLPhWT.scCcGWpYWTTLShM.....W+NRRSALLRAKSRALGHW....ALLSICTVAAGSIALLSLFCILLIGLRRDLLEDF.... 0 0 0 0 +7243 PF07415 Herpes_LMP2 Gammaherpesvirus latent membrane protein (LMP2) protein Moxon SJ anon Pfam-B_21212 (release 10.0) Family This family consists of several Gammaherpesvirus latent membrane protein (LMP2) proteins. Epstein-Barr virus is a human Gammaherpesvirus that infects and establishes latency in B lymphocytes in vivo. The latent membrane protein 2 (LMP2) gene is expressed in latently infected B cells and encodes two protein isoforms, LMP2A and LMP2B, that are identical except for an additional N-terminal 119 aa cytoplasmic domain which is present in the LMP2A isoform. LMP2A is thought to play a key role in either the establishment or the maintenance of latency and/or the reactivation of productive infection from the latent state. The significance of LMP2B and its role in pathogenesis remain unclear [1]. 
25.00 25.00 88.20 87.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.99 0.70 -5.90 3 276 2009-01-15 18:05:59 2003-09-18 13:47:27 6 1 6 2 0 98 0 230.90 53 99.99 CHANGED MuSLEMpPLG.AGuPuosGGPDGsEGuuNspYPSuFGSSssoPGP..PssEDh-us-csPPPY-us...sGsctGsYQPLGsQDP.SLYAGLGpsGGsG.LPPPPYSPRc-oSpHlYEEs+.cuoMsPsWLPVIsAPYLFWLAGIAASCFSASVSAlVsoTGLALSLLLLAALsNSYAAQpRKLLTKLTlLVAVVTFFAILLTWlVpPsPpNuIVFALLsAAuGLQuIYlLVMLLhLLhAYRRRWRRLoVCsGLLFLACsLLLIVDAIaQLSPLLGAMTVVALTLLLLAFlLWLSSPaGlGALGAALLTLAAALALLASLILG-LNLATMFLLMLLWTLVIILI...CSoFPtothLLoRWLLYALALLLLASALLAGGSILQT..huusoTEFFPsLFCMLLLIVAGILFILAILTEWGSGSKTYGPVFlCLSGLLTMsAGLVWLTLMpKVLLSAWILTAGCLIFFIGFhLFGVIRFCRhCCFCCLpLESs-RPsTsYcNsV ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +7244 PF07416 Crinivirus_P26 Crinivirus P26 protein Moxon SJ anon Pfam-B_21324 (release 10.0) Family This family consists of several Crinivirus P26 proteins which seem to be found exclusively in the Lettuce infectious yellows virus. The function of this family is unknown. 
25.00 25.00 518.10 518.00 18.10 17.30 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.67 0.70 -4.84 2 5 2009-01-15 18:05:59 2003-09-18 13:50:21 6 1 2 0 0 5 0 227.00 99 100.00 CHANGED MNNFPEIFDDESTCDYDKEIDHQELSDTFWCLMDFISSKHGKSVADINSGMNTLINIRKsLNGSGKVVSITDSYNKTYFHSQRGLTNVDSRINIDILKhDFISIIDDLQIIFRGLIYKDKGFLDSADLLDLDKKTTTRKFQEYFNILKIKIIEKIGMTKTFHFNIDFRsTISPLDKQRKCSISSSHKKTNRLNDLNNYITYLNDNIVLTFRWKGVGFGGLSLNDIKI MNNFPEIFDDESTCDYDKEIDHQELSDTFWCLMDFISSKHGKSVADINSGMNTLINIRKSLNGSGKVVSITDSYNKTYFHSQRGLTNVDSRINIDILKIDFISIIDDLQIIFRGLIYKDKGFLDSADLLDLDKKTTTRKFQEYFNILKIKIIEKIGMTKTFHFNIDFRNTISPLDKQRKCSISSSHKKTNRLNDLNNYITYLNDNIVLTFRWKGVGFGGLSLNDIKI 0 0 0 0 +7245 PF07417 Crl Transcriptional regulator Crl Vella Briffa B anon Pfam-B_20900 (release 10.0) Family This family contains the bacterial transcriptional regulator Crl (approximately 130 residues long). This is a transcriptional regulator of the csgA curlin subunit gene for curli fibres that are found on the surface of certain bacteria [1]. 25.00 25.00 27.80 41.40 22.80 19.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.86 0.71 -4.42 17 658 2009-01-15 18:05:59 2003-09-18 13:59:10 7 1 648 2 53 209 5 124.10 67 97.53 CHANGED slsps.s+uR.LhpphsALGPYlREtQsc-spFFFDCLuVCVsschuPEcREFWGWWh-Lcsp-stFoYpYphGhasppGsWhstsl..pcstcclppThcsFHt+LtphLpp.hpLpLpsss-......schs ........TLPSGHPKSRLIKKFTALGPYIREGpCEDNRFFFDCLAVCVNVKPAPEhREFWGWWMELEAQ.EsRFTYsYQFGLFDKsG-WpuVslp-sEVVERLE+TLREFHEKLR-LLso.hpLcLEPADD....FpDEPVKL.. 0 4 13 33 +7246 PF07418 PCEMA1 Acidic phosphoprotein precursor PCEMA1 Moxon SJ anon Pfam-B_20971 (release 10.0) Family This family consists of several acidic phosphoprotein precursor PCEMA1 sequences which appear to be found exclusively in Plasmodium chabaudi. PCEMA1 is an antigen that is associated with the membrane of the infected erythrocyte throughout the entire intraerythrocytic cycle [1]. 
The exact function of this family is unclear. 21.40 12.50 31.70 12.80 18.30 12.40 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.05 0.70 -5.04 8 26 2009-01-15 18:05:59 2003-09-18 14:04:48 6 2 5 0 18 27 0 235.50 36 79.28 CHANGED MKlISLGLISSIIFSIVLAKNSSsSsSTTGCFsFsRKKsKKlHpsssts....VKup-..-aDPDLPNLKFIDEF-PIsLEssKuRhScLD-sFlSETDGhIIDKVTGF.RRENDSslSGWYlRPYEEDYEcMIplNFIPLp...-YYQ+ppNssHKQusuPsPlsh......hPcKQEhslppp.o........................TlpE-DussLaEDct..................................ElDsEssshh......s-GEs...............cspcEh-pcsls.L........................p-tEcspp...................................hcpplpc.Ycs................hpt-tt..ttpppc.-................c.p..c.--ptsEcs-sccpspc .......................................tpt.tSt..hspFshhpKKsK.c.hth........hc..c...aDPcLPNlKFI-EFpPlhlEssKtp..cLs-sFlSETDGhIlDKVTGF.RREN-SshpGWYIRPYEEsYEcMIphpFhPLp....p.app.psps.KQ.ssssPlsp.......PcKpcls..pp.l.........................plpEcDt.hlpE-ct.......s.........................plstt..p.h......s-t-t...................ps.sEh-tt.ht.h.........................pt-p.p.p...................................h.ptch.p.Ypt..................ht...pt...tttt...........................................ph............................................... 0 0 3 18 +7247 PF07419 PilM PilM Vella Briffa B anon Pfam-B_20906 (release 10.0) Family This family contains the bacterial protein PilM (approximately 150 residues long). PilM is an inner membrane protein that has been predicted to function as a component of the pilin transport apparatus and thin-pilus basal body [1]. 
25.00 25.00 25.40 32.50 22.60 22.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.45 0.71 -4.67 13 197 2009-01-15 18:05:59 2003-09-18 14:10:46 7 1 167 4 19 125 0 135.90 31 92.57 CHANGED hVhlllsu......th.tppsspsppptpsupsshlAtphLhatsulNDatYppsspc...GslssspLGLPs..h.-stlpplIstsRlaVWhs-pP..GLsuALpcpSpsSsLlGplpsGpLlsssGsshuloLPuuIs-GslValN ..............................hhhhhlss.......h.ppp.spstpttphsssshh.AsphlhhtsslNDatYspstpc.....Gs.l..ssspL..u..LPs....sscst.lcphlppGRlaVW..h.sppP.....GLsss...L....ppp.ocs.S..uLlGhh..p.sGpLshh.u...G.ss.....s...ulslPA..uIstGulVhhN.......................... 0 7 11 13 +7248 PF07420 DUF1509 Protein of unknown function (DUF1509) Moxon SJ anon Pfam-B_21379 (release 10.0) Family This family consists of several uncharacterised viral proteins from the Marek's disease-like viruses. Members of this family are typically around 400 residues in length. The function of this family is unknown. 20.30 20.30 20.40 22.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.63 0.70 -5.18 3 13 2009-01-15 18:05:59 2003-09-18 14:17:13 6 1 8 0 1 12 1 286.70 43 71.00 CHANGED MFsGtATIELhERLATSWLTAIRLILoWHPlaA.sRppEP...LE+lCR-GREYIlMLSGTIpssHATWPFWQlMpKCLDWCCSFHsPDD+SCEHGSPRIGIRLEGENphFAPuLGLYSuVMTWTP..IPCasEhPlh.......PRPupSS-s-ssPSTSptps..VARVRPpVlQ++VsKTRPlDsElHRPtPlAhsN......PS.asDEPDhthp.cPQPGPSGQN.........RuPRTPT..L-sVRVtDpPVopsRu.coPSPP.csDpsDsD.slutPsRsh.RTPsspPSSP-oElsEEl.AQPDPWGTpt.hhsNRuuTPDDsSuIo-DSAcGSctshRRPsHSsoGERRloRRNRSE..uRSRSRSRSG-....RRaRRsRlRuhPGRRSsS...Rp.TVLVsSSE--- 
.........MFsGsuTltLIERLATSWLTAlRhIluWhPhaA..pppEP...L-pLsREuRpYIhhlSGoVpssaATWPFWpVMR+CLDWCCuFHhPcDpSCpaGAPRlGIp.cGpN.hFsP.LsLYSslMsWoP..hsCahp...........sp..psstsps.PSsup.....luRV+Phshpphs.KppP.ch-.Hpshshuh........Po..hc-s-......spsuPStps.........ptspoPT..lpsVhsh-tssshphs.tsPsPP.p.s..s.-.s.u.......hoP.....Ss.s.....h.AQP-sWsh......spptoPsDposh.-Dpschp..th.pP.+Stss-.RhsRcsppc..sRSRSRSRSt-....RRht..chR..PGcppsu...cp.sVls.Ssptp........................................................... 0 1 1 1 +7249 PF07421 Pro-NT_NN Neurotensin/neuromedin N precursor Vella Briffa B anon Pfam-B_20947 (release 10.0) Family This family contains the precursor of bacterial neurotensin/neuromedin N (approximately 170 residues long). This the common precursor of two biologically active related peptides, neurotensin and neuromedin N. It undergoes tissue-specific processing leading to the formation in some tissues and cancer cell lines of large peptides ending with the neurotensin or neuromedin N sequence [1]. 25.00 25.00 59.90 59.80 19.30 18.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.15 0.71 -4.76 3 47 2009-01-15 18:05:59 2003-09-18 14:29:15 6 2 33 1 22 43 0 151.40 68 98.12 CHANGED MtGMNLQLVCLTLLAFSSWSLCSDSEEDVRALEADLLTNMHTSKISKASPPSWKMTLLNVCSLINNLNSsAEEAGEMHDDDLVuKRKLPLVLDGFSLEAMLTIFQLQKICRSRAFQHWEIIQEDILDsGNDKNEKEEVIKRKIPYILKRQLYENKPRRPYILKRGSYYY ...................tM+lQLVChhLLAFoShSLCSDSEEEMKALEADLLTNMaTSKI..oK...AplP.WKMTLLNVCSLlNNLNo.AEET..GEhcEE.-LlsRRphPtsLDGFSLEAMLTIYQLpKICHS...RA..FQH.WELlQ..EDlLDsGNcK.sEKEEVIKRKhPYILKRQL.apNKsRRPYILKRsSYYY............ 0 1 2 5 +7250 PF07422 s48_45 Sexual stage antigen s48/45 domain Vella Briffa B, Bateman A anon Pfam-B_21037 (release 10.0) and Pfam-B_4621 (release 14.0) Domain This family contains sexual stage s48/45 antigens from Plasmodium (approximately 450 residues long). 
These are surface proteins expressed by Plasmodium male and female gametes that have been shown to play a conserved and important role in fertilisation [1]. This domain contains 6 conserved cysteines suggesting 3 disulphide bridges. 20.80 20.80 20.90 21.20 20.50 20.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.37 0.71 -3.99 83 1042 2009-01-15 18:05:59 2003-09-18 14:56:31 8 8 16 1 195 1050 1 122.40 24 32.58 CHANGED plcG....CDF..........................sssp.....................tpphhspshphs...........psphCplps...pss-l.lGhtCPpp......................................................hplpPssCFcpV....h.........................sppphplpsll.........ssphh.........................tppppphpahtlP.p.hpp........phphtCpCpptp ........................................................................lpG.CDF..............................................ospt...................pphhppshs.s............ttschCslph................tss-l.lGhhCPss.......................................................plpPpsCFpplY............................tsppthclppl.lt.................ts.hh......................................................hsppptphsahplP.pphpc........phpFpCpCpp..t........................ 0 44 75 162 +7251 PF07423 DUF1510 Protein of unknown function (DUF1510) Moxon SJ anon Pfam-B_21355 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. 
42.00 42.00 42.50 43.60 40.70 41.50 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.55 0.70 -4.97 16 275 2009-09-11 00:04:00 2003-09-18 15:34:46 6 2 186 0 28 185 0 178.30 36 98.23 CHANGED SRapp+pK+R+tNhlLNIhIulVllLIllVuspLhh.sssppps.tp..........................c.ptptsscpcpsstEcscsEsK-susss-pcc..............pcccc-spcpcsppp.p......psppstpcsspsssssVccshssssWcPlGTpQoupHsssa.cpuStDWpEMhcAlSYATGlsp-phhlhalGNN..Gs..scAhGslpcKsssp+.YcVpIpWVDscGWKPsKVppL ......................SRhpp+pp+++pNhlLNlhIslV..lhlhllsaplhh.sp.pppts.tp.................................ppsppppstp.pcptK..pcsK-tsp.p-pcp...................tppp..pppp.c.pt.cp.p.pctcc....psptspp.s.tcpstspspcshTpssWKPlGT-....Q..sup.shsa.cpuosDWpEMppAlShAh-lP.pphhhhhlGps..Gp..scAhGslpsKpssc+.YhV.IsWV-scGWKPshVppL...... 0 9 19 21 +7252 PF07424 TrbM TrbM Vella Briffa B anon Pfam-B_21098 (release 10.0) Family This family contains the bacterial protein TrbM (approximately 180 residues long). In Comamonas testosteroni T-2, TrbM is derived from the IncP1beta plasmid pTSA, which encodes the widespread genes for p-toluenesulfonate (TSA) degradation [1]. 20.90 20.90 20.90 20.90 20.80 20.50 hmmbuild --amino -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.21 0.71 -4.27 16 280 2009-01-15 18:05:59 2003-09-18 15:52:46 6 1 214 0 27 201 11 115.20 33 74.28 CHANGED lLTGDTRLACEAlLCLuS..us+PuECuPSLsRYFSIct+KhpDTlptR+sFLNLCPVssp......sPEMtsLpss.lsphsGc.CsssuLNsplcphp.....................thcchthpIssplss.CphhsupsYTDap.....s+Yssssc...cts.Wscuh-hccs.tEh.ppl......pc ..................sCEulLC....L....uu....us...ts...oECpsu.pcaF.s.I.p......t..K.....K......h.s......cThcAR+sFLN.CPssst................s..........................................................................................................................................sh............................. 
0 3 16 22 +7253 PF07425 Pardaxin Pardaxin Moxon SJ anon Pfam-B_21422 (release 10.0) Family This family consists of several Pardaxin proteins. Pardaxin, a 33-amino-acid pore-forming polypeptide toxin isolated from the Red Sea Moses sole Pardachirus marmoratus, has a helix-hinge-helix structure. This is a common structural motif found both in antibacterial peptides that can act selectively on bacterial membranes (e.g., cecropin), and in cytotoxic peptides that can lyse both mammalian and bacterial cells (e.g., melittin). Pardaxin possesses a high antibacterial activity with a significantly reduced haemolytic activity towards human red blood cells compared with melittin [1]. Pardaxin has also been found to have a shark repellent action [3]. 25.00 25.00 86.60 86.50 19.20 17.00 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.39 0.72 -4.55 3 5 2009-01-15 18:05:59 2003-09-18 16:03:39 6 1 2 2 0 5 0 33.00 95 100.00 CHANGED GFFALIPKIISSPLFKTLLSAVGSALSSSG-QE GFFALIPKIISSPLFKTLLSAVGSALSSSG-QE 0 0 0 0 +7254 PF07426 Dynactin_p22 Dynactin subunit p22 Vella Briffa B anon Pfam-B_21336 (release 10.0) Family This family contains p22, the smallest subunit of dynactin, a complex that binds to cytoplasmic dynein and is a required activator for cytoplasmic dynein-mediated vesicular transport. Dynactin localises to the cleavage furrow and to the midbodies of dividing cells, suggesting that it may function in cytokinesis [1]. Family members are approximately 170 residues long. 
21.50 21.50 22.00 21.70 20.90 21.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.05 0.71 -4.98 4 123 2009-12-07 10:30:11 2003-09-18 16:29:35 6 4 86 0 67 115 0 154.70 37 86.32 CHANGED Msc...sLcpLchRLQ.LEpRlYG..tutsucPsKsA-uLs+lQsALuNhAsKRERVKILaKKIEDLlKYLDPQahD+IAlPDuMKLEFILAEEphlsu...........QAALLEQlpshpPlLDSsaIpAVPEpAoKLQRLSQIHIpQQDQsEphosEVK+LhE-YNKMhhLLSKQFsQWDEsL .......................................................tlp.LptRlptLEphl.......h.G......t..s.ts....t....t...s..p...p...l.sD.uLs.......clpstL...ush..usKRE+l+...h...LaK+..l--L.hK.YLDPp...a.lDcl.slPcu..KLpaILApEp.lhs...........psthLEplppLpPhL-Stt.l.+slPpps.s+LppLuQlalpQp-ps.tlspcsptLhppYNphh..lScpFh.ast................................... 0 22 26 44 +7256 PF07428 Tri3 15-O-acetyltransferase Tri3 Vella Briffa B anon Pfam-B_21449 (release 10.0) Family This family represents a conserved region approximately 400 residues long within 15-O-acetyltransferase (Tri3), which seems to be restricted to ascomycete fungi. In Fusarium sporotrichioides, this is required for acetylation of the C-15 hydroxyl group of trichothecenes in the biosynthesis of T-2 toxin [1]. 
20.10 20.10 20.60 23.10 19.70 18.20 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.45 0.70 -5.76 3 46 2009-01-15 18:05:59 2003-09-18 16:55:56 6 2 24 2 6 46 0 385.60 77 81.01 CHANGED ALPPLVPALYRWESTGu..RcVQRRCVGAEAIVGLEEKNRRSLYDLFIATSLRNIAPASTLLTLRNLKDMFELALVEGRFEHPECACTVSWDDQVAAIISYESPESDESARDWARGCVHVQPTAKSAIDLWTEMEEGRAAAKDNTPSKPIELFLLSDVPTDSTPIPQGATVDILFHSNHLFWDGIGCRKFIGDLFRLVGNHIGLSDStEThKMQWGQEIKNLSPPVVDSLKLDISTLGTEFDDKCTEYTSALVANYKSRGMKFRPGLALPRCuIYKLSADDSIAIIKAVKTRLGPGYTISQLTQAAIILALLDHLKPTDLSDDEFFISPTSVDGRKWLREDIASNYYAMCQTAAVVRVENLKSIAVSHKDEKEIQVRALEKACRDIKKSYDQWLGNPFLEALGLRVHNFEASYLN ........................................................................................ALPPLVPALYRWESTGs....RpVQRRCVGAEAIVGLEEKNRRSLYDLFIATSLRNV.APASToLTLRNLKDMFELALVEGRFEHPEsACTV....SWDD.QVAAIISYESPESDESARDWARGCVHVQPTAKSAIDLWTEhEEGRAA.AKDN.sPSKPIELFLLSDVPTDSTPIPQGATV-ILFHSNHLFWDGIGCRKFIGDLFRLVGNHIGLS..DSA-T.KhQWGQEIKNLSPPVVDSLKLDVSTLGTEFDDKCTEYTSAL...VANYK...SRGMKFpPGLu.LPRC.uIYKLSA-.DSIAIlKAVKTRL.GP.GYTISQLTQAAIILALLDHL.K.PTD..LSDDEFFlSPTSVDGRKWLR..E.DIASNYYAMCQTAAVVRVENLKSIAVS.HcDEKElQV+ALEKA.CRDIKKsYDQWLGNPFLEALGLRVHNFEAuYLp..................... 0 1 2 6 +7257 PF07429 Glyco_transf_56 Fuc4NAc_transf; 4-alpha-L-fucosyltransferase glycosyl transferase group 56 Vella Briffa B anon Pfam-B_21451 (release 10.0) Family This family contains the bacterial enzyme 4-alpha-L-fucosyltransferase (Fuc4NAc transferase) (EC 2.4.1.-) (approximately 360 residues long). This catalyses the synthesis of Fuc4NAc-ManNAcA-GlcNAc-PP-Und (lipid III) as part of the biosynthetic pathway of enterobacterial common antigen (ECA), a polysaccharide comprised of the trisaccharide repeat unit Fuc4NAc-ManNAcA-GlcNAc [1]. 
25.00 25.00 28.40 25.20 23.30 24.60 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.18 0.70 -5.50 11 636 2012-10-03 16:42:30 2003-09-18 17:16:37 6 2 599 0 55 333 10 331.10 69 97.20 CHANGED MTsLIHVLGSDIPHHNpTVLRFFNDpLss.hsspQtR+FMlVup-suL.ssaPALclppFsuKKuLAcAVIApA+usRspRFFhHGQFNssLWLALLoGtI+PsQhhWHIWGADLYEsSpuLKF+LFY.LRRlAQtRVG+VFATRGDLsaatp+HPpVssp..LLYFPTRMcsuLsthssscscsu.phTILVGNSGDpSNcHlsAL+AIHQQF.GssV+lIlPMGYPuNNpuYIppV+psuhtLFsscNLQILoEKLsFD-YLsLLRpCDLGYFlFsRQQGIGTLCLLIQhGlPhVLsRcNPFWQDhsEQplPVLFssDsLstshlREAQRQLtulDKspIAFFsPNYlpGWppALtlAuGEss ...............................................................................MTVLIHVLGSDIPHHN+TVLRFFNDsLAA..TSEHAREFMVsGcDs.Gho-....SCPA....LS.l.pFasu..KKuL...AcAVIA....KAKANRpQRFFFHGQF.NsoLWLAL.LSGGIKPuQFaWHIWGADLY...E....l...S...p.....G......L+.a+LF..Y...P........L...RRlAQtRV....Gs.V...F.A.T.R.GDLSaFA+pH..P.+VRG.E........L.L..Y.....FPTR.M..DP......S.L..NoM.As-.R...QRtG.KhTILVGNSGDRSNcHIAAL+..AVH.Q.QF..G..D.T.V+.VVVPMGYPsN.N-AYI-E.VRQAGLpLFSpENLQlLSEKLEFDAYLsLLRQCDLGYFIFARQQGIGTLCLLIQAGIPCVLNR-NP.FWQDMsEQ.HLPVL..FTTDDLNEslVREAQRQLASVDKssIA..FFSPNYLQGWppALsIAAGEss................................................................ 0 5 20 38 +7258 PF07430 PP1 Phloem filament protein PP1 Vella Briffa B anon Pfam-B_20843 (release 10.0) Family This family represents a conserved region approximately 200 residues long, four copies of which are found within the plant phloem filament protein PP1. This is one of the constituents of the proteinaceous filaments found in the sieve elements of Cucurbita phloem [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.40 0.71 -4.72 3 26 2012-10-01 19:28:07 2003-09-18 17:18:52 6 3 6 0 14 73 0 125.60 24 88.55 CHANGED CGQlsssPKWIQIPDVKEhGlQVVIKFIVEQlKIpaGDSLKF-SIYEGWYFELCPNSLKYRLHIKAIDFLGRSLpYEIIIIEEK.hL.RIaKL-SIIVILSPGHhVGPVDPPQVEKWIKIPNLQVPFVQEVSKFAlDEaN.KuGDGLKYlEIYDGWYMEMGQDNIKFRLHLKAKDCLGRlRNYEAlVLVKQFLSKRIKILESF ..............................thh......................................................................................................................sttppWhpI..t..slp..shlQ-LucaAVpEaN..K.......s.s.s..sL+FpcVhpGh.h..clspss.hc.a+.LplhAtct......up.tpYcAhVh............................................. 1 0 8 14 +7259 PF07431 DUF1512 Protein of unknown function (DUF1512) Moxon SJ anon Pfam-B_21354 (release 10.0) Family This family consists of several archaeal proteins of around 370 residues in length. The function of this family is unknown. 25.00 25.00 62.80 62.80 24.00 23.20 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.06 0.70 -5.71 5 62 2009-01-15 18:05:59 2003-09-19 10:25:02 7 1 60 0 35 60 28 352.10 40 96.29 CHANGED ssslaaIlohlLFalLIhLL......pclphhhlhRsIEGpLuhLEchtN-Apppllchhcsccs+-s.....EohlcRhu-FFVIsPVslDPsGIIcRh++LLcpu-D+a+chlcRhsPptDsVpRuslpsAlEllsulshIaKaVRHhLloA+KTsNhhLLlQLQMlLPhlh+lu-oYp-AsNuFlKGVPIGDSAGPLVAupLlucusp...+Ec.p+-TVsuEsElEGR+LhIVKA+GPGSoVGRhGcAVEcllc+h.u...+sccIITIDAALKLEGE+TGuVAEGlGVAMGDsGsEKapIEplAlKYGIsLcAVlIKMShEEAlosMsc-IlsAVccAlERVK-IIpEsscPGDoVlVVGVGNTVGVuQ 
........h.....llthllah.lhlhlh........tp+lphh.hhtpslcstLshlcphl....s-ups....chhphLccts.....h..p-s.......cshlcR.hh-aFlI-PVsl-PssIls+h+pllcot--ph+chlp.thsPph..Dshp..hsplpsulEllsuLNhIYKVVRHYhlhA+KhsshhLlhQLQhllPhlhchu-AhpcAhssFhcGhPlGDuhGPLVAtplhhsssp....hps.s+DTVhuEs-a-GR+lhllKAcGPuuTVGRPG-AVcpll-chts....+lshIITlDAALKLEGEcTGslAEGhGVAhGssGsEKhsIEchAs+YsIPlpAlllKMShcEAITtMpKEIhpAspcshchVpclIh-pocPGssVlVlGVGNTsGVuQ... 0 13 19 26 +7260 PF07432 Hc1 Histone H1-like protein Hc1 Moxon SJ anon Pfam-B_21362 (release 10.0) Family This family consists of several bacterial histone H1-like Hc1 proteins. In Chlamydia, Hc1 is expressed in the late stages of the life cycle, concomitant with the reorganisation of chlamydial reticulate bodies into elementary bodies. This suggests that Hc1 protein plays a role in the condensation of chromatin during intracellular differentiation [1]. 20.50 20.50 20.60 20.50 19.80 20.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.91 0.71 -3.94 4 147 2009-12-07 10:22:16 2003-09-19 10:31:06 8 1 117 0 26 101 24 74.90 46 96.62 CHANGED ALKDTAKKM+DLL-SIQpDLtKAE+GNKAAAQRVRT-SIKLEKlAKlYRKESIKAEKpGLhp+K..PApKA.tAAKs.tpAuKsssKKAsAtssppuKAs.KuKPtoKKTusK.KsKpsShpRuhtK .........................cchhpKlp-LhtshppD.h........t........p......h......E+GNKAAGTRARKsSL-LEKlhKpFRKtS..lcAuK.................................................................................................. 0 14 23 25 +7261 PF07433 DUF1513 Protein of unknown function (DUF1513) Moxon SJ anon Pfam-B_21436 (release 10.0) Family This family consists of several bacterial proteins of around 360 residues in length. The function of this family is unknown. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.82 0.70 -5.53 37 313 2012-10-05 17:30:42 2003-09-19 10:33:19 6 4 305 0 85 300 24 293.50 40 81.71 CHANGED hslPsRuHulsscP............tpscuVshuRRPGpashlhDs.psGp.ht..tltusssRHFaGHGsFSsDGphLYsTENDh.ps.....ucGlIGVaDsp..p..sap+luEasotGIGPHElhhhsDG.....poLlVANGGI.cTcs-tGRtcLNL-oMcPSLsh.lsptsGpLlpphtLs.ph+phSlRHLAlsss..........GpVhhutQapGp.p-tsPLluhactGps.hphhthsppp...htshssYsGSlAsssstthlAloSP+Gsphtla.....ctsoGphltttslsDssGlushssG.................FlsoSGt.....Gp....hhphs.ttht.h...t.......sshtWDNHlhtl ...............h.slPsRuHuluh+P..................shs.pA.VsFARRPGpahhlhDh..psup........lt..hh.su.sssRHFYGHGVaStDGc....hLYATEs........-h.cs........uRGlIGVYDss..p...pap+luEassaGIG.PH-lh..h..hs..DG.......sLVVuNGGl.+Tc.t.GRt.L.....NL-oMpPSLsh....l.stsGplL-phsLstt.+pLSlRHLAhsuD......................GoVhsGpQap...Gpsc..-hs....s.LlAh+ptGts.hp....htst.-p...htthspYluSlAss.....s-..hlssTSPcGsphhlW.....stsoGpllt.ssLsDsuGVsshtsu.................FhloSGp.....Gc..............lhhtss.thp.......ph.................sslhWDNHhst.l.............................. 0 17 39 64 +7262 PF07434 CblD CblD like pilus biogenesis initiator Moxon SJ anon Pfam-B_21452 (release 10.0) Family This family consists of several minor pilin proteins including CblD from Burkholderia cepacia which is known to CblD be the initiator of pilus biogenesis [1]. The family also contains a variety of Enterobacterial minor pilin proteins. 
25.00 25.00 41.20 41.10 18.40 18.10 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.30 0.70 -5.75 5 262 2009-01-15 18:05:59 2003-09-19 10:39:02 6 1 240 3 7 118 1 303.90 54 91.10 CHANGED IlFIh..phllsStlhuhulpAssssGslscopolu.tDRusuoPl..a.IFssaluGYssSHsL.aDRhoFLCpSSoNsssGACPTscsstspthsGpTNI+LpFTEK+SLA++sLsLcGsK+alacsssCsp.............cMsLNSssspC.spstssGssLsLYIPAGELsKLPoGGlWcATLcLclKRau-sos..aGTYTlNITVDLTDK..GNIQVWLPpF+S.sPRVDLNLRPh.Guu+YSGoNsLDMCLYDGYSTpSsSl-l+FQDD...NpousGcYsLl....KoGus-KcLsYAlSLLhGGKplsPsNGpuFoINssS.L.lNWNRl+uVoLPpISlPVLCWPusLTLso...clssP-AGcYSGpLsITFTPSSpoL ....................................h...t.p.sho............................................................................................................................s.sG.shS..slplhhTEp+SGh+hsltLpGa.+suh..ssst.....h.ss...............h..h....s.ust..Sh........................t...tphlpLal.toEh+pLPIGGlWKu+lKL+.........uss...s................htsYhAsITLNshD...spIslaFPpF.upATPRVpLsLHPh....sNuSp....hu....t.c.s.LDMCLYDGasuNAhShplhlcD-.....ss+..psG.FSlahp.tspos....spscRIDYpVpMh.....sG.t..pIs...VpNspshshss.....lN.hp+l.RPVVLPGIRhAVhCVPsPLTLssptFsVh-KpAGhYhGpLoVhFTPSh.......................... 0 0 3 5 +7263 PF07435 YycH YycH protein Vella Briffa B, Szurmant H, Mistry J anon Pfam-B_21457 (release 10.0) Family This family contains the bacterial protein YycH which is approximately 450 residues long. YycH plays a role in signal transduction and is found immediately downstream of the essential histidine kinase YycG. YycG forms a two component system together with its cognate response regulator YycF. PhoA fusion studies have shown that YycH is transported across the cytoplasmic protein. It is postulated that YycH functions as an antagonist to YycG [2]. The molecule is made up of three domains, and has a novel three-dimensional structure. 
The N-terminal domain features a calcium binding site and the central domain contains two conserved loop regions [3]. 20.60 20.60 20.80 21.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.50 0.70 -5.77 6 596 2012-10-01 23:24:42 2003-09-19 11:16:32 6 2 586 1 74 392 0 421.40 30 97.23 CHANGED EshKSllLslLVlhSlVLTahlWsFQPDaus.....lsspcsc+s......ppshTtphspslpPhpllh.u+--psphshsssslhcchhssLpphclpslcclpccashs..h...usstI.......pl.FspslPhslF.splhpl-scs..hsphsFsRllIsh-tscsslt.lahlsc-+cpllchssospsh-plhcshcshpuchp.hsplIpsp.....ppchalPscspcLpshchlhsolsspphsphLFsDoohV...R..uSposssoYssuouVhshsscschhcY....pp.hs.-psoShpttclIpsSacFINuHGGaTs-..achash.scosploYphFlsGaPVaNppup.....spIpVTaGscslh-Y+RPhhclD.ss.hccscKpLPouEpVhsuLupps-lch-clssIslGYcMscsossst.....hVpLEPpWahKYcGcW......ahhp-uAtpulE .................................EthKollLslLVlhSllLTah.lW.....s..hs.Pch.ss..........l.s.s.sc.s.pps.................sps.h..s...tch.ss.s...l.pPhpllh.....+p-p..sp......h.t.........h..s..s...s..s.......l....s....cl.h..ps....L...c.s.t...c...l...c...s.l.p.c....lp...p...cp.shh........hs...sshl............lsFstslPls....s..a.tpl..h.s....hs....scs......ss.p.h.p....FNR.l.l..lDhst.s.cp.....l.Yhl..sc.-.p...c...p.h..hchp.h....o.s...p....s.....cc........lhctlsp...hp...p..c..h..p.....sa...p...chhhNp..................ppclah..P..s.csp..cLp..sh..phlh..ss..lsl.....-.p....h..ps.hLF..s...D.....s..s...h..V...c..ps..c.u.ss.s..s..Y..s....s..s.s.t.....s.h.p.h.s.s.c.sc....h.hcY....ps....s.sptss......sch.tphl.toF-FlNs.HG.Ghhs........p............aR...h..a..sh....t..........t.p....s......u...p......ls....a....ph.F.l....s.....G...aP..lF..N.p.c..uh.........spIpl.....s.h....G.pcs....V.hcYpR.....sLhphs..ss..h..s..s.p..p.p..hoLPsscs...VhssLtp.sss..ls.h..c.c.lpsIsIGYchppss.....pp..p.........sscL.PpWYVcY.c..sp..W...................thp....................................................................................................................... 
0 23 47 61 +7264 PF07436 Curto_V3 Curtovirus V3 protein Moxon SJ anon Pfam-B_21777 (release 10.0) Family This family consists of several Curtovirus V3 proteins of around 90 residues in length. The function of this family is unknown. 25.00 25.00 82.50 82.40 22.50 22.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.04 0.72 -3.85 3 24 2009-01-15 18:05:59 2003-09-19 11:25:43 6 1 17 0 0 28 0 79.30 84 97.79 CHANGED MVCLPDWLFLLFIFSILLQAGTNFYGTFQSGSISRKLSsLuSRFDELFlKlQQVVYTRYPSR-RTVDsRRRRGLSAIPEGuEEsoEA MVCLPDWLFLLFIFSILLQSGTNFYGTFQSGSISRQLSSLuSphDpLFLKlQQVVYoR..sSR-RusDsRRRRGLSuIPEGsEEssE.. 0 0 0 0 +7265 PF07437 YfaZ YfaZ_precursor; YfaZ precursor Vella Briffa B anon Pfam-B_21552 (release 10.0) Family This family contains the precursor of the bacterial protein YfaZ (approximately 180 residues long). Many members of this family are hypothetical proteins. 21.40 21.40 21.50 21.40 20.80 21.30 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.99 0.71 -4.85 3 659 2012-10-03 17:14:37 2003-09-19 11:26:33 6 1 610 0 85 306 5 178.50 63 95.94 CHANGED MsKatVAulAuLLLVAuSVNAsSFNupsG+-YTNlGlGLGTNTsGLAlSGNWs+SDDDGDVAGlGLGhNLPlGPhpATVGAKGlYhsP--GssGuAVAVGGGLuasIGsSFSLaGEuYYAP-sLSSG.lcSYpEANuGVRYNIhRPlol-VGYRYINhEGKDGsRDNslADGsYIGushsF ...............................................................................MKKhhL.A.G..h.AGMLh...V...SASs.sA.h.SISGQAGc-YTNIGV...G...F.G.TES....TG...LAL.S....G.NWsHN.....D..D.......DG.D............sA.G...V..GL.G...L..N.....l....P....l..G..P....L....hATVG..G..K.GlYTNP....p...t...G..D.E...G.YA.A...AV.GG..GL....QW...c..I..G..s..S....F...RLF..GEYY...YSPDS.LS........SG.Ic..SYEE.....AN...A.GARaT..IMRPlSIEAGYRYLNLu.G.K.D.GNR..D..N.Al.ADGPYVGVNASF....................... 
0 13 32 58 +7266 PF07438 DUF1514 Protein of unknown function (DUF1514) Moxon SJ, Eberhardt R anon Pfam-B_21857 (release 10.0) Family This family consists of several Staphylococcus aureus and related bacteriophage proteins of around 65 residues in length. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 29.20 29.20 24.70 23.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.16 0.72 -4.06 3 268 2009-01-15 18:05:59 2003-09-19 11:30:45 6 1 176 0 3 46 0 64.60 67 98.32 CHANGED MWIsISIVLAIhLLIsLSSNSc+t+EI-AL+YhNsYLFcclVKspGhpGLE-YchEl-RI+s-lKc MWIshoIVhAIlLLlslSlNS-+t+EIpAL+YMNDYLhDclVKspGasGLE-YchElcRhss-lKc......... 0 1 1 3 +7267 PF07439 DUF1515 Protein of unknown function (DUF1515) Moxon SJ anon Pfam-B_21875 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 130 residues in length. Members of this family seem to be found exclusively in Rhizobium species. The function of this family is unknown. 22.20 22.20 22.20 22.70 22.10 22.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.57 0.72 -4.25 3 22 2009-01-15 18:05:59 2003-09-19 11:41:41 6 4 15 0 12 21 0 107.50 46 88.61 CHANGED MIDAGVHQQLGTLlAEVKNLREDlR...........RSEDRSDAuRASMaRRMDELVERVsTLEGSsstlpuDITuMKPVT-DV+KWKLMGlGALGVIGIGGAALu..VT..FADVsKRsL.hlLRGG ..........Mh.usltppltsL.t-lcplR-Dh+...........cSED+SssuRssMpRRhDELVERltsL..E.....uuhthlcsDlupMKPVT-DV+..+WKLMGhGALG..VIGlGuAALG..VT..FADlh+Rhh..lh................ 0 1 6 7 +7268 PF07440 Caerin_1 Caerin 1 protein Moxon SJ anon Pfam-B_22039 (release 10.0) Family This family consists of several caerin 1 proteins from Litoria species. The caerin 1 peptides are among the most powerful of the broad-spectrum antibiotic amphibian peptides [1]. 
25.00 25.00 28.50 27.50 19.90 19.10 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -6.88 0.72 -4.02 2 28 2009-01-15 18:05:59 2003-09-19 11:44:48 7 2 9 0 0 22 0 23.90 85 64.33 CHANGED GLhpVLGSVAKHlLPHVsPVIAE+ .GLhSVLG.SVAKHVLPHVVPVIAE+ 0 0 0 0 +7269 PF07441 BofA SigmaK-factor processing regulatory protein BofA Vella Briffa B anon Pfam-B_21773 (release 10.0) Family This family contains the sigmaK-factor processing regulatory protein BofA (Bypass-of-forespore protein A) (approximately 80 residues long). During sporulation in Bacillus subtilis, transcription is controlled in the developing sporangium by a cascade of sporulation-specific transcription factors (sigma factors). Following engulfment, processing of sigmaK is inhibited by BofA. It has been suggested that this effect is exerted by alteration of the level of the SpoIVFA protein [1]. 22.30 22.30 22.40 22.30 22.10 22.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.40 0.72 -3.99 32 390 2009-01-15 18:05:59 2003-09-19 11:48:48 6 1 376 0 106 283 8 78.00 31 90.69 CHANGED llu....llllhllstshhpPl...+hlh+lslphllGslhLahlNhhGuhhGl....plsINhlTshluGlLGlPGllhLlllc.hl.l ......................h.lu.llhlhllhtl..h.tsl.......+hlhchlhpsllGsllLallN..lhG.s..hhsh.......aIsINhhTuhlsGlLGlPGVshLlllphh..h........ 0 42 82 93 +7270 PF07442 Ponericin Ponericin Vella Briffa B anon Pfam-B_21790 (release 10.0) Family This family contains a number of ponericin peptides (approximately 30 residues long) from the venom of the predatory ant Pachycondyla goeldii. These peptides exhibit antibacterial and insecticidal properties, and may adopt an amphipathic alpha-helical structure in polar environments such as cell membranes [1]. 
25.00 25.00 27.40 58.60 18.00 16.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.59 0.72 -4.13 3 5 2009-01-15 18:05:59 2003-09-19 13:47:52 6 1 1 0 0 5 0 29.00 71 97.32 CHANGED GWKDWhKKAGEWLKKKGPGIhKAALKAAT GWKDWlKKAGEWLKKKGPGIlKAALpAAT 0 0 0 0 +7272 PF07444 Ycf66_N Ycf66 protein N-terminus Vella Briffa B anon Pfam-B_21840 (release 10.0) Family This family represents the N-terminus (approximately 80 residues) of Ycf66, a protein that seems to be restricted to eukaryotes that contain chloroplasts and to cyanobacteria. 20.70 20.70 21.10 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.81 0.72 -4.26 16 155 2009-01-15 18:05:59 2003-09-19 16:08:48 6 2 108 0 55 167 131 82.00 43 29.81 CHANGED lNhshGPuolLGlhLsluGllLYhlRsh+PEluRDaDlFFuolGLLsGGILlFQGWRLDPILLFuQhLlsGTslFFuhEoIRLR ...............................................................lphshs.uslLGIhLul.uuhsLY...h.l.....Rph+PcluRDtD.lFF.....uulGL..LsGhILhFQ.GWRL..DPIL.FGQhLlsusslaFuhEolRLR............................ 2 14 41 53 +7273 PF07445 priB_priC Primosomal replication protein priB and priC Vella Briffa B anon Pfam-B_22037 (release 10.0) Family This family contains the bacterial primosomal replication proteins priB and priC (approximately 180 residues long). In Escherichia coli, these function in the assembly of the primosome [1]. 
20.50 20.50 20.60 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.21 0.71 -4.79 21 862 2009-01-15 18:05:59 2003-09-19 16:34:29 7 3 792 0 84 387 9 162.00 41 94.83 CHANGED LppLcsplspLtppsuslscpt.........phpshFDcpLFpsRuphLpsYlpEscpsLstLpptsppsph..tpssaLsE+LsuQIpAlpREluTpslRcp-sts...................pshsclYpcLupHQ-aERRLhsMlp-RchpLspsssh.cpQplp+ElhAhEuRLsRCRpALt+IEcpIsppE+ ..................................LppLctpLssLcpcsAslspht.................shpARF.D.....+pLFps+uspLpshL-EAtsNLsuL+psVcpppL............QVAaL...AE+LsAQ....lpAl.sREh...u.....sh.......s.......LRph-sss....................................plsc..h..p....c+RhQH.Q-aE.RRLtpMlt-....R....+tpLu...p....sos......h.sEQQsL..p+.E.lps.hEuRLsRCRcALcKIEppls+hpR...................... 1 7 24 57 +7275 PF07447 VP40 Matrix protein VP40 Vella Briffa B anon Pfam-B_22295 (release 10.0) Family This family contains viral VP40 matrix proteins that seem to be restricted to the Filoviridae. These play an important role in the assembly process of virus particles by interacting with cellular factors, cellular membranes, and the ribonuclearprotein particle complex. It has been shown that the N-terminal region of VP40 folds into a mixture of hexameric and octameric states - these may have distinct roles [1]. 
25.00 25.00 307.40 307.00 17.30 16.40 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.84 0.70 -5.36 2 35 2009-01-15 18:05:59 2003-09-19 17:02:16 7 1 20 4 0 33 0 287.30 60 91.60 CHANGED M......susssYNshh..hs..Phtspshsp.h.uD.Lus.tu.sPs.s......sslshssph.usVspAFhLEAhlsl.uhschhhKtlPhWLPLGlhsph.YshspTsAAlhhuSYTlTpFs+.up.hVRVNRLGsGIPsHPLRhLR.GNQAFlQphVlP.s..sp.FTasLTsLhL.sQ.LPsssWpsppsthhsNsh+PulSlHPpL.PIlLPshhtpth+tpc.ssss.l.sI.shl.pL+l..ls.hpslhtIplPt.hhphhpGhh.p.tpspPll.h.hPc.hsL ................h.......susssYNshh..lsssPhus+GANphIsuDQLus.pG.TPs.s......ssLshDsphtGsVspAFhLEAhlsl.uhNchhlKtVPhWLPLGIhuphpYshspTsAALLhuSYTITpFs+sup.hVRVNRLGsGIPsHPLRhLR.GNQAFlQphVlP.s..sp.FTasLTsLhLssQhLPsssWpsSpsplhuNsh+PulSlHPpL.PIlLPshptpth+QHc.ssss.lhAIuslL+pL+l.plstppSlatIplPtshFphhpGhhppptpGSPll.h.sPc.hsL.. 0 0 0 0 +7276 PF07448 Spp-24 Secreted phosphoprotein 24 (Spp-24) Vella Briffa B anon Pfam-B_22318 (release 10.0) Family This family represents a conserved region approximately 140 residues long within secreted phosphoprotein 24 (Spp-24), which seems to be restricted to vertebrates. This is a non-collagenous protein found in bone that is related in sequence to the cystatin family of thiol protease inhibitors. This suggests that Spp-24 could function to modulate the thiol protease activities known to be involved in bone turnover. It is also possible that the intact form of Spp-24 found in bone could be a precursor to a biologically active peptide that coordinates an aspect of bone turnover [1]. 
25.00 25.00 28.00 27.00 21.00 18.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.84 0.71 -4.22 3 49 2012-10-01 19:28:07 2003-09-19 17:20:33 6 1 38 0 21 46 0 115.00 49 61.56 CHANGED VLDEDoLVMNLEFoIQETTCRRESGtDPATCAFQRGYaVPTAVCRSTV+MSApQVQsVWARC+WSSSTSESsSSEEMIFGDMLGSH+pRNsYLLGLlsDES+GEQFYDRSlcIMRRsaPPGNRRYsNhp+RARVNoGFE .............................lspsslsMsL-FuIRETsCp+-SGcDPsTCuFpRGaaVPoAsCRSoVclSupQVQsVa.s+CpWuo.oSESpS.SEE..Mhasshhtspp.R.pp..hshh.p...t......................................................... 0 1 2 5 +7277 PF07449 HyaE Hydrogenase-1 expression protein HyaE Vella Briffa B anon Pfam-B_21851 (release 10.0) Family This family contains bacterial hydrogenase-1 expression proteins approximately 120 residues long. This includes the E. coli protein HyaE, and the homologous proteins HoxO of R. eutropha and HupG of R. leguminosarum. Deletion of the hoxO gene in R. eutropha led to complete loss of the uptake [NiFe] hydrogenase activity, suggesting that it has a critical role in hydrogenase assembly [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.39 0.72 -4.15 9 623 2012-10-03 14:45:55 2003-09-25 11:36:21 6 4 503 19 47 938 96 106.10 58 75.57 CHANGED LhpRLhsphGhshVD.ssslDcalss....tssuVLhluGDPsRhPEssDsAVlLPELhpsFssthhtullu..ttpE-tLusRFslh+hPoLlhhpcGpalGsLutlpDWsEY .............................................LWQRhLsR.GWpPVs.tspLDDW.lsp.....sP.D...GV..VLLSS..DP..+..RT..PE.VSDN.P.V...MIuELL...R..E....FP.s.a..o......W...Q.VA.lA...DLEQSEAIGDRFsVhRFPATLVFTs......GpaRGsL.sGIHPWAEL........................ 0 12 26 35 +7278 PF07450 HycH Formate hydrogenlyase maturation protein HycH Vella Briffa B anon Pfam-B_22086 (release 10.0) Family This family contains the bacterial formate hydrogenlyase maturation protein HycH, which is approximately 140 residues long. 
This may be required for the conversion of a precursor form of the large subunit of hydrogenlyase 3 into a mature form [1]. 25.00 25.00 28.80 28.80 19.60 19.30 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.85 0.71 -4.05 19 787 2009-09-12 21:51:00 2003-09-25 12:51:46 6 2 548 0 50 216 2 129.60 66 95.41 CHANGED scVlFapLs+KFVDpscs...sP-cApQVhYYSLAIGHHlGVlDCLcstLpCPhcpYppWlstLspG.EA+RKhpGlhpFGEIsIspsHsshLApA..........hstlststp.stppph..oppLlchLtsIppEPAIYLMV.R+p ........cVVF.pLs+KFlD.psDs...sPtcAQQVhYYSLAIGH.HlGVIDCLcsALsCPhsEY.sWlAsLEtG.-ARRKMtGV.KaGEIVIDhsHlshLApA..........FDcspsstT...SppQp..W..SphhlshLc-IpQEsAIYLMVRR.l.................... 0 6 16 32 +7279 PF07451 SpoVAD Stage V sporulation protein AD (SpoVAD) Vella Briffa B anon Pfam-B_22130 (release 10.0) Domain This family contains the bacterial stage V sporulation protein AD (SpoVAD), which is approximately 340 residues long. This is one of six proteins encoded by the spoVA operon, which is transcribed exclusively in the forespore at about the time of dipicolinic acid (DPA) synthesis in the mother cell. The functions of the proteins encoded by the spoVA operon are unknown, but it has been suggested they are involved in DPA transport during sporulation [1]. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.06 0.70 -5.99 41 555 2012-10-02 12:25:54 2003-09-25 13:15:53 6 4 414 6 107 452 34 322.00 51 97.04 CHANGED G+QThhFps.PhIlusuolVGP+EGcGPLuchFDhlhpD.hhGc-SaEKAEpphhc-AsptAlpKuslppc..-lcallAGDLLNQlhuooFAARslsIPalGLYGACST.sEuLuLuuhll-uGaAchllsuTSSHauoAE+QFRaPhEYGsQ+PsTAQWTVTGAGAsllu....ppGs................s.PplTtsTlGKVlDhGlpDs.NMGuAMAPAAsDTIhpHFcDhspsPscYDLIlTGDLGplG+pIst-LL.cccGhclspp.apDCGlhIacp-.QsstuGGSGCGCSAlVhsGalhcphpcGchKRlLlVuTGALLSssShpQsESIPuIAHAVsIE ........................G+QThhF.ps.pPhIhuouolsGPcEucGPLuc.FDhla.cD.hhhGpco...aEpAEppLhp-AhppAlpKuslcps..-...Icahl....AG....DLlN......Qh.s......s...o......s...Fu......A.R...p.....l.......s...l...P..a....lGla...G..ACSTuhEuLAluuhhlsuGhAchllsusSSHpuoAE+QFRaPsEYGuQ+PsTApWTVTGAGAsllu......pp.s..s...................................................u..s+lTuATlG+VlDhGlsDPhNMGuAMAPAAsDTIppHhcDhphssspYDLIlTGDLGpVGppIsh-LLp.ccGhc....l.spt..apDCGlhIYc..ps.Q.s.VhAGGSGCuCSAsVshGalLpchpcGchp+lLlVATGALLSPhohQQtEoIPsIAHAVslE.................................................................................. 0 54 86 92 +7280 PF07452 CHRD CHRD domain Hyvonen M anon Hyvonen M Domain CHRD (after SWISS-PROT abbreviation for chordin) is a novel domain identified in chordin, an inhibitor of bone morphogenetic proteins. This family includes bacterial homologues. It is anticipated to have an immunoglobulin-like beta-barrel structure based on limited similarity to superoxide dismutases but, as yet, no clear functional prediction can be made. Its most conserved feature is a GE[I/L]RCG[V/I/L] motif towards its C-terminal end Most bacterial proteins in this family have only one CHRD domain, whereas it is found repeated in many eukaryotic proteins such as human chordin (Swiss:Q9H2X0) and Drosophila SOG (Swiss:Q24025). [1]. 
24.10 24.10 24.10 24.20 23.90 23.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.62 0.71 -3.49 101 936 2009-01-15 18:05:59 2003-09-25 15:53:44 7 50 447 0 346 766 59 117.80 24 54.16 CHANGED pphsuhLsGsptss.sst...os...........usGpAhhslssss.........sLpa...........plp.hsGL.................ssssts+l.........s.tsppusllhsh................tsst....ssushp...s..lsstphpt.................LhsG....phalslpTpspss.GE..lRGplp .................................h..h.u.Lsu..tpths..ssp.......op...............usG...pA.hhp...hssst..............pLpa...........plh..hpsl.....................................stsssscl..........ch........s.hsppus..llhtl..........................tsst.hsGsh...p.......t...lsstphpt...................................LhpG.....phalslpTpspPs..GE...lRGpl....................................................... 0 104 190 262 +7281 PF07453 NUMOD1 NUMOD1 domain Pietrokovski S anon Pietrokovski S Domain This domain probably represents a DNA-binding helix-turn-helix based on its similarity to other families (Bateman A pers obs). 22.80 13.50 22.80 13.60 22.70 13.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.18 0.73 -7.71 0.73 -3.95 63 326 2012-10-04 14:01:12 2003-09-25 17:24:02 8 37 165 0 53 316 76 36.40 24 17.45 CHANGED tslhlashssph....l...tpFsSlpcAActLslspss.Isphl .................lh.hshssph....l....ppFpShpcAuctlslspss.Ipph............. 0 18 39 43 +7282 PF07454 SpoIIP Stage II sporulation protein P (SpoIIP) Vella Briffa B anon Pfam-B_21650 (release 10.0) Family This family contains the bacterial stage II sporulation protein P (SpoIIP) (approximately 350 residues long). It has been shown that a block in polar cytokinesis in Bacillus subtilis is mediated partly by transcription of spoIID, spoIIM and spoIIP. This inhibition of polar division is involved in the locking in of asymmetry after the formation of a polar septum during sporulation [1]. 
Engulfment in Bacillus subtilis is mediated by two complementary systems: the first includes the proteins SpoIID, SpoIIM and SpoIIP (DMP) which carry out the engulfment, and the second includes the SpoIIQ-SpoIIIAGH (Q-AH) zipper, that recruits other proteins to the septum in a second-phase of the engulfment. The course of events follows as the incorporation firstly of SpoIIB into the septum during division to serve directly or indirectly as a landmark for localising SpoIIM and then SpoIIP and SpoIID to the septum. SpoIIP and SpoIID interact together to form part of the DMP complex [3]. SpoIIP itself has been identified as an autolysin with peptidoglycan hydrolase activity [2]. 20.10 20.10 20.50 20.40 19.70 18.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.86 0.70 -5.23 49 584 2012-10-02 19:46:12 2003-09-26 10:34:07 6 5 411 0 125 491 7 270.30 29 71.69 CHANGED lphpD.....h.shhsp-lshhsthpsphhhstpts..sph..psssshp...pttpt.......................................sspcshVhIYHTHosESYh.P.....t..sssst.....p.slstVGctLsptL.cptGIsshpDcT..ha-h.....saspuYppSRpslpchLpppsshphllDlHRDu....................hp.ppth...sspIsGcshA+lhFVlGpp............sspacpNhphApplpshh-chYPG......Ls+Glhhts.t.....pYNQDLsspAlLlElGu.sNoh-EApposchlA-.lluc .........................................................................................................................................th.....h.......t....t.tt.....pt.....p.....p...tp.p...................t.....ptt..........p...........................................................sssc.tlhIYHTHotESYh...P.h..........t.sssst.........phNlshVGchLpcpL.cppGIs.shp..DcT.......hpch.......saspSYptS+pslp......c......hLtpstslphhlDlHRDu....................hp...+.phs....oppIsGK.shA+lhFVlGpp................NssaccNhphApplpphhscpYPG......loRGlhhKs..t.....thYNQDLospulLIElGussNTh-EhpposcsLAcshu.p......... 
1 67 105 113 +7283 PF07455 Psu Phage polarity suppression protein (Psu) Vella Briffa B anon Pfam-B_21666 (release 10.0) Family This family contains a number of phage polarity suppression proteins (Psu) (approximately 190 residues long). The Psu protein of bacteriophage P4 causes suppression of transcriptional polarity in Escherichia coli by overcoming Rho termination factor activity [1]. 21.20 21.20 21.20 21.20 20.10 21.00 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.10 0.71 -4.82 5 269 2009-01-15 18:05:59 2003-09-26 11:05:58 6 1 159 \N 19 169 0 163.10 41 85.17 CHANGED MTT.VTLQQAF-uCQNNKsAWLsRKsELAAAEQEYcELLhuG-css..sRRLQpLR-lIDVKKWElNQAAGRYIRSHEpVQ+ISIRDRLsDFMQQHGAELAAALAPELMGYppQsshl+psAlQ+SVDYLREALoVWLA......AGEKINYSAQDsDILTsIGFRPDAASRDDNREKFTPAQNhIYoRRRApLAup ...........................................................................................................................................................spphp..Rphl-VhcWplNpAAG+YIpoHctl.cIsh+stLpDFMQpHGsALsuALA..PpLM..G.pp.suhh..hsp..ulpcusshLR-ALhpaLs......pGs....hNYuh.-pD.ILsthGhtPDssuhpDspppaTPAQ..phha....pp+ps.lst............................................... 1 0 3 6 +7284 PF07456 Hpre_diP_synt_I Heptaprenyl diphosphate synthase component I Vella Briffa B anon Pfam-B_22032 (release 10.0) Family This family contains component I of bacterial heptaprenyl diphosphate synthase (EC:2.5.1.30) (approximately 170 residues long). This is one of the two dissociable subunits that form the enzyme, both of which are required for the catalysis of the biosynthesis of the side chain of menaquinone-7 [1]. 
30.00 30.00 30.30 30.30 29.60 29.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.62 0.71 -4.37 47 636 2012-10-03 02:46:00 2003-09-26 13:39:27 6 4 611 0 123 444 27 143.80 39 79.07 CHANGED LsuhAlsltllEshIPhPh.h..PGsKLGLANllslluLhh.hu.h+pulhlsllRllluulhtGsh.o.sFhhShAGullShlsMhll.pph.......tcthShlGlSlhGAlhHNlGQLhlAuh.l.....lpshslhhYLPlLhlsGlloGhhhGlhuph....l ...LsA.ulllullEs.hIPhP.hsh..P.GsKLGLANllolluLah...hs.h+-uhhlhhlRl.lLssllsGsl.o.sFhaShuGulLShhsMhll.pph.h.........cp.lSllGlSssGuhhHNlGQLllAuh.l......hpshsl.h.h.YLPlLhhhGlloGlhlGlsush........................... 0 68 110 116 +7285 PF07457 DUF1516 Protein of unknown function (DUF1516) Vella Briffa B anon Pfam-B_22136 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 120 residues long. 29.90 29.90 30.10 31.50 29.70 29.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.67 0.72 -3.97 22 454 2009-01-15 18:05:59 2003-09-26 14:02:50 6 1 435 0 48 197 0 111.10 44 89.66 CHANGED M...hH.....hHIhoWllhlILFhlAhhhasp...ts+tsKhlHMllRLhYllhlloGhhL...hlpt.......hsshthhhhlKhlhGlhlIuhhEhhls+ppK...t+sopshahhhllslllThhLG .......M.lHlHIhSWVLulILFlssY.h.hSp...ts.hhKslHMlLRLFhlLsllSGFhl...llpph.....ssuusHMLhsLKMLsGlhVlGlMEhsluK+K+....pc.tops.hahlhI.sllllThhLG......... 0 11 30 41 +7286 PF07458 SPAN-X Sperm protein associated with nucleus, mapped to X chromosome Vella Briffa B anon Pfam-B_22197 (release 10.0) Family This family contains human sperm proteins associated with the nucleus and mapped to the X chromosome (SPAN-X) (approximately 100 residues long). SPAN-X proteins are cancer-testis antigens (CTAs), and thus represent potential targets for cancer immunotherapy because they are widely distributed in tumours but not in normal tissues, except testes. They are highly insoluble, acidic, and polymorphic [1]. 
21.90 21.90 23.60 22.40 20.40 20.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.32 0.72 -3.76 9 92 2009-01-15 18:05:59 2003-09-26 15:59:36 7 2 12 0 10 83 1 92.30 49 79.27 CHANGED M-p.pSSssG.KRpoPC-SNp.tN-pM.ETPstD.sPcPu.KKhKTSE.ST.llVlpYR+phKI...sSspL.NDpSpENsINPlQ.EE-Ehh-hpsEospp ................................M-p.pSSssG..KRp.sPC-SNp.tN.......-......p......M..ETPsts.sP.p.u.KKhKTSE.sT.llVhpYR+shKh...ssspL.NDp.upENpINPlQ.EE-Ehh-...t.st.c....................................................................................................................... 0 10 10 10 +7287 PF07459 CTX_RstB CTX phage RstB protein Vella Briffa B anon Pfam-B_22203 (release 10.0) Family This family contains a number of RstB proteins approximately 120 residues long, including RstB1 and RstB2, from the Vibrio cholerae phage CTX. Functional analyses indicate that rstB2 is required for integration of the CTXphi phage into the V. cholerae chromosome [1]. 20.30 20.30 20.90 23.30 18.50 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.59 0.71 -4.23 2 78 2009-01-15 18:05:59 2003-09-26 16:48:33 6 1 49 0 7 34 0 109.50 79 94.27 CHANGED MKl.........VsFGhSpS.GluppsstPY.I.sLFVGKPIRQWKsDKG.s.s.Ghpp.El.F.SsDthhpKlcphAFPsLVphcsEP.PEDPo+NllIshpVlCoLaDsVPtsK. .............MKSRFVVFGASHSEGVS.KTG..APYLIPVL.FVGKPIRQWKNDKGQCLTFGLQHQEV..KFVS.SDAMTRKLE..Q..T..A..FPVLVTFDNEPDPEDPSRNLVIDYQVVCSLFDNVPGGKP.................... 0 3 4 5 +7288 PF07460 NUMOD3 NUMOD3 motif (2 copies) Pietrokovski S anon Pietrokovski S Motif NUMOD3 is a DNA-binding motif found in homing endonucleases and related proteins. It occurs on its own or in tandem repeats in GIY-YIG (Pfam:PF01541) and HTH proteins. It constitutes a beta-turn-loop-helix subregion of the the DNA-binding domain of I-TevI homing endonuclease (Swiss:P13299) [1]. 
20.40 9.30 20.40 9.30 20.30 9.20 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.98 0.72 -4.06 25 485 2009-12-15 13:13:21 2003-09-27 14:40:31 6 26 140 2 73 456 321 32.80 29 23.99 CHANGED hGtK+oEEoKpKhSp........h.ttshhGKp+o-Eo.KpKlSc ................................h.hoc-s+p+hSp.................t.tps.hhGKp.t...otEs.+thhp................ 0 37 62 68 +7289 PF07461 NADase_NGA Nicotine adenine dinucleotide glycohydrolase (NADase) Moxon SJ anon Pfam-B_21586 (release 10.0) Family This family consists of several bacterial nicotine adenine dinucleotide glycohydrolase (NGA) proteins which appear to be specific to Streptococcus pyogenes. NAD glycohydrolase (NADase) is a potential virulence factor. Streptococcal NADase may contribute to virulence by its ability to cleave beta-NAD at the ribose-nicotinamide bond, depleting intracellular NAD pools and producing the potent vasoactive compound nicotinamide [1]. 28.50 28.50 29.00 31.80 25.60 28.40 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.43 0.70 -5.80 2 89 2009-01-15 18:05:59 2003-09-29 10:54:18 6 1 32 2 4 78 0 423.80 87 95.30 CHANGED MRNKKVTLAHIVAKTSVAIALAGAMGSSLLANSTTYAVSGKENKKSDVKYETTKVMEANATSSKEDNHVMHTLDGSMSTVWEENSPGGGVGEVLSYKFsSPM+IGRILIVNGDTSSKENYYKKNRIAKADVKYYNtNKLVLFpKIELGDTYTKKPHHIEIDKKLDVDRIDIEVTEVHQGQNKDILALSEVTFGNhERDlFEKKFKEIKDKWVTDKQADEFIETADKYADKAlQMSAVASRAEYYRMYVSRKYHYKKEFVEKLKQVYKESGASHVTSKKDLMLAFDDAK+KSTIGRQENGLFVTSFAEDMALLFTDQGKLKSADQIENIKGVDSGKYSDGVYQYEYDSELTKNIDKLGYIRTASGDTPGANSLNIPGCQTWSGKHIENSESELIFPSISVKDLKSKAVLAEIDAKGYFEIIDPTIIAPNGDHKKVTGRFKIKKMQDR 
..........................MRNKKVTLAHIVAKTSVAIALAGAMGSSLLANSTTYAVSGKENKKS..DVKY.........ET.TKV..MEANATSSKEDNHVM...HTLDGSMSTVWEENSPGG.GVGEVLSYKFASPM+IGRILIVNGDTSSKENYYKKNRIAKADVKYYNGNKLVLFQKIELGDTYTKKPHHIEIDKKLDVDRIDIEVTEVHQGQNKDILALSEVTFGNhERDlFEKKFKEI...KDKWVTDKQADEFIETADKYADKAVQMSAVASRAEYYRMYVSRKYHYKKEFVEKLKQVYKESGASHVTSKKDLM.LAFDDAK+KSTIGR..Q...ENGLFVTSFAEDMALLFTDQGKLKSADQIENIKGVDSGKYSDGVYQYEYDSELTKNIDKLGYIRTASGDTPGANSLNIPGCQTWSGKHIENSESELIFPSISVKDLKSKAVLAEIDAKGYFEIIDPTIIAPNGDH.KKVTGRFKIKKMQDR..................................................... 0 1 2 2 +7290 PF07462 MSP1_C Merozoite surface protein 1 (MSP1) C-terminus Moxon SJ anon Pfam-B_21542 (release 10.0) Family This family represents the C-terminal region of merozoite surface protein 1 (MSP1) which are found in a number of Plasmodium species. MSP-1 is a 200-kDa protein expressed on the surface of the P. vivax merozoite. MSP-1 of Plasmodium species is synthesised as a high-molecular-weight precursor and then processed into several fragments. At the time of red cell invasion by the merozoite, only the 19-kDa C-terminal fragment (MSP-119), which contains two epidermal growth factor-like domains, remains on the surface. Antibodies against MSP-119 inhibit merozoite entry into red cells, and immunisation with MSP-119 protects monkeys from challenging infections. Hence, MSP-119 is considered a promising vaccine candidate [1]. 
20.40 20.40 20.50 20.40 19.80 20.30 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.96 0.70 -6.12 4 835 2009-01-15 18:05:59 2003-09-29 11:07:15 6 6 34 0 10 628 1 341.40 56 39.29 CHANGED Shss+uESE--h.ssshEl-phYcsaLpplDs..NspFhpFlKSKK-lIsuLT.pKhNtLhhEIt+LK-h.ph.aD+YhKYKLKLERLapK+cpIpsuppQIKcLolLKs+L.+Rpp.lNssa.VLpsassFFNK+REAEKp.V-NsLKNT-hLLKYYKARsKYasuEusPLKTLocsSlp+EsNYLplEKFRshSRLEhRLpKNIpLGKE+ISYlSuGLHHVhpEhKEllKNKsYTGposs-NsscVpcAhcpYcELLPKssTtpAssss.ssTPsssssussstusssuuuuGuuuGpssssTstpl...G.utss......................Vlsh.ussDDDD--hDQlsoGpupstp.csILsAFcsE.-hlYhKsLussYKSlKKph.+chsshppslsshLNs+LcKRN.FL-VLsp-Ls.FKclSoNcYlI+sPYpLLDs-KKcK.lhshKYhtcuVscDIcTAsDGIpaaNKMlELYKspLsAVpcQIctltst.ss..c-cKK......................KYlPhhpsLcsLYEollsps--Yh-sLpp+lsshplEKsEh- ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................A..o...s..-..llssh.s-Y-VlY.LKPLAGMYKoIKKQLENHVsAFNTNITDMLDSRLKKRNYFL-VLsSDLNPFKYSS.SGEYIIKDPYKLLDLEKKKKLlGSYKYIusSIDhDlsTANDGluYYNKMt-LYKscLsuVpppIKclEsph.t..-ch..cKh.........spsspss.p.sp.hAcKtE.cKYLPFLNSlpKEYEoLVsKlsoYopN.LKKhIsNCQlEK+EAE............................................ 2 1 2 7 +7291 PF07463 NUMOD4 NUMOD4 motif Pietrokovski S anon Pietrokovski S Motif NUMOD4 is a putative DNA-binding motif found in homing endonucleases and related proteins [1]. 
23.40 23.40 23.50 23.50 23.00 23.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.47 0.72 -4.08 55 364 2009-09-11 07:03:11 2003-09-29 11:41:44 6 21 293 1 46 303 100 52.20 30 27.43 CHANGED EhWKsI...G.acs...YpVSshGRV+S..................hpps+lLp.ths...sGY..hhVsLt.ps .......EhW+sIt.....u..a-s.....Ypl..SshG.+V+o..................htps+lL+sphs..t..sGYhhVsLh........................................................... 0 13 30 37 +7292 PF07464 ApoLp-III Apolipophorin-III precursor (apoLp-III) Moxon SJ anon Pfam-B_31170 (release 10.0) Family This family consists of several insect apolipoprotein-III sequences. Exchangeable apolipoproteins constitute a functionally important family of proteins that play critical roles in lipid transport and lipoprotein metabolism. Apolipophorin III (apoLp-III) is a prototypical exchangeable apolipoprotein found in many insect species that functions in transport of diacylglycerol (DAG) from the fat body lipid storage depot to flight muscles in the adult life stage [1]. 31.00 31.00 31.00 31.50 30.90 30.90 hmmbuild --amino -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.16 0.71 -4.20 8 45 2009-01-15 18:05:59 2003-09-29 14:54:15 6 2 36 1 18 50 0 143.30 27 77.83 CHANGED lEKHAtEFQKTFSEQhNulsNS..KssQ-VNKAlK-GSDSVLQQLssluoSLQuAlsDANGKAKEALEQsRpNlE+TAEELRKAHPDVE+pAspLRD+LQAAVQuTlQEoQKLAKEVuuNh-ETN-KLAPpIKpAYDD...FVKpsEEVQKKlHEAAoKQ ...............................pht-h.pshppphpphhs.......pssp-lscslK-socshlpplpshtsslpstlpc.ssschcpslcphppplpcTsccLpc..s.pP-lpcpAspLpp+lQsulQshspEspKluKclupssppss-cLsstlKpsaDs...hscsspclpcclppAsp.............. 0 4 8 17 +7293 PF07465 PsaM Photosystem I protein M (PsaM) Moxon SJ anon Pfam-B_21548 (release 10.0) Family This family consists of several plant and cyanobacterial photosystem I protein M (PsaM) sequences. PsaM forms part of the photosystem I complex and its binding is stabilised by PsaI [1]. 
20.80 20.80 20.90 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.52 0.72 -7.03 0.72 -4.34 40 300 2009-01-15 18:05:59 2003-09-29 15:43:26 8 1 285 2 33 181 1 28.70 48 91.33 CHANGED IoDoQIalALllALlsulLAlRLGppLY+ ............l.-sQlllAL..hlAhhsulLAl+LGpsLYp.. 0 12 24 31 +7294 PF07466 DUF1517 Protein of unknown function (DUF1517) Moxon SJ anon Pfam-B_22018 (release 10.0) Family This family consists of several hypothetical glycine rich plant and bacterial proteins of around 300 residues in length. The function of this family is unknown. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.98 0.70 -5.42 27 295 2009-01-15 18:05:59 2003-09-29 15:46:33 6 6 196 0 143 297 48 193.10 20 68.03 CHANGED tAtSGGRIGGGSFp.uP.......SsPpo.ss.suuhh..................GGGhGaPFllPh......aGhGG.G..GLFuhLlhhulsshllpshRssttt...........ssttst.hssstVolsplQVGLLApA+sLQpDLpplAtpADTsossGLppVLQEsoLALLRpP-hWVYusscss.pssh....suAEu....pFNpLolpERSKhspEohSNlssppppsstst.ss.s.t.........-sssEYIlVTlLVAucupl.pLPt.lsous-LRpALptLGulsuscLlAlEVlWoPpscGDsLot-ELlssYPpLphL .....................................................................................h....uGt..GGt.uFt..ts.............s.ss.pt..t.tsss......................t......................us...hhs...hs.hhsh...............................hGhuh....t....t...h...hs.h.l....lhhhlshhlh.thhpt.............................................................................................................................................................................................................................................................................................................................................................................................. 
1 31 88 132 +7295 PF07467 BLIP Beta-lactamase inhibitor (BLIP) Finn RD anon Pfam-B_41444 (release 10.0) Family The structure of BLIP reveals two structural domains, which form a polar, concave surface that docks onto a predominantly polar, convex protrusion on beta-lactamase. The ability of BLIP to adapt to a variety of class A beta-lactamases is thought to be due to flexibility between these two domains [1]. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.16 0.71 -4.63 3 21 2012-10-01 23:09:26 2003-09-29 16:55:43 6 1 15 29 4 43 0 132.40 29 91.48 CHANGED VK.RluRouluLsAAAGAVLATAoSApAsoGFTuEKYsQIQFGMTRspVW-IAGAEtuC-TGGshGDSIlCasc.uGDYuPYGsFuFTuA...uKLhSKRQEcLasAssPTl+LA+YN+TslGMTEAQlWAAVPpDSCoohuEpYPNWPATsGspccYsC.....sSuTGhFsPSAaFaFTDGVLTuRpQpsL .................h.....................hll..A..sshuhs.AsothTsEpYppIphGMspsEVhsllG..ut.ss...C.s-..ss..h.................................................................................................................................................................................................. 1 1 1 3 +7296 PF07468 Agglutinin Agglutinin Finn RD anon Pfam-B_57133 (release 10.0) Family \N 25.00 25.00 27.50 25.80 23.20 17.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.00 0.71 -4.23 2 39 2012-10-02 19:42:32 2003-10-03 15:43:28 6 5 9 8 23 50 0 139.20 29 55.88 CHANGED hthPhhhshhpsNNtKYLth.o..IpQhshLQFuhDpl.DPhstaph.s..T.sGhlpIKSpYhNKahhh.osN..WIhssuN-PcE..oN.AssLF+s...-.tshphlpLLphQhsaahcpaTsGts.Flshh.Atop.lDpsu..lhclI- ..................................................hlhhpssNscYLphhs....hp..thshLpFuu--lsDPhstapl.s..s..tcG..hV+I+ssahsKaW+.c.....o...sN........WIhAsus-scp..os.ssTLFcPlhl.-.tphp.hlthpphttspaspphs......hsCL.Ass..tp.p............................................... 
0 0 11 23 +7297 PF07469 DUF1518 Domain of unknown function (DUF1518) Finn RD anon Pfam-B_1525 (release 10.0) Domain This domain, which is usually found tandemly repeated, is found various receptor co-activating proteins. 25.00 25.00 28.90 28.90 24.20 23.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.85 0.72 -3.82 17 247 2009-01-15 18:05:59 2003-10-06 13:27:18 7 13 39 0 91 216 0 57.30 44 5.65 CHANGED uhsuthussthPQussQQFPasPNYGhuQQs-PuFssuhSP.SshMSsphu.oQSsMh ...uhsuphusPthPQussQQFPYPPNYG..h..up..Qs-PuFsssh.SP.oshhssphssoQssMh........... 0 4 9 29 +7298 PF07470 Glyco_hydro_88 Glycosyl Hydrolase Family 88 Finn RD anon Pfam-B_10896 (release 10.0) Family Unsaturated glucuronyl hydrolase catalyses the hydrolytic release of unsaturated glucuronic acids from oligosaccharides (EC:3.2.1.-) produced by the reactions of polysaccharide lyases [1]. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.22 0.70 -5.40 19 2447 2012-10-03 02:33:51 2003-10-06 14:06:07 8 26 1025 25 672 2054 111 336.70 21 81.37 CHANGED hhc+hspthh.c.s..........h......c........WspGlhhtG.hh.tsa-hTtccc....Yhphsps..........ascphhccssp..................hplDphshGhhh....hh....lYchst.Dpchhtsshpht-.lltphs+htcGshh+.....tph..hphW.............lDsLaMusPhhhph....uphhs-...cah-cshppahhptcp.................hhDscsGLaa+uac....pspp.hus........hWuRupGWhhhulschlch......LPp.pcss+p.lhplhpshs.ps.lt+hQc.psGhWapllccss...shhEoSuoAhhlYulh+ul............ppGh....l.scpYtpssp+uacullcp...........lpc-Gphhlppsssssuhst............YtshshtpspsaG.GhhlhAhhEhh+hh 
...........................................................................................................h......................................tt...p..............Ws.pGhhhhG...hh..hha....c....ho....t....c....pp..............hh.ph..s.pp.........................hh.p.p.h..h..t..phsp.............................phpl.s.p...h..s.h.G.h..h.h.............hh............hY....c.....h.........s........s....s........t........ph..tp.h...h..h.phA-......hhp...p..h.........p.........t.....p.G...s..h..hp...............................tp.t..p.tpha.............................lDsl..hM..sl....h..L.h.hh...................uphs....s..c.t......ca.h.-..Ahpphh..hp..hpp..................................hh.D.s...c...o.....Ghh..h.+..uhs........t..p.u..h......p..s.us................................hWuRGpuWshhuh..sthhch...............l.......p....p........p......s......t........t...p.......h.......h.......h.......p......h.hpt....hh..pt..l.......h..c......h...............c....p........s..u.....h...W.....p........h............h....h........s.....c.......s...........s.......................s.........................h-oSAoA.hh...shu....l...h..p..tl...................................pp.th..............h.......t.....p...p....Y.....t...p....s....s...pc....t...hpulhpp...................hs.t....s....G..p............l..t...t.s.hhh.pu...t.................................p.sh...s...h..s.s................aG.shhal.Ahhch....h.............................................................................................................................................................................. 0 272 506 604 +7299 PF07471 Phage_Nu1 Phage DNA packaging protein Nu1 Finn RD anon Pfam-B_11430 (release 10.0) Family Terminase, the DNA packaging enzyme of bacteriophage lambda, is a heteromultimer composed of subunits Nu1 and A. The smaller Nu1 terminase subunit has a low-affinity ATPase stimulated by non-specific DNA [1]. 
30.00 30.00 30.30 30.20 29.90 29.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.96 0.71 -4.77 11 762 2010-01-12 09:52:22 2003-10-06 14:22:59 7 4 374 2 31 345 35 138.20 50 83.26 CHANGED McVNKKpLA-IFGsulRTIppWQcQGhPVl+GGGKGsEslaDoAuVI+WYA-R-AEIENEKLR+EVEELRtAuEsDLpPGTI-YER+RLTRAQADAQELKNA+-suEVVETAFCTFVLSRIAuEIuSILDGIPLSlQRRF..PELENRHlDFLKpDIIKAMNKAAA .................................................................lNK+phAc.hsss.pshppW.t....p....Ps.p.......K.G..EshaDsAssIp...hpR.cs..p..c..s..p+Lp.c-.ht-...lct..s.s.pt....D..ps..spl.h..tRaRL.TcAQA-AQE..LKNs.+-pucVl-TsFChFsLS+lAtpIuSILD.u.lPLohQRpF..P-LpsRHlDhLKp-IhKAhNpuA......................................................................... 0 8 18 28 +7300 PF07472 PA-IIL Fucose-binding lectin II (PA-IIL) Finn RD anon Pfam-B_17609 (release 10.0) Family In Pseudomonas aeruginosa the fucose-binding lectin II (PA-IIL) contributes to the pathogenic virulence of the bacterium. PA-IIL functions as a tetramer when binding fucose. Each monomer is comprised of a nine-stranded, antiparallel beta-sandwich arrangement and contains two calcium cations that mediate the binding of fucose in a recognition mode unique among carbohydrate-protein interactions [1]. 20.90 20.90 20.90 22.40 20.70 20.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.15 0.72 -3.83 12 85 2009-01-15 18:05:59 2003-10-06 15:00:50 6 5 53 100 25 63 2 108.60 48 54.85 CHANGED FpLPsshcFGlTAasNou.tpQTlcValDDp.t..sTasGpGssss.huTpslsSG.sG+Vplpl.usGKPScLtup.ssLss........KsshullGuEsGsDsD..YNDulVlLNWPL ...........FsLPsNhpFGlTAasNou.spQTIcValssssp..ATFpGsuoss.....s........luTplLNSG..sGKVplpVosNG..KPScLsSpQsslss........cssFu..lVGSEDGoDsD..YNDulVVLNWPL.... 0 2 9 15 +7301 PF07473 Toxin_11 Spasmodic peptide gm9a Moxon SJ anon Pfam-B_90829 (release 10.0) Family This family consists of several spasmodic peptide gm9a sequences. 
Conotoxin gm9a is a putative 27-residue polypeptide encoded by Conus gloriamaris and is known to be a homologue of the "spasmodic peptide", tx9a, isolated from the venom of the mollusk-hunting cone shell Conus textile [1]. Upon injection of this venom component, normal mice are converted into behavioural phenocopies of a well-known mutant, the spasmodic mouse [2]. 25.00 25.00 33.40 33.40 21.50 20.30 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.67 0.72 -4.02 2 4 2012-10-01 22:06:18 2003-10-06 16:07:17 6 1 4 1 0 5 0 27.50 46 37.54 CHANGED .CspuCpS.scCsp+ChCp.ctpsshpt ..CNNSCQpHScCsSHClCphctCssVNt 0 0 0 0 +7302 PF07474 G2F G2F domain Bateman A anon Bateman A Domain Nidogen, an invariant component of basement membranes, is a multifunctional protein that interacts with most other major basement membrane proteins. The G2 fragment or (G2F domain) contains binding sites for collagen IV and perlecan. The structure is composed of an 11-stranded beta-barrel with a central helix. This domain is structurally related to that of green fluorescent protein Pfam:PF01353. A large surface patch on the beta-barrel is conserved in all metazoan nidogens. 
23.50 23.50 23.60 25.40 21.10 23.40 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.16 0.71 -4.93 5 296 2012-10-01 21:32:51 2003-10-27 12:53:42 7 164 81 2 160 235 0 182.50 35 9.46 CHANGED cGuPQRlNGpVpGpIpVls.c...hslsFsssDLHuYlVos-GRuaTAIS.slPpsLGpuLpPlssIGGllGWLFAcEp.ssuhNGFoLTGGpFsRcspVcF..csG..clpIcQpacGlDs-.....saLsl+hslcGpVPpIsssuoVpIcDYoEhYppotsGllTSpSTRsaTVcs.....uu.spTasYolDQTITF-pCpc+.scls ......................................................GsPpRspGpltGplpss...p....hslsh.ssslps.sssssuph.aouIo.....p.lPpslG..uhhsLsslhuslhWhhAhE..ssshNGFo....lT...G..utFp+pspVpF...tsG...EhLplsQphcGLDsc.......shLhlcs.lpGpV.Pplsssuslplp.sYpE.Yh.o.su........lhutSoRpas.l.st...........tshsYphppsIpYppsttt....s................................ 0 26 38 83 +7303 PF07475 Hpr_kinase_C HPr Serine kinase C-terminal domain Bashton M, Bateman A, Moxon SJ anon COGs Domain This family represents the C terminal kinase domain of Hpr Serine/threonine kinase PtsK. This kinase is the sensor in a multicomponent phosphorelay system in control of carbon catabolic repression in bacteria [1]. This kinase in unusual in that it recognises the tertiary structure of its target and is a member of a novel family unrelated to any previously described protein phosphorylating enzymes [1]. X-ray analysis of the full-length crystalline enzyme from Staphylococcus xylosus at a resolution of 1.95 A shows the enzyme to consist of two clearly separated domains that are assembled in a hexameric structure resembling a three-bladed propeller [2]. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.79 0.71 -4.95 81 2208 2012-10-02 15:24:17 2003-10-28 16:53:30 7 7 2083 29 470 1375 236 159.20 43 53.76 CHANGED ppLAsppslHGVLlDVaGlGVLIpGcSGlGKSEsALELlcRGH.RLVADDsV-lpchsppt.LhGp.uPcllcchlElRGlGIIslpsLFGhsul+spppIpLllpLppWcpp..ppaDRLGh-.ppptcILslclPplplPVcsGRNlAlIIEsAAhNa+hKphG.asuscpFpcRl ...................t.pLA.psolHG.VLlDla.Gh....G....VLIpG-SGlGKSEsAL........E........L....l.......c......R......G......H.......R......LVADD.........tV.-l...h.........p...h.sc.p............p...Lh.Gp........s...P..c...l...l.ca....L.l...E.I..RGlGIIsVhsLaGsuul+sppplpLslpLcpappp........ctaDRLG...p.p..p..p.hclh.s.splsplplPV+sGRNlulllEsAAhNaRhp..hG.hsssppFtcR................................................................... 0 158 314 385 +7304 PF07476 MAAL_C Methylaspartate ammonia-lyase C-terminus Bateman A, Moxon SJ anon COG3799 Family Methylaspartate ammonia-lyase EC:4.3.1.2 catalyses the second step of fermentation of glutamate. It is a homodimer. This family represents the C-terminal region of Methylaspartate ammonia-lyase and contains a TIM barrel fold similar to the Pfam:PF01188. This family represents the catalytic domain and contains a metal binding site [2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.55 0.70 -5.45 14 185 2012-10-02 01:07:48 2003-10-28 16:58:51 6 4 176 12 41 2518 572 245.40 60 60.15 CHANGED slshcslPlFuQoGD-RYhNsDKhIlKcs-VLPHuLhNsV-.KlG.cGEcLh-YlpWLppRlhpL.uscsYpPhlHlDVYGTlGthFss..Dhc+hs-YLupLccuAtPatLpIEGPhDsGs+ttQIctLstLRptLccpGlsVclVADEWCNThEDI+tFsDupAucMVQIKTPDLGGlsNol-AlLYC+c+GlGAYlGGoCNETDhSAclssHlALAspPsQhLuKPGMGlDEGlMIlpNEMpRslAlhcp .....................................................................................................s..sEsIPlFGQSGDDRYlsVDKMILKGlDVLPHALINNVE-KLGh+GEKLtEYVcWLu-RIlsh.RsuscY.+.PsLHI.D.V.....YGT...IGllF-h..Ds......hRsApYIAoL.EcpApshs.L.h.I.EG.PlDAG.sK.s.cQIchLsuls.ccLs.+hGos.....V+IVA.DE.W.C.NT.a.p.D....I.h.-.F.sDAsusHMVQIKTPDLG.uIcNllDAVLYCpp+uhpAY.pGGTCNETDlSARsCVHVALAsRPhpMLsKPGMGFDEGl.IVaNEMNRTlALLp.s........................................... 2 16 26 35 +7305 PF07477 Glyco_hydro_67C Glycosyl hydrolase family 67 C-terminus Finn RD, Moxon SJ anon CAZY Domain Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the C terminal region of alpha-glucuronidase which is mainly alpha-helical. It wraps around the catalytic domain (Pfam:PF07488), making additional interactions both with the N-terminal domain (Pfam:PF03648) of its parent monomer and also forming the majority of the dimer-surface with the equivalent C-terminal domain of the other monomer of the dimer [1]. 
20.30 20.30 20.80 28.80 19.30 19.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.52 0.70 -4.95 38 293 2010-01-08 16:36:07 2003-10-29 13:35:56 7 7 257 17 125 306 20 220.20 42 31.06 CHANGED oGphhAQANhYuaGRLAWsPshsocpIscEWlRhTF.......us.cpcVlcslsp............hhhpSapAa.sY.ssPLGlpahsshs....a.HYGPsPtsp-tss......Wus..YH+ADcsGIGhDRT.poGoG...hsuQYss.lAcha-slcTsP--LLLWFHHVPasa+L+SG+TllQclYcp+acGscpVpshhpsWcuLcutlDscRapcVtt+LphQtpcAhhWRDuhssYFhphSG..IP-thsRst.a ..................................sGp.hAQANhYAaGRLAWsPslsucpIscEWl+hTF...............us.-.tpl.....lcslpp................hhhpShcsh.sY.psPLGltahhs.s.....+..HYGPsPts.c.hst....................Wss......YH+ADppGIGhDRT.uo..G..os...hsuQYhs.lspha-slposP-cLLLaFHHVPasa+L+S..G+T...lhpclhspahcGlcpspphhppWp.s.Lcs..h..l...D.s..........c..........RapcVtp+LphQtccAh.WRDshssYF.phSG..IPsthtR.............. 0 42 84 108 +7306 PF07478 Dala_Dala_lig_C D-ala D-ala ligase C-terminus Bateman A, Moxon SJ anon PSI-BLAST 2dln Family This family represents the C-terminal, catalytic domain of the D-alanine--D-alanine ligase enzyme EC:6.3.2.4. D-Alanine is one of the central molecules of the cross-linking step of peptidoglycan assembly. There are three enzymes involved in the D-alanine branch of peptidoglycan biosynthesis: the pyridoxal phosphate-dependent D-alanine racemase (Alr), the ATP-dependent D-alanine:D-alanine ligase (Ddl), and the ATP-dependent D-alanine:D-alanine-adding enzyme (MurF) [3]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.90 0.71 -4.93 18 6597 2012-10-10 13:17:03 2003-10-29 13:51:30 8 33 4445 68 1275 21237 8280 190.70 33 59.00 CHANGED LhpssGlssssahslp+pchppts......htph.tpLGaPlFVKPAptGSSlGloKVpst-ELpsAlcpAhpYDpcVllEcsl.sGcElpsulLG..Nts.hplussscIthss......uFYDYcsKY....s.ssuphhlPAsLstphppplpchAhcsY+sLus+GhARlDFFL.spcGplhLNElNThPGFTshShaPphhtssGlsascLlspLlp ............................................................................................................hpstGl.sh.ss..a..h...s.l.pp.sp.....p.t....................t.h.....p..p....L..s.h.....P.l.h.VK..P..u..p....t..G..S........S..l.....G.....l.....o....K.....V.......p....s.......p.......p.......p......L.......p.......t........A......l........c..........h.........A...........h..........p............a...........D.........p..............c.............V......l......l..Ep.hl.....s.G...........c......El...p...s...u....l..L.G...........p....tt......sp...s...h......s...s...s..E...I..h.h..ss...........................sF...Y..D...Y...-..u.K...Y...........................................s.....s.....p...........s.....p...h.........h.......h......P.........A.....p.................l.......s......s.........c.........h..t...p...p.....l..pp..hAhp.Aa..p.....s.....L.......s......s....p.......G....h...u..R.l...D..................h.....h........l.......s................t............c.........s........p.....h......h..l.E..lN.....T.hP.....G..h.....T.........s.....h.........S.h.........h.Ph.........h.........h.p.s.s.G.lsaspLlpcll...................................................................................................................... 
0 456 861 1094 +7307 PF07479 NAD_Gly3P_dh_C NAD-dependent glycerol-3-phosphate dehydrogenase C-terminus Finn RD, Bateman A, Moxon SJ anon Prosite Domain NAD-dependent glycerol-3-phosphate dehydrogenase (GPDH) catalyses the interconversion of dihydroxyacetone phosphate and L-glycerol-3-phosphate. This family represents the C-terminal substrate-binding domain [2]. 25.00 25.00 27.20 26.30 24.30 24.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.81 0.71 -4.49 27 5512 2012-10-02 19:36:47 2003-10-29 14:12:16 9 20 4754 22 1400 3895 2018 143.50 41 42.43 CHANGED sDlhGVElsGALKNVlAlAsGhsDG......luhG.....sNsKuullphGLhEhp+huthhssssp....Taht.sGluDLlsTChu..uRNp+hGptlu+s...pshcplpcph.......GthlpGstTscplhplhpppsl...............chPlhpslYpllhpthpspchlppL .............sDllGVEluGAlK.............NllAluAGhs.cG...........l.G..hG..........sNu+.......AALlTRGL......sEhsRlG........s...s...h...Gupst.................TF.h.G.LuG..lGDLllTCou.....S..R............Nh+hGhtLGpG...............pslcph.pph.....................GtVlEGh....tosc...sl....h.p..lAc.c..h.sl...............cMPIspslap.lLap.stssccsh...h........................... 0 469 881 1177 +7309 PF07481 DUF1521 Domain of Unknown Function (DUF1521) Yeats C anon Yeats C Family This family of unknown function is found in a limited set of Bradyrhizobium proteins. There appears to be a periodic -DG- motif in it. 
21.00 21.00 22.40 22.20 20.60 19.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.22 0.71 -4.55 5 27 2009-01-15 18:05:59 2003-11-03 12:43:51 6 2 16 0 10 27 0 155.60 39 56.14 CHANGED WSH.EV+DGKATIcLGDKYoIslDEKDGThplRNsQTG+lT+I+GDPHVDANGDGKcDFDFKKsMTFQLDDGTKITVDTVsYGp..GcTlASKLTITNGDNAMVVEGLGDccDGKNNL+VTQSNAGRTLDpLTsDGAQTIaEtoGpGWVDs.oG+tVTQAsIstsEsssssushp ....................ppupupIphsDtYoIpss-psushplpNsp.TGcsT.plaGDPHVDssGDGcssFDFK+shTFpL-DGTKITVsTssaGs..GtTloS+LTITsGcs.uhhVpGlscsc..pssLplppu.ssGhhlD.hssDG.phh.p..sstGal..s...tGt...ssQt.hs.sc.........t...................................................... 0 5 6 8 +7310 PF07482 DUF1522 Domain of Unknown Function (DUF1522) Yeats C anon Yeats C Family \N 25.00 25.00 25.00 111.30 20.60 19.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.40 0.72 -3.90 20 54 2009-01-15 18:05:59 2003-11-03 16:03:44 6 2 9 0 40 58 0 111.70 54 27.03 CHANGED oTLsVNGKTITFKsussPsuu...slssGSGls..GNllTDGsGNSTVYLss......ATVsDlLsAIDLAoGVpoATl.usGsATlussu.....GssuS...hsuuGsLsLsTuTGuDLSIo.Gsush .oTLsVNGKTITFKsussPsus...slsoGoGls..GNlhTDGsGNSTVYLts......uTVsDVLpAIDLAoGV+oAol.usGsATluTus.....GsssS...ssuuGtlpLsououADLolTGsush..... 0 8 14 20 +7311 PF07483 W_rich_C Tryptophan-rich Synechocystis species C-terminal domain Yeats C anon Yeats C Family This domain is found at the C-terminus, normally between 2-3 copies, of a range of Synechocystis membrane proteins. This domain is fairly tryptophan rich as well. 
20.70 20.70 21.20 20.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.52 0.72 -4.37 14 123 2009-01-15 18:05:59 2003-11-03 16:47:07 6 23 22 0 33 93 105 104.40 32 17.49 CHANGED s.sTSLlttsssYahs.hssGosstLKYsGtshssGpFGshsPluA.pTusG.apVsWpssuTspaohWsTDuNGNahSp.oshVsGsShps.shEssFtpDlNGDGslG ............................thulltt..shahs.husssssslKat.G.ps.hssuphGsWpsl..uA.ET.ssGh.QVLWp..sss...ssphsVWsoDuNhNalu..Ssshss.sSsphhshEssFphDlNsDuhlG................... 0 13 20 20 +7312 PF07484 Collar Phage Tail Collar Domain Yeats C anon Yeats C Domain This region is occasionally found in conjunction with Pfam:PF03335. Most of the family appear to be phage tail proteins; however some appear to be involved in other processes. For instance Swiss:Q03314 from Rhizobium leguminosarum may be involved in plant-microbe interactions ([1]). A related protein Swiss:Q9L3N1 is involved in the pathogenicity of Microcystis aeruginosa. The finding of this family in a structural component of the phage tail fibre baseplate (Swiss:P10930) suggests that its function is structural rather than enzymatic. Structural studies show this region consists of a helix and a loop ([2]) and three beta-strands. This alignment does not catch the third strand as it is separated from the rest of the structure by around 100 residues. This strand is conserved in homologues but the intervening sequence is not. Much of the function of Swiss:P10930 appears to reside in this intervening region. In the tertiary structure of the phage baseplate this domain forms part of the 'collar'. The domain may bind SO4, however the residues accredited with this vary between the PDB file and the Swiss-Prot entry. The long unconserved region maybe due to domain swapping in and out of a loop or reflective of rapid evolution. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.77 0.72 -4.18 181 1934 2009-01-15 18:05:59 2003-11-03 17:20:07 7 82 865 5 447 1768 405 51.40 41 13.56 CHANGED GpIhhauhs......hs........P.........pG.....WhhCsGphlshsp...ssLau...llGshaG.Gs..GtsoFsLPDLRGchs ..........................GsslsaP.us......ss.........P...................................sG............ahhssGp...sa.st.sp...YPpLhs........................................u.........hso.......h..sLPDhRGhh...................... 0 134 266 347 +7313 PF07485 DUF1529 DUF1259; Domain of Unknown Function (DUF1259) Yeats C anon Yeats C Family This family is the lppY/lpqO homologue family. 25.00 25.00 27.20 27.10 24.10 24.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.50 0.71 -4.38 43 461 2009-01-15 18:05:59 2003-11-04 10:36:12 6 3 198 0 107 299 12 120.00 35 80.42 CHANGED ss.plsplLGpKGshs..sGVa+lshsRtDlplshpGhslss.s..hulsshhuFps.sss.spAhhhGDhVLlpcEVsPVhpsLppsGIpVTAlHNHhLt-pP+lhahHhtuhscsscLApsl+sALcth ...........ttlsphLG.ppGphs..sulh+hslsRpDhplphpGhplss..s..hulsohhuFps...sss.GcAhlhGDhVlhpcElssVhpulpstGIploAlHNHhLp-pPplaahHhtuhs.DsssLA+sl+sAL-t....... 0 45 78 91 +7314 PF07486 Hydrolase_2 Cell Wall Hydrolase Yeats C anon Yeats C Family These enzymes have been implicated in cell wall hydrolysis, most extensively in Bacillus subtilis. For instance Swiss:P50739 is expressed during sporulation as an inactive form and then deposited on the cell outer cortex. During germination the the enzyme is activated and hydrolyses the cortex([1]). A similar role is carried out by the partially redundant Swiss:P42249 ([2]). It is not clear whether these enzymes are amidases or peptidases. 
21.70 21.70 21.70 21.90 21.50 21.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.71 0.72 -3.53 170 1705 2009-01-15 18:05:59 2003-11-05 10:54:44 7 29 750 0 448 1369 387 105.20 32 42.38 CHANGED cGEshpGplAVAsVllNRV.c......sspFPs..olsuVlaQs.........QFohs.ss.......................Gph.p.h........ssp....puhc...sApps.L..sGt...sssssAhaa..as.sts..s.s.p...Wh...shphsspIGsHhFac ...........tGEshpGplAVusVllNRV..c..........ss.p.FPs.......olssVlaQs.......stFsss.ss..............................................................Gphth..........sp.....pshp....sAccs.l........sGt.................s.....ss.s...........sAlaa..assts......s...s..p...........Wh.....st.hh...tplGpHhFhp.............................................................. 0 191 328 375 +7315 PF07487 SopE_GEF SopE GEF domain Finn RD, Moxon SJ anon Pfam-B_18665 (release 7.8) Domain This family represents the C-terminal guanine nucleotide exchange factor (GEF) domain of SopE. Salmonella typhimurium employs a type III secretion system to inject bacterial toxins into the host cell cytosol. These toxins transiently activate Rho family GTP-binding protein-dependent signaling cascades to induce cytoskeletal rearrangements. SopE, can activate Cdc42, an essential component of the host cellular signaling cascade, in a Dbl-like fashion despite its lack of sequence similarity to Dbl-like proteins, the Rho-specific eukaryotic guanine nucleotide exchange factors [1]. 
19.60 19.60 20.80 22.60 18.70 16.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -10.86 0.71 -4.28 2 188 2009-01-15 18:05:59 2003-11-05 11:19:39 8 3 146 6 6 80 0 160.40 71 70.50 CHANGED AVLTsKsVKDFMLQpLNslDI+GsASKDPAYApQTpEAILSAVYSpNKDQCCpLLISKGlsIsPFLpEIGEAApNAGLPGphKNsVFTPuGAGANPFlsPLIuSAs.KYP+MFINppQQsSFKhYAEKIlMpEVsPLFNcssMPTPQQFQLhlENIANKYlQNss ........................AVLTsKsVKDFMLQpLNslDI+GNASKDPAYARQTCEAILuAVYSNNKDQCCKLLISKGlSITP.FLKEIGEAAQNAGLPGE.hKNGVFTPGGAGANPFVlPLIAuASlKYP.HMFINH..NQQVSFKAaAEKIlMKEVsPLFNcsoMPTPQQFQLTlENIANKYLQNAS............................................................ 0 3 3 4 +7316 PF07488 Glyco_hydro_67M Glycosyl hydrolase family 67 middle domain Finn RD, Moxon SJ anon CAZY Domain Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the central catalytic domain of alpha-glucuronidase [1]. 
20.30 20.30 21.30 21.30 17.90 19.60 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.83 0.70 -5.54 37 290 2010-01-08 16:36:33 2003-11-10 10:33:56 7 7 257 17 125 302 12 316.00 52 44.22 CHANGED L+hltput....slssls.........hhpsPpstlRhlNcW....DNlD......GolERGYAGtSIFahs...................shsths.sRhtpYARhLASlGINulVlNNVNAps......plLospalpcluplADlFRPYGI+laLSlNFASPhp....lGG...LsTuDPLD.cVhsWWpppsscIY.phIPDFGGFLVKAsSEGQPGPhs.YGRTHA-GANMLAcALpPaGGlVhWRAFVYsp...p.p.-hpsDRA+uAY-pFpPLDGpFcDNVllQIKNGPIDFQVREPsSPLFGuh.+TshhlEhQITQEYLGQpsHLsYLsPhWc...ElLcaDsascGcs.SpVtcllsG..ph....aspphuGhAGVuNlGsDpNW ..................t.ttshppls.htpsPphtlRhlNHW....DN.hD......GolERGYAG...t..S..lahhs....................sh....th...sRhpcYARh.ASlGINusV...lNNVNups................phLospaLp.clu.tlA-lFRPYGI+laLSlNFuuPhp...............l....GG........L..sTuDPLD.pVtpWWcppsccIY.ph.IPDFGGFLVKA.sSEGQ..PGPhs.YG..RoHADGANMLAcAL....tP..a.G...GlVhWRAFVYs.......p-p...............psDRA+tAYccFpPLD.........GpFcDNVllQlKNGPlDFQsREPhSPLF.Guh.+TshhlEhQITQEYLGQppHLsYLuPhW+.EhLcaDTh.s..cG......c......s......S.....pVtc.llsGph..........hs....p.t.....hs..GhAuVuNlGsDtsW...................... 0 42 84 108 +7317 PF07489 Tir_receptor_C Translocated intimin receptor (Tir) C-terminus Moxon SJ anon PRINTS Domain Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. 
The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation [1]. This family represents the Tir C-terminal domain which has been reported to bind uninfected host cells and beta-1 integrins although the role of intimin binding to integrins is unclear. This intimin C-terminal domain has also been shown to be sufficient for Tir recognition [2]. 20.50 20.50 21.00 97.30 19.70 20.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.38 0.70 -4.59 6 133 2009-01-15 18:05:59 2003-11-11 10:58:32 6 2 104 5 1 76 1 210.30 70 40.11 CHANGED lESNAQAQpRYD-QpAKRQpELslSSGsGYGLSGALILGGGIGAGVTAALHRRNQPsEQpTTTTTpTs......................lpNpsusNTsAQG...NsDToGsE-.outoRRsSpuSsAS.phSDTSSs-TV.NPYA-VshopNssph..ctsEEsIYDEVAAD.PsYSsIQ+huGssPso.uRLlGsPGpGIQSTYALLA.SGGLRhGMGGLTGGupSAsSosNssPsPGstRFV .IEsNAQAQpKYDEQpAKRQEELplSSGsGYGLSGALILGGGIGsuVTAALHRKNQPsEQ.TTTTTp.Ts.......................V-NpPuNNTPAQG...NsDTsGuE-.o.tSRRsS.ASsuS.saSDTSShGTVpNPYADVttuhpDS.s..ppSppsl.s.sssD..ssYSsIQH.stsosss.uRLlGsPutGIQSTYAhLA..SGGLRhsMGGLTGGup.SAVsTuN..ssPsPGstRFV............. 0 0 0 1 +7318 PF07490 Tir_receptor_N Translocated intimin receptor (Tir) N-terminus Griffiths-Jones SR, Moxon SJ anon PRINTS Family Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. 
The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation [1]. This family represents the Tir N-terminal domain which is involved in Tir stability and Tir secretion [2]. 25.00 25.00 31.00 31.00 21.70 19.80 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.66 0.70 -4.77 4 135 2009-01-15 18:05:59 2003-11-11 11:08:53 6 3 104 0 1 78 0 256.50 76 48.76 CHANGED MPIGNLGHNsNVss.IPPAPPLPSQTDG.AtGupGQLIsSoGsLGSRhLFTPlRNSVADusDuRuSDlPGLPsNPhRlAA..SEloLpsGFEVLHD+GsLDTLNptIGSSsFRVETQ-DGoHlAIGQKNGlETSVlLS-QEauSLQuIDPEGKs+FVFTGGRGGAGHuMVTVASDIsEARQRIl-KLEPKsot..........................csopSGusNSuEsp...ssopopTSTSTSSLRSDPKLWLSLGoIAAGLIGhAATGIAQAlALTPEPDDPTTTD .MPIGNLGHNPNVNs.IPPAPPLPSQTDG.A.GGRGQLINSTGPLGSRhLFTPlRNShADSuD.sRASDlPGLPsNPhRLAA..SEloLpsGFEVLHD+GPLDTLNpQIGSSlFRVETQ-DGpHlAlGQ+NGlETSVVLS-QEaupLQSIDPEGKsKFVFTGGRGGAGHAMVTVASDIsEARQRIL-hLEPKsTGtppsts....................c-opSGu.sosps.....hoETpTSTSTSSLRSDPKLWLuLGTlAsGLIGLAATGIsQALALTPEPDsPTTTD.... 0 0 0 1 +7319 PF07491 PPI_Ypi1 Protein phosphatase inhibitor Wood V, Studholme DJ anon Pfam-B_11090 (release 10.0) Family These proteins include Ypi1, (Swiss:P43587), a novel Saccharomyces cerevisiae type 1 protein phosphatase inhibitor [1] and ppp1r11/hcgv (Swiss:O60927), annotated as having protein phosphatase inhibitor activity [2-3]. 19.50 19.50 19.60 25.50 18.50 18.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.15 0.72 -4.34 38 331 2009-01-15 18:05:59 2003-11-11 14:35:32 6 8 258 0 231 305 1 66.50 40 41.30 CHANGED lL+LRssp............................ppc++VpWsEsVVDNEphsKKKSKlCCIaHts.+sasE...........SSo-......ssuss.- ...............................................pLp......................................psc++VpWspssVD.NEths+KpSK.....s.........C...CI.Y+Ks.+sas..E....................SS.o.-s.ss-p.pp................................ 
0 78 125 191 +7320 PF07492 Trehalase_Ca-bi Neutral trehalase Ca2+ binding domain Finn RD, Bateman A, Wood V, Studholme DJ, Moxon SJ anon Prosite Domain Neutral trehalases mobilise trehalose accumulated by fungal cells as a protective and storage carbohydrate. This family represents a calcium-binding domain similar to EF hand. Residues 97 and 108 in Swiss:O42893 have been implicated in this interaction. It is thought that this domain may provide a general mechanism for regulating neutral trehalase activity in yeasts and filamentous fungi [1]. 25.00 25.00 25.90 25.00 24.30 16.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.36 0.72 -4.62 19 181 2010-01-08 16:37:03 2003-11-11 16:42:46 6 3 147 0 122 182 1 30.00 62 4.12 CHANGED LcpLLppEDTDpNhQITIEDpGPKVlsLGT ..LcpLLppEDTDpNhQITIEDsGPKVlslGT.... 0 37 70 105 +7322 PF07494 Reg_prop Two component regulator propeller Yeats C anon Yeats C Repeat A large group of two component regulator proteins appear to have the same N-terminal structure of 14 tandem repeats. These repeats show homology to Pfam:PF01011 and Pfam:PF00400 indicating that they are likely to form a beta-propeller. This family has been built with artificially high cut-offs in order to avoid overlaps with other beta-propeller families. The fourteen repeats are likely to form two propellers; it is not clear if these structures are likely to recruit other proteins or interact with DNA. 20.40 15.10 20.40 15.10 20.30 15.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -7.26 0.75 -7.61 0.75 -3.21 668 7311 2012-10-05 17:30:42 2003-11-17 13:40:01 6 214 468 96 1696 7326 915 23.90 33 6.00 CHANGED slsss.tlh..slhp...DppG.plWluops ..............Lsss.plp...slhp...DppG.plWlGTt..... 
1 834 1511 1628 +7323 PF07495 Y_Y_Y Y_Y_Y domain Yeats C, Bateman A anon Yeats C Domain This domain is mostly found at the end of the beta propellers (Pfam:PF07494) in a family of two component regulators. However they are also found tandemly repeated in Swiss:Q891H4 without other signal conduction domains being present. It's named after the conserved tyrosines found in the alignment. The exact function is not known. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.15 0.72 -4.22 79 2976 2012-10-03 16:25:20 2003-11-17 14:52:01 8 220 527 18 642 2956 292 64.40 23 6.25 CHANGED shssspphpYpYhlpuh-.spWhphss.s...........phsassLssGpYplplpspsptsphs.s..tplslplt ..........................tspphpYpYpLcG..h..-..pp....Whp.h.sstp..............phsaosL..ssG.pYphpV+.up.sps.u.thstp....tplphhl................... 0 299 555 608 +7324 PF07496 zf-CW CW-type Zinc Finger Yeats C anon [1] Domain This domain appears to be a zinc finger. The alignment shows four conserved cysteine residues and a conserved tryptophan. It was first identified by [1], and is predicted to be a "highly specialised mononuclear four-cysteine zinc finger...that plays a role in DNA binding and/or promoting protein-protein interactions in complicated eukaryotic processes including ...chromatin methylation status and early embryonic development." Weak homology to Pfam:PF00628 further evidences these predictions (personal obs: C Yeats). Twelve different CW-domain-containing protein subfamilies are described, with different subfamilies being characteristic of vertebrates, higher plants and other animals in which these domain is found [1]. 
23.20 23.20 23.50 23.60 22.90 23.00 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.38 0.72 -4.26 52 682 2009-01-15 18:05:59 2003-11-20 14:53:00 10 54 121 3 435 667 21 48.90 35 6.09 CHANGED thWVQC-..pChKWRpLP..tphs...pplsc..tWhCphNs.....ssth...ssCsssE-ht ........hWVQC-..pCh.KWRpLP....tphs......pphsc..pWhCshNs.....ssph......spCss.PE-................... 0 120 218 310 +7325 PF07497 Rho_RNA_bind Rho termination factor, RNA-binding domain Finn, RD anon Pfam-B_1610 (release 11.0) Domain The Rho termination factor disengages newly transcribed RNA from its DNA template at certain, specific transcripts. It it thought that two copies of Rho bind to RNA and that Rho functions as a hexamer of protomers [1]. 21.00 21.00 21.10 21.40 20.50 20.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.42 0.72 -4.34 16 3771 2012-10-03 20:18:03 2003-11-21 13:57:14 7 13 3677 47 859 2307 2356 77.10 55 16.60 CHANGED lau-GlLE.lLsDGFGFLRSs-ssYLsGsDDIYVSPSQIRRFsLRTGDolpGpIRsPKEuERYFALl+V-slNh-sPEp ..................hscGlLE..IL...........D.....G.aGFLRosp.sYLsGs.-DIYVSsSQI.RRFsLRTGDslsGplR.........s.......P..............K.-........G.............E..............R............Y...........FALL+Vsp..VNhcsPE............................... 0 304 582 738 +7326 PF07498 Rho_N Rho termination factor, N-terminal domain Finn, RD anon Pfam-B_1610 (release 11.0) Family The Rho termination factor disengages newly transcribed RNA from its DNA template at certain, specific transcripts. It it thought that two copies of Rho bind to RNA and that Rho functions as a hexamer of protomers [1]. This domain is found to the N-terminus of the RNA binding domain (Pfam:PF07497). 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.76 0.72 -4.19 139 3972 2012-10-03 03:04:30 2003-11-21 14:02:28 7 30 3568 44 1023 2585 1952 41.90 35 9.71 CHANGED -L.cphsls-LhplAc.clsl.c....shuphRKp-LIFsILcspucpss ........pL.cshslsELhplAc.ph.sl.c.....s.huc.....h+.Kp-LlaAIl+tpscps............ 0 349 705 889 +7327 PF07499 RuvA_C RuvA, C-terminal domain Finn RD, Bateman A anon Pfam-B_1373 (release 11.0) Domain Homologous recombination is a crucial process in all living organisms. In bacteria, this process the RuvA, RuvB, and RuvC proteins are involved. More specifically the proteins process the Holliday junction DNA. RuvA is comprised of three distinct domains. The domain represents the C-terminal domain and plays a significant role in the ATP-dependent branch migration of the hetero-duplex through direct contact with RuvB [1]. Within the Holliday junction, the C-terminal domain makes no interaction with DNA [1]. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.28 0.72 -3.79 100 4234 2012-10-01 23:03:33 2003-11-21 15:47:17 8 7 4202 23 886 2640 702 46.70 32 23.40 CHANGED shhs-ulpALhuLGYppp-Apcslppl.....ssshs....s-p.hI+tAL+hlt ...............pEAlpALluLGYptpEup+....slpplt........tssto.............s-p..hI+pAL+th.h............... 0 290 583 752 +7328 PF07500 TFIIS_M Transcription factor S-II (TFIIS), central domain Finn RD anon Pfam-B_1373 (release 11.0) Domain Transcription elongation by RNA polymerase II is regulated by the general elongation factor TFIIS. This factor stimulates RNA polymerase II to transcribe through regions of DNA that promote the formation of stalled ternary complexes. TFIIS is composed of three structural domains, termed I, II, and III. The two C-terminal domains (II and III), this domain and Pfam:PF01096 are required for transcription activity [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.45 0.71 -4.02 84 1033 2009-01-15 18:05:59 2003-11-21 17:01:31 9 29 337 6 638 962 110 113.30 28 17.99 CHANGED pscplRspsh.chLhsuLttt..tpp.................ssppstplAtplEp.............................tlaphh....ts...ss...pcY+p+hRSlhhNLKc.+NssL+p+llsGplospp...........LspMospEhAS-ch+ptc.cphpccslppsph .............................................................................................................pthRppsh.chLhtsLhtt.......pt...t...........................................tppstp.lAtpIEp.......................................................................................................t.lap.h................ps....ss..................tcY+s+hRSlhhNL.KD.+Ns..sL+cpVL...sGp..loPpp...........ls.pMos.-EhAScEl..pphc.cphpccslpph.............................. 0 179 295 458 +7329 PF07501 G5 G5 domain Bateman A anon Bateman A Domain This domain is found in a wide range of extracellular proteins. It is found tandemly repeated in up to 8 copies. It is found in the N-terminus of peptidases belonging to the M26 family which cleave human IgA. The domain is also found in proteins involved in metabolism of bacterial cell walls suggesting this domain may have an adhesive function. 20.90 20.90 20.90 20.90 20.10 20.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.40 0.72 -4.03 140 4313 2009-01-15 18:05:59 2003-11-21 17:13:06 7 239 1219 7 576 3447 28 77.50 25 10.80 CHANGED clpppppsppcp..lsapsppppDssL.pGp.pcVhppGpsGh+phshplshp..sGpcls+p.llspplhp.tPhscllthGTcp ......................................h....p.psp.pcp..ls.F.ps.pppcsssL....t......G......p...pp......V....hp.......p.......GpsGp..+p..h..h..hplhh.....sG..p...c..........s....p.....cp.h.......l..sp.p.l....sp...ts.hsclVclGTc.................................... 
0 260 400 487 +7330 PF07502 MANEC MANSC; MANEC domain Mitter R, Fitzgerald S, Guo J, Studholme DJ anon Guo J Domain This region of similarity, comprising 8 conserved cysteines, is found in the N-terminal region of several membrane-associated and extracellular proteins [1]. Although formerly called MANSC (for motif at N terminus with seven cysteines) it has now been renamed by MANEC (motif at N terminus with eight cysteines) by Richard Mitter and Stephen Fitzgerald after the discovery of an eighth conserved cysteine. It is postulated that this domain may play a role in the formation of protein complexes involving various protease activators and inhibitors [1]. 23.90 23.90 24.20 25.00 23.80 23.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.67 0.72 -3.99 14 245 2009-01-15 18:05:59 2003-11-25 16:33:57 9 11 78 0 128 211 0 89.60 31 20.16 CHANGED ssspsChsp.....Fps.htshlIcoptSlstGApFLcus.pVtohc-ClpACCossp......CslAlh-tptsss.ss......CaLFsCh..sp.sCpFssppGasoY ..................s..t.C.tt..........h.shllcsptSlp...t.GApaLps...plpstccClpuCCppps......CNlAlh-tptsts.tss..............CaLhpCh....p...p.sCpFssppGahsY..................... 0 24 32 69 +7331 PF07503 zf-HYPF HypF finger Yeats C anon Yeats C Domain The HypF family of proteins are involved in the maturation and regulation of hydrogenase ([1]). In the N-terminus they appear to have two Zinc finger domains, as modelled by this family. 23.00 23.00 23.10 23.10 21.80 22.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.76 0.72 -4.53 185 2924 2009-01-15 18:05:59 2003-12-02 16:07:56 7 13 1412 12 778 2421 117 34.90 45 9.25 CHANGED hCspCtc.Eap-Pt..s.RRa+tphhuCspCGP+hslhp ...hCssChp.EhpDPt..c.RRa+h..lsCscCGP+hpll....... 
0 258 529 666 +7332 PF07504 FTP Fungalysin/Thermolysin Propeptide Motif Bateman A, Yeats C, Rawlings N anon Yeats C Motif This motif is found in both the bacterial M4 peptidase propeptide and the fungal M36 propeptide. Its exact function is not clear, but it is likely to either inhibit the peptidase, so as to prevent its premature activation, or has a chaperone activity. Both of these roles have been ascribed to the M4 and M36 propeptides ([1], [2]). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.11 0.72 -4.49 142 1811 2009-01-15 18:05:59 2003-12-03 12:00:01 8 74 862 2 304 1431 39 50.00 25 8.45 CHANGED shph..hcspps.spGt.s+sRapQpapGls.VhGspls.lphspsGplpshsGshh ................tapl.hptpps.tpGh.s+l+hppshpGls.Vhuspls.l+.....hs.csGplhtlsGsh.t..................... 1 102 178 249 +7333 PF07505 Gp37_Gp68 Phage protein Gp37/Gp68 Studholme D anon Manual Family Homologues of phage proteins Gp37 and Gp68 are found in several bacteria. 
24.10 24.10 24.40 25.40 23.10 24.00 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.95 0.70 -5.22 10 525 2009-01-15 18:05:59 2003-12-03 20:35:13 6 5 427 0 163 511 57 235.60 33 93.07 CHANGED scTsIEWTDATWNPlsGCTKl...SPGCsNCYApphAcRhcuhuscpYp.suscppt.hu.hpt.lplctcpLphPtsWc+PRRlFVNSMSDLFHsc..VP--aIpcVFsVMcpsPpHsaQlLTKRssRhtclhsphth..............................sNVWLGsSlEsps.u.hRI-cLRcVPAulRF....lShEPLLGsls.......................................sssLssIHWlIVGGESGssARP.McP-WVRcIR-pCpsuGlPFFFKQWG......................................sh..t.hctuKKAsGRpL-GR...saDEhP ............................................t.................sWNPhtGCp.......+l.......SsGC.....ppCYA...ph.sp.+hth........t....t....t.at.ts..........................................h.........p....h........p..pp...................h...p......h..php.................psphlas.suh.S.......DlF..ttc......hss.pahpcla...p.hhc.....pssp.hpa.lLTKRspRhtphhsp......h................................................................sNlWhGsoVE.spp...s.hR........lsh...L.p.p..lP....A....t..h.+....a........lShEPLLssl.s....................................................................t.h.sh.ss.I.-WVlsGGE....S....Gsp...u...Rs.....h..c.....-..Wlhsl+-QCtt.s......slsFaFKQhG....................................................................................................t...t................................................................................. 0 54 118 148 +7334 PF07506 RepB ParB; RepB plasmid partitioning protein Studholme D anon Blast Family This family includes proteins with sequence similarity to the RepB partitioning protein of the large Ti (tumour-inducing) plasmids of Agrobacterium tumefaciens[1-2]. 
30.00 30.00 30.60 30.70 29.90 29.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.97 0.71 -4.42 15 449 2009-09-10 17:01:13 2003-12-03 20:37:47 6 4 222 0 133 437 33 167.50 21 53.66 CHANGED ARssLohI-ctthhs+LhppGhscspItsALuhstspltph....cslsptlp.t..thltth.h.lsshuhsppluRsR.lElucLh.........hsh..p.hpss.hhcph.sss+hstLhphl..+sp+tcphospshu..p....tsc.hs.ths...pulpshhcppG.ptpLtlcsspuh........phs+aLscphschhspF ..............................RpcLSaIEcshas......tcLpppGa......s..c..ctIt..sALul.ctstl.sc..h........pslsptl..............ltsIGsAhplG..Rs..RWh..-..lschl....................hsh...p.h.pss....h.....p..uh.........ss..p+hptlhphl.......p.tpt..p..p.ss.p.t.hu..t.......tpp..ht..s.p....tth.hhh.pptu..p.pltltttps..............th.hp.al.tp............................................................................... 0 18 64 92 +7335 PF07507 WavE WavE lipopolysaccharide synthesis Studholme D anon Manual Family These proteins are encoded by putative wav gene clusters, which are responsible for the synthesis of the core oligosaccharide (OS) region of Vibrio cholerae lipopolysaccharide [1]. 
20.10 20.10 21.30 26.80 19.60 18.60 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.91 0.70 -5.54 9 166 2009-01-15 18:05:59 2003-12-03 20:40:37 6 2 116 0 15 97 30 294.30 36 92.33 CHANGED IoVVlQGPV.s..sR.ppptuITppClsSlRsHLPGupIIlSTW.spsluGL-hDpllhs-DPGuslh.a..Dstsp.phN.NRQlVSThsGL+pVpTPYAlKLRuDNhLTustFlslhcpasp..RsspaphFcp+llsossahRc.p+GhsVhFHhSDhFpFGhTpDLLtlW-.sLhp-.phspss.....htp....tshsshphssEQhh...Wlpsh++hs.phcLtphp.-huhpphph.-phhusNlllssPcplGLslsp+Fhtpsphspp......apph-..............WlpLYpphC..s...ht.tsthphhhshhh.....+.+hl+hhhp+lcph ......................IThVlQGPl...........ssITppslpplRphFPtupIIl.....STWcG.......p...s..h.p.sl-......h..D.pll...cD..PGus.hhh......Y......cspsh...lNhNRQlhSThtG...L+tV.K...T....YAsKLRsDNlLssc....p....h....lElaEp.ast......Rtps.a.phL.s........pRllsSshFhhs.c..h.G.hsV.FahSDhFpFGhspDLLplWs....s.chhs.-hc..F.p.p................tY.....scp.....s..sE.Qhl...Whssh...Ls.thclcscp.Dasuhttsh..pFhtNNLlhsss-plGLD...lsc...R.h..hc...psshsh-.............ashpc...................WhhL.p.p.l............hhhhh.........t.ptlt.hh+phh.h......................................................................................................................... 1 8 10 13 +7336 PF07508 Recombinase Recombinase Studholme D anon Domain Family This domain is usually found associated with Pfam:PF00239 in putative integrases/recombinases of mobile genetic elements of diverse bacteria and phages. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.08 0.72 -3.80 73 4989 2009-01-15 18:05:59 2003-12-03 20:46:19 8 48 1838 0 981 4422 747 105.20 23 21.67 CHANGED sccutll+p.lachh.......h..puhuhpplsphLsppsl....s.pstt..........................W...stssl.......pplLpNshY..h............................Ghhhhsppt.......................t.hhhhps...........ta.slloc-haptspphhppp ..........................................cpAtlV+tIFchh................h.....pG......h....u.....hppIuct.L.s.p..c..ul...........s...pstt..........................................W......stssl.........ppILpN...h.Y.h...............................................................Gph...h..ht+pp..........................t...........p..p..h.h.h..c.s..................tH..p..sII.s.p-happsQphht........................................................................................... 1 429 755 881 +7337 PF07509 DUF1523 Protein of unknown function (DUF1523) Studholme D anon Manual Family \N 22.30 22.30 22.70 24.50 21.90 22.20 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.24 0.71 -5.04 20 220 2009-01-15 18:05:59 2003-12-03 20:50:56 6 1 218 0 38 192 35 165.30 42 92.02 CHANGED M+h.lK...hsllhlhalllhuhlaYsLPpaDlsRIoGsEVKRh..sc.sslhhusPssG......ssRDVaFIpT..........hc.....sst+shVYRNEDTsWuaP.YFKFsSAsLQAcApsh..sps.sphVtIcaYGWRlshhohFPNAlSI+sVsss-s.tshPahshllaslLhhhhahshphhRpah ......................................hhh....h+...hhllhshalhlhhhlpYshPcY-lshlTGsEVKRh..scssslstssPssG.......ssRDVYF.IpT...........pc.....cst+lhVYRNEDTtWGaPaYFKFsSAslQAcAQuh......s..s..sphVpl+YYGWRIshhs.FPNslsl+slsps-s..shPlFuaIhaslLhhhhahshphlRth................ 0 5 19 30 +7338 PF07510 DUF1524 Protein of unknown function (DUF1524) Studholme D, Bateman A anon COGs (COG3472) & PSI2 target BIG_246 Domain This family of uncharacterised proteins contain a conserved HXXP motif. 
A similar motif is seen in protein families in the His-Me finger endonuclease superfamily which suggests this family of proteins may also act as endonucleases. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.14 0.71 -4.33 240 2469 2012-10-05 18:28:12 2003-12-03 20:53:01 6 22 1617 0 608 2165 312 139.90 16 30.04 CHANGED hspspphtpphtthshhstttt...thhhllt.php.p.h.......stt............................l-HlhPps.st.p...........................tht.t...ttppthhss.l...uNLshls.tphN....sphu...s......psa...........htKpp......ta.t.....................................pphhhsp..lt....................hppp...ash..phlppRpptLhch ......................................................................................................................h.t.t........h..hllh.....php...p.h..........pht.....ph.................................I-HlhPps.sh..pt................................................................tht.t..ttpp+pphhss..l...uN...Lhhls..tptN............uphu...s..................tsa.........................h.cpp..........ta.t............................................pshh.hsp...lt...................................ppp....a.s...t..hp...tcppthh..h................................................................................ 
0 200 420 542 +7339 PF07511 DUF1525 Protein of unknown function (DUF1525) Studholme D anon Manual Family \N 21.60 21.60 21.60 21.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.52 0.71 -4.09 32 218 2012-10-03 14:45:55 2003-12-03 20:54:55 6 1 164 0 63 210 8 108.30 39 79.60 CHANGED tAs.....hhVhT-sp+Plp.......sssss..pVhhLDtsc+lptpLuspLPucPppAtt.spph..........lpuss.pphppplspAapGlscAWphGlpKlPAVVhD....pchVVYGpsDVspAlthhpta+pp .....................................................................t.AtshlhTDppH.Plp.......ssssspll.LDtspplcspLsstLP..u.sPppAtt.hspph..........lpSsshpphQpcL...s...pAYQulscAW.u.lGlpKlPAVVhD....ccaVVYGpsDVspAhthhtthpt.t.............. 0 8 30 51 +7342 PF07514 TraI_2 Putative helicase Studholme D anon Manual Family Some members of this family have been annotated as helicases. 23.90 23.90 24.00 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.78 0.70 -5.67 21 472 2012-10-01 20:28:14 2003-12-03 21:24:15 6 5 346 0 109 489 32 271.90 31 46.79 CHANGED Gahss.su.pLLuo.hcpcLlpplhppsuhscpp.......Fcphahtslc+YAchVQhLPASEsHHHutsGGhLcHuLElshaul+lppualLPssusPEcputppcuWpsAshhuALlHDlGK.lsDlclphp.....cGs.pWtsatusLsp.h+h+....Yh.......+sR.thphHsshuuhlhspllspssLsWLup.hPc.lhssLlhsluG..ph-c...........ussLuElVppADptSlspsL.u.ussstuhtssspslp+pLlsAlRtLltpchcls.spssucsW...lsp-.uLaLVtKssuDplpuhLhspGls.ulPss..sssLh-hLtspGllpsss.-...scul 
................................................Gh..s.ss.pLLus..ppphlpplhp.p.....s.u.hs.pp........Fpphhh.sl..c..c..hAthlQ.l..PASEsHHHutsGGhlcHuLElshaAh+lppuh.l.hss....su....sPE-pstppttWpsAshhuAL...hHDlGK.lsDlplp.t......sGp...tWtP.h.h.ss....lsp....h+h+....Yh........ptc......HtthusllhppllstpslsWLup.hPp.lhtsLlhsl....uG.....phpt.............sslLuclVhpADptSlsppL.t.tp.sht.........t...........t...sh.c.lhtuh+.ll.tp..ph...s..p..ucsa..............h.pp....tlal..h.p.hsc.lhthl.t.p.....th..........hPtp..s..hhs.h.p.thh..s.............................................................. 0 14 49 84 +7343 PF07515 DUF1528 Protein of unknown function (DUF1528) Studholme D anon Manual Family \N 21.50 21.50 22.70 21.50 20.10 20.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.41 0.72 -4.00 40 328 2009-01-15 18:05:59 2003-12-03 21:28:01 6 4 247 2 80 310 7 98.90 36 19.65 CHANGED lhlN-scAhVHhVssssFLVoPulFpcYhpcp............uptp..ptpsaphlQ+pFE+LplH++p........sGhNlWpCcVhGsRKs.p....cLpGYLlpssph..hhsp.s.PssNPaLpl ..........lhlNcscAhlHhVssssaLVoPulFppYhppp............uttp..s.psaphlQ+pFEcL..plH++p........sGhNlapsplhGsc+s.p.............pLpGYLlpssph...hhsp.h..P.sNshLp............................... 0 9 37 59 +7344 PF07516 SecA_SW SecA Wing and Scaffold domain Finn RD, Bateman A anon Finn RD Family SecA protein binds to the plasma membrane where it interacts with proOmpA to support translocation of proOmpA through the membrane. SecA protein achieves this translocation, in association with SecY protein, in an ATP dependent manner. This family is composed of two C-terminal alpha helical subdomains: the wing and scaffold subdomains [1]. 
26.70 26.70 28.30 26.90 26.20 26.50 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.57 0.70 -4.57 171 5380 2009-01-15 18:05:59 2003-12-04 14:10:21 8 25 4467 32 1115 3923 3227 206.70 32 24.32 CHANGED sIcp.....phlo+ulEpAQKKVEupNF-hRKpLL-YDDVhNcQRclIYppRpclLpu...........................c..c..lpchlh.phhccslpph.....lppah....stp..........ppas...hpsLpptlpphh.s.h..phslsph..............pths............................................tcp..lpcplhcthpptY............p...p.Kppphs..................................................t..chhcplE+hlhLpslDppWp-HLpsMDpLRpuIsLRuYuQ+DPlhEYKcEuaphFppMlpslcpcslphlh...+lpl ................................................................................................................................sIEpphlo+ulEsAQ++VEupNF-hRKplLcYDDVhNc...QRclIYppRpc.l.Lc..s..........................................................................................................................c..c..lpp.p.lt.shhccslpph.lsta.h.s.sp...........cpac..........lps.L.h.p.hlppth..s..h.....chsl.p..ph....................pth.s.......................................................................................................................................t-p....lp-tlhpps.ctY.....pp....+pp.t.ls........................................................................................................................................t..-hh.c.phE..+h..l..hLpslDstWp-HLssMDpLRpuI.tLRuYu....Q+sPl.EY+pEuaphFppMlpslcp-ssphlh+sp.h............................................................................................................................................... 0 382 745 956 +7345 PF07517 SecA_DEAD SecA DEAD-like domain Finn RD anon Manual Domain SecA protein binds to the plasma membrane where it interacts with proOmpA to support translocation of proOmpA through the membrane. SecA protein achieves this translocation, in association with SecY protein, in an ATP dependent manner [1,2]. 
This domain represents the N-terminal ATP-dependent helicase domain, which is related to the Pfam:PF00270 [3]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null --hand HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.72 0.70 -5.10 90 5907 2012-10-05 12:31:09 2003-12-04 16:33:08 9 48 4809 34 1211 5542 4671 373.00 47 44.91 CHANGED l.pphh....tsts....c+pl++h..............pp.hlppIsshcsphpsLoDc-Lcpcopch+p+l.......ppG......................................c.......oL-p..............................lLsE...AFAllREAupR..............................................slGhctaDVQllGGlsL.........+pGpIAEMpTGEGKTLsAoLPuYLNALsG+GVHlVTVNDYLAcRDu-.hMu.lacaLGLoV..Gllhsshs..................................................s.ppR+p...............................................................uYtsD..ITYuTNsEhGF....DYLRD.Nhshstppt.....................Vp.R...shpaAIVDElDSlLIDEARTPLIISGssppssphahthsphstphppppptphppppptthhpppsttptcphhthtthhtt.........................................................................................................................................................tphphhchlppuL+AptLac+cpcYlVp-scVhIVDEaTGRlMtGRRao-GLHQAIEAKEsVpIpsEspTlAoITaQNaFRhYpK.LuGMTGT..ApTEspEFt 
..............................................................................................................................................................h....h..s.ts.c+pl+c.h..............pc.hlp.pls....sh.E.sphpt.L.........oD..-p....L+s...KTtE...F+pRl..........tp.G............................................................................-........oL.D.s...................................lL.PE...AFA..l.V.R...EA..u.+R...........................................................................Vl..G..h..c..aD.VQl.h.G...G.h..lL...........................Hp.G.p..IAEM......+......TGEGKTLsA.T.hPs..Y...L.N..A.L..........s..............G.....+.......G..V.H.....VV.T.V.NDYLA.pRDu..-........Mu.la.p.F..LG..L..o.....V..G..l.......l....s..s.h.s...................................................s..tp.++..p...............................................................AY......s......s......D....ITYuTNN....................EhGF.....D.Y.LRD..NM.sh.p...h...--h.....................................V.Q...R...s.....hp.....a..AIVDE.......VD.SI..L.....IDE.ART.PL..IISGssccsophYhphsphh.pLhppcc.p.cppppthhhsppshpptpphhthtthhtt..........................................................................................................................................................tsh.lh+alstAL+Achlhp+Dh-YlVp-uEVlIVD-aTGRhMtGRRaS-GLHQAlEAKEGVpIpsEsp.Th.A.o...ITa.QNaFRh...Yc..........K....LuGMTGT..AcTEttEF...................................................................................................................................................................................................................................................... 0 452 817 1053 +7347 PF07519 Tannase Tannase and feruloyl esterase Studholme D anon Manual Family This family includes fungal tannase [1] and feruloyl esterase [2-3]. It also includes several bacterial homologues of unknown function. 
20.00 20.00 20.10 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 474 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.63 0.70 -5.78 8 801 2012-10-03 11:45:05 2003-12-04 17:38:35 6 15 346 0 504 870 96 395.40 25 79.35 CHANGED CcVpG.....hpsscspt.pItFplpLPs.sWNGRalQsGsGGasGslsssst.s......hs.sLspG.aATsuTDuGHcAsss......sssFu.hss-shhsFAYpAl+cssssuKtLlpsaYs+sscp...sYFhGsSsGGREGLhAAQRaP--YDGIlAuuPuhsa.lptp...hatuh.u+sshss.....sshlsssclpllspAslspCDslDGssDGlVsDPctC+hshs......psLhCpush.........ssc....................sCLTssQlpslsplas.Ghp.....usGpshYsGas..s.....hGs.ssshspWhhss...............ts.hsstssphhp.hl..............hpssNaDsso.hch...tthccp....htplsssIsATsPDLosFRs+GGKLIhapGhsDPsloPtuolpYa-uVhA+hsts............pscsFhRhahlPGhsHsGuGsusssh...............DhLsshssWVEsGpAPspllAsctssss.........................hsRoR.LCpYPphshacGpGsscsAsSapCs .................................................................hts....tt..t....tlthph..hL.Pt...s............Ws...s...R...hh.h...Gs.GGh.s...u...sh...s.s...................................hs.s.lt..G..aAshuoDsGa......s...us............................h....u......hs..p......t......h.h......sa.u.apulpths.huK.tlhpthY..u..p.s......s.p.h...................oYa.GCSsGGR..puhh...tsp+aPppaDGllAusPu...hp...h..sp.............hhth..h............h....ht.................s.h.ss.s.p.h.p.h.ltp.ts.lttCD.s.l..DG..h..tD.......Gllps..st...hCp.hp.p............shhC.tss..............ts..............................s.s.lostph.....tshp....p.las....s.hs.........ssGph....ha...sh..............s..................s...........h.t............................................s..th.ht.....hh..........................................s.s.h..s..t.hs...............t.....t...tt.h......h.hs..s...sp...sD.L.osap..p..p....G...GKllhaHG................h.......u.....D.hlsstsohtaappltpt.h..t.t.............................thp..pFhRhahlPGhsHCs....s...u..s..s..ss...h............................................s.hltslhpWVE..p.G..huP.pt.l..usths....sst............................
...............thpR.lC.aPhhsh.at......u..G..s...pts...tsa................................................................ 0 92 240 394 +7348 PF07520 SrfB Virulence factor SrfB Studholme D anon Manual Family This family includes homologues of SsrAB is a two-component regulatory system encoded within the Salmonella pathogenicity island SPI-2. Among the products of genes activated by SsrAB within epithelial and macrophage cells is Swiss:Q9KIJ9 [1]. Homologues are found in several other proteobacteria. 19.10 19.10 19.60 20.60 18.10 19.00 hmmbuild -o /dev/null HMM SEED 1002 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.76 0.70 -13.73 0.70 -7.08 12 283 2009-01-15 18:05:59 2003-12-04 17:59:07 6 2 255 0 53 223 26 878.30 58 97.93 CHANGED M.LspLscapp.ploLl.cSGIQFLDFuLp.phssc......sucFlcpsusGslhRL..ctsss+a..hlP...............spsGh.sclscs-h.olshcpSLclhpslWLPlPFhRhpPscs......FspGPsNWARlpllpL.spPDpcGNTHRlsLAFDTplhscpts.htYLAPscsDlpsGssFuLAa+sc-lu.FL-..............psWVssWL+ElFpp.s.tp.ctRstc-........lpctLtthEapAHYLNlLslLG...........splplPcl+llssshps....sIsVDLlLDVGNS+TCGlLIE-Hsp-ss.GLppshcLpLRDLSpPchlYs-.FESRlEFApApFG+pcaSlcSGRsDAFhWPoIsRVGsEAsRLAhpRpGTEGuTGlSSP+RYLWD-pshtpGWRF.....stshspoppEPLATAsPltphlN-pGcsLapL...........st--RlPVFpPpYSRSSLMTFMLuElLsQALhQINSsApRh+hspsssPRpLRslILTlPsAMPKsERcIFRpRhppAluLVWKuhGWa.tDsDh...p.tsppp....spsPlPtlphcWDEAoCGQhVYLYsEstspaGG+scpFFssh.t.....RP-p....t..s..t+sLRlASIDIGGGTTDLsIopYtLDcG.....hGsNVpIhPc.hFREGFKVAGDDILLDlIpchVLPAlptuLpptGhss..sculhucLFGs-..ut.stptlLRQQhsLQlFhPlGhulLpsYEsaDPhsspupl.phoFu-LLt..............................phP.oppVlsYlspslc+...susssFslhslPLtlcLspl+pthl...ssphslspslcuLCEllphYsCDVLLLTGRPSRLPGlQALhRphpPlPssRllPlcsY+sssWYPFpcpG..RIDsPKoTAAVGAMLChLu.s.RLssFhF+sschtsYSTlRYlGhLDsN.NtlpDpslaY+DI...........DLDsPshpLs.ctpFphRGphpLGFRQLss-RWsAoPLYpLsls..ssclA+plsG.......cusLpVcLp.......ptus........pcsshEpFtIs-A.h.psGspls...........pplpLpLsThssptusss.YWlDoGSlhhc 
........................MLssLsDYKQ..plTLItNSGlQFLDFuLoPp.tss...c.......u+F.............VR..K...oA..N...GP.....LLR..Ls..ac...psG+Y..sLs.....................................stsGutPEl..VKPE..o.sLc.SLclLsslWLPLPFL.RFsPPRT......FlpGPDNWARlQlhpL...spPD.psGNTHRlTLAFDoplhtph.....s...hLAPsENDlhsGspFALAa+s-Elu-FLD..............pTWlDGWLREsFhphA..uphEpRstpt...........IppuLRpFE.YQAHaLNLLsLLG...........pQLslPElKhsopTLpp...PAlsVDLILDVGNoHTCGlLlEDHuDtss...GL+Q....T.hELQlRsLScPpaL....s.LFpSRVEFupA+FGKQpFSVESGR-D.AFlWPSIsRVGcEAptLAhQ.....Rl..G..T....EGSoGI.....SSPRRYLWDEpshhtsWRF.....sphts.p...sp.....cEPLATAhPLhpLhND-GpPLapL............Ph-ERLPVFSPpYSRSoLMTaMLsElLAQALhQINSsApRL+h...sassuPRQLRolILTLPSAMPK.EREIFRpRMhEAlALV..WKuMG...........WHPtD-..DFs.........ospp+tK........ShVPVP.-lQMEW..DEAoCGQlVaLYNEs.spauGRo-u.FFsuh.A.....RPD+p.p..su.ss..G+sLRlASIDIGGGTTDhAIspYpLDD....G............sGsNVKIoP+LLFREG.FKVAGDDlLLDlIQphVL......Pu.......LQsuLp+AGVss..usuLhupLFGss..GRhDsQulLRQQssL...........QlFMPlG+AlLpAaEp.D..DshAt.....l.cAoFG-LLh..............................ppP.TppVhsYIppsIp+.tLPuu.ussFDIhsVPLplphSpLppthL...uspholTpsL+AlCEslShYpCDlLLlTGRPopLPGlQALhRHLQPlPsNRIl.hDtYpsp-....WYPFsppG..RIsNPKSTAAVGAMLC.LALD.L.RLspF.FKAuDh..t..sYSTlRYLGhLDso..ssLp-ENlaY+-I...............DLDpsshs...L..s..sch+F....lRGs.loLGFRQLsNsRWPAoPLYsLSIs...ssELA+plAG...................Du.V..Ls..V+L+................lpsus..................+csuPE..pFhLu-AhL..pDGos.Vsh..........ctLpLKLNTLAsptpu.uoHYWIDSGSVah.c........................................................................... 0 4 17 37 +7349 PF07521 RMMBL RNA-metabolising metallo-beta-lactamase Finn RD anon Pfam-B_760 (release 11.0) Motif The metallo-beta-lactamase fold contains five sequence motifs. The first four motifs are found in Pfam:PF00753 and are common to all metallo-beta-lactamases. The fifth motif appears to be specific to function.\ This entry represents the fifth motif from metallo-beta-lactamases involved in RNA metabolism [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.99 0.72 -4.14 138 6209 2012-10-01 19:09:44 2003-12-05 14:54:50 7 36 3653 52 1894 4689 1569 40.50 33 7.13 CHANGED chhtlchplpplc.hSuHAcpp-Lhphlpth.ps...cplhlVHGE.p ...........t.........ppl+..sSGHuspp-lphhlsh...l..+P.........+hhhPVHGEh.................... 0 666 1222 1605 +7350 PF07522 DRMBL DNA repair metallo-beta-lactamase Finn RD anon Mannual Domain The metallo-beta-lactamase fold contains five sequence motifs. The first four motifs are found in Pfam:PF00753 and are common to all metallo-beta-lactamases. The fifth motif appears to be specific to function.\ This entry represents the fifth motif from metallo-beta-lactamases involved in DNA repair [1]. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.69 0.72 -4.02 43 592 2012-10-01 19:09:44 2003-12-05 17:30:10 9 30 300 0 406 592 9 106.50 25 15.45 CHANGED spltshlTs-.tpsplHll.shs......plp....hpsLtsahpthttp......asplluh+PTG..Wshps.htt..s..t.............................................ttphslaslPYSEHSSapELccFVphl+Pp..clIPTVssss ....................................................................thhTss......t..tsplHhh..h..........php.........hppl..phhpt....t...h................hpph..luhpPou......Wshptt.h..t.p...................................................................................ttt.tha..tlPYS-HSSapELppF..lphl+Pp..pllP.sVs...s......................................... 0 126 211 313 +7351 PF07523 Big_3 Bacterial Ig-like domain (group 3) Bateman A anon Bateman A Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.09 0.72 -3.90 40 2232 2012-10-03 16:25:20 2004-01-06 12:05:57 7 198 845 2 141 1725 26 68.80 28 14.33 CHANGED lps+DSTIYlGDsWsuc.DNFsSAssKsGps........lsas-lpVsG.......sVDospsGsYplTYoasG......sopolsVTV ...........................h..spcsshhhG...-s..a.ssp..ssh.lsApsc.s.Gss.............ls.hsc..lp.VsG...................plDos..K....s...G.p.Y.pl.oYp.aps.............hptshpVpV........................ 0 69 91 112 +7352 PF07524 Bromo_TP Bromodomain associated Studholme DJ anon [1] Domain This domain is predicted to bind DNA [1] and is often found associated with Pfam:PF00439 and in transcription factors. It has a histone-like fold. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.42 0.72 -4.25 12 679 2012-10-10 12:36:46 2004-01-06 13:32:37 8 18 266 0 482 767 3 74.20 24 13.90 CHANGED pchspslhp.hsluplhppsGa-ssppusLEoLothhtpYlpclucphppau.ptssRspss.htDlh.hsLtchul.sVsp ......................................tcplLphuVup.lh.pp...sGF..-ssp.uu.l-sLT-l.hpc..Y....lpplucssppas..-...t......ts..R..s....t.s......s....htDlh...sh.tph.uh.....t..................................... 0 133 242 379 +7353 PF07525 SOCS_box Clip; SOCS_Clip; SOCS box Studholme DJ anon [1] Domain The SOCS box acts as a bridge between specific substrate- binding domains and more generic proteins that comprise a large family of E3 ubiquitin protein ligases. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.11 0.72 -4.09 141 2186 2009-01-15 18:05:59 2004-01-06 13:59:50 11 135 116 6 1208 1938 0 39.00 35 9.64 CHANGED stoLpcLCRhsIRpp.lsppt........lppLPLPptLcsYLt.ap ..........shoLpcLCRhsIRpt.lstpt...............lspLP.LPspL+pYLp.a.......... 
0 198 297 634 +7354 PF07526 POX Associated with HOX Studholme DJ anon [1] Domain The function of this domain is unknown [1]. It is often found in plant proteins associated with Pfam:PF00046. 21.60 21.60 21.80 23.30 21.50 21.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.81 0.71 -3.95 39 330 2009-01-15 18:05:59 2004-01-06 14:12:16 6 5 36 0 166 329 0 129.90 38 22.83 CHANGED phltsS+YL+sAQcLL-Ehss.Vup.....................ht.tptptsttptpssstssssss..sstsssssspt.ssplo...........ss-ptEhQhKKsKLluML-E..................V-+RY+pYpcQMQhVluSFEsVAGhGuApsYTuLAL+shSRHFRCL+....DAIsu ..............................t.hlhsS+aLpsAQcLL-Ehss.Vst.............................................t...tttt..t..st....t..s...sts.s..s.s.tss....ssps.ttss.s..p.s.ssplu.......................................sspttEhQh.KKsKLluML-E.....................................................................V-.+RY+pYhcQMQhVsuSF-s.......V...A......Gh..GuA.tsYTuLAL+shS+HFRCL+DuIs.s........... 0 16 100 135 +7355 PF07527 Hairy_orange Hairy Orange Studholme DJ anon [1] Domain The Orange domain is found in the Drosophila proteins Hesr-1, Hairy, and Enhancer of Split [1,2]. The Orange domain is proposed to mediate specific protein-protein interaction between Hairy and Scute[2]. 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.99 0.72 -4.21 79 952 2009-01-15 18:05:59 2004-01-06 14:24:01 8 6 111 2 510 910 1 42.90 31 15.11 CHANGED spa+uGapcChpEVs+aL.sshpuh........cs...thpp+LlsHLtpshsth .......tapsGapcChpEVs+aL.ushcuh...................ss.......slps+LlsHLpphhs..h................. 0 101 143 313 +7356 PF07528 DZF DZF domain Studholme DJ anon [1] Domain The function of this domain is unknown [1]. It is often found associated with Pfam:PF00098 or Pfam:PF00035. This domain has been predicted to belong to the nucleotidyltransferase superfamily [2]. 
20.40 20.40 21.10 21.00 19.60 20.20 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.52 0.70 -4.96 4 544 2012-10-02 22:47:23 2004-01-06 15:10:16 9 21 114 0 254 492 0 210.30 43 35.06 CHANGED pVGSFtKGThhpGsssuDlVllLKoLPTp-sl-tLu+KVttsLctuhcs.........Eshph........hEhGhcIosshs+VRhLIshlPpshpKLEP.hHLDpKhh.upLAulRHs+WFpppApc.......ohplLIRlLKDLspRassFpPLssWhl-LLAahuIhNsPuRQshslshAFRRVFplLAuGlFlPsSuGIhDPsE.uphRltsshTL.QpDssChoAQTLlRlhAaG..GY++ILGhcsssos .....................................................................................RVG.lAKGLll.+.G-.sl-LVl.lspp.hPTt....sLLpplup...pLs.pLtt.hs.c..........................t.....-sh.........hh................t................hph..s......hp..lo.....Ss...h.h..+....c...........h....t.............s......s..p..s....h.t....h..D...P...........p.h.............LDpphCh.sA.L.Aul...RH..A+WFp.s...+Asslp.....Ss.hllIRlLRDLspRh.Ps.Wts...LpsW.slEL.Lsc.......+uls.....os...ppP...hu.su...cA.h.......RRlhEsluoGllL..........s....s....usGlhDPCE........cps..........hcshshhohpQp...............-slT.oAQ...p.hLRlhAFt..ph+KlL.Gh-s.s...................................... 0 60 80 160 +7357 PF07529 HSA HSA Studholme DJ anon [1] Domain This domain is predicted to bind DNA [1] and is often found associated with helicases. 20.80 20.80 21.00 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.26 0.72 -4.19 50 836 2009-01-15 18:05:59 2004-01-06 15:46:46 8 46 264 0 522 792 6 70.80 25 4.41 CHANGED p+htctp...+p+s+actlLpphthhupDFppt++h+.....hstsp+luchltpaapptpppcp+ctc.......tcpchptltp ...........................+htp.p...+p+p+as.h..Lpphh.hupDFppt++.p.......tupspKls+tltpaHtppccc.pc+ppc.........pcp+h+tlh..................................... 0 155 262 412 +7358 PF07530 PRE_C2HC Associated with zinc fingers Studholme DJ anon [1] Domain This function of this domain is unknown [1] and is often found associated with Pfam:PF00096. 
22.10 22.10 22.80 22.50 21.40 21.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -8.95 0.72 -4.07 10 61 2009-01-15 18:05:59 2004-01-06 16:12:08 6 8 16 0 30 76 1 64.50 22 11.32 CHANGED sppIcppLpcpGapsppl+shpps................ss+sPhNMFhVpLssss-..pcc....ILplKpLGph+.VsVERtp++c-s ................................................IhppLpp.Gassh..plhshppt.................pcps....h.shahlcltss.s..pcc.....lhpl+plst.h.VplEt.phpt............................. 0 13 16 28 +7359 PF07531 TAFH NHR1 homology to TAF Studholme DJ anon [1] Family This corresponds to the region NHR1 that is conserved between the product of the nervy gene in Drosophila and the human mtg8b protein [1], which is hypothesised to be a transcription factor. 21.30 21.30 21.50 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.15 0.72 -4.35 14 474 2009-01-15 18:05:59 2004-01-06 16:26:22 9 11 109 4 227 399 0 92.60 49 15.33 CHANGED hcslpKC+pFLssLlcLuss...pus-huppVRsLVpsLlsuplpsEEFsp+L.ptLNusPQPaLVPFLKpoLPsLRphh.sspthlpQsth.hh..ssss .....scplpKhKpF.LoTL.phusc.....SPElucpVRsLVhsLlsuslph.EE...Fps+LpcthN.s..p..............Pall.PFLK............t.s.........LPhLpptLh.csAphhpQsstQh.s...p.......................... 1 38 55 123 +7360 PF07532 Big_4 Bacterial Ig-like domain (group 4) Bateman A anon Bateman A Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -9.12 0.72 -4.45 98 1839 2012-10-03 16:25:20 2004-01-06 16:50:15 6 189 521 0 210 1443 6 61.30 29 9.61 CHANGED hpshshs.st.........h..Gss.........................pLPppVpshh.s-Gos...p.......p...hsVp.W...........s........stphspsGs.aplpGp..lp..Gh .................................................h.t.hplh..st........................l..ups....P...........................pLPppVssha...scGpp...p........p...hsVs.W...........s........thsspphspsGs.apVpGp.lp.G............................. 0 96 157 179 +7361 PF07533 BRK TCH; BRK domain Studholme DJ anon [1] Domain The function of this domain is unknown [1]. It is often found associated with helicases and transcription factors. 20.90 20.90 20.90 21.10 20.60 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.94 0.72 -4.49 20 582 2009-01-15 18:05:59 2004-01-06 17:04:04 11 35 89 5 303 552 2 46.10 40 2.68 CHANGED slss-pRVsVlspcsG+pLsGscAPpt+cLppWLptsPsatls.....Pct .....p.su-p.V.VlppcsG+h.LsGs-APptppLp.pWLctsPsYtVs.....Pc............. 0 54 76 170 +7362 PF07534 TLD TLD Studholme DJ, Eberhardt R anon [1] Domain This domain is predicted to be an enzyme [1] and is often found associated with Pfam:PF01476. It's structure consists of a beta-sandwich surrounded by two helices and two one-turn helices [2]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -11.13 0.71 -4.19 112 1684 2009-01-15 18:05:59 2004-01-06 17:16:33 11 56 317 1 1140 1627 32 133.50 22 28.56 CHANGED ll.assp.pcGtuhps..hhpphp.......pp.ushllll.............ps.sc...h.....lFGuasspta......p.t.......ahGs...spo.FlFpl..........psphtsa+h.....ssps.........thahhssp......p.........luh.Gss..................phuLhl-s.shpp.uhs..p.hs.............soas..ss..hs..........tpppFplpslElWuht .................................................................................lasst.pcGhS.....hps...hhppht.............hp.sssllll........................................cs.pps....t.............lF...Guass.psh..................................p.spp.......ahGs......scsF.l.Fph................................ps.p.h...p.haph............ssts.......................................t.hh.hhss..............pt.................................lshGus...............................................thuLhlcs......sh.p...uhs.p...s...............toat...s...Ls........................................ttpF.l.tlElath........................................................................................ 0 519 685 941 +7363 PF07535 zf-DBF DBF zinc finger Studholme DJ anon [1] Domain This domain is predicted to bind metal ions [1] and is often found associated with Pfam:PF00533 and Pfam:PF02178. It was first identified in the Drosophila chiffon gene product [2], and is associated with initiation of DNA replication. 24.80 24.80 24.80 25.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.50 0.72 -4.28 29 271 2009-09-11 05:51:30 2004-01-06 17:34:58 7 7 203 0 185 277 0 47.60 39 6.36 CHANGED pcpcsGYCENC+hKY-sh-p.................HltSc+HRpFApscpNatslDsLItpLpp .......cp+sGYCEsCppKa-..-.h-p.................Hl..hSccHRpFAtpspNattlDplltpL............................ 
0 50 87 141 +7364 PF07536 HWE_HK HWE histidine kinase Studholme DJ anon [1] Domain Two-component systems, consisting of a histidine kinase and a cognate response regulator protein, represent the best-known apparatus for transducing external cues into a physiological response in bacteria. The HWE domain is found in a subset of two-component system kinases, belonging to the same superfamily as Pfam:PF00512 [1]. The family was defined by [1] the presence of a highly conserved H residue in the kinase domain and a WxE motif in a C-terminal ATPase domain that is related to Pfam:PF02518. These proteins are found in a variety of alpha- and gamma-proteobacteria, with significant enrichment in the rhizobia. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.64 0.72 -3.55 44 1003 2012-10-11 19:05:54 2004-01-07 14:13:07 9 153 270 0 408 2339 212 82.20 32 14.48 CHANGED ELsHRVKNhLAslQSlspQTh+pssshschhpphpuRLtALupuHsl....LocspWtuusLp-LlptpLpPats......sp+lplsGPs ..ELsHRlKNhLAlVpuls...p..p....T..h....+..p.....s...s.....s....h....c...p....h..t..p..t...h....p...u...R..lpAL.....upu.a.s.l........L.s.c.....s...s.....W.....p.....u.....s.....s.....l....p....p....Llps....plt.sass...t......ssRl.plpGP.............................................. 0 81 201 261 +7365 PF07537 CamS CamS sex pheromone cAM373 precursor Studholme D, Williams W anon Pfam-B_18913 (release 11.0) Family This family includes CamS (Swiss:Q8L313), from which Staphylococcus aureus sex pheromone staph-cAM373 is processed. 
25.00 25.00 25.40 25.20 23.60 23.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.98 0.70 -5.40 7 766 2009-01-15 18:05:59 2004-01-21 15:50:20 6 2 576 4 74 428 0 278.80 36 84.01 CHANGED PaKtupuRGlssssl..ssRhDl-EhEsGLhclup-pFss-sahaQ-GQYlDccplppalt+cp............cs......s.GLNPsh..tssshcphp.ppPhYLopILEpDYhscpDssshpLsGlsIGLAMNSV..appcpstsph...ppsIs-pchhppGcchAppllpclRpp-....shcslPIshAIY+Qts+sSlsPGNFlutssVptsssslssWpsIsEK.hlaPS.spsscpttpDssphppFppplpsaFs.N..asulVGcuhYcccphpcLplDIPlpa.GKuElluhTQalsshl.chaPK.h.clplpIpoupp.EAlIhRpss-ccPhV .................................PactupuRGlh...s.sh...ssphsht.-aEsGLhpluKc.FsTcpYlaQ-GQaLsppTl.pt......a.Ls.K..................c..p...........s.G...LNPs.......sp.....t..s..c.....phs......sPh.YLspIlEQDahsptsspsh..pltGhsIGLAMNSV.YYp.ccp.ssp......pppls....s...pchhtp...G.+phAscllpplRppc....ph.cs.lPIphAlY+Q.usp.s.SlssGpals.uss.pps..ps.plspWcsIsEKshlhP....S....sstsp.....cp....s........sspFppFpsplpsaFs...s.a....otssupspahcpphpplslslshpaaGpsEhhuhTQalsp.s.cahsp.s.phplpIps.ss...p...pAlI.+ptssccs........................... 0 19 41 56 +7366 PF07538 ChW Clostridial hydrophobic W Studholme DJ anon [1] Repeat A novel extracellular macromolecular system has been proposed based on the proteins containing ChW repeats [1]. ChW stands for Clostridial hydrophobic with conserved W (tryptophan). This repeat was originally described in Clostridium acetobutylicum but is also found in other Gram-positive bacteria including Enterococcus faecalis, Streptococcus agalactiae and Streptomyces coelicolor. 20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.17 0.72 -7.56 0.72 -4.43 194 1324 2009-01-15 18:05:59 2004-01-21 17:12:02 6 86 159 0 292 1109 45 35.70 41 26.52 CHANGED HVps..hGWp.s.h.spsGphuGTsGpuhRlEAlclpLssp ..............HVQshGW.......p..s...a.sp...sGp....h......u.GTsGpuhRlEAlcIpLss......... 
0 235 272 292 +7367 PF07539 DRIM Down-regulated in metastasis Wood V, Studholme DJ anon Pfam-B_10642 (release 11.0) Family These eukaryotic proteins include DRIM (Down-Regulated In Metastasis) (Swiss:O75691), which is differentially expressed in metastatic and non-metastatic human breast carcinoma cells [1]. It is believed to be involved in processing of non-coding RNA [2]. 24.80 24.80 25.80 26.30 24.70 24.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.60 0.71 -4.77 29 267 2009-01-15 18:05:59 2004-01-22 13:23:18 7 4 244 0 207 278 1 140.10 31 5.54 CHANGED plhuKFpN.Ksla+usclaph.hhpLLsspssclQKhALcslhsa+ss.sls.Y+-NLpNLLDDspF+DElssFh..hssppps................IpspcRstlhPlllRILaG+hpspssusspp.....uR+tuVlphL.sshppp-lt.Flcluhs.l ....................lFupFtNP+slappsclaph.h.h.pLLsps-tplQ+hALcslhs..aKp..s..tl.hs.Y.c.-sLppLl-.....-..p.......pF+-ELspF..hspppt.................................lcs.pHRspLhPlllRlLYG+hhs+suupstt................tsR+tslLphL.ush..pspElthFlplhhts............................. 0 68 111 170 +7368 PF07540 NOC3p Nucleolar complex-associated protein Studholme D anon Pfam-B_8562 (release 11.0) Family Nucleolar complex-associated protein (Noc3p, Swiss:Q07896) is conserved in eukaryotes and has essential roles in replication and rRNA processing in Saccharomyces cerevisiae [1]. 
20.70 20.70 20.70 20.70 20.10 20.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -10.17 0.72 -3.72 30 305 2009-01-15 18:05:59 2004-01-22 13:51:56 6 4 269 0 214 288 3 96.10 38 13.28 CHANGED lhctKEcIAclustlhEDPEENltsLccLtchspsp........phslpKLulloLlsVFKDlIPGYRIRPLoEpEtppKVSKEVp+LRsFEpuLlpsYKtY ..................................................l.ptKpcIAplustlhp..-PE..p..s..lt..........t..........L....+c.Ltphsppp....................................shslpKL...ullolhsVFKDIlPuYRIR.........s...........L.........o.E....p.E................t................p....p.....K.........lsKElp+LRpFEpuLlptY+tY...... 0 74 120 178 +7369 PF07541 EIF_2_alpha Eukaryotic translation initiation factor 2 alpha subunit Wood V, Studholme DJ anon Pfam-B_5125 (release 11.0) Family These proteins share a region of similarity that falls towards the C terminus from Pfam:PF00575. 21.70 21.70 24.20 23.60 21.30 20.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.30 0.71 -4.36 78 521 2009-01-15 18:05:59 2004-01-22 14:09:57 7 4 460 17 353 497 94 115.20 34 38.02 CHANGED -.hacpluasL.ccaG....csY-.AFc.ssspss.plLschp..ls..............................cchtcsLhphhpcplpsptV+lpu.l-lpshs.-Gl-tIK.cAL..psup..phs....scphp........lclphluuPpY..tlpspu.Dh .........................LappluWs..Lp..c..KYG..............cua-.AF.K.slsc........s........s..l..l.csls..ls..................................c.c.h.t-sLhp.Ip++loPpslKlRADlEls.Caua.-GI-ulK.cAL.+s.up...stu.............scph.................lKlpLlusPhYllospshDh.......................................... 0 119 206 294 +7370 PF07542 ATP12 ATP12 chaperone protein Wood V, Studholme DJ anon Pfam-B_6737 (release 11.0) Family Mitochondrial F1-ATPase is an oligomeric enzyme composed of five distinct subunit polypeptides. The alpha and beta subunits make up the bulk of protein mass of F1. 
In Saccharomyces cerevisiae both subunits are synthesised as precursors with amino-terminal targeting signals that are removed upon translocation of the proteins to the matrix compartment [3]. These proteins include examples from eukaryotes and bacteria and may have chaperone activity, being involved in F1 ATPase complex assembly. 20.50 20.50 20.70 22.80 20.20 19.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.67 0.71 -4.16 100 585 2009-01-15 18:05:59 2004-01-22 14:27:18 6 9 548 7 318 554 186 126.20 35 43.45 CHANGED KRFaKcsslt.p.........sssG.asltLDGRsl+TP.u+psLslPocs..LApulAsEW...sAQ..pctIcPtoMPlTpLussAlDtlss...p...............pstlhptlhpYhsoDhLhYRAss...........Ppp.........LspcQsctW-PlLcWspp...ph ......................KRFacpsslt..p...........................scsG..atltLDuRs.l+TP.scphLhlPocs..LAps..lAsEW.....suQ..pc.tIcstsMPlTpLsssAlDtssp...s.........................................................................pptlhcslhcahsTDhlhaRusp.........................spp...............L...hptQpcpW-PllcWhppt.h....................................................... 1 106 183 258 +7371 PF07543 PGA2 DUF1531; Protein trafficking PGA2 Wood V, Studholme D anon Pfam-B_46790 (release 11.0) Family A Saccharomyces cerevisiae member of this family (PGA2) is an ER protein which has been implicated in protein trafficking [2]. 
24.00 24.00 24.40 24.20 22.60 23.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.19 0.71 -4.38 6 125 2009-01-15 18:05:59 2004-01-22 14:39:42 7 2 105 0 89 97 0 112.90 28 84.02 CHANGED htpNltsoFup...hohc+WIRlIllVGuYlLIRPY....FhKLusK....spt+cpEKEpAEscstp.......u+lSsNuLRGutstup....h.sE-TD-E.......Esstp.ussSusscWGKsARKRQ++hh+cL.cptEcppcc...t--DcDIpEhL-c ...............sh.tphss...hshpcalRllhIlGuYhllRsa....hh.chh.t+.....tthcphcc-p..tctctt............t..tspth+s..t..t................cp-.......cst....ttsou...tWG+psR+..+.+p........c........h.p...t.-phptp....................ppDpDIt-hLc........................................... 0 13 38 70 +7372 PF07544 Med9 CSE2; RNA polymerase II transcription mediator complex subunit 9 Wood V, Studholme DJ anon Pfam-B_45625 (release 11.0) Family This family of Med9 proteins is conserved in yeasts. It forms part of the middle region of Mediator [4]. Med9 has two functional domains. The species-specific amino-terminal half (aa 1-63) plays a regulatory role in transcriptional regulation, whereas this well-conserved carboxy-terminal half (aa 64-149) has a more fundamental function involved in direct binding to the amino-terminal portions of Med4 and Med7 and the assembly of Med9 into the Middle module. Also, some unidentified factor(s) in med9 extracts may impact the binding of TFIID to the promoter [5]. 26.10 26.10 26.20 26.40 26.00 26.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.61 0.72 -4.25 43 187 2012-10-04 14:01:12 2004-01-22 16:04:03 8 1 175 0 129 174 0 82.80 25 51.36 CHANGED ssphhPtlhshhpphtp.......sshssc-ltstsusl+h+lpcs+shlpplss....ls+oscEQppcIcpLcpplppppplLpcapppst ........................................t..phlPhlhthlpp..............hshcspDlppphssl+p+lpcsRphlpphPs....lcpoh--QppplcpLcpplppKpplLpca+phh............... 
0 23 52 96 +7373 PF07545 Vg_Tdu Vestigial/Tondu family Bateman A anon [1] Family The mammalian TEF and the Drosophila scalloped genes belong to a conserved family of transcriptional factors that possesses a TEA/ATTS DNA-binding domain. Transcriptional activation by these proteins likely requires interactions with specific coactivators. In Drosophila, Scalloped (Sd) interacts with Vestigial (Vg) to form a complex, which binds DNA through the Sd TEA/ATTS domain. The Sd-Vg heterodimer is a key regulator of wing development, which directly controls several target genes and is able to induce wing outgrowth when ectopically expressed. This short conserved region is needed for interaction with Sd [1]. 19.40 19.40 19.70 28.80 19.30 18.40 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.35 0.72 -4.51 3 189 2009-01-15 18:05:59 2004-02-04 14:20:38 9 1 74 2 108 146 0 32.50 69 11.85 CHANGED pYlNAuCVlFTYFpGDIuShVDEHFSRALsass ...pYlsSRCVLFTYFQGDIuSVVDEHFSRALup.................... 0 15 23 56 +7374 PF07546 EMI EMI domain Bateman A, Doliana R anon [2] Domain The Pfam alignment is truncated at the C-terminus and does not include the final cysteine defined in Callebaut et al [2]. This is to stop the family overlapping with other domains. 21.80 21.80 21.80 21.80 21.50 19.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.38 0.72 -3.52 43 529 2009-01-15 18:05:59 2004-02-04 17:48:52 8 46 72 0 259 472 0 70.40 32 10.79 CHANGED spNhCuhhhppslsh.....spstspshhphhhpsC....hst.tCs..............................paRsha+s....sY+hshchh..ophtacCCPGapt.s .........................+shCuahs.s+slshh...spstspshhps..hp..C....hst.tCs..............................pYRshhRstY+hshKsl.....ophcW+CCPGapG..p.................................... 0 28 49 116 +7375 PF07547 RSD-2 RSD-2 N-terminal domain Bateman A anon Bateman A Domain This domain is found in three copies in the N-terminus of the C. 
elegans RSD-2 protein. RSD-2 (RNAi spreading defective) is involved in systemic RNAi [1]. Mutations in the rsd-2 gene do not effect somatic genes but only germline expressed genes [1]. 25.00 25.00 25.80 25.20 19.60 21.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.77 0.72 -3.84 7 25 2009-01-15 18:05:59 2004-02-05 14:53:25 8 7 5 0 22 24 0 86.50 29 13.64 CHANGED FSssspHhTYsRpluaADhYGhV-sup..ts.ptsssaps.Isls.lsYppstpsLFKssph.hch.ssc-ss-hhpphhpFE-ph ................FSSsspHhTYs+pluauDtYGhV-ls....c...tshcpssVYpspIslss.h..............phcsstpslFKlssh..hc.ptp...s-sspc.ahpphhpaE-p..................... 0 5 6 22 +7376 PF07548 ChlamPMP_M Chlamydia polymorphic membrane protein middle domain Yeats C anon Yeats C Family This family contains several Chlamydia polymorphic membrane proteins. Chlamydia pneumoniae is an obligate intracellular bacterium and a common human pathogen causing infection of the upper and lower respiratory tract. This domain is found between the beta-helical repeats (Pfam:PF02415) and the C-terminal Pfam:PF03797. This domain is excised subsequent to secretion [2]. 
25.00 25.00 35.80 34.20 23.00 23.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.93 0.71 -4.49 55 594 2009-01-15 18:05:59 2004-02-19 17:21:38 6 15 39 0 64 433 0 177.10 26 16.15 CHANGED GsLsLccsApLpstu..FoQpsGo.lhhssGosLt........................................................sssssslslssLslsLsSlh..................................................................................sspssshtssssssslo.....loGslsLl-ss.sshY-shsLspsh.phsllplssss..........sstspshssshstssss+YGYQGsWohsWpp.................ssspppslhhsWss ........GsLlLc-sApLpshs..hoQ.ss.Gu.lhhssGosLp..................................................................sssssslslssLslsLsSlh..................................................................................s.s...sssss.....h.....ssssssslo.....l.s.G.slsLlDss.sshY-sh.tLsss...plsllplssst..........pstspstphshs.ts.ss+YGYQGsWohs.Wpp..................stpspoLhhsWp.................................................................. 0 9 9 50 +7377 PF07549 Sec_GG SecD/SecF GG Motif Yeats C anon Yeats C Motif This family consists of various prokaryotic SecD and SecF protein export membrane proteins.\ This SecD and SecF proteins are part of the multimeric protein export complex comprising SecA, D, E, F, G, Y, and YajC [1]. SecD and SecF are required to maintain a proton motive force [2]. This alignment encompasses a -GG- motif typically found in N-terminal half of the SecD/SecF proteins . 20.30 12.00 20.30 12.10 20.20 11.90 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.36 0.73 -7.54 0.73 -4.42 145 7147 2009-01-15 18:05:59 2004-02-20 10:11:56 9 16 3512 9 1648 5001 3208 30.00 30 6.74 CHANGED hhhhhspslshGlDhpGGspltlpsppsssh ............hhhhpshsLGLDhpGGspl.lpsc.st..h.......... 
0 506 1052 1384 +7378 PF07550 DUF1533 Protein of unknown function (DUF1533) Yeats C anon Yeats C Family This family consists of several hypothetical bacterial proteins and is around 60 residues in length. It's function is not known. 20.80 20.80 20.80 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.10 0.72 -3.98 25 199 2009-09-11 11:31:05 2004-02-23 11:44:10 6 30 76 0 36 185 3 65.90 24 12.90 CHANGED spsahpsIsclpl..NGsph.........ptscatlttsst.lplssssFscsGpppIsIKApGYpDsslph ...............................ppahppIscVpV..Nssha....................th.pspspashshss........lpl..s...sssF...p.....s.....G...c....ppIpIpu.cGa.p-hphph...... 0 21 32 34 +7379 PF07551 DUF1534 Protein of unknown function (DUF1534) Yeats C anon Yeats C Family This family is found in a group of small bacterial proteins. Its function is not known. 25.00 25.00 26.60 25.20 24.60 23.40 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.44 0.72 -4.43 3 44 2009-01-15 18:05:59 2004-02-23 11:54:40 6 6 10 0 6 46 0 39.00 56 65.19 CHANGED LSFRTLQRGNAVuDAPRHRSAPRRAFKIGRGASRsAsshsA.spohhR ......LSFhTLQRGNAltDA.RH+SsPRRhhKhGRtAS.+suh...........t............... 0 0 0 0 +7380 PF07552 Coat_X Spore Coat Protein X and V domain Yeats C anon Yeats C Domain This family is found in the Bacilliales coat protein X as a tandem repeat and also in coat protein V. The proteins are found in the insoluble fraction [1]. 20.00 20.00 62.40 22.20 19.70 17.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -8.88 0.72 -4.19 22 264 2009-01-15 18:05:59 2004-02-23 14:24:19 6 2 76 0 29 192 0 60.10 42 71.12 CHANGED pscplsp-hhQhsthcQls...cpplhIcsSpsVsVoToDTpsAlslQshlQshlsl.llplsI .......p.ssplsp-hhQpophcQls...cpplhIhsSpsVsVTTTDTchAlsIQshLQshlsl.llplsI....... 
0 7 19 23 +7381 PF07553 Lipoprotein_Ltp DUF1535; Host cell surface-exposed lipoprotein Yeats C anon Yeats C Domain This is a family of lipoproteins that is involved in superinfection exclusion. Proteins in this family have been shown to act at the stage of DNA release from the phage head into the cell [1]. 20.10 20.10 20.70 20.20 19.70 19.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.24 0.72 -4.09 18 773 2009-01-15 18:05:59 2004-02-23 15:58:35 6 18 375 0 116 573 23 47.10 43 32.24 CHANGED shc.psALcKAKsYucohpMS+pulY-QLsS-......aGEKFTpEpAQYAlDsL .......c.psALppAKsYt.p.h.hp.MSKpulY-QLoS-.......hu-+Fos-pAQYAlDpL...... 0 31 69 104 +7382 PF07554 FIVAR Uncharacterised Sugar-binding Domain Yeats C anon Yeats C Domain This domain is found in a wide variety of contexts, but mostly occurring in cell wall associated proteins. A lack of conserved catalytic residues suggests that it is a binding domain. From context, possible substrates are hyaluronate or fibronectin (personal obs: C Yeats). This is further evidenced by [1]. Possibly the exact substrate is N-acetyl glucosamine. Finding it in the same protein as Pfam:PF05089 further supports this proposal. It is found in the C-terminal part of Swiss:O82833, which is removed during maturation ([2]). Some of the proteins it is found in (e.g. Swiss:Q9RL69) are involved in methicillin resistance ([3]). The name FIVAR derives from Found In Various Architectures. 21.30 10.80 21.30 10.80 21.20 10.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.73 0.72 -3.65 225 12786 2009-09-13 16:22:53 2004-02-25 16:52:28 8 504 697 8 341 12670 23 49.90 26 19.54 CHANGED l....spssshptttph.....hshssstpsshspAlstApsllsp.ss.....sstpplspA ........................................l.tspsssptspsY...................hsAsssppsAYssAlssAcsllsp.ss.sss.hstspVspA.................. 
1 168 208 297 +7383 PF07555 NAGidase Hyaluronidase_2; beta-N-acetylglucosaminidase Moxon SJ, Bateman A anon Pfam-B_4394 (release 12.0) Family This family has previously been described as a hyaluronidase [1,2]. However, more recently it has been shown that this family has beta-N-acetylglucosaminidase activity [3]. 25.00 25.00 26.50 26.00 21.20 20.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.04 0.70 -5.51 30 535 2012-10-03 05:44:19 2004-02-26 11:24:06 8 42 380 54 170 546 259 293.30 33 30.05 CHANGED RGsIEGFYGpPWSp--Rh-hhcFhGchKhN...TYIYAPKDDPYHRp.pWR-.YPs-El.sclp-LlcsAscs+lcFsaAluPGhsls.....ao...spcDhpsLhsKhcplhchGVRsFulLhDDIs.t.................sssupsQscLlNclpccahps+tsshs.....LlhsPTEYssstsss.......YlpsLscpLsssIplhWTGssVl.sspIohpshcph.......ssshtRsshlW.NaPVNDa..sps+LhhGPhts.pcssl..s..plsGhVoNPMppucASKlAlau....lADYsWNtpsa..DspcuWppuhchlss..............sss-slthFucp. ..............................................RGhlEGFY........Gp...PWopcpRhchhcahuchchN....oYl..YAPKDDshHRt...pWR-hYs......t-...........c..h.....pp...........lppLlpsupcsclcFlaAluPGhs.hs.........as........tpp-hpsLhpKhcQlh....p.h.GlRpFulL....hDD....Is.......................pshupsQsplhNcl.pphs....p.........c..................hlhsPT..cYssshsss.................................YLpslucpLs.s.slplhWT.GspVh.ssplo.ps.lpph...........sphh.p..RsshlW.NaPsNDa...........spp+....Lh.LGPhp......s....p.........sssl...s........plpG...llsNPMp.ph-s.....Sclulas....hAsa.Ws.tsh...ssp.pshpt.shp.h........t.t.th............................................................................................. 0 53 84 131 +7384 PF07556 DUF1538 Protein of unknown function (DUF1538) Yeats C anon Yeats C Family This family contains several conserved glycines and phenylalanines. 
20.70 20.70 22.10 22.30 19.90 20.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.31 0.70 -5.25 72 585 2009-01-15 18:05:59 2004-02-26 17:09:14 6 2 271 0 199 528 359 210.10 35 83.88 CHANGED IllllhhFQhlll.+tshsshhpllhGhlhVllGLslFLhGlchGlhPlGcthuptlsp.....................hss....hhhllhFuFhlGFusTlAEPALhslutpsppVou..GtIs.........thslhhsVAlGVululuLGhhRIlh.GhPltahllsGYlllllhohFuP.....pthlulAaDSGGVTTuslTVPllhALGlGlAss.lp......GRssllDGFGLIAhASlsPllsVhlhGhlh ..........Ihhllhhhphhll....phshtp...hhpllhGhlhlhlG.LslFLhGlchuhhPlGcthGptlsp............................................................................hsp......hhhll.hhuFhlGFssTlAEPulhsluppspplos..GtIs...........ttslhhslulGVululsluhhRIlh.GhslhahllsGYlllllloh..asP......p.hlulAaDSGGVTTGslTVPhlhALulGlAus.lp...............u+.ss.hhDGFGllAhsSlsPlluVhlhGll.h..... 1 97 169 191 +7385 PF07557 Shugoshin_C Shugoshin C terminus Wood V, Studholme DJ, Watanabe Y anon [1] Family Shugoshin-like proteins contain this conserved sequence at the C terminus, which is rich in basic amino-acids. Shugoshin (Sgo1) protects Rec8 at centromeres during anaphase I (during meiosis) so that sister chromatids remain tethered [1]. Sgo2 is a paralogue of Sgo1 and is involved in correctly orienting sister-centromeres [1]. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -6.83 0.72 -4.46 25 261 2009-01-15 18:05:59 2004-02-26 17:34:01 6 9 200 0 173 252 0 25.90 48 4.60 CHANGED s.uRspR.Rsshsl.sYpEPoLpsKhRRs .............sRspR..Rspts.V.sYpEPsLpsKhRRs.. 0 42 82 135 +7386 PF07558 Shugoshin_N Shugoshin N-terminal coiled-coil region Coggill P anon Manual Domain The Shugoshin protein is found to have this conserved N-terminal coiled-coil region and a highly conserved C-terminal basic region, family Shugoshin_C Pfam:PF07557. 
Shugoshin is a crucial target of Bub1 kinase function at kinetochores, necessary for both meiotic and mitotic localisation of shugoshin to the kinetochore [1]. Human shugoshin is diffusible and mediates kinetochore-driven formation of kinetochore-microtubules during bipolar spindle assembly [2]. Further, the primary role of shugoshin is to ensure bipolar attachment of kinetochores, and its role in protecting cohesion has co-developed to facilitate this process [3]. 30.00 30.00 30.00 31.90 29.90 29.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.98 0.72 -4.25 24 155 2009-01-15 18:05:59 2004-02-26 18:24:25 6 3 123 1 92 154 0 45.80 30 8.09 CHANGED l+ppahpQNpplu+tNShlph+lpplcsclocLlsENhsLRpph.t ....lKc+ah+pNpElu+pNSh.sh+l..pplcscsopLhsEphslRppsl..... 0 15 37 67 +7387 PF07559 FlaE Flagellar basal body protein FlaE Yeats C anon Yeats C Family This family consists of several bacterial FlaE flagellar proteins. These proteins are part of the flageller basal body rod complex. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.13 0.71 -3.48 180 2146 2009-09-11 11:36:52 2004-02-27 10:17:02 9 15 1789 5 472 1518 458 131.10 26 27.96 CHANGED hss...ssssoas..sooh.slYDSLGss.........HslshaFs.K...................................sss.....ssW..plhssss.............................................................................................................................ss.s.ssslsFs.s.......sG...p....l.........................................................................sssssshsssssssuuss.p...............lslsh.........s.............................ssTQau....us..t..ssshs...Q.DGa ........................................ss.p.o.as....h.sso.l.slYDS.....hGs..s.........HslslYFsK....................................................................sus........Np..W.....pshs.p.ss.........................................................................................................................................................................................................................................................................................................................s.s.ss.s..shs.ssslpFs..s.........sG......sL...............................................................................................................................................................................s...ss..s.......s...s.....s...hs....s..s.....s.h....s..G.As.sts............hslsh.....................s..........................................................uh.T.Qhs......us...s.....lsshs.....p.sGY.................................................................................................................................................................................................................................................................................... 
0 138 279 374 +7388 PF07560 DUF1539 Domain of Unknown Function (DUF1539) Yeats C anon Yeats C Family \N 21.10 21.10 21.80 24.40 20.40 19.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.50 0.71 -4.26 13 46 2009-01-15 18:05:59 2004-02-27 12:06:41 6 2 13 0 9 26 0 130.90 32 29.47 CHANGED APpsssFLcSLhpsosppWpshpc........LcspIp......................cLsss..lcssWtpILchlsst......spsphsuc.spslhhuhha+LhphLp.....sPsIsp-+KpplLpaIuSYus.sCsPTWlEshhpElptlaNpp-susshl ..............sPtss.FLpSLhpsssppWshhap........LcspIp......................plpss..hcssWtsIlphlsst......ppsp.psc.sushhhuhha+lhthLp.........ssslop-+KhphLs.IuSYss.tCsPTWlEshhpElptIaNpp-pshs.l.... 1 0 0 7 +7389 PF07561 DUF1540 Domain of Unknown Function (DUF1540) Yeats C anon Yeats C Family This family has four conserved cysteines, which is suggestive of a metal binding function. 22.10 1.00 22.10 5.70 21.90 -999999.99 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.82 0.72 -3.92 185 1099 2009-11-12 09:41:43 2004-02-27 12:20:09 6 2 553 0 243 664 14 41.40 32 75.43 CHANGED lpCpVssCta.Npspp.CsAspIpV.....u....................spu....ppspp.Ts..CtTF ...ltCpVssCsa..N.p..sst.Cs.Aps.IpVt....u.....................tpu.tpstpTs..CtTF................................................. 0 125 200 227 +7390 PF07562 NCD3G ANF_assoc; Nine Cysteines Domain of family 3 GPCR Liu XH, He Q, Studholme DJ anon [1] Family This conserved sequence contains several highly-conserved Cys residues that are predicted to form disulphide bridges. It is predicted to lie outside the cell membrane, tethered to the Pfam:PF00003 in several receptor proteins. 
25.40 25.40 25.50 25.70 25.20 25.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.97 0.72 -4.25 119 2732 2009-09-14 23:23:15 2004-02-27 13:11:15 9 24 148 9 1676 2202 0 52.60 41 6.74 CHANGED PhSsCSpsC.sGptK...th.pGpshCCacChsCsssphssts.Dth.pChtCs.p...Wu ..............PpSlCS.psC...t..PG.hRK................th.cG..ps.....CCacChsC.s-sc...hs.....sp......s......Dt......pCh...p..Csts.pas.......................... 0 120 276 1069 +7391 PF07563 DUF1541 Protein of unknown function (DUF1541) Yeats C anon Yeats C Family This family consists of several hypothetical bacterial and occurs as a tandem repeat. 25.00 25.00 52.70 35.20 21.80 17.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.80 0.72 -4.29 25 328 2009-01-15 18:05:59 2004-03-01 10:17:26 6 3 151 8 58 283 0 52.60 52 56.99 CHANGED psGspVllsAsHMcGMcGAcAsIcuAhcTTVYhVsYsPTsGGcpVcNHKWVsc ....sGspVhlpAsHMsGMKGApAsIcuAhc.TTlYsVsYpPTsGGcpVpNHKWVsp....... 0 16 43 51 +7392 PF07564 DUF1542 Domain of Unknown Function (DUF1542) Yeats C anon Yeats C Domain This domain is found in several cell surface proteins. Some are involved in antibiotic resistance (e.g Swiss:Q9RL69 and Swiss:Q9LCJ9) [1] and/or cellular adhesion (e.g. Swiss:Q931R6) [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.37 0.72 -3.96 112 9196 2009-01-15 18:05:59 2004-03-01 17:55:50 6 178 465 0 319 6747 7 70.10 27 29.11 CHANGED pppstIssssssTsEEKptAhsplspthppAhssIspAposspVsps......pspulssIp....sl...pss..spt....KssApp ..................phstIss..sssuTsEEKpsAhsplspth.spAhpsIs..s..A..s...T...........ssp...........Vsps...........cspulssIp......sl.....pss...sph......KtsAp..................................... 
0 58 119 228 +7393 PF07565 Band_3_cyto Band 3 cytoplasmic domain Bateman A anon Pfam-B_1004 (release 3.0) Domain This family contains the cytoplasmic domain of the Band 3 anion exchange proteins that exchange Cl-/HCO3-. Band 3 constitutes the most abundant polypeptide in the red blood cell membrane, comprising 25% of the total membrane protein. The cytoplasmic domain of band 3 functions primarily as an anchoring site for other membrane-associated proteins. Included among the protein ligands of cdb3 are ankyrin, protein 4.2, protein 4.1, glyceraldehyde-3-phosphate dehydrogenase (GAPDH), phosphofructokinase, aldolase, hemoglobin, hemichromes, and the protein tyrosine kinase (p72syk). [1] 23.20 23.20 23.20 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.51 0.70 -4.87 32 1074 2012-10-02 23:31:29 2004-03-02 15:25:32 8 13 122 4 398 964 12 235.00 42 26.14 CHANGED -l-csscRWu+PHVusLoa+SLhELR+sltpGsVLLDLctsoLsslActll-phlhpsplcspsRcplhcsLLh+Hp.......H.s-.........t.t..hshhhshushsp..t.................................t.sps.hs.spshtsp..tpttph....................................................................................phpphp.phhcKIPpsuEAosVLV.........GpV-FL-pPslAFVRLppuVh....LpulhEVPlPsRFlFlLLGP.sssshsYHElGRuhATLMoDclF+psAYtAcsRcDLLsuIc-FLDsulVLPPu-hssp 
..............................................................................................................................................................cVEcsu-RWuKPaVAoLSh+SL....hELRp....sl.p...GslhLDhctso..........L.tl.................s.c...ll-p..l...ssplpsp...R.tpVhpsLLh+Hp.......Htsc......................hshhhS..hushsp...u.p.p.................................................................t..............t....s.......h..............tt...t..................................................................................................................................................t.phpphp.phhc.KI.P.tsAEAos.VLV................................GpV-FL-p..PhhA.F.VRLppA.Vh....L.suls.........E............VP..l.....Ps...RFLFlLLGP..tups.pY.H.E.IGRuhATLMoDc.........l.FH-sAYpAccR...........pDLl.uuI-EFLDpshVLPPGEh-s.s.................................................................. 0 68 97 223 +7394 PF07566 DUF1543 Domain of Unknown Function (DUF1543) Yeats C anon Yeats C Domain This domain is found as 1-2 copies in a small family of proteins of unknown function. 21.70 21.70 21.70 22.10 21.30 21.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.66 0.72 -4.45 44 345 2009-01-15 18:05:59 2004-03-03 13:14:42 7 2 198 16 80 277 231 53.00 32 53.00 CHANGED splEhHDltaVVupslcsshspl+ppW....hGstpuLHlDuatplcpl....DGaclp......L ...t.hElHDhthsVusshp-Ah.ph+psW....hsst.pplHhDshpsVcss......tuhpl..................... 0 21 36 60 +7396 PF07568 HisKA_2 Histidine kinase Studholme DJ anon BLAST Domain This is the dimerisation and phosphoacceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. It is usually found adjacent to a C-terminal ATPase domain (Pfam:PF02518). This domain is found in a wide range of Bacteria and also several Archaea. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.61 0.72 -4.12 91 1811 2012-10-11 19:05:54 2004-03-03 17:01:18 7 326 1027 0 823 2166 242 75.40 34 13.93 CHANGED EIHHRlKNNLQlISSLLsLQucphpsccs......hcsh+ESQsRVhShAllHEcLYcup...shcslsFusYlpcLspsLhpoY ......ElHHRVKNNLQsluSLLpLQ.u..cp.s.....p.s...s.c..s.......+psLp.-.utsRlpulAhlH-.t.Lh...ps.t......c.....p.........p...lshs.p.hlpplhppl....h................................... 0 296 586 667 +7397 PF07569 Hira TUP1-like enhancer of split Studholme DJ, Wood V anon Pfam-B_7106 (release 12.0) Family The Hira proteins are found in a range of eukaryotes and are implicated in the assembly of repressive chromatin. These proteins also contain Pfam:PF00400. 20.30 20.30 20.30 21.60 20.20 19.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.40 0.70 -5.13 34 350 2009-01-15 18:05:59 2004-03-04 09:39:21 6 28 250 0 246 360 0 188.80 25 21.31 CHANGED sG+RlhPsllLsusssh..LEspupa.LhslTusGhLalWNlpptpshhss..SltslLs.s....t.....spusslotsplo.ppGhPllTLSNGcuYsasssltsW.hlo-sW..WuhuSpYWssh..............ssstst......................................ssssuhlshLEp+Tssphhhp...Gphh.hp+h........h+shl.ccGhEshE....psloluHLEN+ltsuhhLtupcEa+taLhhYs++Lupp..........Ghcs+lcElhppL ........................................................sG+RlhsslhLsu....hsh..lc.s...p...spa..lhslTssGhhhlW......s..lpp......t....ps......hhts.............S......lts.l.Ls...........................sssssl..spst.l.s..pp.G....h.P.l.l.s..L........o.......s..G.c.u......Yhas.shtsW.hlu-t....hh.supahssh.......stt..................................................ptu.lshlptpsppth....................................tshh.hps.pthp....p.holuaLEspltsuhhL.tutpE...a+haLhhYs+hL.stp..........G..c.+....lc-lhptL................................................................ 
0 78 140 211 +7399 PF07571 DUF1546 Protein of unknown function (DUF1546) Studholme DJ, Wood V anon Pfam-B_3691 (release 12.0) Family Associated with Pfam:PF02969 in Transcription initiation factor TFIID subunit 6 (TAF6). 20.30 20.30 20.50 20.70 20.20 18.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.00 0.72 -3.87 29 394 2009-01-15 18:05:59 2004-03-04 11:00:50 8 7 260 0 270 386 3 93.80 35 17.62 CHANGED lLTCllu+pL...sspss......................hcppauLR-hAAsLLuhIs++ausshssLpP..Rls+ThlKshlDs........s+.shuoaYGAlhGLpsl.Gs-sl+hlllPpLpsa ...........................................llTCllu+pL..sspss..................................................hcs.HasLRDhAApLlup.Is..+p..auss.....hssLps...R....ls+ohhKshhDs.......................p+..shsopYGAlhGLttL..G..csl+hlllPpLp................... 0 93 145 215 +7400 PF07572 BCNT Bucentaur or craniofacial development Studholme D anon Pfam-B_10149 (release 12.0) Family Bucentaur or craniofacial development protein 1 (BCNT) in ruminents has a different domain architecture to that in mouse and human. For this reason it has been used as a model for molecular evolution [1-3]. Both bovine and human BCNTs are phosphorylated by casein kinase II in vitro [4]. 22.20 22.20 25.30 24.20 21.50 21.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.01 0.72 -4.04 37 295 2009-01-15 18:05:59 2004-03-04 11:32:09 7 6 263 0 214 299 2 78.80 36 26.19 CHANGED K+......shlsplhutht......KcpKLsTLEKS+hDWsuas-cc.GIp....-ELphHs+.K.......cGYLs+p-FLsRs-s+p.Ep.+phRhpph ..................tp......shhsplluphs......KptK....loTLEKS+lDWssah-cc...GIp..........-E.Lph..+s+uK................-GYL-+psFLpRs-t+p.Etc+phRhpt.h............. 
0 71 117 177 +7401 PF07573 AreA_N Nitrogen regulatory protein AreA N terminus Studholme DJ, Wood V anon Pfam-B_11486 (release 11.0) Family The AreA nitrogen regulatory protein proteins (which are GATA type transcription factors) share a highly conserved N terminus and Pfam:PF00320 at the C terminus. 21.50 21.50 21.60 22.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.23 0.72 -3.42 7 19 2009-01-15 18:05:59 2004-03-04 13:21:54 6 2 18 0 9 21 0 82.60 56 9.91 CHANGED MSGloh.GGGssusRPTtsAs.h.o..ssADADR................osQLSDDFShsSPhSssDSupspDGLLpDSLFPEW+sGAPRsGh-SPDE ........MSGloh.GGG.s..GusRPTpsAshh.oh.s.ADADR.Sssp..p........oSQLSDDFShGSPlSPsDSSpApDuLLpDSLFPEW+sGsPRs.GhDuPDE 0 1 2 6 +7402 PF07574 SMC_Nse1 Nse1 non-SMC component of SMC5-6 complex Studholme DJ, Wood V anon Pfam-B_24547 (release 11.0) Family S. cerevisiae Nse1 (Swiss:Q07913) forms part of a complex with SMC5-SMC6 This non-structural maintenance of chromosomes (SMC) complex plays an essential role in genomic stability, being involved in DNA repair and DNA metabolism [1,2]. It is conserved in eukaryotes from yeast to human. 
22.00 22.00 22.30 22.40 21.60 21.90 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.14 0.71 -4.69 9 296 2009-01-15 18:05:59 2004-03-04 13:40:24 8 7 238 1 199 289 2 168.00 25 61.36 CHANGED -s+RthLQslhs.+Ghlpps.ltphhsslsspps..s.h.ppt........hLssaVsplNpclpsLshclctspa............................ssutpaYValNssssstschuTsaossElpahKthl-tlspppshhtt........lshlstsscltupt.pp...............................................lt.ocsppLLpchsp.tWh.hohcschsLshRsLlEh .......................cRthLQhhhs.+us.hp.pp....hptlh...t...th...hphpp....t.....t.............................ptlp.salssINstl.psht...hcI+pshp................................................p.suphhaul..VNhsss...s.h.o.p..hA.T.....sastsE........lshh+phl-tlhp..s.ts...s...tpt........s.hls.hptlphtp..tp.....................................................................pcsEphLpphlpptWh.h..p..pp.G..asLssRslhEh........................................................................................................ 1 61 104 158 +7403 PF07575 Nucleopor_Nup85 Nuceloporin_Nup85; Nucelopor_Nup85; Nup85 Nucleoporin Studholme DJ, Wood V anon Pfam-B_55990 (release 11.0) Family A family of nucleoporins conserved from yeast to human. THe nuclear pore complex is a large assembly composed of two essential complexes: the heptameric Nup84 complex and the heteromeric Nic96-containing complex. The Nup84 complex is composed of one copy each of Nup84, Nup85, Nup120, Nup133, Nup145C, Sec13, and Seh1. The structure of a complex of Nup85 and Seh1 was solved [3]. The N-terminus of Nup85 is inserted and forms a three-stranded blade that completes the Seh1 6-bladed beta-propeller in trans. Following its N-terminal insertion blade, Nup85 forms a compact cuboid structure composed of 20 helices, with two distinct modules, referred to as crown and trunk [3]. 
19.70 19.70 19.80 19.80 19.20 19.50 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.93 0.70 -6.30 20 368 2009-01-15 18:05:59 2004-03-04 14:56:35 8 8 253 16 255 381 1 430.50 18 69.60 CHANGED sshlYhlppstt.hposphRpllsEsaplFhsLQphc.......t.spspps......pal.plspsYRSllpsshpplpph.th...hhpc.tphpp......plolLhslcslWp....LhEhLah-ssstuslltpLL-Wl.Rhcspss-phsp.........-lLtppc.....slpcpssaWclVssLlLpGhh-pAhphL.tpcuph.........sspshhcshtsLLpphPhhp..tt.........shp-hcppWccW+spspc.......plpssshsspspLEsllpllsGsccslhp....hsssWYEhhsuhLLYhpP....osc.hE.LptYAppslsta..............s.ssspsh-plhlslhptclhpVltphpph.sshWhsAHlsDLl....-+sGlLp......spp.phu..............sshREaLLh-YApsLhS..c+oLWQlulsYlshssptG....+shlEhllsRlPlpTscct.+hLplCcphpLs-lsppIhKlhup+sLcps+hGsALsWhh+ApDhshlshlophlh.....cchspcGshhs.-llssluss.....................hlu.spLoFLupYp-Fa+hhpp.......ccatcAscLLlsLhpsphsPppaWhsLLsDslsLLp...ccsh.hssppThpllcsLEcht .................................................................................s............................................................................................................................................................................................................................l....L.hpW..h.p...c.............tp..t.............phh..t..............tp..taW.p.h.l..hlh.G..hp.shthL...ttt.............................thhp.h....tlhpphP..t......................................................................th...th..p..hp...h......tp...........................pt.....h.t.......tlp.lhplh...hGp....t.th.t................p.sWhchh.shhha...s....................................p....hht..hp.h..........................................s..p.h.c.hh.hshh.phshtthlt.....h..........p....h...s.hh...ssthh-lh....................pht..thlt.................s............................shc.c.h...lhpYut.Lhs.............ppth..........Wplulthh......s....h..s....p.s.........thhlp.hl..+h..sh.....p.....o.....p...........p............p.......h....
..chl................pl...............st.pht.h..p.........pplhchhu.p............h....h.p..p.t.p......hG...pAl.hh.......h............p...........ups............h...........hhp.............l.....sp.h.hh.....................pph...ps....................t.hl..t.lhst..............................................hht...tLsh....h....s....phhcFhp......hhtp............................tph......tu......ph..Ll...lh................................................................................t.............................................................................................................. 0 86 137 210 +7404 PF07576 BRAP2 BRCA1-associated protein 2 Studholme DJ, Wood V anon Pfam-B_5419 (release 11.0) Family These proteins include BRCA1-associated protein 2 (BRAP2), which binds nuclear localisation signals (NLSs) in vitro and in yeast two-hybrid screening [1]. These proteins share a region of sequence similarity at their N terminus. They also have Pfam:PF02148 at the C terminus. 20.30 20.30 21.60 20.60 20.20 19.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.27 0.72 -4.48 8 297 2012-10-02 20:46:34 2004-03-04 15:41:04 7 12 249 0 215 297 3 104.60 28 17.78 CHANGED ppssppphpsss.sthhslhsVsshhssc.lhphsuhppp.Icpl+ll+DusPNpaMVLI+F+sppsAhsFYppFNG+sFNslEs-s.CHllaVpcVEhspp....ssssss ..................s..........pp.ot.hhsl.hssshhhssc.L.h..hh.s.s..hppslp.p.h+l...l+..........c.up...s.NpYMVLlKFcspps.AcpFhppaNG+.F.NSh.Es.-s.C+llaVpplphpps..........s..................... 1 62 116 175 +7405 PF07577 DUF1547 Domain of Unknown Function (DUF1547) Yeats C anon Yeats C Family This family appears to be found only in a small family of Chlamydia species. 
25.00 25.00 48.60 25.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.73 0.72 -4.30 17 329 2009-09-10 18:55:26 2004-03-08 13:41:42 6 4 37 0 10 241 0 60.10 42 19.82 CHANGED cILspVRpHLDsVYsupsst.stt....NQsLGslI+shE.ssTupsTllsshpssssshuop DlLusVRtHLDhVYPu-ssssops....NQsLG-llpchEspGTupcTllos.puusspss............ 0 3 3 7 +7406 PF07578 LAB_N Lipid A Biosynthesis N-terminal domain Yeats C anon Yeats C Family This family is found at the N-terminus of a group of Chlamydial Lipid A biosynthesis proteins. It is also found by itself in a family of proteins of unknown function. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.38 0.72 -4.06 10 310 2012-10-03 12:15:12 2004-03-08 16:42:16 6 3 227 0 109 288 189 71.50 40 47.27 CHANGED lGhluphFFosRFslQWhhSE+ppcSslPtsFWhhSllGusLhLlYulh..puDsVhlLsauhsLllYlRNLpl ..lGhhuQhhFuhRFllQ.Wl.h.S.E+.t.p+SllPhsFWhhSl.hGuhhhLsYulh..+pDs.VhlLupuhulhlYhRNLh............ 0 33 74 91 +7407 PF07579 DUF1548 Domain of Unknown Function (DUF1548) Yeats C anon Yeats C Family This family appears to be found only in a small family of Chlamydia proteins. 22.20 22.20 23.70 44.80 20.70 19.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.65 0.71 -4.05 10 36 2009-01-15 18:05:59 2004-03-09 10:28:22 6 2 13 0 8 22 0 133.10 39 23.45 CHANGED Rspc....EWHpIsuFKHh+GcpLGL.hDsLuc.LsshTlphTohphhpp+spYphlhppFlssY+sSsssLlpalhsQhlsSSp-lpsulpsaLL-sl.sslslPEs.c+sslls-lFY.D-s.YEhspEGIlYLLlh.GII .............ppcEWHhhsuhKah+G+pLGLsh-pLsp.LsshTlp.TuhphhpppppYphlhspFlssYpsSsssLlshlhpphhsuos-hpsslpsalL-pl.cslslPEs.t+sslhpslFa.D-p..Y-hsppuIsYLLhh.sII..... 0 0 0 6 +7408 PF07580 Peptidase_M26_C M26 IgA1-specific Metallo-endopeptidase C-terminal region Studholme DJ, Yeats C anon Merops Family These peptidases, which cleave mammalian IgA, are found in Gram-positive bacteria. 
Often found associated with Pfam:PF00746, they may be attached to the cell wall. 21.50 21.50 21.90 25.30 21.30 21.40 hmmbuild -o /dev/null HMM SEED 737 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.43 0.70 -6.23 18 836 2009-01-15 18:05:59 2004-03-09 12:20:28 9 67 284 0 22 447 2 617.60 40 40.45 CHANGED spltEYslssh.sllYTPNtlh+D.pssLlssVhscLpuVpLpSs..sl+plL...s....hss-s.....s.......lc-LYL-ESFscVKssLschl+KLLps-sphhpss.psspphllcKIccNKsAlhLGLTYLNRaYshKYschslK-LhhFKPDFaGK.ssSsLDpLIclGpSu.sNL+ucpslpsYsphlusshGps.cLasaLch.R+LFsspps.N-WFKcsoK..AYlVEppSsl.EhpsKp..........htlY.....Dplsss..pappMlLPLLTLp.cpplFlISshsTluFuuaE.....+Ytpss.......cEtpphhccplccsAccQRsahDaWYRlhs-ss+-KLh+S.l.....VaDuashsssshh.c+hutsstcsshssl+EFF....GPsG+aathp.uh.GAYAsGp..........sVYahshchlsca.GsSsaTHEhTHssDphIYLGGaG+RpGhGsEsaApGLLQoPssssss....sLGlNhsac+psDss....plashDP.s+hpoccDlcpYM+Nh.DslhhLDYLEupullsphsss.pspWF+Kl-pcahcs........sspsathstVRsLTsEEtp..pLsSl-DLlDNsllopRshsuNt.......ca..csuYholshhusIYuuhsoSpGuPGslhF++sAFclhuhhGYccGFlsYlSNpYcspApppGpsh...........loDchllcKV.sGpaso.hp-FKKAha+EhhsKtp..slsslTls..........spTIsoas-LpsLhccAVpcDhtt....lpssssspp....stsppLKptlaKAhLcpTDsF..+oSIF 
............................................................................pltEYslssh..sllYT.PNhh..ps..hspllpplhspLppVph.St.....tlpphl...s....hptps......p.......................................hpc.LYL-EpFspsKtpLpp.lppll.p.s-.tt.h..ss.pss.thlhcKl..cpNK..ttlhhuLTYLpRaYshpasp..hshKcLhha+..DFa..Gc.ssssLDp.lIplG..............Su......pNLhuppshpsYt..lutthspt.sLhshLch.hclFhspps.N-WFhptsK..sYlsE.p..Ss..-htscp..........htla..............Dtlsss....h....at.p......M..lLPLLsL...ptplalISshsohuauua-.....+Ytppp.........pp.tp.hc.pphccsActppsahDaW.+lh.sps+p+Lh+s.......la.Ds.hp...h...sst.t.....h....pth.h..sph.p.....p.s......ssh+pha....GPhsph...h.hp.th.GAhA.sh.........tVhahshchl.sc..GhssaTHEhTHssDp.lYhGGat+RpGhssEh....aApGhLQsPspssss........slslN.hhctppsss................ph.shsP.pchpstt-lppYh+sh.DslhhL-aLEupullt.....p.ptt.p.phh+Kl-pphhps.............tsht.hshlRpLst-Ehp..pLsShssLl-pshhop+t...ss............sa..psuYhslp..hhssIYuu.ssppGsPGsl.h++.sachhuhhGYccGFlsYlSNpYcppAcp.pGcsh...........loDchllcKl..s.....spapo.htsFKKshapEhl-Khp...slpslT.ls............spoIs.shpcLppLhscAVpcDht...........hhp..t...p........ptshpLKptlaKthLppTssF..psSIF..................................................................................................... 0 3 9 12 +7409 PF07581 Glug The GLUG motif Yeats C anon Yeats C Family This family is found in the IgA1 (M26) peptidases, which attached to the cell wall peptidoglycan by an amide bond ([1]). IgA1 protease selectively cleaves human IgA1 and is likely to be a pathogenicity factor in some pathogens ([2]). This family is also found in various other contexts, including with Pfam:PF05860. It is named GLUG after the mostly conserved G-L-any-G motif. 
20.50 18.00 20.50 18.00 20.40 17.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.22 0.73 -7.63 0.73 -3.40 110 564 2009-01-15 18:05:59 2004-03-09 12:56:16 7 70 210 0 192 551 145 27.50 37 4.06 CHANGED sstslGGllGtstt.........uslpsssusu..sls ......sttlGGLVGhsht............usIpNuhAoG..sVs...... 0 77 158 178 +7410 PF07582 AP_endonuc_2_N AP endonuclease family 2 C terminus Studholme DJ anon [1] Family This highly-conserved sequence is found at the C terminus of several apurinic/apyrimidinic (AP) endonucleases. in a range of Gram-positive and Gram-negative bacteria. See also Pfam:PF01261. 20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.51 0.72 -4.42 56 878 2009-01-15 18:05:59 2004-03-10 15:21:13 7 3 742 7 289 741 345 53.10 34 16.20 CHANGED -FcsIFSpLsphGYsGhsslEWEsslhctppGApEussFl+ch..lIpssstuFDsh .....atslhSpLtthGYDGhlSlEaE.....D.....sl.....hs.....s-...c.....Ghpcusphl+sh...I.spstsh..h........................... 0 96 195 254 +7411 PF07583 PSCyt2 DUF1549; PSC2; Protein of unknown function (DUF1549) Studholme DJ anon Blast single linkage clustering Family A family of paralogues in the planctomyces. 
29.80 29.80 30.00 30.40 29.50 29.70 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.39 0.70 -4.83 176 580 2009-01-15 18:05:59 2004-03-10 15:58:43 6 38 66 0 247 688 957 204.70 35 22.91 CHANGED ssIDpFlhp+L.cpt.slpPustA-.cpsllRRlshDLsGLPPT...scElcsF.....ls...D...p..u...ss..u.acc....lVD+....LL......sSP.c....YGE....+WupcWLDlsRau-opGh...p.c.......tp.ssatYRDaVlcuhNcshPYDpFlpEQLA.........GD.l...................t....................................pth........h.........................AouFht.t....................hptchh...sDtssssupsFLGloltCApCHcHKa.DPlopcDYYphtAaFs.sspttstt ....................................sIDpalhs.+Lc.pp...s..lpP..u....stAcc.psllRRlshDLsGLPPT..scElcsFls........D...s..s....ss...u.hcclVD+....LL......uSP.c....YGE....+WuppWhDlsRau-opGh.....p.s........p.....statYRDaVlcu.hNpshPaDpFlpEQlA......GD.l..s.......st...........................pph......h..........AouFhphs.............t............php.h...s-tssssupsFLGlo.ltCApCHDHKa.DP.l...........spcDYYphtAhFsssp....th....................................... 0 211 242 246 +7412 PF07584 BatA DUF1550; Aerotolerance regulator N-terminal Studholme DJ anon Blast clustering of Pirellula proteome Domain These proteins share a highly-conserved sequence at their N-terminus. They include several proteins from Rhodopirellula baltica and also several from proteobacteria. The proteins are produced by the Batl operon which appears to be important in pathogenicity and aerotolerance. This family is the conserved N-terminus, but the full length proteins carry multiple membrane-spanning domains [1]. BatA ensures bacterial survival in the early stages of the infection process, when the infected sites are aerobic, and is produced under conditions of oxidative stress [2]. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.79 0.72 -3.79 193 1151 2009-09-10 21:46:21 2004-03-10 16:07:12 6 14 818 0 405 993 385 77.20 25 14.72 CHANGED M..............Fh..sPh.hL................huLl.hlsl..lllahhhht+.pph..tasulphL.tphtppppp......phpphhLLlLRlLhluhlllAlApP ....................tFtsPh.hL.......hhLl.hlsl.....h..h...l...h..h...h...h..t...p+tpph....pFssh...plL....tplhspppp.......hp....a....l..hhh.LplLslshlllAlApP.................................. 0 168 307 362 +7413 PF07585 DUF1551 Protein of unknown function (DUF1551) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins identified in Rhodopirellula baltica. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.99 0.70 -5.57 14 57 2009-01-15 18:05:59 2004-03-10 16:13:21 6 2 20 0 35 65 47 311.20 17 65.17 CHANGED pAEhlhhhpcGss.ssLlo.ss......s.sssss.s..sspsLhusphs.c..hpuGhRlphGhhhss.sshulEhpaaslts.suuhss...ssuphshhutPh.p.Tshss...ssuphl..h.s...........sthslpssschtuhEhNh.....Rhthss.........tts.phshLsGaRahpLc-tLphsps.sshst............shs..sso.l...s.p.shsscNphaGsQlGhphphpp.sthohsuhhKsulsss+tstsspsstshs.s...................s..sssshspsphuhlsEhslshtaplspshslplGYphlahssVshAs...cpIspshsssss............spssss.spsohhhpGlshGhpapa 
..............................................................................................................................................sh.ss..s.p..hpsGhRhphuh..hts....slphph.....hhh..p.ptshts.....sstththht.tsh...s.hsh............tts.h.........................................sthphphpschh...uh-hsh...........ph.h......................tthphphlhGhRahplc-plshsssspt.st..............................................................s..sss.h......................p.shpspNph..a..Gs..QlGhchphph.spasls.shsKsul..hsNctphshp.spsshsts..............................ts......sstspsphuhss...-hslsh............tap..l...spphshphGYphlhhssVshu....pplstshssss................sth..ppsshhhpGhshGhph.a....................................... 0 28 32 32 +7414 PF07586 HXXSHH DUF1552; Protein of unknown function (DUF1552) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins identified in Rhodopirellula baltica. 19.80 19.80 19.80 20.20 19.70 19.40 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.11 0.70 -5.49 102 250 2009-01-15 18:05:59 2004-03-10 16:19:24 6 2 43 0 119 277 568 302.40 23 67.78 CHANGED Rhlhlhhs.Glh..........stpa..hPp.........psGt..................sa............phsts..LpPLp...sh+cchsllsGlsp.tst......suHtsss.saLou.....h...tstst...hpss.lSlDQlhAppl....Gp..pTRasSLplusps..............tt.hss.slSas................psGp.....Plss..psPpplFc+LF.usssss........tttppp..hppcpSlLDtlhpcu+sl...pppLupsD..+pKLDpYhsolR-l..Epcl..........pptpthh........sh..t..Pthsht.s.................ss.shhpch.chhhDLhslAhpsD.TRVsThhhsss.........hhht.........tl.Gl..............stshH.slSHp...sss.ptht...phtphcpahspphuh 
..................................................................RhshhhhstGsh.....................ppa...hPp...............ssGp...............................sh.............p.hsth...LpPLp...sh+schsllsGlsp.tst.............ssHtsss.shLTu......s...tsss...........thpsu.hSlDQhlAppl......up....pTphs.SLplusps..............st..hts.slSas................sssp.....Plss..psPpthFc+LF.usssss.............tttppp.............hppcpSlLDhVtp-scpL...pppLupsD..+pKLDpYhsulR-lEp+l.............pptptht.......st...h......Pphsht.s.......................tstshhpch.+lhhDlhslAhpsDhTRVsThh..huss................tth.hhs..............tl..Gl................................spsaH.slSHc........sss..ptht......phtplspahhpthu............................. 1 102 108 114 +7415 PF07587 PSD1 DUF1553; Protein of unknown function (DUF1553) Studholme DJ anon Blast clustering of Pirellula genome Family A family of proteins found in Rhodopirellula baltica. 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.68 0.70 -4.99 41 593 2009-01-15 18:05:59 2004-03-10 16:47:29 6 37 66 0 253 700 966 244.70 30 28.00 CHANGED ssRhsLApWlsss-N...PLTARVhVNRlWpphFGp........Gl.Vcos-DFGhtGssPoHPELLDaLAtcFl-.sG.....WslKpLhRpIlhScTYppuSpss......scs........th.D....PsNchhuRhsh+RLsAEhlRDshLsloG...Ls.p.htGssh....s...........t........h.ts.stpphRRulYthhpRsh..............sshhpsFDtsststssspRppossP.hQALsLhNsshhhptup....thApplhpp............................................................................................tsss........ppplsthFttshuRtPospEhpthtshlsp 
...................................................................................................t.pRhtLAcW.ls.ssc.N...PLs.ARVhVNRlWpphFGp........Gl.Vpos-DF.....Gh.....G......p.........P.o..H..PELLDaLAscFhc..ps............WslKpLhRhIlhSpsYppoSpss.......spt........................th..D....spN....phhuRhshpRLsA...E.lRDshLssoG.....Ls..p..ht..G.s.h...ts..............................................tt..h.RRul..Y....th...hpRsh....................sshhtsFDtss.t.p..s.s.spRpp.osss.hQALhlhNss.hhhctup....thup.p..hhpp..............................................................................................................................................................................................tt...........pp..tlpthahhshuRtPsspEhphhhphh..p....................................................................................................... 0 217 249 252 +7416 PF07588 DUF1554 Protein of unknown function (DUF1554) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of proteins identified in Leptospira interrogans. 21.20 21.20 22.00 31.10 21.00 18.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.20 0.71 -4.71 15 119 2009-01-15 18:05:59 2004-03-10 16:55:52 6 6 18 0 20 123 4 148.20 36 48.04 CHANGED sssssHNG..NaGGIoGADAaCp..uplPo.sLsusG..sYKAMLVD.s.....ssRhATossPNSosG..QhDWVhpPNppYpRuDsss.h.lhTTNusGlFsFs....LpNuFss....shpsshWTGLs.......ssWpThs........stsCss....Wso...................us.sshhGtaGsusp.psust ....................s..s.hHsG..shGGIuGADuaCp..uphPu...slsusG..sYKAMLVD.u.....ss..RhAsosssNussG....QhDWVhpPNppYpRu-.sss..h.lhTTNusGlFsFs......LpNsFss........hputhWTGLs.......ssWTThs........s.sCss..........Wss...................us.hshhGhhG.usthpsth.................................... 1 13 18 18 +7417 PF07589 VPEP DUF1555; PEP-CTERM motif Studholme DJ, Bateman A anon Blast clustering of Pirellula genome Motif This motif has been identified in a wide range of bacteria at their C-terminus. 
It has been suggested that this is a protein sorting signal. Based on phylogenetic profiling it has been suggested that the EpsH family of proteins mediate this function [1]. 20.70 18.00 20.70 18.50 20.60 17.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.59 0.73 -7.37 0.73 -3.88 163 1616 2009-09-11 20:23:54 2004-03-10 17:07:42 6 71 212 0 766 1794 370 25.50 40 9.66 CHANGED sVPEPuo..hsLhulG..lhulsh.....hpR+ ...sVPEPuo..huLluhG...Lsuluh.....htRR+........ 1 297 677 745 +7418 PF07590 DUF1556 Protein of unknown function (DUF1556) Studholme DJ anon Blast clustering of Pirellula proteome Family \N 25.00 25.00 31.10 115.10 20.70 17.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.32 0.72 -3.75 3 3 2009-01-15 18:05:59 2004-03-10 17:11:44 6 1 1 0 3 3 0 93.70 37 69.21 CHANGED MNQGSETTERIRTSSEGRMGAICRDGNLKLKTHRAhSVNPLPsNSSPupVV+SKRPtssSsAScPTtLpVRERsR....GulPpucFtpuoShpPtss MNQGSETTERIRTSSEGRMGAICRDGNLKLKTHRAhSVNPLPsNSSPupVV+SKRPtssSsAScPTtLpVRERsR....GulPpucFtpuoShpPts.. 0 3 3 3 +7419 PF07591 PT-HINT DUF1557; Pretoxin HINT domain Studholme D, Zhang D, Iyer LM, Aravind, L anon Blast clustering of Leptospira proteome. Family A member of the HINT superfamily of proteases that is usually found N-terminal to the toxin module in polymorphic toxin systems. The domain is predicted to function in releasing the toxin domain by autoproteolysis [1]. 
25.60 25.60 25.60 25.60 25.40 25.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.62 0.71 -4.27 4 642 2012-10-03 10:25:13 2004-03-10 17:20:07 6 125 235 2 152 664 82 116.00 33 14.87 CHANGED LVpTssGhpAIs+IpsGD+Vlups.pohcsuYKsVptpYsp.hpEhlalplsD.....psLlosc.HPFYsp.stalcApcLphGDcLlspsGshpsVpsIhlc..scPhKsYNlpVsDaHTYFVt.....TpGlWVHNu ...............................................tG.h.h.s.Itplps.G.D.Vhu..ts..t..o....sp...h.t....+.V..ht...h.a...s....p.........p.c...h...l.......l..pl.s.s..............................psl....h.s.st.HP......Fa.....s..p....................................t................t...W..........lpA..........t.cLp.sGsp..L........h.......s.p.s.......G............p..h..........s.V.p.s..h.th...c..............s..p..s.hpsYNLsVs.chHTYa..Vt.....sp...u....V..hV..HNt............................. 0 72 126 136 +7420 PF07592 DDE_Tnp_ISAZ013 Transposase_36; Rhodopirellula transposase DDE domain Studholme D anon Blast clustering of Pirellula proteome Domain These transposases are found in the planctomycete Rhodopirellula baltica, the cyanobacterium Nostoc, and the Gram-positive bacterium Streptomyces. 
22.50 22.50 22.50 23.80 22.30 22.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.82 0.70 -5.68 11 318 2012-10-03 01:22:09 2004-03-10 17:32:17 6 4 69 0 66 395 81 192.00 30 76.56 CHANGED lpcLcuLl-ssTtGDPp..SsL+WTpKSspplucpL.pppGap....lutpsVucLLp.chGYSLQustKTppGspHPDR-sQFchINcplpphpssspPV.ISVDTKKKEllGsa+NsG+pWpppups.cVtsHDF.s.shGcssPYGlYDlssNpGaVsVGss+DTu-FAV-Sl+pWWpphG+c+YPcApcLlIsADsGGSNG.RsRhWKhcL.QcLusphGLsIpVCHaPPGsSKWNKIEHRhFSaISpNW+GpPLsoaEsllNLIuuTTTppGLpVpspLDcptYpsGlKVoDcphcslp..IpRsshHs-WNYpIpPp ........................................................................................................h.....................................................................................................................h..pht..lG...p.G..h............p................uhh......t......hs.s..s...s.hhsttlt.hW.......h.t.ppl.l.hDsG.spNs.h.p.ah.th..t.hutp.th.lplhahPPhpSKaN.lE.+has.lp.p.WpGp.L..shpshlth..tT..hT..pGlt..stl.pt.Y.hGhpl..sp..p..thtth.....h.ht..h.tWsh.l.......................... 0 10 46 63 +7421 PF07593 UnbV_ASPIC ASPIC and UnbV Studholme DJ anon Blast clustering of Pirellula proteome Family This conserved sequence is found associated with Pfam:PF00515 in several paralogous proteins in Rhodopirellula baltica. It is also found associated with Pfam:PF01839 in several eukaryotic integrin-like proteins (e.g. human ASPIC Swiss:Q9NQ78) and in several other bacterial proteins (e.g. Swiss:Q84HN1 [1]). 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.31 0.72 -4.30 79 553 2009-01-15 18:05:59 2004-03-10 17:57:57 7 89 240 0 278 593 823 70.90 27 9.06 CHANGED sAlGApVplp.ss...uppphppls....uGuGahu.psshpl+FGLGs..sssssplplpW..P.sGphpphpsl.s....sspphhll ...................AlGAcVpl...h..ss............uttp....hppls....sus.G.ahu.ps.pshlHFGLGp....sspssplclpW..P..sGp...h.pp..hpth..t.....ssphh...h.................. 
0 144 226 255 +7423 PF07595 Planc_extracel Planctomycete extracellular Studholme DJ anon Blast clustering of Pirellula proteome Motif This motif is conserved as the N terminus of several Rhodopirellula baltica proteins predicted to be extracellular. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.94 0.73 -7.26 0.73 -4.01 27 37 2009-09-16 17:53:18 2004-03-11 10:45:04 7 22 4 0 24 38 0 24.90 37 1.14 CHANGED ppppppppsp+RRLthEsLEsRpLL ...pppppppsp+RRLthEsLEsRpLL 0 24 24 24 +7424 PF07596 SBP_bac_10 DUF1559; Protein of unknown function (DUF1559) Studholme DJ anon Blast clustering of Pirellula proteome Family A large family of paralogous proteins apparently unique to planctomycetes. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.77 0.70 -4.37 147 1532 2009-01-15 18:05:59 2004-03-11 10:51:16 6 21 22 0 391 1685 425 209.30 20 68.30 CHANGED QQAREAARRhpCsNNLKQlGLAlHNYH..DTatt.h...Psushssssssst.........................hs..........Wts...h..lLPalEQssl.a-ph....................shstshtsssssssstt................................ls...........sa......h.C.PScs............................................t.....ssssttsstsssssssstsssstsstssssGhhhhss..................thphpDl..pDGsSN.TlhluEpththsstststhh......................................................................sthststttssshshssssshsssssssssssttshsShHsG.Gsphh...huDGSV+F..lo-sl-hs 
...............................................................................tAREAAR.RspCtNNLKQlGLA.hpsY.p...s.s.h.s.t....h.......P.sst.hss..sss.s....................................................................hs.......ahh.............lhP.a.h....-..pt.s.h..hp.t...h....................................p...t................................................................h.hh..C..Pop............................................................................................................................................................................................................................................t.t.th...DG.sp..shhh.E.........................................................................................................................................................................................................................t.p....t............-ut...........s..................................................................................................................................................................................... 0 391 391 391 +7425 PF07597 DUF1560 Protein of unknown function (DUF1560) Studholme DJ anon Blast clustering of Pirellula genome Family Small family of short hypothetical proteins in Rhodopirellula baltica. 25.00 25.00 25.90 37.10 20.40 17.60 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.34 0.72 -3.96 2 4 2009-01-15 18:05:59 2004-03-11 11:09:06 6 1 2 0 3 4 0 38.50 75 69.06 CHANGED C..shGhL+Ps.hs.Ro................ISVRSAcISVPLAALAEH CVGNTGVLKPNIhsP+o................ISVRSAEISVPLAALAEH 0 3 3 3 +7426 PF07598 DUF1561 Protein of unknown function (DUF1561) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of paralogous proteins in Leptospira interrogans. 
25.00 25.00 385.70 385.50 18.90 18.50 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.19 0.70 -6.57 23 62 2009-01-15 18:05:59 2004-03-11 11:15:06 6 1 10 0 31 64 0 608.70 54 98.82 CHANGED Kl....Ll..hhhSlhl.....s.pTshsA.....lsspllQKPsDpP+DKsI+V.pVHsGtcYCYuPsFosGEuYIhIppCsp.pVhpARYDVFQRIuaNINsTWLChTAPpoVs....cuppsWDYlhLRPCsINDP.QRWIIK.....cNuFaTADt+aRlKDhsWYuYISKNuuDhYsHTLs.soMscWlpTlATPGNISl+T.IAWsh.os.G........ppYaIpssGSsp.ssTP..LYYNPENGHlAQY.PsSGslhCMhSp.ssspsWNWVpWthC..oD.....sl....SKcssuaWNl.hhssctGhIh.DYpGNhLRVT+YGosWGVsYTAKPsYLcpDTo..NSPTShFllspDl.cWsRYssuNLGcT.pYCPA.GpKcsls.....+pRlKR..oLPPDFpLT-pWl+RLa-IApSoo.....ssupppl.GhCGsChLpohQMLAELQEaHspsPLQsG.GYFFsTA.spDPFISFRQRaPtLsphLpss.shas.....ss.p.hspsppluhuuAhshLPQY-Wps.SshhpTcsEhhSclpsLlsuPsGolWhsllhcppsDG.shsGHA.PILRTspGLVlIPTNs.sshoL-paRpuLsPT....pDPppllsphhptus.psLt..sLsThQ.hstlhcsPhshhlSpRNCTGEG-cRRGoGchPpoohlNQCu...S....GRCu .....h.hhhlhhl.lhh.......s..s.lsh....tls.pllQKPTD.P+DKsI+l.hlHsGupaCYuPsFosGESYIhI-pChp.pVhsARYDVFQRISYNINNTWLCITAPEoVl....+ucpsWDYVpLRPCTINDPLQRWIlK.....-NuFWTADt+YRLKDhsWYuYIS+NSGDpYsHTLs.sSMscWlpTlATPGNISIpTSIAWshto..u........pRYFI+ssGSsK.NTTP..LYYNPEsGHlAQYsPhSGsLhCMYSph.ss.pWNWVpWthC......oDt..sl.SKcN...PuFWNV..hts-cGGhIh.DYpGNhLRVTRYGsNWGVAYssKPsYlcpDTs..pSPTSLFllc+sLLcWsRYTsuNLGKT-QYCP.A.GpKEshh.....ppRlKR...oLPPDFQLTE-Wl+RLY-IApSss.....sssp.pph.GlCGlChLpoFQMLAELQEYHSpsPLQuG.GYFFDTAPspDPFISFRQRYPpLsphLpslsphas.........hspsphlshuoAhsMLPQYpWps.SsphsTRsEhhSHIpSLIsSPsGSlWLulhtppcsDG...ohsGHAlPILRTSQGLVVIsTNs.sshoL-pYRpsLsPT....pDPpplIss.hcpsspsL.t..pLsTlQ.LsphYpNsFDhhlSsRNCTGEG-DRRGoGpYPsSo.lNQCu..u....GRCs.... 0 12 14 28 +7427 PF07599 DUF1563 Protein of unknown function (DUF1563) Studholme DJ anon Blast clustering of Leptospira proteome Family A small family of short hypothetical proteins in Leptospira interrogans. 
20.40 20.40 22.00 20.80 20.30 17.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.07 0.72 -4.08 2 16 2009-01-15 18:05:59 2004-03-11 13:20:55 6 2 6 0 7 10 1 36.70 74 15.61 CHANGED MNIILIsFFLLETLENLYsTYVEh.LKQhFLDphQKI.KssRK ..MNIILIGFFLLETLENLYuTYVEh.LKQhFLDphQKI.KhspK.......... 0 4 4 5 +7428 PF07600 DUF1564 Protein of unknown function (DUF1564) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of paralogous proteins in Leptospira interrogans. Several (e.g. Swiss:Q8F4V2) have been annotated as possible CopG-like transcriptional regulators (see Pfam:PF01402). 25.00 25.00 36.10 35.10 21.10 21.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.99 0.71 -4.92 14 111 2009-01-15 18:05:59 2004-03-11 13:28:11 6 1 7 0 32 122 0 159.80 35 92.35 CHANGED McllhLsospclpSth.......tcppssssoLLIPcshh.pLscp..cpKsLppcls.LLK+YsKhlhopchltpcssKshYQps.h..cLK+hshRssstsWshLGlLAtsHGVSRChLFshLLhL-...shscSIspshstGsPsFpts...aphhhclshtpNplo+clpht.ps....hhh.l .....................MthlhhssspclpSsh........cstsussoLLlPcshap+hstp......c+KsLpp+LPhLL++YsKhlsSh..cRLpp.+.As........KhpYpcssG.....chK+holRVpousWupLGsLAsAHGVSRCYLFNYLLhL-...t.ts.hlpThstGVPsFHhs...Yphh.hclshppN.loRcLphcPpsh...h............................................ 0 24 24 24 +7430 PF07602 DUF1565 Protein of unknown function (DUF1565) Studholme DJ anon Blast clustering of Leptospira proteome Family These proteins share a region of homology in their N termini, and are found in several phylogenetically diverse bacteria and in the archaeon Methanosarcina acetivorans. Some of these proteins also contain characterised domains such as Pfam:PF00395 (e.g. Swiss:Q8YWJ6) and Pfam:PF03422 (e.g. Swiss:Q9FBS2). 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.92 0.70 -5.23 8 240 2012-10-02 14:50:22 2004-03-11 14:07:11 6 32 169 0 75 498 272 161.50 28 24.64 CHANGED lsGND.sssGo.cusPa+TIT+Alhhu+uss..lIplAPGsYssuoGEpFPlhlP-GVsLhGDEsuKGh....tslhhs.sh.hs+su..hIpGuu....sDls....hp.sTIlssNposIAGhsITNP....................s.....h+usulalpus.uspI+NNThou.shtcG..........Ips.........ssat.ssusuG.....Nl.......IoGNp......lt...........sphs.....GIuIsstpsu..h.....sKl.EN................N...lIsp.........Nth......G...Vs.sslu.hDLGsuSpsosGsNphSsNucpD.Iph..sspsspsL...hAhNNphD+.PPTl .......................................................................tGsD.sssGo.pp....sPa..+T..I..spAl.p.h.A.tsGs.......slhl.tsGs.Y..st......p......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.t................................................................................................................................................... 
0 39 58 69 +7431 PF07603 DUF1566 Protein of unknown function (DUF1566) Studholme DJ anon Blast clustering of the Leptospira proteome Family These proteins of unknown function are found in Leptospira interrogans and in several gamma proteobacteria. 27.50 27.50 27.50 27.50 27.40 27.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.15 0.71 -4.39 70 852 2009-01-15 18:05:59 2004-03-11 17:18:20 6 68 343 0 265 776 364 132.00 20 39.54 CHANGED sss.VpDppTGLhWp+Cs.G.th................tGsstthsWpsA.....lshs......sslsts...............shss.WRLPslcELtSl.....l....-hssh....sP.......ls..........spsFss......s............sshaWoSTs........tssst.....t..uWs.....lsass...Gtshh.......spssthhshhVRs .............................................................s....lhD.hTsLhW...t..p.................................................tspsthh.sa.ppA........................hphs............pshsts..........................................GhssW..R......LPshp.......ELt.sL.......h..................chspt.......tst.................hs..............................sshFss.........s.....................sshaWou....os.........pstst.................t...uah...hsh.s........Gth..........thttt..hhh.l..s.................................................................................... 0 112 193 238 +7434 PF07606 DUF1569 Protein of unknown function (DUF1569) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of hypothetical proteins identified in Rhodopirellula baltica. 
20.80 20.80 20.80 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.79 0.71 -4.30 13 113 2012-10-02 14:44:17 2004-03-15 11:23:25 6 3 83 0 55 261 38 144.50 23 82.67 CHANGED hsphtpLpassLpsAls-lppLppsuhpps..GsWsLuQlhpHLAtuh-hSl-GaPthhstlFt..+slh+hhahshht+GcMp...sLctshPuuh..ssslD-stslc+Ltpolp+FpsasGchsPH..AaGpLs+spachhHshHhtpHLpFlpPp .......................................................................................thh............pphhsclp.pLp...p.s..ph......Gch...sluQhltHs.......s...ts.h...c........h....u...h....p.....s....h..s.h........t..s....h.ht.............phl..h.+..h.h..h..p........hh.h...p...sc..........pp..st..s..ts.s.t..h.....h..h....ss.....s....h.....-....h....c..p...p....h..p.cL..hp..t....lp...pa..p.......p...t....p.........s...s...h........t.........s........H...shFGcLopccWsphthpHlsHHLpQF...s.... 0 30 48 53 +7435 PF07607 DUF1570 Protein of unknown function (DUF1570) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of hypothetical proteins in Rhodopirellula baltica. This family carries a highly conserved HExxH sequence motif characteristic of members of the Peptidase clan MA. 21.70 21.70 21.70 22.10 21.60 20.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.75 0.71 -4.14 9 34 2012-10-03 04:41:15 2004-03-15 11:53:20 6 4 13 0 18 107 21 125.70 27 27.92 CHANGED slATlsHEAsHQlAaNsGlpsRhuc.PhWloEGLApaFEsschtsttuW+GlG.tlNphRlpp.acchhs.ccsuss.t...chIssDptFptsps...s.sAYupAWAlsaaLhcpchccaspalpplut+hPhp ....................................tTlhHEusHQl.saNhGltpRh..u.s....h.P..hWlsEGlApaFEsssh..p....tsth.pu..lG..tlNph+l....tt.h...c....phhs.tp..s...stsht............phl.hs.D.p..t.......F.ttspt.....stsu..YA..p..u..WuLsaaLh..p....p....c.......p..c...a.scaLppluph.sh...................................... 
0 16 17 17 +7436 PF07608 DUF1571 Protein of unknown function (DUF1571) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of paralogous proteins in Rhodopirellula baltica. 25.70 25.70 25.90 25.70 25.40 25.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.03 0.70 -5.21 4 69 2009-01-15 18:05:59 2004-03-15 11:58:36 6 1 57 0 23 72 16 195.40 40 60.37 CHANGED hlK+Ep.lsGpLpEsphMslKlRs+.psttt.spPhSVYLpa.sPcohKGREVlYlEspN-GphlV+cGGhtGphl.Tlpl-PpGhLAMctpRYPIT-lGlcslhp+LIEhtc+Dl-ps....sscsshhcst+hcG+ssThlQl..spPo+css.sF+pAplaID-EhclPIphcuasWPs........sEs-pspLIEpYsYtDLplNsuLsss-FDsTN.cYpFc .......................hlRQER.IuGch.u.s.P-pMhlKlp+p.........PLRVYAKWLPsGA+uGQElIYDsocRsDEMYGHLGGlLG+ls...hhhssDGsLARAQSNHpV+DLGsEalsshaLsEu+K....h..hEAG....sh+sTplEu+Tl.cGlRVlALTa..EoPsG+PQhhhpKcplh..lD.hc..sh.h..plEuY............................ss-.GclhE+lVa-cIs.p.s.L--osFDPcNPDY+F........................................................................ 0 13 14 17 +7437 PF07609 DUF1572 Protein of unknown function (DUF1572) Studholme D anon Blast search with Q7UW06 Motif These proteins, from several diverse bacteria, share a short conserved sequence towards their N termini. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.15 0.71 -4.69 15 202 2012-10-02 14:44:17 2004-03-15 12:06:42 6 1 177 0 65 278 44 157.90 45 91.07 CHANGED shup..sYLpsspppFcshKphG-+uluQLo-EpLphshs--sNSIAlIVKHluGNMhSRWTDFLT.....oDGEKssRNRDuEF-sshpo+pEllttW-cGWphlFsALssLss-DLpcTlhIRGEuHoVhpAIpRQlAHYuYHlGQlValuKhl+uscWpoLSIP+Gc ..........................................................................s..sppYlpsshppFcthKp.GE+sLuQLo....E.p.lpW.s.s.p.E.-.oNSIAlIlKHL...p...G...N...M...p...S...RW.T.D.F.L...T.........oD..G..E..K..h..s.R.N.R.....D...u...E.F-.s..s.h.p.o...K....c....E...l.....LtsWpc...GWp....hlF.ps..h.ss.Lss-cLh....p...s..V..h..I.....R....G.E....u.H....TV..h..p....A..IpRQ.....lu..HYuhHlGQIlYluKhLKps-WcsLSIP+Gp....................................... 2 30 53 61 +7438 PF07610 DUF1573 Protein of unknown function (DUF1573) Studholme DJ anon Blast clustering of Pirellula proteome Family These hypothetical proteins, from bacteria such as Rhodopirellula baltica, Bacteroides thetaiotaomicron, and Porphyromonas gingivalis, share a region of conserved sequence towards their N-termini. 20.50 20.50 20.70 20.90 20.40 20.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -7.98 0.72 -4.38 99 698 2009-09-13 15:23:25 2004-03-15 12:54:42 6 12 236 0 193 676 292 45.00 38 20.45 CHANGED sF.phpNsGcsPLlI..splpuSCGCTssphs+.cs......ItPGcp.uplpVpa .....FphpNsGctPLlI..splpsSCGCTss.p.as..+.cP.....................ltPGcp.uplplsa............... 0 105 174 193 +7439 PF07611 DUF1574 Protein of unknown function (DUF1574) Studholme DJ anon [1] Family A family of hypothetical proteins in Leptospira interrogans. 
25.00 25.00 25.00 26.60 24.90 24.70 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.13 0.70 -5.58 4 48 2009-01-15 18:05:59 2004-03-15 12:59:46 6 2 10 0 17 40 4 313.40 24 92.76 CHANGED Mh+KhaLhhPlLhFlhsFslDKlho.phhcsYhptuhshlaYchKcpLhpcLl.....c.pppsc-pKKlhlhFGsSRhh.FpssslcpK.hsDWhlYNFSuPsusPsYaLYaLE+lhssGlKPDhllh-.sP..Fsssoshhhc.sLtYuhDs.FlL+YhsphShpDhstahhs+LFtssh.pPchp.hhtRhK-tsh..h..uh.pshhltNLKpscusAhosu.s.V...spss-+LcccAh+hhs.hhssashushQhtFhtphLplsccpsl+shhlhPplhcshcchhpphch..........hc.WhsIlcpltEppuss.hsMsc.c..htCppa.DstHhSssCYpsahcFIlc+lsh ...................................................................................hpp.hlhhPhllhhhhFhlDKlhhlthlcs..hh.p..tths.hhYc.+.cplhpcLh..................cpp..tp..ttcp+KlhlhhGsSRsh.F............st.phl...c......cp..hs-....a.lYNFSsPsusPsYahYah..E+lhp.sGl.+PDhllh-hsP..Fspssshhhc.sLthuhs..Flhpahs................huhpc..........hphahhp+lFtsth.pPphp.hhtRh...p...stt...h.........th..pphhh.....sLpptct......sthssh......................ps.-p.Lp..ppu.p.h...hp.hhssaphu.hphtFhcphlphhccpsltshlhhPtl..shpchhcphph............hc.W.hshhppl.t...cph.shs.....hlshsp.c.th..pCptasDsuHhSssCa.t.hchlh.ph..h........... 0 5 13 13 +7441 PF07613 DUF1576 Protein of unknown function (DUF1576) Yeats C anon Yeats C Family This small family is found in several undescribed proteins. The alignment is distinguished by the frequent occurrence of conserved glycine and aromatic residues. 
25.00 25.00 34.30 26.70 19.50 18.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.25 0.71 -4.76 17 337 2009-01-15 18:05:59 2004-03-15 14:44:55 6 3 162 0 52 279 0 176.20 35 82.51 CHANGED hlh.u...spp.hpGhhpIhppsuhLlTDahtlsGhGuoalNhGlLsll.hhhlhlhpsplNGPsluulhTlsGFuhFGKplhNlhPlllGlhlhulhpp.pshss..hllsuLFGTsLAPlsuph..............G.lhGIlsGFlh.slstshuhlHtGhNLYNsGFouGhlAhhlhsllcsa .......................h.........pphhpGhhpIlpssuhLhTDahthsGhGushlNhGllsllshhhlhh.h.tsplNGPsluulholsGFShFGKplhNhhPIllGlhLhuhh...pp...pshsp..hllsuLFGTuLAPlsu.h..................GhhhGllhGalhsslstphthhHtGhNLYNsGFouGllAhhllsllc.......... 0 34 48 52 +7442 PF07614 DUF1577 Protein of unknown function (DUF1577) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of hypothetical proteins in Leptospira interrogans. 25.00 25.00 56.60 56.00 22.00 21.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.60 0.70 -5.48 6 39 2009-01-15 18:05:59 2004-03-15 15:06:33 6 2 9 0 14 43 0 259.80 29 66.28 CHANGED p+cHlIpKYLlpp-LhhKhss.ccchs.IhcllE-GpKIlhtsus.psls.sscllLYcILAKYlpLECsllcKh-sslhhlpVscluIA+ppRsssRhsV..-shalTNVloSKTl......I-ANhFsIP....T.VKVsFpDacs+LKpcp.shlplDl....FtsslscKFElV...............KKTcKhLaIcNTp..........DppSYtu.ss.shIchccEl..---lcspl+cYKcpcIh....SELIlPIlYlNcpcppI.PlGYlhlpo+Ep...slopppl.clppL ..............................................phh+lltcaLhtppl.hchs.ptpphh.lhchhc-uphIlh.s.pt.pshs.psclsLa+lLu+alpl-spllc.chcsplhhhplscltIApppRpssRlsl..s.shahsNllssKol......Ists.hsIs....TtV.cVpFpDhcp+lKpch.shhhIsl....atstlsccaElV...................K+ocKhlaIpsTs..........sppSYss.sppshlchtcEl..--tlcphhpca+spKI+....S.LIhPIlYhsctcptI.PlGYlhlpocpp...pIstppl..ltpL....... 
0 5 9 9 +7443 PF07615 Ykof YKOF-related Family Yeats C anon Yeats C Family \N 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.68 0.72 -3.82 13 198 2012-10-01 20:40:01 2004-03-15 15:41:33 6 4 133 16 68 262 49 78.90 25 77.88 CHANGED .uusQFSLYPM.ssDal-hIhusl-hhc..puslhscocchuTpLsGcstslFssLcslFhpsupsssHlslssTlStssPuc ....................ssphSLYPh.........s........s.....c...a.h.s.h.Ih.sslctlc......psslhhp.oss.huTtlpGchppVasslpshattutp.p....HhshphshShtsPs........................ 0 21 47 56 +7445 PF07617 DUF1579 Protein of unknown function (DUF1579) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of paralogous hypothetical proteins identified in Rhodopirellula baltica that also has members in Gloeobacter violaceus, Sinorhizobium meliloti and Agrobacterium tumefaciens. 20.70 20.70 20.80 21.90 20.50 19.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.95 0.71 -4.58 17 94 2009-01-15 18:05:59 2004-03-16 15:14:49 6 1 86 0 51 110 25 150.90 25 89.46 CHANGED hs.cPppEHpWLpcllGpWsh-s....-s.MsP-pPspcspuhppVRp.lGGhWl.s-upGcMs.....t.GsshsolhTLGYDss+pcaVGTWlGSMMothWlYcGpLDpss+sLsLpuEGPshss.....sG+hupYRDVlphhDsscRshoSthhsp-GpWhphMsupY+Rp..c ...............h..psttpHphLppl...lGcWssps....chhh...sPsps.......st.psp.hptss+..l.sG.hallp-sp......Gphs........Gps..hpslhslGYDstppcaluoWlsS..Mhst..hhhhcGp..hsts.sp.slsLpspsssht.......sG.h.hpa+chhphh.sssphshp..h.t..t..sGphhphhphpapR..h.................................................................. 0 23 32 41 +7446 PF07618 DUF1580 Protein of unknown function (DUF1580) Studholme DJ anon Blast clustering of Pirelllula proteome Family A family of short hypothetical proteins found in Rhodopirellula baltica. 
22.90 22.90 23.60 24.70 22.30 22.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.48 0.72 -4.60 5 10 2009-01-15 18:05:59 2004-03-16 15:20:36 6 1 3 0 6 14 0 53.60 34 55.43 CHANGED PLLcAlcLEoGhRPusSTlhRWuh+PNRHGNhLcoWhlGGR.RhTSVpAV+RYlcAsT .........shhphhc.psG.RsHsSTshRWsh+ss+pGshLcohhlGGR.RhTSVpAVpRalcsss... 0 6 6 6 +7447 PF07619 DUF1581 Protein of unknown function (DUF1581) Studholme DJ anon Blast clustering of Pirellula proteome Family Several Rhodopirellula baltica proteins share this probable domain. Most of these proteins are predicted to be secreted or membrane-associated. 20.50 20.50 21.00 29.10 20.20 19.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.89 0.72 -4.00 7 15 2009-01-15 18:05:59 2004-03-16 15:26:38 6 4 2 0 8 16 0 83.90 35 7.17 CHANGED Rssl+Dspppshl..NGR.lppcslhssusPWlulRu.hpsputhRNL+IsGsPplPsplshlsutpLhGW.ssYasts....tsp.tGph ..Rssl+Dspspshl..NGR.lpp-sltssusPWlulRuphpsputhRNL+ITGsPpIPcplshlsuspLpGW.ssYasts....tsc..sp......... 0 8 8 8 +7448 PF07620 SLEI_Leptospira SLEI Studholme DJ anon Blast clustering of Leptospira proteome Motif This highly conserved sequence motif is found at the C terminus of several short hypothetical proteins in Leptospira spp and related organisms. 16.20 16.20 16.50 17.80 16.10 16.10 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.85 0.74 -6.21 0.74 -3.76 10 31 2009-01-15 18:05:59 2004-03-16 15:31:40 6 4 30 0 4 38 0 15.30 65 4.13 CHANGED stphLRDNSLEISNph I.IMEE.GNSLEISRQM... 2 2 2 3 +7449 PF07621 DUF1582 Protein of unknown function (DUF1582) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of hypothetical proteins in Rhodopirellula baltica. 
25.00 25.00 26.60 40.60 23.80 18.90 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.84 0.73 -7.06 0.73 -3.70 5 9 2009-09-11 06:28:21 2004-03-16 15:42:10 6 2 1 0 9 9 0 28.90 38 19.74 CHANGED RuLPSPcsLs-pRlscsssCsscG.tsLA ..RsLPSPc..hstRlss.ssCspcG.hsLu. 0 9 9 9 +7450 PF07622 DUF1583 Protein of unknown function (DUF1583) Studholme DJ anon Blast clustering of Pirellula proteome Family Most of these Rhodopirellula baltica hypothetical proteins also match Pfam:PF07619. 22.80 22.80 22.80 35.40 22.70 22.70 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.24 0.70 -6.01 6 21 2009-01-15 18:05:59 2004-03-16 15:48:41 6 5 2 0 11 21 4 306.00 25 36.07 CHANGED tltGSphESLLpYhRPlh-sspIEYEFFYDsGpsusHPAlsRhAhLIcs-GVu.H.hTDGtappo.LRPDNthhh.p.tpcpuslPL+sspWN+scLphtG-clpLpLNGpsIaEpsl-sps.sRsFGLFHFuDpopl+VRsLpLSGDWPppL..hspQpLAsshsscLctssscLspsFpHDFcc.shsschF.tpst..scshhss--Gl+hstuo.usacphshsPphplcGDFDlTAcFsthQsss.Ess.huulthplsL-sstpcpl...tsspthscppGp+lhsshphphssGpt.ppthst+psp-uTSG+LRlsRRG-plaaLFApsDSspF+LltpEpsscuslssuGIpL.sslssssGosSupWsslolRAEcl...ssusPDhpssLu..plc.s+st ...............tLL.YhRPhhcttplpYpFaat.spstsaPslsRhshhlp.sGht.p.hp.s...t.s.h.ssNh.h......t.sshs.psptWNphcl.h.sspl.lpLNtp.lhpt.lps.t.s.pFGLa+.tcppthhhpshhhpGcWPttl.....p.hs....tpLttthptLspsapaDFpc.t.sschF.thst..stsh.ts-cGlphptsu.s.apthsls.phtlcGDFDlshpFs.hph.s.tss...stlthplphssstpspl....hhhthsttts.phhtthphthssGp..phhhshthp.s.usushLRlsRRG-plahLhuptssp.hplltppphsptsl..hslph.h.h.s.stsspsss.hhpplpl+Acpl........................s.......... 0 11 11 11 +7451 PF07623 PEGSRP DUF1584; Protein of unknown function (DUF1584) Studholme D anon Blast clustering of Pirellula proteome Motif This sequence motif is highly conserved in several short hypothetical proteins in Rhodopirellula baltica. It also is associated with Pfam:PF07621 in Swiss:Q7UJJ9. 
25.00 25.00 29.60 28.80 19.80 17.80 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.04 0.72 -4.64 6 15 2009-01-15 18:05:59 2004-03-16 15:59:05 6 2 2 0 14 15 0 26.50 76 24.92 CHANGED LAVhRKPPGEEPEGSRPSATSLVVHVV .LAVhRKPPGccPEGSRPSATSLVVHVV 0 14 14 14 +7452 PF07624 PSD2 DUF1585; Protein of unknown function (DUF1585) Studholme DJ anon Blast clustering of Pirellula proteome Family A conserved sequence region at the C terminus of several cytochrome-like proteins in Rhodopirellula baltica. 21.40 21.40 21.40 21.70 21.20 21.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.26 0.72 -4.32 63 199 2009-01-15 18:05:59 2004-03-16 16:05:40 6 17 35 0 86 222 297 75.30 25 9.62 CHANGED ssGppFpshtpl+phLhpc.p-pFscslsc+LlsYALGRslphsDcstl-pIhsphcpssaphpsllptlVpSc.Fp ................sGppFssht-L+phLhpp.tcphscshsc+llsYAlGRslp.htDcstlcplhpphppssaphpsLltslVpSp.Fp.... 0 79 80 83 +7453 PF07625 DUF1586 Protein of unknown function (DUF1586) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of short hypothetical proteins in Rhodopirellula baltica. 20.80 20.80 21.50 22.30 18.60 19.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.62 0.72 -6.59 0.72 -4.12 3 11 2009-01-15 18:05:59 2004-03-16 16:08:24 6 1 1 0 11 11 0 21.80 73 40.82 CHANGED SRTALAAVSQTPTGANAcWHLL SRTALAAVSQTPTGANAaRhll 0 11 11 11 +7454 PF07626 PSD3 DUF1587; Protein of unknown function (DUF1587) Studholme DJ anon Blast clustering of Pirellula proteome Family A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07624. 
21.10 21.10 21.10 21.40 20.60 20.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.00 0.72 -3.97 66 193 2009-01-15 18:05:59 2004-03-16 16:12:43 6 25 37 0 90 219 315 67.80 31 8.22 CHANGED RRLs+pEYpNTl+DLLGlc...hshsp....thPsD.ssspGFcNsGpsLthSshphEpYhpsAcphlcpAhhpss ...........RRLs+tEYpNTl+DLlGls....hshsp.............thPsD.ssstG..F-NsussL.s..l.Sshhh-pYhpuAcplhspAhh..s................. 0 78 83 87 +7455 PF07627 PSCyt3 DUF1588; PSC1; Protein of unknown function (DUF1588) Studholme DJ anon Blast clustering of Pirellula proteome Family A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07626 and Pfam:PF07624. 21.40 21.40 21.40 21.50 20.80 20.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.39 0.72 -4.16 92 252 2009-01-15 18:05:59 2004-03-16 16:17:41 6 38 43 0 118 280 421 101.80 36 12.41 CHANGED pRGGlLspuulLshsosuscoSP..lpRGsWlh-plLssssPsPPs.sVs....slpsstsst.........TlR-pl.thH.ppsssCuuCHp.clDPlGauhEsFDslGpaRsppsst ....................RuGlLopuulLshsussscoSP..lpRGhWlhcplLs.pssPPPPs.sVs.......sltsstsss...............ThR-pl..ptH....ppss..sC.u.uCHp.plDPlGauhEsFDslGpaRsp-t.t..... 0 101 107 114 +7456 PF07628 DUF1589 Protein of unknown function (DUF1589) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of short hypothetical proteins in Rhodopirellula baltica. 
20.60 20.60 21.50 21.40 19.80 19.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.01 0.71 -4.33 2 15 2009-01-15 18:05:59 2004-03-16 16:24:54 6 1 2 0 13 15 0 82.70 31 79.55 CHANGED MLWN+AuRPAGHARLSDVRPFT.PGTTWPT.LSQPDASARDTSEFWRSHQRR.GHYLAPSRpFGTKRAVQPVTQsSAMSVHSPRQVQPGLHhCPNPTRQRcTRLNoGEATNVGQVITWHPADALEQStPSSRSRKTQRCPSIHPARYNLsYI.VPTRRVSEtHV .......................................................................................+..R.sahhtPSRpFtTKRuV................................................................................................................. 0 13 13 13 +7457 PF07629 DUF1590 Protein of unknown function (DUF1590) Studholme DJ anon Blast clustering of Pirellula proteome Family These hypothetical proteins in Rhodopirellula baltica have a conserved C terminal region. 25.00 25.00 78.70 78.10 18.80 18.10 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.51 0.72 -4.35 2 5 2009-09-11 20:23:00 2004-03-16 16:28:30 6 1 1 0 5 5 0 32.00 88 26.02 CHANGED MtsGAchPPPEISLNAhFPTPPAApAtFSRlh MENGADCPPPEISLNARFPTPPAARAGFSRRY 0 5 5 5 +7459 PF07631 PSD4 DUF1592; Protein of unknown function (DUF1592) Studholme DJ anon Blast clustering of Pirellula proteome Family A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07627, Pfam:PF07626, and Pfam:PF07624. 
22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.48 0.71 -3.77 85 245 2009-01-15 18:05:59 2004-03-16 16:52:12 6 37 41 0 113 273 427 126.60 31 15.60 CHANGED LssaELAoRLSYFLWuShPDpc.LhphAppucL.p...ssphLcpQlcRMLsDs+u..cphspsFssQW.Lshc.plsphs..ctpha.s.pa......ssp.........lcpuhppEshpFhppl.lccs..tslpcLlsuDao.alNppLAca.YGl ...........................................................................................LssaElAoRLSYaLWuShPDcp.LhphAppGc.Lp....sspslptQscRMLs.D.s.+u....cthspsFstQW.Lplc......pls....p...hs....Dtpha..spa................ssp...........................lppshppE...sptah.ppl..lccs..ts.ltcL.lsu-as.alNppLApa.YGl............ 0 99 105 110 +7460 PF07632 DUF1593 Protein of unknown function (DUF1593) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins in Rhodopirellula baltica that are predicted to be secreted. Also, a member has been identified in Caulobacter crescentus (Swiss:Q9AAT9). These proteins mat be related to Pfam:PF01156. 
42.90 42.90 50.40 43.20 42.80 40.80 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.91 0.70 -5.08 29 158 2009-01-15 18:05:59 2004-03-16 17:00:58 6 7 98 1 97 173 8 254.30 37 53.54 CHANGED RllVhTD..lts.EPDDtpSLVRhLLYuNch-lEGllsooS....hahpsp..............................spsc.lpc.l...lcAYucVhPNLtpHsss..............YPos-hL+SllphGt.s..................uh.shGpsps.osGSphIlptlpc.s-s..cPLal.sWGGsNsLApALhplcpphstpp.....hpplhsKlRVYsIuDQD.........-s.usWIcppaPc.lhYItu..hpsh.ththssWsshssph..........p.phhopsW..lpppIpp.hGPLGuhYPs........................................hpahs........EGDTPuFLaLlssGLssP-pPsaGuW.GGRa ...............................RlhlhTD..lts...EPDDtpShlRhLlYuNph-lEGlluooS....hahtsp..........................................stsp.lpc.l...lc.uYtclhsNLppHsps..............YP...os-hLpullppGt.s....................................s.htslGpsps.opGSphllptlp...........c..sss..cPLWl.sWGGsNsLAQALhplcpphsstp.....htchhsKlRlYsIuD...QD.........-s.usWIcppaPc.lhaIts...hhshtt..hhhtsWsshssph..............p.phhsppW..lcpsIpt.tGPLGs.t.YPs.........................................h..ta...h.h........EGDTPoFLhLlpN...GLss..s-cPsaGuW.GGRa.......................................... 0 34 63 88 +7462 PF07634 RtxA RtxA repeat Yeats C anon Yeats C Repeat This short repeat is found in the RtxA toxin family [1]. 20.80 5.00 28.70 5.00 18.20 4.90 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.46 0.74 -6.50 0.74 -3.24 99 3103 2009-01-15 18:05:59 2004-03-16 17:20:41 6 42 64 0 556 2748 47 18.50 35 20.18 CHANGED utGsuNlls+s......Gsssslsh .....hhGtuNllT+l.......GsGsslt.......... 0 42 271 437 +7463 PF07635 PSCyt1 Cytochrom_C_p; PSC3; Planctomycete cytochrome C Studholme DJ anon Blast clustering of Pirellula proteome Family These proteins share a region of homology at their N-terminus that contains the C-{CPWHF}-{CPWR}-C-H-{CFYW} motif typical of cytochromes C, or CxxCH. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.27 0.72 -3.65 141 688 2012-10-03 10:02:11 2004-03-16 17:24:33 6 75 104 0 304 822 1079 58.40 31 7.34 CHANGED CapCHGscpp..cusLcLDstsshht....sspsG......sullPGcscpS.L.hpplt.spcpshpMPPsc .ChpCHusspp........cusLcL-shpshhp.....................sspsG........ssllP.Gcs.cpS.L.hpp.lt....sp...s.........s.s..p....MPP........... 0 232 290 300 +7464 PF07636 PSRT PSRT Studholme DJ anon Blast clustering of Pirellula proteome Motif This motif is found at the N terminus of several short hypothetical proteins in Rhodopirellula baltica and the predicted Arylsulfatase B (EC:3.1.6.12) Swiss:Q7UX97. 20.20 20.20 25.50 24.50 18.10 18.10 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.40 0.72 -4.48 4 7 2009-01-15 18:05:59 2004-03-16 17:31:57 6 2 1 0 7 7 0 30.60 59 19.12 CHANGED cGNptpaHARPSPSRTPERRRSo.PQTphRER .......ushhp.HARPsPSRTPERRRSoSPQTphRER 0 7 7 7 +7465 PF07637 PSD5 DUF1595; Protein of unknown function (DUF1595) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins in Rhodopirellula baltica, associated with Pfam:PF07635, Pfam:PF07626, Pfam:PF07631, Pfam:PF07627, and Pfam:PF07624. 21.20 21.20 21.20 22.00 20.90 19.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -9.09 0.72 -3.93 81 218 2009-01-15 18:05:59 2004-03-16 17:41:47 6 29 39 0 105 247 304 63.60 29 7.82 CHANGED tpu+..phlppFup+AaRRPlsss-.lstlhslacpttpp....upsapp.......ulctsltslLsSPpFLY.hsEt ......pucphlppFupRAaRRPlsssE..lpphhslappstpt......stshpp.......ulchslpulLsSPpFLYhsE..... 
0 91 97 102 +7466 PF07638 Sigma70_ECF ECF sigma factor Studholme DJ anon Blast clustering of Pirellula proteome Family These proteins are probably RNA polymerase sigma factors belonging to the extra-cytoplasmic function (ECF) subfamily [1] and show sequence similarity to Pfam:PF04542 and Pfam:PF04545. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.04 0.71 -4.62 7 258 2012-10-04 14:01:12 2004-03-16 17:52:40 6 7 143 0 120 8136 1123 167.30 21 87.84 CHANGED .pcloplLpplcsGDstAsstLh.hlYp-LRRhAtsph.psE+hspsLpsoAL.Va-AalRhlstpch.chsuRsahauhuscsMRRILl-pARRcpppKRGGchhRtpLsp..ss.......hc.................sss-.lLsL--uLppLhs.hsPcpt+lVELRhFsGLohcEhAphLslShRTlcRpWthAR.sWhtpchps ..............................................................................................................................................h.l.tt.h...p.t..G.s..t.A.h.p..t....L.h.hh.a..pL.+.p..h.A.t.t.......h.......h....p...t...t..t.................s...t..s...................p.....s.........s.s..L...l.p..-.u.a..l...+...h......h...p............t.............p......h...p....s..........c....t...c....a....h.......t....h.....h.s....p.....h....h.....R...p.....h....l....l..D...t..t...R....p........p......t........s.......t.......K...........R.........t.........s......t....h.........h...h...p....l....t..t......t.............................t.t.................................................t.p......c....p....l....l.....t.......l.....c......c.....u.....L.....p.....p.......L.....t....p.....hs..s....c....p....t....c....h.l.p...L.p....a....h..s........G.....h..o..hpE....lAphL..s....l....S.....p....o....l....c....+....c....h....p.....hu....R..shlttt...t................................................................................... 
0 83 100 112 +7467 PF07639 YTV YTV Studholme DJ anon Blast clustering of Pirellula proteome Repeat These hypothetical proteins in Rhodopirellula baltica contain several repeats of a sequence whose core is the residues YTV. 21.00 21.00 22.30 21.00 20.80 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.64 0.72 -4.23 18 108 2009-01-15 18:05:59 2004-03-16 18:00:16 6 10 10 0 52 113 196 43.40 40 33.91 CHANGED oYTVphPVhETcp+shpYoVp+PVaETpsp..sYTVpVPshET+sp ......................YTVp+PVhETpp+shpYTVp+PVhETppc..sYTVp..+PVhETh......... 0 52 52 52 +7468 PF07640 QPP QPP Studholme D anon Blast clustering of Pirellula proteome Motif These Rhodopirellula baltica proteins share a highly conserved sequence, centred around an invariant QPP motif, at their N termini. This motif may represent an export signal. 25.00 25.00 57.60 57.60 17.20 16.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -7.87 0.72 -4.28 9 11 2009-01-15 18:05:59 2004-03-16 18:04:46 6 4 2 0 9 11 0 36.00 49 13.76 CHANGED MASsQPPLTRCGShNQQPspshpssttspph.s.ss MASsQPPLTRCGShNQQPspuhpssthspph.shs.. 0 9 9 9 +7470 PF07642 DUF1597 Outer membrane protein family (DUF1597) Studholme DJ, Bateman A anon Blast clustering of Pirellula proteome Family This family of proteins are likely to be outer membrane beta barrel proteins. Possibly acting as porins. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.28 0.70 -5.42 89 396 2012-10-03 17:14:37 2004-03-16 18:17:17 6 1 220 0 175 591 194 345.70 18 84.83 CHANGED slpluGhl-shaphshsps.tst.....................hssps..sphsls.s.lhhptsssp......hshthslhaGstut.s.hs................................tsthhhsl.pAYssht....hscslslssGpasThlGaEsh.shsNhsao+uhhhp.s.PahcoGl+ssYssss.phshhhulhsG.....h................p.h...........ssss....t.shssplsa..tsstphslshshhhGsps.....t............................spssphh...hslsssaplss........phplshshsashtptt......................sssssssaaGssh...................................ahp..Yshs-...phuluhRsEaapsps...............................shsssssspshuhTlsssap................tsslhlRsEhRhs......p....stst........h..Fhps.ssps.......................spsthshssshtF ...........................................................................t.hphtGhlpshh.hs..sp...tst...................................hssps.........sphplst..h.lhhppssst..................hshhhphhhGstuths.hh...................................psththsl.pAalpht.....ht.p.s..lslphGphhohhGh.Esh...s...hs...N.h.a...o...t...u....hh.....hph..t.P..ah..psGlhs.sa..t.hss....phsh..hhulssG...............h.....................................................................psh.........................psss.............t.shhstl.sa...t.ssph..s..lt..hsshh..Gsps.....st.....................................................ststphh........hslhh.saph...s...c..............................p.hslsh..phs.ahtppt.............................s.stsss.haGssh...................................................................ahp........Ys..hsc.......phsls.......h...Rs.Ehapcsst.............................................................hsh.h.ss.ssss...h.h..shT.lsssap.........................................hsshhlRsEhRhc............p.......utss........s....hs.s.sst.........................sthhhshshhh.F.........................................................
.......................... 0 76 146 161 +7471 PF07643 DUF1598 Protein of unknown function (DUF1598) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of Rhodopirellula baltica hypothetical proteins of about 500 amino acids in length. 25.00 25.00 34.60 77.50 24.30 19.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.80 0.72 -3.95 9 18 2009-01-15 18:05:59 2004-03-17 09:12:34 6 1 9 0 8 18 20 83.70 38 16.70 CHANGED CSIDPssEGLt+hppalpplsst....uthsstt...hhpshcpsLGhQ-lpVhGlssso+aARlLVEADY+MKRIuhGh-psslt.l.SY CSIDPs.EGLp+hppalpplssp....sshsstt...hhssLcpsLGhQ-lpVhGlssco+hARlLVEADY+MKhIuhGl-psslt.lsSY. 0 8 8 8 +7472 PF07644 PGAMP Planctomycete PGAMP Studholme DJ anon Blast clustering of Pirellula proteome Family This conserved sequence is centred around an invariant motif of PGAMP in several short hypothetical proteins from the planctomycete Rhodopirellula baltica. The motif also occurs twice in Swiss Q7UVK9. 20.30 20.30 43.70 43.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.85 0.72 -4.21 4 8 2009-01-15 18:05:59 2004-03-17 09:31:05 6 3 2 0 5 8 0 34.80 52 15.43 CHANGED SlplPGAMP.AohphAhGQpsRhtpuQpppSphus .SlplPGAMP.AoMphAVGQpsRhtKuQtQpSphus 0 5 5 5 +7473 PF07645 EGF_CA Calcium-binding EGF domain Bateman A anon Pfam-B_330 (release 10.0) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.08 0.72 -3.96 18 24503 2012-10-03 09:47:55 2004-03-17 16:01:14 10 2919 264 49 14019 30190 160 41.60 36 11.43 CHANGED DlDECss.ss.psC...ttsshChNshGSFpCh....C.sGap......stssspsC ..................DlDE.C......t.........p........s........s.....t................C.............t.t...u..pCh...N...o.........G..S...a....p......Cp.....................C.....s..G.ap...............tt...................................................... 
0 4262 5341 9307 +7474 PF07646 Kelch_2 Kelch motif Finn RD anon Context Domains Repeat The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase [1] for which a structure has been solved [2]. The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.22 0.73 -8.44 0.73 -4.00 42 698 2012-10-05 17:30:42 2004-03-17 16:09:08 10 211 235 0 334 6336 157 51.00 22 11.00 CHANGED sphstsss.sh.......ssclhlhGGh...................s....phhhhcsppppWpphsshs ...................pssauss..sh............ss+l.hlaGG..h................................................................ttt.t.s..p...h..........s.-..l...h...hhD..h..pphpWpphp........................ 0 102 166 239 +7475 PF07647 SAM_2 SAM domain (Sterile alpha motif) Finn RD anon Manual Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.10 0.72 -4.13 49 3625 2012-10-02 20:42:54 2004-03-17 16:20:05 12 358 360 47 2049 7757 90 64.90 20 8.61 CHANGED sphhshpslspWLp...sl..sh..tpYp-tFpppslss.phl..hphstccL...t.clGlpphscpp+llpplpphp ...................................hs.pplt.....p.W.Lp...........sl.........sl.....p....pYs.s......t....h.......p........p...p.......s.lsu.ph.l.........hph.s..p.p...p...L.................p..plu..l..ss.h.tp..Rp+lhpthpth............................................. 0 524 798 1345 +7476 PF07648 Kazal_2 Kazal-type serine protease inhibitor domain Finn RD anon Manual Domain Usually indicative of serine protease inhibitors. However, kazal-like domains are also seen in the extracellular part of agrins, which are not known to be protease inhibitors. 
Kazal domains often occur in tandem arrays. Small alpha+beta fold containing three disulphides. 20.30 11.00 20.30 11.00 20.20 10.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.48 0.72 -4.02 73 4760 2012-10-02 00:52:43 2004-03-17 16:59:37 10 287 248 41 2600 6229 320 42.20 27 13.16 CHANGED Cspppt..P..VC......GsDGp...oYss..tCthpstshpt...........ptp..hphhppss.C ................Cspt.....P.....VC..............G..s.D...Gp...........TYs...s.......tCth..pp.ts.stt.....................ttp.....hph.hh.s.C................................ 0 733 989 1694 +7477 PF07649 C1_3 C1-like domain Finn RD anon Context Domains Domain This short domain is rich in cysteines and histidines. The pattern of conservation is similar to that found in Pfam:PF00130. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.89 0.72 -3.91 194 800 2012-10-02 13:15:50 2004-03-17 17:46:33 7 64 28 1 533 923 2 29.60 35 11.59 CHANGED htCps.Cshshtt....shYt..C..h.pC.cFhl.HpsCs ......hCss..Cs..hshpt.........hYt....C..h.pC.cF.hl.HppCs. 0 367 463 489 +7478 PF07650 KH_2 KH domain Finn RD anon Context_Domains Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.58 0.72 -4.42 165 9989 2012-10-02 00:34:43 2004-03-17 17:57:18 12 35 5164 213 2428 7426 3812 76.60 27 28.69 CHANGED cl.phlptpl.p.....tss.......huplpI..........ptshlsl+suc.GllIG+pGpplcplppplccpht.h.spp..............Vtlplh..cVccs.t ...........................................cl.phls..hcl...p............tss.............hupl.cI................ptshls....+..su+.Gl..VIGK..pGp..c...lcc..lt...pphcc.h.......spp..........................Vplplh..cV+ps............................................................. 
0 805 1574 2054 +7479 PF07652 Flavi_DEAD Flavivirus DEAD domain Finn RD anon Pfam-B_199 (release 3.0) Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.84 0.71 -4.45 19 6300 2012-10-05 12:31:09 2004-03-18 10:02:46 9 76 281 64 1 7229 89 121.90 49 4.66 CHANGED h+KtplTlLDhHPGAGKTR+lLPpll+cslc+RLRTllLAPTRVVhuEMtEAL+GhslRapTsAl.sp+sGspIVDlMCHATaTpRhLp.Ps+hsNaplhIMDEAHFhDPuSIAARGalsopschscsAslaMTATPPGos-PFPcSNu .......................................................................h......................................................................................................................................................................................................................sa.u...T.as..h.h...L...hs.....s.....s.....p..s..s.s.YslIIhDEsH....h..TDss.....S..I..huh.G.h.l...s..p..s.E.h.u.t..A.t.s.l.h.h..TAT.PPGSs.shPpup........................................ 0 1 1 1 +7480 PF07651 ANTH ANTH domain Finn RD, Bateman A, McMahon H anon [1] Domain AP180 is an endocytotic accessory proteins that has been implicated in the formation of clathrin-coated pits.\ The domain is involved in phosphatidylinositol 4,5-bisphosphate binding and is a universal adaptor for nucleation of clathrin coats [1,2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.80 0.70 -5.70 29 1195 2012-10-02 18:21:09 2004-03-18 10:47:20 11 20 303 13 707 1532 5 243.60 25 36.38 CHANGED .sslphulhKATsp.p-sPsKc+asctIlhhops....tphsthhtslscRlspopsWslshKsLlllH+lLp-Gc.shhp-hhptppphsphtphps................shsaushlRpYstaLpc+lphatpttht...........hphttpt....................tp..tt........phsh.pll-pl.plQphl.phlphp.pssshp..spllltAhh.llp-shtlYptlschlhsLlp....phh-..hth.pscpshslhc+htpQhccLppFaphs+slthhps...IPpl.chssshlpsh-phhpps ............................t..ththslhKATst....ss.Kp.+ah.p.......l..h...h..h.s........pt..........h......p..h.......s....thh..sl.hcR..h.......s..t..s.....s....hl.........lsaKsLlshH+lh.p.c.......G....p...........p..hh.p..........ph.h...p..t.p.s..h..h..p...t..hst.....hsp......................................shshu..s...hlR....pYspaL..pp+.htha...pphsh...........................hph.cpt..................................................................tpss.t............phsh..pLLc.ph.hlQ.............p..l.s.hlp.h...p...t........ss.....ths....stsh.suhh.....l.hp-shplathhschlhsLh..p.............................phh-..h...................scphhtl.hc.+ahpphpcltc.Fhph..scp.l.t.....h.......pt....lPpL......p.....sPsshlps.-ph....t....................................................................................................................................... 0 180 352 538 +7481 PF07653 SH3_2 Variant SH3 domain Finn RD anon Manual Domain SH3 (Src homology 3) domains are often indicative of a protein involved in signal transduction related to cytoskeletal organisation. First described in the Src cytoplasmic tyrosine kinase Swiss:P12931. The structure is a partly opened beta barrel. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.60 0.72 -4.47 35 4617 2012-10-02 18:48:24 2004-03-18 11:11:45 12 450 402 69 2334 20355 74 59.20 23 7.44 CHANGED hhpshpcass..s.ssss.................LshpcG-...llplh..........................tp..css.........sa.ahs.pp..s.......Gct......GhlPpshlp.h ..................................hpshhcass......p..ssst....................Lshp.c.G.D...............ll..p.lh...........................................pp...pss.............sa..WpG..ph.....s...................................G...ct.................G..h..hPsshlp...................................... 0 522 751 1381 +7482 PF07654 C1-set Immunoglobulin C1-set domain Bateman A anon Bateman A Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.91 0.72 -3.98 128 16594 2012-10-03 02:52:13 2004-03-18 16:21:17 10 70 510 2328 2190 24856 1 82.80 38 31.24 CHANGED s..pss.ph...sptshLsChVssFaP..sslpVpW.h+NGpplspsspsst.h..s....sDhoaphhotLphs...P.pps-..tYoCpVpHtuLp..psht ...............................................scpsoLtChs.h.G..F.Y.P....sc..I.s.l..oW............+.....s......G......c.........-......t.....o......p..c....s...p..h.....s..p......shs....s.............GDs...T..F..Q...p..hut..l.sls............s..sc..pp...pY.o...CcV..pHpuLs.pPl.s................................... 0 174 356 861 +7483 PF00008 EGF EGF-like domain Bateman A, Sonnhammer ELL anon Swissprot_feature_table Domain There is no clear separation between noise and signal. Pfam:PF00053 is very similar, but has 8 instead of 6 conserved cysteines. Includes some cytokine receptors. The EGF domain misses the N-terminus regions of the Ca2+ binding EGF domains (this is the main reason of discrepancy between swiss-prot domain start/end and Pfam). The family is hard to model due to many similar but different sub-types of EGF domains. 
Pfam certainly misses a number of EGF domains. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.34 0.72 -3.96 71 24995 2012-10-03 09:47:55 2004-03-18 17:40:01 22 2546 284 90 15241 25121 202 32.00 40 10.56 CHANGED Cspp....sCpst..GpChps.........ssapCpCs.G.....asGpp ................Ctss........PC.pNG....G..s..C...h..cts.......................ss..ap.CpC.ssG.......asG................... 1 6681 7484 11311 +7484 PF07655 Secretin_N_2 Secretin N-terminal domain Yeats C anon Yeats C Domain This is a short domain found in bacterial type II/III secretory system proteins. The architecture of these proteins suggest that this family may be functionally analogous to Pfam:PF03958. 22.00 22.00 22.20 22.00 21.90 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.12 0.72 -3.60 60 476 2009-01-15 18:05:59 2004-03-19 11:19:25 8 5 427 0 107 379 37 91.00 28 16.70 CHANGED TcTFslsYLthcRtGtSpToVsSGslSssss...sssssssssss.................sssssususSu....sussGopIpopscoDFWs-LcpslpullG.susGRtVlssPQA .....................TcTaslsalphpp...pSto..olso......Gshossss........s................................................ssuusususou......susuusphpssscsDhasslcpslpshL..sstGphslsssp........................... 0 24 71 88 +7485 PF07443 HARP HepA-related protein (HARP) Vella Briffa B anon Pfam-B_21761 (release 10.0) Family This family represents a conserved region approximately 60 residues long within eukaryotic HepA-related protein (HARP). This exhibits single-stranded DNA-dependent ATPase activity, and is ubiquitously expressed in human and mouse tissues [1]. Family members may contain more than one copy of this region. 
25.00 25.00 26.20 27.90 23.30 22.10 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.63 0.72 -4.59 15 154 2009-01-15 18:05:59 2004-04-01 10:52:49 8 9 78 0 90 142 0 54.50 44 11.43 CHANGED FpVcl.GYppcLIAlFKslPS+sYDssT+pWsFuLcDYptLMcplpcL.spVsLcPL .............FpVcl.GYst-LIulFKphPS+sY..Dsp..T+pWsFpLpDYstL.h..ptspsL..spVpLpPL............ 0 18 25 53 +7487 PF07657 MNNL N terminus of Notch ligand Liu XH, He QY, Studholme DJ anon Liu XH Family This entry represents a region of conserved sequence at the N terminus of several Notch ligand proteins. 22.50 22.50 23.60 22.70 21.80 21.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -9.86 0.72 -4.05 24 383 2009-01-15 18:05:59 2004-04-06 17:22:07 8 106 95 0 183 325 1 75.10 44 9.34 CHANGED uSGhFELclpsapNtpG...tsGpCCsGs.......stthshspCcTaFRVCLKHYQuplsssu...PCTaGsssTPVLGuNohslp ...........sSGhFELplp.phpNhpG.L.tsGpCCsus............ttt..s..stCcTaFRVCLK........cY.Q.u.p...Vospu..........................PCoaGsusTPVLGsNoFsl......... 0 35 54 113 +7489 PF07659 DUF1599 Domain of Unknown Function (DUF1599) Yeats C anon Yeats C Family \N 25.00 25.00 26.60 28.10 23.70 23.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.02 0.72 -3.91 14 429 2009-01-15 18:05:59 2004-04-07 15:17:06 6 2 231 0 116 393 387 61.80 51 66.07 CHANGED Kh+DYGsA....WRlhRlsSlTDQIaIKApRIRslpp.pupohVsEGIcuEaIulINYulhuLIQL .......KsHDYGpA....W....R.........h....R...lo....SlTD.IhhKhpRl+pIE.....s..pGp.olV...s.E.GIcupahsllNYulhuLIpL..... 0 49 100 112 +7490 PF07660 STN Secretin and TonB N terminus short domain Yeats C anon Yeats C Family This is a short domain found at the N-terminus of the Secretins of the bacterial type II/III secretory system as well as the TonB-dependent receptor proteins. These proteins are involved in TonB-dependent active uptake of selective substrates. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.20 0.72 -4.40 154 4169 2009-01-15 18:05:59 2004-04-07 17:23:43 9 51 1462 20 1089 3747 285 50.20 21 6.20 CHANGED hslhhs.t..ptlps....ppsul...p.ss.h......slppuLpplLpssslphph.ssspl.hlttp ......................................phhhs.....stlpu...........tssul.......phss.h......shcpuLpplL.pu..s.GLshph..psshl.hlt............... 0 353 719 943 +7491 PF07661 MORN_2 MORN repeat variant Yeats C anon Yeats C Repeat This family represents an apparent variant of the Pfam:PF02493 repeat (personal obs:C Yeats). 24.40 9.20 24.50 9.20 24.30 9.10 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.95 0.75 -7.19 0.75 -3.26 354 7333 2012-10-01 20:24:03 2004-04-08 11:38:36 8 45 575 0 1184 6471 5210 21.70 28 29.73 CHANGED hpGhhptYapsGplppct.pacs .............pG.hppYa.c.s.Gplppct.pYps............ 0 627 928 1065 +7492 PF07662 Nucleos_tra2_C Na+ dependent nucleoside transporter C-terminus Bashton M, Bateman A, Yeats C anon Yeats C Family This family consists of nucleoside transport proteins. Swiss:Q62773 is a purine-specific Na+-nucleoside cotransporter localised to the bile canalicular membrane [1]. Swiss:Q62674 is a a Na+-dependent nucleoside transporter selective for pyrimidine nucleosides and adenosine it also transports the anti-viral nucleoside analogues AZT and ddC [2]. This alignment covers the C-terminus of this family of transporters. 
25.30 25.30 27.20 27.10 25.10 24.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.29 0.70 -4.86 161 4136 2009-01-15 18:05:59 2004-04-08 14:27:54 8 11 1902 1 688 2281 695 208.40 39 50.08 CHANGED aLluAShMuAPuuLlhAKllhPp.....ocp.sp.st........tpt..............................sNll-AhupGAhsGhplAlsVuAMLluFlALlAllNulL.uhl...G.........h............Gh............l............oLphILGalFuPlAallGV.P.a.sEshhAGullGpKlllNEFVAahsLs..phhss...............LSt+otsIloaALCGFANhoSluIhlGuluuls.PpR+sslAc.hGl+AlluGoLuslhSAsIAGlhl ...........................................................................YlluAsl..hsh.uulllupllsPhs.psppsh..........................................thtp..........cp............cspshh-hhup.ushsGh.........KlAlsVuAMLluFlAL.IAhlNull.u.sl.......sshh............Gh..............l..........................ohpt.....IlGYlFuPlA......alhG........l........P....h........s.......-A.hhuGolhupKLlhNEFVAhhsls..phhss...................................LSs+otuIlohhLsuFANFSSIGIllGulpu.ls.sc+s.st.lu+.hGl+hlhuuTLsslhSAsIAGlhl.......................... 0 182 343 518 +7493 PF07663 EIIBC-GUT_C Sorbitol phosphotransferase enzyme II C-terminus TIGRFAMs, Griffiths-Jones SR, Yeats C anon Yeats C Family \N 25.00 25.00 41.50 41.50 18.90 18.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.03 0.72 -3.86 9 882 2009-01-15 18:05:59 2004-04-08 14:30:57 6 5 806 0 77 379 42 92.40 78 28.81 CHANGED LSPlLGPGAVIAQVIGV.LIGsQIGtGsIPPphALPALFAINsQsGCDFVPVGLuLuEAKPETVclGVPAVLhSRhlTGsluVlIAWhhShhla .......LSPhLGPGAVIAQVIGV.LIGVQIGhGNIPPpLALPALFAINAQAACDFIPVGLSLAEA+t-TVcVGVPSVLhSRFLTGsPsVLIAWhsShhlY............ 0 24 39 62 +7494 PF07664 FeoB_C Ferrous iron transport protein B C terminus Bateman A, Yeats C anon Yeats C Family Escherichia coli has an iron(II) transport system (feo) which may make an important contribution to the iron supply of the cell under anaerobic conditions [1]. 
FeoB has been identified as part of this transport system. FeoB is a large 700-800 amino acid integral membrane protein. The N-terminus has been previously erroneously described as being ATP-binding [1]. Recent work shows that it is similar to eukaryotic G-proteins and that it is a GTPase [2]. 26.40 26.40 26.70 26.70 25.60 26.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.74 0.72 -4.54 217 3526 2009-01-15 18:05:59 2004-04-08 14:37:58 7 16 2806 0 775 2731 228 54.20 34 7.91 CHANGED llhulY.llGllsullsuhllp..pplhpup.sss...FlhELPsYRhPph.+slhhpsWp+ .......slhulY.llG..I.lh.All.suhlL+....pT.l...h+G..c....sss....FlMELPsY+lP....pl.+slhhpsWp+............. 0 304 565 683 +7496 PF07666 MpPF26 M penetrans paralogue family 26 Studholme DJ anon [1] Family These proteins include those ascribed to M penetrans paralogue family 26 in [1]. 21.30 21.30 21.80 21.40 21.10 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.85 0.71 -4.25 11 22 2009-01-15 18:05:59 2004-04-13 10:35:31 6 1 12 0 13 21 3 104.00 25 67.62 CHANGED hlphl+pl++luhhhllhahlssh.....llhlsllsalshhhp.p.hsp......s...h.hhhsllhlhlssl...hhIsphllpIlLhlKlsthKscpscapchphahhLhIlGIhl.slhulIssFhLs.KhpK ....................................................................t.hslGlL.Ihhssl...hhlhthILsIlhhlhsuphppcp.phtc..hphlLsIIGIll.slhslIsslhL............ 0 11 12 12 +7497 PF07667 DUF1600 Protein of unknown function (DUF1600) Studholme DJ anon BLAST clustering of M. penetrans proteome Family These proteins appear to be specific to Mycoplasma species. 
25.00 25.00 48.60 48.60 22.70 22.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.78 0.71 -3.98 12 25 2009-01-15 18:05:59 2004-04-13 12:35:20 6 1 9 0 12 21 0 116.40 30 43.30 CHANGED thhSslahHlhtPhh.Flhhhalhhshs.hhshpphhKhLlhshIYPhlYs.lYlhslPal...ahhtss..ssYoVYuhhTssp.s.hh...................uhslhhshhhlahslShlh.lhhphhthpchh .....t.lhuslalHllsPls.FllsGalhhsas.tpsh+chtKaLlhshIYPhIYu.lYlholPal.................ahhssG...ssYSVYGhhTNsppNshh...................AhsllhshhFlaaPlSahl.lhhpahthpc+........ 0 10 10 10 +7498 PF07668 MpPF1 M penetrans paralogue family 1 Studholme DJ anon [1] Family This family of paralogous proteins identified in Mycoplasma penetrans includes homologues of p35 [1]. 20.30 20.30 20.30 20.30 20.20 19.80 hmmbuild --amino -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.21 0.70 -5.26 30 71 2009-01-15 18:05:59 2004-04-13 12:59:48 6 3 5 0 55 69 0 290.00 34 77.48 CHANGED TPcLKosVsLuGuLocIYDosss.....csTNsLIAc-IKsN.-sh..FsNGs-...hcslpshslTVcGsF....osSoW..sGpsYs.stps....W.........sshssssKllYsosusQlsIuSLsDLKopLsc.....psplKphLcsAssohss...sosaoV..pNpLGhT........s..sDLLHVNVpusp.......sssspNaDLQIPVSslNLplosLsloV......oGsNltsssctTTsFsYNIGI.cssssaspssssssssp.....s-sssusclLpcLGYss.t....................sssssLsNDplupuLGlYNspFo........t.tssssssssssspsYTlTLsAoP..spsYVW.DDG.oossKslSFsVslsls ..........TPplK.sslsLsGuLocIYDosss............csTN..pLIuc.-IKsN..psh..FsNGpc...hpslpshsloVsGsF....spS..sW.sG..t...s.Ys.s....W..............shsshsplhYsssusQls....IsSLsDLK.opLsp......tsplpphlptu.shshss...sssapl..p..NplGhT..........ssDLlHVNVtssp..........sssshshDLQIPsSslNLpls..sLploV......sGsN..ltt..s..sphoTsasaNIGI.csss...paspsss...tss.hsp.......sptssssplLtpLGass..t..........................ssshsLss-tlutuLGlYNspFo.............ssssspsssttYslolpAoP....spsYlW.-DG..osssK.ploFssplp..s................... 
1 55 55 55 +7499 PF07669 Eco57I Eco57I restriction-modification methylase Studholme DJ anon BLAST Domain Homologues of the Escherichia coli Eco57I restriction-modification methylase are found in several phylogenetically diverse bacteria. The structure of TaqI has been solved [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.53 0.72 -3.68 29 913 2012-10-10 17:06:42 2004-04-13 13:56:19 6 28 738 0 199 3032 536 109.80 25 13.17 CHANGED MK...FDVlIGNPPYQhss...tst..tss..............spPIYphFl-hAhclt...s+alshIhP.uRWh.tuGcshcpaRpphLpDp+lpplh.a.supclFssssIcGGlshhhh-.psp ............................................................................h....FDhlIGNPPYhct..p...p.......t..t...h.t.....htp................................................................tssslY...t.....h..F.h.-....t.....u.h..p..lhp........sGh.l.s.a....I..s..P...spah...pup...h..s...c.p.h.Rp.....h.l.l.p.c........s.p..l....pp.l.hp.h.s.s.........t.....p..........l.F...p...s....s..s..l...psslhhh.+t...t............................................................. 0 84 151 182 +7500 PF07670 Gate Nucleoside recognition Yeats C anon Yeats C Domain This region in the nucleoside transporter proteins are responsible for determining nucleoside specificity in the human CNT1 and CNT2 proteins (e.g Swiss:O00337) [1]. In the FeoB proteins (e.g. Swiss:O25396), which are believed to be Fe2+ transporters, it includes the membrane pore region, so the function of this region is likely to be more general than just nucleoside specificity [2]. This family may represent the pore and gate, with a wide potential range of specificity. Hence its name 'Gate'. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.36 0.72 -4.15 238 13008 2009-01-15 18:05:59 2004-04-13 14:15:06 9 26 3306 1 2612 8940 1060 117.90 18 34.76 CHANGED hhphhshllhhhhllslLpph................................................Ghhshluphlssl.hp.l.Gh...sspssls.hlss....hhstpsulshhtphh..............................................................................t...hstpct.hshhh...hshh.hs..........shutl.s..shh...thhuhh ...........................................................................................................h.hphhshllhhhh...llslLpph..............................................................................................................................................................................................Ghhshlutlh.ssl.h.p.l..Gl......ssp..ss....ls...hlsu....h.h.s.s...t.s.s.ls.s.s.t.plh............................................................................................t.....hs...t...p...p...t...h...s...h...hh.......h.shh....hs.........shuhhs....hhh......h........................................................................................................................ 0 936 1758 2209 +7501 PF07671 DUF1601 Protein of unknown function (DUF1601) Yeats C anon Yeats C Family This repeat is found in a small number of proteins and is apparently limited to Coxiella and related species. 19.00 4.00 19.10 4.00 17.90 3.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.05 0.72 -7.52 0.72 -4.31 28 131 2009-09-12 22:43:26 2004-04-13 16:55:19 6 24 19 0 48 143 226 32.00 31 18.14 CHANGED MGh+WpcLctQcLosRLLsAl+.....+Ns-pFNsQsIANTL ............................................LhpAlt..........ppspcFNsQpIANsL.... 
0 32 43 48 +7502 PF07672 MFS_Mycoplasma MFS_Mycoplamsa; Mycoplasma MFS transporter Studholme DJ anon BLAST clustering of Mycoplasma proteome Family These proteins share some similarity with members of the Major Facilitator Superfamily (MFS). 20.80 20.80 20.90 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.03 0.70 -5.22 7 58 2012-10-03 03:33:39 2004-04-13 18:09:49 8 1 45 0 19 92 2 252.00 30 55.53 CHANGED p+pKohlSshNhaGFNlGhhlshs.Flhs.slppsuops..WhhIhoshILllhshLllahhFtpch..hhsp..KQoK.p.shssc......sohhslLKpKpTaKhhshaGlhLlslV.shTsshhN.l.ls.SP...hsl.............................suGhhhhshshhhIhaVhGahhGhhsloPFNKThacRK+althhhshshlhlllhllh.uhhlGhssshGhsh......hhIhoFluGsFhWulQushLhlPaEhKthp.scVulhFGhlWGhGYlhYThhDIhhSshhp .................................+pKullSphs.ahaslGsllsllP.F..lhs...ps..s..p..thpss..Wphlhslhu.LLshIPLllahlh..GpcFDhhtsp....ppst....p.t.ts...........hslhshLKpKsTatWlllYGuWL...llsV.a..sho.s..hh...p.h...thSs....................................ssphs.thhpha..Ih.Fl.h.uhhlG..hslGhas+hphcR+halsh....hhshGllhh...ll...uhlh....sh.thGhs...ps.uhth....................hhIhuF..LsGhhh....WGIQuVhL.lPHEYKss.sPpplGhhFulIWGhGYhhaTlshIllSsh.................................... 1 11 16 17 +7503 PF07673 DUF1602 Protein of unknown function (DUF1602) Studholme DJ anon Clustering of A. pernix proteome Family \N 24.20 24.20 25.10 24.70 24.10 24.10 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.70 0.72 -4.49 43 115 2009-01-15 18:05:59 2004-04-13 18:16:47 9 3 81 0 21 116 7218 37.50 42 17.33 CHANGED GSStsphtG.hssARAhATRCCpPP-SssGhhhut.u...spP ......GSStshhtGthspARAhATRCphPP-sshGhhhtt.u..pP............ 0 5 12 17 +7505 PF07675 Cleaved_Adhesin Cleaved Adhesin Domain Yeats C, Collyer C anon Yeats C Domain This is a family of bacterial protein modules thought to function in various roles including cell adhesion, cell lysis and carbohydrate binding [1]. 
The beta-sandwich jelly-roll topology of these modules is known as the galactose-binding domain-like superfamily, clan CL0202. A tandem repeat of these modules (either two or three repeats) constitute the haemagglutinin/adhesin (HA) regions of the gingipains, RgpA, Swiss:Q51816 and Kgp, Swiss:P72194 and Swiss:P72197 [3] expressed by Porphyromonas gingivalis (Bacteroides gingivalis) [2]. They form components of the major extracellular virulence complex RgpA-Kgp - a mixture of proteinases and adhesin domains [4]. The adhesin domains in this complex are found in proteinase-cleaved forms when isolated from the cell surface [5]. Haemagglutinin genes of P. gingivalis [6] (hagA1 HAGA1_PORGI - Swiss:P59915 - and hagA2 HAGA2_PORGI - Swiss:Q51845) suggest that such proteins are composed of eight to ten tandem repeats of these adhesin modules [1]. Genomic data predicts that homologous protein modules are also expressed by a number of other bacteria and form part of putative multi-domain proteins, eg. Swiss:Q26BR9 and Swiss:B0VGL6. These domains may be acting in concert with other adhesion modules thought to be part of these multi-domain proteins such as fibronectin type III, Pfam:PF00041, and Meprin, A5, mu (MAM), Pfam:PF00629, domains. 
20.50 7.00 20.80 7.10 20.10 6.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.25 0.71 -4.66 9 203 2012-10-03 19:46:52 2004-04-14 13:37:02 6 31 36 6 40 218 69 157.90 28 23.51 CHANGED stlL.EuFEsG.lPsuWpsIDADGDGtsWtphsssh.......hsGHsustCshStSa.lshhGs...LTPDNYLITPcl....pGut+lpYWVssQD.ssasuEHYAVhsSoTGssAuDFs.lLaEET....hTuKs.............pGsWhpRTlsLPAGTK...YlAFRHasCTDha..alhLDDVsl .................................................................h....Es.p..p.s...h.s.stWphI.......Duc.u........D..u.p.sWhhhs..s.s...........................hsu+su.st.s.sh.S.h....u...h...s.h..tt.......hs.s.DN..aLI..oPpl.....sG.s.p..p...lpaass...s...ps....t..s..a.s...s...EcY....s...VhhS.s..T...G..s........ss..usFs...hlh--s....hssts..............tusWhppolsL...P.sGT+....YlA.aR..H..h.s.s..o..D....h...h...h.l.hlDDlp..................................................................... 0 29 40 40 +7506 PF07676 PD40 WD40-like Beta Propeller Repeat Yeats C, Mistry J, Adindla S anon Yeats C Repeat This family appears to be related to the Pfam:PF00400 repeat This This repeat corresponds to the RIVW repeat identified in cell surface proteins [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. 20.60 10.90 20.60 10.90 20.50 10.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.99 0.72 -4.23 127 15082 2012-10-05 17:30:43 2004-04-15 11:18:12 7 349 2793 58 5424 14558 9482 34.90 24 15.47 CHANGED hpplssssu............stsPsaSPDGpplhFsSscs......Gs.p.....lah .................................................h........t...............................ptsP.saSP.DGppls....aso.p.ps..........u.................................... 3 2058 3763 4721 +7507 PF07677 A2M_recep A2M2; A-macroglobulin receptor Studholme DJ anon Prosite Domain This family includes the receptor domain region of the alpha-2-macroglobulin family. 
21.70 21.70 22.00 21.70 21.30 21.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.86 0.72 -3.80 92 1246 2009-01-15 18:05:59 2004-04-16 09:26:22 9 72 186 60 533 1171 2 88.00 30 6.72 CHANGED osMsll-lslhSGFtsspssLcpLppstphh....lp+hEh.....psspVllYh-plsppp.hClsFphpp.phtVuth.pPAsVpVYDYYps....sppssphY .................................SsMull-lslhSGFhsspsslp.pLp..p.s.sphh........+hEh......psspVllYh....-...pls.pp.p............pslsh.pspp.phtVut.h..pPAsVplYDYYp.s....pcpssthY............. 1 90 147 332 +7508 PF07678 A2M_comp A2M3; A-macroglobulin complement component Studholme DJ, Sammut SJ anon Prosite Domain This family includes the complement components region of the alpha-2-macroglobulin family. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.86 0.70 -5.14 43 1721 2012-10-03 02:33:51 2004-04-16 09:40:48 9 109 288 67 626 1697 23 213.90 31 19.09 CHANGED pGYp..RpLsYK.+sDG..SYSsF......pst....suSTWLTAFVlKsFu............pA+pa..IhlDppplppulpWLlpp.Qpss.GsFppsu.lhpptMpG..Gl.........-scloLTAalsIALhEst...........hsps..............................psuls+AhthL.cst......ssshspsYshAlsAYAhsLss.cpsptpphhppLcppAhpc............................sss...haWppsppsp..sptt..................ssuhpVEhTAYsLLs.hlst...........p-lshAptIlpWLspQpNstGGFpSTQDTVVALpALocYut 
.......................................................................................................GYp..ppLpa+....+t..D.........G...Sausa.....tpt................suSs....W..LTAaVh.+.s.ht............tApph....h..l...-....phl.ppuhpWLhpp..Qp.s.s..G.pF.p.....-...sG...l.h.pp..sh..pG..Gh.......................................ppsluLTua...lllALhE.st....................hsph.....................................psslp+u.hpaL.ppp.........htph.pssYs.h....Al...su.YA..htLs...pp.......tt....pthh.p.pLt..th.u..hpp............................ss........haWtpst................................hplE.hTuYsL..Lu.hl.t...........pcht.tu.....lhpWLspQp.p...G..GFs..S.....T.Q...D..T..hluLpALspa..h.................................................................................................................................................... 0 122 188 400 +7509 PF07679 I-set Immunoglobulin I-set domain Bateman A anon Bateman A Domain \N 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.92 0.72 -4.13 48 59728 2012-10-03 02:52:13 2004-04-20 17:44:00 11 3030 1617 375 30033 71682 78 88.50 21 29.33 CHANGED PpFsph..pshplptGpssphpCplpGs.PsPplsWh+s..sp.lps............spchplptps......sptoLpIpssphsDsGpYsCpApNp.sGpspssspLpV .................................................................................ps..h..p...s....h....t....G...p.....s...s...p...l...p....C........p.......s.......p..........G...........p.......P.....t...........P..........p.........l........p.......W..h..+s.........s.p........l..p.s.....................................................s.p...c...h..p....l..p.t.ps........................t..p.L...p...I....p.....s......s......p......h......p......D.......s.......G...p...Y...p.C..h....A....p....Nt....h..G...p..s...p.t.p.h.tl................................................... 
0 6694 9324 18688 +7510 PF07680 DoxA TQO small subunit DoxA Kletzin A, Studholme DJ anon [1] Family Thiosulphate:quinone oxidoreductase (TQO) is one of the early steps in elemental sulphur oxidation. A novel TQO enzyme was purified from the thermo-acidophilic archaeon Acidianus ambivalens and shown to consist of a large subunit (DoxD) and a smaller subunit (DoxA). The DoxD- and DoxA-like two subunits are fused together in a single polypeptide in Swiss:Q8AAF0. 20.70 20.70 20.70 23.90 20.40 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.55 0.71 -4.53 9 80 2009-01-15 18:05:59 2004-04-20 18:14:37 6 2 72 0 25 79 2 135.30 31 48.50 CHANGED hhassllGsLhNhSKhPclc.lshhphpsss....LhhplsclsGPDsYsuhlshlplhsusucllLppss......clsphPhsphKs-Ylshhu..........ulhlsLGucAplpLplP..hcLs..GTYplKLassu..Gtssttthpa ...................................hp.tshGsLpNhShtPthp.lus...t.hpsss....LphplaRspGsDsYGuFlltlpLhs.s.s.Gcl.lhphsu.....ppLuplPtssIcNcY.s.s+.hs......sutaulhlPLGucATlplp.hs......hpls.pGs.YpLpLhDlu..Gtsapt....h..... 0 8 17 22 +7511 PF07681 DoxX DoxX Studholme DJ anon BLAST Family These proteins appear to have some sequence similarity with Pfam:PF04173 but their function is unknown [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.74 0.72 -3.56 103 6635 2012-10-02 13:32:46 2004-04-21 11:57:59 7 10 2739 0 1741 5119 2239 87.30 23 51.92 CHANGED ulLlhRlhluhlFl.hpGhpKlh..................shsu..........ssthhs....shsl..................sshhuhhushsElsuulhlllGlhTR....huAhhlsshhlsAh....hhsHt ..................................................hhlhRlhlu....h.l....F....l....htG.h.t.Klh.............................shss.........................................ht.t.hht..................sh.u.l.................................................ss.h...h....s....h...l.s...s....h...sEls.uGl.hll...l.......G.......h.......h..TR.......h.u..A.h....h..hs.h.h.h.l.ssh.....hhh........................................................ 0 475 1065 1446 +7512 PF07682 SOR Sulphur oxygenase reductase Kletzin A, Studholme DJ anon [4] Family The sulphur oxygenase/reductase (SOR) of the thermo-acidophilic archaeon Acidianus ambivalens is an unusual enzyme consisting of 24 identical subunits arranged in a perfectly symmetrical hollow sphere and containing a mononuclear non-heme iron centre (personal communication: A. Kletzin). At 85 degrees C in vitro, elemental sulphur is oxidised to sulphite, thiosulphate and hydrogen sulphide with no external cofactors needed. The proposed equation is: 4S + O2 + 4 H2O ---> 2 HSO3- + 2 H2S + 2 H+. 
25.00 25.00 191.60 191.40 22.80 16.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -12.12 0.70 -5.83 3 21 2012-10-02 00:20:33 2004-04-21 13:41:55 6 1 16 25 7 26 2 295.40 54 97.85 CHANGED PKPYVAINMA-V+N-PKThELFupVGPKVCMVTARHPGFVGFQNHVQIGVlPLGsRFGGAKM-MocEhc.............oLcLhQYThWKcWKDHEEMH+QNWupLFRLCluCAoQMVWGPaEPIYEI+YANMPlNTEMTDFTsVVGKKFApGc..sluIPsISQPYGKRVVAFGEHpVKEGhEcQFEEuAIKTLEhF++.APGFLGuMILKEIGVSPlGSFQlsuKGFHQlLESsGul-PcsssTI...YpsPEF+....s+PpcYIVHsEWScs-ALMFGhGRVLlsPElRcVHD.KVLDTLlY.GPYIRVlNPlMEGTaWRE .....hlAlN.sclhNt.pohphh.pVGPKVCMVTApHPGFVGFQNHlQlGllPhGsRYGGAKMDMscE.s.............sltlhQYThWKDWKDHEEMH+QNauhlFRLChSChuphl.GPWEPlYEIlhAsMPhNs-MTDFsuslGccFApGc..PhslPsIS.PYGpRVVAhuEHoVhPG+EKpFE-uIl+TLEhhK+.APGFLGhMlLKcIGVSuIGShQhsscGhHQlL..EosGuh.Pcssssh....aps..PEA+.....sTPppYlVHhEWuss-uhhFGhGRVllpPElRplHD.cVL-TLlh.GPYIRlhNPMMEsThWRE. 0 3 5 7 +7513 PF07683 CobW_C Cobalamin synthesis protein cobW C-terminal domain Bateman A anon Pfam-B_1247 (release 5.4) Domain This is a large and diverse family of putative metal chaperones that can be separated into up to 15 subgroups. In addition to known roles in cobalamin biosynthesis [1] and the activation of the Fe-type nitrile hydratase, this family is also known to be involved in the response to zinc limitation. The CobW subgroup involved in cobalamin synthesis represents only a small sub-fraction of the family [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.18 0.72 -4.27 158 5707 2009-10-26 17:34:02 2004-04-27 12:18:52 9 25 2706 1 1476 4245 1150 96.90 22 27.04 CHANGED lpohshptcpshc.....ppltphlpp.......hs........ts..llRsKGhltls....sp.st.hhhhptlsthhpht.ht..hts.............................ts+psclVhI...Gp..sl-...pptlpptLpssl .....................................................pohshp.t.ctshc.....pph..p.hlpp................hs......................................ss...llRhK.Ghl..h.ls.................sp...s....p...hh...hhp........tstt.h.hphp.....h....s..........hts..............................................................tstpsclVhI.........Gp....sl-....pst....lpptlpt.................................................................................. 0 401 896 1201 +7514 PF07684 NODP NOD; NOD1; NOTCH protein Guo J, Studholme DJ anon Guo J Family NOTCH signalling plays a fundamental role during a great number of developmental processes in multicellular animals [1-2]. NOD and NODP represent a region present in many NOTCH proteins and NOTCH homologs in multiple species such as NOTCH2 and NOTCH3, LIN12, SC1 and TAN1. The role of the NOD and NODP domains remains to be elucidated. 21.90 21.90 27.00 23.80 21.40 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.10 0.72 -4.37 28 312 2009-01-15 18:05:59 2004-04-27 13:03:59 7 249 96 8 140 249 0 61.70 36 3.09 CHANGED cltGolVaLElDNRpC...ppsscC.FpsAssAAsaLuAhusp.psLph.saPIpsVpups.psssss ............hGSlVaLEIDNRpC.............psuspC.FpsssssAsaLuAhAup.ssLp...sYPl.sVtucs.ts....s.............. 
0 26 38 81 +7515 PF07685 GATase_3 CobB/CobQ-like glutamine amidotransferase domain Bateman A anon [1] Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.64 88 5307 2012-10-03 00:28:14 2004-04-28 09:26:43 9 29 2967 0 1396 9246 3546 155.60 25 37.14 CHANGED ssph.s.ssDhlllPGo+ssht-Lthhc.ppuhccslpphhppGt..llGICGGaQhLGcplt.....ct.....shss...hpGLGlLshpTthpp.pKtlspspspsht..........pshtlpGaEhHhGpohh.sst..hhhh...............tsthc.....ushs...psslhGTYlHGhatsssh ....................................s.....s.ssD.h.lh.l.sG..u....p...s......p...h......p......s......h......t.....h......tc......tp...s......h.p...p...s.l..p...p...t....h...p.....p......G...t......s..llulCG.G.aQ.hL.Gcp..lt...............................sss.u....hp.G.lGlL.s.h.p.T..t..........h...p........s.....p+t..lu.thpsphtt..................................stslpG.......a.......E...hH...h.Gpohhsss.st.shhth...................................................pttsth-.........Gsh..........ss.lhGoYhHGhh.sp.................................................................................................................. 1 449 934 1202 +7516 PF07686 V-set Immunoglobulin V-set domain Bateman A anon Bateman A Domain This domain is found in antibodies as well as neural protein P0 and CTL4 amongst others. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.70 0.71 -4.19 113 19537 2012-10-03 02:52:13 2004-04-28 13:40:02 12 592 276 695 9493 120210 5 107.80 17 38.44 CHANGED s..lpp..sp................................hpst.GpslpLpCphp...........t...tltWh+..pps........spt.phhh.........ht....thttpapsphph.tsss............ttphsLplps.lphsDsGsYhCshhs...............hhhhspsspLpV ....................................................................................................t....................................h.s.t.G.p..s.l.p...LsCphs...........................sth...t.l..t.W..hp......pps................................sp..s.....phlh...............................t..t...t.t....t..t...h....s...s.....p.....h..p..s..R.h.p...h......t.s.ts.....................................tt..s.h..s.L.p.l.ps..l.p.h..p...D.s.G..h.Y...hCth.t..................................................................................... 0 892 1878 3948 +7518 PF07688 KaiA KaiA domain Bateman A anon Bateman A Domain The cyanobacterial clock proteins KaiA and KaiB are proposed as regulators of the circadian rhythm in cyanobacteria. The overall fold of the KaiA monomer is that of a four-helix bundle, which forms a dimer in the known structure [1]. 
30.00 30.00 47.90 47.70 27.20 26.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.84 0.70 -5.12 3 66 2009-01-15 18:05:59 2004-04-28 17:16:04 7 1 65 14 22 79 31 249.20 44 92.72 CHANGED LoIClaV.SsAIuQ-LpRlaouDRapLssFpSt--FCsYLEs+R-pIDCLILhaussShp.VlspLaapGsLLPAILltPpsS..AcPs-Psss.lYHsAElHLspcQL-QLPpQVDcAIAcFL+LuPlCTlPsHlhhh.ssLh-uSsps.LlsQQRRLAQKLKERLGYLGVYYKRDPu+FaRNhSPtEKQKLL--LpSpYREIVLSYFSs-usVN-pIDpFVNpAFFADluVSQVLEIHMELMDEFSK+LKLEGRSEDILLDYRLTLIDVIAHLCEMYRRSIPRE ..............................................................................h.hh.s.tl.p.htp.hL.s.p+Y.......l.......hp.s.pp..hhthlptppcplDslll..p......hhppLhppuhllPs.ll.l.t...........................................tsph.......YHssElpl..pplpplth.lDtAIspFLphu..ssthsp...ps................t.pt.........l.hpQpRLup+L+ERLGYLGVYYKR...........sPppFhRsLsssE+pcLLcpLcpsYRcIlLsYFss-tslNptIDpFVNpAFFuDlsVopllEIHMELhDEFSpQLKLEGRS--hLLDYRLsLIDllAHLCEMYRRSIP+.... 0 2 14 20 +7519 PF07689 KaiB KaiB domain Bateman A anon Bateman A Domain The cyanobacterial clock proteins KaiA and KaiB are proposed as regulators of the circadian rhythm in cyanobacteria. Mutations in both proteins have been reported to alter or abolish circadian rhythmicity. KaiB adopts an alpha-beta meander motif and is found to be a dimer [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.60 0.72 -4.51 16 352 2012-10-03 14:45:55 2004-04-28 17:24:08 7 6 163 18 129 572 244 80.00 41 45.47 CHANGED hLaVuscshsptpshpslpplppcphshpapLpllDltcQPpLsEtc+llATPsLlKlhPtPhphlsGs...pphhhhh.ch ....................+LaVuG.possShpAlpsLcp.lh-..p..c.h.t..s...Y.sLcV..I..DlhcpPpLAE.pccIlATPT.L.l.K.l.h.P.sPl+.+.IIGDLSspc+VLhsLc.h........................ 
0 31 86 113 +7520 PF07690 MFS_1 Major Facilitator Superfamily Bateman A anon Pfam-B_5 (Release 13.0) Family \N 32.60 32.60 32.60 32.60 32.50 32.50 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.78 0.70 -5.74 195 181668 2012-10-03 03:33:39 2004-04-30 10:30:46 11 489 5747 5 55719 183465 25371 295.20 13 82.52 CHANGED lhhsthhshhshth.............hhshh.......s..hh..............tpp..........................................hs........................................hs.st.....huh....lhsh...hhl..sh...slsthhh.Ghl..s-....+.h.....G..p+p.hhhhuhlhhulu..hhhh.......h.......ss......hh.h......lhhhph...ltGhst.uhhhssshshlsphhst.p.....cputsh..uhhts...shslGs.hlG.shluu....hls.thhu............aphs....Fhlhuhlslls...hl..h..hhh..............................htptstttttttttt...........................................h.hshht.........hthhhpp.........h......................hhhshhhh.....hhshhs....hhhhhsh......h.........tpthuhs.....................hhss..hslsshhshhh...thhhuhls.c+.......hst......thhhhhshhhhhhsshshhhhshs....................hhhhh...........shhl....hGh.uhuhh....hsshhshssphh....s..........pttu.....pshuhhs.hhts.lusslus 
................................................................................................................................................................h.....hh..h.h.s....h.................h...s..hh..................s............hh.................................................hp.p...................................................................................h..s..................................................................................h.s.....s.t..............................h..u.h......................hhs.h..................h.hl........sh....................sl...s...t......h.........h.......h...G....h..l..........s..-...............................+..h...................G..............p..+......t.....h........h.....h.....h........u.....h....h...h......hs...ls.........hhhs.......h..............s................ss....................................................hh....h................hh..h...h...th................l.h..G......h..........u.......t........u............s........h.............h........s..........s.............s........h......s...........h.....l...........s...........p......h......h.....s....t....p...........................p..p....u.......t........s......h.........u......h.....h..t..s....................s.....h...s....l......u.........s.....h........l......u.......s..........h....l....u...u.....................h..l....h....p...h...h.u............................................................a..p...h.s......................a..h...........l......h......u..s.......l....s.l.l..s..................hl..........h.........h.hh............................................................................h..............t...........................................................................................................................................................................................................................................................................h...h...t...................................................
..........................................hhhhh..hh..........................................h.h.h......................................h....h.h.s..................................................................h...t.hs.................................................................................................h....h................................h.........h...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................h................................................................................................................................................................................................. 0 15082 30526 45126 +7521 PF07691 PA14 PA14 domain Rigden DJ, Mello LV, Galperin MY anon Rigden DJ, Mello LV, Galperin MY Domain This domain forms an insert in bacterial beta-glucosidases and is found in other glycosidases, glycosyltransferases, proteases, amidases, yeast adhesins, and bacterial toxins, including anthrax protective antigen (PA). The domain also occurs in a Dictyostelium prespore-cell-inducing factor Psi and in fibrocystin, the mammalian protein whose mutation leads to polycystic kidney and hepatic disease. The crystal structure of PA shows that this domain (named PA14 after its location in the PA20 pro-peptide) has a beta-barrel structure. The PA14 domain sequence suggests a binding function, rather than a catalytic role. The PA14 domain distribution is compatible with carbohydrate binding. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.49 0.71 -4.55 34 1994 2012-10-02 01:24:23 2004-04-30 11:44:52 7 314 720 31 924 2134 500 135.80 15 13.85 CHANGED tpsGl.tthahpssth.s...h.................pss..sshh.stthss.shs.sphssphsGhlpsspsGpYpF..tlt......uDssscla......lsuphll-ps...stp.......................phppslthhuuphYsl+l-ahptsssthh........pltWsssssstpshstss .........................................................................t........................................................................................................t.................................t..h...t............s.....p..a..s.h......c..hp....Gh.l.p...s.....s.p....s....G.......p......Y....p..........F..tht..........u-s..s..s...c.la.................l..s.........s...p....h..l....l-p.t............stpt.......................................................t...pt..p.ss.......l..t...l...p.sG......p..ha..tl..clpahp.ttstt...................h.h.............s.............................................................................................. 0 402 643 808 +7522 PF07687 M20_dimer Peptidase_M20; Peptidase dimerisation domain Bateman A, Studholme DJ anon Pfam-B_253 (release 4.0) Domain This domain consists of 4 beta strands and two alpha helices which make up the dimerisation surface of members of the M20 family of peptidases [1]. This family includes a range of zinc metallopeptidases belonging to several families in the peptidase classification [2]. Family M20 are Glutamate carboxypeptidases. Peptidase family M25 contains X-His dipeptidases. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.27 0.71 -4.33 178 25350 2009-09-11 20:10:22 2004-04-30 16:35:36 9 57 4852 83 6606 19881 6380 112.40 18 27.08 CHANGED hup+Ghhshclp..........spGpsuHuu.hsthshN..Alp.......hhschlspLppp.................................................................sohplstlpuGt....stN....slPspspsphshR...h.stps..hcplhpplpphlppthhptp .....................................................................................................s.ht..h..plp..........lp.G..p..s..u..H..uu...h..P..p...........t......u...h.s.....Alt...............hh.s....ph...l..s....p....l..p..s.h.....p.h...................................................................................................h.........s...s....l...s...l...s..p..l..p.u.Gs....................shN..........ll...Ps...psph.p..hslR.......sh..s..t...p.s.....hc.....tlh...p.plpphhpt.....th.......................................................................................................... 1 1986 4003 5480 +7523 PF07692 Fea1 HCR1; Low iron-inducible periplasmic protein Merchant S, Studholme DJ anon Pfam-B_60541 (release 13.0) Family In Chlamydomonas reinhardtii, the gene encoding Swiss:Q9LD42 is induced by iron deficiency [1]. In green algae, this protein is periplasmic. The two paralogues FEA1 and FEA2 are the major proteins secreted by iron-deficient Chlamydomonas reinhardtii, and both are up-regulated in response to iron deficiency. FEA1 but not FEA2 is up-regulated by high CO2 concentration. Both FEA1 and FEA2 are secreted into the periplasmic space and genetic evidence confirms that their association with the cell is required for growth in low iron [2]. 
25.00 25.00 27.30 26.60 24.20 20.30 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.24 0.70 -5.63 3 23 2009-01-15 18:05:59 2004-05-04 12:51:18 6 7 9 0 20 23 61 329.40 25 55.92 CHANGED SVLhLVlAAGAlslAsAQ.sTTGsthGGFpaAGNVIGYVNMThDYCDIQAALuAGDFTEALsIYssGKNSp+GsAp....psFh+FAoYh-ANtoGpsFaDAl.................LhsS-TahsDshlGAA.GDGD..........ATlAtGVIsVuTLKYthHEhDoAluKp....ADGNu......uDASGAPHNlDEAWAhaaGGssusCGshSuWtaS....LstAh..pGpsahGuSsuNThMlpoFlNuhcAotosuThpIpuhsAARsNphRlLsLpuLp..ushtYT-cAtsuYsccsupsshupphIAVsWshhc.MLphpsasGuuVN..slthlDFhhcpTEupLSSp...AVcsALKsVlpsLGh-sA-l..GulcucIhD-ohshcCuucTshulp ....................................................hh..............s..t..t..hs.hsGap.sosVltasphshDht-hp..A..u...lput.s.assAhphYopGtNShpupup......cThps.huoaspu..phsuEshaDsh...........................hhGp.sshhtshh.tAs.usts........................h..tslKtuhhpsshh.YshHEl--Alscs......tssss.......usssGuPHshDEuWAFYsGShp...Gssusuo.u.tth.............lstth......ttssh..sGs.o....s..ssshhhs.hpt.h..us..h.psst...th.....spspsshspllthhslshlQ..uhl..+..YthtAssu.tpsuspttt..t.....W.sa.t..shL...h...p...s.....ustst........cl.pt.s..t.uht......tlcttlcslhSshGhphupl..Gs........................hsss.shh............................ 0 16 19 20 +7524 PF07693 KAP_NTPase KAP family P-loop domain Aravind L anon Aravind L Domain The KAP (after Kidins220/ARMS and PifA) family of predicted NTPases are sporadically distributed across a wide phylogenetic range in bacteria and in animals. Many of the prokaryotic KAP NTPases are encoded in plasmids and tend to undergo disruption to form pseudogenes. A unique feature of all eukaryotic and certain bacterial KAP NTPases is the presence of two or four transmembrane helices inserted into the P-loop NTPase domain. These transmembrane helices anchor KAP NTPases in the membrane such that the P-loop domain is located on the intracellular side [1]. 
20.00 18.00 20.00 18.00 19.90 17.90 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.87 0.70 -5.44 22 1164 2012-10-05 12:31:09 2004-05-14 10:44:01 9 77 813 0 383 1481 411 290.10 15 42.16 CHANGED hs.hupplschlspss............tsshslulpGtWGsGKophlphlppthppp................................................................................................................tphhlsphssahhsst.cshhtth...hsslhpsltpchpt.ttphhhthp...........hhshhhthththhhtsshhhhh.h...............................................................................pp.ppphpthhpchtppLcp.......ptclllllD-LDRh..cspphhpllcsl+hlhph...pshsalLshDpchl...tpulpptht........supcal-Kllphshplsssshpp.lcphhtpt...hpshhpthsppth.................................................................shphshhhss.s.ts.hsPRpl+Rhlsslphhhthh .........................................................................................................................................................t.h.p.hlt.p...............t.sh.slu..l.u..tWGsGKo.h.l.p...h...h.t..pp.h....ppt.......................................................................................................................................................................................................................................................................................................................p.hhhh....hss.h.....t......p....t........p...p...hh.th.......ht....p..l.....h........p..t..h..t..t..t........t.....................h.h..h...........................................................................h...............h...h...............................................................................................................................................................................................................................................................................................................................t.....................t...p.h.p......................h......ht...p..h...
p...l.pt...................p.tlllhlDp...LD......ph.......t......p...hhphh.ph...lc.h.hhsh...................h..hh...lhs..h..c.p..l.................tsltt..t..........................s...........tahpphhph...h.l.............t......h.....................................................................................................................................................................................................................................................................h............................................................................................................................................................................................................................................................................. 2 116 190 297 +7525 PF07694 5TM-5TMR_LYT 5TMR of 5TMR-LYT Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the transmembrane region of the 5TM-LYT (5TM Receptors of the LytS-YhcK type) [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.03 0.71 -4.92 106 2704 2012-10-03 02:46:00 2004-05-14 13:35:26 7 88 1472 0 397 1873 31 168.50 27 32.17 CHANGED s.hFcpl.h.ppph.phtp.clhlhllFuhhuIhusahGlpl.................................s..ulsNsRslslhluGlhGGPllGlhsGlluGlaRhh..lGG..hsuhssuluolltGlluGlltthhpp......phhshhhuhhhuhhsEhhp...........hhlll......lhspPhstuhpl.......lphIulPMllsNslGsslhhhllpsh 
....................................h..hFh.l.h.p.ph...ph.p.+.hh.l..hllFol.h.s.l.hu.s..a..h....Gl...lc........................s....uls.Ns...Rhl..slhhuGl.lGGP.hVG.hhs.Gll....u.G.l.a..R.ah........hGG........hs..u.l.s....s..h.....I..o..o........I..l.hG....l.l.u..G.h...l.thhhhp................chhs.hhhu...h...hl...s...h.l.sE..hlp............................Mll.l..l...................hhup...s.h.t....p...u.l.pl.............l.phI......u.h..PM...h...lss.o.lusulhhhll...p........................................................................... 0 119 232 308 +7526 PF07695 7TMR-DISM_7TM 7TM diverse intracellular signalling Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the transmembrane region of the 7TM-DISM (7TM Receptors with Diverse Intracellular Signalling Modules) [1]. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.86 0.71 -4.65 179 1345 2009-01-15 18:05:59 2004-05-14 13:37:09 6 102 600 0 524 1395 170 202.00 15 27.75 CHANGED pph.hhuh.hhGhlls.hhlYNLhlahth.+-psalaYshal..hshhlhhh....s..hsGhshpal....hs......s.hhtphhhhhhhhlsh.hhsh.Fspp.a....Lph.p.t........plhphhhhhhhh..hhhhh................hh.....hthhhhhhh.....hhshlhhlhhhhhulhth..hcshpsA+aal..luahhhhlushl...hh.h........hhu..llshs.hhs..hshtl...Ghhl....chllhohALu.....+ .....................................................h..hh.uhhhGhhhh.hslas..lhlahhh..+c...p...sa...l.aauhhs..hshh.l.hhh.......s....hpG..hh..ht...hh........s...ths....h....t..p.thh....hh..hhh....hshh.hhhl..Fhtp.h.....lphpp.t............thhph.h.h.h.hh...hh..h..h.lhhlh.........................h.s.hh.........hs.hhh..hhht.............hhhhl.hh..hhhhh.hshhth......hcs..h...........ps.A..t..h.h...l.....luhh..h.hhl.s.slh.....th.hh..........hhu........hls.s.....hh....s.....hhh.h......uhhl....thlhhuhslu........................................................ 
0 178 351 447 +7527 PF07696 7TMR-DISMED2 7TMR-DISM extracellular 2 Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents one of two distinct types of extracellular domain found in the 7TM-DISM (7TM Receptors with Diverse Intracellular Signalling Modules) bacterial transmembrane proteins [1]. It is possible that this domain adopts a jelly roll fold and acts as a receptor for carbohydrates and their derivatives [1]. 21.30 21.30 22.00 22.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.46 0.71 -4.75 114 945 2009-01-15 18:05:59 2004-05-14 13:41:02 6 52 497 4 371 965 86 135.70 17 17.93 CHANGED hsl..s.tthphhcDsssphslspl..t.....t.pspFpsh........psphs.hGhspusaWl+hsL...tsssstpt.......phlLplshshL-p..lclYh......tss......thh.hhptGcp..hsasp.Rthtppshlasls.L..sssps.hshalRlpS.psshphP......lplhss .....................................h......hphht.D.ssphs.l.spl..........ps.tapth......................psphs.huh.....s.p..s.s.....hWl+hsl.........tss.ss.p.ph.....................p.hlLpls..h...sh.lc.p..lsl...ah......tss...................s.h.p.hhphGsp...hs....hsp...Rt...ht.t....p....s...hla..ls..l........sssps..hshhl+lpo.p.t....hp.hs......l.lhp..................................................... 0 115 220 299 +7528 PF07697 7TMR-HDED 7TM-HD extracellular Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the extracellular domain of the 7TM-HD (7TM Receptors with HD hydrolase) [1]. 
22.60 22.60 22.80 22.70 22.40 22.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.20 0.70 -4.61 62 847 2009-01-15 18:05:59 2004-05-14 13:44:56 6 5 814 0 235 690 293 217.30 18 31.07 CHANGED LpsGplA.hsIhAP+slp..DptsT-p++ppAtspl...VYshctp..hspphhpp.lpphh....splpplp...ppspt..................................................pcltph.hss...............ls-pthpp...........................................................................hh.ptsppphpphc......sslhpsl...schhs.........pslss..sthtpttp.....cs.ptpl.phtsh...sss....hptssphlsphh.lpsNhhhDtptTpphppcshpplpslh..lcpGplIV+cG-hlopcthchLchlGLlpp ...............................................................................................................................................hplu.psIhushph.......s.p...t.Tcpc+ptAtptl...s.Yt.hppp..htpp.hpt.l.p.pha.......t..l..pphp....pp.pt............................................................................tplhph.h................hh.p...lspp.htp................................................................................................................................................lh.ph...stp..php.thp...................sthhph.l..............sphhp.............................ptlpp......pphtphpp................................ch.tppl...p..sl...sst......hpp.hhpt.l..hp.h..ltsNhhhDpptTcpt+p...pshpsV..pssh..lhpGplIVccG-hlspcthphLchlthhp.p..................................................................... 0 107 187 219 +7529 PF07698 7TM-7TMR_HD 7TM receptor with intracellular HD hydrolase Anantharaman V, Aravind L, Studholme DJ anon [1] Domain These bacterial 7TM receptor proteins have an intracellular Pfam:PF01966. This entry corresponds to the 7 helix transmembrane domain. These proteins also contain an N-terminal extracellular domain. 
27.50 27.50 27.60 27.60 27.20 27.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.27 0.71 -4.86 99 879 2012-10-03 04:04:29 2004-05-14 13:47:25 6 4 869 0 252 722 397 193.80 20 27.36 CHANGED hhshlGhhlhlhlhhhhhhhahc+h.p.plhppp..ptlhhh...hlhllhlhLsph...lhh.hs.............hsalhPlsssshLlshhlssclul.....hhsslhullhuhh..........shshthhlhtlluuhsushhlp.+hppRsplhtuulhlulssh.hhhhsltl..lpsss.............h.....t..plhhpshhshhsullsu...llshGllPhhEshFsllT ............................................h...hhGhhl.hlhlhlhhhhh.ahcp.....h...p...p....phhpcp....t..lh..hh.h..hlhhlslllhph.......hhh.hp.....................hsallPhAhsslllpl.hls.p.+hul.....hs.sl.lhslhsuhh.h........shsh.phhlhtlluuhsush.hl+.c........h........ppR.sp.lhtuuh.hlulhss..lhhhslhl......lpsss......................h....s...phhhphhhuhh.sullus....llshGllPhhEphFslho............ 0 121 201 235 +7530 PF07699 GCC2_GCC3 GCC2 and GCC3 He QY, Liu XH, Studholme DJ anon He QY, Liu XH Family \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.24 0.72 -4.38 41 2150 2009-09-13 19:08:02 2004-05-14 13:53:13 8 312 132 0 1375 1889 98 47.60 31 7.28 CHANGED Gpahs.....sst.....t.CpsCPtGoY.......p...spsups...pChtCPss....psT.ht..Gupshs-C .....................G.a.t......sst........ptCt...sCPhGoY........p......sptGps......pCh.sCPss.....stT...pt...tGupshppC..................... 0 558 661 1027 +7531 PF07700 HNOB Heme NO binding Iyer LM, Anantharaman V, Aravind L, Studholme DJ anon [1] Domain The HNOB (Heme NO Binding) domain, is a predominantly alpha-helical domain and binds heme via a covalent linkage to histidine. The HNOB domain is predicted to function as a heme-dependent sensor for gaseous ligands, and transduce diverse downstream signals, in both bacteria and animals. 
21.10 21.10 21.10 21.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.91 0.71 -4.73 16 853 2012-10-02 19:02:47 2004-05-14 13:56:09 10 23 422 52 440 781 58 159.00 24 32.10 CHANGED MaGhlhpslpchlpcpaGp-VW-plhcpsuh-.ppsapstp.YsDshhhcLlssluplhshshcplhchaGchhlpahhcpGaschltshucshpsFlpslDs.lHhplsphY..Pph+sPuFpspst.scssllLcYpSpR.GhhchllGll+slA+pFtp..-lplphlcppp- ..........................................MhGhl.pth.t.hl.p.p.a.G..ph.hpp.h.........hpps....t..hp......t.t..a....sht.Y.....s-.p.....hhplltsh....u.p..h.h..s.....lsh....pp..lhchaG.chhh..ph...htppt..c.hl.......psh..Gsshp-FLp.....................slDs.l.....H.p.plp..ph..a.....ss...hc......sPsFpspph...sp....s............sl..h.lcY...h...S..p..R......shtths..h..............GllcusAcphhp..clplphht....p.................................. 0 159 223 352 +7532 PF07701 HNOBA Heme NO binding associated Iyer LM, Anantharaman V, Aravind L, Studholme DJ anon [1] Domain The HNOBA domain is found associated with the HNOB domain and Pfam:PF00211 in soluble cyclases and signalling proteins. The HNOB domain is predicted to function as a heme-dependent sensor for gaseous ligands, and transduce diverse downstream signals, in both bacteria and animals. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.35 0.70 -5.13 33 958 2009-09-11 06:19:49 2004-05-14 13:58:40 9 50 126 16 637 851 5 138.40 30 19.84 CHANGED shspchplsspsFCcsFPFHllascchplhQhGpulp+lh.........sphhhspp.......pls-hFcllRPp.lphoFcsIls+lNolFllpo.....+t............p.sp.t....p...........................................................................................................................LcLKGQMlalsEsssllFLsSPhVssL--LpppGLYlSDIPlHDAoRDllLluEQtcAphpLppchE..phpcpLcpshptL-pEKc+T-cLLYullPtsVApcL ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tlh..............pphp.......phpspL....c.......cp....p....p....tLcpE.K+KT-pLLaphlPtsVAppL....................... 0 228 292 501 +7533 PF07702 UTRA UTRA domain Anantharaman V, Aravind L, Studholme DJ anon [1] Domain The UbiC transcription regulator-associated (UTRA) domain is a conserved ligand-binding domain that has a similar fold to Pfam:PF04345 [1]. It is believed to modulate activity of bacterial transcription factors in response to binding small molecules [1]. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.38 0.71 -4.58 168 12427 2012-10-01 19:33:20 2004-05-14 14:00:43 8 13 3138 54 2128 7191 599 138.40 19 57.50 CHANGED spGtp.spsc....llphphhsu..sspluptLsl.sssst.VhclpRlRhh...-spPlsl-psalstp.hhssht.....pp.ph......ppS..laphLcp..hsh...tlsp..uppplp.Aths.sscpuphLsl...sss..sslL..hlc+hsasps..upsl-aspshapu-cac ......................................................Ghpspop..llp.hph.h.....s....s..t..plup...t...L...s.....l...p.....s.p...p......l..h.pl.p..Rl..Rhh..........-s...p....P....h..h....l.......-..ps..al...s.....h...p...h..h..s..s..ls...........pp..th............ppS...l..a.ph.lcp....hsh...pls.p......up.p...p...lp..sh...h.s....ss.....c.....u.p....h....L..p.l......sts....p.P..lL...hlc.ph.s...a...s..p.s....upsl-aspshaps-ta............................................... 0 564 1212 1689 +7534 PF07703 A2M_N_2 Alpha-2-macroglobulin family N-terminal region Studholme DJ anon Prosite Family This family includes a region of the alpha-2-macroglobulin family. 
21.50 21.50 21.50 21.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.86 0.71 -4.37 184 2785 2009-01-15 18:05:59 2004-05-14 15:54:42 9 133 1371 58 856 2463 101 141.90 22 9.28 CHANGED lpl...phsc..pshpsG-shplplp..sshs................up.hhhhl.h.pspllptt............................................................thp.hpssps.......................................slplslspchsP.sshlssh.hl.............................................s....hshsss.hlsVpp.sp...phplplps.s....p.phpPspphplplp..s.....................suhlslsAVDpulLtL .....................................................................................................................................................plthsp.sthpsG-s..hplplpsshs................sp.hhhhl....s.ps.pll.h.hp............................................................................hhc...ls..spth...............................................................slplsls.p..s..h..sP..shhls..sh..hl............................................h...sphshssh.tl.slp..s...sh...ph.s.l.p.lps..s.............s..ph....pPs....p.p........ls..lclp.s.................s.suhlsl.suVDpulL.l............................. 0 194 349 599 +7535 PF07704 PSK_trans_fac Rv0623-like transcription factor Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the Rv0623-like (Swiss:P96913) family of transcription factors associated with the PSK operon [1]. 28.30 28.30 28.50 28.40 27.80 27.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.61 0.72 -3.30 29 347 2012-10-02 18:44:02 2004-05-17 11:25:38 6 1 139 0 81 156 29 81.60 36 96.75 CHANGED Mu.LsIKsscscpLA+clAchTGp.ohTpAlppAlcccLp......ccppptshh..-cltsl.....hpttthshhss.psscpphhaDEhshss .........Mu..LsIKsscscpLsccLA.tcsGp.ohTpAVtsAl...pcc.Lu...........ttppcpssLt..-cLtsl.....hcptuhssscs.ttscphh...hYD-pGLPt............................... 
0 20 42 63 +7536 PF07705 CARDB DUF1604; APHP; CARDB He QY, Liu XH, Studholme DJ anon He QY, Liu XH Domain Cell adhesion related domain found in bacteria. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.20 0.72 -3.96 72 1259 2012-10-03 16:25:20 2004-05-17 11:57:42 6 225 338 5 749 1464 732 105.20 19 17.15 CHANGED tP...DLhl.........thssssshsspp.slss.s.lpNpGsss.usshplp.........lYlsus.h...sshsl...........suLsuGsspslshshhs....ps........Gshs.lpsslDssNtlsE.sN...EsNNphot ...................................sDLhl.........t.hhssss..s..t.s.G..p.s..hsl..ss...s..V.pN..p.G.....s.ss.....u..s..s.....h.p....l.p........................l..h...l...s.ssts..................sstsl................................ssL...s.s.G..p.......o.....t......s.....l....s..hshsss............ss....................................G.s.ap..lt..s...h.s...D.s.......t....s.........t......l.......E....s.....c..sNN....s........................................................................ 0 244 406 657 +7537 PF07706 TAT_ubiq Aminotransferase ubiquitination site Hargrove J, Studholme DJ anon Pfam-B_15367 (release 13.0) Motif This segment contains a probable site of ubiquitination that ensures rapid degradation of tyrosine aminotransferase in rats. The half life of the enzyme in vivo is about 2-4 hours. In addition, unpublished information identifies at least 2 phosphorylation sites including CAPK at Ser29 and, at the other end of the protein, a casein kinase II site at S*QEECDK. This region of TAT is probably primarily related to regulatory events. Most other transaminases are much more stable and are not phosphorylated. 
18.90 18.90 18.90 22.20 18.40 17.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.31 0.72 -7.87 0.72 -4.02 7 51 2009-01-15 18:05:59 2004-05-24 12:14:22 7 3 35 0 25 44 0 41.20 55 9.12 CHANGED hcsYlIQMsususL.......PolLDsHVNlsGhsoV........uKhKuRKsR .MDsYVIQMsusGsL.......PSlLDVHVNluGRSSV.......sGKhKGRKAR.. 0 1 1 7 +7538 PF07707 BACK BTB And C-terminal Kelch Stogios PJ, Studholme DJ, Finn RD anon Stogios PJ Domain This domain is found associated with Pfam:PF00651 and Pfam:PF01344 [1]. The BACK domain is found juxtaposed to the BTB domain; they are separated by as little as two residues [1]. This family appears to be closely related to the BTB domain (Finn RD, personal observation). 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.15 0.72 -4.18 81 5713 2012-10-02 01:20:04 2004-05-27 16:12:29 10 234 772 7 3017 5073 29 98.70 24 18.96 CHANGED ClulhpaA-ta.sspcLhphuppaltppFhpVhps-.-Fhp.LshcplhpllssDcLslpsEcpVapAlhpWlpa-hppRppthsclL.ppVRl.sLlsspaLhphlp ......................................Clthhthu.c.ha....st.p.cLt..p.tshp.....a..l...hpp.F...p....l......h.....p.......s....-......-..........F....hp....L...s....h....ppl...h..p.l...Ls..p.-p......L.....p..l........p.....s......E.....pp.............l.ac..Asl...p.W.........l....p..............t..........c..............h...........p..........p............R.........p.........p.........h...........h........s.............p.............l....l....pt..lRh..shls.thLhp............................................... 0 922 1157 1963 +7539 PF07708 Tash_PEST Tash protein PEST motif Yeats C anon Yeats C Motif This motif is found in the Tash AT-hook proteins of Theileria annulata. These proteins are transported to the hosts nucleus and are likely to be involved in pathogenesis [1]. It is also often found in conjunction with Pfam:PF04385. 
It is suggested that they may be 'part of PEST motifs' (a signal for rapid proteolytic degradation) in [2], though this is not definite. This motif is also found in other T. annulata proteins, which have no other known domains in (unpublished data: C Yeats). 22.40 22.40 22.40 22.40 22.30 22.20 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.33 0.73 -6.32 0.73 -3.83 11 113 2009-01-15 18:05:59 2004-05-28 10:38:50 6 10 2 0 52 114 0 18.70 62 4.55 CHANGED E-L-PETIplElpSD-E-p ...pLcPETIPVEluSDEE-... 0 0 47 47 +7540 PF07709 SRR Seven Residue Repeat Yeats C anon Yeats C Repeat Associated with Pfam:PF02969 in This repeat is found in some Plasmodium and Theileria proteins. 19.50 2.30 19.70 2.90 18.70 -999999.99 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -6.43 0.77 -6.36 0.77 -2.75 14 54 2009-09-16 13:37:02 2004-05-28 11:45:45 6 9 8 0 38 51 9 13.60 30 9.06 CHANGED pphccVcssYppLp ..ppacpVpssYpplp... 0 12 31 34 +7541 PF07710 P53_tetramer P53 tetramerisation motif Bateman A anon Pfam-B_782 (release 3.0) Motif \N 20.70 20.70 20.90 21.10 20.20 20.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.89 0.72 -4.86 28 335 2009-09-11 15:49:44 2004-06-01 14:29:59 6 11 99 129 105 383 0 42.20 46 9.30 CHANGED KK++s....tDcEhFTLplRGREpYEhLpKlp-uLELt-hh.sppps .............K++s............D-E.h.....ahL..plRGREp...aEhLhKlpEuLELh-hl.sppp.h........ 1 12 18 42 +7542 PF07711 RabGGT_insert Rab geranylgeranyl transferase alpha-subunit, insert domain Finn RD, Bateman A anon Pfam-B_20675 (release 13.0) Domain Rab geranylgeranyl transferase (RabGGT) catalyses the addition of two geranylgeranyl groups to the C-terminal cysteine residues of Rab proteins, which is crucial for membrane association and function of these proteins in intracellular vesicular trafficking [1]. This domain is inserted between Pfam:PF01239 repeats. 
This domain adopts an Ig-like fold and is thought to be involved in protein-protein interactions and might be involved in the recognition and binding of REP [1]. 21.10 21.10 21.40 22.30 20.40 20.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.45 0.72 -4.19 4 58 2009-01-15 18:05:59 2004-06-02 17:26:25 6 16 37 3 24 51 0 98.50 59 19.56 CHANGED -hIpCVaVSR-EupVoVuFS+PVts..tpssLlLhLDspP.tVpWRoscsR.++S.lWlCDLPssuIsDpsspHshpVhWT-tcs++-CsLYsGppEsWCRDSA .............-uLRCLHVSR-EACLoVuFSRPllVu.sts-sLLLhVD-SPLhVEWRTPDGRNRPS.HV..WLCDLPAuSLNDphPQH.oFpVlWTuu-spKECVLhpGRpEuWCRDS.................... 0 2 4 10 +7543 PF07712 SURNod19 Stress up-regulated Nod 19 Studholme DJ anon Pfam-B032880 release 13.0 Domain \N 25.00 25.00 27.90 29.40 21.30 23.20 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.28 0.70 -6.02 3 106 2009-01-15 18:05:59 2004-06-03 14:42:03 7 3 29 0 66 84 1 309.50 44 88.67 CHANGED FLKTEc+IKSAVFaSPKlELuPGSVSN+aYYDIDFPRGHIuLKuFsAEVVDEAG+PIPLHETYLHHWlVVRYHQsKsScpsppphF....................IaVRNuGlCQusoLGQYFGLGSETRGTuTYIPDPYGIEIGNPEEIP-GYEEKWLlNIHAIDTRGVEDKpGCIECKCDLYNVTlDEYGRuIRPsYKGGLhCCYDpTQCKLKcGF.pGPK.RSLYLKYTV+WVDWDcYllPVKIYIFDVTDTLKlSD+SKGsss-HsCKlEYEVEPCsTNshpcsGCVDVKKsSLPMQsGGYVIYGVAHQHSGGIGSTLYGQDGRVICoSIPKYGNGcEAGNEsGYIVGMSTCYP.PGSVKIhDGETLTLESNYSNslcHTGVMGLFYlLVAEQLP.Pcl 
................................................................................................................................................................lpotsFhSsth.hsPGplss+hhhslcFP+GHlulKuFsuElV..D...p..tG..s....slPLaETYLHHWhh.t..Ya...h...s......p.....................t...h......................hhh+NpGhCptthl.pa.aGhGuEoRtTso.lPsPaulEhGNPtths..p.GY-E+WllNlhsIDTRG.s...D+..huChEC+C.......D..............h.......YNV.ThD..............sp..............lp....................sY+GGLhCC.DthpCph.pp...GF......pu....p....Rplh.L+YslpWl-Ws.p.h..lPl.+..lYIhDsT...Dp.......ps.p................psCp.......hEYp...l..t.sssst....s..s.csppsp.hsh..................pGG..................llYGsuH.HsG........shsuoLaG.p.................DG.RhlC.....sShP......pYG.....sG.pEuGNEtGYlVGMSsCYPp...PG...ol.+ltcGEhLsl.opYss.p..t.pTGVMGhFYlhlA-......t................. 0 7 58 64 +7544 PF07713 DUF1604 Protein of unknown function (DUF1604) Yeats C anon Yeats C Family This family is found at the N-terminus of several eukaryotic RNA processing proteins (e.g Swiss:Q8N3B7). 25.00 25.00 25.30 25.30 20.00 19.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -9.96 0.72 -4.19 20 248 2009-01-15 18:05:59 2004-06-03 14:42:41 8 7 214 0 185 248 1 86.30 55 10.82 CHANGED pchlPlacQpVpDEcGR.+RFHGAFTGGFSAGYaNTVGSKEG....WsPpTFpSSRppRus....sp..sppsEDFMDEEDlu-ht.tspplpoppcF..s ............p.p.lPlhcQpV+DEcGR.+RFHGAFTGGFSAG.........YFNTVGSKEG...............WTPoTFsSSRpsRAc............pp......pp+PEDFMDEEDluEht...ts+plpTsspF............ 
0 70 105 150 +7545 PF07714 Pkinase_Tyr Protein tyrosine kinase Studhome DJ anon Unknown Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.54 0.70 -5.32 145 24806 2012-10-02 22:05:25 2004-06-11 14:25:16 12 1823 1048 846 13827 124495 4199 232.30 26 33.84 CHANGED lpht.cpLGpGsFGpVacGphp...................ttsphpVAVKsL..+...p..sssppt...hpcFlp.EuplMppl.pH.plVpLhGlss...........ptpP.....hhllhEahptG...sLhsaL+pp..................................................................................................ptplshtp.LlphuhQlApGMpYLps+p..hlHRDLAARNlLls.....c.............st.hlKIuDFGLuRtl.........pps.Y..ptpssuc..hPl+Wh...APEulptsp..........aospSDVWSFGVlLWElhohGp.pP..............Yssh.ssp-........lhphl.cpGhR..hsp.PptCPsclY.pl.MtpCWptcPppRPoFppl...hptL ...............................................................................................................................................................................................................................p.lG..p....G....t.F..G....t...V..h..t...u....h.................................................t.h........V.......A.......l.........K..........h..........l.........p...................t....................t...t............t............t..........t................................................p...........p...................F.........h.................p...........E.................h...........p...........h..............h.................p.............p................l................p...................H.............................N...............l...............l...............p...................l............h.................G.....h....sh.................pt.t.......................................h.h..l...l......h........E........a........h........t....t....G.................s...L....t......p......a......L....p...p..p..............................................................................................
.........................................................................................................................................................t..t.........l....s.....h.....t....p.......h....l........p.....h.....s.......h....p.........l...........A..........p..........G....h........t...........Y..........L.........c.................p.................p..........p...................h...........l.............H...............R................D.........L..............t...........u..............p...........N...........l......L......l...s....p........................................................sh....h....s.....K......l......u.......D.......F.....G.........l...u......+..h...........................................t....t......h...................h....t......t.......t...........s........h.........h.............s........h................t...............a......h..................A.....P.........E........h....h.........t..t.h.........................................h....o.....t......p......o.........D.....V.........a.................S..............F.............G...............l....l.........l..........h......E..........l.............h......o..................s.....t........P.....................................................a............t........h......p.......t.....t....p...........................h....h.....t........h........l..........p..........p.........s............h........p....................h...................h...........s................................t...........s...........s.............................t..............h..........h.............p........l..........h..............h........t.......C.............h.......p.............p..s...p...p...R.P..s.htplht....................................................................................................................................................................... 
1 4020 7259 10608 +7546 PF07715 Plug TonB-dependent Receptor Plug Domain Yeats C anon Yeats C Domain The Plug domain has been shown to be an independently folding subunit of the TonB-dependent receptors ([1]). It acts as the channel gate, blocking the pore until the channel is bound by ligand. At this point it under goes conformational changes opens the channel. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.23 0.72 -3.72 742 36469 2012-10-03 10:59:06 2004-06-15 15:23:27 10 123 2367 66 9642 33866 11001 109.70 22 13.45 CHANGED shpssstulss......lstcp.lppp..s.sssls-sLp......t..hP......Glsh.........................sssssss...s.....s..sh...........slRGh.....ss....................spshlhlD.Gh.h..............................tts...........shs..t..l.s.....s.....t..sl-plEVl+.Gssos.lYGs.suhGGl ...................................................................................................t...phstslss.....l.s.t.c..p...l.ppp....s...ss..s...l...s..-..s.Lp...................t...hP...........Glsh.................................ssss.sss....s...........s....pl.................plR...Gh.....ss...................................sps.hlhlD...G.l.h...............................................tts.................shs....t..l.....s........s.......s..sl.-..p.l-Vl+..G...s.s..u.s..lY.G.s...s.uhuGs............................................ 
0 3204 6758 8441 +7547 PF07716 bZIP_2 Basic region leucine zipper Studholme DJ anon PfamB-200; Release 14.0; Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.57 0.72 -4.14 48 1966 2012-10-02 13:17:30 2004-06-16 18:28:29 10 24 376 25 1196 6475 28 53.70 29 17.49 CHANGED pssphpp+Rc.+NNtAu++SR-+++t+ppphpp+ltpLpccNppL...cpclppLcpE .................pphtt+.R....c..+N.s...A..A++SR-+++...h+p..p.......p...l.......p...p.......c.......l....tp.......Lp...p-.Np.pL..........cpclttLpt.......................................... 0 353 569 934 +7548 PF07717 OB_NTP_bind DUF1605; Oligonucleotide/oligosaccharide-binding (OB)-fold Yeats C anon Yeats C Domain This family is found towards the C-terminus of the DEAD-box helicases (Pfam:PF00270). In these helicases it is apparently always found in association with Pfam:PF04408. There do seem to be a couple of instances where it occurs by itself - e.g. Swiss:Q84VZ2. The structure PDB:3i4u adopts an OB-fold. helicases (Pfam:PF00270). In these helicases it is apparently always found in association with Pfam:PF04408. This C-terminal domain of the yeast helicase contains an oligonucleotide/oligosaccharide-binding (OB)-fold which seems to be placed at the entrance of the putative nucleic acid cavity. It also constitutes the binding site for the G-patch-containing domain of Pfa1p. When found on DEAH/RHA helicases, this domain is central to the regulation of the helicase activity through its binding of both RNA and G-patch domain proteins [1]. 
21.40 21.40 21.50 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.61 0.71 -4.25 118 5209 2012-10-03 20:18:03 2004-06-21 13:22:17 11 143 2032 5 2747 4906 191 134.10 22 12.53 CHANGED pQLtplh...................................................pch...phph..........p.........................................................pp.phl+pslsuGha.plA.....................................thp............pp.....sp..Ypsh................................tpsp.hlhlHP.uSs...l...........hppps.c.....................allapEllp.............T........s+..ah+s........................sotl.pspWLhth.u...sph ...........................................................................................................................................t.QL.thh.................................tphth...p.....................................................................................................................................sshptl+pulhu.Gh.hs.plu...................................................php...........................pp...tp...Yhss.............................................csp....phhlHP..u..Ss.....L..........................hcp..t.P.c....................................................a..llatElVp.......................T..............................o+...ha..h..R.s.............................sstl..cPpWl..l..usp....................................................................... 0 873 1456 2200 +7549 PF07718 Coatamer_beta_C DUF1606; Coatomer beta C-terminal region Yeats C anon Yeats C Domain This family is found at the C-terminus of the coatamer beta subunit proteins (Beta-coat proteins). This C-terminal domain probably adapts the function of the N-terminal Pfam:PF01602 domain. 
25.00 25.00 36.20 30.30 23.60 22.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.55 0.71 -4.53 25 351 2012-10-03 16:25:20 2004-06-21 13:22:31 7 9 301 0 255 354 8 138.40 49 15.21 CHANGED QsDDhIsF+QLpu+putsssp...sh-sDLs+AT.G.sssspp.chs...p.....+Ls+VhQLTGFSDPVYAEAYVsV+QYDIlLDVLlVNpTs-TLQNLolELATlGDLKLVERPpshTLuPtsFpsI+AsIKVSSTETGVIFGsIVY-ssu ..................................................................QsDDsIsFtQLstcssh..s..stc.............thp.sLtt.Ah.G......ssppp.p.shs..............SKLs+.V..sQLTGFSDPVYAEAYVpVpQaDI................VLDVLlVNQTs-TLQNlolEhAT.LGD..LK.lVE+P.....s.shs.LuP+-Ftslc.................AslKVoST-sGVIFGNIVYDsss........... 0 90 147 213 +7550 PF07719 TPR_2 Tetratricopeptide repeat Studholme DJ anon Context matches from Pfam 14.0 Repeat This Pfam entry includes outlying Tetratricopeptide-like repeats (TPR) that are not matched by Pfam:PF00515. 27.00 13.00 27.00 13.00 26.90 12.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.73 0.74 -7.75 0.74 -3.60 369 18395 2012-10-11 20:01:00 2004-06-21 18:10:22 12 4257 3733 24 7965 120483 33152 32.50 19 6.62 CHANGED spshhthutshhptsch.pcAhptapcAlplssss .........................hhhthGt.h.h.h..p.t..s..c...h..p...cAhp...h.appAlpht.................. 0 3026 4971 6601 +7551 PF07720 TPR_3 Tetratricopeptide repeat Studholme DJ anon [1] Repeat This Pfam entry includes tetratricopeptide-like repeats found in the LcrH/SycD-like chaperones [1]. 20.70 11.00 20.70 11.00 20.60 10.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.19 0.72 -7.60 0.72 -4.11 18 1102 2012-10-11 20:01:00 2004-06-22 13:19:31 7 26 538 52 97 1589 293 33.40 29 26.08 CHANGED h-hlauh...AtshatptcappAhph....hhshlptps ................h-hlhua.............Ahshpp.p.tchpcAtsh..at..hhhhhsh.s.............. 
0 34 50 73 +7552 PF07721 TPR_4 Tetratricopeptide repeat Studholme DJ anon Manual Repeat This Pfam entry includes tetratricopeptide-like repeats not detected by the Pfam:PF00515, Pfam:PF07719 and Pfam:PF07720 models. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.82 0.74 -7.35 0.74 -2.96 143 472 2012-10-11 20:01:00 2004-06-22 16:56:25 9 256 297 0 244 3990 1150 25.00 25 5.32 CHANGED ststhshApshhttGchspAtshlcp ..........hhhsLAtshhtpGchspApphhc.......... 0 84 180 210 +7553 PF07722 Peptidase_C26 Peptidase C26 Studholme DJ anon MEROPS Domain These peptidases have gamma-glutamyl hydrolase activity; that is they catalyse the cleavage of the gamma-glutamyl bond in poly-gamma-glutamyl substrates. They are structurally related to Pfam:PF00117, but contain extensions in four loops and at the C terminus [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.30 0.70 -4.90 45 3232 2012-10-03 00:28:14 2004-06-23 11:09:18 8 15 2292 12 825 18376 10620 205.10 31 78.01 CHANGED PlIGlsusht..htttshtshst.pYltuth.pulptAGGlPlllPht..sspshpphlstlDGllloGGt.sVcPphYutpsp.tpss.hsssRDthclsLlctAlppshPlLGICRGhQhlNVAlGGoLaQclpt....t..thht........p......htssHslplp........uohLuplh.....s..sc...hpVNShH+Qulcc........LussLpVpApus.DGllEAlps.stp.hFslGVQWHPE 
................................................................................................llG.lss.s.............................................t.......t....h.........pa.ls..tthhphl.t.t.u..GG.l.Pl...l...l..........P.......h........s.................s...........p.......h.......h....p.....p...hl...p..h..l..D..GllL...sG...Gt.sV....p...P..p...h...Y........G.................p..p..................................t........................p................t........s................s......h.....s.......t........R.......Dth...-...l.s.L...l.c...t....A...l.c...p.......p.......hP...lh..uICRG.hQ..l...........l...N........V.....A.......h...G...G..o..L...a..Q..c....lp..p...........t......h.....t...p.hp.p.......................................p.................hs..s..H.p...l...p.l.p..............................................ts...o...h...L.t..p.lh.......................................s....pp.....h.h.V..N...S...h..H..+...Q..u..l..c.p.........................l.u..s..s..L...c...l...p...A...h............u..........s......D.....G......l......l......E...A.....l.....p.....s.......h....s.....t....t......hhl.GVQWHPE.................................................................................................................... 0 299 509 675 +7554 PF07723 LRR_2 Leucine Rich Repeat Studholme DJ anon PfamB-152 (release 14.0) Repeat This Pfam entry includes some LRRs that fail to be detected with the Pfam:PF00560 model. 20.90 9.50 20.90 9.60 20.80 9.40 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.64 0.73 -7.26 0.73 -3.24 161 389 2012-10-02 21:32:02 2004-06-23 16:09:48 8 33 40 0 269 479 1 25.70 36 6.48 CHANGED sLKsLpL.pp...lta....ssctslppLlSuCP ...........sLKoLpL.pp...Vpa.......ssctslppLlSuCP........ 0 142 155 160 +7555 PF07724 AAA_2 AAA domain (Cdc48 subfamily) Studholme DJ anon PfamB-40 (Release 14.0) Domain This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.94 0.71 -4.35 139 20963 2012-10-05 12:31:09 2004-06-23 17:05:00 9 82 5116 83 4876 26856 10382 171.50 36 26.68 CHANGED cPhushlhhGPTGsGKTp.........Lu+sLAphLh..............spppshhphDMSEahE........c+slu+LlGussGYVGh-puG.Lo-tl....tppPasllLlDEIEKAcs...............sV.phLLQll-suplo.........-spG.ppVch+NsllIhToNhuupthtp........................................hpt.h.t.lpthahP.EFlsRls.h .........................................................................................................................................hushl.hlGPTG..s.G.KTc...........................LAc.sL.AchLh....................................................................................................................................t.tpsh..hp..h.......D..h....o....E.ahE...................................c....p...s.......l.......s.....+...L...l...G...u.......s.........s.....G.......h......V.......u...h.......-..c...sG...................................................tllllDE..I-...Kups........................................s....V........p......h.LL...p.ll...-....s....u...p..lo.........................................................................D.sp.G....hpV...D..h.....c..N....s..l.h.I.h.Tu.N..huutp..h.p....h...........................................................................................................t.t...hp.ts...p.....l.p....h.h.....h.....P.........EFlsRls............................................................................................................................................................ 2 1633 3130 4111 +7556 PF07726 AAA_3 ATPase family associated with various cellular activities (AAA) Studholme DJ anon PfamB-40 (Release 14.0) Family This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.70 0.71 -4.54 55 4128 2012-10-05 12:31:09 2004-06-24 10:26:01 6 8 2112 1 1431 6260 2158 130.60 58 39.23 CHANGED HlLlEGVPGluKThls+sLApslshsFpRIQ.FTPDLLPuDlhGo.plactcs....ppFcFctGPlFusllLADEINRAPPKTQSALLEAMpE.+QVTlsGpTasLPcPFhVlATQNPlEpEGTYPLPEAQLDRFlh ....................................................................................HlLLEsVPGluKThh...s+...sL...A....p.s...l.....s.......h.............s..............F...p....R....I...Q....F.....T...P.....D..L.........L...P...u........D.....l....s..G........s......p......l....a...s..t..ps...................t.p..F......c......F........c..........G....P.......l......F............s........N...........l...........l.....L..A.........DEINR.....A.....s....P.....K....T.....Q.....S...A...L...L...E..A..M.....p.....E..+.....Q.....V......T.....l......s.....G....p.....T.....a.....s...L...P.....p.....P...F.....h.V...l.A.T.Q.N.P..l.......E.......p.......E........G.T.Y......P.LPEA..QLDRFlh..................................................................................... 0 512 1017 1260 +7557 PF07727 RVT_2 Reverse transcriptase (RNA-dependent DNA polymerase) Studholme DJ, Eddy SR anon PfamB-40 (Release 14.0) Family A reverse transcriptase gene is usually indicative of a mobile element such as a retrotransposon or retrovirus. Reverse transcriptases occur in a variety of mobile elements, including retrotransposons, retroviruses, group II introns, bacterial msDNAs, hepadnaviruses, and caulimoviruses. This Pfam entry includes reverse transcriptases not recognised by the Pfam:PF00078 model. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.61 0.70 -5.41 99 7027 2012-10-02 12:54:00 2004-06-24 14:06:22 9 498 350 0 1834 6948 486 136.70 27 23.93 CHANGED NpTWcLVshPpsp.+slGsKWVF+hKhsscG.slpRaKARLVA+GasQppGlDYp..ETFuPVs+hsolRhlLulAuppsaplcQhDV+oAFLpG-LcE-..lYMpQP.GF.spsp......sspVC+LcKuLYGLKQAPRtWap+hsshLhphGFpputsDssla..h+ppsssh.lhlllYVDDlllsusspphlpphKppLspcFcM+DLGthpaaLGlElp+..sspGlhLoQppYspcl.LpchsMpss+..PssoPhsss ........................................................................................................................................................................................................................................................................s.salp..u.....l...p..-..p....l..ah.....t.......P..uh.................................t..hh.h..h.L.......+u.lYG......L+.Q.......u.s....t....W.....p.h...t..t.h.h.....t....th..s......p........s.h....a....h..............t....t................h..hl.hl......aVDD.h........................................................................................................................................................................................................................................................................................................... 1 611 910 1250 +7558 PF07728 AAA_5 AAA domain (dynein-related subfamily) Studholme DJ anon PfamB-136 (Release 14.0) Domain This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.54 0.71 -4.45 34 10563 2012-10-05 12:31:09 2004-06-29 09:05:52 9 237 3591 15 4659 65399 18925 134.50 18 14.25 CHANGED sllLlGsPGsuKohlsppluttl.....spshhhl.hspshots-lhtthph.......ssttthhsuslhpAsp..........cuplshl-Els+sp.sslhssLhslLsc+phhh.pssthhtst...........................hpllusuNshs.......hthsphssALtpRF ..................................................................lhlhG....sGs....GKo...t.....l...s..c......t..l....u..thh............................st..........h......h....h.......l..............h.......s........t..........t..........h.......s................p......c....l.........h...G...t...h....s...............................t..p..h..t....h...p....s...u.......l...h..p..s.hp...................................................................................ps..h....l....l....h....l..D.E.l..........s...........h.............s.............t.....s..........p..............h..........h.......s......s..........L......s....l....l.......c....p....p....p.....h....h....h.......t...t.....s....t......h...t..h.................................................................................................hhl..l.s.s.h.N.sht..................h..t..uhhpR...................................................................................................................................................... 1 1765 2764 3880 +7559 PF07729 FCD FCD domain Bateman A anon Pfam-B_117 (release 14.0) Domain This domain is the C-terminal ligand binding domain of many members of the GntR family. This domain probably binds to a range of effector molecules that regulate the transcription of genes through the action of the N-terminal DNA-binding domain Pfam:PF00392. This domain is found in Swiss:P45427 and Swiss:P31460 that are regulators of sugar biosynthesis operons. 
It is also in the known structure of FadR where it binds to acyl-coA, the domain is alpha helical [1]. This family has been named as FCD for (FadR C-terminal Domain). 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.55 0.71 -3.80 317 20783 2012-10-01 22:11:53 2004-06-29 14:06:45 7 44 3128 16 5347 14681 1573 125.20 20 52.31 CHANGED -...l...h...ph...RthLE.....stss.p..hAs..pphss...........pplpplpphh.......pphppshp.....tsch.t.............hhphctpFHttlhptusNthlt...........p..hhppltttht.h..hhthth................stpphppshpc..Hp.pllcAlts.pDsctAtph........hppH.lpt ........................................................................plh.ph...RthlE......stss...p....hAA....tp..t.os.........................pp.l.t.p..l.pphh.............p.p...h..p..p..s.hp...........................ttc.htt...............................................hhp..h.-..hpFHt.tlh.p.s.ut..N..h.Lh................p...hh.p.p.l.hshhp..t.......hhthhht............................ptpt.h.p.p...h..hpp......Hp.tIlcA.l..t.p.p.D.s...c.t..A.pps...........hppHlt.t.................................................... 0 1367 3132 4302 +7560 PF07730 HisKA_3 Histidine kinase Studholme DJ anon BLAST Domain This is the dimerisation and phosphoacceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.27 0.72 -3.63 191 12598 2012-10-11 19:05:54 2004-06-29 14:17:48 8 320 3309 15 3406 10118 555 67.60 26 14.71 CHANGED ERpRIARELHDsluppLsslthphphhpthh.....ppsst...........psppplpplpphsppuls-lRplltsLR..sssh .............................ERpRlARELHDslup..pLssl.p.hp.h...p..h.hpphh.......ppsst.............................psp.ptlpplpphsppuh...pcl.RplltsLR.s..................................................... 
0 1258 2513 3067 +7561 PF07731 Cu-oxidase_2 Multicopper oxidase Studholme DJ anon PfamB-49 (Release 14.0) Domain This entry contains many divergent copper oxidase-like domains that are not recognised by the Pfam:PF00394 model. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.81 0.71 -4.67 127 6906 2012-10-02 17:41:00 2004-06-29 16:37:37 9 88 2541 150 2712 7112 437 134.00 20 24.40 CHANGED ths.pssshhph................................t...hhssspshslphsshlclhh.s........s.............................sHPhHLHGasFhllspssssh............................sh.ssspRDTltlss....suhssltaps...D.NPGsWhhHCHlt..hHhstGhhhhhh.tsts .........................................................................................................................th....................................................t.t....h.sstph..h..t.l..p....h....s...p...h.h.c.h..h..h..s...........s.h...................................sHP.hHl...HG..h.p...F.p..l..l..sp.....ss.s..................................................................................p..s..s.h.+D..T..l..t......l....s...................ssh...s..tlthph......................-..ss.........G....s.Whh......H......C.................H.....lh........H..hs.......t..Ghhh.hhh....t.................................... 0 653 1565 2232 +7562 PF07732 Cu-oxidase_3 Multicopper oxidase Studholme DJ anon PfamB-43 (Release 14.0) Domain This entry contains many divergent copper oxidase-like domains that are not recognised by the Pfam:PF00394 model. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.50 0.71 -4.37 121 9918 2012-10-02 17:41:00 2004-06-30 09:10:05 10 110 3033 407 3010 9791 561 100.50 30 22.68 CHANGED plphsshshhsts...ppshslNGphPGPslpsppGDplhlpVhNpl...sps.solHWHGlpphtssh.DGssulTpCPItP.GpsasYpFplpp..tGThWYHuHhsh...p...sGlaGslllpspts ...................................................h..................hhhh.s.....s...t.....h.........G....P......h....l......php...............G........-t.l.............lph....h.....N..............ph..........s..................p....................s.......h............o....l..............HW.................H........G.....l............h...............h...........s............s.............................D...........G..........s........s..................t....................h............s...........p............s..........s..............I.......t.......P..........G.........p.........s.........a....s....Y..p...F........p.....s........p.........p......................s.........G.........T............a..........W.........YHuH..t...............p.h..tGl.hG..hllps................................................................... 
0 695 1662 2404 +7563 PF07733 DNA_pol3_alpha Bacterial DNA polymerase III alpha subunit Studholme DJ anon Pfam-B111 (Release 14.0) Family \N 22.00 22.00 23.70 22.40 21.20 20.60 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.23 0.70 -6.04 89 7657 2009-01-15 18:05:59 2004-07-01 10:32:47 7 67 4508 9 1594 6277 5227 405.60 35 36.47 CHANGED phLpchshpGl..ccRh.................................h.pRLchELslItph......GFssYFLIVtDhlpaA+p....pGI.VGsGRGSuAGSLVAYsLsIT-lDPlp...........................................asLLFERFLs.ERsshPDIDlDFspc+RpcVIpYVtc+YGc......-pVAplsTauThtuKuAlRDVuRsh.......shs.s.hstlsKhls.........sh............ptshp.p........phpphht.p..........phpclhclAppl...-GlsRpsupHAuGlVI.usp.sLs-hsPlhhssps..........................tthlTQa-hcs.lEshGLlKhDhLGLcsLThIcpshchlp.................tp.......tth............h.....s............l..tsIP..h-Dt.psac.hlspu................cThGlFQlESpuhpshLpcl+PssFpDllshsALhRPGPhp.......t.lpsalpR+pG......ptlpa.ps.........lc.lLppTaGlllYQEQlMplu..phhAGaohGcADhLRRAMuK.Kc.p.htc.+p.....pFhpGhhcpu 
................................................................t..LtchshpGh..cc+h...........st......tt...................................................hpcRL-hELslItph........GFs.sYFLIVh-hlpau+s...........................p.Gl...V.G.sGRG.SuAGSlVAYsLtIT-.lDPlp.....................................................................................................a.sLL.F..ERFLNsE......R....s......shP.DIDlDF.s.t..........c.p.....RscVIpYV.tcpY...G..c........-.pV..uplhThGTh.tu.......K.ssl+.DVu+sh...............sh.s.t.hstl..s+h.ls.t...............sl...................tthht.p.........................................ph..t..p..h..h..t...t.p...........................................cspclhchAptl...-.G.lsRpsuhHu........uGlll..ssp.s.......lh-hsPl.hssps.............................th.lTQ.a.-hps..lE......t.hGLlKhDhLGLcsLThIchshchlp............................................................tp....................t...........h.....s.....................l...pp..I..P..........h-.Dt...p.......s...ap..hhppu.....................pThGlF..QhE.S.puh+phLc..c.h...........+...Ps...........s..........Fp....Dllsl....s...uLhRPGPhp.......s..lps....al.cR+pu...............p........l......p.h.ps.................lc.lLt.....p....ThGlllYQEQlMplu..t..hhuGaohGtAD.hLRRAMuK..K....p.....p...h.tc.....+p...tFhpGhhpp..................................................................................... 1 562 1068 1361 +7564 PF07734 FBA_1 DUF1607; F-box associated Studholme DJ anon Pfam-B128 (Release 14.0) Family Most of these proteins contain Pfam:PF00646 at the N terminus, suggesting that they are effectors linked with ubiquitination. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.44 0.71 -4.55 99 950 2012-10-01 23:07:08 2004-07-01 11:05:26 8 29 37 0 484 1107 0 142.20 20 39.04 CHANGED uVSLKGNTYWhAp.........ccppt.......................a.llsFDFosE+...Ft.shlsL..Paptt..............sshsLS.sVR.-E.+LulL..hQ.......ptpo.p......hEIWlTs.K..I-ss......sVSW..spF.Lpls.hpsh......sth.............ssSFF.lDE....-K.Klsls..hc...pptppt..............shhYIl.G.cs...thhpcls.......................hhPhhh..s.YVPS ..............................................................l.lpGshYWhup.........cppp....................................................h.llsFDh.s.s.Ep...F..t.h..l...L..P.hptp...................thphhsL....h.ht....p-..pLs.hh....hp............p.p..s..p.......h-IWlhs..c....hcss......psSW.....oKh..h...sls..h..h......................................h................................................................................................................................................................................................................... 1 160 208 232 +7565 PF07735 FBA_2 F-box associated Studholme DJ anon Pfam-B128 (Release 14.0) Family Most of these proteins contain Pfam:PF00646 at the N terminus, suggesting that they are effectors linked with ubiquitination. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.54 0.72 -4.12 141 1899 2009-01-15 18:05:59 2004-07-01 17:02:22 12 25 5 0 1889 1923 0 70.40 21 21.44 CHANGED tpt.phppll...spNh....pplph.........t.phsLs-LL..hhNssplpl..tp.s.....lo.sc-lNlFLKH..WlpG.usscLchhplp .....................................................ph.........pplplt.............psphlsl.-.cL.L...th..s.sp.p.l.p.l.....pp..sp......lo..sc-l.NpFlKp..W.h..p...G...sp.s.pLchlpl.t................... 
0 176 183 1889 +7566 PF07736 CM_1 Chorismate mutase type I Bateman A, Griffiths-Jones SR anon Bateman A Domain Chorismate mutase EC:5.4.99.5 catalyses the conversion of chorismate to prephenate in the pathway of tyrosine and phenylalanine biosynthesis. This enzyme is negatively regulated by tyrosine, tryptophan and phenylalanine [2,3]. 21.20 21.20 21.20 35.40 21.10 17.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.31 0.71 -10.46 0.71 -4.01 57 434 2012-10-01 19:40:00 2004-07-07 16:22:31 6 2 424 49 151 378 287 116.20 43 92.42 CHANGED lRulRGATTlspNosctIhpAspELlppllpcNslps.--llSlhFosTpDLsAsFPAtsARph..GW.psVPLlsspEhsV.GuLs+CIRVLlahpospsp..tclpHVYL+sAtsLRPDls ....hRuIRGATTl-pNst-pIhpATpELlpcllppN.plps.-DllSlhFTsTpDLsAsFPAtAsRph.......G.a.ptVPLhsspEhsVsGuLs+CIRlhlpssocp.sp..pcIpHVYL+sAtsLRPDL.s..... 0 67 120 140 +7567 PF07737 ATLF Anthrax toxin lethal factor, N- and C-terminal domain Andreeva A, Bateman A anon Pfam-B_23800 (release 14.0) Domain The C-terminal domain is the catalytically active domain whereas the N-terminal domain is likely to be inactive. 21.40 21.40 23.10 22.90 20.40 19.80 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.60 0.70 -4.96 4 70 2009-09-11 08:46:23 2004-07-07 16:59:27 6 7 32 77 5 78 0 187.20 36 41.50 CHANGED hK.-hhpplV+l-lps-phsKtptspcLlcKlPp-VLEhY.pslGGcIYlsDt-lspHhpLpslScccphlhsupGcph.htp+FVFsKtshpPhLlIps.cDYs.sp.pShpVYYElGKuI.hDhhS.pthcLtsP..cFlshlsplsst..........DpDupDLL.....FspphKE....cspslslsalcpN.sEFQcsFAcAFuhYapPcH+-sLclY..APchFpYMschpchthp .........................................K.-.h.p.sphph.spphsKh.th.chhchls.sVhphY.ptlstphYhh..-hhpphp.p.lpc.pp..hs..G.......+aVatc.t..shh.....pD.h.pp.puhtlYh..u+.l.hch.S....pl.ps..tFlp.htph.s...........DpsupDLl.....FpphhKE....cspsl..s.l.s....alcpN.sEF.chFAcAFuYY.a.sP..D........H+shLcla..APcsFpYhschpK..hp...... 
1 1 4 4 +7568 PF07738 Sad1_UNC Sad1 / UNC-like C-terminal Wood V, Finn RD anon Pfam-B_5052 (release 14.0) Family The C. elegans UNC-84 protein is a nuclear envelope protein that is involved in nuclear anchoring and migration during development. The S. pombe Sad1 protein localises at the spindle pole body. UNC-84 and and Sad1 share a common C-terminal region, that is often termed the SUN (Sad1 and UNC) domain [1-2]. In mammals, the SUN domain is present in two proteins, Sun1 and Sun2 [1]. The SUN domain of Sun2 has been demonstrated to be in the periplasm [2]. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.70 0.71 -4.37 25 1140 2012-10-03 19:46:52 2004-07-08 10:09:58 8 33 300 4 758 1131 30 126.30 24 15.87 CHANGED hshtupsspsllpps...hhsusCauhpuspsalslcLuctIhlpslsltHhpp.......h.SSAPK-FpVhuppphsptp.......hhhLGpFph.stsspshQoFplpss..t.......hhchV+lclhSpa....GsstasslhtlRVaGps ..............................................................................................h....ttsspsll.p.s...hh.ssC.h.s.....h...p.......s......p......p.....s...a.l......s.lcLsp.I.hssslslpH.hth...........h.sSuP+cF.plh.....u..................p.p..sspp.........................hh.hL..G.p.....aph...s....p..s..s....p......s...l.....Q..s.F.t.l...p...pt..........................hhphlclc.lhSpa.....Gp..ta..ssl...t.hRVaGp............................................... 0 233 353 597 +7569 PF07725 LRR_3 Leucine Rich Repeat Studholme DJ anon PfamB-184 (release 14.0) Repeat This Pfam entry includes some LRRs that fail to be detected by the Pfam:PF00560 model. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.38 0.73 -6.54 0.73 -3.89 74 609 2012-10-02 21:32:02 2004-07-26 15:33:50 7 122 16 0 284 634 0 19.90 51 2.28 CHANGED pLVcLphttS.pL.c+LW-Gsp ....LVELshppS.pL.EKLWcGsp... 
0 96 177 191 +7570 PF07739 TipAS TipAS antibiotic-recognition domain Bateman A, Finn RD anon Pfam-B_8785 (release 14.0) Domain This domain is found at the C-terminus of some MerR family transcription factors. The domain has an alpha-helical globin-like fold [1]. The family includes Mta a central regulator of multidrug resistance in Bacillus subtilis. 23.70 23.70 23.70 24.00 23.40 23.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.49 0.71 -3.78 164 1938 2009-10-27 17:45:06 2004-07-27 13:56:40 8 15 1353 3 329 1455 73 113.30 19 48.65 CHANGED paspEspp+aGp...pthpt.ttp............thsc.pchpphppp....hpplhppltphhpps..sssupcspplscpapphl.s....pahs.....hstp........hhtsL...uphY...hsD.t+Fpphhc.ph.....st.Ghu.palpcAlptas ........................................................atpEh.pcaGp...p.htp..pc...................sp..pc.pphppp........hsplhpplsphhppt...sssupcsQphspcahphlp....phhs.........hs.c.........hhst..l...uphY..............hs-...sc.Fpp.....h..s..ph.............ss..uhu..pals-Altha.......................................... 0 107 213 277 +7571 PF07740 Toxin_12 Ion channel inhibitory toxin Bateman A, Finn RD anon Pfam-B_20319 (release 14.0) Domain This is a family of potent toxins that function as ion-channel inhibitors for several different ions. Omega-Grammotoxin SIA is a VSCC antagonist that inhibits neuronal N- and P-type VSCC responses [1]. Huwentoxin-IV, from the Chinese bird spider, is a highly potent neurotoxin that specifically inhibits the neuronal tetrodotoxin-sensitive voltage-gated sodium channel in rat dorsal root ganglion neurons [2]. Hainantoxin-4, from the venom of spider Selenocosmia hainana, adopts an inhibitor cystine knot structural motif like huwentoin-IV, and is a potent antagonist that acts at site 1 on tetrodotoxin-sensitive (TTX-S) sodium channels [3]. Study of the molecular nature of toxin-receptor interactions has helped elucidate the functioning of many ion-channels [4]. 
21.00 21.00 21.00 21.40 20.60 20.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.46 0.72 -3.46 31 254 2012-10-01 22:06:18 2004-07-27 13:59:12 7 1 24 18 1 274 0 30.60 39 40.83 CHANGED -CtthattCssss-.CC..tshsCppc....h..pa..Cta ....-CttahspCs.sps.CC..sthsCpsc....a....+W..Csh....... 0 0 0 1 +7572 PF07741 BRF1 Brf1-like TBP-binding domain Bateman A, Finn RD anon Pfam-B_18050 (release 14.0) Family This region covers both the Brf homology II and III regions [1]. This region is involved in binding TATA binding protein [1]. 25.00 25.00 27.40 26.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.38 0.72 -3.77 42 348 2009-09-11 07:42:02 2004-07-27 14:01:28 8 11 267 4 234 338 3 99.70 33 16.01 CHANGED D-.E..lcthl.LsEcEschKpplWhphNc-aLhcpcpKp.........................h+tctcp...........psstpcc+p++tpcpp..................................sssspTAuEuscphLpc+s..hSpKINY-sLcsLa ................................D-.E.lc.phl.Ls-pEs.clKpplWh..p.Nt-YLc...cQppKc..........................h+t.ttcpt.........................tthp.c++....++sp.cpct.......................................ss.usTAuEAhcpML.cc+p....hSpKINYcsLcsL............................. 0 79 129 192 +7573 PF07742 BTG BTG family Bateman A anon Pfam-B_9208 (release 14.0) Family \N 23.00 23.00 24.10 25.70 22.50 21.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.61 0.71 -4.59 31 473 2009-01-15 18:05:59 2004-07-27 14:07:20 7 7 125 8 259 429 1 113.10 44 43.74 CHANGED MptElssuVsFls+hlpp+th...LsccplphFupcLpphLhc+YpsH.WaP-pPpKGpuaRCI..RIN...cphDslltpA...............spcSGLshpcLh..LPpElTlWlDPhEVshRhGEcu.shsVhhhp .........................Mp.EItsAlsFlhphLhs+......Lscc...plphFuppLpclL.pc............+Y.c.sH.WaP-+PtKGSua.R.CI.....+Is........cph....DPlltpA................upcuGlshp-lt.t....L.P...........p............E..L........olWlDPhEVSaRlGEcG.shpVhh.p................... 
0 61 82 156 +7574 PF07743 HSCB_C HSCB C-terminal oligomerisation domain Bateman A, Finn RD anon Pfam-B_6234 (release 14.0) Domain This domain is the HSCB C-terminal oligomerisation domain and is found on co-chaperone proteins. 21.60 21.60 21.60 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.46 0.72 -3.52 143 1500 2009-01-15 18:05:59 2004-07-27 14:15:24 8 10 1465 13 424 955 122 77.40 34 40.93 CHANGED htDs..tFLMppMEhREpL--httt....pst.spLpplppclppphcphhpplspth...s........ppshppAsctlp+L.......+Fhp+.lppclcp .....................s.pDssFLMcQhElREcL--lptt...........cs-.spLcshhpclcphhcsthpphsppL........-...............spsassAA-pVRKL..........+FlcK.Lpsplcp......................... 0 114 216 329 +7575 PF07744 SPOC SPOC domain Bateman A anon Bateman A Domain The SPOC (Spen paralogue and orthologue C-terminal) domain is involved in developmental signalling [1]. 21.30 21.30 21.40 21.40 21.00 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.72 0.71 -3.63 58 707 2009-01-15 18:05:59 2004-07-27 14:20:39 8 40 232 1 436 686 1 115.20 24 8.36 CHANGED WpGh.lshpshsphsspuphlsGp...ppls.ptl.........................................................PtplshpuRlchppltchhpplptst......plhllsl................ssppspts.........hpp.hhsYhps+pR.h......GVhplss.................stshYlh.P ..............................................................................WpGh..lshp....s.....s....p....hssphahlsGs.....tplh.pp.tL.........................................................s.pIs..phRls..splpchhpclpsus.st.......thhllsl................t.s..ss.tpppps...............hps.lhsYLpp+pt..s.GVh.sl.ss...tt..............stslalhP................................ 0 106 174 299 +7576 PF07745 Glyco_hydro_53 Glycosyl hydrolase family 53 Bateman A, Finn RD anon Pfam-B_5764 (release 14.0) Domain This domain belongs to family 53 of the glycosyl hydrolase classification [1]. 
These enzymes are enzymes are endo-1,4- beta-galactanases (EC:3.2.1.89). The structure of this domain is known [2] and has a TIM barrel fold. 20.70 20.70 20.80 21.10 20.50 20.30 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.22 0.70 -5.65 16 584 2012-10-03 05:44:19 2004-07-27 14:21:50 8 37 453 23 213 550 107 322.00 33 69.58 CHANGED h+GsDlS.lhp.........................................hEpsGss..apssNGpppDhhplL+s.sGlNhlRlRVWssPhD..............G.sDlccslphAKRAcstGM+lh.lDFHYSDpWADPu+QphPpAWssh.sh-pLpps..lYsaThssLssh.pcsGlpsshVQVGNEhssGhLWs.Gp........sssasphupLlpuGhpAV+-..........lssssKlhlHLssGts.sp.phaacpltpp..G....sDaDlhGlSYYP...aWpu..........oL.....ssLpssLpshtspYsKsVhVsETuaPao........s..sp.t....h.shPhospGQsshlpslhplVpsl........scGlGlFYWEPAWlsss.....................................h.tGsshssp.shFD..hsupsLsSlssF ..............................................................................................h+GuDlShl.p.........................................hEp.p.Gsp..ahs..t..s...G.........t...p...p.D...hhplL+s.pGlNhlRlRl.WssPts............................................G....ss..hs.pslthA+..RA+stGh+lh.lDFH.YSD..hWADPu+QhhP.....pAWtsh........shspLp.ps............lasaTtsslsth..cptGl.......P-hVQlGNEhssGh..L......WstGp...............................................sssa.sp..hAtLlpuGhpAV+c..................sssp.s+l..hlHls.s........Gt.....s.s..s.......t.......h........p...a...aacpl.ppp.....s..........scaDlIGh.SaYP...........aWpu.................sh.......ssL..p..sshs...sls.p+.Y.s..K.c.lhVsEsuasas.t..s.t....s.hst.t.............sh.shospGQtphlpc...lhpt..ltsl.t......spGhGlFYWEPsWhs.s...................................................h...Gssh..tst...shFD...pGp.s.h..uht............................................................................................................... 
1 77 147 182 +7577 PF07746 LigA Aromatic-ring-opening dioxygenase LigAB, LigA subunit Bateman A, Finn RD anon Pfam-B_18522 (release 14.0) Family This is a family of aromatic ring opening dioxygenases which catalyse the ring-opening reaction of protocatechuate and related compounds [1]. 23.40 23.40 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.48 0.72 -3.98 40 275 2009-01-15 18:05:59 2004-07-27 14:23:53 6 3 207 4 91 247 101 83.40 30 44.05 CHANGED LNpashoL.hcspsRccFhuDccAhh-casLocpp+pAlhsRDhtthlphGuNhahLtKluulhGlsh.phsusMsG..hoh--apphhhs ........lNcahhpL.hcst.RpcFhs...D.cAhhccauLoc-p+pAlhsRDhtsLlphGsshahLtKh..uus.Ghs..phhu..shpG...oh-patthh..s............................ 0 18 46 75 +7578 PF07747 MTH865 MTH865-like family Bateman A, Finn RD anon Pfam-B_25445 (release 14.0) Family This domain has an EF-hand like fold. 25.00 25.00 35.20 34.90 21.90 20.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.53 0.72 -4.35 6 69 2009-01-15 18:05:59 2004-07-27 14:27:15 6 1 56 1 52 72 0 75.50 37 86.51 CHANGED --l+tpll-hhKsA-.FPIps..ELhuALPsGhsToCp.Gs.ElpAuEhs.KLlTcsDFPaKsu-plADpll....c+suL ..p-l+tQlh-uhc..sAc..FPIpo.h-LlsALPsGssTsCpsGs..hpl.TAtEhs.KllsssDFPacsA-slA-sll....pcs........... 0 12 35 46 +7579 PF07748 Glyco_hydro_38C Glycosyl hydrolases family 38 C-terminal domain Finn RD, Bateman A anon Pfam-B_731 (release 3.0) Domain Glycosyl hydrolases are key enzymes of carbohydrate metabolism. 
21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.64 0.70 -5.68 45 2571 2012-10-02 23:57:29 2004-07-27 14:36:28 8 30 1251 65 913 2233 104 416.20 15 45.85 CHANGED tslhlaNshsappsphVplssstsshthtstp..........................................................hhh.hsplPsluhtsh.ht.spt.p.tt...........................................h.l.....cNshl+lphsss.GtlpolhDcpss+chh.........hspphthacs...................hptt.p.t...h...usuh.hh.....ppsshpstl.h.ht................................tsshlsphlplhsss................ppl-hcspVc.hpppcp......lpspFssslpu......................spshY-sphGhhhc........shphs...s.h-tsha...........plssptalp.uc...........slulLs-staGsss............tssplcLoLhRs.........................................................cstsD...pupthh............saslhscssshhtht...................ssptuh.............plptPhhh....................hptht..s..h.hsls..sl.................hh.thK.scst..................pshlL+hh-htsspsctplth.shs.ttsph..pslh-t............................................................................................................t.pspphplplpPhEItThp 
......................................................................................................................................hhlaNs.s..h..p..t..h......l.....p....h.........h.......................t..............h...t.................................t..........................................................................................................................................h.h...........p....lPsh.u.h...p.sh......ht............t.t......................................................................................................................................................................hhl..........pNt..h...hpl.ph..s..t.p....G..h....l.p..pl...h....c...p..p..s...s.pp..h...................................hssph.hhaps...........................................hptt...................st..s....h..hh...................p..ss.hpst.h..hphph..................................................................t....p...........t........l....p..p..h..l.p.l..htss........................ppl-hc..s....p.lc...htppcp.......................L.+l.th.ss..s..l.p.s......................pps.h.h.p.t.t.h.GhhtR..........................................shp.p...........pshpt..sh.h......................h...s.....t.....t.t.....a...h..p...h..sc......p.............................slul.hspshh.uhs.s...........................ss.p..l.pl..o..L..hRs.........................................................pshs.D..........ps.pthh........................................................paul.h.sps...t...h......ht..............................................shppuh............................................th.p.t.Ph.hh.............................................................hp..h.............................s.h...p..h........s..l................................................................hpshK.up....st............................................................................tthllR.hhp.....t.t..t.s.p.s.t.h............s...........sp......h......slhpt...............................
...........................................................................................h...h...t...shplhoh................................................................................................................................................................................................................. 0 333 530 740 +7580 PF07749 ERp29 Endoplasmic reticulum protein ERp29, C-terminal domain Finn RD anon Pfam-B_5062 (release 14.0) Domain ERp29 is a ubiquitously expressed endoplasmic reticulum protein found in mammals. ERp29 is comprised of two domains. This domain, the C-terminal domain, has an all helical fold [1]. ERp29 is thought to form part of the thyroglobulin folding complex [2]. 21.70 21.70 22.40 23.30 20.10 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.07 0.72 -3.36 36 300 2009-09-11 09:36:09 2004-07-27 17:35:15 7 10 234 12 194 303 6 91.90 30 27.52 CHANGED GsltsLDplltcassus...tptppllscscctsppLp.....st...+aupaYl+lhcKlhpps.......p-YspcEhpRLc+lLp......GulussKtDElhh+tNILppFt ..................................................GpltslDslstcahsus.t..pppp.....pllpcscctspplp...............tttt..chAchYlKlhcKlhcpu.......p-asppE.hpRLp+lLp.......sslus.sKtD-lpt+.hNILpsF.......... 1 70 115 159 +7581 PF07750 GcrA GcrA cell cycle regulator Thanbichler M, Finn RD anon Pfam-B_23428 (release 14.0) Family GcrA is a master cell cycle regulator that, together with CtrA (see Pfam:PF00072 and Pfam:PF00486), is involved in controlling cell cycle progression and asymmetric polar morphogenesis [1]. During this process, there are temporal and spatial variations in the concentrations of GcrA and CtrA. The variation in concentration produces time and space dependent transcriptional regulation of modular functions that implement cell-cycle processes [1]. More specifically, GcrA acts as an activator of components of the replisome and the segregation machinery [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.14 0.71 -4.04 8 480 2012-10-04 14:01:12 2004-07-27 17:35:51 6 7 310 0 157 401 1392 140.20 30 85.47 CHANGED MsWTDERVEtLKKLWp-GLSASQIAAQLG.....GVSRNAVIGKVHRLuLoG....Ru+s.t.osAssRs++ss.s.t.........................ssutpuLphchss-lsscsshtsstssVsshsRplpLLELuE+oC+WPIG.DPtsE-FsFCGs.cus-uuPYCshHuRlAaQPsuc+RRsc ...........................................................................pWT-E+lp..hL+clW.tcGhSuupIAtpL.G......loRNAVl...GKsH.R..L.t..Lss..........+sp..s................t.....t.s.s.....p...t..tst...................................................................................................tt.h......t............t..................................t.............tt.h..tl.h...pL.....sc..p.....pC+WPl.......G...-P....s.....cFh.FCGt.....ts...tu....PYCt.H.s.tlAapshttp....t............................................................................... 0 40 96 114 +7582 PF07751 Abi_2 Abi-like protein Fenech M anon Pfam-B_8740 (release 14.0) Family This family, found in various bacterial species, contains sequences that are similar to the Abi group of proteins, which are involved in bacteriophage resistance mediated by abortive infection in Lactococcus species [1,2]. The proteins are thought to have helix-turn-helix motifs, found in many DNA-binding proteins, allowing them to perform their function [3]. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.43 0.71 -4.12 111 1544 2009-01-15 18:05:59 2004-07-28 09:55:11 6 5 1091 0 266 1249 75 178.60 18 63.71 CHANGED pphLpp.luYaRl.ssYhhsh.t.t.....................htts.spFpplhpLYhaDpcLRthlhcslpclElslRstlstt..lup.............t..ha.p.tphhptt.............................................hhpphp......pplpcspt....p....hpaatpph...........tthPhWhhh.....ElhshG....................phsphap.h.tp.............ttplspp...............hs...............hp.........tphlpshlcslphlRNhsAHasplasp ...................................................................Lpp.hs.Y.a.ph.psYhh.h...............................hh.s.hphpplhsL..YhhDpcL+thhhphlttlEhsl+stlsth..lsp...........................t....h..ahp......phhpt.t.........................................................................hhpphp.......ptlpcsp..............t...........hpphh.pph.........................................tphPhWhhh........chhshG......................phs.phap.h...t.................t.thh.pp................................................ht.......................................hp........hp.ht.s..h....lptlp..lRNhsAHpp.lht........................................................................................................ 0 72 163 220 +7583 PF07752 S-layer DUF1608; S-layer protein Fenech M, Eberhardt RY anon Pfam-B_2293 (release 14.0) Family Archaeal S-layer proteins consist of two copies of this domain [1-2]. 
30.00 30.00 30.70 30.30 29.60 29.70 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.46 0.70 -5.18 14 125 2009-01-15 18:05:59 2004-07-28 10:41:59 6 15 13 0 123 125 2 236.20 27 60.47 CHANGED lRGslaD.........s.stssshsWsspsFuGFYYslccs..................lsoEshslhthss..scsIs-GsLVYsTpsh.spaEht..............sW..ssYpllGFhuccYhAs.sDss.............stLuplllD..u-DKcsl.hoG-sh-Lt..............cGYSLplspVDVsGs+VWlpLpK-G-hlDsshlsssss.......alacs-lu-s...-DlshhhlalspVFtGspsohlhlcula.Is.DshlcIpsGDpFGchElsphSpssIphcN-DohoLspsss..hplhsshhFpsuD ..............................................h..........................h.Wss......satuFaYslsps................htsEphth.........sp.ltc.....s..s...l.Ypo..p......s...p...acht..............sh..tpY.h..luahu-cYhshhssss........................spLu+lLlD.........s-cctol.psGpsLs.Lt..............-GYsLplppl...Dl-Gs+ValpLpKDGphVDspllssus..s.............asYcp.-lsst...pDlslltl+lc.plFpGsp...sshshl-GlaQIu.-shhplppGDpaGphclppls.....t...s..t..I.hpNp.s.s.hsLppsps...tlhtth..htsus.................................. 0 26 83 92 +7584 PF07753 DUF1609 Protein of unknown function (DUF1609) Fenech M anon Pfam-B_2149 (release 14.0) Family This region is found in a number of hypothetical proteins thought to be expressed by the eukaryote Encephalitozoon cuniculi, an obligate intracellular microsporidial parasite. It is approximately 200 residues long. 
25.00 25.00 56.80 56.70 16.70 15.80 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.52 0.70 -5.03 9 42 2009-01-15 18:05:59 2004-07-28 10:43:16 6 2 6 0 26 42 0 199.40 58 45.79 CHANGED EsEputEAEssh.Eh....AtsK+KsutcKScGtc+pYKlH+RVLRWpKSsE+IKcELDcGpEE+W+G+SlEEI+EQKhlHDIsEVhcLLRSc-.sD+FFhcTG+YMKGGSERW+MVAlGlLEpGGcK+lGsVEVGLFKsc.sGpsVlYHLMF+PTshEctGcVsssuFu+uDDV-cI--s..-spDhuGFpYPpGVRsEhV+ssspF+IVWRNP+sTSEVLRoLTlhphPpl ............EsEtutEsEhs..Eh....uss+cK....su.K.....KScGsc+pYKIH+RVLRWpKSPEKIKcEhD+GSEE+W+GRSlEEIKEQKhlHDIstVlcLLRScD.AD+FFhcTGcYhKGGSERtRMVAIGlLEsGGc.+hsGVVEVGhFKDs.sGssVVYHLMF+sTthtphGtsht...sthschss.lttlcct..c.pD.ttF.YP.slp.EhspttstFpI.ats.psTu.llppLhl.phs.................. 0 26 26 26 +7585 PF07754 DUF1610 Domain of unknown function (DUF1610) Fenech M anon Pfam-B_8731 (release 14.0) Domain This zinc ribbon domain is found in archaeal species. It is likely to bind zinc via its four well-conserved cysteine residues. 20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.78 0.72 -7.06 0.72 -3.81 21 205 2012-10-03 10:42:43 2004-07-28 10:44:10 6 2 117 0 143 189 34 20.80 46 59.03 CHANGED CpsC......RchussasCPpCGh ...CppC.......Rcp..ustahCPsCGh. 0 38 81 115 +7586 PF07755 DUF1611 Protein of unknown function (DUF1611) Fenech M anon Pfam-B_8752 (release 14.0) Family This region is found in a number of hypothetical bacterial and archaeal proteins. The region is approximately 350 residues long. A member of this family (Swiss:Q6M063) is thought to associate with another subunit to form an H+-transporting ATPase, but no evidence has been found to support this. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.71 0.70 -5.77 79 333 2012-10-05 12:31:09 2004-07-28 10:48:20 6 4 302 6 140 392 459 289.80 34 83.25 CHANGED h-..hchsGpsss........tslPlhsshpts.....ttsscslllGlAstGGhls.tsWppslhpAlctGhslsSGLHs.hL......s-cPclsthApp..tGtplhDlRpP.stsh.tlusGpt+...........t.sspRlLsVGTDCulGKhsTuLtlpcuhppcGlcusFhATGQTGIhI.uGpGlslDAVsuDFsuGAlEtll.csst.....pp.chhllEGQGSLh...HPuauu.....lohuLL+GupPDullLsHcssRpphcshsp...hsl.Pslpphlplhptlushs........spVlGlu.lNTpsls..-pp.utphltchppch......uLPssDPlRp..G.ss..pll .................................phsGppss.........hslP.hs.hpss........stsscsLllGlAstGGhls..ptWhpslhpAl.ctGhslsoGLHp.hL......s-.sclst.hApp..pGt..plaDVRps.s.sh...slusGpt+.............sucRlLsVGTDCulGKhhTuLtLccshcc+Ghc...u...sFhATGQTGI....hI..sGpG.lslD.....A...VluDFhuG...AlEtls.css......cc.ch.llEGQGSLh......HPuauG.......VohuLl+GuQPDullLsHcssRpphcshsp...hsl..Pslpphlchh.thuphs.......stspllGlu..lNTptl....s..-p.c.Atthhtchppch......ulPssDPlRh.Gsstl................................................................................... 0 35 86 119 +7587 PF07756 DUF1612 Protein of unknown function (DUF1612) Fenech M anon Pfam-B_8688 (release 14.0) Family This family includes sequences of largely unknown function but which share a number of features in common. They are expressed by bacterial species, and in many cases these bacteria are known to associate symbiotically with plants. Moreover, the majority are coded for by plasmids, which in many cases are known to confer on the organism the ability to interact symbiotically with leguminous plants. An example of such a plasmid is NGR234, which encodes Y4CF, a protein of unknown function that is a member of this family [1]. Other members of this family are expressed by organisms with a documented genomic similarity to plant symbionts [2]. 
30.10 30.10 30.60 49.50 27.90 30.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.72 0.71 -4.05 18 141 2009-01-15 18:05:59 2004-07-28 10:54:13 7 2 98 0 42 113 1 126.70 55 35.37 CHANGED sLlaDsDWDE-sRLsEWRuVlspscsL..PPlLpAAlhhDAWppLEsLQ+usWLGRLLsAuhLRpcGhss.uHLsulshGLKslPh-RRRpRsRtoRLhuhLcultsAAptGhKEHDRLslARp.MpR+LcG ..............tLlYDsDWDEDuRLpEWRss.hststsL..PPlLpAAlhaDAW.pLEVlQRusWlGRLLsAuaLRpuGlsu.sHLsAlsLGL+shts-cRRupsRssRLtsFLtAlptAAEssMKEHDRLhLARcQMpRKL+G....... 0 4 19 29 +7588 PF07757 AdoMet_MTase DUF1613; Predicted AdoMet-dependent methyltransferase Fenech M anon Pfam-B_8934 (release 14.0) Family Proteins in this family have been predicted to function as AdoMet-dependent methyltransferases [1]. 20.20 20.20 20.20 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.45 0.71 -3.95 3 246 2012-10-10 17:06:42 2004-07-28 10:55:27 8 4 214 0 186 308 53 106.20 44 20.68 CHANGED LV.NlEKYNDLYsELKQKYuQpLV-pW...sEsTDPtKFVFEDLAIAAYLIhLW+QTpSu...pp.pSFVDlGCGNGLLVYlLsuEGY+GYGaDlRKRKLW-hYPs-sQVcLhEKslVP .......................................................l.s..papshYtpLKpKYuppllcsW.......sE..sTDPpKaVaEDlA..IAAaLl.LW..p.p....h.sp........................pth.....u.........FVDlGCGNGLLVaIL.pEGa.pG.h.GhDsRcRK.Wsha.s...spsp..L................................... 0 54 91 149 +7589 PF07758 DUF1614 Protein of unknown function (DUF1614) Fenech M anon Pfam-B_8844 (release 14.0) Family This is a family of sequences coming from hypothetical proteins found in both bacterial and archaeal species. 
22.40 22.40 22.60 22.50 21.10 21.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.20 0.71 -4.54 48 186 2009-01-15 18:05:59 2004-07-28 10:56:30 6 2 156 0 112 170 2 169.60 31 72.00 CHANGED llGShI.....NIPltp..tt............................hhGh.h....Ph..h..tpshlAlNVGGAlIPlhlulY..Llhp....h.......tsllusllsullsahhu+sl..........sGlGIssPshlsPlsAulsAh....lhuttht..............sAYluGslGoLlGADllpL...t...clhphs.ssh..l.......SIGGAGsFDGIFloGllAlLL ..............................................lhsS.hNlslhp...t..............................hhsh.h.h...s.........pphlAlNlGGAlIPlhlulY...Llhc..........t.hltsllusllsullsahhu+sl.........sGlGI.hhP...salsPllAu.l.hAh...........lhuht.hc.................sAYluGslGsLlGADllpl.........t....plhphu...ssh...l..............SIGGAGsFDGIalo...GllAlLL........... 0 46 80 98 +7590 PF07759 DUF1615 Protein of unknown function (DUF1615) Fenech M anon Pfam-B_8943 (release 14.0) Family This is a family of proteins of unknown function expressed by various bacterial species. Some members of this family (e.g. Swiss:Q8Z8Z7, Swiss:Q8ZRF4) are thought to be lipoproteins. Another member of this family (Swiss:Q93SV8) is thought to be involved in photosynthesis [1]. 
25.00 25.00 26.30 26.20 20.90 18.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.94 0.70 -5.67 22 632 2009-01-15 18:05:59 2004-07-28 10:58:44 7 1 622 0 68 326 17 315.30 72 86.98 CHANGED uslpDRpGWApDItsAhps.tlssoscNlCuVLAVsEQESsFpsDPsVPGLu+IuhpEI-pRAs+h...........tlPthllcsALphpSP.sG+oYpcRlculRTE+-Lstla-Dhl...........stlPhGppLFus.....hNPV+TGGPMQVSItFAEp+u+...tYPasss.uolRcEVFoRRGGlaFGhA+LLsYPssYspslYRFADFNAGaYASRNAAFQsAlSRloGhtLALDGDLlpYss.......stsusTEhAlhsL..upcL...shscspIRcsLcptcs.sFpcTtLYcpVasLA-+psG+shPRAhLPtIpLcSPKI.oRpLTTAWFAcRVDpRap+CMpR ....A.SVKDR-AWAKDlATTFcSQGLAPTlENlCSVLAVAQQESsYQADPsVPGLSKIAWQEIDRRAERh...........HIPuFLVHTALKI+SP.NGKSYSERLDoVRTEKQLSAIFDDhI.............uMV.PMGQTLFGS.....LNPV+TGGPMQVSIAFAEQHsK...GYPWKMD.GTVRQEVFSRRGGLWFGTYHLLNYPAsYSAPlYRFADFNAGWYASRNAAFQNAVSKASGVKLALDGDLIRYsS.......KE.PGKTELAsRKL..Au+L...GMS-uEIRRQLEKGDShuFE-TALYKKVYpLAEAK.TGKoLPREMLPGIQLESPKI.TRsLTTAWFAKRVDERRARCMp....... 0 11 22 43 +7591 PF07760 DUF1616 Protein of unknown function (DUF1616) Fenech M anon Pfam-B_8886 (release 14.0) Family This is a family of sequences from hypothetical archaeal proteins. The region in question is approximately 330 amino acid residues long. 
31.60 31.60 33.10 32.70 31.40 31.20 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.80 0.70 -5.27 38 205 2009-09-11 00:12:54 2004-07-28 11:00:14 6 5 101 0 137 214 7 244.00 22 72.95 CHANGED Lhlllhhsllshlhlh.hssht...pohlRslLGlsh..lLFlPGYsLlusLaP................p+psLss.....lERh.sLShGLSIullsLlGLsLNaos.......huIchhPlll.oLshhollhshlAhhRRtphPssctath.hpthh.t................hhhtsssph-phLsllLllullssls.slsaslhhPcpsEpFTEFYlLG.pt....hAtsYPsphhhGpptslhlG.................lsN+EhpshsYslclhLpspphs.......p.ph............slss...p......pohppshphps..hhups.clpaLLapcs...................ppshsY+sl+LalsV ..................................................................................................hhhh.hhhhhhhh..h...........lR...hhhuh.h....lhF.hPGYshlthLaP....................................tttpl.s........l-Rh.slShGl...SlulsshlGlhlshs........hslphhslhh.slshhslhhshluhhR+hph.............h.................................................................h..p.phls.hllhl.llhshh..hhshhhh.s.....tt..s.....ctaothhlls.tt.....hutsYssphh.up...lhlt.................lhNpEhpshtYhh.hhltp..ht.........................th......h......tlst.........s........................tshp..hph..t.........stp...pl.h..hLaptt.........................t.t.shhthhlhlp................................................................................................... 0 19 77 107 +7592 PF07761 DUF1617 Protein of unknown function (DUF1617) Fenech M anon Pfam-B_8981 (release 14.0) Family This is a family of sequences from hypothetical bacterial and bacteriophage proteins. The region in question is approximately 150 residues long and is highly conserved throughout the family. 
21.10 21.10 21.40 22.40 20.70 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.92 0.71 -4.14 2 86 2009-01-15 18:05:59 2004-07-28 11:01:05 7 1 53 0 8 64 1 137.70 43 98.75 CHANGED MDLTLKNK-LNTLYpVLDKIKlTNMRANRGRAKLLAKV.sKlsEYAKDEhDlID.YsAKscDDKalhD-+tN.KltDsuKLsELNDhLsELAsE.IVIKGGEYSKRFIDFLpaL.EsEDEFTSpEIlLIDNILEQFEES.KGE ......................MplTl+N+-LsslasVL-cIKlpsh..RAsRGRAKLLuKVhcKlcEYAKDEsDLIchYAtKDcDGK.all.D-+.tNh....KL.........s.D.PsKl.....c.....EhNchLsELusEcIsIcGsEYS+RFhDhlpaLt-sEDEhoup-hll...hDplLEpaEtu................................ 0 2 5 7 +7593 PF07762 DUF1618 Protein of unknown function (DUF1618) Fenech M anon Pfam-B_8857 (release 14.0) Family The members of this family are mainly hypothetical proteins expressed by Oryza sativa. 21.00 21.00 22.00 21.40 19.30 20.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -11.16 0.71 -3.95 61 623 2009-01-15 18:05:59 2004-07-28 11:02:59 9 14 11 0 353 498 0 130.80 21 29.62 CHANGED W.V..DLs.tGlLhCDsh...s..............t......s....clcalsLP.ss.hssttt....................pt.......hptaRs.lul.....ssG....pl+aVplstt.t...............................................ssshhlssWoL..............ssht..Wph-s.plshsclWstcs............apt.t........l.Pp.......h..h..Phl..uh.cs....slla ......................................W.V.D....Lh...t.G.l.l.h.C..D.sh.....s.....................cp.........splcalsLP..sshh.sttp.....................pts.phhR.s..lss.......spG.....pl+aVp.lptt......................................................tshhlssWsL.................sttpWph..-t..pl.s...hsclhttts..................................h.t.t......l..sp...............hh..h..Phls..ps....thh........................................................ 1 0 66 213 +7594 PF07763 FEZ FEZ-like protein Fenech M anon Pfam-B_8854 (release 14.0) Family This is a family of eukaryotic proteins thought to be involved in axonal outgrowth and fasciculation [1]. 
The N-terminal regions of these sequences are less conserved than the C-terminal regions, and are highly acidic [1]. The C. elegans homolog, UNC-76 (Swiss:Q7JNU9), may play structural and signalling roles in the control of axonal extension and adhesion (particularly in the presence of adjacent neuronal cells [2]) and these roles have also been postulated for other FEZ family proteins [1]. Certain homologs have been definitively found to interact with the N-terminal variable region (V1) of PKC-zeta, and this interaction causes cytoplasmic translocation of the FEZ family protein in mammalian neuronal cells [2]. The C-terminal region probably participates in the association with the regulatory domain of PKC-zeta [2]. The members of this family are predicted to form coiled-coil structures [2,3], which may interact with members of the RhoA family of signalling proteins [2], but are not thought to contain other characteristic protein motifs [3]. Certain members of this family are expressed almost exclusively in the brain, whereas others (such as FEZ2, Swiss:Q76LN0) are expressed in other tissues, and are thought to perform similar but unknown functions in these tissues [3]. 
28.10 28.10 28.40 29.50 26.30 28.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.55 0.70 -4.66 10 222 2009-01-15 18:05:59 2004-07-28 11:03:54 8 5 82 0 108 181 0 207.80 46 62.63 CHANGED ShEDLVNpFDEKLolCF+NYsssTEslAP...V+....spoEEEhLpD--lWsALTDNYGNlhPlDW+sSasRsLHhssLN...Lt.......................................pussssspl...cpSDDEELpEphDMHolIluslp.......EEPLhTADQVIEEIEEMMQESPDPE---.........ssspS-slShLppElpuLppussss..............sapEcL+pLSsScLsElL-ElEsAIR-YSEELlpQLAlRDELEFEKEVKNSFISlLI-VQNKQKEa+ElhKKK+KhKususp ..................................................h.sLspph-EKlshCFp..s.t.tst...h.As...Vp......ppcphhptpplW.s.LTsNass.h.hshDWcsopscsL.h.hl....h.........................................p..u...s...s.s...h...cts.--EElpEphDhHShI.st..hs.......-EPLhTA-QVIEE.I-EMMQpSPD.PE--.c........................sss.pSc.thohl....p-hp..sh.ps....s.s...............................tE......pl+pLSsocLsElLcElEssI+-aSEELVpQLAhRDELEFEKEVKNSFISlLl-VQNKQKEp+Eh...hKK++K.+shs.............................................................. 0 31 41 69 +7595 PF07764 Omega_Repress Omega Transcriptional Repressor Finn RD anon Pfam-B_63922 (release 14.0) Family The omega transcriptional repressor regulates expression of involved in copy number control and stable maintenance of plasmids. The omega protein belongs to the structural superfamily of MetJ/Arc repressors featuring a ribbon-helix-helix DNA-binding motif with the beta-ribbon located in and recognising the major groove of operator DNA [1]. 25.00 25.00 25.90 29.30 22.70 22.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.31 0.72 -3.61 2 125 2012-10-02 18:44:02 2004-07-28 11:04:54 6 3 97 14 2 49 0 63.30 79 46.87 CHANGED MIVGNLGAQKtKRNDTPISAKKDIMGDKTVRVRADLHHIIKIETAKNGGNVKEVM-.tLcphl+phL.s+h ................hGNLGAQKtKRNDTPISAKKDIMG.DKTVRVRADLHHIIKIETAKNGGNVKEVM-htLcphl+phL.s+h............. 
0 1 1 2 +7596 PF07765 KIP1 KIP1-like protein Fenech M anon Pfam-B_2332 (release 14.0) Family This is a family of sequences found exclusively in plants. They are similar to kinase interacting protein 1 (KIP1), which has been found to interact with the kinase domain of PRK1, a receptor-like kinase [1]. This particular region contains two coiled-coils, which are described as motifs involved in protein-protein interactions [1]. It has also been suggested that the protein's coiled- coils allow it to dimerise in vivo [1]. 20.50 20.50 21.50 21.90 20.00 20.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.44 0.72 -4.14 13 269 2009-01-15 18:05:59 2004-07-28 11:06:16 7 8 37 0 153 254 0 66.70 53 8.18 CHANGED SWWWs..SHhssKsSKWLppNLpEMDp+VKpMLKLl-E...-ADSFA+RAEMYY++RPELIshVEEhYRuYRALAERYD ..........................................ppspWLtps..Lp-M-ppVKtM.lKLI..--.........-.....u......DSFA++AEMY..Yc+RPpLlshVE-hYRsYRALAERYD............... 0 16 88 125 +7597 PF07766 LETM1 LETM1-like protein Fenech M, Wood V, Mistry J anon Pfam-B_2202 (release 14.0) Family Members of this family are inner mitochondrial membrane proteins which play a role in potassium and hydrogen ion exchange [3]. Deletion of LETM1 is thought to be involved in the development of Wolf-Hirschhorn syndrome in humans [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.64 0.70 -5.36 13 760 2009-01-15 18:05:59 2004-07-28 11:08:40 8 13 347 1 502 777 35 218.00 28 44.32 CHANGED KcsLhs+lhcElpHYacGh+LLhh-h+..lSsKllh+lhsGtp....LoRREppQLhRTsuDlFRLV.PFusFlIVPFhEhLLPlhlKLFP.sMLPSTFpopsc+p-....Kh+pplpsRh-hu+FLQcTlc-huhpscsphppttp....cFssFhp+lpsssc..sS....s-EIlchuKlFcD-h.sL..DsLoRsQLsuLC+hhpLpshGTsshLRaQL+h+l+pl+pDD+tIstE.GV-uLostELppACtuRGh+uhGlScEpL+-pLppWL-LpLppplPsoLLlLS .................................................h....h.chhp.hpha..h....pGh.+LLhh...-..h+....hut...+lhh..+.h..h....t..G...t.................LoRR.Eppp..........L...h+................p.............htDlh+ll.Ph...hhlllP.F...h....ph....LLP.lh.l.+.h...FP..shL.PSTFpsppp...cp.c...................thp..p..hhth+h.....p..........h.t.p.h.l....pp.ph....ppht............t...t....t........t..........phtphh.....p.pl....ptstp......s...............pplh.t...htp.h..Fpsp....sL...cp...ls..p.........p.Lhshs+.......hh..........l.p....s.....h............s..s..s...h........L....Raplph+hp.lpt-D....ph.IthE.....G...l..p.s.......L...s......tELp.AChtRG.....h.psh..s.....h.......s..t.p..hcp.Lp.....................Wlplphp.ths...Llh...................................................................... 0 164 274 405 +7598 PF07767 Nop53 P60; Nop53 (60S ribosomal biogenesis) Fenech M, Wood V, Mistry J anon Pfam-B_8778 (release 14.0) Family This nucleolar family of proteins are involved in 60S ribosomal biogenesis. They are specifically involved in the processing beyond the 27S stage of 25S rRNA maturation [3]. This family contains sequences that bear similarity to the glioma tumour suppressor candidate region gene 2 protein (p60) [1]. This protein has been found to interact with herpes simplex type 1 regulatory proteins [1]. 
24.90 24.90 25.30 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.38 0.70 -5.47 25 375 2009-01-15 18:05:59 2004-07-28 11:11:04 6 8 272 0 244 369 3 306.90 24 83.25 CHANGED SRKuKKAWRKNlDls...-V-psL-chR.-EcIpGGs..luEKss--LFslDopuss.......pltptppp+hpKPLK..scpIL...ps+Splsulss++ppssps.............p+.+pp.lscc-htRLtplspcppstpsph.ssp................aDlWucpss.p...................thshsphshtpss.pslK.tPhplttss....t..lP.ulchscuGtSYNPohE-appLLppttccElctEK+cpchc+ppppthpths........h...t...psh.Esp....-s-scscs...---st..t.-u-h-....sh.stpc.sc+KT+sQRN+p++cKctc+ptc...hp+phKpcppplt+l+uItcclsp+ppphscppcpcp....pcsccpth+p++L.GKhch.-tslEVhLsDELssSLRpLKPEGNLLcDRF+oLQ .......................................................................................................s+ptK+sWRp.hp.....-lpphhcp.p.pp.h.....hG...htph.sp.pLFhlD.t.pt.............................hh.h.h...+.L+...sp.ll...tppot...l.s....t.ht.t........................................p.t....pp..h.p...h.....th....tt..........pt...t........................................hDhWsptt..t..........................................t....h............h...t.......thp....s.phttps............................h..slphstsGtSYNPshpsapphlt.p.t...hph.....Ehpt.c.pp.phpc.......h....hht......................................................ttp.................ptttt.t............tppp.t......p.sthp..........h.t...ht...sc+...KT.ctpRp+.tct+....p.ttttt......t..pt..h+..tp........ppl..p.lctlttpl.tpct...tthttht.tpt............tptpttt......p.......ppL....G+h.........ph..t..sl-l.Ls-ELs......sS.LRpL.KP.pu.slhtDRh+sh................................................................................... 0 86 136 203 +7599 PF07768 PVL_ORF50 PVL ORF-50-like family Fenech M anon Pfam-B_8834 (release 14.0) Family This is a family of sequences found in both bacteria and bacteriophages. 
This region is approximately 130 residues long and in some cases is found as part of the PVL (Panton-Valentine leukocidin) group of genes, which encode a member of the leukocidin group of bacterial toxins that kill leukocytes by creation of pores in the cell membrane [1]. PVL appears to be a virulence factor associated with a number of human diseases [2]. 21.60 21.60 21.60 21.80 21.50 21.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.90 0.71 -4.10 6 423 2009-01-15 18:05:59 2004-07-28 11:12:33 6 1 235 0 5 197 0 113.60 54 91.85 CHANGED IlplpsK.PhKaspaE...tElhcKpGIosGlVppRV+sGWchpEAl-APhGh+LsEY+EhphpctlcptphEREhtRcR..........R+EAEL+RK...KPHLFN.VPQKHPRG+Yssah..h.NphF.+Khpc ........................................IVpIpsK.PY+FocaE....ELIEpaGITsGMVuKRVK.c.GWcLcEAhcAPcGhRL..uEY+Ehhphc...hl...Ep.tchERchtR...cR.............++EA.ELRRK...KPHLFN.VPQKHsRs.Y..ah..h.NphF.hKhpc............................ 1 3 3 5 +7600 PF07769 PsiF_repeat psiF repeat Fenech M anon Pfam-B_8872 (release 14.0) Repeat This region is approximately 35 residues long. It is found repeated in a number of putative phosphate starvation- inducible proteins expressed by various bacterial species. psiF (Swiss:Q7AH28) is known to be an example of such phosphate starvation-inducible proteins [1]. 20.70 20.70 20.70 20.70 20.20 20.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.57 0.72 -4.41 35 1450 2009-01-15 18:05:59 2004-07-28 11:13:55 9 3 719 \N 184 570 34 34.50 64 64.33 CHANGED A.ssQQpKMpsCNupAssKsLKGD-RKsFMSsCLp ............s.LTPQQQKMpsCNpQATsQuL....KGD-RKsFMSsCLK.. 0 17 67 127 +7602 PF07771 TSGP1 Tick salivary peptide group 1 Fenech M anon Pfam-B_2333 (release 14.0) Family This contains a group of peptides derived from a salivary gland cDNA library of the tick Ixodes scapularis [1]. Also present are peptides from a related tick species, Ixodes ricinus. 
They are characterised by a putative signal peptide indicative of secretion and conserved cysteine residues. 20.00 20.00 20.70 20.30 19.60 18.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.95 0.71 -4.23 8 125 2009-09-11 03:26:28 2004-07-28 11:18:23 6 2 12 0 13 126 0 108.80 37 87.82 CHANGED MthTslTLVLVSLAFFuoAAAcsCpNGTRPAS-pNREGCDYYCWNssTsuWDcaFFtDGEsCFYNsGscGsCpsGcCHLsT..sSGsPocssDasspPo.......ptPKpKKKKspKsKKPK+poc.KD .........................................thh.hhhsL.V.sl...AF...hs.......s..uAu.c....s.....Cpsus..R..P.uu.cpsR...-GCcYYCa.Ns.t..T.....suW-p.h.FF....s...sGEpCaYs.s...Gp.c..GhC....ps.....G...t....C...Hhss...........suusPs..-s.s.p.ss..sp.........p.............................................................................. 1 13 13 13 +7604 PF07773 DUF1619 Protein of unknown function (DUF1619) Fenech M anon Pfam-B_8790 (release 14.0) Family This is a family of sequences derived from hypothetical eukaryotic proteins. The region in question is approximately 330 residues long and has a cysteine rich amino-terminus. 
20.90 20.90 22.10 21.20 20.20 20.50 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.56 0.70 -4.61 16 255 2009-01-15 18:05:59 2004-07-28 12:10:45 6 8 95 0 139 257 1 242.40 26 46.65 CHANGED CsCDLohuhCDlNCCCDpDCsss.stphF.thCh.tshsss.....pphCsp.ppsppopshp..........hsphssshhClhpoNpp.............ssh.shhtpsssss........shsthsps.stsssssthYchGDPlhh..........t.pssuhhplPsshss.utCs....spsVtFLpshs.osCs......hsthp....th.hsphsshhllsssp...........sss.h.hslphshtpshsthlss........sphhs..spsptCsssVlplchphhassss.l.thplhhhtuplshps...tshltpcFplp...Fhpts.ts.ssshSGNsGYlsGpP ...............................................................................................CsCDLpsstCDlpCCCD..DCs......hpl....F...p..........C.sssht.ss..................p.hCst..phh.hs..s..................t.s......hhClp.ss.p...........................sh.t...hh....p.s.ss.shp...................s...h...t..t......t....ssshY.c.Gssl.s............................uhhp....hPtsh.hs.u.Cs..p...........ssPsu.F.......L........s.s..spCsh......t..............sLp.h...t...h....s.....lhth.t..................t.h..pl........hpphs..t.hht.............................................................t.phstsll.phpY.h.hs.....s...s........t.ltphslphh.spl.............s.t..............................lpQ..pF..lp............F...p...s.......................hSGsPGY.hGhP.............................................................................. 1 43 51 85 +7605 PF07774 DUF1620 Protein of unknown function (DUF1620) Fenech M anon Pfam-B_8944 (release 14.0) Family These sequences are mainly derived from predicted eukaryotic proteins. The region in question lies towards the C-terminus of these large proteins and is approximately 300 amino acid residues long. 
20.70 20.70 25.40 23.40 19.60 19.20 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.37 0.70 -5.08 35 334 2009-01-15 18:05:59 2004-07-28 12:12:07 8 9 278 0 229 324 4 207.20 36 23.50 CHANGED hsENWllYoYass.........shhphplsllELaEsppsssptsh..............tphoSh........ss.shstllspsalhs.p.tIpphulTpTcpGITsKplLhth.sosplstlP+.lLssRR......sppssss-.pcEth.hsYpshlshss.phhloHppplhshc....................pllosPotLESTollhuaGh..DlFhTR.lsPStsFDlLscsFsK.htLlhTlluLhlsshlspshsppKplpttW ............................................uENWll...YpYass..........................ps+c.clsllELYEusp.t.tspssh..........................SSh......................sps.hP.pVhpQ.oYlhP..p..sIsshtsTtTcpGITo+plLlsh.tostIlulP+..t.lLDPRR..........P..ptsot...........pp..pEEsl.lPYsP.l.lps..ct.hl..saspsV.tl+............................sIhouPo.tLESTsLVhAaGl..DlFhTR.lsPSppFDlLp................csFsh.hhlhhslhuLhhushlsp.hsppKtlpptW................. 0 81 130 191 +7606 PF07775 PaRep2b PaRep2b protein Fenech M anon Pfam-B_4098 (release 14.0) Family This is a family of proteins, expressed in the crenarchaeon Pyrobaculum aerophilum, whose members are variable in length and level of conservation. The presence of numerous frameshifts and internal stop codons in multiple alignments are thought to indicate that most family members are no longer functional [1]. 
19.20 19.20 20.10 20.00 19.10 18.70 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.70 0.70 -6.29 15 107 2009-01-15 18:05:59 2004-07-28 12:13:36 6 2 6 0 61 113 0 199.00 21 57.96 CHANGED LcAlsPtLPtLacLRDALsEFADAF+sVTtEsl++KaGl-huYDVRNEpFFKKL-EllsMsE-YVY+NlsVERsPLDsSGphPKsVIRFKLs.GEElAHIslYWTGpcLpApFsGSREpAERLASII+ALGGcAEVKch.GstWhVpLTTDGIsAIRHsuWLsAVRuFV-ELac......KGLIuc-RY-pLl+-IEAGPNsVKaAGVEFSVhYc....t...s+pIhlcYpPpSEsoKNAAVsALKA+GL+EGVHFTVpc.Gu..YEIRVstE.YsKAlEsLspsGL+cGEaYul.s++RhIpVKs-pKDsllNALKsAGLtEG+cFss+.sG.YhIhITYDGLREIQRMALsGDhEAERFIRcLEDVL+RRaGpsAlpKLhEVLpPAREEGTlD.LPLsVhD-+GNllARVVDLKYEFVc........sspPVspCAGE-CRLRllVEYEs.uGER+QFKhEWYWtc.pcc+GpsTlTYYaEhAtsslKs-VEAAVLKALTG....KuKRGpVhLhAcpL-ALpRFKuL+DA.lDpWRsu+P .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 46 47 47 +7607 PF07776 zf-AD Zinc-finger associated domain (zf-AD) Guo J, Finn RD anon Pfam-B_14442 (release 14.0) Domain The zf-AD domain, also known as ZAD, forms an atypical treble-cleft-like zinc co-ordinating fold. The zf-AD domain is thought to be involved in mediating dimer formation, but does not bind to DNA [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.03 0.72 -3.98 98 2455 2012-10-03 11:22:52 2004-08-16 09:21:17 10 977 74 1 1809 2536 0 74.80 20 13.71 CHANGED hCRlCh......................pppsphhslhppt..........pst........plsphlpphhslplp........ts-shsp..h.lCpsChpplpphhpFcppspps.pphhpph .......................................hCRlCh...................................pptph..hs.l..aptt............................................ttt.....................plsphl.p.p.h.s.s.lplp......................ps-..slPp......t.lCppChpp.lpph...h....p....F+ppshpspphlt..h..................... 0 336 502 1534 +7608 PF07777 MFMR G-box binding protein MFMR Fenech M, Bateman A anon Pfam-B_5000 (release 14.0) Family This region is found to the N-terminus of the Pfam:PF00170 transcription factor domain. It is between 150 and 200 amino acids in length. The N-terminal half is rather rich in proline residues and has been termed the PRD (proline rich domain) [2], whereas the C-terminal half is more polar and has been called the MFMR (multifunctional mosaic region). It has been suggested that this family is composed of three sub-families called A, B and C [1], classified according to motif composition. It has been suggested that some of these motifs may be involved in mediating protein-protein interactions [1]. The MFMR region contains a nuclear localisation signal in bZIP opaque and GBF-2 [2]. The MFMR also contains a transregulatory activity in TAF-1. The MFMR in CPRF-2 contains cytoplasmic retention signals [2]. 
20.50 20.50 21.70 20.80 17.60 18.10 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.75 0.71 -4.37 23 187 2009-01-15 18:05:59 2004-08-16 09:23:41 6 6 41 0 63 170 0 162.00 36 47.29 CHANGED MGs.uE-spss.c....spPss...p.....s............psssssY.PDWuu.MQAYYus......s.PsaF.ssl.Auu.s.PHPYMWG.sQ.........MMPPY..GT.P..YsAhYP..GGlYA.HPuhP.us.s.s.h.........stss...sssshoh..Es..sKuopsK-+sshK+.KG.s.....sh.hottsspssKssu.usspshSp..........u-Sus..-GSS.pGSDuN.opsss ......................................................MGs.s-.sps.K..........pcssss..p................s......tsss..sa.PDWus.hQAY.s...................ss.saa.ssh..Auu.t.sHPYMWG..p.....................hhP.PY.Gs..P..Ys.AhYP...GulYA.HPuhs.s.......................s.........sssshuh..Es..uKsstsp-ps.hK+.Ks.......sh.hsttsspssKssusoussthSp.............................spSss..-uoS.-uSDtNopt..t............................................ 0 6 35 50 +7609 PF07778 CENP-I Mis6; Mis6 Wood V, Finn RD anon Pfam-B_17323 (release 13.0) Family Mis6 is an essential centromere connector protein acting during G1-S phase of the cell cycle. Mis6 is thought to be required for recruiting CENP-A, the centromere- specific histone H3 variant, an important event for centromere function and chromosome segregation during mitosis [1,2]. 
20.00 20.00 21.60 21.60 17.90 18.80 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.72 0.70 -6.31 5 194 2009-09-11 09:41:59 2004-08-16 09:25:22 6 6 139 \N 135 181 0 357.00 25 61.93 CHANGED MSs.cts+Nopt..R..QsppS.QTsL.sW+httpssspKsh..cupu..LuDpcHssDQ...DuLppAlsYFEKusc+sSpsKcolLcKHLcTlEsVAacsGLsPEuIDlLLDVALSGK..FusAlsTRILKCLIPsotISEDSVVKuVSWLCVG..KCSusIQlLFLRWLIsMFDFID+KcQlspLYGlFFshLsD-sLCPYlCHLLYLLTKKENVKPFRlR+LLDLQuKMGM..QPHLQALLSLYKhFsP-LISlSLPsRKKsaFKNuDs.W+AALpAV+QRNpussP-Pp.+LhLGssss+SpKRKWNppSlIPAlsos+..h.sttK.MSh.D.husssSaPLEQLpSFPQLLpNIH+LEFPSQMuSVLsssLLLHYlNCs+DEslLLRLsYWLoQTLQEECsWhsssNsQcEcEFpsFLDhll+upCFLQEGF.SCEsFLYKSLPLWDGhsCRSQhLpLVoWIPhSoFSElKslLLDcLAQLFFTSolYFKCSlLpsLK-LLQNW.LpWLS.-lplpShosSPh ...................................................................................................................................................................pt...ht...h..ht.ltphAhppGl.sp.lp.Llplh..hp.p....hsp...ss.s.pll.K.sh.hP.s.p..h..lsppslhpslshLs.s.............KsS.shpshhh+WLlhh..a..c..hl.-....pp..p.lp.thY..uhhF.s.Lp..sl..p..lC+L......L.l..l............T+.+cp..................V....+s.......aRlptl....hpL......tph..G.....p..tL.uLLplaK.ahPpll...........sh...s..h...s.....th.t..h.F+p.c..Wptt.h.tlp....pp....p..........t...........s.......ht..........t..........s.tt...+..p.......h.lPshpo.p.......................................s.ppt.h..slEplpsh.phlpplcplE...hPsQhsuhLtsslh...+hl.hh...sp.h....Rh..Wltthhp-.................tt........t...p.tpp.h................hLphlhph.phh..pph..sh.tFhh.p.L.hWsG.p.ps.hltllpahPh.....s.pphh..hht.Ltthh.hsss.....phtllp.hptllppa..h.................h............................................................................................................................ 
0 27 59 102 +7610 PF07779 Cas1_AcylT 10 TM Acyl Transferase domain found in Cas1p Anantharaman V anon Manual Domain Cas1p protein of Cryptococcus neoformans is required for the synthesis of O-acetylated glucuronoxylomannans, a consitutent of the capsule, and is critical for its virulence [1]. The multi TM domain of the Cas1p was unified with the 10 TM Sugar Acyltransferase superfamily [2]. This superfamily is comprised of members from the OatA, MdoC, OpgC, NolL and GumG families in addition to the Cas1p family [2]. The Cas1p protein has a N terminal PC-Esterase domain with the opposing Acyl esterase activity [2]. 27.30 27.30 28.90 31.10 24.90 23.70 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.78 0.70 -6.03 8 235 2012-10-02 17:00:17 2004-08-17 10:03:26 7 11 127 0 174 248 4 389.60 33 62.91 CHANGED -hhph............sDGTCCpshcssTslQtlshshlhlsh.hhhh...hhhhphs.thpspshhsshp..pp.ttt..tp............suhpslhsuLspLGLIMAYFYLCDRTshFMKENKaYochsFalPllYlhVLGLFasEso+.-TKVLNRDQTDEWKGWMQLVILIYHhoGAS+lLPIYMaIRVLVAuYLFhTGYGHFoaaWp+GDFG..hhRlsQVLFRLNFLoVlLChsMsRPYQFYYFVPLVSaWahVlYsTLAlsPplsupsspsN.h.ahhLLlKlsshhshITlLhhSpshFEclFslhPLctlFsl.cssl+EWWFRWpLDRYlVaaGMlaAhhaLthQ+tplhD-sptssLhSpp.ht..hlllullullsYhsashsCcsKh....pCNElHsYluhlPIluFllLRNIoGhLRSpYSoaFAWFG+ISLELFIsQYHIWLAADT+GlLVLIPGsPsL..........NllloTaIFVClSHEVuplTspLsphhVPpDtppsh+phh..hshFsus 
..................................................................................................h..............................................................................................................................t.hh.uhsphuhlhhYaYhCD...................RsshahcppK.Ysc......Fh.h....h.h.h..lh....s...h.u.h............h.h.c..............p..........p..................p......p...............t..t..........h........LNRcQT-EWKGWMQ..........llhLhYHhh...uAsp.....lY.tIRl.hlAuYlahTGaGpFoaaah+.tD...Fu....................lhR..........hsphhaRLNFhsshhCl.shspsY.hYYhsPhhohah....hhlY.s.slulhsph...sptt................hhhhKlhshhhh........lhhl.h....s.........hF-hl....a......h....phhh........t.......h....p.......s.........p.........lpE.....WhFR.tLDRa.hhhhGMlhAh.........ha.t.......h.....p..c...h.................c..t.........tt................ht......................t....h...........hhhhu.l.hs...h....h.h.Yhh.a.t.........h...p.sKh....phNp.hHPah.ShlPIhs.........alhlRNhst.hRsh...shFA...WhG+loLEhal...QaHIW.....L.............t......u......s..s......+h..lLsl........l.P..s...shl.................Nhhlsohlalhluacl.phTs.Lt.hhls.p.p...................hhhh......................... 0 79 106 146 +7611 PF07780 Spb1_C Spb1 C-terminal domain Fenech M, Bateman A anon Pfam-B_5001 (release 14.0) Domain This presumed domain is found at the C-terminus of a family of FtsJ-like methyltransferases. Members of this family are involved in 60S ribosomal biogenesis, for example Swiss:P25582 [1]. 
25.00 25.00 27.40 25.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.54 0.70 -4.85 35 314 2009-01-15 18:05:59 2004-08-17 10:07:12 7 9 273 0 235 323 2 238.50 34 30.26 CHANGED pssscc-s-hE.................h..ttt.tpttc...................................chh...........................sAEshuLuppls.sKKo+cDLID-uaNRauFpD.....cp...sLPcWFl-DEp+HsK.phPlTKEtstthKp+h+tlNARPIKKVtEAKuRKKhRuh+RLEKh+KKAssls-ss-hoE+-KucpIp+LhcKss+ppp+ppp......plVVA++s........spshsuRPpGs+G+aKhVDsRhKKDhRAhKRht...K....K ....................................................................................................................................................................................ttptpps.......t.-.................tttt.t.p.t...t.ptttt...tt...c..........................................phchh.......................................................ssEuhALuppls.ucKs+c.DllD.suaNR.YsF.pD..c-........sLP-WFl-DEp+H...p+.phP.lTKctstth+c+h+tlNARPIKKVtEAKARKKh+shp+LEKh+KKuphls-sschoE+-Kspplp+lh+K.As.pp.cp+pth......phVVA+ts........spt.hstRPpG..l+.G..+aKhVD..sRMKKDhRAt++ht+.t.............................. 0 87 135 197 +7612 PF07781 Reovirus_Mu2 Reovirus minor core protein Mu-2 Moxon SJ anon Pfam-B_9308 (release 14.0) Family This family represents the Reovirus core protein Mu-2. Mu-2 is a microtubule associated protein and is thought to play a key role in the formation and structural organisation of reovirus inclusion bodies [1]. 
25.00 25.00 280.80 280.50 16.90 16.50 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.18 0.70 -13.19 0.70 -6.56 5 53 2009-01-15 18:05:59 2004-08-17 10:08:56 6 1 24 0 0 56 0 726.00 44 99.09 CHANGED MAYIAV.PsltVuSRsTsLlsoIDuhsscsst-tpDVushDPoalLRQLEhhSuGhosuDllcALlH+cWh+poshsLLPs++pLL-YLLSNPSAsPDslDRopLKuhhhpK+hssD.........F+IpDaauPLIosSTuluThoRpLNAuplVYosTsKVlGARL+LaAPAKYYuuoLSsppppuIlPSoccltssP+uRlsVosFPSloTs+CFVLouVDt.ssPsh..sVcaIQLhYpRshuV+ARhLsD..PlAluchlscpuLKsphssPoDA..RAARLstlRsputosPsGlNsStlsVVDLphph.ssscuLtsVsRsh+LTlHuVPSCLLphL-ITluDc.YPIRpEoGMFssWFLlLTLhSDclTDuRT+puVhLpPuSsuscslsaVplsusVSsRspSVtussusalcuVGLsLPKGSFKSTMI+sLsuLpIsGspVM.ussVlDSD-VGDSLcPTFETA...lY-tLtuLDP..lDDllKlALuTDLlsp-ssloplasoFLcLssELLTPtARchYspposEGRoLTFAHADSEhLNANasu+LlRstIPYacEVNILlRPNRVGGsLFQVlLSYCYKMaATSssosPhGtLLKpLFsPWLcussLlusLsPscoSAsluWaIPuchhsssGWChC-D.taloas.IRuhPL-lSVLctacWuRa+AsIlV-co.....LVplGuDhRssthuVhassatPsVcLlophAsFTLss+YclpL...........sCusooGRsasAcNs+LsloosG ..........................MAYlAl.Psl.VsSRposhlshl-uhs.lcsth-h.sDVthhD.shlLcQl-hhssGhpssDlhcuLlH+sWh+pSlhsLLPs+ppLLEYhhSNP.S.uhPDslDRphLKth.......Kch.pD.........F+lpDaauPLIosoTShhThsphLNst.IVYoTTc+VlGARlpLauPtKYYsho..uhhpphsIl..scclhsVPpuRhhVGsFPShuTspC.VLouh-h.stPsh....hchIpLhYp+.hp.VpAphLsD..P.lsuhhlsp+pl+st..sPs-t..RAAR.htlphp..stsssptlssthlpV.VDlhhph.sstcuLhsspR.h+LThhuVPsClLphLslp.luDt.hsIRpEsGMFs.WFLlLThhSDtlpDsRsp.tlhlpPSSssspsl.plplTuhlst+s.slhsshhsh.lcsIGlshPKGSFKSThh+hLsulpIsGsp.lM.pssVlDSD-VGDsl-PTFEpA...lYcult.ulDs...-DlhKhshsoDLlsp-.hh.uplassFLtLsp-LLsPhARchYspphsEuRsLTFAHADSEhLNAsasu+LhRshIsYhpEhNlllRpsRlGGsLFQllLShCYKMaATSssspPhu.hL+tLhsPWLcst.LluphsPspoSt.luWaIPsphhhpsGWCsC-c..talsas.IRu..hslp.LphhsWupa+Apl.Vssp.....LV.lGtshRssthuVhhssphP.VclhsphAhFT.ht+YchpL...........pCspusGRs.tAhNh+Lhhpo.................................................................. 
0 0 0 0 +7613 PF07782 DC_STAMP DC-STAMP-like protein Fenech M anon Pfam-B_9122 (release 14.0) Family This is a family of sequences which are similar to a region of the dendritic cell-specific transmembrane protein (DC-STAMP, Swiss:Q9H295). This is thought to be a novel receptor protein that shares no identity with other multimembrane-spanning proteins [1]. It is thought to have seven putative transmembrane regions [1], two of which are found in the region featured in this family. DC-STAMP is also described as having potential N-linked glycosylation sites and a potential phosphorylation site for PKC [1], but these are not conserved throughout the family. 25.00 25.00 26.90 26.90 24.50 23.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.94 0.71 -4.68 19 281 2009-01-15 18:05:59 2004-08-17 10:10:02 8 6 81 0 182 260 0 173.30 25 29.66 CHANGED F-NhYlTctFhph-pcccchsttslLPLpppEcpphlplsphphotp..Ehhplhhphh.lhlphlthshhlhlDahlapllshlpp..........h...phps.ttlplpl.sGsuhhuclh............cphhpsFpshp...phshphss..pcC.lPpPph.shtshlhlslLhLhshhllhlpsYshRlR+lIsu.FYPpcE+cRlhaLa ......................................a-NlYITp.FhphDtpcpphtp.s.lLPLpptE..p.p..phl..spht.l.stp..Eh.hp.h.hhthh.hhhhhhhhhlhshlDahlahllphhpp.............................................php.s..hlplpl...tGtuhhuplh.............+phh.t.shsh.p...phs.hphss...pC.LspP..p..stpta.hhl..sllhhlhhhlslhpsashRLR+.llsu.a....aPppEc.cRlhaLa............................................................... 0 45 59 112 +7615 PF07784 DUF1622 Protein of unknown function (DUF1622) Fenech M anon Pfam-B_9062 (release 14.0) Family This is a family of 14 highly conserved sequences, from hypothetical proteins expressed by both bacterial and archaeal species. 
19.90 19.90 20.50 20.50 18.60 19.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.37 0.72 -4.33 51 422 2009-01-15 18:05:59 2004-08-17 10:21:47 6 2 358 0 127 289 25 80.30 35 64.72 CHANGED Gslhuhh.......phlttthtpp.......stsa.....p.plRhplGphllLGLEFhlAADIlpTsl.sPohp-lhhLusIllIRThLuYFLs+El ..........................Ghhhuhhphlp..hppp.......spth.....p.tlRthLGsalLLuLEhLluADIlcTll..pP.Thp-lhhLuslllIRTlLSaFLppEI...... 0 44 91 112 +7616 PF07785 DUF1623 Protein of unknown function (DUF1623) Fenech M anon Pfam-B_9100 (release 14.0) Family The members of this family are all derived from relatively short hypothetical proteins thought to be expressed by various Nucleopolyhedroviruses. 25.00 25.00 61.90 61.60 23.70 23.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.17 0.72 -4.03 13 31 2009-01-15 18:05:59 2004-08-17 10:24:15 6 1 30 0 0 26 0 87.60 39 88.90 CHANGED LoAaVLhVsNs.sh...p.p.hYhthLc+asVlDulMCs.NGDCLAVslossshhsp...............p.ppLcll-...pas.psl-hLh-KIYsIV-hYN .LhsaVlhlsNs..ch...p.p.IY.pYLp+asVlDulMCs.NGDCLAVsVossshlpp.....................psLcll-...pts.psl-hLp-KlasIlEhYs 0 0 0 0 +7617 PF07786 DUF1624 Protein of unknown function (DUF1624) Fenech M anon Pfam-B_9180 (release 14.0) Family These sequences are found in hypothetical proteins of unknown function expressed by bacterial and archaeal species. The region in question is approximately 230 residues long. 
20.60 6.20 20.60 6.20 20.50 6.10 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.62 0.70 -5.01 49 1276 2012-10-02 17:00:17 2004-08-17 10:26:21 7 9 944 0 477 1471 482 201.10 17 57.55 CHANGED RhhtlDhhRGlAllhMlhaHFsaDLpaFGhhshsht.sshathhup.hlAshFlhluGlSLsluhspshph..........ppah+RhhplhuhAhlIossTa....l..hhP.pual.hFG..................ILHhIu....luslluhhF..h+hshhshhhhuhhhlhh........shhhtt...................hhsss.....hLhWlG....lhs...tsh..ho.DYhPlhPWhGlhLhGlshuphhh.p...tpht...hst.hhhtsLsh.l..GR+SLhl..YLlHQP ...........................................................................RlhslDhhRGls.llhMll......s...p....h......h.....h.................h..........t......................h..h.....h...............h.........................t.....h............................................h.......h....h..h....h............sp....h...........s....h....sh..Fl....hls..G..h..ohh..l...h..ht.p.t.th................................................tth.h..p....R..s..l.....h...l.....h...h...h..u..h..h.....l.s.h.h.sh.................h......h.hs......ph....l....hh...s...................................................................................................l.Lpt.l..u....l.s....h..l...l.s.h....h....h............hp.h......h....h....h....h.h..h.h....u..h.h.h..hh.h..........................hh.....................................................................hh.t.................h.......hh........h.......................................s..............h........sh.h.......s...hh.....sh.hhhGhhhu..h.h................................................................................................................h..h.................................................................................................................................................................................................................................. 
0 161 323 402 +7618 PF07787 DUF1625 Protein of unknown function (DUF1625) Fenech M anon Pfam-B_8907 (release 14.0) Family Sequences making up this family are derived from hypothetical proteins expressed by both prokaryotic and eukaryotic species. The region in question is approximately 250 residues long. 20.30 20.30 20.30 21.00 20.00 19.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.49 0.70 -5.13 22 173 2009-09-11 05:28:20 2004-08-17 10:32:38 7 4 130 0 114 183 5 235.90 27 61.61 CHANGED MYQWVEppppcp.pp.ss..p....oYpYsppWpsphlsSppF.t.pGHpNPss.h.lpupshhs.pV+lGsahLupslhpplss.hcslshss................pssttphphpsshhYhsps.stpPpl.....................GDlRlpFphs............ss.sollucQpss......plhPapopss............................................ppl..hlh.GphoscclFp...pttppsshhTWhhR..hsGalLhFlG..hhhhhs.lthLsshlPllts.............lsshshhlhu..hhluhslsLhsIAhuWlhYRP ...........................................................................MYQWlEppppcphpp.stt.p...........ppYpYsp-Wpschls..SppF..ppthGHp............NPs.....t..hslcSh..shhAstVplGt.ahLusslhcchss..hptls.h..sp...........................................ps.pssl.ph....psshhYaups...stpPpl.............................................................GDlRlpFphu...............ss.ph.soVl..u..cQ.p..us........plhP.ap..opsG.....................................................pplh.ll..t..Gp...h..oscchFp...ppp...pssshhTWh.hR...hhGahhhFh.G..hthhh....ph....lhhlh...s...hhPh.hts................lsshshhh....hs...hhhuhslslhsluhuWlhaRP............................................................................................................................ 0 49 66 89 +7619 PF07788 DUF1626 Protein of unknown function (DUF1626) Fenech M, Sammut SJ anon Pfam-B_9705 (release 14.0) Family This is a family consisting of sequences from hypothetical proteins of unknown function expressed by certain species of archaebacteria. 
One member (Swiss:Q9YCN7) is thought to be similar to tropomyosin [1]. 21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.14 0.72 -4.19 18 128 2012-10-11 20:44:44 2004-08-17 10:41:17 6 6 56 0 61 129 12 70.70 35 27.66 CHANGED ScVElDlllKDGplIllEIpSSlpRGDlhhlcRKs-LYE+scuhKhs+llllTPaI--R....s+thAcclGIc ....pplElDl.ll+sGhhlllElKSulc+uDl..h....htRKschYE+.hps++ss+l.llloPhlcc+....uhthAccLGI-.............. 0 26 36 45 +7620 PF07789 DUF1627 Protein of unknown function (DUF1627) Fenech M anon Pfam-B_9837 (release 14.0) Family This is a group of sequences found in hypothetical proteins predicted to be expressed in a number of bacterial species. The region in question is approximately 150 amino acid residues long. 25.00 25.00 27.80 27.10 24.10 23.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -4.68 4 472 2009-01-15 18:05:59 2004-08-17 12:03:29 7 1 171 0 4 267 0 143.60 65 64.12 CHANGED IEQ+GPQTADELAshFGsTSRKVASTLAMAISKGRLIRVNQsGKFRYCIPGsNLPAEPKAASVocsDGKAFPQPsGsALPVtEAATQE-IKTEoVAslVQs.PSFTcppsDtLlhPSL+hANhtLRRAKspVQKWERVCAALRELNKpRDIlRpI ...........................................ItQcGspTs-ELAshFGlo..oRKVASoLAhshupGRLhRVNQsGKFRYCh...P...G...ssLPAEP.KAAsVs..EoDGKAFPQPA.GsALPVpEAAT.QE-IKTEoVA-hVQshPSFT..cs.ps..DsLILPSLHhANRELRRAKupVQKWERVCAALRELNKHRDIlRpI....................................... 0 1 1 2 +7621 PF07790 DUF1628 Protein of unknown function (DUF1628) Fenech M anon Pfam-B_5107 (release 14.0) Family The sequences making up this family are derived from hypothetical proteins of unknown function expressed by various archaeal species. The region in question is approximately 160 residues long. 
24.20 24.20 24.30 24.60 23.90 24.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.96 0.72 -3.23 48 295 2009-01-15 18:05:59 2004-08-17 13:02:07 6 14 62 0 237 307 97 81.90 25 31.71 CHANGED cAVSPVIGVlLMlulTVIlAAlluuFlhuh....ssspssPp.ssl...........p.p...hssssst...............lplpHpGG-slss...pslplh...lsssss .........puVSPVlGVlLM...lulTVIl.AAllus.as.huh.....sss.sppsPp..ssl.......................................php.........hssssst....................ltlpa.tG.G.-.s.lsh...pplplhh.....s.................................................................................... 0 40 200 223 +7622 PF07791 DUF1629 Protein of unknown function (DUF1629) Fenech M anon Pfam-B_9012 (release 14.0) Family This family consists of sequences from hypothetical proteins thought to be expressed by two members of the Xanthomonas genus. The region in question is 125 amino acid residues long. 22.10 22.10 23.50 24.80 21.40 20.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.58 0.71 -4.24 13 56 2009-01-15 18:05:59 2004-08-17 13:46:31 6 1 29 0 16 57 1 119.60 48 57.33 CHANGED ctGEFahLcsDhcusGsspGVVFENc+pLLoPPRLILRPc-GGFPsLREpP+LsY-PspGs.PcDLEuGFSGYWLVSERL+pVhtuVDP-AFAFA-sDaRLADGopGPRaaLCDVVRpLDALDE .....................................GpFahlpssh.tst.spGV.FpN.cpLlsPsRlILpP.c.pGGFP..sL+EpP+LlYcPscGs.P.cDLEsGFSGYWLVSERL+pVhpsVDPcAFAFA-sDaRLAD.G.o.tGPcaaLCDVVRpl.DALDE. 0 2 5 8 +7623 PF07792 Afi1 DUF1630; Docking domain of Afi1 for Arf3 in vesicle trafficking Fenech M anon Pfam-B_9160 (release 14.0) Domain This domain occurs at the N-terminal of Afi1, an Arf3p-interacting protein, is a protein necessary for vesicle trafficking in yeast. This domain is the interacting region of the protein which binds to Arf3, the highly conserved small GTPases (ADP-ribosylation factors). 
Afi1 is distributed asymmetrically at the plasma membrane and is required for polarized distribution of Arf3 but not of an Arf3 guanine nucleotide-exchange factor, Yel1p. However, Afi1 is not required for targeting of Arf3 or Yel1p to the plasma membrane. Afi1 functions as an Arf3 polarization-specific adapter and participates in development of polarity. Although Arf3 is the homologue of human Arf6 it does not function in the same way, not being necessary for endocytosis or for mating factor receptor internalization. In the S phase, however, it is concentrated at the plasma membrane of the emerging bud. Because of its polarized localisation and its critical function in the normal budding pattern of yeast, Arf3 is probably a regulator of vesicle trafficking, which is important for polarized growth. 25.50 25.50 26.00 25.50 24.90 25.20 hmmbuild --amino -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.76 0.71 -4.18 31 156 2012-10-02 14:18:06 2004-08-17 15:51:17 7 6 144 0 117 210 0 149.80 36 19.68 CHANGED pV-YILlApFDlD+GPlhcHQYPssl.sG..............sEphLAcLMLPDpsHsR.sp...DaTlFhLacss.spt.p...........ttt.......................pt.t......................................p.....tt....s.hahlsllpsptDpshcRGuhlKuhAlsTphshlaha+..sllhhsL- ............................................pVEYILlApFDlD+GPhhcHQYPssIsG...................................DEphLAE.......LMLPDpsHhRsp.....DWTlFa.La+sssspptp.............................t............................................p..................................................................................................sppptpsscs.....sshhYllNlVs.sppDpos+RGAhlKuhAIsTphsahplaKPlLllsL............................................................................................................... 
0 42 72 103 +7624 PF07793 DUF1631 Protein of unknown function (DUF1631) Fenech M anon Pfam-B_9170 (release 14.0) Family The members of this family are sequences derived from a group of hypothetical proteins expressed by certain bacterial species. The region concerned is approximately 440 amino acid residues in length. 19.70 19.70 19.70 19.80 17.50 19.30 hmmbuild -o /dev/null HMM SEED 730 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.13 0.70 -6.40 36 372 2009-01-15 18:05:59 2004-08-18 12:44:36 6 9 187 0 125 418 96 617.90 21 89.36 CHANGED ssptpsst.....sphPslLhpl+-pstppLtphLpshF-ssDDsLF-hA-+Atss.p-QshaF-AMR-LRh+RcslppsFhppltpuFss.Lspspssss..shs...hs.ssLoLVppc-LEcplAlcsMls+spschstsLtpLstRLshLh.sshplppcsNPLuPptLCpsFhcAs.psLslsl+s+LlllKLFERhVlsplsplYscANplLlpsGVLPcLp.s....................................hppspsss.........sssptsssssssssspsuss.ttttt.......................................pshFs....tLppLLtpsRssttsst.ssssss............................tslssp-LhphLupLQ...........t.ssshsssp.......hslpp.lppLLpphp..spsGpppslupsD-DlINLVuMLF-aIL-DcsLsssl+ALIuRLQIPlLKVAlhD+sFFs+ssHPAR+LLNplApAuhGWsspss.htcDsLhp+lpplVpRlLs-FscDsulFs-Lhp-FptFhpp-cRRs-llEpRsp-AEcG+s+tctA+pplpptLpp+ltsps.LPpsVhplLppuWocVLhLshL+cGcpS.cWppulplhDcLlaslp.pp.pspsppplhphlPsLLcsLRpGLppl.uacshpssphhppLcphthpshps........................................................................................t.st.stpssstspss-clsssttppsttttt....................stppphlpplcpLclGoWlEhtccc.tchhRsKLuuhl.csos+alFVNRpGhKVtEtotpsLAhthppGpl+lL..Dcu.hLFDRAL-uVlssLR 
..................................................................................................s..................h...t.hht.hh..ht.hh.thhtth........tpt....Lhp.upputss...pt.h...hts....h....ptl.c.....pt....tth..t.thhtth.pthtt....h.t.t....t................t.ttL......sLltpppl-pplhlpthstth.pp.t..L.t.Lp.tRlshlh.......thttpp.PhtPthlsthhhpsh.tth.s.lt.p..sphhlhchhcp.lhtt.hsthYtphsphLhttGl.hP.t.h...................................................................................t....t.t..................t...t.....t.......s..............................................................................................t.hht.....tlpthht....t...htt............................................................................h.s.tttlhp.h.Ls.tlQ...............s...tt....................h.tt..htp.llpt..........tp.t..tthls..-pss.....ls...........lluhlFp.lhp-tplsssh+shlu+LplPh.l.+.l.AlhD.pF.FsptpHPARcLLsplupsshuhss.pss...t.p...ptLhtc..lpphVpp...lhp.pap.p.s..slFt.p...h.t-ht.t.a...h....p..........p....t.tpc...sph.hpp+hhc.stc.upt+hph....Ac.....p....tstttlpp.....hh.t.st.........s....lPph...l.phLppsWtcllhh.sh.l.+..p....G......tpu....thpt..h............h..t.hhspLlhshp.tt.......tttt.tl.t.....h.stLhpt..lppultph...u.hs..tttthhppLpp..hhhtshtt.....................................................................................................................................................................................................................................................t.........t...hh.pp.hs....ttt..................................................tpthh..t.h.cplthGsWl.-h.....ppp..tphhps+Lshhs...ssphlF...ls.ppG..h+h.h.h...shttLAh.hpt..Gthphh....ppt...lh-cAhpssltt............................................................................................................ 
1 33 80 108 +7625 PF07794 DUF1633 Protein of unknown function (DUF1633) Fenech M anon Pfam-B_9750 (release 14.0) Family This family contains sequences derived from a group of hypothetical proteins expressed by Arabidopsis thaliana. These sequences are highly similar and the region concerned is about 100 residues long. 20.40 20.40 34.20 29.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 790 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.40 0.70 -6.58 2 28 2009-01-15 18:05:59 2004-08-18 14:02:52 6 5 2 0 0 28 1 198.90 20 84.20 CHANGED MpRLFSAPHLTsQIRRGEEISsISKIVVMMA.LNNFP.LH.hSRQLTpLs............................................SDh-DSQNKSNHSYGDssSSS..Rst.......+RVITLGGMGPIRpPSTRsDKPKKKKAsKQtGuhPMTNsDL.................Vs.cRtstVslpl+.hLpc.................P.L+L.pNVS......sG..shYsshhtcptlhhPhPAu..........Rs+-V...............sLSthcLt..hLLhpG......................................pschuTFsRDR.VsALslsPAsPsIs.PsAQ.DPlEEVPpl..hPpA.h.h...ls.sSsSTSp........Cso..........pARAsDLSAsVsAARsoLAsSpspASsSHPSLPtsN.......................................susstAAVul...................................uAsN+.......................LlthhctR.sQVP..............oupphp.GcphhRc.sspltssp-REVp.tAc.....h.RLKhELSTSKDLEKGYAEKIthME.EFtGLpADKQhARsQIHRLpp++-ELSK+VhDLTS.AQGspKAVHDAKVELAAuY.KLLuGIK-KWVsKKEaTVLEuQAAEVESNLALIDQIsKAAlDLTlEKPRhQAElDDlEARCp.KEVSDFTLSKLDlPcVSEh.VVRPhsVDEQGTPIGLDEFGSNKDsFPtGLt-ssGTVFAhPAGtS+E 
......................................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +7626 PF07795 DUF1635 Protein of unknown function (DUF1635) Fenech M anon Pfam-B_9707 (release 14.0) Family The members of this family include sequences that are parts of hypothetical proteins expressed by plant species. The region in question is about 170 amino acids long. 
21.40 21.40 22.20 22.20 20.40 20.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.27 0.70 -11.66 0.70 -4.96 5 89 2009-01-15 18:05:59 2004-08-18 14:04:57 6 7 23 0 64 90 0 190.00 30 60.51 CHANGED cThEELRQoLlYTThELEQTKMhAsEElRp+DEQlhpLcDLLsKTlKERDEApE+hp+LLhcs..L.Qpp......................pDEQlpPNp.pLspsNSFSSSDsE...ESIsSSp.psh................-PsspppLcclsss-hLh..ll.-KsLPEKGKLLQAVlKAGPLLQTLLLAGPLPQWRHPPP.LcS..FEIPPVo.....ls.tCP.hossGCG.NFN+KRVa.luDtShsETKYQR.LLc ........................................................h-EL+ppLhhsohEL-thp..Ap-Eh++pcpplhpLhcLLptshpERDEApcphppLh..htt..h.t.t.................................................pspp.......tp....pp...u..hs..s.Sssp.....pshsSs....h........................................t..st.t.......t............t...phh....hs..t+sLPpKG+LLpAVhcAGPLLQTLLlAG...........PLPpWRpPPP.hp..s..hplPsh......................................................................................................t................................................... 0 12 47 56 +7627 PF07796 DUF1638 Protein of unknown function (DUF1638) Fenech M anon Pfam-B_6091 (release 14.0) & Pfam-B_3149 (release 23.0) Family This family contains sequences covering an approximately 270 amino acid stretch of a group of hypothetical proteins.\ These proteins are expressed by archaeal species of the Methanosarcina genus. 
20.40 20.40 22.10 21.40 19.50 18.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.06 0.71 -4.61 55 232 2009-09-11 07:42:39 2004-08-18 14:25:11 6 8 168 0 105 231 180 166.10 23 67.84 CHANGED phLsu.tLHstP-clpstlppslpchp...........sth-pIhlsYus..CGsu.....Gtlppph..schG.........lhh........htuscChuhhh...Gssphttch....schssFYLTshhscpa-s..............hhhcshGhDcpspLc....phhaup.Yp+llals..........psc..............-.thppcscchAsplGLsapch.sshGsLptslpsh .............................hLsstLHsp.P..-clpptlpptlcphp............th-p...IllsYGt..CGsu.....Ghlspthp..t.........lhh............psscChshhh...Gspphttp......pchssaa..LTs....hhhcpacs...................hhhcthGh-ct.splh.....chhaspYpcllhls........ppc......................-..htppscchAchhslshphh.sshs.Lpphlt................................................................................................. 1 44 83 89 +7628 PF07797 DUF1639 Protein of unknown function (DUF1639) Fenech M anon Pfam-B_6036 (release 14.0) Family This approximately 50 residue region is found in a number of sequences derived from hypothetical plant proteins. This region features a highly basic 5 amino-acid stretch towards its centre. 22.20 22.20 22.20 23.30 20.90 21.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.55 0.72 -4.57 20 205 2009-01-15 18:05:59 2004-08-18 14:57:55 9 6 19 0 117 191 0 49.90 50 19.35 CHANGED LS+cEIEEDFhAMpGs+PPRRPKKRsKsVQKpLD...............hl.PGhhL..sclos-pY .LSp+EhEEDFhAhpG.sK.PpRPKKRsKhVQ+pLs........................hlhPGhWL..s-lot-pY................................... 0 14 66 95 +7629 PF07798 DUF1640 Protein of unknown function (DUF1640) Fenech M anon Pfam-B_6194 (release 14.0) Family This family consists of sequences derived from hypothetical eukaryotic proteins. A region approximately 100 residues in length is featured. 
24.20 24.20 24.20 24.40 24.10 24.10 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.04 0.71 -4.23 24 513 2009-09-11 19:38:48 2004-08-18 16:04:56 6 13 255 0 324 490 8 162.70 26 59.06 CHANGED hFDTpthVp.pL.Ecs...............GFsppQAEslspslscllpsulpplspshVo+tphpp.........sthpQcschucl+s-lh.......................p...............h-+oEFsslpsEpE+l+sDLc+L+s+L+-Els+spAuh+LDLNLEKu............RlR-Ehupp-hK..................lp-hss+IDpElusL+splEosKhpslpaLhGsssushAlhLuahRlhh ..................................................FDThthVp.pL.c.pt.....................................G.FsppQApslhpslpp...llpss.....l.pt...l.....t.p.s.....h.ls.+.tc.hcp......................tthp.psshuc..l+s-l.........................................................................ccsch...sth+sppc.+lpt-lpplp....p...cLpp.E.l.sc.hps...sh+.LDhNhc+u..................cl+-..stp-hc...........................l.p-hpp+I....-p........-.l.s.s....l+tt....lE....s......hKhp....sh.p.a.h....h..u.h.hs.hhslhhuhhh.......................................... 0 78 163 247 +7630 PF07799 DUF1643 Protein of unknown function (DUF1643) Fenech M anon Pfam-B_9851 (release 14.0) Family The members of this family are all sequences found within hypothetical proteins expressed by various bacterial species. The region concerned is approximately 150 residues long. 
20.20 20.20 20.40 20.50 19.80 20.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.87 0.71 -4.47 57 556 2009-01-15 18:05:59 2004-08-18 16:06:40 7 5 475 0 91 385 369 133.70 33 76.28 CHANGED RYhLpRpW.........s..tsttpllFlhLNPSpAsttpsDPTlcRh.phA.+sh.GaGuhhlsNlFAhRuTsPpsLpp............ssDPlG..sc.NDthlhchsp.hu....spllsAW.GspGt....hhsRs....ppVhchLpst.............lhpLGlo+sGp.PcHPL ...................RYhLp+pW.........s....tpc.sshhIsh.P...shs.....s....s.....hp..DhTsth.lhsh.............h.....ps.....................saG...ulhllNLFu.hp.T.Pc.sLc.c...............hp.c.Phs.....pc..sDhplhc.sls..cu.............-pVlhAW.Gs....huc.........hhpRs......ppV.h.chLcsp...................tttlhp.l.h........s.tssc..hHPL....................................................... 0 21 51 76 +7631 PF07800 DUF1644 Protein of unknown function (DUF1644) Fenech M anon Pfam-B_5078 (release 14.0) Family This family consists of sequences found in a number of hypothetical plant proteins of unknown function. The region of interest contains nine highly conserved cysteine residues and is approximately 160 amino acids in length, and is probably a zinc-binding domain. 
25.00 25.00 26.00 31.70 24.50 23.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.89 0.71 -11.18 0.71 -4.38 18 198 2009-01-15 18:05:59 2004-08-18 16:09:58 7 4 23 0 117 186 0 146.10 44 52.08 CHANGED -DspCPlChEaPHNAVLLhCSSacKGCRPYMCsTStRHSNCL-QF++uhsctts.sp......................................................................................t.pppc..cLsCPLCRGcVpGWpVVcp.ARpaLNpK+RoC.p-sCsFsGoYp-L+KHs+pcHPsu+Ps-lDPscppcWcpLEpcp-htDllS ................................................cspCslCh-hPHNAVLLhCSSacKGCRPahCsTshp+SNCL-..pa+puhsp.t..........................................................................................................................pppp..pLtCPLCRGpVpGWhll.c..ARpaLNtKpRsC.p-sCsFsGsYpEL+KHs+pcHPpu+PpclDPs+ptcWcphEppp-htDllS................................................... 0 18 64 91 +7632 PF07801 DUF1647 Protein of unknown function (DUF1647) Fenech M, Pollington J anon Pfam-B_5249 (release 14.0) Family The sequences making up this family are all derived from hypothetical proteins expressed by C. elegans. The region in question is approximately 160 amino acids long. The GO annotation for this protein indicates the protein to be involved in nematode larval development and to have a positive regulation on growth rate. 20.80 20.80 20.80 21.60 20.70 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.66 0.71 -4.69 15 96 2009-01-15 18:05:59 2004-08-18 16:22:08 6 6 28 0 90 84 29 119.60 29 31.81 CHANGED CtChSspoGKoYsFCYpsPpNssSIGKKFsCuhLsTLEcLsLlspss.phlsLssshcNpsslVFVSATS-DHhs.uhpShpSlR+aYPppKaILYuLsLocs.IppLscp.pNlEhRtFNTotYPcYVsNWhcY+FKPLllA ..........................................................Y...ps.t.hGtp..FsC.hlthh-pht..............lh...........t..t....h.ph....pp..p.spppls.hVSssSssHhtthhp.hpslppahPsp+hllYuLslspt.lpp..lt..p................p.sshchRpFshotYPpaVp...shhpYpaKslllA........................................... 
1 34 41 88 +7633 PF07802 GCK GCK domain Fenech M anon Pfam-B_8992 (release 14.0) Domain This domain is found in proteins carrying other domains known to be involved in intracellular signalling pathways (such as Pfam:PF00071) indicating that it might also be involved in these pathways. It has 4 highly conserved cysteine residues, suggesting that it can bind zinc ions. Moreover, it is found repeated in some members of this family (such as Swiss:Q9LMF3); this may indicate that these domains are able to interact with one another, raising the possibility that this domain mediates heterodimerisation. 23.40 23.40 23.40 23.40 22.10 23.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.78 0.72 -3.85 8 81 2009-01-15 18:05:59 2004-08-18 16:43:26 6 6 28 0 64 77 3 69.50 36 36.18 CHANGED GECtFCpFMKGGuCKEuFlAW.EcCs.-tAccsccpDhVT+CtElpuphK+CMcsHuDYYpPlLAuEKsucs+hcKEL ................-ptashFMKuGuCK-sFhAW.-cCs.pts...pt.ptp..shhpcCt-shstLc+CMc.AHuDY..YpPlLsspcshtpph.tt........................ 0 16 26 34 +7634 PF07803 GSG-1 GSG1-like protein Fenech M anon Pfam-B_9727 (release 14.0) Family This family contains sequences bearing similarity to a region of GSG1 (Swiss:Q9Z1H7), a protein specifically expressed in testicular germ cells [1]. It is possible that overexpression of the human homolog may be involved in tumourigenesis of human testicular germ cell tumours [1]. The region in question has four highly-conserved cysteine residues. 
19.50 19.50 19.90 19.90 19.40 19.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.67 0.71 -4.50 8 150 2012-10-03 00:20:40 2004-08-18 16:56:19 6 3 42 0 77 136 0 118.40 46 41.34 CHANGED +psR+sRuLLSlsLNhLALhFSsoAhlToYWCpGTQKVPKPhC.ot...s+ppNChshssssss...................sP...............ssVpYsWETGDDRFlFRpFHTGlWhSCEEslassuc+CR........SFI...cLuPsSp+G ..........ptpRshLolhLshLALshSsoAllooYWC.GTQKVPKPLC..up...sttspChchsss.su...t.ss................sp..........................psVpYsWETGDDRFhFRtFHoGhWhSCEEsl..c..p.sG..E+CR........SFl...-LsPstcp.u...................................... 0 3 12 41 +7635 PF07804 HipA_C HipA-like C-terminal domain Fenech M anon Pfam-B_8632 (release 14.0) Domain The members of this family are similar to a region close to the C-terminus of the HipA protein expressed by various bacterial species (for example Swiss:P23874). This protein is known to be involved in high-frequency persistence to the lethal effects of inhibition of either DNA or peptidoglycan synthesis [1]. When expressed alone, it is toxic to bacterial cells [1], but it is usually tightly associated with HipB [2], and the HipA-HipB complex may be involved in autoregulation of the hip operon. The hip proteins may be involved in cell division control and may interact with cell division genes or their products [2]. 
24.00 24.00 24.00 24.10 23.80 23.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.01 0.72 -3.86 175 2174 2009-09-13 11:39:15 2004-08-18 17:00:02 7 16 1315 18 624 1916 261 82.00 26 20.24 CHANGED Rlthpshsslhshsstp......................suat.plhphl............pp..hst............ttshpchhcphlFNhlluNsDsHsKNauhlhs....ss.s...h.pLuPhYD ........................................................Rl..pshsph..hshsst......p..................suht.pl.hphl....................tt...hs.t.s..................htch.tp.h.h.c.t.h.lFshLluNsDsHsKNaShhhp.....ss.s..................a..cLuPhYD........... 0 159 359 505 +7636 PF07805 HipA_N HipA-like N-terminal domain Fenech M anon Pfam-B_8632 (release 14.0) Domain The members of this family are similar to a region close to the N-terminus of the HipA protein expressed by various bacterial species (for example Swiss:P23874). This protein is known to be involved in high-frequency persistence to the lethal effects of inhibition of either DNA or peptidoglycan synthesis [1]. When expressed alone, it is toxic to bacterial cells [1], but it is usually tightly associated with HipB [2], and the HipA-HipB complex may be involved in autoregulation of the hip operon. The hip proteins may be involved in cell division control and may interact with cell division genes or their products [2]. 21.00 21.00 21.30 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.68 0.72 -3.76 170 2004 2009-01-15 18:05:59 2004-08-18 17:04:41 7 15 1218 8 598 1762 241 83.10 28 20.04 CHANGED SluGspsKhslhh.............................ssssspaIlKhss.sp............h.shstsEhhshp.lAp.thGl.sss..pspl....hphss................ptshhlcRFDR ........................SluGsp.Khslhh.t...........................tth.h...s...s..s.s.sspaIlKhsh.sp.............s.htshspsEahshp.lAp...th..Gl...sss...pspl...hp.hss.................tpslhlcRFDR................... 
1 152 346 484 +7637 PF07806 Nod_GRP Nodule-specific GRP repeat Fenech M anon Pfam-B_8942 (release 14.0) Repeat The region featured in this family is found repeated in a number of plant proteins, some of which are expressed specifically in nodules formed during symbiotic interactions with certain bacterial species [1]. Some of these proteins are also termed glycine-rich proteins (GRPs), due to the presence of a glycine-rich C-terminal region in their structures [1]. Bacterial infection is required for the induction of nodule-specific GRP genes, and it is thought that nodule-specific GRPs may play non-redundant roles required at specific stages of nodule development [1]. Members of this group of proteins may be cytosolic, whereas others are thought to be membrane-associated [2]. 25.00 25.00 49.20 35.60 18.50 18.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.09 0.72 -4.34 10 36 2009-01-15 18:05:59 2004-08-18 17:16:57 6 5 5 0 0 36 0 36.80 65 52.23 CHANGED Gss+ESKTKhGhDGWRDWGGSFWp.DscENNGGG-KEGG GVs+ESKTKlGhDGWRDWGGSFW-stcENNGGucKEGG. 0 0 0 0 +7638 PF07807 RED_C RED-like protein C-terminal region Fenech M anon Pfam-B_9789 (release 14.0) Family This family contains sequences that are similar to the C-terminal region of Red protein (Swiss:Q13123). This and related proteins are thought to be localised to the nucleus, and contain a RED repeat which consists of a number of RE and RD sequence elements [1]. The region in question has several conserved NLS sequences [1]. The function of Red protein is unknown, but efficient sequestration to nuclear bodies suggests that its expression may be tightly regulated or that the protein self-aggregates extremely efficiently [1]. 
21.50 21.50 25.20 24.60 20.50 16.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.70 0.71 -3.91 7 152 2009-01-15 18:05:59 2004-08-18 17:18:46 6 11 110 0 95 143 0 113.50 59 20.48 CHANGED YuECYPGh.E..cthsDSD-EsDaSKMDhGsK.KGsltRWDF-TpEEYucYMpsKEALPKAAFQaGVKMp-.GRKTR+.pt.+s-KtcLDR-hp+IspIlp++K.htcDGu..........sssK+sKa .....................YAECYPuh...-.....D.hhsDSD-E...VDYS...KMD....G...N.......K..KGPLGRWDFDTpEEYS-YMssKEALPKAAFQ..................YGlKMu-.GR.K.T....R+....hp..E.......p.......N-KA.....ELDRpW+KIssIlpKRK..h-t.D..Gs..................p.K.......................... 0 30 44 71 +7639 PF07808 RED_N RED-like protein N-terminal region Fenech M anon Pfam-B_9780 (release 14.0) Family This family contains sequences that are similar to the N-terminal region of Red protein (Swiss:Q13123). This and related proteins contain a RED repeat which consists of a number of RE and RD sequence elements [1]. The region in question has several conserved NLS sequences and a putative trimeric coiled-coil region [1], suggesting that these proteins are expressed in the nucleus [1]. The function of Red protein is unknown, but efficient sequestration to nuclear bodies suggests that its expression may be tightly regulated of that the protein self-aggregates extremely efficiently [1]. 
20.40 20.40 20.40 21.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.67 0.70 -5.15 8 299 2009-09-10 21:25:46 2004-08-18 17:21:04 8 16 226 0 212 283 4 188.50 30 37.54 CHANGED KKK+h.stLh+p-csc.cclspKYRDRA+ERRcGtNcDhcsssl.......ssa+AVssshtss.puu-pc+psIpESKFLGGDhEHTHLVKGLDauLLpKVRuElhsKpspE-E.......tsclststpt.h.tttp..ttcpt........p.pschpFpsplu+sI.............F+hh..hcp..............ppl.p.NEhFts.....GRMsYlh-L-sEh.-sDIPTTllRSKsDlPstctth.TlssNshlls+LuplhS.....ahRtuspspcsKKKcK ....................................................t..c.p..chsp..pYRDRA..+ERRcs.s....pDhp.p.sch.................tsh+Alt...s..h...p...h...s.....p.....s.t-.pc+.p.hIpc.SK.a..LGGDh-HTHL..VKGLDauLL..pKVR.......s....-........l.t....s.cppcc-c........................................................p.t..........................................p.pp..thtt...tp.l.............ht.....................................t.................h................................................................................................................................................................................................................................................................................................................. 0 74 115 170 +7640 PF07809 RTP801_C RTP801 C-terminal region Fenech M anon Pfam-B_5179 (release 14.0) Family The members of this family are sequences similar to the C-terminal region of RTP801, the protein product of a hypoxia-inducible factor 1 (HIF-1)- responsive gene [1]. Two members of this family expressed by Drosophila melanogaster, Scylla (Swiss:Q9NHN4) and Charybde (Swiss:Q9NHN5), are designated by the GenBank as Hox targets [1]. RTP801 is thought to be involved in various cellular processes [1]. Its overexpression caused the apoptosis- resistant phenotype in cycling cells, and apoptosis sensitivity in growth arrested cells [1]. 
Moreover, the protein product of the mouse homolog of RTP801 (dig2 (Swiss:Q9D3F7)) is thought to be induced by diverse apoptotic signals, and also by dexamethasone treatment [2]. 20.50 20.50 23.70 26.70 18.90 18.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.35 0.71 -4.43 9 137 2009-01-15 18:05:59 2004-08-19 09:24:11 6 1 74 2 82 122 0 115.20 45 52.81 CHANGED LppAKpppLtCoclLlPscLhsplupEll+lStpEPCGlRGshl.lphEs-..pss+plAplpsDPssVsTFELhLsL+.Dpc.sWsplhsh...........Fhs.....s.uhspolpl...SssF+llKpKLYS ..............LppAK.ps..pLtCoclLlPpcLspclAp-lLRLupsEPCGLRGsllclslEpp......p.sC+cl..up.......lss.....D...P.............slVPTF..ELoLVL+.Dsp.sWsplpsh...........Fhs.......s.uhppolhL...SsuFRllKKKLYS................ 0 14 19 48 +7641 PF07810 TMC TMC domain Fenech M anon Pfam-B_5063 (release 14.0) Domain These sequences are similar to a region conserved amongst various protein products of the transmembrane channel-like (TMC) gene family, such as Transmembrane channel-like protein 3 (Swiss:Q7TN63) and EVIN2 (Swiss:Q8IU68) - this region is termed the TMC domain [1]. Mutations in these genes are implicated in a number of human conditions, such as deafness and epidermodysplasia verruciformis [1]. TMC proteins are thought to have important cellular roles, and may be modifiers of ion channels or transporters [2]. 25.00 25.00 25.40 28.80 20.80 23.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.30 0.72 -3.64 26 598 2012-10-02 00:51:22 2004-08-19 09:39:48 8 5 96 0 316 501 1 108.80 41 14.30 CHANGED CWEThVGQEhY+LllhDFlhollsslhs-F.R+lhschh........hhhphhuhtEFsIspNVLcLlYuQTlsWhGsaFuPLLPslsslKLhllFYlKKhoLhtsspPsp+saRAS .......................................................CWEThVGQ.Ehh+LhlhDhlhol.hs.hLls-FhR.tlhl.chh.........sh.hph...uhsEFcIucNVLpLl....YsQshhWhGsFFuPhLPs...lssl+Lhlhh.Yl+ph..ulhpsshP..tp+sFRAS.............. 
0 69 95 195 +7642 PF07811 TadE TadE-like protein Fenech M anon Pfam-B_9054 (release 14.0) Family The members of this family are similar to a region of the protein product of the bacterial tadE locus (Swiss:Q9S4A6). In various bacterial species, the tad locus is closely linked to flp-like genes, which encode proteins required for the production of pili involved in adherence to surfaces [1]. It is thought that the tad loci encode proteins that act to assemble or export an Flp pilus in various bacteria [1]. All tad loci but TadA have putative transmembrane regions [1], and in fact the region in question is this family has a high proportion of hydrophobic amino acid residues. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.76 0.72 -4.07 180 2004 2012-10-01 21:13:59 2004-08-19 09:47:00 7 12 920 0 798 1915 145 42.30 26 24.50 CHANGED GssslEaul.lhPlhl.hllh...uhlchuhhhhspptlppAsppu...AR .......GssslEFul.lhPlll.hllh....ullphuhhhhs....ppslspAuppuAR................... 0 265 502 654 +7643 PF07812 TfuA TfuA-like protein Fenech M anon Pfam-B_9826 (release 14.0) Family This family consists of a group of sequences that are similar to a region of TfuA protein (Swiss:Q52872). This protein is involved in the production of trifolitoxin (TFX), an gene-encoded, post-translationally modified peptide antibiotic [1]. The role of TfuA in TFX synthesis is unknown, and it may be involved in other cellular processes [1]. 
25.00 25.00 25.30 38.70 21.70 19.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.56 0.71 -4.66 32 184 2009-01-15 18:05:59 2004-08-19 09:53:57 7 2 149 0 90 162 10 118.40 46 33.95 CHANGED DGlFtpssuVtH+EILtAlppGltVhGuuSMGALRAAELssaGMhGlGpIachY+cGtl......tsDDEVAlhasssph..tsLo.PLVNlRtTLptAhpssllsppttppLlssAcslaascRT ......DGhFhppsuVhHKElLt.AlspGVpVhGuuSMGALRAAELcsFGMhGlGtlFctYRcGhl.............psDDEVAVsHuss-.G..a.slopsLVNlRtTLptAhtsGllsspttcpllpsA+uhaaspRT.............................. 0 24 50 66 +7644 PF07813 LTXXQ LTXXQ motif family protein Bateman A, Fenech M anon Pfam-B_6101 (release 14.0) Family This protein family includes two copies of a five residue motif is found in a number of bacterial proteins bearing similarity to the protein CpxP (Swiss:P32158). This is a periplasmic protein that aids in combating extracytoplasmic protein-mediated toxicity, and may also be involved in the response to alkaline pH [1]. Another member of this family, Spy (Swiss:P77754) is also a periplasmic protein that may be involved in the response to stress [2]. The homology between CpxP and Spy may indicate that these two proteins are functionally related [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.52 0.72 -3.55 46 1977 2012-10-02 12:34:46 2004-08-19 10:47:52 7 6 1152 10 392 1261 72 99.30 31 59.45 CHANGED thttthstlhppLpLT-pQcsphpsltpshcsptpsh..........ppphtshtt................stssspclt..pphhstphct....hpthtpshpphhshLosEQ+pphcplt ...................th....tpshhacsL..sL..T-pQ+....pQh+-lh....pp....tR....cphpts...............shp-.h.c.sh+chh.....................sucsFDc..sts....cs..p..h.......p....c.....ht.ppptsp.....tl.t.hhcspsphYplLTPEQ+pphstp.h................... 
0 95 201 302 +7645 PF07814 WAPL Wings apart-like protein regulation of heterochromatin Fenech M anon Pfam-B_9039 (release 14.0) Family This family contains sequences expressed in eukaryotic organisms bearing high similarity to the WAPL conserved region of D. melanogaster wings apart-like protein. This protein is involved in the regulation of heterochromatin structure [1]. hWAPL (Swiss:Q7Z5K2), the human homologue, is found to play a role in the development of cervical carcinogenesis, and is thought to have similar functions to Drosophila wapl protein [2]. Malfunction of the hWAPL pathway is thought to activate an apoptotic pathway that consequently leads to cell death [2]. 23.90 23.90 24.00 24.00 23.60 23.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.97 0.70 -5.73 13 255 2009-09-14 15:20:58 2004-08-19 10:52:24 8 8 180 0 167 258 0 313.30 27 34.01 CHANGED spsVKshpElpEhGEppEacD-lEYlLsuLc.sssshusRCLShlsLsoKChssuFRhplRA+Gh.sp+lh+sltDspcc...........................shuLssuslhalLopDthshch..DppslclhlpLLc.............................tsspcps...t.spptls+spptlhchscphc..s.....sp+hclcslosutLAhEoh.........hShosp+sGt.FK-cLR.LGuL-+llchlt-sht.........spst..ctpss.hpplhtlp+CL+lLEssolhsspNQsallpappuhhspptsphhpphptphhph...............................hhshl+lllNLTpsssp...chuosthspp.shlsssh.phhphsshlspcssa-hplLuLulLINLsEpSppsR 
.....................................................................................................................l+php-lhchGEppcapD-l-alls..sl...p..s..s....p....sh.sh.RphS.hlpLssKhh..spF+hph.......Rup.Gh.s....tplhcsltstt.pc...........................shulss.u.sl....hhlLspst....shcl........cps..s...l...clhlpLLc......................................pt.....spstc............pppphsK...h.p.pt...l.hphs.cpl.p......................sp.......ph.c.l.p.shohuhLshEsl.................hsho.ppsst.h.+pplh.hGhL-pl....lchlhp..t...................p.s....tt.ptthhtshh.hhtp.sL.pl...LE..s.....sT..h...hs.....p...NQtal...ls..h..p.p...s...hh.....t.s..h.hhpt..stt...h.ph..............................................................................................................................................................................................................................hhhhltlllNlTpss.....p.hu.sp.t.h........spt...t.hl..s.sh....hhh..p..s..t.h..s..p..p...ph......h-hhlLhLshhlNhs..s..................................................................................................................................................................................................................................................................................................................................... 0 48 80 126 +7646 PF07815 Abi_HHR Abl-interactor HHR Fenech M anon Pfam-B_9732 (release 14.0) Family The region featured in this family is found towards the N-terminus of a number of adaptor proteins that interact with Abl-family tyrosine kinases [1]. More specifically, it is termed the homeo-domain homologous region (HHR), as it is similar to the DNA-binding region of homeo-domain proteins [2]. Other homeo-domain proteins have been implicated in specifying positional information during embryonic development, and in the regulation of the expression of cell-type specific genes [2]. 
The Abl-interactor proteins are thought to coordinate the cytoplasmic and nuclear functions of the Abl-family kinases, and seem to be involved in cytoskeletal reorganisation, but their precise role remains unclear [1]. 19.60 19.60 20.00 19.70 18.40 18.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.80 0.72 -3.87 11 332 2009-01-15 18:05:59 2004-08-20 13:21:50 9 4 91 1 135 371 0 76.80 62 17.42 CHANGED IuQsV-lHKEKVARREIGsLTssKpssRspKIluPus.....E.hh+YpRpPIsaosLDslGHGl+.st.....sspttp.pGol ...................ISQTVDIHKEKVARREIGILTTNKN.TSRTH.KIIAPAN....hERPVRYIRKPIDYTlLDDlGH..GVKh.h..p.tt................h................... 2 27 40 81 +7647 PF07816 DUF1645 Protein of unknown function (DUF1645) Fenech M anon Pfam-B_8798 (release 14.0) Family These sequences are derived from a number of hypothetical plant proteins. The region in question is approximately 270 amino acids long. Some members of this family are annotated as yeast pheromone receptor proteins AR781 but no literature was found to support this. 
21.60 21.60 21.60 21.60 21.00 21.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.90 0.71 -3.84 19 218 2009-01-15 18:05:59 2004-08-20 13:23:27 6 2 22 0 123 199 0 182.70 20 62.30 CHANGED ADELFpsGpIRPlp............Plhsh...............................................................Rut.hphtspss.t.s......hRcshRsLSP.cst.................s.stpssppsp.pussPsshs.................sosssuss...pSsSsuuS++WR.L+DLl..LhRSpS-G+cs..........u+cshhphs..st.....................p.hh.su.cspts..................tsossts+pcspt.soAH-hhYssp.R.A....tuE-h+RR....TaLP .....................................................................................................ADElFtsGpI+Phh.......................Phht......................................................................ptt.hp.tsts.....................h+t.h.+ph....ctt........................................................tttpssttsp.psssst..s.hs............................ssssspsp.....pS.sSstu..s...++W+..lpDhh...lh.RS.pS-G+pp..................s.p.....t.....................................................t..t.t.t.......................t..tttp....huscc..h..Y..t.t..t......ttt.t++....ohhP........................................................................................................................................... 0 10 65 99 +7648 PF07817 GLE1 GLE1-like protein Fenech M, Wood V, Finn RD anon Pfam-B_9182 (release 14.0) Family The members of this family are sequences that are similar to the human protein GLE1 (Swiss:O75458). This protein is localised at the nuclear pore complexes and functions in poly(A)+ RNA export to the cytoplasm [1,2]. 
20.30 20.30 20.40 21.20 20.20 20.00 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.57 0.70 -5.45 22 279 2009-01-15 18:05:59 2004-08-20 13:27:05 8 5 230 4 208 288 4 240.30 25 41.92 CHANGED htspIpph+pslht.lp.pcsplKchhspt+RpINsphGQlosop.ppltclhpclhph..........lstsps.s.L....uhpallshlAKtlVpQAEoE.lts+s..puAhPlAtlsh.hlhppaP-ht-hLhA+hhKKCPall....uaspuh..ssE-h+pphGa+cs.sss.hEcpssY.cRhuGhhpLaAAlsphph.tsphs.............................saulppu..W+aLARhlNh.st..........sssphsllusah-sAutphhptYupQhhKlLplltpc ................................................................................................thh.th...hp..ps.phKp.hhhphp+ths......h.l..uQl.....o....s.....s....pplpchhpc.ltph............................................t....hp...shs....pst.....................shta...hh.hhA...cthlpQs-sE..lssp......puAhPluh.lss...t.....lhp......t......h......P......c....h..t.......-llhA..+hh.+pCPahl.....shpp..s........spE..ch...........pc.hlG..........a........p....hp......s..........s...........t..hEpppsah...cRMsGhhplYAAlh.phphshsptp..............................pPaslspu..WpaLApllNh.Ph...........ssspsslLhshLcssutthhptY..tt.QhhKhl.hl...t......................................................... 0 69 110 165 +7649 PF07818 HCNGP HCNGP-like protein Fenech M anon Pfam-B_9462 (release 14.0) Family This family comprises sequences bearing significant similarity to the mouse transcriptional regulator protein HCNGP (Swiss:Q02614). This protein is localised to the nucleus and is thought to be involved in the regulation of beta-2-microglobulin genes. 
21.30 21.30 21.40 21.50 20.50 19.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.08 0.72 -3.95 22 256 2009-01-15 18:05:59 2004-08-20 13:28:23 8 6 213 0 187 254 3 94.20 34 31.46 CHANGED IPPpPs.ucssstLppKlp+hhclKc..puhchNppltssppa+NPulhcKlhcahsID-...huTsaPt-lasPps.asp.sYh...-tLscsQ+ch..tchpp..K ...............................................lPPpPs.Gp.ss......s......p...LppKlp+.hhphKp.......pG.hchNpplppp+paRNPulhcKLlpassI.DE..............hGTsaPt-.laDPp.......s...asppuYh...-tLtcsQ+tt.hc+h-pt.p............................ 0 64 102 147 +7650 PF07819 PGAP1 PGAP1-like protein Fenech M anon Pfam-B_9244 (release 14.0) Family The sequences found in this family are similar to PGAP1 (Swiss:Q765A7). This is an endoplasmic reticulum membrane protein with a catalytic serine containing motif that is conserved in a number of lipases. PGAP1 functions as a GPI inositol-deacylase; this deacylation is important for the efficient transport of GPI-anchored proteins from the endoplasmic reticulum to the Golgi body [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.42 0.70 -4.76 20 1253 2012-10-03 11:45:05 2004-08-20 13:29:28 8 39 770 0 638 5035 1134 153.30 17 24.56 CHANGED clsGlPVLFIPG...NAGSa+..........QlRSlAuss..............pcs..ptsshp..............................................................hDaFolDFsE-hoAhaGpolh.-Qs-YlscAI+hILshY............tss.tsPpSVlllGHSMGGlVARshlshssahs....sslsoIlTLu.oPHstsPlshDsslhchYpplsphWpp.................tht.t.t...........LpslsllS.....lsGGhpD....hhlsu-aoslcs....hls.osuhpshoouIspVW.......hshDHhAllWCpQLhhtlu+sLhph ..................................................................................................................................................................t......................................................................................................................................................................................................................................................................................................................................................................................................h...t.......h......h.........p...t.....l...h.p.hh............................................................ts.pp....l..h...lluHSM.G.G..l..l.............u...+....t.h....l......h........h.......s.......t.t.......................p.....l.....p.....t....l...l...s.......lu..........o..P.....H.t...u....s......s......h..............................................................................................................................................................................................................................................................................................................................h................................................................................................................................................ 
0 197 363 534 +7651 PF07820 TraC TraC-like protein Fenech M anon Pfam-B_9690 (release 14.0) Family The members of this family are sequences that are similar to TraC (Swiss:Q84HT8). The gene encoding this protein is one of a group of genes found on plasmid p42a of Rhizobium etli CFN42 that are thought to be involved in the process of plasmid self-transmission. Mobilisation of plasmid p42a is of importance as it is required for transfer of plasmid p42a, which is also known as plasmid pSym as it carries most of the genes required for nodulation and nitrogen fixation by the symbiotic bacterium. The predicted protein products of p42a are similar to known transfer proteins of Agrobacterium tumefaciens plasmid pTiC58 [1]. 20.80 20.80 20.90 23.30 20.40 19.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.90 0.72 -3.79 15 82 2009-01-15 18:05:59 2004-08-20 13:31:27 7 1 62 0 22 74 1 82.60 40 94.92 CHANGED KKPouKIR-EIAKLQEQLKpAETREAERIGRlALKAGLGEIEI-EuELQuAFEElApRFRuGctsssG.........uuusuopssussuoGAuAGusuEA ...............Kps.tclcsEIt+Lp-pL+ph-s+pAERIGRlAlKuGLu-lEIs-sclpttFE-lAtRFRpGtttt.t.................................t............................... 0 1 8 14 +7652 PF07821 Alpha-amyl_C2 Alpha-amylase C-terminal beta-sheet domain Fenech M anon Pfam-B_1278 (release 14.0) Domain This domain is organised as a five-stranded anti-parallel beta-sheet [1,2]. It is the probable result of a decay of the common-fold. 20.10 20.10 20.40 22.40 19.60 17.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.81 0.72 -4.10 33 290 2009-01-15 18:05:59 2004-08-23 10:31:22 7 8 64 15 116 300 10 61.20 48 13.67 CHANGED pRsuIpsp.SslcIltA-uDLYsAhIDs..................KlhhKIGsc.....shhP....s...saplsssGp-YAVWEK ........pRsGIpup.Ssl+IL..tA-...uDhYlApI.Ds..................KVlsKIGs+hD.hssllP....s.......sa.p.hus.pGpDYAVWEK............. 
0 30 72 97 +7653 PF07822 Toxin_13 Neurotoxin B-IV-like protein Fenech M anon Pfam-B_66513 (release 14.0) Domain The members of this family resemble neurotoxin B-IV (Swiss:P01525), which is a crustacean-selective neurotoxin produced by the marine worm Cerebratulus lacteus. This highly cationic peptide is approximately 55 residues and is arranged to form two antiparallel helices connected by a well-defined loop in a hairpin structure. The branches of the hairpin are linked by four disulphide bonds. Three residues identified as being important for activity, namely Arg-17, -25 and -34, are found on the same face of the molecule, while another residue important for activity, Trp30, is on the opposite side. The protein's mode of action is not entirely understood, but it may act on voltage-gated sodium channels, possibly by binding to an as yet uncharacterised site on these proteins. Its site of interaction may also be less specific, for example it may interact with negatively charged membrane lipids [1]. 20.90 20.90 26.70 129.30 18.10 16.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.17 0.72 -4.05 2 2 2009-01-15 18:05:59 2004-08-23 10:33:18 6 1 1 1 0 4 0 55.00 75 100.00 CHANGED ASuTWGuuYPACENNCRKpYD.CI+CQGKWAGKRGKCAAHChlQpssCpsKCKKc ASuTWGuuYPACENNCRKpYD.CI+CQGKWAGKRGKCAAHChlQpssCpsKCKKc 0 0 0 0 +7654 PF07823 CPDase Cyclic phosphodiesterase-like protein Fenech M anon Pfam-B_73368 (release 14.0) Domain Cyclic phosphodiesterase (CPDase, Swiss:O04147) is involved in the tRNA splicing pathway. This protein exhibits a bilobal arrangement of two alpha-beta modules. Two antiparallel helices are found on the outer side of each lobe and frame an antiparallel beta-sheet that is wrapped around an accessible cleft. Moreover, the beta-strands of each lobe interact with the other lobe. The central water-filled cavity houses the enzyme's active site [1]. 
20.40 20.40 21.00 20.40 20.10 20.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.50 0.71 -4.82 15 138 2012-10-03 21:31:48 2004-08-23 10:36:00 6 4 118 6 92 200 152 191.60 23 91.61 CHANGED M.......................hu..........lWhhPst..s.h.phppLl.puLpslFs......sp..........PsFEPHlTlsuslslc......sps-lpclLpu.ussulcul..t..................hlplssVssGcpYFc+lalplptsstLhulAplh+phFs...st.t................................................t.spaspptatPHlSLlYuDlp.h-p..sphptltpclccsh...............................suhuWs...hspltLVpC-Gs...Vc-WpllushsL ......................................................................hulWhhPst..shh.....pplptlh..tslpshas.....s.................PhFpPHlTlsu..s.lphp.......pts..ss..p..ph..Lpu..sssu.hpsh............................hlphsslss...u.c.p.a..a..pplalt.lp.s..t.lh...shsphh+phh................................................................tthspptahPHlSLl.Yu..Dl...cp....t.htth..tp.p..lpst......................................t.hsap....sphtlh...s...ps......lppWphltth.h............................................................................... 0 35 63 83 +7655 PF07824 Chaperone_III Type III secretion chaperone domain Fenech M anon Pfam-B_32938 (release 14.0) Domain Type III secretion chaperones are involved in delivering virulence effector proteins from bacterial pathogens directly into eukaryotic cells. The chaperones may prevent aggregation and degradation of their substrates, may target the effector to the secretion apparatus, and may ensure a secretion-component unfolded confirmation of their specific substrate. One member of this family, SigE (Swiss:O30917) forms homodimers in crystal. The monomers have a novel fold with an alpha-beta(3)-alpha-beta(2)-alpha topology [1]. 
25.00 25.00 25.40 38.40 20.40 20.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.45 0.71 -4.16 4 132 2012-10-01 22:01:34 2004-08-23 10:39:55 7 2 125 2 3 37 0 106.40 88 98.71 CHANGED csll.pLYcALGL-.hshD-PAllIDDDlpIYFsEut-uLEMsCPhhsLP-slppLQphLpLNYASsVsLAsDA-sosLlALhRLPtpSstEEhhsGhphaIopV+pL+pchA .......ESLLNRLYDALGLD.APEDEPLLIIDDGIQVYFNESDHTLEMCCPFMPLPDDILTLQHFLRLNYTSAVTI....GA...DADNTALVALYRLPQTSTEEEALTGFELFISNVKQLKEHYA.................. 0 0 1 2 +7656 PF07825 Exc Excisionase-like protein Fenech M anon Pfam-B_46296 (release 14.0) Domain The phage-encoded excisionase protein (Xis, Swiss:P03699) is involved in excisive recombination by regulating the assembly of the excisive intasome and by inhibiting viral integration. It adopts an unusual 'winged'-helix structure in which two alpha helices are packed against two extended strands. Also present in the structure is a two-stranded anti-parallel beta-sheet, whose strands are connected by a four-residue 'wing'. During interaction with DNA, helix alpha2 is thought to insert into the major groove, while the wing contacts the adjacent minor groove or phosphodiester backbone. The C-terminal region of Xis is involved in interaction with phage-encoded integrase (Int), and a putative C-terminal alpha helix may fold upon interaction with Int and/or DNA [1]. 21.40 21.40 21.40 21.60 21.30 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.62 0.72 -4.21 2 289 2012-10-04 14:01:12 2004-08-23 10:57:07 6 2 219 9 20 95 3 71.50 39 84.80 CHANGED hhlTLpEWstcp.RpP.S.pTlRRWsREshIhPsPVKcGRpYhhctsAshh-.p.....PVsusLlpRItsu+hutp .......hl..lTLpEW.ss..ccF.u.t.P..o.sTLpKYu+tGhIhP.P.KlGRcWhlDcpAhFV.G...s.s.......................................................pssh......................... 
0 2 10 13 +7657 PF07826 IMP_cyclohyd IMP cyclohydrolase-like protein Fenech M anon Pfam-B_50235 (release 14.0) Domain This enzyme (Swiss:O27099) is may catalyse the cyclization of 5-formylamidoimidazole-4-carboxamide ribonucleotide to inosine monophosphate (IMP), a reaction which is important in de novo purine biosynthesis in archaeal species. This single domain protein is arranged to form an overall fold that consists of a four-layered alpha-beta-beta-alpha core structure. The two antiparallel beta-sheets pack against each other and are covered by alpha-helices on one face of the molecule. The protein is structurally similar to members of the N-terminal nucleophile (NTN) hydrolase superfamily. A deep pocket was in fact found on the surface of IMP cyclohydrolase in a position equivalent to that of active sites of NTN-hydrolases, but an N-terminal nucleophile could not be found. Therefore, it is thought that this enzyme is structurally but not functionally similar to members of the NTN-hydrolase family [1]. 25.00 25.00 27.50 37.40 18.10 18.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.33 0.71 -4.97 6 187 2009-01-15 18:05:59 2004-08-23 11:01:02 6 2 187 13 63 183 27 212.30 40 92.54 CHANGED MYlGRFLllG+sppG.shssYRVSSRSFPNRpshphs-ssssllPcDspE..hhcNPYIoYNClRlVs-ssVVoNGSHTDsIA-KlchGhs.RDALs.uLhsMDYEKD-YN.TPRIAullst-c.ualGhVss-cl..........hh+hsElcsGcuahLosYpt...ststhhshcucos.....E-sschsh...phtsFEHsVsuAsshhcs.........-GaclA.spsh ......................YPGRGIllG+o.t-G.pslsAYaI.......MGRS....sSRN.R.l..h.l..c-.......s..p...s.h.s..p..sh..Dsuc..lpDss.LIIYsPVRll.......G....s..psIVTNGDQTDTIhE....GhstptoFppuLpoR-aEPDuPNaTPRISGll...........c....h-s......s.h...hSIl.Ks.ssss.t..............s.RhtatapsshsGEGaaIpTYpp......Dus.PL....PSF-GEPt.h.......s-hsphs.....ps..Ls.-N+VSLh.s+hIDh.........tth.......thst...................................... 
0 22 44 57 +7658 PF07827 KNTase_C KNTase C-terminal domain Fenech M anon Pfam-B_29524 (release 14.0) Domain Kanamycin nucleotidyltransferase (KNTase) is involved in conferring resistance to aminoglycoside antibiotics and catalyses the transfer of a nucleoside monophosphate group from a nucleotide to kanamycin. This enzyme is dimeric with each subunit being composed of two domains. The C-terminal domain contains five alpha helices, four of which are organised into an up-and-down alpha helical bundle. Residues found in this domain may contribute to this enzyme's active site [1]. 26.40 26.40 26.50 27.00 22.90 26.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.80 0.71 -4.44 2 78 2012-10-01 22:14:54 2004-08-23 11:06:00 6 2 70 2 9 39 0 137.40 69 57.06 CHANGED DstGahp+lh.sAcSsptpsF+pAIptllVtEhaEYsGKhRNlphpGPoTaLPSLslphAhhGAMLIGLHpphhaoTuA.VLsEAlK.schPpGaDHlsphsMSGpLupstKllpuhEsFWpGl.tWstcHsYllc.SKRIPF ................DSGGYLEKVYQTAKSVEAQTFHDAICALIVEELFEYAGKWRNIRVQGPTTFLP.SLTVQVAMAGAMLIGLHHRICYTTSASVLTEAVKQSDLPSGYD.HLCQFVMSGQLSDSEKLLESLENFWNGIQEWTERHGYIVDVSKRIPF.......... 0 3 7 8 +7659 PF07828 PA-IL PA-IL-like protein Fenech M anon Pfam-B_99281 (release 14.0) Family The members of this family are similar to the galactophilic lectin-1 expressed by P. aeruginosa ((PA-IL, Swiss:Q05097). Lectins recognising specific carbohydrates found on the surface of host cells are known to be involved in the initiation of infections by this organism. The protein is thought to be organised into an extensive network of beta-sheets, as is the case with many other lectins [1]. 
20.80 20.80 21.60 46.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.63 0.71 -4.32 2 27 2012-10-03 19:46:52 2004-08-23 11:09:01 7 2 21 15 9 19 1 113.30 56 91.42 CHANGED sWpGpV.ANsEsGpsTulIhp.GDsIolVAtGWspYG.sph.hstsDt.hPsp...spsu.husLVhKIuNpthh.sNssLa+hVs...VpGtlhLlaNDVPGTaGsNSGpFpVplhh-pp .sWpGpV.ANsEsGQsTulIhpsGDVIoIVAuGWspYG..sppahstsptchP.....cphhhspsshssuLlhKIGNpuhh.sNsGLa+hVs...VpGtloLlaND..VPGoYGNNSGuFSVNlth-pp.... 0 1 2 7 +7660 PF07829 Toxin_14 Alpha-A conotoxin PIVA-like protein Fenech M anon Pfam-B_46690 (release 14.0) Domain Alpha-A conotoxin PIVA (Swiss:P55963) is the major paralytic toxin found in the venom produced by the piscivorous snail Conus purpurascens. This peptide acts by blocking the acetylcholine binding site of the nicotinic acetylcholine receptor at the neuromuscular junction [1]. The overall shape of the peptide is described as an "iron" with a highly charged hydrophilic loop of 15S-19R forming the "handle" domain that is exposed to the exterior of the protein. The stability of the conotoxin is primarily governed by three disulphide bonds. A triangular structural motif formed by residues 19R, 12H and 6Y is thought to constitute a "binding core" that is important in binding to the acetylcholine receptor [2]. 25.00 25.00 30.70 57.60 22.10 21.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.29 0.72 -4.21 2 4 2009-01-15 18:05:59 2004-08-23 11:14:11 6 1 2 2 0 6 0 25.50 81 66.67 CHANGED GCCGpYPNAACHPCuCp.sRPsYCsp GCCGsYPNAACHPCuCK.sRPsYCsp 0 0 0 0 +7661 PF07830 PP2C_C Protein serine/threonine phosphatase 2C, C-terminal domain Fenech M anon Pfam-B_5253 (release 14.0) Domain Protein phosphatase 2C (PP2C) is involved in regulating cellular responses to stress in various eukaryotes. It consists of two domains: an N-terminal catalytic domain and a C-terminal domain characteristic of mammalian PP2Cs. 
This domain consists of three antiparallel alpha helices, one of which packs against two corresponding alpha-helices of the N-terminal domain. The C-terminal domain does not seem to play a role in catalysis, but it may provide protein substrate specificity due to the cleft that is created between it and the catalytic domain [1]. 22.70 22.70 22.80 23.90 20.80 22.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.72 0.72 -3.87 15 272 2009-01-15 18:05:59 2004-08-23 11:23:01 8 3 85 8 121 231 0 75.90 49 20.16 CHANGED SllLlCFPGAP+VSEEAl++EtcL-chLEs+VEEllcc.sucpphPDLhpVh+sLuuEs.IPsLPPGGGLsSK+slIpsVYp+ .....................SllLlCFPsAPKVStEAV++EsELDKhLEsRVc.El....hpc...tttcshP...DL..spVh+h.Lu..uEs.....IPsLPPGGGLsu.....K..+.slIEtsYpc............ 0 26 37 69 +7662 PF07831 PYNP_C Pyrimidine nucleoside phosphorylase C-terminal domain Fenech M anon Pfam-B_1661 (release 14.0) Domain This domain is found at the C-terminal end of the large alpha/beta domain making up various pyrimidine nucleoside phosphorylases [1,2]. It has slightly different conformations in different members of this family. For example, in pyrimidine nucleoside phosphorylase (PYNP, Swiss:P77826) there is an added three-stranded anti-parallel beta sheet as compared to other members of the family, such as E. coli thymidine phosphorylase (TP, Swiss:P07650) [1]. The domain contains an alpha/ beta hammerhead fold and residues in this domain seem to be important in formation of the homodimer [1]. 20.50 20.50 20.50 21.40 20.40 20.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.21 0.72 -4.52 54 2815 2012-10-02 20:27:15 2004-08-23 11:28:27 8 10 2677 20 573 1892 417 73.80 36 16.87 CHANGED alsplsspplGhsuhtLGAGRtpppD.IDhusGlhlp+KlG-pVcpG-slhslauscct.hcpshstlppshtIu .......................................hloplsupslGhAuhtLGAGRtptpD.sIDhuVGlhhpt+lGDpVc..pG.c.sLssl.ausccs....hp.pshptlppulpl................... 
0 197 372 477 +7663 PF07832 Bse634I Cfr10I/Bse634I restriction endonuclease Fenech M anon Pfam-B_46671 (release 14.0) Domain Cfr10I (Swiss:P56200) and Bse634I (Swiss:Q8RT53) are two Type II restriction endonucleases. They exhibit a conserved tetrameric architecture that is of functional importance, wherein two dimers are arranged 'back-to-back' with their putative DNA-binding clefts facing opposite directions. These clefts are formed between two monomers that interact, mainly via hydrophobic interactions supported by a few hydrogen bonds, to form a U-shaped dimer. Each monomer is folded to form a compact alpha-beta structure, whose core is made up of a five-stranded mixed beta-sheet.The monomer may be split into separate N-terminal and C-terminal subdomains at a hinge located in helix alpha3 [1]. 20.70 20.70 22.80 21.60 20.40 20.20 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.54 0.70 -5.28 4 13 2012-10-11 20:44:44 2004-08-23 11:38:48 6 2 11 37 2 16 0 267.20 23 86.22 CHANGED hssIpthsEs..GKhplN.+..assluphl-Ntlsps.phpc.LDthRs.ssctActtG...ppsusuuhspssGsW.ElMlu.......ph.chhLp.s.pp.l...VlpMPNsp..........SFDahslacsEhpEhI.phcupL..pKssltL.TSsPDlulI......E-.Ks..c-hhpp.Ist.T+ss.sh..sLYpphps+sphcclpuululKTShRPDRRhQ.laEuslhKuL.salphphW........KYahssop.lusADssuhpT...sAsHulsps+Shsp+AVD-lahhsohtDlsphlsphlpc ............................................th..hp.t....sh.p.h...hs.t.ph.c....hpshhtt.scttt...th.ssuAhsNssGsW.Ehhhu.........ltshphhhp..ssp..p...l...llphPN.p..........oaDhhpLacsphpptlpsL....cppL.......ptsplpLhoSsPDllIl........pthps..pshhh....psIsphocs.shsh.....slYpphps+sch.cslhuhlulKTShRPDRRhQ.laEusllKuL..salppptW.................hKYYutuop.lusADspuhpT...sAsHolspspshPp+AVDclaphsohtDlsphlpphl.............. 
1 1 1 1 +7664 PF07833 Cu_amine_oxidN1 Copper amine oxidase N-terminal domain Fenech M anon Pfam-B_46519 (release 14.0) Domain Copper amine oxidases catalyse the oxidative deamination of primary amines to the corresponding aldehydes, while reducing molecular oxygen to hydrogen peroxide. These enzymes are dimers of identical subunits, each comprising four domains. The N-terminal domain, which is absent in some amine oxidases, consists of a five-stranded antiparallel beta sheet twisted around an alpha helix. The D1 domains from the two subunits comprise the 'stalk' of the mushroom-shaped dimer, and interact with each other but do not pack tightly against each other [1,2]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.42 0.72 -3.65 134 2772 2009-09-11 19:31:59 2004-08-23 13:46:59 6 269 442 32 1082 2618 19 86.20 21 23.12 CHANGED tssRshVPlRhlu..-uL..G..s...pVpW.sspspslsl..pp......sspp.....lplp.l................G.....sptshl........N.......G.......pphthc..sss.h....lt...t....uR..ThVPlRaluEshG.hp...........VpW.-sp.....spslhl ...........................................................................................hhlPhc.h.........p.t.h.....s....h.......pl.ta....ptt...t...p.t....hhh...p.............................sst.t..........lp..hp.h.....................................................................s.......s.p.phh.l............................N..............G............p..p..h..phs......ssshl..........hs......uc..shVPl+.hl.u-.shG...hp...........lpa.stt........................................................................................................................... 0 649 941 984 +7665 PF07834 RanGAP1_C RanGAP1 C-terminal domain Fenech M anon Pfam-B_23411 (release 14.0) Domain Ran-GTPase activating protein 1 (RanGAP1, Swiss:P46061) is a GTPase activator for the nuclear Ras-related regulatory protein Ran, converting it to the putatively inactive GDP-bound state. 
Its C-terminal domain is required for RanGAP1 localisation at the vertebrate nuclear pore complex, and is sumoylated by the small ubiquitin-related modifier protein (SUMO-1, Swiss:Q93068). This domain is composed almost entirely of helical substructures that are organised into an alpha-alpha superhelix fold, with the exception of the peptide containing the lysine residue required for SUMO-1 conjugation [1]. 25.00 25.00 27.10 26.40 24.20 24.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.89 0.71 -4.85 3 102 2009-01-15 18:05:59 2004-08-25 09:31:02 6 11 54 14 43 98 0 173.50 54 32.44 CHANGED GsDuEssu+stpcPsluEsAPs.sPPhPuDlSTFLuFPSPEKLlRLGPKRSsLIAQQVDVoDsEKVVpAFLKVSSVYKDEuEVKtAVpETlDALM+KAFsNSuF.QSNoFITSLLVpMGLLKSED.KVKsIusLsGPLLTLNHMVQQ-YFPKuLAulLLAFVSKPNuVLESCuSARHoLLpTLaK ...........................tcstssppch.csss.uEss.Ps....hss...ss.PsDlSTFLuFPSPEKLLRL.GPK.sSh.L......I......sQQT..D.T.....SD..s.EK.......V....Vs........AFL........KVS...SVa..+.............D.-..u..s..V....+..oAVt-ulDALMKKAFSouoF..NSssFl..T.pLLlHMGLLKSED...Kl.K.A.I.s.sLaGPLM.sLNHh..VQQDYFPKuLAPlLlAFlTKPN....tALEoCSh.ARHsLLQTLap................... 0 10 13 26 +7666 PF07835 COX4_pro_2 Bacterial aa3 type cytochrome c oxidase subunit IV Fenech M anon Pfam-B_86185 (release 14.0) Domain Bacterial cytochrome c oxidase is found bound to the to the cell membrane, where it is involved in the generation of the transmembrane proton electrochemical gradient. It is composed of four subunits. Subunit IV consists of one transmembrane helix that does not interact directly with the other subunits, but maintains its position by indirect contacts via phospholipid molecules found in the structure. The function of subunit IV is as yet unknown [1]. 
20.30 20.30 20.40 20.60 20.10 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -7.95 0.72 -3.98 36 225 2009-01-15 18:05:59 2004-08-25 15:22:17 7 1 213 5 76 177 38 44.90 37 68.34 CHANGED Hcc........utMDhstpE+TacuFl+hsphuslsllsl...LlhhAlhhs ..................Hpp......huusMDYspHE+TYsGFlthsKauolsllsl...llhMAhhhh......... 1 16 40 56 +7667 PF07836 DmpG_comm DmpG-like communication domain Fenech M anon Pfam-B_1675 (release 14.0) Domain This domain is found towards the C-terminal region of various aldolase enzymes. It consists of five alpha-helices, four of which form an antiparallel helical bundle that plugs the C-terminus of the N-terminal TIM barrel domain [1]. The communication domain is thought to play an important role in the heterodimerisation of the enzyme [1]. 21.00 21.00 21.10 24.10 19.50 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.97 0.72 -4.57 37 789 2009-01-15 18:05:59 2004-08-25 15:24:35 6 8 628 4 196 566 103 65.60 60 19.32 CHANGED pssplDRpoLslGYAGVYSSFLhHAcRAAp+aGVDsR-ILlELGRR+hVGGQEDhIlDlAh-Lupp ............PlRVDR-oLsLGYAGVYSSFL+HsEpAAt+Y.GlsAhDILVELG+R+hVGGQEDMIlDlAL-Ltp.t.................. 0 45 122 167 +7668 PF07837 FTCD_N Formiminotransferase domain, N-terminal subdomain Fenech M anon Pfam-B_4434 (release 14.0) Domain The formiminotransferase (FT) domain of formiminotransferase- cyclodeaminase (FTCD) forms a homodimer, and each protomer comprises two subdomains. The N-terminal subdomain is made up of a six-stranded mixed beta-pleated sheet and five alpha helices, which are arranged on the external surface of the beta sheet. This, in turn, faces the beta-sheet of the C-terminal subdomain to form a double beta-sheet layer. The two subdomains are separated by a short linker sequence, which is not thought to be any more flexible than the remainder of the molecule. 
The substrate is predicted to form a number of contacts with residues found in both the N-terminal and C-terminal subdomains [1]. 19.20 19.20 19.80 23.20 18.10 16.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.86 0.71 -4.76 37 493 2009-01-15 18:05:59 2004-08-25 15:26:05 7 7 380 6 191 419 213 172.50 42 49.65 CHANGED KlVECVPNFSEGRcp-hl-tIhsshp.shpGVpLLDhssDssHNRoVlThVG-P...........-slhcAshpus+hAs-LIDhppHcGp.HPRhGAsDVlPFlPlpssTMEEClplA+plGccluccLtlPVYLYpcuAspPcRcsLssIR+G..paEuht-Kl.KpscW...........cPDaGPsphp...PouGsTslGAR. .......................pllEClPNFSEGR..spphI-...tIsps.....h......+......s.....h.....s.........G....V..pLL.DhssDtsaNRoVhT.l.V.Gc.P.......................cs.l..t-AshphschAschI....Dhsp.HpG.p........HPRhGAsDVlPFlPl.p..s.s..T..........h-EClplA+.p.....l.ucclu.c.c...l.s.lPVaLYEpuA..s.p.PcRcsLsslR+G..paEuh.tE..K..l....p.ps...c.W...................tP.Da..Gs.t...php...PosGsTslGAR......................................... 0 81 121 155 +7670 PF07839 CaM_binding Plant calmodulin-binding domain Fenech M anon Pfam-B_9279 (release 14.0) Domain The sequences featured in this family are found repeated in a number of plant calmodulin-binding proteins (such as Swiss:Q8W235, Swiss:Q84ZT8 and Swiss:Q8H6X1), and are thought to constitute the calmodulin-binding domains [1,2]. Binding of the proteins to calmodulin depends on the presence of calcium ions [1,2]. These proteins are thought to be involved in various processes, such as plant defence responses [1] and stolonisation or tuberization [2]. 
21.00 21.00 22.40 21.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.55 0.71 -3.87 19 140 2009-01-15 18:05:59 2004-08-25 15:30:43 6 6 22 0 90 126 0 105.70 30 21.15 CHANGED pppptp.ppsphhcspspppps+phpph+csll.hp+hs....pshccsh+hsshtsphls.ps-.s-sE+V.sLRH.Qcsp-..+Kcu-chMlDaAlccslSKLsssRKpKVchLVpAFETVls ....................................................................................h................t.tt.p...t....pp..hp...hcthh....tp......pt.cphtphp....t.....ph..hs.psp.....csEcV..LRH.Qpsp-..+Kpuc.hhhspslccssoKLs..sRK.pKVchLVtAFETVls... 0 12 52 72 +7671 PF07840 FadR_C FadR C-terminal domain Fenech M anon Pfam-B_11411 (release 14.0) Family This family contains sequences that are similar to the fatty acid metabolism regulator protein (FadR, Swiss:P09371). This functions as a dimer, with each monomer being composed of an N-terminal DNA-binding domain and a regulatory C-terminal domain. A linker comprising two short alpha helices joins the two domains. In the C-terminal domain, an antiparallel array of six alpha helices forms a barrel-like structure, while a seventh alpha helix forms a 'lid' at the end closest to the N-terminal domain. This structure was found to be similar to that of the C-terminal domain of the Tet repressor. Long-chain acyl-CoA thioesters interact directly and reversibly with the C-terminal domain, and this interaction affects the structure and therefore the DNA binding properties of the N-terminal domain [1]. 
23.00 23.00 23.20 23.30 22.90 22.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.88 0.71 -4.62 29 828 2012-10-01 22:11:53 2004-08-25 15:31:18 7 3 818 8 104 339 14 167.80 62 68.98 CHANGED NNFWETSGLNILETLs+L.DtsthPpLl-sLLSARTNlSsIalRtAl+tNP-cs..............................hElLsphcpl--s..............AcAaspaDYpLa+pLAFsSGNPlYsLILNGhKGLYoRVGpaYFusscuRpLAhsFYcpLhplscpppa-plhthlRpYGhpSGtIWpph+ssl..Ppshs ...................................................NNFWETSGLNILETLARL.D.HESVPQLIDNLLSVRTNISo....I.FI.R.TA.hRpHP...-K.A...................................................................pEVLAsA..pE....V....sD+........................................A-AFA-LDYsIFRGLAFASGN.PIYGLILNGh..K.G.LYT.RIG.RaYFuN.PEARpLALsF....Y++LuuLCppG.s+DQVhEsVR+YG+-SG-IWH+MQcsLPuDL.A.......................................................... 0 15 38 73 +7672 PF07841 DM4_12 DM4/DM12 family Fenech M anon Pfam-B_5243 (release 14.0) Family This family contains sequences derived from hypothetical proteins expressed by two insect species, D. melanogaster and A. gambiae. The region in question is approximately 115 amino acid residues long and contains four highly- conserved cysteine residues. 21.70 21.70 21.80 21.90 21.60 21.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -10.08 0.72 -3.65 45 504 2009-01-15 18:05:59 2004-08-25 15:54:11 8 5 28 0 367 546 0 82.60 26 33.78 CHANGED RpplYchlEphl.sphGh...sG..+sClLRsICEsuph.h...ctsGllu-ll+llFoPspscst.........pYhpActhGptt...s-Cppha..t.pC ..........................Rh.lYphlEphh.p...p.h..Gh.......sG+sClLRsICEs.u.phsh..........pp........pu.l.....l.u-.......ll..+llhosspsp.t................................tpYhpApphGptt..............tsCtphat.pC...................................................... 
0 91 117 287 +7673 PF07842 GCFC GC-rich sequence DNA-binding factor-like protein Fenech M, Mistry J, Wood V anon Pfam-B_9357 (release 14.0) & Pfam-B_9894 (release 19.0) Family Sequences found in this family are similar to a region of a human GC-rich sequence DNA-binding factor homolog (Swiss:Q9Y5B6). This is thought to be a protein involved in transcriptional regulation due to partial homologies to a transcription repressor and histone-interacting protein [1]. This family also contains tuftelin interacting protein 11 which has been identified as both a nuclear and cytoplasmic protein, and has been implicated in the secretory pathway. Sip1, a septin interacting protein [2] is also a member of this family. 20.20 20.20 20.60 20.60 20.00 19.90 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.90 0.70 -5.04 36 631 2009-01-15 18:05:59 2004-08-25 15:56:45 7 15 282 0 392 539 6 222.60 22 31.13 CHANGED LppltptFcphppca.pcYc...phtLuplAsuhltPLl+pthhsWsPLcsss...hlt.htph+.lL................................t..ssYcshlWpshlsthp.sshspWpst.sssshlsllptW.slLP.hhhpsll-plllP+Lp..ptVpp.WcPhocsh.....sh.p....sWlaPWLPhLs.p+hcs....................lhssI+pKlppsLpsWp.p.tush.hLp.W.pclass...........tpasshlhppllP+LtttLp...ph.lsP.pp..Qs..lchhptlhpWpsllssphhspL..l.ppFFsc...WhpsL 
.................................................................................................................................................................pthhphFpphpppa.ppYp...ph.lu.hhst.hltPll+.plh....s.Ws.P...........L...............ppss.....................................................................psa.c.ph..hW..hhh.hh...t.t.....t.php.c.....ps..............ss..h...plls....................................s.ll-.p........lllP+Lp..thlcp..Wc............Pho.ss...............sh.p...........hlhshh..sh.h..t.....t.p.hcs....................lhpslht+h.ppsl........p.p.......................................slahs............................a.t....h..h.p.phhsthth.hhp...........tp.....ps.......h.chht.lhtW...p..shls...ph.............hhtl..h.t.phhs+...a...................................................................... 2 116 174 305 +7674 PF07843 DUF1634 Protein of unknown function (DUF1634) Fenech M anon Pfam-B_9594 (release 14.0) Family This family contains many hypothetical bacterial and archaeal proteins. A few members of this family are annotated as being putative transmembrane proteins, and the region in question in fact contains many hydrophobic residues. 21.00 21.00 21.10 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.20 0.72 -4.40 45 351 2009-01-15 18:05:59 2004-08-25 16:01:49 6 4 335 0 99 238 4 98.50 34 79.49 CHANGED IutlLRhGVlluusllhlGhllhhlpssu...............h...ts..hs.stlhpuhhthcuhslI..hhGLhlLIhTPlhRVllulhsFhpE+DhlYssIoh.....lVLhhLhhul...hl ..............................................................IuplL+hGVhluuhlIhhGllhhhlpsts...................t...ht..tlh.puhht......hpshsll..hhGlhlLIlTPVLRVslulhsF.hpE+DhhYVh.ITslVLhIlhhuhl........... 0 39 67 87 +7676 PF07845 DUF1636 Protein of unknown function (DUF1636) Fenech M anon Pfam-B_9608 (release 14.0) Family The sequences featured in this family are derived from a number of hypothetical prokaryotic proteins. 
The region in question is approximately 130 amino acids long. 28.10 28.10 28.10 28.50 28.00 27.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.56 0.71 -3.85 52 189 2009-01-15 18:05:59 2004-08-25 16:05:15 6 1 150 0 66 191 92 110.70 33 83.77 CHANGED lhVCsoC+tt.......tss.psGttLhstLpst..t.s..tt...lplpsVpCLsuCs+..uCsVAlp.uss+hoYlaGcl....sst...sssssllshAthYtsoscGhlPa+cRPptl+cphlARIPP ........................lhVCsoC+ts..........tss.ps...GttLhstLpst..........s......ts...lplcsVpCLuuCs+..sColAlp.usu+hoYlaGcl....ss......ssupsllshAphYtsos-GhlPapcRPpsl+cthlARlPP............... 0 12 42 48 +7677 PF07846 Metallothio_Cad Metallothio_7; Metallothionein family Fenech M anon Pfam-B_9622 (release 14.0) Family The sequences making up Metallothio_Cad are found repeated in metallothionein proteins expressed by several different Tetrahymena species. Metallothioneins are low molecular mass, cysteine-rich metal-binding proteins that are thought to be involved in the regulation of levels of trace metals, and detoxification of these metals when present in excess [1]. Some of the metallothioneins found in this family (for example, Swiss:Q8T6B3) are known to be induced by cadmium and are thought to be involved in the cellular sequestration of toxic metal ions. The high proportion of cysteine residues allows the metal ions to be bound by the formation of clusters of metal-thiolate complexes [1]. Tetrahymena spp. metallothioneins differ from other eukaryotic metallothioneins mainly in the length of their sequences and in the cysteine-containing motifs they exhibit. 19.50 19.50 24.70 23.70 16.80 15.80 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.37 0.72 -4.02 4 42 2012-10-04 14:35:37 2004-08-25 16:06:25 6 4 9 0 0 48 0 20.80 83 35.79 CHANGED CCCGsNAKPCCTDPNSGCCCV CCCG-pAKsCCTDPNSGCCCs. 
0 0 0 0 +7678 PF07847 DUF1637 Protein of unknown function (DUF1637) Fenech M anon Pfam-B_6051 (release 14.0) Family This family contains many eukaryotic hypothetical proteins. The region featured in this family is approximately 120 residues long. According to InterPro annotation, some members of this family may belong to the cupin superfamily. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.52 0.71 -4.95 32 328 2012-10-10 13:59:34 2004-08-25 16:08:44 7 5 139 0 188 457 26 187.40 32 75.01 CHANGED llcplpspDlslpsphth......................................stp...hssloYhcl.aEs-.sFShulFhLssuusIPLHDHPGMsVhpKlLaGpl+lcSaDhlc........s.ssstt.p............s+hAplh.sssp....hosssssslL...hPpp.........sNlHphsAls...PsAhLDlLuPPYs...sstG.RcCsYYcthshss...........t........................................hsaLpEh...s.Pcsah.hts..Y.GP.pl ...................................................................hsphpstDltlt.t....................................................t..sslsYhc..l..a..Es..c....sFS...hu.....lF.h.L.P.s..uu.sIPLHsHPuMsV......hoKlLa.Gsl+lcSYDhlc................s...s........tt......................................................h+hAplh.h-s..........housss.sslL...hPpp...............GNlHphsAls......ssAhLDlL.......uP..PYs...........t...tG....RcCsYYc.h..sp..........................................................t.shL.ch.....Ppshh.htt..Y.GP................................................................................................. 0 43 102 152 +7679 PF07848 PaaX PaaX-like protein Fenech M anon Pfam-B_9563 (release 14.0) Family This family contains proteins that are similar to the product of the paaX gene of Escherichia coli (Swiss:P76086). This protein is involved in the regulation of expression of a group of proteins known to participate in the metabolism of phenylacetic acid [1]. 
In fact, some members of this family are annotated by InterPro as containing a winged helix DNA-binding domain (Interpro:IPR009058). 22.70 22.70 22.70 22.80 22.60 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.04 0.72 -3.98 13 652 2012-10-04 14:01:12 2004-08-25 16:08:49 7 2 571 5 164 630 53 68.10 38 23.66 CHANGED pApSlIlTLaGDhlts+GGslhlusLIpLhtshGlsEpslRsAloRhsppGhLssp+sGp.puhYpLS-+u .................................upSLIholaGD..lt.t.+.G....u.p..lhluuLlt.Ll.p.s.h...G..hsEphVRoALhRLs+cGhL....ss.s...+hG..R.puhYpLo-p................ 0 45 102 139 +7680 PF07849 DUF1641 Protein of unknown function (DUF1641) Fenech M anon Pfam-B_9217 (release 14.0) Family Archaeal and bacterial hypothetical proteins are found in this family, with the region in question being approximately 40 residues long. 20.30 20.30 20.40 20.70 20.20 19.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.29 0.72 -7.74 0.72 -4.34 60 843 2009-01-15 18:05:59 2004-08-25 16:10:27 6 1 521 0 181 438 10 40.50 33 23.65 CHANGED shpshcssp......lulhsLl+tL+DPDlp+uLGhhlshLKslG+s ..................s..pphpsscc.....sulhuLl+.s.LKDPDhpRulsahlshLKuhup......... 0 52 107 147 +7681 PF07850 Renin_r Renin receptor-like protein Fenech M anon Pfam-B_9266 (release 14.0) Family The sequences featured in this family are similar to a region of the human renin receptor (Swiss:Q8NG15) that bears a putative transmembrane spanning segment [1]. The renin receptor is involved in intracellular signal transduction by the activation of the ERK1/ERK2 pathway, and it also serves to increase the efficiency of angiotensinogen cleavage by receptor-bound renin, therefore facilitating angiotensin II generation and action on a cell surface [1]. 
21.30 21.30 21.70 21.60 21.20 19.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.25 0.72 -3.65 8 137 2009-01-15 18:05:59 2004-08-25 16:10:47 9 4 93 4 83 131 0 95.90 43 27.64 CHANGED shYGGsAVVEllT..scoh-ssLsRcsRsIlpocs..............hpssssPYNLAYpYsh-YuVIFNIlLWlhlsLuLAVIsISYslWsMDPGhDSIIYRMTsQ+I+hD ......................................................................t.hYsusslVphls....csh-ss.hhRpsRs..llp..scp...................................psss..sPYNLA.hp..Ysh-.Y...sVl..FNIlLWhMlsLuLullslsYslh.N.MDPGhDSIIYRMTsp+l+hD........ 0 27 34 59 +7682 PF07851 TMPIT TMPIT-like protein Fenech M anon Pfam-B_9674 (release 14.0) Family A number of members of this family are annotated as being transmembrane proteins induced by tumour necrosis factor alpha, but no literature was found to support this. 22.90 22.90 25.40 23.50 22.80 22.80 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.09 0.70 -5.36 14 274 2009-01-15 18:05:59 2004-08-25 16:11:51 8 8 130 0 167 257 5 265.40 39 89.51 CHANGED cshpEWp-Lpc-appLp-TH+hYppKLE-lspLQppCossIs+Q+++L+plppsL+phpts.......lss..E-hctlpclcppl+cRpstha-ME..uaLPcK.NGlYLsLlLG.sVNVoLLs+psKFtYKDEYEKFKLhlTlIhllhuhhCpalhsY.......RshDtlFsFLLVWYYCTLTIRESILhsNGSRIKGWWlhHHYlSThhuGVhLTW.....PpG.haQhFRsQFl.FshY.ShVQaLQahYQuGCLYRL+ALGERHpMDlTlE....GFpSWMWRGLoFLLPFLFhGahaQhYNuhTLFphuppspsp.EWQVhhhuhhFLlLFlGNhhTTLtVVhpKhppp...p 
............................................................................tlpptt.p..ah.phpphtthpp.psttlpppph....p....h.pplt.slp..pht.................t.phhpplptp...hpctpshhh......-h-......shL.Pp+.sGh.aLplhLG..sVNVphhppps+htaK-................EYEpFKhhhsllhllhshhhhhlhph......................phhsthaphhLlaaYsTLslREsILhsNGS+I+uWWl.HHYlSshhusVhLTW.....P.s.s.ha.........QhFpppF...L......tauhhQ..uhVQhLQ.hYQpGpLYphhALGc.tpphDlo...p....G.pu.hhhtt.....Lh.hLhPh.LFhhph.aQhY....suht.Lh...........ph......u..............sp...pW.............Q.......V.h...huhhh...llhhhGNhhsTl.slhtKhp.p....t....................................................... 0 49 72 121 +7683 PF07852 DUF1642 Protein of unknown function (DUF1642) Fenech M anon Pfam-B_9838 (release 14.0) Family The sequences making up this family are derived from various hypothetical phage and prophage proteins. The region in question is approximately 140 amino acids long. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.16 0.71 -3.83 54 426 2009-01-15 18:05:59 2004-08-25 16:12:14 6 4 292 0 26 304 0 123.50 24 69.65 CHANGED pthshVP...phVA-aIEppK..tt.........hpshphh.h.tp.........................................chhpW...........ss.chhhpAals..G............YpVEK......EpLYhVcl.ps.....................h.hhppth..t...................hphTcpEI.cp.s..chtWtat......lcVc ...................................................................thshVP...phVA-Wl-ps+........pp.................phh.t.....h.pp.............sp.....................ch.pW.............h...hppshch.....hs.pAals..G...............YEVEK......Eph.YhVcl.t.t........................hh.h.tpph.......................hphTcpEl.cp.s..ph.W.at......h.l.................................................. 
0 9 16 20 +7684 PF07853 DUF1648 Protein of unknown function (DUF1648) Fenech M anon Pfam-B_9801 (release 14.0) Family Members of this family are hypothetical proteins expressed by either bacterial or archaeal species. Some of these are annotated as being transmembrane proteins, and in fact many of these sequences contain a high proportion of hydrophobic residues. 28.70 28.70 28.70 28.70 28.40 28.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.32 0.72 -4.45 86 1279 2009-01-15 18:05:59 2004-08-25 16:13:32 6 12 929 0 232 913 44 52.80 25 26.36 CHANGED hllhlhshlhsh..hhaspLPcplssHashsGpsDsassKh.hshhhhPl.......lhlh ......................hlhlhhhhhsl....hhY...s....p.LPsp..lPhHash.sGp.sDsassKh..hslh.hhPh..hhh.h..................... 0 95 171 201 +7685 PF07854 DUF1646 Protein of unknown function (DUF1646) Fenech M anon Pfam-B_9337 (release 14.0) Family Some of the members of this family are hypothetical bacterial and archaeal proteins, but others are annotated as being cation transporters expressed by the archaebacterium Methanosarcina mazei (Swiss:Q8PXG5, Swiss:Q8PXG7 and Swiss:Q8PXG8). 
20.30 20.30 20.40 20.30 20.20 19.70 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.00 0.70 -5.32 6 83 2012-10-02 15:12:49 2004-08-25 16:13:47 7 2 52 0 45 108 23 323.20 43 93.85 CHANGED lAuLlVILlLlLlLPFhl+hlEcNLEhFhhsM...GlAushlSGl.........aSh-LlhcAFcsPLtlhp..lh.IPlGIsQsVLlsGLlFaha+c+lhphltpsh-+lul+VFuFllIslLGLhSSlISAIlAulILsEllshhPLsRKsKlchsVIAsFuIGhGAALTPlGEPLSTIAlSKL.....NtcFhYLhclLGhYIIPulhshGlhusahlp+hsh+p.hlEh..V-YsEsl+-......VllRAhKVFVFIhALpLLGpGFKPlIhhYlsclsSclLYWlNhlSAllDNATLAAAEIuPphTpEQIRuhLMGLLISGGMLIPGNIPNIluAG+L+Is.pEWARlGlPLGlIhhllYFIllaVL ....................................................h..hLllIllllLlLPFhh+tlE+NLEhFLhlM...Glh...AshlSth.....................hshcLl.cshps.lhhh..........Is.sVLlu.GLlFhh.h+s+lpphl.pt.lhctl...s.lclhlFlllllLGLhSSlITAIlAullLVEl.l.phhPL.c.RpsKl.clsVluCFuIG.hGAALTPlGEPLSTIslSKL.....p.A...-......F....h.YL.hchlGhhI.ls.ullhhulluhhhl.t+..t....ph....ps........hpt..........hp.t.p......Esl+c.................VhlRAhKlalFlhALpLLGsGFKPlIctYllclsstlLYWlN.h.lSAlLDNATLA..AAEIoPt..Ms.tQl+AlLhGLLISGGM.LIPGNIPNIIuAuK.....Lp.IpS+EWA+lGlPlGllhhllYalllFh.......................................................... 0 13 31 32 +7686 PF07855 DUF1649 Protein of unknown function (DUF1649) Fenech M anon Pfam-B_9402 (release 14.0) Family This family is made up of sequences derived from hypothetical eukaryotic proteins of unknown function. 
20.50 20.50 24.00 20.70 19.30 20.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.95 0.71 -4.57 11 256 2009-01-15 18:05:59 2004-08-25 16:18:47 7 7 211 0 187 241 2 158.50 30 73.51 CHANGED lElhs-sppV+DlVcuILHTIhFHR..............huoltPp..opDhlDhThstlsssEL-phl-p+lsshlcplcsspsp.................................spGQIulpFa-K++++.............sW.F..........hp.................................u-EpVsWE.WTlcVslspscoEt-+tpsRcuhpc..................pLpcslhcIlplsN+c.DaIPPIsTpsps .....................-l.h-.pplc-slt.u...........lLHTIhFHR................h.us.ltsp..sp-hl.-hTas......t.s.ss.s-L-phlcpclsphhctl.c...pss.ss.............................................hpGQl...slpFap....K....++p+.................W..F...............................................................s-Epl...sWE.Wslplplh....p..........p..s........c...p....-.........+ths+cthtp............................tLpctlhpIlphhN.cc.....-alP.hsopt......................................................................... 0 63 101 154 +7687 PF07856 Orai-1 DUF1650; Orai-1_Ce; Mediator of CRAC channel activity Fenech M, Pollington JE anon Pfam-B_9685 (release 14.0) Family ORAI-1 is a protein homologue of Drosophila Orai and human Orai1, Orai2 and Orai3. ORAI-1 GFP reporters are co- expressed with STIM-1 (ER CA(2+) sensors) in the gonad and intestine. The protein has four predicted transmembrane domains with a highly conserved region between TM2 ad TM3. This conserved domain is thought to function in channel regulation. ORAI1- related proteins are required for the production of the calcium channel, CRAC, along with STIM1-related proteins [1]. 
25.00 25.00 33.60 28.70 21.60 20.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.29 0.71 -4.70 15 268 2009-10-24 14:23:42 2004-08-25 16:20:09 7 6 107 0 171 238 1 183.50 52 64.56 CHANGED p+aplpslppR+L-.uR..................tQLKAoSppSALLuGFAMVAhVElQhscspph....spuLLlAFussTollVuVcLhAlhloThlLssI..................-sss.htshps..sspoPHcphcha....hEssWtauhph...GlhLFLlplullsWlKFass................................................................hsAAhssTuIhlsssllalhaohphatshlt++tscps ............................s..as.psLSWR+L.LSR................................................................AKLKASS+TSALLSGFAMV....AMVEVQL-sspph.....P.sLLlAFosC.TTlL.VAVHLFALMlSTCILPsI.............................................EAVSNlHslsu.....lp-SPHcRhHha................IELAWuFSTsl.........GhhLFLsElsLlsWVKFhsl...................................................................u.tAAhsoTsIhlPssllFl.sFslHFYRSLVsHKp-p.t.................................... 0 54 76 121 +7688 PF07857 DUF1632 CEO family (DUF1632) Fenech M, Bateman A anon Pfam-B_9654 (release 14.0) Family These sequences are found in hypothetical eukaryotic proteins of unknown function. The region concerned is approximately 280 residues long. This family has been termed the CEO family for C. elegans ORF [1]. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.76 0.70 -5.13 6 180 2012-10-02 19:55:49 2004-08-26 09:34:15 7 7 72 0 127 161 9 207.00 32 68.02 CHANGED GllAChlSslhFGShFVPlK+acouDGhFlQWlhShulhLVGllVauspGFPtFaPlAMLGGhhWusGNuhuVPIhssIGLulGhLlWsTssClsGWAsuRFGLFG.lssphPpsshLNYlGllllVVGGslFh.IKsp...spscscsoshphE..sshppc..sspcsSsh...c.....h.p..+p......RllshlhAlluGhhYG.hhsPlhYIpspsp....lYPsusppslsYlFSaahGIFlTSTllFlsYsIhp+NsPhlssp .....................................................GhhushluhlhFGS.aVPlK..+hs..su....D..............G.h..............ahQWhhshulhl...........suh....l........h.h.h.................h.............t........s.....t.....F.......hPh.A.MlGGhlWus.....GNhhs.VPllptlGLulGhLlWuohshlsG..WssuR..FGh..FG..hp...p...s.t.....p...s.hL.NhhGhslsllu...shha.hh.....l..+sp........t...........p......p...p............t...........sh........t...................p..tpp....................t.....s..........................................p...................................................+h.h..uh.hulhsGhhaG..hsPh.hh.pp............hts......slsal..auah.Glhhsuohh...ahhYshhh+...t...h...t.......................................... 0 58 75 113 +7689 PF07858 LEH Limonene-1,2-epoxide hydrolase catalytic domain Fenech M anon Pfam-B_15033 (release 14.0) Domain Epoxide hydrolases catalyse the hydrolysis of epoxides to corresponding diols, which is important in detoxification, synthesis of signal molecules, or metabolism. Limonene-1,2- epoxide hydrolase (LEH) differs from many other epoxide hydrolases in its structure and its novel one-step catalytic mechanism. Its main fold consists of a six-stranded mixed beta-sheet, with three N-terminal alpha helices packed to one side to create a pocket that extends into the protein core. A fourth helix lies in such a way that it acts as a rim to this pocket. 
Although mainly lined by hydrophobic residues, this pocket features a cluster of polar groups that lie at its deepest point and constitute the enzyme's active site [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.53 0.71 -4.13 7 167 2012-10-03 02:27:23 2004-08-26 09:46:28 7 2 124 7 43 472 122 120.10 39 83.09 CHANGED ssshcsVpsFhsAhpcsDhcssltchhsssphapN.GlsshpGhscshshlcth....shsuhEhcIh+IAADGutVLTERsDthhh..Gs.hhphhVhGlFEVpss+IshWRDYFDl.shhctssc .........................................................tshcsVpsFls.Ah..p..s..t...D..h.-...s...s.......s...s.lhs...-.................V....Y....p...N......V..u......h......s......s..l....+.........G.....t...c...t.s.t.p.h.lc.th.........pst..sG...F-.l.+.....I...H...+...I..u..A....D....G.s.s..........V.....L.......T......E........R.o............D...slh..h..........G.s.l.....+....lp..FWV..CG.V..F.E..V...c....D.G......+ITl.WRDYF.Dhhchh+u...ht................................... 1 8 30 36 +7690 PF07859 Abhydrolase_3 alpha/beta hydrolase fold Bateman A anon Pfam-B_100 (release 15.0) Domain This catalytic domain is found in a very wide range of enzymes. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.45 0.70 -4.76 130 13075 2012-10-03 11:45:05 2004-08-29 17:28:14 8 147 3378 94 4994 21100 3681 191.50 20 57.99 CHANGED llYhHGGGal...lGsh.sop..cshspplAptssshVlSV-YR...LA.PEa.aPAAh-DuhsuhpWlhpps.........................................t.s..hD...ss.+lsluGDSAGGNLAtslu..ltsccp........h.............lsutlLlaP...shs.............pt..tsh....t..tpt.h.l....s..tsthp.hhh.ch...Yl...s..sssts.cP.....hsuPl....hu....s...lss..LP.P.......shlhsuphDsLpD-uttYAc+Lp.t..sGVpVphh.phtGhhH.uFhh ..................................................................................llahH....G.G.......G...a.h.......h.u...s...h...pst................pth..h....p.....t......l............s..............t............t..............t.............s..............h......h......l........l....s............l...s......Y....+....................L........u...........P..............c.............t............................a...........P...........s.............s...........l........p.............D......s............h......s...........u.h.p....a.l...hpph....................................................................th.t....hc.......sp...p..l.s.l...s........Gc.S........AG..G..p..Lu.h.s.hs..........hhhp..pt......t.....t....................................................................t.s.h.l.l...h..h.P.....................hh.sh.....................t........p.h..........................t.p..............h.....h...........................s.......t......h.t..........h.....h....h....ph...............hh....s.........ttthp....p..................................h.h...s..P.h.......t.........s..................htt....h..P....s................................h.h..l..h...s.....u..t....h...D.......h...h.......s......p...s.......h...h...............h..p...t.....l.t..t...........ts.....h...............s..p.hh...h.........s...h...H.sh..t.........................................................
............................................................................................................................... 1 1372 2820 4031 +7691 PF07860 CCD WisP family C-Terminal Region Yeats C anon Yeats C Family This family is found at the C-terminus of the Tropheryma whipplei WisP family proteins ([1]). 25.00 25.00 110.20 108.90 18.80 18.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.09 0.71 -4.71 2 5 2009-01-15 18:05:59 2004-09-01 16:41:52 6 2 2 0 2 5 0 110.40 83 6.06 CHANGED hQpAK.........Pp.psh.E+...TPTESKGGGFWSKVGSGIAAPFKWIWHGITWPFRKLFGSRSEA.........................phP........phlssAVspF....................LpFL.. PS........HTQSAKPTEKPKEEK...TPTESKGGGFWSKVGSGIAAPFKWIWHGITWPFRKLFGSRSEAPSSTTNATGNTsGKTRVKRDT.ppPPEHPLKSVN-QIppVTsAVNNFQKSVLTSLKsFFTYLTDTA+LpFLp............ 0 2 2 2 +7692 PF07861 WND WisP family N-Terminal Region Yeats C anon Yeats C Family This family is found at the N-terminus of the Tropheryma whipplei WisP family proteins ([1]). 25.00 25.00 362.20 362.20 19.80 16.80 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.88 0.70 -5.36 2 12 2009-09-11 19:29:28 2004-09-02 08:46:53 6 7 2 0 4 12 0 250.50 77 31.64 CHANGED AhhL..SS.........psPPuLSLLSSVSpsSl.pSso+aopVS.sssp.sClosssssshhlDPlT..usotQT.oCssuhSs...pPtot....las.Yo-TsSYlYVPYIpss.l.LYY...KtsPSS.lshshoDhtTsa..........us-+VlShohososlhsLLTstNlaFa.pl.so.psplTVsl+hphcsshl.SshPuLRsS.aTaSLopPstslplDt.TGtlphS....olsspslTAhAlphsTuThlT..YhhDo.h. LT..LLLLSSLQYETAFARQTPPALSLLSSVSSTSV.SSNTKYTRVSNTNTQEVCVTTNTNVSLLIDPVT..SSTKQTLSCTPSLSP...QPQTH....IYVPYTDTSSYLYVPYITNTHISLYYTDKKADPSSFLTFPHTDIATPY..........GDEKVlSITKTTTNLIALLTTRNIFFF.DIHVTEKPKITVPIHKQIDNTYL.SDIPSLRNSRYTFSLTHPNKDITIDRYTGQIHLS....SLPTSPITAIAIN+DToTHIT..YAlDo...ss... 
0 4 4 4 +7693 PF07862 Nif11 Nitrogen fixation protein of unknown function Yeats C anon Yeats C Family This domain is found in the Cyanobacteria, and may be involved in nitrogen fixation, but no role has been assigned ([1]). 22.90 22.90 23.10 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.17 0.72 -4.02 116 396 2009-09-14 15:19:10 2004-09-02 09:07:36 6 4 112 0 200 413 401 48.80 26 53.68 CHANGED MStcp....LcsFLpcl.p.........sDssLppplpu..st.st-plltlApptGaphotc-lt .............MS.cp....lctFlpcl.p..........sDssLpcplps......sp..ssccl....ls....l..A....pptGashotc-l.............. 0 34 83 179 +7694 PF07863 CtnDOT_TraJ Homologues of TraJ from Bacteroides conjugative transposon Yeats C anon Yeats C Family Members of this family have been implicated in as being involved in an unusual form of DNA transfer (conjugation) in Bacteroides ([1]). The family has been named CtnDOT_TraJ to avoid confusion with other conjugative transfer systems. 25.00 25.00 25.00 25.00 24.40 24.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.38 0.72 -3.51 15 330 2009-01-15 18:05:59 2004-09-02 09:22:06 6 2 117 0 39 273 4 62.30 53 19.65 CHANGED FMIIGIlGYFTlPTVuuWIIpAGGu.GuYu+sVNptut+uGssA.......uusuGAAhGssuG+lp..............u+ ....FhlIGIlGYFTlPTVAGWIIQAG.Gs.GuYuR..NVNpsAs+uushA........uusAGAssGNhuGRl...th....................... 0 20 36 39 +7695 PF07864 DUF1651 Protein of unknown function (DUF1651) Yeats C anon Yeats C Family This is a family containing bacterial proteins of unknown function. 
20.40 20.40 20.80 20.70 19.70 20.10 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.40 0.72 -3.88 45 133 2009-01-15 18:05:59 2004-09-02 09:34:00 6 1 30 0 51 136 674 70.90 25 80.29 CHANGED pGWLlssppphlhpFc...-ppohpt.s.tlhlcphchhs...upPslhcs+c+l.phcpAhctWccLlpsGWpcs.ps.hs ................sWLlssp...pphlhpFp.....spp.s.p..s.hlhlcphphhs......spPslh+spcch.ph-pAhchWppLhppGWpps.p..h........ 0 2 20 42 +7696 PF07865 DUF1652 Protein of unknown function (DUF1652) Yeats C anon Yeats C Family This is a family containing hypothetical bacterial proteins. 21.30 21.30 21.30 25.20 21.20 19.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.04 0.72 -4.37 16 126 2009-01-15 18:05:59 2004-09-02 09:42:41 6 1 42 0 31 125 0 67.00 32 78.74 CHANGED hh-lRpllEpuFLPhpCpCohssstshTl+lh.-ssSGclpLhloGluhppLposRDlspLlspL+t-hs ........phppllEtuFhPhtCpsohssstoMsl+Lh.D.sSGcp.lslsGlshuplhoucDlscLltpl+p-h..... 0 0 0 20 +7697 PF07866 DUF1653 Protein of unknown function (DUF1653) Yeats C anon Yeats C Family This is a family of hypothetical bacterial proteins of unknown function. 21.80 21.80 22.10 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.26 0.72 -3.79 54 591 2009-01-15 18:05:59 2004-09-02 10:42:42 6 8 565 2 154 512 59 62.70 44 59.86 CHANGED sGhYRHYK.........G.s.YcVlulA+HSETcE..lVlYcsLYG-h..ulWVRPhsMFhEsVpl.-GpplP......RFphl ............................................shYRHa.K............G..shYpVlsl.Ac...HSETpE..pl..VlYpsLY.....u-h...ulWVRPhsMFhEp.V.h..Du..pphP......RFph................... 0 48 101 131 +7698 PF07867 DUF1654 Protein of unknown function (DUF1654) Yeats C anon Yeats C Family This family consists of proteins from the Pseudomonadaceae. 
20.90 20.90 21.10 23.50 20.60 17.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.47 0.72 -4.61 12 129 2009-01-15 18:05:59 2004-09-02 10:55:33 6 1 57 0 18 103 2 71.10 37 83.43 CHANGED s.ouhE+LGhRlpchINuPsAQcpRhslIaRL-s-up--WEpllptlsEs-slsLsap.DDGuVpltWph.p-- ..s..puY-tLuhRlQ+.lIsuPsAQpp+tsllhRhssEspccWsplL-pIuEs-slplshp.-DGo.VpltWph......... 0 1 1 12 +7699 PF07868 DUF1655 Protein of unknown function (DUF1655) Yeats C anon Yeats C Family This protein is found in some prophages found in Lactobacillales lactis ([1]). 19.20 19.20 19.90 110.70 18.20 17.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.73 0.72 -3.98 2 18 2009-01-15 18:05:59 2004-09-02 11:17:53 6 1 12 0 3 13 0 54.70 80 86.09 CHANGED aI..Dphth.hh-hL.A+ARHaIcls-hs.+LFDGpSVsTFThhtNshQVEh.cu YILDDSIAFELMDLLKAKARHFIQLNEYVYRLFDGQSVVTFTTLENDIQVEMVKG. 1 2 2 2 +7700 PF07869 DUF1656 Protein of unknown function (DUF1656) Yeats C anon Yeats C Family This family contains bacterial proteins, many of which are hypothetical. Some proteins in this family are putative membrane proteins. 21.30 21.30 21.30 21.50 21.20 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.83 0.72 -4.18 72 1584 2009-01-15 18:05:59 2004-09-02 11:23:22 7 4 929 0 230 607 18 58.50 37 80.14 CHANGED pElslhGlahPslllhhllAhslshllctlLschshaRhlWH.uLFcl...uLalhllus.l .........lslhGlhF.PPlFhthlluh.slahll+RlLtshthhch.lWHPuLFsh....uLashlhsL............. 0 34 89 154 +7701 PF07870 DUF1657 Protein of unknown function (DUF1657) Yeats C anon Yeats C Family This domain appears to be restricted to the Bacillales. 
24.00 24.00 24.70 27.40 23.70 23.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.07 0.72 -4.28 27 523 2009-09-11 09:19:45 2004-09-02 11:26:35 6 3 182 0 90 296 1 49.90 36 36.63 CHANGED pl+psl.AuLKuspAsLcsauLpTcsppAKphappsupphcpllpslpsRl .........l+psL.AuLKusp...AsLEsFuLsTcsppAKphappsApphppIlspLpsRl..... 0 30 66 74 +7702 PF07871 DUF1658 Protein of unknown function (DUF1658) Yeats C anon Yeats C Family This family of small proteins seems to be found in several places in the Coxiella genome. 25.00 25.00 25.20 34.20 22.70 24.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.92 0.73 -7.26 0.73 -4.18 7 73 2009-01-15 18:05:59 2004-09-02 11:31:08 6 2 6 0 19 34 0 30.10 42 65.46 CHANGED sslKshcs.uhsp+PhtGssstahshhhh. sslKshcs.uhsp+PstGNsssahshhhh..... 0 19 19 19 +7703 PF07872 DUF1659 Protein of unknown function (DUF1659) Yeats C anon Yeats C Family This family consists of hypothetical bacterial proteins of unknown function. 20.70 20.70 20.80 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -8.03 0.72 -4.33 20 335 2009-01-15 18:05:59 2004-09-02 11:35:22 6 1 251 0 81 255 0 46.30 31 62.86 CHANGED spshsosLhLchpsGls.ssGcPlh+s+oaspVKssAscpDlYsVApA .................s...hstsLtLhhpsG.lD.psGcslh+s+sappVKssAs.-plasVApA.............. 0 38 70 76 +7704 PF07873 YabP YabP family Yeats C anon Yeats C Family This family of proteins is involved in spore coat assembly during the process of sporulation [1]. 21.40 21.40 21.40 23.10 21.20 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.79 0.72 -4.41 73 814 2009-01-15 18:05:59 2004-09-02 11:39:05 6 1 421 6 177 539 9 66.30 30 70.00 CHANGED hsplolpu+pclhlpsh+slhsFsspclhlpTphGhLhI+GcsLplpplss-p..ltlpGpIpslpY. ...............clslhG+ppl.Ipsh+slhsFsscclhLp.TphGhLtI+GpsLplcplssEc..ltlcG.pIpsltY...... 
0 93 147 157 +7705 PF07874 DUF1660 Prophage protein (DUF1660) Yeats C anon Yeats C Family This protein is found in Lactobacillae prophages. 21.40 21.40 21.60 21.60 20.90 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.75 0.72 -3.69 9 38 2009-01-15 18:05:59 2004-09-02 11:54:19 6 1 30 0 7 29 0 60.00 40 93.52 CHANGED MKLhCKLFGHKW-....hsPhsphcs.aCtRC.........................saNRSDLDESENVFP...EKWLDKHMD ..MKLhCKL.FGHKWp.........ssshpphhc.tCpRC............htp..pt.....h.hNRSDLDES-sha....c............................. 0 2 4 6 +7706 PF07875 Coat_F Coat F domain Yeats C anon Yeats C Domain The Coat F proteins, which contribute to the Bacillales spore coat. It occurs multiple times in the genomes it is found in. 23.80 23.80 23.90 23.80 23.60 23.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.58 0.72 -3.84 65 780 2012-10-01 21:25:29 2004-09-02 11:54:34 7 2 357 0 179 551 2 63.60 24 50.09 CHANGED sDcsl.sschLsstKsssssYusAhsEsssPpLRpsltptlspshphphplaphhhpKGaYps.p ......sDpsh.ss-hLsshKtsspsYupslsEssssplRpsL.pphhsps.phptplaphhhpKGaYts............... 0 81 147 156 +7707 PF07876 Dabb Stress responsive A/B Barrel Domain Yeats C anon Yeats C Domain The function of this family is unknown, but it is upregulated in response to salt stress in Populus balsamifera ([1]). It is also found at the C-terminus of an fructose 1,6-bisphosphate aldolase from Hydrogenophilus thermoluteolus (Swiss:Q9ZA13; [2]). Swiss:Q93NG5 is found in the pA01 plasmid, which encodes genes for molybdopterin uptake and degradation of plant alkaloid nicotine. The structure of one has been solved (Swiss:Q9LUV2) and the domain forms an a/b barrel dimer ([3]). Although there is a clear duplication within the domain it is not obviously detectable in the sequence. 
21.20 21.20 21.40 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.10 0.72 -3.54 51 1481 2012-10-02 00:20:33 2004-09-02 12:33:13 7 15 921 48 686 1398 234 96.20 21 80.22 CHANGED lcHlVha+h+csssspph..tph....hpshpsL........splsslpphp..hGtshsspp.....ttu.ashshhspFcshpsh.csYts.cP..........tHhphs.phhpshhp....phhshDaps .........................lpHlVha+h+..psss....tpph......tph.......hpth..psL....................spl..s.t.l..t..p...hc....sG..hshs.sps...................pu.ashslhspFpstcsh.pt.Yts..HP..........t.Hht..ht..phh...ts...h.h..p.....phh.shDa..................... 0 173 439 571 +7708 PF07877 DUF1661 Protein of unknown function (DUF1661) Yeats C anon Yeats C Domain This is a family containing bacterial proteins of unknown function. Many of the proteins in this family are hypothetical. 25.00 25.00 25.60 25.60 23.10 22.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.87 0.72 -7.22 0.72 -4.45 9 52 2009-09-11 10:00:21 2004-09-02 13:59:39 6 2 3 0 9 48 0 29.80 45 32.42 CHANGED LsR-hhssRA+TKKFSR+hh...RphsspotpFR LsREhKpSRApTKKFS++hh...+php.p.t.hh............ 0 9 9 9 +7709 PF07878 DUF1662 Protein of unknown function (DUF1662) Yeats C anon Yeats C Domain This family contains bacterial proteins of unknown function. This domain belongs to the Ribbon-helix-helix superfamily suggesting these may be DNA-binding proteins. 22.20 22.20 22.30 22.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.52 0.72 -4.23 27 70 2012-10-02 18:44:02 2004-09-02 14:12:51 6 1 32 0 30 72 224 51.30 31 59.07 CHANGED usps+SsRlpVlLPppls-cLpthAcpEuRosSs......hAclLIpcGlpRhtst .....stscsshIpslLPcplsccLpthApp-uRosus......hAclLlpculpRtht..... 
0 2 10 24 +7710 PF07879 PHB_acc_N PHB/PHA accumulation regulator DNA-binding domain Yeats C anon Yeats C Domain This domain is found at the N-terminus of the Polyhydroxyalkanoate (PHA) synthesis regulators. These regulators have been shown to directly bind DNA and PHA ([1]). The invariant nature of this domain compared to the C-terminal Pfam:PF05233 domain(s) suggests that it contains the DNA-binding function. 25.00 25.00 25.70 36.80 21.60 20.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.97 0.72 -4.24 47 465 2009-01-15 18:05:59 2004-09-03 09:55:21 6 3 457 0 198 441 191 63.80 56 33.74 CHANGED plIKKYsNRRLYDTpoSpYlTL-cltphVhpup-FpVlDAKTG-DlTRslLhQIIhEpEs..tGp.s ...lIKKYsNRRLYcTpTSoYlTL-DltphVhcuE-FpVhDAKoG-DLTRolLhQIIhEcEs.tG..s..... 0 60 127 160 +7711 PF07880 T4_gp9_10 Bacteriophage T4 gp9/10-like protein Fenech M anon Pfam-B_73396 (release 14.0) Family The members of this family are similar to gene products 9 (gp9) and 10 (gp10) of bacteriophage T4. Both proteins are components of the viral baseplate [1]. Gp9 (Swiss:P10927) connects the long tail fibres of the virus to the baseplate and triggers tail contraction after viral attachment to a host cell. The protein is active as a trimer, with each monomer being composed of three domains. The N-terminal domain consists of an extended polypeptide chain and two alpha helices. The alpha1 helix from each of the three monomers in the trimer interacts with its counterparts to form a coiled-coil structure. The middle domain is a seven-stranded beta-sandwich that is thought to be a novel protein fold. The C-terminal domain is thought to be essential for gp9 trimerisation and is organised into an eight- stranded antiparallel beta-barrel, which was found to resemble the 'jelly roll' fold found in many viral capsid proteins. 
The long flexible region between the N-terminal and middle domains may be required for the function of gp9 to transmit signals from the long tail fibres [2]. Together with gp11, gp10 (Swiss:P10928) initiates the assembly of wedges that then go on to associate with a hub to form the viral baseplate [1]. 25.00 25.00 25.30 25.20 24.60 24.50 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.74 0.70 -5.34 14 97 2009-01-15 18:05:59 2004-09-03 14:03:50 6 3 54 22 1 90 685 229.60 30 52.13 CHANGED KphIclGslsssuTGDhLhcGGsKlNsNhsplYsthGDtc.................l.HAsGhap+aups.............hshsl.hGstasIDTo...sGs.lsVsLP+.....G...chGcslchhsssGShu....sNPlslhstuGDoIcG.ssss..hplspsasclpLhhsusu.......pW-Y....pl-slhssshssVscphh.IsssspTD.lslFs.pspYNsspLhVhtcss......sshhhpssp.hh.ls.sps...scVhss-h.ssl+ht...tsssDplh.lsah..sG..lhshhSS.tspsl+l ....................KphIslGphss-uTGDhLhcGGpKINsNFsplYsthGDsc..................h.ausGhapphsts.............................hssthGp.aslsTo...sut.lsl..plsc.....G....chscslchhss.uohs....spsltl..tssDolt...G.ssss.....h.lspsa.clplhhhuss.......hWpY....plpphhssc.ssltpp...ltststss.lsl...ht.tspYshhpl.Vhtpss......s...hptsp.hh..s.t.s......splhsh-h.tsh+ht....p.tDp.l..hsa...ss..l.thhps..t.th................................................................................................... 0 0 0 1 +7712 PF07881 Fucose_iso_N1 L-fucose isomerase, first N-terminal domain Fenech M anon Pfam-B_11456 (release 14.0) Family The members of this family are similar to L-fucose isomerase expressed by E. coli (Swiss:P11552, EC:5.3.1.3). This enzyme corresponds to glucose-6-phosphate isomerase in glycolysis, and converts an aldo-hexose to a ketose to prepare it for aldol cleavage. The enzyme is a hexamer, with each subunit being wedge-shaped and composed of three domains. Both domains 1 and 2 contain central parallel beta-sheets with surrounding alpha helices. Domain 1 demonstrates the beta-alpha-beta-alpha- beta Rossman fold. 
The active centre is shared between pairs of subunits related along the molecular three-fold axis, with domains 2 and 3 from one subunit providing most of the substrate-contacting residues, and domain 1 from the adjacent subunit contributing some other residues [1]. 25.00 25.00 29.00 28.90 24.90 23.80 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.84 0.71 -4.73 17 960 2009-01-15 18:05:59 2004-09-03 16:01:50 7 3 940 15 59 415 11 163.90 70 29.68 CHANGED tshP+IGIRPsIDGRchG..VRESLEtpTMpMA+ulAcLlpssl+assGssVECVIADssIGusuEAAtsuc+FpppsVslTlTVTPCWCYGoEThDMsPhhPKAlWGFNGT..ERPGAVYLAAshAuHsQ+GlPAFuIYG+-VQDscDpsIPpDVpEKlLRFARAulAVupM+G .....s..hPKIGIRPsIDGRRhG..VRESLE-QTMNMAKusAsLloppL+assGssVECVIuDosIuGhAEAAACcEKFpppNVGlTITVTPCWCYGSET..lDMDPshPKAIWGFNGT..ERPG...AVYLAAALAAHuQKGIPAFuIYGHDVQDADD.T.oIPsD..VcEKLLRFARAuLAVAoM+G................ 0 22 41 49 +7713 PF07882 Fucose_iso_N2 L-fucose isomerase, second N-terminal domain Fenech M anon Pfam-B_11456 (release 14.0) Family The members of this family are similar to L-fucose isomerase expressed by E. coli (Swiss:P11552, EC:5.3.1.3). This enzyme corresponds to glucose-6-phosphate isomerase in glycolysis, and converts an aldo-hexose to a ketose to prepare it for aldol cleavage. The enzyme is a hexamer, with each subunit being wedge-shaped and composed of three domains. Both domains 1 and 2 contain central parallel beta- sheets with surrounding alpha helices. The active centre is shared between pairs of subunits related along the molecular three-fold axis, with domains 2 and 3 from one subunit providing most of the substrate-contacting residues [1]. 
21.30 21.30 21.50 21.40 20.90 21.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.03 0.71 -4.63 21 1038 2009-09-11 15:21:47 2004-09-03 16:04:42 7 4 952 15 63 438 21 167.00 60 30.95 CHANGED +oYLshGuVSMGIuGSllss-FFpcYLGM+sEtVDhoEllRRh-ptIYDp-Ehc+AhtWlccps+..cGcDpN..Pcchpps.cpp....ct.ac.llKMhhIhRDlMtGN.+LtphGatEEuhG+NAlsuGFQGQRpWTDaaPNGDhsEAhLNosFDWNGhREPalhATENDoLNuluMLFs+LLTss ..........................KSYLShGuVSMGIAGSIVspsFFppaLGM+spuVDMTElpRRlDptIYDc.tE...hEhALuWsccsh+....G.DpN...............scphpcstE.pp....ct.hc.slhMsMhhRDhMpGNs+LA-hGhhEEulGaNAIAAGFQGQRpWTDpa.PNGDhhEAlLNoSFDWNGlRcPFVlATENDSLNGVuMLhG+.LTsT...................................................... 0 24 43 52 +7714 PF07883 Cupin_2 Cupin domain Bateman A anon Pfam-B_81 (release 15.0) Domain This family represents the conserved barrel domain of the 'cupin' superfamily [1] ('cupa' is the Latin term for a small barrel). 21.50 21.60 21.50 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.47 0.72 -4.47 156 20335 2012-10-10 13:59:34 2004-09-06 15:00:03 6 127 4001 229 6298 26569 7119 70.00 18 38.76 CHANGED hhhpltPGs.sss....Hh.H.ssp...EhhaVlpGc.hplp.ls...up...phhlpsGDshhhss.s..h.Hph.tN....susp.ssph...lslh ............................................h....h.sGt..ths.........+t...H..st.......p......ch.h..h....V...l...p..G..p...h..p....l....p...l.s.................sp.............pht..l..p..s...G..D...s........l..h...h.s.s..s.............h..Hth...ts.........s....pt...h.h...h...h.................................. 0 1857 3867 5218 +7715 PF07884 VKOR Vitamin K epoxide reductase family Bateman A anon [1] Domain Vitamin K epoxide reductase (VKOR) recycles reduced vitamin K, which is used subsequently as a co-factor in the gamma-carboxylation of glutamic acid residues in blood coagulation enzymes. 
VKORC1 is a member of a large family of predicted enzymes that are present in vertebrates, Drosophila, plants, bacteria and archaea [1]. Four cysteine residues and one residue, which is either serine or threonine, are identified as likely active-site residues [1]. In some plant and bacterial homologues the VKORC1 homologous domain is fused with domains of the thioredoxin family of oxidoreductases [1]. 21.00 21.00 21.30 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.10 0.71 -4.32 38 864 2009-01-15 18:05:59 2004-09-06 15:44:38 9 18 670 1 380 845 319 134.20 23 51.63 CHANGED tthhhhlhuslGllsShhlshpchshhp....hhshCsls...ssluCu..pVhsS.husl..FG.............hPsullGllsassll.shulhhhh...........phschhh.hslhhsshsuslhshaLhal.h..ahIp.uhChaChssaslolslhllshhts ...........................................t..hhhhhhuhlGlhhohhls.h...chphhp................h.shCs.ls........shh..uCspV.hs.S..uph....aG...............hPsuh..hGlhhaslhl.shuh.h...hh........................phsphhh...hhlhhu..s.h.hus..s.h..sh..aLha..h......ah.lt.shC.aChsshshshslhhhsh..t................................................ 1 129 240 313 +7716 PF07885 Ion_trans_2 Ion channel Bateman A anon Pfam-B_55 (release 15.0) Domain This family includes the two membrane helix type ion channels found in bacteria. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.87 0.72 -4.26 123 9163 2012-10-03 11:11:44 2004-09-08 16:19:51 11 158 2837 170 4782 12148 1623 81.40 21 22.30 CHANGED hhlhhhhhhushhhhh..............t..h....shhs.ulYashsoloTlGYGD.l...sPt.sss.....t.......+lhs.hhhhhhGlshhshhlshhsphhhp ...............................................................hh....hhhh..uhhhh.h................................................t..t.h..s.hh....s...uh.Ya............s.hlTho..Tl...G..........Y......G..D..l.....sPt..oth..........u..................................+lhs...hhh....h.l.h.....G.l.s..hhshh.h.shlsphh..t....................... 2 1860 2647 3883 +7717 PF07886 BA14K BA14K-like protein Fenech M anon Pfam-B_4068 (release 14.0) Family The sequences found in this family are similar to the BA14K proteins expressed by Brucella abortus (Swiss:Q44701) and by Brucella suis (Swiss:Q8FVU0). BA14K was found to be strongly immunoreactive; it induces both humoral and cellular responses in hosts throughout the infective process [1]. 21.70 21.70 21.80 23.10 21.30 21.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.50 0.72 -4.27 58 605 2009-01-15 18:05:59 2004-09-09 13:46:19 6 5 152 0 174 430 7 30.90 47 18.05 CHANGED sahpaCspRYRSYcPsssTY.sas.G.R+.C. ....HlpaChsRYRSYcspDsTYQPas.GsR+tCh....... 0 13 69 96 +7718 PF07887 Calmodulin_bind Calmodulin binding protein-like Fenech M anon Pfam-B_4579 (release 14.0) Family The members of this family are putative or actual calmodulin binding proteins expressed by various plant species. Some members (for example, Swiss:Q8H6T7), are known to be involved in the induction of plant defence responses [1]. However, their precise function in this regards is as yet unknown. 
21.00 21.00 21.50 22.10 18.70 18.20 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.87 0.70 -5.37 11 283 2009-01-15 18:05:59 2004-09-09 13:48:24 6 6 26 0 157 262 0 252.30 37 49.84 CHANGED plpLtFhspL..phPlFTGuKlEucsGsslclsLlDssTs.....lssGst......SshKlEllsLcGDFsstpc.-pWTsEEFppp.IV+pREGKRPLLsGslpVsLps.Glusl.u-lsFTDNSSWhRSRKFRLGsRVsss...sGlRlpEAho.EuFsV+DHRGE.....................................LYKKHaPPhLpDEVWRLEKIGK-GAaH++LssptIsTVc-FL+hhshDss+LR.pllGssMSs+hWcshlcHA+oClLus.clalYp....spssslhFNslaEllGlhhsspahs..hcsLsphppshl...........cpLlppAYpphspl.ph-schhhshs ............................................................................................hpLpFhspl....sl.FTss+lpu-pusslclsL.hDssst.....hlssGs.......ushKl-llVL-G...DF....ss...cp....p...ps..W...T...t..--F...psp...lV..pt....RpGKcPLL..........s.........G.........-..l..lpLcp...Ghusl..s-lpFTDNS.S.a.h.RS.....RK...FRLGs+.ls....s....s.....tuh...Rl...pE....Aho.-sFsV+D+RG.E.....................................hhKK+aPPtL..pD-VaRL-+IuKc...............G........s.hH.+.........cLp................p......t...............tI.oVp-FLph.h..h...h..c.............p..cL..R....p..............lLG.....thS.s+hW-shlcHA+.sCslss...c.ha..hYh........ttpphslhFNslhphhGh.....h.s.tp.hhs...pplsp.pp..s..............pthhppAYpph.pl.tht....................................................................................................... 0 20 89 126 +7719 PF07888 CALCOCO1 CoCoA; Calcium binding and coiled-coil domain (CALCOCO1) like Fenech M, Mistry J anon Pfam-B_4504 (release 14.0) Family Proteins found in this family are similar to the coiled-coil transcriptional coactivator protein coexpressed by Mus musculus (CoCoA/CALCOCO1, Swiss:Q8CGU1). This protein binds to a highly conserved N-terminal domain of p160 coactivators, such as GRIP1 (Swiss:Q61026), and thus enhances transcriptional activation by a number of nuclear receptors. 
CALCOCO1 has a central coiled-coil region with three leucine zipper motifs, which is required for its interaction with GRIP1 and may regulate the autonomous transcriptional activation activity of the C-terminal region [1]. 30.00 30.00 30.00 30.00 29.90 29.70 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.79 0.70 -6.22 3 312 2009-01-15 18:05:59 2004-09-09 13:50:00 6 6 55 0 115 266 0 340.20 34 63.29 CHANGED spp+tGVsFLNVA+TYlPNTKVECHYTIPsGpcsSoRDWIGIFKVssuoVRDYaTFVWAlsPEuss-GSsuHCSVQFQASYLP+PGsQpYQFRYVDppGsVCGpSssFsFuEPRPMDELVTLE-....EDuGsDlLLVVPKATlLQNQLEtuQ+ERNDLM+t+LALEG-V..................o-hRSRIppLEsALcpScccspELcEQaK-lsoScpphouERNlLssQcAEpppRILELEsDIQTlocKh..................pEp-+lLtthp-IcA-LEppKuELp...........................QRLK-pTlQh+Dp.spt.chQLElEsL+EcLRouQEhLuSSQQKAsLLGEELAShuSlRD+TIAELH+SRLEoA-lul+LS-LuL+LKEGpuQWuQERsuLpQSsEA-KD+IlKLSAElL+LEcuLQEERSQRcsLcsELupEKDuspVQLSEs+RELoELRSALRVLQKEKEQLQEEKQELL-YlR+LEtRLDKlADEKWsEDAs..lsEDccuulshosslDLoDS-DESPtDEu.SpcLuPsuLDEcscstSoPssP+Espc.VVIsQPAPIuPaLSGlAEDssS- 
.............................hu.....tV.F.sVs+.YlPpsplpC+YTLs.hhhPpspDWlGIFK..........V........GW....sos.RDYaTFlWu.sh........P..-s..hspsoshpp.pVtFp....................u.YLP.p.s....ss..p.hYQF.sYV.spp.Gp.lpGsSsPFQ.....FR..s..p.s...-..L....l..shpp.................t.t.pDh.Llls...+....s.t.....h...p.........plc..c........pEpp-.....Lh...p......Lptph...........................tp.hppphtph....ptthtptpp...p........htpL.t.t..p..th........p........t..t.....h.......p.....Epp.............h...........p..........tc.........tch....hp.Lc....pp......thppp................................................t..............t.........p..................phpt.....t.....................................................pht.............p...hp................t......................................p.......................................................................................................................................................................................................................................................................................................................................................................................................................................ttt................................................................................ 0 16 26 55 +7720 PF07889 DUF1664 Protein of unknown function (DUF1664) Fenech M anon Pfam-B_4797 (release 14.0) Family The members of this family are hypothetical plant proteins of unknown function. The region featured in this family is approximately 100 amino acids long. 
30.00 30.00 30.00 30.40 29.80 28.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.67 0.71 -4.33 14 118 2009-09-11 16:08:29 2004-09-09 13:51:16 7 2 27 0 61 110 0 120.00 38 37.07 CHANGED uoG.hshhll.ssslGAlGYGYhWWK............GhphsDlMaVT++shusAssslsppL-plp-slsusK+HLopRIpplDc+LDppp-ltcpspc-VstlppslssIpcDlcslppslpsLcuKlsplEtpQ ..........s..sshs.hllsssslGAl..GYuYhW.WK............GhphoDlMaVT+RshusAssslsKpL-pVssuluusK+HLo...pRIpplDp+l-cpt-lspthcc-V..stl+tslspItpDlpslpphVpsL-sKl.splEtpQ........... 0 14 37 50 +7721 PF07890 Rrp15p DUF1665; Rrp15p Fenech M, Mistry J, Wood V anon Pfam-B_9434 (release 14.0) Family Rrp15p is required for the formation of 60S ribosomal subunits [1]. 21.60 21.60 22.60 22.10 20.40 19.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.04 0.71 -4.10 32 310 2009-01-15 18:05:59 2004-09-09 13:52:49 7 4 276 0 216 307 1 139.90 30 50.85 CHANGED uausuhspILu...........o+htsssps..PILu+sKchh...cphps-cLEpKA................................................ct.hhpEK+phhp+sRV+s..........ls...............................phh-pE+pL+Kl...........Ap+GVV+LFNAVpssQhpsccthpct.ptpphh............................cchl...sc...lSKcpFLDll...............pu ...............................................................................uausuhscILs............pchspspps..slLu+sKphtt....cc....hp..p-c.lEp+s.......................................................................pt.h.tpcK+ph.ppsRl+ssls..................................................................pst-pE+pL++l...........Ap....+GVVpLFNAVpptQtps.p.c.t.h.c.cs.t.t.p.tppt.................................................pchl.....sp..lSKpsFL-hlp.u......................................... 0 72 115 177 +7722 PF07891 DUF1666 Protein of unknown function (DUF1666) Fenech M anon Pfam-B_9387 (release 14.0) Family These sequences are derived from hypothetical plant proteins of unknown function. 
The region in question is approximately 250 residues long. 25.00 25.00 33.00 32.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.81 0.70 -4.96 6 77 2009-09-10 15:06:31 2004-09-09 13:54:16 7 2 16 0 50 75 0 227.60 41 40.08 CHANGED ppDLEhVYVGQlCLSWEhLaWQYcKAh-lhE...cD.ash+pYNpVAGEFQQFQVLLQRFVENEPFQ.GPRVpsYlRpRClh+shLQVPsIR.pDctKsKKssRcE....ppD.sIoospLsEIlcEohplFWcFLRADKccusl.slKuhptoQhp.....DssDlELhhplKpchQK................KE++L+-lhRSssCIlKKFpKpcpcp...sps.hFhutl-LKLVoRVLsMs+lsp-cLtWCpcKLs.........................KIshssR+..lphEsSFsLFPs .....p-LEtsYVuQlCLoWEhLpWpYpphpchhp...........sss..p..ssp.pasplAtcFQQFQVLLQRFlENEPFE.G........RschYsRtRshh.pLLQVPph+.t-p...p-cpp.tpp......t.c.hIsuspllcIhEEoIpsFhtFl+sDKcpss....hhpshhpp..p..hp.....ssh-.pLltp.lppshpK.................Kch+LK-lh+stpshh++hhptp.......pchphhhu.lDlKlVoRVLpMscloc-QLhWCccKhs.........................Klph..h.pt+..lph-.sS..lhFP........................ 0 6 33 42 +7723 PF07892 DUF1667 Protein of unknown function (DUF1667) Fenech M anon Pfam-B_9631 (release 14.0) Family Hypothetical archaeal and bacterial proteins make up this family. A few proteins are annotated as being potential metal-binding proteins, and in fact the members of this family have four highly conserved cysteine residues, but no further literature evidence was found in this regard. 20.20 20.20 20.30 20.60 20.00 19.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.58 0.72 -4.29 35 405 2009-01-15 18:05:59 2004-09-09 13:57:04 6 5 308 1 112 341 7 80.60 40 59.44 CHANGED sRGhcYAcpElpsPpRllTooV+lpsGc.hshlsVKTccPIPKchIh-sM+tLpplpVpAPV+lGDVllcNlhsTGlcllAT+ .............RGttYucpElssPpRllTooV.+.l..c..........uGp.hshlsVKTpcsIPKpplh-shctLpp...lpV...pAPV+hGDVllcslhsTGVDllAT+............... 
1 63 88 101 +7724 PF07893 DUF1668 Protein of unknown function (DUF1668) Fenech M anon Pfam-B_5066 (release 14.0) Family The hypothetical proteins found in this family are expressed by Oryza sativa and are of unknown function. 21.30 21.30 21.50 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.58 0.70 -5.32 43 450 2012-10-05 17:30:43 2004-09-09 14:01:57 8 10 10 0 256 374 0 265.40 19 82.32 CHANGED RRalaLllcstt.ts.....asl++lD...........................................s.stssssssthpthpLP.sshhph...................tshpFhsl.....sss+llus.....-ts.........sp...shlYDssspsVt.................shPs..Ltp.PKhhP..lulsl.................Gss.LY......lh-pssps..ss..........pFEsl....shp...............tttcsWsWps.....L......PP.PP.......Fststp..............lsuYAVV.....sG....spIhlooss.........................G........TYuFDTssp..p.....Wp+h...G-W.tLPFpGpAcYVs..EhsLWhGlussstt.....hl.sAsDlsusst........pP.hh..hthh........chtsP.....................hsupLlpL.G.uG+FCls+hhpst.................................t.stppttspphhshlTulcl...........................stspsspLchlp++up.pa 
...............................................................................................h.phs.............................................................................................................hP.....hph....................hphh.h...........tstllsh........s.t....................tt...shlaD..sttt.....thh............................hPt.......h.t..sp..h...s...h.h.h...........................................................ssp..la......hhpt....................Fcsh...ht..........................ththp............lPs..P.P...a.h...............................ltuaslh......s....pp.lhlShtt...............................G........Tash...D......s..pt...t.....Wpph....GpW..LP.F..G...pu.ass..ch.t..hahGlstts............l.suhslss........................h.....................th...........................ts.lh.h.G...su.....paChhchh........................................................hh.h.hhth....................................................................................................................................................... 1 0 55 167 +7725 PF07894 DUF1669 Protein of unknown function (DUF1669) Fenech M anon Pfam-B_9335 (release 14.0) Family This family is composed of sequences derived from hypothetical eukaryotic proteins of unknown function. Some members of this family are annotated as being potential phospholipases but no literature was found to support this. 
36.60 36.60 36.70 37.60 36.40 36.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.84 0.70 -5.21 23 379 2012-10-02 13:01:53 2004-09-09 14:15:35 7 3 44 0 217 312 0 254.00 40 43.34 CHANGED L--spsp.csstu....psphhYsEptRLAl-sLLpsG.cAapphLppEplhcFLSspElptlhpssph.pssscp...........................tt..sttpstssst-su.....SsTYaPttS.Dt-sP.sLDLGWPs..sssa+GsT+lslahQP..sh.cspssIKEllR+hIppApcVIAlVMDlFTDlDIhpDLl-Aup+RpVsVYlLLDpsslstFLcMCp+hplshpcl+...........NlRVRoVsGssahs+oGp+hpGplpcKFlLlDsc+VhoGSYSFoWosu+lcRs....hlplhoGplVEtFDcEFRpLYApSpsl ................................s.....t.hasEttRLAl-sLlptG.cAahphLppEph...hsFLSt.Elphlhpphpt.....t..s.st.t..................................t..t..s.t....s..tstsss........ShTYaP.tS.DhpsP.tL-LGWPt...psshpGhTcsslahQP...s....spt.sIK-hlRphIppA.ppV.lAlVMDhFTDlDIhpD.ll-Au.+.RtV.sVYlLLDptslphFLcMspchp...lp.t+..l..c...........NhRVRolsGssahs+oGp+.hpGplppKFlllDscpVhsGSY.S.FoWo.su+hcps....hlhlhoG.plVE.FDcEFRpLYAtSps.............................. 0 8 29 86 +7726 PF07895 DUF1673 Protein of unknown function (DUF1673) Fenech M anon Pfam-B_4746 (release 14.0) Family This family contains hypothetical proteins of unknown function expressed by two archaeal species. 
22.90 22.90 23.10 23.50 22.80 22.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.56 0.71 -4.79 23 47 2009-09-14 08:49:41 2004-09-09 14:24:15 6 4 14 0 41 45 6 189.10 18 76.51 CHANGED hhhh-pIKKLMGWCPNA+shEs.ppphs.psF-usstscuc.tpssshcssuhap+t....psphLlhshhhThhhh.lhls.hGlsh......hhLLhGhhlu...Lhhhlhs..................WKtphppa-ultcp.lhcsssKpph.....lhhlhhhlhhhhhhh..hh.t.h.tthshpsllSF...........................luGhhlh.hWlhYhQllYWEKKN+phlahcppps....pchYllsE+p .......................................................c.I++hMGWCPsspt.ps.tpphs.t.hcht..ptst..psts.hs...thhp+h....psphLlhshhhohhhh..lhl..hulsh................hhllhGhhhs...Lhhhlhs....................a+p.hppac..shtph.lhpsp.+pph.....hh.hlhhhlhhhhhhh.......h...hs.phhhuh.............................hsuhhlh..hahha..hphhhacc..+pphhlhhppp.t.....t.hhh.tc........................................................................................................... 0 15 21 21 +7727 PF07896 DUF1674 Protein of unknown function (DUF1674) Fenech M anon Pfam-B_4326 (release 14.0) Family The members of this family are sequences derived from hypothetical eukaryotic and bacterial proteins. The region in question is approximately 60 residues long. 23.50 23.50 23.50 25.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.51 0.72 -3.75 75 599 2009-01-15 18:05:59 2004-09-09 14:35:58 7 4 563 1 312 536 163 65.50 37 65.31 CHANGED Eu-pp+tt...tt.................................P.....tElG...G.scG.EPsRY......GDWEpcGRssDF ......................................................................tttt..t.......................................tpstP........tElG...G.s.cG.....EP.s..RY......GDWEhKGRssDF.......... 0 89 167 248 +7728 PF07897 DUF1675 Protein of unknown function (DUF1675) Fenech M anon Pfam-B_4280 (release 14.0) Family The members of this family are sequences derived from hypothetical plant proteins of unknown function. 
One member of this family (Swiss:Q9SFV5) is annotated as a putative RNA-binding protein, but no evidence was found to support this. 20.50 20.50 20.60 20.50 19.60 20.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.09 0.70 -4.54 6 138 2009-01-15 18:05:59 2004-09-09 14:43:05 6 7 27 0 84 134 0 171.40 27 53.82 CHANGED EIELsLGLSLGGpFGsD....h++s.+LsRSSSlssssshscs.....puushts.ph............sL.RToSLPsEoEEEh+KR+EhQoL+RhEAKR+RsE+...hsusts..............stush.pRccpsusshspssspsstuustssoSSuhsEhssts.stt.spstt.push..h.h.spts.tts..Pssspsppsspspsspcstcs.p.cpsust-sspshhpDMPCVoT+GDGPNGK+V-GFLY+Y.tptEEVRIlCVCHGoFLSPAEFVKHAGGscVs........+PLRHIV .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.phP.hV.ops...Ps....G+plpGhhY+Y..pt.ppVpIVCsCHGsahoPsEFVcHAGu....ps........................................................................... 2 10 49 71 +7729 PF07898 DUF1676 Protein of unknown function (DUF1676) Fenech M anon Pfam-B_4779 (release 14.0) Family This family contains sequences derived from proteins of unknown function expressed by Drosophila melanogaster and Anopheles gambiae. 
24.30 24.30 24.50 24.30 24.10 24.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.33 0.72 -3.74 64 512 2009-01-15 18:05:59 2004-09-09 14:45:18 8 6 30 0 390 577 0 109.70 18 36.55 CHANGED +hlphlschh....pp...........sp.lplssGlplV+ssst....sttspstphtpt.....................hsphlhc+ltpalpoHsLplphsp.ttpt.h.............................t.htEuR.................t+Kh++....hhhPhlh .................................hhphhschh....pp...........sp..lpls..culpll+ssss.....tttspshp.ptt.................................t.hsphlhc+ltpalpoHslplp.lsptttpttt.............................tshtEuR.......................h+Khc+.......hhhPhlh.............................................. 0 103 147 331 +7730 PF07899 Frigida Frigida-like protein Fenech M anon Pfam-B_4728 (release 14.0) Family This family is composed of plant proteins that are similar to FRIGIDA protein expressed by Arabidopsis thaliana (Swiss:Q9FDW0). This protein is probably nuclear and is required for the regulation of flowering time in the late-flowering phenotype. It is known to increase RNA levels of flowering locus C. Allelic variation at the FRIGIDA locus is a major determinant of natural variation in flowering time [1]. 
21.30 21.30 21.50 21.90 20.00 20.70 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.69 0.70 -5.65 22 700 2009-01-15 18:05:59 2004-09-09 14:49:35 6 7 74 0 143 485 0 188.80 38 53.50 CHANGED sspsssspsssts.......pLppLCtpMDucGLhpFl.pphcphs..slppElssAL+tusDPApLVL-ulpsha.sspts...tpphphhshcpsCllLLEsLh.............hps.lss.......sl+-cA+plAhcWKspl.....sspsspsh-uhuFLpLLusFulsupFcp--lhcLlhhsut++Q...sscLspuLGLs.-+h.............................................................................................................sshIcpLlspGcplcAlph...hathsLs-pFsPlslLKsaLpc.s+csst.hhptss.......................s...putspstc+-lsAL+uVlKClE-p+L-tcas.-s..Lpccl.tpLcctctp+++sspssp ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hstlhh...h.......h...............................................................................................................IVE.hlppGhpIEAlph...sasFGh.DKFs...sss.lLpuaL+..uKcs.pth.....................................................................................t.p...h..h.phhpt.p........................................ts........................................................... 0 11 76 106 +7731 PF07900 DUF1670 Protein of unknown function (DUF1670) Fenech M anon Pfam-B_9559 (release 14.0) Family The hypothetical eukaryotic proteins found in this family are of unknown function. 
25.00 25.00 25.00 26.20 24.90 24.80 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.33 0.70 -5.12 6 52 2012-10-04 14:01:12 2004-09-09 14:52:00 6 3 10 0 21 55 62 189.40 31 74.16 CHANGED lhs-apFhsG..Vs+hlscplhphhcp.a..ssplcsGQllahulSs-Ess..GKslp-ppLlsVhLTLls.EDh-shpc.hthsElhcp+llRlspEAa-QsALLTpsDlAhLLshSpsTIS+clcpYcpc.GcllPTRGhl+DIGsuloHKphIlcLYLKGhpToEIARpTpHS.Eul-RYIKDasRVphLhpKGhss-EIphhsGlScplVcEYhEL .....................................h..h.p.h.pthhphhpp..a.....pphp.GQhhahslstppss..GKshpphphhslhLolhs..-D.c.....hppth.t.pphpppplhRlhpEAacQsuhLTptDlA.hlLshSspTlp+plcphpcc.s.llPsRGph+.DlG.suloHKphl.lch.alc.s.hphs-IuRpTpHS.pAlpRYlpsFp+Vhhh.hpcshpspElthlsshoppLlp-Y.t.............................. 0 16 19 19 +7732 PF07901 DUF1672 Protein of unknown function (DUF1672) Fenech M anon Pfam-B_9698 (release 14.0) Family This family is composed of hypothetical bacterial proteins of unknown function. 25.00 25.00 25.20 25.30 22.60 24.90 hmmbuild --amino -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.92 0.70 -5.41 7 690 2009-01-15 18:05:59 2004-09-09 14:53:00 6 1 200 0 7 359 0 239.30 62 91.09 CHANGED ETKSVPEEM-ASKYVGQGFQPPAEKDAIEFAKKH+-chtKhGEQFFhDNFGLKVKATNVVGSGDGVEVaVHCDDHDIVFNASIPFDKshIcp-uShRSpDpGDDMSshVGTVLSGFEYRAQKEKYDNLhKFFK-NEcKYQYTGFTKEAIsKTQNsGYpNEYFYIos.shsLpEYRKYaEPLI+KsDcpFKcGMcpu+Kplshpups-lsTTLFSTKcNFTKcssh--lIchS-cl++h+phPcshplolQlucspINTp+s.YssppslEhtVapp....E ..................ETKSVPEEMEASKYVGQGFQ..PPA...EKDAIEFAKKH+.cchtKhGEQFFhDNFGLKVKATNVVGpsDGVEVaVHC-.DHsIVF.NASlP....h.KshI......cpcu..ShR..S..pDpGDDMShhVGTVLSGFEYRAQKEKYDNLYKFhK-NEcKYQYTGFTKEAINKTQNsGYpNEYFYITh.s.hsLpE.YRKYaEPLI+KsDcpF+cGMppu.+Kpls.hpu.ps.pssoTLFSppcNaoKcppl-s.VI-hS-plcch+.shPp.p.plolQlupphIsT.p+s.YscppslchtV...................................................................................... 
0 4 4 7 +7733 PF07902 Gp58 gp58-like protein Fenech M anon Pfam-B_4289 (release 14.0) Family Sequences found in this family are derived from a number of bacteriophage and prophage proteins. They are similar to gp58 (Swiss:Q38355), a minor structural protein of Lactococcus delbrueckii bacteriophage LL-H [1]. 30.00 30.00 30.10 30.10 29.70 29.80 hmmbuild -o /dev/null HMM SEED 601 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.77 0.70 -6.30 10 178 2009-01-15 18:05:59 2004-09-09 14:56:42 6 11 111 0 20 176 0 345.80 24 41.91 CHANGED MSRDPTLTlDE.......SNLoIGuDGRsYYTFTA-sDspoV+LAsN.ClGTTRFNQLMIERGsKsTsYVAPVVVEGoGpsTGLFKs.......LKElNLELTDTcNSpLWuKIKLTNpGMLpEYacssIKoEIlpoAcGItpRIS-DT-+KL............................................ALIN-oIsGIRR-YQDADRcLS....uSYQAGI-GLK........ATMANDKlGLpAEIpsoAQ....GLSQ+YDsEl+pLSAKI....TTTSSGTTEAYESKLsGLRAEFTRSNQGMRsELE.........................ScISG..LpusQQoTApQISQEI+NRpGAVSRVQQsLDSYQRRLQsAE......cNYSSLTQTVpGLQScVSuPssplpSRhTQLts.I-Q+VTRscVpSlIspSGDSIhLAI.Kuth.pSKMSusEIISAINLNuYG.V+IuGcpIALDGNTTVNGAF..uAKIGEhIKL+ADQIIuGTIDAsKIpVINLNASSIV.......GLDAN.......FIKA+ItYAITuL......LEGKVIKARNGAMhIDLsSuphsFNpsApINFNos-NALhRpD.uTpTtFl+FoNussh....uapGus.YAthsIoS......suDGl-sssSGpFuGlRhaRhAouYppTusVDpsEIYGDsVlItcshNhshGaphp.s+sspllDhN.....h.A.u.hh..............hT 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................l.t........................................h..t.....t..th.....o.......lp.t....s...sp.....h.sl.........psthsus-llStINLss..tG...l.pIs...Gcp.ltls.GsThl..ss..sh............sutl.up..................lpAspIhsGolsAsclpshsl..sAspIs.....................tlsAs...............hhp.thh...t............................................................................................................................................................................................................................................................s....................................................................................................... 0 3 11 17 +7734 PF07903 PaRep2a PaRep2a protein Fenech M anon Pfam-B_4102 (release 14.0) Family This is a family of proteins expressed by the crenarchaeon Pyrobaculum aerophilum. The members are highly variable in length and level of conservation. The presence of numerous frameshifts and internal stop codons in multiple alignments are thought to indicate that most family members are no longer functional [1]. 
25.00 25.00 32.90 32.80 21.10 19.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.68 0.71 -4.46 8 34 2009-01-15 18:05:59 2004-09-09 14:58:44 6 1 1 0 34 34 0 80.20 53 72.37 CHANGED hsAlspWYhpCFGcs...Gp.tKVVKRLEEYauMCQMtpAsKREaG.+.splhhSEaALRRAFWW-GEWpGKPhSCFVTE+cAVCKVGcpphcFhVhssscGVYl+PEh.Lhp-WIKVAHRGsD ...................................................htEhG......h..-huL+tuF..ctt.hstshSCFVTE++AlCKVG-+hAsFYVFDTPcGVYL+PEIKLlp-WIKVAaRGs....... 0 34 34 34 +7735 PF07904 Eaf7 CT20; Chromatin modification-related protein EAF7 Wood V, Bateman A anon Wood V Family The S. cerevisiae member of this family Swiss:P53911 is part of NuA4, the only essential histone acetyltransferase complex in Saccharomyces cerevisiae involved in global histone acetylation [1]. 20.80 20.80 22.00 22.00 20.60 20.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.27 0.72 -4.01 19 246 2009-01-15 18:05:59 2004-09-09 15:29:00 8 3 222 0 178 256 0 90.30 31 35.27 CHANGED EpEhpLF+ultp.aKPV...Glp+HFtMhsIh-clspt..............hscphsupslWcKLsphYsLctLD-hE.....pshshsscpc..........................-FsLPps-auc ..........................EhpLhcuhht.a+Ps............Ghp+HFpMlsIt-+hppp.........................................hscphsscsIWc+LsohYsLptL--pE...............h..shssppc..............................................................pFsLPpp.ht.t............................................................................. 0 52 92 145 +7736 PF07905 PucR Purine catabolism regulatory protein-like family Fenech M anon Pfam-B_4388 (release 14.0) Family The bacterial proteins found in this family are similar to the purine catabolism regulatory protein expressed by Bacillus subtilis (PucR, Swiss:O32138). PucR is thought to be a transcriptional activator involved in the induction of the purine degradation pathway, and may contain a LysR-like DNA-binding domain. 
It is similar to LysR-type regulators in that it represses its own expression [1]. The other members of this family are also annotated as being putative regulatory proteins. 28.90 28.90 28.90 29.80 27.20 28.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.22 0.71 -4.54 113 1282 2009-09-13 12:56:03 2004-09-09 15:29:15 6 5 619 0 359 1123 18 122.10 25 24.49 CHANGED p-lLphsslpts..plluGp.....psLc+slcWVpls...Ehs.Dhssa..........LpGGELlLTTGhsltp.ssp...hppa...lcpLscsusuu..Lslths.hhp.p.lP..psllphAcphslPllplst...plsFsslsptltptlh ................................-lLphstLptt...pllu...Gc.....ps.L.s.p.s.Vphlplh...Ess...Dh...s.sa..........lpuGEL.lLToGhshpc.stp...........hppa...lcpLtp.tss..uuLslph.s.t.hht..plP...ppll...phAsphslPllplPt....shsasplhptlhp.h..................... 0 152 276 325 +7737 PF07906 Toxin_15 ShET2 enterotoxin, N-terminal region Fenech M anon Pfam-B_4512 (release 14.0) Family The members of this family are are sequences that are similar to the N-terminal half of the ShET2 enterotoxin produced by Shigella flexneri (Swiss:Q47635) and Escherichia coli (Swiss:Q47634). This protein was found to confer toxigenicity in the Ussing chamber, and the N-terminal region was found to be important for the protein's enterotoxic effect. It is thought to be a hydrophobic protein that forms inclusion bodies within the bacterial cell, and may be secreted by the Mxi system [1]. Most members of this family are annotated as putative enterotoxins, but one member (Swiss:Q8X606) is a regulator of acetyl CoA synthetase, and another two members (Swiss:P76205 and Swiss:P23325) are annotated as ankyrin-like regulatory proteins and contain Ank repeats (Pfam:PF00023). 
25.00 25.00 29.70 29.40 23.50 20.10 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.92 0.70 -4.94 8 670 2009-01-15 18:05:59 2004-09-09 15:30:05 8 1 306 0 7 369 0 250.50 38 50.79 CHANGED o...hph........pphlppcphosps.pchlptcpK..PYhSc.KppsplNLNGps..s-psuchIhCRHLAuQYhhDhh..pssGKVsh.ccaSSs-pIpp+lslpcccphpsl..p.PuslYalsNscFGpslsshFppMcppucsspolhlposNHuMAlRL+IKcoppGp.+.aVVshYDPNtTssplRs...ssstsclpphSlccFls....ppthpsYu.pcsspphhs.psphPcppslh..t.......scls.sPLsssslhhhMstGhsctIpplhcplchhscpc..sphhplLsA+s ..............................h.....h...........pp.hs.......hp.spK......c.p..spl.pLNGpshF...s..tpP..p...stIhCRHluhQYh.D.h..pspG+lsh.ccYSSsEp..ltpHlshppcp.....ph.....hsLhpp.PtGppVlAstDFGhslphaFs+MpsNs....lShMuAILhssN.............HshuVRLRIKp..ospGphc.YVVSlYDPNsTNsplRh...spsppshthhSL.cFhs...ss.sh.pWuscplh..spsluIl..PhLPcpp.lhlhssh.......schs.sPlpPush.LlMuhG.spplhplh.plph....hschc....hhphlsAhN.................... 0 2 5 6 +7738 PF07907 YibE_F YibE/F-like protein Fenech M anon Pfam-B_4781 (release 14.0) Family The sequences featured in this family are similar to two proteins expressed by Lactococcus lactis, YibE (Swiss:Q9CHC5) and YibF (Swiss:Q9CHC4). Most of the members of this family are annotated as being putative membrane proteins, and in fact the sequences contain a high proportion of hydrophobic residues. 
27.20 27.20 28.10 28.10 26.00 25.90 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.36 0.70 -5.19 89 1478 2009-01-15 18:05:59 2004-09-09 15:37:41 6 2 897 0 279 1002 29 239.00 28 70.23 CHANGED ss.lhhlshlahhlllllGtppGlpul.luLhlshhllhhhl.lshhhtGhsslllsllhshlhhhloLhllsGhsp+ohsAhluTlhulhlshllshhhhphsplsGhs.EpsphLthhstt...lph.tlhhuullIuuLGAlhDluhoIuSulhElhppsP....ploh+pLhpSGhslG+DlhGThsNTLlhAahGuulshlllah..thshshspllNtchhuhEllpslsGuIGllLslPlTshl.suhh ...............lhllshlhlhlllllGtcpG.ltul.loLhlNhhllhhhl...l.hh.tGhshhllshlhsllhshlsLhllsGh.sh.+Th.sAhluTllush.lshhlshlhhthsphpG...hs..h...Ep...h...p.....lth...hshs....lph..tlhluslllusLGAlhDluloIuSulaElhpps.P....ploh+cLhpS.G.hplG+-lhGThsNTLlhuahGusLs..h..hlh.ah..p...sh..ohsphls.p.huhEllpslhGuIGllLolPlTshlsuh.h............. 0 119 208 254 +7739 PF07908 D-aminoacyl_C D-aminoacylase, C-terminal region Fenech M anon Pfam-B_13711 (release 14.0) Family D-aminoacylase (Swiss:Q9AGH8, EC:3.5.1.81) hydrolyses a wide variety of N-acyl derivatives of neutral D-amino acids, in a zinc-dependent manner. The enzyme is composed of a small beta-barrel domain and a larger catalytic alpha/beta-barrel. The C-terminal region featured in this family forms part of the beta-barrel domain, together with a short N-terminal segment. The beta-strands of both barrels were found to superimpose well. The small beta-barrel domain does not seem to contribute to the substrate-binding site or to be involved in the catalytic process [1]. 22.40 22.40 22.40 22.40 22.30 22.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.14 0.72 -4.65 150 859 2009-09-11 00:09:33 2004-09-14 13:31:24 7 23 530 12 324 878 733 47.40 33 9.08 CHANGED Ppp.lpDtuoas..cPhphAsGlctVlVNGtlsh.c.......sGp....sTG......spsG+llRps ........stp.ltDhuoa...p...cPpphupGIctVhVNGthsh.c..........cGp....hTu......spsG+hl+t.t............ 
0 114 222 284 +7740 PF07909 DUF1663 Protein of unknown function (DUF1663) Fenech M anon Pfam-B_4106 (release 14.0) Family The members of this family are hypothetical proteins expressed by Trypanosoma cruzi, a eukaryotic parasite that causes Chagas' disease in humans. This region is found as multiple copies per protein. 25.00 25.00 53.80 51.70 18.00 17.40 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.87 0.70 -5.97 2 85 2009-01-15 18:05:59 2004-09-14 13:33:30 6 9 6 0 4 85 0 202.10 76 97.85 CHANGED htlpRcAVspcE......................RusRchlEtucAAh.DE..............LG.....-.hhssTHE+................uspshsAcEsttRGp.VGpE.............RcshsGhH...cpslcuEER...hsR+hLphtEstuhsplhp..........chR.hppp+t.phhtA.pDAths.httEER.......RshlsScE+hsRRhlEtG.hut.D...phGcEhusA......saphAlpthuhpEsAARtp.h.E.....YuhpRDul.Sp.psshpsLc+GpustVs..............ELhpEYcpsscchhsEtlhu..D........AsRtplVhEE.....sLH+ssl.oE....p+AVR+...............................slERsEttAhD.l..........................GEp.hSsshpR...........................slEsLttE ..........................................................................................................................................................................RcshhG.LH...RDAVDSEER...AVRRpLE+GEAAAVDELGE..........EYRSATHERsVEALAAEEDAARGQLVGEEREDshGLHRDAVDSEERAVRRCLERGEAAAVD....ELGE..EYtSA......THERsVEALAAEEDAARGQLVGEER............EDsaGLHRDAVDSEERAVRRCLERGEAAA..VD................E....LGEEYtSATHERsVEALAAEED........AsRsplVhEE..................................................................................................................................................................... 0 1 2 4 +7741 PF07910 Peptidase_C78 DUF1671; Peptidase family C78 Fenech M, Bateman A anon Pfam-B_9699 (release 14.0) Family This family formerly known as DUF1671 has been shown to be a cysteine peptidase called (Ufm1)-specific protease [1]. 
29.10 29.10 30.50 30.30 25.70 28.50 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.77 0.70 -5.22 38 453 2012-10-10 12:56:15 2004-09-14 13:38:51 8 18 236 3 315 461 3 187.60 30 42.79 CHANGED upthhlhushshaHYtp.uhsDcGWGCuYRslQhllSWhht.......phh....s.....slPoIhcIQphL.chh.DK..ss........shhGo+cWIGosEsthllspL..slps+lhpht...............................................psu-l...sphhptltpaFp..sp.u....................sPlhltt..s........................shuhTIlGl-hp.......psuphphLlhDPHasus.ssh+hlhpcshsua...............+sssh..hp+...sphYpls .............................s...hh.sshthhHa......hp..thsDp....G.....WGCu.........YRslQhlhSahh.t..........thh.................s........slP.ohhclQp..hl.psh.Dc..ss..........shhGo+pWIGohEst....hlLspl..sl......ps+llphp..................................................................................pssph...sphhc.lh........pa..Fp..st....s................................sPlhltt....s........................shu+TIlGlphp..................tss..p...hphLlhDPpasus...p.hp........hlhpp..shssh..........................+s..ssh..h.p...pt.Yplh......................................... 0 110 167 249 +7742 PF07911 DUF1677 Protein of unknown function (DUF1677) Fenech M anon Pfam-B_4922 (release 14.0) Family The sequences found in this family are all derived from hypothetical plant proteins of unknown function. The region features a number of highly conserved cysteine residues. 25.00 25.00 27.30 26.80 23.00 22.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.33 0.72 -3.80 26 199 2009-01-15 18:05:59 2004-09-14 13:44:12 8 4 19 0 134 186 0 88.20 45 57.50 CHANGED ElE.ss.+CECCGhpEECTspYItpVRscasG+WlCGLCuEAVp-Eht...R........psshslEEAlctHhuhCccFsuts...sPshp...lssuMRclLR+p .......lEss+CECCGhpEECTstYIspVRs+atG+WlCGLCuEAV+-Eht...R.........ps.shshEEAlptHhuhCpcFps...s.....sP..slp...lspuMRplLR+p............. 
0 12 72 108 +7743 PF07912 ERp29_N ERp29, N-terminal domain Fenech M anon Pfam-B_28781 (release 14.0) Domain ERp29 (Swiss:P52555) is a ubiquitously expressed endoplasmic reticulum protein, and is involved in the processes of protein maturation and protein secretion in this organelle [1,2]. The protein exists as a homodimer, with each monomer being composed of two domains. The N-terminal domain featured in this family is organised into a thioredoxin-like fold that resembles the a domain of human protein disulphide isomerase (PDI) [2]. However, this domain lacks the C-X-X-C motif required for the redox function of PDI; it is therefore thought that ERp29's function is similar to the chaperone function of PDI [2]. The N-terminal domain is exclusively responsible for the homodimerisation of the protein, without covalent linkages or additional contacts with other domains [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.50 0.71 -4.20 3 92 2012-10-03 14:45:55 2004-09-14 13:52:51 8 4 73 13 52 132 2 117.60 51 47.87 CHANGED lTs+GCVDLDolTF-KVlcKF+YSLVKFDTAYPYGEKHEAFTuLAhEssuoT-ELLlApVGIKDYGEp-Nt-LG-RYKlDKEsYPVIaLF+.GDu-sPVpa.PucG-VTlDuL++FLKupTuLYIGhP ........................................................spGslsLDploF.Kl.I.sK..Ka.l.LVKFDstYP.Y.G.EK..p...D...E..Fp...+lA.E.susuoc.DLLVAEV......GIp.D.YG-K..N.h-LuE+YKl-K-.saPVh.h.LFp.Gs...h-sPl.a.....sGslps.sslp+alKup..u..lYlGhP......................................... 1 15 18 33 +7744 PF07913 DUF1678 Protein of unknown function (DUF1678) Fenech M anon Pfam-B_4886 (release 14.0) Family This family is composed of uncharacterized proteins expressed by Methanopyrus kandleri, a hyperthermophilic archaebacterium. 
25.00 25.00 35.50 141.40 19.10 18.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.35 0.71 -4.76 5 16 2009-01-15 18:05:59 2004-09-14 14:21:22 6 1 1 0 16 16 0 197.30 70 97.59 CHANGED ss.+htlP...PcDPVEplRsLRVLhEshRRGphPhLtsoYRoVNGpshGPYY.ARWR.sSRaERGRTLYLGKSENESVpFlE.WLVSLsRtEVLELARH......LMRNLRSVLKoLLscVSuLPYK+...ARRVLAR.............GL..ALAFD.....ARPuuSP+.IRDVLEELPDRLESFllRTLGGWPAaYSS+Lc+llRuRRuSp-t+R.............tcVPDs..plcRhK.Rc ....p....HlP..IPpDPVERIRALRVLREVaRRG+KPSLEVTYRTVsGSTCGPYYVARWRRDSRa+HGRTLYLGKPENESVpFVE.WLVSLDRpEVLELARH......LMRNLRSVLKTLLTEVSsLPYK+...ARRVLAR.............GL..ALsFD.....ARPSpSPR.IRDlLEELPDRLESFhlRTLGGWPAHYSS+LcKlIRuRR+SLDG+H..............ElPDVtLELERWKLR.+.......... 0 16 16 16 +7745 PF07914 DUF1679 Protein of unknown function (DUF1679) Fenech M anon Pfam-B_4694 (release 14.0) Family The region featured in this family is found in a number of C. elegans proteins, in one case (Swiss:Q19034) as a repeat. In many of the family members, this region is associated with the CHK region described by SMART as being found in ZnF_C4 and HLH domain-containing kinases. In fact, one member of this family (Swiss:Q9GUC1) is annotated as being a member of the nuclear hormone receptor family, and contains regions typical of such proteins (Interpro:IPR000536, Interpro:IPR008946, and Interpro:IPR001628). 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.44 0.70 -5.84 20 190 2012-10-02 22:05:25 2004-09-14 14:22:31 6 7 8 0 178 1879 311 303.80 22 85.78 CHANGED Lh-suDGLLpTHVTWcDVpcslpcphsTcAchGcs+phssIuDhpGFMS+luLlEs-Ws...t.psscpLPcKFsLKIoSpLslhslschhc.pssssascEc....hcthpphs+chHNREVshYKlLpKhN.pssl.hsKVYhhKpF-...--NcLKGalhhEalsslcshpha-slss--Lhsll+uIAshpAhuhpLo...--Eppph.tGt-a.c.hhpphhs-cslcshacphRphh.s...sE+lcEhlchhcpYh.plh.hcphsplschlGh.+sVLsHGDLWsuNlLaspps-GphplctlIDaQslphusPA.DLsRLhlosLSspDRRp+hpclL-paY-sFlcsL..ts.ctPYohcpLc-SYpLYFPhhulhlLPhlushl-sp...shspcct-ph+chshcKhtuLhEDlhchHc.sh .....................................................................................................................................................................................................................................................................................Gh..S...hhhlt.pa.........t..hPtphhlK................hs................t.....h..h.p...........h..t...........t.h...h........hHN...pEsthY.p.....h.h.t.php............th.......hs..+....................h...Y.....hh..pthp.......pt....t.....h...t...G...hl..hh.-..h..h..t..s..h.....h.....h.....t..h.....ht....sh...s........pp..l...h..l.................hcs.........luthpuh.u.h.......p...............t.c..p........h...p........h.....................t..h.........t........h...h......t..p.......h....h....t...................................h....p..t....h.....h.....t..................h........t............................................t....p.h.......t......p........h.....h..........p.h..h....t...h..................................................................h.....p..........h........p.....p.....h........p....p........h..h........G...............h......................V........l...sHuDlhtsNhla.......p.....p....t........p....p....................................p..l......t...........u...................l....I..D...........a...............
.Qh................sp...hGsss.D....lh..+.l.h.h.........s....h..........o..s.p...............p..R+.....t.th..ptlL...c.a....a...p...t....h...h.p....h.........................................p.........s........a...o....h....p.....p..l......p......p.....t.......Y...p......h.....h....h..s...h....h....s.....h...h..h.........h...s...h.....h........h..h..................................................t..p....thht-h.........h........................................................................................... 0 85 98 178 +7746 PF07915 PRKCSH Glucosidase II beta subunit-like protein Fenech M anon Pfam-B_9407 (release 14.0) Family The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing [1]. The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum [1]. Mutations in the gene coding for PRKCSH have been found to be involved in the development of autosomal dominant polycystic liver disease (ADPLD), but the precise role the protein has in the pathogenesis of this disease is unknown [2]. This family also includes an ER sensor for misfolded glycoproteins and is therefore likely to be a generic sugar binding domain. 
21.80 21.80 21.80 21.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.14 0.72 -3.18 104 586 2012-10-02 14:19:21 2004-09-14 14:25:08 8 29 264 2 365 860 7 84.40 27 19.03 CHANGED upChh...ht.su.......haoYchCah.....cplpQhc............................tctsspphhLGpa..................................t..t.p.p...............tp..ttt...a.............hshtassGs.hC ............Chh...tt..pu.......WWoYEaCaG........+clcQaHt.....................................spchptpphh..LGpa................................................................................ptt.ph.tpp.ssp..............................t.t.....tp.....tt.t..tha..................................hsphhssGohC................................................................................................................................... 0 128 189 295 +7747 PF07916 TraG_N TraG-like protein, N-terminal region Fenech M, Mistry J, Coggill P anon Pfam-B_4841 (release 14.0) & Pfam-B_689 (release 23.0) Family The bacterial sequences found in this family are similar to the N-terminal region of the TraG protein (Swiss:P33790). This is a membrane-spanning protein, with three predicted transmembrane segments and two periplasmic regions [1]. TraG protein is known to be essential for DNA transfer in the process of conjugation, with the N-terminal portion being required for F pilus assembly [1,2]. The protein is thought to interact with the periplasmic domain of TraN (Swiss:P24082) to stabilise mating-cell interactions [2]. 
21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.73 0.70 -6.05 62 860 2009-01-15 18:05:59 2004-09-14 15:13:40 6 7 549 0 140 792 47 399.90 21 55.85 CHANGED Elash.hsG.hlpssl.ulhshoGhhs....hs.shlhls...hhlulhhtuhshshptsh..tph.phhhsshllhhh..hhsP......psslphp-pp...............................sshstpsssVPlhhAhhtulooslstuhspshpssh.s.......................h+hhptphptphh.sP..............tltpplpcFhppChhhshth..tp.p.pshh........ss...sc.hshls..................pupsPhpshhhssspsss....................sh.TCpphas......sL+scl.hspss...........shhhhspthhsth.tsttt...shlpshl.spshphhht.u.pshsshhppshhhsshpsuhsp.....t...........husstuhtptshshtsttphstpsLPhhpshlhhhhhshhPlllhluh..h...hshuhshhthahhshha.LutWhshhhllshhsthhht..................sttshshh.shhshstsp.thshlhGhhhlslP.hhstslshuGspshuslss.hhususssuppu ...............................................................phahh.h.u.hlppsh.ul.hhhhuhhs.............hs..shhhhs...hh.l.tsh.h...uh.........s.ptsh..............h.h.th...hhsh...h...ll.hhh...hhhs.......hsslpl.st...................................sshs.ttsspVPlhhuh.hulsstl.stuhstshtphh..s.........................................hp.hp.h..ph...p....hhhh..ss...................tltpphtsahppChhhshhh....tp.t..pphh.........ss........s.hshlh.................................................................pp.psh.pth..sss......................................sh.oCpphhs.............tL+stl...tphp........................thhhhhp.thhsh...h...s.........................shhhth...h..s.pshph...........u..........tshpphhtps.hhssh.tp.uhsp.........................hssst..u.htttphsh..hshhphhhphl....Phh.shlphhhhhhhPllhhhuh.....h....phshthhthahhshha.lthW...hhlhshhhph.h.t.........................stsshsh........th.p.t..t.......ht.s...hGhh....hh...hlP.hhshh...hsh.u...u.htsh..sshhs...hhsss.tsst......................................................................................................................................................
....................................... 0 28 72 112 +7749 PF07918 CAP160 CAP160 repeat Fenech M anon Pfam-B_9359 (release 14.0) Repeat This region featured in this family is repeated in spinach cold acclimation protein CAP160 (Swiss:O50054) CAP160 is induced during periods of drought stress; its precise function is unknown but it has been implicated in the stabilisation of membranes, cytoskeletal elements, and ribosomes. By acting as a compatible solute, it may reduce the toxic effects of cellular solutes that accumulate at high concentration during dehydration; it may also function as an enzyme that produces such a solute [1]. Other members of this family are also induced by water stress, abscisic acid, and/or low temperature, such as desiccation-responsive protein 29B (Swiss:Q04980) and CDet11-24 protein (Swiss:O23764). 25.00 25.00 28.80 28.10 18.40 23.30 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.79 0.72 -6.76 0.72 -4.32 7 30 2009-01-15 18:05:59 2004-09-16 15:00:16 6 3 9 0 15 40 0 26.40 67 7.17 CHANGED ISsAsuslADKAsuAKNsVASKLGYGs ..ISSATSAIsDKAluAKNsVASKLGYGs. 0 2 11 12 +7750 PF07919 Gryzun DUF1683; Gryzun, putative trafficking through Golgi Fenech M, Pollington J anon Pfam-B_9179 (release 14.0) Family The proteins featured in this family are all eukaryotic, and many of them are annotated as being Gryzun. Gryzun is distantly related to, but distinct from, the Trs130 subunit of the TRAPP complex but is absent from S. cerevisiae. RNAi of human Gryzun (Swiss:Q7Z392) blocks Golgi exit. Thus the family is likely to be involved with trafficking of proteins through membranes, perhaps as part of the TRAPP complex. 
26.20 26.20 26.30 27.10 26.00 26.10 hmmbuild -o /dev/null HMM SEED 554 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.70 0.70 -6.19 51 282 2012-10-04 00:47:01 2004-09-16 15:05:11 7 8 231 0 202 293 2 497.10 19 45.31 CHANGED lplch.cchhsh..............................lpsshsFpps.phpsspshphplslpSss...ssPlclsplpltFssphpshhhptsp............................................th.s.tsltLhPspsphhshthhscp.....uGchclsulphplt.......................ppthphshshpsp..stttshhhhptppsst..phhlt......................tpstsplhsp...........................sPplplph................shpsshhssEhhsls........lpIpNpcctstpsshththh.........................s...t..sttsps.h.....t..pt.tt.hphslu.lptusptshplhlp..........shpssphtLplphhYpl...............................sssspsslhpsts...................hplshhpPF.phsa.....chhs+h+spsh..................s.............hhhpppahLpsphhshu..........sLhl.ph...............................slplhstssshhs.htppt......................psstlpspp.tt.ph..................................................................................................................................................................thtph.hsscpsssh.sslpIpW+R......................sps....ts.....shssoslslPcl..l.ss.hplhsph......sspstlss.lplchplcN.sotphhshplsh................-sScs..FhFSGs+......phslp.llP.....hSc+plpasLhshps.............Ghh..........................hlP.plplhs 
...............................................................................................................................................................................................................h..lt..tph.sh.......................lpsphhFpp...phpsspshphplhlpups........stslphsplplthss..t.......s.hh.hpt..........................................................sttshpl.ssp.hth..hphth.scp.....stchchsslpl.lt........................ppthphhhshp....t..ttttt..hhpttps.t..ph.hh.................................tt.hh..lhsp.....................................splplph.......................................tht.s.hhssEhhsls........lplpspc.ct..hps.hh.hhhh..........................................t..s.tspsh.............s.tp....t...hths..lupltsspphphh.lhlp............s.pssph.l...lphtY.l..................................tpp..s.h.h.psts.........................hplthh..PF...t............chh.phhsp.h............................................................hhhp.....ahLhsphhshu...............................sL.l.s..........................................plplh.st.s....shtp..tp.s.................................thhltstp.tttth...................................................................................................................................................p.shtst.ps.ssh.sphhlpW+R..............................ss.ts.........................hsposlslPcl..l.......t..hpl.sph...........ss.shl....tts.....htlphplpN..oshh.shpl.s.h................................-sS-s......FhFSG.K...................................p.hslp.llP.....hocpphhapl...hPhhs..........................Gh............................hP.plpl.......................................................................... 0 66 109 164 +7751 PF07920 DUF1684 Protein of unknown function (DUF1684) Fenech M anon Pfam-B_9328 (release 14.0) Family The sequences featured in this family are found in hypothetical archaeal and bacterial proteins of unknown function. The region in question is approximately 200 amino acids long. 
25.00 25.00 26.20 25.90 24.70 19.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.98 0.71 -4.34 59 312 2009-01-15 18:05:59 2004-09-16 15:12:52 6 2 229 4 161 351 127 142.30 35 56.67 CHANGED h.psFpGLsaashDssaplpA...paps.........hsss.....................cshthtoss..GtttphhphGplpFpl....sG.pphpLpsap........thsstLFlsFsDtTsGp..poYuuGRYLphphsssp.u.........p.....lsLDFNpAYNP.CAYsstaoCPlPPtpNcLslslpAGEKp ..........ppFpGlshashDspaplpA...paps.........h.s.ss.....................cslslssss..Gt.ppph..tsGplpFph.......sG..pphpLpsht..........ttsssLalsFsDtToGp....poYuuG....RaLphshsssp.G................plslDFN+AaNPsCAaos.thsCPlPPspNpLslsVpAGEK...... 0 54 121 154 +7752 PF07921 Fibritin_C Fibritin C-terminal region Fenech M anon Pfam-B_31175 (release 14.0) Family This family features sequences bearing similarity to the C-terminal portion of the bacteriophage T4 protein fibritin (Swiss:P10104). This protein is responsible for attachment of long tail fibres to virus particle, and forms the 'whiskers' or fibres on the neck of the virion. The region seen in this family contains an N-terminal coiled-coil portion and the C-terminal globular foldon domain (residues 457-486), which is essential for fibritin trimerisation and folding [1]. This domain consists of a beta-hairpin; three such hairpins come together in a beta-propeller-like arrangement in the trimer, which is stabilised by hydrogen bonds, salt bridges and hydrophobic interactions [1]. 
20.40 20.40 20.40 21.10 19.70 19.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.75 0.72 -4.07 9 39 2009-01-15 18:05:59 2004-09-16 15:16:02 7 3 37 32 1 56 4 96.60 34 16.66 CHANGED hsphspslpD.lpsclGsssSGlpusslphsstl.Gssssss.slpccGlppolKs.psslt..........................G.......hls-ssssG......phYhcccGtWsplssh ..................sthssslQs.lQs-IGNssoGlpGpllpLss.ltGsNssus.sV-c+GlpsolKs.psshs....................................G.......hls-sPpDG......phYsR+cGtWs.lss................. 0 0 1 1 +7753 PF07922 Glyco_transf_52 Glycosyltransferase family 52 Fenech M anon Pfam-B_2778 (release 14.0) Family This family features glycosyltransferases belonging to glycosyltransferase family 52 [1], which have alpha-2,3- sialyltransferase (EC:4.2.99.4) and alpha-glucosyltransferase (EC 2.4.1.-) activity. For example, beta-galactoside alpha-2,3- sialyltransferase expressed by Neisseria meningitidis (Swiss:P72097) is a member of this family and is involved in a step of lipooligosaccharide biosynthesis requiring sialic acid transfer; these lipooligosaccharides are thought to be important in the process of pathogenesis [2]. This family includes several bacterial lipooligosaccharide sialyltransferases similar to the Haemophilus ducreyi LST protein. Haemophilus ducreyi is the cause of the sexually transmitted disease chancroid and produces a lipooligosaccharide (LOS) containing a terminal sialyl N-acetyllactosamine trisaccharide [3]. 
21.00 21.00 21.40 21.40 20.80 20.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.92 0.70 -5.36 16 318 2010-01-08 16:41:35 2004-09-16 15:19:39 6 3 241 4 28 224 7 254.60 24 82.76 CHANGED ppphhhlhhsssts.chshhhcplppps.pshhhppsppp.ph......hhhhhlc.phhp..tchhcplahuN.splhhphhLsth....phhsh-DGhsNhpppshhhtpp.thh..............t++lhphhlsschsl.chhp.pKh+as.hKs...pcNhh....ppsphlslhp...pplpshpsss......slLlsQ...Plhp.D............ccEp....Ichhcsllpcashp....hhhPHPR-shchs..hhhlsscllhEsY.hcllph..hpthplhThFSosslshhspss.hclshl+sph .................................................................................................................h.pph.hlhh..sps.s.K..phahp..plt..h..s.p..c..s...hh.tstphpphh.h........hhhhhK..plhp.........pphc.pla..hA.shch.hhphh.LSph..t.c....lp..TFDDGosN..l..ppsh.h..ht.pp...hht........................tp+lhp...hhhs.p.c..h.sls...chh...phspt+aola.s...hpNlh....cptphl...L.hc........t......t.lssppss.........hplLLup....P.h.p..s...................cccp.....lph.hpchlcp.hshc........hhhPHP...Rps...hchp..t.lhhl.s.sph....l..hE-hhhc.h..lcp.......hhphclashhSosthsh...shss..hcshtlps.................. 0 5 9 16 +7754 PF07923 N1221 N1221-like protein Fenech M anon Pfam-B_9309 (release 14.0) Family The sequences featured in this family are similar to a hypothetical protein product of ORF N1221 in the CPT1-SPC98 intergenic region of the yeast genome (Swiss:P53917). This encodes an acidic polypeptide with several possible transmembrane regions [1]. 
21.20 21.20 22.60 22.10 19.80 19.10 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.84 0.70 -5.57 26 320 2009-01-15 18:05:59 2004-09-16 15:25:12 8 8 225 0 215 308 0 276.60 28 32.65 CHANGED pPshcFpYuDspshscElsEWFsYs..-hs.hhts...+ssF-ccap.a.............pWpctshtpc+sahtpllssLpp.s.thttR..hcsLpslhYluhGsaupss..............................ScspQlphh+pNshLLhphushtsLhplLsphhc.............pstsspssssshsths..........pp-ltllLTlhYlhlpssppp..pspsppphhlcshltsLtcP......sLhshLhchIs..+hRWssssshPl+......................pllLLlWKslLLtFGGpcpLccsKptlctthslp..........t.tpsppsshlTsSPLDYphFRp-Isu+YPsassP..pshl......Ph-h-N .............................................................................................s.ssh-FpYsDss.shttEls.EhasYs....-h...phhhs....ppsFEppap.....t................................pW....chsp..pppcs.......ahh..pllc..tLc...s.s....stptR........hcshcsllYlspGs.au-ss..............................oc.sc.hphh+hNshLLhp..hGshsslh-lLpht.h.-.............................................pstssss..s.hchss.hs.....................................hscss-LpllLolh....Y.lhlEsh+pc..........tt.st.tth.hpshhtp..Ltps................................shshhLhshls..+a.p......s.puspaPh+......................+llLLLWKslL..hsh..G....G.hc.pLpphKtthcthhuls............................................t..pu.sss...sshsss.l-.p..................................t.................................................... 1 55 96 163 +7755 PF07924 NuiA Nuclease A inhibitor-like protein Fenech M anon Pfam-B_43172 (release 14.0) Domain This family consists of protein sequences that are similar to the nuclease A inhibitor expressed by bacteria of the genus Anabaena ((NuiA, Swiss:Q44296). This sequence is organised to form an alpha-beta-alpha sandwich fold, which is similar to the PR-1-like fold. 
NuiA interacts with nuclease A by means of residues located at one end of the molecule, including residues making up the loop between helices III and IV and the loop between strands C and D. The mechanism of inhibition of nuclease A by NuiA is as yet incompletely understood [1]. 20.50 20.50 20.70 38.60 20.00 19.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.38 0.71 -4.25 6 25 2009-01-15 18:05:59 2004-09-16 15:30:56 6 1 21 3 12 30 0 128.40 35 95.48 CHANGED TpsspshLcpLcpAocGLLFMSEoDaPFpsFhW.sshhsloPEpVLptsscs.DssVpslslDsFFusATT.p-WasscEpsTlppFQpLl-TLKssLp-lpVYRlG-lplDVYIlGcsssGsLAGLpT+VVET .....................pphhcpLcpAocGLLahSES-YPFElh..hW..tsttslo.spplhphsGhs.sssVcplslDpFFpsssspp-Was-tppsssp+FQpLlpsLKssL..schpVYRl.GplplDVYIlGcsssGslAGlpTKVlET........ 0 4 9 12 +7756 PF07925 RdRP_5 Reovirus RNA-dependent RNA polymerase lambda 3 Fenech M anon Pfam-B_9372 (release 14.0) Family The sequences in this family are similar to the reoviral minor core protein lambda 3 (Swiss:P17378), which functions as a RNA-dependent RNA polymerase within the protein capsid.\ It is organised into 3 domains. N- and C-terminal domains create a 'cage' that encloses a conserved central catalytic domain within a hollow centre; this catalytic domain is arranged to form 'fingers', 'palm' and 'thumb' subdomains. Unlike other RNA polymerases, like HIV reverse transcriptase and T7 RNA polymerase, lambda 3 protein binds template and substrate with only localised rearrangements, and catalytic activity can occur with little structural change. However, the structure of the catalytic complex is similar to that of other polymerase catalytic complexes with known structure [1]. 
17.30 17.30 17.40 17.30 15.50 15.40 hmmbuild -o /dev/null HMM SEED 1271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.26 0.70 -14.17 0.70 -7.18 3 118 2009-09-10 15:58:44 2004-09-16 15:38:12 6 1 33 6 1 75 0 466.00 63 99.55 CHANGED LFNALPspLQpLSLALSGppPLTD+IFEsAAcAWHVpPRSpsYKLLDHIPFSosVV..IPPSIYsuhsWScYaAlssspVlRVsThsuPDDVYVPNSsIuPLLoPL+TIP-YGpLHPAIENDAo-hGsspARhASTFaKIASSQARQVKlDPpRFLtFLLVopAuPRVPSGVloDQPshhDPopSPALaAIWQIMQ+YKlsGsYYAPALVVsoGAlWWIPPPG+RNsVoVQaLLTDLINLAILAasTcLSPoLEhsGVplYL+AASSsSYAasLLchKSIFPsLSLHSMYRstEFGGKCPsIEWTEPRScYKF+WhGVTQLa-GLRPpoPSpDsKALElhcKYGLsDVu+lII+cR+sHPRHsaDSVRFVRDVMALTSGMYLVRsPTMSVLREYSQoP-IK-PIPPcsWTGPVGNlRYLpDospGPARHLYcTWhtAARQlAtDPpTHDPLsQAIMRoQYVTARGGSSAALKpALtsosVsLPDFcGosVK+SSKIYQAAQLA+luFppLIsAIhA-VTMGIRNQVQRRARSIMPLNVPQQsVSAPHTLVANYINKHMNLSTTSGSAVpDKVIPLlLYASTPPNTVINVDIKACDASITYsaFLSVICGAMHEGF-lGssutPFMGVPSSIVsDRRssuAPYuRPISGLQTMlQHLA+LYpAGFSY+VsDuFSSGNcFoFPTTTFPSGSTATSTEHTANNSTMMEaFLsVauPpHsKSuoLKRIlKDMTIQRNYVCQGDDGlLIIPcpuAuKISuEDlsELLELLcKYGctFGWsYDIDaSDTAEYLKLYALFGCRIPNlSRHPPVGKEYAuPpT-EIWPSLlDIlMGpFaNGVTDsLpWR-WLRFSWAFACauSRGua+shpGpSVu.AQYPhWSFVYhGLPPILLPGQTPFIaSsYMPsGDQGMFSILstWRDaLTu+AooshPPL+RsHPVWGLADVPSLLS-lGVYpGYaAAQlsRRPcPSP-sAss-SlEQhTuALS-YLhpDPsLKuRVlRGRssWERLosoauuslsSRVPSLFDVPsKWltAGRDA-KPsPSuVA-MhcoLpRAtR+Sp+SFSRLLELYL+VHV+LGEulPLAVDP-VPpVAGADPLNDDHWaKhTSLGPIsQSTRKYFupTLFVGKTVSGLDVEAVDATLLRL+ILGA-PEAapAhLsGIGMSDSEAHpIAG+ISLADAQlVQLARVVNLAVPSSWMSLDFDohI+HHuYshpPGIocsSTclR-RuuWlsuILRLLCAohAMTssGPVssuhVo-IDGGusALuGsLRsWMRDV 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................GltuscPISGhQsMhQaLuKLYpRGFpYpVsDsFSPGN.FTHhTTTFPSGSTATSTEHTANNSTMMEsFLTVWGPEHTDDPDVLRLMKSLTIQRNYVCQGDDGLMIIDGNTAGKVsSETI.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 0 1 1 +7757 PF07926 TPR_MLP1_2 TPR/MLP1/MLP2-like protein Fenech M anon Pfam-B_9285 (release 14.0) Family The sequences featured in this family are similar to a region of human TPR protein (Swiss:P12270) and to yeast myosin-like proteins 1 (MLP1, Swiss:Q02455) and 2 (MLP2, Swiss:P40457). These proteins share a number of features; for example, they all have coiled-coil regions and all three are associated with nuclear pores [1,2,3]. TPR is thought to be a component of nuclear pore complex- attached intra-nuclear filaments [1], and is implicated in nuclear protein import [2]. 
Moreover, its N-terminal region is involved in the activation of oncogenic kinases, possibly by mediating the dimerisation of kinase domains or by targeting these kinases to the nuclear pore complex [2]. MLP1 and MLP2 are involved in the process of telomere length regulation, where they are thought to interact with proteins such as Tel1p and modulate their activity [4]. 30.00 30.00 30.00 30.10 29.90 29.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.87 0.71 -4.41 29 251 2009-09-11 18:57:06 2004-09-16 15:44:10 7 8 210 0 170 250 3 131.80 28 6.62 CHANGED hppphsplps-lpphppptpphpp...phpphppDlcpQschsppAQpp....YEpElhpHucs.sppLppl+pphpphppphspLcppscpspspL.................................................pppcpsWppp+ptlcpElsphppRh--LppQNpLLHsQlEsls .................h.ppphsplpsElpchhpctpptts...ptpptppDlppQuclApcAQpc....YEpELhhHAps.sctLpth+pp.......hsp.......hp....pphppLcppspsAcspL.........................................................................................tppcto...WpEpcptLccElschppRp--Lp..pQNpLLHsQlEsl.................... 0 52 89 138 +7758 PF07927 YcfA YcfA-like protein Fenech M anon Pfam-B_2914 (release 14.0) Family The viral, archaeal and bacterial proteins making up this family are similar to the YcfA protein expressed by E. coli (Swiss:Q9F561). Most of these proteins are hypothetical proteins of unknown function. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.97 0.72 -4.05 255 2133 2012-10-01 21:18:35 2004-09-16 15:49:37 7 4 1365 1 556 1700 149 54.70 24 71.59 CHANGED hc-lh+hLp.ph.G........a..thh..ct.p.GSHhtap+ssppt..........hsls...H..sppc.lthstl+...pl.l+.ps ..................p-lh+hLp.pt.G........ahhh...ct.p.GSH.h..pappssppt..............hslP....H..stpc.lptttl+......pIh+........................ 
0 167 380 484 +7759 PF07928 Vps54 Vps54-like protein Fenech M, Finn RD anon Pfam-B_9294 (release 14.0) Family This family contains various proteins that are homologs of the yeast Vps54 protein, such as the rat homolog (Swiss:Q9JMK8), the human homolog (Swiss:Q86YF7), and the mouse homolog (Swiss:Q8R3X1). In yeast, Vps54 associates with Vps52 and Vps53 proteins to form a trimolecular complex that is involved in protein transport between Golgi, endosomal, and vacuolar compartments [1]. All Vps54 homologs contain a coiled coil region (not found in the region featured in this family) and multiple dileucine motifs [1]. 20.90 20.90 20.90 22.10 20.00 20.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.64 0.71 -4.03 17 311 2009-01-15 18:05:59 2004-09-16 15:52:26 7 10 251 4 226 328 4 128.50 38 12.82 CHANGED cspsahlssssLhllphlp-YhphhsphPslss-hhspll-h..........................L+hFNSRssQLlLGAGAhcsuG.LKsITo+pLALuSQsLphllthlPhl+thhpth.......t..hhtpac+lppDappHpsEItsKLluIhs-Rh .................................................t.tppahlss...oslhllchlt-Yhphh.s..slPs...h...ss-hhsplh-h..........................L.+hFNSRssQLlLGAGAhp...s.u.G.LKsIToKpL...............A.LuSpsLphlht.h....IPh.................lRphhctp....................s.p.p...........s..h..........h.tcFD+..lp+sYp-H.spIpsKLlsIMssh.h................. 0 79 126 185 +7760 PF07929 PRiA4_ORF3 Plasmid pRiA4b ORF-3-like protein Fenech M anon Pfam-B_4929 (release 14.0) Family Members of this family are similar to the protein product of ORF-3 (Swiss:Q44206) found on plasmid pRiA4 in the bacterium Agrobacterium rhizogenes. This plasmid is responsible for tumourigenesis at wound sites of plants infected by this bacterium, but the ORF-3 product does not seem to be involved in the pathogenetic process [1]. Other proteins found in this family are annotated as being putative TnpR resolvases (Swiss:Q9LCU7, Swiss:Q50439), but no further evidence was found to back this. 
Moreover, another member of this family is described as a probable lexA repressor (Swiss:Q7UEI4) and in fact carries a LexA DNA binding domain (Pfam:PF01726), but no references were found to expand on this. 26.60 26.60 26.80 26.60 26.40 26.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.08 0.71 -4.87 18 780 2009-01-15 18:05:59 2004-09-17 17:08:24 6 17 653 2 251 715 156 155.20 21 60.93 CHANGED ssplapL+lsLpsspPsIWRRltVPtsloLspLHpVlQssMGWpssHLHcFphs.sppYsh...........s....................s.s.ss.s..hspsphpLspllt..ttscshpYhYDFGDsWpHpIplE+hlsttsssphPh.ClsGptA.sPPEDsGGhsGYpchL-hlscPc+........cap-hhcWhGc.........F-sptFshccls ..................................................h.hpl.l...........t.s.l..aRc.l..lssshshtpLH.t.slptuh.s.a......p......s......s....Hhap.Fhhs......stpast....s.......................................................................................h...s....s......s.t.s........h.t..pps...pLsplh...........pttp...c..............hhYhYDFGDpWpaplp..................lp..c..........h........t.........p............t.........s.......t.......hsh..h..lput..s.......s...sP.p..s..s....................................................................................t............................................................ 0 89 173 216 +7761 PF07930 DAP_B D-aminopeptidase, domain B Fenech M anon Pfam-B_29283 (release 14.0) Domain D-aminopeptidase (Swiss:Q9ZBA9) is a dimeric enzyme with each monomer being composed of three domains. Domain B is organised to form a beta barrel made up of eight antiparallel beta strands. It is connected to domain A, the catalytic domain, by an eight-residue sequence, and also interacts with both domains A and C via non-covalent bonds. Domain B probably functions in maintaining domain C in a good position to interact with domain A [1]. 
21.40 21.40 21.40 23.20 21.30 20.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.90 0.72 -4.25 2 66 2012-10-02 21:13:33 2004-09-17 17:09:08 7 2 65 1 6 26 0 87.40 75 17.05 CHANGED sucssRspAssuWFGSWLssETGLVLSLEDAGtGRMKARFGTuPEhMDlsutNEApSuhTTlRRDG-hIcLsRtsENL+LuM+RlKGE ........PAKPVRAQANPGWFGSWLNPETGLVLSLEDAGGGRMKARFGTGPEIMDISGENEAQSSMTTLRRDGDMIHLARKDENLHLAMHRLKGE........ 0 1 3 6 +7762 PF07931 CPT Chloramphenicol phosphotransferase-like protein Fenech M anon Pfam-B_29509 (release 14.0) Domain The members of this family are all similar to chloramphenicol 3-O phosphotransferase (CPT, Swiss:Q56148) expressed by Streptomyces venezuelae. Chloramphenicol (Cm) is a metabolite produced by this bacterium that can inhibit ribosomal peptidyl transferase activity and therefore protein production. By transferring a phosphate group to the C-3 hydroxyl group of Cm, CPT inactivates this potentially lethal metabolite [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.08 0.71 -4.64 7 191 2012-10-05 12:31:09 2004-09-17 17:09:32 7 3 165 6 65 615 54 167.20 29 83.68 CHANGED uclIlLNGsSSuGKSolucslQslhssPWhphusDsah-t.h.schps...............sssGlcats.h...........tP.hchhhtuhhculsAhAcsGsslIlD....-lhhstch.h.-th.clLtshcVhhVGV+Csht.lhctREhtRGD.....RhsG.............huthQhchVHpss.YDlElDTotpsshECAttItpphp 
.......................................................................plIlLNGsSSuGKoolA.psl.Q.s...h.h.s...p....s..a....h..p..l.ul...D...h...ahtt...h.s...s.pt.h.p...................ts..tGlp...h..hs......................ssh.hphh.htu..h..h.........cu..l..tu..h..hcsGhs..V..lsD......cV...hh..s..t....th....h.-th..c...hh..p.....u..h..p..V..h....hV..G...V...+..s.....s..-...ht..c..R..E.h..t...R...G..D...............R..h..sG...............h..st..t.p..t..c.h...s....H......t..s.s.....t..YD......h......c.l.DTot.....hssp-sAcpltpt.................................................................................................................. 0 15 47 56 +7763 PF07932 DAP_C D-aminopeptidase, domain C Fenech M anon Pfam-B_29283 (release 14.0) Domain D-aminopeptidase (Swiss:Q9ZBA9) is a dimeric enzyme with each monomer being composed of three domains. Domain C is organised to form a beta barrel made up of eight antiparallel beta strands. It is connected to domain B by a short linker sequence, and interacts extensively with the domain A, the catalytic domain. The gamma loop of domain C forms part of the wall of the catalytic pocket; domain C is in fact thought to confer substrate and inhibitor specificity to the enzyme [1]. 21.10 21.10 21.70 102.50 19.30 18.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.17 0.72 -3.86 5 78 2009-09-11 05:52:52 2004-09-17 17:10:48 7 3 77 1 15 36 1 96.80 72 18.63 CHANGED c+sDIAGRY+ucELEA-hplsucGGulYGuFEGaLGpGPhEhM+slGcDVWhLuspRuMDAPAPG-WTLVFpRc-sGcVTGVTlGCWLAR+VsYsKs ...A.RQDIAGRYRSDELE.ADLLLVS...EGGAIYGAFEGFLGKSDMYPLYAAGPDVWLLPVQRSMDAPSPGEWKLVFHRDAAGRITGVTVGCWLARGVEYKR.L 0 2 8 13 +7764 PF07933 DUF1681 Protein of unknown function (DUF1681) Fenech M anon Pfam-B_4989 (release 14.0) Family This family is composed of sequences derived from a number of hypothetical eukaryotic proteins of unknown function. 
28.90 28.90 29.10 29.00 28.40 28.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.14 0.71 -4.67 23 357 2009-01-15 18:05:59 2004-09-17 17:14:00 9 8 219 1 230 343 4 159.60 47 61.76 CHANGED saEplLhl.pscValY+IP..PpsSs+.GY+AuDW.sh-........p.hWTGRlRll.........................................scucps..pIpLEDssoG-LFA..........................psPhsphs..suVEsVhDSSRYFVlRlpss.sG....+pAalGlGFpDRu-AFDFNVALQDahKh.............scppsphspppp.................sppP..chDhuLKEGpTIpINlus .....................hEplLhl.ps-VaVY.+IP..PR.so.s+.GY..RAu-W.pls..........p.hWoGRLRls.......................................................u..+..Gc....t..s....hI...+.........LED.p.....so........G...ELFA..........................p..s...Pl...cph......shuVEsVsDSSRYFVlRlpDs..s....G....R.+AFIGlGFs-Ru.DAFDFNVALQDHhKa...............................................lcpppph.tcptp......................................t.pttPplDhuhKEGpTIplsls.s......................................... 0 83 126 176 +7765 PF07934 OGG_N 8-oxoguanine DNA glycosylase, N-terminal domain Fenech M anon Pfam-B_29151 (release 14.0) Family The presence of 8-oxoguanine residues in DNA can give rise to G-C to T-A transversion mutations. This enzyme is found in archaeal, bacterial and eukaryotic species, and is specifically responsible for the process which leads to the removal of 8-oxoguanine residues. It has DNA glycosylase activity (EC:3.2.2.23) and DNA lyase activity (EC:4.2.99.18) [1]. The region featured in this family is the N-terminal domain, which is organised into a single copy of a TBP-like fold. The domain contributes residues to the 8-oxoguanine binding pocket [2]. 
20.10 20.10 20.10 20.30 19.50 18.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.67 0.71 -4.13 95 767 2012-10-02 11:58:57 2004-09-17 17:15:28 7 13 710 31 324 723 56 113.40 26 34.56 CHANGED tl.hsttp..hsLptsL..psGQsF+..Wc.......cp.pss.........ahsll........t.s.....cllpl+.p....ps.....................splhhphht.............................................................pcshpphlpcYFsLch..s................Lssla.pph.....spt........Ds..........hhp.........................pthpph..pGlRl.L+QDPaEsL .............................................................tp....hplp.sh....tsGQsFR...Wp........................pt...pst...................t....ahslh....................t.s....................pllp.lp..p....ss...............................................sp..lhhpsht.......................................................................................................tpshtphlpcYFsLsh..c.......................Lsplh.pph..........spt........Ds................hhp......................................ps..h..p..th....tGlRl.LpQDsaEsL........................................................................................................ 1 121 203 278 +7766 PF07935 SSV1_ORF_D-335 ORF D-335-like protein Fenech M anon Pfam-B_4933 (release 14.0) Family The sequences featured in this family are similar to a probable integrase (Swiss:P20214) expressed by the SSV1 virus of the archaebacterium Sulfolobus shibatae. This protein may be necessary for the integration of the virus into the host genome by a process of site-specific recombination [1]. 
21.60 21.60 22.50 21.70 21.20 18.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.71 0.72 -4.24 13 65 2009-01-15 18:05:59 2004-09-17 17:16:16 6 2 27 0 16 65 0 67.10 42 47.79 CHANGED +tY...paGDhhIRERKG+YYVYKLEp..NGcVKEpYVGPLsDVVEoYlKlK...GGltssP.shshs......GhEPGooss .......phaphuchhlREhKG+YYVYhlEp.psGch+-pYVGPLscVVEpYlchh.....uGl..ss..sP....ps.s........shpPss...h...................................... 0 7 7 16 +7767 PF07936 Defensin_4 BDS_I_II; Potassium-channel blocking toxin Fenech M anon Pfam-B_56105 (release 14.0) Domain This family features the antihypertensive and antiviral proteins BDS-I (Swiss:P11494) and BDS-II (Swiss:P59084) expressed by Anemonia sulcata. BDS-I is organised into a triple-stranded antiparallel beta-sheet, with an additional small antiparallel beta-sheet at the N-terminus [1]. Both peptides are known to specifically block the Kv3.4 potassium channel, and thus bring about a decrease in blood pressure [2]. Moreover, they inhibit the cytopathic effects of mouse hepatitis virus strain MHV-A59 on mouse liver cells, by an unknown mechanism [1]. 20.60 20.60 23.10 43.00 18.10 16.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -7.95 0.72 -4.28 10 17 2012-10-01 20:50:19 2004-09-20 09:42:44 7 1 6 4 0 17 0 34.20 50 68.19 CHANGED ssCpCuuKp..GsYWhs.husCPuG+GYTGsCtYhlG ssChCGsph..GlYWas.hssCPuGaGYTGpCsaahG 0 0 0 0 +7768 PF07937 DUF1686 Protein of unknown function (DUF1686) Fenech M anon Pfam-B_5313 (release 14.0) Family The members of this family are all hypothetical proteins of unknown function expressed by the eukaryotic parasite Encephalitozoon cuniculi GB-M1. The region in question is approximately 250 amino acids long. 
25.00 25.00 37.70 36.90 21.30 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.44 0.71 -4.66 8 27 2009-09-11 08:48:26 2004-09-20 09:49:39 6 2 2 0 23 27 0 149.30 37 33.28 CHANGED osstluWNTuLYSLAVAGsIsYQlWlLApp...hRccGsusMlRpsWoAlGCMsPMhsuL......hHuGlIRSusYSlTVAG.......luAVhlYVQsuspc..GMsh+QhCslsuGNllLuuAChuGssKh.hsuststsphlssGlVsFuulLLLVshsccutsGcKspu..pG..ltshlhVhohhVsshsS........hVCGRD ......tsthsWNTulYSlAVsGsIhaQhWlLstt....h.ptGh.thhpptWsslushsPMhsul.......psGhhpSshYuhshsG.......huhshhhsQshhpp..GMshpphCshssGN..llLushChuts.t.....s.thshhlshGhlVhsulLlll.hh..tttGccs.s......pG....h.phlhlhohhVsshsS........hVsuRs.......... 0 23 23 23 +7769 PF07938 Fungal_lectin Fungal fucose-specific lectin Fenech M anon Pfam-B_48600 (release 14.0) Domain Lectins are involved in many recognition events at the molecular or cellular level. These fungal lectins, such as Aleuria aurantia lectin (AAL, Swiss:P18891), specifically recognise fucosylated glycans. AAL is a dimeric protein, with each monomer being organised into a six-bladed beta-propeller fold and a small antiparallel two-stranded beta-sheet. The beta-propeller fold is important in fucose recognition; five binding pockets are found between the propeller blades. The small beta-sheet, on the other hand, is involved in the dimerisation process [1]. 
20.80 20.80 21.20 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.04 0.70 -5.14 3 66 2009-01-15 18:05:59 2004-09-20 09:51:25 7 5 46 25 47 63 1 208.30 21 64.82 CHANGED AQEVLFRTGIAAVN....SsN+LRVYFQDlaGSIRESLY..EuuWANGTupNVIGKAKLGSPLAAT...SKELpNIRVYslscDNlLoEssYDG.sSGW.lsGuLuusslsVuPsS+luulahuGoso..PplRIYtQKSssusuTIpEYhWsGssWcsGoshGVolPGTGIGsTsaRYTDYsGPS.IRIWFQTDDLKLVQRAYDP+oGWY.pLsTIFDKAPPRsAIAATSFssGpSSIYMRIYFVNSDNTIWQV....CWDHGpGYHDsRTITPVIQGSEIAIISWGsh..pGPDL+LYFQNGTYVSAVS..EWsWspuHGSQLG+pALPPA .....................................................s.............................p..........t.h.............t...........h.....................u...t.............oslAsh...............t..........la.h.t...tt.....htphh...t....ttW...Gt...h.t.....th...s..Stlssh...s..............hlhh.....s.......h...t........................t.....W......t.............h......s.......sss.huh..........sh..........p..s..s......p...lRlaa.pss..shpl..hptsa..-...sp.p...W......s.s.h..h.s.p.s.......s.t.ssluAs..sa....t..ss..hclahhstt..s.hhph.........hhpt.t.......................................................................................................................................... 0 10 29 40 +7770 PF07939 DUF1685 Protein of unknown function (DUF1685) Fenech M anon Pfam-B_5502 (release 14.0) Family The members of this family are hypothetical eukaryotic proteins of unknown function. The region in question is approximately 100 amino acid residues long. 20.30 20.30 20.80 20.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.25 0.72 -4.33 14 160 2009-01-15 18:05:59 2004-09-20 09:54:41 6 4 21 0 97 154 0 61.50 41 29.40 CHANGED LTD-DL-ELKGCh-LGFGF..-E..t.s........scLCsTLPAL-LYaulspph.st.....ptSsssssssssuo ......lTD-DL-ELKGClDLGFGF...sc..p..s.............scLssTLPAL-Lhauhspph.s......................s.s................................... 
0 14 60 76 +7771 PF07940 Hepar_II_III Heparinase II/III-like protein Fenech M anon Pfam-B_5577 (release 14.0) Family This family features sequences that are similar to a region of the Flavobacterium heparinum proteins heparinase II (Swiss:Q46080) and heparinase III (Swiss:Q59289). The former is known to degrade heparin and heparan sulphate, whereas the latter predominantly degrades heparan sulphate. Both are secreted into the periplasmic space upon induction with heparin [1]. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.90 0.71 -4.34 158 1644 2009-01-15 18:05:59 2004-09-20 10:00:19 8 23 1018 12 395 1466 1366 143.20 18 21.37 CHANGED psGathlps......sps.hllhc...sus.ss........ssHuHuDsLuF-lts.suptlllssGss...........tt............sch+ph.hR.uTsuHsTlsls.spsps..phht...................thhh.....................ththpt.........t.....shhhsst+suYht..........................hshtHpRplh.ls.........sptlhs.D.................pl ...........................sGhh.hhcs..........psh.hl.h.h..c...sGsh.s.......................ssHuHsDphuFp..lhh.pGp..lhhDsGths...........Ytp..................tphpth..h+..sotuHNTlh...ls....sps.pp..thst.......................shth...................................t.......................ttth.......thhh.s...s..hps.u.Ys.............................s.h.h.a.pRplh.ls......tphhhlhDp.............................................................................................................. 0 167 309 348 +7772 PF07941 K_channel_TID Potassium channel Kv1.4 tandem inactivation domain Fenech M anon Pfam-B_7603 (release 14.0) Family This family features the tandem inactivation domain found at the N-terminus of the Kv1.4 potassium channel. It is composed of two subdomains. Inactivation domain 1 (ID1, residues 1-38) consists of a flexible N-terminus anchored at a 5-turn helix, and is thought to work by occluding the ion pathway, as is the case with a classical ball domain. 
Inactivation domain 2 (ID2, residues 40-50) is a 2.5 turn helix with a high proportion of hydrophobic residues that probably serves to attach ID1 to the cytoplasmic face of the channel. In this way, it can promote rapid access of ID1 to the receptor site in the open channel. ID1 and ID2 function together to being about fast inactivation of the Kv1.4 channel, which is important for the channel's role in short-term plasticity [1]. 25.00 25.00 38.40 36.70 21.90 20.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.91 0.72 -3.75 2 44 2009-01-15 18:05:59 2004-09-20 10:06:49 6 2 32 2 20 45 0 72.40 77 11.62 CHANGED MEVAMVSA-SSGCssHhPYGYA.QARARERER.AHSRAAAAAAsu.......GpGGs.GGGsthpt....ppts.ppp- MEVAMVSAESSGCNSHMPYGYAAQARARERER....L....A.....HSRAAAAAAVAAATA.AVEGuGGu..GGG..u..HHHH...QoRGAsoSH-............................... 0 1 1 8 +7773 PF07942 N2227 N2227-like protein Fenech M anon Pfam-B_5433 (release 14.0) Family This family features sequences that are similar to a region of hypothetical yeast gene product N2227 (Swiss:P53934). This is thought to be expressed during meiosis and may be involved in the defence response to stressful conditions [1]. 
20.10 20.10 20.10 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.79 0.70 -5.44 12 457 2012-10-10 17:06:42 2004-09-20 10:09:33 7 12 292 0 334 464 12 246.30 33 63.30 CHANGED ss..hhsshhchsKlpssl+plsRDWSsEuptER-tsapPllppLsphhP.t.h-pppsplLVPGuGLGRLsa-luph..GatspGNEFSYaMLlsSpFlLNhspppspaplYPalHsaSNphspsDQLRslphPDhpshs.hs.......s.sshShsAGDFl-lYsps.......................psaDsVlTs..FFIDTAcNll-YlcTItplLKsGG..lWlNlGPLLYHFp................sh...ssphS.......lELoh--lhtlhpp.hGFpl.pccp.I.....cssYs..sN.cSMhpshYtssaass++ss ................................................tt.....s...chsKlpssl+phsRDWSs..-G..t.t...ERcss.apPllpp..........lp....phh..s............................t.....p............t................p...ph..p.....lLVPGuGLGRLsaEluth..........Ga.......t.s.p......GN.................EhSaaM..LlsSpal.L...N..t.........s.......p.............t................p.....papla..Pal.p.p.hS.Nphpp.psp.l+sl.thPDlpP.ts......hs.............................sshShsAGDFhp...l.Y.sp.p...................................................................ttaD...s..lsTs..FFIDT.Ap.NllcYlcsIhph...L..+.....s..........GG...........hWlNl..GPLLaHat................................................................s..........s.........................lELo.h--lhtlhpp.hGFph....t........pp...t...h......................ssYh....ts.pShhp....hYps.hassp+.t.............................................................................................................. 0 128 204 285 +7774 PF07943 PBP5_C Penicillin-binding protein 5, C-terminal domain Fenech M anon Pfam-B_1086 (release 14.0) Domain Penicillin-binding protein 5 expressed by E. coli (Swiss:P04287) functions as a D-alanyl-D-alanine carboxypeptidase. It is composed of two domains that are oriented at approximately right angles to each other. The N-terminal domain (Pfam:PF00768) is the catalytic domain. 
The C-terminal domain featured in this family is organised into a sandwich of two anti-parallel beta-sheets, and has a relatively hydrophobic surface as compared to the N-terminal domain. Its precise function is unknown; it may mediate interactions with other cell wall-synthesising enzymes, thus allowing the protein to be recruited to areas of active cell wall synthesis. It may also function as a linker domain that positions the active site in the catalytic domain closer to the peptidoglycan layer, to allow it to interact with cell wall peptides [1]. 21.20 21.20 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.85 0.72 -3.97 74 4610 2009-01-15 18:05:59 2004-09-20 10:14:53 8 6 2602 30 731 2984 1285 93.00 27 23.36 CHANGED F-shplhppsp.l...tphpVhtGpcc.....pltlsstcDl...hlsl.s.....+sptcplpsphph..ppslp......APlp+GphlGpl......ph.hhsschltp............h.sLlstpsVcc ..............................................Fcshphhptupth....sstpVh.h.G.cpc........plsl.s.s...pcsh...hlsl.P.......+..u..p..h...p.p...l......+.s.p.hsl.....pppLp...................APlp+GpsVGpl........ph..phc.s..c..p..ltp....................h.sLlshpsVtc......................... 1 223 436 575 +7775 PF07944 DUF1680 Putative glycosyl hydrolase of unknown function (DUF1680) Fenech M anon Pfam-B_4918 (release 14.0) Family The members of this family are sequences derived from hypothetical bacterial and eukaryotic proteins of unknown function. One members of this family is annotated as a possible arabinosidase, but no references were found to back this. These proteins are related to a large family of glycosyl hydrolases. 
24.90 24.90 24.90 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 520 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.74 0.70 -6.23 20 1388 2012-10-03 02:33:51 2004-09-20 10:18:10 7 35 764 0 436 1283 178 495.00 25 73.60 CHANGED sV+lps.uhatpptcsspphll.....ltsD+........lhcshh............hpuhlsstt........shGsaE.........................h.uhshpssslG.....+aLpulAhhhAspsDspLcp+hcplls.tlucsQp.......sDGYLsuhhptt...........................shscsWss............hYsht+lhtGhlshYphoGppphLclsp+hADah....hsshus-phpchhhs...pHsthp..ulscLY..............phTG-p+YLcLA+hFhcpcshsP...................................hshspDclsshctsTtlu+sV.......GhAclaphoGDsshhp.......sucphWpslss++hYlsGGsusp....................EtFssshclsspssh...sETCuolshlhhsc+hhphs........s-.upYsDhhE+sLYNslLuuhs.Du..stahYhsPL...tsssp+.p..........hppcapuhhCCsssshcshsphucalYsps......cc..slaVNLahsSpschchpu...lpLcQcTs..aPapuplplTl.........psstssphsLtLRlPuW.................uuu...splplNGcs........sspttssuYlplsRcWpcGDplpLpLPMslchhtu....p-ssspVAlhpGPlVhs 
.................................................................................................................................................................................................................lpl.t.s.shht.p.h.phshphhl......h..cp............h.ts...........................ps.......l...s.h..t..........s.h.s.....p.....................................s.h.h.p.spshs.......+alcuhu.h.h.s..t.....p.....s.........D.........s.............p.......L....c....p....p....h....c....phlp....hl.ttsQp..............sDG..Ylsshhphp............................................................s..p.ppWss..........................hYsht+hhp......u..h....lsha...p......h..T....G......p....p....p...........h....Ls....lsp+h.AD....al..............................ss.......h....us.t.p.....h....p.....h....h..h....hs................t+.s...t..........hp......u.....lhc....LY.....................ph..T...s..c.......t.......+.......Y.LpLA....p....hFhc.pp.sh.ps...........................................................................................................................................hthpps.c.lsh..h...c.t.p.....s...t.lu..+sV.......................Gh...uc...lh..ph.....o...s........D.....p..t.....hhp................sspphWp.s.hsp.+.chY.lTGGhuup.............................................EtF...s..s..s......h.p....l.s.sp..hsh..............sE.o.Cu..slshhh.hs...........cchhphp........s..-...upYADhhE+............sLYNsllu.u....h.............s..............-s..............t...ta.......hY..h...sPL...........pstspp....................................hppta...h.u.....h.s.CCss..Nhscshsph.s.p.h..l..Ys...p.................................ss......sla..lsl.Y.h.s.sp..hp....h.ph.......t......s..........l..p..lppp.os............YP...a...p...t.....p.......l.plsl............................p.s.s.p.s.s.p..a.sLtlRlPsW.....................spt....sp...lp.l.......N...Gpt.....................htts.hts.u..Ylpl.p.R.p..W.p.pG.D.p..l.plp.lP.M.t...l+hh..hs.......ps.tsplAlt+GPlVhs......................................
........................... 0 156 320 388 +7776 PF07945 Toxin_16 Janus-atracotoxin Fenech M anon Pfam-B_50381 (release 14.0) Domain This family includes three peptides secreted by the spider Hadronyche versuta (Swiss:P82226, Swiss:P82227, Swiss:P82228). These are insect-selective, excitatory neurotoxins that may function by antagonising muscle acetylcholine receptors, or acetylcholine receptor subtypes present in other invertebrate neurons [1]. Janus atracotoxin-Hv1c (J-ACTX-Hv1c, Swiss:P82228) is organised into a disulphide-rich globular core (residues 3-19) and a beta-hairpin (residues 20-34). There are 4 disulphide bridges, one of which is a vicinal disulphide bridge; this is known to be unimportant in the maintenance of structure but critical for insecticidal activity [1]. 25.00 25.00 84.30 84.20 19.90 16.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.25 0.72 -4.28 2 4 2012-10-01 22:06:18 2004-09-20 10:20:42 6 1 2 1 0 4 0 35.80 87 97.95 CHANGED sICTGADRPCAACCPCCPGTSCpu.EsNGVSYCRpD ..sICTGADRPCAACCPCCPGTSCpGPEsNGVSYCRND. 0 0 0 0 +7777 PF07946 DUF1682 Protein of unknown function (DUF1682) Fenech M anon Pfam-B_4955 (release 14.0) Family The members of this family are all hypothetical eukaryotic proteins of unknown function. One member (Swiss:Q920S6) is described as being an adipocyte-specific protein, but no evidence of this was found. 
23.80 23.80 23.80 25.00 21.50 23.70 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.96 0.70 -5.58 39 327 2009-01-15 18:05:59 2004-09-20 10:21:26 9 6 273 0 236 331 0 316.70 27 76.34 CHANGED hpsahhEhh.slshlllhlhhahh..GpppNcphAppa...hpsppshhppp..FuhlGhstst...............................hlhccu.spashYATGRpssstlhlslcLhpRpshhshl.hphlhsh............pDphplpl.............................spsshDs............hlaAlVpKpshpph+c-ph.-L................Sh......spop-ptpLPpphslhoEusElsstlls.sp...........lhpsls.ps.sch...lcaltlTDQshtps.p.p.............cts...................................c+plhlshslPpsss.....ht..phttLlshhlphhDpls........shclps-shcKscpsRppthcchhKttcpt+tEt..htpc+tctK+pc+cphhsphosEcQ+Kh-.........cKcpc+pt++ .........................psahh.Ehh.hlshllhhhh.ahh..GpppNpphAptW...................hps..p....ps...lLppp..FuhVG.ssstp.........................................pshLhccstp.ash..asoGRpsspshhlplc..........hhpRpsh.lshl.hchhhsh............pDplplpl............................t.hspps.hDs..............aVaA..lspKcshpphp..c-...hh.DL.....................Sh.............spstsp.....t...LPsp....hslhSE..sElss.s..hls..sc............hhpslp..ph...uch..............lchlthoDQ.ss......p.....psp............................................s++plhhs.hs..lPssss..................hp.shhsLhphhhh.hhDplt.............ph+hs..t-s..hpKs..cpsRpch.cphhKhpctt+tEt..t.tpc+ccp++tc+-chhpp.ss-cQ++h-.........c+cpc+p.++......... 0 83 130 194 +7778 PF07947 YhhN YhhN-like protein Fenech M anon Pfam-B_5325 (release 14.0) Family The members of this family are similar to the hypothetical protein yhhN expressed by E. coli (Swiss:P37616). Many of the members of this family are annotated as being possible transmembrane proteins, and in fact they all have a high proportion of hydrophobic residues. 
25.00 25.00 25.60 25.00 24.70 24.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.43 0.71 -4.92 119 1531 2009-01-15 18:05:59 2004-09-20 10:23:20 9 5 1341 0 371 954 58 182.60 31 82.28 CHANGED h.hphlhKshshlhLhhhsht......................sshshhl.hsuLhhShlGDhhLh...................pphFlhGlsuFhlAHlsYlhs.Fh.............thhtshphhhshshhshus...................shhhh...lhspl..............st..hphsVssYshllssMsht.Aht....................hhtshsthhsslGAllFllSDsll...AhscFh.......hshshsphhl.hsoYasAQhLI.shuh ..................................................phlhKslsllLLhlhshp.......................ts.hsshshhl..lsGLsh.ShlGDsLhh.............................................hsppphhhuluuFhLuHLhYshhFs...................tphshs..h...hh..s.ls.ls..l...h.ll.us...............................................................l.hhsl.......lhscL...............................tc...hphPVss.ahshlhsMshh..Asthh.............................................hhtsss.suh.shhGA.hLahlSshl.l.hhs+ap....................pphpts.s.hll...husYauuphLIstS................. 0 101 189 287 +7779 PF07948 Nairovirus_M Nairovirus M polyprotein-like Fenech M anon Pfam-B_5426 (release 14.0) Family The sequences in this family are similar to the Dugbe virus M polyprotein precursor (Swiss:Q02004), which includes glycoproteins G1 and G2. Both are thought to be inserted in the membrane of the Golgi complex of the infected host cell, and G1 is known to have a role in infection of vertebrate hosts [1]. 
22.00 22.00 22.90 35.40 20.60 21.90 hmmbuild -o /dev/null HMM SEED 645 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.36 0.70 -13.20 0.70 -6.40 6 90 2009-01-15 18:05:59 2004-09-20 11:34:11 6 2 7 1 0 87 0 473.40 69 41.74 CHANGED PTNRSKRs.pscIILThsQGLKKYYoKILKLLcLT.EEDoEGLLEWCsRhLupsCDDsaFpcRIpEFFl..TGcGaFNEVL.FK..S.o.usThsPs+su.....usss-PF+SaaAKGhlphDSGYFSAKCYs+ASNSGhQLINlTpHshKlssTPGPKhoNLKolNCINLKsSsDK-HsElEINVLlPQVAVNLSNC+Vll+SHVCDYSLDsDGsI+LPplp.HpG....oFIPGTYKIlIDKKNKtNDRCsLhTNCVIKGRElRKGQSsLRQY+TEI+lGpspuGoRRLLS.Esuss-ClSRTQLI+TEoAElHsDcYGGPG-KITICNGSTlVDQRLGSELGCYTINRIKSaKLCENSAouKuCEIDSsPVpC+QGaCLKIoQEGRGHVKLSRGSEIlLDsCDoSCEIMIPKGoGDILVDCSGGQQHFLpsNLlDLGCPNlPLLGKMAIYICRMSNHPKTTMAFLFWFSFGYVITCIhCKllFYlLIllGTLuK+lKQYRELKPQTCslCEssPVNAIDAEMHDLNCSYNICPYCASRLTSEGLsRHVsQCPKRKEKlEETELYLNLERIPWhVR+LLQVSESTGlALKRSSWllVLLlLLsVSlSPVQS....APlupu+sVpsYpsREsYsuICLFIhGSVLhAlShLh+GLlDSlG ..................................hhph.h.t+lLphhths.-.ss-tL.pWCpc.ht.sCssshhppRIpEFFh..supuhFN-VLph+h.s.s.tso..ss..st......sss.shhp.hu+t.Lph.SshhpspChsts.ssu.Qhls...Hss+lhpTsGPKhpsl+slpClNlcsph.K-pp.l.lsVLhsplsVsLpsC+s.Ipu+.C.YshshDG.l+lPphh.+pG........hhh.GsYpIslDhpsp.NcpCpLhTsCVlKG+El+KGQS.L+tYpTEl+lsps.sGpR+LLu.ctsss-ChStTQLh+TEssElHsDsYGGPG-KITICNGSTlVDQRLGSELGCYTINRV+oaKLCENSAosKsCEIDShPVKC+QGaCL+ITQEGRGHVKLSRGSEVVLDsCDoSCElMIPKGTGDILVDCSGGQQHFLcDNLIDLGCPpIPLLGKMAIYICRMSNHP+TTMAFLFWFSFGYVITCIhCKslFY.LIllGTLGK+hKQYRELKPQTCTICETsPVNAIDAEMHDLNCSYNICPYCASRLTSDGLuRHVsQCPKRKEKlEETELYLNhpRlPhhlRpLLplS.usGhsLKRusWhhsLLlLhhlolSPVQu....APlGcGKTIEsYpsREsaTSICLFlLGSlLFlVShLhKGLVDSlu............ 0 0 0 0 +7780 PF07949 YbbR YbbR-like protein Fenech M anon Pfam-B_4990 (release 14.0) Family The members of this family are are all hypothetical bacterial proteins of unknown function, and are similar to the YbbR protein expressed by Bacillus subtilis (Swiss:O34659, Swiss:O87088). 
One member (Swiss:Q97EN2) is annotated as an uncharacterized secreted protein, whereas another member (Swiss:P43521) is described as a hypothetical protein in the 5'region of the def gene of Thermus thermophilus, which encodes a deformylase [1], but no further information was found in either case. This region is found repeated up to four times in many members of this family. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.74 0.72 -3.86 190 4025 2009-01-15 18:05:59 2004-09-20 11:52:03 7 7 1621 2 591 2564 94 83.70 18 58.23 CHANGED lPV........psphspsh..lsshps....pssp.VplpGsp.shl......sslpphps...ps....Dlsslsp......sh.shp.....lsl..hssssssph...........Psplp..Vpl .............................l........h.st.....s.ssh..tl..t.sh.ts..........pstp..V.plsG.sp..shl..........................ppl..p.p.h.ps.....ps.......Dl.ss.lst...................sh.shp..........lpl.......ss.st.ssph..............Ps.plp..lpl................................................................... 0 239 414 504 +7781 PF07950 DUF1691 Protein of unknown function (DUF1691) Wood V, Bateman A anon Wood V Family This family of fungal proteins is uncharacterised. Each protein contains two copies of this region. 21.70 21.70 22.00 21.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.47 0.72 -4.14 38 223 2012-10-03 07:11:12 2004-10-06 09:55:10 6 5 115 0 163 218 1 114.70 21 62.00 CHANGED sshluhHhhsspllPlls..sssuupssLthlptlhtpts...........hthhlLlsssshHlsuGhshhhthp..........................ptpp++pthhhl........................sshstlG.....hulhhhu.thp.h.t.s.............ssa ....sahuhHhhsspllPlhs.....s....ssu.....opssLthlcphhtp.s...........hthhhLlsssshHlsuGlshhhhhp.................................pppt++htthhh...................................t.s.hs.....hshhhhh.h.t.................ht.hhh......................................................... 
0 25 72 131 +7782 PF07951 Toxin_R_bind_C Clostridium neurotoxin, C-terminal receptor binding Finn RD anon Pfam-B_3087 (release 15.0) Domain The Clostridium neurotoxin family is composed of tetanus neurotoxins and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein. The N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains [1]. This domains is the C-terminal receptor binding domain, which adopts a modified beta-trefoil fold with a six stranded beta-barrel and a beta-hairpin triplet capping the domain [1]. The first step in the intoxication process is a binding event between this domains and the pre-synaptic nerve ending [1]. 25.00 25.00 50.90 50.90 21.10 20.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.82 0.70 -4.64 10 143 2012-10-02 19:42:32 2004-10-06 11:27:06 7 3 31 75 3 169 0 187.30 42 17.41 CHANGED YsspssoslLKDFWGNsLpYDKEYYllNlsp.N+Yl.....shp..oc.l...h.htt.p....ph.NhahNtppLYpGhKlIIK+hu...ssssDshVRpsDhlYlNh.shsNpcYp.Lhhps......spst.tpcll.hhclussssshsphhlhphp.hsshssY.Co.lFpsss..sNslGL..lGh+p.....s.....p..hsslVASsWYhsplccp.pp..sGCaWpFIPp-cGWsE ..Ypsps.sshLKDFWGN.LhYsKcYYhlNhhp.NpYIshp..psuh...h.....Rs...ps.shalN.p.LYpGhKhII++hu...pussDslVRcsDhlYlsh.hspspEap.l...h......tpsp.tEKll.hhpIsssstths.hhshp.p..ss.ssh.Cph.Fppsp..sspIGL..lGhHp...........p..hshhltSpWYhpplccps.s..hGC.WpFIsc-cGWtE..... 0 1 3 3 +7783 PF07952 Toxin_trans Clostridium neurotoxin, Translocation domain Finn RD anon Pfam-B_4943 (release 15.0) Domain The Clostridium neurotoxin family is composed of tetanus neurotoxin and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein. The N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains [1]. 
Subsequent to cell surface binding and receptor mediated endocytosis of the neurotoxin, an acid induced conformational change in the neurotoxin translocation domain is believed to allow the domain to penetrate the endosome and from a pore, thereby facilitating the passage of the catalytic domain across the membrane into the cytosol [1]. The structure of the translocation reveals a pair of helices that are 105 Angstroms long and is structurally distinct from other pore forming toxins [1]. 25.00 25.00 27.40 26.10 20.00 18.80 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.97 0.70 -5.53 8 149 2009-01-15 18:05:59 2004-10-06 11:34:15 7 5 32 46 7 144 0 292.30 51 25.87 CHANGED slFaYLaAQKhPcshpsIoLTsSl--ALLsssKVYTFFSS-aIspVNKsVpAuLFlsWlppVlsDFToEuoQKSTlDKIADISlIVPYIG.ALNIGNEstKGNFcsAhElsGAuILLEFlPELhIPllusFoIcSals..pNKNKIIKTIsNALccRpcKWcElYuaIVosWLo+lNTQFsphKEQMYpALpNQssAIKpII-YcYNpYot-EKspIss-aNIpsIcscLNcKlshAMpNIs+FlsEsSlSYLM.Khh..htlcKLp-FDsslKstLLsYIhcNpthLs.splscLpshVNsoLssoIPF....cLSpYTsDpILIphF ...............shFaYLcAQchsps.psIsLTSSh-cALLppsK..lYTFFSo.....-..aIcsl..NK..sVpA.uLFlu....WlpQll.DFTpEssppoThDKIADISlIVPYIG.ALNIGNpstKGNF...pp...AhplhGuuILLEFhPE..lhIPsltsFhl..Sal...pNKsclIcsIsNALpcRscKW.....c.....-hYthIVupWLoplNTQF.pl+EtMYpALp.QspAlKpIIchcYN.Yotc.....E....Ksplp..pash.slps.....cLNppls.AMpNIscFls-sSlSYLMKphhPhtlp+Lp-aDpslKp.LLsYI.cpp.hLh.spspcL.pshVsppLpssIPF....pLSpYTssplLlphF................................................................................................................ 0 1 6 6 +7784 PF07953 Toxin_R_bind_N Clostridium neurotoxin, N-terminal receptor binding Finn RD anon Pfam-B_1058 (release 15.0) Domain The Clostridium neurotoxin family is composed of tetanus neurotoxin and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein. 
The N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains [1]. This domains is the N-terminal receptor binding domain,which is comprised of two seven-stranded beta-sheets sandwiched together to form a jelly role motif [1]. The role of this domain in receptor binding appears to be indirect. 20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.45 0.71 -4.81 13 203 2012-10-02 19:29:29 2004-10-06 11:35:29 7 7 33 77 5 232 29 185.80 43 15.67 CHANGED sNhlhDhSGhsoplphssDltLs..lNsssltLpsssp...sVshsNslhhNuhhsNFSIsFWlRhsphssp........chslIssh.csN....sGWcI.lcsNsllaolhDsNGpp+slah........SDhl.NpWaaITIosDRLpsp.hIaINspLlsscsIcslhNIauSNsIsh.....lscNp.IaIcthsIhsKpL .................................................pNtlhDhS..GYsoplphscsl.tLss...lspN.phtLpus..sp...p.lpls..sNsIlaNuhapsFSlSFWlRIsphpsp........cYTII.ssh..cNN.....SG..W.cIsl...p...s...s...t...llaoLh.DssGppcslh...Fp.t...thSDYI..N+WaFlTITssRLssp...hIYINGpLlsspsIcpl.t.sIauSspIhhthst........s...hsp...sphlaIchFsIFs+pL........................ 1 2 5 5 +7785 PF07954 DUF1689 Protein of unknown function (DUF1689) Groocock L, Finn RD anon Groocock L Family Family of fungal proteins with unknown function. A member of this family has been found to localise in the mitochondria [1]. 
25.00 25.00 35.60 51.40 24.90 20.30 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.87 0.71 -4.35 13 52 2009-01-15 18:05:59 2004-10-06 11:36:28 6 2 43 0 33 49 0 161.10 33 56.82 CHANGED acsulcFYEtDppL-spDRhpLucshpoluhushhuGasuhsuuFhsPhsYhta....pstul+GlssP+.............oFllGlsuhhlusphsu+htas+plpphcs.........................................sspp+phcsh+hhchspsuhashYYhpTupsPphpl.DP+phhpp......LKcs ...acsAlcFYEtDphL-scDRLcLucshpoIuhuphhuGahuhhusFhsPhhYpha....pstul+GVpls+...............sFlLGlhshhhusphsu+hhYs+plsphcss........................................spsp+Qhshh+hh-.s..ssutashYahhThpsPph+l.DP+shhpp.....h+........... 0 5 17 31 +7786 PF07955 DUF1687 Protein of unknown function (DUF1687) Groocock L, Finn RD anon Groocock L Family This is a putative redox protein which is predicted to have a thioredoxin fold containing a single active cysteine [1]. 20.90 20.90 21.10 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.91 0.71 -4.18 9 121 2012-10-03 14:45:55 2004-10-06 11:38:47 6 4 117 1 95 114 0 139.10 28 85.85 CHANGED MShF+TLQ.pP.csIoLFspsttst..upclhphLcpshssp.............Fcl-l.sschPThDQLphhts.h.ps..........pshltsphP....................................chpplhcphscs...tp...pshtsshap.....tPLhVDW-pshlus-......hpslcchL ................................................................a+.h.pp....csITLFHpsstss..Sp+lhslL+ps.suss.ttsss.........................pF-.L.-l...spc.PThDQL+hIl-ahsts.h............sshlpssh...........................................p.pcsl+thpps......................t..s..tFp.....cPlsVDWsNG.p..hust............................... 0 18 51 80 +7787 PF07956 DUF1690 Protein of Unknown function (DUF1690) Groocock L, Finn RD anon Groocock L Family Family of uncharacterised fungal proteins. 
28.40 28.40 28.50 28.60 27.90 28.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.69 0.71 -4.21 7 160 2009-01-15 18:05:59 2004-10-06 11:42:59 6 3 127 0 118 152 0 114.40 28 78.45 CHANGED VasPpoPlsFSpsLluQL-sSsEoDaoRpQhsE+alpc+VupcLppLE.EslKcFcspLpsShhss.cs.psspsslSostlspclcsLpp+Lp...pLpptchpK......t.cchpss.....RuclscCLhcNcsKPLNCa-El-tFKchsh ...................................s.hthStshlppLp...s...s...........EoD...s..Rtp.hEh.lpt+lu...tcLpc...l...pt...p...p.......t...p...h...p...p...h..h.....pt.......t........................t.....tttt.p....tl...........SptplppclptLp.t+Lp....t.plpphp.t..............ppu.......+ptllpCLptNpt..+PLsCacEV-tFKp.V.t....................... 0 25 58 97 +7788 PF07957 DUF3294 Ribosomal_MRP8; Protein of unknown function (DUF3294) Groocock L, Finn RD anon Groocock L Family This family was annotated as mitochondrial Ribosomal protein MRP8, based on the presumed similarity of the S.cerevisiae protein to an E.coli mitochondrial ribosomal protein; however, this similarity is spurious, and the function is not known [Wood, V]. 20.00 20.00 20.00 21.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.44 0.70 -4.81 10 45 2009-01-15 18:05:59 2004-10-06 11:45:09 6 1 44 0 30 36 0 208.10 43 98.55 CHANGED MS...-.....lEpL+KcVscLpslV+KQutLIuKTGcpVl-LQlsppKschsshs.......sppspp..suplDsoDaATNEDLVQLVsELQGQLDsLE-RSIRRhsNSp..tp--c-llAPIPNtDG-hPs.p.......DthhPcTLcEFccl.-clcLh+Lu+FYEllPPs.p-pEchcchLcs..psEshch........sttsDc-lpcclcchoc-ElD-l..Fc-lARYLGLRsRRGossW ...................MSsc....lE.LpcpVs-LpsLVKKQShlIoKTGppVLELQlccpKpclsshs.......sppsss..ssphDsoDaATNE...DLVQLVsELQG....QLshlEERSIRRllNSp...ppcD-D.llAPLPNsDG-lPs.........-slFPpTLcEF+cl.sclcLl+Lu+FYEhlPPs.pEpEch-.phL-s....psEshpl........spss-c-IpcclcpaSc-plD-h..Fs-lARYLGlphRRGophW.................. 
3 7 17 27 +7789 PF07958 DUF1688 Protein of unknown function (DUF1688) Wood V, Finn RD anon Wood V Family A family of uncharacterised proteins. 25.00 25.00 49.70 29.80 22.60 21.90 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.48 0.70 -5.94 32 232 2009-01-15 18:05:59 2004-10-06 11:46:41 6 3 181 0 163 231 29 398.00 45 92.28 CHANGED shLpohpAVR-Rupplh...phAcpschpHFslc.s+hsssAsaVsslI+ccYss....pIP.HuRWpHF-sGG....lsRhspLhsphs.....................-s.EpuRthlDLhlVSVLLDAGAGspW+YpEstssph....auRSEGLAVAShchFhsGhFSu.DsscPhpVDupuLppLohppLupuFQVoss.NPLsGLEGRspLLp+LGcuLtspsplF.....Gsp...uRPGsLlDaL.............tsssss.lshsplhssLhssLsPIWP.uRsplcGh...sLGDsW.pssl.......................sss.ssullPFHKLoQWLsYSLlpPhpp..hGlplssh-hLTGLPEYRNGGLhlDhGVLsL+cssht....................h.sassscshIVEWRALTVuLLDcLtshVpppLs..hssppLsLAplLEuGTWpAGRclAtppRPp.usPPItIhSDGTVF .............................................................aL+SlpAVR-Rsphlh...phA..p.pscLsHFslc..s+hsssAsaVsslI++cYsss......sIPsHuRWpHF-sGG.......hsRlspLhpphs...................t.hDshEcsRphlDLhlVSVLLDAGAGspWpYc.-s.t..oGph.............apRSEGLAVASLcMFpsGhFSu....-s.s.....p.Ph.pVDutuLpplosptLupGhQlots.NPlsGl-GRssLLpRLGcuL...sps-hF.........Gtp............uRPGsh..lDaL...........................ttsussshlshsslWssLhs...sLsPIWP....uR....splsGh..................slGDsW.pssl................................................sst.hpsllPFHKLoQWLsYSLhtPhpc..hslphssh-hLTGLPEYRNGGLhlDhGlLsLKstsht....................hshapss--lIVEWRAlTVulLDcLtshVpppLs..hssppLsLAplLEuGoW+.................uGRclAttpRP....s.......ttPPIhIhSDGTVF...................... 0 46 87 133 +7790 PF07959 Fucokinase L-fucokinase Adamkewicz J, Finn RD anon Pfam-B_121298 (release 15.0) Family In the salvage pathway of GDP-L-fucose, free cytosolic fucose is phosphorylated by L-fucokinase to form L-fucose-L-phosphate, which is then further converted to GDP-L-fucose in the reaction catalysed by GDP-L-fucose pyrophosphorylase [1]. 
27.50 27.50 27.50 28.90 27.20 27.20 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.37 0.70 -5.91 6 272 2009-01-15 18:05:59 2004-10-06 11:47:43 7 11 161 0 110 250 6 326.70 26 42.43 CHANGED uGG.S+RlP.AsshGKlFTsLPh...-pspt....Lss...plLsLp.shhpcF.spu..Gl..hlsouD...lhsss-s.hlshsc.susssluhPsoLslAosHGVaVscppss.tc.shphpllcchLpKPTlE-lhphpAlp+sGphhh......DoGll.hcscss..-sLhthshpsuphhlsah-L.lsshp.EhshYtDhltAhuPu...Ea......sphsocsht.t..hh.hRp+haphhpt.slshl.lssupFhHhGTop.EhLp+hous.ssl....tlsp+phss.Pshpsps.ttossIloShlsuslSlussSl.lppS+LusslpIGupCIVoGl.lhppsuh.u..........L.DhlC..hcVhhsGslphVhhhhGlpDNhKsSl+p....cuhpFhGhsappsLcchsIcsoDlhsSss..pshsLWsA+lFPVhoshc ........................................................uGG.SpRhP.hsshGKhhoslPh............tpsp..h.......l.p....phhpL..shh.ch..tthss.G.l..hlssuD........lh.hs.....t....p...h........p.....h.........sp....s.....shh..s..huh.s.s.shupsHGVal.sp.........p.......s..............................lp.hLpKsohpch...t.sl...........ts.t..h.h....................s.Ghhh.h..sschs.........-.Lhth.p.h...ps.s...h.p......hs.....hh........t..........h....................t.h.....sha.Dhhhshu.........c....................sp.t.pt.h...........................p..t..s..lshh.....l......ss.....upahahsTo....t...E..hlpphht.....th.........................th..ttp.h.........s......................hl.suhl.pst...h..ph.ssssl..lp.spl.ts..s...hpluptslloGh...p.......th................................shhh....p..lh.................s......p.....t...hshh.hGhpDshctt.......tt.tahshsh.phh.t..hslp......-hh..ts.....sL.sAplFPlhp...p............................................................................. 0 40 59 81 +7791 PF07960 CBP4 CBP4 Groocock L, Finn RD anon Groocock L Family The CBP4 in S. cerevisiae is essential for the expression and activity of ubiquinol-cytochrome c reductase [1,2]. This family appears to be fungal specific. 
20.60 20.60 21.60 23.10 20.10 20.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.77 0.71 -4.52 18 129 2009-09-10 15:02:01 2004-10-06 11:49:54 6 2 125 0 95 123 0 115.40 33 85.79 CHANGED p+ssWhhWsKhhhsGusIhsuGshLhpYhoPTDE-Llp+asPEL+pch.cpRchRQpE.p-hhphlKcpSKSDcPIWpsushcS...E.+c..............p+pctt..pt.php+tppEhcchcpppppp+c- ........t....hhhWhKhhss.GuslhsuGshLhpassPT-E-Lhp+asPELp+c..cpR..ctRQpE...pchhpplK-hS...cSD......cPIWtsst.ps...Ecp...pt.pt................ht+.ptcp...p+pphtchtt...............t.............. 0 19 49 80 +7792 PF07961 MBA1 MBA1-like protein Groocock L, Finn RD anon Groocock L Family Mba1 is an inner membrane protein that is part of the mitochondrial protein export machinery [1][2]. It binds to the large subunit of mitochondrial ribosomes and cooperates with the C-terminal ribosome-binding domain of Oxa1, which is a central component of the insertion machinery of the inner membrane. In the absence of both Mba1 and the C-terminus of Oxa1, mitochondrial translation products fail to be properly inserted into the inner membrane and serve as substrates of the matrix chaperone Hsp70 [3]. It is proposed that Mba1 functions as a ribosome receptor that cooperates with Oxa1 in the positioning of the ribosome exit site to the insertion machinery of the inner membrane [3]. 
27.40 27.40 27.40 27.40 27.20 27.20 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.27 0.70 -5.52 7 74 2012-10-03 02:27:23 2004-10-06 11:51:34 6 1 72 0 48 79 0 223.40 28 77.91 CHANGED ppsppc.pshs.+alGlhs-halPsua..h.sshssPhhhhphLhRRhYhhulNThpluhFRhpoGhKPpFh.WKNcAIEsYlpVNcuFAp+sL...........pplcshsolWVpcALptRu+plPsssch-WpLlKFssVPKLluhpsh.lPspshE.hlQllY+FcT+Q+Ll+hs+tssKs-phD+sVl-YlualpDuoTs..-hlLhGSVFESs.scshLPcss.sssplslppM+ssGDIaR.ssu .................................................................................hs.p.lG.lhschalPsth...hss.htp.s.h..hhphl.hR+hhhhshN.......ohplh...ha.+..h...p.........u..h.....+.....p.Fh.WKspAIEhalpsNpuFAptsl.................splcthsu....hhl.cuLtsRs...p.....phP.t..s.s.pLsWpLlKasps...P........KlVS..h...psh.l........s..s...t.s...h.p....hlQlVh+hsT+Q.....+l.l...p............hs....ptp...p..........p......s...pph....-....+..Dls-YlVa.....h..s.shss....-hhlhGplFESp..psh..s...p........stp.h.t.....tuDlaR....t....................................... 0 11 26 43 +7793 PF07962 Swi3 Replication Fork Protection Component Swi3 Wood V, Finn RD anon Pfam-B_9217 (release 15.0) Family Replication fork pausing is required to initiate a recombination events. More specifically, Swi1 is required for recombination near the mat1 locus. Swi3 has been found to co-purify with Swi1 Swi3, together with Swi1, define a fork protection complex that coordinates leading- and lagging-strand synthesis and stabilises stalled replication forks [1]. 
The Swi1-Swi3 complex is required for accurate replication, fork protection and replication checkpoint signalling [1,2] 25.00 25.00 25.20 25.30 24.90 24.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.76 0.72 -4.35 29 279 2009-01-15 18:05:59 2004-10-06 11:52:44 7 7 238 0 196 269 2 84.40 34 27.13 CHANGED KLcsp+LhSp.+GlPtLc+t....................h.cch+hpu+s+E......................hs-LspllphYQhWsHcLFPKupFcDhlshlEpLG+p.+plpshhpchhpcth. ........................................................KLstpRLlup.cGlPtLp+h.............................h..cph..K..h+.G.K.GHE................................hpD..LppLlphYphW.sccLaP+hpFcDhlshlEpLGpp.+plp.shhpchh.c..s................................ 0 58 101 157 +7794 PF07963 N_methyl Prokaryotic N-terminal methylation motif Bateman A anon Pfam-B_6484 (release 14.0) Motif This short motif directs methylation of the conserved phenylalanine residue. It is most often found at the N-terminus of pilins and other proteins involved in secretion, see Pfam:PF00114, Pfam:PF05946, Pfam:PF02501 and Pfam:PF07596. 21.20 20.50 21.20 20.50 21.10 20.40 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.24 0.73 -6.49 0.73 -3.90 72 4741 2012-10-03 10:38:27 2004-10-06 12:39:50 7 22 2129 0 985 9899 3456 19.90 44 11.31 CHANGED pGFTLlElllslsllullhs ...+GFTLlElllVlsIlulLs........... 0 364 650 834 +7795 PF07964 Red1 Rec10 / Red1 Wood V, Finn RD anon Wood V Family Rec10 / Red1 is involved in meiotic recombination and chromosome segregation during homologous chromosome formation. This protein localises to the synaptonemal complex in S. cerevisiae and the analogous structures (linear elements) in S. pombe [1]. This family is currently only found in fungi. 
25.00 25.00 41.40 41.40 24.40 22.60 hmmbuild -o /dev/null HMM SEED 706 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.24 0.70 -6.37 6 28 2009-01-15 18:05:59 2004-10-06 13:14:18 6 2 24 0 18 30 0 598.10 30 82.59 CHANGED hllLclhapssph+plItpLlc...scashplG.LlDN.IpDhQLsNallElLSpsFs++usspstlsphPpL.WpcppKsK.FFppphYPapuKaGsttlhpFlhsp..FushlsNhs+l+plhashust..pspsltsLsppscpt.hahQsIasplYhWsGEssFlclc+.KslcIh+-L+splcl....Klhs.sFccslpospcthht...........TlppsptFpl-Fp-pphschahps.sNlPKISEVQsalsLpa.pp.--p.I-spspc-pso+cpsl.ppcptssss-u............hphutpcspa-p....TP-+s...htpcssl.DL........ppssccpc...cshss.Klp.ppsph.psph.pllppp....pSPlsphQc+KltRssSKoh.........c.L+psh-cpt....lpsppspScpshspshpsssslsspsplpss........tutcltsphsucp.pssssVsshochpssKohKppDlsl....L-sIFupPlsKtp+p...K.p+pKQ.hLpNhhshh..........KoKh.ltsNsppsh+optlpptcosphss..ph..........sppusP.s.tch.tcQssoosshtpss+sshcs+sPptpppKlEshststphssh.........sKpNsts.sp.ss.cpth..tpp.t.sshp.......................shu-STTIss.sup..s........FTspLQ-QIapSIspFSsELppKIuIINpEhNpKIl+ELSEKYpplF+-LppsFpsDspcMhpFVGElK-MhpLPE-pLVptIRs+pF 
......................................................s.hl.plh.phs.p.+phIhpllc...scastplGplhDs.lpDhQ.sNaLlElLSsCF.+psssphtl.phPpL.W.ppcp.Ks.c.FFpsp.YPapuKpGp.pshpFlhpp..Fhspl.s.hshl+plsYssup...p.pslchh...p.tpsc..p..halQsIasplYlWhsE......p..........t.lEhc+.KplcIsK.sLKsplpI....+ltp.shppslpostsphhh...........hhs+s+tFQL-FpDpphs-pFapshsNlPKISEVQsFLsLsah-p.s........-sphp..c-pp.tpss..tpppptpps.sp.................hp.uhspspt-...lhTP-cS...sph+oDtWDl........pssopppp...sshss.Kls.pp.pphppp..p..pl.pc-......................QSPlV.sQhRKhtRpoo+TL.........EhL+p-Fpppt..................lpsppspspp..s..htp..sshlhtpsphhps...................ppp.p.s.pshthschpsspSlpK+DIsl.........LsTIFupPssKtp+p.........K.ppppQppLpNaK.Pllpl..............spsc..lps.ss.tps+shclpphposphsp...............tp.spsttttch.ppp....s.sposs.pthspsscctphsc.stt+.p.tptptphsh.....................sppssts.sp.ss.pp....ths.sps.pssslo.......................shh.-STTlls..hps...s......................ssFTspLQEQIasSIspFSNELh+KIsIINpELNpKIl+ELSEKYQpLFt-LQpsFQsDspcMhpFhGEIK-hhpLPE-pLVphIRs+KF................................................................................ 0 2 8 17 +7796 PF07965 Integrin_B_tail Integrin beta tail domain Bateman A anon Pfam-B_1876 (release 14.0) Domain This is the beta tail domain of the Integrin protein. Integrins are receptors which are involved in cell-cell and cell-extracellular matrix interactions. 20.10 20.10 20.30 20.10 18.50 18.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.32 0.72 -10.88 0.72 -3.55 58 524 2009-01-15 18:05:59 2004-10-06 13:37:43 7 25 119 21 230 460 0 83.70 32 9.93 CHANGED C.spp+cCVpCptFpsG.h..pc....sCstpCpph.l..phVcphppps...........Cph+Dp-.-ChhhFshp...psss...sphhlhVhcch.-CPpuPs ...........................Csp++sCVpCptFspGph....pp.....sCsptC.pth.l...phVcph.....pcpspss...........tCph+Dp-.D...ChhtFsat........tsss......scshlpVlcc......-CPpuPs......... 
0 30 46 122 +7797 PF07966 A1_Propeptide A1 Propeptide Rawlings ND, Finn RD anon Pfam-B_386 (release 15.0) Motif Most eukaryotic endopeptidases (Merops Family A1) are synthesised with signal and propeptides. The animal pepsin-like endopeptidase propeptides form a distinct family of propeptides, which contain a conserved motif approximately 30 residues long. In pepsinogen A, the first 11 residues of the mature pepsin sequence are displaced by residues of the propeptide. The propeptide contains two helices that block the active site cleft, in particular the conserved Asp11 residue, in pepsin, hydrogen bonds to a conserved Arg residues in the propeptide. This hydrogen bond stabilises the propeptide conformation and is probably responsible for triggering the conversion of pepsinogen to pepsin under acidic conditions [1,2]. 20.50 20.50 20.70 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.93 0.73 -7.15 0.73 -4.01 85 802 2009-01-15 18:05:59 2004-10-07 15:04:45 7 5 152 8 295 720 0 28.40 36 7.79 CHANGED ll+lPLcKh+SlRcsLpEpGhhpcaLcpp .....lh+lPL+KhKSlRcsLpEc...GhhpcaLcp........ 1 23 33 100 +7798 PF07967 zf-C3HC C3HC zinc finger-like Wood V, Finn RD anon Wood V Domain This zinc-finger like domain is distributed throughout the eukaryotic kingdom in NIPA (Nuclear interacting partner of ALK) proteins. NIPA is implicate to perform some sort of antiapoptotic role in nucleophosmin-anaplastic lymphoma kinase (ALK) mediated signaling events [1]. The domain is often repeated, with the second domain usually containing a large insert (approximately 90 residues) after the first three cysteine residues. The Schizosaccharomyces pombe the protein containing this domain (Swiss:O94506) is involved in mRNA export from the nucleus [2]. 
20.80 20.80 20.80 20.90 20.40 20.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.15 0.71 -4.40 28 278 2012-10-01 20:49:39 2004-10-07 15:12:26 8 8 216 0 192 304 7 127.90 26 24.50 CHANGED Phs+tsahpRLcTFpshs.W..........ssKPstloslpsA+pGWhs..hs......p-tlpCp.sCpspLshp..........................sphstphhcphsccatttlp.suHcpsCsW+spsss.cthtths..lsts.pshlsshtc.....chssLhph.....spLPhhss ..............................hs+pshhpRlpTF.....p...t...hs...W..........tsKPp.tlsslthA+pGWhs.....ss............t-hl...cCs..sCp..stLshp....................................sthshp...hhpph...s...pch.tp.tls.s......uHpp.C.Wt....spsss.cphhhls.....hsps..thhlpthhp+hppLhph..........t............................................................................................................................. 0 55 103 156 +7799 PF07968 Leukocidin Leukocidin/Hemolysin toxin family Bateman A anon SCOP Domain \N 20.40 20.40 22.20 21.40 17.00 16.10 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.59 0.70 -4.74 19 1628 2009-09-11 18:55:27 2004-10-27 16:53:05 7 6 341 102 22 554 5 237.50 36 70.82 CHANGED pslQFsFlcDtpaDKcslllphsG.pIsSshphsssps......phspthhWs..........tpYslul..ssssssspllc..hhP.KNp.cphpVppThGYslG..Gsl..pl.psGPs....huusuoasaScoIsYsQpsY+oplsppssp.slsWsVctsphsssstt...........shasp.lFhhscppss..supp.FlspschPsLspuGFNPpalsllopcpssspoo.chclsYpRshD.hhshh.tp......hhstpsac..shps+phss...pYcVDWcs ................................................QslpFsFlcDppYsKcsLllKhpG...IsSthph.s.cs.........hsthhWP.................tpYsl..ul...p..sssssssllc..YhP.KNp.-otpVpp.TlGYshG....Gsh.......psu.o......hG.hsuuhsaScTIo..YsQpsYcTpl-.ppssc....sVtWsVcApphhsssht.........pssasp.LFhts+ssuh...tupp.FlspsphPsLlpSGFNPpFlsslS....+-..+..s.s.s..cpS.chcloYp.RshDhhphthspp............hhs.spsap..shhscphos....pY.EVDWcs........................................... 
0 10 12 18 +7800 PF07969 Amidohydro_3 Amidohydrolase family Bateman A anon Pfam-B_751 (release 15.0) Family \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.16 0.70 -5.26 43 5270 2012-10-03 00:45:34 2004-10-27 17:18:53 6 49 2463 42 1791 12984 4622 377.00 18 74.72 CHANGED hlhPGFlDsHhHlhstshth..hhshtssh..shtthhthht......................hshsaspsthtptchs.sttsl..-tsts.stshhhtch.tphshlspt..uLph......................t.t.stshltppst....hhht................th.s.hsshtphtphhct.thtthsptGlssshsss..............s.tphhpshtplsppht.hsh.hhh.h......................................sshphhssGs.s..spsuhhtp..hsssh....phth....sttp..h.pplsptspppththplau.............ststulsphlsshcphhtp....................hhh+sphssspshcthtplshthshts........h...........hh.phhss.phphs....hshtthhssG.....hplshuoDhsh...sshsPh.hshtssshtpsht...t.hhh....................................pplsltculphhTtssAhh....hshpcph.GpltsGt.ADlsl .............................................................................................................................................................................................................................................................................................lhPGhlD..sHhH......h...t.h..t.h..t......h...........................t........h....s.h...t.ssh..............s....h...t..h.t.................................................................h...s........t........h.......s.....t......t......p...h.................s.....h.tpl............c.t...ht.................s....hh....h.......t......................h.hspt......ulph.........................................................................t..tps...p..s...h.l...tpps..........thhht.......................hh.s................s......p..p.....h.t..t.....h...lp......t....hht.h.sp...h..Glsuh...sst.........................................................t......h...p....h..h..h....p...h...........s....t...p......s.......................
......................................................................................................s.u..h....h...h........s...t.t..........shs..u.h...h........t......................h.........s...p....p.........................................................................p.......s.................h..........s.pp...................l..pph..h...th...s.....t.....p..t..u..h...t.h.t...l.au...................................................................t.s...t.u.h.p...p..h...l.s...s...h....pph.hhp................................................................................hlt+.s...p...h....h....t........t...s.......h..t.......t..h....t....p....L....t...h..t...h..s..h.ts............................................................................................h.................c...t...p...t.h..................h....h.....t....p....h....h....p..t.G..................................h.tl.s.h....u...s..D....sh..............sshsP..h....hs..l....t....s....u..s....h.t....p..s.h...p............t.....h...h........................................................................................................................................................................ptl.s..l..p..p..A..l..p.hh..T.t.suAht............ht.hp.......c.t.......GslpsGp.ADhll................................................................................................................................................................................. 0 578 1162 1523 +7801 PF07970 COPIIcoated_ERV DUF1692; Erv41; Erv46; Endoplasmic reticulum vesicle transporter Mistry J, Wood V anon Pfam-B_2028 (release 16.0) Family This family is conserved from plants and fungi to humans. Erv46 works in close conjunction with Erv41 and together they form a complex which cycles between the endoplasmic reticulum and Golgi complex. Erv46-41 interacts strongly with the endoplasmic reticulum glucosidase II. 
Mammalian glucosidase II comprises a catalytic alpha-subunit and a 58 kDa beta subunit, which is required for ER localisation. All proteins identified biochemically as Erv41p-Erv46p interactors are localised to the early secretory pathway and are involved in protein maturation and processing in the ER and/or sorting into COPII vesicles for transport to the Golgi [3]. 21.90 21.90 22.00 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.89 0.70 -4.73 51 979 2009-01-15 18:05:59 2004-10-28 13:59:39 7 20 303 0 653 899 28 187.70 29 53.30 CHANGED CYGAt..............stptCCNTC--VppAYtp+tWshsssp.slcQCpp-....h....ptphs..EGCpltGshpls+lsGshHhAPGp...shp.tthH.h...HDhp.hpp.........phNhoHhIpcLsFG......pp.....hs.t.............pP...LDshph.......t.pp.pt...hhapYalKlVPTpap............cospaSsspap+slpsst.............tuhPGlFFpY-hSPlplh.ppp+..pohtpFlsslsullGGlasluullD ...................................................................................................................................t.....-G..CplhG..lpVNKV.sG.s.FHhs.stp..............sh.................th.t...................................ht..htt.....................phNhoHhIpc...LSFG....pt........hsshh..........................sPLDssph..................tp.p.t............................hhapY..alclVPThY.phttt............................h.popQaSVT.p....pp...cshptt.................................tslPG.laFpY-lSPhhVh..hpEp+...psahpFlsplCuIlGGhhsluuhl...................................... 0 250 385 541 +7802 PF07971 Glyco_hydro_92 Glycosyl hydrolase family 92 Mistry J anon Pfam-B_1199 (release 16.0) Domain Members of this family are alpha-1,2-mannosidases, enzymes which remove alpha-1,2-linked mannose residues from Man(9)(GlcNAc)(2) by hydrolysis. They are critical for the maturation of N-linked oligosaccharides and ER-associated degradation [1]. 
24.50 24.50 24.80 24.60 24.00 24.30 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.66 0.70 -5.86 187 2270 2012-10-02 14:50:22 2004-10-29 16:03:55 7 44 847 40 652 2109 326 474.50 31 62.78 CHANGED u.pph...t.uhlpF...st.....spp.......l...s+lulSalS...h-pApt...NLct....Eh..........s..shs..F-pl..pppApstWscp.Lu+l.pV..p..........u.....ss....psp.....pphFYouLY+shltPphhs-........h.....sup..................Yp.u.....t...........................t.............uht........Yss.a................ulWDTaRuhaPLhsllpPp.hss-hlpShlsha.c..ps..........GaLPcWthsspp.stsMsGspussllADAa...hK..G............lp.shD......hc.pAacAh..h+sAp.............ps....................+sG...........lppah.phGYl..P...........................hsc...u.......sSpTLEYAYsDasluphAc.u...L...........Gcpp...............................-.h.ptahcRupsa+Nlacsp.......................................sGFhps+......p..ts..........G....s......at..........t......s.......asP..h.............th...s.......psah.EGsuW....pYoahV.PHDhsGLl.pLhGGc..................ct...........Fhp+LDphFst....ht......................hGpY....s.hGNEPuaHhsYLYsas.GpPa+TQphlRplhsphYs..sss..sGls.G..N-DsGpMSAWYlaSu.lGhYPlsPG...sstYhluoPhFccssl...pL.........................sG.......+...s.hsI....pA.pssu........tc.s.........hYlpusplNGcshs....+..sa..lsHsc...lh..pGGpLpapMuspP 
...............................................s...tshhpF...t.st...spp.......l.ph+luhSalS...h-p.....Apt.....NLpt.Eh..........s...shs...F-..ph..pp...pupstWpph.Lu+.l.pl...p...................s................ss....psp.............pphFYosLY+shlhPphht.-.h.......ssp..........................Yp..sh...pt...................................pht..s.t...h.....Yos..h......uhWDTaRshaPLhs.Ll..P.c.t.h.pchlpuhlsha.c..ps.................G..a.L.P...cW....hs.....sp..........c.s................shM..........sGstussl...lADAahK.....G.............lp...shD.......hp...puapAh.....hcsAp..........t.............................s+tu...........h.t.t.Yh...chGYlP.........................sphpc........u..........lScTLEYAasDasluphAc.tL...........Gc.pp...............................p.t.pha.hpR.ut.s.Y.+.s.l.acsp.......................................s.Gahps+pss.................................Gs............at..................................s............s..........FsPh..................ph....s........psas.Eu...suWpYoa...........hV.......HDhtGLl.pLh..GGc..................c.s............Fhp+LDphFst....h.t.....................h.h.Gpa..s.huNpPuaHhsYLYsa..s.G..pP.....h+TQthl+p.....l....................h...................p...................p....h...................ap.................sss.........sGh...s..G..............s-DsGpMSAWYlaou.lGhYPls..PG....pspYslGsP.hFc.c.spl..pL........................t.sG....+p..hsIps..tsss..........tc..s.................hYlpphplNGc...........h..s.........p.sa..ls.H.pc....lh....pGu...pLpaphusp............................ 
0 232 458 572 +7803 PF07972 Flavodoxin_NdrI NrdI Flavodoxin like Finn RD anon Pfam-B_1603 (release 16.0) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.55 0.71 -4.21 86 2192 2012-10-03 05:08:30 2004-11-12 15:12:44 6 3 1921 11 257 1337 42 121.30 41 86.07 CHANGED lhasS.hoGNT+RFl.....c+Lsh...t.+hshp........thsp......hplscPalLlsPTYusGt...tsu........VPppVhcFLs..cNcphlpGV..........luSGNpNFGsp.ashAuchlup+hpVPhLhcFELtGTppDlpplpp ...................................lha.S.hSsNTcRFl.......p+Lsh.............u...c.hslp............ttpp.........hplcEP...al.Ll.l..P.TYGsGss....tsu...............VPctV.hcF...Ls.......t...p.N.....+....p...h...l.....+G....V..........luSGNR.NF.G.pt..ashAuch....Iup...+h..sV..Ph..L..h..+FELh.G.TppDlcpV+.......................................... 0 54 135 202 +7804 PF07973 tRNA_SAD Threonyl and Alanyl tRNA synthetase second additional domain Finn, RD anon Pfam-B_270 (release 16.0) Domain The catalytically active from of threonyl/alanyl tRNA synthetase is a dimer. Within the tRNA synthetase class II dimer, the bound tRNA interacts with both monomers making specific interactions with the catalytic domain, the C-terminal domain, and this domain (the second additional domain). The second additional domain is comprised of a pair of perpendicularly orientated antiparallel beta sheets, of four and three strands, respectively, that surround a central alpha helix that forms the core of the domain [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.06 0.72 -4.07 219 11375 2009-01-15 18:05:59 2004-11-15 16:21:20 9 40 4904 28 3159 8949 4659 48.10 36 6.80 CHANGED lRll.....ph.G...s................hsh......-LCuGsHlpsTucI..ttF+lh.......ptsshspslpRIh ...............................lplh.....ph.G...-.................................h.....-LCtGs..HV.ssTucI...th.FKlh........................ptsuhsttlpRI........................................... 0 1065 1968 2631 +7805 PF07974 EGF_2 EGF-like domain Bateman A anon Pfam-B_80 (Release 16.0) Domain This family contains EGF domains found in a variety of extracellular proteins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.44 0.72 -3.78 51 1990 2012-10-03 09:47:55 2004-11-17 13:40:45 8 364 173 0 1055 1784 147 30.20 42 4.71 CHANGED C....s......Css...pGpCs........tthCpCpsu.....atGspC ...............C.t.............Csu....+GpCs...........sGpChCpsG.......apGssC.... 0 399 519 772 +7806 PF07975 C1_4 TFIIH C1-like domain Finn RD anon Pfam-B_10678 (release 16.0) Domain The carboxyl-terminal region of TFIIH is essential for transcription activity. This regions binds three zinc atoms through two independent domain. The first contains a C4 zinc finger motif, whereas the second is characterised by a CX(2)CX(2-4)FCADCD motif. The solution structure of the second C-terminal domain revealed homology with the regulatory domain of protein kinase C (Pfam:PF00130) [1]. 
24.60 24.60 24.60 25.20 24.50 24.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.47 0.72 -3.96 16 268 2012-10-02 13:15:50 2004-11-17 14:18:53 7 7 237 1 201 260 6 52.60 46 12.05 CHANGED hCauCQpph.s...p................phYpCspCcphFClDCDlFIHEoLHsCPGCps ..........................................CauCptphss..............................................ptYpCstCpphFClDCDlFlHEsLHsCPGCt.... 0 62 106 161 +7807 PF07976 Phe_hydrox_dim Phenol hydroxylase, C-terminal dimerisation domain Finn RD anon Pfam-B_19435 (release 16.0) Family Phenol hydroxylase acts a homodimer, to hydroxylates phenol to catechol or similar product. The enzyme is comprised of three domains. The first two domains from the active site. The third domain, this domain, is involved in forming the dimerisation interface. The domain adopts a thioredoxin-like fold [1]. 20.90 20.90 21.40 21.60 20.70 20.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.99 0.71 -4.65 78 504 2009-01-15 18:05:59 2004-11-18 16:03:01 7 14 183 10 351 544 44 164.70 24 25.89 CHANGED FsuGhulcYssS.hLs..........spss.p.................................pt........LAsslhlGpRhhoupVlRhuDupshcLtctlpuDGRaRlhlFuGchpt.s..p..sh....s.plpplsphLs.sssShlp+assts.....................tphs.Sl.l-lhslapss+psl-lt.DlPp.lhpPhptphth...........Dap+lass.D.t..........pscsachh..G.lDccc.G ..................................................FsoGhulcYssu.hls.......tpss..p.................................pt......lussltsGpRh.ssp..VhRhuDupshcLpc......pl.us..Gp.aRlhlFu.Gs.pt.sp...h..........ttlpshs...phLt.t.p...Shlppassts..........................tt.s..Sh.h-lhh.l.......a.......st...+pp.l-lp.s.lPt.hhp.s......h...........sh.c.lass.-..............tsphaphh..G.ls.p.............................................................................. 0 72 165 283 +7808 PF07977 FabA FabA-like domain Bateman A anon Bateman A Domain This enzyme domain has a HotDog fold. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.75 0.71 -4.69 51 6557 2012-10-02 20:54:35 2004-11-19 13:29:09 8 37 3882 226 1494 3917 3933 124.30 34 64.08 CHANGED LPpch.hLhlDRlhpls.sGtp.....lhu.Kslo.s-.FFpsHF.scPVMPGsLhlEAhhQhsGhhhhht..tpspG.........h...ulscs+F+tpVhPss.plphclchp+......p+hs.huhucuhshVDGchlhpAc ....................................LPHRhPhLhlDRlh-hp.....t..........u.pp.......................lhAhKsVoh..N.....E.....F...F....p....G....H....F.Ps.p...Pl.....MPGVLllE.............AhAQ....s....s....G....lh.......hh.....tp...................t.tp.G......................thhhh..hGlcc.s+F+p..V.lP.G..D...pl..h.hclchh+........p+ts..hsh.hcuhu.h..V.DGc.llspA............................................................... 0 441 905 1223 +7809 PF07978 NIPSNAP NIPSNAP Mistry J anon Pfam-B_3436 (release 16.0) Domain Members of this family include many hypothetical proteins. It also includes members of the NIPSNAP family which have putative roles in vesicular transport [1]. This domain is often found in duplicate. 22.30 22.30 22.30 22.30 21.60 22.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.04 0.72 -4.04 42 1376 2012-10-02 00:20:33 2004-11-23 10:06:50 8 10 733 16 670 1283 354 97.80 20 66.90 CHANGED aE..lRsYplcPuphsta.hptatcthhthht.htschlGhahs...............phGs.hsplhtlasasshssRpthRsthhpDstW.ttthptstth..lpp.csplhhPsshSs ....................................E.htpYplc.P.s..p..h..t..p..a....hp...t....a.tc....t..h...h......h....h...p...t....h...s....s..chlGhah.s...............phGs...hspshtla...sap.s.....hpshcp....hRpt.hh.p....c....s....ta....tt..h....h...t..h..s.h..t....h....l.pp....csphhhsh................................................ 0 183 323 489 +7810 PF07979 Intimin_C Intimin C-type lectin domain Bateman A anon Pfam-B_1879 (Release 16.0) Domain This domain is found at the C-terminus of intimin. 
Its structure has been solved and shown to have a C-lectin type of structure [1]. Intimin is a bacterial adhesion molecule involved in intimate attachment of enteropathogenic and enterohemorrhagic Escherichia coli to mammalian host cells. Intimin targets the translocated intimin receptor (Tir), which is exported by the bacteria and integrated into the host cell plasma membrane. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.59 0.72 -3.81 14 389 2012-10-02 16:37:33 2004-11-24 13:34:50 6 10 133 15 2 312 0 94.00 48 12.60 CHANGED Pspl.IsVspps+ssYssApspCp..shuupLsSSpspLps......lYspWGAANKYpaYpupsoIoAWlpQTssDhpsGVuSTYDLVTpNsl.N......VsssssNAYAVCVK .........Psph.ItV-ptp+ssYs-A.shC+..s.su.LPSSpshLus........lYssWGAANKYsaYpu.pSlTAWIpQTss-ppuGVoSTYDLlTpN.l.s......Vslss.NsaAVCVc........................... 0 0 0 1 +7811 PF07980 SusD SusD_RagB; SusD family Mistry J, Bateman A anon Pfam-B_1855 (release 16.0) Domain This family includes several hypothetical proteins. It also contains RagB, Swiss:Q9ZA59, a protein involved in signalling [1] and SusD, Swiss:Q8A1G2, an outer membrane protein involved in nutrient binding [2]. 
20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.83 0.70 -4.87 151 6618 2012-10-11 20:01:00 2004-11-24 15:06:21 6 10 210 38 1597 6145 784 279.40 17 50.72 CHANGED sEhIa......ththtt.ssssssshshthhhtstss............................tshsthts.........opphhsha............................................................................pspDsRhtt.shhh.tttht.....................................................................................htt.ststshsshhhpKahs.............................ttssstssstsh..........lhRhA-lhLhhAE...Ahh............chuss.......sp.....Ah..phlNp....l...R..pRA..................................................................uhsshs.s................................ssthhp...tlhp.ERthELshE.spR.ahDLhRh.................................................................................tthhpphtthttstpththtttt...........................................................................................lhPIPpsplsts............ssLp..Q.N...s........Ga 
.....................................................................................................................................................................................................................................................................................................................................-.hh...................................................................................................................................sh.s...h.h.s.............s...p...h.h...p..a.................................................................................................................................t.s..p...D.....Rhth....s....hh.h....t..h..........................................................................................................................................................................................................................................................t...t.....t...t....s..h....s..s.h..h.....h.t.Kahs...................................................ttsstt.t...ss...t.sh...............................lh...RhAElhLhhAE....Ahs...................................c.hsss........................sp..............Ah.....phl.....N.p......l..........R..p..Ru.............................................................................................................................................................s.h...ss...h.s..s....................................................psth.h.p........tlhp.ERphE.Lsh.E....G....t.....R.ahDL.hRh.................................................................................................................................ttth..t..t..h..t.t.h.h...t..t..h.....t.......t..h.t.h.....t...................................................................................................................................................................hhP..I.P.p....s.p.l.p.hs......................st.lh..........Q..N...sua...........................................................................
................................................................................................................. 1 756 1474 1596 +7812 PF07981 Plasmod_MYXSPDY Plasmodium repeat_MYXSPDY Mistry J anon Pfam-B_3138 (release 16.0) Repeat This repeat is found in two hypothetical Plasmodium proteins. 21.10 21.10 41.10 21.60 15.70 18.60 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.69 0.72 -5.92 0.72 -4.22 24 71 2009-09-11 18:54:15 2004-11-24 15:52:40 6 2 1 0 71 71 0 17.00 79 98.85 CHANGED MYhSPDYTL.hVtLPDT MYFSPDYTLRLVQLPDT 0 71 71 71 +7813 PF07982 Herpes_UL74 Herpes UL74 glycoproteins Mistry J anon Pfam-B_3076 (release 16.0) Family Members of this family are viral glycoproteins that form part of an envelope complex [1]. 25.00 25.00 29.80 29.50 17.30 17.60 hmmbuild --amino -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.54 0.70 -5.79 4 191 2009-09-11 13:31:48 2004-11-24 15:59:50 7 1 15 0 0 100 0 200.00 65 81.24 CHANGED +VLSppGKpt.cKhK.-ILKpLhu.opDhY+FahhPopQ+lhNlslsMccFPpsYILAGPIpNcSITahWFDFYSTQLRKPAKYVYSEYNHTu+KITFRPPSCGTVPSMsCLSEMLNVS+RNsTGEcuCGNFTTFNPMFFNVPRWNTKLYVGSsKVNVDSQTIYFLGLsALLLRYAQRNCTRSFYLVNAMSRNLFRVPKYINGTKLKNTMRKLKRKQAPVKE.spKKSKKSQ.STTTPYSsYTTSTsFNVoTNVTYSsTssspRlsTSTIuYRPDSsFMKSIMTTQLRDLATWVYTTLRYRp-PFC+ssRNRTAVSEFMKNTHVLIRNETPYTIYGTLDMSSLYYNETMsVENETASDNNETTPTSPSTtFQRTFIDPLWDYLDSLLFLscIRNFSLQ.PsYGNLTPPEHRRAVNLSTL 
..........................................pVLSphGK.cLcphKlEILKQL...ppDhYphahp.o+QtlpNlThs.MscFPph.Y.ILAGPIpNpSITYLWFDF..YSTQLRK..P.......A.........KYVYSpY.....NHT.A..+pIT.....FRP...PsCGTVPS............................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +7814 PF07983 X8 X8 domain Bateman A anon Pfam-B_374 (Release 16.0) Domain The X8 domain [2] domain contains at least 6 conserved cysteine residues that presumably form three disulphide bridges. The domain is found in an Olive pollen allergen [1] as well as at the C-terminus of several families of glycosyl hydrolases [2]. This domain may be involved in carbohydrate binding. This domain is characteristic of GPI-anchored domains [4,5]. 25.70 25.70 26.30 25.70 25.40 25.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.15 0.72 -3.49 189 1389 2009-01-15 18:05:59 2004-11-24 17:27:23 8 20 213 4 865 1350 0 72.40 35 18.91 CHANGED hhCVs.p.sss.....sspphtshhsasCu......st..s.-CsuI.sssus..........Css......ps+hSashNpYYppp...sp.s.ssuCsFsGsAs ....................WCVsp....s.ss.....spspLpss....lsaACut.........t....uDCss.Ipsuus............Cap.....shts+uSYAhNsYYQpp......sp..s.....sssCsFsGsA......... 0 131 493 696 +7815 PF07984 DUF1693 Domain of unknown function (DUF1693) Mistry J anon Pfam-B_3630 (release 16.0) Domain This family contains many hypothetical proteins. It also includes four nematode prion-like proteins. This domain has been identified as part of the nucleotidyltransferase superfamily [2]. 
19.80 19.80 19.90 19.80 19.50 19.70 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.07 0.70 -5.27 11 296 2012-10-02 22:47:23 2004-11-25 10:17:43 7 7 97 0 182 265 0 287.70 57 67.88 CHANGED Lsa-QVpRLcslLsEslPIHGRGNFPTL-lp.+plV......................................................psVRu+LE-p......GlpV+DVRLNGSAASHVLspDsGluYKDLDLIFslsLss-ss.......FphlKDVVLssLLDFLPcGVsKEKloshTLKEAYVQKhVKV..ssDsDRWSLISLSNssG.KNVELKFVDSlRRQFEFSVDSFQIhLDSLLhFhcsupsshoEsFaPTVlGEShYGDFpEAhsHLpp+LIATRsPEEIRGGGLLKYCsLLVRsFRPuspp-lKsLERYMCSRFFIDFPDItEQpRKLEuYLpNHF..............hGt-.cs..................KY-YLMpL+cVVsESTVCLMGHERRQTLsLIohL .......................Lsa-QVpRLcplLsEslPIHG....R....G....NFPTLplp.+pIV......................................................psVRs+Lpcp......GItV+D..VRLNGSAASHVLtp..-s.........G..hu.............YKDLDLIFsl..-..L...s..-tp............FphVK-s.VLssLLDFL....PcG.Vs+cK.......I...os......hTL.KEAYVQKhV.KV....s.sDs.......DRWSLISL...S..Nss...G...KNVELKFVDSlRRQFEFSVDSFQIhL..DSLLhFa......ps....u.....p.....s..Ph.o.-.....s.F....aP..TVl..GEShYGDFpE...AhpHLp.p+lIuTRpP...E...EIRGGGLLKYCpLLVRsF+Ps.s.pt.-............l.K.sLpRYMCSRFFIDFsDls-Q......p+KLEsYLpNHF..................sG.-..pp...........................+YpYLhhL+pVVsE..STVCLMGHERRQTLsLIshL................................. 0 45 59 114 +7816 PF07985 SRR1 SRR1 Wood V, Mistry J anon Pfam-B_29119 (release 16.0) Domain SRR1 proteins are signalling proteins involved in regulating the circadian clock in Arabidopsis[1]. 20.70 20.70 20.70 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.75 0.72 -4.04 38 284 2009-01-15 18:05:59 2004-11-25 16:25:20 7 8 229 0 197 286 1 58.00 31 18.80 CHANGED cl.lClGLGshp.........pshsupaQLAhllhlhchh...pl.....splplY..DPlFsps-t...........phlp.slG ......phlChGLGshs................pshsupaQLAhl.lhlh-th.......ph.....................tps...la..DPlFoph-h...........phLppL........... 
0 59 96 156 +7817 PF07986 TBCC Tubulin binding cofactor C Mistry J anon Pfam-B_4111 (release 16.0) Domain Members of this family are involved in the folding pathway of tubulins and form a beta helix structure [2]. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.65 0.71 -4.70 52 603 2012-10-02 17:34:32 2004-11-29 14:13:34 7 18 275 4 417 624 21 115.80 23 26.40 CHANGED spshplpshppCplhl......hsshsslplcsspssplhh.GsVsuSlalcsCpsspllssspQlRh+sspssshalpsso.pPII......EsspslpFus....ashhYstlptphtts.tls..sss.Wsp..ltDF .............................phhlpphppshlhl...................hsshss....lplcsspss.t.llh..G...............s...V...s...sulalcsCcssp..lh.....l.....u..C..p.....Q.............lRl............+ssps.splaLt......s..s..........o.....pP..II.......Ess....ps...l.pFuP............a.sh..hYstlptphtts....sls......sp..Wsp................................................... 0 171 246 344 +7818 PF07987 DUF1775 Bacterial_GLE1; Domain of unkown function (DUF1775) Mistry J anon Pfam-B_12641 (release 16.0) Domain Domain found in bacteria with undetermined function. Its structure has been determined and is an immunoglobulin-like fold. 25.00 25.00 27.60 27.50 24.30 23.90 hmmbuild --amino -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.91 0.71 -4.36 87 551 2009-01-15 18:05:59 2004-11-29 14:53:07 6 4 483 1 162 455 4 130.40 35 57.34 CHANGED HVol...ps..spAssGuat.phsh+VPpts-st.uTsclcVplPps..ltuspspPtPGWslpspcsshttsh....tt.hspsVsplsWouss....lsss..pascFslps.plP...tsssslsFsshQshscG....ss.cWs-.sts....Gt.-s.cp...............PAPslpls ................................................................HVol...ps....spuss.Guat.phsh+VPsEps.s..uTTKlplplPpG..ltht.ps...pPh...sGWphchpKss................sttlp.plsWpuss.....l.ss..papcFshsu.p.P......pctsplsasshQpYpDG.......slVcWs.........Gptcs..cp................PAPhhpl.h....................... 
0 42 102 139 +7819 PF07988 LMSTEN Wos2; LMSTEN motif Wood V, Mistry J, Bateman A anon Pfam-B_4851 (release 16.0) Motif This region of Myb proteins has previously been described as the transcriptional activation domain present in the vertebrate c-Myb and A-Myb, but neither vertebrate B-Myb proteins nor Myb proteins of invertebrates. Because vertebrate B-Myb (but neither A-Myb nor c-Myb) can partially complement Drosophila Myb null mutants, this region appears to have been a relatively recent insertion. 19.30 19.30 23.60 26.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.24 0.72 -4.67 6 191 2009-01-15 18:05:59 2004-11-29 16:03:41 7 19 45 2 59 174 0 46.60 72 7.43 CHANGED csNhlphsQsusAhIQppasD.EDP-KEKRIKELELLLMSTENEl+pKp ..............clNIVNl.PQPuuAAIQRHYsD.EDPEKEKRIKELELLLMSTENEL+GQp.. 0 3 7 23 +7820 PF07989 Microtub_assoc Spindle_assoc; Microtubule associated Mistry J, Wood V anon Pfam-B_45034 (release 16.0) Domain This presumed domain has been identified in two microtubule associated proteins in Schizosaccharomyces pombe, Mto1 and Pcp1. Mto1 has been identified in association with spindle pole body and non-spindle pole body microtubules [1]. The pericentrin homolog Pcp1 is also associated with the fungal centrosome or spindle pole body (SPB) [2]. 21.90 21.90 22.10 22.00 21.80 21.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.40 0.72 -4.15 15 383 2009-09-11 00:31:27 2004-11-29 16:39:38 6 11 174 0 233 391 5 72.10 39 5.41 CHANGED TLRE.EpplscL+KENFsLKL+IaFLEEplp..pcss-sscclhKpNI-LKlEltoLc+-lpcpcchLpcAp+slcs .................sh+-.-pp..lscLcKENFsLKLRIaFLEEphp..............pphc.sstcchh+cNlE.LKV-ltoLp+-lpchcchLpcsp+slc.s........................................ 
0 56 101 170 +7821 PF07990 NABP Nucleic acid binding protein NABP Wood V, Mistry J anon Pfam-B_10222 (release 16.0) Domain Many members of this family are putative nucleic acid binding proteins. One member of this family has been partially characterised [1] and contains two putative phosphorylation sites and a possible dimerisation / leucine zipper domain. 21.60 21.60 21.80 22.40 19.60 21.40 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.51 0.70 -5.19 10 106 2009-01-15 18:05:59 2004-11-30 09:14:21 7 8 21 0 60 101 0 289.90 33 38.22 CHANGED RVPSPCLsPIGs.RVuusDK+ssuusSsFNssoSulsESuDLluALSGMNLSsouulct.psh.pSQhpQDV-shpsYhFslQGGpspsNQ.Huahp+uDpuah.........................................+usssstsu.hpuspuSshssGuulss.aQp.hDusN.sh......suYulNPAluSMMtNQLGsuNhsPha-NsuAuSuhussuhDSR.hGuuhsSus...tusS-scNLsRlGNph..uGouLQSshsDPMYhQYLphsp.uAsusAshsDPSh-RNah.GoSYMDhLtlQKAYLuuLLt.QKpQYGlPh..KSuusssHuYYGsPuFG..GhuYPGSPLuoPslPsS.huPsSPlR+uEhNMRas...SuhRNh.GGlMGSWHhDs.sshD-sFuSSLLEEFKSNKTRuFELS .................................................................................................................................RsPust.sPlGs.+h.s.-.K+s.ss....pt.sos.hs-ss-lhsALSshshS...ssthh...st...tt...uph.p.plpp.pph..hhsh.ssppph.pp.pthhppsp....tt................................................................................................................................tsashsss...suhhhs..phs.ssshss..h-s..s..s.....uuuh.u..sthsSt..Gssh.st.....s.sp..shsthus.....usuhQs..hDPhYhQYhp...s.p.sstt.....hus.tDP.htps.ah.Gsuahsh.t.....hQKAalt.shh....t.QK.......Q..at.......s......pts..s.sYaGs..s.a...shsYsGSPls...sshhssS.hussS.PhRcs-h.shRhs...SshRNh.u..GshG........uWp.-h.t.ph-tshssShLEEFKoNKo+saELu................................. 
0 8 34 48 +7822 PF07991 IlvN Acetohydroxy acid isomeroreductase, catalytic domain Bateman A, Griffiths-Jones SR anon Prodom_2380 (release 99.1) Family Acetohydroxy acid isomeroreductase catalyses the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.96 0.71 -4.88 45 4225 2012-10-10 17:06:42 2004-11-30 13:30:19 7 7 3943 20 1058 7052 5249 165.30 49 44.30 CHANGED lL+s.KplAlIGYGSQGHAHAhNLRDSGl.cVllGLRpGu....t.Shc+AcpcGFcVholsEAscpADllMlLlPDEhQspVYcpcIpPNL+cGs..sLsFuHGFNIHas.IpPPcsVDVhMVAPKGPGHhVRcpYpcGtGVPsLlAVa..QDso.GpAh-lALuYAKGIGusRAG ......................................h.lpGKKVAll.G.Y.G.S.Q..G..HA......aAh...NL..R...D........S.......G.l....-......V...s...l..G.L.R.p..G.u...............t....S.a...c....K..A.......c........p........s........G...........F...............c.................V.......h.....o.........l...s...E..........A....s....p...p...A....D...l...l..M...h...L..h..P...D..c..h..Q..u.c..l.a...t..p...c..lp..Pp..L.+pG.s..AL..u...F.u..H.G........F...N....I...H....a........s....t........l...p........s...........p.D.l....DV...hM.......V.AP.K.u.PGHh..V....RcpYp.....c...G.....t.....GVP..sLlAVa..QDs....o.....G....p.......AhslAluYAtulGGsRAG............................................................................................ 0 334 687 899 +7823 PF07992 Pyr_redox_2 Pyridine nucleotide-disulphide oxidoreductase Finn RD anon Manual Domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null --hand HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.67 0.71 -4.44 139 57441 2012-10-10 17:06:42 2004-11-30 13:43:45 9 725 5624 527 15622 68615 26122 259.00 18 57.57 CHANGED cllllGuGsAGhsAAhpht......phshc.lsll.............pp.sthsh.stsslspphhhpstthht.h....t.h.....................................pphh...tp.tslch............hht.ppshpl..shspt.......................t..h..........hphcplllATG.up.sth.....sls.G..................t.h.hh.ptltsupphhphhph............................sp...cllllGuGhlGlEhAthhpphGtcVsllcttsphhsth.sphttthhpphh..................................................hththhttttstpttsssphtththsstpphpsDhlllulGhpPsschh.....cphGlch..........tp..................pGhI..hl-c.....th..pT.......................s...ssslaAsGDsstht. .................................................................................................................................cllllGu......G..s.u...Ghs....sAh.t...hs........................pt.s...h....c....l....s..ll.........................................................pt.....s...............s......h.............h.......s....t....l.....s.....p...t...h...h....h....p..s...h..t.h..h....t.....h..............................................................................................................................................phh.t.t.h..t.......p....hs..h..ph.....................................ht......pp...s...h..pl.........thst.t...........................................................................................................t....t.t....p....h....p.h...c...t...l...l.l....A....s...G....u.......p....s.t.h.....................sls.G..........................................t..t....l.........s...s..t...t...h....h..t.h....h.........................................................................................sp.......pls...l...lGuG.huh-hu.hhtths.pshhh.hht.hh.................................................................................
...........................................................................................................................................................................................h.hh.........tt.t......ttttt..hshhhhhhshhsssp.h....tths.l..p..h..........t.p..........................................................p.G...h...l........h..s......s.p..............h..........p.T......................................................................s......hs..s..laA..hGDsht...t........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 5062 9898 13194 +7824 PF07993 NAD_binding_4 Male sterility protein Griffiths-Jones SR anon Pfam-B_1115 (release 6.4) Family This family represents the C-terminal region of the male sterility protein in a number of arabidopsis and drosophila. A sequence-related jojoba acyl CoA reductase is also included. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.58 0.70 -5.45 102 3739 2012-10-10 17:06:42 2004-11-30 13:44:47 7 268 1310 1 2010 30893 13882 233.90 23 19.78 CHANGED lTGuTGFLGphllccLL..cssssh..clasL.VR.upssp....sutpRl.pphhphslap.h..............tp+lpslsGDlspsp...LGLsspshppL..sppl-lllHsAAsVpast..shpp..hhssNVhGTtpllcLAp.........p.h+ph......shhaVST........uhssstpt.th.c.....................................................htp.thshhtshss.sYshoKhhAEhllpptt...sLP....l.....sIhRPuhlsu...................................-shs.G..hhsshc.hhphlhhsht.tGhh.shhss.t.............hshlPVDhVspul ...............................................................................................lTGuT.GF.L...Gt.h......l....l.p......cL....L....p................p.......s.......s...................h......pl.....h...s..L.....l.R.....s...p..p......t...t............t...s........h...p...R.....l.......p.t..h....p.h.t..h....tth............................................hp+.l.p..s.l.s..G...D..l...s..p.......p.........L...G......L.............s....p....t...p.....h....p.t...L........h........p.................p......l..s........h.............l..l..H......s...A.........A........t.......l.......s.......a.......s.......t..............s.....h.....p...p.............h....h....t....s......N.....l....t....G....T..t....p....l....l....c....l...At.......................p..hpph..........thha...l..So........uh..s....s..s......t..t......t...h..............................................................................................................................................................................t.p....t......t..h..h...t....s..h.s...s....sY.....s.t...o...Kh..h....u....E.......h....l....l.........p......p.......t......t.......................................s........l....s..................s...................sl..h..R........s.uh.lsu.......................................................ps.h.s..G....h.h..s...s...h.
..s.......h....t.h...h.h.......h..s..h.t.....h.....s.....h..h...........t...h....h...s.p........................................hshlPVDhssph....................................................................................................................................................................................................... 1 595 1051 1665 +7825 PF07994 NAD_binding_5 Myo-inositol-1-phosphate synthase Bashton M, Bateman A anon Pfam-B_959 (release 4.1) Family This is a family of myo-inositol-1-phosphate synthases. Inositol-1-phosphate catalyses the conversion of glucose-6- phosphate to inositol-1-phosphate, which is then dephosphorylated to inositol [1]. Inositol phosphates play an important role in signal transduction. 25.00 25.00 28.60 28.40 20.90 20.70 hmmbuild -o /dev/null --hand HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.93 0.70 -4.92 37 1323 2012-10-10 17:06:42 2004-11-30 13:45:19 7 12 949 36 547 1220 416 337.10 33 89.86 CHANGED ltlhlVGhhGNsuoTllsGlhA.+cshshts............G.lsthushclG...............th.PhhsssshVluGaDI...sshplucshpc....Atshs.slpppltschp.....................shlssspup+hDslhs........tstsphcpspphtpDItcFtcpsslD......phllL.susTEp.........h.......shttl.tshp.sttp.lssSphYAhAul..h......tGssalNusPpssh.sP..uhh-hAccpsl..IsGDDhKoGpTth+osLs.hhlttshcspulsoYNhLGNsDGtsLos.pphRSKchoKSsl.ss.............................................p.hhphphsccssHhssIcYlPhlGDsKhAhDchpuclFhGup.plpl+.sscDShlAuPlllDLlhhsclspc.........................................+shtu.Vhsh.SYahKuPh......stthssh.cQttsl-shlR 
.................................................................ltlhllGh.GNsuoolhtGl.h.+ps.s.tt..................lsth..php..hG.....................shhs.s..hhuuaDl...ss...h...pl...upsh....tc.....Ahhhp..s.p.pl.t..ph..........................thlssp...ps.p..h.sslht..........................phpt.pt...DlhchhcpsplD........Vll.hssTtp..................................................hssophYAhAul........tsssF.lNu.Pp.hh.sP.......thhchhpc.tsl..IsGDDhKSt.sthhohhs.hhlhtshtsp..hohph.ssssh..Ls..pp.R.cphphSps.s.................................................ts.h..thttcssHhss.cYlshlsDpKhAhschpuphFhss..slphp.ps.DS..uAslllDhlhhsclutc.............................................cuhtu..lhs.h.uahhKuP..........s..hps....pththlcphh................................................................................................................... 0 192 351 473 +7826 PF07995 GSDH Glucose / Sorbosone dehydrogenase Mistry J, Bateman A anon Pfam-B_1863 (release 16.0) Domain Members of this family are glucose/sorbosone dehydrogenases that possess a beta-propeller fold. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.23 0.70 -5.36 164 3525 2012-10-05 17:30:43 2004-11-30 16:29:24 6 176 1857 34 1319 3362 3764 295.30 22 60.71 CHANGED LcpP...Wul......uFL.P....D.G.ph.LlTER.sGclplls.........sGph..pslsGl........P.pV..........hspG.....QGGLLDlslsPcF..sps........phlYloYu......................tss.....ss.ss...sT.sluRucL..sss.....pLp....shcslact...pPt.hss..stHaGuR.lsFs.....s.....DG.pLalohG-Rtp..........cs......pAQDh...ssphGKllRls.sDGolPsDNPFs........s.........ps..............suts.pIWSYGHRNsQ.GhshcspsGpLWspEHGP+GG......DElNllcsGtNYGWPlloaGhpYs....Gs.................lspt..sspsG.........hppPlh.hWsPo....IAPSGhsaYsG...ct.F.P.p......WcGslhlGuL.p.......sptLhRlpl.c.s...............sp....lsppE+lh.........................tshG.RlRDVppuPD.G.tlYllTD.........tsG...p.........llR ...................................................................................................................................................................h.h.hh.s.......s...t.....t......h.hl....s..p....G.t.lhhht.................ttth..............h.........th...................tl..........................t....s.....p.......t...G....l.h.s..l.s.h....t...P.t....a.....t......p..s.........thh.a.h.hs..........................................................t......ts...tlsph.ph...pts...................p..h...p......s......p....s....l..hp.t...............h.Pt......t..........s.pH...hG.sp...lh.Fs...............s..........-.....G...hLa.l..shG-ssp........................ht..............................tu..ps...h.........p.p......h........t...G...p........l.....l.......R...............l......s.......s...............s...............G..................p............h......s....s......NP.h..............................................................................ts....cl.a..uhGhRNs...............G..huh.......s.......s.......t.......s..........u...p......La...........h..s.....-...h..G......sc.th...
.............................................D...E..l....s...h........l........p......t......G.........t......sY.GW.....Ph..h.t..h....s......p.hs.......................s..........................h.tt.....p.htt.........h......P.....h......h......h.....t.sp.....................u.....s.........G.h...h.....h.h....p...u...........p.........h...s..t.............................ap.tthh....huth..t..................s..plh....hh.h..p.s.........................t............h.h.t....t..hh...............................ttht...R..h....sl....h.......s.....c..G...lhl.ss.............................................................................................................................................................. 0 434 842 1113 +7827 PF07996 T4SS Type IV secretion system proteins Mistry J anon Pfam-B_4497 (release 16.0) Family Members of this family are components of the type IV secretion system. They mediate intracellular transfer of macromolecules via a mechanism ancestrally related to that of bacterial conjugation machineries [1][2]. 
24.70 24.70 24.80 25.00 24.50 24.60 hmmbuild --amino -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.17 0.71 -4.45 50 544 2009-01-15 18:05:59 2004-12-01 12:38:11 6 3 398 1 96 471 20 189.20 20 79.43 CHANGED GIPVhDsus........hhpt....hppltphtpplpp..............hppQlpp.hcp.Ypu....loGscshGslh...sssshppslP.psapslhsth....sshuuluupsptlhpttphhss........tsstppptsptthspsststuhuppsYspsppRlsplppLhppIssspDsKshuDLQsRItsEpshlQs-ps+Lphhphhtpupppltpppppc .............................................GlPVhDsss..............htph..........hpphtphtpp.lpp.................hppQlpp.hc.......pphpu...............hT.Ghp..sh..sshh.........ps..sh.pphh..s.....ps.hps.l..hssh.......tss.uuhusp.hp.ph.......hp.p..phhs.....................h.spsttptspt.ph....ph...sts.h...s.h..spps.....hppsppchpplppLhpplspsp....D.K..thtDLpsplpsEpshlpsp...php....hph.hpthtps.pp.ph.pppt...................................................................................... 0 9 46 67 +7828 PF07997 DUF1694 Protein of unknown function (DUF1694) Mistry J anon Pfam-B_4517 (release 16.0) Family This family contains many hypothetical proteins. 25.00 25.00 26.90 32.50 24.60 21.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.25 0.71 -10.41 0.71 -4.17 33 780 2009-01-15 18:05:59 2004-12-01 14:05:26 6 1 736 2 85 413 0 120.90 33 82.58 CHANGED Mo-p....lccpLppuhaG..ssphcPDE+RpaLGoaRERVllulThsplppppsh..pplpptLpc....hps..hplhlNGplshs.hhspYl+lAsctslpaTlVsspptpo....shGlVls.ucpAlsp-plpl ....................Msc.....lpcpL.cthaG..s.plsPDEQR+YLGTFcERVhltlslspspssplp..pthhphLcs......tps....lplhlssplshs.ttshYlKhApcpssphTIVscpphpo.......shGLllp.uspAVsh-ph-l.............. 0 27 50 65 +7829 PF07998 Peptidase_M54 DUF1695; Peptidase family M54 Mistry J, Bateman A anon Pfam-B_4509 (release 16.0) Family This is a family of metallopeptidases. Two human proteins have been reported to degrade synthetic substrates and peptides [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.41 0.71 -4.54 5 267 2012-10-03 04:41:15 2004-12-01 14:39:37 6 3 198 3 176 401 24 127.40 29 50.09 CHANGED M-cIAFlYMGGcEacWLFFEVYDRVcRal+DVsLsVslVYAGRIKLPPGhLlRVpstsGalsMYsFEAVVEALYGKLVEM+sDVNDDSlTKIFGITTlPIGSRDpYFDIYKKYLGIpVslGNYsVLuLS..IKPFYTE..N+ELFlERVFKGVLHElGHLYGLSHCss.DCVMNPPsDL+DWD+RuPoYCNsCL+cLKR .........................................................................................................................................................................................................................................................................h.lh...-ha..........hp..hhhG.....s......thslh.h............h..........................s.t..p..l..h.....h...pRshKps.sHElGHhhGLpHCpp...pClMph.S..so...lp...-sDp+s.p.h.Cs.Chp+Lt...................... 1 64 100 138 +7830 PF07999 RHSP Retrotransposon hot spot protein Mistry J, Bateman A anon Pfam-B_4567 (release 16.0) Family Members of this family are retrotransposon hot spot proteins. They are associated with polymorphic subtelomeric regions in Trypanosoma. These proteins contain a P-loop motif. 
19.50 19.50 19.50 19.60 19.20 19.40 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.68 0.70 -5.89 3 829 2012-10-05 12:31:09 2004-12-02 10:36:38 6 4 10 0 349 862 1 307.10 29 57.65 CHANGED YDSIYNA+WuYVMSGaNsEPLGMKVa...DGEPPchWTcEEVsVSHTP-DhsEP..LPR+GNLEIAVLTSQ+GWPashFh..pcllscDAsc.t..DaVFs.DVYIRREVhRVWalVKpRLspWLpS+Lp-+psPs..VLlGTPGIGKShuVGSFLLY+LLHYDAELLpIIAYsV+G.KAYVFaKsTss+sGpVTFYpchssulcAVc-LuR.....EslKGYIIYDVGKchcpPuPhsPPsGWusIVLoSPDhssYhEWucp+RAlRIhINCs-EsDLKAhsl..W+Klups.t.sPo-A+..plEsEWQEIcGRIcKVGPLLRaIl-.-uSYKtplcKIcEAlsEhScsccpSYhpVFssuspWcscKsochLARlVRV+sE.NGEpCcN...sPlSAYluQKhLshLRsWlspAphNcasuRpulRusttHAA.pFEKsGIaAFoplNsltsIs++LRtLP .............................................a-SlhpApWpaV.h...p...s........s.....t....................h....G.M.tVh.....t...G...p..............p........Wo.tps..s..............s..............p....t.c..s....p......................t........h..h.hVLoSphGWPat............................................t...c....salppE..RVW.lVcttlstW...h......t...t...............s..h..lll.GTPGIGKShusGShLLapLLHap.......st.L........hVsahh.ts.psalh.ptt.t....pV..hY.tt..shp.lpth.t......................tth..c.GalIhDhstt...........s...........s...t.W.uhlllosPp.ppapta...p.p....t..s....l.hhNC.pt.-h+Ahhs...W.......p.h.t...............................................................t.....................................Wp....l....ctR.hp.VGPl.Ralhs..t.t...a.ttchtthtthlt.h........ht..h..hh.......h..pt...p.lhclVp...t................s..........hs...h........................................................................................................................... 0 183 223 349 +7831 PF08000 bPH_1 DUF1696; Bacterial PH domain Mistry J, Bakolitsa C, Bateman A anon Pfam-B_4657 (release 16.0) Domain This family contains many bacterial hypothetical proteins. The structures of Swiss:A1SD03, PDB:3hsa, and Swiss:A3QB43, PDB:3dcx, show similarities to the PH or pleckstrin homology domain. 
First evidence of PH-like domains in bacteria suggests role in cell envelope stress response [1]. 21.50 21.50 21.50 23.30 21.30 21.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.41 0.71 -4.49 55 796 2012-10-04 00:02:25 2004-12-02 12:40:12 6 3 653 16 178 567 35 123.10 36 88.40 CHANGED MGlhsulhGsuutl..s.sclpcchsslLl...suEplphuaKllRDhhlFTsKRLIllDpQGlTG+Kl-apSlPY+u..IopFSlETA.GpFDLDuELKI.WlSupshPlp........hphp+stslhclppsLAphlh ............................hhpGlhGsuo.h..ssp.p.hpc.cl.t.clLl...csEpl.hu....aKhl....RD....hhlFTs+RLIllDtQGlTGKKspa+S..l..PY+u..IspaSlET....A....G.p.F.D..L..DuELKI.Wluut.p.hslp.........hpF.cp.spslhslppsLsphh.............................................. 0 63 119 152 +7832 PF08001 CMV_US CMV US Mistry J anon Pfam-B_4698 (release 16.0) Family This is a family of unique short (US) cytoplasmic glycoproteins which are expressed in cytomegalovirus [1]. 21.50 21.50 22.90 24.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.75 0.70 -5.31 8 91 2009-01-15 18:05:59 2004-12-02 13:34:17 6 1 12 0 0 75 0 218.80 30 93.92 CHANGED Mpl....hhshlhlsshsAlushpsshcE.uhph...hspssstttsR...........phthPPhP.......pt.pVpSpsupCVlcc.GsLsAlWplRGsF.....hP+....shspatsccocthhcltsPct-lsss.hs..lRa.....pVssclsaVhLplhPCppCppha.cCcPphclPWlPhhoShc.DlcRLaaEpRaLplhaVlhlphhthsLL...shhsApslathplthalR.RHt.hh...........ss.p.hp..tcpt .....................................................hh.hhhhhshhslsh......-.shph....ppsps.ptpc..........hphpLP.hP........pt.pVsSppupCs.lcs.GsLsAsWplcGs...F.....sP+.....shsphhtccust.hh+V.sPphps-.ss.hs..lRh.....pV.s...sclshVhlplhPC.ptCps.a.cCcPphpl.PWlPh.hsS.hp..DlcRLaaEcRaLphhaslhlphshhsLL...hhhlhpshahhhlt.al+.hph..........................s......................... 0 0 0 0 +7833 PF08002 DUF1697 Protein of unknown function (DUF1697) Mistry J anon Pfam-B_4800 (release 16.0) Family This family contains many hypothetical bacterial proteins. 
23.30 23.30 23.30 24.20 21.00 23.20 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.13 88 961 2009-01-15 18:05:59 2004-12-02 15:33:07 6 2 861 4 233 734 24 130.00 31 75.33 CHANGED MspYlALLRGINVGGps+ls.Mu-L+shhpshGassVpTYIpSGNllFp.....sppstsplppplcptlccpaGhsssVllhotpclpplhsssPa..tt...tps....pphhlhFlppshst-.thtpltthpst..Echthss...psl.Y ........MtpYhhLLRGINVGG+N+ls.MA....-L+phL.psl.G.h.ppVc..TYIsSGNllFp.....op.......pstspltpclcshlpppasa.t...hshh.lhohp-hpthlpsh.Ph.hpt........-h.......tchplhFhspshshc.....lt...p.l.ts..h.p....hts..Etlhhspt.h................................ 0 76 156 197 +7834 PF08003 Methyltransf_9 DUF1698; Protein of unknown function (DUF1698) Mistry J anon Pfam-B_4787 (release 16.0) Family This family contains many hypothetical proteins. It also includes two putative methyltransferase proteins, Swiss:Q8EEE6 and Swiss:Q88MX8. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.89 0.70 -5.73 11 1216 2012-10-10 17:06:42 2004-12-02 16:27:41 6 2 1182 0 199 2993 1047 295.80 54 95.83 CHANGED FYQpIAp.ssLpcWLpTLPsQLssWp.pppHGcat+Wh+sLcpLPphpP-plDLpsu.VoscpcpslupGEpcc.LcplLRthMPWRKGPaslaGlHIDTEWRSDWKWDRVLPHlSPLpsRolLDVGCGsGYHMWRMlGEGAphsVGIDPopLFLCQFEAVR+LLGsDpRAaLLPLGIEQLPt.LpAFDTVFSMGVLYHRRSPLDHLhQLKsQLVpGGELlLETLVI-GDEssVLVPs-RYAQM+NVYFlPSAtALpsWLcKsGFsDVRlVDpslTos-EQRpT-WMpsESLsDaLDPpDsoKTlEGYPAP+RAlllA+K 
...............................................................................................hYp.IAp.s.Lp.WLpoL.PtQlst.Wp...pptH.G.htpW...pslch.LP.p...........l.p..P....p.l.D...L..h.p....u.....Vs.scsppsLo..tG..p..h..cc....lcslh+s..L....hPW..R.K....G...PFp.L.....a..G...................lp......I....D..T...E.W....R.....S..D...W...K..W...D.R.V..L..P...........H......l......S...........s....L......s....G...R...o...lLDVGCG...uG....Y....H...h..W...R.M...l..........G...t...G................A..+..h...s...V.G..I..D......P...o....p.......L........F.............L...s...Q...........F..E..........A....l.R...K.........L..L....G.......s......D.......p...........R.......A..+...L..L...P.....L......G....I...E..Q.....L........P.....s.....L..........p.......A......F..D.T.V...F....S..M.G....V...L..Y..H......R.....R..S..P...L.......-..H....L...h....Q.L....K....s.Q.LV..stGE.L.V......L.........E.......T..........L...V....l-............G......D.......c.....................s......s....V......L.....................V....P....s.....D..............R........Y....A...Q......M.......R.......N.....V.......Y......F......I.P.....S.s.A..LKsW...L...cKsGF.h..D..l..Rl.lD...s.s...l....T....os...-....E.QR.+..T..-WM..s..sc.SLsDF...LD...PpD.oKTlEGYPAPpRAlllApK........................................................................................................... 0 46 104 155 +7835 PF08004 DUF1699 Protein of unknown function (DUF1699) Mistry J anon Pfam-B_4896 (release 16.0) Family This family contains many archaeal proteins which have very conserved sequences. 
29.20 29.20 30.40 30.30 28.60 29.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.56 0.71 -4.62 13 59 2009-01-15 18:05:59 2004-12-02 16:44:29 6 1 11 0 59 60 0 127.60 53 95.70 CHANGED MKIRVVSSR-EItsLNsNE+lVHLAFRPSN+DlFpLVcoCP+lElIQlPpSYh+TlSKSIcMFLEMQpIpLlEGDVWGHRKDINEYYsVspsVI-+IpEh+sEGhSsEcIsEKls+EoKLuP-MltYILsp .........M+IRVVSS+-EI.sLNsNE+lVHLAFRPSNpDlFsLVcpCPclcslQlPpSYh+TlS+SIcMFL-MQtIpL.lEGDVWGHRKDINEYYplspsll-+Ic-L+s-GhosE-ItcKls+EoKLsP-hltYIlp.p............ 0 12 35 40 +7836 PF08005 PHR PHR domain Stogios PJ, Finn RD, Mistry J anon Stogios PJ Domain This domain is called PHR as it was original found in the proteins PAM (Swiss:O75592), highwire (Swiss:Q9NB71) and RPM (Swiss:Q17551) [1]. This domain can be duplicated in the highwire, PFAM and PRM sequence.\ The C-terminal region of the protein BTBD1 includes the PHR domain and is known to interact with Topoisomerase I, an enzyme which relaxes DNA supercoils [2]. 20.80 20.80 21.00 21.20 20.70 20.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.94 0.71 -4.05 18 549 2009-01-15 18:05:59 2004-12-02 17:17:51 7 40 122 5 326 477 0 142.80 41 14.32 CHANGED sRFpps...uspWsauup.sDuIpFsVD+c.IhlsGhGLYG.....up...u-YpsplcLh.......................................pcpsclLupscssa.s-u.ssssaclhFccPVplpsshhYsspAplpGsc.SthGssGhppVpssc.......VsFpFpsustusNGTsVpsGQIPEllaYs ............................pRFpps...uspWtYpGp..sDuIp.....F.....uVD+c..IhlsGhG........LYG............ut......u-YpscIclh.........................................................................................cpsssLupspsta...hsDu....suss..atlh..FccPVplpssshYsAs.s.h...l...pG.s.c...Sh.hGpcGhspVpsss.............VsFp...Fp....s.u.st.Ss....N.GTsVps.GQIPpllaY.................................................. 
0 93 117 219 +7837 PF08006 DUF1700 Protein of unknown function (DUF1700) Mistry J anon Pfam-B_5023 (release 16.0) Family This family contains many hypothetical bacterial proteins and two putative membrane proteins (Swiss:Q6GFD0 and Swiss:Q6G806). 28.30 28.30 28.50 28.50 28.20 28.10 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.97 0.71 -4.98 18 1443 2012-10-01 22:34:14 2004-12-06 14:55:25 6 16 1095 0 127 829 10 161.70 25 81.69 CHANGED MsKppFLpcLcppLcplPccE+c-ILt-YEpHFh.GtpcGKoEcEIhccLGsP+pIAKElpAp.sIccs...........cppsohpNlh+AlhuslGLulhNhhlllhPhlhllsl...llulhlsuhsh......lhuPllLlhtulhsGhhphhhs..........lFhuIshsGlGlllsllsahlsKhha+Lhl+YL+WNlpllKG .............................................................Ms+pcaLppLcthL..c.p..L.Pcp.-.p.p-h.hp.YccaFtp.t.t....pG.sE...p-lltpLGsPcp.lAcE.l........h..u...p.....h..t..l.p.pt.....................................p.t..p.....s........p.......p.........h..............p....s..l............h........s....h.........l......u......L.....s.....l...............h..s.............h....h.....h.....l.....h..h..h.....h..h.hh.hh................ll.u.hh.h..h..sh.h.h..............l..h.s.s...h...h.h....l........h..t...s...h...h...h...hh......................lh....h.....u....l..h...h....h..u....hu..h..hhh...h...h..h.............h....hhchh.hhhhhth...h.a...................................................................................................................................... 0 46 82 107 +7838 PF08007 Cupin_4 DUF1701; Cupin superfamily protein Mistry J, Bateman A anon Pfam-B_5011 (release 16.0) Family This family contains many hypothetical proteins that belong to the cupin superfamily. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.29 0.70 -5.19 26 1723 2012-10-10 13:59:34 2004-12-06 16:31:36 7 28 1356 7 547 2097 365 287.10 32 65.62 CHANGED scpFLccaWp+KPLLI+puhs.....thts.hsss-.LttLupccclsu....Rll...spctspWpsppGPhpp......apphspptWoLLlQulspat.sstpLhpsFcFlPp.WRlDDlMlSa..........ussuG..GVGPHaDpYDVFllQspG++RW+lsp......pss.tphtsc.slphls.h..cshh.DhlLcPGDlLYlPsGhsHpGhu.s..pslsaSlG.hRsPsht-l...............................hsphs-hLhpph.sh...................tthpcst.psh.pssstltss.th.stltthlppllp..sssphtphhsphlsps..phph-.lh...s.h...................ptss...ltphh.csGshL.+pssh+ .............................................................................................................................................h..pFlcc.a.W.Q.K.+.Pl.l........l...+..............p.u.hs..........sa.h....s..s......l.....o.s--.....L.....s.s.L...Ah-p.-lco..................RLl.......sp.p......s.s...p.....W.....p...s.p.pG.P.h...p..p.....................asp.L.s....c.....p.....s....W.o.........LL.VQ.ul..spa.........t.s.s...........t......t.Lh.psFc.....h..lPs..WR.lDDlMlSa..................usP...........GG.....GV...G..P.HhD..p........Y..D...V..F...............l.l..Q......upG.+.RR......WRlGp..................................ph.p....p.p.h......p.....s.....p.......s.....s.l....h.......l.c.....sa...............-s.lh.....D........................L.E.......PGD..lLYlPPGasH.pG..huh-.........suh..saS...l...G....a..R..u...P...s...s..p..-..L....................................................l.s..shs.-a..lhppthst...................thapD.s....t...h....st....tp....s.u....pl....s...p....th...sp....l.....p..phhhph.lp....ps.p.p.h..p.p.ahG.phlops..+hphc......lh......h.....................p.tc......l.phh.tpG..l.t..................................................................................................................................................... 
0 189 314 431 +7839 PF08008 Viral_cys_rich Viral cysteine rich Mistry J anon Pfam-B_4965 (release 16.0) Domain Members of this family are polydna viral proteins that contain a cysteine rich motif [1]. Some members of this family have multiple copies of this domain. 25.00 25.00 27.00 27.00 23.70 22.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.05 0.72 -4.41 15 41 2009-01-15 18:05:59 2004-12-06 17:12:16 7 4 10 2 0 37 0 80.50 32 56.19 CHANGED phcPsCIs..NacsChtosKPCC.ctpp.opGthsscEalCtcFsuGlCpPlpsIpNlphahELlccLN-TNFpELcppYap.sltpsts ..............t..pssCIs..shp.C..h..pos+PCC.p.pt.p.sphhsc-alChh.FGpG......lCpPlpslpNlphahpLhcplNpTNatELptpYht.sl....p....... 0 0 0 0 +7840 PF08009 CDP-OH_P_tran_2 TOM13; CDP-alcohol phosphatidyltransferase 2 Mistry J anon Pfam-B_51131 (release 16.0) Domain This domain is found on CDP-alcohol phosphatidyltransferases. These enzymes catalyse the displacement of CMP from a CDP-alcohol by a second alcohol with formation of a phosphodiester bond and concomitant breaking of a phosphoride anhydride bond. 21.50 21.50 21.50 21.90 21.40 21.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.10 0.72 -4.22 14 224 2009-01-15 18:05:59 2004-12-07 09:29:33 6 2 222 0 63 151 42 38.20 40 13.83 CHANGED Pc.LlLslhlhlslhhAhLIshPWhsLolhullYlholP ........lVlPlllhVslhlAhLlsaPWhoLulsulhYlh.LP 0 16 34 43 +7841 PF08010 Phage_30_3 Bacteriophage protein GP30.3 Mistry J anon Pfam-B_5273 (release 16.0) Family Proteins in this family are bacteriophage GP30.3 proteins. Their function is poorly characterised [1][2]. 
28.20 28.20 28.90 52.80 27.30 28.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.69 0.71 -4.71 5 72 2009-09-13 17:33:30 2004-12-07 14:05:45 6 1 71 0 3 50 3 149.50 59 98.28 CHANGED M...DI+SGuuYPSCALSNFAPHsFVhDGVcCASMEGFLQSLKFKNPEMQc+VCuLVGKAAKF+GpKKsWaRsQ...TLYW+GlPhpRpS-AYQpLl-NAYsElu.QNcGFR+ALpAT+sooLTHSMG+s.KpsETVLTEpEFlssLsRLRDpL .......MSEL.EIRSNF.pWPSCA..LSNFApWPFVMDGIQFGGLEGFLQGCKVKNVE.QQR.RIFGL.SGL.AAQQ...sG..Ru..YARAQD...RGTLFWLGlPFSRYSsAWKELYTNAYFEAAlQN+GFRDALpASKGKlLKH.SMASuLTKcDTILTEuEFIDlLNhLRDpL.... 0 0 3 3 +7842 PF08011 DUF1703 Protein of unknown function (DUF1703) Mistry J anon Pfam-B_5377 (release 16.0) Family This family contains many hypothetical bacterial proteins. It has been identified as a member of the PD-(D/E)XK nuclease superfamily through transitive meta profile searches [1]. DUF1703 has the predicted secondary structure pattern of the restriction endonuclease-like fold core and contains an additional beta-strand at the C-terminus [1]. 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.37 0.72 -4.30 45 1376 2012-10-11 20:44:44 2004-12-07 14:49:26 6 10 264 0 217 1284 64 98.50 27 20.59 CHANGED thEsaYpsllYuahu...shsaplhsEsposcGRhDlhlp.......s.tshhalhEFK....tt.pptssp........cuLpQI+c+uYuppapsps...cplhplGlsFsscpcslhph .........................t.tEtaYpshh.hshht....hh..sa..hlpsEhcoupG.R........hDlhlp...........p...pch...sa...l.hEhK..............h..pts.sc....................cAlpQI.ccKpYstthptps..........pplhtlGl.sFsscptpl......................................................... 0 109 189 212 +7843 PF08012 DUF1702 Protein of unknown function (DUF1702) Mistry J anon Pfam-B_5312 (release 16.0) Family This family of proteins contains many bacterial proteins that are encoded by the UnbL gene.\ The function of these proteins is unknown. 
25.00 25.00 123.50 123.20 19.80 19.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.93 0.70 -5.81 23 78 2009-01-15 18:05:59 2004-12-07 14:53:09 6 1 51 0 27 85 2 312.60 42 96.17 CHANGED Ms.shtslRRRlLTPsls....-TphssRGF+hKsssA+cpLEoVGpsFLpGYuaAlEARsss-scptLEplPschRGFAYEGAuMuhulLDuLsssuttR..........lsshLAGc.GstHsYMlaVGlGWAMARLP.RhhWssltss......DPLLRWLsLDGYGFHQAYF+TsRYVcpppRcpsasW..s.spscYssRAlDQGIGRALWFVsGoDP-hVAshlcpFPtsR+uDLauGlGLAATYAGGusssELptLtctAGtaRssLAQGuAFAAEARh+AGhlssHTclAsplLC.GhosppAAslspcsRsssss.sGshPA.YEsWRpcIAsphss ..................httlR+hlLsPsls....psphstRGF........phcss.su.pcpLEslGpsFlpGataAl-up.s.s-spppL-t.ls.schRGFAYEGAuMuhslhDuLss.s..ptpR..........htshLsGt......GptHsYhsYVGlGWAMARLP....RhhWtclhss...................cPLL+WLslDGYGFHpAYF+Ts+aVcppttss.asW.t..s.ssYssRAlDQGIGRALWFltGsDsstVushlppFsssR+uDLauGlGLAATYAGGssts.-LptLtctAGpa..pspLAQGuAFAAcARh+AGhlssHTclAsplLC..GhosppAAplsc-stss....sss.....su.shPA.YEhWRpcItsph.s... 0 11 19 24 +7844 PF08013 Tagatose_6_P_K Tagatose 6 phosphate kinase Mistry J anon Pfam-B_5149 (release 16.0) Family Proteins in this family are tagatose 6 phosphate kinases. 
25.00 25.00 25.00 25.60 24.50 24.30 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.49 0.70 -6.04 31 1086 2012-10-03 05:58:16 2004-12-07 16:20:22 6 5 720 5 98 655 63 386.40 57 96.98 CHANGED Mps..L..sLlpp+KsGcshGIhSVCSAHPLVlEAAlcpAhpss.ssVLIEATuNQVNQhGGYTGMTPuDF+caVhplAcchGaPp-+llLGGDHLGPNsWpchPA-pAMspA-sLIpAYVtAGFpKIHLDsSMuCAsDP..ssLsDphlAcRAARLsplAEps....h.pphGtpsslYVIGTEVPlPGG..AtEsLs..plpVTsspAAtpTlpsH+pAFtptGL.ppsasRVIulVVQPGVEFcHpsVlcYcsppApsLophlcs.PthVFEAHSTDYQospAhppLVcDHFAILKVGPuLTFALREALFALstIEsELl...sstppusLtpshEplMhcpPp.WppaYpGssspt+ltR+YShSDRIRYYWscPclpsAlcpLhsNLsspslPLsLlSQYLPtQapulppGcLsscPcsLllc+IpplLpsYttACps .......................................hppllt.pHKAG.c........phGIhuVCSAH.PLVlEAAlpaspssp.p.lLIEATSNQVsQ..F...G.GYT.G.M.TPADFRpFVhplADplsFsp-.tlILGGDHLGP.....Np.....W.....Q.....p.......sA.s.....tA.Mtpus-LlKuYVtAGFpKIHLDsSMSC.t-DP....lPLs..s-..hV..AE..RAAhLs.....tsAEps.....s...pch.....tc..t.....pLsYV.IGTEVPVPGG..........tt.......ps............lp..p..ltlTps-sAtsTLcsHp+AFttpGL.sthhsRlIAlVVQPGV.EF.DHoslIcYQPtcApALuphl-.pht..hlaEAHSTDYQT.puhppLVtDHFAILKVGPALTFA.............LREAlFuLAtIEpELl.....ss...c...t...p...StlhpVlEcVMLc..cPpYWppaY+ss...h...ssphL......s.........h....t....YS...hS....DRl.....R........YY.WPcspIcsuhtpLhtNLt.sssIPL.slISQYLPhQa....+lppGcL...p...s...tP+pLIhs+I.DlLttY+huCt.t.................................................................................................................................... 0 21 45 68 +7845 PF08014 DUF1704 Domain of unknown function (DUF1704) Mistry J anon Pfam-B_5490 (release 16.0) Domain This family contains many hypothetical proteins. 
21.80 21.80 22.80 22.10 20.70 20.20 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.88 0.70 -5.42 35 415 2009-09-10 23:48:06 2004-12-07 16:57:52 6 2 337 0 154 362 93 328.70 31 67.85 CHANGED +ccFhpsphphtPpapY+.......LshDstph+cpLasl.l-plcD....sslppLacchhcphsthlchLpshGo..ccFhatSlclYGsPscphhssAchlLchs.............thtcppscphsAppAschhppphcpa.....thpscVplSsslsAcAhVuusp...lhlspsshFScp-lpsLtpHElGVHllTTlNGppQP.L+hLShGhPssTtTQEGLAlluEahoGuhohpRL+pLAhRVlAl-thhcGtsFh-sFphLpcphshscccAFslTtRVaRGGG......FTKDhlYL+Ghhclhshh+p.hss.....ls.LhsGKhulcclsllp-LhpcGlLssPcalP.........hhps.sshsshhsa..hlsslc .......................................................................................................................ptF.tt..p.....Pththt.......hshss.t...hptph.tl.hch.pp..........ssltphhtp.hppht.hhphLp..sh........Gp...tFh..S.phYG..t.sscthhtss.hlhphs........................t..ppc.phhsuppsh..p..hh.pphppah........h..phpVpho-sllucA..h..suusp......lplss.....psh.ascp-lpsLhpHEhhVHlhTslNG.....ptQP..h...p....h....h....u....hG.......h.........Psss.....sTQEGLA.lLtE.h.l.s.t.s....hpRh+pL....shRVhA.ls.hh......pttsFhplFphlpcp.....sh................stpsuashssRVaRGussp.........s.sFTKDhlYLcGhlpllpahpp..hpp................hshL.h.sGKsohcDlchlcp..LsppGhltsP+ah.........thhc.hptLpshhpa..hht...p........................................... 0 31 54 100 +7846 PF08015 Pheromone Fungal mating-type pheromone Lee SC anon Bateman A Family This family corresponds to mating-type pheromone proteins. The homobasidiomycetes, or mushroom fungi, have arguably the most complex mating system of all known organisms. Many species possess a mating system known as bifactorial incompatibility, where two unlinked loci control the mating -type of an individual incompatibility loci (the A and B mating-type loci). 
Each A mating-type sublocus encodes a pair of divergently transcribed homeodomain transcription factors while the genes responsible for B mating-type activity encode lipopeptide pheromones and G-protein -coupled pheromone receptors [1]. 21.80 21.80 21.90 23.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.50 0.72 -3.15 38 68 2009-01-15 18:05:59 2004-12-08 15:46:22 6 1 9 0 20 59 0 61.10 24 97.63 CHANGED MDs..Fss.lshh.........................................h.ssts.......sssstsssphpshssDtERhssG...shsuaCVl .............MDs..Fso.lshh.................................t......tssss.s.......sssstssssh.slPsstE+hsuu...shsuaCVI........ 0 18 20 20 +7847 PF08016 PKD_channel Polycystin cation channel Bateman A anon Bateman A Family This family contains the cation channel region of PKD1 and PKD2 proteins. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.31 0.70 -6.06 13 1249 2012-10-03 11:11:44 2004-12-08 17:04:05 7 121 184 0 812 4460 432 268.20 19 25.62 CHANGED shuhtslsphpslapahppsLlstl.....puspo.tt.stp....pshlLGsPRLRQlRlcss...hhh.c.hhpphht....Cpsshu.sspDp..a...W....cthspsssthhhYps...........sspL..............shtpaGhlssYs.ouGYhh..Ls.tstppotctlstLpcppWLDppTRAlFl-aohYNsssNLFssloLlhEhPssGsslsshplcohsLhcassshsh.lhlh.llallhhlaashsEhhpltpct.hpYl+.osWNhL-hsllslsslsslltlhRphhssphhpphh.ssstsFhsFcpluphsphhpsltAhLlFLshlKlh+hlpFspshplhopTLpcuhc-lhuhulhlsllhlAYuthuhLlhGophssasshscuhlol.....hphlsssFsas......th.pss+hLGsLhasshhhlhhalLLNlFlulIp-sYspl 
......................................................................................................................................s......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...t....h......h..u.....h...h...h.....h....h...h.l.....p....h...h...c...h.l......p...h.......t...p...h......s.......h....h...s......l...p.t...s...h....p...l..h...t.F.h.h..hh..h..ll..h.....hu.a...s..................h..u.....h..........l....l............h...........G...............s..............................h.............................p...............a...............p.............s.....h...p...s.h.....sh..........................ht.h...h.........u....s.............a.t..............................................th.......t.......h...t...............h....h.....u...........l...h.....h..hsa....h....h.h.h....h....h.....hl.ls.hhlull.tsapt.............................
................................. 1 330 402 599 +7848 PF08017 Fibrinogen_BP Fibrinogen binding protein Mistry J anon Pfam-B_4323 (release 16.0) Domain Proteins in this family bind to fibrinogen. Members of this family includes the fibrinogen receptor, FbsA, (Swiss:Q8GIU3) which mediates platelet aggregation [1]. 27.30 27.30 27.40 27.80 26.90 27.20 hmmbuild -o /dev/null HMM SEED 393 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.92 0.70 -13.56 0.70 -6.26 4 85 2009-01-15 18:05:59 2004-12-09 16:47:47 6 15 47 0 45 87 8 228.40 17 25.45 CHANGED YMGVLGSTIILGSSPVSAMDSVGNQSQGNVLERRQRDAENRSQGNVLERRQRDAENRSQGNVLERRQRDAEN+SQGNVLERRQRDAENRSQGNVLERRQRDsEN+SQGNVLERRQRDsENKSQGNVLERRQRDAENRSQGNVLERRQRDAEN+SQGNVLERRQRDAENRSQGNVLERRQRDsENKSQGNVLERRQRDsEN+SQGNVLERRQRDAENRSQGNVLERRQRDsEN+SQGNVLERRQRDsENKSQGNVLERRQRDAENRSQGNVLERRQRDsEN+SQG................................NVLERRQRDAENKSQVGQLIGKNPLLSKSIISRENNHSSQGDSNKQSFSKKVSQVTNVANRPMLTNNSRTISVINKLPKTGDDQNVIFKLVGFGLILLTSRCGLRRNEN ............................................................................................................................................th.tt.p..tph.....tp..t.ptphttpt....ptph.t+tp.tph.p.t....cs..p....tph.....t.+tptphtt....+tptph......t.+t...ptphttc.s...pt..ph.t+tpp..phptps.......pt........ph.t+tpppspt+sp....tps..t+tpppstppsptp....s.t+tptps.t................ptptph.tptptphttptptph......tptptph.tptpt........ph.tptptth.t.pt...tth....p.th....ptth.....ptth.............................................................................................................................................................................................................................................................................. 0 16 19 38 +7849 PF08018 Antimicrobial_1 Frog antimicrobial peptide Lee SC anon Bateman A Family This family includes antimicrobial peptides secreted from skins of frogs. 
The secretion of antimicrobial peptides from the skins of frogs plays an important role in the self defense of these frogs. Structural characterization of these peptides showed that they belonged to four known families: the brevinin-1 family, the esculentin-2 family, the ranatuerin-2 family and the temporin family [1]. 21.20 21.20 21.30 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.51 0.72 -6.65 0.72 -4.04 26 221 2009-01-15 18:05:59 2004-12-09 17:23:23 6 3 48 0 1 216 0 23.70 54 39.09 CHANGED FLPhlhulAAphlPplhCsIoKKC .....FLPhlAulAAphlP+laCtIoKKC... 0 1 1 1 +7850 PF08019 DUF1705 Domain of unknown function (DUF1705) Mistry J anon Pfam-B_1101 (release 16.0) Domain Some members of this family are putative bacterial membrane proteins. This domain is found immediately N terminal to the sulfatase domain in many sulfatases. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.62 0.71 -4.72 114 2318 2009-01-15 18:05:59 2004-12-10 10:47:59 7 4 1307 0 257 1329 103 149.40 26 27.51 CHANGED hlhshhshl...hslhuhtalhKPlhhllllhuAhssYah.sYGllhDpsMlpNlhpTssuEAtsllohphllalllhGllP.uhllhplcl...phpshh+tlht+hh....hhlsulhllsslshhhapsauSlhRNp+.pl+thlsPsshlhushpahppphhpp ...............................................l.hh.hh.hl...ls.Lhu.h...hhhh+..hl....u.sl...l....l....l....h....SA...uA....p.Y....ah....h....hY....G.ll...Ispuh.ltslhp.TsssE.upp....ll.o.........ph....lLhllh..hul..L...s.sll..l.hhs+l......phs....h..h....+....s....h..h...h...+...lt.......shll...s.lll...lhhh...s...h....h.....psatthhcs.t..ph.......h..h.P.p.lhu.hphh.......t.................................................... 1 40 114 185 +7851 PF08020 DUF1706 Protein of unknown function (DUF1706) Mistry J anon Pfam-B_5540 (release 16.0) Family This family contains many hypothetical proteins from bacteria and yeast. 
20.60 20.60 20.90 20.80 20.50 20.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.92 0.71 -4.75 42 927 2012-10-02 14:44:17 2004-12-10 11:49:08 6 4 794 0 99 520 21 149.80 35 96.98 CHANGED M.spspoKp-LltslppsapKlhs.hsslPcchpppth..t..........ts-+osp-hluYLlGWtpLlLpWhppp..ppGhtVthPspsYKWNpLGtLsppFaccYpph.shpchhthLppshpclhpLI-shSs-ELFsts.htWs......upWslG+alphNTsSPacshppKlR....+apKt .............Mtp.psKpELhtthppsapKhht.hsslscp..tpp.h...............phD+s.t-slsa.htWppL.lLpW.p..sp....ppGh.ps.hPs..ptapW...pp...hGtL.p.Fhppatph.olpphhthLppshpplhthI-shSp-ELFp..phhtWs......ushslhpahh.sTsu.ashhtpKlR+apK.h..................... 1 33 66 80 +7852 PF08021 FAD_binding_9 Siderophore-interacting FAD-binding domain Finn RD anon Manual Domain \N 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.45 0.71 -4.13 40 1962 2012-10-03 00:38:56 2004-12-10 14:18:49 6 12 1325 1 503 1509 90 109.60 34 37.53 CHANGED pVhpspcloPphtRlshsG.ssLssas..ss.ssDpalKLhFPpsuts.s.h.......ths.th........tt.+PhhRsYTlRphDssst....El-lDFVlH...ss........GPAusWAtpApsGDpltlsG.P.tush ..............................................pVhcspplosphhRlsls.G.....p......s......Lss.Fs...........st..shDs.al.KlhF...Pp...sssph...................h..st....t...hh....h.......stt.+Ps.RsYTsRth..Dt..t..tt...........ElslDFsl.H.....ss............................GsAusW.A.tp.Aps.GDplslsG.Ptut.h............................. 
1 122 319 431 +7853 PF08022 FAD_binding_8 FAD-binding domain Finn RD anon Pfam-B_728 (release 4.2) Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.67 0.72 -4.16 19 2729 2012-10-03 00:38:56 2004-12-10 14:19:05 7 89 751 1 1659 2722 95 111.30 22 16.74 CHANGED hhssph.hpltlhs......ssllplphsKPpt....a+apsGpahalph..slo......phHPFoI.sSust........s-tlslhI+spusaTcpLpphhtp...............................................sthsph.+lhl-GPYGssu ........................................ht.......ph..hhs......ss.s..h.c..l.ph..p..p.sp.................hpacs...G.Qal..a.....lph...s..........s.........lu.....................p...........H.......PFT.l......s.S.s..s.p...................................p..s....t..l...sl....h..l..+s........h....G..s..a....Tpp.Lhphh...t.tt................................................................................................................................pl....hl-GPaGt..s..................................................................................................................................................................... 0 436 875 1356 +7854 PF08023 Antimicrobial_2 Frog antimicrobial peptide Lee SC anon Bateman A Family This family consists of the major classes of antimicrobial peptides secreted from the skin of frogs that protect the frogs against invading microbes. They are typically between 10-50 amino acids long and are derived from proteolytic cleavage of larger precursors. Major classes of peptides such esculentin, gaegurin, brevinin, rugosin and ranatuerin are included in this family [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.33 0.72 -3.63 18 473 2009-01-15 18:05:59 2004-12-10 16:58:22 7 2 54 1 0 469 0 32.20 38 49.17 CHANGED GlLsslKthAKssu....KslApshLsplsCKlotpC ..........GlhsslKshu....tssu....Kss....utslLcpluCKloppC... 
0 0 0 0 +7855 PF08024 Antimicrobial_4 Ant antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of the ponericin family of antimicrobial peptides isolated from predatory ant Pachycondyla goeldii. The ponericin peptides may adopt amphipathic alpha-helical structure in polar environments. In the ant colony, these peptides exhibit a defensive role against microbial pathogens arising from prey introduction and/or ingestion [1]. 25.00 25.00 26.90 34.40 19.90 19.90 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.60 0.72 -6.65 0.72 -4.06 5 7 2009-01-15 18:05:59 2004-12-15 11:27:15 6 1 3 0 0 7 0 23.40 51 94.80 CHANGED WGoLlKhGlKLlPSVVGhFpKKKQ WGohhKhuhKLlPuVlGhh.KKKp. 0 0 0 0 +7856 PF08025 Antimicrobial_3 Spider antimicrobial peptide Lee SC anon Short protein clustering Family This family includes antimicrobial peptides isolated from the crude venom of the wolf spider Oxyopes kitabensis. These peptides, known as oxyopinins, are the largest linear cationic amphipathic peptides chemically characterised and exhibit disrupting activities towards biological membranes [1]. 25.00 25.00 88.20 88.10 19.50 15.60 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.69 0.72 -4.23 2 4 2009-01-15 18:05:59 2004-12-15 11:46:53 6 1 1 0 0 4 0 37.00 84 100.00 CHANGED GKhSshuKlLRuIAKhFKGVGKsRKQFKpASDLDKNQ GKFSsFuKILRSIAKhFKGVGKVRKQFKpASDLDKNQ 0 0 0 0 +7857 PF08026 Antimicrobial_5 Bee antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of antimicrobial peptides produced by bees. These peptides have strong antimicrobial and some anti-fungal activity and has homology to abaecin which is the largest proline-rich antimicrobial peptide isolated from European bumblebee Bombus pascuorum [1]. 
25.00 25.00 28.50 27.90 23.40 23.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.20 0.72 -4.10 2 26 2009-01-15 18:05:59 2004-12-15 11:48:51 6 1 20 0 4 21 0 33.30 73 63.28 CHANGED aVPh.NsPpPGp.+PFPoFPGpGPFNPKIpWP......Ga .....Pl.psP.PGt.KPFPTFPGQGPaNPKI+hP.......... 0 1 1 4 +7858 PF08027 Albumin_I Albumin I Finn RD anon Pfam-B_100627 (release 16.0) Domain The albumin I protein, a hormone-like peptide, stimulates kinase activity upon binding a membrane bound 43 kDa receptor. The structure of this domain reveals a knottin like fold, comprise of three beta strands [1]. 20.20 20.20 23.00 22.70 18.20 17.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -11.06 0.71 -4.37 4 79 2012-10-01 22:06:18 2004-12-16 13:53:21 6 3 23 2 3 89 0 96.10 46 81.40 CHANGED LsVFLlAshoL.hFspK.luAsDCsGsCSPFEMPPCtSSsCRCIPlGLlsGaChpPSu.solhKMV-EHPNLCQScADCpKKGSGsFCARYPNPDIEYGWCFuSsSEA.-VFhpl...P..RshhK .................hhlhh..h.h..hp.htA..Cs.s..h...Cosat..h.s.sCt.o.s..s.....CR.C..lPh....sL.h.......sGh.Ch.Po.u....shsKhl-EHPNLCQScs-ChKKGSGNFCARYPNs.lcaGWCFtu.u-u.................................. 0 0 3 3 +7859 PF08028 Acyl-CoA_dh_2 Acyl-CoA dehydrogenase, C-terminal domain Finn RD anon Pfam-B_8101 (release 16.0) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.41 0.71 -3.91 37 2598 2012-10-01 23:33:27 2004-12-17 10:14:48 6 16 1047 24 864 19514 7084 132.50 23 33.66 CHANGED lshuussLGhA+uALssah-hsp.sRhp.....hsssths-pshsthplAcussclcAAchhl.csspt........scsGpth.s.t.pscspts....puhAschshsussplhpsuGupuhhpssPlQRhaRDl+ssssHshhs. 
........................................................................................h..hsushl..Gl...A...cu...A......hc...t.......sh.p...hs....p..p+sp.............tusst..p..h...s....c....c..P......h..............s..........tp..l..G...c......h.......p.s...p.l...p....u....A...cs...h...l...h...p..s.u..c...t...h..p.t.h...................ht.p...s...p..t.....h......s..........t......t...p.....s......p..s..p..hs.............................ps.h..ss.chu....lp.s........s....s.p....l....ac.....h..sG.up.u.h.t.......t.......s..........p..........s...L...pRa......aRss+sh..Hs....s............................................. 0 165 466 687 +7860 PF08029 HisG_C HisG, C-terminal domain Finn RD anon Pfam-B_1550 (release 16.0) Domain \N 22.80 22.80 22.90 27.60 21.60 22.10 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.30 0.72 -4.04 39 1973 2012-10-01 21:59:08 2004-12-17 12:10:15 6 7 1931 6 566 1247 328 75.80 44 25.62 CHANGED tApphhhlhhNsPpspL-clht......lhPGhcuPTlosL.....scp..salAVpshlsccplhclhscL+plGApsIlVhsIpph ..................pA+cp+hlhhcsPp-+L-clhu.........lLPGhEsPTlhPL.............u-p.....phVAl+hVssEshha-sM-cLKslGApuILVhPIEKh...... 
0 177 364 489 +7861 PF08030 NAD_binding_6 Ferric reductase NAD binding domain Finn RD anon Manual Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null --hand HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.09 0.71 -4.32 21 2341 2012-10-02 19:13:12 2004-12-17 14:53:52 7 96 341 1 1603 3081 161 162.10 21 23.27 CHANGED a-sllLluuGhGloshlSllp-lhpp.p..................................hctpphpFhWlsRc.uslchact.h..sEltphcpp.....plclcsYhTu.hps...............ssspsthhp......hpshp.......................................................................pthps......ht.........................sphphu.RPNhcpllpch.........tpsssplGVhsC....Gssshsccl+phss ..................................................................................................................................................................................................................................acsslLlu..uGhGlTshhullp...s..lh..pphp..............................................................................................tt.t..h.t.h.c..+..lh....h.lW.lsR.....c.tp..p.h....c.W.h.........t..s..h.h...........pplt..p.........tpp...............thl.p...l.p..h.....a..l..Tp......pp..................................................................tsh.ps.........................t.....................................................................................................................................................................................................p.........t...p.......................................................hph.ph.G..R.P.sh..pp.h.h.pph....................................t...p.......t...t....p....l..uV...hhC....GP.shspplcphs.t.................................................................................................................................................................................................... 
0 440 839 1305 +7862 PF08031 BBE Berberine and berberine like Mistry J anon Pfam-B_649 (release 16.0) Domain This domain is found in the berberine bridge and berberine bridge- like enzymes which are involved in the biosynthesis of numerous isoquinoline alkaloids. They catalyse the transformation of the N-methyl group of (S)-reticuline into the C-8 berberine bridge carbon of (S)-scoulerine [1][2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.32 0.72 -4.05 67 2389 2012-10-02 00:48:38 2004-12-20 08:43:13 7 38 719 49 1461 2372 41 47.30 32 9.22 CHANGED uYlNahDhDls..........................asppYat.sN.apRLhplKspa..DPsNhF+ppQSIs ...................................sYlNah.-.h.s.s................................sa...tpt.aaG...sN....as....RLtplKp+a..DPpslF.p.....psl............. 0 342 871 1216 +7863 PF01238 PMI_typeI Phosphomannose isomerase type I Finn RD, Bateman A anon Prosite Family This is a family of Phosphomannose isomerase type I enzymes (EC 5.3.1.8). 
20.20 20.20 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.26 0.70 -5.49 8 4066 2012-10-10 13:59:34 2004-12-31 11:15:28 16 20 2915 8 847 2789 266 273.80 26 82.30 CHANGED LF+LpsuhppYsWG.hGspSAlAchhuho...sPS...lttsKPYAELWMG.THPpuPS+..lhssp.....l+slshsphhAhhsEslu....c+a..uu..pLPFLFKVLSlccsLSIQsHPsKc.uch..........LHstsP+NYPD-NHKPEhuIAlTsFcuhCGF+.htplsp.LtpssElpplIsscsusphppshph.s..ts.-shp.ph.lLpslFuplhsoss-clpppsshLlcptpsps....ushpthD........hsplIpRLpp.aPsDlGlFs..hhLNhhcLpsGEAhFLc..........AssPHAYlpGDhlECMAsSDNsVRAGhTPKahDVssLspMLsYpapss...-ctcht....pc...shhpu........SlLasP.Plt-Fulhps...slc..sGcphl.sh.susSILlsstGsuplhsusp .........................................................................Lps..hp.phhW.......G..............stltp..h.h...u..hp.........Ps................ps...hu.EhW.....hh..AH.Pp.u.s......St.......l.h...sut........................hps.h.....h...h...s..p..h.....h.....s...........h.....t..c.h.h.s.........................p.p.h......sp........hPh...LhK.........lLsAppsLSlQVHPs.......................................................c.......s......Yt........t....H....c.s..Ehs.....................ps.s...ha..................h........p..ht...h.s.p.l.l.............s......h.....t.s..t....p...h....t...........t.h..........h.......t...h...........p.............pt.h........htt............F.....hl....s.s.......s.......c.......t.....t....t.h..hh.l....h....p....t........p...s...p.s............sp..h...h...............................h....h............t.....s.u..h............h.....................s..........sh...........................................h....s....hh.t.....................................................................................................................................................................................................t........................................................................................................................................
........................................................................... 0 291 516 721 +7864 PF08032 SpoU_sub_bind RNA 2'-O ribose methyltransferase substrate binding Finn RD anon Pfam-B_742 (release 16.0) Domain This domain is a RNA 2'-O ribose methyltransferase substrate binding domain. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.37 0.72 -3.81 127 7167 2012-10-10 14:40:03 2005-01-04 15:00:01 7 11 4492 7 1567 4691 2075 74.30 22 27.20 CHANGED hlhGh+sVhcALps...t..p...lpcl..alpps.....t.p.........plhphsppp..sltlthlscpt.Lsplst.sssHQGllAhlp.hphh ..............................................hlhGh+sVppuLps............s..p......lpcl..altcs.......tstp..............h..ppll.phhppp.......tl...t......l.p...h..l....sc..p.t....Ls..p.....h...u.........s...s.......s...s.....HQ.....Glhuhlp.h...t.................... 0 500 998 1314 +7865 PF08033 Sec23_BS Sec23/Sec24 beta-sandwich domain Finn RD anon Manual Domain \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.06 0.72 -3.47 141 1441 2009-01-15 18:05:59 2005-01-05 11:01:17 7 32 326 24 954 1393 15 91.90 29 10.61 CHANGED uapushcl+sSpsl+lsshhGshhst........s........................sss..hphsslssspohslhh....ch................................stp..lstt.ptsalQhshhYpss.sG.p+RlRVpTlshshs .........................uFpAshclRsS.p.s......l..+lsshh.Gshhst..............s...........................................sss....hph..ssls.scpshu.l.hch................................sss....lsps..stshh....QhshhYoss..sG.p.RRlRVpTlshsh............................ 0 314 512 772 +7866 PF08034 TES Trematode eggshell synthesis protein Ebersberger I, Finn RD anon Ebersberger I Domain This domain has been identified in a number of distantly related species of trematodes. This protein domain is crucial for eggshell synthesis in trematodes (Ebersberger I). 
21.20 21.20 21.40 22.20 19.60 19.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.18 0.72 -3.90 13 56 2009-01-15 18:05:59 2005-01-05 16:40:20 6 1 7 0 17 90 0 66.20 37 27.38 CHANGED hpucGcFtupGspccGspappsTpF++GGGhDpYG+K+pa.scY-ThGphK+Yusphhps+FDlhGpL ............pu+G+hpupGptchGsp.ppsTpFphtGthspYG++.K.pa.ucacTpG+.KKYuc+hhcs+FDlhGsL.......... 0 17 17 17 +7867 PF08035 Op_neuropeptide Opioids neuropeptide Bateman A, Lee SC anon Prosite Family This family corresponds to the conserved YGG motif that is found in a wide variety of opioid neuropeptides such as enkephalin. 20.40 20.40 20.40 22.50 20.30 19.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.24 0.72 -4.37 14 584 2009-01-15 18:05:59 2005-01-07 15:32:49 6 5 291 0 25 585 0 23.60 87 11.63 CHANGED YGGFM+s..ERuQ.pPLlTLFKNlhhKss.cp ..YGGFMTP..ERSQ.TPLhTLFKNsIlK......... 0 1 4 9 +7868 PF08036 Antimicrobial_6 Diapausin family of antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of diapausin-related antimicrobial peptides. Diapause during periods of environmental adversity is an essential part of the life cycle of many organisms with the molecular basis being different among animals. Diapause-specific peptides provide anti-fungal activity and act as N-type voltage-gated calcium channel blocker [1]. 25.00 25.00 27.90 29.50 20.00 17.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.27 0.72 -4.20 3 10 2009-01-15 18:05:59 2005-01-07 16:23:30 6 1 6 1 5 5 0 39.60 52 63.36 CHANGED VRVGPCDQVCSRIsPEKDECCRAHG+uGHAoCShGGMpC ..VRVtsCDpVCuRIssERDECCRAHGY.pGh..u...CpsGph.C............. 0 5 5 5 +7869 PF08037 Attractin Attractin family Lee SC anon Short protein clustering Family This family consists of the attractin family of water-borne pheromone. 
Mate attraction in Aplysia involves a long-distance water-borne signal in the form of the attractin peptide, that is released during egg laying. These peptides contain 6 conserved cysteines and are folded into 2 antiparallel helices. The second helix contains the IEECKTS sequence conserved in Aplysia attractins [1]. 25.00 25.00 28.80 93.30 18.40 17.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.11 0.72 -4.19 2 5 2009-01-15 18:05:59 2005-01-07 16:32:56 6 1 5 1 0 6 0 54.20 59 89.14 CHANGED CDIt.hTSpCpMpapsCt-AsuCsslIEECKTSh.EcC..pphpSstuSTTltPp CDIGNITSQCcMQHQNCuDAsGCsTlIEECKTSMVERCQNQpF-SuSuSTTLGPQ 0 0 0 0 +7870 PF08038 Tom7 TOM7 family Lee SC anon Short protein clustering Family This family consists of TOM7 family of mitochondrial import receptors. TOM7 forms part of the translocase of the outer mitochondrial membrane (TOM) complex and it appears to function as a modulator of the dynamics of the mitochondrial protein transport machinery by promoting the dissociation of subunits of the outer membrane translocase [1]. 21.20 21.20 23.10 23.40 17.50 17.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.98 0.72 -4.50 24 216 2009-01-15 18:05:59 2005-01-07 16:46:53 7 6 183 0 140 201 0 41.70 39 54.60 CHANGED KERlshlhchu+sssHYGaIPLllYLGhspsss...P.......olhpLL ..........K-Rlsplhchu+sshHaGFIPhVlYLGappssss....P.......olhpLL.................. 0 39 70 116 +7871 PF08039 Mit_proteolip Mit_preoteolip; Mitochondrial proteolipid Lee SC anon Short protein clustering Family This family consists of proteins with similarity to the mitochondrial proteolipids. Mitochondrial proteolipid consists of about 60 amino acids residues and is about 6.8 kDa in size [1]. 
25.00 25.00 25.90 25.80 19.20 19.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.94 0.72 -3.99 2 43 2009-01-15 18:05:59 2005-01-07 16:49:16 6 1 26 0 17 44 0 55.50 65 94.05 CHANGED MLQSlIKplWIPMKPYYTpsYQEIWlGhGLMuaIVYKIRuADKRSKALKASusAP..GHH .......MLQSlIKNVWlPMKPYYTQVYQEIWVGMGLMuaIVYKIRSADKRSKALKu.u.sAP..GH.......... 0 2 2 4 +7872 PF08040 NADH_oxidored MNLL subunit Lee SC anon Short protein clustering Family This family consists of the MNLL subunits of NADH-ubiquinone oxidoreductase complex. NADH-ubiquinone oxidoreductase is involved in the transfer of electrons from NADH to the electron transport chain. This oxidation of NADH is coupled to proton transfer across the membrane, generating a proton motive force that is utilised for the synthesis of ATP [1]. MNLL subunit is one of the many subunits found in the complex and it contains a mitochondrial import sequence. However, the role of MNLL subunit is unclear [2]. 20.00 20.00 20.30 33.40 19.90 16.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.81 0.72 -4.35 5 67 2009-01-15 18:05:59 2005-01-07 16:50:39 6 1 53 0 33 69 0 56.50 50 88.18 CHANGED MVNLhthsR-HWValLVPL.GFVlGpYLDRppDERLTAFRNKSALYutRELKPGE-sTWK ..................hshhthhp-+WlallVPh...GF...llGpYLD++pDE+LTsFRNKShLat.R..-LpPsEcsoW..... 0 7 10 21 +7873 PF08041 PetM PetM family of cytochrome b6f complex subunit 7 Lee SC anon Short protein clustering Family This family consists of the PetM family of cytochrome b6f complex subunit IV. The cytochrome b6f complex consists of 7 subunits and contains 2 beta hemes and 1 chlorophyll alpha per cytochrome f. It is highly active in transferring electrons from decylplastoquinol to oxidised plastocyanin [1]. 
23.30 23.30 29.40 28.80 23.10 23.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.13 0.72 -4.46 21 131 2009-01-15 18:05:59 2005-01-07 16:53:32 6 2 113 9 56 116 5 31.40 46 45.33 CHANGED uE..IhssAslshsLlLlGLuhGFhLLKl.Q...G..E ......uE...IFssAslhhsLlLVGLAlGFlLLKl.Q...u...t...... 0 15 41 53 +7874 PF08042 PqqA PqqA family Lee SC anon Short protein clustering Family This family consists of proteins belonging to the coenzyme Pyrroloquinoline quinone A (pqqA) family. PQQ is the non-covalently bounded prosthetic group of many quinoproteins catalysing reactions in the periplasm of Gram-negative bacteria. PQQ is formed by the fusion of glutamate and tyrosine and synthesis of PQQ require the proteins encoded by the pqqABCDEF operon but details of the biosynthetic pathway are unclear [1]. 21.10 21.10 21.40 22.70 19.40 19.20 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.97 0.72 -6.22 0.72 -4.20 26 215 2009-01-15 18:05:59 2005-01-07 17:13:00 6 1 181 0 84 150 7 20.90 53 63.51 CHANGED M...........tWspPphs-lplGhElshY ..................M...........tWo+Psas-lRlGhEVThY. 0 19 39 63 +7875 PF08043 Xin Actin_bind_SAA; Xin repeat Wu X, Finn RD anon Wu X Repeat The repeat has the consensus sequence GDV(K/Q/R)(T/S/G)X(R/K/T) WLFETXPLD. This repeat motif is typically found in the N-terminus of the proteins, with a copy number between 2 and 28 repeats. Direct evidence for binding to and stabilising F-actin has been found [1] in the human protein Swiss:Q702N9. The homologues in mouse and chicken localise in the adherens junction complex of the intercalated disc in cardiac muscle and in the myotendon junction of skeletal muscle. mXin may co-localise with Vinculin which is known to attach the actin to the cytoplasmic membrane [1]. 
It has been shown that the amino-terminus of human xin (CMYA1) binds the EVH1 domain of Mena/VASP/EVL, and the carboxy-terminus binds the, for the filamin family unique, domain 20 of filaminC [4]. This confirms the proposed role of xin repeat containing proteins as F-actin-binding adapter proteins. 20.00 20.00 23.60 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.10 0.74 -6.17 0.74 -3.83 60 1090 2009-01-15 18:05:59 2005-01-10 11:10:43 7 19 36 0 561 928 0 16.00 62 7.03 CHANGED GDVpss+alFETpPLD ..GDV+os+WLFETQPLD. 0 22 78 230 +7876 PF08044 DUF1707 Domain of unknown function (DUF1707) Lai X, Finn RD anon Lai X Domain This domain is found in a variety of Actinomycetales proteins. All of the proteins containing this domain are hypothetical and probably membrane bound or associated. Currently, it is unclear to the function of this domain. 26.30 26.30 26.50 26.40 26.10 26.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.39 0.72 -4.21 59 938 2009-09-10 22:21:14 2005-01-10 16:55:33 6 10 341 0 294 719 4 52.70 37 25.50 CHANGED hRsuDsDRppssphLspAhAcGpLshsEa-cRlstAhsAcThu-LssLhsDLP ....hRsuDs-RcpshphLpsAhucGcLshsEa--RlspAhtApThu-LsslhsDLP.... 0 125 230 276 +7877 PF08045 CDC14 Cell division control protein 14, SIN component Wood V, Mistry J anon manual Family Cdc14 is a component of the septation initiation network (SIN) and is required for the localisation and activity of Sid1. Sid1 is a protein kinase that localises asymmetrically to one spindle pole body (SPB) in anaphase disappears prior to cell separation [1] [2]. 
20.40 20.40 20.40 21.00 20.20 20.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.72 0.70 -5.26 13 152 2009-01-15 18:05:59 2005-01-11 12:58:53 6 7 134 0 108 153 0 243.70 30 80.45 CHANGED MEslLupuhDpLsopcsscIppGL+Ql-ulLuplshshsptppssh.............................tsssuhcE.FhpLQ-uFpaNlA..p+Llssl-hlhspss..pu......psshLlhs.lcLlQGlLLLHPsSRslFu.RctsMshlLcLL.............csssssslpsuslpTLlshLLcsPsNhRsFEclsGLtplsphFKhpps............................sp-l....+hKIlEFLhFYLhsEssshs.............................psttstt..t.t.......p+TspEKppLlt+hhsp.l-uLV--Lp-hpsh .......................................................................................................................................h.hthp.L..p.....pl+pGL+pscuhLuplphpt.p..t....t......pt....s..........................p.lt..hp.ps.uh.p-.F.hLQc.sF..paNlt........cLls.s.......L...-..clhupt.s..ph.............psDhl..l...hssLcllQGshLLHPsS+sLFs.+chhhpl.LLcLL......................................ps.s.s..sslQuu.sL.oLlshLlDsPsNpR.sFEphsGL.pVssLhKp+ps............................sccl+hKllEFLhhYLhsE.s..s................................................................................p..hh.............................................................................. 0 29 63 93 +7878 PF08046 IlvGEDA_leader IlvGEDA operon leader peptide Lee SC anon Short protein clustering Family This family consists of the leader peptides of ilvGEDA operon. The expression of the ilvGEDA operon of E coli K-12 is multivalently controlled by the three branched -chain amino acids. Regulation is thought to occur by attenuation of transcription in response to the changing levels of the cognate tRNAs. Transcription of this operon is usually terminated at the end of the leader (regulatory) region [1]. 
25.00 25.00 49.30 49.30 17.90 17.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.31 0.72 -7.59 0.72 -4.80 2 92 2009-09-11 14:17:33 2005-01-11 13:56:14 6 1 92 0 8 16 0 31.90 94 95.19 CHANGED MphllQVIsLVlISVVVIIIPPCGAALGRhKA .MTALLRVISLVVISVVVIIIPPCGAALGRGKA. 0 1 4 6 +7879 PF08047 His_leader Histidine operon leader peptide Lee SC anon Short protein clustering Family This family consists of the leader peptide of the histidine (his) operon. The his operon contains all the genes necessary for histidine biosynthesis. The region corresponding to the untranslated 5' end of the transcript, named the his leader region, displays the typical features of the T box transcriptional attenuation mechanism which is involved in the regulation of many amino acid biosynthetic operons [1]. 25.00 25.00 32.40 32.30 17.00 14.90 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.31 0.72 -6.69 0.72 -4.51 4 51 2009-01-15 18:05:59 2005-01-11 13:57:05 6 1 50 0 5 10 0 15.70 96 100.00 CHANGED MsRVQFKpHHHHHHPD MTRVQFKHHHHHHHPD 0 1 2 4 +7880 PF08048 RepA1_leader Tap RepA1 leader peptide Lee SC, Rossi R anon Short protein clustering Family This family consists of the RepA1 leader peptides. The frequency of replication of IncFII plasmid NR1 during the cell division cycle is regulated by the control of the synthesis of the plasmid-specific replication initiation protein (RepA1). When RepA1 is synthesised, it binds to the plasmid replication origin (ori) and effects the assembly of a replication complex composed of host proteins that mediate the replication of the plasmid [1]. The tap gene encodes a 24-amino acids protein. The translation of tap is required for translation of repA. 
25.00 25.00 31.00 38.80 16.60 15.50 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.84 0.72 -6.96 0.72 -3.89 5 34 2009-01-15 18:05:59 2005-01-11 13:58:01 7 1 28 0 2 20 0 24.40 70 100.00 CHANGED MLRKlQYhFLCHLLLPCNISAGRCD MhtKlQ.hFLppLLL.CIVSAGhCD. 0 0 0 2 +7881 PF08049 IlvB_leader IlvB leader peptide Lee SC anon Short protein clustering Family This family consists of the leader peptides of the ilvB operon. This region encodes a potential leader polypeptide containing 32 amino acids, 12 of which are the regulatory amino acids valine and leucine. A model for the multivalent regulation of this operon by valyl- and leucyl-tRNA is proposed on the basis of the mutually exclusive formation of five strong stem-and-loop structures in the leader mRNA [1]. 25.00 25.00 33.50 31.20 18.10 17.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.66 0.72 -4.20 4 116 2009-01-15 18:05:59 2005-01-11 13:59:13 6 1 113 0 11 28 0 31.90 83 99.14 CHANGED MNsShhNusLLsTA.sAAVVVVRVVVVVGNAP ............MssSMlNApLL.sTAPSAAVVVVRV...V..VVVGNAP 0 1 4 7 +7882 PF08050 Tet_res_leader Tetracycline resistance leader peptide Lee SC anon Short protein clustering Family This family consists of the tetracycline resistance leader peptide. The presence of 3 inverted repeats which can form 2 different conformations of mRNA suggests that the tetracycline resistance (TcR) region is regulated by a translational attenuation mechanism. A Rho-independent transcriptional terminator structure is present immediately after the translational stop codon of the TET protein [1]. 16.30 16.30 16.30 16.30 16.20 16.10 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.11 0.72 -6.36 0.72 -4.32 4 14 2009-01-15 18:05:59 2005-01-11 14:00:00 7 2 12 0 2 9 0 18.50 79 22.44 CHANGED MKCpKMNRVQLKEGSVSMsL MKCNECNRVQLKEGSVShsL. 
2 1 1 2 +7883 PF08051 Ery_res_leader1 Erythromycin resistance leader peptide Lee SC anon Short protein clustering Family This family consists of erythromycin resistance gene leader peptides. These leader peptides are involved in the translational attenuation of erythromycin resistance genes. Interestingly, the consensus sequence of peptides conferring erythromycin resistance is similar to that of the leader peptides, thus indicating that a similar type of interaction between the nascent peptide and antibiotics can occur in both cases [1]. This family also includes a small number of regions from within larger proteins from actinomycetes. 22.80 22.80 23.10 23.10 22.60 21.00 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.79 0.73 -6.06 0.73 -4.12 10 16 2009-01-15 18:05:59 2005-01-11 14:00:36 6 2 13 0 8 11 0 15.00 72 8.77 CHANGED MLISGTAFLRLRTNR hhhSGsAaLRLRTsR 0 1 4 6 +7884 PF08052 PyrBI_leader PyrBI operon leader peptide Lee SC anon Short protein clustering Family This family consists of the pyrBI operon leader peptides. The expression of the pyrBI operon, which encodes the subunits of the pyrimidine biosynthetic enzyme aspartate transcarbamylase. is regulated primarily through a UTP-sensitive transcriptional attenuation control mechanism. In this mechanism, the concentration of UTP determines the extent of coupling between transcription and translation within the pyrBI leader region, hence determining the level of rho-independent transcriptional termination at an attenuator preceding the pyrB gene [1]. 25.00 25.00 34.20 34.20 17.90 15.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.20 0.72 -4.15 2 185 2009-01-15 18:05:59 2005-01-11 14:01:14 6 1 184 0 5 32 0 38.10 90 91.70 CHANGED MVQCVRHFVLPRLKKDAGLPFFFPLITHSQPLNRGAFFC.GVRR MVQCVRH.VLPRLKKDAGLPFFFPL.hT.popPLN.................. 
0 1 1 5 +7885 PF08053 Tna_leader Tryptophanese operon leader peptide Lee SC anon Short protein clustering Family This family consists of the tryptophanese (tna) operon leader peptide. Tna catalyses the degradation of L-tryptophan to indole, pyruvate and ammonia, enabling the bacteria to utilise tryptophan as a source of carbon, nitrogen and energy. The tna operon of E. coli contains two major structural genes, tnaA and tnaB. Preceding tnaA in the tna operon is a 319 -nucleotide transcribed regulatory region that contains the coding region for a 24-residue leader peptide, TnaC. The RNA sequence in the vicinity of the tnaC stop codon is rich in Cytidylate residues which is required for efficient Rho -dependent termination in the leader region of the tna operon [1]. 25.00 25.00 70.80 70.70 17.20 16.70 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.78 0.72 -6.97 0.72 -4.56 2 48 2009-01-15 18:05:59 2005-01-11 14:01:50 6 1 46 0 2 6 0 24.00 99 98.63 CHANGED MNILHlCVTSKWFNIDNKIVDHRP MNILHICVTSKWFNIDNKIVDHRP 0 1 1 1 +7886 PF08054 Leu_leader Leucine operon leader peptide Lee SC anon Short protein clustering Family This family consists of the leucine operon leader peptide. The leucine operon is involved in the control of the biosynthesis of leucine. Four adjacent leucine codons within the leucine leader RNA are critically important in transcription attenuation-mediated control of leucine operon expression in bacteria. The leader RNA contains translational start and stop signals, a cluster of four leucine codons and overlapping regions of dyad symmetry that are capable of forming stem-and-loop structures [1]. 25.00 25.00 26.30 26.30 22.60 19.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.92 0.72 -7.16 0.72 -4.27 4 72 2009-01-15 18:05:59 2005-01-11 14:02:44 6 1 72 0 7 20 0 27.80 79 95.61 CHANGED MhHhsRhhu.LLLNA.llRGh.VuG.Qp .....MoHIVRFlGLLLLNASpLRGRhVuGIQH... 
0 2 3 6 +7887 PF08055 Trp_leader1 Tryptophan leader peptide Lee SC anon Short protein clustering Family This family consists of the tryptophan (trp) leader peptides. Tryptophan accumulation is the principal event resulting in downregulation of transcription of the structural genes of the trp operon. The leader peptide of the trp operon forms mutually exclusive secondary structures that would either result in the termination of transcription of the trp operon when tryptophan is in plentiful supply or vice versa [1]. 25.00 25.00 48.60 48.50 16.40 14.90 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.54 0.72 -6.78 0.72 -4.36 2 2 2009-01-15 18:05:59 2005-01-11 14:03:54 6 1 2 0 0 2 0 18.00 78 100.00 CHANGED MFA.phpNWWWTAHPAAH MFA.phpNWWWTAHPAAH 0 0 0 0 +7888 PF08056 Trp_leader2 Tryptophan operon leader peptide Lee SC anon Short protein clustering Family This family consists of the tryptophan operon leader peptides. The tryptophan operon is regulated by transcription attenuation in response to changes in the level of tryptophan. The transcript of the leader peptide can adopt alternative mutually-exclusive secondary structures that would either result in termination of transcription of the tryptophan structural genes or in transcription of the entire operon [1]. 21.00 21.00 27.00 59.20 20.00 19.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.34 0.72 -3.96 4 77 2009-01-15 18:05:59 2005-01-11 14:04:29 6 1 75 0 5 24 0 37.70 82 87.76 CHANGED MLQEFNpNpKsKlu.h.p.ssuAELAWWRTWTSSWWANVYF MLQEFNPNHKPNFS.........PADA....ELAWWRTWTSSWWAHVYF 0 1 1 3 +7889 PF08057 Ery_res_leader2 Erythromycin resistance leader peptide Lee SC anon Short protein clustering Family This family consists of erythromycin resistance gene leader peptides. 
These leader peptides are involved in the transcriptional attenuation control of the synthesis of the macrolide-lincosamide -streptogramin B resistance protein. It acts as a transcriptional attenuator, in contrast to other inducible erm genes. The mRNA leader sequence can fold in either of two mutually exclusive conformations, one of which is postulated to form in the absence of induction, and to contain two rho factor-independent terminators. [1]. 25.00 25.00 42.70 42.70 19.10 18.50 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.07 0.73 -6.10 0.73 -3.49 2 2 2009-01-15 18:05:59 2005-01-11 14:05:10 6 1 2 0 0 1 0 14.00 100 100.00 CHANGED MTHSMRLRFPTLNQ MTHSMRLRFPTLNQ 0 0 0 0 +7890 PF08058 NPCC Nuclear pore complex component Wood V, Mistry J, Novatchkova M anon manual Domain Proteins containing this domain are components of the nuclear pore complex [1]. One member of this family is Nucleoporin POM34 (Swiss: Q12445) which is thought to have a role in anchoring peripheral Nups into the pore and mediating pore formation [1]. 26.80 26.80 58.40 28.40 21.80 20.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.75 0.71 -4.33 21 117 2009-01-15 18:05:59 2005-01-11 15:00:35 6 5 109 0 89 110 0 130.40 31 39.27 CHANGED GsWcpPhlcElsRRQ...s.p-ppl++llhNshuhlhh.lhsphlp.hh.hhphspth.s..........................................asphhhhllphlhllNIllALasLh+s..pDshuDlPLTspQRcLLGLsss............spTsush.....hlTPP+Yph ..............................GsWcpPhLcEls+Rp...s.p..Eppl++lhhNshslhhh.hhtphhp.hh.hhththph..s..........................................asshlhhllpllhhlNIlhuLh....Lh+...pDchuDlPLTspQRpLLGLcss.st..................ssssssh.....h.p.P+Yp.............................. 0 14 44 75 +7891 PF08059 SEP SEP domain Mistry J, Wood V anon Pfam-B_1894 (release 16.0) Domain The SEP domain is named after Saccharomyces cerevisiae Shp1, Drosophila melanogaster eyes closed gene (eyc), and vertebrate p47. 
In p47, the SEP domain has been shown to bind to and inhibit the cysteine protease cathepsin L [1]. Most SEP domains are succeeded closely by a UBX domain [1]. 21.50 21.50 21.70 21.70 21.20 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.27 0.72 -3.67 44 608 2009-01-15 18:05:59 2005-01-11 17:43:31 8 13 300 3 378 595 4 72.10 36 19.95 CHANGED lphWpsGFol-DG.LRpacDPtNtpFLpslpcGcsPh-Lhsht.t.ppVsVslpc+pcEcYhtPhh..phpsFsGpGp .......lplWcsG.Fol.s.D.Gs....LRsas...DP..s....N.tpFLc.......s.....I.p..cG........chPhELtp.h.....h.t....ppVslclpc+psEsahps.......thpsFsGpGp................... 0 118 184 273 +7892 PF08060 NOSIC NOSIC (NUC001) domain Staub E, Bateman A anon Staub E Domain This is the central domain in Nop56/SIK1-like proteins [1]. 23.70 23.70 24.10 23.70 22.80 23.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.55 0.72 -4.16 101 1203 2009-09-11 13:22:58 2005-01-12 13:32:34 8 20 441 26 811 1191 39 52.30 44 10.32 CHANGED hIlQulsll-clD+-lNhhthRlREWYuh+FPELspllsc.shpYu.+lVthlGs ........hIlQAluLLDpLDK-lNsasMRlREWYuaHFPELs+lls....D....shpYs.+llphlGp.............. 0 287 457 669 +7893 PF08061 P68HR P68HR (NUC004) repeat Staub E, Bateman A anon Staub E Repeat This short region is found in two copies in p68-like RNA helicases [1]. 25.00 25.00 30.50 33.30 23.60 24.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.31 0.72 -4.22 8 120 2009-01-15 18:05:59 2005-01-12 13:38:59 6 6 32 0 40 103 0 33.60 59 11.58 CHANGED SAGhpuGFpohpsptsYppGYuu..pppaGupspN ..SAGhpsuFpT.hpsptsYppGYsS..pppaGupstN. 0 2 2 12 +7894 PF08062 P120R P120R (NUC006) repeat Staub E, Bateman A anon Staub E Repeat This characteristic repeat of proliferating cell nuclear antigen P120 is found in three copies [1]. 
20.30 20.30 21.70 20.30 19.30 20.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.62 0.73 -6.89 0.73 -3.37 6 44 2009-01-15 18:05:59 2005-01-12 13:43:13 6 5 10 0 8 55 0 21.90 49 6.15 CHANGED sGKA+GspKsK.QQLh+Q.tsKt .hGKAKGlcKsK.pph.KQ.tsKh..... 0 3 3 3 +7895 PF08063 PADR1 PADR1 (NUC008) domain Staub E, Bateman A anon Staub E Domain This domain is found in poly(ADP-ribose)-synthetases [1]. The function of this domain is unknown. 21.10 21.10 21.10 23.00 21.00 19.10 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.12 0.72 -4.57 17 187 2009-01-15 18:05:59 2005-01-12 14:18:02 7 27 111 4 131 208 2 54.70 41 6.18 CHANGED ll-RluDshhFGsLt.CspCsG.phhapup..tYhCpG.l..StWoKC..sapspsPpRhp .........ll-RlADshhFGALhsCsp..Csu.....pl..hapus..sYhCoGpl...otWoK.C..shpTpsPsRp........... 0 54 72 108 +7896 PF08064 UME UME (NUC010) domain Staub E, Bateman A anon Staub E Domain This domain is characteristic of UVSB PI-3 kinase, MEI-41 and ESR1 [1]. 21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.06 0.72 -4.09 29 246 2009-01-15 18:05:59 2005-01-12 14:25:06 8 14 218 0 176 262 1 105.80 24 4.49 CHANGED lspaLppchLGllshFspslpc...sppshh-KccslpuIt.ll+.hstpplssshsQ......IhssLpouLchp...-LpptuhpsWplhlppLsp...pcltsllsphlshllphasp ...............hspaLpp+lLGllshFsppltc.....s.p.....tsh.-K+pslpultpll+.h.............ss.........pplssshsp.................lhssLpou.Lphc..........-lpph...s..hpsWsshlpsLsp...pcLtsllspslshll.hhp.h.............................. 0 51 92 143 +7897 PF08065 K167R K167R (NUC007) repeat Staub E, Bateman A anon Staub E Repeat This family represents the K167/Chmadrin repeat [1]. The function of this repeat is unknown. 
20.70 20.70 21.10 20.70 20.00 20.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.28 0.71 -3.97 24 437 2009-01-15 18:05:59 2005-01-12 14:29:07 7 18 30 0 146 511 0 104.90 42 46.71 CHANGED sTKlsscSPQP-sscTssSoKppsKpuLpKs-V+EEh.AlpKhopouGcshcT.+.ssu-s+sIcuhhposKQKLDssusloGSKRp.pTPKE+AQsLEDLsG.FpELFQTPu .......................TKhsC+Ss.s-slsTPsSp+p.......p.+pslt..Ks-lcEE.h.Al.p..KhTposGcsscT..+.....Ps.u-c+slcsa.....pcoP+QKL.Dsstsl.oGSKR...psRTP.K-K.A.Q.......sLEDLsG.FKELFQTP........ 1 16 18 28 +7898 PF08066 PMC2NT PMC2NT (NUC016) domain Staub E, Bateman A anon Staub E Domain This domain is found at the N-terminus of 3'-5' exonucleases with HRDC domains, and also in putative exosome components [1]. 22.20 22.20 22.50 22.50 22.00 21.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.82 0.72 -3.69 31 282 2009-01-15 18:05:59 2005-01-12 14:35:30 7 8 228 0 176 278 0 91.50 28 11.48 CHANGED llsss+uuuuLuu..pDlsFY+ohpsshuppl-ppupcLLslhspllp.thsspschhtss.......t.csl-s..pacsls-shDsLhE+sDpsLDchsGh .............llsss+uu.suLsp....c-...hsFa+Sh.Psap...p.h-ppusRLLphhsp.lhp.htssp..s.shpsts...........................p.spl--..pa.c.h.ll-s.DslLE+sshhLDEhsG.l................ 0 54 90 143 +7899 PF08067 ROKNT ROKNT (NUC014) domain Staub E, Bateman A anon Staub E Domain This presumed domain is found at the N-terminus of RNP K-like proteins that also contains KH domains Pfam:PF00013 [1]. 19.90 19.90 23.40 22.80 18.60 18.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.13 0.72 -4.10 4 131 2009-01-15 18:05:59 2005-01-12 14:39:44 6 8 36 0 42 81 0 42.40 79 10.24 CHANGED M-TE.-.Q.EEsoFSNsEoNGKRPAEDh-EEpuFKRSRNoDEMV ...........METE...Q.sEETFsNTETN...Gc..hGK...RPAEDME.EEQAFKRSRNTDEMV.. 
0 1 6 15 +7900 PF08068 DKCLD DKCLD (NUC011) domain Staub E, Bateman A anon Staub E Domain This is a TruB_N/PUA domain associated N-terminal domain of Dyskerin-like proteins [1]. 20.90 20.90 20.90 20.90 20.40 20.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.76 0.72 -4.04 16 540 2009-01-15 18:05:59 2005-01-12 14:44:23 7 12 459 21 349 531 82 54.30 55 12.61 CHANGED csupaPLLLKshc.+LhVRos.caTPhshGssPhcRsIcEYlchGlINLDKPusPSSHEVV ................sSpWPLLLKNa-.c.............L.VRos.HaTPl.stGssPL+RslppYlp.pGlINLDKPuNPSSHEVV........ 0 117 200 290 +7901 PF08069 Ribosomal_S13_N Ribosomal S13/S15 N-terminal domain Staub E, Bateman A anon Staub E Domain This domain is found at the N-terminus of ribosomal S13 and S15 proteins.\ This domain is also identified as NUC021 [1]. 25.00 25.00 27.60 26.50 22.80 21.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.85 0.72 -4.13 64 646 2009-01-15 18:05:59 2005-01-12 14:52:27 7 7 524 8 365 560 71 58.50 55 38.20 CHANGED MuRMHu+t..+..GhSuSshPh....+ppsPpWlp..hos-ElcchIlcLAK.+GhsPSpIGllLRDpaGI .........................MGRMHutG..K..GlSpSAlPY....+RssP....sWlK..hos--Vc-pIhKLAK.KGhTPSQIGVlLRDuHGl... 0 115 203 297 +7902 PF08070 DTHCT DTHCT (NUC029) region Staub E, Bateman A anon Staub E Family The DTCHT region is the C-terminal part of DNA gyrases B / topoisomerase IV / HATPase proteins [1]. This region is composed of quite low complexity sequence. 
19.70 19.70 20.50 20.40 18.30 18.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.39 0.72 -3.34 8 130 2009-09-10 16:42:44 2005-01-12 14:58:57 6 9 47 0 61 99 0 96.90 43 6.92 CHANGED +AAPKusKp.......DS-hsuulsKKPsPsKuKs...p+KRKsSSSD-SDSsFtKtsSKusTSKKuK.......u-sDDFpsDhs....sss...APRs+SGRAKKPlKYLE ........................+tAPK.tpKh..........D.S-t.phu.l.sKKss.s..s...K.uKs......................++++tsuSps.-..uD.s.......t....+tsSK.....sssSKKsKp.............u-sDpF..s.Dhs.....ops.......ss+s+oGR..A.+K.lKYht............ 1 6 9 25 +7903 PF08071 RS4NT RS4NT (NUC023) domain Staub E, Bateman A anon Staub E Domain This is the N-terminal domain of Ribosomal S4 / S4e proteins. This domain is associated with S4 and KOW domains [1]. 20.40 20.40 20.60 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -7.69 0.72 -4.10 34 754 2009-01-15 18:05:59 2005-01-12 15:00:55 7 10 531 4 419 697 74 36.40 62 14.46 CHANGED pGs+KHLKRlsAPppWhls+ps.utaAs+PSsGPHphcc .....RGsKKHLKRLsAP++.WMLDKLs.GsaAPRPSsGPHKLRE..... 0 139 234 326 +7904 PF08072 BDHCT BDHCT (NUC031) domain Staub E, Bateman A anon Staub E Domain This is a C-terminal domain in Bloom's syndrome DEAD helicase subfamily [1]. 25.00 25.00 41.50 40.30 18.10 16.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.06 0.72 -3.85 3 59 2009-01-15 18:05:59 2005-01-12 15:04:31 6 7 34 0 22 45 0 40.30 67 3.39 CHANGED QLlSVME-ICKLVDsIPlcEL+lLSCGpELLQQRplRRKLL .QLl+VMEcICKLVDTIPtDcLKsLcCGsELLQQRslRRKLL.... 0 1 3 7 +7905 PF08073 CHDNT CHDNT (NUC034) domain Staub E, Bateman A anon Staub E Domain The CHDNT domain is found in PHD/RING finger and chromo domain-associated helicases [1]. 
20.80 20.80 21.00 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.54 0.72 -4.28 5 268 2012-10-02 14:16:02 2005-01-13 16:21:08 7 31 81 0 123 210 1 54.50 69 3.24 CHANGED V-l-YoEEDacoLTNYKAFSpaVRPlIsKcNPKIssoKMMMLluAKWREFsssNP ...........l-alFSEEDY+TLTNYKAFSQFl..RPLIA.+KNPKIshSKMMhl......LGAKWREFSsNNP.......... 0 20 32 70 +7906 PF08074 CHDCT2 CHDCT2 (NUC038) domain Staub E, Bateman A anon Staub E Domain The CHDCT2 C-terminal domain is found in PHD/RING finger and chromo domain-associated CHD-like helicases [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.08 0.71 -4.53 4 307 2009-01-15 18:05:59 2005-01-13 16:28:26 6 33 89 0 132 242 0 160.40 78 10.29 CHANGED KhsEIWHRRHDaWLLAGlslHGYARWQ....DItNDspFAIlNEPFKspusc..sNFLEhKNKFLARRFKLLEQALVIEEQLRRAAaLNhpQ-PsHPAMALsARFAElECLAESHQHLSKEShsGN+sANAlLHKVLsQL--LLuDhKsDVsRLPuTlSpltP..VssRLtMSERpILSpL ........................Kh.EIWHRRHDYWLLAGIlp........HGYAR....WQ.................DIQNDsRaAIlNEPF.........KsEhsK.........GNFLEhKNKFLARRF.K..L..LEQALVIEEQLRRAAYLN.hop.DPuH...P...uMALNsRFAEV...ECLAESHQHLSK.EShA.GNKPANAVL.HK....VLsQLEELLSDMKADVTRLPATLuRIPP..VAsRLQMSERsILSRL................... 0 26 37 75 +7907 PF08075 NOPS NOPS (NUC059) domain Staub E, Bateman A anon Staub E Domain This domain is found at the C-terminus of NONA and PSP1 proteins adjacent to 1 or 2 Pfam:PF00076 domains [1]. 20.20 20.20 20.30 24.50 19.90 18.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.50 0.72 -3.56 11 282 2009-01-15 18:05:59 2005-01-13 16:34:28 6 8 89 2 146 244 1 51.10 64 10.43 CHANGED PllVEPhEp.DDpDGLPEK.LspKsspapKEREpsPRFAp.GoFEaEYusRWK .....PVlVEPhEQhDDE.DGLPEK.LspKs.pap...KEREpPPRFAQPGoFEaEYupRWK.... 
0 27 36 78 +7908 PF08076 TetM_leader Tetracycline resistance determinant leader peptide Lee SC anon Short protein clustering Family This family consists of the tetracycline resistance determinant tet(M) leader peptides. A short open reading frame corresponding to a 28 amino acid peptide which contain a number of inverted repeat sequences was found immediately upstream of the tet(M). Transcriptional analyses has found that expression of tet(M) resulted from an extension of a small transcript representing the upstream leader region into the resistance determinant. Thus this leader sequence is responsible for transcriptional attenuation and thus regulation of the transcription of tet(M) [1]. 25.00 25.00 60.40 60.10 23.90 22.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.26 0.72 -4.39 3 31 2009-01-15 18:05:59 2005-01-14 11:01:15 6 2 28 0 1 8 0 27.70 92 72.55 CHANGED MLChPMlMH+FPSDKSIYHWDFhuLFGF MLChPMVMHKNPSDKSIYHWDFYALLGF 0 1 1 1 +7909 PF08077 Cm_res_leader Chloramphenicol resistance gene leader peptide Lee SC anon Short protein clustering Family This family consists of chloramphenicol (Cm) resistance gene leader peptides. Inducible resistance to Cm in both Gram positive and Gram negative bacteria is controlled by translation attenuation. In translation attenuation, the ribosome-binding-site (RBS) for the resistance determinant is sequestered in a secondary structure domain within the mRNA. Preceding the secondary structure is a short, translated ORF termed the leader. Ribosome stalling in the leader causes the destabilization of the downstream secondary structure, allowing initiation of translation of the Cm resistance gene [1]. 
25.00 25.00 48.90 48.80 19.50 18.20 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.15 0.73 -6.44 0.73 -3.94 2 3 2009-01-15 18:05:59 2005-01-14 11:02:43 6 1 3 0 1 1 0 17.00 100 100.00 CHANGED MSGVPGALAVVTRRTIS MSGVPGALAVVTRRTIS 0 0 0 1 +7910 PF08078 PsaX PsaX family Lee SC anon Short protein clustering Family This family consists of the PsaX family of photosystem I (PSI) protein subunits.\ PSI is a large multi-subunit pigment protein complex embedded in the thylakoid membranes of green plants and cyanobacteria. PsaX is one of the 12 protein subunits found in PSI and these subunits are arranged as monomers or trimers within the membrane as shown by the structure of the trimeric complex from Synechococcus elongatus [1]. 25.00 25.00 33.50 33.00 21.50 20.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.72 0.72 -4.03 2 15 2009-01-15 18:05:59 2005-01-14 11:04:10 7 1 15 2 6 15 0 36.10 63 68.65 CHANGED SshAspuAKPsYsFRThWAlLLLAlNFLVAAYYFtIl .........ssKuAKPsYsFRTuWAl.LLLAINFLVAAYYFHII 0 0 5 6 +7911 PF08079 Ribosomal_L30_N Ribosomal L30 N-terminal domain Staub E, Bateman A anon Staub E Domain This presumed domain is found at the N-terminus of Ribosomal L30 proteins and has been termed RL30NT or NUC018 [1]. 23.90 23.90 24.20 24.90 23.80 23.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.49 0.72 -3.96 70 661 2009-01-15 18:05:59 2005-01-14 14:47:42 7 8 345 8 354 608 5 68.80 34 28.50 CHANGED olLKKRKpppph+tpptppthtp+ttppp+RphIa+RAEpYhKEY+ptER-.IRLpRpA+ppGsaYVPuEs ................................olLKKRKp.pp.h+tpptppth.t.pKt......tp+tc..Rc.lIacRA.cpYhKEY+..........ptcRcpIRlpR.A+ptGsaYVPuEs........... 0 109 177 256 +7912 PF08080 zf-RNPHF RNPHF zinc finger Staub E, Bateman A anon Staub E Domain This domain is a putative zinc-binding domain (CHHC motif) in RNP H and F. The domain is often associated with Pfam:PF00076. 
20.20 20.20 20.80 21.10 18.00 19.20 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.96 0.72 -4.45 3 163 2009-01-15 18:05:59 2005-01-14 14:50:25 7 9 41 2 72 105 0 33.70 76 8.58 CHANGED FGSDRFGRDLoYChSGMSDHRYGDGsSTFQSTTGHC ..........FsoD.hFGR.D..LsYChSG...MSD.HRYGDGGSTFQSTTGHC....... 0 4 8 18 +7913 PF08081 RBM1CTR RBM1CTR (NUC064) family Staub E, Bateman A anon Staub E Family This C-terminal region is found in RBM1-like RNA binding hnRNPs [1]. 22.00 22.00 22.60 25.80 21.60 18.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.27 0.72 -4.23 8 128 2009-01-15 18:05:59 2005-01-14 14:57:06 6 3 49 0 49 133 0 45.30 69 12.12 CHANGED SuusMG..GRushSRGRDuYG.GPPRR-shsSRRDsYhuPRDDGYSo+ ...............SSSGMG..GRAPlSRGRDuYG.GPP.RREPlPSRRDVYLSPRDDGYSTK...... 0 9 11 19 +7914 PF08082 PRO8NT PRO8NT (NUC069), PrP8 N-terminal domain Staub E, Bateman A anon Staub E Domain The PRO8NT domain is found at the N-terminus of pre-mRNA splicing factors of PRO8 family [1]. The NLS or nuclear localisation signal for these spliceosome proteins begins at the start and runs for 60 residues. N-terminal to this domain is a highly variable proline-rich region [4]. 25.00 25.00 25.00 31.00 21.00 24.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.99 0.71 -4.57 24 428 2009-01-15 18:05:59 2005-01-14 14:59:30 6 30 362 0 242 401 7 136.00 71 7.54 CHANGED Kt-MPPEHLRKIl+DHGDMSS++apsDKRsaLGALKYlPHAlhKLLENMPhPWEps+pVKVLYHhoGAITFVNElP+VIEPlYhAQWuTMWlhMRREK....RDR+HFKRMRFPPFDDEEPPLDYu-Nl.DlEP.-uIphcLDpp-DssVhDWhYD .....................................Kt-MPPEHlRKIl+................DhGDhop+KappDKRsYLG...ALKahPHAlhKLLENMPMPWE.plR-VcVLYHITGAITFVNEIPhVIEPVYlAQWGoMWIMMRREK....RDRRHFKRMRFPPFDDEEPPLDYuD.NlLDVEPLEsIQh.ELDp-EDss.Vh-WFYD................................ 
0 94 143 205 +7915 PF08083 PROCN PROCN (NUC071) domain Staub E, Bateman A anon Staub E Domain The PROCN domain is the central domain in pre-mRNA splicing factors of PRO8 family [1]. 19.80 19.80 19.80 19.90 19.70 18.90 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.45 0.70 -6.00 12 433 2009-01-15 18:05:59 2005-01-14 15:01:09 6 40 348 0 269 361 13 328.20 62 18.09 CHANGED LhscpTpsuh...tLhaAPhPFN++pGt......h+RAtDlsLlKpWapp+ss...sshPsKV+VSYQKLLKsaVhNpL+p.......pc.psppK............hpLL+sLKsTKFFQpTpIDWVEAGLQlCRQGaNMLNLLIHRKsLsYLHLDYNFNLKPsKTLTTKERKKSRFGNAFHLhREIL+hhKllVDuHVQaRLGNlDAaQLADGltYIhsHlGQLTGlYRYKYKlM+QIRtCKDLKHll.YhRFNs..lGKGPGCGFWtPuWRVWlFFLRGIIPLLERWLGNLLsRQFEGRpSpsl..sKTlTKQRl-SaaDLELRAuVMpDILDMlP-Gl+..psKuRTILQHLSEAWRCWKANlPWcVPGhPtPlcsIIhRYlKuKADhWhssAaYNR-RI+RGAsV-KTlsKKNLGRLTRLWlKsEQERQ+phhK..-GPhl .....................................................sppTssuI...sLhaAPhPFshRSG+......hhRA.D.....lPLlppWY..h.EHsP....s.PVKVRVSYQKLLKhaVLNtL+p.......+..PK...s.p.pK....................p.Lh+shKsT.KFFQpTplDWVEsGLQVCRQGaNMLNL.LIHRKsLsYLHLDYNFNLKPlKTLTTKERKKSRFGNAFHLhRE.lLRLoKLlVDupVQaRLGNlDAaQLADGl.YhFs...HVGQLTGMYRYKY+LM+QIRhCKDLKHlI.YYRFNoGsVGKGPGCGFWAPuWRVWlFFhRGIhPLLERWLGNLLuRQF..E..GRcSKGl..AKTVTKQRVESHaDLELRAuVM+DllDMMP.E.GlK..................QNKsRTILQHLSEAWRCW.........KANIPWK...............VP..GLPhPIENhILRYVKtKADWWhssAHYNRERI+RGATVDKTVsKKNLGRLTRLaLKuEQERQ+sYhK..DGPYl............................. 0 108 159 227 +7916 PF08084 PROCT PROCT (NUC072) domain Staub E, Bateman A anon Staub E Domain The PROCT domain is the C-terminal domain in pre-mRNA splicing factors of PRO8 family [1]. 
19.50 19.50 19.70 20.00 19.30 19.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.76 0.71 -4.42 32 394 2009-01-15 18:05:59 2005-01-14 15:02:50 6 37 344 4 235 386 4 107.70 55 5.71 CHANGED slslolSFoPGSsolsAapLTppGapWGt...pNpDhhsppPpGapssahc+sQlLLSD+hhGaFhVPcsslWNYsFhGspas...shpYslpl-hPhsFYc-lHRPtHFlpFscl....tts-.phpsD.-ssF .......................TlhlTsSFTPGSsSLoAY+LTPsGYEWG+...pNpD........t.u..sp..Pp......Ga..s.o..hhE+sQhLLSD+hhGaFhVPp.pss.WNYsF.M..Gspas....phpaplpls.sPhpFYc-.HRP.HFhpFupl.........-...h.sDppD............................. 2 85 132 194 +7917 PF08085 Entericidin Entericidin EcnA/B family Lee SC anon Short protein clustering Family This family consists of the entericidin antidote/toxin peptides. The entericidin locus is activated in stationary phase under high osmolarity conditions by rho-S and simultaneously repressed by the osmoregulatory EnvZ/OmpR signal transduction pathway. The entericidin locus encodes tandem paralogous genes (ecnAB) and directs the synthesis of two small cell-envelope lipoproteins which can maintain plasmids in bacterial population by means of post-segregational killing [1]. 23.30 23.30 23.30 23.30 23.20 23.10 hmmbuild --amino -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.26 0.73 -6.22 0.73 -4.02 64 1429 2012-10-01 23:27:00 2005-01-14 17:10:06 6 3 892 0 211 521 26 21.00 57 43.49 CHANGED NTspGsGcDlpsuGpAlpcuA .NTsRGhGEDIpcuGsAISpAA..... 0 33 99 154 +7918 PF08086 Toxin_17 Ergtoxin family Lee SC anon Short protein clustering Family This family consists of ergtoxin peptides which are toxins secreted by the scorpions.\ The ergtoxins are capable of blocking the function of K+ channels. More than 100 ergtoxins have been found from scorpion venoms and they have been classified into three subfamilies according to their primary structures [1]. 
25.00 25.00 83.10 83.00 23.30 21.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.46 0.72 -3.89 4 27 2012-10-01 23:31:40 2005-01-14 17:11:46 6 1 6 2 0 27 0 41.00 79 93.81 CHANGED DRDSCVDKS+CuKYGYYtQCp-CCKKAGHsGGTChYFKCKC .DRDSCVDKS+CuKYGYYtQCp-CCKKAGcpuGTChaFKCKC 0 0 0 0 +7919 PF08087 Toxin_18 Conotoxin O-superfamily Lee SC anon Short protein clustering Domain This family consists of members of the conotoxin O-superfamily. The O-superfamily of conotoxins consists of 3 groups of Conus peptides that belong to the same structural group. These 3 groups differ in their pharmacological properties: the w-conotoxins which inhibit calcium channels, the delta-conotoxins which slow down the inactivation rate of voltage -sensitive sodium channels and the muO-conotoxins block the voltage sensitive sodium currents [1]. 24.30 24.30 26.10 26.10 22.10 21.30 hmmbuild --amino -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.62 0.72 -4.17 7 30 2012-10-01 22:06:18 2005-01-14 17:12:28 6 1 24 0 0 32 0 31.10 72 56.38 CHANGED hACsETGtsChaS.ECCS..GACSssFsaCL.p .hACTETGRNCpaS.ECCS..GACSAsFsaCL.R.... 0 0 0 0 +7920 PF08088 Toxin_19 Conotoxin I-superfamily Lee SC anon Short protein clustering Family This family consists of the I-superfamily of conotoxins. This is a new class of peptides in the venom of some Conus species. These toxins are characterised by four disulfide bridges and inhibit of modify ion channels of nerve cells. The I-superfamily conotoxins is found in five or six major clades of cone snails and could possible be found in many more species [1]. 23.20 23.20 23.70 23.40 22.30 23.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.93 0.72 -4.05 8 26 2009-01-15 18:05:59 2005-01-14 17:13:11 7 1 5 3 0 29 0 40.30 66 83.17 CHANGED G.s.CtpDt+sCsYHADCCN..CChtGhCpPSTsWI..GCSTus ...G.s.CtKDt+tCsYHADCCN..CCLo..GICtPSTsWI..GCSTu... 
0 0 0 0 +7921 PF08089 Toxin_20 Huwentoxin-II family Lee SC anon Short protein clustering Family This family consists of the huwentoxin-II (HWTX-II) family of toxins secreted by spiders. These toxins are found in venom that secreted from the bird spider Selenocosmia huwena Wang. The HWTX-II adopts a novel scaffold different from the ICK motif that is found in other huwentoxins. HWTX-II consists of 37 amino acids residues including six cysteines involved in three disulfide bridges [1]. 25.00 25.00 31.80 51.80 21.20 19.70 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.41 0.72 -4.06 3 92 2009-01-15 18:05:59 2005-01-14 17:13:28 6 1 10 2 0 90 0 38.40 81 47.52 CHANGED lFECoFSCDIEKEG.KPCKPKG..........+KKCSGGWKCKlKLCLKl .LFECSlSCEIEKEG.....NKsCK.............KKKC..KGGWKCKFNMCVKV. 0 0 0 0 +7922 PF08090 Enterotoxin_HS1 Enterotoxin_ST; Heat stable E.coli enterotoxin 1 Lee SC anon Short protein clustering Family Heat-stable toxin 1 of entero-aggregative E.coli (EAST1) is a small toxin. It is not, however, solely associated with entero-aggregative E.coli but also with many other diarrhoaeic E. coli families. Some studies have established the role of EAST1 in some human outbreaks of diarrhoea. Isolates from farm animals have been shown to carry the astA gene coding for EAST1. However, the relation between the presence of EAST1 and disease is not conclusive [1]. 25.00 25.00 73.10 73.10 19.70 17.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.93 0.72 -4.20 2 9 2009-01-15 18:05:59 2005-01-14 17:14:06 6 1 4 0 0 8 0 35.30 91 96.07 CHANGED ShphIR+PASShASCIWCsTACuSs.GRTTKPS.AT STQYIRRPASSYASCIWCATACASCHGRTTKPSLAT 0 0 0 0 +7923 PF08091 Toxin_21 Spider insecticidal peptide Lee SC anon Short protein clustering Domain This family consists of insecticidal peptides isolated from venom of spiders of Aptostichus schlingeri and Calisoga sp. 
Nine insecticidal peptides were isolated from the venom of the Aptostichus schlingeri spider and seven of these toxins cause flaccid paralysis to insect larvae within 10 min of injection. However, all nine peptides were lethal within 24 hours [1]. 20.10 20.10 20.20 22.50 19.80 19.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.54 0.72 -4.25 2 5 2012-10-01 22:06:18 2005-01-14 17:14:47 6 1 3 0 0 5 0 38.20 57 69.96 CHANGED C.StthPCoNuc-CCuGpCuh.hWsCh..tssCSKpC.u. CISARYPCSNSKDCCSGNCGs.FWTCalRKDPCSKECLAP. 2 0 0 0 +7924 PF08092 Toxin_22 Magi peptide toxin family Lee SC anon Short protein clustering Family This family consists of Magi peptide toxins (Magi 1, 2 and 5) isolated from the venom of Hexathelidae spider. These insecticidal peptide toxins bind to sodium channels and induce flaccid paralysis when injected into lepidopteran larvae. However, these peptides are not toxic to mice when injected intracranially at 20 pmol/g [1]. 25.00 25.00 26.30 26.10 24.60 18.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.41 0.72 -4.12 3 78 2012-10-01 22:06:18 2005-01-14 17:15:22 6 1 8 0 0 79 0 39.20 84 36.06 CHANGED CMGYDIECNEcLP.CCua..LECV+TSGY.WWYK+pYCRRK+ ....CIGEGVPC.D....ENDP.RCCSG..LVCLKPTLHGIWYKSYYCYKK.... 0 0 0 0 +7925 PF08093 Toxin_23 Magi 5 toxic peptide family Lee SC anon Short protein clustering Family This family consists of toxic peptides (Magi 5) found in the venom of the Hexathelidae spider. Magi 5 is the first spider toxin with binding affinity to site 4 of a mammalian sodium channel and the toxin has an insecticidal effect on larvae, causing paralysis when injected into the larvae [1]. 
19.60 19.60 20.20 49.90 18.90 18.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.85 0.72 -4.01 5 8 2012-10-01 22:06:18 2005-01-14 17:16:09 6 1 5 3 0 9 0 29.20 50 36.68 CHANGED uClLoph+CSSDK-CCGhTPsCshGlClPs sClLophtCSsDK-CCGhTPsCshGlCsPp 0 0 0 0 +7926 PF08094 Toxin_24 Conotoxin TVIIA/GS family Lee SC anon Short protein clustering Family This family consists of conotoxins isolated from the venom of cone snail Conus tulipa and Conus geographus. Conotoxin TVIIA, isolated from Conus tulipa displays little sequence homology with other well-characterised pharmacological classes of peptides, but displays similarity with conotoxin GS, a peptide from Conus geographus. Both these peptides block skeletal muscle sodium channels and also share several biochemical features and represent a distinct subgroup of the four-loop conotoxins [1]. 25.00 25.00 71.00 71.00 17.80 17.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -7.96 0.72 -4.35 2 2 2012-10-01 22:06:18 2005-01-14 17:16:32 6 1 2 2 0 4 0 31.50 63 98.44 CHANGED uCSGRsSRCPP.CCMGLhCuRG...KCluhat- uCSGRsSRCPP.CCMGLhCuRG...KCluhat- 0 0 0 0 +7927 PF08095 Toxin_25 Hefutoxin family Lee SC anon Short protein clustering Family This family consists of the hefutoxins that are found in the venom of the scorpion Heterometrus fulvipes. These toxins, kappa-hefutoxin1 and kappa-hefutoxin2, exhibit no homology to any known toxins. The hefutoxins are potassium channel toxins [1]. 
25.00 25.00 28.30 27.60 17.70 16.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.65 0.72 -6.82 0.72 -4.19 2 4 2009-01-15 18:05:59 2005-01-14 17:17:22 6 1 3 1 0 4 0 22.00 67 67.69 CHANGED GHACYRNCWREGNDEETCKERC GauCYRsCW+tGpDEETCKccC 0 0 0 0 +7928 PF08096 Bombolitin Bombolitin family Lee SC anon Short protein clustering Family This family consists of the bombolitin peptides that are found in the venom of the bumblebee Megabombus pennsylvanicus. Bombolitins are structurally and functionally very similar. They lyse erythrocytes and liposomes, release histamine from rat peritoneal mast cells, and stimulate phospholipase A2 from different sources [1]. 25.00 25.00 26.50 26.50 17.30 16.10 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.39 0.74 -6.37 0.74 -3.51 2 4 2009-09-11 05:23:05 2005-01-14 17:17:58 6 1 1 0 0 4 0 17.00 77 100.00 CHANGED .KIhDILAKLGKVLAHV IKIpDILAKLGKVLAHV 0 0 0 0 +7929 PF08097 Toxin_26 Conotoxin T-superfamily Lee SC anon Short protein clustering Family This family consists of the T-superfamily of conotoxins. Eight different T-superfamily peptides from five Conus species were identified. These peptides share a consensus signal sequence, and a conserved arrangement of cysteine residues. T-superfamily peptides were found expressed in venom ducts of all major feeding types of Conus, suggesting that the T-superfamily is a large and diverse group of peptides, widely distributed in the 500 different Conus species [1]. 25.00 25.00 27.80 27.80 12.90 11.70 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.18 0.73 -6.36 0.73 -3.17 2 2 2009-01-15 18:05:59 2005-01-14 17:18:32 6 1 1 0 0 2 0 11.00 91 100.00 CHANGED FCCPhIRYCCW FCCPhIRYCCW 0 0 0 0 +7930 PF08098 ATX_III Anemonia sulcata toxin III family Lee SC anon Short protein clustering Family This family consists of the Anemonia sulcata toxin III (ATX III) neurotoxin family. 
ATX III is a neurotoxin that is produced by sea anemone; it adopts a compact structure containing four reverse turns and two other chain reversals, but no regular alpha-helix or beta-sheet. A hydrophobic patch found on the surface of the peptide may constitute part of the sodium channel binding surface [1]. 25.00 25.00 25.60 25.20 17.90 16.00 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.61 0.72 -4.12 2 7 2009-01-15 18:05:59 2005-01-14 17:18:57 6 1 3 1 0 7 0 24.30 76 41.98 CHANGED poCCPC....YhuGCPWGQssa.pGCS KSCCPC...hhpGs.CFWGQNCYPEGCS 0 0 0 0 +7931 PF08099 Toxin_27 Scorpion calcine family Lee SC anon Short protein clustering Family This family consists of the calcine family of scorpion toxins. The calcine family consists of Maurocalcine and Imperatoxin. These toxins have been shown to be potent effector of ryanodyne-sensitive calcium channel from skeletal muscles. These toxins are thus useful for dihydropyridine receptor/ryanodyne receptor interaction studies [1,2]. 25.00 25.00 55.50 55.10 21.60 21.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -7.87 0.72 -4.05 2 5 2012-10-01 22:06:18 2005-01-14 17:20:37 6 1 4 1 0 5 0 33.00 83 60.66 CHANGED GDCLPHLKhCKtspDCCuKKCKRRGTNhEKRCR .GDCLPHLKRCKENNDCCSKKCKRRGTNPEKRCR 0 0 0 0 +7932 PF08100 Dimerisation Dimerisation domain Bateman A anon Pfam-B_455 (Release 16.0) Domain This domain is found at the N-terminus of a variety of plant O-methyltransferases. It has been shown to mediate dimerisation of these proteins [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.30 0.72 -4.13 30 919 2012-10-04 14:01:12 2005-01-16 14:21:20 6 7 155 29 350 965 0 50.20 38 14.45 CHANGED MsLKsAlELGlh-lltptG........phlosSElsupLs....hsPcusshlDRlLRlLu ...........MsLKsAl-LGls-hIpptG...........tshoh.s-ls.u...pLs..........hpPs...p.s...s..h.lcR.lhRlLs........... 0 22 202 291 +7933 PF08101 DUF1708 Domain of unknown function (DUF1708) Mistry J, Wood V, Novatchkova M anon manual Domain This is a yeast domain of unknown function. 20.80 20.80 20.90 21.60 20.40 20.60 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.55 0.70 -5.64 19 144 2012-10-03 21:54:49 2005-01-24 15:49:59 6 4 126 0 112 156 0 412.60 32 37.80 CHANGED pphstccV+pllHhhTtELKsRGhchsalhLPFRPcpssppl.sFlpphFshustt...hptctl.phlpps-shTLhpsLKah..WsRLPsu.lluW-sYppFKhtEp-psa.s+cAFlplhPpslsSsuHusIlYDFhDLLsuIuup....uphNthuGRKl.S+MuuhWAFs.spt.s...................su...FpsGhcpWtpuu-AhhHLhlAaLRShsPcs.pss....+LPpoLpsLL.sspYPPp.psshhpscoh.....pVPhlshps...SpsPapLLcRssc....st.t.Fps+-sashLpshF..phcssh...l.ptLocES+RlLcplo....osp........................................................................pshpsuWuc...................................................pphh...pscsshp.tpthSlscl-lcDaFhWsWhSSLusEpssp+KplFGRshllEscl...sG.pKWlVhpEps 
...........................................................................p.pht.cclppll+hhTtELKt...Ru..h.................chPalh....LP..F....RPp......pssst....hcsFlpphF...ststtt..........ps.......ctl..p.c.l....p.h.s-sh...sLssslKah..Wu..RL.................P..........s.....G....l........lsW..-..u.....YphF..+htE....................p...................-upa.s+cAFt.shlPhslsS.su+.spIlaDFFDLlsulAA+.....u+tNuhuGRKL.S+hsuhWAFppscss.........................su...FpsuacsWhpAuDAhpHLhhAaLRS.....hsPcs.tss......pLPhoLpsLlptspY...........P........Pp.psshhpspoh...............pVsh.ls....ss...SssPatLLcRspp.........FphR-s.thLpph....phcssh.....pthocEs+RlLcslo....stp.................................................................................................................................................................................................pshsPuWAc...................................................................................p.th.....-pth.p.tp.h.sslsph-lDD.FhWsWhSSLusEpsst+KthFGRshllEs.l....u.t+WllhpE................................................................................................................................................................................................................................................................................................................................................... 0 32 63 99 +7934 PF08102 Antimicrobial_7 Scorpion antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of antimicrobial peptides secreted by scorpions. Novel antimicrobial peptides have been isolated from scorpions, namely the opistoporin [1] and the pandinin [2]. These peptides form essentially helical structures and demonstrate high antimicrobial activity against Gram-negative and Gram-positive bacteria respectively. 
25.00 25.00 27.30 46.10 24.00 23.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -7.94 0.72 -3.94 4 7 2009-01-15 18:05:59 2005-01-25 14:26:24 6 1 5 0 0 8 0 42.00 60 67.59 CHANGED GKVWDWIKSsAKKlWNS-sVppLKspuLNAAKNaVAEKIGATP .G.lWDhIKshAKKlWNS-ssppLKspALNAAKNaVAEKIGATP 0 0 0 0 +7935 PF08103 Antimicrobial_8 Uperin family Lee SC anon Short protein clustering Family This family consists of the uperin family of antimicrobial peptides. Uperin is a wide-spectrum antibiotic peptide isolated from the Australian toadlet, Uperoleia mjobergii. Being only 17 amino acid residues long, it is smaller than most other wide-spectrum antibiotic peptides isolated from amphibians. Uperin adopts a well-defined amphipathic alpha-helix with distinct hydrophilic and hydrophobic faces [1]. 21.40 21.40 24.10 32.00 20.60 16.60 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.13 0.73 -6.25 0.73 -3.70 4 7 2009-01-15 18:05:59 2005-01-25 14:39:32 6 1 2 0 0 7 0 17.00 59 100.00 CHANGED GVGDhhRKlVosIKNVV GVhDhhRKlsoslKNlV 0 0 0 0 +7936 PF08104 Antimicrobial_9 Ponericin L family Lee SC anon Short protein clustering Family This family consists of the ponericin L family of antimicrobial peptides that are isolated from the venom of the predatory ant Pachycondyla goeldii. Ponericin L family shares similarities with dermaseptins. Ponericin L may adopt an amphipathic alpha-helical structure in polar environments and these peptides exhibit a defensive role against microbial pathogens arising from prey introduction and/or ingestion [1]. 
25.00 25.00 59.50 59.40 20.60 17.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.73 0.72 -6.75 0.72 -4.15 2 2 2009-01-15 18:05:59 2005-01-25 14:40:06 6 1 1 0 0 2 0 24.00 96 100.00 CHANGED LLKELWTKhKGAGKAVLGKIKGLL LLKELWTKhKGAGKAVLGKIKGLL 0 0 0 0 +7937 PF08105 Antimicrobial10 Metchnikowin family Lee SC anon Short protein clustering Family This family consists of the metchnikowin family of antimicrobial peptides from Drosophila.\ metchnikowin is a proline-rich peptide whose expression is immune-inducible. Induction of the metchnikowin gene expression can be mediated either by the TOLL pathway or by the imd gene product. The metchnikowin peptide is unique among the Drosophila antimicrobial peptides in that it is active against both bacteria and fungi [1]. 25.00 25.00 35.30 34.90 21.40 21.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.55 0.72 -4.34 2 13 2009-01-15 18:05:59 2005-01-25 14:40:48 6 1 12 0 7 13 0 51.90 66 99.70 CHANGED MQLNLGAIFLALLGVMATATSVLAEPHRHQGPIFDTRPSPFNPNQPRPGPIY MQLNL.GAI.FLALLGlhAsussl.huEsHR+QGPI.FDTRPSPFNPNQPR.P.GP.Y.. 0 1 1 4 +7938 PF08106 Antimicrobial11 Formaecin family Lee SC anon Short protein clustering Family This family consists of the formaecin family of antimicrobial peptides isolated from the bulldog ant Myrmecia gulosa in response to bacterial infection. Formaecins are inducible peptide antibiotics and are active against growing Escherichia coli but were inactive against other Gram-negative and Gram-positive bacteria. Formaecin peptides are 16 amino acids long, are rich in proline and have N-acetylgalactosamine O-linked to a conserved threonine [1]. 
25.00 25.00 41.80 41.80 14.90 14.30 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.42 0.73 -6.40 0.73 -3.90 2 2 2009-09-10 15:42:18 2005-01-25 14:41:15 6 1 1 0 0 2 0 16.00 88 100.00 CHANGED GRPNPVNsKPTPaPRL GRPNPVNsKPTPaPRL 0 0 0 0 +7939 PF08107 Antimicrobial12 Pleurocidin family Lee SC anon Short protein clustering Family This family consists of the pleurocidin family of antimicrobial peptides. Pleurocidins are found in the skin mucous secretions of the winter flounder (Pleuronectes americanus) and these peptides exhibit antimicrobial activity against Escherichia coli. Pleurocidin is predicted to assume an amphipathic alpha-helical conformation similar to other linear antimicrobial peptides and may play a role in innate host defense [1]. 21.10 21.10 22.00 21.30 19.20 17.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.17 0.72 -4.41 4 43 2009-01-15 18:05:59 2005-01-25 14:41:37 6 1 23 3 1 50 0 39.40 46 61.12 CHANGED MKhsAhFLVLSLVVLMAEPGEuFltalh+GlhHuGKhIHGhl MKhsAhFLVL.lVVLMAEPGEsha.t.plh+GhhHsG+h.I+th...................... 1 0 0 1 +7940 PF08108 Antimicrobial13 Halocidin family Lee SC anon Short protein clustering Family This family consists of the halocidin family of antimicrobial peptides. Halocidins are isolated from the haemocytes of the tunicate, Halocynthia aurantium. They are dimeric in structures which are found via a disulfide linkage between cysteines of two different- sized monomers. Halocidins have been shown to have strong antimicrobial activities against a wide variety of pathogenic bacteria and could be ideal candidates as peptide antibiotics against multidrug-resistant bacteria [1]. 
25.00 25.00 34.60 33.90 21.30 20.00 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.24 0.73 -6.32 0.73 -3.79 2 3 2009-01-15 18:05:59 2005-01-25 14:42:07 6 1 1 0 0 3 0 15.00 100 34.88 CHANGED ALLHHGLNCAKGVLA ALLHHGLNCAKGVLA 0 0 0 0 +7941 PF08109 Antimicrobial14 Lactocin 705 family Lee SC anon Short protein clustering Family This family consists of lactocin 705 which is a bacteriocin produced by Lactobacillus casei CRL 705. Lactocin 705 is a class IIb bacteriocin, whose activity depends upon the complementation of two peptides (705-alpha and 705-beta) of 33 amino acid residues each. Lactocin 705 is active against several Gram-positive bacteria, including food-borne pathogens and is a good candidate to be used for biopreservation of fermented meats [1]. 21.00 21.00 89.90 89.90 19.50 18.30 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.39 0.72 -4.13 2 2 2009-01-15 18:05:59 2005-01-25 14:42:34 6 2 2 0 0 2 0 31.00 100 72.94 CHANGED GMSGYIQGIPDFLKGYLHGISAANKHKKGRL GMSGYIQGIPDFLKGYLHGISAANKHKKGRL 0 0 0 0 +7942 PF08110 Antimicrobial15 Ocellatin family Lee SC anon Short protein clustering Family This family consists of the ocellatin family of antimicrobial peptides. Ocellatins are produced from the electrical-stimulated skin secretions of the South American frog, Leptodactylus ocellatus. The family consists of three structurally related peptides, ocellatin 1, ocellatin 2 and ocellatin 3. These peptides present hemolytic activity against human erythrocytes and are also active against Escherichia coli [1]. 
22.70 22.70 23.10 36.00 21.00 22.60 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.27 0.73 -6.55 0.73 -3.64 3 5 2009-01-15 18:05:59 2005-01-25 14:43:01 7 1 3 0 0 5 0 19.00 62 81.20 CHANGED VLDILKGAAKDLLAHlAsK VLDILKGAAKDLLAHlAsK 0 0 0 0 +7943 PF08111 Pea-VEAacid Pea-VEAacid family Lee SC anon Short protein clustering Family This family consists of the PEA-VEAacid neuropeptides family. These neuropeptides are isolated from the abdominal perisympathetic organs of the American cockroach. These peptides are found together with Pea-YLS-amide and Pea-SKNacid, giving a unique neuropeptide pattern in abdominal perisympathetic organs. The functions of these neuropeptides are unknown [1]. 25.00 25.00 44.20 44.10 23.00 19.50 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.31 0.74 -6.14 0.74 -3.46 2 2 2009-01-15 18:05:59 2005-01-25 14:45:27 6 1 1 0 0 2 0 15.00 93 93.75 CHANGED LsLTPGSHVDSYVEA LsLTPGSHVDSYVEA 0 0 0 0 +7944 PF08112 ATP-synt_E_2 ATP synthase epsilon subunit Lee SC anon Short protein clustering Family This family consists of epsilon subunits of the ATP synthase. The ATP synthase complex is composed of an oligomeric transmembrane sector (CF0), and a catalytic core (CF1). CF1 is composed of 5 subunits, of which the epsilon subunit functions as a potent inhibitor of ATPase activity in both soluble and bound CF1. 
Only when the epsilon inhibition is disabled is high ATPase activity detected in ATPase [1] 25.00 25.00 25.30 25.30 24.70 21.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.81 0.72 -3.90 2 15 2009-01-15 18:05:59 2005-01-25 14:48:08 6 1 15 0 4 6 0 50.40 71 92.87 CHANGED Msp......DKYlpIL+spL-pKKsElLppINMEYEKhLKpRLspL-clKtplLKE .....MDQ......DKYLQILRSSLEEKKSEILKNVNAEYEKLLKNRLNQLDEVKRKVLKE 0 1 1 3 +7945 PF08113 CoxIIa Cytochrome c oxidase subunit IIa family Lee SC anon Short protein clustering Family This family consists of the cytochrome c oxidase subunit IIa family. The bax-type cytochrome c oxidase from Thermus thermophilus is known as a two subunit enzyme. From its crystal structure, it was discovered that an additional transmembrane helix 'subunit IIa' spans the membrane. This subunit consists of 34 residues forming one helix across the membrane. The presence of this subunit seems to be important for the function of cytochrome c oxidases [1]. 20.70 20.70 23.20 22.60 20.20 20.00 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.39 0.72 -4.23 2 13 2009-01-15 18:05:59 2005-01-25 14:49:03 6 1 13 27 5 11 0 34.30 59 88.49 CHANGED MEEKPKGALAVILVLTLTILVFWLGVYAVFFARG .......MEEK...PpGALuVIhVLTlTILVFWhGVaAlFhARG. 1 1 4 5 +7946 PF08114 PMP1_2 ATPase proteolipid family Lee SC anon Short protein clustering Family This family consists of small proteolipids associated with the plasma membrane H+ ATPase. Two proteolipids (PMP1 and PMP2) are associated with the ATPase and both genes are similarly expressed in the wild-type strain of yeast with no modification of the level of transcription of one PMP gene is detected in a strain deleted of the other. Though both proteolipids show similarity with other small proteolipids associated with other cation -transporting ATPases, their functions remain unclear [1]. 
22.10 22.10 22.30 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.22 0.72 -4.47 2 29 2009-01-15 18:05:59 2005-01-25 14:51:50 6 1 21 0 18 21 0 39.90 63 68.42 CHANGED M...TLPGGVILVFILVGLACIAIIuTIIYRKWQARQRGLQRF ...........LPGGVILVFILVGLAsIAIluTIIYRKWQARQRuLQRF..... 0 3 8 16 +7947 PF08115 Toxin_28 SFI toxin family Lee SC anon Short protein clustering Family This family consists of the SFI family of spider toxins. This family of toxins might share structural, evolutionary and functional relationships with other small, highly structurally constrained spider neurotoxins. These toxins are highly selective agonists/antagonists of different voltage-dependent calcium channels and are extremely valuable reagents in the analysis of neuromuscular function [1]. 25.00 25.00 39.90 39.90 23.40 18.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.96 0.72 -4.29 2 10 2009-01-15 18:05:59 2005-01-25 14:53:16 6 1 1 0 0 10 0 33.30 79 79.10 CHANGED +psMsDEoVCYIpspNsssGpCLp.pusaAcPWEh .KECMsDGTVCYIHNHNDCCGSCLC.NGPlARPWEM. 0 0 0 0 +7948 PF08116 Toxin_29 PhTx neurotoxin family Lee SC anon Short protein clustering Family This family consists of PhTx insecticidal neurotoxins that are found in the venom of Brazilian, Phoneutria nigriventer. The venom of the Phoneutria nigrivente contains numerous neurotoxic polypeptides of 30-140 amino acids which exert a range of biological effects. While some of these neurotoxins are lethal to mice after intracerebroventricular injections, others are extremely toxic to insects of the orders Diptera and Dictyoptera but had much weaker toxic effects on mice [1]. 
25.00 25.00 68.20 68.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.79 0.72 -4.16 4 6 2009-01-15 18:05:59 2005-01-25 14:55:43 6 1 4 0 0 6 0 31.00 72 96.88 CHANGED AFC+aNGQQCTSDGQCCpG+ChTAFhG+ICM sFCRaNGQQCTSDGQCCaG+C+TAFhG+ICM 0 0 0 0 +7949 PF08117 Toxin_30 Ptu family Lee SC anon Short protein clustering Family This family consists of toxic peptides that are isolated from the saliva of assassin bugs. The saliva contains a complex mixture of proteins that are used by the bug either to immobilise the prey or to digest it. One of the proteins (Ptu1) has been purified and shown to block reversibly the N-type calcium channels and to be less specific for the L- and P/Q- type calcium channels expressed in BHK cells [1]. 24.50 24.50 24.90 79.10 24.20 23.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.03 0.72 -3.92 2 3 2012-10-01 22:06:18 2005-01-25 14:56:59 6 1 3 2 0 3 0 34.70 57 99.05 CHANGED A-cDCls.Gu.ChGpsK.CCp.+shC..YAN+C..l AD-DCLPRGSKCLGENKQCCc.+TTCMFYANRCVGl. 0 0 0 0 +7950 PF08118 MDM31_MDM32 Yeast mitochondrial distribution and morphology (MDM) proteins Mistry J, Wood V anon Pfam-B_37122 (release 16.0) Family Proteins in this family are yeast mitochondrial inner membrane proteins MDM31 and MDM32.\ These proteins are required for the maintenance of mitochondrial morphology, and the stability of mitochondrial DNA [1]. 
18.20 18.20 38.10 18.40 17.20 17.70 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.66 0.70 -6.04 5 247 2009-01-15 18:05:59 2005-01-25 16:36:47 6 4 139 0 186 252 0 358.20 32 76.81 CHANGED cRDQLLAQAoNhauRLRIRLKWhLKRS.NRPFNTDDISAFlSWlLVSNsLLlFLuTTTFlSLVIYLhNTVFAQEYVAcKlGNFLTKNSALTVVFESAIVPDWSSGKISF+KVFVSRRPKpscuFoKGSQ+EAsERAKLALSEsLLVscE-FDDGNYTQFDLTIDQV-ISLSLsKWINGKGhlDEVpINGLRGVVDRTHVaWKssDDARNYKNVHQPGDFEISsF+MNDVLFTLYQPuGFRPFpVSIFNCELPQLRKHWLFYDFLNANsMSGoYDNSMFTIHKKa+pcDhsc...psSsSssW+KVTRMRVDSLNIDHLNAGlEGPFGWITSGKVDMIGDVLLP--NtDslsLSELLTlIuDRIlKEA+RYpNhlPtpKs-pPDI..........DlccYFVMDFoLRLNNVRAcVPLFTPELSYINNALIRPIVGYINSKRTYIPI+CRVVKNLsDFuGSWTIYDShLMDDLSAEVYDAFA-YVAD-EpRslRMKRVGFWSLQLLlQLILhSLGAIA .........................................................+tphL..spshhp+hhlph+Wh.h+s..Rsas.--..h..sshhSWhhhuphhhhhluTTTFhuhhhh.h....N.....oh....p-hls...t.h.l.....uphhs......t...hph.hpp.shhst.ht.....pshI.hp.ps.l...................................................................................................................................................................................................................p..hhpa-lplpplsloLSh.pWhpGpGhlpphpl.GlRG...........hl...........shp.................p..h..........t..................s..............h.....t.h...hp.....hp......s.......sa.EhpphphpDhhhplhpst.s.h.ps.....hphSIashclspLRtpalhhDhhsAp.hsGshssShFol.....H.pQ...h.......t.........t..........................................a.pp.........hsRhRlDslplscLp..t.....sh...p.ushsWIhpGpl-hluDlhhPt.p....t..p...................t.....th....h..........h............................................................................................................................p.....hhh.hshplph.sl+A.hP................ths.....hhs.s.....l.....RslluahN............s...p....pp.....l.lpsph...htphts..ph.pshhhp.h..th...................Ytth...l...pt...p..p.c....Ws.p..............tt................................................................
.... 0 51 109 166 +7951 PF08119 Toxin_31 Scorpion acidic alpha-KTx toxin family Lee SC anon Short protein clustering Family This family consists of acidic alpha-KTx short chain scorpion toxins. These toxins named parabutoxins, block voltage-gated K channels and have extremely low pI values. Furthermore, they lack the crucial pore-plugging lysine. In addition, the second important residue of the dyad, the hydrophobic residue (Phe or Tyr) is also missing [1]. 25.00 25.00 90.40 90.30 22.30 21.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -8.05 0.72 -4.42 2 3 2009-01-15 18:05:59 2005-01-25 16:49:08 6 1 2 0 0 3 0 36.70 93 100.00 CHANGED DEEPKEoCSDEMCVIYCKGEEYSTGVCDGPQKCKCSD DEEPKETCSDEMCVIYCKGEEYSTGVCDGPQKCKCSD 0 0 0 0 +7952 PF08120 Toxin_32 Tamulustoxin family Lee SC anon Short protein clustering Family This family consists of the tamulustoxins which are found in the venom of the Indian red scorpion (Mesobuthus tamulus). Tamulustoxin shares no similarity with other scorpion venom toxins, although the positions of its six cysteine residues suggest that it shares the same structural scaffold. Tamulustoxin acts as a potassium channel blocker [1]. 25.00 25.00 97.40 97.30 16.70 16.70 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.97 0.72 -4.35 2 2 2009-01-15 18:05:59 2005-01-25 16:50:44 6 1 1 0 0 2 0 35.00 97 100.00 CHANGED RCHFVlCTTDCRRNSPGTYGECVKKEKGKECVCKS RCHFVlCTTDCRRNSPGTYGECVKKEKGKECVCKS 0 0 0 0 +7953 PF08121 Toxin_33 Waglerin family Lee SC anon Short protein clustering Family This family consists of the lethal peptides (waglerins) that are found in the venom of Trimeresurus wagleri. Waglerins are 22-24 residue lethal peptides and are competitive antagonist of the muscle nicotinic receptor (nAChR). Waglerin-1 possesses a distinctive selectivity for the alpha-epsilon interface binding site of the mouse nAChR [1]. 
25.00 25.00 25.70 57.80 24.10 15.40 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.73 0.72 -6.95 0.72 -4.35 2 2 2009-01-15 18:05:59 2005-01-25 16:51:09 6 1 1 0 0 2 0 22.00 95 91.67 CHANGED GGKPDLRPCaPPCHYIPRPKPR GGKPDLRPCaPPCHYIPRPKPR 0 0 0 0 +7954 PF08122 NDUF_B12 NADH-ubiquinone oxidoreductase B12 subunit family Lee SC anon Short protein clustering Family This family consists of the NADH-ubiquinone oxidoreductase B12 subunit proteins. NADH is the central source of electrons in the mitochondrial and bacterial respiration. NADH-ubiquinone oxidoreductase is involved in the transfer of electrons from NADH to the electron transport chain. This oxidation of NADH is coupled to proton transfer across the membrane, generating a proton motive force that is utilised for the synthesis of ATP. The function of this subunit is unclear [1]. 22.70 22.70 22.70 23.40 22.60 22.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.98 0.72 -4.23 13 205 2009-01-15 18:05:59 2005-01-25 17:17:10 7 3 190 0 146 193 0 56.50 39 58.04 CHANGED LRDPWuRNEAWRY.hssFuppho.hhsshF+GFtaG.FuAFVlslulE.....ahhtssc+sp.H ....+DPWtRsEAWRY..pGsFu.c....hs......hhcshh.....Ghshu.hs.AFs.shluhE......ah.l..p.sp..c+p................. 0 49 73 111 +7955 PF08123 DOT1 Histone methylation protein DOT1 Mistry J, Wood V anon Pfam-B_12064 (release 16.0) Domain The DOT1 domain regulates gene expression by methylating histone H3 [1]. H3 methylation by DOT1 has been shown to be required for the DNA damage checkpoint in yeast [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.18 0.71 -5.02 11 554 2012-10-10 17:06:42 2005-01-25 17:38:26 8 19 328 15 391 1008 656 161.00 26 26.32 CHANGED YsRuV.PcspcL+.cYcuFSspVYGELhPsFloslhpcssLsssclFhDLGSGVGNsVlQAALEhGCchSaGCElM-sASclAEtQhcEhcp+hphaGh+hsplcat.+tSFlsN-clcpllspsDVlLVNNFhFDspLNppl.pchltsLKsGsKIISLKshtsssap.Is.csh-sIhshL+Vpchshscs.SVSWTsps.ssYYISTl ..............................................................sYGEh...............t...hl.sp....l...h....p....p.h....p......l...p.p..........s..s..........l.FlDLGSGlGp...ll............hQsA.hph.s.sc........s..hGlEh.h........pp.h..s.p..h...A.........pt........t..t..h..p....t.........h.h....t......h.h...s....h..t......h.....php....h...ps....s..hh.p....t.........h.................h.......tp...ss..llhhN..s...........h..h......F..s...........p.l.t....tl.....p.h.h.......p.h...t.....G.sp.l.l.o...p...hh........................................................................................................................................................................................................................................................ 1 140 218 354 +7956 PF08124 Lyase_8_N Polysaccharide lyase family 8, N terminal alpha-helical domain Mistry J anon Pfam-B_2438 (release 16.0) Family This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen. 
24.00 24.00 24.90 24.20 21.40 23.90 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.96 0.70 -5.56 26 980 2012-10-02 15:11:41 2005-01-31 09:02:22 6 52 764 44 96 695 5 312.20 30 35.54 CHANGED Wtshl......hGsp.hsssssshtshhpphsppup.p.hsshp...psspshLWpcls........tpssoupl..Tssap+LpphApAappPsSshapssslhssIhcuLcahppphYssspp.................phG.NWWcWpIGsPpulsshLlLha-tlo...psclssYssslc+FsP-P.............sht....hpuTGANpsDhupshllcGlLpcDssclppuhcuLss........VashVspG......DGFYpDGSaIQH...........sslsYTGuYGsVLlcGlupLhsllpsosaslss.phsslachlccuahPllhcGtMMDhlRGRuISRtssp.........s+..stGtsllpullhl.uphussspp..tchpuhlKs ................................................................................W.sh.hG..p.ac..pps.phtphhpt..h-p.....c.s...t.....p.....hlpshpp....pss.Rs..h..L.Wpshp...............................ppS.ush....Tpoa+plcchApsh..p....sP.....s....op..h.a..p-....pplhptlh-ul-ah.pcphYs.s..p.s......................hu..NWWDaE.........IGsP+ulssTLhLhp...-.h..ho..............s-.chpp..ao.ss.Icp...FsPDs................tthh..ohs....shtu.......pG..uNhlDhu..+s..hlhpull....p.c.......Ds.p.p......lpp...ulc...ulsp...............V..F..ph...Vs..pu...................-.GFYpDGSaIpH..............ssVsYTG.uYGs.V.L...lcGluplhsllpt.....T....tas..h..s..spp..pslhpal-c..u.FhPll.hp.G.....ch.....h.D.h.s.RGRu.ISRtspp.........u+.....stuh.plhpulhhl.uc.h..sppp..tchpphlK.................................................................................. 
0 54 80 93 +7957 PF08125 Mannitol_dh_C Mannitol dehydrogenase C-terminal domain Bateman A anon Prosite Domain \N 20.60 20.60 20.80 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.61 0.70 -5.20 16 5053 2012-10-02 19:36:47 2005-01-31 18:33:56 8 18 2344 4 791 3180 477 232.20 31 52.45 CHANGED ppVuFPsshVDRIVPt...........thuhcDPhsVssEPFhpWVlEcshhpG.ss...hchsGsphVsclpPY.EhKLhhLNuuHuslAYlGaLtGhphlcEuhpDtplpthlcshhtEphtsllshh...sts-LpsYtspllpRFpNPhIpDplpRluhs.....PhRhLsshcchlt...shhchhshs.tsahphlpGlstshphhp.sDsputplpt..........hhspcpspshLu.hshhtt-h.pssph ..............................................pVsFssohVDRIVPs...........s....t...........h...............h....................p...........t...........h.....G.h.p..D............s............h.........................V............s............s............E......s.F..t..........p............W....VlE..c.s.h..h.....t.G...p..s...............h..c.h.....s..G....sp....h..V...s...D...l...h...PacEh..KL.t.h.LNuuHoh.lA....Yl...Gh....L...u..G...a...p....pIt-shp.D.sth.......cthsc.t...hhh...c.-tt...s.h..l.ph........sss...-......h.psYt.p.....p.l.l.t.RF..pN...P...hlccpshplAhss.pKh..P..h..RhLssh.c.thlpt....t.....s..........h....t....h..L....s.....h.....u....Au.a....h.pa...l..........p......G..........l...........s..........t............t.........t.........p..........h.........h.s..........s..D..s.....h......phtp....................h...s...p...p.t.....sp....s....h....Lu.....shhsts.......h.................................................................. 1 196 439 611 +7958 PF08126 Propeptide_C25 Propeptide_C25 Mistry J anon Rawlings N Motif This is found at the N terminal end of some of the members of the C25 peptidase family (PF01364). Little is known about the function of this motif. 
20.30 20.30 22.30 20.70 18.90 18.00 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.23 0.71 -5.32 4 56 2009-01-15 18:05:59 2005-02-01 17:12:27 6 18 27 0 24 60 111 185.60 21 14.11 CHANGED QsschttsPs....lphltssppSho+lpFc..hsplpFTpVpTpcG..shQssohstusshuEhGpPpLPlLp+.lAVs-..Ttsh+Vclhoochh-ppsl...hlsPopssh.+sEsP-plPY......l.upuYupstFhPuElsslspshhhRsVRhtsVshsPlQYNPVsppL+lhscIpVsVS.su.....ppspshhshhcsSsFssFEssYKphF ......................................................................................................................tts.....hp..hp..h.th.htphp.h.tt..hht.hshst.sh.....hsch.G.PpLPh.hpp.lulPs...s.hp..hpVps.ss.sp.h..hph.psh....hlhP.sps..h.csp.s..Pp...p.lsa.........hpspsYspspaaPsplsplspshhhRsh+stslshtPlQYNPVoppL+l....hsplplplohpu........tpt.t................................................ 0 17 22 23 +7959 PF08127 Propeptide_C1 Peptidase family C1 propeptide Mistry J anon Rawlings N Motif This motif is found at the N terminal of some members of the Peptidase_C1 family (Pfam:PF00112) and is involved in activation of this peptidase [1]. 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.83 0.72 -4.33 60 407 2009-01-15 18:05:59 2005-02-02 09:21:42 8 5 183 5 123 418 2 41.70 37 13.35 CHANGED LS--hIphINp.p.ssTWKAG+N..F.tshohspl+pLhGs.h.sss .....LS--hlshINc.p.ssTW+AG+N...F...shs....hs.h+pLhGs.hhps........... 0 61 73 99 +7961 PF08129 Antimicrobial17 Alpha/beta enterocin family Lee SC anon Short protein clustering Family This family consists of the alpha and beta enterocins and lactococcin G peptides. These peptides have some antimicrobial properties; they inhibit the growth of Enterococcus spp. and a few other gram-positive bacteria. These peptides act as pore- forming toxins that create cell membrane channels through a barrel-stave mechanism and thus produce an ionic imbalance in the cell. 
These family of antimicrobial peptides belong to the class II group of bacteriocin [1]. 25.00 25.00 26.80 26.50 24.20 23.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.73 0.72 -4.19 2 7 2012-10-02 23:56:30 2005-02-22 13:49:07 6 1 4 2 0 8 0 46.40 57 90.78 CHANGED MKQYKVLNEKEMKKsIGGESVFSKIGNAVGPAAYWILKGLGNMSDVNQADRINRKKH .......K.LsEKEh+pslGG....scIGpulG.sAYWlhKuhGNMSDVNQAsRINRKK.t.............. 0 0 0 0 +7962 PF08130 Antimicrobial18 Type A lantibiotic family Lee SC anon Short protein clustering Family This family consists of the type A lantibiotic peptides. Both Pep5 and epicidin-280 are ribosomally-synthesised antimicrobial peptides produced by Gram-positive bacteria that are characterised by the presence of lanthionine and/or methyllanthionine residues. The lantibiotics family has a highly specific activity against multi- drug resistant bacteria and has potential to be utilised in a wide range of medical applications [1,2]. 25.00 25.00 31.80 31.60 24.00 23.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.98 0.72 -4.09 2 6 2009-09-11 06:04:39 2005-02-22 13:49:58 6 1 4 0 0 4 0 53.30 58 100.00 CHANGED McNpKsLFDLEIKK-s.pNssELEsQohGPAI+Aohp.C....KATRhhTVSCK.KssCp M...NKELFDLDINK.pMEsPTEMTsQThGTslKVS+ulCK...puTCIsTISCo...NC.K 0 0 0 0 +7963 PF08131 Defensin_3 Defensin-like peptide family Lee SC anon Short protein clustering Family This family consists of the defensin-like peptides (DLPs) isolated from platypus venom. These DLPs show similar three-dimensional fold to that of beta-defensin-12 and sodium-channel neurotoxin Shl. However the side chains known to be functionally important to beta-defensin-12 and Shl are not conserved in DLPs. This suggests a different biological function. 
Consistent with this contention, DLPs have been shown to possess no anti-microbial properties and have no observable activity on rat dorsal-root-ganglion sodium-channel currents [1]. 17.70 17.70 17.70 18.00 17.50 17.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.20 0.72 -3.98 2 9 2012-10-01 20:50:19 2005-02-22 13:51:53 6 1 3 4 8 11 0 38.40 44 68.38 CHANGED ac.psC.ShsGVCRcKsphNC+.hhhs.C.NcpQKCCch .......ptCpshuGVCRcKss+NC+sIhhs.CcNRNp+CCc... 1 0 0 6 +7964 PF08132 AdoMetDC_leader S-adenosyl-l-methionine decarboxylase leader peptide Lee SC anon Short protein clustering Family This family consists of the S-adenosyl-l-methionine decarboxylase (AdoMetDC) leader peptides. AdoMetDC is a key regulatory enzymes in the biosynthesis of polyamines. All expressed plant AdoMetDC mRNA 5' leader sequences contain a highly conserved pair of overlapping upstream ORFs (uORFs) that overlap by one base. Sequences of the small uORFs are highly conserved between monocot, dicot and gymnosperm AdoMetDC mRNA species, suggesting a translational regulatory mechanism [1]. 25.00 25.00 55.60 55.00 17.20 17.00 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.69 0.72 -4.03 3 44 2009-01-15 18:05:59 2005-02-22 13:56:19 6 3 24 0 23 40 0 50.70 78 40.31 CHANGED MESKGGKKKSSSSSSpsoLFFEAPLGYSIEDVRPNGGIKKFRSAAYSNCo+KPS ........MESKGGKKKSSSSsS...LhYEAPLGYSIEDVRPsGGIKKFRSAAYSNCu++PS..... 0 4 18 23 +7965 PF08133 Nuclease_act Anticodon nuclease activator family Lee SC anon Short protein clustering Family This family consists of the anticodon nuclease activator proteins. Pre-existing host tRNAs are reprocessed during bacteriophage T4 infection of certain Escherichia coli strains. In this pathway, tRNA(Lys) is cleaved 5' by the anticodon nuclease to the wobble base and is later restored in polynucleotide kinase and RNA ligase reactions [1]. 
25.00 25.00 36.50 36.30 18.50 16.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -7.23 0.72 -4.45 2 15 2009-01-15 18:05:59 2005-02-22 13:59:57 6 1 14 0 0 9 0 25.40 82 96.21 CHANGED MSNFHNEHVMQFYRNNLKshGlhGhp .MSNFHNEHVMQFYRNNLKTKGVFGRp.. 0 0 0 0 +7966 PF08134 cIII cIII protein family Lee SC anon Short protein clustering Family This family consists of the cIII family of regulatory proteins. The lambda CIII protein has 54 amino acids and it forms an amphipathic helix within its amino acid sequence. Lambda cIII stabilises the lambda cII protein and the host sigma factor 32, responsible for transcribing genes of the heat shock regulon [1]. 25.00 25.00 64.30 64.20 23.90 23.20 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.28 0.72 -4.36 3 42 2009-01-15 18:05:59 2005-02-22 14:20:08 6 1 42 0 2 15 0 43.80 89 98.29 CHANGED MMHFQLAGSGVMSAFYPHESELSRRVKQLIRAAKKQLEALCAMK MMHFQLAGSGVMSAFYPHESELSRRVKQLIRAAKKQLEALCAMK.... 0 0 0 1 +7967 PF08135 EPV_E5 Major transforming protein E5 family Lee SC anon Short protein clustering Family This family consists of the major transforming proteins (E5) of the bovine papilloma virus (BPV). The equine sarcoid is one of the most common dermatological lesion in equids. It is a benign, locally invasive dermal fibroblastic lesion and studies have shown an association of the lesions with BPV. E5 is a short hydrophobic membrane protein localising to the Golgi apparatus and other intracellular membranes. It binds to and constitutively activates the platelet-derived growth factor-beta in transformed cells. This stimulation activates a receptor signaling cascade which results in an intracellular growth stimulatory signal [1]. 
25.00 25.00 37.10 36.90 20.90 20.30 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.50 0.72 -3.97 3 23 2009-01-15 18:05:59 2005-02-22 14:21:20 6 1 6 0 0 22 0 42.30 82 99.79 CHANGED Msa.GLLLFLGLTFAlQLLLLVFLLFFFLVWWDQFGCRC-Gh.L MPNLWFLLFLGLVAAMQLLLLLFLLLFFLVYWDHFECSCTGLPF. 0 0 0 0 +7968 PF08136 Ribosomal_S22 30S ribosomal protein subunit S22 family Lee SC anon Short protein clustering Family This family consists of the 30S ribosomal proteins subunit S22 polypeptides. This polypeptide is 47 amino acids in length and has a molecular weight of about 5 kDa. The S22 subunit is a component of the stationary-phase-specific ribosomal protein and is assembled in the ribosomal particles in the stationary phase. This subunit along with other stationary-phase-specific ribosomal proteins result in compositional changes of ribosomes during the stationary phase. The significance of this change is not clear as yet [1]. 25.00 25.00 26.00 25.30 24.00 23.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.05 0.72 -3.81 3 402 2009-12-01 13:51:23 2005-02-22 14:28:17 6 1 400 0 14 34 0 44.90 88 97.59 CHANGED MKSNRQARHILGLDYKLSNQRKVVIEGDsEoVVTHATGRKRHA-K ..MKSNRQARHILGLDHKISN.QRKIVTEGDKSSVV..N..N..PTGRKRPAEK............ 0 1 2 8 +7969 PF08137 DVL DVL family Lee SC anon Short protein clustering Family This family consists of the DVL family of proteins. In a gain-of-function genetic screen for genes that influence fruit development in Arabidopsis, DEVIL (DVL) gene was identified. DVL is a small protein and overexpression of the protein results in pleiotropic phenotypes featured by shortened stature, rounder rosette leaves, clustered inflorescences, shortened pedicles, and siliques with pronged tips. DVL family is a novel class of small polypeptides and the overexpression phenotypes suggest that these polypeptides may have a role in plant development [1]. 
19.60 19.60 19.70 22.30 18.50 19.20 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.20 0.72 -6.21 0.72 -4.48 20 182 2009-09-11 09:54:20 2005-02-22 14:32:13 7 4 18 0 129 166 0 19.00 66 23.47 CHANGED KEQRuRhYIlRRClsMLlC .KEQRARhYIlRRCVsMLlC... 0 24 81 101 +7970 PF08138 Sex_peptide Sex peptide (SP) family Lee SC anon Short protein clustering Family This family consists of Sex Peptides (SP) that are found in Drosophila. On mating, Drosophila females decreases her remating rate and increases her egg-laying rate due, in part, to the transfer of SP from the male to the female. SP are found in seminal fluids transferred from the male to the female during mating. The male seminal fluid proteins are referred to as accessory gland proteins (Acps). The SP is one of the most interesting Acps and plays an important role in reproduction [1]. 25.00 25.00 53.90 53.80 19.80 19.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.08 0.72 -4.12 5 23 2009-01-15 18:05:59 2005-02-22 14:41:42 6 1 10 1 2 24 0 52.70 57 99.51 CHANGED M+sPl.LhLllL..LlGlAhuh...hscRsc.....susIhGP+DRcKWCRLNLGPAWGGRsC ....MKs.hhhLlLVh..lLGLs.uhpWPhs++ss.....+hsI.SPpsR-KWCRLNLGPAWGGR.C 0 2 2 2 +7971 PF08139 LPAM_1 VirB; Prokaryotic membrane lipoprotein lipid attachment site Lee SC, Bateman A anon Short protein clustering Motif In prokaryotes, membrane lipoproteins are synthesized with a precursor signal peptide, which is cleaved by a specific lipoprotein signal peptidase (signal peptidase II). The peptidase recognizes a conserved sequence and cuts upstream of a cysteine residue to which a glyceride-fatty acid lipid is attached [1]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.71 0.73 -7.05 0.73 -3.54 71 711 2012-10-01 23:27:00 2005-02-22 14:43:14 7 16 567 0 60 226 22 24.40 62 12.78 CHANGED M...................................++hhhhhhhhh...LuuCuo .....................................................MhKKILFPLlALFh..LAGCAp. 0 6 22 43 +7972 PF08140 Cuticle_1 Crustacean cuticle protein repeat Lee SC anon Short protein clustering Family This family consists of the cuticle proteins from the Cancer pagurus and the Homarus americanus. These proteins are isolated from the calcified regions of the crustacean and they contain two copies of an 18 residue sequence motif, which thus far has been found only in crustacean calcified exoskeletons [1]. 21.40 21.40 21.40 26.00 21.10 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -7.95 0.72 -4.13 38 90 2009-01-15 18:05:59 2005-02-22 14:44:51 6 6 6 0 0 82 0 40.00 45 71.21 CHANGED GsSGllhsDGp.hQhstsh..sl..lLhGPSGhVhusGcslQh .....G.SGllhsDGp.hQhstsh..sl..llhGPSGhVhusGcNlQh...... 0 0 0 0 +7973 PF08141 SspH Small acid-soluble spore protein H family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) of the H type (sspH). SspH are unique to spores of Bacillus subtilis and are expressed only in the forespore compartment during sporulation of this organism. The sspH genes are monocistronic and are recognised by the forespore-specific sigma factor for RNA polymerase - sigma-G. The specific role of this protein is unclear but is thought to play a role in sporulation under conditions different from that of the common laboratory tests of spore properties [1]. 
25.00 25.00 51.10 51.00 21.30 17.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.72 0.72 -3.92 20 329 2009-01-15 18:05:59 2005-02-22 14:48:28 7 1 194 0 59 184 1 57.70 46 94.31 CHANGED M.-spRAKcIlsSsshlsVoYpGsPVaI-cVs.EpscsApla.l.ssspccpcVslssLcE ..MslpRAKpIhsSspphsVoa.pG.hPVaIppVD.EpspsAplYpl.sN.PscchpVsVssLcE....... 0 19 39 43 +7974 PF08142 AARP2CN AARP2CN (NUC121) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is the central domain of AARP2. It is weakly similar to the GTP-binding domain of elongation factor TU [1]. 21.50 21.50 21.50 25.10 21.20 21.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.67 0.72 -4.45 73 707 2009-01-15 18:05:59 2005-02-22 15:35:56 7 33 318 0 470 699 8 82.40 35 9.33 CHANGED -htNLhRhlssh+..+slsWRss+sYllu-chch...hsssp..t................................plslhGalRGss.hss..sphVHIsGhGDFplspI.ptlsDPs ......................................EhpNLhRhlsshK..+.slpW..Rss+sYlLu-chc.....hsssppht............................................tslslhGYlRGps....Lps.....s..........phVHIsGhG..DFplspl.phhsDP............................ 0 170 265 390 +7975 PF08143 CBFNT CBFNT (NUC161) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in proteins of CARG-binding factor A-like proteins [1]. 21.70 21.70 21.80 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.89 0.72 -2.88 16 125 2009-01-15 18:05:59 2005-02-22 15:43:46 6 4 36 2 37 95 0 66.10 44 21.96 CHANGED MS...E-Qhhpss..st.....sGpcuss-ppus...tshtGs.u..............................sussssAEGspI-ASKNEEDsGK .........................MS...EpQ.htss..st.....sG..uss-tpush..ssttussu....................uG...tsusssss...sGssstAEGspIsASKNEEDtGK. 
0 2 7 16 +7976 PF08144 CPL CPL (NUC119) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is fund in Penguin-like proteins associated with Pumilio like repeats [1]. 20.70 20.70 20.70 21.40 20.50 20.30 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.72 0.71 -4.36 8 270 2009-01-15 18:05:59 2005-02-22 15:44:06 6 5 231 0 193 266 1 141.50 26 20.63 CHANGED lssKYGRKVLLYLLuPRDssHFhPEIIclLccGD....sNAaSKKDsplRR+ELLEuISPsLLchls-cup-llhDpussllVuDILssssGDspsshs.AlAplAsp-h.suuh-G-hHIuccPAGHhsLKhLIpQD+chtEsGKEspFu+s .....................................h..spauR+slLY.Lls.s..t......sstah...s......s...p....h...l.p.h.L.pcsc............tsspSKKDspl...RRpELl..c...uho...P...s....LLphl....sp.....p.....s.........p.........pllp.s....sh....us.hls.-........l.Lh..........s......u.s..........G...........-.p.p..s.s.hp....A.lAph.s.....sp..p.........................t..............t..........t..t...p..................+hh.pp.shuthhL.KhLlptsp....................................................................................... 0 59 99 157 +7977 PF08145 BOP1NT BOP1NT (NUC169) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in BOP1-like WD40 proteins [1]. 
25.00 25.00 25.30 25.40 21.80 24.50 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.59 0.70 -4.65 38 353 2009-01-15 18:05:59 2005-02-22 15:44:34 7 9 289 0 249 359 9 244.50 45 35.02 CHANGED Y--hsHIGYDlsGKKIhKst.ppstLDphLcsh-.cPcs..........W...RslhDthsscslpLoc--Lcll+Rlppuchsspsa-sYpshl-aFs..tcttlh..PlossP-P.KR+FlPS.KtEtK+lhKlV+AIRpGcIpspc.pcc........cpphYDlWscc.......tppptpphtclsAPKhsLPGap.ESYNPPsEYL.sccEcppWpph..c.c-RchpFlPp+asuLRpVPuYpchlpERFERCLDLYLsPRs++p+lsl.DPEsLlPKLPsP+-L+PFP ...................Y--hsHIGYDlsGK+Ih+Pt....pt-tLDphL-ph-.sP.c.h..........W...pslhD.toG.pslpLoc..--lcLlc+lp.psc..hscssas...PY...psh.l-aFo..tc.tlh..Pl.os.tP-s.KRpFlPS.+h.Et+..+VhKlV+AI+tGhIhs.+.hcp.............p.phYDLWus-...........p.ts.cphhalPAPKhs.PG.at.ESYNPPsEYL.sccEcptWppt.......-sp-R..........ch.pFlPp+asuLRpVPuYpchlcE......RFERCLDLYLsPRs..R...+....+.lNl.D.P.-sLlP+LPsPc-L+PFP............... 0 96 148 210 +7978 PF08146 BP28CT BP28CT (NUC211) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in BAP28-like nucleolar proteins [1]. 20.50 20.50 20.80 23.40 20.40 18.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.80 0.71 -4.49 29 300 2009-01-15 18:05:59 2005-02-22 15:45:25 7 10 246 0 210 299 0 152.00 29 8.80 CHANGED Kpshsspps.lhchhLpAhDhRppp.........................phstpslsclEsplhss.hlphlhKLs-ssFRPLFhcLhcWAhst...tt........pssphpRhlsFa+hhsplt-sLKSlhosYhuall-sss.plLpph....................sssssss...........................hpLhphlLpsLppsFpaDp-pF .........................pphsppts.lhphhLpAhDhRppp................................p.shpplsclEss....l...hcs.hlshlhKLs-ssFRPLFh+Lh-WAts................................psuptsRhloFYphhsplt-pLK................ulhTsahuallcsss.plLpps...............................................s.pptpp........................................htLhphlLp..sLppsFhaDpppF............................................ 
0 68 114 174 +7979 PF08147 DBP10CT DBP10CT (NUC160) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in the Dbp10p subfamily of hypothetical RNA helicases [1]. 20.60 20.60 22.50 22.30 19.70 18.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.44 0.72 -4.16 23 302 2009-01-15 18:05:59 2005-02-22 15:49:32 7 8 257 0 208 294 1 63.70 37 7.59 CHANGED DLssD-stt...htpp+phh+WD+KKpKaVsh.....uspsspKhI+uESGt+IsA.Sa+SG+a-cWpKp+ ...........DLhsD-stp......hppppphh+WD+K+KKaVst..................suppsp..K..hI+sESGthI.s....A..S...a+.o..u.p....a.ccW+pp.p... 1 67 110 172 +7980 PF08148 DSHCT DSHCT (NUC185) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in DOB1/SK12/helY-like DEAD box helicases [1]. 21.90 21.90 22.70 25.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.96 0.71 -5.04 102 1286 2009-01-15 18:05:59 2005-02-22 15:57:23 7 24 846 4 689 1241 370 174.30 29 17.64 CHANGED hsssshs.pl..+GRlAscIpu...tsELllsEhlhsu.hhscLsPpclsAllSshVa-p+pssp.........ss............plpcshpp.......ltpltpcltplppcppl...........hhp.....p..phsLhplV..apWApGtsasplh..........phT.sl.EGslVRhh+Rlt-lLcQltpA.........hhusspLppphc.pAhphl+R..slVhtsShhl .................................h...tthl.p...cGRlsscI...po...tsE...LllsEhlhsG.hassLsP.t.plAAl.lSshVapp+ssst..............ss.........................pLtpshpp........................................lp.c.l.tpclsplppc.ppl..............t.............hhpp...chslhcll..YpWAp.Gt.sasplh....................pho..-l.EGslVRhh+Rlt-lLcQl....tpA...............................hhu..s..spLtp....phc..pA.hptl+R..slVhhs................................................. 0 256 432 601 +7981 PF08149 BING4CT BING4CT (NUC141) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in the BING4 family of nucleolar WD40 repeat proteins [1]. 
20.90 20.90 22.50 20.90 20.60 18.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.58 0.72 -4.47 31 342 2009-01-15 18:05:59 2005-02-22 15:57:57 6 6 292 0 239 342 11 78.80 53 14.46 CHANGED PYhsH..tsusplpslpFsPaEDlLGlGHspGFoSllVPGuGEsNaDuhE..sNPaETpKQRpEpEV+sLL-KLsPEhIoL-P ..................PYhsa....t.utplpslpFCPaEDVLGlGH.spG............aoSllVP..GAGEsNFDuhE..sNPaEo.pKQRpEtEV+uLL-Kl.PEhIsLDP.................. 0 86 134 202 +7982 PF08150 FerB FerB (NUC096) domain Staub E, Bateman A, Mistry J anon Staub E Domain This is central domain B in proteins of the Ferlin family [1]. 25.00 25.00 29.50 27.30 24.80 23.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.62 0.72 -3.91 5 436 2009-01-15 18:05:59 2005-02-22 16:00:55 7 62 72 0 247 370 0 75.70 48 4.27 CHANGED PQNSlPDIhIWMlpG-KRlAYARIPAHQVLYSpss-ptsGKsCGKlQTlFLKYPt-KssGs...+VPVKlRVpLWLGLS ...PQsSlPDVhIWMlpssK.RlAYARlPA+plLaShs.p.p..tsG+pCGKlQTlFLK..............h..P..t....ccs..G................pls.uclclhlWLGL..................... 0 47 64 150 +7983 PF08151 FerI FerI (NUC094) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is present in proteins of the Ferlin family. It is often located between two C2 domains [1]. 28.00 28.00 28.20 29.20 27.60 27.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.55 0.72 -3.96 16 414 2009-01-15 18:05:59 2005-02-22 16:01:35 7 51 80 0 250 369 4 71.60 46 3.94 CHANGED lDlGhlYcpPsHtFh+KWslL......oDPcDp....puGsKGYlKlolhVlGp.............GDp...sshph.tsssscp-DIEsNLLl.PsGV ...........hDlGolYspP..s.......H..pah+KWhlL.......sDP.-.D.h......ouG.s.KGYlKsslsVlGp.............GDp...s.spp....+st.ss-p-DIEuNLLl.PtGh................. 0 52 73 156 +7984 PF08152 GUCT GUCT (NUC152) domain Staub E, Bateman A, Mistry J anon Staub E Domain This is the C terminal domain found in the RNA helicase II / Gu protein family [1]. 
21.20 21.20 21.20 21.40 20.80 20.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.22 0.72 -4.00 19 241 2009-01-15 18:05:59 2005-02-22 16:02:17 7 10 123 1 143 235 2 96.60 33 14.30 CHANGED Gaop.lcpRSLLouhcGaVTlhLpss.psh.shuasaphL+p.Ls-...ptsspl+slsLhtDs..pGsVFDVPp-.s-chhst.tcs.......uhpLsssppLPs..Lpp ........Ghop.hcpRSLloucpGalThhLpss....ppht..s..hu..aua+pLpc..pLu-....shss..clpphsl.lcsp....hGs.sFDVPsptsccl.pphpcs..............phpLolsscLPcLp.............................................. 0 47 77 102 +7985 PF08153 NGP1NT NGP1NT (NUC091) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in a subfamily of hypothetical nucleolar GTP-binding proteins similar to human NGP1 [1]. 21.40 21.40 21.90 24.50 20.90 21.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.77 0.71 -4.19 37 376 2009-01-15 18:05:59 2005-02-22 16:02:59 7 9 299 0 254 366 3 128.30 43 21.82 CHANGED MY+ph+shRstcGcllpst.hQ..sp.ptssARIcPsR+WFGNTRVIuQcsLppFR-thupptpDPYpVllKpsKLPhSLLp-s.....t+ppcs+l.l-sEsappTFGP.KupRKRP+.L.sssslE-LuppApp.pppsY ..................................MY+p..Ks..hRspcGcll+sstaQ.......sp..sss...sARl-PsR+WFGNTRVI.uQpuLppFR-phupphpDPYpVllKpsKLPMSLLp-p.......................+pp+s+l.l-TEsFpsTFGP.KuQRKRP+.L...sssslc-LsppuppptppY......................... 1 99 146 212 +7986 PF08154 NLE NLE (NUC135) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is located N terminal to WD40 repeats. It is found in the microtubule-associated protein Swiss:Q12024 [1]. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.88 0.72 -3.96 66 603 2009-01-15 18:05:59 2005-02-22 16:03:45 7 15 305 0 433 599 6 64.60 28 13.67 CHANGED plhlpFhocptt...phsssshhlPsslsp.hsLspllNp..LL..............psccslPF-Fllss.p....hl+soLpcal ...............lhlpFhopps...hphss.ssltlP.s....s.lsp.tpLspllNp..LL..........................psccsl..PasFhlps.p.........lpsoLtchh...................... 0 154 243 360 +7987 PF08155 NOGCT NOGCT (NUC087) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in the NOG subfamily of nucleolar GTP-binding proteins [1]. 20.10 20.10 21.20 23.40 19.80 19.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.37 0.72 -4.28 29 370 2009-01-15 18:05:59 2005-02-22 16:04:31 6 11 298 0 245 355 1 53.40 52 8.46 CHANGED +++Lp+DlEpE..pGGuGVYslDL+KpYhLts-EWKaDhlPEIhsG+NlsDFlDPDI ...............+RKLERDlE.E...uhussYslDL.+KpY.....Ltss-W.KaDhIPEIh-G+NlhDalDPDI.... 0 87 138 203 +7988 PF08156 NOP5NT NOP5NT (NUC127) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in RNA-binding proteins of the NOP5 family [1]. 21.60 21.60 22.00 21.70 21.50 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.99 0.72 -3.81 69 744 2009-01-15 18:05:59 2005-02-22 16:05:18 8 14 327 0 499 728 15 65.40 36 12.64 CHANGED halLaEo...............uuGYuLFKlttctchhss...pltcphpshpphschVcLpuFp.FcsstpAL-sssslsEG ............halLaEo...............uuGYALF+lt.ctc....hss.......plt....cp.hpshp+hschV+LtuFp.Fp..ssspALc..s..ssulsEG........................... 1 178 282 416 +7989 PF08157 NUC129 NUC129 domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in a novel family of hypothetical nucleolar proteins [1]. 
25.00 25.00 31.10 49.00 18.90 18.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.90 0.72 -4.06 4 38 2009-01-15 18:05:59 2005-02-22 16:06:17 6 1 32 0 22 34 0 61.20 66 27.01 CHANGED YhssRLKDpshpsSpQpAAccFIpSpLYGPsosRTTsNchLSLpNKRussKtAAsQFlspsWu .YlAVRLKDQDLRDSRQQAAcsFIpssLYGPGTNRTT.....VNKFLSLsNKRhPVKKAAVQFLNsuWG. 0 1 3 9 +7990 PF08158 NUC130_3NT NUC130/3NT domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in a novel nucleolar protein family [1]. 25.10 25.10 25.60 25.80 25.00 25.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.34 0.72 -3.68 29 303 2009-01-15 18:05:59 2005-02-22 16:10:13 7 7 261 0 217 296 4 51.70 41 7.26 CHANGED FlupVusCYPc..psssFPppLt-LLppa+s....sLss-LRtpllpuLlLLRNKs.lI ......FlAp.VupCYPc......phssFPpcLt-LLppp+s...............sLcP-LR.plspuLlLLRNKslI........ 0 69 113 171 +7991 PF08159 NUC153 NUC153 domain Staub E, Bateman A, Mistry J anon Staub E Domain This small domain is found in a a novel nucleolar family [1]. 19.60 19.60 21.20 20.30 19.10 19.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.63 0.72 -7.10 0.72 -4.36 39 580 2009-01-15 18:05:59 2005-02-22 16:11:10 7 11 285 0 419 565 4 29.70 40 4.31 CHANGED DsRFtplFEsc-FulD.os.-F+thss.spp .DsRFp.slF-s.p-FslD.ss.cF+thpsht...... 1 134 226 346 +7993 PF08161 NUC173 NUC173 domain Staub E, Bateman A, Mistry J anon Staub E Domain This is the central domain of of novel family of hypothetical nucleolar proteins [1]. 
20.20 20.20 20.30 20.40 19.80 19.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.21 0.71 -4.96 32 340 2009-01-15 18:05:59 2005-02-22 16:12:26 7 5 272 0 238 341 3 186.10 30 15.59 CHANGED lschh-shL.ol+apsuhtplLplluuhF-phthp.usPhhhpsLpsluchRsspp..FshppEh-pslGuAlpuMGPEsVLshLPLNLst..ssst....pssRuWLLPlLRDplpsupLuaFpsphlPLupthpp+htc...ttsccslpsKlapTllpQlWolLPuFCshPhDLtpuF.cthAphLuslLhpps-LRssICpAL+tLl .........................................................................................................................hphhpphL...oh+apt.shtplhpllsshFptht......h........p........u.........t..s...........hhh.phlpslschRtstp.................hshppph-p....slGuAlpuMGP-sVL..p.....hlP.Ls......ls.......tpsh........................p.sRuW....LLPlLR..-..t...l..p................s..s.....p....LuaFpsth.....lPLup......t...htp.+shc..........ht..p.s.tp....sh.psKl...a...c..o...lhtQlWslLPuFCp.h.Ph.Dlt.p.uF..p...shAchL....ush...L.c..p...s...........-LR..sslCpALppLl........................................ 0 77 130 200 +7995 PF08163 NUC194 NUC194 domain Staub E, Bateman A, Mistry J anon Staub E Domain This is domain B in the catalytic subunit of DNA-dependent protein kinases. 
19.30 19.30 21.30 22.50 18.90 18.00 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.30 0.70 -5.61 8 121 2009-09-11 05:38:58 2005-02-22 16:13:54 7 15 83 0 75 118 2 341.00 37 10.01 CHANGED sslRpuhlDRsLlTLLpHCsh-ulhpFFophIsslh-slpu+asK..ssshsa-oQllcKhusY+hlElhYuRLsK--lpScpspINpAaasSsss......cGNELTKsLhKtsasAhoEsM.sGEopLlEhR..RpYHCAAYNshlAlISCohoEhKFYpuFLFsEpsEKNpalaENLIDhcRsY...sFslElEsPhERKcKhluIR+EuR......-uupstpcpPpYLSSp.SYlsDSSLSEEhSQFDFSTGVQs..hsauop-.....ttssptutp+pEpusp........hpLEhDELNpHECMAslsuLIc.HMp+spITPps.....-cGshPt....-LPsWMKFLHsKLuNsuspLNIRLFlAKlIlNscpVF+PYA+hWluPLMQLlV....pssNsucGlHYhVVDllsTlLSWsulupPp....Gss+-El ...................................................h+.shhcRsLlsLh.pCs.sslppFF....s.p.lsphhsh.lpschs+.........sptth-...s...plhcKhu...hachl-lhYsRLsK--lpuppupINpsa.ps.s.s.hs.......cGsELT+sLlKhs.acA.............h.......o...E..sh..s......G-.spL....l..Et+..............R.YHCAAYNC..hlulIssshs-.....K....FYpuFLFs.Ecs-Ks.hlaENlID.hc+pY...sFsl.El...Esshc.c...+..c+h...l..tI.Rccsp................ct.tt..s.....p...p..tspYhu...S....s.....a.hs-SSLSEEh.o...p.F..D..Fosu.Vps..hshssps...........stssphphpcpc.tp.............................hcLEhDE.LNpHECMsshssLlp.HMp+.s......l.Pt.........pp..sshst.................................plPsWMchL+sKLss.stsslNIRLFluKlllN......sp.-VF+PYA+aWLsPLlQl.......ll..........sts..sGucGl..HYhVl-llsslL.SWsslusP.....s................................. 1 36 43 59 +7996 PF08164 TRAUB Apoptosis-antagonizing transcription factor, C-terminal Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in traube proteins [1]. This is the domain of the AATF proteins that interacts with BLOS2 or Ceap, that functions as an adaptor in processes such as protein and vesicle processing and transport, and perhaps transcription. 
20.30 20.30 20.50 20.50 20.10 20.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.85 0.72 -3.88 30 299 2009-01-15 18:05:59 2005-02-22 16:22:17 7 6 263 0 214 301 3 82.10 41 16.59 CHANGED YcsLL+-Ll-p+sssuss..............t...sshhhphspppsKhKKsVDTKASKGRKlRYpVp-KLtNFM.AP.psphsWs.......-cth-ElFuoLh .....................YppLL+-LlEp+susssss...............t...spphhthtphcsK..h+K.p.VDpK.ASKGRKlRYpVppKLhNFM.AP...spsshs.............-cshsEhFtoLh................... 0 71 115 175 +7997 PF08165 FerA FerA (NUC095) domain Staub E, Bateman A, Mistry J anon Staub E Domain This is central domain A in proteins of the Ferlin family [1]. 22.20 22.20 22.20 23.10 20.70 22.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.86 0.72 -4.30 12 211 2009-01-15 18:05:59 2005-02-22 16:23:34 6 33 52 0 98 201 0 64.60 36 3.52 CHANGED hulpApssssplsplhlcll-plIpDspp..LPplcsp.sssssLDhplpcLRpppLppIpEtAh+h+ ...............ulpuphstpplsplhhpLlD-lIpD...spp..sLP..slctp.sssThLDpplh+LRsppLppIpEA.Ahph+... 0 18 25 51 +7998 PF08166 NUC202 NUC202 domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found in a novel family of nucleolar proteins [1]. 25.00 25.00 34.20 26.80 24.00 22.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.14 0.72 -4.11 4 96 2009-01-15 18:05:59 2005-02-23 09:50:55 7 4 31 0 52 81 0 70.40 48 12.27 CHANGED PYoSsRC+..lYplL.shlhs..........C.......GtopssLQsutasoEALls..tLlpshsPP...............h.phG ......PYoosRs+..lYsl...L.hhl.s...........C.......GtptssLpsuu.suEALlT..tLhpshsPP......................s.......... 0 11 14 22 +7999 PF08167 RIX1 NUC201; rRNA processing/ribosome biogenesis Staub E, Bateman A, Mistry J anon Staub E Domain Rix1 is a nucleoplasmic particle involved in rRNA processing/ribosome assembly [1,2]. 
It associates with two other proteins, Ipi1 and Ipi3, to form the RIX1 complex that allows Rea1 - the AAA ATPase - to associate with the 60S ribosomal subunit. More than 170 assembly factors are involved in the construction and maturation of yeast ribosomes, and after these factors have completed their function they need to be released from the pre-ribosomes. Rea1 induces the release of the assembly protein complex in a mechanical fashion [3]. This family is usually associated with NUC202, Pfam:PF08166. 25.80 25.40 25.80 25.40 25.70 24.50 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.76 0.71 -4.57 37 221 2009-01-15 18:05:59 2005-02-23 09:51:27 7 9 181 0 158 213 0 166.40 23 20.46 CHANGED h.hllssLppsptlhss...................spstlpclhs+.ltsLLpopsspsRWsGlsLl+shlpts.hplL.ppussalpsLlslLpp................spshtshphsl.slsplhphhpshPoL..oRElhTPpLssh.........Isshlslhp....................phllssLpcLlh.pHPTsFRPFsspl+shLhplls ...................................................h....ht.ltppt.lhss..........................ttstlptLhsp...lssLL..put...psR....a.tGlsLlpshlpts....h.-hh.pps.ssWlpu.lhplLpp.........................scs..s.shc..hslhsLsclh......p.hstphPsL..sR-l.s.ssp.Lssh.........lsshLs..Lhp.....................th.p...ssLpshppllh.haPssh+shtsplpshlhshl.h............................ 0 41 81 124 +8000 PF08168 NUC205 NUC205 domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found in a novel family of nucleolar proteins [1]. 25.00 25.00 28.00 27.00 22.60 21.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.07 0.72 -4.56 4 52 2009-01-15 18:05:59 2005-02-23 09:51:41 6 1 37 0 25 55 0 44.00 54 6.46 CHANGED EpsVhpSFossVspKFISLhSLSSDG.ClYETLIPIpsoDsEcNQ ...EpSlhcSFTASVDpKFISLMSLSSDG.CIYETLIPIpPsDPEKNQ.. 
0 2 2 7 +8001 PF08169 RBB1NT RBB1NT (NUC162) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found N terminal to the ARID/BRIGHT domain in DNA-binding proteins of the Retinoblastoma-binding protein 1 family [1]. 25.00 25.00 25.80 26.80 21.30 24.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.31 0.72 -4.05 7 171 2012-10-02 16:56:36 2005-02-23 09:51:57 6 4 73 1 86 164 0 96.30 50 8.53 CHANGED RRLNDELLGKVVsV....psptccssWasALVVSPSCsDDloVKKDQCLVRSFtDSKFaoVARKDl+Elss.slsKu-hsh+pGhctAhhFhps+tlPcs ........R+.pDEL.LGKVVsV......st..cpc...KspWaPAL..VVu.....P.....oC..sD.-....l..s.VK...KDphLVRSFpD.uK.FaS.......VsRKDl+E.lstp......shPKs-.h.s.hK.uhppAhpFh+o+slPsp.............................. 0 17 22 49 +8002 PF08170 POPLD POPLD (NUC188) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found in POP1-like nucleolar proteins [1]. 25.20 25.20 26.30 25.30 24.60 25.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.02 0.72 -4.14 31 292 2009-01-15 18:05:59 2005-02-23 09:52:11 7 8 256 0 218 296 1 95.00 31 11.29 CHANGED sWslllPata.shshWhtL...............sph......ss+hsGL+phcQlshEpstshFPtDaPt.opuGhthpph.ppcptcpcap++P.u....KRls.applshht.............sFssDW .............................sWslllPhta.shshWhs.l...............hhh.......ssRhuG......L+Etpp.....luhEpt.t.....shFPt.D..aPs.....ot..AGh...............thttp.ptpphppcap..R+P.u....KRss.atpls.hhs.............satssW................................ 0 70 114 178 +8003 PF08171 Mad3_BUB1_II Mad3_like; Mad3/BUB1 homology region 2 Mistry J, Wood V anon Pfam-B_113144 (release 16.0) Domain This domain is found in checkpoint proteins which are involved in cell division. This region has been shown to be necessary and sufficient for the binding of MAD3 to BUB3 in Saccharomyces cerevisiae. This domain is present in BUB1 which also binds BUB3 [1]. 
20.90 20.90 20.90 21.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.38 0.72 -4.08 8 63 2009-01-15 18:05:59 2005-02-23 09:52:36 6 2 48 7 39 66 0 73.40 37 8.76 CHANGED KIslF+Dsh..............ucssPVYKLIcsPG+KsEKIssNFcLLYP-scc......EaClEEILA...luRsl......Y+cppsphK...c- ............KhsIFpDph...........................spssPVYplIpssG+KPE+IssNhcLIYs-sc-......EashEElLA...lSRsl.......Y+p.ppth...pp...................... 1 7 21 35 +8004 PF08172 CASP_C CASP C terminal Mistry J, Wood V anon Pfam-B_7701 (release 16.0) Family This domain is the C-terminal region of the CASP family of proteins. It is a Golgi membrane protein which is thought to have a role in vesicle transport [1]. 28.00 28.00 34.30 31.30 27.90 27.90 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.62 0.70 -5.35 29 261 2009-01-15 18:05:59 2005-02-23 09:53:29 7 5 207 0 179 266 3 229.80 33 35.98 CHANGED hppLppplspsstclpctcpLst+LEsDLtplptt.shstt.ssstuhhshhspphst..........t..ph.uPsssh.s......................ssssSlLPIlosQRDRFRpRNsELEc-L.+ctppplspLcpElssL+tDNhcLYEKhRYlpSYs....................ss.sssthstss....................s.t.............................................pYpppYEppLsPhsuF+t+EppRh.hp+lus....hERhhhShsRhlLus+toRhlFhhYslsLHhlVhhhhhh.huhss.hph ..........................................................................h.tpLptclsphpsphpcpppL.t+LEpDL.plpt...h.tp......spshuh...shh.pphsps....................h..sph.uPsuuh.hst...............................................................usssulLsllouQRDRFRpRNpELEp.........El.pptppplptL..ppElssLptDNlpLYEKhRalpoYst...............................sssssth...........................s.t....................................................................................+YpptY..Epp...lsPFuuFpt+EppRt.hpplSs........................h-+hhhShsRhlLuN+huRslhhhYslhLHhLVFhhLYh.huhsp....h............................... 
0 63 104 152 +8005 PF08173 YbgT_YccB Membrane bound YbgT-like protein Rossi R anon Short protein clustering Family This family contains a set of membrane proteins, typically 33 amino acids long. The family has no known function, but the protein is found in the operon CydAB in E. coli. Members have a consensus motif (MWYFXW) which is rich in aromatic residues. The protein forms a single membrane-spanning helix. This family seems to be restricted to Proteobacteria [1]. 26.10 26.10 26.70 26.60 26.00 26.00 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.21 0.72 -4.12 36 1086 2009-01-15 18:05:59 2005-02-23 11:52:44 6 1 889 0 151 341 16 28.00 61 71.26 CHANGED MWYFuWILGlsLAsuhullNAhWhEhpp MWYFAWILGsLLACuFGlIsAlhLEphp.. 0 23 66 108 +8006 PF08174 Anillin DUF1709; Cell division protein anillin Mistry J, Wood V anon Pfam-B_55293 (release 16.0) Domain Anillin is a protein involved in septin organisation during cell division.\ It is an actin binding protein that is localised to the cleavage furrow, and it maintains the localisation of active myosin, which ensures the spatial control of concerted contraction during cytokinesis [3]. 
21.50 21.50 21.80 21.90 20.40 20.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.89 0.71 -4.13 70 383 2009-01-15 18:05:59 2005-02-23 14:59:18 6 8 201 0 244 378 0 146.60 19 15.15 CHANGED sspstlshs....shplshttct.......hpchF..sstc..............pppthhshh......................chs.......sshcsph..h...sp-.Gshupshlshss..hph................pshupsaplclplasp....hpppsssspppphttt............................pt....................................ssapl..Gplplplhhlsc ......................................................puplslS..sl+lPLhhc.............thF...ps.p.p.................................pphhhhhhh...................................+hu...........c.hcsph.hhs..sps....hshstlsFps.hhh......................ps.sus-F.plclElYut.h...pcptshsss.s.+.+http.........................tt.hs++.......................................................tstapl..uphpLsLt.l.............................................................. 0 55 97 181 +8007 PF08175 SspO Small acid-soluble spore protein O family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) O type (sspO). SspO (originally cotK) are unique to the spores of Bacillus subtilis and are expressed only in the forespore compartment of sporulating cells of this organism. The sspO is the first gene in a likely operon with sspP and transcription of this gene is primarily by RNA polymerase with the forespore-specific sigma factor, sigma-G. Mutation deleting sspO causes the loss of the SspO from the forespore but had no discernible effect on sporulation, spore properties or spore germination [1]. 
20.60 20.60 21.70 21.10 18.20 17.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.60 0.72 -3.64 5 137 2009-01-15 18:05:59 2005-02-23 15:09:59 7 2 133 0 23 61 0 47.80 76 63.65 CHANGED sKRKANHVhPGMNAAKSQGNGAG....YpEE.uQcPLTpAQRQNNKKRKKNQ ....................G.KRKANHsIsGMNAASAQGQGAG....YNEEFANEsLTsAERQNNKKRKKNQ.. 0 3 14 17 +8008 PF08176 SspK Small acid-soluble spore protein K family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) belonging to the K type (sspK). The sspK are unique to the spores of Bacillus subtilis and are expressed only in the forespore compartment of sporulating cells of this organism. The sspK gene is monocistronic and transcription is primarily by the RNA polymerase with the forespore-specific sigma factor, sigma-G. Mutation deleting sspK results in loss of SspK from the spore but had no discernible effect on sporulation, spore properties or spore germination [1]. 25.00 25.00 28.10 27.20 17.50 15.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.32 0.72 -3.96 7 127 2009-01-15 18:05:59 2005-02-23 15:10:38 7 1 127 0 20 41 0 47.40 72 89.68 CHANGED MRNKA+GFP...NppKF-G.EPcA+scaASKRssGohNT+PQERMRASs MG+QAEFWSESKNNSKIDG.QPKAKSRFASKRPNGTINTHPQERMRAAN.. 0 4 12 14 +8009 PF08177 SspN Small acid-soluble spore protein N family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore protein (SASP) N type (sspN). SspN is a 48 residues protein that is expressed only in the forespore compartment of sporulating Bacillus subtilis. The sspN gene is recognised equally by both sigma-G and sigma-F. The role of SspN is still not well-defined [1]. 
25.00 25.00 43.90 62.50 19.40 16.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.38 0.72 -4.23 8 129 2009-01-15 18:05:59 2005-02-23 15:17:28 6 1 129 0 18 37 0 44.90 71 98.57 CHANGED MG.N.KcspsQFsPsHLGTKPhcacuNKGKKMpDKSGcpP.VhQTKG .......MG.NPKKNSKDFAPNHIGTQSKKAGGNKGKQMQDQTGKQPIV..DNG 0 2 10 12 +8010 PF08178 GnsAB GnsA/GnsB family Lee SC anon Short protein clustering Family This family consists of the GnsA/GnsB family. GnsA and GnsB are multicopy suppressors of the secG null mutation. These proteins participate in the synthesis of phospholipids, suggesting the functional relationship between SecG and membrane phospholipids. Overexpression of gnsA and gnsB causes a remarkable increase in the unsaturated fatty acid content. However, the gnsA-gnsB double null mutant exhibits no effect. Both proteins are predicted to possess a helix-turn-helix structure [1]. 25.00 25.00 32.60 30.30 21.80 19.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.62 0.72 -4.31 4 459 2009-01-15 18:05:59 2005-02-23 15:20:45 6 1 376 0 7 49 0 53.40 75 71.87 CHANGED MN.EcLK+pAEp-IusaIoKKlsELpKpTGKEVoEIcFssREpMsG...LESYcVKI ...MNIEELK+pAEsEIA-aIupKIAELpKpTGKEVSEIcFTAR.EKMTG.....LESYDVKI.......... 0 3 3 5 +8011 PF08179 SspP Small acid-soluble spore protein P family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) P type (sspP). sspP is expressed only in the forespore compartment of the sporulating cell. sspP is also expressed under sigma-G control from the same promoter as sspO. Mutations deleting sspP causes no discernible effect on sporulation, spore properties or spore germination [1]. 
25.00 25.00 25.10 31.50 21.40 24.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.13 0.72 -3.76 8 120 2009-01-15 18:05:59 2005-02-23 15:21:37 7 1 120 0 20 59 0 41.10 67 92.60 CHANGED M.sKNsuKchRpNspKGcssGQP..EPLSGSKKVKNRNHoRQKHssH ....................................+pNppptcp.p.GQP..EPLSGSHKVKNRNHSRQK+pu.... 0 3 11 13 +8012 PF08180 BAGE B melanoma antigen family Lee SC anon Short protein clustering Family This family consists of the B melanoma antigen (BAGE) peptides. The BAGE gene encodes a human tumour antigen that is recognised by a cytolytic T lymphocyte. BAGE genes are expressed in melanomas, bladder and lung carcinomas and in a few tumours of other histological types [1]. 20.60 20.60 22.50 21.70 20.50 18.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -6.93 0.72 -4.19 5 119 2009-01-15 18:05:59 2005-02-23 15:22:01 6 2 112 0 16 78 0 28.70 74 4.85 CHANGED LIAASlWLAA.SAQALEAKLpK-DLPlLA .LIAASLWLAA.SAQALE.....A.....KL+.EDLPlLs.. 0 6 8 13 +8013 PF08181 DegQ DegQ (SacQ) family Lee SC anon Short protein clustering Family This family consists of the DegQ (formerly sacQ) regulatory peptides. The DegQ family of peptides control the rates of synthesis of a class of both secreted and intracellular degradative enzymes in Bacillus subtilis. DegQ is 46 amino acids long and activates the synthesis of degradative enzymes. The expression of this peptide was shown to be subjected both to catabolite repression and DegS-DegU-mediated control. Thus allowing an increase in the rate of synthesis of degQ under conditions of nitrogen starvation [1]. 
20.90 20.90 22.00 94.60 20.80 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.27 0.72 -4.24 3 28 2009-01-15 18:05:59 2005-02-23 15:22:52 6 1 26 0 5 9 0 46.00 86 99.77 CHANGED MEK.cIEELKQLLWRLENEIRETTDSLRNINKSIDQYDKYoY.lKIS MEK.KLEEVKQLLFRLE.DI+ETTDSLRNINKSIDQLDKYsYAMKIS 1 1 2 3 +8014 PF08182 Pedibin Pedibin/Hym-346 family Lee SC anon Short protein clustering Family This family consists of the pedibin and Hym-346 signaling peptides. These two peptides have been isolated from Hydra vulgaris and Hydra magnipapillata. Experiments have indicated that both cause a reduction in the positional value gradient, the principle patterning process governing the maintenance of form in the adult hydra. The peptides cause an increase in the rate of foot regeneration following bisection of the body column. Thus both play important signaling roles in patterning processes in cnidaria and maybe in more complex metazoans [1]. 27.00 27.00 39.50 42.00 24.20 18.20 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.37 0.72 -4.45 4 5 2009-09-11 09:46:32 2005-02-23 15:23:37 6 2 3 0 3 5 0 32.20 49 8.64 CHANGED pLptEIslLQ.hhA-GEDVscpLEpKEKtLpNhcp pLptEIslLQ.hhA-GEDVs+ELEpKEKALuNacc 0 3 3 3 +8015 PF08183 SpoV Stage V sporulation protein family Lee SC anon Short protein clustering Family This family consists of the stage V sporulation (SpoV) proteins of Bacillus subtilis which includes SpoVM. SpoVM is an small, 26 residue-long protein that is produced in the mother cell chamber of the sporangium during the process of sporulation in B. subtilis. SpoVM forms an amphipathic alpha-helix and is recruited to the polar septum shortly after the sporangium undergoes asymmetric division. The function of SpoVM depends on proper subcellular localisation [1]. 
20.60 20.60 21.40 22.90 19.80 19.50 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -6.70 0.72 -4.18 3 45 2009-01-15 18:05:59 2005-02-23 15:24:02 6 1 44 0 19 38 0 25.90 67 15.99 CHANGED MKFYTIKLP+FVGGlV+slLGSFKKD ..MKFYTIKLPKFLGGlVRAhLsSF+K.t 0 6 12 15 +8016 PF08184 Cuticle_2 Cuticle protein 7 isoform family Lee SC anon Short protein clustering Family This family consists of cuticle protein 7 isoforms that are isolated from the carapace cuticle of a juvenile horseshoe crab, Limulus polyphemus. There are 3 isoforms of cuticle protein 7. The 3 isoforms are N-terminally blocked but could be deblocked by treatment with pyroglutaminase, showing that the N-terminal residue is a pyroglutamine residue [1]. 25.00 25.00 139.70 139.60 19.30 18.00 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -4.17 3 3 2009-01-15 18:05:59 2005-02-23 15:24:28 6 1 1 0 0 3 0 59.00 95 100.00 CHANGED QAVRYANGYTYDIETGQVSSPYTGRVYETKGKAPFYGFGFEHPYHYYPGYYHGYPHAFY QAVRYANGYTYDIETGQVSSPYTGRVYETKGKAPFYGFGFEHPYHYYPGYYHGYPHAFY 0 0 0 0 +8018 PF08186 Wound_ind Wound-inducible basic protein family Lee SC anon Short protein clustering Family This family consists of the wound-inducible basic proteins from plants. The metabolic activities of plants are dramatically altered upon mechanical injury or pathogen attack. A large number of proteins accumulates at wound or infection sites, such as the wound-inducible basic proteins. These proteins are small, 47 amino acids in length, has no signal peptides and are hydrophilic and basic [1]. 25.00 25.00 27.00 25.90 23.20 20.20 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.25 0.72 -4.05 4 21 2009-01-15 18:05:59 2005-02-23 15:25:19 6 5 16 \N 13 22 0 43.50 66 43.31 CHANGED MIY-ssSuLFRSFL.ppupssDKRppEsp+.pt.t.KASsNKP...VMsE ..........MIYDVNSPLFRSFLSQKG.u.uuDKRKhEEpKPK-Q+.KAsENKP...VMsE....... 
0 3 10 12 +8019 PF08187 Tetradecapep Myoactive tetradecapeptides family Lee SC anon Short protein clustering Family This family consists of myoactive tetradecapeptides that are isolated from the gut of earthworms, Eisenia foetida and Pheretima vitata. These peptides were termed ETP and PTP respectively. Both peptides showed a potent excitatory action on spontaneous contractions of the anterior gut. These peptides show similarity to Molluscan tetradecapeptides and arthropodan tridecapeptides [1]. 25.00 25.00 37.80 37.80 17.60 15.60 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.49 0.74 -6.07 0.74 -3.72 2 2 2009-09-11 09:59:55 2005-02-23 15:25:42 6 1 2 0 0 2 0 14.00 86 100.00 CHANGED GF+DGuADRISHGF GF+DGuADRISHGF 0 0 0 0 +8020 PF08188 Protamine_3 Spermatozal protamine family Lee SC anon Short protein clustering Family This family consists of the spermatozal protamines. Spermatozal protamines play an important role in remodelling of the sperm chromatin during mammalian spermiogenesis. Nuclear elongation and chromatin condensation are concomitant with modifications in the basic protein complement associated with DNA. Somatic histones are initially replaced by testis -specific histone variants, then by transitional proteins, and ultimately by protamines [1]. 25.00 25.00 112.10 112.00 22.80 17.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.72 0.72 -3.98 2 2 2009-01-15 18:05:59 2005-02-23 15:26:42 6 1 1 0 0 2 0 48.00 98 100.00 CHANGED ARRRHSMKKKRKSVRRRKTRKNQRKRKNSLGRSFKtHGFLKQPPRFRP ARRRHSMKKKRKSVRRRKTRKNQRKRKNSLGRSFKtHGFLKQPPRFRP 0 0 0 0 +8021 PF08189 Meleagrin Meleagrin/Cygnin family Lee SC anon Short protein clustering Family This family consists of meleagrin and cygnin basic peptides that are isolated from turkey and black swan respectively. Both peptides are low in molecular weight and contains three disulphide bonds with high concentrations of aromatic residues. 
These peptides show similarity to transferrins and probably play some vital role in avian eggs but the exact function is still unknown [1]. 25.00 25.00 30.30 30.00 17.90 15.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.34 0.72 -4.48 2 9 2009-01-15 18:05:59 2005-02-23 15:27:09 6 1 4 0 3 11 0 38.90 67 77.43 CHANGED QVhKYCPKlGYCSSKCSKA-VWuhSsDCKhaCClPss.K .......VLKYCPKIGYCSspCSKsplWAhSpsC.KhYCCLPAuWK 0 0 0 1 +8022 PF08190 PIH1 Nop17p; pre-RNA processing PIH1/Nop17 Mistry J, Wood V anon Pfam-B_10462 (release 16.0) Family This domain is involved in pre-rRNA processing [1]. It has has been shown to be required either for nucleolar retention or correct assembly of the box C/D snoRNP in Saccharomyces cerevisiae [1]. The C-terminal region of this family has similarity to the CS domain Pfam:PF04969. 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.88 0.70 -4.99 26 646 2012-10-02 21:54:05 2005-02-23 15:49:18 7 21 191 0 432 665 8 212.10 14 58.79 CHANGED LpD.ppc......cthcp.lpphpppps..p.hhhlpPpPGaClKTphsss.t...KsFINlCpssclstP...pppthht...stt.shpaplPhSlups+tptDpsspsCsVaDVlhNPsslppspcsptF+phlhshAhculcpca...plpLs.csh+hh.phKaKG.shpspplRtpt.tp..tt.t................................ssphttpptstplhpph................ptt...s.t..hpphssst......stpPpY..............phphhpcs......psshPcplllclcLPtlpSsppssLcluccclhl.stp................................tYhLs...l.LPYslcc-pspApFs+pp+tLplphPV 
.........................................................................................................................................................................................................................................................................................hhl.Nh......h...................................................................hP....h..........p....................................................h.hshhhp..h.h.................................................h.....h.ht..ht.............t.....h...........h...........................................................................................................................................................................................................................................................................................................................................................................................s...h.hp...h.h...s........h....t...t.h.lplt.........h.h........................................h....h.hs...l.......t.......s.h.................................................................................................................................................................................... 1 180 235 336 +8023 PF08191 LRR_adjacent LRR adjacent Mistry J, Schubert WD anon Pfam-B_1177 (release 16.0) Family These are small, all beta strand domains, structurally described for the protein Internalin (InlA) and related proteins InlB, InlE, InlH from the pathogenic bacterium Listeria monocytogenes. Their function appears to be mainly structural: They are fused to the C-terminal end of leucine-rich repeats (LRR), significantly stabilising the LRR, and forming a common rigid entity with the LRR. They are themselves not involved in protein-protein-interactions but help to present the adjacent LRR-domain for this purpose. 
These domains belong to the family of Ig-like domains in that they consist of two sandwiched beta sheets that follow the classical connectivity of Ig-domains. The beta strands in one of the sheets is, however, much smaller than in most standard Ig-like domains, making it somewhat of an outlier [1] [2] [3]. 21.80 5.70 21.80 5.70 21.00 5.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.77 0.72 -4.25 29 1133 2009-01-15 18:05:59 2005-02-23 17:17:53 6 181 124 32 13 1027 3 56.90 48 9.22 CHANGED GsllsPssIScNGsYsssslsW.s.LPs..a..hsEVSYsFsp.lplG.pspspFoGpVhQPLpp .............GsLlsPtoISDsGoY......s..p..Ps..lsW.s...LPs..a....hNEVSYsFsQsVslG.ps.....pssFSGTVTQPL+......................... 1 11 11 11 +8024 PF08192 Peptidase_S64 Peptidase family S64 Mistry J, Rawlings N anon Rawlings N Family This family of fungal proteins is involved in the processing of membrane bound transcription factor Stp1 [1]. The processing causes the signalling domain of Stp1 to be passed to the nucleus where several permease genes are induced. The permeases are important for uptake of amino acids, and processing of tp1 only occurs in an amino acid-rich environment. This family is predicted to be distantly related to the trypsin family (MEROPS:S1) and to have a typical trypsin-like catalytic triad [1]. 
19.80 19.80 19.80 19.80 19.60 19.70 hmmbuild -o /dev/null HMM SEED 695 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.26 0.70 -6.19 9 71 2009-01-15 18:05:59 2005-02-24 11:40:50 6 2 57 0 45 71 3 457.00 26 72.60 CHANGED M.hpphFuhs+tpc.t-tpsp...t-tppsps.tsss...........pso.hthcsh.ps..hospsssuspuSuh...........ascuphshssulSsh..p.ssoh.pp.....sutpsssuhhu.tsst+shpsppsSh.s..ssh.ttppssotssucspssssSp.hp.hshsEppppp..ppt..pltcpLppLtpcLshlMsplppslhNlSpAVIssI-hFKcF..............................h.oh.............psphsaplos.ssuslR+IhKIhLHFhDNLLss-sa.p.+hlLl+pa.cFhppLN.psc.......tstslsp.+saAIG..psssLPscDpltpIh-cIspss.S.lp-QsGuFIAPlLRGlopchsILslhFGhPsPpppHachlpsLasLas.DlHhhshKs.IchAusss.sss..hst...................................pph.....h.pFp.PaRlPsDs.pPPhShSlSoEsos+hSGTlGGYlYPpIs.ppps+LpuYAsupFAITCuHVsLspsp....sYPpVusPSsVL.shYKpuLtcphp+as...........-spltasuhhpplpcl...hphp....................scp+FGQllWGERolIs.......................p+LSDhAIIKVNpphcC.pNaLGDDlt.s..DPuLhFcNLYVRKh..lp+hpP..........................GhpVFKhGuTTKYT+GslNGlKLVYWhDGcIpSSEFlVsS..ssshFAuGGDSGuWILTKL-D......phGLGVlGMLHSYDGEhKQFGLFTPhs-IL-RLcpVTsIcWsl 
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...................................................................hGplh....h..up+.h.p.......................tph...DhAllchs...tph.s.tNhl..Gssl....t..sPsh..hhpNh.Vpph.....l.p..hts............................Gh.pVFKhGsoTsaTsGp...lN.u...h...+...L.l...Y.Ws.......D..Gp......l..p..o....o..EF...lVsS.........sssh...............FAsuGDSGualLoKhps.............................t.uLsllGMlauhDtc.tpasl..hoPht.I.pclp.hT.................................................. 0 9 24 39 +8025 PF08193 INO80_Ies4 DUF1711; INO80 complex subunit Ies4 Wood V, Mistry J anon manual Family The INO80 ATPase is a member of the SNF2 family of ATPases and functions as an integral component of a multisubunit ATP-dependent chromatin remodelling complex. This family of proteins corresponds to the fungal Ies4 subunit of INO80. 
20.10 20.10 24.40 23.90 19.30 19.20 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.76 0.70 -4.41 11 94 2009-01-15 18:05:59 2005-03-04 14:12:13 6 4 91 0 75 89 3 211.90 32 78.21 CHANGED Muuuot.ssssssRpppuus.s...........+tlVsLKLoschLpphh.........................uss.lKccpP.....S.SPuuSsst.s..sSusDNASDus.STP..ssssuuscsPpppuhPuP......KsG...sKRussts..u-ops+sRGKPGPKKKsRL........DDGT.-.......ssphsuuHRLGPKANtG............AINAGLRALDRSGpPCR+WERKslpLKSFTGl.WpLPsWRuP.s.psEpssEspp.s.pTGDSsSKsNp...ssSul.SEKSNoG ...................................................s..t.p.s.....................phhlsLplssthLpth..................................t.....s.ct.pps.....................p.oPss...sss..............sss-ssS-...us....sTP.........ss.ssss.t.tts.hssP...........KtG......sKRuhs.t..s......ts..s..........s........hs+sRGKPGP.K.KK..RL........-DGshs..........................sts.ss.ss.p+LGPKANtG............AINAGLRALDRSGKPCRKWp+pshpLKSFTGlhWplP.sWpu......P..cs.ps...ppsspppt.................sspussptp..........pttt...up................................................ 0 16 38 62 +8026 PF08194 DIM DIM protein Rossi R anon Short protein clustering Family Drosophila immune-induced molecules (DIMs) are short proteins induced during the immune response of Drosophila. This family includes DIMs 1 to 4 that have masses below 5 kDa [1]. 20.80 20.80 20.80 20.80 20.30 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.75 0.72 -4.00 7 97 2009-09-10 23:55:24 2005-03-08 14:48:02 7 1 12 0 49 82 1 35.70 50 49.37 CHANGED MKhLol..shslhLLA.LAsAsshs..PGpVhING-C+sCN ...MKahol..shlLuLLA..LAsAssls....PGsVlINGcChsCN... 0 10 10 31 +8027 PF08195 TRI9 TRI9 protein Rossi R anon Short protein clustering Family Putative gene of 129 bp in the Trichothecene gene cluster of Fusarium sporotrichioides and F. graminearum. Encoding a predicted protein of 43 amino acids which function is unknown [1,2]. 
25.00 25.00 25.10 88.00 19.50 17.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.42 0.72 -4.32 3 20 2009-01-15 18:05:59 2005-03-08 14:50:26 6 1 19 0 1 5 0 43.00 93 100.00 CHANGED MLAAAKLIDSYEMDPDVSWLEVFAYSGVSAALCATIWVAAKAC MLAAAKLIDSYEMDPDVSWLEVFAYSGVSAALCATIWVAAKAC 0 0 0 1 +8028 PF08196 UL2 UL2 protein Rossi R anon Short protein clustering Family Orf UL2 of Human cytomegalovirus (HCMV) which is a short protein of unknown function [1] 25.00 25.00 119.90 119.80 18.90 16.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.01 0.72 -4.23 2 13 2009-01-15 18:05:59 2005-03-08 14:53:45 6 1 6 0 0 10 0 59.60 86 100.00 CHANGED MttDuVuILIVED.s..hPSFGohsASHA.YuFRlLRGIFhlTlVlWslhWlKLLRDshh MuEDSVuILIVEDDDDAYPSFGTLPASHAQYGFRLLRGIFLITLVIWTVVWLKLLRDALL 0 0 0 0 +8029 PF08197 TT_ORF2a pORF2a truncated protein Rossi R anon Short protein clustering Family Most isolated ORF2 of TT virus (TTV) encode a 49 amino acids protein (pORF2a) because of an in-frame stop codon. ORF2s isolated from G1 TTV encode 202 amino acids protein (pORF2ab) [1]. 22.00 22.00 24.10 28.30 20.30 19.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.35 0.72 -4.19 2 69 2009-01-15 18:05:59 2005-03-08 14:59:27 6 2 2 0 0 64 0 33.30 77 65.01 CHANGED MAEFShPVRSttATEGh.pVPRAGAtGEFTHRSQGAIRARDWPGYGQGS MAEFSTPVRStpATEGc.RVPRAGAtGEFT...................... 0 0 0 0 +8030 PF08198 Thymopoietin Thymopoietin protein Rossi R anon Short protein clustering Family Short protein of 49 amino acid isolated from bovine spleen cells [1]. Thymopoietins (TMPOs) are a group of ubiquitously expressed nuclear proteins. They are suggested to play an important role in nuclear envelope organisation and cell cycle control [2]. 
25.00 25.00 25.30 36.90 24.60 23.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.09 0.72 -4.44 7 109 2009-01-15 18:05:59 2005-03-08 15:14:41 6 4 36 2 42 92 0 48.90 74 11.69 CHANGED spFLEDPulLTK-KLKSELlApNVsLPsu-p+K-VYVQLYLKpLTspNp ...PEFLEDPSVLTK-KLKSELlANNVoLPuGEQRKDVYVQLYLQHLTu+N+... 0 6 10 23 +8031 PF08199 E2 Bacteriophage E2-like protein Rossi R anon Short protein clustering Family Short conseved protein described in Lactococcus Bacteriophage c2 of 37 amino acids [1]. 25.00 25.00 36.80 36.80 18.10 16.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -8.11 0.72 -4.36 2 7 2009-09-10 18:27:20 2005-03-08 15:17:58 6 1 7 0 0 4 0 31.60 89 94.44 CHANGED ML.RLLY.RFGK.IKRRlLIDNFSNFCaYNFIs.Fh. ML.RLLYSRFGKFIKRRlLIDNFSNFCaYNFIh.Fh. 0 0 0 0 +8032 PF08200 Phage_1_1 Bacteriophage 1.1 Protein Rossi R anon Short protein clustering Family Gene 1.1 in Bacteriophage T7 encodes a 42 amino acid protein, rich in basic amino acids suggesting its interaction with nucleic acids [1]. Many homologs are present in different T7 and T3-like bacteriophage. 19.90 19.90 20.40 20.40 19.00 15.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.52 0.72 -3.77 6 29 2009-01-15 18:05:59 2005-03-09 11:13:14 6 1 24 0 0 18 0 42.50 49 88.58 CHANGED MR.NFEKhTKR.uNR.scp.F-hpEt.c+G+KhNKspRsRupKRs.WE ..............MR.NFEKhTKR.sNh.scc.hEhpEtps+h+KhpKspRspupKRp.Wc........ 0 0 0 0 +8033 PF08201 BssC_TutF BssC/TutF protein Rossi R anon Short protein clustering Family BssC short protein (57 amino acids) has been described as the gamma-subunit of benzylsuccinate synthase from Thauera aromatica strain K172 [1]. TutF has been identified and described as highly similar to BssC in T.aromatica strain T1 [2]. 
25.00 25.00 34.00 33.10 17.50 16.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.94 0.72 -4.48 4 18 2009-01-15 18:05:59 2005-03-09 15:12:35 6 1 14 0 6 17 1 57.30 53 97.91 CHANGED M..TTCKsCuFaFuVPEsAsDaEsGKGDCVppKEDtKGKYWLSKPshcsossCtsF+.Kp ...MoTCK-Cp.FFslPEsADDaEsGKGDCVpE+cDpKGKYWLSKPlh..-su..s..sCpsFp.K.t........ 0 3 4 6 +8034 PF08202 MIS13 Mis12_component; Mis12-Mtw1 protein family Mistry J, Wood V anon Pfam-B_127825 (release 16.0) Family Mis12-Mtw1 is a eukaryotic conserved kinetochore protein that is involved in chromosome segregation [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.99 0.70 -5.35 9 220 2009-01-15 18:05:59 2005-03-15 09:49:21 6 6 165 0 141 229 2 265.80 21 60.35 CHANGED h.Lpss.hh.pppp..htpptpspR+u.hppRGRRhS.lspth.....lssPHp-VstpEaY+plsss.Lucsh+h+QLLhWshp+ulpchc.......................h.tpsp.ss.htls+sIhcphlcDh+tsphslsW.s+c.t-c.s...........sDsplpp......p.........................................................PNspNlpNcpsls.lcpKlsplcpEhppWsphh.cspp.........sshch.tppp.ht....s+lppsp.-ssss......................splhsphEpplDpLptss+pLputs.hhs-hsspplp.+lshhhtp+hhpchcp..s.t.................ohcLLRsLo........upsp 
......................................................................t.......tpttps.RRuSht.pu.RRhS............................h..Hppl..s-hh+pIssp..LsEscRhppLLhWshptultch..............................................................hh+slpcthlcshtscs..hsW.s+c.tpt.s.................................l.t.......................................................................PNspNlpspppltplp..p..plp....clptEcp...pWpthh.phpp................phch...t....t....p....tt....hp..psh.psth......................................................................tth.t..sp.l..s...phchh...lDpLttts+.Lpt......h.c.ssphh.........h.t...p.t.............................................th......................................................................................................................................................... 1 31 64 109 +8035 PF08203 RNA_polI_A14 Yeast RNA polymerase I subunit RPA14 Mistry J, Wood V anon manual Family This is a family of yeast proteins. A14 is one of the final two subunits of Saccharomyces cerevisiae RNA polymerase I and is proposed to play a role in the recruitment of pol I to the promoter [1]. 25.00 25.00 58.30 54.90 21.10 19.70 hmmbuild --amino -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.74 0.72 -3.74 17 47 2009-01-15 18:05:59 2005-03-15 10:13:06 6 1 45 3 29 38 0 79.80 43 51.19 CHANGED sPlsl+htus.ptlsp--s.phLscFIsppEph........s..sstt.s......ssthussssssusLuQLKRlQRDL+GL.............PPhhs .TPlllHssph.pplop-Es.pFLppFIcppEsl...........sh.ssstss.....ss.....ssTuhshcoshooslSQLKRIQRDh+GL.............PPs..s.. 0 4 14 26 +8036 PF08204 V-set_CD47 CD47 immunoglobulin-like domain Mifsud W, Bateman A anon Pfam-B_2739 (release 7.5) Domain This family represents the CD47 leukocyte antigen V-set like Ig domain [1,2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.95 0.71 -4.38 12 126 2012-10-03 02:52:13 2005-03-18 11:48:33 6 10 67 8 29 205 0 121.20 39 39.37 CHANGED hhlsshhss..spLhhsshhoVp.aTsCNsTVslsC.lssl.hpshsplaVcW+FpscsIhhhssspp.o.......................Lphchscsl..sGNYTCEshph.p.phcphIcLph.hhpWFo.pEp .................................h..lt..hhh..s.llhssTKolE..aTsC..N-Tll.IPCh..ls.N.......t...sphY.l+WK...h.....c....s.....+.s......I...h...o....a....sts.p..ppoh.....hsp.a.o...S.A+l......ppL..cu..s.s.S.L.h.h.-.h..p.D.hl..sGsYTCEsht.sp..th.cohlcLp.+hssWFss.p..................... 0 3 5 9 +8037 PF08205 C2-set_2 CD80-like C2-set immunoglobulin domain Bateman A anon Pfam-B_280 (release 17.0) Domain These domains belong to the immunoglobulin superfamily. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -9.86 0.72 -4.08 33 3971 2012-10-03 02:52:13 2005-03-18 11:48:59 7 376 126 29 2150 9868 2 86.30 17 19.89 CHANGED splpss..shlsst.....t.phlhpCs...ossGhPssploWhpsspsh..........hpsppppsspstlholpSpLphsssp.pspspslsCplpasshptpp ......................................................................phplsCp.....ut.s..u..p..P...s..s..p.l..p..W..h.....p..s..s.p.th............................................pp..p..p..t...p....s......p...s...s.....h...h...s....s..p..S...p.l.p..h.p....s.s.t...t...t..pst....p..l.sCps.pp.......t.............................................. 0 644 828 1357 +8038 PF08206 OB_RNB Ribonuclease B OB domain Bateman A anon Pfam-B_484 (release 17.0) Domain This family includes the N-terminal OB domain found in ribonuclease B proteins in one or two copies. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.64 0.72 -4.46 48 5260 2012-10-03 20:18:03 2005-03-18 13:56:51 6 22 3124 6 885 5744 3246 58.60 30 10.43 CHANGED Gplpua.ccGF.GFltsDs.....tppDlFlPtpphppshcGDpVlspltt.tccct+pcupll+l .............Ghh.sp..ccG.F..GFl.h..s.....-s............ppDlF.....I.....P.....s.....s.....p.....h.....p.....t.....u.....h......c....G.DpVl..splpp....sc...+.c..s....c....t..cGcllcl.................................... 0 278 554 744 +8039 PF08207 EFP_N Elongation factor P (EF-P) KOW-like domain Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.50 0.72 -4.10 222 5745 2012-10-01 20:16:17 2005-03-18 14:02:52 7 10 4640 16 1192 3021 2037 57.00 36 30.63 CHANGED sss-l+pGhslph.cGp.ahl.....................l-h.pasK.PGKGsAhsRsKl+NlhoG.shh-coF+u.u-pl ..............ss-h+sGhsl..ph.-.G.p...hhtV.....................l-h..pasK..PG..K.G..s..A.hsRsKl+slhoG.phlEcTF+us-ph.................... 0 397 771 997 +8040 PF08208 RNA_polI_A34 RNA_polI_final; DNA-directed RNA polymerase I subunit RPA34.5 Mistry J, Wood V anon manual Family This is a family of proteins conserved from yeasts to human. Subunit A34.5 of RNA polymerase I is a non-essential subunit which is thought to help Pol I overcome topological constraints imposed on ribosomal DNA during the process of transcription [1]. 
28.10 28.10 28.10 28.10 27.30 28.00 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.40 0.71 -4.64 25 173 2009-01-15 18:05:59 2005-03-21 15:31:00 6 2 161 8 126 170 0 209.50 20 59.28 CHANGED ptcsKplWLIpsPsslslspL.pplslsh.....sthtpup.sslptpsppYplhp-sht........tsstuptolLlssscspthhs........tshshsphapIpEssplPths.......htpshss+.shppscGL+hRahPsGhusss....................pt.stppppsppp..pptphhppscspccEppcK+pcpE.cpp......hpcKKsKKc+..........................pcp+ccKK+KK .............p.hpscplWhIpsPsshshssl.ppltlsh......tt.tpsp...shh.stc..sp..p...Yplhpsp....................sptsphs.lLlPsppssshhh.........tshshspshplpcssplsths...............tpshss.....pssh.p.sps..L..+hRahPhGhsss....................................tt.s.sttpsppp...pptph..p.ptp....h..p.ppc..p..pc.c+pptc....ctp...................................h.tKKpKKpc.....................................................................pp........................................................................................................................................................................................................................................................................... 0 33 64 98 +8041 PF08209 Sgf11 Sgf11 (transcriptional regulation protein) Mistry J, Wood V anon manual Family The Sgf11 family is a SAGA complex subunit in Saccharomyces cerevisiae. The SAGA complex is a multisubunit protein complex involved in transcriptional regulation. SAGA combines proteins involved in interactions with DNA-bound activators and TATA-binding protein (TBP), as well as enzymes for histone acetylation and deubiquitylation [1]. 25.00 25.00 25.20 25.20 24.10 24.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.37 0.72 -4.63 33 238 2012-10-03 11:22:52 2005-03-21 16:24:55 6 7 194 4 166 251 0 32.80 46 10.50 CHANGED ss.hhsC.NCsRpluusRFAsHL-+ChG.hupps ........spCsNCsR.tluAuRFAsHLEKChG.hGpp...... 
0 54 88 134 +8042 PF08210 APOBEC_N APOBEC-like N-terminal domain Finn RD anon Manual Domain A mechanism of generating protein diversity is mRNA editing. Members of this family are C-to-U editing enzymes. The N-terminal domain of APOBEC-1 like proteins is the catalytic domain, while the C-terminal domain is a pseudocatalyitc domain. More specifically, the catalytic domain is a zinc dependent deaminases domain and is essential for cytidine deamination.APOBEC-3 like members contain two copies of this domain. RNA editing by APOBEC-1 requires homodimerisation and this complex interacts with RNA binding proteins to from the editosome [1] (and references therein). This family also includes the functionally homologous activation induced deaminase (AID), which is essential for the development of antibody diversity in B lymphocytes, and the sea lamprey PmCDA1 and PmCDA2, which are predicted to play an AID-like role in the adaptive immune response of jawless vertebrates [2]. Divergent members of this family are present in various eukaryotes such as Nematostella, C. elegans, Micromonas and Emiliania, and prokaryotes such as Wolbachia and Pseudomonas brassicacearum [3]. 
38.10 38.10 43.90 43.60 32.50 31.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.61 0.71 -4.67 22 683 2012-10-02 00:10:39 2005-03-22 09:56:53 6 6 111 12 182 716 0 172.70 33 80.64 CHANGED hphth+ts.hsp..................................tcppohlhhhlctpt......hh+Ghhpscs.....tttHsEphhlptlhshh................pssphaplshasShSPChc..................CActluphLpph.....tslslplhsp..pLa.h.pptt.................pppGlhphhpu...Ghplchhthp...Daphshphhs...ps-...............................cthphWpstp.shthhppp.hth.p ..............................................................................................................s+pcoaLCYpVctppsst........hsh.pGhhps..p...................ttpH.AEhpFLphhpshp................sssppYcVTWahSWSPChs..................CApclscFLppp.....sploLpIFsu..RLYah..ctp...............................hpcGLRpLpps....GspltlMshp...-FpaCWcsFV...ppt.......................................csFpPWcsLppp...............h................................................... 0 17 27 55 +8043 PF08211 dCMP_cyt_deam_2 Cytidine and deoxycytidylate deaminase zinc-binding region Finn RD anon Pfam-B_8221 (release 16.0) Family \N 20.90 20.90 20.90 20.90 20.80 20.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.54 0.71 -4.12 18 900 2012-10-02 00:10:39 2005-03-22 09:57:04 6 6 846 12 148 2195 501 122.40 54 42.26 CHANGED LppYLP-uFGPpDLtl.....sshLh-ppspshslpssDs...............LhppALcAANpSaAPYScs.SGVALhspsGclYsGtYAENAAFNPSLsPlQuALlphshs..GcsassIppAVLVE+psuplSphusupsh ..........................................................Lc-YLPDAFGPKDLpI........coLL.MD.cpD....H.G...asL..s.u..Ds............................................................LsQAAlsA.A.N.+S.HhPYS+.S.P.SGVALE....s.............+.....D..G...+....I...F.......oGuYAENAAFNPoLPPLQuALh.hLsLp.......G.h...-.a..s...D..IpR.AV.....LsE+uc.A.sL.QhssTp..s................................... 
0 35 71 109 +8044 PF08212 Lipocalin_2 Lipocalin-like domain Bateman A anon Pfam-B_2479 (Release 17.0) Domain Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The structure is an eight-stranded beta barrel. 21.10 21.10 21.10 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.83 0.71 -4.40 19 1931 2012-10-03 08:47:39 2005-03-23 14:57:04 7 16 1379 7 485 1577 247 138.30 39 74.08 CHANGED lD.LpRYhGpWYElARhPhhFp.+ssscspApYsLp-DG.pIpVtNcChp..hcG...phppspGpAphtssup.su+L+VpFhs.........spu-YaVlhlDs-.YphAlVGsPsRcYLWlLSRoPplscpphppllscA+pp.GYDsscLhhssp ..........................................................................hDhpR.Y.l.Gp.WYE..I........A...R......h....s...p....p..F....E.....+...G...h....p.p....l..o.As.....Yo.L...c...s.....D.G.....s.....l.pVl..N....+shs..........ct..............hpp.s...c.G....c.....Ah.h..s...sss.......s..............putL..K..........Vo.Fas........................shhG.sY.Vlt.....L...........D..........p.....-.......Y.......p.....a.......A.......l......V..........s.........u.......P........s......+.......c..........Y..........L..W....ILSRo.Ps..l.sc...p..h....hp.p.h.ls.hAppp..G.a.D..sscllhs..................................................... 0 154 272 396 +8045 PF08213 DUF1713 Mitochondrial domain of unknown function (DUF1713) Mistry J, Wood V anon manual Domain This domain is found at the C terminal end of mitochondrial proteins of unknown function. 20.10 20.10 20.10 20.10 17.90 19.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.94 0.72 -4.34 23 504 2009-01-15 18:05:59 2005-03-29 14:01:14 6 5 475 0 231 276 2 32.90 55 24.09 CHANGED pspSVhR+RRpKMpKHKaKKLpK+pRsLRR+Lc+ ...hsSVlKKRRK+MsK+K+RKLl++TRhpRR+htc.......... 
0 74 139 193 +8046 PF08214 KAT11 DUF1714; RTT109; Histone acetylation protein Mistry J, Wood V anon manual Domain Histone acetylation is required in many cellular processes including transcription, DNA repair, and chromatin assembly. This family contains the fungal KAT11 protein (previously known as RTT109) which is required for H3K56 acetylation. Loss of KAT11 results in the loss of H3K56 acetylation, both on bulk histone and on chromatin [1]. KAT11 and H3K56 acetylation appear to correlate with actively transcribed genes and associate with the elongating form of Pol II in yeast [1]. This family also incorporates the p300/CBP histone acetyltransferase domain which has different catalytic properties and cofactor regulation to KAT11 [3]. 20.30 20.30 21.30 20.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.36 0.70 -5.37 34 515 2009-01-15 18:05:59 2005-03-29 14:08:11 6 56 258 9 340 491 6 297.80 30 21.94 CHANGED LsshLtch..lPpstplsIRhluSssppspsh.sh.st..t....psphsa+pcthhshpphss.......................-lslFGlcV..Yso............sspRplalShhDSstah+............thcstshpphlluYL..s+pcsaspth................................................................................hhuh.sptuspYlF...PsspcsPc...........scpLhcWah+hL-phl.....................................pstlllssp-hacphthhsspsp..s.............................................................stsplPhFssD.hsphL--LhcE............................lpp.h-phthRpEhphuthsu.hhh......................ts......t.hppt..h+tshptht.....tshsssspts.pplhsc......lhpphp.tp..hhhlphptssptsspshhs 
..........................................................................................p..hstst...plh...lRhlt.sssp.ps..cst.thht...t..h..p..pt...hspphs..Y+s+s...lhs.Fpc...l-G..............................s-VshFGh.aVpEYs.otss...........sNpRpVYlS..YLDSlpah+Pp......................thRT.tlY+.ElLl.u.YLc.a..s+p.h..G.a...spsa................................................................................IW.A.C.PP.p.cG..DDYIFa.sHPssQKhPK....................................sc+LpcWYp+hL-+uh......................................................................................tctllhsh......p.-hac.p.........sppcths.......................................................................................................ssppl.PaF-sDhasphlE-.lcE..........................................................................................lcp..-pht..hpp...p.t..s...t....s.th.................................................................................pt...pt....p...................p.p...t....t......h........................................s.......................................................................................................................................................... 0 96 158 280 +8048 PF08216 CTNNBL DUF1716; Catenin-beta-like, Arm-motif containing nuclear Mistry J, Wood V anon Pfam-B_13045 (release 16.0) Domain CTNNBL is a family of eukaryotic nuclear proteins of the catenin-beta-like 1 type that contain an armadillo motif. A human nuclear protein with this domain (Swiss:Q8WYA6) is thought to have a role in apoptosis [1]. The interaction of CTNNBL1 with its known partners (the Prp19-CDC5L complex and AID) is mediated by recognition of NLS (nuclear localisation signal) motifs. The RNA-splicing factor Prp31 is also an interactor, with recognition also occurring through the NLS. CTNNBL1 uses its central armadillo (ARM) domain to bind NLS-containing partners [2,3]. 
20.50 20.50 20.50 25.10 20.40 18.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.19 0.72 -4.46 12 275 2009-09-11 07:46:25 2005-03-29 14:47:21 6 9 226 0 202 271 0 104.80 41 19.23 CHANGED u-ccclslhchh-pstps.....EsLD-sslKKhlLsFEKRhhcNpEhRlKaPDsPEKFh-SElDLcchIpch+slAshP-LY..hV-LssVpSLlsLLuH-NoDIslsVl ........................t....t.phhphl-pptptt.......EslDtssl++hlLsFEK+hhKNtEhRhKasDsPpK..FM-SElDLcstIpphp....llAs.tP.-.....L.Y..hV.cL..s.sls.....SLluLL.u...H-NT.DIulssl.................. 1 77 112 164 +8049 PF08217 DUF1712 Fungal domain of unknown function (DUF1712) Mistry J, Wood V anon manual Family The function of this family of proteins is unknown. 18.20 18.20 18.30 18.80 18.00 17.80 hmmbuild -o /dev/null HMM SEED 604 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -13.03 0.70 -6.27 14 421 2009-01-15 18:05:59 2005-03-29 15:31:36 6 7 235 0 288 406 0 245.80 15 61.02 CHANGED hhlFsss..hGppEGp..EhcKlLhaHP.......phslspKlpsIGlsEullpFTcsFossc.sCcs.....lcsp+pshlhhcsEssaWhshslp....p.csK-h.t.h..........hhpshL+psYphFphhpGshpuhhp.....t.scptLpshLp-ahhsah..pplp.hspp-hhc.hsol.................................QhLsLs+psaLplp................................uhlss....hssshsslt.hSshLap-pllhus.lusc-......................................................tphLasaslshhhshss.s-sus..hhpso.uhhh..sps..............h..hohsptsssspsph......................hsplal..............phcpppc.htlLhaphps.lslhlhhss.phhspp-hhpcLctplhpshoplhpslt.plsppts.st.tpcspsa..........+Ylh...hsptshth+ool.thhst.sp.s.tls.ssLcll...........................................s-lssttptu..p.tstttstEhh......................................................l+shsssWllt+psst+challLcph...................usol....l-lscplt+hsspah.sshF 
............................................................................................p.....t........tplhhah...............................t....php.lGh.puhhthsp...h...s.t....t....p...........................lp..pp.p....hh.p.E..tha.h.....h.lp....................................................................h.t.l...hthahh..s...................................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 2 100 149 222 +8050 PF08218 Citrate_ly_lig Citrate lyase ligase C-terminal domain Bateman A anon Pfam-B_3588 (release 16.0) Domain This family is composed of the C-terminal domain of citrate lyase ligase EC:6.2.1.22. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.12 0.71 -4.82 14 1051 2012-10-02 18:00:56 2005-04-01 14:24:12 6 8 898 0 105 1842 244 182.00 56 52.98 CHANGED luuIVMNANPFTLGHpYLlEpAuppsDalHLFlVs-DsShFsap-RhsLlppGspcLsNlslHsGucYlISpATFPuYFlK-ps..sVhcspsplDlplF+chIApsLsIT+RYVGpEPhsplTshYNQtMpphLtpst.....Ipll.ltRp..ptsspsISASpVRphlccsshptltpLVPtTThpYl ...............................................IGsIVMNANPFThGHRYLlpQAAupC...D..W..LH..L..F..l..V....+...E...D....s..S....h.....F.s.Y..cDRhsLVhcGs.A..c..l...s...p...lTVH.p.G.S....-...YI....I...S...R...A...T...F..P..s..Y..Fl..KEpu..llsc..saspID..LpIFRpalAPALG.......I.THRFVG.oE.......P.......FCpVTspYN.QsM+hh.Lc.sss.s.ussIclVEItRl...........phpptsISAS+VRpLLscp....-.h.s.........AIusLVPssTLpYL............................................................................................. 1 27 54 77 +8051 PF08219 TOM13 Outer membrane protein TOM13 Mistry J, Wood V anon manual Domain The TOM13 family of proteins are mitochondrial outer membrane proteins that mediate the assembly of beta-barrel proteins [1]. 25.00 25.00 36.80 35.60 17.50 16.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.75 0.72 -4.22 16 121 2009-01-15 18:05:59 2005-04-01 16:42:45 6 2 120 0 92 112 0 81.80 45 43.57 CHANGED lshssDuEststhppps.s.pssusssh......slWull+uuuINLlLPFlNGhMLGFGELlAHEluFRasW......sGs+l.P.pRh ................................................................l...s-SEphs...s.pp...s...sssss..ssssh......................olaull+uuAINLlLPFlNGhMLGFGELhAHE.h.uFRh.GW.......sGsKlaP.pR+................. 
0 22 47 77 +8052 PF08220 HTH_DeoR DeoR-like helix-turn-helix domain Bateman A anon Bateman A Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.51 0.72 -4.43 15 15183 2012-10-04 14:01:12 2005-04-05 17:40:47 7 49 3318 0 2086 11826 840 55.70 33 21.64 CHANGED RpppIlchlpppGplolcELspthsVSstTlRRDLscLpppGl.lpRsHGGAthsssp ...........................RpptIl.p.h.l.....p..p..p...s..t..l..s.l..p.....-..L.u.p..t.....h..s.V..Sp...tTIRRDLs........p....L..pp.....pG....h....l...p...R...s....+G.GAhh............................... 0 573 1175 1649 +8053 PF08221 HTH_9 RNA polymerase III subunit RPC82 helix-turn-helix domain Moxon SJ, Bateman A anon Pfam-B_9884 (release 8.0) Domain This family consists of several DNA-directed RNA polymerase III polypeptides which are related to the Saccharomyces cerevisiae RPC82 protein. RNA polymerase C (III) promotes the transcription of tRNA and 5S RNA genes. In Saccharomyces cerevisiae, the enzyme is composed of 15 subunits, ranging from 160 to about 10 kDa [1]. This region is a probably DNA-binding helix-turn-helix. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.67 0.72 -4.10 18 285 2012-10-04 14:01:12 2005-04-05 17:50:21 6 11 249 2 200 310 6 60.60 30 11.07 CHANGED pLsstllcpaaG-lsupVsstLlppGpholpplscpsp..LshspV+puLssLlQaphVpYhtp .......................LsshllpppFG-l...ssclhstL.lp.p.G...p.s....lppls..p............pop.....ls....hc.pV+puLslLlQpslV.a............................. 0 63 106 160 +8054 PF08222 HTH_CodY CodY helix-turn-helix domain Moxon SJ anon Pfam-B_7573 (release 9.0) Domain This family consists of the C-terminal helix-turn-helix domain found in several bacterial GTP-sensing transcriptional pleiotropic repressor CodY proteins. CodY has been found to repress the dipeptide transport operon (dpp) of Bacillus subtilis in nutrient-rich conditions [1]. 
The CodY protein also has a repressor effect on many genes in Lactococcus lactis during growth in milk [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.02 0.72 -4.63 3 1097 2012-10-04 14:01:12 2005-04-05 17:50:46 6 4 1087 3 131 519 14 60.70 76 23.39 CHANGED pEGRLTASsIADRIGITRSVIVNALRKLESAGIIESRSLGMKGTYLKVLN-pFl+ELcchK ...........EGhLsASpIADRlGITRSVIVNALRKLESAGlIE.S.R....SLGMKG.TYlKVL.ps.chh-ELcK......................................... 0 59 91 113 +8055 PF08223 PaaX_C PaaX-like protein C-terminal domain Fenech M anon Pfam-B_9563 (release 14.0) Family This family contains proteins that are similar to the product of the paaX gene of Escherichia coli (Swiss:P76086). This protein is involved in the regulation of expression of a group of proteins known to participate in the metabolism of phenylacetic acid [1]. 21.00 21.00 21.10 21.10 20.60 20.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.97 0.71 -4.57 63 785 2009-11-26 12:00:55 2005-04-06 08:39:11 6 3 636 5 203 603 67 159.20 26 55.33 CHANGED tsW.......cGpWpllhh...sptptptRcplRcpLphhGFGplusulalpPsshtt...slpthltchshpsps..hhpsh......s....shpphlppsWsLsplsptYcpFlppapshh...............tptpt..hsstpsahtRhLLlHpaR+hLlpDPhLPpcLLPscWsGtsARpLhpslappltss .....................................................................................tWcGpWhLllh...sthcpss+tpl+cp.....LthhGFGsLt....sulahpPsphtt.......................slpp.h...lt...c...h...sls...s...pl...hhhpup.....hsps.........shpshlp.....phWc...LsphsttYcp....Flppapshh...............ttttp....hsstps.hhhphlLl..ct...aRRhlhpDPh.LPt-LLPscWsGtpA.Rphhhshhpplt..h................................................................ 0 55 122 166 +8056 PF08224 DUF1719 Domain of unknown function (DUF1719) Mistry J, Myung-il K anon Pfam-B_27966 (release 16.0) Domain This is a domain of unknown function. It may have a role in ATPase activation. 
25.00 25.00 29.10 26.60 23.20 22.70 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.27 7 104 2009-01-15 18:05:59 2005-04-07 11:11:33 6 3 7 0 74 84 0 200.20 37 46.05 CHANGED +pSuFPRRIAHATKShlSS.lhhsshDt.sopSsVRRFEhhADGAs-FLR.VEhGGo..P+pYhFFDPLIuHLLAGcsLcYchlp.Gsp.+hhhIRPhshtERGlEAhlhFshpsspsPccsFhLGhlLplSESTsllGhlI+CLQ.hhsPHF+SssEsV+pELsQLPTQDhoWl.sas..hc.haWDslHshtTpWaRPNPlCCppHs....t..toushshhtl.sh..psVIpl.lppal ............................psuFP+RlA...+As+ShlSS..hh..t.......t...s.-...p....op.............osVpRFEhFADGAs-FLRhl.E.h..Gus..spp..a.......h.h..hDPLlt+L.LAGctLpYc.h.hp....G....sp..............h.hh.hl..t..P.......h..s.ht-RGl....EAtlhhhhpDt..pts-psFhLshhLplSE.STsllGhhlpCLp...hhs...P..pFc..s.ssEss....+pcLspL.ppch...hh..P....h..........p......ahppl....Hph.ophhRPsPhCCcp.pp.........t...........t...t...t...t...t......th...EslIpl.hpt................................ 0 0 36 58 +8057 PF08225 Antimicrobial19 Pseudin antimicrobial peptide Rossi R anon Short protein clustering Family Pseudins are a subfamily of the FSAP family (Frog Secreted Active Peptides) extracted from the skin of the paradoxical frog Pseudis paradoxa (Pseudidae). The pseudins belong to the class of cationic, amphipathic-helical antimicrobial peptides [1]. 19.40 19.40 21.20 56.60 19.10 17.10 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.45 0.72 -6.81 0.72 -4.47 2 4 2009-01-15 18:05:59 2005-04-08 11:48:24 6 1 1 0 0 4 0 23.00 78 97.87 CHANGED GlNTLKKVhQGLHEsIKLlsNHs GlNTLKKVhQGLHEsIKLlsNHs. 0 0 0 0 +8058 PF08226 DUF1720 Domain of unknown function (DUF1720) Mistry J, Wood V anon Pfam-B_19709 (release 16.0) Domain This domain is found in different combinations with cortical patch components EF hand, SH3 and ENTH and is therefore likely to be involved in cytoskeletal processes. This family contains many hypothetical proteins. 
21.40 21.40 21.40 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.26 0.72 -11.42 0.72 -3.92 66 116 2009-01-15 18:05:59 2005-04-13 14:45:25 6 24 75 0 89 133 0 78.20 31 8.31 CHANGED hQP...............................QtTG............................pts..........sl.....psQsTG............hpP....................Q.TGh........................lpP....QtTGh..............................................t......h.pPQsT..............Ga ..................................................................s.ttp.....tlpsQ....TG...........hpP..........Q.TGa.....s.tt.....................LpPQpTGF........................t.h.pPQ.TG............................................ 0 28 53 79 +8059 PF08227 DASH_Hsk3 DUF1721; DASH complex subunit Hsk3 like Mistry J, Wood V, Finn RD anon manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. This family also includes several higher eukaryotic proteins. However, other DASH subunits do not appear to be conserved in higher eukaryotes. 24.00 24.00 24.70 28.90 23.50 23.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.82 0.72 -3.77 16 165 2009-01-15 18:05:59 2005-04-13 16:15:21 6 2 133 0 96 142 0 45.60 43 20.17 CHANGED pRphupLtuQLspLpsNLuchpphLchsuhQs..pshctLGshpuuh ...sRphspLhhp.ptLstNlAphsp.LchhSlQs..hc.+FLuuhshu...... 
0 16 37 68 +8060 PF08228 RNase_P_pop3 RNase P subunit Pop3 Mistry J, Wood V anon manual Family This family of fungal proteins form a subunit of RNase P, the ribonucleoprotein enzyme that cleaves the leader sequence of precursor tRNAs to generate mature tRNAs.\ The structure of Pop3 has been assigned the L7Ae/L30e fold [1].\ This RNA-binding fold is also present in human RNase P subunit Rpp38, raising the possibility that Pop3p and Rpp38 are functional homologs. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.02 0.71 -4.40 5 88 2012-10-10 14:40:03 2005-04-14 09:40:32 6 1 88 0 67 163 1 156.70 25 65.50 CHANGED KR+QVYKPVL-NPaTNEAchWP+Vc-Q.lllELLpspll+sLl+hpc...Kss.-pclssGaNEIl-lLu..pusSc-V.....hLFVCs+D...PSVLloQlPLLstsushs....VsLVQLP+uupA+Fc-+l.GhS+s...GMLLV+ccAslDpsFsshlpspVEphshPWLcs ........................................................................................................................................................................................................................................p....l............hhu......hNplsphLp........ptss..p.h.................................hlFVs+pD..p.P.sl..LhpphPhLsh.h....Asts......................l+LVtLPKuu.splussL..Gl.s+s.....uhlulp..s.ss.s.hs.ps.LhchlpppVs.....l....p..hPWLp......................................................... 0 18 38 58 +8061 PF08229 SHR3_chaperone ER membrane protein SH3 Mistry J, Wood V anon manual Domain This family of proteins are membrane localised chaperones that are required for correct plasma membrane localisation of amino acid permeases (AAPs) [1]. SH3 prevents AAPs proteins from aggregating and assists in their correct folding. In the absence of SH3, AAPs are retained in the ER. 
25.00 25.00 46.70 38.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.15 0.71 -5.35 17 149 2009-09-11 11:37:31 2005-04-14 11:25:11 6 2 130 0 110 144 0 193.30 34 90.31 CHANGED Yp-hssluTuL.IlsuToFhLGllFushPYDashLWssssT.tptFDhuLpHYphhtsoP.hlhalLahVhhLGllGthIKLYKPst-spLF-YuSLsLYhlulslYlTNl+pGlpsshsGsWG.................-VspppGlsVlAASplhlhllLlGVLlLQuGhWYApac-ppphcpFhpcE.....tttttptppptcsppppspppK ...................................sshuThl.IlssosFhLGllFupaPYDaslLWsssso..ssa.........a.-hh.pHhp.h.las.......o.....Ps...hl.tlLpl...VhhlGllGhhhKLa.KP....sEushhFDGuSLsLYhhulsVY.lsNlhpGlcssss..staG..........................................................l..scps...u....LpV..l..AASNsIlsllLlGVLlLQuGpWY.Acpc-ppchcph.tcc.........tpttttttt...................pt............................. 0 35 64 95 +8062 PF08230 Cpl-7 Cpl-7 lysozyme C-terminal domain Garcia Lopez E, Bateman A anon Garcia Lopez E Domain This domain was originally found in the C-terminal moiety of the Cpl-7 lysozyme encoded by the Streptococcus pneumoniae bacteriophage Cp-7 (Swiss:P19385). It is assumed that these repeats represent cell wall binding motifs although no direct evidence has been obtained so far. 25.00 25.00 39.20 25.30 22.80 23.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.92 0.72 -4.23 14 202 2009-01-15 18:05:59 2005-04-14 13:01:08 6 31 126 0 22 178 7 40.60 50 18.33 CHANGED lcplApEVIpGpaGNGp-R+psLt..GaDhcAVQscVNplLs ..................lDplApEVIpG..caGNGp-R+p+L...GhsYssVQp+VNchL..... 0 8 8 13 +8063 PF08231 SYF2 SYF2 splicing factor Mistry J, Wood V anon Pfam-B_11988 (release 17.0) Domain Proteins in this family are involved in cell cycle progression and pre-mRNA splicing [1] [2]. 
22.40 22.40 22.80 24.20 20.70 22.30 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.22 0.71 -4.09 26 339 2009-01-15 18:05:59 2005-04-14 14:01:54 7 5 283 0 231 317 2 149.80 35 56.19 CHANGED cppGhDh-Rp+.hhshospcsEchcc+ccKKcpp..s.uassaspps.h+tYc+ph+sh..phDh-pYc+pKcphsct.............................FYsssssh..s.....pppPsp-A.l-+llpslc+pp....................cpRpKhpR+Rttc--t...DlsYIN-RN+pFNcKLsRaYsKYTsEI+pNLE ......................................................................t.ttGpDh.-+h+.hhchos--uE+..a-+Khc+Kptc...hGFs.sYsp.ts...h....+tYc+hh+pl........psDhEpY..c+p+ppt.s-t..........................................Fassus...o..lh..au...ppt..s.s.pcs..lDRhVp-Lccp.....................c+...RtKhpRR.Rhhs..--s.....Dl..s.YI.NE+NpcFNcK....lpRaYsKYTtEI+pslE.......... 0 79 127 188 +8064 PF08232 Striatin Striatin family Mistry J, Wood V anon Pfam-B_7946 (release 17.0) Family Striatin is an intracellular protein which has a caveolin-binding motif, a coiled-coil structure, a calmodulin-binding site, and a WD (Pfam:PF00400) repeat domain [1]. It acts as a scaffold protein [2] and is involved in signalling pathways [1] [3]. 27.00 27.00 27.30 32.00 26.70 26.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.79 0.71 -3.85 31 401 2009-01-15 18:05:59 2005-04-14 16:11:00 7 11 230 0 232 376 0 125.40 42 18.23 CHANGED TLsGVh+aLQoEWp+hER-RstWElERuEM+uRIApLEGEp+s.cthppcLt++lchLEhslKpcRs+hpp............................................tt..t.tt..t.t.tsst.sp.stpspsphpcuR.hLpcshpElsY..hllss..s .........olsGlh+alQpEWsRaEh-RspWElERAEhp.............A+IAhLpGER+upEs.LKpDL..sRRIKMLEaALKQE..R.AKh++L+hGs-hs........................................................................................ptp...........p..ps....tsp.p.t.st.......ttssphth+puRphL+........Q..a..LpElGY..pIlssp.s............................................................................................................ 
0 64 103 170 +8066 PF08234 Spindle_Spc25 Chromosome segregation protein Spc25 Mistry J, Wood V anon Pfam-B_14484 (release 16.0) Family This is a family of chromosome segregation proteins. It contains Spc25, which is a conserved eukaryotic kinetochore protein involved in cell division. In fungi the Spc25 protein is a subunit of the Nuf2-Ndc80 complex [1], and in vertebrates it forms part of the Ndc80 complex [2]. 22.30 22.30 23.60 22.30 21.70 20.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.27 0.72 -3.98 58 256 2009-09-23 13:20:45 2005-04-15 13:01:43 7 7 219 2 194 247 3 73.90 28 28.58 CHANGED lcshts-p..l+FhFspl-sp-.p+E..hthslp.lss..ppYclhpssPpl...pslscllpcLN....cspshhtFl+phRctFtp ...........................hchhtt-p.LcFlFsp..lD.ps.-+c..hhFpLs..lsp.....ccYclhcspPpL....pslpcllcclN.......copshssFL+phRctFht............ 0 62 110 157 +8067 PF08235 LNS2 LNS2 (Lipin/Ned1/Smp2) Mistry J, Wood V anon Pfam-B_2646 (release 16.0) Domain This domain is found in Saccharomyces cerevisiae protein SMP2, proteins with an N-terminal lipin domain (Pfam: PF04571) and phosphatidylinositol transfer proteins [1]. SMP2 (also known as PAH1) is involved in plasmid maintenance and respiration [2], and has been identified as a Mg2+-dependent phosphatidate phosphatase (EC:3.1.3.4) that contains a haloacid dehalogenase (HAD)-like domain [4]. Lipin proteins are involved in adipose tissue development and insulin resistance [3]. 
26.70 26.70 27.50 26.90 26.30 26.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.75 0.71 -4.66 16 814 2012-10-03 04:19:28 2005-04-15 14:14:50 8 20 307 0 473 726 11 147.40 44 17.04 CHANGED VVlSDIDGTIT+SDsLGHhlshhG+D.Wo+sGlscLapclppNGYpIlYlTuRulGQAcpT+uYLppl.pts..pLPcGPllLSPsthhsuhhRElIh++PchFKhusLp-IpsLa............sppPFaAGFGN+.sDshuYpsVGlPssRIFhlNspGElhhp ...............................lVlSDIDGTIT..+SD..s......L.Gpl.l...........s...h...lG+..D....Wo..+.........tGlscLa.p..cl............ppNGY.phlYlouRul.uQAchT+uaLptl.p..t....s...............h....t...............LPcGPl.hhSPsphhsu.hp..REll..p+..cPc.tFK..hssL.p.........DIpsLF.............................................pppPFYAuFGNR.sDlhu..Yp..............pVGlP...s+IFhlsspuclh............................................. 0 132 208 346 +8068 PF08236 SRI SRI (Set2 Rpb1 interacting) domain Mistry J, Wood V, Sammut SJ, Bateman A anon Pfam-B_106465 (release 17.0) & pdb_2a7o Domain The SRI (Set2 Rpb1 interacting) domain mediates RNA polymerase II interaction and couples histone H3 K36 methylation with transcript elongation [1]. This domain is conserved from yeast to humans. Members of this family form a compact, closed three-helix bundle, with an up-down-up topology. The first and second helices are antiparallel to each other and are of similar length; the third helix, which is packed across helices alpha1 and alpha2 is slightly shorter, consisting of only 15 amino acids. Most conserved hydrophobic residues are largely buried in the interior of the structure and form an extensive and contiguous hydrophobic core that stabilises the packing of the three-helix bundle. This domain mediates RNA polymerase II interaction and couples histone H3 K36 methylation with transcript elongation [2]. 
20.90 20.90 20.90 21.90 20.70 20.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.96 0.72 -4.04 25 233 2009-01-15 18:05:59 2005-04-15 16:03:34 6 10 207 2 167 246 1 90.50 28 7.69 CHANGED psspphccpacp..hlupaVsNhlcKYccc.......ls+-shKphA+-ls+tLssKEhK.......pss...spsPstEloccpp+KlKpFs+pYMDKhltKhcp+c ..............t.tc+pcchacp..phutalsphlsta++p.......hscE-...hK+.h.A+cls+tlssK.E.hK.......psc......ss.shcls-......phc+KlKcalKcYhpKhssha+t......... 0 52 85 132 +8069 PF08237 PE-PPE PE-PPE domain Mistry J, Adindla S anon manual Domain This domain is found C terminal to the PE (Pfam:PF00934) and PPE (Pfam:PF00823) domains. The secondary structure of this domain is predicted to be a mixture of alpha helices and beta strands [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.41 0.70 -4.93 32 715 2012-10-03 11:45:05 2005-04-19 14:46:55 6 11 138 0 135 708 1 213.70 31 45.55 CHANGED P..shs...h.hl...hPt........thh......hssh..........ohspSlutGsstLpsAlpst...................ssss...hslhGaSQGAhlsst.htpLts..ssss....ssloFlhlGsPtps....sGGlhspass..hhlPh.....tlsassssPsss.........Ys....ThplstpYDuh.ADhPshPhNllAssNAlh.......G.hhhlH.........s.......sa.s...........s..s.....stshs.s.spstsusTTYhhlPsp..pLPLL.PL+.l.s.......lssslsshl-ssL+slV-tGYs 
..................................................................psl...hPt....................phhPhss.......lssh....................ohs..pSV.spGs.s..Lc.sAI..t...s...t......................ssss...lsV...h...GaSQuAhlsshthpp..Lts...s.ssss.........................sspL.s.F.l.h.lGsPtpP..............sGG..lhtpas......s.............hh....lPh....................l.s..assssPsss......................Ys..........T.h.hsh.pYDGh.uD...h...Pp...h...P...h...Nl.l.....u.s.h.NAlh...........G..hhh..sH............s...........sa..shs......................................................stt...........stsl.hs..ss..uhs...usTs.YYhl.sp..pLPLL.PLRtl..s...............lsss...lsshlpssL+slls.hGY....................................................................................................................................... 0 19 73 122 +8070 PF08238 Sel1 Sel1 repeat Bateman A anon Pfam-B_49 (Release 17.0) Repeat This short repeat is found in the Sel1 protein [1]. It is related to TPR repeats. 21.10 2.40 21.10 6.80 21.00 -999999.99 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.33 0.72 -3.19 594 40768 2012-10-11 20:01:00 2005-04-21 12:03:46 7 502 2506 23 13860 42646 6566 35.20 26 38.95 CHANGED spAthpLG........h...pG...........l.p....s....hpp..AhpaappAAppGp .......................................AthtLG..............hha.hpG.t.....................G..ltp.........D..............hpp.......Ah..paappAAptG................................. 
0 6224 9270 11773 +8071 PF08239 SH3_3 Bacterial SH3 domain Bateman A anon Pfam-B_178 (Release 17.0) Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.82 0.72 -3.78 263 10209 2012-10-02 18:48:24 2005-04-21 15:21:09 6 416 2914 9 1841 9471 1861 54.50 23 20.20 CHANGED ssl..slR..ssP.sts...up..hlsp.......l..tG..pplpl....lp..ppss................W..hcl....................t.suh.p....Gal..t..sshlp ............................s..l..slR..ssP..uss...sp....llsp...................l..ppG......pp.lpl.............ls....ppss.........................................W.......hcl.......................p.sGp..p..........Gal..s...sphl........................................... 0 797 1353 1602 +8072 PF08240 ADH_N Alcohol dehydrogenase GroES-like domain Bateman A anon Pfam-B_7 (Release17.0) Domain This is the catalytic domain of alcohol dehydrogenases. Many of them contain an inserted zinc binding domain. This domain has a GroES-like structure [1-2]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.71 0.72 -4.29 92 42970 2012-10-01 22:45:51 2005-04-21 15:24:04 7 521 5327 455 13369 34065 8204 101.00 25 23.28 CHANGED ss-Vll+lpssGlCsoDl..phhpG...............hhsphP.......hlhGHEhsGhV.p..lG.ssVps......hphG-+Vsl...shhs.C........up..............Ct.Cpp.Gp..shC.........................tphphhGhs..........hsGuaAEY.lhlP.......tptlh.l ..................................................s-Vll+lp.s.sGl..s.t..s......Dl.....th..h.pG.........................................h...ht.hP..............h.l..h.G...H...E...........s.....s..G.........h..Vh..p....l...G....s...s..V.s.s..............................h.c..sG...D.........+.V..s.s.......................s...........................................s.........s.....................t............s...............................................................................................u.....t..p..h...h.............h............................................................................................................ 0 3704 7884 11039 +8073 PF08241 Methyltransf_11 Methyltransferase domain Bateman A anon Pfam-B_21 (release 17.0) Domain Members of this family are SAM dependent methyltransferases. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.13 0.72 -3.50 172 22723 2012-10-10 17:06:42 2005-04-22 12:24:16 7 278 4590 31 8318 65769 22557 94.00 22 33.45 CHANGED LDlG...sG.sGhhsthlspt.........stplhulDhstp.......hlp......hucpphttt.....................hhtus.......sppl..Pht.-s..oFD.hl...hsttsl..pah.......pc.ppslpEhtRlL+PGGhlhh .......................................................................................................................L-lG...sG...sG..h..h..s...h..h..htpt........................stp.l....h..u......l....D...h...stt..............................h.lp...................h.u..p..p....p..h..t..t.t.t..t............................................................hht.u..s......................s.p..p..L...........P.....a.....s.....-....s............o......F...D.....h.l.....................h..s....t.....t.....s....l........pah..........................tc..h...t...p......s...l...p.....E....h....t......R....V....L.+.P.G.Ghhh........................................................................................... 0 2885 5512 7158 +8074 PF08242 Methyltransf_12 Methyltransferase domain Bateman A anon Pfam-B_46 (release 17.0) Domain Members of this family are SAM dependent methyltransferases. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.27 0.72 -3.52 108 3160 2012-10-10 17:06:42 2005-04-22 12:25:33 7 531 1307 4 1307 38110 12780 102.00 25 6.03 CHANGED L-lGsGsGthhthlhpt...................thphh.uhDhSsthl.....hstpchtphph.th.thphtthshhp..............................saDl....ll...sh.sllcah.........tshp.psLp..plt.phLps.uGhl ...................................................................LElG.uG.o.G....s...s.o.t.h.ll..pth.......................................th.c.Y.s....hoD...l......S...s.shl...........ttA.p..p............+....h...s....s....h........s..............h......t......h......t......h....h.....c..h.p..p..h..s..h..t.p.t.........................................................................................tsaD.l.........l...l......us....s.....V.L.Hss......................ppl...p...psL.p....plp...pl...L.p.P.GGhl........................................................................................... 0 345 748 1069 +8075 PF08243 SPT2 SPT2 chromatin protein Mistry J, Wood V anon manual Domain This family includes the Saccharomyces cerevisiae protein SPT2 which is a chromatin protein involved in transcriptional regulation [1]. 20.90 20.90 20.90 22.10 20.30 20.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.95 0.71 -3.78 7 281 2009-09-11 05:41:46 2005-04-22 12:54:49 6 7 216 0 195 268 0 106.40 30 22.63 CHANGED Kpsshsh..ptpSt+t...s.ph-s-pDpchpcFIcD-cE-psch..........pccIhpIFshs+p+....ap..D--D.....MEAshtEl.cEEpRutRhAcLEDccE.ph.cEctppK+t+Kp+ ...........................................................tt.........................pcp.cc...-....D...p-h..-.D..F..I...-D-t-ppc.p.......................ppt.IhthFs.h.s+p+.......Y..p....D.-....sD.......ptMEuuat-lpcEEt+St+lutpEDhcE....hchEcccp+c.Kct+Kt......... 
0 57 95 152 +8076 PF08244 Glyco_hydro_32C glycosyl_hydro2; Glyco_hydro_32; Glycosyl hydrolases family 32 C terminal Finn RD, Mistry J anon Prosite Domain This domain corresponds to the C terminal domain of glycosyl hydrolase family 32. It forms a beta sandwich module [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.12 0.72 -3.60 178 3225 2010-01-08 16:43:10 2005-04-25 13:01:37 7 49 1933 59 570 2522 88 88.70 22 16.56 CHANGED hcls............................ssp.....th...Gltlhht..sstpcpstlhac................t.tppplslDRspou....ts.......ht.thss......hctshhtt...................tth..........................pL+lhlDcSslElFsssG ....................................................................................................ththtt..................h......th..l.h......tst..scthtlh.ac.............................t.tps..plslDR.opuu........t....................ht.thssh................scss.hsp..............................................................pt................................pLclhlDpSslElFsNsG............... 
0 129 322 462 +8077 PF08245 Mur_ligase_M Mur ligase middle domain Bateman A anon Pfam-B_26 (release 17.0) Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.40 0.71 -4.25 92 26660 2009-09-15 12:35:17 2005-04-26 11:46:18 7 94 4735 65 5962 19905 11199 195.10 21 41.78 CHANGED lTGTNGKTTTsphltpll..ptt....st.hhsosG................s..t.lGlshhhhthtp.....tschhlhElSSa.......plpth.......hcsclu..lloNlsp-HL-..ha.toh-s.YhpuKtcla.ptht.ts......hsllN..h....DDt.....hhhthhpptthp.sl.sauhpsps...............lth..pssph...................................hh.h............hpl...slhG....p...aNltNsLAAluss ...................................loGTsGKTTTs...s..hl.splL.....pts.............Gh..ps...t.s...h.G.....................................................................s............l...s.h.s..h..h.h..h...h..htp........................tsc.h.hVh...Esu...p.t..................tplsth....................h.pP...p...lu.........l..l..TN.......l.s.h.D.Hh....-........ha....s.......s......h..-.s....ht.p.s.Ktp.....lh...c.t...h.............ts................h.s.l.ls....t.....-cs..............................hhp.h..h.....t...t..t....t...t...p......hh...t..a.u.h.p.tts....................th..t...tlt.h.........p..s.t.t.h................................................................................p.h.h..h............t...ht........hpl.......sl.G..........p......aNh....N.uhsAluh........................................................................................................................................................................ 0 2011 3956 5101 +8078 PF08246 Inhibitor_I29 Cathepsin propeptide inhibitor domain (I29) Bateman A anon Pfam-B_14 (release 17.0) Domain This domain is found at the N-terminus of some C1 peptidases such as Cathepsin L where it acts as a propeptide. 
There are also a number of proteins that are composed solely of multiple copies of this domain such as the peptidase inhibitor salarin Swiss:Q70SU8. This family is classified as I29 by MEROPS. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.98 0.72 -3.70 95 3417 2009-01-15 18:05:59 2005-04-26 17:20:04 7 34 536 17 1591 3509 92 58.50 30 16.86 CHANGED FppahpcasKp..Y...............tsppE....ptpRh.phFtpNhphlppaN...........pspts.......a...ph........................................ulNcFuDhopcEa ............................appattpas+s.....Y........................tst.pE.....ctp..Rh.plFc.....cNhchIcpaN...........................tspto..............a......ph..........................................ul.N...pFuDhTp-EF........................................................................................ 0 651 976 1322 +8079 PF08247 ENOD40 ENOD40 protein Rossi R anon Short protein clustering Family Rohrig et al. reported the in vitro translation of two peptides of 12 and 24 amino acids from the short, overlapping ORFs of soybean ENOD40 mRNA [1]. The putative role of the enod40 genes has been in favour of organogenesis, such as induction of the cortical cell divisions that lead to initiation of nodule primordia, in developing lateral roots and embryonic tissues. This supports the hypothesis for a role of enod40 in lateral organ development [2]. 20.40 20.40 22.70 22.70 17.80 16.30 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.84 0.74 -6.01 0.74 -3.79 4 11 2009-01-15 18:05:59 2005-05-03 16:13:00 6 1 7 0 0 9 0 12.30 76 100.00 CHANGED McLCWQpSIHGS ...McLCWQKSIHGS 0 0 0 0 +8080 PF08248 Tryp_FSAP Tryptophyllin-3 skin active peptide Rossi R anon Short protein clustering Family PdT-3 or Tryptophyllin-3 peptide is a subfamily of the family Tryptophyllin and of the superfamily FSAP (Frog Skin Active Peptide). 
Originally identified in skin extracts of Neotropical leaf frogs, Phyllomedusa sp. This subfamily has an average length of 13 amino acids. The pharmacological activity of the tryptophyllins remains to be established [1] but it seems that these peptides possess an action on liver protein synthesis and body weight [2]. 17.70 17.70 23.10 23.10 12.50 10.90 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.28 0.74 -5.89 0.74 -3.17 2 8 2009-01-15 18:05:59 2005-05-04 09:25:07 6 1 5 0 0 8 0 12.00 75 91.43 CHANGED EKPaaPPPIYPh D.KPFWPPPIYPh. 0 0 0 0 +8081 PF08249 Mastoparan Mastoparan protein Rossi R anon Short protein clustering Family Mastoparans are a family of tetradecapeptides from wasp venom, that have been shown to directly activate GTP-binding regulatory proteins. These peptides show selectivity among G proteins: they strongly activate Go and Gi but not Gs or Gt. The peptide of this family are composed by 14 amino acids but they can assume different structures [1]. 21.00 21.00 21.40 21.40 20.80 20.70 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.67 0.74 -6.03 0.74 -2.95 5 10 2009-01-15 18:05:59 2005-05-04 09:45:41 6 1 8 3 0 12 0 14.00 72 84.34 CHANGED INLKAlAAlAKKlL INLKAIAAhAKKLL 0 0 0 0 +8082 PF08250 Sperm_act_pep Sperm-activating peptides Rossi R anon Short protein clustering Family The sperm-activating peptides (SAPs) are isolated in egg-conditioned media (egg jelly) of sea urchins. SAPs have several effects on sea urchin spermatozoa: stimulate sperm respiration and motility through intracellular alkalinization, transient elevation of cAMP, cGMP and Ca++levels in sperm cells [1,2]. 
18.00 18.00 18.20 18.20 17.60 17.60 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.36 0.75 -5.62 0.75 -3.07 11 21 2009-01-15 18:05:59 2005-05-04 09:46:47 6 1 8 0 0 10 0 10.00 75 100.00 CHANGED GFuLuGGGVG GFsLsGGGVG 0 0 0 0 +8083 PF08251 Mastoparan_2 Mastoparan peptide Rossi R anon Short protein clustering Family Mastoparan (MP) peptides I II and III are extracted from the venom gland of the Neotropical social wasp Protopolybia exigua(Saussure) They are tetradecapeptides presenting from seven to ten hydrophobic amino acid residues and from two to four lysine residues in their primary sequences. These peptide cause the degranulation of mast cells. Protopolybia-MP-I also act causing hemolysis in erythrocytes. 25.00 25.00 26.10 26.10 16.50 15.00 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.18 0.75 -6.33 0.75 -3.35 2 5 2009-01-15 18:05:59 2005-05-04 09:49:55 6 1 3 0 0 5 0 14.00 76 100.00 CHANGED INWLKLGKtV.shL INWLKLGKtVIDAL 0 0 0 0 +8084 PF08252 Leader_CPA1 arg-2/CPA1 leader peptide Rossi R anon Short protein clustering Family In this family there are Leaders Peptides involved in the regulation the glutaminase subunit (small subunit) of arginine-specific carbamoyl phosphate synthetase. In Neurospora crassa it is a small upstream ORF of 24 codon above the arg-2 locus [1]. In yeast it is the leader peptide of the CPA1 gene. The 5' region of CPA1 mRNA contains a 25 codon upstream open reading frame. The leader peptide, the product of the upstream open reading frame, plays an essential, negative role in the specific repression of CPA1 by arginine [2]. 18.50 18.50 19.20 22.90 18.20 17.10 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.86 0.72 -6.75 0.72 -4.63 3 17 2009-09-10 15:06:03 2005-05-04 09:54:31 6 2 17 1 9 13 0 23.50 64 40.24 CHANGED hstSsSQYTCQDYISDHIWKApSH ....FphS.SpYTCQDYISDHIWKoSS... 
0 3 7 9 +8085 PF08253 Leader_Erm Erm Leader peptide Rossi R anon Short protein clustering Family These short proteins are Leader peptides (15-19 amino acids) of erm genes that code for resistance determinants in Staphylococcus aureus [1]. 21.00 21.00 24.90 24.40 19.60 18.50 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.28 0.72 -6.18 0.72 -4.47 5 32 2009-01-15 18:05:59 2005-05-04 11:41:20 6 1 23 0 1 13 0 18.80 80 82.42 CHANGED MGhFSIFVIsTVHYQPNcK MGhFSIFVINTVHYQPNcK. 0 0 0 1 +8086 PF08254 Leader_Thr Threonine leader peptide Rossi R anon Short protein clustering Family Threonine leader peptide of the Threonine operon thrA1A2BC. It as been sequenced in different bacteria: E. coli, Serratia marcescens, Salmonella typhi [1,2]. 25.00 25.00 25.40 26.20 18.50 16.70 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.04 0.73 -7.30 0.73 -4.09 2 68 2009-01-15 18:05:59 2005-05-04 11:43:54 6 1 68 0 8 11 0 21.20 87 98.63 CHANGED M+hIS..TTIhTThp.TTG.GAG MKRIS..TTIhTTITITTGNGAG 0 1 2 5 +8087 PF08255 Leader_Trp Trp-operon Leader Peptide Rossi R anon Short protein clustering Family The tryptophan operon regulatory region of C. freundii's (leader transcript) encodes a 14-residue peptide containing characteristic tandem tryptophan residues. It is about 10 nucleotides shorter than those of E. coli and S. typhimurium [1]. 25.00 25.00 26.70 28.10 23.40 22.50 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.06 0.74 -6.50 0.74 -3.84 4 51 2009-01-15 18:05:59 2005-05-04 11:48:22 6 1 51 0 5 7 0 14.00 71 91.30 CHANGED MsuhhuL+GWWRTS MKAIFVLKGWWRTS 0 1 2 4 +8088 PF08256 Antimicrobial20 Aurein-like antibiotic peptide Rossi R anon Short protein clustering Family This family of antibacterial peptides are secreted from the granular dorsal glands of the Green and Golden Bell Frog Litoria aurea, Southern Bell Frog L. 
raniformis, Blue Mountains tree-frog Litoria citropa (genus Litoria) and frogs from genus Uperoleia. They are a part of the FSAP peptide family. Amongst the more active of these are aurein 1.2, aurein 2.2 and aurein 3.1; caerin 1.1, maculatin 1.1, uperin 3.6 [1]; citropin 1.1, citropin 1.2, citropin 1.3 and a minor peptide are wide-spectrum antibacterial peptides [2]. 20.60 20.60 20.90 20.60 20.10 19.80 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.89 0.75 -5.92 0.75 -3.03 14 32 2009-01-15 18:05:59 2005-05-04 11:52:33 6 2 7 0 0 29 0 13.00 62 70.70 CHANGED GlhDlsKKVsGtl GLFDIlKKVsGsI.. 0 0 0 0 +8089 PF08257 Sulfakinin Sulfakinin family Rossi R anon Short protein clustering Family The sulfakinin (SK) family of neuropeptides have only been identified in crustaceans and insects. For most species there is the potential for producing two sulfakinin peptides one have a short sulfakinin sequence The function of the sulfakinins is difficult to assess. For the American cockroach, various forms of the endogenous sulfakinins have been shown to be active on the hindgut, and also on the heart. In C. vomitoria the peptides act as neurotransmitters or neuromodulators, linking the brain with all thoracic and abdominal ganglia. In adults of P. monodon they appear to be restricted to a few neurones in the brain with a neural pathway extending along to the ventral thoracic and abdominal ganglia [1]. 21.20 21.20 21.50 21.50 18.50 20.80 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.41 0.76 -5.79 0.76 -3.28 2 53 2009-01-15 18:05:59 2005-05-04 11:55:08 6 1 50 0 0 7 0 9.00 92 82.10 CHANGED .-DYGHMRF F-DYGHMRF 0 0 0 0 +8090 PF08258 WWamide WWamide peptide Rossi R anon Short protein clustering Family This family contain neuropeptides, isolated from ganglia of the African giant snail, Achatina fulica. Each peptide has a Trp residue at both the N- and C-termini. 
Purified WWamide-1, -2 and -3 showed an inhibitory effect on the phasic contractions of the anterior byssus retractor muscle (ABRM) [1]. 19.40 2.80 19.40 2.80 11.30 -4.50 hmmbuild -o /dev/null HMM SEED 7 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.27 0.77 -5.57 0.77 -2.79 2 19 2009-01-15 18:05:59 2005-05-04 13:22:39 6 2 2 0 0 18 0 7.00 83 27.71 CHANGED W+pMSVW WKQMSVW 0 0 0 0 +8091 PF08259 Periviscerokin Periviscerokinin family Rossi R anon Short protein clustering Family Abdominal Perisympathetic organs of insects contain Periviscerokinins neuropeptides of about 11 amino acids. 19.50 19.50 19.50 19.50 18.60 18.30 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.64 0.75 -5.78 0.75 -3.14 3 195 2009-01-15 18:05:59 2005-05-04 13:39:12 6 1 82 0 0 34 0 10.90 75 99.16 CHANGED GSSGLIPFPRV ..GSSGLIshPRV. 0 0 0 0 +8092 PF08260 Kinin Insect kinin peptide Rossi R anon Short protein clustering Family These neuropeptides are the first members of the insect kinin-family isolated from the American cockroach. Their occurrence in the retrocerebral complex suggests a physiological role as a neurohormone. The C-terminal sequence Phe-X-Ser-Trp-Gly-NH2 characterised the peptides as members of the insect kinin family. Data suggest a possible involvement of insect kinins in water-balance by regulating the osmoregulation. These peptides have length from 6 to 14 amino acids [1]. 19.70 19.70 21.90 21.90 17.50 17.50 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.55 0.76 -5.53 0.76 -2.79 3 5 2009-01-15 18:05:59 2005-05-04 13:46:48 6 1 2 0 0 5 0 8.00 79 100.00 CHANGED cPAFNSWG DPAFNSWG 0 0 0 0 +8093 PF08261 Carcinustatin Carcinustatin peptide Rossi R anon Short protein clustering Family A total of 20 peptides of the superfamily allostatin were isolated from the shore crab Carcinus maenas. They are named carcinustatin 1 to 20 and their length ranges from 5 to 27 amino acids. 
This family includes carcinustatin 8,9,15 and 16. 20.70 0.50 20.70 0.90 20.20 0.40 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.30 0.77 -5.42 0.77 -2.46 2 2 2012-10-01 21:03:17 2005-05-04 14:12:48 7 2 2 0 0 66 0 8.00 88 2.84 CHANGED uGPYuaGL ..AGPYuFGL 0 0 0 0 +8094 PF08262 Lem_TRP Leucophaea maderae tachykinin-related peptide Rossi R anon Short protein clustering Family These peptides are designated Leucophaea maderae tachykinin-related peptides (Lem TRPs). Some were isolated from the midgut of L. maderae, whereas others appear to be brain specific. The Lem TRPs of the brain are myotropic and induce increases in the amplitude and frequency of spontaneous contractions and tonus of hindgut muscle in L. maderae [1]. They were also isolated from brain-corpora, cardiaca-corpora, allata-suboesophageal ganglion extracts of the Locusta migratoria. They stimulate visceral muscle contractions of the oviduct and the foregut of Locusta migratoria [2]. 25.00 25.00 26.90 26.90 24.20 24.20 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.20 0.74 -5.42 0.74 -3.63 4 4 2009-01-15 18:05:59 2005-05-04 15:39:16 6 1 2 0 0 4 0 10.00 72 100.00 CHANGED APSMGFpGhR APSMGFpGhR 0 0 0 0 +8095 PF08263 LRRNT_2 Leucine rich repeat N-terminal domain Bateman A anon Pfam-B_35 (release 17.0) Family Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the N-terminus of tandem leucine rich repeats. 
20.70 3.10 20.70 5.20 20.60 -999999.99 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.56 0.72 -3.99 101 5573 2009-09-16 14:48:15 2005-05-06 17:06:28 7 2088 260 5 3276 5555 7 41.00 31 5.53 CHANGED hss-tpuLLshKsulst.......ss....shLsuWsss........sssCs...WpGVsCs ........................p-tpALLphKp.u.ls....................ss.........shL..s..o...Wsss................sssCs.........Wp.GVsCs.............. 0 335 1979 2672 +8096 PF08264 Anticodon_1 Anticodon-binding domain of tRNA Bateman A anon Pfam-B_23 (Release 17.0) Domain This domain is found mainly hydrophobic tRNA synthetases. The domain binds to the anticodon of the tRNA. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.94 0.71 -4.46 171 17806 2009-12-15 13:50:21 2005-05-07 14:43:35 8 114 4976 36 4987 14537 7549 145.80 20 16.55 CHANGED D+alLsclpphlpplspsh-.p.apaspshptlhpa...hhsphsshYlchs+sph.t.....................tslhpslcthlplLsPhhPaloEEla.......................t..........t.l..hh.....tt.a..P..p..........ptph............................................hphh.tl....lphhcp....hR...........sphpls.tstp......hphhl ..........................................................pphhhpphppslp.p.l.s.c.s.h..-.......p..acFspAhp.tlhp.F..........hhsc.hsshY.l..-..hs.K..s.h.h...htttt.t......................s...................ssl...h...p...s...l...c...s...l...l...+...llsPhhPalsE...E..lW...........................................................................ttt........pol....hh.......ss...a...Pp..s.t.h.........p.th.............................................................................tt.thp.h.h..t.l.........lp.s.htp.........hR...............sc.h.pls..sh........h........................................................................................................... 
0 1700 3159 4209 +8097 PF08265 YL1_C YL1 nuclear protein C-terminal domain Bateman A anon Pfam-B_3088 (release 8.0) Domain This domain is found in proteins of the YL1 family [1]. These proteins have been shown to be DNA-binding and may be a transcription factor [1]. This domain is found in proteins that are not YL1 proteins. 20.60 20.60 21.30 21.00 19.70 18.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.05 0.72 -4.38 51 540 2009-01-15 18:05:59 2005-05-07 14:55:52 6 11 291 0 386 523 1 29.80 42 8.37 CHANGED chCsITGh.ApYhDPpT.pl.Ytss-saphl .....hCsITGLsApYpDPpT.pL.Ysss-sFphI...... 0 114 198 309 +8098 PF08266 Cadherin_2 Cadherin-like Finn RD anon Pfam-B_179 (release 17.0) Domain This cadherin domain is usually the most N-terminal copy of the domain. 21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.54 0.72 -4.41 77 2331 2012-10-03 16:25:20 2005-05-09 16:18:59 7 43 53 2 1054 2531 1 83.80 38 9.36 CHANGED spl+YSVsEEscpGohVGNlAKDLGLslp-LusRshRllSpspcpahplshcoGsLllsE+lDREcLCupstsClLphplllEs ...........................................pl+YSlsEEhcpGohVGN.l.A.....c..D...L.....G....L....p.l.....t............c.....L......s...s....R.....p.....hR.ls..o..p..s......p......p.............phl.plshpsGt.L.h.l.s.c.+.IDR.E..pL..C...u........p....s.....s...t...Chlph-lll-......................... 0 88 224 576 +8099 PF08267 Meth_synt_1 Cobalamin-independent synthase, N-terminal domain Finn RD anon Pfam-B_553 (release 17.0) Domain The N-terminal domain and C-terminal domains of cobalamin-independent synthases together define a catalytic cleft in the enzyme. The N-terminal domain is thought to bind the substrate, in particular, the negatively charged polyglutamate chain. The N-terminal domain is also thought to stabilise a loop from the C-terminal domain [1]. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.71 0.70 -5.16 56 2913 2012-10-01 21:20:02 2005-05-09 16:57:19 7 5 2719 25 628 4722 465 302.40 41 42.29 CHANGED tspsLG.aPRIGtpRELK+AlEsYWpGchs...pp-LhpsucpLRtppWptQpssGlDh....lPsuDFoaYDaVLDtshhhGslPpRat.......t....sLDpYFshARGs....tsh...sAhEMTKWFsTNYHYlVPEhspsppFpls.ssphlp-hpEA.ptlG.h.....csKPVllGPlTaLhLuK....ststs....shhs.................LLppLlslYpclLpcLsstGsc.aVQlDEPsLshDl..spphhpshcpsYppL.........ttssscllLsTYFss.hscphshl..hpLPVs.GltlDlVc.uscpLssltp.ths..scKhLuAGll-GRNIW+sDLppsLphLppltpths ........................................................................................................h.sphLG.FPRlG.....p..RELKhAhEsYWt.....sc..ho.........c-...-Lhtsu+......-LRtcpWphQpp...s.G.l...Dh....lPs..sDFuaYD.pVLDsuhhh...GslPtRap............t..st..........ss....lDphFthuRGp...........................tsssAhEMTKWFsTNYHYlVPEas.p.s.........p..........pF.c......L.......s......h..s.......pll-EhpE.....A.....ps.L...G..h.............ps+PVLlGPl.....T...a...L..h...LuK...spsts........sths.............................................lLsc.LL.P.lYpplLscL.sc.t...........Gsc...WlQlDEP...s......L...V......h............DL........spp....h.......h......s....t....a....c......p.......s..Y.stL...................tpstsKlLLp..T..Y............F..ss..........l......t....c...........s..........h..stl....ss..L.P.Vs.GltlDhV.....c.....utcs.....L...s....tl...p.p......t.hs.................scphLsAGl....lsGRNlWRsDlppphshlcplht..t....................................................................................................................... 
0 176 381 529 +8100 PF08268 FBA_3 F-box associated domain Finn RD anon Pfam-B_322 (release 17.0) Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.94 0.71 -4.41 51 607 2012-10-01 23:07:08 2005-05-09 18:41:09 7 19 34 0 459 913 0 116.00 20 32.80 CHANGED GlCING.VlYYhAhh..................sp.....phhlhsFDVRSEcF.shIph..................h..hthsLlsYcGKLuh..h..s..t........lclWVLE.DscK.pcWS.+pha...hhs.s..th.....hphplsGsTps.GE..llhh.........sphhppsFal .......................................................................hsG.hlYah.s.......................t.......p.hlhsFDlp.sE.pF..p.hlph............................................t.t.hpL..l..ph....p..G....+.Luh.....ht.hpp.t............................hcl.Wl......Lc.....D...h..c..c..p.p.Wo.+h.hh......h....................................................................................................................... 0 120 175 213 +8101 PF08269 Cache_2 Cache domain Finn RD anon Pfam-B_865 (release 17.0) Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.12 0.72 -4.01 34 2356 2012-10-01 23:40:40 2005-05-10 17:25:59 6 99 833 2 845 2155 121 91.30 24 17.47 CHANGED +sshhpp+cppLpshlptAhshlphhtp.sptsphsc-pApppupphl..............pslRa.ssssYFalhDppsthlhHPhpPcL.Gpshts.hpDspGstlhp ................................................................thhpt+ctplpphl.p.hAhshlp.....hhpts..t......t..sth.s....cp.pA..p....p.p..u...hphl..................................psh..+...a....s...s....s......s.Y.h.a.l..h........D.....t....p.....s...s....h...lh.HP.h.ps..c.h...G.p.....s.hh.s..hpDsp.Gphlh.................. 
0 253 511 690 +8102 PF08270 PRD_Mga M protein trans-acting positive regulator (MGA) PRD domain Moxon SJ, Bateman A anon Pfam-B_5126 (release 7.7) Family Mga is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions [1]. This corresponds to the PRD like region. 21.00 21.00 22.50 22.50 20.70 20.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.10 0.70 -5.07 13 638 2012-10-02 16:05:11 2005-05-11 14:46:11 6 4 320 0 27 325 1 210.10 29 43.86 CHANGED spphlcchl...shplppo.hshh.shFpaaclLlulsaKR..+ta.lslPpstlh......cplpplhhhsplhpsspphlt.caslshspsslsYlFLsYloss.shu.sthhsppctcphhphhpchssaptLLc.lpctLshphss+pclhptL...saFp+p.lhshphLI.-hpthshptaptph.pLYptlcshlpcahpphst..ptplpppchahhshalEpl ...............s.p-.lsphl...shhhKposhshshspaphh+lLlshshhR..hsa.h-lspsph.......cphhshh.h.s...hh...ctstp.h-scaslshsp-sls.lFlsYhpsth.ls.p.hhpshccsphsc..hshp......hhs..pll-plptphtlphpN+spllhtL..........sahhRppLFs.phllh-p+t..slcpaps.a.phhss.lKp.lpcahpshph.....pshhspHhhYhhhh+hcpL........ 0 5 10 19 +8103 PF08271 TF_Zn_Ribbon TFIIB_Zn_Ribbon; TFIIB zinc-binding Finn RD anon Pfam-B_1298 (release 17.0) Domain The transcription factor TFIIB contains a zinc-binding motif near the N-terminus. This domain is involved in the interaction with RNA pol II and TFIIF and plays a crucial role in selecting the transcription initiation site. The domain adopts a zinc ribbon like structure [1]. 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.38 0.72 -4.64 71 1235 2012-10-03 10:42:43 2005-05-11 16:17:32 7 14 529 7 745 1178 225 42.80 34 11.75 CHANGED htCPpCsu.sp.l.....lhD...ppG-h.lCssC.GhVl--phl-pssEacsh ............h.hCPp..Css..s...l...........lhD.....p..pG-h.lCs.....pC.GhVl...p..-phlDttsEWRs................ 
0 230 423 611 +8104 PF08272 Topo_Zn_Ribbon Topoisomerase I zinc-ribbon-like Finn RD anon Pfam-B_5615 (release 17.0) Domain Some Proteobacteria topoisomerase I contain two zinc-ribbon-like domains at the C-terminus that structurally homologous to Pfam:PF01396. However, this domain no longer bind zinc. Indeed, only one of the four cysteine residues remains [1]. 25.00 25.00 28.90 28.90 19.10 16.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.72 0.72 -4.60 19 1903 2012-10-03 10:42:43 2005-05-11 16:18:39 6 10 949 2 262 1090 300 41.40 39 9.71 CHANGED u.ssllpas++scppYlpoct.sGKhouWpsaapstcWpssph .....t.splsp.p+pst..hlps.ts...sGh..hhuhssF.pstchpsshh....... 0 40 98 188 +8105 PF08273 Prim_Zn_Ribbon Zinc-binding domain of primase-helicase Finn RD anon Pfam-B_18441 (release 17.0) Domain \N 21.80 21.80 21.80 21.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.35 0.72 -3.88 18 651 2012-10-03 10:42:43 2005-05-11 16:18:46 7 22 488 2 103 533 210 37.60 54 6.24 CHANGED t+HsPCPsCGupDta+.asDt.st.........GshaC.sC...u......sGDGh ......+HsPCPs.C...G.G....c..DRFR.FD.Dp...cGp...........GoWhC.N..pC......G......uGDGh............ 0 21 44 78 +8106 PF08274 PhnA_Zn_Ribbon PhnA Zinc-Ribbon Finn RD anon Manual Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.62 0.72 -4.16 34 2095 2012-10-03 10:42:43 2005-05-11 16:19:05 7 3 2023 1 331 1007 47 31.30 58 26.06 CHANGED sL.....P...pCPpCsSEaoY............pDsslhlCP-CuaEWs ...........LP...sCPcCsSEYTY............EDsuhhlCPECAaEWs... 
0 97 190 266 +8107 PF08275 Toprim_N DNA primase catalytic core, N-terminal domain Finn RD anon Pfam-B_313 (release 17.0) Domain \N 21.40 21.40 21.50 21.50 21.30 21.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.86 0.71 -4.19 93 4712 2009-01-15 18:05:59 2005-05-11 16:22:45 6 53 4423 10 1053 3634 2781 126.40 37 20.82 CHANGED aapppLpssps.....AhsYLp.pRG.lstchlccFplGaAP...ps.........hptLh..phltp.cshs...p.lhpsGLltpppt..stha..Dp......F+sRlhFPIpctpG.cl.luFGGRsl..............sspp..s.KYl...NSPET.lFcKuchLY.uhtpA+ .......................................................................................................aapp.t.L.p.sstu...t....AhpYLp..pRG..lo..s.....-hl.pcF...tlGaAP....ss.............................hs.slh...chh......tp...pshsp....ptLh.cuGL.lhps-p..........sphY...DR..................F..R.s.R.lMFP...Ics...t..p.G....+V.lGFG..GRsL...................sssp...s.KYL...NSP...E.Ts.lFcKuc.LYultpA.......................................................... 0 367 705 894 +8108 PF08276 PAN_2 PAN-like domain Finn RD, Mistry J anon Pfam-B_291 (release 17.0) Family \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.89 0.72 -4.13 106 1792 2012-10-02 11:41:37 2005-05-11 16:56:24 6 91 83 0 773 1862 3 65.90 34 10.50 CHANGED C..........tssDtFhplp.shKL..PDs.sth.hp.ps.hshc-CcppCLpsCSCTA..YAh..ssh......su......sGCllWp...u-LhDh ...........................................C.....psD.sFhp.hp..shK.L....P-.sp.t..s.h...hs......ps..hshc-Ccp.pCLp.sC.s.CoA..aA...sshp.....su..........sGC....llWh...s-.LhDh................ 
0 36 432 619 +8109 PF08277 PAN_3 PAN-like domain Finn RD anon Pfam-B_1455 (release 17.0) Domain \N 21.10 21.10 21.10 21.80 21.00 21.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.68 0.72 -4.36 43 345 2012-10-02 11:41:37 2005-05-11 16:56:38 7 19 14 0 344 322 1 69.90 22 24.54 CHANGED MlllaGpPssh....ss.phpshsacsClstChpsssCllsats...ss..sCthaph...ss.lsslpp...hpssssphl.AhKh ..............................MlhhaGpstsh........ss.tttshsappClppCapss.s...Cllsahs....ss........pChhaph.......ss...l....p...lpp.....hp...pss..t..t..hl.AhKh..................... 1 74 107 344 +8110 PF08278 DnaG_DnaB_bind DNA primase DnaG DnaB-binding Finn RD anon Pfam-B_3213 (release 17.0) Domain Eubacterial DnaG primases interact with several factors to from the replisome. One of these factors in DnaB, a helicase. This domain has been demonstrated to be responsible for the interaction between DnaG and DnaB[1]. 26.80 26.80 26.90 26.90 26.40 26.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.56 0.71 -3.92 90 1338 2009-09-10 21:47:29 2005-05-12 11:10:45 6 11 1330 3 256 893 235 126.70 34 21.41 CHANGED s.hRtsluLLlQpPpLAttls..shtslpphp.s..uhslLhpLlphspp.psshsou.p.....LLEpaR...sssttphLppLAs...hp.phl..s--shpppFt-slspLhpp.h....lcpclppL..huKspt.ts..LospE..+pcLtpLl .....shRhlIuLLlQNPpLAshVs......shtslcp.p..hP.......GlsLhp-.Llpssh...u..pPu.l.oTG.Q.........LLEpaR.....so.spsssLcpLut.................W-clh..-cs...h....hEppFsDoLs+lhcp.h.....lcp.chEpLhA+pRs...pG...LosEE..+h.ELhsL........................ 0 50 142 210 +8111 PF08279 HTH_11 HTH domain Bateman A anon Pfam-B_125 (Release 17.0) Domain This family includes helix-turn-helix domains in a wide variety of proteins. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.58 0.72 -4.32 103 12449 2012-10-04 14:01:12 2005-05-13 11:46:06 7 125 3577 14 2246 23862 1739 54.70 24 15.12 CHANGED Rttpllph..Lhpscp..louppLAccLsV.Scpolh+DIptL.pttG.....h.Ituptsh..GYt ...............................pllph....L...p..s..p...p......l.......o.u.p.p......L.....Ac..p..l.....s.....V...Sc.....pTl....h+-l....p.t....L...pphs................h...l...u....t..th.....GY......................................... 1 790 1488 1900 +8112 PF08280 HTH_Mga M protein trans-acting positive regulator (MGA) HTH domain Moxon SJ, Bateman A anon Pfam-B_5126 (release 7.7) Domain Mga is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.56 0.72 -4.21 16 958 2012-10-04 14:01:12 2005-05-13 11:47:10 6 23 527 3 56 1743 53 58.10 27 12.18 CHANGED EccItcclcLlslLhc.ppphshs..-lscpLshothplpphlppLpth.Fscplthphpcst ................pKc.pRplp...LlchLhc..pphh.p....lp..ELuchLssoc+slps-Lsclppt..Fsp..h.hhp.pps.h............... 1 15 32 46 +8113 PF08281 Sigma70_r4_2 Sigma-70, region 4 Bateman A anon Pfam-B_125 (Release 17.0) Domain Region 4 of sigma-70 like sigma-factors are involved in binding to the -35 promoter element via a helix-turn-helix motif [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.16 0.72 -4.50 141 24630 2012-10-04 14:01:12 2005-05-13 11:47:27 7 126 3614 8 7611 38704 4348 53.10 25 25.60 CHANGED tttltphlppLstpp+plhhLpthpshshpEIAphl.....sls.ssVcpplpRApppl ......................h...l.pslppL...s....t.c...p...R...p...s...l...h.L....p....h......h......p....u.......h.......o....h.....p.......EIAphl........s.l.s.h.uoV+splpRAppph........................... 0 3028 5560 6758 +8114 PF08282 Hydrolase_3 haloacid dehalogenase-like hydrolase Bateman A anon Pfam-B_66 (Release 17.0) Domain This family contains haloacid dehalogenase-like hydrolase enzymes. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.36 0.70 -4.81 69 20961 2012-10-03 04:19:28 2005-05-13 16:17:25 7 63 4356 221 3153 23365 3191 223.30 21 85.03 CHANGED lshDlDGTL.Ls..............pspp.ls.ppstpslpchpp........pGhhlslATGR..shhshhphhp.pLs..lp...hhhls...hNGuhl.....tpsphl.hpphls...pctlppllchhpphp.......hphhhhs...scshah.pt..........................thhhhtpthtthhhthtp.t.htspshhKlh.....hhtstpphpph..tpplppph.............shhpottthl-..lhspsssKupulptls..pthslshpcshAFGDutNDlpMlphu....GhulAMuNAspplKphAch..lsssssc.sGluchl 
.........................................................................................................................................................................................................................lshDhDGT.L..Ls.........................................................s.p..p....p....l...s.........p.shps.l.p.p.h.p.p.........p.Gh...h.slso...GR...........h....h....t....h.....h..........h...h....p...p....lt....hp...........................hls..........N.Gu.hl................t.pt....p......h...l....hp...p......ls...................p...h.....p.lh..p..hhpp.t.............h.h.h.h..hs.........tp...th.hh.pt...........................................................................................................h...........h.................................t......................t....t....t....h.hchh..................h...p......t...t...h..t.th..........t..t...l...t...t....h.....................................phh..h...s...t...........h..l..-......l..h....t..s....s.....s...Ku....pu.......l....p......p.lh...........cp..h.............s.............l.......s.................p.....p.......s...h..A...h..G....D......sh....NDlpMl....p..h.s........G.h.u.l..A.M...s...N...A...t...s...p...l...K.t....h...A..s..h.....l..s..t..sspp..pGlhthl................................................................................................................................................ 0 948 1870 2569 +8115 PF08283 Gemini_AL1_M Geminivirus rep protein central domain Bateman A anon Pfam-B_286 (release 2.1) Domain This is the cetral domain of the geminivirus rep proteins [1]. 
20.80 20.80 21.10 21.30 20.50 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.55 0.72 -4.21 13 2813 2009-01-15 18:05:59 2005-05-13 16:58:13 6 5 1027 0 3 2628 1 102.40 48 32.13 CHANGED uQpsusDsht+Al..NusS+EEALsll+-chPc-aslphHNlpsNssRlF.csPEsassPFP.SS.FsplPEplp-WssspltpsSAt.hshR..SlllEG-SRoGKThW ..............................tpQoANDA.YAcAl..NuGSKpEALpll+EchPK...Dall.QaHNLsuNL-R.IFstPs.p.s..YlsPFs.SS.FspVP-Elp-Wss.-Nlh.....s.....u.....A....A..........R....P..........hRPhSIllEGD..SRTGKThW................... 0 2 3 3 +8116 PF08284 RVP_2 Retroviral aspartyl protease Bateman A anon Pfam-B_ Domain Single domain aspartyl proteases from retroviruses, retrotransposons, and badnaviruses (plant dsDNA viruses). These proteases are generally part of a larger polyprotein; usually pol, more rarely gag. Retroviral proteases appear to be homologous to a single domain of the two-domain eukaryotic aspartyl proteases 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.57 0.71 -4.23 7 1125 2012-10-02 15:32:34 2005-05-13 17:10:21 6 93 68 0 570 1652 9 115.90 36 9.78 CHANGED shspuRVNHlsAEpsQsuP-VlhGTF.VNSlPAoVLFDSGAoHSFIStsFVttHulth.pL+pPhhVpoPGsshpusphsPuVslcIpGlsF.us.IlLcSpsLDVILGMDWLspacGVIDCApRolsLTsspGc ........................................t............t.p.....t.t....t...l...lh..s..hh.......lps...hsshl..LhDSGAo.....HSFlSht.F.s.tp..p.t......h...t.h.p....p..L....p..p.....P..h.h.l....p.....o.....P.u...s...p...h.....p...s.....t..t...h..s...s...s...l..........s..l...c..I..p...s....h..s...F......u..s.L.I....l......L.-..s...c.......s.l...D...V..ILGMsWLspa.p.u.l.I.DC..s..p+plsl.t................................ 
0 109 151 165 +8117 PF08285 DPM3 Dolichol-phosphate mannosyltransferase subunit 3 (DPM3) Mistry J, Wood V anon manual Family This family corresponds to subunit 3 of dolichol-phosphate mannosyltransferase, an enzyme which generates mannosyl donors for glycosylphosphatidylinositols, N-glycan and protein O- and C-mannosylation. DPM3 is an integral membrane protein and plays a role in stabilising the dolichol-phosphate mannosyl transferase complex [1]. 23.80 23.80 25.30 24.90 23.70 23.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.00 0.72 -4.14 23 232 2009-01-15 18:05:59 2005-05-17 13:34:31 6 3 205 0 169 218 0 88.40 35 92.74 CHANGED Mo+hpchlhhhshlsulahulhhshls....sp..hppll.hLPhahLVsFGsYuLsslGaslhTFsDs.-stcELhppIcEAKc.L+pKGlcl ..............................................MT+htphl.hhhllsu.......lahu..Lhhshls...................ls.....ppll..hLP..h.ahLVshGsYuLhplGatlhTFsD...ss-AtpELppcIpEA+t-Lpp+Glc.......... 0 45 83 133 +8118 PF08286 Spc24 Spc24 subunit of Ndc80 Mistry J, Wood V anon manual Family Spc24 is a component of the evolutionarily conserved kinetochore-associated Ndc80 complex and is involved in chromosome segregation [1] 22.60 22.60 22.70 23.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.56 0.71 -4.40 24 178 2009-01-15 18:05:59 2005-05-17 14:02:25 6 1 166 4 123 167 0 116.60 28 57.89 CHANGED lhcL-spphcluKphscLEsplppLpsplpcLppphp-lpppt.p................sscttt.......ss.cuslL+L+LYRsLG.lpl-t...t..........................-tspsl.I+spcpss.....lpslsl-s.phScaFhosYlWspL ................................................................pL-spphcluKphscLE...tp......hpp...Lps-LpcLcppht-L-ppth-...........................sppps.................stsuslL+L+lYRuLG.Icl-h..............................phspsl..I+sp...ccGs..............lpslslDs..phScaFhusYhWpth......... 
1 32 62 95 +8119 PF08287 DASH_Spc19 Spc19; Spc19 Mistry J, Wood V anon manual Family Spc19 is a component of the DASH complex.\ The DASH complex associates with the spindle pole body and is important for spindle and kinetochore integrity during cell division [1][2]. 25.00 25.00 27.90 27.10 24.70 24.00 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.66 0.71 -4.60 20 131 2009-01-15 18:05:59 2005-05-17 16:15:36 6 2 128 0 99 123 0 146.60 35 82.74 CHANGED oLsssVsSLpuSlplLcsS...lstLcsuspDhPRLs.pVLpTsRhFELlPEs-LppAppslh-EIpPplppLls+l-cplp+LpR+cpsLpsKhELppsRLpsspspssssptt......................s...h.sssspcltcL+tLppKKERLpYslpRL .........LpssVsSLcuSlplLcsS...lphL-suspDhPRLs.plLpTsRaFELlPEsslppAptuLh-EIsPtls..pLls+scpplp+hpR+.psLpu+sE.LppuRLppspstssssstt.....................................tpt.shht.shsspc.htcL+tLppKK-pLpYsl-RL.......................................... 0 24 52 83 +8120 PF08288 PIGA PIGA (GPI anchor biosynthesis) Mistry J, Wood V anon Pfam-B_6971 (release 17.0) Family This domain is found on phosphatidylinositol n-acetylglucosaminyltransferase proteins. These proteins are involved in GPI anchor biosynthesis and are associated with disease the paroxysmal nocturnal haemoglobinuria [1]. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.93 0.72 -3.62 23 340 2012-10-03 16:42:30 2005-05-17 16:20:46 7 6 290 0 246 348 6 87.90 55 19.14 CHANGED HuYs.sRpGVRYLTNGLKVYYlPahVhYcpsohPThFusFPlhRsIllREpI-IVHGHuuhSoLuHEuILHA+TMGl+TVFTDHSLFGFAD .................................HAYs.sRpG.VRYLTNGLKVYYlPhhVh...a...p...psThPTlFs.shPllRs.........Ihl..REpIp....IVHGHu.ohS.shsHE....Alh..H..A+TM..G..L+TVFTDHSL.FGFAD................... 0 86 132 206 +8121 PF08289 Flu_M1_C Influenza Matrix protein (M1) C-terminal domain Bateman A anon Pfam-B_30 (Release 17.0) Domain This region is thought to be a second domain of the M1 matrix protein. 
20.80 20.80 20.80 23.80 20.60 20.20 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.21 0.72 -3.96 2 22741 2009-01-15 18:05:59 2005-05-18 12:08:22 6 2 22466 0 0 3048 0 94.30 91 38.38 CHANGED pHRuHpphspoosPhlR+E.pMV.A.sTAKsMptMu....put-s.clApphpp.ltshRslGsp.psutGltpDlhEsLp..QppMG..s.hp+ah ......QHRSHRQMVTTTNPLIRHENRMVLASTTAKAMEQMAGSSEQAAEAMEVASQARQMVQAMRTIGTHPSSSAGLKDDLLENLQAYQKRMG..VQMQRFK. 2 0 0 0 +8122 PF08290 Hep_core_N Hepatitis core protein, putative zinc finger Bateman A anon Bateman A Domain This short region is found at the N-terminus of some hepatitis core proteins.\ Its conservation of four cys and his suggests a zinc binding domain. 25.00 25.00 26.10 27.40 24.70 24.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.29 0.72 -4.51 2 5101 2009-09-11 06:29:00 2005-05-18 14:57:32 6 7 66 0 0 2681 0 26.90 96 24.69 CHANGED hphFpLCLIISCoCPThQASKLCLGWL ....MQLF.HLCLI.ISC.S.CPTVQASKLCLGWL..... 0 0 0 0 +8123 PF08291 Peptidase_M15_3 Peptidase M15 Finn RD anon Manual Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.40 0.72 -4.25 19 623 2012-10-02 01:02:30 2005-05-19 09:30:08 6 14 470 1 168 1206 1181 110.70 24 47.23 CHANGED taFphpEhspsssssppslD...............plhshLpslRcpaGp..........PIhloSGaRssthNppVGGApsStHhpG.....pAADlpshst.sspcltphhc...sphstch....Ghttts....salHls ..........................................................h.....tt...............t.h.............................thhphL...ctl.Rp.t..h.st..........Pl.h..l.o..........SG.......YR........s...phN.......c........p.....l........G....G......u.....s.....s.....S.pHh..hG.........pAuDltls..........sh.....s.......p.l..t....p.hht......ph.h.....t...........hh...t..........talHls............................................................. 
3 78 128 153 +8124 PF08292 RNA_pol_Rbc25 RNA polymerase III subunit Rpc25 Mistry J, Wood V anon Pfam-B_9841 (release 17.0) Domain Rpc25 is a strongly conserved subunit of RNA polymerase III and has homology to Rpa43 in RNA polymerase I, Rpb7 in RNA polymerase II and the archaeal RpoE subunit. Rpc25 is required for transcription initiation and is not essential for the elongating properties of RNA polymerase III [1]. 24.20 24.20 24.40 24.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.37 0.71 -3.92 33 328 2012-10-03 20:18:03 2005-05-19 09:32:39 7 9 290 3 240 371 3 116.40 35 51.74 CHANGED PFhGEllpG.pIpssopcGI+Vol.uFF-D.........IalPhs.L......-sspa..cpp-psWlWpht................-ppchahDhsEplRFRVppphF..-.pPhs.t..........................ttttpttppphssatll.GShppsGLGhloWW ..............PFhuEllhG.+Ipsso.t.cGl+lol.sFF-D.........IhIPsphL....................psu...p....F............-ps..-..p...s...WlWcht.........................pspcLahDh.sEplRFRVppE.a.hD.pPtssp...............................................ttt....tttpp.sPYplh.GShppsGLGhluWW.......................................... 0 82 132 196 +8125 PF08293 MRP-S33 Mit_rib_S27; Mitochondrial ribosomal subunit S27 Mistry J, Wood V anon Pfam-B_31036 (release 17.0) Family This family of proteins corresponds to mitochondrial ribosomal subunit S27 in prokaryotes [1] and to subunit S33 in humans [2]. It is a small 106 residue protein.The evolutionary history of the mitoribosomal proteome that is encoded by a diverse subset of eukaryotic genomes, reveals an ancestral ribosome of alpha-proteobacterial descent that more than doubled its protein content in most eukaryotic lineages. Several new MRPs have originated via duplication of existing MRPs as well as by recruitment from outside of the mitoribosomal proteome [3]. 
25.00 25.00 25.10 28.20 23.20 24.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.07 0.72 -4.01 30 282 2009-01-15 18:05:59 2005-05-19 09:52:05 6 4 245 0 194 253 2 88.60 34 73.37 CHANGED shhhclscluu+IFupshNPsstRoGs..KlLppcL..+Gspls...sYY..Psth.papphcphhs..thhhhD.cEsh....Rhphlch.................+KtRGKGsP......KK ...............hhhchs+Lps+IFupshp..Ps.st+ohp...Klhppc....+sspls...saY....Ppchhhh.t.h.pphh...............hhhhhDEcpch....+.chhch.................+KhRGKGtP.....KK................................ 0 58 99 154 +8126 PF08294 TIM21 TIM21 Mistry J, Wood V anon manual Domain TIM21 interacts with the outer mitochondrial TOM complex and promotes the insertion of proteins into the inner mitochondrial membrane [1]. 20.40 20.40 20.90 21.10 20.20 20.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.70 0.71 -4.54 13 289 2012-10-01 19:51:31 2005-05-19 10:32:58 6 5 252 1 206 280 0 136.30 30 55.74 CHANGED Kl+cssphoh.hhlVluGlGloGlllYlIhsELFSsSucspIFN+AlpplcsctcspslL........GcplKuaGEtsppsR..RsRshVSppch-+cGhcHhhM+FHVEG..s++pGhVplEhpcsstp..hp.-Fhaha..lDlssc++Ihlhcs+ ...............+ltcssppss.hhlllhGlulo.uslhYhl...apELF.u.s.s.Sssplas+Alc+lcpcscl.hshl...............Gp..l+...uYG..E..t..op...s+.....R.hs.op.h.p.h..c..+..c.Gh.......cHhphpFalcG..stppGhVph-hhcs.tp......tpa.-apalh..l-s.......s....p..p.p..hhl.p..................... 0 62 107 166 +8127 PF08295 Sin3_corepress HDAC_interact; Sin3 family co-repressor Mistry J, Wood V anon Pfam-B_2731 (release 17.0) Domain This domain is found on transcriptional regulators. It forms interactions with histone deacetylases [1]. 
25.00 25.00 26.90 25.50 22.80 23.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.39 0.72 -4.26 26 505 2009-01-15 18:05:59 2005-05-19 10:38:32 7 10 275 0 333 477 4 97.90 45 7.99 CHANGED sCcphGPS....YRhLPKs...s.CSGRschs...tpVLNDpWVShPohu.ED.tuFhshRK.NQYEEsLa+sEDERaEhDhllEuspsoIchLEplhpclpshspc-cts .........................pC+phGsS....YRhLPK.s..htt...CSGRstLC..................ppVLNDpW.VSa.P.oWu..E....D.S.s.Flu.p........+.K..sQYEEpLaRsEDER..aEhDhllEsshs...TIphLEsltp+ls.pho.c-p..p................................... 1 122 194 269 +8129 PF08297 U3_snoRNA_assoc U3_snoRNA; U3 snoRNA associated Mistry J, Wood V anon manual Family This family of proteins is associated with U3 snoRNA [1]. U3 snoRNA is required for nucleolar processing of pre-18S ribosomal RNA. 25.00 25.00 27.30 27.30 24.30 24.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.80 0.72 -3.75 21 98 2009-09-11 16:06:36 2005-05-19 11:16:19 6 3 96 0 72 97 0 90.70 27 29.00 CHANGED chLP--lLsshspssssss.tps....................h.tpphKtpKh+hLcphcK..psl+hGsVslpVLsspsst.....pLsPKucppshssK-pWLpRp ............thLP-ElLps.sppc...sss.tpp............................tpp...hcspKh....+hLcphcK.....csl+hGsssl+VLssssst.........pLsPKspppshpsK-pWLpR..... 0 10 33 61 +8130 PF08298 AAA_PrkA PrkA AAA domain Vella Briffa B, Bateman A anon Pfam-B_3917 (release 10.0) Domain This is a family of PrkA bacterial and archaeal serine kinases approximately 630 residues long. This is the N-terminal AAA domain [1]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.18 0.70 -5.76 7 1391 2012-10-05 12:31:09 2005-05-20 09:52:18 6 8 1305 0 343 3100 1561 347.80 58 55.27 CHANGED hoht-YL-hs+ps.pshtsAtcRlhshIt-s....hlcsttp.+lt+latNcsI++YshFs.-FaGhE-sl-+IVs.YF+tAAptLEE+KQILYLlGPVGGGKSSLsEpLKplhEhh......PlYsL.....ctsPhaEpPLpLh.PpchtphhEccaGI...clpG.hSPhsshRL.cEFGG-IpcFpVsKlh.ShhpphuIupspPuD.NNQDlSsLVGpVDIpKl-pY.upsDPcAYSasGuLN+uNpGlhEFlEMFKssIKhLHsLLTATQEGsasustphuhlsFsGlIlAHSNEuEWtpF+sN+sNEAhlDRIhllcVPYCL+loEEhKIYEKlLppSplsps.hAPcTL-hhuhFolLoRLpps-pSs ................................................................................................oLpEaLs..lC+pD.oAYAsAuERlLhAIGEP...phlDTup-sRL.S.....R.l.F..............u.N..+.l.I.t.R.Y......P.u.F.......c........-.......F.......Y..G..ME-AIEQIVu.Yh...+H...A.A.Q.G.L..E..E.+K.Q...I..LYLLGPVGGGKSSLAE.+LKpLM.pph..........PIYsL.............................ctSPVp-cP.L....sL......F.sPpE......Du........p........l........L.........-.........c........E.......YGIP....................pRYL.tsIMSP.....W...A....s....K....R...L.....c...E..............F.....G..GD.........I.....o.....+.....F.......+...V.......V......K.......l.........aP........SILpQ...IuIAKTEPGD..E...NNQDISuLVGKV...D...IRKLEc.a..uQ.sDPDA.YuY.S.GuLC+ANQGlMEFV.EMFK...APIKVLHPLLTATQEGNYsGTE.GluA.lPFsG.lILAH..SN..ESEWhoFRNNKNN...E..A..FLDRl.YIV..KVP....Y..C..LRlS.EElKIY-...KLL.pcS.-Ls....c..AsCAPu.TL-sL.............upFSlLSRLKEPENSs................................................................... 
1 95 198 277 +8131 PF08299 Bac_DnaA_C Bacterial dnaA protein helix-turn-helix Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.90 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.02 0.72 -4.12 14 4828 2012-10-04 14:01:12 2005-05-20 13:18:53 6 14 4481 14 1038 3266 2314 68.70 46 15.45 CHANGED olcpIpcsVA-haslslp-lhScsRs+slspsRQIAMYLs+pLTspSLPcIGctFGGRDHTTVlaAsRKI .............................l-sIQct.VAcaa.plpl.pDl.hu.p..+R.s.+.s.l.sp.PRQl.A.M.YL.u+EL..T.s.tSLPcIGctFGGRDHTTVlHApcKI.............. 0 367 704 891 +8132 PF08300 HCV_NS5a_1a Hepatitis C virus non-structural 5a zinc finger domain Paterson M, Bateman A anon Bateman A Domain The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. This domain corresponds to the N-terminal zinc binding domain [3]. 20.70 20.70 20.90 21.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.30 0.72 -4.06 19 5702 2009-01-15 18:05:59 2005-05-20 13:58:15 8 39 117 6 0 5232 0 61.60 83 5.83 CHANGED lPFlSCQ+Ga+GsWcGDGlhpTpCsCGA.IoGcV+NGoM+..lsGP+hCSNhW+GTFPINshTo ...lPFlSCQRGYKGVWRGDGIMpTpCsCGApITGHVKNGSMR..IVGP+TCpNhWpGTFPINAYTT...... 0 0 0 0 +8133 PF08301 HCV_NS5a_1b Hepatitis C virus non-structural 5a domain 1b Paterson M, Bateman A anon Bateman A Domain The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. This region corresponds to the 1b domain [3]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.02 0.72 -3.66 20 10555 2009-09-11 00:37:05 2005-05-20 14:01:38 8 41 117 6 0 7446 0 68.20 85 10.02 CHANGED GPusPhPuPNYppALWRVuAc-YVEVpRlGDaHYVsGsTsDsLKsPCQVPuPEFF..TEVDGVRlHRaAPsC+PLLRDEloFoVGLsuaslGSQLPC-PEPDV ...................................................................................PLLR-E.VoFpVGLNpYhVGSQLPCEPEPDV........................... 0 0 0 0 +8134 PF08302 tRNA_lig_CPD Fungal tRNA ligase phosphodiesterase domain Mistry J, Wood V anon Pfam-B_49998 (release 17.0) Domain This domain is found in fungal tRNA ligases and has cyclic phosphodiesterase activity [1]. tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. 21.30 21.30 21.50 21.30 21.10 20.70 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.89 0.70 -5.17 19 168 2009-01-15 18:05:59 2005-05-20 16:00:11 6 8 142 0 119 174 3 219.00 25 29.09 CHANGED SShcNlcpVlspL+ptYPpLl...clPosp-lDpAlphAhstY+Pshc+shshsspp.tpp....pp.........................ss.cppp+plcYaulsl.sspclpshLcshhs.s....ss-pt+hacpLhss+RlQspFHVTLIH+AupKp.pP...clWcpYsphahsphpppspsp.............h.shussclcL-+Ll.WDD+lMshlsclh...s.p...........ssa..tCsNplsHITVGThuspVKP+ESN-LLp+ah.......p.tGous-...sGlhphplsGshllpGsVthsh .................................................................................ShtNhchllptl.t.aP.lh....phPsspphctAhp.uhp.Yp.sphp+.ht.t..t......................................t.pt.h.htp.pYhul..pl....s.pplhphlpphhsss......st.tp.happLhtspRl....Q.ph...HVTLhHpsstpp......p......plWpphhphatt.thtpt..t...............................st...splpL.c.+ll..aDc+lhAlssc.lhs.......................................sph....ssN.p.hsHITlGTtpssVKPhESNcLLpchh........p...sst.t...ttl..h.h............................................. 
0 38 71 101 +8135 PF08303 tRNA_lig_kinase tRNA ligase kinase domain Mistry J, Wood V anon Pfam-B_49998 (release 17.0) Domain This domain is found in fungal tRNA ligases and has kinase activity [1]. tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. This family contains a P-loop motif. 22.70 22.70 22.70 22.80 22.60 22.20 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.11 0.71 -4.38 9 141 2012-10-05 12:31:09 2005-05-20 16:00:33 6 7 132 0 106 153 2 160.00 43 19.71 CHANGED lllPIATIGCGKTTVuhsLpsLFsp.WuHlQNDNI.ouK.sps+hh+psLchL....t+csppsVlsDRNNHph+ER+QLF-.lpph+-pals.s.sl+hlulsFlc.c.phpElh-lThsRVhpRGDNHQSIKspo.stp+VhtIMpGFlKRaQPls.s.+pPDspFDhlIcLclu .................lLVPIAoIGCGKTTlulALscLF...s.....WGH....lQNDNI.suK.tcs.cFscpsl.phL......tspssVlAD.................RNNHpp+..ERcQ...lhs...slpp.........hp.........................ss...+hlALpa.scps.......lscl+clTtpRVlpRGDNHQTI+..uso...sp..pcl......huIMcGFlpRFpslssp.pp.......P........DstFD.lIcL-..s......................................... 0 32 63 93 +8137 PF08305 NPCBM NPCBM/NEW2 domain Rigden D anon Rigden D Domain This novel putative carbohydrate binding module (NPCBM) domain is found at the N-terminus of glycosyl hydrolase family 98 proteins. This domain has also been called the NEW2 domain (Naumoff DG. Phylogenetic analysis of alpha-galactosidases of the GH27 family. Molecular Biology (Engl Transl). (2004)38:388-399.) 
21.20 21.20 21.20 21.20 20.60 21.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.65 0.71 -4.23 43 539 2009-01-15 18:05:59 2005-06-23 15:31:28 6 113 262 16 146 530 40 138.50 24 18.44 CHANGED ss.sssshL...SDlsh..hou....ssGWGslp+Dpus......supsLslsG.........psasKGlGsHAs..SplsYsL.Gst.hspFpAhVGlDcp...hssp..GoVhFpVh...sDG..pplasSss.....hpsssssptl.slDl.....sGspplcLllssuG.su.ssDHusWusA+ltp .............................................................ss..t..hlo-h.h....ps......tsth.t..s.hp.t..sts..........psp.lplts........hpa..s+.....GlGs.....p.....As.............S......plh.....Yslp..u...t.t...hspFp...uhl.G.lDcp................hssp.......sslpFpVh...........sDG......chla.sSss........hphss...ss.thl..slsl.....pGspplcLhss.s..uG..ss.s.t.t.Dc..ssaussph.................................................... 0 74 114 142 +8138 PF08306 Glyco_hydro_98M Glycosyl hydrolase family 98 Rigden D anon Rigden D Domain This domain is the putative catalytic domain of glycosyl hydrolase family 98 proteins. 21.00 21.00 21.10 21.10 19.10 20.90 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.11 0.70 -5.73 6 208 2010-01-08 16:43:45 2005-06-23 15:32:49 6 15 206 14 11 93 0 299.60 59 31.09 CHANGED uhhssssspsss...shRRsISsEpPhhhl.l.u............Wssss.pthh-hIPsDl+PYTVlpLp.u........ls+c-usup......chhEphlEpApSal..KTstspsl.shlps.SuG..chPsYsssspLpos....hh-EhFpcYPNhhGh..sEpaWsassshu.......sHhAphLKLosKYGGYh.......hWusp.NshAhtK.......psssFppAlcpYt+NFIhtpK.TsptshpD..sESlshGhWLSGaAspaGhphDohtWYEpt...........pupGs+pasststAhhhIE..plhLsGtTVash.chhaT.sVpspss...............PtFsNlhh-hFR+lIsssh. 
................h.......s...s...thRp.lss-pPhhh..h.s...............................Ws.ss.KGAWEAIPEDVKPYAAIELHPAKVCKPTSCIPRDTKELR......EWYVK.MLEEAQSL.........NIPVFLVIMSAG.....E......RNTVPPE.................WLDEQFQKYSVLKGVLNIENYWIYNNQLA..........PHSAKYLEVCAKYGAHF.......IWHDH.........EKWFWETIMNDPTFFEASQKYHKNLVLATKNTPIRD..DAGTDSIVSGFWLSGLCDNWGSSTDTWKWWEKHYTNTFET...GRARDMRSYASEPESMIAMEMMNVYTGGGTVYNFECAAYTFMTNDVPT................PAFTKGIIPFFRHAIQNPAP...................................... 1 6 6 8 +8139 PF08307 Glyco_hydro_98C Glycosyl hydrolase family 98 C-terminal domain Rigden D anon Rigden D Domain This putative domain is found at the C-terminus of glycosyl hydrolase family 98 proteins. This domain is not expected to form part of the catalytic activity. 25.00 25.00 25.00 25.90 23.20 23.90 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.78 0.70 -5.12 3 232 2009-01-15 18:05:59 2005-06-23 15:34:03 6 14 206 9 16 109 0 224.00 53 25.82 CHANGED KpEVlsRTKuVlasshTp.NGpGpYSS.pshFsGLY......TphsQpPhY.sTGRYssIPsVappID+pKIuSpFPsu.hKllocNSoELSSIssKspYLNuLYPcEYsGDlYAQRlDNoWaIYN.sYNpNKNQpGuFslhhNNsKSLslTLsPHTYuVVcENssuLsIhLNNYRT-KsuLWthutNhDtuKph.chpcl-hhNWl.csY.hNssss-hRToTITLpGtousPThoNlsGD+GcYshsTVsasssT+uhTITVsHNGslDhoIssc ........KEEVlsRTKsVha......NGpG+...hSS.pshapGLY.............oscEshPLY..ssGRYpllPVIa-hlDp-KlushFPsu..KI.......locsSpEhSS...KVsYLNuLYPc..YEGDhYAQRlsNoWalYN.sss.N...hN.........KsQps.hLPhhhN.sscS.LoL-hoPHTYuVVKEps.N..sL+IhLNNYRT-Ksuh.........Wu.utshstuKph.phpchthhpW.IpcpY.hs.......s..ss-hRTTTlTL+Gtoup.s.hslSGD+......NcYsh...T......NaDpssHVhTITVNHNG.lEhoIssp...... 0 8 8 11 +8140 PF08308 PEGA PEGA domain Mistry J, Adindla S anon manual Domain This domain is found in both archaea and bacteria and has similarity to S-layer (surface layer) proteins. It is named after the characteristic PEGA sequence motif found in this domain. The secondary structure of this domain is predicted to be beta-strands [Adindla et al. 
Comparative and Functional Genomics 2004; 5:2-16]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.30 0.72 -4.19 32 1445 2012-10-02 19:08:27 2005-06-23 15:51:23 6 105 449 0 727 1662 573 68.30 22 20.63 CHANGED huslslsSsPpGApVhlDG.thhG...pTPh.sls.lssGp+plplphpGYtsappplplp.sscshplphp..Lp.tp .............................tlplpS.sP.t..u..ApV.h..l....s...G.....p..h..h.G...........pT.....P.......h....p.....l....t...............l..s.....s...G.......p....a..p.......l..p.l.p..t...p..G..Y.p.s..h.p..p.pl..plp..ss.p.p.h.tlpht.................................................. 0 322 491 635 +8141 PF08309 LVIVD LVIVD repeat Mistry J, Adindla S anon manual Repeat This repeat is found in bacterial and archaeal cell surface proteins, many of which are hypothetical. The secondary structure corresponding to this repeat is predicted to comprise 4 beta-strands which may associate to form a beta-propeller [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. The repeat copy number varies from 2-14. This repeat is sometimes found with the PKD domain Pfam:PF00801. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -8.00 0.72 -4.97 22 449 2012-10-05 17:30:43 2005-06-23 15:54:39 6 46 137 0 309 513 544 40.50 33 8.98 CHANGED uG.spslsVSGNYAYVA..DtssGLlIVDI...SNPSSPsLpGsasT ..................s.s.ltls..G..s..YAYVA..........s..s.s.sG......Lh.IlDl......SsPssPthhuph............ 0 143 204 270 +8142 PF08310 LGFP LGFP repeat Mistry J, Adindla S anon manual Repeat This 54 amino acid repeat is found in many hypothetical proteins. Several hypothetical proteins from C.glutamicum and C.efficiens along with PS1 protein contain this repeat region. The N-terminus region of PS1 contains an esterase domain which transfers corynomycolic acid. The C-terminus region consists of 4 tandem LGFP repeats. 
It is hypothesised that the PS1 proteins in Corynebacterium, when associated with the cell wall, may be anchored via the LGFP tandem repeats that may be important for maintaining cell wall integrity [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. Deletion of Swiss:Q01377 protein results in a 10-fold increase in the cell volume of the organism and infers the corresponding proteins involvement in the cell shape formation [1]. The secondary structure of each repeat is predicted to comprise two beta-strands and one alpha-helix [Adindla et al. 2004]. 20.70 20.70 20.90 20.70 20.20 20.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.56 0.72 -4.33 31 1397 2009-01-15 18:05:59 2005-06-23 16:04:57 6 50 220 0 484 1277 0 52.30 30 25.49 CHANGED LGhPhu.sEtshsDG.G+appFps.GsIYWsssTGAasl.sGsIhctWtstuhEpG. .....................LGhPss..s.E.....h.....s...h...s....s...G............u...p.hpp...F.......p......s......G......s....l...a...W..o...s...s.........o....G.....A.a.s..l...tGslhstasphuhttu........... 1 164 343 447 +8143 PF08311 Mad3_BUB1_I Mad3/BUB1 homology region 1 Mistry J, Wood V anon Pfam-B_3330 (release 17.0) Domain Proteins containing this domain are checkpoint proteins involved in cell division. This region has been shown to be essential for the binding of the binding of BUB1 and MAD3 to CDC20p [1]. 
20.70 20.70 20.70 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.59 0.71 -4.35 25 450 2009-01-15 18:05:59 2005-06-24 09:15:27 7 14 255 13 294 455 1 122.00 31 13.79 CHANGED lppp+ppaEpclpshp...tDDPLplWhcYIpWhccsaPp....ssppSsLhslLERslptFtcsc+Y+sDsRaL+lWLcYhc..hhs.....-sp-hFpaLhpptIGsplAhaY.paAphLEspspapcAsplaphGl .....................................h..t.+ptaEtclpthp...ucDPLsha...............c..............YlpWsppsaPp...............tsppstL.hs.lLE+shptF.hsp....p..+..Y+sDsRaL+lWlcahc.hhs........................cspchap....aLhppsIGpphAhaY.paAthhEtp.sphpcAcplaphGl............................... 0 87 150 231 +8144 PF08312 cwf21 cwf21 domain Mistry J, Wood V, Bateman A anon Pfam-B_14400 (release 17.0) Domain The cwf21 family is involved in mRNA splicing. It has been isolated as a subcomplex of the splicosome in Schizosaccharomyces pombe [1]. The function of the cwf21 domain is to bind directly to the spliceosomal protein Prp8. Mutations in the cwf21 domain prevent Prp8 from binding [2]. The structure of this domain has recently been solved which shows this domain to be composed of two alpha helices. 21.00 21.00 21.00 21.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.27 0.72 -4.11 76 461 2009-10-29 13:58:15 2005-07-13 09:22:44 7 10 268 1 314 448 0 48.80 38 7.17 CHANGED l-H-++...RcIElKlhEhc-cLE-..cu.....................................h.s---I-p+lsphRp+Lhpchp ............h-H-++...RclEl...Klh-hp-pLE-.....ps.................................................h..spppI..pc+VpphRpcLhpc..t................................................................................................. 0 97 155 240 +8145 PF08313 SCA7 SCA7, zinc-binding domain Mistry J, Wood V anon Pfam-B_21229 (release 17.0) Domain This domain is found in the protein Sgf73/Sca7 which is a component of the multihistone acetyltransferase complexes SAGA and SILK [1]. 
This domain is also found in Ataxin-7, a human protein which in its polyglutamine expanded pathological form, is responsible for the neurodegenerative disease spinocerebellar ataxia 7 (SCA7) [1]. Ataxin-7 is an integral component of the mammalian SAGA-like complexes, the TATA-binding protein-free TAF-containing complex (TFTC) and the SPT3/TAF9/GCN5 acetyltransferase complex (STAGA). This domain is a minimal domain in ataxin-7-like proteins that is required for interaction with TFTC/STAGA subunits and is conserved highly through evolution. The domain contains a conserved Cys(3)His motif that binds zinc, thus indicating this to be a new zinc-binding domain [2]. 19.80 19.80 20.00 20.20 19.50 19.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.56 0.72 -4.38 17 392 2009-11-18 17:56:09 2005-07-13 09:42:51 7 11 194 2 252 404 1 70.50 44 12.05 CHANGED s++pccs+p+s.......sps+p.lDl-KQCGV.LPpGt.hCuRSLTCKoHSMGuKRAV.GRopPaDlLLs-ap++sph..K. ...............................................ttp.....................hsct.hD.s+pCGVlsscsp.t......CTRSLT.CKoHShspRRAV.GRpp.aDhLLscaptcsptp..................... 0 62 111 174 +8146 PF08314 Sec39 Secretory pathway protein Sec39 Mistry J, Wood V, Schmitt HD anon manual Domain Mnaimneh et al [1] identified Sec39p as a protein involved in ER-Golgi transport in a large scale promoter shut down analysis of essential yeast genes. Kraynack et al. (2005) [2] showed that Sec39p (Dsl3p) is required for Golgi-ER retrograde transport and is part of a very stable protein complex that also includes Dsl1p (in mammals ZW10), Tip20p (Rint-1) and the ER localized Q-SNARE proteins Ufe1p (syntaxin-18), Sec20p and Use1p. This was confirmed in a genome-wide analysis of protein complexes by Gavin et al (2006) [3]. 
18.50 18.50 19.20 18.60 17.70 18.30 hmmbuild -o /dev/null HMM SEED 715 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.94 0.70 -6.60 20 278 2009-01-15 18:05:59 2005-07-13 09:48:18 6 15 208 1 202 289 0 559.60 21 47.25 CHANGED LhLLAsplsspus..lpsLspLhspasuhls.-...lL+IlLohhP.Eoh-PpsYssllppl.psp.sts...............hhsp-shslusscshsssssp+ps..........cphcLlsL+h.tt.ps.t..ss-hlspFLhcRuhcl-p.os.hshhhpL...........Lhshhc...pssplpsWlhusllPLl+h.Nhchh..ts.sholsshpsh.sstsslslLLuhssscpt........lspsLcsllsPalhtppc.pp..hpphhtssshshhpsppp............lhcaLhspuphshtssspuh....s..p.......................ppcaspsuLuslYtss.......csohpsLshshtlhpths.lhthpt..hh.............................shlssss.lshsst.shshhptlLhohphLpphs..hshphhtphh..ps..pctQltchpshlpshhhpppstts...Wpplhpplhhlpsht................hlFsplsp-hlpscllc..sLLpsscas...lAtslhppssst................lssp.lccslhpshashassAoNss+oRsshK+Apch..Lphhssp.......hssss.shpplcsLlpAocsLSpYSL..sLppG.......PFpPlsl...Rh+sDPlulIp+lL-QNP+uYppl-cllslu+pLlpAh.......................sptpccshhsscpRl....huhsIcsALsssDFpsAYshslshLpsss..spp.............t.......c-hsWcssaQsG+YhsPs.sss...pth................LspRhElLShALplsP.s-sLpclLusWpph-pELss..Lhspcpst-sshc 
....................................................................................................................................................sh.hs.psp..httl.hh.t.h..............lhs.hP.E.h.....s.tth....hl.......................................................................tt.........h....t....pt.....t....h............................t...ph.h.....t.............................p.h.pah.pRs.tlpp.st....h.t...............l..hhp...........h...th...h.......sh.hh.......h............t......h....slt.h.pph..ts.p.hhphhhp.stt......tp............hsp.hhthhhPah.....http.....................t.........t....................h...hlh.p.......p......h.......................................phht.hsLtshY.st...........p.p.h.ph..tlh.ppls....................................................................thht....s..h.p........t..hp.hpt.......hl...ssphLt.phsh..hshphhtph...........ppp.ht.plh.h.hhtph....hppts.....hs.t......Wtthhpph.hhhpp.s..................asp.ls..phh.t.hhc..uL.L.ps......s...chp...............LAtphh.p..s..s.s........................ls...pp..plVhts.hphassuos.scsphsht.+.....sp.......Lphhssp...........ss.shpc..sLl.pAhtt.L.p.pasl.............................hhPlpl..........R.......h.....pt.......D..................lslIppslppsspsYpp.ppLltlup.Lhhs............................tpp...ttp...tpl....h.h..hlc.tALtttDaphAhths.ph......ht...............................s.s.sW..cs.shphGph..s.........................................ltpR.-lLuhuLth...sP..s.....p....plt......lLs.shpphptp.h................................................................................................................. 0 59 107 161 +8147 PF08315 cwf18 cwf18 pre-mRNA splicing factor Mistry J, Wood V anon Pfam-B_19718 (release 17.0) Family The cwf18 family is involved in mRNA splicing. It has been isolated as a subcomplex of the splicosome in Schizosaccharomyces pombe [1]. 
25.00 25.00 25.30 25.10 24.90 24.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.69 0.71 -3.68 31 268 2009-01-15 18:05:59 2005-07-13 09:55:05 7 3 235 0 201 257 0 132.60 35 70.43 CHANGED usL-ttAhcRKtRLtpL+ph.................................t.ppppptttssspcssphtLphRNYcPcsc.shKtshhsssp...........ssplEcclpcQhctsctt.....cplDLhpLtP+KPsWDLKRDls+KhchLcpRTppAIAcLlR-Rl ..................................................................s.pLpttAhcRKtRLttL+ph..................................................t....ppp.p......tppspppstpp.pLphR.NYsPcsc.shKtthhsssp.............................................sspl.E.cp.lp....cphptsptt..........................cplDLhsLsP+KP...sWDLKRDlscKL-+L-+RTpcAIAcLlR-Rl...................... 0 71 107 160 +8148 PF08316 Pal1 Pal1 cell morphology protein Mistry J, Wood V anon manual Family Pal1 is a membrane associated protein that is involved in the maintenance of cylindrical cellular morphology. It localises to sites of active growth. Pal1 physically interacts and displays overlapping localisation with the Huntingtin-interacting-protein (Hip1)-related protein Sla2p/End4p [1]. 25.00 25.00 25.90 25.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.87 0.71 -3.73 21 213 2009-09-11 00:21:46 2005-07-13 11:52:31 6 4 131 0 161 204 0 110.00 38 26.42 CHANGED DsIDKLDVTGl.aG.GsFHHDGPFDACsPHRN+ssp..sAPVhAFPtDusNsolGG..ssssccsshspsaGpt-.-s.s...................t.sshttstsp.t..s...h..psspl...spFDsps+sE.lHGssThGLGooTFLDGAPAS+uAIpc ..................DhID+LDsTul.a.G..GhFHHDGPFDAssPcRN+psp......tAPl.tAF.....st...D.u.Npsltu........s..p..tht..hu..t...................................................................ss..p...h.u..o.GLssoThl-GsPAs......t...................................................................................................................... 
0 34 81 130 +8149 PF08317 Spc7 Spc7 kinetochore protein Mistry J, Wood V anon manual Domain This domain is found in cell division proteins which are required for kinetochore-spindle association [1]. 34.00 34.00 34.00 34.30 33.70 33.30 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.95 0.70 -5.77 15 147 2009-09-13 12:07:19 2005-07-13 15:40:18 6 13 141 0 113 154 0 314.20 25 26.55 CHANGED l-...........psspschEPIpLp-FLshssI+F.....hplsohcRhpsohss...............pspssshcDhlsAtasslPhLELYp..aSC+EL++hIuEGRcll+plEscThs-NPP.LF+EYhoAss-h+lLMcsQhp.VKoaARLpSKssWYEWRhpLLcGLK-sLtcplpthppDcchLs+p.shlsslhsclpc+psuLccEhssLcplsc-.....hspsDpp-Lpsh+pcLpplcpcIstpppplpELpsclpchsssIpsssppKpphhtcIp-t-+lhccs+saospEIscL+pphptlcphoGaslhuls.......Gsslohsaccp..........lcLsFs.......usapl ..............................................................................................t.tttt.......t.php..lp..Lp-FLshsslcF......hph.....sss+Rppshhst........................ttpshslcchlsA..thhslPhLELYp..auC+ELpphIs..-G+ph..hcplEscs..htpN..PP.LFpE.Y..hs.....u.s..........s-.h....+..h.l....McsQhp.lKsauRL.uKthWYEWR..hpLl..cGLcpsLtcphpthppDtphLs.cptphls.sllsplhpcpptLppEhp.pLpph.spE...........hpssDp..p-Lpph+....pcLtphctc...........lpthppplpchppclpphppp.l..cphspp+pphhppIp...cs-+hh-.cs.+sa.otpElppL+splctLEp.pGhplhphp................u.s.tl.phtappp..........lpl.ht............t............................................ 0 40 67 98 +8150 PF08318 COG4 Sec38; COG4 transport protein Mistry J, Wood V anon manual Domain This region is found in yeast oligomeric golgi complex component 4 which is involved in ER to Golgi an intra Golgi transport [1]. 
20.10 20.10 21.10 20.90 19.80 19.90 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.82 0.70 -5.67 14 351 2012-10-02 15:56:29 2005-07-13 15:46:43 7 11 268 0 250 355 4 282.30 26 39.00 CHANGED LcphpcpLpplFpccFpcAs+spDltplT+aFKLFPLlGtc-hGLshYucYlCphIAscuRphhpss.t.......sptshhaupslhpLF-plupllpsHstllpphYG..sst...hlpllp+lQcEsDhQsulll-pFhDpR+lcchhppIspas...hst.............t........stp.phlsh+-lssllsEhotlhppWslYp+Fhsh+h.p..............hsssp.p.....hphspllpsuphsppl..pcllssFhtLppaahpcSlp+ulpl-ch...............ptps..sSShV-Dlhhll+psLtpslsTuphsslsphlsp.lsphlpsDa.hphhpspl+ .....................................................................................................................................................................................................................lppspppLpslhhccFtpAspp.....tDhsplpRFFKlFPll..G..h..pc.GLpha............upYlsp.ls.tpuc.t........hp.s.ht...........................pp....tshh...aussLohLF-tlupll-sHtsllc..ph.YG.....sp...................hhp.llptLQhEsDhpsth.llcpa.cpR.ph..pphhp.plpp..ht..st.........................................................................-.t...l...s..+.-lD.lLsEhshh.tphphYh+Fltp+htt...................................................................t.pt..............phsphlppst.hspph..pclls.....Y.shppaahccolpKAhtl.Dph.....................................................................................tps.hhoS.hV.DDlhallppsltRuluouphsslsuhlst.hsphL..ps-a.hthlppch................................................... 0 91 145 210 +8152 PF08320 PIG-X PIG-X / PBN1 Mistry J, Wood V anon manual Domain Mammalian PIG-X and yeast PBN1 are essential components of glycosylphosphatidylinositol-mannosyltransferase I [1]. These enzymes are involved in the transfer of sugar molecules. 
20.50 20.50 21.30 21.10 19.80 18.60 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.57 0.70 -5.05 22 258 2009-01-15 18:05:59 2005-07-13 15:56:35 7 5 217 0 165 278 0 194.10 24 54.09 CHANGED pPsGLHPpLplslss.............spssppCplahahpLPpslFhD+YQ.........t..shs.plhhltGtsDLEhP-Yth..ppWGSphLhcltss............................hpsplPLHhRYhcPup..ssh....pslsls.PhlFhuCssc-ss..........htpsPFsph.....shua-shFsscThFaals.p..................ptslplplPhsst.......tshptVphsThlsllluhla....llhplhtt ...............................sGhH..sL.lp.lph..................h.st.ppCpl.hhh...pLPs....slFsD.apLts.......................................Lppcshh...phh.....hltsps...DLEtP..sa........pp.hus.pl.Llplp.ss................................t...pphpsplPlHhRY...hpP....ps...tsuh..............ppltls.Ptl.hhtC.stppst....................ht..sapph..............sh.s.hhtsp..shh.ahp.t.......................hs.tlplPlhph..........phthlp.sThhhhhls.hhlhhtl...h........................................................... 1 43 84 133 +8153 PF08321 PPP5 PPT1; PPP5 TPR repeat region Mistry J, Wood V anon Pfam-B_6912 (release 17.0) Family This region is specific to the PPP5 subfamily of serine/threonine phosphatases and contains TPR repeats. 23.50 23.50 23.60 23.80 23.40 23.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.16 0.72 -3.97 10 434 2012-10-11 20:01:00 2005-07-13 16:27:50 7 49 269 28 266 410 3 88.60 30 17.10 CHANGED KlpECcKlV+clsFEcAIul.-cpc+Sls-sl.DlEshsIEs-YsGP+LEsspVTl-FlKchhEaaK.pQK+LH++aAYpILlplcclL+ppPSLV-l .............................................h..spphlpp.tF.tAIt..............o.h.....h-..th....D.h...-.s..h.........s....l.-c.s...Y.sGP+................L..............p.....................t...............p...............lTh...............p.............Fhcphl.-t.FK..ppK..pLH.++YshpILhps+cllpp.Pohlc......................... 
0 82 129 200 +8155 PF08323 Glyco_transf_5 Starch synthase catalytic domain Bateman A anon Pfam-B_148 (Release 17.0) Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.98 0.70 -4.88 179 5560 2012-10-03 16:42:30 2005-08-09 17:08:06 6 34 3850 19 1001 5688 791 204.20 33 44.27 CHANGED +...lLaluuEs..........sPh..sKoGGLuDVssuLPpAL..tphG.t..-V..cllhPtY.stl.pph.........php.lhp...h............h..........h.hhph.h...........p.slslah.l-s.t.....ha.p....Rss..hY............sa.DsspRFu...hFupAshchhtt.h..........sh.t..PDll..HspDWpTuLl.PhhL+ph.htt.......stoVhTIHN.........lsaQGhas...tphh.th..hsls..........h...pthcahs...........................plshlK.uGlhhuDtloTVSPoYAcEItsst.......hG..GL-slLpp ........................................................................................................................................lh.hs.Eh.............sh......p.GGL.ucsht.u.Ls...t...........t........s..h..c.s....hh..l..h.P....ha....h.t.t.................................h..................................................hth.h.pthh...................................p..Glsh.hh...l...D.....p.h....................ah..p.....................+ss...........hYss....................sah.D.s.t.h..R.Fu...............hh.sp.A..u.l.Ehsph..l........................................sa..t....Dll..h.spD.WH......ou.Ll.Ps..a...L.+th...h.......p................h..............t......s+..........sshsIHN..............lsaQG..hFs..........hp.h...sh....hsLs.............phh........t...s.h...c...a......s.............................pl.sa.h...K...AGl.h..uD+lsT...VSP...hYApElhss.........th.tL-sllp..................................................................................................... 0 340 651 848 +8156 PF08324 PUL PUL domain Bateman A anon L. Iyer Domain The PUL (PLAP, Ufd3p and Lub1p) domain is a novel alpha-helical Ub-associated domain. 
It directly binds to Cdc48, a chaperone-like AAA ATPase that collects ubiquitylated substrates [2][3]. 21.00 21.00 21.40 21.50 20.80 20.70 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.74 0.70 -5.60 22 406 2009-01-15 18:05:59 2005-08-10 13:17:14 6 21 274 9 297 411 2 256.60 19 36.81 CHANGED phhPhpphlhhcshshctlhp+lpchNsp.......tchphs-pplss....ltphlpthpps.t........hpthhthhhpllpsWs.sphhPslDllRlhlhp.sss.hh..t.........ss.hsphhtts.ss...................tp.s.hhhslRhlsNhFsssshtthlhspts.........plhstlsshhssh...............spNlplAlATLhhNhulhhhcss.........sh-hph.lluslhp.....htc..pspEAhYRhllAhGsLh....oht.sshtthspt.........shsthhptptp.........hsp.t+hp-lsp-l ..............................................................................hhP.pphl.hh..cp.s.s.h.ptlhpKl.t.chNtt............tphth...sp..splpt.......ltphl.p.......thpps.ss.....................hpt.h.h...hl.hp.hhtpW.P......s..h..h.hPslDll.Rl.hlh.psp....s.ss..h.hspt..................tphhtphhp.t..s....p...................ss.stthl.slRhhsN.hF.......s..s....tthp.......phhhsptp...............tl.hstltt.htss..............................spNlplAhuTLhhNhulhhpppp........................stctphtll..uslhp..................lhpt..tp.s......Euha.RhLlAlGsLl..........sss.sphhphsps........hth.t.thh.tthtp...................hsp..hhtphht.......................................................................... 0 103 168 244 +8157 PF08325 WLM WLM domain L Iyer, Bateman A anon L Iyer Domain This is a predicted metallopeptidase domain called WLM (Wss1p-like metalloproteases). These are linked to the Ub-system by virtue of fusions with the UB-binding PUG (PUB), Ub-like, and Little Finger domains. More specifically, genetic evidence implicates the WLM family in de-SUMOylation [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.38 0.71 -4.42 23 403 2012-10-03 04:41:15 2005-08-10 14:50:09 5 13 219 0 290 2217 499 191.90 27 49.79 CHANGED spstphphLp....pt.PscscALchLp+lAsp..lpslM+c++a+VshLsEhhPppts..................LLGhNhN+Gpc..................IpLRLRsss..ppFlsacslhsThLHELsHslausHDppFacLhcpLps-htplphth.......thhssG+pluupshh.s...............tth.httsthtGtsppLG.Gss...................................................................................................................ssstshRchhutAA-+Rhp .......................................................................................................................................................l......hPp.tppAhphLc+lAst..lpslM+.c+.papVshLsEh.Pp.pts.......................................lLGhN.....h....N.pGpp............................I..pLR..L.Rsss.......pta.h...shc....p..lhp.T.h.lHEL...sH.......s..l..aus..Hsp.pFa....sLhs.p.L..p..c..Ehp..pl..t..h..p..................h..stG.ppLustth.......................................t..ttthhts..ts...hLG...Gss..............................................................................................................................................................................................tshshRphhAtAA.pRh.......................................................................................................................................................................................... 1 97 176 251 +8158 PF08326 ACC_central Acetyl-CoA carboxylase, central region Fenech M anon Pfam-B_2008 (release 18.0) Family The region featured in this family is found in various eukaryotic acetyl-CoA carboxylases, N-terminal to the catalytic domain (Pfam:PF01039). This enzyme (EC:6.4.1.2) is involved in the synthesis of long-chain fatty acids, as it catalyses the rate-limiting step in this process. 
18.50 18.50 18.60 18.60 18.40 18.40 hmmbuild -o /dev/null HMM SEED 708 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -13.13 0.70 -6.42 37 703 2009-01-15 18:05:59 2005-08-10 14:56:40 7 35 365 6 377 673 13 529.70 28 30.19 CHANGED sLDDPS+V++....ApPFpGpL..PchusPshh.G....sKstp+apthhss...............LpsILsGY.......phhhss.slppLlpsLcsscLPa.paptphSsLtsRlP.pLcptlpphhcchptp....tsp........FPu+pLppllpphhs...hss.....hhthsltPLhplhppYpsGlpsHthslhtsLlccYhsVEplFs..sspp--VIhcLR-cpcsDlpcVlphlLSHuplssKNpLlLulL-ph.ps..................sssthcssLp+lspLps+stu+VAL+ARElLlpspLPSlcpRpsplcclLpu..shhpsthGp.......h.p+ptsph-hlc-LlsSphsV..hDlL.spFF..scsDthVphAAlEVYlRRu.....YpuYp.ltc....lpacpps......slhpWcF..lsp.t.sphst..........................................p..chtshoshohhspptpt......RtGhhlshcpL-clpphl.stuLcthsp.......................tt...t............t.p.....sp.sNlhslsl.........pphpsh..s-p-llsclptllcppc..pcLts.tulRRlTFlhsp.pc...............uphPpaaTF+..uss.......YpE-phlRHlEPuLAapLELsR..Lp.sFclp.l.opsRplHlYpus..........................uKp.........sssD+RaFhRullRsupl..psphsht-hL.uEss..............Rlhs-hLDsLElhsss....poDhNHIFlNhss.hpl....................sspplEtuhsshlcRaGpRLaRLRVspsElRlhls....sssssshPlRsllsNsSGaslps-lYtEhcssp.u............phla+Shs...c.Gs.hH 
..............................................................tLDsPotVpt.....sp.a.u..h.......P...t.......h..s..............tp..phht..hp..........................h...lhtGa....................................t.....hpt.hlpphh.sLc...sspLPh.phpt.hssltsRhP..lpt.hpt.ht..t...................................................................................FPup.l.thhpt.ht................................htslhplhp.pattG.t.......h.......h.hh.tlhppYhpVE..p.Fp..................p...ppsl.tl.Rpp..p....s.httVhphhhSHttlttKs.Llhhlhcth............................................................st.hhs.L.ch.spLp..p.pt.......s...cl...uLcApp...........l.L.tsthsphc....spht......p.h....p...h......t..................................................p..hpcll.u.hsl...D.......h.L...hF....pts.....hlt.t.shEs..YlRRh.....Y.sa..ltt......hpht...................hh.apF.....t.................................................................................................................................................................+hGhhh..hp.hpth..........t......h.sthht.h............................................................................................................................................................tp..thhplsl................................t...pt.....ptp....thht.htthhpp.p..t.Lht.ttl+clohhhtp...p.....................tthPtaaTF+.h.s......................................atE-phhRclEPsluhpLELsR..hp..pa...plp.h..st.s...c...phHlY.uh..........................................................................................u+............tshDhRhF....hRullR...p.sph..............hsc..sshphh.sEsp................................Rhl.phh-tL..E.l.h......p.........p..........................p....s.ch....NHlalsh....h.....s.....h.h...........................................sstplcp.htthh.phG.Rhh+LpVhthEl+h.hp..........s.ss.s.hshRlhlsN.oGahh...phphYhEhps.p..t.......p.hhapuhs.......u................................................................................. 
0 139 213 317 +8159 PF08327 AHSA1 Activator of Hsp90 ATPase homolog 1-like protein Fenech M anon Pfam-B_4145 (release 18.0) Family This family includes eukaryotic, prokaryotic and archaeal proteins that bear similarity to a C-terminal region of human activator of 90 kDa heat shock protein ATPase homolog 1 (AHSA1/p38, Swiss:O95433). This protein is known to interact with the middle domain of Hsp90, and stimulate its ATPase activity [1]. It is probably a general upregulator of Hsp90 function, particularly contributing to its efficiency in conditions of increased stress [2]. p38 is also known to interact with the cytoplasmic domain of the VSV G protein, and may thus be involved in protein transport [3]. It has also been reported as being underexpressed in Down's syndrome. This region is found repeated in two members of this family (Swiss:Q8XY04 and Swiss:Q6MH87). 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.83 0.71 -4.02 114 4271 2012-10-02 19:24:03 2005-08-10 15:01:15 6 39 1661 63 1665 4537 461 127.90 18 71.07 CHANGED ssss-pVacuhTcs-tltp.W............hs.hph-h...+s..GG.paph.........tst.hstpsp...........hhcltssc+lshs.atht..t..........................httlshplpp.tsss..Tclphpp..........sshstspttp.............hptGWpphl.cpLpphlp ........................................................ss.cpVacs...hos.schl....tp...W......................h.t...tph.-h.............cs......G.G....pa.ph................t...tst..t.h.s.h..p.sp...................hhcl..p.s..s.........c..........p....lshs..htht......t...................................................ts.tlp.hp...h..p...p.....p.......s.ss.......Tplp.hpp....................sshst...ppttt.................httGW.pthl..spLpthl................................................................. 
0 625 1111 1404 +8160 PF08328 ASL_C Adenylosuccinate lyase C-terminal Wuster A anon Pfam-B_1176 (release 18.0) Family This domain is found at the C-terminus of adenylosuccinate lyase(ASL; PurB in E. coli). It has been identified in bacteria, eukaryotes and archaea and is found together with the lyase domain Pfam:PF00206. ASL catalyses the cleavage of succinylaminoimidazole carboxamide ribotide to aminoimidazole carboxamide ribotide and fumarate and the cleavage of adenylosuccinate to adenylate and fumarate [1]. 34.10 34.10 34.10 34.50 31.90 32.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.38 0.71 -4.24 107 1767 2009-01-15 18:05:59 2005-08-10 15:17:40 6 8 1709 11 421 1259 659 115.00 62 25.35 CHANGED SRaQRDLTDSTVLRNlGVuhGaollAYpuhl+GLsKLplNpsplspDL-ssWEVLAEsIQTVMR......RaGltpPYEpLKcLTR..Gp.plstcslppFI..csLc.lP-psKscLhtlTPssYl .......SRWQ.....RDLTDSTVLRNLGVGlGYuLIAYpSoLKGluKLElNcs+Lhc-LDpNW.E.VLAEPIQTV....MR......RYGIE+PYEKLKELT.R...GK..clssEuh+pFI-u...Ls...lP--.....t..KsRL+uhTPAsYI........................ 0 122 261 354 +8161 PF08329 ChitinaseA_N Chitinase A, N-terminal domain Fenech M anon Pfam-B_1049 (release 18.0) Domain This domain is found in a number of bacterial chitinases and similar viral proteins. It is organised into a fibronectin III module domain-like fold, comprising only beta strands. Its function is not known, but it may be involved in interaction with the enzyme substrate, chitin [1,2]. It is separated by a hinge region from the catalytic domain (Pfam:PF00704); this hinge region is probably mobile, allowing the N-terminal domain to have different relative positions in solution [1]. 
21.20 21.20 21.20 21.30 21.10 20.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.58 0.71 -4.38 43 378 2012-10-03 16:25:20 2005-08-10 15:19:53 5 25 265 36 56 351 4 122.60 40 18.18 CHANGED sAsPGsPoIc......Wu-.psaul.....Vclsp..pAT.uYppLVph.c-tlsVsVoWNlWSG-sG-puplhhDGpp......Va....pGsus..sppA..shplspGGpapMpVcLCNu-G....CSsSs.ssplllADTDGSHLtP.L.h..shpENN+saspps ....................................t.PshPsls...aup...pphth..lpls...tsT..uYpphVph.+-tssloVsaNhWo..Gs.sGsoh+lhhsGpp......Vh.......oGsss....upsoA.....sFph..s..KGGpYQhplcLCNusG....Cop.Ss.sscI.slADTDGSHLtP.Lph..slt.NNKsap...s.................. 0 13 26 38 +8163 PF08331 DUF1730 Domain of unknown function (DUF1730) Wuster A anon Pfam-B_1023 (release 18.0) Family This domain of unknown function occurs in Iron-sulfur cluster-binding proteins together with the 4Fe-4S binding domain (Pfam:PF00037). 21.40 21.40 21.50 22.20 20.50 21.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.50 0.72 -4.31 57 2383 2009-09-10 21:15:36 2005-08-10 15:23:48 5 19 2359 0 510 1721 810 77.80 37 21.12 CHANGED RscPptLlPss+SlIuluhsYhs.......tshpssppuhlo+Yuh.G+DYHcll+c+LcpLuphlppps.sch..t.....h+shVDo ............................RscPctLlPustslIulths....Yhs...................t....s.............................p............s..l..............p...s.....scp.....GhlSRYAh....G.....+..DYHcllRpRLccLuc......h......I......p....p....cs....s.sh......p..............h+sFVDo........................... 1 151 311 421 +8164 PF08332 CaMKII_AD Calcium/calmodulin dependent protein kinase II Association Wuster A anon Pfam-B_1025 (release 18.0) Family This domain is found at the C-terminus of the Calcium/calmodulin dependent protein kinases II (CaMKII). These proteins also have a Ser/Thr protein kinase domain (Pfam:PF00069) at their N-terminus [1]. 
The function of the CaMKII association domain is the assembly of the single proteins into large (8 to 14 subunits) multimers [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.49 0.71 -4.24 7 782 2012-10-03 02:27:24 2005-08-10 15:27:36 5 6 295 44 244 1086 152 118.40 52 32.17 CHANGED ccpEIlplT-pLlcAIssGDacsYo+lCcsshTsFEPEAhupLl-Gh-FH+FYFE.hhupps+slppslLsP+V+llGD-uAshAYl+LhQhhDcsGhs+ohQupETRVWp++sG+WpsVHhHRSuus ....................................................................................RKQEIIKlTEQ.L.IE.A....ls.sG...D...FE...uYsKl..C..D..P..s..h.TuFE...P.EA......LG.......N.L.V.E.G.h.DFH.......+...F....YFE...N.h..L.u.....K..s.......s..K.s....l.....+.T.....hI..L..N..PHV....H..l.l..G-.-..u..A...C.I.A...Y..l.....R..l..T.Q..a..hD.u...........p..G.hs..+.o.sQ.S..EETR.VWH....R....R....D......G.....K.....W..VHaHpSusP...................................................... 0 55 91 151 +8165 PF08333 DUF1725 Protein of unknown function (DUF1725) Fenech M anon Pfam-B_2110 (release 18.0) Family This family include many eukaryotic and one bacterial sequence. Many of its members are annotated as being putative L1 retrotransposons or LINE-1 reverse transcriptase homologs. The region in question is found repeated in some family members. 22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.32 0.73 -6.56 0.73 -4.47 24 244 2009-01-15 18:05:59 2005-08-11 08:45:36 6 30 26 0 135 164 33 19.60 62 2.50 CHANGED hpFhGKWMELEsIILSElsp ......FsssWM-LEsIhLSEloQ..... 
0 2 8 20 +8166 PF08334 T2SG GSPII_G; Type II secretion system (T2SS), protein G Fenech M, Desvaux M anon Pfam-B_1144 (release 18.0) Family The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for the transport of proteins across the outer membrane first exported to the periplasm by the Sec or Tat translocon in Gram-negative (diderm) bacteria [1,2]. The T2SG family includes proteins such as EpsG (P45773) in Vibrio cholera, XcpT also called PddA (Q00514) in Pseudomonas aeruginosa or PulG (P15746)in Klebsiella pneumoniae. The PulG is thought to be anchored in the inner membrane with its C-terminus directed towards the periplasme [3]. Together with other members of the Type II secretion machinery, it is thought to assemble into a pilus-like structure that may function as a dynamic mechanism to push secreted proteins out of the cell. The polypeptide is organized into a long N-terminal alpha-helix followed by a loop region that separates it from a C-terminal anti-parallel beta-sheet [1]. 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.56 0.72 -4.30 175 1599 2012-10-03 10:38:27 2005-08-11 08:56:08 6 8 1122 10 474 1149 368 105.00 40 69.58 CHANGED plhs.......ph-....cA...+hpsAcsplpslps.AL-hY+L.DsGpYPospp.GLpALlptPsst...ppa.pGsYl..cplP.pDPWGps...YhYh..sPG....t..p.G.thDlhShGsDGp.GGpu.suDIssW ................plhuph-+ActppAh.sDI..s.s...Lcs.AL-hY...........+............L....D..........N..G.c...............Y.P..o....s.p......Q...GL.c.A.Ll..ppPss................spsa..ps...uYl....++L.P..pDPW..Gss............YpYl....sPG............p.....c.....G.thDlaS.hGsDGp..GG-s.stDIssW................................................................. 
0 154 301 395 +8167 PF08335 GlnD_UR_UTase GlnD PII-uridylyltransferase Fenech M anon Pfam-B_2147 (release 18.0) Family This is a family of bifunctional uridylyl-removing enzymes/uridylyltransferases (UR/UTases, GlnD) that are responsible for the modification (EC:2.7.7.59) of the regulatory protein P-II, or GlnB (e.g. Swiss:P05826, Pfam:PF00543). In response to nitrogen limitation, these transferases (e.g. Swiss:P27249) catalyse the uridylylation of the PII protein, which in turn stimulates deadenylylation of glutamine synthetase (GlnA). Deadenylylated glutamine synthetase is the more active form of the enzyme [1]. Moreover, uridylylated PII can act together with NtrB and NtrC to increase transcription of genes in the sigma54 regulon, which include glnA and other nitrogen-level controlled genes [2]. It has also been suggested that the product of the glnD gene is involved in other physiological functions such as control of iron metabolism in certain species [2]. The region described in this family is found in many of its members to be C-terminal to a nucleotidyltransferase domain (Pfam:PF01909), and N-terminal to an HD domain (Pfam:PF01966) and two ACT domains (Pfam:PF01842) [3]. 
20.50 20.50 20.50 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.78 0.71 -4.35 122 5858 2012-10-01 22:14:54 2005-08-11 08:58:32 6 24 2142 3 1374 4628 1176 134.10 23 21.87 CHANGED chhctplpcp........tpR+..........tphs..........t.NlK.G.GGlRDlchlh.ltphhh.............thpsLptLhpt.....uhlspp..-hppLpcuhpFLhplcptL.phlss+pscpLs..h-......tptplAph.h....G.................a.............sshtu...ahpphhpttppVpphhphlh ..............................................................hptpl.ppp.................ht.c+...............tt.p................slKhutGGlRDIEalsQ..lh..p..Lhautp..tl.........p..sslpsL.ptls..ph.......uhl..s..pp...-s.tpLp...cuap...hL..pc...l..c...p..t..L...ph....h..t...s....c....t..s....p..t.L....s.......................t..tl..A.h.h...s....................................h.....................ts..t.....hhpth.hthhppltth.p.h.................................................................. 0 366 832 1128 +8168 PF08336 P4Ha_N Prolyl 4-Hydroxylase alpha-subunit, N-terminal region Fenech M anon Pfam-B_2013 (release 18.0) Family The members of this family are eukaryotic proteins, and include all three isoforms of the prolyl 4-hydroxylase alpha subunit. This enzyme (EC:1.14.11.2) is important in the post-translational modification of collagen, as it catalyses the formation of 4-hydroxyproline. In vertebrates, the complete enzyme is an alpha2-beta2 tetramer; the beta-subunit is identical to protein disulphide isomerase [1-4]. The function of the N-terminal region featured in this family does not seem to be known. 
22.70 22.70 22.80 23.90 22.30 22.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.56 0.71 -4.34 57 702 2009-09-11 11:25:59 2005-08-11 09:00:04 6 15 93 0 369 644 0 123.70 30 26.34 CHANGED oSlsphpcLlphEcpLlssLcpYlpt.pp+lcpl+phhpphcppppputp..chppYlusPlNuFtLl+RhppDW.pl..cphhpps.hspp..hlptlpphtp....ph.Popc..DlpsAspulhRLQssYpLpspclApGh.lsGh ................oShsphpcLlthEppL.lpsLcpYlpt.pp+lppl+.......ph...hp.....phct...........pp..putp...s.ctal....upPlNAapLl+RLpsDW.pl..cphhtps....spt.......hlss.lp..pp......hh.Ps..p-..DhpuAupuLhRLQcsYpLcspslupG.l.G......................... 0 106 122 259 +8169 PF08337 Plexin_cytopl Plexin cytoplasmic RasGAP domain Fenech M anon Pfam-B_3123 (release 18.0) Domain This family features the C-terminal regions of various plexins (e.g. Swiss:P51805). Plexins are receptors for semaphorins, and plexin signalling is important in path finding and patterning of both neurons and developing blood vessels [1,2]. The cytoplasmic region, which has been called a SEX domain in some members of this family [3], is involved in downstream signalling pathways, by interaction with proteins such as Rac1, RhoD, Rnd1 and other plexins [4]. This domain acts as a RasGAP domain [5]. 
20.10 20.10 20.10 20.60 20.00 19.60 hmmbuild -o /dev/null HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.65 0.70 -6.19 11 880 2009-01-15 18:05:59 2005-08-11 09:05:10 7 64 115 19 421 669 1 437.30 47 33.18 CHANGED uGIPFLDY+sYshRlFF...........................sGpcsHPlhtchp....ttppsslEpuLshFusLLNsKsFLlsFIHTLEpQ+sFShRDRsplASLLhlALpuKLEYhT-Il+sLLsDLI-ps..sK.pPKLMLRRTESVVEKMLTNWMSlCLYsaL+EssGEPLahLapAIKpQl-KGPVDAlTGcARYTLNE-+LLR-slEa+slsLp..........sl..htscss-u........lsV+VLsCDTIoQVKEKlL-slY..KssPaSQRPcsc-lDLEWRsGptu+llLpD.-DlTohhEst.WK+LNTLtHYpVsDGAoluLssp....sshp.s....................hssssssstpttsh.sc..ps................+haHLV+PpD-.s-.....pp+ucRts...............+sKslsEIYLTRLLoTKGTLQKFVDDLFpoILSh..hspslPlAlKYhFDFLDEQA-p+GIoDPDslHhWKoNSLPLRFWVNllKNPQFVFDIcKosphDACLSVIAQTFMDuCShSEa+LGKDSPoNKLLYAKDIPpYKchVccYY+cIpphsslS-QEMNuhLAE.S+tHss-FsshsALpELYpYlpKYtppI ..........................................................................................................................sIPaLDY+sYs.RlhF..................................................s...u.h.psp..l.h.tchc............t.tt.ph..-puLt.hupLlssK.....hFLlpF..I+TLEtQ.+...sFShRD..R.s..lASLlhh..sL..p..u+hEYhTslh+pLL.-Lh-p...sK..pPKL..hLR.R..T...EoV.sEKhLoNWhohh.....Lap..aL+..................-s.........sGEPLahLapAIKpQh-KGPlDulTscA+YoLs-stLl+p.pl.-apsl.s.lp.............................sl.......ts...t.s...t.................ls.V+l.LsCDTIoQlKEKlL-tla..+shPhSpp.Pp.s.ts...h....-L...............EW..RtG......t.h.......s...p.....hlLpD....D.......h.T........o.h.......h.pst...WK+L...NTL......tHY..p...V..s-...u...u.s.lsLs.p........p...............................................................................h.t.tp.p......t..p.p....t....s.h....p.......pt.......................+haH.LV+s.p..-c....c..........thcscR........................tsKhl.sEIYLTRLL..u.....sK.G.TLQpFVDDhFpslhS.........s.p..s..lPhAlKYhFDFLDE.QApp...+..t.I.p.D.......-shHh.WKo..N..sLPLRF....WVN.llKNPpFlF.Dlc..psshsDAsLSVlAQTFMDuCoho-H+..
LG.............+...............DSPoNKLLYAK-IP.pYKp........h...V-..cYYtsItphs.slS-Q-MsshLsE...S.......+....a..ts.ph........ss.sALpElY.p.Y.hpKYh-pl........................................ 0 104 131 262 +8170 PF08338 DUF1731 Domain of unknown function (DUF1731) Wuster A anon Pfam-B_1045 (release 18.0) Family This domain of unknown function appears towards the C-terminus of proteins of the NAD dependent epimerase/dehydratase family (Pfam:PF01370) in bacteria, eukaryotes and archaea. Many of the proteins in which it is found are involved in cell-division inhibition. 20.40 20.40 20.40 20.90 20.30 19.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.09 0.72 -4.47 150 2466 2009-01-15 18:05:59 2005-08-11 09:12:23 6 12 2252 1 619 1759 633 48.10 39 15.43 CHANGED lP..uhsL+...lllG.Ehu.pllLpuQ+VhPp+LhpsGFpFcassLcpALpsll ..............lPuhsl+....llhG..Ehu.tLlLsGQ+slPp+L.p.p.s.GFpF+assLcpALpsl........... 0 205 395 534 +8171 PF08339 RTX_C RTX C-terminal domain Fenech M anon Pfam-B_2178 (release 18.0) Family This family describes the C-terminal region of various bacterial haemolysins and leukotoxins, which belong to the RTX family of toxins. These are produced by various Gram negative bacteria, such as E. coli (Swiss:P09983) and Actinobacillus pleuropneumoniae (Swiss:P15377). RTX toxins may interact with lipopolysaccharide (LPS) to functionally impair and eventually kill leukocytes [1]. This region is found in association with the RTX N-terminal domain (Pfam:PF02382) and multiple hemolysin-type calcium-binding repeats (Pfam:PF00353). 
30.20 30.20 30.20 30.70 30.10 29.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -11.04 0.71 -4.48 11 230 2009-01-15 18:05:59 2005-08-11 09:14:43 5 17 136 0 4 184 0 136.40 36 15.23 CHANGED shSDhshcDlsFc+VscsLll...........pNs+psplTIpsWFccushup............pscKIEpIlsKsGc+ITScpl-cllp.cscGp.IptpsLsp...................hu-sath......t.ptsslsNslsKlISSsuuFsoup....sptsuhh..lsosths...phpohpLApAA ........phSDlshcDlsFcRlssDLlh...........psscts.s.lThpNWFtcss.ut...................................ps+pIEpIhsKsGccIss-pls+hhp.ptss..htsptlup......................pp..s........shsslsNslsKlISSsuuFsos.....spthush..ls.pt.ss....ppuhpLsps............................... 0 0 0 4 +8172 PF08340 DUF1732 Domain of unknown function (DUF1732) Wuster A, Eberhardt R anon Pfam-B_1065 (release 18.0) Family This domain of unknown function is often found at the C-terminus of bacterial proteins, many of which are hypothetical, including proteins of the YicC family which have Pfam:PF03755 at the N-terminus. These include a protein important in the stationary phase of growth, and required for growth at high temperature [1]. Structural modelling suggests this domain may bind nucleic acids [2]. 20.50 20.50 20.70 21.50 20.20 19.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.88 0.72 -4.16 157 2499 2009-01-15 18:05:59 2005-08-11 09:16:42 6 3 2472 0 580 1771 606 85.80 52 29.77 CHANGED tQElslhApKsDlsEElsRLpoHlsphcchL.p...s...........s...............s.lGR+LDFLhQEhNREsNTluSKus.....shplophsl-lKs.lEphREQlQNlE ......QElslhAp+hDlsEELsRLcuHlpphpplL.cp...p........t........................slGR+LDFlhQEhNREuNTluSKS.......ss-lostul-LKslIEQhREQlQNIE.......... 
1 207 385 489 +8173 PF08341 Fb_signal Fibronectin-binding protein signal sequence Wuster A anon Pfam-B_4004 (release 18.0) Family This domain is found near the N-terminus of fibronectin-binding proteins in Streptococcus where it functions as a signal sequence [1]. 25.00 25.00 25.50 26.00 24.60 24.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.95 0.72 -3.97 30 227 2009-01-15 18:05:59 2005-08-11 09:19:11 6 21 39 6 11 210 0 74.20 35 16.57 CHANGED PaYGY-uhss.h........tYHcLcVs.LpGo.+pYp..VYCFNlc+p.P.cspuhspsa.............Yc+l-Gss.psFppYAtsPR ................saYGYDshssh...........pYHcLpVs...lpGo..csYQ.....VYCFNls+phPpsspu.hspsh.............YKKlcGos.psFppYAtsPR..... 0 0 2 8 +8175 PF08343 RNR_N Ribonucleotide reductase N-terminal Wuster A anon Pfam-B_1066 (release 18.0) Family This domain is found at the N-terminus of bacterial ribonucleoside-diphosphate reductases (ribonucleotide reductases, RNRs) which catalyse the formation of deoxyribonucleotides [1]. It occurs together with the RNR all-alpha domain (Pfam:PF00317) and the RNR barrel domain (Pfam:PF02867). 20.80 20.80 20.80 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.75 0.72 -4.17 65 2010 2009-01-15 18:05:59 2005-08-11 09:27:31 5 10 1914 6 245 1190 16 80.30 42 11.63 CHANGED YhpLNspl.Nl.spsGplp.hcKD+EAlcsahpppVpsNThhFsSlcE+lcYLlcp.sYYEpphl.cpY...shsalccLachAaupcF ..............YatLNs.l.NhhsssGpIp.h-KD+EAlcuahtppVps..NTl..hFsShpE+lsYLlcc.sYY-pshl........scY.......shsFlpcLapaAcspsF........... 0 53 132 191 +8176 PF08344 TRP_2 Transient receptor ion channel II Wuster A anon Pfam-B_1032 (release 18.0) Family This domain is found in the transient receptor ion channel (Trp) family of proteins. There is strong evidence that Trp proteins are structural elements of calcium-ion entry channels activated by G protein-coupled receptors [1]. 
This domain does not tend to appear with the TRP domain (Pfam:PF06011) but is often found to the C-terminus of Ankyrin repeats (Pfam:PF00023). 20.90 20.90 20.90 24.10 20.50 20.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.07 0.72 -4.04 18 680 2009-01-15 18:05:59 2005-08-11 09:37:35 6 28 108 0 347 603 0 62.10 55 7.46 CHANGED CsCs-C.ptpppDSL+HSpSRINsYRALASPuhluLo...ScDPlLoAFcLSaEL+cLuthEpEF+ ................CpCs-.Cspp.pct.DSlpHSRS.RlNsY+uLASPuhluLS..............SEDPlLTAFcLStELpcLu....plEpEFK............. 0 97 125 217 +8177 PF08345 YscJ_FliF_C Flagellar M-ring protein C-terminal Wuster A anon Pfam-B_1149 (release 18.0) Family This domain is found in bacterial flagellar M-ring (FliF) proteins together with the YscJ/FliF domain (Pfam:PF01514). 19.20 19.20 19.30 19.30 19.10 17.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.08 0.71 -4.27 169 2282 2009-09-10 21:18:48 2005-08-11 09:40:36 6 5 2017 0 541 1738 450 168.80 31 31.19 CHANGED LpsllG..s..ssp.spVss-lDFsptppspcpa........cP...sp...........tsl.RSpps.pEss..ssst...sss..G...lPGshoN...h..Pss...........................sssts..sss...................................................t..................sscpcps....pNYEls+shppspp.ssGplc..RLSVAVll.st..........t....................t..thtshssp...clsplpsLVpsAlGas.....ts..................RGDs.......l.sVsshsF .................................................LsPllG.ss.ph+.spVsspl.DFspp-popEpY........sP.....st.............ssl.RScQp.p-sp....pust.....hssG......lP.GuhS.Np.....Pss.......................s......tts...........................................sp.............ssc..pppTpNYEl..s+ol.pcscp..ssG.s..lp..RLSV.AVl.V..st..................................p...................psp.hsh.osc...phpp....l..csLlpsAh...Gas.......tp.......................RGDs.......l.sVsshsF.............................................................. 
1 175 342 438 +8178 PF08346 AntA AntA/AntB antirepressor Wuster A anon Pfam-B_2097 (release 18.0) Family In E. coli the two proteins AntA and AntB have 62% amino acid identities near their N termini. AntA appears to be encoded by a truncated and divergent copy of AntB. The two proteins are homologous to putative antirepressors found in numerous bacteriophages, such as the hypothetical antirepressor protein encoded by the gene LO142 of the bacteriophage 933W [1]. 20.50 20.50 20.60 21.10 19.30 20.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.65 0.72 -3.78 57 810 2009-01-15 18:05:59 2005-08-11 09:43:36 7 14 597 0 102 564 4 70.80 41 31.26 CHANGED VsAR-LHphLcl.ppcFssWhctRhpcYs.FhEs.DFhsh.p.....................tpspuu+............phDYtlTLDhAKEluMlpRs ..............lsAR-LHphL......pV..ppcFssWhc.Rh..p...cYG.F.tEstDah..shpp..........................ht.s.tts..us+...........ptpDYtLTLDhAKElAMlpRs...................................................................... 0 22 65 79 +8179 PF08347 CTNNB1_binding N-terminal CTNNB1 binding Wuster A anon Pfam-B_2064 (release 18.0) Family This region tends to appear at the N-terminus of proteins also containing DNA-binding HMG (high mobility group) boxes (Pfam:PF00505) and appears to bind the armadillo repeat of CTNNB1 (beta-catenin), forming a stable complex. Signaling by Wnt through TCF/LCF is involved in developmental patterning, induction of neural tissues, cell fate decisions and stem cell differentiation [3]. Isoforms of HMG T-cell factors lacking the N-terminal CTNNB1-binding domain cannot fulfill their role as transcriptional activators in T-cell differentiation [1,2]. 
22.40 22.40 22.40 22.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.81 0.71 -4.06 18 496 2009-01-15 18:05:59 2005-08-11 09:47:26 6 5 89 10 161 406 0 168.80 45 42.31 CHANGED MPQLs.....uuGGD.DLGAsDEhlsFKDEG-.p-EKh...sENs.s.tD....Ls-lKSSLVsEoEsspssss................cth......................pRpsps.h....sa.-c.tc+h-p.s+ppp..shGhh.+s.sYs........uas.hhhh......sps..sNGohuP......................................uNKlsVlpss.th.s..LsPLhs...Yss-Hao.GsPP....shhPs-l.ssKs....Gls.RPspss-lsshYPLssuthGQls...asl.sW .......................................................................................stsuD.-LsusDEhh.FpDE..Gt.pp-......tt..........L.s-.KSSLls.oE...ts.........................................................tct..s............a...t...p.....tt..tt......s..uhh..+sssYs........uas..hhM.............t.shh...ssuu.SPs.............................................................................................................SNKVPVVQ.sHtV.HP..LTPL.IT........YS.sEHFo..PG.s.P..........sHlPsDl.ssKp....Gls.RsPpssDlSsaYPLSPGsVGQIs...aPLGW.............................................................................................................. 0 24 37 88 +8180 PF08348 PAS_6 YheO; YheO-like PAS domain Fenech M anon Pfam-B_2023 (release 18.0) Domain This family contains various hypothetical bacterial proteins that are similar to the E. coli protein YheO (Swiss:P64624). Their function is unknown, but are likely to be involved in signalling based on the presence of this PAS domain. 
21.10 21.10 21.10 21.30 21.00 20.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.58 0.71 -4.54 36 1856 2012-10-04 01:10:46 2005-08-11 09:50:25 6 5 1451 0 282 973 28 116.10 38 51.45 CHANGED lLpsatsls-uluphhGspsEVVLHsL..cs.csolltIsNscloGRplG.sPhTph........uLctlpp.t..pppshtsYhspstcG.+hl+SsohhI+sspschlGhLCINh....Dlsshpth.pphLpt ........Lcsapsll-GLutll.Gsp.CElVLHsL..p....c..h..csShltIsNGchTGRplG.uPl.T-l.........AL.chL+c.....hp........cp..s......s...p...sYh....o+spsG.tlhKSsTlhI+s.c.ct.+.lI.GlLCINh..slss.h...pph...h.......................... 0 69 132 209 +8181 PF08349 DUF1722 Protein of unknown function (DUF1722) Wuster A anon Pfam-B_4169 (release 18.0) Family This domain of unknown function is found in bacteria and archaea and is homologous to the hypothetical protein ybgA from E. coli. 21.20 21.20 21.30 21.40 20.50 21.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.20 0.71 -4.22 94 1436 2009-01-15 18:05:59 2005-08-11 09:51:49 6 6 1379 0 231 801 53 115.00 35 49.27 CHANGED KhlLMAHs.ptY+pLGpllAshpphs.hcphhppYtptlMpALpphAoppspsNVL.HlhGYFKcpLssp-Kp-lhclIppYRpGhlPLlsPlTLL+Halpcas..ssYLtpQs.YL.pPaP ......................KhhlLAHSQstY+cl.GthlAsh.......ppht..l-shhppYppplhthLpcssohpspsNVL.Hl.GYF+ppls..spE+ppltpLIpp.YRpG.p.h.PlhsPL.shlKcahscYP..ssYLtpQpYhp.a......................................... 0 62 133 183 +8182 PF08350 DUF1724 Domain of unknown function (DUF1724) Wuster A anon Pfam-B_1158 (release 18.0) Family This domain of unknown function has so far only been found at the C-terminus of archaean proteins, including several transcriptional regulators of the ArsR family (see Pfam:PF01022). 
20.50 20.50 21.90 20.50 20.40 19.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.04 0.72 -4.44 67 138 2009-01-15 18:05:59 2005-08-11 09:55:08 5 7 40 0 120 138 7 63.20 29 23.56 CHANGED Nsclalh.psslcl.uhs.VTDcahsLuLFscsGp.aDp.ptllSa-spAlpWGcELFpaY+ppucpl ..........Nhplalh.ppslcl.uhs.VTDchhhLuLapc....sGp.aDp..ptllSh.....-ppAlcWGp-LFpaYcppup....... 0 36 69 82 +8183 PF08351 DUF1726 Domain of unknown function (DUF1726) Wuster A, Eberhardt R anon Pfam-B_3131 (release 18.0) Family This domain of unknown function is often found at the N-terminus of proteins containing Pfam:PF05127. Its fold resembles that of Pfam:PF05127, but it does not appear to bind ATP [1]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.71 0.72 -4.30 42 1117 2009-01-15 18:05:59 2005-08-11 09:57:59 6 15 1047 2 384 869 19 91.60 36 11.76 CHANGED Yp-oc+l...LGpTashllLpshcslpPNhluRhl-sVcGGGllllLhsshpphcphhs..............................shcpph...hs..sat..clhtpFpcRFhtpLtpp.cshhlhD ..........pplLGppaphsVhDs.h...c....uhsssshAthsGTlcuGuhllLLls.s.h..pphcph.s..............................ssphRa...psp.sps........ss..s+Fsp+hh.psLsss.ppsh............................................................. 1 121 208 310 +8184 PF08352 oligo_HPY Oligopeptide/dipeptide transporter, C-terminal region TIGRFAMs, Fenech M anon Pfam-B_3025 (release 18.0) Family This family features a region found towards the C-terminus of oligopeptide ABC transporter ATP binding proteins, immediately following the ATP-binding domain (Pfam:PF00005). All characterised members appear able to be involved in the transport of oligopeptides or dipeptides. Some are important for sporulation or antibiotic resistance. Some dipeptide transporters also act on the heme precursor delta-aminolevulinic acid. 
21.00 9.90 21.00 9.90 20.90 9.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.38 0.72 -3.62 59 28166 2009-01-15 18:05:59 2005-08-11 09:59:58 7 33 3987 0 6863 20640 4857 55.40 33 17.52 CHANGED VEhGsspplh..psPtHPYTptLlsuhPphsstpp...........htuphP..shtphsp..................G..ChFtsRCshspst.C ...............VEtGs..sccl..a....ps..P..p......HPYTpuLL..u..u....l..P..p...hsst..t.....................h....t................................................................................................................................ 0 1910 4134 5534 +8185 PF08353 DUF1727 Domain of unknown function (DUF1727) Wuster A anon Pfam-B_2131 (release 18.0) Family This domain of unknown function is found at the C-terminus of bacterial proteins which include UDP-N-acetylmuramyl tripeptide synthase and the related Mur ligase. 25.00 25.00 29.30 28.40 22.00 18.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.45 0.71 -4.46 66 1470 2009-01-15 18:05:59 2005-08-11 10:01:48 5 4 1459 0 228 929 38 108.60 40 24.67 CHANGED LlK..NPsGhspslshl........ssps......................................hslllhlN...sshADGpDsSWlWDsDFEpL.st.p.......lpplhsuG.RttDhAlRL+hA..Gls.............pphph..pp-hcpslpt..hppssscp..lYlL....sTYTAh .....LsKNPsGhspuL.sh.lt......s.ts..............................................hsl...ll...hLN...uNh.....AD...GhD....sS.WIWDsDFEpl..sp.p.......ltp....lhs.uGs.R.tpDlAlRL+lA......Gls....................t....lh...tp-.lppslpp..hp..tps.spc.....shlL....uTYTAh................. 0 76 155 202 +8186 PF08354 DUF1729 Domain of unknown function (DUF1729) Wuster A anon Pfam-B_3179 (release 18.0) Family This domain of unknown function is found in fatty acid synthase beta subunits together with the MaoC-like domain (Pfam:PF01575) and the Acyltransferase domain (Pfam:PF00698) [1]. The domain has been identified in fungi and bacteria. 
25.00 25.00 40.10 39.20 20.80 19.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.77 0.72 -4.18 31 433 2009-01-15 18:05:59 2005-08-11 10:05:45 5 23 350 9 202 457 1 54.90 53 2.12 CHANGED IPsLD-cFEhaFKKDSLWQSEDl-AVlspDspRVCILpGPVAspaos..pssEPlt-IL ..VPVlDc-hcpWa+pDSLWQSED.pth...DsspVCIl.GPsAVtthT..plDEPVu-lL...... 0 48 111 168 +8187 PF08355 EF_assoc_1 EF hand associated Wuster A anon Pfam-B_4111 (release 18.0) Family This region typically appears on the C-terminus of EF hands in GTP-binding proteins such as Arht/Rhot (may be involved in mitochondrial homeostasis and apoptosis[1]). The EF hand associated region is found in yeast, vertebrates and plants. 19.10 19.10 19.40 19.60 18.20 18.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.49 0.72 -4.69 32 382 2009-01-15 18:05:59 2005-08-11 10:07:56 7 37 244 0 248 390 0 73.90 40 11.79 CHANGED sW..pssa.sosspNp.tGhlTLpGaLuQWohsTaLDhppTLEYLuYLGass.....................................ssssAlpVTRtR+hcp+ptps ...............W..ssshsso.sssN-.tGal.TLpGaLuQWo.LoTaLDsppoLE.YLuYLGash..................................................................ttpt.s..pssAl..p..V.TRs++hDhc+tp..................................................... 1 68 122 190 +8188 PF08356 EF_assoc_2 EF hand associated Wuster A anon Pfam-B_3018 (release 18.0) Family This region predominantly appears near EF-hands (Pfam:PF00036) in GTP-binding proteins. It is found in all three eukaryotic kingdoms. 
21.50 21.50 21.70 22.00 21.40 21.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.97 0.72 -4.23 30 391 2009-01-15 18:05:59 2005-08-11 10:11:02 7 40 244 0 254 401 0 88.10 46 14.14 CHANGED spPLsspsLtslKpslpcp.hPsuspp...pGlTlpGFLhLNplahE+GRHETTWsILRpFtYsDsLsLp-caLhPc..........hclPsssSsELSstGY ........................................pPLtspsLpclKp.llp+p...hs-G.Vtc...........sG.lTL..cGFLaLps..LFIp+GRHETTWslLR+FGYsDsLpLss-aLa.Ph..........lclP...s...ssosELs.puY......................... 0 71 126 194 +8189 PF08357 SEFIR SEFIR domain Fenech M anon Pfam-B_33671 (release 17.0) Family This family comprises IL17 receptors (IL17Rs, e.g. Swiss:Q60943) and SEF proteins (e.g. Swiss:Q8QHJ9). The latter are feedback inhibitors of FGF signalling and are also thought to be receptors. Due to its similarity to the TIR domain (Pfam:PF01582), the SEFIR region is thought to be involved in homotypic interactions with other SEFIR/TIR-domain-containing proteins. Thus, SEFs and IL17Rs may be involved in TOLL/IL1R-like signalling pathways [1]. 21.40 21.40 21.50 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.82 0.71 -4.15 15 480 2012-10-02 18:56:14 2005-08-11 10:17:46 6 21 166 0 243 474 10 138.00 22 25.54 CHANGED hKValsYuu.DsshahchVtphAphLpshh..GhEVslDLW-chclsp.G.hpWhtpp....lppushVlllhS.sh.thhcppsscpcusspspup...........t-hFhsshstl...l.p..p..spshp+alsVYF.shspppclPshLph..sspapL.cphsplhscL .......................................+VhlsYSt...Dss.a...phVhphAphLpsh....GhcVh....lDl..a-p.p.........lt..t.u..hpWhtpp.................lpp....ss..hllllso.s.......h..hh.ptt.t.t.p..p..ps.s.s..t.tps................................................tshah..h.t..ht.h............ph..p......stsh.t.+als..shF..t...sppt......p..l..P..shhp........h.ha..p..h...pph.phh.t......................................................... 
0 52 85 148 +8190 PF08358 Flexi_CP_N Carlavirus coat Wuster A anon Pfam-B_2014 (release 18.0) Family This domain is found together with the viral coat protein domain (Pfam:PF00286) in coat/capsid proteins of Carlaviruses infecting plants. 19.70 19.70 24.90 24.60 19.10 16.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.48 0.72 -4.19 25 358 2009-01-15 18:05:59 2005-08-11 10:18:53 5 3 50 0 0 371 0 51.70 47 17.60 CHANGED RLspLp-hLppppsusplsNsuFEh.GRPsLc.sssM+sDsoN.sYsRPSlDtL ..RLspLhEhhtpcppsssloNsuaEh.GRPsLpssssMRtsPsN.PYuR.SlDtL.... 1 0 0 0 +8191 PF08359 TetR_C_4 YsiA-like protein, C-terminal region Fenech M anon Pfam-B_20730 (release 17.0) Family The members of this family are thought to be TetR-type transcriptional regulators that bear particular similarity to YsiA (Swiss:P94548), a hypothetical protein expressed by B. subtilis. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.54 0.71 -4.22 8 631 2012-10-03 00:15:22 2005-08-11 10:46:25 6 4 485 2 267 630 83 130.10 20 62.86 CHANGED FcEKMGpFVE+IccchsstsospEKLtlLlcpHFptLuuDhcLAIVTQLELRQSNpELRhKINEVLKGYLsllDcIltEGhcpGEF+p-LDlRLARQMIFGTlDEsVTsWVMs-pKYDLsALucsVacLLlpG ..................................................................................thlpphcp.th..t.t.....p....s.shp+.Lth...llc....sHh...ph...l....t..p...s...p....s....l.....s.....h.l.h.....h...E..h.+.p.t..s..p..c..l.p...p.c.l..p.p.lh...+...p.....Y....hphlp...cllpcGhppGphc....s.-...ls....s.c.lspphlhGslp.th.lhphhh..s..p..t..p.hs..h.t.sp..hhthh...h........................................ 0 121 209 240 +8192 PF08360 TetR_C_5 QacR-like protein, C-terminal region Fenech M anon Pfam-B_96140 (release 17.0) Family This family features the C-terminal region of a number of proteins that bear similarity to the QacR protein (Swiss:P23217), a transcriptional regulator of the TetR family. 
QacR is able to bind various environmental agents, which include a number of cationic lipophilic compounds, and thus regulate the transcription of QacA (Swiss:P23215), a multidrug efflux pump [1]. The C-terminal region contains the multifaceted, expansive drug-binding pocket, which is composed of several separate, but linked, binding sites [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.51 0.71 -4.18 3 259 2012-10-03 00:15:22 2005-08-11 10:46:40 6 3 203 104 24 172 11 128.70 32 65.29 CHANGED cppcWpEKWcccEKpYoTuTEKLYulAEasL.p-YppPLpNAIpEFuo-.ssosSIl-cMhuLsscslcsYcsIl-EGIQSGEFpI-NVcDlShIluuhLuGLssFhHEh-hcELc+LaNKAIsIFLpGlSs ................................................pW.-pWpcc....ph.h....pTspEKLYths-hhl..psl.p.pPlpp.....A.hpEF.p.hp..hhocp.l.-clht.lpc.c....hsha...cpLl-cGIpSGEFp....ps.sscslshIlsu.h.l.s.G.ls..s.h.ha...p.....s.h.c..chcc.lhpch.slhLpGhs............................................ 0 11 19 20 +8193 PF08361 TetR_C_2 MAATS-type transcriptional repressor, C-terminal region Fenech M anon Pfam-B_3020 (release 18.0) Family This family is named after the various transcriptional regulatory proteins that it contains, including MtrR (Swiss:Q6RV06), AcrR (Swiss:P34000), ArpR (Swiss:Q9KJC4), TtgR (Swiss:Q9AIU0) and SmeT (Swiss:Q8KLP4). These are members of the TetR family of transcriptional repressors, that are involved in the control of expression of multidrug resistance proteins [1,2,3]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.60 0.71 -4.20 12 1463 2012-10-03 00:15:22 2005-08-11 10:46:56 6 2 937 24 179 672 28 119.10 37 55.81 CHANGED sPLpllRElLIalLpusVs-.+pRtlMEIlFHKCEFVGEMssl.phpcpLhhtsYsRIEpsLpcCIptt.LPssLch+RAAIhhRuhhoGlhENWLFsP-SFDLpp-AtsLVDshl-Mlph ..............................................................DPLphLREhLIhsLpthspp.RpptLhc.I.laHK.CEFs..sEMhsltptpcphshp..s.h.pplc....psLppChpt..thL.sssLsschAsIlh+uhlSGlhpNWL..hsP..p.....uaDLhKpA..shVshlLcMh...................................... 0 27 74 128 +8194 PF08362 TetR_C_3 YcdC-like protein, C-terminal region Fenech M anon Pfam-B_4012 (release 17.0) Family This family comprises proteins that belong to the TetR family of transcriptional regulators. They bear particular similarity to YcdC (Swiss:P75899), a putative HTH-containing protein. This family features the C-terminal region of these sequences, which does not include the helix-turn-helix. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.73 0.71 -4.47 16 1091 2012-10-03 00:15:22 2005-08-11 10:47:06 6 4 871 4 194 637 128 140.80 48 65.60 CHANGED LcslL-sWLpshpsFcsps-PhpsLsuYI+sKlEhSR-hPpuS+lFAsEIhpGAPpl.s.LtppL+thscc+sslIptWlcpG+l.AslDPhHLlFsIWAsTQHYADFshQlpsVoG+sthscsta-pAscslppllLcGhtPc .......................LcpIL-lWL.uPLc.s.F.c.t.-.hs.PltAlp.cYIRhKLElSRDaPpASRLFshEhltGAPhLhc..c..LsucL..Ks.L...l.-cK.u.A.lItuWlc....sG+.l.A.......P.lDPpHLIFhIWAsTQ.HYADFusQVcAVT.G...t.sh.t.s-shFsps.s-sVp+lIlcGlts............................................... 0 30 71 133 +8195 PF08363 GbpC Glucan-binding protein C Wuster A anon Pfam-B_3074 (release 18.0) Family This domain is found in the Streptococcus Glucan-binding protein C (GbpC) and also in surface protein antigen (Spa)-family proteins which show sequence similarity to GbpC [1]. 
21.00 21.00 22.60 26.10 20.60 20.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.72 0.70 -5.16 15 406 2009-01-15 18:05:59 2005-08-11 10:50:30 5 32 178 6 30 396 0 263.90 27 26.07 CHANGED EpcKs-..DGaLo.cPpu.QuLVF-s.EPsAplolso...................thtpYsp+ph..phcshcl.pthpssstssushphhushssKtshop.hs.t........VlLc+GpslTsTYTNLpNSpYs....GKKISKlVYpYTlcsoop.ps...+lhlslasDPTlslFsuAaTGssc.+pssl.lcschpFYDEsGp.Ishss...ALlSluSLN+pps........uIEhsKsasGp..FlcIsGSSls.cpsGplYAscohsaKps..GS+ash........SsWDoss......uPsuWYGAGssph.oGsshphTlGupsss.s....................sIWFuhN ...........................................................................................................................................................................ctp..pu.ls.cs.u.psLsapp..EspA.h.s..hps.........................htph.tt.h..th..t...t.h.t.hpps.thht..s.p.th.h.sshsst.p.t................hh..lcpGpshosTYsNLps...upap.....G+K....IoKlh..apY.s.lpss.s.p...ps....ph..hhl.sDP.Thshhhu.......s.psps........p....th.hph.p.hp.Fa..DcsGp...lshsp.........ulhuhuSLNptts.......................phEhlp.s..h..s.sp......al.......IsGSoVsh.p.s.s.s...t......hYusps..phtps......Guphs............................ssWDs.ss.................................us.uaaGAushph.sssphsh....o.h.Gtpsts.s.....................shWFuhN.............................................................................. 0 4 11 20 +8196 PF08364 IF2_assoc Bacterial translation initiation factor IF-2 associated region Wuster A anon Pfam-B_3037 (release 18.0) Family Most of the sequences in this alignment come from bacterial translation initiation factors (IF-2, also Pfam:PF04760), but the domain is also found in the eukaryotic translation initiation factor 4 gamma in yeast and in a hypothetical Euglenozoa protein of unknown function. 
20.90 20.90 21.20 20.90 19.40 20.70 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.78 0.72 -3.94 89 1458 2009-01-15 18:05:59 2005-08-11 10:57:39 6 17 1446 0 330 951 291 40.90 51 4.66 CHANGED stc+lTLp.........R+ppopl+pshupG.....+s....KsVsVEVR..KKRshl+ps ...........tPcKLTLp.........RKTpSTl.p.lsu..ouG.............KS...............K.oVQVEVR..KKRTaVKRs...... 0 70 174 253 +8197 PF08365 IGF2_C Insulin-like growth factor II E-peptide Wuster A anon Pfam-B_4175 (release 18.0) Family This domain is found at the C-terminal domain of the insulin-like growth factor II (IGF-2, also see Pfam:PF00049) in vertebrates and seems to represent the E-peptide [1,2]. 21.00 21.00 21.50 22.00 20.20 20.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.89 0.72 -4.21 8 168 2009-01-15 18:05:59 2005-08-11 11:00:23 6 3 122 0 22 173 0 53.80 62 28.71 CHANGED Ka.KYDlWQ.KSuQRLRRGlPAlLRARRaRhhAcclcAtcpAp.hHRPLhoLPopcP ......Ka.+..Y-..sWp.puAQRLRRGlPAlLRA++hRp.AcclcAhcpAh.hHRPLIuLPoc.P.............. 0 1 4 10 +8198 PF08366 LLGL LLGL2 Wuster A anon Pfam-B_4088 (release 18.0) Family This domain is found in lethal giant larvae homolog 2 (LLGL2) proteins and syntaxin-binding proteins like tomosyn [1]. It has been identified in eukaryotes and tends to be found together with WD repeats (Pfam:PF00400). 
22.00 22.00 23.10 24.30 21.90 20.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.31 0.72 -4.14 12 358 2009-01-15 18:05:59 2005-08-11 11:10:57 8 12 89 0 195 366 1 104.40 45 9.98 CHANGED PhpppsPaG........s.PCKuIpKl.a+ss.+supsFlIFSGGMPpsshGc+.pCloVhp.upspssL-hsppllDFhslsps..sschp-PhAlsVLLEc-LVllDLpssGaP ............................hpshhPa.G..........P.PCKsI.Kl....a..+os..c.s..up..sFlIFSG................Ghsh...ss.hGcR..slTVh+..G+stssL...-hsppllDFhTl.......s..........-s.s..h...............s..................s.-...........a......p-.....P......aAlVVLLEc-LVVlDLppsGaP........................ 0 45 59 124 +8199 PF08367 M16C_assoc Peptidase M16C associated Wuster A anon Pfam-B_3062 (release 18.0) Family This domain appears in eukaryotes as well as bacteria and tends to be found near the C-terminus of the metalloprotease M16C (Pfam:PF05193). 25.00 25.00 25.90 25.40 24.90 23.90 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.32 0.70 -5.53 22 854 2009-01-15 18:05:59 2005-08-11 11:13:51 6 17 730 10 360 815 243 242.30 23 25.25 CHANGED hpPcpshppcpspcEppcLcphhppLoEp-pcpIhcpuhpLpphQpppp...sLs.sLPsLslsDlscpscphslpppp.sshplhh+shsTNsIsYhchhhsls.slPt-hhPalsLassslspl.GTtshsYpphtppIphpTGGluhusplhssspsspphp.thtlsupALscpsschFplhp-llspscFsctc....Rl+.LlpphtuphssulssSGHsaAhshusupho.sutlsEphuGls.lchlpcL 
.............................htPctshtpcppptp....ppcLpph....pts.L......o......c......c......-......hpp..l..hc.ps.tp.L....pph..Qp..p.c.............sls...pL...P..t........Lp..lpD.l....s..p..p..h.....p.........h.h.............h...p.................p......p.......h...............s.........s.............h....s...............l........hh.p.ph....T..NGlsYh..phhh........s........l........s...s........l........s....p....-.....h...............sa...lsLhsp.hlsp..............l.G.Ttph.s.a.tc.l.....pphlphpT.G.............G.lsh.....s...hp.....h.............h......s....s.........h....p......s.........h........s......p.........h...............p...s...t..........hh......l........su+s....Ls...pphschhcl.h.p-ll..........pp..sc...F.s...-pc.......Rl.+pllpptps..ph.pss.lhsuGHthAhtp.usuth.o.sut.h......p-.thsGls.hphlppl................................................................................................................... 0 144 229 310 +8200 PF08368 FAST_2 FAST kinase-like protein, subdomain 2 Vella Briffa B, Fenech M anon Pfam-B_2858 (release 10.0) Family This family represents a conserved region of eukaryotic Fas-activated serine/threonine (FAST) kinases (EC:2.7.1.-) that contains several conserved leucine residues. FAST kinase is rapidly activated during Fas-mediated apoptosis, when it phosphorylates TIA-1, a nuclear RNA-binding protein that has been implicated as an effector of apoptosis [1]. Note that many family members are hypothetical proteins. This subdomain is often found associated with the FAST kinase-like protein, subdomain 2. 
26.90 26.90 26.90 26.90 26.10 26.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.05 0.72 -3.61 22 367 2009-01-15 18:05:59 2005-08-11 11:15:47 7 6 76 0 185 347 0 87.50 27 13.55 CHANGED phph+LhpLstslpLEsP-a......psPhL.s...p........hspcstphpsh.ppltcsLpplL.Guppth+hsVhTsasaslDhEshLDpc.tpsLP ......................................hhtpLhpLstslpLE....tPtY......pGPh.L.tt..p.......hhspps.tph...shpppltcsLppLL.Gupph....hptslhsPasa........s.l..DhElhlDpp.tpsLP........... 0 26 39 86 +8201 PF08369 PCP_red Proto-chlorophyllide reductase 57 kD subunit Wuster A anon Pfam-B_2047 (release 18.0) Family This domain is found in bacteria and plant chloroplast proteins. It often appears at the C-terminal of Nitrogenase component 1 type Oxidoreductases (Pfam:PF00148) and sometimes independently in bacterial proteins such as the Proto-chlorophyllide reductase 57 kD subunit of the Cyanobacterium Synechocystis. 27.30 27.30 27.90 27.60 26.30 27.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.22 0.72 -4.09 132 610 2009-01-15 18:05:59 2005-08-11 11:17:14 5 21 408 2 163 571 349 46.20 42 10.64 CHANGED Wss-ApttLc+.l....Ph.FVR....t+lR+ssEphApppGhsplTh-hlhp.A+ ........Wss-ActELpK.I....Ph.FVR....s+l++NTE+aARcpGhpplTlEshhsAK... 0 45 93 131 +8202 PF08370 PDR_assoc Plant PDR ABC transporter associated Wuster A anon Pfam-B_2126 (release 18.0) Family This domain is found on the C-terminus of ABC-2 type transporter domains (Pfam:PF01061). It seems to be associated with the plant pleiotropic drug resistance (PDR) protein family of ABC transporters. Like in yeast, plant PDR ABC transporters may also play a role in the transport of antifungal agents [1, also Pfam:PF06422]. The PDR family is characterised by a configuration in which the ABC domain is nearer the N-terminus of the protein than the transmembrane domain [1]. 
22.70 22.70 23.30 23.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.01 0.72 -4.46 38 427 2009-01-15 18:05:59 2005-08-11 11:20:17 6 26 40 0 289 452 0 64.60 40 4.80 CHANGED Wsphhsss..s....pTLGtslLcuRGlas-shWYWIulGALlGFsllFNhlaoLALsaLpPhs+upullS .............Wpp..sss...s....polGhtlLcuRGlFs-s..hWYWIGlGALlGaslLFNllFslALsaLs..Phscspshl..................... 0 35 197 250 +8204 PF08372 PRT_C Plant phosphoribosyltransferase C-terminal Wuster A anon Pfam-B_3195 (release 18.0) Family This domain is found at the C-terminus of phosphoribosyltransferases and phosphoribosyltransferase-like proteins. It contains putative transmembrane regions. It often appears together with calcium-ion dependent C2 domains (Pfam:PF00168). 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.87 0.71 -4.72 29 485 2012-10-01 21:10:52 2005-08-11 11:26:41 5 13 121 0 278 450 4 140.40 36 18.93 CHANGED LPTlFLYhFhIGlWp..YRhRPRtPP.HMDs+LSpA-u..spPDELDEEFDoFP.To+ssDlVRhRYDRLRoVAGRlQoVVGDlATQGERlQALLSWR..DPRATulFlhhCLlsAlllYssPh........+llsllsGh....YhlRHP+FRs.+hPSsPhNFFRRLPupoDshL ..........................................................................................hs.s.-s..h.p..DElD.E..E.hD.shs..ss.c......chl+h..RYctlpsVuu+.lQsllG-lAo.GERlpslhsWp..sPchoslhllh..h.hlusll..LYhsPh..........+hl.sllhGl...........ahhp+......+hRs.....t.h...P...s.....l..sF..hpRlPucsp................................................ 0 51 143 213 +8205 PF08373 RAP RAP domain Fenech M anon Pfam-B_5583 (release 17.0) Domain This domain is found in various eukaryotic species, where it is found in proteins that are important in various parasite-host cell interactions. It is thought to be an RNA-binding domain [1]. The domain is involved in plant defence in response to bacterial infection [2,3]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.82 0.72 -4.18 79 594 2012-10-11 20:44:44 2005-08-11 11:27:25 5 25 110 0 425 598 10 58.80 25 8.24 CHANGED lElsGspHa..............hpso.pphp......spsth+p+hLpthG.apl.lplsaa....................-Wpph...sppp+hp.Y..lpch ..............................h.s.ppF.......................................................................spss..pphh..............Gppsh+cRpL..........p.t.h.G..a...pl.lplsaa....................EWpph.....pspppphp.YL+p............................................ 0 160 211 306 +8206 PF08374 Protocadherin Protocadherin Wuster A anon Pfam-B_4100 (release 18.0) Family The structure of protocadherins is similar to that of classic cadherins (Pfam:PF00028), but particularly on the cytoplasmic domains they also have some unique features. They are expressed in a variety of organisms and are found in high concentrations in the brain where they seem to be localised mainly at cell-cell contact sites. Their expression seems to be developmentally regulated [1]. 
25.20 25.20 25.50 25.20 25.00 25.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.57 0.70 -4.71 7 319 2009-01-15 18:05:59 2005-08-11 11:47:02 6 13 55 0 124 322 0 198.20 47 21.31 CHANGED oluNuollps.lt+SlcTPLspsIussspst.u..pphlsIhlullAGshsVlLlIhlsshlR.CRpspp+puhQtGKp..sp-ahoP..........ptps+p....pKt..KKcKK...........pKSsKs.h.shVTlcts+s--tht-phshplsL..-hpppohu+a.....hPssapPs............SPDLARHYKSuSP.PuhQLpPpoPsA.sKKHpllQ-LP.sNTFVGus..................ucssSouSDpaSs.pCposs.pa ..........................oluNuohlpsLl.t+Shc.TPLs.sIu.sss.p.t.s..pphlsIhlulVAGshsVlllIhlssll.R.CR.....psp.php.....uhQtuKp..sp-ahoP..........p.csKp.........pKt...KKc..K.K...........pK...SPKs.h.shVTlEtsKs--tst-t.hshplsL....-h-ppohu+a.....hPssatPs............SPDLARHYK.SuSP.PuhQLpPpoPsu.sKKHpllQ-LP.sNTFVGu...................ucssSouSDpaSs.pspops.ta..................... 0 5 16 57 +8207 PF08375 Rpn3_C Proteasome regulatory subunit C-terminal Wuster A anon Pfam-B_4098 (release 18.0) Family This eukaryotic domain is found at the C-terminus of 26S proteasome regulatory subunits such as the non-ATPase Rpn3 subunit which is essential for proteasomal function [1]. It occurs together with the PCI/PINT domain (Pfam:PF01399). 21.20 21.20 22.90 21.80 19.30 18.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.39 0.72 -3.57 24 366 2009-09-11 08:23:08 2005-08-11 11:49:06 6 6 295 0 240 357 3 65.90 51 13.11 CHANGED DlYSTp-PppsFcpRIpFCLpLHN-uV+AMRYPssppcpp.cpt.pppp-c-p....-ltccls-s.-hD..D ......DlYuTpEPQhAFHpRIpFCLslHN-uVKA......MRaPsssapp...-..lcsA...c-pREREp..-..ElAKEhsEt..D.D................................. 
0 84 136 202 +8208 PF08376 NIT Nitrate and nitrite sensing Wuster A anon Pfam-B_37103 (release 17.0) Family The nitrate- and nitrite sensing domain (NIT) is found in receptor components of signal transducing pathways in bacteria which control gene expression, cellular motility and enzyme activity in response to nitrate and nitrite concentrations. The NIT domain is predicted to be all alpha-helical in structure [1]. 20.40 20.40 20.50 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.53 0.70 -4.85 118 931 2009-01-15 18:05:59 2005-08-11 12:01:15 5 47 468 2 424 987 49 239.30 17 31.40 CHANGED husLlHpLQcER...........GhSusal....uSp......G.t...th....sspLtspRptoDpthsphpsthp........ph.ts.stt...........htpplsphhptLsp.LsslRppl.....Dshs.lsssps.hshYo.pllstlLsllsplsptsscsplspthsAhhshhpuKEtAGpERAllussaus.....sph.ssshhpchhshlupQpshh.cpFtsh..ussptpphapphhss...sshpp...lpphRphshs...........pss........tts.hs.hsupp.WFsts.Tp+I.....shl+pl.Esplss..plhpts ........................................................................h.spllptLQpER.............................sho.ssaL.............uus.......s...p..th........tsplppp+.tpoDpshsphppthp...........ph.tt.st............htptlsphhp.t.lsp.LsslRpp..l........................ss..tp...hs....s......sp.s....h.sh.Yo.pll..pt...l.......l.sh.h.t.......p.hs.....p....t...s......s...s.spl...sp...th...t.....u...h...h.......s.....l.......h.....p.....u+EhsutpRu.lhsss.hst..............................sp.h...sss...t....hpp......h.h....shhspp......p.....t....h.......h..pp.htsh..............us..s..p.t.....pphhpp.hhss.......sshpp......hpp...h.pp...th..hs...................................pss............tts..ht.....hs.s....pp...Whsh....totpl....sh....hppl.cpplhtph....h.................................................................................................................... 
1 142 295 380 +8209 PF08377 MAP2_projctn MAP2/Tau projection domain Wuster A anon Pfam-B_26981 (release 17.0) Family This domain is found in the MAP2/Tau family of proteins which includes MAP2, MAP4, Tau, and their homologs. All isoforms contain a conserved C-terminal domain containing tubulin-binding repeats (Pfam:PF00418), and a N-terminal projection domain of varying size. This domain has a net negative charge and exerts a long-range repulsive force. This provides a mechanism that can regulate microtubule spacing which might facilitate efficient organelle transport [1,2]. 18.20 18.20 18.30 18.30 15.50 18.10 hmmbuild -o /dev/null HMM SEED 1134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.07 0.70 -14.11 0.70 -7.01 5 79 2009-01-15 18:05:59 2005-08-11 12:08:39 5 7 38 0 30 63 0 731.30 53 59.19 CHANGED DKVADVPVSEATTVLGDVHSPAVEGFVGENISGEEKGTTDQE...KKETSTPSVQEPTLTETEPQTKLEETSKVSIEETVAKEEESLKLKDDKAGVIQTSTEHSFSKEDQKGQEQTIEALKQDSFPISLEQAVTDAAMATKTLEKVTSEPEAVSEKREIQGLFEEDIADKSKLEGAGSATVAEVEMPFYEDKSGMSKYFETSALKEDVTRSTGLGSDYYELSDSRGNAQESLDTVSPKNQQDEKEL.LAKASQPSPPAHEAGYSTLAQSYTSDHPSELPEEPSSPQERMFTIDPKVYGEKRDLHSKNKDDLTLSRSLGLGGRSAIEQRSMSINLPMSCLDSIALGFNFGRGHDLSPLASDILTNTSGSMDEGDDYLPPTTPAVEKIPCFPIESKEEEDKTEQAKVTGGQTTQVETSSESPFPAKEYYKNGTVMAPDLPEMLDLAGTRSRLASVSADAEVARRKSVPSEAVVAESSTGLPPVADDSQP.VKPDSQLEDMGYCVFNKYTVPLPSPVQDSENLSGESGSFYEGTDDKVRRDLATDLSLIEVKLAAAGRVKDEFTAEKEASPPSSADKSGLSREFDQDRKANDKLDTVLEKSEEHVDSKEHAKESEEVGDKVELFGLGVTYEQTSAKELITTKETAPERAEKGLSSVPEVAEVETTTKADQGLDVAAKKDDQSPLDIKVSDFGQMASGMSVDAGKTIELKFEVDQQLTLSSEAPQETDSFMGIESSHVKDGAKVSETEVKEKVAKPDLVHQEAVDKEESYESSGEHESLTMESLKPDEGKKETSPETSLIQDEVALKLSVEIPCPPPVSEADSSIDEKAEVQMEFIQLPKEESTETPDIPAIPSDVTQPQPEAlVSEPAEVRGEEEEIEAEGEYDKLLFRSDTLQITDLLVPGSREEFVETCPGEHKGVVESVVTIEDDFITVVQTTTDEGELGSHSVRFAAPVQPEEERRPYPHD.EELEVLMAAEAQAEPKDGSPDAPATPEKEEVPFSEYKTETYDDYKDETTIDDSIMDADSLWVDTQDDDRSILTEQLETIPKEERAEKEARRPSLEKHRKEKPFKTGRGRISTPERK.VAKKEPSTVSRDEVRRKKAVYKKAELAKKSEVQAHSPSRKLILKPAIKYTRPTHLSCVKRK 
..........................................................................................................................................................................................................................................................................................................................t....t.......t...t......p.......p.p.h...s.tchph.h..t...D+SGMStYFETSsLK--h..sc..s..t...uSDYYELSss+t...-..sh...t.....p.tp.pt.............sh.EhuYSTLspsh..............st-ph.TlsPplht-Kp-..hhsKNKDD..LpLSRSLGLGGRSAIEQRSMSINLPhSCLDSluLGhshGRu+sLSPLAoDILopTSGShDEusD.YLPsTTPul-KhPsFP...h-stt.......t..............+.....s.t..p......p...sESP..ApphYKNGsVhuPDLPEMLDLsGoRSRLuS.ss-sEhsp+KSs.u-....h.-sts.t.hs.hssps...h.Ks-uQhE-hGYCVFscYosPhPSPsps......t...........................................................................................................................................................................................................................................................................E.s..+-..+..E.EhKEKs.sKPDLVHQEA...hDtE-sYp.oGtt....c..................t.....t...c.......c...p...p..........p...........p....p...t........pc....................h...h.t.......t.................t.....t........p.......t.p....c..h.t-...........s...-...................E.+uslESVVTlEDDFITVVQT.hD-uE.uuHSVRFus..p.-..ph..........t..cE.p.....-sp.Es......p-.sP...-sPAsPp+E.Els.SEhhTEoYDDYKDETTIDDSIMDsDSLWVDTQDDDRSIhTEQLETlPKEE+.A.......-K-.......sR....RsSL-KH.RKEKPh.Ko...G...R..GRI..STPERK.lAKKEPST.V.SRDEVRRKK..AVhKKAELuKKoElQuHSPSRKhILKPAlKaTRPT.HhSCVKRK................................................................ 1 2 5 15 +8210 PF08378 NERD Nuclease-related domain Wuster A anon Pfam-B_9750 (release 17.0) Family The nuclease-related domain (NERD) is found in a range of bacterial as well as archaeal and plant proteins. 
It has distant similarity to endonucleases (hence its name) and its predicted secondary structure is helix - sheet - sheet - sheet - sheet - weak sheet/long loop - helix - sheet - sheet. The majority of NERD-containing proteins are single-domain, but in several cases proteins containing NERD have additional domains which in 75% of cases are involved in DNA processing [1]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -11.32 0.71 -3.82 377 1767 2012-10-11 20:44:44 2005-08-11 14:40:20 6 64 1278 0 464 1715 271 125.70 17 30.27 CHANGED pGttuEpts..t....phL.....pt.h.pp....h.h.h......ts.h.h.......................tt..pspIDhlllss.tt..lhllEsK....s.............hp.G......h....t..........h.....................................thts...Plpp.s....ppphptl....pph...l........................................h......hpslllhs ................................................uE..h.h.phL............pt.l.ppt...hhl.h.......ps.lhl...............ttspttpIDalllst..pG..lhllEsK......s.........................hp..G....p..lh....ss........tp..s....h.................................................h...t............................h.phts.......PlpQ..s..........tpphphl.....tph....l...........................................................h................................................................................................ 0 152 325 415 +8211 PF08379 Bact_transglu_N Bacterial transglutaminase-like N-terminal region Fenech M anon Pfam-B_2190 (release 18.0) Family This region is found towards the N-terminus of various archaeal and bacterial hypothetical proteins. Some of these are annotated as being transglutaminase-like proteins, and in fact contain a transglutaminase-like superfamily domain (Pfam:PF01841). 
21.50 21.50 21.60 21.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.48 0.72 -3.67 188 1621 2009-01-15 18:05:59 2005-08-12 17:21:43 5 8 789 0 617 1553 423 80.90 28 16.48 CHANGED phplpHpTpYpYspPVshuhpplRLpP..pssst.........Qp.llsaplplsPtstphpp.hpDhaGNtlphh.sh.ppspp.c.lp...lpspupV-h ..............................hplpHpTpYpYsps.Vphu.phlRLpP.tspsp.........Qp.llsaplplp.......Pts....t...........hhst.tpDsaGNhlsthsh.spspp..p.Lp...lpspuhV-............... 0 164 370 493 +8213 PF08381 BRX DZC; Transcription factor regulating root and shoot growth via Pin3 Fenech M, Coggill P anon Pfam-B_2116 (release 18.0) Domain The BREVIS RADIX (BRX) domain was characterised as being a transcription factor in plants regulating the extent of cell proliferation and elongation in the growth zone of the root [1,2]. BRX is rate limiting for auxin-responsive gene-expression by mediating cross-talk with the brassino-steroid pathway. BRX has a ubiquitous, although quantitatively variable role in modulating the growth rate in both the root and the shoot [3]. The family features a short region of alpha-helix, approximately 60 residues in length, which is found repeated up to three times [1]. BRX is expressed in the vasculature and is rate-limiting for transcriptional auxin action [4]. 24.50 24.50 24.70 26.80 24.10 24.00 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.98 0.72 -4.91 34 367 2009-01-15 18:05:59 2005-08-15 09:27:43 6 31 36 0 237 374 0 56.60 49 10.58 CHANGED ssptEWl-QhEPGVYlThsshssG.sptLKRVRFSRcpFs.cppAcpWWpENps+lhcpYs .....t..ppEWlEQsEPGVYITls..sLs.s......G.sp-LKRVRFS.....RcpFs.chpAcpWWtENps+lhcpYs...... 0 32 143 192 +8215 PF08383 Maf_N Maf N-terminal region Fenech M anon Pfam-B_3103 (release 18.0) Family This region is found in various leucine zipper transcription factors of the Maf family. 
These are implicated in the regulation of insulin gene expression [1], in erythroid differentiation [2], and in differentiation of the neuroretina [3]. 20.20 20.20 20.30 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.60 0.72 -4.40 9 171 2009-09-11 10:53:55 2005-08-15 09:39:28 6 2 45 0 78 126 0 34.90 59 11.59 CHANGED spLE-LYWM..us..h.QQhs.PEuLsLTPEDAVEALIGs ...spLEDLYWM..uu...h.QQlN.PEALsLoPEDAVEALIut....... 0 5 16 35 +8216 PF08384 NPP Pro-opiomelanocortin, N-terminal region Fenech M anon Pfam-B_1053 (release 18.0) Family This family features the N-terminal peptide of pro-opiomelanocortin (NPP). It is thought to represent an important pituitary peptide, given its high yield from pituitary glands, and exhibits a potent in vitro aldosterone-stimulating activity [1]. 20.50 20.50 20.50 20.60 20.40 20.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.54 0.72 -4.33 20 577 2009-01-15 18:05:59 2005-08-15 09:51:58 5 4 358 0 22 546 0 23.50 84 11.75 CHANGED QCWE.so+C+DLsoEsslLECIcsC+sDLoAEoPlaPGNGHhQPho ................................SAESPVFPGNGHMQPLS.......... 0 1 3 8 +8217 PF08385 DHC_N1 Dynein heavy chain, N-terminal region 1 Fenech M anon Pfam-B_3094 (release 18.0) Family Dynein heavy chains interact with other heavy chains to form dimers, and with intermediate chain-light chain complexes to form a basal cargo binding unit [1]. The region featured in this family includes the sequences implicated in mediating these interactions [2]. It is thought to be flexible and not to adopt a rigid conformation [1]. 
22.20 22.20 22.40 22.20 21.60 22.10 hmmbuild -o /dev/null HMM SEED 579 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.78 0.70 -6.14 40 1478 2009-01-15 18:05:59 2005-08-15 10:01:37 7 120 275 0 1051 1462 56 440.20 18 14.51 CHANGED DsshlppLpssVspWh+pIpplhphsps.st......sostsElpFWpshppsLppItpQLpu.cVphslclLcpu..Kp.phhssFps.t.slccuhscspchsphlcsh..hs-hL.tss....s..hsplppslsslhpplphhaphop..hs.pRhhsLlptIssplhpphpphL....ssp....p.lhp.h....-hs-hpphlps.shplhptaccpacchpshlcp.....tpccs.....ppaphp..tlhs+.hsphppRlppltphppsppph.pplhpt..................s.pst.hspslp...phtc.....-hppsacshpsls..sLDhssp...papsshppapp+hpclEpplsshlpct.h.sss.csspphF+llspFpsLh.hRPp.......Ipsslp.-htspLlcphcp-lcplpphFppph....p.tttphshh+shPs....luGsIhWs+pLpc+Lpph.hcplc...slh..s.....h.pps-.GpclppchsphpppLsp..pphappWhp...plppps..hslstsll..........pstsphcLh.VNF-spllpLh+El+hLthl..................s.........h..plPtshtplhpptcplhshtssLpchlcsasplhp.pl..pp.....hpt......Lltsphpclpphl.ptGls.lsWpsht..lptahp ........................................................................................................................hh..hpt.h..W.ptlpt...hht..p.t...........................s.s.tE.hpaW...th..............tth..l.pph.pt.t.h.hh...ht.hLp................pp................hh..........h..t........t.l.tth.pspp....hhtsh......p.l...t.....................p......p..h..p..h..l..lhphlph.hht.......sp........s....pRh....h...Lhpt......l...s...p.hhp.....hh...thl............s.t......p.lhp..............................ht.....h..phht...shphhp.app..h......th.pt..h..........t.t...............................t.a..ph.......hl..p.hp.h.pRht.p.l.....p..hhp....h.ph...th.....................................................................................................t....th........th.t..................php.t.h.h..p..h.p..p.h....p..............hc.ptt.............ta.p.t....shtt.....app.ph......tt....l-tplts......hhp.........pt.h.tph...t.sh..pp...................hh...ph...h...t.h..a..tt...lh........c.h.................lttt.....h
..th...l..ht.h.tp-lpt.h.pp..........app...ph.......................tt......h...ps.........hP..........huut.lhWsppl.p+........l.pt..hphhp...........th.................................h.p....p...s........pth.......hpp....hpp.......h.......ht.lt.p.............t..haptWhp..........php....p.........................t...h....p....t..lh..........................................t.tthtl.............lNas.plhtlh.cEsc.h..h...............................................................t.........h..plP..h.phh.t..ptp..p..hh....h...tL..........hh.p.ap..phhp..th...............................Lh....t..htthp.th.l.....t.............u...h.........ltWps...........t........................................................... 0 416 544 845 +8218 PF08386 Abhydrolase_4 TAP-like protein Fenech M anon Pfam-B_3096 (release 18.0) Family This is a family of putative bacterial peptidases and hydrolases that bear similarity to a tripeptidyl aminopeptidase isolated from Streptomyces lividans (Swiss:Q54410). A member of this family (Swiss:Q6E3K7) is thought to be involved in the C-terminal processing of propionicin F, a bacteriocidin characterised from Propionibacterium freudenreichii [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.41 0.72 -4.13 38 1197 2012-10-03 11:45:05 2005-08-15 11:43:12 5 9 575 0 480 3862 878 102.40 26 19.93 CHANGED sPh..aGshhstshh..CssW............Ps..ssstshsphs.s...s...s.sslLllssptDPsTPapsApchsctLss..u....tllohpGt.GHsshh.ssspClsptlssYLhsGphPt.....tsssC ............................................................huthhu.s.h...Ct..h..W..............................Ps...tss..t...h.t...ht..s.....ts............s..sPlLllus.p.tDPsT...Ph.p......s.A.p.p.h.upp..h.ss....u........hl.l...o..hcGs...G...Hsu...h.........h...ts...........s......t...C...s.....s.ps......l.ssY.L.h.s.G.phPt......tshhC.................................... 
0 153 319 425 +8219 PF08387 FBD FBD Fenech M anon Pfam-B_1153 (release 18.0) Family This region is found in F-box (Pfam:PF00646) and other domain containing plant proteins; it is repeated in two family members. Its precise function is unknown, but it is thought to be associated with nuclear processes [1]. In fact, several family members are annotated as being similar to transcription factors. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.48 0.72 -4.50 129 806 2009-01-15 18:05:59 2005-08-15 11:49:10 5 32 19 0 451 833 0 49.30 25 12.21 CHANGED ta...p..sslPcCLhopLchlph.p...........tapuppp.c.hcls.pYlLcNuphL.Kchslph ................................................s.cCl.spL.cplph.p............sapGpps..-..lphs..pallpNupsL.cphsl............. 0 116 212 319 +8220 PF08388 GIIM Group II intron, maturase-specific domain Fenech M anon Pfam-B_4063 (release 18.0) Family This region is found mainly in various bacterial and archaeal species, but a few members of this family are expressed by fungal and chlamydomonal species. It has been implicated in the binding of intron RNA during reverse transcription and splicing [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.88 0.72 -4.19 164 2393 2012-10-02 14:46:49 2005-08-15 11:52:43 6 14 1025 0 536 2190 222 80.40 24 19.71 CHANGED l+phpp+l.+p.lh.+ptps.......hshpplIppLNPllRGWssYaphs.sspph..applDphlhp+LhpWth++ap..p.......ht...........pphhhp+.....ha ...............................pphpp+l...+p...lh....+.ptt.s...........hshpphlc....p.LN.hl+GWhsYaph..s...ss....pph...hpplD.phlhp+Ltthhh+ca+...p.......ht...........hh.........hh.......................................... 
0 205 383 464 +8221 PF08389 Xpo1 Exportin 1-like protein Fenech M anon Pfam-B_4058 (release 18.0) Family The sequences featured in this family are similar to a region close to the N-terminus of yeast exportin 1 (Xpo1, Crm1, Swiss:P14068). This region is found just C-terminal to an importin-beta N-terminal domain (Pfam:PF03810) in many members of this family. Exportin 1 is a nuclear export receptor that interacts with leucine-rich nuclear export signal (NES) sequences, and Ran-GTP, and is involved in translocation of proteins out of the nucleus [1,2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.74 0.71 -4.29 59 1456 2012-10-11 20:01:01 2005-08-15 12:46:41 7 30 320 21 1051 1452 11 147.50 19 14.93 CHANGED pphlhsc..Lshslsplhhp.a....sppWps..hls-ll...shhps.s...............shsthhlplLphLsEElh-....htp..sphspp+.ppcl+ctlpsp......hpplhphhhplLpp.ts..................splhptsLcslssalp.....Wlslshlh.....s.s...llshl.hphL......s.sph..........ppsAhcCL .........................................................t..hlhs+Lshsls.p.lhhppa......spp.Ws.s..hls-ll.........ph..hps..s................................................ststhhltlLphLsEElhs...................htp....tphspp+..pppl+.....p....t.....lppp.........hppl..h....phhh.....p.llppttp................................................splhptsLc.sltpalp........alslshl..h.................p..st........ll.p.hl.hp.hl..........p....h...........ht.shcs...................................................................................... 0 370 587 863 +8222 PF08390 TRAM1 TRAM1-like protein Fenech M anon Pfam-B_3108 (release 18.0) Family This family comprises sequences that are similar to human TRAM1 (Swiss:Q15629). This is a transmembrane protein of the endoplasmic reticulum, thought to be involved in the membrane transfer of secretory proteins [1]. 
The region featured in this family is found N-terminal to the longevity-assurance protein region (Pfam:PF03798). 25.00 25.00 25.00 25.00 22.70 24.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.04 0.72 -4.47 61 512 2009-01-15 18:05:59 2005-08-15 12:53:12 6 5 230 0 347 489 0 65.70 30 16.18 CHANGED hsphFltlsYph................sssshYspGhcDhsaVhFYhlhhohlRthlhcallcPlu.+thslppp+p ...................................................hthFlhlpYpl...............................................sssshYst.GhcDlshVhFYhllhshlRshlh-alLcPlu.+phtlppp+.............. 0 80 162 266 +8223 PF08391 Ly49 Ly49-like protein, N-terminal region Fenech M anon Pfam-B_1187 (release 18.0) Family The sequences making up this family are annotated as, or are similar to, Ly49 receptors (e.g. Swiss:P20937). These are type II transmembrane receptors expressed by mouse natural killer (NK) cells. They are classified as being activating (e.g.Ly49D and H) or inhibitory (e.g. Ly49A and G), depending on their effect on NK cell function [1]. They are members of the C-type lectin receptor superfamily [2], and in fact in many family members this region is found immediately N-terminal to a lectin C-type domain (Pfam:PF00059). 30.00 30.00 30.50 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.67 0.71 -3.97 27 249 2009-01-15 18:05:59 2005-08-15 12:57:01 5 4 35 18 46 271 0 113.20 44 46.66 CHANGED SVPWpLIVlsLGILChLLLVoVAVLsTpIFQYtQEKHph.pEsL.NhppphssMQsD.pLKEEhLpNKolECs.hps.......hL-sLpREQpRhapKTKohhsS.QcT.............G+tsEh+WFCaGlKCYYF ..........SVPWpLIVlsLGILChL.LLVhVsVLssp.IFQhtQ.cK+p.h..pEhL.Npppphs.....h.MQsDhpLK....EE.h..LpNKSlEsp.hpp.......hL-Sl.p+.-..QpRh.....apc..s+slh....c...o.pcT..............G+hs...c..aW.CaGhpCYYF......................................................................... 
0 1 2 4 +8224 PF08392 FAE1_CUT1_RppA FAE1/Type III polyketide synthase-like protein Fenech M anon Pfam-B_1177 (release 18.0) Family The members of this family are described as 3-ketoacyl-CoA synthases, type III polyketide synthases, fatty acid elongases and fatty acid condensing enzymes, and are found in both prokaryotic and eukaryotic (mainly plant) species. The region featured in this family contains the active site residues, as well as motifs involved in substrate binding [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.77 0.70 -5.34 37 707 2012-10-02 12:25:54 2005-08-15 13:22:38 7 11 136 0 353 4319 396 250.90 42 57.47 CHANGED ahhpRPR.s...VYLlDauCY+Psccp+lohpthhchsctssph.spcs.........lcF.p+llpRSGLG--TYlPculhphPsp..s..ohtpARpEuEtVhFuAlDcLFt+T......s.lpPc.-IGlLVVNCSlFsPTPSLSuMlVN+YKhRuslpSaNLuGMGCSAGlIulDLA+sLLp.sp..NoaALVVSTEslo.NWYhGscRSMLlsNCLFRhGGAAlLLSN..+st-+c.RuKYcLhHsVRTHpGADDcuapClhQcED-pGphGVuLS..K-LhsVAGcALKsNIsTLGPLVLPhSEpLhFhss.llt+Kl 
..................................................................................................................hpRs+..s....lYLlDauCacPs.sp.h.....+........sshtthhch..p.h....t.......ph.....stps.....................lcF.p...+...ll...p...+SG.....l.......G.-.....c.........T...a.....h........P.......u...l.h...t.....h......P.sp................sh.t..t...u.........R........p........E..s..-....Vh....a........u.u.l...-.p....L.h.t+.T........s...lp.......P........c..-ls.l.L.l.V.N......s..S........h...F..s.P...T...P....S.Lo..uM.l...lN...+.Y.+h.R.s.s....l.t..o.a.NLuGM....GCSAGlI..ulD.LA+sl.L....p....sp.......s..s......hAlV....VSTEs..l.....o.............s...a.....Y.....h.....G....s.....p.....R.....u.......M.....L.....l.sNC.LFRh....G.G.A..Al.LL.S..N..........+........t.........t......t....t...+.............u.K..Y.....c.Lh...+..sV.RTHp..G..As..DcuapClhQcE.....D....p....p......G...p.hGlsL..S..K.-Lht.lAGc.AL+sNI.ss.LG..PlVLPhSEplhFhhs.hlh++............................................................................................................. 0 72 218 296 +8225 PF08393 DHC_N2 Dynein heavy chain, N-terminal region 2 Fenech M anon Pfam-B_3094 (release 18.0) Family Dyneins are described as motor proteins of eukaryotic cells, as they can convert energy derived from the hydrolysis of ATP to force and movement along cytoskeletal polymers, such as microtubules. This region is found C-terminal to the dynein heavy chain N-terminal region 1 (Pfam:PF08385) in many members of this family. No functions seem to have been attributed specifically to this region. 
23.50 23.50 24.00 23.50 23.00 23.20 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.40 0.70 -5.96 94 2363 2009-01-15 18:05:59 2005-08-15 13:39:22 8 259 303 14 1713 2264 96 354.20 26 10.66 CHANGED ppLpplpp-lpshpplWphhpphpp.phppatp.tsa.ppl...sspplcpplp...p.hhcphpplscph...pp...........hplhpplpp....plcpapphlPllps.L.+stuh........+...p.....R...HW.pplhp.hls.......sh.......t...ps..hoLpplls.hsl.h..patcplpcIsspAppEhslEptLpclcspWps......hpFphhsacs....p.....shllps..h--lhphL--phsplpohpsS........ahp....Fcpc.sppWcpcLshlpcll-hWhplQ+pWhYLEsIFsu..p..DIpppLPpEspcFppl-ppap.plhpps.ppsspllcss.t...sp.hhppLpphtcpL-plQKuLscYLEpKRptFPRFYFlSs--LL-IL..upup-Pp.tlQpHlpK.hF-uIsplphpppspp.......lhuhhSsE.G.Eplthtp.slphps......pVE..p...WLppl-ppM+poL+phlppulpphp ..............................................................................................................................................h.th.pphp.hpplaph.h..ph.p.th.pt.....hhp.t.a..p..l..........p.p..th...p...tpht........p..h..........p.....p.....h...t...p...h......c.th...pp.......................................hth.h.t...lpp..........pl....cph...h..p...hs..lltt..lps.uh...............+..p....................R..HW.ppl.p.h.ht...........hph.....p..................................ps...h...s.L...tplhp....hsl...h.........ph..tpp....l.pplstt.A.tp...Eh.s.lE....p.t..L.p.p..........htttWps..............................hph.ph....app......................p...........thhl..l.ps.....h-..-.l..hthl--p..htlps.hhsS........a.ht......F.ppp.lppWpp...pLsh......hppl.l....c.WhplQppW.......hYL...EsIF.u..........t...DI..t....pQ....LPp.E.....up.+F.pp..l-...pp.......ap..plM..........p...............ps........ps......s......p.....l.....l............ps............s.........t.........h................................s..hh...............p..........pLp....p.....htp........L...phhpK..u.LspYLE......pK...Rhh.....FP....R....Fa..........F.lSs...s-L..L-IL................u.p..s.p........s.st.....tlQ....Hl.pK.hF-s..ltpl
phpp.s.p................................hhuhh...S....t...E..G....Eh....l....h.....tp.............l.hps................pV.E..t........WLtpl.........tptMptol+phhtpuh.t..t..................................................................................................................... 0 747 940 1412 +8226 PF08394 Arc_trans_TRASH Archaeal TRASH domain Fenech M, [1] anon Pfam-B_18882 (release 17.0) Family This region is found in the C-terminus of a number of archaeal transcriptional regulators. It is thought to function as a metal-sensing regulatory module [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.04 0.72 -3.79 8 65 2012-10-03 05:12:49 2005-08-15 13:58:30 5 6 57 0 34 119 16 36.70 39 22.76 CHANGED CDYCGpEIpG-PIshKh+N+lYYsCCsTCpcchKK+h ...........CDaCGppI..t..s..c..P..l..s.hK..h..ss.+s.YahCCsoChpph+cp...... 0 8 15 27 +8227 PF08395 7tm_7 7tm Chemosensory receptor Robertson H, Finn R, Fenech M anon Robertson H Family This family includes a number of gustatory and odorant receptors mainly from insect species such as A. gambiae and D. melanogaster. They are classified as G-protein-coupled receptors (GPCRs), or seven-transmembrane receptors. They show high sequence divergence, consistent with an ancient origin for the family [1,2]. 
26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.36 0.70 -5.44 78 1580 2012-10-01 21:54:26 2005-08-15 14:17:53 7 10 51 0 884 2288 0 315.20 12 89.70 CHANGED lchhhhhspl.hG..lhsh........phstpptphph.hphhhhhhhhhhhhhhhhhhhhh...h.hhhthhhhptsthhphhhhlhhhlthlshhhhhltthhppppltplhppl....hplppphtptspttt..........phhshhhhhhhhhhhhhhhhshh.............hhhtthhhhshhhlhhhhhhh....hh.hhhhhahhhlhhlh..phhphlpppLpphhppht.......................hpppshtp...............l..pplhphappl.hplspplsphas....h.ll..hhhhhhh.lthshhha..hhhtphh.............p.hhhhhhhl......ahhh.hhhhh......hhhhssstspp....pspcssthl..tph..........htttspphppp..lcpFhlphh.ppph....phsssGhFslspshlhshhuulhoYlllLlQFchtt ..................................................................................................................................................................................h.........h.s......................................h........h.h....hhh..hh..h..h.h...hhhhh..h..hhhh..............................................t....h.....p.....hh...h..........h..............h.h...h...........h...h.h...h.h..h.hh.h.......hh......p.......t..p...p..hh.....ph....h...pp.h...............hphptp.....h...th.th.....................thh.h.hh.....h.h...h.hh.h....h.h.hhh.hhhhh...........................................h..h......h...hh...h...h...hhh..h..hh..hh.......................h.hh......h.......h.h.h....h.h.......h....h.h..lh.............th...ht...hl..pptl...pt.hht....p.........................................tp.p..t.....................................................l....pphhphat.p....l...hp.h.s.p.p....hsp...has....h.hl...hhhhhth...hthh.h.ha......hhhhthh................................hh.h.hhhhh.........................h.hh.h.h.h..hh.......hhhhhs..pt..h.p............................phppht.hh..........hph........................s..tp.h..ppt.........lp....h.....h...th......pp..........thphhs..hh.ls.phhh..thhsshhsahlhhhQh.........................
........................................ 0 345 411 732 +8228 PF08396 Toxin_34 Spider toxin omega agatoxin/Tx1 family Bateman A anon Mondal S, Ramakumar S Family The Tx1 family lethal spider neurotoxin induces excitatory symptoms in mice [1]. 21.10 21.10 24.80 24.70 17.80 17.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.82 0.72 -3.43 7 30 2009-01-15 18:05:59 2005-08-15 16:18:18 5 1 8 0 0 34 0 51.30 46 76.95 CHANGED CIclGs-CDG..cKsDCQCCRcNuYCSC..h...FG.hKsGCKCpVGssuphhulC.+cKppC.sp.....ssspCppsshs+Rp ....ClphGpsCDG..cKsDCQCCtcNuaCSC..h...as.hchs..CpC.ht................................................... 0 0 0 0 +8229 PF08397 IMD IRSp53/MIM homology domain Wuster A anon Pfam-B_4120 (release 18.0) Family The N-terminal predicted helical stretch of the insulin receptor tyrosine kinase substrate p53 (IRSp53) is an evolutionary conserved F-actin bundling domain involved in filopodium formation. The domain has been named IMD after the IRSp53 and missing in metastasis (MIM) proteins in which it occurs. Filopodium-inducing IMD activity is regulated by Cdc42 and Rac1 and is SH3-independent [1]. 
20.50 20.50 20.60 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.32 0.70 -5.03 8 584 2012-10-03 12:17:00 2005-08-17 11:36:12 6 17 112 9 305 490 0 190.70 31 31.80 CHANGED Y+sIMpQFN...PuLcNhlshGpsYc+ulpuhlhAucuYaDAlpKlGEhAssSps.S+ELGslLhcMu.sHRplpscLEchhpsFHcpLIspLEcKs-.DpKahssshK+YppEaKp+pcsL-KspSDLhKLc+Kupt...G+sst+hpl+Es..l-slsc+psplpcalucus+cALlEE+RRFCFLV-Kppslupp.huaaucuhshLpspL.sWpptsuDso+lP ................................hp....P.hcphlthupph.ptl....................pshs.Ast..s.......ah-.......Ah.....tK...........lu...-h...A..t...p................op.s..o...+...-..l.......G....ssLhphs.hHR..plpspLcph...h...p..shhppLls.Lp..cphE..t+...h.hs...t....hpKc.a...tp......Ea+pt..tpp.lcKppu.-.hh......KL......p+Kspt....................u+..ss...p......p..hcph.........lp.s....lssc.htl.p..p.h.ppuh...+pALlEERpRaChhlphhpsls................pp.hshhsc.hp...h....Lp..thh..hpthst-spclP............................................. 2 68 101 199 +8230 PF08398 Parvo_coat_N Parvovirus coat protein VP1 Wuster A anon Pfam-B_2198 (release 18.0) Family This is the N-terminal region of the Parvovirus VP1 coat protein. Also see Parvovirus coat protein VP2 (Pfam:PF00740). 26.60 26.60 26.60 26.70 26.50 26.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.17 0.72 -4.18 11 1185 2009-01-15 18:05:59 2005-08-17 11:58:51 5 6 140 0 2 856 0 57.20 59 11.36 CHANGED LPGY+YLGPGNuLDpGcPVNtuDuAA+cHDhuYsp.LKsGcNPYlpaNpADpcFlccLccDTSF .......LPGhpYlGPGNt....L....ps....G.P.P.psssDuAA+hHDhpYuphhK.G.NPYh.hashADpch.cplppt.s............................... 0 1 1 2 +8231 PF08399 VWA_N VWA N-terminal Wuster A anon Pfam-B_2075 (release 18.0) Family This domain is found at the N-terminus of proteins containing von Willebrand factor type A (VWA, Pfam:PF00092) and Cache (Pfam:PF02743) domains. It has been found in vertebrates, Drosophila and C. 
elegans but has not yet been identified in other eukaryotes. It is probably involved in the function of some voltage-dependent calcium channel subunits [1]. 20.70 20.70 20.90 20.80 20.60 20.50 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.56 0.71 -3.50 11 476 2009-01-15 18:05:59 2005-08-17 12:04:57 6 21 88 0 252 411 0 113.30 37 11.56 CHANGED AEps..ptpHpaptslt.pphpYYsA+hhs-hsc...........-sstsEhu.....hph.hpccspFpN.sVNhohSuVplPTslYpcsstlLNslpWocuL-sVFhcNhccDPoLpWQYFGSuTGhhRhYPAspW .....................................................................Acphp...taph.p.sh......htYasuhh.sch.sp...............t.s.s.-ht.........hch.ht..sspFps..Vsh.shS.s.VplPTslY.pt.ss.llNslpWo..cuLs..pVFhc.....N...hcpD.PoLhWQYFGSuoGhhRhYPu.htW............ 1 68 89 164 +8232 PF08400 phage_tail_N Prophage tail fibre N-terminal Wuster A anon Pfam-B_3101 (release 18.0) Family This domain is found at the N-terminus of prophage tail fibre proteins. 26.60 26.60 26.60 27.00 26.40 26.50 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.58 0.71 -4.45 9 1004 2012-10-02 19:08:27 2005-08-17 14:13:30 5 47 343 0 14 1041 4 125.20 65 25.71 CHANGED MoVpISGlLKDGsGcPVssssIpL+Ap+sSsTVlssTVAos.s-psGpYuhclE.GpYsVhLts-GhsssasGsIsVhsDSpPGTLNsFL.tAhsEsDlpPEllpcFEthstpsscsAusuttssppAspsutsA ......MuVpISGVLKDGsGKPVpNCTIpLKA+RsSoTVV.....VNTVASENPD.EAGRYSMDVEaGQYSVhLLVEGF.P.PSHAGTITVYEDSpPGTLNDFL.GAMoEDDlRPEALRRFEhMVEEsARpAptAppNAupAcpSupsA.................... 1 1 2 7 +8233 PF08401 DUF1738 Domain of unknown function (DUF1738) Fenech M anon Pfam-B_3014 (release 18.0) Family This region is found in a number of bacterial hypothetical proteins. Some members are annotated as being similar to replication primases, and in fact this region is often found together with the Toprim domain (Pfam:PF01751). 
20.60 20.60 20.60 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.68 0.71 -4.40 112 907 2009-01-15 18:05:59 2005-08-17 16:38:04 6 25 579 0 235 850 162 122.50 28 26.43 CHANGED +tclY.pclTc....cIIspLEp.G.stPWhpPWp.....tsssshshPhN.toGcsYpGlNllhLWhss.tpGas.....sspWhTa+QAppLG...........upVRKG.E+uosl...laacp..hppc.....pp.t........ps.ptptcpl..shl+tasVF ..................................p..shhpplss.......plI.ttlEp.....G.shPWh+PWp.......tsthshPhN.ho...GctYpG.lNslh...Lh.h.ts..tpGas.........sspWhT.a+Q.Ap.p.h....G....................................................u..pV+..KG..E..+us.sl...laaph......hpcp............t.p.t...................tptptphpth....shh+tapVF............................................................................. 0 46 136 189 +8234 PF08402 TOBE_2 TOBE domain Bateman A anon Pfam-B_4178 (release 18.0) Family The TOBE domain [1] (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulphate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. In this family a strong RPE motif is found at the presumed N-terminus of the domain. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.46 0.72 -3.98 128 14426 2012-10-03 20:18:03 2005-08-18 14:22:02 5 12 3742 54 3382 10860 3488 74.40 20 20.70 CHANGED lulRPE+lpl...................ss...s....s..sl....sGpVpphtahGspshht.lclsssphhhs...ttssst........ht..hGcpVtlsasscsshlh ..............................lulRPE...c.l.pl........................................ss...s........s....sl........pu...p..V..ps...h...p..ah...Gsph......h.......h.....h....l..........c........h........s.......s........s....p....h.......hhs.................ph.sspt............hp............hG..p.p.l.tlsh.s.spphhlh............................................... 0 780 1829 2550 +8235 PF08403 AA_permease_N Amino acid permease N-terminal Wuster A anon Pfam-B_3112 (release 18.0) Family This domain is found to the N-terminus of the amino acid permease domain (Pfam:PF00324) in metazoan Na-K-Cl cotransporters. 21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.65 0.72 -4.66 16 266 2009-01-15 18:05:59 2005-08-18 14:58:10 5 5 64 0 109 240 0 70.70 44 7.06 CHANGED pcstsppshhpshGasTlDsVPplDFYpNouuhut.+tsRPSLp-LHcshccsss...ssht-sssst...sGsss- ............coHosTaYlpTFGHNThDAVPpI-aYRNTuu...s.G..Kh.sRPSLtELH-p.LcKt.......shtDshusG...-uh............................... 0 15 25 55 +8236 PF08404 Baculo_p74_N Baculoviridae P74 N-terminal Wuster A anon Pfam-B_3059 (release 18.0) Family This domain is found at the N-terminus of P74 occlusion-derived virus (ODV) envelope proteins which are required for oral infectivity. The envelope proteins are found in baculoviruses which are insect pathogens. The C-terminus of P74 is anchored to the membrane whereas the N-terminus is exposed to the virion surface. Furthermore P74 is unusual for a virus envelope protein as it lacks an N-terminal localisation signal sequence [1].\ Also see Pfam:PF04583. 
25.00 25.00 27.10 26.60 19.20 19.00 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.88 0.70 -5.66 26 102 2009-01-15 18:05:59 2005-08-18 15:00:00 5 2 71 0 0 99 0 268.90 44 46.86 CHANGED TslDlhNAspYusphppLcaI.+WRs+FPHIh..IDYsIRsA.oNs.DYYVPssLts+.AlsVcLpFS+cGCESMSCYPasETGsI-h.pTPhG.YTQTS-TuVtYuQ..PACYsLD+AuAsR-GsEtclQosELRYosss+.CIlVDohoKMYhNSPYlRT--Hll+GVDDV.PuFNVts.ssDPlFPE+acGpFN-AYCRRFGRsl..N......suCohpWWEollGFVLGDoIahTFKLLssNVFo-LRsFDYp+PSslLPstPss-upp.lLp-WtssRDsslDh.-hEtpFhphp..shs-lsl...sssppLlYsAppGF ........ThhDhhNAspYu.phtpLpaI.+WRp+hPHIh..IcYplR.A.s.ss.DaYVPstltp+...AlhVcltFS+cGC-.u.MSCY.Pap....ETGsls..pTshs.YTQTS-......Ts..lt.Y...sQ..PACYpLDRuuAh......R-Gt-pplQusEhRYo..spp.ClhV..DohoKMYh...NoPYlRT--HhlhGlDDV.PuFNVts.....s...sDshFP............E+F+GpFNcAYCRRFGRph..N......suCshpWWEollG.FVLGDTlYhThKhlssslao-LRsaDYp+PSs.lLPs..Phssupp..hLspW+ssRDsssch.-hEh.F.p....phsclsh....s...pl.YhAE.Ga............................... 0 0 0 0 +8237 PF08405 Calici_PP_N Viral polyprotein N-terminal Wuster A anon Pfam-B_4167 (release 18.0) Family This domain is found at the N-terminus of non-structural viral polyproteins of the Caliciviridae subfamily. 
24.20 24.20 24.40 44.80 24.10 24.10 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.98 0.70 -5.62 6 386 2009-01-15 18:05:59 2005-08-18 15:01:23 6 4 326 0 0 369 0 351.20 73 21.29 CHANGED RsspPApEPlIGshLEha-G+IYHYuIYIGpGKslGVHsPpAAhSlA+lslpPIuhWWRssYsPp...hLo.DpLKcLcsEsaPYsAhTNNCYpFCC.VhsLpDTWLpRRhIo.ostFa+PsQDWs+cs.-h.pDSKLchVpDAlLsAlsuLlS+PhKDLLGKLKPLNlLNlLusCDWTFhGlVEsVILLhELFGVFWsPPDVSNFIASLLsDacLQGPEDLAhDLVPlVLGGIGLAlGFT+-KluKhhpSAssuLRAAppLGpYGLEIFpLlhKaFFsu-psc...cTL+sIEsAVIDMEslussplTpLlRDKpSupsYMphLDtEEEKARKLSsKsAsPclVuTTNALlARIShARSAL ......RENRDAKEPLTGTILEMWDGEIYHYGLYVERGLVLGVHKPPAAISLAKVELTPLSLaWRP..VYTPQ..YLISPDTLKKLHGETFPYTAFDNNCYAFC.CWVLDLNDSWLSRRMIQRTTGFFRPYQDWNRKPL.PTh.DDSKLKKVANIFLCsLSS...LFTRPI..KDIIGKLRPLNIlNILASCDWTFAGIVESLILLAEL.FGVFWTPPDVSAMIAPLLGDaELQGP.EDLsVELVPVVMGGIGLVLGFTKEK....IGKMLSSAASTLRACKDLG...AYGLEILKLVMKWFFPKKEEAN.ELAMVRSIEDAVLDLEAIENNHMTTLLKDKDSLATYMRTLDLEEEKARKLSTKSASPDIVGTINALLARIAAARSLV................................................................................. 0 0 0 0 +8238 PF08406 CbbQ_C CbbQ/NirQ/NorQ C-terminal Wuster A anon Pfam-B_3065 (release 18.0) Family This domain is found at the C-terminus of proteins of the CbbQ/NirQ/NorQ family of proteins which play a role in the post-translational activation of Rubisco [1]. It is also found in the Thauera aromatica TutH protein which is similar to the CbbQ/NirQ/NorQ family [2], as well as in putative chaperones. The ATPase family associated with various cellular activities (AAA) Pfam:PF07728 is found in the same bacterial and archaeal proteins as the domain described here. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.42 0.72 -3.97 81 795 2009-01-15 18:05:59 2005-08-18 15:03:12 5 4 655 0 191 519 390 86.40 31 30.94 CHANGED EspIVucEo.GlspspAtpLVplApplRsL..cGps....L-EGsSTRLLlYAApLlssGlsstpAscsAllcPLTDDs-l..tpuLpchlsuha ............E.tllt.cp..o...u.l.s.c.p.h.s...pplVchupclRsl........ptps.............l-Eus.STRhLlhhupLl.s........t.shss....c....c....A.....hctsllcsL...s..D..-...-h..hpAlhphht...h...................................... 0 57 127 167 +8239 PF08407 Chitin_synth_1N Chitin synthase N-terminal Wuster A anon Pfam-B_1105 (release 18.0) Family This is the N-terminal domain of Chitin synthase (Pfam:PF01644). 19.40 19.40 20.90 19.80 18.70 17.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.93 0.72 -4.29 55 491 2009-01-15 18:05:59 2005-08-18 15:04:37 6 9 174 0 331 510 1 79.90 43 8.49 CHANGED +Rtps..h.....++lp..Lh...pGsllh.D.......sPVPstLhst.lst........psssEFoaMRYTAs.TCDPs-Fs.cpuasLRpt.hYs..RcTELhIslT .................h...hsh+cVpLh.....pGsllh.D.......sPlPstLhstlsp........sspcEFTHMRY.TAs.TC.DPs..-Fs.cp.........GasLRtt.has..RcTELhIslT........... 0 99 186 287 +8240 PF08408 DNA_pol_B_3 DNA polymerase family B viral insert Wuster A anon Pfam-B_3028 (release 18.0) Family This viral domain is found between the exonuclease domain of the DNA polymerase family B (Pfam:PF03104) and the Pfam:PF00136 domain, connecting the two. 
24.50 24.50 24.70 24.80 22.00 24.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.11 0.71 -4.33 13 96 2009-01-15 18:05:59 2005-08-18 15:05:26 5 3 53 0 0 90 8 143.70 69 14.78 CHANGED SKNsFsCsstlc.phssshhphhussssDs+sKlplFu-VLpTGNYlTIs-.claKIlcK.cIt..c-uFpl....sltsspshth....ssh......hsluFGKDDVDLuD..MYtNYsL-hAl-MupYClHDACLCKYLWsYYtIcoKIsAuAsTYlLPQshsFEY ...................SKNAFuChuKVL.scGscEMTFIGDcTTDAKGKAAsFAKVLoTGNYVTVD-.hICKVI+K.DIa..ENGFKV....lLsCPs........NDT......YKLSFGKDDVDLAp..MYKcYNLNIALDMARYCIHDACLCpYLWEYYGVETKTDAGASTYlLPQSMVFEY......... 0 0 0 0 +8241 PF08409 DUF1736 Domain of unknown function (DUF1736) Wuster A anon Pfam-B_4104 (release 18.0) Family This domain of unknown function is found in various hypothetical metazoan proteins. 21.70 21.70 22.40 22.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.59 0.72 -4.41 30 412 2009-09-10 21:08:40 2005-08-18 15:06:46 6 202 109 0 252 413 12 78.60 43 10.48 CHANGED lhsptsPt.FopsDNPsuhssphhoRhLTapYLhsh.NhWLLLhPssLsaDWoMsuIPLl...coltDhRNlholhhasslh...lL ..................hssp.Ph.Fop...DNPAuhus.hhs.RtLTasYLhsh.NhWLLLsPsp..LCaDWoMGsIPLl.....colhDhRNlsTlshassLh...l...... 0 83 103 178 +8242 PF08410 DUF1737 Domain of unknown function (DUF1737) Wuster A anon Pfam-B_2030 (release 18.0) Family This domain of unknown function is found at the N-terminus of bacterial and viral hypothetical proteins. 20.60 20.60 20.60 24.50 20.30 18.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.67 0.72 -4.19 19 1062 2009-01-15 18:05:59 2005-08-18 15:07:52 5 3 451 0 59 981 214 52.20 58 13.11 CHANGED M.Kh.....YRhlTusDsusFC+R.VTpALscGWpLaGSPshuasu.tthhtsuQAV....lK-l ...........MshKH......YDVVRAASPS.DLAEK.LTcKLKEGWQPaGuPlAhTs........asLMQAlstE.G............. 
0 16 35 46 +8243 PF08411 Exonuc_X-T_C Exonuclease C-terminal Wuster A anon Pfam-B_3061 (release 18.0) Family This bacterial domain is found at the C-terminus of Exodeoxyribonuclease I/Exonuclease I (Pfam:PF00929), which is a single-strand specific DNA nuclease affecting recombination and expression pathways. The exonuclease I protein in E. coli is associated with DNA deoxyribophosphodiesterase (dRPase) [1]. 20.00 20.00 23.80 22.50 19.70 19.70 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.85 0.70 -5.37 74 1111 2009-09-11 17:37:21 2005-08-18 15:09:21 5 3 1089 6 187 830 1335 260.60 49 55.39 CHANGED RpKcpltphlDl....hphpPLlHVSGhauut.pussuhlsPlAaHPsNpNAVIshDLstDsssLlc.......Lsu-plRpRLYTc..+s-Ls-s.th.lPlKhlHlNKCP.llAPspsLps..........-sAp.....RL...GlDtptstpphphL+pp....sp.lp-Kltplas.......pp.pas..sssDsDtpLYs.GFF.ussD+pthchlR..pssPppLup..hphsFpDpRltcLLFRYRARNaPcTLsppEpp+Wppa........ppp+Lpssp......hppahppLppLhpp......ppss...cchplLpsLhpYuppL .......................................................................RsK+KlhsLIDl....spMpP.LVHVSGMFGAh.RGsTSWVuPLA..WH.PcN+NAVIhsDLA.u.Dh.oPLLE.......LDu-sLR-RLYTt........+sDL.u-..pssVP..lKLVHINKC.P.VLA...Ap.T.LcP..........Ec.A-.....RL...GIsRppCLcNLplLRps.....Pp.lREK.VlslFup......tc.sFs....sS-s..VDsQLYs.G.....FF...S-A.DRAsMcIlh....pT.c.PcsLsu................L-lsFsD.cRIc...c...L...LFpYRARNFPs.TLstuE.QpR.WhcH......................RRphhssch...........lpsYtpcLp.Lspp........as.-..Dc......cKluLLKuLapYApc.h...................... 0 38 88 143 +8244 PF08412 Ion_trans_N Ion transport protein N-terminal Wuster A anon Pfam-B_4115 (release 18.0) Family This metazoan domain is found to the N-terminus of Pfam:PF00520 in voltage- and cyclic nucleotide-gated K/Na ion channels. 
20.60 20.60 20.60 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.69 0.72 -4.67 9 383 2009-01-15 18:05:59 2005-08-18 15:10:34 5 12 89 0 232 371 6 66.70 47 8.90 CHANGED QhtuhLQPslNKhSL+hFGScKAVpcEQ.Rl+oAGsWIIHPaScF.........................................RFYWDLhMLhLhhuNLIlLPVuITFFpDpsos ................................hh.PtsN+huh+ha.GSpKAl.pEp.R...c.s.u.....G.hW...lI.HPhSsF........................................................RFYWDlhMLllhVuNLIIlP.VuIoF..Fp--.o..................... 1 91 116 169 +8246 PF08414 NADPH_Ox Respiratory burst NADPH oxidase Wuster A anon Pfam-B_2127 (release 18.0) Family This domain is found in plant proteins such as respiratory burst NADPH oxidase proteins which produce reactive oxygen species as a defence mechanism. It tends to occur to the N-terminus of an EF-hand (Pfam:PF00036), which suggests a direct regulatory effect of Ca2+ on the activity of the NADPH oxidase in plants [1]. 21.90 21.90 23.80 23.50 21.30 20.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.26 0.72 -3.92 28 221 2009-01-15 18:05:59 2005-08-18 15:13:29 5 16 30 2 128 252 0 93.50 49 11.45 CHANGED RLDRo+S.uAt+AL+GL+FIo+ssus..s.G...WspVEpRFsclss..-GhL.RucFucCIGMs..-SKEFAsELFDALARRRplp.s-sIsK-pL+EFW-QIoDQSFDS ...........phcRo+.S.uAt+AL.+GL+Flspssss...t.u...........WtpVEcRFcpLut.....-G.....hLsRscFupCI.........G.Mp......-SKEFAsELFDALuR.RR...plp....h..........ssIoK--L+EFWpQIoDpSFDo...................... 0 16 81 103 +8247 PF08415 NRPS Nonribosomal peptide synthase Wuster A anon Pfam-B_1156 (release 18.0) Family This domain is found in bacterial nonribosomal peptide synthetases (NRPS). NRPS are megaenzymes organised as iterative modules, one for each amino acid to be built into the peptide product [2]. NRPS modules are involved in epothilone biosynthesis (EpoB), myxothiazol biosynthesis (MtaC and MtaD), and other functions [1]. 
The NRPS domain tends to be found together with the condensation domain (Pfam:PF00668) and the phosphopantetheine binding domain (Pfam:PF00550). 20.90 20.90 20.90 21.60 20.80 20.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -9.06 0.72 -4.20 144 1536 2009-01-15 18:05:59 2005-08-18 15:14:43 5 209 643 0 259 1385 12 57.50 37 3.74 CHANGED HpthSGVc.VlR.-LsRp.......tsss........shhPVVFTSsLuhss............tsstshhGph...s.aslSQTPQ .....HppaSGVc.VhR.-Ls+p.....psts.........shhPVVFTSsLGhst.t.............ttspphhGc......saslSQTPQ............... 0 45 137 215 +8248 PF08416 PTB Phosphotyrosine-binding domain Wuster A anon Pfam-B_3174 (release 18.0) Family The phosphotyrosine-binding domain (PTB, also phosphotyrosine-interaction or PI domain) in the protein tensin tends to be found at the C-terminus. Tensin is a multi-domain protein that binds to actin filaments and functions as a focal-adhesion molecule (focal adhesions are regions of plasma membrane through which cells attach to the extracellular matrix). Human tensin has actin-binding sites, an SH2 (Pfam:PF00017) domain and a region similar to the tumour suppressor PTEN [1]. The PTB domain interacts with the cytoplasmic tails of beta integrin by binding to an NPXY motif [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.56 0.71 -4.24 14 631 2012-10-04 00:02:25 2005-08-18 15:15:56 8 20 96 7 308 1087 1 127.00 29 14.53 CHANGED QapVpHLsThsh-sppuhpslcDslppLphLsupG+lWsQshllpVsspuloLhD.poKp.L.EpaPLsolpaspsshpsppasS...............lLulVspcsspupssl.HlFp..ElcAp.ltpsIpsslschhhsp.p+ ...........................................................thpV...aLsohph-..shs.Ghps.lpc..t....p..h...t.....t........L.......s.......t.........p......s...p......s....h..s.......p.......l..........hh.cVssp.uloLhD.......sp...p..K....................................h....cp.......YP.......lsolp....assh...s..s...p...s...p...pa.ss...............................lhuhVs....+c...s....p........s.....p.....s...s..l....HlFt.....El...c...sppssssI.shlsc.h.st................................................................................. 1 54 78 174 +8249 PF08417 PaO Pheophorbide a oxygenase Wuster A anon Pfam-B_3102 (release 18.0) Family This domain is found in bacterial and plant proteins to the C-terminus of a Rieske 2Fe-2S domain (Pfam:PF00355). One of the proteins the domain is found in is Pheophorbide a oxygenase (PaO) which seems to be a key regulator of chlorophyll catabolism. Arabidopsis PaO (AtPaO) is a Rieske-type 2Fe-2S enzyme that is identical to Arabidopsis accelerated cell death 1 and homologous to lethal leaf spot 1 (LLS1) of maize [1], in which the domain described here is also found. 
20.90 20.90 21.00 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -10.21 0.72 -4.06 29 296 2009-01-15 18:05:59 2005-08-18 15:35:27 7 5 102 0 163 309 25 97.40 26 20.94 CHANGED F.APChhhps.......................hsscs.tclttlhhChPsu.G+oR.....LlhRashsF......sthhh+lhP...RWapHl.spspVL-pDhhhLctQc+hltp..Gsss......app ...........................................................FhsPChhhsp..........................h.ptppttphhh..lhhslPsus..G+oR.....l...l...hp.hshsF.............................sthhhph..lP......cWa......pHh..st.spVL-pDhhlLptQ-chhhp....s................... 0 41 111 141 +8250 PF08418 Pol_alpha_B_N DNA polymerase alpha subunit B N-terminal Wuster A anon Pfam-B_4046 (release 18.0) Family This is the eukaryotic DNA polymerase alpha subunit B N-terminal domain which is involved in complex formation [1]. Also see Pfam:PF04058. 21.10 21.10 21.80 22.00 21.00 21.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.53 0.70 -4.84 27 272 2009-01-15 18:05:59 2005-08-18 15:36:04 5 9 224 16 181 279 0 213.00 20 36.09 CHANGED FGsss....ss-llpcLpslhplaslos--Lah+WEuFshpp..spp.p.p.LslpsLcpFccplppphp+pspstht.............tphpphhpssttct.hl.t.sssushhuhhpt.sTPshtpp+h..................................sspsp....htoPsstssstssss................................................................phsp..+pssGpll-olNsch...sps......hsssscsc.....l+lssph-sp+apY+sMh.+L.-uu-VLD-pI-chsplh.pcpaplsp..pFGsPsl.SQ 
......................................................ptplltch.plhhhash.s.sp-hh.cW.u....ashph....tts.t.....lshpslptF.cppltp.ph.ppps.ptt.......................p.pthhts..pt..........t......h.s.sss.hhs......p..s.....t..TPpt.tpph.........................................ttt........h..oPs...s.p.st.t.t.......................................................................asp...RtstGpllpshs.s..t.......ps............hss.s.sh.....hc.l.hst..-...h.ph..t...a+.Mh.+L.-t.p-lLsspI-phsp.l.pppaphp...tass....u............................................................................................................................ 0 52 97 146 +8253 PF08421 Methyltransf_13 Putative zinc binding domain Wuster A anon Pfam-B_2038 (release 18.0) Domain This domain is found at the N-terminus of bacterial methyltransferases and contains four conserved cysteines suggesting a potential zinc binding domain. 20.90 20.90 21.10 21.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.95 0.72 -4.48 27 438 2009-01-15 18:05:59 2005-08-18 15:41:25 6 9 358 10 145 436 1072 61.50 34 14.89 CHANGED CRlCssstlpsllDLGtpPlsssFlss..ttscspshaPLclhhC..psCtLVQLpchlssphhFs .................CRhCust...h.pshlDLGhsPh.ss.uFlss..tph.s...p..E.s....haPLclhlC......psChLVQLpchhsscplFt........... 0 48 101 122 +8255 PF08423 Rad51 Rad51 Mistry J, Wood V anon Pfam-B_684 (release 17.0) Domain Rad51 is a DNA repair and recombination protein and is a homologue of the bacterial ATPase RecA protein. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.52 0.70 -5.48 20 2467 2012-10-05 12:31:09 2005-08-18 16:12:14 6 29 887 65 1071 8769 2265 183.50 30 69.83 CHANGED shGFpTAo-htppRpcllpITTGS+pLDpLL.GGGIETGSITElFGEFRTGKTQLCHTLsVTCQLPl-hGGGEGKshYIDTEGTFRPERllsIAERaGLDspsVL-NlAYARAYNs-HQhpLLtpAuuhMuEu+auLLIVDSsTALYRTDaSGRGELusRQ.+Lu+FLRsLp+LADEFslAVllTNQVlApVDGu.uhFs.uDsKKPIGGpIlAHASTTRLhLRKG+G-pRlCKIYDSPsLPEuEssFuIsppGItDsc ...............................................................................................................hc.t.hh........t..........u.................Gh..........................lsE.................h..h.................G...ts...GKoQl....s.hp.h.s..ls.s......p.....ls.....................t...........t.....G.............h.......t.....u..........t..sha..l.................D.T..........E...s.s.....F......p....s...............p..R.....l........t...........h.................s................p...............t..............................t................................p..........................p..........t.............h........L........p......p...........l............h......h...s......+.......s.............h.....s..........................p........c......p....h............t........l.......l.................t....h......t........t......h......h.............s......c..........p...........t.......h......p....L......l..ll.DSl.....su...h.....a.....R...s......-.........a.............s.........u........c.......s........p...............l........u......t............R.....p.....p.....p......L.....s.....p.....h....h............p..t..L..p.l.u...c...p...a...s...h...A...l.h.l.sNQ.............h.......t..............t..........t.................................................................................................................................................................................................................................................................
................................................................................................................................................................................... 0 328 556 830 +8256 PF08424 NRDE-2 DUF1740; NRDE-2, necessary for RNA interference Mistry J, Wood V anon Pfam-B_21376 (release 17.0) Family This is a family of eukaryotic proteins. Eukaryotic cells express a wide variety of endogenous small regulatory RNAs that regulate heterochromatin formation, developmental timing, defence against parasitic nucleic acids, and genome rearrangement. Many small regulatory RNAs are thought to function in nuclei, and in plants and fungi small interfering (si)RNAs associate with nascent transcripts and direct chromatin and/or DNA modifications. This family protein, NRDE-2, is required for small interfering (si)RNA-mediated silencing in nuclei. NRDE-2 associates with the Argonaute protein NRDE-3 within nuclei and is recruited by NRDE-3/siRNA complexes to nascent transcripts that have been targeted by RNA interference, RNAi, the process whereby double-stranded RNA (dsRNA) directs the sequence-specific degradation of mRNA [1]. 
26.60 26.60 26.60 26.60 26.40 26.40 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.99 0.70 -5.55 23 285 2009-01-15 18:05:59 2005-08-18 16:51:24 5 6 205 0 211 292 3 263.90 22 29.68 CHANGED lpp+stELs+plccsPpDlctWlchlcaQ-plhp.tpp............psppptls-hKLSlhE+ALcp..s...s..sc-cLllthLctssclW.ssccltp+....Wppllpp...........sss..phsLWhpYLcacQup.........FusFohsclhpsahcslctLpsuhsp...................phppshltlFlphshFl+puGasEhAlulaQAhlEhshapPpplptp...........ptlptFppFW-S.tssRlGEssApGWpph.pt..ps.ps..sttp..ssps.........................hap..........uWtpsE...................ppRpspsthPsRs.........s-st--DP.RsllhsDlpshLhhl.s...tpsphpLlpshLtahtlP.h.p.ss ...........................tc..phpptlpppPpshphWhthhtaQc..phht..t..................................ptpht.hh-hKlulh-+Alpt.......p..................s...sp.pLhl..t......hhc...s..t...ph........h...ps.....pt....lhpc...................Wpphl....................pst.......thtLW.pal.hppsp..........hspashsphhpha.pslpthtthtpt.....................................thp..hh.lhhphshahppuGa.....EhuhuhhQuhl-hshhtPt.h.t....................t.ht.ht..FW-s.t.sRhG............-.su.GWt........t..............................t.......................................................................................................hap.Wht.E........................................................ptpt......ttthhPh+s.................sp.tppDs...R...lhhsDlt..Lh.h.p.....t.t...Lh..hh.hhth.......s........................................................... 0 77 117 173 +8258 PF08426 ICE2 ICE2 Mistry J, Wood V anon manual Family ICE2 is a fungal ER protein which has been shown to play an important role in forming/maintaining the cortical ER [1]. It has also bee identified as a protein which is necessary for nuclear inner membrane targeting [2]. 
25.00 25.00 32.80 27.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.45 0.70 -5.87 16 140 2009-01-15 18:05:59 2005-08-18 17:08:01 5 2 127 0 104 136 0 388.20 36 93.55 CHANGED lRshhushYLhlllloIPlAFDVGGlsCGLuFSlTLFshYFlhoTl+llst+pp...ahhhsSllYY..sQahlIPoLLhhFLShass-p.tt................................p..s...hhhhhpthVc.....sWchhLspSTPlFTLLEGFCoLLlIQAlGpss+WLshc..+SDoWlIhSLlsSGulITuohYaLYRIYshP.aplshhsAoLLGhsLohshsLGhaGIlSG+GShlESSLhFAYlV+CIYEIFPchupsAopslhphhppsh.tshpsplP...................................lPP.....................Ihssloplh+hlo.olPsShpslaphhh.hAhpoloPulllsLsaRIhVFYuATRIIPulpcsushs..........................................................................pcssspllphlYhYSPCIlIAVYTHLlLQYpGpLts-LslW.....ha.....................p.phllcuWpFWNWlNlhsTllLYAsELhuu ................................................................................phh.ushaLh.hlloIPlAFDVGGhpCGLuaSloLhhhYFhhohl+lhs.cpu.......hhh..h..s..sl..lhh..sQahlIPuLLhh.LstFSsDs.ss.............................................................t.h.......ppp.....sh..ahahhptllp.....sWchlLpaSoPlFpLhEGFsoLLlIQAsGQhsR.WL..ssc..pS-..oW.hIs.LlhSuulIouul.YaLaRlh.hP..tlosh.sAsLlGsslTssshLshaGIsSu+GsslESS...LLFAYlVhClYpIhschtsos.s.tth......s...spp.phP.................................................slPP.....................................lIhss....h.o.slh+hlo.....sLPs.lpshhphlh.hAhpsloPullIsLsYRlhVhYuATRIIPAlpcsus.ps..p.sp....tp....................................................................................................................................................................................................................................................pcs.ssp.hlthL.aaSPsILIAVYTpLlh.Qa.up..t.t.shh.........h.......................phsssuhshWpWhNlhsThhLYAsELhh.s...................... 
0 26 58 89 +8259 PF08427 DUF1741 Domain of unknown function (DUF1741) Mistry J, Wood V anon Pfam-B_35314 (release 17.0) Domain This is a eukaryotic domain of unknown function. 20.30 20.30 21.80 21.60 19.50 18.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.51 0.70 -4.90 13 205 2009-01-15 18:05:59 2005-08-18 17:17:45 5 4 175 0 150 199 1 213.60 36 35.35 CHANGED Ph.sspsRsLuTslLDlhl-sIsaNL+++LssslYslsluIlhRlloYhscs+lRLsYHWpELWpsLLoLl+FL..sohso-Lpsp.....splppLsppllNllAhhlosGDoFLPossuYD-LFYKllcsupllscF+shhtppsp.s......ss..h....................................hsulssLluVspHFpshlh............t.sups.........ppppLoscpVpclIKpsY-TLolphp-uLstap+acEus.hcshhK+hsRssVsDsRthl ...................................s.ssRsLssslLDlhl-hIspphh.+chshcLYhhCltllhRlLsY.p.+s.+lR..L...p.Y.pWpELWpuL.lsLl+FL.......ho.p.ss..Lhsp.......sIh.sLshplVNLhshhlohGDTFLPoPsSYD-L...........aYcllchtplhsphhshhhhh...spss...........s.th....................................hpulsslhsl.sHapshlt...........................t...t.p......................ph.pplo.ppVhpll+psY..-TLolp.hp-uLstaE+apEts..tshhKchsRshstss+......................................................................................................................... 0 49 76 118 +8260 PF08428 Rib Rib/alpha-like repeat Fenech M anon Pfam-B_3139 (release 18.0) Repeat The region featured in this family is found repeated in a number of bacterial surface proteins, such as Rib (Swiss:P72362) and alpha (Swiss:Q02192). These are expressed by group B streptococci, and Rib is thought to confer protective immunity. 
24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.22 0.72 -4.21 16 1574 2012-10-03 16:25:20 2005-08-18 17:26:22 5 133 326 0 165 1657 10 66.80 33 17.85 CHANGED loscsGt....sPDss-GIpNhscLPcGTp...Y.......sWcs...........sPDsosPGcKsusVVVTYPDGSpD-VsVsVpVss .....................................................ps..ut.......pP.s.sp.-.sI.......p..N.........h......ss...L.....P.....c.....G....Tp.........h.................sacs.............................s.DT.s......s.s.G.c.p.s......up.VsVTYPD..GS...p.....-......p......V...s...V.sVpVh..................... 0 21 54 149 +8261 PF08429 PLU-1 PLU-1-like protein Fenech M anon Pfam-B_4023 (release 18.0) Family Sequences in this family bear similarity to the central region of PLU-1 (Swiss:Q9Y3Q5). This is a nuclear protein that may have a role in DNA-binding and transcription, and is closely associated with the malignant phenotype of breast cancer [1]. This region is found in various other Jumonji/ARID domain-containing proteins (see Pfam:PF02373, Pfam:PF01388). 
24.30 24.30 25.50 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.98 0.70 -5.55 30 546 2009-01-15 18:05:59 2005-08-18 18:47:40 6 64 236 0 309 488 4 291.60 26 21.73 CHANGED -sWtp+scchLc...pss+ssLcsL+sLlsEuEchtas...................s-L.ppL+shVppAcphlcpAppllspKppsR.......p+stts.............................p...sp..hslpplppLlcphpsLshsssp.lspLcchhppl-pFppcApphLpp......sshshtclcpLl-pGpshsl-lPElstLcphlcpt+Wh-cspcths.h..............hol......c-lccLl-.pGtpls...spspchhtcLpchlshu-pWEc+Acch.....................Ls.........t.p.hshspLpulhppu..ps...........lPlslssltpLcshLp+u+p......atpplpsl..p...........sspphPphc-lcslhtpucsLssph.pphppLE.....pplpptpsWp-+us+hFhptNush.slLp .............................................................ptWhtphpthLp....tt+.sL.ph+sLhp-uc..p..h.as..............................spL.ppLpthhpcscphsp.utthlst.ppppp...........p.............................................................................................t..hslpc...lpthl.pp.h...sLsh.sh.sp.htt.lcphhpplcpappcupphLtp.......ptss...........t........p........lp............pLl-.upp.......h...sl.-...l.PplttLp...p.t.l.p.p.t.+.Whccsppthttst........................................hoL......p.hcpLlc...tGhtls......ssscc.ths.....c........Lp...c.lLshucpW-cKAc.h...............................................................Lp................p.p.hshspL.csllpps..pp.............................................l.P.s.l.....ss.ht....sLcphlp+A+t........................Whtplctl....p.................................stpphshhcp.LptLh.ttu..c..s...lssth.pth..plE............tltpscsWpcp....stphFhptsu.h.plLp............................................................................................................................................. 
0 82 140 232 +8262 PF08430 Fork_head_N Forkhead N-terminal region Fenech M anon Pfam-B_3191 (release 18.0) Family The region described in this family is found towards the N-terminus of various eukaryotic fork head/HNF-3-related transcription factors (which contain the Pfam:PF00250 domain). These proteins play key roles in embryogenesis, maintenance of differentiated cell states, and tumorigenesis [1]. 21.60 21.60 21.60 21.80 21.50 21.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.63 0.71 -3.77 29 224 2009-01-15 18:05:59 2005-08-19 10:58:52 7 3 94 0 100 197 0 126.60 31 30.04 CHANGED shYs-s.ppsYSss..s...........sMNoMsshhshssh....ts.............................s.sM.uhss....suhus.sshsuMssGhsuhhs......s.huhsshus.shussu.hus.hust.sss..hssluP..tshsts.s......s..thpR.....ssKsYRRSYTHA ..........................................hY.ts..psYos...s........................sMNs...Mso.hhshssh.........s...........................shsM.uhhs......suhus....sh.suMusussuhh............uhhuhsuhus...slusus..hus.hust.sus...hsuluP..tshssh.s.....s.utlptsR......ssKTYRRSYTHA........ 0 17 26 59 +8264 PF08432 Vfa1 DUF1742; AAA-ATPase Vps4-associated protein 1 Mistry J, Wood V anon manual Family Vps Four-Associated 1, Vfa1, in yeast, is an endosomal protein that interacts with the AAA-ATPase Vps4. It would seem to be involved in regulating the trafficking of other proteins to the endocytic vacuole [1]. There is a CCCH zinc finger at the N-terminus. 
21.30 21.30 21.30 21.60 20.90 21.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.55 0.71 -4.26 24 142 2009-11-11 09:10:55 2005-08-19 15:36:58 5 2 129 0 107 136 1 164.20 33 88.68 CHANGED NhYphR+VAsssuKuChICaKPoooVLlossst.................DFFYlCssHLpDc.pFso....Plhss.chttsctKpctlpcclccl...KK-hEpcpphhpch.tph..............Kcs-cpK-cc................sspcs.-ppccpcpcptcpchpslppshst.tsphsp...ps+hasLcp.............shaptRlcphpptch......tccpp......pclpp.sshFPslPp ..........................NhYphR+VAppsu+sChICaKPoooVLl..s...ssst......................DaFY.sC..HLp..Dp.tFss...........Phhsp...ph.t..st.......tctcthpc-l-+l...cp..-hE....p+ppt.tpct.ppt...............ccpcppK-pc................ppppp.....sppspp...pp.cpppcphpp..hppphst..........sphsp.......psRhapLpp..................shap.Rlpch+....phph......s+cpt............pcl.tp..thFPSsPp............................................... 0 25 55 86 +8265 PF08433 KTI12 Chromatin associated protein KTI12 Mistry J, Wood V anon Pfam-B_11625 (release 17.0) Family This is a family of chromatin associated proteins which interact with the Elongator complex, a component of the elongating form of RNA polymerase II [1]. The Elongator complex has histone acetyltransferase activity. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.68 0.70 -5.19 21 408 2012-10-05 12:31:09 2005-08-22 14:09:07 5 14 303 13 286 1745 907 262.30 25 83.50 CHANGED MPLlllTGhPsSGKoThAcpLtphhpc............pshsVhlls.D-slt.....hs+cs..Yt.cSppEKthRuplhSsVcRsLS+s.slVIlDuhNYIKGaRYpLaChAKsspTsaCllashss.................h-hshpaN.....pp+sps.........................................................ass-ll-pLh.RaEcPsspsRWDpPLFolh......sccshsh......................--Ihpsl................hppptLpPNpuThspPh.usssaLpcLDppTppllstIhpt.ppssssstlp..............hs.ssp..lphsts...holspLpRlRRpFlshs+........psh-h-+lhshFl-aLN .................................Ms.Ll.l.hsGhPsSGKo.phut..p..l..t..p..h...hpt..............................................................t.h....p.l..h...h.ls...ppsht.......................ht.pps.......a............t..........s.s............t..........t..E....+.....h....R......u...............h....h....u.....t....l........p......R.......t......l.........s.................c........p.....s..........l.V....I........l.D..u....h..N.....Y.I..K...G..aR......Y....p..L.a.C...h.u..........+..t...h..p......s..s..h....C.h.......l..a.s...s.ss.........................................h-ps.hphN.............ppR...t..p.t......................................................................................................................................................a..s..t...-.h...hc...p...Lh...h..R.......a......E...p.P...s.s.p.s.R..WDp...PLhsl...........sptp....s..h...............................................................................................................ppl.h.ps.l.........................................................................hp.t.t............s..pt.u..T.......t.t....sh.......s.......s..s.......shLa.p..lDp.tTppllstlhp.....t..t.p..t........h..s..h...........................................................h.t.................hsh.pLpch++palth.p..................t......ph...Fht
hl.................................................................................................................................................................. 0 108 158 233 +8266 PF08434 CLCA_N Calcium-activated chloride channel Wuster A anon Pfam-B_3091 (release 18.0) Family The CLCA family of calcium-activated chloride channels has been identified in many epithelial and endothelial cell types as well as in smooth muscle cells [1] and has four or five putative transmembrane regions. Additionally to their role as chloride channels some CLCA proteins function as adhesion molecules and may also have roles as tumour suppressors [2]. The domain described here is found at the N-terminus of CLCAs. 20.30 20.30 21.20 20.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.70 0.70 -5.16 13 295 2009-01-15 18:05:59 2005-08-22 15:35:17 6 15 48 0 190 313 0 228.60 40 29.90 CHANGED MushpsslF.lLlLaLLpus.ssSL......lpLNNNGYEGIVIAIsPsVPEDEpLIppIK-MVTcASsYLFEATc+RFYF+NVuILIPtoWKu+scYh+PKpEoYcpADVlVAssshttsD-PYThQaGpCGEKGcYIHFTPDFLLsccl.spYGPpGRlFVHEWAHLRWGVFDEYNsDcPFYlutppcIcATRCSssITGpshVhpCQGGSClo.+pC+hDppTGLYEcsCpFlPc+sQotKASIMFMQulDSVVEFCsEKsHNpEAPNL .....................................................hh.hhhl.hll..s.....tssh........lpLpsNGY-slllAIsPs..VPE....D..p...p..lIppIK.....-MlTpASsYLFpATcpRhY.F+sVsILIP...tTWp...s...............p.s.........p.Yt.hs....+..p.E.o........Y...cp...ADVlV....ss..s.t.........hs.D..c.PY......ThQas..tC.G-pGcaIHhTPsFlh.....s....c.....ph....t.Y..G....s.p......G+lhVHEWAHLRWGVFDE..Y...s...p....-.......p.......P........F.Y..h.s.t....p...s....pl.....csT.R........CSstIsG...l..tsp..tssC......p.......Cphs....ot.l.a.c.ttCpF.hPp....p......Q..s.t....p..uSIMF......h...Q...s.lsSVs-FCs.......p.......p.....s.......HNp-APs............................................... 
0 89 93 121 +8267 PF08435 Calici_coat_C Calici_coat_N; Calicivirus coat protein C-terminal Wuster A anon Pfam-B_108 (release 18.0) Family This is the calicivirus coat protein (Pfam:PF00915) C-terminal region. 25.00 25.00 27.10 30.20 20.80 19.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.40 0.70 -5.06 29 2058 2009-01-15 18:05:59 2005-08-22 15:40:16 6 5 1840 102 0 1400 0 178.20 60 47.19 CHANGED LTpLDGoPacPs-c.PAPlGhPDFuuphasVhsp.csss........suspucpshlsTssspFsP+LGslphso.s.....sD.hhsstssphoPsulssst...phs.WslPcYuusLs.sspLAPuVsPshsGEplLFFhSplPhs.uG..hssshlsCLLPQEaVpHFhpEpAPupu-lALl+YVNPDTGRsLFEuKLappGFlTl..ssoGssPhslPsNGhFcFsSWVspFYpLtPhGTuuutRR ...............................................................................................................s.shssaDPsE-hPAPLGsPDF.Gpl.GlhoQ.pspt........ssTRuHcAhl.Ts..s.spFsPKLGp.....l.h..sops.....sD..hpstpss+FTP..lGlh.s.....tt.....c.pQWslPpYsut......shpLAPslsP.hsGE.lLhFto.h......ss...hss.........lsCLlPQEWVQHFYQEuA....PuQ.S-VALLRaVNPDTGRVLFEsKLHKsGalTV..ApoGpps.lVlPPNGYFRF-SWVNpFYTLAPhGsG................. 0 0 0 0 +8268 PF08436 DXP_redisom_C 1-deoxy-D-xylulose 5-phosphate reductoisomerase C-terminal Wuster A anon Pfam-B_445 (release 18.0) Family This domain is found to the C-terminus of Pfam:PF02670 domains in bacterial and plant 1-deoxy-D-xylulose 5-phosphate reductoisomerases which catalyse the formation of 2-C-methyl-D-erythritol 4-phosphate from 1-deoxy-D-xylulose-5-phosphate in the presence of NADPH [1]. 
20.80 20.80 21.10 20.90 20.70 19.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.10 88 3400 2012-10-10 17:06:42 2005-08-22 15:49:37 7 8 3281 79 827 2645 1733 87.00 58 22.25 CHANGED llPVDSEHsAIFQsLpupstpp......lc+llLTASGGPFRshshcpLpsVTsppALpHPsWsMGtKITIDSAThhNKGLElIEApaLF ...........lLPVDSEHsAIFQsL...........s......p....t....p...pp................lp+llLTASGGPFRsp.s.hccLtsV.......T.......s.......cp....AlpHPNWuMGpKIolDSATMMNKGLElIEA+aLF.......... 0 290 560 711 +8269 PF08437 Glyco_transf_8C Glyco_transf_8N; Glycosyl transferase family 8 C-terminal Wuster A anon Pfam-B_3038 (release 18.0) Family This domain is found at the C-terminus of the Pfam: PF01501 domain in bacterial glucosyltransferase and galactosyltransferase proteins. 19.10 19.10 19.70 19.50 18.10 18.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.70 0.72 -3.95 14 973 2009-01-15 18:05:59 2005-08-22 15:50:24 5 2 442 0 23 344 0 56.50 39 17.19 CHANGED YPsspYFhpAhpsSPWpchsLhcAsstpph+hphKHhhtpt+YlsGlhshltYhhcK .....YPsupYFhpA+psSPWcchsh.h.cs.ssppph+hphKHhhtQp+YhsGlhshltYhhcK.............. 1 2 6 15 +8270 PF08438 MMR_HSR1_C GTPase of unknown function C-terminal Wuster A anon Pfam-B_4095 (release 18.0) Family This domain is found at the C-terminus of Pfam:PF01926 in archaeal and eukaryotic GTP-binding proteins. The C-terminal domain of the GTP-binding proteins is necessary for the complete activity of the protein of interacting with the 50S ribosome and binding of both adenine and guanine nucleotides, with a preference for guanine nucleotides. 
23.30 23.30 23.60 31.10 22.80 23.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.48 0.72 -3.82 93 341 2009-01-15 18:05:59 2005-08-22 15:51:20 5 8 303 1 227 327 110 110.50 40 25.54 CHANGED AANKsD.h.s..uccNlc+lpc..............llPsSAtuELuLR+AscsGhIc..YtPGDpcFp...lss-s....................l.scpQcpuL-tI+c.llpcaGu.............TGVQpslspAVa-lLshIsVYPVcc .....................................................................AhNKhD.hss...........A-cNlp+ltc............ph..ssptlVssSAhuElhLR+hs+pGhIc..YhsGs.-Fp...hsp-s......................l.s-cpcptLEpl+-.V..Lh+aGo................TGVppsLspAs.-lLshlsVYPVcs......... 0 60 135 193 +8271 PF08439 Peptidase_M3_N Oligopeptidase F Wuster A anon Pfam-B_679 (release 18.0) Family This domain is found to the N-terminus of the Pfam:PF01432 domain in bacterial and archaeal proteins including Oligoendopeptidase F. An example of this protein is Lactococcus lactis PepF [1]. 20.70 20.70 20.80 21.40 20.40 20.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.05 0.72 -3.93 182 3235 2009-01-15 18:05:59 2005-08-22 15:52:47 5 3 2039 3 529 2248 391 68.60 25 11.49 CHANGED Elhpls-pplpphlp.pps...pLptYpahLccl.hcp+sHhLStcpEplLuph.ssshsuhsplashls.uclpF ........................Elhpls-cplpphlp.tp.......cLptYpahlc.pl.hpp+sHhLotcpEclLuph.s-shsusschashhs.uDlpF........................ 0 204 361 452 +8272 PF08440 Poty_PP Potyviridae polyprotein Wuster A anon Pfam-B_237 (release 18.0) Family This domain is found in polyproteins of the viral Potyviridae taxon. 
23.40 23.40 23.70 23.60 23.30 23.30 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.58 0.70 -5.33 63 1068 2009-01-15 18:05:59 2005-08-22 15:54:46 5 30 151 0 0 1152 0 222.90 45 9.43 CHANGED AAFLCFsYGLPV.hTpsVoTulLupCTlcQARTMhpFELoPFahschV+aDGoMHPpIHclLKpaKLR-S-lhLschAIPhpssspWhosp......-Yp+lGsplph.scsl+IPFhs+slP-claEclWcslpcaKs-usFu+lo.SssAsKlAYTLpTDstuIsRTlslI-pLlpcEppKppaFcolsussssutsFSLsu...Iss.slRsRYh+DaopcNIphLppu+uQLhEFpshshchps.ph.........lpsaGsLpsVpaQocpp...lu+tLpL+G+W......stslhspD ......AAhhsFsYsLPV.hTpsVSTsllupsTV+Qs+sh.tFELoPFah.phlpaDGoMHP.lHclLKpaKLR-u.h.Lsc.uIP.phsstWhosp......-YpR.......Gh.plph...cpl+lPFhhpsIPpcLap.lWcslhp.a.KssssFsplp.usshsKlu.YTLpTD..uI.RTlhll-pLltpEhhKpp.Fcshhspss.u..FSl.s...lss.sh+t+hh+DaTtcNIthLpts+uQLhEFpshphshsp.ph..........hpcaGuL.psVaHQspss...huKhLpLKG+WsKoLls+D........................................... 0 0 0 0 +8273 PF08441 Integrin_alpha2 Integrin alpha Wuster A anon Pfam-B_609 (release 18.0) Family This domain is found in integrin alpha and integrin alpha precursors to the C terminus of a number of Pfam:PF01839 repeats and to the N-terminus of the Pfam:PF00357 cytoplasmic region. This region is composed of three immunoglobulin-like domains. 
33.00 33.00 33.40 33.20 32.90 32.70 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.85 0.70 -5.64 97 1499 2012-10-03 16:25:20 2005-08-23 09:52:25 7 51 113 50 666 1224 1 389.40 20 41.35 CHANGED uRPVlpl.psslphpPpp..lshpppp....Ct.s.......hsC..hslpsChphpupsh.......stplslshslplD.....ptpthpRshF..................tspppshptphhhhsptpthCp.phphal.p...shcDhloPIslplsasL................t.ttshsslp..PlL.s.t..psshhppplsFt+s...........................................................................................................................................................CGsDs..lC.sDLpL.ssphstp..p............................hllG.ss..pplslplslpN..........pGE.sAYpspLhlph.PssLpatplt..................pt.tp..sC....stppssp......hlsCsl..GsPhhp..ssplphplhasssphst..........tpslphplps...posspps....pssshphplslthtsp.......lpl.pGsspPsphh........................p..spcchGs...tlp.apaplpNpGss...slspsplplphPhphpss........hLlahhplt...............t....C.................sttthsshpl................................ptst.ssspppphphpppphhtp.ttt......................................s.ps....pChplpC 
.............................................................................................................................................................................................uRPVlpl...ps...slp.h.pPp..........lshpthp.......Ct.t..............s....hplphChphpspsh...........s.phsl......p.hp.lphD......ttth.tRshF...................spptp.h.ttp..hh..hht.ttp......C..p..pht...hhl...p...............shcD.....hls.....P....I...sl...plsasL......................................p..slt.....PlLs.......t...p..t.........h...ptp..h....a.....p.s.........................................C..GpDp..hC.ssL.pl..psp...hstpp............................hhls.sp..pplslplslpN..........pG.E.sAYpsplhlph.P..s.lpattl............................h..pC.....p.tpps....................h.C.pl..G.p..P.....hhp...ssph..phtlhas.s..s..th.p...............t..pplphpl..p......h......po...p.....s....p..pp..................pps.h..p.hp.h.l...hhts.t..............l.l...pu..spssphh.......................p...........p.pshGs............lp..a.a..p.l...........t...................N....G.....sls.th...lplthPht.h....ps....t..............lla.hhph...........................t.hp..Cp.................t...p..th................................................................................................................................................................................................................................................................................................................... 
0 114 162 355 +8274 PF08442 ATP-grasp_2 ATP-grasp domain Bateman A anon Bateman A Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.14 0.71 -4.94 22 4398 2012-10-10 13:17:03 2005-08-23 14:07:01 5 18 3546 33 1434 3767 3414 194.30 40 47.81 CHANGED sLHEYQuKclhp+aGlsVPpGhVAposE-ApchApcluspshVlKAQVhsGGRGKA.........GGV+lspos-EstchAcchLGppLhThQTs.tGphVpcVhlp-tssIp+EhYlulllDRssppslllASscGGh-IEEVAtcsPEtIhKhslDsthGlpsapARclAhphGhps.c.hppsschlhpLYclFhcpDAohlEINPLVh ............................................................................slHEYQu.K..p.l.h.tp.......a.........G...l.s.ls.pG......hs...s.....to..s..c......EA..............p....s.....A....p....c......l........u...............s....s.........s............h..V..VK...u...Q.l.+..A.GG.RG.Ku.................................GGVKl.s..c....o..h..-.-s.c.t...h..A.pp....h.L..G..p.pL...l....T.h.QT........s.s..p......Gp.....V..ppl...h....l.E..p...s.s........s....I.........t.......+.....E...hYluh.ll.D..R...usp.p..sshhuSs..E.GGh..-.IE...c.VAp...c...s...P.-t.I..h.Kh.s........l.D...P...h....s.G.h.p.s...h..p..u.R.c..l.A.h.p.l....G..l.s..s...p......h...pphschhhsLap.hF..h.-..p..D.h.sl..l..E..INPLl............................................................................................................................................................. 0 451 852 1189 +8275 PF08443 RimK RimK-like ATP-grasp domain Bateman A anon Bateman A Domain This ATP-grasp domain is found in the ribosomal S6 modification enzyme RimK [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.07 0.71 -4.82 5 2606 2012-10-10 13:17:03 2005-08-23 14:07:23 6 34 1803 4 917 10880 6158 177.30 31 43.97 CHANGED AcDKh+ShpLLAKps........IslPsouLuau.-cApchIEclt...uaPlVlKslaGSpGltVhLAcccQuLcull-uh+.hsssI....LlQEFIccuss...cDIRslVVGsEVVuAlcRhuc-G-FRoNLaRGGsuEshslosEEcEIAIKAucAhuLslsGVD.Ilcoc+GLLVhEVNuSPGLcshp+soGINIAtKll-aI ............................................................................................................................................psKhhshplL..tpp.u.................l.s...h......P....h....T....h....h.....s.........t.....s..................c.....s....h....t....c....h...l..c.h.l..........uh.P...lV.lK.....h......h......p......G......o.....p.......G...h.....G....V...h......l.....s......c...s...........p.......p...........s........h.....p.......s......l.....l......c......s.......h.......h......t......h......p......s...t...l...........................ll.Q..E...al.....p..p..s..t.s.............tDl...R.....s..h..V.........l....G..........s...........c.........l........l.........u......A.........h........p........R......p........u........t......c.......G...........c.......a..........+........s........N.......h......c........p...........G.........G....s........s.............p..........h.........h.........p..............l.........o........s..........p........t.....c.....c......l........A...........l.....+........A.....u....c.......s......h.......G.....l.......s......l........s.....G.........V.....D.....l........l.......c.................s.......s.......c.......G........s.......h.....V.h.EV.N..u...s.....P..u.....h..c.u...h...p....t.s.s..s...h...s.l.u.t..hht........................................................................................................................................ 
0 288 540 763 +8276 PF08444 Gly_acyl_tr_C Aralkyl acyl-CoA:amino acid N-acyltransferase, C-terminal region Fenech M, Moxon SJ anon Pfam-B_7828 (release 9.0) Family This family features the C-terminal region of several mammalian specific aralkyl acyl-CoA:amino acid N-acyltransferase (glycine N-acyltransferase) proteins EC:2.3.1.13. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.01 0.72 -3.97 8 176 2012-10-02 22:59:21 2005-08-23 15:35:18 5 3 37 0 93 173 2 87.10 38 30.37 CHANGED hClLGPEGTPVSWsLMDQTGElRMuGTLPcYRtQGLls+VhappsptLcKhGFPhYsHVDcsNpshQ+MStsLsHlshPCsWNQWNCsP .......................hClLsPEGsPVSWslMDQouEhpMuYTlP-YRp.pGhhphlhhp.hsptLpppGa.P.h.YspVtcsNptsh+hhpslta..h...h.P...Cp.WppW.hsP.............. 0 7 9 26 +8277 PF08445 FR47 FR47-like protein Fenech M anon Pfam-B_71946 (release 17.0) Family The members of this family are similar to the C-terminal region of the D. melanogaster hypothetical protein FR47 (Swiss:Q9VR51). This protein has been found to consist of two N-acyltransferase-like domains swapped with the C-terminal strands. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.60 0.72 -4.23 9 870 2012-10-02 22:59:21 2005-08-23 15:35:46 5 14 549 3 377 18220 2313 82.70 21 31.53 CHANGED slGlhct..chsphsuWhl+..s.Gtluulpshsca+p+GLGshLspuluctIsp.cGpsshshlsssNssupplac+LGFphh...pshal ...............................................................................h.................h....s.......p.....l......s......s....l......h...s...h.s.c..aR...t+..G..h....u....s.t....l....s....t........s....l....s.....p..........p...............l....h.........p......p...........G.....c.......p...........s.......h.......l...h.....s...s....s....s..N...s.......s...u...t....p..l..Y.p..+..l.GFpth...........hhh............................................ 
0 141 246 325 +8278 PF08446 PAS_2 PAS fold Bateman A anon Pfam-B_437 (Release 18.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 21.40 14.00 21.40 14.80 21.30 13.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.44 0.72 -3.60 41 1308 2012-10-04 01:10:46 2005-08-24 15:19:37 6 52 760 31 297 1415 19 101.80 37 12.43 CHANGED shhptIQpsGhIQPaGslLAl.-Es....shplluhS-Nss-hLsls................spp........................lGpcl+sLhsssusssLccAhsss-lohlNPlhlp.s+s...su+sFhAllHR....h-..sslll-LEPs ..........s..p.Iphs.shIQPaGshlAl...-...-..t.........s.hpllt.h.SENs.p.hLshs................tt.............................................lG.pslc...s...L.h...s..s.s..s......s.h...t.L.p..p....A.h.s.t....t..c...l....s....h....hs.Pl...h.hp....scs.........ouK...P..F..aAIlHR...........hs.....sulllDhEPs.......................................................... 0 79 167 233 +8279 PF08447 PAS_3 PAS fold Bateman A anon Pfam-B_64 (Release 18.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.00 0.72 -3.89 122 12998 2012-10-04 01:10:46 2005-08-24 15:19:44 6 2702 2306 12 5490 23426 1576 87.10 19 13.27 CHANGED hlhhss.ph..tclhGass.p-h..hst...hhphlHP-Dhtthhpshpp......tht.pst.hpt.-aRhhp.psGphh...Wlpspspsht.spsGpshphhGsh .......................................h..hhst..ph....hpl..h.G...h...s....p....c......h..................s.................................h..........h..p..h.....l...H...P...-..D......h...p...t...s..t..p...s.h.......pp.............thp..ps....p...s........a..p.......h....c......a..R...l...h...........p....p..........c...G........p...h.p.........Wlp.sp.u.psh..h...s...pp.G.p..h.thhG....................................... 0 1661 3542 4558 +8280 PF08448 PAS_4 PAS fold Bateman A anon Pfam-B_493 (Release 18.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.00 0.72 -4.02 49 13956 2012-10-04 01:10:46 2005-08-24 15:20:11 5 2601 2465 26 5614 38008 2799 109.70 16 17.36 CHANGED hcshssslhhh....Dt-hphhhsNts......htp.hhthsspphhGcsht..-..hhst.......ttsphppthpcshpsppshphhthhth.......ssp.chhphph..hPlhs.pGp.hhuhhhhspDlTpppp ......................................................................pthss.slh...hh.......D...t.....c......t.....p...h...h..h..s..Nps...................htp....h.....h.....u......h......s.....t......p........p....h......l.......G...c.s.hh.............-.......l.h...s.tt..........................t.t..t....h.....t....p....t......h......p.......p......s.......h........p........s........t.....p......s......h.....p...h...p...t..h.hhh........................ss.p..t...p...h..h...p..h..ph............hP...l.....h.........s.......t.......p.........G.........p....h..........h.......u...l.l..s.h..spDlTcpt............................................................................. 0 1870 3885 4919 +8281 PF08449 UAA UAA transporter family Bateman A anon Pfam-B_606 (release 18.0) Family This family includes transporters with a specificity for UDP-N-acetylglucosamine [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.61 0.70 -5.50 23 1443 2012-10-02 19:55:49 2005-08-25 11:28:56 6 22 332 0 997 3957 210 274.70 19 77.88 CHANGED hlhshuGlasuhhs.ulh.Ehlhptp.us..............uthlTFsQhlhhshhuhlhthhhp............htppphPh+pYhlhshhhhhssshsNtuLpa.lshPspllh+SuphIssMlhGhll..hpK+YshhcYhushhlolGlhlholhsupsssspptpt.p...............GlhlLshuLhhsuhhushQEpla+pYstss.........pEhlFYophhuhshhhlhhh......pshhhpuhthhh......................phPp.hhhaLlh.slspals.phVahhhsphuuLTsollsTlRKhlSlllSllhFspshohtphlGshlVFhGhhlhshstpptp .........................................................................................................................................................................h......h..h........s...h.h.....c..h..l....hp....t...........................................s.hls..h....h.p...h..h.....h....h..h..h.....h..u.h..h..h.h....h.ht........................................................tt.hs....h.......h......t....a.....h....h.....h....u....h........h....h.....h....h....s.........h....h....s....p....t......u...L....p....a.....l.s...h...Ps................h...lh..Ku..sp..h.lsl.....h.l.h.u.hll.............ht...+..c..Ys.h.h.c...h.h.....s.sh.....h.ls.hGlhl........h.....s...h..t.ss....p.t...t.t.t....t.t.t.t........................................................................G.hh.h.l..h.h...s..l...h...h....s..u.........h....h.......sh..h.p.......-.p........h..h...p...t...h....t...h..p.s..........................hp.h.h..h.h..s...s....h....h....u..h........hh..h.h...h.h..hh..........................s.t.h..h..p..s..h..t..h.h.....................................................................ph.st....hh.......h.......l....h...h....h....s...h....s..........s....h..h......u........h....h...l......h...h....h....h....t.....h......u......u.l.s.h.shl.....h.s......h......R.K.h....ho.h.l.h.S.........h..l....h....a........s..p.....s...h....o.h..h..t.h..h..uhh.l.lFhuhhh.s
h......t...................................................................................................................................................................... 0 376 570 831 +8282 PF08450 SGL SMP-30/Gluconolaconase/LRE-like region Finn RD, Fenech M anon Pfam-B_3630 (release 7.0) Family This family describes a region that is found in proteins expressed by a variety of eukaryotic and prokaryotic species. These proteins include various enzymes, such as senescence marker protein 30 (SMP-30, Swiss:Q15493), gluconolactonase (Swiss:Q01578) and luciferin-regenerating enzyme (LRE, Swiss:Q86DU5). SMP-30 is known to hydrolyse diisopropyl phosphorofluoridate in the liver, and has been noted as having sequence similarity, in the region described in this family, with PON1 (Swiss:P52430) and LRE [1]. 26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.67 0.70 -5.00 50 3454 2012-10-05 17:30:43 2005-08-26 09:32:17 7 90 1667 66 1425 4351 2023 232.20 22 68.45 CHANGED LGEGPhWctp...........ppsLaWV......DIhstplaphsssssppp.....................hchss..........hlusls..h...ps..spllluhppul......................hlh..c.......hss........s....phphlsph..........sp.spsRhNDGpsDspGp.hahGoMshsttsst...........GsLYRlsss....tp.....lphhhss.lslsNGluaSsDspthYa.....sDohspp..lhta-hDhssu.lss++sa.hchppts......t....PDGhslDu.-GslWsAha.....suu......plh+asPp.Gc.llpplplP.sp.psTsssF.GGschssLalToAp 
......................................................................................................................................................................................................................sEuPha...............p.tt.L......h..as................D.....l............t....t....p.....lh..p....h.....p.....t..s..t..t.....t.h.....................................................h.t..hst...................suh..hh.........tp.........sp.h...lh..s..........t.t.sh......................................................hhh.......s..............t.s..............................t.......ph..p...h.lhs..........................st.thsp..N...D.s.....h.s....c....s................p.G..p....ha.hs.s.....s.h...t...t.....t.tsh............................................utl.ap.lsss......tp..........................l.p.h.l..h...s.s......l..s.h...s.........N..G............ls.a..S..s....D....t..p.......hh..Yh.......................s-..o............h.....s.....pp...............lh.ta......s......h.....s.......p..s.......s..........l...s....s..t......c.hh....hp....hsttt..................................................PD..Gh.s....l......Ds...cG...s.l.a.s.A..h.h...........................su.s..........................tl....h..h..a....s.....P......p....G.......p.......h...l....tp..l.....t......lP....s.....t...t.....s.....s...shsF...u............G.....phppLalos..t................................................................................................. 0 451 825 1164 +8283 PF08451 A_deaminase_N Adenosine/AMP deaminase N-terminal Wuster A anon Pfam-B_3145 (release 18.0) Family This domain is found to the N-terminus of the Adenosine/AMP deaminase domain (Pfam:PF00962) in metazoan proteins such as the Cat eye syndrome critical region protein 1 and its homologues. 
21.30 21.30 22.00 21.80 21.00 20.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.98 0.72 -3.96 32 240 2009-01-15 18:05:59 2005-08-26 09:39:06 6 5 76 4 102 235 0 85.10 33 19.39 CHANGED hLhhshhhs.t.........................................psapppRstlhptEpthplGuclhLspcEppsNphlMp.lKp..cElpcuhhsstpFsPuhHFFcshshIcp.SslFpll ...............................................h.......................................................h.p.Rpthh.p.Ep.hthGs..pl.LsttEttsNphLMs.lKp..pElpcG...............hhss.p....FsPuhHFFcshshIcp.SslFphl..... 0 26 35 71 +8284 PF08452 DNAP_B_exo_N DNA polymerase family B exonuclease domain, N-terminal Wuster A anon Pfam-B_3196 (release 18.0) Family This domain is found in viral DNA polymerases to the N-terminus of DNA polymerase family B exonuclease domains (Pfam:PF03104). 21.00 21.00 21.00 34.50 20.40 17.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.58 0.72 -6.73 0.72 -4.67 6 88 2009-01-15 18:05:59 2005-08-26 09:45:14 5 2 48 0 0 82 0 22.10 77 2.21 CHANGED M-I+ClNWFEs+G.Ep+FLYLKA MDVRCINWFES+G.ENRFLYLKS 0 0 0 0 +8285 PF08453 Peptidase_M9_N Peptidase family M9 N-terminal Wuster A anon Pfam-B_4156 (release 18.0) Family This domain is found in microbial collagenase metalloproteases to the N-terminus of Pfam:PF01752. 
23.90 23.90 23.90 27.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.84 0.71 -4.96 34 561 2009-09-11 12:23:32 2005-08-26 09:47:51 5 11 300 3 58 499 2 179.60 32 22.09 CHANGED hs.hspLsphs.pcLlstlps.hchsslssLFphsssstphhhscs+hpslhstLpppupsYsuss..u+ul.sLsEhLRAuaYltaYs..cplsthsst.hp.chhsulpAhhpNPsFh.ss..............cpQspllpuhshlls..NspppstslstshslLppaN...cshsp.hp.hs..........thsslaplhtGhpas.phht............hhtt ..........................hs.hu-LsphssppLs..chlss...hs.a...p...plssLFphspsshs.hh.scsphpslhstLsppupsYTt-s...u+slpshsEhLRuuaYlta.s..scls.hspt.hp.chhsulcshhpNsshhh.ss..............t-QspVl..suhupLIs..NA.us-s-slssshplLcpaN...cshsphspphs..........tssul.aslMpGhsashp.hh..................................................................... 0 19 31 45 +8286 PF08454 RIH_assoc RyR and IP3R Homology associated Wuster A anon Pfam-B_4135 (release 18.0) Family This eukaryotic domain is found in ryanodine receptors (RyR) and inositol 1,4,5-trisphosphate receptors (IP3R) which together form a superfamily of homotetrameric ligand-gated intracellular Ca2+ channels [1]. There seems to be no known function for this domain [2]. Also see the IP3-binding domain Pfam:PF01365 and Pfam:PF02815. 22.10 22.10 24.00 22.30 21.80 21.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.60 0.72 -4.44 43 718 2009-01-15 18:05:59 2005-08-26 09:51:09 6 72 128 0 390 593 1 120.10 46 3.81 CHANGED ppp...tppphcl..lppllRhLQLhCEGHNhshQNYlRpQsssc.....sohNllppslclLpsh...........................hh.ths....ppshclhhpsh-oLoEhlQGPCppNQ.sls..cophh-hsssll..........pphp .......................s....ts.ph..hpslhRFLQLLC.EsHN...p..DhQ.NaLRsQss..p.......Ts.hNllhpTlpaL.plpt.....................................Sspss..ls.hGhh......p+sls.lhpQshpoLTEYhQGPCptNQ......pslA..cSphhDhlsuhl....p................................................................. 
0 162 186 280 +8287 PF08455 SNF2_assoc Bacterial SNF2 helicase associated Wuster A anon Pfam-B_3199 (release 18.0) Family This domain is found in bacterial proteins of the SWF/SNF/SWI helicase family to the N-terminus of the SNF2 family N-terminal domain (Pfam:PF00176) and together with the Helicase conserved C-terminal domain (Pfam:PF00271). The function of the domain is not clear [1]. 23.60 23.60 23.60 23.70 23.00 23.20 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.25 0.70 -5.89 42 1037 2009-01-15 18:05:59 2005-08-26 09:53:56 5 6 932 0 142 780 18 354.50 22 33.95 CHANGED u+hlhlssshhcchlpll.ptpsh......th.h.shp.hthplhc.pchP.lp....Fslpcppp.ph.Lphppt.sh..lh.stphhhhpsslYhlspcppphltslh.phht...spppplpaspcppsphlppllPtLpplG..p.l.lstpl..cphhhpshpschahDphcp.plhsplpacY.G.shplsshpchp....................................phhlhRDhc+Epclhphhcph...............................sFttstpphh.h.ts-cplapFhpptlspLpphG.cVahocsh+slhhhpssp..stlclppp..shL-hsFchssIsppElpplLpulpcpccYY+LcsGphlsL-pcchpchpphlppLphptpchppt.lplst.cuhhlsshL...pshphlphscsFccllpclpp.-chpaplP 
..........................................................................................................................................................................................................................+hlhhP.shhpphlphl.th.ph...........hp.pphp.hh.hp.hc...sphs..ht....Fplpch....p...p....h....plpl..pp......php...hh..pht..h.hh..h....ps....p..lY.....h.....lst.pp..hph..l....p.....t.lh....h.ht............tp.t.pp...pl.h...spschsc.hupsLshh...tphG...p..........sh....c..ph.t...h....p...s..h....p....spFah...Dp.............t.....c.....s..cl.phcl..pacY..G...sh.plsshpchp....................................ph.hspDhcpE...pclhp.lh.pt.h...............................uFp.t.s.............p....u...ah..h.....h..p.t-p.lYp.FhpchlPthc..plG.......c..V..hhoc.p...lcplh..hh.s.ssp.....tlplspp..sh..L-....l....p..F..Dhp.....s.....I.s.p.p.ElcpslpuLh.p.ppcaahhpsGpll.h.-....c..-.hp......clpphLpcL......thp.tphps.t.lplst.puh.lsphh.....cp....tsplphs...ppFppLspclppP-c..h............................................................ 0 54 97 114 +8288 PF08456 Vmethyltransf_C Viral methyltransferase C-terminal Wuster A anon Pfam-B_2153 (release 18.0) Family This domain is found to the C-terminus of the viral methyltransferase domain (Pfam:PF01660) in single-stranded-RNA positive-strand viruses with no DNA stage in the Virgaviridae family. 
25.00 25.00 64.60 64.30 19.50 19.40 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.50 0.70 -4.92 12 48 2009-01-15 18:05:59 2005-08-26 09:55:58 5 3 14 0 0 53 0 197.10 39 13.47 CHANGED +VLptLRhph+-plshpslhpsshtKlFGpVShaQ+uL+saApWluas.aGsshlphcslPLYVEIpDRl+LWppts..spsFshshpDl--KlchacEhE+E+cclScplsp-K.....................h....s.csssscssschtthpc.tt+sshp-hhtG......c.......................alppWhpp.pspFshsppps........hhthh..hthlptlh-hhhPshphuslh.s--psptlp .pVhphLRlphK-sVshtsLhpsAFhKVFGpVShaQ+AL+SFApWluYs.HGossIchpslPLYVEIsDRlKLWpptu.PspsFhLsh-DL-EKh+LaE.pE+E+pclSc+IlppK......hG...................pl...tst-htsschssch..hpcsRt+sshtchhEG......cVsTs...................hlppWsEp.-DHFshstpss......pshh.ah..h.hh+hl.-shhssh.Fuslh.ssDpstthc................ 0 0 0 0 +8289 PF08457 Sfi1 Sfi1 spindle body protein Mistry J, Wood V anon Pfam-B_54813 (release 17.0) Family This is a family of fungal spindle pole body proteins that play a role in spindle body duplication. They contain binding sites for calmodulin-like proteins called centrins [1] which are present in microtubule-organising centres. 
21.10 21.10 21.30 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 576 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.56 0.70 -13.25 0.70 -6.12 14 195 2009-01-15 18:05:59 2005-08-26 10:05:18 5 14 148 3 150 209 5 386.70 14 42.23 CHANGED psh+shlptp.....sscTEcaap+LEcRAuRAR-laLlsKuFs+Wtphup-Elp+TssARRHlLp......h+hFpuWRplT.......sVNEhKup...+FhLp+saptW+p+scc..hcphcppAsphppcclh+psahpWahphst+c............Aspah-apl+++..............uL.aWhcKh+s....sc-clpthcshpc+hslspshphWpp+opsltstppcspshpcpplhpptLppW+hpApLtPhtpplssth-scllppuaspWtcchphhppAp-hcRt+lh+suaTsWpct.LRhpsLpsRh-..-..Rlhhcshh+WhLtpRhpLhQ+l+-pRlppssFssalsshpcphscL.cps-htcchcscclL+upLtpW+sphs.p+chEhtAsthatsRlhppslstW+s+hp+hsplpsa....AcsAchaFlsppslKpW+tAs.pot+cRRppuhtphRRphKhsL.AtcshspW+s+spchtsl-ppAhthtpp+shphshchlcpW+ppshcthpphcpA-.hatcplhpchLh+Wt-pltphpphpppAsth.c.cshtpssstL+KhShRhhp.l+uppcsAcsh+ERp.R+psRuhFctWhp+sc ....................................h........................................................................................................................................................................................................................................................................................................................................h..h.....................................h.......t......h..hhthh.......t.............h.........................hh....h.t........W..p...................h.....t.....t.thh.t.ht.Wt..........................t................p.........hh..........ht.......a......................p................................h...a.........ch.......h.thtp.p...h..h....pthh.....tt..ah....tt.................h....ph.t..tt.......h..t.t...hhpp....thhpthhthWp..p...tht........p...p...hp..........h......u..hh...t.......p...h......hh.thpt....t.h..t.p.ht.p.h...p.h...........ut.p.......hp.....hh...hhpthhp.....W...h..thcp..+ht...phh..t....hppt...h.p...h.pl...tphhph.........Whp....t....h....t.t....ht...hp.p....p.t..t..h..t........h..hhh.th....hthWttth...........t......t....
......t........s.....h....t...p....h...h.t.h......hWh.t..h..th..........t.........h...............h.t.ht.h................................................................................................ 0 59 93 133 +8290 PF08458 PH_2 Plant pleckstrin homology-like region Fenech M anon Pfam-B_7298 (release 8.0) Family This family describes a pleckstrin homology (PH)-like region found in several plant proteins of unknown function. 21.80 21.80 21.90 22.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.36 0.72 -4.12 9 142 2012-10-04 00:02:25 2005-08-26 10:18:09 5 4 18 0 92 132 0 106.40 39 26.64 CHANGED sELlpRsRpGsL+hKpVuVYINKpuQVhLKLKSKHlGGAFoKKpKslVhuVscplsAW..sG+chhpsu...........chhhFGL+Tu.pGllEFcCcSphcpphWspuVpsLLp.sustc ...................s-Lhp+T+cGsL+h+hVSVYI.N.......+p.u...pVhLKhKS+H.luGshoKKKKsVVhsVpp-lsAW...P..GRc...hh-su..........ccctY.FGL+Ts.pGl.lEF-.Ccs.p.tc.hchWspulppLLphss...p.......... 0 11 50 71 +8291 PF08459 UvrC_HhH_N UvrC Helix-hairpin-helix N-terminal Wuster A anon Pfam-B_288 (release 18.0) Family This domain is found in the C subunits of the bacterial and archaeal UvrABC system which catalyses nucleotide excision repair in a multi-step process. UvrC catalyses the first incision on the fourth or fifth phosphodiester bond 3' and on the eighth phosphodiester bond 5' from the damage that is to be excised [1]. The domain described here is found to the N-terminus of a helix hairpin helix (Pfam:PF00633) motif and also co-occurs with the Pfam:PF01541 catalytic domain which is found at the N-terminus of the same proteins. 
21.30 21.30 22.80 24.40 20.50 19.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.85 0.71 -4.68 110 4453 2012-10-02 11:25:59 2005-08-31 14:14:15 6 20 4381 10 982 3406 2670 161.00 40 26.34 CHANGED LpchLsLs.ph....PpRIEsaDlSHlpGsssVuuhVVFpsGtspKppYR+apIc....h.............stG......DDauuM+EVlpRRa..p+hhcpp........................................hPDLllIDGG+GQlssAhcllc.cLGl..s..ls.llGlAKspccpst...........lhhss......tcsltLspss.sLhllQ+lRDEuHRFAI .....................................tphLtLs.t.h.....PhRIEsFDhSHh.t.G.ss.s.VuuhVVFps..u.tPpKs-YR+apI+....sl............................................sss.......DDYAuM+EVlpRRa..s+hlc.-.p...........................................................................................................................hPDLIlIDGG+GQlstAppVlp...EL.Gl......s......ls..lhGlAKssc+pss.........................lhhs..s.......t.tcsl....tLs.p.sS.sLaLlQ+lRDEuHRFAI............................ 0 335 662 843 +8292 PF08460 SH3_5 Bacterial SH3 domain Bateman A anon Pfam-B_1108 (Release 18.0) Domain \N 20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.11 0.72 -4.06 15 1610 2012-10-02 18:48:24 2005-08-31 14:30:18 5 87 699 2 109 1038 6 64.90 33 24.53 CHANGED .thspsGoahhssppsl...+ssPplouPsthhhppGsplhYDpV.hptDGasWluYsshsGsRtYlPl .............s..hspsGsahhs..p..pssl...+spPpsu.u.s.l...h...s....ha.sGppl.p.Y.Dph.hptDGYp.......WloYh.u.hs.G...p...RRYls................. 1 25 50 77 +8293 PF08462 Carmo_coat_C Carmovirus coat protein Wuster A anon Pfam-B_4180 (release 18.0) Family This domain is found to the C-terminus of the Pfam:PF00729 domain in Carmoviruses. 
20.80 20.80 20.80 21.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.40 0.72 -4.09 8 62 2009-01-15 18:05:59 2005-08-31 16:17:01 5 1 4 3 0 62 0 98.80 70 28.49 CHANGED QASNDKVSDGPTYVVPSVNGNELQLRVVAAGKWCIIVRGTVEGGFTKPTLIGPGISGDVDYESARPIAICELVTQMEGQILKITKTSAEQPLQWVVYRM ........QASNDKVSDGPTYVVPSVNGNELQLRVVAAGKWCIIVRGTVEGGFTKPTLlGPGISGsVDYESA.RPIAlCELVT.QMEGQILpITKTSAEQPLQWVVYRM. 0 0 0 0 +8294 PF08463 EcoEI_R_C EcoEI R protein C-terminal Wuster A anon Pfam-B_4136 (release 18.0) Family The restriction enzyme EcoEI recognises 5'-GAGN(7)ATGC-3' and is composed of the three proteins R, M, and S. The domain described here is found at the C-terminus of the R protein (HsdR) which is required for both nuclease and ATPase activity [1,2]. 22.80 22.80 22.80 23.20 22.30 22.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.01 0.71 -4.39 125 1424 2009-01-15 18:05:59 2005-08-31 16:20:10 5 18 1139 0 251 1037 195 157.50 23 17.71 CHANGED pchpca.tthpp..hlppp.hsshssLpplhss....pphppp.LccLppthh..pt.....h..hstp...pl.p.....................t..sslhsll+plh..uh.-....hhshc-+spp.th..ppah......tpts.....hss.pQtcaLch.............................lhcpht..ppGhh-.....cshphss..Fpp..Gs.tplt..hFt........plpphlpclpctlh ............................................phpcY...scc..hltpp..hss.sshpplaps.....tchppt.lccLpphhh..ct......ltppt..shpp................................thchhchlpcl..uh..s.........hshpER...spp.th....spah...................................sphs.....hsppthphLch.............................llcphh-p.Glh-....hcsLphtP...F.pph..Gs..tl...phFsst.p........ph.pslpclpptl........................ 0 75 171 214 +8295 PF08464 Gemini_AC4_5_2 Geminivirus AC4/5 conserved region Wuster A anon Pfam-B_4177 (release 18.0) Family This domain is found in replication initiator (Rep) associated proteins such as AC5 in the Geminivirus/Begomovirus. 
25.00 25.00 56.30 55.40 22.80 21.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -7.90 0.72 -4.23 17 174 2009-01-15 18:05:59 2005-08-31 16:21:10 5 2 91 0 0 174 0 43.00 59 26.82 CHANGED MpsIlshhKRLcLThAFTu.tpIhuSlHsVasGLsVHtPVsPs ..MtsIlsthKRLcLThAFTustcIhsSlHsVHsGhuVHGPVsPs. 0 0 0 0 +8296 PF08465 Herpes_TK_C Thymidine kinase from Herpesvirus C-terminal Wuster A anon Pfam-B_4030 (release 18.0) Family This domain is found towards the C terminus in Herpesvirus Thymidine kinases. 20.10 20.10 20.50 32.80 20.00 17.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.38 0.72 -4.44 12 34 2009-09-11 05:18:18 2005-08-31 16:21:50 5 2 26 0 0 33 0 32.70 48 5.80 CHANGED pFpcDlsGlWspIYsQlh+NsuIKs+hlsWsAL .pFpDDlsGhWocIYpQlhpNsAIKophlcWsuL.. 0 0 0 0 +8297 PF08466 IRK_N Inward rectifier potassium channel N-terminal Wuster A anon Pfam-B_4080 (release 18.0) Family This metazoan domain is found to the N-terminus of the Pfam:PF01007 domain in Inward rectifier potassium channels (KIR2 or IRK2). 25.00 25.00 28.60 27.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.06 0.72 -4.05 5 111 2009-01-15 18:05:59 2005-08-31 16:22:24 5 2 41 8 60 91 0 45.20 67 10.61 CHANGED MuuuRTNRYSIVSSEE-GL+LuTMuusNGFGNG..KVHTRR+CRSRF ......MuusRsNRYSIVS.S.EEDGh+LsTMu.....sA.NGFG.NG...KV....H.T.RppCRsRF.. 0 3 11 28 +8298 PF08467 Luteo_P1-P2 Luteovirus RNA polymerase P1-P2/replicase Wuster A anon Pfam-B_4011 (release 18.0) Family This domain is found in RNA-dependent RNA polymerase P1-P2 fusion/replicase proteins in plant Luteoviruses. 
20.50 20.50 86.80 86.70 19.30 19.20 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.21 0.70 -5.65 4 151 2009-01-15 18:05:59 2005-08-31 16:23:28 5 2 10 0 0 154 0 332.90 77 58.24 CHANGED h.F-.LIsASA+sVKDFISaCYsRh+SlYYuhKRWLhElpGpFcAHDAFVsMCastMhsIE-FEtELAEEauptEsEVp.AcshhKhLVAptu...........sGspcuhsDF..ltuRuGsasPl..............uCcs+Spp..s+s-Kh.pLlccpclh...Es+ht+sY.cEhGcu.hspW.NsL.oRhphlKcstccttcNAKtAp+lss.hptsssIsDhhuhsEVspV-TG.ph.spKshpGEEhspspPhlc.VRRlK..sEstssApsaIpppI+.pN.pL.sus-lShATIsRYs.phsEchcLDlsSpThLhphAMh.VPlPpppDIctthllQSPsARplR-chsVLsSpsF .hFFElLIGASuKAVKDFISHCYSRLKSIYYSFKRWLMEISGQFKAHDAFVNMCFGHMADIEDFEAELAEEFAEREDEVEEARSLLKLLVAQKS............KoGVTE.AWTDF.FhKSRGGVYAPL.....................SCEPT+QELEsKSEKLE+LLEEQHQF...EVRAAKKYIKEKGRGFINCW.NDLRSRLRLVK-VKDEAKDNA+AAAKIGAEMFAPlslQDLYSFTEVKKVETGLhKEVlKEhNGEEEK+LEPIhE-VRSIKDTAEuRDAASTWITETVKLKNSTL.sADELSLATIARYVENVGDKFKLDIASKTYLKQsAoMSVPIPTsKDIKhKMVLQSPEARA+RERMDVLDSsGF 1 0 0 0 +8299 PF08468 MTS_N Methyltransferase small domain N-terminal Wuster A anon Pfam-B_4172 (release 18.0) Family This domain is found to the N-terminus of the methyltransferase small domain (Pfam:PF05175) in bacterial proteins [1]. 21.10 21.10 21.20 21.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.76 0.71 -4.38 19 926 2009-01-15 18:05:59 2005-08-31 16:25:02 6 2 917 1 125 547 57 151.90 52 45.12 CHANGED SpVlhRphchFps+pVLlAGplpDphPtpLsthspplpsao..aaas.ththptpsslphp..hsspts..t.sDhllhYWPKuKpEAca.LttLlupLshGpEIhlVGENRuGV+Ss-KhLssa.GplsKlDSARRCuLaas.plpppPp.FsLcsaa+pY ...................................................SEVLLRHuDpFppp+lLFAGDlpDDLPAcLc.ss..u..u..+.uc.s.p..........aHHap.sl.up.p.......h...s.-..ss+Fu........Ls.A.p...s..ss...s......u..ssDTLIYYWPKsKsEApFQLhsLLShLPsGs-IFVVGENRSGVRSAEpM.L.A.-Y.u.s.l.sKl.DSARRCGLYaG.+LE+p..Pt..F-h-paWtpY.................... 
0 20 45 88 +8300 PF08469 NPHI_C Nucleoside triphosphatase I C-terminal Wuster A anon Pfam-B_4183 (release 18.0) Family This viral domain is found to the C-terminus of Poxvirus nucleoside triphosphatase phosphohydrolase I (NPH I, [1]) together with the helicase conserved C-terminal domain (Pfam:PF00271). 21.00 21.00 21.60 21.00 20.70 20.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.83 0.71 -4.31 13 67 2009-01-15 18:05:59 2005-08-31 16:26:02 5 4 49 0 1 57 7 146.10 57 24.20 CHANGED HhssP.ERRYVNVHFIIA+hosGcsoVD--LL-II+sKS+EFsQLF+VLKpoSIEWIasppKsFpPVDDEoGacsLhSRsl..D-sstosphh+ltpGpNIWYSpSschloIhKGFKscD.G+IYDs-Gsalpshs-N...Pll+IcssKLlYI ...................HVhTPPERRYVNVHFIhARLSNGhsTVDEDLh-IIpoKSK.EFsQLFRVhK+oSlEWI+sspK.DFSPlDsESGWcsLlSRuI..DhssKpshssKLl-GpNIWYSsSsRLhoIp+GFKssD.GRlYDsDGNaLpsMPDN...PlIKIHsGKLlYI......................................................................... 0 1 1 1 +8301 PF08470 NTNH_C Nontoxic nonhaemagglutinin C-terminal Wuster A anon Pfam-B_4024 (release 18.0) Family Bacteria of the Clostridium genus produce protein neurotoxins, which are complexes consisting of neurotoxin (NT), haemagglutinin (HA), nontoxic nonhaemagglutinin (NTNH), and RNA [1, 2]. The domain described here is found at the C-terminus of the NTNH component. 23.90 23.90 24.20 26.30 21.70 23.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.61 7 75 2009-09-10 15:40:16 2005-08-31 16:27:13 5 3 29 2 3 60 1 162.30 68 14.08 CHANGED NYFssLNNSYIRDuscERLEYNKsYpLYNYVFP-sslhEVppNNNIYLSIpNpsNLNlpssKFKLlslssNKQYVQKWDEVIIsVLsspEKYlDISsENNRIQLVssKssA++hIlNNDIFhsNCLThuaNNKYlsLSh+spNYNWMICNssppIPKtAaLWILK ................NYFpsLNNSYIRDSscERLEYNKTYQLYNYVFs-pslhEVppNNNIYLoINNTNNLNlQuuKFKLlsIssNKQYVQK.....aDEs.IIslLDshEKYlDI.S.EsNRlQLlssKssAKKhIIsNDIFISNCLThoYNsKYlsLShKDcNYNWMICNNspplPKtuYLWhLK................. 
0 1 3 3 +8302 PF08471 Ribonuc_red_2_N Class II vitamin B12-dependent ribonucleotide reductase Wuster A anon Pfam-B_4121 (release 18.0) Family This domain is found to the N-terminus of the ribonucleotide reductase barrel domain (Pfam:PF02867). It occurs in bacterial class II ribonucleotide reductase proteins which depend upon coenzyme B12 (deoxyadenosylcobalamine) [1]. 21.60 21.60 22.00 32.80 20.30 19.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.16 0.72 -3.90 47 480 2009-01-15 18:05:59 2005-08-31 16:31:44 5 13 471 0 161 391 512 99.30 52 8.97 CHANGED DGolVFc.cslEVPppWSQlAsDllAQKYFR..KsGlPt+..................t.t..........hssEsSh+QVhcRlsGsWshWG.hKsGYFso-sDAcsFaDEhtahLhpQ ....DGolVFc.cslEhPstWSpsAusIlApKYFR...psGsPs+...............................................EsSh+QlhDRlssTashhG.hK.sGYFs.S....--DApsFt-ELsahLspQ....... 0 64 112 136 +8303 PF08472 S6PP_C Sucrose-6-phosphate phosphohydrolase C-terminal Wuster A anon Pfam-B_4159 (release 18.0) Family This is the Sucrose-6-phosphate phosphohydrolase (S6PP or SPP) C-terminal domain [1] as found in in plant sucrose phosphatases. These enzymes irreversibly catalyse the last step in sucrose synthesis following the formation of Sucrose-6-Phosphate via sucrose-phosphate synthase (SPS). 20.20 20.20 21.40 21.80 19.90 19.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.88 0.71 -4.54 15 99 2009-01-15 18:05:59 2005-08-31 16:33:44 5 8 33 0 46 100 2 119.50 41 30.38 CHANGED GPNlSPRDl.DF.s..csKhE......shsPuc-VVKFYLFYEKWRRAEVEpS-ha.....lsslKulscPuGVhlHPSGlEpSL+ssIsu.L+sCYGDKQGKpFRlWVDpllsTplGSsoWLVKFDKWEhsG-ERpCClTTllLosK ...............................GPNlSPRD......l....-h.s........c.c...........shpPuctVV+FYlhaE+WRRuEV.po-th.....hphhKslscssGshlHPuGhEpsL+ssI-s.LtspYGDKpGKpaRhWVDplhssp..huossWL...V+FcpWEh.p.Gpth.sChoohhls.K............... 
0 9 32 39 +8304 PF08473 VGCC_alpha2 Neuronal voltage-dependent calcium channel alpha 2acd Wuster A anon Pfam-B_4072 (release 18.0) Family This eukaryotic domain has been found in the neuronal voltage-dependent calcium channel (VGCC) alpha 2a, 2c, and 2d subunits.\ It is also found in other calcium channel alpha-2 delta subunits to the N-terminus of a Cache domain (Pfam:PF02743). 20.80 20.80 20.80 21.00 20.70 20.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.24 0.72 -4.21 2 161 2009-01-15 18:05:59 2005-08-31 16:34:57 6 7 49 0 67 128 0 91.60 59 9.28 CHANGED LDAEhEs-lKV-IRppMIDGEpuphpF+TLhKSQDERYIDKG.RTYTWoPVsssDYSLALVLPsYSh.YIKAplt-TITQA+........SEoL ......LDAELEs-.K.EIR+pMIDGcsG-+p.h+TLVKSp...DE.....RYIDcssRTYTWsPVpGTDY....S....L..uLVLPs.YShaYIpAp...lp-sIhQs+.h...................................... 0 5 12 32 +8305 PF08474 MYT1 Myelin transcription factor 1 Wuster A anon Pfam-B_4029 (release 18.0) Family This domain is found in the myelin transcription factor 1 (MYT1) of chordates. MYT1 contains C2HC zinc finger domains (Pfam:PF01530) and is expressed in developing neurons of the central nervous system [1] where it is involved in the selection of neuronal precursor cells [2]. 
20.60 20.60 20.70 22.00 17.70 20.30 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.82 0.70 -4.89 3 273 2009-01-15 18:05:59 2005-08-31 17:44:20 6 25 37 0 104 204 0 181.60 45 24.52 CHANGED YRsNVAsTTPRANLAKELEKFSKVoFDYASFDAQVFGKRhlAPKlQTu-sSPKuaQcAKPFsKssSPcSSsoSSYV+SSSSssSu..GupspSTsptSSFDYoQDuEAAHM.AATAILNLSTRCREMP-NLSTKPQDL.soKusDIEVDENGTLDLSM+KpRhRDpuhPsoSSCooIsTPpSP.SPQ+p......SSulsNspsaQ.Lu-QDsWDlPlDYT..KP+RlcEEEsKEp-Psshs.upEsLEE++auGEsoIPSPKPKacpRK .....................a+ss.sssoPRusLuKE.EKauKssF-Y.sSaD....s....p....s...aGKR...hPh.l.p.s...pph.....p.....................................................................................................s.asYsps.EssHh.AAsAILNLSTRChE..h.sp.LSsKPQs.L....s+ss-...hEVDENGTLDLSMpKp+.t-.........ssss....ss.p..ss.pp..............hhsst..Q.hsc.t-tW-..hPlsYo..KspthpE.-.-...c...............-.....................p.......s.....................s...........................................................pt........h--pchstE.............................................. 1 4 12 39 +8306 PF08461 HTH_12 Ribonuclease R winged-helix domain Bateman A anon N-terminus of DUF128 family Domain This domain is found at the amino terminus of Ribonuclease R and a number of presumed transcriptional regulatory proteins from archaebacteria. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.09 0.72 -4.24 10 1050 2012-10-04 14:01:12 2005-08-31 18:07:20 5 14 1020 0 197 812 52 64.30 54 8.60 CHANGED lEILsILuEuccPlGAKhIApELcpR.GYcIGERAVRYHL+hLDEcG...LT++lGYu....GR.ITEKGl-EL .................E.FIL-aL...sc...+c...p...PA.SR-ELAsELplcsEEQl..EuLRRRLRA.MERDGQLVF..T.R....R.Q..C.Y.A................LPE+.LD.......................... 
1 42 102 154 +8307 PF08475 Baculo_VP91_N Viral capsid protein 91 N-terminal Wuster A anon Pfam-B_4034 (release 18.0) Family This domain is found in Baculoviridae including the nucleopolyhedrovirus at the N-terminus of the viral capsid protein 91 (VP91) [1]. 21.80 21.80 159.30 152.90 19.40 17.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.18 0.71 -4.92 27 63 2009-01-15 18:05:59 2005-09-01 10:08:50 5 2 56 0 0 65 0 186.40 39 23.89 CHANGED LLLlsIlllllFhlhahhIhs-FsEssFssRLpVlpEYh+pss..u-pPlPssLuYVScV..ssshYhVThFsTpsLpphppplHDDphEhFsFlpQpFp......................ss.sspsRVpspssDsscFhl+uDDG..lphcC..PpstpFDss..cCVPlssC.scssGp.hPlTEchlDpLVhN++ss+p....t.sspphHPT .LLLlAIlllllFslhYlhIhs-FsEssFssRLpVlsEYh+RTN..A-pPhPcsLuYVS-V..spphYhVThFsTssLsslppolHDDphEpFsFlpQpFp......................ssssspsRVpsp.ssDsscFhl+GDDG..hphcC..PsstpFDtst.+CVPlPsCts+ssGp.YPlTEchlDsLVhN++ls+s.....t..ssspthHPT..... 0 0 0 0 +8308 PF08476 VD10_N Viral D10 N-terminal Wuster A anon Pfam-B_4155 (release 18.0) Family This domain is found on the N-terminus of the viral protein D10 (VD10) and the related MutT motif proteins [2]. The VD10 protein is probably essential for virus replication [1] and is often found to the N-terminus of a Pfam:PF00293 domain. 25.00 25.00 52.70 52.70 21.00 17.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.14 0.72 -4.49 7 51 2009-01-15 18:05:59 2005-09-01 10:13:46 5 2 38 0 0 39 0 44.80 58 18.60 CHANGED hpha.Sslhs.IhppNR+LoKTalhpDssQ+lpsTuFspQpLchh .MsaY+SSllSpIIKaNRRLuKohIhcDDSQhITLTAFVNQsLasH.. 0 0 0 0 +8309 PF08477 Miro Miro-like protein Fenech M anon Pfam-B_1154 (release 17.0) Family Mitochondrial Rho proteins (Miro-1, Swiss:Q8IXI2, and Miro-2, Swiss:Q8IXI1), are atypical Rho GTPases. They have a unique domain organisation, with tandem GTP-binding domains and two EF hand domains (Pfam:PF00036), that may bind calcium. 
They are also larger than classical small GTPases. It has been proposed that they are involved in mitochondrial homeostasis and apoptosis [1][2]. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.61 0.71 -3.61 17 1563 2012-10-05 12:31:09 2005-09-01 11:57:45 8 235 397 7 1022 40314 7022 116.40 20 16.79 CHANGED +lsllGctssGKoSllpphhttphs.............h-hpssshshp.....hhs.ssst...........h...-.stt.......hthp......hppusuhlllashsctpohpplp.hh.hlsphcp.t.t.lPllllusKhD .....................................................................+lhllG.s...t..u..sG...K.......o......o..L......l.p..p....h..h..s..t..s.hs....................................................................p..t..h...t...p..s...p...h...s....h.p................h.....h..p..s.p..pph...............................................l...l.h.....-....h....u....s.p...................................t.t.h...t...h.....t.................................hp.......t....s.....c.....s......l....l.....l......l.........a....s......h.........s........c........t................p........o.....h....p..........t.....l..............p.............h.........h..................h...........l.........................p.............h.........p...........t..............................................p.....l......P.....h..l..l.l.u.s+.D...................................................................................................... 0 351 546 791 +8310 PF08478 POTRA_1 POTRA domain, FtsQ-type Fenech M anon Pfam-B_1605 (release 7.0) Family FtsQ/DivIB bacterial division proteins (Pfam:PF03799) contain an N-terminal POTRA domain (for polypeptide-transport-associated domain). This is found in different types of proteins, usually associated with a transmembrane beta-barrel. FtsQ/DivIB may have chaperone-like roles, which has also been postulated for the POTRA domain in other contexts [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.83 0.72 -3.93 92 3670 2012-10-01 23:48:22 2005-09-01 14:28:48 5 6 3629 6 775 2508 1251 69.80 23 23.53 CHANGED hslcplplsG.sphlsppclhphhslph..tsshh..tlshsplppplpp.hPalcpspVp+p.aPs.plplplpE+ ........................lpplplp...G..s.....ph..sssp..clpps.h.thts.......ssshh...........ph...c...hsth.p..pplcp..hPa....lcpsp.Vc+p..aPs....plplclpEh............... 0 252 491 639 +8311 PF08479 POTRA_2 POTRA domain, ShlB-type Fenech M anon DOMO:DM07489; Family The POTRA domain (for polypeptide-transport-associated domain) is found towards the N-terminus of ShlB family proteins (Pfam:PF03865). ShlB is important in the secretion and activation of the haemolysin ShlA. It has been postulated that the POTRA domain has a chaperone-like function over ShlA; it may fold back into the C-terminal beta-barrel channel [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.15 0.72 -4.31 60 1933 2012-10-01 23:48:22 2005-09-01 14:29:39 6 19 922 6 495 1965 284 75.00 24 13.68 CHANGED aslpplp.lpG....sp.....hls.t.plpplhpshhu+slshsslspLspploshYlp+GYlToRshl.ssQ..sl.ssGt.LplpVlEG ................................................lpplp..lpG........sp...........hstt...tl...p....phh..p....s...h...h..Gc.slshpslp....t....lh...ptlsphhh.s+GYlT...ocshl...P...tQ.....sl..ps.G.h..lplpll.G........................ 2 80 250 391 +8312 PF08480 Disaggr_assoc Disaggregatase related Wuster A anon Pfam-B_4000 (release 18.0) Family This domain is found in disaggregatases and several hypothetical proteins of the archaeal genus Methanosarcina. Disaggregatases cause aggregates to separate into single cells [1] and contain parallel beta-helix repeats. Also see Pfam:PF06848. 
20.90 20.90 21.30 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.59 0.71 -4.87 21 34 2012-10-02 14:50:22 2005-09-02 16:30:26 5 12 6 0 31 42 2 190.60 46 24.16 CHANGED MsDIEIYNNsIasThusGIWlhGYs.uuYoKspAtsVaIHHNhFYsTGTNsuhsWlGGIVssGF.sTLIENNVFDGsYsAAIs.phhspt.....phuPsG..sGYsThVRNNIIsNTh......spusuGoGYGlhNhL.spTHoFlLpNNClYNNuuGsYtsss.SsoDIhsDPhas-pppHDYHLKS.sG+WsGpsWVpDtlsSPCIDAG ........................MsDIEIYNNhIasTaGPGIWlhGht...suYsKsputsVaIHHNhFYsTGTNss...l.....pWlGGIl..sS.GFhs...oLIENNVFDGsYpAAls.phYsst..............thuPsG..oGYT.ThVRNNIIsNTh.R....ppsssGoGYGlh....NhL.scoHsFlLcNNClYNNuuGsYpNss.SsoDI.sDPhFssppp+DYHL+SpsGpWssptWhpc.h.SPCIDsG................................ 0 10 10 10 +8313 PF08481 GBS_Bsp-like GBS Bsp-like repeat Wuster A anon Pfam-B_2122 (release 18.0) Family This domain is found as a repeat in a number of Streptococcus proteins including some hypothetical proteins and Bsp. Bsp is a protein of group B Streptococcus (GBS) which might control cell morphology [1]. 20.00 20.00 20.20 20.10 19.70 19.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.53 0.72 -4.11 50 1326 2009-01-15 18:05:59 2005-09-02 16:33:46 5 104 156 0 188 1240 1 91.80 34 44.51 CHANGED lslpststpsGsaclhlssl.sssslpsVtlPlWS-pNsQDDlpWYsAs+pssGoapsslchssHpsp.GpYplHlY...shssGphhGlsuTs.hpV ..............................lp.pstpsusa-lhlosl...s..s..p.s.lppVplPsWScpNGQDDlh..WYsAs.+....ps...cGo.YpsslphssHcsptG...pYplH..lY..hhpssGphhG.lsuTshp................................ 1 19 53 118 +8314 PF08482 HrpB_C ATP-dependent helicase C-terminal Wuster A anon Pfam-B_2170 (release 18.0) Family This domain is found near the C-terminus of bacterial ATP-dependent helicases such as HrpB. 
25.00 25.00 34.60 33.70 23.40 22.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.65 0.71 -4.02 136 1495 2009-01-15 18:05:59 2005-09-02 16:40:32 5 8 1469 0 345 1253 181 132.00 52 16.94 CHANGED WLtPaLsGlpshsplpplclhsuLpuhL.sWs..ppppL-phsPs+hssPoGs+lsI-Y..............ssst..PsLuVRLQEhFGhspsPplu........sG+lPlhlcLLSPAtRPlQlTpDLsuFWpuoY.t-V+KEMRGRYPKHsWP-DP ..............................WLhPah...s...GlpsLp.sL.psl-lhpALcuLL....sWs...hpQ..+LDp.hPsHasVPoGS+.lsIcY.......................................p.p.-s.....sPsLAVRhQEhFG.s.ssPslA........pG.RVPLlLcLLSPApRPLQlTcDLuuFWpG.uY.t-VpKEM+GRYPKHsWPDDP............. 0 98 201 282 +8315 PF08483 IstB_IS21_ATP IstB_N; IstB-like ATP binding N-terminal Wuster A anon Pfam-B_3188 (release 18.0) Family This bacterial domain is found to the N-terminus of the Pfam:PF01695 like ATP binding domain in proteins which are putative transposase subunits [1]. 20.80 20.80 20.80 20.90 19.60 20.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.00 0.72 -7.05 0.72 -4.27 15 366 2009-01-15 18:05:59 2005-09-02 16:44:39 6 4 226 0 141 347 108 30.00 42 12.31 CHANGED cEQtspsss.ssLSF-ERLGLLlDRElspR- .......cQhspPsh.ppLuFEERLuLLl-cEhspR-..... 0 49 87 116 +8316 PF08484 Methyltransf_14 C-methyltransferase C-terminal domain Wuster A anon Pfam-B_2106 (release 18.0) Family This domain is found in bacterial C-methyltransferase proteins. This domain is found C-terminal to methyltransferase domains such as Pfam:PF08241 or Pfam:PF08242. But this domain is not a methyltransferase. 
20.20 20.20 20.20 20.40 20.10 19.80 hmmbuild --amino -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.92 0.71 -4.68 24 606 2009-01-15 18:05:59 2005-09-02 16:47:16 6 15 457 10 185 611 1437 149.00 30 37.09 CHANGED lssHGGSlRlhhs+pssppsss....pVscllspEpstGLsphssYppFucRlcph+cpLlsaLtpt+spG+plhGYGAssKGNTlLsaCGlspc.lsalsDpsshKpGpho.PGo+IPIhss-chtshcPDalLlLsWNap-EIlpcppthhstGG+hlhPlP ............................................shaGGSlphhht...t.s.t.......t.......tltthhttE............thslpp..t.ha.tt.Fspcspp.h+ppll.......phLh.p.h+s.pG.+.plsuYGAsuKGs....TL...LNa.s...Gl.ss...-.h.lsallDp.N.PtKpG.+ah..PGo+.IPIhsPc.p..l.pp..p..p.P..D...h..l.ll.L.s.WNhp...-EIhpphp..h.h.p..Guphlh.lP............................... 0 62 135 158 +8317 PF08485 Polysacc_syn_2C Polysaccharide biosynthesis protein C-terminal Wuster A anon Pfam-B_4073 (release 18.0) Family This domain is found to the C-terminus of the Pfam:PF02719 domain in bacterial polysaccharide biosynthesis enzymes including the capsule protein CapD [1] and several putative epimerases/dehydratases. 25.00 25.00 27.40 26.90 23.10 21.70 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.37 0.72 -4.46 50 564 2009-01-15 18:05:59 2005-09-02 16:49:37 5 3 510 0 93 379 199 47.90 54 14.07 CHANGED DsRDLNYsKYFpEG-pcloph...-DYsSHNTcRLsV-thKcLLLcL-aIp ..DsRDLNYsKYhcpGsc+loph...p-YNSc.NTchLsVEplK-hLLpL-al+.... 0 30 61 79 +8318 PF08486 SpoIID Stage II sporulation protein Wuster A anon Pfam-B_1108 (release 18.0) Family This domain is found in the stage II sporulation protein SpoIID. SpoIID is necessary for membrane migration as well as for some of the earlier steps in engulfment during bacterial endospore formation [2]. The domain is also found in amidase enhancer proteins. Amidases, like SpoIID, are cell wall hydrolases [1]. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.40 0.72 -3.72 160 1665 2009-09-10 14:51:08 2005-09-02 16:51:50 5 45 1176 0 415 1429 537 101.00 27 22.16 CHANGED lhst.ts..php.....hlsp.lslEcYLtGVVuuEMsusas.hE...ALKAQAVAARTYAltphtp........................................ttps...hclss.ospsQVYtu............hpstp...spsp......pAVpsTpGpV.LsY ...........................h....ttphphls.p.lsh.E.-YltuVlssEhs..s..oa..s....hE.............ALKAQAVAuRTa....slp.phtp...................................................ptp.s...h.s.lss...ostsQs.Yts................................hpt....p..hp.........pAlptTtG.llh........................................................ 0 188 317 379 +8319 PF08487 VIT Vault protein inter-alpha-trypsin domain Wuster A anon Pfam-B_2015 (release 18.0) Family Inter-alpha-trypsin inhibitors (ITIs) consist of one light chain and a variable set of heavy chains. ITIs play a role in extracellular matrix (ECM) stabilisation and tumour metastasis as well as in plasma protease inhibition [1]. The vault protein inter-alpha-trypsin (VIT) domain described here is found to the N-terminus of a von Willebrand factor type A domain (Pfam:PF00092) in ITI heavy chains (ITIHs) and their precursors. 21.30 21.30 21.40 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.27 0.71 -4.41 18 1019 2012-10-10 13:59:34 2005-09-02 16:53:43 5 50 337 0 559 925 68 115.20 29 13.23 CHANGED sslslhShpVcSploSRaA....+TsVoScslNpuspspEstFplplP+pAFIoNFohhIsGpsasGpIK-KptApp.Ypc.ApucG+oAuLV+ssupshEpFpsSV.sVsssoKVsFcLsY.pE ..............................h.lhshplps.pl..suphA....psslspphh....N.ps......s.p.s...h..Es.ha.h.LPc.sAhlssFph...........p..l.s.s.c.s..h..hGplKE.Kp....pA..+p.pYcp.....Ah.s.pG..c...sAuL..l......c.......p.s.s.t.s..h..............F.pssV..Nlsss.sc.lphpLpYt......................................... 
0 184 259 391 +8320 PF08488 WAK Wall-associated kinase Wuster A anon Pfam-B_4138 (release 18.0) Family This domain is found together with the eukaryotic protein kinase domain Pfam:PF00069 in plant wall-associated kinases (WAKs) and related proteins.\ \ WAKs are serine-threonine kinases which might be involved in signalling to the cytoplasm and are required for cell expansion [1]. 21.30 21.30 21.60 22.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.65 0.72 -3.71 14 91 2009-01-15 18:05:59 2005-09-02 16:56:30 6 13 11 0 77 112 0 106.30 25 17.43 CHANGED sp.hCsGhtCCQAp.lPspp.QlIGVsIEsssstspst.tGC+V.AFLTscpYu.SNsT-PEphaupGYssVELGWahpTosppFhss..LuCpNhs-......Yssss...........pChCcY ...................................................t.CsGhsCCQsp.lP.st..h......Q.l......hs.....sslps...p.s..t.s.psp..ssCch.AFLs-c...p.......a...h.h.s...s....hosP.p.ph..p....st.....tYss.l.LsWhh..p...s.......s...s......p....h..........ss......hsCps.p..............hsttp.............pC.Cp....................................... 0 16 52 60 +8321 PF08489 DUF1743 Domain of unknown function (DUF1743) Wuster A anon Pfam-B_4001 (release 18.0) Family This domain of unknown function is found in many hypothetical proteins and predicted DNA-binding proteins such as transcription-associated proteins. It is found in bacteria and archaea. 20.80 20.80 21.10 24.00 18.30 17.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.42 0.71 -4.34 59 216 2009-01-15 18:05:59 2005-09-02 17:02:30 6 3 149 7 150 227 81 117.20 29 29.78 CHANGED sRGllGAhAulut.................caTaEllsYRhsp.hsp.pRplshpslht.hctcphPhsacslD.htcchllsP+sssPVLaGI...RG.s.tslhpstphl.ps.E.....phpthtIahTNQuTDsH ...s+GllGAhuulut.h..........tcaTaEhlsYRtscphsp.pRplstsolht.hcppshstsacslDhhpcchllsP+ossPVLaGI...RGhs.tslhpstphl.pstE.........hpthtlahTNQsTD.H.............. 
0 35 93 125 +8322 PF08490 DUF1744 Domain of unknown function (DUF1744) Mistry J anon Pfam-B_5917 (release 18.0) Domain This domain is found on the epsilon catalytic subunit of DNA polymerase. It is found C terminal to Pfam:PF03104 and Pfam:PF00136. 20.40 20.40 21.50 21.10 20.30 19.40 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.14 0.70 -5.89 17 355 2012-10-02 01:06:00 2005-09-05 12:53:33 7 10 276 0 260 359 14 351.10 32 18.13 CHANGED slpplYpchhpphhp..t.tphtphh.hs-shsFplphhostcphh+plschlpph+cp+ss.sllhlQSsh.hppltppl.hLspFPhlpl..s.s-sslssLsWQphhu++hlpHaLslusWlsphlphu+YuclPlsNlphDshtFhlDlhaAR+LpppNhVLWWSss.shPDhGGhEpD...phsh.h-clt....PslNsPGhYsslslElplpsLslNolLpSullNEhEGus.s..s.sh..ps..........s.sstssasEsuhsssuhpVL+phlKcWac-.uhpsNshAD.l......lpphhpWVpSssShLaDsuL+halcsLh+KshLQLluEF+RlGuplIaAshN+lllpTsKhslpsuhAYupYll+ul+o+slFpalcLplt+YWDhLlWMDpaNaGGh ...............................................................................................................thatt.h.hh.t.........t..t..h....pphpFc.sp..sshcphhctl..........pchltth+p..p...c.....p...t..s.s....llslQS..s.....................h.........p........pL..h.pt..ls.h.Lp-aPhl..l......................s.....sc..ths...s.............L..sWQphsu+phlp+ahsh.sph.lpp.hhphu.R.......YhclP...lsNl.........t.............D..shahhDlh..aARpLpppshlLWhS..ss.shPDhGGh...EtD.......phhh..h.-pht......................splNpsGsYso..VClELclp.s.LAl.Nsl.L..pSshls-.h..E....G.u..ssh....hs.ss...tt...............................ttss.ss.sta...s-.s.uhssssh.pl.L+pMVp.sWhp-....s......tt...t...N...h...hA....D..........l.+.......hhR.WlpSs..sShL.aDsuL+phlpth......h+KhFhpLluEF.+.RlGupllaAs..hs+lllpTsKtplt.sA........hA.......YspYllpol+s.....+...........lFchlslphpchWchL.lWhDthNaGGh............................................................................ 
0 97 148 220 +8323 PF08491 SE Squalene epoxidase Wuster A anon Pfam-B_3107 (release 18.0) Family This domain is found in squalene epoxidase (SE) and related proteins which are found in taxonomically diverse groups of eukaryotes and also in bacteria.\ SE was first cloned from Saccharomyces cerevisiae where it was named ERG1. It contains a putative FAD binding site and is a key enzyme in the sterol biosynthetic pathway [1]. Putative transmembrane regions are found to the protein's C-terminus. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.82 0.70 -5.57 13 429 2012-10-10 17:06:42 2005-09-06 09:10:11 5 17 277 0 255 1836 818 239.90 39 51.57 CHANGED aAsLTlVCDGhFS+FR+sLs.sspspVsSpFVGLlLcNscLPpssHGHVILus.suPlLlYQISSoEsRlLsshsupplPs.sss-ltpYLcssVtPplPccLpsSFhpAl-cu.plRsMPNpaLPAs.sss...pGlllLGDAhNMRHPLTGGGMTVuLsDllLLp+LLpPl...DLsDcpplschlpo.FahtRKshsu.llNTLuhALYpLFsAsscph+.tLcpGCFcYhphGG.ClsGPluLLuGl.P+PhhLhtHFFuVAlYulhp.hhsts.hhhPhulh 
...................................................................................................................................AsLTllsDGhhSp....hR..........+..p..lh....s............p.....s.........p.....l.......S..p....F.l.....Gh......l..........s.........s...........h.....P...........h...........s......p........+.G...c.V.l.....L.u.......s...s....s.................Pl......L.hY.p.I.u..s.........p...E.....sR.h......L.l.D..l....s......p..............h.P..s.................p......h.p..p....a..hpp...h..l.......h......P.p...l......P...t..p...l..p.....s.Fh....t...A....l...p......p....u....p..........l.R...o....M.....P..N...p.h....h....P...s...s...........tp........G....hl.llGDAhNMRHPLTGG.GMTVAhsDlll....L....p.p.LL..p.sh...................sL.....t........D.........t..t.........t..l..h...p..h..h.p....p....F.aht.R.K.s....h.s..o..slNhL..A..t.A...LY.pl..Fu.....A.s..................sp...............p.h....+.thppuCFcYhphGG.hssGPluL.Lu.Gl......spP.h.LhhHFFuVAhaulhhhh.h...s..................................................................................... 0 73 152 207 +8324 PF08492 SRP72 SRP72 RNA-binding domain Zwieb C, Rosenblad MA, Bateman A anon Pfam-B_7529 (Release 18.0) Domain This region has been identified as the binding site of the SRP72 protein to SRP RNA [1]. 25.00 25.00 25.10 26.10 23.50 24.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.43 0.72 -3.60 19 306 2009-01-15 18:05:59 2005-09-06 16:14:07 7 56 261 0 211 304 1 65.40 35 10.29 CHANGED uupthpppuspt...................................pppc...++KRKs+.hPKsa...s.sshpP.........DPERWLPh+-RSsYRs ..............................................ts...............t............................pthpppc.t...K+KRKs+..LP..Ksa........c.Ps..hsP.........DPERWLPh+..-RSsYRs..... 
0 70 114 173 +8325 PF08493 AflR Aflatoxin regulatory protein Wuster A anon Pfam-B_2081 (release 18.0) Family This domain is found in the aflatoxin regulatory protein (AflR) which is involved in the regulation of the biosynthesis of aflatoxin in the fungal genus Aspergillus [1]. It occurs together with the fungal Zn(2)-Cys(6) binuclear cluster domain (Pfam:PF00172). 20.80 20.80 20.90 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.88 0.70 -5.36 5 174 2009-01-15 18:05:59 2005-09-06 19:28:07 5 4 87 0 46 171 1 201.60 32 56.62 CHANGED GlssPsTsSoPsh.PAsTsAsTTSostPpssspupPsu.p.........PPlsTP...hTPssTSusSPchsp.QSPPspsElWGuhLSPssSs.tsoDLSSLlSVsoDFGsLFuSl.ss.L..pDGsDAD.hhAcuhGsL.sA.hsVuosMpDlFssuAspPPpSscsocsh......CLSlsL-TLp+LFPcAPlGCQ..+sDuE-SSu+LsTIESVISDNKcAhDTlpsIL-CsCAQDGYlLSLVSLIVLKVLGWYlAAARsQsouTscsGshsp-outcSRRsSSSSF......EEcVLH ..................................................................................................................................................................................................................................................................................tP....o....st.sSsh.sp...pSPP...........-h.s..Luss.hus..pssL....SS.L..olso.....pFut.......ht.Sh................cu.cs-...h.A......huuL....s......A.....s.uo.h..s.....h...tsAs.ss.ps....p.hs..................C.hslsLphLhpLa.s....p.ushs...Cp.............hsstp....ss.s.s......p.hhT.....h-....VlpsNKpsh-slpplLsC.s.C..up.Dthlh..hhs.hhh.+llthY.........ss...ht............................................................................................ 0 8 26 42 +8326 PF08494 DEAD_assoc DEAD/H associated Wuster A anon Pfam-B_4150 (release 18.0) Family This domain is found in ATP-dependent helicases as well as a number of hypothetical proteins together with the helicase conserved C-terminal domain (Pfam:PF00270) and the Pfam:PF00271 domain. 
20.20 20.20 26.50 25.30 20.10 19.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.95 0.71 -4.57 178 1433 2009-01-15 18:05:59 2005-09-06 19:34:35 6 13 1220 0 512 1383 597 187.00 30 15.93 CHANGED hlPsscplllEph...ptt...stpllhashhG+psppsLuhllut+lscphshslshsssDYuhhlhsspp......................htc.....lhp.......-plpc...hlppslspotlh+p+FRpsAthuGll.+ph..Gt.phsspp.phsushLhps.h...............ct.ss.p.llppshcEshpchhDlsplpphLc+lppu..clplht...hspsSPhAh ................................h.lPsscpLllEphh....chu....caplll+ushG+plppshuhhlutRlppths.h.shs...hsusD.Glslphscs................................sp.shsp........lat.h.........s-pltp......hlpptl.ssSsLhttRFRcsAspuhLls+ptP.G+.c.pshh........Qp....phpustLlcl..t.................pth.P-asllhEohREsLp-hhDlstLtcllp+.lptu....clpltp....sspPSPhA....................................... 1 148 309 423 +8327 PF08495 FIST DUF1745; FIST N domain Borziak K anon Borziak K Domain The FIST N domain is a novel sensory domain, which is present in signal transduction proteins from Bacteria, Archaea and Eukarya. Chromosomal proximity of FIST-encoding genes to those coding for proteins involved in amino acid metabolism and transport suggest that FIST domains bind small ligands, such as amino acids [1]. 
22.10 22.10 22.20 22.10 21.70 22.00 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.06 0.71 -5.17 138 1150 2009-01-15 18:05:59 2005-09-06 19:37:04 5 35 807 0 415 1000 182 190.60 18 40.54 CHANGED psslll.lFsosp..astpt..lhptlppths.sstllGCoouGtlss.............ts.sshulslhsh....sssthpshshhh..hs....sshpsutpthpphhpshttt..............................hhllhsDGhssstp...........pllpul.......ptthsslslhGG.AuDshthppohlhs..s........spshps........uslsshlps..............s.hphtshhspGapPh.utthhlTcu..cpphlhElsscP ................................................................................................................................hhh.haso.p...hs.p.h....lhptlp...pths..sstllGsoosu.lss.............................tt.sp.slsl..h..th...ssst..h....p.sh.th.hh......hpp.....s...h...ts.u..t..phhpphhpphtt..............................thhllhhDu.h.ssppp..............tllpsl.......p.tthss..sslhG..G....s....A.us...s..h.....t...h.....p...p....s.h.lht...s....................sphhps...........usl..s.lhlts.............s..hphtshhsp..sa..p.Ph....u.p......hhVTpu...csphlhElsspP................................................ 0 135 287 368 +8328 PF08496 Peptidase_S49_N Peptidase family S49 N-terminal Wuster A anon Pfam-B_4027 (release 18.0) Family This domain is found to the N-terminus of bacterial signal peptidases of the S49 family (Pfam:PF01343) [1,2]. 
22.70 22.70 22.70 22.70 22.50 22.60 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.84 0.71 -4.43 68 1104 2012-10-02 13:07:06 2005-09-06 19:40:42 5 6 1078 0 169 648 164 149.80 52 44.51 CHANGED MEFLh-YGLFLAKslTlVlAIlsllshlhuhsp+p+..tpc...GcLclscLsEpYcchccplcttllsccphKthcKppKKpcKt..cpKtpcpt.pt.....................+s+laVLDFcGsIcAs-VsuLREEIoAlLulApsp.DEVLlRLESuGGhVHGYGLAASQL ......................................................MEhLs-YGLFL..AKlVTVVlAIuslshlIls..h..s..pRp+...tp+.......GELclssLoEpY+Eh+-.cLtssL.h-pcptKthcKupKKc.c.Kp-sKutKt.chK.ut..................................sp..pKPRlaVLDFKGShD....A...+EVsSLREEITAlLAsA+sp..DcVlLRLESPGGhVHGYGLAASQL....................................... 0 45 79 128 +8329 PF08497 Radical_SAM_N Radical SAM N-terminal Wuster A anon Pfam-B_2018 (release 18.0) Family This domain tends to occur to the N-terminus of the Pfam:PF04055 domain in hypothetical bacterial proteins. 25.00 25.00 28.30 27.70 22.80 21.70 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -12.02 0.70 -5.49 100 1422 2009-01-15 18:05:59 2005-09-06 19:45:10 5 3 1388 0 288 1078 179 328.40 58 47.65 CHANGED FLPho+cEMcphGWDphDlIlVTGDAYVDHPSFGhAlIGRlLEupGaRVGIIuQPDW+ss-..sFptLG+PpLFFGVoAGNMDSMVN+YTus+KhRp-DAYTPGG..........csG+RPDRAslVYop+h+EAa.p-l..PlllGGIEASLRRlAHYDYWSDKVRRSlLhDS+ADLLlYGMuE+sllElAc+Ls..........sG..cslps..lpsIRGTuahtpph.........................................................................................................sts...............pshlcLPSaEcVpsD+hhYAcA.+lhatEssPhsu+sLlQtHGs....R........hlhhNPPshPLo...........pcEhDtVYsLPYsRssHPsY........tct.tIPAh-hI+FS 
.........................FLPhSRcEM-.pLGW.DuCDlIlVTGDAYVDHPSFGMAIlGRhLEAQGFRVGIIAQPDW...poc.-....DFh+LG+PsLFFGVTAGNMDSMlNRYTA-++lR+DDAYTPss...........huGKRPDRAolVYoQRC+EAa.K.D..V..P.VlLGGIEASLRRhAHYDYWSDpVRRSlLlDSKADhLhaGNGERslVEVA+RLA.................tG.E.s..Isp..........I....pD.....lRsTAhhs+cs...th..h.sp....................................................................................................................h...t...t.shc......cshlhLPSaEcVp....s....D...........K....lLYAHAsRllHpETNPhs.ARALhQ+H.G-.......R........hVWlNPPslPLo...........TEEMDpVFuLPYp.RlPHPsY........Gss.+IPAa-MI+FS.................................................... 0 95 193 245 +8330 PF08498 Sterol_MT_C Sterol methyltransferase C-terminal Wuster A anon Pfam-B_3143 (release 18.0) Family This domain is found to the C-terminus of a methyltransferase domain (Pfam:PF08241) in fungal and plant sterol methyltransferases [1]. 21.70 21.70 22.60 22.40 21.60 19.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.92 0.72 -4.02 17 347 2009-09-11 17:15:18 2005-09-06 19:48:55 5 6 196 0 227 339 4 65.40 42 18.38 CHANGED hhohaRhotlGRhlTpshVthlEplGLAPpGSp+VsssLEpAAcuLVtGG+cclFTPMaLaVARKPt ............hohhRhothGRhhs+thltsLEhlGlAPcGopcsschLtpAA-sLVtGGcpplFTPMahhluRKP..... 0 72 142 197 +8331 PF08499 PDEase_I_N 3'5'-cyclic nucleotide phosphodiesterase N-terminal Wuster A anon Pfam-B_3045 (release 18.0) Family This domain is found to the N-terminus of the calcium/calmodulin-dependent 3'5'-cyclic nucleotide phosphodiesterase domain (Pfam:PF00233). 21.60 21.60 22.60 24.90 20.50 21.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.51 0.72 -4.07 6 248 2009-09-11 06:25:11 2005-09-06 19:54:19 7 5 83 0 129 240 0 59.40 69 10.83 CHANGED RLhDpDDELt-lps-oVPsE..VR-WLASTFTRps...ttpsc-KP+F+SlspulpuGIFs-+ ..................RLLDTEDELS-lQoDuVPSE..VRDWLASTFTRQMu..hhtR+uE.E..KP+FRSIVHAVQAGIFVER............ 
0 21 30 75 +8332 PF08500 Tombus_P33 Tombusvirus p33 Wuster A anon Pfam-B_2139 (release 18.0) Family Tombusviruses, which replicate in a wide range of plant hosts, replicate with the help of viral replicase protein including the overlapping p33 and p92 proteins which contain the domain described here [1]. 21.80 21.80 22.90 22.10 20.60 19.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.86 0.71 -4.44 10 116 2009-01-15 18:05:59 2005-09-06 20:20:20 5 3 28 0 0 123 0 145.10 43 31.33 CHANGED plslsshhh.ss.......LP...Rthlpphcphp-Ah-phs-DD-ssssl.h...p..hh.s.....Psschp+lVtps...RRs+YAsKlAtsA+uKVGLLKNocANcLVYQRVhl-.McpcsVRasDRstlLPLAVsA.CFl............pP-uV-EsppllGuS ...............................................................................hhthhh.......lh..s.......hP...Rthhp.hhhhhptpshhps-c-ss-sl.hp......c..hs.Dh.........sso+hTKhltuo...RRtsYAs+lApVARAKVGhLKNo.tNRLlYQRlhl-hMcccsVRhscpcshlPLAlus.CFl.............s-shEEptAlhGs........................... 0 0 0 0 +8333 PF08501 Shikimate_dh_N Shikimate dehydrogenase substrate binding domain Bateman A anon Pfam-B_99 (release 18.0) Domain This domain is the substrate binding domain of shikimate dehydrogenase [1]. 21.30 21.30 21.40 21.40 21.20 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.64 0.72 -3.92 40 6595 2009-01-15 18:05:59 2005-09-07 11:19:50 6 52 4461 89 1540 4807 2370 81.30 34 25.44 CHANGED llGpPlp.HShSPhlHNtha....pphGl.sssYhsh-ls........-shpsh.lptlcs....shtGhNVTlPaKpplhshlDclsspActlGAVNTl .......................lhGpPlt.HShSPhhHsthh................p.p..hG....l..s..h.s.Yh..sh.pls...............p.sh.s.ps..lp..s..hps........hshpGhNVTh..PaK..p.psh......s......h..h....D...c....los.pAph..lGAV.NTl................ 
2 443 935 1297 +8334 PF08502 LeuA_dimer LeuA allosteric (dimerisation) domain Bateman A anon Pfam-B_223 (release 18.0) Domain This is the C-terminal regulatory (R) domain of alpha-isopropylmalate synthase, which catalyses the first committed step in the leucine biosynthetic pathway [1].\ This domain, is an internally duplicated structure with a novel fold [1]. It comprises two similar units that are arranged such that the two -helices pack together in the centre, crossing at an angle of 34 degrees, sandwiched between the two three-stranded, antiparallel beta-sheets. The overall domain is thus constructed as a beta-alpha-beta three-layer sandwich [1]. 24.90 24.90 24.90 24.90 24.50 24.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.61 0.71 -4.36 57 5003 2009-01-15 18:05:59 2005-09-07 12:07:43 5 10 3771 14 1392 3984 3575 133.80 27 25.84 CHANGED cls-tDlhslhppphtp......................tp.phhcLpphpVhssst.........hssAsVplph....sGch......hptsupGsGPV-AhhpAlp+hlsh...pl...cLh-YplpulssG.sDA.upspVplp.....pc.sph..............hpGhGsssDIlpASscAhlsulNphh ...............................................................plh-t-l.sLhp.p.phhp......................tp...phacLp..p..h...p.l....p...ssss................thss...As..V.p.lps..............sGct....................................hptsupG.s.GPV-AlhpAlpchhsh..............sh.........cLhc......Ypl.pu.l..........s.............p..............G.....s....D.........A......h.u...p...shlplp...................ts..Gcp..........................................hpG.sGlssDIlpAShcAhlsAlNph.h...................... 
0 459 946 1204 +8335 PF08503 DapH_N DapD_N; Tetrahydrodipicolinate succinyltransferase N-terminal Wuster A anon Pfam-B_4065 (release 18.0) Family This domain is found at the N-terminus of tetrahydrodipicolinate N-succinyltransferase (DapH) which catalyses the acylation of L-2-amino-6-oxopimelate to 2-N-succinyl-6-oxopimelate in the meso-diaminopimelate/lysine biosynthetic pathway of bacteria, blue-green algae, and plants [1]. The N-terminal domain as defined here contains three alpha-helices and two twisted hairpin loops [2]. 21.70 21.70 22.70 22.00 21.10 20.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.62 0.72 -3.86 38 1077 2009-01-15 18:05:59 2005-09-08 09:31:38 5 7 1060 10 128 492 4 81.20 49 34.92 CHANGED hDApEIIpaIpsuKKpTPVKVYlcGs.Lpslsh..sslcsFsssphtllFG-ap-lcshL-tNpcpIccYclEsDpRNSAlPLLDl ....................hsApEIIpaIusAcKpTPVKVYlcG....p....Lps..lsh...p....olpsF.......Gstp.tVlFG-Wc-lcPhL-s.spphpDYhlEpDtRNSAlPLLDh............ 0 42 75 101 +8336 PF08504 RunxI Runx inhibition domain Wuster A anon Pfam-B_4085 (release 18.0) Family This domain lies to the C-terminus of Runx-related transcription factors and homologous proteins (AML, CBF-alpha, PEBP2). Its function might be to interact with functional cofactors [1]. 20.50 20.50 59.70 59.70 18.00 17.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.64 0.72 -3.57 9 243 2009-01-15 18:05:59 2005-09-08 09:34:32 6 4 51 0 82 197 0 96.50 68 23.14 CHANGED P.sos.QsQ.....sGsFQosSoPYaLYYGouoGSYQFSMlssG....GG-RSPoRhLssC.TuASTGu......sLhNPs.LssQsD...GVEu-GSHSN.........SPTuhssssRhDEuVWRPY ........................................PGSo.QsQSGPFQoSSoPY.LYYGo.SSG.SY.QFSMVs.......GG-RSPoRML.PPC..TsuSsGo......sLlNPs.LPsQsD...GV-..ADGSHSN.........SPTshssuGRhDEuVWRPY............. 
0 4 10 33 +8337 PF08505 MMR1 DSL1; Mitochondrial Myo2 receptor-related protein Mistry J, Wood V anon manual Family Myo2p, a class V myosin, is essential for mitochondrial distribution, class V being vital for organelle distribution in S. cerevisiae. It is the myosin essential for mitochondrial distribution. The established mechanism for distribution of cellular components by class V myosins is that they interact with the cargo at the C-terminal tail domain and transport it along the actin cytoskeleton using the N-terminal motor domain. Cargo-specific myosin receptors act as the link between the myosin tail and cargo. Myo2 binds with MMR1 (mitochondrial Myo2p receptor-related 1), the receptor on cargo, via the C-terminal domain. 23.00 23.00 40.10 40.30 22.80 22.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.84 0.70 -4.32 7 25 2009-09-11 00:16:20 2005-09-08 10:12:00 5 1 24 0 13 22 0 232.40 53 47.39 CHANGED KLuEhoRsGRSpp...+psS......DohRSsSPhRhthhss............sPKMLKPEYl...............S..ssshsLlSuhltpu..tptsp................................p.tts.s..ss.tts...t.....hhppsh....tQhR.p.............ph.tstpppppps.phpptpsppptpt...lpstptshss.sh.t..o......s.stphpp..tt.....p...hp...sSssuuss.s.hp.pph.pthpls.slPsDcNGFV......sspspR.SaISusuoD.h-.-...Whs .....KLSELSRGGRSKQ......RRGS......DTMRSVSPIRFQFLNN.................TPKMLKPEYL....................SQTTS..NLPLLSALLKNS..KKTTS................................EuQNSNPDPLNIEKNIIKQS.....IKDKL....EQLRoS...............................EolupVQ+KE+sssSaE...A..css.AEE...sllhpNsEuhLsShs......................PVPAoshcsPpsH..spcsEccuhRlVSuuSTpslS.sElNELPKDLNLD.sLPTDpNGFVQhs.....................tsNNNNNRYSFISSTSTD.YEsE...WpD..... 0 1 4 10 +8338 PF08506 Cse1 Cse1 Mistry J, Wood V anon Pfam-B_9217 (release 17.0) Domain This domain is present in Cse1 nuclear export receptor proteins. Cse1 mediates the nuclear export of importin alpha. This domain contains HEAT repeats [1]. 
20.30 20.30 20.50 20.40 20.10 20.20 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.37 0.70 -5.89 15 650 2012-10-11 20:01:01 2005-09-08 11:24:21 5 15 291 3 453 616 8 300.30 24 31.98 CHANGED IFKRWRPLF+Ss-LalEIKhVLDpFupPalsLhpsssphlps..sp.sscspLpllFcsLlLlsKlaYDLNsQDlPEFFEDNMpshMsha++YLohsN.PLL-..s---EAulL-+lKuuICEhlpLYsp+Y-E-FpPalppFlpslWsLLso.sospsKYDlLVS+ALpFLTuVAchs+YtplFss-ssLppIsEclllPNlsLR-uDEELFED-PlEYIRRDLEGSDsDTRRRAAsDhL+pLpccaEuhVTsllhpalpphLspYtpsPssNWKtKDsAIYLaoSlAsKGssTps.GVToT.NsLVsls-FFsppIhPDLh.sssssaPILKsDAIKYlasFRsQLsKpQLlplhPlLhpaLtssshVVaTYAAhsI .......................................................................................................................................................................................................................................................................................................................t......hh+laash..s..s...lP.....h..h..p..p..p..h...t.th....h....t......hhphlp......h.ss.................p.............p.......pp..p..s.......h............hh+..t..........h.hc.....h....t....h........a...........h.......p........+.....Y............t..........-.....................h.......t............hl...............p........a........h....ts.h.h.p.....l.........L....hp...h....p.................p..........c...................h.........l.....s.....ps.....l...p.a.L.s.t..l.sp...t.s..........p..a...t.............h..a.p.t..psplpplhp..c...lIhPshshp-p...D.ElaE--P.EYIR+...c.h....-...............s..p.D.h.s....o..cpAAss.hlp.sL...s...c........p.hc.p..lhthh....h....talpph..Lsp............h...t..t....s.....s..s...t..s...........a..+.p...................K........Ds..AlallsoLA.hh............................h...hh.t.lh.........ch............................................................h...........stsh..hh..a............................................................................................................................................................
...... 0 138 231 354 +8339 PF08507 COPI_assoc COPI associated protein Mistry J, Wood V anon manual Family Proteins in this family colocalise with COPI vesicle coat proteins [1]. 23.80 23.80 23.90 24.20 23.60 23.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.96 0.71 -4.49 24 296 2009-01-15 18:05:59 2005-09-08 11:31:58 5 12 163 0 225 274 1 136.70 20 72.33 CHANGED chssha..+hlNlssGslhlluGlsphh.........shp.....shllulYhIhhGlhlhhLEa..phP.......lhcYuSFhaSalGRGlFYlhlGsllhts.......shhphlsGhllhllGlhYlsLcahsslp...Ps..sh.........+psshshshpp.....................l ...............................................................h....hh.+lhslssus.l.hll....u.u...l.hphh..........hshp..........shllulYhl....lFulh...lshhEhph.h........................lhcahsFLh....sahGRGlhYlF........lG.slhhst........................thhphlsGhh.lhhhG..lh.alsl.thh..h..............................................h.......................................................... 0 111 165 207 +8340 PF08508 DUF1746 Fungal domain of unknown function (DUF1746) Mistry J, Wood V anon manual Domain This is a fungal domain of unknown function. 22.30 22.30 22.60 24.60 21.50 21.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.52 0.71 -4.16 14 96 2009-01-15 18:05:59 2005-09-08 11:59:42 5 2 94 0 74 99 0 113.30 37 32.54 CHANGED sLDhLhashlshlYahDsShlhLhlRuhsQh.hhsPc.................ss.hc.stspshlhsllhu.NlhCllhHhhhuhspuu-u..scGYLHGGlhIsFIGp+sPhS+hcllhhDllllslQllh ......sLDhLhas.LsslYYhDCShhpahlRulsQhhahoPK..................sssht.stspshlhslhhs.Nl..hChlhHhhhuhPpuuEs.........oRGYLHGGlhIDFIGQ+s....PsS+hcLlhlDllllslQllM................. 1 16 38 61 +8341 PF08509 Ad_cyc_g-alpha Adenylate cyclase G-alpha binding domain Mistry J, Wood V anon manual Domain This fungal domain is found in adenylate cyclase and interacts with the alpha subunit of heterotrimeric G proteins [1]. 
20.10 20.10 20.10 20.40 19.60 20.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.46 0.72 -4.63 19 100 2009-01-15 18:05:59 2005-09-08 12:50:02 6 46 95 0 57 104 0 51.20 30 2.63 CHANGED scshsPscc.....psssla+LDTNLscMEGIlocP.PhoPhDsshhsstps.cppp ....................tt.sssspt.........ssssla+LDTNLs-M-GIl..spP.PhoPhDsshh.sht................. 0 9 27 45 +8342 PF08510 PIG-P PIG-P Mistry J anon manual Family PIG-P (phosphatidylinositol N-acetylglucosaminyltransferase subunit P) is an enzyme involved in GPI anchor biosynthesis [1]. 29.90 29.90 32.10 32.00 29.80 29.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.66 0.71 -4.50 38 303 2009-09-13 01:51:06 2005-09-08 13:13:10 7 8 260 0 208 294 0 126.70 31 50.41 CHANGED sppchYGFshalhoplsallYllWualPcphLcp.l..................slsYaPs+aWAlAlPsallh.shlhsalshhhhNh.hhT.PLsulps.........lsDphuphtstpphsphpt..t................................................................hsslhDlslstVsclLYt ........phthYGFlhalsothsallYllWualPpsaLpp.l...................Gl.s....YaPs+aWAlAlPsallh.sllhsalhhhuhNh.hhTsP..lsS...lps.........lsDphup.hth.pp..h.tp..t.................................................................................................................................................................................................hsslhDlslstVschha...................................................................... 0 62 114 169 +8343 PF08511 COQ9 COQ9 Mistry J, Wood V anon manual Domain COQ9 is an enzyme that is required for the biosynthesis of coenzyme Q [1]. It may either catalyse a reaction in the coenzyme Q biosynthetic pathway or have a regulatory role. 
20.50 20.50 20.80 20.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.48 0.72 -4.47 70 470 2009-09-11 14:22:47 2005-09-08 13:40:06 6 6 434 2 244 450 340 77.20 34 29.11 CHANGED Ppssspuh+hlaphuDsIWphsGDpSs.DhsWYTKRssLuulYuuThLaa........lsDsStsapsThtFL-RRIcsVhphpch+s ...............................P.phstuhp.lhphsDsIWhhAG........D.pS.s.Dh...sWYTKRssLuulYsoo..L.ah........lpDpS..satcThpFl-cRlpc.lhp.htph..h................................................. 1 74 140 193 +8344 PF08512 Rtt106 DUF1747; Histone chaperone Rttp106-like Mistry J, Wood V anon manual Domain This family includes Rttp106, a histone chaperone involved in heterochromatin-mediated silencing [1]. This domain belongs to the Pleckstrin homology domain superfamily. 25.50 24.60 26.40 25.60 24.60 24.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -10.20 0.72 -4.00 156 861 2012-10-04 00:02:25 2005-09-08 15:06:29 7 18 317 14 611 854 9 92.10 27 12.65 CHANGED hhuVpsphtsspG...h...LaPlpc......slla.hcKPhhhlshs-.IptlpapRh.......sshsp+oFDhslhh.+............s.........ttshpFssIspp.-hsslcp.alpppslphpst ........................h..ul.sp.t.ssu...hLhPhsc......ulla.hcp...Pshhlsh--.Iph......lpapRh........shst+sFDhslsh..K..........................s................tpsshphssI..........spp.phs.....slc-.alsspslphpp.h................. 0 210 346 514 +8345 PF08513 LisH LisH Mistry J, Wood V anon Pfam-B_8344 (release 17.0) Domain The LisH (lis homology) domain mediates protein dimerisation and tetramerisation. The LisH domain is found in Sif2, a component of the Set3 complex which is responsible for repressing meiotic genes. It has been shown that the LisH domain helps mediate interaction with components of the Set3 complex [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.66 0.73 -6.83 0.73 -4.03 58 1365 2009-01-15 18:05:59 2005-09-08 15:43:56 6 33 318 20 874 1281 4 26.50 35 4.96 CHANGED ppLNtllhcYLhcpGapcoApsFtpEu ......plNhllhcYLhcpGapcoApsFtpEu.... 0 243 438 680 +8346 PF08514 STAG STAG domain Mistry J, Wood V anon Pfam-B_4766 (release 17.0) Family STAG domain proteins are subunits of cohesin complex - a protein complex required for sister chromatid cohesion in eukaryotes. The STAG domain is present in Schizosaccharomyces pombe mitotic cohesin Psc3, and the meiosis specific cohesin Rec11. Many organisms express a meiosis-specific STAG protein, for example, mice and humans have a meiosis specific variant called STAG3, although budding yeast does not have a meiosis specific version [1]. 25.00 25.00 25.60 25.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.41 0.71 -4.39 9 489 2009-01-15 18:05:59 2005-09-08 16:00:32 6 8 266 0 290 449 3 117.10 42 10.50 CHANGED EcsuDYPLssps.paKpF+ssLssFlpsLVppsp.tuhLYDs.....slhDslhshlsuLSsSpsRsaRHTATlsuhplhTuLlsVAhpLspp+-ssp+Ql.EAE+pKtpss+u...Rl-uLhpp....tpc ...............................css-YPLhht.u.....p..aKcF+.....ssFs-.FlpsLlp.pCp..hSllY..Dp......................hhM-sllohLou.LS.....s.S.plRuFRHTuT....L..s..Ah.+.LhTuLVsVA.l.sLoh....p.h.-..sspR...Qh....EAE.+.pKh.hs.p+u.........Rl-.L.p+ht............................... 0 75 127 214 +8347 PF08515 TGF_beta_GS Transforming growth factor beta type I GS-motif Wuster A anon Pfam-B_630 (release 18.0) Family This motif is found in the transforming growth factor beta (TGF-beta) type I which regulates cell growth and differentiation. The name of the GS motif comes from its highly conserved GSGSGLP signature in the cytoplasmic juxtamembrane region immediately preceding the protein's kinase domain. 
Point mutations in the GS motif modify the signaling ability of the type I receptor [1]. 20.40 20.40 20.50 21.00 19.90 20.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.75 0.72 -7.15 0.72 -4.61 28 639 2009-01-15 18:05:59 2005-09-08 18:27:40 7 10 112 65 307 548 0 28.20 68 5.73 CHANGED pslpDll-....soSG..SGS.GLPhLVQRTlAR ........poLpDLlp...p.soSG..SGS..GLPLLVQRTlAR.. 0 55 82 176 +8348 PF08516 ADAM_CR ADAM cysteine-rich Wuster A anon Pfam-B_197 (release 18.0) Family ADAMs are membrane-anchored proteases that proteolytically modify cell surface and extracellular matrix (ECM) in order to alter cell behaviour. It has been shown that the cysteine-rich domain of ADAM13 regulates the protein's metalloprotease activity [1]. 20.60 20.60 20.60 22.60 20.40 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.47 0.71 -4.04 68 1660 2009-01-15 18:05:59 2005-09-08 18:30:05 7 30 158 17 695 1511 0 112.40 31 15.88 CHANGED sGpPCps.stuYCYsGpC.shspQCpplFGtsAcsAspsCapclNppGsca.GpCGpp...ss...paltCptpDlhCG+LhCpsssphshh.t.....shh.s.h.....p.shhChuhcht.hs........sssDhGhV ..........................sGpPC.p.s....s.....p..uYCYsGpC..s.h.spQCp..plaG......s...A.ps..A....schCapclN.....p......p..G..sp..............a.GsCGpp.....ss..............pahtC...........s..t..pDs.hCG+L.Cpsspp.h.Ph.h...t...p.........tsh...h.....h...........p...th.Chuhth....t........................................................... 0 75 106 268 +8349 PF08517 AXH Ataxin-1 and HBP1 module (AXH) Wuster A anon Pfam-B_5484 (release 18.0) Family AXH is a protein-protein and RNA binding motif found in Ataxin-1 (ATX1). ATX1 is responsible for the autosomal-dominant neurodegenerative disorder Spinocerebellar ataxia type-1 (SCA1) in humans. 
The AXH module has also been identified in the apparently unrelated transcription factor HBP1 which is thought to be involved in the architectural regulation of chromatin and in specific gene expression [1]. 33.00 33.00 33.50 34.10 31.60 32.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.48 0.71 -3.91 10 201 2009-09-13 12:05:04 2005-09-08 18:31:55 7 7 75 8 118 189 14 116.40 43 20.73 CHANGED FlKGohlpltcGphK+VEDlpoEDFlpSApcSsDh+lssS............sVp+I-ssu.u.ullpLTFssGs.ccuhlslEspl-HPFFVhs+GWSSCsPshTlppaGLsCpcLpVGDVCLsLshp ............................FhKGohlpht....s..G.....phK+.lcDlt.sEDFhpSsE.hssshclssu...................................................hVs+l-ssp....s............uhlhL...pFss.Gp..ccup.lolEshl-HPFF.VhspGWSShpPphTsthauLPCpcLpVGDV.Cls.sh.h........... 0 22 35 69 +8350 PF08518 GIT_SHD Spa2 homology domain (SHD) of GIT Wuster A anon Pfam-B_13873 (release 18.0) Family GIT proteins are signaling integrators with GTPase-activating function which may be involved in the organisation of the cytoskeletal matrix assembled at active zones (CAZ). The function of the CAZ might be to define sites of neurotransmitter release. Mutations in the Spa2 homology domain (SHD) domain of GIT1 described here interfere with the association of GIT1 with Piccolo, beta-PIX, and focal adhesion kinase [1]. 21.10 21.10 21.60 21.20 18.80 20.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -6.99 0.72 -4.59 16 634 2009-01-15 18:05:59 2005-09-08 18:34:00 6 17 202 0 362 665 2 30.50 48 6.87 CHANGED ARpKLt+LSpppFp-LssDVhsElcRRpppu ....ARpKLtpLssppFp-LshDVasElcRRppt..... 0 92 167 267 +8351 PF08519 RFC1 Replication factor RFC1 C terminal domain Wood V, Mistry J anon Pfam-B_5399 (release 17.0) Domain This is the C terminal domain of replication factor C, RFC1. RFC complexes hydrolyse ATP and load sliding clamps such as PCNA (proliferating cell nuclear antigen) onto double-stranded DNA. 
RFC1 is essential for RFC function in vivo [1][2]. 20.70 20.70 21.30 20.80 20.50 20.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.92 0.71 -4.47 48 368 2009-01-15 18:05:59 2005-09-09 09:02:23 7 16 300 1 251 360 30 150.00 36 16.87 CHANGED IScGDllschI+uspp..WSLhPhaulhSoVhPuphltGp....hss.phs........FsuWLGpNSppsKhpRhlpElphHhphpo....sss+pplphsYl.shlhpp.LhpPLhppst-.....ulpcllphM-pYhLo+EDh.-sIh-ls......hssppshhpplsoplKuAFTRtYNp ................IS-GDLV-phI+u.........spp..WSLhPsp..AlhSoV.hPup.hhtGp........hss..h.s........FsuW.LGpNSppuKh...pRllpElphHhpl+s....sus+pplp.hsYl.shLhpt.Ll.......pPLt..p....pG.t-......................ulpcVlphMD..sYhLs+EDa.DsIhElu.......hss.pss.hs.plsspsKuAFTRtYNp............................. 0 93 146 211 +8352 PF08520 DUF1748 Fungal protein of unknown function (DUF1748) Mistry J, Wood V anon manual Family This is a family of fungal proteins of unknown function. 20.00 20.00 21.70 30.40 19.50 18.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.30 0.72 -4.33 19 138 2009-01-15 18:05:59 2005-09-09 11:04:59 5 2 100 0 108 143 0 70.70 38 74.67 CHANGED lG+lhHhuhDhsLlSshLAGl+RsTGLo.............................ch-..thpspp.l+salc+YLshGEh.laDpoVAh.suoSsaFcRp ..................................................................lG+lsHauhDhlLl....SshLAGl+RsTGLo..........................................................................p..hs...hhpsps...lcpalcpYLshGEh.lhDpoVAh.hus.SuaFcRp.................................. 0 36 70 100 +8353 PF08521 2CSK_N Two-component sensor kinase N-terminal Wuster A anon Pfam-B_4164 (release 18.0) Family This domain is found in bacterial two-component sensor kinases towards the N-terminus. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.81 0.71 -4.69 111 966 2009-01-15 18:05:59 2005-09-09 16:15:29 5 12 521 0 332 893 127 146.10 28 30.67 CHANGED sLhllhslshhh....sahhAppsAspsaDRsLhuuApslucplph.p....sGp.lplclPhsAh-hlpsss...pD+laYpVhss..............sGphloGh.s-.LPhss....ssssp..........pstaaDsp....a.pGpslRhsslhp......l.t.....s..s...t.lhVpVAcThpsRpthupclh .....................................PLhlLhshushh....sYhhAhpsAspsaDRsLhuu.Apslu.....cpl...ph...p.......sGp...l....p....l....slPh...sAL-.hh.ptss........sDp.laYpVhss...............sG...chlu.......G...s-.L.Phss.......sssstt......................hsh.aa-sp......a..cG....p..s..lRlAtlhp.............................................slsps......shs............thshltVAEThpsRptlAppl........................................................................... 0 37 144 239 +8354 PF08522 DUF1735 Domain of unknown function (DUF1735) Wuster A anon Pfam-B_2199 (release 18.0) Family This domain of unknown function is found in a number of bacterial proteins including acylhydrolases. 21.40 21.40 21.40 21.70 21.30 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.97 0.72 -3.60 101 771 2009-01-15 18:05:59 2005-09-09 16:19:03 5 17 106 11 91 680 4 88.10 20 24.59 CHANGED sspshs.lpl.slDtshlctYN....pp.....pss.sY.phLPps...hYshss..plslpsG..pt.hus.hslphpst.........sh.p.sppYlLPlpls.ssss........tlsps ..............................s.tpshp.lpl..th...Ds....s....h....ls....pYN.........tp.........pss...sY.phLPps...........hYs.l.ss................p........lsl..psG.........ph...hus..lplplpst...............tth.stsppYlLPlpls.ssss.........s......................... 0 57 80 91 +8355 PF08523 MBF1 Multiprotein bridging factor 1 Wuster A anon Pfam-B_4141 (release 18.0) Family This domain is found in the multiprotein bridging factor 1 (MBF1) which forms a heterodimer with MBF2. 
It has been shown to make direct contact with the TATA-box binding protein (TBP) and interacts with Ftz-F1, stabilising the Ftz-F1-DNA complex [2]. It is also found in the endothelial differentiation-related factor (EDF-1). Human EDF-1 is involved in the repression of endothelial differentiation, interacts with CaM and is phosphorylated by PKC [1]. The domain is found in a wide range of eukaryotic proteins including metazoans, fungi and plants.\ A helix-turn-helix motif (Pfam:PF01381) is found to its C-terminus. 22.20 22.20 22.70 22.70 21.00 21.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.25 0.72 -3.96 21 391 2009-01-15 18:05:59 2005-09-09 16:30:21 5 2 301 1 230 359 6 71.00 39 48.23 CHANGED sDW-s.VsVlpK+......sP+sssh+sppslNuApRsGhslpTpKKasuGsNK....ssstspphsKLDc-T-.lphc+V ...............pDW-o.Vsl.lt++.......uPp.sssh+.scpAlNAApRp..GtslpTpKK.auuGsNK........psstspsssKLDc-TE.l+hc+V...................... 0 71 126 190 +8356 PF08524 rRNA_processing rRNA processing Mistry J, Wood V anon manual Domain This is a family of proteins that are involved in rRNA processing [1]. In a localisation study they were found to localise to the nucleus and nucleolus [2]. The family also includes other metazoa members from plants to mammals where the protein has been named BR22 and is associated with TTF-1, thyroid transcription factor 1 [3]. In the lungs, the family binds TTF-1 to form a complex which influences the expression of the key lung surfactant protein-B (SP-B) and -C (SP-C), the small hydrophobic surfactant proteins that maintain surface tension in alveoli [4]. 
23.00 23.00 23.00 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.17 0.71 -4.41 7 187 2009-01-15 18:05:59 2005-09-13 09:14:14 6 3 165 0 123 167 0 123.90 23 64.94 CHANGED Mutstptp..ptKKFT..+Ea....KsK-Ip+sLs++ARL+KpYhKsLccEGYth...PEcp.......p.+..........-s++hppKp+ls..E+tEhtKpRKRpQ+-chptphpcchEcIc...pKppEREp++cpLop+T+pGQPlMGP+INDLLDKIKpDp..Tp ..........................................................................ht............................................................................ptt.........................p..p...............................pp..p.p......c.t.pp...p.p....php.....E.chc.th+pc+...ctp+pchpcp.t...p.c.+cE...t...hc........ptppcRh..cp...p.chlsK+T++GQPlMssphchLLcKIpp....tt...................................... 1 46 72 103 +8357 PF08525 OapA_N Opacity-associated protein A N-terminal motif Bateman A anon Bateman A Motif This family includes the Haemophilus influenzae opacity-associated protein. This protein is required for efficient nasopharyngeal mucosal colonisation, and its expression is associated with a distinctive transparent colony phenotype. OapA is thought to be a secreted protein, and its expression exhibits high-frequency phase variation [1,2]. This motif occurs at the N-terminus of these proteins. It contains a conserved histidine followed by a run of hydrophobic residues. 20.70 20.70 20.70 20.90 20.60 20.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.27 0.72 -4.07 45 1258 2009-01-15 18:05:59 2005-09-14 11:24:36 6 6 755 0 126 489 6 30.60 43 9.32 CHANGED phhphLP+hH+hhlhslssllll.lllh....Pus .......hhssLPhhHRhhlhhhsllhls.uhhh....P.......... 0 9 34 81 +8358 PF08526 PAD_N Protein-arginine deiminase (PAD) N-terminal domain Mifsud W, Bateman A anon Pfam-B_2195 (release 6.4) Domain This family represents the N-terminal non-catalytic domain of protein-arginine deiminase. This domain has a cupredoxin-like fold. 
25.00 25.00 29.00 28.20 24.30 19.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.39 0.71 -4.03 15 190 2009-01-15 18:05:59 2005-09-15 15:51:24 5 6 40 12 84 173 0 106.60 38 17.47 CHANGED Mu.pphV+LShcpPT+AVCVlGsEhplDlpusAPcsspsFslpGSPGVplplstsss.psccssuss+WPLssss-llVsMsusSssssDsKVpVSYausppchPlupAVLYLT ............Ms.pphl+lshppsspAVCVlGsphhlDlhu...uAPpssp...oFslpuSssVplplshss..ppppss..upp+WPLsss.s-lhlsMsssSssh.sD.s..KVp...loY.Yupcpt.PlspAlLYLT................ 0 5 6 18 +8359 PF08527 PAD_M Protein-arginine deiminase (PAD) middle domain Mifsud W, Bateman A anon Pfam-B_2195 (release 6.4) Domain This family represents the central non-catalytic domain of protein-arginine deiminase. This domain has an immunoglobulin-like fold. 20.00 20.00 30.30 28.60 19.90 17.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.06 0.71 -4.83 15 201 2009-09-10 21:19:31 2005-09-15 15:53:00 5 7 49 12 96 184 1 154.90 46 24.89 CHANGED V-ISLDsDssRsGpV-+sps..cKpoWpWGPsGaGAILLVNCD+Dshtust............DpccsplhshcDLpDMS.MlLpspGPcsLhcsY+LVLHlSpSDuc+lRVFpupsst.............shpsYchVLGPpphoYpVthhsGppchpFYVEGLsFPDssFsGLlSlsVSLL- ......l-ISLDsDhsRsGpVccsps.....cKpsWsWGPpG.GAILLVNCD+D.sstp..s.t............Dspcpc.........lhs..........h.....p.....DLpD..MS.MlLpTpGPspLhs.sacLVLalStuDuc+lRVFpspss.....................sppYptVLG.p.....+hsatVthh....sG..pt-hpFaVEGLsFPDssFsGLlSlpVSLL-........................ 0 8 11 27 +8360 PF08528 Whi5 Nrm1; Whi5 like Mistry J, Wood V, Chahwan C, Finn RD anon manual Motif In metazoans, cyclin-dependent kinase(CDK) dependent phosphorylation of the retinoblastoma Tudor suppressor protein (Rb) alleviates repression of E2F and thereby activates G1/S transcription. The cell size regulator Whi5 appears to be an analogous target of CDK activity during G1 phase [1]. 
25.00 25.00 25.60 25.10 24.30 24.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.36 0.72 -6.59 0.72 -4.24 24 164 2009-01-15 18:05:59 2005-09-15 16:38:06 6 3 80 0 127 152 0 25.00 42 6.36 CHANGED pL+sRLphAhaKspsG.pchoLscl ...pL+sRLshAhhKlpsGhpchoLscl.. 0 31 71 109 +8361 PF08529 NusA_N NusA N-terminal domain Bateman A, Roth A anon Pfam-B_407 (Release 18.0) Domain This domain represents the RNA polymerase binding domain of NusA. 20.80 20.80 20.90 20.80 20.60 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.46 0.71 -4.12 161 4422 2009-09-13 23:45:58 2005-09-20 15:50:34 6 28 4355 5 970 2799 2484 119.80 35 27.93 CHANGED El.lpslctlscEKsIs+-hlhcAlEpALhsAh......+Kp.a....s......ps.pslcVplDccoG-hclapphpVV..-c...........h.......cs..thplsLp-A...p.phsssh...pl..GDhl.p.pl..ssts..FGRIAAQsAKQVlhQ+l.REAERct ...............El.LtslcslppEKuls+-.hlh-AlEsALhsAh..........K+pa.............s.............pp.tsl+Vpls....c..c.oGc..hclap.p.hpV...V..--.............................Vp......ss...phEIo..LppA......p.......s..s..sh.....................pl..GD.hl..c.cl..ss..ts...FGRlAAQoAKQVIhQ+lREAERt.h............... 0 341 650 828 +8362 PF08530 PepX_C X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain Bateman A anon Bateman A Domain This domain contains a beta sandwich domain. 
20.70 20.70 20.80 23.00 19.60 20.20 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.12 0.70 -4.45 53 2418 2012-10-03 19:46:52 2005-09-20 16:49:51 5 23 1578 54 704 2136 679 229.10 17 35.97 CHANGED Nsh.tshPsVph.sptssps...................................Wps.ssassspspt......phhLs.......t......sthshpttsshssshsh.tthhhss.....................................................tptsshsapotslscshplsGpsplcLplsssss.......ss.plsspLh-l......ssc.....................Gps....................plloc....Ghlplpt+p...p........................................ptlpPsphhslclcLpsosaphtsGcpLclhlsosshsthhhsssstp.hshshsp.........................................pLh ..................................................................................................................................p.P..V.h.h..p..s.tt......................................Wps.h.s..s..aPs......psp.................phaLs...................................s....t.....h.sh..p....t...t...t..sh.s.s.sh.t...........tthhhss.....................................................................................................................tptsshsah.s.sl.s....c..s.......h..clsGpsplcL...p.ls.s..ss.s...............................cs...slh.spLh-l......ssc.................................................Gts....................................thlop...........G..h.l.php...Rp.....p........................................p.lps.s..c..h.hp...lplc.L..s.s.saphptGcpLclslt.uo.s...h...s..h..hh.h.s..s.s....s..hp...hs.h.p.tp....................................................................................................... 0 207 463 615 +8363 PF08531 Bac_rhamnosid_N Alpha-L-rhamnosidase N-terminal domain Bateman A anon Pfam-B_8527 (release 8.0) Domain This family consists of bacterial rhamnosidase A and B enzymes. This domain is probably involved in substrate recognition. 
25.00 25.00 25.00 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.73 25 876 2012-10-03 19:46:52 2005-09-20 16:50:02 5 42 361 2 288 895 96 166.90 27 18.23 CHANGED +pltpARLYlouLGlYEhalNGc+VGDps.........LsPGaTsYc++l.YpTYDVTshLpc.G.cNslGlhVGsGWYssphsh........thp.pthYG.sc.ullupLclpat.DGppcsl.sTDsoW+ss.pGslhtusl....YsG..EsYDARt-.tsWspssaD..DstW....................tssph..............hshPp.stLtAp.ssPl+lsc ..............................................................ltpAplalouhG.......hY-hhl......N....G..p.+.V....G.c..ph.....................LsP.u.hTsY...p.................+................p......lhYp.TY.....DVT..shLpp.....G...pN..........s......lulhl...u.s.GW..ap..s.th.s............................pthhu....sp....s..shhspL.p.lpat.D.......Gs..p.p..h..l...so.........D.......s....o....W+s.......s...p...u.s....l...h.hssl......................asG.......E...p...Y..D..A..R..h..c............t........s....Ws....p..s...s...a..s........c....s..tW.....................................t.ss.h..................................h.t.....s...........l........................................................................................................... 0 134 224 270 +8364 PF08532 Glyco_hydro_42M Beta-galactosidase trimerisation domain Bateman A anon Pfam-B_2131 (release 5.4) Domain This is non catalytic domain B of beta-galactosidase enzymes belong to the glycosyl hydrolase 42 family. This domain is related to glutamine amidotransferase enzymes, but the catalytic residues are replaced by non functional amino acids. This domain is involved in trimerisation [1]. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.22 0.70 -11.17 0.70 -5.17 27 1054 2012-10-03 00:28:14 2005-09-22 16:56:59 5 20 641 2 343 1020 170 197.10 26 29.29 CHANGED AcVAl....laDa-shWAhc.p..Pp.pshp..........YhpplppaYcshhchGlslDlls.ss.-lst...YclllsPsLahlscshspclpcalcsGGsllhshhSGhhsEssplhhuthPGs....LcclhGlpl.cEh-sLssp.....pp.plp.htGp.......hpsphap-hl...pspsAcs..lApaps..t......Gp.PA....lscpthGc..GpshYlus.....t.spphlppllpplhsc.tslt ....................................................clAl.....laDa-s.....hWuhc...t..........sp...ts.hp............Y.pp.ltpaY.c.sh..hctslssD..ll....s....s...c.....s....-.....hss..............Yc..lll.sPsl..h...hlssshsp+lppaVcs.GG..pll.so.....hhoGhhs-pstl...h.hu......sh.P..Gs....L.....pclhGl.ps...p.-h...s.....sLhss............................ppspl..p....htGp......................................hp...s...p.ha...s.-h....l........ps..p..s......A.ps.....lApYts.......t.ht...........Gt..PA.....................lTpp.t.h....Gp..G.pshYlus..........t.st.t.hh.pthhtplhtphsl.t.............................................................................. 0 123 237 290 +8365 PF08533 Glyco_hydro_42C Beta-galactosidase C-terminal domain Bateman A anon Pfam-B_2131 (release 5.4) Domain This domain is found at the C-terminus of beta-galactosidase enzymes that belong to the glycosyl hydrolase 42 family [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.71 0.72 -4.46 26 633 2012-10-02 20:10:03 2005-09-22 17:02:10 5 6 441 2 201 611 24 56.50 23 8.50 CHANGED Glclpp..h..ttpspahFhhNaoscstplsl....stttppllssph.t.....hslpPhsltVlct ................tVpspt..R..pssps..palFlhNaos.cs.....tp.lsl....stshp-llsu.p...h.hpst............lsLsshsVtllp................. 
0 72 140 172 +8367 PF08535 KorB KorB domain Bateman A anon Pfam-B_20369 (release 10.0) Domain This family consists of several KorB transcriptional repressor proteins. The korB gene is a major regulatory element in the replication and maintenance of broad host-range plasmid RK2. It negatively controls the replication gene trfA, the host-lethal determinants kilA and kilB, and the korA-korB operon [1]. This domain includes the DNA-binding HTH motif [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.72 0.72 -3.83 17 2280 2012-10-04 14:01:12 2005-09-23 14:10:54 5 9 1911 4 384 1460 462 83.50 29 28.74 CHANGED stGhppu-IA+pLGKstuaVopahuLh-hPsslcphhssthssslcsl.-LtpshccpPpcVpsal.....tstspplTRusl...pchlcsc+psp ..........shTQp-lAcclGKS..RsaIuNhLR...L.L...p.....L...Pp.p.l.pp.hl.p.pG.p..l..o.....t...h..t.....................................................................................ttttthhh................................................................... 0 127 237 310 +8368 PF08536 Whirly Plant_TF; Whirly transcription factor Mistry J, Pachon DMR anon manual Domain This family contains the plant whirly transcription factors. 20.00 20.00 22.40 22.10 19.70 18.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.63 0.71 -4.70 4 82 2009-01-15 18:05:59 2005-09-28 13:43:07 6 2 35 12 47 76 4 129.40 45 44.76 CHANGED ulYKsKuALplcsVtPoFsulsSGshhlcRsGulLLphAsAsusRpYDWppKtsFhLSsTEsupLss.MuupsSCpFFHDPu.tsuustGpVpKuhKVEPhPDGS.GhFlNLoVssu..psscpF.sVPVohuE.Asl+phhs ...................lYKGKAALolpPh.PpFstL-SGuh+ls+cGhlhLpFAP.......A.......l....G.............tRp..YDWs+KQh.FuLSsoElGoLlo...LGsp.-S.sEFFHDP..htpSstGpV+KsLplcPh..sDus.GhFhsLoV.sph...p...s...s-ph...lPlTcuEFuVhhosh......................................... 
0 10 32 40 +8369 PF08537 NBP1 Fungal Nap binding protein NBP1 Mistry J, Wood V anon manual Family NBP1 is a nuclear protein which has been shown in Saccharomyces cerevisiae to be essential for the G2/M transition of the cell cycle. 21.50 21.50 21.60 21.60 20.90 21.40 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.20 0.70 -5.23 6 52 2009-01-15 18:05:59 2005-09-28 14:11:52 5 2 26 0 25 47 0 225.70 33 97.19 CHANGED Mh-slKshhssa.hh......csDGRK+EaGsLs-h+cRp..+sR+tpsppcucshh+hs+...............................sssptot.phtsphpt..................ssspcupt+shhssI+ulFSs-ppslptMppA...lshhL.s.otsppspcc...hpsRIlRS-sFKKKlhEhcYscphLppLR+Gupstp.sh.t.su.p......DpVlLLQ++lcch-c+ltplppELp.spKcLpFupEKspLLpsLLDDANIDscYlKSRRsIpNL.p.p-slpPp.csLs..PSPhR..sVNPLFTSSPlRpssppupcssspshp..........-NFYsKYP+lPcTEpLspptt..............ccSLSPlRlDYS+YSS ..............................................ts.sh.++pht.l...+ppp..p.t......pspphhph.+...............................c.t..pt......................................................................................................................................................................................................................Dp..L......phpt+httlcppLp........hpctLpaspEK.clLpslLDsuNID...pYhcSRRshpNl.p.p-p.....lKPh....sLs..P.SPhR.........tsNsLhTSSPh+h.shpsp....pshp...........sh..sthPp.P.pph.ppp................cco......................................... 0 3 10 17 +8370 PF08538 DUF1749 Protein of unknown function (DUF1749) Mistry J, Wood V anon manual Family This is a plant and fungal family of unknown function. This family contains many hypothetical proteins. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.96 0.70 -5.53 8 206 2012-10-03 11:45:05 2005-09-28 14:46:02 5 5 158 2 158 362 33 286.70 31 88.64 CHANGED Muhphss......GlLHpYsp+L.......lAFEasosush..+sps....LlFlGGLGDGLhTVPYlpsLAsAL.......-tuuWSllplplSSSYuGWGTGSLcRDsEElppsV-Yl+sph....uG...sppKlVlMGHSTGSQDVlaYLopu.......................tspV-GuIlQAPVSDREAhhpshtc...........cthcchVshA+ch..lccGpuc-llPpEascthhh...soPlSAhRahSLsusc.....GDDDaFSSDLoDE.......cLscoFGplpc........PLLlLhSppDEaVPsal..DKpsLLsRW+pusc....cphWspp.SGIIsGAoHsVs....scupsts.chLlccVhuFL+ ....................................................................................................t..............shla..a...t....t........ssaEat..sss...........t.p.s........llFlGGLs.DGhhos.sYhts....LupuL.................ptt..sWSlhplhL...oS...SapGa.......G....h.u....S...L..c...p.D.......s.c....El.sph.lp..Yl+sh........................s.p+lVLhGHSTGsQDllcYLp.pss......t........................p.sl.-.G...sI.LQAP..V.S.D.REuhthhhpt....................t.hpphlphAcph.....ls........p...u...p.s...pp..l.h.Phphsshhh...........................ssP.loAhRahSLsu.t........u-DDhFSSDL..s-..-.........pLpp.saGhlsp...................slLlLhS..spD-aV.Pthl..Dpp..t.L.lp+appsst..............t.hs......otll.s.....AsHslt....t..t......hhthl.tah.t........................................................................................................................ 0 50 92 139 +8371 PF08539 HbrB HbrB-like Mistry J, Wood V anon manual Domain HbrB is involved hyphal growth and polarity [1]. 
22.00 22.00 22.10 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.39 28 284 2009-01-15 18:05:59 2005-09-28 16:01:24 6 8 167 0 164 267 1 140.30 32 25.68 CHANGED pphosssAWshlpstllslF.........cucslp.hs..lEDLNclVphalppplppp.hss.....hlh...t-lcpLLssGhssLccpl.....pts.....s-+LlspLsElW.hFFsslLPhlQAlFLPlp.......htt.t.....tpstp.h...............................upELslRpLsLluFRDhllLshhpshthh ............................................................h..hsssssWs.lpstVlslF.........psc.sl...t...l..............sLNchl.c..h.hlppplssh.hsp...................hh........ppLLspGhhhLcc.pl..........................chh.................ps..pp.hl...sp...Ls-hWpaFFsplLPhlQAlFhPlp...............................................................................................................upp.slRplsLlsFRDhllLshh.t....t..................................... 0 36 68 110 +8372 PF08540 HMG_CoA_synt_C Hydroxymethylglutaryl-coenzyme A synthase C terminal Finn RD, Bateman A anon Prosite Family \N 25.10 25.10 25.20 25.30 25.00 24.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.87 0.70 -4.93 10 2505 2012-10-02 12:25:54 2005-09-29 09:26:20 5 15 1386 46 503 1607 169 142.60 23 53.54 CHANGED IVFDpulRuoHMpHAYDFYKP..DLsSEYPVVDGKLSlpCYLpALDpCYppYssK..hpphhtp..t.spthsLccFDahlFHoPaCKLVQKShARLlaNDFlppssp.phsslhccLpsh..tslch--oYpDR-lEKshhploKshaccKspPSLllsspsGNMYTuSLYuuLASLL.ptuss-LsG.KRluhFSYGSGLAAohFSh+lssDsssho..p..IsslhDlps+L.DsR+phoPE-FsEshclREpsHtpKsFsPpuS..l-sLhPGTaYLsplDchaRRsYup+ 
...............................................................hh........................................................................................................................................................................................................................................................................................................................................................................s....hsp..GNhYsuolahshhS.hl.........t........t......................s..pp.............lhhhSYGSG..uphat.h...l....t...................................................................................h..tth..............a................................................................................................................................................................................................................ 1 143 271 402 +8373 PF08541 ACP_syn_III_C ACP_C; 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III C terminal Mistry J anon Pfam-B_67 (release 18.0) Domain This domain is found on 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III EC:2.3.1.41, the enzyme responsible for initiating the chain of reactions of the fatty acid synthase in plants and bacteria. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.64 0.72 -3.97 187 7398 2012-10-02 12:25:54 2005-09-29 09:55:06 5 25 4322 91 2044 8046 1911 89.30 33 25.98 CHANGED Lpps.slshsDlDhhlsHQANhRIlcs.hsc+Lsls..-..+shhsl.pcaGNTSAASlPlALscslcpG..clcsG-hlllsu.FGuGloWGusll+h .........................................hppssls.h.p-..l....D..ah.l.s.H....Q............AN....h....R....I...l....c....s.....h.....s.....+.....+......L...........s.......l.......s........-.........+.s.....h...h...s.....l...p....caGN...TS...AAS...lP....lu....L...s.c....s....h....c....p.G........c.l..+..p.......G........c.......h........l.........l.lh...u..F.G.uG..hoaGusllc....................................... 
1 605 1328 1740 +8374 PF08542 Rep_fac_C Replication factor C C-terminal domain Mistry J, Wood V, Finn RD, Coggill PC anon Pfam-B_930 (release 18.0) Domain This is the C-terminal domain of RFC (replication factor-C) protein of the clamp loader complex which binds to the DNA sliding clamp (proliferating cell nuclear antigen, PCNA). The five modules of RFC assemble into a right-handed spiral, which results in only three of the five RFC subunits (RFC-A, RFC-B and RFC-C) making contact with PCNA, leaving a wedge-shaped gap between RFC-E and the PCNA clamp-loader complex. The C-terminal is vital for the correct orientation of RFC-E with respect to RFC-A [1]. 21.00 21.00 21.00 21.00 20.90 20.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.59 0.72 -4.00 117 1565 2010-07-14 04:08:27 2005-09-29 17:21:44 6 38 497 21 1053 1492 176 89.50 21 25.10 CHANGED shs..thlc.pllpphhp.p.....shtc..................spphlt.cLhsp...GhuspsIlpplhchlhph....sh..sph+hpllptluph-hRltpGssphlQLpuhlAph ................................................................................thlc.phlpthhs.t...............shpp...............................................s.tptlp.c.Lhsp...GhuspsIlp...p..lhchlhph............phs..sph+hcllctl.uph-hRlspGssphlQLtuhlup.................. 0 361 595 866 +8375 PF08543 Phos_pyr_kin Phosphomethylpyrimidine kinase Mistry J, Wood V anon Pfam-B_787 (release 18.0) Family This enzyme EC:2.7.4.7 is part of the Thiamine pyrophosphate (TPP) synthesis pathway, TPP is an essential cofactor for many enzymes [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.55 0.70 -5.18 132 8058 2012-10-03 06:25:16 2005-09-30 10:04:10 7 34 4351 38 1754 12925 3225 233.80 27 80.50 CHANGED DouGGAGIQADLKThpuhGsaGhosITulTAQN..TtGVpulpslss-.hlpsQl-ulhsDlslsAlKhGMLusscllcsV..Actl.cp.hsh.slVlDPVMlupoGspLlps-u.lpsl+cp..Ll.PhAsllTPNlsEAp.....hLsG....hplp...s.p..-hcpuAcplhp.h...GspsVLl..KGG.H.h..t............ppsh..DlLh....sssph.hthpstRlsTppTHGTGCThSuAIAAtLA+Ghsl.cAVppAKpalptAlp....pshplG..pG.tGPls .............................................................................................................................Ds.uGuGh.ADltshtths..........sauhsslTsl.s.s.s.....s....t....s.....t..h........h.......p....h..l.tt..Qh...p...s..l...h...p.......s...h..........t........h.............c...A..l...K...h..G..h....L.u..s....s....c....h....l........c...h..l.....s....c.....h.....l......p...........p................p..........h...............l...V...l..D......P.......V.....M......s..s....p......s...........u...s........t.....l.....l..s....s.s.s....h...p....s...h....p.....p.....p.......L...l......P.h...A......s.....ll.TPNls.E.Ap...................hLsG.........hplp.........................s..c...........-h.h...p..A....u.......c....t..Lhs.....h..............G.s..p.....t....V..l..l....K..GG..c..h...t............................................pps.............-.hlh..............s.s.......p...p....h...........h......h..h..p..s.....s.....+......l......s...t.......p......p....s...+..G.TG.s.o.h.uu.....s.ls.u..........t..........L...s..........p.......G.....t...........s......l..t.....c....A.......l.p..p.A..p..t.a.ltpulp.......psh.p..h..s.............................................................................................. 0 531 1053 1453 +8376 PF08544 GHMP_kinases_C GHMP kinases C terminal Finn RD anon Prosite Family This family includes homoserine kinases, galactokinases and mevalonate kinases. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.76 0.72 -3.78 121 12811 2009-09-10 21:23:33 2005-09-30 10:45:27 8 48 4606 85 3000 8683 2225 82.10 18 24.10 CHANGED hhpsl......t.............tpht.htphht..t...............hhthhhsplpplhcth.pphG......hssthoGu..Gsslhslh.............ppppsppltptl.ppthtp ..............................................................................h....h.t.............tphphh.up.hhp.ts..t......................h.hthth..splc.plhphs..tphG........shuuphoGu...GsslhuLs.................sp..pp.spplhptl.tt....t....................................................... 0 971 1843 2506 +8377 PF08545 ACP_syn_III 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III Mistry J anon Pfam-B_135 (release 18.0) Domain This domain is found on 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III EC:2.3.1.180, the enzyme responsible for initiating the chain of reactions of the fatty acid synthase in plants and bacteria. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.70 0.72 -4.32 170 6937 2012-10-02 12:25:54 2005-09-30 14:13:13 5 20 4279 91 1726 8497 1908 78.70 35 23.31 CHANGED FDlsA.u...CoGFlauLshAsshlpuG.th+plLVlGu-thS+.hlDa...s...D..RsTsl....LFGD..GAGAsllpss......ppt.......u..lls...splto..DG ...................FDls.A.ACuGFlauLs.s.As.p.h...l...p.........o......G....t...........h..........+....p...s..LV.lG..u.-..p.h...S+....hl...Dh.......s.........................D.......Rs.Ts.l.....................L..F..GD...GAGAsllpss.......pp...............s...lls.hphts-G.................................................................... 0 550 1116 1470 +8378 PF08546 ApbA_C Ketopantoate reductase PanE/ApbA C terminal Mistry J anon Pfam-B_396 (release 18.0) Family This is a family of 2-dehydropantoate 2-reductases also known as ketopantoate reductases, EC:1.1.1.169. 
The reaction catalysed by this enzyme is: (R)-pantoate + NADP(+) <=> 2-dehydropantoate + NADPH. AbpA catalyses the NADPH reduction of ketopantoic acid to pantoic acid in the alternative pyrimidine biosynthetic (APB) pathway [2]. ApbA and PanE are allelic [2]. ApbA, the ketopantoate reductase enzyme is required for the synthesis of thiamine via the APB biosynthetic pathway [1]. 20.90 20.90 21.20 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.36 0.71 -4.13 112 4691 2009-01-15 18:05:59 2005-09-30 16:41:07 6 30 2834 28 1372 3635 1120 124.90 25 38.00 CHANGED slttthWpKllhNs.....shNsloulhs...sss.uplh....tssthpplhptlhpEshtlu.........pupG...........................ht.hstp........................hhctlhphhpt..........sss.....p..psSMhpD.lppG+..oEl-hl.sGtllc.h.....ucphGls...sPhsphlhp.ll.+t.hp .................................................slttthWpKlhhNs.................shNsloulhp...................ssh.uplh......................pp.s.t..h..pph...h.pplhpEsh.sVu...................pt.p.G.........................................................hp...hstp.........................................lhpt.lh.p.lhpt.............................sst........p.....hoSMhpD.lp.ptR....oElDhI.sGhllc.h.........uc..p.t..G...ls......s.Ph.sphlhphl+th........................................................................ 0 379 802 1142 +8379 PF08547 CIA30 Complex I intermediate-associated protein 30 (CIA30) Mistry J, Wood V anon manual Family This protein is associated with mitochondrial Complex I intermediate-associated protein 30 (CIA30) in human and mouse. The family is also present in Schizosaccharomyces pombe which does not contain the NADH dehydrogenase component of complex I, or many of the other essential subunits.\ This means it is possible that this family of protein may not be directly involved in oxidative phosphorylation [1][2]. 
19.70 19.00 19.80 19.00 19.60 18.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.90 0.71 -4.37 83 768 2012-10-03 19:46:52 2005-12-01 09:21:38 7 21 485 0 464 811 928 156.70 22 55.05 CHANGED hcFsss......s...........shpp...WtslsDsVMG.GhSpuphph....................s.....ttuhFpGplShc..........ssGGFuShRo.........hps.hDlu..sasulpL+l+...GDG.+pYphplpsp..st............hsshsYptsFt.....T..sst...........Wpslc................lPascFhss.hRG+hlpst...s..h...sssplpplulhlu...........sc..psGsFpLpl ................................................................th.pp....WtshsDt.s.h.G...Gh.Spup.hphs...........................................s..stuhFpGplshc.............................................................ppu.Gasuh.Rs......................................hp..hc.h.s.sass..ltLcl+..............G..DG.......+..p...Yhlplpsp..s..............ts.ha.ptth.....st..sst....................Wpplp....................................................................lPaspFh..s...c..G.p........h.h...t............h...........p..plppluhhht..............st....G.atL....................................................... 1 145 272 386 +8380 PF08548 Peptidase_M10_C Peptidase M10 serralysin C terminal Mistry J, Rawlings ND anon Rawlings ND Domain Serralysins are peptidases related to mammalian matrix metallopeptidases (MMPs).\ \ \ \ The peptidase unit is found at the N terminal while this domain at the C terminal forms a corkscrew and is thought to be important for secretion of the protein through the bacterial cell wall. This domain contains the calcium ion binding domain Pfam:PF00353. 
20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null --hand HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.11 0.71 -4.68 15 432 2009-01-15 18:05:59 2005-12-01 09:50:32 6 100 256 24 107 457 56 201.30 36 37.88 CHANGED ANhoTRTGDTVYGFNSNT-RDFYoATSuSsKLIFSVWDAGGNDThDFSGaoQNQRINLNEuSFSDVGGLKGNVSIA+GVTIENAIGGSGNDlLIGNsAsNlLKGGAGNDIlYGGGGADpLWGGAGsDhFVYuuuuDSssuAsDhIpDFpSG.DKIDLSuhspsss....L+FV.DsFoGcAGEAlLoYDuuoslosLtlshuGcss.sDFlVplVGQsss.oDhIV ....................................................................................................................................................................................s.pTRssDTsY........GFN..S..s.....o...s...c......D....a.h....o..........u......s......s....s........s....s....p..l....l....F.olWD.uGGsD..ThD..FSGa.s.p...sptIsLs.......t...s...s..........a..........S.....s..........l...G..............G...............h...........t..........u.....N..........lo..........IAhs.sslE.NAlGGsGsDhlhGNtssN.l.Gss.........................................................................................................................................................................................................................................................................st..hhsststshhhhtt.tts.sts.Dhlh.DF..p...u...D.+l..sl..t....h...............h...hh....t.t...............h...t.........t........t...........................h.h............................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 13 50 73 +8381 PF08549 SWI-SNF_Ssr4 DUF1750; SWI-SNF_ssr4; Fungal domain of unknown function (DUF1750) Mistry J, Wood V anon manual Domain This is a fungal domain of unknown function. 19.50 19.50 19.50 19.50 19.00 19.20 hmmbuild -o /dev/null HMM SEED 669 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.45 0.70 -6.22 3 99 2009-09-10 22:09:48 2005-12-01 10:49:12 5 3 82 0 83 100 0 503.50 32 94.93 CHANGED MD.DPASRVPuQLLPHMHLVSRaRYPLMHMMPTDTVV-YLLSAPKIVREApPMHWTFLDGPQDGTVMLTWQPLNHLGTNFASDGYVWADVEQAFTFEA.RGYVVEMWLHRSGYHPPNESVAIHCRRRYRLLPoKVPNPSLPPPDPSLWIVHYSRAPPsDHIPANRIPVSPQVQsMLAQRRFLQsQGQLARKDFMLHDRNNWPTIsLPPQhA.sQuhtQPsGPYPNAMVGRQPFYPQPGssAsPPsusussKAPRGHRASoAAssAAosDFALEDE.DVSsGDLMDLLTPREVSKMRYQQHHEWMEEILASPYAISQITPVSLGLGRKGELESLTAGFFDAPVGPusGDSc-GsEusQATKLEPE+A-EFADRVAKKVADMTAEIEKLKKRHARRMEKFNRTStLKDAEpRLRDAAAsPtDTGSEIWRLEGRlEhsTE-DsucluPlEHKAKYKVDDIVREVEsSWpKpIVPEP+VSCVEKGGLLEKI...EPEPso.......................hhuDlDIDMGHTDSHLLDQF.TAtGssuQotuTsAP.....uAsGQAsPTATGVAssQPsA....GLDI-MDhGDupsouTAuGETGDWVMVN-s................KKDDslshPstph.tspTPGSGLQGLTPG.souuDsGLDusNFDFTN...MDSAGDALAAY.........TEQN-GLDLP.DLENSAFGDAFH..........ASDNEsTHHHDADDMS 
.......................................................................................pas......h.hppshcaLhtAPpls+-.tPhhWta.lDt.P..DGolh.LsWQs..phGspFuoDGhhWsssE.hap.ph.pGh.....................lEhaht+sGahs....s.EphAhHsR+RaRLh..ss...s...s.......sDPsLa..llHYu.u...-plPssh.l.hs.th.....pth....httRp...h.L.ptGQ.lhRK-FMLpDRs....sWPpls...t....p........s......t...h..p.........h....s................................s.t............t......t..t.tps........s........h...............s..........-..h.s..........h.--E.-sSpG..DhhDhlTPR-lSh.RYpppHEWMEElhuSPYtltQI.PssLGLGh+GE.LtsLTtGhF.s....s....t..t....t.s......h.s.+hcst.sc-FtppstcphtthpsEhpphctpHtcthtphpp.shhhptEhtLR.hh......t.Gs-h.a+hE...s+hp....tpt.t..................p............l--lltplpttht+thss....tltplpcGGh.p.....t..P..........................................................s....DhsM..tst....t.hsth............t.......s.s...........................................s...................................................t.D.h...h...t...........s....p.................t..t....................s.....t....t......s......s-h.l....hlsp.t.............................................s.........s............................s.t..........s...h.st...................s..h..........-....hts......hsoAG-ALAsY...................tt.s.thsh..th.-sSAFG-AFH..........usptt............t.................................................. 0 20 43 67 +8382 PF08550 DUF1752 Fungal protein of unknown function (DUF1752) Mistry J, Wood V anon manual Domain This is a family of fungal proteins of unknown function. This short section domain is bounded by two highly conserved tryptophans. The family contains Swiss:P34072 that is thought to be a negative regulator of RAS-cAMP pathway in S.cerevisiae. the Sch.pombe member is a GAF1 transcription factor Swiss:Q10280 that is also associated with the zinc finger family GATA Pfam:PF00320. 
20.90 20.90 20.90 20.90 20.80 20.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.69 0.72 -7.15 0.72 -4.36 48 460 2009-01-15 18:05:59 2005-12-01 11:05:33 5 14 156 0 340 467 0 29.00 39 4.13 CHANGED lW+has+s+p.....plss..scRlENloWRhhstp ............W+hhspp+s.....plss..upRLENloWRhWsp...... 0 88 185 291 +8383 PF08551 DUF1751 Eukaryotic integral membrane protein (DUF1751) Mistry J, Wood V anon Pfam-B_13217 (release 18.0) Domain This domain is found in eukaryotic integral membrane proteins. Swiss:Q12239, a Saccharomyces cerervisiae protein, has been shown to localise COP II vesicles [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.38 0.72 -3.67 23 355 2012-10-01 23:21:32 2005-12-01 11:26:52 5 5 269 0 250 1107 281 98.30 29 27.91 CHANGED Puhsh.....aPWoLlTusalEtslashllshlsLhluG+alEphWG.upEhlKFllllsshsNLlshlhtllhhhhops.p..L..h.lsGthulhsGhlVAhKQ ....................Pshhh.....as..W.ol..lTsshl..E.....p..s.....l..h.slllshhslhhsG+hLE.hWG..up..............Ehh+.Fl..h..llshh.ss.lls..hh...hh..l..lh..ah..h.......ots.p..........h....Lh..h.lpGh.huhhsGhLVAh+Q.................................................. 1 73 135 210 +8384 PF08552 Kei1 DUF1753; Inositolphosphorylceramide synthase subunit Kei1 Mistry J, Wood V anon manual Family Kei1 is a subunit of Saccharomyces cerevisiae inositol phosphorylceramide (IPC) synthase [2]. It is localised to the Golgi and is cleaved by the late Golgi processing endopeptidase Kex2 [2]. Kei1 is essential for both the activity and the Golgi localization of IPC synthase [2]. 
25.00 25.00 34.10 33.90 23.10 21.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.16 0.71 -4.63 17 133 2009-01-15 18:05:59 2005-12-01 11:35:41 6 5 128 0 100 134 0 176.00 31 63.88 CHANGED poFLuhhsLhhGsEllhhhhlhNKsoGlYGlLulhTGasLshhQhhhYlaSlhsLshahhsl.pl+..c...........................psshpshhlshlYshDollsshaThhFshsWFht..........pstsssst............................sstsusstttsspsssu...htp..................................utstthEhhholhlslhhhllRhYFshllhuas...pplL+p.hhts .....................pFhhhhsL.hGspllhhhhlhNKhoGlYGlLAlhT..Ga.Ls..hhQlshYlaSlhsLslhshhh.pI+...c.............................psshpsltlAalYhlDollsshaThhF.ussWFhh................tss......ssss.t..ts.....................................ss..tss.tssttstpstst..htp..................................ssshp.EphholhllsshhllRlYFshllhuFA...pplL+p.h...h........... 0 25 54 84 +8385 PF08553 VID27 VID27 cytoplasmic protein Mistry J, Wood V anon manual Family This is a family of fungal and plant proteins and contains many hypothetical proteins. VID27 is a cytoplasmic protein that plays a potential role in vacuolar protein degradation. 
27.20 27.20 28.90 30.90 23.90 26.00 hmmbuild -o /dev/null HMM SEED 794 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.72 0.70 -13.23 0.70 -6.31 9 256 2009-09-11 00:42:47 2005-12-01 11:45:52 5 5 188 0 187 259 0 563.90 30 87.52 CHANGED M.hL+p....hhGsu.sppEllpIPuGpLYLlR..pSPKGspECIYpDAssoIR+Tu.-apYQLVVp+saEEGEsph.......tt.-s-Dsshs.........-DEhsFhlDcsLch+hphpctGctslsWcDlpGDpG.DhaEFVssss.lshspl-pFthTshcC.YEpKY++SspcAo-.--Lppacaps.hs............cscphsh--ph.ssshtphsss.scssps.s.htcutpp...t..............................ph.t..hhsttpu-LalYDshotpFlLQ..cssVslslh-sGc.acaWLtlcGt-p..Luhslss-hNPsFshpphSFlFNahssc.....shSWhL+FcDhsshscFppsaspslWEphNcpKWscs.csEpcYl.-Aaps.........hph-Dt.....ps..pE-E-E---E--spu.....ucpth-s-pa--ccsttt.pp...sssNppLAVGa+pDRSaVlRGsKIGVF+pss.sspLEFsTsIpplus.pGKhFsPcKhMLHtpD+phlLpDss.stspLY+MDLEhGKVV-EWcVpDcs...VssauPssKFAQMTsEQThlGlSpNulF+IDPRlSG.sKLV.-uphKpY.ASKNsFSuluTT-pGYlAVuSsKGDIRLFDRLGhNAKTtlPALG-PIlGlDVSADGRWlLATC+oYLLLlDshIK-G.KNtGpLGFp+SFst-uKPpP+RLplpPEHsAah.p.T.tcPlsFT.ApFNTGhsppEpoIVTSTGPYlloWShKclLpGc...psPYhIKRYsssVhADsF+FGoD+NVIVALccDVsMsp++shppPoRpslsss...........................shhNp.h 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.a....................sahl+F...s.........pF.tth.thha.t..t..ph.tp...p.p.Y.hhp.ht............................................................ht.pt..................pptttt..............p.tt.ttt.....................tpt.....p.p.p.......p........pp...t.........t............tthsp...p.LslGh..DpSallp..ss.t......ItV.a+p..........ps......t......l..p.....at........s.shs.plp.....s........pG...p.......h...s..PpKsh.....L.hp.t-pshlL.....s.t.....p......ss.....t.....l.....aphDlEhGKlVpEWch.p...c...c...hs...hhshssp.s...KhuQhs.s.p.p.TFlGlspNsla+hD.....sR.........l.....s.u......sp.....l...........l...t.s.........p....h............+.........p...Y...spps......s......Fs.sh.ATTtpGhlsVuSpcGcIRLa...............s.......+..............h...............u.....h..p......AKTtlP.u.LG.pPIhtlDVotDG+WlLuTscoYLlLlss.h...p.cs..csps.ph..GFp.pph.sts.ppstP+hLtLpPtcst.h...t...tps..l..pFs.ApF..s..T..t.....s...p....pE.ppIlsusG.ahlhWshcplhpup.....ptsYph....................................................................................................................................................................................................... 
0 69 126 168 +8387 PF08555 DUF1754 Eukaryotic family of unknown function (DUF1754) Daub J, Mistry J, Wood V anon Pfam-B_10536 (release 18.0) Family This is a eukaryotic protein family of unknown function. 21.70 21.70 21.70 21.90 21.40 21.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.32 0.72 -10.62 0.72 -3.40 37 249 2009-01-15 18:05:59 2005-12-02 16:25:23 5 7 201 0 171 240 1 86.70 28 58.97 CHANGED c....Ysp.sssGpLKLKGst..h............+KKKKKccppspp....pctstpspppppppstcst.....................................tptt.pttsthshTcAE+pacchpcKRhpc .......tYps.ss..tGpLKLKGss..h..............hh+KKKKK..c+ccpc.p......tpt.htssp.p.pp.ppp.pttt....................................................................thT.AEt...tacchpcKR........................................ 0 56 89 131 +8389 PF08557 Lipid_DES Sphingolipid Delta4-desaturase (DES) Daub J, Mistry J, Wood V anon Pfam-B_9504 (release 18.0) Domain Sphingolipids are important membrane signalling molecules involved in many different cellular functions in eukaryotes. Sphingolipid delta 4-desaturase catalyses the formation of (E)-sphing-4-enine [1]. Some proteins in this family have bifunctional delta 4-desaturase/C-4-hydroxylase activity. Delta 4-desaturated sphingolipids may play a role in early signalling required for entry into meiotic and spermatid differentiation pathways during Drosophila spermatogenesis [1]. This small domain associates with FA_desaturase Pfam:PF00487 and appears to be specific to sphingolipid delta 4-desaturase. 20.30 20.30 20.30 21.60 20.10 20.10 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.02 0.72 -4.71 24 352 2009-01-15 18:05:59 2005-12-05 16:10:50 5 5 268 0 247 355 8 37.80 47 11.04 CHANGED sspsDFhWoho--PHssRR+tILpKa.PElKcLhGs-Pth ........csDF.WsYT-EP...HssRR+tILpKa.PEIKcLhGs-Ph...... 
0 89 133 203 +8390 PF08558 TRF Telomere repeat binding factor (TRF) Daub J, Wood V anon Pfam-B_8956 (release 18.0) Domain Telomere repeat binding factor (TRF) family proteins are important for the regulation of telomere stability. The two related human TRF proteins hTRF1 and hTRF2 form homodimers and bind directly to telomeric TTAGGG repeats via the myb DNA binding domain Pfam:PF00249 at the carboxy terminus [1]. TRF1 is implicated in telomere length regulation and TRF2 in telomere protection [1]. Other telomere complex associated proteins are recruited through their interaction with either TRF1 or TRF2. The fission yeast protein Taz1p (telomere-associated in Schizosaccharomyces pombe) has similarity to both hTRF1 and hTRF2 and may perform the dual functions of TRF1 and TRF2 at fission yeast telomeres [2]. This domain is composed of multiple alpha helices [3] arranged in a solenoid conformation similar to TPR repeats. The fungal members have now also been found to carry two double strand telomeric repeat binding factors [4]. 
25.00 25.00 27.10 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.23 0.70 -11.43 0.70 -5.03 27 262 2009-01-15 18:05:59 2005-12-05 16:13:17 5 4 151 11 139 253 0 216.50 25 37.59 CHANGED hhp..oL.shLDsl......................usQlLphltp....s...h...htp.ts....spspsFppltslFchhpcha.scsshl.s.pl..............hpsppthl.psl++sNhuphlssshss.c.luh...h.LsptFl-lFsspss......................................phh+spshLhlpLKTQAaIsulc..............tt.cppp-lLcclhssshps.h..hp.........l....sEhphh.+hcpR+cpLh....ppsshpsL.ppasatsFh+clhsalppphs.....hllhutptts.spt.ps .......................s.......hL-.h......................shpllphhtp....................................spspsFpph.pslhctlhphh.......spss.........................hpt+phhlhph..Lp+ls.uph.L...sspF..sspp..ls....hEss.thhchhpsEts.................................................hh.csppplhh.lKpQAhlsshc.............................................ptphcptp-lLc.clFscs.sp......................................pph+ppLl..lp.pcsshcslhppas..apphhcclhsalppphs.....hLhhttpthhpp...stp................................................. 0 15 46 92 +8391 PF08559 Cut8_C Cut8; Cut8 six-helix bundle Daub J, Wood V, Eberhardt R anon Wood V Family In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome [1]. Cut8 comprises three functional domains. An N-terminal lysine-rich segment (Pfam:PF14482) which binds to the proteasome when ubiquitinated, a central dimerisation domain (Pfam:PF14483) and a C-terminal six-helix bundle (this entry), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding [2]. Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. 
Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 [1]. Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome [1]. In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 [1]. Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum [1]. 21.00 21.00 21.50 21.50 20.30 19.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.92 0.71 -4.36 19 160 2009-01-15 18:05:59 2005-12-05 16:15:06 5 5 156 3 123 156 0 140.60 30 43.46 CHANGED PolpsslplLpphhpplhsslP..YspsspS......DYAYhRl+tpLhphLssLsDas.paLPP.....pcsphptSLpFLDtATpll+pLPsa-otpaNhhKspsY-plupsWhlllpcuucctsshth..s.....................pcLpc+NppSss+.hppsl ...............................PolpsslphLpphppplhpulP..hu.psspu..................DYAYsRl+.pLsphlc.sLsDas...phLPP.....pp..p.phssSLpaLct.A.Tcl.l+pLPpW-stpaNhh+cpsa-plupsW.hhlIcc.uucc.tuthphp.sth...................................ppLtcaNppusGc.hpps...................................................... 0 30 60 103 +8392 PF08560 DUF1757 Protein of unknown function (DUF1757) Bateman A anon Bateman A Family This family of proteins are about 150 amino acids in length and have no known function. 
21.00 21.00 21.80 21.50 20.70 20.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.94 0.71 -4.53 4 53 2009-01-15 18:05:59 2005-12-13 12:59:48 5 4 28 0 49 49 0 149.10 24 75.66 CHANGED pWF+NhhGhp...lo-pEhtsIPpPcsEhslHlTh+ohQAhuLlGullsuPlsphlpu.+.Nhpthpssusphu+hG........hlhGlVsuPhLsYhch+stshsp.uLaDRCYRLRhNpsplRhDRhuhlushsG..........hhpsuhhGhVsGssluhsYs.l.Sshhs .............................................................lPpPthphslHssh+uhpsuull...............Gu.ll.sPl..hhh.hpp..pp....s..p.thh..s........shspsupsG........slsGhshGPh..lohhch+s..hschchhDRsYRLRhNpspLphDRhslhuuslG....................hh.ss.hGhVsGlsluhhhs........h.h..................................... 1 23 30 49 +8393 PF08561 Ribosomal_L37 Mitochondrial ribosomal protein L37 Bateman A anon Bateman A Family This family includes yeast MRPL37 a mitochondrial ribosomal protein [1]. 19.60 19.60 22.90 20.20 17.50 17.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.76 0.72 -4.41 23 304 2009-01-15 18:05:59 2005-12-13 13:51:40 5 2 258 0 232 295 1 89.50 29 61.01 CHANGED spsssSus.suThlsslNlhKs.GpDPshL.DSEYP-WLWpl.LDssspstcttcss.............................................................................p.t++hhRptp+ppI+psNhLsph ....................................s....hsss.hsThltGlNhhKs.spDshhh.DsEYP-WL....Wpl...hss.t....thtcttcss...................................................................................htth+chhRttp+pp.I+psNhLpt.................................................................................. 1 75 126 188 +8394 PF08562 Crisp Crisp Mistry J anon manual Domain This domain is found on Crisp proteins which contain Pfam:PF00188 and has been termed the Crisp domain. It is found in the mammalian reproductive tract and the venom of reptiles, and has been shown to regulate ryanodine receptor Ca2+ signalling [1]. 
It contains 10 conserved cysteines which are all involved in disulphide bonds and is structurally related to the ion channel inhibitor toxins BgK and ShK [1]. 20.10 20.10 23.10 29.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.85 0.72 -3.69 26 238 2012-10-02 17:51:16 2006-01-03 16:31:09 5 2 114 25 72 235 0 54.30 49 22.83 CHANGED CuDCPssC-NGLCTNsCpapDtaoNCsoLtpphuCpp.phlKspC.AoChCcscIh CusCPssC.-.s...G.LCTNPCpapDtaoNCcsLtp...p...h...uCpc.....ph.....l.....+ppCtAoChCpscIh.... 0 7 10 37 +8395 PF08563 P53_TAD P53 transactivation motif Finn RD anon Pfam-B_3515 (release 19.0) Motif The binding of the p53 transactivation domain by regulatory proteins regulates p53 transcription activation. This motif is comprised of a single amphipathic alpha helix and contains a highly conserved sequence [1-2]. 20.00 20.00 20.00 22.30 19.40 19.30 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.69 0.72 -7.08 0.72 -4.85 12 143 2009-01-15 18:05:59 2006-01-03 16:48:47 6 6 66 16 32 159 0 24.40 62 6.48 CHANGED spptshs.PLSQ-TFp-LWphLsts .....S-.ulEPPLSQETFSDLW+LLPEs.... 0 3 4 6 +8396 PF08564 CDC37_C Cdc37; Cdc37 C terminal domain Bateman A, Mistry J, Wood V anon Pfam-B_3345 (release 6.5) Domain Cdc37 is a protein required for the activity of numerous eukaryotic protein kinases. This domains corresponds to the C terminal domain whose function is unclear. It is found C terminal to the Hsp90 chaperone (Heat shocked protein 90) binding domain Pfam:PF08565 and the N terminal kinase binding domain of Cdc37 Pfam:PF03234 [2]. 
20.80 20.80 21.00 22.10 20.70 20.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.30 0.72 -4.16 31 299 2009-09-11 10:45:19 2006-01-04 11:51:26 5 10 236 1 188 289 3 91.80 29 21.44 CHANGED h.h..Ps...............uhDP...........clF-.oLPs-hQcsh-occl-tlpcslucMss-EActhhcphs-uGlhs.puth.......sptphc-.ptp.....tthpptptppttsp ......................hPu...uhDP...........-lF-.oLPt-hQcsh-opsl-hlpcslucMss--Achhhc+hs-uGlhs.psth.................psppphp-.ttt.....................stts............................. 1 59 99 155 +8397 PF08565 CDC37_M Cdc37; Cdc37 Hsp90 binding domain Bateman A, Mistry J, Wood V anon Pfam-B_3345 (release 6.5) Domain Cdc37 is a molecular chaperone required for the activity of numerous eukaryotic protein kinases. This domains corresponds to the Hsp90 chaperone (Heat shocked protein 90) binding domain of Cdc37 [2]. It is found between the N terminal Cdc37 domain Pfam:PF03234, which is predominantly involved in kinase binding, and the C terminal domain of Cdc37 Pfam:PF08564 whose function is unclear. 
25.00 25.00 26.30 25.10 22.80 24.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.05 0.71 -4.78 19 361 2009-01-15 18:05:59 2006-01-04 12:01:11 6 12 244 3 220 346 3 181.10 27 44.13 CHANGED NKsptt........t.st.ts.pphpphsppsEpl...................................ccFuplp.scaccoppFLh-HspllsEpptctLlhpAFchphctccchhpplsHQullhQYlhp...Lu+..phss...+sslp.FFpKlts..sc..htptFpc-Vpshhp+l+sRupth....hcEpppps .................................................................................................................................................pt.tt.........................tt..tp.ph.s.....................................................t..p...............p.ht........h.+....cFGhlp..cac-SpcFLp-H.s.p.L.l.s..E.c..s..ss.hLllhshchph....E.p..............Kp.s....hhcplsHQsllhQalh-....L.ucp.phss.......Rsslp.FFpK....lps...sct.hhcsFps-lpshhpR.l+tRAp.h.......hp-.t...t........................ 0 65 105 170 +8398 PF08566 Pam17 Mitochondrial import protein Pam17 Mistry J, Wood V anon manual Family The presequence translocase-associated motor (PAM) drives the completion of preprotein translocation into the mitochondrial matrix. The Pam17 subunit is required for formation of a stable complex between cochaperones Pam16 and Pam18 and promotes the association of Pam16-Pam18 with the presequence translocase [1]. Mitochondria lacking Pam17 are selectively impaired in the import of matrix proteins [1]. 
20.50 20.50 21.30 47.60 19.60 19.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.97 0.71 -4.57 24 137 2012-10-02 17:14:55 2006-01-05 11:20:55 5 2 133 0 108 136 0 173.70 40 77.72 CHANGED hsp........sshpssSssssssssss..............LsWssFFpLR+p...cRRhshsoSlhTulsusshuhsaLushplD.....sp.IhGlDPhhllGhushusuulGaLhGPhlGsslFpLhp....RpthtthphK-p-FhpRI++pRVDPSupShuNPVPDYYGEKIuSlpsYRQWLRDppAapRKsc.pF....l ............................thtstts.........putst..ssstttss.............LsWssFFpLR+p...+RRhslsuSlhsulhu...sssussh....Lu...sh..phD.......st.lhGhDPhhllGhushusuulGaLlGPh.lGsslapLhp....R.phhsthsh......K-+EFapRI++pRVD..P..S.upShuNP..VPDYYGEKIuSlpsYRQWL+DQ.+AasRKtppFl................. 0 34 62 93 +8399 PF08567 TFIIH_BTF_p62_N TFIIH p62 subunit, N-terminal domain Finn RD anon Pfam-B_31040 (release 19.0) Domain The N-terminal domain of the TFIIH basal transcription factor complex p62 subunit (BTF2-p62) forms an interaction with the 3' endonuclease XPG, which is essential for activity. The 3' endonuclease XPG is a major component of the nucleotide excision repair machinery. The structure of the N-terminal domain reveals that it adopts a pleckstrin homology (PH) fold [1,2]. 21.10 21.10 21.20 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.35 0.72 -4.33 19 254 2009-01-15 18:05:59 2006-01-05 11:46:22 6 6 219 7 178 238 0 74.10 34 13.13 CHANGED h.hpGtssaK.....Kss......GhLplspDpp.ltWhspussusp.sloltlspIssLQtoPtsusKlhL+llh+ss.......pshphhFss .........h...stpspaK.....Kp-......GsLhlhs-+..lsWssc......upcpss....lohhhucIps..........phoPcu.psKl.Lpllh+ss.........sspsahF..................... 0 52 87 143 +8400 PF08568 Kinetochor_Ybp2 DUF1760; Uncharacterised protein family, YAP/Alf4/glomulin Mistry J, Wood V, Lonsdale D anon manual Family This entry contains a number of protein families with apparently unrelated functions. These include the YAP binding proteins of yeasts. 
These are stress response and redox homeostasis proteins, induced by hydrogen peroxide or induced in response to alkylating agent methyl methanesulphonate (MMS) [1,2]. The family includes Aberrant root formation protein 4 (Alf4) of Arabidopsis thaliana (Mouse-ear cress), which is required for the initiation of lateral roots independent from auxin signalling. It may also function in maintaining the pericycle in the mitotically competent state needed for lateral root formation [3] [14731255]. The family includes glomulin (FAP68), which is essential for normal development of the vasculature and may represent a naturally occurring ligand of the immunophilins FKBP59 and FKBP12 [4,5]. 21.40 21.40 21.60 22.10 20.90 21.30 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.13 0.70 -13.07 0.70 -6.28 29 316 2009-12-17 14:51:14 2006-01-05 11:55:57 5 8 213 0 210 288 0 458.30 17 85.82 CHANGED M.............................................................htct.s......shhpt...spDhloalThl-hh..................p.p.thLstLhplL.p.-..pslsppIGWDLsphLl.hlstsp.................cChpplAchGNP+EllLpssEhLppLps....................................tDps..pp...................................ttp.t..hpl.ph.s...LlphlsshhpRIpTphPS+FLuhulpulhphhpp.s.....ppshs.hpFl.....+plhshpRsh.sst.sp.................ss-hsscs.s.hhssEssl.++lLpthhhphlpphlh..........shphshch.....sth...h.sttpppphhs.pp.ht.h..lhu+hstLu.....hDlsLps.h....h............ppl.....t.s.pspsh....s...-cs.hphc..h.h............cIslshpGsLlLhstthh.............pssp.........hsDhlhlhlchs........sshhpspulpDshhhhshhshpssp...........t.h....spthhhsalpsLhhhsspps...th+ttshplhsplLphtPpps.pachI+csLcsssat....slKsshluhLKc.lhpsppssptt.....................................................ptslhlsscphsslhsllhtshpthh..............hhp...p.hshlhuhLNlhhhlhp.........p.s..............hhhpchhcslcshlpthcschttp....................................stpph.ph....ssp.hLpps 
......................................................................................................................................................................................................................................................................................t......t-...h...h.h.......................thh..l.phl.p..p.....hhtphGWsL.t.ll.h..h..pp...................thhp.lst..ssP+EhllthhEhltp.p........................................................................................................t..h.h........Lhp.lthhh.+l..t..shhstp.lt.hslpslhp.ht..s.......tt.t.........hh...............ptl.t..hps.....................................................................p...p....t...........t..-.....tp.l..hhh..lt..h....................................................................h.ht.h.th.......s..h...............................................p......t.....................th.....s.hhh.h.................................hh.hh.phh...........t........sh.......h..hhh...tp............................t................hh..thhphlhhhhhhp......s....th...Rph.sh..tlhphhlphhsp.ps.+aphhtph.lp.ss..h.....shps.hlthlKp.h..s.tts...........................................................hh.hss.th.tlh.hhh....pt..........................p...t...s.h....lhsh...LNh.hhhLh...................................p.p....................t.........th.pphhp.lp.hhp...tph...............................................................t.............................................................................................................. 0 53 106 167 +8401 PF08569 Mo25 Mo25-like Finn RD anon Pfam-B_5502 (release 18.0) Family Mo25-like proteins are involved in both polarised growth and cytokinesis. In fission yeast Mo25 is localised alternately to the spindle pole body and to the site cell division in a cell cycle dependent manner [1,2]. 
20.50 20.50 20.50 22.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.89 0.70 -5.41 23 518 2012-10-11 20:01:01 2006-01-05 12:11:16 6 8 288 6 335 448 8 289.20 41 92.36 CHANGED MsFhF...p+.sp.KoPs-lV+slp-tlhtLt..............tscstcKs.-EluKpLsshKphlhGss-s-PssEp...lspLspEhhpp.-llh.LlpsLtpL-FE.uRKDlshlasplLRpphss...........ps.Ps......V-Ylspp.sclLshLlcuY-.......ss-luLssGshLREsl............................+a-sls+llLh...ss.......................pFap.....FFcalphssF-IuoDuasTh+-LLTpH+plVucaL...psNa-cFhs.phspLlpSsNYVT+RQSlKLLu-llL-RsNhplMspYlsss-NLKLhMpLLpDcS+NlQhEAFHlFKVFVANPsKspPlhcILl+N+-KLlcFLpsFpsD+h.cDcpFhDEKpall+pIppL.p .................................................................................F...tp...p...+sPs-lV+th+-.l.h.L....................................................ts.pptpct.--luKpL.thKtlLhGs..................s..........-t...-.....P.s-t................lsQLspEhh.pp..slLhh..Llt..sL.hlsFE......u+K..DssplFspllRp..phss.......................c..ss.......l-Yl......sp..cll.hLhpGY-...............s.-....hALpsG.hLREsl...............................+a.-slA+.hlL....sp..............................................................pFhp.....FF.c.a.lp.....hssF-lAoDAF...sTF+................-LLT.+..........HK.........lsucaL...............ppNa-h.FFp...h...p.pLl.pS....p.N.YV..T+RQSlKLLGElLLDRpNhslMs+Ylsps-NLKlhMsLL+..D.cS+s.IQhEA..FHVFK.....VFVAN..P......pKs.s.lhpILlpNppKLlcFLtpF..........tt-+..........-D-QFt-E..Kthll+pIppL........................................ 0 107 186 274 +8402 PF08570 DUF1761 Protein of unknown function (DUF1761) Wood V, Finn RD, Bateman A anon Pfam-B_85869 (release 19.0) Family Family of conserved fungal and bacterial membrane proteins with unknown function. 
26.00 26.00 26.30 26.80 25.20 25.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.71 0.71 -4.12 81 254 2009-01-15 18:05:59 2006-01-05 13:12:42 5 3 233 0 120 231 54 127.80 21 85.78 CHANGED hlAlllAslsuallGslWYush..FGcsW...............hcstuhspcp..hcstts......shshshlsshlhuhhluhhhshhs.................................htohtsuhhhuhhlhls....hhsshhssshhapt+shplh..hIsuGatllsh................slhuhllsh ..........................................ulhluslhsahhuhhaassl..Fucsa...............hcutuhsspc......h+ptphh......shshshlsshltuhslutl.lsths.................................shohtsuhhhGhl....lhhu....h.ssthhsphha.....E..pRshp..hh..hlsuuapllth................lhlulhls......... 0 51 81 104 +8403 PF08571 Yos1 Yos1-like Wood V, Finn RD anon Pfam-B_23321 (release 19.0) Family In yeast, Yos1 is a subunit of the Yip1p-Yif1p complex and is required for transport between the endoplasmic reticulum and the Golgi complex. Yos1 appears to be conserved in eukaryotes[1]. 19.30 19.30 20.60 20.00 18.60 17.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.59 0.72 -3.88 20 281 2009-01-15 18:05:59 2006-01-05 13:31:28 5 6 222 0 190 258 3 76.20 43 59.59 CHANGED lhsLl.ulLLhlNAlAILsE-RFLu+lGW.usossps.u...............................sssolKspllsLIpuVRTlMRlPLIslNlllIlacLlLG ..................................................LhsLlpusLLhlNAlAlLsE............-............RFLs+.lGW..ut.sp.s.t.s..G..........................t..............ppsolKupllsLItulRTlhR..........l..........P.LIhlNhlhIlhtLlhG............................ 0 51 99 153 +8404 PF08572 PRP3 pre-mRNA processing factor 3 (PRP3) Mistry J, Wood V anon Pfam-B_7232 (release 18.0) Domain Pre-mRNA processing factor 3 (PRP3) is a U4/U6-associated splicing factor. The human PRP3 has been implicated in autosomal retinitis pigmentosa [2]. 
31.40 31.40 36.40 33.40 21.70 28.30 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.55 0.70 -4.97 21 355 2009-01-15 18:05:59 2006-01-05 13:48:31 5 10 293 0 250 343 1 212.70 39 38.51 CHANGED ssPYhsspht..............tphpp+ppcshpFhctG+a.ppApchRpcsphE......chcpchtpt...scpsslpppp............tcph.ttphs.......................PslEWWDtshlsps..........shsslss-ts....t...........................p.ssIst....alpHPlslcsPh-...h.sss...slaLTKKEpKKlRRppRttppcE+p-+......I+LGLcPsP................PKVKlSNLM+VLss-A.lpDPTphEtcVRcphtERpppH.ccNppRKLTs .................................................................................................................tsahDsphs..............htsttRpp+shpF.pppGKa.p.A.pph...RppsplE...................chptclspt...........s+c.s.Glppst...........................ths.hs.h.t.p.h..................................................................P.p.l.EWWDphllst.............................shpsl.sp..t..p..h...p............................................................................................p.psIT......hlpHPs.lpPP.t-.......hss...hshaLTpKEpKKlRRQp..Rttt.KEpQ-K.......IRLGL.PsP.............................PK.V+lSNLMRVLGs-A..VpDPTtlEs+V+pphAcRpptHpctNttRKLT......................... 0 87 141 208 +8405 PF08573 SAE2 DNA repair protein endonuclease SAE2/CtIP C-terminus Mistry J, Wood V anon manual Family SAE2 is a protein involved in repairing meiotic and mitotic double-strand breaks in DNA. It has been shown to negatively regulate DNA damage checkpoint signalling [1][2]. SAE2 is homologous to the CtIP proteins in mammals and an homologous protein in plants. Crucial sequence motifs that are highly conserved are the CxxC and the RHR motifs in this C-terminal part of the protein [3]. It is now known to be an endonuclease. In budding yeast, genetic evidence suggests that the SAE2 protein is essential for the processing of hairpin DNA intermediates and meiotic double-strand breaks by Mre11/Rad50 complexes. 
SAE2 binds DNA and exhibits endonuclease activity on single-stranded DNA independently of Mre11/Rad50 complexes, but hairpin DNA structures are cleaved cooperatively in the presence of Mre11/Rad50 or Mre11/Rad50/Xrs2. Hairpin structures are not processed at the tip by SAE2 but rather at single-stranded DNA regions adjacent to the hairpin. The catalytic activities of SAE2 are important for its biological functions [6]. 22.90 22.90 23.00 22.90 22.30 22.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.37 0.72 -3.28 22 209 2009-01-15 18:05:59 2006-01-05 14:38:56 5 9 190 0 153 218 1 83.20 35 12.52 CHANGED sas-VlRp+c-Rcpl.pGs.p.sC..Csptapshut.t................................phh.lsphppcc+pchh.ptp........tp.hhpphu+HRapa.t+ssTPPGFWchDF.Ps .............................................................EVVRcKp-RcpL..Gp...pC..Ctthatshs............................................t..tc.ccp..c..hh.................sphSRHRapa..h.ssTP.sFWcssF.Ps.............................. 0 45 78 122 +8406 PF08574 DUF1762 Protein of unknown function (DUF1762) Mistry J, Wood V anon manual Family This is a family of proteins of unknown function.\ \ \ Swiss:Q07532 is known to interact with RNA polymerase II and deletion of this protein results in hypersensitivity to the K1 killer toxin [1]. 21.30 21.30 21.50 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.88 0.72 -3.56 16 232 2009-01-15 18:05:59 2006-01-05 14:41:35 5 4 208 0 167 219 1 74.30 30 20.15 CHANGED -sss-YVYDlYh.........hpps.--.pssp.s..........ppsIGalplh-csp-....hhp-----s....phhoDDEDSNsENaYpNDYP-DE .........................................ps-YVYDlYh...........................tp....th.....................pslsh..l.hhhpp.....p.p..p...........hhs---..sc........h.pD--DSN.........sEs.apNDYP---.......... 
0 48 87 138 +8408 PF08576 DUF1764 Eukaryotic protein of unknown function (DUF1764) Mistry J, Wood V anon manual Family This is a family of eukaryotic proteins of unknown function. This family contains many hypothetical proteins. 21.80 21.80 24.20 22.20 21.40 21.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -11.11 0.72 -3.24 19 96 2009-01-15 18:05:59 2006-01-05 15:47:32 5 1 85 0 71 96 3 111.90 24 68.96 CHANGED pscK..............tstphslsshFsstptpppctp..t.........pspcspssp................ttp.p.phppsttRRcs.DGhhIaoh....EELp...hu..cu.GsTs..........pCPFDC-CCF .......................................................................................................................ttp.............t.tp.ttpl-plFsshpppppptptttpt.....................p.pt.t.pp..................ppppptpspp.tttp.....s..ps..sp...sRR+TpDGhsIYot....-ELs...hu.....p.u.GsTs...............CPFDCpCCF......... 0 41 56 66 +8409 PF08577 PI31_Prot_C PI31_Prot_Reg; PI31 proteasome regulator Wood V, Finn RD anon Manual Family PI31 is a cellular regulator of proteasome formation and of proteasome-mediated antigen processing [1]. 25.00 25.00 25.00 25.00 22.50 24.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.11 0.72 -3.33 26 230 2009-01-15 18:05:59 2006-01-05 15:52:27 6 3 199 0 156 220 0 74.20 35 24.24 CHANGED shGcsDL.P.................sGhs..sshps.hss....................t........GGMh.sss...c.PhFss.tp...stt...s...........ssssPPGARa.DPhGP ....................................hGppDL.P..................Ghs.........sshpsh..hss..s.......................stt.........GGMhssPp...c.shFsp..h..........ssp.......G.s.........tth.ssusPPGARa.DP.hGP.... 0 50 82 126 +8410 PF08578 DUF1765 Protein of unknown function (DUF1765) Wood V, Finn RD anon Manual Family This region represents a conserved region found in hypothetical proteins from fungi, mycetozoa and entamoebidae. 
25.00 25.00 25.00 32.40 22.80 24.50 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.56 0.71 -4.07 31 180 2009-09-10 15:38:14 2006-01-05 15:55:44 5 6 133 0 142 183 0 130.40 24 11.91 CHANGED hhstllhshsp+TslYDtssshhlhshlpphl.thhst...p..........................h.shhDapFWLpslthhlp.ocpsho.h+slthlassWshhshs.c+cl.....................................lc..aLlppphah+hF.HWs.hVRshFh+LLlaRlh ...............................h.hsthlphhhp+TslaDtsushhlh-hlcchl..hhsphppp......................................hsshhDasFalsshchhlp.ocsshs.l+slsalassWshl..sts.ccch....................................hhc.hLLspphF.c.h.F..HWsshVRshah+LLsaRl................................................................. 0 62 94 129 +8411 PF08579 RPM2 Mitochondrial ribonuclease P subunit (RPM2) Mistry J, Wood V anon manual Family Ribonuclease P (RNase P) generates mature tRNA molecules by cleaving their 5' ends. RPM2 is a protein subunit of the yeast mitochondrial RNase P. It has the ability to act as transcriptional activator in the nucleus where it plays a role in defining the steady-state levels of mRNAs for some nucleus-encoded mitochondrial components [2]. 27.00 27.00 27.00 27.50 26.90 26.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.63 0.71 -3.97 6 46 2009-01-15 18:05:59 2006-01-05 16:12:12 6 4 43 0 28 49 0 122.10 36 10.25 CHANGED slht--sh.sWp...pDpEssLN....cpsaLpTpIcpIpssacp....csYNhINsLYQoLKRNsI.lPslclaspVLpSIscRcLDs.....ssI-sKhhpLLoCYQDIlsN+...lKPspEIYsIVlsoLL+GSlp .........................t..........hp...p-s-ssl.....cpsaLpTp.lspIspsapp....pchNhI.sLYQuLKRNsl.lPsl-lYshVLcSlscRplDs.....ssl-sKlspLLTsYQDl.l...ssp.......lKPscEhYNIVltuLhcGSl... 
0 3 14 27 +8412 PF08580 KAR9 Yeast cortical protein KAR9 Mistry J, Wood V anon manual Family The KAR9 protein in Saccharomyces cerevisiae is a cytoskeletal protein required for karyogamy, correct positioning of the mitotic spindle and for orientation of cytoplasmic microtubules [1]. KAR9 localises at the shmoo tip in mating cells and at the tip of the growing bud in anaphase [1]. 20.40 20.40 23.20 21.80 19.80 19.70 hmmbuild -o /dev/null HMM SEED 683 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -13.22 0.70 -6.25 9 136 2009-01-15 18:05:59 2006-01-05 16:19:49 5 3 119 0 96 131 0 566.70 25 69.82 CHANGED p.ppclsplspllhsphs.l.t.....l.phhsstcshhsslhphh....-Wlh-u+sllhpL.............hpslcsI-psls+hhphl-s.hts.ps.p.pc-.hs..hhslh-csoplhs......pl+shlpslKchlDhAlEapEIhcshhssLspEl-pshphsh-lpEc+atSPh....+c.hssFsL-pllcph......usp.ssp....phph.PsF..............ss.-cplhpcalpL.csplsPlcsSL.-hLP.RlspFpsRs......p.hsshtp.Lpc+apsLhccYchLpsEhppL+pEll-c+WshlFpsLscElthhh-.sl.+.lp+lpss.ph..shphp.t.thscplpshptphp+ohslIhpAhp.S..Ilscu.lushhN..........chts+W.pL.........+...phh-plLpc.psps..t..............ss.spshpohop.....s.sspsl.sssspt......sspsSpp.shss.ptt+hussL.c+hshtPs.sp..s.t............................ssssssshhps..............psPhFs................ps....cp......t.................................s..t.............sphs..S.p+..hthshoplPslu.pps.hhps............................shpRsssptSp.....h.phtspl........s.s.............................sh.+Sltp..ttpht..tp.h.ssphPsh.hpt..s......psshlspstth.phsts.....sp.cct.....l+.P 
................................hspplsplopl.hs....ltt.h.....lssh.ssh+.sh.s..sst.ph.....tWh.cupphltsL.............hcsl.cpl-puls+h.pLlps.htt.-php.+.-hsp.s..s.lhp.s.sphct......pl+thLpslKppl-lAhEapElhssllsslt.Eh-ph.phhhEhcEcRato.h..........tp.hsh.pLEpllcph.........................t.....t..tsp...................ph.ph...Phh...................st--p...phLsL.hu+hpPLcsSL.-hLP.hRls.FpsRs.....pphFsoup...ccLpp+ppt...L.ppa+tLpp-tcsL..+cELh-cRWsllFRshscpspphh-.ulE+sl.....tK....lpp..........s...p.........thphpt.......thscclcshpt....pps+hhssI..thhhu..llpcG.lts+hs.........t-htt+atsl.................p......t.hDth.L....pchpspp..............+c...slsp.ho...tps...tp.sh.tTsss.......sssS.pl.hhss.........hs..........t.......................p.....t.ps..p..s..s.s.......tt.pphsts..p+.s..hPt..s.s..ttp...........t........................................t.....t..............................t.p.s.hp........t.......t................................................................................h.............p..........s..hs......................p........................................................p......s..............t.ph..................s.s..h........................................................p.th..............................s......................t....................................th................................................................................................................................................................................................... 0 22 51 84 +8413 PF08581 Tup_N Tup N-terminal Wood V, Finn RD anon Pfam-B_9595 (release 19.0) Domain The N-terminal domain of the Tup protein has been shown to interact with the Ssn6 transcriptional co-repressor [1]. 
22.40 22.40 22.40 22.70 22.30 22.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.67 0.72 -3.79 20 172 2009-01-15 18:05:59 2006-01-06 11:04:59 5 8 149 6 122 173 2 74.50 42 12.30 CHANGED RLsELLDulRpEF-shupcst.hp..ppc-YEt+..lspQlpEhphIRpoVY-LEtsHpKhKppYE-EIt+L+pELEsRstp ....................RLsELLDtlRpEF-s.sp........ppc-aEpp..lspQlpEMphIRppVYpLEpsahKhKppY...E-EIppL+pcLEsRsh............ 0 40 73 109 +8415 PF08583 Cmc1 UPF0287; Cytochrome c oxidase biogenesis protein Cmc1 like Mistry J, Wood V anon manual Family Cmc1 is a metallo-chaperone like protein which is known to localise to the inner mitochondrial membrane in Saccharomyces cerevisiae. It is essential for full expression of cytochrome c oxidase and respiration [1]. Cmc1 contains two Cx9C motifs and is able to bind copper(I). Cmc1 is thought to play a role in mitochondrial copper trafficking and transfer to cytochrome c oxidase [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.82 0.72 -4.20 52 511 2012-10-02 15:44:21 2006-01-06 11:43:59 5 6 275 0 351 549 2 68.80 21 58.80 CHANGED h+pthcpcsh.cpCpchlpsht-Cppp..phhpshhtCpcpppthppClpphppscth........ctpcschhpc.+hc ..................hh..hpp+tp.ppCsphlp............thpcCpps......p....hh..phhst.CpctppthppCLpttttppth....................pttctchhpp.+................................... 0 101 180 284 +8416 PF08584 Ribonuc_P_40 Ribonuclease P 40kDa (Rpp40) subunit Wood V, Finn RD anon Manual Family The tRNA processing enzyme ribonuclease P (RNase P) consists of an RNA molecule and at least eight protein subunits. Subunits hpop1, Rpp21, Rpp29, Rpp30, Rpp38, and Rpp40 (this entry) are involved in extensive, but weak, protein-protein interactions in the holoenzyme complex [1]. 
20.20 20.20 20.70 20.50 19.80 19.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.65 0.70 -5.40 15 216 2009-01-15 18:05:59 2006-01-06 12:26:07 6 3 179 0 151 218 0 217.90 26 71.17 CHANGED LppLL-.....s-FhsoaIKp...........................GshhhlS.csR.ssDNshsLhs...GpLhLpLsK-sYEcsGLpGKPschuG++th..RalVclDL+.sShthGpKuFpRlhWuhKshLs.hsssaLahphsos..uhsp.tpssshLssa...........psplhpscssppphpslhsPsLspschsspspstt.hp-......up-LhEWLGhVuls...lsssD.cs-saLSpYssP.-sssth...schlslpWpGFlsPphlhpLltslp+hh.......................ts.s.ssWhuLospGFucsslp...........GcssYTllhhssp ....................................................................hlp.tFhpthlp..............................G.phhhlo..ps.....p.s.....ssshs.lhs...GhLhLpLsK-sYEph.G.L.G+......s.........t..t......s....cc....................+a.llplsL...h......sh....sp.+ta.pRl.huhcphhs.....t.....h.....shLht.h.ss..................s.h.tth....................................................t.p.hp...p.thp...htp.l....h........P....h.t............t.t.t..................pp.......................s.-hh-WLuhs..t.......lph...ss.p.ssalSpYp.sP......pssp.h.............sphhhh.....phpG......hl.sphl..lh.t..hhp.h..........................t.....Whs.lsspuhscsslt......................................................................................................................................................... 0 51 78 117 +8417 PF08585 DUF1767 Domain of unknown function (DUF1767) Wood V, Finn RD anon Manual Domain Eukaryotic domain of unknown function. This domain is found to the N-terminus of the nucleic acid binding domain. 
20.40 20.40 20.40 20.60 20.30 19.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.18 0.72 -3.71 34 333 2009-01-15 18:05:59 2006-01-06 14:33:46 7 14 209 1 245 330 0 87.50 23 16.57 CHANGED sphLppht..ltlsspWLpphlst............s.sthspplhpphLssDl+-h..s...tssLPs..sltp..hpptpLp...........GshlLQlpplp-Iotshh.sphp .............................................htpht..h.l.s..p.alpthhs............................s.sthspplhpphLtsDLcsh......s........tss..LPs...sltp.......hpptpLs.......................GshlL....Qlppltslutsth.pp................. 0 80 130 199 +8418 PF08586 Rsc14 RSC complex, Rsc14/Ldb7 subunit Wood V, Finn RD anon Manual Family RSC is an ATP-dependent chromatin remodelling complex found in yeast. The RSC components Rsc7/Npl6 and Rsc14/Ldb7 interact physically and/or functionally with Rsc3, Rsc30, and Htl1 to form a module important for a broad range of RSC functions [1]. 25.00 25.00 89.30 84.80 19.60 18.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.28 0.72 -3.53 5 27 2009-09-10 21:44:18 2006-01-06 14:54:42 5 1 26 0 15 19 0 100.30 60 53.69 CHANGED huYYDVIuGLSuLE+ScpVoFospELpELT....cps--sRcsp-..ELp+s-pEcsKRVsVHGYLGG+Vuh+-AupAs.....Y-LsHTLLGGYVPRpQLESLSSsDFA .hGYYDVlAGLSALEKSsQVsFSssELQQLT.....QQscsscKuh-ssE..so+ucsoKsKRVsVHGYLGGKVoLuDAupsp.....Y-luHoLLGuYVPRpQLEuLSSlDFu 0 1 6 12 +8419 PF08587 UBA_2 Ubiquitin associated domain (UBA) Mistry J, Wood V anon Pfam-B_10238 (Release 18.0) Domain This is a UBA (ubiquitin associated) domain [1]. Ubiquitin is involved in intracellular proteolysis. 21.80 21.80 22.10 22.70 20.90 21.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.11 0.72 -3.77 7 138 2012-10-01 23:03:33 2006-01-06 15:47:57 6 5 132 2 94 130 0 44.70 45 6.48 CHANGED lD-sllptLSpTMGYs+D.-Ih-uLcp............sEs....NEI+DAYhLl+EN .....lccpllscLu+TM.GYs+-.-I.-ALcp............sEP......stIKDAYhll+EN.. 
0 17 47 79 +8420 PF08588 DUF1769 Protein of unknown function (DUF1769) Wood V, Finn RD anon Manual Family Family of fungal protein with unknown function. 21.10 21.10 21.20 21.80 21.00 20.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.51 0.72 -4.02 16 120 2009-01-15 18:05:59 2006-01-06 16:05:57 5 4 103 0 99 124 0 55.50 48 14.49 CHANGED Gs-LlFGNDF-+PI+chlPsu.hssuh+lhp.aIDPslcGDlYuDc..PYLYuPuLuSh .....GsDLlFGNDFD+PIRDpLPsG.hssAh+ls+WaIDPuL-GDsY.AD+..PYLYuPuLuSa................ 0 34 60 86 +8421 PF08589 DUF1770 Fungal protein of unknown function (DUF1770) Wood V, Bateman A anon Wood V Family The function of this family is unknown. These proteins are rather dissimilar except for a single strongly conserved motif (PDLRFEQ). 25.00 25.00 26.20 32.40 24.40 24.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.66 0.72 -3.40 12 76 2009-01-15 18:05:59 2006-01-06 16:12:56 5 1 75 0 61 73 0 97.40 42 56.10 CHANGED ApTlQTAols+.c..............................PSPp+..................DlNssT............uAsc+p...sls...pps.s-.sDSls...........p-.s-....spp......hlpPhsRR..p.........phPPlPDLRFEQ......SYLsSIcuA-.o........................WtcVAaIT ......................................ApTlQoApIp..p..............................PSstH..................DlNPsT............AAsc+p........Plsh...t.....ps-...s-uls.................sDh.-....Ppp.........sl+.Ph.tR+..p.........pLPP...LPDLRFEQ......SYLsSIcsA-...o........................Wt+VAaIT................................................... 0 14 31 50 +8422 PF08590 DUF1771 Domain of unknown function (DUF1771) Mistry J, Wood V anon Pfam-B_10757 (release 18.0) Domain This domain is always found adjacent to Pfam:PF01713. 
22.90 22.90 23.00 23.10 22.40 22.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.04 0.72 -3.96 30 501 2009-01-15 18:05:59 2006-01-06 16:21:49 5 28 240 1 355 500 1 65.70 29 10.99 CHANGED -Yp.....clRspAppthppRpchhpcuppAappGDt.....spA+pLSpcuKpttpphcchNcpAAptlacpsNp ..............................Yt.....chRppAtpptptRsphhp..........pAppAappGct.....ttA+tLSppG+tctpphcchscpAAc.tlacppN........ 0 112 203 290 +8423 PF08591 RNR_inhib Ribonucleotide reductase inhibitor Wood V, Bateman A anon Wood V Family This family includes S. pombe Spd1. Spd1p inhibits fission yeast RNR activity by interacting with the Cdc22p [1]. 20.10 20.10 20.30 20.70 19.30 20.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.55 0.72 -3.08 23 119 2009-01-15 18:05:59 2006-01-06 16:53:36 5 4 112 0 90 111 0 95.50 25 41.59 CHANGED uuLhoVGMRlRKSVsEGYKT...p..s..............t.s.h..................shtsstt.hssphEhsPFsuh.p.s.shhspsspssss..................sshpthsSup-uhtushs .....................ssLhoVGMRlRKuVs-GY+o....................................p..sh.................s.htshsshhssth.chhPhsu.h.phs.shhsptttsssp....................sshp..sssppt..t..t......................................................................... 0 16 44 75 +8424 PF08592 DUF1772 Domain of unknown function (DUF1772) Wood V, Finn RD anon Manual Domain This domain is of unknown function. 
24.70 24.70 24.70 24.90 24.40 24.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.87 0.71 -4.44 75 579 2009-01-15 18:05:59 2006-01-06 17:09:07 6 7 370 0 326 606 107 134.50 19 80.16 CHANGED Gshhuaoshl............hPAlt......plsssptlpshpslspts....shhhsshhussshs.shlAh..............h.thts..sssshlhsuuulhllushshThhh.plPhNstL.............tthp...sstsssshh....tshhspWstaNtlRolhulhusslh.lhAh ............................................Ghhhshuhhh............hPult...................ph.ss..pt.h.l..t.th.p..t..h.pth.........shhhs.s...hhhsshhs.hhhuh..............................thtt....sss.h..h.hhsuuuhhh.lu.h..hshThhh.......sP.lN.st.L...................................................tthp.......ts.ss..s.ssth..........pphhpcWsthshlRshhs.hsuhshhhhu.......................................... 0 115 217 284 +8425 PF08593 MUG2_C DUF1773; Meiotically up-regulated glycoproteins C-terminal Wood V, Bateman A anon Pfam-B_26890 (release 18.0) Domain This is the C-terminal part of some meiotically up-regulated gene products from fission yeast. The actual function is not yet known but the proteins are likely to be cell-surface glycoproteins. 25.00 25.00 25.30 26.70 20.10 24.00 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.72 0.72 -3.73 17 50 2010-01-05 10:44:21 2006-01-06 17:21:54 5 3 40 0 44 50 0 56.50 29 13.04 CHANGED chtht+t.shp+Rp...........sthsssslP......F.sspps.pstLPslsolpDlstLSc.QhppaLpG ...................................aLau+shshs++p...........sttsssshP.........stps.pPhL..sshusIhslshLS..QlShFLsG. 0 11 27 39 +8426 PF08594 UPF0300 Uncharacterised protein family (UPF0300) Wood V, Bateman A anon Pfam-B_20198 (release 18.0) Family This family of proteins appear to be specific to S. pombe. 
25.00 25.00 126.50 125.80 22.20 21.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.32 0.70 -5.41 6 9 2009-01-15 18:05:59 2006-01-06 17:25:57 5 1 2 0 9 11 0 209.40 25 43.02 CHANGED hhLpsppssaccLspaLpsGphPLpVLlHHVMLYcaYPsshQ-ALWsAVppYVpppVssstYTplHhhAspp+IGcIRMYLVcPcDIYshsssssWlsIsocsFpshlcLcpshptsslhpspsthpp..lhps.spSspEluWLhhhhulGSsuutFPlHAYLshKpplhpshlPcpl...hhhpcsDptlFpc.pshcchpsa.hpplhpDLshC-pap .....h....t..pshpplh.aLpssphPlpVLlHHVMLYchYPptlp-uLWpAVppYlpcpsssttYoclHhhAAp++lGcIRhYLlcPcDlaslsssssWlsIsscpFps.lcLcpsh.spslhpppsthpp..lhph.ssospEluWLthlhuhGssupuFPlHsYLsspppl.tphhPpsh...hahpppDphlFps.tshc.hpsa.hpphhp-L..C-p........................ 0 6 6 9 +8427 PF08595 RXT2_N RXT2-like, N-terminal Wood V, Finn RD anon Manual Family The family represents the N-terminal region of RXT2-like proteins. In S. cerevisiae, RXT2 has been demonstrated to be involved in conjugation with cellular fusion (mating) and invasive growth [1]. A high throughput localisation study has localised RXT2 to the nucleus [2]. 21.10 21.10 21.50 23.40 20.60 21.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.92 0.71 -4.41 17 118 2009-09-10 20:49:02 2006-01-09 16:37:06 6 2 111 0 90 121 0 136.40 32 32.14 CHANGED soNRGsKLhppuc.l..ttl..ssssshhpchlpYs..Ghp+tlLp.....................pttsphcp-ts-h-stpp---.-pctp.t.....sPhpplclcElLuPLspsu-lhsHPulS+sapSpsLpcLAhphlthlcpEQsslhphp+LLplhlGD .......................s.oNRGNKLptpu..chVppttL..ssstshhcctl-as....Ghp+..p..lLp...........................................pssshhDp-ss-l.D.....--.....-p...pc...spt.tst-.-sPasplplc........clLuPLppso-ls.sHPslS+sa...pScsLppLspphhthlcpEptsLhphppLhphhlGD................... 0 18 46 75 +8428 PF08596 Lgl_C Lethal giant larvae(Lgl) like, C-terminal Wood V, Finn RD anon Manual Family The Lethal giant larvae (Lgl) tumour suppressor family is conserved from yeast to mammals. 
The Lgl family functions in cell polarity, at least in part, by regulating SNARE-mediated membrane delivery events at the cell surface [1]. The N-terminal half of Lgl members contains WD40 repeats (see Pfam:PF00400), while the C-terminal half appears specific to the family [1]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.11 0.70 -6.00 13 313 2012-10-05 17:30:43 2006-01-10 12:04:56 5 10 193 1 188 340 0 278.70 24 27.85 CHANGED slslsplShAs-otELuVuhpoG-VllaKFps..............Nphas...ptpssuh-hp.tphphssp.sstLlDIpcRusssl+pG..FhPhollphp.pGploulppSNIGFVAlGYcsGollllDh.RGPAlIap.-slpc..........lsstpouhsss......lEFuIMphssDsYSSIlllsGTst.GpLhTFKIlPsusGtFsVpFssss...........hsscupIlpIssl......ss-sGpSAhAohsthQ.....sLupGlhlsGhVlsooss-IRllpsspoKssHKsa.chsltssuhuhlsh.t....cppuhlLlsLhtsGpl+sholPsL+EltshplP........hslcuphlppSsl.LtsGD.lhhpsG.pEusLholhsppupt......................tppsspDpLaNssttIPsRPp...lssLQWs+G.otYsosp-LDhLlGGssR.PsSK .....................................................................................................................................p...........................................................................................................................................................................................................................................................................................................hhhspt..u.hhh..h.h...............t.......h........................................t....h..h......................................................t.p..ptht.......pt..h..sll....so..ccph+l.hshs.s.pp....s..sa.Kth.....cs..shhspu..ssV..h.............hpsuhsLsshh.usGc.lhshSlPu.L+.lhcsphh........shh.chchs.p...shsh..ossGp.slhhs.uPo.....E......lphl......oh.tpst............................................................................................................................................. 
0 45 83 135 +8429 PF08597 eIF3_subunit Translation initiation factor eIF3 subunit Mistry J, Wood V anon manual Family This is a family of proteins which are subunits of the eukaryotic translation initiation factor 3 (eIF3). In yeast it is called Hcr1. The Saccharomyces cerevisiae protein Swiss:Q05775 has been shown to be required for processing of 20S pre-rRNA and binds to 18S rRNA and eIF3 subunits Rpg1p and Prt1p [1][2]. 27.20 27.20 28.10 27.50 27.10 27.10 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.88 0.70 -4.81 40 373 2009-01-15 18:05:59 2006-01-10 13:30:36 5 9 290 5 256 351 2 229.00 27 92.17 CHANGED M....ssW.D........---h-sssssst.......ssts+W--E...s---slh-SW-..t---pct..cppcsp.tsscspsKtphctchpccp+tpchppccthc............p.sst-phscKtRh++hpc-uDLpsAt-LFG.......................................htssshsslshF.pPpoKp-FpchpcsLspp.lssh..c.pshpYss.ahp-LlRslstsLssssl+KlsooLssLhsEK.KtEKtuc...............sspKKputsts+sslssttcp.stasthssshhDD.....DDFM .......................................................................pW..-........tpt.p....................hppa-sE......--psl......h-.s.....W-.............tp--pc..........pp........t...p......st.......t....st.......h.p.tKt..th.tt...pht....c+pptpch....tpcph.t....................p.ssp-phsc+h+hc+hpc-uDL.ptAp-.hFG.........................................................t...ssshpslshh.pP.po+p-Fpchsch...Lssp.lsth...pcsh.pYss..alcslh+plsts.....L...p.ss-.......l.KKlsso.LsslssEK.K.pEKtuc.................tttK.K.pst...st.........spsph.s....s...pp....sh....s.s.h.sth....t..s..s.....hh.-D.........-D.FM.................... 0 93 144 207 +8430 PF08598 Sds3 Sds3-like Wood V, Finn RD anon Manual Family Repression of gene transcription is mediated by histone deacetylases containing repressor-co-repressor complexes, which are recruited to promoters of target genes via interactions with sequence-specific transcription factors. 
The co-repressor complex contains a core of at least seven proteins [1].\ This family represents the conserved region found in Sds3, Dep1 and BRMS1-homologue p40 proteins. 22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.38 0.71 -4.79 54 585 2009-01-15 18:05:59 2006-01-10 13:48:35 6 6 246 2 375 547 0 209.60 23 51.78 CHANGED ppshsclsplEppFsph+-ph................Yc-+LspLppcLp.lt..p.........................................Gsps-ahchhpclpcp+chclphsphhpcaplpslcpchps-hptscpcappphppl+-plhsclpp+ht+.lpc-RpphDls........................ssshshph...................................................................hps+phpct...sp.h................................................................................................................p+p+hsptts.htt...hh.........................l................................................................................................hpsp-lt-Dhpsh ..............................................h.pph.plEcpFsp.....h+-ph.................Yc-+LspL.ppcLpplt....p.................................................................sptsEYhc.lppLppphch+l.....phs.t.h.hp..php.lpslcpcattEhptuppcacppthtl+-plhsclpc+...hpc.l...pc-.+p.sh-ls.................ssphthp.......................................................................................................ht.pR.hpp.t...........................................................................................................................................ppp+hs..st............h....................................................h....................................................................................................................................................................Lp..-l.pDht......................................................................................................................................................................................................... 
0 102 177 287 +8431 PF08599 Nbs1_C Nbs1_N; DNA damage repair protein Nbs1 Mistry J anon manual Family This C terminal region of the DNA damage repair protein Nbs1 has been identified to be necessary for the binding of Mre11 and Tel1 [1][2]. 25.00 25.00 25.30 40.60 24.40 23.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.04 0.72 -4.06 5 81 2009-01-15 18:05:59 2006-01-10 13:57:02 5 6 52 0 33 71 0 64.30 74 8.88 CHANGED pKNFK+FRKVsYPGAGuLPcIIGGSDLlAHsR+KNSELEEWLRQElEEQsQpsREESLADDLFRY L.KNFKKFKKVsYPGA.G..K.LPHIIGGSDLIAHHARKNo.ELEEWLRQEMEVQsQ+AKEESLADDLFRY............ 0 4 6 14 +8432 PF08600 Rsm1 Rsm1-like Mistry J, Wood V anon manual Domain Rsm1 is a protein involved in mRNA export from the nucleus [1] 20.90 20.90 20.90 20.90 20.50 20.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.50 0.72 -4.13 11 189 2012-10-01 20:49:39 2006-01-10 16:15:18 5 7 140 0 130 193 0 89.60 23 20.42 CHANGED AhhLAhhGW.............suhscuphG.....LssCssCaRRLGLWMaKsKpsuths........hstLDllpEHh-aCPWhsspuQossucssttptt.t.....uGWclLspsL..........K ....................AhhLulhGW...................................p.shts...phu.........lhsCptChR+lGLWha...pthcsu.ss...............................s.s.hsshppHhpaCPahs.....p...st.p.....tp...................................................................................................... 1 33 53 88 +8433 PF08601 PAP1 Transcription factor PAP1 Mistry J, Wood V anon Pfam-B_20528 (release 18.0) Family The transcription factor Pap1 regulates antioxidant-gene transcription in response to H2O2 [1]. This region is cysteine rich. Alkylation of cysteine residues following treatment with a cysteine alkylating agent can mask the accessibility of the nuclear exporter Crm1, triggering nuclear accumulation and Pap1 dependent transcriptional expression [2]. 
22.70 22.70 22.80 22.90 22.60 22.60 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.50 0.70 -4.76 10 181 2009-01-15 18:05:59 2006-01-10 17:16:00 5 6 130 2 118 186 0 264.60 22 57.66 CHANGED sS.........htss.pt.hshsstpFsh-hscFsucLstususcssslsK...........tpssstsos.........lsulpspssps.s.sssssss.sosp.ssts.sspp................sshusD..h.tsstsshps.u........sssssoossoPSsuusuasspp.ossusss.P..spSPsshtpsss...............................................hhhp.t.s.s.s.sttssssshshlDssls....sss.h-s.LFtsaREPQcsshsp.DaD.......................................ssLhs-p.shs-.hsPht..sp....s.......supssssppstssphcs...c-s--sVssuccsshLsCocIWDRIoSHPKa..u-IDIDuLCSEL+sKAKCSEsGVVlNpcDVcssLs+a .......................................................................................ttttst..........................................................................................................................................................................................................................................................t.ss.......t......t...st.t........t..t.t...............pP...........................................................................................................................................................t.............hs....ht..........s..t..s..h.t.hppsppt.h.t..s...h................................................................sshhsct.....sc.ht....s......t..........t.................................t............sst.s..h.ht...p...hpt........pt......tpp.lsst..ttp.hpCscIW-+lp........s.hs+a..u-lDlDsLCsELptKA+CS-tG.lVlpppDlpphlt+.............................................................................................................................................. 0 24 58 100 +8434 PF08602 Mgr1 Mgr1-like, i-AAA protease complex subunit Wood V, Finn RD anon Manual Family The S. cerevisiae Mgr1 protein has been shown to be required for mitochondrial viability in yeast lacking mitochondrial DNA. 
It is a mitochondrial inner membrane protein, which interacts with Yme1 and is a new subunit of the i-AAA protease complex [1]. 25.00 25.00 62.50 26.00 19.50 22.90 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.40 0.70 -5.44 6 63 2009-01-15 18:05:59 2006-01-10 17:48:11 5 3 43 0 40 59 0 252.20 28 91.40 CHANGED MulaTPPupstsspD+.................SuspucsotslssstpFasR.PSLGL+LWGPLVPASDNhsGLaoLlulQoslGhhhhpRhRpL.........................................t..hlK+DIADFPoLNRFSpTpGDhah..............uPh........................hpcRFso.....h++sLallsGSlLLsQShLEhsRLThLhYDPWh-EAKsVR-KpFaNslV+aYHEGlDss+hhsKD.hsGp.hshNlPEV+QulAlsRApscApN.lopWFGPl-.YKPMoFsEaLDKlEaaL-hh-a......................hQshpptpcsthshlsplsscsptL-hh.ctNcpN++Rhc+lLcpp.p..s-lscu.uhslhhcupsp.hh...hhRulIL.tDpcsspDl-LcplWslY-PWssLAL-TuLSIKFlPos ...................ahPPst..s.s..tst.................................st.tstt.......p......phh.+.PSlGL.hWGPLsPAsDNh.uLashhshQ.hlGhhhhhhhRth................................................h.p.l..............s...........................................................................................................................................h.......h...h.hhhGshlh.tShLEhsRh..L.YDPWh-Ehp.hR..................................................+hthcsts...hWaGshp.YpPMshppahp+hp.alp.ht...................................h.pht....t..t....h.......hpplp..........pp.p....p.h.ph..ppspphh.p.l...........................p.s.t.ths...........ttp.............+.s.l....shp..p-.php.hh..apPW.pLt.-TphsI+hlPp.................... 
0 6 22 38 +8435 PF08603 CAP_C CAP; Adenylate cyclase associated (CAP) C terminal Finn RD, Bateman A anon Prosite Family \N 23.80 23.80 23.80 24.00 23.70 23.50 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.87 0.71 -4.89 35 461 2012-10-02 17:34:32 2006-01-11 12:58:49 6 11 296 10 293 478 8 154.60 41 33.01 CHANGED +cPs....phEL.cGpKWhlEp.cssp....slllp-s-hcpsVaIapCpsoslpIKG.KlNulolssCcKsullhDslVSul-llsspshplQVhGplPTIoID+oDGsplYLS+-Sl..ss.EIhouKSSplNl.l..Ps....c-sDa.pEhslPEQacoph..cs......uKLhTpss-ps ......................................tPshhEL..-GpKWhlEppcsps......slllp-.s-.hc.QslaIa+CpsoslplKG..KlNuIol......DsCc.KhulVh.DslV..u..s..lEllNspslpl..Q..........V..h........G..pVPTIoI-KoDGsplYLSc-SL...ss..EIsouKS..S-hNlhl........Ps...........t-uDa..p.................EhPlP.E..Qa+ohh...ps.......sc..LhTpss-h..................................................... 0 99 154 233 +8436 PF08604 Nup153 Nucleoporin Nup153-like Wood V, Finn RD anon Pfam-B_56527 (release 19.0) Family This family contains both the nucleoporin Nup153 from human and Nup153 from fission yeast. These have been demonstrated to be functionally equivalent [1]. 
25.00 25.00 47.40 39.50 16.40 23.50 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.97 0.70 -5.83 4 78 2009-01-15 18:05:59 2006-01-11 14:59:46 5 9 42 0 36 71 0 425.60 55 34.34 CHANGED PoTopsu.Nas-.sLoRPoLaRuH..LsFs.L-........SsALpsQPSoSSAasIuT..SGFSLlKEIKDshSQHDDDNISTTSGFSSRASDKDlssoKssSlPsLWSPEs-RSpShspsoppSsKKPuFNLSuFGo.Ssu.GNsSlLs.opLGDSPFYPGKTTYtGAAAs.RSSRhR.sTPYQAPlRRQhKAKPhu.uQ.hGVTSusARRILQSLE+MSSPLADA+RIPu.ssSsL...Spsh-p.slDh.c..uK+cKhD..hPPVQRLsTPpshslusNRShahKPoLTPuus.ptsscclDp..sspsts+cssL.tps.cppp.......huYPhhSoPAuNulos....GGGKMpRE+soa.huoK.hp.EEl.-sPlLPcIsLPl.oosuLPoFsFoosphosT.p.oPlshso.A.oppsphps....sSs.FoFSSPIVKST..cuss.sPS.Shsho...FSVPshKh...scsouscshl.slhpstss+sAssposssc-.....p.Ghh+PAKTLKpGSVLDhL+oPGF.SSPs.pssAsp.sssoP .................................................................PSTopou.NasD.VLoRPoLaRSH..LshohL-........SsuLaCQPSTSSAhsIGo.....SGFSLVKEIKDSTSQHDDDNISTTSGFSSR..ASDK.DlsVSKssSlP....LW.S....PEs-...RS+..S......lSQpouoSSKKPAF.NLSAFGoLSsSLGNoSlLpoSQLGDSPFYPGKTTYGGAAAA...Rpo+hR...sTPYQ.A..P....VRRQhKAK.hs...uQuYGVT......SSTA.RRILQSLEKMSSPLADAKRIP.S...s....SSPL...sSslDRSslD.h.s.shQu.K+c+.hDSph...PPVQ+Lh.TPKslSlAsNRohYFKPSLTPuuchp+sspRl...Dp..cpsst...h.ccphhs.s..p..s...tpp.pp......shSYPp.hS.hPAuNGLoSt..............sGGGKM+.RERoph...su.oKs.E..EEh..ElPVLPcISLPI.oooSLPoFsFSSP...oso.us.S..P.ls.so....t..uhs.sclphsoss...ssuSPhFp..FSSPIVKST..pAslhPPo.S....lGFT...FSVPVAKs..........sEhs..us.s.s.h..pslh.os.ss..pc......sssls.S.sspcc....pph-GsF+PAK..hLKpGSVLDlLKoPGF.sSsphs...s.u.t.......................................... 0 7 9 17 +8437 PF08605 Rad9_Rad53_bind Fungal Rad9-like Rad53-binding Wood V, Finn RD anon Manual Family In Saccharomyces cerevisiae the Rad9 a key adaptor protein in DNA damage checkpoint pathways. DNA damage induces Rad9 phosphorylation, and Rad53 specifically associates with this region of Rad9, when phosphorylated, via Rad53 Pfam:PF00498 domains [1]. 
This region is structurally composed of a pair of TUDOR domains [1]. 22.00 22.00 22.60 22.00 21.90 21.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.67 0.71 -4.37 8 79 2012-10-02 16:56:36 2006-01-11 17:18:52 5 3 79 0 58 87 0 136.30 29 10.55 CHANGED DsuhLspsDII.tsAVWsta..shsaYPG+llup..spspst.shVcFE-Gs.h-VpssDlahLDlRIGDpVcsct...s..palVpGLcsphot-sts.........I+ClRGYsTVhL..++hpp...uG+lu.pslhp.slScIal-lppW .......................ls.pplh...usWsth...shtaYPuphluh...sssp.p.hhVcF-Dup..tclcsps.l.+h......L-LRIGDtV+lct...s+hs.alVsGhppp...s...tss.t............lpsl+GasoVhL..Kp+ps......sG..th.spp.s.lhs.PlSpIaL-ht.W............................................ 0 7 26 49 +8438 PF08606 Prp19 Prp19/Pso4-like Wood V, Finn RD anon Pfam-B_6384 (release 18.0) Domain This regions is found specifically in PRP19-like protein.\ The region represented by this family covers the sequence implicated in self-interaction and a coiled-coiled motif [1]. PRP19-like proteins form an oligomer that is necessary for spliceosome assembly [1]. 21.50 21.50 25.60 30.70 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.16 0.72 -4.24 30 339 2009-01-15 18:05:59 2006-01-11 17:41:08 6 30 290 0 243 334 5 69.20 57 13.94 CHANGED ssssSIPuLLoshQsEWDAlhLEsFsLRppLpps+pELSpALYppDAAsRVIARLh+E.RDpA+ptLspls .......s.osTSIPulLpshQ......sEW.....DAlMLcoFsLRQQLppsRQELSpALYQHDAAsRVIARLpKE.+spAR-ALupl.p.................... 0 90 140 203 +8439 PF08534 Redoxin Redoxin Mistry J anon manual Domain This family of redoxins includes peroxiredoxin, thioredoxin and glutaredoxin proteins. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.96 0.71 -4.65 68 9684 2012-10-03 14:45:55 2006-01-12 14:51:33 5 39 3870 123 2384 24093 13509 140.60 20 71.03 CHANGED p......sGsp..h.P..shs...............h.t.stsspshslsp.......hpGKp.hll....shhsuhasPs................Cstpt...hlpch.sphhpspusshlsl..........ssssDsh........stphhuc...........ps.shhh.hsD..t.sushscshG..h.............ts.phhllsccG..pltththssts.t........hsshpshL ....................................................................Gp...h..P....shp........................h.........t...t...s...h...p....p...h....s...h......ss.........h.p..G.K.h....hl.l.....s.h..a.s....o.........a.....s.s....s.....................Cp.t.ph.........h.p.ch....tp............t.....s.....p......s.......s......s...l...l.s..l....................................s..s...s...s..psh............................st.pa.hsp..............................................tu.h.s...h..s....h....l.........h....D..............s...s...u...p.....h....s....p.....s....h..G..l..h..........................................ts...p.s....h.l...l....c..p.sG.....pl.h.h.hphss................................................................................................................................... 1 675 1408 1941 +8441 PF08608 Wyosine_form Wyosine base formation Mistry J, Wood V anon manual Family Some proteins in this family appear to be important in wyosine base formation in a subset of phenylalanine specific tRNAs. It has been proposed that they participates in converting tRNA(Phe)-m(1)G(37) to tRNA(Phe)-yW [1]. 
21.30 21.30 21.30 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.83 0.72 -3.86 72 579 2009-01-15 18:05:59 2006-01-12 16:46:12 7 11 497 2 327 541 82 61.60 33 12.94 CHANGED hupPsalElKuhh..ahG.Sp....tLohsshPtap-V....h-Fsptl.p.....ht.......Yplss-pttSclsLlup ......upPsalElKu.ss..asG.Ss...ttLohs.shPhap-V....hcFs.tl.t...............................................atlss-+t+SsssLlA.................... 0 95 193 273 +8442 PF08609 Fes1 Nucleotide exchange factor Fes1 Mistry J, Wood V anon Pfam-B_36022 (release 18.0) Family Fes1 is a cytosolic homologue of Sls1, an ER protein which has nucleotide exchange factor activity. Fes1 in yeast has been shown to bind to the molecular chaperone Hsp70 and has adenyl-nucleotide exchange factor activity [1]. 22.30 22.30 22.30 22.60 22.20 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.24 0.72 -3.50 17 214 2009-01-15 18:05:59 2006-01-12 17:19:00 5 9 186 0 151 200 0 92.50 33 31.88 CHANGED M-p....LL+WSIpss..........................ssspsussssssP..............................lsPchLspLF..GGPs-AsLMKtAMtslps..scsoLEsKlIAFDNFE.LIEslDNANNl ..................................................................hppLL+WoItso............................................ts..s..tps.sps...sp.s..............................................pth.sschL.pplh.....tsPs-upLMKpu.......hp....llps........spso...lEs+hhAhDshE.LlEslDNANsl... 0 46 86 129 +8443 PF08610 Pex16 Peroxisomal membrane protein (Pex16) Mistry J, Wood V anon manual Family Pex16 is a peripheral protein located at the matrix face of the peroxisomal membrane [1]. 
25.00 25.00 29.20 25.60 24.00 23.80 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.13 0.70 -5.44 18 280 2009-01-15 18:05:59 2006-01-13 09:38:16 5 9 214 0 186 267 0 296.50 26 84.37 CHANGED hp.tYpcalhcNsss...lu...plEosl+....hloYllsGRFscuchhSEhlaoh.pLLshhpDtIlppthpp...............................................h.ts..hp+lshhLshlpasElhlEhuAc+hhG-ps+WhsllhlphhKAshRlhl.LhhppuchlhosPls.h-ccsptppt.ppptspss...............................psssashpRo....G+sl....t...Ps.t.p...ht......lspshsppshct...t..p.lst.thlAEhLaIsRPLlalluhtt........hup+S........WpPWlluhul-hhShpLhpcpcph.........osh-+tEhpRRsht.....lhaah.hRuPFY-paT+s+lhpllphl.ptlPlhullutslhDYLshWpch ......................................................................................hYppaltcNss....ss...plEosh+..........sloal...ls..G.Rats....u.chhS.EhlaohspLlshhpD....t.llppthpp........................................................s......pplhhhLphlphsE.lhhEhsA..p+h..hG.cpt..............+Whllsllphh..KA...hh......RlhL.lhhh.p..sp.h.....hsPs..ls.h-hcspt.t.tttt..........................................................................................pp.sash.Ro............shsl.pl.ss.........sshp......h..........hpphhp.tpphp............p..l.........s.......h...t.h.hAEhlaIhRPLla.hlslth................hsp+S................WpPWlluhsl-hsuh.....p.Lh...pp.t.ht..............................................othE+t..Elp+Rshh............hhhal.hRusFY-......pho.csh.l..thhphh.p.hPhhs....hlst.l.-a..hhpp........................................................................................................ 0 64 101 148 +8444 PF08611 DUF1774 Fungal protein of unknown function (DUF1774) Mistry J, Wood V anon manual Family This is a fungal family of unknown function. 
22.80 22.80 23.30 23.80 22.50 22.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.35 0.72 -3.83 14 95 2009-01-15 18:05:59 2006-01-13 10:43:53 5 4 86 0 77 87 0 93.50 42 31.85 CHANGED RIlANVFIWshhlhsthalhhhpDaslGaulShLhhuLultQhhhKlhALQWIFAFlIhulhhVhSlhsulsthht+chhhp.........-pERtPLLs .....RIlANlFI..Wsh..hlaGthaIhh..hpDashGauLSlLohuLultQhh.h..K..l.IuLQWIFAFlIhulhhlhSlhsulsthhs+sh.htt......ssDpERtPLL............... 0 13 36 60 +8445 PF08612 Med20 TATA_RF; TATA-binding related factor (TRF) of subunit 20 of Mediator complex Mistry J, Wood V anon manual Family This family of proteins is related to TATA-binding protein (TBP). TBP is a highly conserved RNA polymerase II general transcription factor that binds to the core promoter and initiates assembly of the preinitiation complex. Human TRF has been shown to associate with an RNA polymerase II-SRB complex [1]. This Med20 subunit of Mediator is found in the non-essential part of the head [2]. 
25.00 25.00 25.00 28.10 24.50 24.80 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.61 0.70 -5.21 32 235 2009-01-15 18:05:59 2006-01-13 11:26:05 6 2 198 11 160 239 0 213.80 26 92.51 CHANGED Msl.sh...............lhh..sssp...s.tTlsphpcpLps.tsshtGpWsl-hcha+ssspshs..........................+hhhsls.ocpPspshhlhssss...........................................................ss.sDshhphlhtKlpshapp+pslps-ssts.Ypl.....GDFhlRlGsVh.ss...sh+GlllElEa..............................................................................................sssthspstsllc-Flpph........s.sts+.sh................h.ppthspl.................................hDhshQ.Yhclhs ....................................................................................................................t................hh...sssp......s.tslp.hpcpl.p......suttt..Gpasl-hcha+ss.spshs....................s.......................+hhaslp.Sch.P.t.psFslhcsss................................................................................................shhsD.s.hshlh.hKLpshapp.+pss+l-spus.Yph...............sDFhl+lGsVp.ss...........sh+GlllElE.Y.............................................................................................tssss.hss...shpllpEFlppa............thstsp....................sshh.tpptsthh...............................................s..DshhQ.Yhchh......................................................... 0 45 76 127 +8446 PF08613 Cyclin Cyclin Mistry J, Wood V anon Pfam-B_6792 (release 18.0) Family This family includes many different cyclin proteins. Members include the G1/S-specific cyclin pas1 [1], and the phosphate system cyclin PHO80/PHO85 [2]. 
21.00 21.00 21.00 21.00 20.60 20.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.84 0.71 -3.80 9 1457 2012-10-03 00:42:12 2006-01-13 14:18:39 6 13 314 4 1040 1807 12 153.60 21 39.34 CHANGED ccllptlsthlsRhsshssssspsSppstss.ps....................................Fhsh.s................................................................PslultsYhtRlp..........+astsss.....sVhlssLlYlcRlhc.........tphp.shslsuhNh+RLhlsulhlAoKhhsDhpho.Nppau+luGlsLcELNcLElsFLhhlDFcL ..........................................................................................................................................................................................................................................................................................................................t....................................................................................................................................................................................................................................................................................................................s...h.s.....l...p.a.l.tclh........................p.hs.t.h.o..........sshlhs.....hhYl.c.Rltt....................................t......h...h.l..s..........p..........h..s..........h+..R.......l.........lls...ulhlAsK........h.......h..s..D......................h.....a....s....N....p....t....a.u........c.............l........u.........G...l.........s........h.........p..E......lNtLEhpFLh.hlsacl..................................... 0 377 618 878 +8447 PF08614 ATG16 Autophagy protein 16 (ATG16) Mistry J, Wood V anon manual Family Autophagy is a ubiquitous intracellular degradation system for eukaryotic cells.\ During autophagy, cytoplasmic components are enclosed in autophagosomes and delivered to lysosomes/vacuoles. 
ATG16 (also known as Apg16) has been shown to be bind to Apg5 and is required for the function of the Apg12p-Apg5p conjugate in the yeast autophagy pathway [1]. 35.00 35.00 35.00 35.40 34.90 34.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.43 0.71 -4.48 16 330 2009-01-15 18:05:59 2006-01-13 14:49:26 6 13 233 8 202 320 1 170.80 25 44.66 CHANGED W+pclhcpLppRD+hp.psap-lhpph.................ppL.-csth.th.sptLps......t.................tt..usshsssht.....t.tthhhthpt-Lsphh+ppuchtppllplssplpcpcpchtppppplpplppphppLcpcl+cLcpplpp+p+sspsLpDEltuLplphshLE-+lp+lpcENppLlpRWht+tspEA-tMNp .................................................................W+pch.ttLp.Rsphp.th.........p....lh.th..................spLh-.+sshhph.stphp.................t...t...t..........................tt......s....t..t..st...h.t.s..s.tht....pht.....phhhphppELsphp+ppuc...........hspplhphspphpphcp..phptpptp..lsphppphspLcpchpchcpplp-hp..+t....p...........plp.DEhtuLp.....l...ph.......shhEc+hcch.......pcENpcLlpRWMtcpsp-ApthNt............................... 0 53 96 150 +8448 PF08615 RNase_H2_suC RNase_H1_sml; Ribonuclease H2 non-catalytic subunit (Ylr154p-like) Wood V, Finn RD anon Pfam-B_36578 (release 18.0) Family This entry represents the non-catalytic subunit of RNase H2, which in S. cerevisiae is Ylr154p/Rnh203p Swiss:Q12338 (. Whereas bacterial and archaeal RNases H2 are active as single polypeptides, the Saccharomyces cerevisiae homologue, Rnh2Ap, when expressed in Escherichia coli, fails to produce an active RNase H2. For RNase H2 activity three proteins are required [Rnh2Ap (Rnh201p), Ydr279p (Rnh202p) and Ylr154p (Rnh203p)]. Deletion of any one of the proteins or mutations in the catalytic site in Rnh2A leads to loss of RNase H2 activity [1]. RNase H2 ia an endonuclease that specifically degrades the RNA of RNA:DNA hybrids. 
It participates in DNA replication, possibly by mediating the removal of lagging-strand Okazaki fragment RNA primers during DNA replication. 20.80 20.80 21.80 20.90 19.90 19.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.82 0.71 -4.33 43 277 2009-01-15 18:05:59 2006-01-13 16:09:50 6 8 240 10 196 272 0 130.50 24 66.56 CHANGED ssplLPscIp......asGsss.sscaFps......................................................pspppspph.........................................................hpsaFRGRpLhGcplslPp.uapGhlhppspph...........................................................................scspspp.......................................................................................................hp.pupFpc..hsh......................................Ws+-shP.tssDshhcsl.cahplupsl ...................................hphLPscIp........hsG.sss..sspaFps..............................................................................................................t..ptpstth.............................................................................................................................hpshFRGRpLpGppls..lPp.GapGhVhpp.ppp.....................................................................................................................................................................................................................tptpspt................................................................................................................hpspup.F.sp..hsh.....................................................................Ws.h-phP.sssDs.hhpuh.pWhplAps.............................................. 0 72 115 164 +8449 PF08616 SPA SPB_interacting; Stabilization of polarity axis Mistry J, Wood V, Bateman A anon manual Family Swiss:Q99222 has been shown to interact with the outer plaque of the spindle pole body [1]. 
In Aspergillus nidulans the protein member is necessary for stabilization of the polarity axes during septation [2]. and in S. cerevisiae it functions as a polarisation-specific docking factor [3]. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.20 0.71 -4.37 27 396 2012-10-02 14:18:06 2006-01-13 16:54:27 5 12 241 0 281 674 2 111.20 28 17.65 CHANGED tpshLphhshhss.lhhL.hsAlLtpKRllhluhppsusp..........lspalLuhstlhss...sshhpth.tp....FPY..hslSp....l-tLpchsu..........aIsGssNPhF.....cpppph.WDllhDl-ssplhlup ................................................hhphhsshh.hp.lhh.L.aphlLhtc.lllhuss....ss..................................suEhVLuLs..u...llss...........ht.hp.th.tph........................hP...Y..hslpc.............hsch..p..t..ssu..........................................hIhGV...TNPhF.......tpphph..W.s.h.l.l..clsssph....s.......................... 0 97 144 217 +8450 PF08617 CGI-121 Kinase binding protein CGI-121 Mistry J, Wood V, Bateman A anon manual Domain CGI-121 has been shown to bind to the p53-related protein kinase (PRPK) [1]. PRPK is a novel protein kinase which binds to and induces phosphorylation of the tumour suppressor protein p53. CGI-121 is part of a conserved protein complex, KEOPS. The KEOPS complex is involved in telomere uncapping and telomere elongation [2]. Interestingly this family also include archaeal homologues, formerly in the DUF509 family. A structure for these proteins has been solved by structural genomics. 
21.40 21.40 23.00 22.30 18.30 21.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.79 0.71 -4.56 46 397 2009-01-15 18:05:59 2006-01-13 17:30:53 5 8 349 6 278 392 48 150.60 22 83.16 CHANGED acslpss..........t.lhp.hhttp...................thphshlsuph...................lhutp+lhtAlh+Alp................shpps.phpo+sltsEIlhpLSssppIs-ul+paGlpcs.sssllslhls.................sp.....ppthpc...lhp.hlcupth.......................shsp..t...tphsDhppl+KhYKlsstt.......................t.tpltphllsphAl+ .................................applpNs.....lhpphhttp...............................phshlsush.......................................lsshhplhsAs.+A.lp.................................sh.pps....chpo+slpoEllhsLSsspp....Is.-Ah+caGlscs..sss.llllhhs..................................pppp.hpp.......ltp.h.lcGptl.......................................................s.hpp..........phschspl+K.hYclss.t..........................t.tpl.phlhtthuh........................................... 0 83 159 233 +8451 PF08618 Opi1 Transcription factor Opi1 Mistry J, Wood V anon manual Family Opi1 is a leucine zipper containing yeast transcription factor that negatively regulates phospholipid biosynthesis [2]. It represses the expression of several UAS(INO) cis acting element containing genes and its activity is mediated by phosphorylations catalysed by protein kinase A, protein kinase C and casein kinase II [1]. 
20.10 20.10 20.10 20.60 19.80 19.00 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.29 0.70 -5.56 4 279 2009-01-15 18:05:59 2006-01-16 10:36:52 5 4 125 0 221 291 0 159.50 14 68.11 CHANGED Ds-hh.tsStLstl+............................................to.thYppoKsaSPRhR.GAEhVEpsl.....PVssTlsssspcoGlEusscathpphspspsSSso..pt.pp+..................pppp....c+lpushsshp.p-tpts..o.-h......................susscss.s.sspThsshDDpposphoso...........sptsussssspppSpWQp...clhlosouLu.uMSpEShKSL+YCLphL+hAsu+LtpslstLpsslschs.....psshstshs.............tpssptp...hspplTtLKtDVluTI+pVlcVVupYAGuALPEsARNhVRpalLSLPtRWupsopoo........................................ssttP..sppstsuhsspAAp+lLsLApEuLDhhuplhsllspoL-+AEtWsE .....................................................................................................................................................................t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 46 115 186 +8452 PF08619 Nha1_C Alkali metal cation/H+ antiporter Nha1 C terminus Mistry J, Wood V anon manual Family The C terminus of the plasma membrane Nha1 antiporter plays an important role in the immediate cell response to hypo-osmotic shock which prevents an execessive loss of ions and water [1]. This domain is found with Pfam:PF00999. 23.30 23.30 23.40 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -12.81 0.70 -5.20 25 146 2009-01-15 18:05:59 2006-01-16 11:42:25 5 5 122 0 91 137 0 434.30 23 49.53 CHANGED WMsRLP+lspsu+.ShSl+RsDTpss..sstp..t..................hs.sshs.TsGlPht.hGGhhRRh+..cccpcsts.....................sp...sshptcRcpc+cc.psthhshG...shspssh...............hPp.cttps..pt...........p.ttpttt.t........................pppsp.sptsp..sppc-ptpp......sspAYtEGcplIIEDccG-llcshclsptpsttt.............................spscs-tspcshs.....s..shtpl++plupa.shttt.......................tthc+sps..................scs+pcpsaAYphsNpIIlEsEDGEVl+RYcIs.s+sp..................sppscp....usV....ls+shohlGlcs...............................................................sppp..tp.ps.tspsppplstcpssspclhsstcsp.........................................stsspsh.sspscpplpcphsp.httsss.............t..s...pspspcDo-s-s......................................p..--ppETssER+RRLuALGphs.ssc-c-DcE 
...............................................................................................WMsRLP+lpptu+.ShShp+sDsps...tp.t.sp..................hs.sshs.s.Ghs....GshhRRp+..cccptppt......................spp...ssht.p+p..cpc...thhshG...shsposh.......................................................................hPp.+p.ps...........................t..................................................................pppsttppptp.p.pp.p-pttp..........shpsYpEGcplIlEscpG-llchhchpptp.tt................................tpstspt.ttpshs..........shtph++thssh.tht.tt.......................................hp+tpp.....................tcs+.pppshAYphuNs.....lIlEsEDGEVl++Yclssctt....................spp.ps........ssl..........ls+hhohhGhps...........................................................................................................hsppph.tphpph.sspsp.....pphspp.......sss.pthht.tppc..................................................................................s.t.ttphtp.....st..t.t...t.........................t......t..s.stp-s.psps....................................................tttpptET.sERcRRLsALG.hs.s.....tpppD....................................... 0 14 45 76 +8453 PF08620 RPAP1_C RPAP1-like, C-terminal Wood V, Costanzo M, Finn RD anon Manual Family Inhibition of RPAP1 synthesis in Saccharomyces cerevisiae results in changes in global gene expression that are similar to those caused by the loss of the RNAPII subunit Rpb11 [1].\ This entry represents the C-terminal region that contains the motif GLHHH. This region is conserved from yeast to humans. 
21.10 21.10 21.10 29.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.39 0.72 -4.13 35 280 2009-01-15 18:05:59 2006-01-16 16:11:19 5 4 247 0 201 281 2 73.90 43 7.90 CHANGED spphRFDFcGsLl................ssppp.....tshssppGLHHHu-sPchAGYTlsELhpLuRSohsuQRslAlpsLG+IL.h+hspt .........h.phRFshpGpLl...................................s.p.s......hslPsphGLHHHG.--P-tAGYTltELhpLuRSslssQRslAlpsLupIlh+htt.t......... 0 64 106 161 +8454 PF08621 RPAP1_N RPAP1-like, N-terminal Wood V, Finn RD anon Manual Family Inhibition of RPAP1 synthesis in Saccharomyces cerevisiae results in changes in global gene expression that are similar to those caused by the loss of the RNAPII subunit Rpb11 [1].\ This entry represents the N-terminal region of RPAP-1 that is conserved from yeast to humans. 23.30 23.30 23.30 23.80 22.50 23.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.31 0.72 -4.37 39 266 2009-09-11 10:38:13 2006-01-16 16:14:21 5 4 229 0 189 264 2 48.20 35 5.46 CHANGED sppIcpENhppLp..............sMStpEItpEpcELhssLDPpLlphLh+Ruptcpss ........ppIccENhp+Lp..............sMS.cEIhpEppcLhspL...DPsLlphLhpRtphct..p................. 1 56 98 151 +8455 PF08622 Svf1 Svf1-like Wood V, Finn RD anon Manual Family Family of proteins that are involved in survival during oxidative stress[1]. 
25.00 25.00 147.40 61.10 23.40 18.10 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.13 0.70 -5.45 28 181 2009-09-11 15:23:04 2006-01-17 13:27:54 5 2 133 0 129 169 0 327.80 39 86.05 CHANGED TsVETQTFYFss.su.phGFsQlIaSNlh.Gl.psTsQFsh+lFsscss............c.slWpSspLcN..Fc.-tssFhA....DslulcL..............sc-usp.................YsIKusl........scculV-LshpR..hsPGFplG...csGsoaY.............GsD.psPWGoMRHsFWPRssssGTIss...........ps..............................................................psl-lps......huhFltAl.QGMKPHHAAupW.NFlsFQo.........tpaSAlhMEFTTP.SYus............TpVslGuIsccscI...lhsus................s.sslpHhpscpDspssWshPpsIcapasGpsp-..scs.ph........................................lpGsL.............tphl-RlDVMAElPsFVKsIVuuVAGTKPYIYQYs....pchslclphst........p..pEcGhhasEsTFIS ......TsVETQTFYhhs.ps.hhuhsQlIaSNlh.Gl.asssQFshKlFsscss............p.plWpSspLpNh...F................p.....-thsFhu....DslulcL..............sp-sss.....................YpIKusl........scpshVslplsR..hsPGFthG...csGsoaa.............GsD.psPWGpMRHsFWPRspspGoIss.........cc..................................................................pslchps..........huhFlhAl.QGMKPHHAAu+W.NFhsFQo................s...aSAlhMEFT.T.P.SY.us............TpVslGulscc.scl...lhsus...................s.spspHhso.ppDspssWs.PpsIcasasGpspc...sp..ps............................................lpusL.................tphl-RlDVMuElPuFlKsIVuulAGTKPYIYQas....................pphs.h..clphss................-....pEcGhhasEuTFIS.............. 0 32 65 106 +8456 PF08623 TIP120 TATA-binding protein interacting (TIP20) Mistry J, Wood V anon manual Family TIP120 (also known as cullin-associated and neddylation-dissociated protein 1) is a TATA binding protein interacting protein that enhances transcription [1]. 
25.80 25.80 26.20 25.80 25.00 25.70 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.02 0.71 -4.67 23 345 2009-01-15 18:05:59 2006-01-18 09:29:55 5 14 251 3 238 339 1 164.60 44 14.11 CHANGED sLIhshL............splLPhlhs-Ttl+sELlRcVpMGPFKHplDDGLElRKsAYEslYoLL-.....oshupls...lhch.hs+lhsGLpD.cpDI+hLsplhls+L........sslssctlhp+....LD..........plspsL+ssLs.tKlKssAVKQElE+ppEhhRosL+hshsltpph.......ssspsss.............tWspahcplpps ................................LlpshL.sp.lLPhLY.sETpl+pELIREV-MGPFKHpVDDGL-lRK..........uAFEChYoLL-.............osl..spl.c..........lh-F.ls+.l.tsGL....cD..caDI+....hLsaLMLs+L........usls.PstllpR.......LD.........................pLlEsL+sTho.tK.lKssuVKQEhEKp-EhpRSshRslsuL.hpl..................spsppss.................hhppa.pplpp.s................................................................................................................. 0 79 122 186 +8457 PF08624 CRC_subunit Chromatin remodelling complex Rsc7/Swp82 subunit Mistry J, Wood V anon Pfam-B_56720 (release 18.0) Family This family has been identified as a subunit of chromatin remodelling complexes. Saccharomyces cerevisiae Swiss:P32832 and its paralogue Swiss:P43554 have been identified as subunits of the RSC chromatin remodelling complex, and SWI/SNF chromatin remodelling complex respectively [1]. 
25.00 25.00 35.90 74.50 20.80 19.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.46 0.71 -4.26 22 230 2009-01-15 18:05:59 2006-01-18 11:32:37 5 3 133 0 176 233 0 138.20 50 24.36 CHANGED hsltsDEhhls.pDPcGEp..........Klcc.GpLtGGRcY+h+TFTlh....s+G..cplYMLuTEsARsl....GaRDSYLhFppH.sLaKhlsspsEKpcLI-pslIP.....SY+uRslslVTARSlF+EFGA+lIhsG.......++ll........DDYacpcAtp....pG.s ...................h..h.sDEhhh..tDspGEp..........KVDp.GpLhGGRc.Y+sRTFsls....sRG...c+LaMLuTEsARsl....GaR.DS.YLhFpKp+pLaKIIsspsEKcDLIcp..-llP....aSY+u.R.p.IulVTARShFRpFGA+lIlsG................++Vh........DDYapscARcpG..h.................. 0 41 91 147 +8458 PF08625 Utp13 Utp13 specific WD40 associated domain Wood V, Finn RD anon Pfam-B_8625 (release 19.0) Domain Utp13 is a component of the five protein Pwp2 complex that forms part of a stable particle subunit independent of the U3 small nucleolar ribonucleoprotein that is essential for the initial assembly steps of the 90S pre-ribosome [1]. Pwp2 is capable of interacting directly with the 35 S pre-rRNA 5' end [1]. 21.00 21.00 21.30 21.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.65 0.71 -4.54 35 326 2009-01-15 18:05:59 2006-01-18 11:48:04 6 20 280 0 230 328 4 140.80 32 16.82 CHANGED EQpLpNhlpptcYpcAlhLALsLc+Ph+Lhsll....cssl..............p-ppuh....ppl-......psltpLss-Qlh.....pLLchhR-WNTNu+ssplAQtlLpslLpphsssc.Lhpls....................u.......................ltcllEuLlPY........................oERHapRl-cLlcpoYhlDYslppMs .............................................................-QpLpNhlp.p.t.capcAltLAlpLs+PtplLslh.pshh.....................................pctpsh.....ptl-...........psltpLsp-plh.......tLLphh+cWNTNuRpsplAQtlLtsllpphsssc.lhpht...........................................u.................................hpphl-uLlsY.......................................T-RHapRlsc.Llppoahl-asltpMp........................... 
0 81 128 190 +8459 PF08626 TRAPPC9-Trs120 Trs120; Transport protein Trs120 or TRAPPC9, TRAPP II complex subunit Mistry J, Wood V anon Pfam-B_15686 (release 19.0) Family This region is found at the N terminal of Saccharomyces cerevisiae Trs120 protein (Swiss:Q04183). Trs120 is a subunit of the multiprotein complex TRAPP (transport particle protein) which functions in ER to Golgi traffic [1]. Trs120 is specific to the larger TRAPP complex, TRAPP II, along with Trs65p and Trs130p(TRAPPC10). It is suggested that Trs120p is required for the stability of the Trs130p subunit, suggesting that these two proteins might interact in some way [2]. It is likely that there is a complex function for TRAPP II in multiple pathways [3]. 19.30 19.30 19.40 20.00 19.10 19.20 hmmbuild -o /dev/null HMM SEED 1185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.79 0.70 -14.13 0.70 -7.23 9 449 2012-10-04 00:47:01 2006-01-18 12:57:28 6 15 255 0 329 471 2 725.10 15 78.18 CHANGED Mp...sthS..h.ssuplRslVlPI.GchspppFpcahphltpp.sclpLtDl...hspppps.FssQsaspGplhhcFhhus...s.p..hcDFpsaRKshslIGlssh.ps.shs.....h..hpcpYPssls.pphhhFs..Ppsp........tp.cph.shF..stsp.p..pthcolhpDlstshLhshpsa.....plol+SsssI............................................................sh.ssshsopssluS...pp..+spp+p.GR..KhhGsahLLAGph.DAhppahpAlthh+tssDhLWhuuALEGhssshVlhpaht.sh....................................spht..sshp...............................................sL.p.lphhhscll.hYppuh.p.pt...sPtlh.hEuhL+hh+hhhth..................................sph-lhphl.pshthplsphshp-phplYuthAphauslGhpRKpAFhhR.lhhth.s..........................shtt........hhttlhthhsls.pscu.phtsphtp.t...............WssLQhplLp.hlpsu.phsD...hhphsshLLppahphlssspQpplhptlppsh......hpshplshsah.P.h.hth.................l.h....t....pslsp.........spphs.tPF.asPapp......htssss.p..hhhlhs-.sphplpltNPhsF-lplpsltLssctsph.........................................................pshshuhhl..Psu.chhhLshhshpsGplpl.Ghplphhss.sthhh..s.p.......................cssp................
.slpllPs.PtLplhs...............t.hossshhLh-GEppphhIplpNtu.shslsplphsh..ps.c.h.ph.h....sthh.stch..hp.....h....hp............slpPsthhphplp...................................................sstts...hpt.thllp.u.pt..-top.ah+pLslPlplslh.ulclsphchlsh..................s.p..........s..p.....ss.chpLLlL-hhNuhtc..tlp............h.h.....sshtspphhlcsststRhllPlc+h...thshs.hshPpl.ppp.hlps..thstt-chph+ch.hhhppllp+lpspW+.spsp.....pGplsl+sh..p.LssphhphL..s.lphthpl.tssppt...p.tp.................hth.ss.hhshchhlh.psppsls............hhsh..hstpsshh..p.ppplLhsGsLpph.....lpstspsshphshhh ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.....R..h.hu.h.h..u....u...h.........u.....................................D..W.u........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................h..................................................................................................................................p.th.............................................................................................................................................................h..apsh............................................hh.t-.hth.h.l.Nsh.h.pl.lpphtl...p.h.................................................................................ps.............h...l..........t...............h.......l...hh.s............t...............G.l...l.....G..h...ht...........................................................................................................................................................h.lh...P.h.h....................................hhtGp...h.h.h.N.u....h..h.h...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................................................................................................................................... 1 116 199 276 +8460 PF08627 CRT-like CRT-like Mistry J, Pilcher K anon Pfam-B_67420 (release 18.0) Family This region is found in proteins related to Plasmodium falciparum chloroquine resistance transporter (CRT). 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.76 0.71 -4.21 3 27 2012-10-02 19:55:49 2006-01-19 09:12:24 5 3 19 0 16 29 3 103.40 26 25.10 CHANGED -EKpPLLSsIN-sDD-.NapDlNlKoPlshhSNIKKcSl.p+FKsaLKNSMSKETlTILIYVVLYIlSGVINSVLLKKVMNKFTNYGFFLSQLTNYGYVPIFGAlshYKIaFTsDIPKETRsFPQaKFVI .................................................................pthhth.......................................................tcsh.lhlhhlshlhsuVhNplLhKhhhhshpNYs.aFLsQl.TshsYlsl.aaulhhahhhhtstlsp-.hthPph+Fh......................... 0 10 13 16 +8461 PF08628 Nexin_C Sorting nexin C terminal Mistry J, Wood V anon Pfam-B_5897 (release 19.0) Family This region is found a the C terminal of proteins belonging to the sorting nexin family. It is found on proteins which also contain Pfam:PF00787. 
22.30 22.30 22.50 22.70 20.80 22.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.30 0.71 -3.99 24 634 2009-01-15 18:05:59 2006-01-19 10:04:24 7 21 243 0 395 596 0 110.60 26 11.49 CHANGED WLR.Rsllshlp....phhG...sTIp+hlp-plp.plhs-cplsphlshl+-sla......PsG.........phtp.s.....R......oppp+tcscppAp..phL.hphlP-hlspllGppssppuhpplFssLQpthl ...........................................WLp.+sl.lshlp....ts.hG..........ss.lp+.h....lpcp....lp..lhs-pplsphlphl...+.-uhW...............PsG...............................htt.s........sR...........................................otpp+t.cscp.pAp....ptL.h.............shlP..-hlspll...G...ppsspcuhtclFpsLQps........................................................... 0 112 184 296 +8462 PF08629 PDE8 PDE8 phosphodiesterase Mistry J, Vasta V anon Pfam-B_72889 (release 17.0) Family This region is found in members of the PDE8 phosphodiesterase family [1]. It is found with Pfam:PF00233. 25.00 25.00 28.30 30.80 20.60 19.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.33 0.72 -4.17 2 24 2009-01-15 18:05:59 2006-01-19 11:07:15 5 5 13 0 8 46 0 49.50 77 6.74 CHANGED MGCAPSIHVSQSGVIYCRDSDESsSP+QTTolSQGsAAsL.GLFlpTDAA-s .MGCAPSIHVSQSGVIY..CRDSDESNSP+QT.TS.VSQG..PAAPL.GLFVQTDAADA.......... 0 3 4 7 +8463 PF08630 Dfp1_Him1_M Dfp1/Him1, central region Wood V, Finn RD anon Pfam-B_28140 (release 19.0) Family This is the middle regions described by Ogino et al [1]. This region, together with the C-terminal zinc finger (Pfam:PF07535) is essential for the mitotic and kinase activation functions of Dfp1/Him1 [1]. 
25.00 25.00 25.20 26.80 22.90 23.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.62 0.71 -4.34 21 135 2009-01-15 18:05:59 2006-01-19 16:09:07 5 5 127 16 103 138 0 126.30 36 19.62 CHANGED sDl......Lt+A.+chtMKlWshEKL.pRhLpslsssppshpptp...........................tpspspssLopLLcsEKlaGsoDRDPpstRcDlpYF+..tPalYlaDlspph+PIhl+Ea...chhp.pct......aPph+sss.G+CPFls- ...........pDlL.p+A....+phshKIWuhEKl.pRhLpsltsspsstttt..................................................spspscssLspLLcsE+lhG.P.oDRDPpstpc-...lhaFK..uPalYlaDhppch+PlhlREY..........cls.pp...p-us..........WPph+sus.G+CPFlp-.................... 0 26 57 89 +8464 PF08631 SPO22 Meiosis protein SPO22/ZIP4 like Mistry J, Wood V anon manual Family SPO22/ZIP4 in yeast is a meiosis specific protein involved in sporulation [1]. It has been shown to regulate crossover distribution by promoting synaptonemal complex formation [3]. 26.00 26.00 26.70 26.00 25.80 25.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.59 0.70 -5.32 19 164 2012-10-11 20:01:01 2006-01-20 08:51:43 5 8 135 0 108 177 0 243.10 19 29.98 CHANGED phAhppsDhshApthhs+ucphh...hhssphstpLuchhYshGhphhppp.......shspuhphlpcuhchhph....schp..........pppsphppl+hpsLphLspshlpspstcsh.c.shchlphh...pp-hsp+.shhhLthchhhp.....httsthpc.Lpphltshshscsshphhlptht.hhspssttsstsl.hhlhs+hpssss...hhchtlhsphhhhstppp.ssp.hhcsltthls..hl.t.hspplosc.shsshhslLWspspphhptppYs.uhpahphuh.pthhh ........................uhtpschphAphhht+scthh....hhpsphh...t.pLsphhYshGhphhppp............phppuhhaLpp.uh-lh..............schp.............ptssphtph..phplLphLspsh...lphp..s...p.thtc..shphlphh...........pp-hs.p.psslhhLthclhhp..............tpthpc.Lhphlhp..h..s-.....sshphhlphhp.hhp.ps....t...t....sshth.hhlh.p+h.......pss....ts.....hhphh..llph.hhh.hppttp..s.p..lctl.................hh.....p.htp.lst......p.thpsh..hslLWpp..utp.aphppas.uhpahphu............................................................ 
0 24 53 85 +8465 PF08632 Zds_C Zds1_C; Activator of mitotic machinery Cdc14 phosphatase activation C-term Mistry J, Wood V anon Pfam-B_44907 (release 19.0) Domain This region of the Zds1 protein is critical for sporulation and has also been shown to suppress the calcium sensitivity of Zds1 deletions [1]. The C-terminal motif is common to both Zds1 and Zds2 proteins, both of which are putative interactors of Cdc55 and are required for the completion of mitotic exit and cytokinesis. They both contribute to timely Cdc14 activation during mitotic exit and are required downstream of separase to facilitate nucleolar Cdc14 release[2]. 25.00 25.00 34.70 33.70 24.80 16.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.61 0.72 -4.57 11 139 2009-09-11 06:09:32 2006-01-20 16:03:23 5 2 123 0 104 153 0 52.40 63 5.56 CHANGED ohSTVlMhDaRhPIpVERAIYRLSHLKLSNP+RsLRpQVLLSNFMYSYLNLVs ..........psslsh..sRFPIh.ERAIYRhuHlKLANP+RsLhpQVLLSNFMYuYLshVp..... 0 27 56 89 +8466 PF08633 Rox3 Rox3 mediator complex subunit Mistry J, Wood V anon manual Family The mediator complex is part of the RNA polymerase II holoenzyme. Rox3 is a subunit of the mediator complex. 
25.00 25.00 50.10 49.00 23.30 22.80 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.40 0.71 -3.98 13 111 2009-01-15 18:05:59 2006-01-25 16:29:35 5 2 107 0 82 105 0 192.60 34 59.81 CHANGED YpsppPsPhpDLlslYGLsslAcpVARsss.DGpKhs+LRKSYKspIpcL..uG+Fssl.sc..Nt.GGlhshlh.............hps.utDlhpt.......hss-.hpcthpshshuhhpsPp.DW..spsVLuphcpShssthtNt......sshssschuhshsGotus.ss.stp.....t.spsKRph+Kpshu-toh.shuEth.DD...................hKRR++ ......ap.stPsPppDLlslYGLsslAppVARssP.sGpKh..N+LRKSYKG+IpcL..uG+acs..ltpc......sssGGhhshhh...............................tps.sp-lhhs.........hssphhppth+shp.ht.G.hP.p..-a...psVLup.c.ohssthttt...........st.ssssh....s.h.shs...sst.t.s.ts..s.p..........t.sRscRth+KRshs-tuh.uauEua.DD....................hK+R+h................................................... 0 11 37 66 +8467 PF08634 Pet127 Mitochondrial protein Pet127 Mistry J, Wood V anon manual Domain Pet127 has been implicated in mitochondrial RNA stability and/or processing and is localised to the mitochondrial membrane [1]. The Pet127 family is part of the PD-(D/E)XK nuclease superfamily [2] including a full set of active site residues. 
25.00 25.00 54.20 46.70 18.70 21.50 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.87 0.70 -5.24 11 147 2012-10-11 20:44:44 2006-01-25 16:32:47 5 4 140 0 111 154 0 272.20 48 34.04 CHANGED aloPScDppLlclAcctpKKYsuSTSSMTSlLSQlHaLLSsaRslsh..utlSpsF..s..sppssFopss+hPoSllLp.......hpsslauIDuDK....shDp-..ILSsLGHuLEphLTsccc-Ftphh.............ppphss.t.pphs-uY+YuphGcFlhRSQLDsaDs+LP.GoGsFDLKTRAVsuIRhDhsphpps...suYcIp+hhGphESaEREYaDLIRo.AhLKYSLQARIGcMDGIFVAYHNhS+IFGFQYlPLpEhDptlHuos-pt.....................................lAssEFphShplLpKlLc .......................YlTuS+DpsLhplApcppKKYhGSTSSMTulLS+hHaLLSsaRslsh..stl..Sp.sF....s.tpspsF.Tpht+hPuulhL+...........hccGlYu.IDuDK....phDs.t......s...l.Lo.LG+ohEKhLThsp--Fc+Yphp...............................ppp.phsttt...pst..sEsYH.....Yoph.Gc.FlMRSQLDAY..DPRLP.GoG...hFDLKTRAVsuIRhDhpphcps...hGYpIcpth..GpaESFEREYaDhIRo.AhLKYSLQsRlG+MDGIFVAaHNhpRIFGFQYlsLsEhDhtlHGppctt...................................................lGDpEF+hSlpLhsclLp................................... 0 36 67 98 +8468 PF08635 ox_reductase_C Putative oxidoreductase C terminal Mistry J, Wood V anon manual Family This is the C terminal of a family of putative oxidoreductases. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.74 6 39 2012-10-02 13:21:44 2006-01-26 11:15:45 5 2 28 0 27 83 46 132.10 46 33.14 CHANGED RYLpsVQKMKpIlcENNLpVMuTsARYssAYEHssKhsWWsKSlssGPlVEQuTHFCDLSRYFGGDVDlsTlpu+ulEWaE.sG+LoKlPlDESs.IP--pRIPRhTuAoWKYcSGAVGshpHuluLQGssYuTELEVhADGY ........................................RYLpsVQphKpllc-ssl.p.lhssh.A.RYs................sA..YtthsKssWW.s.K.u.h.s..s..G.PlVEQuTHFsDLuRYFGG.-.V-.hsoVhu+ulch.-ps...G.pLsKls..lDEot..Is.-pRlPRhTsAsWKacoGAVGshhHshsLQGpsYsspl-VhADGa............................................................. 
0 19 24 27 +8469 PF08636 Pkr1 ER protein Pkr1 Mistry J, Wood V anon manual Family Pkr1 has been identified as an ER protein of unknown function. 25.00 25.00 25.40 25.00 24.60 24.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.58 0.72 -3.92 20 147 2009-01-15 18:05:59 2006-01-26 12:00:54 5 7 140 0 110 140 0 74.60 44 37.83 CHANGED Mus.FlpsLWpSIFTPGsTPsLllATpsoFAAL.llLlsLlhsTt.SIHFlhL.lLuusLWholsWFltELppschp ..............Mus.FhtcLWpSIFTPGs..T..P.....s.....LllATssoFuAL.lsLhsLlhsTh..SIHFllL.hlssuLWholsWFhpELptsp.......... 0 32 62 95 +8470 PF08637 NCA2 ATP synthase regulation protein NCA2 Mistry J, Wood V anon Pfam-B_15813 (release 19.0) Family NCA2 has been shown to be required for the regulation of ATP synthase subunits Atp6p and Atp8p in Saccharomyces cerevisiae [1]. 20.60 20.60 20.90 23.80 19.80 19.80 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.85 0.70 -5.52 13 207 2009-09-14 08:31:22 2006-01-26 13:06:51 5 3 172 0 151 206 5 263.60 28 41.54 CHANGED slL.hhhht..olhtlhps+ppIhpal+.ssh-slpuFhpNWVhpPlpplhcTlRasp.sSplulhSpcoLpS-h-SLpRMll-FstDppssss.......lshpplpsc....lcpGDLT.......lhchYEppl+pPlKslloGsLlRoLLIQlQKsKVDsplAlsGID+lLKSQQLlFulVuluPullIlYshhphLpphltstsphupst+hp..phpl....oLsslE.RlL.......shstp...................tt..ss-hsphph......GLLll-lpsL+phuspll..........PtshpcEW...lcDLc-Lsssshss.ss+LsllpRIa+sY 
.....................................................hh...........t..h....hhpppt.pl..palp.phhpshhsFhhsWlhpPlpplhssl+cs...psphulhuppoLpu-hp......SLcRMll-Fst-p.t...................ph.ptlppp...................l.cpGDlo.................lhctYEp-l+p.P.lpshl.pGcLlRuLLIQ.................lQKsKVDlphAhsGIDplL+SQcLs....F..uhluhsPulll....hthh..ph....lpt..hhttt.pth.ptttc...h...t....ph................Lpsl-.R.lL.......p.t.........................................................................ttts..s.hph.................GLLlhplph...Lhphspthh.............................s..t.p.h..tp-a...hcDlp-L.....ps...t.h.sh..ttphpslpRlhhsY..................................................................... 0 52 93 130 +8471 PF08638 Med14 MED14; Mediator complex subunit MED14 Mistry J, Wood V anon Pfam-B_13303 (release 19.0) Family Saccharomyces cerevisiae RGR1 mediator complex subunit affects chromatin structure, transcriptional regulation of diverse genes and sporulation, required for glucose repression, HO repression, RME1 repression and sporulation [2][3]. This subunit is also found in higher eukaryotes and Med14 is the agreed unified nomenclature for this subunit. Med14 is found in the tail region of Mediator [5]. 
19.90 19.90 20.30 19.90 19.10 18.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.25 0.71 -5.06 29 300 2009-01-15 18:05:59 2006-01-27 09:41:35 6 6 249 0 227 324 1 186.60 31 16.21 CHANGED shhPLutLlpphsQpsap-LsslhcpLsp.......................................t.pspsshpKKhclLpahhthRspFlKLhVLs+Wu+pup..cVs+hIDlhsal+tpphhasssh.tLssh.+psLstA+lPNsDLhTALEVLspG...phsshsshs..alssss............loscphLcpLpcLNpllphRLs.l..hpplPtph.psYcI+cGRVTFpVssEFElsLolss-s .............................hsLuhLlphhhppsap-Lp.LhchLsp...........................................psshc+KhpllpastppRp.Fl+LlsLsKWuppus.......pV.p.+hh.p.l..sal.c.ppph.haspshppLtph..+cs.....L.spA+lPshclssAl-VLooG..........+lPsh.c..p..t.....hls.sss......................lo...pct..h.....psLp.cLNpllphRLs.........ps..plP..ph..pshp.....l...............tsGRVpFpVtsEFElsLTlhsc............................................................... 0 76 124 189 +8472 PF08639 SLD3 DNA replication regulator SLD3 Mistry J, Wood V anon manual Family The SLD3 DNA replication regulator is required for loading and maintenance of Cdc45 on chromatin during DNA replication [2]. 
20.00 20.00 20.10 20.30 19.10 19.40 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.74 0.70 -5.86 24 128 2009-01-15 18:05:59 2006-01-27 10:36:39 5 2 119 0 98 134 0 463.60 21 59.51 CHANGED -.h-hlcppYhpuLYho+TulAYFsKusLuRsRshhpsspsss..................p.lhshl+phlLsscphDp+Ycts....hpshhlhtsss-s...tt.pt+pp+s.pc..pchucsthhstt.thhtp.ahtp..s.pt...tp.st........pchcchlusL+lREsQLQllLlLEhl........uL......hshssEtppstppsstsh.pp...pp+sppccKtpclsh.......................................................hL-lalDRLCIWcslpsscshhsppsps.stt....................spchppssl+cFCspVllPaausRLPchschIs+KLs...sshthsPhpppps..........ptshscsusssc.........cp.....sspppppcohp+Vho-ppt.....ttttttssL.RStTsss....hccEop.shh.sh.spsp........usl.ps+phppRplslsshutpcpsch+c.thh...cpcLcsuhsulcKss+tlsucsl.spsucK+st..................hhpslQlhATPt.ssRpcs.h........................................................s..p.t....s..................................ptspssVptTPp+ ...............................................................................................................p.hp.lpppYhpsLY.........psslAYFsKusLsRh+shhp.s.pt...................t.hhphhpphllshtphDt+..acpp....htthhhhhtpspt......t..php+t.pp.......phscss..............t.t....t......................pphpphlstL+hREsQLQllLlLEll...........tL..........hth.ttc.....p.....htttp.t..t........tpp..pp.cpcpppshsh.............................................................hL-lhlDRLsIWcslttschh..tt.t....tt............................ppspp-tlpsFss-VllPaast+LPchschlpcKLt...ssht.ss.ptptp...........pptspspsssshp.......................p..................s.tpsppoh.p+shsscp................t..tth.sL.+otosss...........tlcpEsp..p..h.......s...spsc..............................usl.p.+h.hp.pRphsl...sshs.....p..tpp.c..........ppht.h............ctpLp.sAhsshp+ssRtl.sscsh..scssc++tt.............................................................s.tpslQVhATPt.tpRh.psh........................................................................
..s....................................................................................................lttTP.................................................................................................................................................................................................................................... 2 24 51 84 +8473 PF08640 U3_assoc_6 U3 small nucleolar RNA-associated protein 6 Mistry J, Wood V anon Pfam-B_8720 (release 19.0) Family This is a family of U3 nucleolar RNA-associated proteins which are involved in nucleolar processing of pre-18S ribosomal RNA [1]. 20.90 20.90 20.90 21.30 20.70 20.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.84 0.72 -4.22 34 307 2009-09-11 15:26:11 2006-01-30 13:06:58 6 3 277 0 221 297 2 82.60 32 15.23 CHANGED LEptlPELc-LccptlFo+pElppIl++RocaEa+LppRssphp..DFlpYlpYEhsL-cLhpKRtp+lt.......pppptt-auh.p..RI ...........LEphlPELcclccptlFo+..sEIppIl++RscaEa+lpp+ssphp...DalpYlpYEhsLcpLhp+Rpp+lth.......ht.p..p.sh...................................... 1 79 124 183 +8474 PF08641 Mis14 Kinetochore protein Mis14 like Mistry J, Wood V anon manual Family Mis14 is a kinetochore protein which is known to be recruited to kinetochores independently of CENP-A [1]. 
21.20 21.20 21.70 22.70 21.10 21.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.82 0.71 -4.11 14 172 2009-01-15 18:05:59 2006-01-30 13:18:49 7 2 152 0 115 161 0 138.70 20 55.64 CHANGED phuppulplssh-hpu.....sl...hslss.s..pspchEPaDh-LstplpplapphEctsVcVAphR+ssPpphtctYs.......cpppphLpp......h-pcl.............s...t.p.s.t.t-sDtpsp.st.tpp..pthhp......htpuhsplhphp.sl ..............................................tt.slp.sths..hps.............sl..................hsh.sts.......pppchEP..aDsclptpltpLhsph--lhlclAphR+phPt+lscsht.......cstpchLpp........................hcttl................s.t.t.p.p....t......t.s...tt...........t................................................................................................ 0 18 45 84 +8475 PF08642 Rxt3 Histone deacetylation protein Rxt3 Mistry J, Wood V anon manual Family Rxt3 has been shown in yeast to be required for histone deacetylation [1]. 29.90 29.90 30.30 30.10 29.50 29.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.73 0.71 -3.84 13 171 2012-10-01 19:31:27 2006-01-30 14:35:38 5 3 148 0 130 169 2 109.70 34 14.56 CHANGED lsl+Istcal................ptth....cs.hstpRplWG..TDIYTDDSDllhlLhHsGhhpsshs.p.......................+pos.sh.spspsh...us.......s.Ph.tDLcVslLlLPpLp+YtSshR.GIpSRsWs ............................hpl+lst.cl................ssth...........ppchstpptLWG..T-lYTDDSDl.lAshhHsGahpsp..hs.s...............................................................t...p......t.............tt...............s.......DLplplLlLPpL.cY.ushh.ulpSRtW....................... 0 32 76 111 +8476 PF08643 DUF1776 Fungal family of unknown function (DUF1776) Mistry J, Groocock L anon manual Family This is a fungal family of unknown function. One of the proteins in this family Swiss:P32792 has been localised to the mitochondria [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.85 0.70 -5.28 15 135 2012-10-10 17:06:42 2006-01-30 15:01:01 5 4 126 0 99 2683 945 275.30 30 64.76 CHANGED ARp-VVllsGSss-PlTRslAhDL-RRGFIValsspstc-tphlcscs.p.-.IcsLslsp.......ts.shpsslscFtphlppP.hshsssp.HhLpLpullllPSLsa.ssGPltsIssuoass.lss+lLs.l.lsspullPLlpt...................ppsplIllsPoIhuuLshPaHuPEslhsuslpshhpsLscElp....hsIsVspl+LGslslu...............sssSssphuslssS-....hhsWstph+sLYussasth.tpt.shtth.........RGoolRcLaphlaDhl.sp...............sssllYsGpGuplYsa.....lucalPculls .............................................................................................................uRp-lV.llh.G.u.sp-PlsRsluhDLE.RRG.FIV.al....s...sp....ss.-..-.........p....hl...cs.p....s.....p.D....Ics.Lhl-.........ps.phtsslpph.tphlpp........s..................sh.s.s..h..........s...........p......h.....p...Lp.u...l....l...h.lP..s.Lp...Y...so.GP.l....tsI..s.s.s.s..aschlN...o+lLhsl.hshpshLPLLpt......................................................pp.s..pl...l..lh.s.P...S.I......o.uL.s.h......P.a.p.u..s.E.s...h.s....s...s...u..l..s....uhhpsLppELp............slsVsplcL.G....sl..c..lu........................tt...t..p.....t.h..t.p.hssoc................W...........tt...+s.lYu...ssaht..hp.t..t...sh.............pGo.s.....h..R..p...L.a..hlhDhlts.........................hsss.h.h.s..G..p..G.u.hhYsh.....luphhPtshl.t........................................................................................... 0 22 52 81 +8477 PF08644 SPT16 FACT complex subunit (SPT16/CDC68) Mistry J, Wood V anon Pfam-B_4478 (release 19.0) Family Proteins in this family are subunits the FACT complex. The FACT complex plays a role in transcription initiation and promotes binding of TATA-binding protein (TBP) to a TATA box in chromatin [2]. 
25.00 25.00 31.10 29.40 23.80 24.00 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.00 0.71 -4.37 32 353 2009-01-15 18:05:59 2006-01-30 17:00:05 6 13 294 0 255 354 4 151.60 43 15.34 CHANGED IaVDpcppolllPIhGthVPFHlsTlKNsSpspEush.sa.lRINFpsP..Gsts..sppct....hps.ssshFlKEloaRSpcsc+...........hsplhctIppLpKphppR...EsEc+chtsllp..Q-+Lhl.ps.....pcsh..pLpslalRPsh.su....++hsGsLEhHpNGhRY .....IaVDpKtpoVllPIhGhsVPFHIsTIKNsSp.osEG-a..s..............Y.LRINFhsP...Gssh...u+p-sts......aps..ssApF...l+plTaRSpcscc...........hspshptIp-lpKchtpR...............EtEc..+EtpslVc..Q-cLlhsps.....+pss.......+L.pDl.alRPsh..ss......KRhsGsLEhHpNGhRY.............. 0 90 143 212 +8478 PF08645 PNK3P Polynucleotide kinase 3 phosphatase Mistry J, Wood V anon Pfam-B_6220 (release 19.0) Family Polynucleotide kinase 3 phosphatases play a role in the repair of single breaks in DNA induced by DNA-damaging agents such as gamma radiation and camptothecin [3]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.07 0.71 -4.79 32 1136 2012-10-03 04:19:28 2006-01-31 09:22:57 6 29 976 20 329 2109 716 164.20 32 45.94 CHANGED Kl..AuFDLDuTLIpo+S.Gtsasp.s..........ssDWpa...h..p......l.p+LppL.hp-s...........YplVIFoNQuGlst..............spt....shpsappKlpslhcpl....slP.....ltlasAsp.c..........................................D.....haRKPpsGMWphhtcchsp.....h.lshp..pS...aaVGDAAGR..........................................................ppDaSssDttFAhNlGlpFhTPEEaF 
...............................................................hhhhDhDGTLIppss...pp..a..s...s...........t.Dh..h..h...hcss................VhspLh....cL..pctG............Y+lVhlTNQcGlGo................thspt...ch-s..+...s...h....h..h..p..Ih..p.u...........Glp.................l..p..l..a..h...s.s..c...c........................................................................................s..........pCRKPK.suhlc.c.alpc.t....................h.Dht....pS............ahIGD.....thsc........................................................................................................ht..hh............................................................................................................... 0 129 192 276 +8479 PF08646 Rep_fac-A_C Replication factor-A C terminal domain Mistry J, Wood v anon Pfam-B_3457 (release 19.0) Family This domain is found at the C terminal of replication factor A. Replication factor A (RPA) binds single-stranded DNA and is involved in replication, repair, and recombination of DNA [1]. 24.20 24.20 24.30 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.04 0.71 -4.49 33 798 2009-01-15 18:05:59 2006-01-31 09:34:27 5 48 363 3 503 733 13 135.40 23 24.30 CHANGED aaol+Aslsal.....Kp-.shhYsACsp............sCNKKVs-p.ss...........Gp...........WRCEKCspsaspspYRYllshplsDpTGphWlTsFsEsAcplh.GhoAsELtcl+c....pssppasplhpphphppahF+l+sKp-oYs......-EpRl+hTVhslp...slcapt-ucpL ................................................hphhuslh.hl.....c..pc...s.......h..hY..ACs....................pC.s.K..K..Vhpp..ts........uh.....................apC...c.+..C..s.....p..ph...s..p...s.p..aRYh..lsh..plsDto.u....p.hhlosF..s..-s..ucplh..Ghs.A.sp.L..h..p.hpp.............ps..pt..h.tphh.p.ph....hp.pa.a..+...hps.p....ct.at......tp.+...p...h.h.t................................................................... 
0 138 269 387 +8480 PF08647 BRE1 BRE1 E3 ubiquitin ligase Mistry J, Wood V anon Pfam-B_35727 (release 19.0) Family BRE1 is an E3 ubiquitin ligase that has been shown to act as a transcriptional activator through direct activator interactions [1]. 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.04 0.72 -4.01 20 193 2009-01-15 18:05:59 2006-01-31 13:57:30 6 7 177 0 138 205 0 97.20 30 12.53 CHANGED LppELsslcpAacchpphspcKht-hpshEpphs+LssEKsKADQKYFAAM+s+DulpsEhKpLppphsKss-llppLp-....hEpphppplpshcKpl ....LppELsphppuacchpphsppKht-hsshEp+ht+LpsE.................KsKADQKYFuuh+st-shpsEl+pLptphsKss-llppLc-....sEsphpptlpshEKph....................................................... 0 39 71 111 +8481 PF08648 DUF1777 Protein of unknown function (DUF1777) Mistry J, Wood V anon manual Family This is a family of eukaryotic proteins of unknown function. Some of the proteins in this family are putative nucleic acid binding proteins. 
26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.10 0.71 -12.04 0.71 -4.54 7 315 2009-09-11 17:09:29 2006-01-31 14:03:37 7 31 236 0 237 303 3 165.60 29 60.66 CHANGED MuRSRS............Ro..c+cRRcsc.spsR-.................ccRcR-RsRSR-.RDR....+RsRsR..s.+.R.pRsRSPcR................cRSpSpS..Rc+-...Rccpcc+csc.cP......+t+.hQ..........................IscpcL-.Gcsc-ph..................-MMK.hMGF.ssFDTTKGKKVsGs.DsusVplppKR+YRQYMNR+GGFNRP..LDFhs ..............................................................................................................................Rp...ppp+.p...ptc..spp.Rc.....................p.cRc+.p.Rs...R.sR.p.....R-c............cR.s..Rp...+......s..p...+....p..Rs.+Ssp+................................................................pcsp....s...s.....t..cp.pp......Rp..p.....ppppp.pp...p........................p..p...........................................................................................................pp..t.c..hp...st........st.--h.....................................c..Mh..+.hMGF..suFsoTKsK+V...............Gs.....s..s.........u....l..p..h...pK..ppYRQYMNRpGGFNRPL....s.................................................................................................................... 0 86 137 191 +8482 PF08649 DASH_Dad1 DASH complex subunit Dad1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. Throughout the cell cycle Dad1 remains bound to kinetochores throughout the cell cycle and its association is dependent on the Mis6 and Mal2 [5]. 
27.50 27.50 27.70 27.70 27.00 27.40 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.67 0.72 -4.27 11 108 2009-09-11 11:16:04 2006-01-31 17:20:58 5 3 106 0 80 107 0 57.10 48 38.23 CHANGED pYFpcQR-lLlQEIosoh-sllsNLNsLN+SLEpSluVG+EF-sVucLWppFYsulsp ............paFEpQR-hLlpEIu.....tohEpVLsNlNpLNRSLEulI.........uVGpEFsSVpuLWSpFpssMt.t................. 0 21 45 73 +8483 PF08650 DASH_Dad4 DASH complex subunit Dad4 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 20.70 20.70 20.90 20.70 20.50 20.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.30 0.72 -4.07 8 105 2009-01-15 18:05:59 2006-01-31 17:22:25 5 6 102 0 82 101 1 67.70 50 63.19 CHANGED M.ENPaEcVQsslLuRIIuNVE+LNpSVspLNQpLcclNp+N+NLElMuQhCENYpcuVpFNLEATGs+KsPL ........MEsPHEppQshLLuRIIsNVE+LNEulshlN+sLp-INhpNhNl.ElluQ.....MacNYpsNV.FpLE..ATpsh+.P.t.............. 2 23 46 70 +8484 PF08651 DASH_Duo1 DASH complex subunit Duo1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 25.00 25.00 25.90 27.00 23.10 24.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.47 0.72 -4.40 16 126 2009-01-15 18:05:59 2006-01-31 17:37:22 5 3 123 0 95 124 0 76.90 39 32.01 CHANGED tuLp+EL-pLc+INtlIEslhtsLcsup.sphpplpcoscusspLLspWhpILSQTpaspcLl.sssWpGtsp.DstshE ......uLpcELcslRpINpsIEullsoLcpup.sNhpsVscolpsussLLsoWo+ILSQTEasp+LlhsPsWpGssp.Dhhc.E............. 
0 23 48 79 +8485 PF08652 RAI1 RAI1 like PD-(D/E)XK nuclease Mistry J, Wood V anon Pfam-B_13095 (release 19.0) Family RAI1 is homologous to Caenorhabditis elegans DOM-3 and human DOM3Z and binds to a nuclear exoribonuclease [1]. It is required for 5.8S rRNA processing [1]. Profile-profile comparison tools demonstrate this to be a PD-(D/E)XK nuclease, with a full set of canonical active site signature motifs characteristic to the PD-(D/E)XK nuclease superfamily [2]. 19.90 19.90 20.40 19.90 19.50 19.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.19 0.72 -4.19 31 357 2012-10-11 20:44:44 2006-02-01 09:27:35 6 7 232 4 253 347 1 69.10 34 17.75 CHANGED VDslhc.tpsps..t.....................ppYVELKTopth.....ps.p...phpsFc+..KLhKhWsQSFLlGls+IlhGFRDcpthLpsl .........................................hDsh.s..t.tt..............................tpaVELKTotth........ts.p......phpsF.c.........R...KLLKaWhQSFLlGlscIlsGFR-sc.Ghlpp.h.............. 0 79 137 212 +8486 PF08653 DASH_Dam1 DASH complex subunit Dam1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. 21.40 21.40 21.50 21.90 21.20 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.82 0.72 -4.20 16 130 2009-01-15 18:05:59 2006-02-01 09:35:50 5 2 126 0 97 128 0 57.70 51 23.74 CHANGED phlhsphpcLuDuhtsLDtNhscLphIH-uLss.FNESFuoaLYGLphNuWCVDFPssP .......s.hlpPtFuELuDuhs-L-uNhh+LphhHESLuc.FNESFASFLYGLsMNAaCVDFPcuP........ 
0 26 55 85 +8487 PF08654 DASH_Dad2 DASH complex subunit Dad2 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 20.70 20.70 20.80 21.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.33 0.72 -4.02 18 132 2009-01-15 18:05:59 2006-02-01 09:47:46 5 3 126 0 100 127 0 97.40 34 64.41 CHANGED sltt+I.spK+AEL-sL+pl+chossLssQh-tLppKlushsDGTEuVAsVLuNWpsVl+uIShA....Shtlh+hsptchppst..........................PLPpsLVRI.sls .......h...t+ltpK+tEL-sLppL+clSssLssQh-tLpp+Lsshs-GsEuVAtVhuNWp.sVlpuIshA....Shplhphspt..s.p.t......................................sLPpsLVRI.h......................... 0 29 57 89 +8488 PF08655 DASH_Ask1 DASH complex subunit Ask1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. 20.70 20.70 20.80 21.20 19.60 20.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.00 0.72 -4.15 15 126 2009-01-15 18:05:59 2006-02-01 09:56:55 5 3 124 0 95 122 0 64.40 50 14.44 CHANGED LE+L-Q-ITLsLQ.-IDpNlSpsaplITpcIlPhlpcYupsocclh-uu..pFWKpFFEpSANVpLsua ..........LE+L-QpITLsLQ.-IDpNFS+uH+IlTssIlPhVcpYucpScsVW-uo.....+FWKpFFEsSANVsLouY....... 0 26 52 79 +8489 PF08656 DASH_Dad3 DASH complex subunit Dad3 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. 
In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 21.80 21.80 22.40 22.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.65 0.72 -4.28 17 110 2009-01-15 18:05:59 2006-02-01 12:45:57 5 2 109 0 83 108 0 79.10 45 62.12 CHANGED LSPLEpplLpcYppLussLpp.....LsspLppL............ossssp......................tlLcsLRpLEhKhuLVhTLhKuSVYSllLppp.spptcssp .........LoPLEQEVL-EYp+Lspshpp.....LussLppL.............ussPss..........................plLDsLRpLERKhuLVhTLLKASVYSIVLQQphtttt....ts................. 0 18 44 71 +8490 PF08657 DASH_Spc34 DASH complex subunit Spc34 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. 
25.30 25.30 29.10 26.40 24.80 24.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.60 0.70 -4.72 21 142 2009-09-10 21:43:34 2006-02-01 13:01:21 5 3 120 0 104 127 0 205.80 29 85.12 CHANGED LsppL-pIptSscuIsoL.F...ssPtIFTNAllpsp..s.........ITsLIRDs-scE.puLaphssssp..t..t.................................................tppppspc......ctshhp..............spsts.shhsphhsupsppppst..hsh..Gu.hh.p...............ht-hsl-hlLcshppLsshY.Phs..........usp-+lssLppcapplpssltpLEpcltcQpppLcpht.spppt.s.........t...................ss-c.Ic+EppEIccLEpchpplc ..............................LptpL-pIphuspuIssL.......F..........PPtIFsNAlLtst.s...........ITpLIRDspscE..psLFplsss....t.......................................................tpt.tt......pthhh................tsth....thhs.th...tspt.hpp.h..huh..Gs.hhtp.p.......................ht-hsh-llLcshp.LsslY.Pls...........shpp+lspLppcapplpsplt.hEtcltppptpLpphs..t..ttt..................................scc.lc+E.pEIcpLEtchppL............................................... 0 24 55 88 +8491 PF08658 Rad54_N Rad54 N terminal Mistry J, Wood V anon Pfam-B_26946 (release 19.0) Family This is the N terminal of the DNA repair protein Rad54 [1]. 
20.20 20.20 20.30 21.50 20.10 20.10 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.57 0.71 -4.55 24 204 2009-01-15 18:05:59 2006-02-01 13:08:54 5 12 187 1 144 211 0 167.20 29 20.79 CHANGED s.p.hssspShs+L..sKPFKsPh....................Suosspssc+PuRKRR..pVsYu.....ssss-ss-t.s-psh.................pscc+hALusRc...............sphsshphccp-ssh++sFoVPl...hspptssYssp+.PsPoLGhRptsshss+PLHDPouEFAIVLYDPTVDstsp............ccpptpp............cpcspcpp.c.................lcs.shh...HK..SLAEILG.....l....KKKhp.chPcV .................................................................................t........................................................Rptp....hsYt....................................h........................................ptth.uL.sp...........................hhs.s...p.tt..hcplhp+sFpVPl.....p..s.s...Y.sspt..ss.sLGh+psshhls+PLHDPhsEaAIVLYDP.olDs...............................................................t.....................................................................p...........p+.oLtclLG................p.pt...thP.l................................. 0 40 71 115 +8492 PF08659 KR KR domain Hoof I, Finn RD anon Hoof I Family This enzymatic domain is part of bacterial polyketide synthases and catalyses the first step in the reductive modification of the beta-carbonyl centres in the growing polyketide chain. It uses NADPH to reduce the keto group to a hydroxy group [1]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.88 0.71 -4.62 85 6808 2012-10-10 17:06:42 2006-02-01 14:48:52 5 1300 1204 23 2532 84357 23197 179.60 32 8.62 CHANGED GoYLlTGGhGG...LGttlAcaLs.c....+G.AccLlLsuRs...............sssspsp.....t............hl....ppLps.h.Gs....plphhssD..luctsslppl...lsplptph...sslcGVlHuAullp.Ds.hltphospchppVlusKVpGshsLcchhts.......psLDFFllaSSluu.lhGss..GQusYAAANuFLDAhAptRcspGhs......uhSlsWGhWtss ...................................................................................ush.LlT..G....G.......h....G...u.......lG..h...h.......l.A...........c.a..L....s....p...........c....G....s.....p.........c...........l.....l......L.....h..uRp...................................................................s.s.t..s.t..........t..................................................hl.......p..c....L....p...t......h.....G..s.....................p.l.....p.....h................h...s.........s....D......l....u....D......t........s........s...........l........t....t....l.............l......s......p.......l............t...........t............ph.......................s........l.................s.......G.......V.......l...H.......u...........A...........G.................l................l.................c........D................u.........h................l............t................s................h............o........................c.........p.........h.........p.........p.........V.........h.........t.............s..........K........l...........p........G........u...........h.........p.........L......c.....c.....h.....s.ts.............................ts.L......c.......h.........F.....l.......l...F....S.....S...........h...u....u........l....h.....G........s.....s............G....Q........u......s......Y.......A....A....A....N.....u....a....L....D....u....l....A....p....t....R.....+.....s....p....G..h.s.......................uh.o.l.sWGhWtp.s....................
.................................................................................................................. 0 634 1473 2142 +8493 PF08660 Alg14 Oligosaccharide biosynthesis protein Alg14 like Mistry J, Wood V anon Pfam-B_12992 (release 19.0) Family Alg14 is involved dolichol-linked oligosaccharide biosynthesis and anchors the catalytic subunit Alg13 to the ER membrane [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.22 0.71 -4.26 35 618 2012-10-03 16:42:30 2006-02-01 15:15:23 6 10 514 0 296 1021 447 156.50 28 77.72 CHANGED lhllhGSGGHTuEMhcLlpth...........pshcpahlutsDshStpphp.....ht.p................hhplsRuRpVtpuhhpo...lhoslhsh.htuhhll........h+.+...........PclllsNGPGssVPlshhuhLhthhh..............................ps+llYlEShuRVpsLSLSG+lLh..h.uDhFlVQW.pLpcp.....Y.s+upY...h.Ghl .......................................................................hllhuS.GGHhschhtLhthh...............p.cpahls.scp.utp............................................................................hhph......+...s.t...p.l.t.ps..h..hps......lhsh....l....hsh.hhuhhll...........h+..+................................PDlllssGsusslPhhhhuhlh.................................................................ts+hlYl.EShsRlpphSLoGKllh...l..sDh.FlVQW.phtch......Y...s+u..h.h.G...................................................... 0 99 171 240 +8494 PF08661 Rep_fac-A_3 Replication factor A protein 3 Mistry J, Wood V anon manual Family Replication factor A is involved in eukaryotic DNA replication, recombination and repair. 
23.50 23.20 23.70 24.40 23.40 23.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.26 0.72 -4.15 22 305 2012-10-03 20:18:03 2006-02-01 15:55:35 6 3 260 14 216 294 2 107.40 23 87.79 CHANGED Ms....tsssRlssuhLppa........hupsVRllu+Vpphc..uphhlhpsssst............slplphssshph..sshl.EllGpsssss......slcshshh.-hup..shDhshhsplhplspch..pha ................t.....sssRlssshL..s..p..a.........huc.sVpllG+V..pp..lc...Gp..thhlsss.-st............sls.lpl........s....p.s.h..ph...t..h.s.s...hl.EllGpV..ssst...............slphhtht..-hus...shDhphhspllcls.p.ch..tha........................................... 0 71 118 178 +8495 PF08662 eIF2A Eukaryotic translation initiation factor eIF2A Mistry J, Wood V anon Pfam-B_7957 (release 19.0) Family This is a family of eukaryotic translation initiation factors. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.13 0.71 -4.60 22 1027 2012-10-05 17:30:43 2006-02-01 16:42:42 6 49 446 0 624 2639 160 177.20 24 29.09 CHANGED KsFFpu-csphhWNppGssLLlhss...TchDKospSYYGppsLahl......thsss.sshlpLspcGs.IaDhsWsPpucEFsVlaG..hMPu.csshash.....c....sssltshspps+NolhasPpG+hlllAGFGNLs.Gpl-h.aDhp..phcpluphcssssohs-WsPsGcallTAoTuPRlRl-NGaKIWpasGpLlaphph...sELapl 
.........................................................................................shhpsspsphhW..ptt..uphlhhhsp..........ch.s..p.s.t.....p........shh..s...p.............plah..h......................t.t.t..t.h......s....t.h.lp..l........p.c.p.t.s.....l.h.s..h..sW.............p...P.............s............u........pc.......F....s......l.....l............h...........G.............h.s.............t.........pl..........sha.sh........................c..................sp..h...l...t...s...h...t....p...p......s.....t.....N.......s.......l....h...a....S..P.p.G........p...h.l.l.l.....u.u......h....t..............s....h...p........G.p..l.p....h...aD...........h.............p.................p..............h..........p.....h................h..............s.........p...h........c.....t.......................s....s...........o....s..h...p......Ws..P.sG.+......al.h.....o.....u.....s...s.........s........................p...h....p...h...p...N.........G.hpla.p.h.p.G.t..h.p..h....tth..................................................................................... 0 222 347 511 +8496 PF08663 HalX HalX domain Bateman A, Galperin M anon Galperin M Domain HalX is a domain of unknown function, previously (mis)annotated as HoxA-like transcriptional regulator. 23.30 23.30 23.60 23.60 23.20 23.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.31 0.72 -3.99 13 74 2009-01-15 18:05:59 2006-03-07 15:01:53 5 4 22 0 47 76 0 69.40 28 34.79 CHANGED lsRs-YD-plpEhaALsSKpAsLEupKssspLpsS-cYscLp-Rl-pLcscl-ssssphssp.Dacuhhpsh ........pRspY-cplpEhauLsuK+AsLEspKsps-Lpcs-cYpcLpsRlccLcspl-psh.sph..psp.Dh.thh.................. 0 3 32 47 +8497 PF08664 YcbB YcbB domain Bateman A, Galperin M anon Galperin M Domain YcbB is a DNA-binding domain [1]. 
25.00 25.00 26.10 75.80 20.20 24.30 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.59 0.71 -4.37 21 318 2009-01-15 18:05:59 2006-03-07 15:12:28 5 2 240 0 48 218 1 134.50 43 44.99 CHANGED cphphlLu-LGIsGEuGucDllpllphlhcppps.s.......sLKplFpcluppc.....t..chp+EhKAhEQRIRRAlhpuLsplAuLGlsDasNspFcpYAsphFDFpsV+pcMpclpscsst...pu+lNlKKFlpsL ........s.ph+aLLSELGIuGEsGS+DLlshl-YLhppEpspo.....thPuLK-lFpplst++Ls...s.ts-lc+phKAuEQRIRRAIhpuLsHLASLGLTDFpNPKFEsYAs+FFDFssVR++MsElppcpst.........tsRIssKKFIQVL. 0 21 37 41 +8498 PF08665 PglZ PglZ domain Bateman A, Galperin M anon Galperin M Domain This family is a member of the Alkaline phosphatase clan. 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.19 0.71 -4.61 16 530 2012-10-03 20:55:17 2006-03-07 15:56:20 7 2 502 0 158 551 302 178.40 25 24.53 CHANGED cRVhVIlSDAhRaEhupELtphLNpcsphps-l..p.......shhulLPSYTpLGMAALLPHctltat..tstsVhVDGpsspuhtpRptILtphh......uhAhptc-lhshspccsR-......hl+spcllYlYHNpIDuhGD.....+tsoEppsF-Ascpslp........pLpcLlphlhsp.sGsplhlTADHGFlapcssl ........................................................................+lhlIlsDuhRY-ht.ppL.tp.p...l.s.p..p.p...p..h.phch....p..................shhuhLPohTphuhs.Alhs.....s..p.....h......p...h...........t....t...t.sp..h..hs..-.....s.p.p.p.p.s.h.s.....t.+.p..p..h.Lpt...........................shshp..hc..clhs.h.pp..sctpp.........hhp..sp...c...llhlh...a.NtI..Dt.huc.........p.h.p.Et.....t.s..hc......us.......h.....cul.s...........tLtcllppltst....s.h.c.lllTuDHGalhppp..h.................................................. 0 72 122 146 +8499 PF08666 SAF SAF domain Bateman A, Lakshminarayan I anon Lakshminarayan I Domain This domain family includes a range of different proteins. Such as antifreeze proteins and flagellar FlgA proteins, and CpaB pilus proteins. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.85 0.72 -3.57 169 4935 2012-10-01 20:51:14 2006-03-07 16:09:21 7 24 2605 28 1282 4640 2751 65.60 23 18.28 CHANGED psllluscs..lttGphl.........sspslphtpss...h.huhhspt................hspllG.....thstpslttGphlphsplp ..................................plslA.hp-..lttGphl.....................stps..lp..h..t..p..s..s....hthultsht............................................aspl.l.G.......hAtpslttGphlp.ptl............................................... 0 427 825 1065 +8500 PF08667 BetR BetR domain Bateman A, Galperin M anon Galperin M Domain This family includes an N-terminal helix-turn-helix domain. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.80 0.71 -4.26 6 180 2012-10-04 14:01:12 2006-03-07 16:15:43 5 3 131 0 45 208 4 118.90 32 49.44 CHANGED sschlsss+lRcLLs+pGIucRp+sohIsplLGLShSsupRKL+GuhPWsLuQLp+lAptaGhPsu.LL....-spGhsP............ss-hpDAlLshpspchpC+............AhI...uspusupspspFVAhp...pspWhVht+scs.-p....csYsVchl-lps ......................................s..phhhup+V+cLhs+pGIs.Rpp.s.o.plsclLsLSaSsu.pRK.L+.GpsPWoLuQlpclActaG.s...supLh.....ssp......................pAhh..t..ph.Ch............hhl...st...s.t..s.h.A.p......W.lh.hpth..t.....ha.sch.................................................................................................................... 
0 5 12 28 +8501 PF08668 HDOD HDOD domain Bateman A, Galperin M anon Galperin M Domain \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.07 0.71 -5.03 57 3180 2012-10-01 20:28:14 2006-03-07 17:06:09 7 42 890 11 1179 4044 485 178.50 20 48.05 CHANGED LPslPslhhclhphl.psPssshsclAclIspDssLoA+lL+luNSshau..hsppls.olppAls.hLGhpplpsLsluhulhphhpst......hhchpthWc+SltsuhhuctlApth......shtp.s..-csahuGLLHDlGclhlhphhscthttlhphhtttt..shhpsEcchhGhsHsp.........lGuhLhcpWphPttlscslthH+ps ....................................................P.hsthhhclhphh...s..p..s..s.s..s.h..p........clscllppDssLosclL+h..s.N....S....s....has.........hs..p.......p......ls....ol..p.p...Als.hL..Ghpp..l..c.s..l.l.h.s.h..s.l..t.p.hhptts...............h....pph.a.p..p.....uh.t.....s.At.h.s.p.h..l.u.p.p.h................sh..tt.............-p.sa...h...s..GLlpslG.h.h.h.h.t........h........t.....h.................h...................................................................h......t......t..h..h......t.h.......h.................hs....l.h.t....W.t..h...s....hh..t.h............................................................................................. 0 403 849 1049 +8502 PF08669 GCV_T_C Glycine cleavage T-protein C-terminal barrel domain Bashton M, Bateman A anon Pfam-B_933 (release 4.0) Domain This is a family of glycine cleavage T-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. The T-protein is an aminomethyl transferase. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.25 0.72 -3.96 190 6206 2009-01-15 18:05:59 2006-03-08 14:03:29 6 31 3260 45 2055 5279 10043 94.50 25 19.51 CHANGED ssFlGp-ul..tctpp.p......u........++hlul....................hp..t.shsptGttlht................ss.....ptlGtlTSusaussl.tpsluluhl...............sstsp..Gs...pl.plp.l.....................cu..pthsupl..sph ........................................................................sFlG+csl...tppcp..p...................G............s.....++lVGL.........................................thp...scs...s..s...cs..G...t..t..lht.............................................ss..........ptlGhlTS.G..sh.S..Pol......st..s..IAlAhl.t.............tth.sth..Gp....pl..l.p..l......................cs..cth.spls.............................................................. 0 599 1224 1672 +8503 PF08670 MEKHLA MEKHLA domain Burglin T, Bateman A anon Burglin T Domain The MEKHLA domain shares similarity with the PAS domain and is found in the 3' end of plant HD-ZIP III homeobox genes, and bacterial proteins. 23.70 23.70 23.90 26.80 23.50 23.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.53 0.71 -4.67 26 387 2012-10-04 01:10:46 2006-03-08 17:08:34 6 5 225 0 141 393 60 143.80 34 29.20 CHANGED PEshsLsphlspSYp.....pahGtsLlps.ps..uscshhctLac.pspulLspuhc..ssPlFsaANpuuLchhEs.ohssLpcls.chhh-.cssRcshssthsclhpQGasph.uGlplSshGRphphEpAssWplLss-p....sscs.AhhFsNWpFl ................................................shshsphlspSY+.........thhGt.pLlpsttt........sspphhchLac.t.s.Allspuhc.........spP.lFsaANpuuLchhEh...ohstLpsls.chsh..-...p...ss...R.cphp.s.h.splhpQ...Gas....ph..uGlplSshGRphph-pAlsWpl.ls....c-s......sh+s.AhhFhsWph....................................................... 
0 29 90 116 +8504 PF08671 SinI Anti-repressor SinI Mistry J anon pdb_1b0n Domain SinR is a pleiotropic regulator of several late growth processes. It is a tetrameric DNA binding protein whose activity is down-regulated thorough the formation of a SinI:SinR protein complex. When complexed with SinI, the SinR tetramer is disrupted such that is no longer able to bind DNA. 19.80 19.80 19.80 20.80 19.30 18.80 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.94 0.72 -7.11 0.72 -4.51 17 297 2009-01-15 18:05:59 2006-04-20 09:18:08 5 4 127 4 39 164 15 29.60 46 35.33 CHANGED LDpEWhpLlpEAhctGlohE-h+cFLphpK ..LDpEWhpLlp-AhsuGlohcphRcFLch.K........ 1 7 20 25 +8505 PF08672 APC2 Anaphase promoting complex (APC) subunit 2 Mistry J anon pdb_1ldd Domain The anaphase promoting complex or cyclosome (APC2) is an E3 ubiquitin ligase which is part of the SCF family of ubiquitin ligases. Ubiquitin ligases catalyse the transfer of ubiquitin from the ubiquitin conjugating enzyme (E2), to the substrate protein. 20.30 20.30 20.30 20.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.89 0.72 -3.71 18 240 2009-01-15 18:05:59 2006-04-20 09:25:36 6 4 216 4 176 238 3 59.20 40 7.61 CHANGED YIhuMLTNhso.LsL-RIHsMLKhh...ss.sshshopcELccFLsphVcEp+Lphs.GGsY+L ...............aIhGMLT.Nhsu..hsL-RIapMLKhh...ss.sshsho.pELppFLsphVp-tcLphs.uGsY+l........ 0 56 90 143 +8506 PF08673 RsbU_N Phosphoserine phosphatase RsbU, N-terminal domain Mistry J anon pdb_1w53 Domain RsbU is a phosphoserine phosphatase which acts as a positive regulator of the general stress-response factor of gram positive organisms, sigma-B. The phosphatase activity of RsbU is stimulated by association with the RsbT kinase. Deletions in the N terminal domain are deleterious to the activity of RsbU [1]. 
23.10 23.10 23.10 23.50 21.70 23.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.37 0.72 -3.97 14 334 2009-01-15 18:05:59 2006-04-20 09:26:18 5 8 325 8 32 137 0 77.00 54 22.08 CHANGED cppY+plLccYLtspsEpsLY.pspcho+csIc+pIsPE-IVslH+shlppl..sl.....pcplhcohDlLlEVMhGYGhAY ..cp+YKuLlcESLssQD....KspLIKKCEKaTcEVI+KDVLPEDIV-IHKsYIhoLsLo.......cEDVh+TL.DVLQEIVKGFGYSY...................... 0 10 19 29 +8507 PF08674 AChE_tetra Acetylcholinesterase tetramerisation domain Mistry J anon pdb_1vzj Domain The acetylcholinesterase tetramerisation domain is found at the C terminus and forms a left handed superhelix. 25.00 25.00 28.10 28.10 22.20 21.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.04 0.72 -4.43 6 95 2009-01-15 18:05:59 2006-04-20 09:29:11 5 3 45 8 45 93 0 37.40 68 6.56 CHANGED DEAERpWKhEFHRWSuYMh+WKsQF.DHYS+p-pCusL .DEAERpWKAtFHRWS.sYMhcWKNQF.Dah..SKpEpCssL....... 0 5 6 17 +8508 PF08675 RNA_bind RNA binding domain Mistry J anon pdb_1whv Domain This domain corresponds to the RNA binding domain of Poly(A)-specific ribonuclease (PARN). 27.90 27.90 27.90 29.20 27.80 27.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.13 0.72 -4.00 2 85 2012-10-02 20:46:34 2006-04-20 09:35:57 6 4 61 5 49 96 0 81.40 61 14.59 CHANGED G.-.p.pR-HlhasTFPcpW+TuDl.phFpsFGslplSWlDsTSAFVuLpp.ptsp.slph.tYtpua+l.saApa.......pQhK ........GPDLQPKRDHVLHVT.FPKEWKTSDLYQLFSAFGNIQlSWIDDTSAFVSLSQsEQVp...IAlNTS..+.Y..AESYRIQTYAEYhtpKp............................ 0 12 16 32 +8509 PF08676 MutL_C MutL C terminal dimerisation domain Mistry J anon pdb_1x9z Domain MutL and MutS are key components of the DNA repair machinery that corrects replication errors [1]. MutS recognises mispaired or unpaired bases in a DNA duplex and in the presence of ATP, recruits MutL to form a DNA signaling complex for repair. 
The N terminal region of MutL contains the ATPase domain and the C terminal is involved in dimerisation [3]. 21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.72 0.71 -4.59 108 4304 2009-01-15 18:05:59 2006-04-20 10:11:38 6 18 3912 12 1164 3441 773 144.10 26 22.12 CHANGED s......h.hlu.....QlpssYllups...pcG.LhllDQHAA+.....ERlhYEpl.cpp..htp............psQ.LLlPhsl.pls.tp-hthlpcpt-tLpclGhplct....hGspslhlRslPshLtptph...ppllp-llspltphs.......p..........tthhcclhsthAC+sul+us...cpL ......................................hluQlpssY..l...l....s......p..s...............ps.....s......lhll..DQH..AAc.....ERltaEph..ppp.............hsth....................psQt..L....L....l.....P.......h....hl...cl........s..t....p-t..thlpc....p......t...s....t....L.p.c.....lG....lplp...........................husp....p....hhl+......u.l....Ph...hh...t.p...t....p..h...............p..p...l.......lh.-.lls...lhpps.....................p...........htphhpp.lht.h.huC+t.ul+ssp......................................................................................................... 1 401 726 969 +8510 PF08677 GP11 GP11 baseplate wedge protein Mistry J anon pdb_1el6 Family GP11 is a viral structural protein that connects short tail fibres to the baseplate. The tail region is responsible for attachment to the host bacteria during infection. 
20.80 20.80 21.00 79.60 20.60 20.70 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.39 0.70 -5.18 8 37 2009-09-10 20:27:43 2006-04-20 10:37:38 5 1 36 3 0 32 0 216.80 35 94.22 CHANGED hohscsKAuVhSRcADFLta..c.suscD.sl.......hsspslGusTlsQltKGsY.PNVQSAIsDltshu.phsVGsVllsTsusuPpulpQ.s-hloFoGoVssss..sus.llIcVaGlPVpsssGsousplsspVpsshp-hlssphhhspspcc.sosuspLpl+YlDsppH.slssaoppGI..............TloppIsupu+sGYGTWshLGspTpTLssts.sss..lYYF+RIA .....hohspstAtlhSRhAsalpac.spssshsV.......hsspsIGusolsQhtKGhhhPNVQSAIsDltshu.phPlsulllsssssuPpulpQ.sDhhoFoGoVssss..sGsslllpVaGhPVpsssGsousplsspVpstLp-hhspshhhsssppc.ssssspLplpYlDsppH.hhpsaophGI..............TlsppIss.u+sGYGsWphLGspohTLsstsssss..lYYFcRl....................... 0 0 0 0 +8511 PF08678 Rsbr_N Rsbr N terminal Mistry J anon pdb_2bnl Domain Rsbr is a regulator of the RNA polymerase sigma factor subunit sigma(B).\ \ The structure of the N terminal domain belongs to the globin fold superfamily [1]. 27.00 27.00 33.30 82.60 26.20 25.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.55 0.71 -4.26 9 83 2009-01-15 18:05:59 2006-04-20 11:15:30 5 2 83 6 13 59 0 130.20 46 47.84 CHANGED Fltcpps-LlppWsspl+clsspphshplo-chaEshspEal-lll.s.sptsspphpcclp-Fup+hVQlGhsLphlosGLptFt+hlaptMtccs..hscppt..h-llhcl-calsPlssEIlNpYohSWE ..FIpsN+s-LLssWhscMccpS-QhhsslspEthYEpTSKEFVDLIl.SslTcssscasE+L--FAEKlVpLGWPl+FlTTGLpsFGhLVYTs.MpD--....LcccE+..sDhaac.h.-oWlSshhNclVstYusoWE.... 0 5 10 11 +8512 PF08679 DsrD Dissimilatory sulfite reductase D (DsrD) Mistry J anon pdb_1ucr Family The structure of the DsrD protein has shown it to contain a winged-helix motif similar to those found in DNA binding proteins [1]. The structure suggests a possible role for DsrD in transcription of translation of genes which catalyse dissimilatory sulfite reduction. 
24.20 24.20 24.50 49.10 24.10 24.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.02 0.72 -3.97 13 56 2009-01-15 18:05:59 2006-04-20 11:17:52 6 2 52 4 29 53 5 66.60 48 66.35 CHANGED -hKptIl-a.....u+pusKoKaYF+Dhhch....hP-hKsRElKKllscLVsEtpLtaWSSGSTTMYGLKspG ....-hKptll-aLp...tcsssKoKFYFpDFtch....hPDtKsR-lKKllspLVsEtpLpYWSSGSTTMYGLKGsG.. 0 15 28 29 +8513 PF08680 DUF1779 Protein of unknown function (DUF1779) Mistry J anon pdb_2fpn Family This is a family of uncharacterised proteins. The structure of the ywmB protein from Bacillus subtilis has shown it to adopt an alpha/beta fold. 25.00 25.00 25.30 25.00 23.30 24.60 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.48 0.71 -4.46 14 234 2009-01-15 18:05:59 2006-04-20 11:23:29 5 1 231 1 55 184 0 191.20 31 80.00 CHANGED ssplpc.................Wshtu+pphshsp..ctapphspphct-htphpW..spsp-.cchtchpGshpc...thptplpll.sstpssptpoYllYEhputt...ps.s.thcph.cpshclaptcshIFoClpGphssphsts.LpppupplLcphsA+pVEsltEpsFlSlSAaoscapptIhssscc.hNlQlALRps.uhss+TplslGTPIlTsEY ...............................................................................plpp...Wshhs.+p.hshsp....ppFpphlpplct..ctph.cW......ppp.-.hctp..php...th.p.......hppclhl.....s......hopcsspppoallh-hpusc...........h..chlpph.clas...pKshlaoC.lpGhl..s..s+..lpss...L.pscsp.p.lL.+.clsA+slEplcE...cs.a.VSlSAYspcac-...s.lpospcK.lNlQlAlRps....ssKspIsVGTP.IITsEY..... 0 25 42 46 +8514 PF08681 DUF1778 Protein of unknown function (DUF1778) Mistry J anon pdb_1y9b Domain This is a family of uncharacterised proteins. The structure of one of the hypothetical proteins in this family has been solved and it forms a helix structure which may form interactions with DNA. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.18 0.72 -4.15 45 1442 2012-10-02 18:44:02 2006-04-20 11:29:57 6 3 878 2 236 790 75 77.30 30 79.07 CHANGED RlshRlss-p+pLlc+AAslpG.polosFllsuAhctApcllpcpch...lpLotpshptFh.ssLsp...PspPNscLccsht......phpp ......................lslRlss-p+sllccAApltG.pslosFllpuAhp...tApcllt..cp..ch....hhLsppsapthh.phL-p.....P..s.s..s..s..ttLpphhp....p............................................... 0 60 137 192 +8515 PF08682 DUF1780 Protein of unknown function (DUF1780) Mistry J anon pdb_1y0k Family This is a family of uncharacterised proteins. The structure of a hypothetical protein from Pseudomonas aeruginosa has shown it to adopt an alpha/beta fold. 19.70 19.70 21.50 21.10 18.70 17.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.38 0.70 -4.86 2 56 2012-10-11 20:44:44 2006-04-20 11:31:19 5 1 55 1 13 39 4 202.10 81 99.11 CHANGED s-uDaLRLLThQAEQANsFLSNARKW-RERWVCQRhLpuLNlPYRp--FsAsGppPPDVLF+tAuFEVFFVLDEGRRLN-EWR-ELpRRRpAhSLpQLlRREt+PpRIsAuEL.hRLAPTLRKKAHNYpERGhshGELDllAFssLKRtV.DhNo.FPPPTEYLRQGWRSLShVGPTFARVLFAHssAPEFLRuNLGRSILFDsGluL .............DDSDYLRLLTlQAEQANAFLSNARKWERERWVCQRLLQGLNlsaRsEDFsPAupE..PPDVLFRDutFEVFFVLDEGRRLNDEWREELsRRRSAFSLuQLVRREA+P+RIsAuELLtRLAPTLRKKupNY+ERGIDLGcLDIIAFsSLKREVLDLNoHFPPPTEYLRQGWRSLSLVGPTFARVLFAHPGAPDFLRsNLGRSlVFDVGISL............ 0 1 3 8 +8516 PF08683 CAMSAP_CKK DUF1781; CKK; Microtubule-binding calmodulin-regulated spectrin-associated Mistry J, Baines A anon pdb_1ugj Domain This is the C-terminal domain of a family of eumetazoan proteins collectively defined as calmodulin-regulated spectrin-associated, or CAMSAP, proteins. 
CAMSAP proteins carry an N-terminal region that includes the CH domain, a central region including a predicted coiled-coil and this C-terminal, or CKK, domain - defined as being present in CAMSAP, KIAA1078 and KIAA1543, The C-terminal domain is the part of the CAMSAP proteins that binds to microtubules. The domain appears to act by producing inhibition of neurite extension, probably by blocking microtubule function. CKK represents a domain that has evolved with the metazoa. The structure of a murine hypothetical protein from RIKEN cDNA has shown the domain to adopt a mainly beta barrel structure with an associated alpha-helical hairpin. 19.60 19.60 20.30 22.30 19.00 18.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.53 0.71 -4.42 14 287 2012-10-02 14:14:57 2006-04-20 11:33:32 6 6 107 1 153 255 3 125.80 57 10.32 CHANGED ss.cl.ahcsusKSN+slIpNAlsassLuGpsNcsp+ptlLc..clscsp...upHFlILF+..DspppaRulYoh.psps-phhKltGhG..PptlsptMlcshaKYsSusKpFptI..sKphosslDAhol.......K+ ........................TGPKL.aKEPSuKSNKhIIpNAluH.CCL.A.GKVNEspKp+ILE...EhEKS-...ANHFLILFR..DuG...CQFRuLYo.......Y..sP-T..E.........E...lsKLsGhG.......P+sI.oc...pMl-tlYKYsSDRKpFopI.PuKThSsSVDAhTI....+sHLWQsK+................ 2 44 60 101 +8517 PF08684 ocr DNA mimic ocr Mistry J anon pdb_1s7z Family The structure of an ocr protein from bacteriophage T7 has shown that this protein mimics the size and shape of a bent DNA molecule [1]. ocr has also been shown to be an inhibitor of the complex type I DNA restriction enzymes [1]. 
25.00 25.00 83.30 83.00 22.20 21.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.24 0.72 -3.97 3 48 2009-09-11 05:26:13 2006-04-20 11:37:54 5 1 11 3 0 48 0 97.00 83 90.53 CHANGED MSNMTYsNVasHAYEhLKEpIRYDDIR-sDDLSDAIHEAADNAVPHYYADIFSVMASDGIDLEFEDSGLMPDTKDVT+ILQARIYEQLTIDLasDAEDLLN .MSNMTYNNVFDHAYEMLKENIRYDDIpDT..DDLHDAIHMAADNAVPHYYADIFSVMASEGIDhEFEDSGLMPDTKDVIRILQARIYEQLTIDLW.......... 0 0 0 0 +8518 PF08685 GON GON domain Mistry J, Rawlings ND anon Rawlings ND Domain The GON domain is found in the ADAMTS (a disintegrin and metalloproteinase domain with thrombospondin type-1 modules) family of proteins. It contains several conserved cysteine residues. 22.60 22.60 24.80 23.10 20.90 21.70 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.68 0.71 -4.47 11 187 2009-01-15 18:05:59 2006-04-20 11:48:09 6 56 84 0 124 173 0 176.80 43 12.88 CHANGED lspoCpElQphpuhtpDGEYhLpV..cGchl+IYCHGMpocsPpEYloLspGsp-NauphYshRLtssppCP.sGpc+psssspss..hshGtTpFsKlRlDlsshpIhssDapFupop.Gps.PauoAGDCYSss.+CPQGcFSINLpGTGh+lpssspWpspGshsshch....c+spssp+VhG+CGGaCGtChPp.poGLhLpVh ...........................................h..poCpElpt..h.p....sh..pc.......DGEYhL.l....pG+.hl...+...laCtsMpoppPKEYlTL.sp.G.p-NaSElYG.h..RL..ps.Ph...pCPaNGsRppsC..pCpps..hsAGhThFpKlRl..Dl..ss..hp..IhssDhpFApT.......G.........ps.VPaAT..A....GDCYSss.....cCP...Q..GpFSINLhGTGhplspsspWhspGp...a.ss..pl.....p+o.ssp+lhG+CGGaCG+ChPp.tsGL.lpl.h............................... 0 48 54 93 +8519 PF08686 PLAC PLAC (protease and lacunin) domain Mistry J, Rawlings ND anon Rawlings ND Domain The PLAC (protease and lacunin) domain is a short six-cysteine region that is usually found at the C terminal of proteins. It is found in a range of proteins including PACE4 (paired basic amino acid cleaving enzyme 4) and the extracellular matrix protein lacunin [1]. 
26.40 26.40 26.40 26.40 26.20 26.30 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.35 0.72 -3.68 29 739 2009-01-15 18:05:59 2006-04-20 11:50:11 6 140 83 0 426 640 0 34.10 37 3.34 CHANGED pCpDpsp........CtlVhpspLCphpaYpptCCpSCpp ...........pCpDpsp...........aCtlVhptpLC.sptaapptCCcoCp.... 0 50 86 209 +8520 PF08687 ASD2 Apx/Shroom domain ASD2 Mistry J, Hildebrand JD anon manual Family This region is found in the actin binding protein Shroom which mediates apical contriction in epithelial cells and is required for neural tube closure. 23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.67 0.70 -5.06 14 259 2009-01-15 18:05:59 2006-04-20 13:14:24 6 9 75 2 149 229 0 247.60 41 24.19 CHANGED cELs.....pplsstDcShs....shLsP.....csshsLhcuLhshsp.hht.t.tttthh.phs....................s...sossssahssuss+AE.Lh.chpshp......pt.spsE.spslstKKhELlppls+KLpsL+ctpcsLhp-hpsNssLGp...............................-lEutVpphCKPNEh-KF+hFlGDL-KVVsLLLSLSGRLARVEsALsslspsss.-E+toLhEK....+clLpcQpEDAKELKEpl-RRE+sVhclLuphLstEpLtDYp.......................................HFV+MKuALllEQRcL-DKI+LuEEQLcsLp-SL ..........................................................................................Ls.....ppl...D..h.....sh..ss....csshslhpslhs.s..hh....t...ttth..p..........................................................hsssssaassSssKA.E......Lh.ch.p-h...............tt.stp-.......st..-l.stK...K.......ELlp.uls+KLpsLc-tpcsLhp-hpsNssLGp...............................-VEAhlppl..C+PsEh-Ka+hFlGDL-KVVsLLLSLSGRLARVENAL.....ssl.s....ps.....u.....s...p.E..+ps.LhcK.................pclLhpQhEDA+ELKEslDRRE+hVhslLupaLstEpLtDYp.......................................HFV+MKuuLllEQRcL--KI+LG-EQLcsLh-SL....................... 
0 20 30 74 +8521 PF08688 ASD1 Apx/Shroom domain ASD1 Mistry J, Hildebrand JD anon manual Family This region is found in the actin binding protein Shroom which mediates apical contriction in epithelial cells and is required for neural tube closure. ASD1 has been implicated directly in F-actin binding. 25.00 25.00 36.10 26.00 23.30 22.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.19 0.71 -4.50 8 137 2009-01-15 18:05:59 2006-04-20 13:15:24 5 8 37 0 75 108 0 156.60 34 12.14 CHANGED A+LQKS+STssLss.-uEsEssssph+st.....u.sos-uSFssTYK-+LKEAQuRVL+ATSF+RRDL-P.hPtp.h.t..cp.shphs.p.s.su.sspsshs.p...tt..ptso.....................sssusPpVsRIGGRKRhTsEQKh+SYSEPEKlNEVG............lpcEhs.t......spp.pposGohADRhKaFEcsuK ...............................pL.+SpSshtL.s...tsptp.....t.................s..s..-s.s.h.s.psY+spLK-AQuRV...LcATSF+.R..+DLc......Ps...t....hst...tp...shth...hp.s.s..........s.sps...........................t.st.......................tsts...ssRhGuR+RhTsEQKh+SYSEPEKhscVG............lst.t.p......................s.sohAcRh+hFEppsp............................ 0 3 11 26 +8522 PF08689 Med5 Mediator complex subunit Med5 Mistry J, Wood V anon manual Family The mediator complex is required for the expression of nearly all RNA pol II dependent genes in Saccharomyces cerevisiae. Deletion of the MED5 gene leads to increased transcription of nuclear genes encoding components of the oxidative phosphorylation machinery, and decreased transcription of mitochondrial genes encoding components of the same machinery [1]. There is no orthologue from pombe, and this subunit appears to be fungal specific [2]. 
19.20 19.20 23.30 23.10 18.70 18.70 hmmbuild -o /dev/null HMM SEED 989 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.92 0.70 -13.80 0.70 -6.87 19 144 2009-01-15 18:05:59 2006-04-20 13:29:25 5 4 120 0 112 159 3 740.80 21 85.92 CHANGED M..........tustphpshlppCht++lsuspFhsLh...h.c+hPhsttshhphh...hpsp.tp.......................................t.ph-sl.ls.YlstLhh..hl.ssslLpshL...........................sphtp.sppphhh...phohht-.+lhpclhhshhpsphscsltpulthhs......slsphlptllsht.shp.s..st.tshhsp.-tsshh.s..hshllsslhtpphuhplLop...pts...h.tp...uhlshsspsuls...t..p+htthpp.p..hshhsps.spshssshhcshhsphhph...psplhshsshpocutLahYlssh..lsupsh.s...h..............hhp-LIpAuFcshusuh.......hps-sshshaha+sFlls+LPhhlhtht.........ssshsp...sh-.slp+Aluphpsst.sohophhsh...ssoshsDlRp-FlhuhthptLlP.osI.......pplhscs..s.psLslss.hshc-llpphpss.c+.........................hpQllspl-shpust.ssIssAIsElhpchsppp-h.pLpplsstLspps.puLsllLhFps..PpplLp.sL..sphL..........ss.t.hDE..DptE.QsVYppFGslLLLllshpa+Y.....clshhDl........uIsus.pS......FlhcLhttuspSpp.splsppppppLssWlpuLF.sp..GlSD-lMsu..ssPp-hYhLlPhlFpQslhAspsGtl.-hpsLpuGhEYhlpPFLlsuLlhhlhWLtpa.hhcppss.shslphhptllp.....sushSspu..ptlHpsVLpIsupsL.ppL+sh+stpsspp........t..cPhlcsl.spLsh.hs................sshpppcLp.hphh.................sh..psls.hho....s...phssssYsac.llssIchlusp+lLtsllcEL.......+hpsp......sususlslDlusshlsus.s...-shsh..t.hps....s....s....s..............p..hsh+sslph-c-.s.hhs-sDshtu...................u....p......................htshhpphp.t..sshsshstt.....hchctt.s..tsutpsss.............pchsslpp 
...............................................................................................................h.thh.psh.pphss..F.th....h.tp.s.......p.h.....ht.p..t.............................................h.h..Yl..l............phl.shh.....................................................................................tp.............s.....hh..phhh...ptph.p.....h..h..........................h.thh..hhth........t..............t...t.p.h....h.t...............hh..h.th..pth.....th.t.......................t......t...hh..t..t........................h.p...................t.t.hp.h.....phh...t....p..l..ph.......h..s.....shl.ha.s...h..h.tps..t..................................h.-LI.ssFsshuphh........ppp.s...h.hhh.Flhp+lP.hh.th...................sh-.pl.pAlt.....ph...sp...h..........sshsp......................s......ss....slRp-FhhuhhhhtL...s.l..............phl.sp...s.pt.s.tt...hpphltph.t..pp.............................h.plltp.h-ph.tsst..ttlstslhphhtphhpphph.pltplst.L..p..p...slshhLha.p..s.tllpsLhthl............................s..s.t.........h-p..sp................h-.pshappFG.llLhllshh.pa..........sls..cl.................hhts..ps........l.hchhttsp....spt.ppltppp.pp.lssWlpuLF........tp........GloD-hhpu..ssspphhhLlshlapQslhAhp...tGhl.p.p.sLpsGhEY................hhpshLhs..sll.sl.......haLtp......hh....pp.p...p.p..lphh..hlh........ss.o.pt.......pthhpsVLplsu..L.p.L+t.hphp..tp..................t...s.hpsl...h...hp...................s..p..tplt.ht...........................................hh..p..sl..h......p......s...Yshc.hhthlph.ss.phl..llcpl............th.sp............stpsphshshhh..hlsh..h....th.........ht.........................................................................t...p.c..h..ptc...hh.ct.p..ht.................................................................................................................................................t.......................................................................................................................... 
0 28 61 96 +8523 PF08690 GET2 GET complex subunit GET2 Mistry J, Wood V anon manual Family This family corresponds to the GET complex subunit GET2. The GET complex is involved in the retrieval of ER resident proteins from the Golgi [1]. 20.10 20.10 20.20 20.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.90 0.70 -5.17 9 97 2009-09-11 07:52:02 2006-04-20 13:35:14 5 2 96 4 72 96 0 248.30 21 81.04 CHANGED pLS-sEKR+LLRERRQtKhSpGsASuRLNcIhuQupusphso....pSVLDpcsssssssspssps............................ssPElpDl.pshsss.............pspsstpslD...chFpplhphQstG....tssss-sshsslhsMhpphts.................s.stostusppst..hhpppLlcYppYphphhKhhhlll+ashhLhsalYahhp.ss.h..h....hhhptL.p.h.sspsFFolFsThEllhluhYYplhpplphhssssshp.ssKllshsSMV....LP.ltshcshVlhhLpYa-lluMhlsDlshVlVhhGLhoh .............tEpt..RLhRERRpAKh.ppGsASuRLsKITu..u....tssptss.......pSsh-ss.s.sss.s........ss.s.s...sspp................................ssPc.p-..p..t.h..s...........................................t..tptsspshss.p...chhptlht......p...t.t.......................t.........s.ss............t..s...sshs....p....h.....h.p.hh.sh...ss.s..........................s..........................................p.........hh..h.....h....h.h....h...hh...hh.........................................................h.hh.s.Eh............................................................................................................................................................................................................................................................................ 1 10 35 62 +8524 PF08691 Nse5 DNA repair proteins Nse5 and Nse6 Mistry J, Wood V anon manual Family Nse5 and Nse6 are non essential nuclear proteins that are critical for chromosome segregation in fission yeast [1]. Nse5 forms a dimer with Nse6 and facilitates DNA repair as part of the Smc5-Smc6 holocomplex. 
25.00 25.00 44.10 30.90 19.20 21.00 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.73 0.70 -5.98 16 54 2009-01-15 18:05:59 2006-04-20 13:39:35 5 2 28 0 31 46 0 438.90 23 94.46 CHANGED Msusspsp...s.sp.s............hhVp.spcc..s.Ethsuhp.htshspllh.h-t...........t...h......t-llhhhhohssh.ssa.csshlcc.t.hs.st......t.hsppsht.lpphhptLpphcspphsp.slphh+sphhhh.cp.......phchphspphcsptspphhchhhpsst.sp.psh.hlshcsshpshhsh.t.t.thhtsh.hshhhpp.st.hp.h..sLsshl..h..tppssshhhph..tp....hhchlhslhsl....+phhhh.scsstt.pp............lhtFlphltshphhpshspph..hhsshpph.t..hthphhslYhph.-hshs.hahphs+hhsphKtt...............lLppl.pshph....h..sphlhp.hcslhsppchpphhtFh.....................................hlhshssp.hsh..hp.plhphlcs............................phlDhstpss.pch.lsl.p..hh.........................................................................LKshhp ...................................................................................................Mss.........ts..st........p.....h.Vt.h.cc..s.Ehlps.hh.-s.sp.L...Ep.........t.pthhh.s.....sllhhhhh.p...pph.pps..ct.s.hshph.........p.hshts...h..pphhshlpchps.phphh-..hhppphhls.sol......pschppt.thp.tphsct.hslhh+pshSsc.cs.phhohps.Ycphhsh..pcpshh.t.hhhshphspssEFlphhh....oLos.L......p-pss.hh.s.......pch..shh-.Lhslasl....+.hhah.p-ssp.s.spph..........LtsFhp.l.sRphhsthpp.h..hhps..ph...s.p.........sphhssYhp..-ps.hhhah.hh+h.P.hcst...............LLt+l.sscphhp.p.h..pp.lhp.hppLhshpshpphhhFh...................................................lp.hust.hsF............ps.plhphhcs............................shh-.sspss.pcs...lsI...................................................h...................................... 0 4 14 29 +8525 PF08692 Pet20 Mitochondrial protein Pet20 Mistry J, Wood V anon manual Family Pet20 is a mitochondrial protein which is thought to play a role in the correct assembly/maintenance of mitochondrial components [1]. 
21.40 21.40 21.60 22.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.95 0.71 -4.23 6 105 2009-01-15 18:05:59 2006-04-20 13:51:08 5 3 29 0 59 83 0 97.30 28 44.10 CHANGED KKtspDappLPRVPoTpaLctc-hosDlLYSGYRPlhhss+-sPLhpppps+.hcathchp..........chsEPhpP....................................WsSSAhGhEaasEW-NVPs-llKcLKPFcssp.pc ......................lP+VsoTpal.tp-hppphLauGYRPlh......h....ss...pt...............tp.....ptpp.....hph........................t..................................................................................W.sS..G.h.a.s...ac.lP..hhtphKPFc......t................................ 0 6 23 50 +8526 PF08693 SKG6 Transmembrane alpha-helix domain Mistry J, Coggill P anon manual, Wood V Domain SKG6/Axl2 are membrane proteins that show polarised intracellular localisation [1]. SKG6_Tmem is the highly conserved transmembrane alpha-helical domain of SKG6 and Axl2 proteins [1], [2]. The full-length fungal protein has a negative regulatory function in cytokinesis [3]. 42.00 42.00 43.00 42.20 41.90 41.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.96 0.72 -4.64 13 66 2009-09-11 17:06:57 2006-04-20 13:56:40 5 4 37 0 39 61 0 38.70 41 5.65 CHANGED Ksoc..tspsssluluVslPVuVIllVLhhhLhhhaRRpK .......os.tpssuVslAluVulPlGVIlllLhshLhhhaRRsK.. 0 8 20 33 +8527 PF08694 UFC1 DUF1782; Ubiquitin-fold modifier-conjugating enzyme 1 Mistry J, Bateman A anon pdb_1ywz Domain Ubiquitin-like (UBL) post-translational modifiers are covalently linked to most, if not all, target protein(s) through an enzymatic cascade analogous to ubiquitylation, consisting of E1 (activating), E2 (conjugating), and E3 (ligating) enzymes. Ubiquitin-fold modifier 1 (Ufm1) a ubiquitin-like protein is activated by a novel E1-like enzyme, Uba5, by forming a high-energy thioester bond. 
Activated Ufm1 is then transferred to its cognate E2-like enzyme, Ufc1, in a similar thioester linkage. This family represents the E2-like enzyme. 25.00 25.00 74.30 44.60 22.10 21.50 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.07 0.71 -4.76 9 167 2012-10-02 15:28:41 2006-04-20 14:03:43 6 2 145 10 111 163 2 153.40 68 93.63 CHANGED tsT+polspIPLLpTpAGPRDt.-tWlpRLKEEYtuLI+YVEpNKpsDNDWF+lE.SNcpGT+WhGKCWYlHNhhKYEFDlpF-IPlTYPsosPEIslPELDGKTsKMYRGGKICLTsHFtPLWu+NsPKFGIAHALALG.....................LuPWLAsEIPsLl-pGllK.p-c ...........p.sT+cslupIPLLpT+AGPRDt.-hWsQRLKEEYpuLIp...YVcsNKpuDNDWFRLE.SNcEGTRWaGKCWYlHsLlKYEFDlEFDIPlTYPsTAPEIAlPELDGKTAKMYRGGKICLTsHFKPLWARNVP+FGIAHAhALG.....................LuPWLAlEIP-Llp+GllpaK-................ 0 49 65 89 +8528 PF08695 Coa1 DUF1783; Cytochrome oxidase complex assembly protein 1 Mistry J, Wood V anon manual Family Coa1 is an inner mitochondrial membrane protein that associates with Shy1 and is required for cytochrome oxidase complex IV assembly. It contains a conserved hydrophobic segment (amino acids 74-92) with the potential to form a membrane-spanning helix. The N-terminus of Coa1 is rich in positively charged amino acids and could form an amphipathic alpha helix, characteristic of a mitochondrial presequence. A cleavage site for the mitochondrial processing peptidase is predicted adjacent to the presequence. Upon in vitro import into mitochondria, Coa1 is processed to a mature form, indicating that it possesses a cleavable presequence [1]. The eukaryotic cytochrome oxidase complex consists of 12-13 subunits, with three mitochondrial encoded subunits, Cox1-Cox3, forming the core enzyme. Translation of the Cox1 transcript requires the two promoters, Pet309 and Mss51, and the latter has an additional role in translational elongation. Coa1 is necessary for linking the activity of Mss51 to Cox1 insertion into the assembly complex [2]. 
21.10 21.10 21.10 21.10 21.00 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.31 0.71 -4.59 43 323 2012-10-01 19:51:31 2006-04-20 14:04:22 5 3 276 0 200 455 12 113.00 22 61.06 CHANGED hhhh.shaslshssuhhhlhNhpKppSsllsssLatlRpSspsp-hLG..-tIshpsth.....PWlpGplNpl...pGclc.lsFsV+Gs+...u.pGpl+lpusRcsctt.Fplccaslpsc....supp...lcLlc ..............................................hh..h..lhhhshs.s.hhhhh....hs.h...p.......h....pp....o.....s....h..h.ppslhtl+psspshphL....G..-.....s.hppth...............hls.G.phNps.........pG..c..hs..lphsVpG..s+..................u..pGplhhpup.Rps.p.p....aphpphtlphc....psp.l.l........................................... 0 61 111 165 +8529 PF08696 Dna2 DNA replication factor Dna2 Mistry J, Wood V anon Pfam-B_8878 (release 19.0) Family Dna2 is a DNA replication factor with single-stranded DNA-dependent ATPase, ATP-dependent nuclease, ( 5'-flap endonuclease) and helicase activities. It is required for Okazaki fragment processing and is involved in DNA repair pathways [1]. 
20.70 20.70 20.80 20.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.14 0.70 -5.02 31 301 2012-10-11 20:44:44 2006-04-20 14:14:05 6 24 251 0 216 318 21 196.90 29 17.42 CHANGED llhhpcsssppptlhL+ssWhpss.hphGDhlHllu.........phssssshslsssps..hlIlpPDhLlSuTslusSlpC.R+uVLp-ph.ptssssohshlhGTIlHElFQcul.....pssphssphlp.phhppslc..pah.plahlshs.hspltpclpp.thsslppWsppahpppsssp.h..............tpsppttlslscllDIEEsIWSPpaGLKGpIDATlcsp .................................h.....ppttppphshLpstWh...pss..hp.Gshlclhu......................................phs...sp.sp.hl.lsp...sp..s........hlIlp..PDhLlSuTslusShp...C.R+uVLp-ph...+s.............s......sp......ss.....t.shlhGollHEl...FQcul.........................sp....p..h.s...phlp...phhpphlp.......phlpplYt...l......shs..s-s.ppcltp.hhsslppWsppahpppsp.sp.h.................................tpspp.phplscllDlEEplWSPhaGLKGpIDsTlps................................................................................ 0 73 117 180 +8531 PF08698 Fcf2 DUF1784; Fcf2 pre-rRNA processing Mistry J, Wood V anon Pfam-B_13623 (release 19.0) Family This is a family of eukaryotic nucleolar proteins that are involved in pre-rRNA processing [1]. 20.90 20.90 22.20 26.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.25 0.72 -3.98 24 326 2009-01-15 18:05:59 2006-04-20 14:20:19 6 6 281 0 231 325 5 97.50 39 31.77 CHANGED KppccsssspWFsh.cs-..lTsEl++DLpll+hRsslsPc+aaK+sctcp...hPcaFphG.......Tllpsss-a.........au.oRh............s++cRppohl-E...Llp-sshpca.....h++K ...................pppcosGssWFshsts-..lTsElK+DLplL+hRs.slDPK.RaYKK..sctct.....hP+aFQlG.......Tll-uss-F....................as.uRl............s+KpRKp..Tll-E...Llu.Dpchppa..++K................................................... 
0 82 129 195 +8532 PF08699 DUF1785 Domain of unknown function (DUF1785) Mistry J, Wood V anon Pfam-B_1585 (release 19.0) Domain This region is found in argonaute [1] proteins and often co-occurs with Pfam:PF02179 and Pfam:PF02171. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.47 0.72 -4.60 46 958 2009-09-11 17:06:15 2006-04-20 14:26:01 5 11 239 8 597 914 1 53.60 41 6.15 CHANGED hslupSF.Fstp...........hsp.......sLGs.Glpuh+GaapSlR.........so.....ptu.LhLNlDVSsssFhcs ...............................................sVGRSF..Foss.............................tst......tsL..Gu...G..hEsWh........GFaQSlR.........Pu.......pht..hhLNI.D........VSsTAFacs........................ 0 155 301 462 +8533 PF08700 Vps51 Vps51/Vps67 Mistry J, Wood V anon manual Family This family includes a presumed domain found in a number of components of vesicular transport. The VFT tethering complex (also known as GARP complex, Golgi associated retrograde protein complex, Vps53 tethering complex) is a conserved eukaryotic docking complex which is involved recycling of proteins from endosomes to the late Golgi . Vps51 (also known as Vps67) is a subunit of VFT and interacts with the SNARE Tlg1 [1]. Cog1_N is the N-terminus of the Cog1 subunit of the eight-unit Conserved Oligomeric Golgi (COG) complex that participates in retrograde vesicular transport and is required to maintain normal Golgi structure and function. The subunits are located in two lobes and Cog1 serves to bind the two lobes together probably via the highly conserved N-terminal domain of approximately 85 residues [2]. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.70 0.72 -4.14 56 831 2012-10-03 17:31:52 2006-04-20 14:27:19 6 16 297 0 605 1053 2 83.80 20 11.90 CHANGED hsssshcscphhp.t.htpps.....l....pplhphcpplp........pplpphpp-l+phVhpsYpchlpss-sIpphcsph........pplpsplsplppshpphspt ..............................thsspthh.........p..t.ht.p.ps.....................h....pclp.phc.pplp...................................pphcppsp-l+phVhc..........sYpchIp......su.......cpIp.p.hcsph............pplps.lsphppthtth...t................................................... 0 197 331 497 +8534 PF08701 GN3L_Grn1 GNL3L/Grn1 putative GTPase Mistry J, Wood V anon Pfam-B_22650 (release 19.0) Family Grn1 (yeast) and GNL3L (human) are putative GTPases which are required for growth and play a role in processing of nucleolar pre-rRNA [1]. This family contains a potential nuclear localisation signal. 21.20 21.20 21.40 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.09 0.72 -4.08 36 317 2009-01-15 18:05:59 2006-04-20 14:33:43 6 5 270 0 228 328 2 78.00 37 14.88 CHANGED +hRaKIcKKsut+pRKt+KtAK..Kssph+S.+p.p.K..DPGIPNsaPaK-clLpElEcc+pppcEc+ppp+tppptc+pttp.cps ...................+hRaKIpKKstt+pRK.RKt....A..K...Ks.sp..........h.....+p..+t..+.K.......DPGIPNtaPaK-clLcElE...pc+pptc..Eccppp+ptpppppptt...tt................ 0 78 127 190 +8535 PF08702 Fib_alpha Fibrinogen alpha/beta chain family Mistry J anon pdb_1m1j & pdb_2a45 Domain Fibrinogen is a protein involved in platelet aggregation and is essential for the coagulation of blood. This domain forms part of the central coiled coiled region of the protein which is formed from two sets of three non-identical chains (alpha, beta and gamma). 
29.50 29.50 29.50 33.20 29.40 29.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.15 0.71 -4.07 20 256 2009-01-15 18:05:59 2006-04-20 14:42:43 5 5 86 240 81 298 0 130.80 29 29.67 CHANGED scpssstpsD-caGshCPTsCclpshLs+hcpslc.pclpplcshLpphpppsssscphlpplpshhpscpssspssptlhsthpcslccpl..hhhh-pplssp.ppIchLQsslpsppp+Ip+LEscIspthcpC+pPCpcos..pIss ..........pts.shh.DpcaGshCPTsCtltshLsphppslc.pclppLcs.Lpphpppospspphhptlpphhpsctts.psst..sl....st.....hpps.+chh...hhhcth..ph.pplphLpphlps...chppLc.cls...p.CptsCppss..pl..s............ 0 3 9 33 +8536 PF08703 PLC-beta_C PLC-beta C terminal Mistry J anon pdb_1jad Domain This domain corresponds to the alpha helical C terminal domain of phospholipase C beta. 23.10 23.10 23.60 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.23 0.71 -4.58 8 215 2009-01-15 18:05:59 2006-04-20 14:48:55 5 16 68 2 101 201 0 168.00 38 15.68 CHANGED hDp-....l.-h+-cppQcLLpLREEQacsp++pKcpHlppthpKLpElAcEpQssQLK+LKElsE+EKKELKKpLD+KRh-+Is..pA+Tp-KttpEc-KpEIN+SHIQEVVQsIKpLEEsQp+RQEKLpEpps-sLQpIp-cEPphQuphht-aptch+pLPsEVpchLppptpcs...........shPstscs .......................................p+hh-L+-+QpQpLL.pLRpEQh.sEttp+ccH......lcphhpKLp-lAcEsQs..sQL.K+LK.EhsE.+Ep.KELpK...pl-cKRppc...Ip......pspo+DKpp.tE.c.KpEl.s+SaIpEsVph.I+.RLcEspp+RpE+L.cpppplhQQIt-c.cs.c....hpt.ph.....tEh.ptphttL....-l...h......................t...................... 0 10 19 51 +8537 PF08704 GCD14 tRNA methyltransferase complex GCD14 subunit Mistry J, Wood V anon Pfam-B_5615 (release 19.0) Family GCD14 is a subunit of the tRNA methyltransferase complex and is required for 1-methyladenosine modification and maturation of initiator methionyl-tRNA [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.52 0.70 -4.88 7 1145 2012-10-10 17:06:42 2006-04-21 14:09:47 5 11 975 20 564 2188 468 194.50 31 60.23 CHANGED GalYlLtPTPELWTlsLPHRTQILYosDIuhIhhhLEl+PGoVVsESGTGSGSlSHuIhRolAPTGHLaThEFHppRAcpAR-EFccHtl...sp.hVTVpppDVC.ppGF...tlsthADAVFLDlPuPW-AlsHAhssl.+hcGGRhCSFSPCIEQVQRTCpsLtphGFsEIpTlEVL.psasVRpsplsh.DL.utst.cssp..................ssssss..pSus.................Ph.....................pEshGHTGYLTFAsh ....................................................................sh..shP+t.sQl.lY.sKDh.u.......I.l.h...h.h.-..l.h..P.G.s.p...........VlEAGsGSGuLohsLhRA.l..................u.....s............p........G.p..lho.aE....h...+p..-..ht....ch.A............p...p...s....h........c......p....h...t.h.................sp....h..pl...p...h...p....D.....l..t.....p.......p.......u.h.......................................................t...t...............s............h........D.....t........l..h.....L.....D....h....s..P..W..p...s...l..p...p.st........c.....s.............L...................hs...........G.......G.............h..l.ss.a.s.....s...sl......p.Q..l.......p.......+s.......hc..sL..+..p...t...s...a...s...........-.......p.shEs...hh.Rpac.s.....p.....................................................................................................................................................................................................................................................................................................................................hh............................................................................................... 0 193 343 476 +8538 PF08705 Gag_p6 Gag protein p6 Mistry J anon pdb_2c55 Domain HIV protein p6 contains two late-budding domains (L domains) which are short sequence motifs essential for viral particle release. 
p6 interacts with the endosomal sorting complex and represents a docking site for several cellular and binding factors [1]. The PTAP motif interacts with the cellular budding factor TSG101 [1]. This domain is also found in some chimpanzee immunodeficiency virus (SIV-cpz) proteins. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -8.16 0.72 -3.92 130 21799 2009-01-15 18:05:59 2006-04-24 12:03:20 6 32 62 6 0 17390 0 35.10 70 10.25 CHANGED .QSRs.......E.....PT...APP.A............Esat.hGEE....h.sss...K.....QEt+....D.........+.........PL .............LQS.RP..........E.......PT......A..P.P..A........E.S..F.R..FGEE........h..TPu........K............QE...h........D..............+.E........PL................. 0 0 0 0 +8539 PF08706 D5_N D5 N terminal like Mistry J anon manual Domain This domain is found in D5 proteins of DNA viruses and bacteriophage P4 DNA primases phages. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.96 0.71 -4.12 74 1334 2009-09-10 21:39:52 2006-04-24 16:40:24 6 39 1034 0 232 1230 422 157.50 16 23.13 CHANGED Aphhtcha.....tpp..ltasstht...............Whh.asuh.........Wp.s............ppthtphhppht.....chhhtpt..............................................................th.phhhpttpspthpshlpphpt......................................hssphpchDsc..shhlshsNGll..Dlcs..Gph....................ts..tcs.cchhTp.....hhshsa....................ps.......................ssst.........appaLpc...hhs 
..............................................................................................................................................................................................................................................................................t...............h...a.s..................Wt..................h.t..hh.tphh.......t..h.tth....................................................................................t.h.....t.h..s...pptlp.s..slcthp...........................................h.ht.pph...c.ss......pLlshp.N.G.ll......Dlco......uph...........................................................ps.....ass..cp..hhop....hssssa.............................................ss........................................s.....pssp...........apcaLpph............................................................... 0 80 154 199 +8540 PF08707 PriCT_2 Primase C terminal 2 (PriCT-2) Mistry J anon Aravind Domain This alpha helical domain is found at the C terminal of primases. 20.90 20.90 21.00 20.90 20.50 20.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.87 0.72 -4.11 50 419 2009-01-15 18:05:59 2006-04-24 16:58:30 6 30 316 0 84 430 307 76.60 23 10.50 CHANGED clcshLphlss...s.h.DYcsWlplGhAL+pth.....u........spuhclWcpWSpp........us+Ycs.pc..spppWpoF.....csss...lThuTlhhhA ...................htthlphlss....s.h.sYppWlplG..hAlpsth......u.........cpuhphapcaSpp........us..+Ypt...pc.....scphW..psh........psss........lshuTlaahA........... 0 26 56 72 +8541 PF08708 PriCT_1 Primase C terminal 1 (PriCT-1) Mistry J anon Aravind Domain This alpha helical domain is found at the C terminal of primases. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.29 0.72 -4.16 93 1023 2009-01-15 18:05:59 2006-04-24 17:24:04 6 23 745 0 118 843 165 69.20 23 18.39 CHANGED pttht..stssthGRNssLFchs.tthh.hctlsppt..................lhphspthNsp.h......ssPLstpElcpss+Slh+hphp .................h....httsptuRNssLhphs...tthh.hptlspph.................shphhth.hNsh.h.......ssPLs.tpElpph.hcShh+hph.......... 0 47 77 105 +8542 PF08709 Ins145_P3_rec Inositol 1,4,5-trisphosphate/ryanodine receptor Mistry J anon pdb_1xzz Domain This domain corresponds to the ligand binding region on inositol 1,4,5-trisphosphate receptor, and the N terminal region of the ryanodine receptor. Both receptors are involved in Ca2+ release. They can couple to the activation of neurotransmitter-gated receptors and voltage-gated Ca2+ channels on the plasma membrane, thus allowing the endoplasmic reticulum discriminate between different types of neuronal activity [1]. 
20.30 20.30 20.30 20.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.15 0.70 -11.37 0.70 -5.31 19 714 2012-10-02 19:42:32 2006-04-25 13:25:41 6 68 123 27 374 627 9 196.90 40 6.46 CHANGED ppsSFL+hGDlVSLYsEuosp.........GalSThGLs--+CllpssusshssP...Ph.cFcsClFpl.sPhsphsApcphhput.scpssss..-........tuAphpp+ps........t..lhYGpslQ.LLHh+SshYLos.pphPuhh-KsAh+VsLspsus.Eu..sWahIpPhaKhRStGDsVsVGDcVlLssVsus.......p..LH.uss.thh-ssst.hpVsussppTsWplphahpts-s .......................................................................................p...t.Lp.h.sD.lsL.s.u.shp................hhlu.s.....G.h.s............s..ch..C..h.lpsts.....ss.psP.......................P...chp....ChFhl.p.h...p....hpA........p.......ph...hp...s..t..s...c...tss..pt..t......................................................puAphtt+ps.............................................................lhY.G.p.s.I...Lh.HhpSsh......YLos.pp........uhh-...K.A..hc....VsL.......ppsus......................Eu....sWahIp.P..h......K...R.....S.p.G.....-..p...........V....hl.GDcllL.ss.V...su.t.................p..LH.u.................hss...s...s...........h.......pV......s......u..s..p.p......T.Wpls.hhphp.................................................................... 0 133 156 257 +8543 PF08710 nsp9 nsp9 replicase Mistry J anon pdb_1uw7 Domain nsp9 is a single-stranded RNA-binding viral protein likely to be involved in RNA synthesis [2]. Its structure comprises of a single beta barrel [1]. 
22.10 22.10 22.70 41.20 19.90 22.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.35 0.72 -3.91 9 698 2009-01-15 18:05:59 2006-04-25 14:22:47 5 34 208 10 0 632 0 103.80 56 2.26 CHANGED NNElhPspLKppsspAusD.ssssss.upAhYNstsGppalhAhlSspssLKasKaEpcsG..hlslEL-PPC+Fhl-sPpGPplKYLYFVKsLNsLpRGtVLGhIuATVRLQ .............NNElhPstL+ppustAuss.ssssss.upsaYsspsGtphlhAllSspssLKas+a.pssG..hIhlEL-PPC+FsscsspG.P.cVKYLYFlKsLNoLpRGhVLGtIuATVRLQ 0 0 0 0 +8544 PF08711 Med26 TFIIS; TFIIS helical bundle-like domain Mistry J, Moxon SJ, Bateman A anon pdb_1wjt & Pfam-B_7936 (release 8.0) Domain Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species {1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Mediator exists in two major forms in human cells: a smaller form that interacts strongly with pol II and activates transcription, and a large form that does not interact strongly with pol II and does not directly activate transcription. Notably, the 'small' and 'large' Mediator complexes differ in their subunit composition: the Med26 subunit preferentially associates with the small, active complex, whereas cdk8, cyclin C, Med12 and Med13 associate with the large Mediator complex [4]. This family includesthe C terminal region of a number of eukaryotic hypothetical proteins which are homologous to the Saccharomyces cerevisiae protein IWS1. IWS1 is known to be an Pol II transcription elongation factor and interacts with Spt6 and Spt5 [5,6]. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.24 0.72 -4.46 76 1525 2009-01-15 18:05:59 2006-04-25 14:52:23 6 42 326 13 982 1411 10 52.80 30 10.64 CHANGED clLptLpp..hs.ho......t-hLppTclGhsVstl+Kp............ssppltplAcpLlpc..W+chlp ........................lLptLpp....hs.lo................h-hL.p....p.T.p.lG+sVs.tl+Kp....................pspcl....pphA+pLlpp..W+chl............... 0 274 473 734 +8545 PF08712 Nfu_N Scaffold protein Nfu/NifU N terminal Mistry J anon pdb_2ffm Domain This domain is found at the N terminus of NifU and NifU related proteins, and in the human Nfu protein. Both of these proteins are thought to be involved in the the assembly of iron-sulphur clusters [1][2]. 21.30 21.30 21.40 21.80 20.60 20.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.37 107 1440 2009-01-15 18:05:59 2006-04-25 16:01:34 6 14 1107 3 493 1036 1572 85.90 34 36.69 CHANGED Ip.TEsTPNPssLKFlP.uppllssu..o..h.-FssscpAts....SPLAppLF.plsGVpuVFhGsDFlTVoKsst.s-WsplKPplhuhI.h-ahpuG .............IphEsTPNPsohKhl.....u..ps...lhspt....o....h-a.h.st....p.pstp.......usL.sppLh..pl.-..GVpuVFashD....FloVsK.......psc...s...-WpplhPplhusl.h-.h........................ 0 159 305 413 +8546 PF08713 DNA_alkylation DNA alkylation repair enzyme Mistry J anon pdb_2b6c Family Proteins in this family are predicted to be DNA alkylation repair enzymes. The structure of a hypothetical protein in this family shows it to adopt a supercoiled alpha helical structure. 
20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.32 0.70 -4.83 89 2486 2012-10-11 20:01:01 2006-04-25 16:53:16 6 10 1475 10 471 1879 159 170.60 17 82.37 CHANGED lpppLpshus......sppAtthppahK.......pphtalGlpsPth+pls+phhpphsht.........thsppLapssh.+Et+hhAhpllhphh..pc....hs.s................hphhpphlt..phstW-hlDthssplluphlhp..............thtshlhpWspo-shWhRRsAlltplhatcp........schptlhphspthls-p....-.hIpKAlGWhLR-huKp.c.shltpFlpp........ctpt.hsthuhRpAhchl .....................................................................................................................................p.........t..h....................hhGl.hs.hp.hhcthh.t.................................h.t..hh......t..p....................-.............hh.uh.hh..........t........t.t.............................h..htthh.....ph.s..at.hhD.h....h..hhtth.h.t........................h..phh.pW.h.ts.p.....p...aht.Rh...u...h....h..........h.....h.h.p............tp..t....h..hthltt..h..p.sp..................p....hl.pp.ulu.hLpphutp.p.phhh....phlp.............................................................................................................. 0 196 347 408 +8547 PF08714 Fae Formaldehyde-activating enzyme (Fae) Mistry J anon pdb_1y5y Family Formaldehyde-activating enzyme is an enzyme required for energy metabolism and formaldehyde detoxification. It catalyses the condensation of formaldehyde and tetrahydromethanopterin to methylene tetrahydromethanopterin [1]. 
20.90 20.90 21.30 23.50 18.40 20.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.62 0.71 -4.54 45 346 2012-10-03 01:04:38 2006-04-25 17:32:49 6 2 176 10 133 327 105 146.40 45 77.97 CHANGED lGEALlG-Gs.........ElAHIDLlIGs+sGPsGpAFANuLsststGHTsLLAVlpPNLhsKPsTlhlsKVTIKstcQAsphFGPAQuAVA+AVADuVp-GlIPc-ps-DlsllsuV.......FIHPpA........pDcp+IacaNYpATKhAIpRAhpshPshcclltp+cpupHPhhu ..........hGEuhlstus.........phAHIDLlIGs+s..usstpAFANuLsspppGaTsLLAVlsPNLhsKPsTlhhsKVTIKss.cQAsphFGPAQtAVAcAVsDuVt-GlIPt..-cA-DlhIlluV.......FIc.ps.........D.pcl.chNYpAsp.AltpAhtt.PphpplhtttpphtH.h.s....... 1 34 93 115 +8548 PF08715 Viral_protease Papain like viral protease Mistry J anon pdb_2fe8 Family This family of viral proteases are similar to the papain protease and are required for proteolytic processing of the replicase polyprotein. The structure of this protein has shown it adopts a fold similar that of de-ubiquitinating enzymes [1]. 21.40 21.40 21.40 23.00 19.60 20.50 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.10 0.70 -5.36 5 760 2012-10-10 12:56:15 2006-04-26 12:01:56 5 34 211 22 0 801 0 290.70 29 6.89 CHANGED csKslTIalTEDGVNl+oVVVcsucSLGpQFGsVas+sKshptVhPuDssEDKplhhlPosDhlt..uFKsslpYaoLDsptYuhYhssL..spKWph..VsGFplLcWsDNNCWVNSslllLQtuKl+Fpu.uLssAWsKhluGDsssFVAalYAsssusVG-hGDAc-sLo+LuEHhssDusssLL+hsVCspCGhK.osolsGlEAsIh.suolshDshKTGYS.sCsCGpcssscVlpssusalllsAs-s.PuAss+LpsGluhss..FoGSsssGH.YT.apAAscAhY..DGA+hpKaucpossVTAlah+tuhhopslhPVus 
.....................................................................................................................ph.lhhT.Dssp......hcs..Vt.s.ohG.ph.G.s.shhcstshpt.hhstst......psc.hh..h..s.....D...........................sh.....c........hh....s....hD..t.p.hh.Yhp..sL...hppaphs.h..s..shhslK.sDNNCalsushlhLQth.c.lpFps.hlp-AWtcapuGcsssFVuhhhA..hsshp.hG-.GDuc.hLppLhphhs.h-.sstlhp.pssspCG.K.ppphpGl-ushh.hts.shpphcpGhp.sCsCGpssspplsphcushlhh..p..s........s.....ss.tp.h..pu..s..shss..a.p..G.shp.sGH.YT..a.st..p..p..s..h..h...Dus....+hpKhsc.ph.lT.s.hhh.h..s.h.........t.................................................................. 0 0 0 0 +8549 PF08716 nsp7 nsp7 replicase Mistry J anon pdb_1ysy Domain nsp7 (non structural protein 7) has been implicated in viral RNA replication and is predominantly alpha helical in structure [1]. It forms a hexadecameric supercomplex with nsp7 that adopts a hollow cylinder-like structure [2]. The dimensions of the central channel and positive electrostatic properties of the cylinder imply that it confers processivity on RNA-dependent RNA polymerase [2]. 25.00 25.00 25.70 25.20 24.40 24.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.70 0.72 -3.59 7 563 2009-09-11 00:21:17 2006-04-27 12:56:27 5 32 199 10 0 567 0 84.10 55 1.51 CHANGED SKLTDlKCTsVVLLulLppL+VEuNSKhWAaCVpLHN-ILhssDsscAh-pLluLLusLhShpuslD......LscLs-shh-ssolLQ ..SKLoDVKCTsVVLLslLppL+VESNSKhWuYCVpLHN-ILhscDsscAhEKLluLLssLhSh..puslD...........lscLC--hlcssolLQ. 0 0 0 0 +8550 PF08717 nsp8 nsp8 replicase Mistry J anon pdb_2ahm Domain Viral nsp8 (non structural protein 8) forms a hexadecameric supercomplex with nsp7 that adopts a hollow cylinder-like structure [1]. The dimensions of the central channel and positive electrostatic properties of the cylinder imply that it confers processivity on RNA-dependent RNA polymerase [1]. 
25.00 25.00 29.20 28.20 20.50 20.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.44 0.71 -4.71 9 574 2009-01-15 18:05:59 2006-04-27 13:42:27 5 35 209 6 0 578 0 198.40 54 3.56 CHANGED uVASpFsslPSYltYEsA+psYEcAlu...NGuus.QllKpL+KAhNlAKStFDR-tuVQ+KL-RMAEQAhTsMYKEARusDRKSKVVSAMpohLFuMLR+LD.sulssIlspARsGVVPLulIPtsuAsKLhlVlPDhssas+hhstssVpYAGslWsIppVpDsDGcsV+lp-lTpp.....Nt.sLsWPLllsspR.....s.VKLQ ....ulsSEFuslPSYstYEpA+psYEcAlu...NGsus.Q.LKpL+KAhNIAKStFDRDtAVQ+KL-+MA-pAhTpMYKEARusD++uKVlSAhpohLFoMLRKLDssuLNsIlspA+sGsVPLshIPhssAsKLhlVlPDhpoaspss-sssVTYAuslWpIppVhDADGp.lpLsEIshs.....ss.NlsWPLllsspR..tsps.stLQ................... 0 0 0 0 +8551 PF08718 GLTP Glycolipid transfer protein (GLTP) Mistry J anon pdb_1wbe Domain GLTP is a cytosolic protein that catalyses the intermembrane transfer of glycolipids [1][2]. 25.60 25.60 26.00 25.60 24.90 25.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.63 0.71 -4.17 68 702 2009-01-15 18:05:59 2006-04-27 13:59:54 6 10 261 19 468 642 6 140.30 27 56.28 CHANGED spl.sptFLcuscplsplhc.........hl...G.ssFuhlppDltuslp.+lcphh..tsspt...............................hpoLpshlptEhpsth.t.tps.................ouocsLLWLp.........R........u.LcFlthhLcplhs........s.spp.....hsssspcAYspoLptaHuall+pshp.........lAhtusP........sRcphhptlssss ............................................................t..l.sp.FLpusptls..hhc.............hl...G...ssFs.lppDlhusl..p.+lcphh....ss.s.tp...............................hpoLpsllptE.hps..thhp..hps..................ouocuLLWLp..........R...................u.LcFlthhLpplhs.......s.ppp..............lpssh.ppAYspo.LptaHuWll+tshp.....................hAh.hshP........pRppFhttls...t........................ 0 153 245 359 +8552 PF08719 DUF1768 Domain of unknown function (DUF1768) Mistry J anon pdb_2b3w Domain This is a domain of unknown function. It is alpha helical in structure. 
The GO annotation for this protein suggests it is involved in nematode larval development and has a positive regulation on growth rate. 31.30 31.30 32.20 31.50 30.50 31.20 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.15 0.71 -4.49 80 1081 2009-09-13 10:17:57 2006-04-27 14:19:58 6 29 789 1 374 820 99 145.10 35 55.13 CHANGED lhFap.pp......shuhhSNah.ps.h...........pl.....c..........s.......hp...asosEH........Ya.ApKhth....h..p........................................ppp...pIhp..up.....sPtcstp...............LGRp..ht....h+..W.pph+hplMhpushtKFpQ..pt.................c...L+phLLsT....GsphLVEsSshD....phWGsGhs.................................hpGpNhLGclLMcVR.ccLpp .............................................................................Fat.t.......shushSpahs.t.s..h...............................ph..s........................................................u..............hp.....asosE+....................a..h.....ApKhh..........h...t............................................................tp...p........pIhp....ss........sPhp....stp...................hGRp.....hp..c....hp..W..pph+..pl.h....tcuhhtKFpQ......ps...................................c...L+phL..LuT....................u....st...hL....V....E.s....o......pD..............thWGsGhs.................................................hpGpNhLGhhLMclRcpL..t.................................................. 0 172 262 347 +8553 PF08720 Hema_stalk FluC_stalk; Influenza C hemagglutinin stalk Mistry J anon pdb_1flc Domain This domain corresponds to the stalk segment of hemagglutinin in influenza C virus. It forms a coiled coil structure [1]. 
21.60 21.60 301.20 301.10 20.80 19.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.18 0.71 -4.75 3 144 2009-01-15 18:05:59 2006-04-27 14:57:47 5 1 130 3 0 107 0 172.80 99 27.14 CHANGED IFGIDDLIIGLLFVAIVEAGIGGYLLGSRKESGGGVTKESAEKGFEKIGNDIQILRSSTNIAIEKLNDRISHDEQAIRDLTLEIENARSEALLGELGIIRALLVGNISIGLQESLWELASEITNRAGDLAVEVSPGCWIIDNNICDQSCQNFIFKFNETAPVPTIPPLDTKIDLQ IFGIDDLIIGLLFVAIVEAGIGGYLLGSRKESGGGVTKESAEKGFEKIGNDIQILRSSTNIAIEKLNDRISHDEQAIRDLTLEIENARSEALLGELGIIRALLVGNISIGLQESLWELASEITNRAGDLAVEVSPGCWIIDNNICDQSCQNFIFKFNETAPVPTIPPLDTKIDLQ 0 0 0 0 +8554 PF08721 Tn7_Tnp_TnsA_C TnsA_C; TnsA endonuclease C terminal Mistry J anon pdb_1f1z Domain The Tn7 transposase is composed of proteins TnsA and TnsB. DNA breakage at the 5' end of the transposon is carried out by TnsA, and breakage and joining at the 3' end is carried out by TnsB. The C terminal domain of TnsA binds DNA. 22.00 22.00 22.00 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.45 0.72 -3.97 38 303 2009-01-15 18:05:59 2006-04-27 16:28:33 6 4 250 4 69 203 9 78.00 24 26.68 CHANGED EI..s.hhhcNlpaLhshhpppsstphpht.p.......lhphlpppss..psltslhpphs.t...........slptupsL..lttLlApchlthDl ...................................-l..sphhhcNIpalpsh.hcp.......ts..p...h...sph.ht.........lltpL..ptpst...sslpplhspl.................slssspulhhlptLlAp+hIpsDl... 0 20 36 51 +8555 PF08722 Tn7_Tnp_TnsA_N TnsA_N; TnsA endonuclease N terminal Mistry J anon pdb_1f1z Domain The Tn7 transposase is composed of proteins TnsA and TnsB. DNA breakage at the 5' end of the transposon is carried out by TnsA, and breakage and joining at the 3' end is carried out by TnsB. The N terminal domain of TnsA is catalytic. 
25.50 25.50 25.50 25.50 25.30 25.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -10.10 0.72 -3.99 76 557 2012-10-11 20:44:44 2006-04-27 16:37:50 6 8 444 4 111 422 696 88.40 25 30.37 CHANGED ssVhchc-Q.Pl.......................shphshp.Ghp+s.................hTsDaLlphpss.........t.hhhplK.........ppcLp....c....p+hhcKhph..........p+.pahpp.pGhsatlhTE ...................................sVh-lp-Q.Pl.........................shphs..pp.shp+...................hosDFLlshpss..........phhhlpVKs......................sp.clp................c........pchhpKhch......................t+.tahpt.ps.hpatlhs..................................... 0 23 62 83 +8556 PF08723 Gag_p15 Gag protein p15 Mistry J anon pdb_1hek Domain Gag p15 is a viral membrane-binding matrix protein which is alpha helical in structure. 26.10 26.10 26.10 26.60 25.90 26.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.72 0.71 -4.12 2 200 2009-09-11 06:11:00 2006-04-27 16:54:40 5 3 5 2 0 160 0 109.70 81 35.42 CHANGED GDsLTWSKALKKLEKVTVQGSQKLTTGNCNWALSLVDLaHDTsFVKEKDWQL+DlIPLL-DVsQoLSGQE+EAFE+TWWAIoAVKMGLQINNVsDGKASaQLLRAKa.....sKKQuEssEtY .....GDsLTWSKALKKLEKVTVQ.GSQKLToGNCNWALSLVD.LFHDTNFVKEKDW...QLRDVIPL...LEDVo.QT....L.S.G.QE+EAFE+TWWAIsAVKMGLQINsVsDGKAoaQLL.+AKaE+.+.su.s..KKQuEPpEEY...... 0 0 0 0 +8557 PF08724 Rep_N Rep protein catalytic domain like Mistry J anon pdb_1m55 Domain Adeno-associated virus (AAV) Replication (Rep) protein is essential for viral replication and integration. The catalytic domain has DNA binding and endonuclease activity. 
21.30 21.30 21.60 24.30 20.00 21.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.15 0.71 -4.75 9 213 2009-01-15 18:05:59 2006-04-27 17:04:03 5 2 57 9 0 219 0 158.90 41 26.63 CHANGED hacsllp...........lSsNFlsasssphWp....uhhsL-ps-hPp...LTss-+lhshalschsshhcsPsu.....caFlQhEps..-phFHlHlllussslsshsl...ssplcsths+slachspPplsshFtsshoKK...Gt.phsu.saIssYLhPKl.Ppl.WuhTNlpEYthsC.sLphR+phhcpa .....................................................................................................t.........lphsptlht.hLpphsph.p..tuP.......haFhQlEssp...EctaHlHlVlstssls.sRsl...hsp.lcshhs.phhhphhs.sh.lha..shTpp....G+haps...sppFI.NYLh.Kl..P..l.p.............ll.W.shTNID.t.ahssslSsshR+thhpt.h.... 0 0 0 0 +8558 PF08725 Integrin_b_cyt Integrin beta cytoplasmic domain Mistry J anon pdb_1m8O Domain Integrins are a group of transmembrane proteins which function as extracellular matrix receptors and in cell adhesion. Integrins are ubiquitously expressed and are heterodimeric, each composed of an alpha and beta subunit. Several variations of the the alpha and beta subunits exist, and association of different alpha and beta subunits can have different a different binding specificity. This domain corresponds to the cytoplasmic domain of the beta subunit. 21.30 21.30 21.30 21.70 20.90 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.22 0.72 -4.57 33 606 2009-01-15 18:05:59 2006-04-28 13:40:02 6 34 134 25 288 512 1 45.40 48 6.09 CHANGED KlLhplpD+REat+FE+E+tpu+WspucNPLY+sAToTap.NPsYtup .......KLLhpIHDRREaAKFEcE+t.pA..KWcs.up.NPlYKsAsoThh..Nspapt......... 0 59 81 178 +8559 PF08726 EFhand_Ca_insen efhand_Ca_insen; Ca2+ insensitive EF hand Mistry J anon pdb_1h88 Domain EF hands are helix-loop-helix binding motifs involved in the regulation of many cellular processes. 
EF hands usually bind to Ca2+ ions which causes a major conformational change that allows the protein to interact with its designated targets. This domain corresponds to an EF hand which has partially or entirely lost its calcium-binding properties. The calcium insensitive EF hand is still able to mediate protein-protein recognition [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.41 0.72 -4.00 23 640 2012-10-02 16:17:27 2006-04-28 13:56:49 5 73 193 3 326 1364 9 67.50 48 5.87 CHANGED -psouEQlhpuF+tl.AssK.sYlTcp-L+psLsP-ps.-aClppMs.ap.......ssp...pps.su.aDYhuFspsLau .......................-scTuEQVhsSF+hL..As.s...K..sYITt-ELR...+...p.........LsP...-.QA...-YClpRMsPYp.....................................GPc......usPuA.hDYhuFosuLat....................................................... 0 82 120 210 +8560 PF08727 P3A Poliovirus 3A protein like Mistry J anon pdb_1ng7 Domain This domain is found in positive-strand RNA viruses. The 3A protein is a critical component of the poliovirus replication complex, and is also an inhibitor of host cell ER to Golgi transport. 22.30 22.30 23.00 22.40 20.90 22.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -9.08 0.72 -4.63 21 1219 2009-01-15 18:05:59 2006-04-28 16:12:37 6 15 264 2 0 1300 0 56.90 55 2.66 CHANGED GP..a+.lpIsl..csPPPsAIsDLLpSVDotEVRcYCccptWIVPt...psslERslNpA .........GP..a+sl+Isl..psPPPsAIsDLLpSVDSpE.VRcYCc-pGWIlPt..ossplERclNRA.... 0 0 0 0 +8561 PF08728 CRT10 CRT10 Mistry J, Wood V anon manual Family CRT10 is a transcriptional regulator of ribonucleotide reductase (RNR) genes [1]. RNR catalyses the rate limiting step in dNTP synthesis. Mutations in CRT10 have been shown to enhance hydroxyurea resistance [1]. 
21.90 21.90 22.00 22.10 19.00 21.80 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.22 0.70 -6.54 9 141 2009-09-13 09:52:06 2006-05-02 12:36:21 5 8 105 0 106 153 0 320.30 15 55.24 CHANGED hsPtchhhssh-phshpclhhp.ssppapsthplphhu............FKNNlhshhp....tsaLhlussoplhlashDslsshsph................hc..scsshsshhDc.hlSohP..saTINal.phssahGpphLssChDsGhlhhWhhsoIlp.hppa..........ps-.c.............th..psR...hplpPc...hpl+hcu...SsWuhDhh.sa.....sspslIssucNu....puloL..hhachs..c-RaYhtc....shp..............h.HNlPslSFlssp.sst.ha.lhVussoIsGplhohpFp...Fp.pps...............................................h.clphssshh.o+shhu-DsWTlpPlSscsFhpVsuhchlsss.pphpccpplppIhp-StlLss..s.s..os.hGhuAphp.apsPVssl..........................................................................................................................ps+pss...ppps...............................................................................................phpss+.oslc--a................................pplHctlcp.h....ph..tpcpps.G..h...........ps.hlhlTTs++lsLh+scsLhssuuTp.clFsLp..shsptscho....NRIShsphI.ELsChlsuSQ.GLlSlhRLspa+GlYuhRQEaIhs.....................................sspslshshtthpslshltl..............p-psh...h.hhhlYVsYssulh.sYcI 
......................................................................................................................h......................................................................................................................................................................ptlN.h.hhs.h.........htp-hlhhshDsG.lhha.hptlht.hpph...................................................ts.....h....ht....SsWulshh........p..phlAsutNp............lsl..ahht.........ppt...........t....................................................................................tNlPslsFhspp........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...................................................................................................................................................... 0 18 54 90 +8562 PF08729 HUN HPC2; HRD; HPC2 and ubinuclein domain Mistry J, Wood V, Balaji S, Iyer LM, Aravind L anon manual Domain HPC2 (Histone promoter control 2) is required for cell-cycle regulation of histone transcription [1]. It regulates transcription of the histone genes during the S-phase of the cell cycle by repressing transcription at other cell cycle stages. 
HPC2 mutants display synthetic interactions with FACT complex which allows RNA Pol II to elongate through nucleosomes [2]. Hpc2 is one of the proteins of one of the multi-subunit complexes that mediate replication- independent nucleosome assembly, along with histone chaperone proteins. the Hip4 sequence from SCH. pombe is an integral component of this complex that is required for transcriptional silencing at multiple loci [3]. HPC2, ubinuclein/yemanuclein, and the cell cycle regulator FLJ25778 share a conserved domain that is predicted to bind histone tails [4]. This domain is also referred to as the HRD or Hpc2-related domain. 26.90 26.90 26.90 27.10 25.20 26.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.12 0.72 -4.10 54 344 2012-10-09 00:06:17 2006-05-02 12:49:19 5 6 246 0 236 333 0 52.80 40 6.69 CHANGED pp+ccpch.c....tttYDp-DsFIDDoE...hh-Ehh.............hssppsGFalspGsLt. ..................t...tcpc..chhshshtYDpsDsFIDsSE...ha-Ehh.............hssptsG.FalssGsL................... 0 70 121 180 +8563 PF08730 Rad33 Rad33 Mistry J, Wood V anon manual Family Rad33 is involved in nucleotide excision repair (NER). NER is the main pathway for repairing DNA lesions induced by UV. Cells deleted for RAD33 display intermediate UV sensitivity that is epistatic with NER [1]. 21.70 21.70 23.90 35.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.06 0.71 -4.79 9 47 2009-01-15 18:05:59 2006-05-02 13:22:52 5 2 40 0 30 41 0 135.10 37 77.88 CHANGED ht.ptpho..........+lPsElEDEILEtYuphot........ppDhsls-LPpaFcsLplPpsa...........Ychl+sc..clpl-uT.............................DIlDh-KLlpssh+LLhFhsN.opIcspWcLhlssuuc..tpshs.h.lpsahLol.DLpKlpsplshDps. ...........ps......p..........cls.ElEDEILEtYup.sh........-pDhslscLPpaFccLpl..sh...........achl+sc..slhl-uo.............................DllDhsKLlpsTspLLhhhsNhphIcc.WphllpssGc.ssshspstlcsahLol.DLpKlps.lshDpp.s........... 
1 2 14 27 +8564 PF08731 AFT Transcription factor AFT Mistry J, Wood V anon manual Family AFT (activator of iron transcription) is an iron regulated transcriptional activator that regulates the expression of genes involved in iron homeostasis . This family includes the paralogous pair of transcription factors AFT1 and AFT2. 21.60 21.60 21.60 22.40 21.50 21.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.69 0.72 -3.87 8 82 2012-10-02 23:28:20 2006-05-02 13:51:45 6 5 39 0 46 87 0 88.90 35 18.93 CHANGED FcDKsDIKPWLQK.IFYPQGI-IVIERSDuhKlVFKCKusK+pcstsc........................................................................KKKpus....u+assCPFRVRAsYSl+pK+WolVVlNNsHoHsL ............................................................................Fps+p-l+salpc.hhhspGhtlVIt+Ssp.tlhFpCc.stphpt.t...............................................................................pctsho+h.pCPF+lRAsaSh+pptWslslhsstHsH........ 0 10 31 46 +8565 PF08732 HIM1 HIM1 Mistry J, Wood V anon manual Family HIM1 (high induction of mutagenesis protein 1) plays a role in the control of spontaneous and induced mutagenesis [1]. It is thought to participate in the control of processing of mutational intermediates appearing during error-prone bypass of DNA damage. 
28.70 28.70 28.70 28.70 28.60 28.30 hmmbuild -o /dev/null HMM SEED 410 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.48 0.70 -6.01 4 31 2012-10-10 17:06:42 2006-05-02 15:48:55 5 2 30 0 20 61 6 330.40 38 90.90 CHANGED LLhGuouLsGKhVhpphLcls.Ylssh.-L.phhLp....ulpphppNlhlppH.........................lhshsR.hhp..p......phh+o.sh.s........htshshpGSQa.......................a.pphpslthDl-plspo.sshs.a-s....ahKscsc..pths.huhhhthK....pasaslpYsouctchlplssshsVsQll.PcSppWPpllP.+IFo..spl-thsh.pc..hP.......ls-IpThISoLGSTSsRs++opsspsalDYhLNhslsKs.Fs....ssssKphlIsTSFNNhhlSps..YF+hKt+LEssLs.slss.LppLsILRPGPLlGcHGs.....PoNssl.c.sushLc+hhhYKKslhpphhpals-h+clGhsTKsSElVApshY+hPGuhllGYslPst+VAalhuhtA......lc+hh+pu..hhcVs..SSpphDshc ...............................................................hhGuouLhGphhLpphLp.phYlts..ppl.p.hhp.....shpp..h.tphhhpph.........................hhshsRp..p.p........hhp..ph.t........hpshshpGuca.............................................a.cphp..phc.hpphs.pss.sshs.acs.t..hhcssscp.pshsthShl..hp.K....phsapLpYsssctchlpIhhshsVhQllhscSppWPcLLP.cIFo.........tplcthshcp....cp....hhP......sLs-.IsTMlC..oLGSTSAcs++opsspsa.sDYhLsasLAQp.Fo......s......Tt.....sK+lVlsTuF.NNshlSphFpYFRhKuKLENDLcpsLs...s+LKcLVILRPGPhsGp....Hus.....Plssplt+...ssshL.p+hhha.+hhh.hphthhtph+phG.t.+ho-llAp.hYphPGs.hlGYslsst+su.hhs..u......hp........p.....cl...pS.phD...s.......................................... 0 3 9 16 +8566 PF08733 PalH PalH/RIM21 Mistry J, Wood V anon manual Family PalH (also known as RIM21) is a transmembrane protein required for proteolytic cleavage of Rim101/PacC transcription factors which are activated by C terminal proteolytic processing. Rim101/PacC family proteins play a key role in pH-dependent responses and PalH has been implicated as a pH sensor [1]. 
23.70 23.70 24.50 44.80 22.00 23.60 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.23 0.70 -5.61 24 159 2009-01-15 18:05:59 2006-05-02 16:21:54 5 2 116 0 116 156 2 343.90 28 55.16 CHANGED sCpshtLssGhll.hsh.t........hphhhsAhapspCsts.........h.ssh....ph..................shppa....ppsppssFhhuhlsllaslusssVhsWhLslllhl....pP.................................ht....................tpshLh+luslhsulhhTlhlscshptlccp.h.pGhhcutpLhchlhsshshpllcllsshhhplsplQllhRlFsRp+cKph..................lhhlGhhlhlsspllaulsphp..ss..........tppstshlsshhhLhclultlhaAuhlhhYslpch.....................+hth...tt+phhhLsllolhllhl.lshFlhDlu....shhltpWuchlshlhtlhssVlsWEWls+lphlE+ctc+puVLGR+l.c.D-hhphc.s....ps ................................................Cpsh.Lss.Ghlh.h.s.t..........hshstsuhat...Csts...........s....................................s.tth.......s...hp-sF..hShhshhaulusssVlsahLslllhl....oPp..................................................shs..t+.shL.+lusLhsAlslTlhhscoh.pshccQat.hG......h.cuptlpsplh...........sshph+llclloshhL.lsQlQhlhRLFs.Rp+-Khh..................IhasGhhLhl.hsslh.slspFh..sst.................spphhsslsshsYLhcLulshlYAuhllaYshpK+..............................................................................+hsa.....th+phhllslLollslllslsFFl.hDlu....p..lssWu-hhpalstssuoVlVWEWlp+lEtLE+cccKculLGRcla-..D-hhchs.t..p................... 0 17 54 98 +8567 PF08734 GYD GYD domain Bateman A anon PSI2 target Domain This protein is found in a range of bacteria. It is usually less than 100 amino acids in length. The function of the protein is unknown. It may belong to the dimeric alpha/beta barrel superfamily. 
25.20 25.20 25.30 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.64 0.72 -4.26 39 239 2009-01-15 18:05:59 2006-05-02 16:35:35 6 2 184 0 114 254 1015 90.70 27 86.84 CHANGED YlslssaTspGhcsltcs..scRtcAscphlcs.hGGplcuhahohGp.YDlVslsEuP.DssssuthuLsluutGsV+o.pTlpuhs.s-hpchlt ..............................alhlhsaTspGhcsltcs....scRhcss.pphlcp..h..Gsclc..shYhs.hGp.aDhlslsEuP.Dstshsthu.ltlsutGslco.cThpAhsh--htphl.t.................... 0 30 67 88 +8568 PF08735 DUF1786 Putative pyruvate format-lyase activating enzyme (DUF1786) Bateman A anon PSI2 target Family This family is annotated as pyruvate formate-lyase activating enzyme (EC:1.97.1.4) in UniProt. It is not clear where this annotation comes from. 25.00 25.00 31.20 81.50 22.30 21.50 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.81 0.70 -5.21 15 82 2012-10-02 23:34:14 2006-05-02 16:42:18 5 1 81 0 47 80 5 247.50 34 72.86 CHANGED MGGGPlTtAl+cHlcpGhcVhhT.pAAhTl+DDL-+V+uhGIpIs--s.........sssslphtDlDhttlpshlusasl-hshs..lulAVQDHGhuPs.hSsRhaRFc.hh+chL.ppGsp.-cFla..c-sPpt..aoRM+Alhcslccsshtt....hlMDTs.sAAlhGuLp-sch....shpshlllslGNGHTlushl.ccs+IpGlFEHHTshLssp...KLpphlc+hssGcLosEElasDGGHGAal.....suhspscslllTGP+Rplh ....MGGGshstAlppHlptGhpVhso.pAAhTlcDDL-cV.+.s.hGlpIsccs...............ssstlhhtDhDhthlpshhsshulchs.p..lslAsQDHGasss.tSsRhhRFp.hhcchL.pp...........ssp.tshha..pcsPp...hoRhpultcsltts............hlhDTu.sAAlhGsLt-spl...pttpshlllNlGNuHTlushl.pss+IhGlFEHHTuhlssp...cLtphlc+hppGpLssEEVas-sGHGuhh.........tths.thc.lhlsGP+Rph.h.. 0 18 37 43 +8569 PF08736 FA FERM adjacent (FA) Baines AJ, Finn RD anon Manual Family This region is found adjacent to Band 4.1 / FERM domains (Pfam:PF00373) in a subset of FERM containing protein. The region has been hypothesised to play a role in regulatory adaptation, based on similarity to other protein kinase substrates[1]. 
28.70 28.70 28.90 28.80 28.60 28.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.15 0.72 -4.16 44 1186 2009-01-15 18:05:59 2006-05-03 11:03:47 6 34 97 0 506 928 0 46.50 43 5.71 CHANGED sphh..shGS+FRYSGRTptQshctspp..htRps.pFpRs.Spptsppps ........pFh..shGS.+FRY....S...GRTptQshcsuph..htR.sspFE.RssSK+hspp................ 0 80 119 275 +8570 PF08737 Rgp1 Rgp1 Mistry J, Wood V anon manual Family Rgp1 forms heterodimer with Ric1 (Pfam:PF07064) which associates with Golgi membranes and functions as a guanyl-nucleotide exchange factor [1]. 31.00 31.00 31.00 31.10 30.40 30.40 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.22 0.70 -5.40 30 392 2009-01-15 18:05:59 2006-05-03 12:51:38 5 4 255 0 271 363 0 254.90 21 59.05 CHANGED psLhhuasQIpGphplstshls....sshpt.ppp..hht..........................tt.tp.tp.t..............h.phh.sssphstshsthhststpss................................................................................pcslPlhoTPpolLFsDLpLsPGEoKoFpa.shsLPpsLPPSa+.Gps.........l+lpYsLslGspc....................sttsppsptlplPlRVhs.hptt..................h.tsl.pshhhlccptplphhssppptsssshht............................spcss..........pphpphhchlppLlspcs.p....................p.p...........................................................................tspapIspsscpluplsLsKshY+lGEslshsl-hssss....htshtlsssLEopEp.................lssph..............................tlpusspspp.............sstpshsppppsshpsp.plshplsIPhssT....PpFpTshlp.....LcWpL+FcFVhsp 
.............................................................................................................................................................................................................................................................................................................Lhsph.l....sp...........p.a....h.p..lP........hPsoap.uts.........hph.Y....l..hluhph..............................hplPhplh........................................................................................................................................................................................................................................................................................................................................................................t.aplt..stthlshh.h...cs...h..at..hG-.l.hhhpht.tt...................h.shth.pstLps..Eh.................................lt.th..................................................................................h.hhsp..t.sh..h.ph.h..slPhpss....sth.....Ts.......hhp...........hpW.lphcFhh..t......................................................... 0 83 135 213 +8571 PF08738 Gon7 Gon7 family Wood V, Bateman A anon Wood V Family In S. cerevisiae Gon7 is a member of the KEOPS protein complex. A protein complex proposed to be involved in transcription and promoting telomere uncapping and telomere elongation [1]. 21.80 21.80 22.10 21.80 21.40 21.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.39 0.72 -4.03 18 104 2009-01-15 18:05:59 2006-05-03 14:02:37 5 1 102 0 75 92 0 97.40 31 80.47 CHANGED l.sAsYouPsssp.csFchshs.P.........ot.Sths.pAuspsp-csocsKs.............sYLucLRspLosLQDcINhFLTpRMEc-Kpttttputt.t......p-cpEcchhs .............................................hA.YpuPs..p..ppFt.t.s.s......................su...t.s.psp.sops.+s.............sYLupLRstlssLQ--INpFLTpRMEc-Kpcptt.tutttp..........p-pc-cc.hs..................................................... 
0 11 35 63 +8573 PF08740 BCS1_N BCS1 N terminal Mistry J, Wood V anon Pfam-B_10126 (release 19.0) Domain This domain is found at the N terminal of the mitochondrial ATPase BSC1. It encodes the import and intramitochondrial sorting for the protein [1]. 22.40 22.40 22.60 22.40 21.80 22.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -4.68 55 539 2009-09-16 12:49:40 2006-05-03 16:25:19 6 9 253 0 410 534 1 175.50 25 35.62 CHANGED hhlGsshuhh+phhphhhph.....hpcphhsolElss.....cDcsYsallpW.huppshsph.pp..ht..t....................................................phthpsphtpcp.................ppphpasPu.G.sHahhYcG.p..hltlpRp+.pppthshss.t.s.........hEslsloslupstplhpcLLpEA+phhhpppcu+..TllYputusp.........Wpp.stsRtpRPlsoV ...........................................................................h.hhusshthh+pshphshth.....hpcphhsol-lss.....cDc.sYsal.lpW.lsppsttp..t...pp..ht.............................................................t.t.h.p..sph.pcp.....................psphpahPu.G..pHhhh......Ycs.p..alhlp...Rp+..ppp....hshts...s.s................hEsloloslup.s....pp....lhpclLpEA...+p..hsh.pppcu+..TllYpuhusc.........Wp..stsRtpRPlsoV................... 0 114 215 349 +8574 PF08741 YwhD YwhD family Bateman A anon PSI2 target PSI-blast from BH3813 Family This family of proteins are currently uncharacterised. They are around 170 amino acids in length. 
25.00 25.00 81.10 80.80 20.90 19.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.97 0.71 -4.84 13 388 2009-01-15 18:05:59 2006-05-04 13:58:48 5 1 383 0 44 165 0 163.10 67 96.27 CHANGED KKp..huFNIIKsDsTDGHGGaGsGoLSL-NVSPVhlDlE-pcAFVDlGAMHARSsVEKtIKFlss+--Vs..sGK.YWLVWVTl-RpE-GPYYAGVTACEMsVs+EhRR...GYKSLPEHVN+MDKSlK++IlV-HMD-sSKplLu-FLcsHs.sMWp+Ss-EL+cuLt ..............KKshsFNIIK.NDPhDGH+GhslGSlSLDNIuPVFIDVtsKEAFlDIGuMHARucVEKGlKalT-KstV...puKtYWLsWVTsERsEp.GPYYAGlTAC.hhVs+sIRR...GYKShPEHVNhMDKSMK+HIIlDpls-csKtlL+-FLhsHsEuMWpcSS-tL+pAh.p... 0 14 27 36 +8575 PF08742 C8 DUF1787; C8 domain Mistry J, Liu XH anon Liu XH Domain This domain contains 8 conserved cysteine residues, but this family only contains 7 of them to overlaps with other domains. It is found in disease-related proteins including von Willebrand factor, Alpha tectorin, Zonadhesin and Mucin. It is often found on proteins containing Pfam:PF00094 and Pfam:PF01826. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.93 0.72 -3.73 131 2979 2009-01-15 18:05:59 2006-05-04 14:10:28 6 390 118 0 1758 2535 1 74.30 30 11.28 CHANGED pttspphCphlh.sst...FspCHshVsPpsahpsClhDhCt.....stss........psh.CsslssYApsC..pptGlslp..WRs...shCs .........................................t...spphCshlh..sss..........Fts.CHs..h..V...s.P.p.s.ahcs..C..lhDhCt...............ssss............p...psh.C.sul...ssYApt.C......p.p...t..G...l......s...lp.....W...Rs...shCs.................................................. 0 299 442 1045 +8576 PF08743 Nse4_C SUMO_ligase; Nse4; Nse4 C-terminal Mistry J, Wood V anon manual Domain Nse4 is a component of the Smc5/6 DNA repair complex. It forms interactions with Smc5 and Nse1 [2]. The exact function of this highly conserved C-terminal domain is not known. 
19.60 19.60 19.80 19.90 18.20 19.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.07 0.72 -3.96 24 357 2009-01-15 18:05:59 2006-05-05 09:24:19 5 11 270 0 259 358 0 89.00 29 23.74 CHANGED pslshhcFVlNP...pSFuQTVENlFalSFLl+-GtspIphc.pcslPhlps..t.s.tsstspstsst+pQuVhslDhcsWcphlchasI..pEshlspc ...........t.slshacFllsP...pSFupTVENlFalSFLl+DGplplphD.....pc.s....l.......Phlps...........t..s..........ts...p......t......hp........p................pshtppQh.lhslshpsWcphlc..hapl..pcshl....................... 0 83 137 208 +8577 PF08744 NOZZLE Plant transcription factor NOZZLE Mistry J anon Pfam-B_86265 (release 19.0) Family NOZZLE is a transcription factor that plays a role in patterning the proximal-distal and adaxial-abaxial axes [1][2]. 20.20 20.20 20.30 21.00 19.40 19.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.26 0.70 -5.06 2 13 2009-01-15 18:05:59 2006-05-05 10:55:01 5 1 7 0 9 13 0 216.80 34 57.38 CHANGED MATSLFFMSTDQNSVtNPN-LLRNThLV...sGEIRTE.shKSRGRKPGSKTuQQpQKcPTLRGMGVAKLER.hhEEEKKph.ssAs.GDTS..Au.sNsATRhP...D.GVVLQGFPS........SLGu...sRhhCGGsGSGQlMIDPVhSPWGFVETSu..HELSSIsNPQMaNASSNp.CDTCFKKKRLDGDQ.NVVRSNGGGFSKYTMh..PPPMNGYDp.LL.sD..QRSQGFhYDpRIARuA..sAuSsohNPYFNEATNhTG.shEEFGSh...NPRNGotGVKEYEFFPGKYs-hhuhsh.suo.VGDCSPN...TIDLSLKL ........................s..........................................h.p.shpoRuRKssoKpupppQKK.s.RGMGVAQLERlRIEEcpKph.shAs...us.....sS.........hp...s.phsp.s...s.G.s...............................G....sh..s.uhGuG.hhlsPhhs...s.t.st..st......+ELSShsp.........p.CDhCFK................................................................................................................................................st............................................. 1 2 8 8 +8578 PF08745 UPF0278 UPF0278 family Bateman A anon PSI2 target MJ0950 Family Members of this family are uncharacterised proteins about 200 amino acids in length. 
22.50 22.50 22.50 22.50 22.20 22.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.46 0.70 -5.08 11 103 2012-10-03 20:43:45 2006-05-05 13:00:30 6 2 91 1 79 113 3 182.50 35 88.86 CHANGED RFVLDTosFT-splRcthG.cslsEuscphLDLIucARlphsISCYlP.PoVYpElppFhcp.ssstElls+l-TWllKKoPsRYEl+IPAplFYEYlc-lRcRls+GhRlAEctlh...Euuspsh............cpp...-.lGclIschRcKYRpsLRpGhLDSs.DlDVLLLApELDAuVVuuDpGIc+WA-+LGLRalsussFPphLcEYLch. ..............................................................RFlLDTohhsssplRp.hG.ps.scuhpphlclhtpsch.htlphahP.solYpElhthhp.....tclhscl-hallhKsPs+aplpIPu.lhYEalc-hRpRls+GhRluEctlh...cus.ts................................p.lsphIpphRc+YRpALRp.GhLD..SptDlDlLLLAhELDAslVosDtGIppWAc+lGl+alsutpF..hLcphlp.h.............................. 0 19 42 61 +8579 PF08746 zf-RING-like RING-like domain Mistry J, Wood V anon manual Domain This is a zinc finger domain that is related to the C3HC4 RING finger domain (Pfam:PF00097). 22.90 22.90 22.90 23.20 22.80 22.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.95 0.72 -3.96 12 210 2012-10-03 15:03:13 2006-05-05 13:42:37 6 5 176 2 141 204 0 43.10 37 14.10 CHANGED CphC+clshpG.tCss.sCshc............hHhtChppahpppss..hCP...sC ...CphC+cllhtGppCs..s...pCsh+...............hHh.Chp+aapppp.....t...pCPpC....... 0 27 60 102 +8580 PF08747 DUF1788 Domain of unknown function (DUF1788) Bateman A anon PSI2 target Npun02004481 Domain Putative uncharacterised domain in proteins of length around 200 amino acids. 
21.80 21.80 22.10 22.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.50 0.71 -4.27 24 273 2009-01-15 18:05:59 2006-05-05 16:38:57 6 1 261 0 68 239 41 125.00 30 65.39 CHANGED lhclsLa-lsl-lLc-Rtlh-cllchEpppGp-tlhctLpulLc.......pcclsptIscp.htstshDllhLoGVGpsaPhl.RoHslLNNLpshhtppPllMFaPGpYs.....GpsLcLFsplc--s...YYRAFpll ...........................................h..phslaplhl-hLpc+..s.......h..h.-...p.....hhp....hE.p..h....Gt...-tlhc.tlps.Lp................pcclsphIsc+....h...t......h..s.s.ps..lVhLTGlGpsaPhl.RuHplLssLp..s..hhs.psP..llhFaPGpYs.....GpsLplFs....phcspN...YYRAFpl......... 0 30 49 59 +8581 PF08748 DUF1789 Domain of unknown function (DUF1789) Bateman A anon PSI2 target CAE43632.1 Domain Putative uncharacterised domain found in phage-related conserved hypothetical protein from Bordetella. 21.10 21.10 21.30 21.80 20.80 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.51 0.71 -4.04 16 124 2009-01-15 18:05:59 2006-05-05 16:46:41 6 4 104 0 23 102 2 103.00 28 77.82 CHANGED MA..KhsL.........utsP.TF+hsVplshhsGpsscl.F.............TFKah..sppElt-lhppts....................pp......................................ss-hlpplssGWs.l--cFs-ENlphLlspasuAspAllsAY.pAlstsRlGN ....................................................Mu...hhpL..........spP.TFchsVpIPhsGt.css..l.ph.............sF+ah..shp-ht.ph.p..t....................tt........................................sh-hhhchlpGWs.......l-..-.....sFscENlphLhssYPt.AspAlhssYhptlhssRttN............ 1 1 11 19 +8583 PF08750 CNP1 CNP1-like family Bateman A anon PSI2 target CAB84161.1 Family This family of proteins are likely to be lipoproteins. CNP1 (cryptic neisserial protein) has been expressed in E. coli and shown to be localised periplasmicly [1]. 
25.00 25.00 77.60 77.30 23.30 22.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.78 0.71 -4.49 22 187 2009-01-15 18:05:59 2006-05-05 16:59:41 6 1 186 0 53 147 10 146.50 39 75.01 CHANGED ..h.pstsWcEtps.tLP.shPpspsLlsFsV....ussos.pFhlDscSloVG.sDGVVRYslVlsSsuGA+NVsYEGIRCsoh..........Eh+hYAthsss.spWstsppsp....................Wptlpss.shNpapusLhp-aFCssts.ssts..sspllpsL+s ..................sttppacEpps.pLP..PLP..psp..shl.Fsl....s.sss.+ahlDupSlslu.sDGlVRYhlllpSss.Guc...NlsaEGIRCs.sh..........ca+hYAhhssspspW.pspps-....................Wp.Itss.shNshttsLhpphFCpsth.Psts..spsllppl+............... 0 9 30 41 +8584 PF08751 TrwC TrwC relaxase Mistry J anon pdb_1omh Domain Relaxases are DNA strand transferases which function during the conjugative cell to cell DNA transfer. TrwC binds to the origin of transfer (oriT) and melts the double helix. 22.80 22.80 23.00 23.30 22.30 22.70 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.93 0.70 -4.99 34 741 2009-01-15 18:05:59 2006-05-08 13:13:30 6 26 467 20 202 783 45 275.90 33 24.11 CHANGED thpssusussYYpsps.....................sYYt......pspssspW.hGpGupp.......L..........GL.sGpl..s.ppphtpl......l.sGphP...........sGptlsps.t....t.....................................................................................................psGaDLTFSAPKSVSllhhluuD...ccll.pAHppAVstulph.lEcphu.sRtsppGp....th.pss.sLlsutacHcoSR...........st.........DPpLHTHsllhNhs........................................hssDG....pWRoLsuc..............placpphthGtlYpupLtpcl.ccLG..aphc.........cpGsaEltGls.pchlctF..SsRsppIcpthuc.u.............ssstttcph......AshsTRpsK.pps.shsplcptWppcupph..Gh...-h 
...........................................................................................................................................t....ttuhtYatpp.................................sYYs............sp....stpW..hGpG....Apt...................L.................GL..pGps.....s.pp.h..ppl........l..p.Gp.hP....................................sGtp..l...s...p..h..............tt.....tp......................................................................................................................................................................................................psGa.DLTFSAP.KSVSh....h..........th..l...........usD...............c.......plh.pAHppAVp.s........lp..lEp...h...u....s.Rhtps..Gt.........th..Ts.sLlhAhFpHcTSR............................st.........D.....PpLHTHsllhNhs..........................................p..t.s.G..........cW+..s..Lssc..............sla.tp.phsh..utlYps......p......Ltpp......l.ctlG...aph..c.............cps....h..aEl..........sGls...........t...................l-s.F..SpRspp.....Iccthsp..........................................ssshpt+ph............AslsTRpsK...p...ps......s.p.hhtpWhpphpph..Gh..h........................................................................ 0 61 144 184 +8585 PF08752 COP-gamma_platf Gamma-COP; Coatomer gamma subunit appendage platform subdomain Mistry J anon pdb_1r4x Domain COPI-coated vesicles function in retrograde transport from the Golgi to the ER, and in intra-Golgi transport. This is the platform subdomain of the coatomer gamma subunit appendage domain.\ It carries a protein-protein interaction site at UniProt:P53620, residue W776, which in yeast binds to the ARFGAP Glo3p, and in mammalian gamma-COP binds to a Glo3p orthologue, ARFGAP2 [1]. 
25.00 25.00 25.20 25.00 24.20 23.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.73 0.71 -4.40 81 438 2012-10-03 16:25:20 2006-05-08 13:33:25 5 10 295 2 300 436 6 145.80 42 17.35 CHANGED ppppYsppLusIP....cht....sh.GslhKSS.p...slpLTEsETEYsVsslKHlF..ppHlVlQFslsNTLsDplL-sVsVhhsss...-st.......h..pphhhlPlspLs..h.spsGssYVshpt.....s.sshshusFus.sL+FhsKE.lDPsTG.-s--....-GY-DEY.lE-lElss ...........................................t.tphatcpLutIP.....Eht...sh...GsLhKSS.s....PVpLTEuETEYsVpslKHl.F..spHlVh........Q.......FcssNTL.....sD...plLEsVoV.hpss...-s........a..p.hhhlPstp.Lsh..s.pPGtsYlhhph..............s-.ssh...........sssoF...us.sLKFssK-.sDPsTG....Es--.....-GY-DEY....lEDl-ls.h....................... 0 101 159 238 +8586 PF08753 NikR_C NikR C terminal nickel binding domain Mistry J anon pdb_1q5y Domain NikR is a transcription factor that regulates nickel uptake. It consists of two dimeric DNA binding domains separated by a tetrameric regulatory domain that binds nickel. This domain corresponds to the C terminal regulatory domain which contains four nickel binding sites at the tetramer interface [1]. 20.90 20.90 21.30 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.42 0.72 -4.09 39 1066 2012-10-02 00:29:19 2006-05-08 14:04:46 6 4 979 79 297 602 67 78.40 43 56.69 CHANGED sGslsllYDHcppsLspcLsslQHcapcl.....IloohHlHLDccpChEllll+GpupclpcLucclhuh+GV+au+Lshsss ......................uVLohVY-H.c.pR.-Lup+lsshQHcHHDl....sluTLHVHl....s....H.-c.C.LElhlLKGchu-Vp+hADclhAp.RGV+HG+Lpslsp.......... 0 76 181 243 +8588 PF08755 YccV-like Hemimethylated DNA-binding protein YccV like Mistry J anon pdb_1bvb Domain YccV is a hemimethylated DNA binding protein which has been shown to regulate dnaA gene expression [1]. The structure of one of the hypothetical proteins in this family has been solved and it forms a beta sheet structure with a terminating alpha helix. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.41 0.72 -4.07 64 1071 2009-01-15 18:05:59 2006-05-08 14:58:45 6 15 964 1 318 578 1189 98.80 44 47.48 CHANGED puKFpIGQlVRH+hasaRGVlhDlDPpFsso--WacsIst-h+..Pt.+cQPFYHlLsEscps.............phl.uYVuEpNL.....h.DssspslcHPpl....schFpphcsu..tYh.c .......................s.uKFuIGQ.V...R.Hp..........Lh..G..Yh....GVV........l......DlDP..a..u..hsE..........-p.....ls.........ss.c..............ph..RstP......aYHV.lhED-.su.............................V..sYlAEtpL........ps-hpsE.cscpPsh....-Elhpshcpp..h.ts.+..................................................................................... 0 90 160 239 +8589 PF08756 YfkB YfkB-like domain Bateman A anon PSI2 structural target yfkB Domain This protein is adjacent to YfkA in B. subtilis. In other bacterial species it is fused to this protein. As YfkA contains a Radical SAM domain it suggests this domain is interacts with them. 25.00 25.00 25.80 190.60 20.20 17.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.95 0.71 -4.64 14 371 2009-01-15 18:05:59 2006-05-08 16:01:15 5 6 371 0 42 183 0 153.50 70 40.66 CHANGED MYPuDFASsLEsLSLcEhRcAIH+LLDhRDcslWMLFGTLPFYPCSssEEDhcLL+RLhpppNVTVRNDPDGRSRLNVNIFoGslIVTDFGDp.PsLGNIps-sLs-AYs+WppoclA+pLNCHCPuVpCLGPNVLVKNsYYp-sDFpp+pA+l MYPuDFASpLsVLTLAEM+csIHclLDhRDEslWMLFGTLPhaPC.cD--DQ+LLpRLRpuKNVTsRNDPDGRSRLNVNlFTGNVIVTDFGDE.ssluNIQcD+LsDVFDKWL.u.ScLAKSLNCHCsphpCLGPNVLVKNMYYPshDF+cpctp.h.. 0 12 25 34 +8590 PF08757 CotH CotH protein Bateman A anon PSI2 structural target cotH Family Members of this family include the spore coat protein H (cotH). 
24.60 24.60 24.60 24.80 24.40 24.10 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.02 0.70 -5.19 53 707 2009-01-15 18:05:59 2006-05-08 16:09:44 6 49 471 0 258 662 798 299.10 19 51.59 CHANGED l+hRG..so.......opph.sKKSa+l....c.hcpt..............th.Gh............cchhLpspatD..sohhRspluhclhcph...........uhssspsp.....asp..lalNG.........cYhGlYhh..hEplc.cphlctc..htsss.........utlacss................t..sh..t...ptth........ppssptshsplhs.hlphlsss....s.tphtstlpphlDl-salcahshptlhsNh...D.sh....scNhaLa..............tspss+aphlP.WDhDtuauphhp............................................t...thh.hsttssLh..p+llps..ssa+pphppphppll..pshhstptltshlcshtshls..sthpp-stthtsht..........................phppphphlppalppRhpal ....................................................................................h+G..sp...............t..t..h...+p......sacl...........p.hpph................t.h.Gh................cphhLp.s.....t.h......D...............oh.hRptlu..hphhpph.......................................sh...s.sps.p......ast.....lhlNs......................................ph.GlYh......hEp.lc..cphhc...tp.....hhs..ss.........................................uhhaphs...............................................................................tt.sh.......tt.........h............................p.t.ts.......pp..s.....hptl...p..hlphhsss........................ttth....ptltphlDl.-.p.hlcahshp.hhtNh......D.sh............spNhhla.............................ttpsshap....hlP..WDh.-hsautt.t.........................................................................................................................thh.t...t...ss.Lh...tpllp......stappthpp.h...hpc.lh..........p..p......h..s....phh.sh.l.pphtphlt......s..h..p.s....h.h..h.............................................................thpt.tht..l..phhptR.t........................................................................................... 
0 146 211 244 +8591 PF08758 Cadherin_pro Cadherin prodomain like Mistry J anon pdb_1op4 Domain Cadherins are a family of proteins that mediate calcium dependent cell-cell adhesion. They are activated through cleavage of a prosequence in the late Golgi. This domain corresponds to the folded region of the prosequence, and is termed the prodomain. The prodomain shows structural resemblance to the cadherin domain, but lacks all the features known to be important for cadherin-cadherin interactions [1]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.86 0.72 -4.21 52 421 2012-10-03 16:25:20 2006-05-08 16:13:35 6 13 48 1 158 409 0 82.30 30 10.07 CHANGED sCpPGFppcsashplspc.lpcGp.L........hpVpFssC.sGpp+lpYpoo-s.cFKVssDGolhstRslplpscphpFhVaApDspscc...............hslpltl ...................................p.h..h...lspp....l.ttp.l........hp...VsF..p-C..ts..ppplta..pSSDP.cF+VtpDGolassRslh..L.s....s..cp.....t..s.FhlhupDspspc....hpltV..l..................... 0 7 17 64 +8592 PF08759 DUF1792 Domain of unknown function (DUF1792) Bateman A anon PSI2 structural target AAO75156.1 Domain This putative domain is probably missannotated as a glycosyl transferase 8 family member. This domain is found at the C-terminus of protein such as Swiss:Q97P75 that also contain the glycosyl transferase domain at the N-terminus. 
20.80 20.80 20.80 35.90 20.40 20.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.44 0.70 -4.74 19 183 2009-01-15 18:05:59 2006-05-08 16:19:41 6 4 170 0 25 154 14 215.50 52 33.74 CHANGED VsRFGDGEhsll........tGcsIsaQsa-scLAp+L+cll.ppscp..shlVCLPDsFp.sltcYsphuppFW+sahhhhsshapchh.....ssphYusTFlSRPYhDhtD....KspustaFccLKpIWcs+DlLIVEGtpSRuGVGNDLFcNs+SIcRIlCPu+sAap+hccIhptlpch.....ucs+LILlhLGPTAKVLuaDLtchGaQsIDlGHIDoEYEWa+MGAppKVKl.ppKasAEhN .........llRFGDGEhsLh........hG.psIsYQsaD.ELAppL+cll..th..pSsE...cLlVCLPD.s.Fc...sh.pas.hupsFW+.pHh..hah-hYpcls.....pusaYGSTFISRPYI.DhcD....Kop..utuhFcKLKplWcsRDlLIVEGhsSRSGVGNDLFDpspSl+RIICPS+NAYSpl-pIpptlh.ca......Acs.+LILhMLGPTAKVLuYcL.s.phGY...QslDlGHIDSEYEWh+MGAppKVKh.uHKHTAEaN........... 2 4 11 17 +8593 PF08760 DUF1793 Domain of unknown function (DUF1793) Bateman A anon PSI2 structural target AAO78587.1 Domain This presumed domain is found at the C-terminus of a glutaminase protein from fungi [1]. This domain is also found as a single domain protein in Bacteroides thetaiotaomicron. 19.40 19.40 20.00 20.70 18.10 19.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.04 0.71 -4.39 37 301 2009-01-15 18:05:59 2006-05-08 16:38:11 6 11 161 0 170 307 0 170.00 37 23.78 CHANGED lGltAhuclAshhGcstsAppYsslAcphhscWt.phuh....-us.......HhpLsa.sp.uoWo.hYNLhhD+LLsLs.........................................................lhPpplachpssaYhohppcYGlPLDoR+s.YTKoDW.hasAslussp.shpthlsslhpalsETsochPhoDhY-Tp..sGshss........FhARsVVGGaFhhLhh ................lGltAhuplAphhG...p.p....ssA.ppYpshAcphsscWp.phuh....-us.........Hh.+Lsa...cp...s.oWophYNLh.hD+LLs.Lp....................................................................lhPppVhptpssaYhs....h....p..pp...YGlPLD.......oRc.p.....Y...T.KoDW.hasAuhussp....shpphlsslhpahNETsochPhoDhapTp..sGphss..........FtARsVVGGaahhlh.................................................... 
0 67 109 144 +8594 PF08761 dUTPase_2 dUTPase Mistry J anon pdb_1w2y Domain 2-Deoxyuridine 5-triphosphate nucleotidohydrolase (dUTPase) catalyses the hydrolysis of dUTP to dUMP and pyrophosphate (EC:3.6.1.23). Members of this family have a novel all-alpha fold and are unrelated to the all-beta fold found in dUTPases of the majority of organisms [1]. This family contains both dUTPase homologues of dUTPase including dCTPase of phage T4. 23.00 23.00 23.00 23.10 22.70 22.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.25 0.71 -4.16 34 876 2012-10-01 21:36:44 2006-05-08 16:42:01 6 2 693 18 118 593 126 152.60 27 88.39 CHANGED sLpplhchQctLspclspcp.......pt.ph......hh.pp..hAlhsEluEhhsch...........tsaKaWKspp.............scttp...............................................lh.EhVDslHFllSlslphthp..........................................t.hs.ph.pt............hh.......phhtphpphhpp..st...pht.lhsthhtls.hlGhs.--lhctYhtKNplNatRQspGY ..................................LpphhplQcchscclstpp.............p..p.................h...h..huhhlEhuEhhsph...........p.s.aKhW.Kpps............spsh-s.........................................................hlpEhlDhlHF..h..LS.lslphhhppp.......................................tt..h......shhpp............................................hh............phhtp.h...p.h..h...tp...s...........ht...hht.a..htls.h.t.ht.......ash-plhcsYhtKtthNatRQs........................................... 0 41 92 106 +8595 PF08762 CRPV_capsid CRPV capsid protein like Mistry J anon pdb_1b35 Domain This is a family of capsid proteins found in positive stranded ssRNA viruses such as cricket paralysis virus (CRPV). It forms an all beta sheet structure [1]. 
21.10 21.10 21.20 21.20 20.70 20.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.31 0.71 -4.88 7 192 2012-10-04 01:49:40 2006-05-08 17:16:02 5 14 36 2 0 153 0 163.40 32 24.60 CHANGED o..s.h.hhhGEplsslRpLl+RFphh.t......ptsssl.chsplppPt............tst.shshh.......shhualYtFaRGuhRaKlhshps...................t.....s..lshhtTsspsspsssuh............hh.sslpshtEhplPYYu.....Psss.s.ts.s....psl...psh.s.sltpt.psp.......sthhhRA.uuDDaoFuhLlGsP.h.hssup ..................................................................tsh..............................................................................................................h...............shluphYthYRGGlRhKllscps.........................................l.c.hsp.ss.s..sspps.hsSc........................s.tlp.hslpsVtEhplPYYu..........Pshoso..pups..................ph.....pGt..s..s..hslssss.suh.................sph..h....slu.AGDDhsFShalGsP.h..t.s................................................ 0 0 0 0 +8596 PF08763 Ca_chan_IQ Voltage gated calcium channel IQ domain Mistry J anon pdb_2be6 Domain Voltage gated calcium channels control cellular calcium entry in response to changes in membrane potential. The isoleucine-glutamine (IQ) motif in the voltage gated calcium channel IQ domain interacts with hydrophobic pockets of Ca2+/calmodulin [1]. The interaction regulates two self-regulatory calcium dependent feedback mechanism, calcium dependent inactivation (CDI), and calcium-dependent facilitation (CDF). 20.70 20.70 20.70 21.40 20.50 20.30 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -7.80 0.72 -4.71 25 724 2009-01-15 18:05:59 2006-05-09 09:54:38 6 7 96 16 235 602 0 34.30 52 1.83 CHANGED sD-VTVGKFYATaLIQDYFR+FKp+Kppptpppps .....DElTVGKhYAoaLIpDYFRpaKp+KEpthht...s..... 1 33 56 116 +8597 PF08764 Coagulase Staphylococcus aureus coagulase Mistry J anon pdb_1nu9 Domain Staphylococcus aureus secretes a cofactor called coagulase. 
Coagulase is an extracellular protein that forms a complex with human prothrombin, and activates it without the usual proteolytic cleavages. The resulting complex directly initiates blood clotting. 20.70 20.70 20.80 20.70 20.10 20.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.89 0.70 -4.47 11 424 2009-01-15 18:05:59 2006-05-09 10:55:11 5 11 168 6 3 229 0 256.90 41 46.40 CHANGED TKDYStcSpVNcsSKpGosISstYaWuhIcsLEsQFspAlcLlEcYpYGEKEYKDAKDKLMTRILuEDQYLLEKKhspYEhYKchYKKaKcpN...Ppsp.hKMtsFacYslYsLTMcEYN-IppSLK-Al-cF+p-Vc-IppKNpDLKsYsccpEcKATccVYDLVsElDTlasuYauDppaspcAKELRAKLDLILGDpcsPpRITNERI+KEMhcDLNSIIDDFFhETsQNRPssITKYDPshHDY+...-NKpsFDALVKET+-AVscADESWKsKTVKKYG ................................................tK.st.hss.hh.thlp.tLts.hh.uhslhc...p.cYs-.EYK-AhDKhhp+..lhuEDphL.p+hhtthc.hKcaapptpp..........tp.hph..hp..hcphs....lhsLThccYsplapoLKcshc-Fp+EVccIppKNsDLKs.a........s........cs...c-p+...As........scl..c..L.scl.h.l.....shs.aa....sc..p....patccsc-LhuKL..DLILG...-...pc...c...s....+.+.....h...TNcRhtcEhh-DLpoIID-FFh-hppNRPpsIsph.sss.c.H.s.p....cN+tNh-pLhp-TctAh.tcuD-Sh+p+psK....................................... 1 2 2 3 +8598 PF08765 Mor Mor transcription activator family Mistry J anon pdb_1rr7 Domain Mor (Middle operon regulator) is a sequence specific DNA binding protein. It mediates transcription activation through its interactions with the C-terminal domains of the alpha and sigma subunits of bacterial RNA polymerase. The N terminal region of Mor is the dimerisation region, and the C terminal contains a helix-turn-helix motif which binds DNA. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.32 0.72 -4.27 17 765 2012-10-04 14:01:12 2006-05-09 12:02:26 6 4 521 1 130 526 7 97.00 22 79.65 CHANGED s+hP-lLs-LsphhtptLpchs....l-.tt.AcpluhplAspluppaGGpslYhPpGhshchs.RDhcIas-Fs........GcNhtpLAR+YslohphIYpll+RhR+pchpcpQhc.L ..............................h.....................................p.h.u..p......hs....tplhphhG..Gp.......p.l.YlP...........+............s....p............p...........h............p............t............p............h.Rs..ppIhp.-.a.s............G..p.s.h...p..cLAc+YtLS.pplhpIlpc.ct.................................. 0 57 101 114 +8599 PF08766 DEK_C DEK C terminal domain Mistry J anon pdb_1q1v Domain DEK is a chromatin associated protein that is linked with cancers and autoimmune disease. This domain is found at the C terminal of DEK and is of clinical importance since it can reverse the characteristic abnormal DNA-mutagen sensitivity in fibroblasts from ataxia-telangiectasia (A-T) patients [3].\ The structure of this domain shows it to be homologous to the E2F/DP transcription factor family [1]. This domain is also found in chitin synthase proteins like Swiss:Q8TF96, and in protein phosphastases such as Swiss:Q6NN85. 24.80 24.80 24.90 24.80 24.50 24.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.18 0.72 -4.19 59 900 2009-01-15 18:05:59 2006-05-09 13:39:52 6 46 294 1 579 859 1 53.50 28 6.68 CHANGED o-pplpptlcclLpsu.DLpslTp.+plRcpLpp+h.sh-L.s..s+KshIcphlcphLs ........-ptlppplcpILpss..D...Lps.l...Tp.KplRppLEpc....h..sh...sL.s...p+KsaIcptltthl....................... 0 169 305 449 +8600 PF08767 CRM1_C CRM1 C terminal Mistry J anon pdb_1w9c Domain CRM1 (also known as Exportin1) mediates the nuclear export of proteins bearing a leucine-rich nuclear export signal (NES). CRM1 forms a complex with the NES containing protein and the small GTPase Ran. 
This region forms an alpha helical structure formed by six helical hairpin motifs that are structurally similar to the HEAT repeat, but share little sequence similarity to the HEAT repeat [1]. 20.20 20.20 20.30 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.87 0.70 -5.52 29 475 2012-10-11 20:01:01 2006-05-09 14:14:28 6 15 300 13 326 464 6 280.10 33 28.82 CHANGED QLupIYhDhLplY+hhSphISptlsttG.hsoK....pshl+shRolK+ElLKLl-Talp+up.....................................s.p..VhpshlsPLh-sVLhDYpp.NVPsAR-sEVLsshsollsKlps....hhpstlshIhcuVF-sTLsMIscDFp-aPEHRlpFapLLculstpCFsAllp..lssppFKhllDollWAhKHspRsVt-sGLslhh-llpNlpp............ssphuppFYpsaahslLp-lFhVlTDos..HKuGFphpshlLtplhpll-ss.....plssPlhs........hssNthalppalsshLpsuFsplstpQlpsFlpuLaphsp.s...................htpF+tplRDFLlplKEFuu ..................................................................................................................................QlshIahDMLsh...Y.+hhSp.Isttltt...t.............G..hs.s.+.............pshl+.th.RslK+EhL+Llpsalp+up..........................................................................................................s.p.....lhpphl...ssL.hcsVLhDYpp..s..lPsAR-sEVLs...hhssllpK.ltt....................hh.t.spl...s.I...h.csVF...csTLsM........I..s..cD.Fp-..aPEHR...spFapLLpulsppC....F..su.........llp.....ls......sspFKhll.DSl.hWAhKHspRsVsp..s...............GLphlhpLlpNhtp.................ptthuptFap...s.aahplLpclF.V.l.TDsp..Hp..uGhphpu..lL.tth.hpllp.s.....tlp.sl.s......................sNt.alp.palsphLpsuF.....splptt..........p....lp.Fl.pu..Lhp.tp..s...................................................hstF+tplRDFLlplKEas............................................................................... 0 131 195 269 +8601 PF08768 DUF1794 Domain of unknown function (DUF1794) Mistry J, Pollington JE anon pdb_2fr2 Domain This domain forms a beta barrel structure but the function is unknown. 
The GO annotation for this protein indicates that the protein has a function in nematode larval development and has a positive regulation on growth rate. 25.00 25.00 25.20 25.40 24.80 24.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.06 0.71 -4.37 55 758 2009-01-15 18:05:59 2006-05-09 15:33:03 6 9 575 7 271 552 113 161.40 28 72.68 CHANGED ssLsPLuhLlGsWcGc..GtuthPsh....p.sap...YtEclsFs.as....Gp.....shLtYp..t+................oath....ssGp....PhHsEsGahRh...psssp...................................l-hhlupPsGlsElhhGp......ssusplcLs.scu.....lspostu.+..pss...........................uhpRhaslh..ssssLsYshch.t.....shsp...s.Lpp+hsupLcRh ............................................tLhsLs.LlGsWcGc.....Gp.ushPs.............t.sapaspplsas.cs.........Gt.........sa..Lsap..sp.................................................oaph.............ssGp........Ph+pEs....GaaRh.....ssssp..............................................l-llhupssGhsElhhGp........hsss.p..l.cLt..ocu........lucostu.p......pss...........................shpRhasls.....pss.s.Ltas.ch..h.....ssss.....s.lpsahsApLcR................................................... 0 79 160 239 +8602 PF08769 Spo0A_C Sporulation initiation factor Spo0A C terminal Mistry J anon pdb_1fc3 Domain The response regulator Spo0A is comprised of a phophoacceptor domain and a transcription activation domain. This domain corresponds to the transcription activation domain and forms an alpha helical structure comprising of 6 alpha helices. The structure contains a helix-turn-helix and binds DNA [1][2]. 
25.00 25.00 25.40 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.33 0.72 -4.17 28 674 2009-01-15 18:05:59 2006-05-09 15:37:23 6 2 490 7 111 516 13 102.40 58 44.53 CHANGED clTsllHclGVPAHIKGYpYLR-AIhhslp-sclLsulTKpLYPpIA++YsTTsSRVERAIRHAIEVAWsRGph-slschFGYTl.........................stp+GKPT.....................NSEFIAhlADKLR ................lTsIIHEIGVPAHIKGYhYLR-AItM.VhpDh-.lLuulTKhLYPsIAKKYs.TTsSRVERAIRHA.IEVAWsRGs.l-sIsplFGYTl...........................s.sK.uKPT.....................NSEFIAhlADKLR............................ 0 59 93 100 +8603 PF08770 SoxZ Sulphur oxidation protein SoxZ Mistry J anon pdb_1v8h Domain SoxZ forms an anti parallel beta structure and forms a complex with SoxY. Sulphur oxidation occurs at the thiol of a conserved cysteine residue of the SoxY subunit [1]. 20.00 20.00 20.10 22.10 19.50 19.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.87 0.72 -4.58 88 444 2012-10-03 16:25:20 2006-05-09 16:07:57 6 2 281 14 195 457 292 98.30 28 59.33 CHANGED hRl+......ssttGslspl+sLlpHsMEoGhRKDt.sGphlPscaIpplsspaNGcsVhsuphusulSpNPahpFphpuspsG..plplsWtDscGsshssptsl ...................lp......ttttGphhcl.+hllpHPM-oGhp+Dt..s.sphI...........PspaIppls.s.p.h.s.....G....p........V.h.ssphssulScNPhhpFphpsstsG..plplsapDscGsshssph........... 0 44 121 159 +8604 PF08771 Rapamycin_bind Rapamycin binding domain Mistry J anon pdb_1aue Domain This domain forms an alpha helical structure and binds to rapamycin [1]. 
20.40 20.40 20.40 21.30 20.30 20.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.38 0.72 -3.92 38 401 2009-09-13 23:50:46 2006-05-09 16:26:34 6 35 280 10 270 412 16 98.30 52 4.38 CHANGED ELIRlAlLWaEhWa-uLE-.ASRhaF....s-cNhctMhssLpPLH-hLc+.sP-TlpEsuFhpsaGccLpcApcalppYppopc.hssLspAWclYapVF++Is+Q ...........................ELIRVAlLWHEhWHEGLEE.ASRLYF....G-+NlcuMapsLcPLHphL-+....GPp....T..L+Es..........SFs.........QuaGRDLtEAp-asppY..................pp..............o........ts.......ls-LsQAWDlYYpVFR+Is+Q.................. 0 111 162 236 +8605 PF08772 NOB1_Zn_bind Nin one binding (NOB1) Zn-ribbon like Mistry J anon pdb_2con Domain This domain corresponds to a zinc ribbon and is found on the RNA binding protein NOB1 (Nin one binding). 22.00 22.00 22.00 22.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.72 0.72 -4.15 37 331 2012-10-03 10:42:43 2006-05-09 16:52:55 6 6 292 1 235 333 15 73.50 42 16.00 CHANGED pI+pl+palLRC+ACaphsp....-hs+pFCPpCGss.TLp+Vulols..psGphplalppp.hphssRGspYSlPpPpuG ..........I+pl+salLRCHuCFpsTp.........chsKhFCP+CGst.TLp.+Vulols......csGphphHlppN.h.h...ssRGp+YSlPpPpuG..................................... 0 84 130 194 +8606 PF08773 CathepsinC_exc Cathepsin C exclusion domain Mistry J anon pdb_1k3b Domain Cathepsin C (dipeptidyl peptidase I) is the physiological activator of a group of serine proteases. This domain corresponds to the exclusion domain whose structure excludes the approach of a polypeptide apart from its termini. It forms an enclosed beta barrel structure composed from 8 anti-parallel beta strands [1]. Based on a structural comparison and interaction data, it is suggested that the exclusion domain originates from a metallo-protease inhibitor [1]. 
20.20 20.20 20.80 20.50 19.10 19.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.78 0.71 -3.93 10 167 2009-01-15 18:05:59 2006-05-09 17:23:39 6 6 78 5 92 178 1 106.90 39 23.58 CHANGED DTPANCTY-DLlGTWlFpVucts....cpslsCSphssscpolsVsLpcLslAsD-aGNpGaFTLIYNQGFElslNcYKWFuFFKYcppGopVhSYCccThPGWVHDlLGRNWACFsupKl .........................................DTPAsCsa.-.lhGsWhhphu..t.................tp.ppl.sC..............s........t....s.....s....p..pph..h...lpLp..p.shAh......D.p........h...G.N..s..GpaTlIYNQG..FElsl.....Ns.....hK......aFAFFK.....Y...c..p...c..G....p...p......s.h..S..h.CpcThs..G..W.s........+..D..h......hs..ppa.uCahupK................................................. 0 42 50 67 +8607 PF08774 VRR_NUC VRR-NUC domain Iyer, L, Bateman A anon Iyer L Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.47 0.72 -3.98 77 1394 2012-10-11 20:44:44 2006-05-09 18:51:10 6 14 1169 0 380 1233 264 99.00 21 24.30 CHANGED hpctphpstlhpthp.phs........hhhhhhthuh......................h..........uh.sGhPDlllh........hsssp........................hhhlElKuPs.......s+lpcpQhphh.ctltpp.....GhpstVspstp ..................................................................p..hhp.hp.phs...........h.hh.hu.........................................................h.th..............uttsGhPDhll..h..................hspsp...................................................................hhhlElKuss.................s+lpst.....Qhp....hh.chlppt.....GhtVtVsps........................... 0 109 224 308 +8608 PF08775 ParB ParB family Mistry J anon pdb_1zx4 Domain ParB is a component of the par system which mediates accurate DNA partition during cell division. It recognises A-box and B-box DNA motifs. ParB forms an asymmetric dimer with 2 extended helix-turn-helix (HTH) motifs that bind to A-boxes. 
The HTH motifs emanate from a beta sheet coiled coil DNA binding module [1]. Both DNA binding elements are free to rotate around a flexible linker, this enables them to bind to complex arrays of A- and B-box elements on adjacent DNA arms of the looped partition site [1]. 25.00 25.00 28.10 27.70 24.90 23.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.64 0.71 -3.82 15 218 2009-01-15 18:05:59 2006-05-10 09:43:09 5 6 166 4 18 138 1 123.10 38 38.43 CHANGED FPVts-LoauDYphLh+lpcphpppphslspllpslppclcslps..phs.--tKspIhchIppchptlpstss+cps.plssLtpF-sKcpFARK+s..KGRphoYEFuRlsp-lQccL...DpAIcplLcc ...FPVts-LohoDYphLhclsEchppcsh.ol-pllpslpp..clpsl.s..ths.--tKspILclIppps.phLts.ssKsp....ssstLhpFc-KcpFARK+s..KGRtlsYEFuRlSp-lQcEl...DcuIpclLc....... 0 1 4 14 +8609 PF08776 VASP_tetra VASP tetramerisation domain Mistry J anon pdb_1usd Domain Vasodilator-stimulated phosphoprotein (VASP) is an actin cytoskeletal regulatory protein. This region corresponds to the tetramerisation domain which forms a right handed alpha helical coiled coil structure [1]. 20.70 20.70 20.70 20.90 20.50 19.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.97 0.72 -4.64 6 249 2009-01-15 18:05:59 2006-05-10 11:43:15 6 5 71 2 102 217 0 39.40 58 7.98 CHANGED -uhDhDRhKQEILEEVhRELpKVKEEII-A...hpQELp+huo .......-uhDhDRhKQEIL-EhR+ELpKlKEEIIDA....IRQELu+.so....... 0 16 26 55 +8610 PF08777 RRM_3 RNA binding motif Mistry J anon pdb_1owx Domain This domain is found in protein La which functions as an RNA chaperone during RNA polymerase III transcription, and can also stimulate translation initiation. It contains a five stranded beta sheet which forms an atypical RNA recognition motif [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.29 0.72 -4.18 15 306 2012-10-02 20:46:34 2006-05-10 12:05:32 6 12 140 1 190 739 8 99.90 29 22.53 CHANGED GsllchoG.lscsso.R-clKshFppau...cVtaVDapcGsppGtlRFcssps..AccAhcpss-u....pl.l+ctplsh-lLpG-cEccYhpKIlpcpppph..sp.+pKGR ........................Gsllchp....sc...ss....Rccl+phhs.p.hu...........clt....a.l......D.a......h.....c.......G.s.p......c.......G......h......lR......Fcpsps..Apcsl.pphpp...s....................t........h.h.....c....s.....p.p......h.......phcl.Lp..G...-tEccah....pK....Iht.....c.pppph...sp.+p+t............................................ 0 63 85 134 +8611 PF08778 HIF-1a_CTAD HIF-1 alpha C terminal transactivation domain Mistry J anon pdb_1l3e Domain Hypoxia inducible factor-1 alpha (HIF-1 alpha) is the regulatory subunit of the heterodimeric transcription factor HIF-1. It plays a key role in cellular response to low oxygen tension. This region corresponds to the C terminal transactivation domain. 25.00 25.00 26.80 27.50 23.50 22.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.75 0.72 -4.49 15 244 2009-01-15 18:05:59 2006-05-10 13:04:30 5 11 100 5 63 202 0 38.70 68 4.98 CHANGED hsshsLP...QLTRYDCEVNAPlpGpppLLQGEELLRALDQVs ......h-p.sLP...QLTpYDCEVNAPlQ..GspsLLQGcELLRALDQss... 0 5 14 28 +8612 PF08779 SARS_X4 SARS coronavirus X4 like Mistry J anon pdb_1yo4 Domain The structure of the coronavirus X4 protein (also known as 7a and U122) shows similarities to the immunoglobulin like fold and suggests a binding activity to integrin I domains [1]. In SARS-CoV- infected cells, the X4 protein is expressed and retained intra-cellularly within the Golgi network [2]. X4 has been implicated to function during the replication cycle of SARS-CoV [3]. 
20.40 20.40 20.60 124.50 19.30 15.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.09 0.72 -3.82 2 79 2009-09-11 12:31:16 2006-05-10 13:19:27 5 1 75 2 0 18 0 83.90 97 68.92 CHANGED ELYHYQECVRGTTVLLKEPCPSGTYEGNSPFt.PhhsNtahl......hsFspssGoR+T.YQ.hscp.os+hF.h..phQ.chas ELYHYQECVRGTTVLLKEPCPSGTYEGNSPFH.PLADNKFALTCTSTHFAFACADGTRHT.YQLRARSVSPKLFIRQEEVQQELYS........ 1 0 0 0 +8613 PF08780 NTase_sub_bind Nucleotidyltransferase substrate binding protein like Mistry J anon pdb_1wwp Domain Nucleotidyltransferases (EC 2.7.7) comprise a large enzyme family with diverse roles in polynucleotide synthesis and modification. This domain is structurally related to kanamycin nucleotidyltransferase (KNTase) and forms a complex with HI0073, a sequence homolog of the nucleotide-binding domain of this nucleotidyltransferase superfamily [1]. 25.70 25.70 25.80 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.64 0.71 -4.35 40 469 2012-10-01 22:14:54 2006-05-10 14:57:29 6 5 340 14 148 413 71 119.10 30 86.46 CHANGED p+hssap+AltpLcculph...........p.hs-l.psGlIQpFEaTaELuWKhhKcaLch.pGh..plh.us...RsshRpAhptGLI...sDs-h.WhchlcsRNhTSHTYscchAcclhppIh.pYhsthppLhpplpp ..........................................................chpshppAlppLpcuhp....................p..schhhsGhIQ+FEaoaELuWKhhKchLph.pGh.....phh..us....+sslR.pAaptGLI...p-t..ch..WhphlcsR.NhTuHoY.sc..phA..p..plhppIh....pahs.hppLhpplp......................... 0 58 106 125 +8614 PF08781 DP Transcription factor DP Mistry J anon pdb_2aze Domain DP forms a heterodimer with E2F and regulates genes involved in cell cycle progression. The transcriptional activity of E2F is inhibited by the retinoblastoma protein which binds to the E2F-DP heterodimer [2] and negatively regulates the G1-S transition. 
20.90 20.90 21.00 21.00 20.20 20.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.69 0.71 -4.51 24 312 2009-01-15 18:05:59 2006-05-10 16:38:45 5 6 136 1 166 279 7 135.20 48 35.77 CHANGED p-sppLchE+pc.thcRIcpKps.LpELl.QplAhcsLlpRNpphEpp.sssPs..osIpLPFIllpTsp+ssl-spISsDppchhFsFsso.FE.IHDDhpVLKpMuhshulcsstssspshphspohss....................hslpshlsthspspttp ....................................QEspsLEhE+p+.RhERI+pKpupLQELl.....lQplAFKNL.VQRN+psEpp..sptPss......sosIpLPFIllNTu++TlI-C.SISsDKhEYlFsFDs....T.FE.IHDDlEVLK+................MGhuhGL...-sGpCo.c.slphu+ohlP.....................psLp.Ylpth.t.....s........................................................... 0 54 85 121 +8615 PF08782 c-SKI_SMAD_bind c-SKI Smad4 binding domain Mistry J anon pdb_1mr1 Domain c-SKI is an oncoprotein that inhibits TGF-beta signaling through interaction with Smad proteins [1]. This domain binds to Smad4 [2] 25.00 25.00 25.80 30.30 24.60 18.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.42 0.72 -3.87 7 320 2009-01-15 18:05:59 2006-05-10 16:54:37 5 4 90 2 180 285 0 92.80 47 15.16 CHANGED SF+VYHECFG+C+GLFlPELYsuPsAuCIQCh-CRhMFsPpKFVsHSH+s.E.pRTCHWGF-SuNWRuYlhLspcYts+-cpupLpplL--lKtKFc ............uFcVhHEChhtC+GhFl.PphYsu.scAtCIpCh...Cshh.FoPpKFlhHSHR..s..s.....-..+......h.T...p..uhs.ssWRpaL..cLs........s.c.st..ppc.Ltphh--lKthFs............................. 0 38 54 115 +8616 PF08783 DWNN DWNN domain Mistry J anon pdb_2c7h Domain DWNN is a ubiquitin like domain found at the N terminus of the RBBP6 family of splicing-associated proteins [1]. The DWNN domain is independently expressed in higher vertebrates so it may function as a novel ubiquitin-like modifier of other proteins [1]. 
21.70 21.70 22.30 21.70 21.20 21.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.41 0.72 -4.04 26 408 2012-10-03 10:59:06 2006-05-10 17:03:34 6 14 265 1 270 417 1 72.60 46 11.02 CHANGED laYKF+.Sp+c.hsplsFDGo.sIoVhDLK+cIlppp+LGcupD......F-LplhNspTpE.....E.Yp.DDstlIP+sooVll+RlP ....VaYKFp.Sph.s..asslsFDGh.tISVt-LK+pIhppc+Ls.cupD..........hDLpIpNupT.pE...........E.Ys.DDsslIP+soSVIlRRlP...................... 1 80 164 220 +8617 PF08784 RPA_C Replication protein A C terminal Mistry J anon pdb_1z1d Domain This domain corresponds to the C terminal of the single stranded DNA binding protein RPA (replication protein A). RPA is involved in many DNA metabolic pathways including DNA replication, DNA repair, recombination, cell cycle and DNA damage checkpoints. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.39 0.72 -3.60 23 349 2012-10-04 14:01:12 2006-05-11 10:35:05 6 5 269 16 223 404 8 100.20 20 37.20 CHANGED Hhhho+spstssuu...........sssssssuhsss.........ushsussspshs......u.hsshpppVLshl+p..tstsscGlphc-ls.ppL..phssscl+pAl-hLss-GpIYSTlD-p ........................................................................................tt.................s....s...st..ss.hs..ss...........ssh.s.sss.t..shs..........s.hsshp.ppVhshl+s......stsp-....G...lphpplt..ppl.......ph..........s.hs...plppul-hLhspG.hIYoTlD-p....... 0 67 119 185 +8618 PF08785 Ku_PK_bind Ku C terminal domain like Mistry J anon pdb_1q2z Domain The non-homologous end joining (NHEJ) pathway is one method by which double stranded breaks in chromosomal DNA are repaired. Ku is a component of a multi-protein complex that is involved in the NHEJ. Ku has affinity for DNA ends and recruits the DNA-dependent protein kinase catalytic subunit (DNA-PKcs). This domain is found at the C terminal of Ku which binds to DNA-PKcs [1]. 
21.30 21.30 21.40 21.40 21.20 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.34 0.71 -10.58 0.71 -4.21 9 263 2009-01-15 18:05:59 2006-05-11 11:19:19 6 10 204 3 172 270 0 117.30 26 17.18 CHANGED GolNPspDFpsLlpp+sts..hpcAhsQhpp+IhpLlpss.psp.apKulpClhAhRctslh.upscpFNsFLppLpphhps+pLpcFWp.llsp........cploLIopsEupsSsVosEEAppFLs...ppE ................................sPht-Fc.t.hl.........p.........p.........p.......css.........hpc..............AspQhtshIpphlpss....tss...at+..u..h-sltshRcpslphp.E.........sphaNsFl.ppL+c....pl...........p....c....p.....h........p........cFW.p.hl.hp........p.plsL.Isp..pEsp....s......Sp.V...otc-A.p.p.Fh....t........................... 0 62 96 135 +8619 PF08786 DUF1795 Domain of unknown function (DUF1795) Mistry J anon pdb_1tu1 Domain This is a bacterial domain of unknown function. It forms an antiparallel beta sheet structure and contains some alpha helical regions. 20.90 20.90 21.00 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.42 0.71 -4.30 39 1088 2009-01-15 18:05:59 2006-05-11 11:40:40 6 4 617 4 112 378 4 129.40 29 78.06 CHANGED EGshsLPss.apDcolNlhhhsssts..shollloRssltsucslpsalpcplptl.pppLptaphhs......ppssplusp....suhplp.........hpap.ppsp.laQhQshhhhs.........pplLhhThTs..ssshssppcttatsllsSh ...........................................hshsL.P..us..hpD.......p.......S...............Nh...hVhoDstsp..psslVIst-.hs.s......-.sLsshsp+hh.sp..cs+.sphplhs......scslpLtsp.......h.pLD.............shhs...upsp.psa..p....shllh.ls.........splLshphTh..sss.ppptpsp.hpslIpoh........................... 0 8 30 70 +8620 PF08787 Alginate_lyase2 Alginate lyase Mistry J anon pdb_1uai Domain Alginate lyases are enzymes that degrade the linear polysaccharide alignate.\ \ They cleave the glycosidic linkage of alignate through a beta-elimination reaction. This family forms an all beta fold and is different to all alpha fold of Pfam:PF05426. 
25.50 25.50 25.60 28.80 25.00 25.40 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.01 0.70 -4.71 70 350 2009-01-15 18:05:59 2006-05-11 13:04:19 6 15 175 10 153 347 22 224.10 25 77.76 CHANGED hDL...opWplslPh............ts....psspltspplts...saps..........haahssspus..hsFhsssp.....u....oossophsRoELREhh................s.sttsWshssss............phpAolpVspVsss............VlluQIHutp..............spPlh+lhap......ppG...........slhhthc................stttshthshhs...................sltLG.........ctFsaplp...lsss.....plplshsup....sptsh.....................................tsasspt..hYFKAGsYsQ...ssstsst.......................shupspFhpLpl.s+ ......................................................hslspWplplPh................ts.stplpsspl.s......saps..........haahssstss..lhFhs..sss.....us...sTtsupasRoELREth................s.p.tsWhhssss..p...............phpuslpVsplsss.t..........llluQIHutp.............sspPlh+l.at......psG...................slhhthc.................ssststthslhs....................sl.Ls.........cpF.saplp...lssu.....pl.sVphssp.....sttph.....................................tsassp...hYFKAGsYsQ...ssss.st.......................phucspahpLplt.............................. 0 22 98 137 +8621 PF08788 NHR2 NHR2 domain like Mistry J anon pdb_1wq6 Domain The NHR2 (Nervy homology 2) domain is found in the ETO protein where it mediates oligomerisation and protein-protein interactions. It forms an alpha-helical tetramer [1]. 20.10 20.10 20.10 23.90 19.80 19.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.45 0.72 -4.49 5 245 2009-01-15 18:05:59 2006-05-11 13:49:28 6 7 68 2 108 216 0 64.00 71 11.19 CHANGED pEEslDHRLT-REWA-EW+H..LDphLNCIMDMVEKTRRSloVL.RRCQEuDREELNYWtRRaSDuEEs+ ...............QEEhlDHRLT-REWAEEWKH..LDpl.......LNCIMDMVEKTRRSLTVL..RRCQEADREELNaWhRRYS-sE-h+............... 
1 13 21 55 +8622 PF08789 PBCV_basic_adap PBCV-specific basic adaptor domain Iyer L anon Iyer L Domain The small PBCV-specific basic adaptor domain is found fused to S/T protein kinases and the 2-Cysteine domain [1]. 20.40 20.40 20.50 20.50 20.30 19.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.93 0.72 -4.29 24 80 2009-01-15 18:05:59 2006-05-11 14:12:19 5 8 13 0 5 80 1 37.90 44 19.68 CHANGED TG+lsuKGRtlacss+GtpYVhssu.KKVhVpchhpPpts ....TGKlsAKtRcVF+ssKG+sa.Vhps.s....KKVYVKKlhsPKt..... 0 0 2 4 +8623 PF08790 zf-LYAR LYAR-type C2HC zinc finger Mistry J anon pdb_1wjv Domain This C2HC zinc finger is found in LYAR proteins such as Swiss:Q08288 which are involved in cell growth regulation. 20.70 20.70 20.70 22.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.22 0.72 -7.50 0.72 -4.24 30 311 2009-01-15 18:05:59 2006-05-11 14:27:46 6 11 250 1 203 291 3 27.80 55 8.36 CHANGED aoCIDCspsF.stpsa+sHouCITEsEKY ..hoCIDCspsF..GssY+sHopCIoEspKY.... 1 70 114 167 +8625 PF08792 A2L_zn_ribbon A2L zinc ribbon domain Iyer L anon Iyer L Domain This zinc ribbon domain is found associated with some viral A2L transcription factors [1]. 22.90 22.90 22.90 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.84 0.72 -4.40 14 90 2012-10-03 10:42:43 2006-05-11 14:40:21 5 3 78 0 7 72 3 32.30 41 12.11 CHANGED pp.phCph..Cspstlh...pppshhhCh.Csssh.h ......sl+hCss..Cppss..ll...o-puYchClhCpslaph... 0 2 7 7 +8626 PF08793 2C_adapt 2-cysteine adaptor domain Iyer L anon Iyer L Domain The virus-specific 2-cysteine adaptor domain is found fused to OTU/A20-like peptidases and S/T protein kinases. The domain associations of these proteins indicate that they might function as viral adaptors connecting the kinases and OTU/A20 peptidases to specific targets [1]. 
20.10 20.10 20.10 20.10 19.40 19.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.65 0.72 -4.07 38 112 2009-01-15 18:05:59 2006-05-11 14:53:58 5 10 28 0 2 116 7 36.90 36 19.63 CHANGED ppCpcF....pcsPs.hNPhTG+sI+hsGPsY+clhccCsss .......hCpcF....pcsPs.hNPhTGRsI+tsGPsactLtccCss.... 1 2 2 2 +8627 PF08794 Lipoprot_C Lipoprotein GNA1870 C terminal like Mistry J anon pdb_1ys5 Domain GNA1870 is a surface exposed lipoprotein in Neisseria meningitidis that and is a potent antigen of Meningococcus. The structure of the C terminal domain consists of an anti-parallel beta barrel overlaid by a short alpha helical region [1]. 25.80 25.80 26.50 26.30 24.80 25.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.00 0.71 -4.36 2 580 2009-01-15 18:05:59 2006-05-11 14:54:44 5 2 98 13 6 555 0 148.80 63 58.36 CHANGED QsHSAlsALphEplpss-+.sphlspRpFhluDluGEHTuFspLP-.G+ApY+GpAFuSDDAsGKLTYTIDFAAKQGHGKIEHLKoPE.NV-LAuu-lKsDcKpHAVI.GsshYsttEKGoYpLulFGspAQElAGSApVchuptl+cIGlAsKQ ......................................QsHSAlsALQhEp.lpss-+ssp.h..lspRp.Ftlu-IuGEHTuFspLPcsG+AsY+GpAF.....u..S.........D....D.........A.........G......G.......KLTYTIDFAAKQGaGKI...EHL.K.o...PEh.N...V...-LAu...A-............l.........Ks............D......EK....p..HAV.......I...SGs..s..hY.s....p.......s..E....KGoYpLulFGs+AQElAGSApVK..hsptl+cIGlAuKQ................................ 
0 1 2 4 +8628 PF08795 DUF1796 Putative papain-like cysteine peptidase (DUF1796) Iyer L anon Iyer L Domain \N 21.60 21.60 24.50 24.30 19.90 19.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.09 0.71 -4.62 26 236 2009-01-15 18:05:59 2006-05-11 14:56:26 5 2 163 0 42 187 9 161.00 29 64.13 CHANGED hsthlSLGutCtsAhhLpKhsLR..shuhPFDahhs.olssltchlpscFsshhp.pth........................................t............h..pshhtallacshauh.sha-apss..s..s......chhtchp+Rhcphhpplpssp..........plhFlR.....ssh.shctlh-hhphltptsss.pshhlll.htppsts ...................pslhSLGppChsAhtLpph.pLc......shuushDahh....Sso...Lpp...Vs...pLLpNc.FscFhphpsl...............................................................p...............thh.psssphhl..cDshYsl.ohHDFtsshss.psh..........tY.ch+tphc+RlsRFlpplpssc..........slLFlR.....tss.s.h-EshpLpplLsphspp.phplLll........ss....................................................... 0 9 24 32 +8629 PF08796 DUF1797 Protein of unknown function (DUF1797) Mistry J anon pdb_2ffg Family This is a domain of unknown function. It forms a central anti-parallel beta sheet with flanking alpha helical regions. 21.40 21.40 22.80 39.90 21.10 18.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.19 0.72 -4.10 17 675 2009-01-15 18:05:59 2006-05-11 15:37:50 5 1 670 2 59 161 0 68.10 54 86.93 CHANGED IIsRLcuMtcst..suEsppRpFE+pG..hspVsasppsc...sapLcchpsccp...apFDsIDLlAIEIaDLL ..................IIsRLEAMtp....DGtshpRpFER-GVslspVuasccpE...hFpLc-scs+Eo...YpFDsIDLlAMEIY-LL. 0 11 31 45 +8630 PF08797 HIRAN HIRAN domain Iyer L anon Iyer L Domain The HIRAN domain (HIP116, Rad5p N-terminal) is found in the N-terminal regions of the SWI2/SNF2 proteins typified by HIP116 and Rad5p. 
The HIRAN domain is found as a standalone protein in several bacteria and prophages, or fused to other catalytic domains, such as a nuclease of the restriction endonuclease fold and TDP1-like DNA phosphoesterases, in the eukaryotes [1]. It has been predicted that this domain functions as a DNA-binding domain that probably recognises features associated with damaged DNA or stalled replication forks [1] 20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.27 0.72 -4.12 51 781 2009-09-10 22:04:53 2006-05-11 16:15:26 6 29 554 5 366 662 33 100.50 19 15.98 CHANGED hGshpsps.......huhph......................hhphlph....uptlhlpRpsps.............................................hDpsA....................lpVtsss......ut............plGalPcchuphlus.Ll-pt...hhphcuhlh.........ssp....tthsh.pplhlhhpsh .......................................................................hth.......huhph...........................h.t.lp........uph..l.tlpREs.sNs............................................aDps.A.............................l+..V....sss.......st.............plGalscphAthlus.hhDpt....hhphpu.h.l......................................t.................................................... 0 103 231 304 +8631 PF08798 CRISPR_assoc CRISPR associated protein Mistry J anon pdb_1wj9 Domain This domain forms an anti-parallel beta strand structure with flanking alpha helical regions. 
25.00 25.00 27.40 25.90 21.00 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.19 0.70 -4.58 50 667 2012-10-01 21:23:39 2006-05-11 16:20:22 6 2 628 14 132 457 17 206.90 36 97.62 CHANGED MYLS+lhLssppttstp...........hsssYshHphlhphFss.......t.ssptphLaRlE...........pttstsplLl.SsppPchst................sshshpscshhspLpsGpphpF+LpANPstpt.........................t.ttpup...chshhppp.pthpWLp..++upptGFplhs.............................hhphpshppcphpppp...........p.lphssVsF-GhLpVsDssthhpsLppGIG+uKAaGCGLLolss ......................................................................................................MYLSRlpLcs.sp...h....pstph............ps.YshHphLasLFPs............................spc+pFLaRhE..............phpsshphhllSpppPsto.......................shhslps+sF.hsp.LpsGppLpFpLRANPshsp.............................................................h..ptp.t.phc....sp.s.cup......cl.h...hhppp..stlsWLt...ppu-ps..G.F.sLhc.................................................ssVcu.hcpp..p.hc+cp..........ppphlphuoVcasGhLs.VsDP..s...hFhppLspGhG+u+AFGCGLhhltP..................................... 0 39 99 117 +8632 PF08799 PRP4 pre-mRNA processing factor 4 (PRP4) like Mistry J anon pdb_1mzw Domain This small domain is found on PRP4 ribonuleoproteins. PRP4 is a U4/U6 small nuclear ribonucleoprotein that is involved in pre-mRNA processing. 25.00 25.00 25.10 25.20 24.40 24.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.24 0.72 -4.67 61 535 2009-01-15 18:05:59 2006-05-11 16:32:06 6 15 264 2 398 508 1 29.90 51 6.74 CHANGED Vhp+LRplsEPIsLFGEsct-RpcRL+plh ...Vhp+LRplGEPIsLFGEsst-RhcRLRpl...... 0 135 213 321 +8633 PF08800 VirE_N VirE N-terminal domain Bateman A anon PSI2 target AAO76744.1 Domain This presumed domain is found at the N-terminus of VirE proteins. 
20.60 20.60 21.30 21.50 20.20 20.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.43 0.71 -4.41 14 419 2012-10-02 15:26:12 2006-05-11 17:59:31 5 13 90 0 49 404 58 131.70 27 24.52 CHANGED -tpphtcp.LPtlhsuutacc..tcstpphptasGllhl-lc+Ls..pchstl+pphtthP.T..hhAFhusSG+uVKIhlhhst.-ss.h.t........p.tp...ap....utAYphssphYpthl...shsl-hps.slsphChhoaDP-sYa .............................tphKpp.LPhlsPuupFpc...cstpphtpasGllhlDlDcLs..sh.....p...E.h...tpl+p..ph..hp..tP...ts......hhsFhosSGcGVKlhl.....hh.s.h....s....sss..h............t........ap....tpuY.ph.ss..p..h..ap.hh......sh...plDh.p...s...t..slsRhCh.loaDPcsaa...................................... 0 20 45 49 +8634 PF08801 Nucleoporin_N Nup133_N; Nup133 N terminal like Mistry J anon pdb_1xks Domain Nup133 is a nucleoporin that is crucial for nuclear pore complex (NPC) biogenesis. The N terminal forms a seven-bladed beta propeller structure [1]. This family now contains other sized nucleoporins, including Nup155, Nup8, Nuo132, Nup15 and Nup170. 
23.70 23.70 23.70 23.80 23.60 23.50 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.45 0.70 -5.73 57 599 2012-10-05 17:30:43 2006-05-12 09:01:07 6 12 283 1 388 605 4 407.80 16 33.61 CHANGED sp.stapshphhs.hPstlhpphsps...........................p..spsuhhschshuhhsscsclhlWs.Y..................................t.ssph.h........................h.st.ppslsul..........sl.VpPps..G..hahpslsahhslusshp......lhlhh.........................lhssphssphhsspsss.hlhssssGRlhhhuhps..sh.pl..ph.pt.ps.......psst.shstsuh.usl....hss..........................................tt.cs.lsplpssppc.....phlashoscuhlphaplp.....sspthpp.......lhphhtpth.pth.......................................s.....phcllclpslstp.....................psthhhLluhspsusph.hthh.h......................................h...h...........................................p..hh.ss......h.t.phps......thhs.tlalsssspps...................................hlhssssshs..................hhp...pt...........hEsshhhp.s..................................hss..uhpp..............p.p.....ps.p.hhlhpstGlhh 
.......................................................................................................h...ph.s.lPstlhc..thsth...........................pstphhul.h.sphshAWlshcsclhlWsY...........................................p.stph.h.............................h..ss.spsl.uV...............sl.ltsps..G..hahss.lp...ahhs.lssshp.......h.l.s.....................................................................lhssthh.....s..h.hsss..sts..h..lhuossG.Rlhh.........hs.h.s.s.........slapl.......th..pttps.......................phpthshst.uu.l.u.l......hss......................................................................st.cs..lspl...th-..ps+........phlaslospu.hlp..haclp.....sspthtt.......lhp.h.pst.hhtth.................................................s.t.p.phpllsl...p..slsss......................ts...h.htLlu....h...s.ps.uh.phahshssh.........................................................t.h.h..h..............................................th.s..s..............s....p.pt.......hhs..t.hhl.hh.s.pps....................................hhhhsssshs.....................t........................-.shhhht........................................................................................................s.t.hhhhps.Gh....................................................................................................................................................................................................................................................................................... 0 119 200 315 +8635 PF08802 CytB6-F_Fe-S Cytochrome B6-F complex Fe-S subunit Mistry J anon pdb_1q90 Domain The cytochrome B6-F complex mediates electron transfer between photosystem II (PSII) and photosystem I (PSI), cyclic electron flow around PSI, and state transitions. This domain corresponds to the alpha helical transmembrane domain of the cytochrome B6-F complex iron-sulphur subunit. 
23.30 23.30 23.50 23.80 23.20 23.10 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.69 0.72 -3.95 19 215 2009-01-15 18:05:59 2006-05-12 09:15:23 5 3 170 9 83 203 111 37.50 46 19.78 CHANGED D.VPDMu+RplhNLLhhGulussssuhLhPhspaFlPPu .....VPDMuRRQhMNL.LhhGslohsAsGhLhPhspaFlPPt.... 0 20 54 73 +8636 PF08803 ydhR Putative mono-oxygenase ydhR Mistry J anon pdb_2asy Family ydhR is a homodimeric protein that comprises of a central four-stranded beta sheet and four surrounding alpha helices [1]. It shows structural homology to the ActVA-Orf6 and YgiN proteins which indicates it could be a mono-oxygenase. 27.10 27.10 27.20 28.40 26.60 27.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.19 0.72 -3.91 23 612 2009-01-15 18:05:59 2006-05-12 10:05:35 6 2 606 6 47 201 70 96.50 66 94.04 CHANGED LLQlDFsasGP.FG--hupuhpsLAcSIspEPGhIWKIWTENppspcAGGIYLF-scsuApsYLpMH.oARLpsh.Glsclpu+lFDlNpsLopIs+uPl ...............LLQlHFsFsGP.FG.-tMscQLp..sLAESINpEPGFlWKl......WTESEKN+EAGGIYLFp-EcoA..A.YLEKH..TARL.KsL.GV-EVsuKlF-VNEsLopINpupL................... 0 11 28 37 +8637 PF08804 gp32 gp32 DNA binding protein like Mistry J anon pdb_1gpc Domain gp32 is a single stranded (ss) DNA binding protein in bacteriophage T4 that is essential for DNA replication, recombination and repair. The ssDNA binding cleft of gp32 comprises regions from three structural subdomains [1]. 20.10 20.10 20.90 22.50 19.60 19.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.26 0.72 -3.87 10 98 2009-01-15 18:05:59 2006-05-12 11:05:39 5 1 90 4 0 87 390 82.80 51 31.51 CHANGED DccEWKLchDsuGNGpAVIRFLPuKs-Es.LPFVKLlNHuFKcNGp.WYIENCsSTHGDaDsCPVCpahpps-L.a.....Noss-ctphhup.hKRKtSYW ............Dcp.WKLchDsu.GN.GpAVIRFL..P.......u...........s....t-p........hPasplhsHuF+ts....Gt.WYIENs..oThGc..ssPVspa..ppcL.a.....Nsspc..p...p........KRKhuaa.......................... 
1 0 0 0 +8638 PF08805 PilS PilS N terminal Mistry J anon pdb_1q5f Domain Type IV pili are bacterial virulence-associated adhesins that promote bacterial attachment to host cells. In Salmonella typhi, the structural pilin protein PilS interacts with the cystic fibrosis transmembrane conductance regulator [1]. Mutagenesis studies suggest that residues on an alpha-beta loop and the C terminal disulphide-bonded region of PilS might be involved in binding specificity of the pilus [2]. 20.80 20.80 20.80 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.73 0.71 -4.64 18 332 2012-10-03 10:38:27 2006-05-12 11:51:41 6 3 257 5 49 317 16 137.50 30 72.62 CHANGED sscsssEpsNlsslhsss+u.hKus.uuYs...uushsssLlphtulPssMss..sGs......slhNsWGGsVTVsss.....usst.oFolThssVPpssClsLsTplusu...hsshsIsus......shssu..plssssAs....osCsus.....sNTlsaTos ........................................................................tpstpEtsNlpsIhsss+u.hh.pus.suYs.......suphsssLlp.hsshP..ss.Mhs....sus..........slhNuW.GG.s.VT.luss.........usss..uFTlT...h...s...sVPpcsClpluT.t.huss.....htsh...s...lsus..................sh.su.........lssssAu......ssCsss.....sNolsaT............................................ 0 13 27 37 +8639 PF08806 Sep15_SelM Sep15/SelM redox domain Mistry J anon pdb_2a4h Domain Sep15 and SelM are eukaryotic selenoproteins that have a thioredoxin-like domain and a surface accessible active site redox motif [1]. This suggests that they function as thiol-disulphide isomerases involved in disulphide bond formation in the endoplasmic reticulum [1]. 
21.20 21.20 21.40 21.70 21.00 20.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.43 0.72 -4.23 12 179 2009-01-15 18:05:59 2006-05-12 13:10:50 6 2 121 2 112 204 1 73.00 33 48.92 CHANGED putl.pssGU+LsphPcVctFlpsDh...sha.sLphKalhGucP.lhLhDcpsp.hEc.lslschspDclpphltp+shh .............................s.ttl.ps..sh..p....p..h.sthPp...l.puFlpp-h...tha...slplKalhGuc.PhLhLLD.c.pGp..tEp...lsl.pcW.sp-plppalpp+hh............... 0 44 62 91 +8640 PF08807 DUF1798 Bacterial domain of unknown function (DUF1798) Mistry J anon pdb_2ets Domain This domain is found in many hypothetical proteins. The structure of one of the proteins in this family has been solved and it adopts an all alpha helical fold. 25.00 25.00 40.00 65.50 21.00 19.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.43 0.72 -4.35 19 391 2009-01-15 18:05:59 2006-05-12 13:39:28 5 1 391 5 32 144 0 109.40 44 93.99 CHANGED hTppLlptscchhp+Y.ps..+cpspcaDFappVKPas-chcphlcpWpphAhpalppt+PcYl+tpQl-tsh-Nhpplslpuaas+spcKRFh-hhcSlpYsLphlh-tlt.p .......s.sEpLl.cssphpppapps..Kpptp-aDFYpsVKPas-clDshLschK.ht.hhIch....Yhsspph-hlhsNlppluVpCaap+os+KhFlE+hcSlpYsLQNIl-tlt.Kc... 0 8 18 26 +8641 PF08808 RES RES domain Bateman A anon PSI2 target CAE41587.1 Domain This presumed domain contains 3 highly conserved polar groups that could form an active site. These are an arginine, glutamate and serine, hence the RES domain. The domain is found widely distributed in bacteria. The domain is about 150 residues in length. 
20.50 20.50 20.80 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.51 0.71 -4.16 184 1322 2009-01-15 18:05:59 2006-05-12 14:02:02 6 8 874 0 452 1175 109 164.20 15 70.86 CHANGED plaRlptt.............................ph.t..s.............s......s.Rass...........s.uhssl.Ysuts.....hssAlhEs.......................................hhhsthhhtththsshthttth....t........t.h....htshsshts.t............thspthsp...thtt.sh........................ulhhsS...sh................s.....s..stslslhsst............................tthphhpspththths .................................................................................h...................................ph....s.........t.hh..hG..u.Rass..............t..shs.sl..Ysups.....hpsAlhEs.........................................................h.h..................................sthh...sthh...ht.th.t.hssh.h...thtph.......hph...............h..........h......th..ts.sh.tt..........................h.hspt...hup.......thp..sh..............................t.......GlhhsS......sh..............s......s.....shslslas.t....................................................................................... 1 115 273 368 +8642 PF08809 DUF1799 Phage related hypothetical protein (DUF1799) Bateman A anon PSI2 target CAE43631.1 Domain Members of this family are about 100 amino acids in length and are uncharacterised. 20.80 20.80 21.10 20.80 20.30 19.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.01 0.72 -4.18 15 119 2009-01-15 18:05:59 2006-05-12 14:06:48 6 3 110 0 32 116 31 77.20 31 69.89 CHANGED LtthG..hp.-Da..c..psshEVWPEN.htAhplFtuhuTQWRs........GssGshGLDYusl.shhchhGlctEppp-lassl+lhEppALchl ...................h...................hplWP-s.h.uhplFhshuTQWRs..................Ghu.G.s.sGLDYusl.tlhchh.sl.p.scpp.tlhsclRlMEttALphh...................... 
0 2 17 26 +8643 PF08810 KapB Kinase associated protein B Mistry J anon pdb_1y71 Domain This bacterial protein forms an anti-parallel beta sheet with an extending alpha helical region. 20.00 20.00 20.00 41.60 19.80 18.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.34 0.71 -4.03 10 347 2009-01-15 18:05:59 2006-05-12 14:14:48 5 1 347 2 28 126 0 112.60 61 88.34 CHANGED YKTGsYhGpIpE-+spp..hLVcVcAVlKHPpQGDLHNPsQs.-ssFFHERKALuahEKphls+utV+sa-s.ElPsYs-SLppAlschcs+Lps-soc...aAppSLcsLcpL+c-Ytl .HKTGsYuVsIsE-sss.s..lLVKVcQVIKHPKQGDLHNPsET.-sVFFHERKALSaaEKRaspcSpL+cFNs.-.lhcYEDSLQpAlocLEspL+tpp.oc...aAchSLsoLscLKcDYsL.. 2 9 17 24 +8644 PF08811 DUF1800 Protein of unknown function (DUF1800) Bateman A anon PSI2 target AAK23953.1 Family This is a family of large bacterial proteins of unknown function. 25.00 25.00 26.60 26.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.46 0.70 -5.56 77 530 2009-01-15 18:05:59 2006-05-12 14:19:30 6 11 396 0 221 585 994 467.30 24 84.11 CHANGED 
tphLsRhsaGsp...suplsphhp.h.......uhpsalppQl...............stsss..sshhtt.hsphs....................................................................................................................................................ttptttcpt..t........................................hh.t..tuhhtpAlhus.spLpERhshFWpNHFsVSspc...s.h.phhssshpp-slRsauhG.......pFc-LLtAlsppPAMhhYLDshps......................................s.t.......ctt...t.NENaARElhELaTLGl........................s..uG.....YoQpDVpphARshTGWslsstt..........................tsshahapsph........H-susKs...............lL..Gpshs.s..............sG.tcs..ptsL-hLspHPsT..ApFlup+Lhp+FVu.....Ds.PssuhVpRlApsFp...po..............cGDltAVlpslltssEhhss..........tsK.....l+sPhchhlushRshshp.sss..................................................tthhshlspLGQshatsso..........PsG......as.sssuWsuspthltRhphstpl..usthhsshh.............................sspshtt............hh.thhss...shospop.psltps.sttt................................hlslllsSPEF.hc .....................................................................................................................................................................................................................hhLpRhsaGsp...ss..plsth...ht..h........shpthltt...l......................ts..t.....sshh.....tt...httht.......................................................................................................................................................................................t.ptthpp..........t....................................................................................................htphtt...th....lt..phlh..s..s.......s......tLpERh.shFWpsH.Fslutpp...s.h.ph.......hhh.hppphlRtpA.hG..................sFp-LLtAlspcPAMLhYLDstps..........................................................................................ppttsNENaARElMELaTLGl.......................ssG........YoppDVpphARshTGWshss......................................tstahapsp.h........H-sGsKs....................................
....lL.Gp..s.h.t................tG.tcsctsLchLhp..+..P....s..T..ApFlup+LhpcFVu.............-s....P...........s.........su.hV..p+lAssFp...ps....................sGDltsllpsllpsschhss....h...............ss+lKsPhchllushRshshs.sss...................................................t.h.hs...h.h.ppLGQ.shatsss.......................................PsG......a......s.........s.....s...suWlsssshltRhphsttl...sst.hhtt.h....................................................................................s.tth..t...............................................th.lts....t....hssps.t.tsltpt..st.p..........................................................................hhthlhhuP-a.h..................................................................................................................................................... 0 79 146 186 +8645 PF08812 YtxC YtxC-like family Bateman A anon PSI2 target YtxC B.subtilis Family This family includes proteins similar to B. subtilis YtxC an uncharacterised protein. 20.80 20.80 20.80 21.40 18.90 20.70 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.41 0.70 -4.84 33 299 2009-09-10 22:38:25 2006-05-12 14:27:35 6 1 295 0 79 225 0 214.20 31 73.96 CHANGED hc.hhlusllschllpph-cchltclIpcsYhahcp-EpppIhch.uppILcs-ppth..pph.phs++s.Ihcclt-hlp-s.splsl-GFlsFRL+-YhccLcchl-pAl-EYhhE+EYpEFIcLL+aFV-hQcs+lctVallhs.stpahLaDcctcplss-hlpphhsc..lhppslsh-DlLISsLIolAPccIhlast-psc...phlpTIpsVFp-RVph ..............................................................pthlhsslsphIlp.hhppchlhpllpcpaa.ahc.p-EpppIhph..upplLcscppt...h......t....p.h.phs.............pc.shIhsplpsalp-s.....phshsuFlpFRL+sYhcplpcls-hAlDEYhhEpE...YppFIch....L+.aVc..pcs+lspV+llhc.p.sFhlaDccscclppc.pltphhcc..htppsl.h-..shlIusLloluPc+Ihlasc.cps.p.....phlpTlcsVFp-RVp.h.................................................... 
1 38 64 69 +8646 PF08813 Phage_tail_3 Phage tail protein Bateman A anon PSI2 target CAE43633.1 Family This family of proteins include phage tail proteins. They probably include bacterial Ig-like domains related to Pfam:PF02368. Which also includes a number of phage tail invasin proteins. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.90 0.71 -4.85 12 222 2009-01-15 18:05:59 2006-05-12 14:32:47 6 3 207 0 34 177 8 149.50 24 74.81 CHANGED SuWspLss+ss+ssssossshsl-GIDTocst.assG.Ghuphh.lsoWs-lspVpslupsGG-QQFhsaphLpD.D.+cpQIPThKSAhshThThAa-.shsaatsLcpAD-s+pshslRhplPsusph.hasuYhuFscsPohshNthhsholslSltuc.ThhAu .........................................................................................h..h..s..sapcl.sp.lp..-lspsGu-tphlphshLpD...s..hp..p..phss...hps.Asshshsh..uac.s..s..p..s..s..ap.sLct.ss-scp.hhshchth.Ps.G..s....ps...hapuhlSh.c.....hs..ssssNtlhsholslslpu.c..shh..s....................................................... 0 3 13 27 +8647 PF08814 XisH XisH protein Bateman A anon PSI2 target ZP_00111899.1 Domain The fdxN element, along with two other DNA elements, is excised from the chromosome during heterocyst differentiation in cyanobacteria. The xisH as well as the xisF and xisI genes are required [1]. 25.00 25.00 26.10 26.00 21.50 20.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.64 0.71 -4.16 8 139 2012-10-11 20:44:44 2006-05-12 14:41:39 5 2 37 2 44 187 1 116.50 41 95.22 CHANGED MsAKDlaH-sVKsALhKDGWtITcDPLhl+hGc.sslaIDLuA-KlIAAE+pspKIAVElKSFlu.sSpIsDF+sALGQaIsYRhlLcsp-sERlLYLAlscssYcsFFppchsQhllpcpplpLllaDsEpEpIlQ .................MsA+DlaHpsV+pAL.K-GWhITcDPhhlp.hst.hphhlDLuAE.......+..........llAAE+ptpKIAVElKSFlu..S.lp-hcpAl..GQal.Y+hhLpt.p..-P-RhLYLAlspshYpsaFpp.hhphhlpc.plpLllassppE.Ih..................... 
0 11 31 44 +8648 PF08815 Nuc_rec_co-act Nuclear receptor coactivator Mistry J anon pdb_2c52 Domain This region is found on eukaryotic nuclear receptor coactivators and forms an alpha helical structure. 19.80 19.80 19.90 19.80 19.60 17.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.48 0.72 -4.43 10 226 2009-01-15 18:05:59 2006-05-12 15:38:57 5 16 42 2 81 169 0 49.10 64 3.62 CHANGED EGpsDE+ALL-QLsohLsstD..uLEEIDRALGIPcLVsQutsh-...-pF ...EGpsDE+ALLDQLhohLsspD....GLEEIDRALGI.P-LVs.Q..uQul-s..-tF...... 0 4 9 30 +8649 PF08816 Ivy Inhibitor of vertebrate lysozyme (Ivy) Mistry J anon pdb_1gpq Family This bacterial family is a strong inhibitor of vertebrate lysozyme. 19.90 19.90 21.00 21.00 19.10 19.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.63 0.71 -4.12 17 506 2009-01-15 18:05:59 2006-05-12 15:59:04 6 2 482 7 45 183 8 116.00 56 74.15 CHANGED Lsppssa+ssWppMlpupppLPpWlppstGous.PhpslshsGppYlVGshCKPHD.CuupphhVhautDccp...AaGlhVplspt..sshcsPocaAsapWLGpPscs........hpuhLpppLc.pcPN .............................LAKucsTKAAFNQMVQGHK..LP..A....W.VMK.G.GThT....PAQTVTLGDETYQVMSACKPHD.CGSQRIAVhWSEKSsQ...MoGLF........SoID...EKTSQEKLTWLNVs...DALSID.GKTVLFAALTGSLE.NHPD............................ 1 8 17 29 +8650 PF08817 YukD WXG100 protein secretion system (Wss), protein YukD Mistry J, Desvaux M, Burroughs AM, Iyer LM, Aravind L anon pdb_2bps Family The YukD protein family members participate in the formation of a translocon required for the secretion of WXG100 proteins (Pfam:PF06013) in monoderm bacteria, with the WXG100 protein secretion system (Wss). Like the cytoplasmic protein EsaC in Staphylococcus aureus, YukD was hypothesized to play a role of a chaperone. YukD adopts a ubiquitin-like fold [1]. 
Usually, ubiquitin covalently binds to protein and flags them for protein degradation, however conjugation assays have indicated that the classical YukD lacks the capacity for covalent bond formation with other proteins [1]. In contrast to the situation in firmicutes, YukD-like proteins in actinobacteria are often fused to a transporter involved in the ESAT-6/ESX/Wss secretion pathway [6,7]. Members of the YukD family are also associated in gene neighborhoods with other enzymatic members of the ubiquitin signaling and degradation pathway such as the E1, E2 and E3 trienzyme complex that catalyze ubiquitin transfer to substrates, and the JAB family metallopeptidases that are involved in its release [7]. This suggests that a subset of the YukD family in bacteria are conjugated and released from proteins as in the eukaryotic ubiquitin-mediated signaling and degradation pathway [7]. 27.00 27.00 27.10 27.00 26.70 26.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.57 0.72 -3.42 54 869 2012-10-03 10:59:06 2006-05-12 16:23:53 5 3 548 2 159 472 11 79.60 27 25.86 CHANGED shscVslph........sp....p.thDlsLPsplPlppllsslhphls..........hsshs...........tss....phpLs....tsGt..hLstspoLsptsVtDG-lLhlh ..............hscVTlth........sp.....p.phDlslPutlPlcsllstllchls..............hshh-.............sss.h..phplt....spGt..hLstspsLs-hslsDGDlLtL....... 0 48 107 141 +8651 PF08818 DUF1801 Domain of unknown function (DU1801) Bateman A anon PSI2 target AAO81511.1 Domain This large family of bacterial proteins is uncharacterised. They contain a presumed domain about 110 amino acids in length. 
27.20 27.20 27.20 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.52 0.72 -3.93 150 1704 2009-09-12 22:25:29 2006-05-15 11:59:35 6 10 1072 5 545 1425 322 100.10 20 67.93 CHANGED tt+pthppL+pll.tpsssp....h.pcplpa.......uhPsath..ttp................hsthss..hKp..alulhh....hpss.hp.........c..tt.l......ptphs+shhphpph.p.cls....hphlpphlppslp ...............................hppthppl+pllhpsssp................l.pEpl..pa........shPsash.sup...............sllthps.......hKp....alulha........tpu..u.lp..........c..tphl.t........phpt.s+sh.chp.ph....p.pls.....hchlpphlptsh.t................................. 0 237 402 491 +8652 PF08819 DUF1802 Domain of unknown function (DUF1802) Bateman A anon PSI2 target Domain The function of this family is unknown. This region is found associated with a Pfam:PF04471 suggesting they could be part of a restriction modification system.. 25.00 25.00 57.80 45.90 24.20 19.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.02 0.71 -4.66 27 227 2009-01-15 18:05:59 2006-05-15 13:54:39 6 3 145 0 55 187 29 162.40 53 84.40 CHANGED hALKEWssslcALtpGcphlLlRKGGIpEt.p..F....psptppFlLaPTh.Hpps....ctl+scapshlpts..st.tscplplpuaAplsssh.lss....ptlppLpshaIWstctlp.pRhpa+spp.lhlLlLRla.LscPhplshssp.atGCpSWlsLsp.l..shpsspPVlsDppasphtppl .........s.ALKEWuAAV+ALl-GRQoVLLRKGGItE+.R....F........cVAu+cFLLFPTVuHoHA..............ERVRPEH+DL.....LsPA....AADS..T..-E..s..VlLRAuA+VVAAlsVsR...PEu..L-AIEcLHIWTAESVRuDRLDFRPKH+LsVLVVpshsLsEP....Vc..lscpP-.YuGCp........SWVpLs......l...ssphutPVhs-sshschstc.h............................. 0 22 41 53 +8653 PF08820 DUF1803 Domain of unknown function (DUF1803) Bateman A anon PSI2 target AAO81393.1 Domain This small domain is found in one or two copies in proteins from bacteria. The function of this domain is unknown. 
23.70 23.70 23.80 24.50 22.20 23.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.21 0.72 -4.28 14 715 2009-01-15 18:05:59 2006-05-15 15:21:35 5 2 417 0 49 356 0 91.70 36 69.98 CHANGED lpsa.hphhpppPh......hhcLl.....pYhhca..sshlL+.l++cashpc+hDhhl-shlthGYIhp.Es++YpLshsl...........................................Dp-uhhath ...........h.p.h..phhpp.sh......hhsll.....cYhhcY..sshlL+.lK+phs.ppshDhhl-phlthGaIlp.Es++YpLshsh..............................s....................t.......................... 1 5 11 29 +8654 PF08821 CGGC CGGC domain Bateman A anon PSI2 target AAB98576.1 Domain This putative domain contains a quite highly conserved sequence of CGGC in its central region. The domain has many conserved cysteines and histidines suggestive of a zinc binding function. 21.50 21.50 22.90 37.20 21.10 20.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.95 0.72 -3.97 31 185 2009-01-15 18:05:59 2006-05-16 15:44:05 6 2 147 0 77 174 5 106.50 31 88.27 CHANGED KluIlpCppsp-..h.CsG....ssCFKuhpp+puuFppap.p..sElluFhsCGGCs.........ucclhppscphh.cpss-....slHLuoChhts.............CPp..hcphpchlppch..sl.pVltGTH .....KluIltCppsps..h..CsG....ssCh+uhpp+puuFppYsp.c.h-lluFhsCGGCs.........upplh....p......pscph......h...cps.s-......sIHluoCht...ptt..............CPp..hcphpchlpcch...Gh.plVpGTH........... 0 37 67 74 +8655 PF08822 DUF1804 Protein of unknown function (DUF1804) Bateman A anon PSI2 target CAB84459.1 Family This family of bacterial protein is uncharacterised. 
29.00 29.00 29.60 31.50 28.50 27.30 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.76 0.71 -4.51 12 154 2012-10-04 14:01:12 2006-05-16 15:50:41 6 2 139 0 18 111 2 157.70 37 93.63 CHANGED M.AHspEsRctlRphYVhsphoLEhAAhpsGVshuTARRWKpcA+spGDDWDKsRAA.tlAuGGlE-luRphLssallQapsTMctLp...pspslssuc+sclLASLuDuasKTluAs...+RlhPETscLAsAl-llphlusalpp+aPpHl.AasElLEPFG.plpKca .....................MAaspch+cthRchYlhsphsLptuAthhsls.sTARpWKptsctcG...DDWDKhRsAphLAu.ssl-clupulltuFh...hQhpssh-plp.......sspchsspcKschLAuLuDuFoKhhuuu...++lhP-oscLusAhcllc.h.lhualppc+Pcplsshl-lLEshu..ltc............................. 0 4 9 15 +8656 PF08823 PG_binding_2 Putative peptidoglycan binding domain Bateman A anon DUF1028 C-terminus Domain This family may be a peptidoglycan binding domain. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.59 0.72 -3.88 9 96 2012-10-01 23:43:47 2006-05-16 17:46:52 6 6 94 0 51 368 46 69.20 26 22.05 CHANGED hhhcp.cs.tphslscDhtsplpsuLtcLGY...........ppthtcALpsalthpNFEs+.............hpscsp.IspsVhpaL ..........................................pstphhslsschtpplpphLpchGa..ht..t.....s........h.spshppALpsahuhENhEpR.....................htsss..ID.tVLphh.............................. 0 23 36 46 +8657 PF08824 Serine_rich Serine rich protein interaction domain Mistry J anon pdb_1z23 Domain This is a serine rich domain that is found in the docking protein p130(cas) (Crk-associated substrate). This domain folds into a four helix bundle which is associated with protein-protein interactions [1]. 
21.00 21.00 21.00 21.20 20.60 20.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.95 0.71 -4.52 12 250 2009-01-15 18:05:59 2006-05-17 14:37:00 5 8 78 2 110 232 2 155.60 43 19.69 CHANGED LDL-sAhEpLs+LQpplsSSVutLhsFVu.....ssWRshsp.hEsslpcl+tAs-+lctul+-hL-Fu+GshusAsphsDpsLpsKl+cQLQplc-uaQhLlctppsLDsss.WohphLsts.tsps.....ssDDL-RhlhsARslP-DsKphAShltuNupLLF+Rssp ..............L-L-sAlEpLtRLQ......pslssoVupLhshlu.....ssWRs.hs....hEsplp-l+sAls+Vcsul+-hL-F.A+GAluNAup.....hsD.......psLpsKLp+QLQ+lEDuaQhLhppupsL-sss..WulshLshs...t..s...............ssDDLDRhVhsuRsVP-DsKQLsShl......p............uNApLLF++s..s........... 1 20 31 61 +8658 PF08825 E2_bind E2 binding domain Mistry J anon pdb_1y8x Domain E1 and E2 enzymes play a central role in ubiquitin and ubiquitin-like protein transfer cascades. This is an E2 binding domain that is found on NEDD8 activating E1 enzyme. The domain resembles ubiquitin, and recruits the catalytic core of the E2 enzyme Ubc12 in a similar manner to that in which ubiquitin interacts with ubiquitin binding domains [1]. 20.30 20.30 20.40 21.90 20.20 19.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.69 0.72 -4.22 24 285 2009-01-15 18:05:59 2006-05-17 16:11:40 5 11 231 29 198 288 4 85.70 38 19.97 CHANGED lphssshTLp-lI-pLscpschQl+pPSls.....sss+sLYhtss..spLEctT+sNLs+pLt-L.lpcGpElsV.........oDsshs.hshplplpFp ..........hphssssoLp-ll-hL..sp.ps..ph..QhKsPulo.......ucs+oLYhpss..sslEE..pTRsNLsKsL....p....E.....L.....l.......t-GpE........lsV.........sDsshs..tshph+LpF............... 0 70 109 164 +8659 PF08826 DMPK_coil DMPK coiled coil domain like Mistry J anon pdb_1wt6 Domain This domain is found in the myotonic dystrophy protein kinase (DMPK) and adopts a coiled coil structure. It plays a role in dimerisation [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.93 0.72 -4.07 6 271 2009-01-15 18:05:59 2006-05-17 16:41:07 5 15 56 3 115 239 2 59.70 48 4.79 CHANGED ELQSALEAEIRAKQulp-ELpKVKsuNlshEsKLp-oEsKNpEL.pElcpL+K-MEE.hRuc ..................ELQSAL-AEIRAKQulQEEL.p.c.V+suNlphEs+L+-uEt+Np-L.pElcpLpcchE-.hRuc........ 0 12 19 55 +8660 PF08827 DUF1805 Domain of unknown function (DUF1805) Mistry J anon pdb_1qw2 Domain This domain is found in bacteria and archaea and has an N terminal tetramerisation region that is composed of beta sheets. 21.40 21.40 21.40 30.20 21.20 21.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.46 0.72 -4.17 18 202 2009-01-15 18:05:59 2006-05-17 17:03:56 6 1 201 1 63 138 3 62.30 62 62.88 CHANGED hCGhLsVsshp+......hG.sAu+Vp......GVcTl-DhLpuplhsloptAccLGlcsGMoGcEALp+h .....................MCGALDVuLLNEKL..tDRGIIAGRAV......GVRTIEQLLEAPLESVThtAEsLGIpsGhhG+EALLKM. 0 19 45 50 +8661 PF08828 DSX_dimer Doublesex dimerisation domain Mistry J anon pdb_1zv1 Domain Doublesex (DSX) is a transcription factor that regulates somatic sexual differences in Drosophila.\ The structure of this domain has revealed a novel dimeric arrangement of ubiquitin-associated folds that has not previously been identified in a transcription factor [1]. 20.70 20.70 20.90 39.70 20.30 20.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.98 0.72 -4.44 11 105 2009-01-15 18:05:59 2006-05-17 17:27:46 5 3 47 6 15 114 0 59.40 55 18.06 CHANGED lspDslL-pCQ+LLEKF+YPWEMMPLMYVILKsAcuDl-EASRRI-EGphllppYppppphs ....spDhhL-aCQKLLEKF+YPWEMMPLMYVILKDAsADlEEASRRI-EGptllNph.p.p............. 
0 4 8 12 +8662 PF08829 AlphaC_N Alpha C protein N terminal Mistry J anon pdb_1wym Domain The alpha C protein (ACP) is found in Streptococcus and acts as an invasin which plays a role in the internalisation and translocation of the organism across human epithelial surfaces. Group B Streptococcus is the leading cause of diseases including bacterial pneumonia, sepsis and meningitis.\ The N terminal of ACP is associated with virulence and forms a beta sandwich and a three helix bundle [1-3]. 20.20 20.20 20.30 30.40 19.80 18.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.26 0.71 -4.96 5 46 2009-01-15 18:05:59 2006-05-18 09:50:22 5 22 21 2 1 51 0 177.50 50 32.35 CHANGED VTQGQlNIFs-TVlAAEVIsGSAATLNTulTKNlQNGNAYIDLYDVKNGKIDPLQLIVLsPsuYoApYYI+QGuKYYosVSELQTsGuAoITYNILcEDGsPHsKoDGQIDIVSVuLTIYDSTsLRDKI-EVcsNANDPKWSDGSRDEVLTGLEsIKsDIDNNPKTQoDIDNKIlEVNELEKLLVl..slPDKDKY ...............hTQsphNI.p-olhAApsIsGSA.sTLNTshTKNlQNGpAYIDlYDVK.GhIDP.pLIsLss.uYoApYYI+QGucYao..sssclpoTGuAoITYslLDcsGsPapKuDGQlDIVSlslTlYDoosLRspI-EVhppAsDPKWS-GSRDEVLpuLEcIKpDIDNNPKTQsDIcsKIsEVNplEKhLss...hPDt.K.............. 1 0 0 1 +8663 PF08830 DUF1806 Protein of unknown function (DUF1806) Mistry J anon pdb_1njh Family This is a bacterial family of uncharacterised proteins. The structure of one of the proteins in this family has been solved and it adopts a beta barrel-like structure. 19.60 19.60 19.60 25.00 19.50 18.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.62 0.71 -4.28 14 377 2009-09-10 16:05:25 2006-05-18 10:39:37 5 2 372 1 40 133 2 115.10 62 96.23 CHANGED csIppppVQphL-pFsN+sVYlHLETTNGAYAuHaD-ph.aosGAaIRNAplpYc+uKIs.GsG.PYRVGLKh..stGWlYAEGLTcaElD-psRLLlAGHs.pG+LAlALEIScpPFs ..........................pPIccp-V.chLsoFtpKPVYLHlETTNGAYAsHFDp+s.FsAGsFlRNhplTYp+AplK..Gsp..PYRlGLKL..stGWVYspGLTHaE..Vs-+-chLlAGashEGpLAsALpIScpPFs. 
0 13 25 34 +8664 PF08831 MHCassoc_trimer Class II MHC-associated invariant chain trimerisation domain Mistry J anon pdb_1iie Domain The class II associated invariant chain peptide is required for folding and localisation of MHC class II heterodimers. This domain is involved in trimerisation of the ectoderm and interferes with DM/class II binding. The trimeric protein forms a cylindrical shape which is thought to be important for interactions between the invariant chain and class II molecules [1]. 25.00 25.00 55.00 55.00 21.50 18.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.71 0.72 -4.13 10 94 2009-01-15 18:05:59 2006-05-18 11:36:40 5 6 54 3 27 80 0 70.80 51 27.02 CHANGED NhTEDQV+HLLhpuDPhKhaPpLKsohhsNLKsLKsoMsssDWKsFESWMHpWLLFEMAKsshs-.cPsphPA ....NhTEDpVhHLLhpuDPhKhaPpLKsoF.-NL++LKpoMpshDWKsFEoWMHpWLLFEMu+pshpp.cPTthP........... 0 1 2 11 +8665 PF08832 SRC-1 Steroid receptor coactivator Mistry J anon pdb_2prg Domain This domain is found in steroid/nuclear receptor coactivators and contains two LXXLL motifs that are involved in receptor binding [1]. The family includes SRC-1/NcoA-1, NcoA-2/TIF2, pCIP/ACTR/GRIP-1/AIB1. 25.00 25.00 33.60 33.60 18.90 18.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.00 0.72 -3.23 10 234 2009-01-15 18:05:59 2006-05-18 11:48:35 5 17 49 173 84 190 0 82.50 50 6.17 CHANGED Kupp....KLLQLLTopo-pht......shtsusssssKDuhus........ussSssuussosso...........SLpEKHKILHRLLQsusSPsDlAKLTAE ......................................Kupp....KLLQLLToso-pht................hs.u.s.lssssKDusus............sussSusu.ussoSsS.............................SLpEKHKILH+LLQsGsSPsDlAKlTAE...... 0 3 9 31 +8666 PF08833 Axin_b-cat_bind Axin beta-catenin binding domain Mistry J anon pdb_1qz7 Domain This domain is found on the scaffolding protein Axin which is a component of the beta-catenin destruction complex. 
It competes with the tumour suppressor adenomatous polyposis coli protein (APC) for binding to beta-catenin [1]. 20.00 20.00 20.20 20.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.42 0.72 -4.25 9 177 2009-01-15 18:05:59 2006-05-18 13:19:08 5 6 76 1 81 150 0 40.50 58 5.22 CHANGED EDsPpuILD-HVSRVh......+TPGspSPss........hp+ps..tupSP-s ....EEsPpoILD-HVpRV.h......+TPGCQSPGs.........upass..tpRSP-.......................... 0 16 23 47 +8669 PF08837 DUF1810 Protein of unknown function (DUF1810) Mistry J anon pdb_2d2y Family This is a family of uncharacterised proteins. The structure of one of the members in this family has been solved and it adopts a mainly alpha helical structure. 25.00 25.00 25.90 25.90 23.10 16.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.82 0.71 -4.60 44 312 2009-01-15 18:05:59 2006-05-19 14:32:37 6 6 292 1 118 297 33 135.90 48 84.13 CHANGED sDs..acLpRFVpAQss..lYspslsEL+sG+KpSHWMWFlFPQlpGLGpSshAp+YuIsShsEApAYLsHPlLGsRLhEsochllshpucosppIFGsPDshKh+SSMTLFutsss....ssssFppsL-paasGp.DstTlphLs .........................sa-LpRFVpAQps.........lYppslsEL+uG+KpoHWMWFlFPQLcG..LGpSshAp..pYuIuul-EApAYLtHPlLGsRLcEssphlht.l.p.s.+.o..sppIF..G.s.PDshKhpSSMTLFutsss............ss..ssFttsLs+aasGp.DttTlphL.s.......................... 0 45 76 98 +8670 PF08838 DUF1811 Protein of unknown function (DUF1811) Mistry J anon pdb_1sf9 Family This is a bacterial family of uncharacterised proteins. Some of the proteins are annotated as being transcriptional regulators (see Swiss:Q4MQL7, Swiss:Q65MA2). The structure of one of the proteins in this family has revealed a beta-barrel like structure with helix-turn-helix like motif. 
20.60 20.60 21.00 60.70 20.30 18.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.33 0.72 -4.06 15 365 2009-01-15 18:05:59 2006-05-19 16:33:10 5 1 364 2 38 123 0 100.90 62 97.01 CHANGED -KRYS-MocaELcpEIAtLpEKARKAEQhGhlNEaAVhERKlhMAcAYLlDPs-FcsGchYclc.sssphF+lcYLNGVFAWGaRhs....usppEEALPISLLpc ....K+hSEMSE.ELRcEIplhKEKh.R.KAE.pGIlNEYDVYppKllhAcSYLl.DhpKlcIGcIY+Ls-GospYFKV-hLKGlFAWGaRhs......SDcsEEGLPIuLLQ.h. 1 11 23 32 +8671 PF08839 CDT1 DNA replication factor CDT1 like Mistry J anon pdb_1wlq Domain CDT1 is a component of the replication licensing system and promotes the loading of the mini-chromosome maintenance complex onto chromatin. Geminin is an inhibitor of CDT1 and prevents inappropriate re-initiation of replication on an already fired origin. This region of CDT1 binds to Geminin [1]. 21.70 21.70 21.90 21.80 21.60 21.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.01 0.71 -4.30 11 217 2009-09-10 23:23:53 2006-05-19 17:18:32 6 4 153 3 161 226 0 153.10 25 25.92 CHANGED LPhKYchLtEhFcslDolsshhcpRscphTFsp.lptsVQchs+KpFs.sHLuQIKplaPpuhplc........p.phhsaspsopp.hYpLpIc..lst.............................................tsssppphsssphhpRtplF+phLl-hsKppcptaL.p..s.s....pp.l...thps.Fpl-p.s.-ls.ucLPp .......................LP.caphLt-hFcsh-......olls...hLp.sR...pc.s.s.TFsp.lppsVpphh.+......+.pFp..pcluQIKtlhPp.uhphc.......................tt.h..thppts...p.p....hp..l...hlp..htt......................................................................s.t..p..hs..tp.h.hht....ctphFpptLlphlpppcpt................................................................................................................................................................................................................................................ 
0 55 89 133 +8672 PF08840 BAAT_C BAAT / Acyl-CoA thioester hydrolase C terminal Mistry J anon Pfam-B_4571 (release 20.0) Domain This catalytic domain is found at the C terminal of acyl-CoA thioester hydrolases and bile acid-CoA:amino acid N-acetyltransferases (BAAT). 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.15 0.70 -4.63 24 605 2012-10-03 11:45:05 2006-05-24 14:29:18 6 14 292 4 300 3028 585 192.20 24 50.28 CHANGED lcLEYFE-AlsaLhpHPpVptstlGllGlShGu-lsLuMuuah+..plsAsVsINGosssshsshha+t..............slsslshshpclph..........stsshhphhchhpshhspssppshIPlE+u..csshLhlsGpDDpsWtSthaAc.hpc+LpppG+c.pspllsYPsuGHhIEPPYhPhstAshthhhs......hhaGGEs+sHAhAQ.DuWpcl.tFF+KHLsu .......................................................................................................................................LEaF-c.....Ah.pal..h.p.p.s....p..............l.p.....s.s..t.lGlhGhS+Gu-.ls.L.h.hA.u.......h...ht.........pl.s.....s...s....V........s....l....s.....s...u.s...s...s..h.t..s....h...h.h.ct.t................................................ls...l....h..p..h..p.p..h..ph......................................p.s.sh...h..t..h.h..........h...h.....t..................t.....t.........t...p.......p...s..h.....I.........s......l...E....ch..........pus.hLh.....l.s...............G.p..DD.p...W.s..S...t.....h..u...p.h.....pp.Lp.t.p.....s.p.p....p...hph.l..pY.......................suG..H..h...l....p..s.P.....a.h.P.h.p....h.sh...................hhhGG.pstspuh..Ap.cuWtph.tFhppth..s.................................................................................................................................................. 
0 52 103 210 +8673 PF08841 DDR Diol dehydratase reactivase ATPase-like domain Mistry J anon pdb_2d0o Family Diol dehydratase (DDH, EC:4.2.1.28) and its isofunctional homologue glycerol dehydratase (GDH, EC.4.2.1.30) are enzymes which catalyse the conversion of glycerol 1,2-propanediol, and 1,2-ethanediol to aldehydes [1]. These reactions require coenzyme B12. Cleavage of the Co-C bond of coenzyme B12 by substrates or coenzyme analogues results in inactivation during which coenzyme B12 remains tightly bound to the apoenzyme. This family comprises of the large subunit of the diol dehydratase and glycerol dehydratase reactivating factors whose function is to reactivate the holoenzyme by exchange of a damaged cofactor for intact coenzyme. 24.60 24.60 24.60 24.70 24.50 24.40 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.90 0.70 -5.61 12 375 2012-10-02 23:34:14 2006-05-24 16:49:07 5 1 324 6 39 285 7 314.90 70 55.80 CHANGED pV-lspGA-tIMpslstssslpclpGEsGTNlGGMLE.+VRQsMAsLTspsss-IhIQDLLAVDThlP.pVpGGlAtEFShEpAVG.IAAMVKuD+LQMphIApplcpclshsVclGGsEAEhAIhGALTTPGTspPLAILDlGAGSTDASIIspcsplsAhHLAGAG-MVTMlIsoELGLpshtLAE-IK+YPLAKVESLFplRHEDGsVpFF-pPLssslFARVVllKp.sshlPl.suphSlEKI+.lRpsAKc+VFVTNslRAL+pVS..PTGsIRDIsFVVLVGGSALDFElPQhVT-uLu+aslVAGRGNIRGhEGPRNAVATGLlLsaspc ........................................................................cVDVAtGAEAIMcAVsuss+L-NloGEsGTNIGGMLE.+VRQTMA-LTsKs......ss-IaIQDLLAVDT.VPVsV.............pGGLAGEFShEQAVG.IASMVKSDRLQMAhIAp......EIc..p+LslcVplGGAEAE.......AAIlGALTTPGT.s+PLAILDLGAGSTDASIINscGEIl..AT.H.L.AGA..G....DM....VTMII......spELG.L-.DRYL.....AE-IK.KYPL.AKVESLFHlRHEDGoVQFFssPLsPsVFARVs.l.VK.P.DcLVPl..PGDlsLEKlRslRRSAKERVFVTNALRAL+pVS..PTGNIRDIPF..VVLVGGSSLDFElPQLVTDALuHYpLVAGRGNIRGoEGPRNAVATGLlLuapp................................................................ 
0 14 21 30 +8674 PF08842 Mfa2 DUF1812; Mfa; Fimbrillin-A associated anchor proteins Mfa1 and Mfa2 Bateman A anon PSI2 target AAO79331.1 Family This family of proteins may be lipoproteins principally from bacilli. They are between 300 and 400 residues. Many Bacteroides-like bacterial species, including Porphyromonas gingivalis, the causal agent of periodontal infection, carry at least two types of fimbriae, namely FimA and Mfa1 fimbriae, following the names of their major subunit proteins [1]. Normally, FimA fimbriae are long filaments that are easily detached from cells, whereas Mfa1 fimbriae are short filaments that are tightly bound to cells; however, in the absence of Mfa2 protein, the Mfa1 fimbriae are also very long and are not attached. Mfa2 and Mfa1 are associated with each other in whole P. gingivalis cells to the extent that Mfa2 is located on the cell surface and probably associated with Mfa1 fimbriae in such a way that it anchors the Mfa1 fimbriae to the cell surface and regulates Mfa1 filament length [2]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.11 0.70 -5.05 87 437 2012-10-02 11:46:57 2006-06-01 18:05:17 5 5 93 5 62 421 4 277.90 14 80.67 CHANGED hs.sCs...............shtlphp........ashs...............ttssshspplpplslalFD.....t.sGp.hhpphshpspplt.....tt...............................hh.tlssG.p...Ypllsau........sh.......pppt..................hsssssl...sclphplpp............tssthst......phssLa....aGph..shsh...........tstpppphslsLh+sssplpl..........hlps...........t..s.shsshp......hplpssssph...sassph.....hsspshsahPh...........tt.ssssss..................hhsph.sshphhpsp....thpl....pltppssstph......phs.h..h...............hsspthlsppsph...sl.hhh......................st.......................hssplpls.sWthhtps ............................................................h.l.h...........h..t...............t.spthtpplpplplalFD.........p..sGp..hltphphp..sppht.............tt.................h.h.............hhplssG.s...Yp.hlsau..............sh........sspt..hs..........................hsssssl..............pchhhphpp..............tsshhst.........phssLa.............aGph..shsh.....................tssttpphslsLh+sssplpl................hlps....................shss.hp................h.pltss.ssth.........shssph......sspshtahsh.....................htt.psptst............................hhsph..pshphhtsp.........................thpl.......plhtpssstth.....hp.s.h..h..........................s.p...lst.ppph....l..hhh...................................t......................hthtl.ls.sW.hh....................................................................................................................................................................................... 0 13 47 62 +8675 PF08843 DUF1814 Nucleotidyl transferase of unknown function (DUF1814) Bateman A anon PSI2 target CAD86002.1 Domain This large family of proteins are largely uncharacterised. 
Some are annotated as abortive infective proteins but support for this annotation could not be found. This family was recently identified as belonging to the nucleotidyltransferase superfamily [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.54 0.70 -4.58 177 2210 2012-10-02 22:47:23 2006-06-02 11:52:52 6 12 1271 0 615 1962 254 226.00 14 83.81 CHANGED thLtplh.....sthtpphs.hKGGTuLshh.h......hh....RhScDlDhhhht...................ttphpph.....hptlppth............................................sshthphpthhtts......................................st.ptplcl-lsht.........h..st.h..hsh.............................shshpclhucKltAhhp..................................th.RDhaDlhhl.........................htttththtphhps.h...........ht.....phppp....tpthhtphth................................................phhpp.htt..tt...........................................phpthhpp .................................................................................................................................h.....h........htpphh.hpGGoulshh.t......h.t.......R.hotDlDhhhh......................................php.ph............hptlp..phh................................................................pt..hhhphpthhtt..........................................................t..tttlpl-lsht.........................sht.h...hthh...t.........................tl......shsh..tph.hupKltuhhp......................................tph.+DhaDlhhl...............................tttths.tthhpt.h.......................................ht.t...............................................................................................................t............................................................................................................................ 
0 204 415 530 +8676 PF08844 DUF1815 Domain of unknown function (DUF1815) Bateman A anon PSI2 target ZP_00111304.2 (BIG_33) Domain This presumed domain is about 100 amino acids in length and is functionally uncharacterised. 25.00 25.00 26.00 76.70 23.10 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.41 0.72 -4.42 11 59 2009-01-15 18:05:59 2006-06-02 11:56:19 5 1 57 0 24 60 27 102.00 69 88.83 CHANGED FhRLA-QYRshVQDLVMSLQALApuLcppGhsASCYoC....Gc-hcuASFhssLG-sHhlRFLVSDaGISWsE.RssRELVKLEGAEAIpcLQclAshl+pspsssu FhRLApQHRpFVpDLVMsLQALAhlLEpRGYhASCYTC......GsphNSASFMVSLG-sHLIRFLVSDYGITWTEMRDDRELMKLEGAEAIsQLQELAsLlKhp....ss.... 0 3 15 22 +8677 PF08845 SymE_toxin DUF1813; Toxin SymE, type I toxin-antitoxin system Bateman A, Eberhardt R anon PSI2 target AAC77303.1 Domain SymE (SOS-induced yjiW gene with similarity to MazE ) is an SOS-induced toxin. It inhibits cell growth, decreases protein synthesis and increases RNA degradation. It may play a role in the recycling of RNAs damaged under SOS response-inducing conditions. It is predicted to have an AbrB fold, similar to that of the antitoxin MazE. Its translation is repressed by the antisense RNA SymR, which acts as an antitoxin [1,2]. 20.70 20.70 21.00 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.80 0.72 -4.12 23 839 2012-10-01 20:57:08 2006-06-02 12:01:30 5 3 449 0 149 526 5 49.20 39 50.52 CHANGED tRhhTV.....................GYs.http.t..........sPslpL+GcWLEpuGFssGp.lplpVppGpLVIps ....................................l..uYh....p..............h.....PslpLpGpWL.cpAGFsoGpsVsV+VhcGClVlp........... 0 19 51 90 +8678 PF08846 DUF1816 Domain of unknown function (DUF1816) Bateman A anon PSI2 target ZP_00109395.2 BIG_34 Domain Swiss:Q4C9H3 is associated with the Pfam:PF01383 domain suggesting this presumed domain could have a role in phycobilisomes. 
20.50 20.50 21.00 35.40 19.80 18.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.17 0.72 -4.28 29 104 2009-01-15 18:05:59 2006-06-02 12:48:16 5 3 70 0 46 121 101 67.20 41 56.63 CHANGED uhuNth.GLAWWs+lpTpsPssTYaFGPFlocpshctplssalpDLpsEuspsIppsllRs+........+sEsLTl ...........p.hGhAWWlcIpTppPcCTYYFGPFhopp-AptthsGYlEDLpsEGApsIphslpRCp.PcsLTl.............. 0 7 32 44 +8679 PF08847 DUF1817 Domain of unknown function (DUF1817) Bateman A anon PSI2 target ZP_00111140.1 BIG_36 Domain Members of this family are functionally uncharacterised. 25.00 25.00 25.80 25.10 23.90 23.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.70 0.71 -4.65 21 131 2009-09-10 22:53:16 2006-06-02 12:51:59 6 2 119 0 42 100 33 134.30 40 60.20 CHANGED plss-uIppLDLoPlpthhp.........pshsslLststsLphphsaPRs.ssDPR.ELSEhPEsRLWhlRhDApYPWLPLLL-hpsGpLsRasAMLVPHpFstsEGlpFsPEALElalhH+lFlLscahppp.Gls..ppu.pLppMAthLGY-LDsuFasLl .......................................s.lsp-plppLDLSPlpphhc........hhptps.lhh.ppslthslpa.ps.ssDPR.ELuElPElRLWF.lRLDusYPWhPllLDhpt...GplhRasAMlVPH...........ph...p.t.G...l.a.sPpALElalhp+lh...hl.phhpp..ths..t...phtphAthhGatlssthap................ 0 8 29 38 +8680 PF08848 DUF1818 Domain of unknown function (DUF1818) Bateman A anon PSI2 target ZP_00110314.1 BIG_37 Domain This presumed domain is found in a small family of cyanobacterial protein. These proteins are functionally uncharacterised. 
25.00 25.00 53.90 32.60 19.30 19.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.51 0.71 -4.03 27 71 2009-01-15 18:05:59 2006-06-02 12:59:54 6 1 70 5 30 76 118 115.20 41 87.84 CHANGED .pEGsGWRLuhDPs+spassLIGGEsWAlELTcsEhpshspLlhpLscphpsltspLMsEEpIsLElEpt.hWhpl-GptpsWuL+lILpss.......RusEGtWPAssssslstAhcplh .l.+cGsGWRLGaDP...p.t...spassLlGu-sWAlELTcsEhs-hscLltpLspshptls..s-.L..M-EEpIshEhEsphlWhplEGhspsaoL+hILpss.......RpsEGtWPssssssLltAhppl............ 0 6 20 28 +8681 PF08849 DUF1819 Putative inner membrane protein (DUF1819) Bateman A anon PSI2 target ZP_00108899.1 BIG_41 Family These proteins are functionally uncharacterised. Several are annotated as putative inner membrane proteins. 20.30 20.30 20.60 22.60 19.60 20.10 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.78 0.71 -4.89 25 271 2009-01-15 18:05:59 2006-06-02 13:12:16 6 1 259 2 54 226 31 176.70 23 85.97 CHANGED sppcYpsslsuGuLhlpES+plAcLhLpshsh-phccplhp-NlLQppo.uos+Rhs+pl.tpRLcoLssc.h.....phls-.usppptpplLahAsh+psplltDFhh-Vlp-+ahphchcLstcsastFhspps-hcsplsphSsSTptKLpQlla+hLtEAGhLtt..scspplpsshluscltphLpc.psppclh ............a.uslhutshhhpE.+hlspLhhps....s.pphp...ptlhpcNlhptsotsosc+hspsl.ptR.LpsLsps.hh....................phlsp.....us.pppp.lhh...huhhhps.llt-FhtcVlpcthhph...c....plstpchptFhppp.tp.ppstls.s.ao-sThp+htsshhphLt-uGh..lps....scp..c.plp....hhl..ph.thl.t.......h....................... 2 21 37 48 +8682 PF08850 DUF1820 Domain of unknown function (DUF1820) Bateman A anon PSI2 target AAG07366.1 BIG_46 Domain This family includes small functionally uncharacterised proteins around 100 amino acids in length. 
20.90 20.90 25.90 25.70 20.80 18.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.22 0.72 -3.90 21 216 2009-01-15 18:05:59 2006-06-02 13:18:53 6 1 215 0 60 152 247 98.60 49 90.46 CHANGED pslY+lhFhNpsplYElYARplhQSchaGFlElE-FlFuE+opllVDPuEEKLKsEFuGVpRoa...IPMHullRIDEVcKcGsuKIo-spst...ssVssF.Phs ..t.slY+lhFl..N.pGc.lYE.lYARplhpSs.L.aGFlEIt-FVFsp+osllVDPSEEKLKsEFsGVpRSa...lPhHuIlRIDEVc..+..c..Gsu+Io-ht.....sNVhsF.Ph.................. 0 14 33 45 +8684 PF08852 DUF1822 Protein of unknown function (DUF1822) Bateman A anon PSI2 target ZP_00109005.1 BIG_39 Domain This family of proteins are functionally uncharacterised. 25.00 25.00 29.20 28.60 23.90 22.90 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.21 0.70 -5.64 27 120 2012-10-11 20:01:01 2006-06-02 13:41:17 6 3 38 0 42 154 0 290.10 19 92.18 CHANGED l.lsssspptA.phupphssspcphpsYL.NpLAltultsWLpt...-hpsshpsssthps.lsphh-.Vsshtls.th.t+l.hlsspshps...s.lplPpEh..hsshlu..YlsVpls.-tp.sclhGalstppl.........pst.cpshplshp.......cLhs.hshLp..hstp.tss..t.....s.h.......h...lpp.Lts..hs......sap..htsLlt.st....h...p........................................................................................h.t..............hths.....tthppsttlshthplsspsh...tLhltht.pssp.phtlhhpLpsht......ssshLPsulpLpllscstpsh.pspsps....p..c.hlplp.hpsp.Gppaplclthss...hhpE.hh 
..........................................................................................................l..t..p.u...p...ut..t....s.tpt.psal.NtLulhsh.taLp.........th..php..s.hps..h.phht..lssh.l.....th.h+....lthhshtt.pp...t.hpl.P.Eh..hs......phhu..YlsVpls....p.p.splhGahpt.................tt.pl.shp.....................pL....thl..........................................t.......ltp.ltt..............sap....httl...ht.......h..................................................................................................................................................t........t...hptsthlshthp.hstp..l...hLhltl....sp.spp..phtlhlplhsht.............tpthLP.slpLtllspssphh.pstspt......tsphlplp.hpsp.uppFplplthss....hpE.h........................................................................................... 0 4 33 42 +8685 PF08853 DUF1823 Domain of unknown function (DUF1823) Bateman A anon PSI2 target ZP_00108651.1 BIG_42 Domain This presumed domain is functionally uncharacterised. 25.00 25.00 79.40 78.40 24.50 17.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.47 0.71 -4.35 21 75 2009-01-15 18:05:59 2006-06-02 15:29:12 6 1 75 1 32 79 121 115.90 52 79.95 CHANGED PLocslLhtILpD+lSDthVspLlWp+LGYphcs..ss...WssussoPp.Wp-caPcsPphIupRPAoV+LTRSIPKEaKQLLKcpLGFpGY+IuELhPRRTRRATAVNWLLualtpp ..PLsp-slhtILs-clsDphVspLVWphLGYRh-tsp.sp........WssupVssp.Wp-cYPcPPshIs.s............R............P...AoV.....+LTRSIPcEpKQLLKEp...L...GFcGY+IGEhsPRcTRRATAsNWLLuahtp.p... 0 8 22 31 +8686 PF08854 DUF1824 Domain of unknown function (DUF1824) Bateman A anon PSI2 target BIG_44 Domain This uncharacterised family of proteins are principally found in cyanobacteria. 
25.00 25.00 36.00 42.20 17.80 18.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.51 0.71 -4.50 23 72 2009-01-15 18:05:59 2006-06-02 15:34:20 5 1 72 3 32 77 106 124.00 34 83.93 CHANGED lspLpDLspLRsA....PpLssspccpLtpELpthhssu-WhTlGlMAPSsppAlpALRshppthuast....hpsh-.....pspt-G.sVFLKuNQpoGslalRsEpGLGcGlLlosptsccspsusTaGPLPLDhF ....................phLp-hssLcht....s.ls.sppccpL+ptLhhhhstu-....a..........slGIhAsosppAltAL+shppuhuatt......hps.....s.......tss.-G.sVaLKhNppoGshalcs.sGht+GVLlSCQusp....ss..t.ssTaG.hPLDhF... 0 7 21 30 +8687 PF08855 DUF1825 Domain of unknown function (DUF1825) Bateman A anon PSI2 target BIG_45 Domain This uncharacterised family of proteins are principally found in cyanobacteria. 25.00 25.00 29.10 38.50 24.60 21.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.47 0.72 -4.05 12 99 2009-01-15 18:05:59 2006-06-02 15:37:52 5 1 99 0 33 86 318 104.30 55 89.98 CHANGED Mu.FF-SEIVQpEA+plFp-YQpLhplGupYGpFDREGKhhaI-pMEpLh-R.+lFhKRhELS..DDF.ApMshcQlcsQLstFGhoP.p...pMF-QMspTLE+MKsphc ........Mu.FF-SEIVQpEAKpLFpDYQpLhpLGuc..YGKFDREGKKhFI-pMEsLM-RY+lFMKRFELS..EDF.AphTlEQL+TQLuQFGhTP.p....QMF-QMstTLERMKspl......... 0 8 23 31 +8688 PF08856 DUF1826 Protein of unknown function (DUF1826) Bateman A anon PSI2 target BIG_48 Domain These proteins are functionally uncharacterised. 
20.70 20.70 21.10 21.20 19.30 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.41 0.71 -4.55 50 214 2009-01-15 18:05:59 2006-06-02 15:45:18 6 1 195 0 63 230 227 184.90 32 84.78 CHANGED shussPslLssIhps.ssNlAlWpRpLssplppalstllsp.p.sshphthsltsspsshcshppths....sspttpsLhpDluhLlchFssLhshcplGLRLclLcpsMCPRFHVDpVPsRLlsTYpGsGTpWLtptsssRstLG...t...sp.........tpIpplssG-VALLKG-tW.G.NEssGLlHRSPsh..s....sGcpRLLLoLD .................................................................................................h.stpstsLsplhps.slNlulWpRplsstltphhs.th...ht.t....sh.th.hsh.........tt....t...ht....h.tths...................sh.shpshltDlshLspha....s....sLh..shc..plGLRLcsls...psMCPRFHVD+VPs...R.....Lls..TYtGsGopWLtpsshsR....p..L....u..th.ttt..spt.................................spIpplssG-VALLK.Gc.p..W.......G......N...........cst............GL.....lHRSPsh...s........ss.ptRLlLoLD...................... 0 19 37 54 +8689 PF08857 ParBc_2 Putative ParB-like nuclease Bateman A anon PSI2 target AAG07772.1 BIG_47 Domain This domain is probably distantly related to Pfam:PF02195. Suggesting these uncharacterised proteins have a nuclease function. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.22 0.71 -4.41 15 262 2012-10-01 20:12:50 2006-06-02 16:09:00 6 2 211 6 96 234 41 170.00 34 65.27 CHANGED pllplslssLRPTQhsVGhtEVptKppcap....................cppc.ppppc....aLcsHhl.........................................PVVlGPsu.phallDHHHlsRALh-hGspp.................VhspVluDLSsl.spssFWphMcsptWVaPaDs+Gp.++shspLPcslt........sLcDDPYRSLAuhL+ptGuacKss..sPasEFtWAcaLRc+ .............p.hh.lplspL+PTQhslGhcpVttKtt+ap....................thst..+phps.......ah..ts+hl................................................PlVlGP.........sG.phYlsD+HHhhpALh-t.s..ssp................................Vhs...h.V..hs-Luth...ststFWptMppppWla...hD...scGt.phs....hspLPssLs..........................sLpDDPYRSLuhhlRctGhhtcss................ssF.EFhWADaLRp......................... 0 21 40 70 +8690 PF08858 IDEAL IDEAL domain Bateman A anon Bateman A Domain This short domain is found at the C-terminus of proteins in the UPF0302 family. The domain is named after the sequence of the most conserved region in some members.\ The function of this domain is unknown. 20.40 20.40 20.80 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.54 0.72 -4.46 42 993 2009-01-15 18:05:59 2006-06-02 16:24:31 5 2 543 3 101 402 1 36.70 34 26.17 CHANGED hLccslt.....phpcccLhppIDpALDp+DcctFhpLoppL ....................lpcthp.....phphcpLhppIDpALpppDcptFhpLopch...... 1 34 67 85 +8691 PF08859 DGC DGC domain Bateman A anon PSI2 target BIG_49 Domain This domain appears to be a zinc binding domain from the conservation of four potential chelating cysteines. The domain is named after a conserved central motif. The function of this domain is unknown. 
21.50 21.50 23.30 23.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.39 0.72 -4.20 39 205 2009-09-11 06:56:43 2006-06-02 16:29:11 6 3 167 0 104 181 5 108.80 32 79.43 CHANGED shlasCSGsSs.sGQluNpsAlcLscpG.h...uchhCluuluupssslhchAcuuctIluIDGCslpCup+sLppsulsscpalhlo.-h.........GlcKpht.tshspp-lpphh.ptltc....h .......s.llauCSGsSs.luQlANplAlcLs+ts.h....ucMuCluGlGu.slssll+hA.+uucsllAlDGCslsCs+psLpptGlsss..tHlhLs.-h.........Gl.cKpht.....p-hs...t-hpplh.t.h...t................................ 0 48 80 91 +8692 PF08860 DUF1827 Domain of unknown function (DUF1827) Bateman A anon PSI2 target BIG_53 Domain This presumed domain has no known function. 23.40 23.40 23.50 30.20 23.30 23.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.89 0.72 -3.88 15 568 2009-01-15 18:05:59 2006-06-02 16:42:00 5 1 513 4 43 173 0 95.60 51 94.90 CHANGED M+LINsTNSascLVpsQLssT........DAphVcVYShGNTcVlaTcAPcHhElLIoN++RsI+-sEI-hIhEhhLK+......hspsslcsl+ss+LIEIolP ............MKLINsTN.SHspL......VcsQL-sT........DApLVEVYSAGNTcVlFTpAPhHhEILIoNK+RuIR-sEIEpIp-aFLKRhh.ptshcpssIKsla..op+LItISlP................................. 0 5 18 27 +8693 PF08861 DUF1828 Domain of unknown function DUF1828 Bateman A anon PSI2 target BIG_51 Domain This presumed domain is functionally uncharacterised. 21.50 21.50 21.80 21.90 21.10 21.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.76 0.72 -4.34 22 497 2009-01-15 18:05:59 2006-06-02 16:47:38 5 2 378 0 50 221 4 89.20 31 38.70 CHANGED TPFh..spGDtltlalcpts..sthhloD-GhTLhcLp.tGhphp..stpRt.cllpslLstaGlphps..upl.hshsstcshutshtshlQulltl ..........TPFhD.phsDpltlahptps...sp......h.pLoD-GhTlhsLE.t.Glsls..pKpRp.clhpslLps..aG...lchs-..pEI...hh...ps...s.p.cshspshasllQslltl............ 
0 13 27 39 +8694 PF08862 DUF1829 Domain of unknown function DUF1829 Bateman A anon PSI2 target BIG_51 Domain This short domain is usually associated with Pfam:PF08861. 22.20 22.20 22.90 22.90 21.50 22.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.86 0.72 -3.70 14 377 2009-01-15 18:05:59 2006-06-02 16:50:04 5 2 271 \N 25 144 2 86.70 33 36.27 CHANGED GpSuhpHpFDallspppppsp+hlpshsssspsthpshhasa.Ds....ppscpssschhl..Ih.NDppcplssphpphhppYslpslsaSc+ .......GcSGlhHsFDallssp+.pp.EKhlpshsNhspsplpsthhsa.Dsp....t+ppcp..cppsphhl..Il..N.......Dsp.....cs.....Is...E..cspshhcc.slpllsaSp+............. 0 8 17 23 +8695 PF08863 YolD YolD-like protein Bateman A anon PSI2 target BIG_52 Domain Members of this family are functionally uncharacterised. However it has been predicted that thes proteins are functionally equivalent to the UmuD subunit of polymerase V from gram-negative bacteria [1]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.82 0.72 -4.13 39 1100 2009-01-15 18:05:59 2006-06-02 17:07:01 5 1 636 0 106 648 6 85.80 23 75.75 CHANGED lPEphptlcchhp-.ppKlp.+PhLsppph-clpphlhpuhtpppplploYac.........sGhhhshhsplpplc.hppplphss..t..tpphplphpsIl..cl ...........................hsEphttlpp...-..ppKlp..+sh.Loc.p...phpclp.......h...Lp...p...u...h...t...p...p..p...t...lpl..paac............sGh.h..s.hp.hplhclst..hp.hhlpsps........pphplphtDIlsI.................................................... 2 34 66 85 +8696 PF08864 UPF0302 UPF0302 domain Bateman A anon PSI2 target BIG_50 Domain This family is known as UPF0302. It is currently uncharacterised. 
20.40 20.40 21.30 35.80 19.70 19.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.90 0.72 -4.23 21 533 2009-01-15 18:05:59 2006-06-02 17:09:35 5 2 509 3 50 197 0 105.20 42 56.87 CHANGED cK+pFlcahLppaphKcREuhWlLNYlhsccplLppVHFV-stphss+uLhlostsscs.sFtFa+ssphhscspcsFt-lphN.s-slYlpLpFps...phpsppYls ..KcsFlcahLtpYphKpR.sVWlLNYlh...sp-shLppVHFV-..s.thtspcsLpluss.s...scsss.hpFhKpNl..phhsspchFhDhhh.N...+..s....p.s.laIQlpFts...shpptphL.............. 0 18 32 42 +8697 PF08865 DUF1830 Domain of unknown function (DUF1830) Bateman A anon PSI2 target BIG_56 Domain This family of short proteins is functionally uncharacterised. 25.00 25.00 25.20 36.90 20.00 18.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.38 0.72 -4.35 26 103 2009-01-15 18:05:59 2006-06-02 17:21:47 6 1 68 0 40 121 114 67.00 43 63.27 CHANGED CsYtNsTs+h.llRshs.ssaYhERVlFPsphhhFEAP.cApLEIassshuushLpsphssscltlsp .CsYhNsTsplQlsRI.sNlsNaYaERVVFPGp+LlFEAsscApLEIaos.huusILs-pIsCpcLtlp.t.......... 0 5 30 38 +8698 PF08866 DUF1831 Putative amino acid metabolism Bateman A anon PSI2 target BIG_55 Domain Solution of the structure of the Lactobacillus plantarum protein from this family has indicated a potential new fold with remote similarities to TBP-like (TATA-binding protein) structures. This similarity, in combination with genomic context analysis, leads us to propose an involvement in amino-acid metabolism. The potentially novel fold is an alpha + beta fold comprising two beta sheets packed against a single helix. The enzyme is present in the cytosol. 
25.00 25.00 37.80 43.40 22.20 19.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.30 0.71 -3.81 22 596 2009-01-15 18:05:59 2006-06-02 17:24:05 5 3 592 1 56 208 0 111.20 54 97.07 CHANGED MAFpp.olplpGsphpYplssslK+aTL+DssFhpo+sGNapLpR.L-ssss.spuhhLKIoIscDLsuFKhslTstsGL+hVNIFKscppp.h.-paaFlhcsLl-Rplhpc .....MAFppplpLpssphsYoLSPslKKaTL+DNsFhETKsGNYpLpRhLEpsPsS.s-GFpLKIhINK-LoGhKlsITDpsGLRhVNIFKsEcp+hpQEKFYFLMDuLVERslFsK... 0 11 26 41 +8699 PF08867 FRG FRG domain Bateman A anon PSI2 target BIG_54 Domain This presumed domain contains a conserved N-terminal (F/Y)RG motif. It is functionally uncharacterised. 25.00 25.00 26.00 25.00 21.70 23.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.49 0.72 -3.51 73 450 2009-01-15 18:05:59 2006-06-02 17:25:37 6 4 401 0 125 417 24 102.00 26 31.55 CHANGED tpsthlFRGp.....ust.sa..t.....LhPolhRtttph.t......................Epph..........lc..............pFccp.................................utt..hhsppss............................hchLAluQHaGlPTRLLDWTpsPLVAhaFAsps.tpts............usla .................................................................................t....hhaRGp.......sst..pa....t....................LhPolhRtttt.................................Epphhp..............................phhpt..........................................................................................t.t..thp.p.t.....................................hchluhhQHaGl.sTRLLDhTpsPhlALaFAspsttptt.........h................. 0 33 71 99 +8700 PF08868 YugN YugN-like family Bateman A anon PSI2 target BIG_5 Domain This family of proteins related to B. subtilis YugN are functionally uncharacterised. 
25.00 25.00 27.50 26.80 23.90 23.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.69 0.71 -4.53 26 285 2009-01-15 18:05:59 2006-06-02 17:42:44 5 1 164 3 55 187 0 121.80 41 95.57 CHANGED h..sSslEGpphpLpcL-plhcshGashuGpWDY-+shaDhKlspc.-G..hhaLRlPshAl-G-lsscsAllcLhoPhLh+HhY.+Gl-.t........................cDtchPpp..hlphucpllp-lccpLp ......hsopl-GtshsLshLc-lMcshGalluGp...WDYE+soaDYKh-....cs...hhaLRV.shAl....-G-l....su....p....pAhl+LhsPhLh+HhYs+Gl-hs...............................s.Dtphspp..hlphucpLlpplEccL.............................. 0 17 38 43 +8701 PF08869 XisI XisI protein Bateman A anon PSI2 target BIG_57 Domain The fdxN element, along with two other DNA elements, is excised from the chromosome during heterocyst differentiation in cyanobacteria. The xisH as well as the xisF and xisI genes are required [1]. 25.00 25.00 26.40 26.30 21.30 20.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.22 0.72 -3.95 34 213 2009-01-15 18:05:59 2006-06-02 17:46:45 6 4 37 7 71 266 3 103.50 39 97.22 CHANGED MDpLs.pYRplIpplLpcYuph..psspsclEs............pLlhDpp+........ccYhlhslGWcspcRlausllHl-I+ssKIWIppDsTEcGIAp-LlctGlPKpDIVLGF+sPphRpaT-FAVu ................M-.p.l.p..pY+pllpplLpcYuph......tssp.spl-s....................phlhDppp........c+Y.lhplGWpsp.c.R.l.a.usllHl-I.+.ssKIWIppDsTE.uIAp-LlctGlPKpDIVLu.FpsP.hRpaT.saAl............. 0 17 52 71 +8702 PF08870 DUF1832 Domain of unknown function (DUF1832) Bateman A anon PSI2 target BIG_58 Domain This family of proteins are functionally uncharacterised. 
25.00 25.00 25.20 25.70 24.80 23.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.31 0.71 -4.37 23 127 2009-09-10 14:48:39 2006-06-02 17:49:01 6 3 123 0 36 131 30 112.60 30 66.51 CHANGED +l+lSppuccpLp+LKppTGlss.NllCRhAhstSLppsshsp.....stsh.sDuslEh..shcsas.....G-hsslhhslLKp+h.....s.ph.-scplhcthphHlcRGluhLtschplts....h..pll ..pl+lSppsc-pLp+L.Kp...h..T.sl.ss...NllsRhAhshSLtpsp..h.s.....s.sh....-u.slEh..shpsas...........G-hsslh..h...hlL+t+h.....shth..-pcslhpta+hHlcRGIuhLtschpltp..................... 0 5 25 34 +8704 PF08872 KGK KGK domain Bateman A anon PSI2 target BIG_60 Domain This presumed domain is found in one or two copies in cyanobacterial proteins. It is named after a short sequence motif. 21.40 21.40 22.10 22.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.50 0.71 -3.93 9 64 2009-01-15 18:05:59 2006-06-02 18:16:26 5 2 30 0 19 58 0 107.00 25 80.45 CHANGED s-phcptN..............scDDVl..tscp.......psaKlpclhptlcpthpschhp-h...hp..t.................sphahspGlsCElLchsspsWpKGKl+l.huLEFlPD..E.........Pphs-.ESPLDDIRppIs ..............................................................tcpsl....tp.......phhpltphhphh.t.hpp.....p..t.....h....ht............................................tpha..h...spGlcCclLphGspsWpKGKl+l...............h...........s............LEF..hPD..E..............................sp....pp.p.SPLD-lRp.h........................ 1 3 13 19 +8705 PF08873 DUF1834 Domain of unknown function (DUF1834) Bateman A anon PSI2 target BIG_62 Domain This family of proteins are functionally uncharacterised. One member is the Gp37 protein from the FluMu prophage. 
25.00 25.00 33.00 32.60 23.80 23.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.09 0.71 -4.71 15 142 2009-01-15 18:05:59 2006-06-02 18:25:54 6 1 131 \N 25 118 1 179.60 26 94.11 CHANGED hIsplEpAllsRl+p...shGshl+tVso.....................asGcaD-pslsplVRphPAlaVsatGts.ts...tsucs+apssupasVhVsucslsuEpssRhG...........lYQllpslpslLtsQcl....s.hshssLp..Pppl+sLa.ssphcspG...luVYAh-Fpssh...............l-.ssLsshphsptsh...................ts.ssDhpphtlphplsssps .................................lsphEpAllsRlcp......hhG.ph.lpp.Vco.....................asGcas-.tsltphlt..shPAlaVsahGss.......tupsRhpssucaslhVs.ucslp....ucpssR.G............hYpllpplptlLsGpph...........t.ssslp...p..pl+..sla.sssh.t.stu...lulYu..l.Fssph...............s-.ssLcs..hth..tth........................................hst................................................... 0 11 17 23 +8706 PF08874 DUF1835 Domain of unknown function (DUF1835) Bateman A anon PSI2 target BIG_63 Domain This family of proteins are functionally uncharacterised. 21.10 21.10 21.10 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.70 0.71 -4.18 25 370 2009-01-15 18:05:59 2006-06-02 18:26:58 5 4 323 0 86 318 19 119.80 25 37.51 CHANGED lHlshussAuuoL+hAltp.t..hs-pVlsltD-LSlGPlhslcstt.thtRtpW...l.ptht.t.......pp.htpp......hhschcsthpp...lppl..ssss.plslWpucsup-plsLphlhhhLpsps.p..lphlssop ...........................lHlshusssuusL+hhltptt............hpcs.Vlsh.p-s..holGP.l.t.p.lc.s.s.t.h...htRtpW......l.pslst......................t.hpp-.......hhschcp.phpp........L.cpl..ssps..plslW.pup.sup-plhLphlhtpLc..spt.p...lp.lphs................. 0 22 51 66 +8707 PF08875 DUF1833 Domain of unknown function (DUF1833) Bateman A anon PSI2 target BIG_61 Family This family of proteins are functionally uncharacterised and are predicted to adopt an all-beta fold [1]. 
They are often found in gene neighborhoods containing genes for an NlpC peptidase and a Ubiquitin domain predicted to be involved in tail assembly [1]. 20.60 20.60 21.60 21.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.49 0.71 -5.05 12 139 2012-10-02 17:50:33 2006-06-02 18:33:47 6 1 118 0 27 137 7 147.30 21 88.56 CHANGED shhcphtusssschhltslEIp+sshsc......shhlVpshcDlssphEs......Gp.l.FhAhsh-lslPspssuss.slplslDNVsptlschl-tuhtsp..h.tlshRhYLsschssPp.-hshphslpsssh-shplospAGhhDlhNpthsphpa ...........................tth.ttsssph...lpslElsHPshsp..............shh.ls.p......s......hc.s.lss.ptcs..............Gp...hhatshshsl.shPsps.sststslshslsslss.l.sstlc..pshts...p.........h...plsaRhY.....lu.scls..sPt.thshshplpssshs..stslshpss.h.shhspth.......................................................... 1 5 10 18 +8708 PF08876 DUF1836 Domain of unknown function (DUF1836) Bateman A anon PSI2 target BIG_64 Domain This family of proteins are functionally uncharacterised. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.51 0.72 -4.28 48 1171 2012-10-04 14:01:12 2006-06-02 18:36:12 6 2 876 0 118 656 3 95.50 36 57.32 CHANGED .php-l...Psl.............DLYhDQVlphhsphhps..................................th.....chLTpTMINNYsKp...pllssPpc.KK..Yo+pplshLllIthLKsllolp-IpplLphhhss..........hshcphYpta ..................................................php-lPsl.............-LYhDQVlthhsphhss.................................h....p..phLTsoMlNNYVKp...thls...tP......K.....KK.....YsppplshLlhIshLK.s.l.holp-Ippslphltsp..........hs.pphYp..h..................................... 0 45 84 98 +8709 PF08877 MepB MepB protein Bateman A anon PSI2 target BIG_66 Domain MepB is a functionally uncharacterised protein in the mepRAB gene cluster of Staphylococcus aureus. 
21.80 21.80 22.60 29.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.44 0.71 -4.59 14 546 2009-01-15 18:05:59 2006-06-02 18:40:34 5 1 530 0 56 301 2 119.70 49 79.89 CHANGED EppNs-Ycuhhhplppcp..h+hRlAKhTPsKhGhFVsFWcK.spsspNpPFshp-ss-hLlI.slhDcsppG.FlFPK-lLlcpGILpopsppGKMAhRVYPsWspsLNppApKTQpWQhpYFh-h ........EK.Np-Y-uhhFphppcs...hpsRLAKKTPpKtGYFVohWpK...D.c.s.s.pN..pPas....hcshsDhLhI.sV..........hD..-......p.....ppGhFlFP+ElLlc+sILsopppKGKMAhRlYPpWsss........LNppAppTQcWQhpYFh-............ 0 24 37 50 +8710 PF08878 DUF1837 Domain of unknown function (DUF1837) Bateman A anon PSI2 target BIG_65 Family This family of proteins are functionally uncharacterised. 25.00 25.00 25.40 25.20 24.60 23.50 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.30 0.70 -4.76 59 272 2009-01-15 18:05:59 2006-06-03 10:19:05 6 3 262 0 46 206 4 214.50 17 69.97 CHANGED cphschlhstlscaulsp................pc..hpphtppphtthht............putpph........................sphtppG...E.hGEll.Lahllcphhss......llsKh.hKss..p.thcGsDulHlthpsst......pLahGESKh..Yss...hssA........lpsshculpphh........ppsthpp-hpllpsphp.p.........t.t.tptlpchls.spps.........pphphphshshhlsa-sshh.ph.t............tpchpcplppphppphpphtp.............hp.tthtphphplhllPl ......................................................h...l.phhhs....................c....htth.htp..h.tthht..............htphh............................stphcsG...-..lGEll.ltthl.cthh.s...h.s....h.l..s+lthKss.pshsh+GsDslthh..hs..spst......plhhuEuKh.....hts...hpsu..........lpcshsslpcp...........ppp.hspphphlpsplppp.........p.shtctlcchlp.....................tt....hthshhhhhp.t.....................th...h...h.......................................................................................... 
0 11 29 37 +8711 PF08879 WRC WRC Riano D, Finn RD anon Manual Domain The WRC domain, named after the conserved Trp-Arg-Cys motif, contains two distinctive features: a putative nuclear localisation signal and a zinc-finger motif (C3H). It is suggested that the WRC domain functions in DNA binding [1]. 21.30 21.30 21.40 21.70 21.20 21.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.59 0.72 -4.29 25 422 2009-01-15 18:05:59 2006-07-26 15:40:26 5 12 32 0 213 410 1 45.50 49 10.33 CHANGED -sEstRC+RTDGKKWRCs+pshsspKaCE+Hhc+u+pRsp+phcs.s .......sEPuRCRR....TDG.....KKWRCS+cshs.spKYCE+H.h...pR...G..R....pRS+K.l-..t.............. 0 23 124 179 +8712 PF08880 QLQ QLQ Riano D, Finn RD anon Manual Domain The QLQ domain is named after the conserved Gln, Leu, Gln motif. The QLQ domain is found at the N-terminus of SWI2/SNF2 protein, which has been shown to be involved in protein-protein interactions. This domain has thus been postulated to be involved in mediating protein interactions [1]. 23.30 23.30 23.70 23.30 22.20 21.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.04 0.72 -7.66 0.72 -4.54 43 563 2009-01-15 18:05:59 2006-07-26 15:44:28 6 26 209 0 287 521 0 36.80 41 3.82 CHANGED ssFossQhppLcpQhhsaKhlsp.....shPlPscLhhslpp ....sFossQhppL+tQlhsYKh.Lup.....utPlPspL.hslpt..... 0 51 141 221 +8713 PF08881 CVNH CNVH; CVNH domain Bateman A anon Bateman A Domain CyanoVirin-N Homology domains are found in the sugar-binding antiviral protein cyanovirin-N (CVN) as well as filamentous ascomycetes and in the fern Ceratopteris richardii. 
22.10 22.10 22.80 22.40 21.90 21.90 hmmbuild -o /dev/null --hand HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.47 0.72 -3.81 34 277 2009-09-10 22:09:16 2006-07-26 15:50:08 5 8 82 44 218 294 10 104.10 24 56.75 CHANGED sFppSuccI+l.....pssphLsAcspsssG.p....hhsucIcLsphlGNss........GpFhW....................................................................sGtNFoco..Acslcht...ttptsslLcApLtsscGphtspp.lsL.sE+IsNpsGpLpa ..............................Fttosp.shpl....................ssp.hLtApC.p...st..s..G..p..........hh.p.o.pl-.LNph....lG.Nss....................Gph..ph............................................................................s.s.ssFspo..s..pshplp..............ts.h.L...pup.h.......ts........p..G..t...h....h......tsp....lsL.....s.c....pl.t.NtsGpL........................................................... 0 29 118 190 +8714 PF08882 Acetone_carb_G Acetone carboxylase gamma subunit Bateman A anon PSI2 target BIG_95 Domain Acetone carboxylase is the key enzyme of bacterial acetone metabolism, catalysing the condensation of acetone and CO(2) to form acetoacetate. 20.80 20.80 21.30 37.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.52 0.71 -4.05 24 126 2009-01-15 18:05:59 2006-07-26 15:56:01 6 2 113 0 52 118 9 111.40 52 68.55 CHANGED Vshs-hllLsLuschalsps...........CGHcFushRcNaKhtshlasRDscEhhptl.....YsthhAPDspWppIhEYYCPpCGshh-sEhssPhaPllHDhEsDI-uhhpcWlt .............VsacD+IlLPLGsHLalVQssp.p+..hll+C.pCGHsFsssc-NWKL+A.IYVRDTtEthcEl.....YPclhAPDspWQVhREYhCPsCGhhl-VEAsTPWYPVlHDFEPDI-sFY+-WL.G..... 1 17 31 44 +8715 PF08883 DOPA_dioxygen Dopa 4,5-dioxygenase family Bateman A anon PSI2 target BIG_92 Domain This family of proteins are related to Swiss:P87064 a DOPA 4,5-dioxygenase that is involved in synthesis of betalain. DOPA-dioxygenase is the key enzyme involved in betalain biosynthesis. It converts 3,4-dihydroxyphenylalanine to betalamic acid, a yellow chromophore. 
25.00 25.00 33.20 32.80 23.50 21.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.61 0.72 -3.95 44 382 2010-01-08 13:27:16 2006-07-26 15:58:34 6 3 347 6 160 314 37 105.30 39 76.16 CHANGED YHAHVYF...c.tsshptApsLpcpltccFs.......lphG..+l+p....+sVGPHP...hh.aplsF.ssc.FspllsWLtlsRssLoVLlHP.T.G-...-LtDHT.cpAhWLGcplsLslshh .........aHAHl.YF..........s.ssptphAptLpcpltccFs..............lplh.....phap......................+.VGPHP...th.aplsF....ssspa.ssllsWLslsR.GsL.oVLlHPsT....G-..............-htDHp.-pAhWlGcshsLshshh....... 0 36 83 130 +8716 PF08884 Flagellin_D3 Flagellin D3 domain Bateman A anon Bateman A Domain This domain is found in the central portion bacterial flagellin FliC. The domain contains a structural motif called a beta-folium fold [1]. Although no specific function is assigned to this domain its deletion leads to a reduction in filament stability [2]. 21.40 21.40 21.40 21.40 21.10 20.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.34 0.72 -3.80 34 687 2009-01-15 18:05:59 2006-07-26 15:59:46 6 4 182 3 5 483 1 88.90 36 20.39 CHANGED ushshouhs..ssslKsus.............ss...ssussuspsuclhaDs.sscYYlcVsuhst...su+sGaYcVsVss......sGpVohsssssp..tsstPs...usspVTpsQ ..............................s.T.shouhs..sssIKAus................Gu...ssuTssstsuslpFcs..ssKYYspVsGhss...suKsGhYEVsVss............sGpVohsussTp...sshPs...usoslTcsQ...................... 0 1 1 3 +8717 PF08885 GSCFA GSCFA family Bateman A anon PSI2 target BIG_88 Domain This family of proteins are functionally uncharacterised. They have been named GSCFA after a highly conserved N-terminal motif in the alignment. Distant similarity to the Pfam:PF00657 lipases suggests these proteins are likely to be enzymes. 
25.00 25.00 29.90 29.50 22.50 19.80 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.72 0.70 -4.83 49 313 2012-10-02 11:02:24 2006-07-26 16:17:02 6 8 248 0 91 323 73 208.60 32 70.18 CHANGED splhshGSCFApplGctLptttapsh..........................sssaGs.lYsstslhphlppuhspp..s....tpslatts...s+aash...shsshuhsotpclhpphspplp..........ps+ptlppushllhTLGhs.sappppssplhssC.tlsptpFscch.....holsEIhpshpthhshlpsl..NPpl+llhTVSPV.hht.......cshsuNphSKusLhsAsppl...sp................pp..s.cs..........tYFPSYEIlh.sphpchpaYssD...hhHssptuVsalhcpF .....................p.plhhhGSCFApplupthttt..taph............................ss.aGh.lasPhul.phlpphhtt..........ttthh...t............thahs....tht..s..s.tthhtthpttht..........thtphh.phshhllTLGoshsah........pps.......utl...ls.NCpp.h.stp.Fpcph.....lolpEhhpshpthlphltth.......NP.ph+llhTVSPlRahp.......cshhtsphSKusLhhAhcplhp..........................t...p..ps.............hYFPuYEIlh..DcLRDYRFYtpD...hhHsst.ulpalhcpF.................. 0 29 64 78 +8718 PF08886 GshA Glutamate-cysteine ligase Bateman A, Eberhardt R anon PSI2 target BIG_70 Family This is a rare family of glutamate--cysteine ligases, EC:6.3.2.2, demonstrated first in Thiobacillus ferrooxidans and present in a few other Proteobacteria [1]. It is the first of two enzymes for glutathione biosynthesis. It is also called gamma-glutamylcysteine synthetase. The structure of this family has been solved, and is similar to that of human glutathione synthetase and very different to gamma-glutamylcysteine synthetase from Escherichia coli. 
25.00 25.00 77.70 77.60 20.30 19.70 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.23 0.70 -6.16 27 271 2009-09-11 17:05:15 2006-07-26 17:16:03 6 1 265 1 85 225 152 400.70 57 94.39 CHANGED Ec+lLsppssIEcWFRtpWpcassPFYsSVDLRNuGFKLAPVDTNLFPuGFNNLsPphhPLslQAshuAlp+.hCP-A+plLlIPEsHTRNtFYLpNVstLppIhppAGhpVRlGSlsspIscsTslpLssGpplslEPLhR...sssRLul.csFsPCsILLNNDLSuGlPplLpsl.-QhllPPLHuGWssRRKSpHFpsYccVup-FucllsIDPWLINPhFspCsslsFtsppG.EsLAspVDslLs+IRcKYccYGIc-cPFVlVKADsGTYGMGlMoV+suc-lhsLNRKpRNKMuslK-Gh.VscVIlQEGVhThEphss..AVAEPVVYMlc+aVVGGFYRlHssRGtcENLNuPGhcFhPLuFppss.hPp.tt..Psss........................sNRFYhYGVlARLAhLAAuhElc ......Ep+ILsspssIEpWFRhpWpcHsPPFYsSVDLRNAGFKLAPVDsNLFPGGFNNLsP-hlPLAlQAA.uAl-+.hCP-AKslLlIPEs.....HTRNsFYLpNVttLspIh+pA.GhpVRlGSLsPpl...sEsTplpLssG.ppllLEPLhR...spcRlu..........L....c....s...FsPCsILLNNDLSAGlPsILcsl.cQhlLPPLHuGWssRRKSsHFusYccVApcFAKllsIDPWhINPYFspssGlDFpt...+pGc-sLAcuV-tVLpKIpcKYcEYGIs-+PaVlVKA.DAGTYGMGVMoV+susEltsLNRKpRs......KM.utsK-GLtVo-VIVQEGVYTaEpl......ss....AVAEPVVYMhDRaVlGGFYRVHsuRGtDENLNAPGMcFVPLuFpps.shPDsptc..PsAs.........................sNRFYhYGVlARLuLLAASlELE............................. 0 18 52 68 +8719 PF08887 GAD-like GAD-like domain Bateman A anon PSI2 target BIG_90 Domain This domain is functionally uncharacterised, but it appears to be distantly related to the GAD domain Pfam:PF02938. 25.00 25.00 33.90 33.50 24.60 23.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.13 0.72 -3.98 14 185 2012-10-01 21:10:01 2006-07-26 17:18:58 6 2 130 0 28 187 5 100.20 28 50.71 CHANGED DcsaphhLcpFG.shcpp.lssus.I..-+Y+s+LP-tLLcYWp-cGWsuau-GlFWhVNPp-YcsllssWltGs.hhst-sh+llARoAFG-lalWuEpsuhslplsshhs ...................................tp.hstp..I..c+Yc.st.LPspLlphWpcaGausahsGhhtllNPp-Yp....sllpphh.................ctc.shhslhpoAFGDlhhWtcppsh.........h................ 
0 5 12 19 +8720 PF08888 HopJ HopJ type III effector protein Bateman A anon PSI2 target BIG_86 Domain Pathovars of Pseudomonas syringae interact with their plant hosts via the action of Hrp outer protein (Hop) effector proteins, injected into plant cells by the type III secretion system. The proteins in this family are called HopJ after the original member HopPmaJ [1]. 21.30 21.30 21.50 22.80 20.90 20.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.43 0.72 -4.06 39 245 2009-01-15 18:05:59 2006-07-26 17:21:56 6 2 245 4 74 224 31 109.60 47 92.81 CHANGED M.......slp-...hlppLpspspplpFs-ThslI-ppYcaoPsuFpNG.s...lhNpAGpNpGSCKlFuFAplpsLocppTLtsFGpaYR.-VLtsPcGsDHtNIRsFh.p...pGWsGlpF-spsL ..................lpshlspLps..pphpFsDslAhI-spYsasPsuFpNG....s.....hpNsAG.pNpGSCKlFuFApLpuLopppTLtsFG-aYR.sVLssP-GsDHtNIRsFh....p....pGWsGlpF-upsL.... 2 22 45 63 +8721 PF08889 WbqC WbqC-like protein family Bateman A anon PSI2 target BIG_75 Family This family of proteins are functionally uncharacterised. However it is found in an O-antigen gene cluster in E. coli [1] and other bacteria [2] suggesting a role in O-antigen production. Feng et al. suggest that wbnG may code for a glycine transferase [2]. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.34 0.70 -4.89 67 583 2009-01-15 18:05:59 2006-07-26 17:27:58 6 4 429 0 184 571 435 164.00 24 92.01 CHANGED QPhahPalGYFphlspsDpFllhD-spa.p+pu..WhNRNpI.tssp..........G.phlTlPlp.......cspppp.pIp-hplssst..Wpc+phpslppsY.u+APaF.cphhshlcslap..pp.ppLschNhphlphls...chLuI..psplhhoSchph....stppsc..+llslspphuAspYlsu.uupshhp.p.....Ft.ttulplpahpht.h.tYsQ..hts...t.FlPslSIlDlLhssG.scshphlh ...............................................s.ahs.htaa.ph.lhth..-...hhl..pp.pa....+.ps..ahNRshI...hs.s.p..........u......hlolPl.........p.t...p.....hp-hplssp.........Wtph+hpsl.pt.sY.tpuPaF.p.h.s.ht.hht.....pphp.L.phN.phhphlh...ph...lt.l..p.p.h..opphth........tt.p.h..ht.tt.......................................................Y.Q.....t......F.stlSllDllh..u..pt.....h................................. 0 65 135 164 +8722 PF08890 XkdN Phage XkdN-like protein Bateman A anon PSI2 target BIG_83 Family This family of proteins are functionally uncharacterised. They are found in prophage sequence in various bacteria. 22.30 22.30 22.30 24.80 21.50 22.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.68 0.71 -4.42 23 222 2009-01-15 18:05:59 2006-07-26 17:31:39 6 1 160 1 58 199 8 132.50 24 93.15 CHANGED slp-...hlhpphhpp.ch..p..shhcRhss..G..l.aph+ulotcchscl+ccsTpppphht.......thppchDpscapupllhtuslpPs....hpsp-Lpcuass.ssc.Ell++h...L.hsGEhsslsspltElsGa.-ssh--...lEE...hK ...........................................................................hh.tphhtt.ph....p..hhh.cRhh....p..s..t.h.hpl+sloscchpclccpssphpt.ht...............thhpchDpp.pa..sp..llhpus..l.P-......................hcspELp.....c.u.....a.....s.....s.h.s.st.-ll+ch...L..hsGEhsslhstl.-lsGa...sp..p.......hp-...l.-E.hK............. 
1 29 49 51 +8723 PF08891 YfcL YfcL protein Bateman A anon PSI2 target BIG_80 Domain This family of proteins are functionally uncharacterised. THey are related to the short YfcL protein from E. coli. 25.00 25.00 25.70 25.30 22.60 22.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.76 0.72 -4.00 29 716 2009-01-15 18:05:59 2006-07-26 17:33:15 6 1 710 0 84 220 6 86.50 67 94.87 CHANGED ltcaEppl.shIDshVppuoD.DELFAuGYLRGHluLulAphE.cspp.slcsLcs+lppSLppA..puELussDpsLVpshWppLttp ......IAEFESRILALIDs..MV-HASD..DELFASGYLRGHLTLAlAE......LEu....uDDH.Ss.pAV+ss....VSQSLEKAIuAGELSPRDQALVssMW-sLFpp.... 0 8 27 55 +8724 PF08892 YqcI_YcgG YqcI/YcgG family Bateman A anon PSI2 target BIG_93 Family This family of proteins are functionally uncharacterised. The family include YqcI and YcgG from B. subtilis. The alignment contains a conserved FPC motif at the N-terminus and CPF at the C-terminus. 20.80 20.80 21.20 20.80 20.40 19.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.49 0.70 -4.78 34 324 2009-01-15 18:05:59 2006-07-26 17:39:34 6 3 265 0 87 286 8 212.20 33 84.71 CHANGED pptacpFpptlts.....ssFPClhuppuhppspL+hsFhs..p.tptpsspplspsLtcalct.cshs.phs...SLllhFcss......pphohppacchhWphLppLpptDs...psWPpclPsDPccspWpFsFuGcshFlhsssPua..tpRpSRphsh.hhlsFpPRhlF-cl..psssp.hup+h+ctIRpRlttaDshshpPpLutaGt.-spEW+QYhlp--sp.............scCPFptt ..............................t..tappFpphlh-......sFPChhuhpuhppsplRYsFls......ppshpclspslhpalchh+-p...s.hhp......uhhlFhcs.......cctol-tacchaWplLphL+cpDs...psWPppIPpDPcc.tWEFsFuGEPhFlhsssPua..ppR+oRp.h...us...hhlsFQPRtlF-s.l..puspt..tupph+phlRpRlppaDp.hPhHPsLupYGs.cppEW+QYhltD-.p..........h.u+CPFp..h.................................... 0 22 54 70 +8725 PF08893 DUF1839 Domain of unknown function (DUF1839) Bateman A anon PSI2 target BIG_68 Family This family of proteins are functionally uncharacterised. 
20.50 20.50 20.70 187.00 17.90 20.40 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.91 0.70 -5.46 13 104 2009-01-15 18:05:59 2006-07-26 17:48:32 5 1 96 0 42 97 5 312.40 50 90.29 CHANGED hsshs..scshssHuLHus-tlWsETNCYlDLWIELLHuhGLsPhAALuFTVsQDFEGDQFTFFKaP.tDLEpLYGlsVpELAlYDsLEsHVtsQlsRG+lVLlEVDSYaLPDT+GsuY+ccHsKTTIAIDhlDs-AptluYFHssGYHphpGEDYcGlFph.sshppsss.hhPYsEhsKpstsshspssLhcAShsLLRpHLsRRPcpNPlstFRpsFstcl-plhsRs.saFHhYuFNsLRQLGANFELhu+aLcWLstpGpssPts..supustoIAoEAKVlQFRLARAVhR+KsDsspssLDsLEsAappslsuLApp .......ht...cscpYcPHsLHusphlWppTNCYVDLWIEVLthhGLsPhAALsFTlo.DFEGDQFTFFKaPhEDLEpLYGIhVQEhAIa-sl-sHVEpQlAR...GpLhLVEVDuaaLPDTRGsoY+ppHsKTTIGIDsIDhptRpluYFHNuGYahh-GtDYDGLFtt.ssht........LhPYVEhAKRph.t.PL-.cptLs-sShsLLp+HLpRRPssNPIsAFRpthstcscslAsp.....PhsaFHtYoFNoLRQLGANFE.Lhu+YLcWLpssGtsuP.h......s........hssAscpIASEAhVlpFRLARAsuRuKp-cscusLDhlEpAassllsslut.h...... 0 4 16 25 +8726 PF08894 DUF1838 Protein of unknown function (DUF1838) Bateman A anon PSI2 target BIG_43 Family This family of proteins are functionally uncharacterised. 25.00 25.00 50.30 26.70 21.20 16.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.74 0.70 -5.15 9 37 2009-01-15 18:05:59 2006-07-26 17:54:37 6 2 34 0 19 34 76 224.60 36 82.65 CHANGED cupslaahWpGplauhhsGE..cc+LFsl.GMsls+Chshs-ut....uachloRElhhYhDPpTG-lLcpWcNPWoscsVsVlH.............VANDPVp.............uhhptphsh..tG-phsashsl.LhYPNPLus-.Qht...sGshYpAsELFphhsspssLtss-sso.ssspluWsRlusWLPWMtMG.sR.GhLhapstGpKlsSh--Ls.hhpppIss+hPhYtpsPcphs-t.N.TSWhYF+phhc ........spplhahWpGclYuhhPGE...chLFthcGhNVu+sl.ps-ut.t..uachloRElhhYh......DPsTsElLcpWcss...t.ssVlH....................VhNDPss....................................................tthh.h.hshp..hGsphshshpl.LtYPsPLss.hQh.......sussYcAhELFpahsspssLt.sst.ss..ssphuWsRhusWLPWMthG.sRsGhLhapstGpKls.uas-lPthhps.IsschP.atpsPpphs-.sN.TSWhaF+ph................. 
0 8 14 18 +8727 PF08895 DUF1840 Domain of unknown function (DUF1840) Bateman A anon PSI2 target BIG_69 Domain This family of proteins are functionally uncharacterised. 25.00 25.00 25.70 34.00 23.10 24.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.43 0.72 -4.01 41 231 2009-01-15 18:05:59 2006-07-26 18:01:24 6 2 210 0 91 204 18 103.90 36 96.15 CHANGED M.LlTF+S+AsuDlhMht-lApplLpllG+.......ssss.GsIss--lPsAlppLcsAl....tuppstssssp...........pppccstctsluLuQRAhPLl-hL+tAtc..pss-VhW ......MllTF+S+AssDlsMhcDhAhhlLtllGK.......phs.cGlIos--lssAIs+L-sAlstt.ptptppssp.p........................tsctccppp-.sluLuQRAhPhlcML+pAtt..pss-VhW..... 0 15 47 72 +8728 PF08896 DUF1842 Domain of unknown function (DUF1842) Bateman A anon PSI2 target BIG_72 Domain This domain is found at the N-terminus of proteins that are functionally uncharacterised. 20.20 20.20 20.30 21.60 20.10 17.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.38 0.71 -4.35 17 105 2009-01-15 18:05:59 2006-07-26 18:20:34 5 2 61 0 24 64 0 115.10 44 60.12 CHANGED sGLFsspYhl.....ussh.GAPsLpLsLLVsTsc+pVsGpApIoQusp.PPlshcucVWGpao.htl.ss....upspIlloLpGs......uGPtSs.ht.sF+LchlLssDWps...GsAsYcYh.psGpW .............sGLFPVpahV.....uTsh.GAPsLhLsLlVsT.s-+oVsGhA..p..ITQ.uVs.PPLsF+AcVhGsas.htl.P.....ussplhloLpGss.......uGPhusthh.sFclchlLsssWpo...GsAoYRYa.psupW.......................... 0 4 7 12 +8729 PF08897 DUF1841 Domain of unknown function (DUF1841) Bateman A anon PSI2 target BIG_71 Domain This family of proteins are functionally uncharacterised. 
21.00 21.00 21.30 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.35 29 307 2009-01-15 18:05:59 2006-07-26 18:20:47 6 1 298 0 91 253 52 132.80 45 88.24 CHANGED PoR-pl...RpFFh-sWpKtpsspsLosLEshAschItpHPEYHshLp.ssEstLspDYsPEtGpoNPFLHLShHLuIpEQlSIDQPsGIRssacpLst+.hss.H-ApHphMECLuEhlWpAQRsGssPDsssYlpslccp .............................spc-V...R+FFhcsWpKphssp.LosLEthAschIttHPEYHt.Lp.sh-p.tlsp-ah......PEpG....coNP..FLHlShHLuIpEQluIDQP.GIRssa-pLsu+..sshH-ApHt..hMEsLuEtlWpAQRt.Gps.PDsssYlsplp+.h... 0 22 55 74 +8730 PF08898 DUF1843 Domain of unknown function (DUF1843) Bateman A anon PSI2 target BIG_72 Domain This domain is found at the C-terminus of a family of proteins that are functionally uncharacterised. The presumed domain is about 60 amino acid residues in length and is found independently in some proteins. 21.70 21.70 21.70 48.10 21.60 21.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.49 0.72 -3.93 15 126 2009-01-15 18:05:59 2006-07-26 18:24:54 5 3 42 0 24 71 0 53.00 56 33.94 CHANGED PhYGVAIQpAhASGDLupMKuLsupA-pQLsptsplpsAlptLcsEIARLEtR .shYGVAIQpAtASGDLu+MKoLustAcpQLuspspIAuALptLcsEIAKLEuR. 0 6 12 15 +8731 PF08899 DUF1844 Domain of unknown function (DUF1844) Bateman A anon PSI2 target BIG_76 Domain This family of proteins are functionally uncharacterised. 20.70 20.70 21.70 21.30 20.20 19.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.25 0.72 -4.06 18 82 2009-01-15 18:05:59 2006-07-26 18:29:23 6 1 81 0 54 80 123 74.50 40 62.58 CHANGED plsFssFlhSLuoSAlhpLG-hPcPpoGphpc.sL.h.......A+poIDlLuMLpEKT+GNLos-Es+lL-slLh-LRh+Y .........lsFssFlhSLsooAhspLGchssPp.oGphpp.sL.h........A+....poIDlLsMLpEKT+GNLss-Ep+lLcslLa-LRhpY.. 
0 33 46 53 +8732 PF08900 DUF1845 Domain of unknown function (DUF1845) Bateman A anon PSI2 target BIG_78 Family This family of proteins are functionally uncharacterised. 20.40 20.40 20.40 20.70 20.30 20.20 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.27 0.70 -5.05 25 252 2009-09-11 12:10:13 2006-07-26 18:32:17 6 2 206 0 69 241 11 209.90 34 82.41 CHANGED ptphGuL..RSslslsLHTpaAhRLWp.GRptpcscp.............uIhGhssahuhhsplppu.uppDDPYADhallplEcpltpucpplpphspplcthhsp.lPsslslucstSlpPlplsLahsosLGapsVaLLscaDpLspplhpApHhuLlu+pctpchlcpGu+hlRplFulsQpY.......RasGloRcDht..tsNA+uppA...lc+hG.clPt-lLpGp+RSs..FuP..lsps ...........................................p..phGuL..+SshslsLHT+aA.RlWp.GR...tttcscs.......................sIlGhstalshhsphpps.utpDDPYuDhhhlplEcclppsp....pphpslt.ppl-phhus.lPsslslucshslp.....Plp..lslalsssLGapsVaLLssaDpLs++lhhApHhuLIs+sphc.p....hLscGu+hl....RplaulsppY.......+.hsGsoRsDht.....tpNutuptA......hcchG..clPp-lLpGp+RSp..FuP.sl...p................................................... 0 11 34 54 +8733 PF08901 DUF1847 Protein of unknown function (DUF1847) Bateman A anon PSI2 target BIG_82 Family This family of proteins are functionally uncharacterised. THey contain 4 N-terminal cysteines that may form a zinc binding domain. 25.00 25.00 28.40 28.20 21.60 20.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.01 0.71 -4.72 31 185 2009-01-15 18:05:59 2006-07-26 18:49:59 6 2 169 0 70 167 5 151.60 44 73.44 CHANGED Csstp..tchlccshp.Yppc.pst+lupsAAplEuptY...........s+hTRlEElltFA++hGa++lGlAhClGLhpEu+lhscILcspGFElhSVhCKsGul-KsplGltcp...p.sshEuhCNPIhQAclLNctpT-LNlllGLCVGHDhLFhKaScAPlTTL ...................C.s.t....c.hppshphYpp-.pst+ltpsu.AplEuphY...........sc.hTRlEEllpFA++hGaKKIGlAhClGLhcEA+hhscIL.ctpGFE.lhSVhCKsGulsKsp.lGltcp....p.s.shEshCNPIhQAclLNctpT-LNlllGLCVGHDoLFhKYScAPlTTL........... 
0 27 49 61 +8734 PF08902 DUF1848 Domain of unknown function (DUF1848) Bateman A anon PSI2 target BIG_74 Family This family of proteins are functionally uncharacterised. The C-terminus contains a cluster of cysteines that are similar to the iron-sulfur cluster found at the N-terminus of Pfam:PF04055. 24.80 24.80 24.90 29.80 23.90 24.70 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.78 0.70 -4.86 33 307 2009-01-15 18:05:59 2006-07-26 18:54:42 6 1 280 0 86 291 10 258.50 39 84.80 CHANGED IISASRRTDIPAFYu-WFhsRl+cGaVhVpNPass+ploclsLpPcsVDulVFWTKNPtPhl...shLsElcshuasYYFpFTlTsYspp.lEPsl.....Pshpp.tlcsFppLSctlG.-RVlWRYDPlllosp..hsh......caHhcsFpplAstLs..GaTc+ClISFlDh..YpKscpshppls.....hh.psspc....chtplupphupI.....Apca.ulpLpoCu-chs.LsthG...ItpupCIDtpllccl.....hGpph...........................thtK.D+uQR..ptCGChcSpDIGsYs..TChHGClYCYA .........................IlSsSRRTDIPAFYucWFhsRl.+..pGalhVpNPaN.pplo+lsLss....csVDslVFWTKNPtP.hl...spLppLpp....a.t.aYFpaTlTsYsc-.lEssl......Ps.hp.c.hlc.sFpcLSc.tl...G.tc..+llWRYDPIl.losp...hsh........paHhctFppluppLp..Ga.Tc.+sllSFlDh..YpKsppshtpht....................h.th.spc..........chhplupphscl.......Ap..p..a.....sl..plpoCuEphc....Ls.thG....lpputCIDtpll.ccl.....hGtp.l.........................................phtK..DpsQR..ptCuChtSh.DIGsYs..T.Ch+GChYCYA........................ 0 43 70 78 +8735 PF08903 DUF1846 Domain of unknown function (DUF1846) Bateman A anon PSI2 target BIG_73 Family This family of proteins are functionally uncharacterised. Some members of the family are annotated as ATP-dependent peptidases. However, we can find no support for this annotation. 
25.00 25.00 45.40 45.30 24.30 24.10 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.76 0.70 -6.04 22 660 2009-01-15 18:05:59 2006-07-26 19:20:06 6 3 644 4 66 448 23 479.30 62 99.14 CHANGED +IGFDs-KYLchQScHIhERlspFssKLYLEFGGKLhDDaHAuRVLPGF-PDuKl+hLpcLKDpsEIlIsIsAsDIE+sKlRGDhGITYDpDVLRLIDsa+shGLhVsSVVITpYsu.QsuAshF+p+LE+hGIKVYhHYsItGYPo-lchIVSDEGYGKN-YIEToRPLVVVTAPGPGSGKhATCLSQLYHEaKRGlcAGYAKFETFPlWNlPLKHPVNlAYEAATADLsDVNMIDPFHLEAYGcTsVNYNRDVElFPVLpphhE+IhG.cSPY+SPTDMGVNMsG.sIsDD-Asp-AS+QEIIRRYapshs-htpstssccplp+lcLlMppsslospDRtVVssAhphA-cs............ssPusAlELsDG..pIlTG+TSsLhsuouAhlLNAlKcLAsIsc-lcLISPpsIEPIQpLKsphLGS+NsRLcs-ElLIALSloAsosssAphAhcpLspL+GC-sHoosILossDcpshRKLGlplTs-PhYpsp.pLYp ...........................K.uFDsEpYLphQp-HILERIsQ.F...D.GKLYLEFGGKhl-DaHAuRVLPGa-PDsKI+lLpcLK-QVElVIsINAusIE+sKsRGDLGIoYDp-VLRLIDpFp-hGlaVGSVVITQYsG.QPAA-sF+spLE+pGIcsYhHYsIcGYPoDh-+IlSsEGhGKNDYIcToRsLlVVTAPGPGSGKLATClSphYH-phpGl+uGYAKFETFPVWNLPL+HPVNLAYEAATADLcDVNMIDPFHLpsYGcTTVNYNRDIElFPVLKRhlE+IhG..cSPYtSPTDMGVNMVGFuIsDDEAshEASKQEIIRRYYpTllDaK.tpp.ls-ssVcKIELLMsclGlossDR+VslsARpKAEcT..............GuPAlAlELPsG..pIVTGKsS-LhGsoAAsLlNAlKphAsIs.cEl+LIpP-slcPIQsLKhcaLGS+NPR.LHosElLIALuIoAspNPsAtpAh-cLspL+Gs-AHSTlILocpDcslLRKLGIsVThDPhYphc+LY................ 0 31 47 58 +8736 PF08904 DUF1849 Domain of unknown function (DUF1849) Bateman A anon PSI2 target BIG_67 Family This family of proteins are functionally uncharacterised. 
25.00 25.00 102.80 102.40 16.90 16.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.49 0.70 -5.05 26 192 2009-01-15 18:05:59 2006-07-26 19:20:35 6 1 186 0 65 145 128 251.00 40 87.83 CHANGED usAtAAuussLssHRAlYDLsLtcopsptulsuhpGRhlY-Fs.GsuC-GYoscaRhVoplpss-sssploD.posoaEstcG+sa+FpscohsssphsppVcGsAcps..scshpVclcpPcscshsLs.tslFPTpHhhclIctAcsGcphhpsslFDGo-sucKshpTsslIGcstssssssssst.ts...........................................phsshstWPVTluYFs....ssss..-thPsYphSFcLaENGVoccLslDYuDFslsGpLucl-lhcsp .......u.AtAuuuspLlPHRAlYDLoLscAsppoulsuhsGRMVYEFs.G..SAC-GYTosFRhVocls.s-pspcloDppoTTaE-uDG+sFRFhs+ohsscpls..ccVcGsAchp....sst.ssV+Lp+PcppolsLs.uo.FPTcHhtclIstAcuGp.phhpsslFDGS-cu-+lhsTosllG+tpss.ssscs.cs....s...........................................chuptphWPVTlAYF-......cppp....-thPlYchsF+LYcNGloRcLshDYGDFshpGcLscL-lhcp.s... 0 16 37 46 +8737 PF08905 DUF1850 Domain of unknown function (DUF1850) Bateman A anon PSI2 target BIG_87 Domain This family of proteins are functionally uncharacterised. Some members of this family appear to be misannotated as RocC an amino acid transporter from B. subtilis. 21.00 21.00 21.00 25.80 19.70 19.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.08 0.72 -4.17 50 249 2009-01-15 18:05:59 2006-07-26 19:21:19 6 2 241 0 113 252 29 90.80 25 58.67 CHANGED pFoLpWsHSVE+spWpEsaplssss....LhLscschcuhGAGM.....csssssphcsGhahap.shs.shspls.ltsos..susapLshssp.....s.......hsLsp .......FoltapHSVE+sthpEsaclssss....LhLtcs+hpuhGAGhs.......c.ss..sst.h.c.sG.hahhp..ht.sh..s..p..lp..ltsus..supaplshssp.....ph.l..t.................................... 0 37 76 94 +8738 PF08906 DUF1851 Domain of unknown function (DUF1851) Bateman A anon PSI2 target BIG_90 Domain This domain is found at the C-terminus of a variety of proteins that are functionally uncharacterised. 
27.40 27.40 27.40 27.70 26.30 27.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.40 0.72 -4.00 31 216 2009-01-15 18:05:59 2006-07-26 19:26:17 6 2 152 0 36 227 5 76.00 30 42.01 CHANGED pphspplptFhhthpssph..s........hcs.....LFctAtccLG.LctDEhYGFsPALsLGGstslcsLpKlchhEHLhlLuQlss ....................................thp.Fht.hpspp.............hcc.ht..happAlc+h..G..LphsEhaGasPhLsLGGttcl-NLpKV+hhEHlhllsQl..s........... 0 11 20 28 +8739 PF08907 DUF1853 Domain of unknown function (DUF1853) Bateman A anon PSI2 target BIG_96 Family This family of proteins are functionally uncharacterised. 25.00 25.00 27.30 27.20 24.50 24.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.85 0.70 -5.23 43 413 2012-10-11 20:44:44 2006-07-26 19:26:35 6 3 401 0 93 362 50 259.20 28 88.53 CHANGED RDLuWhLtoPsLlssss.......................thhttaLpphDts..sssltphh................tsspR..LGhYhEpLhpahLp.....pssshcLlApNltlp.pps...pTLGELDaLl+s..sssphhHhElAlKaYLh..........ssssspt...........spWlGPNtcDpLcpKLs+lhp+QLsLuppstspthLsph......tssppplhhpGhLFhP..........hsssssssttlsssplpGhWhphc.-a.ths..........sttathLs+hsWLu..ssth..............sthhstpplpphlpphts............P.hlhth........tsthpEspRhFlV ........................................RpLsalltuPsLhpsts.................................tthhshLttL-t.s...stsLtcaht................ts..hR...LGhhhEpLhtahlp.....psP.thcl..lAtsl..tlp..ssG....pTlGplDFLlct.....tsp..p..h.HhElAlKaYLt..............................p..................ssahGPNspDpLstKhs+hhpHQL.Lopp..st...hhphh.th..............tshptphhlpGhLahs.................................stsshs...slsspphpGhWhptp.-.htths.................ttahhLs+.sWLsstph...................tts.h.tttth.................................................................................................................. 
0 16 47 75 +8740 PF08908 DUF1852 Domain of unknown function (DUF1852) Bateman A anon PSI2 target BIG_94 Family This family of proteins are functionally uncharacterised. 25.00 25.00 134.20 134.10 21.50 20.70 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.84 0.70 -5.65 27 294 2009-01-15 18:05:59 2006-07-26 19:27:27 6 1 290 0 85 262 19 317.50 70 97.96 CHANGED Msp-FsFoIKsIpFDEsYpPS-sTRlTTNFANLARGcsRQENLRNsLpMIDNRFNsLAaWDNPpuDRYoVELEIISV-hcI..cGsupsFPhIEILKTsIlD+KTscRI-GIVGNNFSSYVRDYDFSVLL.-HNKspspFSsP-sFG-LHGKLFKpFlsSssY+ppFsKPPVICLSVSosKTYHRTtNpHPVLGlEYpQs.....EhSLTDpYFpKMGLpVRYFMPsNSVAPLAFYF..hGDLLsDYTNLELISTISTMETFQKIYRPEIYNANSsAGphYQPSLcaQDaSLTpIVYDREERSQLAlcQGKFsEEHFIKPYpslLEQWSAs ....MspcFTFoIKolpFDENYpPS.-sTRITTNFANLARGpsRQENLRNsLpMIDNRFNuLAHWDNPpuDRYoVELEIISV-hsI........cusupsFPhIElLKTsIVD+KTscRIEGIVGNNFSSYVRDYDFSVLL.-HNKspspF...SlP-sFG-LHGpLFKpFlNSssYKtpFpKsPVICLSVSos+TY+RTpNpHPVLGlEYpQs.....-hSLT-pYFpKMGLpVRYFMPsNSVAPLAFYF..hGDLLsDYTNLELISTISTMETFQKIYRPEIYNANSsAGpsYQPsL+ppDaSLTpIVYDREERSpLAl-QGKFsEEpFIKPYpslLEQWSA............ 0 11 29 59 +8741 PF08909 DUF1854 Domain of unknown function (DUF1854) Bateman A anon PSI2 target BIG_97 Family This potential domain is functionally uncharacterised. It is found at the C-terminus of a number of ATP transporter proteins suggesting this domain may be involved in ligand binding. 21.30 21.30 21.30 29.70 19.90 21.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.45 0.71 -4.56 22 103 2009-09-11 14:44:06 2006-07-26 19:28:22 6 3 102 0 52 97 16 131.80 42 45.30 CHANGED ps+-GVsPVRAFPIuAPscGluLlstDG+ElhWl-cLspLssssRtLlEp-LAsREFhPpIp+IpsVSoauTPSsWpVpTDRGpsphVL+GEEDIRRLsups.LLIsDscGlpahItDhssLD+cSRKLL-RFL ..................................sacuVsPVRAFPIosPscGlSLh....s.s-G+ElhWI-cL.s.cLssssRpllEp-LAtREFhPpIp+IhsVSoauo.PSsW..pV-TDRGtsph....sL.+.G.E.EDIRRL.sups.LLIsDscGlpahI.DhpsLD+pSRKlL-RFL...... 
0 11 33 48 +8742 PF08910 Aida_N DUF1855; Aida_N; Aida-C2; Aida N-terminus Mistry J, Sammut SJ, Coggill P, Zhang D, Eberhardt R anon pdb_1ug7 Family This is the N-terminal domain of the axin interactor, dorsalization-associated protein family [1]. 25.00 25.00 25.40 27.00 23.70 19.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.56 0.72 -3.81 8 75 2011-09-20 15:42:39 2006-08-01 13:23:48 5 4 53 1 50 59 0 93.80 62 33.31 CHANGED pls+Wpuuhc+usDFDSWGQLlEAlDEYQhLu+pLpKpspu.tsspsFTE-QKKhluKlATCLcLRSpALQs...Tuup-uhoL--lKKLcslLKsllsss.ctFPlcVp ..........h.lp+WtuSh++usDFDSWGQLVEAlDEYQhLARH....LQKEAQu.pNsS-FT.E-QKKTIGKIATCLELRSAALQS...TQSQEEFKLEDLKKLEPILKN.IL.TYN.KEFPFDVQ.............. 0 11 16 27 +8743 PF08911 NUP50 NUP50 (Nucleoporin 50 kDa) Mistry J, Sammut SJ anon pdb_2c1m Domain Nucleoporin 50 kDa (NUP50) acts as a cofactor for the importin-alpha:importin-beta heterodimer, which in turn allows for transportation of many nuclear-targeted proteins through nuclear pore complexes. The C terminus of NUP50 binds importin-beta through RAN-GTP, the N terminus binds the C terminus of importin-alpha, while a central domain binds importin-beta. NUP50:importin-alpha:importin-beta then binds cargo and can stimulate nuclear import. The N-terminal domain of NUP50 is also able to actively displace nuclear localisation signals from importin-alpha [1]. 22.50 22.50 22.60 23.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.57 0.72 -3.60 35 204 2009-01-15 18:05:59 2006-08-01 13:53:58 6 5 157 7 130 216 0 72.10 35 13.50 CHANGED uKRsAscpLo+DNa..DpE..--.s-EsGoFppASpElLpsRtItKs+RRt....ss...........ssssstushssFuuhshsss ....................uKRhApppLoccNa.......DpE.....-p..s--sGo.FphASp-VLpsRt.ItK..s+R+p.....s.......................pssssush.psFpuhths..s..................................... 
0 42 67 105 +8744 PF08912 Rho_Binding Rho Binding Mistry J, Sammut SJ anon pdb_1s1c Domain Rho Binding Domain is responsible for the recognition and binding of Rho binding domain-containing proteins (such as ROCK) to Rho, resulting in activation of the GTPase which in turn modulates the phosphorylation of various signalling proteins. This domain is within an amphipathic alpha-helical coiled-coil and interacts with Rho through predominantly hydrophobic interactions [1]. 23.00 23.00 24.00 26.90 22.90 22.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.32 0.72 -3.67 22 184 2009-01-15 18:05:59 2006-08-01 13:56:02 6 18 68 4 94 144 0 67.40 56 5.32 CHANGED LTpDluNLupEpEELNsKhKcupEchpphK-EE..hsslKAtFEKp...lpoER..TLKTQAVNKLAEIMNRK- ............................LTpDltpLspEKEELspKh...K.csp.E..c...hpthK-EE...hsslKApaEKp...LpoER..TLKTQ..AVNKLAEIMNRK-. 0 15 23 56 +8745 PF08913 VBS Vinculin Binding Site Mistry J, Sammut SJ anon pdb_2b0h Domain Vinculin binding sites are predominantly found in talin and talin-like molecules, enabling binding of vinculin to talin, stabilising integrin-mediated cell-matrix junctions. Talin, in turn, links integrins to the actin cytoskeleton. The consensus sequence for Vinculin binding sites is LxxAAxxVAxxVxxLIxxA, with a secondary structure prediction of four amphipathic helices. The hydrophobic residues that define the VBS are themselves 'masked' and are buried in the core of a series of helical bundles that make up the talin rod [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.58 0.71 -3.88 22 278 2009-01-15 18:05:59 2006-08-01 13:58:23 5 29 85 5 132 225 1 116.70 43 7.10 CHANGED oFVDYQTTMV+pAKAIAVTsQEMhTKSsTsP-ELGsLAsQhTsDYupLAtpup.AAssAEsEE..............................................................IGh+I+pRVQ-LGHGCssLVpKAGALQssPoDuYTK+ELIECARcVSEKVSpVLAALQAGNR .......................................................oFlDYQTphV+tuKuIAhoApEMs..spSss.sPp..-LusLA.....sphopD.as.pLsp..puh.AAusApspE..............................................................luhpI..+spVp-LGpus.tLlptAuALpssPs.DshsK+..........-Lhcs.ARsVoEKV.utVLuALQuGs+....................................... 0 32 42 78 +8746 PF08914 Myb_DNA-bind_2 Rap1 Myb domain Mistry J, Sammut SJ anon pdb_1fex Domain The Rap1 Myb domain adopts a canonical three-helix bundle tertiary structure, with the second and third helices forming a helix-turn-helix variant motif. The function of this domain is unclear: it may either interact with DNA via an adaptor protein or it may be only involved in protein-protein interactions [1]. 27.10 27.10 27.30 27.20 27.00 27.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.22 0.72 -4.17 11 164 2012-10-04 14:01:12 2006-08-01 13:59:53 6 13 122 1 117 163 0 62.50 33 11.24 CHANGED GRluaT-tEDsAILsYV+EpuRuP.uoVoGNALWKtMEKspLTpHSWQShKDRYLKHL+GQc+chL ......RhsaTspDDthlhpalt....p.......tcps..ss.h...s..G.Nplapph.......pcp....p.................pH..oWQSh+-RYlK+Lptp.................. 0 38 61 86 +8747 PF08915 tRNA-Thr_ED Archaea-specific editing domain of threonyl-tRNA synthetase Mistry J, Sammut SJ anon pdb_1y2q Domain Archaea-specific editing domain of threonyl-tRNA synthetase, with marked structural similarity to D-amino acids deacylases found in eubacteria and eukaryotes. This domain can bind D-amino acids, and ensures high fidelity during translation. 
It is especially responsible for removing incorrectly attached serine from tRNA-Thr. The domain forms a fold that can be be defined as two layers of beta-sheets (a three-stranded sheet and a five-stranded sheet), with two alpha-helices located adjacent to the five-stranded sheet [1]. 25.00 25.00 41.10 40.30 23.70 18.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.87 0.71 -4.20 35 141 2009-01-15 18:05:59 2006-08-01 14:01:24 6 3 129 15 97 144 14 135.50 45 25.44 CHANGED MRlLhIHuDhhcYcs+cKs.chAE-..tssppuch--sLVsFsuVE+sD-ps.p.llcpAlc-Ihcsup+lcssp..lllYPYAHLSSsLAsPpsAhclLcplE...ptL..tppshcVhRAPFGWYKuFpIsCKGHPLSELSRoI ..........M+lLhIHu-hhcacs+cKs.c..h..sE-...tp.cpsch--sLVsFhuVE+sD-ps.ttllcpAlp-IhclusplKsps..lVlYPYAHLSSsLusPcsAlclLcplE...ptL..tppshcVhRAPFGWYKuFcloCKGHPLSELSRsI........ 0 28 58 79 +8748 PF08916 Phe_ZIP Phenylalanine zipper Mistry J, Sammut SJ anon pdb_1q2h Domain The phenylalanine zipper consists of aromatic side chains from ten phenylalanine residues that are stacked within a hydrophobic core. This zipper mediates dimerisation of various proteins, such as APS, SH2-B and Lnk [1]. 25.00 25.00 30.00 35.20 23.70 20.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.79 0.72 -3.65 7 161 2009-01-15 18:05:59 2006-08-01 14:02:27 6 4 64 3 89 161 0 58.10 48 9.67 CHANGED .uWpEFCELHApsAAtDhA+pahhFlppN..Ppa.sPhust.FSt+Fs-hF.paFpsElpc .....uWcEFCEhHApAAAhDF.A++Fphal.......ppp..Ppa..ss.P.s.uts...uFS++Fs-h..FhpaFptEVt...... 0 12 20 44 +8749 PF08917 ecTbetaR2 Transforming growth factor beta receptor 2 ectodomain Mistry J, Sammut SJ anon pdb_1ktz Domain The Transforming growth factor beta receptor 2 ectodomain is a compact fold consisting of nine beta-strands and a single helix stabilised by a network of six intra strand disulphide bonds. 
The folding topology includes a central five-stranded antiparallel beta-sheet, eight-residues long at its centre, covered by a second layer consisting of two segments of two-stranded antiparallel beta-sheets (beta1-beta4, beta3-beta9) [1]. 25.00 25.00 25.00 36.20 21.50 22.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.02 0.71 -4.33 6 96 2009-01-15 18:05:59 2006-08-01 14:04:18 5 3 46 9 38 87 0 110.20 54 22.61 CHANGED lspLCKFCDVptooCsGpsoCtSNCsITSICEpP-EVCVAIWR+s-cNlTlETlCHcPphpLYGhhL-DtNoopCVMKE+pssuGphahCSCssE.ECNDhLlFs....s.p.sppppll ..........spLCKFCDVc.ooCcspcoChSNCSITSICEcspEVCVAlWRKND-NlTlETlCHDPphshHGa...hL-DssSsKClMKEKKssGpsFFMCSCss-.ECNDhlIFs...phssts........h............. 0 1 7 18 +8750 PF08918 PhoQ_Sensor PhoQ Sensor Mistry J, Sammut SJ anon pdb_1yax Domain The PhoQ Sensor is required for the virulence of various Gram-negative bacteria by allowing interaction of PhoPQ with the intracellular membrane, resulting in remodelling of the bacterial cell surface and subsequent bacterial resistance to host antimicrobial peptides. The domain contains a major flat acidic surface, which binds to at least 3 calcium ions, neutralising the domain's negative charge and allowing interaction with the negatively charged membrane [1]. 20.30 20.30 20.30 20.50 20.10 20.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.98 0.71 -4.77 10 556 2009-01-15 18:05:59 2006-08-01 14:06:04 5 7 550 8 46 195 7 178.50 77 37.00 CHANGED PFSLRsRFLlATAuVVLALSLuYGlVAlVGYSVSFDKToFRLLRGESNLFYSLAQWcNNKLoIslPPslDlNsPTLVLIYDEpGplLWRQRcVPcLEspIpsEWLc+sGaaELDTDscsSstlL.usNsphQcp...L+ch.-ssD-ssLTHSVAVNhYPATu+LPsLTIVVVDTIPQELQ+osl ....PLSLRVRFLLATAAVVLVLSLAYGMV.ALlG.YSVSFDKTTFR.LLRGESNLFYTLAKWENNKLpV..ELPE.N.l.DhQSPT.MT.LIYDEsGpLLWuQRDVPW.Lh.KhIQP-WLKoNGFHEIEuDVssTShLL.SsDHShQpQ...LpEV..RED.D...D.DAE.MTHSVAVNlYPATSRMPpLTIVVVDTIPlELKpSYM..................... 
0 2 11 28 +8751 PF08919 F_actin_bind F-actin binding Mistry J, Sammut SJ anon pdb_1zzp Domain The F-actin binding domain forms a compact bundle of four antiparallel alpha-helices, which are arranged in a left-handed topology. Binding of F-actin to the F-actin binding domain may result in cytoplasmic retention and subcellular distribution of the protein, as well as possible inhibition of protein function [1]. 22.20 22.20 24.30 28.80 21.40 22.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.48 0.72 -4.35 16 161 2009-01-15 18:05:59 2006-08-01 14:07:44 5 10 74 2 82 188 0 108.80 48 9.55 CHANGED ssssploK-ulLElsphLcsulsphpss..s.h............hupahpLuDphppLashCssYs-s.hhsPHsKFpFRELloRLEsps+pLRs..suups......sssss+llscltsol+-IsslVQR ....................................h.uspIoKpslL-ss-.hL.......ssAIocsspp.....................hspaStll-sG+pLhsaCsuYVDs.I.QhRNKFAFREAlsKLEssLpELQl......ssAsAG.....sPusspshscLLSoV+EISDlVQR.. 0 14 20 47 +8752 PF08920 SF3b1 Splicing factor 3B subunit 1 Mistry J, Sammut SJ anon pdb_2f9j Domain This family consists of several eukaryotic splicing factor 3B subunit 1 proteins, which associate with p14 through a C-terminus beta-strand that interacts with beta-3 of the p14 RNA recognition motif (RRM) beta-sheet, which is in turn connected to an alpha-helix by a loop that makes extensive contacts with both the shorter C-terminal helix and RRM of p14. This subunit is required for 'A' splicing complex assembly (formed by the stable binding of U2 snRNP to the branchpoint sequence in pre-mRNA) and 'E' splicing complex assembly [1]. 
25.10 25.10 25.90 25.40 24.80 25.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.17 0.71 -4.17 25 284 2012-10-02 12:52:19 2006-08-01 14:09:09 5 8 240 9 211 279 8 155.20 40 13.15 CHANGED sssspp+.+RSRWD...pTP......sss.sssss................s.hppsch..sp...sP.....upTPhs....sp....................uhsTPhsspp..................hpspphhthphpp-h-pRNRP.....LoDEELDplL..PsEGYcILcPPssYtPlRss....tpKlhtsssshts....ssFhl..-sspu..........tppl ................................................................................................................................ss...ts+.++SRWD.......p.TPt............sst..s.............................tssp++SRW..Dp..........TP..........s...uuTPss.st.....................shATPsssph................................hssppht.shpapp-I-cRNRP.....LoDEELDsMh..P...E..GYKlLtPPsGYsPIRTP....ARKLhATP.s..Phsu........sGFhhQ.pt-ps....tth..................................... 0 83 121 175 +8753 PF08921 DUF1904 Domain of unknown function (DUF1904) Mistry J, Sammut SJ anon pdb_1u9d Domain This domain is found in a set of hypothetical bacterial proteins. 21.10 21.10 21.20 21.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.28 0.72 -4.00 20 267 2012-10-01 20:38:22 2006-08-01 14:12:29 6 2 254 2 45 195 7 103.30 40 97.80 CHANGED MPHlRhRGlspctVpplScsLlc-Luslsssss-sFTlEalsSshapsGphspsashVEVLWFsRsp-spctlAphIsctlpph..sptpcVsVhFpsLssssYYcNGpH ..................MPHl+FRulptphlpplSpsLlpELuplhpsspc..sFThEhhs..opahhsG......ch...t.sa.PhVEVhWFs.RsQchp.DplApsITph.l+p...su.sclsVlFhsLspsuYY.sGpH...... 0 18 26 35 +8754 PF08922 DUF1905 Domain of unknown function (DUF1905) Mistry J, Sammut SJ anon pdb_2d9r Domain This domain is found in a set of hypothetical bacterial proteins. 
21.00 21.00 21.00 21.10 20.70 20.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.60 0.72 -4.41 48 396 2009-01-15 18:05:59 2006-08-01 14:14:49 6 7 306 1 165 375 65 81.40 27 63.68 CHANGED FsAplh...psss.....sasalslPh-sucplstt..............GplpVpuslsGpsaps.SlhP...spG......salLslctslR+stGh.ssGDpVplpl ..............Fpu.lh.....tsts......shsa.ltlPh-htcphs.t..............G.plpVpuslss.h.sacs.olhs...hGss.........salLslcpslRcthuh.psGDtVpVpl......... 0 76 135 155 +8755 PF08923 MAPKK1_Int Mitogen-activated protein kinase kinase 1 interacting Mistry J, Sammut SJ anon pdb_1vet Domain Mitogen-activated protein kinase kinase 1 interacting protein is a small subcellular adaptor protein required for MAPK signaling and ERK1/2 activation. The overall topology of this domain has a central five-stranded beta-sheet sandwiched between a two alpha-helix and a one alpha-helix layer [1]. 20.50 20.50 20.50 21.00 20.40 20.40 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.17 0.71 -4.47 7 141 2012-10-02 21:07:43 2006-08-01 14:16:48 5 4 114 5 84 146 2 113.10 45 83.01 CHANGED -cl+caL..thhppV-Gl...puIhloDRDGVslh+lup-ss.ss.shcPuhlsTFshAs-QusKLsLGcN+oIIshYpsaQlVQhN.....tLPLllohlusossNsGhILuL-ppltsll....p-lppsl ..................................................cl++aL..chL.pV.-G.L....auIlloDRDGVPll+..Vu..s-su..P-.hA.lRPuFLoT.Fu.hAoD.Q.uS....KL.GLuKN.KoIIshYss..YQ.V.V.QhN....................+LPLlloFIA.os....sA.NTGh....I.luLEccLsslh....c-Lcpsh..................................................................... 0 35 42 62 +8756 PF08924 DUF1906 Domain of unknown function (DUF1906) Mistry J, Sammut SJ anon pdb_1sfs Domain This domain is found in a set of uncharacterised hypothetical bacterial proteins. 
26.90 26.90 27.00 28.80 25.20 26.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.68 0.71 -4.30 35 469 2009-01-15 18:05:59 2006-08-01 14:22:11 6 19 346 1 122 422 5 132.00 30 30.10 CHANGED tustl.s.spApsl+.suGassV.sRYlosspssst....hsKslotsElcslhsuGLplhslYQhs........shts.usastu.ttGst-ApsAhphAtthGhspsssIYFuVD..DssssphsshllPYF+uhpsslut....s.....YcsGlYG .........................h....l....tsppl+..suGatss.sRYlosppssst............hsKslohsEhcsltssGLplhsl...YQhG.......................ttps..upas...t..u..sGhtcAp.pAhphtt.s.hGhP..s....ussIYhulD..Ds.sptphs...spllP...Yh+uhpsslst....p.........a+sGlYu..... 0 35 83 110 +8757 PF08925 DUF1907 Domain of Unknown Function (DUF1907) Mistry J, Sammut SJ anon pdb_1xcr Domain The structure of this domain displays an alpha-beta-beta-alpha four layer topology, with an HxHxxxxxxxxxH motif that coordinates a zinc ion, and an acetate anion at a site that likely supports the enzymatic activity of an ester hydrolase [1]. 
25.00 25.00 25.40 29.40 19.60 24.50 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.86 0.70 -5.32 14 174 2009-01-15 18:05:59 2006-08-01 14:24:16 6 5 109 2 102 187 329 237.30 46 85.41 CHANGED VlpuuLppNFtsVpVoVspCPDLpcsPFphsspGLsGpspls-VGGssaLlPhsphsKhYslhslu+c.hch....ssth..llGAGAGPa.hlGpNsEhhhNlphp................psspssNtSahuplsstsspsllcKhsp..scschuLLuNLahsEG.KPGpVL+lpA+pRTGpc.sFlsCIRpuLcp+YG-..+sVuLGGsFll+pGKA+hHlMP.-F...SpsPlpocEcl.NpWL+aa-MsAPLlslsVllSpDP..GLDLRlEHhHsFS.pHGc..GGHYHaDT...TP-pVEYcGYFssAEtlYRl ......................VlptuLppNFtplpVs.Vs-CPDLsptPFphsspGls..Gpspls-VGGsPaLlPhsp..pcK.hYslppls+c.lph....sssh..llGAGAG..Pa.hhG.NsEhh.slphp................ttp..ssNuSahuplp..s.....ts.t.tsllc+h.sp....pchphuLLuNLahS-G.p.P.G..c.Vlc.lpA+pRpG.t....salsshRpsLpp+Yss..csluhGGhFllppGcs+.HlMs.-F..............SpsPlp...oc-cl..spWL+aa-hp.APLlshsshVop.........Ds...shDLRlpHhHs.FS.pcsp....GGHYHhDT......TP-tVEY.GYF.sA-hlhRl...................................... 0 31 40 80 +8758 PF08926 DUF1908 Domain of unknown function (DUF1908) Mistry J, Sammut SJ anon pdb_1v9v Domain This domain is found in a set of hypothetical/structural eukaryotic proteins. 
25.00 25.00 27.70 27.70 21.10 20.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.89 0.70 -5.64 6 371 2009-01-15 18:05:59 2006-08-01 14:26:40 6 10 86 1 177 290 0 235.00 62 18.97 CHANGED PlDSPRNhSsuuslsFPF..ARp.hsp..RADGRRWSlASLPSSGYGTNsPSSolSSSsSSQERL....HQLPaQPTsD-L+FLuKHFRSoE......................................................................SssDED..................G+hSPhhRPRSRSL.SPGRSssoFDNEIlMMNHVYKERFPKATAQMEERLp-hIschs..PssoLsLADGVLGFIHHQllELARDCLcKSpsuLlTSRYFhELQEKLE+LLpEA+ERS-SEEVshIspLl+KLLlIISRPARLLECLEFDPEEFY+LLEAAEGQAKVsQGIKTDIPRYIIpQLGLsRDPlEEl ....................................P.DSPRNhS.ss.ssh.pFsF...s...................Rs....DG..RRWSLASLP..S.SGYGT.....N.TP...S..ST..V....S..S..S.sSSQE+L..........HQL.P.a.Q..P....T.sDE......L......+FLoKHFtooE....................................................................................................................................................S...lssEp...................s.pp.us...hR.PR.SRSL..SPGRos.s......shDp..EIlMMNH..VYKERFPKATAQMEERLp.-hlps.s............PsssL...s.LAD..GlLuFlHHQllElARDCLsKS.cpsLITSpYFhELQ-pLE+L.Lp-Aa..-RS.-StElsaltpLV+KlLIlIuRPARLLE.CL.E..FDPEEFYa.LLEAA..EGHAKEG.p.G...............I....Ks....DIP.+YIIsQLGLs+DPLtEh.......................... 0 34 49 103 +8760 PF08928 DUF1910 Domain of unknown function (DUF1910) Mistry J, Sammut SJ anon pdb_2fef Domain This domain is found in a set of hypothetical bacterial proteins. 
21.90 21.90 22.00 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.62 0.71 -4.07 16 185 2009-01-15 18:05:59 2006-08-01 14:30:54 5 3 113 0 30 177 5 119.60 23 41.43 CHANGED RDsLpsEphhhctIc.pcctltch...pEcIhphcpctcpslpRh.........tshhshahhshchllAcYShGpsl-phps.a.pslsthEc..................hh-tc.stYh.hLWhlSLuILLch-cp.plppLspll .....................................RD.ltscphhpchlph.pctIpc....ppplhphc...psppps.htp..........thhhshht..hp...h.c.hlhspYShGpslpplpp.a.phlp..hhcp............................shptp..shY.hphlahlSLulLLchccc..hpcLhphl................................ 0 7 18 27 +8761 PF08929 DUF1911 Domain of unknown function (DUF1911) Mistry J, Sammut SJ anon pdb_2fef Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 30.30 27.20 22.00 18.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.51 0.72 -3.37 20 220 2009-01-15 18:05:59 2006-08-01 15:05:44 5 3 139 3 37 214 5 107.20 32 37.40 CHANGED pcs...pDhLlchLlsstshs....p.spchhappPYttLhpsl.....spst-ppsctLtpYl.cpWY+uhcshsW+sp............H+tstt.....sYaGYWuFEuAAlshlhulDDSuh+Dpsa.YPtDL ..............................................p....pDhLl-hllth.tt.th...p.spp..hhh.pPYtthh.chl.....ppspppt.ctLppYl.cpWYcu.ppch..s..W..+ss..................HKps.......sYhGYWuFEsAAlsKlhsl.D..DosL+sps.a.YPhDL............. 0 8 23 32 +8762 PF08930 DUF1912 Domain of unknown function (DUF1912) Mistry J, Sammut SJ anon pdb_1z0p Domain This domain has no known function. It is found in various Streptococcal proteins. 
25.00 25.00 94.70 94.60 23.10 16.30 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -10.05 0.72 -3.89 17 344 2009-09-11 13:53:27 2006-08-01 15:07:31 5 2 343 1 24 96 0 83.70 81 98.41 CHANGED MSYEQEFLKDFE-WVpoQIplNQhAMsoupKVhEEDtDERAtDAaIRYESKLDAYcFL.GKFsNY+NGKuFHDlPDGLFGpRHY MSYEQEFMKEFEAWVNTQIMIN-MAhKESQKVY.EEDQDERAKDAMIRYESRLDAYQFLLGKFENFKAGKGFHDLP-GLFGERNY. 0 2 5 14 +8763 PF08931 DUF1913 Domain of unknown function (DUF1913) Mistry J, Sammut SJ anon pdb_1zru Domain This domain has no known function. It is found in a various putative receptor proteins from Lactococcus bacteriophages. 25.00 25.00 25.10 28.30 20.90 20.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.74 0.71 -4.40 5 97 2009-01-15 18:05:59 2006-08-01 15:09:40 5 2 97 66 0 69 0 145.80 80 54.39 CHANGED MTIKNFTFFSPNGTEFPVGSNNDAKLYMMLTGMDYsTIRRKDWpoPlNTALNVQYsNTSIIAGGRYFELlNETVALNANSVNYIHANIDLTQTTsPVSLSAETSDNSNsVDINNuSGVLKVlIDIlTTNGhGVostcsPsQsToLD .......MTIKNFTFFSPNGTEFPVGSNNDGKLYMMLTGMDYGTIRRKDWoSPL.NTALNVQYsNTSIIAGGRYFELL.NETVALKuNSVNYIHANIDLTQTAsPVSLSAETus...NSNsVDlNNsSGVLKVshDIhTTsGTGVhSsKs.spsohLD.............. 0 0 0 0 +8764 PF08932 DUF1914 Domain of unknown function (DUF1914) Mistry J, Sammut SJ anon pdb_1zru Domain This domain has no known function. It is found in a various putative receptor proteins from Lactococcus bacteriophages. 20.70 20.70 21.60 21.70 20.60 18.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.59 0.71 -4.05 9 84 2012-10-01 20:11:45 2006-08-01 15:11:48 5 3 82 140 3 68 0 114.10 37 30.57 CHANGED lssNsls...hpGSlsl.hp..sh....sGsGLphphpKKs.-lVlh+ahGpl...sshssGhphu..WVctPap..Pshsp.SLlG+F.........sspusSFHIDlsPsGohpWWGsshupssl..RGsu.YFI ..........................lssNslo...lpGSlsVPspp.Th...psGsGLpLpLpKKNsDlVIl+ahGsl...ss.lppGhshuh...sWVchPap..PsssQ.SLlGHh.........sGpsssFHIDlsPsGolsWWGsslusssh..RGsuoYFI........ 
0 2 2 2 +8765 PF08933 DUF1864 Domain of unknown function (DUF1864) Mistry J, Sammut SJ anon pdb_1zee Domain This domain has no known function. It is found in various hypothetical and conserved domain proteins. 19.70 19.70 20.30 19.70 19.60 19.40 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.27 0.70 -5.94 11 77 2012-10-01 19:57:26 2006-08-01 15:13:56 6 1 76 12 22 87 28 363.90 45 96.75 CHANGED TpsstAFDcWIRscFl-lNocLEpLYapQsD+AsVsGlG-sLKttLcsEG+shIpsLLsEGNTDEGFDuAFDLLGNVGLYMAACRRHEITEPSRETsSPLhEASALAMHIGASIGVsPRFATAHLTTHNpAhsGlYKRFTsLsDE+LFlDYNT+GILAYKRAADALL+I.PLGloHPhoA-LLcsAcpALpcVl-SNptLFppLDs-RFF.CVRPYYKPaRVGuplYRGANAGDFAGINVIDLlLGLCtAN-PuYSQhLVDKFLYMMPEDQslLRDCMRRsSLMDcFLsutctspp..sWaQ-Nl+LFLpVCchHGpTAIQHHsQLVpKaIApPucphpQpHhup....lTASGPPLcVLLsuLcKLRD+RAAA+.RsDIpTRapDIp...tLKuoL .............................................hss.Ahs-.lpuR..tLs.pL.tL....sc+tsV....GIss.L+phL.scGhs..........os.GFspAhshhtslGhahuuh+RH..t.pEPsc....ssPhlEsshLsh..utuhGlsPR.shhHlTsaN....Atssh.+paTsLsDEthhl.-.sphuh.hAhctAhsAhl..clps....lulpp..Phhuphhcshttt...LpchlES.shhap.....plssphFa.t.plRPY...............Y.......cPhR.......VGup.........sYh..GssAs-hs.l.VlDhlLhhs..pAscsuYpphh.shh.YhhPp.ptlhtchhtcPuLhDchLttt.p.us.sp....hpEslph........G.sAlp+h.pllh+FhA..sp.h-p..ta.ut.......shuSGs.h.shLusL.pLp...RAAsp.RtcItst.................................................................................................. 0 3 6 15 +8766 PF08934 Rb_C Rb C-terminal domain Mistry J, Sammut SJ anon pdb_2aze Domain The Rb C-terminal domain is required for high-affinity binding to E2F-DP complexes and for maximal repression of E2F-responsive promoters, thereby acting as a growth suppressor by blocking the G1-S transition of the cell cycle. This domain has a strand-loop-helix structure, which directly interacts with both E2F1 and DP1, followed by a tail segment that lacks regular secondary structure [1]. 
20.70 20.70 20.70 21.00 20.60 20.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.96 0.71 -4.59 6 127 2009-01-15 18:05:59 2006-08-01 15:15:48 5 4 54 18 50 123 0 125.00 55 15.18 CHANGED ILQYASsRPPTLSPIPHIPRSPYKhPNSPLRVPGSNNlYISPLKS.oR.....hSPshMTPRSRILVSIGESFGou-KFQKINQMVsSSDRuhKRohDuSuAPKPLKRLRFDlDGQDEADGSKs.uGEStLIQKLAEMoSTRSRMQEQKMKE-s-occc.p ...................csPsLSPIPHIP.p.SPh.p.ss.SPhRIstt.sIYISPhKs..........sothTPRSplL.hphutS.......ScchpcINpMlps.u-.R.shKRuhp.s.u.ss.pPhK+Lp...................................................p.................... 0 4 6 18 +8767 PF08935 VP4_2 DUF1865; Viral protein VP4 subunit Mistry J, Sammut SJ anon pdb_2bai Domain This domain is predominantly found in viral proteins from the family Picornaviridae. It is VP4 of the viral polyprotein which, in poliovirus, is part of the capsid that consists of 60 copies each of four proteins VP1, VP2, VP3, and VP4 arranged on an icosahedral lattice [1]. VP4 is on the inside and differs from the others in being small, myristoylated and having an extended structure. Productive infection involves the externalisation of the VP4, which is cleaved from the rest, along with the N-terminus of VP1. There thus seem to be three stages of the virus, ie a multi-step process for cell entry involving RNA translocation through a membrane channel formed by the externalised N termini of VP1 [2]. 22.20 22.20 24.40 22.80 19.10 18.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -9.87 0.72 -3.38 12 688 2009-01-15 18:05:59 2006-08-01 15:18:15 5 18 80 15 0 699 0 81.60 66 4.62 CHANGED GsGpSS.ssGspNpSGNoGsIINNaY.pQYQNShD..LusNshSstuspusssoosopoppotssshFSplus.........LLA ....GAGQSSPATGSQNQSGNTGSIINNYYMQQYQNSMDTQLGDNAISGG.......SNEGSTDTTSTHTsNT....QN......NDWFSKLAsSAFoGLFGALLA..................... 
0 0 0 0 +8768 PF08936 CsoSCA Carboxysome Shell Carbonic Anhydrase Mistry J, Sammut SJ anon pdb_2fgy Domain Carboxysome Shell Carbonic Anhydrase is a bacterial carbonic anhydrase localised in the carboxysome, where it converts bicarbonate ions to carbon dioxide for use in carbon fixation. It contains three domains, these being: (1) an N-terminal domain composed primarily of four alpha-helices; (2) a catalytic domain containing a tightly bound zinc ion and (3) a C-terminal domain with weak structural similarity to the catalytic domain [1]. 20.60 20.60 21.20 20.90 19.60 19.60 hmmbuild -o /dev/null HMM SEED 459 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.51 0.70 -6.13 22 80 2009-01-15 18:05:59 2006-08-01 15:21:33 5 1 77 2 31 84 273 428.30 46 83.35 CHANGED HPLocpstNpcLpsYEppVKu+F-cIVPlLKclSulQH-.DFlppAQpLA+tcLGFsLPpplL-cAWVpsLDMRu......LaAaCVFpoapphS-pFFpsDPLpu....ppupth-sFLh-CGFHhlDlo.PCADGRLAHsluYsLRl.PauuVR.R+uHAGAhFDlEsoVs+Wl+TEHpRaREuhPNsAcpsTRYLKlslYHFSShDPsHpGCAAHGSsDthAApAuLpRLh-FRpAlENoFCCGASVDlLLIGlDTDTDAIRVHlPsssGclsLccalsstplYppThsLou-pA+tpItpsl........pstusus.p.GMhphIspLltNNlSQIDYVpphHsGpYs..DhGHAERFIGVG.GF+EVpLRNLoYFAHLDTVEEGAPDLDVGlKIFpGLNVu+sLPIPVllRFDYsu+VPGAR-RAlscCpRVpsAIpsRYs-LsspGLLashlTlR..DRsttssuEsVuushD ...................................................................HPLospttNppLhsYEpplKucF-pIVPsL+cluulQH-.DFhppAQplu+tcLGacLPpplL-cAWVpsLDMRA......LaAaClFpoachhu-pFapssPLpt....tpupthcpFLl-CGaHhlDlo.PCuDGRLutslsYsLRl.Phu.uVR.R+uaAGAhFDlEssVp+WscTEhcRaREuhPNsAptsTRYLKllsYHFSSsDPpHpGCAAHGSsDchAApAuhpRLh.cFRpAVENoFCCG.A.SVDlLLIGlDTDTDAIRVHlPsscG.chsLc+alsstpLYppThshss-pActpltpsl.........tutusus.p.GMhphlspLltNNlSQIDYVpphHsGtYs..DhGHAERFIGVG.GFcEVpLRNLsYFAHLDTVEEGAsDLDVGl.K.IFpuLNVu+sLPIPVll+F-Ysu+VPGuRERAltcCpRVpsAIpsRYscLsppGLLpstholR..D+stssshEhlut......................... 
0 8 19 28 +8769 PF08937 DUF1863 MTH538 TIR-like domain (DUF1863) Mistry J, Sammut SJ anon pdb_1eiw Domain This domain adopts the flavodoxin fold, that is, five parallel beta-strands and four helical segments. The structure is a three-layer sandwich with alpha-1 and alpha-4 on one side of the beta-sheet, and alpha-2 and alpha-3 on the other side. Probable role in signal transduction as a phosphorylation-independent conformational switch protein [1]. This domain is similar to the TIR domain. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.55 0.71 -4.10 55 389 2012-10-02 18:56:14 2006-08-01 15:25:41 6 7 328 1 118 650 62 126.70 18 55.87 CHANGED ++lFlSacapsDs...php........pl+shhstp....tp.....t.hshs.hc.h.....pppscs....tI+chI.ccplpsossslVLlGppT.tp.........pcWVcaEIp.........................................................tuhc.........pspsllulplpshp..s............ssshsssshsh ...................................................hcsFlSapapcst........hhp..........tltphh..tp...................t..php...phc.h.....pppspt.....tlcphl.ccplpsossslVL.lGtpT.tp...........................ppW.l..pa..E.....I..p......................................................................................................................................................................t..uhc.....................tspsllulhlps....pph........................hh............................................................................................................................................................................... 0 36 74 99 +8770 PF08938 HBS1_N DUF1916; HBS1 N-terminus Mistry J, Sammut SJ, Eberhardt R anon pdb_1ufz Domain This domain is found at the N-terminus of HBS1 proteins. It interacts with the ribosomal protein rpS3 at the mRNA entry site [1]. 
23.00 23.00 23.50 23.50 22.60 22.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.55 0.72 -3.86 47 250 2009-01-15 18:05:59 2006-08-01 15:33:01 5 9 187 2 144 233 1 100.30 25 15.27 CHANGED D.-Dh..................................................psEs-....................tpcpLssE-pct.hppshspl+ptLsstss.h..s-pplp-ALWaaYaDl-KulshLhpchpsptsppp ............................................................................p...-h...................................................-.-sp.............................tps.tpcpLss..Dpsp.L.sslschRplLG-ss.....s-pplhEAlh+.......htFDlpKulshllpppptps.t..t.................. 1 35 63 107 +8771 PF08939 DUF1917 Domain of unknown function (DUF1917) Mistry J, Sammut SJ anon pdb_1ztp Domain This domain is found in various hypothetical and basophilic leukaemia proteins. It has no known function. 20.40 20.40 20.40 20.70 19.70 20.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.76 0.70 -4.63 20 148 2009-09-11 14:20:58 2006-08-01 15:35:33 5 5 120 6 107 159 2 202.60 27 72.09 CHANGED phcphspsasstsaatptcsph.thh.....pst..............................t-tcspplhpstpos...................sEssssFLsRlsPuss.........Ia..........................t....chsptGpphLpthpphtthlptppsp.....t...uhsRtlsstRpth.pplhpLAhpstlhoGKWhlFlss-.cVDcsWptVAcATl.pGcLGhuAKVuT.......tscspspsRLIsVYTc......DasDcsDVhRVlc+LccL.Gllc.tt...IhYKsDsa..TYLs 
............................................................................................................t................................................................................t...........ts........................................................tttlstaLt+h.Psps...........................................................s......thhp.up...p.ht.hh.hhtt...t................tshpt...spt....+....h.tplhpLAhppplhoGKWhla.hsss...clD.csWstlAcAss..pGc.....L..........s....A....KVus..................psptstpplICVYTp......DFpDct-Vh+lhp.tl.+ph...Glhp...............lhYKsDsaTah.................................................. 0 33 62 83 +8772 PF08940 DUF1918 Domain of unknown function (DUF1918) Mistry J, Sammut SJ anon pdb_2a7y Domain This domain, found in various hypothetical bacterial proteins, has no known function. 20.80 20.80 21.10 26.40 20.70 18.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.81 0.72 -4.63 18 199 2009-01-15 18:05:59 2006-08-01 15:37:26 6 7 132 1 67 133 1 57.00 55 62.72 CHANGED M+ApsGDpLlV+Gpslsps-RcGEIlEV+G-cGuPPYlVRWpDpGHpohVaPG..PDAhV ...M+AcVGDhLVl+GsTlsptD+cGEIlEVRu.sDGuPPYlVRW...-sGH-uhVaPG..PDAlV............... 0 23 55 66 +8773 PF08941 USP8_interact USP8 interacting Mistry J, Sammut SJ anon pdb_2fzp Domain This domain interacts with the UBP deubiquitinating enzyme USP8. 
25.00 25.00 29.50 32.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.27 0.71 -4.95 5 110 2009-01-15 18:05:59 2006-08-01 15:38:36 5 7 71 6 69 97 0 166.80 59 55.14 CHANGED ELRolVQsQsp+lAELKpppsDpcpQluEQKRElpLlKtYlRAlRSoN..........PthRNlu-QlE+s..ElhcWssGLssARVTRWGGMISTPDssLQhlIRRuLsESGCPsHILN-LlENCHERRWPpGLuTLETRQtNRRhYEpYVsRRIP....GKQAVVVhuCENpHMuEslps-PGLVMIFAHGVE ................................cLRolVQpQQs+IuELccptuEpcpQluE.QKR-lQLLKsYMRAl.RSsN..........................PslpNlt-plEhs..EllcWssoLt.ARVTRWGGMISTPDslLQ..thIKRuLsESGCPspIls-LhENsHERpWPpGLuTLETRQhNRRhY-NYVs+RIP....GK......QAVVVhuC-NpHMs-Dhh.EPGLVM...IFAHGVE............................ 0 17 21 43 +8774 PF08942 DUF1919 Domain of unknown function (DUF1919) Mistry J, Sammut SJ anon pdb_2g6t Domain This domain has no known function. It is found in various hypothetical and putative bacterial proteins. 25.00 25.00 25.80 25.70 21.10 19.90 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.40 0.71 -4.96 15 177 2009-01-15 18:05:59 2006-08-01 15:41:26 5 2 148 2 17 139 4 178.80 38 78.08 CHANGED l....+p.........hh++lh+phhspph+p+LpNp...shTIISsNChGuhIhccLuhpFpSPFlNLalpssDYlKhLcNhcaYhpp-..LsFhpsscs..............tYPlGpL....sDIcl+FhHYpS.cEA+sKWpcRpcRINaDNLalhhsD+Duso.c.lpcFDpLPaKNKVlFosK....sYsplcSshaI.uhEsps....pVushh...tth.s+cYhcpFDhlsWlN ...............................hhpph.t..hshh.hhcLpsp...sholISsNChGuhlh+pLs..apoPFVs.Lal.s.pDal+hLpNhcaY.hppp..LsFhpppps.................YPlGh.L......sDlcIHFhHYpS.pEAppKWpcRppR.l.......Nh......cNLahhhs.-cD...u...s...o.c.lpcFDpLPapNKllFsp+....sY..thc..S..thhlpu..Epps....plu.hh.........pp.hhp.FDhhsWhp................................................ 0 6 11 15 +8775 PF08943 CsiD CsiD Mistry J, Sammut SJ anon pdb_1jr7 Domain This family consists of various bacterial proteins pertaining to the non-haem Fe(II)-dependent oxygenase family. 
Exact function is unknown, but a putative role includes involvement in the control of utilisation of gamma-aminobutyric acid [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -12.02 0.70 -5.52 11 476 2012-10-10 13:59:34 2006-08-01 15:43:17 5 1 456 2 26 252 906 284.50 85 90.83 CHANGED GaolssaspssRLhplTlscpsLcsFl-pspcasVQALEYKPFhRFclAchLcclsussLtshLssIlpDRcpGuFllsspGlss........ps--hVKhuTAluHLIG.sNaDAMoGpYYARFsVKssDNSDSYLRQAaRlM-LHsDGTaVcEtTDalLMMKhDEpNhtGG-ShLLHLDDWpDL-+FasHPLA+pshhasuPsSKNVsccVpHPVFFt.DspG+PsMpFIDQFspPpNh-EGhaLp-LS-SlEsSpsshsVplPlGshlVlNNaFWLHGRctFpts.sLpRELMRQRGtFs ..............................................................................GFTLhPSAQSPRLLELTFTEQTT+QFLEQVAEWPVQALEYKSFLRFRVGKILDDLCANQLQPLLLKTLLNRAEGALLINAVGIDDVA......QADEMVKLATAVAHLIGRSNFDAMSGQYYARFVVKNVDNSDSYLRQP......HRVMELHNDGTYVEEITDYVLMMKIDEQNMQGGNSL.........LLHLDD.W.E.H.L.Dc.aFRHPLARRPMRFA....A.PP....S.....K..N.V...SK.D.VFH...PV..FDVDQQ.G.R.PVM.RY....I....DQF.VQ........PK...DFEEGV.W..LS..ELSD...AI..E.......TSK..GI.......L.SV.......P.....VPVGKFLLINNLFWLHGRDRFT.P..HP.D...LRRELMRQRGYFA.............. 1 5 8 18 +8776 PF08944 p47_phox_C NADPH oxidase subunit p47Phox, C terminal domain Mistry J, Sammut SJ anon pdb_1k4u Domain The C terminal domain of the phagocyte NADPH oxidase subunit p47Phox contains conserved PxxP motifs that allow binding to SH3 domains, with subsequent activation of the NADPH oxidase, and generation of superoxide, which plays a crucial role in host defense against microbial infection [1]. 
21.50 21.50 22.30 22.40 21.20 20.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.94 0.72 -3.78 12 85 2009-01-15 18:05:59 2006-08-01 15:48:52 6 11 38 2 33 74 0 58.90 39 14.82 CHANGED .LQpR+pt.cshPpusuos......spsppsKstPslPPRPSs-LILcRCoEsT++Klo..pus ............................................hppR+p..tshsppssss....................spspcsKspPsVPPRPSs-LILpRCoEsT++Kls.....s....... 0 4 9 18 +8777 PF08945 Bclx_interact Bcl-x interacting, BH3 domain Mistry J, Sammut SJ anon pdb_1pq1 Domain This domain is a long alpha helix, required for interaction with Bcl-x. It is found in BAM, Bim and Bcl2-like protein 11 [1]. This domain is also known as the BH3 domain between residues 146 and 161. 25.00 25.00 62.30 62.20 21.80 20.60 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.05 0.72 -4.42 6 51 2009-09-10 23:22:43 2006-08-01 15:49:49 5 2 33 14 21 67 0 38.60 75 22.36 CHANGED huup+ppputPsc.lpPElWIAQELRRIGDEFNu.a..P.RRu ......MAShRQSQAEPAD.MRPEIWIAQELRRIGDEFNA.Y.Ys.RR.... 0 2 3 6 +8778 PF08946 Osmo_CC Osmosensory transporter coiled coil Mistry J, Sammut SJ anon pdb_1r48 Domain The osmosensory transporter coiled coil is a C-terminal domain found in various bacterial osmoprotective transporters, such as ProP, Proline/betaine transporter, Proline permease 2 and the citrate proton symporters. It adopts an antiparallel coiled-coil structure, and is essential for osmosensory and osmoprotectant transporter function [1]. 23.80 23.80 23.90 24.40 23.70 23.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.31 0.72 -3.93 10 544 2009-01-15 18:05:59 2006-08-01 15:51:13 5 3 540 2 42 183 0 46.10 82 9.30 CHANGED SDlpEAKElLpEHHDNIEQKIEDIDpQIAELpcKRp+LlcQHP+Is .....SDIQEAKEILsEHYDNIEQKIDDI.......D.......cEI.......A-....LQ.......A.......K.......RoRLVQQHPRID.... 
0 5 11 25 +8779 PF08947 BPS BPS (Between PH and SH2) Mistry J, Sammut SJ anon pdb_2auh Domain The BPS (Between PH and SH2) domain, comprised of 2 beta strands and a C-terminal helix, is an approximately 45 residue region found in the adaptor proteins Grb7/10/14 that mediates inhibition of the tyrosine kinase domain of the insulin receptor by binding of the N-terminal portion of the BPS domain to the substrate peptide groove of the kinase, acting as a pseudosubstrate inhibitor [1]. 19.10 19.10 19.20 25.40 18.30 17.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.42 0.72 -4.34 4 191 2009-01-15 18:05:59 2006-08-01 15:55:43 5 6 54 1 97 163 0 47.70 66 9.83 CHANGED RSsSENSLVAMDFSGct.GRVI-NPsEA.SsAlEEGpAWRK+oshRhuhh ............RSlSENSLVAMDFSGpt.GRVIENPsEA.SsAlEEG....pAW.R..K+us.Rhsh.......... 0 11 18 45 +8780 PF08948 DUF1859 Domain of unknown function (DUF1859) Mistry J, Sammut SJ anon pdb_1w8x Domain This domain has no known function. It is predominantly found in the N-terminus of bacteriophage spike proteins [1]. 25.00 25.00 32.00 205.40 21.70 20.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.68 0.71 -4.39 2 14 2009-01-15 18:05:59 2006-08-03 15:07:56 5 2 6 1 0 7 0 123.50 66 53.00 CHANGED Msspp.st.TVT..YNGs.shtGP.ulpuhh-.lAGppVhhDLph.hsTtthoGVQslYID.t-..Gshplsh.-TGQRIps.AGpQGYaPlLssshhKFlspthhsGK..hPh.FlNFsIA.GVW Msspp.st.TVT..YNGs.shtGP.ulpuhh-.lAGppVhhDLph.hsTtthoGVQslYID.t-..Gshplsh.-TGQRIps.AGpQGYaPlLssshhKFlspthhsGK..hPh.FlNFsIA.GVW 0 0 0 0 +8781 PF08949 DUF1860 Domain of unknown function (DUF1860) Mistry J, Sammut SJ anon pdb_1w8x Domain This domain has no known function. It is predominantly found in the C-terminus of bacteriophage spike proteins [1]. 
25.00 25.00 462.10 461.40 19.00 18.90 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.65 0.70 -4.74 2 7 2009-01-15 18:05:59 2006-08-03 15:09:31 5 1 6 6 0 7 0 219.00 96 64.41 CHANGED PSGIKGDKGDPGtPGPAGGTVVVEDSGA.FGESLLDTsS-PGKILVKRISuGSGITlTDYGDclEIEAo.GGGGGGGGVTDALSLhYoTSoGGPASIAANALTDFDLSGALplNpVGTGhTKuAsGIQLAAGKSGLYQlTMTVKNNTVTTGNYLLRVKYGSs-aVsACPASoLTAGGTISLLIYCpVLGVPSLDVLKFSLCNDGAALSNYIINITAAKIN PSGIKGDKGDPGtPGPAGGTVVVEDSGA.FGESLLDTTSEPGKILVKRISuGSGITVTDYGDEVEIEAS.GGGGGGGGVTDALSLMYSTSTGGPASIAANALTDFDLSGALTVNoVGTGLTKSAAGIQLAAGKSGLYQITMTVKNNTVTTGNYLLRVKYGSSDFVVACPASSLTAGGTISLLIYCsVLGVPSLDVLKFSLCNDGAALSNYIINITAAKIN 0 0 0 0 +8782 PF08950 DUF1861 Protein of unknown function (DUF1861) Mistry J, Sammut SJ anon pdb_2b4w Domain This hypothetical protein, found in bacteria and in the eukaryote Leishmania, has no known function. 18.70 18.70 102.30 102.20 17.30 16.20 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.91 0.70 -5.61 12 151 2009-01-15 18:05:59 2006-08-03 15:10:58 5 1 107 1 25 116 5 292.00 48 95.12 CHANGED htsFcts..cpha-sshLoFpGVsshDVYNhSsPFphpGchaIhGRVEcRDp.tpScVhhFpEstcspassV.sshsapLpDPFlo+lpsEhlFGGsclh....sssphhsahssFYRGp.hppLpaFsoGP-tMKDIRlVcLtDG+IGVFoRPpsc..ucshIGFshlssLsELTs-sIspA.hlc.thps-tWGGVNpsaLLosGplGsluHhuhh.Dpcsscc....YhshSFVhsPcTpphpph+lIuT+usFsssssKtPcLtDssFuuGllh+sDG+s-LYuGlGDsctt+lsI-.PFcGa ...........h...acpp..ppsapst+LpFsGV.s-+DVYNIoAPFp.tGphhIAGRVEuRDS.EtScVhFFpcp.pspas.VEssssat.LQDPFloplpGELIhGGVElhP+..sssshLsW+TsFYRGpslpsLppFhsGPsGMKDIRlscLtDG+IGVFTRPQG-h.....GGRGpIGashIsoL--LT.EsIpsA.LLcppFss-EWGGsNEsHLLssGpIGlLGHIAsFDppGsRH....YYshsFhhNs-opphpphKIIApRusFhPussKRPDLsDVlFoGGLlhpsDGsscLYuGluDu-Ap+IsIsDPFps...... 2 12 20 22 +8783 PF08951 EntA_Immun Enterocin A Immunity Mistry J, Sammut SJ, Coggill P anon pdb_2bl8 Family Gram-positive lactobacilli produce bacteriocins to kill closely-related competitor species [1]. 
To protect themselves from the bacteriocidal activity of this molecule they co-express an immunity protein (for discussion of this operon see Bacteriocin_IIc Pfam:PF10439). The immunity protein structure is a soluble, cytoplasmic, antiparallel four alpha-helical globular bundle with a fifth, more flexible and more divergent C-terminal helical hair-pin [2]. The C-terminal hair-pin recognises the C-terminus of the producer bacteriocin and this interaction is sufficient to dis-orient the bacteriocin within the membrane and close up the permeabilising pore that on its own the bacteriocin creates [3]. These immunity proteins interact in the same way with other bacteriocins, family Bacteriocin_II, Pfam:PF01721. Since many enterococci can produce more than one bacteriocin it seems likely that the whole operon can be carried on transferable plasmids [4]. 21.40 21.40 21.40 21.50 21.10 21.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.20 0.72 -3.83 62 1139 2009-01-15 18:05:59 2006-08-03 15:14:23 5 3 516 8 131 559 0 73.70 19 66.60 CHANGED pcppph...hpplhpll...p..s.ptpp.plcplLhpstpclcpsps...h.hlhs+Lspslpthsh.p...phpLsps.pphhppl ............................pph...hphl.ppLh....pphchs.tsp..clpphLhpstpcLcpspp....phlhscLsptls.hhhhp...phphstplhshhh.l..................... 0 24 44 81 +8784 PF08952 DUF1866 Domain of unknown function (DUF1866) Mistry J, Sammut SJ anon pdb_1ufw Domain This domain, found in Synaptojanin, has no known function. 
20.40 20.40 20.50 20.50 20.10 20.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.76 0.71 -4.66 6 189 2012-10-02 20:46:34 2006-08-03 15:35:34 6 6 73 2 80 187 1 141.10 47 10.92 CHANGED Dl-lhEVDsppRpsVac-VIuspGPPDuTllVslpS.........s.sEsshFD-sLhspLlppLupaGEVsLVRFVp-pMWVTFtDGpSALssLslsuhpVhG+slpI+LKS.-Wl+tLE-El..psssplshu...ossSsLLu-ssshsss-a ............................-l-l.EV-sptRppVapEVhuhQGP.DuTVlVslpS...............oh.Eps.F.s-sLhsELhQphus.aG.p.ll.Ll.R.h.....sp.s...pM.....hVTFh-upSALsVLs.lsG..hcl.hGR.slpIp.Ko.D..Wl...KsLcEEh...p.phhslu...ossSsLLtEsh-hst...................................................... 1 18 25 47 +8785 PF08953 DUF1899 Domain of unknown function (DUF1899) Mistry J, Sammut SJ anon pdb_2akf Domain This set of domains is found in various eukaryotic proteins. Function is unknown. 20.70 20.70 20.70 20.80 20.40 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.05 0.72 -4.33 33 702 2009-01-15 18:05:59 2006-08-03 15:39:09 6 34 265 2 401 638 1 64.30 49 12.32 CHANGED +hVRsSKaR...HVFGpssK+EpCY-sl+locssWDoshsulNPKalAVsh-uuGGGuFhVlPlsc.sG .........phVRsSKFR............HVFGp.ss.Kp...-pCY-sl.+.l..S+s.s..W.D.o.sFCA..............VNP+FlAllhEuuGG.G..AFhVlPLpcoG........ 0 119 174 282 +8786 PF08954 DUF1900 Domain of unknown function (DUF1900) Mistry J, Sammut SJ anon pdb_2akf Domain This domain is predominantly found in the structural protein coronin, and is duplicated in some sequences. It has no known function [1]. 
21.20 21.20 21.20 21.60 20.90 20.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.62 0.71 -4.64 64 952 2012-10-05 17:30:43 2006-08-03 15:41:35 6 51 276 2 567 880 1 128.00 42 25.62 CHANGED Plss.hslDsusGlLhPaYDpDosllYLsGKGDusIRYaEl..ss-...t.hlphlspapSssPp+.........................Ghu.hhPKpulDVpcsElsRhh+lsss..s......lcPlSahVPR+.u.-hFQcDlYPsohus.cPuloAc-Wh.sGpssp..PhhlSl ......................lt..plDs.Ss.GlLhPaYDsDosllYLsGK......G..DusIRYaEl.........ss-..........tPalph.Lsp.a..p.S...t.c.PQ+.........................G.hu.hhP..KRu..l-VpcsE.ls.RhaKLpppp......lEPIuhhVPR+.........u....-hFQ-Dl.....YPsTsus.cP.u.ls.AcEWhsGpstt..PhhlSL..................... 0 177 252 404 +8787 PF08955 BofC_C DUF1901; BofC C-terminal domain Mistry J, Sammut SJ, Bateman A anon pdb_2bw2 Domain The C-terminal domain of the bacterial protein 'bypass of forespore C' contains a three-stranded beta-sheet and three alpha-helices. Its exact function is, as yet, unknown [1]. 20.60 20.60 20.60 25.20 19.80 20.00 hmmbuild --amino -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.37 0.72 -3.93 10 152 2009-01-15 18:05:59 2006-08-03 15:43:03 5 2 152 1 28 109 0 75.70 57 41.96 CHANGED DDISPLsKsNGYhGlo-DGllSlFcG+Pcps.chIQSFFQIDlp+LES+hpcpLc+GIPl+TKpcaccVIEth+sY ..........DDISPLLKssGYFGVS--GlLpIF+GsP...cs-..psI+SFFQIDh+K....LE....Sa....c....+....s+LK+GIRI+SKEtFscsIE+hKpY.... 0 9 18 20 +8788 PF08956 DUF1869 Domain of unknown function (DUF1869) Mistry J, Sammut SJ anon pdb_1nei Domain This domain is found in a set of hypothetical bacterial proteins. 
21.30 21.30 21.50 21.30 20.20 20.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.87 0.72 -4.37 3 423 2009-01-15 18:05:59 2006-08-03 15:45:07 5 4 421 2 17 63 0 58.90 87 91.84 CHANGED pscGpFLLTVTNNNNGVSVDK-FSoLAsL+DPplAAEoVKDLVNIVRGYDoDEETNVCGW ......MGKATYTVTVTNNSNGVSVDYETETPMTLLVPEVAAEVIKDLVNTVRSYDTENEHDVCGW......... 0 1 5 11 +8790 PF08958 DUF1871 Domain of unknown function (DUF1871) Mistry J, Sammut SJ anon pdb_1u84 Domain This set of hypothetical proteins is produced by prokaryotes pertaining to the Bacillus genus. 19.60 19.60 20.80 19.70 19.20 18.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.61 0.72 -3.91 7 175 2009-09-11 15:05:30 2006-08-03 15:49:52 5 2 170 1 22 97 0 78.70 47 89.54 CHANGED ptMlclltpWDPFphGc-aY-TEsuDVlpAlash-DPcpLA+pIQpIYEhSFEphlPlEsCp+lAtpLLhIKpuuSCoh ......pMlcllcsWDP...Fp.....h....G....s......-....FYETEAuDVVsllpshD..Dsc..h..lAKcIQ+IYhhSFE..E...s....s..l..EcC.E..K..lAhcLLsIK-uuSCo.L.................... 1 5 12 16 +8792 PF08960 DUF1874 Domain of unknown function (DUF1874) Mistry J, Sammut SJ anon pdb_2blk Domain This domain is found in a set of hypothetical viral and bacterial proteins. 20.30 20.30 22.00 21.30 19.50 19.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.12 0.72 -4.29 12 35 2009-09-12 20:42:06 2006-08-03 15:52:21 5 3 35 6 11 38 1 104.10 29 78.08 CHANGED Mt......lalhNuhhhsh..stpshhphccIshpEsKphl....psppFlSAIGHcuTAplloplhsssl.hNRlplphp.GD+AlshhLppRl.EGpVL.opcElc......cIuaphhlh ...................lYllNuhslsh...sp....shhchcclsh.cEscphl.....pspphlSA.IGHcuTApllspLL.s.........ssl.h..NR..lplchptGDcslshp.LppRl.EGpll.stcElp......clta.hh..h................... 0 7 9 9 +8793 PF08961 DUF1875 Domain of unknown function (DUF1875) Mistry J, Sammut SJ anon pdb_2crb Domain The MIT domain, found in Nuclear receptor-binding factor 2, has no known function. 
25.60 25.60 25.70 25.70 25.50 25.50 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.69 0.70 -5.06 3 73 2009-01-15 18:05:59 2006-08-03 15:53:06 5 3 39 1 40 59 0 206.80 62 81.58 CHANGED MKLTESEQAHLSLELQRDSHMKQLLLIQERWKRAKREERLKAQQuTE+DAAsHLQssH+PSsEDAEGQSP.LSQ.Y.PSTE+pLPElQGlFDRDPDTLLYLLQQKuEPuEPCIGSKAPKDDKTIIEEQATKIADLKRHVEFLVAENERLRKENKQLKAEKARLLKGshEKELDVDADFVEpSELWuLPsHuEoAsASSTWQKFAANTGKAKDIPIPNLPPLDFPSPELPL.....MELSEDILKGFMND ....................MKLTQSEQA+LSLELQRDSHMKQLLLIQERWKRApREERLKAQ...Qs...sD+-ssspLQsSt+PuuEDu-ups...P.l...s...pcaSPSsE+pLsElpGlFDRDPDTLLaLLQpKp.....E.....PsE.....PC.IGSKAPKDDKTIIEEQATKIADLKRHVEFLVAENERLR+ENKQLKAEKARLlKGPhEKELDVDADFVEpSELWuLPP+S-oAsu.S..uoWQKFA.usoGKA.KDIPIPNLPPLDFPSPE......LPL.....hELSEDILKGhMs................................................. 0 2 5 20 +8794 PF08962 DUF1876 Domain of unknown function (DUF1876) Mistry J, Sammut SJ anon pdb_2fgg Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 38.90 38.50 23.90 22.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.73 0.72 -4.20 6 190 2009-01-15 18:05:59 2006-08-03 15:54:17 6 2 118 1 53 125 0 86.30 47 88.10 CHANGED HVhpsWpVDlpI-EHDp+TRAKARLR.WcG+ElVGVGLARLcPADcsVPcIGDELAlARALSDLApQLLsloocDIEAuTHQPu+sLa .......hppWpl-l..I.-E+.....-.....t.....p.TRA+ARLc..h.....ss..p....pls..GsGlARhsPuDpsVPcIGDELAlARALuDLAppLlphuspDIEA..s.THpPsphl........... 0 16 44 50 +8795 PF08963 DUF1878 Protein of unknown function (DUF1878) Mistry J, Sammut SJ anon pdb_1sed Family This domain is found in a set of hypothetical bacterial proteins. 
20.90 20.90 20.90 22.50 20.60 20.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.50 0.71 -3.86 5 133 2009-01-15 18:05:59 2006-08-03 15:55:35 5 1 128 3 17 68 0 110.50 60 97.71 CHANGED MpSlEcRlu+LEYYlcLLlcslDM-KYPFYuLlI+KsLoKEEu-Elt+lCcELu-EhEsQKAQGaVhFDcLLsLFAGQLsEKL-VHETIFALacQGLFpPLMsEFIoII+paD ...M.DVV+RLEQAEYYV-LLFKMI...DEEK..CPFYSLIIKKKARKKDIERILsLCEpLNEQYlsEKAEGLLLFDALLDQFEKALPHQLEVHETAEALtKQGLFpPLMNEFLsMIA+..c..................................... 0 1 7 10 +8796 PF08964 Crystall_3 DUF1881; Beta/Gamma crystallin Mistry J, Sammut SJ, Eberhardt R anon pdb_1yhp Family This family of beta/gamma crystallins includes the N-terminal domain of Dictyostelium discoideum Calcium-dependent cell adhesion molecule 1 (Swiss:P54657), which mediates cell-cell adhesion through homophilic interactions [1]. 26.70 26.70 26.70 27.00 26.60 26.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.86 0.72 -4.26 17 98 2012-10-01 23:14:22 2006-08-03 15:57:29 5 9 60 8 21 97 3 87.90 34 21.43 CHANGED .sstshFappKNapGpuapYspussl....h.h.sspLND+FhSVclGstscVhhWcH.......pstshhhchssspsDlop.lsGLS+FpVhs ................tVCFYt-cNasG-SlChspGppl...........tsh.ssphND+lpSIpIPtuhpVTlYEcs....satGt.hshptshs...l.......................hs.p.s.h..................... 0 7 16 18 +8797 PF08965 DUF1870 Domain of unknown function (DUF1870) Mistry J, Sammut SJ, Bateman A anon pdb_1s4k Domain This domain is found in a set of hypothetical bacterial proteins. It contains a helix-turn-helix domain so may be a DNA-binding protein. 
23.60 23.60 24.10 24.40 22.90 23.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.66 0.71 -4.44 4 448 2012-10-04 14:01:12 2006-08-03 15:59:13 5 2 411 2 9 103 0 104.80 77 97.79 CHANGED MNshELQALR+IhhhsIsEsAphIupsssSpTWQpWEsGclsIPs-l.tclhphpphRp+hlstllccls.......NtlGp.ThRaasDLpuFpphYs-usalcW+lYQSVsucLaAcsh-.cLs ......MNAYELQALRHIFAMTIDECATWIAQTG......sSESWRQWENG+CAIPDpVVEQLLAMRQQRK++lpAIl-KIN.......NRI..G...NNT.MRFFPDLTAFQpVYPDGNFIDWKIYQSVAAELYAHDLE.RLC........................................ 0 2 4 6 +8798 PF08966 DUF1882 Domain of unknown function (DUF1882) Mistry J, Sammut SJ anon pdb_2atz Domain This domain is found in a set of hypothetical bacterial proteins. 20.50 20.50 21.80 54.40 19.10 18.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.46 0.72 -4.01 10 188 2009-09-11 17:04:33 2006-08-03 16:02:25 6 1 188 1 17 76 0 72.60 66 40.34 CHANGED MoshDLsLIKh.TsHYYIKRDsIlsKIca+GRpFasKFERIDAPLohsllpcHhc+cIslAHSLIs.+sDKVEN .........hosMDhSLIKIIoDHYYI+RDpIspKITH+GRlFFDKFERVDAPLNhNlM+EHssKKIlVAHDLIo.KDNKVEN..... 0 5 14 17 +8799 PF08967 DUF1884 Domain of unknown function (DUF1884) Mistry J, Sammut SJ anon pdb_1she Domain This domain is found in a set of hypothetical bacterial proteins. It shows similarity to the N-terminus of ATP-synthase. 28.60 28.60 28.80 72.50 27.90 28.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.75 0.72 -3.92 7 26 2009-01-15 18:05:59 2006-08-03 16:05:13 5 1 14 1 21 30 0 87.80 46 89.25 CHANGED Mss..ps.hlcIls.lEptIsELK.-Ga-PDlILsG.Ehhcahsc..........shhph.pl+VhhlEELGsDAllsDSKhLG.l.tAuKRIpI.P .........ps.hlclLshlEcphNELKh-GaEPDllLsG.Ehhcalsp..........chhchssLKlhll-ELGsDAVlsDSKhLG.lhtAuKRIpI.... 
0 2 2 11 +8800 PF08968 DUF1885 Domain of unknown function (DUF1885) Mistry J, Sammut SJ anon pdb_1t6a Domain This domain is found in a set of hypothetical proteins produced by bacteria of the Bacillus genus. 25.00 25.00 29.60 118.30 20.60 19.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.66 0.71 -4.47 4 111 2009-09-11 06:05:25 2006-08-03 16:06:35 5 1 111 1 11 61 0 131.20 70 99.20 CHANGED M.ppAaIKLV.pSsppoloI-DVKcLhchYKslTuKTG-QlsaAYspsAFPYEIh-pot...osLhLQSsc-RYssIhlGVs.........oEp-QoFIQloLPssATaGDKGKANEFs+FLAKKLpGELQLFNGRTMYFhpR ....MQHAFITLVPKSN.QQSVSIDDIKQLFHYYKTVTSKTGsQINYuYTNTAFPY-ILDTSs......TTLKLQSoH.DRYDSIYlGVG..........I.Ep....E...QS.....aIQlSLPPNATFGDKGKANEFCRFLAKKLEGELQLFNGRTMYFYKR. 0 3 6 7 +8801 PF08969 USP8_dimer DUF1873; USP8 dimerisation domain Mistry J, Sammut SJ, Bateman A anon pdb_2a9u Domain This domain is predominantly found in the amino terminal region of Ubiquitin carboxyl-terminal hydrolase 8 (USP8). It forms a five helical bundle that dimerises [1]. 20.90 20.90 20.90 21.00 20.70 20.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.35 0.71 -4.13 25 410 2009-01-15 18:05:59 2006-08-03 16:08:08 6 16 194 4 269 390 3 106.40 26 18.47 CHANGED uphK.Lahu....sslccLpchsphh.pstpstshph.....hhpoApKlhcpA-cath-GD-EpAYlhYMKahslh.ptI+c+sDYpppcsph+phLGssphpchhhsp....hEcLppSLppRYcpt .....................t..............plctLsphu.ps...h.ph...s...pshsh..+p........YhRousclh+tAphYtpEGshEpAYlLYh+ahsLhhcplspH.Da+..p.st...s..+p........th....p......p....hpch....h..h.c.........hEpL+...tpLht+Yp.............................................. 
0 71 118 194 +8802 PF08970 Sda Sporulation inhibitor A Mistry J, Sammut SJ anon pdb_1pv0 Domain Members of this protein family contain two antiparallel alpha helices that are linked by a highly structured inter-helix loop to form a helical hairpin; the structure is stabilised by numerous hydrophobic and electrostatic interactions. These sporulation inhibitors are antikinases that bind to the histidine kinase KinA phosphotransfer domain and act as a molecular barricade that inhibit productive interaction between the ATP binding site and the phosphorylatable KinA His residue. This results in the inhibition of sporulation (by preventing phosphorylation of spo0A) [1]. 21.10 21.10 21.50 27.00 20.50 18.10 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.06 0.72 -4.47 18 164 2009-01-15 18:05:59 2006-08-03 16:19:17 5 1 141 4 36 93 1 45.20 58 91.48 CHANGED Mcp...LSDELLhESYaKApELpLsPDFItLIcpEIhRRSLpcKlshSS ......Mcp...LSsELLhESYaKApEL+LssDFIhLIcpEIhRRSLccplshSS........ 0 11 26 28 +8803 PF08971 GlgS Glycogen synthesis protein Mistry J, Sammut SJ anon pdb_1rrz Domain Members of this family are involved in glycogen synthesis in Enterobacteria. The structure of the polypeptide chain comprises a bundle of two parallel amphipathic helices, alpha-1 and alpha-3, and a short hydrophobic helix alpha-2 sandwiched between them [1]. 24.20 24.20 24.40 24.20 23.90 24.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.28 0.72 -4.05 4 480 2009-09-13 10:19:12 2006-08-03 16:20:08 6 1 476 1 21 81 0 65.20 75 97.93 CHANGED M.D+slYShsNFDFLApSFARMpuEGR.VDItAVTGNMsEuppsWFpcRYuaYpQQhhQtpthplEH ...........M.ccsl.SLNNFDFLARSFARMHAEGRPVDIhAVTGNMDEEHRTWFCARYAaYCQQhhQARELELEH.......... 
0 1 4 12 +8804 PF08972 DUF1902 Domain of unknown function (DUF1902) Mistry J, Sammut SJ anon pdb_1wv8 Domain Members of this family of prokaryotic proteins adopt a fold consisting of one alpha-helix and four beta-strands. Their function has not, as yet, been elucidated [1]. 29.40 29.40 29.80 29.50 29.30 28.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.71 0.72 -4.41 8 55 2012-10-02 16:06:15 2006-08-03 16:21:41 6 1 52 1 31 65 2 53.10 39 65.57 CHANGED lpIQAsWDsEAGVWVApSDDlPGLlTEA-TlEtLhcKlpsMlPDLLp-Nusspt .............htlcshWDpEAsVWVApSsDl...PGLsTEAsTl-sLhpKlpshIs-LLphNt....t....... 0 7 16 21 +8805 PF08973 TM1506 DUF1893; Domain of unknown function (DUF1893) Mistry J, Sammut SJ, Iyer, LM anon pdb_1vk9 Domain A member of the deaminase fold that binds an unknown ligand in the crystal structure. The protein is ADP-ribosylated at a conserved aspartate [1]. Contextual analysis suggests that the domain is likely to bind NAD or ADP ribose either to sense redox states or to function as a regulatory ADP ribosyltransferase [2]. 20.50 20.50 20.80 29.90 18.30 20.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.56 0.71 -4.62 9 122 2009-01-15 18:05:59 2006-08-03 16:24:45 5 3 117 1 18 93 1 129.50 36 76.92 CHANGED M-p.....hhclLccGGYShVlhp-pc.lpT.spRGltsLh-L...lpppsp.hcGAtlADKVlGKAAAhLhlhGGl+plYAclISpsAlclLcpssIcssYschVPaIhNRstoGhCPlEphsh-l-sscchaclIppFlpK ...........................p.....hhphLpptshohslhpc.pc..lho.ppRGltsLh-l....lspt.p....hcGup.lsDKVVGKAAAslhlhuGlppVaAsllSpsAhclLcptuIcVsapchVshI..s.RstsshCPhEshs.pl-ssEEhhthl.t........... 1 10 16 16 +8806 PF08974 DUF1877 Domain of unknown function (DUF1877) Mistry J, Sammut SJ anon pdb_1ryl Domain This domain is found in a set of hypothetical bacterial proteins. 
22.00 22.00 22.80 22.40 21.90 21.50 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.06 0.71 -4.59 16 510 2009-01-15 18:05:59 2006-08-03 16:27:53 5 2 476 2 63 275 3 158.20 50 98.00 CHANGED MGMhspYhplsccclpcLh..st............stpshhphht-htpsp...........................phDl-KtWcsLHalLTGsss..........ppssPlu.slhGs..psht.-ss.s..sphhsssclpplsctLpslshcpLhppFshpphppsclYPs.h.p.pptcp-hh-plhphatcLpcFapcsucpspslLlhI .....................................................................................................MGMIGhascIcsEclspLL....co.................................sccsLhD.sIcD...shsshc.........................................cLDIDKpWDhLHFsLTGoSAh.............-PscNDPLS+AVLGE...........cSL.....E..Du...l...DG..F...luLThspElAusl-+LEuLDcsELRKp......F......Sl...........K+Ls......Eh.E......I......YPG..l........shsEEh.E..up..l...F..us..Ihh.chEKLluhY++hL+pGspsLssI............................. 0 28 46 56 +8807 PF08975 2H-phosphodiest DUF1868; Domain of unknown function (DUF1868) Mistry J, Sammut SJ anon pdb_2fsq Domain This group of 2H-phosphodiesterases comprises a single family typified by the protein mlr3352 from M.loti. Members are also present in various alpha-proteobacteria, Synechocystis, Streptococcus and Chilo iridescent virus. The presence of a member of this predominantly bacterial group in a large eukaryotic DNA virus represents a potential case of horizontal transfer from a bacterial source into a virus. Several proteins of bacterial origin have been noticed in the insect viruses (L.M.Iyer, E.V.Koonin and L.Aravind, unpublished observations and these appear to have been acquired from endo-symbiotic or parasitic bacteria that share the same host cells with the viruses. Presence of 2H proteins in the proteomes of large DNA viruses (e.g. T4 57B protein and the Fowl-pox virus FPV025) may point to some role for these proteins in regulating the viral tRNA metabolism. 
Each member of this family contains an internal duplication, each of which contains an HXTX motif that defines the family. 21.00 21.00 21.30 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.52 0.71 -4.34 13 265 2012-10-03 21:31:48 2006-08-03 16:30:44 5 1 258 1 37 106 81 117.50 61 51.47 CHANGED KFspsGchhPhsGNTlVCHL-psusshp...slLshppphhshshAs+hsFhPsSShHMTlFpGlh-pcRpsshWPucLPlDsslschsshatpRLcsFsh..sssFphtl......ssspP.tulhL ...KFKENGEFNHFPGNTVVAN..LYTK..Q.DLME....VVDIIQSRYRELP..FI.DK.FT.L.TPRNSIHMTVIELLCHENRETEFWSSNLPLDTPLQEIHDYFAKQLEIFPL......LDEEIHMRl......TEMGK.QNIL.V......................................... 1 5 17 22 +8808 PF08976 DUF1880 Domain of unknown function (DUF1880) Mistry J, Sammut SJ anon pdb_1wlz Domain This domain is found predominantly in DJ binding protein. It has no known function. 21.30 21.30 22.30 21.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.54 0.71 -3.97 2 37 2009-01-15 18:05:59 2006-08-03 16:31:47 6 21 27 4 18 46 0 116.30 52 9.47 CHANGED lQILTDEQFDRLWsEhPVNAKGRLKY.DFLS+hS.Epsso.PMAsGD...SshAQRGSSsP-hSpGTRSsL..Psp-.RsG.KSpSHPCTPs......GTPPLQNC-PIESRLRKpIQGCWR ..............................................................................................................lQlLTDEQFDRLWsEMPVNAKGRLKY.DFLS+FSoEcs.s....T...Ph..AsGD.......Ssh.AQ.RGSSV.P-lS...pusR...SAlSs.Psp-hRsG.KspS...H.P.CT.....Pustt..shsGoPPLQNC-PIES+LRK+IQGCWR...................................... 0 3 3 6 +8809 PF08977 BOFC_N Bypass of Forespore C, N terminal Mistry J, Sammut SJ anon pdb_2bw2 Domain The N-terminal domain of 'bypass of forespore C' is composed of a four-stranded beta-sheet covered by an alpha-helix. 
The beta-sheet has a beta2-beta1-beta4-beta3 topology, where strands beta1 and beta2 and strands beta3 and beta4 are connected by beta-turns, whereas strands beta2 and beta3 are joined by an alpha-helix that runs across one face of the beta-sheet. This domain is similar to the third immunoglobulin G-binding domain of protein G from Streptococcus, the latter belonging to a large and diverse group of cell surface-associated proteins that bind to immunoglobulins. It has been hypothesised that this domain may be a mediator of protein-protein interactions involved in proteolytic events at the cell surface [1]. 25.00 25.00 26.80 26.40 24.70 20.60 hmmbuild --amino -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.36 0.72 -4.15 10 128 2009-01-15 18:05:59 2006-08-03 16:36:58 5 2 128 1 18 85 0 50.80 61 29.42 CHANGED lollLE+lYLDGEVSEEp+sETVhohEDFWupY+sWpLV-hc-splVFRKp ..VTILLERMYVDGEVSEEIhTEKVssLEcFLQQYKEWQLVDRDDsQIVLQKK.. 0 3 10 12 +8810 PF08978 Reoviridae_Vp9 Reoviridae VP9 Mistry J, Sammut SJ anon pdb_1w9z Domain This domain is found in various VP9 viral outer-coat proteins. It has no known function. 19.00 19.00 28.90 21.30 17.60 16.50 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.84 0.70 -5.44 2 12 2009-01-15 18:05:59 2006-08-03 16:39:53 5 1 5 3 0 10 0 187.80 64 93.29 CHANGED MlS-s.lRuhK+LuhpTpRssG-pThtLsSpVKLSKGEVEhlAVTKcEhh-tLtQCNL.plE.lsh-tTFNGslhRhuAahFlhpu.hlhhspslAVp.s.pYuTslAG.shphT.VhssphhhhtplstGs.usPaSspsuuLhIT.thsLhss.l.sGplhVLF.TSL.TThspoNSaAYShCslPhpcWD.phIKLTuETSCsSLsuMpsLsNSll.G-Rs.ssGLYVDI.GVTVoTSsS.uoLPlTslssshPlhFpAhs+.VEpVu.INhLYsLu .......................................................................................................................................................................................................................WDFNMIKLTAETSCsSLTtMTNhlNoLV.GDRsRPVGLaVDIPGVTVTTSASluoLPITTIPAsTPLIFSAYhKQVEEVGlINTLYuLS... 
2 0 0 0 +8811 PF08979 DUF1894 Domain of unknown function (DUF1894) Mistry J, Sammut SJ anon pdb_1z9v Domain Members of this family have an important role in methanogenesis. They assume an alpha-beta globular structure consisting of six beta-strands and three alpha-helices forming the secondary structural topological arrangement of alpha1-beta1-alpha2-beta2-beta3-beta4-beta5-beta6-alpha3 [1]. 25.00 25.00 26.20 59.70 23.60 16.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.05 0.72 -3.93 20 55 2009-01-15 18:05:59 2006-08-03 16:42:58 6 2 46 1 42 54 1 91.30 39 87.56 CHANGED M.u.Cl-sh.....sYEILL+psoFKECc-aI+cpscElYclpPGaclh.GlhlIGhsPIPVGlcs..s.slIFPYTKPC..aGoFVl+l..pss.-Elc+lRc ...Mu.Cl-ph.....sYEILL+stoFKECp-aI+cshcElhclsPGhclh.GlhlIGlPPIPlGl-s...s.pllFPYTKPC..aGTaVl+l..pss.cElc+lR.............. 0 10 27 35 +8812 PF08980 DUF1883 Domain of unknown function (DUF1883) Mistry J, Sammut SJ anon pdb_2b1y Domain This domain is found in a set of hypothetical bacterial proteins. 21.00 21.00 21.50 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.15 0.72 -4.45 10 161 2009-01-15 18:05:59 2006-08-03 16:45:37 5 3 155 1 48 116 3 87.50 35 79.80 CHANGED M+Fs.Y-hcph+cG-llsloLs.pssNVRLMssuNFppF+NstcapY.hGGhsc+SPs+IsVPSoGaWhlllDh..sGp+Ghlsuol+...s..th..clhR .........hpah.aphtphptuDlVVlcCS.ppCNI+LMsssNFppa+sGscasY.hG..Gshc+.PA+lsVPpoGaWslsIDo..tupp...hphohp.............hh............... 0 9 16 33 +8814 PF08982 DUF1857 Domain of unknown function (DUF1857) Mistry J, Sammut SJ anon pdb_2ffs Domain This domain has no known function. It is found in various hypothetical bacterial and fungal proteins. 
21.30 21.30 24.70 24.50 21.00 20.30 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.68 0.71 -4.61 17 171 2012-10-02 19:24:03 2006-08-03 16:54:50 6 4 160 2 79 146 10 151.00 33 88.30 CHANGED papahl.lNsPss.thssLTRpQlWpGLlh+ucpPp.Fls.ul.-pCpllpcs........sstltRcLpa.........GsthlcDpVpht..pplca.....tptsGuolshhI-.......-sssusLaLRFhYchphs.hpssut-.cth..p-hh+pua+tushcTl+hIRphstsGp ...............pFEHLlplNssss..thssLTRsQlWpGLVhRAcpPphFV..GL.-sCtlhpcs.............sstlcR-Lpa................Gps.sl+DcVThpssp.pVcaph..sssttsGu...oLohsIE.......E.s.-..st.pLFlRFtYcTsls....h.pss.ot-tpph..pphlKpAY+puDl-Tl.chIR-hstt.............................. 0 10 41 64 +8815 PF08983 DUF1856 Domain of unknown function (DUF1856) Mistry J, Sammut SJ anon pdb_1ytv Domain This domain has no known function. It is found in the C-terminal segment of various vasopressin receptors. 25.00 25.00 30.70 29.60 24.10 24.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.47 0.72 -3.87 9 110 2009-09-11 07:47:04 2006-08-03 16:57:04 5 1 65 0 32 102 0 46.40 58 11.36 CHANGED shpKEDSDSSh..RRpT.hT+h.ssRoPosuh......ssWK...sSPKSScSl+hl.hpp .cFsK-DSDShS..RRQT.ao...NNR.SPTNSo......GhWK...-SPKSS+Sl+FlPlsT............. 0 1 4 14 +8816 PF08984 DUF1858 Domain of unknown function (DUF1858) Mistry J, Sammut SJ anon pdb_2fi0 Domain This domain has no known function. It is found in various hypothetical bacterial proteins. 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.71 0.72 -4.31 33 1114 2009-01-15 18:05:59 2006-08-03 16:58:53 6 29 835 3 232 793 11 56.80 29 33.48 CHANGED Ishshslt-llcpaP-sl-lhhph..GFpplssPshhpohu+hhTLcpuuphtslslsplh ...............Ishshslu-llcpa....P....-..ll.-lLhch...GhcsLusPh.....u.phhoLcpuuphcGlsh-clh.......... 
0 110 183 204 +8817 PF08985 DUF1888 Domain of unknown function (DUF1888) Mistry J, Sammut SJ anon pdb_1ai4 Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 31.70 31.40 23.10 22.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.54 0.71 -4.44 4 33 2009-09-11 00:41:11 2006-08-03 17:02:49 6 1 32 21 10 34 5 121.90 49 94.95 CHANGED Mpss.spspaIpVTVTLE.NGEPVFsYTsApGt.p.GDVslTpuuT.ITY.LpDQTGKGLKFVGAuFhTPFDplIDAVplSoDGpLlpLsDLDcssGsTpFQFVLoNouNTLhlLSPDPplIN+sp ........M.ss.t.upalpVsVTLE.NGEPV.F.YTcss.Gc..ts.sGDVTlTpuuT.lTYhL..sDpT.G.K.GLKFVGsGFl.....TPFDp....ll....D..AVTlSoD..GhLlQLVDhDcoPGoTKFQFVhoNosNTLllLSPDPplIN+s.p.. 0 2 3 5 +8818 PF08986 DUF1889 Domain of unknown function (DUF1889) Mistry J, Sammut SJ anon pdb_2es9 Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 53.20 53.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.73 0.71 -3.92 2 354 2009-01-15 18:05:59 2006-08-03 17:04:34 5 1 350 2 10 76 1 99.00 87 99.77 CHANGED MQIKVIYSLIDNMVNFKDKNMPAVIDKALDFIGAMDVSAPTPSSMNESTAKGIFKYLKELGVPASAADITsRAD.EGWNPGFTEKMVGWAKKMEoGER.VIKNPEYFSTYMQEELKALV ...................................MPAVIDKALDFIGAMDVSAPTPSSMNESTAKGIFKYLKELGVPASAADITARADQEGWNPGFTEKMVGWAKKMEoGERsVIKNPEYFSTYMQEELKALV....... 0 1 1 6 +8819 PF08987 DUF1892 Protein of unknown function (DUF1892) Mistry J, Sammut SJ anon pdb_1n6z Family Members of this family, that are synthesised by Saccharomycetes, adopt a structure consisting of a four-stranded beta-sheet, with strand order beta2-beta1-beta4-beta3, and two alpha-helices, with an overall topology of beta-beta-alpha-beta-beta-alpha. They have no known function [1]. 
19.90 19.90 21.10 60.70 19.10 17.50 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.58 0.71 -4.07 9 44 2009-01-15 18:05:59 2006-08-03 17:08:24 5 2 42 1 28 38 0 110.00 45 85.71 CHANGED Must..............-NshRhlllL-cs....ppcpsptDc+...........hlDE......VpphDplNpaFDKFDEcIsIPNEGHIKYEluSDGLlVlIlD.KEl.-cVlshVcsasppsp.cppctpD ................h.ut..............sNsaRhllLLE-...................cp..c.....s...c.-E+................p-FlDE.....tlsphDplNpWFDKFDpcICIPNEGHIKYEluSDGLlVlllD.+El.--VlchVccaVccs.........p.............. 0 3 14 25 +8820 PF08988 DUF1895 Protein of unknown function (DUF1895) Mistry J, Sammut SJ anon pdb_1zw0 Family The YscE protein, produced by the pathogen Yersinia, assumes a secondary structure composed of two anti-parallel alpha-helices separated by a flexible loop. The function of this protein is, as yet, unknown [1]. 21.80 21.80 22.30 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.19 0.72 -3.94 5 99 2009-01-15 18:05:59 2006-08-03 17:10:32 5 1 94 16 12 42 2 66.40 37 86.26 CHANGED MTsLEppLpu.ussppVcsIpp+LpQAQucVKRQLcpGGsPQQYQlWp+Qu-AlpAAlsIIcTlEucpK .......MTpLEphLps..sspps+uIphpLctAhspl++phs+GssPpQYQ.hppphcAlEuAhsIIphht............ 0 4 6 7 +8821 PF08989 DUF1896 Domain of unknown function (DUF1896) Mistry J, Sammut SJ anon pdb_2apl Domain This domain is found in a set of hypothetical bacterial proteins. 
25.00 25.00 26.30 26.10 24.10 24.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.80 0.71 -4.32 7 270 2009-01-15 18:05:59 2006-08-03 17:11:53 5 1 103 1 19 217 4 140.40 50 90.90 CHANGED hsppppELSYa+LtLLsYL+EsHPchAGDpsFIppRu-pAAcsYpcAltpGhshstAtphAptlLhpGLHFS+YDslhpVl.sEFtsEVPptptcshsLpLL..P.scsVFs+YslsDD.FphSPpYcpLYsELTGsIhhhlEc.Gl ........................h.ppspp-hSYatLpL.sYLp-p+PEphsD......psFIptRADpAhpsYpcAhtpGastspApphAp-lLapGLHFS+YDTLhpVlENEFpcE....lPsshsEphs.hLL....hlpsVFs+Y-..LoDD.FAhos-Y-pLYTELTGslVLhIEppGl.......... 0 8 15 19 +8822 PF08990 Docking Erythronolide synthase docking Mistry J, Sammut SJ anon pdb_1pzr Domain The N terminal docking domain found in modular polyketide synthase assumes an alpha-helical structure, wherein two alpha-helices are connected by a short loop. Two such N-terminal domains dimerise to form amphipathic parallel alpha-helical coiled coils: dimerisation is essential for protein function [1]. 20.90 20.90 21.00 21.30 20.20 20.70 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -7.08 0.72 -4.70 141 653 2009-01-15 18:05:59 2006-08-03 17:16:00 6 165 190 6 148 643 1 26.80 48 0.94 CHANGED MssE-KLh-YLKRlTuDL+psRpR.L+- .....ss--KLh-YLKRlTuDL+coRpRLpE.... 1 24 106 140 +8823 PF08991 DUF1903 Domain of unknown function (DUF1903) Mistry J, Sammut SJ anon pdb_1hp8 Domain Members of this family adopt a coiled coil structure, with two antiparallel alpha-helices that are tightly strapped together by two disulfide bridges at each end. The protein sequence shows a cysteine motif, required for the stabilisation of the coiled-coil-like structure. Additional inter-helix hydrophobic contacts impart stability to this scaffold. The precise function of this eukaryotic domain is, as yet, unknown [1]. 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.78 0.72 -3.82 7 194 2012-10-02 15:44:21 2006-08-04 08:57:27 5 3 149 3 132 223 0 59.70 30 66.44 CHANGED psPCptpACAIQsCLptstYsEu+CtslI--LhpCCtpa......cu+SlCCst.pl.....lphc...hpptpK ...........sPCptpAC...t.lQ.pCLp...cNsY.....scs+CpphlptlpcCCppa.t.......ptpo..Cs..t......................t............................. 0 33 70 107 +8824 PF08992 QH-AmDH_gamma Quinohemoprotein amine dehydrogenase, gamma subunit Mistry J, Sammut SJ anon pdb_1jmx Domain Members of this family contain a cross-linked, proteinous quinone cofactor, cysteine tryptophylquinone, which is required for catalysis of the oxidative deamination of a wide range of aliphatic and aromatic amines. The domain assumes a globular secondary structure, with two short alpha-helices having many turns and bends [1]. 25.00 25.00 92.90 92.20 19.80 18.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.98 0.72 -4.22 9 37 2009-01-15 18:05:59 2006-08-04 08:59:52 6 1 32 4 14 41 1 77.80 67 75.10 CHANGED suVVGCToThDPGWEVDuFGGVuuLCQPMEADLYGCuDPCWWPAQVPDThsoYPcWupcAsssspDWRpLsoVFPpsK ..............suVsGCTsThDPGWEVDuFG.GVuSLCQPMEADLYGCSDPCWWPAQVPDhhsoY.DWsspAssuscDWRpLsoVFPcs.... 0 2 9 11 +8825 PF08993 T4_Gp59_N T4-helicase_N; T4 gene Gp59 loader of gp41 DNA helicase Mistry J, Sammut SJ anon pdb_1c1k Domain Bacteriophage T4 gene-59 helicase assembly protein is required for recombination-dependent DNA replication, which is the predominant mode of DNA replication in the late stage of T4 infection. T4 gene-59 helicase assembly protein accelerates the loading of the T4 gene-41 helicase during DNA synthesis by the T4 replication system in vitro. T4 gene-59 helicase assembly protein binds to both T4 gene-41 helicase and T4 gene-32 single-stranded DNA binding protein, and to single and double-stranded DNA. 
The structure of T4 gene-59 helicase assembly protein reveals a novel alpha-helical bundle fold with two domains of similar size, this being the N-terminal domain that consists of six alpha-helices linked by loop segments and short turns. The surface of the domain contains large regions of exposed hydrophobic residues and clusters of acidic and basic residues. This domain has structural similarity to members of the high-mobility-group (HMG) family of DNA minor groove binding proteins including rat HMG1A and lymphoid enhancer-binding factor, and is required for binding of the helicase to the DNA minor groove [1]. 21.10 21.10 21.80 22.20 20.00 21.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.05 0.72 -3.98 8 64 2009-01-15 18:05:59 2006-08-04 09:01:32 5 2 62 1 0 53 451 91.20 41 44.10 CHANGED lsuhuVY+LYLshKsHhsG.KYDhlKYpWp.hRso-sAFpKR+DKYFFcKLAcKaoLpElhtlhluNhlANscu...WlG-IsssDAhsFYtcalG+hc ...puhsVYplYLhlKpHFss.+YDhlKYshp.h+sS.-suapKR+D+YFFpKLucKaph+..E.Lt.hFluNhVsNscu...WlG-lsstDAhshYp-als+h.p............ 0 0 0 0 +8826 PF08994 T4_Gp59_C T4-helicase_C; T4 gene Gp59 loader of gp41 DNA helicase C-term Mistry J, Sammut SJ anon pdb_1c1k Domain Bacteriophage T4 gene-59 helicase assembly protein is required for recombination-dependent DNA replication, which is the predominant mode of DNA replication in the late stage of T4 infection. T4 gene-59 helicase assembly protein accelerates the loading of the T4 gene-41 helicase during DNA synthesis by the T4 replication system in vitro. T4 gene-59 helicase assembly protein binds to both T4 gene-41 helicase and T4 gene-32 single-stranded DNA binding protein, and to single and double-stranded DNA. The structure of T4 gene-59 helicase assembly protein reveals a novel alpha-helical bundle fold with two domains of similar size, this being the C-terminal domain that consists of seven alpha-helices with short intervening loops and turns. 
The surface of the domain contains large regions of exposed hydrophobic residues and clusters of acidic and basic residues. The hydrophobic region on the 'bottom' surface of the domain near the C-terminal helix binds the leading strand DNA, whilst the hydrophobic region on the 'top' surface of the domain lies between the two arms of the fork DNA, allowing for T4 gene 41 helicase binding and assembly into a hexameric complex around the lagging strand [1]. 25.00 25.00 64.70 63.90 21.70 21.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.07 0.72 -3.87 10 58 2009-01-15 18:05:59 2006-08-04 09:03:10 5 2 56 1 0 50 390 101.80 38 50.40 CHANGED F+EDl+slhYFucKhsls.hcclFpYspcspoShIFKLlQoshIShETFllLDSFLsIlD+aDc.hssDllWps.apsKlpAY+KLlsIDsc...pAKslFIcslcppK ......................Fc-DlcslhhFucpsthpthcclFp.sscsppshIhKhlQSshIShETFllLDShLshl-caDc.ptsDllWps.auhKlpuY+KlLsIDsp...csKplFl-Tl+sh.... 0 0 0 0 +8827 PF08995 NIP_1 Necrosis inducing protein-1 Mistry J, Sammut SJ anon pdb_1kg1 Domain Necrosis inducing protein-1, a fungal avirulence protein produced by plants, consists of two parts containing beta-sheets of two and three anti-parallel strands, respectively. Five intramolecular disulfide bonds, stabilise these parts and their position with respect to each other, providing a high level of stability [1]. 
25.00 25.00 186.20 186.00 20.10 18.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.14 0.72 -3.81 3 16 2009-01-15 18:05:59 2006-08-04 09:05:11 5 1 1 1 0 17 0 82.00 96 100.00 CHANGED MKFLVLPLSLAFLQIGLVFSTPDRCRYTLCC-GALKAVSECLHESESCLVPGDCCRGKSRLTLCSYGEGGNGFQCPTGYRQC MKFLVLPLSLAFLQIGLVFSTPDRCRYTLCCDGALKAVSECLpESESCLVPGDCCRGKSRLTLCSYGEGGNGFQCPpGYRQC 0 0 0 0 +8828 PF08996 zf-DNA_Pol DNA Polymerase alpha zinc finger Mistry J, Sammut SJ anon pdb_1n5g Domain The DNA Polymerase alpha zinc finger domain adopts an alpha-helix-like structure, followed by three turns, all of which involve proline. The resulting motif is a helix-turn-helix motif, in contrast to other zinc finger domains, which show anti-parallel sheet and helix conformation. Zinc binding occurs due to the presence of four cysteine residues positioned to bind the metal centre in a tetrahedral coordination geometry. Function of this domain is uncertain: it has been proposed that the zinc finger motif may be an essential part of the DNA binding domain [1]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.62 0.71 -4.94 40 326 2009-01-15 18:05:59 2006-08-04 09:06:55 5 14 267 7 211 330 1 179.70 29 13.00 CHANGED l-opl.oDsp+a+ssspLplpC..spCspphhFsG...........lhs.......ssstphpssGlpCsp......Cst..hhsshpltsQLphtIRpalshYYpGWllC--ssCss+.TRQlslhu+R....Cl......................sC.+GpMph-Yo-+pLYNQLhYapuLFDl-+shppthp..................pptptcpl.hhspps+phappl+ssV-caLscsGhphVshu.slF ................................................................................sph.sDp.+a+sspthphpC.....p..Ctpp.has.u........................lhp......................tsspt.h..p..ss...h...h..tCsp......Cpt...............hh.s..hplsspLphplRphl.pcYY.pG.WL.lC-DssC...s..p.c...TRp..hsl..hs.p.R..................C.............................sC..pGphp..hcYo-+pLYsQLhYapt.lFDs-pshpph.t............................tttp.ppph..hh.s.tp.p.....thattlpsslc.phLpcsuhphVshsplF................................ 1 74 116 172 +8829 PF08997 UCR_6-4kD Ubiquinol-cytochrome C reductase complex, 6.4kD protein Mistry J, Sammut SJ anon pdb_1sqq Domain The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is an essential component of the mitochondrial cellular respiratory chain. This family represents the 6.4kD protein, which may be closely linked to the iron-sulphur protein in the complex and function as an iron-sulphur protein binding factor [1]. 24.00 24.00 24.10 24.60 23.80 23.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.68 0.72 -4.26 5 96 2009-01-15 18:05:59 2006-08-04 09:08:46 5 2 70 17 55 108 0 53.60 42 80.45 CHANGED Mlo+l.lGsKYsElA+uWlPoussWGuAGGVALVaFTDWRLlLDYVPYlNuKFcKDE ...........hh.+h.hG.+ahplspsWlPosusaGusuuluLlahTD.W..+LILsaVPahsGKFccs...... 
0 16 18 33 +8830 PF08998 Epsilon_antitox Bacterial epsilon antitoxin Mistry J, Sammut SJ anon pdb_1gvn Domain The epsilon antitoxin, produced by various prokaryotes, forms part of a postsegregational killing system which is involved in the initiation of programmed cell death of plasmid-free cells. The protein is folded into a three-helix bundle that directly interacts with the zeta toxin, inactivating it [1]. 25.00 25.00 43.50 42.80 20.20 19.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.08 0.72 -4.01 3 79 2009-09-10 22:13:13 2006-08-04 09:09:58 6 2 68 4 1 25 0 85.90 77 95.90 CHANGED AVTYEKTFEIEIINELSuSVYNRVLNYVLNHELDKsDSpLLEVNLLNQLclApcVNLFchSLEELQAlHEYWRSMN+YSKQILsKEKVA .AVTYEKTFEIEIINELSASVYNRVLNYVLNHELsppDopLLEVNLLNQLclApcVsLFp.shEELQAlHEYWRSMNpYSKQlLsKEKVA................... 0 0 0 1 +8831 PF08999 SP_C-Propep Surfactant protein C, N terminal propeptide Mistry J, Sammut SJ anon pdb_2esy Domain The N-terminal propeptide of surfactant protein C adopts an alpha-helical structure, with turn and extended regions. It's main function is the stabilisation of metastable surfactant protein C (SP-C), since the latter can irreversibly transform from its native alpha-helical structure to beta-sheet aggregates and form amyloid-like fibrils. The correct intracellular trafficking of proSP-C has also been reported to depend on the propeptide [1]. 25.00 25.00 28.30 27.80 23.00 21.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.27 0.72 -4.50 3 104 2009-01-15 18:05:59 2006-08-04 09:13:37 5 3 70 7 22 92 0 62.70 80 51.94 CHANGED MDMGSKEVLMESPPDYSAuPRGRFRIPCCPVHLKRLLIVVVVVVLVVVVIVGALLMGLHMSQKHTEMVLEMSIGG.PEsQQRLALSE+sGTTAT ..................................FtIPCCPVpLKRLLIVVVVVVLVVVVIVGALLMGL................................................... 
0 1 1 5 +8832 PF09000 Cytotoxic Cytotoxic Mistry J, Sammut SJ anon pdb_1e44 Domain The cytotoxic domain confers cytotoxic activity to proteins, enabling the formation of nucleolytic breaks in 16S ribosomal RNA. The structure of the domain reveals a highly twisted central beta-sheet elaborated with a short N-terminal alpha-helix [1]. 21.10 21.10 21.10 22.90 20.80 20.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.00 0.72 -3.96 5 56 2009-01-15 18:05:59 2006-08-04 09:15:10 5 19 47 6 17 71 1 81.20 38 15.33 CHANGED YHPAPKsptlsGLssL..+.stpKTPhQuGGuKRKRWpssKG....++IYEWDSpHGElEVYc.S+G+HLGShDPpTG-.lKssV+GRoIKt.l .............................t....l.uhsth....st.KTs..hpGGGshRtR...WhspKG......cpIYEWDSQH....G....clEsYc..pcGc.HLGpFDPpTGcpl..Ks.ss..tRplc.h............... 0 4 6 11 +8833 PF09001 DUF1890 Domain of unknown function (DUF1890) Mistry J, Sammut SJ anon pdb_1kjn Domain This domain is found in a set of hypothetical archaeal proteins. 25.00 25.00 136.10 135.90 24.70 19.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.73 0.71 -4.17 14 40 2009-01-15 18:05:59 2006-08-04 09:16:13 6 1 39 2 30 39 2 141.30 40 93.70 CHANGED llllGCPEsPVQhshslYLsptLcctGhclllAuNPAAh+LlcVADP-+aYlcch..h-lDcsltplttt..sh-hhhuFsHNDAulsYssTh.ttl.pscshullFG+c.s--Lscplp.....hssphlsu+AhHNPhPLps+l-clh LllLGCPEsPlQsPhslYLsptL+ccGacVslAuNPAAhKLlcluDPE+aYlppl..h-lDcslpslttt..-hDhlhuFlHNDAuloYhsTa.ptlhpsc.shAlVFG+c.s--Lschlc....thsschlsuRAaHNPtPLps+l-+lh... 0 7 19 25 +8834 PF09002 DUF1887 Domain of unknown function (DUF1887) Mistry J, Sammut SJ anon pdb_1xmx Domain This domain is found in a set of hypothetical bacterial proteins. 
23.90 23.90 23.90 23.90 23.60 23.80 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.31 0.70 -6.06 6 279 2012-10-11 20:44:44 2006-08-04 09:20:15 6 6 251 1 59 213 19 305.10 28 86.60 CHANGED MtlHVslls.QcsspLIssLLDpth+sc+hVFIsocsp..+EthpRLcplLp.pGIss....-hFplsstsshptl+pplpsLhE-hKh.ssElhLNsosGh+hhhLuAYEsFR.ShHhPIahl-ssSDclpaL.PsGscptpV.pcpIp..luDYLssaGA......Rst.u-sphscphsc+LhpLupRaAosAh-hGshLuoLNhLAoss..R+tphlslclo-cptuY.+-LshLlsDLs-ssLssa-sGl.lTFtsE-ARRFhNGpWLEhhVaspl+sIpp....lpDhuLsVQVhcchsEKp...VRNELDVVslssNKLHIIECKTtGhccDu.....DDTLYKLESLRD.LGGLpuRuhLVSFRPlpssDlh..RApchsltlIGPDELsDLKcHLpsWhpts .......................................................................................hhs.p....p.............th.s.p..chlhlh...sppt..........hhp.pL.p..p.hhp....h..th...p.............ch.hp.l.s.s..................shptlp...ptl.pp.lh.pphc........sccl..hhNhosGh+h..hhLusa.p.hh.p..php..h..s.....l.......h.hh-spps..p..h.hhl..ts...p....p....p..pl....sp.lp..ltshlshhGs.........p.th...p.........h......pht.p..hsp....h.......p....h...h...l................sph...h.tpps.......p......ph...p.th..ppp...t.hh...t.......p.l....l.s....L....ctt...h...h.....p.....h...p....pt....h.....h.....sF..s....c...p...h...+p......F..h..s..GpW.LEhhl.atpl.pphpp........l..pDhp...hs....l.....pl...h..hp.....h..t..-pc.................lpN....ELDVshh.h.ssp.L.hlIECKo.psh.....p..s-.s.............hLh+..Lpsh..tchhG.G.pscthL..Vs.h...t....hp....s.....h........Rspph...tl..l.l....tp.h..th...l.t....s............................................................................... 
1 24 44 53 +8835 PF09003 Phage_integ_N Bacteriophage lambda integrase, N-terminal domain Mistry J, Sammut SJ anon pdb_1kjk Domain The amino terminal domain of bacteriophage lambda integrase folds into a three-stranded, antiparallel beta-sheet that packs against a C-terminal alpha-helix, adopting a fold that is structurally related to the three-stranded beta-sheet family of DNA-binding domains (which includes the GCC-box DNA-binding domain and the N-terminal domain of Tn916 integrase). This domain is responsible for high-affinity binding to each of the five DNA arm-type sites and is also a context-sensitive modulator of DNA cleavage [1]. 21.80 21.80 22.10 23.00 21.40 21.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.61 0.72 -4.27 17 707 2012-10-03 08:51:45 2006-08-04 09:34:33 5 9 359 8 34 331 1 71.50 43 19.78 CHANGED MutRsRphs.hslP.NLhtp.s.+...sYapY+sPlTGK.aGLGp.D+ppAhspAhpANhplhpptscthLhpp.pss ..........MutR.Rpac..pclP.sLY.+.s....s....+s...YapYRcPlTGKpaGLGp.DcchAhs.AhpANpcltptptcphLshp.c.h....................... 0 7 19 27 +8836 PF09004 DUF1891 Domain of unknown function (DUF1891) Mistry J, Sammut SJ anon pdb_2cq2 Domain This domain is found in a set of hypothetical eukaryotic proteins. 20.60 20.60 20.60 20.70 20.40 20.20 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -8.06 0.72 -4.57 9 72 2009-01-15 18:05:59 2006-08-04 10:05:59 5 14 28 6 42 109 0 38.20 39 7.06 CHANGED WosNpsulhKKAQQRLaFLRpL+Ksphs....shhh+usIES .......hssNp....puhhKhuppchhFLRK.pKs+h......sLhh+psIEo.... 0 13 14 30 +8837 PF09005 DUF1897 Domain of unknown function (DUF1897) Mistry J, Sammut SJ anon pdb_2bn5 Domain This domain is found in Psi proteins produced by Drosophila, and in various eukaryotic hypothetical proteins. It has no known function. 
20.10 20.10 20.30 20.10 19.70 20.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.05 0.72 -4.50 12 369 2009-01-15 18:05:59 2006-08-04 10:08:04 5 23 86 2 175 293 0 32.50 48 8.58 CHANGED ssssssGpsDYSApWhEYYRphGhhcpA-hlcpphtt.p ..........ssssuGQ.sDYSAAWtEYY+p..G...pAthhtt.....s............ 1 36 48 112 +8838 PF09006 Surfac_D-trimer Lung surfactant protein D coiled-coil trimerisation Mistry J, Sammut SJ anon pdb_1m7l Domain This domain, predominantly found in lung surfactant protein D, forms a triple-helical parallel coiled coil, and mediates trimerisation of the protein [1]. 20.70 20.70 20.70 20.80 20.60 20.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.22 0.72 -4.23 10 72 2009-01-15 18:05:59 2006-08-04 10:14:29 6 6 32 67 37 66 0 42.60 52 13.77 CHANGED VsALRQQVEsLpGpLQRLQsAFSQYKKAsLFPDGQSVGEKIFKTAG ..................lsuL+QQlpsLptplppLQsuhSpYKKstLFPsGpuVGcKIFKTuG..... 0 1 1 6 +8839 PF09007 EBP50_C-term EBP50, C-terminal Mistry J, Sammut SJ anon pdb_1sgh Domain This C terminal domain allows interaction of EBP50 with FERM (four-point one ERM) domains, resulting in the activation of Ezrin-radixin-moesin (ERM), with subsequent cytoskeletal modulation and cellular growth control [1]. 25.00 25.00 46.10 45.00 20.20 19.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.89 0.72 -4.10 6 149 2009-01-15 18:05:59 2006-08-04 10:16:42 6 4 37 9 55 106 0 40.90 60 12.56 CHANGED -ssLcLshShAtAKE+A+.ppRupKRAPQMDWSK+pElFSNh .....-ssLcl..u.osAtAKE+A+.tpRssKRAPQMDWsKKpElFSNh. 0 2 8 21 +8840 PF09008 Head_binding Head binding Mistry J, Sammut SJ anon pdb_1lkt Domain The head binding domain found in the Phage P22 tailspike protein contains two regular beta-sheets, A and B, oriented nearly perpendicular to each other and composed of five and three strands respectively. The topology of the strands is exclusively antiparallel. 
The tailspike protein trimerises through this domain, and the direction of the strands with respect to the molecular triad is almost parallel for beta-sheet A, whereas beta-sheet B is perpendicular to the triad, forming a dome-like structure. This domain is dispensable for thermostability and SDS resistance of the intact protein, and its deletion has only minor effects on tailspike folding kinetics [1]. 21.50 21.50 24.60 23.60 20.20 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.47 0.71 -4.04 5 167 2009-09-11 14:20:10 2006-08-04 10:18:20 5 10 154 22 13 147 0 109.70 70 16.69 CHANGED MTDITsNlVVSMPSQlFTtsRuFKAVANGKIYIGKIDTDPVNPsNQIPVYlENEDGSaVpVAQPIIINAAGaPVYNGQIAKFVTVQGHSMAVYDAsGAQQFYFsNVLKYDPcph ..........MoD.ITANVVVSMPoQLFTMuRSFKAVANGKIYIGKIDTDP...VNPpNQIpVYlENEDGSHVsVoQPIIINAAGaPVYNGQ.IAKFVTVQGHSMAVYD..AYGuQQFYFPNVLKYDPDQ.......... 0 2 2 6 +8841 PF09009 Exotox-A_cataly Exotoxin A catalytic Mistry J, Sammut SJ anon pdb_1aer Domain Members of this family, which are found in prokaryotic exotoxin A, catalyse the transfer of ADP ribose from nicotinamide adenine dinucleotide (NAD) to elongation factor-2 in eukaryotic cells, with subsequent inhibition of protein synthesis [1]. 25.00 25.00 26.80 36.00 20.40 19.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.62 0.70 -5.47 2 67 2009-01-15 18:05:59 2006-08-04 10:19:20 5 3 24 45 1 86 0 202.00 71 36.78 CHANGED A.lphp..oGtpaLs-stsl.hospGspNWThpcL.tsHptLpccGYVFVGYHGT.h.AAQoIV..husV.Rupsp-.-thWtGhYlAscstlAaGYAp.pE.......PstttR.tpGshLRVYlPRuSL.tFYRTshsLtssEt..clpplIGHsLPLR.-AhTGPEptGGc.ETllGWshA.+sVsIPSsIPssshp.thslD.puls.KEQuISshPsY.....K..+--LK ANINIESRSGRSYLPENRAV.ITPQGVTNWTYQELEATHQALTREGYVFVGYHGTNHVAAQTIVNRIAPVPRGNNTENEEKWGGLYVATHAEVAHGYARIKEGTG-hGLPTRAER-sRGVMLRVYIPRASLERFYRTNTPLENAEc..HITpVIGHSLPLRNEAFTGPEusGGEDETVIGWDMAIHAVAIPS............................................ 
0 1 1 1 +8842 PF09010 AsiA Anti-Sigma Factor A Mistry J, Sammut SJ anon pdb_1jr5 Domain Anti-sigma factor A is a transcriptional inhibitor that inhibits sigma 70-directed transcription by weakening its interaction with the core of the host's RNA polymerase. It is an all-helical protein, composed of six helical segments and intervening loops and turns, as well as a helix-turn-helix DNA binding motif, although neither free anti-sigma factor nor anti-sigma factor bound to sigma-70 has been shown to interact directly with DNA. In solution, the protein forms a symmetric dimer of small (10.59 kDa) protomers, which are composed of helix and coil regions and are devoid of beta-strand/sheet secondary structural elements [1]. 25.00 25.00 25.90 77.90 22.00 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.01 0.72 -4.11 6 30 2009-01-15 18:05:59 2006-08-04 10:21:43 5 1 29 6 0 23 0 88.50 46 97.68 CHANGED M........Nts.Ehl+-IIuhASlLIKFupE.DIlssQpsFluFLNElGh+ossGc-FTpsSFRQMhsRLst-p+cELl-pFN.pGacslh+phhMYoNs ...MshplEhV+EIIolASILIKFupE.DIl-sRssFIAFLNElGl+s.pG+cLspsSFRclhpcLTp--+cpLI-EFN.pGa.EslaRhLhMYos..... 0 0 0 0 +8843 PF09011 HMG_box_2 DUF1898; HMG-box domain Mistry J, Sammut SJ, Coggill P anon pdb_2cto Domain This short 71 residue domain is an HMG-box domain. HMG-box domains mediate re-modelling of chromatin-structure. Mammalian HMG-box proteins are of two types: those that are non-sequence-specific DNA-binding proteins with two HMG-box domains and a long highly acidic C-tail; and a diverse group of sequence-specific transcription factor-proteins with either a single HMG-box or up to six copies, and no acidic C-tail [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.46 0.72 -3.61 10 1103 2012-10-02 14:16:02 2006-08-04 10:25:46 5 49 229 12 522 7615 215 70.30 37 19.76 CHANGED PsKPKushsAYhaFspcppsEhK+c......tP..ssshuEhoKtsSpcW+shSscEKccYp-pA+tcKscacpEhtsac ........................................KP..+..s.t.h.Su.Y.s.hF....lp.s....p....p....p....-....t.Kcc........................ps.......slsh..u.Eh..s..K.....c...C..S..E.+.................W..K..........s...h..Ss........c........E...Ksc..a...c.-....h...A+.....t...D..K.t.+.Y.-pEMpsa.h................................... 0 146 190 303 +8844 PF09012 FeoC DUF1920; FeoC like transcriptional regulator Mistry J, Sammut SJ anon pdb_1xn7 Domain This family contains several transcriptional regulators, including FeoC, which contain a HTH motif. FeoC acts as a [Fe-S] dependant transcriptional repressor [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.29 0.72 -4.27 36 803 2012-10-04 14:01:12 2006-08-04 09:28:13 5 3 792 2 109 876 60 71.10 45 86.96 CHANGED hLppl+palppcspsShs-Lup+Fphs.sslcuMLshWlpKG+lp+h.sssss.....uuuCpp......Ct.....tt.p....shYc ..................SLlQVRDh..LA.L.+.GR.h-AsQISppLs..sPpPhIsAMLppLEpMGKss.RIp--scGC...............LSGSCKS.CP.....EG+s........shcphh................................ 0 29 58 82 +8845 PF09013 YopH_N YopH, N-terminal Mistry J, Sammut SJ anon pdb_1huf Domain The N-terminal domain of YopH is a compact structure composed of four alpha-helices and two beta-hairpins. Helices alpha-1 and alpha-3 are parallel to each other and antiparallel to helices alpha-2 and alpha-4. This domain targets YopH for secretion from the bacterium and translocation into eukaryotic cells, and has phosphotyrosyl peptide-binding activity, allowing for recognition of p130Cas and paxillin [1]. 
19.60 19.60 19.70 135.40 18.80 17.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.50 0.71 -4.45 4 68 2009-09-11 06:44:49 2006-08-04 10:34:58 5 2 31 4 6 18 0 114.70 61 40.01 CHANGED hpIssLpp.lS.plsp.tsG-phG+L+........sThQtlo.opuhpssEKsFAppVLcHVpNssLspcDlApLL......phsNaEL+pstsGpslLsGLRo-QLoLpDAKlLL-AAhRQ .hplssLpp.ls.plsQ.tpGspsG+Lp........pT..hhp.upthppuEKsFAppVLpHVtNssLopcDhApLL......plsNhEL+pstsGpsllsuLRo-QhoLpDAKlLLEAAhRQ. 2 0 2 2 +8846 PF09014 Sushi_2 Beta-2-glycoprotein-1 fifth domain Mistry J, Sammut SJ anon pdb_1g4f Domain The fifth domain of beta-2-glycoprotein-1 (b2GP-1) is composed of four well-defined anti-parallel beta-strands and two short alpha-helices, as well as a long highly flexible loop. It plays an important role in the binding of b2GP-1 to negatively charged compounds and subsequent capture for binding of anti-b2GP-1 antibodies [1]. 22.10 22.10 22.10 23.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.07 0.72 -4.22 6 79 2009-01-15 18:05:59 2006-08-04 10:39:08 5 8 40 7 37 69 0 82.80 44 22.33 CHANGED RAsCpVPlK+upVlYsGhKhhltDltcshlhHG-+VoFaCKspcK+CSasssupChDGslplPuCacE.s.Ltah...+p.sS-lpsC .............+AsCplslK+upVlYpGc+lplp-hhcsshhHG-pVoFaCKNK..E.K.+CSYotsupChDGslclPpCFc.E.o.lt.ah...+o.sS-lpsC.............. 0 1 6 19 +8847 PF09015 NgoMIV_restric NgoMIV restriction enzyme Mistry J, Sammut SJ anon pdb_1fiu Domain Members of this family are prokaryotic DNA restriction enzymes, exhibiting an alpha/beta structure, with a central region comprising a mixed six-stranded beta-sheet with alpha-helices on each side. A long 'arm' protrudes out of the core of the domain between strands beta2 and beta3 and is mainly involved in the tetramerisation interface of the protein. These restriction enzymes recognise the double-stranded sequence GCCGGC and cleave after G-1 [1]. 
20.70 20.70 21.20 27.80 19.50 20.40 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.90 0.70 -5.41 6 56 2012-10-11 20:44:44 2006-08-04 10:56:01 5 2 50 6 9 43 3 239.70 54 95.23 CHANGED FH+pLl.-GslhhsN...........stu..lsSNADuSsspSpslApGltcplpu.polsc+h.....AGQTsGspFEcIsppFlccTFp+lpHLRPGsWpVppssutp+hp.IspaEQYuHLtcLs+lu+cpsELuuuLGsDYsIpPDIVlsRcstsDptIN.csc.LVD..ssssthos.LR+uN....supP..lLHASISCKWTIRSDRAQNTRoEALNLlRNRKGRlPHIVsVTAEPhPSRIuSlALGTGDIDCVYHhALsELppulpulGp-Dst-hLpshIsG+RL+DISDLPLDLul ..................FHtpLl.psslhhhN...........stG..VsSNADuSNppShsIApuIAchLtu.coluc+l.....sGQTuGstFEslsscFlppsF.+LpHlRPG.sWsVppluupsRhc.IucapQYsHLssLscAscpss-LAAALGsDYsIoPDIlVsRp..sDttIN.psphLV..D..-s.lsphus.LRtsN.....ushP...lLHASISCKWTIRSDRAQNARSEuLNLlRNRKGRLPHIVVVTAEPoPSRluSIALGTG-IDCVYHFAL.ELppslpsl..s.......h-DAh.....-hhhhMVsG+RLKDISDLPLDLAV.......................... 1 2 7 7 +8848 PF09016 Pas_Saposin Pas factor saposin fold Mistry J, Sammut SJ anon pdb_2b8i Domain Members of this family adopt a compact structure comprising five alpha helices. Charged and polar residues are exposed mostly on the surface, while most of the hydrophobic residues are buried inside the hydrophobic core of the helical bundle. The precise function of this domain is unknown, but it is has been shown to induce secretion of periplasmic proteins, especially collagenase [1]. 27.50 27.50 123.30 123.20 27.30 27.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.37 0.72 -3.98 6 38 2009-01-15 18:05:59 2006-08-04 10:57:41 5 1 33 1 8 27 0 75.40 68 99.10 CHANGED hpoLIY-TLhNLAsp-PEQHApIRQpLYEQLDLsF-KQLALYusVLGPASSGKLtspcslscAV-pAlclLEh.p+ MKTLIY-TLlsLAsQEPEQHApIRQNLYEQLDLPFDKQLALYSsALGPASSGKLEsppuIsNAVDsAl+LLEsPE+. 
0 0 1 6 +8849 PF09017 Transglut_prok Microbial transglutaminase Mistry J, Sammut SJ anon pdb_1iu4 Domain Microbial transglutaminase (MTG) catalyses an acyl transfer reaction by means of a Cys-Asp diad mechanism, in which the gamma-carboxyamide groups of peptide-bound glutamine residues act as the acyl donors. The MTG molecule forms a single, compact domain belonging to the alpha+beta folding class, containing 11 alpha-helices and 8 beta-strands. The alpha-helices and the beta-strands are concentrated mainly at the amino and carboxyl ends of the polypeptide, respectively. These secondary structures are arranged so that a beta-sheet is surrounded by alpha-helices, which are clustered into three regions [1]. 25.00 25.00 722.90 722.60 20.90 19.70 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.37 0.70 -5.36 4 22 2009-01-15 18:05:59 2006-08-04 11:06:04 5 1 10 5 0 27 0 397.00 82 99.39 CHANGED MppRRRhLsFATlGAVlCTAGhMPSsupA....AuuGsGEEctSYAETHcLTADDVcsINALNEpA.sAupsG.......PS....FRAPDu...DDRVTPPAEPLDRMPDsYRs.hGRApTVVNNYIRKWQQVYSHRDG+KQQMTEEQREhLSYGCVGVTWVNSG.YPTNRLAFA.FDEsKYKN-LKNuRPRssETRAEFEGRlAK-SFDEuKGFpRAR-VASVMNKALENAHDEusYlsNLKpELANsNDALhpEDuRSsFYSALRNTPSFKERsGGNaDPSKMKAVIYSKHFWSGQDppGSSDKRKYGDP-AFRPspGTGLVDMSRDRNIPRSPspPGEuaVNFDYGWFGAQTEADADKTVWTHGNHYHAPNGuLGsMHVYESKFRNWSsGYuDFDRGAYVITFIPKSWNTAPsKVcQGWP ..................M..RtphLsFAThuAllCsuGhhPSsupA....AssGsGEpctSYAETHtLTA-DVcNINALNcpA.ssupsG.......PS....FRAPDu...DDRVTPPAEPLDRMPDsYRs.hGRApTVVNNYIRKWQQVYSHRDG+KQQMTEEQREhLSYGCVGVTWVNSG.YPTNRLAFAFFDEsKYKN-LKNuRPRssETRAEFEGRlAK-SFDEuKGFpRAR-VASVMNKALENAHDEGsYlDNLKpELANtNDALhpEDuRSsFYSALRNTPSFKERsGGNaDPSKMKAVIYSKHFWSGQDppGSSDKRKYGDP-AFRPDpGTGLVDMS+DRNIPRSPspPGEuaVNFDYGWFGAQTEADADKTlWTHGNHYHAPNGuLGsMHV.YESKFRNWSsGYuDFDRGs.YVITFIPKSWNTAPsKVKQGWP.................. 
0 0 0 0 +8850 PF09018 Phage_Capsid_P3 P3 major capsid protein Mistry J, Sammut SJ anon pdb_1hqn Domain The P3 major capsid protein adopts a 'double-barrel' structure comprising two eight-stranded viral beta-barrels or jelly rolls, each of which contains a 12-residue alpha-helix. This protein then trimerises through a 'trimerisation loop' sequence, and is incorporated within the viral capsid [1]. 25.00 25.00 937.50 937.30 20.50 20.20 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.38 0.70 -5.88 3 7 2009-01-15 18:05:59 2006-08-04 11:10:51 6 1 6 78 0 6 0 394.00 100 99.75 CHANGED AQVQQLTPAQQAALRNQQAMAANLQARQIVLQQSYPVIQQVETQTFDPANRSVFDVTPANVGIVKGFLVKVTAAIKNNHATEAVALTDFGPANLVQRVIYYDPDNQRHTETSGWHLHFVNTAKQGAPFLSSMVTDSPIKYGDVMNVIDAPATIAAGATGELTMYYWVPLAYSETDLTGAVLANVPQSKQRLKLEFANNNTAFAAVGANPLEAIYQGAGAADCEFEEISYTVYQSYLDQLPVGQNGYILPLIDLSTLYNLENSAQAGLTPNVDFVVQYANLYRYLSTIAVFDNGGSFNAGTDINYLSQRTANFSDTRKLDPKTWAAQTRRRIATDFPKGVYYCDNRDKPIYTLQYGNVGFVVNPKTVNQNARLLMGYEYFTSRTELVNAGTISTT AQVQQLTPAQQAALRNQQAMAANLQARQIVLQQSYPVIQQVETQTFDPANRSVFDVTPANVGIVKGFLVKVTAAIKNNHATEAVALTDFGPANLVQRVIYYDPDNQRHTETSGWHLHFVNTAKQGAPFLSSMVTDSPIKYGDVMNVIDAPATIAAGATGELTMYYWVPLAYSETDLTGAVLANVPQSKQRLKLEFANNNTAFAAVGANPLEAIYQGAGAADCEFEEISYTVYQSYLDQLPVGQNGYILPLIDLSTLYNLENSAQAGLTPNVDFVVQYANLYRYLSTIAVFDNGGSFNAGTDINYLSQRTANFSDTRKLDPKTWAAQTRRRIATDFPKGVYYCDNRDKPIYTLQYGNVGFVVNPKTVNQNARLLMGYEYFTSRTELVNAGTISTT 0 0 0 0 +8851 PF09019 EcoRII-C EcoRII C terminal Mistry J, Sammut SJ anon pdb_1na6 Domain The C-terminal catalytic domain of the Restriction Endonuclease EcoRII has a restriction endonuclease-like fold with a central five-stranded mixed beta-sheet surrounded on both sides by alpha-helices. It cleaves DNA specifically at single 5' CCWGG sites [1]. 
20.30 20.30 20.30 20.70 19.90 20.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.92 0.71 -4.40 14 145 2012-10-11 20:44:45 2006-08-04 11:12:05 6 2 130 13 25 144 16 164.70 40 44.93 CHANGED F+hhEctllhp+lptGFs......sVDsFl........sauhSVpNRRKSRAG+SLE.HLcplhpstGl....pasppu...hT.....EssKKPDFLFPusstYc......sstFPscpLpMLusKTTCKDRWRQVLsEA-R..IcpKHLhTLp.G.lSpsQhpEMpcpslpLVVPpsl+poYspp.+scLhTlpsFI ...........................................FhhhEchhh.phlppsas........slDpFl........shupS.l.....sNRRKSRAGKSLEhHLcplF.ctGl....pFpsQA........hT...........Es..sKK...PDFL.FPuutsY+............s.tFs.scpLpMLusKTTCKDRWRQlLsEA......-..R......I..p..phaLhTLp-G.lStsQhpEMpcpslpLVVPp.slpcp..aspthpsplholtsFI................. 0 11 17 22 +8852 PF09020 YopE_N YopE, N terminal Mistry J, Sammut SJ anon pdb_1l2w Domain The N terminal YopE domain targets YopE for secretion from the bacterium and translocation into eukaryotic cells [1]. 21.60 21.60 24.20 78.80 20.70 18.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.75 0.71 -4.09 2 31 2009-01-15 18:05:59 2006-08-04 11:19:43 5 1 27 6 2 13 0 104.70 97 52.96 CHANGED MKISSFISTSLPLPTSVSGSSSVGEMSGRSVSQQpS-QYANNLAGRTESPQGSSLASRIhE+LSShAHSsItFIpRMFSEGSHKPVVTPAPTPAQMPSPTSFSDSIKQLAAETLPKYMQQLsSLDA ..........................MSGRSVSQQpSDQYANNLAGRTESPQGSSLASRIIERLSShAHSVIGFIQRMFSEGSHKPVVTPAPTPAQMPSPTSFSDSIKQLAAETLPKYMQQLNSLDA 0 0 1 1 +8853 PF09021 HutP HutP Mistry J, Sammut SJ anon pdb_1wmq Domain The HutP protein family regulates the expression of Bacillus 'hut' structural genes by an anti-termination complex, which recognises three UAG triplet units, separated by four non-conserved nucleotides on the RNA terminator region. L-histidine and Mg2+ ions are also required. 
These proteins exhibit the structural elements of alpha/beta proteins, arranged in the order: alpha-alpha-beta-alpha-alpha-beta-beta-beta in the primary structure, and the four antiparallel beta-strands form a beta-sheet in the order beta1-beta2-beta3-beta4, with two alpha-helices each on the front (alpha1 and alpha2) and at the back (alpha3 and alpha4) of the beta-sheet [1]. 25.00 25.00 41.80 41.50 24.20 23.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.44 0.71 -3.93 17 271 2009-01-15 18:05:59 2006-08-04 11:30:01 6 2 242 36 67 175 0 131.60 41 90.58 CHANGED +lu+sAlhhAlopocEEEtplcphh.tppsh+ssssclGGp..h.psspKllcsslsAAK+pGVIpcs.ap-ptAlhtAThEAlptlhspshulss.....GhKluIsR........cs-alAVAlahslGlhhL..-c.slGLGhp ...pluRhAhhlAlupppEppthh.p....htcGh+ssssclGu......sspKlltuh.sAAKpstVIcss.ap-ptAlhtAhhEALhtltp.ttltLss.....Gh+huIsR........puEaluVAlassIGh.hh...-c.uIGLGhp............. 0 30 50 58 +8854 PF09022 Staphostatin_A Staphostatin A Mistry J, Sammut SJ anon pdb_1oh1 Domain The staphostatin A polypeptide chain folds into a slightly deformed, eight-stranded beta-barrel, with strands beta-4 through beta-8 forming an antiparallel sheet while the N-terminus forms a a psi-loop motif. Members of this family constitute a class of cysteine protease inhibitors distinct in the fold and the mechanism of action from any known inhibitors of these enzymes [1]. 24.10 24.10 24.10 24.10 23.40 23.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.28 0.72 -4.20 3 151 2012-10-01 18:54:33 2006-08-04 11:30:27 5 1 150 1 2 24 0 105.00 74 97.89 CHANGED McpYpLINIccscscapEKYWLHILEGlWHPp-lsTSPLKITFNKsIsPsYICKhINEDSR+IILsNsDNoNIIIEIIIINc+KIlFNllNKEuLGTSPKITFIK MEQhELFSIDKF..KCNSEAKYYLNIIEGEWHPQDLNDSPLKFILSTSDDSDYICKYINTEHKQLTLYNKNNSSIVIEIFIPNDNKILLTIMNTEALGTSPRMTFIK... 
0 1 1 2 +8855 PF09023 Staphostatin_B Staphostatin B Mistry J, Sammut SJ anon pdb_1y4h Domain Staphostatin B inhibits the cysteine protease Staphopain B, produced by Staphylococcus aureus, by blocking the active site of the enzyme. The domain adopts an eight-stranded mixed beta-barrel structure, with a deviation from the up-down topology of canonical beta-barrels in the amino-terminal part of the molecule [1]. 25.00 25.00 30.10 30.00 22.20 19.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.35 0.72 -4.23 2 163 2012-10-01 18:54:33 2006-08-04 11:32:06 5 1 163 8 2 15 0 107.00 97 98.19 CHANGED MYQLpFIpls.DshpLT+.cQssIpLFIGNW.N.phQKSIsIRpGsDTsHNQYpIL.IDTtHQRIKhoS.-s.plhYILDY-DTpHIhhQTSsKpuhGTSRPIhYE+ ...MYQLQFINLVYDTT.KLTHLEQTNINLFIGNWSNHQLQKSICIRH..GD.DTSHNQYHILFIDTAHQRIKFSSIDNEEIIYILDYDDTQHILMQTSSKQGIGTSRPIVYER. 0 1 1 2 +8857 PF09025 YopR_core YopR Core Mistry J, Sammut SJ anon pdb_1z21 Domain The YopR core domain, predominantly found in the Yersinia pestis virulence factor YopR, is composed of five alpha-helices, four of which are arranged in an antiparallel bundle. Little is known about this domain, though it may contribute to the virulence of the protein YopR [1]. 
25.00 25.00 98.30 98.10 22.20 21.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.90 0.71 -4.19 9 74 2009-01-15 18:05:59 2006-08-04 11:50:47 5 1 69 1 9 35 16 140.50 43 81.11 CHANGED s-ushh....p+hp.sssL-shLus.sPuspRElLW.hap.....pGscpss...ppLhtslppcLlucFuGp.hsl.sshDhsEL+uhlppa.PLGup+EpsLLplhu-LKsh.....PshtaLs-LsRcEL.hLIPhNuMVcNLh+pSHKLDLE .......................................................s-lhspLEptLpuEssptsRElLW.ta......Ausssst...p.Lhss.lcEcLLuRFuQp.tsl.PshDhsEL+uhLppa.shG+ppEshLLQlLtulKss.....sGh.YLu-Llp+EL.lLlPhNuhVDNLl+NSHKlDh- 0 1 3 4 +8858 PF09026 CENP-B_dimeris Cenp-B_dimeris; Centromere protein B dimerisation domain Mistry J, Sammut SJ anon pdb_1ufi Domain The centromere protein B (CENP-B) dimerisation domain is composed of two alpha-helices, which are folded into an antiparallel configuration. Dimerisation of CENP-B is mediated by this domain, in which monomers dimerise to form a symmetrical, antiparallel, four-helix bundle structure with a large hydrophobic patch in which 23 residues of one monomer form van der Waals contacts with the other monomer. This CENP-B dimer configuration may be suitable for capturing two distant CENP-B boxes during centromeric heterochromatin formation [1]. 24.30 24.30 24.30 24.50 24.20 24.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.46 0.72 -3.77 2 32 2012-10-08 13:44:34 2006-08-04 11:51:16 5 5 24 4 19 37 1 93.70 69 22.07 CHANGED PTLHFLEGtEDS-SDS-EEE-D--EDE..-DE-D-E-sDEVPVPSFGEAMAYFAMVKRYLTS.PIDDRVQSHILHLEHDLVHVTRKNHARQAGsRGLGHQS ............PTLHFLEGtEDS-SDS-EE--..-E--..D---.-.D-D.D-EDsD.EVPVPSFGEAMAYFAMVKRY..LTSFPIDDR....VQSHILHLEHDLVHVTRKNHARQAGVRGLGHQS.................. 0 4 4 7 +8859 PF09027 GTPase_binding GTPase binding Mistry J, Sammut SJ anon pdb_1cf4 Domain The GTPase binding domain binds to the G protein Cdc42, inhibiting both its intrinsic and stimulated GTPase activity. 
The domain is largely unstructured in the absence of Cdc42 [1]. 21.30 21.30 22.10 21.30 21.10 20.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.12 0.72 -4.38 5 170 2009-01-15 18:05:59 2006-08-04 11:51:45 5 14 62 1 77 128 0 62.20 49 7.74 CHANGED AGlSAQDIpVPLKsGFlHoG+GhuNsR+C.WGsPucF-NsYLs..hDP.shhhspLSsA..tPTQHLuslG ......AGlSAQDISpPLpsSFIHT..GHGDu..sP++C.WGhPD+ID-lYLsNPMDPPDlhss..p...usu....tph.stht........................ 0 15 22 43 +8860 PF09028 Mac-1 Mac 1 Mistry J, Sammut SJ anon pdb_2avw Domain The bacterial protein Mac 1 adopts an alpha/beta fold, with 14 beta strands and 9 alpha helices. The N-terminal domain is made up predominantly of alpha helices, whereas the C-terminal domain consists predominantly of beta sheets. Mac 1 blocks polymorphonuclear opsonophagocytosis, inhibits the production of reactive oxygen species and contains IgG endopeptidase activity [1]. 20.10 20.10 20.20 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.26 0.70 -5.67 3 78 2009-01-15 18:05:59 2006-08-04 12:34:31 5 9 60 8 13 76 1 280.70 32 54.08 CHANGED KhsDuIuAPVlApsplhVcMKV.DRGlEs-sapsDsEV....cTSEss.YcVTchcc.................lWs+GlTPPAhFspGGhV.........apAPahAtpGaYDhNKolNG+..DlsLCaAATASNMLHWWFEQNs-YIEpYLKc..cKQKlslGEphclLDlR+YIcohQDQsN...SclFNhFRsh...uausRRsGalsDuLlDhFINGYYLpV.+sGssNLscoY...DpRGGlFsDVFs+sspsKpLTsR..h+scThKEFu-Ll+KsL-sK+ALGLoa..opuNh.hsHlVTlWGA-YDssGNIcAlYITDSD.spAsIGhK+YslGVcuuGNltlsAc+lK........scstGAhlDGLaTLcpGQDpW 
.................................................................................................................h......phpp.............................lWscGlss..Pt..p.pst.h..............apA.Pa.ss.......p.GaYDhsKs....hNup......Dp.LChAAsAuNMlHWWh-QNpppl-tYLpc...cptpl..htpp.thh-l+chlsoh....tsQps....SplFphF+s....hhuhpp.GhhsDhllDhFINGYh.ph..ptsss.....lp.....cs....Dp+GGhF.sVF....ppphLTs+..hhtts.hcchup.l+p.LppspslGLoa..phhsh...hsHllslWGA-a..D.sp.GplcA..lYlTDSD..p.......pt.....p.....lGhK+YhlshsssGp..tls.pphp........ppshGu.l.hlaTlphGps.W...................................................... 1 7 11 13 +8861 PF09029 Preseq_ALAS 5-aminolevulinate synthase presequence Mistry J, Sammut SJ anon pdb_1h7d Domain The N terminal presequence domain found in 5-aminolevulinate synthase exists as an amphipathic helix, with a positively charged surface provided by lysine residues and no stable helix at the N-terminus. The domain is essential for the import process by which ALAS is transported into the mitochondria: translocase of the outer membrane (Tom) and translocase of the inner membrane protein complexes appear responsible for recognition and import through the mitochondrial membrane. The protein Tom20 is anchored to the mitochondrial outer membrane, and its interaction with presequences is thought to be the recognition step which allows subsequent import [1]. 
20.70 20.70 21.30 22.40 20.40 20.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.79 0.72 -4.05 2 196 2009-01-15 18:05:59 2006-08-04 12:34:46 5 6 50 2 81 167 0 83.20 29 19.56 CHANGED hlAhAhllRpCPhLupsPpshLt+suKo..hLh.htRCPl............LuTthPThpQ...KsTpsGs-psuhA.S+CPFhhsEhtcRcutlVp+Au.ElpEDVpphps .....................................hlp.pCPhLspsspshltpshts..hl.hhtpCPh....................................................h..sst......st.....p.p........................tt..t..s..s....sp.pt..........s..hA..spCPFh.u.phtt.tstlh.cAu.ElQEDVpthp............................................................................... 0 7 12 32 +8862 PF09030 Creb_binding Creb binding Mistry J, Sammut SJ anon pdb_1kbh Domain The Creb binding domain assumes a structure comprising of three alpha-helices which pack in a bundle, exposing a hydrophobic groove between alpha-1 and alpha-3 within which complimentary domains found in the protein 'activator for thyroid hormone and retinoid receptors' (ACTR) can dock. Docking of these domains is required for the recruitment of RNA polymerase II and the basal transcription machinery [1]. 21.10 21.10 21.60 24.50 20.70 20.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.57 0.72 -3.84 4 148 2009-01-15 18:05:59 2006-08-04 12:48:14 5 14 46 7 70 100 0 105.30 55 4.94 CHANGED ..ssQWsQusLPQ...hpsuhPRPVhp.t.AQtuss...GPR.susQss.....tuI..uPsALQDLLRTLKSPSSPQQQQQVLNILKSNPQLMAAFIKQRTAKYsAsQPt. ..................................................s.W.t...Qus.lP..Q..Q........h.....sG....M...s.RPs..M.....u.Q..t.u.s..........uP.....p....ss.s...Qs.s....................tuh....u.sALQ-LLRTL+SPSS.PpQQQQVLNIL+SNPQLMAAFI..KQ.RsAKYs.usp.Pt.......... 
0 11 19 36 +8864 PF09032 Siah-Interact_N Siah interacting protein, N terminal Mistry J, Sammut SJ anon pdb_1ysm Domain The N terminal domain of Siah interacting protein (SIP) adopts a helical hairpin structure with a hydrophobic core stabilised by a classic knobs-and-holes arrangement of side chains contributed by the two amphipathic helices. Little is known about this domain's function, except that it is crucial for interactions with Siah. It has also been hypothesised that SIP can dimerise through this N terminal domain [1]. 21.30 21.30 21.30 21.40 20.80 21.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.83 0.72 -3.92 10 147 2009-01-15 18:05:59 2006-08-04 12:59:19 6 6 107 6 89 136 0 69.90 32 31.30 CHANGED hsphlcpLptDL-El+pLlEpApRpRVpDlLppEh+KlEsElppppp....QcpQ..ppppEspcPou.hsssspsYTVKI ..........ppLppDL-El+sLLppApRpRV+-hLosEhp+lEsEltpt.h.p......p...tp.......p........................................................ 0 23 36 61 +8865 PF09033 DFF-C DNA Fragmentation factor 45kDa, C terminal domain Mistry J, Sammut SJ anon pdb_1iyr Domain The C terminal domain of DNA Fragmentation factor 45kDa (DFF-C) consists of four alpha-helices, which are folded in a helix-packing arrangement, with alpha-2 and alpha-3 packing against a long C-terminal helix (alpha-4). The main function of this domain is the inhibition of DFF40 by binding to its C-terminal catalytic domain through ionic interactions, thereby inhibiting the fragmentation of DNA in the apoptotic process. In addition to blocking the DNase activity of DFF40, the C-terminal region of DFF45 is also important for the DFF40-specific folding chaperone activity, as demonstrated by the ability of DFF45 to refold DFF40 [1]. 
25.00 25.00 39.80 39.30 23.20 22.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.00 0.71 -4.41 5 60 2009-01-15 18:05:59 2006-08-04 12:59:53 5 2 35 2 30 53 0 163.20 61 53.64 CHANGED DGGTAWluRES..MEsD-sD.suGsD..+WKNLAcQLKEDLSSIILMSEEDLQsLIDVPsuELAusLshSppKVQuLQsTLQRVLDRREEERQSKQLLELYL+AlEKEGsh..+pQEscAsLu-E.DuVDoGh...sElsScsshuSpILhlLK-KsuPELSLSoQDLEhVsK ............................DGGTAWloQES..h-.s.DEsDouuGl...KWKNlARQ.LK-DLSSIILLSEEDLQsLlDlPCuDLApELsQSssplQsLQpTLQQVLDpREEsRQSKQLLpLYLpALEKEGulLSKppES...cA..u.hu.-EhDAVDo.Gh..spEsuSclsLsSplLssLKEKsAPELSLSSQDLElVs.......... 0 2 4 11 +8866 PF09034 TRADD_N TRADD, N-terminal domain Mistry J, Sammut SJ anon pdb_1f3v Domain The N terminal domain of 'Tumour necrosis factor receptor type 1 associated death domain protein' (TRADD) folds into an alpha-beta sandwich with a four-stranded beta sheet and six alpha helices, each forming one layer of the structure. The domain allows docking of TRADD onto 'tumour necrosis factor receptor-associated factor' (TRAF): the binding is at the beta-sandwich domain, away from the coiled-coil domain. Binding ensures the recruitment of cIAPs to the signaling complex, which may be important for direct caspase-8 inhibition and the immediate suppression of apoptosis at the apical point of the cascade [1]. 25.00 25.00 96.60 95.50 20.00 18.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.55 0.72 -4.07 6 45 2009-01-15 18:05:59 2006-08-04 13:05:06 5 2 37 2 25 42 0 110.40 57 37.13 CHANGED -Su.Guh-uhEILKlHcuDP.LhVpLKFsuhssCpRFLpuYtpGALppuLpp+hsphLAl..cuLtlpTpLKAGscpLDthLsD.EpCLpaIptpQP-RLRD-ElAELEppLps ...-Su.GS.-slphLKIHcSDPQLIVQLRFCGcpsCsRFLcuYREGALRsuLQppLusuLA...pul.sLpLEL+AGAEcLDshLsDEERCLpsIhtppPDRLRDEEluELE-tL+s........... 
0 1 3 10 +8867 PF09035 Tn916-Xis Excisionase from transposon Tn916 Mistry J, Sammut SJ anon pdb_1y6u Domain The phage-encoded excisionase protein Tn916-Xis adopts a winged-helix structure that consists of a three-stranded anti-parallel beta-sheet that packs against a helix-turn-helix (HTH) motif and a third C-terminal alpha-helix. It is encoded for by Tn916, which also codes for the integrase Tn916-Int. The protein interacts with DNA by the insertion of helix alpha-2 into the major groove and the contact of the hairpin that connects strands beta-2 and beta-3 with the adjacent phosphodiester backbone and/or minor groove. Tn916-Xis stimulates phage excision and inhibits viral integration by stabilising distorted DNA structures [1]. 22.90 22.90 22.90 22.90 22.70 22.80 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.25 0.72 -3.88 8 457 2012-10-04 14:01:12 2006-08-04 13:11:25 5 1 293 1 23 251 24 66.20 52 93.39 CHANGED MN+.-..VPIWEKhoLTIEEAAcYasIGpNKLRpLs.cNPsscFVLalGo+hhIKRKcFEcaI-shssl .................MppsD..lPIWE+YTLTIEEASKYFRIGENKLR+LAE..E..Nc..sAsW...l..IhNGN..RIQIKRKQFEK.hIDsLcsI............. 0 13 18 19 +8868 PF09036 Bcr-Abl_Oligo Bcr-Abl oncoprotein oligomerisation domain Mistry J, Sammut SJ anon pdb_1k1f Domain The Bcr-Abl oncoprotein oligomerisation domain consists of a short N-terminal helix (alpha-1), a flexible loop and a long C-terminal helix (alpha-2). Together these form an N-shaped structure, with the loop allowing the two helices to assume a parallel orientation. The monomeric domains associate into a dimer through the formation of an antiparallel coiled coil between the alpha-2 helices and domain swapping of two alpha-1 helices, where one alpha-1 helix swings back and packs against the alpha-2 helix from the second monomer. Two dimers then associate into a tetramer. The oligomerisation domain is essential for the oncogenicity of the Bcr-Abl protein [1]. 
20.90 20.90 21.10 32.00 20.40 19.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.81 0.72 -4.02 4 67 2009-09-11 10:35:23 2006-08-04 13:43:33 5 10 25 8 23 63 0 72.20 81 8.04 CHANGED hVpPluFAEAW+AQFP-..u-PPhM-LRShGD........lEQEL-+C+uSIRRLEpEVN+ERFRMIYLQTLLAKERKSYD+QRWGF+R ..MV-PVGFsEAW+AQFPD..SEPP+MELR..SVGD........IEQELERCKASIRRLEQEVNpERFRMIYLQTLLAKEKKSYDRQRWGFRR....... 1 2 5 15 +8869 PF09037 Sulphotransf Stf0 sulphotransferase Mistry J, Sammut SJ anon pdb_1tex Domain Members of this family are essential for the biosynthesis of sulpholipid-1 in prokaryotes. They adopt a structure that belongs to the sulphotransferase superfamily, consisting of a single domain with a core four-stranded parallel beta-sheet flanked by alpha-helices [1]. 20.70 20.70 20.70 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.47 0.70 -4.88 13 172 2012-10-05 12:31:09 2006-08-04 13:59:44 5 5 158 4 52 155 31 228.40 33 80.83 CHANGED uYllCuosRSGSTLLsclLpuTGsAGpPppFFp........pss.hppWhtths.........shp.t.s-hthscsahptslptG+ussGVaGh+LMhpphshlhppLspL.PshsuDshR..lccsaG.p.shalHLpRcDpVuQAVShhRApQotlW+..ttsDusc.tR................tspYDssuIsphlphLccpEpuWpsWFtppsl-PlcIsY-sLsssPptslsslLptLGl-sphAsthsPslp+.ADppSc-WscRYRpD .................................................................................................YllhuotRSGSTLLsc.LpuTG....ss....GpPpEaFp........................ssp..ccWhtthp................shp...s.ps-ht..s.s.tah.ptlh..stG.p.osNGlaGs.KL....Mh.sQhshl.p+.h...t..tl.ss...hs.........s..cshc..hlc.cl.h.G..p.tshhlHlpR.DlVsQAVShh......RA...hQ.......T.plW+.......sp.s.....Dspccsp....................shYcs......s.....sIu+hlp.tLcs.p-psWc.sWFs.p.cs.Ic.P...lcl.sY.s.L...h.c..s....s...s..h..lusVL-tlG...-..s..p.h..A.s..........tP...hlc+Qu...sppocEWspRa+t-................................................................................ 
0 10 31 42 +8870 PF09038 53-BP1_Tudor Tumour suppressor p53-binding protein-1 Tudor Mistry J, Sammut SJ anon pdb_1xni Domain Members of this family consist of ten beta-strands and a carboxy-terminal alpha-helix. The amino-terminal five beta-strands and the C-terminal five beta-strands adopt folds that are identical to each other. This domain is essential for the recruitment of proteins to double stranded breaks in DNA, which is mediated by interaction with methylated Lys 79 of histone H3 [1]. 20.50 20.50 20.80 21.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.49 0.71 -4.29 3 89 2012-10-02 16:56:36 2006-08-04 14:11:33 5 3 51 16 40 84 2 117.20 74 7.46 CHANGED GSSFVGLRVVAKWSSNGYFYSGKIT+DAGuGKYKLLFDDGYECDVLGKDILLCDPIPLDTEVTALSEDEYFSAGVVKGHKKESpELYYSIEKDGQRKWYKRMAVILSLEQGNRLREQYGLGP ......GsSFVGLRVVAKWS.SNGYFYSGKITR.D.VGAGKYKLLFDDGYEC.....DVLGKDILLC...DPIPL.DT..E..V...TA.L.S.ED...EYFSAGV...V.KG..HR.K.E..S..GELYYS.IE.KE.GQRK.W..YKRMAVILSLEQGNRLREQYGLGP...................... 0 7 9 23 +8871 PF09039 HTH_Tnp_Mu_2 Mu_I-gamma; Mu DNA binding, I gamma subdomain Mistry J, Sammut SJ anon pdb_2ezh Domain Members of this family are responsible for binding the DNA attachment sites at each end of the Mu genome. They adopt a secondary structure comprising a four helix bundle tightly packed around a hydrophobic core consisting of aliphatic and aromatic amino acid residues. Helices 1 and 2 are oriented antiparallel to each other. Helix 3 crosses helices 1 and 2 at angles of 60 and 120 degrees, respectively. Excluding the C-terminal helix 4, the fold of the I-gamma subdomain is remarkably similar to that of the homeodomain family of helix-turn-helix DNA-binding proteins, although their amino acid sequences are completely unrelated [1]. 
20.50 20.50 20.50 20.50 20.40 20.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.27 0.72 -4.32 6 131 2012-10-04 14:01:12 2006-08-04 14:14:54 6 14 112 4 25 141 1 106.80 35 17.57 CHANGED hapVpshscsDWhPhLlst....sttcppsphu.Is-cAWtaFpuDYLR.EKPohssCYcRLcpAAp-pGW.sIPShuohpR+h.pplPcsplVhsREGpauLp+LhPsQpRo .................................pVpthscsDWhssLlst....tss+ps.s+h..u.chsp-AW.p.F.hpuD.YLR.E...+Ps..aspCYcRLc...ts..Accp..G..W...s.....IPShpohpRRl..pplscs...h...hVhtRE.G-cALhc.hhPuQpRo........... 0 9 18 22 +8872 PF09040 H-K_ATPase_N Gastric H+/K+-ATPase, N terminal domain Mistry J, Sammut SJ anon pdb_1iwc Domain Members of this family adopt an alpha-helical conformation under hydrophobic conditions. The domain contains tyrosine residues, phosphorylation of which regulates the function of the ATPase. Additionally, the domain also interacts with various structural proteins, including the spectrin-binding domain of ankyrin III [1]. 25.00 25.00 46.20 45.50 24.60 23.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -8.12 0.72 -3.85 5 35 2009-01-15 18:05:59 2006-08-04 14:31:05 6 6 21 2 19 36 0 41.00 88 4.36 CHANGED GKAENYELYSVELGPGPGGDMAAKMS.KKKAGGGGGKKKEKL ..GKAENYELYSVELG.PGPGGDMAAKMS.KKK.AGGGGGK+KEKL...... 0 1 1 2 +8873 PF09041 Aurora-A_bind Aurora-A binding Mistry J, Sammut SJ anon pdb_1ol5 Domain The Aurora-A binding domain binds to two distinct sites on the Aurora kinase: the upstream residues bind at the N-terminal lobe, whilst the downstream residues bind in an alpha-helical conformation between the N- and C-terminal lobes. The two Aurora-A binding motifs are connected by a flexible linker that is variable in length and sequence across species. Binding of the domain results strong activation of Aurora-A and protection from deactivating dephosphorylation by phosphatase PP1 [1]. 
20.20 20.20 21.60 23.00 19.50 17.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.46 0.72 -4.21 6 51 2009-01-15 18:05:59 2006-08-04 14:31:21 5 3 31 3 24 49 0 63.60 57 8.93 CHANGED MSpspsoYSaDAPosFINFSSLc--.DhcNhDSWFDcpANLENh.sspctluclhQspsshpKstLQp ......MSQspoSYSaDAPocFINFoSLc-EtDspNlDSWFEEKANLENKh.ucNGhutlaQuKTsLRKsslpp................. 0 2 3 7 +8874 PF09042 Titin_Z Titin Z Mistry J, Sammut SJ anon pdb_1h8b Domain The titin Z domain, that recognises and binds to the C-terminal calmodulin-like domain of alpha-actinin-2 (Act-EF34), adopts a helical structure, and binds in a groove formed by the two planes between the helix pairs of Act-EF34. This interaction is essential for sarcomere assembly [1]. 21.50 21.50 28.40 21.50 19.80 21.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.92 0.72 -4.36 13 282 2009-01-15 18:05:59 2006-08-04 14:33:34 6 47 33 1 104 290 0 41.60 41 1.23 CHANGED pEplR+Es...........EK.sAVspVVlAssKA+ppEslscspEphus+pEQ ..............pEpl+KEs..........................................................-K.sAVspVVlAssK.A+cpE.h.+spEthss+p-Q.............. 0 6 7 19 +8875 PF09043 Lys-AminoMut_A D-Lysine 5,6-aminomutase alpha subunit Mistry J, Sammut SJ anon pdb_1xrs Domain Members of his family are involved in the 1,2 rearrangement of the terminal amino group of DL-lysine and of L-beta-lysine, using adenosylcobalamin (AdoCbl) and pyridoxal-5'-phosphate as cofactors. The structure is predominantly a PLP-binding TIM barrel domain, with several additional alpha-helices and beta-strands at the N and C termini. 
These helices and strands form an intertwined accessory clamp structure that wraps around the sides of the TIM barrel and extends up toward the Ado ligand of the Cbl cofactor, providing most of the interactions observed between the protein and the Ado ligand of the Cbl, suggesting that its role is mainly in stabilising AdoCbl in the precatalytic resting state [1]. 25.00 25.00 70.20 70.10 17.50 17.30 hmmbuild -o /dev/null HMM SEED 509 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.65 0.70 -6.38 8 226 2009-01-15 18:05:59 2006-08-04 14:41:35 6 3 199 25 54 150 6 435.60 50 68.49 CHANGED LsLDhshV-+ARstA+cIAtssQcFI-pHTTVoVERTlsRLLGIDGVDspsVPLPNlVVDHlK-pssLspGAAhaluNAMlpTGpoPQEIAEuVAsGELDLpphPhp-ptcI+tshpslAptsV-+I+uNR+pRE-hlcph.G-tssPaLYVIVATGNIYEDVVQApAAARQGADVIAVIRoTuQSLLDYVPaGATTEGFGGTYATQENFRIMR+ALDEVGpElGRYIRLCNYCSGLCMPEIAAMGALERLDhMLNDALYGILFRDINMpRTLlDQaFSRlINGaAGIIINTGEDNYLTTADAhEcAHTVLASQhINEQFALhAGLP-EQMGLGHAFEM-P-LENGFLaELAQApMsREIFPKAPLKYMPPTKaMTGNIFKGHlQDAhFNlVolhTsQpIHLLGMLTEAIHTPFhpDRhLuIENAKYIFNNh+clusEIpFKcGGhIppRApEVLcKAhsLLEpIEpcGLFpuIE+GhFGsV+RPhDGGKGLsGVlpKstsYaNPFl-LM .................................................................................................................................................................................cYhcph.ss.p.s.sPhlhs....lAoGch.-DlcphRhAAhpGADhIhVIRTTGQShlDhl.EG.TsEGhGGs.hTpcphRh.RKAhD.lp-.EVGR.IphpsYsSGlshPElAlhhA.EGlsshhpDs.YslLaRsINhhRoaVDtt.u+plhAhAsIh..sGtcNh.sTA.c.t.h.csh.p.l....hVpchlNphaulhsGhPc-.IGLups.....sPss.sphhh-LsaAlhlR-LFs-h.h+h..sT+YhpuslhcuhhpcslssLlohLTutcIQ.shhPsEuhssPWhpspshulposK.sasuhcGlt-.lphpc-G.lschsR-lh-+AlshLpE.h..cs.s.G.......hFsAlEcGhFsD...........IsRstpGGhu.usVsERDsDYhsPsh-..th.... 0 31 42 52 +8876 PF09044 Kp4 Kp4 Mistry J, Sammut SJ anon pdb_1kpt Domain Members of this fungal family of toxins specifically inhibit voltage-gated calcium channels in mammalian cells. 
They adopt an alpha/beta-sandwich structure, comprising a five-stranded antiparallel beta-sheet with two antiparallel alpha-helices lying at approximately 45 degrees to these strands [1]. 25.00 25.00 26.30 26.20 24.30 24.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.86 0.71 -4.32 2 55 2009-01-15 18:05:59 2006-08-04 15:01:32 5 2 26 2 49 54 0 118.20 34 83.92 CHANGED .hLhhshh.ssstpLGINCRGSupC..hhsst.hh.hhRs.puhss.sphassGE+hApVs.....ssssuhsAalQs.sssshushcuhtHh..lspHGC+VCGSsP.....usNsVscGpLThNYV.Nu ........................................h.h....hshh.h.ss..suALGINCRGSuhC..s...s....u.....s.....ss....t...h..hphhttl.pshss...s+passGpcIACss..........s....t..su.......lCAF...hQsh.s.u.s...sssshpu..ltph..............Lh-HGCptCGSlP...stssNsVspGpLThNhV...s....... 0 7 24 37 +8877 PF09045 L27_2 L27_2 Mistry J, Sammut SJ anon pdb_1y76 Domain The L27_2 domain is a protein-protein interaction domain capable of organising scaffold proteins into supramolecular assemblies by formation of heteromeric L27_2 domain complexes. L27_2 domain-mediated protein assemblies have been shown to play essential roles in cellular processes including asymmetric cell division, establishment and maintenance of cell polarity, and clustering of receptors and ion channels. Members of this family form specific heterotetrameric complexes, in which each domain contains three alpha-helices. The two N-terminal helices of each L27_2 domain pack together to form a tight, four-helix bundle in the heterodimer, whilst the third helix of each L27_2 domain forms another four-helix bundle that assembles the two units of the heterodimer into a tetramer [1]. 
20.90 20.90 21.00 21.40 20.00 18.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.67 0.72 -4.43 3 113 2009-01-15 18:05:59 2006-08-04 15:14:15 5 14 40 6 50 114 0 57.80 61 3.30 CHANGED DKpRALQAlERLQuKLKERGDVssEEKLSLL+SVLQSPLFsQILoLQpSlQQLKDQVN ....DKppsLQshERLQsKL+ERGDsupp-KLSlL+ssLQSPLFsQILoLQpSlpQLKcQls...... 0 6 9 22 +8878 PF09046 AvrPtoB-E3_ubiq AvrPtoB E3 ubiquitin ligase Mistry J, Sammut SJ anon pdb_2fd4 Domain The E3 ubiquitin ligase domain found in the bacterial protein AvrPtoB inhibits immunity-associated programmed cell death (PCD) when translocated into plant cells, probably by recruiting E2 enzymes and transferring ubiquitin molecules to cellular proteins involved in regulation of PCD and targeting them for degradation. The structure of this domain reveals a globular fold centred on a four-stranded beta-sheet that packs against two helices on one face and has three very extended loops connecting the elements of secondary structure, with remarkable homology to the RING-finger and U-box families of proteins involved in ubiquitin ligase complexes in eukaryotes [1]. 
25.00 25.00 107.80 107.10 24.60 18.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.58 0.71 -4.12 4 25 2009-01-15 18:05:59 2006-08-04 15:14:50 5 1 22 1 2 34 0 122.10 75 30.16 CHANGED .tVVADIRAALc.IusQFsQLRTISKADAESp-.GF+DAAD.HPDDsTpCLFGEELSLSNPcQQVIGLAGpsTDhsQPYSQEuNKsLsFMDMKKLAQaLAuKPEHPMsR-pLsAcNIAKYAFRIVP ..............tVVsDI...RAALD.IusQFSQLRTISKADAESEELGF+DAAD.HP.DsATpCLFGEELSLSNPDQQVIGLAsNPTDpsQPYSQEsNKsLsFMDMKKLAQaLAsKPEHPhNRQpLDAcNIAKYAF+IVP 0 0 0 1 +8879 PF09047 MEF2_binding MEF2 binding Mistry J, Sammut SJ anon pdb_1n6j Domain The myocyte enhancer factor-2 (MEF2) binding domain, predominantly found in the calcineurin-binding protein CABIN 1, adopts an amphipathic alpha-helical structure, which allows it to bind a hydrophobic groove on the MEF2S domain, forming a triple-helical interaction. Interaction of this domain with MEF2 causes repression of transcription [1]. 21.50 21.50 21.90 34.60 21.30 19.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.30 0.72 -4.31 2 61 2009-01-15 18:05:59 2006-08-04 15:27:42 5 3 32 1 25 48 0 34.20 90 2.02 CHANGED TLLSPKGSISEETKQKLKssILSuQSAAss+K-oL ...TLLSPKGSISEETKQKLK......SAILSAQSAANVRKESL.... 0 2 5 12 +8880 PF09048 Cro Cro Mistry J, Sammut SJ anon pdb_1d1m Domain Members of this family are involved in the repression of transcription by binding as a homodimer to palindromic DNA operator sites in phage lambda: they repress genes expressed in early phage development and are necessary for the late stage of lytic growth. These proteins have a secondary structure consisting of three alpha-helices and three beta-sheets, and dimerise through interactions between the two antiparallel beta-strands [1]. 
30.80 30.80 30.80 36.10 29.10 30.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.74 0.72 -4.33 5 146 2012-10-04 14:01:12 2006-08-04 15:37:13 5 1 126 17 5 68 1 59.00 66 89.33 CHANGED Mp..RITLuDYVt+aGQAKAA+DLGVtQuAISKAL+AGRcIhVolssDGSVhAEEVRPFPS ....Mc.RITLpDYAhRFG.QsKTAKDLGVhQSAIsKAI+AGRcIFLTlpuDG.S.VhAEEV+PFPS...... 0 0 1 4 +8881 PF09049 SNN_transmemb Stannin transmembrane Mistry J, Sammut SJ anon pdb_1zza Domain Members of this family consist of a single highly hydrophobic transmembrane helix that transverses the lipid bilayer at a 20 degree angle with respect to the membrane normal. They contain a conserved cysteine residue (Cys32) that, together with Cys34 found in the stannin unstructured linker domain, constitutes the putative trimethyltin-binding site that resides at the end of the transmembrane domain close to the lipid/solvent interface [1]. 25.00 25.00 49.90 49.40 20.50 19.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.58 0.72 -4.25 3 39 2009-01-15 18:05:59 2006-08-04 15:42:51 5 2 37 1 24 25 0 33.00 95 37.68 CHANGED MSIMDHSPTTGVVTVIVILIAIAALGALILGCW .MSIMDHSPTTGVVTVIVILIAIAALGALILGCW 0 1 3 8 +8882 PF09050 SNN_linker Stannin unstructured linker Mistry J, Sammut SJ anon pdb_1zza Domain Members of this family are unstructured, acting as connectors of the stannin helical domains. They contain a conserved CXC metal-binding motif and a putative 14-3-3-zeta binding domain. Upon coordinating dimethytin, considerable structural or dynamic changes in the flexible loop region of SNN may take place, recruiting other binding partners such as 14-3-3-zeta, and thereby initiating the apoptotic cascade [1]. 
25.00 25.00 63.90 63.40 17.80 16.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.51 0.72 -6.97 0.72 -4.39 3 39 2009-01-15 18:05:59 2006-08-04 15:43:56 5 2 37 1 24 25 0 26.00 95 29.68 CHANGED CYLRLQRISQSEDEESIVGEGETKEP CYLRLQRISQSEDEESIVGDGETKEP 0 1 3 8 +8883 PF09051 SNN_cytoplasm Stannin cytoplasmic Mistry J, Sammut SJ anon pdb_1zza Domain Members of this family consist of a distorted cytoplasmic helix that is partially absorbed into the plane of the lipid bilayer with a tilt angle of approximately 80 degrees from the membrane normal. They interact with the surface of the lipid bilayer, and contribute to the initiation of the apoptotic cascade on binding of the unstructured linker domain to dimethyltin [1]. 25.00 25.00 34.70 39.20 24.90 24.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.42 0.72 -6.91 0.72 -4.21 5 37 2009-01-15 18:05:59 2006-08-04 15:44:50 5 1 35 1 24 23 0 26.50 87 30.25 CHANGED LLVQYSAKGP+VE+KTKL.TPNGTESH LLVQYSAKGPCVERKAKL.TPNGPEVH.... 0 1 3 8 +8884 PF09052 SipA Salmonella invasion protein A Mistry J, Sammut SJ anon pdb_2fm8 Domain Salmonella invasion protein A is an actin-binding protein that contributes to host cytoskeletal rearrangements by stimulating actin polymerisation and counteracting F-actin destabilising proteins. Members of this family possess an all-helical fold consisting of eight alpha-helices arranged so that six long, amphipathic helices form a compact fold that surrounds a final, predominantly hydrophobic helix in the middle of the molecule [1]. 
25.00 25.00 50.80 50.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 674 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -13.09 0.70 -6.13 3 143 2009-01-15 18:05:59 2006-08-04 15:52:32 5 2 130 10 3 112 0 584.70 80 99.48 CHANGED MlTussTQAPshLsussosouTpASsLuupLSDVR.SuoToLusctSlu-LESFsAsasQ+SLDsLFuuSs+ADsL+ElYoNSsNsYAKpEIhEFAsVapSLl+QsuLsPEAcKsLpKluAQYoApIIKDGLuEKSAFGPWos+TKKtaQLRpNLE++LA-IAppHTuGEApKLGscLlpsEVooFItSCIEscLGpoLDslTSppLTcLVDuAAtpAFEuLRppRpcLI-p+GFSVG+LARDL-TVAVlPpLLRsVLssI..sPuD+tP-psuhssPscPpPuuGPsPuGsGKsucPstIHYHIN..IcssN+ShDNR+asNpucoalssup...RHlDNSsH-NScpsAsssTotosD.LsRNGpSlLSssuSssutpHuLVsuVTp...........slsHSISGpVDssAssoA.E+VhNsou-ucDGtVhhutlGSDGLTTS.pEhsAlsS+ScsG+PLpuss+uVsD...........sL+PlhothsGsEsVKouTssSsDTstSGschpp.sAGpssssNSsTDusGsFoGl+FRsGshYhTlPTls.h+uht..F-AccclLsulRsALEPsuopPhsQRREF-uLRs-ILPSDTh+psslKs+soDuschscLs-.cA-TL+cslssHPthEK....L+ElApsLuREAsLo+lccsos.LLoslLD..GLpuDs-hRAuPs...hsuKPsssuVlpTlDGLH ................MQTEIKTQATNLAANLSAVRESATsTLSGEIKGPQLEDFPALIKQASLDALFKCGKDAEALKEVFTNSNNVAGKKAIMEFAGLFRSALNATSDSPEAKTLLMKVGAEYTAQIIKDGLKEKSAFGPWLPETKKAEAKLENLEKQLLDIIKNNTGGELSKLSTNLVMQEVMPYIASCIEHNFGCTLDPLTRSNLTpLlDtA.AAKAVcALDMCHQKLTQEQGTSVGREARHLEMQTLIPLLLRNVFAQI...PADKLPD......PKIPEPAAGPVPDG.GKKAEPTGINININ..IDSSNHSVDNSKHINNSRSHVDNSQ...RHIDNSNHDpSRKTIDNSRTFIDN.SQRpGESHHSTNSSNVSHSHSRVDSTTHQTETAHSASTGAIDHGIAGKIDlTAHATA.EAVTNASSESKDGKVVTSEKGTTGETTSFDEVDGVTSKSIIGKPVQATVHGVDDNKQQSQTAEIVNVKPLASQLAGVENVKhDTLQSDoTVITGNK.....AGTTDNDNSQTDKTGPFSGLKFKQNSFLSTVPSVTNMHSMH..FsAREsFLGVIRKALEPDTSTPFPVRRAFDGLRAEILPNDTIKSAALKAQCS....DIDKHPELKA.KM-TLKEVITHHPQKEK....LAEIALQFAREAGLTR.KGETDYVLSNVLD..GLIGDGSWRAGPAYESYLNKPGVDRVITTVDGLH..... 0 1 1 2 +8885 PF09053 CagZ CagZ Mistry J, Sammut SJ anon pdb_1s2x Domain CagZ is a 23 kDa protein consisting of a single compact L-shaped domain, composed of seven alpha-helices that run antiparallel to each other. 70% of the residues are in alpha-helix conformation and no beta-sheet is present. 
CagZ is essential for the translocation of the pathogenic protein CagA into host cells [1]. 25.00 25.00 134.40 134.30 19.30 19.20 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.42 0.71 -4.99 3 75 2009-01-15 18:05:59 2006-08-04 16:00:15 5 1 40 1 1 58 0 195.10 97 99.99 CHANGED MELGFNETERQKILDSN+SLMGNANEVRDKFIQNYATSLKDSNDPQDFLRRVQELRINMQKNFISFDsYYNYLNNLVLASYNRCKQEKTFAESTIKNELTLGEFVAEISDNFNNFMCDEVARISDLVASYLPREYLPPFIDGNMMGVAFQILGIDDFGRKLNEIVQDIGTKYIILSKNKTYLTSLERAKLITQLKLNLE MELGFNEAERQKILDSN+SLMGNANEVRDKFIQNYAoSLKDSNDPQDFLRRVQELRINMQKNFISFDAYYNYLNNLVLASYNRCKQEKTFAESTIKNELTLGEFVAEISDNFNNFMCDEVARISDLVASYLPREYLPPFIDGNMMGVAFQILGIDDFGRKLNEIVQDIGTKYIILSKNKTYLTSLERAKLITQLKLNLE..... 0 1 1 1 +8887 PF09055 Sod_Ni Nickel-containing superoxide dismutase Mistry J, Sammut SJ anon pdb_1t6i Domain Nickel containing superoxide dismutase (NiSOD) is a metalloenzyme containing a hexameric assembly of right-handed 4-helix bundles of up-down-up-down topology with an N-terminal His-Cys-X-X-Pro-Cys-Gly-X-Tyr motif that chelates the active site Ni ions. NiSOD catalyses the disproportionation of superoxide to peroxide and molecular oxygen through alternate oxidation and reduction of Ni, protecting cells from the toxic products of aerobic metabolism [1]. 25.00 25.00 49.20 49.10 19.70 18.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.85 0.71 -3.76 25 144 2009-09-10 20:18:48 2006-08-04 16:03:13 6 4 139 81 73 148 532 126.20 43 79.80 CHANGED ssspVpA.......HCDlPCGlYDPusARltu......olhphhcclp-lsst..........st.tsphsRhlshKEp+ApclKcclhllWoDYFKss+l-..........pYPcLH-lhapshhtuuts..KtslDhspApcLlstlsclschFWpoK .............s.spVpA.......HCDlPCGVYDPApARIcAE.....oVcuhpcKhpu.css..............shhsRhlhIKEp+AchsK+cl.lLWoDYFKssHhE.............cYPcLHpLhpcshKhsuAu....KsssDsupupcLLshIscIscIFWcTK................................ 
0 32 54 68 +8888 PF09056 Phospholip_A2_3 Prokaryotic phospholipase A2 Mistry J, Sammut SJ anon pdb_1faz Domain The prokaryotic phospholipase A2 domain is predominantly found in bacterial and fungal phospholipases, as well as various hypothetical and putative proteins. It enables the liberation of fatty acids and lysophospholipid by hydrolysing the 2-ester bond of 1,2-diacyl-3-sn-phosphoglycerides. The domain adopts an alpha-helical secondary structure, consisting of five alpha-helices and two helical segments [1]. 26.10 26.10 26.10 26.50 26.00 25.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.86 0.72 -3.86 18 148 2009-01-15 18:05:59 2006-08-04 16:06:21 6 9 113 5 85 147 14 106.50 36 37.65 CHANGED TDphlFuhslssFsssRssp..sPup.L..DWooDGCSpuP.......DsPhGF...sFtsuCpRHDFGYRNY+tQsRFo.ss+t+..IDssFhpDhhptC...sthssh...htssCctsApsYYpAVRtFG ..................................t.shsta.stRpsp...sst..h..sWooDsCSsu..P..........Dp....PhGa....sF...tsuCpRHDFG.......Y....RNa....+.....t.....t...s+....F....ot....ss+.p+..lDssFppDLhphC..........st.st.............tpssCcu.hAtsYYtAV+thG.......................... 0 22 50 73 +8889 PF09057 Smac_DIABLO Second Mitochondria-derived Activator of Caspases Mistry J, Sammut SJ anon pdb_1g73 Domain Second Mitochondria-derived Activator of Caspases promotes apoptosis by activating caspases in the cytochrome c/Apaf-1/caspase-9 pathway, and by opposing the inhibitory activity of inhibitor of apoptosis proteins (XIAP-BIR3). The protein assumes an elongated three-helix bundle structure, and forms a dimer in solution [1]. 
25.00 25.00 29.90 27.90 24.60 24.20 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.43 0.70 -5.24 4 112 2009-01-15 18:05:59 2006-08-04 16:07:38 5 3 52 23 36 107 0 189.70 51 94.57 CHANGED p+L.tuss.lL+aptslls..sus.+pRplphL.ssaRK.sloluVGsulCAVPhsQ+sE..sLSpEuLlRRAsSLVTDSusTFLSQTThALl-ulTpYsKAVaTLlSLp+pYpu.luKhsspEEsuIWQVIIGtRsEhps++cphh+aESsWMsAVsLSEhAAEAAYpoGADQAslss+splQlsQoQVEpl+plu+cAEhpLA-opsEElcRhhp............p.ulp-tE-lPEAYLRED ..........................................................................................................t....hpp.hh.hshsh.slCAlPht.Q....p.-..sLSp-uLhRRAsSLVTDSooTFLSQTThALI-AlTEYoKAVYTLlSLh+pYtuhLGKhsupEEDpVWQVIIGtRsEhos+ppEhh+hEooWhoAV.sLSEhAAEAAYpoGADQASlTs+splQlspsQVpcs+pLStcAEpKLAEspspEl..cphtp...............p...t..ttp.t-t..EAYLRED............................. 0 5 9 19 +8890 PF09058 L27_1 L27_1 Mistry J, Sammut SJ anon pdb_1rso Domain The L27 domain is a protein interaction module that exists in a large family of scaffold proteins, functioning as an organisation centre of large protein assemblies required for the establishment and maintenance of cell polarity. L27 domains form specific heterotetrameric complexes, in which each domain contains three alpha-helices [1]. 21.60 21.60 21.60 23.00 20.60 21.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.99 0.72 -4.09 4 147 2009-01-15 18:05:59 2006-08-04 16:10:09 5 21 76 3 67 170 0 59.10 57 8.95 CHANGED MPV++pDspRALpLLE-YpS+LSpstDctLRouIERVIsIFpSsLFQALlDIQEFYElTLhD.ss .......MPl++p.-spRALcLLE-Y+u+Lo.p.............spD+pLRpuIERVIsIFpSsLFQAL.l..DIQ..EFYElTLLDs............. 1 24 30 48 +8891 PF09059 TyeA TyeA Mistry J, Sammut SJ anon pdb_1xl3 Domain Members of this family are composed of two pairs of parallel alpha-helices, and interact with the bacterial protein YopN via hydrophobic residues located on the helices. 
Association of TyeA with the C terminus of YopN is accompanied by conformational changes in both polypeptides that create order out of disorder: the resulting structure then serves as an impediment to type III secretion of YopN [1]. 21.10 21.10 21.40 21.50 21.00 20.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.67 0.72 -4.05 7 103 2009-01-15 18:05:59 2006-08-04 16:28:52 5 2 94 2 16 55 1 84.90 45 49.81 CHANGED MAYssS-LMuDlIALVEcRWsusp-lpplssAhpLsssppplpFapEL++LlRhlPl-VFuDEEQRQNLlpAsQtALDtAI-pEEEE .......MAYt.S-LMuDllALl-cRWlusp-lppLusuhsL..ssscpclpFap-L++lhRhlPlpVFuD-EQRQNLLpusQpAlD.AI-pEEEp.... 0 7 9 11 +8892 PF09060 L27_N L27_N Mistry J, Sammut SJ anon pdb_1vf6 Domain The L27_N domain plays a role in the biogenesis of tight junctions and in the establishment of cell polarity in epithelial cells. Each L27_N domain consists of three alpha-helices, the first two of which form an antiparallel coiled-coil. Two L27 domains come together to form a four-helical bundle with the antiparallel coiled-coils formed by the first two helices. The third helix of each domain forms another coiled-coil packing at one end of the four-helix bundle, creating a large hydrophobic interface: the hydrophobic interactions are the major force that drives heterodimer formation [1]. 25.00 25.00 26.20 25.20 23.80 16.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.30 0.72 -4.16 4 63 2009-01-15 18:05:59 2006-08-04 16:39:47 5 6 39 8 31 52 0 48.90 65 7.50 CHANGED ElE-LL.SLKplp+sLsDsQSQpDlELlhQLlppsDFQsAapIHNAVAt ...ElEDLhSSLKHIQHTLVDSQSQEDIuLLLQLVQNpDFQNAFKIHNAVo.s 0 2 5 13 +8894 PF09062 Endonuc_subdom PI-PfuI Endonuclease subdomain Sammut SJ anon pdb_1dq3 Domain The endonuclease subdomain, found in the prokaryotic protein ribonucleotide reductase, assumes an alpha-beta-beta-alpha-beta-beta-alpha-alpha topology. 
The four stranded beta-sheet forms a saddle-shaped surface and assembles together through an interface made of alpha-helices. The presence of 14 basic residues on the surface of the beta-sheets suggests that this large groove may be involved in DNA binding [1]. 24.40 24.40 24.70 38.40 23.20 24.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.50 0.72 -3.24 3 5 2012-10-03 01:41:40 2006-08-07 09:45:50 5 3 5 1 3 5 0 87.00 66 4.99 CHANGED PDGEDYEFIFDYWLAGFIAGDGslDKY+SHVKGHEYlYDRLRIYDYphEThtIINDaLEKTFG++YSlQ+DRNIaYIDIKARsITSHYlELL-GI-NG PDGEDYEFIFDYWLAGFIAGDGsFDKY+SHVKGHEYIYDRLRIYDYRlETFEIINDYLEKTFG++YSlQ+DRNIYYIDIKARsITSHYlKLL-GIDNG....... 0 1 1 2 +8895 PF09063 Phage_coat Phage PP7 coat protein Sammut SJ anon pdb_1dwn Domain Members of this family form the capsid of P. aeruginosa phage PP7. They adopt a secondary structure consisting of a six stranded beta sheet and an alpha helix [1]. 25.00 25.00 25.30 283.20 24.00 17.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.78 0.71 -4.34 2 2 2009-01-15 18:05:59 2006-08-07 10:03:32 5 1 1 17 0 4 0 127.00 96 99.61 CHANGED SKTIVLSVGEATRTLTEIQSTADRQIFEEKVGPLVGRLRLTASLRQNGAKTAYRVNLKLDQADVVDCSTSVCGELPKVRYTQVWSHDVTIVANSTEASRKSLYDLTKSLVspupsEDLVVNLVPLGR SKTIVLSVGEATRTLTEIQSTADRQIFEEKVGPLVGRLRLTASLRQNGAKTAYRVNLKLDQADVVDCSTSVCGELPKVRYTQVWSHDVTIVANSTEASRKSLYDLTKSLVspupsEDLVVNLVPLGR 0 0 0 0 +8896 PF09064 Tme5_EGF_like Thrombomodulin like fifth domain, EGF-like Sammut SJ anon pdb_1dx5 Domain Members of this family adopt a fold similar to other EGF domains, with a flat major and a twisted minor beta sheet. Disulphide pairing, however, is not of the usual 1-3, 2-4, 5-6 type; rather 1-2, 3-4, 5-6 pairing is found. Its extended major sheet (strands beta-2 and beta-3 and the connecting loop) projects into thrombin's active site groove. 
This domain is required for interaction of thrombomodulin with thrombin, and subsequent activation of protein-C [1]. 21.40 21.40 21.70 22.20 21.30 20.40 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -7.98 0.72 -4.34 6 46 2012-10-03 09:47:55 2006-08-07 11:18:30 5 20 34 0 27 41 0 34.80 55 7.17 CHANGED MFCNQTsCPADCDPNosu..sChCPEGYILD-Gs...lC ..MFCNpTsCPA.DCDPNs.s...sCpCPEGYILD-G....hC........ 1 1 4 11 +8897 PF09065 Haemadin Haemadin Sammut SJ anon pdb_1e0f Domain Members of this family adopt a secondary structure consisting of five short beta-strands (beta1-beta5), which are arranged in two antiparallel distorted sheets formed by strands beta1-beta4-beta5 and beta2-beta3 facing each other. This beta-sandwich is stabilised by six enclosed cysteines arranged in a [1-2, 3-5, 4-6] disulphide pairing resulting in a disulphide-rich hydrophobic core that is largely inaccessible to bulk solvent. The close proximity of disulfide bonds [3-5] and [4-6] organises haemadin into four distinct loops. The N-terminal segment of this domain binds to the active site of thrombin, inhibiting it [1]. 25.00 25.00 26.00 71.90 23.30 15.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.30 0.72 -4.36 2 2 2009-01-15 18:05:59 2006-08-07 11:44:31 5 1 1 3 0 3 0 27.00 100 46.55 CHANGED CDCGEKICLYGQSCNDGQCSGDPKPSS CDCGEKICLYGQSCNDGQCSGDPKPSS 0 0 0 0 +8898 PF09066 B2-adapt-app_C Beta2-adaptin appendage, C-terminal sub-domain Sammut SJ anon pdb_1e42 Domain Members of this family adopt a structure consisting of a 5 stranded beta-sheet, flanked by one alpha helix on the outer side, and by two alpha helices on the inner side. This domain is required for binding to clathrin, and its subsequent polymerisation. 
Furthermore, a hydrophobic patch present in the domain also binds to a subset of D-phi-F/W motif-containing proteins that are bound by the alpha-adaptin appendage domain (epsin, AP180, eps15) [1]. 21.10 21.10 21.40 21.10 21.00 20.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.29 0.71 -4.20 31 409 2009-01-15 18:05:59 2006-08-07 12:37:55 5 12 142 8 246 427 5 109.50 33 13.65 CHANGED hs.D...uplspppF.phWpsl.spss..-hphphp...thss.pshpptLpspslahlApppsss...phhaaos+tssshhhlhElthpssssphplslKspss..chsthhhphhcpllp ..........................hsE-...Gph..-+phFltTW+...sl.Psps.......Ehpaphp.......s.hs..s....-.slps+L.pssNlaTIA+Rssps...pchhY....ShKh...s...s...Gl.hhLsELplp.s...us.s.s...h....p.....lolKscss.....Essphhhpsh-sll..................................................... 0 86 125 180 +8899 PF09067 EpoR_lig-bind Erythropoietin receptor, ligand binding Sammut SJ anon pdb_1eer Domain Members of this family interact with erythropoietin (EPO), with subsequent initiation of the downstream chain of events associated with binding of EPO to the receptor, including EPO-induced erythroblast proliferation and differentiation through induction of the JAK2/STAT5 signaling cascade. The domain adopts a secondary structure composed of a short amino-terminal helix, followed by two beta-sandwich regions [1]. 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.61 0.72 -4.03 12 520 2012-10-03 16:25:20 2006-08-07 13:34:30 5 16 109 41 149 538 0 98.30 31 19.36 CHANGED Spsu..hhstcsc.hpCFopshcDFTCFW-tspssshs...YshhYphps.-phppCslhppssssu...h.hChFsps.-splaV.hclpV.spssttspasR.lsV- ...........ss.......stcPchhpChS.phETFoCaWpsG...s..p..ss..l...s..os...apLhY...........p......p....c......s......p............p............h+E.C.P..-Ypsuus......so.......CaFspp..pTolWhsYplpVtsssphss..pc................................... 
0 7 22 58 +8900 PF09068 EF-hand_2 efhand_1; EF_hand_2; EF hand Sammut SJ anon pdb_1eg3 Domain Members of this family adopt a helix-loop-helix motif, as per other EF hand domains. However, since they do not contain the canonical pattern of calcium binding residues found in many EF hand domains, they do not bind calcium ions. The main function of this domain is the provision of specificity in beta-dystroglycan recognition, though in dystrophin it serves an additional role: stabilisation of the WW domain (Pfam:PF00397), enhancing dystroglycan binding [1]. 21.10 21.10 21.10 21.60 20.90 19.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.71 0.71 -4.06 30 673 2012-10-02 16:17:27 2006-08-07 14:46:03 6 51 101 2 277 608 0 121.10 42 11.54 CHANGED chs-LhpsL..tDLNslRaSuYRTAhKLRtlQKthp.........LcLlsltslhcsFccpsLpp...............Dps.....lsVsclhssLoslY...................................................ppLspchsshs.sls............................................................................................................................hslDhhLNaLLNVYDssRsG+IpVLShKhulssLC ......................hppLatph....tcLsslR..hSsYRTAhKLRhlQKths..................................LcLlsl.ss.hEuhccpsLpp.......................Dps.....lsVsc.l.ssLoolY....................................................pLppchsshh.pls............................................................................................................................hslshhLNaLLssYD..........o..t..psG+lpVhShKsuLhoLC................................................................................................................ 0 64 87 168 +8901 PF09069 EF-hand_3 efhand_2; EF-hand Sammut SJ anon pdb_1eg3 Domain Members of this family adopt a helix-loop-helix motif, as per other EF hand domains. However, since they do not contain the canonical pattern of calcium binding residues found in many EF hand domains, they do not bind calcium ions. 
The main function of this domain is the provision of specificity in beta-dystroglycan recognition, though in dystrophin it serves an additional role: stabilisation of the WW domain (Pfam:PF00397), enhancing dystroglycan binding [1]. 21.70 21.70 21.70 28.00 20.20 21.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.96 0.72 -3.91 30 641 2012-10-02 16:17:27 2006-08-07 14:54:41 6 48 99 2 269 581 0 90.00 48 8.40 CHANGED Lp-KaRYlFpQl.......u-ssGhhDpp+LulhL+-slplP+plGEssuFGsp..lEsSVRSCFpts....t.......................csclph.....spFL-WhphE..PQolVWLPlLHRlA ...........l.DKhRY.lFppl.......usosGhhspp+Ls.hL+-slplPptlhEssoFGh.....hEsSVRSCFpt........................................p.cIph.....stFLDhhhh-...P...Qs.h.......V.WLPlLHRlA................................... 0 59 79 159 +8902 PF09070 PFU PFU (PLAA family ubiquitin binding) Mistry J anon Pfam-B_5813 (release 20.0) Domain This domain is found N terminal to Pfam:PF08324 and binds to ubiquitin [1]. 20.90 20.90 21.10 22.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.71 0.71 -4.08 39 348 2009-01-15 18:05:59 2006-08-07 15:24:39 6 21 290 7 246 358 3 111.50 39 15.52 CHANGED ppsGp+EGpshhl+ss..GslEAYpWo..pupW.KIG-VVs..u....suss......stKthacG+cYDYVFDVDlp-GtPsLKLPYNhs-NPatsAp+Flp+p-..Ls.sYh-QVspFIhpN..TpGssls ................................................t.psGp+-GQsphl..+ps..splpAapWo......ptpW.plGsVVs..u..........sstp........utKhhapGc-YDYV..FsVDlp...-....G..tP...slKLPYNh...u-sPatsApcFlpcNc..Ls.s.......YlDQVspFIhpNTpu.t..s............ 0 91 141 207 +8903 PF09071 Alpha-amyl_C Alpha-amylase, C terminal Sammut SJ anon pdb_1eh9 Domain Members of this family, which are found in the prokaryotic protein glycosyltrehalose trehalohydrolase, assume a gamma-crystallin-type fold with a five-stranded anti-parallel beta-sheet that packs against the C-terminal side of a beta-alpha barrel. 
This domain is common to family 13 glycosidases and typically contains a five to ten strand beta-sheet, however its precise fold varies [1]. 20.80 20.80 22.40 59.80 19.80 17.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.95 0.72 -4.22 5 18 2009-01-15 18:05:59 2006-08-07 15:32:08 5 3 18 8 4 16 0 66.80 58 12.17 CHANGED CcR+lEVcsG-NWLTlptcKlhslauFScSVIplKYoGsLLlSS.sSFP++IscuK.h+l-KGFGlYK .CNRKLoVENGNaWLTVKGNGYLlVYVFSpSlIEMKYRGTLVLSSNNSFPSQIsEsK.Y+L-KGFALYK. 1 1 1 3 +8904 PF09072 TMA7 Translation machinery associated TMA7 Mistry J, Wood V anon manual Family TMA7 plays a role in protein translation. Deletions of the TMA7 gene results in altered protein synthesis rates [1]. 18.80 18.80 20.80 20.70 18.70 17.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.03 0.72 -3.51 24 302 2009-01-15 18:05:59 2006-08-07 15:37:21 5 6 232 0 204 302 0 61.00 54 81.09 CHANGED uuRpGGKtKPLKAPKKppp-hDE-DlAFKpKQ+--pKAhKthtsKAtttGPLssu..GIKKSGKK .............puRpGGKtK.PLKtPKKppK.E.h........DE-Dh...AFKpKQK--pKAhcEhtsKA.t.tKGPLs.sG..GIKKS.GKK............... 0 57 97 157 +8905 PF09073 BUD22 BUD22 Mistry J, Wood V anon manual Family BUD22 has been shown in yeast to be a nuclear protein involved in bud-site selection. It plays a role in positioning the proximal bud pole signal [1]. More recently it has been shown to be involved in ribosome biogenesis [2,3]. 
37.00 37.00 37.00 37.00 36.60 36.40 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -12.79 0.70 -5.41 26 209 2009-01-15 18:05:59 2006-08-07 16:03:09 5 4 184 0 156 214 2 308.60 23 71.15 CHANGED Khcpuhpp...LppsLKpApth-hpKLh++.+th.tt.......................ssshscltcplphh+ph...shpphscthlhppLhK........................s.thh.stthhp...hhp..cKsttspssc.htp...t.t..sslsupLhssK.l+shhsslhpslctlhGhpsptttppcptpsppsppspppp............................ttppppppttcup-ssspcpstc-.....................ssss-tcpps-p...............thspaDsh.....lssusp-pp...........................................t.scspspscscppspcsSpp.tss.s.ppt..sspKKtKtpp...........tpph.LPpL.hsGYaSGu-s-spp.t............-hsspth..............pRKNRhGQ+ARptlWEKKYGppApHlpc.....cpE+ptpcppt.........Rptta-t..Rputp.......................tucsss.ttspspthts+.ppstpstppp.................LHPSWpAK+tAc-pt.t..sApFpGKKlsFD ..............................................................................................................................................................................h......h..........................................................................h.t.h.t.....h...h..h...h......t.......t....tt.tt.....................................................................t.............t......................................t................................tt..................sttptt...................................................................................p...tt.......t...................t..........................p...c.............................................lPpL....suahpsspst..t..........t.t................................cKNRhGQpARptlhEpKaGttApHltp........t...t...t.................................pp.thc...+psth......................................................tttttt.tt..ppttthttp..t.tt.s.ttttp.p......t................LHPSWcAp++tKEpt.p.......s...sFp..G....KKIsFD................................................ 
0 41 78 126 +8906 PF09074 Mer2 Mer2 Mistry J, Wood V anon manual Family Mer2 (Rec107) forms part of a complex that is required for meiotic double strand DNA break formation. Mer2 increases in abundance and is phosphorylated during the prophase phase of cell division [1]. Blocking double strand break formation results in delayed dephosphorylation and dissociation of Mer2 from the chromosome [1]. 27.70 27.50 27.70 27.90 27.40 27.10 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.38 0.71 -4.37 6 18 2009-12-23 15:34:04 2006-08-07 16:30:48 5 1 18 0 11 19 0 183.10 29 57.19 CHANGED us.shuopssscshputcs.........slsEuD+QIlEWAGKLELESl-L+E.pu-pLhsllsppsppLhhsstplsphL.ppp.....tuppcslcslh.slusplsNpLp-shpulppphcshpt..pp.htt...............h.sspchs+hsu.pshcIIpp.-.stt.ch.Kuhcshp-hlhNhusQLEshptlhlSlS+pL+s....LpsRpsshc ...................................o.sssshpph.s.........slpEuDKQILEWAGKLELESh-LRE.poscLlplLpcNScpLhpshpphsphL.p.c.....tupptslcphhcsLssplpspLccs.psh.spscphps..ppthtp................l.hspclp+hss....+Ih+php.s+Qpch.KShcsTQchlaNlssQLEchpcVLlShS+-hcs....LpsRQssLc.......................................................................................... 0 1 7 11 +8907 PF09075 STb_secrete Heat-stable enterotoxin B, secretory Sammut SJ anon pdb_1ehs Domain Members of this family assume a helical secondary structure, with two alpha helices forming a disulphide crosslinked alpha-helical hairpin. The disulphide bonds are crucial for the toxic activity of the protein, and are required for maintenance of the tertiary structure, and subsequent interaction with the particulate form of guanylate cyclase, increasing cyclic GMP levels within the host intestinal epithelial cells [1]. 
25.00 25.00 128.80 128.50 18.70 17.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.40 0.72 -4.11 2 6 2009-01-15 18:05:59 2006-08-08 09:28:43 5 1 3 1 0 5 0 48.00 98 71.46 CHANGED STQSNKKDLCEpYRQIAKESCKhGFLGVRDGTAGACFGAQIMVAAKGC STQSNKKDLCEHYRQIAKESCKKGFLGVRDGTAGACFGAQIMVAAKGC 0 0 0 0 +8908 PF09076 Crystall_2 Sklp_toxin; Beta/Gamma crystallin Sammut SJ, Eberhardt R anon pdb_1f53 Domain Members of this family assume a beta-gamma-crystallin fold [1,2], wherein nine beta-strands are connected by loop, and are separated into two sheets, each sheet forming the Greek key motif. The two Greek key motifs face each other in the global topology. The three-dimensional structure of the molecule is a 'sandwich'-shaped beta-barrel structure: hydrophobic side-chains are packed in the large interface area of the beta-sheets. In Streptomyces killer toxin-like protein domain confers a cytocidal effect to the toxin, causing cell death in both budding and fission yeasts, and morphological changes in yeasts and filamentous fungi [1]. This family also includes chitin-biding antifungal proteins [2-3]. 25.00 25.00 25.30 45.10 22.60 20.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.35 0.72 -4.21 10 18 2012-10-01 23:14:22 2006-08-08 10:32:47 5 1 15 2 5 22 0 71.90 35 65.99 CHANGED cIcsHtssspuh.CaANuGshshuh.s......Vs+ISTGNNhVsaphssGs.lpht+hpslTa.NhsshVssh-lh .......plhoHhssppSh.CaANtGphsFuhhs......VD+ISTGNNhlpapsssGsp..lphs..+apslTaPN+PstVssIcIh........ 0 1 5 5 +8909 PF09077 Phage-MuB_C Mu B transposition protein, C terminal Sammut SJ anon pdb_1f6v Domain The C terminal domain of the B transposition protein from Bacteriophage Mu comprises four alpha-helices arranged in a loosely packed bundle, where helix alpha1 runs parallel to alpha3, and anti-parallel to helices alpha2 and alpha4. 
The domain allows for non-specific binding of Mu to double-stranded DNA, allowing for integration into the bacterial genome, and mediates dimerisation of the protein [1]. 25.00 25.00 26.70 26.70 22.70 20.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.49 0.72 -4.31 15 110 2009-01-15 18:05:59 2006-08-08 10:54:26 6 3 100 1 15 94 1 77.00 46 25.15 CHANGED SRlAK+suIpKoKKuDVpAlApAWsl..ps-cthplhppIupK.PGuLRlLo+TLRLAuMsAcGcs.s.ls.chlptAap-L .SRlA+pptlpKsKKuDVpAIAcAWsl..ss-pEhplhppIupK..PGALRlLo+TL+LAshsApGcGts.lspcalptAa+El.... 0 4 9 14 +8910 PF09078 CheY-binding CheY binding Sammut SJ anon pdb_1ffg Domain Members of this family adopt a secondary structure consisting of an open-face beta/alpha sandwich, with four antiparallel beta-strands and two alpha-helices. They bind to a corresponding domain on CheY, with subsequent phosphorylation of the CheY Asp57 residue, and activation of CheY, which then affects flagellar rotation [1]. 21.10 21.10 21.30 30.50 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.87 0.72 -3.96 30 622 2009-01-15 18:05:59 2006-08-08 13:08:00 6 8 596 13 79 360 5 64.90 65 9.76 CHANGED LRIpLoplcsp-h-LLpEELGNLGslsssp+uu-oLsshLsoslupDDIsAVhCFVI-s-QIshp ...RIlLSRLKAsEVDLLEEELGpLsTLTDVVKGADSLSAhLsGslAEDDIsAVLCFVIEADQIsFE.......... 0 6 28 52 +8911 PF09079 Cdc6_C CDC6, C terminal Sammut SJ anon pdb_1fnn Domain The C terminal domain of CDC6 assumes a winged helix fold, with a five alpha-helical bundle (alpha15-alpha19) structure, backed on one side by three beta strands (beta6-beta8). It has been shown that this domain acts as a DNA-localisation factor, however its exact function is, as yet, unknown. Putative functions include: (1) mediation of protein-protein interactions and (2) regulation of nucleotide binding and hydrolysis. Mutagenesis studies have shown that this domain is essential for appropriate Cdc6 activity [1]. 
25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.61 0.72 -4.26 83 705 2009-01-15 18:05:59 2006-08-08 13:37:42 6 7 285 10 384 699 139 80.40 22 15.43 CHANGED LhAllhhtppsp...p.hssuclachYpplscp.hsh..cslop.cclpshls-LchhGllpsch.spG..p.tG+ppcl..pLshs..pplhcs.....l ........................Lhullhhhcppt.....cphshsc....lachYppl.Ccp..hsh..pslsp.pchhshl.scLc.thGll.phpp......ptpuphpcl....pLp.hs...cplhtsl........................ 0 83 216 312 +8912 PF09080 K-cyclin_vir_C K cyclin, C terminal Sammut SJ anon pdb_1g3n Domain Members of this family adopt a secondary structure consisting of a five alpha-helix cyclin fold. Interaction with cyclin dependent kinases (CDKs) at a PSTAIRE sequence motif within the catalytic cleft of CDK results in the regulation of CDK activity [1]. 25.30 25.30 25.70 134.40 24.10 25.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.45 0.72 -3.84 2 10 2009-01-15 18:05:59 2006-08-08 14:06:09 5 1 6 2 0 9 0 103.90 69 40.57 CHANGED AVLsTDshu.hLhK.hhspppL.hhHppVsp.lpKAlVsPtTGuLPsSlluAA.CALhs.usshP.ss.......pLAphlGsssusLtAAsEplhTolp-FD..RI AVLATDVTSFLLLKLLGGSQHLDFWHHEVNTLITKALVDPKTGSLPASIISAAGCALLVPANVIPQDTHSGGVVPQLASILGCDVSVLQAAVEQILTSVSDFDL.RI... 0 0 0 0 +8913 PF09081 DUF1921 Domain of unknown function (DUF1921) Sammut SJ anon pdb_1gcy Domain This domain, which is found in a set of prokaryotic amylases, has no known function [1]. 
25.00 25.00 29.30 89.70 24.80 17.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.29 0.72 -4.00 5 11 2009-09-11 11:07:02 2006-08-08 14:32:33 5 2 11 9 3 20 0 50.80 71 9.46 CHANGED oGFSGLVATlSGSsQpLVhALDSNLSSPuQVASGSFSpAlNpDNGplRIWR SGYSGLVATVoGSQQTLVVALNSNLSNPGQVASGSFSEAVNsSNGQVRVWR 0 1 2 3 +8914 PF09082 DUF1922 Domain of unknown function (DUF1922) Sammut SJ anon pdb_1gh9 Domain Members of this family consist of a beta-sheet region followed by an alpha-helix and an unstructured C-terminus. The beta-sheet region contains a CXCX...XCXC sequence with Cys residues located in two proximal loops and pointing towards each other. This precise function of this set of bacterial proteins is, as yet, unknown [1]. 20.70 20.70 21.80 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.39 0.72 -3.91 3 19 2009-01-15 18:05:59 2006-08-08 14:42:03 5 1 19 1 16 17 1 66.50 36 78.59 CHANGED YlIFRCDCGRVLYSKEGsKTRKCVCGKTlNVKcRRIFK+A-opEEASEsVQcMQEEIYGuocF+sAS..E ..............YhIFRC.c.CGRhlYu+-sspT++C.sCG+sl+lKppRIht+scstcEAuphVp+lQpphhGtstFpps..t....... 0 2 13 14 +8915 PF09083 DUF1923 Domain of unknown function (DUF1923) Sammut SJ anon pdb_1gjw Domain Members of this family are found in maltosyltransferases, and adopt a secondary structure consisting of eight antiparallel beta-strands, which form an open-sided 'jelly roll' Greek key beta-barrel. Their exact function is, as yet, unknown [1]. 
19.20 19.20 20.20 98.00 18.30 19.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.15 0.72 -4.49 2 7 2009-01-15 18:05:59 2006-08-08 15:55:14 5 1 6 2 1 5 0 64.10 86 10.04 CHANGED GKFENLTTKDLVMYSYEKNGQKIVIAANVGKEPKEITGGRVWNGKWSDEEKVVLKPLEFALVVQ GKFENLTTcDLVMYSYE+NGQKIllAANVGKEPKEITGGRVWNGK.WSDEEKVVLKPL-FsLVVQ 0 1 1 1 +8916 PF09084 NMT1 NMT1/THI5 like Mistry J, Wood V anon Pfam-B_2797 (release 20.0) Family This family contains the NMT1 and THI5 proteins. These proteins are proposed to be required for the biosynthesis of the pyrimidine moiety of thiamine [1][2][3]. They are regulated by thiamine [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.17 0.70 -4.75 60 7206 2012-10-03 15:33:52 2006-08-08 16:24:59 6 87 2898 13 2338 13311 2793 196.00 17 56.48 CHANGED NssHsslalAppKGaFc-pGL-V.cIhps.sssssssphVusGpsDh.ulsht.plh..hs+u.pGhPlhslusllppshsulhshccss.Ip..s.pDLcGK+lG..hss.ssh..pcshlpshlppsGh..s.sclphlss.shsh...ulhsGplDsshhshtsh-hlp..hc..........hcGh...chphhhhp-hGss.sahshlllsscshlpp.ps-hl+pFlcAsp+GhpashscPc-A 
................................................................................................................................................................t.......t.....t.......u.......l.....p.....l...p.h..h..........ss....s....s......t.........h.....p.....h..l.....t..s.G..p..hD.....h.....u..........h.........s.......t......t.....................s....h..h.........hs.........t.........s.......p............G.........h.........s..........l..........h.........h........l................u.........s...............h........h.......t.......p.........s...........s.........t........s...........l........l.........s......h............p............s..........s.......s...Ip..............oh.....t...........D..........L.........+.........G.........K.........+....l..u............h..st....uss.............s.p...h......h....h...t.......t.....h........L....p.......p...t...G.l.....................s.....s......c.......l.....p.....h........l..........t.........h.............s............s............s............s.......t................h.................s......u.......l..t...s......G..p..l...D..A..h...h........h.........s............h......h..t.......................................................t.s.........h........h...h.................t.....p..............s.........................................................h......h.....h....s....p....t.p....h....hp.....p...p..............t.......h...t...t...h...h.t..sh....pu.h.t..h...t................................................................................................................................... 0 641 1405 1915 +8917 PF09085 Adhes-Ig_like Adhesion molecule, immunoglobulin-like Sammut SJ anon pdb_1gsm Domain Members of this family are found in a set of mucosal cellular adhesion proteins and adopt an immunoglobulin-like beta-sandwich structure, with seven strands arranged in two beta-sheets in a Greek-key topology. They are essential for recruitment of lymphocytes to specific tissues [1]. 
25.00 25.00 26.30 65.10 18.50 17.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.35 0.72 -4.21 5 43 2012-10-03 02:52:13 2006-08-08 16:37:59 5 4 21 2 9 34 0 108.90 71 32.38 CHANGED AFPDQLTVSPEALVPGpDpEVACTAHNVTPAcP-uLShSLLLG-QELEGuQALsPEVpE...EPQEuE.DPLFQVTpRWLLPuLGTPuPPALHCQVTMpLPGLpLSHR+uIPVL .AFPDQLTVSPsALVPG.D.EVACTAHcVTPsDPNuLSFSLLLGsQELEGAQAL.sPEVcE...E..PQp-E.Ds.LFRVTcRWRLPPLGTPsPPALaCQATMRLPGLELSHRQAIPh...... 0 1 1 2 +8918 PF09086 DUF1924 Domain of unknown function (DUF1924) Sammut SJ anon pdb_1gu2 Domain This domain is found in a set of bacterial proteins, including Cytochrome c-type protein. It is functionally uncharacterised. 21.00 21.00 21.00 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.42 0.72 -3.94 17 99 2012-10-03 10:02:11 2006-08-08 16:50:24 6 2 90 17 41 102 18 95.10 45 66.68 CHANGED hpssuh.sshsusRGptLappct......cphuCsSCHssssppsGpHspTGK.IpPhAPusNPcRaoDsAKVEKWFpRNCppVlGRtCTspEKGDhlsaL ........h.tstuhsshsAtRGpthatscp.....s.cphSCuoCHssss.....TpsGpHspTGKsIcPhAPusNPcRaTDsAKlEKWFpRNCssVluR-CTstEKuDhLsaL............................ 0 9 27 32 +8919 PF09087 Cyc-maltodext_N Cyclomaltodextrinase, N-terminal Sammut SJ anon pdb_1h3g Domain Members of this family assume a beta-sandwich structure composed of the eight antiparallel beta-strands. A ten residue linker is also present at the C-terminal end, which connects the N terminal domain to a distal domain in the protein. This domain participates in oligomerisation of the protein, wherein the N-terminal domain of one subunit contacts the active centre of the other subunit, and is also required for binding of cyclodextrin to substrate [1]. 
25.00 25.00 29.60 28.80 21.70 20.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.01 0.72 -4.06 28 194 2009-01-15 18:05:59 2006-08-09 09:13:25 6 4 178 12 55 197 92 87.20 39 14.09 CHANGED +lEPs.WWsGMpNscLQLMlaGcsIus....hplslsh.sGVplpslp+.s-NPNYLFlsL-ls.pAcsGshslshpps............cpphphsYpLKpRcp ....+l-Ps.WWsGM+NP.pLQLhlY..G..csIus....spVolsh.sGVplpsls+.h-sPNYLhlhLsls..pApPGphslshp.ps............cpphshsYpLKtRp............ 0 22 42 52 +8920 PF09088 MIF4G_like MIF4G like Sammut SJ anon pdb_1h6k Domain Members of this family are involved in mediating U snRNA export from the nucleus. They adopt a highly helical structure, wherein the polypeptide chain forms a right-handed solenoid. At the tertiary level, the domain is composed of a superhelical arrangement of successive antiparallel pairs of helices [1]. 19.80 19.80 20.10 20.10 19.60 19.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.23 0.71 -4.86 9 328 2012-10-11 20:01:01 2006-08-09 09:53:49 6 9 258 11 235 329 0 159.30 35 20.77 CHANGED PsssolsuhlhR-hlhDhIshhcaNRp-sA+hLlsLchahs.tTFs..sss.spL...........hsPspSoWplEDlhVEslhuplFpLPsP.hp.lYYpSlLhEhC+huPsslAPslGRulRhlYpplso..hDhEhhcRFlDWFSaHLSNFsFpWpWpEWlsDl.pLssh..HP+hsFl+plIcKElRLSahpRI+poLP- .......................................................................................................................................................................s.tsph.h-.hhVEslFuplFpL.........PsP.......a....h.......laYtolLlElC.......Kh.......t..P....u.....u...ls..s....lupAhchLYpp.l-s...............hsh..ph.hpRalsWFSaHLSNFpFp......W...p.Wp-Wsssl......p.h.st..................pP+htFlppllcKplRL.Sat.p.RIpphlP................... 
0 75 123 197 +8921 PF09089 gp12-short_mid Phage short tail fibre protein gp12, middle domain Sammut SJ anon pdb_1h6w Domain Members of this family adopt a right-handed triple-stranded beta-helix fold, and are found in the middle of the phage short tail fibre protein gp12 [1]. 25.00 25.00 25.50 26.00 21.90 18.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.92 0.72 -4.03 2 20 2009-01-15 18:05:59 2006-08-09 11:06:17 5 2 18 1 0 24 0 76.70 59 14.83 CHANGED TGtTLNGRGuTsSMRGVVKLTTpAGststGDuSuALAWNADVIppRGGQhI.GoLpl.DphT.ANGhhshsGhh+.sst.l ....TGtTLNGRGuTTSMRGVVKLTTTAG.tstGDuSoALAWNADVIsTRGGQTINGoLNl.ssLT.ANGhhshsGhhp.sst.l............ 0 0 0 0 +8922 PF09090 MIF4G_like_2 MIF4G like Sammut SJ anon pdb_1h6k Domain Members of this family are involved in mediating U snRNA export from the nucleus. They adopt a highly helical structure, wherein the polypeptide chain forms a right-handed solenoid. At the tertiary level, the domain is composed of a superhelical arrangement of successive antiparallel pairs of helices [1]. 
21.00 21.00 22.80 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.58 0.70 -5.15 17 319 2012-10-11 20:01:01 2006-08-09 11:16:47 6 8 264 11 230 316 4 252.90 22 31.81 CHANGED Fpa......pp-shPhpptuppllshh+p+.ts..sc-lhpllppltp.sss.t.....s..hhtlslhlpsllplGS+ShSHshshlp+hpppL+tl........................hpspppcthllculhcaWpspsQhhhlllDphlphpllsstullsWhh.spph....sphhscshsaEhlppslpchttpht....t................................................c-.sst..............................h.ppLtpthsctppllthhhpphl.ll............sp....psspphtphstpW...ahthhGhlcshltca ........................................................apa........tppshPh..tshupplhshl+pK.ss....scEl.sllp...pl..p.spstt.................s...hplclhlpslLplGSKShSHshu.slp+..a+phlpp..l......................................................spsp..ps.php...llpslhchWp.spsQhhhlllDKh.lphpIlsstuVlpWl...F...ssph...............spths.......chalaEhlpsolpKhsp+lhpltpch...p...........................................................................................p-.s.t...........................................phpthpcpLppt.s.-tpslhhhhh..p.phl.lh...................spt...hpspt.th.pph.tp.W........ah.hhth.pthhh.................................................................................. 0 79 126 191 +8924 PF09092 Lyase_N Lyase, N terminal Sammut SJ anon pdb_1hn0 Domain Members of this family are predominantly found in chondroitin ABC lyase I, and adopt a jelly-roll fold topology consisting of a two-layered bent beta-sheet sandwich with one short alpha-helix. The convex beta sheet is composed of five antiparallel strands, whilst the concave beta-sheet contains five antiparallel beta-strands with a loop between two consecutive strands folding back onto the concave surface. This domain is required for binding of the protein to long glycosaminoglycan chains [1]. 
26.00 26.00 26.30 30.20 25.60 25.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.22 0.71 -4.64 8 133 2012-10-03 19:46:52 2006-08-09 13:18:47 6 3 88 3 22 127 1 177.80 36 17.68 CHANGED uhhpSphh-Fs.GsQlP-hhpsuuGSpLSLSus+YhhGpQSLcWcWpsGSohslc+PlsL.pccsASKsaGhpu...lohWIYNEpPVDshhph-LGpch.hsSGsPcAuFcl+lNFoGWRushVShppD..h-Gcch-G.....................Kush-SlRhhAPhtAPpGplaIDRVhhuhc.DARhQhoD.QV...+sR ....................................h.hsh.Ep..p.lPsthpsustSpLolSsp+YKpGppSLcWsa.p.sGus.Ls.lccs.lph...pc.........s...ss.uKshuhpu..............hphWIY.NEpP.c.ct.l.p.F-Fhcs.......Gcss...suFphplNFoGWRusaVsacpD..MpG..ptt-G.......................pMsplRlhAP.......ss.pGp..lalD+lhhuhc.DsRhQhuD.Ql.h...................... 0 8 19 19 +8925 PF09093 Lyase_catalyt Lyase, catalytic Sammut SJ anon pdb_1hn0 Domain Members of this family are predominantly found in chondroitin ABC lyase I, and adopt a helical structure, with fifteen alpha-helices which are at least two turns long and several short helical turns. The bulk of the domain is formed by ten alpha-helices forming five hairpin-like pairs and arranged into an incomplete toroid, the (alpha/alpha)5 fold. Additionally, two long and two short alpha-helices at the N terminus of the domain wrap around the toroid. At the C-terminal end of the toroid there is one additional short alpha-helix. This domain is required for degradation of polysaccharides containing 1,4-beta-D-hexosaminyl and 1,3-beta-D-glucoronosyl or 1,3-alpha-L-iduronosyl linkages to disaccharides containing 4-deoxy-beta-D-gluc-4-enuronosyl groups [1]. 
22.80 22.80 23.60 23.00 22.70 22.70 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.11 0.70 -5.56 4 140 2009-01-15 18:05:59 2006-08-09 13:19:05 6 4 89 3 25 131 2 351.20 36 35.10 CHANGED P-lpl...loP.psRphcslttl-p..hhp-hls.hhts.t.ps.shpppIsph+pca-thsIshpusG.thoGsPlhss+pp.hapsEhh.s.sKshhssh.lh.ttYss..................+pp.KppaLshaDahhDQGaAhGSuhsThHHaGYpsRthYhusaLM+DsLpEps+hsphhsTLRWas.shppoh.scPstsst.hDpapThhht+lh.lLhhsDs.c+lphlpohSRWlstuL.pssPGhtGulKsDGsuFHHcGsY.PuYuhsuhcsAuphIYLhpcTsFuloEpuppslKcshluhp.YsN.hchuhulSGRHPhss..uppls.uYAhhAluucss.....hD+phAusYLRLhcpssops.tt..a .................................................................phth....s.lss.ptt-hthh-p..hhpshl......ts..t...t.h..pphpphctpasthpIphp..sG.ploGpsIh...ts.+pp....haps...hhsshscphho...ph.shhssYtsLh.pluhhapps.s.....+pphtchalhhhcalhDQGashGSuhsshHH.aGYss.RthYhuhaLM+c.hLc...cps....hh..pts.csLhW..Ys.shppphp...p.....s....s..s..pupslDhaNThhpt+l.uslhhhs..Ds.....s..c+lphL+uFucWlshul.pssPGhhGGhKsDGosFHHpssY.PuYuhsuhss.AuphlYhLsGTs..Ftlo-pA+pslKpAhLs.....h.....c......h.YsN..hp.h.sluloGRHPh.s........psp.l.h.tsFthh.ALuGsPsst.pphDptLAAsYL+Lspssps.t................................... 1 10 22 22 +8926 PF09094 DUF1925 Domain of unknown function (DUF1925) Sammut SJ anon pdb_1k1x Domain Members of this family, which are found in a set of prokaryotic transferases, adopt an immunoglobulin/albumin-binding domain-like fold, with a bundle of three alpha-helices. Their function is, as yet, unknown [1]. 21.10 21.10 21.40 26.60 20.90 20.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.92 0.72 -3.99 15 86 2009-01-15 18:05:59 2006-08-09 14:08:33 6 2 86 5 57 89 15 78.20 46 11.65 CHANGED GhW+NFhsKYsEoNhMHKRMLtVSc+lpsh.......tsp......ApctLa+AQCNDAYWHGlFGGlYLPHLRcAlYcsLI+A-shL ......GhW+NFhsKYPESNhMHKRMLhlSpplpph........tptp.......spctLa+uQsNDsYWH.G.lFGGlYLPHLRcAlacplIcAEp..h... 
0 27 41 47 +8927 PF09095 DUF1926 Domain of unknown function (DUF1926) Sammut SJ anon pdb_1k1x Domain Members of this family, which are found in a set of prokaryotic transferases, adopt a beta-sandwich fold, in which two layers of anti-parallel beta-sheets are arranged in a nearly parallel fashion. The exact function of this family is, as yet, unknown, however it has been proposed that they may play a role in transglycosylation reactions [1]. 19.60 19.60 21.50 21.10 17.40 19.50 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.56 0.70 -5.18 21 118 2009-12-15 10:48:00 2006-08-09 14:08:46 6 5 105 5 74 118 24 224.80 21 38.21 CHANGED DhDhDGpcElhlpscsh.shlcsstGGslhElss+..sttaNasssLsRphEtYHctl..........ppptpslsohH-hsphhtc.hcccltaDhahRshhhD+hhpsstsL-shhpsp...p.uDh...sa.ht..tt.....lphhtct.h...hth.hplpKshplps...sslplpYplp.........pshshhaulElN..........LA...................lpuhtcshs..tscplplpD.ahhsclplc..hspssplathPlpTlSQSEtGa-hlhQulshhhhasl....ptphphplph ......................DhDhDGhcEhhhpspph.hhlpst.GGplhEh-hh..ttthNhhsshsRp.EhYaphh.........................t..t.t.....h..hcp....h.......tthh.D.h.+h.hhDphh..thshpph.tsp.......t.....a..h......ttt........hhh.t...h..........hplpKphphpp...ss..lplpYplp.......t..s...hphhausEhN.........lu..........................tth..p..t.........tspthth.-..hh.hph.lp...h.st.hphhhhPh.olo.po-.tGh-hh.Quhthhhhh.h......t.........h......................... 0 35 58 64 +8928 PF09096 Phage-tail_2 Baseplate structural protein, domain 2 Sammut SJ anon pdb_1k28 Domain Members of this family adopt a beta barrel structure with a Greek key topology, which is topologically similar to the FMN-binding split barrel. They are structural component of the viral baseplate, predominantly found in the structural protein gp27 [1]. 
25.00 25.00 59.50 58.10 23.30 19.40 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.14 0.71 -4.62 6 29 2012-10-01 22:58:23 2006-08-09 14:44:45 5 3 27 3 0 28 0 166.00 58 42.44 CHANGED hlspEPIshlVsEPRLlGQaIp.l-p.lsFDFEWLTKANsaTRsPacNsThYAHSFlDKphs+IlTG-GpNulslSRSGAYuDhTYRNGaEEusRLhThuQYDuYApspTaGNFsLTPGhKIpFaDpKNQF+sEFaVDEVIHElSpppSlTpLYMFsNSptlp.....cVKNE ..hlsQEPhshlVGEPpLIGQalQplchPlAaDFpWLTKuNt+sRsPhcNsTlYAHSFlDpphs+IosGcGpNSIlVSRSGuYS-hTYRNGaEEA.RLhTMAQYDGYAcCpohGNFsLTPGhKI.FhDsKNQF+s-FYVDEVIHElSNNsSlTpLYMFTNuppLc.....cVKNE.......... 0 0 0 0 +8929 PF09097 Phage-tail_1 Baseplate structural protein, domain 1 Sammut SJ anon pdb_1k28 Domain Members of this family adopt a beta barrel structure with a Greek key topology, which is topologically similar to the FMN-binding split barrel. They are structural component of the viral baseplate, predominantly found in the structural protein gp27 [1]. 18.90 18.90 19.10 120.10 17.30 16.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.12 0.71 -4.73 5 31 2012-10-01 22:58:23 2006-08-09 14:44:55 5 3 30 3 0 31 0 194.80 57 49.04 CHANGED QRsGYPNVSIKLYQsYDAWLENRFIELAATFlTLTMRDGLh.GlNEGLLQFYDuKNLHTKLsG-EIIQlSLKTANT.EpTaNRIYGIKHhuVoVDpKGDNIITFQLGSlHplcNLKFSRMFTNsAVsSVsEMIGsIYpDpPLLsPsIsuINshVPpsPWVsoINcYhcFVRcaGQoVEoE+FVaVWEDh-GIsIuDa ....QRsGaPNlSIKLY-sYDAWh-NRFlELAAThTTLTMRDuLY.GpNEGlLQFYDuKNlHTKMsGcpIIQISlpNANo.pplpoRIYGsKHauVSVDsKGDNIIsIpLusIHplcNLKFuRsFFssAsEolpEMlsVIYpD+sLLsPsINuINsYVPslPWsuohcsYhsaVRElGhuVtS-cFVFVWEDh.GIshhDY... 1 0 0 0 +8930 PF09098 Dehyd-heme_bind Quinohemoprotein amine dehydrogenase A, alpha subunit, haem binding Sammut SJ, Eberhardt R anon pdb_1pby Domain Members of this family are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase. 
They have a predominantly alpha-helical structure and can be divided into two subdomains, each binding a haem C group via a conserved CXXCH motif [1,2]. 25.20 25.20 25.30 25.40 25.10 25.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.12 0.71 -4.81 11 50 2012-10-03 10:02:11 2006-08-09 17:04:58 5 5 42 4 19 60 5 135.40 38 33.92 CHANGED tsGppllpppChuCHsspsss..thuRISpQRKTPEGW.MTlsRMphhHGlplos--RpslVKYLADpQGLAPuETcuhRYllERcPNshEpscstphophCuRCHStARluLQRRTspEWc+LlpFHLGQaPolEYQAhuRDR-WaslAhs-llPhLAcpYPh-osuW ...t.pGppllpspChuCHssptps....shsRIuppRKTPEG.W.MolsRMphhHGl.plos--+pslV+YLADppGLuPsEstshpYhh-+p.ss.-.ph.......s..hsphCuRCHShARhhLQRRstpEWppLlpaHluQaPohEhQA.uRDRpWh.lA.tphhs.Lucpashtp........................... 0 3 13 16 +8931 PF09099 Qn_am_d_aIII DUF1927; Quinohemoprotein amine dehydrogenase, alpha subunit domain III Sammut SJ, Eberhardt R anon pdb_1pby Domain Members of this family, which are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase, adopt an immunoglobulin-like beta-sandwich fold, with seven strands arranged into two beta sheets; the fold is possibly related to the immunoglobulin and/or fibronectin type III superfamilies. The precise function of this domain has not, as yet, been defined [1,2]. 21.20 21.20 21.20 21.20 21.00 20.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.36 0.72 -3.66 14 45 2012-10-03 16:25:20 2006-08-09 17:07:52 5 8 39 4 18 55 11 81.00 32 14.49 CHANGED pspllAVpPshl+sGsc.sclslsGsuL.....suclsLusGlcVscVlppossplslcV+ssucApsG.RsVulGutp.ussLsVYs ...........tspllAVpPshl+AGsc.oplolsGouL.....sucssL...u.s..G...lcVscllp..posspltVcl+suA-ApsG.+plulGshp.sssLsVYp.......... 
0 7 14 15 +8932 PF09100 Qn_am_d_aIV DUF1928; Quinohemoprotein amine dehydrogenase, alpha subunit domain IV Sammut SJ, Eberhardt R anon pdb_1pby Domain Members of this family, which are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase, adopt an immunoglobulin-like beta-sandwich fold, with seven strands arranged into two beta sheets; the fold is possibly related to the immunoglobulin and/or fibronectin type III superfamilies. The precise function of this domain has not, as yet, been defined [1,2]. 25.00 25.00 131.60 130.70 19.20 15.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.84 0.71 -4.47 12 36 2009-01-15 18:05:59 2006-08-09 17:08:39 5 4 31 4 13 43 3 130.40 52 25.03 CHANGED VKVsPsaulARlGGsGG..shPKh.upF-AhuahsGsDGKsGTsDDlclGslPAoWul-sFDEpAtcDpDs+YAGphpss.GlFsPusAGPNPtR+huTNNsGNLKVlATVc-u....uctlou-uphlVTV...QRassPPl .VKVsPsauIARIGGsGu..s.hPKVtupFEA.AassGsDGcPtTtDDlRlGhlPAoWolEsFsEpAtcDcDl+aAGphpus.GlFsPusAGPNPcR+htTNNAGNLKVlATlsDG....up.loGEuHhIVTV...QRWNsPPl.. 0 2 9 10 +8933 PF09101 Exotox-A_bind Exotoxin A binding Sammut SJ anon pdb_1ikp Domain Members of this family are found in Pseudomonas aeruginosa exotoxin A, and are responsible for binding of the toxin to the alpha-2-macroglobulin receptor, with subsequent internalisation into endosomes. The domain adopts a thirteen-strand antiparallel beta jelly roll topology, which belongs to the Concanavalin A-like lectins/glucanases fold superfamily [1]. 
20.50 20.50 20.60 20.80 19.60 19.00 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.79 0.70 -5.27 2 97 2009-01-15 18:05:59 2006-08-10 09:38:45 5 7 59 4 3 94 0 187.30 43 28.60 CHANGED E-thslasECtpsCsLs.csGh..pS+hSl.ssslhD...pGVLaYSMslpstpsslK.t.Dpu.SIho.G..hTlRhpt.........GV......p.NtshpYSYsRp.cGpaulNWLVPIGc-pPusIKl.lcELsttppl.chs.lYoI-hssphLt..KhttssoF.Vpt.EpN.....lAISasuVShhhAQtpspRcKRWucWtoGhsLChLsPhDulYNYlsQQpCsLsDsW.Gt.YcslAGsP....sKpsl-.KP..lppRlHF .......................................CsLs.c.Gh..pSphSl.s.slhD...pGVLaYSMslpstpsslh.t.spu.SIho.G..hTlRhpt.........GV......p.NtshpYSasRp.cupaslNWLVPIGc-pPusIKl.lcELsttppl.chs.lYoI-hssphLt..c.ttssoF.Vpt.EpN.....lAISaPuVShKhApppGpRHKRWu+WtoGL...AhCWhlPl.uIYNYIsQtpCshuDsWhGt.YcslAGsP....sKpsl................................ 0 1 2 2 +8934 PF09102 Exotox-A_target Exotoxin A, targeting Sammut SJ anon pdb_1ikp Domain Members of this family are found in Pseudomonas aeruginosa exotoxin A, and are responsible for transmembrane targeting of the toxin, as well as transmembrane translocation of the catalytic domain into the cytoplasmic compartment. A furin cleavage site is present within the domain: cleavage generates a 37 kDa carboxy-terminal fragment, which includes the enzymatic domain, which is then is translocated into the cytoplasm. The domain adopts a helical structure, with six alpha-helices forming a bundle [1]. 
21.60 21.60 22.60 208.90 21.30 16.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.89 0.71 -4.68 2 59 2009-01-15 18:05:59 2006-08-10 09:57:46 5 2 22 4 1 67 0 140.50 74 23.57 CHANGED cGsuhuALsAHpsCtlPLEThsRpRpPRshpp..pCuY.sQplVuLalAsRl.asplDpVhp.sLsp.tst.....uDLtch.cppPt.sp.sLTlAtt..pcaVpptsG....pspAGAtuADllSLhCPsAstpC.AussD ..tNAMpALAAHRVCGVPLETLARSRKPRDLsDDLSCAYQAQNIVSLFVATRILFSHLDSVFTLNLDEQEPEVAERLSsLRpINENNPGMVTQVLTVARQIYNDYVTHHPGLTPEQTSAGAQAADILSLFCPDADKSCVASNND 1 1 1 1 +8935 PF09103 BRCA-2_OB1 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 Sammut SJ anon pdb_1iyj Domain Members of this family assume an OB fold, which consists of a highly curved five-stranded beta-sheet that closes on itself to form a beta-barrel. OB1 has a shallow groove formed by one face of the curved sheet and is demarcated by two loops, one between beta 1 and beta 2 and another between beta 4 and beta 5, which allows for weak single strand DNA binding. The domain also binds the 70-amino acid DSS1 (deleted in split-hand/split foot syndrome) protein, which was originally identified as one of three genes that map to a 1.5-Mb locus deleted in an inherited developmental malformation syndrome [1]. 23.30 23.30 23.40 25.40 23.10 23.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.50 0.71 -4.40 19 188 2009-01-15 18:05:59 2006-08-10 11:08:30 5 33 140 3 119 199 1 125.00 36 6.93 CHANGED Sul++IhEpDsssuphhVLhVSpl............................p.t.s.ttshlELoDGWYsl+utlD.sLpphlccG+lpl..GpKLh..........lpGAc...Lhu.scsssPL.E....sssslhLplshNuTRhAcWps..+LGhh+..hs ........................SAl++IhEpDssuupsh.VLCVSsIh.................................................................tpppsts.spp.sshlELTDGW.......Yul+Ap.LDs.sL....tphl...cp...G.....+..Lpl..GpKlh.....................lpGAc........Lhu.ss.csss.PL.E..................sspslhLplssNSTR.ApWts..+LGahtp.............................. 
0 65 79 100 +8936 PF09104 BRCA-2_OB3 BRCA2, oligonucleotide/oligosaccharide-binding, domain 3 Sammut SJ anon pdb_1iyj Domain Members of this family assume an OB fold, which consists of a highly curved five-stranded beta-sheet that closes on itself to form a beta-barrel. OB3 has a pronounced groove formed by one face of the curved sheet and is demarcated by two loops, one between beta 1 and beta 2 and another between beta 4 and beta 5, which allows for strong ssDNA binding [1]. 21.10 21.10 21.20 23.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.84 0.71 -4.38 5 83 2009-09-11 15:20:08 2006-08-10 11:09:07 5 17 53 3 44 85 0 137.90 45 5.33 CHANGED REsLcFo+LLDPuFQPPCSEVDlVGlVVSV..V+spGLAPlVYLSDEChNLLsVKFWsDLN....EDIlKP+VLIAASNLQWR.PEu+SsIPTLFAG-aSsFSASPKEsHFQE+FscM+pTlE..NIDoFYcEAEpKLlcLLsuNsPK ...............RcslpFscLh-PsFpPsCuEVDllGhVlSV.......s+p....t..GhuPhVYL.S....DEsaNL.LulKFWtDLs....EDIlKPpsLIAASNLQWR...s-op...Ssl..PoLaAGDhSsFSA.sPKEuHhQEsFschKsslc..Nls..hFhs-AEpKL.hclLptp.................... 1 10 14 27 +8937 PF09105 SelB-wing_1 Elongation factor SelB, winged helix Sammut SJ anon pdb_1lva Domain Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding [1]. 
25.00 25.00 25.40 148.40 23.10 23.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -8.84 0.72 -4.31 2 2 2009-01-15 18:05:59 2006-08-10 11:45:38 5 1 2 3 1 5 0 61.00 100 9.62 CHANGED GSPEKILAQIIQEHREGLDWQEAATRASLSLEETRKLLQSMAAAGQVTLLRVENDLYAIST GSPEKILAQIIQEHREGLDWQEAATRASLSLEETRKLLQSMAAAGQVTLLRVENDLYAIST 0 1 1 1 +8938 PF09106 SelB-wing_2 Elongation factor SelB, winged helix Sammut SJ anon pdb_1lva Domain Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding [1]. 21.50 21.50 21.60 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.64 0.72 -4.04 67 966 2009-01-15 18:05:59 2006-08-10 11:46:27 6 14 953 6 177 683 32 57.80 44 9.28 CHANGED llstLspaHpcpP.c.Gls+-cL+...Rhst.ths....splaptllppLhppGplttptshl+L ......lL-sLAsYHEpHtDcsGsuRERLR......RhAlPht-.................-sLl...hhLI-chp-sGtIhsc+GWLHL............................. 0 66 113 151 +8939 PF09107 SelB-wing_3 Elongation factor SelB, winged helix Sammut SJ anon pdb_1lva Domain Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding [1]. 22.20 22.20 22.60 22.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.40 0.72 -4.57 95 1262 2009-01-15 18:05:59 2006-08-10 11:47:24 6 13 1235 10 249 928 241 50.30 43 8.23 CHANGED ltphhp..p..ssplssuphR-hl.GhoRKhulslLEahDctthT+RtG..-.pRhlp .............tpl.p..c...sGuhssA-FRDtL....s.....luRKhAItlLEYFDRh.GaTRRcG...s.c+lLR....... 
0 93 169 217 +8940 PF09108 Xol-1_N Switch protein XOL-1, N-terminal Sammut SJ anon pdb_1mg7 Domain Members of this family, which are required for the formation of the active site of the sex-determining protein Xol-1, adopt a secondary structure consisting of five alpha helices and six antiparallel beta sheets, in a beta-alpha-beta-beta-beta-alpha-beta-alpha-alpha-alpha-beta arrangement. The fold of this family is similar to that found in ribosomal protein S5 domain 2-like [1]. 25.00 25.00 25.00 32.40 18.90 24.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.02 0.71 -4.56 2 12 2012-10-03 01:04:38 2006-08-10 13:53:43 5 3 5 2 11 15 0 147.40 27 40.39 CHANGED lc+Sps.Pl....Ep-sssNh.ssKlsu.APHsVchMsShhhAlN+.Chsps+s..spcP+SupEHhI.-hscphHsphphullRshlcpLcLppVYcIphhs.hD.sG+lu.hshLlAlW+s.............LKSh.psh.tpFt..........DshhS .........p..ph.p..t......................tpphsu...APHVVpluss...sahAVN+hClVpupllpp+.PpssppH..hI.phstc.psp.phslltphlccLpL+.psYcIpIhsthDassphu...hhshLsAIWKShsh...........p....h........h...tp.................h................. 0 1 2 11 +8941 PF09109 Xol-1_GHMP-like Switch protein XOL-1, GHMP-like Sammut SJ anon pdb_1mg7 Domain Members of this family, which are required for the formation of the active site of the sex-determining protein Xol-1, adopt a secondary structure consisting of five alpha helices and seven antiparallel beta sheets, in a beta-alpha-beta-alpha-alpha-alpha-beta-beta-alpha-beta-beta-beta arrangement. The fold of this family is structurally similar to that found in the C-terminal domain of GHMP Kinase [1]. 
25.00 25.00 28.00 27.20 20.60 20.00 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.15 0.71 -4.21 2 10 2009-01-15 18:05:59 2006-08-10 13:54:42 5 2 5 2 9 12 0 170.80 24 43.23 CHANGED VthlAE.................aDhVFVpTsLHs.caTPphF.sptpsKh...tFpp.c-pspaPD..hst.MshaScpRVtppshss..l.h.op.uLctl.ppcppl.GFElQQGGhLVsLKKssFhsDc..hIphhuhhlts-pppSlppl.FcLLc.G.tup.hp.s..+hh-.pp+sslplchKsVp ..........................pssphhshhu..................tF.cpaDllFV+TNLH......sScFcPp..h...s+..pps+s..cthpp....c-ssph.scs..lsthMhthScsRhspEs.h..s...hpphEpDCcsAlpphp.ppc.pp...l...cGFEVQQGGILllLKKssFhssc....LLcsIuhuItcpsp..t.ploplSFsLLpPutsu..p...........................h................. 0 1 2 9 +8942 PF09110 HAND HAND Sammut SJ anon pdb_1ofc Domain The HAND domain adopts a secondary structure consisting of four alpha helices, three of which (H2, H3, H4) form an L-like configuration. Helix H2 runs antiparallel to helices H3 and H4, packing closely against helix H4, whilst helix H1 reposes in the concave surface formed by these three helices and runs perpendicular to them. The domain confers DNA and nucleosome binding properties to the protein [1]. 21.00 21.00 21.20 21.20 20.60 20.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.36 0.71 -3.68 29 385 2009-01-15 18:05:59 2006-08-10 14:51:15 6 11 236 5 260 400 3 101.60 46 10.01 CHANGED SlDsYYKDlLpsGspssp.......spsPRsPKphslpDaQFaPspLhcL.E+Ephaa+KplsYKsshp-sss............................cs.p-+cpcpchEQccI-NApPLTEEEpp.KpcLhpEG .....................ulDsYa+-AL+...supsps...........PKsPRsPKQsslpDFQFFP.PRLhELhEKE..hhaa+KplGYKVsh.s.s.......................................phspttttpc.EQpcIDpApPLT-EE.pEK-cLlspG.............................................................................................................. 
0 70 132 206 +8943 PF09111 SLIDE SLIDE Sammut SJ anon pdb_1ofc Domain The SLIDE domain adopts a secondary structure comprising a main core of three alpha-helices. It has a role in DNA binding, contacting DNA target sites similar to c-Myb (Pfam:PF00249) repeats or homeodomains [1]. 20.80 20.80 21.00 24.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.29 0.71 -4.37 36 454 2009-01-15 18:05:59 2006-08-10 14:52:14 5 18 282 5 306 466 4 115.10 47 11.41 CHANGED a-KhlppIEpGEcKhp+hppppchLcpKlpphcsPhp-Lplp....Ys..ssp++sYo--EDRFlLshlt+hGh..t.sha-cl+ppI+psPhF+FDWFhpSRTspEluRRssTLlphlp+Ehp .........-KhhtpIEcGEt+lp+ptphc+uLcpKlspY+.sPhppL+Ip..............Yu.....ssp......tKsYoEEEDRFLlsMLaK.hGh.-p-....slY-clRpsIRpuP.FRFDWFlKSRTshELpRRCsTLlshIE+E............ 0 101 169 249 +8944 PF09112 N-glycanase_N PngaseF_N; Peptide-N-glycosidase F, N terminal Sammut SJ anon pdb_1pgs Domain Members of this family adopt an eight-stranded antiparallel beta jelly roll configuration, with the beta strands arranged into two sheets. They are similar in topology to many viral capsid proteins, as well as lectins and several glucanases. The domain allows the protein to bind sugars and catalyses the complete removal of N-linked oligosaccharide chains from glycoproteins [1]. 
17.80 17.80 17.90 19.10 17.60 17.50 hmmbuild --amino -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.36 0.71 -4.90 16 80 2009-01-15 18:05:59 2006-08-10 15:15:43 5 4 67 8 27 83 150 166.50 33 34.00 CHANGED slsVF-ctplpFssssh.......tts...th.ttphhLpplpLPs..htcsh.slpLcls.tssG...Ds...WD+oGol..FVlPcssshph.s.htp...........................slELhRFhTPFtluphs....................psVsacpDlosLhPhLpG.cshlshahssWsttG.....ahsSlclch ............................pl.VF-pp.lpFssshh.......ttss.hphssGpllLKKlpLPs..hpcs.h.slplclsltSsG......Dt....WD+oGSs..FVlPcs.sshshhs.htps.tthP.hps.php..h.GllsstpY.sslELMRFhTPFGVupas..........h...lhh.pW.psV.appDlTcLhPlLc.....u.-salGlaIssWs...tcG.....ahsShclch...................................... 5 10 22 26 +8945 PF09113 N-glycanase_C PngaseF_C; Peptide-N-glycosidase F, C terminal Sammut SJ anon pdb_1pgs Domain Members of this family adopt an eight-stranded antiparallel beta jelly roll configuration, with the beta strands arranged into two sheets. They are similar in topology to many viral capsid proteins, as well as lectins and several glucanases. The domain allows the protein to bind sugars and catalyses the complete removal of N-linked oligosaccharide chains from glycoproteins [1]. 25.00 25.00 42.90 37.50 17.10 16.80 hmmbuild --amino -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.19 0.71 -4.49 23 91 2009-01-15 18:05:59 2006-08-10 15:17:00 5 6 75 8 36 95 190 148.10 34 28.22 CHANGED psYssh....................t.shsh...sFslPsss+pscLhsllTGHG.......spssssEFs.psHplhVsGpps....hphhsht.sCAshth.sss....pGsWh...........................huRusWCPGpslpPhpl-lsshhssspthssslshtshhsGsp............uhhhhSuaLl ........................pYsph.hspc.slsh...sFslspss+sscLchlsTGHG.......GasGGDEFst+ppplhlDGppl....hsahPWRp-CuoaRpaNPu..........oGsWh.......................uSSDhSRSNWCPGsslsPphlslssltsGpHohslsIPtut.h-Gsp............saWhlSuhLl............................... 
0 18 30 35 +8946 PF09114 MotA_activ Transcription factor MotA, activation domain Sammut SJ anon pdb_1bja Domain Members of this family of viral protein domains are implicated in transcriptional activation. They are almost completely alpha-helical, with five alpha-helices and a short, two-stranded, beta-ribbon. Four alpha helices (alpha1, alpha3, alpha4 and alpha5) are amphipathic and pack their hydrophobic surfaces around the central helix alpha2 [1]. 26.10 26.10 27.10 26.50 25.70 26.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.21 0.72 -4.23 3 22 2012-10-04 14:01:12 2006-08-10 15:54:45 5 2 21 3 0 19 0 93.10 56 44.63 CHANGED SKVTYIIKASNNA.LNEKTAsIhIpIAKKsFITAAEVREuVcsEhNsuVVNSNIGVLIKKGLVEKSGDGLIsTGEApDIIscAA-LaAQENAPELLK ..........SKlTYIIKASpss.LNEKTAuILIpIAKKsFITuuEVRE.lp.-husAVVNSNIGVLIKKGLlEKSGDG.LlhTuEupDIlppAAsLaApENAPEhL........... 0 0 0 0 +8947 PF09115 DNApol3-delta_C DNA polymerase III, delta subunit, C terminal Sammut SJ anon pdb_1a5t Domain Members of this family, which are predominantly found in prokaryotic DNA polymerase III, assume an alpha helical structure, with a core of five alpha helices, and an additional small helix. They are essential for the formation of the polymerase clamp loader [1]. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.43 0.71 -4.07 41 817 2009-01-15 18:05:59 2006-08-11 10:11:52 5 2 811 9 97 453 22 117.10 43 36.28 CHANGED p.t.hppRpphhpshhpshpspsh..lpLlshls..ccps.tpLcWLtsLLhDAlKhphGlsp.hhhNtD.hshlpplupphssptLhpthpplhph+ppLhplsulNpELLLhchLlphpp.h...l ....................................t.s-pWptRcsLCQuL.h..ulp.osDh..huLL..sALN...HE..pAssRLaWLuoLLhDALKt+aGAup..lsNsDhsslVspLAspLSsu+Lpsllscls+hR-QLhsVoG..lNRELLlTDhLL+lEch.............. 
0 20 42 70 +8948 PF09116 gp45-slide_C gp45 sliding clamp, C terminal Sammut SJ anon pdb_1b77 Domain Members of this family are essential for the interaction of the gp45 sliding clamp with the corresponding polymerase. They adopt a DNA clamp fold, consisting of two alpha helices and two beta sheets - the fold is duplicated and has internal pseudo two-fold symmetry [1]. 25.00 25.00 25.80 33.30 22.00 24.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.34 0.71 -10.38 0.71 -4.09 9 69 2009-01-15 18:05:59 2006-08-11 11:03:52 5 2 67 21 1 60 670 110.60 33 49.59 CHANGED sVhF-lcA-DhpQLh+sSpslplDslulssc-G..+IVlsuapph.Dus.spspaSlslu.-a-Gss.sFsFllphsNMKhhs...ucYKVhlhu.......chAupFputpss..YllAhEscSoasF .....sVpFc..Lcu-cLppLh+supshplsslslsscsG....+lVlss.....cp.....spspspYSlp.l.G.-h-sss..FsFslphpNhKhls...GcYcVhlss.......ptsupFpup..s..hs..YhlALEsss............ 0 1 1 1 +8949 PF09117 MiAMP1 MiAMP1 Sammut SJ anon pdb_1c01 Domain MiAMP1 is a highly basic protein from the nut kernel of Macadamia integrifolia which inhibits the growth of several microbial plant pathogens in vitro while having no effect on mammalian or plant cells. It consists of eight beta-strands which are arranged in two Greek key motifs. These Greek key motifs then associate to form a Greek key beta-barrel [1]. 21.80 21.80 22.80 22.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.82 0.72 -3.56 2 31 2012-10-01 23:14:22 2006-08-11 11:26:43 5 1 12 1 19 30 0 78.70 46 75.83 CHANGED ShFTsWuGPGCNN+AtRYSKCGCSsItpp..GGY-F.YpGQTAAhYNpssCpGVApTRFuSSs.pACssFGWKShFIQC ..............SYFssWuGPGCNN.....chtRYS...sCGCoNluss.H.GGYcFsYQ.GQTAusYNsssCpGVspTRFS....s....os.QsCu..s.FGWpShFIQC............................................................... 
0 0 16 19 +8950 PF09118 DUF1929 Domain of unknown function (DUF1929) Sammut SJ anon pdb_1k3i Domain Members of this family adopt a secondary structure consisting of a bundle of seven, mostly antiparallel, beta-strands surrounding a hydrophobic core. The 7 strands are arranged in 2 sheets, in a Greek-key topology. Their precise function, has not, as yet, been defined, though they are mostly found in sugar-utilising enzymes, such as galactose oxidase [1]. 25.00 25.00 27.80 27.60 23.30 23.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.47 0.72 -3.90 68 596 2009-01-15 18:05:59 2006-08-11 11:52:05 6 64 224 13 381 626 15 99.90 29 14.34 CHANGED RPslss.s.....s........ssl....saGsshslss..........ss......shspssLl+sussTHuhshsQRhl.Lsh........tsst....ssp.hssshP..sss..s.lssPGaYMlFlls.sG....lPShuphVpl ........................................RPslss..s..s..........ssl..shG.s.s.hsls.hp...................ss..........shhpssLlpsuhsTHohsh.sQRhl.Lsh............................ssss......sts..hslshP..sss...slsPPGaYMLFlls.sG......lPS.h.u.phVpl........................ 0 152 263 339 +8951 PF09119 SicP-binding SicP binding Sammut SJ anon pdb_1jyo Domain Members of this family bind the chaperone SicP, which is required both to maintain the stability of SptP, as well as to ensure the eventual secretion of the protein. The domain is found in the Salmonella effector protein SptP, which interacts with SicP chaperone dimers mainly through four regions of its chaperone-binding domain. The structure of the SptP-SicP complex contains four molecules of SicP, aligned in a linear fashion and arranged in two sets of tightly bound homodimers that bind two SptP molecules. The SicP homodimers do not interact with each other, but are held together by a molecular interface formed between two SptP molecules. 
Each SptP molecule is wrapped around by three SicP chaperones (two chaperones from one homodimer and a third one from the opposite homodimer pair) [1]. 25.00 25.00 25.50 38.20 24.20 19.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.48 0.72 -4.28 3 136 2009-09-10 17:21:18 2006-08-11 13:28:16 5 3 135 2 4 70 0 81.50 68 14.40 CHANGED A+EGFKEKLLoaLSHlPLLKNT-AVQ+YsEslRl-N++lLpVFLpALocRYGc-AApDAlDhuclssssPLTQRpl...VQITE ...APEKFSSKVLTWLGKMPLFKNTEVVQKHTENI+sQDQKILQTFLpALTEKYGETAVNsALLMSRIN.MN.KPLTQRLA...VQITE.. 0 1 1 2 +8953 PF09121 Tower Tower Sammut SJ anon pdb_1mje Domain Members of this family adopt a secondary structure consisting of a pair of long, antiparallel alpha-helices (the stem) that support a three-helix bundle (3HB) at their end. The 3HB contains a helix-turn-helix motif and is similar to the DNA binding domains of the bacterial site-specific recombinases, and of eukaryotic Myb and homeodomain transcription factors. The Tower domain has an important role in the tumour suppressor function of BRCA2, and is essential for appropriate binding of BRCA2 to DNA [1]. 25.00 25.00 25.10 48.10 24.80 23.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -8.14 0.72 -4.02 9 69 2009-01-15 18:05:59 2006-08-11 14:43:34 5 13 38 3 30 70 0 41.90 65 1.52 CHANGED MEKpssGhhlFRNpRtEE+EAt+aupsQQKKLEsLFoKIQsE MEKTsoGhYIFRNERtEEKEAsKaAEsQQKKLEALFsKIQsE. 0 4 6 13 +8954 PF09122 DUF1930 Domain of unknown function (DUF1930) Sammut SJ anon pdb_1okg Domain Members of this family are found in 3-mercaptopyruvate sulfurtransferase, and have no known function. They adopt a structure consisting of a four-stranded antiparallel beta-sheet and an alpha-helix, arranged in a beta(2)-alpha-beta(2) fashion, and bearing a remarkable structural similarity to the FK506-binding protein class of peptidylprolyl cis/trans-isomerase [1]. 
20.10 20.10 21.00 90.50 19.40 19.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.25 0.72 -4.20 3 12 2012-10-02 13:30:10 2006-08-11 15:37:18 5 1 12 1 3 13 0 68.20 59 18.13 CHANGED MLhpMhSPSLGDNPKAsL.DssTLlVDGslVspPDAELpSAlsHLHlGE+ApVaFKSpRVVVIEV.PtlP MhhpM.oPsLGDNPKAsL.DshTLhVDGssscpPDAElpSAhsHLHhGEtApVaFKStRVVsIEV.Phls.... 0 1 2 3 +8955 PF09123 DUF1931 Domain of unknown function (DUF1931) Sammut SJ anon pdb_1r4v Domain Members of this family, which are found in a set of hypothetical bacterial proteins, contain a core of six alpha-helices, where one central helix is surrounded by the other five. The exact function of this family has not, as yet, been determined [1]. 28.20 28.20 28.20 80.60 25.60 28.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.46 0.71 -4.22 14 60 2009-01-15 18:05:59 2006-08-11 15:53:48 6 1 55 10 36 63 1 137.20 42 90.89 CHANGED h-+lFRpsAuLDlc..Ks-hcRhsDhVpcKlYDLLlsuptsAptNsRDlIc.hDLPITKGLpEoIcpF+pl..-c-lELcsIL-hLA...........shPPLDhslupEscscLP-IsGGLslslARshKhlcP-lKs..PpsEchEcspplhD .F-+lFRpAAGLDVD..KsDlKRhsDhVccKlYDLlllAptsAKuNsRDlIc.hDLPITKGLpESl+pF+pl..DcclELcPlL-tLs...........shPPLDlslu--scscLP.IsGuLsVAlARshKtlcP-lKN..PpspHaEcApclhc.... 0 15 25 33 +8956 PF09124 Endonuc-dimeris T4 recombination endonuclease VII, dimerisation Sammut SJ anon pdb_1e7l Domain Members of this family, which are predominantly found in Bacteriophage T4 recombination endonuclease VII, adopt a helical secondary structure, with three alpha helices oriented parallel to each other. They mediate dimerisation of the protein, as well as binding to the DNA major groove [1]. 
21.60 21.60 21.60 21.60 21.50 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.64 0.72 -4.34 8 37 2012-10-03 03:04:30 2006-08-14 09:14:20 5 2 35 10 1 30 0 53.20 48 34.94 CHANGED IHPpalsDpsKpFSRLs+sEMhAEMpucGF-Ys-sDsKspLscpF+KQhhKul+ .....IHPpalsDKsKcFSRLsKpEMhAEM.ppGF-YN-uDTKspLltsFKKQl+KulK....... 0 1 1 1 +8957 PF09125 COX2-transmemb Cytochrome C oxidase subunit II, transmembrane Sammut SJ anon pdb_1ehk Domain Members of this family adopt a tertiary structure consisting of two antiparallel transmembrane helices, in a transmembrane helix hairpin fold [1]. 25.00 25.00 25.10 25.10 22.40 21.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.04 0.72 -4.41 2 13 2009-09-11 15:01:49 2006-08-14 09:34:59 5 1 13 26 6 15 2 37.60 62 22.94 CHANGED DEHKAHKAILAYEKGWLAFSLAMLFVFIALIAYTLATH .....DEHKAHKAILAYEKGWLsFuLAMllVFIALIAYTLATH... 0 1 4 6 +8958 PF09126 NaeI Restriction endonuclease NaeI Sammut SJ anon pdb_1ev7 Domain Members of this family adopt a secondary structure consisting of nine alpha-helices, six 3-10 helices and 13 beta-strands. They bind two GCC-CGG recognition sequences to cleave DNA into blunt-ended products [1]. 
25.00 25.00 101.80 101.60 18.90 17.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.58 0.70 -5.77 6 31 2012-10-11 20:44:45 2006-08-14 09:51:55 5 1 28 4 10 34 1 279.50 35 60.68 CHANGED Dt-LLhshts.hoscssGch.....huuhLRcCIDsVl.s.+TGRhsa--L-KTEKTaIGTcVEIELRAhhphscG.chDh.......Ihs.sVDlKhoM.GuNWMlPsEulDs.lCLLVtADEt+ARhhlGLlhsRPsaLTpup..N+DuKpslospG.huslLWLhpDH.PhPtNhhhplsscshspIFAspo......GssRhAcLFRclQtcPIsRsVlcAVApQcDFMRRlRust..GsRslLccEGILlLuGp.hDspLhpALsLPsssuupalSsRlhhtc.tch.............st.suD.llpLs .......................hhht..hphcssGph.....hutslRcolDpllDsp+TGRasa..cpLpKTEKTalGThlEIpLpcpFt..h..sc.G......phDh.......ItGh-VDsKaohp...ttsWMIP.Euh............sp....hsLlltAD-ppupassGLl+scs-hL......st.ut........NRDuKpslo.....spu.+pt.lhWLacct..Ph.tNhLLpLst.pshppIh.u.spS.......GppRlspLFRplppp.lsRslVtsV.A.pQcDaM+RlR.su..GuRshLp.EGIllLusp...pctplApsLsLPhsstuEalSsRls.tp.tct.t.ts.hts..WhhA..s-......s......................... 0 0 5 9 +8959 PF09127 Leuk-A4-hydro_C Leukotriene A4 hydrolase, C-terminal Sammut SJ anon pdb_1hs6 Domain Members of this family adopt a structure consisting of two layers of parallel alpha-helices, five in the inner layer and four in the outer, arranged in an antiparallel manner, with perpendicular loops containing short helical segments on top. They are required for the formation of a deep cleft harbouring the catalytic Zn2+ site in Leukotriene A4 hydrolase [1]. 
21.20 21.20 21.20 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.70 0.71 -4.59 37 628 2012-10-11 20:01:01 2006-08-14 10:36:11 6 10 372 47 367 640 57 142.00 25 23.77 CHANGED oLscsshsLup.cWhst..t.......stphsstDlpsasspQhlhFLspL............hptps...ls.pp..lptLscsY..plssSpNuElth+ahplsl+....uphp..shhsphtcaL..sphGRMKFlRPlY+tLsp.....hs+phAlcsFpct+stYHPlspthVpKDL ...........................................................hhp.s.tLup.tWhpt..t.................ttshsstsh.psasspQhlhFLspL.........................h..tps.....Ls.pp..lppL..sc.sY..plssopN.u.El.phRW..hplslc....sc....ap....ssh..tstcaL.......pp..GRhKastPLYcsLhp....ttscshAhcsFtps+sthHPlstthlpc.L...................................... 0 113 181 274 +8960 PF09128 RGS-like Regulator of G protein signalling-like domain Sammut SJ anon pdb_1htj Domain Members of this family adopt a structure consisting of twelve helices that fold into a compact domain that contains the overall structural scaffold observed in other RGS proteins and three additional helical elements that pack closely to it. Helices 1-9 comprise the RGS (Pfam:PF00615) fold, in which helices 4-7 form a classic antiparallel bundle adjacent to the other helices. Like other RGS structures, helices 7 and 8 span the length of the folded domain and form essentially one continuous helix with a kink in the middle. Helices 10-12 form an apparently stable C-terminal extension of the structural domain, and although other RGS proteins lack this structure, these elements are intimately associated with the rest of the structural framework by hydrophobic interactions. Members of the family bind to active G-alpha proteins, promoting GTP hydrolysis by the alpha subunit of heterotrimeric G proteins, thereby inactivating the G protein and rapidly switching off G protein-coupled receptor signalling pathways [1]. 
21.10 21.10 21.30 21.10 20.80 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.03 0.71 -4.80 9 251 2012-10-03 22:10:09 2006-08-14 11:04:46 6 13 79 9 111 223 0 172.50 40 14.50 CHANGED psusFQsl-.L....KSRPAHLAVFL+aVloQh.DPusLLsYLhu.DhYpp..ssuKEsR+hhh-hashFL-+sA.sL+lsls-plsh-l..........................ptp+scLhsp-htRthlppspppshs-lpcpLpD.............FRpKRoMGLs.hpuEhspL-t.httDhs....+ERpsA.Eplls+l...t-lL.stpshE..E-+osshpaslhTYM++lGV+ ........................................su.FQslEhL....KpRPAHLuVFLpaVhSQh..DPuPLL..........hYLhu.-hYpp......ss..sK-....sR+hhh-haphFL-+sA..sL+V.p.l.P-..plsh-l..........................-h.h.cs-.Lhsp-hhRphl.pphpppshs-lpcpLcD.............F..Rp..KRohGLssh.uEhstL-..hstDph......+ERpsA...Ep.l.l.sp..l..........t-hl...p....p..s.E.......E.-+Ssshthsl.hYMpHhGl+.................................................................. 0 25 34 64 +8961 PF09129 Chol_subst-bind Cholesterol oxidase, substrate-binding Sammut SJ anon pdb_1i19 Domain The substrate-binding domain found in Cholesterol oxidase is composed of an eight-stranded mixed beta-pleated sheet and six alpha-helices. This domain is positioned over the isoalloxazine ring system of the FAD cofactor bound by FAD_binding_4 (PF:PF01565) and forms the roof of the active site cavity, allowing for catalysis of oxidation and isomerisation of cholesterol to cholest-4-en-3-one [1]. 
25.00 25.00 27.90 27.90 19.30 16.30 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.89 0.70 -5.30 3 72 2012-10-02 00:48:38 2006-08-14 11:36:47 6 3 62 4 14 78 1 295.20 57 53.89 CHANGED RhRCQSYVDIPASELFAPAGScGRTFESFVp+SGRAEAIWFPFTDKPWLKVWTVoPT+PsG...................ARsVsGPYNYPFSDNIPcslSDLluAIN.sGcPpLAPLFGKsQY-IThLGLAsThG..sDLWGWSKDVLaYIKPTTLRVTANGYAVLTRRcNVQRVINEFsttY+pRIAsYRAsG+YPlNGPVEIRVoGLDQPADVtVPGAsPPSLSAIRPRPDHPDWDsAIWLDILolPGTPsANcFYREhEQWMhSHYSGDYATlRPEWSKGWGYuPsAAWoDsslVssplssoaRpGLsss-NWDoAlRpLNchDPHRVFSSPLLDRLMP ........................RhRCpSassIPAsELFA.AsGo..s.G....RTh-SFlp+SGRsEAIWFPFT-pPWLKVWTssPs+P.s...................uRsVstPYNYPFSDsIPctloDLlupIs.sG.pstLsPLhGphQYslshsGLshThs.........hDLWGWS+slLhYl+PTTLRVTANGYAVLTRRtslQRVIsEFsthYpphlstY+AtGcYPhNGPlEIRVoGLDpPu-sh.sGAhsPoLSAlRPRPD..+PE..WDsAIWhDlLolPGTPtAstFYREhEpWhhsp..aoGsYAolRsEWSKGWGYss.sAAWsDsshlsphlsp.hRpG.lsusssWDsAhtpLschDP+RlFSSPLLDRLhs................ 0 4 11 13 +8962 PF09130 DUF1932 Domain of unknown function (DUF1932) Sammut SJ anon pdb_1i36 Domain This domain is found in a set of hypothetical prokaryotic proteins. Its exact function has not, as yet, been described. 25.00 25.00 25.00 25.10 24.60 24.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.30 0.72 -4.24 45 239 2009-01-15 18:05:59 2006-08-14 11:43:05 6 8 206 4 130 266 129 73.40 27 24.62 CHANGED AcphGVp-pllsoLspohPuhsh..ppsshhlscshtHuhRRstEMcElucTlpssGl.........sst...hspusAshhpphuc ..............AcphGVt-tllssLspoh.Puhs...phsshhlspss.HuhRRspEMc.Elucslp-sGl.........................ssh....hscusAphhptls......... 0 32 77 108 +8963 PF09131 Endotoxin_mid Bacillus thuringiensis delta-Endotoxin, middle domain Sammut SJ anon pdb_1i5p Domain Members of this family adopt a structure consisting of three four-stranded beta-sheets, each with a Greek key fold, with internal pseudo threefold symmetry. 
Thus they act as a receptor binding beta-prism, binding to insect-specific receptors of gut epithelial cells [1]. 21.20 21.20 21.70 21.50 20.00 21.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.39 0.71 -4.86 3 68 2009-01-15 18:05:59 2006-08-14 13:04:28 5 5 9 1 0 63 0 192.60 70 32.83 CHANGED LLVSSGANLYASGSGPQQTQSFTAQNWPFLYSLFQVNSNYVLSGlSGARLoITFPNIGGLPGSTTTQoLpuARVNYSGGVSSGpIGusNLNQNFNCSTlhPPLSTPFVRSWLDSGTDREGVATSTNWQTESFETTLuLRCGAFSARGNSNYFPDYFIRNISGVsLVIRNEDLsRPLYYNEIRNIESPSGTPGGARAYLVSVHNRKN ....................LLVSSGANLYASGSGPQQTQSFTuQsWPFLYSLFQVNSNYVLsGhSGARLohTFPNIsGLPGSTTTpuLhuARVNYSGG.........l.SSGpIG....u....osh.....NQ............N.................FN...C.STlhPPL.TPFVRSWLD...SGoDREGVATsTNWQTESFETTLuLRsGAFoAR.GNS.NYFPDYFIRNISGVsLVlR.NEDLpRPLHYNEIRNItSPSGTPGGARAYhVSVHNRKN....... 0 0 0 0 +8964 PF09132 BmKX BmKX Sammut SJ anon pdb_1rji Domain Members of this family assume a structure adopted by most short-chain scorpion toxins, consisting of a cysteine-stabilised alpha/beta scaffold consisting of a short 3-10-helix and a two-stranded antiparallel beta-sheet. They are predominantly found in short-chain scorpion toxins, and their biological method of action has not, as yet, been defined [1]. 20.70 20.70 24.70 29.20 20.00 19.70 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -7.50 0.72 -4.08 2 14 2009-01-15 18:05:59 2006-08-14 14:09:14 5 4 4 2 5 15 0 29.90 55 20.45 CHANGED PhsspCKscpDssMCshGhSsKsGhCpuCT PVsGECKscsDAsMCTsGsssKs.GTCTuCT... 1 5 5 5 +8965 PF09133 SANTA SANTA (SANT Associated) Zhang D, Mistry J anon Zhang Domain The SANTA domain (SANT Associated domain) is approximately 90 amino acids in length and is conserved in Eukaryota. It is sometimes found in association with the SANT domain (Pfam:PF00249, also known as Myb-like DNA-binding domain) implying a putative function in regulating chromatin remodelling [1]. 
Sequence analysis has showed that the SANTA domain is likely to form four central beta-sheets with three flanking alpha- helixes [1]. Many conserved hydrophobic residues are present which implying a possible role in protein-protein interactions [1]. 20.90 20.90 21.10 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.21 0.72 -4.03 7 120 2009-01-15 18:05:59 2006-08-14 14:20:17 5 5 73 0 79 112 0 91.90 32 12.64 CHANGED lpLpDWhlKphsps.....ltlcGh.cspps...hhasSshIscRhpsshLcs.sGhhhsLhGhl..............sppph.csGhs.clhccFhhGFP..Wcchh.st. .........................................lpLp-Whlc.hhss.s................tlsVpGh..tsp...ss...........hha+...S.....ssIlcRhppspLcThsGplYhLpGhl..............sp.phccs..G..aPstl.......hc+Fhh......G....FPcpWcchlpp..h......... 0 21 35 58 +8966 PF09134 Invasin_D3 Invasin, domain 3 Sammut SJ anon pdb_1cwv Domain Members of this family adopt a structure consisting of an immunoglobulin-like beta-sandwich, with seven strands in two beta-sheets, arranged in a Greek-key topology. It forms part of the extracellular region of the protein, which can be expressed as a soluble protein (Inv497) that binds integrins and promotes subsequent uptake by cells when attached to bacteria [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.30 0.72 -4.14 2 269 2012-10-03 16:25:20 2006-08-14 14:36:03 5 18 254 1 6 214 8 102.50 72 8.12 CHANGED PDsspSSFsVSssDIlADGoMpShLoFVPhsKNscFlSGhpsLpFhQsGVPVoISPlTEpsDsYTAoVVGNosGDVsITPQVss..LshLQK+IoLaPl .....M.DVANS..TLSANEPSGDVVADGQQAYTLTLT..AVDSEGNPVTGEAS.....R...L.RF...V..PQ....D.T.......N....G....VTV...G..A...I...S...E.IK..P...G..V...Y.SA...T.VSSTRAGNVVVRAFSEQYQLGTLQQTL.....KFVA............ 0 0 2 2 +8967 PF09135 Alb1 Alb1 Mistry J, Wood V anon manual Family Alb1 is a nuclear shuttling factor involved in ribosome biogenesis [1]. 
20.40 20.40 23.00 23.00 18.00 18.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.79 0.72 -3.45 48 156 2009-01-15 18:05:59 2006-08-14 14:58:35 6 1 117 0 115 151 0 110.10 28 63.64 CHANGED h+SRAARRtss.shs.h-+.......p.lpsls....sccsshp+stlhsspp.......suslsccp..t...+spc.....ls.+tpp..........hpppsh-+AthlhcpLspKhsKSlsR.uKhlpp..R+ssW-phNpphppp .............................hpSRAARptspsshs.h-+.......p.h.p.sls....sccsshp+stlhsst+.............suulsccp.......+spp.......ls..ppppp...............tpccuhc+AphshcpLspKlsKSlsR.sKhlpp..R+tsW-phNpph...t............ 0 16 54 96 +8968 PF09136 Glucodextran_B Glucodextranase, domain B Sammut SJ anon pdb_1ug9 Domain Members of this family adopt a structure consisting of seven/eight-strand antiparallel beta-sheets, in a Greek-key topology, similar to the immunoglobulin beta-sandwich fold. They act as cell wall anchors, where they interact with the S-layer present in the cell wall of Gram-positive bacteria by hydrophobic interactions. In glucodextranase, Domain B is buried in the S-layer, and a flexible linker located between domain B and the catalytic unit confers motion to the catalytic unit, which is capable of efficient hydrolysis of the substrates located close to the cell surface [1]. 29.80 29.80 30.50 31.50 29.70 29.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.24 0.72 -3.82 2 5 2009-01-15 18:05:59 2006-08-14 16:35:04 5 3 4 2 0 7 0 74.60 39 8.58 CHANGED PtLolsuPttLoTADSAsssVpGTTsAAKVYVSVNGshhEAPlT...DG..TFSlDlALsusKNpVTVAAVuuDGGTAVEsRTVLaYGS ...PuLTVpu.sshSsssStTspVSGTTNAuKVhssVNGptTEhPVo...pG..oausDLsLstscN+VTlsAsGucGuoAopcRTlhtYG....... 0 0 0 0 +8969 PF09137 Glucodextran_N Glucodextranase, domain N Sammut SJ anon pdb_1ug9 Domain Members of this family, which are uniquely found in bacterial and archaeal glucoamylases and glucodextranases, adopt a structure consisting of 17 antiparallel beta-strands. 
These beta-strands are divided into two beta-sheets, and one of the beta-sheets is wrapped by an extended polypeptide, which appears to stabilise the domain. Members of this family are mainly concerned with catalytic activity, hydrolysing alpha-1,6-glucosidic linkages of dextran to release beta-D-glucose from the non-reducing end via an inverting reaction mechanism [1]. 19.50 19.50 19.70 20.50 18.00 19.20 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.66 0.70 -5.16 14 171 2009-01-15 18:05:59 2006-08-14 16:38:33 6 8 150 6 82 181 17 265.40 31 32.85 CHANGED AsGuPGtsssWu.usKpGVGTu..............ssssSKVWFTlucGlloEVYYPpIDpAph+-LpFlVsc.GpsFhs-EpcDTtScl-hhs......stohuY+lsssDppGRYpIpKcIFTDPcRsullh+VpFpALcG...tDaplYlLhcPHlsNsGusssuals.cuput.sLhAptssshhALsuShsats......sSsGaVGsSDGhoDLt...tssphshpassAsp...GNlu.sucIsL.stspT..pFslsLGFGpotpEAspsAtuoLpsuasphhppY.s.....sWccYhsSL ...................................................A.GuPGtss..pWs..usKsGlGTu..................hsssS+VWFTl.ucG.lloElaYPplDpspl+-.lpFlV..ss...GpsFh.........spE+css....p....pplchhs.............sts.uac.ls...sp.s....tp...G+...aplpKclhoDPp+ssllh+lpFpu.hps.......shphYhlhsP+lsNsG.......t.......s..............spuhs.....p.....h...........tut.t...s..........Lh.A.pp............s..................sthhuLtu......sh.....s.....att.....................sSsGal.G.s.......SDGhpDLt......pstph...sh.p..aspAss............GNVuhsuplsh.......tsps......phslsLGFGpotp..p..A..hssAtuoLsp.u..apshhppYts.....sWcsahtsL............................................. 0 29 53 71 +8970 PF09138 Urm1 Urm1 (Ubiquitin related modifier) Mistry J, Wood V anon Pfam-B_16507 (release 20.0) Family Urm1 is a ubiquitin related protein that modifies proteins in the yeast ubiquitin-like pathway urmylation [1]. 
Structural comparisons and phylogenetic analysis of the ubiquitin superfamily has indicated that Urm1 has the most conserved structural and sequence features of the common ancestor of the entire superfamily [2]. 21.00 21.00 21.00 21.30 20.90 20.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.19 0.72 -3.87 29 304 2012-10-03 10:59:06 2006-08-14 17:07:09 6 2 274 6 211 350 18 94.70 42 89.71 CHANGED clplEFhGGLElLhs.sp+paplsls.......sppthshpsLltalcpNlIp-.Rs.-lF..........................l.p.ssolRPGILVLINDsDWELhGph-YhLc-sDslsFlSTLHGG .........................................lplEFsGGhEhLFs..s.+pHplsls.....................t.tp.hslppLlpalpcNllp-.....Rs..-LF..................................................................lp...ssoVRP.GILVLINDsDWE.L...G..c.-Y.Lpss.DslhFlSTLHGG........................ 1 72 114 175 +8971 PF09139 Mmp37 Mitochondrial matrix Mmp37 Mistry J, Wood V anon Pfam-B_15301 (release 20.0) Family MMp37 is a mitochondrial matrix protein that functions in the translocation of proteins across the mitochondrial inner membrane [1]. It has been shown that MMP37 proteins possess the NTase fold but they have only one active site carboxylate and thus probably are not able to carry out enzymatic reaction. These potentially non-active members of NTase fold superfamily may bind ATP, hydrolysis of which is necessary for the translocation of proteins through the membrane [2]. 
21.10 21.10 21.80 21.80 19.20 19.00 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.34 0.70 -5.41 24 355 2012-10-02 22:47:23 2006-08-14 17:14:10 6 8 272 0 247 347 4 275.80 34 80.49 CHANGED tL+pllspF.ss.lcaAFAYGSGVapQsG................t........tt.pttMlDhIFuVscspcaHohNLcQN.cHYSs..L+hhGschloplQsphGAGVYFNsalslN.....GphIKYGVVShcsLhcDLtpWcoLYlAGRLpKPV..clLp.c...csplchssphNL+SAlpsALLLL.......PppFoEhpLYppIuGLSYhGDhRM.lsG.EsPpKVpNIVpsphsp..F+cLYtPllps....................h.phsh.s..t..............phtpDhss.sphshlttLPp.shppplhhphpp.h..........................ptppst.h.hplutDs..ph.tpslppslpphlphsShsQolKGlhTAGlt+SlpYuhtKhpK .............................h.hptlh.pF....lph..sFuYGSGVh.Qts...............................................ttpttMlDhlhsV....sps...pWHshNlppp.pHYSh......l.t..hh..G....st.hlsp.....lQp...h.G..AG.VYaNs.hl.hs..........uphl..KYGVlshcsLhpDLhpWpsLYlAGRLpKPV...cl..l.p...s...............ssplp....hs...phNLhuAlps....AlLhL....................................PtpF..o....E.pLatpIsuLSYh.GDhR..M...lhu.Es.pKVpNIVp..sp..htp..FcpLYt.sllpp.........................h.ph.h.........................thhh.t..D.hs.s.tphp.l.tLPt.phpppl.h.ht.........................................................................t.h.......p..l..u..tp....ph..tp.lppslpphlhhsShhQohKGlhoAGhh+ohtY..tKh.K...................................................................... 0 80 139 203 +8972 PF09140 MipZ ATPase MipZ Mistry J, Thanbichler M anon Pfam-B_23525 (release 20.0) Family MipZ is an ATPase that forms a complex with the chromosome partitioning protein ParB near the chromosomal origin of replication [1]. It is responsible for the temporal and spatial regulation of FtsZ ring formation [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.76 0.70 -5.31 12 431 2012-10-05 12:31:09 2006-08-14 17:22:01 6 7 400 5 138 13620 5467 180.40 27 57.60 CHANGED HlIVlGNEKGGoGKSTsulHlAlALhttGh+VusIDLDhRQ+ohsRYhcNRsthtc+p.GlsLPhPpah........pl.cs-ssp..phpthhschptstDFIllDTPGscohLuRhAHohADTLVTPlNDSFVDFDllupVDPcThclptPShYuEhVW-uRptRAps....sttshDWlVlRNRLuthpuRN+p+lspuLp-LS+RlGFRlhsGhuERVIYRELFPpGLTLL.Dlpchsh..chshSHlsARQELRsLltuLsLPh .............................................................................llsV..u..s.tKGGsGKST.lo..h....p...........L.A..l.....A....L...s.......+.....h.....G.......h....+.....VG.l........l.D...............sD...............l..............h.................t.................................S..................l..................s..................p................h....................h..............t...................s...................+..................t...................h....................h.....................................t...................p....................p....................................h......................................l............................................h....................................t............h....................h..........................................l........................................................................................................................h.........................................................t.............................................h.......................................................................h............p......t.....h...h......t.......p.........h..........t.............p.......h....D......a..lllD..h.P...s.........................................................................................................................................................................................................................................................
.............................................................................................th............................................................................................................................................................................ 0 46 90 114 +8973 PF09141 Talin_middle Talin, middle domain Sammut SJ anon pdb_1sj7 Domain Members of this family adopt a structure consisting of five alpha helices that fold into a bundle. They contain a Vinculin binding site (VBS) composed of a hydrophobic surface spanning five turns of helix four. Activation of the VBS causes subsequent recruitment of Vinculin, which enables maturation of small integrin/talin complexes into more stable adhesions. Formation of the complex between VBS and Vinculin requires prior unfolding of this middle domain: once released from the talin hydrophobic core, the VBS helix is then available to induce the 'bundle conversion' conformational change within the vinculin head domain thereby displacing the intramolecular interaction with the vinculin tail, allowing vinculin to bind actin [1]. 25.00 25.00 35.30 25.20 24.90 23.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.71 0.71 -4.56 6 194 2009-09-11 14:56:09 2006-08-15 09:39:34 5 31 87 6 108 161 0 157.90 59 6.87 CHANGED sQpALhGoIsuuhcAVppA-c-Lcshss..lPsLGsDhuShpW+cNplDsSKpsVsS+lAAhoAuTApVVphTAu-PsDsDasAVGsAVosIouNLsElSKsV+LlAALM-c-s.sGccLL-AARsLsuAFSDLLKuApPpS...cEPRQsLLsAAGpVGpuSucLL .................................AQQALhGTINoSMpAVQtAQssLs....-h-s...LP.PLGpDhAS+...sWhpNKhDESKHEIHSQVDAITAGTASVV...........NLTA..G.DP......s-TDYTAVGCAlTTISSNLTEMSKGVKLLAAL..M-D-s...GsGcsLLpAA+sLAGAVSDLL+u.spPsS.........................uEPRQslLsAAGslGQASG-LL...................... 
0 33 42 70 +8974 PF09142 TruB_C tRNA Pseudouridine synthase II, C terminal Sammut SJ anon pdb_1sgv Domain The C terminal domain of tRNA Pseudouridine synthase II adopts a PUA (Pfam:PF01472) fold, with a four-stranded mixed beta-sheet flanked by one alpha-helix on each side. It allows for binding of the enzyme to RNA, as well as stabilisation of the RNA molecule [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.31 0.72 -4.32 34 629 2012-10-02 17:37:24 2006-08-15 10:01:30 6 7 616 2 142 459 81 55.70 33 18.30 CHANGED FsthclossEApslsaG+tLsss....uhsGshAAhsPDGcllALl...c-putcs+slsVht ......................................hshhplTscEAstlphGRhlphs....................shst.h.hA.Ahs.s.cG+......llAll........cc..cGsp....h+sltVh................ 0 43 99 129 +8975 PF09143 AvrPphF-ORF-2 AvrPphF-ORF-2 Sammut SJ anon pdb_1s21 Domain Members of this family of plant pathogenic proteins adopt an elongated structure somewhat reminiscent of a mushroom that can be divided into 'stalk' and 'head' subdomains. The stalk subdomain is composed of the N-terminal helix (alpha1) and beta strands beta3-beta4. An antiparallel beta sheet (beta5, beta7-beta8) forms the base of the head subdomain that interacts with the stalk. A pair of twisted antiparallel beta sheets (beta1 and beta6; beta2 and beta9/9') supported by alpha2 form the dome of the head. The head subdomain possesses weak structural similarity with the catalytic portion of a number of ADP-ribosyltransferase toxins [1]. 
25.00 25.00 28.70 28.20 24.50 23.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.05 0.71 -4.55 4 50 2009-01-15 18:05:59 2006-08-15 10:31:42 5 1 47 1 4 43 1 156.10 48 71.89 CHANGED SpalGt.TLTSIHQLSsspREpFLssHDPMRshsLss-TslYRTTppRYl....+psKLAGNPpShAhlthHEELp.ss.hAS+..hGuhPcpAc.........AYhP+ph+AsDLssPSLNVMsGstAcsulRuYA+..sDHVsVcMRLGDFL-pGGKVYuDTSuhusGGDpspALIVTLPKGpKVPVcIl ................hstpshul................................F+I++DhVsVRIpsspFsD..hKNcKIpGHpNTVASVhDaYs.QcNu.Lu.s..hGosc+.oAD.........hh+.c++sAhNhhlhphNs.hap......s......us-Nht+SYuK.T-DsshVuholGsLLDKG.uKVYsDTSsulc...LuEPhIhTLPEus+VsV-I............................. 0 0 1 3 +8976 PF09144 YpM Yersinia pseudotuberculosis mitogen Sammut SJ anon pdb_1pm4 Domain Members of this family of Yersinia pseudotuberculosis mitogens adopt a sandwich structure consisting of nine strands in two beta sheets, in a jelly-roll topology. As with other superantigens, they are able to excessively activate T cells by binding to the T cell receptor [1]. 25.00 25.00 205.40 205.20 19.30 18.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -4.14 2 4 2009-01-15 18:05:59 2006-08-15 10:43:22 5 1 2 4 0 5 0 116.80 90 77.45 CHANGED IPNIATYTGTIQGKGEVCIIGNKEGKTRGGELYAVLaSTNVNADMTLILLRNVGGNGWGEIKRNDIDKPLKYEDYYTSGLSWIWKIKNNSSETSNYSLDATVHDDKEDSDVLTKCPV IPNIATYTGTIQGKGEVCIIGNKEGKTRGGELYAVL+STNVNADMTLILLRNVGGNGWGEIKRNDIDKPLKYEDYYTSG.LSWIWKIKNNSSETSNYSLDATVHDDKEDSDVLTKCPV 0 0 0 0 +8977 PF09145 Ubiq-assoc Ubiquitin-associated Sammut SJ anon pdb_1pgy Domain Ubiquitin associated domains contain approximately 40 residues and bind ubiquitin noncovalently. They adopt a secondary structure consisting of three alpha-helices, and have been identified in various modular proteins involved in protein trafficking, clathrin assembly/disassembly, DNA repair, proteasomal degradation, and cell cycle regulation [1]. 
20.20 20.20 20.30 56.80 18.60 16.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.22 0.72 -4.10 4 19 2009-01-15 18:05:59 2006-08-15 11:59:10 5 4 19 1 13 20 0 45.50 60 6.90 CHANGED llDEV+DMElA+LMSLGLsI-cAscYY-+GlpYE...-.L+pR+pc lVDEVKDMEIARLMSLGLSI-cAs-aYEpslhYEphl-hlKp+pt..... 0 1 6 12 +8979 PF09147 DUF1933 Domain of unknown function (DUF1933) Sammut SJ anon pdb_1q15 Domain Members of this family are predominantly found in carbapenam synthetase, and are composed of two antiparallel six-stranded beta-sheets that form a sandwich, flanked on each side by two alpha-helices. Their exact function has not, as yet, been determined [1]. 20.50 20.50 20.50 20.60 20.20 19.50 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.24 0.71 -4.89 3 6 2012-10-03 21:14:07 2006-08-15 13:47:08 5 2 6 8 4 26 83 199.20 38 44.24 CHANGED N-FClV+tGhDKDINpLt+-FsGphEtLSNGsLFhcpsT+VQKa+hERGTAYLIGSLYN+ohLRuLAG+aEGcusVlNDAEILhLlpT+LGuuALuLAEGDFCFFIED+NGsLTVITESRGhNPVaLVQucctWITNSLKLVoAlEG-tAhDFccEupVscoul+sDsaoPl+NlQRLKPGTlNVLTFDspcYpalESRpL ...............................spFChl+tuhsKNlsplhpsF.shph-pLSsGpLahpspophpKhcs-+tTAYLI..GoIYN+shLculAG+aEGcu..VlsDsElLhhlhs+LGsuALoLAEGDFCF.FI.E-Kp.G.pLpllT-SpGhssVaLVp...sc.h.hWI.TN.oLKlVu..t.lEG-tAhDFpsEu..pVhpssL..+..sDsaoPl+NspRLKPGolNhLoaDppcY.alEsc.l....................................................... 0 0 2 3 +8980 PF09148 DUF1934 Domain of unknown function (DUF1934) Sammut SJ anon pdb_1r0u Domain Members of this family are found in a set of hypothetical bacterial proteins. Their precise function has not, as yet, been defined. 
21.40 21.40 21.50 22.20 21.30 21.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.32 0.71 -4.43 66 1332 2009-09-10 22:25:01 2006-08-15 13:52:04 5 1 1317 1 164 708 2 126.10 26 91.48 CHANGED l......pIphpphh..........tpsc.p-slEhhspGpahpKs.sshYlhYcEp..p.ss...sspsslKl..pcscltlhR...tGssph+hh.FppsccshshYpTPhGphpltspTpplplshs-ps...GplplcYpLh....hspphhuphplpl ..................................hpIphpshl.......p..tssc..pEph-.hhhp...Gphhp.+s.stpYlpYpEp..p.tt....phplslKl....pcpplhlhR...tGs.sphph+.Fhc.sp.c.shs.hYs.TP.hG.h.hpltspTppl..p.h.phpct.........tplpl.....cYpLh....tssphhusYplcl...................... 0 64 110 138 +8981 PF09149 DUF1935 Domain of unknown function (DUF1935) Sammut SJ anon pdb_1r75 Domain Members of this family are found in various bacterial and eukaryotic hypothetical proteins, as well as in the cysteine protease calpain. Their exact function has not, as yet, been defined. 20.90 20.90 20.90 25.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.35 0.72 -4.05 71 300 2009-01-15 18:05:59 2006-08-15 14:02:30 5 8 15 2 76 289 0 104.30 31 23.51 CHANGED ppGp...Ps...hp....sclhpsFcp...........G.........LLFRlVs.p................cppp..WuFYNDTpcaphcVpspFutsSp.l....csLssT..plt.........ppssGp........hhsplsVhPhcTphFl.cGps.sGacs.phpAhs ............................................GtPs.....hp.......sclhpsFcp...........G..........LLaRlls.p......................cppp....WuFYNDTpsaphcVpspFutsSp.l........pslssT..plp...............ppssGp...............hhsplsVhPhcTphFl.cGps.sGapsphpA................. 0 38 56 76 +8982 PF09150 Carot_N Orange carotenoid protein, N-terminal Sammut SJ anon pdb_1m98 Domain Members of this family adopt an alpha-helical structure consisting of two four-helix bundles. They are predominantly found in prokaryotic orange carotenoid protein, and carotenoid binding proteins [1]. 
25.00 25.00 64.60 64.60 23.40 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.90 0.71 -4.54 8 95 2009-01-15 18:05:59 2006-08-15 14:34:12 5 2 47 8 43 100 4 155.00 45 65.57 CHANGED ThDsA...Fs.ohtussV.sllspFNpLss-DpLALlWFsYpEMG+oITsAAPGAA..shpLAEuhLsplptho.cE.QhplMpDLss+sDTPloRsYGsaSsNsKLsFWYpLuEhMcQGsVsPlPsGYQLSssANplLEsIKsLDhGQQITlLRshVlsMGaDs ..............s............s..usslssshspFppLos-DQLALlWFuYhEMGcoITsAAPGAA..phphA-shLspI+pMo.pE.QhplMpDLAs+s-...Tsl.oRsYushSsNsKLuFWYpLuchMcpGhVsPlPsuYpLSssAsslLpsIcsL-.uQQITlLRssVs-MGhDs........... 0 5 27 42 +8983 PF09151 DUF1936 Domain of unknown function (DUF1936) Sammut SJ anon pdb_1pvm Domain This domain is found in a set of hypothetical Archaeal proteins. Its exact function has not, as yet, been defined. It possesses a zinc ribbon fold. 25.00 25.00 98.90 98.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.81 0.72 -4.45 2 2 2012-10-03 10:42:43 2006-08-15 15:04:02 5 1 2 4 2 5 1 36.00 89 20.45 CHANGED pHLCPKCGVGVL.PVYspKGEIKVFRCSNPACDYEE pHLCPKCGVGVL.PVYspKGEIKVFRCSNPACDYEE 0 1 1 1 +8984 PF09152 DUF1937 Domain of unknown function (DUF1937) Sammut SJ anon pdb_1t1j Domain This domain is found in a set of hypothetical bacterial proteins. Their exact function has not, as yet, been described. 23.00 23.00 23.00 23.40 22.90 22.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.41 0.71 -3.52 7 46 2012-10-02 19:28:18 2006-08-15 16:31:34 5 1 35 2 10 39 1 111.80 41 80.93 CHANGED RhlaLAsPYS........+.stshlptphhssschAApllcsGhssaS.VohS....HPI.hshs.h.cst.stlWushst.ahchhptllVl-lsGWccSsGI++Elchatstsp.V.laup ..R.laLAsPYS........+sss-hsEtphtssschsAp....llc.sGhsshS.VshS....Hsh.hshh..h.csh.stLWtshst.hhcth.ptllVh-lsGWccSsGIR+ElchhpAtsh.V.lau........ 
0 7 7 10 +8985 PF09153 DUF1938 Domain of unknown function (DUF1938) Sammut SJ anon pdb_1mgt Domain Members of this family, which are predominantly found in the archaeal protein O6-alkylguanine-DNA alkyltransferase, adopt a secondary structure consisting of a three stranded antiparallel beta-sheet and three alpha helices. Their exact function has not, as yet, been defined, though it has been postulated that they confer thermostability to the archaeal protein [1]. 22.10 22.10 24.80 94.00 20.50 19.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.65 0.72 -4.17 4 13 2009-01-15 18:05:59 2006-08-16 09:16:18 5 1 13 1 11 16 0 86.00 45 49.32 CHANGED MLoscpF+ltsRtlhIGVlaE-+IQGIsaSlDthEhl+cplscLhsaLpKR.GVplsLcEppScYP-lVFcVLhGKIuNEcuhE..ELS MLSlE+FcIssR-lhIuVla-cK.IQGIoFSLDGtEFLccRIssLspaLc+R.GVsVsLcpccScYP-LVacVLlGclcNE-uLc..ELS. 0 1 1 6 +8986 PF09154 DUF1939 Domain of unknown function (DUF1939) Sammut SJ anon pdb_1mxg Domain Members of this family, which are predominantly found in Archaeal amylase, adopt a secondary structure consisting of an eight-stranded antiparallel beta-sheet containing a Greek key motif. Their exact function has not, as yet, been determined [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.73 0.72 -4.07 9 520 2012-10-02 20:10:03 2006-08-16 09:30:45 5 6 490 22 62 384 5 58.10 47 11.92 CHANGED LlVhI.Npussh+p+hVpT.sWsspslhDYoGNuu...sssossDGWVplpsPsp.......GYulaS ............hu..lhl.sNsp.t.sS.K.pMaVGp..c.a.s.spsFhD..lLGNpp.........spVTI---GaGpFsVuup.........SVSVWs. 0 18 31 47 +8987 PF09155 DUF1940 Domain of unknown function (DUF1940) Sammut SJ anon pdb_1nig Domain Members of this family adopt a secondary structure consisting of six alpha helices, with four long helices (alpha1, alpha2, alpha5, alpha6) form a left-handed, antiparallel alpha helical bundle. 
The function of this family of Archaeal hypothetical proteins has not, as yet, been defined [1]. 25.00 25.00 252.60 252.50 23.20 18.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.83 0.71 -4.35 3 3 2009-01-15 18:05:59 2006-08-16 09:46:44 5 1 3 1 3 4 4 143.00 47 93.87 CHANGED YCPVIDDpLPlDHVYFKFRSEIEuAEAFLGLAVSEGlKVsETRElLDILDTVYNSLYDcESKLNDFQEKRLNFTEE-WYDIKEKsNNGNKWSLYMFLARSHlDsAVYWlo+M+EDERFK-hVcDEsIstLLKlGhVILREGLG YCPVIDDpLPlDHVYFKFRSEIEuAEAFLGLAVSEGlKVsETRElLDILDTVYNSLYDcESKLNDFQEKRLNFTEE-WYDIKEKsNNGNKWSLYMFLARSHlDsAVYWlo+M+EDERFK-hVcDEsIstLLKlGhVILREGLG 0 1 2 2 +8988 PF09156 Anthrax-tox_M Anthrax toxin lethal factor, middle domain Sammut SJ anon pdb_1j7n Domain Members of this family, which are predominantly found in anthrax toxin lethal factor, adopt a structure consisting of a core of antiparallel beta sheets and alpha helices. They form a long deep groove within the protein that anchors the 16-residue N-terminal tail of MAPKK-2 before cleavage. It has been noted that this domain resembles the ADP-ribosylating toxin from Bacillus cereus, but the active site has been modified to augment substrate recognition [1]. 
83.00 83.00 108.40 108.30 82.90 82.90 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.06 0.70 -5.30 2 16 2009-01-15 18:05:59 2006-08-17 09:03:18 5 3 14 17 1 17 0 259.30 97 36.03 CHANGED MLARYEKWEKIKQHYQHWSDSLSEEGRGLLKKLQIPIEPKKDDIIHSLSQEEKELLKRIQIDSSDFLSTEEKEFLKKLQIDIRDSLSEEEKELLNRIQVDSSNPLSEKEKEFLKKLKLDIQPYDINQRLQDTGGLIDSPSINLDVRKQYKRDIQNIDALLHQSIGSTLYNKIYLYENMNINNLTATLGADLVDSTDNTKINRGIFNEFKKNFKYSISSNYMIVDINERPALDNERLKWRIQLSPDTRAGYLENGKLILQRNIGLEIKDVQIIKQSEKEYIRIDAKVV MLARYEKWEKIKQHYQHWSDSLSEEGRGLLKKLQIPIEPKKDDIIHSLSQEEKELLKRIQIDSSDFLSTEEKEFLKKLQIDIRDSLSEEEKELLNRIQVDSSNPLSEKEKEFLKKLKLDIQPYDINQRLQDTGGLIDSPSINLDVRKQYKRDIQNIDALLHQSIGSTLYNKIYLYENMNINNLTATLGADLVDSTDNTKINRGIFNEFKKNFKYSISSNYMIVDINERPALDNERLKWRIQLSPDTRAGYLENGKLILQRNIGLEIKDVQIIKQSEKEYIRIDAKVV... 0 0 1 1 +8989 PF09157 TruB-C_2 Pseudouridine synthase II TruB, C-terminal Sammut SJ anon pdb_1k8w Domain Members of this family adopt a secondary structure consisting of a four-stranded beta sheet and one alpha helix. They are predominantly RNA-binding domains, mostly found in Pseudouridine synthase II TruB [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.78 0.72 -4.11 98 1516 2012-10-02 17:37:24 2006-08-17 09:13:28 6 5 1503 3 305 983 305 58.00 38 18.73 CHANGED PplpLsspputtlhpGQsV...............sssst..pG.hVRla......spstp..FlGlGclstcG...hltP+RLl .........PhVsLs.ss.ushhppGpsV........h................s.ss.sPh..cG..hVRVh........ucss+....FlGlGE.l...sc.-.G......+luP+RLl................. 0 68 161 235 +8990 PF09158 MotCF Bacteriophage T4 MotA, C-terminal Sammut SJ anon pdb_1kaf Domain Members of this family adopt a compact alpha/beta structure comprising three alpha-helices and six beta-strands in the order: alpha1-beta1-beta2-beta3-beta4-alpha2-beta5-beta6-alpha3. 
The beta-strands form a single anti-parallel beta-sheet and the three alpha-helices pack side-by-side onto one surface of the beta-sheet. In this architecture, the domain's hydrophobic core is at the sheet-helix interface, and the second surface of the beta-sheet is completely exposed. The domain is a DNA-binding motif, with a consensus sequence containing nine base pairs (5'-TTTGCTTTA-3'), that appears to bind to various mot boxes, allowing access to the minor groove towards the 5'-end of this sequence and the major groove towards the 3'-end [1]. 25.00 25.00 57.50 55.90 19.00 19.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.37 0.72 -4.34 3 25 2009-01-15 18:05:59 2006-08-17 09:29:02 5 2 24 6 0 22 0 102.40 43 49.29 CHANGED ITuEMcELt-hlhcLl--N.lsLKcVEIYRSNYplIFuKRT.pGIRpFEIpNNGshRIFGYKMuEc+lctFTslGs-lKIthGGpNTYIDIcpsucNItsVlTlA ....ITs-MEp.tDhhhcLL..--p.hs.lKclpp.RSNallhhpKRT.cGIRpFElpNsGphRIFGYKMtE+clppFTslGhssKlttsG.NsYlDIcpos-NIttlIssA... 1 0 0 0 +8991 PF09159 Ydc2-catalyt Mitochondrial resolvase Ydc2 / RNA splicing MRS1 Sammut SJ anon pdb_1kcf Domain Members of this family adopt a secondary structure consisting of two beta sheets and one alpha helix, arranged as a beta-alpha-beta motif. Each beta sheet has five strands, arranged in a 32145 order, with the second strand being antiparallel to the rest. Mitochondrial resolvase Ydc2 is capable of resolving Holliday junctions and cleaves DNA after 5'-CT-3' and 5'-TT-3' sequences [1]. This family also contains the mitochondrial RNA-splicing protein MRS1 which is involved in the excision of group I introns [2-3]. 
20.00 20.00 20.00 20.70 19.80 19.90 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.92 0.70 -4.91 53 128 2012-10-03 01:22:09 2006-08-17 10:06:20 5 6 108 2 83 123 59 264.70 23 72.90 CHANGED I....LSIDhGl+NhAascLps..........................shtssppshP............................pLpsWp....+lsLs.............................................................................pttstppccpp.shsPshhuphAasLlspLl.tshp.....P...shllIERQRhRSsGuuul.EhsL+VsllEsMLaAsLpshppppptt.st..............hVhussPp+lspaWhpttsh.........ps......................tpppphssppsKph+IcLltphLsssh........................hthshsts.ph....h...pthlt+apsptpsppthtthtt...............................................tt.chpKlDDLADsLLpu.lsWhcWppsppcl .............................ILSIDhGl+NhAasplph................................tttpshs.............................................................................pltsWp....+lsLp.......................................................................................................t.t...ppppp.shs.Pt.huphsapLlppLh..shp..........P.shllIE..R..QRhRosu.u......ssl.-.sl+VshlEshLaAsLpsh.t.ttp...t................hVhussPp+hspaWhp.t.........................................................ppphhssppsKph+lcLltphLpssh........................hth.h..tt.ph............thhtt.attthptththtth.t.................................tt..phtKhDDLsDslLpu.lsWhcW.tthpp.................................................................. 0 18 40 69 +8992 PF09160 FimH_man-bind FimH, mannose binding Sammut SJ anon pdb_1klf Domain Members of this family adopt a secondary structure consisting of a beta sandwich, with nine strands arranged in two sheets in a Greek key topology. They are predominantly found in bacterial mannose-specific adhesins, since they are capable of binding to D-mannose [1]. 
25.00 25.00 27.20 27.10 24.00 19.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.74 0.71 -4.36 6 806 2009-09-14 15:18:23 2006-08-17 10:27:01 5 4 326 37 14 375 0 143.80 68 49.23 CHANGED C+sssTGQ.sh...suGsusVhVNLsPsVpsspNl.VlDLSQ...plsCpND.ushp.hDYlplppGSuFusuLss.......FoGolpahupoYPhPhssps.sphhhspssa+PhPlKhYLoPsuuAsGVlI+uG-LIAplhhpKhuohu.cus.+N.......FsWp ...............C+s.sGsu.sI....GGG....oA...NVY.VNLuPs.VNVGQN.L..VVDLST...QIFCHNDYPE.......TITDYVTL.QR.GSAY.GGVLSs.......FS.GTVKYsG.oSYPFPT.ToET.sRVlYsSRTDKPWPlsLYLTP....VS.o..AGG.VsIKAGSLIAVLIL+QTNNY.....NSD-FQ.......FlWN........................... 0 2 4 9 +8994 PF09162 Tap-RNA_bind Tap, RNA-binding Sammut SJ anon pdb_1koh Domain Members of this family adopt a structure consisting of an alpha+beta sandwich with an antiparallel beta-sheet, arranged in a 2(beta-alpha-beta) motif. They are mainly found in mRNA export factors, and mediate the sequence nonspecific nuclear export of cellular mRNAs as well as the sequence-specific export of retroviral mRNAs bearing the constitutive transport element [1]. 20.80 20.80 21.60 21.50 20.20 20.00 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.86 0.72 -4.41 12 238 2009-01-15 18:05:59 2006-08-17 10:39:35 5 15 77 12 138 237 0 85.00 43 15.68 CHANGED Dtsp..csWFKlTIPaG+KYDKpWLLs.lQshCSlPFsPVcFHh-pp+ApFFVEsussApALKplSt+IhDc-spKlsIhhsPsssP.pl .............tsWFKl.TIPaGpKYDKpWLls..I.Qu.p.CSVPFsPl..........-.FHY-ps.c.ApFFV-DussAsALKslshKI.hD.c.-.sp+IsIhVs.sussP.................. 0 21 29 62 +8995 PF09163 Form-deh_trans Formate dehydrogenase N, transmembrane Sammut SJ anon pdb_1kqf Domain Members of this family are predominantly found in the beta subunit of formate dehydrogenase, and consist of a single transmembrane helix. They act as a transmembrane anchor, and allow for conduction of electrons within the protein [1]. 
20.60 20.60 20.90 20.90 20.20 20.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -7.95 0.72 -4.46 40 1298 2009-01-15 18:05:59 2006-08-17 10:52:16 6 17 834 2 134 480 11 44.10 58 14.80 CHANGED ISssVpLWK.GlhKPLushuhuussluuhhHYlslGPNcss....---- ..IspoVphWK.GhhKPLAAsGFhATF..AutIFHYlGlGPN+ts....----p....... 0 20 52 94 +8996 PF09164 VitD-bind_III Vitamin D binding protein, domain III Sammut SJ anon pdb_1kxp Domain Members of this family are predominantly found in Vitamin D binding protein, and adopt a multihelical structure. They are required for formation of an actin 'clamp', allowing the protein to bind to actin [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.39 0.72 -4.36 6 46 2012-10-01 21:15:37 2006-08-17 11:02:52 5 2 31 9 23 56 0 67.30 58 14.43 CHANGED ELCADYSENTFTEYKKKLu-pLRsKhP-Aosp-Ls-LVsKRSDFASpCCSINSPPlYCsSpIDAElps .....................ELCADYSENTFTEYKKKLuEpL+sKhPDAoss-LtcLV-+RSDFASpCCSINSPPLYCsSplcs.ht............ 0 1 2 6 +8997 PF09165 Ubiq-Cytc-red_N Ubiquinol-cytochrome c reductase 8 kDa, N-terminal Sammut SJ anon pdb_1l0l Domain Members of this family adopt a structure consisting of many antiparallel beta sheets, with few alpha helices, in a non-globular arrangement. They are required for proper functioning of the respiratory chain [1]. 22.90 22.90 23.70 22.90 21.60 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.42 0.72 -3.66 16 86 2009-09-11 00:16:57 2006-08-17 11:15:50 5 2 67 47 31 89 0 73.30 47 27.43 CHANGED hSlsuRSGsluPYlpATopuVAusLK.PLlPusl.hpuEKlllcs++shLs+ESLsGphPppuLtsosu.lsususVR ........hSlAARSGsFAPhLpATSpuVAGsL+.P.LltusV....sssEpslLDsK+PFLsRESLSGQus+p.sLsASVG.lNsPAoVR....... 
0 9 12 18 +8998 PF09166 Biliv-reduc_cat Biliverdin reductase, catalytic Sammut SJ anon pdb_1lc0 Domain Members of this family adopt a structure consisting of four alpha helices and six beta sheets, in an alpha-beta-alpha-alpha-alpha-beta-beta-beta-beta-beta arrangement. They contain a catalytic active site, capable of reducing the gamma-methene bridge of the open tetrapyrrole, biliverdin IX alpha, to bilirubin with the concomitant oxidation of a NADH or NADPH cofactor [1]. 21.00 21.00 21.00 23.40 19.20 20.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.43 0.71 -4.07 4 55 2009-01-15 18:05:59 2006-08-17 11:33:23 5 3 38 7 31 52 0 111.70 61 38.98 CHANGED -FKtLK+ElpGKpL.EGsLHFTGGPLcts.FGFPuFSGIARLTWLVsLFG-LoVTSAThEEcKEppY.KMTApLhTppc+PLTWIEERGPGhtRsKHIcF+FpssoLsplPuusR ................................EFthLKKEVsGK-LlKGoLhF.TuGPL-Ep+FGFPAFSGIuRLTWLVsLFGELSlsSATLEEcKEcpYhKMTVpLcTpsK+PLTWIEE+GPGLKRs+alsF+FcSGoLEslPsss.h............. 1 2 3 10 +8999 PF09167 DUF1942 Domain of unknown function (DUF1942) Sammut SJ anon pdb_1lmi Domain Members of this family of bacterial proteins assume a beta-sandwich structure consisting of two antiparallel beta-sheets similar to an immunoglobulin-like fold, with an additional small, antiparallel beta-sheet. The longer-stranded beta-sheet is made up of four antiparallel beta-strands. The shorter-stranded beta-sheet consists of five beta-strands, four of these beta-strands form an antiparallel beta-sheet. The exact function of this family of proteins is unkown, though a putative role includes involvement in host-bacterial interactions involved in endocytosis or phagocytosis, possibly during bacterial internalisation [1]. 
20.70 20.70 20.80 23.30 20.40 20.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.59 0.71 -4.38 13 123 2009-01-15 18:05:59 2006-08-17 11:45:15 6 2 76 1 32 109 0 122.70 51 64.17 CHANGED ApssspplGpsucLls..GsVVQsWTVSDLKPSoDsIP.YtltGpLWEATATscAlsGsVTPIVSNhNARAssGpoYRVLapVATPpGlNPuTLuQGppoTGKlYFDV.TGssPsSVVY.NsGupDlllW ....................s..PhstphGophphsD..ssGtVV.uWpVSDL+sSossIP.YsltGplWEATATspAlpGoVTPsVupFNARsssG.sYRVLap.sAsPsslssATlsQGppoTGKIYFDV.TGssPshVsh.NsG..hpDLLlW........ 0 1 18 27 +9000 PF09168 PepX_N X-Prolyl dipeptidyl aminopeptidase PepX, N-terminal Sammut SJ anon pdb_1lns Domain Members of this family adopt a secondary structure consisting of a helical bundle of eight alpha helices and three beta strands, the last alpha helix connecting to the first strand of the catalytic domain. The first strand of the N-terminus also forms a small parallel beta sheet with strand 5' of catalytic domain. The domain mediates dimerisation of the protein, with two proline residues present in the domain being critical for interaction [1]. 25.00 25.00 28.60 28.50 21.30 19.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.07 0.71 -3.98 24 510 2009-01-15 18:05:59 2006-08-17 11:56:45 5 3 489 1 52 340 1 141.70 47 19.29 CHANGED MKhN..QFualssshcpthpELppluF....hptpss.ps.LcsFlp+hahphps...psttLsplhAsscpDLhsFhpoc.pplotclFYslALQLLGFpsthDFsh.DshshhcchshP.......hhssppllpuhYpLLsTRoKsGpoLlDpLsucGa ...M+aN..QaSYlshsh-phlpELcplGFp...hpspsstKcsLEsFLR+hFhpaps....osasLo.LAA-pcTDLLoFFpS-.pcLTs-lFYsVAhQLLGFphhVDF-..DspsFh+csuFP......hhasp..LI-sLYpLLNTRTKpGpTLIDpLVScGL......... 1 7 20 34 +9001 PF09169 BRCA-2_helical BRCA2, helical Sammut SJ anon pdb_1iyj Domain Members of this family adopt a helical structure, consisting of a four-helix cluster core (alpha 1, alpha 8, alpha 9, alpha 10) and two successive beta-hairpins (beta 1 to beta 4). An approx. 
50-amino acid segment that contains four short helices (alpha 2 to alpha 4), meanders around the surface of the core structure. In BRCA2, the alpha 9 and alpha 10 helices pack with BRCA-2_OB1 (Pfam:PF09103) through van der Waals contacts involving hydrophobic and aromatic residues, and also through side-chain and backbone hydrogen bonds. The domain binds the 70-amino acid DSS1 (deleted in split-hand/split foot syndrome) protein, which was originally identified as one of three genes that map to a 1.5-Mb locus deleted in an inherited developmental malformation syndrome [1]. 25.00 25.00 25.00 25.10 24.40 24.60 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.13 0.71 -4.74 5 134 2009-01-15 18:05:59 2006-08-17 12:55:12 5 28 95 3 83 142 1 148.40 43 7.01 CHANGED DLhusLps......ARDlQ-MRIKKKpRQplhPQPGSLYLsKoSslsRISLKuAVGccsPStpSs...cQLYsYGVSK+CIcVNScNAESFQFclp-FFuK.EsLpsGcGIQLADGG..WLIPoNDGKAGKEEFYRALCDTPGVDPKLISctWVYNHYRWIVWKLAAMEpuFP+cFANRCLTPEpVLLQLKYRYDlEIDpS ....................................................................................................................................................................h.t...............h............ph..htl..t.....h.tlpstsu....apF...paht.....t..h..........t.tuh.htDus.....hl.l.s.ppGp..s.Gt.cEFhcAlhsss...GVDPpLloctWVhNHYRWIlWKLAu..M..Eh.sFPc...c...h...us...+.sLoP-pVLhQLKYRYDhElDp..................... 0 36 46 64 +9002 PF09170 STN1_2 DUF1879; CST, Suppressor of cdc thirteen homolog, complex subunit STN1 Mistry J, Sammut SJ anon pdb_1wj5 Domain STN1 is a component of the CST complex, a complex that binds to single-stranded DNA and is required for protecting telomeres from DNA degradation. The CST complex binds single-stranded DNA with high affinity in a sequence-independent manner, while isolated subunits bind DNA with low affinity on their own. 
In addition to telomere protection, the CST complex probably has a more general role in DNA metabolism at non-telomeric sites. 21.90 21.90 24.90 24.20 21.50 21.30 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.27 0.71 -4.50 6 63 2009-01-15 18:05:59 2006-08-17 12:57:40 5 4 43 1 32 51 0 165.20 48 47.30 CHANGED DPshslQIARMLELPplYRpVYDpPFchPsptcs..Eutsspt.ls.stLlShLSEKlKEFLhEp+lpsFYQpELE.hV-SLlulASpPV...ssuDQ.p.tcoSsSpQl+plFKEAlplLQ-cGhlFQKcpspDEVYpVTcQDKDLHptIhcII+EDC++pKHhEKGCHhLHILsCVRhs .......DPshslQIARMLELPplYRclYDpPFp.sshtpp...cuhs.s.sss.lshssL.sshL.....SEKhK-FL..hE.s+VpoFY....QpELE.hV-SLlslAspPl........sssucQ...sh.ps.sosSptI+slFKpAlplLQ-cGlVFQKssu.Dpl.YaVTccDK-LH+pIhcIIpEDCQKPpHsEKGCHFLHILuCsR.p.................................... 0 5 7 14 +9003 PF09171 DUF1886 Domain of unknown function (DUF1886) Mistry J, Sammut SJ anon pdb_1xg7 Domain This domain is predominantly found in the Archaeal protein N-glycosylase/DNA lyase. 21.30 21.30 21.60 21.30 19.80 21.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.58 0.70 -5.35 19 68 2009-01-15 18:05:59 2006-08-17 13:00:51 5 1 58 4 53 72 1 219.20 28 79.14 CHANGED ls-hlpclul-tsphlEcp.DsQacAlppLhcph.spthhspLllhNALVSYpLou+GE-aWhpFucY......Fupp....pspslscsahpFLppSphN+RhlcsKl+Rlc+hpsalpsL.t..s...ha.pshstlhppLu+hLsuctpsKTlVFAlKMhsYAhRsshsh.hshPh-IPIPlDhRltplThp......................hpp-tshchWsplA+couIPPLHlDollW.lhGtsh.htp..tp..........ltcclttlhc 
....................................................................................................................................hphlpplslchhphhEcp.D.QahslppLhpph..st..............thhhhLslhNuLlSYpLss+G.E.c.aW..Fuca.......aspp.............pshhctahp.Fl.po.htchhhptKl+Rlp+h..hsh.h...pl.....p...hh.pshtthhppLuphlssc.ptKTlVFAlKMhsY...uhchs.s..h....hsh-IsIPlDhRlsphThp..h...............................hppctshphWpplu+p.....u.s....IPPlHlDolLW.lhGtth..h.p.......................t................... 0 16 26 38 +9004 PF09172 DUF1943 Domain of unknown function (DUF1943) Sammut SJ anon pdb_1lsh Domain Members of this family adopt a structure consisting of several large open beta-sheets. Their exact function has not, as yet, been determined [1]. 23.10 23.10 23.60 23.20 22.60 22.40 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.78 0.70 -5.55 71 578 2009-01-15 18:05:59 2006-08-17 13:03:58 6 29 211 1 253 586 0 275.40 18 14.04 CHANGED +aS+shchshassshhhGs...usps.hI.sssohLP+slhhphpshhhGt..shslhElGhRsEGlcchltc................................................................................ts...tpphpclpphlpt.lpph+shs.......ppsh.............ushYl+hhGpElsFhslsc.........phlcphhphh..............pphlpp.......lh.pGhphp..hs+shlhh-schhhPTshGlPhcluhhssul.suhpspsp.hslps.h.p.h..........pshphcschpPSluhphhuhhGlsss.hhps..ulphcsplpstsshchpsclshptt.phclph...ss..pp.pclhsh..p.scsasl 
..................................................................paSpshp....hshh.p...h........hG....s....thph.hIhsssohlP+thhhphpt.hhGh..shshhEluh..cs..cuhcphltph.........................................................................................t....tp.....tpthpp.lt.p..hl...ph.hpphcth........ts..............up.h.al+hhGp-htahshsc.............phl.pph.hphh..................tphlpp...lh.....puhphp.......ht..p.hhhh-sphhh..PTshGlPhp.hshhsssl.sshp.s.p..sp..hphps...t...................tphphph..phpPohuhphhsh..hGh.ss..hhps....ulthpsph..pt.ths...hchphphpht....t..p.....hclph....ss....pp......pl..hph.psp.hh................................................................................. 0 84 117 208 +9005 PF09173 eIF2_C Initiation factor eIF2 gamma, C terminal Sammut SJ anon pdb_1kk1 Domain Members of this family, which are found in the initiation factors eIF2 and EF-Tu, adopt a structure consisting of a beta barrel with Greek key topology. They are required for formation of the ternary complex with GTP and initiator tRNA [1]. 22.00 22.00 23.10 23.40 21.90 20.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.95 0.72 -3.94 36 581 2009-09-10 18:26:08 2006-08-17 13:07:42 6 11 483 29 368 573 91 89.20 51 18.91 CHANGED asclcl.paaLLcRllGs+p......thKVppLppsEsLMlNlGSsoTsGhVsulK..sDhsclpLspPVCsphG-.+lAlSRRlsp+WRLIGWGpI ..................asElEl.saaLL+RLLGV+ot...........stKtuKVpKLs+sElLMlNIGShoTGG+VsulK........sDh.A+l.tL.TsP.VCT-hGE.KlA.LSRRl-....KHWRLIGWGpI........ 0 119 204 294 +9006 PF09174 Maf1 Maf1 regulator Mistry J, Wood V anon manual Family Maf1 is a negative regulator of RNA polymerase III [1][2]. It targets the initiation factor TFIIIB [3]. 
21.60 21.60 23.00 23.50 21.30 21.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.08 0.71 -4.42 21 367 2009-01-15 18:05:59 2006-08-17 13:09:05 5 7 291 1 253 346 4 169.50 32 62.03 CHANGED slhupl-saos+hstsc+plhpphppp.pttt.ps.s.s.....................t.........................sshsslscpsSR+sLsYLIusLNtsa.PD.YDFS.slcspsFp+p.ohppVhsplsssL.slspp.h............................shhtplWpslc-pls..lp-C-IYoYs..Pp.ssDPa.sEcGslWShsYFFaNKKhKRllahp ............................................................................................................................hhsth-.aosKhsus-+chh+ph..ppp..h..p..sp....p..p.s......................................................s......tt....t.t....................................................sshusL.s...pp.s.SR+ThhYLIuTLNtua.PD.YDFS.sh+s..pcF..............p+E.s...hphlhstlsssLh..shs.tp.........................................................................................................shhsphWpsl-cphs..Lp-...CslYoYs..P...s.sDPa..t-p.....G...s....lWShpYFFaN+chKRlsah...................................... 1 94 148 216 +9007 PF09175 DUF1944 Domain of unknown function (DUF1944) Sammut SJ anon pdb_1lsh Domain Members of this family adopt a structure consisting of several large open beta-sheets. Their exact function has not, as yet, been determined [1]. 
20.90 20.90 23.70 21.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.02 0.71 -4.62 30 201 2009-09-11 17:00:47 2006-08-17 13:15:43 5 9 70 1 58 162 0 155.20 37 12.32 CHANGED PshsllsRAVRuDpKhtGYQlusYhD.+ssuR..lQlIluslu-ssNW+lCADuslLS+HK.lhA+luWGtEC+pYssplpAETGhl.GspPAsRl+lsWs+LPpsh+...cYuKtlscYl...sssAhhsGlspp+p+Nsp+QlplTlsssSp+olslllKsPchTlYKhultLP ...........................................................................PshsllhRAlRuDpKh.GYQlssYhDpssuR...lQllluslu-ssNWKlCADuslLSpHK....spA+lsWGtEC+.pYps.hpA.EoGhl.uppPAsRl+lpWp+lPphhp...phu+pltcal....ssAh.hGhp..p+scNsp+plplosAhsSp+olsllh+hPchTha+hslhLP...................................... 0 0 11 41 +9008 PF09176 Mpt_N Methylene-tetrahydromethanopterin dehydrogenase, N-terminal Sammut SJ anon pdb_1lu9 Domain Members of this family adopt a alpha-beta structure, with a core comprising three alpha/beta/alpha layers, in which each sheet contains four strands. They are predominantly found in prokaryotic methylene-tetrahydromethanopterin dehydrogenase, which catalyses the dehydrogenation of methylene-tetrahydromethanopterin and the reversible dehydrogenation of methylene-H(4)F [1]. 19.10 19.10 21.30 24.80 17.80 15.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.70 0.72 -4.12 17 128 2009-01-15 18:05:59 2006-08-17 13:40:18 6 9 85 6 52 135 48 76.40 47 27.55 CHANGED PFDlNMAlDAGa-slhsYusVp.p-VpsLVQDuIFSRuPpshp+TulFIGGpDhshAhshLcsAKcuhlPPFclSV..hsDPu .PFDlshAhDAGh-hlhsYssVp.spVsuLsQDuIFoRuPpshtcTuIFIGG+DsthAhDMLcsA+cuhhsPFclSV..hADPu. 1 16 37 44 +9009 PF09177 Syntaxin-6_N Syntaxin 6, N-terminal Sammut SJ anon pdb_1lvf Domain Members of this family, which are found in the amino terminus of various SNARE proteins, adopt a structure consisting of an antiparallel three-helix bundle. 
Their exact function has not been determined, though it is known that they regulate the SNARE motif, as well as mediate various protein-protein interactions involved in membrane-transport [1]. 24.90 24.90 25.00 25.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.11 0.72 -3.58 43 417 2009-01-15 18:05:59 2006-08-17 14:06:41 6 8 263 7 271 389 2 95.20 29 35.74 CHANGED DPFa.Vpc-Vpculsph....cslappatphhssss.................chtphpp-LpsslpslchsLcDLcculsl.....spps...Pp+a.slsppElscR+palpphcsplpplc ..............DPFa.....VppE.Vpculsph....culappahclhppss.....................t..Ehp.tppELcssLpolchsL.-DL-c.ol..pl...................scps.....Pt+.a.slstsElspR+palpshcpplpphc.................... 0 80 146 214 +9010 PF09178 DUF1945 Domain of unknown function (DUF1945) Sammut SJ anon pdb_1lwh Domain Members of this family, which are predominantly found in prokaryotic 4-alpha-glucanotransferase, adopt a structure composed of six antiparallel beta-strands, four of which form a beta-sheet and another two form a type I' beta-hairpin. The role of this family of domains, has not, as yet, been defined [1]. 21.20 21.20 29.40 28.10 20.90 19.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.52 0.72 -4.08 2 9 2009-09-11 06:49:26 2006-08-17 14:23:45 5 1 8 4 2 8 2 49.40 71 11.17 CHANGED AplEFLCKE-KhL.VYRLhD-t+SLKVhHNLSstEhVFEGV+hpPYpTEVl .......AKlEFLCKE-KFL.VYRLYDDQ+SLKVFHNLSGEEVVFEGV+h+PYKTEVV. 0 1 1 2 +9011 PF09179 TilS DUF1946; TilS substrate binding domain Sammut SJ, Bateman A anon pdb_1ni5 Domain This domain is found in the tRNA(Ile) lysidine synthetase (TilS) protein. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.10 0.72 -3.96 123 1699 2009-01-15 18:05:59 2006-08-17 14:53:22 6 7 1681 3 357 1331 279 68.00 29 15.99 CHANGED Lslss..Ltpho.scppplLRhWLpth...sh.thPoptpLpplhpplhtup...tDupsplph.s...shplRRapscLah ...........Lplss..hhshS.ss+pttllRtWLstt...sh..shP.......S..pspLpclhpplt.hA+...pDussplph.s......shplRRapspLah............................. 0 94 204 286 +9012 PF09180 ProRS-C_1 Prolyl-tRNA synthetase, C-terminal Sammut SJ anon pdb_1nj1 Domain Members of this family are predominantly found in prokaryotic prolyl-tRNA synthetase. They contain a zinc binding site, and adopt a structure consisting of alpha helices and antiparallel beta sheets arranged in 2 layers, in a beta-alpha-beta-alpha-beta motif [1]. 20.90 20.90 21.10 21.00 20.60 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.60 0.72 -4.05 82 1277 2009-01-15 18:05:59 2006-08-17 15:23:59 6 39 1155 18 562 1221 419 73.60 33 12.63 CHANGED a--h............ppsl.spt....shlhssWCGs.tcsEpcIKcco.....................sApshCl.Ph..-........p.tpsspCl...hsG.+sAp..phshFu+uY ............................................a--hpptl..pcs........salhu.WCGs..tcCE-cIK-co...........................................uAssRCI.Pa-.....................ttptsspCl.....hsG..+sAc..phshFu+uY.............. 0 222 375 492 +9013 PF09181 ProRS-C_2 Prolyl-tRNA synthetase, C-terminal Sammut SJ anon pdb_1nj8 Domain Members of this family are predominantly found in prokaryotic prolyl-tRNA synthetase. They contain a zinc binding site, and adopt a structure consisting of alpha helices and antiparallel beta sheets arranged in 2 layers, in a beta-alpha-beta-alpha-beta motif [1]. 
25.00 25.00 78.40 77.10 19.50 17.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.08 0.72 -4.05 4 16 2009-01-15 18:05:59 2006-08-17 15:24:53 5 1 16 4 10 17 0 65.10 57 14.15 CHANGED ITll-NhcsD......clKtsLSEpKGlILVPacEsIYNEEFEEhIDASVLGpTpYcGKcYISIA+TY .........ITll.sh....-s-.....-clKphLsEp+GlILIPacEsIYNEEhEEpl-ASVLGpTpYcGKcYIuIA+TY 0 1 2 7 +9014 PF09182 PuR_N Bacterial purine repressor, N-terminal Sammut SJ anon pdb_1o57 Domain The N-terminal domain of the bacterial purine repressor PuR is a winged-helix domain, a subdivision of the HTH structural family. It consists of a canonical arrangement of secondary structures: a1-b1-a2-T-a3-b2-W-b3, where a2-T-a3 is the HTH motif, a3 is the recognition helix, and W is the wing. The domain allows for recognition of a conserved CGAA sequence in the centre of a DNA PurBox, resulting in binding to the major groove of DNA [1]. 24.60 24.60 24.80 24.60 24.50 23.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.21 0.72 -4.36 37 1165 2012-10-04 14:01:12 2006-08-17 15:54:53 5 2 1161 8 145 506 0 68.50 57 25.17 CHANGED +Rs-RllshT+hLl-pPpcLlsLshFu-hapuAKSoISEDLsIlKcshcchuhGpl-TlsGAAGGV+YIP .....+RS-RhVshopYLlspPpcLlsLshFA-+YpuAKSSISEDlsIIKcsFccpplGplpTlsGAuGGVpahP.... 0 52 92 118 +9015 PF09183 DUF1947 Domain of unknown function (DUF1947) Sammut SJ anon pdb_1q7h Domain Members of this family are found in a set of hypothetical Archaeal proteins. Their exact function has not, as yet, been defined. 22.50 22.50 22.60 23.00 22.40 22.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.19 0.72 -4.21 7 27 2009-01-15 18:05:59 2006-08-17 16:16:08 5 2 27 1 13 24 4 64.70 45 42.26 CHANGED pRHhlScK-tKhhhschcp.YGIDlou.tclEVuppK+phhYalssh.shFs-.pLIPTLhhlpphp .QRHlhSpK-tKhhlsKlKp+Ys.IDlSs.s+lElGKEKKcs.aYYlsslL..u..F.Fs-...LIPTL.ChlhKhp........... 
0 3 5 10 +9016 PF09184 PPP4R2 PPP4R2 Mistry J, Wood V anon manual Family PPP4R2 (protein phosphatase 4 core regulatory subunit R2) is the regulatory subunit of the histone H2A phosphatase complex. It has been shown to confer resistance to the anticancer drug cisplatin in yeast [1], and may confer resistance in higher eukaryotes. 25.00 25.00 29.60 29.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.10 0.70 -4.86 15 305 2009-01-15 18:05:59 2006-08-17 16:18:36 6 3 245 0 201 303 0 204.50 22 50.95 CHANGED shcpl.phhccFpchtpK.Elss.L-paLsclA+T.G-ThhsWsphKshhpaKlppVhcDFp....................tpsP..tchssssNV-shshE-MKcplLcllssFNu......hPFTIQRlCELLs-P..p+pY..sclDKFlRAlEKNlhVVSolpPssc+ssus......stsphsulh..................hststssahcc.t.............Vsssusscshs......csp..huss.ssNshssospscspphpp..-ccss.sussps-ss.s..sslt......sc+s-c--sppt-u-..........hEscphc.-c---Esc--p-pssss- ...............................................................................................................................t....................................................h..hh...h..h....ph..........................................................................p..........p................hpp.h...pphl.h.....p.h.l.s.sFss.......sPFTIQRLCELlhcP..p+p.Y.....sshsKal+AlEK...........slhVsS....s....h....t..........s......t...................................t........................................................................................................................................................................................................................................................................................s................................................................................................................................................................ 
0 64 106 162 +9017 PF09185 DUF1948 Domain of unknown function (DUF1948) Sammut SJ anon pdb_1q8c Domain Members of this family of Mycoplasma hypothetical proteins adopt a helical structure, with one central alpha-helix surrounded by five others, in a NusB-like fold. Their function has not, as yet, been determined [1]. 25.00 25.00 292.80 292.60 23.90 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.78 0.71 -4.52 2 4 2009-01-15 18:05:59 2006-08-17 16:25:49 5 1 4 1 2 4 16 140.00 87 88.33 CHANGED LTRTQRRIAlVEFIFuhLFFLPKpA-.IQAsFL-YDs.ER.LN-WQK.IVKsFSEphhpF.chIEpQQ.+NQhElQoKYNKlSGKKlDLLTpAVlLCALSEQ+ApsTDKPLLISEALLIMDHYSQssEKKQTHALLDKLL LTRTQRRIAIVEFIFATLFFLPKTADQIQAAFLDYDVPERPLNDWQKEIVKVFSERCVEFIELIENQQQRNQAEVQSKYNKVSGKKVDLLTKAVILCALSEQHAQATDKPLLISEALLIMDHYSQVPEKKQTHALLDKLL 0 1 1 1 +9018 PF09186 DUF1949 Domain of unknown function (DUF1949) Sammut SJ anon pdb_1vi7 Domain Members of this family pertain to a set of functionally uncharacterised hypothetical bacterial proteins. They adopt a ferredoxin-like fold, with a beta-alpha-beta-beta-alpha-beta arrangement [1]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.33 0.72 -4.36 181 2861 2012-10-02 20:07:24 2006-08-17 16:36:13 6 4 2841 2 493 1750 45 55.80 23 26.73 CHANGED lphcYsphuplcphLp...p.ts.htlhcppYssp..Vplpltlstsphpshpptls-hosGp ......lphsYsphsplcthLt....p.ps.htlhssp.Yssp.....Vphpltl..s..tsc..h..pshpstLs-hopG................... 0 132 288 408 +9019 PF09187 DUF1950 Domain of unknown function(DUF1950) Sammut SJ anon pdb_1vk5 Domain Members of this family pertain to a set of functionally uncharacterised hypothetical eukaryotic proteins [1]. 
25.00 25.00 107.40 25.60 17.60 17.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.71 0.71 -3.96 3 18 2009-01-15 18:05:59 2006-08-17 16:49:10 5 2 10 3 11 22 0 110.80 59 64.70 CHANGED LLR+.AEMYQ-YMKQlPIPo+R.GSlIPsTTWlGLGpSMKQLYGQPLHYLTNVLLQRWDQSRlGSDsEH+PLDSIIHPoKAEATIWlVEElHRLTTSoQHlAoLWtSDPMYHAFIDPIFPc ...llRR.AEMYQ-YMKpIPIPspR.GShIPFooWhGLu+SlKQLYsQPLHYLTNlLLKpWDQ.RlGS--Ep+sLDsIIHPsKAEATIWlhEElHRpToSphHlApLWtsDPMYa.uFlDsIFP...... 0 1 7 9 +9020 PF09188 DUF1951 Domain of unknown function (DUF1951) Sammut SJ anon pdb_1tm9 Domain Members of this family of Mycoplasma hypothetical proteins adopt a helical structure, with a buried central helix. Their function has not, as yet, been determined. 24.40 24.40 26.40 30.50 23.70 24.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.92 0.71 -4.22 2 7 2009-01-15 18:05:59 2006-08-17 16:57:27 5 1 7 1 3 4 0 136.60 49 96.37 CHANGED ME.NNlKEpLlShFppACSoHpERLDFICSsRESDTFSsVDVPLtPIKsIIEIsKsEppQhEIhKlAlpNIKTLSoVG.oGQYhASaFSTpsEsAIIFCl.YFLYHFsFL+DpNKKQllK+AaEslA-pIADYLNEN ..............EchlosFpphhopctp+hDFIpSVhEsDshuNh-hPht.lpplh-lhhNE.spp.hhphhIpshhTh.Tsh.php.lhShFppppplh.hFCl.YhLa+.sF.aD-sc+phlp+hhpslAcclh-hLs.... 0 2 2 2 +9021 PF09189 DUF1952 Domain of unknown function (DUF1952) Sammut SJ anon pdb_1v8c Domain Members of this family are found in various Thermus thermophilus proteins. Their exact function has not, as yet, been determined. 21.20 21.20 21.90 21.80 18.60 17.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.61 0.72 -4.28 2 11 2009-01-15 18:05:59 2006-08-17 17:05:56 5 2 11 4 4 11 0 77.60 56 49.80 CHANGED GFERTFGAFPPWLLERYLEEWGGTREGEGVYRLPGAVVRFREVEPLKVGSLSIPQLhVEVEGEtuEtWFERIAhAASR ........htppFGAhPPWLLE+YLpEWGGp+-GEGsYRLPGAhVRFRElEPL+VGSLSIPQLcVEVEGEEA..EtWFERIAhAASR........ 
0 1 3 4 +9022 PF09190 DALR_2 DALR domain Sammut SJ, Bateman A anon pdb Domain This DALR domain is found in cysteinyl-tRNA-synthetases [1]. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.11 0.72 -3.72 124 4263 2012-10-02 19:03:26 2006-08-17 18:00:12 6 6 4170 6 980 3173 1201 63.60 30 13.64 CHANGED cFtpAM-DDFNTspAlulLF-LA+clNp..........ttt.....shptt....pth..t......stLppL....uslLGl..lpp........ss..csal ..........pFhpAMsDDFNTspAluslF-hu+p.lNp...............pst.........sttst...........pth.....t......................stlc.ph...........sslLGl..ltpt.............................................................................. 0 339 644 840 +9023 PF09191 CD4-extracel CD4, extracellular Sammut SJ anon pdb_1cid Domain Members of this family adopt an immunoglobulin-like beta-sandwich, with seven strands in 2 beta sheets, in a Greek key topology. They are predominantly found in the extracellular portion of CD4 proteins, where they enable interaction with major histocompatibility complex class II antigens [1]. 25.00 25.00 25.40 49.20 21.40 24.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.46 0.72 -4.03 11 88 2009-01-15 18:05:59 2006-08-18 08:50:08 5 16 45 8 18 96 0 105.90 61 25.33 CHANGED QKsSsTVYtKEGEQsEFSFPLsFp-ENL..pGELpW.QActASSsQSWITFoLcN+KVSVpKsppslKLQMpEsLPLpLTLPQsL.QYAGSGNLTLsLs..KGpLHQEVNLVV ....QKsSsTVYKKEGEQVEFSFPLsFptEpL.....oGELhW.Q.AEtASSupoWITFsLcN+cVSVpclppD.KLQMucpLPLpLTLPQALPQYAGSGNLTLsLs..pGK.LHQEVNLVV.................................. 0 1 1 2 +9024 PF09192 Act-Frag_cataly Actin-fragmin kinase, catalytic Sammut SJ anon pdb_1cja Domain Members of this family assume a secondary structure consisting of eight beta strands and 11 alpha-helices, organised in two lobes. They are predominantly found in actin-fragmin kinase, where they act as a catalytic domain that mediates the phosphorylation of actin [1]. 
20.30 20.30 21.40 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.79 0.70 -5.42 7 59 2009-09-11 11:13:43 2006-08-18 09:14:55 5 5 31 2 47 61 0 252.30 26 38.15 CHANGED sssuh..slsppthhhssIpsIsWssL.slchl-hu....................ssslhhlhThps.......t..p.....................................plllKuoso...Isp-sauSlLphlLtlPlPchRllc.ss.EappMopsLh.....tohpscpLhchIpoclpcsa.....hLIMEYhp.GpphscLspp......paFuspt.uc++hpQLGplluhDlhs......................................................NN.s+hP...ht.ssps.huNIlhh-pPp..GhhhsllsSslp...slssS.FshtY...................+pahsRl+.lLaolhQ...pPstEShQ...............lpphR-hl.pppshclsppSs...hplQp.....GIspGl ................................thttssh.......c.pshsWs...lpulcpocpu............................SuGVl..FhshFps...............t...........................................usVlKhu.so...hpuEhhuhcluchLG...lpsPpsRll.cpss........Eatphpcuhp.....huspp..s-p.ltchhp..pEl.hcuh.....lhlMpYlp.G.ps..Lh-.sps.............Fpsp....p.upcphpsLG+llhLDlll..........................pNpDRLP.t.LtWc..GNsuNlll.scc............h.tsh......t.....................................p.hh.pht....t.................................................................................................................................................... 0 24 38 43 +9025 PF09193 CholecysA-Rec_N Cholecystokinin A receptor, N-terminal Sammut SJ anon pdb_1d6g Domain Members of this family are found in the extracellular region of the cholecystokinin A receptor, where they adopt a tertiary structure consisting of a few helical turns and a disulphide-crosslinked loop. They are required for interaction of the cholecystokinin A receptor with it's corresponding hormonal ligand [1]. 
20.30 20.30 20.70 24.70 20.10 17.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.33 0.72 -4.08 5 39 2009-01-15 18:05:59 2006-08-18 09:29:54 5 1 27 1 20 37 0 47.20 73 11.41 CHANGED MDVVD.SLLsNGSNITP.PCELGLENETLFCLDQP+.PSKEWQPAVQILL ..MDVVD..SLLsNGSNITP.PCELGLENETLFCLDQPp.PSKEWQPAVQILL 1 1 1 3 +9026 PF09194 Endonuc-BsobI Restriction endonuclease BsobI Sammut SJ anon pdb_1dc1 Domain Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence CYCGRG (where Y = T/C, and R = A/G) and cleave after C-1. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates [1]. 25.00 25.00 53.90 53.90 21.60 17.50 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.92 0.70 -5.49 4 29 2012-10-11 20:44:45 2006-08-18 09:44:51 5 2 22 2 5 29 0 225.10 41 98.98 CHANGED hPYp.HLpSsDDLhToYEthRAGFlALALEKN+RuTPaltcARALKltASpAcsPpDLLplcDIpsuLLsASGlSDKAtsaLp.pDKsEAIpsLIpNFLEPAGEpFVEELVaRFLLhRGDoLGGoMRNlGGsLAQpKhTRuIISsLslAsIuY+WLcSpsKp...Wh-ts-DDs-lElhlRGlSWp.pGcsRTlhYNlsVPlV.+pNlDlCLFsCcssplpsQ....pshpssshYIALGELKGGIDPAGADEHWKTApoALsRIRsAFs+tuhpPaTFFIGAAIE+pMAcEIWcQLpoGhLTNAANLTpssQluSlsRWhhpL ..........................spDLhTshpthhsGF...A.p+.t+usPalt.Achhp..hpp.hpp..pLhp..tlp.hllsAshhScKuhtaLp.p.p.chIptLI.sF.c.ssppalppL.h+aLLhpGDoLGG.MRNhsG..ApphhsphllstL...shs.ph....ppp................................................l.ppslDhhlhpht.tth........h....p.hlshGELKGGIDPAGADEHWKTApsALsRIhpAF.phthp.PhhhFlGuAIEhtMu.EIap.LppthLssAANl.p.pQlhpl.thhh................... 
0 0 5 5 +9027 PF09195 Endonuc-BglII Restriction endonuclease BglII Sammut SJ anon pdb_1dfm Domain Members of this family are predominantly found in prokaryotic restriction endonuclease BglII, and adopt a structure consisting of an alpha/beta core containing a six-stranded beta-sheet surrounded by five alpha-helices, two of which are involved in homodimerisation of the endonuclease. They recognise the double-stranded DNA sequence AGATCT and cleave after A-1, resulting in specific double-stranded fragments with terminal 5'-phosphates [1]. 21.60 21.60 22.40 22.00 21.10 20.30 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.89 0.71 -4.77 20 74 2012-10-11 20:44:45 2006-08-18 10:00:22 6 1 72 10 33 86 41 177.60 21 84.63 CHANGED pApsllpp.phssthpEltpsLtshslphsclpsuutscptsp.hh.........cchltscGWtpch.............................................hh.......................sh-pps...........................pclDahK.........sslulElpauNhs.hh.pDLhp..aphhaspsh......IcVGllls.spshpc.................................................................chusussaaE+lhpcl.ptuc.ss.slPllllGls ............................................................................................................................................................................uttllt...thsp.hpEl.psltsh.php..h......tp.httss..tpptt..ht.hh...............ccthhtp.GW.pct.............................................ph..................................t..............................pplDahK.........sclulElpasshshhh.pDLhs....aph..h..a..p.tsh......IslGllIs...tspp.hpp............................................................phusussta-+hh.cl.ctup..ss.ssPllllGl............................... 0 12 27 31 +9028 PF09196 DUF1953 Domain of unknown function (DUF1953) Sammut SJ anon pdb_1iv8 Domain This domain is found in the Archaeal protein maltooligosyl trehalose synthase produced by Sulfolobus spp. Its function has not, as yet, been defined. 
25.00 25.00 41.80 122.20 22.90 21.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.19 0.72 -4.27 2 2 2009-01-15 18:05:59 2006-08-18 10:11:54 5 1 2 2 1 3 0 64.50 49 9.06 CHANGED EYKsLcLpcGLCGFhRhsKlLVIlKT...lNhchclE.su.YTDVlTsEpl+tcVplscLPhILV+ EYKsLcLpcGLCGFhRhsKlLVIlKT...lNhchclE.su.YTDVlTsEpl+tcVplscLPhILV+ 0 0 0 1 +9029 PF09197 Rap1-DNA-bind Rap1, DNA-binding Sammut SJ anon pdb_1ign Domain Members of this family, which are predominantly found in the yeast protein rap1, assume a secondary structure consisting of a three-helix bundle and an N-terminal arm. They contain an Arg-Asp-Arg-Lys sequence that interacts with an ACACC region in the 3' region of the DNA-binding site [1]. 22.40 22.40 22.80 22.40 22.30 20.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.56 0.72 -3.49 19 53 2012-10-04 14:01:12 2006-08-18 11:12:12 5 4 48 3 30 86 0 114.20 39 17.31 CHANGED KFTA--DYpLshtlpc.hhpchhphsssputshhps.c...h.h.tph.h.........................tpFFcph.....................ucppP..sHTpsuWRDRaRKFlhsaG.lccYIcYYEppptsucpPcshKNhTs .........................................................................................KFoA-EDYtLshslpcphhc-hhphDs-supshlpstct.shlscpphs.........................................sls+pFFcpa.....................uccas..sHTcsuWRDRFRKFlhsYG.lccYIpYYEspptpscpPEPM+NlT.......................... 0 5 17 28 +9030 PF09198 T4-Gluco-transf Bacteriophage T4 beta-glucosyltransferase Sammut SJ anon pdb_1jix Domain Members of this family are DNA-modifying enzymes encoded by bacteriophage T4 that transfer glucose from uridine diphosphoglucose to 5-hydroxymethyl cytosine bases of phage T4 DNA [1]. 
25.00 25.00 56.40 55.20 17.90 14.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -7.91 0.72 -4.63 2 4 2009-01-15 18:05:59 2006-08-18 11:21:40 5 1 2 22 0 6 0 38.00 82 13.98 CHANGED MKIAIINMGNNVINFKTVPSSETIYLFKVISEMGLNVD MKIAIINMGNNVINFKTVPSSETIYLFKVISEMGLNVD 0 0 0 0 +9031 PF09199 DUF1954 Domain of unknown function (DUF1954) Sammut SJ anon pdb_1m4v Domain Members of this family are found in various staphylococcal toxins, and adopt an OB fold, wherein the domain folds into a five-stranded beta-barrel. The exact manner in which they confer pathogenic properties to the protein has not, as yet, been determined [1]. 20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.82 0.72 -3.78 21 1637 2009-01-15 18:05:59 2006-08-18 11:49:10 5 2 161 29 14 384 0 83.10 38 33.87 CHANGED pcL+pYYopsShEh+NloGht.pt...ps.phlphh.spphhplsLlGcDKcKa+c.ssc.pslDVFlVpEt.pchpuppYSlGGlTKoN ........cL+pYYopsShEhcNloGhh.p....tspphlphh.pp..phhpltLlGcDc.pKY+ctsp...slDVFhVpEt.pchpuphaSlGGlTKpN................ 0 12 12 14 +9032 PF09200 Monellin Monellin Sammut SJ anon pdb_1mol Domain Monellin, a protein produced by the West African plant Dioscoreophyllum cumminsii, is approximately 70,000 times sweeter than sucrose on a molar basis. The protein adopts an alpha-beta structure, with a cystatin-like fold, where each helix packs against a coiled antiparallel beta-sheet [1]. 
23.60 23.60 23.60 87.80 21.90 23.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -7.73 0.72 -4.32 2 2 2009-01-15 18:05:59 2006-08-18 11:59:41 5 1 1 49 0 22 0 41.50 20 87.37 CHANGED tEhchh-ht.a...oppLh+hslsE-.KhtththLpFNtsl.P tEhchh-ht.a...oppLh+hslsE-.KhtththLpFNtsl.P 0 0 0 0 +9033 PF09201 SRX SRX Sammut SJ anon pdb_1nrj Domain Members of this family, which are predominantly found in eukaryotic signal recognition particle receptor alpha, consist of a central six-stranded anti-parallel beta-sheet sandwiched by helix alpha1 on one side and helices alpha2-alpha4 on the other. They interact with the small GTPase SR-beta, forming a complex that matches a class of small G protein-effector complexes, including Rap-Raf, Ras-PI3K(gamma), Ras-RalGDS, and Arl2-PDE(delta) [1]. 25.00 25.00 26.20 141.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.91 0.71 -4.54 5 23 2009-01-15 18:05:59 2006-08-18 12:22:27 5 5 22 1 13 22 0 149.30 57 24.56 CHANGED MFDQLAIFTPQGpVLYpYNsLsKKFSEsQlNuFIScLIopPVo+cc.......cshsSKLsoIs.o.pKsocSFoslFHloKQPELYFVlTYAE.pSLELNsEAEpVLsLuLpLWDSLsLN-uILcNhpG+ucKNcHNas-ILpulsE-IcKF-pYF MFDQLAVFTPQGQVLYQYNCLGKKFSEhQINuFISpLITSPVT+KE.......cuh.casLLoIN.......S..p.....ccN........osSFsAhFalsKQPELYFVVTaAE.QTLELNQEspQTLsLsLKLWNSLcLsESIL+Nl.pGpsc.KNcHNYlDILpGl--DLcKF-QYF.... 0 1 6 12 +9034 PF09202 Rio2_N Rio2, N-terminal Sammut SJ anon pdb_1tqi Domain Members of this family are found in Rio2, and are structurally homologous to the winged helix (wHTH) domain. They adopt a structure consisting of four alpha helices followed by two beta strands and a fifth alpha helix. The domain confers DNA binding properties to the protein, as per other winged helix domains [1]. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.78 0.72 -3.91 29 449 2012-10-04 14:01:12 2006-08-18 13:09:27 6 7 407 5 321 479 8 80.70 43 19.50 CHANGED h+aLop-DFRlLoAlEhGh+NHEhVPspLlsphupL+..tsusp+.tlpcLh+tcLls+...pstpY..-GY+LTYtGYDaLAL+sh.scR ..............hRaLsp-DFRV...LsAlEhG.h+NHEl..VPspLIsplusL+..puusp+.hlpcLsKhcLls+...cstpY................-..GY..RLTas...GYDaLAL+ohspR................... 0 105 187 266 +9035 PF09203 MspA MspA Sammut SJ anon pdb_1uun Domain MspA is a membrane porin produced by Mycobacteria, allowing hydrophilic nutrients to enter the bacterium. The protein forms a tightly interconnected octamer with eightfold rotation symmetry that resembles a goblet and contains a central channel. Each subunit fold contains a beta-sandwich of Ig-like topology and a beta-ribbon arm that forms an oligomeric transmembrane barrel [1]. 19.70 19.70 20.70 22.10 19.30 19.60 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.15 0.71 -4.83 19 280 2009-01-15 18:05:59 2006-08-18 13:29:33 6 2 59 10 90 299 3 172.20 25 80.36 CHANGED PDGhp....lTVstpDEptsslPsLsss...hssRphhVuGshsus...........................lsGssst..ssGpLpsGY.lGCslslus.....hussuGhoPulsh..shss.s..s.uh.s.h...hsuuhsl....................sLtPGtlpsVslst.p-hcGsssa.................VthpshclplsGCsG.uhlRSYAslstpT-sssthlshYGtshsl .......................................sGhp....lslp..ttsphhsslssL.sss....hoREhal..S.....Gpsssp.........................lsG..sust.....hsupLpsGY...plGCthslus................hss.ss.G.ss.suls.h...........shs.s......s..............................ss..s..l..ssulsl...................................................sltPGtltsls.l.p....hshpsssst........................lslssh+lplsGCuG.stlRuaApl...........pssTssspt.lshYGpPhsh................................... 
0 10 60 76 +9036 PF09204 Colicin_immun ColicinD; Bacterial self-protective colicin-like immunity Sammut SJ anon pdb_1v74 Domain Colicin D, which is synthesised by various prokaryotes, adopts an antiparallel four helical bundle fold: the helices are tightly packed, forming a compact cylindrical molecule. The protein specifically cleaves the anticodon loop of all four tRNA-Arg isoacceptors, thereby inactivating prokaryotic protein synthesis and leading to cell death [1]. This family also contains immunity proteins to klebicins and microcins. Many bacteria produce proteins that destroy their competitors. Colicin D is one such. The immunity proteins are expressed on the same operon as their cognate bacteriocins and protect the expressing bacterium from the effects of its own bacteriocin [2]. 21.60 21.60 23.60 23.20 21.40 20.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.88 0.72 -3.90 10 59 2009-11-02 17:58:57 2006-08-18 13:44:51 5 1 47 3 11 46 0 78.90 43 88.30 CHANGED MS.hsll-LA+sFlpp+lSAppFoEsahphW+lERcsthhl+D.spslscCLuplFshADhYsPDs-.R--..YElD--pLRcEV+plLcKap ...................Ms.hhllchA+pFlstcloAp.Fuptahtha+hEpcpt.hhpD..spplspsLsslFshADhYsPDsD...RE-...YElDDcpLhcpVhpllsKhp..... 0 2 6 9 +9037 PF09205 DUF1955 Domain of unknown function (DUF1955) Sammut SJ anon pdb_1vdu Domain Members of this family are found in hypothetical proteins synthesised by the Archaeal organism Sulfolobus. Their exact function has not, as yet, been determined. 
23.70 23.70 23.70 188.30 23.60 23.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.00 0.71 -4.71 4 17 2009-01-15 18:05:59 2006-08-18 13:52:30 5 1 17 1 6 12 0 159.40 67 94.03 CHANGED Ell+KLM-AK+hllDGhl-cGlcIlpchspSSshcEhNWhICNllDohsCchlhpsL-sIGphFDlotCtNLKpVlpChhhhNp.SEaVDhALDhLVtpsK+DpL-cIhp-lh..NpclssplLlKIAsAh+KlGspR-us-LLpcACc+GlKEACpslspl .ELRRKLIEAKKLILDGFVEQGIELLSKTIoSENIKESNWIICNVIDTADCDAVVKTLDSIGKIFDhSPCANIKRlVYCYALlNKsSEYVDLALDlIVKuNKKDuLDKLYNDLK..NEKINPEFLLKIGhAYKKLGAV+ESNEVLRKACENGLKEACENIKEI.. 0 1 1 5 +9038 PF09206 ArabFuran-catal Alpha-L-arabinofuranosidase B, catalytic Sammut SJ anon pdb_1wd3 Domain Members of this family, which are present in fungal alpha-L-arabinofuranosidase B, adopt a beta-sandwich fold similar to that of Concanavalin A-like lectins/glucanase. The beta-sandwich fold consists of two anti-parallel beta-sheets with seven and and six strands, respectively. In addition, there are four helices outside of the beta-strands. The beta-sandwich strands are closely packed and curved with a jelly roll topology, creating a small catalytic pocket. The domain catalyses the hydrolysis of alpha-1,2-, alpha-1,3- and alpha-1,5-L-arabinofuranosidic bonds in L-arabinose-containing hemicelluloses such as arabinoxylan and L-arabinan [1]. 
25.00 25.00 26.10 25.70 24.50 23.20 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.19 0.70 -5.48 7 132 2009-01-15 18:05:59 2006-08-18 14:15:12 6 11 81 4 81 133 29 312.70 58 63.03 CHANGED PCDlYuuusTPClAAHSTTRALYuuYoGPLYQVpRuSDGsTsDI.......uPLo.AG....GVANAuuQDsFCANTTClIoIIYDQ..................................Su+GNHLTpAPsG.uFsG...PsssGa........................DNLAsA.GAP.....VoLsGpKAYGVFloPGTGYRNN.sspGoAsGDpsEGhYAVLDGTHY..NuuCCFDYGNAETsStDTGs.......GHMEAIYF..GssTlWGp..GsGsGP..............WlMsDhENGLFSGsss...c.NuusPuIs.RFlTAllKGps....shWAIRGGsAuoGuLSTaYsGsRPssoGYsPMuK.......EGAIILGIGGDNS.GAQGTFYEGsMToG..aPSDATENtVQANlVAAtYusss .........PCDIYuuGGTPCVAAHSTTRALYuuYsGsLYQV+RuSDuuTpsI..............usLo.A.G....GlAsAA.....AQDoFC..Au.TTClITlIYDQ..................................SG+GN+LTpAPsG..uh.pG.......Ps.s.sGh.......................................DsL.As....A.huAP.......VTl.sG.pKAYG.Val.uP..GsGY.RNN.ss.sGs..ATGDpsEGhYAV..hDG....THY....NuuCCFDYGNAETs....spDsGN.............................GpME.AIYF..Gsss..h.aGs.....GuGsGP..............WlMADLENGLF..S.G.sss......t.NsssP...olstR.FVTAhlKGps....spWAlRGGNApSGuLoTaYsGs.RP.t.........s...............GYsPMpK.......EGAIILGlGGD......NS.....suupGTFYEGVMToG..YPSDATENuVQANIVAAtYus.u................................................................................... 0 32 52 74 +9039 PF09207 Yeast-kill-tox Yeast killer toxin Sammut SJ anon pdb_1wkt Domain Members of this family, which are produced by Williopsis fungi, adopt a secondary structure consisting of eight strands in two beta sheets, in a Greek-key topology [1]. 
21.50 21.50 22.00 196.20 21.10 20.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.28 0.72 -3.55 2 2 2009-01-15 18:05:59 2006-08-18 14:27:41 6 1 2 1 0 4 0 86.50 88 69.48 CHANGED DGYLlMCKNCDPNoGSCDWKQNWNTCVGIGuNVHWMVTGtS.sGpQGCAhIWEGSGCsGRSTTMCCPusTCCNINTGFYIRSYRRVE DGYLlMCKNCDPNoGSCDWKQNWNTCVGIGuNVHWMVTGtS.sGpQGCAhIWEGSGCsGRSTTMCCPusTCCNINTGFYIRSYRRVE 0 0 0 0 +9040 PF09208 Endonuc-MspI Restriction endonuclease MspI Sammut SJ anon pdb_1sa3 Domain Members of this family of prokaryotic restriction endonucleases recognise the palindromic tetranucleotide sequence 5'-CCGG and cleave between the first and second nucleotides, leaving 2 base 5' overhangs. They fold into an alpha/beta architecture, with a five-stranded mixed beta-sheet sandwiched on both sides by alpha-helices [1]. 25.60 25.60 26.00 82.40 25.00 25.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.58 0.70 -5.32 3 10 2012-10-11 20:44:45 2006-08-18 14:41:34 5 1 10 4 4 9 1 265.90 35 70.46 CHANGED YTchLSDlLs......clsEKs..phtpGlpus+LGNcaE+aIV-lLNDlcNLutYNsNppAQpchc....cIhcclLccLsL-ctaDpILEVTuTs.DIs+LpNGGSPKTDlolRlphssKEh+IsNISIKNTpcK+VSIHEYsVcDlloslulSDoD.L+pLlc+FQcsGStKcFsulpsp+splL-ps..LcPYpE+lIcWsVT..up+stssLLc-KIQlsshIIsRNtsuVssK..DDYlKcYIEEhStAhGKG.FGTPFsWTYPSK+RGQKIQlKG ............hpp.Lo-lhs......pI.EKAspNlupGlRuslLGNshEppIVNlLNDlcNlshWNs.pssppshc...YcIaKcIlccl..slc.cth..-pIl-loAT...s.-IPhLp....NtGKPKTDVpVTIpsss..Kch.IhsISlKp..TpcppVoIHEhoVccllosLclS...-S.....D....LppALc+FQcVGStKKlhsppssptclL-cp..LcsYN+cLIcahls..u.hu.sslls-KIQhsshIIspsphsVhs+.....D-Yl+cYIpEhstt.uKGtFGTPFpWTYPSKKRGpKIQlKG. 0 1 4 4 +9041 PF09209 DUF1956 Domain of unknown function (DUF1956) Sammut SJ anon pdb_1t33 Domain Members of this family are found in various prokaryotic transcriptional regulator proteins. Their exact function has not, as yet, been identified. 
21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.24 0.71 -4.19 57 836 2012-10-03 00:15:22 2006-08-18 14:51:42 6 2 775 2 151 427 30 123.30 42 54.72 CHANGED scppLpshlcshlth..lhsp....psthhs+hhhRE...hpP.osAhc..plhpphhtPhpphlsplluplhGts......ssstphthpshollGpslhatlu+pshhthhs...shsst.htplpslhpch..hphhLsul ..........................................h.R-hlltAh+sMlph...Lsp-....-T.lsl.....SK.FluREQ....L..SP..TuA.Yc..LlH-Q...VIsPLH...s+LsRLlAAaTGsD......A.sDo.ch.lLH.THALlGplLAFRLu..+ETILhRsGWs..saDc-cs-hIsp.sVssHl-llLpGL................................ 0 40 90 121 +9042 PF09210 DUF1957 Domain of unknown function (DUF1957) Sammut SJ anon pdb_1ufa Domain This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been defined. 25.00 25.00 28.40 27.10 22.80 19.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.27 0.72 -3.96 57 366 2009-09-10 21:40:42 2006-08-18 15:02:37 6 6 355 5 163 338 133 102.80 33 18.87 CHANGED .clccAscpMlclssphs...t.sstlpp..RsLsQAARELLLAQSSDWuFIhpssTss-YAtcRhccHlpRFhcLhptlp....pspl........s....pphLpplEttDslFPpIsa+..ha ...........................h.pstpphhchspt...........ssthcc...RsLsQhsRElLLspSSDWsFllsssoss.......-YAppRs+pHhppFpclhpslt....suc.h........-.......pchL.p.t.hcp.tDslFs.slDhRha...................... 0 67 120 148 +9043 PF09211 DUF1958 Domain of unknown function (DUF1958) Sammut SJ anon pdb_1tvf Domain Members of this functionally uncharacterised family are found in prokaryotic penicillin-binding protein 4. 
23.40 23.40 23.70 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.14 0.72 -3.90 9 328 2009-01-15 18:05:59 2006-08-18 15:22:48 5 2 303 6 15 172 0 65.70 53 15.00 CHANGED YKKlLSKGppcIDGKKYplccDLYDVVPKspst...+lhlc-.GplplDhsRpalssphtss.pVpspc ....YhKlLSKGEQcIsGKKYhVcNDLYDVlPpchsp...aKlsVED.G+.V+sDYPRcFlNpchuPP.oVEspp....... 0 2 3 13 +9044 PF09212 CBM27 Carbohydrate binding module 27 Sammut SJ anon pdb_1oh4 Domain Members of this family are carbohydrate binding modules that bind to beta-1, 4-mannooligosaccharides, carob galactomannan, and konjac glucomannan, but not to cellulose (insoluble and soluble) or soluble birchwood xylan. They adopt a beta sandwich structure comprising 13 beta strands with a single, small alpha-helix and a single metal atom [1]. 19.50 19.50 21.70 20.10 19.20 17.00 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.07 0.71 -4.24 2 51 2009-01-15 18:05:59 2006-08-18 15:57:36 5 14 33 6 9 53 0 171.90 24 23.72 CHANGED hQ.s.A.El..pFShsp-hpNhassGTWQApFthPsIp...pststsLphNVsLPGpuDWEEV+V.lp.hspLs.sphlpaDlhlPcV-.lsGtLRPYhsLNPGWlKIGlD..psslsshphVohcsppY+hhHVplEFsthPsVNELalslVGs+LtYcGPIaIDNVpLaKK ...............................................t..........................................ht..t....t......sssuLclsls.hsss..s..s.W-Ehc.......l....h......p......th.....t.......c...l..sshptlca-.lalP..ps..p..t...h...s.Gslps.........h.us..l...s..s.GW.s.c....l....shsh.p.ph.s..lpshcp.l....pl..s.GppYtp..hpssh....p....h...s...p...s...t...p.hs...pL...hlplsGsphsY.sG..sIYlDNlcL...t... 2 5 7 9 +9045 PF09213 M3 M3 Sammut SJ anon pdb_1mkf Domain Members of this family of viral chemokine binding proteins adopt a structure consisting of two different beta-sandwich domains of partial topological similarity to immunoglobulin-like folds. They bind with the CC-chemokine MCP-1, acting as cytokine decoy receptors [1]. 
25.00 25.00 104.10 104.00 20.80 19.20 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.24 0.70 -5.99 2 8 2009-01-15 18:05:59 2006-08-21 09:23:23 5 1 3 8 0 9 0 332.90 57 90.18 CHANGED SsVshpohshsp.cpt--hpsa......CLh.sphpsT.Cus.hccl.p+shapL.shCNVKsphhVsa.shcchG..hhpuRLPhPohuusssscll+VLVlAEus....pP.ccaaA.lth.T...shtLoD.NshFpoca.plW.lsls+p.VDlshhhtuhhhtt..usplTlhhsYssTFTWCGpl.uls-.shP.PShpAhpsl...Chs.hRY.sup.Fpc.DGCptEoth.p.ohlhPh...Gs.spphphNTCsCahKYs.lp.Lsshc+lhlhslush.uhhpPlYVhssYFsSoc.Ns.t.us.L.aCsl.hppts.Ghapo..pus.pCPh+hs.Gpsp.VL.s+hs..sh.plVGlolhh-GQpaRlpYhG SsVshpohshsp.cpt--hpsa......CLh.sphpsT.Cus.hccl.p+shapL.shCNVKsphhVsa.shcchG..hhpuRLPhPohuusssscll+VLVlAEus....pP.ccaaA.lth.T...shtLoDhNshFpoca.pIW.lsls+p.VDlshhhtuhhhtt..usplTlhhsYssTFTWCGpI.uls-.shP.PShpAhpsl...Chs.hRY.sup.Fpc.DGCptEosh.p.ohlhPh...Gs.spphphNTCsCahKYs.lp.LsAhc+lhlhslush.uhhpPlYVhssYFsSoc.Ns.tPusKLYHCALQMTSHD.GVWTS..TSSEQCPIRLVEGQScNVLQV+VAPTSMP+LVGVSLMLEGQQYRLEYFG 0 0 0 0 +9046 PF09214 Prd1-P2 Bacteriophage Prd1, adsorption protein P2 Sammut SJ anon pdb_1n7v Domain Members of this family form a set of bacteriophage adsorption proteins, composed mainly of beta-strands whose complicated topology forms an elongated seahorse-shaped molecule with a distinct head, containing a pseudo-beta propeller structure with approximate 6-fold symmetry, and tail. They are required for the attachment of the phage to the host conjugative DNA transfer complex. This is a poorly understood large transmembrane complex of unknown architecture, with at least 11 different proteins [1]. 
25.00 25.00 1204.10 1203.90 17.00 16.70 hmmbuild -o /dev/null HMM SEED 560 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -13.00 0.70 -5.95 2 7 2009-01-15 18:05:59 2006-08-21 09:39:33 6 1 6 2 0 8 0 555.70 90 93.92 CHANGED AshsVPKLG.FPshAVhDIDNVP.DSSsTGSRWLPSlYpGuNYauGGPQtLpAphusFDSssRLPYNPRT-sNPAGNCAFuFNPFGQYISNISSAQSVHcRIYGID.NsEPLFoPNAASITNGGNPTMSQDhsYHNIGPINoAYKAEIFRPVNPLPMSDTsPDPETLEPGQs.PlIKoDGlYosSGIAuFIFD+PVTEPNPNWPPLPPP.IPIIYPhPALGIGAAAAYGFGYQVThYhWEplPVEFIADPtTCPApPTTDKVIIRTTsLNPEGoPCAY-suIhLVRQsuNPMNAVAGRLVP.V.DIsVDIFLTGKFFsLsPPhRlTNNYFAD-pVpE.TVThGNapsshuusYatVYpTDGhGhApsFIu.GGuGlSALlpLQDsoVlD.LaYSlPLSlGGStushcEWlANNsGhaPhShGhsKosLlEIPRRpLEAIpPQssPG..DlFhLD-SuuYASFSSFIGYspuAYYVAGAuTFMDVENPDpIIFlLRsGtGWYuC-IuDALhI.....uDsEaDSVDYFAapGGVMFIGSARYTEGGDPLPIKYRAlIPuLP ANFNVPKLGVFPVAAVFDIDNVPEDSSATGSRWLPSIYQGGNYWGGGPQALRApVSNFDSsNRLPYNPRT-sNPAGNCAFAFNPFGQYISNISSAQSVHRRIYGIDPNDEPLFTPNAASITNGGNPTMSQDTGYHNIGPINTAYKAEIFRPVNPLPMSDTAPDPETLEPGQs.PLIKSDGIYSsSGIAuFIFD+PVTEPNPNWPPLPPP.IPIIYPTPALGIGAAAAYGFGYQVTVY+WEEIPVEFIADP-TCPAQPTTDKVIIRTTDLNPEGSPCAYEAGIILVRQTSNPMNAVAGRLVPYV.DIAVDIFLTGKFFTLNPPLRITNNYFADDEVKENTVTIGNYTTTLSSAYYAVaKTDGYGGATCFIASGGAGISALVQLQDNSVLDVLYYSLPLSLGGSKAAIDEWVANNCGLFPMSGGLDKTTLLEIPRRQLEAIsPQDGPGQDDLFILDDSGAYASFSSFIGaPESAYYVAGAATFMDVENPDEIIFILRNGAGWYACEIGDALKI.....ADDEFDSVDYFAYRGGVMFIGSARYTEGGDPLPIKYRAIIPALP 0 0 0 0 +9047 PF09215 Phage-Gp8 Bacteriophage T4, Gp8 Sammut SJ anon pdb_1n7z Domain Members of this family of viral baseplate structural proteins adopt a structure consisting of a three-layer beta-sandwich with two finger-like loops containing an alpha-helix at the opposite sides of the sandwich. The two peripheral, five-stranded, antiparallel beta-sheets are stacked against the middle, four-stranded, antiparallel beta-sheet. Attachment of this family of proteins to the baseplate during assembly creates a binding site for subsequent attachment of Gp6 [1]. 
25.00 25.00 86.40 25.90 19.70 24.30 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.86 0.70 -5.28 10 76 2009-01-15 18:05:59 2006-08-21 09:49:36 5 2 62 12 0 68 1391 281.80 34 83.52 CHANGED uIVT...sKFRTcshhNFYcolG...............Dsss+sTIYhoFGRscsWucNEs-stFAPPYPsDShcGlsDsWocMlGslKIspShLcsVVPR+DW...............................GDoshssPppFaluDIVVVNStPhNpT-.u.uGWMVYRClDlP-s........GtCSIsolssKsEClplGGcWTss.....tcSltsPpGpusu...I-sG.DGYlWEYLYTIPPDssINcCTNEaIVVPaP-ELhtDPsRWGY-ssls...........W.ssch-llYRhKssTlRF+AahDSlhFspsuhsGNpGFRQlSlIlNPL.hKucPsss-VKAsts.tYsspplphcSGpMI.........YMENRpPIh+ohDQTEElsIlFsF .....................................ullT...spFRh.phhpFhps.lt...............ss.spsplYhhhGRspsWsspp...........uPP...PsDshpthtshaspMhuhh+l.tS.httVl.RhDW.........................ucsthss...shphh.s-hhVsN...........usa.VY+Cl.ssss........s............s...Gt..Tsp.....hpS...sth.........lss...G.DGYhWcYlYpIPsssslphhoN-ahsV..........sphuhcssls...........a.pschchlah.p..ssoh.p......puhhDul...hspsuhsGstG..plSllssP..tpupsssssVcus...ts...sYshsclphcSGphl.........Yh-NRpsIhpuhDQsE-lpIlhpF.................................... 0 0 0 0 +9048 PF09216 Pfg27 Pfg27 Sammut SJ anon pdb_1n81 Domain Members of this family are essential for gametocytogenesis in Plasmodium falciparum. They contain a fold composed of two pseudo dyad-related repeats of the helix-turn-helix motif, serving as a platform for RNA and Src homology-3 (SH3) binding [1]. 
25.00 25.00 119.70 118.90 22.30 19.90 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.29 0.71 -4.69 2 7 2009-01-15 18:05:59 2006-08-21 10:00:20 5 1 5 1 4 8 0 176.60 49 44.52 CHANGED Y+YEc..cpctsL.shIppls-lEFpusss..YLhhhlspD..KaN.uLpDp.uIh+.lpKsQNch..hhl..clpsshs.RIS-RLhsashDK-lTt.YlKKlcDhhhlEpcshcph.h.Vcpt+php-KKRlhNshc.I+hha-o...hp.lphscDph.sAhhRlSphlsDl........I.hLP .............................hI.plhplEFcssps..YLhhhls..pD.EKaN.uLcD+luIh+.lpKNQN+as.Fhl..clpDshs.RISDRLhsYChDK-lTEsYlKKlcDhhhlEpcVhEpl.h.V-Ht+ph+EKKRlhNDhcLI+hha-oh.hspslphTDDQacsAAhRlSpFl.Dl........I.......... 0 2 2 3 +9049 PF09217 EcoRII-N Restriction endonuclease EcoRII, N-terminal Sammut SJ anon pdb_1na6 Domain The N-terminal effector-binding domain of the Restriction Endonuclease EcoRII has a DNA recognition fold, allowing for binding to 5'-CCWGG sequences. It assumes a structure composed of an eight-stranded beta-sheet with the strands in the order of b2, b5, b4, b3, b7, b6, b1 and b8. They are mostly antiparallel to each other except that b3 is parallel to b7. Alternatively, it may also be viewed as consisting of two mini beta-sheets of four antiparallel beta-strands, sheet I from beta-strands b2, b5, b4, b3 and sheet II from strands b7, b6, b1, b8, folded into an open mixed beta-barrel with a novel topology. Sheet I has a simple Greek key motif while sheet II does not [1]. 
25.00 25.00 34.10 33.60 24.40 23.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.92 0.71 -4.65 6 84 2012-10-02 12:51:43 2006-08-21 10:21:06 5 3 73 3 10 59 2 147.10 46 39.22 CHANGED chuscsahhahKRLoANDTGATGuHQuGhYIPpshsppLFPslN+s+-pNPolhlss+hpSHpssDSphRAIYYNs+hhs...tTRNEtRITpaGtuts.hhs.csTGALsllAF...ctstcsthscsWVCsos-EtDllEutlGpllPGu....lhss.uupI ..........sspshhlYlKRLSANDTGATGGHQsGlYIPpsh.s-c..LFPslsc..s+phN..Polhlps+hsoc.sss-.SphRhlYYNs+has...tTRNEtRITRaG+s.s.l.ss-NTGuLhlLA.....cts.....tcsp.hphaVhsos-E.Dlh.sthGpllPGs....Lh.s.ttpl.................................. 0 4 6 8 +9050 PF09218 DUF1959 Domain of unknown function (DUF1959) Sammut SJ anon pdb_1nxh Domain This domain is found in a set of uncharacterised Archaeal hypothetical proteins. Its function has not, as yet, been described. 21.10 21.10 21.70 21.40 21.00 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.54 0.71 -4.31 5 29 2009-09-10 17:31:46 2006-08-21 10:27:44 5 1 29 2 19 28 0 115.10 42 84.75 CHANGED KhNlIKuNRalMEDVIlPISKALKlslEEVIDIFscKLDhuSLYELHAYsEQA+MGCLGRKVDIDLGLCWlsDFFGLISK-DADLIRKKVVE-hIlcKKPYKEALEEGR+hllcLLK ............KhplIcs.Ra.hE-lllPlSKtLplsh-Elh-lFhcphDhuoL.plHAhhEpA+hsCLsc+lDhD.LuLCWlsDahsLIS+c-ADhIRcKVscphllpp+sYc-AL-EGRphllclLK..................... 0 4 9 15 +9052 PF09220 LA-virus_coat L-A virus, major coat protein Sammut SJ anon pdb_1m1c Domain Members of this family form the major coat protein of the Saccharomyces cerevisiae L-A virus [1]. 
25.00 25.00 27.00 25.30 23.30 21.20 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.35 0.70 -6.03 2 18 2009-12-01 14:28:50 2006-08-21 10:41:59 5 2 13 2 5 23 0 311.60 31 49.63 CHANGED ML+FVsp.spctpssLap.ppscGThsshsRlRsDFKaDsLsFsRshssSQcaThVGpshsshsEspS.L-GlsKKYLTLDGuluhDNVhpEL+sosGh.uNhlAuHAYNlsuWRWYDNHVALLhNhLRhYhLpsLsEpuphSsGchPhYcDGHVhIcLssTl..pstsspasWPucRus-SYP.Ws.hoE.hPshDsPalDlRPLT.pEsphVLMMhucW+.pTNhtlDa.sPpLA-KhhYRat.slpshsEWl-u-tTsspah.PpS+VhhSALRKYVsHNpLYNQFYTAspllAQlMhpshPssAEGhsWLhHss.VplPKFGSlRGRYPFL.SG-AAhIQApALEDWuAlhAKPELlFTYuM.lussLNhGLhlRcsKtohhhsp.csSa-DshFLpPETFhtuAluhsTG.DAPLNGMuDVYVhYP-Llph .................................................php..hps...p+.h........p.hs.lG...s.hsps.s.L-Gltt.hhs.cGslshs.l..tLpp.shh.ts..s.p.ht.tth.ahcNhsuLLhNhLRhYhlt.ltp.tth.pssph.hYcsGHspl..s.tl...sts.pht...WP.....spt.ptths.hs.h....P...ssshl.hpshs.pcsthlL..hhupat..sshtlsa..PpLspphhhph...ht..s...........h..h..psthhhsAl+cYVstNplYspF.sAhtlluQlhhoshPssAEuhsWLhp.s.VslPpFuSlRGhYPhL.pG-uhhhptpAl--WsthhspP.phlFohu..Mhhuss...lphGLhhRpsphp.hhst...th-p.hhlpscohhtuhhuhsoG.phs..t.tshhh......................... 0 0 1 5 +9053 PF09221 Bacteriocin_IId Bacterioc_AS-48; Bacteriocin class IId cyclical uberolysin-like Sammut SJ, Coggill P, Eberhardt R anon pdb_1o82 Domain Members of this family are membrane-interacting peptides, produced by Firmicutes that display a broad anti-microbial spectrum against Gram-positive and Gram-negative bacteria. They adopt a helical structure, with four or five alpha helices forming a Saposin-like fold [2,5]. The structure has been found to be cyclical [1, 3, 5]. It should be pointed out that one reference [4] implies that both circularin A and gassericin A are class V or IIc-type bacteriocins; however we find that these two proteins fall into different Pfam families families, this one and BacteriocIIc_cy, Pfam:PF12173. 
23.00 23.00 23.30 23.00 22.00 21.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.14 0.72 -4.06 17 111 2009-09-11 17:00:14 2006-08-21 10:51:36 5 1 108 12 12 62 0 63.50 51 69.18 CHANGED AutlGIususAtplVsllssuuolsolIulluulsuuGshu.......sulhAslKphlp+pGpttAssW .......AGTLGISTtAAsTVVNlIsAhSTVsulISIV.GAlTGsGuIu.......uGIsATVhhllKKpGtAtAAhW....... 0 5 8 9 +9054 PF09222 Fim-adh_lectin Fimbrial adhesin F17-AG, lectin domain Sammut SJ anon pdb_1o9w Domain Members of this family are carbohydrate-specific lectin domains found in bacterial fimbrial adhesins. They adopt a compact, elongated structure consisting of a beta-sandwich with two major sheets: one consisting of five long strands in mixed orientations, and a front sheet with four antiparallel strands, forming an immunoglobin-like fold [1]. 25.00 25.00 25.40 25.10 21.30 21.10 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.22 0.71 -4.97 2 22 2009-01-15 18:05:59 2006-08-21 11:05:39 5 1 9 15 0 29 0 167.20 50 47.14 CHANGED VSFIGSTENDVGPS.GSYSpTHAMDNLPFVYNTG.NIGYQNANVWRIStGFCVGLDGKVDLPVVGSLDGQSIYGLTEEVGLLIWMGDTNYSRGTAMSGNSWENVFSGWCVG.NhsSTQGLSV+VpPVILKRNSSApYSVQKTSIGSIRMRPYNGSSAGSVQTTVNFSLNPF ................................................................PS..uYSpsauhDNLPFha.N.sGhsI.......tYQsuNs.....a+h.....osG......FCssL-uKsDLPVlGoLDGQSIYsLT-E..lGlLIahGDTNYSRsoAhsGNSWpsVFSG.WCsu...hSoQGhSV+VhPVlLKps.uu.upYoV.+TpIGSIRhR.hssSphGph................................ 0 0 0 0 +9055 PF09223 YodA YodA lipocalin-like domain Sammut SJ, Bateman A anon pdb_1oej Domain Members of this family of prokaryotic domains have been identified as part of the response of bacteria to a challenge with the toxic heavy metal cadmium. They are able to bind to cadmium, and ensure its subsequent elimination [1]. 
20.50 20.50 20.60 20.90 19.80 19.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.17 0.71 -4.63 33 1239 2012-10-03 08:47:39 2006-08-21 11:21:21 6 5 1174 5 75 565 3 177.80 56 49.23 CHANGED -pccplhpGYF-DspVKDRsLSDWpGcWQSVYPYLpcGTLDtVhsaKAccsc.chTApEYKsYYcpGYpTDV-pIsI..cssslTFhp.sspspospYpYsGacILTYcpGNRGVRalFcts-usu.st..P+YlQFSDHsIuPpKusHaHlYaGs-ppt.LLcEl-NWPTYYPupLouc-IscEMlAH ...................................-.s-ppstsGhF-DssVcD..RsLSDasGsWQSVYPaLpsGpLD.....VFchKAct.s.t..chThtEhKsYYcK.GYpT.DlppIsI..c.c.s..p.lEFhp.....s......s....p.....s....p..o.s.p.Y.cY.sG....hK.IL...TYppGp+.GV....R....a....L....FEs...p.Ds..s.u..tt..KY.lQF......SD.H.IA.P.p.K.u.pHFHIFhGs-SQpuLL...p.EM-N...W.PTYY.P.pLoupElspEMluH............................ 1 20 36 56 +9056 PF09224 DUF1961 Domain of unknown function (DUF1961) Sammut SJ anon pdb_1oq1 Domain Members of this family are found in a set of hypothetical bacterial proteins. Their exact function has not, as yet, been determined. 25.00 25.00 28.80 25.80 19.50 16.40 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.61 0.70 -5.25 3 41 2009-01-15 18:05:59 2006-08-21 11:26:34 6 1 38 4 21 39 9 214.70 46 84.29 CHANGED REG.uLLYpNPLuSPcDV+GWVMEGsGpluFcDGuLHLSs.hDsEclGD-AHFVFWCPETFPDGIlVoWDFhPlcEPGLCMlFFAAAGhsGEDLFDucLAcRTGpYPQYHSGDINALHLSYFRHKHA-ERAFRTCNLRKSRGFHLVApGADPLPPs-DAcsPYRMKLIKDGuYV+FSINGLPILEWTDDGcRaGPVLGuGKIGFRQMAPL+AAYRNFsV ......................................................thlYpNsLpSspDVtsWhhEG.su.ploh...sssthcLp..h...phsppu+FVaWCPEsFPDsI..hloW-FpPlc-..PGLsMLFFuA.AGhs..Gc.DlF.DsuLt.....RTGpYPpY..HSGDINsLHlSYFRR+as-ER.AF+TCNLRKStGF.HLVApGADPl.PsssD........Ap.u......sYRh.cllKDtstV+FuI.N......sLPlhpWpDDG.ss.hGPVLspG+IGFRQMAPhtAsY+sLpV.... 
0 9 15 20 +9057 PF09225 Endonuc-PvuII Restriction endonuclease PvuII Sammut SJ anon pdb_3pvi Domain Members of this family are predominantly found in prokaryotic restriction endonuclease PvuII. They recognise the double-stranded DNA sequence 5'-CAGCTG-3' and cleave after G-3, resulting in specific double-stranded fragments with terminal 5'-phosphates [1]. 25.00 25.00 118.70 118.40 18.60 18.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.89 0.71 -4.53 3 14 2012-10-11 20:44:45 2006-08-21 11:42:23 5 2 14 21 3 19 2 144.30 63 82.21 CHANGED HsDhsKLlcLWPpIcEYQcLAsKHGINDIFQDNGGKLLQVLLILGLTVLPGREGNDAVDssGsEYELKSVNlELTKuFSTHHHMNPsIIAKYRQVPWlFAIY+NIsIcuIYRLcPDDLEsFYDKWERKWY-DGGKDINNPKIPVKYVMEYGclIW .......DhttL.tLaPplpcaQtLAp+aGINDIFQDNGGKLLQVLLlhuLp.llPGREGNDAVDssGsEaELKSVNl-LT.KSFSTHHHMNPsIIuKYRQVsWlFAlYpsIslpplYhLpPc-LEsFYsKWEcpWapcsGKDINNPKIPlKYVhEaGplla... 0 0 2 2 +9058 PF09226 Endonuc-HincII Restriction endonuclease HincII Sammut SJ anon pdb_1xhv Domain Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence 5'-GTYRAC-3' and cleave after Y-3. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates [1]. 
19.30 19.30 20.10 312.90 18.70 19.20 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.84 0.70 -5.03 2 10 2012-10-11 20:44:45 2006-08-21 11:55:36 6 1 9 49 3 12 3 256.50 73 99.34 CHANGED SFIKPIYQDINSILIGQKVKRPKSGTLSGHAAGEPFEKLVYKFLKENLSDLTFKQYEYLNDLFMKpPAIIGHEARYKLFNSPTLLFLLSRGKAATENWSIENLFEEKQNDTADILLVKDQFYELLDVKpRNISKSAQAPNIISAYKLAQTCAKMIDNKEFDLFDINYLEVD.ELNGEDLVCVSTSFAELFKSEPSELYINWAAAMQIQFHVRDLDQGFNGTREEWAKSYLKHFVTQAEQRAISMIDKFVKPFKKYIL ........SFhKsIYp-INspLlGppVP+Pp...SGTLSGHAAGEPFEKLVYpFLK+pLsDhTFKQYEYLNDLahKNPslIGHEARhKLFNSPTLLFLLSRGKsATcKWSIEN.FEEKQNDTADILLVKDpFYELLDVKTRNISKSAQuPNIISAYKLAQTCAKMIDNcEaDLFDINYLElDW.ELss--.LlChSTaFAELFKSpPS-LYINWAAAMQIQFHVRDLDQtFsGTREEWA+uYLKHFVsQAcpRAssMIsKFVKPFcKYIl. 0 1 3 3 +9059 PF09227 DUF1962 Domain of unknown function (DUF1962) Sammut SJ anon pdb_1uoy Domain Members of this family of fungal domains are functionally uncharacterised [1]. 21.30 21.30 21.80 118.50 20.60 18.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.42 0.72 -3.93 3 8 2009-01-15 18:05:59 2006-08-21 12:10:45 5 1 7 1 4 7 0 65.20 70 72.20 CHANGED DTCGSGYGGDQRRTN......SPCQuuNGDRHFCGCDRTGVVECRGGKWTEIQDCGSSTCHGTNDGGAsC DTCGuGYGGDQRRTN......SPCQAuNGDRHFCGCDRTGVVECRGGKWTEIQDCtuSTCHGTNDGGApC 0 0 1 4 +9060 PF09228 Prok-TraM Prokaryotic Transcriptional repressor TraM Sammut SJ anon pdb_1us6 Domain Members of this family of transcriptional repressors adopt a T-shaped structure, with a core composed of two antiparallel alpha-helices. These proteins can be divided into two parts, a 'globular head' and an 'elongated tail', and they negatively regulate conjugation and the expression of tra genes by antagonising traR/AAI-dependent activation [1]. 
21.40 21.40 22.00 22.60 20.90 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.17 0.72 -3.87 14 32 2009-09-10 15:54:28 2006-08-21 13:14:02 5 1 24 12 10 38 1 100.10 36 95.07 CHANGED Mphtsospss......sEtcshtuhhsuhpcu-LEsLsluAIR-HR+Llsts-slap-h..ussDspsusushpshptEYlptphcpcAQQptLosll-hLGalPcV ................Mp..sus.ss.....tsEh+shhuhhpulspu-LEsLolsAIRpHRpLltpA-pla.pt...hs-c.psupushts.phcYlctphchcAQ.ptlssllshLGalPcV........... 0 1 4 7 +9061 PF09229 Aha1_N Activator of Hsp90 ATPase, N-terminal Sammut SJ anon pdb_1usu Domain Members of this family, which are predominantly found in the protein 'Activator of Hsp90 ATPase' adopt a secondary structure consisting of an N-terminal alpha-helix leading into a four-stranded meandering antiparallel beta-sheet, followed by a C-terminal alpha-helix. The two helices are packed together, with the beta-sheet curving around them. They bind to the molecular chaperone HSP82 and stimulate its ATPase activity [1]. 22.50 22.50 23.00 23.00 21.30 20.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.58 0.71 -4.34 69 522 2009-01-15 18:05:59 2006-08-21 13:38:25 6 16 302 7 363 496 9 134.10 25 44.42 CHANGED -KsstsWu+phlcphlss.....lp..h.psssh............................php...lsclsp..lcG.-usVspRK.GKlIsha-hclphpapGp....................tspst................hpGplplP-lup-.s..-t--hph.pl............shp.scssptp.....hcsll+pphhsp..l+ptltpahpcLhtp...up .................................................-KssssWu+phlcp.h.Lhs.......lp.....s..p.ssss............................................psclsc..lpp.l-G.-us.lspRK.G..KlIhha-.hp.lpLpapGp......................................................................spss................hpGplplPpluc-.s...c.--h...ph...pl...................................sht...ps.p..s.ptp.........lcshh.+.pp...h..h...sp...l+ptltpahptLhtpa.................................................................... 
0 116 202 295 +9062 PF09230 DFF40 DNA fragmentation factor 40 kDa Sammut SJ anon pdb_1v0d Domain Members of this family of eukaryotic apoptotic proteins induce DNA fragmentation and chromatin condensation during apoptosis [1]. 25.00 25.00 52.10 36.10 23.10 23.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.69 0.70 -4.99 9 108 2009-01-15 18:05:59 2006-08-21 14:03:21 5 3 71 1 54 108 0 223.70 45 71.91 CHANGED tllptAcpLloD-cuPcpp+lLushlpslp-phptEsR--DscWFEGl.-spF+oKpshM+hsspSRIRGYhpcscshhops....tsspA+c.hpchL-thpppLpuscY.ushFDRu.tctt.......................................................RLCTs-GWFoCQGsFDps..sCsttHoINPYusREuRILFSTWNLDHhIEK+RoVlPulscAlcc.h...............cG+clshtYFYpLLFThpNLKLVHIsCHcKssHcLp......CDps+hY+ ..........h...pthcphls-.pt.ppp+lLssh.........ppphthpp+sEcspWFcGh..EpR.F+oKpthhphpspoRlRuYhhcspp.hops.......sscA+p.h.pllpthp.hLpuscY.u.aFDRs.tptt.......................................................................................spLCospG.FpCQGsasts..sC.....p..p..HoINPYusREshILFpTWNL.DHhIEhpRTllPsLst.slcc.................pst-lsh..aa..LFThcNLKLVHIsCHcKspHp.t......Cs.pc.Yp......... 0 11 15 32 +9063 PF09231 RDV-p3 Rice dwarf virus p3 Sammut SJ anon pdb_1uf2 Domain Members of this family are core structural proteins found in the double-stranded RNA virus Phytoreovirus. They are large proteins without apparent domain division, with a number of all-alpha regions and one all beta domain near the C-terminal end [1]. 
25.00 25.00 1167.80 1167.60 18.80 18.40 hmmbuild -o /dev/null HMM SEED 965 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.57 0.70 -7.14 2 12 2009-09-10 17:20:48 2006-08-21 14:21:20 5 1 4 2 0 14 0 964.70 62 94.51 CHANGED pSVVSRsPlPlShhslS-lpKLFDI.sIppGSohpIV-cPQsoFhlphtcshascahCl-H.sAaEPuLhhHRl+hlaShLscYsuphISEVPh.sshlsph.VpslshsKhsDRNMsshAE+L.h-.-VhsApp-.hh.Y.RpSsstsPlTFtDDL..sVRERssLY+RY.VPap.lELuLashA.phLslQYCHPhlVYpYLpsRAPsFLhlsDQluLchhSAG.GpLhPRPVhplLDYsLVY.SPLALNNLuShLhp+Iphplhhp.lstVppsLuElVssSSsVoNsASuslspMNVtGVpTlAsFIhpShLNPNISYuhlutLsLDsFssFIYGsCLhLhQAlhPPSAlsAhpRlcINNthAYFLl+hhssQsshspll.NplI.slssh.pWpSscRDlLsAIYsNLhsu-hhl.NLlppYaRtsssppsspl.lPAcpTSYGhNETRulShPYLFGssIs.htPDsRLssYKpcLsLPsRSPILIsss.tpNslslp.lphKhshIhshY..NsFVppPstWlRNusSNoALLu+FhDsssNlhGIhEslLuNsYuNAVNsYCDSVYRAslPhpWKh+.slDPpDhhFslFGlsPpY.lhs-ulPDFFAGuEDILILQLlRAla-shSN+hGpsPschF+h--V.KslpEhVSlllppKlDsp+YFT-sMRSsoFSK.tW-pFltR.VupcLPsL.psIhsQs-pl.NYMsQhhtIhPIsDpFYlV+NSGhssRGSssPlhAuool..N.lpsshhItDapthstLhhppcpVD.so.ps.hps.F.tLppIuSsEFVRSs...pucshFT-..l-AI+VNMhs+a-LphhpEpGtaSKPsplpKlMapDhhSFlcSphuchhPPlhTlPIsIhLNsLGEssSsphRMRS.tlDEYFpsasGAQlllPlshVshchhsphp-LpshFsGsVslp.cPa.lh-shcssYl.hGsHtVhlDP 
..SsVSRVPLPVSFluhpDVc+lFcIIPl+hGSTohIlDpPpVsFllhYs-sIYDDFMpIcplsAFEPSlTMHRVcslFSlhpKYCsSMlstVPThSohVu-lPVKuVTMScFGDRsMDpLApaLspEhElluAQtpNRthYVRouA.-VPhsFGDDLssAshERtN.YHpYpVPFHslELALYpLAs-LLchQYCHPTVshcaLpcRAPPFLsV-DpVu.+MhpAGsGsLMPRPVMELLDYoLVYpSPLALspLAoRL.SKISl+L+MRMVTEVQpolS-hluVSSosSs.uoSuItshNlhGVEsLslahARSlLNPNhuYA.IScLTh.AFpDFlYGoCLLLlQAhlPPSAIhApsRlhINNRLAYFLIRYIAh.ATYsRLssNpVlPphhNpDcWQ.sshDhLVAIYoNLLsGEtRLsplIphYFRGpsP.tVstIsIPAsQTuYtIsEppuISAPYLFGAPINtMAPDsRL.-aKpsLNLPPRSPIL.TNlEGsNVISLpNLhsKsDlIpAlY.LsGFs+.oPuhWIRNAu.NTAhLoKhlssVSNLouIYEAVLANTYANAlNVYCDo.Y+s-IPLNWKl.hoIcPKDhLFuVFGllPhYQL.sEAVPDFFAGSEDILILQLIpAVYchLup+LGssPTpaFHLEEVhpslScIVSILsppplDV+hYFTDS.+ShTFsKPhWDRFIRRs.ttpLPPLYclIhpQlssVYshhptMpcllPIsDaFYIsRNsGaVARGSopsIlAATSlYpNQhsVpppIssFo-AssLRLptRRVDNsShTosL-DMFYsLSSISSsEFVRSstRGcop.+hss..VssIKVNMRARYsLpIhTE-Gsh.+.PclKKhMaSDFlsFLhpHpp-P+sPllpIPITIGLNNlGtTTSTslR.cScsVDEYFKuYlGAQVlIPhDslslEplGSFsELRNhhossVVsRDKsW-IWssV-AoYVPIGNHsVpLDP 1 0 0 0 +9064 PF09232 Caenor_Her-1 Caenorhabditis elegans Her-1 Sammut SJ anon pdb_1szh Domain Her-1 adopts an all-helical structure with two subdomains: residues 19-80 comprise a left-handed three-helix bundle with an overhand connection between the second and third helices, whilst residues 81-164 comprise a left-handed anti-parallel four-helix bundle in which the first helix consists of four consecutive turns of 3-10-helix. Fourteen Cys are conserved in all known HER-1 sequences and form seven disulfide bonds. The protein dictates male development in Caenorhabditis elegans, probably by playing a direct role in cell signaling during C. elegans sex determination. It also inhibits the function of tra-2a [1]. 
19.80 19.80 45.60 45.40 19.20 18.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.05 0.71 -4.10 2 15 2009-01-15 18:05:59 2006-08-21 14:35:42 5 1 8 2 13 10 2 114.70 37 68.76 CHANGED CCo.phhECCh-.lpFupPl+Cs.thcLth.h.Vh.ChQpELau.E.p.hhNLsDoVCCsVFupD.ND.pchC.o.ChTsMQ.PuLcsspKLp+IK-Cp.ppNsLYpCFs+C..hh+pchc.EshcFpp.Cs .CCstpthcCCh-uIcFsh.lpCs.thphtt..hpshpClQpELauccs....hhslschsCCsVFtsD.sDspthChptChpsMpuPSlcussKLppI+pCp...hsNsLapCFppCpthhc.pt.c.Eshphpp.Cs................. 0 8 9 13 +9065 PF09233 Endonuc-EcoRV Restriction endonuclease EcoRV Sammut SJ anon pdb_1sx5 Domain Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence 5'-GATATC-3' and cleave after T-3. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates [1]. 25.00 25.00 35.50 34.70 21.70 21.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.62 0.70 -5.02 3 40 2012-10-11 20:44:45 2006-08-21 14:45:55 6 1 39 61 7 49 8 236.60 41 89.70 CHANGED SsLlscIpDsNppWcVKGFIDu-s+IYoLSsDTKVISKILEIalFPhIpcFAccHcFpVlLPKcQNaYPDlSFlc.K-SscKIAIDIKTTYRNccNsK..sGFTLGuaTuYFRNRpSsKNIsFPYc-YluHalLGlVYTRVssRlDELKpYsIsELs+I.SVIKslclFLQEKYKIAoDpuGSGNTsNIGSI+.+h-Dll-GKGIFuchGE-lFDDYWhNYpphsh.psSQL+scsYsNISEY..apYR.G+ ..........ht....t.lssptscWpl+GFIDhpKsIYTIouDTKllSKIlElhlFPt.lppFAccpGaclhhsppQNaYPDloFhs.....pssshKaAlDIKTTYR...pscs.s.....sGFTLGSasuYFRNRpusKNIpaPYscY.uHasLGllYoRs.s.p.p.h.-.Ep.chhsl-cLcpIsSVl+shpFFlpEKWKIASD+sGSGNTsNIGSIp..pIccllpGpGhFssL....GE-..lFD-YWh.N.a.shh.h...ptphphp.hssLtpa..hta....h............. 0 1 5 6 +9066 PF09234 DUF1963 Domain of unknown function (DUF1963) Sammut SJ anon pdb_1pv5 Domain This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been described. 
21.80 21.80 21.80 22.30 21.30 21.60 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.82 0.70 -4.11 34 835 2009-01-15 18:05:59 2006-08-21 15:34:19 5 12 604 1 134 636 11 217.00 27 78.00 CHANGED SKhGGp.PalPps...paPp..............s.ppG.........p.PhhhLAQlNhs-ls.....p.lpsa..P.pp...GlLpFFls..p..........psppps..shcVhah-phtppp.....ppl.p-hshhp.h.........h.tthpl...p.p..........sl..pshphpp.ht..h.p...........cpppchhcchh-.h...........tps...p+..lGGYP.asQ............................ps.+pttc...........................pphhLLhQ..lDo-......tphshhaGDs.Ghh.FaIpcpDLtshcFsplhhsasC ......................................................................................................SKhGGh.PhLPtt...hcaPh......................................................................s..pps......................PhhhlAQl.Nhs.-ls..............t...p.a......P...pp.......GlLp.Falstps.................p...s.spt....shRll...Yhpp.hpph.h..........ppl.schp.hstt...........................httthplth..p.t.......................psl...p.s.hchpp.h.......ch.......................tt.pphhpt.l.hpph..................pt...hpp......lGGYPhasQ..............................................................-scp.pp..........................ttt.h...l.LhQ....lDSs.............sshshhWGDh...G.hstah.IpppDLtshcFsphhhshp.............................................................................................................................................. 1 52 91 117 +9067 PF09235 Ste50p-SAM Ste50p, sterile alpha motif Sammut SJ anon pdb_1uqv Domain The fungal Ste50p SAM domain consists of five helices, which form a compact, globular fold. It is required for mediation of homodimerisation and heterodimerisation (and in some cases oligomerisation) of the protein [1]. 
20.90 20.90 20.90 21.10 20.80 20.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.55 0.72 -3.95 3 28 2012-10-02 20:42:54 2006-08-21 16:06:02 5 2 27 2 16 31 2 73.20 54 22.58 CHANGED -sFscWSTDEVlpWCtosLGl-EsDPLhpR.IRENcIsGSlLsELTLQDCKELCDsDLscAIKLKlhINKhlDScL .........-DFopWSVDDVloWClSoLEl-E.s..DP.LCp+....LRENDIsGDLLPELsLpDCp-LCD.uDLs+AIKFKILINKhpDoc................. 0 1 6 13 +9068 PF09236 AHSP Alpha-haemoglobin stabilising protein Sammut SJ anon pdb_1w09 Domain Alpha-haemoglobin stabilising protein (AHSP) acts a molecular chaperone for free alpha-haemoglobin, preventing the harmful aggregation of alpha-haemoglobin during normal erythroid cell development: it specifically protects free alpha-haemoglobin from precipitation. AHSP adopts a helical secondary structure consisting of an elongated antiparallel three alpha-helix bundle [1]. 20.50 20.50 21.10 20.70 19.30 20.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.94 0.72 -3.78 3 32 2009-01-15 18:05:59 2006-08-21 16:22:21 5 1 27 10 16 34 0 85.80 62 82.00 CHANGED QoNKDLISsGIKEFNVLLNQQVFsDPLISEEDMVTVVcDWVNFYINYYKKQVoGEQ-EQDKALQEFRQELNTLuusFLAKYRsFLKS+E ................puNKDLISsGhKEF....slLLNQ.........QVFsDPl.lSEEcMVTVVcDWhNFYINYY+pploGEpQEpD+ALQELpQ..ELNTLusPFLsKYRsFLKSp.... 0 1 1 3 +9069 PF09237 GAGA GAGA factor Sammut SJ anon pdb_1yuj Domain Members of this family bind to a 5'-GAGAG-3' DNA consensus binding site, and contain a Cys2-His2 zinc finger core as well as an N-terminal extension containing two highly basic regions. The zinc finger core binds in the DNA major groove and recognises the first three GAG bases of the consensus in a manner similar to that seen in other classical zinc finger-DNA complexes. 
The second basic region forms a helix that interacts in the major groove recognising the last G of the consensus, while the first basic region wraps around the DNA in the minor groove and recognises the A in the fourth position of the consensus sequence [1]. 20.20 20.20 20.20 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.75 0.72 -4.37 3 58 2012-10-03 11:22:52 2006-08-21 16:35:59 6 13 44 2 38 74 1 48.30 47 10.02 CHANGED stsshhc+hsGllKIKSKSQSEQPATCPICQAVIRQSRNLRRHLELRHFKKPGV ...........................................+s+upS.-pPuTCPlCtAllRQSRNLRRH....LElpHh.................... 0 7 11 31 +9070 PF09238 IL4Ra_N Interleukin-4 receptor alpha chain, N-terminal Sammut SJ anon pdb_1iar Domain Members of this family are related in overall topology to fibronectin type III modules and fold into a sandwich comprising seven antiparallel beta sheets arranged in a three-strand and a four-strand beta-pleated sheet. They are required for binding of interleukin-4 to the receptor alpha chain, which is a crucial event for the generation of a Th2-dominated early immune response [1]. 22.90 22.90 23.70 26.10 22.50 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.32 0.72 -3.88 7 71 2009-01-15 18:05:59 2006-08-22 09:17:01 5 2 37 4 27 64 0 90.20 45 18.27 CHANGED +VLp-PsCFSDYIpsSTCEWchsussNCSupLpLsYpL..F.hSENh.TClPENpu...uoVCVC+Mhh-p.VpsDsYpL-LWAspp.LWpuSFKPSppV ....chlp-.sCaSDYhut.TCcWKhsussNCSs.-LpLhYpL..a.......s...csh.oCVPENst......uus......ClC+lhh-shVssD.YpLcLauGppl...LWp.u.SFpPuppV............................. 0 1 6 10 +9071 PF09239 Topo-VIb_trans Topoisomerase VI B subunit, transducer Sammut SJ anon pdb_1mu5 Domain Members of this family adopt a structure consisting of a four-stranded beta-sheet backed by three alpha-helices, the last of which is over 50 amino acids long and extends from the body of the protein by several turns. 
This domain has been proposed to mediate intersubunit communication by structurally transducing signals from the ATP binding and hydrolysis domains to the DNA binding and cleavage domains of the gyrase holoenzyme [1]. 20.50 20.50 21.20 28.80 20.30 20.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.93 0.71 -4.70 36 213 2012-10-03 01:04:38 2006-08-22 09:42:12 6 7 190 20 129 216 138 155.40 40 26.23 CHANGED Ps.sssLSPIGE-hIcpGLcc....hps-Fluu.lTRpPpsYuGp.PFlVEsGIAYGGcl...sps..psplhRFANRlPLLYcpGuCslTc.slc..slsW+pYslcpst......uPlllhVHlsSTpVPapSsuK-uIA....-lPEIccEl+lAlpcsuRcL+paLs++c+tcctcc+ ........................PssssLSPIG--.lctGlcc....hp.P-.Flss.hT........RpPpsYpGp.PFlVEsGlAYGGcl.....................spp........phplhRFANRlPLLa-pGusslTc.s...lc..plsW+p.Ytlcpst......uPlslhVHlsSTplPapStuK-uIA....clsEItcEl+hAlpcsuRcLcpals++cptpctpc+............................. 0 41 82 109 +9072 PF09240 IL6Ra-bind Interleukin-6 receptor alpha chain, binding Sammut SJ anon pdb_1n26 Domain Members of this family adopt a structure consisting of an immunoglobulin-like beta-sandwich, with seven strands in two beta-sheets, in a Greek-key topology. They are required for binding to the cytokine Interleukin-6 [1]. 23.10 23.10 23.20 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.63 0.72 -3.83 23 523 2012-10-03 16:25:20 2006-08-22 09:59:09 5 17 48 40 212 691 0 94.90 22 19.27 CHANGED ssp..sLpChh+s...........hphlpCoWpsGtsssssspYsLaahapshppstp.....Cppahpsp.t.........phuCphsh.ph..sthp......pahlpVsuoSpsuslpst.pshphpsl ....................sp.slsChhps............hphhpCoWpsGp..s..ss...s..T.pYsLahph.ps.....p..pt.pp.......Cpph...hpsp...t............phuCphsh.p..h..p..ht......phhltlpspsttt.lps...hh...................................................... 
1 12 21 54 +9073 PF09241 Herp-Cyclin Herpesviridae viral cyclin Sammut SJ anon pdb_1bu2 Domain Members of this family of viral cyclins adopt a helical structure consisting of five alpha-helices, with one helix surrounded by the others. They specifically activate CDK6 of host cells to a very high degree [1]. 25.00 25.00 217.10 216.80 23.60 18.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.52 0.72 -3.75 3 3 2009-01-15 18:05:59 2006-08-22 10:12:15 5 1 3 5 0 5 0 106.00 81 41.30 CHANGED AVLATDFLIPLCNALKIPEDLWPQLYEAASTTICKALIQPNIALLSPGLICAGGLLTTIETDNTNCRPWTCYLEDLSSILNFSTNTVRTVKDQVSEAFSLYDLEIL AVLATDFLIPLCNALKIPEDLWPQLYEAASTTICKALIQPNIALLSPGLICAGGLLTTIETDNTNCRPWTCYLEDLSSILNFSTNTVRTVKDQVSEAFSLYDLEIL 0 0 0 0 +9074 PF09242 FCSD-flav_bind Flavocytochrome c sulphide dehydrogenase, flavin-binding Sammut SJ anon pdb_1fcd Domain Members of this family adopt a structure consisting of a beta(3,4)-alpha(3) core, and an alpha+beta sandwich. They are required for binding to flavin, and subsequent electron transfer [1]. 21.50 21.50 22.20 21.50 20.20 21.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.37 0.72 -4.02 53 230 2009-09-11 08:59:02 2006-08-22 10:22:50 6 6 178 2 112 249 205 70.00 30 16.54 CHANGED Pss.shhsNTCYSllu..PchuloVAuVYchsssp..lhslpG.......loshssssshpppEAphAhuWYpsIopDhaG ..........shhsNTCYShlu..scpulpVuulYchsssp.....lsplsG.....hhpsss...tsss.hp.ptcuphAhuWhpsIhsDhaG......... 0 28 69 89 +9075 PF09243 Rsm22 Mitochondrial small ribosomal subunit Rsm22 Mistry J, Wood V anon Pfam-B_8789 (release 20.0) Family Rsm22 has been identified as a mitochondrial small ribosomal subunit [1] and is a methyltransferase. In Schizosaccharomyces pombe, Rsm22 is tandemly fused to Cox11 (a factor required for copper insertion into cytochrome oxidase) and the two proteins are proteolytically cleaved after import into the mitochondria [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.89 0.70 -5.37 7 679 2012-10-10 17:06:42 2006-08-22 11:14:57 5 14 492 0 408 777 58 228.70 20 53.83 CHANGED chsshAYhAtRhPuTYAAV+uuLcuhApusPpFsPtShLDlGuGsGsuhWAssphWs.chppshhl-tStshhslGpplAtcssthppshhttshhhthlph..s.....DLVTluYVLsELss.spRptll-pLWstssp..hlVIVEsGTPsGapRll-ARctL....IAtGh+lsAPCPHshsCPlsss....DWCHFStRVu..RSplHRhsK....suplsaEDEKFsY..lAAsRtssuss...............ssRVltPPpltuG+VllcLCppDtphpcplsTK.R..............cG.tha+tARcucWGDta. ....................................................................................................................................t.....ah.......a...t.h.t.h.th....................................t......h..................s.t........p...h.h.DhGsG..........u.s...s...h.....h...........sh.......t....t....h..........h......t....................h...................p.................h............h.......h...l...-..........s........h........t.....hu.t..l..h....t....t.........................................................................................h....t..........................................Dll.hhsas.L....c......l...........................t.............t......c.........t.......t........h......lpph..h..t..h..h.t............hlll...lE....Gs.t..G.aphl.h..ts.Rphl................................tt.....h.hlhAPCsp..t..............C.....P.h......................................C.p..F....t..h..........................................................pt.....t..asa....l....h....h..p....................................................................................hsRlh...s.h.tttth.hphC......t...................tt........h...tt...hst..p...........................................t.......a.....h+....Gs................................................................................................................. 
0 139 251 353 +9076 PF09244 DUF1964 Domain of unknown function (DUF1964) Sammut SJ anon pdb_1r7a Domain Members of this family of bacterial domains adopt a beta-sandwich fold, with Greek-key topology. They are C-terminal to the catalytic sucrose phosphorylase beta/alpha barrel domain, and are functionally uncharacterised [1]. 21.80 21.80 24.00 73.30 21.70 16.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.40 0.72 -3.64 3 41 2009-01-15 18:05:59 2006-08-22 11:24:14 5 1 40 6 7 34 4 68.70 68 13.57 CHANGED AFDG-FSYoVDDDTSIoFoWsGt..TSpATLTFEPuRGLGVDNsTPVAoLsWoDSAGDHRTDDLIANPPV AFDG-FSYpsDGDTSIoFpWpus..sooAsLTFEPG+GLGsDNsssVAoLsWoDuAGDHcTDDLlANPPl. 0 1 2 3 +9077 PF09245 MA-Mit Mycoplasma arthritidis-derived mitogen Sammut SJ anon pdb_1r5i Domain Mycoplasma arthritidis-derived mitogen (MA-Mit) adopts a completely alpha-helical structure consisting of ten alpha helices. It is a superantigen that can activate large fractions of T cells bearing particular TCR V-beta elements. Two MA-Mit molecules form an asymmetric dimer and cross-link two MHC antigens to form a dimerised MA-Mit-MHC complex [1]. 25.00 25.00 62.90 62.70 20.20 19.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.65 0.70 -5.06 2 5 2009-09-11 16:59:48 2006-08-22 11:50:13 5 1 2 8 2 9 0 165.60 79 87.34 CHANGED SMKLRVENPKKAQKHFVQNLNNVVFTNKELEDIYNLSNKEETKEVLKLFKLKVNQFYRHAFGIVNDYNGLLEYKEIFNMMFLKLSVVFDTQRKEANNVEQIKRNIAILDEIMAKADNDLSYFISQNKNFQELWDKAVKLTKEMKIKLKGQKL.............................................................D SMKLRVENPKKAQKHFVQNLNNVVFTNKELEDIYNLSNK.EETKEVLKLFKLKVNQFYRHAFGIVNDYNGLLEYKEIFNMMFLKLSVVFDTQRKEANNVEQIKRNIAILDEIMAKADNDLSYFISQNKNFQELWDKAVKLTKEMKIKLKGQKLDhhDs.sslNpVcchFGuDcsVKplhWF+SLLl+us.hlh+YY-us..hpspoDFtKAlFE.. 
0 0 2 2 +9078 PF09246 PHAT PHAT Sammut SJ anon pdb_1oxj Domain The PHAT (pseudo-HEAT analogous topology) domain assumes a structure consisting of a layer of three parallel helices packed against a layer of two antiparallel helices, into a cylindrical shaped five-helix bundle. It is found in the RNA-binding protein Smaug, where it is essential for high-affinity RNA binding [1]. 21.50 21.50 21.60 23.90 21.40 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.48 0.72 -4.44 4 24 2012-10-11 20:01:01 2006-08-22 12:56:51 5 1 23 1 19 32 0 110.20 45 11.80 CHANGED RhslL.RlEpDLl..uGQ.hpLSTslEELTNIVLTPMKPlt.stP..EsIutpFlKVlDLVushl.tcPhC.sQD-EshsVFhWIL-RulHN-AFhsHusQLK-hKaKlSKl ................................RhphL.+lEp-Lh...........sGp.hp....LusslEELTNIVLTPMKPl.......ps.ssP....EsIuhpFlKVlDlVs..stLtt-Phs.sQDDEsLsVahWIL-RulHN-AFhsHssQLK-hKaKlsKh............. 0 4 5 16 +9079 PF09247 TBP-binding TATA box-binding protein binding Sammut SJ anon pdb_1tba Domain Members of this family adopt a structure consisting of three alpha helices and a beta-hairpin. They bind to TATA box-binding protein (TBP), inhibiting TBP interaction with the TATA element, thereby resulting in shutting down of gene transcription [1]. 25.00 25.00 25.20 25.20 23.90 24.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.95 0.72 -3.76 8 130 2009-01-15 18:05:59 2006-08-22 13:12:45 6 18 84 1 77 139 0 63.20 49 3.73 CHANGED SDuDs-p-s....shsLsGFLFGNIDpsGcL-sDsh........LDcEuKcHLuuLuchGLuShLsEl........hssc-t ...........sp-p-t-p.........shs.LsGFLFGNIspsGpLEsDsl........LDs.......EsK+HLAuLu.s.L.GLGSLloEl........sus--.................... 0 21 33 53 +9080 PF09248 DUF1965 Domain of unknown function (DUF1965) Sammut SJ anon pdb_1n9e Domain Members of this family of fungal domains adopt a structure that consists of an alpha/beta motif. Their exact function has not, as yet, been determined [1]. 
29.20 29.20 31.50 30.20 28.40 29.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.44 0.72 -4.07 7 53 2009-01-15 18:05:59 2006-08-22 13:32:51 5 7 42 7 41 57 0 73.70 35 9.63 CHANGED FDspoLlPhGLaFpSDlTGRDPSpWpL.GWlYNshFYpTTEEFRpAaaSssFsKhtPNl-...GsWutTDppGsl.P ..........sLlPlGLahthDlTGRDPSp..WplhGalYsspFYpTs-cFRpAahsssF.p+.h.ssshs....GsWutpsppGp.......................... 0 16 26 36 +9081 PF09249 tRNA_NucTransf2 tRNA nucleotidyltransferase, second domain Sammut SJ anon pdb_1r89 Domain Members of this family adopt a structure consisting of a five helical bundle core. They are predominantly found in Archaeal tRNA nucleotidyltransferase, following the catalytic nucleotidyltransferase domain [1]. 21.90 21.90 21.90 22.10 21.60 21.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.60 0.71 -4.20 41 166 2009-09-11 09:45:38 2006-08-22 13:58:42 6 5 161 44 104 174 43 112.60 42 25.93 CHANGED D-VRLLKpFhKulGVYGuEl+spGFSGYLsELLllpYGuFpslLcuAu.pW+s.shhI-hp.......p.hcpF............ccPLlVlDPVDPpRNVAAAlShcshupFlhtuRtFL.cpPShsaFhs ...................--VRLLKpFhKulGlYGuEl+spGFSGYLsELLllcYGuFpslLcsAu.pW+..shhI-hp............t..p..ppF...............ccPLlVlDPVDPpRNVAAulShcshupFhhtu+paLc.pPuhpaFh..................... 0 26 61 84 +9082 PF09250 Prim-Pol Bifunctional DNA primase/polymerase, N-terminal Sammut SJ anon pdb_1ro2 Domain Members of this family adopt a structure consisting of a core of antiparallel beta sheets. They are found in various bacterial hypothetical proteins, and have been shown to harbour both primase and polymerase activities [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.83 0.71 -4.26 98 956 2009-09-11 10:28:24 2006-08-22 14:21:46 6 32 635 4 284 971 283 175.10 18 37.07 CHANGED Ahthh.pp.GhsVhPl.....................sssKpPh................................hpsappss.p.....c.pplpp..hap......t.......................................shslulhs.............ssllllDlDsps...............................slpthtt..........thlss.....sh......sspT....sps.................GtHhaaphsss..............htt......................t..lD....l.tsss.sa............l..ls.......s.......PShp....sss....Yp......hh.....................................hsshP...........thL ............................................................hh.tt.Ghs.lh..Ph................................s.ssKtPh.....................................................hpshp.pso..s..............s.p.plpp...ahp...ph.....................................ssss.lulhs......................t.sshhllDlDsts..........................s.t...................................slpphtp...........h.hthlss.......sh..sstTsss......................................Gt.Hha.ap..h..stsh........h.pphhhh...........................................su...l-....l..p....u.p...s...sa............l..ls..s..PSht.............sts.....Yp.........h.tt............................................................................................................... 0 86 198 257 +9083 PF09251 PhageP22-tail Salmonella phage P22 tail-spike Sammut SJ anon pdb_1tyv Domain Members of this family of viral domains adopt a structure consisting of a single-stranded right-handed beta-helix, which in turn is made of parallel beta-strands and short turns. They are required for recognition of the 0-antigenic repeating units of the cell surface, and for subsequent infection of the bacterial cell [1]. 
25.00 25.00 127.50 127.30 18.20 17.40 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -12.76 0.70 -6.24 6 48 2012-10-02 14:50:22 2006-08-22 14:45:56 5 2 44 23 0 51 0 509.60 79 82.26 CHANGED phEADKKFKYSVKLSDasTLQshAsAAVDuLLIDlDYpFoNGETVDFGGKsLTIDCKAKFIGDGNLlFTpLGpGSlVtuPFMESsTTPWVIhPWT-DspWITDAAAVVATLKQSKT-GYQPTVNDYVKFPGIEoLLPspAKsQsIsSTL-IRECoGVEV+RASGLMAsaLFRuCHaCKMlDuDs..GGKDGlITFENLSGDWGhGNYVIGGRTsYGSVSSsQFLRNNGGhu+DGGVIGFTSYRAGESGVKTWQGTVGuTTSRNYNLQFRDSlsL.PVWDGFDLGADssMsPEsDRPGDaPlSQYPlHQLPhNHLIDNLLVhGSLGVGlGMDGpGhYVSNITVpDCAGSGuhhhTappVFTNIulIDTNThNFsAsQIYIpGsChVNGLRLlGI+sTsupGhsIDAPNSTlSGITG.VDPSRINVANLh-.sLGNoRINSFNsDSAuLclRIHKLSKTLDSGAlhSHlNGGsGSGSAWTElTAISGSsPDAVSLKlNRGDaRAsEIPlusolLPDsAV+DpuohuhYhEss..SLKALVK+sDGShTRlTLA ........SIEADKKFKYSVKLSDYsTLQDAASAAVDGLLIDlDYpFYsGEpVDFGGKsLTI-CKAKFIGDGNLIFTKLGKGSRIAGVFMESTTTPWVIKPWTDDNQWLTDAAAVVATLKQSKTDGYQPTVSDYVKFPGIETLLPPNAKGQNITSTLEIRECIGVEVHRASGLMAGFLFRGCHFCKMVDANNPSGGKDGIITFENL.SGDWGKGNYVIGGRTSYGSVSSAQFLRNNGGFERDGGVIG.FTSYRAGESGVKTWQGTVGSTTSRNYNLQFRDSVVIYPVWDGFDLGADTDMNPELDRPGDYPITQYPLHQLPLNHLIDNLLVRGALGVGFGMDGKGMYVSNITVEDCAGSGAYLLTHESVFTNIAIIDTNTKDFQA.NQIYISGACRVNGLRLIGIRSTDGQGLTIDAPNSTVSGITG.MVD.P.SRINVANLAEEGLGNIRANSFGYDSAAIKLRIHKLSKTLDSGALYSHIN.GGPGSGSAaTQLTAISGuTPDAVSLKVNHpDsRuAEIPFsPslsSD-hlKDuSCFlPYWEsN.sSLKALVKKPNGpLVRLTLA....... 0 0 0 0 +9084 PF09252 Feld-I_B Allergen Fel d I-B chain Sammut SJ anon pdb_1puo Domain Members of this family of cat allergens adopt a helical structure consisting of eight alpha helices, in a Uteroglobin-like fold. They are one of the most important causes of allergic asthma worldwide [1]. 
24.00 24.00 25.10 24.80 22.80 21.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.03 0.72 -4.07 10 38 2012-10-01 20:54:19 2006-08-22 16:24:57 5 1 15 6 15 64 0 66.30 42 64.32 CHANGED ClPFFcuYuuVloGu+laLpp-LStFNATstE+sAaEKIQDCapEpGlKoKlL-splMtollhSsEC .................ChsFassYsullsGs+hhLptpLuhFsATssE+sAaEKIQDCasEpGL+sKlh-splMholhhSsEC..... 0 0 0 0 +9085 PF09253 Ole-e-6 Pollen allergen ole e 6 Sammut SJ anon pdb_1ss3 Domain Members of this family consist of two nearly antiparallel alpha-helices, that are connected by a short loop and followed by a long, unstructured C-terminal tail. They are highly allergenic, primarily mediating olive allergy [1]. 25.00 25.00 28.90 28.90 19.30 18.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.46 0.72 -4.31 3 13 2009-01-15 18:05:59 2006-08-22 16:37:14 5 1 7 1 7 14 0 39.50 54 51.09 CHANGED CYDsCQKECSDcGsGYTFCEMKCDsDCosK-lKEKIENL..Kp CFssCccECpscGsG.TFCEMKCDTDChsK-lttKl......h........ 0 0 5 7 +9086 PF09254 Endonuc-FokI_C Restriction endonuclease FokI, C terminal Sammut SJ anon pdb_2fok Domain Members of this family are predominantly found in prokaryotic restriction endonuclease FokI, and adopt a structure consisting of an alpha/beta/alpha core containing a five-stranded beta-sheet. They recognise the double-stranded DNA sequence 5'-GGATG-3' and cleave DNA phosphodiester groups 9 base pairs away on this strand and 13 base pairs away on the complementary strand [1]. 
25.00 25.00 25.00 25.20 23.60 24.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.96 0.71 -5.20 12 30 2012-10-11 20:44:45 2006-08-22 16:55:39 6 3 30 3 5 37 1 174.70 34 34.62 CHANGED KsslpchKsclRcclsplsHcYLpLlDlAaDuK....pNR-...FEhhTh-Lhhp.hsFcGh+LGGoRKPDGllYpssh....GlIlDTKAYupGYsLsIsQADEMhRYl--NppRDcphNPN+WWEsFscsl..pp.ahFlaVSupFhGsFpcQLpphspcTsspGuAlsVppLLLhA-hl+sGchshp-hhchhpNs-Ihh ..........................phKt.hhpp.pplshpYlphl-lAacu+.....cs.p-...FEhhTh-LFps.htapup+LGG.u...pKPDsllassct....ulIlDoKAYucGYslshspsDcMh.RYIcpsppRccphsPs.WWc.asppl..sp.haFhalSupFsupacpQLpphspcTshpGuAlsVtpLLlhA-phpptphshcclhchhpsppl............ 0 3 3 5 +9087 PF09255 Antig_Caf1 Caf1 Capsule antigen Sammut SJ anon pdb_1p5v Domain Members of this family are predominantly found in the F1 capsule antigen Caf1 synthesised by Yersinia bacteria. They adopt a structure consisting of a seven strands arranged in two beta-sheets, in a Greek-key topology, and mediate targeting of the bacterium to sites of infection [1]. 25.00 25.00 27.80 27.00 22.70 21.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.70 0.71 -4.27 2 29 2009-01-15 18:05:59 2006-08-22 17:09:56 5 1 20 15 1 24 0 107.20 85 82.49 CHANGED VEPARITLTYKEGuPITIMDNGNIDTELLVGTLTLGGYKTGTTSTSVNFTDAAGDPMYLTFTSQDGNNHQFTTKVIGKDSRDFDISPKVNGENLVGDDVVLATGSQDFFVRSIGSKGGKLAAGKYTDAVTVTVSNQ .VEPARITLTYKEGAPITIMDNGNIDTELLVGTLTLGGYKTGTTSTSVNFTDAAGDPMYLTFTSQDGNNHQFTTKVIGKDSRDFDISPKVNGENLVGDDVVLATGSQDFFVRSIGSKGGKLAAGKYTDAVTVTVSNQ 0 0 1 1 +9088 PF09256 BaffR-Tall_bind BAFF-R, TALL-1 binding Sammut SJ anon pdb_1oqe Domain Members of this family, which are predominantly found in the tumour necrosis factor receptor superfamily member 13c, BAFF-R, are required for binding to tumour necrosis factor ligand TALL-1 [1]. 
21.00 21.00 21.20 22.40 20.80 20.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.64 0.72 -4.36 2 31 2009-01-15 18:05:59 2006-08-23 09:17:39 5 1 22 70 15 30 0 31.00 57 17.37 CHANGED PT.Cs.sECFD.LVRpCVuCtLh+TPcst.u ....PTpCspuECFDPLVRpCVuC.cLh+T..Pcst.......... 0 1 1 3 +9089 PF09257 BCMA-Tall_bind BCMA, TALL-1 binding Sammut SJ anon pdb_1oqd Domain Members of this family, which are predominantly found in the tumour necrosis factor receptor superfamily member 17, BCMA, are required for binding to tumour necrosis factor ligand TALL-1 [1]. 22.40 22.40 23.40 51.20 22.20 22.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.15 0.72 -4.00 2 32 2009-09-10 15:58:20 2006-08-23 09:26:21 5 2 24 12 15 30 0 37.40 77 22.05 CHANGED C.psEYFDSLLHAChPCpLRCSs..PPhTCQ.YCssSVT .C.QNEYFDSLL+ACKPCpLRCSs.TPPLsCQRYCNAS..... 0 1 1 2 +9090 PF09258 Glyco_transf_64 EXTL2; Glycosyl transferase family 64 domain Sammut SJ, Bateman A anon pdb_1omz Domain Members of this family catalyse the transfer reaction of N-acetylglucosamine and N-acetylgalactosamine from the respective UDP-sugars to the non-reducing end of [glucuronic acid]beta 1-3[galactose]beta 1-O-naphthalenemethanol, an acceptor substrate analog of the natural common linker of various glycosylaminoglycans. They are also required for the biosynthesis of heparan-sulphate [1]. 
20.60 20.60 20.60 21.00 20.30 20.50 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.82 0.70 -5.22 25 640 2012-10-03 05:28:31 2006-08-23 09:38:24 5 15 149 8 394 576 20 229.70 37 40.08 CHANGED FTlllhs.....hsRtphLh+llppl..ussshlspIlVlWss.scssPpp...........pphsssu.VPlpllc.................sppsslssRFhPassIcT-AVLulDDD.shlsss-lcFAFpVWp..sFP-RlVGassRtHh.hD...spspWsYso........paoscYSMVLTGAAFaH+hY.hpLYo...phhPpulRshVDcptNCEDIhMNFLVushTppPP....lKVs......ph+papcstsst..............suhsucss..H.htpRspClNpFuchaG..hMPLhtoph+h..ssshF .............................................FThlhhs......hpR.psLhphlpph.............sts.s........l.ppllVlWNs...s...cssPtp........................phs..sht..lPl..hll.p..........................spp....ssl.ssRFhPa....s..........pIc....T-A..VLulDD.D..s.hl.ss.s-lpFuFpV.......Wp.........pa..P-.R.l..V.G..as..s.RhH.......h..aD...........spp...pWtYso....................pho.sphSMVL.TG...AAFaH+.hY.hh.LYo...phhPt..s.l...+s.hVDph.h.........NCEDIh..MN.FLVup...lT.tpsP......................l.K.Vs........ttpac..pstsst........................uh.tcss...H.hhpRppClNpFsp.haG..hMPLhhoph+h..-.hha................................................................................................. 0 136 195 290 +9091 PF09259 Fve Fungal immunomodulatory protein Fve Sammut SJ anon pdb_1osy Domain Fve is a major fruiting body protein from Flammulina velutipes, a mushroom possessing immunomodulatory activity. It stimulates lymphocyte mitogenesis, suppresses systemic anaphylaxis reactions and oedema, enhances transcription of IL-2, IFN-gamma and TNF-alpha, and haemagglutinates red blood cells. It appears to be a lectin with specificity for complex cell-surface carbohydrates. Fve adopts a tertiary structure consisting of an immunoglobulin-like beta-sandwich, with seven strands arranged in two beta sheets, in a Greek-key topology. 
It forms a non-covalently linked homodimer containing no Cys, His or Met residues; dimerisation occurs by 3-D domain swapping of the N-terminal helices and is stabilised predominantly by hydrophobic interactions [1]. 25.00 25.00 169.00 168.80 24.50 22.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.47 0.71 -4.14 2 8 2009-01-15 18:05:59 2006-08-23 09:52:28 6 1 6 5 1 8 0 109.50 72 95.01 CHANGED SsTuLhFpLAa.VKKlsFDYTPNWGRGsPssaIDslTFPKVLTDKtYoYRVsVsGpsLGVcssaAVpssGuQplNFLpYNpGYGlADTpTIQVFVV.PDTsN..-aIIApW ..SDTALlFpLAWsVKKLSFDYTPNWGRGsPSSaIDNlTFPKVLTDKAYTYRVVVSG+DLGV+PSYAVpSDGSQKlNFLEYNsGYGIADTNTIQVFVVDPDTGN..-FIIAQW 0 0 1 1 +9092 PF09260 DUF1966 Domain of unknown function (DUF1966) Sammut SJ anon pdb_7taa Domain This domain is found in various fungal alpha-amylase proteins. Its exact function has not, as yet, been defined [1]. 21.30 21.30 21.30 21.60 21.20 21.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.95 0.72 -3.90 39 219 2012-10-02 20:10:03 2006-08-23 11:03:31 6 7 96 10 148 223 0 86.70 32 16.21 CHANGED slYpDs........sslAhRKGs.tGtQllsVLoNpGo........uusYslslss..sGasuGsslh-lloCsshTs....sssGslsVsMsu....G.P+VahPsshhtGSGLC .................lap-s........sslAhRKGs.tGtpllsVloNtGu.s........ussaslsls.....suass.Gppls-lloCs.s..hos....sssGslsVshss....G.P+.VhhPssh.h.u.SulC............. 0 45 74 117 +9093 PF09261 Alpha-mann_mid Alpha mannosidase, middle domain Sammut SJ anon pdb_1o7d Domain Members of this family adopt a structure consisting of three alpha helices, in an immunoglobulin/albumin-binding domain-like fold. They are predominantly found in the enzyme alpha-mannosidase [1]. 
20.90 20.90 21.00 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.92 0.72 -3.77 126 2561 2009-01-15 18:05:59 2006-08-23 11:25:36 6 27 1449 64 839 2123 87 78.00 28 8.09 CHANGED EaHpGhaTSpsp.....hK+hsRpsEphL.pssEhlsshusht........tt.........ttplpplW......................cslhlsQaHDslsGouhppVhp.Dhtpch ......................................hapGhho.S..+ht.....hKphsRcsEp.h.L....ps.s.E.LsshAtht.................................th....th.........pppLpp.h..W......................+plhhsQaHDulsGouhspVhc-hhtc.............................. 0 291 479 678 +9094 PF09262 PEX-1N Peroxisome biogenesis factor 1, N-terminal Sammut SJ anon pdb_1wlf Domain Members of this family adopt a double psi beta-barrel fold, similar in structure to the Cdc48 N-terminal domain. It has been suggested that this domain may be involved in interactions with ubiquitin, ubiquitin-like protein modifiers, or ubiquitin-like domains, such as Ubx. Furthermore, the domain may possess a putative adaptor or substrate binding site, allowing for peroxisomal biogenesis, membrane fusion and protein translocation [1]. 19.50 19.50 21.20 19.60 18.80 16.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.78 0.72 -3.93 32 271 2012-10-01 20:15:13 2006-08-23 11:53:34 6 14 244 1 190 278 1 81.00 35 7.38 CHANGED AppVplEPlTscDWEllEhpAphlEs.plLsQlRslhs.............sphlslal..sssossplpVspltPssst..................hu+lsssoEllVAP .....sppVplEPlos-DWE....IlELHAphlEp.pLLsQlRlV.s.............sphlslal........sssossplplsslpPsss....................hu+lsssoEllVAP............... 0 53 95 152 +9095 PF09263 PEX-2N Peroxisome biogenesis factor 1, N-terminal Sammut SJ anon pdb_1wlf Domain Members of this family adopt a Cdc48 domain 2-like fold, with a beta-alpha-beta(3) arrangement. 
It has been suggested that this domain may be involved in interactions with ubiquitin, ubiquitin-like protein modifiers, or ubiquitin-like domains, such as Ubx. Furthermore, the domain may possess a putative adaptor or substrate binding site, allowing for peroxisomal biogenesis, membrane fusion and protein translocation [1]. 25.00 25.00 29.70 32.80 21.60 20.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.94 0.72 -3.68 2 52 2009-01-15 18:05:59 2006-08-23 11:54:29 5 4 36 1 28 53 0 83.50 64 7.17 CHANGED GAVVolthTss+DsFh+Ls.cllAQL+L.QNpAlEVu.scQ.PsYLsWhEuRphss.upNVAElNRQhupKLGhSpGpQVFL+.Cop ....ssVTVsFTNARDCFLHLPp+LVuQLHLhQNQAIEVsWucQ.PsFLSWVEGRHhocp....GENVAEINRQlGQKLGLSsGpQVFL+PCoH..... 0 3 5 12 +9096 PF09264 Sial-lect-inser Vibrio cholerae sialidase, lectin insertion Sammut SJ anon pdb_1w0p Domain Members of this family are predominantly found in Vibrio cholerae sialidase, and adopt a beta sandwich structure consisting of 12-14 strands arranged in two beta-sheets. They bind to lectins with high affinity helping to target the protein to sialic acid-rich environments, thereby enhancing the catalytic efficiency of the enzyme [1]. 25.00 25.00 36.90 35.40 18.60 16.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.09 0.71 -4.98 2 111 2009-01-15 18:05:59 2006-08-23 12:46:17 5 8 55 9 4 48 0 178.40 52 44.74 CHANGED DVTcQVKE+SaQIAGWGGSELYp+ssoLNSpQDWQ.NAplRIhDGAANpIQsADGuRpaVVThulD.SGtLsApLNG.SuPlllt.ppucVauFHpYpltYSALsppsoLhVDGpplsoWuGEsSppN.lpFGNADuplDGRlHlQcIsLpQpGhsLVphDAhYLAQQsP.psppDLEpLGWoK.KoGNTMShYGpAS ....................................................................p...ppp.u.s..Sp.sWp.ssch+llsGuh.p..hAsGop+hlshlSlDpSGsLVsphpG.ou.hlLtotpAt.ppaHcaEL.a.s.h.sp.osohahDGphIp.s...pspsSppN.I.aGNusuphDGhhthpcIthp.QGcs................................................................... 
0 2 2 4 +9097 PF09265 Cytokin-bind Cytokinin dehydrogenase 1, FAD and cytokinin binding Sammut SJ anon pdb_1w1o Domain Members of this family adopt an alpha+beta sandwich structure with an antiparallel beta-sheet, in a ferredoxin-like fold. They are predominantly found in plant cytokinin dehydrogenase 1, where they are capable of binding both FAD and cytokinin substrates. The substrate displays a 'plug-into-socket' binding mode that seals the catalytic site and precisely positions the carbon atom undergoing oxidation in close contact with the reactive locus of the flavin [1]. 20.50 20.50 20.70 20.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.94 0.70 -5.26 18 278 2012-10-02 00:48:38 2006-08-23 13:02:20 5 5 80 16 137 330 2 242.40 36 52.73 CHANGED Pp+VRWlRllYoDFssFTcDQEhLIShpss.t.......hDYVEG.lhls.puhhsshpos..............FsPsD.s+luslss..sutVlYCLEsshaYc........ss...s.ssshDQcl-sLhppLsaltGhlFppDVsYh-FLsRV+ppEhpLRupGLW-VPHPWLNLFVP+SpItcFccuVFcuI.Lpssss.GPlLlYPhN+sKWDs+hSslhP.-...E-VFYhVGlLpSu.....suhssl-cLpppNccIlcFC......c..puGIshKQYLPaa..sopp-..W.p+HFG.s+WsRFsc+KscYDP+sILuPGQpIF ..............................................................PthV+WlRhlYssFstFotDQEhLluh.tt.........hDYlEG.lhhs....p..s..ssh......ss..................h.sstp...phsth.t....tuth..lYsL..Ehsh.as..............tt...p..tsshs....pclptlLtpLpah.uhhFtpDlsYh-FLsRV+t.tE.pLRupGhW-.V.P.HP..WLNLFlP.......cSpItcFsptVFpsl.L.pp..s...s...s..........G.PlL.lYPhs+.sKWDsph.Ss...sh.P..-.......--lFYhVuhLpou............sssslcpl.ppNpcIlchC........p..tsslthK.pYLspa....popt-..W.tpHFG....s+WppFhptKtpaDPhtILuPGQtIF................................................................. 
0 23 81 113 +9098 PF09266 VirDNA-topo-I_N Viral DNA topoisomerase I, N-terminal Sammut SJ anon pdb_1vcc Domain Members of this family are predominantly found in viral DNA topoisomerase, and assume a beta(2)-alpha-beta-alpha-beta(2) fold, with a left-handed crossover between strands beta2 and beta3 [1]. 25.00 25.00 43.80 43.70 21.60 20.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.61 0.72 -4.09 13 69 2009-01-15 18:05:59 2006-08-23 13:10:41 5 2 45 4 0 56 0 57.70 68 18.51 CHANGED haYpDGKLFpDKphop.Vsp..DNPsYEILK+lKIPsHLoDVlVYEQTaE-AhspLIFVG .LFYKDGKLFsDssFhNPVSD..DNPAYEVLpHVKIPoHLTDVVVYEQTaEEALTRLIFVG 0 0 0 0 +9099 PF09267 Dict-STAT-coil Dictyostelium STAT, coiled coil Sammut SJ anon pdb_1uur Domain Members of this family are found in Dictyostelium STAT proteins and adopt a structure consisting of four long alpha-helices, folded into a coiled coil. They are responsible for nuclear export of the protein [1]. 20.70 20.70 21.40 21.40 20.30 19.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.54 0.71 -3.86 3 15 2009-01-15 18:05:59 2006-08-23 13:26:01 5 3 4 2 15 17 2 109.90 33 13.48 CHANGED QpILNEIaKLphpQ+ETL-KMhIsQKQlLu+hssshspNscEsL+SLss-QsTLuuQl-oEloALsQlcpshILEPs-LsKLhhLLQDLoIQhKQLcLYHpELQhllsPQcPsP ..........................................p.llsphh+Lh.tQcppL.pMhh.QpplLsc................hspsp.hphhptLpscQsTL+pQI-sEhouLpplhpphIL-Ps-LpKlhhLlp-LpIQh+QLcLhHpELQhllsPppP......... 2 12 15 15 +9100 PF09268 Clathrin-link Clathrin, heavy-chain linker Sammut SJ anon pdb_1utc Domain Members of this family adopt a structure consisting of alpha-alpha superhelix. They are predominantly found in clathrin, where they act as a heavy-chain linker domain [1]. 
20.60 20.60 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.46 0.72 -6.38 0.72 -4.87 17 470 2012-10-11 20:01:01 2006-08-23 13:48:23 5 23 355 13 187 307 3 24.00 68 2.26 CHANGED -psIlPYlpspLpNs-LAl+lAsR ....EENIIPYITNVLQNPDLALRMAVR. 0 59 97 144 +9101 PF09269 DUF1967 Domain of unknown function (DUF1967) Sammut SJ anon pdb_1udx Domain Members of this family contain a four-stranded beta sheet and three alpha helices flanked by an additional beta strand. They are predominantly found in the bacterial GTP-binding protein Obg, and are still functionally uncharacterised [1]. 20.70 20.70 21.20 22.10 20.50 18.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.02 0.72 -4.31 110 2054 2009-09-11 09:14:44 2006-08-23 14:07:22 6 7 2030 1 404 1340 191 69.80 42 15.64 CHANGED Fpl...p+..csss..........hahVpGpclERhlphTshs.scEulthhtctLcphGVc-tLcctGsc.sGDtVpI..s....chpF-a ....................FpI..pR..-s..Du.......salloGp+lE+hhphTsFs.pD......EultpFA+pL+phGV--uL+.cpGA+.sGDhV+Is.....sh.EFEF........ 0 162 293 361 +9102 PF09270 BTD Beta-trefoil; Beta-trefoil DNA-binding domain Sammut SJ anon pdb_1ttu Domain Members of this family of DNA binding domains adopt a beta-trefoil fold, that is, a capped beta-barrel with internal pseudo threefold symmetry. In the DNA-binding protein LAG-1, it also is the site of mutually exclusive interactions with NotchIC (and the viral protein EBNA2) and co-repressors (SMRT/N-Cor and CIR) [1]. 
25.00 25.00 32.90 26.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.13 0.71 -4.48 7 243 2009-01-15 18:05:59 2006-08-23 14:47:35 5 7 119 7 151 235 0 146.30 56 27.06 CHANGED lCIspsopVuLFNRlRuQTVsT+aL..........................slEss......shtuustpWssFtlphh..........ss+sp.cchshp-G.hlpYGslVhLVsp.TGlt.PPlhl+KV-ptpulLss...s-PVSpLpKh....AFphp-....us+hYLslspc+l.p.....................hpss..sp..s+..lssGupW ..............LCIuSGoKVALFNRLRSQTVSTRYL..........................cVEsG............s..FpASopQWuAFhIH...Ll-........................Ds..c..u..puc-.......Fs.l.R-G...YI.+YGpsVcLVCoVTGhuLPhLIIRKV......D.KQpAlLD...........u....................D...............-PVSQLHKC....AF.hhD............scphYLCLS.pE+IlQ.....................FQ..AoPCP.KEsN+thlNDuusW..................................................... 0 61 71 112 +9103 PF09271 LAG1-DNAbind LAG1, DNA binding Sammut SJ anon pdb_1ttu Domain Members of this family are found in various eukaryotic hypothetical proteins and in the DNA-binding protein LAG-1. They adopt a beta sandwich structure, with nine strands in two beta-sheets, in a Greek-key topology, and allow for DNA binding [1]. This domain is also known as RHR-N (Rel-homology region) as it related to Rel domain proteins. 
25.00 25.00 27.30 25.20 24.40 23.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.98 0.71 -3.70 12 251 2009-01-15 18:05:59 2006-08-23 15:01:53 6 6 119 7 155 234 0 132.00 55 24.73 CHANGED slhlhHu+VAQKSYGsEKRFhCPPPhlYLhGsuW......pht....tp.hp...................tsh..sppu.....splsuahsluusst....phpphsh-st................hssA..KoLaISD.sD.KRKphpLtlphhh...........ssu....pcl......GhF.SphIKVISKPSKK+pohKss- .................................................................TVhILHAKVAQKSYGNEKR.....F.FCPPP.CVYLhGsGW................+hK........ppph.pp......................................................pup...u-pt.............sp.CualGIGsusp.....-hQpLsh-s.t......................................................................passA..KTLYISD..oD.KRKHFhLsl+hah.....................................usu......c-l....................GsFhS+pIKVISKPSKKKQSLKNsD..................................................................... 0 64 75 116 +9104 PF09272 Hepsin-SRCR Hepsin, SRCR Sammut SJ anon pdb_1p57 Domain Members of this family form an extracellular domain of the serine protease hepsin. They are formed primarily by three elements of regular secondary structure: a 12-residue alpha helix, a twisted five-stranded antiparallel beta sheet, and a second, two-stranded, antiparallel sheet. The two beta-sheets lie at roughly right angles to each other, with the helix nestled between the two, adopting an SRCR fold. The exact function of this domain has not been identified, though it probably may serve to orient the protease domain or place it in the vicinity of its substrate [1]. 
26.70 26.70 26.80 27.10 26.50 26.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.33 0.72 -3.83 2 55 2012-10-03 20:35:02 2006-08-23 15:58:54 5 4 32 6 24 43 0 105.50 65 25.71 CHANGED sLapVQlSsuDpRLhVhDpTphpW+hlCSSpsNthlAslsCEEMGFlRAlsaS.hss.puG..GspsFFCVcEutLshup+lhssl.sCcCs+GphLpshCQDCGRRhLP ...............LYsVQVSsuDuRLhVFDcTEGTWRL.LCSS.RSNuRVAGLSCEEMGFL......RA.L.sHSE..LDVRTAG.ANGTSGFFCVDEGcLPaup..RLL-VIS.VCD.CPRGRFLsslCQDCGRR.KLP................. 0 1 3 9 +9105 PF09273 Rubis-subs-bind Rubisco LSMT substrate-binding Sammut SJ anon pdb_1p0y Domain Members of this family adopt a multihelical structure, with an irregular array of long and short alpha-helices. They allow binding of the protein to substrate, such as the N-terminal tails of histones H3 and H4 and the large subunit of the Rubisco holoenzyme complex [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.43 0.71 -4.22 43 613 2009-01-15 18:05:59 2006-08-23 16:27:20 6 17 220 26 433 611 10 124.40 18 24.97 CHANGED lspsD.hhtp....KtplLcptGhsss..t.hslthsss.........hstcLLsaLRllshsspchtthtsstpstsh................slStpN............EtpshphltshspthLspYsTTlc-.Dct..hlcp...sshptp..................tphAlplRhsEKcIL .............................................................................+.phlt.....p..s.....h.tp.......h.lthst....................s.pLlsh.LR.lhth.s.t...p...ch..p.ph.t.th.tts.......................................................h..u.ss......................................Etps.hp.h.L.tstsp.hhL.ppa.sT....o.l..cc.....Dpp.....lLpp....ts....ttp.............................................................tphAlplRhtEKplL.................................................................................... 
0 156 269 351 +9106 PF09274 ParG ParG Sammut SJ anon pdb_1p94 Domain Members of this family of plasmid partition proteins adopt a ribbon-helix-helix fold, with a core of four alpha-helices. They are an essential component of the DNA partition complex of the multidrug resistance plasmid TP228 [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.52 0.72 -4.05 3 92 2012-10-02 18:44:02 2006-08-23 16:30:01 5 1 84 2 16 60 5 66.00 30 86.74 CHANGED MALEKsHTSsKKMTFGEHRDLEKVVsSPlPSGKpKRVNVNFDEEKHTRFKAACAKpGTSITDVINQLVDNWLKENE ........................................................................stt.hKRVssNh..sE-hHpRhKhtCscpGpSIsDllspLl.cpaLpp........... 0 6 10 14 +9107 PF09275 Pertus-S4-tox Pertussis toxin S4 subunit Sammut SJ anon pdb_1prt Domain Members of this family of Bordetella pertussis toxins adopt a structure consisting of an OB fold, with a closed or partly opened beta-barrel in a Greek-key topology [1]. 25.00 25.00 254.40 254.10 19.00 18.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.55 0.72 -3.96 2 5 2009-01-15 18:05:59 2006-08-23 16:43:19 5 1 5 12 1 5 0 110.00 99 72.37 CHANGED DVPYVLVKTNMVVTSVAMKPYEVsPTRMLVCGIAAKLGAAASSPDAHVPFCFGKDLKRsGSSPMEVMLRAVFMQQRPLRMFLGPKQLTFEGKPALELIRMVECSGKQDCP DVPYVLVKTNMVVTSVAMKPYEVTPTRMLVCGIAAKLGAAASSPDAHVPFCFGKDLKRPGSSPMEVMLRAVFMQQRPLRMFLGPKQLTFEGKPALELIRMVECSGKQDCP 0 1 1 1 +9108 PF09276 Pertus-S5-tox Pertussis toxin S5 subunit Sammut SJ anon pdb_1prt Domain Members of this family of Bordetella pertussis toxins adopt a structure consisting of an OB fold, with a closed or partly opened beta-barrel in a Greek-key topology [1]. 
25.00 25.00 217.50 217.30 20.60 17.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.30 0.72 -4.02 2 5 2009-01-15 18:05:59 2006-08-23 16:44:09 5 1 5 6 1 8 0 97.00 96 75.90 CHANGED PTHLYKNFTVQELsLKLKsKNQEhCLTAFMsGRSLVRACLSDAt+p+sTWFDTMLGFAISAYALKSRIALTVEDSPYPGTPGDLLELQICPLNGYCE PTHLYKNFTVQELALKLKGKNQEFCLTAFMSGRSLVRACLSDAGHEHDTWFDTMLGFAISAYALKSRIALTVEDSPYPGTPGDLLELQICPLNGYCE 0 1 1 1 +9109 PF09277 Erythro-docking Erythronolide synthase, docking Sammut SJ anon pdb_1pzq Domain Members of this family of docking domains are found in prokaryotic erythronolide synthase. They adopt a structure consisting of a bundle of four alpha-helices, and mediate homodimerisation of the protein, stabilising the resulting complex [1]. 24.60 24.60 25.00 25.00 23.60 24.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.86 0.72 -4.28 2 8 2009-01-15 18:05:59 2006-08-23 16:59:38 6 5 7 2 3 10 0 54.80 50 2.20 CHANGED uA.PsVslGsRLD-LE+AL-ALsstpGHsDVGtRLEuLLRRWpSRRsstspsssIS-D ............slhscLDcLE+ALcALPsEDG.Hs-VusRLEuLLRRWpsRRAsAsus............... 0 1 2 3 +9110 PF09278 MerR-DNA-bind MerR, DNA binding Sammut SJ anon pdb_1q08 Domain Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold [1]. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.01 0.72 -3.57 304 2740 2012-10-04 14:01:12 2006-08-24 08:59:03 6 15 1578 18 772 6957 665 64.40 30 43.61 CHANGED lpRLphIppu+plGFoLsEI+plL....sl.......pp..t.spssschp..plhpp+ltclcp+lpcLpphcpp.Lpph .............hcplthI+pAp.c.lG.hoLscI.tclL.....sL....................ps.....c..sts.s.p-..hc..........plu..p.p+....h....p....-.l.-...c....+I....p....p.Lp.thcsp.Lpth.......................................................... 0 215 470 645 +9111 PF09279 EF-hand_like efhand_like; Phosphoinositide-specific phospholipase C, efhand-like Sammut SJ anon pdb_1qas Domain Members of this family are predominantly found in phosphoinositide-specific phospholipase C. They adopt a structure consisting of a core of four alpha helices, in an EF like fold, and are required for functioning of the enzyme [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.76 0.72 -3.96 63 1479 2012-10-02 16:17:27 2006-08-24 09:12:10 6 79 244 26 795 1356 2 83.20 25 8.27 CHANGED ElpplFppa.us.p...pphlospcLhcFLpccQ+csch........s.pcpstplIpc.....aEssp.....pspcc.......thlol-GFhpYLhSs-sslhssp+hcla ....................................-l..lh.ph...us...p.......pthlo.hppLhpF...Lp.pcQ..p.-sph.........................................s...ppstpl.Ipc.............a..Essp.......................ptppc...............................shloh-.GFhpYL.h.S.p.-.sslhsspp.pl.................................. 1 179 275 510 +9112 PF09280 XPC-binding XPC-binding domain Sammut SJ anon pdb_1pve Domain Members of this family adopt a structure consisting of four alpha helices, arranged in an array. They bind specifically and directly to the xeroderma pigmentosum group C protein (XPC) to initiate nucleotide excision repair [1]. 
22.80 22.80 22.80 24.50 22.70 22.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.94 0.72 -4.51 36 513 2009-01-15 18:05:59 2006-08-24 09:22:59 6 13 297 12 316 512 7 58.10 44 15.62 CHANGED Ls.hLpspPQFpplRphlQpNPplLpslLQplupsNPpLhph...Ippsp-tFlphLsp.ssss .............L-FLRspPQFQplRpllQpNPp....lLtslLQQlGppNPpLhph.......IpppQ-pFlphLNE.ss..s............ 0 95 163 247 +9113 PF09281 Taq-exonuc Taq polymerase, exonuclease Sammut SJ anon pdb_1qtm Domain Members of this family are found in prokaryotic Taq DNA polymerase, where they assume a ribonuclease H-like motif. The domain confers 5'-3' exonuclease activity to the polymerase [1]. 25.00 25.00 76.00 74.10 21.90 21.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.89 0.71 -4.03 4 36 2009-01-15 18:05:59 2006-08-24 09:36:09 5 4 33 37 13 49 0 130.30 60 15.85 CHANGED stPcE.A.hssPEGshhG.lLspscP....hhAphtA.tAsp-uch+RAs......Plsu.A-h...+EVpuhhAKsLAshhShcGssl-PGDDPLLlAYLLDPANTN..sVA+RY.ssEWsEDAApRAhlotRLhpsL.P+L ...............shEEAPWP.PPEGAFVGFlLSR.sEP....MWA-LhALAAAp-GRVH.RAss......PhtuLcDL...+ElRGlLAKDLAVLALREGlsLsPGDDPhLLAYLLDPSNTsPEGVARRY.GGEWTEDAucRALLoERLapsLh.RL...... 0 3 8 13 +9114 PF09282 Mago-bind Mago binding Sammut SJ anon pdb_1rk8 Domain Members of this family adopt a structure consisting of a small globular all-beta-domain, with a three-stranded beta-sheet and a contiguous beta-hairpin. They bind to Mago alpha-helices via extensive electrostatic interactions and at a beta2-beta3 loop via hydrophobic interactions [1]. 20.60 20.60 20.60 25.60 20.20 18.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -6.79 0.72 -4.37 11 235 2009-01-15 18:05:59 2006-08-24 09:52:20 5 3 201 1 178 238 0 26.60 56 12.55 CHANGED G-+hIPsopRsDGohRKsl+lRsGYhP ....Gp+aIsuopRPDGThRKthRV+sGYhP.. 
1 60 94 146 +9116 PF09284 RhgB_N Rhamnogalacturonase B, N-terminal Sammut SJ anon pdb_1nkg Domain Members of this family are found in prokaryotic Rhamnogalacturonase B, and adopt a structure consisting of a beta supersandwich, with eighteen strands in two beta-sheets. The exact function of the domain is unknown, but a putative role includes carbohydrate-binding [1]. 27.00 27.00 28.30 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.51 0.70 -5.02 6 102 2009-09-11 00:34:51 2006-08-24 11:09:34 5 8 71 5 74 110 2 228.70 42 45.15 CHANGED FGhTsoGssaslDuGus..LlFpVsKoosDlsSlpYRGsEhQ.YsuKuSHIuSGLGSATVosppluu....aIKVTssoSoL..THYals+sG-ssIYMAT.hsAEsslGELRFlARLpsshLPs..ptPaupVuTTtGsou.IEGuDVFll.sGpTRSKFYSScRFIDDchH......slSGuu..spVCMlhs..saEsSSGGPFFRDIsopssu-sssLYaYMNSGHVQTEuaRhGLHGPYulsFocuGsPssu....plDTSFFs.oLGIsG ........................FGhTpousphllDsGus..LsFsVs.psssDIsSlpYpusELQ.hpup.sSHIsSGLGo.u.o..V.shp..phss........hIhVosps.......u....s....L..spYhls+pGcssIYMAT.hsscsslG..ELRFlARLssshLPs....p.shussuss......s.ps.IEGsDVFhl.sGpTpSK....FYSup......RhIDDphH......sVsGsu...htVhhlhs...shEpSSGGPFFRDIssppssstppLY.Y..M.SsHsQT.....EsaRhG.LHGPYuhhFocuusPsss......l.DhuFhs.sLslpG.................................. 0 30 45 66 +9117 PF09285 Elong-fact-P_C Elongation factor P, C-terminal Sammut SJ anon pdb_1ueb Domain Members of this family of nucleic acid binding domains are predominantly found in elongation factor P, where they adopt an OB-fold, with five beta-strands forming a beta-barrel in a Greek-key topology [1]. 
25.00 25.00 25.20 27.80 20.20 23.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.56 0.72 -4.62 108 5622 2009-01-15 18:05:59 2006-08-24 11:26:27 6 9 4549 10 1133 2891 2007 55.20 50 29.52 CHANGED VpLpVscT-PulKGsT.uoust.KPApLE.TG.hplpVPhFIppG-pIplcTcs.GpYlsR .............V-Lcls-T-PGl.KG...DT...u...........o.u.us.KPATLc..T.....G.....hs.....lpVPhFlptG-hlclcTcs.GpYluR............ 0 377 737 951 +9118 PF09286 Pro-kuma_activ Pro-kumamolisin, activation domain Sammut SJ anon pdb_1t1e Domain Members of this family are found in various subtilase propeptides, and adopt a ferredoxin-like fold, with an alpha+beta sandwich. Cleavage of the domain results in activation of the peptide [1]. 20.90 20.90 20.90 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.64 0.71 -4.15 124 1063 2009-01-15 18:05:59 2006-08-24 11:45:25 6 31 332 4 629 1085 28 137.50 21 20.46 CHANGED sshtphstssssph...l.p..lplsL.ppps..h..splcphlhpl..ssPssspY......................spa..LotpphtphauPsppslstVtsaLpstG.lsstp....................tsstphlshsuolupsc...............phh.ps..phthYp..................ssst.........hhhp..ssp..plPppls....................................shlshl....hs....hsph ..............................................t.......t.t.h.sssp.....l.p...lplsL....p.ps....h...sp..Lpphl...h.pl..ss.Psu.s.pY......................sc.a.....Lo..pphtphau.......Ps.sp......slstVtsaL.p..p...p....G...lsstp.............................ssspshlph.p.uolsps-...............phh.ss.....phptap....................tstp................hhhp.ss.p..hplPtplt....................................shlshl....hshp................................................................... 0 204 379 512 +9119 PF09287 CEP1-DNA_bind CEP-1, DNA binding Sammut SJ anon pdb_1t4w Domain Members of this family of DNA-binding domains are found the transcription factor CEP-1. 
They adopt a beta sandwich structure, with nine strands in two beta-sheets, in a Greek-key topology [1]. 17.80 17.80 18.30 17.80 17.60 17.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.42 0.71 -4.88 2 7 2012-10-03 00:25:27 2006-08-24 11:57:19 5 1 7 1 7 9 1 171.90 28 36.20 CHANGED EcWhph-VhKp+suKsSDhtFthsspcthYLWsKMtC.lPh.VKWplsppH..ppL.L+lRhVpY.tp-NlE.uIRsP.SslhKC+sHp.pEp+hPh-SFFYlhpStccao..hsucKspsFshhhhPGssQs.FDlIFhCQcpCLDLs-RRKpMCLAVFL-DENGNElLHshIKQlhIVuYPRRDWKNFCE+csshp .............h...hphpV.pt+stK.Ssh.....st.thhLWo+hts.lPh.lpWplspth...pppL.L+lRlVpY.tppslp.uI+ss.sslhKCpsHp.cEp+.hPh-uFFYlhsSspcas.....hs...p+.uppasshl.s..Gthp..ltFDlIFhCQcpChtlt-+RKphCLssFL-DE.tp.l.athlcplhlhuYPpRDhpNFp.+.............. 3 2 3 7 +9120 PF09288 UBA_3 Fungal ubiquitin-associated domain Sammut SJ anon pdb_1tte Domain Members of this family of ubiquitin binding domains adopt a structure consisting of a three alpha-helix bundle. They are predominantly found in fungal ubiquitin-protein ligases [1]. 20.40 20.40 20.40 20.50 20.30 19.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.58 0.72 -4.37 6 131 2012-10-01 23:03:33 2006-08-24 12:52:50 5 3 125 1 95 122 2 50.80 36 19.96 CHANGED DEupLYGID+-lV-pFsuQGFE+sKllEsL+RLslKohs.sDNpTsN+IlEELLK .................thh.Ghs+-LVDcFpsMGF-h-+VV-sh+hlGIcphss.s.................................. 0 21 50 80 +9121 PF09289 FOLN Follistatin/Osteonectin-like EGF domain Sammut SJ, Bateman A anon pdb_1nub Domain Members of this family are predominantly found in osteonectin and follistatin and adopt an EGF-like fold [1,2]. 
21.10 21.10 21.50 21.20 20.20 21.00 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.79 0.72 -6.84 0.72 -4.09 14 363 2012-10-03 09:47:55 2006-08-24 13:32:02 5 12 89 21 133 310 0 21.90 49 6.20 CHANGED CtNacCK+GKsCchscps+PhC ..CtNhpCttGKhCchsc.pscPpC 0 9 22 54 +9122 PF09290 AcetDehyd-dimer Prokaryotic acetaldehyde dehydrogenase, dimerisation Sammut SJ anon pdb_1nvm Domain Members of this family are found in prokaryotic acetaldehyde dehydrogenase (acylating), and adopt a structure consisting of an alpha-beta-alpha-beta(3) core. They mediate dimerisation of the protein [1]. 25.70 25.70 26.10 32.80 19.20 25.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.82 0.71 -4.27 62 778 2009-01-15 18:05:59 2006-08-24 13:46:32 6 4 619 4 192 558 124 146.10 64 47.83 CHANGED CGGQATIPlVtAlSRVs.sVcYAEIVASIAS+SAGPGTRANIDEFTcTTucAlEpVGGAp+GKAIIlLNPAEPPllMRDTVasLsc..ssD...pssIpsSlppMltcVppYVPGYRLKppspF-th....................+VolFL..EVEGAucYLP .....CGGQATIPhVAAVSRVs.....pVpYAEIVASIAS+SAGPGTRANIDEFTcTTu+AIEsVGGAs+GKAIIlLNPAEPPLhMRDTVasLs-...-Ac.......p-sIpASIp-MsctVQsYVPGYRLKpcsQ.F.-shs.st............................hsth+suVaLEVEGAucYLP.................... 0 45 122 164 +9123 PF09291 DUF1968 Domain of unknown function (DUF1968) Sammut SJ anon pdb_1oga Domain Members of this family are found in mammalian T-cell antigen receptor, and adopt an immunoglobulin-like beta-sandwich fold, with seven strands in two beta-sheets in a Greek-key topology. Their exact function has not, as yet, been determined. 
25.00 25.00 36.80 36.30 22.90 17.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.80 0.72 -4.17 4 101 2009-01-15 18:05:59 2006-08-24 13:54:43 5 2 23 79 18 841 0 84.50 59 39.18 CHANGED PAVYQL+DPpSsDpolCLFTDFDS.QsNVsp...StsSssalTspTVLDM+uMDSKSNGAlAWSNposFuCpssFp..NuohP.uDs..Ps PAVYQLR..sspSSDpSVCLFTDFDS.QsNVSp...ScsS-ValTspTVLDM+uMDSKSNGAVAWSNpSDFuCpsAFp....NuslP.usoh....................... 0 1 1 2 +9124 PF09292 Neil1-DNA_bind Endonuclease VIII-like 1, DNA bind Sammut SJ anon pdb_1tdh Domain Members of this family are predominantly found in Endonuclease VIII-like 1 and adopt a glucocorticoid receptor-like fold. They allow for DNA binding [1]. 25.00 25.00 27.00 25.80 18.50 17.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.05 0.72 -4.15 6 57 2009-01-15 18:05:59 2006-08-24 14:08:40 5 4 41 1 31 51 0 38.60 70 10.96 CHANGED sYSsFcsWLpCYhVsGMsSLRD+NGRTIWFpGDPGPLAP .DauAF+AWLpCYssPGMsSLRD+pGRTIWFQGDPGPLAP.... 0 6 8 15 +9125 PF09293 RNaseH_C T4 RNase H, C terminal Sammut SJ anon pdb_1tfr Domain Members of this family are found in T4 RNaseH ribonuclease, and adopt a SAM domain-like fold, consisting of a bundle of four/five helices. These residues may have a role in providing a docking site for other proteins or enzymes in the replication fork [1]. 25.00 25.00 25.30 27.10 22.80 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.42 0.71 -4.21 8 45 2012-10-01 19:52:02 2006-08-24 14:24:43 5 2 42 6 0 45 191 115.00 40 38.84 CHANGED GSPcpDLhsKlIKGDtKDGVAuIKsRSDallT+VEGERAPsspsKhLEsl....h-sEDP+sLLTsEEa.pRacENpcLlDFDaIPDcIuspIlppYNo.KssPRuKlYsYFVKsuLsKLls+ls-F ................GosthDhhsKllKGD+KDsVAulKsRuDahho.+VEGERsPshpsphlEtl....h-p-pscsLlTc-p..a.pRacENphLIDFDaIPDsItspIlptYNshphs.s+uKlYsYFVKsuLsKLhspls-F................. 
0 0 0 0 +9126 PF09294 Interfer-bind Interferon-alpha/beta receptor, fibronectin type III Sammut SJ anon pdb_1n6u Domain Members of this family adopt a secondary structure consisting of seven beta-strands arranged in an immunoglobulin-like beta-sandwich, in a Greek-key topology. They are required for binding to interferon-alpha [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.62 0.72 -4.00 96 681 2012-10-03 16:25:20 2006-08-24 14:40:10 5 13 59 57 291 719 22 99.50 21 28.34 CHANGED TplGPPp.lpl...pstssslplslpsP.......pspthohpslas.phtYplhhacsu..ss....cppthpsspsh.hlpsLpPtosYClpVpu.......ths+pu.hSphpChpTs ..............................................luPPt.lpl...ps.ssslplplpsP...............tttths.......h.....p.......phas.......t....h.....p.Y...plh.....hhcss..ss.............ppp.t..h......p.......s..p..t..s...h...h...lpsLp.....P.....t.o..s..YClpVps.......t.s+pu.h....Sp.pChp.......................................... 0 14 34 115 +9127 PF09295 ChAPs ChAPs (Chs5p-Arf1p-binding proteins) Mistry J, Wood V anon Pfam-B_11349 (release 19.0) Family ChAPs (Chs5p-Arf1p-binding proteins) are required for the export of specialised cargo from the Golgi.\ They physically interact with Chs3, Chs5 and the small GTPase Arf1, and they form also interactions with each other [1]. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.29 0.70 -5.70 14 327 2012-10-11 20:01:01 2006-08-24 15:27:08 5 42 208 0 196 576 86 276.20 23 43.34 CHANGED psclGoaaYssGlDsSssASlAsaLpsLs.pl.pcsQhWFGcppsaKlsphoYCsaNAFo+sDhRVpV+IPGuV-oahlDpcG-+......cpt.................s-plWtETalSullRulhhu-D.......................ssssphpplstsRphNPhoss-h-cc.......FlcshEpLFhcGhpLGussclts..PThlsNaLVcullchh+hTppacpulsll-+Lpppp.PEVssLlA+lhlhtDcElcAV+lhpcuL........pps.pD.......ssLLslQucFLlsKc+s...........chALpsAppAVpuuPSEFtTWshLsclYlcLpDhENALLoLNSCPM...oapEK.hh+hssPhc.......lHLPlPh-ssL-Elsshsssc.....ppcpsDPsLlpLsAusL+uTFtpAYpLLTEIVpphGW-pLLKhRSpVFVMEEEYR .........................................................................................................................................................t..........................................................................................hs.shs..htp.-hph....h.......t......p.................h...t.tt..................................................hW.Eh.hsthlR.h....p................................h....t..........................hh......h..h..h.......................u...............t...............................................s..h..........p....N.hL..ht..sh......h...hh...t....s.....p...h..t.....sh....phhc...p.........l..h..p...p......p....s....p.....s..............h....l....sp...l...h...h.............t...p..p...E...h......pu..lph...hpp.hl................pp..........s.................t.hL...hQu...pahhpp.tc.......................chAlph..uppusphsPspapsWhh.LsphYh.thpph-pA..L....h....s...l.N.u....hPh...........pp.p.......................................................................................................................................................................................................................................... 
2 63 120 174 +9128 PF09296 NUDIX-like NADH pyrophosphatase-like rudimentary NUDIX domain Sammut SJ, Bateman A anon pdb_1vk6 Domain The N-terminal domain in NADH pyrophosphatase, which has a rudiment Nudix fold according to SCOP. 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.44 0.72 -3.65 75 1383 2012-10-02 00:00:35 2006-08-24 15:37:11 6 14 1293 3 430 990 338 99.50 22 32.64 CHANGED puhhhlhpssplllps.......................................ththsththtth...........................shshtpslhLG...hhps........................pshaulshsttss.........................shphhsLRphh......htls.spphslhupAtpllpW ...........................................................................................t.shhll.pc.pplhlsp.......................................................s.tls.h.s.pstph.............................................shssp.pslh..lG.........phpu......................................psla.slphsttts.........................................................h.h..ssl.Rplh...................pls..ss.htlhupAhpLhpW............................. 0 106 236 334 +9129 PF09297 zf-NADH-PPase NADH pyrophosphatase zinc ribbon domain Sammut SJ, Bateman A anon pdb_1vk6 Domain This domain is found in between two duplicated NUDIX domains. It has a zinc ribbon structure. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.69 0.72 -4.43 63 1999 2012-10-03 10:42:43 2006-08-24 16:14:14 6 20 1879 3 487 1363 306 31.80 34 10.88 CHANGED psapFCupCGstsphtps.thuphC..ssCstpta .....sa+FCutCGptht.sps..t..huhhC....spCtpchY........ 0 120 265 382 +9130 PF09298 FAA_hydrolase_N DUF1969; Fumarylacetoacetase N-terminal Sammut SJ, Eberhardt R anon pdb_1hyo Domain The N-terminal domain of fumarylacetoacetate hydrolase is functionally uncharacterised, and adopts a structure consisting of an SH3-like barrel [1]. 
25.00 25.00 25.00 26.60 22.30 24.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.32 0.72 -4.05 94 764 2009-09-11 16:58:44 2006-08-25 09:49:03 6 9 631 10 393 776 109 99.60 35 23.95 CHANGED QNLPaGlFo.pss.ss....RsGVAIGDpllDLuul.ttt..................G.lhs......ts.tsshspssLNsFhuLGtssWpslRtpLppLLp.................thpsppshhptsLlstuc.sph+LP .............................pNLPaGlFS.sss..ss.........RsGVAIGDpllDLuul.ttt...................u.lhs............t..tssFs..psoLNsFhuLG+ss..WptlRtpLppLLs.........................hpsspshtppsLls.sc..sphHLP........................................ 0 104 219 322 +9131 PF09299 Mu-transpos_C Mu transposase, C-terminal Sammut SJ anon pdb_1bco Domain Members of this family are found in various prokaryotic integrases and transposases. They adopt a beta-barrel structure with Greek-key topology [1]. 20.50 20.50 20.50 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.03 0.72 -4.09 36 1172 2009-01-15 18:05:59 2006-08-25 10:46:00 6 26 862 3 233 975 33 64.70 26 11.47 CHANGED Lclhhh....stptRpVp+........sG.lphh...sh+Yhsst.Ltua..sGcp...VhlRYDPpDl.splhVapp..sG......alspA ...............................hhh.....ttpRp.lp+........sG..lphp.............sh.p.Y......h.s......st....L.t.sh..........hucp...........VhlRaDPp..Dh..splhVhp...st.....hh............................... 0 68 146 204 +9132 PF09300 Tecti-min-caps Tectiviridae, minor capsid Mistry J, Sammut SJ anon pdb_1w8x Domain Members of this family form the minor capsid protein of various Tectiviridae [1]. 
25.00 25.00 197.80 197.70 18.90 18.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.96 0.72 -4.20 3 7 2009-01-15 18:05:59 2006-08-25 14:36:31 5 1 6 1 0 4 0 84.00 98 100.00 CHANGED MALINPQFPYAGPVPIPGPAPTETMPLLNYRVEGRIAGIQQARQFMPFLQGPHRAVAEQTYYAIGTGIQMGQTFNQPLINTQEG MALINPQFPYAGPVPIPGPAPTETMPLLNYRVEGRIAGIQQARQFMPFLQGPHRtVAEQTYaAIGTGIQMGQTFNQPLINTQEG 0 0 0 0 +9133 PF09301 DUF1970 Domain of unknown function (DUF1970) Mistry J, Sammut SJ anon pdb_1w8x Domain Members of this family consist of various uncharacterised viral hypothetical proteins. 22.50 22.50 24.70 266.90 20.40 17.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.60 0.71 -4.05 3 7 2009-01-15 18:05:59 2006-08-25 14:42:11 5 1 6 1 0 5 0 117.00 96 100.00 CHANGED MDKKKLLYWVGGGLVLILIWLWFRNRPAAQVASNWEGPPYMTYNQPQAGSVTLPVAGYTSPSLTLPNRNRSCGCNPAVSAAMAQGADLASKLTDSITSQLNNYAESLNDYLASQAGV MDKKKLLYWVGGGLVLILIWLWFRNRPAAQVASNWEGPPYMTYNQPQAGSVTLPVAGYTSPSLTLPNRNRSCGCNPAVSAAMAQGADLASKLT-SIoSQLNNYAESLNDYLASQAGV 0 0 0 0 +9134 PF09302 XLF XLF (XRCC4-like factor) Mistry J, Wood V, Hentges P, Doherty A anon manual Family XLF (also called Cernunnos) interacts with the XRCC4-DNA ligase IV complex to promote DNA non-homologous end-joining. It directly interacts with the XRCC4-Ligase IV complex and siRNA-mediated downregulation of XLF in human cell lines leads to radio-sensitivity and impaired DNA non-homologous end-joining [1]. This family contains Nej1 (non-homologous end-joining factor) [2], and Lif1 [3]. 
21.70 21.70 22.80 21.70 21.50 20.90 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.64 0.71 -4.48 30 240 2009-09-11 09:52:26 2006-08-25 15:05:44 6 5 179 28 128 239 0 164.00 22 41.85 CHANGED tsWphlplsp..s.........hlhpsphs..tsoas....lhloD........Lps..lWsEclspsslhp+upppstslcs...sspphphhLpclhpsh.......sspcssphsLppt.t.....tssLhlphphclsssh.shpWsh+lpppsss....slhppLshPLlphptshpppt..............................ppLtshLpcKDt...................................slspLh-phps ..................................Wthl.lst.ps.........hlhphhhs..ppu....t....lhlo-........Ltp..lWpEplspss....l.pcspp.spplss...s.sph.hhhLpplhpsh........sspcssphslsttt....................sssLh.lp..hpspLs..sh...shp.Wshclppssss....tlhp...cLhhPLhphttthppph..............................ppLtslLppKDt...................................tlpch.-pht.t............................................................ 0 24 53 96 +9135 PF09303 KcnmB2_inactiv KCNMB2, ball and chain domain Mistry J, Sammut SJ anon pdb_1jo6 Domain Members of this family are found in the cytoplasmic N-terminus of KCNMB2, the beta-2 subunit of large conductance calcium and voltage-activated potassium channels. They are responsible for the fast inactivation of these channels [1]. 25.00 25.00 25.30 25.00 24.50 24.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.13 0.72 -4.18 7 63 2009-01-15 18:05:59 2006-08-29 09:22:20 5 2 35 1 25 55 0 29.60 80 15.20 CHANGED MFIWTSGRTSSSYRpDEKRNIYQKIRDHDLLD ........MFIWTSGR.TSSSYRHDEKRNIYQKIRDHDLLD..... 0 1 3 9 +9136 PF09304 Cortex-I_coil Cortexillin I, coiled coil Mistry J, Sammut SJ anon pdb_1d7m Domain Members of this family are predominantly found in the actin-bundling protein Cortexillin I from Dictyostelium discoideum. They adopt a structure consisting of an 18-heptad-repeat alpha-helical coiled-coil, and are a prerequisite for the assembly of Cortexillin I [1]. 
23.90 23.90 24.00 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.53 0.72 -4.00 4 18 2009-01-15 18:05:59 2006-08-29 09:39:01 5 6 11 2 12 18 0 102.60 35 23.56 CHANGED KEEKtcLEAS+s-hAN+LAuLEpSLEuEKsSp-pL....hKQKDp...LcuhLtoLcupsApRppRlpELpAKl-EhL+NLEhEKhA+hELEuRLuKsEKDKAILELKLAEAhD ..........+EE+ttL-uSpsplts+LAuLppSLEspKtSp-cL.......h+QK-p...LcstLppLcspssspspRls-LpA+ls-sl+sL-p.EKhA+.-LcsRLsKscKD+AhLEL+LtEh.s......... 0 9 11 11 +9137 PF09305 TACI-CRD2 TACI, cysteine-rich domain Mistry J, Sammut SJ anon pdb_1xut Domain Members of this family are predominantly found in tumour necrosis factor receptor superfamily, member 13b (TACI), and are required for binding to the ligands APRIL and BAFF [1]. 21.30 21.30 21.40 21.50 19.00 20.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.11 0.72 -4.16 2 66 2009-01-15 18:05:59 2006-08-29 10:41:35 5 3 27 5 27 55 0 39.30 55 22.37 CHANGED lsCRKEQG+aYDHLLtsClSCsShCsQHPpQCAaFCEp+.R ............sCp+EQGpYYDpLL+sClSCtSICG.Q.HP+.Q.CAhFCctp...... 0 2 2 6 +9138 PF09306 Phage-scaffold Bacteriophage, scaffolding protein Mistry J, Sammut SJ anon pdb_1gp8 Domain Members of this family of scaffolding proteins are produced by various bacteriophages [1]. 
25.00 25.00 25.30 25.80 24.00 24.90 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.88 0.70 -5.27 2 104 2009-01-15 18:05:59 2006-08-29 10:52:09 5 1 99 2 5 48 0 273.20 52 92.96 CHANGED M-.TT-IQuoE-LTLoGsHAAASADuLVVDNANDNAGQEEGFEIVLK.DE.tPKQDPApNAEFARRRIERKRQRELEQQMEAVKRGELPEpLRVNP-LP.QPD.NsYLSE-uLAKYDYDpSRALAAFptANoEW.hKA.DARSpAVAEQGRKTQEFTQpSAQYVEAARKHYDAAEKLNIPDYQEKEDAFMQLVPPAVGADIMRLFPEKSAALMYHLGANPEKsRQLLAMDGQSALIELTRLSERLTLKPRuK.lSpAP.sDpPIpGcssAANhsAIcKQM-AAAsKGDVETYRKLKApL.KGIR ..................................................................................................................................................................................EpQhEsh.c.RtpL.EuLts.pPs..pQPpssAh..scssLtphDYDpp....AFppA.T-W.p.Kttcsc....pQt.tpptRpp.QEapQp.tQhVEAhtcHhpt.AtKLsl.DYQEhEshhhp.lPPh.tt.Ih.+hh..sEtSthLhYtLGtN.tphRQllA..h.Ds.pAhh.LsplSc+loLtP+sKps..ss.s...h.tts................................................................... 0 2 2 2 +9139 PF09307 MHC2-interact CLIP, MHC2 interacting Mistry J, Sammut SJ anon pdb_1muj Domain Members of this family are found in class II invariant chain-associated peptide (CLIP), and are required for association with class II major histocompatibility complex (MHC) in the MHC class II processing pathway [1]. 21.00 21.00 21.10 23.40 20.70 20.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.53 0.71 -4.30 11 121 2010-01-11 15:07:59 2006-08-29 13:06:26 5 6 60 11 32 96 0 111.30 39 44.98 CHANGED M--QR.....D..LI.ossppshLPh....sssscuusoRuhtloGlolLssLLLAGQAlTsYaVapQpGcIscLTpTspsLphE.LppKhPtu..ssspM+hsM.shPhLhchhs.sss..tss...hc .....................--Qp.....D..LI.usp...pp...L.sh........sss.cupsoRushhoGhSlLVsLLlAGQAsTAYFlYQQpGplcKLThTSpsLpLEsLph.......Kh.Ptss.sss+M+Msh....Phlhphhs.t.t...hs..t.............................. 
0 2 7 15 +9140 PF09308 LuxQ-periplasm LuxQ, periplasmic Mistry J, Sammut SJ anon pdb_1zhh Domain Members of this family constitute the periplasmic sensor domain of the prokaryotic protein LuxQ, and assume a structure consisting of two tandem Per/ARNT/Simple-minded (PAS) folds [1]. 25.70 25.70 26.10 35.40 25.60 25.60 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.50 0.70 -5.03 6 115 2009-01-15 18:05:59 2006-08-29 13:44:49 5 8 113 6 14 78 0 236.20 54 28.02 CHANGED SspIItQEVpRTpQQTSuLIpNlF-p+LuhLQIHpDSsuKstulhchah-pD.s-pLshFFhSlDQt-PopTP-FRFlospcullWDDGNApFYGlNp.hLcplup+VshSNNWaalpsposhG.tahLlRRoPll-ssTGEVlGahYsuVVLsNNFuLhEpL+stSNS-NlVllssspsLASSLsGs.EsY.slssVLppppssp+hDshllscTPIplpussT.lslLolQsNpsVloL ...............SS+lhuQEspRTshQTSSLIQsLFDFRLAALcIH.QD....SoAKNsSLlsALsoRD.sspLDpFFsSVDplEhSNAPDlRFISoHD.sIlWDDGNApFYGIspp..pLs+Lh++VuhSuNWHlVQTPSphpshHlLhRRoulI-ssTGpVlGYLYVGIVLNsNFALlEsI+sGSNS-NlVLsVcosPLsSTLKGN..EP.Y..o.lc.Vl...........+s.uc-sh+.D.ualVuQThLEVcuVPTaLCVYSIQsNQNVlTL............................................ 0 5 6 11 +9141 PF09309 FCP1_C FCP1, C-terminal Mistry J, Sammut SJ anon pdb_1onv Domain The C-terminal domain of FCP-1 is required for interaction with the carboxy terminal domain of RAP74. Interaction relies extensively on van der Waals contacts between hydrophobic residues situated within alpha-helices in both domains [1]. 
22.70 22.70 24.10 24.40 22.50 22.60 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.96 0.70 -5.08 4 63 2009-09-11 10:47:49 2006-08-29 15:20:43 5 6 42 2 32 66 0 210.90 56 26.15 CHANGED ERW-KVEEQLFPL+DDaoKspRsNSPAsFPDppush.TsLFHPsPI+sKs.pPGPEVRlYDssTGKLIRpGsQuStPuP......sSuhss+tEPSSFRuVpPpQ.QhFs..EphssuQDsEQPGPSRRKRQPSMSETMPLYTLCKEDLESMDKEVDDILGEGSD.DSDuEKK+s..p.c-pEptsQspK.psPs.RpEp.................s.thPuSSERSssGuRsPRGHKRKLsEE.........DAtSE....pStESSNEDEtGSSSEADEMAAALEAELNDhM ........................................ERW-KVEEQLFPL+-DasKs.p.RpsSPAsFPDppush.TsLFHPsPlpPKs..pPGPEVRlYDssTGKLIRpGs....psst.ss.........s..s.Lsl+tE...S.SFRsVpPpQ.pQhFs....EphssupD...s.EQPGPSR..RKRQPS...MSETM.PL.YTLCKEDLESMDKEVDDILGEsSD.-S-ucc+cs....p.cpct..t.pspp........s.t.pptp...................t.sssupcs.hss.p..sR.GHKRKhp-p..........-htsp.............pStcsSN--E.GSSSEADEMAtALEAELsDhh................................ 0 5 8 16 +9142 PF09310 PD-C2-AF1 POU domain, class 2, associating factor 1 Mistry J, Sammut SJ anon pdb_1cqt Domain Members of this family are transcriptional coactivators that specifically associate with either OCT1 or OCT2, through recognition of their POU domains. They are essential for the response of B-cells to antigens and required for the formation of germinal centres [1]. 
19.20 19.20 19.40 19.20 18.40 18.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.89 0.70 -5.22 2 69 2009-01-15 18:05:59 2006-08-29 15:42:10 5 5 43 2 30 56 0 197.40 55 86.37 CHANGED MHhtKS.hSEQtsp.+PYQGVRVK-PVKELL+RKRGNs..ApshssTsVVlPppsLPSYo.hG.ssh.ssstuA.s.sus-.GALCsuWluQPSs.uohQPLspWss.P-YhpHEt..uohP.hTuDMYlQPhCPSYslVGPSSVLThAptPLhTNhsshS.STsul.PQl-V..QpssLsYhPWA.PLSshPtss...........sPQhlPhPlslscPtPQp.EsA.ps.GTLslEKLL.E-E-spp..Yshs.uL.spsl ................sEQt.P.s.P.RPYQGVRVKEPVKELLRRKR.....G.+.s.....ssG..s..sssPT.u.V....V..LP.+pPLso.Yos...s.....Gsssl.-h-s.us.ss...s--us.LCsu.....W.luQPss..AsLQ..PLs..s.Woshs-Yh.c-u.......sosP.houDhYl.QPh.CPSYT.hVGsoShLTYus.PL..lTNhssp..............S..usPs..ssP.lEh.-pQuPLTYhPWsQPL...STLPss..oLQYQssussLPGPQFVpLPISIP......EPs.Q-h-DsRR.shsoLsI-KLLLE-E-sss..Y.hspsLslEG.h............................................... 2 1 3 11 +9143 PF09311 Rab5-bind Rabaptin-like protein Mistry J, Sammut SJ anon pdb_1tu3 Family Members of this family are predominantly found in Rabaptin and allow for binding to the GTPase Rab5. This interaction is necessary and sufficient for Rab5-dependent recruitment of Rabaptin5 to early endosomal membranes [1]. 
26.00 26.00 26.00 26.00 25.90 25.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.12 0.71 -4.39 20 544 2012-10-11 20:01:01 2006-08-30 09:24:06 6 90 98 14 254 485 0 165.40 42 29.53 CHANGED ELuhucuQshhslpppLstlpsp+p+lcspl+RLspENpaLRsEhutoppch..ptpEppstpL.-chccLpahsphp+.Dthpppst....cpchcschssLcp.......hhss.E-phtsph..................p.tsptsstpssthEhsucLRoL+sLllQhssQs+hE........htlshsKpALEDLppsstccpscl ........................................ELuhu-uQlhhuLss+.Lssl-uE+Q+L+s.......QVRR.LsQ.......ENpWLR-E....L..u...s..TQp+L........QpSEppVAQLEE....EpcHLcFh..spl++hD.tsts..s........t..............-.c.....c...t....c.s.p.p.-sLc-..................Lhss-E--.stth...................................................t.tsttustp.puuhElPARL.RTL+NLVIQYuuQ....GRYE...........................VAVPhCKQ..ALEDLp+osG+cHscl................................................................................... 0 51 71 148 +9144 PF09312 SurA_N SurA N-terminal domain Sammut SJ, Bateman A anon pdb_1m5y Domain This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment. 
29.50 29.50 29.50 29.50 29.40 29.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.25 0.71 -4.24 19 1802 2012-10-02 13:36:56 2006-08-30 10:53:39 6 12 1787 8 392 1588 590 113.80 32 28.46 CHANGED lD+lVAVVN-sVlLpS-L-ptlcpVcpphtppsspLPPcsVLccQVLERLIl-plQlQhAccsGlRlsDspLspAlusIAppNshol-QhppuLup-GloYspaREQIRcEhlluclR ..............................................lDplsAl..VNssllhpo-l....cth....h....p......p....l..ph...p...h.....t....p........t........t.....t.....p...l........P....s......p......s.....t....L.....+....c.......Q..l.......L....-+LIh-pl..lQ......hu.p+.hG.l..c..l..s....DppLD.p...AI....s....s...I.......A....p.......p......N....s...h...T....l......-..Q..h.+..spL....s.t.c.G.l..sa.s....pa....Rpp....lRc....-hhhsclp......................................... 0 109 231 319 +9145 PF09313 DUF1971 Domain of unknown function (DUF1971) Sammut SJ anon Pfam-B_3000 (release 20.0) Domain Members of this family of functionally uncharacterised domains are predominantly found in bacterial Tellurite resistance protein. 23.10 23.10 23.60 23.60 22.60 22.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.70 0.72 -4.26 44 1196 2012-10-10 13:59:34 2006-08-30 14:03:34 6 10 1013 13 105 527 12 81.20 43 43.97 CHANGED KphPlascsolPphhhpcH...NTKsGsauplsVlpGpL+ahths-ptt...spclhhssspsshstPphWH+VEsho-Dhchplc ......+phPhWsKpThPtulhp+H...sT+sGsas+LoVhcGslKahshs-.....Ets.pssphlhhsuuphsh.hsPptWHplEshTDDspapl-......... 0 10 37 74 +9146 PF09314 DUF1972 Domain of unknown function (DUF1972) Sammut SJ anon Pfam-B_3020 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in bacterial glycosyltransferases and rhamnosyltransferases. 
21.10 21.10 24.00 23.10 20.60 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.20 0.71 -4.83 12 494 2012-10-03 16:42:30 2006-08-30 14:34:02 6 7 403 0 103 539 148 177.60 41 47.78 CHANGED hpcVaIIGS+GlPA+YGGFETFVEcLlcaQpsp.sIpYaVAChu-sptpp.....pFcYpGADCFsIssPplGsA+sIhYDhhAIphAlchsKppp.ppPIFYILGsoIGsFIs.at+pI+plGGplalNPDGlEW+RuKWutPVppYLKaSEKlMsKaADLlIsDNpsIEpYIpscYs...scTpaIAYGTD ....................................................................................................ppVaIIGo+GlPApYGGFE....TFVEcLs.p..h...p.p..s............p...sI.p........YaV.uCh.......s.c...s....p.s.t.p..........................phca.pGs.csas..l..s..s.P..p...l...G.s...A.c...sIsYD.hh...AlphAlph.h+pp.t....pts.I.hY....lLuss..lGs.Fl.h.shh+pI+phGsp.l.hlNPDGhEWcR.u.KWut.sV.+pYh...........KhSEphMsKaADllIsDspsIcpYlppcYs...........scTsaIAYGsD...................... 0 25 61 79 +9147 PF09315 DUF1973 Domain of unknown function (DUF1973) Sammut SJ anon Pfam-B_3022 (release 20.0) Family Members of his family of functionally uncharacterised domains are found in various eukaryotic calcium-dependent chloride channels. 
21.00 21.00 21.60 21.50 20.80 20.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.18 0.71 -4.84 13 287 2012-10-03 16:25:20 2006-08-30 14:51:31 6 17 61 0 185 287 1 171.20 37 20.05 CHANGED ulQLESpG...slpsschlsGTVsVDSTVGsDThFLlTWssp..sPpIh...LtDPsG+pYso..FhsD.tss+sApLpIPGT.AcsGsWTYoL.ptpsssQsLTlTVTSRAuSsolPPlslsA+hspcoupaPSPhlVYAcVpQGhLPlLuAsVTAhI............EopsG+sV..TLcLLDNGAG.ADssKNDGIYSR ................................................................................lQl.Sps....plpspthhssoVhlDuoVGp-ThFllT.Wssp..............P.p..Ih.....LhDP.sGphhss..............Ft.hD....hs...+hutLp.I.P...G.....s...Ac...sGtWsYol............pt......p...ss..s...ps......l....ol.........TVT..SR..As......s.s........s.s......s......Pl.o.....Vsu...th...s...p...ss...sp....a.....P...s...P....h.l..lYApVpQGh..hPlLu..AsVTA.hI..............E.spsGp.s.s.....sLcL.hDNG.A....G..A....DshKsDGlYSR...................... 0 86 92 118 +9148 PF09316 Cmyb_C C-myb, C-terminal Sammut SJ anon Pfam-B_3027 (release 20.0) Family Members of this family are predominantly found in the proto-oncogene c-myb and the viral transforming protein myb. Truncation of the domain results in 'activation' of c-myb and subsequent tumourigenesis [1]. 
20.30 20.30 20.90 20.90 20.10 20.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.38 0.71 -4.58 15 288 2009-01-15 18:05:59 2006-08-31 09:38:54 5 26 77 0 111 268 0 154.40 46 24.59 CHANGED FSPSQFLNtsspp-shsl-sssLTSTPVC.uQKsh.oTsLpRDpTP.hhQKENuhFRTPsl+RSll-sTPRTPTPFKsALsh.-cKYGPLKhlspTP.aLEEDlpEVl+pEsspslIlt-psc...PhhKK...tKQ.phcSP..hKKVRKSLsLchh-.pch..ssphhspsss.scptPs ...........................................................FSPSQFL.Nssssp.-phsl..EsPoLTSTPls.upKlhlTTP....hH+-p.Ts.+....sQKEN....ss.......FRTP.s...h+RS.l..lp.soPRTPTPFKsALAsQEtKYGPLK...h.l..P...QoPuaL.EDlpEVlKpEospsh....hl.t..-.pc....................P.h++.........hKQ.phpoP..scKstp.hs..ctW-t-ph...ssphhsps.s.......p........................................................................ 0 15 23 54 +9149 PF09317 DUF1974 Domain of unknown function (DUF1974) Sammut SJ anon Pfam-B_3029 (release 20.0) Family Members of this family of functionally uncharacterised domains are predominantly found in various prokaryotic acyl-coenzyme a dehydrogenases. 
25.00 25.00 28.20 26.30 23.90 23.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.80 0.70 -5.00 103 1236 2009-01-15 18:05:59 2006-08-31 10:02:30 6 6 996 0 224 886 173 278.60 50 35.24 CHANGED lRCHPYlLcEhpAutssDp.ppuLcpFDchlhuHlGashsNshRuhhhuLTuuphu.suP.....ssstoppYY+plsRhSAuhAlhuDluMhsLGGsLKR+EhlSARLGDlLSpLYLuSAsLKRa-Dc...GR.ppDlPhl+auhpcsLhphppAhcchlpNFP.s+hluhlLRhl.lFPhGp.p..hptPSDcLspclAchlh...pPus..sRcRLspsh.Ylsp......s......csss.lGtlEpAapshhps-slhcKlpcAh+...tsplshh......phpphhptAlctGlIopsEschLpcscthRhcsIpVD-F .................................IRCHPYVLcEMpAApss..D.....lpsFDclLFpHIGassSNtlRShWLGLTtGhho..ssP.......ssssT+RYYppLNRlSAsLALLuDlSMulLGGuLKRRERlSARLGDlLSpLYLASAs.LKRY-DE....GRpc.uDLPLVHWulQDuLapAEpAhD-lLpNFP...NR.sl.uulL+sl...lF.P.hG...R...+...ahuPSD+L-cclAclLQ...sPsu..oRsRls+Gp.Ylss.....u..............-csP.VGhlEpALhslluA-Plap+lsKtlt.pplPhp.........pL-clscpALtpGlIsp-EAslLhcAEctRh+uIsVDDF............................................................... 0 52 102 172 +9150 PF09318 DUF1975 Domain of unknown function (DUF1975) Sammut SJ anon Pfam-B_3057 (release 20.0) Family Members of this family of functionally uncharacterised domains are predominantly found in the N-terminal region of various prokaryotic alpha-glucosyltransferases. 
26.80 26.80 26.90 27.10 26.70 26.50 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.43 0.71 -4.71 51 2040 2009-01-15 18:05:59 2006-08-31 10:17:07 5 3 483 0 135 1104 1 188.20 20 38.94 CHANGED hhYhlspslshssSGlEhAthtRhplFcp.hshssKhlhhsap..splpphhcphsh.....pc.scllshY-aFpch...hsssptt.hsh...cplsh.pthphttss..thhphh............ps.phhhtlhhtspp...phlppl-ahs.pspl.l++-hashpGahSphthast.ssclhhcpaas.-Gphhhcchh.tspptt.phs...h.........hhFps ...............................................a.h...l..t.ss...s.h.ph.placp.hsh....sschlhhs...at......plp.p.h.hpphsh...............hs.schhsh.Y..s..aFpsh.....ht.pptt.hsh..........pplsh........t...t.hchhtss......p.hc.lhs....................ps....phh....s.ph.aaps.p..t......phlphl-aac..p..spl.h++-.hYstpGhhSshph.....hss..p.....s......c......hhh....cpaas.pGphhlpchh...s..sptt...h.....hhh..tt......................................................... 0 43 57 115 +9152 PF09320 DUF1977 Domain of unknown function (DUF1977) Sammut SJ anon Pfam-B_3043 (release 20.0) Family Members of this family of functionally uncharacterised domains are predominantly found in dnaj-like proteins. 20.90 20.90 20.90 21.10 20.80 20.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.42 0.72 -3.80 29 448 2009-01-15 18:05:59 2006-08-31 10:32:53 6 9 265 0 284 412 2 105.10 31 28.52 CHANGED lluphhss........sPs.YSh..p.o.tashpRpTsphpVsYYVsp..sFppca.....susplppLEppVEp-YlppL+psChpEppp+..........................pphh.tAp..hhtDpc..hhpcApphphP.sCccLpcl ......................loplhso........sPs..YSL........pso..ssa..s.hcR...pT.p.......p..l..p....V....sYYVsp..sFp.pcY..................puspLppl.EcpVEcDYlspL+psCh+Eppp+..........................pphh.tAp.....hatDtc....hhpcApphths...sCpcLpp.............................................. 
0 78 129 206 +9153 PF09321 DUF1978 Domain of unknown function (DUF1978) Sammut SJ anon Pfam-B_3044 (release 20.0) Family Members of this family are found in various hypothetical proteins produced by the bacterium Chlamydia pneumoniae. Their exact function has not, as yet, been identified. 25.10 25.10 25.20 63.70 23.20 25.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.69 0.70 -4.93 6 49 2009-09-14 14:09:44 2006-08-31 10:41:27 5 4 2 0 25 49 0 196.80 38 35.96 CHANGED cDcpKsupAcpcatEht-phcca+KshFWLsE-ssIDh...ossssWshst.PpRps..........hsclspcEh...Wp+pstLK+hcspYspshsphpcpsoccNpptLp-tppch.cthp-happEhccscpRlcsLpthYspl.sspp-sctppphs..h.......cL-phh-pIEpphppssc-Q-sYWKpp-s+E....tEh+Ectsctcp.cEhpcsLct......L-chl+pppcpLchlctclpctphphst.sspppLpsu .......cDcsKSupAEp+hp-hp-pWcca+cslFWVcE-GshDl....shh.usWshsh.PhRpt.........RhsclshHEl...a-cThhlKch.cpphshA+sthEKptSpcN.pthpchptth.p.hpchhtpEhpcstpRlcpLpthYstl.sp..-tchppph...........sL-phhttl-pphppssp-Q-.Yhc..-.pE.....Ehctphsphh..cph.p.hp.......h-phlcth.ppL..hph.h.tht..hpt..t.h.lp......... 0 0 0 25 +9154 PF09322 DUF1979 Domain of unknown function (DUF1979) Sammut SJ anon Pfam-B_3053 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in various Oryza sativa mutator-like transposases. 25.00 25.00 32.20 31.70 21.40 16.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.80 0.72 -4.33 3 157 2009-01-15 18:05:59 2006-08-31 10:53:18 5 17 4 0 87 153 0 57.80 75 4.81 CHANGED MSSKlhFchaaGEGNVRaGPsGVDLSDFlsooRGIDRPAERSFpSIpNWLMRGFRIDP ....MSsKlhFQlhHGpGNlRaGPsGVDLSDFlhTu+GIDRPAERohpSIhuWLhRGhRlD.................. 0 0 0 2 +9155 PF09323 DUF1980 Domain of unknown function (DUF1980) Sammut SJ anon Pfam-B_3062 (release 20.0) Family Members of this family are found in a set of prokaryotic hypothetical proteins. Their exact function, has not, as yet, been defined. 
25.60 25.60 25.60 25.90 25.50 25.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.99 0.71 -4.54 29 841 2009-01-15 18:05:59 2006-08-31 11:00:14 5 1 748 0 102 467 0 173.80 37 63.92 CHANGED hlRhLILhGashlhhaLhloGclspaIss+ashhshhuhllhhILullQlhhhhpshcpp..................cpH..t....t+hhshhlhllPllhGhh....hPssoLD......SshsstKGhphsh........ts....tscptopsphl+s-..............stt.htp.hpt.h.pphhtpppIpls--sahcsh-tI..hp.s.scahG+ ..............MlRhllLhGah.LhhaL.plSGcLspYINh+YsYLuhlohll.hlLA...l..VQlh....l....hh+...phc.p.............................HsHh.ps+........t+hhuhsLLslPlllGlh...........FPoloLD..................................Ssh.V...sAKGapFPl..........................utts....ps.s..tts..p..sQaL+PD.............................TS.Ya.scs....sYcctM.ppth.cca..hsp..ssIplssENYhcsMEhI.YsYP.s-FtGK........................................ 0 30 70 85 +9156 PF09324 DUF1981 Domain of unknown function (DUF1981) Sammut SJ anon Pfam-B_3041 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in various plant and yeast protein transport proteins. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.71 0.72 -4.46 41 681 2009-01-15 18:05:59 2006-08-31 11:31:41 5 13 272 0 470 659 6 83.80 34 4.95 CHANGED h+FLcp-ELspap.FQ+-FLpPFEhlh.....pps..psh-l+-hlLpCltphlps+s..spl+SGW+slFsllshuuppps-pllphuap....llph.l ...............................+Fh-ctELspFp.FQc-FL+P..FEplh......pps..pssslR-h...llcCltQh..lpupu..ssI.+SG.W+slF...uVhptAus...-....p....p.csllphAFppht................................................... 1 163 245 367 +9157 PF09325 Vps5 Vps5 C terminal like Mistry J, Wood V anon Pfam-B_6206 (release 20.0) Domain Vps5 is a sorting nexin that functions in membrane trafficking. This is the C terminal dimerisation domain [1]. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.27 0.70 -5.11 22 1147 2012-10-03 12:17:00 2006-08-31 11:48:15 5 17 292 0 720 1151 2 193.30 20 42.46 CHANGED htphhsshtpulst........shKhsEs.....DpaF.-+pphl-sLEppL+pLhcul-slsspRp-Lutshs-hupulstLussE.s..psLSpsLspLu-lpt+lcphhpcputpDhhpLuthlc-YlRhluSlKssFspRhKhapphppsppsLpKK+pphsKhpts.tsp..t-KhpphppElp-hcp+sppscpcFccloppl+cElp.+F-p-+hcDFKsslptaLEutlcsQcEhl-hWEsFhsp ..................................................................................h........................th..p-.............D.hF.p........tt.h.ph.cpplp.p.h.pttsc.ph.spp..+.c.......plutshs..phutshtt.Lu......t.....E...t.p................s..l.......s..p........s.......h.......pplu.......c.h.p.......ph..pp......h......t........p.c.....u.....ps.-.hph........sch...lp.Yhp.ht.......ul+sh.hppR.pth..php.pspptLpct...+tt..t.....ch....................................h.......t.....................................................p...............c...+hp.p....h..................t....cl.t.p..............hp.....tp.p..thp.......pphcpl.s.......p....h...pp..Eh..cap....pp+htsh+p.lhphhp..lphtpp....ht............................................ 0 198 345 556 +9158 PF09326 DUF1982 Domain of unknown function (DUF1982) Sammut SJ anon Pfam-B_3077 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in the C-terminal region of various prokaryotic NADH dehydrogenases. 25.00 25.00 28.00 26.70 20.90 19.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.70 0.72 -3.81 98 630 2009-01-15 18:05:59 2006-08-31 12:44:19 6 11 567 0 299 594 813 49.80 34 7.25 CHANGED Dpltssshts.hsh.h..........splup..ss.F...psslpDFYhTsPIuRASssMAcC .............................Dpltsss.htp.hss...............hssphss..ss.h......ptslcDFYhT.ssIoRASssMAcC.. 
0 96 171 232 +9159 PF09327 DUF1983 Domain of unknown function (DUF1983) Sammut SJ anon Pfam-B_3073 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in various bacteriophage host specificity proteins. 20.90 20.90 20.90 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.64 0.72 -4.03 47 1596 2009-01-15 18:05:59 2006-08-31 12:57:54 6 28 644 0 69 1683 37 78.60 46 8.51 CHANGED lpptupuhsshsG....clsAhaslKsps.sssGphhsAGhuluh-.sssusspSpllltADRFull...ss..ssGshtsP......FVlp..sG..plal .......................................................................lpphp+s.hDsNs.........phsAMWul.Klpp.spD.G..phY.l.A.GIGhuhE.sTss..s.........hhSQlLluADRIAhI....sP...ssGNp..pPh......FVuQ..GsQlFM............ 0 7 22 45 +9160 PF09328 Phytochelatin_C DUF1984; Domain of unknown function (DUF1984) Sammut SJ, Bateman A anon Pfam-B_3070 (release 20.0) Family Members of this family of functionally uncharacterised domains are found at the C-terminus of plant phytochelatin synthases. 25.00 25.00 26.20 78.80 21.10 24.70 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.82 0.70 -4.90 14 95 2009-01-15 18:05:59 2006-08-31 13:15:16 5 3 42 0 22 104 0 238.20 45 54.33 CHANGED p+sPulLYTLSC+cESWhuhAKYLhEDVPhLLpScslcslpclLSslhcSLPuNhspFIKWVAEVRRpE-Gs.psLScEEKpRLtlKpcVLpQVp-TcLF+hVschLpp.pt.stp.stsstcDSLs.plAAsVCCQGAslLsGp.hsussthCC+c.Tsh+slcus.GcsssTVlSGsVl..ssssEQuVDhLlPhs.tpssssss.........t.stp..hhHPossDVLTlLLLALPPpTWtsIcDcpLhsElpsLVSp-sLPslLQcEV.LHLR.cQL ...................+.sshLaTLSCKcpsWhuhuKYLhE-VPhLL+ScslssVccll.slhpSLPushspFIKWlsEVRhtE-Gs.ppLSpEEppRLtlKpcVLpQl+pTcLFphlscaLpp........s.ssp--SLs.phAApsCCQGAthLs..Gs..s.SsthCs+c.sshpslpus..ucu.ssVloGpVl..ssGsEQslDhLVPpp.sps.s.ssss.........t.s.p..shaPossDlLTVLLLALPPpTWpsIpDpplhtEhppLlSpcpLPs.LQpEV.hHLpcQ....... 
0 3 14 17 +9161 PF09329 zf-primase Primase zinc finger Mistrj J, Wood V anon Pfam-B_9710 (release 20.0) Domain This zinc finger is found in yeast Mcm10 proteins and DnaG-type primases [1]. 21.80 21.80 21.80 23.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.44 0.72 -4.36 37 283 2009-01-15 18:05:59 2006-08-31 13:27:25 6 12 258 4 201 287 3 46.00 37 6.37 CHANGED GpupDhGhCpuh+.+sGphCsshlNt.....pcspaCpaHhphp....h++h.pupR .....GpupDlGhCKuh+..KsGch...CsshVNh.......pcs-aCpaHlptp....h+Kh.putR........... 0 58 101 164 +9162 PF09330 Lact-deh-memb D-lactate dehydrogenase, membrane binding Sammut SJ anon pdb_1f0x Domain Members of this family are predominantly found in prokaryotic D-lactate dehydrogenase, forming the cap-membrane-binding domain, which consists of a large seven-stranded antiparallel beta-sheet flanked on both sides by alpha-helices. They allow for membrane association [1]. 25.00 25.00 70.10 61.00 20.70 20.00 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.81 0.70 -5.03 19 896 2012-10-02 00:48:38 2006-08-31 13:47:55 6 2 874 2 110 550 214 277.90 68 51.68 CHANGED VFYIGTNssscLTclRRclLssFcsLPluGEYhHRDhFDIA-KYGKDTFlhIcphGTcpLP+hFulKuplDshhp+lsalscaloD+lMQhhuclhPsHLPcRMp-aR-+YEHHLlLKMuscGlpEA+paLcpaFucus..GsaFECos-EGp+AaLHRFAAAGAAIRYcslHpscVEDIlALDIALRRNDc-WhEpLPtEIsspllcKLYYGHFhCHVFHQDYIlKKGsDsptlc+cMLcLLDpRGAcYPAEHNVGHLYcAcssLppFY+cLDPTNoFNPGIGKTSKpKpW ..................VFYIGTNpPpVLT-IRRHI.LusFcsLPVAGEYMHRDIYDIAEcYGKDTFLMIDKLGT.D+..hPhFFsLKGRsDAhL-KVpF.htsHFTDRsMQphu+LFPuHLP.RM+saRDKYEHHLlLKMuG.DG.......VuEA+paL............t-a..F............ppA.....-......Gs....FFsCTsEEGsK...AF.LHRFAA..AGAAIRY.pA.VHuDEV.EDILALDIALRRNDp-WaE+LPPEIDspLlHKLYYGHFMCaVFHQDYIVKKGVDs+A.............LKcpMLcLLppRGAQYPAEHNVGHLYcA.poLp+FYRc.DPTNShNPGIGKTSK+KpW.................. 
0 20 49 82 +9163 PF09331 DUF1985 Domain of unknown function (DUF1985) Sammut SJ anon Pfam-B_3094 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in a set of Arabidopsis thaliana hypothetical proteins. 29.80 29.80 30.10 30.50 29.50 29.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.72 0.71 -4.38 50 173 2009-01-15 18:05:59 2006-08-31 14:19:20 6 13 9 0 34 177 0 119.50 24 17.19 CHANGED LLsRpLhscKcpE.hWhlhuGpPlRFSlcEFtllTGLsCtphPpphcsppttph........shhpph....h...t.ppshsltclhphLppt.......hhsstp+lpluhlhhlsullhsppphst....lp..hhchsp-lchhhsaPWG+h.uFpt.hhpsl .........................................hh.pph.hpp..p.hWhhhss.PlRaulpEathlTGL.Ct.hspp.p....th.th...........thhtph....h...h.pps.hshtcl.phL.tt.......thps.tp+lphuhlhllpullhsppptst....ls..hlchspslchhhpaPWGchuFphhhpt..................................................................... 0 12 13 13 +9164 PF09332 Mcm10 Mcm10 replication factor Mistry J, Wood V anon manual Domain Mcm10 is a eukaryotic DNA replication factor that regulates the stability and chromatin association of DNA polymerase alpha [1]. 
20.10 20.10 20.40 22.10 19.30 19.80 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.59 0.70 -5.10 9 127 2009-01-15 18:05:59 2006-08-31 15:11:39 6 8 106 1 85 126 3 292.30 32 37.32 CHANGED holPTsGhhplppc.....tpu.httsGsshpShSAspLLKpQ+ppp.phLthh+ccuEclQK+......hLpSostscssSpsooo.......pushpoPptus-h.pspth....ssoPKLuRuhs.u.Dl.Fhscpss...t.supu.pAtKhAAltKL+A....LtKtsPN.lK+Kpupouc.h.lsptVcpp.......ssuu.....ppus--pEPthKKcR......p.cEhp+ILsAKSpHosllcttEtEhQEcYFssL.+KEphEEKMpshhEhp.C+sVTCppCKYTtFpsu-cClpEpHch+hHDAsKRFF+Cs.CGNRTloLtRLPKppCusCsh.KWERsuMl+EKpG.plGGEsLhsRGEEc.KFLsS .......................................................................................................................................t.....t......p.hss.th.tppp.p....h.h..h.ttp...ttthp........hhtsstt.........s........................s.ph.p....p...t............s.s..hPpLupu..t..u.pl.h.hs...tp............s.u.......K..hsAlh+h+s...........ltK.sPN.h...p.p+t..tpstc....t....lp.ptscpp................t..s............pcp.c.t.++p+.......................pp-ph...pcl...l..p.A+SpHssllcpsEt-hpEcYFp.L.+KEpMEEKMpshpEhp.C+sVpCcp..C..p.YTtFpss-pChpcpHs.l+.h+DulKRFF+Cs..tCGpR..ol.ol.p.+.l.Pp.p...pCps..Cs.......+.WERsuM..........h+...E.....+p......s..hhstEtL..RG-Ec.pFlsS.................................................... 1 30 39 66 +9165 PF09333 ATG_C ATG C terminal domain Mistry J, Wood V anon Pfam-B_61662 (release 20.0) Family ATG2 (also known as Apg2) is a peripheral membrane protein. It functions in both cytoplasm to vacuole targeting and autophagy [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.01 0.72 -3.84 36 689 2009-01-15 18:05:59 2006-09-01 09:08:59 6 33 269 0 463 699 7 95.20 27 4.20 CHANGED +thSlYusQ...PtslppGlppAYpoLpc..............................slthstpslhpsstch..hc....spuspuA..........stsVl+tsPssllRPhIGuT-AlupsLhGlpNplDPppppc.c-KYK ..................................................................p...phhspp....Ptslt-Glspuhpultc..............................ulhsu...hsulhppPhcs....tc...........pcGssG..h..........................hpGVs+ulss.sls+PhhGss-hsSpshtGlcN.......ph.................................. 0 164 244 364 +9166 PF09334 tRNA-synt_1g tRNA synthetases class I (M) Bateman A anon Pfam-B_107 (release 20.0) Family This family includes methionyl tRNA synthetases. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.33 0.70 -5.81 39 7775 2012-10-02 18:00:56 2006-09-01 16:59:03 6 66 5004 42 2168 33465 18847 310.30 30 47.26 CHANGED hhlTosl.YsNussHlGHhh.ohlsADlhsRapRlp.Gp-.VhFlsGoDEHGp.IphpApcpGloPpchs-c.hpptapchacp.hsIsaDtFhRTTsppHpchspchapcLhcpGhIactphpthYsssscp..Fls-+.................upCP................tCG+plc.................hh+.pcpaFFcLscapct....................Lhcal.cps.t....psphpphs.sal..cpGLc-huIo...pchsW.GI.l.....P.....ssps+slYVWhDAhlsYloust.hs....................c.pa....pcaW.....p.....t.hHhIGKDIlhFHslaWPAhLhu..........tshp.......lPpplhupualsh-Gp.KhSKSpGsslps.pchlcp.as..sDhlRYaLhp.psshspDscFShcchhp+lNs-LssslGNhlsRsh 
........................................................................................................................................................hlTsslsYss.G........t........l.Hl.GHhh..shltu.Dlhu.R.a.........p.R....h....p......G............h.........s..........V.....h..........a.........l...............s.........G..........s.................D........t..H.........G.............t...........................l......p..........t...............t.............A.................t............p..................t..................G......................h................s....................P.................p.................p.................h.....................s.................c.................c................h.................t................t.................p.................h............p............p........................h................p............t..............h................s..................l.................S............a...............D............p.............a.............h.........p....................T.............o....s......s......p......a..........t....c.h....s..p........t.........l.........a....................p...cL...h....c...p...G..............Ih...p.t.p.h.p.....t...h.a..s..s..p..p...p...p....a...h.s-p......................................l.t.t...h.................................................sut......h..p...........................................................................h..p.....p..cpaFhc..l..s..t.......h..t..p.h........................L..t.ah....p.......t.....t.................p....h.......ph....h....pa.h...............p.....s......L.p......t...hslo........ps..h........a.Gh...l.........P......................................tt.t+h..h..Y.V..........W........h......D........A...........h.....s.......Y...h..s.....sht..t...............................................................a.............pp.a...W.................................................................h.p.....h..h.......GK.........D......l...................
h....F...........H...s....l...h....a....P...h..h.L.u..............................th..................................h.P.....p.p.....l.....h.s..p.t.....a.....h.....h.....h........p......s......t......K.hS.K...........S.....h..Gs..h.l..s................h...l..........p...............p......h.s................-......hR.....Y..a.hh......t...................s........h.....p.D.h.sh......s......p......t..h..h..t...p..h.N......-..lssthsNhhsRs.h.................................................................................................................................................................................................. 1 764 1399 1855 +9167 PF09335 SNARE_assoc SNARE associated Golgi protein Mistry J, Wood V anon manual Family This is a family of SNARE associated Golgi proteins. The yeast member of this family (Swiss:P36164) localises with the t-SNARE Tlg2 [1]. 32.60 32.60 32.60 32.60 32.50 32.50 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.50 0.71 -3.91 83 15761 2009-01-15 18:05:59 2006-09-01 17:20:35 6 40 4479 0 3638 10044 3069 125.40 20 54.58 CHANGED lP....sthlh..hsuGhhh........uhhh.....uhlhshlGshlGshlsahlu+hht.............pthhpphhpppthpt...hpphh.........pc..huh.hh............lhlhRhlPhlstshls..hhuGh.splsh.tpFhhsshlGthshshlhshhGth .................................................................................lP..uphlh......hh.u.G..h.l.h......................shhh.......................shlhshl.uu...h...lGsh....lsa...h.l.G.Rh.hG.............................phhhp...h...h..h..p..p..p..t.....h...p..p.............s..pph.h......................p+...aG...h..hs....................................lll.s.R..a.lP..h.l...psh..ls...hs.AGh..s.p.hs......h.tp.FhhhshlGsh.hhshlhshhG..h.......................................... 
0 1172 2266 3020 +9168 PF09336 Vps4_C Vps4 C terminal oligomerisation domain Mistry J, Wood V anon Pfam-B_8681 (release 20.0) Domain This domain is found at the C terminal of ATPase proteins involved in vacuolar sorting. It forms an alpha helix structure and is required for oligomerisation [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.87 0.72 -4.08 26 1201 2009-01-15 18:05:59 2006-09-04 10:04:39 5 14 381 41 789 1144 12 54.50 29 9.96 CHANGED hlTPCSPGDPs.A.lEMoWh-ls....uccLhEP.slThpDFlKAlpss+PTVspcDlc+apcFTc-FG .........................h............................p..l.......tpch......P..slshpDFpcAlpps..+sSVSppDlc+aEcasp-FG................. 0 272 408 623 +9169 PF09337 zf-H2C2 His(2)-Cys(2) zinc finger Mistry J, Wood V anon manual Domain This domain binds to histone upstream activating sequence (UAS) elements that are found in histone gene promoters [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.05 0.72 -4.54 11 664 2009-01-15 18:05:59 2006-09-04 13:23:51 5 42 181 0 194 519 2 39.20 51 10.82 CHANGED Hhpp.HuGINKoTotIApKYHWhRIKETVucVI+sCscCK ............HslA.HoGp-u.Tah..KloSKYaWPNlRKDVlKVIRQCcQC.h.. 0 88 124 161 +9170 PF09338 Gly_reductase Glycine/sarcosine/betaine reductase component B subunits Mistry J anon Pfam-B_25756 (release 20.0) Family This is a family of glycine reductase, sarcosine reductase and betaine reductases. These enzymes catalyse the following reactions. sarcosine reductase: Acetyl phosphate + methylamine + thioredoxin disulphide = N-methylglycine + phosphate + thioredoxin Acetyl phosphate + NH(3) + thioredoxin disulphide = glycine + phosphate + thioredoxin. betaine reductase: Acetyl phosphate + trimethylamine + thioredoxin disulphide = N,N,N-trimethylglycine + phosphate + thioredoxin [1]. 
20.00 20.00 20.30 20.70 19.80 19.60 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.32 0.70 -6.30 16 404 2009-01-15 18:05:59 2006-09-06 15:52:04 6 3 133 0 71 362 11 324.90 29 81.50 CHANGED M+LELGpIaIKDlpFGcpTcVcsGVLhlNKcEllchltp.D-+Ipol-l-IA+PGESlRIhPVKDVIEPRVKVEGsGslFPGhhuKV-.TVGpGRTHlLKGsAVV............ToGcIVG.FQEGIIDMSGsGAcYTPFS+hhNlVlls-sh-Glppa-HEp...AlRhsGLKAAtYLGEAu+slpPDElcsYETpPlhEpspcYPsLPKVuYVYMLQSQ..GLLHDTYVYGVDAKpIlPTllYPTEVMDGAIlSGNCVSACDKNsTYlH.NNPlIc-LYc+HGK-lNFlGVIlT.NENVhLADKERSSsasAKLschLGhDGsIlSEEGFGNPDsDLlMNC+KlEpcGIKTVllTDEaAGRDGsSQS.LADusscAsAVVosGNANpllhLPsM-KlIGcl..phl-slAGGasG......SL+tDGSI-sElQAIsGATsElGFspLoA+sY .....................................................................................Lphtphhlpclpaup..pop.l.c.sssLhl.pc.p.h.th..h.t.....cphl.pphclcl.hpPs-.t.h.p.h..s.hh-slps.ts+hpGt......................lGpGhThsL.pG.shVh............hs..G........p............hu....p..-Gh.l-....t.h...h..s..t..s.....h....s.....t........plhl.hp........th..p.....ht..pt....h.hhu...hchs..a.l.s....p...hc.t....h...p...s..-..p.......h.ph..hhp.t.t....hsth.pVshlh.h.sQ....Ghha-s.........hhh.........G....h...p......hh.....th...........l.....Ps.hh.PpElhDGul.huh...shlusssKpsohpahppPll.cplhpccsp-lshhGVlhs.sps.h.s-K.hsuphsuthsc.hhssDGsllop.EGaG.NschDhh.shcplttpGl.sVhlo.......................................................................................................................................................................................................................................................................... 
1 42 66 68 +9171 PF09339 HTH_IclR IclR helix-turn-helix domain Bateman A anon Pfam-B_70 (release 18.0) Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -7.78 0.72 -4.37 93 11639 2012-10-04 14:01:12 2006-09-12 11:35:16 5 84 2544 26 3363 13801 1334 51.10 26 19.10 CHANGED ulsRulplLchluppsps........h..sls-lAptsGls+uosaRlLpoLhptGalcpcs .............................................slt+ultlLc..h...l.s..p.p.s.ss.....................h..sls-.lupp...s.....G..l.s.+.o.TsaRlLpo.LtptGalpp................ 0 800 1991 2786 +9172 PF09340 NuA4 Histone acetyltransferase subunit NuA4 Mistry J, Wood V anon Pfam-B_29415 (release 20.0) Family The NuA4 histone acetyltransferase (HAT) multisubunit complex is responsible for acetylation of histone H4 and H2A N-terminal tails in yeast [1]. NuA4 complexes are highly conserved in eukaryotes and play primary roles in transcription, cellular response to DNA damage, and cell cycle control [2]. 20.90 20.90 21.20 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.82 0.72 -4.34 29 357 2009-01-15 18:05:59 2006-09-12 13:53:42 5 8 275 0 236 335 0 80.70 39 40.29 CHANGED +pcLp.phlp+KppL-ppLssLEcpIYchEspYLp.....tsoshGNIl+.GF-sa..hpsss................ttsp++pttap-sDRlFShSSso.h ...................tcLt.phlp++pplp.cp...LssLEcp......IYshEsuYL-................pophhGNII.+.Ga..DpY...lpspp.......................................sutss+R..pppap-s-RlFSpSSlT................................... 0 85 135 199 +9173 PF09341 Pcc1 Transcription factor Pcc1 Mistry J, Wood V anon manual Family Pcc1 is a transcription factor that functions in regulating genes involved in cell cycle progression and polarised growth [1]. 
20.90 20.90 21.30 21.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.31 0.72 -3.94 57 386 2009-01-15 18:05:59 2006-09-12 13:56:42 5 11 320 13 261 415 29 76.60 23 62.38 CHANGED hphslplsFtotc.AplshpuLps-..sphptspsphphshps.s..............hLh..lphpA........p-sphLRsulNoalc.lplshcshpt ...............................thslplsaso.tc.A..plshpuLtsD....tphp.stsppph....shss.s..................................................................hLh..lphpA............pcs..+hLRsulsualc.lplshcshp...................................... 0 72 146 210 +9174 PF09342 DUF1986 Domain of unknown function (DUF1986) Mistry J, Rawlings ND anon Pfam-B_99782 (release 20.0) Domain This domain is found in serine proteases and is predicted to contain disulphide bonds (see Swiss:P98159). 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.99 0.70 -5.11 3 34 2012-10-02 13:45:52 2006-09-12 14:19:54 6 16 29 0 24 2142 2 201.90 31 10.89 CHANGED N++sElV-sFc.....................--hcWPWlAcVYl-GshhCoGVLIDtSWVlVScSCLpslsLcHpYlSVVLGGuKTh+Sl.+GPYEQIhRVDCa+slP+ScllLLHLcoPloFS+HVLPTFVP-opNcNpocu.cCloVGQDD.hGRsKTluIaL.cNsTNCsScpl.CYK+cpKQP.h....lhN..stsMsSpHE.stlISCaTPpthsslscFT.............sss..sLKN.....sScthassS.pGVlV..C+sSRoGWaPsuhapapRGsC.GFccls.GVRoLE-uY+clQ-llHK 
.............................................................................................................p.h..h.WPW...L.A.....c...l......a...........s......s.....G......c.....h....h.....Ch.....GlLl-.pWlLsp.puC.......l........p........s......l.......s.......h........p........s.....p......Y...l.......o........s....l.....L......G......t......u....+...........o....h......h.............h........c.........u.......s......a......p......Q.......I...........h..........V.......D.........p........h........c..................l.....................c.............o.............p........l......LL.....HLc.........p.........s.....p....a....o.+..a..V.Ph..hl...c....................t..p..ts.....C......l...u....l..h....p..sp..............p..........o..................t...l..h.....h.............s........p....s....C.....s...........Capht.................................................................................................................................................................................................................................................................. 0 8 11 21 +9175 PF09343 DUF2460 CHP2217; Conserved hypothetical protein 2217 (DUF2460) Bateman A anon PSI2 target BIG_186 Family This model represents a family of conserved hypothetical proteins. It is usually (but not always) found in apparent phage-derived regions of bacterial chromosomes. 
21.90 21.90 21.90 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.96 59 277 2009-01-15 18:05:59 2006-11-08 13:52:02 5 4 254 0 88 226 66 193.60 38 84.33 CHANGED H-lRFPh..sluhGusGGPERpT-lVsLusGtEcRNssWucuRRRYDAGhGl....RSh--lpsLlAFFEARpGphaGFRa+DhsDapSs.sstsss.tDQhlGh...GDGssspFQLsKsYsu......GtpsYtRsIsKPVsGoV+l.ulsuschttu....tatlDhsoGhloFs..psPssGstloAGFcFDVPVRFDTDplphSlsuFpAGpsP....slPllElRh ................................................H-lpFPh...sluhGusGGP-hpTclVshsoGhEpRNs.WupuRR+a-suhul...........+Shc-lptlluFFEAR+GphauFRa+D.hDapSs..t.................................................................................s.hDQslGp...GDGssspFQLhKsYss..........s.tsYsRsIs+PVsG.oVhl....uVsGschstu....taslDssoG.....lVT..Fs....tsPstssslTAGF..tFDVPVRFDoDplphsls.....sapuGpls.....slPllEl+........................... 0 20 59 68 +9176 PF09344 Cas_CT1975 CT1975-like protein Bateman A anon PSI2 target BIG_184 Family CRISPR is a term for Clustered, Regularly Interspaced Short Palidromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family is represented by CT1975 of Chlorobium tepidum. 
25.00 25.00 29.30 29.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.06 0.70 -5.08 25 679 2009-01-15 18:05:59 2006-11-08 14:22:11 5 3 640 0 127 467 19 337.10 44 96.95 CHANGED lplHlLpoassSNLNRDDoGtPKoAhaGGspRsRVSSQuhKRAhRpuhp.tthhst....hG.lRT++lsc..lhctltp................tuh......ctp...Atphscphhsshu.....Klpptppttt...........................coctLhalutpElstluphspcttpsspsspttth......pccpp................................ulDIALFGRMlAs...ss....phNV-AAsQVAHAlosHtlssEsDaFTAVDDLtp....--..sGAuahGssEFsSusFYRYsslDlcpLhcNLGGs.......p.........-hAtcslpAhlcAhspssPoGKQNoaAupshsshlhhph.tpspPhSLAsAFcpPV.......pspcs...hhpsulptLsshhpphcpsYGtt....pshhthsshssptt......s.psolccLlshlt ...................................IplHlLpuaPsuNLNRDDTGuPKTsl.hGGssRlRVSSQSLKRAhRsSthacps..lus........lGlRotRlucc.sAph.L..h-......................pG..l..-pc..cAhchut..plsshhG.............KsKpc+.c..cc..............................................-TcpLlalSssEh-sltsLApphsp-cc.s.sp.ccch.t.....h++cph..................................................................................AVDIAhFGRMLAs...ps.......chNV-AAsQVAHAhulpcs.h.lEsDaFTAVDDLp...........pss.....ED...uGAGHlGpstFuSAlFYpYhsIsh-hLlcN..Luus..............c..............tLAspslcAFs-AhlpssPTGKQNSFAu.+s.hAsaslsph.t.s-.QPhSLAuAF.cP..l......susc.......lpsulp+lsshtcshsplYspp........spssshsl.hspp...ss........................pssh................................. 0 38 93 111 +9177 PF09345 DUF1987 Domain of unknown function (DUF1987) Bateman A anon PSI2 target BIG_79 Domain This family of proteins are functionally uncharacterised. 
21.30 21.30 21.40 22.90 21.20 21.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.29 0.72 -4.20 56 185 2009-01-15 18:05:59 2006-11-08 14:48:30 5 3 128 0 66 195 81 98.00 35 74.40 CHANGED PpVpFcspsulhclpG-SYsEssh...sFYpPllsWLppYlpp..spptlphsh.cLtYaNTSSo+thhplhchL-ch.tppsspVplsWaap..p-DcchhEhGE-a .......PtlpaDhpsshLplpG-SYPENuh...tFatPllpalcpYLsp............spp..........s..........lplcl.cLhYhNoSSsKslhslh-hL-ps.tppGppVslpWaY-..p-D-phtEhGE-F................ 0 29 47 57 +9178 PF09346 SMI1_KNR4 DUF1988; SMI1 / KNR4 family (SUKH-1) Bateman A anon PSI2 target BIG_91 Domain Proteins in this family are involved in the regulation of 1,3-beta-glucan synthase activity and cell-wall formation [1][2]. Genome contextual information showed that SMI1 are primary immunity proteins in bacterial toxin systems [3]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.44 0.71 -4.13 178 1863 2012-10-01 20:46:44 2006-11-08 15:03:16 5 30 934 3 480 2059 25 133.60 15 53.86 CHANGED ssoppplp...p.hEpp..luh.pLPps.a+phlp.........................................tthhhthtshshhthhhtthph..tthtphhphpphhpph..................................................................................................................................h.pphlshus...sssushhslchsstt................splhhhspc........tsp...hthl..................us..oFsca.......lpp 
..................................................................hoppplp....p.hEpp.....hs.h....pLPps.apphhp...............................................s.ss..h..h...h...t...h.t...h...p..h..............t......hh......h..ttph........pthtt...h..hp.h...t..thhpt...................................................................................................................................h.tthl..shup.....sssu....s.h..lsl.-hpsst.t...............splh.h.hsp-......................................tpp.........hhhl.......................................up.....sappal..t................................................................................................................. 0 137 283 391 +9179 PF09347 DUF1989 Domain of unknown function (DUF1989) Bateman A anon PSI2 target BIG_202 Domain This family of proteins are functionally uncharacterised. 20.40 20.40 20.80 20.60 20.20 20.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.68 0.71 -4.80 144 1044 2009-01-15 18:05:59 2006-11-08 15:22:35 5 9 576 7 482 1045 1397 168.80 30 62.00 CHANGED ppplsutsshuhtlcpGphlRlsDlcGsQssDhlhasAcc.hs.....ERhssscThpht...tsha.lssG..shLhSshs..Rshholl.pDo..........sGhHDslsusCsspp.phh.aut..pph........csCp-Nhhtulu......paG...ls.......pcDlssslNhFhsVsls.ssGp.lphtsshSpsGcaVpLcAEhDll ......................p..hlPutsshuhtlptGphlRlhD...lcGs....QssDhhhasAcs..p.....ERhssscThphp....pssalssG..shLaSshs..RshhoIltDo............................sGhHDsluusCssppsphh.aGp...pph...........csCp-Nhhhulu..............caG..........Ls.............................ppDlssslNhFhslsls.s..........cG..........p.....ht....hpssh.opsGcal-lcAEhDll................................................ 0 123 281 393 +9180 PF09348 DUF1990 Domain of unknown function (DUF1990) Bateman A anon PSI2 target BIG_236 Domain This family of proteins are functionally uncharacterised. 
20.70 20.70 20.70 20.70 20.60 20.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.87 0.71 -4.60 30 289 2009-01-15 18:05:59 2006-11-08 15:25:40 5 8 240 0 132 262 2 152.20 33 78.55 CHANGED oYsplGuotttth...........ss.....GappsctcshlGpGct.......sF-pAspAlhsWphhctuhlplh.....ssspsstsGssVslp.hthh.......hhhhhsssRVlhll.....-Es.........c+hGFuYGTLsGHsEsGEEpFhlch......s.ssGpVahclpAFSRPAph...hu+luhPls.+hhQ+ths++.hhpul ...................................................................................htsot.t.h.........ss........Gaphhchpt.lGpGpt.......tFcpA..spulhpWthpct...uGlpV..................ssspsstsGssVh....lt.hth.........................ltsPsRVVaVh................--s.........................................shtGFuYGTLsGHs...sGEEpFsVch.......c..tsusVahplhuFSRPAsh...hs+hutPls.ph.hQ+hhspR.hhcuh................................. 0 55 101 123 +9181 PF09349 OHCU_decarbox DUF1991; OHCU decarboxylase Bateman A, Percudani R anon PSI2 target BIG_237 Domain The proteins in this family are OHCU decarboxylase - enzymes of the purine catabolism that catalyse the conversion of OHCU into S(+)-allantoin [1]. This is the third step of the conversion of uric acid (a purine derivative) to allantoin. Step one is catalysed by urate oxidase (Pfam:PF01014) and step two is catalysed by HIUases (Pfam:PF00576). 
20.50 20.50 20.60 20.50 19.60 20.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.00 0.71 -3.94 139 1098 2009-01-15 18:05:59 2006-11-08 15:27:51 5 15 925 26 464 1030 527 154.20 29 57.32 CHANGED hs...shspspF.stthsslaEpo....sWluctshs.......tp.....sasoh.ssLhsuhtphlpsss....................pppphsllpAHP-Luu+hh.........stpLospSssEQusAGlsph................ssp-hpc.hppLNssYcp+F.GFPFllsV+.....G.ps+.pp....Ilsshp............................pRl.pNs...cpEhppAhpplp+IAthRLpc ............................sthst.pph.htthsslhEps....sWlschAhs...............t+.....Pasoh.ssL...hsshpp.h.h..p.shs.......................pspthsllpAHPcLus+h................ttph..o..s.....pS.s..pEQ..u..uuGl.sp.h................sspphpp..hppLNttYc.p+F.GasFl........l....slc......G....p.s+...pp.....ILsshc............................pRl..pNs......ppEhppAhpplt+IAthRLp........................................................... 0 124 273 377 +9182 PF09350 DUF1992 Domain of unknown function (DUF1992) Bateman A anon PSI2 target BIG_207 Domain This family of proteins are functionally uncharacterised. 20.60 20.60 20.70 20.70 19.20 20.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.13 0.72 -4.15 74 1227 2009-01-15 18:05:59 2006-11-08 15:29:28 5 11 1099 0 334 771 12 84.10 39 45.09 CHANGED lsEcpIpcAhpc..G-F-s...LsG.tGKPLs...hccs..shhs.phphth+llpssGhlP.tlpLp+El.............................tp....lpchlsph .................hAEc+It-Att+....G-FDN...LsG.pGcPL.....L-Ds...Sal..Ps-lRhuYRl....LKNAGh.lPPplE.p+Eh...........................hp.....Lh-hLp..thpp.......................................................................................................... 1 97 193 267 +9183 PF09351 DUF1993 Domain of unknown function (DUF1993) Bateman A anon PSI2 target BIG_238 Domain This family of proteins are functionally uncharacterised. 
20.40 20.40 21.50 22.30 20.10 19.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.81 0.71 -4.31 60 472 2012-10-02 14:44:17 2006-11-08 15:31:17 5 8 351 6 234 454 160 160.20 38 87.75 CHANGED hYssoVPs.ahp.....hLssLsslLcKApsaApspslcsss.LlsuRLhPDMhPLstQVphAs-tA+tssARLu....Gh.....-sP...........shs.DsEso.Fs-LpARIucslsalpulssspl..-usps+slsh......hsspphshsG.p.sYlhsauLPNFYFHloTAYuILR+pGVtlGKtDYlG .......................hYpholPshhp.hLpsLsslLsKA.pu..a.Apspt.hc..sss.llsuRLhP..D..............Mh......PLstQVphA.sDtA+tss.A..R.Ls....Gh....-sP................phs..DsE..sT.as-LpuRIscTlsaLps..l..s...s.ppl.-us....ps..+..p.l.sl.................hsstshph.s.G.p.sYlhsauLPNFaFHlTTAYsILRHpGVtlGKhDYlG.......... 0 39 113 189 +9185 PF09353 DUF1995 Domain of unknown function (DUF1995) Bateman A anon PSI2 target BIG_247 Domain This family of proteins are functionally uncharacterised. 26.00 26.00 26.50 26.50 25.40 25.90 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.50 0.70 -4.71 39 273 2009-01-15 18:05:59 2006-11-08 16:33:42 5 9 102 0 174 269 140 221.80 21 67.59 CHANGED L.PssLppA..........hpputpAltsALt..sspsRhp....l-lpFs..sL................plhsluhphhpt...Ltp......ts.pshhllasDuGuuALApR-h.sshs......plhshsphh.st.ts.................t.........chhlhVsPpsh-....l-plEtlscthss.............psllhlNs+L-...Dsu.lG..lG.su.Rph...RcpFluoapssYaLcPL..ps....................GALh+s.aP......ssWplapps......s..ssYphlsphppRPss-plsth 
..................................................................................Ppshpph..........htputpAhttAlp......tttt+h.....l-lph.P.....tL.....................................................p.hpluhthhpt...htp...............ps..pthtllaP-s....uts..shAppph.tshs..................hplsslssht..h.t..sts......................................................................s.....chhlhls...Psshp.......lsplcths.pthss...........................+Pll.lh.NscLc......shp.su....................hu...hss.+.ph............+pp..Fl..ssa..ps.....sYhl+sl..ss.....................GslhRs.YP........s.Wplhhcp.........s.spYt..hl.tp..tp.+.Pshpplp.h.................................... 0 67 131 161 +9186 PF09354 HNF_C HNF3 C-terminal domain Bateman A anon PSI2 target BIG_367 Domain This presumed domain is found in the C-terminal region of Hepatocyte Nuclear Factor 3 alpha and beta chains. Its specific function is uncertain. The N-terminal region of this presumed domain contains an EH1 (engrailed homology 1) motif, that is characterised by the FxIxxIL sequence [1]. 20.80 20.80 21.10 21.10 20.70 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.81 0.72 -2.83 26 211 2009-01-15 18:05:59 2006-11-09 09:21:50 5 5 86 0 97 189 0 62.20 43 14.86 CHANGED sHPFSIsNLM..Sspp..............pKh.DlK.sY-th..pY.uuY..............ssh.shshs........psshp.ssshsssss..........YYQ ..NHPFSINNLM..SsEpp.............pKh.DlK.sY-psh...pY..suY..............sus.sss.s.......ssctshc..sss....ss..............YYp............................................................................ 0 16 25 52 +9187 PF09355 Phage_Gp19 Phage protein Gp19/Gp15/Gp42 Bateman A anon PSI2 target BIG_98 Domain This family of proteins are functionally uncharacterised. They are found in a variety of bacteriophage. 
21.10 21.10 21.30 23.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.49 0.71 -4.25 8 161 2009-01-15 18:05:59 2006-11-09 09:41:09 5 1 154 0 13 124 2 113.30 32 84.71 CHANGED LhRsLcsDEpcRApALLcsVscplRhEhs+sG+DL-shlstcPsYhtsVhpuVsl-lVARslhsusc.EPhuphSEoshsYShSuoYhls......uGGLhIcDSELcpLGL+K.............pRhGslshYGhs ........hhRsLss-.EpspspshLpsspchlRpchs....-LDt.h.....sstcs.hhtsV......lcl.Acslhp..hsc....sP...u...hoET..sGsYoaphoatls......sGsLhIpcpEhcpL.....Glp+.............p.Rhu.h......h.................. 1 1 5 9 +9188 PF09356 Phage_BR0599 Phage conserved hypothetical protein BR0599 Bateman A anon PSI2 target BIG_187 Family This entry describes a family of proteins found almost exclusively in phage or in prophage regions of bacterial genomes, including the phage-like Rhodobacter capsulatus gene transfer agent, which packages DNA. An apparent exception is Wolbachia pipientis wMel, a bacterial endosymbiont of the fruit fly, which has several candidate phage-related genes physically separate from obvious prophage regions. 20.40 20.40 23.90 22.50 19.70 16.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.87 0.72 -3.84 73 343 2009-01-15 18:05:59 2006-11-09 09:42:31 5 5 305 0 99 303 38 76.40 37 28.73 CHANGED suaFstGplshhsGsssGhsttl+pcps.......stlpLhpshstslssGDtlplhAGCDK...phsTC+sKFsN..hlNF+GFPalPGpD ............................saFstGhltahsGs.stuhthtlttcts..................stlpLhtsh..slss....G-thplh...sGCDK...ph.sTCcsKFsN..hlNFRGFPalPupD..... 0 20 64 76 +9189 PF09357 RteC RteC protein Bateman A anon PSI2 target BIG_10 Family Human colonic Bacteroides species harbor a family of large conjugative transposons, called tetracycline resistance (Tcr) elements. Activities of these elements are enhanced by pregrowth of bacteria in medium containing tetracycline, indicating that at least some Tcr element genes are regulated by tetracycline. 
An insertional disruption in the rteC gene abolished self-transfer of the Tcr element to Bacteroides recipients, indicating that the gene was essential for self-transfer [1]. 20.60 20.60 20.70 20.90 18.50 20.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.38 0.70 -4.49 17 391 2009-01-15 18:05:59 2006-11-09 09:46:05 5 2 123 0 52 328 12 175.30 27 80.01 CHANGED pFFK.hKP.hhu+LlaasclaphEhppPpGshcstppahppclpcLpphhpp...shsFhpYh+sttshhDcpYFsRsphclt..ssphhhppD.pFoTu......aDhhsApllAs-hl.hahscclcthtpt........tt...tpslpWTusKhsLlELIYALpuptslNsGphsIKclushhpplFsl-L.tchY+oYh-lKpRKp.sRTtFLccLp-sL.p+Mpc-D ..............................................................................................................................hhh..phhth..ths...t..t.p...thh.tph.tlp....t......hhtYhc.t.p.hs...h.p......tp.ph......h.hp.s..Fsp.......hp.hhut...s..hh.....tph.............................lpWTust..hsLlELlYuL......sts..s..l...s...s..G....p...h.slpcluthhpplFslcl..sc..hY+hahc.l+pRKt.sRThFLDpLp-pL.c+M.c............... 0 19 42 52 +9190 PF09358 UBA_e1_C Ubiquitin-activating enzyme e1 C-terminal domain Bateman A anon PSI2 target BIG_346 Domain This presumed domain found at the C-terminus of Ubiquitin-activating enzyme e1 proteins is functionally uncharacterised. 
29.20 29.20 29.40 30.30 29.00 29.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.46 0.71 -3.96 57 525 2009-01-15 18:05:59 2006-11-09 09:54:28 5 25 308 2 345 522 14 123.90 36 12.42 CHANGED a+NuFlNLALPhhshoEPlsssctph.....pchca...............TlWDRaclpt......s........hTLpchlcahcpch.sLclshlotGs.shLY..ssa.......tcpp-RlshplscLlcplscp......l.stpchlslplssp.D.psspDl.-lPh..l ......YKNuFlNLALPaFuF....oEPlss.s+tca.......pshpa..........................TlWDRaclpu.........p.......hTLp-hlcahccc......s.L-loMlSpGs..uhLY...usa.......s+hp-RL................shpho-lVcplsKp.................ls.sph+tlll-lssp.....D..pss-..DV..-lPhl..................... 0 128 190 271 +9191 PF09359 VTC VTC domain Bateman A anon PSI2 target BIG_223 Domain This presumed domain is found in the yeast vacuolar transport chaperone proteins VTC2, VTC3 and VTC4. This domain is also found in a variety of bacterial proteins. 20.80 20.80 22.40 20.90 20.40 20.60 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.83 0.70 -5.39 44 868 2009-01-15 18:05:59 2006-11-09 09:58:08 5 23 587 8 395 831 403 232.00 26 51.08 CHANGED sRpshKahlc.cslhpl+ttlh++Lslhsasssp.......................................................s.sIsSlYaDsss.hchYpp+lp+hpsspplRlRWY.up..t.........splFlEpKh+....ppshsst......KpRhplcp+.lpsal..ssp........t.................................................................................................................phpp.pphssElp....hlhpp.pLpPhlpspYpRsAFphss.-splRlolDoslph..........................................................................................................W++h-ls...hp.s...hlttsth..hs..aullElKhps.....................chspWlp-Ll..uchspplspFSKYhpGhAsLa 
..................................................R.phKahlp.pphhtl......pthlh..phhsh..assp..................................................................................................t.sIsSlYFD..s..p.hphapp..clp...ph....p..........tpcplRlRhY..st..t.......................splalEhKp+.................................tpshss............KtRh.slp.t.ppspthl.......pGph.................................................................................................................................................htp..pplh.pElth...hh.hpt...pLpPhhhstYpR......pua......t......h..t......-......s...........p....l.R...lolDpslp..................................................................................................................a+phcls.....t.s.....l.tt............hslhElKhps..........................thPtWlpcll......schshtsspFSKaspuht........................................................................ 0 147 259 354 +9192 PF09360 zf-CDGSH Iron-binding zinc finger CDGSH type Bateman A anon PSI2 target BIG_227 Domain The CDGSH-type zinc finger domain binds iron rather than zinc as a redox-active pH-labile 2Fe-2S cluster. The conserved sequence C-X-C-X2-(S/T)-X3-P-X-C-D-G-(S/A/T)-H is a defining feature of this family [1]. The domain is oriented towards the cytoplasm and is tethered to the mitochondrial membrane by a more N-terminal domain found in higher vertebrates, MitoNEET_N, Pfam:PF10660 [2]. The domain forms a uniquely folded homo-dimer and spans the outer mitochondrial membrane, orienting the iron-binding residues towards the cytoplasm [3]. 
21.20 21.20 21.20 22.20 20.50 20.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.37 0.72 -4.12 158 1359 2009-01-15 18:05:59 2006-11-09 09:59:42 5 27 737 21 688 1317 995 42.40 34 41.39 CHANGED sppsPhtl............................phpp...pphhhCpCGpSp.spPaCDG.oHp .............................................................t...sPhhl............................p.pp...pphhhCpCtpSp.sh.....PaCDG.SHp... 2 241 412 580 +9193 PF09361 Phasin_2 Phasin protein Bateman A anon PSI2 target BIG_183 Domain This entry describes a group of small proteins found associated with inclusions in bacterial cells. Most associate with polyhydroxyalkanoate (PHA) inclusions, the most common of which consist of polyhydroxybutyrate (PHB). These are designated granule-associate proteins or phasins. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.01 0.72 -3.99 132 960 2009-01-15 18:05:59 2006-11-09 11:03:55 5 1 504 0 408 873 126 99.50 18 65.25 CHANGED hcphtstp.+ssh-s....hhshsstshcGhpclsphshphs+sshppuhstscplhusKssp-hhp.l...Qsphspst..hEphlutu+clt-lsspstp-hhcshcsphs ................................................pph.stt.cpsh-s....hhthsphshcu....h....ppl........sp...........hplphs+sshppshsp.h.cph....hs.sK.s...s...pch...hp.h....Qsph.hppt..h-phhs.pu+cltplsppstp-htc.hptph.................. 0 92 225 298 +9194 PF09362 DUF1996 Domain of unknown function (DUF1996) Bateman A anon PSI2 target BIG_243 Domain This family of proteins are functionally uncharacterised. 
19.90 19.90 20.00 20.30 19.30 19.80 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.90 0.70 -4.67 63 716 2009-01-15 18:05:59 2006-11-09 16:06:26 5 20 264 0 462 652 24 214.10 31 52.22 CHANGED DPlVsPGth.ou.HlHplhGusuFshsh.s...h-.hppusCToCshsp.DhSsYWs.....ssLYa.ct.pN.........Goh..chVP.................GuhslYYhtt..............ttslpAFP...Ga.....RMlsGssttp....................t.tts...tptslsapChsstttt.........................spsaPspsCs......sG....lptslhFPoCWD.GpsLD.o.sc..................HpoHhAYPs....th.s.....sGs.CPsuaPl+lPplhaEshWDTstassts...t...ssFshSs..............GDs..oGYuhHuDFl.GW ...............................Dslh.PG...ss.HhHphhG..s................ss........hshs..s......hp.ht.tu..s..sToC......p.pt.DhSsYWs.........Ps.lhh........s............G.p.h..hss..................uthp.sYYhtt.............................ttlpsFPt.....G..h+h.lsGcsptp.................................hsppslsatChssps.t............................t.ths.s..p...Cs......sG....lphp...lhFP......s..CWD.GhsL-.o..sc.............................a+sHhuYss.............t........tGt.CP.s.s.aP.l+lPplhhclhWsss...t.hs...s.ht.............p.h.hhS...................G.......ouashHuDF..htGW............................................................. 1 164 312 412 +9195 PF09363 XFP_C XFP C-terminal domain Wood V, Bateman A anon Wood V Family Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyeraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 [1]. 
21.20 21.20 21.60 21.80 20.70 21.00 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.18 0.71 -4.89 93 1022 2009-01-15 18:05:59 2006-11-10 17:50:17 5 13 862 16 389 968 40 198.30 45 25.77 CHANGED KQPp.QaLoh-pAhpHCspGlGIW-WAS.sD....putE...PDVVhAsAGDlPThEsLAAlslLRcphP-LKlRhVNVVDLh+Lpsp..s..-HPHGLSDc-F.......DslF.TsDKPVIFAFHGYPhLIH+LsY+RsNppNlHVRGY+EcGohTTPFDMsVhNclDRF+LshDslcRlPp....ltspuutlppphccplhcH+pYlpcaGpDhPElpsWpWs .........KQPp.QaLoh-EAtpcsspGlulW-.WAS..s-...........ps.tE..........PDVVhAsuGDhPThEsLAAlslL+..............cphP-..LKlRhVNVVDLh+Lpss.........p....p+P+GLSDc-F.......DslF..Tp.DKPVlFAaHGYshLI+cLhacRs...N....t...c..NlHV+GYcEc.................GshTTPFDM.tVlNclDRF+Lsh-slc....clst....................htspuuth....hpph....pstlhcH+pYl+cpGp.DhPElpsWpW.............................................. 0 98 230 324 +9196 PF09364 XFP_N XFP N-terminal domain Wood V, Bateman A anon Wood V Family Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyeraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 [1]. This family is distantly related to transketolases e.g. Pfam:PF02779. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.05 0.70 -5.91 6 1135 2012-10-02 16:07:47 2006-11-10 17:51:28 5 14 914 16 430 1562 249 358.00 48 46.75 CHANGED hlo-ctLcplDtaWRAANYLulG.IYLp-NPLh...+EPLc.EclKpRLlGHWGTsPGloFlYAHlNRlIpKaDtsMlYlsGPGHGGPAhlusoYL-GoYoEhYPclopDcpGhp+LF+QFSFPGGIsSHhsPETPGSIHEGGELGYuLSHAYGAlhDNP-LIVsCVVGDGEAETGPLATSWHSNKFlNPtpDGAVLPILHLNGYKIuNPTlLuRIs--EL+shFcGhGYcPhaVpu....cDs.shH+hMApshDpsh--IpsIQ+sAR...sssps.RPcWPMllhRTPKGWTGPKa.......lDG..hhsEGoaRAHQVPLuss+cssupLp.LccWhcSY+PEELFDtsGslptslcshsPcG-KRMuuNPpANGGlLpcsL+lPDa+cYuls ...................................................................................................s..s.p.Lptlc...taWRAANYLol..............GQlYLh...c.NPLL...........+cPLp..-clK...s+l...l....GHWGT....s....PG.....NF...lYuHLN....Rl....I...p...c...h..s.......ls.....h..........ha.ltGPGHGGPuh.lu.ss.YL-Go.Yo.E.h.Y.P.c.lo.pDppGhp+LF+pF.....S.F.......P....G.G........l.s.....SHs.u.P.E..TPGSI..H..E..G..GEL....GYuLu....H....A.....a.....G....A.....s.....h.....D.....N.....P....D....L....l...sssllGDGE.AE.T.G.PL..A.sSWa.SNKF.lNP.h....pDG.AVLPILHLNGaKI..u.N..P.T..lL.u.Rho.c.-E.L.pphFcGhGacPh.a.V-G.............cD....tsh.Hpt...h.AtshDpslpcIptIQppAR.................p.s..s...s.s..+P..pWPMIlhRoPK...................GWTGP.+p.......lD.G..p.lEGsaRAHQVPlsss..p...p.s..pHh.chLpp.Wh+SY+P-.ELF....D.......p.......s.G...p....l..h..s-lt.t.....l.sP.p.G.p.p.R.MusNPhsNGG.h.lh....+sLphP.Da+caAl............................................................................ 0 111 259 365 +9197 PF09365 DUF2461 CHP02453; Conserved hypothetical protein (DUF2461) Bateman A anon PSI2 target BIG_248 Domain Members of this family are widely (though sparsely) distributed bacterial proteins, about 230 residues in length. All members have a motif RxxRDxRFxxx[DN]KxxY. The function of this protein family is unknown. 
20.80 20.80 21.40 21.20 20.20 18.40 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.21 0.70 -11.38 0.70 -4.94 99 698 2009-09-11 12:04:33 2006-11-14 14:11:37 5 3 644 0 255 606 173 203.20 28 84.13 CHANGED sssshpFLpcLp..tNNsR-WFpspKspYcp.l+pshhshlspltspls.phcsph.s..h..psslaRIaRDl.RFS+DKoPYKsphusth.ptt.........pp....tsu...aYlclps.st..shluuGhap.....PpsptLpplRppIppsssp...hcpllpphphppha..............................t..s-pLKpsP..................+..Gasp..ccPhl-hL+pKsahshpphss.ptlhsschhcplsptapthtPhhca ................................................................t..pshpFLppLp..tsNs+pW...........Fpp............H+s................pY-p.lcpshpshlppltspht.phc.th.t.....tcs.laRIaRDs.RFS+D.KoPYKsphuuhh.pst...............tchs....tsuaYl...c..lpP..st......shlu.....sGhap.....sp.pstLtthRptItcpspt.......appll.pshphptha...................................................................hst-p..LK..psP.....................................................+..Gast..-hshl-hL+pKsassht.phss...chl.h.s.s.c.hhppltchapthpPhhca....................................... 0 79 170 223 +9198 PF09366 DUF1997 Protein of unknown function (DUF1997) Bateman A anon PSI2 target BIG_266 Family This family of proteins are functionally uncharacterised. 
20.80 20.80 21.60 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.79 0.71 -4.48 34 280 2009-01-15 18:05:59 2006-11-14 15:55:48 5 4 121 0 159 288 167 151.10 21 67.50 CHANGED sp.........tls.cYLpp.pRhhpshh.cshclptLsc.....spa+h..........pltshphhth.plpPslslclhspsss..............h.hp..shclcGlshl.......scaslshcupl..........................tsppstLpucscLsVslph.PthlphlPcsllcuoGcp...............................................lLptlltplppRlsppLhpD....appahtt .................................................................................................t...lt.cYLpp.tphhpthh..cs..p....hpt...ls.c...........ppach.................................phh.hth.hth.plpPhlslplhspsps.......................h.hp..ph..cl.cG..hsh.......sppas.l..s..hpuplh..p........................................tsstsplpsch.clsV.slpl.P.slphlPp..sllcssGst...............................................lLppllpphpt+hhppl.pDappah..t....................................................................................... 0 37 94 136 +9199 PF09367 CpeS CpeS-like protein Bateman A anon PSI2 target BIG_280 Domain This family, that includes CpeS proteins, is functionally uncharacterised. 20.60 20.60 20.80 21.60 20.20 19.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.92 0.71 -4.36 66 209 2009-01-15 18:05:59 2006-11-14 16:11:35 5 3 80 1 90 233 201 158.60 29 89.90 CHANGED shtpFhppStGcWhSpRosHpL...sh..pcs..EsspSplslp..lssscsplhpls...p..tlsss.thhsuhthpWpup.phpp...pspsssslhhhlP..ts..ppGtLLRstGYsEphss...supaphssDs.sLsLpTcYsss.hspERlWFsssslR.hRsSslpphsGhs.......pso.FsoElRp ....................hhpFhptStGpWhSpRosHpLsh....pcs....EpspSplhlc..Lsssssplhpls...p.hplsss...thhsGhplpWpup.thsp...psppusslhshlP..ps....ppGhLLRspGYs.Ephss.......supYphsp-s.sLsLpTcYpph.hstERhWFsssslR.hRsShlpphsG.s........hsoFsoEhRh...................................... 
1 11 50 81 +9200 PF09368 Sas10 Sas10_Utp3_C; Sas10 C-terminal domain Bateman A anon Bateman A Domain Sas10 is an Essential subunit of U3-containing Small Subunit (SSU) processome complex involved in the production of the 18S rRNA and assembly of the small ribosomal subunit. 25.00 25.00 26.60 26.60 23.70 22.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.58 0.72 -3.81 35 308 2009-01-15 18:05:59 2006-11-15 10:51:18 5 4 279 0 228 307 3 76.10 46 13.75 CHANGED hssDuKRtIohpItKN+GLT.+R.pKcp+NPRVKpRtKYcKuhp+h+ophpsh+....pppus...YuGEhoGI+ssls+SlKl ....h.ttsuKRuIoYpItKNKGLTP+R.pKcsRNPRVK+RcKac+App+h+ut.hpsh+.......cppss...YuGEhoGI+sslsKSlKL...... 0 82 128 188 +9201 PF09369 DUF1998 Domain of unknown function (DUF1998) Bateman A anon PSI2 target BIG_296 Family This family of proteins are functionally uncharacterised. They are mainly found in helicase proteins so could be RNA binding. This family includes a probable zinc binding motif at its C-terminus. 24.20 24.20 24.20 24.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.22 0.72 -3.41 165 1448 2009-01-15 18:05:59 2006-11-15 11:31:00 5 16 1110 0 569 1299 129 85.60 28 7.75 CHANGED uhpHALhpthsh..hhthsps-l.shshh.t.........t.ttsslhlYDuhsG..GsGhsppl.....hct..htcllppAhchlpt..........................C..s..C...............p..sGC.sCl ..............................................................shpaAlhthlsh....hh.tss.ps.......-l..shsshhps.........................sthsslhlYDuhsG...GsGhsp.ph........hct.....htcllptuhchlpp...........................C....s...........C..............................................p..sGCssCl................ 0 196 395 515 +9202 PF09370 TIM-br_sig_trns TIM-barrel signal transduction protein Bateman A anon PSI2 target BIG_293 Domain This domain is likely to have a TIM barrel fold related to IGPS. 
Although this family of proteins are functionally uncharacterised this domain is found as an N-terminal domain of sigma 54 -dependent transcriptional activators (enhancer-binding proteins) suggesting a potential role in signal recognition/receiving and signal transduction. 25.10 25.10 25.20 26.00 24.70 25.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.60 0.70 -5.33 20 371 2012-10-03 05:58:16 2006-11-15 17:30:52 5 9 289 6 140 329 41 255.50 56 79.87 CHANGED pplL.cphRpplppG.pPIlG............uGAGoGLSAKstEtGGlDLIlIYNSGRaRMAGRGSLAGLLPYGNAN-IVl-MA.pEVLPVV+c.TPVLAGVsGTDPFpsh-pFLscLKshGFuGVQNF.PTVGLI..DGpFRtNLEETGMGYshEVEMI+tA+phsLLTTPYVFssc-ActMs.cAGADIIVsHMGLTTGGsIG....AcTuhoL--sVphlsslscAA+slssDlIVLCHGGPIApP-DApalLc+sss.scGFYGASSMERLPsEtAIpppsppFKsl ...................t.plLp+h+ppItcG.cPIlG..................uGAGTGlSAKstEAGGhDLIlIYNSGRaRMAGRGSLAGLLsY.GsANpIVl.-MA..pEVLPVVKp.TPVLAGVsGTDPF.p..ph-tFLcpL+shGFuGVQNF.PTVG.....LI..D..GsFRtNLEETGMGYsLEV-MI+hAHchsLLTTPYVF.ss--AhsMs.cAGADIlVsHhGLTTuGsIG....A.c...T..A.....h.oL--sVthlpphtcAA.cpV.psDl..IVLCH.GGPIupP-DApYllcpstt.scGFaGASSMERLPsEtAlpppscpFKsl......................................... 0 40 93 117 +9203 PF09371 Tex_N Tex-like protein N-terminal domain Bateman A anon PSI2 target BIG_312 Domain This presumed domain is found at the N-terminus of Swiss:Q45388. This protein defines a novel family of prokaryotic transcriptional accessory factors [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.09 0.71 -4.98 145 3396 2009-01-15 18:05:59 2006-11-15 18:19:14 5 18 3331 3 724 2645 145 190.50 46 25.65 CHANGED lApELs.lpspQVpuslpLLDEGsTVPFIARYRKEhTGuLD-sQlRplp-RhpYL+cL-cR+pslLcuIcEQGKLTsE...LcppIpsApshscLEDLYLPYK..KRRT+ApIA+EtGLEPLAchlhspspt.....ssp.ptAtpal.....st-.......tt....VssscpALpGA+...cIlAEphuEcAclRpplR..phhhcpGh.lpo.pl..hpsccc-s ........................................................................................................................IupELs.hp.pQlpuslp.L.L.-.E.GsTVPFIARYRKEhTGu....LD-sQlRplc-R................hpYL.+pL--R+pslLc..sIp...EQ......GK......L...Tc.E...Lcpt.ItsupshscLEDLYLPYK.....K.RRT+ApIA+.........E..tG....LEPLA-h.lhspspp.................ss-..ppAtpal.................st-................t...........Vs..sscsALcGA+......pILsEphuE-AsLhsplR..phhh.c.p.u..h.lsSpl..hcscccE.s.................................... 0 230 452 608 +9204 PF09372 PRANC PRANC domain Bateman A anon PSI2 target BIG_313 Domain This presumed domain is found at the C-terminus of a variety of Pox virus proteins. The PRANC (Pox proteins Repeats of ANkyrin - C terminal) domain is also found on its own in some proteins. The function of this domain is unknown, but it appears to be related to the F-box domain and may play a similar role. 23.40 23.40 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.37 0.72 -4.02 77 463 2012-10-02 00:56:31 2006-11-16 17:40:10 5 59 47 0 6 429 0 93.10 24 17.31 CHANGED hssho..laDllhspsp...phhh+hlps.phhph........ph..lYtphlcchIppuhpRpphlppslphlsshhpp......shWshLPhEI+hpIlphLssp-Lphlhp ...................tsholasllhspsp.....phhhRY.hps..p.hhph........h....sh.plYsshlcphltphhp+pphlsp.llcplpss.............shhopLP.EIphpIlphls..-Ltph..h................................... 
1 0 6 6 +9205 PF09373 PMBR Pseudomurein-binding repeat Steenbakkers P, Bateman A anon Pfam-B_12784 (Release 21.0) Repeat Methanothermobacter thermautotrophicus is a methanogenic Gram-positive microorganism with a cell wall consisting of pseudomurein. This repeat specifically binds to pseudomurein. This repeat is found at the N terminus of PeiW and PeiP which are pseudomurein binding phage proteins. 24.90 24.90 25.10 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.08 0.73 -7.34 0.73 -3.77 49 106 2009-01-15 18:05:59 2006-11-27 14:56:45 5 26 19 0 90 104 9 32.50 28 8.87 CHANGED sssplohpphhchss+lhsa.hpppsRh..Pshlsl ..t..splshsphlchssRlhsF.hppssRh..PsYls..... 0 37 67 76 +9206 PF09374 PG_binding_3 Predicted Peptidoglycan domain Moxon SJ, Bateman A anon Pfam-B_8737 (release 8.0) Domain This family contains a potential peptidoglycan binding domain. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.46 0.72 -3.99 20 823 2012-10-01 23:43:47 2006-11-30 10:00:54 5 12 535 9 152 611 209 75.60 34 37.79 CHANGED pAs+hLQ+hL......u.....ltsDGhIGspTlsAl........ssts.ssLhpthssuRhpaYhcLs.......................stspFscGWhsRls ...........................ss+hLQRhL..............u........l.tsDGhIGspTL...uAl..................sppu.tt.sL.lp.sLs...stptpa.Yhpluttp....................spppF.hhGWlpch.................... 0 30 90 125 +9207 PF09375 Peptidase_M75 Imelysin Bateman A anon MEROPS Family The imelysin peptidase was first identified in Pseudomonas aeruginosa. The active site residues have not been identified. However, His201 and Glu204 are completely conserved in the family and occur in an HXXE motif that is also found in family M14. 
21.10 21.10 21.10 21.10 21.00 20.80 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -12.04 0.70 -5.10 169 1931 2009-01-15 18:05:59 2006-11-30 11:01:45 5 5 1429 10 414 1465 252 267.80 27 74.43 CHANGED shssY.....pshhps.spsLtsu...spshs..sssst...............ssLptA..+sAahsARhsappsEshph.................s.h...........................t.tlthWP..sc...........uhl-hshssh...............................t..hssp.ht................sssspGaHAlEalLasps.......................s.t....t..sth.................................sphlp..uhustLhscsppltspWpt.........tshttth.....................t.tt.......ulp..pll...sGhssh.ls-lusp+.ls...sh..............ps.ptcspaScsohtshhs.slpGhpsla.s.............sulps.ll....tpp.ss..s....Lssplcsphss......shstlstl.ps.........shs..................ptl...ssspspth...........lps.h....suLsp.t.spl ......................................................hhssY.csalps.sppLhps.....scsFs......puh...ps.................sclppAKsha.hssRhhYpchEsltt..........................................................th.......uplDhplcup....................................tssh.pt.c.h...........................ssshoGaHtlEhhLasps.......................s................................................................sphhcthAspLhsDspcLppphssht..................h................................................................................ssp.phl....sGtssL..lpElAssK.ls...............................sEE-h...a.S..c.osLhD.apA.NlcGspp..lh.................................shlcs...h...l.......ppp..s.......p....LssclpspFppspshLsph..ps...............p.t......................ppl.......sssstpth...........hss..l...suLsc.hut.............................................................................................................. 
1 114 235 339 +9208 PF09376 NurA NurA domain Iyer LM, Bateman A anon Iyer LM Domain This family includes NurA a nuclease exhibiting both single-stranded endonuclease activity and 5'-3' exonuclease activity on single-stranded and double-stranded DNA from the hyperthermophilic archaeon Sulfolobus acidocaldarius [2]. 20.60 20.60 20.70 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -4.89 65 438 2012-10-03 01:22:09 2006-12-01 16:54:16 5 2 287 10 251 429 41 278.20 14 74.78 CHANGED tplhAlDGS............phth.phpsshhhhlhuhuhsstthtphthhthh.th.hh...............................................................cpthphhhthhEht...............................hhtttppsclllhDGslhtphh........................................................................................................................................h...thtchltth.phlct..........hsllulsKshpspplhsth..........................................................h.Dttllpthh.p.................s.pt.h.............................................hhhtthhtshp.hhhhYlph........ttss.lh+lE........h...................spstcllstlt...............................................hsspGYPhs...LthAcchu+l 
...................................................................................................................................................................................h.phhulDGS............pht...h..phtths..hhhhhs....hu.h..h.s.ht.ht..p.h.thh..h...h.shh....h..h...t............................................................................................................pphhp.h.hthhEht......................................................hh..t.t.t.pp.ss.lllhDGsL.hhhhh........................................................................................................................................h...tht.ph.lt....h..phlcp.............th.llu.llKpsps..p...p.hhphh................................................................................h..tDh.t.lht....thh.p..............................................ss.pt.....h.h...................................................................................................hhthhtphp...lhhhYlph.......................sstlh+lE........h....h............................................tp.pchlshlh..............................................hhp.st.G....aPhs...LthAcp.sph.......................................................................................................................... 1 91 160 213 +9209 PF09377 SBDS_C SBDS protein C-terminal domain Bateman A anon Bateman A Domain This family is highly conserved in species ranging from archaea to vertebrates and plants. The family contains several Shwachman-Bodian-Diamond syndrome (SBDS) proteins from both mouse and humans. Shwachman-Diamond syndrome is an autosomal recessive disorder with clinical features that include pancreatic exocrine insufficiency, haematological dysfunction and skeletal abnormalities. Members of this family play a role in RNA metabolism [2] [3]. 
21.20 21.20 22.80 21.20 20.90 20.00 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.38 0.71 -4.43 27 515 2012-10-02 20:07:24 2006-12-06 09:57:19 5 6 468 6 343 502 98 134.40 28 49.03 CHANGED phhp-IssIlup+slsPpTp+sassshIE+Ah.c-lphu..............................................lsss+oAKpQAL-lIKtLpc..hlPItRAcM+l+lshssctspthh...........................................................ctltshhp.ph..tpp-ptsstaphlshlpPsta+tlpphlppp ..............hh+-llslluppslNPp.T.c+Passsh..IE+Ah.c-l.+hs..................................................................................lcssKssK.pQAL-slKtLpp....h.lPIcRAcM+lclplPs.ph..s.p.p..hh...............................................................................................................................................................................................ppltphhp...hh........cppt....t.s.s..phph.......l.shlsPGhhcpl.-hlp................................................................................... 1 114 198 282 +9210 PF09378 HAS-barrel HAS barrel domain Iyer LM, Bateman A anon Iyer LM Domain The HAS barrel is named after HerA-ATP Synthase. In ATP synthases, this domain is implicated in the assembly of the catalytic toroid and docking of accessory subunits, such as the subunit of the ATP synthase complex. Similar roles in docking of the functional partner, the NurA nuclease, and assembly of the HerA toroid complex appear likely for the HAS-barrel of the HerA family [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.93 0.72 -4.08 60 266 2012-10-02 13:55:04 2006-12-06 11:10:15 5 8 209 12 130 3149 1869 90.80 17 19.30 CHANGED lGhllGssst.pshhhhlps.t...............plphG-hVphp.t................tppllGhlhslpp.................................s.hshlshs..........................clluplcp ..................lG.Vlussss.pththhhcs.................tsphGcaVhlptp................sspllGhlpslpp...............h.p..t...t.......t..sp.hthltht............................h.....h......................................................................... 0 46 82 110 +9211 PF09379 FERM_N FERM N-terminal domain Bateman A anon Bateman A Domain This domain is the N-terminal ubiquitin-like structural domain of the FERM domain. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.55 0.72 -3.93 30 3027 2012-10-03 10:59:06 2006-12-08 09:24:46 5 150 200 46 1524 2604 2 84.60 27 9.05 CHANGED Vp.hLDss.hhphp.....lsspsp.GpplL-pVsp+l.slpEp-aFGLpa.....sps......sph...................................pWL-hs+plp+phsc...............ssshhlhFRlKFas .......................................lhhh.Dsp......h..pht..............lp..tpsp.GppLh-..t....Vsp..+l......sL..h..E..p-.............YFGLta............cpp...................spt....................................................................................................................................pWL.-.sK.pl.p.cQ.hhp........................................................ssh.p.h..hF..plKFas................................................................................................ 
0 372 505 936 +9212 PF09380 FERM_C FERM C-terminal PH-like domain Bateman A anon Pfam-B_851 (release 2.1) Domain \N 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.03 0.72 -3.77 62 2452 2012-10-04 00:02:25 2006-12-08 13:07:32 5 88 124 42 1204 2086 0 90.40 28 10.92 CHANGED cpcssc..lh.LGlsstGlhla...csss+l...ptFsWscIp+lSFccK.................................................+Fhlcht............................................tppppphsFhhss..hcssK..tlW+hClppHp.......F..a...phpcpsps .........................................................................................................................pcGsc..lhLGlsstGlhla................csp.p...+l.....tt..FsW.scIt..+l.Sa.cc.+.................................................pFhIc.lhs..................................................................pppp.ss.hsFhhss......hcssK..plW+hClppHs.......F..aphtp........................................................... 0 270 364 720 +9213 PF09381 Porin_OmpG Outer membrane protein G (OmpG) Mistry J anon pdb_2f1c Family Porins are channel proteins in the outer membrane of gram negative bacteria which mediate the uptake of molecules required for growth and survival. Escherichia coli OmpG forms a 14 stranded beta-barrel and in contrast to most porins, appears to function as a monomer [1]. The central pore of OmpG is wider than other E. coli porins and it is speculated that it may form a non-specific channel for the transport of larger oligosaccharides [1]. 
25.00 25.00 46.20 45.70 21.90 19.60 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.21 0.70 -5.33 4 299 2012-10-03 17:14:37 2006-12-08 14:43:39 5 1 282 10 9 87 0 288.50 92 99.37 CHANGED MKKLL.CTALVMCAGMACAQAEEKNDWHFNIGAMYEIENVEGYGEDMDGLAEPSVYFNAANGPWRISLAYYQEGPVDYSAGKRGTWFDRPELEVHYQFLESDDFSFGLTGGFRNYGYHYVDEPGKDTANMQRWKIAPDWDVKLTDDLRFNGWLSMYKFANDLNTTGYADTRVETETGLQYTFNETVALRVNYYLERGFNMDDSRNNGEFSTQEIRAYLPLTLGNHSVTPYTRIGLDRWSNWDWQDDIEREGHDFNRVGLFYGYDFQNGLSVSLEYAFEWQDHDEGDSDKFHYAGVGVNYSF .................................MKKLLPCTALVMCAGMACAQAEE+.NDWHFNIGAMYEIENVEGYGEDMDGLAEPSV......YFNAANGPWRIuLAYYQ...E...G.P.VDYSAG.KRGTWFDRPELEVHYQFLEsDDFSFGLTGGFRNYGYH.YVDEPG..KD..TA...NMQRWKIAPDWDVKLTDDLRFNGWLSMYKFANDLNTTGYADTRVETETGLQYTFNETVALRVNYYLERGFNMDDSRNNGEFSTQEIRAYLPL.....TL.....G.N.H...SVTPYTRIGLDR.WSNWDWQDD.IEREGHDF.NRVGLFYGYDFQNGLSVSLEYAFEWQD.H.....DE...GDS..D.....KFHYAGVGVNYSF.............................. 0 1 3 5 +9214 PF09382 RQC RQC domain Bateman A anon Pfam-B_571 (release 21.0) Domain This DNA-binding domain is found in the RecQ helicase among others and has a helix-turn-helix structure. The RQC domain, found only in RecQ family enzymes, is a high affinity G4 DNA binding domain [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.18 0.72 -4.32 174 3296 2012-10-04 14:01:12 2006-12-11 14:34:43 5 41 2926 9 924 2638 355 104.30 30 14.90 CHANGED DsTptApclLSslh+h..........t..p....p..........a..Ghshlh-lLp.........................Gspsp+ltph....sa.cplssa..GlGc.chsppphpsllcpLlspGhL.....phshp......pas.sLplspp.u..cslLc..Gc....p..plhlphst ................................................DsTpcAppsLSslhRs.................sp.........+.........FGhshll-V.L+.........................G..s..p.s.p..+lpph.....sa..-p..L.ssaGlG.......+..-.....h...op..cc........hpsll+pLltt.Ghl.........p.shs.......pas..sLpL..Tct..u..+slL+.Gct..pl.ht...s..................................... 0 278 557 764 +9215 PF09383 NIL NIL domain Bateman A anon Pfam-B_524 (release 21.0) Domain This domain is found at the C-terminus of ABC transporter proteins involved in D-methionine transport as well as a number of ferredoxin-like proteins. This domain is likely to act as a substrate binding domain. The domain has been named after a conserved sequence in some members of the family. 20.60 20.60 20.60 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.05 0.72 -4.41 184 4317 2009-01-15 18:05:59 2006-12-11 17:17:47 5 25 3162 22 702 2638 28 75.40 25 22.67 CHANGED sstll+LpFsGpssppPllupls+ca....s.lssNILtGsl-plpstshGpLllpl......t..Gsppp..hppulpaLp.pps.lpl.Ell ...............................s...ll+LpFsG.p.s.ssp.Pllupls+ca....s.lssNILtusl-hlp.s..sshGtLllpl................................p..Gsppp..hptAlpaLp.pp.t.Vpl.Ell...................... 0 181 399 562 +9216 PF09384 UTP15_C U3_snoRNA_C; UTP15 C terminal Mistry J, Wood V anon Pfam-B_7112 (release 21.0) Family U3 snoRNA is ubiquitous in eukaryotes and is required for nucleolar processing of pre-18S ribosomal RNA [1]. It is a component of the ribosomal small subunit (SSU) processome. 
UTP15 is needed for optimal pre-ribosomal RNA transcription by RNA polymerase I, together with a subset of U3 proteins required for transcription (t-UTPs) [2]. This entry represents the C terminal of UTP15, and is found adjacent to WD40 repeats (Pfam:PF00400). 26.90 26.90 27.40 27.60 26.00 26.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.45 0.71 -4.51 40 321 2009-09-21 22:48:38 2006-12-12 09:50:17 5 11 278 0 219 324 1 147.80 28 28.41 CHANGED +Gps...hhscp--hllppt..........ppp+lppa-+hLppF+aucALDssL.........tstpscsslollpELp+RG......sL+tALtsRDEtoLpslLpalh+pls-sRassllhcssssllDlYusslspSs..hlcchltpLpp+lpcElchppchhplpGMl-...hL ..................................t.phhspts-hl.lspt..........pcp+lp...ta-+tL+pF+........aucALDpsL................tspssphslollpELt+Ru......sL+sALsuRDEpsLpslL.palh.+pl.s..c...PRassllhsluthllD........l..Y..u..sh.l...sp.Ss.......hlcchh.hpLppplpcElchppphhphtGhl-hl................................................................... 0 77 122 179 +9217 PF09385 HisK_N Histidine kinase N terminal Mistry J anon pdb_1yku Domain This domain is found at the N terminal of sensor histidine kinase proteins. 24.40 24.40 24.90 26.60 24.30 24.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.62 0.71 -4.55 7 130 2009-01-15 18:05:59 2006-12-12 10:37:03 5 3 103 3 13 66 0 137.40 58 43.26 CHANGED Mp...........sspphLssaLcpppcpFlpsW+p+llls-cD.a+-cllpNGptlhchhhphhpcphs.p...lp.lupKIApERh-AcsNIu-FVYNsNhGRpElhphlstlssshp-LpsllccINhhFD+hlYasVppYs- ..............................................MtVFPIDK..DIKElFCSHLKNNRHQFVENWKNKM.....IIS-KDPFK.EVVQNGEcLLEhIIELhME-KDIs..YLQPLCEKIAIERAGADANIGDFVYNANVGRNELFEAMC.....ELDVSAR..ELKPIM...spIHTCFDKLIYYTVLKYSE.... 0 1 7 9 +9218 PF09386 ParD Antitoxin ParD Mistry J, Bateman A anon pdb_2an7 Domain ParD is a plasmid anti-toxin than forms a ribbon-helix-helix DNA binding structure [1]. 
It stabilises plasmids by inhibiting ParE toxicity in cells that express ParD and ParE. ParD forms a dimer and also regulates its own promoter (parDE). 22.80 22.80 23.30 23.50 22.30 22.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.65 0.72 -3.86 4 38 2012-10-02 18:44:02 2006-12-12 14:31:42 5 1 37 2 7 32 2 77.80 61 92.81 CHANGED MSRLTIDITDQQHQSLKALAALQGKTIKQYALERLFPsDsDuDQAWQ-LKsLLGsRIu-GLAGKVSsKSlsEILsEELu ..MSRLTIDlT-QQHQSLKALAALQGKTIKQYALERLF...P..uDuDuDQAWpEL+sLLssRIppGLsGcVSsKSlscILDEELs......................... 0 1 6 6 +9219 PF09387 MRP Mitochondrial RNA binding protein MRP Mistry J anon pdb_2gid Family MRP1 and MRP2 are mitochondrial RNA binding proteins that form a heteromeric complex. The MRP1/MRP2 heterotetrameric complex binds to guide RNAs and stabilises them in an unfolded conformation suitable for RNA-RNA hybridisation. Each MRP subunit adopts a 'whirly' transcription factor fold [1]. 18.60 18.60 18.80 18.80 18.50 18.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.31 0.71 -4.58 11 164 2009-01-15 18:05:59 2006-12-12 15:52:17 5 4 50 14 43 134 1 157.00 29 36.07 CHANGED suts.u-usustustusssuo....+ppssusppt.Rs...PAF-IsHhsc-csupGs.atlsVst+sshl.p.PsLD.+..Rp.pplDp.............Ncss.Qscu-R.sloVphpslasAphluVLcuRhsplcspsphhsAsFsPsspsYshctpl+psuop................psh-W.....oVcF-sthulhLc+FLppAL+.shGFucphs .......................................................................tst...ttts.....t...t..p..........t.hpl.+.stps.s.u.........t...s......htt.h..........Ls.p......tss........................s....t.pu-h...h...h.t........ssa.sp.sthlptRhshhps.s....h.......ssphY.hctp.+...tst...................................................................................................sth.W.....hhphss.hhuhh..hhLppsL...........h................................................................................... 
0 6 9 19 +9220 PF09388 SpoOE-like Spo0E like sporulation regulatory protein Mistry J anon pdb_2c0s Family Spore formation is an extreme response to starvation and can also be a component of disease transmission. Sporulation is controlled by an expanded two-component system where starvation signals result in sensor kinase activation and phosphorylation of the master sporulation response regulator Spo0A. Phosphatases such as Spo0E dephosphorylate Spo0A thereby inhibiting sporulation. This is a family of Spo0E-like phosphatases. The structure of a Bacillus anthracis member of this family has revealed an anti-parallel alpha-helical structure [1]. 20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.94 0.72 -4.44 47 988 2009-01-15 18:05:59 2006-12-12 16:44:01 5 4 287 3 195 665 0 44.70 32 66.69 CHANGED pLhppIEp+Rcchlplstph.GlsspcslphSQELDpLlNpYp+hp .............................LpppIEtpRccLh...p.l..s...pch...Gh..s.pp.cllphSQELDcLlNpa.ph.h............ 0 68 144 154 +9222 PF09390 DUF1999 Protein of unknown function (DUF1999) Mistry J anon pdb_2d4o Family This family contains a putative Fe-S binding reductase (Swiss:Q72J89) whose structure adopts an alpha and beta fold. 25.00 25.00 85.50 85.30 21.90 20.00 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.94 0.71 -4.88 3 18 2012-10-02 22:59:21 2006-12-13 11:24:32 5 1 18 2 12 18 1 148.80 53 97.85 CHANGED M...RYRsFoEPDaEsLQALDLAtQRRssPtaDsLP-REpAGRLSoSLuALRFYERSGHSFVAEsEu-clpGalLAQuVWQGDRPsVLVRAllLs-upsEDstRGLLRAVVKSAYDAAVYEVHLPLsP...ELEAAARAEEA+LTG.sYAVpHLGTRAAoAPG++LtR ...............................M+aRsFsE.Dh-tLptL....................tGchssSLuALRFFsRTGHSFlAp.EG-cshGFsLAQAVWQG-tsTVLVsRl...-ups....t....-shcGLLRAVVKSAYDAGVYEVALpL-Pt.+t-LcpALcA-Gas.lushsLAVRVLGSRGtRuEsctVL.E. 
0 3 7 12 +9223 PF09391 DUF2000 Protein of unknown function (DUF2000) Mistry J anon pdb_2gax Family This is a family of proteins of unknown function. The structure of one of the proteins in this family has been shown to adopt an alpha beta fold. 25.00 25.00 26.20 30.30 20.70 17.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.37 0.71 -4.49 36 322 2012-10-02 19:40:38 2006-12-13 13:31:01 5 2 297 2 106 309 39 130.80 30 94.96 CHANGED -sKhslllcc-LsshptLNVsAaLusGlus..stP-hlGpsYtDusGphahslht.PlllLpu.spspLppl+pculp+...slshssaopshhsTGpppspptshttsst--lchlGlALhGs+KhVcKlT+uhsLat ............pKhslllcpcLssh.thNssAhLuhulut........phP-llGp..s.htDusGpta.slht.Pl.lLpu.stptLppl+pcuhsp....slhhssFspth.ssss.pp.ptshtssstsslphlGlulhGs+KtVsKlstuhsLa........................ 0 25 52 81 +9224 PF09392 MxiH Type III secretion needle MxiH like Mistry J anon pdb_2ca5 Family Type III secretion systems are essential virulence determinants for many gram-negative bacterial pathogens. MxiH is an extracellular alpha helical needle that is required for translocation of effector proteins into host cells [1]. Once inside, the effector proteins subvert normal cell function to aid infection. 27.40 27.40 27.60 27.90 27.20 27.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.90 0.72 -3.42 39 1127 2009-01-15 18:05:59 2006-12-13 14:16:18 5 1 510 43 74 367 1 79.00 20 87.18 CHANGED lsthspthpssssst...............................................tpplppthpshptt...................................................ssP...pplhphQtplspaolhhslpopslpthppslpsll.ph .............................................................hh..................................................................tlpt..tthtpts.......h......................................................................st.hs...s...P.....ptlhphQhsltpaolhhshpSsls+thpshlssll.p.h....... 
0 17 35 50 +9225 PF09393 DUF2001 Protein of unknown function (DUF2001) Mistry J anon pdb_2guj Domain This family includes phage-like element PBSX protein (Swiss:P54332) whose structure adopts a beta barrel flanked with alpha helical regions. 22.40 22.40 22.40 23.30 21.80 22.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.48 25 231 2012-10-01 22:58:23 2006-12-13 17:06:55 5 1 162 2 56 189 2 140.60 30 95.64 CHANGED M......hcupcsIsGphGclal....cspphspspphpAclchsKp-lshhGcphsppKssGhcGoGolThY+VoShhhphhhchlKpGp-shFclhspLcDPsS.hGt..EclsLcssshDclslAsa-ss..-hlEEEsPFTF--a-ll- ..................................hhcupcslsGp.Gchal.....cspphtphpphEAclEhsKp-lplhGc.hhpspKs..sGhpGoGohThap.loShhtph.hhch.l..+p.G.....p.-.hF.sl.ssh-DPso...hGp...cplhLpsss.hD...s...hhlush-ss..-hlE.EEhsFTFpca-h.-................................. 0 24 47 50 +9226 PF09394 Inhibitor_I42 Chagasin_I42; Chagasin family peptidase inhibitor I42 Mistry J, Bateman A anon 2fo8 Domain Chagasin is a cysteine peptidase inhibitor [1] which forms a beta barrel structure [2]. 22.30 22.30 22.30 23.60 21.80 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.12 0.72 -3.65 103 354 2009-10-19 12:22:43 2006-12-13 17:26:24 5 19 270 16 162 332 18 92.20 23 51.54 CHANGED lsl..phGp.phtlpLspNPo..TGYpWplpt........sshlpl..hss......pahss..t.ss......hlGuuGhph.aphpuhps....G....psplphhYtRsW-ssss...ppash.slpV ................plp.Gp.phhlpLsuNPo..........T.GYpWphps...........sssslph....hss....................pahss...s.ss.t........hlGu.u.Ghph.apapuhps.....G....psplphtYtRsW-ssst...pphshpl.l.................... 0 55 103 136 +9228 PF09396 Thrombin_light Thrombin light chain Mistry J anon pdb_2b5t Domain Thrombin is an enzyme that cleaves bonds after Arg and Lys, converts fibrinogen to fibrin and activates factors V, VII, VIII. 
Prothrombin is activated on the surface of a phospholipid membrane where factor Xa removes the activation peptide and cleaves the remaining part into light and heavy chains. This domain corresponds to the light chain of thrombin. 20.60 20.60 20.80 22.20 19.50 18.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.62 0.72 -4.35 13 80 2009-01-15 18:05:59 2006-12-14 17:18:55 5 8 46 403 27 111 1 47.70 64 8.36 CHANGED TshspacoFFss+TFGsGEADCGlRPLFEKKslpDpoE+ELL-SYhpGR ....Tsss-apsFFs.+TFGs....GEADCGLRPLFEKKslcD+oE+ELL-SYh-GR... 1 1 3 10 +9229 PF09397 Ftsk_gamma Ftsk gamma domain Mistry J anon pdb_2j5p Domain This domain directs oriented DNA translocation and forms a winged helix structure [1]. Mutated proteins with substitutions in the FtsK gamma DNA-recognition helix are impaired in DNA binding [1]. 25.10 25.10 25.40 25.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.10 0.72 -4.26 130 5039 2012-10-04 14:01:12 2006-12-15 16:44:09 5 23 4114 16 987 4009 1919 66.30 43 7.56 CHANGED ssp.ppDsLa--AlplVlcpppASsShlQR+hRIGYNRAARLl-pMEppGlVushpus.tsR-VLh....tp ..................s.tptDsLa-cAsph.Vl....c...p..p..+....ASsShlQR+h+IGYNRAARll-phEppGlVust.p.u.s.tsR-VLh..t.............. 0 318 635 820 +9230 PF09398 FOP_dimer FOP N terminal dimerisation domain Mistry J anon pdb_2d68 Domain Fibroblast growth factor receptor 1 (FGFR1) oncogene partner (FOP) is a centrosomal protein that is involved in anchoring microtubules to subcellular structures. This domain includes a Lis-homology motif. It forms an alpha helical bundle and is involved in dimerisation [1]. 
29.40 29.40 29.40 29.40 29.30 29.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.85 0.72 -3.99 6 189 2009-01-15 18:05:59 2006-12-18 11:26:16 5 6 93 2 120 199 3 75.50 35 27.53 CHANGED K.oPLsNENLKKhlsT+-...GRLVAsLlpEFLpFFpLDFTlAVFpPEuuh.pshpsRpsLuK-LsIs-u-ssKssPLLhEll++ ..................................p...shLlspLlpEaLpF.phcaThuVap....sEo..up..s.p.s...........s....Rp.Luc-Lslh-.....s..pt...s.ss..h..PLLhtllt................................. 0 50 60 86 +9231 PF09399 SARS_lipid_bind SARS lipid binding protein Mistry J anon pdb_2cme Family This is a family of proteins found in SARS coronavirus. The protein has a novel fold which forms a dimeric tent-like beta structure with an amphipathic surface, and a central hydrophobic cavity that binds lipid molecules [1]. This cavity is likely to be involved in membrane attachment [1]. 25.00 25.00 197.30 197.10 19.70 15.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.42 0.72 -3.95 4 60 2009-01-15 18:05:59 2006-12-18 11:57:56 5 1 59 8 0 23 0 97.70 94 100.00 CHANGED MDP.KTNVVPPALHLVDPQIQLTITRMEDAVVHGQNNADPKVYPIILRLGSQLSLSMsRRNLDSLEARsFQSTPIVVcMTKLATTEELPDEFVVVTAK MDPNQTNVVPPALHLVDPQIQLTITRMEDAMGQGQNSADPKVYPIILRLGSQLSLSMARRNLDSLEARAFQSTPIVVQMTKLATTEELPDEFVVVTAK 0 0 0 0 +9232 PF09400 DUF2002 Protein of unknown function (DUF2002) Mistry J anon pdb_2g7j Family This is a family of putative cytoplasmic proteins. The structure of these proteins form an antiparallel beta and sheet and contain some alpha helical regions. 
19.70 19.70 20.40 26.20 19.00 18.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.49 0.72 -4.34 4 520 2009-01-15 18:05:59 2006-12-18 12:57:38 5 2 516 1 30 104 2 111.10 86 97.78 CHANGED MYLRPDEVARVLEpsGFphDhVTscuYGYR+GEpYVYVNREARMGRTALlIHPsLK-+SssLApPsSsIKTss+Y.pFPLYLuG.thpE+YGIPHGFSSR.uLppaltphF.....c .........................MYLRPDEVARVLEKVGFTVDVVTQKAYGYRRGENYVYVNREARMGRTALVIHPTLKERSSoL.....AEPASDIKTCDHYQQFPLYLAG..EpHEHYGIPHGFSSRlALERYLNGLF.GE............... 0 1 4 17 +9233 PF09401 NSP10 RNA synthesis protein NSP10 Mistry J anon pdb_2fyg Family Non-structural protein 10 (NSP10) is involved in RNA synthesis. it is synthesised as a polyprotein whose cleavage generates many non-structural proteins. NSP10 contains two zinc binding motifs and forms two anti-parallel helices which are stacked against an irregular beta sheet [1].\ A cluster of basic residues on the protein surface suggests a nucleic acid-binding function. 21.50 21.50 22.10 38.80 20.00 21.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.64 0.71 -4.39 11 653 2009-01-15 18:05:59 2006-12-18 16:29:24 5 34 206 53 0 662 0 121.00 53 2.47 CHANGED ssNSslLolCuFuVDPupsYlDhVpsGupPlsNCVKMLss+oGsGhAITssP-AshsQ-oYGGASlClYCRsHl-HPshsGhC+hKGKaVQVPh.sspDPltFsLcNsVCsVCGhWhsaGCsCDt ....ssNSulLSlCAFAVDPAKsYhDalssGspPlsNCVKMLssHsGoGhAITssP-ushsQ-SaGGASsClYCRs.Hl-HP....s.hcGhCchKGKaVQlPo.stpDPVuFsLcNcVCsVCGhWhsaGCsCDt... 0 0 0 0 +9234 PF09402 MSC MAN1_C; Man1-Src1p-C-terminal domain Mistry J anon pdb_2ch0 Domain MAN1 is an integral protein of the inner nuclear membrane which binds to chromatin associated proteins and plays a role in nuclear organisation. The C terminal nucleoplasmic region forms a DNA binding winged helix and binds to Smad [1]. This C-terminal tail is also found in S. cerevisiae and is thought to consist of three conserved helices followed by two downstream strands [2]. 
21.70 21.70 22.00 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.04 0.70 -5.73 37 386 2009-01-15 18:05:59 2006-12-20 11:29:55 5 12 255 1 248 363 1 257.20 21 43.10 CHANGED hLlhshhlha.....shaatppphtlGaCupsh.st....................................................C..CP.puhCh...plpCctsahht...h..................sspCh.Dsc+tphlptl..hcthhchL+p+suphcC.G................t.psshssslsts-..........................ltchltppp....hhstppF-p.hapsslttlpcp....-lhhp.......................................................................spthhhusohsplslpC..thppplpphlh..........chthhlhslhlhhhsh...hhl+hthppcppcptplppllppslcpLppptt...........t.spsppsalslspLRDp.....lltsptphp........ppplWp+lhphl.-p.susVcsphtE.htG-hh+sWEWlssh ..............................................................................................................................................................................h.............................................................................................................................................................................................................................................h........ctpttthhptl...pphhphLtpptuphpC.G.............................t..p.tls.t-..........................htphltp.p..........tp...hpp.haptsl.t..lhpp...........t-lh.ht........................................................................t..s.sp.hhhhp.sshsphshtC..thpp.th...hthht...............phhhh..hhslhhh.h.hhh.......hhhc.hphp.pt....cppptth.p....hlppll...chLpsptt.................t.s..tt.psalslsplRDp.....ll.sp.pchp.........................................hpplWp+shphl..tt.poplpsp.pc..hsGc.hhsWcWhts.s........................................................................................ 0 80 125 192 +9235 PF09403 FadA Adhesion protein FadA Mistry J anon pdb_2avr Family FadA (Fusobacterium adhesin A) is an adhesin which forms two alpha helices. 
25.70 25.70 26.00 25.70 24.70 25.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.63 0.71 -4.12 3 115 2009-01-15 18:05:59 2006-12-20 11:44:49 5 2 30 9 12 93 0 119.50 35 97.25 CHANGED MK.KhLLhuhLlLSuhSaAA--......AtpllSELKuL-AEYQsLspcEEARFpEEKppuEsAcppltcLcElpsulEE+lt+LpEEuKTRFaKDpYccLhK+Y--YLsKLEpKIs-pcplIS-FEKIQclR ...................................MK.K..hl..Lh..s.hLlluuhuaAAps...............stplhuE..lpsl-AEYQpLhpcEptRhpE.+ppt-...shcppltp.cph.tphpchht+Lpp-.uchRaa+..cpYpcLh.K+ac-hhpcLEpchtcpcphIsphpKl.tlh............... 0 6 11 12 +9236 PF09404 DUF2003 Eukaryotic protein of unknown function (DUF2003) Mistry J anon manual Family This is a family of proteins of unknown function which adopt an alpha helical and beta sheet structure. 19.80 19.80 21.60 20.30 19.10 19.00 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.44 0.70 -6.07 3 98 2012-09-27 07:58:09 2006-12-20 14:16:26 5 7 69 1 71 99 0 328.40 41 81.61 CHANGED MsDs...EELRPVP+ERAVLESFFTQLGMFSFDRAKDYVEKEKDASKSAGAIWuSLLAALAHLAAAEKlYHNMTFLGQKLGGQSFFSRKDSIRTIYTSLHNELKKVVoMGRsAPGGSAPsLEELLPHLSEQLCHFlQARMEIADFYEKMHoLGSQKoINSEELVoTLDoIL+KYSSRFHHPILSRLESSFQlEVDVLTQLLRCQAQISEW+FLPSLLoLHGAHSKLQoWGQlFERQRETRKHLFGGQSQKAVQPPHLFLWLQRLQAsLLAKFSFYFHEALSRQTosSEMKALTARTuPDYFGKISSFIRKYDASNVSLVFDNRGSESFQGHGYHHPHSYREAPKGVDQFPAVVSLPSGERPVTHWPNVIMIMoDRAoELNTLEKVVHFYDDKVQSTYFLoRPEPHFTIVVIFDGRKSERDSaIlAFLQELsGSLRNSKPFoSLKPGSKG 
.............................................................................................................................hh.th..hs.hcp.Yhph.ah..c.....hht+.pp.o.hh...YpsLhsph...pph.t...................................t.......p......hhsplspQLs.FhpARh-hhshYcp.hhshu.sp..p..h..s.ccLlshL-slhcpasp+hpHs..hLp...lcsuhphElslLtpLL+sQsplscacFLs.oLlsLasA+.....s+Lp..sWs.p.h...h....p.pp..ppo..pK..phFuu..pspKs......sp....sP.pLahWL.+hpshLLuKFShYFH-sLSp...Q.....so......s....u..-.M.K.slsu+s....ss.Dhhs....+IpuFhRKhD.u..slsLla....Ds..+u.s....-.s..ap.upGYpaPpp..p-sPp.....G..l.cpaPslhohP.s...p.......p.Ph...HhPNllhl......ht-pt..s-Lss.h.-+llaahD....s.+.lps.................TY...alsps-sphhlV.lIac.s.p+.p-+DphhhsFlp-ls.tL+ssKhht.L+.......................... 0 31 36 54 +9237 PF09405 Btz CASC3/Barentsz eIF4AIII binding Mistry J anon pdb_2jou Domain This domain is found on CASC3 (cancer susceptibility candidate gene 3 protein) which is also known as Barentsz (Btz). CASC3 is a component of the EJC (exon junction complex) which is a complex that is involved in post-transcriptional regulation of mRNA in metazoa. The complex is formed by the association of four proteins (eIF4AIII, Barentsz, Mago, and Y14), mRNA, and ATP. This domain wraps around eIF4AIII and stacks against the 5' nucleotide [1][2]. 
20.80 20.80 21.50 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.24 0.71 -4.10 21 266 2009-01-15 18:05:59 2006-12-21 09:49:36 5 7 179 10 185 259 0 121.40 24 19.09 CHANGED sssssppsup.sst..tt...t..pptccp-pcchcc-ccpsPs..aVPsRGsFFhHDcRss-sssssh+................R.hu......................h...s.h..................t+phpssutpspWsHDha-phss-psPpsp.phhsthshshpspss ........................................................................tts................t..t..tp..........tttcct..t..p.c.ttpccccpsPt..alPp+GsFahHDc.Rtp.sst...p...th+....................pstG.......................................h........................................t+thpsss....t.ttpWpHDpacp...pc..ptsppptphht..t........tt.......................................................... 0 52 87 143 +9238 PF09406 DUF2004 Protein of unknown function (DUF2004) Mistry J anon pdb_2aby Family This is a family of proteins with unknown function. The structure of one of the proteins in this family has revealed a novel alpha-beta fold [1]. 22.40 22.40 22.90 24.00 21.20 20.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.17 0.72 -3.98 22 148 2009-09-10 18:54:03 2006-12-21 10:54:57 5 1 128 1 20 117 5 99.60 26 66.38 CHANGED phpcpAcpAltshhpsc........ppahpaHl--httchh..hh...t...sshpphlctlpltths.a.p.tst....hhhhDathss-.hoD.llsVpFcppGclhp.lsaES ..................................p.htppA+pAlhp.hcp-........shYh-FHt--hspphh...h...........shpphlctl..pLttls.ahs..pst.pp.shs...hDaphss-..oDElLsV+acpcGclhp.lsaES... 1 7 14 17 +9239 PF09407 DUF2005 Protein of unknown function (DUF2005) Mistry J anon pdb_1zel Family This is a family of proteins with unknown function. 
25.00 25.00 56.60 32.70 20.50 19.30 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.10 0.70 -5.67 2 47 2009-01-15 18:05:59 2006-12-21 11:07:22 5 2 45 2 3 12 0 279.90 91 99.00 CHANGED M....................VSPAGADRRIPTWASRVVSGLARDRPVVVTKEDLTQRLTEAGCGRDPDSAIRELRRIGWLVQLPVKGTWAFIPPGEAAISDPYLPLRSWLARDQNAGFMLAGASAAWHLGYLDRQPDGRIPIWLPPAKRLPDGLASYVSVVRIPWNAADTALLAPRPALLVRRRLDLVAWATGLPALGPEALLVQIATRPASFGPWADLVPHLDDLVADCSDERLERLLSGRPTSAWQRASYLLDSGGEPARGQALLAKRHTEVMPVTRFTTAHSRDRGESVWAPEYQLVDELVVPL.RVIGKA ..............................hlSPAGADRRIPTWASRVVSGLARDRPVVVTKEDLTQRLTEAGCGRDPDSAIRELRRIGWLVQLPVKGTWAFIPPGEA...AISDPYLPLRSWLA.RDQNAGFMLAGA..SAAWHLGYLDRQPDG.RIPIWLPPAKRLPDGLASYVSVVRIPWNAADTALLAPRPALLVRRRLDLVAWATGLPALGPEALLVQIATRPASFGPWADLVPHLDDLVADCSDERLERLLSGRPTSAWQRASYLLDSGGEPARGQALLAKRHTEVMPVTRFTTAHSRDRGESVWAPEYQLVDELVVPLLRVIGKA..................................... 0 2 3 3 +9240 PF09408 Spike_rec_bind Spike receptor binding domain Mistry J anon pdb_2dd8 Domain Spike is an envelope glycoprotein which aids viral entry into the host cell. This domain corresponds is the immunogenic receptor binding domain of the protein which binds to angiotensin-converting enzyme 2 (ACE2) [1]. 
25.80 25.80 26.00 26.50 22.00 25.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.56 0.70 -4.83 11 626 2009-09-14 15:17:47 2006-12-21 12:06:25 5 3 218 29 0 442 0 166.60 48 21.86 CHANGED PNLPD.CsIEcWLsApoVPSPLNWER+pFSNCNFNhSoLhpalQA-SFoCNNIDASKlYGhCFuSlolDKFAIPsSRplDLQlGsSGaLQohNYKIDTsuTSCQLYYSLPtsNVTls...NaNPSSW.............................NRRYGFNshshht......hspHDVVYuppCFssssoaCPCtpss.......hhusCls.........spssousC.PsGTphhpC.tthshsh..........tCcCsCsPcPhs ............................t.C.ht.hhss.....PpshsWcRhhhpsCshshs.lhp..h.htph.C.shs.s+l.s.Catsl.hD.Fhh.tt...........................................................t.h..........................................................s..........sh.psps........hGhhspHDVVYApcCFp.....APssaCPCKLsu.........u...hCVs.s......hppTGhGsC.PsGTsY..hsCapts.................sDslssPcshp............................................................................................................... 0 0 0 0 +9241 PF09409 PUB PUB domain Mistry J anon pdb_2cm0 Domain The PUB (also known as PUG) domain is found in peptide N-glycanase where it functions as a AAA ATPase binding domain [1]. This domain is also found on other proteins linked to the ubiquitin-proteasome system. 20.30 20.30 20.30 20.30 20.20 19.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.79 0.72 -4.22 76 656 2009-01-15 18:05:59 2006-12-21 13:09:37 5 60 181 5 451 632 8 85.50 22 15.60 CHANGED pshcphtpslphLh.cllpNIlppPsctKaRpl+hsNpshppplhsh.ps....uhplL.phhGFpppt.........pthhhhs...tts.......shttlpphhpt.......L ..............................t...tthtpuhphLh.phlsNll...p.....p..P......p..-......p...KaRpI+hsN.tsFppcltsl.pG......uh-hL.pthGFpct.................pphhhhs.....t..........p...........hh.......................................... 
2 189 262 372 +9243 PF09411 PagL Lipid A 3-O-deacylase (PagL) Mistry J anon pdb_2erv Family PagL is an outer membrane protein with lipid A 3-O-deacylase activity. It forms an 8 stranded beta barrel structure [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.81 0.71 -4.07 51 691 2012-10-03 17:14:37 2006-12-21 16:16:45 5 3 608 2 225 817 917 139.70 20 62.70 CHANGED sssshpshplshpashsh.......phtttplphhh-sshshhp.....sst........ssth.hGhssh.hpaphss.t.....halEsGlGsthhspsphs......................t.phuotFpFpsplGhGhpassst......pluhRapHhSNAGlp.p.PNsGlsthslhhuhsF ..................................................................................................s......h.h.h.ht...........p..hhth.t..h..ch.shs.hhp......sst.............ts.h.hG..hs.h..hth.hhtt......hah..phG.hG.st...hhspspss......................thp..l.uos.a.p..F......tpp..hul..G..hpasst........psuhphpHhSNuul....p.p.sNsGhN.h.h.shpluhsa............................ 0 59 132 186 +9244 PF09412 XendoU Endoribonuclease XendoU Mistry J anon pdb_2c1w Family This is a family of endoribonucleases involved in RNA biosynthesis which has been named XendoU in Xenopus laevis. XendoU is a U-specific metal dependent enzyme that produces products with a 2'-3' cyclic phosphate termini. 
25.00 25.00 29.60 29.40 24.40 24.40 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.80 0.70 -5.35 40 279 2009-01-15 18:05:59 2006-12-22 09:55:27 5 13 125 3 203 282 1 246.00 34 69.40 CHANGED stsss-lpslsppLWshD....sN+h.............sutca.plshQsptsshsp.....sDt.AspsLFshVs-shhp..pP....TautahsLlDNYphssG.....hsE.plTsp..EppEppsFL-tlh.pTslM+hhapaLhpKsh.......ssss.psF+.phLpplWFphYsRssut......h-.SSGFEHVFVGEhK.s..........sclpGhHNWlpFYlpE..+pGp......lDYpGYhhctpts...s........lLslQFpWs..........ul.hKsluShFlGsSPEFElALYTlCFlsssc......cp.splplus..hplsIhs..aphtpps.......IGouYP ...........................................tp-lpplsppLaphD.....Nph.................tpph.hlshQsphps..tp......tDt.usps.LFshVspphhp..ps....TatthhsLhcN..Yphs.su.....hsE..hho.sp..chpEpssFLctlh.pTslM+......hapaLhpKsh.......h.ss.p.pF+.p.Lppl.WFshYuRspsp.......s.SSGFEHVFl.GEhK..s..............scl...GhHNWlpFYhpE.......cpGp......lDYhGahhctphsp..sp..........llslpFsWp...........uh.hK.luo..hFlGsSPEFEhALYTlCFlspsp........ct..spl..plss.....h.htlhsaphttpt.......Iuoua................................... 0 77 101 166 +9245 PF09413 DUF2007 Domain of unknown function (DUF2007) Mistry J, Bateman A anon pdb_2hfv Domain This is a family of proteins with unknown function. 25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -9.00 0.72 -4.15 90 1038 2012-10-01 21:59:08 2006-12-22 11:53:11 5 11 859 1 319 859 252 65.90 22 55.22 CHANGED hclhpsss....lpsphlpslLcpsGItshlpspt.huuh.........s...us.sh..hclhVtc.cDhccAppllpchpt ..................h..lhpsss....hcAphlpslLcspGItshl..p....spt.hssh...........h.....ss.uh.....hclhVt-.cchppAppllpph..t................. 0 106 212 273 +9246 PF09414 RNA_ligase RNA ligase Mistry J, Coggill P anon pdb_2hvq Family This is a family of RNA ligases. The enzyme repairs RNA strand breaks in nicked DNA:RNA and RNA:RNA but not in DNA:DNA duplexes. 
20.90 20.90 21.00 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.53 0.71 -4.50 22 428 2012-10-02 00:43:09 2006-12-22 12:03:17 5 5 363 7 118 450 40 184.40 19 55.28 CHANGED p-aVsoEKlHGsshslhh.tttp.........hphstpos.............thp..................hhstatpslpshphhhc..ht..h.......stslhlhGElhG.............Pslpttsh....................t......chcFasFclhh....tp.t......phhs.cthpthspphslt....hsPhLscGshsthhhhss......................................................................h..shtt.slsEGlVl+sst...........ststshlKh+ .........................................................hlhpEKlDGsNhs.l.hh..t....................hthhtRsp.........................ht.....................................................................tpsta...ttt.hpthhthhp...t...................htshhla.GEhhhhh.................pul.phpth..........................................................................cpcFa.lFslhsp.......stt.........taLs.h-..ps.phhsphhs...l......h....VP...h..lh...ts.t.h..s....h.tthph..thh......................................h..shts.sh.t..EGlVh+stt........t.........h...................................................................................................... 2 55 89 107 +9247 PF09415 CENP-X DUF2008; CENP-S associating Centromere protein X Mistry J, Wood V anon manual Family The centromere, essential for faithful chromosome segregation during mitosis, has a network of constitutive centromere-associated (CCAN) proteins associating with it during mitosis. So far in vertebrates at least 15 centromere proteins have been identified, which are divided into several subclasses based on functional and biochemical analyses. These provide a platform for the formation of a functional kinetochore during mitosis. 
CENP-S is one that does not associate with the CENP-H-containing complex but rather interacts with CENP-X to form a stable assembly of outer kinetochore proteins that functions downstream of other components of the CCAN. This complex may directly allow efficient and stable formation of the outer kinetochore on the CCAN platform. 25.00 25.00 26.10 25.50 24.10 24.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.38 0.72 -4.16 24 170 2009-11-05 17:40:01 2007-01-02 12:50:08 5 1 155 12 127 175 0 73.10 33 50.36 CHANGED stphlsRllp.p.Fcs.ppT+Isp-AhpllscYlclFVcEAltRut......pcsputpp...........lchpcLE+.lssQLlLDh .................chls+lLp...t.Fps..ccT+ls..p-..AlplsucalclFVpEA.ltRust.......tpscsttt...................................l-l-cLE+.lhPQLLLDF.................................. 0 33 66 104 +9248 PF09416 UPF1_Zn_bind RNA helicase (UPF2 interacting domain) Mistry J anon pdb_2iYK Domain UPF1 is an essential RNA helicase that detects mRNAs containing premature stop codons and triggers their degradation. This domain contains 3 zinc binding motifs and forms interactions with another protein (UPF2) that is also involved nonsense-mediated mRNA decay (NMD) [1]. 25.00 25.00 25.50 29.80 23.20 22.30 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.12 0.71 -4.63 16 324 2009-01-15 18:05:59 2007-01-04 16:32:50 5 7 266 6 238 323 3 142.70 57 14.31 CHANGED HACAYCGIcsPssVlKC...sCsKWFCNu+ssT.uuSHIVsHLV+S+H+pVsLHs-SsLGDTsLECYNCGs+NVFlLGFlsAK.sEsVVVLLCRpPCA..pt.+DhNWDsspWpPLI-..-RphLSWlspsPS-p-..h+AR.IT.pQIs+LEphW+sN .......................HAC.uYCGIHsPusVVpCs..sCpKWFCNu....R.G....sT.SuSHIVNHLVRA+HKEVpL.....H.-usL...G-TlLEC.YNCGs+NVFlLGFIPAK..uD.oVVVLLCRpPC......A.s..s.u.KDhN.WDsop...W.QPLIp..DRsFLsWLVp.hPS-.pE.QLR.AR...pl.ospQIsKLEEhWKpN....................................................... 
0 88 129 199 +9250 PF09418 DUF2009 Protein of unknown function (DUF2009) Mistry J, Wood V anon Pfam-B_18128 (release 21.0) Family This is a eukaryotic family of proteins with unknown function. 25.00 25.00 49.70 25.80 18.10 21.80 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.45 0.70 -5.71 11 144 2009-01-15 18:05:59 2007-01-05 15:47:23 5 7 77 0 106 145 11 364.20 38 78.36 CHANGED c+A+YIPlRLo.-ERKlLRLLEAALpVS-YTD+VDllSh.pS+sKRlspQLKEhCulLsGLlVA.DhKtGpcLlcpK-FsspAcaFQslFEIGRRYKlMNP-+MRosYGKlhYMlQDSh.s-lpc.tLGFslhKPIhTVapaLppps......uhslLpDshl.hAhscl.P.sKsRsplp+tI+pKEpAlEpLu++YSs..uthscE-lc.slYSluDtNualptNR-Plp+MLphLcpaFsPssspch.asLuIphGtsGARLoHsHc+QaaYVhQSLoLWppIh+-MFpLWhlA-tDLhsssp.YcLssTGQGLNRlQtCPslh+AhcplLpcspccht.sWVGSSVlHLGDcsVPNALaFIDKYsQVsRILIPllpsltpIcsL.csDtplhsYIcppaGusppL++sILpDFF+HuFDGSGuDNaaDAGSCIDGRLTSAWNWsNpIpKKpYYsIFLhoGFouF-G ...............................................spalPlRLs.pERphLcLl-us................LpVS-YTD+lD.h.p.........h...h..................s.psp+hhtplcchhshlsGLhlut.....sh....c.Gpp.lhp.p.+s.htt..t.phhpphFEluRRaKhhNPpphRspYGKhhahl.Du...pltc....hlta...p..hhpslpTVhthLp.ptt......s.thlp.D.hl.hsh......l...........................lpp.lptKct..uhptl.pcYss..pthpp-plcpslhSlsD.psalt.NppPhpphlphLppaa......pPtp..p........t........hoLuIp....................Gt......sGuRLoHsHppQa.YVhQSLhLWppl.ppMhpLW.hu-pD.hL.ss.pt.YpLhsTGQGlpRhptsPph.phMppllppsppph.s..tWVGSsllHLGDcsVPNuLhFIDKYsQls+IL.Plh.sltpl.pl...........p..p......t................httalpptauuhpphphhILtDFF+HuFDGSGuDNh..uGSCIDGRLTSAWNWCsplp+K.aashFhhsGF.GF-G............................... 0 59 79 102 +9251 PF09419 PGP_phosphatase DUF2010; Mitochondrial PGP phosphatase Mistry J, Wood V anon Pfam-B_22310 (release 21.0) Family This is a family of proteins that acts as a mitochondrial phosphatase in cardiolipin biosynthesis. Cardiolipin is a unique dimeric phosphoglycerolipid predominantly present in mitochondrial membranes. 
The inverted phosphatase motif includes the highly conserved DKD triad [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.90 0.71 -4.90 16 326 2012-10-03 04:19:28 2007-01-09 14:34:34 5 8 304 0 187 825 133 152.60 27 70.39 CHANGED lsu.slslhpLlhsPSLhlPHlsVsoFspLPhsl..................hppssI+AVVLDKDNChshPccsclassYpcphccL+.........psasst.plLIVSNoAGosc.DhstptActlE+soG..lsVLRHs.........hKKP..GCtcElhsYF+pp.h..lppscElAVVGDRLhTDllhANhMGuauVWlp-GVp ............................................................................................................thhhPphhl.........s.sltpl..shth...........................LpptuI+ullhDhDNs....Ls..ppsplhs..p..hh....ph.hcch+...................pt.hst..tlhIVSNss...........st..ppsp..tlp...pths.............l..slt.as......................sK.KP....ts...h...p...cl.hp.hh.....t..tp.........................t.........s.pclslVGDRlhTDllhuNh..h.G.aslh......h................................................ 0 72 121 162 +9252 PF09420 Nop16 Ribosome biogenesis protein Nop16 Mistry J, Wood V anon Pfam-B_6406 (release 21.0) Family Nop16 is a protein involved in ribosome biogenesis. 
25.00 25.00 27.70 28.50 21.10 22.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.15 0.71 -4.14 45 332 2009-01-15 18:05:59 2007-01-10 16:36:55 5 4 279 0 223 301 1 178.70 25 84.52 CHANGED lR++++s+uuhs+ss++pts+p..t...+plsh.s..sslI.......tp...sWDcptTlsQNYp+LGLsschstss........................................................shtph..spscltRDspss.l..h.....................................................................pttpspllppLEc.......App.s.....+pt+phSccE..pcalppLlcKHG.-DacAMthD+KLNhhQpotupl+...R+lp+app .......................................................................................................................................................................+p+p+ppp.thpss.p+ph.p+p............+thshht...sshl.........tp......sWDcptolpQNapchGLssc.Ntss..........................................................................................s.ppht.tphc.lpp-.....tps.t.........................................................................................thppspVlppLEp........p.Ast.p............+ptpphSccphcalppLlcKHG....-D...YcAMspDcK.N.hQpT.tpl+++lppah.t............................ 0 77 120 183 +9253 PF09421 FRQ Frequency clock protein Bateman A anon Griffiths-Jones S Family The frequency clock protein, is the central component of the frq-based circadian negative feedback loop, regulates various aspects of the circadian clock in Neurospora crassa [2]. This protein has been shown to interact with itself via a coiled-coil [2]. 
22.40 22.40 22.60 25.60 20.30 22.30 hmmbuild -o /dev/null HMM SEED 989 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.00 0.70 -13.94 0.70 -6.78 4 63 2009-09-11 06:31:12 2007-01-17 12:55:18 5 3 35 0 55 66 0 625.70 32 95.98 CHANGED RssPhsSpGHPLPRRsSP-pSlTL+pHRLARDAS...lpuShhusssspsQ.sSSssRRsSSGESp-TGQSDsppWFsQSNpNPsAsF..-SNhM-VDPPFYQKEoDSSNE-u+hP.ttsPs........shh+sosAHSSSADDYRSVIDDLTVE.pRLKEELKRYKQFGSDhMRKEKLFEIKVHGLP+RKKRELEATLR-FAASL.GsSSp..SoSpR...+KsuRHusts+....SuusShSKHsSSSS.S+SRPVDSAYASMSTG.....spSSGsSLsRPShouttpTupQKVEsYLRDhP-GLhP+HllMT-KEKKKLVVRRLEQlFTGKIuG+phpRspo..hPuhsuuLss..............pGpthu..pPPs..............EshREApI..Q-sp.sc...KpppS+DssSASNSstDQTEsGGsssuSGsGssSG....sNTSPPhs.sP-QRPTRPRDLDPDRlQIPSENM-YIRHLGLVsPEhLpsSpsphp.DVAPDAEGWVYLNLLCNLAQLHhlNVTPsFIRpAVsEKSTKFQLSsDGRKIRWRGGTDGTKFSSDSStDpSQpSPhTDDTEDuSsKNG+RKKpKsppAcSphuphs.S+..........usSDoFHYKPhFlHppuSStETSLE-u..uS.Gs...l-ESs.usS+WshSGSGsTpQRRKRRhDGAIVYYoGAPFCTDLSGDPGDh.........SPss.MpuutctcusupG.........ctschV..RohSGSSLshRPLSDs+hpluph.cFsPtN.....P-LVsDsGspssD.-h.FPWs--PthlcVpPL.....EPSGLGGVLPDDHFhllVTTRR...shRPs.ppp.LuRopoSE..-Ts-hIspRLAohpTS.....PhP.Pps++Lss....u.lpIEYlSGch+RLNPssLPPPAIFaPPFSoDSSasstDDLuSDs-.....-E-s.SEt.MSRRANPH.SDNp.Y.ccsDLu.ssEss.c.Ds-.......cDhchuuDpGts.RuhhspscuVcGsupPhusssG+-sshh+TGSSsATAGGsESGYSSShE- 
......................................................................................................................................tt.st......sp...s.pWappss.psh.......t.h-........p.s...pps.p..p........pt....t...........................................................sp.usss--aRSlIDDLTl-.ppL+pcL++Ycp..ss.shhcc-KLFEl+haGLPtcK++ELEthLRpFsssh..t...s.t........s.....t.........tt..............ptts.Ss.......-SuYsShS.t.u.....stsptss......t....h..t.......p..t.tphpsaLp-hPtGLhP..p...h.ho-+p+KphlV+RLEQlFsGc........t.....................................................................Es.+EAph..................t......................................................................scQRPTp.hDlD..R.......hpYhppLsh.s.p..................cGWl.LNLlhshAQLHhhsVsstFl+pAl.thSs+hplS.DGpKlRW+GG.psTphSup...u.....ttts....tp..h....t.tt.tphppt......t.t.....t........s...t....t..................................................ptFhYcPhFhp.....sst......ss....-tt....S..s............pps....st.st..hs.sh...t..........p.pp.-GshhaYsss.FChDLSGD..hsh.........p..ss...h..st.p................................ps.SGo.....h.hp.Phsp.......................t..........sp..t.......p....hspt..t.......t.......h......ssGlGGV.PtDpFhhhV.o++...............................t........................t..p.......l..pth...sth..ps.............................lph-hhohphp.L.PssLP.Ps.ah..as.ssp..t....s...p..s......................................................................................................................................................................................................................................................................... 0 8 23 51 +9254 PF09422 WTX WTX protein Bateman A anon Bateman A Family The WTX protein is found to be inactivated in one third of Wilms tumours [1]. The WTX protein is functionally uncharacterised. 
21.20 21.20 21.40 21.80 20.20 21.10 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.70 0.70 -5.53 9 168 2009-01-15 18:05:59 2007-01-17 13:40:10 5 3 40 0 96 163 0 267.30 25 53.70 CHANGED psls+SKTHDGL...........pctspususssphssstShsssP.....s.t..sspShstuhsFhShl++uutppsttu.....phuhs+pK+GLKGlFSShRh+RKsK.ttt-.p.s..pt.ttshhhstp.suSh.pl.......E...pshcc................susc.sssssppsPupspstsssPstsspp.usth.sstsh.ts-u.hps..st.tth..s................................uss-PPu-PSsDpLCh.hFsDVTSLKSFDSLTGCGDIIAD.-.-...GsSsssp..............................................hsusGptssu....thcpsuslVsYQGGGEEMASP-psD......................-s-hpchW-hlspo--pppt............s..hPths.....p.hh..uspssc.....+c.splcsttLtclPl.........ppt.p.t.tsPps-ptEusPsSDEGYaD..SsTPG.E--s...uu.s..p+sslPRDShSGDALY-LY.DP--u.....httusp-ssslSph+shSt.hhs.Ph ..............................................h.pspoh-sh.......................................t...........................tph...h.....................t...t......................sh.+.t+sh+shFp.h+.ppp.....................................................................................................................................................................................................................................TGCG-lhA-...........S..hs...................................................t.t....t......pptsshsshQGusEphA.Ptt.-...................t......tt...ht....t.....t........................................................s..t.............s..t...h.......................t..pp..Es.ssSDEGYaD..ShoPs.p.-t...........t.hhsRDShSGDtLY-ha.-Ppt..................................h................................ 
0 4 8 30 +9255 PF09423 PhoD PhoD-like phosphatase Bateman A anon Prosite Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.82 0.70 -5.92 148 1704 2012-10-02 19:15:56 2007-01-23 14:55:29 5 31 861 2 812 2178 650 392.20 20 71.03 CHANGED hGVASGDPtsDulll................................WTR...lss..............................s.......slsVpaElA...s.........DtsFp.........p..lVppGss.hsssc..hDaoV+..........................V-lsG................LpPsppYaYRFp...s...........stSssGRsRTs....sssthpp....l+hAhsSCssa............pGa...................assYcphAp....p.s...hDhVlHLGDYIYEh.............t.t.h....sppth.p......t...t-.................hhTLsDYRtR....auhY+sDssLpuhHA....phPalssWDDHElsNNhh...tsusp..................ttsattR+ssAhpAYaEahPl............R...........tss.s.....................hp.l..........YRphpaGsLsclhhLDTRpa+s..........................................spsh.h....................................................................t.h...........................s....ssRolLGtpQppWLhppLt.......sSp...upWpllupQlhhuphshss..........................................................t.shshDsWD...........GY.spRpclLpth...p....pt.slpNsVhLoGDsHpsaAs-lpss...............ttt..p.s....lusEFsssS....loSsuh....................hst.lhst.sPclpahshsp...............RGYs........plc..hsspphpsca 
..................................................................................................................................................................................................................................................................h.hth..................t.....t............................................h................h..............t.....s..a.ss...p..........................htht.s................L..ss.p.h..h.Y...p.h.h.................................s.....h....G........p......hp...Th.......stt...tp...................hphsh...sSCt...t..h.............t..uh....................................................................hpha.p.p..hsp....................p......Dh.hlahGDhlYt.s...............................h........................................................................h..s..l...t.c.....Y.R.....tp........a.t.....h.h..........t....c......s.......s.......lp...t...h.p..u.............phP..h........l.h..hW.........DDH.........E.l.t.sshh........ttt..................................................tsh...t...t.h.t..t..s...AhpAa..h...E...a..h..Ph...........................................................c.................t...p............................................hpl.......a..c....p....h....p.....a.....G....s....h.....h....p....lh..h.LDsRpaRs...............................tpt...t.........................................................................................................................................................................s...sspsh.lG....t....p....QhpWLhpt........L.t...pup...........upW.p..l..l..us..p.h....hs....h......s..................................................................................................h.s..h..-.s.Ws.............G..a.....t.p.pp......c...lhphh...........tp....t.....t.....h....p.....s....hl..hLoGDhHhs.hs.t.c.ht.t....................................................hhEhssss............lst..................................................................................................
..................................................................................................................................................................................................................... 0 272 518 709 +9256 PF09424 YqeY Yqey-like protein Bateman A anon Bateman A Domain The function of this domain found in the YqeY protein is uncertain. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.67 0.71 -4.34 42 2465 2012-10-02 13:42:24 2007-01-24 12:41:00 5 3 2421 1 718 1873 2150 142.00 36 94.47 CHANGED +-+lppDhKpAMKA+DKt+LuslRhlpAAlppcElctpt....c.LsD--llsVLsK.lKQR+-SlppappuGRpDLs-pEpsElpllppYLPpQLo--ElpshlppsIu-s...GAs.....uh+DMGKVMuslps+ltG+A.DhutlSshVKptL .......................................................................................pplppDhK.pAMKAK..D..c.+..L...sslRhlpAAl..ppt-.l....ctpt.........................p..L..sD-.c..ll..slls+..lKQR..+-Slpt.appu...G..R.p..D...L.A-p....EpsEl.sllppY...L........P....p......pL..o--E.lpshl.c.ps....I....sc....s....G.As.......u.h.p.D.MGKVMssl.ps...+...l...t...G...+...A...Dsstl.uthVKphL........................................ 1 253 495 623 +9257 PF09425 CCT_2 Divergent CCT motif Bateman A anon Bateman A Motif This short motif is found in a number of plant proteins. It appears to be related to the N-terminal half of the CCT motif. The CCT motif is about 45 amino acids long and contains a putative nuclear localisation signal within the second half of the CCT motif [1]. 21.00 21.00 21.00 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.76 0.72 -7.10 0.72 -4.40 18 348 2012-10-01 19:54:00 2007-01-29 13:51:09 5 9 35 21 176 437 0 26.20 60 10.36 CHANGED PlAR....+uSLpRFLEKRKcRlss...ssPY.s ...........PhAR....K.sSLpRFLE..KRK-.Rlsu...tuPY..t...... 
0 26 104 145 +9258 PF09426 Nyv1_N Vacuolar R-SNARE Nyv1 N terminal Mistry J, Wood V anon Pfam-B_50964 (release 21.0) Domain This domain corresponds to the N terminal domain of vacuolar R-SNARE Nyv1 which adopts a longin fold [1]. In yeast it has been shown that this domain is sufficient to direct the transport of Nyv1 to limiting membrane of the vacuole [1]. 25.00 25.00 151.30 150.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.97 0.71 -4.62 5 27 2009-01-15 18:05:59 2007-02-05 13:24:10 5 1 26 1 15 24 0 136.60 63 56.58 CHANGED VSYVEVlcsG+olSSC..Ycsts.ssusYGslousscuss....TP-lFHsLIsDMVlPKVVslsGNKVTKMShsLIDGYDCYYTT..cscDsspVLVCFT+lDlPKILPIRlLS-LKphEst......DsDEhLSuslGsILDsFHcELloYRNp VSYVEVI+sGcTlSSC..Fpshp...pspsYGolsss...scph.................TPslFHpLIhDMVLPKVVPIcGNKVTKMSMsLIDGFDCaYoT...-D......cDscTVaVCFThVDIPKILPIRlLStLpchEuN..........uTsEhLSuHVGpILDuFHEELlpYRNp. 0 1 6 12 +9259 PF09427 DUF2014 SREBP_C; Domain of unknown function (DUF2014) Mistry J, Wood V anon Pfam-B_71890 (release 21.0) Domain This domain is found at the C terminal of a family of ER membrane bound transcription factors called sterol regulatory element binding proteins (SREBP). 
20.20 20.20 20.70 64.90 17.60 19.80 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.58 0.70 -5.25 8 72 2009-01-15 18:05:59 2007-02-05 14:12:33 5 3 72 0 59 73 0 263.90 50 26.35 CHANGED +shKaoLRsllG.psY..uaLTGsoE.....EpEtARlKAWsIALDAQLsGGDsElSKSRLsLTlhASGTLPcTPtRlMLKALHsRVLLWcl....ssshplsspluuclARhpWNtARphpphLsp......ucccsLPEHLAsLLEp-.sDDVhssuIlQRAaNLA.WNRsTscsshsp..ssuhDsVVEDsAl+SPLDALAAWYSSpsLp+sLhpoL...pspsts.....upcshhscIslAlplAPhsSsAphRALVA+AVlhsccRussIssALsAls.spss .............+shcasLRsllGh+hY..uhLTGlTE.....EpEtARVKAW-IAlDAQLsGGDsElSKSRLlLTlhASGTLPcTPtRlMLKALHsRlLLWclu..suhshtluNtlAtpLARhpWshARphpchLsp............sp-DsLPcHLAsLL-tD.CD-VhsDsIlQRAtNLA.WNRsTp-ssssc..pshhDsVVEDsAlpSPLDAlAAWWSSphLQcALlpsL.....-hssss.......................hcspcshppplclAlclAPhsSsAasRAlVs+AVhh-ccRsssIsssLtALs.p.ps... 0 10 26 46 +9260 PF09428 DUF2011 Fungal protein of unknown function (DUF2011) Mistry J, Wood V anon manual Family This is a family of fungal proteins whose function is unknown. 24.50 24.50 25.00 27.70 22.50 24.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.09 0.71 -4.27 37 112 2009-11-20 16:11:53 2007-02-05 17:05:15 5 2 109 0 85 112 0 149.10 29 52.01 CHANGED +lpLRpso........schlss.RshsYYFsp.s.tp...........................+cpFpsuAlou-slhphup.hsas.......tsphPWRVlclp.....pttsphppp...t.................t...p++++PGKK+Rlth+pp.............ttcp.....tccp.......-t-K+..........p++p+cKKh++...............Rtpp+p+ .......................+lpL+p.s........tthlsstRPhsYYFus.pttp...........................+ppFppuAVou-sllptup.hsa...................usphPWRVlclp..ttssphctptt..t............................................pctpp+++RPGKKpRlshRtp.............ttcp......pEcp.....p+-K+..........pR+N+cKKh++....Rtpc+pp.................................. 
0 14 41 71 +9261 PF09429 Wbp11 WW domain binding protein 11 Mistry J, Wood V anon Pfam-B_13108 (release 21.0) Family The WW domain is a small protein module with a triple-stranded beta-sheet fold. This is a family of WW domain binding proteins. 21.10 21.10 21.70 21.70 20.70 20.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.64 0.72 -4.00 30 272 2009-10-15 16:14:36 2007-02-06 10:25:24 5 9 212 0 198 259 1 77.90 41 17.48 CHANGED c+shNPs-utRKppKcKElKKsKp-RpptRcttlt++sPcplpcplccLcph.EtsttL.....pstc+p+lcpLEcslchlt+ .......tK.hNPsDttRK.......pt+K+ELKK..........NKcpR.hsRss..hLtt..KDPcpIhcph-cL-ph..Ehss.h......................cps.+c+hcpLccshctlh................................... 1 63 99 149 +9262 PF09430 DUF2012 Protein of unknown function (DUF2012) Mistry J, Wood V anon Pfam-B_49614 (release 21.0) Family This is a eukaryotic family of uncharacterised proteins. 26.60 26.60 26.60 26.70 26.50 26.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.55 0.71 -4.28 26 303 2012-10-02 19:08:27 2007-02-06 12:46:16 5 6 253 0 216 352 5 118.10 28 48.11 CHANGED hsstshhsssplhL............su......................spa...hshl+pDGoFshpsVPsG....oYll-lpsssahFss...lRV-ls......psphcutplshhpstpsh.........................hshPlhlcshuhtpYa..Rcpaslhsl..LpsPMlLM ......................................th.sss+lhl..................su..................................spt...huhlppDGoFshpslP.s.G............SYll-lt....s..sahFpP...lRVDls................ttphc.s....ph...lshhpst....p.s....p...........................hsh....P.Lph.csh..u.hpYappRcpa..shhsh..L.h.NPMlLh................................................. 0 76 117 175 +9263 PF09431 DUF2013 Protein of unknown function (DUF2013) Mistry J, Wood V anon Pfam-B_11317 (release 21.0) Family This region is found at the C terminal of a group of cytoskeletal proteins. 
21.40 21.40 21.80 22.70 20.20 21.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.73 0.71 -4.42 23 255 2009-01-15 18:05:59 2007-02-06 12:52:43 5 8 227 0 183 253 1 139.20 34 23.37 CHANGED ss-.hs..hl+LLLslNEQahhsuht..............pNtVhchL..............ptssh+sFsEpLlLhlNR-pDs......lplh.........lLKhLYLlFT...ossThchFYsNDL+VLlDIlIRpLssL......ssctchLRtTYL+VLtPLLppTplpch...Y++s-lhcl ..............................................................s.p-.h...hlplLLshN.pahhss...................................pNhlhpsL..............pts....sh+sFsEpllLLLN.Rts-s....................lplh..................lLKhLh.lFo...os..sT.t...p.hFYsNDl+VLlDIllRpLhDL............sssstLRhpYLclLhslLppTphpp......a+ps-lht.h................. 1 60 94 148 +9264 PF09432 THP2 Tho complex subunit THP2 Mistry J, Wood V anon manual Family The THO complex plays a role in coupling transcription elongation to mRNA export. It is composed of subunits THP2, HPR1, THO2 and MFT1 [1]. 28.00 28.00 29.10 31.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.71 0.71 -4.29 5 26 2009-09-11 02:57:22 2007-02-06 14:47:07 5 1 25 0 15 20 0 127.80 61 49.81 CHANGED hLpYINLLs+LSVDLA+QlEsuD..sssclhV-+hsPPsELQulLcpYss...-ss-scsLRAcLp+YLD-IKMsRAKYuLENKYSLp-oLppLTKEVScWRccW-sIEsLMFGDuPsSMK+MlQsIESlK.cpLTs ..hLRYINLLcRLSVDLAKQVEVSD.PSVTVaEhDKWsPs-cLQuILEQYss...P-TDIcslcAQlcsYLDQIKMuRAKauLENKYSLKEpLsTLTKELN+WRKEWDcIEMLMFGDsAHSMKKMlQpIDSLK.Scls.u... 0 1 6 12 +9267 PF09435 DUF2015 Fungal protein of unknown function (DUF2015) Mistry J, Wood V anon manual Family This is a fungal family of uncharacterised proteins. 
20.90 20.90 21.50 41.90 20.80 18.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.69 0.71 -4.36 20 119 2009-01-15 18:05:59 2007-02-07 13:30:17 5 1 117 0 90 111 0 120.20 43 87.04 CHANGED Ms..hllYhhshhlllsuT.hlahsRpRWhsh....h...........lschhYs+Ls....SFssDlEAGLSSosFDLuu.Nl..ssuDsRuGLD-puKcEIp+IM+pcplsFD-ARhlYhcc+FucNsIuPD.GpPRDPK ..............................hha.h.hhhhllhss..hhahhRp+hh.h...........................hsshhYs+LP.....SFpsDlEuGLSSusFDLsu.Nl..ssuDsRuGLDctuKcElh+IM+pc.plsFD-ARhlYhcp+FscNs.IGPD.GpPpDPK.. 0 23 49 79 +9268 PF09436 DUF2016 Domain of unknown function (DUF2016) Mistry J anon Iyer L Domain A predicted alpha+beta domain that is usually fused N-terminal to the JAB metallopeptidase. This protein in turn is found in conserved gene neighborhoods that include genes encoding the bacterial homologs of the ubiquitin modification system such as the E1, E2 and Ub proteins [1]. The domain is also known as the JAB-N domain. 19.80 19.80 20.00 22.90 19.10 19.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.31 0.72 -4.41 14 40 2009-01-15 18:05:59 2007-02-07 13:38:47 5 1 34 0 23 41 0 71.90 37 32.17 CHANGED hDthLQssFPolhsP+aGs.lsshppsGcRhllAusGlalElpRsWLcslpplu...t.psslPYGplscplchh ........................hDtsL.suhPoVhVP+aus...lssh..p..psGcRlLluusGlalElpRPWLcllptlu...s.sstlPYGsVpEphchs...... 
0 4 12 19 +9269 PF09437 Pombe_5TM Pombe specific 5TM protein Wood V, Bateman A anon Wood V Family \N 19.10 19.10 131.50 41.20 18.00 16.50 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.83 0.70 -4.96 2 8 2009-01-15 18:05:59 2007-02-07 13:40:20 5 2 1 0 8 9 0 120.00 34 85.26 CHANGED uhhcchNoQ.NR.M........ppSshs.pNIs..hhslhIsc...TspYClAs+.........................................lLlaL.Y...................ChYI..au.s.olppphcpasFus.slhappFas..c...FlRTp...........uhs.hRThsKa.hI....IlhulppVhh.p.hpcsYsho-I.DhhQ.......................sYpNss.uRFhpR .....uhhcchNoQ.NR.M........ppSshs.pNIs..hhslhIsc...TspYClAs+..................................................................hChYI..au.s.ohppphcpasFus.slhappFas..c...FlRTp...........uhs.hRThsKa.hI....IlhulppVhh.p.hpcsYsho-I.DhhQ....................................hh.......... 1 8 8 8 +9270 PF09438 DUF2017 Domain of unknown function (DUF2017) Mistry J, Iyer LM, Burroughs AM, Aravind L anon Iyer L Domain This is an alpha-helical domain found in gene neighborhoods that contain genes encoding ubiquitin, cysteine synthases and JAB peptidases [1]. 
25.00 25.00 25.10 33.00 24.80 24.40 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.15 0.71 -4.71 19 377 2009-09-11 16:56:57 2007-02-07 15:24:14 5 2 375 0 100 254 96 175.00 33 92.35 CHANGED ht+ppshtGsphputl-PtEsplLpsLsuslhshLpp..pstsuPpD.LutlsGh.sGpsp.PpDPsLtRLLPDhaps-s-ss.ss...........uthRpLpEs-lhcsKhssuphlhcsL......Pts.....usclpLot-pAcAWluuLNDlRLsLus...tLclss-ss........-t..s-cshs.phslYpWLTalQESLlpAlhu .................................................................................................t.th.s.ht.hEhtlLtslssth.thltt...........t.....t...p....ss.s....t.D..Ltth.h...u.......................ss.........sp.P.........p..DPsLtR.L.LPDha+sD-pss....................uthRsh+Es-lhsAKhtsApslhssl................scs........uuplpls.ppApsW..lsulNDlRLslustLsItscss...................tchst.p.c...s...ts...s.chsVYp.WLshlQEsLVpshh........................................................ 0 31 77 96 +9271 PF09439 SRPRB Signal recognition particle receptor beta subunit Mistry J, Wood V anon Pfam-B_7840 (release 21.0) Family The beta subunit of the signal recognition particle receptor (SRP) is a transmembrane GTPase which anchors the alpha subunit to the endoplasmic reticulum membrane [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.17 0.71 -5.06 8 357 2012-10-05 12:31:09 2007-02-07 17:41:34 5 11 294 5 246 9068 2252 182.20 30 56.77 CHANGED ospsslllsGlssSGKTsLFspLoTs.....shpcThsS..Esssuh+h...ps.+GsphTLIDaPGH-+LRhcLl-phhtpus.l+ullFVVDSol.sKclp-sAEaLYplLo.othh.csslsILIACNKp-lhhA+ssphI+puLEKElsplhcpRutuLssh..........-uss-tss.LsppGcsFcFspLcsp.V ...............................................t..tpslLL..sG.ss.SGK.T......s.....L....as......p..L..h.p.s........................................ph.t....t....T....h....T....S......h.......c...s.s.....s..s.h....t.h........................t..t..t...p..s...p...s..h...........p..L..l......D....h...P...G...........H..........t.......+......L..........R.......................p..............h.......h....c......h.................................................t................s..............p.........s........+..........u..........l.......l.......F..V..V...D.......o....s.......s.........h...p....c.............p....l........c...s..s.A..E...a....L...Y...s..........l...L...........h.........s...........s.p...........h.............h.........p.........p.............p........h.............s.lLl.s...C.NK.......p..D...h..........h....s.....A....c...s...s....p..h....l...+.....p...t.........L..E....p..El.s....p.....l..R.h.o..Rsts.lpsh......................-tst.p.............h....l.s..t.....u..p.t..F.pFpph...................................................................................................................... 0 85 137 199 +9272 PF09440 eIF3_N eIF3 subunit 6 N terminal domain Mistry J, Wood V anon Pfam-B_4886 (release 21.0) Domain This is the N terminal domain of subunit 6 translation initiation factor eIF3. 
25.00 25.00 25.40 26.50 24.40 24.10 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.68 0.71 -3.99 23 318 2009-01-15 18:05:59 2007-02-08 09:34:55 5 6 244 0 203 294 3 126.30 37 30.33 CHANGED DLstKlhsaLDRHLlFPLLEFhusctha-p..........p-lhcA+h-LLpcTNM.sDYshslhpcl....sscphPsEhsp+RppVlppLpcLppcssplhclhpc.-llssl+o..D+stNlcaLp...cc+slTh-hlsuLY+aupap .........DLTs+ls.aLDRHLlFPLLEFLss.....p..p.l..as-................c-llpuKh-LL.p..cTNM.l.Dash-lacpL........sc...-.........hPpchtc+RppV.lspLc.pLppcspslschhp.ss-sspphco.....Dpph.hc.aLp...cc....+.s......h................p.-hl.ssLYcau+FQ...................................... 0 78 113 163 +9273 PF09441 Abp2 ARS binding protein 2 Wood V, Bateman A anon Wood V Family This DNA-binding protein binds to the autonomously replicating sequence (ARS) binding element. It may play a role in regulating the cell cycle response to stress signals [1]. 25.00 25.00 28.70 27.80 24.60 19.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.08 0.71 -4.35 6 71 2009-01-15 18:05:59 2007-02-09 13:41:04 5 2 68 0 58 71 0 167.70 58 23.27 CHANGED pRsLPsRc.Vos-oI-DAYsuFILYCNPsVshsTDTspLRcsFRsPPKS-GKSFSoasLFELI+pL-sKEIKTWupLAlcLGVEPPD.EKGQSoQKlQQYAVRL..KRWM+uMHVDAFFEYLLG+.HPYaspIPPspsPlu-htRDGVssE-DLALRALlPch+P+RGR+Ks--h--ss .....s.RsLPsRc.lT.s.pTI-DAYVsFIhYCNPsVPhs...oDTspLRcsFRsPP+S-GKoFohFsLapLIcK.L-pK.ElKTWhpLAlcLGVEPPshEKcQSoQKVQQYAVRL..................K.....RWM+uMHVDAFFEYhlG+sHsYaTplPsssss..hs-..RDGVshE-DhALRALlPch+PKRGRKRs--cp..t............................. 0 10 27 45 +9274 PF09442 DUF2018 Domain of unknown function (DUF2018) Coggill P anon manual Domain Acid-adaptive protein possibly of physiological significance when H.pylori colonises the human stomach, which adopts a unique four alpha-helical triangular conformations. The biologically active form is thought to be a tetramer. 
The protein is expressed along with six other proteins, some of which are related to iron storage and haem biosynthesis [1]. 25.00 25.00 30.20 29.90 22.70 17.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.50 0.72 -3.42 14 194 2009-09-11 13:54:03 2007-02-09 16:45:20 5 1 193 8 23 82 2 81.60 53 91.25 CHANGED FtGoP+-KFh-IlhpANpsllcpElEclhcchsshcplhEcp.Gls.........Epcl+sahhcpsch..lcsthsslYIEhhGcILopsE ..............FspoPKEKFhEIIpNuNhsslEK.hEchFtcalAM.ELLEKp.Gls.........Eh..-sK...sFILENuDh...IE-RpNDlaIELuAcILu+p.t. 0 6 19 23 +9275 PF09443 CFC Cripto_Frl-1_Cryptic (CFC) Coggill P anon pdb_2j5h Domain CFC domain is one half of the membrane protein Cripto, a protein overexpressed in many tumours [1,2] and structurally similar to the C-terminal extracellular portions of Jagged 1 and Jagged 2 [1]. CFC is approx 40-residues long, compacted by three internal disulphide bridges, and binds Alk4 via a hydrophobic patch. CFC is structurally homologous to the VWFC-like domain [1]. 20.20 20.20 20.50 20.50 19.60 18.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.17 0.72 -4.32 9 108 2009-01-15 18:05:59 2007-02-12 15:04:21 5 3 47 1 44 92 0 33.30 52 19.83 CHANGED CGu.l.HGsWl.+uCpLCRChaGsLHChPpph.scCD ......CGu.lsHGsWl..+tCpLCRChaGtLHCFPpth.ssCD... 1 6 8 16 +9276 PF09444 MRC1 MRC1-like domain Wood V, Bateman A anon Wood V Family This putative domain is found to be the most conserved region in mediator of replication checkpoint protein 1. 
25.00 25.00 25.90 25.90 24.00 23.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.23 0.71 -3.87 12 137 2009-01-15 18:05:59 2007-02-12 16:05:59 5 3 132 0 101 140 0 137.30 40 11.45 CHANGED hc+pshsshlEtEAEES-DEa.........tGlGGs............DsEto..--.su-lccMIDD...pstpchcscpltp.hhhpcppptDp+.lpKlLcDIpsGshR.+Rh...tNuh-h-lS.Ds-D-.lpphR.p..+Rc...hh+p+hLpssc...hspLscNsKupAFaco .......c+pts+chlEppAEESE.DEa...................tGLGGs...................-sEsu...--p-spsccMIDD...tsspshcpcclst.hhApcp+ppDc+plpKLh+DIp.sGtLR.++R.....su.-...h-LS..Ds.-D.-thtph.Rht..RRcht.ch++tLLtscc...ltclucNPKppAFhco............ 0 25 53 85 +9277 PF09445 Methyltransf_15 RNA cap guanine-N2 methyltransferase Wood V, Bateman A anon Pfam-B_9480 (Release 21.0) Family RNA cap guanine-N2 methyltransferases such as Schizosaccharomyces pombe Tgs1 and Giardia lamblia Tgs2 catalyse methylation of the exocyclic N2 amine of 7-methylguanosine [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.12 0.71 -4.69 11 426 2012-10-10 17:06:42 2007-02-12 18:11:39 5 9 324 7 290 6417 1083 156.50 32 33.86 CHANGED tphlLDlFuGuGGNsIpFAp....Fp.Vhul-hshp+ltpst+NAcsYGV.sc+lhhlhGDahp...........................................hlsph+htph...hDsVFhSPPWGGPsYpcppsaDLpp.plpPhslppllcs............shplopNll...lFLPRNosLsQL.....................................................splshclhssts+Cp.................lh.hppNGhhKulhsaaGpt 
..............................s.hllDsFs..G..sG..G...N..s.I....p..F.....A..hp...............h...p....+..V...l..A..lD..hDst+lphA....c.+...N.....A....p.l..Y.......G..V.....s......-.......+...I....p..a..l....p.G...D.h.hp................................................................................................................l.h..t...p...h..p.................hD.s..V.Fh.S.P.P...W....G.G............P....s....Y.............t..p.............t.....p........s.............a.....c.l.p.....h......h....p....P...h..sh......plhph..........................hpph..o.p...p....ls.......ha..LP.Rss.slp.Ql...........................................................sph...............t..p....hc.................l.p...h.pst.pslhhhhs..h......................................................................................................................................................................... 1 114 171 246 +9278 PF09446 VMA21 VMA21-like domain Bateman A anon Mitreva M Domain This presumed short domain appears to contain two potential transmembrane helices. VMA21 is localised in the ER where it is needed as an accessory factor for assembly of the V0 component of the vacuolar ATPase [1]. 21.30 21.30 21.60 22.70 20.70 21.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.16 0.72 -4.05 18 286 2009-01-15 18:05:59 2007-02-13 13:09:18 5 7 216 0 203 272 2 62.80 32 50.42 CHANGED ssuVlhKL......................lhFoshMlslPlssaFshpthl..........hts..th...s..ssshhuGlsAslssplVLshYlhhAa ...............................ssltpL..........................lhFoh.hMlslPlusaFsspphl................hpu...............ss.shhuuhhAllssplVLshYlhhAh............. 0 50 97 165 +9279 PF09447 Cnl2_NKP2 Cnl2/NKP2 family protein Wood V, Bateman A anon Wood V Family This family includes the Cnl2 kinetochore protein [1]. 
21.40 21.40 21.90 29.30 21.30 19.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.92 0.72 -4.30 11 88 2009-01-15 18:05:59 2007-02-13 13:17:23 5 2 87 0 64 79 2 66.90 42 34.58 CHANGED sEppILssaLLssupL.sIISLppFpc...LFP+plpssPpI.csLY+-LQpQRp.pslDpVpspI-pEhcp ...oEssILsNaLLoPusLPolISLppFsc...LFP+pl.....po.cPpl.RsLYR-LQp.Rs.psl.DhVptNIcpEh+.t........ 1 14 30 51 +9280 PF09448 MmlI Methylmuconolactone methyl-isomerase Coggill P anon manual Domain MmlI is a short, approx 115 residue, protein of two alpha helices and four beta strands. It is involved in the catabolism of methyl-substituted aromatics via a modified oxo-adipate pathway in bacteria. The enzyme appears to be monomeric in some species [1] and tetrameric in others [2]. The known structure shows two copies of the protein form a dimeric alpha beta barrel. 29.60 29.60 30.20 31.90 28.70 29.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.55 0.72 -4.32 3 8 2012-10-02 00:20:33 2007-02-13 14:42:15 5 1 8 14 3 13 0 105.10 71 95.79 CHANGED IRLLYLLVKPAGMSDETFRAECLRHYEMSHDVPGLHKYEVRLVAEQPTDTHVPFFDIGHVDAIGECWFKDDAAYATYMASDIRKAWFEHGKTFIGQLKPFRTAPVAGDEPAS .....IRlLYLLVKPEuMScEpFRtECL.RHaEMStslPGLHKYEVRLVAp..pPTDTHVPFFDlG+VDAIGECWFcsEtsYpsYMsSDIRKAWFEHGKTFIGQLKPFhTtsVs......h......... 0 0 1 2 +9281 PF09449 DUF2020 Domain of unknown function (DUF2020) Coggill P anon pdb_2i8g Domain Protein of unknown function found in bacteria. 
25.00 25.00 38.50 42.00 20.40 17.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.75 0.71 -4.52 4 58 2009-09-11 14:12:22 2007-02-14 16:43:04 5 2 56 2 15 52 0 138.70 53 78.50 CHANGED LPlDAhPtsP.GRsu.ptCPYLDocWVADTNGQRlTGhGsDERFsTPACVFWSYPEtPQhTVhVRcMsTp-DAIAVVDWAAPIDoTE.AEEPsGWSGGRRGGscpSGAlYAVQKsssAVlVaTNQ-QSLKAQLlAEEsIpNLGL ...............................P.....p.ss.....scCPYLDopWVA-TNGQRhsu.GlDsRFsTPACVFWSYs--PQhTVhVRcMsopp-AhtVVDaAAPIDoTEPAp....-PsG...WSGGR...tu.....ss...cGAlYAVpKGssAVVVaoNQpQSlKAchIAcEsIspLuL......................... 0 4 11 15 +9282 PF09450 DUF2019 Domain of unknown function (DUF2019) Coggill P anon pdb_2i9c Domain Protein of unknown function found in bacteria. 20.70 20.70 20.70 21.40 20.50 20.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.20 0.72 -4.18 7 37 2009-01-15 18:05:59 2007-02-14 16:49:09 5 2 22 1 27 37 0 101.70 31 82.69 CHANGED sptLVtRFAclultQDsAlLts-hu+FNRLascMtplssELKpRsGDQRpALhsLasHPNMQVRLpAAKtTLAltPsEARppLEAIAsS+WhPQAGDAGMsLhpLD ..........pcLVtpFschshtQscAlhts-tpphN+hacchhtlhscL+u+ssst.RpuLhsLhcHPshpVRlhAAttsLt..htsscA+thLptlupt.................................... 
0 1 14 22 +9283 PF09451 ATG27 Autophagy-related protein 27 Wood V, Bateman A anon Wood V Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.91 0.70 -5.16 14 241 2012-10-02 14:19:22 2007-02-15 13:10:36 5 15 184 0 180 355 1 221.90 18 72.68 CHANGED sshhhuhtsssuuhcCspppl...ppYclspluu.pslpsscsTPPopsphpahlslC......pcsshps.cpCsss.stlCulp.hhhsst...sshsopllshtpsspsshcthts............tGlplphsG.......tsauspphcAplpapC....ssstsusEhps......................................................sstssp.lclshps..uC.p................t...t.......ttsspsssppuss.....usthGaFTWLFlhlhLhh.slYllsGuahNasphuspsa.....-lls+u.-hl+slPhhh+-hlp+........llsshpG.........suuRG ..................................................................................................................................................................................hs..........................................................................................................................h.hs.s.....................C............hC.h...................h...hh....s....h.....................t.....t..p...t..........................u.l.l..hp.u............t...t....t....t.t...ps.lph...C....s..ph.p.t....p..h.........................................................................t..s...t......h.p..lphps...uC..............................................................ts.ttstp.......................usthG.aF...s.hlh...l..l.h...hlh.h.ssYllhGuhhshpt.hutpG.h.......-hlPph.-hhps.lPhhhp-hhth........hhtt.ts......................................................... 0 83 123 159 +9284 PF09452 Mvb12 ESCRT-I subunit Mvb12 Wood V, Bateman A anon Wood V Domain The endosomal sorting complex required for transport (ESCRT) complexes play a critical role in receptor down-regulation and retroviral budding. 
A new component of the ESCRT-I complex was identified [1], multivesicular body sorting factor of 12 kD (Mvb12), which binds to the coiled-coil domain of the ESCRT-I subunit vacuolar protein sorting 23 (Vps23) [1]. 25.00 25.00 81.30 81.10 21.80 17.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.03 0.72 -3.81 4 24 2009-01-15 18:05:59 2007-02-15 13:39:10 5 1 23 1 15 20 0 91.10 53 88.18 CHANGED hLR+IPLYNtatss..aP+cR.sKlplPthpl.shssTt-hLpsWhcECccIhcssp.+cppscpF-pWYpEpYLupKPPGllps....shLuPpRK .lLR+IPLYNKYGc-..FP.p.EslsRhphPEaKLPsLQ.PTc-hLsPWYEECDsIs+s.CphHDuSsKcFDpWYcEpYLSKKPPGllss....slLSPSRK........... 0 1 6 12 +9285 PF09453 HIRA_B HIRA B motif Coggill P anon pdb_2i32 Motif The HirA B (Histone regulatory homologue A binding) motif is the essential binding interface between HIRA Pfam:PF07569 and ASF1a, of approx. 40 residues. It forms an antiparallel beta-hairpin that binds perpendicular to the strands of the beta-sandwich of ASF1a N-terminal core domain, via beta-sheet, salt bridge and van der Waals interactions [1]. The two histone chaperone proteins, HIRA and ASF1a, form a heterodimer with histones H3 and H4. HIRA is the human orthologue of Hir proteins known to silence histone gene expression and create transcriptionally silent heterochromatin in yeast, flies, plants and humans. The yeast CAF1B proteins which bind H3 also carry this motif at their very C-terminus. 21.00 21.00 21.10 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.47 0.72 -6.80 0.72 -4.30 30 239 2009-01-15 18:05:59 2007-02-15 14:15:42 5 22 194 2 167 262 0 23.40 48 2.46 CHANGED pQpsThTKsGKKRlAPhLlSouss .....KQp.ThTKDGK+RIsPlhlupss.... 0 36 73 131 +9286 PF09454 Vps23_core Vps23 core domain Coggill P anon pdb_2caz Domain ESCRT complexes form the main machinery driving protein sorting from endosomes to lysosomes. 
The core domain of the Vps23 subunit of the heterotrimeric ESCRT-I complex is a helical hairpin sandwiched in a fan-like formation between two other helical hairpins from Vps28 (Pfam:PF03997) and Vps37. Vps23 gives ESCRT-I complex its stability [1]. 20.60 20.60 21.10 20.70 20.20 20.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.55 0.72 -4.36 19 362 2009-01-15 18:05:59 2007-02-15 14:37:13 5 6 269 7 223 353 3 62.60 44 14.79 CHANGED -phltstsslhpQlhcLlAc-pAl-DsIhhLscuLcpGpIsl-palKplRtLuR-QFhtRhhhpK .........D-lllsssslh+QllsLhAEEpAI-DsIahLucAL..c..+....G...s.....I....s...l.-saLKplR.LuRcQFhhRAhhpK................... 0 76 126 181 +9287 PF09455 Cas_DxTHG CRISPR-associated (Cas) DxTHG family Coggill P anon pdb_2i71 Family CRISPR is a term for Clustered Regularly Interspaced Short Palidromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR associated) proteins. The family describes Cas proteins of about 400 residues that include the motif [VIL]-D-x-[ST]-H-[GS]. The CRISPR and associated proteins are thought to be involved in the evolution of host resistance. The exact molecular function of this family is currently unknown. 
22.30 22.30 22.30 22.80 21.40 21.20 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.31 0.70 -5.31 44 268 2009-01-15 18:05:59 2007-02-15 14:47:23 5 6 191 2 136 269 9 350.30 14 81.01 CHANGED hllohlGss.............pappspYphsspp...............hco....thsttulhchhp......sschllhssss.....................................t...sshptlpptlpsphtp........................pslhlP..............................cGpsps.hhp...lahpl.hcp..l......pcss.clllDlTHGhphhP.hlshhuhphhttlt.......................tsphcslhhuhapsps...............hsshh-lsthhplhchhhsspphhphsssp.lsph..hppt...h.......................tpthpphhphlpslspul.lshshph................thhpplhphlppthphtt.........................hhth.hh.hlpplhpphphhstpp....................lcphhclhc.hahcpt.h........hpuhhltpE...............hpphpptpt+pphppth............................................pp....hhphhsphtphRNhlAHuGhp ................................................................................................................................hlshhGts................tap.spYhh..tspp.....................ps........thht.slhchhp.........schllhhspp...............................................................pphpthhptl.pphh.p..................................................................phh.lP........................................pupspp.hhp......lhppl.hcp...l.........ppss.clhl..DlTHGh+.hP.hlsh..hshphhphht.......................th.p.h.ptl..hauhhpsps.............................t.sslh-ls.sh....h.pl.hchh.uhptahph.sp.s..p.ltph.......htt..............................t.hpth......hp....hlpphppsl.h..hhhth...................p.hpphhp.lpph.p..t.....................................................hh.hhpph...pph.t...ht.tp....................htp..hphhc.hhhpt.t..h.........puh..hlhp.....E....hhh...t.........h.ph.pht.tpt.ht..h.....................................................................................t.....................ht....ph.p.hRN.hsHsuh.........................................................
......................................................................................................... 0 69 89 110 +9288 PF09456 RcsC RcsC Alpha-Beta-Loop (ABL) Coggill P anon pdb_2ayy Domain This domain is found in the C-terminus of the phospho-relay kinase RcsC between Pfam:PF00512 and Pfam:PF00072, and forms a discrete alpha/beta/loop structure [1]. 25.00 25.00 37.40 35.80 19.70 18.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.95 0.72 -4.34 12 546 2009-01-15 18:05:59 2007-02-20 16:07:56 5 4 538 2 41 289 1 91.90 68 9.90 CHANGED pCWLAlRNspLppaLpslLutpGlpltcYp.sppsss-DllIoD.ssphshtspAhIphspcHIG.spEppPG.WhpSsusspEL.sLLs+Ia ......pCWLAVRNASLCQFLETSLpRSG.lsVspYE.GQEP..sPEDVLITD-sls+cWQGRAsVsFCRRHIGIPLE+APGEWVHSVAuPHELPALLARIY.. 0 1 9 24 +9289 PF09457 RBD-FIP FIP domain Coggill P anon pdb_2d7c Motif The FIP domain is the Rab11-binding domain (RBD) at the C-terminus of a family of Rab11-interacting proteins (FIPs). The Rab proteins constitute the largest family of small GTPases (>60 members in mammals). Among them Rab11 is a well characterised regulator of endocytic and recycling pathways. Rab11 associates with a broad range of post-Golgi organelles, including recycling endosomes [1]. 23.00 23.00 25.30 23.60 22.80 22.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.19 0.72 -4.16 19 430 2009-01-15 18:05:59 2007-02-21 13:42:40 5 10 90 10 225 398 0 44.20 41 7.00 CHANGED o+-ELhphltcpEcpht+.......Lc-YIDsLLlplMEcsPsILcss .................o+-ELhptl..hcpEc..t+.......LccYIDplLlplhEpsPsILcl.... 0 40 60 129 +9290 PF09458 H_lectin H-type lectin domain Coggill P anon pdb_2ccv Domain The H-type lectin domain is a unit of six beta chains, combined into a homo-hexamer. It is involved in self/non-self recognition of cells, through binding with carbohydrates [1]. It is sometimes found in association with the F5_F8_type_C domain Pfam:PF00754. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.00 0.72 -4.32 87 326 2009-01-15 18:05:59 2007-02-21 14:07:37 5 22 127 25 229 344 33 71.10 20 18.43 CHANGED pphppslpFs.psFpp..hPpVhlulphhD..hsps........psh..phplpspslTtpuFs.lphpshsss.plhplphsahAlss ..................hplsFs.psF..ps.....sPtVhlulshhD...hsps........tsh..phplpspslTps........u..Fs.lphp...s..hsss..tl..hph..p..hsahAh..t....................... 0 170 204 223 +9291 PF09459 EB_dh Ethylbenzene dehydrogenase Coggill P anon pdb_2ivf Domain Eythylbenzene dehydrogenase is a heterotrimer of three subunits that catalyses the anaerobic degradation of hydrocarbons. The alpha subunit contains the catalytic centre as a Molybdenum cofactor-complex. This removes an electron-pair from the hydrocarbon and passes it along an electron transport system involving iron-sulphur complexes held in the beta subunit and a Haem b molecule contained in the gamma subunit. The electron-pair is then subsequently passed to an as yet unknown receiver [1]. The enzyme is found in a variety of different bacteria. 
26.20 26.20 26.30 27.10 24.20 26.10 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -12.05 0.70 -4.94 49 210 2009-09-13 05:32:36 2007-02-21 14:20:32 5 12 144 1 98 213 46 238.40 19 66.68 CHANGED sssWsp....ssssplsl....hss.ssh.........................................................tsstpslpVpAsasucpl.ahhhpWps.........................................................................................stsh......................t.............tss.apD+lAlha..s..............s.tss.h.s..s........hpshssss.t.....................hhpahsss..................................sthl-l..WpW+u..t.........t.................t....s...........................................................ph........................tt.sht...s.p.p............................stsslpssu....a..psG....pWsVhhsRsLpss...s...-sshps.GpshshuhAla...-sspsp+stphu .............................................................Wpph..hplsL.....hss...h.........................................................pthtt.lt.VpAsa....sup....pl.hhhhpWts...................................................................................................................................stsht..........................t.....................t.t.h.DphAlha..s.t...................sh.h.s..s..........s.hsth..h........................hhsss................................sthssl.ah..Wpu...................................................................s........................................................................................................t...pst..............................ts.t.htstu.....ph..tsG.....hWpl.hsRsLts.....s...t...psphp..Gt.h.huhAha...psp.hpp.hph............................................... 0 31 74 89 +9292 PF09460 Saf-Nte_pilin Saf-pilin pilus formation protein Coggill P anon pdb_2co1 Domain This domain consists of the adjacent Saf-Nte and Saf-pilin chains of the pilus-forming complex. 
Pilus assembly in Gram-negative bacteria involves a Donor-strand exchange mechanism between the C- and the N-termini of this domain. The C-terminal subunit forms an incomplete Ig-fold which is then complemented by the 10-18 residue N-terminus of another, incoming, pilus subunit which is not involved in the Ig-fold. The N-terminus sequences contain a motif of alternating hydrophobic residues that occupy the P2 to P5 binding pockets in the groove of the first pilus subunit [1]. 25.00 25.00 25.10 25.00 24.20 24.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.75 0.71 -4.41 4 84 2009-01-15 18:05:59 2007-02-21 14:30:22 5 1 81 19 1 51 0 138.30 60 90.19 CHANGED GShlPNoEQppSVDlsFuuPppLolohsPVuGLhAG.ptustpIApLsVsSsohKpaulpG..hussVlsssGssWplsGKNoGptIpVsFuusshuppsus.paNG+pWhsaDhNDpLslhLsG.uQNVsADTYPlTlclsuYQs .........GSFLPNTEQcKSVDISFAAPEcLTlSL-QsP.GLhAGKsKPsT.slAKLTVNSTS.IKEFGVR...G..VSsohl..NshGShWsITGKN.S.GsoluVGFS.......SpoLusS+SspsWNGl+WaTFDsN....DPVsIsLlt...DQNIPPDTYPlTVDVVGYQP...... 0 0 0 1 +9293 PF09461 PcF Phytotoxin PcF protein Coggill P anon pdb_2bic Domain PcF is a 52 residue protein factor of two alpha helices, containing a 4-hydroxyproline and three cysteine bridges. The presence of the hydroxyproline is unique in relation to other fungal phytotoxic proteins. The protein has a high content of acidic side-chains implying a lack of binding with lipid-rich components of membranes and appears to be an extracellular phytotoxin that causes leaf necrosis in strawberries. 25.00 25.00 25.10 33.10 24.40 24.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.53 0.72 -4.06 5 31 2009-01-15 18:05:59 2007-02-22 13:45:16 5 2 5 1 11 32 0 44.60 56 36.26 CHANGED QQLC+.AsGCAYEYScANtVVSKCC+AINs-PlAFHDCCpcSCNoG QQLCp...AsGCA.Y.cYScANpslSKCC+sI.NscPsAFa-CCucSCNoG.. 
0 1 10 11 +9294 PF09462 Mus7 Mus7/MMS22 family Wood V, Mistry J, Bateman A anon Wood V Family This family includes a conserved region from the Mus7 protein [1]. Mus7 is involved in the repair of replication-associated DNA damage in the fission yeast Schizosaccharomyces pombe. Mus7 functions in the same pathway as Mus81, a subunit of the Mus81-Eme1 structure-specific endonuclease, which has been implicated in the repair of the replication-associated DNA damage [1]. The MMS22 proteins are involved in repairing double-stranded DNA breaks created by the cleavage reaction of topoisomerase II [2]. 21.10 21.10 21.20 22.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 614 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.13 0.70 -12.86 0.70 -5.68 17 141 2009-01-15 18:05:59 2007-02-22 14:11:52 5 5 120 0 111 150 1 490.60 19 29.02 CHANGED hEthW.plhphhs......lsplsthshhhhssp.....tssWshlp.hhp.hh...p.t.t.........shspYtcs.....hhtRChhLhptasW..c.sp.llhp..LachFsppphhchcpEp.hppPt....h.lccLspsPohthpstp..hFchal+lLh.ulp.L...........tspcpl+shs.RlhPspshpYs+ppsl..tphs.LhN+asLLhsLhahsssshc.+lpplpp..lpstpuchcs.....phIslps.spLshhploppcchpsht.hsthhs.hlsphlcpah.hpt..p...........................hhpsFppphhcphhspspcplpshLphslpuhpshl............ptusshppsphllsps.lptlhph.p....pls....pcslphIpsaL..............tt......................tt..t...........t........................................................phhphlcplhp.slp..p.hsp.htts.s.-t.h............................................lhshlcsasphAphLVcpshp.hWs....hs...................astpsahtlp.sstpccas.hFhsphlphDstthp..chcpplhuhhhpsLlpphshlt.....pLhphlhsh.pps.hhp.hsh..t.....hplshsshpppcLsllosllpsh+...........................t.p.t+ppapchlpphhpsh+ssYtphtt........ut....ttahsFlp+llshl 
.............................................EthWpthhthhs......h.phs.h.shhh.s.+............spWthhp.hht.h...h...t..s..t..........hspYhcs.....hhtpChhLhptasW....phsp.ll.h..lach....Fspp.phtshppEtshtp.P........................h.lpplstps.shthp....stp..sFchaL+llh.shp.h...............ptcplpshshRhlPspsh........pa.......s.cppsl..pphshLhN+asLLh..sLhhhss..s.s.hc.p..lptlpph..lp.tpucpcs.....p.lslcs.tpLshhtls...pt..c...sh.......ht.hhthhsphhpphlppa..htt..t.............................p.h.pphhp...h.ht..ppth.phl...h.s.hthh............ptt.....s...hhh......httlht.........th.........shthl.tah.......................................................................................................................................hpphht..........p.............t................................................h.thhp.a.thuthhlppthh..hs.....s....................hs..sh..ht.p....ppcah.hFh..hhp.ss...hp...php..phhphhhtslht.t............thhthl.p..................................htht.....ppphslhp.hhpshp................................................tt...ta.tphlp.hhtthp...tpht.h.....................t..........h.tFhtthlt.............................................................................................................................................................................................................................................................. 0 27 60 98 +9295 PF09463 Opy2 Opy2 protein Wood V, Finn RD, Bateman A anon Weod V Family Opy2p acts as a membrane anchor in the HOG signalling pathway [1]. 
21.10 21.10 21.60 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.06 0.72 -3.86 15 95 2009-01-15 18:05:59 2007-02-22 14:50:21 5 2 92 0 63 92 0 35.20 46 8.07 CHANGED CVs.Csosss.oCPsCscGcpCshsStTCspCspThC .CVs.Css.sss.sCP..s..CssuEhCsho.uhoCspCssshC 0 11 29 51 +9297 PF09465 LBR_tudor Lamin-B receptor of TUDOR domain Coggill P anon pdb_2dig Domain The Lamin-B receptor, found on the TUDOR domain Pfam:PF00567, is a chromatin and lamin binding protein in the inner nuclear membrane. It is one of the integral inner Nuclear Envelope membrane proteins responsible for targeting nuclear membranes to chromatin, being a downstream effector of Ran, a small Ras-like nuclear GTPase which regulates NE assembly. Lamin-B receptor interacts with Importin beta, a Ran-binding protein, thereby directly contributing to the fusion of membrane vesicles and the formation of the NE [1]. 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.73 0.72 -4.34 5 85 2009-01-15 18:05:59 2007-02-23 17:27:58 5 10 51 2 45 73 0 53.10 54 9.68 CHANGED MPupKYpcGElVMGRWPGSsLYYEVcVlSFDsKSQLYTVlYKDGTELELKESDIK .........MPs.tKascGElVhGRWPGSuLYYEVcllSaDspopLYTVpYKD.GTE..LELKEsDIK........... 0 8 13 25 +9298 PF09466 Yqai Hypothetical protein Yqai Coggill P anon pdb_2dsm Domain This hypothetical protein is expressed in bacteria, particularly Bacillus subtilis. It forms a homo-dimer, with each monomer containing an alpha helix and four beta strands. 24.80 24.80 25.70 26.00 22.10 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.35 0.72 -3.97 2 32 2009-01-15 18:05:59 2007-02-23 18:12:33 5 1 25 2 4 31 0 63.30 35 82.67 CHANGED MlENPMVhpNh......pc.s.hDh.I-.hhGsElhPsD-all.ssGEllLRENl.cYhhpQLGFEhKsAt. .............t....................p...pEs-h.h-DhaGsElhssDcYll..ss.GEllLc-NLpcYLhEphGhchp.............. 
0 1 3 3 +9299 PF09467 Yopt Hypothetical protein Yopt Coggill P anon pdb_2dlb Domain This hypothetical protein is expressed in bacteria, particularly Bacillus subtilis. It forms homo-dimers, with each monomer consisting of one alpha helix and three beta strands. 25.50 25.50 25.50 46.40 22.20 25.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.42 0.72 -4.13 2 7 2009-01-15 18:05:59 2007-02-23 18:13:57 5 1 7 2 2 5 0 70.40 65 95.73 CHANGED AGYLNNIALNLEIVLKNKADSPEVSETLVTRICENLLLSKEVSFLKADGSVENFKLSDMEYEITNTEELPE ..AGYLNNluLpLEIVLKNKAcs.EVSpoLspRlCEpLhlu+EVsFLpADGoVEpFKLsDhEYEIoNTEEl.... 0 1 1 1 +9300 PF09468 RNase_H2-Ydr279 Ydr279p protein family (RNase H2 complex component) Wood V, Chahwan C, Bateman A anon Wood V Family RNases H are enzymes that specifically hydrolyse RNA when annealed to a complementary DNA and are present in all living organisms. In yeast RNase H2 is composed of a complex of three proteins (Rnh2Ap, Ydr279p and Ylr154p), this family represents the homologues of Ydr279p [1]. It is not known whether non yeast proteins in this family fulfil the same function. 
23.60 23.60 23.90 23.90 23.20 23.50 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.05 0.70 -5.22 42 345 2009-01-15 18:05:59 2007-02-26 08:50:54 5 5 278 16 229 334 0 283.10 21 86.68 CHANGED s..s+lh.....lLPpssssst......hphlpLssPp.sspsstahh.....ppsplaElpthpt........................s.+Shhls.............................stlhpsuplhhsoshDslFlllshLhptt........................pspFhsl--ll......Dphhs...........thpphh.h.........hshlcpplptls-sh...............Esusc..pha+hsppKllpaLttKspphsph...l.pshp.phhpph...................................................................................th.pphhp.thhphuhshlsp.h.pphhpplhpt.hh............-ht.hpthhtt.................hppc+th.psh-uthsps...........pppppp.pttpKpsp...opst+....shtphsssGhp..plsuFF ...............................................t...hh.....lhs..p..t.t.........hphlpL....pPp..pu........psshalh.............p.pps..laElpthpt.........................th+SWhls.....................................................s.VtpsupLhhsTPhDslFllLs...hLhpsp................................ptpFhsl--ll.........Dp.hs............................thpplh.....................hthhcptlppls-sh....................................-.h..usp.....pha+hscpKhlpaLtpKspphs......ps....L.ts.p......phst.t...........................................................................................t..tp...p..hl..ph...Ahshlsphh.tp.....hh.ppl.hp.h....................t.t....t...................................p.p..........ptt...phs....................tp.tt...tt.ttpptt.......stt.c....th.t.ph.stpG..ht..plssFF.......................................................................................... 0 78 128 191 +9301 PF09469 Cobl Cordon-bleu ubiquitin-like domain Coggill P, Bateman A anon pdb_2daj Domain The Cordon-bleu protein domain is highly conserved among vertebrates. The sequence contains three repeated lysine, arginine, and proline-rich regions, the KKRAP motif. 
The exact function of the protein is unknown but it is thought to be involved in mid-brain neural tube closure. It is expressed specifically in the node [1]. This domain has a ubiquitin-like fold. 25.00 25.00 27.70 26.80 23.30 18.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.68 0.72 -4.01 4 145 2012-10-03 10:59:06 2007-02-26 12:46:05 5 5 39 1 57 123 0 79.10 57 8.93 CHANGED PsICpKCEFsPtHVlLL+DshupEEL-LsKSLs-LGIKELYAaDsp.............+EopphSSstss.o-KEKKuFLGFF+hsK+pps ......PlICuKCEhsPtHslLL+D..utE.L-LoKSLN-LGl+ELYAhDsp....................REohp.hS.....p...ss...p-KEpKtFhuFFptsK+pp.p....... 0 2 6 22 +9302 PF09470 Telethonin Telethonin protein Coggill P anon pdb_2f8v Domain Telethonin is a 167-residue protein which complexes with the large muscle protein, titin. The very N-terminus of titin, composed of two immunoglobulin-like (Ig) domains, referred to as Z1 and Z2, interacts with the N-terminal region (residues 1-53) of telethonin, mediating the antiparallel assembly of two Z1Z2 domains. The C-terminus of the telethonin appears to induce dimerisation of this 2:1 titin/telethonin structure which thus forms a complex necessary for myofibril assembly and maintenance of the intact Z-disk of skeletal and cardiac muscles [1]. 25.00 25.00 41.90 41.80 19.00 17.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.02 0.71 -4.79 3 40 2009-09-11 00:10:22 2007-02-26 13:39:14 5 1 34 3 25 38 0 162.40 66 97.71 CHANGED MATSELSCpVSEENCERREAFWAEWKDLTLSTRPEEGCSLHEEDTQRHETYHRQGQCQALVQRSPWLVMRMGILGRGLQEYQLPYQRVLPLPIFTPAKVGATKEEREETPIQLQELLALETALGGQCVDRQDVAEITKQLPPVVPVSKPGsLRRSLSRSMSQEAQRG .......MAToELSCpVSEENpERREAFWAEWKDLTLSTRPEE..........GCSLHEEDopR+ETYHpQGQCQuLVQR.SPWLVMRMGILGRGLQEYQ.L.PYQRVLPLPIFTPs.KlGssKE.ERE-TPlQLpELLALETAL....G.....GQClDRQ-VAEITKQLPPVV.PV....S.....K.....P.GsLRRoLSRSMSQEAQRG................. 
0 1 5 10 +9303 PF09471 Peptidase_M64 IgA Peptidase M64 Mistry J anon Rawlings N Family This is a family of highly selective metallo-endopeptidases. The primary structure of the Clostridium ramosum IgA proteinase shows no significant overall similarity to any other known metallo-endopeptidase [1]. 20.10 20.10 20.10 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.58 0.70 -4.76 14 251 2012-10-03 04:41:15 2007-05-02 11:06:12 5 20 205 8 66 360 113 241.70 28 47.19 CHANGED apso--upcsstshppohphPssstssplshhph.tstpph.p.hphllc...P...c-.hhspstssssss.....lh.lhcoGs.spp+lDlllhG-GYTssEhscFhpDAp+hhcslFu.ppPa+pa+scFNVWulsssSp-S.........GV..upPpsshh+cohlGupassas..ERhltsscs+shppss...........u...hsPY-hl.......hllsNscpYGGuGhhsh........auslsucss.hushlhlHEhGHuhuuLADEYhsusssh.s......EPac.NlTs..sspth....KW .............................................................................................................................t..h..........ts.hhp.t.t.tp........hhhlhpsus.sscplclsllu-GY..Tt....s..E...h...s...p...Fhp.D..A..ppth.-.sLF......u...tE.Pa+oh+stFNlhAVtssSp-S...........................Gl.......s.s......t.....p....s......h.h.....+..sTul.s..opF....s....s.ah....s...-R.h...Lss...s..phcshc.ssh............s......sssa.-.tl...............llLsNo.s...p...YG.....GuGhhss......................ashs.o.s..pps....h....hp.VllHE.h.G.HoFuGLuDEYhh.spss.h.th............thEPhc.Nlosh....s.s.t......KW............................................. 1 30 54 64 +9304 PF09472 MtrF Tetrahydromethanopterin S-methyltransferase, F subunit (MtrF) TIGRFAMs, Coggill P anon TIGRFAMs Family Many archaea have evolved energy-yielding pathways marked by one-carbon biochemistry featuring novel cofactors and enzymes. This domain is mostly found in MtrF, where it covers the entire length of the protein. This polypeptide is one of eight subunits of the N5-methyltetrahydromethanopterin: coenzyme M methyltransferase complex found in methanogenic archaea. 
This is a membrane-associated enzyme complex that uses methyl-transfer reactions to drive a sodium-ion pump. MtrF itself is involved in the transfer of the methyl group from N5-methyltetrahydromethanopterin to coenzyme M. Subsequently, methane is produced by two-electron reduction of the methyl moiety in methyl-coenzyme M by another enzyme, methyl-coenzyme M reductase. In some organisms this domain is found at the C terminal region of what appears to be a fusion of the MtrA and MtrF proteins. The function of these proteins is unknown, though it is likely that they are involved in C1 metabolism. 21.00 21.00 21.40 21.30 20.50 19.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.77 0.72 -4.73 9 59 2009-01-15 18:05:59 2007-05-02 16:24:54 5 2 54 0 38 58 10 63.30 32 52.29 CHANGED slshsscPphsuIpshV-sl+Y+uQLlARspKLsSGltusshhGhslGhlhAllhlllP.hlhh .............l.hsspPphsuIcshlEslcY+spLluRct+LsSGltssslhGhshGhlhAllLlslslhlh.h.. 0 8 24 32 +9306 PF09474 Type_III_YscX type_III_YscX; Type III secretion system YscX (type_III_YscX) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are encoded within bacterial type III secretion gene clusters. Among all species with type III secretion, those with this protein are found among those that target animal rather than plant cells. The member of this family in Yersinia was shown by mutation to be required for type III secretion of Yops effector proteins and therefore is believed to be part of the secretion machinery. 
20.50 20.50 21.20 35.80 20.00 19.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.50 0.71 -4.09 6 77 2009-01-15 18:05:59 2007-05-02 16:41:22 5 1 74 0 10 33 0 121.50 53 99.75 CHANGED MSR.IoAhHIGIEpLotlShEElssuLPcRatLLPDGQuVETHlp+LYPtp.u-QtLhsaApPphsFHsLLRPcDaRQthcs....LppLLppssospLptAusLLpppppD-RLLQMALNLLHKV .........MSR.loAhHIGIEpLotlShE-lsssLP-RatLhPDGpsVEsHLc+LYPttpocptLhDaApPphsFHsLLRPpDaRpphcs....LppLLspu.pSsp......LpAAAsLLpspQpD-RLLQhsLNLLHKV........................ 2 2 4 5 +9307 PF09475 Dot_icm_IcmQ dot_icm_IcmQ; Dot/Icm secretion system protein (dot_icm_IcmQ) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are the IcmQ component of Dot/Icm secretion systems, as found in the obligate intracellular pathogens Legionella pneumophila and Coxiella burnetii. While this system resembles type IV secretion systems and has been called a form of type IV, the literature now seems to favour calling this the Dot/Icm system. This protein was shown to be essential for translocation. 25.00 25.00 27.00 140.90 18.10 16.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.10 0.71 -5.04 21 62 2009-01-15 18:05:59 2007-05-02 16:43:40 5 1 51 3 3 55 3 178.20 57 91.04 CHANGED AILKALD-AIEpGPWEESNFLRVIGKNL+EIR-sFuspluss..sptps+ptoph.AsRlALRoGQQElFIuLYSo-GsNlQoWERIlANLPRQMISRPIYAsEpDV+slIKoKENKlNEAYVAIYIsQsDILslssDKsPhDKhGKPLLoLKD+ulsLENIsRFVHhSGlY+Ys+GR...LlKs ..ILKALs-AIcpGPW-cSNFLRVIGKpLhtIRDpFhcpluus..sps..cl+.s-.upL.ANRlALRSGQQElaVSLYSSDGoNLQSWE+IlusLPRQMISRPIYA-E-DlKsllKoKENK.NEAYVAIYIsQoDILplosDKsPsDKLG+PLLTLKD+oIsLENIoRFVHlSGVY+YspGRLIKp....... 
0 2 2 3 +9308 PF09476 Pilus_CpaD pilus_cpaD; Pilus_cpaD; Pilus biogenesis CpaD protein (pilus_cpaD) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry consist of a pilus biogenesis protein, CpaD, from Caulobacter, and homologues in other bacteria, including three in the root nodule bacterium Bradyrhizobium japonicum. The molecular function of the homologues is not known. 30.10 30.10 30.50 30.80 29.30 30.00 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.32 0.71 -4.84 38 190 2009-01-15 18:05:59 2007-05-02 16:43:56 5 2 168 0 79 185 16 181.00 27 84.57 CHANGED hllshshsLuuCsssss.......t..spshcppHPIsVpcsspslsl.lssstsuLossp+spltuahppatppussslhlpsPu.ss.st.s..Assshp-lpphLtptGlssspltstshtsssspssuslRlsas+hsA.psssCGtWscshs.sh.....hcNpsh.NaGCAspsNLAAMVANPpDLlpsRshosssssR.Rspsl...-pYRpu ............................h..hhhshhLuuCstt.............s..shptp+slslppsppslpl.lt.ttttLsssppspltshhtphhp..t..usshlhl.h....Ps.st.....t..utpshtpltphlsttGlsspplhhts.....st........s.t........ssu......s.lRlsa.phpA.hs.s.sCu.thspc..hs.ss..........hpNcshhNFGCAspsNLAAMlAsPtDLltPRsh.sssDusp.pssslcpYRp............... 0 21 40 55 +9309 PF09477 Type_III_YscG type_III_yscG; Type_III_yscG; Bacterial type II secretion system chaperone protein (type_III_yscG) TIGRFAMs, Coggill P anon TIGRFAMs Family YscG is a molecular chaperone for YscE, where both are part of the type III secretion system that in Yersinia is designated Ysc (Yersinia secretion). The secretion system delivers effector proteins, designated Yops (Yersinia outer proteins), in Yersinia. This entry consists of YscG from Yersinia and functionally equivalent type III secretion proteins in other species: e.g. AscG in Aeromonas and LscG in Photorhabdus luminescens. 
26.10 26.10 26.30 44.40 25.90 26.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.45 0.71 -4.01 7 84 2009-01-15 18:05:59 2007-05-03 10:10:21 5 1 65 4 9 55 1 110.50 55 99.54 CHANGED MphpL+t.LAElALhGoGpHpHpEAssIA-WLtppsp.pEsVsLIRlSSLMNpGcYppA..L..stshshPDLEPWhALCEa+LGLtutL-pRLstLuuSusPplpsFssGhRpQlps MchcLpp.LAElALhGTGpHCHpEAssIA-WLcttuc..-EsVpLIRlSSLMNRGcYppA..Lt.utppsaPDLEPWhALCEa+LGLtuAL-sRLttLutpts.t.tsFssuhpppl+s............... 0 1 3 4 +9310 PF09478 CBM49 Carbohydrate binding domain CBM49 Mistry J, Urbanowicz B anon Pfam-B_6310 (release 21.0) Domain This domain is found at the C terminal of cellulases and in vitro binding studies have shown it to bind to crystalline cellulose [1]. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.82 0.72 -3.88 28 310 2012-10-01 21:34:18 2007-05-03 10:13:48 5 20 40 0 245 346 0 82.10 25 20.10 CHANGED lslpQplTuoWhsssp.sY.hpassslsNpu...spslpslplpls.pl.....ssIWGlsp..usssYshPuWls...oLssGpohsFsYIp....s ..............................tlppphhsoWh...p...s...sp...sa.hpaslplpNpu...spslps....l......plshs...slt............sslWs..ls......p.....ss.....s.......t..........a....s....L...P..u..aht....sls.s.Gp.oa.s.FGYI..ps................. 1 165 230 238 +9311 PF09479 Flg_new flg_new; Listeria-Bacteroides repeat domain (List_Bact_rpt) TIGRFAMs, Coggill P anon TIGRFAMs Family This model describes a conserved core region of about 43 residues, which occurs in at least two families of tandem repeats. These include 78-residue repeats which occur from 2 to 15 times in some proteins of Bacteroides forsythus ATCC 43037, and 70-residue repeats found in families of internalins of Listeria species. Single copies are found in proteins of Fibrobacter succinogenes, Geobacter sulfurreducens, and a few other bacteria. 
20.70 14.00 20.70 14.00 20.60 13.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -8.25 0.72 -3.92 74 3687 2009-01-15 18:05:59 2007-05-03 10:35:46 5 395 401 4 433 3605 541 42.10 41 10.26 CHANGED PT..+...sG.YoFsG.WYssp.ssGspasas..ssh...spslTLYApastss .............Ps...K....pG..YTFpG.W...Y.....s..s....p....ss........G.......s....c......a....sFu............Tshh.........ssss....l....T........LYApashs.s........................... 0 247 380 413 +9312 PF09480 PrgH Type III secretion system protein PrgH-EprH (PrgH) TIGRFAMs, Coggill P anon TIGRFAMs Family In Salmonella, the gene encoding this protein is part of a four-gene operon PrgHIJK, while in other organisms it is found in type III secretion operons. PrgH has been shown to be required for type III secretion and is a structural component of the needle complex, which is the core component of type III secretion systems. 20.40 20.40 20.40 21.90 20.20 20.30 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.93 0.70 -5.60 8 407 2009-01-15 18:05:59 2007-05-03 11:43:20 5 1 334 38 14 205 0 296.60 47 95.75 CHANGED hhlRlLsGPLpGsEahL.sGpsLhllGpssuhspsupss-........hPtsTlalPhspGssNFclhls..............tssschhLchLs-pssppc.lsaNpshpsGslthsl+.tsEsWpsp...................ssht.susspspsRhtsuhhsslsshhhLuslhshtlhhhsoppc.QltpLsslLssssppaplLsGcDGplYlLAsopRDusWu+QuLh+sphspsVplls.ssEppRIppaLscphPtLsha+LcLscPppPhLh.LS+pRsuhspsthc+L.ttLhshhPYAcslslsshsDssltppAcpGLschulsYcchscssuVTFlIpuuLsDsEltclppal-sah+pWGupYVpFuIpL+-DaL+GKSaphGssGYVhlssuHWYF 
..............................................................................lGpssuLstSsths-..........lPtsohhlPhschsssFEI.ls.................................sssh.lhh+.ELpttpsEs...RolpLNp.IpVhtLhhhl+.csEsW..p...................PthlEss...hhpsc.+Fhsuhl.....hth.s..hh..FhLhhhhh.hthhhhsusp........c..phtplsplLstppptaplL.GcD.p.h.lYl.sp..c-slWs+QsLt+sp.sKssRVIN.s-EshR.I.oWLss.aPpLtYa+Lph.-spp.hhh.lS+Q...RNshopp.l-sL.ptLhthMPYAsslsIslhDDsshtuQA.tsLpt.uLsYp+hpptssVhFsI.GsLsDsEl.+hppaVDpYY+pWGtpYVpFslpLKsp.hpspSFpYGspt.a.K.psupWhF.................................. 0 3 6 9 +9313 PF09481 CRISPR_Cse1 CRISPR_cse1; CRISPR-associated protein Cse1 (CRISPR_cse1) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry, represented by CT1972 from Chlorobaculum tepidum, is found in the CRISPR/Cas subtype Ecoli regions of many bacteria (most of which are mesophiles), and not in Archaea. It is designated Cse1. 
21.90 21.90 22.30 22.30 21.00 21.80 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.69 0.70 -5.74 55 645 2009-01-15 18:05:59 2007-05-03 12:04:30 5 2 613 4 118 467 12 456.40 33 89.10 CHANGED NLlc-PWIPVhht.....sGs..pt.plulpplh.....pssclhslshspPshpsAlhc..lLlul...lpss...............................tsP....cs.tc..cWtchapp...........shssctlpsaLpc..apcpFpLhssp...PFhQssth............pscstslsp...............Llh-h...suusshhhh.sahscpuhsph...ssupAAhsLlshpsausu...Gh+suhp...............GuGPlssls.sh...........tGpsLacTLhLNllstp..............thssss............hssWpts.spsspptspthh.........sshhphahhhsR+lcLh....spsst.............spsshhstGh...shsssh............h.t.cPhssa+............hppppshhshc...hpsuphhWRshsuLlhspssss....ptt........stllchhtphttpth..................................hplhshGhchDs.pA.ph.hphptshlslssth.........pppthtpplpphlptAcphtptLcstlpphhtst..............ps-hshhst.............tpaWppsEstFhp.hlpslst...........ttsppttttWpppl....pphuhpha-p .........................................LLssPWlPVRhc...................DGs........ss.clu.h-L.......ss.cslhc.lAusRsDhp.sAsap..hLLulLQsu..............................................huP.....cc..h.....cpW.c-lWpc...........ulps-tlcchLts..h..c.ctFphss-s..ssFMQsh.................p.hpscp......sslus......................LLs-h......PGupssch...-ah..hc+Gssch.....hsspsuAlALaohQh.A.Puh.GtGh+o.GlR.....................GGGPhTTLltl............pt.s.s.Lac..pL....WLNVhsp-........tthshstphcst............lhPWhss...s...csuchsusshs..........ttsshlpsYashPRRIRlc.....tphpsGsss............................................hsstpsstLlshhhhcsaussa............hW.p.HPhTPaR...............h.hKpss....tahsl+....spPGuhhWRcahuLltsspscs..........spp...................AtVlchhsshsh.ppht....................................lsLhuhG.h-h-s.hcu.psa..h-c.chslL...............tpcshhsplctssp....sA.s.c.l.h.....s.h.L...Rs.Al+-shhus....................pss+uDhuhlch........................cFWspopt...pFhp.llcplcps...........pcsc.-.hlspWp+pl..
..hhhspp.F-............................................................................................. 0 34 85 102 +9314 PF09482 OrgA_MxiK Bacterial type III secretion apparatus protein (OrgA_MxiK) TIGRFAMs, Coggill P anon TIGRFAMs Family This protein is encoded by genes which are found in type III secretion operons, and has been shown to be essential for the invasion phenotype in Salmonella and a component of the secretion apparatus. The protein is known as OrgA in Salmonella due to its oxygen-dependent expression pattern in which low-oxygen levels up-regulate the gene. In Shigella the gene is called MxiK and has been shown to be essential for the proper assembly of the needle complex, which is the core component of type III secretion systems. 25.00 25.00 26.10 25.90 23.10 22.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.06 0.71 -4.64 7 353 2009-01-15 18:05:59 2007-05-03 12:05:50 5 1 334 0 15 157 0 136.90 46 92.30 CHANGED lhplhacPloYlHssRhslssthhss.stRslsNchllstYcLss-ht....tsshhthalspWphLPplAhLhGsphhRspLsppGhhlpLsshspsFh.Ah.lssp.puu..pthss.ttll.......ssGhhpLluhhcphP.ultQRhsLLFPshh-cshs......hPstpsL.h.p.shpaAp+Hsph.ss .................tlhhtPh.Yhp.ph...ss......s..t.hhN.hll.tapLp...s.t........ss.h.hhlppWphhP.hshh.GsphhRtthscpGhhhtlsshh..p..saL..u.h..ths..psp.....sths.s..p.ll.......ssGhupL.sh.c..P.uhtQRFsLLFPsFl.-chph.........hslt.ol..L..ls.h.htpphsphs.h........................ 0 4 7 10 +9315 PF09483 HpaP Type III secretion protein (HpaP) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are always found in type III secretion operons, although their function in the processes of secretion and virulence is unclear. Hpa stands for Hrp-associated gene, where Hrp stands for hypersensitivity response and virulence. 
see also PMID:18584024 23.30 23.30 25.40 25.40 23.10 23.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.24 0.71 -4.34 11 81 2012-10-01 19:58:36 2007-05-03 13:00:00 5 2 68 0 15 85 3 190.60 33 77.85 CHANGED sPss....PA........cssppuhstsphhpsuhpsuPsspshPsps........h....sps.psAsss......sssPps.....pssscs.pstsDu.stpshutstsuc.......h.pspchsthlcplAtclAtFCussslhtuGp.WplplslDPslLPposLtLsLS.apLoLRF.....cosss-oRcLlspHtssL+tpLcs....thsusRslpI-Vs ........................................t...s.PA..........pupcpsF.hsphhpcu.h.ph.uPsstshPsss................h....spscc.sAsss......sssP.h........sstcs.ts..Ds.sptuhuRshAut....................htppp+hshhlctLAtclAtFCuusAlhsuGp.WEspLslDsplLPpTsLaLtLS.apLuLRF.....-sscs-oRcLLhsHpstLctpLcs....thusshsIpl-V.................. 0 3 7 10 +9316 PF09484 Cas_TM1802 cas_TM1802; CRISPR-associated protein TM1802 (cas_TM1802) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This minor cas protein is found in at least five prokaryotic genomes: Methanosarcina mazei, Sulfurihydrogenibium azorense, Thermotoga maritima, Carboxydothermus hydrogenoformans, and Dictyoglomus thermophilum, the first of which is archaeal while the rest are bacterial. 
22.50 22.50 23.10 23.40 22.00 22.40 hmmbuild -o /dev/null HMM SEED 593 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -13.15 0.70 -6.03 16 118 2009-01-15 18:05:59 2007-05-03 13:06:51 5 2 108 0 59 118 6 501.70 18 87.54 CHANGED MlpulhplGchhlp..scutpphph..........l.p.sptushtplLhlshsscpsplp..hhht-EhspcshpcaLahspussssu...hssTsphs.......................c.hcphtpKlpphhpph...h..............t+.c.hlp....................................................slcphlpcphcphlp-h.......shcppcsslholph...sschhhsp..aptahcthlpthppphh.....tKps+ppul.CplCscp.stlhuph.p...aKaYTsDK.sFss.shsppstaKshslCpcChhpL.tGcpalcppLssphh.uhc..halIPphlh........phcp.lccIhcphpphpshp......tslpppE--Ihphlpppcp...hhhshLF.acpsp..usa+IlhhIcDlsPSRhccIhcttc+spptht...................................phsLspIhhhh.......pp.spsspsKpaLplhsuIFsG+.lshchLlsphhptlRppah...............cstththhshpuhhlltFLpclshLp.......tshphcpspht.......pp-lcpFh....pphhpss.KpAlFLLGlLhscllphQa.........tcppsKsalsK..Lph.thsppclhplhs-lpsKLppY.......tt...ppY.hcplhtphtchhhpuhssWpLopDEhsFYhl.Ghohup 
..................................................................................................hlt.h..lGp...t...t........................................ph.hhhhlth...tt.tth..........cpht.tpphtphh....a......httsss.ss...h...s..s.pht................................p..pth.thl.phhp.h...................................p.t.hhp....................................................ph.....phhtpt..hpphhpc..................t..tpp....h.hh.slth.......ssp....h.s....h..ahphh...ht.httt.........ttpt....tpth..CtlCspp.ptlhspht........apaaohD+.sahs.thp....ppphh..+shslCtpCh.tl.tGcpal.........pp.p.........L.phphh..uhp......hhllPphhh...........pp.ht..plh..pth..pphh..pht..........ph.t.cc..clhp.hp...p.pp.......h.hshlF.hpptp...st.+lhhhlpDV.PS+hpplh..pth.pp..hpphh.....................................phshttlh.hh...............tpt...p.h.Lp....lhpt....lhptp.lshphlhp..hhpth.....p..hh.......................pt...h.........hh...p.h.hhh.alpphshlp.....................t..h...t.......................p.hppah....tth.h.ss.cpu.hFLLGsLhstlhphQh................ppptspPhhpp..l.phht.hsppclhplhsclhsKlppY........pt......htth....hctl.htph..pphh...p..t.......t....pa.phs.pEh.FYhh.Ghsht........................................................... 1 23 43 53 +9317 PF09485 CRISPR_Cse2 CRISPR_cse2; CRISPR-associated protein Cse2 (CRISPR_cse2) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family of proteins, represented by CT1973 from Chlorobaculum tepidum, is encoded by genes found in the CRISPR/Cas subtype Ecoli regions of many bacteria (most of which are mesophiles), and not in Archaea. 
It is designated Cse2. 22.10 22.10 23.40 22.80 20.40 20.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.94 0.71 -4.15 58 642 2009-01-15 18:05:59 2007-05-03 13:07:14 5 4 627 2 120 399 12 146.00 32 78.32 CHANGED uspApLRRutshsss...........h..ha........hhp..h..t............ptpppshhhlAulhAttppt.....................................ps.ut.....slGpsht.hhttstt.............sss-tRFppLlpus..........hcplhppLRpllphlc..sp....lsaspLscsLhtWpppp..............ptlptcWupDYa ...........................................................Gt+AsLRRusssscs..........phscua.......Lhhpstp.hh.pt...............phc.hsAlsls..AulhAphcthcc...............................................................ppshus.......pLuts............................hsc.tRFs+Lhssc.........ss--LlcpLRRhVcLLs....us........lshssLA-slapWspcpp............s-hlRlRWAh-YY................. 0 34 87 105 +9318 PF09486 HrpB7 Bacterial type III secretion protein (HrpB7) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow range of species including Xanthomonas, Burkholderia and Ralstonia. 27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.89 0.71 -4.30 6 92 2012-10-01 21:16:01 2007-05-03 13:20:05 5 1 73 0 16 91 1 149.70 35 85.75 CHANGED RRspsacsLlpt+sRcscRlpscLustRstLppssstLtpspspscApsscLssasuplDshsusGushsl-shLppccaRssLh-pputAEpttstAptuLputtspluuhppcluR.cApl-lhs-+tcplcRAt-AttEsAp-EEs.EAlluhRh ...............cpttsapsLhth+spctcRhptclpshRtthptstttlspsptpsctppsthsthstplsshss.ssushs..lsshhtpccaRssLh-cpttAEpppAthcsslputtcpluuspppluR.pApl-lsc-+hcplcRut-AttEsAp-EEh.EullAtRh............ 
0 3 6 10 +9319 PF09487 HrpB2 Bacterial type III secretion protein (HrpB2) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow group of species including Xanthomonas, Burkholderia and Ralstonia. 25.00 25.00 51.70 51.20 21.90 20.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.47 0.71 -3.90 9 93 2009-01-15 18:05:59 2007-05-03 13:24:07 5 1 73 0 16 60 1 116.90 38 89.29 CHANGED ussosAhsPsss.....sstpLss+FpALMpsusshPsuhptscs.Shlu+lVtppDsulRpsssclthhsppAsphohp-l..sAtslclphEhsuhphchpsphuVspSuKsAlpTLMKNQ ............................hss.AhsssAs.....sstcLssRFpALMpsAsstPsttppsts.Sslu+lVsspDstlRpss-clsshshpAsphohpEh..sApsl+lphElsuhphchpsphuVspSuKsAlpTLMKNQ 0 3 7 10 +9320 PF09488 Osmo_MPGsynth osmo_MPGsynth; Mannosyl-3-phosphoglycerate synthase (osmo_MPGsynth) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of examples of mannosyl-3-phosphoglycerate synthase (MPGS), which together with mannosyl-3-phosphoglycerate phosphatase (MPGP) EC:2.4.1.217, comprises a two-step pathway for mannosylglycerate biosynthesis. Mannosylglycerate is a compatible solute that tends to be restricted to extreme thermophiles of archaea and bacteria. Note that in Rhodothermus marinus, this pathway is one of two; the other is condensation of GDP-mannose with D-glycerate by mannosylglycerate synthase. 
20.20 20.20 21.50 127.70 19.30 18.70 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.23 0.70 -5.78 15 56 2009-01-15 18:05:59 2007-05-03 13:38:18 5 4 53 12 32 53 9 385.70 44 87.68 CHANGED lEhPp.sEpFGuVpIH.-VQ+VlE..LDoG........s.ttpssslpslspccLpclhpchAIVVPsKsEcl+LLEGVLpGIPH-ChlIlVSNSpRps.hDca+hEh-hlcpFsphTc+pllllHQKDPsLApAFp-sGYsslLsscuh.....VRsGKuEGMIlGhLLAKh..hG+cYlGFlDADNYlPGAVpEYV+tYAAGFsMu..............cSPYoMVRlpW+aKPKVscss.lYFcKaGRVSElTN+aLNtLlSshouFET-..........lIpTGNAGEHAMohcLAcpLsauoGYulEPYchV.LLEcFGshhts.tps-shppG..................................................lEIhQIEThNPHlHE.-KGs-H.lpcMlhsSLusIYH..................................S+Lss.........ctl+pcILc-L.....ptpshlspt.--PPpPhlhPPltplDlctFtchlc ........................h.EhPphsEhFGsVcIa.-lQ+Vlc..LDos...............sttsts.slpslsppslpcllpchAIVVPhKsEcL+Ll-GVLpuIPHcChIIlVSNSpRt...-pa+hEhDhlccFsphTc+phlhlHQKDPuLApAFpcsGYsclLs.csGh...........VRsGKuEGMllGllLA+h..hG+cYVGFlDADNYlPGAVpEYl+sYAAGFtMu...................cosYoMVRlpW+aKPKlspsp..laF++aGRVSElTN+aLNpLloshouFE..Ts........................llpTGNAGEHAMohcLA.hLsauoGYulEPachV.LLEpaGthhs...ttp-shppG..................................................VEIhQIEThNPHhHE.sKGc-H.lccMlhsuLuoIYH..................................ScLss.........-pl+pclLc-L.....ptp..shlpps.cEPPpPhlhPPlpplDlctFhchl............. 1 7 18 24 +9321 PF09489 CbtB Probable cobalt transporter subunit (CbtB) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of proteins which have been proposed to act as cobalt transporters acting in concert with vitamin B12 biosynthesis systems. 
Evidence for this assignment includes 1) prediction of a single transmembrane segment and a C-terminal histidine-rich motif likely to be a metal-binding site, 2) positional gene linkage with known B12 biosynthesis genes, 3) upstream proximity of B12 transcriptional regulatory sites, 4) the absence of other known cobalt import systems and 5) the obligate co-localisation with a protein (CbtA) predicted to have five additional transmembrane segments. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.92 0.72 -4.06 20 220 2009-01-15 18:05:59 2007-05-03 14:19:16 5 1 217 0 69 157 46 56.40 39 87.85 CHANGED sspsssssspu.sclhtssuAshLGhsllahsGFuphsslHNAAHDsRHusuFPCH ........................................hs...t.shshs.tplssulsuhlL..G...shL...lYhsG........aup.......s.......sh........lHsuAHDsRH.u.hG.FP.CH 0 12 37 53 +9322 PF09490 CbtA Probable cobalt transporter subunit (CbtA) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of proteins which have been proposed to act as cobalt transporters acting in concert with vitamin B12 biosynthesis systems. Evidence for this assignment includes 1) prediction of five transmembrane segments, 2) positional gene linkage with known B12 biosynthesis genes, 3) upstream proximity of B12 transcriptional regulatory sites, 4) the absence of other known cobalt import systems and 5) the obligate co-localisation with a small protein (CbtB) having a single additional transmembrane segment and a C-terminal histidine-rich motif likely to be a metal-binding site. 
22.20 22.20 22.70 24.30 21.40 22.00 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.60 0.70 -5.04 25 417 2009-01-15 18:05:59 2007-05-03 14:20:37 5 1 375 0 137 384 69 223.70 30 92.26 CHANGED Mhp+llhuAlhAGllAGllsshLQhhhlpPlILcAEsYEsush...................sH.tupu................................suuHs..Hstt......s.ht.tsGhp....RshhT....hsuNllsusGaA.LlLsuhhult.t...stsssppGLlWGlAGFsuhpLAPulGLPPElPGssAAD.............................LssRQhWWluTlsuTAlulsLlAFupshhhhhlullLllsPHllGAPp........P-shsussPspLuspFssAuhssuhshWssLGlluuahapR ................................................MhtpllhpuhhAGllAGlLshshthlhspPh...l.pA.saEputs.......................pttutu...............................huscs...ct...............tstcshp....................Rssts....hhusllhusuh....u..Lhhssh.sh.h....h....s........th....ss....ttt.u.l....hhuhuGFhslhllPsLthPsp.Pu..sus.s.-.................................L.s.tR..h...h....Whu.....o.l......s.......u..s...u..h....u...lh.....l...........l.............................................................h.....h.........h.u....l.........s....h...l....l....h...P...p......l.....ht.....s.....Pp............t...h..s.shPusLhhpFtlASlusphlhWsslGLshGhhhp............................................................... 0 27 74 108 +9323 PF09491 RE_AlwI AlwI restriction endonuclease Bateman A anon Bateman A Family This family includes the AlwI (recognises GGATC), Bsp6I (recognises GC^NGC) , BstNBI (recognises GASTC), PleI(recognises GAGTC) and MlyI (recognises GAGTC) restriction endonucleases. 
19.70 19.70 20.00 19.90 19.60 19.50 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.48 0.70 -6.00 18 173 2012-10-11 20:44:45 2007-05-03 14:23:23 5 4 152 2 21 155 34 374.30 18 73.57 CHANGED pplGFlhhc.........t...thlplTcsG+tLls..................p...s.pplFL+pLlKaQlssshppp..........h.hpPahhlLcllpcL...........stlohcEhshalhhhhs...ppphcpllsc..........IhpaRppchttpt....................scchapccphpt..................................ptuhcpphcspt.............................................................shpDYsDsshRah+hTGlhshp....upG+hlplsppcp.cpl-hllpshpp....h......s.ppahphhspsststLsh-......shtpphlpplsthhsphspphtltpht..h...........shsp-plccplpclpc......pKhhpc..........hhplLp.htch.pt................tptPshhEahhhhuhh.tlssththh....NhphDs-shPhssAsG..stsDI.h.......ascathllEVTLscuppQp...phEupPVsRHlschtpch.....scshaslFlAPpIcssohppFhht.h.........t.ttthtIhPhslppFhph ................................................hGhh..t...................h..lot.Gphhlp.....................ph.p.haLptlhphphsssh................h.Phhh..llplltch...........t.l.shc-h.hhhhhh........ppphptlhpp..........IhphRpph.t...p.........................pp.h.pt.ht..............................................................................tp.hph.t.......................................................................................................hp-h.D...hRhhphTslhph.......t.sphh..h....t....hp.l.lpphtt.................h....t......l.hp...........h.......p...................................ph..p.h.p.h..hhp......tp..pp..........hhphl...................................t.P.hhhEah.hhhhh..h.t..........shph-sphhPh.pAsG.......s......sDl.h.........tph..hlEsoh.ptppQ....thEh.sl.RHh..hphh.p........h..hslFlAs.lptsshp.h.....................thtIhshth.ph...h........................................................................................................................................................................................................... 
0 7 17 18 +9324 PF09492 Pec_lyase pec_lyase; Pectic acid lyase TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are isozymes of pectate lyase (EC:4.2.2.2), also called polygalacturonic transeliminase and alpha-1,4-D-endopolygalacturonic acid lyase. 19.80 19.80 19.80 20.10 19.70 19.70 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.00 0.70 -5.24 51 198 2012-10-03 02:33:51 2007-05-03 14:24:54 5 14 122 5 65 232 27 282.70 34 61.02 CHANGED QpssGGWsKN......hDhstshsttpttth...............httttp...puTIDNsATssplpaLA+lapts........pst...+a+sAhh+Gl-aLLsAQYs.NGGWPQ...................aYPh.ppu.......YpsaITaNDsAMlsVlplLc-lupspssa.t..hls.psppt+uppAlp+Gl-sIL+sQl.h...sGp..hTsWsAQHDppTLpPstARuYE.sSLousESsuIlcaLMsl.pPo...scllpAlcuAlsWhc..ps+lsGh+h.phts............p.hhltc..susslWARFY-lsTN...................+PlFssRDuh.......h+asls-lptERRsGYuWasshspcll ..................................QpssGGWsKs.......hchssthstpphtph.....................hptttph....puTlDNsATssplpaLuclYpto........psp.......+Y+sAhh+GlcaLL.puQYs....N......GGWPQ............................haPh.tts.........................YpspITaND.s.AMsp..lhplLpplhpt.pssat.................hls.tthp....t+...sppAhs+Gl-sILcsQlh....s..Gp...hTsW.stQaDph.TL......pPstARuYEhsSluusESssllphLMslspPo...sclhpulcuAlpWhc..ps+lpGhphpphts.............t.hhl.........s..susslWARFY-lpTs...............................................................+PhFscRDG.h........hphs.ltclstERRsGYuWYss.stpl......................................................... 0 34 57 62 +9325 PF09493 DUF2389 CHP02450_Tryp; Tryptophan-rich protein (DUF2389) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are small hypothetical proteins of 60 to 100 residues from Cyanobacteria and some Proteobacteria. Prochlorococcus marinus strains have two members, other species one only. Interestingly, of the eight most conserved residues, four are aromatic and three are invariant tryptophans. 
It appears all species that encode this protein can synthesise tryptophan de novo. 25.00 25.00 32.30 32.20 21.00 16.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.12 0.72 -3.70 57 214 2009-01-15 18:05:59 2007-05-03 14:45:43 5 3 201 0 79 203 221 60.10 36 70.83 CHANGED sSKWTuspshs.tp+HFhlsphthcc.ctphlc..hlpsl.scpphplsWp-L+ssspWppGWp .h.SKWTAspshs.tp+HFhVsphp.....tcc.....csshlc..hlpul.spp....phhlsWpp.L+DsshWptGWp. 0 15 45 65 +9326 PF09494 Slx4 Slx4 endonuclease Wood V, Coggill P anon Wood, V Domain The Slx4 protein is a heteromeric structure-specific endonuclease found from fungi to mammals. Slx4 with Slx1 acts as a nuclease on branched DNA substrates, particularly simple-Y, 5'-flap, or replication fork structures by cleaving the strand bearing the 5' non-homologous arm at the branch junction and thus generating ligatable nicked products from 5'-flap or replication fork substrates [1]. 21.30 21.30 21.40 23.30 21.20 20.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.11 0.72 -4.22 57 215 2009-01-15 18:05:59 2007-05-03 15:16:06 5 8 193 0 153 225 0 65.60 27 7.08 CHANGED lhptlpphlc.................ppPs....hac+ILhY-PItLc-LtshLp....tth....clststl+pahDppuIshphp ..........................................................hptlpphlp.................spPs....hac+ILhY-........PI.Lp-LtshLp.............ssl.....clshspl+cahDspsIshph.t................ 0 39 67 117 +9327 PF09495 DUF2462 UPF0390; Protein of unknown function (DUF2462) Wood V, Coggill P anon Wood V Family This protein is highly conserved, but its function is unknown. It can be isolated from HeLa cell nucleoli and is found to be homologous with Leydig cell tumour protein whose function is unknown [1, supplementary Table I]. 
21.80 21.80 23.00 22.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.85 0.72 -3.25 16 199 2009-01-15 18:05:59 2007-05-03 15:21:16 5 6 183 0 143 182 0 80.70 31 76.49 CHANGED MAQGthKh..KAKsssssp........+Kpps.++.usRhItPKKtshhptpKLpKhhouulpspsE+.lsp+A.....G+LpLlK...usp+cp ........................MAQGph.K...........Ku+tssssp.........tpppp..u.s..+K.Gs+sIt.PKKs.p..l.h.p.p.pK.lp....Kplosulsp.psE+tlsp+A.....G+LpLlK...ustc..h................... 0 41 69 114 +9328 PF09496 CENP-O Cenp-O; Cenp-O kinetochore centromere component Wood V, Coggill P anon Wood V Family This eukaryotic protein is a component of the inner kinetochore subcomplex of the centromere. It has been shown to be involved in chromosome segregation via regulation of the spindle in both yeast and human [1][2]. 19.60 19.60 19.80 20.00 19.00 18.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.03 0.72 -3.75 28 202 2009-01-15 18:05:59 2007-05-03 15:21:40 5 1 164 2 135 187 0 88.50 31 28.93 CHANGED LGlRh-lhsc.pupFhcPYYllL++hsp.tp.....................hpla+HTlPsaIPlcpltpph.Lss.............................tpp-lppFscclpcpLssaphRpcthptL .........lGlph-l.st.pGpahcsYYllLpp.pt..tt.......................................lpla+HTlPsFI..PLcpltpca.Lss......................................slppFlcplpcpLsuaptR+tthp.L........... 0 25 60 100 +9329 PF09497 Med12 Transcription mediator complex subunit Med12 Wood V, Coggill P anon Wood V Domain Med12 is a negative regulator of the Gli3-dependent sonic hedgehog signalling pathway via its interaction with Gli3 within the RNA polymerase II transcriptional Mediator. A complex is formed between Med12, Med13, CDK8 and CycC which is responsible for suppression of transcription. This subunit forms part of the Kinase section of Mediator [2]. 
25.00 25.00 29.80 25.00 24.90 19.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.07 0.72 -3.99 28 294 2009-01-15 18:05:59 2007-05-03 15:21:57 5 9 223 0 201 300 0 62.60 40 3.55 CHANGED sFp.PsRVTLocs++-sWLp-LAssshsLpcLu+p.lPHGh+p+pLl-thh.............................................ppplPlpRAlWhlK ..............Fh...hsshopsphcsWhpDLAs.stPLppL......u+...........p.lPph....++cplhshlh.............................................phsVPlhRAsWLlK.. 0 54 94 155 +9330 PF09498 DUF2388 CHP02448; Protein of unknown function (DUF2388) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of small hypothetical proteins, about 100 amino acids in length. The family includes five members (three in tandem) in Pseudomonas aeruginosa PAO1 and in Pseudomonas putida (strain KT2440), four in Pseudomonas syringae DC3000, and single members in several other Proteobacteria. The function is unknown. 25.00 25.00 26.60 31.10 19.30 18.80 hmmbuild --amino -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.29 0.72 -4.34 32 301 2009-09-10 15:05:37 2007-05-03 15:32:04 5 1 84 0 73 218 5 71.80 49 68.29 CHANGED ulssSstsTossoooh+.DpKlVlsARDDAAoFVASsGsIRGApLEAALpplRpptPpL.pASDhpLApAILuh ...........................s.hhposssTSssoooh+.DsKllhsARDDAASFVASsGsIRGApLEAALpplRppsPph.pASDhpLApAILA.h.. 0 7 14 47 +9331 PF09499 RE_ApaLI ApaLI-like restriction endonuclease Bateman A anon Bateman A Family This family includes R.ApaLI and R.XbaI restriction endonucleases. ApaLI recognises and cleaves the sequence GTGCAC. 
25.00 25.00 26.30 25.90 22.80 20.20 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.25 0.71 -4.92 6 327 2012-10-11 20:44:45 2007-05-03 15:36:43 5 1 171 0 9 42 2 56.80 51 84.36 CHANGED slpcpI+hLAppYAscLpppltpRshEMpsDDp.................................................SHYLIY...........RVLGlsh-EGcLIDhYQNpGRFLYKYAGSFLEE..AThLCFKctF........................scu..u+t+VtNTpGp+PKsFEIDCLl.....sspAaEIKWR..DATTDGDHIsKEHTRl+VlpstGYhPlRIMFYYPsRsQAhRIQpTLcTLYpGVGGpYYhGDuAWsaVcccTu .................................................................................................................................................................................................................................................................................................................................................................. 0 6 9 9 +9332 PF09500 YiiD_Cterm yiiD_Cterm; Putative thioesterase (yiiD_Cterm) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry consists of a broadly distributed uncharacterised domain often found as a standalone protein. The member from Shewanella oneidensis is described from crystallography work as a putative thioesterase because it belongs to the HotDog clan of enzymes. About half of the members of this family are fused to an Acetyltransf_1 domain Pfam:PF00583. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.80 0.71 -4.62 47 840 2012-10-02 20:54:35 2007-05-03 15:37:05 5 5 832 4 122 505 74 141.30 61 49.49 CHANGED tppLpphh+ppIPloctMulplhpasspplplsAPLssNlNh+sThFuGSlaoluTLoGWGhlaLpL+-tslp.GcIVlt-upIcYhtPlssc..hApsphs...................shss.lspLpptp+ARlplpsplhsssph.......supFpGpaVsh. 
........................CuQLQQAWY-HIPLSEKMGVRI....QQY..T.GQ+FlTTMPE.s...G...N.Q.NPHHTl..........FAGSLFSLA.TL.TG.WGL.IW.Lh.L.RERHLG....G........TI..I..L.A.D.AHIRYS+PIoG+...PpAVADLu...................uLSGDLDRLAR.GR..K.ARV.QhpV-la.G.sE.ss.......GAlFEGsYlVL.P............................... 0 22 48 86 +9333 PF09501 Bac_small_YrzI Bac_small_yrzI; Probable sporulation protein (Bac_small_yrzI) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are very small proteins, about 47 residues each, in the genus Bacillus. Single members are found in Bacillus subtilis and Bacillus halodurans, while arrays of six members in tandem are found in Bacillus cereus and Bacillus anthracis. An EIxxE motif present in most members of this family resembles cleavage sites by the germination protease GPR in a number of small acid-soluble spore proteins (SASP). A role in sporulation is possible. 21.20 21.20 21.90 21.40 21.00 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -8.07 0.72 -4.05 13 422 2009-01-15 18:05:59 2007-05-03 15:43:11 5 1 124 0 34 156 0 46.00 49 98.60 CHANGED MTFplFFLTITIQK+p...hSpsElcpcpQhcplh-Eh+-Rp..spYhs+h .MpF+lFFLTITIQKpp...lSpsEIh+-pQIcphMD-lKERQ..upYho+L......... 0 4 21 22 +9334 PF09502 HrpB4 Bacterial type III secretion protein (HrpB4) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow range of species including Xanthomonas, Burkholderia and Ralstonia. 
21.60 21.60 21.80 55.80 21.40 20.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.56 0.70 -5.15 8 89 2009-01-15 18:05:59 2007-05-03 15:43:34 5 1 71 0 15 75 1 211.40 37 94.56 CHANGED ssApstthARhLpcapsplssLs-alD....s.uWLpsssulus...u+usuhRsthphh.......h.+sh.shtGhssPoLssL-stAsR.................LAVLstssLlpVLpuRALhsRsssLRpCI-RspRotLpptlGPtsh.hh.s.tt....-AsptcsustuhsPL......sh-ucshAWl..GaRthppDGsWPssu.lh+llRLAL...Ptu.........u.sPpLsPhAusu...sucphLuALPoLaPE ...........................s.ss.hpttAthLptapsshtphschlc....s.uWhs.sls.uls.....upAsshRsAhphh..........h.csl.....Ghu...tsolsuLtthA.s.R....................LAlLshsphh+VLthRALhhRpstlR+sIDRtphs+LtshV.......Gssssphh...................-AsRtp.......tpst.....shsPL.....sth-A..cslAat..GWphhpsD..ss....hsss...........u.sh..+...llRLuL...Ptu.............s.ssh.LsthA.ucs..............sutthlstLPoLhPE... 0 3 7 10 +9336 PF09504 RE_Bsp6I Bsp6I restriction endonuclease Bateman A anon Bateman A Family This family includes the Bsp6I (recognises and cleaves GC^NGC) restriction endonucleases. 25.00 25.00 29.40 71.10 21.00 24.90 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.21 0.71 -4.82 3 17 2012-10-11 20:44:45 2007-05-03 16:24:33 5 1 17 0 0 10 19 169.30 53 94.55 CHANGED MAYKKFGYIEIDDARI--TCDAYFKWKDLNoYIKNTSSRGINMPDAISEPMGCYCLGYLWNRGoEVGDATDPcTNAKIEFKATSKFDGDLSSFGPKTVFDNLVFLRFNLD-NcLYIYDLsINSE-LcKYPANKTQTIQDQKAhGRRPHVSLQKLFVEA+DlcPDIIFDIRRCRIIEDNR+ .....uDsh...........DKs-FEhlp-lahcWhhhNp+lKSluuRGlNhPDVFSEuLhChAhNhlRosGTuh.hDslhcDTsctVQVKuuS.hssDsTSFGPTSsaDlLYFAcFs.ssc.-hhVhhhKIpsDDlYNlslN..KsETFKDQQAQGRRPRFSIQSsII+uKcLhPltslNI...........ITGc....................... 0 0 0 0 +9337 PF09505 Dimeth_Pyl Dimethylamine methyltransferase (Dimeth_PyL) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of dimethylamine methyltransferases from the genus Methanosarcina. 
It is found in three nearly identical copies in each of Methanosarcina acetivorans, Methanosarcina barkeri, and Methanosarcina mazei. It is one of a suite of three non-homologous enzymes with a critical UAG-encoded pyrrolysine residue in these species (along with trimethylamine methyltransferase and monomethylamine methyltransferase). It demethylates dimethylamine, leaving monomethylamine, and methylates the prosthetic group of the small corrinoid protein MtbC. The methyl group is then transferred by methylcorrinoid:coenzyme M methyltransferase to coenzyme M. Note that the pyrrolysine residue is variously translated as K or X, or as a stop codon that truncates the sequence. 25.00 25.00 529.60 529.50 19.30 18.40 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.64 0.70 -6.22 3 16 2009-09-13 14:57:56 2007-05-03 16:31:52 5 1 8 0 12 25 14 459.20 77 99.76 CHANGED MATEYALRMGDGKRIYLTKEKIluEIEAGoANAADLGEIPALSuDEM-KLAEILMMPGKAVSVEQGMEVPVTHDIGTIRLDGDQGNSGVGIPSSRLVGCMTHERAFGADTMELGHIDYSFKPVKPVVSNECQAMEVCQQNMIIPLFYGAMPNMGLYYTPDGPFENPGDLMKAFKIsEAWESMEHAADHLTRDTVWVMQKLFASGADGVNFDTTAAAGDGDMYGTLHAVEALRKEFP-MYIEVGMAGEMVLGMHGNLQYDGVTLAGLWPHQQAPLVAKAGANVFGPVVNTNTSKTSAWNLARAVTFIKEAVKASPIPCHVDMGMGVGGIPMLETPPIDAVTRASKAMVEVAGVDGIXIGVGDPLGMPISHIMASGMTGIRAAGDLVARMEFSKNMRIGEAKEYVAKKLsVDsMDLADEHVMRELREELDIGVITSVPGAAKGIAAKMNIEKLLDIKINSCELFRKQl .....MATEYsLRMGDGKRlahoK-+Ihp-lEAGhusAuDLG-IPsLSs-Eh-KLAEILMMPGKsVSVEQGMEVPVTHDIGTlRLDGDQGNSGVGIPSSRLVGCMhHERAFGADTMELGHIDYSFKPVKPVVSNECQAMEVCQQNMIIPLFYGAMPNMGLYYTPDGPFENPGDLMKtFKIpEAhESMEHAA-HLTRDTVWVMQKLFASGADGVNFDTTuAAGDGDMYGTL+AIEALRKEFP-MYIEAGMAGEhVLGMHGsLpYDGVTLAGLWPHQQAPLlAKAGANVFGPVCNTNTSKTSuWNLARAVTFhKAAVcASsIPCHVsMGMGVGGIPMLETPPIDAVTRASKAMVEIAGVDGIOIGVGDPhGMPISHIMASGMTGhRAAGDLVARMpFSKNMRIsEAKEYVAKKLsV-hMDLuDEaVMRELREELDIGlITSVPGAAKGIAAKMNIEKLLDIKINSCpLFRKQl.. 
0 4 7 7 +9338 PF09506 Salt_tol_Pase Glucosylglycerol-phosphate phosphatase (Salt_tol_Pase) TIGRFAMs,Coggill P anon TIGRFAMs Family Proteins in this family are glucosylglycerol-phosphate phosphatases, with the gene symbol stpA (Salt Tolerance Protein A). A motif characteristic of acid phosphatases is found, but otherwise this family shows little sequence similarity to other phosphatases. This enzyme acts on the glucosylglycerol phosphate, product of glucosylglycerol phosphate synthase and immediate precursor of the osmoprotectant glucosylglycerol. 25.00 25.00 248.60 248.30 18.50 18.10 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.17 0.70 -5.91 16 45 2009-09-11 09:42:30 2007-05-03 16:32:15 5 1 45 0 16 45 195 380.80 52 93.38 CHANGED pcLLIVQDLDGVCMPLVKDPLTRpl-scYVpAsupLpscFsVLTNGEHEG+RGVNRlVEpALuspppspccGLYLPGLAAGGVQaQDRaGplSHPGVS-sElsFLAtlPp+McshLtppLsslhPpLoscplpphhptAlLDTplSPTINLNuLFull.sDVppQ+pLQthlpclMspLlspApupGLcsSFFLHlAPNLGpctsGpEhlK.AstsDlGTTDIQFML+GAlKEAGLLsLlN+aIup+oGpuPLG-sFNVRsAP+sHpuLLsLC+cpIss-pMPhLVGVGDTVTSph..sssupuWLRGGSDRGFLTLlQcLGppaspsNRVVhVDSSpGEV.sRPSlpDssLpGIS..........DP-DPL+FDllhsuGPcpYlsWFppLAp .pNLLIlQDLDGVCMsLVKDPLTRpl-scYVpAspphpscFhVLTNGEHtG+RGVNRlVEpALGssshsppcGLYLPGLAAGGVQaQDRaGplSHPGVScsElsFLAtVPpthcstLpphlsshhPpLus-plpptlptoVLDspsSPTlNLNuLFsllpc........-sphhppLQphhppLMspLltcAtupGLtsSFFLHlAPNLG+Dpp...GpEhlK.AptsDsGTTDhQFML+GAlKEAGlLsLLN+ahtpRTGphPLGcsFNsRsAPpsHpsLLcLs+pplsscpMPhlVGVGDTVTSps...ss..s..stsahRGGSDRuFLpLlQcLGptaspsNpVVaVDSSpGEV.sRPslphss...........l.pGIo.................Ds-DPL+hslshPsGPcpYlsaFpplA..... 0 2 8 14 +9339 PF09508 Lact_bio_phlase CHP02336; Lacto-N-biose phosphorylase TIGRFAMs, Coggill P anon TIGRFAMs Family The gene which codes for this protein in gut-bacteria is located in a novel putative operon for galactose metabolism. 
The protein appears to be a carbohydrate-processing phosphorolytic enzyme (EC:2.4.1.211), unlike either glycoside hydrolases or glycoside lyase. Intestinal colonisation by bifidobacteria is important for human health, especially in pediatrics, because colonisation seems to prevent infection by some pathogenic bacteria that cause diarrhoea or other illnesses. The operon seems to be involved in intestinal colonisation by bifidobacteria mediated by metabolism of mucin sugars. In addition, it may also resolve the question of the nature of the bifidus factor in human milk as the lacto-N-biose structure found in milk oligosaccharides. 26.60 26.60 27.20 77.50 24.40 26.50 hmmbuild -o /dev/null HMM SEED 716 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.52 0.70 -13.24 0.70 -6.87 7 288 2009-01-15 18:05:59 2007-05-03 16:33:53 5 3 255 18 27 215 17 673.60 54 98.37 CHANGED GRhTlPs-sshtppshELhpRWGADAlRDsDGTchs--lhphssKlYSTYhssRsDpsWAptp.-ElpQhYLMop.ssAhS-sh.pIsLhcGaac-QhhsssspD.ccWWEVlDRTTupVlss-pWsaDtpstpVhlpsspsaHcYTVoFLAahIWDPlpMYNalTNsWtshcHph...saDshpPcTppalhpthccWlc-pPpsDVVRFTTFFapFTLsaspht+E+aV..DWaGYusoVSPhALEpFEKEhGY+LpPEDhlDpGaYNssaRVPsKpa+DahsF.pcaVschsKcLV-lsHpsGKEAMMFLGDpWIGTEPatctFpclGlDAVVGSVGsGsThRhIuDIPGV+YTEGRFLPYFFP......DsFhEGGDPlhEApsNWlpARRAILRpPlDRIGYGGYLpLAspFP-FlDhlEcIssEFRcla-shpGppsh.shh+VAVLNsWGphRSWtsphVtHAlah....+psYSYhG..llEuLSGhPh-VcFISFDDIhcp.Gl.c-lcVlINsGsusTAaSGGphW.c-tclsutl+pFVtpGGGhIGVG-P.oAppapG+aaQLAcVLGV-+EhGholSpc+Yshphp.pHFlT.t-hstc..............................lDFGEuhpslYshss.splLttc...........ttplphAVNpaGcGRuVYluGlPYShcNuRlLhRulhWAuppEpphppahSoNhpsEVAhYscss+hsllNNT.-.QpTslhps.ucs.slcLts.-hhWhsl 
.................................GRlTLPs-psh.tpstELh-RhtADAlRsSDGTcLP-.lpcL.s.sKIYsTYassRuDp-WAptHPcElpphYLMSchssAh......u-s..sIsLMpsaas-QltPsscsDsc+WWpVIDRTTG-VlssstWph-t..tst.s......V..slppAtshH.YTVuFLAh.hWDPspMYNalTNsWt.Dc..pcI...PaDlRpPpThpalhctLcpWLt-..sPpVDVVRFTTFFYpFTLlasppu...pE+..aV..DWFGYSASVSs.AlEtFEKchGYtLpsEDhlDpGaYNSsFRVPpKpa+DalsFQpcFVsphs+ELV-hsHtsG+EAMMFLGDpWIGTEPYG.tFtphGlDAVVGSVGsGsThRhISDIPGV+YTEGRhLPYFFP......DsF.a-GGDPV..tEApssWlsARRAIlRsPlDRIGYGGYLSLAhpaP-FVDhlEclssEFRpIassstGppPh.ssh+VAlLNsWGtlRSWtsaMVAHALaY....+QsYSYhG..lLEuLSGhPhpVcFlSFDDIhp..Glscsl-VlINsGsAsTAaSGGp.W.tDtclsoslRcFVtsGGGhlGVG-P.oAh....t..p..GthaQLuDVLGVD+EhGasLSsc+ash..........c..hsccHFlo.tDl.s..s..c..............................lDhGEGh..sl.ashsssTplLttc.......................cp.pVpLAsNpaGpGRuVYluGLPYS.pNuRLLhRAlhWAu+cE-th.pp.WhSosspsEVssYPcss+hhVlNNoh-t.sTsVhss.sss.pclchpssthhWhc............................ 0 15 19 23 +9340 PF09507 CDC27 DNA polymerase subunit Cdc27 Wood V, Coggill P anon Wood V Family This protein forms the C subunit of DNA polymerase delta. It carries the essential residues for binding to the Pol1 subunit of polymerase alpha, from residues 293-332, which are characterised by the motif D--G--VT, referred to as the DPIM motif. The first 160 residues of the protein form the minimal domain for binding to the B subunit, Cdc1, of polymerase delta, the final 10 C-terminal residues, 362-372, being the DNA sliding clamp, PCNA, binding motif. 
24.40 24.40 24.40 24.60 24.30 24.30 hmmbuild -o /dev/null HMM SEED 430 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.68 0.70 -5.25 21 329 2009-01-15 18:05:59 2007-05-03 16:34:30 5 7 232 7 213 324 1 344.60 20 88.93 CHANGED +lVTY+hLScsLsVHsNpAKQMLY-ahcpcppcps.splaATYlloGp.hsst.sspp...................................................lslVcE-cLEss+u+hspshShalYSlQpshLpDsssLhsssh-hlpp.....spp.sthpstphs.htstplppppt......s.ssts....psshsuc.s...............stcsspuhhs.thpscppspspspp+sppstpscttss......pssstKh.s.s+ushhssFhptpsppKhttp.tstpsstpE.....ppt.psspshs.pcpss.......cppcps..hcs...................p.sspppppp.......................EscKc+.cc+l+phhpDc................s---............ss-SP.sppcspsssPs..........cp-.ptppsphpst..........s+RRtRR+V.hKpcThhD.-EGalVTcch.p.EShS-sEss.s................PsKppsssp.ss..ss...........spcsK.............sttttQusIMSFFpKK ...........................................................................................................................loa+.lu.thtlp.s.AK..p....h................L.pahpppp.............splpssYllpG..h.p.tt...............................................................................................................lhl..lp.-..p.pLpth..h...t.pht...h..o...hplYSl.....p..........s...h..p.-.......s......l.....h.....s....s....t...........ph.t................t......sh...t...tt.h..s.h....s...t..pptpt..................................sh.stt............................................t...pt....t.....t...p...pt.t...ps.p..ttp.ttp.....tttp...st.......................ps.s.t.c.......................tp..ss.h.tshhtttsh...ttht........t...tt.....tp.....................................p........t..h..p..p..........p.ttpt...t..............................................ppppppp........................................................................................cp.ppc......ccch..+..........p.ss...................................sp-p..................pp.......t....p....t.p.................................ptp........t.p............
....................ptt....t.+..+pl..hp..ppp..hhD.-c.G.hh.........VTpp.....t.ph..s.ppc..............................................p..tsh..t.........................................................t.t.p.......................................tt.t..Q.ss....lhuFFt+.................................................... 0 66 111 175 +9341 PF09509 Hypoth_Ymh Hypoth_ymh; Protein of unknown function (Hypoth_ymh) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry consists of a relatively rare prokaryotic protein family (about 8 occurrences per 200 genomes). Genes for members of this family appear to be associated variously with phage and plasmid regions, restriction system loci, transposons, and housekeeping genes. Their function is unknown. 25.00 25.00 25.50 25.40 23.30 23.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.53 0.71 -4.50 25 190 2009-01-15 18:05:59 2007-05-03 16:38:58 5 3 179 0 48 143 15 120.50 37 43.17 CHANGED pscslHsclhchsppphhss..pYhcAVhEAsKulss+lRptoGl..stDGssLhppuFu.sc.........pPhLtl..sshpocopcupQcGht.......sLhpGhhssaRNspAHcsc.hp.....hocpDAl-hLuhlSlltRhL ...............p..+tlHsclhpaCcschl.....sc..sYacAVhEAsKulsc+lRpho..Gl.......s....tDGscLlsp......AFs.sp..........pPhLtl..sshpT.coEcuEQcGht.......sLhhGlauuhRNPhAHps+..hp.s......hscpDAL-lLsllShlpR+L....................... 0 15 33 41 +9342 PF09510 Rtt102p Rtt102p-like transcription regulator protein Wood V, Coggill P anon Manual Family This protein is found in fungi. The family includes Rtt102p, a transcription regulator protein which appears to be integrally associated with both the Swi-Snf and the RSC chromatin remodelling complexes, [1]. 
21.40 21.40 22.80 22.40 20.40 18.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.64 0.71 -4.16 6 33 2009-01-15 18:05:59 2007-05-03 16:40:41 5 1 32 0 20 32 0 128.50 40 65.10 CHANGED SLIc+AN+.uGYussscpp..HWcY-WhoPs...........KpsEsspp.....P....ss.psp.ppYsFKaKsWl+sssscsh.shhc-ss-.plLDLscFDR........T+hsttspthpspusss....puLohsDIRGAVGGSEuI.......PGhS ................oLIs+AN+..uhYusssscc..pW+YDWYpPs..........................K.ss..usspp.sppp..............ush.pNslEKYsF+YKTWl...+spc.s-p....sLpc-ss-..D.........lLDLp-FDR........Tpcscsss......spsssssusspsspuLosDDIRGAVGsSEuI.......PGhS............................. 0 3 10 17 +9343 PF09511 RNA_lig_T4_1 RNA ligase TIGRFAMs, Coggill P, Mistry J, Wood V anon TIGRFAMs & Pfam-B_49998 (release 17.0) Family Members of this family include T4 phage proteins with ATP-dependent RNA ligase activity. Host defence to phage may include cleavage and inactivation of specific tRNA molecules; members of this family act to reverse this RNA damage. The enzyme is adenylated, transiently, on a Lys residue in a motif KXDGSL. This family also includes fungal tRNA ligases that have adenylyltransferase activity [1]. tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. 
26.80 26.80 27.60 27.20 26.20 24.50 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.60 0.70 -4.87 48 347 2009-01-15 18:05:59 2007-05-03 16:41:19 5 16 314 2 157 357 36 216.60 26 33.56 CHANGED hpuRGLhhp........................psscIlsRua-KFFNls.........EhstTphcsl..pt.......Phclh.KtsG..sll.hshl.........psG....plllsSKpSs...................................................................................................................psspAptucphlpp.....pltpt..........hppls....ctLhppshTslhE...lsssp.pchVlsYspcphsLhLpshstss.....sca.hshs.hplsph.uc..pashpphchh.thss......th......ttph.ssshpt................ct...hEGa.Vlcsps........................................shhaKhKh-t.hh ................pARGLhhs........................pssscIssRuacKFFNls........................Eh..t..t..oph.pp.l....tph...........PhclhhK.NG...sll...l.shl..............psu............plllsSKpSh...................................................................................................................pssau.phucph.l.pp................phtp...........................hpplt.....ptl.hctshTslhE...lsssp.-..cH..l.lt...Ys.p.pp.hhl.hLtshNhps.......sca.hshs..tplpph....scpauh.....p.....ps.....phh...hhps....hp....pltt.hh........pph....ps..s.hps.................cp......hEGa.Vlpspp........................................shhaKhKhp..h........................................................ 0 64 102 138 +9344 PF09512 ThiW Thiamine-precursor transporter protein (ThiW) TIGRFAMs, Coggill P anon TIGRFAMs Family Levels of thiamine pyrophosphate (TPP) or thiamine regulate transcription or translation of a number of thiamine biosynthesis, salvage, or transport genes in a wide range of prokaryotes. The mechanism involves direct binding, with no protein involved, to a structural element called THI found in the untranslated upstream region of thiamine metabolism gene operons. This element is called a riboswitch and is seen also for other metabolites such as FMN and glycine. 
This protein family consists of proteins identified in operons controlled by the THI riboswitch and designated ThiW. The hydrophobic nature of this protein and reconstructed metabolic background suggests that this protein acts in transport of a thiazole precursor of thiamine. 27.20 27.20 27.60 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.90 0.71 -4.38 26 735 2012-10-03 02:46:00 2007-05-03 16:43:36 5 2 729 0 72 328 1 148.20 47 86.38 CHANGED LslsAlhlAluVllSslhtIPlGhs+shPhQHhlNVluuVlLGPhauluhAhlhullR.hlGsGolLAFPGShhGALLAGl....hY+hs+.+h...hhAslGEllGTGlIGuL.luaPlAshlhG.spsuh...hhahssFhhSolhGulIuahlLhhLp+p .....LslhuhhIAlsVVLSsI.hpI....tt.hAPhpphVNlLuuVhlGPhYuLAhAhloullRhhhtsh....ssLAhsGuhhGA....LLAGl....FYpa.......s..+...+h......ahuulGEllGTGlIGSl.lua.PlhhhhsG....pst.tl..............hhahspF.hsuolIGohIualllhhLpK.p....... 0 36 52 66 +9346 PF09514 SSXRD SSXRD motif Bateman A anon Pfam-B_23332 (Release 21.0) Motif SSX1 can repress transcription, and this has been attributed to a putative Kruppel associated box (KRAB) repression domain at the N-terminus. However, from the analysis of these deletion constructs further repression activity was found at the C-terminus of SSX1. Which has been called the SSXRD (SSX Repression Domain). The potent repression exerted by full-length SSX1 appears to localise to this region [1]. 20.90 20.90 21.40 22.20 19.00 20.40 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.43 0.72 -4.64 33 133 2009-09-11 15:10:05 2007-05-03 16:52:11 5 18 25 0 55 155 0 32.80 56 11.21 CHANGED pssthpVpsWoHRLRERK.hVIYEEISDPEEE-- .......pss++thc.sWoHRLRERK.hVlYEEISDPEEDDp. 
1 12 12 13 +9347 PF09515 Thia_YuaJ Thiamine transporter protein (Thia_YuaJ) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this protein family have been assigned as thiamine transporters by a phylogenetic analysis of families of genes regulated by the THI element, a broadly conserved RNA secondary structure element through which thiamine pyrophosphate (TPP) levels can regulate transcription of many genes related to thiamine transport, salvage, and de novo biosynthesis. Species with this protein always lack the ThiBPQ ABC transporter. In some species (e.g. Streptococcus mutans and Streptococcus pyogenes), yuaJ is the only THI-regulated gene. Evidence from Bacillus cereus indicates thiamine uptake is coupled to proton translocation. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.22 0.71 -4.51 43 674 2012-10-03 02:46:00 2007-05-03 16:53:45 5 2 640 2 112 689 6 171.80 34 87.25 CHANGED llEhulhsAlAhlLshl...hhphPpGGSlol.uMlPIhlhAhRaGhpsGllsGhlaGlLpll...hu.......s.a..llpshQslLDY.lAFsslGlAGlFtp............................ttt......hhhhhGsllushsRahsHaluGllFaGuYAPcGh......sshlYSllhNGoh.hlsshllshlllhlLhhpts.phhh ............................hlEsAlhsAlAhl.Lshl.....h.h.ths..p..G.h....Sloh...uMlPlhlhuhRhGhtsGhhuG.hl.aGl.Lphl...lG..............sha...hLsss.Qs.l....l..-..........Y.......hl.....AF..........u.h.l..G..h.A.G.l.F.tp.h...........................ttphh.th.hhhl......hhusl...l....ush...s.R..ah..h.Ha.lu.G..l.l.F.a....Gs..Y..A.....Pc...Gh................sshl...YS..h.l.h..NGoh...hlsshllshlllhlLhhs.sph..h......................................................................................................... 0 51 86 99 +9348 PF09516 RE_CfrBI CfrBI restriction endonuclease Bateman A anon Bateman A Family This family includes the CfrBI (recognises and cleaves C^CWWGG) restriction endonuclease. 
25.00 25.00 96.00 95.50 24.50 23.30 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.85 0.70 -5.51 5 36 2012-10-11 20:44:45 2007-05-03 17:06:14 5 1 36 0 1 27 2 257.80 52 84.94 CHANGED MpFc-.........................plh++VIpcVLpGcDYREpl.......lNtlNsEFLDFu......IDFFK-lloAKhpspslsL-WYpcasLuNKu..Pc.-lAI..hsGhNpKTIpNlYGouoKpVVlshupsplchL.sulppLu.csQ--IG.IslKIsY....K-lSVs...LsLcESLlVINALAoK+lsLRGStaSohGKplEKhLMLoLCplhGlc.....E-slsAcsFsKsKutDhDRElDFpL.plcsuKtYRVEVKLMGKGNPE.............SADAVIARsosIFIADTLS-QNK ....................................slp+sIphLL+GKDYREhV.......LNsINsEFLDFA......IsFFK-IlhAKMpDcuIshsWYppashsNK-..sK.-lAI..LsGhNhKTIhNsYGToTKEV.VLDlupsNlcYLh-l.LQsLppss.s-lG.IsIKITY....K-ISVs...LDLcESLlVINALATKKIALRGSAaShlGKRlEKPLMLpLCc+CGlS.....EuaIDAosFpKDK+L-aDREVDFKLYNpD+SKsYRVEVKLMuKGNPE.............SADAVIAR-ocIFIA.TLSEQNK............... 0 1 1 1 +9349 PF09517 RE_Eco29kI Eco29kI restriction endonuclease Bateman A anon Bateman A Family This family includes the Eco29kI (recognises and cleaves CCGC^GG ) restriction endonuclease. 23.00 23.00 24.40 23.00 22.40 21.00 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.10 0.71 -4.93 6 60 2009-01-15 18:05:59 2007-05-03 17:25:00 5 2 56 17 15 56 3 164.70 44 74.26 CHANGED PcpFpGAGVYALYYsGsasLYcphu.chNchsastPIYVGKAVPtGhRpuRhucNss..uspLasRL+EHuRSIupsusLDlsDFhCRFVlh-hthSshIshhEAsLIchapPlWNosVDGFGNHDPGpGRasQ.....t+SsWDslHsGRsWAs+hss................h.uc..uItcsIppaLcp ...............cpFpGAGVYAlYYhGs.h.shYpplu.chN.R...s.as...t.PIYVGKAVPtGhRput.u.ss..pustLapRLc.EHucSIs.tss......sLc.sDFhCRFlll-...sshIshsEuhLIcha+Pl....WN..............s.....slDGFGNHDPG+GRapp.....t+ScWDslHPGRsWAp+hps................hspohcplhppltpaht.t..................................... 
0 4 12 14 +9350 PF09518 RE_HindIII HindIII restriction endonuclease Bateman A anon Bateman A Family This family includes the HindIII (recognises and cleaves A^AGCTT) restriction endonuclease. 25.00 25.00 66.40 66.20 20.40 20.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.90 0.70 -5.52 5 26 2012-10-11 20:44:45 2007-05-03 17:38:09 5 1 23 8 3 23 3 278.00 34 94.43 CHANGED NuE.........hl.s........hss-hlKlsuphT.Dac+lss-llptI+KhsccplI-lLpouGhIPEsYcsDSSEEKLFSKhsDALlu-shpchGapApsLsERusAuDVhltsc..sptIVuDAKoFRLSRT..AhNsKDF.KVculcKWRstht.sKDaulVlsPpaQhPssKSQ..lYsQulDpNVLlLSYcHLAaLLcp+-.hss-hh..LWEYpsIFspp.hhN.suKsu+pYWsuINcsllclssct-KtahDphstscctLpphlcccIp.lE.....chEalcpEIs+IhshoREcAIR-LlcchKIpu+lEph.phl+slpcpcl ..............h...................t..phsu..s.-hpphstcl.t.lpp.spptll-hLppsGhIPEshptDSSEEKLaSKhsDsllucshphhGhpupsL.sERusuuDVhshsp..sYtlVADAKsFRLSRT..AKNtKDF.KVpulscWR...p....sKDaulllsPhaQhPsppSQ..lYpQulspNVhlhoacHLAhLlphtt..s.c.....Lh-......tIFst..hh..stcpuhpYWpslNpshhphssphpchah-.hthspctltphhpptI...........cahppEhppIhtho+-cAlptLlpph+lpu+lcth...hcthpp................. 0 3 3 3 +9351 PF09519 RE_HindVP HindVP restriction endonuclease Bateman A anon Bateman A Family This family includes the HindVP (recognises GRCGYC bu the cleavage site is unknown) restriction endonucleases. 
25.00 25.00 26.00 34.60 23.10 20.50 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.83 0.70 -5.53 3 41 2012-10-11 20:44:45 2007-05-03 18:03:25 5 3 34 0 4 50 1 255.10 38 90.73 CHANGED oWGKNQFNSSFPlALACYMSSKNIpPlYL+LE+.tsIcHupIDVcsVFpIcP.ptpsFFAFEpSY..............................pIKLTAlPDpTTssLsDs..GsEIVIRPDoIVYLAhSlAclhpps.hshhDI..PssschhDWp-sppl.PhhPhhhphL.slhsRhpslQhPhLLQPlWKT.GK.shLtDNCLDIFlWSNlAFsKlFLDASplclNscSITRpcRTsVWLhKMLYDFApsGKINH++TIDcISaNTKNDKAFAuSGMlT+pYMKSPELp+PRIK+cEIKNIILGGGQ+LLSPERRFDAIIlNTPsLF- .....................aGKN.FssuFP.uLhsah.p.ptlps.Ylhh....p..p..............plppt.lshpplashps...s.tphaasFEt.a...............................hElKLTslPDpsTtphs-p.ausElVlRssol.ahAhSlspth....pp..ph..t..t.hh.h....h....cWsp.tplhshh.phhtslpplhpph.shQpPhlhQPlWKTpGKps.LspsCLDlFlWSshAFs+hhlshu....p.s..p.....pIsR.tRohlWLhKMLhDhsppGphsappllcphoasspsDKAFuhsGphTp.aMps.clppPtI.+pEIppIILssGppLLSPERRFDuhlh.ssspLF................................. 0 2 4 4 +9352 PF09520 RE_TdeIII RE_MjaII; Type II restriction endonuclease, TdeIII Bateman A, Coggill P anon Bateman A Family This family includes many TdeIII restriction endonucleases that recognise and cleave at GGNCC sites. TdeIII cleave unmethylated double-stranded DNA [1]. 
23.80 23.80 24.00 23.80 23.70 22.70 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.67 0.70 -5.08 11 54 2012-10-11 20:44:45 2007-05-03 18:10:25 5 1 48 0 19 62 14 233.10 26 89.78 CHANGED Mulss...tT+NKl+-aLsshlccpLpsY..pt.hp.............ss-sshpPFHtpLlPtslMplt+FERSLsTuLGp.hFEpsA+lIAhspausA.ppYclpsslsptshssIDpllsplc..........+spppppsohsEhlcplpplspsthtEsolV.uDLYlp+cDGpEhaF-IKSsKPNKGQshpshpcLLRlhAlc..pp.tssscsFaAhsYNPaG....p+tuYohshh+thhDhcs.tVlIGpEFWshIGp.suTYsELL-Iac-VG .......................................................................httlpphlpp.lcph...hpph....................psptt.hP...Fhttll..............st.............-........hhthspF.+ShsTshGpshaEplAphlApspht.s.p.phpht.sp...IspptpppIpplhsplc..........t.ptp.sph.p..ctlct.h..hph....sp.tp....h.psphs...Dl.a.l.p.pp.s.sp..hhh-lKo.....s.....K.P.NtsphcthKpplLphhshh....pp......ptphpsahAhPY.......NPa...........p.psapp.......h..ht..th.h....Dh.-p..plllupEaWshluG.csoY.pplLchapclG........... 0 9 12 17 +9353 PF09521 RE_NgoPII NgoPII restriction endonuclease Bateman A anon Bateman A Family This family includes the NgoPII (recognises and cleaves GG^CC) restriction endonuclease. 
25.00 25.00 35.90 35.90 23.60 23.20 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.76 0.70 -5.61 4 62 2012-10-11 20:44:45 2007-05-03 18:15:05 5 1 53 0 7 42 7 186.50 42 93.41 CHANGED M...pNIlsAIhNlspNP.hpLcpa...spS+NRANpMG-uLEEYlKDlFusoh.p.D.sp+.tlauchFSYLGNpNNPPDhIL+sGDAlEVKKIEshcSSLALNSSYPKuKLasssSMIT-AC+sCEcWE.KDIIYsIGTlccpp.LKtLhhVYG-sYAA-splYp+IpspIKtGlpsIsslpFSETpELGRVNRVDPLGITYhRlRGMWGIENPhKVFsYIhph-..csppFNhhAlhpc-KYpSFsspD+hc.huhpNptLtIpDl+IKs ...................NIlsAlhNlhpp......h.tlpp.....pu.sRANphGsuLEpalKDhFut.................................hl+tG.sulEsKKhps...s.ssLsLNSSaPKs.l..ss..l..pth+ttE..pWcpKcllYhlGhh.p..pp..LppLhhVYG..asAptphY.clcppIppul.tph...ss..l.hscTpElG+lpplDPLsho.LRlRGMWtIppPhhV..Fp....lht......ppthphhsll..tcatphspp.c......p...h.tlpch.l........................ 0 0 5 6 +9354 PF09522 RE_R_Pab1 R.Pab1 restriction endonuclease Bateman A anon Bateman A Domain \N 25.00 25.00 60.20 29.80 18.40 17.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.65 0.71 -4.21 5 56 2009-01-15 18:05:59 2007-05-03 20:02:19 5 2 32 6 4 55 0 116.20 69 53.14 CHANGED KQKQFIEDNF..MITRERFRSHQFGGMDFELSRISYPLLIHSFDD.NELSEIVIKEQQYGSKTQAMLYFCFSILELKTATPLLNRTAphKEHALLlIHcsNAshFLEMLKIFGLLSQsHHNDVLKILEKIL.QN ..............................KQKQFIEDsF..hIsRERFRSHQFGGMDFEhS+ISYPLL.IHsFND.Np.LSEIVIREQQYGSKTQAMLYFCFSI...L...ELKT...A...TP...........LLNRTAsLKEHALLhIHKTNA.hFLEMLKIFGLLSQAHHNDVLKILEKILpN.... 0 1 2 2 +9355 PF09523 DUF2390 CHP02444; Protein of unknown function (DUF2390) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are bacterial hypothetical proteins, about 160 amino acids in length, found in various proteobacteria, including members of the genera Pseudomonas and Vibrio. The C-terminal region is poorly conserved and is not included in the model. 
25.00 25.00 26.30 27.90 22.50 19.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.40 0.72 -3.88 60 298 2009-01-15 18:05:59 2007-05-04 09:10:55 5 1 295 0 89 240 56 106.10 35 65.72 CHANGED psLWpauLshYupsuVppuCLpLQsphGssVsLLLhshWLs.....p..pshshsspphptltstsppWcpplltPlRplR+ph+stssps..........lhcplhshELpuE+hp.phLh ..p.pLWpFuLphYut.tVcpA.CLp.LQspptuNVNLLLhhtWLs.....p.ptl.shsppchtpltpslsphp-sllpshRcLR+phKsphscs..............LhcchhphELphE+ppttpL.h........................... 0 21 44 68 +9356 PF09524 Phg_2220_C Conserved phage C-terminus (Phg_2220_C) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents the conserved C-terminal domain of a family of proteins found exclusively in bacteriophage and in bacterial prophage regions. The functions of this domain and the proteins containing it are unknown. 21.10 21.10 21.20 25.60 19.70 20.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.64 0.72 -3.98 26 362 2009-01-15 18:05:59 2007-05-04 09:12:02 5 7 326 0 39 286 12 73.70 47 26.64 CHANGED IIsYLNpKuG+sF+.ssptspcLI+ARhpEGasl-DFKpVIDhKspcWhsssp.....hppYLRPcTLFu.s.KF-uYLNp ...................................IIcYLNcKsG+pa+h..s..otpopchI+ARhs-.GaplE........DFKpVIDhKss-Whsss.......hscYLRP-TLFG..sKFEuYLNp........ 1 16 26 37 +9358 PF09526 DUF2387 CHP02443; Probable metal-binding protein (DUF2387) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are small proteins, about 70 residues in length, with a basic triplet near the N-terminus and a probable metal-binding motif CPXCX(18)CXXC. Members are found in various proteobacteria. 
24.40 24.40 24.50 25.00 24.30 24.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.96 0.72 -4.06 44 844 2012-10-03 10:42:43 2007-05-04 09:13:20 5 3 840 0 112 300 136 62.10 57 89.83 CHANGED hKKRFIAGAsCPpCsphD...plthap-ss..l-hhECVpCGas-ppssp......tsphpsR...............sptpsIsl..F ....RKRFIAGApCPuCpAQD...ohAMWRENN....l-lVECVKCGHp.RcsDK......pscc+VR...............pcEQVIGIF....................... 0 18 40 78 +9359 PF09527 ATPase_gene1 Putative F0F1-ATPase subunit (ATPase_gene1) TIGRFAMs, Coggill P anon TIGRFAMs Family This model represents a protein found encoded in F1F0-ATPase operons in several genomes, including Methanosarcina barkeri (archaeal) and Chlorobium tepidum (bacterial). It is a small protein (about 100 amino acids) with long hydrophobic stretches and is presumed to be a subunit of the enzyme. 23.00 23.00 23.20 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.74 0.72 -4.16 277 1319 2009-11-03 13:52:42 2007-05-04 12:08:56 5 2 1253 0 429 996 1353 54.40 25 56.48 CHANGED uhhht.l..Ghplls.sll..lGs..hl.GhhLDphh...so.t..shhhllhlll.GlsuGhhshh+hhp .............h..hhtl..uhphls.ull.lGs.hlGhhLDchh.......ss....Phhhl.....lhlllGlsuGhhslh+hh........... 0 172 321 377 +9360 PF09528 Ehrlichia_rpt Ehrlichia tandem repeat (Ehrlichia_rpt) TIGRFAMs, Coggill P anon TIGRFAMs Repeat This entry represents 77 residues of an 80 amino acid (240 nucleotide) tandem repeat, found in a variable number of copies in an immunodominant outer membrane protein of Ehrlichia chaffeensis, a tick-borne obligate intracellular pathogen. 
20.50 20.50 21.20 20.90 18.00 18.00 hmmbuild -o /dev/null HMM SEED 688 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.09 0.70 -13.79 0.70 -6.39 2 47 2009-01-15 18:05:59 2007-05-04 12:09:46 5 4 7 0 6 47 2 244.40 44 123.71 CHANGED MDIDNsNloTushpspossLh-lIMRILsFGNps..psps.soclh.pp.p...sDsVupPS................LpPhVstS..........cVScsppEcosPEVhhcDLQ.....sltpppStVu-psupsspEcps.Elcucphpsth-sulpcSppc.sEhVSpoSpE..ssE......................pcVSKsppEEosPEVhhcDLQ.....sVtpppStVu-psupsspEcp.....................SEltp+.ucTpKEpuhsEs+tc-.....D-.VppsSSE...stpcVScsppEEssPEVhhcDLQsssstp..........VpcppSEltp+.uETpKEpuhsEs+tc-.p.sspsS.E..suE..pcVScsppEEosPEVhhcDLQ.....sVtpppStVu-psupsspEcpo.Elcucphpsth-uulpcSppc.sEhVSpsSpE..ssE......................pcVScsppEcosPElhsEDL.......ltps.s.VsEK.sEh.t.p.sP.VhtE-.................-KVsETscpE...Elht-sQsVtsupsl.lsPM.sl-shDo.l...........SslhpstMhCPhSc..sGpaVphYthYhYthQsVKDL.Gsh..shssC.CNlslYFhtaN.FTN+Esl......Dll .......................................................................................................................................................................................................................................................................................................................................................................................sptp..........VpcppsEltp+.uETpKEpuhsEs+tc-.p.VspsSsE.alAE..pcVSKl-pEEosPEVll+DLQ.....cVspp-SsVuDpsupsssERpo.Elcucphpsth-uulpcSppc.sEhVSpsSpE..ssE............................................................................................................................................................................................................................................................................................ 0 4 5 5 +9361 PF09529 Intg_mem_TP0381 intg_mem_TP0381; Integral membrane protein (intg_mem_TP0381) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of hydrophobic proteins with seven predicted transmembrane alpha helices. 
Members are found in Bacillus subtilis (ywaF), TP0381 from Treponema pallidum (TP0381), Streptococcus pyogenes, Rhodococcus erythropolis, etc. 21.80 21.80 22.20 22.30 21.70 21.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.70 0.70 -5.11 43 802 2009-01-15 18:05:59 2007-05-04 12:30:38 5 1 697 0 95 583 737 219.40 29 92.57 CHANGED hhsFphaussHhshlhhhllhslhlhhhhp................phptpppphhpphhshlhhlphluhhhah..hhhshhsltpsLPLahCcluhhhhslhLlscp..phhhphhaahGlsGuhhALlsP-l..........................................sasFPHhpahsFalsHhhllhsslahlhhpph+sshpshhtsllhshhhshhlhhlNhlh..........GuNYhaLsppP.hssohLslh...ssaP...hYllsthslshhl...hhlhh............................hsat..hpc ................h..shphhpss+hslhhhhhlh.sllhlhhhp................thp.h.p.p.p..p.h.hphhh...thl.hs..phlhL..a..sWa....hs...s..............th..s....L.s.p.SLPhahC+lAhh..s.lh...lls..p....p......p+.ht.p.......h....hh.h...h...GhhGulhA...llhPs.h.........................................ssY.sF.P.HlshlsFhhuHhsLlhsuLhhlh...c..p..Y.c.sph.hshpthhlhshslssll.hhVNhlT................GGNYuF..LscsP....ssp..h..Lsh.......................Yllsshllshhl.....hhhh....................................ph.................................................. 0 32 65 82 +9363 PF09531 Ndc1_Nup Nucleoporin protein Ndc1-Nup Wood V, Coggill P anon Wood, V Family Ndc1 is a nucleoporin protein that is a component of the Nuclear Pore Complex, and, in fungi, also of the Spindle Pole Body. It consists of six transmembrane segments, three lumenal loops, both concentrated at the N-terminus and cytoplasmic domains largely at the C-terminus, all of which are well conserved. 
19.90 19.90 21.20 20.20 19.70 19.80 hmmbuild -o /dev/null HMM SEED 602 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.89 0.70 -6.10 34 331 2009-01-15 18:05:59 2007-05-04 13:22:05 5 8 241 0 220 326 0 466.90 18 88.01 CHANGED ppllppRhst.ustlslhlshlhhhshhl..........thhohhhshhsh...hh+slhlhluslllhlhRhp.hpl.t.psp..sohhsplhptlhshphl.hhlhashuuh.hhuhhath.hstsh..t..hh.................t.tppshLN-phlalhhhthhluhhh...........oltalhhshsclsF..............s....hpp..tppltppl.phlhpu..lhpohhshhstsllY..................hh..hh.hshh.h.thhhshsps.h.sshh...hshpllhphhlhuhhllhsW..phsNhhFslahops.slctspsloshop-Ppt..oLlsGLp.spc....hs+hhAhpELshluppsssc....Rpslasspptss..ssWstlhcpChplIpphssclsphhpt.t.....ststhss..ppppspp...........................................................t.......t...h....st...pt..h.t......hptshsppshpssssssthst.tp..pttpplhsthpphhp...............................................................hpphLpphhsh.athohp+pspshl......ss.hhspAl.uLotLlhtSlpEDpaGsVpss.lspllphlpchhtslspahph.sssshhst..............tttpp......stlphlhsshpsulhcIshpFstaLp-lhLsscsh+hhp 
.......................................................................................................................Rh......hh.hhh....h....h.h...............................h.....h..h.h.h.hs.h..hh..hh.ht.hh....h............s...ht...h...h...ph.h.h.hha...h.uh..hhs..hh...h...h..........t............................................hst...lahh...hhhuh.............sh.hhh.p..hthl.h.....................sh.....t.....h.hhtt.l..hh..hpu..hh.shh.hh.h..hha...................h.....h.....h.thhhs..hp.........s.....h....sh.......hh.hphhhhuhh.lhhha..ths...hhapha.hspt.sh.....t....ss..hsps.spt......sLhpsLp..sp............................hhphhA.h.-Lhhlupt..ssp.....RttlFp..p...t.......s..s..tsWstl.ptChphlpthspcl.th.p..........s......t....t....................................................................................................................................................................................s.......................t..t.h.h...tp........................................................................htthh..p........h.ah.sh..php..h.......sp...hh.uhp.uL.stLhhtShpEDpaGhVpps...lstllphhhph.tslcph.....h........t................................................................t.........t.h..hhhtshppulhplh.tFt.hlptl.hs.p..p................................................................................................. 0 65 111 182 +9364 PF09532 FDF DFDF; FDF domain Anantharaman V anon Anantharaman V Domain The FDF domain, so called because of the conserved FDF at its N termini, is an entirely alpha-helical domain with multiple exposed hydrophilic loops [1]. It is found at the C terminus of Scd6p-like SM domains [1][2]. It is also found with other divergent Sm domains and in proteins such as Dcp3p and FLJ21128, where it is found N terminal to the YjeF-N domain, a novel Rossmann fold domain [1]. 
26.10 26.10 26.20 26.20 26.00 25.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.56 0.72 -3.49 18 611 2009-01-15 18:05:59 2007-05-04 13:55:02 5 11 255 10 383 561 2 97.50 26 19.68 CHANGED hc.cpDFDFEusNuKFsKpclhcchcpptphtttt.....................ppppssspcpspsssphhs+ppsFFDsISscspccsttss.t..................................apcEcchs..ETFGhst ..........................................ppDFDFEusNApF.sKpc...l..h.......cch.p.pp.p.h......t...t.p.pt..tt..t.........................ptpptt.pp.c.s...h.s.ss......t.h...hs+ppsFFD.s.l.Sscs.p...p..pp...p..s...............................................atppc.phs..-TFG.................................................................................. 0 78 159 280 +9365 PF09533 DUF2380 CHP02269_MYXXA; Predicted lipoprotein of unknown function (DUF2380) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of at least 9 paralogs in Myxococcus xanthus, a member of the Deltaproteobacteria. One appears truncated toward the N-terminus; the others are predicted lipoproteins. The function is unknown. 25.00 25.00 25.70 25.60 23.40 23.40 hmmbuild --amino -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.40 0.71 -4.88 8 28 2009-01-15 18:05:59 2007-05-04 14:10:00 5 2 8 0 27 29 0 164.80 35 60.96 CHANGED uhpsApEtCsssDEspCVSLLCpGDA.CGFYcCEDlsGcVEh.ARFPPA.....RPPsAsAAPGpGPRRsWGsGQpLPRGA..VMVFPsWsGAPpchlsPuhpLsPG.RWEKHHIFPQAcDLAcWFpp..+GVKIHDYThPIPR-lHRRIHuGsspGGAWNcAWR-F+cpp.G.A.SP-EIa+HAGELIaRFELhGGPIpPY......YSR ...........................................t...t.t..Cttspts.ClshhC.tsh.CuhahCEDlss...ts.h..uth.ss.......RPPh.......G.sspRsW.Gtt.tl.tss..lhsF..W..t.t..h.sshphssG.ca.....cKHHIFPQt.cLApWFpp..pG....lcIHcaTlsIPcclHpRIHuGs............s..p..GGs..WNpsWRpFhpts.t..A.o.pplac+AucLIhRFpLh.G.l.sYa................................. 
0 11 13 21 +9366 PF09534 Trp_oprn_chp Tryptophan-associated transmembrane protein (Trp_oprn_chp) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are predicted transmembrane proteins with four membrane-spanning helices. Members are found in the Actinobacteria (Mycobacterium, Corynebacterium, Streptomyces), always associated with genes for tryptophan biosynthesis. 24.90 24.90 25.00 25.10 24.80 24.80 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.36 0.71 -4.68 24 358 2009-01-15 18:05:59 2007-05-04 14:15:32 5 2 348 0 99 256 3 178.30 31 85.29 CHANGED tIA.tLlLlluAusLWhAuRhsWVslpSFDsLGs........P+sssLoGAoWooALlPLALLhLAAsVAslAVRGWsLRlLAlLlAsAususuYluloL.WV....ss-sAsRuusL..ApVPlss..lsGopRphhu...........AslullAushsLluAVLLhppAs.ptsAts..uRYssPsARRutApppts.....t............hSERhlWDALDEG+DPT ...........................h....hhhhhuAhhh.hhu.up.sW..h.h.s..s..hs..........shthsloGushosu.L.h.ulALlhlAu.ssAs.hs.V.RshuRRllulLhAlsuhuhshhslsh...hh...ts.s.s.Atpsush................sts.s.sst......hsss.pho.sWs............hlslluuslsl.luulhLhhtus.chsutu......s+Ytt.tARRstthpt...............................................................s.Rsl..WDALDcGcDPT..................... 0 30 74 94 +9367 PF09535 Gmx_para_CXXCG Protein of unknown function (Gmx_para_CXXCG) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry consists of at least 10 paralogous proteins from Myxococcus xanthus and that lack detectable sequence similarity to any other protein family. An imperfectly conserved CXXCG motif, a probable binding site, appears twice in the multiple sequence alignment. 
25.00 25.00 29.00 28.40 20.70 18.80 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.59 0.70 -5.36 11 36 2009-01-15 18:05:59 2007-05-04 14:22:52 5 2 9 0 31 35 0 184.80 40 91.83 CHANGED RFahl+cDcssp..aTGsL.sAsH+WuLPGVpsCssCGsshGssGhpYPCVDLSsLP..EppchscP.hPlsaEEFuRLRELVRPhAPPGAhL.PGTphGPLsGsASGpFGsLhhQsshsLhlRREALERLputGlRGLpGCsh-lRFRtKsPPELLELQLE.+GRLHPDClPsDctPPCssCGsp.shphP.-sslLDAsSLPsslDlFRltsasTlIluTERFVEAVc+LcL-GlsFpELssR ...............................p.s.s.....hsGth.pu...WthPGlt.Cs.st.shu..h.tYPsVDLuths..t.t.h.ps...h..shpEatRLtp.lRPhhP.th.l.PGsthGPhhGtupGtFu.h.h.ss..lhlRp-Ahc.Lpt.GlpGL.us.hpl+hRtptsPtlhcLplp.pGplH.cChssth.ssCspCGp..sh........phP........ct.hL-stSLPsshDlFRltsasThlluTERFV-Alc+LtLsGlsFpElss+... 0 12 12 23 +9368 PF09536 DUF2378 Mxa_TIGR02265; Protein of unknown function (DUF2378) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of a set of at least 17 paralogous proteins in Myxococcus xanthus DK 1622 and and 12 in Stigmatella aurantiaca DW4/3-1. Members are about 200 amino acids in length. The function is unknown. 21.20 21.20 23.10 22.50 21.10 20.30 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.81 0.71 -4.75 27 82 2009-09-10 15:59:30 2007-05-04 14:23:17 5 1 10 \N 69 82 2 174.80 24 87.95 CHANGED thhs.sp...sscGLFhpulhstlp....tthtpcl+psuh....-hcts.h.sYPltsahphlhsAAptltPph..s.-sAhRtlGcphspuFh.pohlGRslhslhthhuP+Rhlsplspua+.uussYsEpplp.h...Gspssclhhpcsh..sssappGlLpusLcssGAps.pVpspthsh.hsssYclsWp ........................................hh.tpshcGLahpulhshlp........tthtpclctsGh...-hct..h.sYPhptahphhtssAchlhPth..sh-tAhctlGcphspuah.pohhG+slhslhp.hhuP+RhLpchspuap.sussasctplpth...Gspssclthp...........cshh..sss....att....GllpusLchsGupsspV...p...spthss.t.sspYclpWp....................... 
1 20 20 50 +9369 PF09537 DUF2383 CHP2284; Domain of unknown function (DUF2383) TIGRFAMs, Coggill P anon TIGRFAMs Domain Members of this protein family are found mostly in the Proteobacteria, although one member is found in the the marine planctomycete Pirellula sp. strain 1. The function is unknown. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.89 0.72 -3.82 79 432 2012-10-01 21:25:29 2007-05-04 14:23:56 5 2 358 2 189 457 26 109.50 24 67.21 CHANGED pslss..LscLlcsshDuppGacpuu-clc..s.spL+shhpchupp+pptspELpstlpphG..scPcss.uShsGslHRsahsl+utlousc-p.slLpcsEcGEctslcpYccALc.c ...............hhptLNcLlctspDutcuacput-csc...s...spL...+........shhpchsppppptspcLpshlp.phG...ucPpps...uohtGslp+sahsl+s....hhss..p.c.cp...lLppscc.uEctshctaccslp....................................................... 0 49 115 153 +9370 PF09538 FYDLN_acid Protein of unknown function (FYDLN_acid) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are bacterial proteins with a conserved motif [KR]FYDLN, sometimes flanked by a pair of CXXC motifs, followed by a long region of low complexity sequence in which roughly half the residues are Asp and Glu, including multiple runs of five or more acidic residues. The function of members of this family is unknown. 
23.80 23.80 23.80 23.90 23.70 23.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.31 0.72 -11.07 0.72 -3.47 63 299 2009-01-15 18:05:59 2007-05-04 14:25:01 5 1 295 0 110 234 357 112.80 36 90.88 CHANGED MsKtEhGTKRlCPssGp+FYDLN+.sPllsPhsGpshshc................ss+s+thtspt-c...................scpscsssss-.ssl--s-s-.p..................p-s..t..--....DL-s.-DDs.......hcD-D- ........MAKsELGTKRlC..PpsGcKFYDLN+.cP.ll.sPasGpshshs................hhcspt..tsst-cst................tpchcss..pps.-hssl--.s.Ds.-sph...................................sDDl.s..D.sDD...sl-ls-...DDDs......hhtp-----p................................... 0 33 70 85 +9371 PF09539 DUF2385 CHP02301; Protein of unknown function (DUF2385) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this uncharacterised protein family are found in a number of alphaproteobacteria, including root nodule bacteria, Brucella suis, Caulobacter crescentus, and Rhodopseudomonas palustris. Conserved residues include two well-separated cysteines, suggesting a disulfide bond. The function is unknown. 25.00 25.00 58.40 58.10 18.80 17.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.20 0.72 -3.61 16 161 2009-01-15 18:05:59 2007-05-04 14:25:50 5 1 159 0 59 119 6 95.00 52 72.36 CHANGED PY-tpLhRLAEILGulHaLRsLCu.tscsspWRscMptLL-uEsss-.pRRpRLhuuFN+GYRuFussYppCTsuAphAhpRYhpEGpsLoc-IsuRY .......PY-schhRLAElLGoLHYLRsLCG..pcGscWRccMpAlIsAEpPs-.tcRt+LluuFN+GYRsFussYspCTPuAhsAlcRYhcEGucLSp-IhuRY.... 0 15 31 40 +9375 PF09543 DUF2379 CHP02267_MYXXA; Protein of unknown function (DUF2379) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of at least 7 paralogs in Myxococcus xanthus and 6 in Stigmatella aurantiaca, both members of the Deltaproteobacteria. The function is unknown. 
20.00 20.00 20.20 20.00 19.70 18.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.41 0.71 -4.16 16 44 2009-01-15 18:05:59 2007-05-04 15:28:38 5 2 5 0 43 43 1 103.10 42 91.86 CHANGED scphDWsPIRuLu+RVlcpGEPLtLTc-VRALLpRoAcEVuIosu-sppALuosssAtsLLcEhpRRI+-GSpRLhcAlpRhhchp-AGDlDuARppMc-VLAVEVVPhYRchApspLcsls ......s...-Wp.lttLs+RV..ptGtsL.LotchRuLLh+oAtEVulstt-sttALto.tsAhsLLpEhtcRIp-GSpRL.cAlhc.hhchp-uGDh-uARpphc-lLAVEVVPhYRchAptpLcsh....... 1 9 9 30 +9376 PF09544 DUF2381 Mxa_TIGR02268; Protein of unknown function (DUF2381) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of at least 8 paralogs in Myxococcus xanthus, a member of the Deltaproteobacteria. The function is unknown. 20.60 20.60 20.60 20.80 20.20 20.50 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.67 0.70 -5.30 21 91 2009-09-11 05:39:59 2007-05-04 15:29:54 5 3 9 0 71 90 0 237.80 24 91.05 CHANGED sLLhussApApss..sspt.ptRsVslsusssp..ssPEV+VussssTsL.lFsuslpccslsl-..cuRhphlDsG..cpolhLtPsssLttGERhcLsVhFtDGssPspAsFlLVsc.Puc.......sDspl-VpRsttsstuhpsE..s........t...t..phcsptstPpshsL.Ghlsp......pGVssp..plpc..hsssupultstsuh..sYRu.tshshVslplcN.suttPWssp..tApLs.....uts.G.sL+shhV.hppuslsPGpt.scVlV.s-ssshusp....ssFT...LcLhspsG.RslplssVph ...................................t..ss.ut...........Rtl.lssp.st..s...lhlusshsThl.hFssslt.tphph-..csRhphlssu..tptlhltshtslt.sE+h.ltVhatDG..t.P.pssFhLssp.ssc.......s-t.lpV.R.ttsstsh.sc..h...................t.pt..ttststshs...h.uhlsp.......pultht....hpt....htssspshthhthh..sapu.tshshl.hplpN.tsttsWtst..tAplp..............stt.G........hcshhl.h.........ptsslh.PGtt..splhl.s-.hss.htst....t.as...Lclht.tsG.Rsl.l.th................. 1 13 13 56 +9377 PF09545 RE_AccI AccI restriction endonuclease Bateman A anon Bateman A Family This family includes the AccI (recognises and cleaves GT^MKAC) restriction endonuclease. 
21.00 21.00 24.60 24.40 18.00 17.80 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.09 0.70 -5.91 2 13 2009-01-15 18:05:59 2007-05-04 15:31:22 5 2 8 0 7 20 3 229.80 29 92.59 CHANGED MsYY-pIREhTK.VPVpLVsFEpPRDhARTPTQASSNFITNKEQGDWAEsLlsRAINEsSpNFVAVKYGKSDNLVAGEsGFDsFYQDFQsELDTIGKRPDLLIFpKsDFDsoLGFDlSQ.PHcpITDYVKKAIAGIEVRSSAFLID+YEEAMQsRTp+FspIAhpT+DKILs-F.DVL-HPuRppYIpLLNolThpTlslhDF+VPuWpSs-RLIpVpNLFKpLKsAIKEIQKRDYLSITPKVEDlKVVYKWIETFNVPHFYFQVFFDKVYGISFEQILpIISsSDNDGVIFSVEpDspNQNKTTIKINSKhGhPIA.KVDEPhHESlRKEMDRGRLLFYVTFKGGTAYLDl-NLRsILslEEu.F ........................h..................................sSpFlhphpQGsWuEphlhpAIN-ss.pahAltYG.Stshsss-.puFt.aatchppth.shsKRPDlLlFp.ss...h.p.....................pl..........hV.cAlhulEscsS.ahht+h............................................................................................h.K.EDht.l.cW.pp.sV.hahhpVFFDhhaslSFpph.pll.....p...........u..h.h-p..ps.....p.Khhhhh..p.uh.lh.h.-....P...shh.E........ppG+ll.hVpF.GG.hhl........................................................ 0 3 4 7 +9378 PF09546 Spore_III_AE spore_III_AE; Stage III sporulation protein AE (spore_III_AE) TIGRFAMs, Coggill P anon TIGRFAMs Family This represents the stage III sporulation protein AE, which is encoded in a spore formation operon spoIIIAABCDEFGH under the control of sigma G. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. 
20.40 20.40 20.70 20.70 19.90 19.50 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.98 0.70 -5.48 32 401 2009-01-15 18:05:59 2007-05-04 15:34:36 5 1 393 0 90 304 11 320.30 36 82.95 CHANGED lP..phshpphl.shh+.G-t..shshpchhpullpalF+Elhssh+LLupLllLullsulLpNLQsAFp.ppsloplAahlsYhlLlhlslpSFtlAlshuc-sIssMssFM.uLlPlLlsLlsusGulsouAhFcPlllhslshsuplhpslllPLlhluslLpllsslS-ca+lo+LusLl+phuhhslGlhlTlFlGllolpGlsuussDuVsh+TAKFssssFlPVVG+hhoDAs-sVhGsSLLlKNAlGhhGlllllhIshhPlIKllulshlYKluAAllpPlu-pclsssLsshusolhhlhuslusVulMFFlsIsllluuGNls ...........................................................p.phtshh.phlp.Gct..phoh.pphhtulhpalF+ElhsstKLLupllhLslhuulLpsLQsAFp.ppslSc...lAa.hlsYhlLlhlslsSFhlshshAp-sIpsMssFhhuLlPlL.lsLlAouGGlsSuuhaaPlllhhhshsuhlhptlllPLlhluslLplVssl.S.cpaKlo+LucLLpplshhhlGlhlTlFlGllolQGhss.ussDulsl+TAKFssusFIPVVG+hho-As-TVluuSlLlKNslGllGlllllhIshhPhIKlhsluhlYKhuAAllpPlu..s.stll.pCLsslucSlhhlhuslshV..ulMFFloIsllluuGNl.................................................................... 0 47 75 80 +9379 PF09547 Spore_IV_A spore_IV_A; Stage IV sporulation protein A (spore_IV_A) TIGRFAMs, Coggill P anon TIGRFAMs Family SpoIVA is designated stage IV sporulation protein A. It acts in the mother cell compartment and plays a role in spore coat morphogenesis. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. 
25.00 25.00 30.50 30.50 23.00 22.30 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.65 0.70 -5.80 30 421 2012-10-05 12:31:09 2007-05-04 15:44:18 5 2 408 0 87 296 12 475.10 56 99.67 CHANGED MEphsIY+DIAcRTsGDIYlGVVGPVRTGKSTFIKRFMElhVlPNIcstac+-RA+DELPQSuoG+TIMTTEPKFVPNEAVEIsl.s-slch+VRlVDCVGYhVcGAlGa..E--p..PRMVpTPWF-cpIPFpEAAEIGT+KVIs-HSTIGlVVTTDGSITDIsREsYl-AEERVlpELKpIsKPFlllLNSpcPtupEThpLpp-LE-KYsVPVlslsstphpccDIppIhcplLaEFPVpElNIslPcWlEpL-ssHWLKpshhssl+-hspslp+lRDIppslpsls-hEalccsplpplshGsGsApIplssccsLFYclLsEhoGhcIcG-pcLlpllc-LupAKcEYDKltsALp-VKpsGYGlVsPpL-EhpLEEPEllKQGs+aGVKLKAoAPSlHhIRADIpTEloPIlGTEKQuEELVpYLLccFEs-PpcIWpSNlFGKSLc-LV+EGlQNKLa+MPE-uQsKLQ-TLQKIlNEGsGGLICIIl .....MEphcIaKDIAERTsGDIYlGVVGsVRTGKSTFIK+FMELlVlPNI-.N-tc+pRApDELPQSAuG+TIMTTEPKFVPNpAVpIpl......s-ulclplRLVDCVGYhV.GAtGYt..--ssPRMlpTPWa-c.IPFpEAAEIGTRKVIp-HSTIGlVlTTDGoIs-IPRcsYlEAEERVlpELKpluKPFllllNospPhps-..TppL+ppLpEKY-lPVlshsltphcEpDlhslLccsLaEFP........VtElNlslPpWV.hLspsHWL+psa.psV+-slcclp+lRDl.ccs.ltp.hsph..EalccsplstlchGpGsAcIclhs.-pLa.pILpElsGhEIcGcscLlplhp-Lu+AKpEYDpVu-ALc.VKpTGYGlsuPsLs-MsL-EPEII+QGuRFGVKLKAsAPSIHMI+sDlEoEhsPIlGTEKQSEELV+YLhp-FEsDP.pIWpSsIFG+SLpslV+EGlQsKLthMPEsuphKLp-TLp+IINEGsGGLIsIIL.................................. 0 46 72 77 +9380 PF09548 Spore_III_AB spore_III_AB; Stage III sporulation protein AB (spore_III_AB) TIGRFAMs, Coggill P anon TIGRFAMs Family SpoIIIAB represents the stage III sporulation protein AB, which is encoded in a spore formation operon: spoIIIAABCDEFGH that is under sigma G regulation [1]. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. 
24.90 24.90 25.60 27.90 23.30 24.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.72 0.71 -4.51 37 385 2009-01-15 18:05:59 2007-05-04 15:44:36 5 2 379 0 87 291 10 165.10 33 98.57 CHANGED KllGulLIlhuoohlGahhupcapcRscpL+pLpsuLphLcsEIhYutTPLs-AhppluppsptslutlFpphuppLppppstostpAWpculpphh.ppssLppp-h-lLhphGpsLGppDhpsQpKplpLshppLcpp.pcAcpttp+ptKha+.LGhlsGlhllIlLh .......KlhGslLllhuoohhGa..thApphpcRscpL+pLptuLptLcsEIhYupTPLsEAhpcluc.p.......h.s.p.PlshlFpphuppLppsc..p..osp-AWpcul...cc.h...pp...suLp.pp-hElLpphGcsLGppDp-sQpKplcLslppLcppppcAcptptchpKMh+sLGlLuGlhlVILLl... 0 45 72 77 +9381 PF09549 RE_Bpu10I Bpu10I restriction endonuclease Bateman A anon Bateman A Family This family includes the Bpu10I (recognises and cleaves CCTNAGC (-5/-2)) restriction endonucleases. 25.00 25.00 56.80 56.60 17.90 17.70 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.18 0.71 -4.75 8 25 2012-10-11 20:44:45 2007-05-04 15:49:05 5 1 23 0 6 28 5 190.50 33 72.57 CHANGED QpcLcsSILEEFh.hLhpphhtt....hssplclG......ppsFsslsFh.sohtshhpuscstl+sKDpDFsIG.ptl.lKlohcschspspps.-hslshVAsECKTNLDKsMLp-ssuTAccLKpssPsuLYallsEaLDhs.ssss.ssTpIDEVaILRKp+Rsssphh...............hhchlshsPlss-VhhcLlccVpphLscsu.cssp...sLpRGal .......Q.cLcsolLEEFh.hLhp.ch.l.t....hssshp..lG......ppsFtsl.Fp.oshtshhpss.shI+pKDpDFslu.pph.h+hohpsp.ssh.pp..phplshlAsECKTNLDKTMhQ-susoApclKtssPsuhYhllsEaLDhT.Phss.phTplD-VhlLRKsKRhssphR...............ahchhsppPlts-lht+llpclpphlssst.ssps...sLpcGah.. 0 2 6 6 +9382 PF09550 DUF2376 CHP2216_phage; Conserved hypothetical phage protein (DUF2376) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a family of proteins found exclusively in phage or in prophage regions of bacterial genomes, including the phage-like Rhodobacter capsulatus gene transfer agent, which packages DNA. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.29 0.72 -3.81 39 165 2009-01-15 18:05:59 2007-05-04 15:50:55 5 2 163 0 50 131 32 41.30 41 61.95 CHANGED LtLpPcpFWsLTPsEL.phh...lGhpu.u.s.uPlsRutL-pLhppaPD .Lth.PttFWthTP+EL.s...hh.........lGhtu..s.s..sshsRspLDuLhttaPD....................... 0 12 27 37 +9383 PF09551 Spore_II_R spore_II_R; Stage II sporulation protein R (spore_II_R) TIGRFAMs, Coggill P anon TIGRFAMs Family SpoIIR is designated stage II sporulation protein R. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. SpoIIR is a signalling protein that links the activation of sigma E to the transcriptional activity of sigma F during sporulation. 25.00 25.00 25.90 25.80 21.70 21.20 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.67 0.71 -4.12 40 391 2009-01-15 18:05:59 2007-05-04 15:52:32 5 3 385 0 90 332 8 130.70 44 55.25 CHANGED lscclIRhHVLANSDSspDQpLKh+VRDpVlchlpstlp..sspsh-Eu+plIpspls-IcclAcphlpcpGhsYsVpsphup.hsFPsKtYGslshPAGpY-Al+IhIGcGcGpNWWCVLFPPLCFlDhoputs ..................lsccslRh+lLANSDSccDQsLKh+VRDtVhttlsshl...s.......sh...p....S.h-Eu+cllpscls-IcchuppslccpGh.s..ssplphuc.spFPTKsY..........G........shlaPAGcYEAlhIsIGcG..cGpNWWCVLFPPLCFlDhopu.s............... 0 47 75 80 +9384 PF09552 RE_BstXI BstXI restriction endonuclease Coggill P anon Bateman A Family This family includes the BstXI (recognises and cleaves CCANNNNN^NTGG) restriction endonuclease. 
25.00 25.00 114.10 98.80 18.00 17.90 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.85 0.70 -5.37 3 10 2009-01-15 18:05:59 2007-05-04 15:59:13 5 1 10 0 2 10 4 279.30 35 80.40 CHANGED RKIYKTGQT..RGADcssIYQNRVSRNSTVLIPFEhl.pcspEsVstucY-NGYIVLIsPDaYF-cA+s-Kspshp.sstlsLGVNAllaYpQRuQa-cYsP.LsDhpspGL+pTpPhs..RouDl..GGcYVuRlSGTTuE..KcuKIphGaN.Tsu+GAGIRlaEYAssET..LEKARLQLEuhYWLsc-SL-uAIpaGMSscDAccR+c.oaNEAcpQGL........LppIcLlssRIIDccslTlCPLCLp+I............SASGFas+sEQsEhRcsaDLToTEINLFHIsELRYGALpHKPYNLGWGHH .....................pKl.KTG.T..RGAspstIYQNRVsRNssVLIPachh...sp...tssst.sp.Y-sGaIVLlsPc.YFssstss+hhhhp....tlpLGlNAllaYppRspWssasP..ssh.tspGLshsssps...Rpssl..uGpYVARIsuTT..uc............ctc+I.hGas.....t...p...sh+GAGIRlaEYAopEs..Ip+sRLQLEAhaWLscDSlEshhthGMs.pDshpR+c.sLscs.pcpGL..............................L-hpcLh-hRIls.ccshTICPLCLccI............SApsFas+hEQsEGRcsaDLTsTElNLFHIcELRaGphsHKPYNLGWGHH. 0 0 1 2 +9385 PF09553 RE_Eco47II Eco47II restriction endonuclease Coggill P anon Bateman A Family This family includes the Eco47II (which recognises GGNCC, but the cleavage site unknown) restriction endonuclease. 25.00 25.00 27.00 27.00 20.90 19.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.28 0.70 -5.21 6 64 2012-10-11 20:44:45 2007-05-04 16:05:53 5 2 59 0 13 61 7 190.80 44 79.26 CHANGED LoFIS-EDF.NcVpsTIscYpcpLcuh-hK+FsKNlIDPhKhIFDtslap.SacEhlssElhRQp-Ko.sNcIGhFHQ+IhtaIcsh+s.Ps....GaDV.htNs-..........pplasEhKNKHNThsuussuchahKhps.lhsctpp.cDssCahVElIs++SpNhpW.hpsssp+.........upcpIRhlShDpFYpLVTGppDAFpplshsLPhsI-KhlsE ..........................................LsFIoccDh.pcVptTl.pp.Ytct.Lpuh-lK+FN+NlIDPIKLlFDpslaptoaEcllpsElhRQpDKoNsNsIGYFHQpIFpYl.cs.hcVPps....GaD..V.hps.sp............pplalEhKNKHNTMNSuSuupsahKhQsplLpc....ccssCaLVEsIAK+SQNlpW.hplcspch........upphIRRlShDpFYtlVTGpcDAFhphChsLPpllpchlp.................... 
0 7 9 12 +9386 PF09554 RE_HaeII HaeII restriction endonuclease Coggill P anon Bateman A Family This family includes the HaeII (recognises and cleaves RGCGC^Y) restriction endonuclease. 25.00 25.00 106.70 106.50 19.70 19.20 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.11 0.70 -5.44 4 29 2012-10-11 20:44:45 2007-05-04 16:07:30 5 2 29 0 2 17 2 315.50 69 92.61 CHANGED AK-ALDsIIKKuRVHLYKPIQIAEILYpcRs.csL..sL.NL-TYRspSK+WRDlIChRFLGRlSTSSAKaQDNLFEcNAhPPchLslLGp.N+pssGhVEuYIY+pFh-RasQMosuLsYshsosh-NF+LoEFLs.FW.EPGLKRSIDKIYEIVVYALFcsLlppLsVpVcIphshsNlDLLcEFpDFocKllolsupNsphpLsAKhaRVGVTNAADRGLDMWANFG.AlQIKHLSLsE-LAEsIVSSloADRIVIVCKcuEEclIlSLLNQIGW+SRIQSIITps-LIsWY-KALRGpas.llGp+llEplppEIphEFPus.-sNDF.sFhcpRtY .....AKEALDsIIKKSRVHLYKPIQIAEILYHDRshKpL..-hLNL-TYRNpSK+WRDpICpRFLGRlSTSSAKFQDNLFEcNAhPPE+LuVLGshNRposGGVESYIYKpFFsRFSQMSpALAY..VG..soD+.SFQLSEFLNLFWLEPGLKRSIDKIYEIVVYALF-uLVoELGlTVoIDaPcENLhLhcEapDFu-KIIoh.PcNp+LcLsAKIHRVGVTNAADRGLDMWSNFGhAIQVKHLSLDEELAEsIVSSISADRIVIVCKcAEpSVIVSLLTQIGWKSRIQNIVTEDDLIsWYEKALRGpYs..IAEsLLEsI+sEIhcEFPAV.EANEFl-FtQsRGY...................... 1 0 1 1 +9388 PF09556 RE_HaeIII HaeIII restriction endonuclease Coggill P anon Bateman A Family This family includes the HaeIII (recognises and cleaves GG^CC) restriction endonuclease. 
25.00 25.00 25.40 25.30 21.30 21.20 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.89 0.70 -5.54 4 54 2009-01-15 18:05:59 2007-05-04 16:09:16 5 3 51 0 12 46 1 272.30 48 88.81 CHANGED G+AaEYAhlpuLtptLsssQcVlIEpNSuhplshcpYcshocphpp+hshuAcAulplILcLEP.Los.lsNss..LhLuIQcDs+Gp.GDVRDILhhRcp.pWEIGLShKHNHsAVKHSRLSRsIDFGEpWFGlPsSQsYaDpIpPLF-cLEphKccG.LWRslsNKE-cIYsPLLcAFIpElc+lspNppulIPpRhlpYLLGp.DFYKlIohDp++lTplQAFNhhGTLNRsSs+c+Phl.lP.h.hPTRhhcIsFKPsS+NTlElhLDpGWohSLRIHNASocVEPSLKFDl+LlGVP ......GRAYEaAhh.sL.pplsh..h.c.pl.ltcpsua.ss.cAapsLpcphpphahtSAhtul.hlhchEPhlp-..sssp..lpLplQpDphGchGDlRDILIhh...c...p..WpIGLSlKHNH.AVKHSRLS+cLDFGc+WhGlssSQsYaDsIcPlFppL-stKccs......hhW+-lsN....KEp-lYhPLLpAFhcElhRhtpp...pps.......lPp+hVEYLLGcaDFYKsI.l-pcphTplpuaNhpsTLN+.ShcpKsphhl.PlspLPTRhlthcFK.....P..........p.....StNTlElhLDpGWpFShRIHNASo+VEPSLKFDIplluhP....... 0 5 7 9 +9389 PF09557 DUF2382 CHP2271_C; Domain of unknown function (DUF2382) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry describes an uncharacterized domain, sometimes found in association with a PRC-barrel domain Pfam:PF05239 which is also found in rRNA processing protein RimM and in a photosynthetic reaction centre complex protein). This domain is found in proteins from Bacillus subtilis, Deinococcus radiodurans, Nostoc sp. PCC 7120, Myxococcus xanthus, and several other species. The function is not known. 
18.60 18.60 18.90 18.80 17.90 17.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.59 0.71 -4.17 46 499 2009-01-15 18:05:59 2007-05-04 16:29:31 5 9 317 0 179 399 8 106.10 31 43.13 CHANGED lpLhEE+LpVsKc+hpsGcVclpKcVhp-ppslsVPVc+EcVhlERpslscsspsss.t..phpp.-shc..lslpEEcsslpKcsVlpEEVplpKcssp-scplp-slR+Ecl-l ........................h.lpEE+LpVsKc+lpsGc.Vcl+KcVlp-ppslpVPVc+EElhlE.Rp.slsctsssss..............................t.............h......p-..psh.c.........lslpEEpssVsKcsVshEcVpltKcthp-scplstplp+Epl-l................................... 1 67 126 161 +9390 PF09558 DUF2375 CHP02922; Protein of unknown function (DUF2375) TIGRFAMs, Coggilll P anon TIGRFAMs Family Two members of this family are found in Colwellia psychrerythraea (strain 34H / ATCC BAA-681) and one each in various other species of Colwellia and Shewanella. One member from C. psychrerythraea is of special interest because it is preceded by the same cis-regulatory site as a number of genes that have the PEP-CTERM domain described by PEP_anchor (IPR013424). 21.80 21.80 22.10 65.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.40 0.72 -4.13 6 23 2009-01-15 18:05:59 2007-05-04 16:31:02 5 1 22 0 10 13 4 70.60 75 92.22 CHANGED MpsspssVTVLYY.-APsGLlMHNtVlsuLslScsGRVMIPppFR+GKSIIAVLEGECcILNSLGERVauQt ..MQAsQATVTVLYY.DAPVGLIMHNuVLssLPVSEuGRVMIPASFRKGKSIIAVLEGECKILNSLGERVFAQ.A 0 3 5 7 +9391 PF09559 Cas6 Cas6 Crispr TIGRFAMs, Coggill P anon TIGRFAMs Family The Cas6 Crispr family of proteins averaging 140 residues are characterised by having a GhGxxxxxGhG motif, where h indicates a hydrophobic residue, at the C-terminus [1]. The CRISPR-Cas system is possibly a mechanism of defence against invading pathogens and plasmids that functions analogously to the RNA interference (RNAi) systems in eukaryotes [2]. 
22.80 22.80 27.20 121.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -10.98 0.71 -4.58 10 43 2012-10-01 21:23:39 2007-05-04 16:34:45 5 1 41 0 20 51 26 199.20 32 90.31 CHANGED lDLsFslpGcsLPhDHuYhLhSALschlPtL+-hsshGIpsl+GsssssG.......llhLo+co+LhlRlPtsplstlhsLsGpsLcluGcplc.LGss+l+tLpPsssLauRhVl..h+sth-E-sFLpuspRpLcshslps+thls....G+..RcTl+hsptsllGauLhlcsLusE-SL+LQpcGLGu+RphGCGLFlPcKsls ...lDlhFslpGpsLPhDHuYsLhsAlpc......hl....PhLp-...p..sslu....lpsIpGssspsG..........hlhLocRo+LplRlPtcpls.t.l.h.s.LsGpsLclusatlp.lGtscl+sLpshssLhuRhVs......hcstp.-tpsFLcsst+pLpplslpsphhls.........G+..........c+slpht....sts....lhGauLhlssLst--SlcLQppGLGu+R+hGCGlFlPpKp..t.... 0 10 17 19 +9392 PF09560 Spore_YunB Spo_YunB; Spo_yunB; Sporulation protein YunB (Spo_YunB) TIGRFAMs, Coggill P anon TIGRFAMs Family Spo_YunB is the sporulation protein YunB. In Bacillus subtilis its expression is controlled by sigmaE.The gene YunB seems to code for a protein involved, at least indirectly, in the pathway leading to the activation of sigmaK. Inactivation of YunB delays sigmaK activation and results in reduced sporulation efficiency. 25.00 25.00 51.20 50.60 23.30 17.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.99 0.72 -4.27 31 313 2009-01-15 18:05:59 2007-05-04 16:36:26 5 1 305 0 78 259 3 94.00 38 40.71 CHANGED shthplPLGplhssslLushGP+IsV+hpslGsVpssh.pscFcsuGINQT+HpIhlplpspl+lllPhsocshpVpsplPlu-slIlGcVPphY ...s.thslPlGplosNsLLushGPcIPlchpsIGpVso.-l.cpchcstGINpTth..pIhlclcsplpVlIPFtoc.chpVppplPluppll.G-VPshY.. 0 38 63 68 +9393 PF09561 RE_HpaII HpaII restriction endonuclease Coggill P anon Bateman A Family This family includes the HpaII (recognises and cleaves C^CGG) restriction endonuclease. 
25.00 25.00 32.60 32.50 19.20 19.10 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.13 0.70 -5.25 9 103 2012-10-11 20:44:45 2007-05-04 16:40:46 5 3 98 0 15 93 13 324.40 40 95.57 CHANGED NKGEWSElYshF+LLuDGplahGssplcKhEslhaPIhpIhREE.pstphpY.lcsptlhhl...........sssccchplPhcsFtppApL..lLstlKssp.pcsFshsslEpFhsulshhcLc.ApSosKoDIslslac.cs..pPhhGFuIKSpLGu.sTLLNuG+sTNFhacl...pshphsssplppINulsp.toclt-Rhh.I.chGGhLcYhcltscsFpsNLhhIDsphPclLuchlhhaYtsploclpDLTcplpchNPLphc.p..ppHsFYEaKhKpFLsslALGMpPuKlWNGp.sAsuGallVcpsG-VLCYHIhs+ppFc-YLapNT+LEpuSoSRacFGplhp-NGchYFKLNLQIRF ............sKtEWuElYshh+LLuDGplhhGsschpp..shhaPlhhl.RcE.ccGp.ppYhl.ccpt..lh.l..............psppt.tplPtp-FttsA-h..lLptl+ssp..tppshs.s.c.ulEpFL-ph.sha.c........Lc.A+ocD+oDhplshac.cs.....PhhGFsl+ScLGshssLLsuG+s.sNhhhc.....ts.hpF....s....ssslsclNul.........sp....sclt-RhhhIc.chGGhLcYtclssclFcsNLhhIDhphP+lLuEhlh..hhal-..slocls..-LsEh..lpphNPLKlccp.......pcHsFYEaKhKpFLhAlALGM+PuKlasGps.sAstGhllVcssG-V.LCY.H.h.c+phFcDaLahNo+hEpuSsp..+c+aGhl.+.E..NGsaYFKLNlpIth.......................... 0 7 12 13 +9394 PF09562 RE_LlaMI LlaMI restriction endonuclease Coggill P anon Bateman A Family This family includes the LlaMI (recognises and cleaves CC^NGG) restriction endonuclease. 
20.90 20.90 22.80 22.70 19.30 18.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.75 0.70 -5.23 2 7 2012-10-11 20:44:45 2007-05-04 16:44:01 5 1 5 0 0 8 11 262.00 54 98.34 CHANGED MsssKE+lhElFhpNVhGh.PsIpGhsh+HsGthGHWLEc+hGhossAsNcADhhGYEhKN.hTSsKTTaGDWSANEYIFcp...Ns.Fphs.....pstFh+hFGKPNpAKpsRhSWSGpPlPc...pYs.FGQIMsI-EsLsIsIhYSFppD.R.NKF-lhP.phppspl.lA+WYGhtps..S++.psLcsKlpcKFNphGWFpChhcs.ssYscIsFG+PIsFE.WhNhV-pG.laFDSGMYpGNpRsYSQWRu.NSaWppLIp-pap .........KppII-lF+pNVhG+p...Pchs.uhN.RHDG+c..GHWLEcphGIuANAsNEADlaGYEhKN.oTuuKTTFGDWSAN.YIFcs...ps.aphs.....pspFhchFGKPNttKssRaSWSGSPlPch.sp.YshhGQhMsI--uhsIlIhYSaSpD.R.sKhsIlPspLQp-tl.lARWh...p.....p.csLcsKLpcKFNc+GWFpCppss.GsYscIsFGcPIsF-sWlpLV-pGlVaFDSGMYpGNsRsYSQWRAsNsaW-SLIs-sY.......................................... 0 0 0 0 +9395 PF09563 RE_LlaJI LlaJI restriction endonuclease Coggill P anon Bateman A Family This family includes the LlaJI (recognises GACGC) restriction endonucleases. 
23.60 23.60 23.80 23.90 23.10 23.50 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.20 0.70 -5.77 12 86 2012-10-11 20:44:45 2007-05-04 16:44:32 5 1 83 0 15 98 4 346.80 21 74.68 CHANGED FVGl.hsp....sscllhsFPpththspsp.slc.tD.ttLhs....lLp+Yscppsp.....hph.stpsppppshslsshhhllcDahppG.hYpcscphhphNtpGcIsWs+TIpcspPllp....pssshYhphhsc+phps-pshlopIHcasVpcshpph..Galhsh.t.sh.phsht.h.....spshhlphLppclssTFsDcchhLhcuhhshlpppcp..scpp...haGTppFphlWEchhcplF..............sltphp....................shhs+PpWp....hsptp.spshpPDhlhhh....ccp..lhILDAKYYphthp....hcuhPsssDIsKQlhYtthlpphh........ptsclh...NhFlhPhsppsp.ht..sssphphls...hp...t.csht.l..shhlssc ......................................................................................................aVGl.h........tpphhhhhPhhh...pp....thp.....h.h.llp......sl.cap..ppppp..........h..ttstt.t.p.st.slsshh.llc.ah.p.pG..hYhpppphhppstpG+IsWs+TIpcspsllp....pssslYhchhs++ph.spschlptlathslppshpph..Galhsh..t..t..phsp.........spsh.hlph.LcpchsptFsDcchtLhp.shhs.alpppsp.hppsp....hhGTpsFphlWEchlcchh...................s.t.spp..............................phh.s+spWp........ttsp.p.p..tp.....phl.pPDhlhht.......pcp....h.a.IhDAKYYphsh...........p........hps.hPsssslhKQhsYsp..shtph................p.tplh...NsFlhPhpt..tp..........h............................................................................................... 0 7 10 14 +9396 PF09564 RE_NgoBV NgoBV restriction endonuclease Coggill P anon Bateman A Family This family includes the NgoBV (recognises GGNNCC but cleavage site is unknown) restriction endonuclease. 
25.00 25.00 32.70 32.70 18.10 17.40 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.57 0.70 -5.13 2 50 2012-10-11 20:44:45 2007-05-04 16:47:48 5 1 44 0 4 35 4 197.50 69 96.74 CHANGED MIKLTAQQIFDKLLDEEKILSANGQIRFFLGDVDIIVKQKDVVGNIIQEWLGGWLRKREIEFDVSTNTQMPPDFFLNKKDRSRELLEVKAFNRNAsPGFDIADFKMYSDE.hhpP.h.sschh.hGYDMDDNGNVTIKDLWLKKVWQITRSMDGWAINhpsKK.........sW............Clcs..............................................A+ha .............KLTAQQIFDKLLsE-KILsspGQI+FaLGDVsIIVKQKDVVGNIIQEWLGGWhcKRpIEF-susNTQMPPDFFL.NKKDRSRELLEVKAFNRNAsPGFDIADFKMYSDEI....IHKPYMLDVDYLIFGYD.MDD.N..G..NVTIKDLWLKKVWQITRSMDGWAINLQVKKGVVHKIRPGV..WYSIN..KKNMPMFECLEDFVSAIEETVYQNPATRHNASLWK+KFEEAYKKHYNRSISIPRWHEIAHKY......................... 0 3 3 3 +9397 PF09565 RE_NgoFVII NgoFVII restriction endonuclease Coggill P anon Bateman A Domain This family includes the NgoFVII (recognises GCSGC but cleavage site unknown) restriction endonuclease. 20.80 20.80 20.80 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.00 0.70 -5.47 7 72 2012-10-02 13:01:53 2007-05-04 16:59:06 5 2 66 0 17 148 64 252.10 24 71.27 CHANGED at..hppuDcL.lhsGYlo.shulpclcpls-.php.hpIsLlsGMash-Ghsts.asshhcLpthhpcpshGsl...alhtshchHuKlYlahKc.tpsh.uhlGSANhss...suhhpcphE..sssspDs.s.spchht.lpss.l.php.sltsh.ph..........phlh-tss.hpshtshptlsppslphh.ppt....F.l.h......csst.............pSsLNh....u.sR....Gph..psRsh.Es.IplstcIsRp.........ssa..P......ppp.FpllTDDGahh.s+hsup.......ssKplsuhts............pplLG+al+sRL.hspGsl...pp..........lT+E.L 
.......................................................................................................................................................................h.hhsGh.s...............................h.t.......pl..plhhGMh...Ghs.......hpth..p...l.....hptpth..tth...al..........hHuKhY.a..pp....t..h..t.uhlGSANhs.........s..p...pt.hE....hhh.h.s....t..........h....lpp...h.ph.t...l...p....................hh..p..tp....hpth.....thtt.l........p.l.t.......t.p.....h.....ls.h........cs..t..............................................................+SsLNh..h.u.tu.R.....G.h..hsRsh.Es.lhlsppIppp......................tha...P..................tpc.s...FsllTDDG...h.hps.+sstp..........sKshpopss.............phLGcW.l.+s+L...tts.h................................................................................................................................................. 0 7 12 14 +9398 PF09566 RE_SacI SacI restriction endonuclease Coggill P anon Bateman A Family This family includes the SacI (recognises and cleaves GAGCT^C) restriction endonuclease. 
19.30 19.30 20.50 123.60 18.10 17.80 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.29 0.70 -5.51 3 10 2012-10-11 20:44:45 2007-05-04 17:00:02 5 1 10 0 4 12 0 348.70 27 95.43 CHANGED MuITINHSsAppVL+cAaEcAup.oDDchpsQW.....lILuT+LpElss.R...TYpAALlTALLAKAsDsRVDPhuIpE+sssDsAYSARSLCHuVlVs.pVEtuFLEGsLGAsR.EPlNNpPFhRYspaSuI.pVcNK.uRcYLD+VLsALScIDpEchATTp..uaRALVAuLthTlsRTN+......................ssKEssAlGuAIVptSLluEpcuFVlpuH-VsR+hQAuuAuhLshsa.KE.Ilst+lNDPsRsFPhDIsVY...cDGssaLoIEVKDKslsapDLppuVSKAotuGIp+VlaLssA+A.TslsL....DcohAlERstsCtVpVsFS.VpoFs+sCFAlSPllusStl.hAF.cAIscpLIEIcV+-ssID ....tlsIs+.cpAccVLpcAacsAsppsD.shsspa......llpsspL..hss.+...Ta+YILlTALLAKATsscINPLsLQppusl-GAYDARSLCHKVlVP..FE+saLpsuLGuSN.EPFLNKP.ARascl..Sp....cNA.VRRGpD.c.lL.ptLsthc..hpssss..AFcuLssALhhtlppspc...............................tpphhhs.s.hIhphpLh.phppsal...lcspcs...GpphpAt..suusLth....ha...pE.....VtschVNp..s...Spc.suDIDVY........................c-cpllhoIElKDKsaotpDVpHAlcKsusuGhp+shFls.GP+A.splch....-cops..hppApppslhlhas.hppFs+.hhsh.sh.stpth.hsh...lhpphh-hp.pp.............................................. 0 2 3 4 +9399 PF09567 RE_MamI MamI restriction endonuclease Coggill P anon Bateman A Family This family includes the MamI (recognises and cleaves GATNN^NNATC) restriction endonuclease. 
25.00 25.00 122.60 122.10 18.80 18.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.03 0.70 -5.36 2 5 2009-01-15 18:05:59 2007-05-04 17:06:57 5 1 5 0 0 5 3 274.20 30 84.58 CHANGED LGSLp.ppc..LlcDLaVDLht+.pV....WAAlTtQoAQschGYIuQalsSlVhG.PG..pGhRGK.....sD-hA-................GStVp.........uuANIptsDcs+W...lG..cDsEa..........tEhLt.PhhaYLll.c.pslpcPssIRhphWplDu.psG.hhsLh-ha.....sS+pGtsaN.......FpLh.Pl.............L.apshll.Dsplphtshst.p.s.HhPLs....PlT..uRopsLphGthtthssRL.....hNsuchVLhps..........D.sslhsulhtPhsshDhAshS.hto.-A.s-thS .........h.toLEsS-p..LIK....DLYVDL+++lss....WStITsQTAQA+hGYIGQHLASlVTGhPGstSGARGcDLs..ssDhuEIKoC.RVDQLspCpsCGosVpthEppCPsCuSTNIcRcDDSKW..LlulRsDsEa..........-ElLs..Pc+aYLlLFDapDl..pDs..csIRIpsWcVDupEc.GhsaC.Ll.DYYh.NI...Kh.uS++G...APaNLaPapacFtLp+Pl.............LIFcusIl.sDs..pIpht....lhPs.p.sh+hPLs....Pls..uRspslphsthtth.sRL..............................................................pt..hh................... 0 0 0 0 +9400 PF09568 RE_MjaI MjaI restriction endonuclease Coggill P anon Bateman A Family This family includes the MjaI (recognises CTAG but cleavage site unknown) restriction endonuclease. 21.60 21.60 26.80 36.70 21.50 21.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.90 0.71 -4.72 7 42 2012-10-11 20:44:45 2007-05-04 17:08:48 5 1 37 0 16 44 4 169.30 31 84.26 CHANGED s.llNhAsphhptshP+lVGthSELI+Ehp...s+ohc-W+EaYhp+..hp-pIsctscKlhhhlpphhpu.lpplccEslcpalc-LVIs+Ta-Ghhhp.csILpplAcc...hssphc.AssE.-cspslDGaI....tpIPlpIKPhoa.sppsplsE.hphphIhYcppcshhhl.hscsh ....llNhAsphhphs+PcsVGthSElI+Ehp..........s+ol......cEWcpaYhp+...............hsEslcchscKlashlpchh.....pu.lpploc-....DshpalcsLVIs+TasGhhhp.psIhppltpc...........hshpa..c.AssE.-cshsID..h.aI.....tthslQIKPhTY.......t.hsE.hph..h..ppppp.....h........................... 
1 7 12 15 +9401 PF09569 RE_ScaI ScaI restriction endonuclease Coggill P anon Bateman A Family This family includes the ScaI (recognises and cleaves AGT^ACT) restriction endonuclease. 25.00 25.00 40.70 40.50 22.20 18.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.31 0.71 -4.81 4 14 2012-10-11 20:44:45 2007-05-04 17:09:11 5 2 14 0 2 13 2 172.00 47 93.08 CHANGED SPYtuhsEEcW.plTcpLIp-aPLSt-hIlphVLuSWEsIFoS+hGs.thpIGpslFPsPQhhGhlLcsLIshcltst.ss.Wpu-psp.-KDlVhhhs-hYSIElKTSSsccpIaGNRSaG..pspcuh..KSKsGYYLsINFEKap-s.sLcP+IphIRFGWLDaoDWluQputTGQQASlssphtpsKLlsla ..SPYtslspc.WhuhTccLIcpHPLStcpIVplsLcSW-sIFoSplGs..hpIG+shFPsPQIhGhlLHtLIstplpsca.s.Wpu-csth-KDlVhIhschYSIElKTSScsc.IaGNRSYu..p.spsuh..KSKsGYYLsINFEKF..s..phcPcIphIRFGWLDaoDWIuQpAuTGQQARLsscs.psKLhhlY..... 0 0 2 2 +9402 PF09570 RE_SinI SinI restriction endonuclease Coggill P anon Bateman A Family This family includes the SinI (recognises and cleaves G^GWCC) restriction endonuclease. 20.10 20.10 20.70 99.20 19.50 17.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.36 0.70 -5.36 3 15 2012-10-11 20:44:45 2007-05-04 17:11:24 5 1 15 0 6 17 15 215.10 37 92.89 CHANGED FlQNAAElAKpsMDsl-PSLSEKFTlVI+FLSDNP-usSshRGKc.RssVGs-EaIphLAQNFp-G.RcP+pPsPPoTIPDElVSVVLNVuF-lPpEpLN+IKEpHRLSMuAENIVGDLLERYLAEVLEPsGWIWCSGohVKAVDFI+pDsE.ssWtsLQVKNRDNTENSSSSAIRcGTPIKKWFRTFSK+cuTNWDNFPsphuucsLNEcGF+sFVEpYLccl ..........................................................................................................s..p.pL..tF..lhpaLsp.Pp.hS.hRuKp....p.pVhp..cEhlphhAppYacu.RpschPt..P...pTlPDEhVShlhp.hsashoppplppI+hpHphSMuAENhVGsLLERYLssVLcspGWhWCsGshVKAlDFlphssc.s.WhhLQlKNRDNTENSSSSAIRsGTsI+KWaRohS+ssp......T.......NWsslPp.hp....uhsLsEpsFhtFVcpYl.t.p............................................ 
0 2 5 5 +9403 PF09571 RE_XcyI XcyI restriction endonuclease Coggill P anon Bateman A Family This family includes the XcyI (recognises and cleaves C^CCGGG) restriction endonucleases. 25.00 25.00 46.20 45.70 18.80 18.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.90 0.70 -5.57 3 13 2012-10-11 20:44:46 2007-05-04 17:12:37 5 1 13 0 4 13 0 292.30 44 93.16 CHANGED PsPcsQI-FutuL-cLRplaLQpALLcTV+chDIucLDcELuKYVPsuDLQpLApYGLRAEllFsVPsVLEANP+LIGYYRLLLGYSQKcFYstD+GLGhGsFKSMEcKGKIuKAtpPcIcDLClAFsASAStLLsGlGhlRISRELLDDLTLLTLGPQLRGGRNNslGhAGh+hVFEIIREIVAPAIsESc-ouIVLoNAAGRsVTIEFuuDPDIIIREKlcsp+aKNslAIEVKuGTDlSNIHNRLGEAEKSHQKARp+GFTECWTlVNsuNIDLVKARsESPTTD+FYpLohLsDKussEYADFRcRIlALsGIP ..................................P.PchQIsFh.tLcchRthhLQpALh-TVcchDIspLDcpLtcYVssucLtpLApaGLRuEllFsVPsVLcsNP+LlGYYRLLhGaSQKcFYsts+GhshGhFKSMEpKGphspsttsclpDLChshstsuStLLsuls...plSpcLLDDLTLLTlGPQLRGGtNNp+GssuhhhVFEIIR-IVup.u..h.sEs...cp.stIplssAsGRplhIEFAsDPDIlIREchp.ppa+NllAIEVKuGTDsSNIHNRlGEAEKSHQKA+tpsaTECWTllsVtplDh.hAppESPoTsRFYplotLs.psssEYtDFRcpllulsuIs............................. 0 4 4 4 +9404 PF09572 RE_XamI XamI restriction endonuclease Coggill P anon Bateman A Family This family includes the XamI (recognises GTCGAC but cleavage site unknown) restriction endonuclease. 
25.00 25.00 43.30 43.10 21.30 21.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.69 0.70 -4.93 5 29 2012-10-11 20:44:46 2007-05-04 17:13:06 5 1 28 0 9 32 4 214.40 35 79.36 CHANGED TuNLT..uI...TP-lL.csaPpsLs.sLRMoTuPPIAcDRLlGLAchS.sLVcSlE.cs+LPs+Mcut.LcpcLcKlssVIp+hlDPDlFsWhscGcsPTccERchAATIVADRLCGAlAsPIlRNAQERR......QLAsIKSWLcARGYTQlssuAu..lNSM..sPGTFoFRtNlsV....Gs+..VNIPVDAVVps+Du+ptthPhhIEAKSoGDFTNTNKRRKEEAsKhuQLplp.YGpplsh.sLFLCGYFcoGYLGYuAAEGLDWVW ...................hp..tl...pst.l..t.s.hls.tLRh.suPPIucDcL.slus.....lpshp....thL..p.ps......Lp+lhsllp+hlD.chFsWhttsttPospphc.AAhlsustLhuA.......R.ApE+R......Q.shlcpaLcshGasch.tsu.....hpsh..................PsshpFptps.l.......Gpc......sDlVlt.tcs+.....hhlEsKsusshTNosKRhpp-.Ashhsphhhp.aGpt.hh.shhLsGhFcst.Lt.ttupGlshhW................ 0 0 2 5 +9405 PF09573 RE_TaqI TaqI restriction endonuclease Coggill P anon Bateman A Family This family includes the TaqI (recognises and cleaves T^CGA) restriction endonuclease. 25.00 25.00 44.80 44.70 19.10 17.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.78 0.70 -5.25 4 13 2012-10-11 20:44:46 2007-05-04 17:14:29 5 1 10 0 3 15 0 199.50 46 95.09 CHANGED opAQcALEAFEcFLcuLDLESYppKYRPIKTVEQDLPRELNPLPDLYEHYWKPssssPpFPsFEEFF-pWW-KRLR..PLDEFIRKYFWGCSYpFVRLGLEARLYRTAlSIWTQFHFCYRWpASCpLpLpAu.ELDuQGIDALIp.s.p-p.lGIQIKKETYRSEARutNRFLRKpp.soALlElPYTLQo.EELpcKApRARocp..EsYpLWsKVApHL-+LPNGFVIFRESYVKclEsFLpcNAsTLoGLI ..............pcsLctFEcFLtuLDL-pYpp+hRsIKTVEQDLP+EL.PLsslYcaYWcs...p.pF.sFE-aF-paWc+......+L+......P.LspFI+KYFaG..CShtFV+hGhcARLYRThlSIhTQFHFCYhWps.C.....p.....L.Lpus.ELDtpGIDAhlp.h.pt..lGIQIKK.oYRpEA+stsRFh++pp..tsuLlElPYsl.s.EELpcKhtpsRscp...csYp..hchh.p+.h.+L.NGFVlFpEsYl+.l.............................. 
0 3 3 3 +9406 PF09574 DUF2374 Short_TIGR02808; Protein of unknown function (Duf2374) TIGRFAMs, Coggill P anon TIGRFAMs Family This very small protein (about 46 amino acids) consists largely of a single predicted membrane-spanning region. It is found in Photobacterium profundum SS9 and in three species of Vibrio, always near periplasmic nitrate reductase genes, but far from the periplasmic nitrate reductase genes in Aeromonas hydrophila ATCC 7966. 21.40 21.40 22.30 63.50 20.10 19.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.13 0.72 -4.34 11 107 2009-01-15 18:05:59 2007-05-08 12:46:36 5 1 106 0 12 42 1 42.00 78 97.19 CHANGED MSTLESVIWHlLGYuAMPVIILuGFluVAsVSIhLLuhsKDK MSTLESlhWHVLGYSAMPVIILuGFlGVAVVSIhLLuhTKDK...... 0 1 4 8 +9407 PF09575 Spore_SspJ Spore_SspJ; Small spore protein J (Spore_SspJ) TIGRFAMs, Coggill P anon TIGRFAMs Family Spore_SspJ represents a group of small acid-soluble proteins (SASP) from Bacillus sp., which are present in spores but not in growing cells. The sspJ gene is transcribed in the forespore compartment by RNA polymerase with the forespore-specific sigmaG. Loss of SspJ causes a slight decrease in the rate of spore outgrowth in an otherwise wild-type background. 25.00 25.00 88.60 88.30 19.30 19.30 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.20 0.72 -3.76 3 23 2009-01-15 18:05:59 2007-05-08 12:52:21 5 1 23 0 5 12 0 45.90 79 96.88 CHANGED MS.FFNKDKGtNSEKD+NsVcGALEDAGpALKGDPLQEAVQKKKNNR Mu.FFNKDKGKpS-K-KNVIQGALEDAGuALKDDPLQEAVQKKKNNR.. 0 1 2 3 +9409 PF09577 Spore_YpjB Sporulation protein YpjB (SpoYpjB) TIGRFAMs, Coggill P anon TIGRFAMs Family These proteins are found in the endospore-forming bacteria which include Bacillus species. In Bacillus subtilis, ypjB was found to be part of the sigma-E regulon. Sigma-E is a sporulation sigma factor that regulates expression in the mother cell compartment. 
Null mutants of ypjB show a sporulation defect, but this gene is not, however, a part of the endospore formation minimal gene set. 20.40 20.40 20.40 22.30 20.10 19.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.42 0.70 -4.79 10 157 2009-01-15 18:05:59 2007-05-08 13:35:37 5 2 156 0 27 110 0 227.20 45 87.38 CHANGED uhcELscLSDslLQLsKpp+YEEAlQVLpYFpcpFhus-hcppp..lTssplRplTluY-cAl+uLpppchscpEKl+ssspFRLllDAlsScpcPLWsphEcPlMcuFushKcAspppDspsFpcphNpFloLYslIYPSLpIDlssspLppVssHlshlEphc..phopsopp-+LsllcpDLpslFDpVccD-ADPSLLWVIloTGGIIlhTLTYVGaRKYKuEKcKcKsR ............................................................pWpELssLhD-uLQLVKcsc.EcAlQVLpaFS-QFhhpssc..ccp..lTs-plRlISLuY-cAppSLsppslscp.Klcslh.tLpLAVDA.sSKaQPLWhEhEtplM-AFSphEKAhpKc...Ds...spFppoLNsFLpcaslIYPSLhIslPEs-lQRVsuHlSY....L-.ch.c.s.sh.Lc.s.cus....phQ.L.ull.+uDLQ+lFcsVK.......KD.......Eh....s......P......SL.........I......Wh.MshTGGlIlhTLTYVGWRKYKGE+EK+Ksp........................... 1 6 17 19 +9410 PF09578 Spore_YabQ Spore cortex protein YabQ (Spore_YabQ) TIGRFAMs, Coggill P anon TIGRFAMs Family This protein is predicted to span the membrane several times. It is only found in genomes of species that perform sporulation, such as Bacillus subtilis, Clostridium tetani, and other members of the Firmicutes (low-GC Gram-positive bacteria). Mutation of this sigmaE-dependent gene blocks development of the spore cortex. The length of the C-terminal region, which includes some hydrophobic regions, is variable. 21.00 21.00 22.50 22.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.52 0.72 -4.03 38 379 2009-01-15 18:05:59 2007-05-08 13:55:22 5 1 375 0 85 295 6 79.20 32 46.51 CHANGED hlsslhsGhhlGhhaDhY+..hh+phh+hp.+hhshltDllFWl...ltulllFhhLhhsN.GplRhYlaLullhGhsl...Yhtllup ..............hlhhlhhGhhlGhsaDhYp..hhhpth.....cpp...+hh..shlpDlLFWl...ltulhlFhhLhhsNpuclRhYlaLullhGhhh...Ytphlp............. 
0 45 70 75 +9411 PF09579 Spore_YtfJ Sporulation protein YtfJ (Spore_YtfJ) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this family are encoded by bacterial genomes if, and only if, the species is capable of endospore formation. YtfJ was confirmed in spores of B. subtilis; it appears to be expressed in the forespore under control of SigF. 25.00 25.00 25.10 25.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.81 0.72 -4.13 71 712 2009-01-15 18:05:59 2007-05-08 14:01:05 5 1 496 0 166 461 6 87.30 40 62.67 CHANGED lcssD.slIlPlu+VuFGFuuGGu-hpspp.................tppt..........hGGGuGuGlulpPlAhLVlp.s.....spl+llsl.spps..hl-+ll....-hl.Ppll-+ ..........lpssDGslllslS+V..uF..GFuAGGS-apssp....................t.ttps.............FGGGSGuGVSIsPlAFLVls.s.....ssV+lLpl..spss....hh-Kll.....-hsPphl-K......................... 0 73 135 147 +9412 PF09580 Spore_YhcN_YlaJ Sporulation lipoprotein YhcN/YlaJ (Spore_YhcN_YlaJ) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry contains YhcN and YlaJ, which are predicted lipoproteins that have been detected as spore proteins but not vegetative proteins in Bacillus subtilis. Both appear to be expressed under control of the RNA polymerase sigma-G factor. The YlaJ-like members of this family have a low-complexity, strongly acidic, 40-residue C-terminal domain. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.02 0.71 -4.16 52 715 2009-09-10 23:13:51 2007-05-08 14:11:15 5 3 229 0 145 527 0 161.00 19 84.30 CHANGED hhhhhluGCsssppsp.tt...............................................................................................................................................sstsst.thsp..hphpp.tsppt............htstpcpclAcclschss.plspVccAsslVs...........................................sppAlVulchp.t....t.......spsc...pl+ppVpcslcshssphppVhVouDs-hhpRlcshuppl.psGpsh..pshhp-lsphl..p+lhs ..........................................................................................................................h......................................................................................................................................................................................................................................................t..t....tp......hphpphttpp.........................pstpsspchAcplsshst.plssVpcusslVs...........................................sp....pshVulcsc.tp.....p.............................................stsc..pl+ppVtcplps.sP...p.hslhVosD.chhpclcplsppl.pp.s..p.sh...st.h.tpplstlhtch................................ 0 50 104 116 +9413 PF09581 Spore_III_AF Stage III sporulation protein AF (Spore_III_AF) TIGRFAMs, Coggill P anon TIGRFAMs Family This family represents the stage III sporulation protein AF (Spore_III_AF) of the bacterial endospore formation program, which exists in some but not all members of the Firmicutes (formerly called low-GC Gram-positives). The C-terminal region of these proteins is poorly conserved. 
21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.05 0.71 -4.43 40 411 2009-01-15 18:05:59 2007-05-08 14:19:16 5 1 379 0 93 314 6 174.90 20 89.74 CHANGED llhllhsshlEhLLPsush+KYl+hVlGLlLllllLsPllpLhp.p-hshththhpp.......pp...ppphppptcchpspppshll-p....hpspLcpplccplc...pphshphtclplphsps.pptt.......pIpplsltlpp...tp.t........................................................pVc.lpIst..................tp.stppttpspptpclcphlup....hapl.sp-pIpVth ..................................................hhllhsshlchlLPssshpKYl+hlluLlLllllLsPlhpl...hp.pc.h...s..h....h.t.....hsp.......................tttt.p..pp...l.p.pp.pcch.pttp..c.shslcp.....htpphccplppthp........pp.hshph.clpl.hs....psttp.p............plpp.l.ltlppt.t.tp..tt..t.........................................................plc.lplst.................................................t.t..tt.pt.t.c....hpphhup......hhpl.t.cpIpl................................................................................. 0 47 76 82 +9414 PF09582 AnfO_nitrog Iron only nitrogenase protein AnfO (AnfO_nitrog) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry include Anf1 from Rhodobacter capsulatus (Rhodopseudomonas capsulata) and AnfO from Azotobacter vinelandii. They are found exclusively in species which contain the iron-only nitrogenase, and are encoded immediately downstream of the structural genes for the nitrogenase enzyme in these species. 
23.90 23.90 23.90 57.20 23.30 23.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.96 0.71 -5.18 14 61 2009-01-15 18:05:59 2007-05-08 14:24:52 5 2 51 0 39 60 0 200.20 27 92.49 CHANGED hcIAVhlsppGphsohh-sGhltVYpcssspWplt+ch.hs.lssspulstl+ttlsshlspLscC+lhlupsspGlsYuhL.-chuhplWchpGpsh-hLDtVhccEp-pt.cptt...............................tssshshshPlchG.s...GcaplsLpcl.ppsscloSKQlLlPFLcsssFpcL-llCsHlPpWhsp-lsthsLch-s..pphs+pslp ...cIAValscpGpssoh.csGh.ltlap+.psspWphhcch.hp.l.ssstulsplRpphppllptLscC+lhlupplpGlsYshL.-chGhslWch.pGps.-..hLDtlhccEpcptpptt.................................ssshsh.tPhchs..s...GpaplsLpcl.t.psssloSKplLlPFLcpssFppLEllCsHlP.Wh-pcltthsLphch..pt.......h............ 0 17 30 34 +9415 PF09583 Phageshock_PspG Phage shock protein G (Phageshock_PspG) TIGRFAMs, Coggill P anon TIGRFAMs Family This protein was previously designated as YjbO in Escherichia coli. It is found only in genomes that have the phage shock operon (psp), but it is only rarely encoded near other psp genes. The psp regulon is upregulated in response to a number of stress conditions, including ethanol, expression of the filamentous phage secretin protein IV and other secretins and heat shock. 25.00 25.00 55.60 55.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.28 0.72 -3.91 13 608 2009-01-15 18:05:59 2007-05-08 14:26:17 5 1 604 0 44 195 2 64.50 71 79.78 CHANGED MlELlFllsFhlsLllTGlSllGllAAlsVAhslMhluGMhulVIKLLPWLlLhllslWlhRshp .MLELLFVlGFFlMLMVTGVSLLGIlAALVVAT.AlMFLGGMLALMIKLLPWLLLAlAVVWVIKAl.t.. 0 1 7 25 +9416 PF09584 Phageshock_PspD Phage shock protein PspD (Phageshock_PspD) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are phage shock protein PspD, found in a minority of bacteria that carry the defining genes of the phage shock regulon (pspA, pspB, pspC, and pspF). 
It is found in Escherichia coli, Yersinia pestis, and closely related species, where it is part of the phage shock operon. It is known to be expressed but its function is unknown. 25.00 25.00 27.70 29.20 19.50 19.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.16 0.72 -4.27 7 520 2009-01-15 18:05:59 2007-05-08 14:33:02 5 1 516 0 28 113 1 63.30 79 86.29 CHANGED t+s+sG..LKhhuKlhlhsALpYGPAGsAGWhVKoVuRKPL+hLLAhsLEPlL++hhs+lutpahp ................QKVKPG..FKlAGKLVLLTALRYGPAGV...AGWAlKSVARRPLKMLLAVALEPLLSRAANKLAQRY..Kt. 0 1 5 17 +9417 PF09585 Lin0512_fam Conserved hypothetical protein (Lin0512_fam) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of few members, broadly distributed. It occurs so far in several Firmicutes (twice in Oceanobacillus), one Cyanobacterium, one alpha Proteobacterium, and (with a long prefix) in plants. The function is unknown. The alignment includes a well conserved motif GxGxDxHG near the N-terminus. 23.40 23.40 23.50 24.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.43 0.71 -4.51 22 222 2009-01-15 18:05:59 2007-05-08 14:40:49 5 3 169 0 69 202 119 110.20 39 85.68 CHANGED pphhlEhGhGsDlHGQDhTKAAtRAV+DAlp+sSlsuhhc.hhsh.shspMpVpVclGVscP..-pVDt-tltshlPaGp.ssVpllpGGL......sltchss.s-shlIAsAAVpVu .......hlhlEhGhGsDlHGQ..DhTKAAtRAV+DAIp+NSlsultp..h.....lsh..s...hp.sMhVplpluVs.......ps..-plDp-tl+ullPYGp..solcshpGGh.s..uhtltchs-ts-.hhIssAuVpV......................... 0 25 51 63 +9418 PF09586 YfhO Bacterial membrane protein YfhO Coggill P anon Pfam-B_2727 (release 21.0) Family This protein is a conserved membrane protein [1]. The yfhO gene is transcribed in Difco sporulation medium and the transcription is affected by the YvrGHb two-component system. Some members of this family have been annotated as glycosyl transferases of the PMT family. 
28.50 28.50 28.50 28.50 28.30 28.30 hmmbuild -o /dev/null HMM SEED 843 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.37 0.70 -6.52 56 1820 2012-10-03 03:08:05 2007-05-08 15:29:27 5 4 1236 0 257 1471 211 637.40 21 83.13 CHANGED u.hllPhllhhhhhh.....hthhshuspslhssDhhpQYls....Fhthhpptlh......sssshFYoastGLGushhuhhuYYl.hSPhshlhhhFs..............hpphspuhhllsllKhulhGLohhahhpphh........tph....hhslh.....huhsYuLsuasls.phslhW...hDshIlLPLllhGl-+llcpp+hhhahlslulhl.lsNaYhuYMhslFhhlYhlhph...h.......pshpphh.....pthhpahhsSlLushhSulhllPshhslhs.spps.sp...sh.hthphshhchhs+hhhGsashpph.......s.laluhlshlhhlhaFht+phph+h+lhhsllhhhlllShhhphlshhWpGhptPsha.aRaualhuFhhlhLuupslp....phpph.........hhphhhshhllhtlhh.hshhhp.ppt...phhp.....h...................................................slhhlllhhlllh.hhh..........tph.hhhhhhllhlhhhh-hshNhhhslsphs....hsspp.asshhpshpphhphhp....ppsss.haRl-phhsps......t..........N..-shhhsYpGlStaSSlhspsh.shhs.....slG...tssssphphtssohlhDuLhulKYhls............................................pspphplYcNp.sLPluasssshh......pchchpp.....tssls...pQsthLpulsspst.............................................phFps.........................p.sphphpssptts..shht.st......................spstssslphshs.sssssshYlphs.........................................s.sppshslpV.Nspshppp...............pstlhsluhpscsp.plplsl......hsps....phplsphplhshsh.pshppshpplpp.pthphtphp...ssplpuslss..pcsshLhhoIPYs+GWpsplsGKplphp+..sp............ssFhu..lslspGppplplpYhPshhhhG..lllS 
.......................................................................................hlshhlh.hhhh.......t.h..h.....u...t........h......s.Dhhp....Q.hh.........a...h.hphh...........t..p...h.......as...h.s..ul.Gh.sh.ht........uYYh..hu.h....l.h....hh...............hp.hs.shhhh...hhl+hhhhhhshh.h.hphhh...........................th............h.hhhuh.Yuh.shhh...............a......hss.hhhhPLhlh.ulch.h.h............p....pp....+......h...h...hh....h.s.....h...hhh...h.p.....N......a......Y.....h.....ua..hhs.....lhhhhahlhth....................................ph..hp..hh......pthh..hhhhshlu....sh.uhhhhhPs...h..sh.p.ptp.t.......................................................................h.........h..........p..h..p..........h.....h.............h.......................s.........................................h..ah...hh........h.s....h..lh....h.hh.....h..h..........p......hp..h..h..h.......hhh.....hh....h...hhh.h.hh...s.......h........h.s.......ha.......p...uh..p.P.t...................hR...as...alh.sh.h.h.hsh.hhp..........phpph.....................hthhh...hhh..hhhhh.......hhh...h.h.....t............h.........h.h.h...............................................................sh...h.h.h.h.hhhh.hh.h.h......................................................h.h.h....hh..h..hhh.h.h..........h.......h.....s.................hpth..................p.t.h..tp.....h....t..........t.......t.h..............................ttt..h.Rhp...h.....t...........................N....sshh.ht....atuho....asSh.hstt.....p.hhp.............ths.........tp.s...ht.......s..sp.....h....slhslph.h.........................................................................................p.t......ph.hhpNp..t.hshu.h.hspp...h...........p.p...p.ht............sht.ppt.t.hhpths..p....................................................................................hhp.......................................p.t..ht..t..t..............................ttt.hthp.h.......hp.....t.....tph..ah.h........................................
......................................tpthpl...l...st...p...............................hhsl.s........t...tp.phplph.............tt....thpht...thh....t...s....pt.hp......pthpph.pt............h...p.......ph........ps.plthshss..ppst.....hlhh.o.l..P.Y..sp.G...Wpsh...s.G.c..p..lp.hpp..sp............tshhu..lpls.t.Gp.p.plphpahP.hhhhGhhho..................................................................................................................................................................... 0 106 190 230 +9419 PF09587 PGA_cap Bacterial capsule synthesis protein PGA_cap Coggill P anon Pfam-B_1441 (release 21.0) Domain This protein is a putative poly-gamma-glutamate capsule biosynthesis protein found in bacteria. Poly-gamma-glutamate is a natural polymer that may be involved in virulence and may help bacteria survive in high salt concentrations. It is a surface-associated protein [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.63 0.70 -5.02 181 2048 2012-10-02 19:15:56 2007-05-08 15:48:07 5 29 1542 0 483 2755 332 235.10 26 60.66 CHANGED slshsGDlh.hs........psh.p.h..t..................................shs.ha.ttlt.s...hlppu...D.lslsNLEssl.........................................ssssp...shss..h.....htapsssph.sssLppsGh..c...slsl.ANNHshD....aGtpGltcTlp...tLc.psG...lthsG...suts.tpupp...hlhph........pGh+luhluas............................................................................................................ppltpc.......lpps+...p............sDlllVshHW..G....tE........hptt...........P.sspQpp............hA+thlD.u.....G.ADlllGpHPHVlpshE.ha..........csp.......................hIhYSLGNF.lhst 
.............................................................................................................lhhsGDhh.ht........t.t.h.t.h..t..........................................shp..a..ttlt......hl..ppu....D..l.s.hsNhEssl.............................................sspst....hsth..........hapsss.ph..hp..sl.......p.s.s........G...a.c....slsl.ANNHshD....hGh...p...Glh.s.T.lp............tl.c....p....ts...............lth..h.G.............shp....s........t...t....c..s........p.p..s..........hl.h.ph............pGh+luhluashsh.ssht.sh....s....t............................................................................................................................................................spltpp...lccs+....p..........puDl.llVhhHh.....G.......sE.........................a..p.p...........................P...sppQpp.......................................................hu+th...l.-..t........G...A...Dl.lhGpHPH.V..l.p.s.hE.hh................................ptp.................................hI..h..YShGNFl.s.................................................... 0 197 343 427 +9420 PF09588 YqaJ YqaJ-like viral recombinase domain Coggill P anon Pfam-B_3587 (release 21.0) Domain This protein family is found in many different bacterial species but is of viral origin. The protein forms an oligomer and functions as a processive alkaline exonuclease that digests linear double-stranded DNA in a Mg(2+)-dependent reaction, It has a preference for 5'-phosphorylated DNA ends. It thus forms part of the two-component SynExo viral recombinase functional unit [1]. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.06 0.71 -4.24 97 1553 2012-10-11 20:44:46 2007-05-08 15:50:07 5 18 969 13 375 1400 442 140.60 24 46.91 CHANGED -WhphRptt........l.suS-sus.................................................hhG.hs..h.....ps.shplhhccssp..............................ptspsht....hGpphEshstphappppG..........hplpt.......thh........p.shhtAShDGhs..................................................................................................................................thlEhKsst....t..hht....................ttl....PptYhsQ.....lQtthhV........o.utc ...............................................................................WhthRhth..............l.suS-sts.....................................................llu....hs...............ts....hhphhh.ph.hsp.............s..........................................................................psp.sht....h.GpphE..........s..ut..........p....ha..ph.p.pu.........................hplpc..............p.shhh+...........spp.h...h.u....u.osDGls.........................................................................................................................................................sshLElK..ssh...............s.p..hhph..pht....................................tl.................ttYhsQlQhphhlos..t............................................................................................................................................................ 0 189 280 336 +9421 PF09589 HrpA_pilin HrpA pilus formation protein Coggill P anon Pfam-B_3574 (release 21.0) Family HrpA is an essential component of the type III secretion system (TTSS) which pathogens use to inject virulence factors directly into their host cells, and to cause disease. 
The TTSS has an Hrp pilus appendage for channelling effector proteins through the plant cell wall and this pilus elongates by the addition of HrpA pilin subunits at the distal end [1]. 25.00 25.00 26.00 25.90 24.90 24.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.82 0.71 -3.66 6 71 2009-01-15 18:05:59 2007-05-08 16:00:00 5 1 55 0 4 39 0 119.80 48 98.42 CHANGED M........................s.hppLsshGptslNslGGA....................................hQGlNslsSusshpsNll...................usT..GsopSscAppcuhucu.......................................DAsuA+L..........................................................uhQucEstK+pp.ssLsAhpAu+EDuoNKKISuTtpNApGIsY ....................h.....slhSSLosAGpulVNslGGA....................................hQGlNoVcSuADRphuLh...................psT..GSoDSlDAspsuluKG.......................................DAcuAcL.........................................................puhusEEsuhhREpSMLAGFEstKEsLoNQIVAuKIcNuV.VQF. 0 0 1 3 +9422 PF09590 Env-gp36 Lentivirus surface glycoprotein Coggill P, Bateman A anon Pfam-B_3269 (release 21.0) Family This protein is found in feline immunodeficiency retrovirus. It represents the surface glycoprotein which is found in the polyprotein C-terminal to the Env protein. 
19.30 19.30 19.90 21.70 18.70 16.70 hmmbuild -o /dev/null HMM SEED 591 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -13.04 0.70 -6.31 5 1139 2009-01-15 18:05:59 2007-05-08 16:06:50 5 2 10 0 0 929 0 233.30 65 86.41 CHANGED FuQNRQWIGPEEAEELLDFDIATQloEEGPLNPGVNPFRVPGIT-pEKQ-YCsILQPKLQELR-EIpEVKL-E-NAGKFRRlRYLRYSDEpVLSllYLLl.GYl+YLhsRNKLGSLRHDIDIEss.scEpFsK+EKGsTlNpKYCRlCCIGsssLYLlLFIGIGlasGossAQVVWRLPPLVVPVE-oEIIFWDCWA..PEEPACQDFLGAMlHLKAshNISIQEGPTLGpWAREIWATLFKKATRQCRRG+lWRRWNETITGPhGCANNTCYNlSVlVPDYQCYlDRVDTWLQGKlNISLCLTGGKMLYNK...-TKQLSYCTDPLQIPLINYTFGPNQTCMWNTSLIcDPEIPKCGWWNQsAYY....NSCKWEQsDVKFQC........QRTQSQPGoWlRsISSW...KQRNRWEWRPDFES.EKVKISLQCNSTKNLTFAMRSSuDYu-VsGAWIEFGCaRNKS+pHopARFRIRCRWNsGoNTSLIDTCGscQNVoGANPVDCTMpuNTMYNCSLQcGFTMKIEDLIVHFNMTKAVEMY............NIAGNWSCsSDLPouWGYMsCNCTsosSo....hsKM+CPsc-GILRNWYNPVAGLRQALcKYQVVKQP-Y ..............................................................................................................................................................................................................................................................................................................................................................................................................AYY......NuC+WEps.sVpFQC.........QRTQSQPGoWhRsISSW...+QRNRWEWRPDFES.EKVKlSLQCNSTKNLTFAMRSSuDYG-VsGAWIEFGCHR..NKS+hHo..-ARFRIRC+WN.GsNsSLIDTCGp..s....NVoGANPVDCTMtAspMYNCSLQcGFTMKlEDLIhHFNMTKAVEMY............NIAGNWSCpSDlPtsWGYMsCNC..T..................KMtCPpppGIlhshYssshh.h.tLt+Y..lVKQP-Y................................................ 0 0 0 0 +9423 PF09591 DUF2463 UPF0328; Protein of unknown function (DUF2463) Coggill P anon Pfam-B_3132 (release 21.0) Family This protein is found in eukaryotic, parasitic microsporidia. Its function is unknown. 
22.00 22.00 55.00 54.50 21.90 21.90 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.48 0.70 -4.74 23 38 2009-01-15 18:05:59 2007-05-08 16:07:07 5 2 2 0 34 38 0 199.70 39 75.93 CHANGED FPhhhYhIhsKDsF-ps.hLRFIslLhPaSYSAlQahhLLaoN...W+uspKPEshLaphLYahLNlLLlsFulISILSIIshsl.....scWpss-s.hhaSllLPSFhlssTYLLSTSCsLVPGpIuFTDTGIslLIDlLILLs.llsllh..hhccsc....Yh.a.ulhSslLlLl....RhL+E+ahP..SccS..........u.PousWRVulhllILlLushlYshhshssl ....aPhhMYhIhsc-pFccs..FLRFIslLhPhSYSAlpahhLlaoN...W+uppKscshLpshLYahLslLLluFulISILSIlsFsl.....s.cWcs..scs.hhaSl.lLPShhlsssYLLSTSCsLlPGpIuFTDTGIslLIDlLILlsslssllh..hhccsc....ah.ahullS.lLlLl....RlL+E+hsP..sccS..........s.PossWRlulFllILlLuhhlYshhshhsh.... 1 34 34 34 +9424 PF09592 DUF2031 Protein of unknown function (DUF2031) Coggill P anon Pfam-B_2630 (release 21.0) Family This protein is expressed in Plasmodium; its function is unknown. It may be the product of gene family pyst-b [1]. 20.70 20.70 21.40 21.60 20.30 20.60 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.66 0.70 -5.20 44 121 2009-01-15 18:05:59 2007-05-08 16:18:06 5 3 5 0 117 121 0 178.40 43 83.33 CHANGED RlsILK.YVhFS.......IlICSF...EYuKN...ELYhlN......-RsIhLERNlINFRNNRILADsDNpFDLN-FYQSTLSLANQhN-hs-....Ds........cEItaLRNhIDSHIKKHKEsNTL.DLpNVD.pKTKKLIscL+KELEEl......KKElDN..KpNuELuIQPIpDKpIhKKDENsSVSEcEsF+QLENp.s.lt............E.NcIs.SSsphhc.Khpp+h+..Kttpphlhps..lhhlshshslh.lsG....hh.lhhllls .....................................thhp.hlhas.......lhlh.F...casp.s.....................ELa.hs......tttlhhERNlIsFRNNRILussDNpFDLNpFYpSTLSLssQhs-hs-........ss.......cEIhhlRNhIsS+IKcHKcssT..sLpNlD.pKTKKlIpcl+cELEEl......KKElDN..htN.sclsIpsIpsKhIhKKscNssVSEp.EsapQLcNp...........................................c..pht..s....p.p..pchp..+...thhhp...hhhh.hh..h.h..h.s.h.h.hhhh........................................................................................ 
0 0 69 117 +9425 PF09593 Pathogen_betaC1 Phage_C1; Beta-satellite pathogenicity beta C1 protein Coggill P anon Pfam-B_991 (release 21.0) Domain Cotton leaf-curl disease - CLCuD - is of major economic importance in cotton-growing areas of the far-east. The infectious agent appears to be a single-stranded DNA molecule of approx 1350 nucleotides in length, which, when inoculated with the Begomovirus into cotton, induces symptoms typical of CLCuD. This molecule requires the Begomovirus for replication and encapsidation [1]. DNA beta encodes a single protein, betaC1. The intracellular distribution of betaC1 is consistent with the hypothesis that it has a role in transporting the DNA A of Begomovirus from the nuclear site of replication to the plasmodesmatal exit sites of the infected cell. The DNA beta-encoded protein, betaC1, is the determinant of both pathogenicity and suppression of gene silencing [3]. 25.00 25.00 46.80 43.60 19.70 19.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.53 0.71 -4.38 36 475 2009-01-15 18:05:59 2007-05-08 16:19:52 5 1 198 0 0 438 0 116.10 44 96.96 CHANGED TIpYsNpKGlcFlIDVRLpppppIhVpIplhST+SPsLsKpcahIPYsHsGIIsP....FDFNulEEuI+shLclMY+-Sslp-F+pEDMl-sIDIlMMccAsVl-Icls-cYcVpspssV .............TIpasNp+GlcFh.lDV+lp.-sppIhVclclhST+..SPuLsKpcFhIPYsapsIhsP....FDFNul..E-uIpshlchhYpcupIpEh+pEDhVchlDIlMhcpssllsh-Vh-.YslspphsV................................. 0 0 0 0 +9426 PF09594 DUF2029 Protein of unknown function (DUF2029) Coggill P anon Pfam-B_1780 (release 21.0) Family This is a putative transmembrane protein from bacteria. It is likely to be conserved between Mycobacterium species [1]. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -12.03 0.70 -4.87 135 2105 2012-10-03 03:08:05 2007-05-08 16:20:35 5 21 708 0 676 2166 215 239.00 18 52.91 CHANGED ah.YPPhssllhsPh.u...........hLs.....hssuhhlahhhs...lsshhhs.lthhhphh........................shhhhshh.hhlshtsl.hsshshGQhshllhshlhhshhhhh...tt.......t.h....................h.uGlhlGLusslKlh......Phlhsl..hhLh....p+c........a+uhhsu.sssssshssl..uhhhhs............sshthahp.hlhts.ptl...........htt.ssto...hsuhlut...hsh..............hhhhh...............shhs..sshshhhhhthh.........ps.sphtths...ssuh.h.......lLsSP ...................................................................................................YsPhssl.h...h.h.sh.s...............................h.ls................hsssh..h..l.h...hhhs............hhsl...h...hs...lhhh.h.ph.hsht........................................hhhhs.h.h.h.h.hh.s..h...pPl...htsh.sh....u..p..h..s.h...lhhslshhshhhht.........pp.......................p.h..............................h.uG.l.h.l.G.Lu....su..hKlh................Pslhhl......hhlh.......................s+c..........................................h..+s...hhtu....sssss.shssl......shhlhs........................................................p.s....h.p..h..ahp....hhhtt..tth........................hph.tspu........h.t.s..hhs.p.....hsh..................................................shhhh.hh.......................................shhs........hshsshhhhthh...............pp...tphht.hs.........hsuh.h.....hlh................................................................................................................................................................................ 0 204 477 607 +9427 PF09595 Metaviral_G Metaviral_G glycoprotein Coggill P anon Pfam-B_922 (release 21.0) Domain This is a viral attachment glycoprotein from region G of metaviruses. It is high in serine and threonine suggesting it is highly glycosylated [1]. 
25.00 25.00 56.90 56.80 20.10 19.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.38 0.71 -4.65 6 400 2009-01-15 18:05:59 2007-05-08 16:31:51 5 1 2 0 0 422 0 170.10 51 88.21 CHANGED LIGlTsLShALNIaLIIsYshpKNhocoEHpss.PPsEsSKcTshsssssPsTsPNsQpsTQ.oTEsST..sAostup.cTtsTsTPDsTsp.posD+HTT.PpuopspTopsspcKpsoRsso......+ppos.pToTtAspsssTsppoSsG+csTTTSspPcosuTTQspEpTopsss.oSsSpt ..LIGlTsLShALNIaLIIsYsh..+shocsEHpss.PPsEssKETsohshssssTpPNsQpsTQ.oTEssT..sAsStupscTEsTsTPDsTsp.posDcpTT.PppopppTspTspcKpsspsso.......+ppos.psoTtAsppssThppoSsuccsoTTSspscsusTTQsp-pTspsss.uS.st......................... 0 0 0 0 +9428 PF09596 MamL-1 MamL-1 domain Coggill P anon pdb_2f8x Domain The MamL-1 domain is a polypeptide of up to 70 residues, numbers 15-67 of which adopt an elongated kinked helix that wraps around ANK and CSL forming one of the complexes in the build-up of the Notch transcriptional complex for recruiting general transcription factors. 21.00 21.00 22.40 35.70 19.40 17.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.98 0.72 -4.17 6 82 2009-01-15 18:05:59 2007-05-08 16:39:24 5 2 56 4 56 113 0 60.40 48 8.06 CHANGED PphHSsVVERLRpRIEsCRRHHsoCEuRYppupsEphEhERccTlsLhpRslcu+uKRuAp ....P.hHSsVVERLRRRIEsCRRHHssCEsRYEpspsEphEhERpcThsLpQRsLcuKAKRuu... 0 14 19 36 +9429 PF09597 IGR IGR protein motif Wood V, Coggill P anon Manual Domain This domain is found in fungal proteins and contains a conserved IGR motif. Its function is unknown. 22.40 22.40 22.50 22.90 22.30 22.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.56 0.72 -4.06 18 185 2009-01-15 18:05:59 2007-05-08 16:50:16 5 7 159 0 138 171 1 56.80 39 30.06 CHANGED lpsFLptIGRsssc+s-.pFEs.W....-cLFp..hsoppLK-hGl.ss+pRRYILphtc+aRpG ....................psFLshIGRsh..pp+ss.Kh..ps..W....cpLas..hsStpLK-h.GI..Ps+pR+Yl...Lcatc+aRpG......... 
0 40 81 117 +9430 PF09598 Stm1_N Stm1 Mistry J, Wood V anon Pfam-B_39435 (release 21.0) Family This region is found at the N terminal of the Stm1 protein. Stm1 is a G4 quadraplex and purine motif triplex nucleic acid-binding protein. It has been implicated in many biological processes including apoptosis and telomere biosynthesis. Stm1 is known to interact with CDC13 [1], and is known to associate with ribosomes and nuclear telomere cap complexes [2]. 21.80 21.80 22.50 22.50 19.50 19.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.86 0.72 -3.28 24 195 2009-01-15 18:05:59 2007-05-09 10:35:27 5 3 137 2 126 189 0 68.20 35 20.52 CHANGED NhaDLLGNDs............E.DsstsstsPs.+tlsKpsspstKt-............sP...PsussPs.........pup+stspsoG.NEuAhR ........N.FDLLGsDs............E.Dssphs....ssss.....+tl....sK....s.s......spstKts.............ss..p.P..sps.sPsttt...............+ss+s.ts.pssG.scsuh+............................................... 0 22 67 105 +9431 PF09599 IpaC_SipC Salmonella-Shigella invasin protein C (IpaC_SipC) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of proteins associated with bacterial type III secretion systems, which are injection machines for virulence factors into host cell cytoplasm. Characterized members of this protein family are known to be secreted and are described as invasins, including IpaC from Shigella flexneri and SipC from Salmonella typhimurium. Members may be referred to as invasins, pathogenicity island effectors, and cell invasion proteins. 
25.00 25.00 26.90 25.80 21.40 20.10 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.78 0.70 -5.42 5 167 2009-01-15 18:05:59 2007-05-09 11:34:12 5 2 152 0 6 107 0 338.40 53 86.65 CHANGED +ssspuLhps-uAhushs.....................AtspslLsohLsDctpsssLspslphhsuspls+lssh....VQ+cLcpcuAclssGpshDISu................hSocAsuLlhu.lssLMusLspADs+LuoKLSLlSFDATKoAAuShlREGhAsLSSSIsGuAsQluITGVGAKpphsGLssc+GALK+NLsstscLssEu+slpLsLNpQNsssLuA...DtlppltlK+ssu-usKplsc.p...................lssuNcplSsEHcusLupcstulpc+I-hcppsa-psplKAQppQppGDtlMcsSssAGNIuuuSGpYAustEcuEQpISQuuu+sApoASs-o+EuS++ocplIQElL+sl-SIsQS+o ...................................................................................+.suchuLu.sLuA.APsV....................LspsssslToaLp.sshQsochsQ-lNtLAsslTsKss-s....VQTpL+EppAEV......GchhDISu................MSSsAVALLuA.AssLMhoLNQA....DoKLSGKLSLVSFDAAKosAuSMhREGhsuLSGSISQSALQLGITGVGAKLcaKGLpsERGALK+NuAKlcKLoTEu+uIKssLNGQNol+LGA..sDuLKolshK+T...GsDuTKsLs-soh................htIKcSNKplSsE...HQAlLSKRlE.SlE.ScI.cL.EQsTMDhT+l-ARKMQMTGDLIM+sSsolGsIAGASGQYAAsQERSEQQISQsNNRVASTASDEARESSRKSTSLIQEMLKoMESINQSKA................ 0 2 2 4 +9432 PF09600 Cyd_oper_YbgE Cyd operon protein YbgE (Cyd_oper_YbgE) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a small protein of unknown function, about 100 amino acids in length, essentially always found in an operon with CydAB, subunits of the cytochrome d terminal oxidase. It appears to be an integral membrane protein. It is found so far only in the Proteobacteria. 
22.00 22.00 27.30 26.90 20.80 20.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.80 0.72 -3.82 21 679 2009-01-15 18:05:59 2007-05-09 11:41:23 5 1 674 0 64 249 4 80.50 64 85.54 CHANGED ..LR........uLSllLAlhlsuhlhWsPstaAsphuuhsshhu.hllWAlCuuhlaGVGFcP+phlWphlF.uPhhuhsILhhslh....aah .........................PLRALSFVMAllLAGCMFWDPSRFAA+TSpLEIWHGLLLMWAVCAGlIHGVGFRPppVlWQGI.F.CPLLADIVLIlGLIFFF.h.................... 0 3 22 44 +9433 PF09601 DUF2459 Chp_urease_rgn; Protein of unknown function (DUF2459) TIGRFAMs, Coggill P anon TIGRFAMs Family This conserved hypothetical protein of unknown function is found in several Proteobacteria. Its function is unknown and its genome context is not well-conserved. It is found amid urease genes in at least one species. 21.80 21.80 23.40 24.10 20.60 19.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.06 0.71 -4.57 33 265 2009-01-15 18:05:59 2007-05-09 11:44:24 5 1 262 0 60 200 49 167.20 41 75.35 CHANGED IhllssslHTDlllPhss....hhthhthhsssslshstsss.p.alshGWGs+sFYhsossas-lsstsslcAl.ssss..oVl+Vsshsshs......ssssltplslupspappLhshlpsoFttstsuts..l.ssuasss-tFatApGcashhpTCNsWoussL+sAGlchuhWs.Phshul ............................IYllSpGhHT.sIlhPscss.............shshs..plcpc.as.pt...p.ahplGWGD+uFYts....p-losuhsLpAhFhsou..uVhHlssaushPc..........suu-l+sL...hLpssQhssLhcalucSFsRDtpGs.lIsl..c...........tG.hhucspFYsAsGRYulLsTCNpWTAcuLcuAGLshss.h.Lstu.s......... 0 19 41 49 +9434 PF09602 PhaP_Bmeg Polyhydroxyalkanoic acid inclusion protein (PhaP_Bmeg) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a protein found in polyhydroxyalkanoic acid (PHA) gene regions and incorporated into PHA inclusions in Bacillus cereus and Bacillus megaterium. The role of the protein may include amino acid storage. 
22.20 22.20 22.60 22.50 22.00 20.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.21 0.71 -4.42 2 94 2009-01-15 18:05:59 2007-05-09 13:53:14 5 3 92 0 6 35 1 162.60 80 76.60 CHANGED MpTh.Y-.llDAhWcpWopuLp.hususKQlEQhTLcsLcQQQ-h...lTpuV-pLptp.pQapAphps..pchVcpL...sGNulsDphpEWpp+h+Eh.s+hQpLhhs.oKoShSllpQsptQacpsspQhlEpQphpRpEhQ+..-ualEphK.hQhphApphEp ...........................METKPYE.LVDAFWKNWSQSLSLFSSAGKQLEQLTLET.LKQQQDALHKLTSGVDELEKELQQF.......TAQFNNQYTDYVKQL....TGNSLNDQIN..EWQ-KWKELSAHM..pQLTVSPTKTSLSILTQTSGQFEETTKQFIEQQQLQREEAQKQLEGFLEEFKoKQLELAKKFEE...................... 1 0 2 3 +9435 PF09603 Fib_succ_major Fibrobacter succinogenes major domain (Fib_succ_major) TIGRFAMs, Coggill P anon TIGRFAMs Domain This domain of about 175 to 200 amino acids is found, in from one to five copies, in over 50 proteins in Fibrobacter succinogenes S85, an obligate anaerobe of the rumen. Many members of this family have an apparent lipoprotein signal sequence. Conserved cysteine residues, suggestive of disulfide bond formation, are also consistent with an extracytoplasmic location for this domain. This domain can also be found in small numbers of proteins in Chlorobium tepidum and Bacteroides thetaiotaomicron. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.59 0.71 -3.79 40 375 2009-01-15 18:05:59 2007-05-09 14:02:06 5 24 93 0 169 369 212 179.60 19 39.68 CHANGED VpIGsQhWMAcNLcsscYRNG-.sl.psts.ttW.....shssuAaCaYsscspssph.........YG+LYNWaAV.........sDs..................R.GLAPpGWHlPoD.sEW.ptLtshl.............G...GcstuGutLKu..h.....stWttss.sss........sspsGFsA........lP.uG.hRsssGsF.hhtGphuhaWooocss....stpAasRplshs....s.sslt+spss..+p...tGaSVRClK ..................................................................................................hstphWhspNLp...................................................ssshs..........pt..psttt.............................hG.hhYsatus....................ss................................................................p.slsPpGW.+lPop.sEa.ppL.hphh.................................................s............p..tsupt.lps...............staht.t.....t..........................tsthGFss...........................hs..sG...ht........t...........s..........s.....s............t.......h.......tt..sphshaWo.s..s.pts..........s.t.sh..h.h.h.ht......t..t...h..ps.ths........pt......uhulRCl+..................................................................................... 1 120 132 160 +9436 PF09604 Potass_KdpF F subunit of K+-transporting ATPase (Potass_KdpF) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a very small integral membrane peptide KdpF, a subunit of the K(+)-translocating Kdp complex. It is found upstream of the KdpA subunit (IPR004623). Because of its very small size and highly hydrophobic character, it is sometimes missed in genome annotation. 
19.20 19.20 19.40 19.50 19.10 19.10 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.52 0.72 -6.83 0.72 -4.02 40 728 2009-01-15 18:05:59 2007-05-09 14:09:44 5 1 704 0 103 314 5 25.00 44 58.04 CHANGED hhluslluluLhl...YLhhALlcPE+F .........llulllshhLls...YLlYALlpsEcF.. 0 34 62 84 +9437 PF09605 Trep_Strep Hypothetical bacterial integral membrane protein (Trep_Strep) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of strongly hydrophobic proteins about 190 amino acids in length with a strongly basic motif near the C-terminus. It is found in rather few species, but in paralogous families of 12 members in the oral pathogenic spirochaete Treponema denticola and 2 in Streptococcus pneumoniae R6. 26.50 26.50 27.50 27.00 26.40 26.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.34 0.71 -4.62 23 1171 2009-09-11 16:54:32 2007-05-09 14:23:32 5 2 634 0 118 730 19 180.40 28 94.33 CHANGED ps+DLlsIGlFuAlYFlh.hhlsshluhhsPhhh..hhtPslsulluGslaMLhls.+VtKhGslolhullhullhhhsGph.hhhhlssllsullA-llt.phu...pY+shptshluYslFuhhhs..G.shlPhahhh-sYh.tshhppGhupsYl-shhthhss.hhhhlhllssllsuhlGuhlGp+lLKKHFcK ................................hKDllshGlFsllY.Flh.hhls.s.h.l..u..h.l.....s.hhh..........hhhPshsulluGslahlhhs.KVt+hGsl.hlhGllhulh.h.h.hs.Gas..hsshlsull.s.ullA.-llt..p.hG...pYc.s...h.p.t.sh.luaslFu..h.shs..G.shl..hal.s.tctYh.tph.h.tp.uh.up-Ylstlh.th.h.ss...hhhhlhlh.sshlsullGuhlGptllKKaFpK.................................... 0 64 86 105 +9438 PF09606 Med15 ARC105; ARC105_Med_act; ARC105 or Med15 subunit of Mediator complex non-fungal Coggill P anon pdb_2gut Domain The approx. 70 residue Med15 domain of the ARC-Mediator co-activator is a three-helix bundle with marked similarity to the KIX domain. 
The sterol regulatory element binding protein (SREBP) family of transcription activators use the ARC105 subunit to activate target genes in the regulation of cholesterol and fatty acid homeostasis. In addition, Med15 is a critical transducer of gene activation signals that control early metazoan development [1]. 26.20 26.20 26.40 26.40 25.90 25.60 hmmbuild -o /dev/null HMM SEED 799 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.78 0.70 -13.71 0.70 -6.34 7 273 2009-09-11 06:41:54 2007-05-09 14:50:32 5 17 89 1 140 249 1 349.30 26 82.33 CHANGED QpVVspIc-AhppsGhsp.....oKsut-MEsHVFtKA+o+DEYLuhVARLIlHhR-hppKppQsp.s.....................DPhNALQsLsutss.sst....shshGP.tP.GtphGG.Gshoshhpsh................psQ...u..shu.PpthstVuths.usQhss.h.........tt.pt...ttttuthtt.t.p.tQt...ttt.P.tMhtshhs.....t..tttt.....ttt.t.ts.pQ.p.h...t.Q.p......Q......p.phpQ.ps.tputsQh.p..tsput.psQphttu.sQ..shpQt.sttp.p.s.hthh.tQhpQh.........stG.ttshupst..Psp.hss.s..Qt...hs.............hhs.tt.t.h............Q.psptQtQpttsp..stt...........................................tQ.s...tstpuuph.u.t....................................................t.....ptsshsshsssthsptp..MMSussPs........pspssQtM..sPQPp.PpPstPsups..sp.ssus.hPSP.uhhPSPSPQ.uQsssstRs.tp......SPG.sL.......NTPGp..sus.SPhssp..EEp.Yh-Kh+pLoKYIEP..L+RMIsKh-ps.tch.cchuKMKpLL-ILosPopRsPLcTL.KCEhALp.........sphus.p.sP..........hspPLl-AV.uNlpSPlhNHoL.R...TFtPshpAlaGssIts.sssu+p.Rlp.D-........pQpIPplLQGElARL-sKFhVsLDPstpuss.tsl+LICpLDDKpLPsVPPlplolPt-YP.pSPphp.tpppYsAsPFL.p.Vp+shhu+lspLPthaSLotLLsTWt.uVRQAC......................hLuh 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hst................................................................................................................................................................................
..................................................... 0 51 60 103 +9439 PF09607 BrkDBD Brinker DNA-binding domain Coggill P anon pdb_2glo Domain This DNA-binding domain is the first approx. 100 residues of the N-terminal end of Brinker. The structure of this domain in complex with DNA consists of four alpha-helices that contain a helix-turn-helix DNA recognition motif specific for GC-rich DNA. The Brinker nuclear repressor is a major element of the Drosophila Decapentaplegic morphogen signalling pathway [1]. 23.40 23.40 23.40 23.50 23.30 23.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.81 0.72 -4.40 3 116 2012-10-04 14:01:12 2007-05-09 14:53:10 5 11 65 1 88 108 0 53.40 40 11.82 CHANGED GSRRuaouuFKL..QVlESacsDNsC+Gs.RAsA+KYNIpRKpVcKWLQsEsQLQsucAp ....................RRoasssFKL..pVl.-hA.c..psssCp.....sA+cFsl..pc..+pVccWpchcspLpst................... 0 25 41 71 +9440 PF09608 Alph_Pro_TM Putative transmembrane protein (Alph_Pro_TM) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of predicted transmembrane proteins of about 270 amino acids. Members are found, so far, only among the Alphaproteobacteria and only once in each genome. 
20.40 20.40 20.50 21.30 19.70 19.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.40 0.70 -4.84 49 249 2009-01-15 18:05:59 2007-05-09 15:58:14 5 2 244 0 90 221 746 229.60 36 87.83 CHANGED EplVhuLSpccVsIossFsGu-lllFGAlcptssh.tt..shDllVslpGPspslslR+K-RhhGIWlNs-uhphcssPoFYAVAoo+Plcclssts.pttchplGls.........hs...................ttssss.sssss..pFpcALlRl+pppGLY........ppptsuVplhppsLFRsslpLPAsls.GsYpsclaLhRsGplluptpsslpVpKsGhEpalashA+ppshlYGLhulslAlhsGWhAuslFRth ...................Epl.lulSscpluITusFsGs-lhlFGAlcpts.sh........tt..thDllVslp.G.Ps.psl.s.VR+.K-RhhG.l.WlNssShpFtslPshYulAooRslpcls.s..tp.shp...phplGlpph..........Ls......................ss.ss.ss...sssh.pFtsuLh+l+pppGLY.........ppc.uuVph....hsp.....oLFR....AslsLPAslPsGpaps+shLhRsG.thlsptssslclhKsGlEphlashA+ppsllYGLhulhlAlhsGWhuuhlFR+.................... 0 29 59 70 +9441 PF09609 Cas_GSU0054 CRISPR-associated protein, GSU0054 family (Cas_GSU0054) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a rare CRISPR-associated protein. So far, members are found in Geobacter sulfurreducens and in two unpublished genomes: Gemmata obscuriglobus and Actinomyces naeslundii. CRISPR-associated proteins typically are found near CRISPR repeats and other CRISPR-associated proteins, have low levels of sequence identify, have sequence relationships that suggest lateral transfer, and show some sequence similarity to DNA-active proteins such as helicases and repair proteins. 
20.80 20.80 21.90 21.80 19.00 18.60 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.62 0.70 -5.68 4 90 2009-01-15 18:05:59 2007-05-09 16:03:54 5 2 55 0 36 91 7 277.10 14 76.09 CHANGED hhulohphhsG+acuss..........sD.usP...EWPPuPhRlFpAlVAoht.chs......E-h-sLpaLpu...Pslhthssshsu........psshphVstsasRG.pchasGsus.hpc......pslslPcsssVthlWc.s-ss-tplA.ls+lstplsahGRspShshspVV.sGul..........hs.cWlsct-s.....GslslRsPhsG.....pLp-Lps+aEshhshh.hGsh..s.PhssYs.hsphlu.-.s.ps..s..h.ch.shG.+tssssphtLDltshs..T-uLRRAslup........hcssplsuhlpGH....GD-tp......HsAYLslssluc+tAcG+l.slGshhPs.loth-......GhhGs-.s..huLh+s+pLtclcLc.ss....VuhhuLps....pths..tuS+oWsoVTPhhLsRaP............................s++lp..................ssullttSlspsGhP.Pttlpslss.tlsGshR....hcRYsshcshpphP....hHs+Isa......Pp.lcGPlhlGusRaaGhGLFsP .........................................................h...............................t.....E..PtP.Rlhtuhhush....th.....................t.........hl....t......................h.....................................................................................................................................................................h.h......................h...hh..h.........s.h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.......................................................... 1 28 32 32 +9442 PF09610 Myco_arth_vir_N Mycoplasma virulence signal region (Myco_arth_vir_N) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents the N-terminal region of a family of large, virulence-associated proteins in Mycoplasma arthritidis and smaller proteins in Mycoplasma capricolum. It includes a probable signal sequence or signal anchor, which, in most instances, has four consecutive Lys residues before the hydrophobic stretch. 20.40 20.40 20.50 24.10 19.40 19.30 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.55 0.72 -4.00 6 43 2009-01-15 18:05:59 2007-05-09 16:05:19 5 1 12 0 15 38 0 32.70 60 2.93 CHANGED MYFLKKKKNKILshALVASLATSLSFGSVIYYS .MaFlKKKKNKILshALlASLssSlSFGSVIYYS... 0 4 15 15 +9443 PF09611 Cas_Csy1 CRISPR-associated protein (Cas_Csy1) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2465 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy1, for CRISPR/Cas Subtype Ypest protein 1. 
20.20 20.20 20.50 20.30 19.60 20.10 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.31 0.70 -5.52 18 182 2009-01-15 18:05:59 2007-05-10 09:29:29 5 1 173 0 41 149 8 345.90 35 83.41 CHANGED spYphpsWLsD.AA+RAGQIpLVTHshKasHsDA+GSslashs......tptsspaLuo..s.sphAhDslGNAAALDVhKhLplph.-.Gc.oLLspLppsDssALssLucsscQ....hcpWhsuFtslhssc..phoSHpLAKQlYFPl...........tsspYHLLuPLFuSoLAatlap+lppsRFu-psKtARpA++cpcaHspshhpYPsLAsQphGGTKPQNIStLNSpRGGcsaLLsShPPpWc.spp+sPhphcSlFp.psthpspspstlppLppaLtssp...........sNhclRppRs.phlsplIDpLlphsuplQph...sGWSspspp..pLtcspQLWLDPhRs..........sDpsFppERcpsD.W.pplupcFupWLNppLpp....ppl.lG-sEt+cWpp ..............................................cYphcsWLs.c.AA.p.....R.AtQlphsTHssKhtHsD...u...Ku..sslh.....h.s.......phspshlso...slsph.......s.hD.s.h.GN.AAA.....LD.lsKhLphph.-...Gp...oLlstlpp.s-.phhphhu....p...spp............hp.pahpuFtphhpsp...p.soHclsKQlYFPl...........tsspYHLLsPLasSSLspthap+lsp.sRFS-psKpuRps++pschp.pshhph.slAs.phGG.........TKPQNIShL.NS.pGG+saLLsShPPpap..st..+.P.p.cohF..p..sththtspshltphpphltsst................NshclRstRt.phlspllDhlhthsttlQph...sGWSpp.....pL.ttp..plWLDPhRs...............s-ttFpp.c.cpppD.W.pplspcFutWL....NttLpc.......pphshGssEhpcWp.t........................................... 1 10 25 33 +9444 PF09612 HtrL_YibB Bacterial protein of unknown function (HtrL_YibB) TIGRFAMs, Coggill P anon TIGRFAMs Family The protein from this rare, uncharacterized protein family is designated HtrL or YibB in E. coli, where its gene is found in a region of LPS core biosynthesis genes [1]. Homologues are found in Shigella flexneri, Campylobacter jejuni, and Caenorhabditis elegans only. The htrL gene may represent an insertion to the LPS core biosynthesis region, rather than an LPS biosynthetic protein. 
23.60 23.60 23.80 24.80 22.20 23.50 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.82 0.70 -5.14 3 269 2009-01-15 18:05:59 2007-05-10 09:39:14 5 1 240 0 15 100 74 249.20 59 89.51 CHANGED M-spTTIITAFFDIGR+Da.......KclsRSNDKYaSYFEFLAuLKNcMIIYTsEslKc+IcuIRscasLE-KTThIIlc-Ip-hccpIYKRII-IppDcoF+Na...lp.RphcNsEshSPcYsYLMhLKuaFVsDAIsRGLT-TN.lAWIDFGFNHGGNVFsshcpFDF.a...sshDENKINLFoIKK....sDQQslF-IhhuhEsaIMGGlIlusS+pWccFY+hsLEShKIhsShGIlDDDQlIhLaC..sYRRNs..sYNhIt+upWFDuLpHFpspslGuKLpI ................McsShTIlTAaFDIGRGDWsupcGF.ccLsRSsDsYFSYFE+LAuLENcMIIFTSPDLKsRVEAIRN.....GKPTT..VIVIDIK...Kp.....h.+a.I+....s.+.....Ic.K..IQ.KDESFTN+...L.-P+plK.NPEYWSPEYVLVsNLKsYFVsK.A.IphG.L...VK....Ts..LVAWIDFGYCR+s.N.V.TpGlKhWDFPF.......DEsKMHLF.TI....KKGLsloSpQplFDhM.IGNHVYIIGGAIVGSQHKWKEFYKLVLESQKITLNNNIVDDDQGIFVMC..YYKRsDLFN.LNY.LG.R.GKWFDLFRCFRSNTLGAKMQA.......................... 0 8 9 15 +9445 PF09613 HrpB1_HrpK Bacterial type III secretion protein (HrpB1_HrpK) TIGRFAMs, Coggill P anon TIGRFAMs Family This family of proteins is encoded by genes found within type III secretion operons in a limited range of species including Xanthomonas, Ralstonia and Burkholderia. 26.90 26.90 27.10 27.80 26.00 26.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.86 0.71 -4.74 8 155 2009-01-15 18:05:59 2007-05-10 09:40:55 5 1 96 0 27 103 0 158.00 31 94.89 CHANGED shhpCssplluGLl-llssALpts......chtDhptlLpALRlLRPchsth-hhDGWLplsRtpWs-AtRlLR-l...-ssssshshu+ALhAhCLpuhpDssW+ttApclLtpssss-ulsLV+sLhuppshhpA.t....................ssssuu-Asussspu............s..tp.pahRl ..............h.pCstthlsGLl-shssALpts......c.tDhptlLsALRVLcPphssh-hh-uhLplt+tpas-AhRlL+c.l...ssss...sptshu+ALhAhCL.....pshsDssW+th........AsplLtsss..sssuhsLV+uL.stpshh.h......................................s.htsp..hts..s............................................................. 
0 5 11 16 +9446 PF09614 Cas_Csy2 CRISPR-associated protein (Cas_Csy2) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2464 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy2, for CRISPR/Cas Subtype Ypest protein 2. 25.00 25.00 32.40 29.70 19.10 21.30 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.62 0.70 -5.21 25 219 2009-01-15 18:05:59 2007-05-10 09:58:56 5 2 202 0 47 169 10 266.80 33 80.18 CHANGED LlLP+l+VpNANAlSSPLTaGFPulTAFhGas.HALpRKLsspt...slplpusuVlsHcaplpshps...hs......psFsLTRNPL......sK...sGso........uuhlEEGRhHLslSLllpssuppt............sppptpthhppltpll.tshRlAGGoll.......phtpschhphs........tpchcphh++LlPGFsLlsRp-LLssthppLpps....................stsphlDAaL-hus.lpapshsspssp............Wp...+.....p.sGWlVPlslGYpul.SPh..atsGplpssRDspsPapFsEolaulGEWlsPHRl.p...........slsplLWpacsc..ppsl.....YhCp ...........lll.cl+lpsANAlSu.lThGFPuhTsahGhs.HALpR+Ltp...pt.....sl..plpGsulhsHphplpsapss..hs..........hsFthTRN...PL......pK........pups............suh.EEu+hHLslSLllEhpsp.t.............ptttpthhpplpphh..ptRlAGGolh............shtpsplhph..............phcclhh.pLhPGasLh-RpphLscthpphppt............................sp.p.LDAhL-hss.lchps..stsss..................Wth..h.......tGaLVPlssGYpuI.SPl..htsGpsppsRss....phP...apFsEslaulGcWhh.a+l.p...................................slpphhWpYchp...pp.h.....Yhh............................................ 0 9 25 35 +9447 PF09615 Cas_Csy3 CRISPR-associated protein (Cas_Csy3) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. 
This entry, typified by YPO2463 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy3, for CRISPR/Cas Subtype Ypest protein 3. 25.00 25.00 29.50 29.50 22.50 24.90 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.91 0.70 -5.34 23 219 2009-09-11 10:34:39 2007-05-10 10:02:20 5 1 207 0 48 165 10 318.40 48 97.09 CHANGED csASVLAFERKLssSDAlhauusW-s+s...pasPlplpEKuVRGT...ISNRLK.stspDPAKL-A-l..ppsNLQpVDVAsLPs-sDTL........KlpFTLRVLushupPSsCNsstYpppLtpslssYhpcpGFsELAcRYApNLANGRFLWRNRlGAEslplpVs....spstpsas.FDAhpasLRsF...spsspplspLuphItpGLuu.ssaslLcVsAalclGtGQ.EVaPSQELlLDc...s..cupKSKhLYpV....s..........slAAhHSQKIGNALRTIDTWYPcss-....hGPIAVEPYGuVTspGpAYRQPK.pKtDFYTLL...DsWlh+schP......slE.pQHYVMAlLIRGGVF.GE ..........sASVLAFERKLssSDAhhaussW.ps.pp...phpPlplpEKuVRGT...................ISNRLKs..shss....DPsKL.DAEl.....pKuNLQpVDsAsLs..-sDTL.........cspFTL+VLushupPusC........ND..pYptpLtsslpsYlpppGFpELApRYApNlANGRFLWRNRlGAEpIpVpVs...........tspppsap..F.supphuL+pF....spsspplppLAphIppGLuu.psas.......hLpVpAhl+lGtGQ..EVaPSQELVLDp.......pupKSKlLYpl.....s......................shAuhHSQKIGNAlRTIDTWYP-ssp....hsPIAVEPYGuVTshGpAaRpPK.pKhDFYTLh........DsWlh+sphP......sl.E.pQHYVMAsLIRGGVFG............ 0 10 25 36 +9449 PF09617 Cas_GSU0053 CRISPR-associated protein GSU0053 (Cas_GSU0053) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is found in CRISPR-associated (cas) proteins in the genomes of Geobacter sulfurreducens PCA and Desulfotalea psychrophila LSv54 (both Desulfobacterales from the Deltaproteobacteria), Gemmata obscuriglobus (a Planctomycete), and Actinomyces naeslundii MG1 (Actinobacteria). 
25.00 25.00 29.60 25.20 19.70 18.30 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.29 0.71 -4.47 4 71 2009-01-15 18:05:59 2007-05-10 10:12:02 5 2 66 0 28 72 5 173.40 26 46.68 CHANGED uulllcAsLpPVpGts.+lhPsTF.....uussYph..thsDG...ssslllDSVsS.ANRlE..Lhshc.st..lV......P.Ipspls..stploslEhsHRhsDAhl..RsS.lstsp..spsltptLpssps+ss......csLhthtPpoLlaGsWDS...cpsptsKluR.luuhI.uYsVcsl....ppuu ....................................pLp.PssG...tltPssa...ssh......ss.usYsh......ph.....s.cG...............hpslLlDSspStANRhEthlhp.t..h..csup.....hl..........................thPhlpV..ph.p................sst..p.h..o.s.....lptsHRhsDual...Rsup.h...st.t...........h......pcpsh.hp.shp.s.s.s.psh.................psl.hchsPsuLlaGs.WcS....tpt..s...sph+lsRslsupI.u..........s............................ 0 17 24 25 +9450 PF09618 Cas_Csy4 CRISPR-associated protein (Cas_Csy4) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This protein family, typified by YPO2462 of Yersinia pestis, is a CRISPR-associated (Cas) family strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy4, for CRISPR/Cas Subtype Ypest protein 4. 
25.00 25.00 25.10 25.10 24.40 24.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.26 0.71 -4.47 32 230 2009-01-15 18:05:59 2007-05-10 10:15:38 5 2 218 7 52 183 6 181.10 33 97.59 CHANGED Yl-IplLPDsEhstshLhstlaspLHtsLsp....hspscIGlSFPpast............sLGspLRlau.sppsLppLptpsWlpshpD..YsplusltsV.Psssp.apshpRhpspu......shpRhh+Rhh+Rt....thstEpsptthspphp.pph..shPalplcStS.......ssQ.pFhLFIc..tphtspsssGtFsuYGLSss.......uTVPaF ......................Yl-IpllP...c.s...EhstthLhutlaspLHpsLst.............ptpscI..G.VSFPphst..............sLGs.pLRlau.sppsLppLptp.sWh..p.s.Lp.D..Ysplo.pltsV..Pp.p.sp..apshpRsQsKu......stpRhh+Rthp+t..................tlsp-psttch.pptp.ppt...............shPalpLcStS........opQ.pFhLaIc..tphts..p.s..spGpFsuYGLSps.......uTVPaF........... 0 12 28 39 +9451 PF09619 YscW Type III secretion system lipoprotein chaperone (YscW) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is encoded within type III secretion operons. The protein has been characterised as a chaperone for the outer membrane pore component YscC. YscW is a lipoprotein which is itself localized to the outer membrane and, it is believed, facilitates the oligomerisation and localisation of YscC. 21.50 21.50 21.50 21.60 21.40 21.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.23 0.72 -4.33 11 875 2009-01-15 18:05:59 2007-05-10 10:43:35 5 7 808 3 107 364 9 110.00 50 60.76 CHANGED s.tst.u.spVsGsVplpp..uLPhsAslpVTl....Lusssu+lLs.tssRhEssthPhpFsL.hNPspl.scucIhLpAtlphss+hsl.sss.QpVhs..ssschclpLlP .....................shppssVSGTVhlRp+.lALPP.DA..VLTVTLSD..sSLA.D.AP.S..+..V..LA.Q+...u.l.R.T.E.GK...Qu.P.FsFsLPFNPu-lp...PNARll.LSAAIo.VssKLlFITD.T.lps.VIs..pG.G.sc.uDLpLVP............................. 
0 15 35 72 +9452 PF09620 Cas_csx3 CRISPR-associated protein (Cas_csx3) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is encoded in CRISPR-associated (cas) gene clusters, near CRISPR repeats, in the genomes of several different thermophiles: Archaeoglobus fulgidus (archaeal), Aquifex aeolicus (Aquificae), Dictyoglomus thermophilum (Dictyoglomi), and a thermophilic Synechococcus (Cyanobacteria). It is not yet assigned to a specific CRISPR/cas subtype (hence the x designation csx3). 20.90 20.90 21.90 21.20 20.20 20.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.55 0.72 -4.18 7 37 2009-01-15 18:05:59 2007-05-10 11:28:15 5 3 34 0 14 37 1 78.50 31 36.24 CHANGED chcLpcs...ltP-DLtplplPshlcsptG...VVlSGRuPIWLYuaLlHchH...TsalAsaDPRL..GAVVVuSHo.phR.GpVI.h ....................................ts...lp.ptltthp..lPt.ls.ptG....lV..loG+uPIWLYshLlHth.H...ssalAsa.DP+l..Gu.VVltoH.s.th.psGpll...... 0 8 12 13 +9453 PF09621 LcrR Type III secretion system regulator (LcrR) TIGRFAMs, Coggill P anon TIGRFAMs Family This family of proteins are encoded within type III secretion operons and have been characterised in Yersinia as a regulator of the Low-Calcium Response (LCR). 21.20 21.20 24.20 23.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.69 0.71 -4.90 7 286 2009-01-15 18:05:59 2007-05-10 11:50:46 5 1 274 0 15 74 0 113.30 36 83.16 CHANGED hp-PLlPWhht+GltVpPahhpposI.LGath.hcGhcLAWRV-..ptRlWIVhl+RsptptGLuNPFAALYLLApAshslLGssahLYGNVsVLtuSsLsupRLA+FYpRWsGAS.E.psGWF.Ltst+Vhoh+sh+KRQp .........p.l.paLcp+GhhspstaLstoAIhlG.ph.hssaplsYRl-..tpElIICpFcRhpsp....t..G.LtsPh.sL.h.hLhpthh.hhs......hl.u.......htss....t.pc..h..ah.ph.s....-..........Wa.h................sp............................................................................. 
0 3 6 9 +9454 PF09622 DUF2391 CHP02587; Putative integral membrane protein (DUF2391) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is found in Nostoc sp. PCC 7120, Agrobacterium tumefaciens, Rhizobium meliloti, and Gloeobacter violaceus in a conserved two-gene neighbourhood. Proteins containing this entry appear to span the membrane seven times. 20.60 20.60 20.60 20.70 20.40 20.30 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.72 0.70 -5.24 9 115 2009-09-10 18:53:12 2007-05-10 12:00:33 5 1 112 0 52 100 14 184.30 24 90.50 CHANGED sslsRGhAGAhLFulPhLhTMEhW.lGhalcPhRLhLlLslshslLhlLs+ttGFR+op...shc-sltDul.AhulGhlsuuhlLhlhthlssshuhcEhlGKlslpuVPsoIGshLuRs.Lttcus-sppt..t..............pstsuhhu-LhhhhlGAlFluhNlAPT-ElhlluhthoPaahlhllLASLhI.ashVatspFpsp+phtpppGhapt.lp.TlsuYllulhluhhhLWhFpRhs.ssuhpphlptslVLGhPAoIGuAAuRLll ........................t.....shsGuhh.hulPh.ho.EsWtlG..h.....p..Lh...hl..h.lsh.hlhh....hsh.tsFppph.........ph......h..l..hchhhAhhlu......hl...ssshlLhhhshls.h..s.......lt+lhl.shPhohGAhls.................................................p....hhGuhhhshsh..st-h..lu..h.....hhhh.h.hhh.h.hh.....................................h....h.s.......h...................................................................................................................... 0 12 29 45 +9455 PF09623 Cas_NE0113 CRISPR-associated protein NE0113 (Cas_NE0113) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this minor CRISPR-associated (Cas) protein family are encoded in cas gene clusters in Vibrio vulnificus YJ016, Nitrosomonas europaea ATCC 19718, Mannheimia succiniciproducens MBEL55E, and Verrucomicrobium spinosum. 
21.20 21.20 21.80 21.60 20.50 20.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.50 0.70 -4.89 17 107 2012-10-11 20:44:46 2007-05-10 12:53:01 5 5 94 0 44 103 11 188.90 25 58.63 CHANGED hKplLlushGhoPQVlTETL.uLhppup......hss-lhll..oTpps+sphthsLL.............pphhp-a.......thpastsslpl..l.sspst.lsDIpotp-stthhshIhphltpLpps.s.tplHlSlAGGRKsMuhhhuhAhpLauc.pDpL.HlhVss.hE......pFhh.stpsthlplpstps.....st..h.VpLsplPalphpp.lsstlhp..uths.spsltphppt.s. .........................................pplLlushGhoPQ....llTETlhsLhppst........hssElhll..oTps.stsph..thpLl...................tphhp-h.......thphppp...lpl.....hhs.ppst.lsDIcot....cDspthtchIhphltphptp.t..tlahsluGGRKsMuhhhshAhphht..tDtl.Hlh......st................................................h..hs..................................tt............................................................. 0 21 34 42 +9456 PF09624 DUF2393 CHP02588; Protein of unknown function (DUF2393) TIGRFAMs, Coggill P anon TIGRFAMs Family The function of this protein is unknown. It is always found as part of a two-gene operon with IPR013416, a protein that appears to span the membrane seven times. It has so far been found in the bacteria Nostoc sp. PCC 7120, Agrobacterium tumefaciens, Rhizobium meliloti, and Gloeobacter violaceus. 
24.60 24.60 25.00 26.40 24.50 24.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.80 0.71 -4.68 40 338 2009-01-15 18:05:59 2007-05-10 12:55:27 5 1 213 0 42 168 8 149.40 34 87.04 CHANGED hhhhhhhhlslhhcpp.....hhhhhlhhshlhhhh.hshh.........hhhhl.cpts+psplslspscp.lp..hspshhls..hslpNpuphsh................ppCplpscl.hp......sssshhpphthp.ts...ahpp...................ph.ltp..Lphscspph....phh...hs...t.sh...tshslph.pupCh .............................................................h.LhshllhLplh.+ch.....Ahl.hhhhsIlhshL.lYhh...........hhpll.pppspsphoplsps+...lp....spulIls..hplpNhoKhsh................+cChlhhcIhspp....sssslhp-hph+.h.....appp....................ohEIh-..L.sNssp.h....Rhu...hs.....sh...NNhphhh.s.cCh............................. 0 11 34 41 +9457 PF09625 VP9 VP9 protein Coggill P anon pdb_2gj2 Domain VP9 is a protein containing a ferredoxin fold. Two dimers come together to form one asymmetric unit which possesses a DNA recognition fold and specific metal binding sites possibly for zinc. It is postulated that being a non-structural protein VP9 is involved in the transcriptional regulation of the White spot syndrome virus, WSSV, from which it comes. WSSV is the major viral pathogen in shrimp aquaculture [1]. VP9 is found N-terminal to the Pfam:PF07056 domain. 20.50 20.50 20.60 22.90 20.20 17.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.51 0.72 -4.17 6 19 2009-01-15 18:05:59 2007-05-10 13:38:38 5 2 2 7 0 15 0 77.60 41 19.98 CHANGED Lpl+suFhlsG-c.s-+YEcVhh.oFcuV-.....olRKSELcDssaIVpLK-scpl+lpsGlccLRpLTGDsoLpIp.hlosshP ..LphcuuFhhhG-c.sc+YEcVht.oF-uV-.....olR+SELc-ssaIVpLKcscphphpsGlccLRpLTGDsoLpIp.hlossh........................ 
0 0 0 0 +9458 PF09626 DHC Dihaem cytochrome c Coggill P anon pdb_2fwt Family Dihaem cytochrome c (DHC) is a soluble c-type cytochrome that folds into two distinct domains, each binding a single haem group and connected by a small linker region. Despite little sequence similarity, the N-terminal domain (residues 12-75) is a class I type cytochrome c, that binds one of the haems, but the domain surrounding the other haem is structurally unique. DHC binds electrostatically to an oxygen-binding protein, sphaeroides haem protein (SHP), as a component of a conserved electron transfer pathway. DHC acts as the physiological electron donor for SHP during phototrophic growth [1]. In certain species DHC is found upstream of Pfam:PF01292. 22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.98 0.71 -3.44 35 196 2009-01-15 18:05:59 2007-05-10 14:12:42 5 5 163 2 75 205 20 107.70 32 55.86 CHANGED YhccCuuCHhAaPPtlLPupSWpplhssLspHFGssAs.LsssspttltsYLpspuuststs.............ssssPh.RITcs.aFpcpH..pcls.thhp.pspltohusCsuCHss.AcpGsF ....YtppCu.uCHhAaPPslLPupoWptlh..s..s..LppHaGssAs...Ls.sssptpItsYLtspAuptst.......................sssss.+Iocs.aFhcpH....scls.......thhp...spltohusCsuCHstAppG.a....................... 0 19 56 66 +9459 PF09627 PrgU PrgU-like protein Coggill P anon pdb_2gmq Domain This hypothetical protein of 125 residues is expressed in bacteria but is thought to be plasmid in origin. It forms a six beta-strand barrel with three accompanying alpha helices and is probably a homo-dimer in the cell. It may be involved in pheromone-inducible conjugation [1]. 
25.00 25.00 77.10 77.10 18.60 18.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.52 0.71 -4.53 2 54 2009-01-15 18:05:59 2007-05-10 14:19:04 5 1 40 2 2 31 0 100.60 89 88.98 CHANGED MKElAIQEK-lshpWpG.pG+Llhl+lKps+shEhhhNpQlT.ENIpEIsslpllKNGKoLsLcV.sE+Shash.p.uphpVPhFaIcT.IpRtsac-hF..............Gpp..lKt MKEIAIQEKDLTLQWRGNTGKLVKVRLKNTRAMEMWYNKQITEENIQEITTLNIIKNGKSLALEVYPEKSIYVKPNLGRINVPVFFIKTPINRGlFEEIF......................GETLK....... 0 2 2 2 +9460 PF09628 YvfG Yvfg; YvfG protein Coggill P anon pdb_2gsv Domain Yvfg is a hypothetical protein of 71 residues expressed in some bacteria. The monomer consists of two parallel alpha helices, and the protein crystallises as a homo-dimer. 25.00 25.00 123.50 123.30 22.40 18.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.22 0.72 -3.83 2 24 2009-01-15 18:05:59 2007-05-10 14:21:45 5 1 24 4 6 13 1 67.00 88 93.33 CHANGED pLFos.hhhpNh+pahp.Npu..sKIpAhNuYY+sVsuolltDplsKNAsllhRhpHL-EAYpKVtpt ELFSVPYFIENhKQHIEMN.s.EDKIHAMNSYYRSVVSTLVQDQLTKNAVVLKRIQHLDEAYNKVKRG 0 2 3 4 +9461 PF09629 YorP YorP protein Coggill P anon pdb_2heq Domain YorP is a 71 residue protein found in bacteria. As it is also found in a bacteriophage it might be of viral origin. The structure is of an alpha helix between two of five beta strands. The function is unknown. 25.00 25.00 66.00 65.80 19.10 16.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.32 0.72 -4.00 2 8 2009-01-15 18:05:59 2007-05-10 14:43:34 5 1 8 1 1 7 0 65.40 75 99.43 CHANGED MPKYWSYPVGLAVEINNNARYGCPHHVGRKGKIIEHLHSATYDYAVSDETGDITYFKEHELTPLKGGLAYV MPKaWSYP.GLcV.INpNA+.uCPHHVGRcGKIIEhLHSATYDYAVSDETGDITaFKEHELNPhKGG.......... 0 1 1 1 +9462 PF09630 DUF2024 Domain of unknown function (DUF2024) Coggill P anon pdb_2hfq Domain This protein of 86 residues is expressed in bacteria. It consists of four alpha helices and two beta strands. 
Its function is unknown. One UniProt entry gives the gene name as Traf5. 20.40 20.40 21.10 69.20 20.10 17.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.85 0.72 -4.27 13 50 2009-01-15 18:05:59 2007-05-10 14:46:45 5 1 46 1 22 46 40 81.50 45 95.21 CHANGED MclHVaDTaVps+DG+hMHFDVll..sscstc+shpYAKpaLcolG.csssloppECpFCHSEtAss-VppuIcppGaaIhcM ....Mcl+VFDTaV+s+DG+hhHFDVll...sccpst+AhpaA+cWLsuhG.csAsloppcCpFCHSE...tAss-VtcuI+p+GaaIhph. 1 9 15 21 +9463 PF09631 Sen15 Sen15 protein Coggill P anon pdb_2gw6 Domain The Sen15 subunit of the tRNA intron-splicing endonuclease is one of the two structural subunits of this hetero-tetrameric enzyme. Residues 36-157 of this subunit possess a novel homodimeric fold. Each monomer consists of three alpha-helices and a mixed antiparallel/parallel beta-sheet. Two monomers of Sen15 fold with two monomers of Sen34, one of the two catalytic subunits, to form an alpha2-beta2 tetramer as part of the functional endonuclease assembly. 22.00 22.00 22.20 22.10 21.80 21.60 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.41 0.72 -3.78 44 217 2012-10-11 20:44:46 2007-05-10 14:48:09 5 8 184 2 146 215 1 113.10 26 62.71 CHANGED hVhtsLhchptWp-Vcl..hppp...t.......................lhllpGpPspphstp.................................EhllPlph....sptlolcplcplFspl.....s..t......................sp+llLAIlssDuTlVYYh...lpcGlhc.....Pcp.s .....................................................................................VhhsL.ct+.Wpplph.ht..p.hp.....................hhhlpGh..ct.hh............................................phVlPssh.........spphohpplpplac..tltt.t.t.............................................spplhLAhlp....s....DuTlVYYh..lpcGhhpPc.............................. 0 37 71 113 +9464 PF09632 Rac1 Rac1-binding domain Coggill P anon pdb_2h7o Domain The Rac1-binding domain is the C-terminal portion of YpkA from Yersinia. 
It is an all-helical molecule consisting of two distinct subdomains connected by a linker. the N-terminal end, residues 434-615, consists of six helices organised into two three-helix bundles packed against each other. This region is involved with binding to GTPases. The C-terminal end, residues 705-732. is a novel and elongated fold consisting of four helices clustered into two pairs, and this fold carries the helix implicated in actin activation. Rac1-binding domain mimics host guanidine nucleotide dissociation inhibitors (GDIs) of the Rho GTPases, thereby inhibiting nucleotide exchange in Rac1 and causing cytoskeletal disruption in the host [1]. It is usually found downstream of Pfam:PF00069. 25.00 25.00 43.00 42.30 23.90 19.00 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.76 0.70 -5.34 2 33 2009-01-15 18:05:59 2007-05-10 15:45:29 5 2 30 3 3 15 0 287.60 85 40.67 CHANGED DsRRlTP+KlRELSDhLRhHLSSAuT+QLshGhsLSDLssM.ssLDKAERpthsDpsQlKSFNSLILKsYuVIuuYlKGc.s-oKospsEsSP.hpuNhMhSlsEPoLppIQtpLsQoHu.sDIuoL.Ru+pHLETLLpVLhs.S.Q.p.VosEsYsFL.RlAEsKsoLu.pLssLptQQpps+upLShLhptssuWAssARQuL.RFDShRPVVKFGo.Q.hAlHRpMhAAaAAhTLQEVutFss-MRpFsAsuhPLLhQLGRSoLhDEtLs.QREpLRELsTlAERLNRLppEWh .......DVRRITPKKLRELSDLLRTHLSSAATK..QLDMGGVLSDLDTMLVALDKAEREGGVDKDQLKSFNSLILKTYRVIE.DYVKGREGDTKNSSTEVSPYHRSNFMLSIVEPSLQRIQKHLDQTHSFSDIGSLVRAHKHLETLLEVLVTLSQQGQPVSSETYGFLNRLTEAKITLSQQLNTLQQQQESAKAQLSILINRSGSWADVARQSLQRFDSTRPVVKFGTEQYTAIHRQMMAAHAAITLQEVSEFTDDMRNFTVDSIPLLIQLGRSSLMDEHLVEQREKLRELTTIAERLNRLEREWM. 0 0 1 1 +9465 PF09633 DUF2023 Protein of unknown function (DUF2023) Coggill P anon pdb_2guk Domain This protein of approx.120 residues consists of three beta strands and five alpha helices, thought to fold into a homo-dimer. It is expressed in bacteria. 
21.30 21.30 54.60 54.40 20.80 20.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.26 0.72 -4.07 10 133 2009-01-15 18:05:59 2007-05-10 16:32:13 5 1 133 2 35 108 1 100.10 53 83.50 CHANGED +lFhHHIYEapKGlRsLlLhTLsscttstshtRLcppuIsYalQcs.ups+lNlFFGsspClcslRplss.+PLspLTsEEDFlLGuMLGYDhppQCcRYLpR ..+lFhpHIYEacKGVRphVLaThs+capshAlpRLcppsIsYhIQ.V..uss+lNLFFG+pECl-slRtllp.+PLNpLTPEEDFILGuMLGYDlptQCcRYCpR.. 0 12 24 29 +9466 PF09634 DUF2025 Protein of unknown function (DUF2025) Coggill P anon pdb_2hg6 Domain This protein is produced from gene PA1123 in Pseudomonas. It contains three alpha helices and six beta strands and is thought to be monomeric. It appears to be present in the biofilm layer and may be a lipoprotein. 25.00 25.00 170.10 169.90 24.20 17.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.24 0.72 -4.29 5 33 2009-01-15 18:05:59 2007-05-10 16:37:21 5 1 33 1 11 25 1 106.00 75 96.84 CHANGED MSITSppICQAADQLKGFVGFNRKTGpYIVRFSEDSFGMDVADDoIlPsSEFVWAsssDssMsLcREpLQLLLEQNIDDRlNIoEPLRVYLRRoDLPEIpApRSLl MuITSsDICQAADtLKGFVGFNRKTGpYIVRFSEDSFGMDVADDSITPTSEFVWuuspD.ssMpLsREpLQlLLEQNIs-RLNIGEPLhVYLRRpDLPEIsAQRpL... 0 1 2 7 +9467 PF09635 MetRS-N MetRS-N binding domain Coggill P anon pdb_2hsn Domain The MetRS-N domain binds an Arc1-P domain in a tetrameric complex resembling a classical GST homo-dimer. Domain-swapping between symmetrically related MetRS-N and Arc1p-N domains generates a 2:2 tetramer held together by van der Waals forces. This domain is necessary for formation of the aminoacyl-tRNA synthetase complex necessary for tRNA nuclear export and shuttling as part of the translational apparatus. The domain is associated with Pfam:PF09334. 
21.20 21.20 21.90 42.10 20.60 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.58 0.71 -3.73 5 42 2009-01-15 18:05:59 2007-05-10 17:08:36 5 2 39 1 27 42 3 115.50 37 15.90 CHANGED KGHoutLQLANNLKLALALtLAsssLKLclNEDcutPpLhsouoGFcLFDANAILRYVLcDFEupcS-cYpaAluSLEshLYH..K-ssc-HlsclsNKuL-NYL.lsh-EPLoATcLIlFANsY .......Ktpsh.LpLANNLKlulAlphhs.tsL..clpls.-Ds..us...hpLhsspss.FpLh-sNAIlRYlhsDFps.ppu.cap.....hpshLYp....Kp..ppclp.hss.tltpah....cp.losoplIlFAslY..................................................... 0 3 14 26 +9468 PF09636 XkdW XkdW protein Coggill P anon pdb_2hg7 Domain This protein of approx. 100 residues contains two alpha helices and two beta strands and is probably monomeric. It is expressed in bacteria but is probably viral in origin. Its function is unknown. 23.00 23.00 23.50 23.30 22.00 22.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.55 0.72 -4.18 4 40 2009-01-15 18:05:59 2007-05-10 17:10:23 5 1 35 1 9 35 41 96.80 32 88.42 CHANGED M.Lh-AIhYpYPsAsspKDF.lRNDGDG..SYIpcWplcAPhPTEtELcsWWEEhQpNPsY-PP.Ql-hLAQELupEKLhRKQLEc.sppLGsELSslKLplLsLKG- ..........................................................lh-slhahaPssss.hDa.lpssup.G...hItcWplp.hP.PTptpLcphac-htcs..sshpsPspl-hLuQ-LupEKLtRKph-ph.psLGppLuslKLplLplKGt........................................ 0 4 6 7 +9469 PF09637 Med18 Med18 protein Coggill P anon pdb_2hzm Domain Med18 is one subunit of Mediator, a head-module multiprotein complex, that stimulates basal RNA polymerase II (Pol II) transcription. Med18 consists of an eight-stranded beta-barrel with a central pore and three flanking helices. It complexes with Med8 and Med20 proteins by forming a heterodimer of two-fold symmetry with Med20 and binding the C-terminal alpha-helix region of Med8 across the top of its barrel. This complex creates a multipartite TBP-binding site that can be modulated by transcriptional activators [1]. 
20.70 20.70 21.10 20.80 20.30 20.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.51 0.70 -4.84 26 307 2009-01-15 18:05:59 2007-05-10 17:12:11 5 6 206 12 211 292 0 171.20 21 91.12 CHANGED pELhLhuSlsspshpphlppLpuLsu..p..Ppphtchphla+...s..........p.t...lpspstthpphhh+htpphspt.................hh....................p.s...tp...........................................ppsWpLphtDhP-sGp.spsssspphhsssl..............................................................................................pssslhpalpchG.achshEYhhcGhhFhp.uslhItlh+lhp.hsspspt...................p.hshSssallcs.lslscus-h.-thspuhtcL.tlp-pL+shlpLEhsDR..hDoR ...............................................................................................ph.L.u..l.t.t.th..hhp.Ltshss.....s..h.phphhh................................................................................................................................................................................................t..aplph.s.......Pt.s....p..tp....s.......p..hhp..................................................................................................................................................ptsh.thhtthG.ath..pahhtGhhahh.s.hhl.l.+hh..h....tt.................................p.thhlph....h.h.tstp.....t.t......htp.Lpshh.h............................................................................ 0 63 97 163 +9470 PF09638 Ph1570 Ph1570 protein Coggill P anon pdb_2hq4 Domain This is a hypothetical protein from Pyroccous horikoshii of unknown function.\ It contains six alpha helices and eight beta strands and is thought to be monomeric. 
20.70 20.70 21.30 24.10 18.10 20.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.74 0.71 -4.42 4 13 2009-01-15 18:05:59 2007-05-10 17:14:26 5 1 13 2 11 17 0 152.10 56 97.29 CHANGED MhCEEKLEVFENGF+D-KFNlElcaYGpDuRKVLLAlIYELYLP-YGpEYVYPFECAKEFWsIYhDusElcsEEscLKPlKFlSESVhpKlcc.LccIcsPlEVK....lEcAclYKsK-GYLslGKNFlLD.+GRLFIFNKPSluEhILKYIWcW ................................MhCEEKLEVFENGFcDGKFNlclEaYGpDARKlLLAlIaEL.YLPDYGp-YVYPFECAKEFWGIYhDuuEIpsEEh+LuPlKFlscSVhsKLEcsLc-IcAPtEVKt.lslE+A-laKlKcGhLslGKNFlLDt.+GhLFlFNKPSstElILKYlGh............ 0 1 1 6 +9471 PF09639 YjcQ YjcQ protein Coggill P anon pdb_2hgc Domain YjcQ is a protein of approx. 100 residues containing four alpha helices and three beta strands. It is expressed in bacteria and also in viruses. It appears to be under the regulation of SigD RNA polymerase which is responsible for the expression of many genes encoding cell-surface proteins related to flagellar assembly, motility, chemotaxis and autolysis in the late exponential growth phase. The exact function of YjcQ is unknown [1]. However, it is thought to be a prophage head protein in viruses [2]. 21.80 21.80 22.20 22.10 21.70 21.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.14 0.72 -3.90 8 83 2009-09-10 15:07:02 2007-05-10 17:17:20 5 1 76 1 15 64 1 92.60 25 87.98 CHANGED tYKILptlhauaEshpc.D.slhD........cphscsLphLpD-GYIKGlphp.s....slhsuhssshlTh...cGlsYL-ENohhKKAYKshKEl+-WlP .........Y+lLphlht.s..h..cphpth.c...s.hhp.......lspphhsphlphLh--GhIcGlphhpth.......phhh.t.h.pshpITh...cGlcYLp-NShhpKshchhK-hpt............. 0 9 12 14 +9472 PF09640 DUF2027 Domain of unknown function (DUF2027) Coggill P anon pdb_2huh Domain This protein domain is of unknown function. though putatively involved in DNA mismatch repair. It is associated with Pfam:PF01713. 
25.00 25.00 45.80 45.00 23.00 22.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.95 0.71 -4.88 5 110 2009-01-15 18:05:59 2007-05-11 10:06:21 5 2 110 1 14 104 3 160.50 48 43.58 CHANGED LNVaLAYVPpDhKAluTTPFETYLVNDSNYYLYYTYLSAEGsuW+sRSHGlIEPNTKLaLEEFsKu-LNDhERVsVQLIAFKDsKsFslKPAVSVELRIDTVKFYKLHTFpEoDFFEEPALIYDIV+NDhPVKQVaVSAEELKEALlQKKssD+s.pPQsllK ..LNVaLAaVPpD.hKshsoTsFEsYLVNDSNYYlaaoYh..ou.EGp.u.WpsRupGhlEPNTKlhlEEFs+ssLN.-hE+lsVQllAaKcsKsahlKPAlsVElRIDsVKFYKLHTFp-sDFFEpPALlYsIV+sDhPs....+plh..VsAc-lppuhhpKp..t.-.t....h........................................... 0 5 12 14 +9473 PF09641 DUF2026 Protein of unknown function (DUF2026) Coggill P anon pdb_2hly Domain This protein of approx. 100 residues is found in bacteria. It contains up to five alpha helices and up to seven beta strands and is probably monomeric. Its function is unknown. It is cited as a major prophage head protein [1], so might generally be of viral origin. 25.00 25.00 156.50 156.40 22.30 18.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.31 0.71 -4.95 4 14 2009-01-15 18:05:59 2007-05-11 10:08:21 5 1 14 1 5 16 1 201.70 47 95.83 CHANGED sLoDY.RIYpVl+uVLhS.tsAcss+AChFFuhhGAaILpcHY+lcA+shuGhhuathu..hsslLhFu.pIEcsplpSu--sFHsWVpsDsallDFMAPhFsEshtut.hshslPtKMhQR+L-sMAsS.ssL+puGDFhahPs.-lTpcLtt+htppth.tDllslsssWatKpPpphssoluhsDt+GpssplsLsssp.lhGAW .p.oDYpRIYRVIpSlLlu.pNADsAsAshaFSTFGAaILppHYKlcAhPpuGhAAYsLG...uslLhFu.+c-DGh.VsuAs-sFHCWVEADGWAIDFMAPtFupuscu....LslPsKMFQRsLuuMAsSlsDLspSGDFFYcS.-s-sTschhsDacppshluDlusVAssWFRKoPKpMssSlols-pcGcsp.plPLoGps.lsGuW 0 1 2 4 +9474 PF09642 YonK YonK protein Coggill P anon pdb_2h4o Domain YonK protein is expressed by the bacterial prophage SPbetaC [1]. It is a 63 residue protein that associates into a homo-octamer in the form of a beta-stranded barrel with four outer helical features at points of the compass. Its function is unknown. 
25.00 25.00 29.70 29.60 21.30 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -8.81 0.72 -4.28 2 10 2009-09-11 09:12:47 2007-05-11 10:17:06 5 1 10 4 2 7 0 60.00 48 94.94 CHANGED ASKKVHQINVKGFFDMDVMEVTEQTKEAEYTYDFKEILSEFNGKNVSITVKEENELPVKGVE .........h.uKKVpplNlKGhhDh-sspIpEpsK-sEppYDLsElLScFsGKpVSITIKEEsELPhc...t.. 0 1 1 2 +9475 PF09643 YopX YopX protein Coggill P anon pdb_2i2l Domain YopX is a protein that is largely helical, with three identical chains probably complexing into a twelve-chain structure. 22.40 22.40 22.80 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.15 63 761 2009-01-15 18:05:59 2007-05-11 10:26:09 5 2 545 9 61 587 31 122.40 24 92.72 CHANGED KFRuasctppchh.spthph...phtthhhtp.tpp.t...................................l...hQaTG..LKDKNGpEIaEGD.....Ilch....t..h..h..........................pat-sphhhhht...............................phh..h.hpspsh..EVlGNIa.ENsELLE ........................................................cFRsaspt..p...pthh...htt.hph.........pt.h.h.h.tt........t..............p........................l..hQh.TG....LKD..KNG....pE..IaEGD.....Ilph......ptt.tthh..........................................hpapcsthhh.ht...................................ph.h......h.h.......t.....pph.....EllGNIa.ENs-LLc.......................................................... 2 18 46 56 +9476 PF09644 Mg296 Mg296 protein Coggill P anon pdb_2i15 Domain This protein of 129 residues is expressed in bacteria. It consists of three identical chains of five alpha helices. Two copies of each chain associate into a complex of six units of possible biological significance but of unknown function. 
25.00 25.00 113.30 113.20 20.20 18.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.66 0.71 -4.26 3 8 2009-09-14 14:07:04 2007-05-11 10:29:33 5 1 8 3 3 5 0 116.10 52 95.77 CHANGED KPQLlAaKpFLQTEFpcVDFETFRpNFNLCLEREQcTllIYEDDDYDDQsFFhKPMLSDuFFIpoEVIKQL.DaLAcLV-NPKDDDKpCC...QsFYEALhlFISALAITKGIslsRaHQpLssR p.pL.thhp.lpp.hpclshEphhpNaNhhhEppppThlhY-DDDY--.sFF.Ksh.u-h.alpsclIppl.DaltcLlcs.hDDDKphs...ppFYp....hhpAlAlTKsIslpRhpphLtsc 0 2 2 2 +9477 PF09645 F-112 F-112 protein Coggill P anon pdb_2cmx Domain F-112 protein is of 70-110 residues and is found in viruses. Its winged-helix structure suggests a DNA-binding function. 22.40 22.40 22.80 45.00 22.20 22.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.57 0.72 -4.17 2 4 2012-10-04 14:01:12 2007-05-11 10:36:56 5 1 4 1 0 5 0 93.00 37 97.64 CHANGED QolN..chAp.haphLccKtElThEDIlA.aplosssAYsI.psLKshCppH.sECps.h+sRKTs.......................................IhuKQ ....lNshphAclhaKILppKtElolEDIlAQFEISsosAYsI.+sL+hICEpH.-ECpsppKsRKTlhh.hKpEphppptpEp..ppItKIhsAps...............h... 0 0 0 0 +9478 PF09646 Gp37 Gp37 protein Coggill P anon pdb_2gjv Domain This protein of 154 residues consists of a unit of helices and beta sheets that crystallises into a beautiful asymmetrical dodecameric barrel-structure, of two six-membered rings one on top of the other. It is expressed in bacteria but is of viral origin as it is found in phage BcepMu and is probably a pathogenesis factor [1]. 
20.50 20.50 21.00 21.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.56 0.71 -4.74 13 105 2009-01-15 18:05:59 2007-05-11 10:50:08 5 1 99 6 20 81 0 141.20 42 92.06 CHANGED llsRL+ppLPphplEhFP-cPscYpLsHssGAlLluYtGS+FspPcDTsuVlQsRplplulTVlhRpLsucpGAlssLDplRpsLsGF+PPsC.pchaLlsEpFlG..EssGLWQYsL-hsTETltlE-s-spssPhLspVs....YEcp- .............................VhsRL+Et.P.p.hp.lch.spcsppYh.sp..uslLlpYsGSpFscP-sTsAllQpQplplssTVlstQlsst....tAls......sLDplRpuLGGap.PsC..cR.lWLppEpalG..-ssGhapYsL-hsspolFIt-p-s.pcu.PLLTtVN....YEE................................. 2 5 15 17 +9480 PF09648 YycI YycH protein Bateman A, Szurmant H anon Pfam-B_6483 (Release 21.0) Family This domain is exclusively found in YycI proteins in the low GC content Gram positive species. These two domains share the same structural fold with domains two and three of YycH [2] Pfam:PF07435. Both, YycH and YycI are always found in pair on the chromosome, downstream of the essential histidine kinase YycG. Additionally, both proteins share a function in regulating the YycG kinase with which they appear to form a ternary complex. Lastly, the two proteins always contain an N-terminal transmembrane helix and are localized to the periplasmic space as shown by PhoA fusion studies. 
25.20 25.20 25.30 25.20 25.00 25.10 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.50 0.70 -4.90 33 732 2012-10-01 23:24:42 2007-05-13 16:43:49 5 2 721 12 94 434 1 224.20 27 82.91 CHANGED psp.sssppssttlppchcs-pIslss.lsscp.....scshhLsucpcshp..hcshps.....Lpspssshppps........plpushspslpls....................tppttpplpsalp...........pplhpG.....pcYpasph.p....sssplsatQpacst.la........spp....uplphplsscs...clsuYpQohlsclp.hc....c+.pplIospcAlpsLYh..pspltsssc...lppscLGYh......pll..sssspplh.sPsWpltlcpp...s....tthphhhVNAh ......................................................................................................................................................p.......ppps..h-pphpp-pIshs.s..lsscs...hph.hlsucspsFs....tcshps.............lt...spshphpsts..................phLpuslsp.slt.ls..............................cpshccl.pcalp...........pplhpG.......................pcY..p.hsph...................sssplhatQ.p.Y..cshPlh..........ssp......A..hls.h.plppcs...cls..SYp..Q...o.hhs...clc.hc.......c+...pplIospcAl.-sLYh.......ppt..lpp..ssc.......VhpscLGYh.......sll....ptsss...plh....hPsWplplcpcs.......tpsphhhVNAh.......... 0 30 60 76 +9481 PF09649 CHZ Histone chaperone domain CHZ Wood V, Coggill P anon Wood V Domain This domain is highly conserved from yeasts to humans and is part of the chaperone protein HIRIP3 in vertebrates which interacts with the H3.3 chaperone HIRA, implicated in histone replacement during transcription. N- and C- termini of Chz family members are relatively divergent but do contain similar acidic stretches rich in Glu/Asp residues, characteristic of all histone chaperones [1]. 
23.30 23.30 23.40 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.84 0.72 -4.72 25 201 2009-01-15 18:05:59 2007-05-14 09:24:18 5 1 169 1 137 189 0 37.30 40 13.89 CHANGED ---sD-LppIDsuNIIosGRRTRGKsIDFspAAcclps ................c--p--Lt.pIDsuNII.o..uG.RRTRu+slsastsupph...................... 0 29 66 104 +9482 PF09650 PHA_gran_rgn Putative polyhydroxyalkanoic acid system protein (PHA_gran_rgn) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded by genes involved in either polyhydroxyalkanoic acid (PHA) biosynthesis or utilisation, including proteins found at the surface of PHA granules. These proteins have so far been found in the Pseudomonadales, Xanthomonadales, and Vibrionales, all of which belong to the Gammaproteobacteria. 20.50 20.50 20.50 20.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.58 0.72 -3.88 24 217 2009-01-15 18:05:59 2007-05-14 11:17:44 5 1 213 0 80 187 38 89.10 29 90.59 CHANGED IcIcRpHsLshppARptA-plAcclspcaslcspWp....uDplpFpRoGlcGplplsssplclplcLGhlLpshpupIcpEIcctLDphL ...............................Iplc+sHsLGh-pARp+s-ph...sp+ls...p...c.....a......u.....l.....ppp.....Wp.........GDpl..ph....p.....tp....G....l....cGplsltscplclplcL.shlL.ushpstlcscIc+hLcphh....................... 1 18 39 60 +9483 PF09651 Cas_APE2256 CRISPR-associated protein (Cas_APE2256) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a conserved region of about 150 amino acids found in at least five archaeal and three bacterial species. These species all contain CRISPRs (Clustered Regularly Interspaced Short Palindromic Repeats). In six of eight species, the protein is encoded the vicinity of a CRISPR/Cas locus. 
24.80 24.80 25.00 25.10 24.20 24.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.74 0.71 -4.45 28 119 2012-10-11 20:44:46 2007-05-14 11:35:42 5 3 97 2 56 125 1 135.30 23 39.62 CHANGED cpsSAELNulhphhppt.hs.......phhLlsoDTssGchsApllccalpp............puhp.Vplht..t....lpshs.hcp..Fpc......GLtsLlctlspplp...ppsutp.lhlNsTGGaKspsuahsllu.h..husslhYlaE.phs-llpLPhl ................................................................phSAElsulhph......hppt.ht.........plhLlsoDT.pGchsucllppalpp...................pshp..lphhthts....lpsps...pp....Fpp...Glts...Lhctltpplp...ppp..u.tp..lhlNsTGGaKu.ssah.slhuhh...huh.lhYlaE.p.hscllplP............... 0 32 43 49 +9484 PF09652 Cas_VVA1548 Putative CRISPR-associated protein (Cas_VVA1548) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents a conserved region of about 95 amino acids found exclusively in species with CRISPRs (Clustered Regularly Interspaced Short Palindromic Repeats). In all bacterial species that contain this entry, the genes encoding the proteins are in the midst of a cluster of cas (CRISPR-associated) genes. 25.00 25.00 33.10 28.10 20.60 18.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.07 0.72 -3.98 6 31 2009-01-15 18:05:59 2007-05-14 11:44:26 5 4 27 0 10 31 1 91.80 41 51.20 CHANGED WhloRHsGAhEWAtc....QGlclD+hVs.HLc...htclstGDpVIGoLPVpLAtslCc+GucYaHLsL.clP.plRGpELoA-phcussA+LtcacVcps .....aFloRHsGAl-Whtp....p.G.l.plDchls.HLD...sspIpsGDsVIGTLPlpLAAclCp+GA+ahaLoL.slPhchRGpELos-phpstGApLppaplp..h.......... 0 4 9 10 +9486 PF09654 DUF2396 CHP02652; Protein of unknown function (DUF2396) TIGRFAMs, Coggill P anon TIGRFAMs Family These conserved hypothetical proteins have so far been found only in the Cyanobacteria. They are about 170 amino acids long and contain a CxxCx(14)CxxH motif near the N-terminus. 
19.60 19.60 20.70 182.80 18.70 17.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.07 0.71 -4.26 7 46 2009-01-15 18:05:59 2007-05-14 11:53:07 5 1 43 0 20 48 1 160.70 74 95.31 CHANGED PIFGPEIpCPHCRQsIPALTLTDTYLCsRHGAFEAsPcTt-LVHLQSGRpWRLWEspWYRQHTHPDGIRFEIHEALDRLaTQGYRAT+VIIAcRYR-LlssYLERsssh...usscss..RLYGLPVEFSssssp-.....spWpVINF-LEKE.GsshRYPYFRL..F- PIFGPEIpCPHCRQsIPALTLTDTYLCPRHGAFEAsPcTsELVHLQSGRHWRhWpsEWYRQHTHPDGIRFEIHEALDRLYTQGYRAT+VIIApRYc-LlSsYLE.RsosWR.......up..s..-us.PRLYGLPVEFSP-spc-.....PCW-VINFDLEKE..PGlPh.RYPYFRLF-...... 1 3 17 20 +9487 PF09655 Nitr_red_assoc Conserved nitrate reductase-associated protein (Nitr_red_assoc) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are found in the Cyanobacteria, and are mostly encoded near nitrate reductase and molybdopterin biosynthesis genes. Molybdopterin guanine dinucleotide is a cofactor for nitrate reductase. These proteins are sometimes annotated as nitrate reductase-associated proteins, though their function is unknown. 25.00 25.00 73.40 73.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.85 0.71 -3.86 25 124 2009-01-15 18:05:59 2007-05-14 11:57:37 5 1 119 0 43 126 32 144.80 46 92.10 CHANGED hFpFEpDFlssLRCIPMsVRhKLDhsGlKLKLsHWtpLopppRptLVchPs-sssplp.saRptLpphspshsss....scsLs.-ssP.sWppssplPptl....pppApphGlt.lolsQWssLssLQRFALlKLoRsGH-N+NhhsAhpEF ..hFsFEt-.s-sLphIPMlVRapLDphGl+lpLppWphLshE-RphLschPs.-..............ssst.......lc..sacctLhchlps+uss....sphh..ts-ppP.uWpsssslP-ul....hppushtGls...o.hsQWtpLsPhQRasLhKLSRps+t.N+sFlPAh+EF.... 0 5 22 33 +9488 PF09656 PGPGW CHP02611; Putative transmembrane protein (PGPGW) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are putative Actinobacterial proteins of about 150 amino acids in length, with three predicted transmembrane helices and an unusual motif with consensus sequence PGPGW. 
21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.50 0.72 -4.67 29 301 2009-01-15 18:05:59 2007-05-14 13:06:46 5 7 274 0 124 284 202 51.40 35 34.98 CHANGED RsslsllGhsllllGllhlslPGPGWLllhhGLulLupEatWA+RhLchscpp ..........p.hlhllGhhlllsGllhlsl..P.G.P.G.aL.hlh.lGLu.l.LuhEa.WApphLthspt................... 0 46 90 111 +9489 PF09657 Cas_Csx8 CRISPR-associated protein Csx8 (Cas_Csx8) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes proteins of unknown function which are encoded in the midst of a cas gene operon. 
25.00 25.00 32.30 32.30 19.40 19.40 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.61 0.70 -5.76 3 31 2009-01-15 18:05:59 2007-05-14 13:13:45 5 2 26 0 3 31 0 380.60 38 92.76 CHANGED FDTsLEuSDWRaSATIVGLIpYFcaL-pcY.hcK.pl.ElEDDYLLYNSssINEENYLDFVEKaY....cDsLHHKlVENILp+.-ElTEEpIKlINEKLsANTlMKKlFGKIKFDGTNK-EILDLI-KNRacLIKETFRRKKsLYuNYuNTNpLFsD.sNcsCRLlGYChDsGKKGKSTGYNFssSTF-apD-KIFDFIPFAFoG.SaEAFFINDNloIccLK........................KSNchIpcKhp--hEGppNScssRpTLFsslpEouDYIKaDVEVILKsRDKEYFETlYlRcESIcIFKE..sEDF-YKuI+F.aKlTDKYYhNlpcEVssuILNslLLDslIElLLK-KNs................aSYsIppLIKINsLIRcGGKEMcE+LKuAa.ACAKpVsKKl..cNppNKLsSYKQKLlSSVlFKD+DRlC-ILLQLSsYSGVhFuFAYDLaEDFEpNKDLAYTFINAL .........aDptlpsSDW+aSAAIsGLhhYhp.hphpa................h....pt..p...chp-....salhYsppsI....s....Ec.p...YLpFlEcaa....p-phhH+tlEs.Lpp.ppFo.E.IK.Ip-phpANolhKKlFtphKFsGpNccElLpllpcNR..lI+ETFRNtKshYsNasp.....splhpc..pppsCRLhGYhhD.s+KsKuhuasFspsohsa.D...FDFIPFAFot.sh-shFlNsNhslchL.........................Kppptlt...p...pp.hc.t.tp..pphsh+p.hht.hpc.pscaIcashE.....lIhKsp........-c..c.Y.FEThalRpcuIclhcp...hcchp......hhs...lph......Khs-ph....YhslhpEVhssILNh.hLsp.....I.....hLLK..-.cps...........................hsYh.IppLI+lN.hIhp.....ts.ppMp..p.......phctu......h.AsAtpVscKh......st......NKltSY+pKLhSsllhKsacRhhplLhQLSsYSsV.FsFsaDh.hEs.ppNcslhahFl.tL......... 0 2 3 3 +9490 PF09658 Cas_Csx9 CRISPR-associated protein (Cas_Csx9) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes archaeal proteins encoded in cas gene regions. 
25.00 25.00 26.30 25.80 22.20 21.80 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -11.97 0.70 -5.65 3 8 2009-01-15 18:05:59 2007-05-14 13:47:11 5 1 8 0 6 9 0 349.00 46 88.05 CHANGED MSWVVMDAGLEPDcEELADALEGALcSLcSRs+INTSKIGRNDRNSFD+VLpAWFGRSAPETYGELFELVIsETIKLLR-GKIDPucSLSTIKTDKNGTYLGlAYNGEQAILPAIIKQPEYYEaQSuFLKPTTGQKAQIRMDPLWFSFMALGFFTSFAGFIuGKYYLMTKPGIEVFWPYEVEEIIE+GILPLTuAGASGRISLoTEELYEMKLAMKLAEEGRcVIEEVYPVTLHlISLEGQVYTELKTlQLNLpELSNYlsEYVKKIEuu+VGGlsLLVELKEGsATlcKYPLWALVDIAEKELWKGVsGDcEMLAYIFVKDLYRAINSGRKELIcDSIFRLFRQGRALLEGSGRASGEFRKVMRTFMWEEHLEVLL .......hSWlVh-AGlEPDpp-LA-ALEsulcoLcpRh+.hsoS.....KluRNDRsSacKVhpsW.....FshpsPETYsELFELlIcETIKLL+csKIDPucSLpoIKhDKNGTYLGlsYNGp.AILPAI.IKQPEYYEaQScFLoPTTGpKAQIplDPLWFSlLAlGFLTuFAGaIGGKYYLMTKPGIEsaa..........PYElE-Ilccull.LTsAGhpuRhSLpsEELYEh+LuMKLAEEt+pl.-clYPlTLHlISLEGQVYTELKTlQLsLppLspYhpcYlc+IcshphhuhslhVcLK-s....pKYPLWALlslAE+ELhKGVsGDpEhLAYIhVKDLYRAINSG++ElIcDolFRLFRQGRuLLEGp..u+uSG..EhRKVh+sFMhEcHLtVLl...... 0 0 0 1 +9491 PF09659 Cas_Csm6 CRISPR-associated protein (Cas_Csm6) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. 
21.40 21.40 23.90 23.10 19.80 18.30 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.17 0.70 -5.83 4 211 2012-10-11 20:44:46 2007-05-14 13:59:36 5 2 187 0 24 168 0 273.70 26 89.77 CHANGED sh-sEII..hh-sl.sspcaDlFhshFc+aLh.lpsta..-sEIlLNlSSGTPQMcSALhllslhss.shpsVQVoTPt+s.SNtu.paussp.clEphhcsN-DNcs..ss.sRshEssutshpthhlRpshhshIssaDYpuALsIlus.ppF.....l.-pl.pll......puAhhRc+L..hpt.h+s+.lhs-lh..shhpsDu.pKshpYhLhLslhtpRtplA-hlhpspshhphllcshlcc+h.t.lh.c...sc.aLsDph..pcthL.-p-splhph..Lccs...ccpshsspphLshhsalslLphhtPsppllttlpsltslpu.lRNssAHslsslspcp.hp.ht.lLStphlKph......hh.phhphppssaNhY-+hNcElhch ........................................................................................................................................................................................................................................pttlp.LlspYcYptAhplupp...t...........spph..Ll......chhhcRp....cL..............................t...pp.hl....h....s.c...hh......hh.h.pt-...........p......p..l.s.pYlhsLp......h+h+ptplhDFlRuloPhl....hhlhhph.lp..p....clP....................tcph..pt.....h...h.h.......t.thh......p.p..p....lhpt.....hp.p..........pshss..........c.t..hlthhs.hh.....p....l..l......h.t..sp.....l....hphl.p.LpphEppVRNslAH.Ips.hsE-....cl...p+s.st..h...up.t.h..........lc.l..............hp.h.st...p..t.t.h......h.aDphNt.lh................................... 0 4 6 18 +9492 PF09660 DUF2397 CHP02677; Protein of unknown function (DUF2397) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Betaproteobacteria). 
20.80 20.80 21.10 20.90 20.40 20.70 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.44 0.70 -5.90 21 146 2009-01-15 18:05:59 2007-05-14 14:13:19 5 4 129 \N 63 150 5 439.70 25 93.17 CHANGED .hpYls.u-puspYRsIhR.sFhtttpphphhLps.--V...lstLtt.tphhsch..........t.cplptcLcQLscWGNLtsppDTu+.ssolt-Fcp++ahYQlothuhplERhltplpssh.spsu.uLpsshLcclhppLppLtphs........ps-stc........laphhpcLhssFpsLspsupsahupLtpshs.pcphcs-sFluaKcpLlsYLpcFltcLtppuspItttLtplptptlc..plLptsspt-tt.st............................s........................shtcthschts+WpuLpsWFlupsu.csspschLpctsppAIspllpslpRlsEp+sstssRcs-hcpLAtWFupsss.-pAHcLhsssFGltpsRHLpss.......stc.ssshssuha-uPPlclshphRppGpttppspsutl.D+stp+cthttphtpcppptctthppL..sssstlclupls..LsspshphhLplLucAhs.......p.....ttsspo-sshplpLp.hss.sphshlcoscGsLphs-ahl ...................hpYLs.s-ss.spYRhIhR.hFhtth.p+h.phhLh..--V.......hphlpp..shhtch..........s.-phppsLppLsc...Wt..NL.st...pDs..u+.spTlpEa..cp++ahYpho.huhplcRhlhplpphh..ttsu.uLpsshhcclhppLppltphh...........ptstpc................................................lhphhpcLtssFppltpsspcahupLts..............hhs....pp...h.psctFlhaKcpLlpYL....ppFltsLpphu.pItthlpplpt.p.tlp.....pllpphsph-hthst..........................................p..............s.pphhpphts+WpuLptWFhu.pss..t.spsptLtptspphItplhthstclsEpppttssRpp-hhpLAphFsps.s...ppAHpL.ussFGlh.sRHltss.......ptc.s.shspu.ha-tsPhplshp.Rppshpppt.ttsshh.Dpstp+pthhtphhppp...ppppthhtpl..hpsstlchs.pls....lsstspphLLphlucAhs............p.......tttsps-suhphplp..ts...tptshl+stcG.pLphPshhl................................................................. 
0 24 50 58 +9493 PF09661 DUF2398 CHP02678; Protein of unknown function (DUF2398) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Betaproteobacteria). 22.20 22.20 22.30 23.80 19.90 22.10 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.99 0.70 -5.73 18 150 2009-09-11 13:39:07 2007-05-14 14:29:58 5 4 127 0 66 155 4 340.50 24 84.48 CHANGED lRsLLspPhlsp.sp-sEhathV+Rc..pstL+caFpcchGapLlVss..phARLtKhPssspsshshtch......hps+cYshLCLsLAhLEs....sspQhsLucLs-plp.Atss-sshs.....hshsshscR+ulVpsL+hLlslGllptsDGct-sFspcp.....s.u-sLY-lsttshthLls..hsps.uph............pshs-hs.pcshssssttpst..........RpRlhR+Ll.sPVlahc-Lssp-hsYLpsppp.tltcclp-hhGhhhEhRuEGlhhl......cs-c.phssshaPcsu........o.lucssLLlsptltpch...t.sphsts...........lstsclpshltcltpc...asssWp+..ttcstu.scLscpllshLpphtLl.........pt.s-t..lhhhPAsAR.......ausph.ss .............................................hchLLpp.hlhp...tp..-.-ha.hl+..cp..tptL+cahtcphGapLllss..ths+LtKhPsps.c.s...hhshtph.........p.hcYshhC.lhLAhLEc.......stpQhhLupls-hlp..sths..t.t................h-hsthpcR+uLVcslchhhphGllphsD....Gst...-taspst.......s.s-sLY-ssthshhhlhs..hspshtph.....................................................pphpshh.pp.ph.s.sp...pt.pt...hh...........Rp+lhRpLl.sPsVYh..p..chssp-hhYlpp.+t..tltp.clpchh.Ghthch+tpuhhll........sps..phhs.hFPsps........s.hsclsLLlsthltpph.................th.....s....h.......................ls.tphtthltcltpc...htshWt+....hpths...tplhppslphhpphthh...................................ph.tct...lhhhPhhu+htst...t....................................... 
0 23 51 60 +9494 PF09662 Phenyl_P_gamma Phenylphosphate carboxylase gamma subunit (Phenyl_P_gamma) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this protein family are the gamma subunit of phenylphosphate carboxylase. Phenol (hydroxybenzene) is converted to phenylphosphate, then para-carboxylated by this four-subunit enzyme, with the release of phosphate, to 4-hydroxybenzoate. The enzyme contains neither biotin nor thiamin pyrophosphate. The gamma subunit has no known homologues. 25.00 25.00 26.60 26.50 21.60 21.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.82 0.72 -4.27 2 10 2009-01-15 18:05:59 2007-05-14 17:47:31 5 1 5 0 1 10 0 81.30 43 96.10 CHANGED MNQWEVFVMD.AELPEGppLELSVRTLNPGLKKYTYQRV+AElSsALDKFPDpLQVRhGRGQLssQpFSIRIIEpVQRMPAKYL ...........................M.pa-lFl.sLsELsEGpELELpVRDLTPGlHKYsa..+hVKApVSucPcsaP..-+LhlRFGRGQhpspsaSI+llpclp+hPt+aL...... 0 1 1 1 +9495 PF09663 Amido_AtzD_TrzD Amidohydrolase ring-opening protein (Amido_AtzD_TrzD) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are ring-opening amidohydrolases, including cyanuric acid amidohydrolase (EC:3.5.2.15) (AtzD and TrzD) and barbiturase. Note that barbiturase does not act as defined for EC:3.5.2.1 (barbiturate + water = malonate + urea) but rather catalyses the ring opening of barbituric acid to ureidomalonic acid. 
25.00 25.00 31.10 94.90 20.60 19.80 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.12 0.70 -5.88 10 66 2009-01-15 18:05:59 2007-05-14 17:49:48 5 2 60 0 33 66 9 335.60 48 98.20 CHANGED s-VaRlPhcuPuDVSGLAsLI-sGtlcPscIVAVlGKTEGNGCVNDFTRsaATpuLpslLuc+lshu.cEV.s+lAhVMSGGTEGVLSPHhTVFAt+cspcssps...s+RLAVGhAhTcsLLPE-LGRtsQlscVAAAV+sAMcDAGIsDPuDVHFVQlKCPLLTscRIpsAcuRGcslATcDThcSMuaSRGASALGlAlALGElsuuploDpslhpDauLaSsVASsSAGVELhcsEIIVlG.SsshuGcLsIuHAVMpDAIDscuV+uALcclGLtss.......sscttuRLVNVFAKAEAussGpVRGRRHTMLDDSDIssTRHARAsVGGVlAulsGcstlaVSGGAEHQGPsGGGPVAVIAct .....pVh+lshpuPuDsSuLtthIssGhlpssclVAllGKTEGNGsVNDaTRthAstuhppsLup....+hshs..p-.......V.pplshVhSGGTEGVlSPHhTlFspp....ss.s.sstt.ss....ttRLslGhAhTcshhPE-lGRhu.lpcsAsuV+pAMtcAGIsDPuDVHaVQlKsPLLT.ppItsApuRGpsssTcc....ThcSMuhSRGASALGlAVA....LGElshs....hsD...pslhpDhsLaSulASsSuGlEL.cspllVl......GNutshuGphtIuHuVMpDAlDhsulhtAlcshGlp.s.......ts.ts+lVsVFsKuEAsssGplRG+RpsMLsDS.Dl.p.pRph+AsVGGlhAulsGcsslaVSsu..ApHQGPsGGGPVAsIsc.h... 0 14 28 29 +9496 PF09664 DUF2399 CHP02679; Protein of unknown function C-terminus (DUF2399) TIGRFAMs, Coggill P anon TIGRFAMs Domain Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Beta-proteobacteria). Just the C-terminal region is included here. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.05 0.71 -4.35 17 184 2012-10-01 21:47:57 2007-05-14 17:52:04 5 5 172 0 66 266 4 144.40 25 38.01 CHANGED hpLoLRpltphts...shsspup........VaVsENPsVhushhDp.....s...........tssPLVCTsGpPssAshtLLstL..sssGspLtYpGDFDhsGL.tlAsplhpRasscP..WRhsssDY........htussssslsspsls......ush.spLspshtppuhsshpEtllstLlsDL .............................................................h..lsLhpltth.t.....h.s.sspt........lallENsuVhst..hhpp......t..................tshsLlCs...sGp..ss....h.u.s.h.hLlctL........tts..G..s...p....lhY..p..GDFDstGl.tIAspl.....h.......p....p.....a..s.....t.....p.......a+hsspsY.............................hts.h.s.t...t.......h.s.tp......p...hp..h..t..tl......s.....s.........tlspthpph....thsshpEtlht.hh.................................................................. 0 29 51 60 +9497 PF09665 RE_Alw26IDE Type II restriction endonuclease (RE_Alw26IDE) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this entry are type II restriction endonucleases of the Alw26I/Eco31I/Esp3I family. Characterised specificities of the three members are GGTCTC, CGTCTC and the shared subsequence GTCTC. 
21.50 21.50 22.20 400.30 20.90 21.40 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -13.08 0.70 -5.79 7 16 2009-01-15 18:05:59 2007-05-14 18:17:05 5 1 14 0 1 14 11 493.30 51 93.32 CHANGED cPpFl-Yh+hIlpHPNYtGMPsshttcGcIpWpssSs+poG.Fhph.ppRhtWWcpKAcplGls..sspsthhopsA+hIHPTthKPC+hCG+.hslsYhYPs+shhp+lpK.....-pF.l..sp.ppIsclhspLhphhu-phhpph.thlhscth.phschsssh-talp.l.ppalspt.phLSPGsMuNsPDRhDGFHoaNtCCRuppDpGRpcENL+SYopDRRuFEaWsDGsWsAADpLM.....Gphthssphh...pl............SADHIGPISLGFsHcPpF.p.hCpusNSuKNsRhhhsDlppLlchEs.pucsVuSWasctlW-hhKcsV......sss-sshp..hSphLRsNpchahplLhclhp....sGpthhLushLp.cY.....ApashsF-slphpsphhphpslpp.p+hTchsptppsRtlRIAFEuLp-YspK..EsRphhts.s....cp.pphls.hhp.lp........hhs-plsp.lppss.p-c.hpslh...........tp...h+chLhphMshluccltp ..stFLEYs+hIVsHPNYhGMPDshGccGcIQWEAPSNRuSGpF+cTaQ+RhcWWcsKA+SlGID..soEpuWISKTAKLIHPhGhKPCKpCGKpM-LuYuYPNcpFhuRl+KLsYlDEoFEL..opsEHIlDLlsRLccpaGEcla.DLP+LhssKol.sIPcloSsL-sWIcaLcEpYIPpEu+hLSPGAMANPPDRFDGFHSaNRCCRShADKGRoKENLKSYsTDRRsFEYWVDGDWVAADRLM.....GplRo.NNhhh....pEECLNsst....PsPCpADHIGPISLGFuHRPpF.QhLCKSCNSAKNNRMYLSDlhpLL-AEN.EGcoVhSWaucclWs+lKHoV......cDsEcAlR..LSKlLRDNRHTYMpLLc+Ihc....-GaasFLAolL+LEh.....ANYs.hFEGLsISNHlTcYcSl+K.++pochsthpcsRplRIAFpuLp-YtpK..ENRNshhVss....c.ppphhschh.ppLpuhsp.h..ppLs-tlst.l.pp..pcpphpslhptl....h..t.cpaphhhchL.phhs.lGcpht.s..................... 0 1 1 1 +9498 PF09666 Sororin Sororin protein Bateman A anon Bateman A Family Sororin is an essential, cell cycle-dependent mediator of sister chromatid cohesion [1]. The protein is nuclear in interphase cells, dispersed from the chromatin in mitosis, and interacts with the cohesin complex [1]. 
23.00 23.00 27.50 27.50 19.40 18.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.09 0.71 -4.05 14 61 2009-01-15 18:05:59 2007-05-15 14:10:46 5 2 42 0 29 58 0 126.90 37 35.78 CHANGED ENhPP.........hsppshhhssstsspssps.V.lPsstcolQp+ushcslsIl.......WpKpV......cpo.oR.phs..............................ts.pssoPp...tp...............s+psLFGFEchLssE....-h..spssspu+sts.psVo............hpchsp.tssscslPtVs..sh+c .........................................ENtPP.......p..hppsh.t...sps.sspssps.V..s..hco.ppc.s..hcshsl........MSKKV......RRSYSRLps...............................tsssTSTP.....................uRRShFGFEshLssE....DL.tsuslssSKhhpsspVs................scshsPDpsLPGlSsss.+..................... 0 3 5 12 +9499 PF09667 DUF2028 Domain of unknown function (DUF2028) Bateman A anon Bateman A Family This region of similarity is found in the vertebrate homologues of the drosophila Bobby Sox. 25.00 25.00 31.90 28.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.07 0.71 -4.09 3 80 2009-01-15 18:05:59 2007-05-15 16:21:03 5 4 40 0 31 87 0 124.30 72 15.86 CHANGED EMPQLNFuMADPTQMGGLSMLLLAGEHALTo................PEVSSGICR.s.St.Pp.ppKSsLFpFsElSSSTSHSDsPAssKQspTSALFQ..FAEISSsTS.sQl+sA-PVKRCGKSSL.............suusptK.CtpSALFQLAEMCLASEAsKMEpS+slcsD-S ..............EMPQLNFGMA..DPTQMGGLSMLLLAGEHALGT................PElSSGTCRPD..lSES....PELRQKSPLFQFAEISS.STSHPD..AsoKQCQsSALFQ..FAEISSNTS..QLGGsEPVKRCGK........................................................SALFQL..AE.................M...CLASEGhKME-SKLhKuKES................................. 0 2 4 14 +9500 PF09668 Asp_protease Aspartyl protease Mistry J, Wood V anon Pfam-B_9589 (release 20.0) Family This family of eukaryotic aspartyl proteases have a fold similar to retroviral proteases which implies they function proteolytically during regulated protein turnover [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.55 12 445 2012-10-02 15:32:34 2007-05-16 13:33:18 5 22 263 6 291 833 24 118.70 41 29.40 CHANGED ppplpENhppAhEapPEsFupVhMLYlssclNGh.VKAFVDSGAQsoIMS.cCAE+CGlhRLlDTRatGlA+GV.Gst+IlGRlH.splKlGs.halPsuhoVlE.spclDhlhGLDMLKRHQssID .....................................ptlp-shp..A.hE...tPEsF.s..p..V..s...MLYlssclN.Gp.s.l.K.AF.V.DSGAQ.hT.IMStu.CA..-..+..C...s..l..h...R..L...l.D.p..R.at..G..l.A..p..G..V.....G..o..t..c..I.l....G..+.l...H........s.....pl..p...lts....a.l...s...C......SF.s.V..l.-........s.........p.......s..h....-..h.LlGLDhL+RHpssID............................................................................ 0 105 148 212 +9501 PF09669 Phage_pRha Phage regulatory protein Rha (Phage_pRha) TIGRFAMs, Coggill P, Iyer LM, Bateman A anon Iyer LM Family Members of this protein family are found in temperate phage and bacterial prophage regions. Members include the product of the rha gene of the lambdoid phage phi-80, a late operon gene. The presence of this gene interferes with infection of bacterial strains that lack integration host factor (IHF), which regulates the rha gene. It is suggested that Rha is a phage regulatory protein. 28.20 28.20 28.60 28.30 27.70 27.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.13 0.72 -3.63 122 1673 2009-01-15 18:05:59 2007-05-16 13:38:50 5 20 1017 \N 171 1330 10 87.90 27 38.68 CHANGED ssoShclA-hhsKcHcsVlRsIcphh...........................t........sths.a.tthp.t....s.spspp...hYhl....s+cshhhLlhuao.sttshpa+...tthIctF..pchEp.pl ....................hsoShplAchh...........sKcHcsVl+sIcplttt..............................p..h..sphs.a..psphh...........s.sp.s.cp..h.hYhl....s+cuhhhLlhuho..stts.h.pFK...tth...lptFppMEpt..................................................... 
0 39 110 141 +9502 PF09670 Cas_Cas02710 CRISPR-associated protein (Cas_Cas02710) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are found, exclusively in the vicinity of CRISPR repeats and other CRISPR-associated (cas) genes, in Methanothermobacter thermautotrophicus (Methanobacterium thermoformicicum), Thermus thermophilus (Deinococcus-Thermus), Chloroflexus aurantiacus (Chloroflexi), and Thermomicrobium roseum (Thermomicrobia). 22.40 22.40 22.40 22.40 22.10 22.30 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.34 0.70 -5.83 10 109 2012-10-11 20:44:46 2007-05-16 14:25:52 5 5 85 0 64 132 12 269.30 16 69.90 CHANGED -slthuIsshpP-+VlFLso...............-pScsp..lscl+...cpsstpspp.........hhshstDssslhcsYcchcsll-+a...thcccplllDhTGGTKsMouGLslAuhsh.....chshsYVsG....csssG+lpsGoE+lpp.psPassauclEtcpAtpLaN+tcauuAhplhcsLspR.lsccps..YshhttLscuYhtWDpFcaccAh-tLc+shsp.sl...........sscppsLpphsslh+sLpshLssttsshsth.c...shsLltDLLuNAcRRAupt+YDDAuhRlYRsLELluQtcLt.shGlsTusss.....hhpclP-tLcpsYcptpsspGht.........h+IuLhsuYtLLpshGDc..lucpa.....htphsclpphlpsRNpSILAHGacs....loccpYcpltDhlpshhpth 
.....................................................................................................................................................................................sltth..pP..ctlh.hl.ho....................................ppstth......htp...lh......p.h..th.t.tt.........................hhhh.hs.psh..p....h.a.ct.lpph.lpph........tt..t...pl.hlDhTGGTKsMusuh.s.huuh.h....................sh.p.h.Ylsst......p.t.tps..stp................................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................................. 0 27 45 60 +9503 PF09671 Spore_GerQ Spore coat protein (Spore_GerQ) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this protein family are the spore coat protein GerQ of endospore-forming Firmicutes (low GC Gram-positive bacteria). This protein is cross-linked by a spore coat-associated transglutaminase. 25.00 25.00 65.90 65.60 24.10 20.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.69 0.72 -4.33 19 183 2009-01-15 18:05:59 2007-05-16 14:32:04 5 1 174 0 31 116 0 80.90 67 52.50 CHANGED GMLPLEQSYIENILRLNKGKpATVhMTYEpuSphGopsasGIIEAAGRDHIVISEPpSG+RYLLLMIYLDYVpFsEEITYh ......GMLP.lEQSYIENILRLN+GKpATVhMTYEpuop..h..sspsapGIIEAAGRDHIlIS-Ppo......G+RYLLLMIYLDYVpFsEEIsY..... 0 10 21 23 +9505 PF09673 TrbC_Ftype Type-F conjugative transfer system pilin assembly protein TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents TrbC, a protein that is an essential component of the F-type conjugative pilus assembly system for the transfer of plasmid DNA. 
The N-terminal portion of these proteins is heterogeneous. 20.90 20.90 21.10 32.00 20.80 20.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.44 0.71 -4.47 49 432 2009-01-15 18:05:59 2007-05-16 15:26:40 5 1 316 0 67 337 22 108.60 35 41.09 CHANGED hlFlShSMPppuL+phhppspph....ssslVlRGhhss...shppThstlppLhppstt.............ssltIDPphFcpasIspVPuhVhsps...........s.spt.t.......spastltGslolphALcpls ........hhFlShShP-puLKphlt-spch.....GhssllRGhsss....sLps.TscslhsLlpcssss.............ulpIDPslFppYsIpsVPulVlhss..........................................psachlpGslplttAL-pl......................................... 1 16 35 54 +9506 PF09674 DUF2400 CHP02757; Protein of unknown function (DUF2400) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this uncharacterised protein family are found sporadically, so far only among spirochetes, epsilon and delta proteobacteria, and Bacteroides. The function is unknown and its gene neighbourhoods show little conservation. 21.90 21.90 22.30 32.70 21.70 21.80 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.43 0.70 -4.66 49 423 2009-09-10 16:07:27 2007-05-16 15:34:14 5 2 396 0 109 346 215 212.90 36 86.13 CHANGED DPlplsH+apc.pDhEIuuhlsAhhAaGNtKtIlcphccL.hpLhspuPhpalhphp.....tp..hpthpsFh..aRh.supDltthhhsLpplhp..phsuLEshFt................thppssslppulppFhpthhp.h...h..hs.cal..............ssstssSAsKRlNMaLRWMVRpcs..lDhGlWcs.lssupLlhPLDsHst+lu+pL.GLhpRKpsDh+sshElTpsLRclDPpDPlKYDFALatLGhpc ...........DPlphs+hasc.......s.EhhullsAhhAaG.st+pIlphlccL.hslhs..s..s...t.h...................pcp.h.h....pphcshhYRh..stpDlhthhhsLpplhp..c...toLcphFh....................................t.h.p.p...p...p.shhpu.ltsFhpthhph......h.....hs...chh............h...pssssSshKRhNMaLRWMVR+Dp..lDlGlWpp..lpspcLllPLDTHst+luhpL.GLl......c......RK..........phDhKsshElTpsL+clsssDPlKYDFALatlG.s................ 
0 42 91 105 +9507 PF09675 Chlamy_scaf Chlamydia-phage Chp2 scaffold (Chlamy_scaf) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this entry are encoded by genes in chlamydia-phage such as Chp2. These viruses have around eight genes and obligately infect intracellular bacterial pathogens of the genus Chlamydia. This protein is annotated as VP3 or structural protein (as if a protein of mature viral particles), however, it is displaced from procapsids as DNA is packaged, and therefore is more correctly described as a scaffolding protein. 25.00 25.00 47.00 46.70 22.70 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.47 0.71 -4.07 4 9 2009-01-15 18:05:59 2007-05-16 15:38:40 5 1 8 0 1 10 48 108.80 40 74.45 CHANGED IphIVtKhNtsuslpHl....tpRpspYs-CssPhDap-ALssVhcupEtFDuLPA+lRcpFuNsPcEhLp.......aLpc.cNh-EuhuLG..ll..-........hhcst.+csp.sssQppsh .INpIVAKhNuTGVlpHl....p+RpscYhDC.sPh-YpEALNlVhEApEtFDuLPA+lREcFuNsPcthL-.......FLsc.cNhEEutuLG..hl.t-tp......hhcst.+csp.sssQp.s.l.......... 0 0 0 1 +9508 PF09676 TraV Type IV conjugative transfer system lipoprotein (TraV) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry includes TraV, which is a component of conjugative type IV secretion system. TraV is an outer membrane lipoprotein that is believed to interact with the secretin TraK. The alignment contains three conserved cysteines in the N-terminal half. 
21.00 21.00 21.30 22.20 20.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.77 0.71 -3.40 39 394 2009-01-15 18:05:59 2007-05-16 15:56:05 5 2 298 0 59 271 10 128.70 30 69.11 CHANGED sscFuCsussus....Ctohspshppuhtt..stp.ht........................ssssssssssssssssssshsh...s..h..............................................................Pl.Rsttclh+lhlsPahDsc..GchapsshVahhlcsu...pW ....................................................................................................................po-apCsusssss...Chohcps...ctspphpt.spp.sps.sst.th....................................sss.susps.ssssp...sppthhss...cshhs..tstpsp............................................................................................................................Pl.RosppshplWIAPahD..sp..sshapsuhVahllcPutW........... 0 16 30 47 +9509 PF09677 TrbI_Ftype Type-F conjugative transfer system protein (TrbI_Ftype) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents TrbI, an essential component of the F-type conjugative transfer system for plasmid DNA transfer that has been shown to be localized to the periplasm. 21.50 21.50 22.00 21.80 21.20 21.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.13 0.72 -4.23 11 206 2009-01-15 18:05:59 2007-05-16 16:16:43 5 2 170 0 26 141 4 108.80 52 83.32 CHANGED ++p.l.hl.hslshlslsshlohhlsp..sPslVsFDMKpTlsuFapSsupppLo-tppcshssRFsssLccpLptaptpH+slILVoPAVVpGAsDlTp-IQpslhcRhp ........................pphhhhlsus.LuhVlLNAAlSau.l...l..R.l.s..sP..l.....ssAFsMKpTVDAF.aDS.AS.Q.......KpLSEAQSKALSuR...FNoALEASLQuWQQcHHAVILVSPAVV.Q..G..A.P.DITREIQQDIARRM+....... 0 4 10 20 +9510 PF09678 Caa3_CtaG Cytochrome c oxidase caa3 assembly factor (Caa3_CtaG) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are the CtaG protein required for assembly of active cytochrome c oxidase of the caa3 type, as found in Bacillus subtilis. 
21.80 21.80 21.90 25.60 20.70 21.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.53 0.70 -5.44 106 1028 2009-01-15 18:05:59 2007-05-16 16:22:26 5 6 749 0 353 921 395 237.70 26 56.82 CHANGED sshYhhuhh+hppp..stths...tRsshah.hGhhslhhshsoslshhup.hhFosHMlpHhlLthlsP.LLlLG.tPhslhh...cslssttt..........hh.hhhhsthh+hlspPhlAhhlFsushahaa.hsslashshpshhhHhlhphphlluGhLFahslls.sP...stphshht+lhhlhsshshpshhGshlhhussslhssaht...............................sas.h.ssltDQplGGhlhWhsGplshllshhhlhhpWh+pc ...................................h.hhYhhu.hhth+p+....spthss....tRh...hhah..hGhssl.hhshsoslshhup.hhFShHMhpHhlLthlsPhLLlLG.tPhsLhh...pslshsht............hh....h....h..pt....hhc....h....l....spPlhAh...llFlushahha.hs.slashsh.psphsHhhhphphllsGhlaaaslls..c................Ph...spp.hshht+lhhlhsshshpshhushlhhsspslhssahp.............................hsas..h.shlpDQplGGhlhWhsu-lshlllhhhl.hhpWh+p...................... 0 113 251 309 +9511 PF09679 TraQ Type-F conjugative transfer system pilin chaperone (TraQ) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents TraQ, a protein that makes a specific interaction with pilin (TraA) to aid its transfer through the inner membrane during the process of F-type conjugative pilus assembly. 21.20 21.20 21.20 21.20 20.80 19.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.08 0.72 -4.06 4 133 2009-09-11 14:33:19 2007-05-17 10:28:23 5 3 110 0 3 55 5 89.20 81 97.41 CHANGED MRKhRFpLPchDITGhWVhulGlaFHIVuRLVh+cP.MAhhLAElIulhhVLaGuYRlLsAWIAcsp+EE+....ARptuhhctp.-t........+ .................hpKhRFSLPRLDITGMWVFSLGVWFHIVARLVYSKPWMAFFLAELIAAILVLFGAYQVLDAWIARVSREEREALEARQQAMMEGQQEGG....HVSH.............................. 1 0 0 2 +9512 PF09680 Tiny_TM_bacill Protein of unknown function (Tiny_TM_bacill) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of hypothetical proteins, half of which are 40 residues or less in length. 
Members are found only in spore-forming species. A Gly-rich variable region is followed by a strongly conserved, highly hydrophobic region, predicted to form a transmembrane helix, ending with an invariant Gly. The consensus for this stretch is FALLVVFILLIIV. 20.50 20.50 20.80 20.50 19.90 19.20 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.58 0.72 -6.85 0.72 -4.52 27 936 2009-01-15 18:05:59 2007-05-17 12:31:50 5 2 143 0 116 418 0 25.00 71 52.80 CHANGED uaGu..GFALIVVLFILLIIVGuuah ...............GhGG...GF....ALLVVLFILLIIVGASCa...... 1 27 80 91 +9513 PF09681 Phage_rep_org_N N-terminal phage replisome organiser (Phage_rep_org_N) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents the N-terminal domain of a small family of phage proteins. The protein contains a region of low-complexity sequence that reflects DNA direct repeats able to function as an origin of phage replication. The region is N-terminal to the low-complexity region. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.27 0.71 -4.54 26 638 2012-10-04 14:01:12 2007-05-17 13:57:17 5 8 478 0 41 432 20 117.60 31 43.88 CHANGED WIKlpsshFDDcKIKll-pMs-sDslhhIWh+lllhuGKhNssGhlahscslPYTsEhLAhhhs+slssl+hALpshpphGMIEl.h-ssshplsN..ap+aQsh-sh-ppRcpscc+hp+pcp ........................WlKLppshFcDc+I+.hlcph.s.su..s..shhhIalKLllhu..hpspGt.Lhh...s...tp...lshstc.LAphhc.cshssVchslphhp....phGLlEh..h-.s.st...hhlss..ht.ph.sp.-sstsc+cRptRpppp...h.................................. 0 11 34 39 +9514 PF09682 Holin_LLH Phage holin protein (Holin_LLH) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry identifies a family of putative phage holin from a number of phage and prophage regions of Gram-positive bacteria. Like other holins, it is small (about 100 amino acids) with stretches of hydrophobic sequence and is encoded adjacent to lytic enzymes. 
24.80 24.80 24.80 24.80 24.60 24.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.23 0.72 -3.93 13 257 2009-01-15 18:05:59 2007-05-17 14:32:53 5 1 214 0 20 171 1 103.90 41 87.97 CHANGED Msplsp.Ilssulul.lsllsGhhs+tVhcaLhKK.GGEKss+IsEIlA+sAVsAVEQlss-su.KGp-KLspA+stlpshLsphsl.phoDsplcshIEuAVKpMNssh ......................MpplsphIhssAlul...LsllsGhl....l+...sVK-YL...hpK....G...GEKsl+Is......E..ILAKsAVsAVEQlus-.suhKGcEKLspApstV+spLs+h..sI..shTDcp.l-shVEuAV+pMNDs....................... 0 5 9 12 +9515 PF09683 Lactococcin_972 Bacteriocin (Lactococcin_972) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent bacteriocins related to lactococcin. Members tend to be found in association with a seven transmembrane putative immunity protein. 22.00 22.00 22.10 23.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.50 0.72 -3.62 6 605 2009-01-15 18:05:59 2007-05-17 14:37:33 5 1 388 0 27 146 0 60.30 44 66.27 CHANGED V-GGhWsaGlG..usasaScYhHup+sHsuTslut..s+os+uhAcAGshu+AShsK..sh.spcsFY ...................s-GGsWsYG......G..........s..th...saSsYhH.sp..+hHsSolh.....ut.....spS.sKG.hAt.AGspShAhlhs...sW.utphAFY.................... 0 10 15 19 +9516 PF09684 Tail_P2_I Phage tail protein (Tail_P2_I) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent the family of phage P2 protein I and related tail proteins from a number of temperate phage of Gram-negative bacteria. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.66 0.71 -4.62 49 1210 2009-01-15 18:05:59 2007-05-17 15:44:47 5 9 872 0 232 949 32 134.60 33 58.83 CHANGED LPssu.osLE+slt.tshspl......tthPsslpsLhsPspCPsslLPaLAWuhSV...DpW-ssWsEpsKRpllcs.........Ah.lH++KGThuAlR+slcslGh..hclhEWapps........st.......PuTFplpltlppp.......slspphhtplpcllscuKPsp .....................................................LPs......h.htht...tshstl.......pph..s.s.lpsL..h.s...P.s..sC.Ps...p....hL....PaLA.WsauV...........D.cW..-.p....s.W....sEpsKRpll+s.........AhhlHc+KGThuAl.Rcll-.s.......l..Gh....hlclpE.....Waphs.........................t.............tPsp..Fclpls.l.pp.......uls.pphh.plcclls-s+ssp.............................................................................. 1 54 137 196 +9517 PF09685 Tic20 Tic20-like protein TIGRFAMs, Coggill P anon TIGRFAMs & Jackhmmer:D3PVW8 Family Chloroplast function requires the import of nuclear encoded proteins from the cytoplasm across the chloroplast double membrane. This is accomplished by two protein complexes, the Toc complex located at the outer membrane and the Tic complex located at the inner membrane. The Toc complex recognises specific proteins by a cleavable N-terminal sequence and is primarily responsible for translocation through the outer membrane, while the Tic complex translocates the protein through the inner membrane. This entry represents Tic20, a core member of the Tic complex. This protein is deeply embedded in the inner envelope membrane and is thought to function as a protein- conducting component of the Tic complex. This family also includes many proteins of unknown function from non-synthetic organisms. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.76 0.72 -4.07 122 1525 2009-01-15 18:05:59 2007-05-17 15:49:58 5 10 1098 0 439 1083 83 107.20 21 79.33 CHANGED s-+ph.uhlsH....lu..................hh.uhls..PLllallp..+-p..ssalcppu+pulNFplo...hhlhs.....llssllshlh..........................................hhlsh.......llhhlh...........lht....llh..sIl....uul+A..ppGpha+.Y..Phs.lch....l ..............................p.h.uhLsa....hu..........................as..shls...Pllla...llp....ccp.........hlctpu+pu.l.hpls....hhlhs....llhh.hl.shlh..........................................hhlsh..............lhhhhh.........hlhs....hlh...sIh.......sslKs...ps........h..h..Ph...................................... 0 153 304 392 +9518 PF09686 Plasmid_RAQPRD Plasmid protein of unknown function (Plasmid_RAQPRD) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry identifies a family of proteins, which are about 100 amino acids in length, including a predicted signal sequence and a perfectly conserved motif RAQPRD towards the C terminus. Members are found in the Pseudomonas putida TOL plasmid pWW0 and in cryptic plasmid regions of Salmonella enterica subsp. enterica serovar Typhi and Pseudomonas syringae DC3000. The function of these proteins is unknown. 21.10 21.10 23.40 23.90 20.80 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.77 0.72 -3.90 30 227 2009-01-15 18:05:59 2007-05-17 16:26:49 5 1 183 \N 68 210 9 80.10 40 72.02 CHANGED uuhAs....sAuE+-pLAtslRQL-tlcths....pRApssAs.ssts...Ra+FDYsRlpsDLpplRpGIppYLsPSRAQPRD...sssLsGDY .......s..hsssuuE+pcLuhslRQL-tlctsl....pR.A...tstAs.sssp...RaaFDYsRlpuDlpplRtGIppYLsPSR..AQPRD....sssL.sGpY................ 
0 7 30 54 +9519 PF09687 PRESAN P_fal_TIGR01639; PHIST_a_b; PHIST_a_c; PRESAC; DUF3837; PRESAC; Plasmodium RESA N-terminal Aravind L,Coggill P anon TIGRFAMs, Aravind L Domain The short, four-helical domain first identified in the Plasmodium export proteins PHISTa and PHISTc [1] has been extended to become this six-helical PRESAN domain identified in the P. falciparum-specific RESA-type (Ring-infected erythrocyte surface antigen) proteins in association with the DnaJ domain. Overall, at least 67 proteins have been detected in P. falciparum with complete copies of the PRESAN domain. No versions of this domain were detected in other apicomplexan genera, suggesting that the domain was 'invented' after the divergence of the lineage leading to the genus Plasmodium undergoing a dramatic proliferation only in P. falciparum. A secondary structure-prediction derived from the multiple alignment of the PRESAN family reveals that it is composed of an all-helical fold with six conserved helical segments. There is some evidence it might localise to membranes [2]. 23.60 23.60 25.50 24.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.67 0.71 -4.08 162 210 2010-01-11 11:54:32 2007-05-17 16:34:49 5 7 10 0 203 264 0 125.10 16 30.63 CHANGED phppphsccclpchlp...sltt..h...hspcchhtla.phhphp+pcahphhppLhphhpplspch.pl........sc.............chp.pchWpcspptlppphtph-pthpppFhphl..ppp...h...sttc................FhphlpphpptWcphhpphpppa ......................phpcpclpc.hlp..pltt..h...hspcchhtla.phhphp+pcahphhppLhphhppltpph....pl.........sc.............chp.pchWpcshptltpphtph-pthpp.pFhphl...ppp....h.shpc................FtphlpphpptWcphtpphppph............. 0 83 84 162 +9520 PF09688 Wx5_PLAF3D7 CHP1606_PLAF3D7; Protein of unknown function (Wx5_PLAF3D7) TIGRFAMs, Coggill P anon TIGRFAMs Family This set of protein sequences represent a family of at least four proteins in Plasmodium falciparum (isolate 3D7). 
An interesting feature is five perfectly conserved Trp residues. 25.00 25.00 28.80 28.80 20.10 20.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.00 0.71 -4.24 8 26 2009-01-15 18:05:59 2007-05-17 16:45:06 5 1 6 0 26 33 0 139.00 23 60.31 CHANGED acRIluE...+sp.sacp-FslthN-pp.pphpp.scpspPh..p..-pIpclspLsspsoclWK-slcsMcpcYhcpTD..pM-+pWRDtMWpp+WsK.YL-sVHspINppLN-.sholc-KEphlspWlpWsp-DacaFLphlKE-Wcc ..........................................................................t...t...pp.s......p.ht...c..hppLhhphschWcpslpsMhppYpphT-...phspcW+phMWNppWt+.YLEtlhspIspslps.shohpcpEshlpphlphspcDFphFLp.lptcWc....... 0 5 7 21 +9521 PF09689 PY_rept_46 Plasmodium yoelii repeat (PY_rept_46) TIGRFAMs, Coggill P anon TIGRFAMs Domain This repeat is found in the products of only 2 genes in Plasmodium yoelii, in each of these proteins it is repeated 9 times. It is found in no other organism. 20.80 20.80 22.00 20.80 19.50 18.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.41 0.72 -3.63 7 34 2009-01-15 18:05:59 2007-05-18 11:17:37 5 3 2 0 34 33 0 43.80 61 80.98 CHANGED KSK+SRFPoMFKRDKKDK-sccu.......ts.ESLssD.....KSLEoLsDDscsp ......KSK+S+FsoMFK+DKpDK-sccu...............stSQEoLssD.....KSLEoLsDDscs..... 0 0 18 34 +9522 PF09690 PYST-C1 Plasmodium yoelii subtelomeric region (PYST-C1) TIGRFAMs, Coggill P anon TIGRFAMs Domain This group of sequences are defined by the N-terminal domain of a paralogous family of Plasmodium yoelii genes preferentially located in the subtelomeric regions of the chromosomes. There are no obvious homologues to these genes in any other organism. The C-terminal portions of the genes that contain this domain are divergent and some contain other yoelii-specific paralogous domains such as PYST-C2 (IPR006491). 
25.00 25.00 33.90 33.40 19.40 18.50 hmmbuild --amino -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.04 0.72 -4.33 22 27 2009-01-15 18:05:59 2007-05-18 11:23:47 5 1 4 0 26 27 0 60.10 38 47.86 CHANGED Ks.SslGN+.lRuhKcIppsNEcNsIE.KpETQLpNNNs.p.+DccDsps............KcsKcsK ......KsSslGN+.hRuhKcIspoNEKNsIE.KpETQLpNNNs.p.Ks-ccspt........cpp....t........................... 0 0 15 26 +9523 PF09691 PulS_OutS Bacterial chaperone lipoprotein (PulS_OutS) TIGRFAMs, Coggill P anon TIGRFAMs Family This family comprises lipoproteins from four gamma proteobacterial species: PulS protein of Klebsiella pneumoniae (P20440), the OutS protein of Erwinia chrysanthemi (Q01567) and Pectobacterium chrysanthemi, and the functionally uncharacterized E. coli protein EtpO. PulS and OutS have been shown to interact with and facilitate insertion of secretins into the outer membrane, suggesting a chaperone-like, or piloting function for members of this family. 25.00 25.00 25.70 25.50 19.90 19.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.50 0.72 -4.05 8 624 2009-01-15 18:05:59 2007-05-18 11:52:13 5 1 539 6 60 190 1 99.80 72 82.95 CHANGED LsGCQQsssp..sPotphts........-QlpQLuoLlAGo+YLKpcCpRSDlPD-ssIh+oAlplApp+GWssts..hptLsp+ScslYpsLhcDsTPctspCupFNpphssFI-u .....MAlCANSYALoE...............................SEAEDMADLTAVFVFLKNDCGYQNLPNu.QIRRALVFFAQQN.QWDLS..N.........YD.........TFDMKALGEDSYRDLSGIGIPVAKKCKAL..ARD.......SLSLLAY.. 0 1 15 34 +9524 PF09692 Arb1 Argonaute siRNA chaperone (ARC) complex subunit Arb1 Mistry J, Wood V anon manual Family Arb1 is required for histone H3 Lys9 (H3-K9) methylation, heterochromatin, assembly and siRNA generation in fission yeast [1]. 
25.00 25.00 29.60 25.60 24.80 24.00 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.43 0.70 -5.62 11 86 2009-01-15 18:05:59 2007-05-18 13:22:42 5 5 74 0 72 88 0 321.70 29 75.91 CHANGED KKKppsK..op+sKp+.PTGFEEYYsDsPlTPsEYpEEcc.lYss......RhEcsI.Rapp+RRh.-SsRpslFsKYLshGGVDsGs+hFsG...lDpcsLpshcu-pIhttpAps.l..............spcpspasVDFsuVs+GFLophhPhhhs.psp-.lphusssl+NFLsYlLhH-VCPEYpcslctAp+hCDlApcEL.p.pphttthPG-FNsAsSoLFGtshtcha..ssuWsspccDp.hhssplARp....llpFshuutst.htshca.-hsp.sshpshplps.puaEVsslphPssss+thYpph.......hssLpPlGKLluKsahsPGhst........hDhss-p.......pssssspcaEFalE-slLphCasGMKlpssVapLNsGl+aFDplhssasSFYphLsN-LMhGWKEP .................................................h.c..ssGFEEhhs-sPhTstEhtppcp.l...Ys..........RlEpsIpRapt+R+h.pspRtplFscYh.hGGl-sus+hFpG........spppL.......pp..hstpplhthpAp...ssl..............................stpp.pasVDFpsVstGFLSphh..hhs..s.t.hphussslcNFLpYllhH-VCPE..Yp-..slhtApplCphAppEL.tspph.tt.hPGpFNhAsotLFs.s...h...t.h.....p...pta..pt......tt........thsht......hhthhhus.st..ts..ph.....phht.pphpsh.h.p..tshElhslh.Pstthpphapph........................sssltPlGphhs+shh...............Dh.stp..................tt.phphalEcslLphhhsGMKlt...sslhcLss..G..lpahcphhshhsSFashL.p-hMhtaKpP.................................................................................... 0 15 31 56 +9525 PF09693 Phage_XkdX Phage uncharacterised protein (Phage_XkdX) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry identifies a family of small (about 50 amino acid) phage proteins, found in at least 12 different phage and prophage regions of Gram-positive bacteria. In a number of these phage, the gene for this protein is found near the holin and endolysin genes. 
23.50 23.50 23.50 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.73 0.72 -4.43 23 600 2009-01-15 18:05:59 2007-05-18 13:28:44 5 2 404 0 44 281 2 39.20 39 73.28 CHANGED a-slKphYchG..haTp-pl+paVphphITtE-YccITG.ccY ................FccIKphYshs..hY.....op-.....p....lthaVsh..t..hITcEEYppITGccY.......... 0 22 35 39 +9526 PF09694 Gcw_chp Bacterial protein of unknown function (Gcw_chp) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a conserved hypothetical protein about 240 residues in length found so far in Proteobacteria including Shewanella oneidensis and Ralstonia solanacearum, usually as part of a paralogous family. The function is unknown. 20.40 20.40 20.40 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.52 0.70 -4.56 7 705 2012-10-03 17:14:37 2007-05-18 13:31:32 5 2 422 0 274 695 451 214.20 22 85.19 CHANGED tApsuSPhohSuNluLsSpYhFRGlo.ost+PAlQGGhDhuH.SGFYsGsWsSols....ssssshu........ssEhDlYGGass..slGt..hsaDlGlhtYhYPGupsss....Y...hsEhhuuluat.l...sh+hsau.s.s.hG............pSpsstYlshssshsls.sshThhAHlGhpcst.h.ss...uasDW+lGho.+shssshshustYhDspuc.sh............u..ts.shscsshhsolstTF 
..........................................hs..........ph..ous..lsh..s..SsYha.RGloQ...........o................s..s.....p.....PAl....QG..Gl....-....hua....s....u.....G.a..Yl...G.sWuS..s..l.................sh.ss.ss.........................s.hEh.DhYuG..Ypt.....ph.s.s.....hshDlG.l..h.tY.....hY.....P......us..s..............s...s..s......................................................hsE.h.h....s....s....l..u...a...s....s....h..............pht..h...s.h..sh...s...........p..h.......h..G.................................psps.s...h...Y...h.p........h.s......h.shs.....l...s..................s....h...s...l...s..s..p.h..G.........ap..p..h........p......s..................st.................sa........Dapluls......tth................s..hshsh..thhs.ss......t..t....................................................................................................................................................................................................................................................... 0 64 153 218 +9527 PF09695 YtfJ_HI0045 Bacterial protein of unknown function (YtfJ_HI0045) TIGRFAMs, Coggill P anon TIGRFAMs Family These are sequences from gamma proteobacteria that are related to the E. coli protein, YtfJ. 21.70 21.70 22.10 21.80 21.10 21.60 hmmbuild --amino -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.80 0.71 -4.90 26 1008 2012-10-03 14:45:55 2007-05-21 11:22:22 5 2 724 0 68 382 31 154.40 61 86.22 CHANGED lplupplPsVsVs-cGELhLps..sphuYpsWsSupLsGKVRVlQaIAGRoSAKEhNusLhpAIpuApFPc-+YQTTTIlNpDDAIaGTGsFV+SShEcuK+-FPWSphVLDppGsVppAWpLppcSSAIlVlDKpG+VhFsK-GuLossElppVlsLl+p. ..................................................................h.psGppVPPVuIs-+GELlLsp..DphsYpsWNSuQLsGKVRVlQHIAGRoSAKEKNAsLIcAI....cuAp.h...PpDRYQ....TTTIVNTD.DAI.GoGh.FV+SSlEsNK+hYPWSQFllDuNG.l.u.RsAWpLs..E..cSSA..ll..VL..DKsG+VQWsKDGALTp-EVQpVlsLlpK.L................. 
0 6 21 49 +9528 PF09696 Ctf8 Ctf8 Mistry J, Wood V anon Pfam-B_46199 (release 21.0) Family Ctf8 (chromosome transmissions fidelity 8) is a component of the Ctf18 RFC-like complex which is a DNA clamp loader involved in sister chromatid cohesion. 21.10 21.10 21.70 21.40 19.40 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.51 0.71 -4.33 31 267 2009-09-10 14:53:20 2007-05-21 11:57:48 5 5 235 0 194 259 0 116.80 29 75.80 CHANGED pl.lpTPp......GhsllElQGslplPp......................ppshtssphGcLpa.............p.pptp+shLaVGppQRLhGplhKLspPLull++p.t...............................t.tpspspplcll-ll+hKl.lFKsRPhPl .........................t.............thsllELQGplph.t..................................................ppshtsh.lGcLpa.......................pp.ppshLhlG.appLpGclh+Lp..+PluVlc+ppt..................................................t..ttpp.psphp...lssll+tKl.lF+sRPcPl................................................................... 0 64 107 165 +9529 PF09697 Porph_ging Protein of unknown function (Porph_ging) TIGRFAMs, Coggill P anon TIGRFAMs Family This family of proteins of unknown function is found in Porphyromonas gingivalis (Bacteroides gingivalis). 25.00 25.00 25.30 25.00 24.60 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.45 0.72 -3.84 44 420 2009-01-15 18:05:59 2007-05-21 14:53:50 5 2 119 0 136 412 109 59.90 41 22.25 CHANGED Wcls.pEoKpIusYpChKATssh.................................................cthslpAWYTspIPlspGPtpahGLPGLILE ...................................................................................................................Wpl.t.s-.s.+p.I.tsY.pCpKAsspa............................................................................pGRpapAWaTs-IPl.sp.GPa+atG.LPGLIl... 
0 37 118 136 +9530 PF09698 GSu_C4xC__C2xCH Geobacter CxxxxCH...CXXCH motif (GSu_C4xC__C2xCH) TIGRFAMs, Coggill P anon TIGRFAMs Family This motif occurs from three to eight times in eight different proteins of Geobacter sulfurreducens. The final CXXCH motif matches the cytochrome c family haem-binding site signature, suggesting that the sequence may be involved in haem-binding. 21.10 21.10 21.30 21.10 20.60 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.60 0.72 -3.78 135 404 2012-10-01 23:37:15 2007-05-22 11:49:07 5 30 15 0 299 381 5 36.50 38 11.69 CHANGED oCossYCHusuts........................s.tsss...Wsssssst........C..ssCH .oCossYCHosGpu......................ssshssPs....Wsssssst........C..suCH 0 42 180 299 +9531 PF09699 Paired_CXXCH_1 Doubled CXXCH motif (Paired_CXXCH_1) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents a domain of about 41 amino acids that contains, among other motifs, two copies of the motif CXXCH associated with haem binding. This domain is predicted to be a high molecular weight c-type cytochrome and is often found in multiple copies. Members are found mostly in species of Shewanella, Geobacter, and Vibrio. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.12 0.72 -4.44 178 1252 2012-10-01 23:37:15 2007-05-22 12:40:01 5 102 240 0 748 1592 142 43.00 29 19.96 CHANGED HsPltpu.t..CssCH......sPHuS.s..pstlL..............ptssspLChsCHspttt ............................h..ts..p.....CssCH..........................sPH...uu..s.......ptthL............................ptstsp.lC.hpCHsp...s............ 0 318 549 694 +9532 PF09700 Cas_Cmr3 CRISPR-associated protein (Cas_Cmr3) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered Regularly Interspaced Short Palindromic Repeats. 
A number of protein families appear only in association with these repeats and are designated Cas (CRISPR associated) proteins. This highly divergent family, found in at least ten different archaeal and bacterial species, is represented by TM1793 from Thermotoga maritima. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.17 0.70 -5.03 33 217 2012-10-01 21:23:39 2007-05-22 12:54:02 5 3 185 0 98 248 5 321.60 16 88.35 CHANGED hhlpPhDslhFR-uRPFs...susps...stuhhhP.PpTlAGAlRsthhppsshph...sth.t.........................lplh.GPhlhc...........ptchhaP...tPhchhh.........hcppsshhphhtlc..........................ht.hhlsst...............tsthp.hsua.lshpslhp.aLpuch.ptp.........plhptt.............hhphEpRlGlulcspppss..........cEutLYpsp.hlRhp....................tshslulhlcsssssph...................tshhpLGGEsRhu......thch..........tth.sh.pt.ttshtpspphtl..hLhTPuhFspuh..hst..................................lsusul..s+sh....hlS.GWDh.tppcP+shp.......hh......sPsGoV...............aahchtpststtt.pttt........................tp.....hpp.GaGphlhsth 
........................................................................................................................................hpPh-shhhtpspsFt.........ttt.........ht..s.h.P.spshhGhlpsh....hh.tt.t...................................................h.tlh.Gshhht..............ttphhhP...hPhshhh..................tttt...h...hp....................................h.......................tt.........t.a..hthpth......t..hhpst....t.....................ph.t................................ht.c.+htlslct..pppps...........c-..uhLaptp..hlc.ht..........................tthshshhlphttttt.........................tthhplGGEs+.hs.....tlph.................................th.p..t..htt..t........t...s.pthtl..hLlTPuh.h.p...p.sh.....hsth..............................................t....lhhhsh.....s+.h..................lu...Ga....ch....t....pp...p..s+sht..........................hh..........lssGoV...............a.hhc...h..t.t......................................t......pp.Gauhhh.............................................................. 0 59 78 92 +9533 PF09701 Cas_Cmr5 CRISPR-associated protein (Cas_Cmr5) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family, represented by TM1791.1 of Thermotoga maritima, is found in both archaeal and bacterial species. 
20.40 20.40 20.70 21.10 20.20 20.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.51 0.71 -3.94 25 134 2009-01-15 18:05:59 2007-05-22 12:57:38 5 1 127 7 72 132 5 120.80 23 88.87 CHANGED Tl-QppAptAappl...................pplppt................cphpccYtshs+cLPshIhpNGLsQsLAFlhuKucpptctt...........................LhccLspaLpppst...............sllctlhpt...-hppYhthTpEsLuhLsWl+RaApuhL .............................hppphAphAhptl...................................................................pp..htp.................pphpccYpshs+phPshIhpsGLspslAFhhuKsppptpt......................................lhppltph.....lppptt......................................slhct.l.hpt...s..tpYhhhTpcsLthhsWlKRaupuh........................................................................... 1 47 59 67 +9534 PF09702 Cas_Csa5 CRISPR-associated protein (Cas_Csa5) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry represents a minor family of Cas proteins found in various species of Sulfolobus and Pyrococcus (all archaeal). It is found with two different CRISPR loci in Sulfolobus solfataricus. 21.70 21.70 22.30 22.60 21.60 21.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.23 0.72 -3.95 5 26 2009-01-15 18:05:59 2007-05-22 13:02:21 5 1 25 0 13 27 0 100.10 43 74.66 CHANGED sFlYoEoPTYVDRIuNALSKEAVs+VLaEupRIlpoGl-uGEIcttss.........sGR+....YlsV..tEK-G.sYIllGtLPSDcDVEpFLc-VERDIYhARKVGALAMAhsN+ ..h.FlhoEosolVDRhANALS+EsVs+sLaDs.Rllphul-pu.EIss..h.........pG+c....Yst.s.hppp-G..chhllGhLPosp-lE.FLchlccDlhhARKsGALAholss+.............................................. 
0 4 4 8 +9535 PF09703 Cas_Csa4 CRISPR-associated protein (Cas_Csa4) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR loci appear to be mobile elements with a wide host range. This entry represents a protein that tends to be found near CRISPR repeats. The species range for this species, so far, is exclusively archaeal. It is found so far in only four different species, and includes two tandem genes in Pyrococcus furiosus DSM 3638. CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. 25.00 25.00 97.10 96.10 20.30 19.80 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.11 0.70 -5.48 6 13 2009-01-15 18:05:59 2007-05-22 13:06:21 5 1 12 0 10 13 0 355.20 23 95.95 CHANGED lshhhTPGas.hhDThIhYGlVcsLstuGhssu.cVhshGpcYhIps-ssspt.hp...Eplc........huLhpshEEh...HhAhhscpsp..ss+lhpstDhssGssIsss..hplahctlstpLphlpcphctu++uscuc......thsTlsLsLhPshGKYh...lcphshpEspsh+VsphsYALAhlGFaY..YushlhhpcGcsplVplhshs....sh--LshlphLs.p-Lspcl.hsthschc.hlsp.hu.LYhLhhoEol....ElsocppFsllsYsh.EpssNsptlRsFtsl-lu+lh-FlhpLKthshYcshth.+hl-pL...........h+ts.ElhtpLh-slha-..s.tuhYoslRtl......p+ushsu...c..hlsslt-hlsph .....hhTPGHshIsDsLIhaGlVctL.stuGthcu.pl..hG-cYhlps-hsstphh....pshc.........tLhphhcEh...+hshht.psp..tshlhpshshshstshss.....sah.....cplstpLpp.lccp..hcp.t++phcuc........thhTlhLsLtPshGKYh.............lcsas..lpp.statVCshCaALAhlGahY..hoshl..phcc.....Gpp....shVplhths....sh-clshhplLslpchtppl.hhhhsp.p..lsphhA.LYhLshuEol.....hsscpshsllsYsh..p+sGNs..Qthpuhtph-ls+L.h-FlhplKt..hYcs.ph.+hl-sL...........hcc....s.EsLtsLh-slhac..p.tshYpslRpl......c+ushstp......ht.l.phh.................... 0 6 8 8 +9536 PF09704 Cas_Cas5d CRISPR-associated protein (Cas_Cas5) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. 
A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This small Cas family is represented by CT1134 of Chlorobium tepidum. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.51 0.70 -4.72 102 1346 2012-10-01 21:23:39 2007-05-22 13:35:06 5 3 1181 1 402 1115 32 210.50 18 87.80 CHANGED slhl+lhG.shtsa.upsthtspRs....ohshPohSulhGlLsush.............uh.tt...................hpsslpaslct...........pptsplhp..sh.......pssp...................................t....tttttsshls.............pttYhhcAtahlshpsssp...........................hhp.hpctlccsthhhh..LGp+ps.s.stslp..................hps...pshh.......pshpphs.......................hthhh..........................................t..st..hpchss.......hps...phphuhhpl. .......................lhhplhG.shAsa.spsthtttRt.....oaplPohSAlhGlLuAsh...................Ghtptt...................................hp..p.hphslpt.............................spp.sp.hhp....sh.........posp............................................hh.t......t..t...phhssh..lp..............................................ptpYhh.-A....h.....ahl.sl..tsssp...........................hhlp..lpc.sl..c..c..s..h..h...hh...LGp+pp..s..s.slt...............hps......psh....................pshp.ph.............................................................................................pt.s...............t...h..u.h................................................................................................................................................................................................................................................................................ 0 160 289 348 +9538 PF09706 Cas_CXXC_CXXC CRISPR-associated protein (Cas_CXXC_CXXC) TIGRFAMs, Coggill P anon TIGRFAMs Domain CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. 
A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes a conserved region of about 65 amino acids from an otherwise highly divergent protein found in a minority of CRISPR-associated protein regions. This region features two motifs of CXXC. 21.00 21.00 21.20 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.59 0.72 -3.80 10 84 2009-01-15 18:05:59 2007-05-22 14:03:33 5 2 83 0 35 86 4 64.70 28 12.16 CHANGED cscsphsChsCGcRpspphKs.ht.h.cosh.PLhuS.ushtNaFWsspsssslCPlCpllhphsPhuhh ....................h.pctph.ChhCu...p..t..t.....hc...s...t.hshhtcss...........a.....s.sctpsahWsh.p.s.shslCslCpllatssPhGhh........ 0 14 21 26 +9539 PF09707 Cas_Cas2CT1978 CRISPR-associated protein (Cas_Cas2CT1978) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a minor branch of the Cas2 family of CRISPR-associated protein which are found in IPR003799. Cas proteins are found adjacent to a characteristic short, palindromic repeat cluster termed CRISPR, a probable mobile DNA element. 21.80 21.80 22.10 26.80 21.50 19.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.74 0.72 -4.04 33 643 2009-01-15 18:05:59 2007-05-22 14:09:50 5 3 629 0 107 344 9 83.70 58 72.52 CHANGED MhVllscslPs+LRGcLuhWhlElcuGVYVG.shSs+VR-hlWpplsch....hpc.GsuVMsassss.EpGashpThGcscRtsVDhDGlhL ........MlVlVsENVPPRLRGRLAlWLLEVRAGVYVG.csSpRlREMIWpQlsph.......sss.GNsVMAWATNo.EuGFEFpTaGcNRRhPVDlDGL+L... 0 28 75 92 +9541 PF09709 Cas_Csd1 CRISPR-associated protein (Cas_Csd1) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR loci appear to be mobile elements with a wide host range. This entry represents proteins that tend to be found near CRISPR repeats. The species range, so far, is exclusively bacterial and mesophilic, although CRISPR loci are particularly common among the archaea and thermophilic bacteria. 
Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. 21.20 21.20 30.00 29.90 21.00 20.40 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -13.01 0.70 -6.22 36 393 2009-01-15 18:05:59 2007-05-22 14:22:25 5 3 343 0 123 375 18 525.50 25 96.60 CHANGED LhchYpRhtppst............lsPhua.stcpls...ahlsLstpGshls....shcptcs+cth...h.lPpst...pRo.oult..sphLhD+hsYVLGs.s...........ptttcptpt.........apuFhphlpp.hsss...p-tslp...AlhpFLcp....s..htshh..............thpp..............................p.tss.lsF+l-.ussp...hlaccsslpphWtphhssp...ps.........ps.lCLVoGcps.slAchHPshh....tspusG..ApllSFNps.uFsSaG+..p.....pupNAPlShpuuhthssALsaLlp+pppp+............plGD.sssVFWAppss............hpsshts.hshh..........tcsss................psspplpthlpphhsGhtsp.sss.....scaalLGL.usNuuRlulRaacpsshuchhc+lppaac-hthhp.t...................hhpsPt.hslhpllhuss...........+s-s.lsP..sLuucLhpuIlsGp...aPpsLlpsslpRhR...upp..............phsh.+suhl+AsLhRph+hpppt........hshuLDpspsshuYLhGRLFAlhEplQcpAl...sG..lNATItDRaauuASssPsslF.hLhcttps.HluKLtpcp.Ghhh.h..-+hlsEIhstlss...sFPsshsLspQGpFslGYYpQRpshap 
..................................................LhphYpph.ttst....................l.s.ua....spppht.....hhlslspcGphhp.........hp.....h..p...tcpth...............h.lPts......pRo..osht.........sphLhDphtYhhGh.t...................................pt..pttp..ttt.................apsahphhtp.hhp......ps......t...lp.....Alhpalpp......ts.hht...................phtt....................................thhss.l...hFplp.tttt................hlhpctshpph.atphhpsp........................pst.......tt.hChloGcps...sluch.Hsshh.......tspsss.Ap..llShNt..........s..uFs.h.G+..........p.........pstsuslutpsu.thpsALpaLlpppt.ph............................plGs.tshVhWupssp..............h..p.h.....h..shh........................t..p.............................pttpph....tphhp...p..h..h..pGh.....hp..s.........tph.alluL.uss..suRlul+aahphshsphhppltpaapc.hth.t...............................htt.s...sl..hphlhtss....................pscp...h.s.......tlhtplhpullsGp...hP...slhpthltRh+.......utt................p.hs.h.+.....s.th.l..pshLh.+phph.tt.................hshsLspp....p..p.shuYlhGRLhAlhE..plpptAh......................s...hpsTht-RahsuhuspPhpsashLhphhp...ahp+.l.p.p...t...........s.....h...........h.............cc.ls-lhstls................sa...s.p..phsLp..u.tFhlGYYpQ+pthh.......................... 0 51 88 107 +9542 PF09710 Trep_dent_lipo Treponema clustered lipoprotein (Trep_dent_lipo) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of six predicted lipoproteins from a region of about 20 tandemly arranged genes in the Treponema denticola genome. Two other neighbouring genes share the lipoprotein signal peptide region but do not show more extensive homology. The function of this locus is unknown. 
24.70 24.70 24.80 37.10 24.60 24.60 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.40 0.70 -5.79 4 20 2009-01-15 18:05:59 2007-05-22 14:31:14 5 1 2 0 8 20 0 282.90 34 80.84 CHANGED sKLKLIFILhLAVLLFSC.oKElKEtpphcscV.SshhhEsK.....Esph.SKsE.spaV..............poP..E.IK.LEpK....ctat.pL+plthELDch-.lh..sYcp.sthuIDpLspQK.........NlEhI...clc+DsE.hYCK.KtsspNGtpL.YplsYcWYshtphalshsY-LLpEpNhstIs-clIcpQVHGplhphcpD.cK-chIKlFEcY.Np..............Ttl+ppchtpYsh+lLDYVKGNFTNSGYDEYhVhFhp-................ssDsEl.D.......paIccVcCFlV-psKlIKsYhIsspuuhFhPs.hppssL.t.........lpsFGhpFSQGWlADFNQNGINEIY.ltahphtsstlhhlEF.DppFVpthlhsps.-lsuVDWaKKpI..........ll+DcotsscWhc-.YQWN-s.+palL ..sKLKLIFILlLAshLFSC.SKElKEpp.EcscVEossKhE.K.........psEh.uKt...Ehspch..............cos......EtIK.L.E-K....Ktat.plp.l.hpLDhh-.hh..th.....hshs.h...p...............hphh...ch.p-tc.h.hh.ph.....t.h.Yph.hp......h.p....................................................................................................hFh....................tsph.p.........lp.lthFlh.ttphhp.Y.l......h.....t...h...............t..h..shluDhNQNGhNEIa..............h..s.hh.hEa.tt.F....l....t..h.slDW.pphh................................................................................................................................................... 0 8 8 8 +9543 PF09711 Cas_Csn2 CRISPR-associated protein (Cas_Csn2) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR loci appear to be mobile elements with a wide host range. This entry represents proteins found only in CRISPR-containing species, near other CRISPR-associated proteins (cas). The species range so far for these proteins is pathogenic bacteria only. Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. 
This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). 20.90 20.90 21.20 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.11 0.71 -4.61 5 132 2009-09-11 11:10:28 2007-05-22 15:11:46 5 1 132 12 11 105 1 185.20 44 84.50 CHANGED lVKahYQYDEDSELKlFDcKhKSLKuSELMLVTDILGYDVNSuslLKLIHADLEsQFNEKPEVKSMIDKLsATITELIuaECLENELDLEYDEITILELIKALGVKIETQSDTIFEKhFEILQVYKYLoKKKLLVFVNSsAYLTKDElhpLpEYIsLoQhsVLFLEPR+l-GFsQYILDcDYFLIsEN ...............................................................llp.hYQYptpp-LKlF..Dtc.+sL.K.toELh.llTDlLGaDlNSsshLKLIauDLEsQ.....L.N-KPEVKSMI.-..KLsuT.IoE.L.Iu...a.E..h...LE..p.ELDLE.h.D.EITllELhKALGlKIE.Tp.S.D.T.lFE..Khh.E.IlQVa+YLoKKKLLlFlNssuYLTc-EltpLhEYIpLsplpVLFlE......s...R....c.......l.........s..h.s.Q.....YllDpDaaL..c......................... 1 2 6 9 +9544 PF09712 PHA_synth_III_E Poly(R)-hydroxyalkanoic acid synthase subunit (PHA_synth_III_E) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents the PhaE subunit of the heterodimeric class (class III) of polymerase for poly(R)-hydroxyalkanoic acids (PHAs), carbon and energy storage polymers of many bacteria. The most common PHA is polyhydroxybutyrate but about 150 different constituent hydroxyalkanoic acids (HAs) have been identified in various species. 
22.10 22.10 22.40 22.20 21.60 22.00 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.97 0.70 -5.01 10 111 2009-01-15 18:05:59 2007-05-22 15:24:40 5 1 108 0 41 130 21 237.60 22 79.03 CHANGED sDahEhQRpYWsuWu-suR+uhu.........t.tupssssWpcul-.Whcthussusspspshh-chhcQupsaathu-phhpt..ttsttp..........LcQhtcpFuG.hp.....tssotpchsshWphPl-s.....aQphhSohpshssshhpsh........hpthppphsphLssPuLGhsREcQuQhQpLhRsth-YQpAhpcYssthsplu.cul-+htspLptshsSupsls..SsRALYDpWlcssEpuYu-hlto--YsplaGcLlNApMpL++thQphlDchhpuLshPTRpELcosp+RLpELRRcp+tLp+ ..........................................................................................................t.....................................................................................................................................................hpt.h....t.t.............................t....t.h.t.a......tt..................ht...ht..s.......s.s.ht.h..............................p.h.ptt.t.hh.....p..PhhG.s+p..ptph.t.hhcs....ph.tt.ttY.t.htpsh.puhtphtpcl.ph.ppspplp..oh+thh-hWlpss-csatchhtocpFtcshGphlsuhhch....+tphpchh-phhcphshPTRuElDsltc+ltELcRcl+pl..p.................... 0 13 26 35 +9545 PF09713 A_thal_3526 Plant protein 1589 of unknown function (A_thal_3526) TIGRFAMs, Coggill P anon TIGRFAMs Domain This plant-specific family of proteins is defined by an uncharacterised region 57 residues in length. It is found toward the N terminus of most proteins that contain it. Examples include at least several proteins from Arabidopsis thaliana and Oryza sativa. The function of the proteins are unknown. 
25.00 25.00 25.00 25.50 24.00 23.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.70 0.72 -3.83 15 206 2009-01-15 18:05:59 2007-05-22 15:30:56 5 3 36 0 120 187 2 53.00 50 20.53 CHANGED VQpLIE+ClphYMS+EEsspsLpcpAsIsPhhTps........................VWppLEKENtEFFcsYtp+ ..............VQpLIE+CLphaMspcEslc.sLpp+.ApIcPshTph.........................................VWpcLEcENpEFF+AYhh................. 0 27 75 99 +9547 PF09715 Plasmod_dom_1 Plasmodium protein of unknown function (Plasmod_dom_1) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent an uncharacterised family consisting of a small number of hypothetical proteins of the malaria parasite Plasmodium falciparum (isolate 3D7). 21.30 21.30 32.20 32.20 20.80 19.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.20 0.72 -4.04 5 9 2009-01-15 18:05:59 2007-05-22 15:47:30 5 1 1 0 9 9 0 66.30 39 31.54 CHANGED oFFKRAKLlL-sFDNIFIDKLIDuNIpNKuSsl+EDVh-NuLsLssAsI.hhAIPIaoYlsKRINFF SFFKKhKLlhshhDsla.DhLIssslpstu....EhV..ssLuhhsushP.huhshhsYlspRINFh 0 9 9 9 +9548 PF09716 ETRAMP Malarial early transcribed membrane protein (ETRAMP) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent a family of proteins from the malaria parasite Plasmodium falciparum, several of which have been shown to be expressed specifically in the ring stage as well as the rodent parasite Plasmodium yoelii. A homologue from Plasmodium chabaudi was localized to the parasitophorous vacuole membrane. Members have an initial hydrophobic, Phe/Tyr-rich, stretch long enough to span the membrane, a highly charged region rich in Lys, a second putative transmembrane region and a second highly charged, low complexity sequence region. Some members have up to 100 residues of additional C-terminal sequence. These genes have been shown to be found in the sub-telomeric regions of both Plasmodium falciparum and P. yoelii chromosomes. 
25.00 25.00 25.50 25.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.90 0.72 -4.03 32 90 2009-01-15 18:05:59 2007-05-22 16:41:14 5 5 16 0 54 94 4 83.20 24 37.35 CHANGED MKlsKl.hhhhshLLulpll.............................................sPsh.ssss.tspss.................................+thtphpsshp+K...hcppKhhllSoluoslAl..llusul.GhuhYpppKpp ..............................................MKlsph.hhhhhhlhh.hphh......................................................................ssh.spts.t.tppts........................................tslcphc.pslpcK...h+pcKhllloo..luoslsl..llussl.GlshYhp+Kp.s...... 0 13 25 44 +9549 PF09717 CPW_WPC Plasmodium falciparum domain of unknown function (CPW_WPC) TIGRFAMs, Coggill P anon TIGRFAMs Domain This group of sequences is defined by a domain of about 61 residues in length with six well-conserved cysteine residues and six well-conserved aromatic sites. The domain can be found in tandem repeats, and is known so far only in Plasmodium falciparum. It is named for motifs of CPxxW and (less well conserved) WPC. Its function is unknown. 21.10 21.10 22.50 21.90 20.90 20.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.83 0.72 -3.35 72 270 2009-01-15 18:05:59 2007-05-22 16:49:37 5 9 13 0 270 301 4 60.40 28 48.96 CHANGED Cp.....cs.Ysp..CPtsWh...........tspst.....Chus..ssY....pGs..Cpphh.pFpp.hspppKppauppCplpWPChp .......................ps.Ys.....t..CP..hsWh................hspsh............Chus...psY.................pGs..Cpp.....hh..shps..h..s.pppKpp.....apppCplpWPC................... 0 84 120 223 +9550 PF09718 Tape_meas_lam_C Lambda phage tail tape-measure protein (Tape_meas_lam_C) TIGRFAMs, Coggill P anon TIGRFAMs Domain This represents a relatively well-conserved region near the C terminus of the tape measure protein of a lambda and related phage. 
The protein, which controls phage tail length, is typically about 1000 residues in length. Both low-complexity sequence and insertion/deletion events appear common in this family. Mutational studies suggest a ruler or template role in the determination of phage tail length. Similar behaviour is attributed to proteins from distantly related or unrelated families in other phage. 21.80 21.80 21.80 22.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.26 0.72 -4.07 62 1382 2009-01-15 18:05:59 2007-05-22 16:53:26 5 11 585 0 77 1312 114 76.70 38 9.36 CHANGED pusWhsGs.ppuhtsah-sApssuup...spphhosAFsuhpsulssFlsTGKhsFcs...........assSlluDlu+lssptuhssslsu .......................pGsWhsGl.ppGaupat-sA.sD....shuQ...scsssTpsFsGh....up....shAshlT...such...s...aRu...........F...T.pSVLu.hocIlh+tAhlshlt............................... 0 15 38 57 +9551 PF09719 C_GCAxxG_C_C Putative redox-active protein (C_GCAxxG_C_C) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a putative redox-active protein of about 140 residues, with four perfectly conserved Cys residues. It includes a CGAXXG motif. Most members are found within one or two loci of transporter or oxidoreductase genes. A member from Geobacter sulfurreducens, located in a molybdenum transporter operon, has a TAT (twin-arginine translocation) signal sequence for Sec-independent transport across the plasma membrane, a hallmark of bound prosthetic groups such as FeS clusters. 
21.30 21.30 21.30 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.30 81 793 2009-01-15 18:05:59 2007-05-23 10:44:21 5 8 499 4 237 679 25 120.90 26 77.02 CHANGED asCupulltuhs-phuhs.ss....sl+huuuFGuGhG...sGssCGAloGuhhslGh..................hhGcsp...............ttttppchhthsp-lhctF+....ccaG..uhpCcpLhtt..................tt..tpppCsplltpssc.hlt-hL ................................asCupulltsh....s....-ph...shsh..pp.....shchuuuF...u.uGhG..tht.ssCGAloGuhhslGh..............................hhGcsp.......................ttttptpshthsp..........-lt.p..p.Fc......................pc...hG.....ohhCppLhst............................tp..ttppCsphltpsschhtchl............................................ 0 118 214 231 +9552 PF09720 Unstab_antitox Stabl_TIGR02574; Putative addiction module component TIGRFAMs, Coggill P anon TIGRFAMs Family This entry defines several short bacterial proteins, typically about 75 amino acids long, which are always found as part of a pair (at least) of small genes. The other protein in the pair always belongs to a family of plasmid stabilisation proteins (IPR007712). It is likely that this protein and its partner comprise some form of addiction module - a pair of genes consisting of a stable toxin and an unstable antitoxin which mediate programmed cell death - although these gene-pairs are usually found on the bacterial main chromosome. 22.40 22.40 22.40 22.40 22.20 22.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.32 0.72 -4.28 83 291 2009-01-15 18:05:59 2007-05-23 14:36:35 5 3 140 0 122 319 88 54.20 24 70.64 CHANGED pLsspERlpLl-pLhcSLsp...s.ps..c.lsphatpElccRhpthcsGpspslsh--lh ...........................pLs.pERhtLs-pLh.cSLst....s.ps..........p.lsphatpEhccRhpphcsGpsp.slsh--lh....................... 
0 43 74 117 +9553 PF09721 Exosortase_EpsH Transmembrane exosortase (Exosortase_EpsH) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are designated exosortase, analogous to sortase in cell wall sorting mediated by LPXTG domains in Gram-positive bacteria. The phylogenetic distribution of the proteins in this entry is nearly perfectly correlated with the distribution of the proteins having the PEP-CTERM anchor motif, IPR013424. Members of this entry are integral membrane proteins with eight predicted transmembrane helices in common. Some members of this family have long trailing sequences past the region described by this model. This model does not include the region of the first predicted transmembrane region. The best characterised member is EpsH of Methylobacillus sp. 12S, where it is part of a locus associated with biosynthesis of the exopolysaccharide methanol-an. 25.30 25.30 25.30 25.40 25.10 25.20 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.97 0.70 -5.18 102 524 2009-01-15 18:05:59 2007-05-23 14:54:15 5 4 392 0 259 557 185 236.50 17 69.36 CHANGED hlhshhlhh....ah..h.t.........hhttW..hp.s.....tphsHuhllhslsha.lhatpp.t.h...tt...ts..h....ulhhlhhuh..hhalhuph.......sth.hhhtthuhshhlhuhhhhhh...............Ghphhp............th......hhPlh.hllhhlPhs..thl......hs.hs.sLpthsuphsshhLphhGhslhh......c..........Gshl.....hlsstp....lpVspACSGlchlhshhsluslhshh..hptshhp.+hhhlhsul.slsllsNhl....Rlhhlsl.ls...hhs.........................phu..tshhHthhGhlhFssshhhlh.hlsthhtph 
.........................................................................hh................................h........................h.hh..h.hh...hhh.......h.....................................shhhhhhhh.....hh.hhht.................................h..hsh.h.hhh..hshhhhhh...........................Ghthht................................hh.........hhslh.hhh.hh..l.Ph........h............h...h.h.....sLpths...uthss..hl..l.p..hh..G.....h..s...shh.......p................................Gshl.....hl.s.stp.......lpVst..uCoGlph.lhhhhulshhhh.h........h.tshhc...+l...h.h....hhh.ul....slsh....hsNhl....Rlhhlsh..ls......hhs...............................phh....hshh..H.p.h.h..G..hlhahhhh.hhlh.hhh......hh...................................... 0 88 205 245 +9554 PF09722 DUF2384 CHP2293; Protein of unknown function (DUF2384) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this family are found almost exclusively in the Proteobacteria, but also in Gloeobacter violaceus PCC 7421, a cyanobacterium. The function is unknown. 20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.39 0.72 -4.11 120 1010 2009-01-15 18:05:59 2007-05-23 17:01:51 5 9 668 0 338 835 106 54.10 24 37.91 CHANGED hppspsla.Gsp..........ptAppWLp.pPshu..LsGppPl-llp.o..ttGhptVtphLspl.ctGl ...............................................h...h..lh.sst..........ptA.tpWlp..pPstu..LsGppPl-llt..s....suhhtVhphLsthctG................... 0 84 192 261 +9555 PF09723 Zn-ribbon_8 CxxC_CxxC_SSSS; Zinc ribbon domain TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents a region of about 41 amino acids found in a number of small proteins in a wide range of bacteria. The region usually begins with the initiator Met and contains two CxxC motifs separated by 17 amino acids. One protein in this entry has been noted as a putative regulatory protein, designated FmdB. 
Most proteins in this entry have a C-terminal region containing highly degenerate sequence. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.56 0.72 -4.00 182 1432 2012-10-03 10:42:43 2007-05-23 17:08:51 5 3 985 0 622 1298 568 41.50 36 43.22 CHANGED MPlY-YpCp..pC..GppFEhlpphu-..ss.ssCPpCuu..p..hp+hlS ...MPhYcYp.......Cp..sC..GppF-h.l.p.p...hu-......sshssCP..pCuu..phc+lh................... 0 254 465 555 +9556 PF09724 DUF2036 Uncharacterized conserved protein (DUF2036) KOGs, Finn RD, Coggill PC anon KOGs (KOG0798) Family This family of proteins includes members ranging in size from approximately 300 to 460 residues. There are a number of well-conserved domains along the length. 20.50 20.50 21.30 20.60 19.10 19.80 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.01 0.70 -5.29 22 306 2009-09-11 06:41:13 2007-06-15 09:23:13 4 5 268 0 216 306 2 281.80 22 80.09 CHANGED sYpLLplsscllcplc......pu....p..sLtl+Gt..................s-cu..VLCocs+TaslKthcpSNolLLh.thh.s..................................p..tst....phphhshspphhElpp..h.psphcplcp..lthshasG..cstt..ch.t.............ashcpLlsssQsScpEhpptLpp.lsuhpl-.GthplLs.cahh+hLshllthlsspuhsh..s....clshpsshpslt.s...........hsppllcsslpha....upp.s-..................hapLctpplsphhuhtlLpphh.........phplc-...........Fh.pWppplP.......shshchchLcGhsllp...........pstsp...plphls.psL.Ppshp-RFppLFph+p.cWsh--lpPaIc 
...............................................................................................hpLlcL.ssplhp.lp......................ps.........p.........pLhl+ut.........................................................s.-ps......VLCops.cTapl+tspp..SNslllh..h..stt................................................................t...............thphhuhh.pphhElpp.......h....tsphp...tlt.h..h..s.apu..p........ptt.p.t......................................................hshppLhpph.sSptEh.pthp.p...htu...h.............h......p....G......h................hp.h.lp.ph.hphl..phlht.h.t.ps...hth..p.....................p.hshpphhptlt....................................................spthhpth.l.p.ha........sp...t................................................................t.........hhplptptl...sphhu..h.Lpth......................ththp-..........................Fht.Wppt.lPt...............th.sp.hp....Lp.Ghhh................................................t.st.t..hl.hh..ppL.s..p.....ttRht.LFphp..pWp.t-htPal................................................................... 0 74 121 178 +9557 PF09725 Fra10Ac1 Folate-sensitive fragile site protein Fra10Ac1 KOGs, Finn RD, Coggill PC anon KOGs (KOG1297) Family This entry represents the full-length proteins in which, in higher eukaryotes, the nested domain EDSLL lies. Fra10Ac1 is a highly conserved protein, of unknown function that is nuclear and highly expressed in brain [1]. 
20.90 20.90 21.50 26.10 19.20 18.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.75 0.71 -4.01 11 188 2009-01-15 18:05:59 2007-06-15 10:13:15 4 8 151 0 128 192 2 114.90 55 41.83 CHANGED +TDhDlLKccH+Fl+D----sc.........sWtcpLA+KYY-KLaKEYsIhDLS+Y......+csKl....................GhRWRTEsEVlsGKGphhCGs++C.....................................................pcspsLcoaElsFsYhEpGccKsuLVKlRlC.cCuhKLNY .......................................+TDhDll+EsHRFlhs-E..-.-...-.......................oWEpRLAK+YYDKLFKEYCIADLS+.Y......K-sKh....................GhRWRsEcEVlsGKGQFhCGN++.C..........................................................................................................s.c..c-sL+SaEVNFuYhE.cGEp+sALVKlRLC.cCohKLNa.................................... 0 58 75 104 +9558 PF09726 Macoilin Transmembrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1821) Family This entry is a highly conserved protein present in eukaryotes. 32.00 32.00 32.40 32.70 31.60 31.20 hmmbuild -o /dev/null HMM SEED 697 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.29 0.70 -6.25 5 207 2009-01-15 18:05:59 2007-06-15 10:14:40 4 6 88 0 126 181 0 441.60 33 90.48 CHANGED 
KRRNs-CGK..RRPlKRs.KhAEGlsuSTFLYIKFLllWshVLLADFlLEFRFEYLWPFWLLIRSVYDSFKYQGLAFSVFFVCIAlTSDlICLLFIPlQWLFFAASTYVWVQYVWHTDKGICLPTVSLWILFVYIEAAIRaKDLKN.PH.lDLCRPFAAHCIGYPVVTLGFGFKSYVSYRhRLRKQREVQKEN-FYMQLLQQALPKE..s+sLpss-KEsoEsoocuhToscsllsspsstssssssKlSs.shsplthp-pupcptp...sssppp+slupsNs...pspsl.......suslQ-lEhhps+oNuS+hoss.hsGsspshsh+cc......opuSSssptossSKsSsosptstts.uNSSP+SHsss.............................................................................................................NGSVsSSSsscN.........-cKpKpuupssutth..c..h...ssscsVcsshssNp...lSoPsAls.....................RLEsDIKKL+AELssuRQsEsELRsQIS.LoStERShKS-LsQL+KENDhLQTKlpuhVSuKQKDKQolQolEKRLKuEp-uRsslEKQLsEEKKRKKpEE-TAARAsA.AtsoRt.EsuESLKp+K+DLEsEIcKLpp-lKlKEEppcsLEKcs....QEl+KY......+E.SpK-TEVLMSALuAMQDKNppLENSLSAETRlKLDLFSALGDsKRQLEIApGuIapREpEIlDLKuKIA-llAVMPs.shu...SplpssTPHYSupFL-ssPuutsosuSsY.sL ..........................................................................................................................................................................................................................................................hh.hDhhh.hRhEhhaPhWhhhtshhpohphp............sl.......h.........hshhFhhhshh.shhChhhlPhphlhhhuoshVhhthhhp..s...........ptG.......................................husH.lGas.h.hsh.hp.hht...hh.phphtltttN..hhp.h.puLP..................................................................................................................................................................................................t...............t..........t.................................................................................................................tt...t...t....t............................................................................................................................................tts.........s.s.s.....p................ppt.ttt................................tt...t.........t.t.....................................................................................
......p...Lct-lcc..h+s-lpt.+p.Ep-h+tp..............h.s.tth..t.+.phtthptcs-........lpp..........+htph.p.+pt-+.shtthE++.......l......ttEtct+..........h-tpL.pp+pt+h...........tt.........t.....................................................ptsp.......h..+tc..pph-...t-hpplphchp.t-pth..h..ch......................................ttht.............................pc.......t.-.-hLhtsLtshp-Ks.pLEpsLuuEsRlK.-LFpsLGts++phc..t.................................................................................................................................................................................. 0 47 58 97 +9559 PF09727 CortBP2 Cortactin-binding protein-2 KOGs, Finn RD, Coggill PC anon KOGs (KOG1103) Family This entry is the first approximately 250 residues of cortactin-binding protein 2. In addition to being a positional candidate for autism this protein is expressed at highest levels in the brain in humans. The human protein has six associated ankyrin repeat domains Pfam:PF00023 towards the C-terminus which act as protein-protein interaction domains [1]. 25.00 25.00 27.50 27.00 22.90 23.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.28 0.71 -4.80 8 414 2009-01-15 18:05:59 2007-06-15 10:15:04 4 12 99 0 190 349 0 128.90 39 16.96 CHANGED suc+Epcs-sLSKc-LhhLLSlMEGELQARD-VIchLKA-Rp-lhl.EApYGphs.pcPhtALQRDuhtuuucsppccl...YpsPlupL-+lht+pKcoQcRMhsQLlsAE+pp++hlhcL-t-+ct+hcahppuD-hhshLEpctp..Lp..LE.EKupppptEpE+cKhss+Lc-EhsKhKShsLMLVcEppph ..................................phsp.-Lh.LhuhhEGElpAR..-.VIthL.+..........sp+hc..l..Et.YG..h.p..........p......csh.uLpRDh.....................h..t.......t....t......................t.p..sl....................hppPhs.Lchl.tpp+ph.cRh.tQLhhuEppp+p.......................................................p..p...lE.E+...tthEpEppKh.tpLc-Ehs...+.KphshhLltEpp...................... 
0 20 35 91 +9560 PF09728 Taxilin Myosin-like coiled-coil protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1850) Family Taxilin contains an extraordinarily long coiled-coil domain in its C-terminal half and is ubiquitously expressed. It is a novel binding partner of several syntaxin family members and is possibly involved in Ca2+-dependent exocytosis in neuroendocrine cells [1]. Gamma-taxilin, described as leucine zipper protein Factor Inhibiting ATF4-mediated Transcription (FIAT), localises to the nucleus in osteoblasts and dimerises with ATF4 to form inactive dimers, thus inhibiting ATF4-mediated transcription [2]. 27.40 27.40 27.80 27.40 27.20 27.30 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.12 0.70 -5.34 18 400 2009-01-15 18:05:59 2007-06-15 10:17:26 4 7 207 0 247 384 2 263.30 35 60.93 CHANGED +chsKtsp.LhpsLsphss.t-KlptlhK+hsEhlp-p+php+phphhpK+hptlp+EK-plpsEps...Kshhs+sKLEsLCRELQ+cN.+pLK...........EEshpptcEE-c+RpEhsp+FQssLpDIQsph-ppsscspc...Lpc-NpcLscKlKpll-QYE...........lREpph-+lhKp+-LphQLs-AKLpptppthppppp+pcppp-hhhpph.....plpphpcpEppL+.........................pQLslYs-KFc-FQsoLsKSN-lFsTFKpEMEKMoKKhK+LEKEstsa+s+aEpsNpsll-MsEE+ph.......pp+chcphpp+lp+LEpLCRuLQsER 
..............................................................................t...t+....l.p.l.s....p....hps....p+lt....hl.h.K+hs-Llp-h+p.....p+p.hp.hhp..K+ttpl.p-+cp........l.puEhs....................+sl...hs+sKLEsLCRELQ+cN.+pl..K...........................-Eshph....scc-ccpRpEh...ss+F...pssLp................-Ipsph-..ppspc...shc..............hp-..N.t........Lt.....pKLKplh-QY-...........................lR.EpHhcc..lh.+...p+-L.p.pLhpAKlpptpph.hcptpp+ppp.pp......................................psp.hpppEspL+....................................................t.QLslYs-KFc.p...........hp.....sTLsKSNElFtoF+pEM-cMoKKhK+LEKEshhh+p+aEpsNt.......sllphhEE+ph...........................ppcchcthphchp+LEpLCRthQtpt.................................................... 0 73 114 174 +9561 PF09729 Gti1_Pac2 Gti1/Pac2 family COGs, Finn RD, Bateman A anon COGs (COG5037) Family In S. pombe the gti1 protein promotes the onset of gluconate uptake upon glucose starvation [1]. In S. pombe the Pac2 protein controls the onset of sexual development, by inhibiting the expression of ste11, in a pathway that is independent of the cAMP cascade [2]. 
20.40 20.40 22.00 20.80 19.80 20.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.12 0.71 -4.38 32 332 2009-01-15 18:05:59 2007-06-15 15:36:41 4 3 138 0 260 349 0 134.00 35 33.49 CHANGED Ta.pGalcospDALllhcAshp.......GhLs..plsRR.p-cERs.hIpSGsVFVapEppoGhKRWTDGhsWSPSRl..GpFLlYRELsKpp...........................t...tttt...ps...tt.tp.s.h..........................................................................................................................................................pshtaK.sG...LlKKThSlpsp.......................tspphHlISYYsh ...........................................oa.GalpostDAlllhcA.sh............GhL...pltRR.p-pERt.hIpSGsVFVa..pE.............p.........p..u..........Gh+RWTDGhsWSsS.Rl...G.s.FLhYREh-tth..............................................................................................................................................................................................................................................................................................t...h.h+.sG..LhKpohSlphp..................phHllSYYt.h........................................................................................................................................... 1 105 173 236 +9562 PF09730 BicD Microtubule-associated protein Bicaudal-D KOGs, Finn RD, Coggill PC anon KOGs (KOG0999) Family BicD proteins consist of three coiled-coiled domains and are involved in dynein-mediated minus end-directed transport from the Golgi apparatus to the endoplasmic reticulum (ER). For full functioning they bind with GSK-3beta Pfam:PF05350 to maintain the anchoring of microtubules to the centromere. It appears that amino-acid residues 437-617 of BicD and the kinase activity of GSK-3 are necessary for the formation of a complex between BicD and GSK-3beta in intact cells [1]. 
26.00 26.00 26.30 27.10 25.40 25.90 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.61 0.70 -13.47 0.70 -6.18 11 255 2009-01-15 18:05:59 2007-06-19 14:23:25 4 5 95 0 150 256 0 528.20 44 84.39 CHANGED upuhSsp++sAtDGEopEEoLLQESAuKEuhYtt+lLELQsELKQsRshlsNspuENERLsslsp-l+-s.................spslElpRsRhR-ElKEYKhREsRLLQDYoELEEENISLQKQVSsL+psQVEFEGLKHEI+RLEEEsElLNSQLE-AhRLK-IAE+QLEEALEoLKsEREQKssLR+ELspahshssh..hushplpl-tl..phpc-.ptsspss...........sDs-ch.sut.pss..ht.................phtsstpuElh..sPssu.....VsDLhSELplSElQKLKQQLhQhEREKssLhusLpEsQpQLcpupsslsEpp-+VspLTpplsAl+p.............................ttKctpss.-ppcspsspcs...st.aElDhpu.ElLpCKh+sAlsEhscL....+pELKsL+scapphpp..php--+s+hcs-lQsLscclpphE+su+pc.tc..........................chucLEpElpthsclAsEopGsLssAQDELVsFSEELApLYHHVChsNNETPsRVhLDaY+cuthhtt.t......st..scth.sshh.phhh.........tEs.t...t.....................spuPussu.osshSPs............tDhR..p-PhNIhNLlAlIRDQIKHLQpAVDRohpLuRQRsA....utcLushsD+DpEuhhEEILKLKSLLSTKREQIATLRsVLKANKQ......TAElALuNLKSKYEsEKuMVoETMhKLRNELKuLKEDAATFSSLRAMFAoRCDEYVTQLDEMQRQLAAAEDEKKTLNSLLRMAIQQKLALTQRLEDLEh 
........................................p..o.p+hsstsG.ppE-sLlpEoAs+Et.h.tplh-hps-l+p...+t.htpspsEt-Rh.t..pchtc..................pp.hEhpRhph...+s-l+EhKhREsRLLpDYoELEEENIoLQK.VSsL+ps.......Q......VEaE....GlKHEl+RhpEEs.hLpuQlE-hhcLKcIuE+QLEEALEsLpsEREpK.sL+KELsphhshps........s..phth..sl...pth.....tpp..........p...........s-.t...tsh.t............................t...pst.h...P........hsDLhSElplsElpKLcpQL.Qh.E.pEKs.LhtsL...............p-sQppLcpspstLp..p..pclttLs.t.p.lpuhpt.................................t.p...ts.....-ttpttsttpp................c...L...ts+hp.suhs-hhpL....ptclcsL+tphpt..t...pht-.pp..s..........phcschp.hppplt.h...Ect.p.p.tt...........................thtphcp-LpthsphAsEspspLssAQ-ELlshSEELApLYHHVChsN...sET.PsR.V..hLDah+psps.htt.........................p.pt..h...s.sh...t.h..................................t........................ptt.p.s.p....s...ush.......................tD.p...p-sh.....slhsh.shlpDQI+H.Lpt.AV-+ohplu+p+hs........s.phssh.cc-tEth.Epll.KLKSLLSTKREQIsTLRsVLKuNKQ......TAE..lALuNLKsKYEsEKshVoETMhKLRNELKsLKEDAATFSSLRAMFAs..RC-EYlTQ............lDEhQRQLsAAE-.......EKKTLNpLLRhAIQQKLALTQRLEpLEh...................................................................................... 0 43 60 108 +9563 PF09731 Mitofilin Mitochondrial inner membrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1854) Family Mitofilin controls mitochondrial cristae morphology. Mitofilin is enriched in the narrow space between the inner boundary and the outer membranes, where it forms a homotypic interaction and assembles into a large multimeric protein complex [1]. The first 78 amino acids contain a typical amino-terminal-cleavable mitochondrial presequence rich in positive-charged and hydroxylated residues and a membrane anchor domain. In addition, it has three centrally located coiled coil domains [2]. 
27.40 27.40 27.50 27.80 27.30 27.30 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.81 0.70 -5.86 33 516 2009-01-15 18:05:59 2007-06-19 14:34:14 4 7 348 0 280 469 33 425.10 19 80.05 CHANGED pssht+...hlhhhsLhsslsaGGslhhuphs-cFp-hhpct.lPhu......Ecllphh-pts.......................htt...phhpth.t...............................................sshspcstsspppssshttpss....................................t.pphtpshcthtpphlppstpthttst..........p.hpttsp.h..........................................................................................t......................h.tht.hssssstl..pplhshhscllsslsspsh...tschsshlscsppplsplspclsclppphcpclppplcppppc.hpphspphhtphctt.t..p.ppphppchppccpclccphcpcLcpcLcppppshpp+lpstlt.tplEhp+pFpc.lcc+lppER...........sGRLupLscLpup.lpsLEphstu.......hspshspscpsppLthulpulcusLcsuss.........pPlppclssLpch............................usc.............Dpllssulsulsstuhpc...GlhopspLhsRF.ppltsclR+sSLl..PssAGlhuHhsShlhStllFc.Kp....s.........supDl-olLuRscphLccG-LDtAsc-hss.L+G.Wu+...........cLApDWls-uR+pLElcphlcllpu-up 
..........................................................................................................................................................................................................................t.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t........................th.......t......................h.th..t..th.t.t..t...p....................t.t...ltt...................t.........t.t...............................t...p....tt.h............t.tt.h.pt.php......httttt..........tt.htp........t.t...........tt.h...t..pp......................tthh.pl.tph.sp.l................ptlpp....hhtt.................hs.......pt..s.phptlhhuspu..l.ptslppu......................hshtttlpslpt..........................................ssp.........s.hht......shltuls......uhpt...Gl.o.tpLhs+........F..tltp....hpps.uhl....tp.....sulhthhhuhl...Sh.lhhp...tt................................s..............ps.s....s..sllscsphhlp.pGcLc.Ahc.hsp.LpG..u+............tlu...p...-WlppsRhhl-spphhphl.t............................................................................................................................................................................................... 
0 93 156 236 +9564 PF09732 CactinC_cactus Cactin; Cactus-binding C-terminus of cactin protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2370) Domain CactinC_cactus is the C-terminal 200 residues of the cactin protein which are necessary for the association of cactin with IkappaB-cactus as one of the intracellular members of the Rel complex. The Rel (NF-kappaB) pathway is conserved in invertebrates and vertebrates. In mammals, it controls the activities of the immune and inflammatory response genes as well as viral genes, and is critical for cell growth and survival. In Drosophila, the Rel pathway functions in the innate cellular and humoral immune response, in muscle development, and in the establishment of dorsal-ventral polarity in the early embryo [1]. Most members of the family also have a Cactin_mid domain Pfam:PF10312 further upstream. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.77 0.71 -4.53 19 306 2009-01-15 18:05:59 2007-06-19 14:37:51 4 10 240 0 225 312 5 124.40 58 22.10 CHANGED hscchcs...+KP+aFNRV+oGa-WNKYNpTHYD.h-NPPPKhVQGYKFNIFYP-L...hspsp..sPpYplps..........sts.ss-hsll+FpuG.....................PPYcDIAF+IVs+E...W-asp+t...GF+ssFcp..............G.lLpLaFsF..........K+hpYRR .............................Wt-KYRPRKPRYFNRV+TGaEWNKYNQTHYD.hDN.PPPKlVQGYKFNIFYPDL...IDKsp..sPpYplpp......................sss..scDhsllRFpAG.....................PPYEDIAF+IVs+E...W-YStK+...GF+s.p.F.c.p.G..Ihp..LaFpFK+hhYR+........................................ 1 87 133 186 +9565 PF09733 VEFS-Box VEFS-Box of polycomb protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2350) Family The VEFS-Box (VRN2-EMF2-FIS2-Su(z)12) box is the C-terminal region of these proteins, characterised by an acidic cluster and a tryptophan/methionine-rich sequence, the acidic-W/M domain [1]. Some of these sequences are associated with a zinc-finger domain about 100 residues towards the N-terminus. 
This protein is one of the polycomb cluster of proteins which control HOX gene transcription as it functions in heterochromatin-mediated repression [2]. 21.10 21.10 22.40 21.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.71 0.71 -4.53 11 298 2009-01-15 18:05:59 2007-06-19 15:00:15 4 4 186 0 137 263 2 114.50 39 22.98 CHANGED tcclsuEpscsRshhh.h+pRpFaHScpsQPhphcpl.SDpDSEDEsD..c-hhchc-pphL--FsDVsc.sEKclM+LWN.FV+KQpllADuHlPWACEuFs+hHGp-Llps.sLhhsap......lahIcLasaGLlsucohspssphL ...................................................h..pp.h.........p.DS-sE.D..st.hhp.cp..hp.lp-FsDVsc.sEKElM+hWNha....V...h..K....p......s....h.....luDsp...lPhACphFlp.h...+GpcLlc+..s...LhRsFh......LHhssLa..DauLlsshsl.pssph............................... 0 42 73 107 +9566 PF09734 Tau95 RNA polymerase III transcription factor (TF)IIIC subunit KOGs, Finn RD, Coggill PC anon KOGs (KOG2473) Family TFIIIC1 is a multisubunit DNA binding factor that serves as a dynamic platform for assembly of pre-initiation complexes on class III genes. This entry represents the tau 95 subunit which holds a key position in TFIIIC, exerting both upstream and downstream influence on the TFIIIC-DNA complex by rendering the complex more stable. Once bound to tDNA-intragenic promoter elements, TFIIIC directs the assembly of TFIIIB on the DNA, which in turn recruits the RNA polymerase III (pol III) and activates multiple rounds of transcription. 
27.70 27.70 27.90 28.10 26.60 27.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.23 0.70 -5.10 52 385 2009-01-15 18:05:59 2007-06-19 15:02:08 4 9 291 0 250 378 2 276.50 25 54.86 CHANGED clsslEhPthV....psls+..................ulpslGG.ptlsp.....................slpstptp............................................................................................................................................lcLph.....p................spDsh..s+Pl..husp.....p.ssslLL+lsh...++stptpp....t................................................................hpschlGhlspsapFc...shuDFpahs......s..htp.........chppphhsh................pthp.hp.p.s..............................................................................................................................................................................................................................................hpl.......hPP....................................................................shFSphch.PasYt..................Ycpsstspt......................................................................................................................................................................................ttsp.t..........hsspppttphththhshs.hs.lPsp........................th.t.tthp.h.pptlphlcpLF-cRPlWs+psLh..splsp........pphtlK...............pslshluYhapsGPWRpshl+aGaDPRpcsp.thYQol.FR 
...........................................................................................................................................................hhslchPhhl....ps.sp..................hltshGG.ttlpp..................................................hhtt.t..............................................................................................................................tlpLhh..p.....................spssh....s+Ph...hupp.......h.spsll..L+lph...+ppt...t......................................................................................................................................thphchlGhlsphapFp.................shsDFpahs................tp....phttphhth..................................h..h.t..t.........t..........................................................................................................................................................................................................................................h.h.....lPP..............................................................................................................................................................................................shFophsh.Phpah..................ac.ps..p...........................................................................................................................................................................................tt...........................h.h..thpp..tt........h.h.h...............s.hPpp......................................thp.h...p.ph..t.lp..p..LF-.....pRPlWo+pulhspls.............p.ptl..K.......................................................hhlshluYhhh.s.GPWRphhl+aGaDPR...p...ss.ps+hYQsl.FR................................................................................................................................ 
0 91 143 211 +9567 PF09735 Nckap1 Membrane-associated apoptosis protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1917) Family Expression of this protein was found to be markedly reduced in patients with Alzheimer's disease [1]. It is involved in the regulation of actin polymerisation in the brain as part of a WAVE2 signalling complex [2]. 20.10 20.10 20.10 20.70 19.90 20.00 hmmbuild -o /dev/null HMM SEED 1117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.88 0.70 -13.74 0.70 -7.09 8 304 2009-01-15 18:05:59 2007-06-19 15:03:46 4 9 127 1 181 296 1 741.10 36 92.42 CHANGED spQpKlAEKLsILN-RG.GhLsRlYNIKKsCuDsKs+PsFLs-KshEsulKaIl+KFPslDs+ss..pLsslpcppsEIl+uLu.YYaoFVDlhEF+Dplh-LLsTlDusQlahDIslNaDlT+uYLDLlVTaVolhlLLSRlEDRKslLGlYNtAHEMpHGpSDsuF.sRLGQMIl-YDpPLKKLpE-FsPHo+tlosALpSLphlahRRNpou-pWRssQlLSLlusP.uslLssupo-ThACEYLSl-sh-RWIlhGallCHspL.ssssl.clWplALQsuLsloLFRDE.............................slhlHcshpshlpuhKGYuKRluD......................l+Es+EpAlupuuslH+ERRpaLRsAl+ELsLlLsDQPGLLGPKhLhVFhALuhARDEVlWLlRHsspss..pp+s+o...sEDhsDspIuELLFhMEcLRuLVRKYhuVlQRYalQYLSuaDAlsLspllQslulCsEDESlILoSF....lspLsSLssKQVEssEhF..DFouLRLDWFRLQAaTSsu+SulpLpcpc-.........LuchMNshVFHoKhVDpL-EhLsEsSDLShaCFYs+hhEchFtpsLEtP.upsRYsIAFPLlCuHFspCsH-hCPEE+s+Isc+uLuhsNpFLEEhAKpscslIsplCuE..ptsLupQLLPcHsA.+hloputscKspt..sps+KG.ts-hptPGtESaRKsRpslTshDKLphsLTELChulNas.slsVaEHsFsPREYLsspLEs+Fs+ulVuMstY.s.ssp-lsRPSElLuul+AYMollQolEpalulDloRlhpslLLQQT............QPtD.SpGcsTIsslYTNWYL-sLLRcsSsGsIVauPshpAFlShssE.thsFpAEEaSDlsELRALuELlGPYGhKhLsEpLMWHluSQVsELKKLVspN+DlLhtlRosFcKP-pMt..ull+pLs.............ssDsVLpRMhIIG.ILoFRsLhQEALccVLcp+lPFLhSsIcslp-plP.sssDhKshhpltElASAAGlsCclDPuLVsAltupKuc...spE.-acsuCLLLVFVAVSlPpLAtsssStYssph-GasNNlHCLA+AIsslsuA...............................LFol.tpssIEo+hKEFLsLASSSLLclGpEoDKsp......l+sREolhLLLD.lVpcSPaLThDhLESCFPYsLLRNAY+pVYcpsh 
......................................................................................................................................................................................t...p...thh...s.h.........th..h.......t.t.t.hht.ht.ah..hhD.hh.hp..-thhphl.thts.......h.h..t.s..hh..ahpLhh.hh...th.hh.tp........c.+hlhthashha....h.ptt.-..th..+...................hsphh..h.p.....Ph+.hh.pch.......ht.......h....t....hl.uh..hh....t.p...p.h+t.thhs.ht......hh.ss.t...h...........h.....hh..h.t.h.pWhhh......shhh..t.h..t.................p..........h....hh..sLpts.hl.lh+-p..........................................................................hh.hHp..p..hh.s...h...h......sK..chtc...................................................lp....-.hp.sh..pss.hHtp...+R.hl.+.tltph..hht...cpPsLLuPp.....h......hh.h.uLsh.psEl.WhhpH.t......................t...........-..-....pls.Ll.hhtplpthltca..hhp.t.....Yhl..................palst.ss..lp.hht.s.........th.h..s.....-.shhh.psh....hp.ht..tls...p...........pp....................phpshRh.cWhpl...hhho.spsshtl.tc..p.......................lhp.hs.hh.aophl.Dth.p.L.cpusL.ph.hh.......aa..p..hp...t.Ft.....s.h..s..s..pa.hsa.hlsspF..psh.p...hs.PE.E................h.t...ltpculthsp.hl-phhtthpshl....lssc.....hthttp.LhPppsA...h...tt...th...............tt.....h....h.PG.E.Sh..csp...lp.h-thh.tLspLs.shs.h.sh.Vh.pashh.pEYhpppl..pht...c.lh.hh..............h....h.........s.tpl..........+PS.l.s.lptahshlp.htpalthDhophhppsLLpps.............................p.h-.tpst.ohsthhspWY.-sll+psS.......sstlhh....psF......hsh..............sa.s.-paoDhpEhpuLsclhGsYGhchLsctLh.Hhss.l.plcphlh..pNh-hL..th..tsshpps-.ht....t.h.p.l...............sh-sllphhh..hG.hlsF+thht-uh.tlhp.phPhlhu.ltsh.p.hs....s.h.p.........h.p.husssGl...sc.h.Dsths.sl.t.hst..........................t..hshhLh...al.hsuh.h.s.......o..ap.thtuatNNhHChsh...s...l.tl.su................................h.ph...........pt..s..hp..pth.....p..Flhhuu..lLp.....pp.p...p............................h.p..........lhhc.hhp.s.hls.s.hE.hhPashhpt....h..
..h....................................... 0 66 98 139 +9568 PF09736 Bud13 DUF2050; Pre-mRNA-splicing factor of RES complex KOGs, Finn RD, Coggill PC anon KOGs (KOG2654) Family This entry is characterised by proteins with alternating conserved and low-complexity regions. Bud13 together with Snu17p and a newly identified factor, Pml1p/Ylr016c, form a novel trimeric complex. called The RES complex, pre-mRNA retention and splicing complex. Subunits of this complex are not essential for viability of yeasts but they are required for efficient splicing in vitro and in vivo. Furthermore, inactivation of this complex causes pre-mRNA leakage from the nucleus. Bud13 contains a unique, phylogenetically conserved C-terminal region of unknown function [1]. 20.60 20.60 22.00 21.10 20.10 20.10 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.38 0.71 -3.93 44 337 2009-01-15 18:05:59 2007-06-19 15:04:41 4 3 291 0 240 323 0 136.80 36 33.20 CHANGED -TlaR.Dp.oG+hI.shcppppcpcpcp........................pccpcccpp.ttphspG.lQtpppcpphpchpcttptsh....sch.pDp-hsp.h+pppRhsDPhspa......hppccppss..............thht+stY.............pGssss.NRFsItPGaRWDGVDRSNGFEpchFptpsc ..................................................................................................................cTlaRDt.sG++h..s.h.p.cc.t.ctccpt..........................pcptccc...ph.htphs+G.sQpcppppphcchtcttptPh....ARhh-Dp-h.s.p.L+pppRhsDPMspa.............lpcpctpts................................tptht+stY..............pGssPP.NRasIhPGaRWDGVDRSNGFEpchFtths.t.......................... 0 82 130 197 +9569 PF09737 Det1 De-etiolated protein 1 Det1 KOGs, Finn RD, Coggill PC anon KOGs (KOG2558) Family This is the C-terminal conserved 400 residues of Det1 proteins of approximately 550 amino acids [1]. Det1 (de-etiolated-1) is an essential negative regulator of plant light responses, and it is a component of the Arabidopsis CDD complex containing DDB1 and COP10 ubiquitin E2 variant. 
Mammalian Det1 forms stable DDD-E2 complexes, consisting of DDB1, DDA1 (DET1, DDB1 Associated 1), and a member of the UBE2E group of canonical ubiquitin conjugating enzymes and modulates Cul4A function [2]. 25.00 25.00 28.70 26.30 18.00 24.20 hmmbuild -o /dev/null HMM SEED 407 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.36 0.70 -5.71 8 179 2009-01-15 18:05:59 2007-06-19 15:06:35 4 6 119 0 111 170 0 314.00 37 70.11 CHANGED REhSLFhp-s+asIluuuohlsc-.shP.......pah-lacss-ulpss..sslEcYThallDL+pGcloDu+sF+sDpIlLuHNpGlaLYsshLAILSlpaQsIaIapVs.-GcFlchRTIGcFCp--Dthhlsps..........pthsssshcpshh.P.....hIsuLKpRlLsFLaRpAcspuussupt.....R+FY+pF-pacpLhMWKMQLLDcchLLIKY....uopDlssL+ss-ss..psShFVlYsIhspplluVYpNsSscLLpLaEpFsDpFRsushtp.t.sFtsSsouNsaup.hppphKpThl..NtchuuppcAs+RlhuuLPlSuQSYSuSPYLDhuLFSYDDKaVSshERPKsCu-aPI+Fhu.RcouLLKF+lpAGspspssPts.sRRLVAFlFHPh-PFAlSVQ+s..stsYlVNFHhR+ ..............................................+-h.Lahcssphslhuoush.....pp....s.s.................................sstulpss.....sslEchohallcL.........p..sG..hlhDphhapsDhl.Lu...HN.GlaLa.....pshLullSlp.QsIalh.plt....p.G.p.hlpl+.sIGtaCh-DD.hhlps.........................t....tt..h.s..................hlsulK.pRlLsala+ph.ptts....s.s.......+cFa.hFpth.pLhhWKh.QhLDpp+LhlKa...........sS.Dsssh.Rss.........-............psuFFsVYNh.oo-lluhap.NoSp-LhpLFEpFsDhF+sss.ps.h.pF.sStSss.aAhp...pp......hK........stphusptphs++hLsp.LPhSsQSh..SsSPYhDhsLFpYD-KhlS.sh-R.+tss-pPI+Fhs..R.p.sshlKFcltsu............tt.....s.t.s+.+lsuFhFHPh.PhulSlQps............hlshHh+p......................................... 0 38 59 86 +9570 PF09738 DUF2051 Double stranded RNA binding protein (DUF2051) KOGs, Finn RD, Coggill PC anon KOGs (KOG2010) Family This is a novel protein identified as interacting with the leucine-rich repeat domain of human flightless-I, FliI protein. 
27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.13 0.70 -5.13 7 400 2009-01-15 18:05:59 2007-06-19 15:58:51 4 3 88 0 181 356 0 199.10 31 66.18 CHANGED AEARLAA+RtARAEAR-IRM+ELERQQKE..................-pp.D+.a....scpsSpst.ssl...s..............sshsGo..SSRRuStD...Sh-p-s.ShR-l........................+cpLtEVEE+a+KAMloNAQLDNEKsshhYpVDhLKDpLEEhEEphAphpREhcEKp+-hct.K+shshLphphp.l+ppLppRDpLIpcpGLllls..ssNu-su-p.s.......su.shlptEstplLpss...tGsLDV+L+Khs....sEpppL.tplpp.ptpLpth..ptp..h.utpsGsh...........p.........sQ+-uNK.Io-hKhKL.sKu.EpElsNh ............................................................................................................................................................................................................pp...cpth.........t..op......t.t......................................ss.tst......uS+...R....us..t.s......t....t...................................................................................................-+tt...plchhpc.h.p.pp.h....+p.......-.K.............p...-.h.E.+.pKchhshlp.pht-l+-tl...pptcchl.............p......cp...............................................................................................................................................................................h...................................................... 0 26 42 93 +9571 PF09739 MCM_bind DUF2044; Mini-chromosome maintenance replisome factor KOGs, Finn RD, Coggill PC anon KOGs (KOG2545) Family This entry is of proteins of approximately 600 residues in length containing alternating regions of conservation and low complexity. The Arabidopsis protein is a replisome factor found to bind with the mini-chromosome maintenance, MCM-binding, complex and is crucial for efficient DNA replication. 
25.00 25.00 27.70 26.20 17.10 18.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.72 0.71 -4.57 37 164 2009-01-15 18:05:59 2007-06-19 16:02:28 4 7 136 0 117 167 2 120.10 29 19.86 CHANGED cphLpsssshp...plP.lNss.slctlps........ssLVRFRsMlQDh.hssEhYlusYcstst.......hcssKYp.Dhhphs.ttth..........pshhhERpshYsVPlPGpssWstppppt.........phtstspssssppKR ..........................phLpppsshp.lPsLsps...slchlps.........soLVRFRsMlQD...M.hsPEaYhusYcshsppst.....hh+tuKYc.Dshpssstpph..ts......psshhERpshas.VPVPGpssWs+pt..s.............t................................................ 0 51 67 96 +9572 PF09740 DUF2043 Uncharacterized conserved protein (DUF2043) KOGs, Finn RD, Coggill PC anon KOGs (KOG2374) Family This is a 100 residue conserved region of a family of proteins found from fungi to humans. This region contains three conserved Cysteines and a motif of {CP}{y/l}{HG}. 20.10 20.10 25.70 21.70 19.60 19.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.91 0.72 -10.64 0.72 -3.96 9 103 2009-01-15 18:05:59 2007-06-19 16:10:46 4 2 75 0 68 99 1 104.60 44 16.58 CHANGED ths+APlVPaGhDLpYWGpEp.sssp.hp.sspH+FWt.s-.EppVsusclsphhppRplTatGchcslp+h.CpA.h.sGtLCpRpDhhpCPFHG+IlPRDDpGpPlppED .........................................t.ucAPVlPaGhDLpYWGpcp.ss.......suph...hp....sspHRFWtss-s...-p........plssscluEhhppRpIoFsGchEPlp+h.C+APh.s.....s.Gp..LCpRpD+h.K.....CPFHGpIIPRD-.p.GpPls.p.............. 1 23 34 50 +9573 PF09741 DUF2045 Uncharacterized conserved protein (DUF2045) KOGs, Finn RD, Coggill PC anon KOGs (KOG2465) Family This entry is the conserved 250 residues of proteins of approximately 450 amino acids. It contains several highly conserved motifs including a CVxLxxxD motif.The function is unknown. 
25.00 25.00 25.80 25.10 21.20 22.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.51 0.70 -5.23 3 168 2009-01-15 18:05:59 2007-06-19 16:11:30 4 8 105 0 103 160 1 208.10 41 49.25 CHANGED V-pcosFWTslFssYFlcphEsR+p.D.DDLLFFVR+KLupuSS......RshsEsEsElEVY..RRDS+KLPtLGDPD...VDWEESVYLNLIlHphDYTVTlAICTRsusK-........Lpll+K+SQpVYASPSR..++MDSKGEsEcI.oYPcICFMVDsFDEVFcDllVsDGE.MVCVELVAoD......................+ssosQGVIFpGSIRY-ALKKVYDuRpSplGS+hAQ+MSFGaap....po+sEFVRMKGPQGKGHAEMAVS+Vss ..........................................h...t.haWp.hFspaFh...t....ph....tsp.p.............DDhLFaV+.p.hs.htu............ht...s..p...psplpVa...RRp.u.KLPt.ls-ss...................lDWEcolhLNLlhpp......h.sYhlTsAlCo+sssts....................lp.h.p+hoppVaASPS+..+.MDo.K.G................-..p..pl..oYPpIhFhlDsF--sFsch.hl.t-.sE.hhCVpLsApD.....................................................+.psshp.sslF.G.lpYpsl+.csYDs....phS...........hu..u................h.h..S.hGh....................s.ph-hlh...M+GPpG.+GcsEhAVots..s............................................. 0 35 50 76 +9574 PF09742 Dymeclin Dyggve-Melchior-Clausen syndrome protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2225) Family Dymeclin (Dyggve-Melchior-Clausen syndrome protein) contains a large number of leucine and isoleucine residues and a total of 17 repeated dileucine motifs. It is characteristically about 700 residues long and present in plants and animals. Mutations in the gene coding for this protein in humans give rise to the disorder Dyggve-Melchior-Clausen syndrome (DMC, MIM 223800) which is an autosomal-recessive disorder characterised by the association of a spondylo-epi-metaphyseal dysplasia and mental retardation [1]. DYM transcripts are widely expressed throughout human development and Dymeclin is not an integral membrane protein of the ER, but rather a peripheral membrane protein dynamically associated with the Golgi apparatus [2]. 
25.00 25.00 27.20 27.20 24.40 24.30 hmmbuild -o /dev/null HMM SEED 678 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -13.23 0.70 -6.42 25 291 2012-10-01 19:21:38 2007-06-19 16:27:57 4 7 170 0 171 566 11 481.00 28 87.39 CHANGED MGss.S...................t..chphpph.......hthhssppsss.ss-sF..WspLhph.hphspssp-hh..lsut..phhpshh.sNhsohshttlhphhhtp........sphttspcpps.........................shNslhllspllp..ahhE..p.tpsphhthhhtt..t..........................................................shst....t............................h.shsphEphhpshls.hhss.ss...............sstp.th+hEhlplLLshhSsplatsssst........ss.ahphhhp...tpsptsssLhsoLLpphhphssssh........ph.hht...................uhh.hhsts...hh...............................sst.sp+sPLu..stulplLLlLhsapss...........................psNsapp..slshhpchcp............t..hpu.sp.ts..h.lsastL......hsol............sp...hhtppphllLLYpLlcpNppFhpYlLs.+uD.lpslllPILphLasuppc..................................................psphlahslhlLLlLSpDpsFspslpch........................h.hssls.Wh..p-hslt..........pho............lGuLlllVlh+hIphNhp...+hp................sh.LasssLuslsNhSsah+sLshhsup+LlsLh-hLop+ahphssthspch............................t.........h.hhpphl+hlLEllNuhls..hplctNspLVYsll+......+R.plFpphps........c................................................................................................................................................shhpcl.lpsItpl..Lpaasspl-.....phsssp...hs............hsclLchIpcushs..............................sstslp.....................as.lhacYppcptsppaF.hs........YhWsl 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hhh..h.s..hh.........................................hh.......................h...hl..h............................................................................................................................................................................................................hs.....p.sh.lLllLhp.hpt...........................................................t.s.sapt..hl..h....ts.p..............................................t............h..pl..s.hs.L.............h.psh................................st......tppp.hlLLY...LlctNpphhp..ahh..psc...h.pl.....l.hPlLph.Lapstpp....................................................................pst.lahhLhlLLlLo...pDpsFst.pl.pph.......................................h..h.sls..ah...t-..+.h..t.....................phS.....................lG.uLhllllh+sI.p.hsht...+hc.......................s.hL.asssLshLuN..h.....u.s.h+p...Lp.asup+lls....L.hphls++...at+h...p.hppph..............................................................................t.........................lphhpchl+hlLEIlNuhLs.....................tL...pNsplVYsllh......cp.plFp.hts............p..............................................................................................................................................................................
...........................................................................................s.htcl..hpsl..h...lthh...h.....................................t........s........................................tthhthl..t.s..............................................................................h.................................hhh.....a.tp.....ah.h................................................................................................................................................................................................................... 0 77 102 141 +9575 PF09743 DUF2042 Uncharacterized conserved protein (DUF2042) KOGs, Finn RD, Coggill PC anon KOGs (KOG2235) Family This entry is the conserved N-terminal 300 residues of a group of proteins found from protozoa to Humans. The function is unknown. 23.20 23.20 23.20 25.10 22.80 22.50 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.61 0.70 -5.09 17 181 2009-01-15 18:05:59 2007-06-19 16:28:15 4 6 143 0 136 188 1 250.30 35 35.86 CHANGED ElpcLttcFpcsQhsp.ssp+LSERNClEllpKLlccphl-..llaThDGKEYlT.spLppElpcELhspGGR...lsll-LspsLNVchs+lEptspcllcps..slphhtGpLlscsYl-plspElN-cLpEpGplslu-LsppacLss-Flp.pllpt..hhuplIpGphsts.....hlaTpsalpppcuplRGsLhAhTcPsslsslhpp.............hthppplhhsllpsLlst..splpGphhG......u.YlPphYppspsshVcsaacQNGalEassl.ppLGIsssppalp ...............................................ElpcLttphptsQ.sp...ssp+LSERN.slElls......KLhppp.hl..-...llaThDG.KEYlT.tplp+Elp-ElhhpG........GR...lsllDLt..phlsVDh..+lEpphpclhp.....ppt...p...........l..p.ll....G.p.LlspsYl-plspElN-pL.........p-p..Gplslu-Ls.ppasLsu.cFlp.phlpp....clGp...lIpGph-ts.................hlaTpualsRp+AplRGhhsAlT.+...Psslsslh..pp...............................................hsh....p.....pplhh..sllppLlpp...upltGplhG.....tuhalPslaspsQpphVcsFapQNuYl-a-sL.p+LGIscshpal................. 
0 55 74 112 +9576 PF09744 Jnk-SapK_ap_N JNK_SAPK-associated protein-1 KOGs, Finn RD, Coggill PC anon KOGs (KOG2077) Family This is the N-terminal 200 residues of a set of proteins conserved from yeasts to humans. Most of the proteins in this entry have an RhoGEF Pfam:PF00621 domain at their C-terminal end. 24.10 24.10 24.10 24.10 24.00 23.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.11 0.71 -4.24 6 354 2009-01-15 18:05:59 2007-06-19 16:29:01 4 5 89 0 191 319 1 144.20 40 16.80 CHANGED VpsLAsuIY+EFERlIcpYsE-VlKpLMPLVVsVLEsLDulhu-NQ.ch-lEhpLLc-DpcpL.sQYEREKpLR+pAEpKllchEDshEpE+K-Lpp+l-phEppsRpLELKhKNhs-pluRLEEREs-hK+EYsuLH-RaTcll+sYlEalERs+hph ....V.slAuuIhpEFERlIcpasp-.sVppLMPlVVsVLEtL.........-.........slhs....cs.......p..cpcl.Elc.hL+-..DsE.pL.sph....c....c....EKth.R+p.t...E.....p....chlc..h....E....DshctEp+...-Lpsp.lppLEp.ps...+pLp....hch.+.N....h...u-...p..................h..s....+....L....p.EcEsp..h..+pcas.slpp..RcsphhpphhEhl-+p+................................... 0 48 65 126 +9577 PF09745 DUF2040 Coiled-coil domain-containing protein 55 (DUF2040) KOGs, Finn RD, Coggill PC anon KOGs (KOG2117) Family This entry is a conserved domain of approximately 130 residues of proteins conserved from fungi to humans. The proteins do contain a coiled-coil domain, but the function is unknown. 
25.00 25.00 25.60 25.10 24.60 24.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.76 0.71 -4.32 30 288 2009-01-15 18:05:59 2007-06-19 16:33:23 4 10 235 0 203 277 2 119.30 38 29.80 CHANGED +pscpctpcAhp...pDsolasYDulYDsl+.tt.pctppsp.pscsp.+cPKYhssLlcuA-pRK+-pphspERpLt+EREtEG-pauDKEKFVTuAYK+phEE.++hpE-.Ecc+-chEcppctspttshh ..................................p.pschphpcAht...tDsola-YDslYDphp.....ttpp...cpp.t..t.h.......t..pp............++P....KYIpsLlcuA-hRK+.-pphthE+plp+EREtE.sc...c..F...sD.K.EtF.VTuAYK+p...hpEhcch-Ec.E+cc.pthEtptchsptts.u.................................... 0 73 110 159 +9578 PF09746 Membralin Tumour-associated protein KOGs, Finn RD, Coggill P anon KOGs (KOG2092) Family Membralin is evolutionarily highly conserved; though it seems to represent a unique protein family. The protein appears to contain several transmembrane regions. In humans it is expressed in certain cancers, particularly ovarian cancers [1]. Membralin-like gene homologues have been identified in plants including grape, cotton and tomato [2]. 
23.70 23.70 24.30 24.20 23.60 23.60 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.39 0.70 -5.41 3 172 2009-12-07 11:10:06 2007-06-19 16:39:37 4 5 100 0 123 196 0 233.70 36 53.94 CHANGED uQNPLINVRDRLFHALFFKsAlTYAcLVP+sVRRsIEFllLLKALLsFFILlYIHluFS+oPsTCLEHVKNcWPRDGILRVEIls.suc++sIaLpht-ssollRslK-suhhuI-PpTcpsHEolEpYQNsplKLpLslcPosahssppLps....pFDuENhosThSascAhSht.-sWs-EQYIVEYSLEYGFLRLSuSTRQRLNIPVhlVsLDPs+DKCFGDSFSRFLLKEFLGYDDILMASVKsLAEQEENKGYLRNVITGEHYRFVSMWhMARSSYlAAFsIMlLFTlSVSMLLRYSHHQIFVFIVDLLQMLEaNlSlRFPAAPLLTVILALVGMEAIMSEFFNDTTTAFYIILIVWlADQYDAICCHTSlTKR ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................YshEhGahh.Ls.ts+tchpI.sh.lplss.pp.C.FG....sth.pphllpphlGYDslLhsSlht............p..spG.aLhNh.otEhY.............p......h.....s.....................h..................hths.s.....a......s........s........hh...hhhh..h.....o.htluhhL.R..spp..phhhFh...h...pl........p.....hhphp.s....h.......h....hh..hshl.......hhl.G..h.hhh.EFasDp.hAFhlllhVWhs-.ashls..s+osho.......................................................................................................... 0 49 64 103 +9579 PF09747 DUF2052 Coiled-coil domain containing protein (DUF2052) KOGs, Finn RD, Coggill PC anon KOGs (KOG3044) Family This entry is of sequences of two conserved domains separated by a region of low complexity, spanning some 200 residues. The function is unknown. 
25.00 25.00 31.80 25.20 23.90 23.60 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.46 0.71 -4.35 20 236 2009-01-15 18:05:59 2007-06-20 11:01:27 4 7 195 0 184 239 0 155.60 26 58.63 CHANGED NRRhthLppLlpcu-YFS-ppMc.R-PLLY-phlGpa.oppE+pscspsc...t..........uhuulLhsslp+pptchpltcpp.....p.-t.t...-pcspt...................p.p.s..sp..t-p.............ts............................ssp-+-.hc-cFhshMpp+FLsGEDcD.FDYopVDsss-aDD...c.hppDcEE+YF--- .........................................................NRRhthh.pt...................ps.......p....YFS...t.th........c.t......pPhLYcphl.....tpa.s.t-tptps.hc.................shushL.t....ph.cp....ph.p..t.h.tp....................................p...............................................................................tp.....p.....t.......................................................................ssppcp.htppahthMpp+FL........pGc.Dp-.aDYst.lDpst.phDs.........tpDtE-+YF---.................................... 0 65 102 150 +9580 PF09748 Med10 Transcription factor subunit Med10 of Mediator complex KOGs, Finn RD, Coggill P anon KOGs (KOG3046) Family Med10 is one of the protein subunits of the Mediator complex, tethered to Rgr1 protein. The Mediator complex is required for the transcription of most RNA polymerase II (Pol II)-transcribed genes. Med10 specifically mediates basal-level HIS4 transcription via Gcn4, and, additionally, there is a putative requirement for Med10 in Bas2-mediated transcription [1]. Med10 is part of the middle region of Mediator [3]. 
21.50 21.50 21.80 21.90 20.30 21.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.50 0.71 -4.32 18 291 2009-09-10 23:46:38 2007-06-20 14:56:13 4 7 256 0 209 273 0 121.30 33 74.23 CHANGED hppplcplIEshhpLslhVp-Fp....P.......................soppsLtp+lssLlpsLpplpchspp...............................hps...............lp..lPlEV.lpYI-sGRNPDlYTREhlEt.shcpNphh+GKhcuh+chRcsLtcElpcpFP-hhsphcsI ........................................................l.ppplcphlpslhpltlhVpsap.....s..........................poppsLsp+lp...pl..lpsLpplschppp...................................................hps...............lp..lPhEV.l.cYID..p...G..RNPplYT+EhlEt.shtpNpthKGKh-uhc.pF+shLhpElspsFP-.htphctl................. 0 67 112 168 +9581 PF09749 HVSL Uncharacterised conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3102) Family This entry is of proteins of approximately 300 residues conserved from plants to humans. It contains two conserved motifs, HxSL and FHVSL. The function is unknown. 
26.80 26.80 27.80 27.40 26.40 25.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.62 0.70 -5.35 27 269 2012-10-03 21:31:48 2007-06-21 09:20:52 4 6 231 0 194 278 2 211.80 23 79.87 CHANGED PPlPpshh-hhts......t.p.pDs....sshHsGRhRsh.HhcGNWsoa....lY..lthtsspt...hchlpphlsp.hpphhhhhphp................sth.....s.h.LHlSLS+slslcp.cphssFlpplcptl....sshpsFtlphss.lp..hhsNt-cTRhFLsLclsp........spsst...LpcllptlscshpcashsshY..........................................psspFHlSlAWsl.ss.ptphp...ct..ph..phhpth.p..................phpsspl+s+lGNpshshsL ..................................................................pp.....t.ctuRhRt.h.H.pGsWsoa....lY....l...h.....sptt........hp.......hl.p.tlltp.htp.h.t.................................................................tshHlSLS+slslpp.cthpsFlptl+ptl....................sphpt.....Fhhphsp...lc..hasN.p....-p.TR..sFluLpVsp.................................sttp......lpp.llp.hl.spshpp....a..shsphY...................................................pssphHlSlAWsl.ss.ptphp...t...p....p...p...htth..........................................ph.hpplph+.Gpthhph......................................................................... 0 55 93 149 +9582 PF09750 DRY_EERY DRY; Alternative splicing regulator KOGs, Finn RD, Coggill PC anon KOGs (KOG2548) Domain This entry represents the conserved N-terminal region of SWAP (suppressor-of-white-apricot protein) proteins. This region contains two highly conserved motifs, viz: DRY and EERY, which appear to be the sites for alternative splicing of exons 2 and 3 of the SWAP mRNA [1]. These proteins are thus thought to be involved in auto-regulation of pre-mRNA splicing. Most family members are associated with two Surp domains Pfam:PF01805 and an Arginine- serine-rich binding region towards the C-terminus. 
25.00 25.00 27.00 27.00 24.30 24.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.64 0.71 -4.01 17 271 2009-09-10 16:58:51 2007-06-21 09:30:53 4 11 117 0 173 284 0 124.10 33 17.96 CHANGED p.LplaGpps+latDsthAtAs-ssptL...hPW...Gspp.hIDRaDsRuhL....shh.......ttsp.sp..phs..ppttEphhshERYhsLhpsch....................ptthppcpppphsppptp.tshsslGFoYssst ........................L.VaGhuCKla.hDs.thAhst-psppL...lPW.......G........D..p..shhIDRaDsRutL............chh.........................ts..sp.hpho.tEptp.Ephss.ERYhsLhpspht................................................tt.pc-chpphtpt.....t..t.tthsslGasYtss.s..................................................................................... 0 60 85 130 +9583 PF09751 Es2 Nuclear protein Es2 KOGs, Finn RD, Coggill PC anon KOGs (KOG2627) Family This entry is of a family of proteins of approximately 500 residues with alternating regions of low complexity and conservation where the domain similarities are strong. Apart from a predicted coiled-coil domain, no other known functional domains have been characterised. The protein appears to be expressed in the nucleus and particularly highly in the pons sub-region of the brain. The protein is clearly necessary for normal development of the nervous system [1]. 
20.20 20.20 21.30 23.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.70 0.70 -5.06 34 313 2009-01-15 18:05:59 2007-06-21 09:35:03 4 10 226 0 243 314 2 326.40 27 74.37 CHANGED cptpVLsE-sYlpsLpcIIpRDFFPcLhchpspp-Y...................L-Al-spDhphlpphp....p+hpphht..............oPsphcss...sh....st.s.....ssspsss.ssh......tt-p..t...ttptt................................phuLspF.s+YTSEDNcSFpcll-ptppKcp.cKauWlap.scttpspplt.t...............................t.p.ssppt.....lc..........................sp.st............................................................................................-tW.ph+scNsLMahPsuspss..-sltptsct.........pclhapNTRh................pt.s.shsps......uhstlpcAhttp.p.p.sch.....ssc..........oPc..........VNGY......uaVcss.pPss...............t.sPlhTaGclpsT....Phpl-s.c..s.......tsss.......................ssFpI.Ess+REpluc+hsccsu.tKppp+p..............tpshsos.....t..ss.tls.........................LoPAA.Q+L ........................................................................................................tlLcE-pYhpslppIItRDFFPsl.chpspp-.a...................L-A.pptD..hhtpht.......chtphh.................................s.ht..tt..................t.....................................................................................................................................................................................................t.thoLstF.t+YTSEDNtSFpclh-ptpp+pc.t+aualap..s...c..ttppht................................................................t..p.....ttt...........hp..............................................tt..t...........................................................................................................................psW..p.h.pscNsLMahPp.uht......p........pph....tt..p......................pl.apNTRh..........................p.....shs.p......................shstlppshthpst...tp..........s.t.............oPp..............ssGa.......shVtss..pPtP..................
.t.t..sP.....hh...T....WGplpsT....Phhlpstp.............s.s............................ssF.+......l.-sspREplthphspcsu.tp.ptpt.................tt..to................s..th....t................................hoPAhp+h......................................................................................... 1 103 146 204 +9584 PF09752 DUF2048 Uncharacterized conserved protein (DUF2048) KOGs, Finn RD, Coggill PC anon KOGs (KOG1551) Family The proteins in this family are conserved from plants to vertebrates. The function is unknown. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.00 0.70 -5.57 11 211 2012-10-03 11:45:05 2007-06-21 12:56:45 4 4 135 0 126 523 39 304.20 34 88.08 CHANGED o+FFs+GWGc............chlcclhs.ct.lppR.......sPsshtlclspshppcssp.....lt-GpFtSPht..hsuhLPspucpA+hphLlPpph....h...+slCl.HLAGTGDHsah+Rcph.AcPLLK-.sluullLENPaYGtR+PppQptuuL+sVSDlalMGuuhlhEupsLLpWhccc.GaG..lGloGlSMGGpMAuLsuoshPcPlu.....lVPhLuhsoAssVFTpGllppulsWchLcpp...............................................................................................................................................................................................s.t........tpppsts..pEulR.hhthh.stTslpsFssPhsPphsIhVsAccDuYV..P+pust.sLpphWPGuElRa...l-u.GHVSAhlhHpchaRpuIh-Ah-R 
................................................................................................................................................................................................................................................................................h...........t................pu.F.os......thhP..s....A.hhthlh...........Ptt.......................c.hsl.pL.A.GT...............G..D.H.h.a.+Rhph..utPhlKc..thuollLE....sPa......Y....G....R.+....P.t...t.Q....h.....t.S...p...L....p...s..V...S...D.....l...h...l..h.G...t.s...h....lhEst.sLLp.Whcpp....Gau....lGhsGlSMGGh...hA...u....lsso....saP...c...P...hs........hl.P..hL....o...........s.............o.....A.ss.s....Fsp.G...l..h.......p.....uh.s....W..t....Lp..pp....................................................................................................................................................................................................................................................tp..p.p...p.....hEs.l.......h.h....h....t.hh..p.h..T..c..lts...F..s..s.Ph.c.s..sh....l......lhVtApcDuYl......P+p...ssh..pLpch.W....P.G.s.E.lR..a...l..p.u.GHloualh+...p...thFRpsIh-sht........................................................................................................ 0 47 65 99 +9585 PF09753 Use1 Membrane fusion protein Use1 KOGs, Finn RD, Coggill PC anon KOGs (KOG2678) Family This entry is of a family of proteins all approximately 300 residues in length. The proteins have a single C-terminal trans-membrane domain and a SNARE [soluble NSF (N-ethylmaleimide-sensitive fusion protein) attachment protein receptor] domain of approximately 60 residues. The SNARE domains are essential for membrane fusion and are conserved from yeasts to humans. Use1 is one of the three protein subunits that make up the SNARE complex and it is specifically required for Golgi-endoplasmic reticulum retrograde transport. 
29.40 29.40 29.40 29.50 29.30 29.30 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.61 0.70 -4.94 11 304 2009-01-15 18:05:59 2007-06-21 14:54:11 4 8 223 0 200 291 0 221.70 20 82.89 CHANGED S+LElNhhRLLu+CEthAp...EcpppspWRLcKaVsuLccMlspLccp..........hsKPos-hlsEYsc+lshLKullpupch.............................soss-KshssphLusups.sthsptcssso+p.............l+.pppu+hpsEhRpELls................sssuhp.ptshhtpp...........................ts.sssccpussclDphlphHpslQEKLA--MltLARNLKppo.sApslIKpDspsLspSt+hsDpNlspLppES-RLEpHupKusp..hahWlMlhlVhhhFIsMlLFI+lh .......................................................................................................................................................................................................................................................................................................................................................................................t.thh......th..cht.lt...ht....p....................................................................s.....ptt...tp..s..t............p..t.s.t................................................tt..........sphRp.chht..........................t..................................................................................t....t..pt.ps....ttph..-t.hp....tpcphQEcLsc-hlpLAppLKps.ohs.h.ppslc...pDpphLsps...tcth-ps...h...pphppt....sp+...l....pp.h..t.p.p.s.hp........h.hhh.........h.....hl...h.h.l...hh.h.F.l.....hllhh+h......................................................................... 0 63 103 159 +9586 PF09754 PAC2 HCCA3; PAC2 family KOGs, Finn RD, Coggill PC anon KOGs (KOG3112) Family This PAC2 (Proteasome assembly chaperone) family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 247 and 307 amino acids in length. These proteins function as a chaperone for the 26S proteasome. The 26S proteasome mediates ubiquitin-dependent proteolysis in eukaryotic cells. 
A number of studies including very recent ones have revealed that assembly of its 20S catalytic core particle is an ordered process that involves several conserved proteasome assembly chaperones (PACs). Two heterodimeric chaperones, PAC1-PAC2 and PAC3-PAC4, promote the assembly of rings composed of seven alpha subunits [4]. 22.60 22.60 22.60 23.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.33 0.70 -4.49 126 1323 2009-01-15 18:05:59 2007-06-21 16:27:51 4 4 791 13 568 1106 677 221.70 21 77.96 CHANGED hlpGhsssGpluplAscaLlcph..chchlupl.ps.phhs.....Ph....shhpc.s.........thhhsshclYhsps..tt.....................lllltup....s.ss.hthp.......paspt.llshscchslppllsLuuhsssh.c..pps...slhshssspchh.pphp.............p.ppht.....s.sGssuhlhths.tptshsuhslhstss........................t..hs.....sPpAutsllc..sl.sch............hslpl.s..hspLtccA...p............-hpphlpp.Lpc ..................hhpGass.sGpsuphAsctLhpph............ptchlupl.cs.-thhsh.sp......pPh...hthps.s......p..........thths..shplahsps.tt.....................sllllpG.....spPs...hpac.......paspp.lhshs.c.ch..s.lppllsLuulsssss+........s+s....hslhs.hus..s...sphh..pphp..........................htchp............sss.u.hss.ll.tth.tppsh.ss.ls.hhstlP................................pY.hs........sPtAshsLlc..tl..pch.............hslpl..s...hs..sLtppApphp...............Eltphlpt.L..t.................................................................................................................................. 0 182 378 494 +9587 PF09755 DUF2046 Uncharacterized conserved protein H4 (DUF2046) KOGs, Finn RD, Coggill PC anon KOGs (KOG2129) Family This is the conserved N-terminal 350 residues of a family of proteins of unknown function possibly containing a coiled-coil domain. 
20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.12 0.70 -5.54 3 169 2009-09-10 16:40:10 2007-06-21 16:31:04 4 5 127 0 120 159 5 230.70 45 56.20 CHANGED A-CsSESDoDGuTscsSSouSp.........EcLpsRIcSLpQENKVLKhELDTFKLKCKuLQEENRsLRQASVoIQAKAEQEEEFISNTLLKKIQALKKEKETLAhNYE+EEEFLTNDLSRKLsQLRQEKscLEQTLEQEQEaQVNKLMRKI-KLEADTluKQToLEQLRREKVDLENTLEQEQEALVNRLWKRMDKLEAEKRhLQEKLDQPVS-PPSPRDhh..oputDTsssluSHI+uLRSEVcRLRcNLAsSEt-aTEKMpQYAcEERphREENIRLQRKLpREVERREALCRQLSESESSLEMDDERYaNE.l .....................................................................................................................................t.p......hc...phth.+h+sphltc.-.+tL+psuV.lQu+AEQE..............EEaISNoLhKK...Ips.LpKEKEsLAhpYEpEEEhLT.....NpLSR.KL...QL...ppEKs.cLEppLEpEQEh.VNKLh+....+Ic.......+....L..............cs-p..s.pp....p...............LE.......pLR+...Etl-.LENsLEpEQEhLVN+LhK+h-cLpsEKR...LQ.+...Lpp...hs...t.s..s.....t...................t......t.............h..lptEh..p..............hpt.h.........................c..........pthttpNhphpp.l..phtch.th.pt....................................................................................................................... 0 59 73 99 +9588 PF09756 DDRGK DDRGK domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3054) Family This is a family of proteins of approximately 300 residues, found in plants and vertebrates. They contain a highly conserved DDRGK motif. 
23.30 23.30 23.30 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.26 0.71 -4.89 16 209 2012-10-04 14:01:12 2007-06-21 16:39:39 4 4 157 1 132 209 5 164.80 40 57.11 CHANGED tclupKcttKh.ptKpt++ppREhEctpREp+c+hptp+.tchppp.....c-pcctpcpccEcpccctcEEpcc+EpEEYp+hKupFslE-pGppptps--ptp....hlpcFIsYIcppKlV.lE-LuscFsl+Tp-sI.cRlpsLptpGplsGVlDD.RGKaIYIos-ElpuVspaIpp+GRV.ohs-LsptsNcLIs .......................................................t..hst+chtKh.ptK.pt++tpR.c....t-pt...t.REpR+.phpptc-t.ch+cc.................--cpc.pEppcEE.tp+p.t.+E....EpcccE...p.EEY.+h..KtsF...sVEE..EGhtpp.sc-ppp.....hLpcFl.pYIKppKlV.LED.LAupFtl+Tp...-sI.sRIppL.spGploGVh.DD.R.....G.KF..IYIo.-EhtuVApaI+p+.GRV.SIs-LuptSNplI............ 0 53 73 106 +9589 PF09757 Arb2 Arb2 domain Wood V, Bateman A anon Wood V Family A second fission yeast Argonaute complex (Argonaute siRNA chaperone, ARC) that contains two previously uncharacterized proteins, Arb1 and Arb2, both of which are required for histone H3 Lys9 (H3-K9) methylation, heterochromatin assembly and siRNA generation [1]. This family includes a region found in Arb2 and the Hda1 protein. 
19.20 19.20 19.30 19.20 19.10 18.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.12 0.71 -4.84 43 409 2012-10-03 11:45:05 2007-06-21 17:39:30 4 19 222 9 275 398 1 154.40 20 27.75 CHANGED sPcchhp.........hphhcpshh.pppaacshtp..............................sI+ph.hpcLppc.thl....LP................hsshsp.....p.ts.Ihsosshh.pspp..lllllHsssp....lWAppss.hsssl-suosls.......................aIphu.................................tpcshullslNhsphhhp..........t.t......sshpsshpsp-hsthla-s ..................................................................................p............hphhpphh..ppthapsh..................................................................hlpph.hphLtpp...thl.....lP................................................hssh.p.....p.cs.lhh.S....shh..pspp..llllla.s.ss................Wupp.....hh..hs.....psl....ctuotls.......................alchA......................................hppsaullshN..sp.hh......................t.pt......t..........h...h....................................................................... 0 74 138 218 +9590 PF09758 FPL Uncharacterised conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2219) Family This entry represents an N-terminal region of approximately 150 residues of a family of proteins of unknown function. It contains a highly conserved FPL motif. 
21.30 21.30 21.50 21.50 20.70 21.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.89 0.71 -4.43 18 233 2009-01-15 18:05:59 2007-06-22 10:31:58 4 4 148 0 162 219 5 135.00 47 16.91 CHANGED lRpIsEhlIWGDppcsp.hF-aFhEcslhsphhcllpp.p..sspslplpllQolohLlpNlppcpslaYlLSNsplNclIsapaDhp....c-EllsYYISFLKoLuh+LspsTlphFFNp+hs...sFPLhscAl+Fhsap-sMlRsusRsIlLsIh+ .................................lRsIsEllIWGDQpD.ss.lF-.........FFhE+shhshFlpIl+p..p..ssphVslQLLQTLsILhpNlppEoSLY...........YLLSNNalNslIs.....ac.FDFs.........................DEE.lhuYYISFLKoLSlKLNpcTlpFF..a.Nccss.................sFsLYsEAlKFhsH.sEoMVRhAVRTloLNVa+............................................... 1 72 100 136 +9591 PF09759 Atx10homo_assoc Spinocerebellar ataxia type 10 protein domain KOGs, Finn RD, Coggill PC anon KOGs (KOG2676) Domain This is the conserved C-terminal 100 residues of Ataxin-10. Ataxin-10 belongs to the family of armadillo repeat proteins and in solution it tends to form homotrimeric complexes, which associate via a tip-to-tip association in a horseshoe-shaped contact with the concave sides of the molecules facing each other. This domain may represent the homo-association site since that is located near the C-terminus of Ataxin-10. The protein does not contain a signal sequence for secretion or any subcellular compartment confirming its cytoplasmic localisation, specifically to the olivocerebellar region [1]. 
24.70 24.70 24.70 26.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.02 0.72 -4.28 20 296 2009-01-15 18:05:59 2007-06-22 15:02:31 4 5 255 0 203 293 0 93.20 31 16.29 CHANGED uhKppllcllusLsacsp-lQ-plR-hsG.ltllLssstlD-pNPal+EaulhsIRNLhcsNscNQchlupLcspsl..scsshLpchGhclpl.ps.G+lplcsp .............................thKp.llpllusLs..acs...psQcplp..............ch..sG..l.hlLss.C.s....h.D....cp....NPa.....l+Eaulhsl+.Lh-sNtcNQchltpL.c..tpth.......spss..hLpp.hGhph....t.tp......p........................................ 0 65 112 167 +9594 PF09762 KOG2701 Coiled-coil domain-containing protein (DUF2037) KOGs, Finn RD, Coggill PC anon KOGs (KOG2701) Family This entry represents the conserved N-terminal 200 residues of a family of proteins conserved from plants to vertebrates. In Drosophila it comes from the Fidipidine gene, and is of unknown function. 25.00 25.00 28.30 36.20 21.10 19.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.98 0.71 -4.22 14 147 2009-01-15 18:05:59 2007-06-22 16:15:42 4 4 105 0 94 146 3 172.80 51 30.58 CHANGED php-Il-lLlsAGYaRARlpuLSsFDKllGGhsWsIpss.s.....aclDl-hhFpEs.......oIGpKIuloE+IlpsL.cM+CPapLcPHQIpGLDa.slaPVlpWLlK+slEsRpEpu-hl+pauhspFppcas.h.scp..-hhppcpcsspsltshpchatPpR.....hcR.tss.s.hhsc.ppscpsLhEYGp .......p.hp-Il-LLVAAGYFRARIKG...LSsFDKVVGGMTWCIosC..s......aDVDVDLL..FpENu..........................TIGQKIALoEKIVuVLP..+M..KCPHpLEPHQIQ..GhDF...IpIaPV...lQWLVKR.ulEs+cEhGDalRpauluQ...FpKpap..hspDc......-hhpcpcpshcslhslpchYpPpR....ha+R..ttss.........pph.--tsclc.sTLLEYG................................................................. 0 35 45 76 +9595 PF09763 Sec3_C Sec3; Exocyst complex component Sec3 KOGs, Finn RD, Coggill PC anon KOGs (KOG2148) Family This entry is the conserved middle and C-terminus of the Sec3 protein. Sec3 binds to the C-terminal cytoplasmic domain of GLYT1 (glycine transporter protein 1). 
Sec3 is the exocyst component that is closest to the plasma membrane docking site and it serves as a spatial landmark in the plasma membrane for incoming secretory vesicles. Sec3 is recruited to the sites of polarised membrane growth through its interaction with Rho1p, a small GTP-binding protein. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 701 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.38 0.70 -13.14 0.70 -6.40 24 414 2012-10-03 17:31:52 2007-06-22 16:27:39 4 6 269 0 278 545 1 511.00 21 63.19 CHANGED p-...............u-shhccLs+ELspl-tss.lpsllpp-ppssplhphl.-pulsEsDcl-shLshaphpLpslp--lshIEspspGLQlpssNpKhLhpELppLLsplslscsplpsLpp.sslppsptlcth..EsuhtsLhpAhsslp.......................shcsshsphpAlpp+cphacchsppFhcRlspahpptFp.h..p.hpt.p..............chshppatshhppLhhYusLhhahK-lstcpaptLhptYpsphpplYcp-hpphhpth+tphp.......................................................t.pspptptsh.sss..........t..pph....hshpps+phph..................ppuphh.................................tp.p..cshtthLsphpslhhhcQsFl.pFF......+hss.......................................................shsas-hlpttssppppt..phsphp.hc.........ss+ths.pplpphhstlFts..h.scl.shls.s..phcsh......s.slLhhl-ptl.phpp..s..sps.aL.phlt+lhtplpp.as+alppQlctIEc.splsh+pp.GllshlpsFs.FsptsEshhppspp..............hssh.hl-puYp+lspuhhctlpphs.....t............................................ppppphspplshl....ENhpahhcpLs.........................hph......sslpshhcpupphaccphphYhp.tlltcshs+LhpFhpuscshlps.....s.sp.uhphuaSKptl+pllssYsu+-lcpslcpLh++l-KHF..t.p...........................shpcsLlp+lWpshpppalphap+ltsllp.csYss 
............................................................hp..hcth.tplppclt.hptss.l......ttl..tp.-.....h.......p.lhphl.-tshtps-p....h....-.hlp.a.......p..L..pp........hpc...........pht.Iptpsphl..phpstNphhLhpclp.llpph.p.l.st.t.ht.Ltt..ss...h............tp.p......ltth........uh.hL.tsh.........................................hp.s.th...h..u..lppppt.htt.pt.Fhp+h.paht.ha.........t.t................................ph...tp.t.hht.lh.as.Lh.....ah+th....s.ttat.lhp........Y..p.hp...l.hppphpthht.hp.tht.................................................................................................................t.......................................tttt..t........................................................................................s.................................................................t.hhtthLtp.h...hh..EptFh.pFh.....php................................................................................................................t....t.ht.........tt............................................t.htthh..lht.....h..ph.thht.h...p...ssh.....................h.hhhhhpphh.....tt.....s.........stt.al...hh..tph...thtt.apc.hl.ppp...h............pthpp....scht....h....pp..t.Gllshh..h....h.thhEthht..................................................................................lsp..Y.pl.tshh..lphhs..............................................................plhhh.....cNhthh.t.l..........................ht........sLtt.hppAp.p.hppthp.Yh....hh....ht+l.tFhpthpthh.t........ht.tpl....sh..p.shs+t.hpcl................ltt..s.pp.....l.....ccslpthhc+hpKph...........................................pptLh..lhpt.hpt.hhp.h.th.thht.thY............................................................................................................................................................................................................................................................ 
0 107 168 238 +9596 PF09764 Nt_Gln_amidase WDYHV; N-terminal glutamine amidase KOGs, Finn RD, Coggill P anon KOGs (KOG3261) Family This protein is conserved from plants to humans. It represents a family of N terminal glutamine amidases. The enzyme removes the NH2 group from a Gln, at the N-terminal, rendering it a Glu. 20.30 20.30 22.00 21.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.65 0.71 -5.00 14 159 2009-01-15 18:05:59 2007-07-09 17:17:08 4 5 132 1 108 172 5 164.70 40 77.58 CHANGED hYsSpYCEENVaKLsEhl...t..psshppha....AVFIS...........N-pKplPlW+Q+uups..sss.VlWDYHVIhlp.......sspsutshVaDLDosL......PaPs......shppYlpcuhps-..tplpspa.........RRpFRVlsucpYLppFuSDRSHM+ctsGsahpPPP.aPsIpsscus......hNLss.alsMp.pss.................GtVhs.sphhphFu ................YsspYCEENlaKLCc.l.....tt..p..t.....hp...cha............sVFIS..............Ncp+hlPlWcQ+uuts......sss...VlWDYHVlhlp..........................sssspshVYDLDosL......PFPs......sappYlpcsh+s-...sl.pspa..............+RhFRVl.AspaLppFuSDRSH..M.......+....c.s..s..G...s.....WhpPPPsYPsIts.sssp.....................NLsp.aIsMs.t.s...................G.Vhs.tph.p.a.h..................................... 0 44 58 85 +9597 PF09765 WD-3 WD-repeat region KOGs, Finn RD, Coggill PC anon KOGs (KOG3268) Family This entry is of a region of approximately 100 residues containing three WD repeats and six cysteine residues possibly as three cystine-bridges. These regions are contained within the Fancl protein in humans which is the putative E3 ubiquitin ligase subunit of the FA complex (Fanconi anaemia). Eight subunits of the Fanconi anaemia gene products form a multisubunit nuclear complex which is required for mono-ubiquitination of a downstream FA protein, FANCD2. The WD repeats are required for interaction with other subunits of the FA complex. 
25.00 25.00 27.80 25.70 20.20 22.40 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.74 0.70 -5.13 9 143 2009-09-11 16:53:32 2007-07-10 10:43:26 4 7 100 4 91 139 4 227.60 33 69.28 CHANGED LhcchPhLlsps.+ptshh.talusptpsa.+l+lhLPccspLcsu+lhs.hthcplhhthppsspp+lpps..L.uFl.cLcplLEstLKspsttps.....sphhosLhp-ltsltas+hshl..Dsshopl+Lpu.Dut.RpHhlTlclpuphshcss-asls.slshuhsht..puoLtshhspFlthLEsLcsFaDshc-IDEhsaVLEPppssppsosRhIslsppV.lplplcPtcPhhh.tshhLu...ss+.Vs.LRphLssslc.WDPEsslhpNLhclh-l.tFPh. .........................................................................................................lph.....................h.h......th.........t..........p....p.ht.ps..l.thh.pl..ll..h.p...p.......s..........sphhsplhp-lt.tlGWsplh.l..ssshsplchph.Dst.tRpHhlplpl....s..paP....h....psP...s.h.sshPh....hth.pas.............p..............o...sLhslhpQFhttl-.pLptFWcshD-IDcpsWVL-PppPspus.s......hRRIslus..ssSlplpl.cPtcPphlP....pshhLu...ssp...................hlp.Lt..p..hsp.sh..phWssc....pslhpNLpplLph.thP...................................... 0 33 45 69 +9598 PF09766 FimP Fms-interacting protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2216) Family This entry carries part of the crucial 144 N-terminal residues of the FmiP protein, which is essential for the binding of the protein to the cytoplasmic domain of activated Fms-molecules in M-CSF induced haematopoietic differentiation of macrophages. The C-terminus contains a putative nuclear localisation sequence and a leucine zipper which suggest further, as yet unknown, nuclear functions. The level of FMIP expression might form a threshold that determines whether cells differentiate into macrophages or into granulocytes. 
26.80 26.80 27.50 26.90 26.40 25.10 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.24 0.70 -5.38 17 303 2009-01-15 18:05:59 2007-07-10 13:56:45 4 8 220 0 209 278 2 260.80 32 53.71 CHANGED lpL+chsRhshhphccsR.cpscctKppVDttpLpLpNLhYEtpHLp.......KEIp..pCh-FKopct......cl-Lls.-EFap.cAPppls......csphsttspHp.hLtRLsaELpQRKcLscphccLpppKppltps.Itp+cchL.sSLtspL+...sltpuohPlQchlshsapp...p.cppc.......hsphLPtPLYlLYsplpuhtpsp-c......tlplpIh...Gs.c-Apshtpt................pspppspssp-sccppppp++Rcpptphp.sstp...pphhc.hHP..........Lplhlclhspct...............ltLpFp........YlspLplVsVpsphsst....................-slLssLFs.......sDsGp........chP.................p.ssph.hpchsl..pphsp...t..hG+PYpWsQpLsGlpah ...........................pL+thNR.shhph+ps+.ppTtcs+pclDthcLQLQNLhYEh.HLp..............pEIs...tC.pFc.s..+ap.......................pl.LlshE.EFhp...ptPtpht.........................ttscsHphhlsRLsaELpp...Rc..cLtpphpclhppKpplhp-..pp++chL.ssLt.cLp....plh.pAu..hPlQchh...th......................................................s..h..h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 83 122 172 +9599 PF09767 DUF2053 Predicted membrane protein (DUF2053) KOGs, Finn RD, Coggill PC anon KOGs (KOG3236) Family This entry is of the conserved N-terminal 150 residues of proteins conserved from plants to humans. The function is unknown although some annotation suggests it to be a transmembrane protein. 
21.70 21.70 21.70 23.10 21.60 21.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.91 0.71 -4.24 11 161 2009-01-15 18:05:59 2007-07-10 14:12:23 4 5 123 0 102 168 2 149.60 45 65.19 CHANGED TLFHFsNChALsasPaalsYKsouLSEY.suhhpClpAussYlhTQLsKhllLA....TFhsss-s....ssashhsEhL...+t.hshlDlhGLhLllop...hssctch+llssGLGWuhA-slho+hlsLWVGARGhEFoWcYl.pul-uNh.LlpplshssL.lahhoR ...............TlaHFhNChALsa.hPaalsYKtosL.S....EY.su......hhp.Cl.pAussY.lhsQLsKhLhLA................TFass....-s..............s.a..-...hhsEhh...KsslDlhDllGLh....hlhop...hs.GKuchKlhssu..LGWAhA-hlhoRhl..P.LWVG.A.RGhEFsW+YI.hul-SNlsLV..phlshusL.lWhaoR........ 0 39 53 81 +9600 PF09768 Peptidase_M76 Ku70-bp; Peptidase M76 family KOGs, Finn RD, Coggill PC anon KOGs (KOG3314) Family This is a family of metalloproteases. Proteins in this family are also annotated as Ku70-binding proteins. 25.20 25.20 25.70 25.40 24.20 25.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.29 0.71 -4.80 29 315 2009-01-15 18:05:59 2007-07-10 15:56:33 4 8 269 0 235 318 5 167.90 37 67.56 CHANGED pppCpchhchhlphSPhVpFhhpplc+ls.......ss....hppppIhCc...................hCsst.........puGGFsP........ctGIllCpNpl...............+sctclEcsLsHELlHsaDch+hc.lDa...tNl+HpACSEIRAusLSG-C+ahpEhh+tshs...phtp.......paQcCV+RRAlhSVhuNPsCp.s.pcAccsVscVWcSCFsDTRPF-c .............................................................................pcCpt.hphhlp..s.P..h.l+ahhpt..lcp..hu..........ss........hppppltCc...................................Cssp....................huGG.F.ss....................ptsIllCpNp.h..........................................................+sptcl..ccslsHELlHAaDahRh..c.lDW....pNl+HhACoE.IRAus...LSG-CpahpE.hh.+t.tht.....lpp..........paQ...pCV++RAlhSlhu.s.ssCp.........p.tpApcsVscV...a-sCasDpcPFs....................................... 
0 83 133 195 +9601 PF09769 ApoO Apolipoprotein O KOGs, Finn RD, Sammut SJ anon KOGs (KOG4798) Family Members of this family promote cholesterol efflux from macrophage cells. They are present in various lipoprotein complexes, including HDL, LDL and VLDL. The apoprotein is secreted by a microsomal triglyceride transfer protein (MTTP)-dependent mechanism, probably as a VLDL-associated protein that is subsequently transferred to HDL [1]. 23.90 23.90 23.90 24.00 23.70 23.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.57 0.71 -4.52 36 346 2009-01-15 18:05:59 2007-07-30 13:28:07 4 5 224 0 212 323 0 151.40 23 63.87 CHANGED hh-ccsstts.h.spphsh.....sspp.p.................................hsscssshLppthpphRhtltpthshsps...........thsshhsphhstccphpsshssLtsssps...lLPshhaIlluuluGsIluRpRuhhhRhhhPlshussuhshhhPpo....hcssuphhashEccthPsls .............................................................................h.sppls.lhss......sttp.c.............................................hs.psss.LpptlsphRphhtshhshsps...........ths.pshsphhsh.p.t..hpsshs...Ltsssps....hhPphshIsluuh..sG.....llu...Rp...tuhhh+hshPhshushuushhhPtp....hpssuchhaphtpp.hssh................................... 0 47 90 156 +9602 PF09770 PAT1 Topoisomerase II-associated protein PAT1 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4592) Family Members of this family are necessary for accurate chromosome transmission during cell division [1]. 
25.00 25.00 25.50 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 808 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.62 0.70 -13.55 0.70 -6.30 17 499 2009-01-15 18:05:59 2007-07-30 13:30:45 4 8 240 9 328 497 1 425.60 16 81.25 CHANGED MSFFGFDoohPtcptstst.tt..p..............................h.Fp-TYcG.LG-p.pE-sDshND-TFGssh....slG+DFDFtstpuphsts.tttt.........tsutss.tstsphs..................psppsshpttpssp.hscLpshsulWu...t....................tssttspstPpssststp.lphpthpt.h..........tt..t.st.s..shs.shsst...t.h..........h.sshsttas..ss..................tph..tst.t....tthsh..PsphPs...tht............ph.st..tsst.ptt............pttsPPht...ts.P.t.sp......tt.ssh....t.p...........................................tp.hphspppchshh.cc..................t++hp+pcchhth.......tKasGLMTPpDKsFITRlQLoQlV..........o--PYsEDFYaQVap........h.ttstppsppstsphApsYL.poGpR..........tt+h+pu-sshQRMQQQVp+AVp.............ts+t+sKtsphhh....EGuLGKIShu.suKsPRp.L....shcpspssp..........tt.pp.ssphshpc.........................................t+KpILphlEslYpplhclEshpRshs..................tth..hptchps.sppLWpsL+l....ps.ssss.p...........spPFIuhLSasKGhKllPRlFpalscEQcl.TllohIhspLspLsVlhpu...ss....Phhsh.........pcth-..hFptslhsslhsalsps.sat.lhuLLshllp.psslshlupo+IGLullThLlSRAEll+ps....u.ss...........upp-hppWsphashLFssL..s..lsslFPs.............s.ststptYl.....................WQFL..Aululuup.ppQphlV.tV+DclhtTlspuKpl.s.......................hhtppplpNlNLFLpshGLssc 
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ssLGpl..s.s...tP+..l.......ph......t............................................................................................................t..hh...........lEthh..lhph-t.....p........................................................tth..thth.......................................................................................hhthhth.KGhhhh.Rhh..l.........t....t.h...hh.h.hh.th............................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................................................................. 0 84 140 233 +9603 PF09771 Tmemb_18A Transmemb_18; Tmem18A; Transmembrane protein 188 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4606) Family The function of this family of transmembrane proteins has not, as yet, been determined. 25.00 25.00 25.60 25.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.56 0.71 -4.15 5 136 2009-01-15 18:05:59 2007-07-30 13:31:49 4 4 93 0 68 120 0 113.90 55 84.03 CHANGED hEPS.ACEDLKAFERRLTEVIouLpPoThRWRIlLslhSlhTuluAapWLsDP....cTppVPhh-S.LWsHPhFTlSslsLllLF.lhGIHK+VVAPoIIAuRCRoVLAEFNMSCD-TGKLILKPRPpNssp ..................t......sE....DLKAFERRLTEhlpslpPuTtRWR....hlLlllSlCTAhGA.WpW..LhDP..................cTppV.sh..hpS.LWsHPhFTlSsl...TLlsLF.hhGIHK.RVVAPSIIsuRsRsVLu-aNMSCD.-.oGKLILKPR.....s................ 0 23 28 50 +9604 PF09772 Tmem26 Transmemb_26; Transmembrane protein 26 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4610) Family The function of this family of transmembrane proteins has not, as yet, been determined. 
20.20 20.20 20.70 20.60 19.30 20.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.02 0.70 -5.19 10 150 2009-01-15 18:05:59 2007-07-30 13:32:27 4 2 77 0 112 137 0 235.10 35 74.38 CHANGED llshLpAllTRlLFhlHulVuVWpVshlK.c-shYWhLslsllLLslEslhTlhh+KGcEaKWFsPSlFLYLsoIlPulWlLElchlpp+.............................sshucshcs.tpLhusluls...............httlsscsWstsLEQsLlLlLIlGRWLLP+Gc.lTRDQLSQLLLsYVGsAADIlEFh.-olKEspVt.....sNstlVhulLslWoWShhQFsLVLosTtsptscsusptcst.p........shhsphssDlWuIhlslllQDGPFLllRLlLhshacVIspMhlFFTsKNsLVllLQLYR .......................................................................hhpAlhoRhlFhhHu.hlslWpVs.h......p.pp......haWhL....hh..hhL.hhEhhhTlhh+.cspt.....a......+WFsPulhhYL.sllPulWlLEhc..pph............................................................................s..p.p.t.......h...t.t..tht................................................lstpsWhhhlcQhhllhLIlGRWLLPh.Gs..lTRDQLSQLLLhalGsAADIlEFh.-ohcpp..pl..t.....pp.hllhhhLslWoWShlQFslsLssp..thh.....s.ht.ttt.............................s.p....hsh-lWslhhslhlQDuPFLhhRLhlhh.aplhs.h.lFFssKNhLllhLphYR................................................................... 0 52 61 89 +9605 PF09773 Meckelin Meckelin (Transmembrane protein 67) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4611) Family Members of this family are thought to be related to the ciliary basal body. Defects result in Meckel syndrome type 3, [MIM:607361], an autosomal recessive disorder characterised by a combination of renal cysts and variably associated features including developmental anomalies of the central nervous system (typically encephalocele), hepatic ductal dysplasia and cysts, and polydactyly. 
Joubert syndrome type 6 [MIM:610688] is also a manifestation of certain mutations; it is an autosomal recessive congenital malformation of the cerebellar vermis and brainstem with abnormalities of axonal decussation (crossing in the brain) affecting the corticospinal tract and superior cerebellar peduncles. Individuals with Joubert syndrome have motor and behavioral abnormalities, including an inability to walk due to severe clumsiness and 'mirror' movements, and cognitive and behavioural disturbances [1][2]. 25.00 25.00 27.70 25.10 22.10 22.20 hmmbuild -o /dev/null HMM SEED 853 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.53 0.70 -13.40 0.70 -6.77 7 200 2009-01-15 18:05:59 2007-07-30 13:33:13 4 7 106 0 140 197 7 504.90 26 83.21 CHANGED P.uhtspsspC.ssshhshhhssuCs.splhpuh.hshssssssshps.ssssh...s.hs.phsu..hhph..suAs.phhhushoACphLuNhCVh.hashsu...ssCtLapplhpst.t.........hhpthPhLaYucssshhp.l.sshshshphshttp.....LphlsusYDlcGshlthpslt.p.lpLCspssschpshashGsshphsCplshpcLl...tptspshFa-lalp..tsspchh.hslsh.hpshphpstphppsp.........................uh.phhhpRRhaLhDslsts+cts.p..........ppPphlpsspplplsh.lsspsppcplhsPllhlpYush.h....................sssphsshohulp...aphspssh..lhh.lshslhssLshhsuhhRThsWhRRpts.....hlshtslh+FhlahsscluNhFhhhshhsuhYhhlhaKhQpss.hhh.....spp.hhhpshlhsAhAhKulthLhclhpQsshDhFhIDWERs+uphhtppch.................sPVShWRohFVANEhNcLQslRphsPLhphhhlLFFL.sLsa.phu.psP.sott.sh.shs.s...hLRhuLsohhalhluLl.hlhchthahRFh..pPlppFVDLCSlSNISlhlLs-ppaGYYIHGcSlHuHuDssME-hppNLphEups.hs.RGLsspo...csQTaplhhs.phRpah.hhh.p.ptcpppu.h+t.ts.t...........hshppp.psYsshshhlpshIscsh+.shchh..h.hsK.hhcphLshtPs.hhhps.tp................sthslFahD-shuaupshhhG.-hsLhlh.hhlasslDluspNhhlAhslsaslphlh+ahRhp.GhtNlSpKTLID-RFhI 
..............................................................h..............................................................................................................................................................................................................................................................................................................t.h........................C.................................................th.h.........................................................................................h.h..........................................................h.+Rh.hhpt..........................h....ph.h..........................s.h.h.h.............................................................h..............................................h.h........................................................h....t..s.h.h............................................h..hh......h..ht.h.hh..hh.....phFhlDWEp.c........................................................................lshWR.hhhANta.clp.hp.hs..hphhhhhhhh.hhth............................................................h..h....hah.h.hh.hhh...hh......p.h.tpFhDlsshuNlSlh.h.p..aGaYlHGcu.hshu-ssh..........ph...h..........tts....RGL..tt........p.psa.h.....hp..h..........................................................................................hh.thh...................................hth........................................................................................................................................................................... 0 74 84 118 +9606 PF09774 Cid2 Caffeine-induced death protein 2 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4613) Family Members of this family of proteins mediate the disruption of the DNA replication checkpoint (S-M checkpoint) mechanism caused by caffeine. 
25.00 25.00 26.80 26.70 20.50 24.50 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.03 0.71 -4.11 22 228 2009-01-15 18:05:59 2007-07-30 13:33:41 4 3 202 0 153 219 0 140.50 30 83.25 CHANGED Chs.stlcshLRh.Rp.lDDpIpppLNshhsssp............t.p.................t.ttCpphhcp.LhsuWpsRscllpaCtshusp.c.cssp............tpssptpcpthspRlDPYus+thpcE.pt.....psltphlpsEcsVEpIIRpRThplls-+Cth..pshp ........................Chs.tthp-hL+hhRs.lDDpIhppLNshhssup...hshc.............................sssppCcphhcp.LhsuapsRsclIphChs.ss..................................tpstp.+-ch.cpp.Dshsh+thtcE......................psh.hphlpsEhsVEpIl..psRohclhp-RCphp.p...................................... 0 44 74 119 +9607 PF09775 Keratin_assoc Keratinocyte-associated protein 2 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4615) Family Members of this family comprise various keratinocyte-associated proteins. Their exact function has not, as yet, been determined. 23.50 23.50 24.20 40.00 21.90 23.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.61 0.71 -4.60 8 113 2009-01-15 18:05:59 2007-07-30 13:34:05 4 2 96 0 73 121 1 122.90 45 90.56 CHANGED MAVsouTShsLSolLhhLlFusMQMY+sQLASSphhTIhGGFLGSLLFlh.LTAluNlEsllhG+GFQsKlhPEVVlshllALhAuGhVHRVClTTCLIFSlsuLYYlNKIS.phat...ssslstssspKpR ...............................uoGsShhLSuLLulllFushQMYpppLASoEhhT..IhGGhLGShLFlh.LTAhsNlEphlFGpGFQsKlhPElllsLhlALhAuGhlHRVClTTChlFShsuLYalNKISpphaps...ss.s.sh..ttt..tt....................... 0 22 34 52 +9608 PF09776 Mitoc_L55 Mitochondrial ribosomal protein L55 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4616) Family Members of this family are involved in mitochondrial biogenesis and G2/M phase cell cycle progression. They form a component of the mitochondrial ribosome large subunit (39S) which comprises a 16S rRNA and about 50 distinct proteins. 
25.00 25.00 29.90 28.70 23.00 22.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.31 0.71 -4.50 8 98 2009-01-15 18:05:59 2007-07-30 13:34:58 4 3 79 0 60 104 0 110.20 39 80.83 CHANGED hLplLppsshpuss..s.p.hhsoshRssusRASloRl+RpsYuRLYPlhLVpPDGSTIpIRY+EPR+llphPlDL-sLSPEERRARlcKR+P..+pKlchp-El-DsFDsc+YhpFh.....+K .................................h....................phhhsshp.hsS.s+.AuloRl+RpsYsRh.YPslLVpsDGSTIpI.RY+EPR+llt...hPlDLssLS.EE....R+sRLc+Rcs..p..p.K.hc.h..p.p..El..pDsFcsc+Yhpahp+........... 0 18 22 41 +9609 PF09777 OSTMP1 Osteopetrosis-associated transmembrane protein 1 precursor KOGs, Finn RD, Sammut SJ anon KOGs (KOG4617) Family Members of this family of proteins are required for osteoclast and melanocyte maturation and function. Mutations give rise to autosomal recessive osteopetrosis [MIM:259700]; also called autosomal recessive Albers-Schonberg disease. 18.90 18.90 19.80 19.60 18.40 17.50 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.19 0.70 -5.20 10 112 2009-01-15 18:05:59 2007-07-30 13:35:37 4 3 86 0 64 112 2 211.00 35 74.45 CHANGED CpcLLtpFusspuchssChshpuhPV.+LCps..ChstYcsLpsh......YsNlpus...........................stpCucslLsSDRlplVsTlpshLss.lWppANC-sCls........pt..shsNcTtpFhshhsphhsChcp..p...........Nto-lCcsCKssYpcLNchYt+l-Kh.........ss..clClDlEDuMNpTRpLWS+TaNCs..Cp-sVs....lIAVuuhlLhLPllFYloSalpocpKc........R+LIhssRhpSssutsslp ....................C.thl.phupttuchhtChspt.uhPs.....plCps..Ch..atphhph......h..ssltps........................pstsCsc.lh..sD+hplVshlp.phhss.hWppAsCssCls.......................................ppt.t.....hoNsThhFhsh..hsphhsChpp...............p................s...............................Nh....oclCpsCcpsYpsLsshYp.chp+.hs.................ss..plClDlEDsMNhTRpLWS+sasCs..pscsVs.....lIAVushlL.hLPllFYloSalpocp+c..........RcLlhspphps.................................................. 
0 23 27 46 +9610 PF09778 Guanylate_cyc_2 Guanylylate cyclase KOGs, Finn RD, Sammut SJ anon KOGs (KOG4621) Family Members of this family of proteins catalyse the conversion of guanosine triphosphate (GTP) to 3',5'-cyclic guanosine monophosphate (cGMP) and pyrophosphate. 28.10 28.10 28.40 30.60 26.60 28.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.40 0.70 -5.15 10 169 2012-10-10 12:56:15 2007-07-30 13:36:51 4 4 106 0 89 157 9 174.60 40 78.21 CHANGED VPHlpQtYsWDCGLAClLMVLctlshsspp..t-FpclCpc.thTpSlWTIDLAYLL++FuVcHpYaTpTlGANPsapscoFYK................cphssD.sRVspLFpcAcssGlsVcpRSVohpEIppHLtsGp.lAIlLVsAslLsC-lC....Khsh..shsptsasppscYpGHYVVlCGYDpssscFhYRNPAsSD.+lC....psShcsLEcARKSaGTDEDILLIa ..............................lPhlpQhapWDCGLACshMVL....phh.t..p.t.....phpph.hp......opS.lWTlDLAYLhp......+a............uV..p.apahT.TLG.ss.sYpspoFY+................cphsp-ppRVspLFtpAps.st....l..l.ppp.o....lohp-l..hLhput..hsIsLVstshLp..................p..h......h.....t.hst...p......ssYtGHalVl...pGYstss...tp................h.hpsPu.sc..php....................phs.psh-pARpuaGTDEDllhl........................ 0 32 49 66 +9611 PF09779 Ima1_N DUF2349; Ima1 N-terminal domain KOGs, Finn RD, Sammut SJ, Eberhardt R anon KOGs (KOG4623) Family This domain occurs at the N-terminus of the Schizosaccharomyces pombe inner nuclear membrane protein, Ima1. Ima1 interacts with other inner nuclear membrane proteins [1-2]. 
21.30 21.30 21.30 26.70 21.00 20.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.19 0.71 -3.66 12 204 2009-01-15 18:05:59 2007-07-30 13:37:26 4 4 167 0 144 192 0 120.30 35 19.20 CHANGED lsCaaCspcot.shpst.p.WpC.pCEthNhhsEpG-..pD...Psttpsp.....pshtsssp.ss.p...........sspshFCspC.cNQplhhptLApa..hPss-cspYttY-cch.taR+pLEcpYPplCspCEs+V ....................................lsCaaCspp......o.hhsh..t..spsp.apCspC-thNhhpcsG-..ps...Psth.pph.sp.hs.ps.....s...s...tsssp..tt...................ssspslhCpp..CpcsQplhhppLAs..a..h.P.c.s.-s....................................pa-cclpsY++pLEppY..plCpsCpstV............. 0 46 68 110 +9613 PF09781 NDUF_B5 NADH:ubiquinone oxidoreductase, NDUFB5/SGDH subunit KOGs, Finn RD, Sammut SJ anon KOGs (KOG4632) Family Members of this family mediate the transfer of electrons from NADH to the respiratory chain. The immediate electron acceptor for the enzyme is believed to be ubiquinone, the reaction that occurs being: NADH + ubiquinone = NAD(+) + ubiquinol [1][2]. 25.00 25.00 51.20 40.70 19.60 18.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.26 0.71 -5.01 10 143 2009-01-15 18:05:59 2007-07-30 13:39:22 4 2 98 0 69 136 0 164.40 44 97.25 CHANGED MAuMSlL.puuAuhsApLsslhpussttsslppslsts+ss.ush.auGsHG++hFsIpPS+FYD+RFLcLL+FYlhLTuIPVushITaVNVFIGpAELAEIPEGYhPEHWEYYKHPITRWIAR.laDSPpK-YEKhLAhlphEsEKA-hRhhEtEVR+hM+ERGDGPWYaYcT.......l-KEhlDcu.KATPDs ................................................................................h..........th....p....t......h..s...tu.HG++hFhI+P.Sp.a..c+FhcLh+FYlh.LssIPVshhlohlNlFl.GpAELAEI.P................EGYhPcH.WEYaKHPIoRWIARhhasSPpcpYE+thAhlphEsEKAcl...RhhEhcVR+hMppRsDh.haaYps........lsKphhDp..c........................................ 
0 19 25 47 +9614 PF09782 NDUF_B6 NADH:ubiquinone oxidoreductase, NDUFB6/B17 subunit KOGs, Finn RD, Sammut SJ anon KOGs (KOG4633) Family Members of this family mediate the transfer of electrons from NADH to the respiratory chain. The immediate electron acceptor for the enzyme is believed to be ubiquinone, the reaction that occurs being: NADH + ubiquinone = NAD(+) + ubiquinol [1]. 25.00 25.00 31.50 28.10 21.50 21.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.03 0.71 -4.37 11 120 2009-09-11 15:31:47 2007-07-30 13:40:18 4 5 86 0 65 126 0 126.40 38 83.66 CHANGED hoGhpP...............DE+lRLptL....RphR+pWLKDQELSs+EPVlsP....cphsPIc+Fapp.L-p......pssathhhhpsYRhslhplshlLlshahsHYYhKYcss......ppsatllppKsplhPG.......sslhEpG.s.Ps.t-assppa .................hsGhpP...............-E+lRl.pt.l....RphR+pWLKDQELSs+EPVlsP.......pths.Plc....+Fapt.Lc..p.........pssat...h...hh.htshptshhthhhhLlssahhaYYhKYphs......................ppsa.tllpp+.tlhPG.......spl.-pu....s..t-h........................ 0 19 24 49 +9615 PF09783 Vac_ImportDeg Vacuolar import and degradation protein COGs, Finn RD, Sammut SJ anon COGs (COG5073) Family Members of this family are involved in the negative regulation of gluconeogenesis. They are required for both proteosome-dependent and vacuolar catabolite degradation of fructose-1,6-bisphosphatase (FBPase), where they probably regulate FBPase targeting from the FBPase-containing vesicles to the vacuole [1][2]. 
20.70 20.70 21.50 24.10 20.50 20.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.34 0.71 -4.78 20 333 2009-01-15 18:05:59 2007-07-30 13:41:13 4 8 229 0 222 328 0 167.60 39 52.54 CHANGED saLcsGspFpGpQpS.............ppppYpVcVpIcsVDht.........psaLsGaLpIpsLTs.paP....plTTaFEGEIIs.....................scasFhTp.......cWsAsp.........csDlpHWt+FPuF.+slsptttp.......................................tphpht-h.spcalFMRWK.EpFLVPD........tpl.cslsGASa-GFYYIsasp.....ssGsIpGaYYHts.uEpF..QpLpLpss.p.c ...........................hLhsGtpFtGpQpS.............ctptapVpVpl...ppVDht......................puaLsGaLcI.....pGLT-..caP.................slTTaFEGEIIs.............................pcas..FhTp.......pWsAsp..................................csDhpHWs+F..uF.pt.htpph.p.p.............................................................................................sshp....hpchh..sp.palFM..........R....WK..E.p.F..LVPD........................ppl.+slsGASasGFYYICapp........................ssGslpG.hYYa..p.Scha..QpLpLp.s.sp........................ 0 65 115 176 +9616 PF09784 L31 Mitochondrial ribosomal protein L31 Mistry J, Wood V anon Pfam-B_24102 (release 21.0) Family This is a family of mitochondrial ribosomal proteins. L31 is essential for mitochondrial function in yeast [2]. 20.50 20.50 20.50 31.50 19.30 20.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.54 0.72 -3.95 10 125 2009-01-15 18:05:59 2007-07-30 13:41:43 4 3 122 0 100 120 0 106.30 53 91.26 CHANGED TpPlhGGLLWKlPWRhSssQKtRQRcRLRuVD-VlcsLs................puLp.h+.sp..spppl.+hlsp...hPpEppMSPKDK.......................YTsFsKKs.....+GYRKGIHKVPKWT+lShRcNPptF ............Tssl.GGLLWKI..P..WRLSshQKsRQRcRLRsVDpVlcsls.......................pAL.t...+p....Gt......stcslpRh..htc...hP+EpEMhPKDK.......................YThFD+Kp.....KpYRKGIH...........KlPKWT+lStRhNP.GF.............. 
0 26 57 86 +9617 PF09785 Prp31_C Prp31 C terminal domain Mistry J, Wood V anon Pfam-B_7665 (release 21.0) Family This is the C terminal domain of the pre-mRNA processing factor Prp31. Prp31 is required for U4/U6.U5 tri-snRNP formation [2]. In humans this protein has been linked to autosomal dominant retinitis pigmentosa [2][3]. 25.20 25.20 28.20 30.90 24.80 25.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.74 0.71 -3.69 28 315 2009-01-15 18:05:59 2007-07-30 13:43:11 4 7 273 0 230 308 5 129.70 42 25.27 CHANGED ss+hsKsLPhPp-.pspKKRGGRRhRKhKE+auhTEhRKhtNRMpFG....................ppE-shh..hspshGlGMlupssst.........+lR.....htphss+.........sps+hoKph...........................pppLpp.pstss..............GhsSSlsFTPhQGlEllsP .....PsKtsKsLPsPt-.ts.+K..KRGGRRhRKhKE+.huhTElRKttNRMsFG...................c.E--sht..t-hshGlGhlGpsssG.........RlR................ts.plsp+.........o+A+lSKph...............................................pppLpt...pshtssshs..............................uGhuSSlAFTPlQGlEllsP.................................................................. 0 78 130 191 +9618 PF09786 CytochromB561_N Cytochrome B561, N terminal KOGs, Finn RD, Sammut SJ anon KOGs (KOG4670) Family Members of this family are found in the N terminal region of cytochrome B561, as well as in various other putative uncharacterised proteins. 
27.30 27.30 29.60 27.60 25.60 24.20 hmmbuild -o /dev/null HMM SEED 580 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.78 0.70 -6.06 9 196 2009-01-15 18:05:59 2007-07-30 13:43:26 4 6 121 0 107 172 0 407.90 32 88.01 CHANGED pss...........psPhlspslshphpptppphhLhhsllsl....ulluhlhh-hshpshhsahsl...shahhthslsulluLsslhs......aspaF+hlh..................sp-plshostQppLLu.lcspspt....ssupsspp.stspsphPsssSss.lph.p.shstSstpStSsu.hhosssssthps..ps......................p..u..s...s.uasosl.s.psSsstspht......SPhuhpp.sspcDhhT-p+hL-paLpphcc...phppussspsos..p.t...sousohhstupsssshupslhpp.hph...Ssuss.s+pchshssKchpu.......h-su.Esht+lu.....hsplppapupL..RtWlSpTlLpPLVpcIcoscpph+ppus...sslpIGplu................................lcpL+psA....................t.phps.hhPhLPhlh.aLDshoNQc.......................YLVpRIKELAcGoClssY+WsuGushpGc...........cWspcLPTDSsllhaLFCsYLD...............oQLsspPh.sG....scsFss+YlllsssKPsstp.......stAhslhlss.sP..PpFshla.D++la....sshpsRsNLFcsll.FlahlKscpsGhlcslNLGpSulNILsll-s 
...................................................................................................................................................ss.............ltt.h.....t......lhhh...h......hsh..h.c........h.t......h.....h.h..hh..hhsh.hlh.......h.thhh.hh........................t...h.ho..p..hLt.h.............................................h........t.s...t..........................................................................................t................s......................ss....t.....tpth...t-.t.L.phhtt.pp.................p....................ss.hs.........st.ht..h.hh.hph...u.s.....p...pp.............s.cp................ts.-sh.t+ls......hsphppahsph...R.............Wlspsll..Llpclpphst.hpphsh.....pl..pl.G.psu.....................................................................................lppL+thh.............................s.lshl..h..L-hhspp.p.......................YlhpRl+-LupGsChs.sacWstGu...shpsc.........................pWs.pcL.PTDutllhHlFCsYLD....................................upL...sp.P.h..s.........s+sFs.spahh.ps.sc.sshhp...................pp.shh..lh.ss..s..P...P+apllh....p..c...clh.........ph.pGRsNhFcsllhFlahl+ppptGhlttl...slG.uulNlh.lh............................................................................................. 0 39 53 85 +9619 PF09787 Golgin_A5 Golgin subfamily A member 5 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4677) Family Members of this family of proteins are involved in maintaining Golgi structure. They stimulate the formation of Golgi stacks and ribbons, and are involved in intra-Golgi retrograde transport. Two main interactions have been characterised: one with RAB1A that has been activated by GTP-binding and another with isoform CASP of CUTL1 [1]. 
30.00 30.00 30.20 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.82 0.70 -5.98 16 253 2009-01-15 18:05:59 2007-07-30 13:43:51 4 5 110 0 149 263 0 354.90 25 75.37 CHANGED -N..................ElspLsQstAollptpptsp-.p............p.s...s....................................psNpusucspKhsscp.........sppssoohpLsutscshspssusps.......pLAslKltLpEhstElcph+ppL-sh.ppcsp.hpscpcsppLppttl......coLp-+Lp-t-sslppcppshpptphtFlc+lschEh.hppLt.t.s.A.R+hscchpchs-hppplclh+ttscspctELhcY+.+Ap+hLQsK-KhIspLKptshhpuhp.t.ss.............hEL-ph+cEppp.p-ElppLptQIpp.h.EhpDhcsctsupscph+cpspclppthtsphoo....-s-.thhppEhtahcEshtpppsshpsRlp-R....psEhQplRspLos+s.psSu.s-lEsRLpsLTpoLlp+QshLE...pLosEKNuLslQhERlpp.L+t.....t.pssssopl.hphls.s-Ds+tR.lPlhhppssh-l.thht+h++AhpsIDshuIRlGhFLRRYPhsRl.lIlYhAlLH.....................hW ..................................................................................................................................................................................................................................................................................................................................................................................t.ht.............p...t..................p..h.................ht.........htt.ptt.hpt.h..............p.h...p.pt....psp.pl.ttt.............................poLpc+hp.h.cttlthp.ptphh.h.....aht+h.......p..h.c....hp.Lt.t.....A.....p.p.hhc..hp....chs-.....p..lchh+h.hcptc.tL.pY+...tph.lps.-ph.hp.LKp.sh................................................h...........tp-.th.p..-phphh.hQ..l.....ph.pphp....p..t.t......p..pp..pphp......htt.........h..pph..hp.pt..p.tsshp.php.+.........................-hphhhp..tlss...ps..p.p...s...p..-......l-........thhpp.hs.............................ls.h.p+pth.hthc+l.p.hp...........ttpts..s..p..h...th....p..s-..s....sc.p..h....shhhp..p.h.....sh...................slDp.h.........R..hsR..llhYh.hlp..............................................
................................................................................... 0 48 60 105 +9620 PF09788 Tmemb_55A Transmemb_55A; Tmem55A; Transmembrane protein 55A KOGs, Finn RD, Sammut SJ anon KOGs (KOG4684) Family Members of this family catalyse the hydrolysis of the 4-position phosphate of phosphatidylinositol 4,5-bisphosphate, in the reaction: 1-phosphatidyl-myo-inositol 4,5-bisphosphate + H(2)O = 1-phosphatidyl-1D-myo-inositol 5-phosphate + phosphate. 30.00 30.00 30.20 30.10 28.60 27.00 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.16 0.70 -5.25 7 173 2009-01-15 18:05:59 2007-07-30 13:45:18 4 3 87 0 106 163 0 224.80 49 91.93 CHANGED M.ADs..ERSPLLScspcGs.....s..u.sst.ht......sstP...tshsPhs....ss.h.uE.PPPYoshsSP-ouosPslsCRVCQSlIsl-GKhHQHVVKCslCNEATPIKNsPsGKKYVRCPCNCLLICKsTSQRIACPRP.CKRIINLGPV............p.uPsoPss..QPtGsRVhCGHCusTFLWsEhpspTL......................ARCPHCRKVSSlGptaPR+RslhhhllsllhllsusGLh...........................sGThphAppatGlYsuWshhllLsllsLsRuhYWhsh+lS ...............................................................t...........................................................................................tt.s...............sE.PPPYs.s.........tS.sss..u...uh..PhlsCRVCQuhIsl-GKh+........QHVVKCshCNEATPIKNsPsGKKYVRCPCNCLLICKsoSpRIuCPRP.C+RIINLuPs..........................p.tPhpPt....pP.t.usR.VhCGHCpsTFLas.php.p....o..L.......................A+CP.H..C+.Kl...............SSl....G.p.tasR+RshhhhlluhlhhhhuhG...Ls...........................hGThph.Appat.u.hYsuWshh..hL..lullhLhRuhYahsh+VS............................................................... 0 28 37 70 +9621 PF09789 DUF2353 Uncharacterized coiled-coil protein (DUF2353) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4687) Family Members of this family of uncharacterised proteins have no known function. 
30.00 30.00 30.00 30.90 29.50 28.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.23 0.70 -5.17 4 117 2009-01-15 18:05:59 2007-07-30 13:45:48 4 2 80 0 70 102 0 264.50 45 66.32 CHANGED +pKLpSKs-ALhILtp-LEpsppERDtaKhhscpLp.chpshK+p.pEhph.shttGc..h..tp.............................+ppsLupLlpcsR-cNppLss-hp-L+pphtElptDhclLRpslsc.csuhpthsspcph...........cp+pcLlppL...E+h+cKsptLEpDl+SlhDEKp-VshERDtappKupRLNsELsalLsuDpp..Rll.DlDuLlhENRY.+p+lspLcEEhphh+tslsKYKshhEs.Kpppshl....KsG...ssspsuVhutKQV+-LLtSctsc...thslpstohS-L+uLssuLL-sls-KshALtHQ+psNKlLGsRlsELEpKltsL .........................................................M.AppLp.RapsLK+p.p-h........ts.......p.............................ppssLupLLp-op-cN+pLspElcpLpQRLsElQGDsK.....LLRhTlA+p..+lsc..p.pl.u.s+..phs...................tHERE-LVpQL......E+h+cphcpLcaDLpuslDEhp-lppERssYpsKscRL...NpEL...salL......uGccs............RIl.DlDALhhENR...YLpERlpplpEElpLhKpslsKYKs.hL-t.K...pp..Ks.h..........................K.t...soshssV.LSsKQV.....pplL.Spt.t...sLPhpspoloDLKSLssALLEslp-KNhslpHQ+pTNK.ILus+lsELEp+lptL................... 0 19 25 50 +9622 PF09790 Hyccin Hyccin KOGs, Finn RD, Coggill PC anon KOGs (KOG4688) Family Members of this family of proteins may have a role in the beta-catenin-Tcf/Lef signaling pathway, as well as in the process of myelination of the central and peripheral nervous system. Defects in Hyccin are the cause of hypomyelination with congenital cataracts [MIM:610532]. This disorder is characterised by congenital cataracts, progressive neurologic impairment, and diffuse myelin deficiency. Affected individuals experience progressive pyramidal and cerebellar dysfunction, muscle weakness and wasting prevailing in the lower limbs [1][2]. 
25.00 25.00 26.80 25.20 23.00 23.70 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.93 0.70 -5.57 12 234 2009-01-15 18:05:59 2007-07-30 13:46:59 4 4 110 0 141 211 0 266.40 36 61.63 CHANGED spsphsshAssLppctulssAlapllpc..stu-Ll-PlC+QLa-hYpSsE.pLphFsLQFlP.LlahYLptssucc.p...usushEAlLLulYNhEl......sccGsuKllohpIPsLSpPSlYHEPps...hsho-suhtpp......................sh.+sVhSush.pp-slpAQNRhcllshLLhsYNupls.MPtsShhp.lCphsuplsspGa..........................tpt.h.......Rl.lsspFhlp.hpuhaaAhhNG.hshu.psl-slh.RAphEhhscslLluNuhctSL.tuu.spsc-Gphsl.hElp.ss.R....IsppslTuhSlRs++hpcc ..............................................s..sph.phAtsLhpctsl.h..s.ulapslpp..............sts...cLl.-PlC+QLa-hYR.S...u-...pLpp..FsLQFLPtLhasYLthssucshp.....................Ssu...slEAlLLulYN...hElh........................cccG..p..s...KsloFplPoLSp..PS.lYH.E...Ppsh.....hshT-sshtpc......................sh.+s..Vh.Sssh..pp-thsApN.RhcllshLhhsYNu.tlshMPssSh.s.lCphso...pl.sspG...a................................................................................................................tpp.h...t.....t.ps.Rl..ls.s...tFhlphlpu.l.aa.Ah...a.N.Gthp...hu.psl--l..hhRAphEhhspslL..Vs..NAhcsSL...s.s..tp.sp-.G..t.hslps.t...ls.ss.+....lsps..slTshSh+t++h.cc................................................................................... 0 34 58 95 +9623 PF09791 Oxidored-like Oxidoreductase-like protein, N-terminal KOGs, Finn RD, Sammut SJ anon KOGs (KOG4690) Family Members of this family are found in the N terminal region of various oxidoreductase like proteins. Their exact function is, as yet, unknown. 
20.40 20.40 20.50 20.40 20.00 20.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -8.70 0.72 -4.52 26 419 2009-01-15 18:05:59 2007-07-30 13:47:32 4 11 343 0 263 401 6 45.70 35 23.27 CHANGED +sIAGVsVPs+Pp..EPDNCCMSGClsCVW-hYpDDlc-Wsp+pcpApp+ ..............................P..pP....-..P..s..s..CChSG..C.s.s.CVa-hYt--L.pcaptthtth...t............ 0 75 132 207 +9624 PF09792 But2 DUF2295; Ubiquitin 3 binding protein But2 C-terminal domain Mistry J, Wood V anon Pfam-B_45554 (release 21.0) Domain This family is of proteins conserved in yeasts. It binds to Uba3 and is involved in the NEDD8 signalling pathway [1]. This family represents a presumed C-terminal domain. 24.20 24.20 25.00 24.30 23.80 23.50 hmmbuild --amino -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.79 0.71 -4.43 24 100 2009-09-11 16:53:07 2007-07-30 13:48:38 4 4 66 0 84 103 0 143.10 32 44.30 CHANGED apFPHLIlPlcSouPspuhGTsasGpVoss......lSoIFNFDlPsu....sucoCoLsFhFPp.cth.susasFsGsGphsFspL...susssssTTasNsPshtpcluphsloP..GssYslso.FsC..PuGp..sluaEMssuGs.TpLsa......FpDasPs ..........................hpaPHLIlPlssssPspAhGTsasu....pVoss..........lSoIFNFDlPsu....sucsCoLsFhFPppp....th.ts..ssasFsG.......s...Gp.....lsFspL........suss.sssT.T.asN.tP.....ss..ppc.h...Gs..h.slsP..Gpuasls......o..FsC...PsGp..sluachsssGs..TpLpaFpshs.................................................... 0 26 45 69 +9625 PF09793 AD Anticodon-binding domain KOGs, Finn RD, Coggill PC anon KOGs (KOG4401) Domain This domain of approximately 100 residues is conserved from plants to humans. It is frequently found in association with Lsm domain-containing proteins. It is an anticodon-binding domain of a prolyl-tRNA synthetase, whose PDB structure is available under the identifier 1h4q. 
21.70 21.70 25.10 22.70 20.30 18.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.01 0.72 -4.14 29 270 2009-01-15 18:05:59 2007-07-30 14:02:34 4 6 225 0 189 258 2 89.20 31 42.44 CHANGED sssslslpplppRhppslppt.........pppttphGtuVo.-uQplF-slt+T..hstspWsup....sIlVh-.-VpIssP.Ypsss.sp......sssspuhs...pVpKl ................................................lslpplppRtcp..slcpt.............pppttthu.s.GVS.E..uQplFcslpKT...hs....s+Wpsp....sIlVhc....cVhIssP.Yps-s..sp..........ussssu.ls....+V+Kl......................... 0 63 99 148 +9626 PF09794 Avl9 Transport protein Avl9 Mistry J, Wood V anon Pfam-B_12001 (release 21.0) Family Avl9 is a protein involved in exocytic transport from the Golgi. It has been speculated that Avl9 could play a role in deforming membranes for vesicle fission and/or in recruiting cargo [1]. 21.30 21.30 21.30 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.35 0.70 -5.98 23 380 2012-10-02 14:18:06 2007-07-30 14:08:51 4 13 238 0 281 533 2 300.40 27 50.61 CHANGED lhtlsVVDFHHp+GP.................clEahas.......tpspttsshWp.LPF.ALPDGuHsapE-FoaFsLh...................ssspstpTlFGlSCsRQIcuscL....hpRssDVTRSTVQKAVVVlucpP.IFG.l+-KLSllTpAaFtQcDFospcILcpha-sLpsp.hpshsspp......................tpsc.........halGL..sLRcllh+FR+phLlLaKhlLL-KKllhau.ssVEtLsshQhullSLlPsLlsp.LpDsu.......................................................................................................sPhhcshcps..lspssShcoSsRpS................................................hLcahGhPLpIFs..................+GuhasPYhPLQQlchL......ss.ss+uallGoSNsLhhpQ+cph.sDlll..........slDssplphh...sspLcphLpLSstD++ahDhllppVppshc-sp.pt............ta....................GS--aIRhQFE-YLhuLLSos 
......................................................................................................hlhlVsFchthG..................tlEh.as.................tt.....pthp.lsahAhPDu.sHs..................t-..-.h..a.F.pL............................................................................s..t.ts.ttsl..aG...h.u..C..hR..Q...lc.s.p..tL.........h.+...-...lT.R.uhV.Q.K...u.lsl.....l.........uc..h...P..ha..uh...l.........p............t+Lp..l.lsp.s.aFtp.....t.........ph..t.p.h............p.l..l..h.pth.............................................................................................................hhsh....s..p.ph...l............p...ap..p.h....l.hl.hKhh...hLp.+h..................hh..h.......h......t...........h.......s.h..shh...ShhPt.hh....h.................................................................................................................................................................................................................................................................................................................................................................................t.hth.Ph.lF.t..................p.s.h.h.PYh.sL..hp.l...............................h....ahhGso.N.lh.....pptp....-hhl..........phc......t......t.......l.h.....p.tht..h.....o..Dh+a.hp........lht..h.....t...................................................Gu-talR.pht.Yh.thlt................................................................................................................................ 1 105 148 233 +9627 PF09795 Atg31 Autophagy-related protein 31 Mistry J, Wood V anon Pfam-B_60001 (release 21.0) Family Autophagy is an intracellular degradation system that responds to nutrient starvation. Cis1/Atg31 has been shown to be required for autophagosome formation in Saccharomyces cerevisiae [1]. It interacts with Atg17 [1]. 
25.00 25.00 92.10 88.90 21.20 20.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.07 0.71 -4.53 5 24 2009-01-15 18:05:59 2007-07-30 14:14:04 4 1 23 0 13 20 0 177.00 53 99.48 CHANGED MEs...TlTVYD+Nltaphpscc.............hhssht.S.pGuspoMFPTNIKYIFEDDDDplsDss-h......pp.ss-lENVIIV-LDsoGoLENVELISDpYELLSF...p..pL.p.t.ho..pcuNDpsND........................IEL-VlSEF.sDLSss.o+DLuLD-LlKlYspQNcQL+plSDoL ....Mss...TVTVYDKNV+apL.cEN.pp......s..sthsscS+Ss..DGuchAMFPTNIKYIFEDssD-Ll....DooDt.........................splsD.ElENVIIVpLDESGSLEcloLISDQYELLSa...pphSLppNp.+ohsS+u-D.+uND........................IELDVlSQF.sDLSPh.L+DLSLsDLIKLYspQNEQLQhLSNSl. 0 1 5 11 +9628 PF09796 QCR10 Ubiquinol-cytochrome-c reductase complex subunit (QCR10) Mistry J, Wood V anon Manual Family The QCR10 family of proteins are a component of the ubiquinol-cytochrome c reductase complex (also known as complex III or cytochrome b-c1 complex). This complex is located on the inner mitochondrial membrane and it couples electron transfer from ubiquinol to cytochrome. This subunit (QCR10) is required for stable association of the iron-sulfur protein with the complex [1]. 23.80 23.80 23.80 41.70 23.70 21.70 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.87 0.72 -4.31 21 109 2009-01-15 18:05:59 2007-07-30 14:15:48 4 3 105 0 82 110 0 63.60 35 51.33 CHANGED paushohpshp......pausthuhaGuuAushshhFhutlP+h+pDlhpKlPhhGsaa.p+pIsPEDsPh ............huulohpphh......pausthuhaGuuAuhsslhFhuslP+lpcDlLpKIPllG....p.aa.p+pl.sPEDsP...... 0 19 44 72 +9629 PF09797 NatB_MDM20 N-acetyltransferase B complex (NatB) non catalytic subunit Mistry J, Wood V anon Pfam-B_12009 (release 21.0) Family This is the non-catalytic subunit of the N-terminal acetyltransferase B complex (NatB). 
The NatB complex catalyses the acetylation of the amino-terminal methionine residue of all proteins beginning with Met-Asp or Met-Glu and of some proteins beginning with Met-Asn or Met-Met. In Saccharomyces cerevisiae this subunit is called MDM20 and in Schizosaccharomyces pombe it is called Arm1. NatB acetylates the Tpm1 protein and regulates and tropomyocin-actin interactions. This subunit is required by the NatB complex for the N-terminal acetylation of Tpm1 [1]. 22.80 22.80 23.00 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.08 0.70 -5.74 41 303 2009-01-15 18:05:59 2007-07-30 14:19:52 4 15 265 0 219 313 2 352.20 20 40.32 CHANGED Wshaphhl......pushpl..................ppppsshtphp.h.p.ltt..................t.hsRsshLApL-lhthh..................ptpchtstlhp...YaccFtsKssCasDLppYl...tl....stp.phpphhpth.pt..........................ss.pphhpplsshcl....................................t..tthsppph.p........hhpphhptappshpht.pt...............p-hpsuD.chsLlusp.hllch..............pppts.phllpulslLEphlscsspNaphpLhLlplYhhL.GssshAhptappLslKplQh-TluHhlhsphsshtshstss............phhspshpaYpsspppssch.ltpuactssaspl.shhcFpc+LppShp+...hhhtl-phplptlh.ssphht....thpthsp................p..stpslsDsRDh 
.................................................................................................................................................................atha..hh......tushph.............................t.t.pt.p.thp.h..p....l.p.......................tt.hhRsshLApL....Elhpphht.t............................p.ssh....phhhp...YapcFss.KssCasDLchal...thL........s.p...phpph.hpp.lht.h....................................tshpth.pplsshpl..................................thhh.s.......t..p..hs.tpph.p....................hhpphhtt.appshphs.ps...............p-h.p.sD.thsLlAsp.sLlchh......................tpsss.sh......l..hpAlslLEp.h..L..ppo..spNhph+LlLl+lY..th..L.Gshshuhp..ha.ppLclKplQhDol.u..ahlhsh...htshs.hstss.......................................phhpt.h......hp...aa.psstp.cssch...lhtuachssasp..........l.chhpa.pp.+LppShph.......hhsthEphhhphhh.tsp..t.........hpth...........................p.th.Dpcs..................................................................................... 0 82 125 183 +9630 PF09798 LCD1 DNA damage checkpoint protein Mistry J, Wood V anon Pfam-B_41058 (release 21.0) Family This is a family of proteins which regulate checkpoint kinases. In Schizosaccharomyces pombe this protein is called Rad26 and in Saccharomyces cerevisiae it is called LCD1 [1]. 
25.00 25.00 38.80 31.40 18.30 22.00 hmmbuild -o /dev/null HMM SEED 654 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.96 0.70 -6.53 6 47 2009-01-15 18:05:59 2007-07-30 14:21:43 4 2 43 0 29 43 0 601.70 32 89.65 CHANGED MLRD+.................LphLppptcc-csppptphsplpscacpELpKL+ppLQ+LEDE+KFLlhEpRulssschps.....p.........p.h.ss-ussls.pupssus+p++cchp....hpphlsLs.s+llt..c-sSLFhD+lh.apIhGu-hosl-hLs+Iph-h.s-hss.cphlIsustPLGpuIpphLhphKpphpLDchVDpsLEsLAsLIKp.Ilhsp-spLulPFLlALMapslpFRsSAsSlpuLKDLF.FhsDLhhpFphlLKsPlHcSs.L-lclsPplFQYpllDpLsLhYSFDllEsshpl..l.ppssps.pphacE.hlhKsLhtshphsLTISaKsl..lNlIaShVEllhslssl...h..pssscslhssphWtslIo+Laplhp+plpsscla........hph..hhsFhGLpRshGsNssssLIcplIsppcl..........pulP........hlIp+-s.shst-s.....h...p.chEtWhlpL+pslssIhcpLlhpapcp.plsstEhLhphs+hlupEQthhhshhlstDopshthRhpLlphLlplIYhhWppapcplppphhh-sps.ELlhsLWRllasp.psps.tpp..h-hthLlsphcsLslcDppcha-Dsa--.sh.PtalcpELtsphsppstpthpspa-phhhEMA+pILES....hlohEEsDSLYluM .................................hLRsp.................lp.Lptp+ccEhph.ttphpphphpc........pclstLKpplQ+LEDEKKFLp.Eh+stoppcht..p.....p.................tsh.sssspss..s....pspsp.......osps+p.pchp........tpshhsls.s+ll...c-oSLFh-plh.HpIhGuchoTlEhLs+lpl-.hschph...cshhIsKttslupuIsphLht.hKKshpLDchI-phlpslssLIcc..lp.p..-spLAVPFLluLhapslpFRPSAspp.sl+chhhhlCDLlphapalL+s....slc-ss..hshcstPp.hQhpll-hhllhauhDlLEtllpl..hppastphhhphacc...lhpsh.hshh.s.o.paKsh..lNVlashVEllsh.soh.........shs.s...ss.pphhstp...shIspLhtlh.h-l.st-sa........................................sFaGL.RslGsNp.sthIsplI.p-ch..........pulP................plI.c-s...hspsp.................lshphEtaLLpL+.clhslh-sLlhhhts.thlhstEhlhphs+hluhEQshhhp...u.cS.slclRhpLIpphlplIahlhp-p.cplpp..hp...-s..t.-LhhsLhRlhhspspts..........ss.s.cp.hh-h...Rph.sthppL.slp-p..sphhpcthpch....spahp...t...Ehhtplppchuphhph.Y-pch.EhARpIL-s....hsohEEADsLYhsM............ 
0 6 16 27 +9631 PF09799 Transmemb_17 Transmemb_17; Tmem17; Predicted membrane protein KOGs, Finn RD, Sammut SJ, Coggill PC anon KOGs (KOG4694), (KOG4502) Family This is a 100 amino acid region of a family of proteins conserved from nematodes to humans. It is predicted to be a transmembrane region but its function is not known. 23.30 23.30 23.60 23.80 23.00 23.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.24 0.72 -3.72 24 272 2009-09-11 14:19:48 2007-07-30 14:25:56 4 2 110 0 190 264 1 103.40 28 63.14 CHANGED hhLYhsshahshaalspllhhhhK......hhhhsshhhshtlshhllhsllEslRLhlGhtGNLpEcsstLshhhlLohhstlshlhahhhpshlLh..L-hslsslhlshhsh .........................hhhahNshahshahlsplhhhh.hK.......h..h..h.s...s.h.h.....h.........hh.tlsl....lllhhllEslRLahGhp.GNL.s.......Echs.Lsh.lhLThsstl.h.slaaL.L.h.p..shlLp.....lEhhlsslhlhhhh.h.......................... 1 55 75 129 +9633 PF09801 SYS1 Integral membrane protein S linking to the trans Golgi network KOGs, Finn RD, Sammut SJ anon KOGs (KOG4697) Family Members of this family are integral membrane proteins involved in protein trafficking between the late Golgi and endosome. They may also serve as a receptor for ADP-ribosylation factor-related protein 1 (ARFRP1) [1]. Sys1p is a small integral membrane protein with four predicted transmembrane domains that localises to the Trans Golgi network TGN in yeast and human cells [2]. 
21.90 21.90 23.30 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.77 0.71 -4.19 27 331 2009-01-15 18:05:59 2007-07-30 14:27:17 4 3 259 0 233 308 3 132.70 30 71.10 CHANGED apps.hsPhhIlsQIlhLQshYYhshsllh.hhhstlsG.sho................L.-hlFsac............slchssshGhhlhhhalls.uLl................sslhLhhlVtRoKLsLDFAlTlHhlHLlhshlYo...pshPtshsWWhlplhusslhshlGpahChh+EL+sI.h ...............................................................................hsPhhIltQIl.hh.Qsh.aYhsh.slhh.hhhshl.hst.t.h.o...................l.chlFsa.c.................................l.ph..ss.s.Ghhhh.h...s.al.Ls.ulh................su.lh...Lh.h.llpRuKhshDFulTlHhlHLlhshhYs...tph.PsshtWWhlphsuhslhshlGpahCh.h.pELptI.h........................... 0 78 126 188 +9634 PF09802 Sec66 Preprotein translocase subunit Sec66 KOGs, Finn RD, Coggill PC, Sammut SJ anon KOGs (KOG4699) Family Members of this family of proteins are a component of the heterotetrameric Sec62/63 complex composed of SEC62, SEC63, SEC66 and SEC72. The Sec62/63 complex associates with the Sec61 complex to form the Sec complex. Sec 66 is involved in SRP-independent post-translational translocation across the endoplasmic reticulum and functions together with the Sec61 complex and KAR2 in a channel-forming translocon complex. Furthermore, Sec66 is also required for growth at elevated temperatures [1][2][3][4]. 
21.30 21.30 21.40 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.03 0.71 -4.90 16 149 2009-01-15 18:05:59 2007-07-30 14:29:02 4 2 146 0 113 141 0 182.10 37 76.09 CHANGED hphlSlhTPLlYlulLlsSLhsFSshYRK++hpchupLcPhFs-ptsRclYhsLtch.-s.........................+lp-KVlKAALLRRusEsIRRslKL+EtcstlshLappGSlGDDlWpRFppttKhhEhEl+-llpEApshtPsWsQohFtsApEIshNpALRRRhssIpsRscpptchW-h+hs..psuhhhp .................................................h..hlSlhhPhhYlslLluoLhsFSshYRKR+stc........sp.l.tPaFss.ph..p....RslYhoLhchp.............................pls-pV..LKAALLRRAsEDI+R...llcl+ptKsulstLhQ+GSlG.DDlWpRFppAEKEhEtEl+DVltEANsht...P..uWGQhIFpoApEhsh.Np...hhRc+lpplppptppppchW-h+ts..pp............................................. 0 35 64 98 +9635 PF09803 DUF2346 Uncharacterized conserved protein (DUF2346) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4702) Family Members of this family of proteins have no known function. 21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.73 0.72 -4.13 6 116 2009-01-15 18:05:59 2007-07-30 14:29:41 4 3 106 0 83 121 0 77.40 29 84.11 CHANGED MGsWtLEluRMslYhTFPVAMFalFNQPEYFE-aVscpKRplaPPEpcpHRcclEchhcpl.....R-++-pcLL+thp.tEpKc .........MGs.pLElhKhulYlsFPlu.haa.h..h..Nps-hF...c..c...a.lh.....p........p+cc.la.PP.Epp.........p..t..........c..pElp..c...htc.ch.....ppp..cc.pchhcth.......ptt..................................................... 0 34 46 65 +9636 PF09804 DUF2347 Uncharacterized conserved protein (DUF2347) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4704) Family Members of this family of hypothetical proteins have no known function. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.87 0.70 -5.13 36 201 2012-10-02 14:18:06 2007-07-30 14:30:16 4 7 172 0 144 318 1 266.40 32 54.98 CHANGED lFllpFDlKsGYslsWpcsh...sslpL-G.VEYKoLPSGlHpls-DllY.Fs........cc.ta.....hGlSsFhstsss.-ppR........ss+MhulGVLss.................s.thlspuWcasstLcphs.pphhpst....sshpsLppaa.....p..ph....tt..............................pttshspstshhsspphhsshHPshsLPphlcphGPLlFsLa+uuLLRKRILlh....sps.................PVctsCsa................VYslSlLSslPpslhshhssp....s.h.pPlFslGlpDl...shLtph........................uaIACToDpILthKscLaDlhVslssststpstt ............................................................................................................................................lFlhpFDh+.t.G.hl.Wp....hs...........s.....ls....L....-......G..V..Ea...K.....ShPS.....GhHplppDhlY.Fh................................cs..a.hGlusFhshss-.ctpR..........sA+MhuV..G.lLss..............................hsthaRahp.hLcphs.pphhps...........sphp..Lp...taa...........cp.ph..............................................................tht.s..h..s..sh..hh...t..ph.p.hHPshshsphlchFGs.IhsLa+.huLLRKRILIh....s.s.................................................PVt..sC.h...............................l.Y.s.h.....s.....hL.us.lshshhshh.sp........................+PhF....lsltDI....s..Lps.h.......................................ualACTT-cIht..KpcL.Y.DlhVshsssho.p....................................................... 0 40 73 111 +9637 PF09805 Nop25 Nucleolar protein 12 (25kDa) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4709) Family Members of this family of proteins are part of the yeast nuclear pore complex-associated pre-60S ribosomal subunit [1]. The family functions as a highly conserved exonuclease that is required for the 5'-end maturation of 5.8S and 25S rRNAs, demonstrating that 5'-end processing also has a redundant pathway. 
Nop25 binds late pre-60S ribosomes, accompanying them from the nucleolus to the nuclear periphery; and there is evidence for both physical and functional links between late 60S subunit processing and export [2]. 22.50 22.50 24.90 22.70 22.40 22.40 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.20 0.71 -4.10 41 300 2009-01-15 18:05:59 2007-07-30 14:30:41 4 6 257 0 202 285 1 141.20 26 58.48 CHANGED p++phtpp....scElsFDccsRpEaLTGFHKRKhpRpKcAQE.hcc+tR.t+hEERK+lR-ER+pchpctlcphccthp.lpcttsstcctpsspsppspt-...................s.t.......p.sspspptp.hDpcph...................................osVslEpl-s .................t......pt..thtplsFDccsRpEYLTGFHKRKhpR+.....Kp..Ap-phcc+h+.tphEcR++lR-ER+pp...h.p..c.....hl..p.....p.h...c..ct..h..p....hp...p...t....tsp.pp.......pttpp...p.p.p...............................................p...t.s.s.p.p..-.sth........................................................................................................................................................... 0 67 110 164 +9638 PF09806 CDK2AP Cyclin-dependent kinase 2-associated protein KOGs, Finn RD, Sammut SJ anon KOGs (KOG4713) Family Members of this family of proteins are cell-growth suppressors, associating with and influencing the biological activities of important cell cycle regulators in the S phase including monomeric non-phosphorylated cyclin-dependent kinase 2 (CDK2) and DNA polymerase alpha/primase. An association between mutations in the gene coding for this protein and oral cancer has been described. 
25.00 25.00 25.00 26.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.36 0.71 -4.07 11 192 2009-01-15 18:05:59 2007-07-30 14:31:31 4 4 91 2 111 168 0 120.90 43 82.32 CHANGED MsYhsItss.Sphs.................ssTshPtssht..............................................................................suGSssosS.........................................................................................................................................ss.suuuhhpPlhSshs.PShG...ssssh.......oKYupLLuVIEEMG+-IRPTYuGS+SuhERLKRGIlHARhLVREC...LtETERsARp ................................................................................................................................................................................s.s......................................................................................................................................................................................................................................................................................................................s...ssu.s.h...p.lh....s..c.h.us...PShG.h.sp......sstss.........SKYu-LLulIEEhGK-IRPTYAGS..K..SAhERLKRGIlHARsLVREC...LtETERsAR.................. 0 25 34 69 +9639 PF09807 DUF2348 Uncharacterized conserved protein (DUF2348) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4723) Family Members of this family of putative uncharacterized proteins have no known function. 
31.60 31.60 31.70 31.70 31.50 31.50 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.66 0.70 -5.27 5 176 2009-01-15 18:05:59 2007-07-30 14:31:58 4 4 121 0 114 164 0 204.20 28 87.66 CHANGED MFPELNsLLssoPDpsE........pGKlTLLCDu.KTDGSFLVHHFLSFYL+..AuCKVCFVALVQSFSHYSIVGQKLGVSLTsAR-+GQLVFLEGLKSul-llFp...su-sspPLpFLREussGsLcsLFcFVp-oL+..PusSuGs.WphPVLlVDDLSVLLS..LGVuAlAVLDFhHYCRATVCoELpGNlVlLVH-sE-AuDE-s-...lLLpGLSHQSHLIL+AEGLATGFC+DVHGQLcILWRpsSsSutpRuQoh..sYQYKIQD ............................................................................................................................up.hhll.p-t..pssuuFllpphLp.hL+............us..s...t.......lhhluh.p.shsHYp.lup+...l....G.hsLshtpc.psplsFl.-sLp.....ht..........h....ht...............t...t.ttsp.........t.hh.....ts.s.........s....t.L...p.La.p..lpptlp.....s.s.s.t.ts.............sslllDDlSlLhs.........hG..h.u...s.h.t....Vl....c...Fh....chsp.sls..h.p.....h..p.s..s..hV..........hLs+......ts.....tp.s.......p.cpst.............lhptLta.uplhlpspsLsTGhspDVcG.plpl.....c.......................................................................................... 0 38 55 87 +9640 PF09808 SNAPc_SNAP43 Small nuclear RNA activating complex (SNAPc), subunit SNAP43 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4746) Family Members of this family are part of the SNAPc complex required for the transcription of both RNA polymerase II and III small-nuclear RNA genes. They bind to the proximal sequence element (PSE), a non-TATA-box basal promoter element common to these 2 types of genes. Furthermore, they also recruit TBP and BRF2 to the U6 snRNA TATA box. 
25.00 25.00 25.40 25.00 24.20 24.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.25 0.71 -4.63 17 185 2009-01-15 18:05:59 2007-07-30 14:32:26 4 4 132 0 126 174 0 174.70 25 47.41 CHANGED sl+pDhcpLLpcFtpt....coscFpsFpplWcchp...FpplFpG+ppssEhhtFsptlLhhshtYhhss.........pohppRluuLYhLYslY.pQ.sp.hhKIRlshpsapchpcaspphh....ppphh.-sshlhp+LhpcpAF+FsAh.phhs.shh+ph....p...hptpstpphhsstspspplhp.ph.....lpcLt.lcttYpchK ........................................hppDhctLlpc...Ftph...........pshpFpsFpclWcphp...Ft.tlapu.p...pph-..h..t.pFscphLthsh.p...ah.hss...................................................hohp.RluuLYhLYsLYpsQ........s....ps......h...............hKI+lslcsapplh....ch.pp.hh.......ptphh.-sshlhp+.L.h.p.pAFha...s...Ahs.phhs...t....h.pph.........p.....hh.th.....tph.th.p...t......hpth..hpttYtphp............................................................. 0 43 63 101 +9641 PF09809 MRP-L27 MRP_L27; Mitochondrial ribosomal protein L27 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4756) Family Members of this family of proteins are components of the mitochondrial ribosome large subunit. They are also involved in apoptosis and cell cycle regulation. 25.00 25.00 25.50 26.30 24.50 24.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.22 0.71 -4.46 16 274 2009-09-11 08:35:56 2007-07-30 14:33:14 4 3 242 0 196 253 1 95.10 31 65.91 CHANGED +sus++hshToKpGs+sa..hKG.............+GupshGhhspsG+alh.h-hV.palVPs.LpshcLKPYVSapsPplpps................hcsacpGhhcs-hh.chshE.spcG+l ......................................................................................thh.LToKpus+sa..YKG....................psspshGha.s..p.pGpYllshcKl.saVVP-....Ls...sF...+......LKPaVShpssthhpp............................................................................................ 
0 59 103 160 +9642 PF09810 Exo5 Morph_protein1; Exonuclease V - a 5' deoxyribonuclease KOGs, Finn RD, Sammut SJ, Coggill P anon KOGs (KOG4760) Family Exonuclease V is a monomeric 5' deoxyribonuclease that is localised in the nucleus. It degrades single-stranded, but not double-stranded, DNA from the 5'-end, and the products are dinucleotides, except the 3'-terminal tri- and tetranucleotides, which are not degraded. The initial hydrolytic cut of exonuclease V on the dephosphorylated substrate produces a mixture of dinucleoside monophosphates and trinucleoside diphosphates. The enzyme is processive in action [1]. Exo5 is specific for single-stranded DNA and does not hydrolyze RNA. However, Exo5 has the capacity to slide across 5' double-stranded DNA or 5' RNA sequences and resume cutting two nucleotides downstream of the double-stranded-to-single-stranded junction or RNA-to-DNA junction, respectively [3]. 25.00 25.00 25.90 25.30 23.90 24.60 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.10 0.70 -5.20 13 244 2012-10-11 20:44:46 2007-07-30 14:33:33 4 5 193 0 168 260 3 317.10 25 67.42 CHANGED SPlcRF+p..ppslSVTDLsustWCElQhhYsLsc..hG+KccTtAM+pGsplHcpLEcElassVsVc.....V...TT+EDshuL+lhNhIptLppLpppG.............hsREl.VaGhl.cGpllsGlIDpLshcssc.phppp......p..h..........................................................clhloDsKTRtusolPo..psQhRsohlQL.LYp+hLschss...................................splshpplhscY.......sLDPpcsFossahsp.u.ht.......................................................p..t..hhcapsLpsLhphhhtphphp.LP...........................hlssphphcYcppsssp.....lluscpaha-.cslcthls-thsaW+GpR-scGV.cspEuWKCRhC-Ft-pCsWp 
...............................................................................................................th.......phLsVTcL.hs.tWCEhph.Ysh.t..................hh.p....ts.s.........hctGpphHttLEp....Ela.p...ltl...............................l...poc.....E...Dshu..l+...h............h.Nhl.tl.pLhp.pG..................................................hsR..Eh.l..........a..G..hl.....c.................s..................hlsGlIDpL......p...h..p...s..p................................................................................................................................................................plhlsDhKT...R....t..p.........plPs...ps.t.h.c.s.splQl.hY+.hhhsph.sp................................................................tp.hsht.thh...pph.........tLs.sptshs..t..hht.p...............................................................................................................................................................................................................................................................hh.pht....s.L.tp.lhthh...t..hph..hs....................................................................................ht..hthpY..pts...................hspp.h.as.p.lpthlpp.htaWhGpRpspsV.........p-t...h...K...CphCca.tp.C.h................................................................................................................................................................................... 0 50 92 134 +9643 PF09811 Yae1_N Essential protein Yae1, N terminal KOGs, Finn RD, Sammut SJ anon KOGs (KOG4774) Family Members of this family are found in the N terminal region of the essential protein Yae1. Their exact function has not, as yet, been determined. The family DUF1715, Pfam:PF08215 has now been merged into this family. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -7.90 0.72 -4.42 82 669 2012-10-02 21:03:42 2007-07-30 14:34:13 4 24 410 0 356 588 16 38.90 34 17.48 CHANGED GYp-GhspGpppshpcGhp.Gapp.Ghp..hGhphGphtGhh .........GYp-GlspGpppuhp....cGhptGhpp.Ghp..hGhphGthpGh............... 0 93 179 275 +9644 PF09812 MRP-L28 MRP_L28; Mitochondrial ribosomal protein L28 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4778) Family Members of this family are components of the mitochondrial large ribosomal subunit. Mature mitochondrial ribosomes consist of a small (37S) and a large (54S) subunit. The 37S subunit contains at least 33 different proteins and 1 molecule of RNA (15S). The 54S subunit contains at least 45 different proteins and 1 molecule of RNA (21S) [1][2]. 24.10 24.10 24.20 24.50 24.00 23.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.82 0.71 -4.40 14 225 2009-01-15 18:05:59 2007-07-30 14:34:48 4 4 207 0 158 213 0 134.30 24 72.35 CHANGED hpApPh+KKKKlDP....pppttt+c+lc+plR.+hpKssppLpPl--hhss.p.lcpsppRp.thhclo.E-l-+Rsll.KpWupY+ppp+pt-hphlcphltuQpcALppLch.Ss-LYptAlts-su.hhPhchcsPstoPP............p.o+la ......................................................................................................l+...php.+.t.t....ph........Phtp.h.hs.....h.p...+p.t..lclut..-ch.++thlp+sWslapppp+ctcppplcp.hpu.pcAh-EL+.......t....S...............cLaptA.................t......-.........t...hhPh.p.h.phP...hPP...................................................................... 0 48 74 125 +9645 PF09813 Coiled-coil_56 Coiled-coil domain-containing protein 56 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4782) Family Members of this family of proteins have no known function. 
21.10 21.10 21.10 21.10 20.70 21.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.07 0.72 -4.02 4 119 2009-01-15 18:05:59 2007-07-30 14:35:03 4 6 108 0 85 117 1 85.20 31 62.89 CHANGED MAt.......osKEGsA.aApRIDPo+EpLoPtQlcFMRplEhtQWpK..pht+hRsRNllTGLuIGulVLGIYGYTFYSVuQE+FLDEhE--AKAARt.uh.hppss .................................tt...h.............................................t.hp...php+........th.t.h.R..t+NhlTGLuluu.lVluIY.......uYThhuVpQEcFhD.....-.h..-c.hph............st..................... 0 21 38 62 +9646 PF09814 HECT_2 DUF2351; HECT-like Ubiquitin-conjugating enzyme (E2)-binding KOGs, Finn RD, Sammut SJ anon KOGs (KOG4784) Family HECT_2 is a family of UbcH10-binding proteins. 19.00 19.00 19.10 19.20 18.90 18.70 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.32 0.70 -5.73 28 316 2012-10-03 14:42:41 2007-07-30 14:35:36 4 9 236 0 227 300 2 298.30 16 76.93 CHANGED Ehhsplpshplhlphsspht........sls.sphplplsps....................shplpLPsclphsspshhththtst.............slphclplpptsss....................thsspslpsps........hplhCpsCppsllps........pshp................+lh-hPSEtWsphhD.WaCHpss.tstptt..............................ppLpPpps-.lhlGssahL..lppsphpphhhhtt...................t.lhCppCps.LG..........phsspsh+LaKhslph................t.ss..cph..pphlhtpllphlpspusc.............+Fhlpt..........sspphl.....hlWlhssshtlsp....stt...........................................tsspsuhKlLYp.......t..thhshhpss.slcplplP...stshpphhphLppsNshlPtshR..thss........aplu 
.........................................................................................................................................................................................h...............................h.h....h...............................hthth.h.t.........................................h.tpphpstp..............phhCppCtp.l..lpp........pph......................phhsLPStpW...tth..h.-.W....t.......CH.ss.tp..........................................ptlhs.ptsp.hh....lu.shhl..hp..pssh.p.t....................................................................................lhC.....t.pCt..lG..................................tt...tshcha.hhtl.h............................pp........pthhst.llph.ptpssh.................+hhlps....................ttp..l........h.lWlhssp.h.hh........s..........................................................................................................................t..tuhKlhYp........................t...............tht.....l..h.......h.thh..lttsp...hP.t.p......................................................................................................................................... 0 65 116 185 +9647 PF09815 XK-related XK-related protein KOGs, Finn RD, Sammut SJ anon KOGs (KOG4790) Family Members of this family comprise various XK-related proteins, that are involved in sodium-dependent transport of neutral amino acids or oligopeptides. These proteins are responsible for the Kx blood group system - defects results in McLeod syndrome [MIM:314850], an X-linked multi-system disorder characterised by late onset abnormalities in the neuromuscular and hematopoietic systems [1][2]. 
20.20 20.20 20.40 20.30 19.60 19.70 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.30 0.70 -5.44 35 705 2009-01-15 18:05:59 2007-07-30 14:36:05 4 12 97 0 433 583 1 269.20 23 65.58 CHANGED c...hhhslhulhhahsDhusDlaluspYatpucahatuLsLsFlllsSlllQhhSahWappDhspsphp...............................................................................h.hhsllHlLQlGhhhRahcslphuhpsthpppppp............................hpthhhtpsDl...oMLRLlEoFLEosPQLlLQLaIhl...............................tp....ps..phhQhluhshShhSluWullsYp+s......LRtphsDKpphsh.husllhhlW+lhhI......................suRllulsLFuulhthasshhhhhhWllhh......hWshtpp.Ts.Ftp.ohshEhlaphlVGllhlFsaFN................VccupTRtRhhlYYshhll-slllhhl.h.....hhh+sshhsp.htlhlsssl....hssahlGlhhhllYYphhHPs ............................................................hhsh...h.p...thhh......ah..tt..p........h.h.hhsh.h...h...hhsshhhp...h.s.h..........ah...h...s.t..t.t.............................................................................................h.hph..h....h.u...h..h.........R......h..hpshhhhh......p..........ttt...............................hhhh..hp...ssh...shhplhpsaL.tos.PQLhLQlhlhl....................................................tp......tth.......hthh.hh.hShh.ulsashhthphh..............lp......hs...s....h........h...sh...huhl...h.hh.W+hh...l......................ssRhlsh.sLF.sshh....hhhhhh.hhhpahhhh................hWh..hhtt...sp....ht..............s...........h..................t.................hhh..h.lh.uhlhl.ash.hN.................................l.pcs.p.st..h+hhhaYh..............hhhhENhhhhhh.a..................h..h.t........................h.............................hhh.l....h.sahh.u.l.....hhhllaYt...hhHP.............................................. 
0 122 154 274 +9648 PF09816 EAF RNA polymerase II transcription elongation factor KOGs, Finn RD, Sammut SJ anon KOGs (KOG4795) Domain Members of this family act as transcriptional transactivators of ELL and ELL2 elongation activities [1][2][3][4]. Eaf proteins form a stable heterodimer complex with ELL proteins to facilitate the binding of RNA polymerase II to activate transcription elongation. The N-terminus of approx 120 residues is globular and highly conserved [5]. 20.20 20.20 21.40 20.70 19.40 17.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.68 0.72 -4.09 29 307 2009-01-15 18:05:59 2007-07-30 14:37:57 4 8 211 0 216 291 0 104.00 31 29.65 CHANGED pYslhLGsphp.............scssps.....phhsl+YsFKPsSssssp..splpts.....pspphslsl................sssuspss..hsacGs..........spss..pppchlLlF...........DscspsahLE+lsosl.phphpRsss ....................aslpLGpohp........................cpsps.........tFpolRY..-FKPAS.lDsopt...uplpsu.........pssplslol.................phpuus..ss..slFcGs.......................p+sh..p...c-slLlh...........stc..otthhLE+Lsosl.pl++sRs..s....................... 0 60 99 156 +9649 PF09817 DUF2352 Uncharacterized conserved protein (DUF2352) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4803) Family Members of this family of uncharacterised proteins have no known function. 
25.00 25.00 25.40 25.30 24.10 24.00 hmmbuild -o /dev/null HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.95 0.70 -6.34 9 120 2009-01-15 18:05:59 2007-07-30 14:38:32 4 4 80 2 74 113 0 412.90 28 87.48 CHANGED PspYhsslpst............h..tsp....schlalhc............................pspL-hpssshc-sh.pp.suls-........LSlcp.ht....................tslslt+ARpllshhpht+s..shu............slWlhCDGSD.ptTshLthEhst....psphhpGllhahssps......sh.hohpsLtppHpp+tt.uh.pspthuh.pha..........pspsplTlcloWssssp.....hLppsslu.stTlplc.t.h-ppuss.pphappLcFLlslt-slhohcsEhhpshpscus..ss.sph.ls-Lcpclsphhssssc.pspphspssst...tsh.phlpsRtshDhs-pLWshh+p.ssSYpDLptsFshlhQshp+usIts..pssssopLucLIpp.hpschshssLoGspPhchLLEIGL-KlhKDYhshFsppclsohNhLchhh..............................................................sophDtpEpshRlpKLaplhplL...pphLhlchphclh.haopsC.cYh+csPhs.pcla.......pl.lpsphlpchhpsccPhph+VphsSspt....+cVcTsa.hsscs.ls......schs.phpssspp.....ccthahhphlpspp ............................................................................................................................................................................................................t...................................slsh.pu+tl..ht....hhtps..shs.............slWhhCDuoD...tt.ThhLthph........sp.h..pGl.hhh.ssh.........t...p.p.tlhp.....a.tth..s..t..th...phh.................s.sth.lchpWts.st..................hp.ss.......tthhplt.......-.ps.h..thhppL....phLhhl...tcslh.shhp-h....sh....pt.psh........ht....t..hs.cL.pphp...t..........h.sphpt...t....h..tp.........th....h......Rt.hDhs-pLWphhpp..s.oap-lh......sh.phlhpshtpup.lts.hp.s.spshLuplIpp............hpt.pht..............s.LsGh.......PlphLlElGl-Klh+DYh.h.......ah.t.pchsshs..Lp.h......................................................................................ssth..s.pcth.+l...t+Ltplhthl...pph...hhp.thph....hs.p.s..phhpp..h..p.h...............pl.l.......th.l.....pp..h.hptppP..h.phph.st.........ppl.T.sh.hs.p..h.............t...................................
............................................................................................................................................. 0 21 27 51 +9650 PF09818 ABC_ATPase Predicted ATPase of the ABC class COGs, Finn RD, Sammut SJ anon COGs (COG3044) Family Members of this family include various bacterial predicted ABC class ATPases. 19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.29 0.70 -5.90 44 462 2012-10-05 12:31:09 2007-07-30 14:40:15 4 4 429 0 135 1778 323 421.10 45 75.58 CHANGED hppLpphLpplDs+sYtAYKplpGp.Ypas.....p...apLhlD+VQuDPFAsPScl+ltlstshs..................shstthh..psts+plAhpDals...Rphtpth...........thtssscoGhltlspsGQplLcRoushl...........................ssptl...clRhpVuLP....ApGRcIhu+tAhplLhptLPcllccslhhpth......stcplppalphscDQptLRppLtc.......tGLVAFVAsGuILPRcSGsS-+......Phc..sAVPFpSPc....oLc.lplpLPp.tGplpGMGIPcG.lTLIVGGGaHGKSTLLcALEpGVYsHlPGDGREaVlTcssAlKIRAEDGRslpsVDISsFIssLP..tGcDTs.pFoTpsASGSTSQAAslhEAlEsG..............upsLLIDEDTSATNFMlRDtRMQpLls+ccEPITPFl-+l+pLhcchGlSTllVs.GGSGsYhclADpVIhM-sYpstDlTtc.......A+clspp....hsstpttpstt.........sR 
...............................................................................................................................h.ppLhphLppl-tpsYtAY+plcGp.....YcFs...................s........apLhIDHlQuDPaAsP.S+l+shhshphs.................................................sh....s......hh.....ps.ht.phA.hpDals...RtFtphh..........................t.....t........psutlpIstsGQplL-..RTulhh........................................spctl.........ElRhcluLP...............ApGR...sIh.u+pAtplLhptLPchlccuhhhc.pl.............stptLhppsclsEDQchlRppLpc.........................tsLVAFVAsGulLPRpoGssDh......Phc......sAVsFpo..Pc.......oLc..lshphPs....t........Gpl..p..G...M..G..I..Pc....G.ITLIVGGGaHGKSTLLpAL...Ep...GV...Ys...H....IsGDGREaVlTcssAhKIRAED.G..Rs.l.p.s.l.sI...SsFIspLP........hG.+.......DTs..pFS.T.p....s.A...SGSTSQA..AslhEu.lEuG......................................ussLLIDEDTSA.....T...N....F....MIRD...tRMQtLlu.+..pc..E...PI....TPhl..-RlppLh..cch....Gl.S.T.llVh..GGSGDYhD.V..A..DsVI..Mcs..YpshDV.TpcA+clstp.hsp.ct.t................................................................................... 2 67 107 127 +9651 PF09819 ABC_cobalt ABC-type cobalt transport system, permease component COGs, Finn RD, Sammut SJ anon COGs (COG4721) Family Members of this family of prokaryotic proteins include various hypothetical proteins as well as ABC-type cobalt transport systems. 20.80 20.80 21.30 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.69 0.71 -4.19 51 1105 2009-01-15 18:05:59 2007-07-30 14:41:44 4 2 991 0 157 535 10 127.90 35 65.84 CHANGED lhDIllsulluVshGlla.hsashlhsslsshhs...hsPhhpsl.................hhGlWhhuusluuhllRKPGAAlluEhlAAhlEhllGupaGst.slltGllQGLGuElsFA..lhtY++ashhshhLuuhsuulsshlh- ...........................................pDlllhuhlulhFGlla.hshshl.Y.shlpshht...htshhppl..................hhGlW...h.MAAslAulll.KPGuAlluEllAAhs.Esl.h..G..u..paG.ls.slluGll.QGLusElsFh..lh+Y.+.phshhshhluuhssslsuFsh........... 
0 45 96 131 +9652 PF09820 AAA-ATPase_like Predicted AAA-ATPase Mistry J anon Pfam-B_5377 (release 16.0) Family This family contains many hypothetical bacterial proteins. This family was previously the N-terminal part of the Pfam DUF1703 (Pfam:PF08011) family before it was split into two. This region is predicted to be an AAA-ATPase domain [1]. 26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.03 0.70 -5.24 84 1607 2012-10-05 12:31:09 2007-07-30 14:44:05 4 10 265 0 256 1536 96 241.50 31 54.30 CHANGED PIGIQsFcclRpcs...YhYVDKTthIhpLhpsusha.FLSRPRRFGKSLhlSTLcsaFp.....Gp+cLFpGLhIcc.pp.........tW....tpYPVl+lshsstph.pohppLpptlpphlpphtptashthp.............................cpshssphttlIcphhcppGppVVlLIDEYD+PlLpslp.st......p...........hhcchRp...hL+sFYuslKs....t-saL+FshLTGVoKFupsulFSsLNNLpsIohsp..pYsslCGhT-pElcphhp..hpths.....shcthhpcl+chYDGYpFs..s.....slYNPFslL.hhpp .......................................PlGlpsFpcl..h...pps.ahYlDKTt...h...l.........p....l...........h....p....p...s..p....h...hhoRPRRFG.K.oLhhoh...Lc...taFp...............................tpccLF.p....s....Lh..I.tp.....p..........................h......s......pY..PVlh...ls.hp..shph..tsh...p..p....h.p.lpth.lp.th.p..ath.hp.........................................ptshttphttlhphh.hpp.h.s.pplllLlDEYDpPh.ps.ht.pt................................hhpphhp..........hh+.s.h.a.sshhp......tsthlc..hshlTGl.+h...s..p........ulF......Ss.......LNp.h..p.s..h....o....h.s..pa..s.p.hhGhTcpElcphhp...................thpt...hppl+phYDGY....pFs..........tlYNPasll.hhp.p..................................................................... 0 142 228 252 +9653 PF09821 AAA_assoc_C ABC_transp; AAA_36; C-terminal AAA-associated domain COGs, Finn RD, Sammut SJ anon COGs (COG4754) Domain This had been thought to be an ATPase domain of ABC-transporter proteins. However, only one member has any trans-membrane regions. 
It is associated with an upstream ATP-binding cassette family, Pfam:PF00005. 21.30 21.30 21.30 21.40 21.20 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.28 0.71 -10.04 0.71 -3.84 49 413 2009-01-15 18:05:59 2007-07-30 14:44:22 4 2 357 0 152 349 21 118.50 38 29.96 CHANGED spLAcclpl-lD-LhPlsEuhplLshAclpsGDlpLTstG+pascuslpcRKclFsppLhphlPLsspI+phLcccsscphscpphhctLccahssptAccsLcsllsWGRYAElatY.D- ...............PcLAcpLpl-lDDLaPlsEsLphLtFAcl..c.c..G..DlhLTshGcpFs.cus.h.pERKtlFucpL..lchVPLsspI+c.lLc-.+..s..s+pssppRFtpcL..E-...a..h....o..pptAccsLcslIsWGRYuElFsYD........................................... 0 38 81 117 +9654 PF09822 ABC_transp_aux ABC-type uncharacterized transport system COGs, Finn RD, Sammut SJ anon COGs (COG3225) Family This domain is found in various eukaryotic and prokaryotic intra-flagellar transport proteins involved in gliding motility, as well as in several hypothetical proteins. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.62 0.70 -4.94 88 807 2009-09-11 15:43:23 2007-07-30 14:46:11 4 13 632 0 316 818 488 225.20 19 41.63 CHANGED RhDlTpsphYoLSpsocplLppLcp..Plplphahss................hpphtsplcclLccYpthus.c...lplcalD..P........................ptssttpc...t.Gh.s.....................phasslll...phsscpphlshhs...................t.tthEhplopul...pclsps.....p................................................................................................................................................ptplshlsGpuEh.............................................................................................................................................sttthsphhppLppp..a..plpslsLs.................tspl..PpcsclLllssPpp.ls-pphhtlcpalh.pGGcllhhl-sh.t.............s......................t.sLss...LlppaGlplssshVhD 
.....................................................................................................................................................................................................................................................................................hDhotpphaolo.to.phlp..p.lpp...............sltlhhh...hp..........t............................h.tth.pph..l..ppat....t...h...st..p....lphphhs.....s................................t....tp..............................................................................hhh.....t.t....t.p.thl.h.t.........................................Ehpls..psl....plhpt.......t.....................................................................................................................................................p.hl.hh.hpupsph...................................................................................................................................................tt..th......t......ph......tptLp......pp..a...pl.....p..p..lslt....................tppl...psss....l.lllssPpp.hsp.....pEhptlcpalt.pG...G....p....l....lhhhssh..t...........p....................................sls..ll.ppa.Glph.psshlh......................................................... 0 156 231 275 +9655 PF09823 DUF2357 Domain of unknown function (DUF2357) Waterfield DI, Finn RD anon COG1700 Domain This entry was previously the N terminal portion of DUF524 (Pfam:PF04411) before it was split into two. This domain has no known function. It is predicted to adopt an all beta secondary structure pattern followed by mainly alpha-helical structures [1]. 
20.60 20.60 20.60 20.60 20.40 19.90 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.57 0.70 -5.28 22 202 2009-01-15 18:05:59 2007-07-30 14:48:20 4 5 189 0 66 203 17 224.80 20 32.58 CHANGED tGplsFsNclGhsphtlt.ttp....thhplphEVhssKls....app-actlLpp.....lscphsplhhphhppT.hphpts.ptstss.h.hashlcp......hhpsL.pulctIhppPHppL.pppphh+ssplpchssthhttltcpst.......................hstphhspphhptcpchohDT.ENRFlKahLpphppclt...plpptltptttphttt.....hhpplpphtcplpchLppsha+cVGphpthsspShVLpptsGY+-la+ha ..................................................hph.s.hG..ph.hh.ttt.....t.hphth-VhstKhs.....................hpp-apthlp-.....ls.p.h.tlhhphhpt.o.h.t...ht......t.t...tss....hh....thlpp........hpphhpulppIhppP+ppLhpcpphhpsccl+chsstshp....ltccsthh......................tpphh.spphhssp..+..phohDThENR....FlKahlp.p.lhcplp...........pl.hp.tl...pt...t.pp...t...........................hhppl.ppht.phpphhp.p.shhptlup.hp.t.s...S.llp.t.GYpphapha....................................................................................... 0 30 48 59 +9656 PF09824 ArsR ArsR transcriptional regulator COGs, Finn RD, Sammut SJ anon COGs (COG4860) Family Members of this family of archaeal proteins are conserved transcriptional regulators belonging to the ArsR family. 23.90 23.90 23.90 150.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.84 0.71 -4.91 12 29 2009-01-15 18:05:59 2007-07-30 14:48:53 4 1 28 0 26 33 9 160.40 40 94.30 CHANGED lNDPu-LVPLhpsFsScs++KlaptLsstWhTct-l--hhGp-s..pcuLplL+KusLlEoQWRhP.csGppPpKEYHosYS+VQsNF.sShpDLuDlIhlsFhs.--lc-hh-clcphlcp.GssSlusloRsLshsPhaI+ulA+RSptLsV+GQ+lcllc- lNDPu-LVPLLpsFsScsaK+Va-tLoppWhTcpELc-hh.Gc-s..pcuLplLKKuuLlEopWRMP.cPGppPpKEYHooYo+lpANFQCohcDLu-lIhlshhsDE-l+-hh-clcctlcs..GNsSlssLoRphslSPhaI+ulAKRSppLsVKGQ+lclsc... 
0 7 21 23 +9657 PF09825 BPL_N Biotin-protein ligase, N terminal COGs, Finn RD, Sammut SJ anon COGs (COG4285) Family The function of this structural domain is unknown. It is found to the N terminus of the biotin protein ligase catalytic domain. 24.90 24.90 24.90 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.17 0.70 -5.67 13 349 2012-10-03 00:28:14 2007-07-30 14:49:48 4 13 294 0 194 322 23 253.10 28 58.70 CHANGED MNVLVYsGsGoTspSV+HslpoLRhLLtPaYAVssVsspsLtpEPW.spsALLVFPGGADLPYCcsLsus..ss++IpcaVpp.GGsYLGFCAGGYaGSuRsEF-lGDPshEVsGsRELuFFPGssRGsAFpGFpYsSEsGARAspLplsp........tssspFpsYYNGGuVFl-A-Ka....sNVElLAcYsEcs-lsssps...........pAAlVas+VG+GpslLTGsHPEFssp.Lpptsst......thptll-pLctp-psRhpFh+tlLpKLGLc.............lspsss.sssPsL.......Tslalsupss.......splpchhssl..p-.sspsssp....hlcsp..sDpFphtcstp....................................................s..sut.......pssphtD.ccssKpllh..pspslPspchoPpFDhctYassL .......................................................................................................lhlYss.G.ss.......sl...c...psl.tL.+....th.ht...s...p..h...t...V.....l...ss.p...l.h..pps.W..t.p.ss.LL.VhP.......G.Gs.D.l..s.Y.s.psLs...st...................GsppIc.paVpp.GGsaL.GhCAGuYaustt.....hpFt.s.....s....thtl...upRp.LthasG...hstG..sh...h..t.s..a.Yp..op...tGs.p...s....s..l.htt.............t.h..aa.N.GGshF..ss.ph......thpllApYtp.................................sAhl.pp.......h.G.....c..GtslLoGsH.Eh..................s....t........hp....................................................h..hh.......................................................................................................................................................................................................................................................................................................................................................................................... 
0 71 117 161 +9658 PF09826 Beta_propel Beta propeller domain COGs, Finn RD, Sammut SJ anon COGs (COG4880) Family Members of this family comprise secreted bacterial proteins containing C-terminal beta-propeller domain distantly related to WD-40 repeats. 22.40 22.40 22.40 23.20 22.30 22.30 hmmbuild -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.78 0.70 -6.16 28 199 2012-10-05 17:30:43 2007-07-30 14:50:25 4 9 169 0 97 204 204 474.40 25 72.41 CHANGED cYSsTNlQVtGVDEuDlVKTDGphIahh.s.s..............splhllcuhP.......s.pph+llup..lshsup....pcLaL..psc....pLllls.....st.t........ss...................s..h.tshspshlYDISD.ppPclltplplsGsYlsuRhlsutlYlVssphsphh..................................h.t.p.pshhPthhss............hhhsss.hahPs.sthssa...s..slsulsl..sssp.sssssllG.su.splYsSpssLYluhsphh...h...........................................................................pt.ttpppTtla+Fslss...sclpahusGpVsGpl...LNQFShDEap.GhLRVATThsp........................................................hptpstspNslalL....D.psLchlGclpsLA........GEcIYSsRFhGDpuYlVTF+plDPLFVlDLS-PppPclLGELKIPGaSsYLHPls-s+LlGlGp-sspp.............s..p.GlKlSLFDVoDsssPpEhsphhltppsspSssht-H+AFhacpp+...sllulPls.....................h.hpshhlacls....ssGhshhuplsptsss...................lhRulYlsD.....hLYTlSpstl+shshsshc 
..................................................aSpTNsQhtGVDEuDl..VK.o..DGphlahhs..t......................................pplhlhcs.s........................pphphhuplphttt.....................plal....pss.................pLllls.....pt...............................................t.sthhhaDlu.....s.t...p...Pphhtphphp.Gp.h.....hs..uRhhs.shlYllspp...h.......................................................t...t.hhP.http....................hh...tp.hhh........st.......t..sh.........h..hlsuhsl.........pssp...hs.t...sh..hG...ss..stlYhStp.slYlstpth..........................................................................................ttttTtlh+hslps....sphphhupspVsGhl.....hspFuhDEas....GhhRlsTTht.............................................................t.ttpspNslalL........D..pshphhGpl..p.slu........s.Ep.IYusRFhGcpuYhVTF+...psDPLFslDLpsPppPpllGtLK..IPGaSsYLHPh...s-s+llGlGh-stt...................h...GlKlulFDVoDsss..Ppp.htphhl......t...p...ss.S..pshhs..a+..Ahhh...st..pc............slhshPhs..............................hpshhl...apls......ppG.hphhtplph.s.............................................h.Rslals-.....hlYslotthlps.shts......................................................................................................................................... 0 56 76 86 +9659 PF09827 CRISPR_Cas2 CRISPR associated protein Cas2 COGs, Finn RD, Sammut SJ anon COGs (COG3512) Family Members of this family of bacterial proteins comprise various hypothetical proteins, as well as CRISPR (clustered regularly interspaced short palindromic repeats) associated proteins, conferring resistance to infection by certain bacteriophages. 
21.10 21.10 21.20 21.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.64 0.72 -3.84 233 1845 2009-01-15 18:05:59 2007-07-30 14:51:40 4 3 1369 9 558 1538 61 82.80 23 82.31 CHANGED M................................hhllsYDl.....ssc.......+ths+ltKhhcpaG.pplQhSVF-s.plspspht.plppclpchls...pp...Ds...lphatls...ppp..hpphthh ...........................M.......hhllsYDl.............spp............+thp+ltKhltpa.G..th..lQ..h..SVaps...pls.sspht..plhp..c.lp..ph..ls......tp....Ds...lphhpls..ppp.htp....hh................... 1 248 404 499 +9660 PF09828 Chrome_Resist Chromate resistance exported protein COGs, Finn RD, Sammut SJ anon COGs (COG4275) Family Members of this family of bacterial proteins, are involved in the reduction of chromate accumulation and are essential for chromate resistance. 25.00 25.00 31.90 31.90 20.60 19.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.61 0.71 -3.97 46 307 2009-01-15 18:05:59 2007-07-30 14:52:12 4 5 224 0 121 292 27 131.70 46 51.68 CHANGED WsTRpRP+lDRlACsWLIRRFIDspAcFlaVsssp....us.hsAlsFDhcG..staoHh....G-pCoF-shlccFuL.ssPuLt+LAtlV+uADsu.phshsPEAuGL.Al.hGLSppht.DDpphLpsuhslaDALYtahc ..........WsTRpRPtlDRlACsWLIRRFIDspAcFLalsssp....ss.......tsAlsFDhcG..stao.Hh....G-tsoF-shlppFuL..psPALh+LAtlV+u.....h..Dsu...hshsP.EAuGl...Al.hGLpct.hp.DDpphLct.uhslaDuLYsah.................... 0 33 66 87 +9661 PF09829 DUF2057 Uncharacterized protein conserved in bacteria (DUF2057) COGs, Finn RD, Sammut SJ anon COGs (COG3110) Family This domain, found in various prokaryotic proteins, has no known function. 
21.80 21.80 22.60 22.30 20.90 21.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.04 0.71 -4.63 79 1101 2009-01-15 18:05:59 2007-07-30 14:53:04 4 3 811 0 143 559 26 179.40 33 83.49 CHANGED ssLslsps....lclLsls...t.phpsshhpppps....lsLssG.ppQllhRapshh.......p.psssp.phhpSsshll.sFssss.pslpLphPc.hpshppAcp.Ftc.sPphpLts.ssupslshpp-hL.htsGhphspshtpplspYNt.sussAuh.......................ssssssssssssssssss.............................ttspstphLpaWappAspps+cpFhpW ......................................h.TLclsss....l-LLsls...GpK...l...u...uuL..h....cuscs....lcLcsG..aQlVFRhpchl.......hsss-c..plahSsPlll.oF.ss.p...pplshphPc.lcs...t+.-Ap+..F..st.sPplpLlD.ssupsls.lchD...hL.thsu..h..th..s.hsa-h.-...spcYNp.uuttAul..........................................sthAs.hh.ss.s.sslhssss...............................shsstupshsEppLcaWaphADspTRppFhpW...................................... 0 20 49 97 +9662 PF09830 ATP_transf ATP adenylyltransferase COGs, Finn RD, Sammut SJ anon COGs (COG4360) Family Members of this family of proteins catabolise Ap4N nucleotides (where N is A,C,G or U). Additionally they catalise the conversion of adenosine-5-phosphosulfate (AMPs) plus Pi to ADP plus sulphate, the exchange of NDP and phosphate and the synthesis of Ap4A from AMPs plus ATP [1]. 19.40 19.40 20.30 23.30 19.30 17.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.82 0.72 -4.17 64 315 2009-01-15 18:05:59 2007-07-30 14:53:45 4 8 241 0 200 306 96 64.90 31 19.94 CHANGED sYNLlhTccahhllPRsppph...............tslulNuhGauGhlLl+sppphphlcctssh.....plLppluhs ..sYNllhTccahhllPRpppph....................tslu.lNuhGa.uGhlLV+scpphc.hlpcts.sh......plLpplGh......................... 0 47 111 171 +9663 PF09831 DUF2058 Uncharacterized protein conserved in bacteria (DUF2058) COGs, Finn RD, Sammut SJ anon COGs (COG3122) Family This domain, found in various prokaryotic proteins, has no known function. 
25.00 25.00 25.50 25.40 21.30 22.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.97 0.71 -4.38 44 851 2009-01-15 18:05:59 2007-07-30 14:54:44 4 1 801 0 133 459 78 171.30 57 94.89 CHANGED SLQ-QLLKAGLlscKKsKcscppp..+Kpp....+pp+cs......sttssp.+ttlpcs+tpptc+D+pLNcp+ptctcpKuhtAQl+QlIcts+lsp.tpG-lsYNFsD..ssKlKclaVspphpcpLspGpLAIsph......pssYtllPctlA-KIspRDsshllhhss...pspst.....-EDDPY.....A-atIPDDLMW ..............................TLQEQLLKAGLVoSKKhuKVpRTA....KKSR..........VQA.REA...........................RtAV.EEN.KKAQLERDKQLSEQQKQAsLu.KEhKAQVKQLIEMNRIsl....upGD...I.......uFNFTD..sNlIKKIhVDKLTQsQLIsGRLAIARL......E.scYAIIPAuVADKIAQRDAsS..IVLpuA.......lStEEQ.......DEDD..PY..AD..FKVPDDLMW................. 0 25 57 98 +9664 PF09832 DUF2059 Uncharacterized protein conserved in bacteria (DUF2059) COGs, Finn RD, Sammut SJ anon COGs (COG3184) Family This domain, found in various prokaryotic proteins, has no known function. 23.60 23.60 23.70 24.40 23.50 22.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.77 0.72 -3.95 125 777 2009-01-15 18:05:59 2007-07-30 14:55:24 4 3 654 3 230 656 55 63.20 22 31.68 CHANGED shschppth.hshaspph..opp-lpplhsFYpSslGp+llptpstsppp.hp.thpth....spphhspl ................t..tphppth.hchYtcta.....oppElcslhsFYpSPsGpKllpppstltpc.ht.hhpth....htth....h............................... 0 52 123 178 +9666 PF09834 DUF2061 Predicted membrane protein (DUF2061) COGs, Finn RD, Sammut SJ anon COGs (COG3205) Family This domain, found in various prokaryotic proteins, has no known function. 21.40 21.40 21.50 21.60 21.30 19.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.46 0.72 -3.77 64 547 2009-01-15 18:05:59 2007-07-30 14:56:55 4 5 401 0 140 400 941 52.50 45 66.49 CHANGED hhKTloatllthslshsVuYll.....TGslhluuslAhlEshsphlsYaFHERlWp+ ....hhKTloFuslHFolAFsVuYlL........TG.slhlGuhlAhlEPslNTVuaYFHEKlWp... 
0 40 84 119 +9667 PF09835 DUF2062 Uncharacterized protein conserved in bacteria (DUF2062) COGs, Finn RD, Sammut SJ anon COGs (COG3216) Family This domain, found in various prokaryotic proteins, has no known function. 21.50 21.50 21.60 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.86 0.71 -4.61 145 963 2009-01-15 18:05:59 2007-07-30 14:57:42 4 4 883 0 327 819 535 150.40 24 77.18 CHANGED ahR....hhthhhpc.t.hhphs.+ulAtuhAlGlFsu.hhPh..shQhllAshlAhhhRu.......Nlslulhssal.oNPlThsslah.hsYpl..GshlLutss...........................................hth..hph.pWht..............htslhhPhllGullhullsuhluYhlshhlh....+hhhp+c...........h..pp+ .................h..+..ht.hh.hc.h....hhphs.+p....lAtuhAlGlFhu..hhPh....shp..hl..l.u.hh..l.A.h.l.h...+.s.......N..lshu.l.hs.s.hl..sNP.l.Ths.sl....ah....hsYpl...Gphlhstss...........................................hth..hp..hpat.th.............p.htslhhshLlGsh...lhullh...ulluYhlsthlh....chhht+php.................................. 1 115 241 287 +9668 PF09836 DUF2063 Uncharacterized protein conserved in bacteria (DUF2063) COGs, Finn RD, Sammut SJ anon COGs (COG3219) Family This domain, found in various prokaryotic proteins, has no known function. 22.50 22.50 24.50 23.80 21.40 19.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.72 0.72 -4.00 112 668 2009-01-15 18:05:59 2007-07-30 15:01:16 4 4 545 1 222 634 74 91.00 24 35.49 CHANGED tphQpsFtsslhssst..ss..............hs...tssspRhslYRsthhsslhssLsssFPslppllGc-tapshuptahppps..spoPhhhchGtcFssFLp ........................................thQptFtttlhtspts..........................t.sh.sscRhslY+phhhsslhssLsssaPhhp....pllGc-t...a.ptlsc.t.alppps..spoPhhpchutcFspalt................ 
0 55 117 179 +9669 PF09837 DUF2064 Uncharacterized protein conserved in bacteria (DUF2064) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG3222) Family This family has structural similarity to proteins in the nucleotide-diphospho-sugar transferases superfamily. The similarity suggests that it is an enzyme with a sugar substrate. 25.80 25.80 26.00 26.10 25.60 25.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.77 0.71 -4.50 93 550 2012-10-03 05:28:31 2007-07-30 15:02:34 4 8 506 1 212 557 305 121.80 26 49.00 CHANGED Tlt........phtpssshshhlshssstspstht.....................hhssshthh.Q.suuDLGpRhtpAhpps..........ttshp.slllIGoDsPsLssphLppAhptLppp........-h..VlGPApDGGYaLlGhp....p....hs.plFpslsWSo ...................................................................hhh.......t...hthhlhhs.sthttth.......................hhhtshp..hh.Q....tu.ssLGpRhtpAhtps.........................tshp.slllIGsDsPplssphltpAh..p..tL..ppp........................-s..V.lG..PA.p.DGGYaLlGlp.......t..hs.plF.p.s.lsWup................... 0 84 161 199 +9670 PF09838 DUF2065 Uncharacterized protein conserved in bacteria (DUF2065) COGs, Finn RD, Sammut SJ anon COGs (COG3242) Family This domain, found in various prokaryotic proteins, has no known function. 20.80 20.80 20.90 22.60 20.70 20.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.42 0.72 -4.26 86 1053 2009-01-15 18:05:59 2007-07-30 15:03:48 4 1 1049 0 199 485 964 56.60 49 87.80 CHANGED llhAluLlLllEGlh.hlhPsth+chhtpls.phssppLRhhGLsshlhGllll.allp .....lhlALuLVLVLEGLGPhLaPpuW.++Mlsshs.pLPDshLRRhGGuLVVAGlVlhYhlR.... 0 45 106 152 +9671 PF09839 DUF2066 Uncharacterized protein conserved in bacteria (DUF2066) COGs, Finn RD, Sammut SJ anon COGs (COG3249) Family This domain, found in various prokaryotic proteins, has no known function. 
20.70 20.70 21.10 21.00 20.40 20.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.21 0.70 -5.24 60 369 2009-01-15 18:05:59 2007-07-30 15:04:52 4 1 364 0 106 346 429 233.00 28 62.88 CHANGED Ahsls.sLYpucl..sVs.upsspscstAhppALppVll+loGspp..shpssslpptlpsspphlppauYp..........p.ts........phhLpssFcspplpplLppAplPlWu.ssRPtlLlWls...-sstsRp.lls...-pss..st..htptlpptAppRGlPlhhPlhDLsDphslsssDlWGtFsssltpASpRYssDsllss+lppp.s............tt................p.hphpW.pL.......asssspppt............sspuushsthspshhssluchhuscY..A ...............................................A.phssLYpucl..sVs.spps.sscstAhtpuhppVllRsoGsp.s.....sh.pssslpptLc..pssp.al.sQauap.....p.pu.................................ptsLplpFsspplcsLLpp.At...L.PhWs.psRPslLlWll...-s..s.tspp.llh.......-pss...........hhptlpptAppRG.L...PLtl.P...l.uDh.sD.ss.ls..ss..-lhGs..sss..lppASpRYssDslLll+hpts..t..................t..........................................................................................................t.h.hcW.pL.......aspssppts..........pussussppshsthhstluchhup+.................................................................................................................................................. 0 33 56 82 +9672 PF09840 DUF2067 Uncharacterized protein conserved in archaea (DUF2067) COGs, Finn RD, Sammut SJ anon COGs (COG3286) Family This domain, found in various archaeal proteins, has no known function. 
21.90 21.90 21.90 49.10 21.40 21.10 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.98 0.71 -4.79 13 57 2009-01-15 18:05:59 2007-07-30 15:05:35 4 1 57 0 44 60 7 190.70 25 92.48 CHANGED loh+hssc-EhccFhctLp+t.htssah.lcs+h........s+lhlpl.Gs-+.clc-shppl+pLtspl+pch...pcuhhchsLpsLhR-As..hslPs-llsDALphhGhcscl+s-..hlcTsAsh-Ell-hsccLuchYpElchhslTsp.s++llsshuastshsl--slEEhlEpGlLccsEst..IsLpcshcpsc+cL ...............lsh+htsccEh.chhcpl.p+h...sh..h..lcs+h..........splhlpl.Gpcc.clc-shppl+plhppl+sch....tcGlhpYslsslhc.ht..tslshslll-sLch.hGhpschccs..hlc..Tshsh-ElhclsccLs-lhs.Elph.shsop.s+cllsssuhh..tshss--ll-chl-tGlhcctEct.+h.Lp+shcpAhc.................... 0 13 19 30 +9674 PF09842 DUF2069 Predicted membrane protein (DUF2069) COGs, Finn RD, Sammut SJ anon COGs (COG3308) Family This domain, found in various prokaryotes, has no known function. 22.10 22.10 22.30 24.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.08 0.72 -10.46 0.72 -4.10 61 494 2009-01-15 18:05:59 2007-07-30 15:07:36 4 2 491 0 139 400 175 107.70 35 80.19 CHANGED phluhsuh..luLlhLhhsWphhluP............................................lpsssshl......lhhlPLLlPLhGlLcGcsYTatWusallhlYFhcGlstshssss..pthlAhlEllLssshFsushhYsR.h+spph ..........................hluhhuh..lALls...LhlhWplhluP............................................lpssushl.....hlhslPLlLPL.GlL+.spsYTatWushllhlYFhculsthassss..pph..hAhlEllLshshFhushhYsR.h+tt................. 0 25 73 107 +9675 PF09843 DUF2070 Predicted membrane protein (DUF2070) COGs, Finn RD, Sammut SJ anon COGs (COG3356) Family This domain of unknown function is found in various bacterial hypothetical proteins, as well as in prokaryotic polyketide synthase. 
21.90 21.90 21.90 27.90 21.60 21.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.90 0.71 -4.80 41 170 2012-10-02 15:46:01 2007-07-30 15:08:55 4 6 154 0 87 168 17 176.50 20 30.93 CHANGED llDs+Nphssh.......lhPpspcstth.suhcphhpthth.........pphsltsGhsps....hspptslustGlpshllcsssp+sslllhDuNNhcsslhcclpctlsph...hp.h.lhToDsHhlsshstss...h.lG.hh...sscclhpthtpssppAtpslp.sp.suhtpshhp.lpshGpc..hpplsuss ....................................................................Llch+sphtsh........lhPpspcut.hpsu.hchhhcthsht.........pphplts.Ghhp..uh..sh..hscpssls.tuhtsh.llcs...s....p+sslhhhDuNNh-stlhcclp.cthspl...........h.o.DsHhl..sh.st..Gs.h.h.hGshh......scchspshptss.....pp.utpslc.sc.huhppsh...lhshGpc..hp.l.sh.............................................. 0 25 56 76 +9676 PF09844 DUF2071 Uncharacterized conserved protein (COG2071) COGs, Finn RD, Sammut SJ anon COGs (COG3361) Family This conserved protein (similar to YgjF), found in various prokaryotes, has no known function. 
21.00 21.00 23.90 23.30 19.30 17.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.39 0.70 -4.93 36 298 2012-10-01 19:21:02 2007-07-30 15:09:20 4 2 252 0 118 282 111 211.30 27 85.83 CHANGED hsQpWp-LsFlHWsVDPpslpshlPsGhclDsa.-GpsaVulVsFhhpps+htulP.lPahpsF...sElNlRhYVhcpss...++GVhFhSL-ss+hhsVhsARshapLPYha..upMchppps..........tplpapupR+hs.........thp........phphpssst...scs..ssLspFLTtRathasptps.pshhhclcH.sWsLppAchhth.ssslhsusGhthss....sssplhaususcl ..............ptpWpclhFlHWsVsPptltshlP.su..hc.Dsa....-....G..p...s..a..lulVsFphpsh+htshP......l.Phhs.sF.....s.EsNlRhY.Vhpsss....++GVhFhul-ss+hh.sshluRhhht.l.P..Yhh..u.cMphppss......................pplpapop+chs.............shp...........phsh...p.h..s..s............hs..ps.....ss.lph...a...LTtR........a.th........asp.....tt.....s...phh.h.h.spH..W.Lptuchhth..psslhsstGh..ss.....s...shau.s.t........................................................................................... 0 56 91 115 +9677 PF09845 DUF2072 Zn-ribbon containing protein (DUF2072) COGs, Finn RD, Sammut SJ anon COGs (COG3364) Family This archaeal protein has no known function. 
23.00 23.00 23.00 24.00 22.40 22.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.82 0.71 -4.22 10 88 2009-09-11 04:52:25 2007-07-30 15:09:51 4 2 57 0 64 91 8 101.50 22 83.37 CHANGED PH+CT+CG+lFcDGu.cILpGCPsCGs+KFLYV+-cccsp...........cs..ctsl-phtc-tccEltpspp.c.p..t....t...................ct-RIESVRIluPGoYELNLspLhcs-ElVlul.p-EGoYhlDLsShhK+p ......................................................................................................pCtp.h..ptt...hhpGC..CG.phF.a..tt.............................................................................................................................................................................................................................h-ol.l.t.G.Y-lNl..lhpp.thlht..tppG.Y.lpl.......t.............................................................................. 0 11 50 60 +9678 PF09846 DUF2073 Uncharacterized protein conserved in archaea (DUF2073) COGs, Finn RD, Sammut SJ anon COGs (COG3365) Family This archaeal protein has no known function. 20.40 20.40 20.60 62.70 17.90 19.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.28 0.72 -4.38 14 56 2009-01-15 18:05:59 2007-07-30 15:10:39 4 1 56 0 41 56 7 104.00 53 83.96 CHANGED LpphsoMEKIRLILDsV+cGsIVlLEpGLoP-EEu+LIEhTMpEIsPDsFoGIEIEoYPtpp..ppuhhu+LhG+ppsp.+LTVIGPANplKTL+K-cshISAhlS ....hpphsSMEKIRhILDpV+-GsIVVLEcGLoP-EEu+LIEhTMsEIsPDpFsGIEI.....EoY...Pppp.....tssohLu+lhG+ppsp..+LTVIGPANQlcTL+KDcslISALlS.. 0 8 32 38 +9679 PF09847 DUF2074 Predicted permease (DUF2074) COGs, Finn RD, Sammut SJ anon COGs (COG3368) Family This domain, found in various archaeal hypothetical proteins, has no known function. 
29.60 29.60 29.60 30.50 29.50 29.50 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.78 0.70 -5.73 10 50 2009-01-15 18:05:59 2007-07-30 15:12:31 4 1 46 0 31 52 3 416.40 21 86.14 CHANGED slplshhhpSluFhhhulhlsssuhhuc+ts....hhSshlll.FlaslasTuhssuahhou....+ll-PL+uLPlshhsplhuVhhhI-ohsuhhFll....hlushhlsshstuLLGLlWuhhulLhG.aSluhllhltFGsphsGRtohs+slh+shGhllhhh.lhulahll.h.hphl...sthlsslhphYphlaP........hhs.sIhtshhu..hlLullYh.lhhshhYhhsl++LappL...EstcspGplphchpl+scushlohlhKDaKllhR+oQsLshlLhPlahslhhlhslsp.s..hh.....hshhhlshlsllSSlhLphhhtIEssGF......phLpSLPls++chlhsKhLhhhlI.hhlshslhhlshlass.tsslYhhhllssslLsshluhhashp+l......su-slphssh.GhlssIlhhllshlslulsulhsFhLstPlu......allssuhsl....IsuLllac+ ..........................................................hh.p.h.ahhhshh..h.....hh..p.........h...hh.....l.hlhsl.ahoshtshhhhst....tlhp.LhsLPlp.t.....s..hhl..u..hh..h..hl.ph.hh.hhlh....hhss...h...hhhhh.ul..Lshlhhh.htlhhh..a....slu...hhlhhhhGtth......+huhspslh+..h..hshll.hhh.l..hh...hahh....................thh....l..hh.hhaP........hsh.sl.t......hll...llYh..hhhshhahhsh+tl.h.pl.........hpuchp.hphp.h.phpsshhuhhtKDlKhlhRp....sph....lh.hllhPl.hhslh.....hhshl.....................hslhhlh.hh.shho.uhhhth....hhtlEtps..h......plLpoLPlsh+phhhuKlLlhs..ll.hhl.hh.hhIhshlash....sthhl.hhh..hlhsh.ll.sstluhhh.shpp.h.........pscslphs.h....shhshl.l.hhllts...l....hlGlsul.h.hahhshhhs................hlhshh.hl....l..h....................................................... 0 13 16 25 +9680 PF09848 DUF2075 Uncharacterized conserved protein (DUF2075) COGs, Finn RD, Sammut SJ anon COGs (COG3410) Family This domain, found in various prokaryotic proteins (including putative ATP/GTP binding proteins), has no known function. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.18 0.70 -5.48 63 776 2012-10-05 12:31:09 2007-07-30 15:18:44 4 21 611 0 219 3403 1226 288.50 20 51.73 CHANGED +plhllpGssGoGKollulslhscl..............sp.............ppthstalssNps.ht.....hlpcplstsh..........tthhpphhtpssuFIpshp......................t.sphDllllDEAHRLhs...........pts.......h.phtt.tsplp-lhcp.u+..lslhlhDpsQtlphs-hsshpp.....lcchhpphshpht.................................pLssQhRs.tuucshlsWlcsllphpthtshsh.............ppsa-h+lhcsspphppt.......lcp+.....sptss..p..uRllAuasasht.....p.psshtshhl.......tp.aph........................tWshpst.....................Wspps.psl-cVGola.....TsQGhEL-YsGVIlGs-lhac......psththptp..phpDtsstpp+.................tt.htpllpNsY+VLLTRGhcGhalassD .................................................................................hhhlpGssGoGKoll.h...h..plhtcl..............................................t..................................................................t..tt.p.s.......a.l...s.....h...N.....p..s.....h.....................h...l..h...p..p..lspph..............................h.p.p....h..h..p....s......s.....s.....h..l.pphp...................................................................................................tph.c.ll.llDEA.....p.hlhs...................................................p.t....................t.h.t....tspl.p.clh......pt....sc..............lhlh.hh.D.t...t.Q...h...l....p....pphht.t...............lpp..hhtth......h.t........................................................................................................pLppphRh.....t.......s.....t...t...h..h..p.....a...l.pt.hht...t.................................................tta...phphh...ps...tt.......h.t................l.tt............................................tt...................thh.s....s.sh...............................h.........................................................................................
...............W.....................................ht.....phus.a.....shQGh-hsashlhhs.sh.........................t...p...................................hhhp.h.hLhpRuh.Ghhlh......................................................................................................................................................................................................................................... 1 59 123 170 +9681 PF09849 DUF2076 Uncharacterized protein conserved in bacteria (DUF2076) COGs, Finn RD, Sammut SJ anon COGs (COG3416) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. The domain, however, is found in various periplasmic ligand-binding sensor proteins. 38.00 38.00 38.40 38.40 37.90 37.60 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.75 0.70 -4.32 44 304 2009-01-15 18:05:59 2007-07-30 15:19:05 4 3 286 0 119 304 5 229.80 31 98.75 CHANGED MsspEppLI-sLFsRLppscss.........PRDspA-uhIsctlppQPsAsYhhsQslLVQEtAL+phpt+lppLcsph..............stttttt.uGGFLu.uh....FGustsp...s.Pts..ssts......................tssasptts..ts..............p.thsts............t................GuFLGsAhsTAAGVAGGslLhsulpshFssspt.tthss..t..................ssss.u..........p-sshsshtsspsts............................h-pus.st.s..shsDus.sss.s.s...................DD........Dssa ..............................................................................MsspEppLI-sLFsRLppApst.......................sRDspAEthItptlppQPsAsYhhsQslLlQEtALcphppclppLctpltt..................tptttttuuGGFLu..ul...........FGuuss.p......stPss.tssts......................sshsttt......s.......................psshsts...............ts.................uGFLGuAhpTAAGVAGGh....lLusulsuhFstppt.tthsthhtt........................t..tss..............ps..ss.hss.hh.sssss........................................cpst..s..........ts..tDss.sss.s..................s...................................................... 
0 25 56 86 +9682 PF09850 DUF2077 Uncharacterized protein conserved in bacteria (DUF2077) COGs, Finn RD, Sammut SJ anon COGs (COG3455) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.90 20.90 21.30 21.70 20.60 20.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.17 0.70 -4.92 122 1603 2009-01-15 18:05:59 2007-07-30 15:21:13 4 8 979 5 300 1102 33 200.90 26 64.69 CHANGED NsLlss.ussLlthlspl+p..tsptssh.tplcpplhp..plpphcpph.pptuh......stpplhsu+YsLC.shlDEslh.sos....Wutpu.....tWtppsLhspaas-s.GGE+hFphLpclh..ppPtpphclLElhYhCLsLGFcG+Y.ph.tpGpp..plcplpppLhphlpph..........t...t....sl.ssphpssststpph.ptt.hPhW.hhhulss.hlhhshahshphhL .............................................psllss.spsLlthlhplpp......htstsps..ppl...tpplhp..plpthcpth..pptuh...........p.tppl.hsh+Ys...LC.shlDEu...sh.sss....Wusps.............................tWtppsLhspFas-s.GGE+hFplLc+Lh...ppPp..p...hhslLEhhahsLsLGF.cG+Y..pl.........tp.upt.........ph-pl...hp....pLhphlpph......tsh.....................sl..ssphttst..ss..php..h....t.........p..t...lshh..hhhshsh..llhh..hhahhhphh.............................................................................. 0 50 115 206 +9683 PF09851 SHOCT DUF2078; Short C-terminal domain COGs, Finn RD, Sammut SJ, Eberhardt R anon Jackhmmer:B0PET9 Family \N 24.00 15.30 24.00 15.30 23.90 15.20 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.20 0.72 -4.34 733 1938 2009-09-13 15:00:14 2007-07-30 15:22:51 4 75 1219 0 751 1854 980 30.70 31 15.57 CHANGED htspLcpLccLhspGhIoc-EFcppKpcLLs ...........ppLcpLccLhspGhIoc-EappcKpplL........... 0 286 560 699 +9684 PF09852 DUF2079 Predicted membrane protein (DUF2079) COGs, Finn RD, Sammut SJ anon COGs (COG3463) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
22.00 22.00 22.10 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.56 0.70 -5.77 53 276 2012-10-03 03:08:05 2007-07-30 15:24:01 4 2 206 0 118 317 177 416.90 18 80.11 CHANGED Rahsh.......ssuaDhGlFsQhlaphupG+.h.hSohps..................ptLGpHhs.llhlhs.....P.lYtla.Ps.hsLhllQshhluhuulslatLAppt.t.s........pphuhhlshlahht.sslh.ss.sL.asFHs.shhh.Phhhhulh.uhpp........c+hhhhhlhhlllLhs+-shulslhulGlhlhlp.................c..............h................uhshhhhuhsahllsspll........hPha.sst.t....hhhtt.....................au.hh.......sushpllhshltpPh.hhhptlht.......slhY.llhlhlPhhalshh..........s.shLlsulPllhh.llu.stss.hslsa..+Ysh.llPhlhlusl..shpptp......................................phthhWssshhLuLshshhhs..................................hsYhshhpphsps...ppslshlP.ssuuVhss...........s.LhPpLup..Rpslhhhsts....hp..........t...pph-allls.........t..shtsstct.pphlsphpppspaphhht...psGlhl ..........................................................h........suaDhGlFpQhhh.hs.p.tp...hsshpt..................phhusHhs..lhhlls......s.lYhla....Ps...s.h...hLl..llQshhl..uhuulslahlupph..h.s...............................pthuhhh..uhsYhL..sslh...ss..sh.hs.....FH...shuh..Ph.lhhuhh..shtp.............pphhhhh...hhs..hhllhs+-.....s.h...ulh..lhs.h...Gl..hlhht......................................................................................p.......................p..th................................uhhhhhhuhsahhlshtll........hPhhtts...............................as.hh....................................suhht.hhhshltpPh..hhlttlhs.s........................plta.llhlhhshhhlshh............pshhllsslPh.lhhphlu.s.s.....hshhh...pY.shslhPh.lhhusl...shtphp.................................................................................................t.h.hh..hh...shh.....h.h.h..shh.hhhhhs.................................hhhp...hppht.th...pphls..lP..ssusV...hss...............s.lhstLst....+p.lh.hh.tt......................t.callls.......................
..........h................h....................................................................................................................................... 1 38 74 102 +9685 PF09853 DUF2080 Putative transposon-encoded protein (DUF2080) COGs, Finn RD, Sammut SJ anon COGs (COG3466) Family This domain, found in various hypothetical archaeal proteins, has no known function. 21.00 21.00 21.00 21.00 20.70 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.36 0.72 -4.23 6 51 2009-01-15 18:05:59 2007-07-30 15:25:35 4 1 29 0 24 55 12 47.10 31 78.22 CHANGED lpIpKcoW+Ks+phhsoFhshVpshGNSAplD..lPKEYlGKTVhlTll-cD- ....................t.h..pth.s.ha.tpVp.hGNSA+ls..lP+-alGKplhlsllcpp........ 0 10 15 20 +9687 PF09855 DUF2082 Nucleic-acid-binding protein containing Zn-ribbon domain (DUF2082) COGs, Finn RD, Sammut SJ anon COGs (COG3478) Family This domain, found in various hypothetical prokaryotic proteins, as well as some Zn-ribbon nucleic-acid-binding proteins has no known function. 20.60 20.60 20.60 20.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.21 0.72 -4.13 18 259 2009-01-15 18:05:59 2007-07-30 15:27:38 4 2 254 0 56 152 85 63.10 45 87.63 CHANGED atCPKCGsssa-psplpsTGGshSKlFDVQNc+FhsloCppCuYTEFY+....upsSshtNlhDhhhu ..........sC.KCGs.....p.....pYtscphp..u.TG.Gshu......K....l..FDVQN+.+.Fl.....ol......o.......CspCGYT.ElY+......upoSsuhNlLDhhhG............ 0 22 40 48 +9688 PF09856 DUF2083 Predicted transcriptional regulator (DUF2083) COGs, Finn RD, Sammut SJ anon COGs (COG3800) Family This domain is found in various prokaryotic transcriptional regulatory proteins belonging to the XRE family. Its exact function is, as yet, unknown. 
21.30 21.30 21.40 21.30 20.70 21.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.06 0.71 -4.57 67 753 2009-01-15 18:05:59 2007-07-30 15:28:12 4 8 475 0 213 613 820 153.00 46 32.83 CHANGED TLQRPGA+GlPFFFlRVDpAGNloKRh.SAosFpFuRaGGuCPlWsVHpAFtp.PGcllsQluchPDG....ppYlslARoVspssuuastss+phAluLGC-lpaAsclVYAcGl...........sLssssu...sPIGhuCRlC-RssCsQRAhPPls+tLtlDppppuhsPY ...................................TLQRPut+GlP.FhFlRlD+AGNloKRp.SA.suFpFuRhGG.sCPLW....sVapAFs......p.PG+..ll..p.Q.lAph.P..D.G....pp..Yl.hlARTlpp.ssuu.as.tPs.+paAluLGC-lpHApclVYusGl...........................cls..ssss...sPIGsuCRlC-RpsCsQRAhPsls+tLslDpptpsh.PY.......................... 0 47 134 177 +9689 PF09857 DUF2084 Uncharacterized protein conserved in bacteria (DUF2084) COGs, Finn RD, Sammut SJ anon COGs (COG3811) Family This domain, found in various hypothetical prokaryotic proteins,as well as proteins belonging to the UPF0386 family, has no known function. 21.50 21.50 22.30 50.10 20.80 21.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.70 0.72 -4.28 25 284 2009-01-15 18:05:59 2007-07-30 15:29:07 4 1 276 0 48 145 6 84.50 70 98.69 CHANGED MNISKsEQRlLHlLAQGGcIphpRs-sG+lsclpChTR-GahLusCoLsVFc+LK+KRLIpScsGpPYRITcpGLpsVRuQLDNR .....MNLSRQEQRTLHVLAKGGRIsHlRDuSGRVTuVECYSREGLLLuDCTLAVFKKLKTKKLIKSVNGQPYRINTTGLNsVRAQsDNR............ 0 15 25 35 +9690 PF09858 DUF2085 Predicted membrane protein (DUF2085) COGs, Finn RD, Sammut SJ anon COGs (COG3815) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.00 25.00 26.10 26.00 24.00 21.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.35 0.72 -3.91 24 150 2009-01-15 18:05:59 2007-07-30 15:29:42 4 2 123 0 61 129 105 88.10 34 60.70 CHANGED hCHQhs-RSaahtGpQhPlCARCTGIYlGhll....shlhh.......hthhshslhlhllhhlP.hslDGhsQhhs...............hhcSsNh.......LRhlTGlhhGhuhshhl ...............CpphspRohhhhsh.hslCARCsGhhlGhlh........hhlhhhh.........hhh.h.h.h.slhl.sh....h.h..h.lP.hhlDGhsQhht..................................htpSsNh.......LRllTGlLsGhuhuhh................ 0 26 46 56 +9691 PF09859 Oxygenase-NA DUF2086; Oxygenase, catalysing oxidative methylation of damaged DNA COGs, Finn RD, Sammut SJ anon COGs (COG3826) Domain This family of bacterial sequences is predicted to catalyse oxidative de-methylation of damaged bases in DNA. 25.00 25.00 25.40 25.20 24.30 21.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.06 0.71 -4.94 42 288 2012-10-10 13:59:34 2007-07-30 15:30:15 4 1 258 0 107 277 10 168.60 57 71.64 CHANGED RGEY+YFsYPLPchltpLRsuLYP+LsPlANRWspphGhssRaPsphs-aLspCHtAGQpRPTPLLLpYusGDYNCLHQDLYGEhVFPLQVslLLScPGpDFoGGEFVLTEQRPRhQSRupVlsLpQGDullFsspcRPVpGsR.GhYRVshRHGVSclRSGpRaTLGlIFHDA ...................pGEY+YFsaPLP..phlppLRpuLYP+LsPlANcWttthGh.-spaPsshscaLtpCHsAGQsR....P....TPLlL+YGsGDaNCLHQDLYGEhVFPL.QVsllLScPGpDFTGGEFVLTEQRPRhQSRup..VlsLpQG-..ullFss+cR..PVpGsR.G.hYRs.....slRHGVSplRSGpRa.TlGlIFHDA...... 0 31 58 87 +9692 PF09860 DUF2087 Uncharacterized protein conserved in bacteria (DUF2087) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG3860) Family This domain, found in various hypothetical prokaryotic proteins and transcriptional activators, has no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 
22.40 22.40 22.70 31.70 22.20 22.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.34 0.72 -4.22 55 586 2009-01-15 18:05:59 2007-07-30 15:32:31 4 10 465 0 149 460 21 72.10 36 40.26 CHANGED G+LpphP.u..+pp++lllLphlhppF.c.sppYoEpElNpllpph..a.s.DaAhlRRtLl-hGhlpRp.tGutY.......Wpt .........................G+LpphPp....+pK+Kll....lLpclh.p.c.h.css..ppYoE+EVNp.hlcph..........as..DasslRRhLl-aGalcRst.GstY.......hh............. 0 64 116 134 +9693 PF09861 DUF2088 Domain of unknown function (DUF2088) COGs, Finn RD, Sammut SJ anon COGs (COG3875) Domain This domain, found in various hypothetical prokaryotic proteins, has no known function. 23.30 23.30 23.30 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.92 0.71 -5.30 50 648 2012-10-02 00:55:42 2007-07-30 15:32:58 4 4 416 2 239 630 126 196.70 26 45.88 CHANGED YGcs..plplplP-cplhullpspt..htshtctpphlpcALpsPIuopsLp-llpst..c+lsIlsSDhTRP.sPs.......chhlPhLlccLpt.Gl.-csIpllsAhGhHRtpTcEEhtphlG......p-lhpch....cllsHsspDcsshshlGpos..............pGsslhlN+hssEADhlIssGsIp.HaaAGaoGGpKulLPGlAuhcTIhsNHsh...hl.ssputhG ....................................................................pt.....h.l..t........thh......t..............s..tthlpp...ulp.p.Plss..................Lpchsps....t..........cplsIlssDh....TR....s..sss.......phll.shllccl.pp....sh.ptp.l.tlls.AhGsHRst.Tc-.Ehtphl.G......pc.lhp.p.h....pll.sHpspsppshlpl...G.p.ss........................pG.sslhlN+hsh..E..u..D..hlIshGhlpsH..h.a...u...G...a...SGG.tKslhPGluuhcolttsHsh.hl.t........................................................ 0 137 205 230 +9694 PF09862 DUF2089 Protein of unknown function (DUF2089) COGs, Finn RD, Sammut SJ anon COGs (COG3877) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. This domain is a zinc-ribbon. 
29.10 29.10 29.20 29.80 28.30 29.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.26 0.71 -4.28 18 297 2012-10-04 14:01:12 2007-07-30 15:33:38 4 2 274 0 103 262 1 98.50 33 87.76 CHANGED CPVCGcchpVT+LcCspCsTsIEGcFclscFspLopEphpFlclFlKsRGNIKElE+ELGISYPTVRs+L-cllsALGa............psps-tpsc+c-lLccLccGEISsEEAlchLK ..............................pt.t.h.l.ph.h.t..s.h.lpspFt....s.................h..hspLspEchpFlchFlpspGslKElp+phulSYPTVRsRLDclIpplsh....................tpt.t.t....plhp.l..ptpls.ptA...................................... 0 52 76 92 +9695 PF09863 DUF2090 Uncharacterized protein conserved in bacteria (DUF2090) COGs, Finn RD, Sammut SJ anon COGs (COG3892) Family This domain, found in various prokaryotic carbohydrate kinases, has no known function. 21.40 21.40 21.40 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.98 0.70 -5.58 34 447 2012-10-03 05:58:16 2007-07-30 15:35:24 4 3 431 0 108 433 338 300.60 50 48.52 CHANGED ELsaaLpcup..phs....c.cpDtpLp+LHtsTsR+.......ppWspLplhAFDHRtQLt-hAtpsGushs+IsshKpLhlcAutpVsp.....Gh...thGlLsD.spa..Gp-ALppAoGpGa.WIGRPlElPGSRPLchE..tGtslGupLhcWPtEHVVKCLsFYHPDDsstLRtcQEpplpcLapAs+psG+ELLLElIs.....Pcs.....tPhs.DsshhcslcRhYsLGlhPDWWKLtPh.ospuWpplsslIpc+DPaCRGlVlLGL-APt-pLtsuFtsAAs.slVKGFAVGRTIFu-su+tWLsG-lsDtphlscltspatpLlshWcptR 
.................................................................................................ELs.aLtRup..phs....RPDtDspLsHLHRVTs.R+.......ppWscLplFAFDHRpQLh-....h.A....p..p....s..G..A....s.....s....+I....stLKpLhLc.Auppsutph..................Gl...pspsGlLsD.spa..GQ-A.L.N.ss...T.G.pG......W.....WI.....G..R.P.lE...hPG...SR..P..Lc..hE..+G.slGS.pLlsWPt.......E.......H.......V.......V.......KCLVFYHP-DssplRhEQEtplpclacAsppSGHELLLElI..............Pcs...............t.s.sp...-phhhcslcRFYpLGIhPDWWKLsPl....osssW.......pplsplIpcpDPaC.RGl..VlLGLDAPtpcLcsuFsuAAshshVKGFAVGRTIFu-sSRpWLuGclsDtpLIupltppYppLIshW+pt........................................................................................................................................... 0 15 46 76 +9696 PF09864 MliC DUF2091; Membrane-bound lysozyme-inhibitor of c-type lysozyme COGs, Finn RD, Sammut SJ anon COGs (COG3895) Family Lysozymes are ancient and important components of the innate immune system of animals that hydrolyse peptidoglycan, the major bacterial cell wall polymer. Various mechanisms have evolved by which bacteria can evade this bactericidal enzyme, one being the production of lysozyme inhibitors. MliC (membrane bound lysozyme inhibitor of c-type lysozyme) of E. coli and Pseudomonas aeruginosa, possess lysozyme inhibitory activity and confer increased lysozyme tolerance upon expression in E. coli [1]. Structural analyses show that the invariant loop of MliC plays a crucial role in the inhibition of the lysozyme by its insertion into the active site cleft of the lysozyme, where the loop forms hydrogen and ionic bonds with the catalytic residues [2]. 
21.30 21.30 21.30 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.68 0.72 -4.24 101 1381 2009-01-15 18:05:59 2007-07-30 15:40:44 4 15 1175 9 224 727 37 71.70 32 55.43 CHANGED YpCt.....ss...pplpspahsssp.........shlphss....pphhhspsh.SASGA+Y.......sss.phpa.W.sK....G.s-Ahlthttpss.........sC .............................YpC.........sp..pslsV.phsNsppt............ssh.s.hcs.......phl.hLppsl.S.AS.GA.RY........scu...hYsaW..oK....G...cpAplhppcchsh.......................................... 0 47 108 166 +9697 PF09865 DUF2092 Predicted periplasmic protein (DUF2092) COGs, Finn RD, Sammut SJ anon COGs (COG3900) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 27.50 27.50 27.50 39.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.26 0.70 -5.28 17 95 2009-01-15 18:05:59 2007-07-30 15:40:57 4 4 74 0 50 114 16 209.90 29 77.32 CHANGED cAhplL+sMusaLtutpphShph-ssh-slhtsGQKlpauuouplslsRPD+lRssh.pushscschhaDGKTholhu.ssshYupsstsu.slD...tll-cLpschulplPhuDLhlss.shstlt.pslhsuhhlGps.slsGs.CcHLAFRps-lDWQIWIspGspPlP++hVITt+pssupPQaolphpsWshss.shssssFsFpPPssAcpl-h..ssh .....................................A.plLcpMusaLtstcshohshcssh-.lhtsG...pKlpasu...ssplplpRPs+LRssh.pGs..tsst.....c..hhaDG+s.hTlhs.stshYuphssPs.ol-...ph.l-plppchulphPh.uDL...l..hss.shsslh.sslpsuhhlGps.hlsGstCcHlAFp....pps.hD..aQlWIspGspPLPp+hlI.T.+sh....sup....P.......Qassphp...sWshss.phs.sssFsFpPPssApplchh.h.... 0 14 29 41 +9698 PF09866 DUF2093 Uncharacterized protein conserved in bacteria (DUF2093) COGs, Finn RD, Sammut SJ anon COGs (COG3908) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.00 25.00 26.80 63.90 24.40 18.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.13 0.72 -4.78 30 196 2009-09-10 21:13:40 2007-07-30 15:41:37 4 1 191 0 75 146 668 42.10 57 55.22 CHANGED llpsGsaVhCAVTGppIPLD-LRYWSVc+QEAYss...uptuhpR ..llpsGoaVpCAVTGtsIPLDEL+YWSVsRQEsYAo...sthuhcR...... 0 19 44 55 +9699 PF09867 DUF2094 Uncharacterized protein conserved in bacteria (DUF2094) COGs, Finn RD, Sammut SJ anon COGs (COG3913) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.80 20.80 23.80 27.20 18.40 20.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.72 0.71 -4.28 33 490 2009-01-15 18:05:59 2007-07-30 15:43:05 4 6 421 2 119 397 5 130.00 25 47.21 CHANGED GaaGKlPutGDFVp+pLPpuhlpsWDsWlppulssspp..thsttWtpsahsuPlWRFslssulsG....tshsGllhPShDRVGRtFPLslssslssss.....s.hhhstssaaptlcshhluslctsssh..-tLspuLssLssP ..........................uaaGKlPutGDF.l.ppths.thht.hctWhttu.hth.tp.....t...t.ht.tsappsssWpFshssuhhs....tthlsGslhsSpDpVGRhaPlllhpthshpp.h......spthttstt.ah.ttltphhhptlp.spt.h..tthtptltth...h.................................................. 1 19 49 81 +9700 PF09868 DUF2095 Uncharacterized protein conserved in archaea (DUF2095) COGs, Finn RD, Sammut SJ anon COGs (COG4003) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 39.40 31.50 19.90 18.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.80 0.71 -4.36 3 25 2009-01-15 18:05:59 2007-07-30 15:43:31 4 1 25 0 21 28 1 109.50 47 90.30 CHANGED EKKKKPIDELPWQEYDIEEFKc+FPALARELEE-hG.LEIoGIRLDEYQVLEEEEEE.KIDFSGYNPTIIDFLRRCDTDEEALEIINWMEE+GEITPEMAKELRVTLVHKGVRAFGPKKEWGWYERHGKH ..................pY-h-EF+c+aPtLA+El..-tpu.l.Ips.phs.......-.......sauGYsPTVIDaLRRCcT-EEALEIINahEc+GEIopEhAc-LRhpLs+KGVRuFGs+KEhGaY.ER+u..h...................... 
0 4 9 14 +9701 PF09869 DUF2096 Uncharacterized protein conserved in archaea (DUF2096) COGs, Finn RD, Sammut SJ anon COGs (COG4010) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 22.50 22.50 22.50 140.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.87 0.71 -4.38 7 27 2009-01-15 18:05:59 2007-07-30 15:43:47 4 1 27 0 19 25 2 169.20 39 95.52 CHANGED l-pQWVVLsELsocL.p+uh.VPp-VhpcLRhApsllsaYhhD.Hs..shc.Ltcs-+.LsplQthLhsls-...sDhsccal-Kht+AhRsEhshc.PhppSpFp..lp+....tss-sIRVph.c.lp.EhLt-luEapGVIhEh..--sDh.VhI.Gs+-clppALK-hu.h...W+p .lDppWlVLsELsocL.p+shtlPcplhpcLRlApslIoaYhlDsHs..shcsLpcsE+pLsplQthLhsLs-...t-hsccaLpKht+AhRuEhshcFPhp+SpFp..Vp+....ssh-slRlplpptlp.ERLt-luEapGVIhEa..E-csp.llIcGsc-+lppALK-hush...WK... 1 4 9 15 +9702 PF09870 DUF2097 Uncharacterized protein conserved in archaea (DUF2097) COGs, Finn RD, Sammut SJ anon COGs (COG4013) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 36.90 60.60 23.90 20.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.79 0.72 -3.90 8 32 2009-01-15 18:05:59 2007-07-30 15:44:29 4 1 26 0 22 31 4 88.80 31 93.27 CHANGED hclIch.......osEEhlEYl+cNVc.sDhlElsauRlalsGcVlshss........shlpLpl-uchh.Glh-lDlccIp-DlLElhHhsc-G-phlll ........c.lph.......ss--hlEYlcssVc.sDhlEl.asRlal.Gcllshps.........shhplph-schh.sh.lElDlccIp-DLlElhHhsc-scctll.h.... 0 5 12 18 +9703 PF09871 DUF2098 Uncharacterized protein conserved in archaea (DUF2098) COGs, Finn RD, Sammut SJ anon COGs (COG4014) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.00 25.00 56.50 56.30 22.70 21.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.10 0.72 -4.05 16 48 2009-01-15 18:05:59 2007-07-30 15:45:13 4 1 48 0 38 46 2 90.10 31 91.83 CHANGED c.IclGshVRYlsTGThGpVpcI.KsEsscpWllL...-sosLaYcschLEls-...tp++pc+..chst-Ehl-+l+ccc.-thpphDlssssss.....GGG ....IplGshVRYlsTGThGcVscI.Ks-...ss.cpWllL...-sosLaY+schLElh-...tp+cccc.......ctstcEhl-+lcppc..c.....thpthchstttss...GuG................... 0 8 24 32 +9704 PF09872 DUF2099 Uncharacterized protein conserved in archaea (DUF2099) COGs, Finn RD, Sammut SJ anon COGs (COG4022) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 226.80 226.60 18.00 17.60 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.36 22 64 2009-01-15 18:05:59 2007-07-30 15:45:37 4 1 51 0 50 63 6 256.00 45 90.71 CHANGED HlhEslG+s..RVll+sG..KVlEVu-PtlcaCPLacKhR.GlcclTpEsl+cNlEaRIc-FGMhTscRplchc.sFVuFGsSElhhTuLppGhlDAsVhVCDGAGTVlsssPpLVQGlGGRhSGLlcToPIsEVIc+IccpGGhVL-.psApID.ltGlctAhchGY++lAVTlusspsAcp..lRcht..................pssclhlhuVHsTGl.op--AccllphsDllTuCASKhIRchst.+slhQlGsulPlFAlTptGKcLlhcRh+- ....HlhEsLG+.u..RVlV+sG..KVl-VuEPhlcaCPLFcKaR.GIc.clocEsI+cNlEFRIc-FGMhTscRplchc.sFVuFGsSElhhouLpcGhlDusVhVCDGAGTVIsssPpLVQGlGGRlSGLlcToPI.-..VIc+Ic..cpsGhVLD.psAcIDQltGVc+AlchGYK+IAVTVs..s..s..c..c..AcclRclEt.................tslclhlFuVHsTGl.sc--AcplhchsDllTuCASKhlR-hst......t+slhQlGsulPlFAlTppGK-LllERh+..... 0 13 35 43 +9705 PF09873 DUF2100 Uncharacterized protein conserved in archaea (DUF2100) COGs, Finn RD, Sammut SJ anon COGs (COG4024) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
22.20 22.20 22.20 141.00 21.90 22.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.44 0.70 -4.87 6 22 2009-09-11 01:16:15 2007-07-30 15:46:46 4 1 22 0 16 22 0 216.40 46 98.18 CHANGED cschSK-LI+KAlpTIS+pc+sh.h.p...p..pch.apDAcsG+IDssEhK+AlapLIEAD-YLYKoAPsH-LspEcAcEFsKlLhpspcHIs+ILupFGF.Eh.EsphclD-...suLYIVuNKKlhKsL.KplsssLNIlsTEGsL-lEDM+hlNPcIPEKALhGIEKKscIs+cpIp+hIpKlsPp+lVVVVccsDsuDELlapR....AKEL.YsAE........KLsu--IL ............chp.SKcLI+KulpTISchcp.....hh.p................pppKch..apDAKsG+IDssEFKKAlapLIEAD-aLYKpAPpH-Ls-EcA+EFsKLlhpsp+HlsplLusFGF.Eh....E.p.tclDc...sALYIVSNKKlhKpL.Ksh.ss............sLsIlsTEGsL-lEDMKhINPpIPEKALtGIcKKscIsKcpIp+hIpphpPsKllVVVc..DcuDELlapR....AKEL.YNAE........Klss-ElL............. 0 3 6 12 +9706 PF09874 DUF2101 Predicted membrane protein (DUF2101) COGs, Finn RD, Sammut SJ anon COGs (COG4025) Family This domain, found in various archaeal and bacterial proteins, has no known function. 20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.36 0.70 -4.84 5 26 2009-12-03 16:38:53 2007-07-30 15:47:35 4 1 26 0 17 25 0 175.90 32 81.10 CHANGED hKsaLlPpPS-chP+S..hlh+lsKRshTPHELlSL+LQLlFLlYLI.lSLlssa.LpshLlLIlsuslhFLYLRYlLIRNp-ahl..DFcAYRsF...YlulSTIsFlualGYlLlRcaopshaaYLsYLusIslsVllFRaYFKsRYGRDYTYGlVEEVKsDLV+VFVHDDIAANVKPGYYWVPAVu.-A-sGDIVKLLVEpRT..FRGS+PVRIL ......................................................h.ph..p.+EhhsLhLQlhhhhall.hsl.l.h...hh.h.....hhh.ht.hhYhlh..LpalLhpshshhl..ca.EsYphF....YhululluhLshhGhlllc+huhc.sh.Yhshlshlhl.sVlhhhhhF+.+ahRsaTYGhV..Eltts.hshVhVp.DltuNVKPshYhV.tss.sh..GthVKlhVEpph..h+ushPh+Il.............. 1 3 5 11 +9707 PF09875 DUF2102 Uncharacterized protein conserved in archaea (DUF2102) COGs, Finn RD, Sammut SJ anon COGs (COG4029) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 38.80 38.60 19.10 18.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.34 0.72 -4.51 16 54 2009-09-11 09:37:18 2007-07-30 15:47:51 4 1 50 0 42 51 3 102.20 45 70.17 CHANGED hlllusss.soPu-lshtlaplshs.lpIKETCFGshIsGcc-sVccllcclRshDss+IFsK-RGFPsGDsRRCRApRGGu.RPGFHtLEtEhplLshIucAL- ..............hlllussu..soPuclsthlapl........shs.lplKETCFGshIpGcc-hVccllcclRpl-.s+IFsKDRGFP.GDtRRCRApRGGusR.GFHtLEtE.clLshIucAL-............ 2 9 27 36 +9708 PF09876 DUF2103 Predicted metal-binding protein (DUF2103) COGs, Finn RD, Sammut SJ anon COGs (COG4031) Family This domain, found in various putative metal binding prokaryotic proteins, has no known function. 20.80 20.80 21.00 21.00 18.90 20.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.29 0.72 -4.27 11 139 2009-01-15 18:05:59 2007-07-30 15:48:36 4 3 138 0 78 142 101 95.60 31 69.35 CHANGED s-tssccKlGGoHSTlIGGRpGpKhlpplApHPcVKKVIPGsIsusGsu..uG.lcuKVT.RuDspGNlRLLlp-GoSVQElRlVTTAuDcEpG-clhc-LN-hL .................h.....pt+lshsHSThlsGh..h.hLcplupp..stlcslhPGsIs.tspup..pss....lpl+lo.hsspsG.hKLlARpGsoVQElFlVTshspc-..pt......................... 0 20 51 70 +9709 PF09877 DUF2104 Predicted membrane protein (DUF2104) COGs, Finn RD, Sammut SJ anon COGs (COG4035) Family This domain, found in various hypothetical archaeal proteins, has no known function. 22.80 22.80 24.90 24.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.31 0.72 -4.04 8 27 2009-01-15 18:05:59 2007-07-30 15:49:00 4 1 27 0 17 25 0 96.20 44 94.03 CHANGED hl.FhlLlsslsFIlGShlGLpYSY+KYscPalE+plDslALllAVlGhl.lhVNss......hhhhlGhhhluaslGMRPGYGRhEhllGlllAlIlalLpt ........h.hLlshlsFlIGShlGLtYSY+KYspPal-KplDhlALllAllGul.hhlNss...........h.luhhhluFslGMRPGYGRhEhhlGlllAlllallh.h.. 
0 4 7 13 +9710 PF09878 DUF2105 Predicted membrane protein (DUF2105) COGs, Finn RD, Sammut SJ anon COGs (COG4036) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 25.00 196.40 22.50 24.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.50 0.70 -4.76 10 32 2009-01-15 18:05:59 2007-07-30 15:49:21 4 1 32 0 21 29 0 216.30 50 96.77 CHANGED M-hshplapsslhsGlIlGhluLlsIuh....pKsDLshlllTDLlEhuMLVlIAuVGTDLAEALILPGLVVGlAELLAVSElhluR..........................p.+lhEchsl.phss..........h+hEVLcTAPsFlAllLVVYGllLoGFTGGAVAusGLLFYhLo++spsh.h......hpaEGIuulSGIuWALWIhGFlhFFlaPptWLhuLhlAGsG.lllKVuSKlGLIGslhc .......phh..lapsslhlGhllGhluLhuIuh....pKsDLphLlLTDLlEhAMLllIAuVGTDLAEALILPGLVVulAELLAlSEIhloR..........................c.+lhEchhh.phss...........hchEVLpTAPpFlAl..lLllYGslLSGFTGGAVhuuGLLFYslS++shGh.h......hpWEGluulSGIAWAlWl.hGFlsFFlhPphWLhsLhlAGhG.LllKVuSKlGLIGhlh.c. 0 5 11 17 +9711 PF09879 DUF2106 Predicted membrane protein (DUF2106) COGs, Finn RD, Sammut SJ anon COGs (COG4037) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 193.50 193.30 22.70 17.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.85 0.71 -4.88 9 32 2009-01-15 18:05:59 2007-07-30 15:49:49 4 1 32 0 21 29 0 151.80 54 93.62 CHANGED tpls+lhNhlSpPcsls+laAhhlsllhllGlh.lPh.shpcsQLYPKs..p.QlphposLAPYDRGGlPLppPuslKuQYPp.pPhlGhlTAYLoPluhalu-pThYhGTTIVSpPGGIlDEILYYTRGhDTVLESSILhhuFsIhSaLahscs .h.pls+lWNhLSpPcslsRlFAhhlslllllGlh....lPh.shsssQLYPKsh.QpQl.hcoPLAPYDRGGlPLccPAslKuQYPpapPhlGpITAYLoPlAhalucpThYFGTTIVSoPGGIlDEILYYTRGhDTVLESoILhlSFhIhSWlahN+.s. 0 5 11 17 +9712 PF09880 DUF2107 Predicted membrane protein (DUF2107) COGs, Finn RD, Sammut SJ anon COGs (COG4038) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.50 25.50 26.30 46.90 25.40 25.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.51 0.72 -4.09 7 32 2009-01-15 18:05:59 2007-07-30 15:50:03 4 1 32 0 21 28 1 73.70 52 88.98 CHANGED hYsGhhllIlGolusshGPtspDPlhRhlNhElPAhGVsLlhLuYNcsLALlTFluVsslhThVLlRAIlR.Etht .hYlGhhLlIlGoluslhGPts+DPlhRhLNhEVPulGVsLIFLuYNcTLALhTFlAVsAllTLVLlRAIl+.EEh........ 0 5 11 17 +9713 PF09881 DUF2108 Predicted membrane protein (DUF2108) COGs, Finn RD, Sammut SJ anon COGs (COG4039) Family This domain, found in various hypothetical archaeal proteins, has no known function. 29.00 29.00 69.70 69.40 28.90 28.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.15 0.72 -4.16 9 32 2009-01-15 18:05:59 2007-07-30 15:52:31 4 1 32 0 21 29 0 72.30 44 80.92 CHANGED M-hLs.lluhsssll..GuluslLtpcslsKlIhhullpuGhlsllsuttYLDVAhssuLl-PluTIllLlhhhK .......h-lLs.l..lussssll..GulGsIlpscslsKIIMhulL-uGhlslIsuhhYLDVAhluulh-PluTlILLlulhK. 0 5 11 17 +9714 PF09882 DUF2109 Predicted membrane protein (DUF2109) COGs, Finn RD, Sammut SJ anon COGs (COG4040) Family This domain, found in various hypothetical archaeal proteins, has no known function. 23.10 23.10 89.40 89.20 22.30 21.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.46 0.72 -4.04 10 32 2009-01-15 18:05:59 2007-07-30 15:52:45 4 1 32 0 21 30 0 77.90 46 91.52 CHANGED llplllGlIullsulRlhls+sRupKL.YLNslsFuIAuLIALhlcoPhGhlAAssYFIsSTloSNAIAaTlucl-ch ....lphllGlIullhslRlhls+sRApKL.YLsslsFsIuALIALYIcoPhGulsAhsYFIsSTlSSNAIAaTlsclcc..... 0 5 11 17 +9715 PF09883 DUF2110 Uncharacterized protein conserved in archaea (DUF2110) COGs, Finn RD, Sammut SJ anon COGs (COG4044) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.40 20.40 20.40 21.50 20.30 20.10 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.33 0.70 -4.72 10 44 2012-10-03 20:18:03 2007-07-30 15:52:59 4 1 44 0 33 50 1 227.00 42 95.66 CHANGED lsTppalhG-sp-Rul-uLcohVpN-LG-LDVca.clulpcDsaspVTLsG-DsElAtNLLpEcaGElssph-s...G-lYlGpLpSas--GaslDl..GlsVplPuDEL.sLGp.......G...oPcQltcRFGLVpHLP..lRhlcspc......t.ApLo-pphD+La-Wp+..u.ssRlsVNSsTRuEl+uslNRAGHuRDIlslERLGLLEpuVVC+EcTDuPGllAsIGPYLpu.............Eltsl .............................lshh.hl.GDs.-pul-uLcohlpNtlGDL-Vca.clulpccsaspVolpG-DpplutNLL+EEaGEls.clcs...G-sYhGpLpuhs-sGaslss..GhhlplPsDcL.sLG...........GsscQltcRFGlVpHLP..lchlpspc...............tupLo-cphD+la-Ws+..s.ssRl.V...NusTRucV+usls+sGHu+DlhplERLGLLEpullC+-sTDsPGllAsIGsYL..u.Eltsl..................................................... 0 5 17 26 +9716 PF09884 DUF2111 Uncharacterized protein conserved in archaea (DUF2111) COGs, Finn RD, Sammut SJ anon COGs (COG4048) Family This domain, found in various hypothetical archaeal proteins, has no known function. 20.90 20.90 21.00 104.00 19.90 20.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.72 0.72 -4.16 17 50 2009-09-11 16:51:47 2007-07-30 15:53:10 4 2 46 0 37 50 5 83.10 50 53.86 CHANGED lHpLlstLPVThRStppsGlRlEcGcVlDcsYTGPVLEpVLcsucll+ssP.sGsYcGlPVlVuPI+sc.GclluAlGlVDlsu ..lHpLls.LPVThRS+s+sGVRlE+GcVlDcsYTGPVLEpVlcpucll+ssPhpGsYpGlPVlVsPI+sp.GcVluAlGlVDlTu. 0 9 22 29 +9717 PF09885 DUF2112 Uncharacterized protein conserved in archaea (DUF2112) COGs, Finn RD, Sammut SJ anon COGs (COG4050) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 168.70 168.50 20.10 17.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.65 0.71 -4.53 18 54 2009-09-11 09:55:49 2007-07-30 15:53:23 4 1 50 0 40 52 5 144.40 58 92.47 CHANGED hKVhIYPsNSLILuDLVERFGHcPLshsppI+c+Vpsss........lDSPPhNlTsEDPK+GLKYAAlEVPSGVRGRMoLhGPLIE-AEAAIIlsc.us.............huFGChGCARTNELshaLlRcKsIPlLELcYPcs---uhphVp+IspFLcuL .hKVFIYPsNSLILuDLVERFGHcPLsltptI+c+Vpssp........lDSPPlNITsEDPKKGLKYAAlEVPSGVRGRMuLlGPLIEEAEAAIIhpc.AP.............huFGChGCuRTNELshaLlRc.K.sIPlLElcYPps-E-uh.hVp+IspFLcuL. 0 8 26 34 +9718 PF09886 DUF2113 Uncharacterized protein conserved in archaea (DUF2113) COGs, Finn RD, Sammut SJ anon COGs (COG4051) Family This domain, found in various hypothetical archaeal proteins, has no known function. 19.50 19.50 19.50 176.30 18.30 17.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.02 0.71 -5.16 17 52 2009-01-15 18:05:59 2007-07-30 15:53:35 4 1 52 0 38 49 4 184.00 33 96.99 CHANGED h.VEss.-ctGsphYccIhpssLpDLsLs+sIt+l+halcPctPlFlhsslh+psspsI+ltDlAslp.........spsthplpIs-EsYlsplLcpLWpphG+splcQPsRaplhlssshs........lt-hlVtDPpcsLhcclhDhh.RlhPEGFRVR+pshpssphshlASEcslpp-hh.chstchhcch.tttt ..hhVEst.-ctGtchYccIlpssLpDLsLs+uIt+l+hhhcPccPlFllullh+pssphIplpDlAslp........tspsthplpIp-EsYlspLLc+LWph.ucsplcQPsRapll..lcschs..........-hlVhDsccclhc+lhDhh.RlhPEGFRVR+.hhpspplshlASEcslccEhl.ccstchhccl.p..s.... 1 8 24 32 +9719 PF09887 DUF2114 Uncharacterized protein conserved in archaea (DUF2114) COGs, Finn RD, Sammut SJ anon COGs (COG4065) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 612.40 612.20 18.00 17.50 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.35 0.70 -6.15 17 54 2009-01-15 18:05:59 2007-07-30 15:53:53 4 1 54 0 38 54 6 449.10 58 91.99 CHANGED KP..YaIVASVElGNTTTKCILTuTNL-TG+oYllsKsV+MTRDVRsPKsGEElFG+Tl.GsELT+EuVuELV+DTLlcuhKcApLsIcpDLcFVVRSTGVsAuFsSP--VGphIhALAsGCLtAGVPP+KMTPsMohsslPc+lccaShhDKlhFDGAVsGVlPP...TGhEhVANEMEGELsTAGIK.GAKWTsVDFRNPClSlDFGTTLsGRITsD.....ppPYAKTlGNFCGLAGAIPDAIlRGoGlVDccsGsALDla.......tphptphspchsccYu-csH.EhIcIccVPtsRcRFGpVPVsscuAccuGlsLIGCDsGcNGSclscLpcIGt-lhccpultsLhtslDhVsAplshRLl-lAhccsll..pouIGlTGRAGIoGpKPcLIL-tlt-h.tlacpsp-clVFVDDGLARGAAlMARCMNSLGTPcsPlGGhRGGtCIhupRIKhp+ ....pPYYIVASVElGNTTTKsIlTATNhcTG+TYllsKsV+MTRDVRsPK.GEcVFG+TlhGlELT+EuVuEhV+-TLlcuhccApLsIc.DLcFVVRSTGVsAGFuSPEEVGphIlALAsGCLtAGlPPpKMTPsMohpsl.PcclpcaShl-KllFDGAVsuVlPP...TGhElVANEMEGELVTAGIK.GAKWTcVDFRNPClSlDFGTTLAGRITs-.....spPYAKTlGNaCGLAGAIsDAIlRGoslVDc+sGsALDla.......tthptphspchsccYA-chH.-hIpIccVPpshcRFGoVPVsPcuAccuGssLIGCDVGcNGSclscLp-IGt-I..hccpslssLhsslDhVsAplspRLl-lAh-cGllptsouIGlTGRAGITGcKPcLILEtlp-h.tla.cp.p-clVFV-DGLAhGAAVMARCMNSLGTPcsPlGGpRGGtCILupRhKhQp.. 0 8 24 32 +9720 PF09888 DUF2115 Uncharacterized protein conserved in archaea (DUF2115) COGs, Finn RD, Sammut SJ anon COGs (COG4066) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 51.60 51.40 24.70 24.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.12 0.71 -4.14 19 67 2009-01-15 18:05:59 2007-07-30 15:54:01 4 1 42 0 51 66 4 160.70 27 89.85 CHANGED hspp-LhphLtcpsppaolhDLhphpshhcc-hchlPtcYRcchhpshhphhhtsapcl+s..psss.hpspc..hDptphpchhshl....tptp.ppttpt..........h.hlhhllssYhhFlpccPhHPVGh.FPGGhpV.c+sGsYYCPl+-+pp-schulCpFCsu+pss ...........................ppt-LhphLtcpstphSlhDlh.hpshlcp-hphlP..ppY+cphhpt.hhphhhpshpcl+p..ppspthp.tp..h-tcphpchhshl....tp.t.pt.ppstcpp..........applh.hllssYhhFlhccPlHPlGh.FPGGhplhcc....sspYYCPl+c+..ppc.thulCpFClscp..... 0 10 36 46 +9721 PF09889 DUF2116 Uncharacterized protein containing a Zn-ribbon (DUF2116) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4068) Family This domain, found in various hypothetical archaeal proteins, has no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 21.80 21.80 21.80 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.22 0.72 -4.17 9 52 2012-10-03 10:42:43 2007-07-30 15:54:15 4 3 49 0 35 57 35 54.30 30 67.66 CHANGED EsH+HCllCGtuIP.-EphCS-+CpEtapp+pK+hh+pp.Ihashhllhlulahhhsah ......H+HChsC.G.tsIP..s.....-..csFCSpcCccpatpcp++.h.p.p.hhhhhhhhhlhhhhhh...................... 0 8 21 31 +9722 PF09890 DUF2117 Uncharacterized protein conserved in archaea (DUF2117) COGs, Finn RD, Sammut SJ anon COGs (COG4069) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 29.00 28.30 20.80 18.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.46 0.70 -5.14 16 49 2009-01-15 18:05:59 2007-07-30 15:54:38 4 4 43 0 37 49 2 210.00 37 57.38 CHANGED sDllsLLNaGKohpTG+sFGchVssR.....st.c.KPLIQIERPGpsDGslIsWN.......cts...pshs-cLScpLsLsh.p........hppslsptpshppsp...plhR+lsGV.PGEsIhVNGIVIG+AsSscVplluEN.GhlssIpGGpIKcHGlEKLtc......VDLp+AhVKTG.hlR+ss........hpschhc.cshstch..............s+VllIDHAAEcshEhh..csAshVlTlGDDTTsIuGD ........................................................DllhLlNaGKSt.TG+sFGphVspR..................sthpcPlIQIERPGctDGslI.WN......................pts...pshscpLocpLsLp......................ppslspthshh.ppp.....phhRclpGVpsGEsIhVNGIVlG+.upuscVtllucs.GhlscIhGGplKcHGlEKLtp......lDLpcAllKTG.hLRRps...........hpschtp.....pshspph.............................sclshlsHAuEcsh-hh....cc..sshVlTlGDDTTtlsGD................................... 0 10 21 29 +9723 PF09891 DUF2118 Uncharacterized protein conserved in archaea (DUF2118) COGs, Finn RD, Sammut SJ anon COGs (COG4072) Family This domain, found in various hypothetical archaeal proteins, has no known function. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.64 0.71 -4.50 9 43 2012-10-02 20:27:15 2007-07-30 15:54:49 4 1 43 6 31 71 34 148.80 31 94.63 CHANGED h+lP+lYVEstts-................t.cVhlcps-chhhah-cscthhh.GKslYph.hc-hsD.lhtp+hp+chlIhhP..DtRphlalKcGocLh.lPsEGapVs.IschGsRVtcGsslAslpT+KG-lRalcuPVsGsVValpE.Ps....tRssYlaYIlPE ............................................+lPcLYVEs..p-.................s.cVhlcpstph.....tF.....l-h...s.c......thhhtt...KhlYch.acpas-.hhtsph.ppDhlIhhP............ct+phhal+cGsc.Lh.lPsEGapVh.IsshGsRVccGctlAslpT+KGE.lRal+sPhsGsV..lalpEhss....tRspYlaYIL.E................... 
0 10 13 21 +9724 PF09892 DUF2119 Uncharacterized protein conserved in archaea (DUF2119) COGs, Finn RD, Sammut SJ anon COGs (COG4073) Family This domain, found in various hypothetical archaeal proteins, has no known function. 29.60 29.60 29.60 144.90 29.50 29.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.16 0.71 -5.02 10 27 2009-01-15 18:05:59 2007-07-30 15:55:00 4 1 27 0 18 26 2 196.00 39 94.08 CHANGED YupG+t..Ph+LFVAGlHGsEuKsToclLcclp.schp.....GslsllP.lVcsuKYISTLcPcYYco.hGKpILclIEch+P.-hYlELHSYStENacKLTupsRhsptGVPsalsLpsGVLlGSVSPhlR++hFs+-sLCLTlElP.............pttspcut-hlsclLclltcupoR-EFIEclc++YPcpscKAhchh+cFaGp ....h.spucs..Pp+LFluGlHGsEuKhTpplLcplp....sphp..................Gplhlhs.lscp..o+YlSTLs.cYYcop.GppllclIEcYcP..chYhElHsYstcsYp+LTshcRhsppGVPPhl-LssGVLluSVSPhlRp+.Fpc-slChTlElP..................shpstcshchhhclLchhhpupsR--hhcclhpcYPcpscpAhphu+caah... 0 5 11 14 +9725 PF09893 DUF2120 Uncharacterized protein conserved in archaea (DUF2120) COGs, Finn RD, Sammut SJ anon COGs (COG4077) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 39.30 95.70 22.20 20.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.82 0.71 -4.25 6 27 2009-01-15 18:05:59 2007-07-30 15:55:13 4 1 27 0 19 26 2 138.80 38 95.54 CHANGED lpcl.............hG+IMctl-AF+GSKPllcscElLhVRGhsR......DchhcchsSlhEaL....l-hLsccGhELls-h.............DEh...Vp......chsElhs-o-ha.D.hGFE+lKcoFEshGCssDYslG+p.cshhlhlsMWhDKsuhsPKFVEllsl ....................lhG+lMpph-AFKGSKPlhcp-plLhVRGlsR......Dc.hcchssIc-aL....h-pLsppGhElhsc..............D-h...lp......cIsE.lhs-.osha.DtaGFEtlKcuFEshGCcsDYtlu+K.sslhlsluhWhDKpshsPpFVEVhs.. 0 4 9 15 +9726 PF09894 DUF2121 Uncharacterized protein conserved in archaea (DUF2121) COGs, Finn RD, Sammut SJ anon COGs (COG4079) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
26.20 26.20 141.40 141.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.09 0.71 -5.02 13 34 2009-01-15 18:05:59 2007-07-30 15:55:24 4 1 34 0 25 33 2 194.40 38 71.02 CHANGED MSLlIuYhGKNGAVlAGD+RpIhFpGpcpsREcLEEcLYSGcI+oDEELtK+AsEauVp...IplsDs+cKVpplss.VLsGEVsol.Gt-oKRRRlYsTpGshsIl-lpssplTspohssGosllVFGN+asKclsppplK+php..hKhslccltclhcclhcpl.tcssTlScpa-lhhspsttcshct......sIc+Dlpc MSLlIuYhGKsGAVlAGD+RpIhFpGscppRcpLE-cLYSGcI+oDEELh++AcEhGlp...lpIsDsccKV+clus.lllGEVpol.uh-oKRRRlYsTpGshsIl-lhssplsspphtpuusllVFGN+asKclupcpl+cphp...phslpcltclhcclhccl.ppTPolSppa-lhhspsthsshct......slc+Dlp.p............. 0 6 13 19 +9727 PF09895 DUF2122 RecB-family nuclease (DUF2122) COGs, Finn RD, Sammut SJ anon COGs (COG4080) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 61.30 61.00 23.50 18.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.21 0.72 -3.84 18 59 2012-10-01 22:53:19 2007-07-30 15:55:36 4 1 58 0 37 53 0 105.40 35 71.33 CHANGED GuAAQpGlP-lhKLAhKhGp.sllVlsDlcDAlElL+P-hVhhlspsspctt.........phhc.........hcs+lhlVhsGu-.suhochElshGpslhhtulcp-lGslGtlAlhL..Ypl .GuAAQsGlP-lpKLAhKhsK.slllLs-lcDAlElLcP-hlhhlupsuppphp........p...hp.........hts+lLllFsGs-.sGFoKhElph.Gptlhl.tlpp-lGulu.hulhLYp.................... 0 13 19 28 +9729 PF09897 DUF2124 Uncharacterized protein conserved in archaea (DUF2124) COGs, Finn RD, Sammut SJ anon COGs (COG4090) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
21.40 21.40 22.30 142.70 20.90 21.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.52 17 49 2009-01-15 18:05:59 2007-07-30 15:56:01 4 1 41 5 34 50 1 147.50 38 93.47 CHANGED +GlushLpsFKshl....t.psucpI.sahGssGsCsPFApLhuYulRsh..pphFlPssch-cu+plp.lsclGhp...ls-.hshc.sDhlVlhGGLAMPchsloh-csppllpclsstp....llGlCFhshFp+sGW.cpIsFDhlIDupl-.Vpl ..+GlushLcsF+shl....t.psucpI.lahGosGsCsPFApLhuYulRsp..cthFlPssch-cu+plt.hsslGhp...hs-ttshc.sDslVlhGGLAMP+hslss-clppllpclsscp....llGlCFhuhFp+uGWtcc..IcFDhlIDuplc.V...... 0 8 22 27 +9730 PF09898 DUF2125 Uncharacterized protein conserved in bacteria (DUF2125) COGs, Finn RD, Sammut SJ anon COGs (COG4093) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.40 21.40 21.40 21.50 21.30 20.60 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.74 0.70 -5.12 45 259 2009-01-15 18:05:59 2007-07-30 15:56:21 4 3 225 0 86 216 94 300.50 20 82.17 CHANGED WahuAstlcpshssWhspppupGhts-hushs....lsGFPhRhclphsssslusP.........psGhuhpustlphhu.sYpPs+llsphss.stplshs......stsasltpss.....hpushshsspssLsh-chslsstshshsssh..t.......sshsplp....hthtps..ssssshchslpspshshssshh................thpshphcuplphshs........hshsslpptpspssplclppsplph.Gshtlpu.uGslslDssGhssGclslpspshcpllshhtpsthl.t.....................................t......shpsshshhuthsGptp.slslsLphpcGplhl.GslPlGtsPs 
...........................................................................................................................................Whhhustlctthps.hsp.tstGhtspttshp....hsGaPhRhplhssshphtps..........tpuhshphsthpssu.hatPhplhhthsu.Pthl.....ph..s.............hthshshhpss........hphthsh.s.chplshcssslshts.ssths............sshtchp....hp..hs......ssssshc..hth..p...hsshshssshh..........................shttsphsushs.hslssh...................................ss..p...th..h....t...p...h.ps..t...s...l...s...lpphplsh.usht.hsu.oGshsls..p......s.......G.........h........hs..GclpLplsshpslhpsht.tst.hps........................................s...........shpsh.hthhu..ths..t.st..........s...shshsltl.p.cGph.h.G.lslGthP................................................ 1 20 55 65 +9731 PF09899 DUF2126 Putative amidoligase enzyme (DUF2126) COGs, Finn RD, Sammut SJ anon COGs (COG4196) Family Members of this family of bacterial domains are predominantly found in transglutaminase and transglutaminase-like proteins. Their exact function is, as yet, unknown, but they are likely to act as amidoligase enzymes [1] Protein in this family are found in conserved gene neighborhoods encoding a glutamine amidotransferase-like thiol peptidase (in proteobacteria) or an Aig2 family cyclotransferase protein (in firmicutes) [1]. 
18.40 18.40 35.20 26.50 16.30 17.50 hmmbuild -o /dev/null HMM SEED 819 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.37 0.70 -6.51 40 471 2012-10-02 17:21:26 2007-07-30 15:56:54 4 7 408 0 164 493 127 725.70 51 74.92 CHANGED s-.sEV-Fpa-MpVsRltEsPRVTKPao--pWpslssLGcpVDtcLptsDVRLTMGGEPTFVSlDDh-usEWNTsAlGPsKRtLAssLl+RLRc+FA.PGGlLHaGQGKWYPGEsLPRWuhulaWR+DGpPl..W+ss..sLlAc-s....psts....ssscpAcpFlpslApcLGlsschlhPAYEDshaalh+EspLPsNVDP.tcu+Lc......DsppRtRls+VFccGLspPsGaVLPlp+hp.spss......WpSstWhhR.+s+laLlPGDSPlGhRLPLsSLPalsss-aPa.hh.tDPhpspssL.....................tttt.ssststptstpphtpth..........................thVRTALsVEsR-G+LpVFMPPl.-p.lE-YL-LluulEssApplshPVhlEGYsPPp.DPRLshl+VTPDPGVIEVNlHPAsoWcEhVchTpsLYE-ARpsRLGoEKFMlDGRHTGTGGGNHlVLGGsTPsDSPFLRRPDLL+SLltYWQ+HPSLSYLFSGhFIGPTSQAPRlDEARcDuLYELEIAhuQlPts...tsspsP..PWLVDRLhRNLLsDlTGNTHRAEhCIDKLYSPDusTGRLGLlEFRuFEMPPcsRMSLAQQLLlRALlA+FWcpPhcs.tLVRWGTsLHDRFMLPHFlhpDFtDVlsDLptsGasFcspWFssahEFRFPhhGclph.pGlcLELRpALEPWHVLGEEuusGG.TsRYVDSSlERLQVKlpGhss..sRaslsCNGRplPLpsTGssGEhVAGVRY+AWQPsSuLH.PTIslcuPLsFDllDoWsuRSlGGCsYHVAHPGGRsY-TFPVNuhEA.EuRRlARF.shGHTsGths..........................ss.tp.....ssEFPhTLDLR 
...................................................-.s-spFpap.pVsRlhEsPRVT+PYo-ppWptl.tLGppVDtpLttsDVRLThGGEPTFVSlD...Dh-usEWNTsAhG........PpKRthAspLhpRL+sc.au.s...sGhlHaGQGKWYPGE.LPRWshulaWRpDGpPl..Wpss..sLlAc.t...ts.s...........................hsstpApchltslAttLGlssphlhPAYEDshhaLhcEtpLPs..s..lss......s.....s.c..Lt..............................-....p..Rtclt.chhppuLspssGaVLPlpthtst.t................WtSstWhhR.ct........+hhLlPGDSPhGhRLPLsSlsahs.t..p..h.h...h.tDPhtstssL.................................................................................................................................................................................t.hsRTALssEsR..................s.....................Gh..................LhlFhPPh..pt..lEcaL-LluslEssAp..pls.h.s.l.llEGYsPPt.Ds.RLphhplTPDPGVIEVNIpPutsWpEhsppTphLY-pA+.sRLsoEKFhlDG+HsGTGGGNHlslGGsTPuDSPhLRRPDLLtSLlsYWppHPSLSYLFSGhFIGPTSQAPRlDEuRp-sLYELEIAhupl.t.........................st.pss..PWllDRlhRpLLhDlTGNTHRuEFCIDKLYSPDus.oGRLGLlEhRuFEMP.PHs+MulsQpLLlRALlAhFWcpPhps.....LhRWGTtLHDRFhLPHFlhpDhtDVls-Lpt.tGasFcspWFssahEFRFPhhGphth..pG....lplELRtAlEPWHVhGEEus..suG.TsRYVDSSlERlQV+lp....G...hss.........pRa.lloCNGh.lPLpsT..s.psu.phVuGVRa+AWpPsSuLH.PTIsVcuPLsFDllD.s.......Ws.....tR..........SlG.GCpYHVuHPGGRsY-ohPVNuhEA.EuRRhsRF..shGHTPGt..hs......................................t........s.-hPhTLDLR............................................................. 0 45 105 133 +9732 PF09900 DUF2127 Predicted membrane protein (DUF2127) COGs, Finn RD, Sammut SJ anon COGs (COG4331&COG3305)) Family This domain, found in various hypothetical prokaryotic and archaeal proteins, has no known function. 
24.30 24.30 24.60 25.50 24.20 24.20 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.51 0.71 -4.15 33 344 2009-01-15 18:05:59 2007-07-30 15:57:02 4 2 332 0 92 229 22 140.60 28 75.50 CHANGED tuL+slAlhEusKGllslluGluLhslhtps..lpphstpllp+.........hplsPssphsshh......LphssplossslphluhhshsYuslRhlEAaGLW+s+tWupahullSuulYlPaElYElhp+s.ohhphslhhlNlhlVsalhhplt .............................................................h.sl+slulhcus+ull.lluululh..h..ltspp..lp.thlpcllph.........hplsssshhsphh......Lctutt.lssssLt.hsushhhsYusl+llEuhGLW+t+hWupahullusulaLPhElY-....lh.p..ph.ohhplsshhlNlhlVhhhhhph............. 0 30 61 81 +9734 PF09902 DUF2129 Uncharacterized protein conserved in bacteria (DUF2129) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4471) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 22.00 22.00 23.90 22.40 20.20 18.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.33 0.72 -4.18 38 1016 2009-01-15 18:05:59 2007-07-30 15:58:04 4 2 1011 0 95 365 0 68.40 38 76.10 CHANGED RpulIVYLaplKps+.pLR+aGslhYhS++h+YllLYlsp-cl-plhcKlpph+FVK+VchSh+s.lcpsap ..........RpullVYLhh.+ct.+.pLp+YGcIhYhS++h+YlllYlspp-l-plhp+Lpch+FVK+VchSahptLcpsa............. 0 22 48 71 +9735 PF09903 DUF2130 Uncharacterized protein conserved in bacteria (DUF2130) COGs, Finn RD, Sammut SJ anon COGs (COG4487) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
24.70 24.70 25.20 26.30 24.60 24.60 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.55 0.70 -5.65 42 737 2012-10-11 20:44:46 2007-07-30 15:58:23 4 1 708 0 105 513 324 259.60 43 60.52 CHANGED -...c.hhpch-pp...lcphpcpI-clK....chttphSsphhGEsLE.ahEs........hhtsuF...PpshhEcssc.uppGu.....KsDaIaRshs.......ppspchsSIhaEhK.......pTKphs-calc+LccDppppps-aAlLVS.l.Pc.......hhV-s.tthcphaVs+PphFlslhslLRpuhlphhph+ppht....hhpspth.lhpF..csphcphhpuhscsas.hpcchppthcpIsKthp+hpcph-slltuspsLphuppphssholpcl..sthpsshttc ............................hp....sthppphEsp.LKsts-plEh..hK...shK..sQtSo.KhlGEoLE.asEscFp........phcphuF....Psuh...F-KDNK..uspGo...............KGDaIaRtpD.......csGsEhlSIMFEMKNE...cto..csK+KNp-FhKcLDcDRREKsCEYAVLVThLEs-s.phaNtGIVD.so...H...p..YEKMY.....VlRPQ.aFltllulLRsuulNoh..+hK..ppLt....hh+EpNh-locF..Ec-hcshppAhsKsYpssopsatculccI-KslK+hEcs+chLssocspL......+hAs.NKlps.lol++LsptsssMtt...................... 0 29 63 90 +9736 PF09904 HTH_43 DUF2131; Winged helix-turn helix COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4519) Family This family, found in various hypothetical prokaryotic proteins, is a probable winged helix DNA-binding domain. 21.90 21.90 22.20 23.00 21.50 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.09 0.72 -4.18 22 301 2012-10-04 14:01:12 2007-07-30 15:59:03 4 2 275 3 56 190 6 87.40 50 91.47 CHANGED SKopsuFhRRLYlAaLI-sspt.sVPuLhchTGMPRRThQDslsALs-lsIcspF.Q.cGtRppsGaYpIpsWGsIsctWlpp+lppIppsL .........chpssFhRRLYLAhLl-stcpsNVPcLhctTGhPRRTlQDslpALs.slG.IplpFVQ..-...GtRpNsGYYplsDWGPlDppWltcphppItus.................................. 0 5 17 37 +9737 PF09905 DUF2132 Uncharacterized conserved protein (DUF2132) COGs, Finn RD, Sammut SJ anon COGs (COG4628) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
20.50 20.50 22.00 36.70 19.60 20.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.13 0.72 -4.01 80 623 2009-01-15 18:05:59 2007-07-30 15:59:32 4 1 484 1 182 464 52 63.70 59 64.05 CHANGED QsNNPLHGloLEpllscLVcaY.GW-tLupplsIpCFpscPSlKSSLKFLRKTsWARcKVEsLYL ..psNNPLHGloLEplLscLV-aY.GWctLupplpINCFps-PSIKSSLKFLRKT.WAR-KVEsLYL. 0 44 101 146 +9738 PF09906 DUF2135 Uncharacterized protein conserved in bacteria (DUF2135) COGs, Finn RD, Sammut SJ anon COGs (COG4676) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 22.00 22.00 22.10 22.40 21.90 21.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.40 0.72 -4.01 24 419 2009-01-15 18:05:59 2007-07-30 16:00:37 4 15 368 0 47 179 49 47.80 72 16.17 CHANGED PhpGsYhVaVNYaGstssp.............thsssplsllos......Es..oPsEc.pcsahlPl ..PlHGRYQVYINYYGGRSET......................ELTTAQLTL..ITD......EG...SVNEK.QETFIVPM....... 0 7 24 37 +9739 PF09907 DUF2136 Uncharacterized protein conserved in bacteria (DUF2136) COGs, Finn RD, Sammut SJ anon COGs (COG4680) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.80 20.80 21.00 21.30 20.70 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.40 0.72 -3.90 64 642 2009-01-15 18:05:59 2007-07-30 16:00:53 4 2 550 0 147 444 36 75.70 38 75.54 CHANGED cucpsLpsWaphscpApWpsPp-lKppa.ssAshlt..sp+hVFNItGNcYRLIstlsa.....phphlal+alGTHsEYD+ls ........................p.tchsLpshhshlp+upappPp-lKpha.sShDth.+....scahVFsluGNphRLluhlpF........ptp+hal+alsTHtEYD+l........ 0 31 94 128 +9741 PF09909 DUF2138 Uncharacterized protein conserved in bacteria (DUF2138) COGs, Finn RD, Sammut SJ anon COGs (COG4685) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
20.70 20.70 21.30 22.00 17.70 20.60 hmmbuild -o /dev/null HMM SEED 555 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.85 0.70 -6.37 14 352 2009-01-15 18:05:59 2007-07-30 16:01:41 4 1 334 0 18 175 9 509.80 80 97.64 CHANGED KtWthhulls..AlslluusVhtlta.....tspusc+sLss....sslpl........DLscPDullDScSLSQLPKDlLpVPhL+DVLTEDFVFYYpscuDRLGlpGoLRRIlYEHDLsL+DpLlcpLhDQPApVALWRusDG+LpaahllhpRuGLAKlLEPLuhsAsuDuQLSKsthu...lsusslPVYpL+Ysus+sLhFAocsD+llsLSsPsMLacc-.......tp......sscAsslhusLLuG....+chaspuFGLts+ssp..PspQRlVVSAshLuFGYQRahPuFAGlRF-hG.s......sG...WpSalALsD.stplstpaDFsPlWpAMPuGAShCVuLPhsathApshLs+luuE...ssphs.......-tLsGsAGlCWYucS+LhoPLFVuplcusstt...chsptlGKLFspsIGAaEuKAsc..........thLPVoppppG.puplWpRpVSSRaGpYssupAspPsQL.u-hFFpVoLAhpspTLlFSLDDpLVscALssLsKpaPAhuDVlPpD..ullPLYlsPpuhApLlcpEThsSLPpDhEPVFhNAAQThLhPKLcALuphspYsLsLspshpsuuuWQWLPlpW ......................KGWRFYGLVGFGAIALLSAGVWALQY.....AGSGPEKTLSPL.VVHNNLQI...........DLNEPDLFLDSDSLSQLPKDLLTIPFLHDVLSEDFVFYYQNHADRLGIEGSIRRIVYEHDLTLKDKL.FSSLLDQPAQAALWHDKQGHLSHYMVLIQRSGLSKLLEPLLFAATSDSQLSKTEISSIKINSETlPVYQLRYNGNNALMFATYQDKMLVFSSTDMLFKDD..........QQ......DTEATAIAuDLLSG....KKRWQASFGLEERsAEK..TPVRQRIVVSARLLGFGYQRLMPSFAGVRFEMGN..........DGWHSFVALNDESASVDuSFDFTPVWNSMPAGASFCVAVPYSHGIAEEMLSHISQE....NDKLN.......GALDGAAGLCWYEDSKLQTPLFVGQFDGTA.....EQAQLPGKLFTQNIGAHESKAPE.............GVLPVSQTQQG.EAQIWRREVSSRYGQYPKAQAAQPDQLMSDYFFRVSLAMQNKTLLFSLDDTLVNNALQTLNKsRPAMVDVIPTD.....GIVPLYINPQGlAKLLRNETLTSLPKNLEPVFYNAAQTLLMPKLDALSQQPRYV..MKLAQ.....MEPGAAWQWLPITW....................... 0 3 7 13 +9742 PF09910 DUF2139 Uncharacterized protein conserved in archaea (DUF2139) COGs, Finn RD, Sammut SJ anon COGs (COG4697) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.90 25.90 26.30 26.30 20.00 25.80 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.03 0.70 -5.41 6 35 2009-01-15 18:05:59 2007-07-30 16:02:18 4 1 31 0 24 37 0 292.40 40 66.93 CHANGED LYaTLAFEAcAaFIc.ssspc+lY-FpLVGstPsSGGDTYNAVEsVD-hIYFGGWVHAPAhaRt+scG+.AsIsFoNKYSHVHcYDTusscV+LlWKEShHc.ccWAGEVS-IIYNPYsDcLLLARtDGHtNLGVYpLD.RRGtsR+L.scPuhKGshhhDhAhFul.+pahtGhpGlcslDLlot+h....EtFs.us.SlDGGshhpP+lGuh.sSlasRlFAFV+GGlhVhNPa.GEcatFVRLhDFs.ohYuPhRsNALslGGGlLluFNuapcAshRsssE.tplhp+hTNTlluPSlLVYIAPPhVKIVGuFGARlTSlEthGsKlLlAsNTsPNstthD ..............LYaTlAFEupAaFl..pssucplYcFchlGpsPsSGGDTYNAVpsVD-hIYFGGWVHAPA.hactcs.php..thI...sFpNKYSHVHtYDhcsspVcLLWp-uhHc...c...pcWsGEVS-IlYDPapDcLLlARtDGHtNLGlYplshcsGchchLsppPoh.KGshhcDhAhF.sl.hps.........appGlptltshDhlst+.h.....-tFs.hut..ohDGtshlt.thGsh.soh.NRhaAFh+GGlhluNPh.t..-phpFhRLhDF..s.huPhRsNsh.huGGllhsaNuapcuhhpshp..t.h.hthoNslsuPolLlYlsPP.s+IVushGARlTShp.htscllluhsThsNhtth.......................................... 0 10 12 17 +9743 PF09911 DUF2140 Uncharacterized protein conserved in bacteria (DUF2140) COGs, Finn RD, Sammut SJ anon COGs (COG4698) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
20.90 20.90 20.90 21.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.07 0.71 -5.12 28 414 2009-01-15 18:05:59 2007-07-30 16:02:32 4 2 407 0 70 269 2 183.70 34 90.22 CHANGED NhWKhuFhs...LLAl.lhshshlsstlhtPs.p....ptsspppscucs.shslsosKcpLNpllspYLpcht.ssphsYchhl.ssplhlpGshplhGpslshhlhFcPpVhcNGsVhL+spslSlGsLsLPlshVLpYlppsYcLPcaVslpPccpplhlpLschchpsshhl+AccIDLhsD.cIpFslhlP ......................................................NhWKhsFlh...L.lul.llusssh...lssplhpsp...c.........tpts.sht..pppups.shsls..osKpplNp.llstYLp-ap.....s.pcl..p...Y.+hhl.sspshhpGohplLGp.slshhlhFpPhV.h.cNGsV.pL+spslSlGsLsLPlp.VLpalcpsY.+LPcaVplssccpplhlpLsphchpsuhhl+AcpIsLh.sD.cIpFslhh............................................. 0 18 43 59 +9744 PF09912 DUF2141 Uncharacterized protein conserved in bacteria (DUF2141) COGs, Finn RD, Sammut SJ anon COGs (COG4704) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.00 21.00 21.10 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.49 0.71 -4.41 90 328 2009-01-15 18:05:59 2007-07-30 16:02:53 4 6 253 0 136 354 294 108.70 26 65.11 CHANGED Vplsslcss....pGplhlulass..scsa.......ptppshtstpspsss..sssplsFpsls.sGsYAlulhHDpNuNG+hDpsh.hGl..PpEsaGhSNNsp..hphG......PPsFpcut.Fsls.ttspslsI .............................................lplpslcss.pGplhlslass....tpsa.............ttppsht...th...ps.tsss....sshplshpsls.sGpYAlslaHDcNsNs+lDpsh...hGh..PpEsaGaSssst..hh..h.G......sPsapcutFtls..tstph......................... 0 45 93 121 +9745 PF09913 DUF2142 Predicted membrane protein (DUF2142) COGs, Finn RD, Sammut SJ anon COGs (COG4713) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
24.10 24.10 24.30 24.30 23.90 24.00 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.44 0.70 -5.80 34 382 2012-10-03 03:08:05 2007-07-30 16:03:08 4 2 312 0 85 365 107 367.10 17 74.57 CHANGED l..hhuhhhshssPshpsPDEshHhs+sh.......tl.sp...G.h......hssspsph.t.......................thttshhthhsptss.st.pthtpsshspptsshsph.s......YsshhYlstulGhhhuphlshsshhslhhuRlhNhlhashlhhhulthhsht+..hl...hhhluhlPhsla.suShssDuhthuhshhhhuhhlph....hhppphsp+...hhhhllsssLlshsK...ssalhLshlhhhls.......ttthspct......................................hhsthhsshhhslshhhhhths......pth.hhtsth............stslthllspshthht........lhhpsl.....h...phhsstF.....hGWhcs.l.s..hhhshhhslhhlhhsshsthph+.........phhhhhlslsshhhlhhhlhlshosssssh.......l.G........lQGRYalP...lhhll .......................................................hshhhhh.h.P.....hp..s.P.DE.hHh.psh.................tl.sp.....................................hspstt.t..............................................h.ht.......t.t.t.....t.t...ht.t.h..t.t....h.sh...p..t..h..s....s..................h.shhYlspulGhhl....uphl........t..........hs....h.....hhh.h....hhuRlhNhlh...ah....h....h....h...h....h....u..l....+h....h.......ht+........hh..........hhhlull....Phsl..........s....u.ShshD...shs.h...shshlhhuhhhph............htppphstp....phhhh.hlhsh.llshsK.......hsahhll...hL.hh..hls.................hp.h.httpt.....................................................................................hh.tthh.hh.h.h.hhlhh.h....h..h.hh.hh.....................h...hh..t............................ttp...h.t..h.hltp.h...hht.............hhhpsh......................h.s..h.hhs.h......h.hGhhs.ht......l..sh..h.hh.h.h..h.......h....h.h....lhhl.hh...s.thp.ththp.............................h.ph.h.hh..hlh...h...sh..l.hhlhhhh...hls..as.shstsh.............h.G........lQGRYah.P.hl.h............................................................................................. 
0 30 55 75 +9748 PF09916 DUF2145 Uncharacterized protein conserved in bacteria (DUF2145) COGs, Finn RD, Sammut SJ anon COGs (COG4727) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 25.10 69.70 18.30 16.50 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.25 0.71 -4.79 20 193 2009-01-15 18:05:59 2007-07-30 16:04:25 4 1 187 0 37 127 1 199.40 53 72.14 CHANGED ctLsu....pGAcVsllARsGpDhSch..Gl+aoHsAaAlp...s.s-G....sWpVhpLhpcCss.spSpLap-GLu-Fah..-sshph-sulllPoPplQp+LlslLsosstt..tLHsspYSllAaPauocYQNsNpWsLElLAAAhtt...........sss...........RcpspuWL+.AtYpPssl+luhhpRLuuphhssslshcDHP.t.hhuGplpTsTssSlhpaL .......QpLsu.psssVVILsRpGQDhSph..cLpaSHAGaAhR.....p.PsG.......sWRVaHpLNsCGT....AcSuLYhQGLhEFlu..DDLlspsluVlRPpu-lAsALpsLLpSuh+Ls.hhHuPRYsllAaPFSusYQNSNtWlLEVhAtA.-A.p...........lho...........RscA+pWLQhpGYQPSlVssGsacRLGA+LFsPNVaTDDpPuEhlhpGNlulsosDSVhcFl.............. 0 8 23 30 +9749 PF09917 DUF2147 Uncharacterized protein conserved in bacteria (DUF2147) COGs, Finn RD, Sammut SJ anon COGs (COG4731) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 19.80 19.80 20.30 21.80 19.70 18.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.61 0.71 -3.82 158 1014 2009-01-15 18:05:59 2007-07-30 16:05:10 4 3 685 0 331 901 232 111.90 28 71.04 CHANGED G.hWpT.......ststs..Vcltp.su.....up...lsGplhph..........................tpspsp.....hcspsllGhpllhs.hc...sssp........ap.GplhcPcsGKsYpuplpl.....pss..ppLpl+G..pluh..hls+o.QsWpR .....................................................GhWpo.......ststs.....lcltp.ss......up.....hsGplhph.....................t.t..tppssss.........hcspshhGh.plltshct...sssp.........aptGplh-Pc..sGKs.Ypschpl.........pss....tpLpl+G...hl.u.h.t.hls+o.poWpR............... 
2 94 198 252 +9750 PF09918 DUF2148 Uncharacterized protein containing a ferredoxin domain (DUF2148) COGs, Finn RD, Sammut SJ anon COGs (COG4739) Family This domain, found in various hypothetical bacterial proteins containing a ferredoxin domain, has no known function. 25.00 25.00 36.70 35.90 20.30 19.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.30 0.72 -4.25 11 138 2009-09-10 16:57:13 2007-07-30 16:05:19 4 2 131 0 61 116 3 69.00 55 39.09 CHANGED -FsGPpCsh+hlDLGIAlGSAsKsAp.LslDNRlMYohGsuApphGhl.DA-llhGIPLSsoGKNIaFDR ............tGssCAhpslDlGIAlGSAsusAuplpVDsRVMaSsGlAApcLshl..-..sc..h..V...h.uIPlSAouKNsFFDR.. 0 29 50 57 +9751 PF09919 DUF2149 Uncharacterized conserved protein (DUF2149) COGs, Finn RD, Sammut SJ anon COGs (COG4744) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 39.40 32.40 23.70 23.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -10.16 0.72 -3.89 29 201 2009-01-15 18:05:59 2007-07-30 16:05:37 4 1 172 \N 82 166 11 90.30 38 82.04 CHANGED hsulsNLhDlhlVhuVuLllAlVhshsh.phh....sppshshhpssspsshphlh.+.cGpclcphp.otp....uuGpG...p+hGssYc.hcsG+lIaVP- ....ssVsNLhDlhhVFuVuLhlAlVhphshsphh....uppchshlc.s.sGcts.M..clls....K.cGpclpcapsopp.......uuupG...p+lGssYc.LcsGclIYVPE..... 0 31 68 77 +9752 PF09920 DUF2150 Uncharacterized protein conserved in archaea (DUF2150) COGs, Finn RD, Sammut SJ anon COGs (COG4749) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.60 20.60 103.70 103.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.27 0.71 -4.70 14 47 2009-09-11 15:57:27 2007-07-30 16:05:50 4 1 47 0 36 48 0 187.60 39 94.39 CHANGED hpFYopERWpNWlsplcEsch-..s-s--suc...lhhshpDDlslAllKIlstacstpl.sc--AlscLcsl+-IVhs-st......sEDh..ll-ulQsSLlslFhAucpYltGshs.-sslcphlcsAhcAEt.--Dl-pALshsuphGutVIsGcshstchlcD..hphGLVsEWlsGlDolpsAhssscsh-E- ..pFYopERWpNWls+lcEp-h-...sEsE-su+...lLlNhpDDsslAlhKIlsta-cspl.scE-AlccLpsl+-IVLsEl-......cE-pshll-uVQTSLlslFhAAEpYlsGshst.-uslp-hlcsAs-AEt.-EDlDsALshsupsGstVlsG.ccLshphhc-..hpYGlVsEWlsGlDSlpuAMsss-sh-E-....... 0 6 29 34 +9753 PF09921 DUF2153 Uncharacterized protein conserved in archaea (DUF2153) COGs, Finn RD, Sammut SJ anon COGs (COG4755) Family This domain, found in various hypothetical archaeal proteins, has no known function. 23.00 23.00 23.40 103.20 22.20 22.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.53 0.71 -4.52 8 41 2009-01-15 18:05:59 2007-07-30 16:06:01 4 1 41 0 26 34 0 125.30 47 88.22 CHANGED sL-pWVKhQKplLcolcsh-..EchcsuDRL-LIluoRsAFpHMhRTLKAFDpWLQDPhlluHMPREMLhDVpsshhclhppLLELDI+HTSpa+-LlpKhu+EGKLsPllhhh+..stpcPsRR+cu ..pL-pWlKhQ+pllpsLcchE..-phc..s..hDRL-LILuoRsAFQHMhRTLKAFDpWLpDPhlhpHMP+EMLc-lcsphh-lL.cLlELDIcHTSpaR-hlsKhuKEGKLsPllhh.h..ppppspRcp..t............. 
0 9 14 19 +9754 PF09922 DUF2154 Cell wall-active antibiotics response protein (DUF2154) COGs, Finn RD, Sammut SJ anon COGs (COG4758) Domain \N 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.37 0.71 -4.39 37 1584 2009-01-15 18:05:59 2007-07-30 16:06:22 4 7 1195 0 260 904 13 106.40 24 44.17 CHANGED thhGstpphpp..sapacDlNltshhGcshIDLspshlsps-ssIhI+phhGsscIlVP.DlsVplct.oslhGslphhsppp.tl.Npslphpossappst+clKIhsslhhGslEV ...............................hGp.ph..p..shph.cDlplpthhGcspIDLspsh.h.p....cp...s...s..Ih..lcthhGssplhVP.hc..hp..V..sLc.s..ss.haGssp..........t..p.....t....t..h.......p.....php...pt.h..............s....h.plp..lhhshhhGsl-V.............................................. 0 99 190 227 +9755 PF09923 DUF2155 Uncharacterized protein conserved in bacteria (DUF2155) COGs, Finn RD, Sammut SJ anon COGs (COG4765) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.30 21.30 22.40 22.50 20.80 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.12 0.72 -3.88 47 322 2009-01-15 18:05:59 2007-07-30 16:06:44 4 3 319 0 107 251 923 91.50 43 52.17 CHANGED AslpuLDKloGcsp-l-ltsGcssphGsLpVslptC..RhPs-.sPsssAFstl.........slpcpspst.......lFsGWMhASSPuLsAl-HPhYDVWVhcC .........................................................AhhpuLDKITG+h.psh-lhlGE.o.spF..G.u.Lplss+sC........hp......R......sssE.sspssAFlpl..................Tlscphpc.........lFsGWMaAsSPuLsulEHPlYDVWlpcC............... 0 32 66 79 +9756 PF09924 DUF2156 Uncharacterized conserved protein (DUF2156) COGs, Finn RD, Sammut SJ anon COGs (COG4866) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
27.20 27.20 27.40 27.20 26.50 27.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.65 0.70 -5.60 156 2342 2012-10-02 22:59:21 2007-07-30 16:07:05 4 28 1932 2 547 1913 59 286.60 23 38.43 CHANGED ttllppts...tpshuplshhpcpphhh.s....ssp....uhlha....thpspshlshusPlG...s.....pphsphlppahphuppt.uhtsshYplstchhshhpc.hG.....hphh+lGp-sllslpsa.sLs..G++hpslRptlN+hc+pG..hs..hchhth.....sphhscltplucpWhpt.c..........pE+sFohuhhctthht.p..hthsl..lcs..-G................................clhAFsshh.sht....psshslDlhRtpsc.uspGhh-hlhhph...hthhptpG.......hphhshuhushtth...t................................t+hhthlh..tpphashpGLRpaKtKapPpa.ts+Ylsh ........................................................................................................t...htphs.sssh.u.thhhht..D.+th..hh..s............ssp.............uhlha......phpss..t...hl.sh.G-PlG........st..............pph.t.ps...l.ptFhphsc.ph...u..hpss.ahp.lspp...hh..h.h..pc..hG.........................hphh+lG-..Eu.ll.c.h...ps....F..sl....s.....G++.....h.p.sh.Rptls+hc.+tu.hs...hchh..p..........sph...h....p.clpp.l.u-pWhss.+..................................pEpsF..ohuh..h....c...t....t.......ht..p.....tt.h.sl......hcs.....cG..................................................................cllAFsshh...sht...................ppshol.DlhRt..p..s.....c.s...spGhh-hlhhphhth..tp..p..p..G.................hphh.shuhA.....s.htsh...........s................................t...u..................p+hhthlhph.hpphas.h.pGL+chKpKapPpa.ps+Yhs.................................................................. 0 193 362 467 +9757 PF09925 DUF2157 Predicted membrane protein (DUF2157) COGs, Finn RD, Sammut SJ anon COGs (COG4872) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.40 25.40 25.70 25.60 25.20 25.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.11 0.71 -4.59 51 469 2009-01-15 18:05:59 2007-07-30 16:07:22 4 6 466 0 144 421 111 138.90 23 29.45 CHANGED ltpWhcpGlls.........sssspplhthhs....t..ssttp.....hshhLhhlGulhluhGllhFlAtNWppls+hs+hullhshlhssthsuhhhhh.............................ptpthhupAhlhluull.hGuhluLlGQhYphuuc.h..phhhhWulhslshAhhhppsslhhl ........................................................t.ls..........tthtth...ht...................th.p.............hhh...hlhhlGulhlusullhhlAtNW........pthschh+lullhsl.lhs...shhsuhhh.h................................................................pt..pph....hsps.hhh.hu.uhh.hGuhlAllGQh.YphuuDs.a..phhhhW.slhhLshuhlhp.t.hhh.h................................................................................... 0 41 89 121 +9758 PF09926 DUF2158 Uncharacterized small protein (DUF2158) COGs, Finn RD, Sammut SJ anon COGs (COG5475) Family Members of this family of prokaryotic proteins have no known function. 22.00 22.00 22.20 22.30 21.30 21.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.85 0.72 -4.53 18 536 2009-01-15 18:05:59 2007-07-30 16:08:06 4 3 515 0 65 179 7 47.80 68 73.29 CHANGED FplGDlVpLKoGGPcMTVo.shs........htsushhhCpWFsspu.....cpthFsE-oLt ...FhVSEEVTVKEGGPRMIVT...............GYSSGMVECRWYD...GaGV....KREAFHEsELV..................... 0 6 19 37 +9760 PF09928 DUF2160 Predicted small integral membrane protein (DUF2160) COGs, Finn RD, Sammut SJ anon COGs (COG5477) Family The members of this family of hypothetical prokaryotic proteins have no known function. It is thought that they are transmembrane proteins, but their function has not been inferred yet. 
25.00 25.00 29.50 29.30 22.50 20.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.27 0.72 -3.89 37 244 2009-01-15 18:05:59 2007-07-30 16:08:49 4 1 231 0 96 238 242 89.80 49 93.67 CHANGED WMAWThPoAlFFssIushLhsMTlhE.l+pP.ss.R+GhLsIpTTRGDRLFIuLLuuAaIpLuWlGlss...........sslWhuhslSllahhhlhRhs ..............WMAWThPTAlFFhsIuhhLssMTlaE.ltpP..ss.R+GhLsIsTTRGDRLFIuLLuuAaIpLuWlGlss................s.s.l..WhAhslSllahlhlhRhs........... 0 16 43 65 +9761 PF09929 DUF2161 Uncharacterized conserved protein (DUF2161) COGs, Finn RD, Sammut SJ anon COGs (COG5482) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.40 20.40 22.90 38.00 20.10 19.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.52 0.71 -4.18 30 134 2009-09-11 04:54:42 2007-07-30 16:09:03 4 2 129 0 48 143 23 113.40 43 50.24 CHANGED llsVc.pcG..tVEVhsDPu.PasPRK....sp+++s+LLpEFpRRpGDPNsGGuTR.pslhTAYRQ-ALpsAsaLuttGPs+uucltctssVs.cATpIhpcNHYGWFERVp+GlYsLTstG+suL ...............hlsVp.tsu...tVEVlscPs........sh..t..P..R+.......s.+++sRLlcEapRRpGDPssGGuT.R.tslhTAYRQpALtCAstLs...tG.st+stclppt...ls.pAspILtcNhYGWF-RlpRGlYsLTssGctu.l........... 0 13 27 33 +9762 PF09930 DUF2162 Predicted transporter (DUF2162) COGs, Finn RD, Sammut SJ anon COGs (COG4827) Family Members of this family of bacterial proteins are thought to be membrane transporters, but their exact function has not, as yet, been elucidated. 
28.40 28.40 28.90 37.90 26.10 28.30 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.59 0.70 -5.21 15 44 2009-01-15 18:05:59 2007-07-30 16:09:15 4 1 38 0 33 43 2 225.80 27 93.79 CHANGED sssLahhGlLhullIFGlKsGlGhGFuslS.+KtlhhIsuuYhllshll...utlAsphshplhphlhshGhslHsllulhlIhsGlaTl..+cWtspG..KDs...o+pThLAlslPCPsCLuAlhhSshllusslslSshplGhhlGssFhlsl....lloshls+....thshssP.sLGshMlhlGlYFLluulllPuhlpstphphsshth.........sssshhhslllhhsLllsGalhs+tc ...........lh.hGlLhu.lhIFGlKhGLuhG...a..u.sl.o.++thhhIs....hh....Yhhhhhll.......uhlssth.s.....phhphlhphshhlallhulhllhsGlaTl.....+c..Wttps..+ss.................sppohlshhhPCPsChuAlhhShhllushlslSshplGhhl.....ullhhlhl........lhoshlh+.....thphstPh.hLGshMlhlGl.YFLluulllPshhpstph.thsshsh.........shpslhhshl..hhhsLllhGalhp+h.p..... 0 12 27 29 +9763 PF09931 DUF2163 Uncharacterized conserved protein (DUF2163) COGs, Finn RD, Sammut SJ anon COGs (COG5449) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.60 21.60 21.70 21.60 21.10 21.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.78 0.71 -4.47 52 337 2009-01-15 18:05:59 2007-07-30 16:10:04 4 4 307 0 94 298 50 154.50 26 56.38 CHANGED Mpsh.sssLtsHL.psusTTlscsWtlsRpDGsshGFTDHDpsLsh-GlsacAsoGh........ousslppssGLulDssEsh.....GuLss.....sslsEsDltuGpaDGApVcsaLVNW...s-s.......st+hlhapGslGElpRus..uuFpAELRGLsc.tLspshGR.lappsCsAsL...GDucCtlD ...................................th.thl.ts.t.h.s......shshsahlp..p.t.sGhshuFT-aDpsL.s.h..........s.......u...hh....aps.ss.Gh.........ssuthppssulus...sshclp.....Gshss............stlp..c..tclttGhaDu.Apl...c...ha..hV.s.a......scs..........stphl.lhpGpl.uclpp.ss....s.thph-l+ulsp..tLspstsR.ha.pptCsAsL...GDtpCtls............................... 
0 21 61 73 +9764 PF09932 DUF2164 Uncharacterized conserved protein (DUF2164) COGs, Finn RD, Sammut SJ anon COGs (COG5460) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.50 21.50 22.40 21.60 21.30 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.24 0.72 -4.18 47 470 2009-01-15 18:05:59 2007-07-30 16:10:22 4 1 443 0 102 324 16 74.70 32 88.77 CHANGED Iclsp-p+pphlspIpcYFpcEh-p-lGshpAchLLDFhscElGPhaYNpultDApthlpc+hpslpp-la....tlE......K. ...............Iclsp-p+pchlsplpcaht--hp.ElGpFpAchLl-FhhcclGPhhYNpultDApphlpc+hpslpp-la.lEp................ 0 23 53 79 +9765 PF09933 DUF2165 Predicted small integral membrane protein (DUF2165) COGs, Finn RD, Sammut SJ anon COGs (COG5472) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.80 20.80 21.50 21.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.85 0.71 -4.51 25 244 2009-01-15 18:05:59 2007-07-30 16:10:43 4 1 207 0 58 154 29 137.60 37 94.20 CHANGED h.hlRluKslhlhulAhFusLVuasNlTDYsoNasF.VpHVLsMDTs...FPss..sltaRAIssPhlapsuYhhIIshEslsulLChhGuhpLh+sh.psssssFppAKshAlhGlsluhLlWhsGFhsVGGEWFsMWtScpWN..G.puAFRFhhhlllVLIals.s- ........h.RhuKhlhshslAhasslVshsN.lsDasoNatF.VpHVhsMDoh....a.ss....slhhRAIsoP.......hlpphuYhhIIhhEshsulhhhhGuhthhtsh.t.sstsFppuKhhshhuhshshlhW.hsFhslGuEWFsMW.Sp.WN..G..sAhRhhhhhhhsLlals............ 1 18 38 48 +9767 PF09935 DUF2167 Protein of unknown function (DUF2167) COGs, Finn RD, Sammut SJ anon COGs (COG4714) Family This domain, found in various hypothetical membrane-anchored prokaryotic proteins, has no known function. 
25.00 25.00 27.90 29.80 22.00 20.10 hmmbuild --amino -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.45 0.70 -5.54 15 227 2009-01-15 18:05:59 2007-07-30 16:11:22 4 3 219 0 39 156 51 235.70 49 77.40 CHANGED ulp.tsGsIsLstcpAsLsLPsuahaLsssDsp+lL.-taGNPPsssp..LGhlhPss....sspsWhVllpY-ssGYVoD-DAscIDhs-LLpsM+puscEsNcpRccpGhsslcllGWAptPpYDsso++LhWAtcL+spuu......ppolNYslRVLGRcGVlsLNhVAuhspLsplcpphpplLuhscFscGpRYADFssssDKVApYGLAALVuGs....lAuKhGLaAhlusFLtKFhKllllullulhu ................................sA.ssGPocIsLG.DcATLNLPcGFsalPAc-Auha...M..+ph..G....NhsD--h.hhGLVh.c-.......hshalslEYDDSGYVKDDDAKsWDADELhcsL+cGTcEuNK-Rht+Gl-sIEllGWlEKPsYDAssHRLIWSss.lpDhGsst...s-puVNYNTYlLGREGYhSLNLV...TDcuslD.c-h..PhA.c.clLoul+FNsGpRYADFNcSTDKIAEYG....LAALIGGl..............AAKKlGLLAhlGlhL....lKFWKVsAlGVlAlGA................................... 0 6 15 29 +9768 PF09936 Methyltrn_RNA_4 DUF2168; SAM-dependent RNA methyltransferase COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4752) Family This family has a Rossmanoid fold, with a deep trefoil knot in its C-terminal region. It has structural similarity to RNA methyltransferases, and is likely to function as an S-adenosyl-L-methionine (SAM)-dependent RNA 2'-O methyltransferase [1]. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.22 0.71 -4.79 28 174 2012-10-01 22:53:19 2007-07-30 16:11:38 4 2 169 1 76 201 25 181.90 45 76.98 CHANGED hsslhlALlHYPVhsKctclluTAlTNLDlHDIARouRTYGlppYYlVTPlpsQppLlc+llsaWpcGaGupYNPsRpEALplV+lssoL--sl--lpphpGpcPhllsTuA+.h.tsssloappl+chl..pscpPhLLLFGTGWGLspElh-psD.hlLEPIcGt..ucYNHLSVRuAsAIILDRLhGc ..................................................t.tplYluLVHYPVhNKptcllsTulTNhDlHDIuRsu+TYslptYalVsPlcuQ+cLsp+IlsaWppGhG..upYNPcRp-Ahphlclssol-sslccIcc.tcGp+Phl..l.sTuAR.....t........hsso.....loappl+chl..ps-cPhLlLFGT.G.WGLscElhptsD.alLE..PIpG.t..ucYNHLSVRuAsAIILDRLhG...... 
0 44 67 75 +9769 PF09937 DUF2169 Uncharacterized protein conserved in bacteria (DUF2169) COGs, Finn RD, Sammut SJ anon COGs (COG5351) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 38.20 32.40 24.10 23.70 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.78 0.70 -5.33 44 398 2009-09-11 05:29:39 2007-07-30 16:11:59 4 39 234 0 153 377 32 263.10 25 49.67 CHANGED -.pGpphlslslKuoasls.tshh........sppQtslh.u-pah.G-Puhouhh.hpsDhshhKstsDlllpGpAauPtGcsssplp..VslcVGs.....hpKtlpVhGcRhW....h..ssstsosPpPFpp..hPlsa-pAaGG..........s.....tsh.cNPlGpGa..tpppttt...thslPslEts.sp.lpsss.....tp.t.PsGFGPlshtW.sRt....phAGT.YDcpWhcs...chPhL....PpDaD.RaapsAPsDQphst...h.pGGEtlpLhslps.p...sp.lpapLPtht.hshhh........h.hsss.........hpphphplDTlhl-s-tpplhLlWR ..................................................................h...tt.hhslssthshpl...............st.t.t.lh.ss..a....sts..t.t..uhh.......hDhs..Ksts-hl.l.sGpAau.s.tsp.sspt..hpstlplss.................hp.Kt...lplhGsR.a.........................sshthstsp.Pa.p..hslsaphAaG.G..........................................sh.tNPhGhGhh.t....tt........h......................hPslp.s..pp..hptst......................tt...ssuhGslshtW..Rh.................phh....Go.aDpt.W.hpp.......phPh..h......................PtDhD.+aapsAssDQ.hsth......h..puspphpL..hshpsp...........tt.lthpLPthpshshlh.........tpst........................hpphshphcTlhl.s.....c..p..p...phhlha+............................................ 0 56 94 124 +9770 PF09938 DUF2170 Uncharacterized protein conserved in bacteria (DUF2170) COGs, Finn RD, Sammut SJ anon COGs (COG3789) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
19.00 19.00 19.80 27.60 18.90 17.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -4.82 28 545 2009-01-15 18:05:59 2007-07-30 16:12:18 4 1 536 0 65 200 8 131.50 57 94.39 CHANGED hs.WslpsLtsuLsspsphpstphsh.llp..Gs-ssLplshp-hs-LPlaluloucQ.IlspuhLasss-Vp-..tsthN-tlL+sp.phhPLSohGltth.csc-hYshFGALSssSolsslltEltTLA-Nslcss-shpsahp .....................M..sWsPhsLAsALpslsE...pph-...ls..NsEuuLIIKMNDYGDL.IslLFTScQ.hlIETaICPVsoIss..ssEFNpFLLRNQ.KhhPLSSVGIopV.pQEEYYllFGALSLpSSLcDIlLEIToLVDNALDlAEITc-YS.p....... 1 12 26 44 +9771 PF09939 DUF2171 Uncharacterized protein conserved in bacteria (DUF2171) COGs, Finn RD, Sammut SJ anon COGs (COG3798) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 25.10 26.10 23.70 24.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.20 0.72 -3.97 17 134 2012-10-02 14:14:57 2007-07-30 16:12:29 4 2 117 0 59 143 4 66.70 44 70.39 CHANGED pI+EHh-VluuDGsHVGTVDHl-G.scIKLTKpDts.........tsGcHHaIPhuWVspV-ss.+V+Lstsu-pshp ...pI+-Hh-Vluu-Gs+VGTVDHl....-.G.spIKLTKsD...............sGpHHhIPhuaVscV-ss.pVhLstsucpsh.p.... 0 13 33 48 +9772 PF09940 DUF2172 Domain of unknown function (DUF2172) COGs, Finn RD, Sammut SJ anon COGs (COG4310) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. An aminopeptidase domain is conserved within the family, but its relevance has not been established yet. 
19.70 19.70 19.80 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.26 0.70 -6.09 18 203 2012-10-02 19:46:12 2007-07-30 16:12:41 4 4 195 1 42 243 564 367.60 46 85.76 CHANGED LplHEVPSGTpVFDWTVPpEWNl+-AaltsssGc+llDFtcsNLHlluYStPVctploLcELpsHLaSLP-pPDhIPYhTSYYpcpWGFCLsapphpsLp-GcYcVhIDSsl.ssGsLoYGEhllPG..-occElLlSoalCHPSMANspLSGssVhshLAchLtspspR.YoYRhlFlPETIGSIsaLSRN.-clcc.lctGhVLoClGDcp.saoahtSRpG....sshhDRlutHVLpcps.psachhsFhs.hGsDERQaCSPGhDLPVsslhRohYGpYPEYHTStDsLchlsPcuLtsuashlpcslplLEsNpsYh.s.sshGEPQLGKRGLYsslusppph.t.pths.........hL.lLshuDGppoLLDIAEphshsFhcltshsctLhcsuLlc ...............................................................................LchHpl.oGTplaDWhVPpEWsl+-AaIhs.sp.Gc+IsDFtcpNL.HllsYS.slctchsL-ELpsHLaol.-.PDhIPYh..T..SY..Yp.cpWGFClsHsphp...tL....c......c......G..c..Yc..V..hIDuch..csGs..LsY.uEall.PG.........poccElLlSsal..CH.....P....S.h........AN...spLSG.s..lshhLA........+hLh.....u...h......cp...R......Y......oYRF..l.h...........h....P....t....T......I...GuI..saLu..+.........ph...-.pL.c.....+.............V+tGhV.LSClG....Dst.......sh...o.....h.h..+.S.++s......s.s....D.+lhhHs..L.pp.p..........p.sa.c...hasFhs....h....G...DERQas....u.PGhNLslsslpRoh.Yu..cY..spY.HTStDsL.s.FIo.cuLts.uhph..htc.h.I...hL.EhN.tsah.N.o.hs..EPpLG+RGLYto.....lus...ps.....ch........h.........hh.hLshsDGppsllDIAp....hhsh.hhphtphh-+ltthGLl................................................................................................................................................................................... 0 15 27 35 +9773 PF09941 DUF2173 Uncharacterized conserved protein (DUF2173) COGs, Finn RD, Sammut SJ anon COGs (COG4831) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
24.20 24.20 24.90 26.10 23.60 24.10 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.26 0.72 -4.11 11 47 2009-01-15 18:05:59 2007-07-30 16:12:56 4 1 30 0 31 40 0 106.30 40 93.38 CHANGED .pLccLMplsGVhAAscFosDGcLh..EacG..plscchAchsAphCuANshhspMpAcuaophoGpsGWpPhpGWslsGs-auVClsGshuVFVcpscssasclhcsLpps ..t.cLc+LMpLpGslAAGcaosDG+Ll..cYpG.....shsc-hA-MlApMsAANshMuphpA-uaothS......Ghc.WsPhhGWAVsuu-YsVClhGNhGVFVchscADFNplF+sLtc..... 0 11 23 27 +9775 PF09943 DUF2175 Uncharacterized protein conserved in archaea (DUF2175) COGs, Finn RD, Sammut SJ anon COGs (COG4847) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 25.50 29.00 24.10 23.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.43 0.72 -3.96 10 32 2009-01-15 18:05:59 2007-07-30 16:13:27 4 1 32 0 18 31 5 97.60 40 92.31 CHANGED p+WKCslCGcsIhpGpLFTFhSc.GsVHa-ChccctspKhpt-s..plssL.pL--hlpcslVhtccLsplu-sEElKcllcpscKplE+tAAcLT+clpch ..pKW+CslCspsIh.spLFTFhpp.GslHasCLccchhppsph-s...thhL.pL--.L+phllhtpcLppltp.EEsK+hlcphcKshE+pAuhLTphlpc.h..... 0 4 8 13 +9777 PF09945 DUF2177 Predicted membrane protein (DUF2177) COGs, Finn RD, Sammut SJ anon COGs (COG4852) Family This domain, found in various hypothetical bacterial proteins, has no known function. 20.10 20.10 20.10 23.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.68 0.71 -4.15 50 240 2009-09-11 01:55:54 2007-07-30 16:14:14 4 1 231 0 82 203 342 125.50 36 93.84 CHANGED Mpph.llhYluohllFlsLDhlWLuhlucshYcsplGslhtcp.....hRhsPAllFYLlYluGllhFslhPultsushtpul.lhGALlGhhsYuTY-LTNhAsL+sWshphsllDlsWGshlTusuuhhGhhl ............phlhhalsohllFLllDhlWLshhucph...YpptlGsLhtst.....schsP.AllFYllYlsGlshFslhPulpp.........s.sh.st.......sl.lhGALhGllsYuTYDLTNhATL+sWshphsllDlsWGohloulsushuhh.............. 
3 26 46 61 +9778 PF09946 DUF2178 Predicted membrane protein (DUF2178) COGs, Finn RD, Sammut SJ anon COGs (COG4854) Family This domain, found in various hypothetical archaeal proteins, has no known function. 22.50 22.50 22.50 23.20 22.30 22.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.38 0.72 -4.32 12 56 2009-01-15 18:05:59 2007-07-30 16:14:43 4 1 44 0 46 58 1 111.00 20 81.38 CHANGED ppaphllhhlshhhGul......luaAhssGsshlAlhsVhhGlhhhhhl+p+l-s...VlEDERhh+luEKAShhTlplhsl...shALuGshlh....shp..hsthsphuhhlsaushhlllLY ..............................hhhhhllslh..hshl......huausps.....ups...hlu.hhshhhuhhlh....hhh...cp.plcc.......l...l.EDERsh+IsEKAuphTlp...lhhl...shul.tslhhh....hht....t...hh.h.h.h............................................................ 0 6 25 39 +9779 PF09947 DUF2180 Uncharacterized protein conserved in archaea (DUF2180) COGs, Finn RD, Sammut SJ anon COGs (COG4855) Family This domain, found in various hypothetical archaeal proteins, has no known function. A few of the family members contain a zinc finger domain. 20.90 20.90 20.90 21.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.54 0.72 -4.22 13 46 2012-10-03 10:42:43 2007-07-30 16:15:47 4 1 38 0 34 45 0 67.50 43 89.15 CHANGED MKCYhCAcEGKDT-AVuICIVCGMGlCh-HllR-ElshWcGGYPaPsc........KlKcsLPRILCh.CasAhpt ...MKCY.CstpGK.Do.-AVAlCIVCGhGlCh-Hsl.+..E-l.shhp.G.uYPhstp.........+hpcsl..PRlLC..ChsAhh.t....................... 0 8 25 28 +9780 PF09948 DUF2182 Predicted metal-binding integral membrane protein (DUF2182) COGs, Finn RD, Sammut SJ anon COGs (COG5486) Family This domain, found in various hypothetical bacterial membrane proteins having predicted metal-binding properties, has no known function. 
25.90 25.90 27.50 53.40 24.10 25.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.62 0.71 -4.39 51 254 2009-01-15 18:05:59 2007-07-30 16:16:03 4 2 201 0 94 263 221 188.40 34 70.97 CHANGED hhsMWslMhsAMMLPoss...PhlhtYpclhttt......spstshhshhssGYlhlWhuaullAtslphsLtthuhhsshhs.sts..hluuslLlsAGhYQFoslKptCLspCRsPhsahhpc..WRsG...htuuh+hGl+pGhaClGCCWuLMslhFsuGlMsLhWMuhlolh.......hslEKl....sshGctls....+slGslLlshuhhhl ......hsMWhlMhlAMMLPoss.Phlhtapclhttt......sppshth.shhssGYLssWsuFullAtshphsLpthuhhs.shhs.ss...hluuslL.hlAGhYQaoPlKptCLspCRsPhsa.l..h..pp...a..R.su...htsuhphGl+HGhaClGCCWuLMslhFssG.sM.slhWMuslshh.......hhhEKh....hPt.Gc.h.lu....+slG..ssLlshushl.h................. 0 20 51 70 +9781 PF09949 DUF2183 Uncharacterized conserved protein (DUF2183) COGs, Finn RD, Sammut SJ anon COGs (COG4850) Family This domain, found in various hypothetical bacterial proteins, has no known function. 26.50 26.50 26.80 26.50 26.00 26.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.23 0.72 -4.07 76 563 2009-01-15 18:05:59 2007-07-30 16:16:46 4 9 433 0 307 521 33 99.20 37 20.65 CHANGED shaYVSsSP.WNLashLppFlpppthPtGs.lhL+ca....ssshpphhpsspt...+KhpplcclhppaPphcFlLlGDoGpcD.s-lYsclscpaPspltul..aIRpl ...............................................haYlSsoP.WNlashLppFlpp....psa..PtGs.lhL+-a.........sss.pshhpsusp......+Kpstlcclh.psFPch+alLlGDsGQpD..clYs-.hscpa.Ps+ltAI..hIRp......................... 0 99 187 265 +9782 PF09950 DUF2184 Uncharacterized protein conserved in bacteria (DUF2184) COGs, Finn RD, Sammut SJ anon COGs (COG4834) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
26.80 26.80 26.90 27.10 26.50 26.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.27 0.70 -5.32 26 275 2012-10-03 06:22:39 2007-07-30 16:17:03 4 3 246 0 39 242 28 236.10 25 74.81 CHANGED sGtA..phlu.stusDlPtl-lshscppsslthhuhuasaolpELcsApth.GtsLsspKspusppshppplsplsahG.....-sspGhsGLlNp.sulssssssss.......W......tspTs-pllsDlsphlsslhstosh..sthssplLLPsspathLupphhsssssholLcalpcsNh.........hs..tG.sLsIpsltt.LcssGsuGs.......sRhlsYcpssphlphtlPhshphLPs.Q.pslchcVPhht+hGGlplhhPtsltYhDGI .......................................h...t..phlu.ptuss.lstlslshscptsslthhuhthsaol.ELpt..Apts.Gps.ls..spKhc..uhphthphc.schsahG.......-ssh.s...lpGLlNt.sslssssssts...............W.......sssTs-...cIhss...lsp.hlsssh.tt.osh....sh.........s.Ps....s.LhlPsppa.shLssphhs...s..s...s.s.polLpalppNsh.............t..sLs.Ip.....s.......lh....L.......c......st.G..susp.........cp.h.lsY.........spc.........chlph...l.hshpt....lss..Qhc..s.lh..hsshht+hGuVt.hh.tsh.hhDGl................................................................................... 0 4 23 30 +9783 PF09951 DUF2185 Protein of unknown function (DUF2185) COGs, Finn RD, Sammut SJ anon COGs (COG4859) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.30 26.60 23.60 24.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.93 0.72 -4.19 11 257 2009-01-15 18:05:59 2007-07-30 16:17:12 4 19 221 0 31 197 7 88.00 33 31.24 CHANGED sIVSNsVh.-+R.hsFha+EssphEsDSGWRhFSG.EoD-YssDP.cNFpIlSlusIhplDsolhtlLppP.ssAaEhs--Gs.F.cltDa .....CIsosplh...ps.ptlsahaRE.pP..s..t....p..s...DSGWRhhSGsEoD-YhscP....cNhsl....lslsslhph-Psllsl.ls.t.P..Go....saph..scpGt.a..l......................... 0 17 22 27 +9784 PF09952 DUF2186 Uncharacterized protein conserved in bacteria (DUF2186) COGs, Finn RD, Sammut SJ anon COGs (COG4861) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
20.10 20.10 20.40 22.00 19.60 19.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.77 0.71 -3.94 11 178 2009-01-15 18:05:59 2007-07-30 16:17:31 4 3 169 0 45 154 10 143.50 27 41.90 CHANGED RLh.ctccLL-pWAtsYPs+Lps+hphhcFsu..st.sWhpphsltshsA......hhuGEsAAscLss.l+PsshslY....stpshsscLlhptRL+c.......sspGs.Vplhcs.....FWs.p..................shtchslssPhllYADLluoGDsRshEsAphIh...-chls ......................................Lh.cscpLL-tWsts..YsstL.ttp..h..phht...h...su.......p.......p..hh....t.h..s..ls..s...t...............thuGEsAus.h.st..hp...Pts.h.hlY....sssshsscLlhpt+h.+.........sscGs...lhlhch......FWps...................ssp.p.sslsPshLlYADLLsos-sRshEsAchl+..-....t............................................. 1 18 30 37 +9785 PF09953 DUF2187 Uncharacterized protein conserved in bacteria (DUF2187) COGs, Finn RD, Sammut SJ anon COGs (COG4873) Family This domain, found in various hypothetical bacterial proteins, has no known function. 21.60 21.60 22.00 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.74 0.72 -4.24 7 524 2009-01-15 18:05:59 2007-07-30 16:17:44 4 1 440 0 39 139 2 57.60 57 85.89 CHANGED MphAcVGsIIEFKsGLpGIVEKVNENSVIVDLThMENa+-L-l-p+TVVNHKNYKII ..................MplAcVGsIl......E.F....h.s....G...l..pG+V.EKlN.-NSVIVDlTIM-NFs-...L...D..l...-KTVlNHKpYKIl....................... 0 5 16 26 +9786 PF09954 DUF2188 Uncharacterized protein conserved in bacteria (DUF2188) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4876) & Jackhmmer:B5ZC26 Domain This domain, found in various hypothetical bacterial proteins, has no known function. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.02 0.72 -4.05 101 747 2009-01-15 18:05:59 2007-07-30 16:18:01 4 2 643 0 192 487 10 59.90 24 57.15 CHANGED haVs..P.tpsst.......WtV+tp....Gss+s.sphasTpp-AlptA+phA.cp....p.su-lhIHsp.D.GpIpccpoa ..................pptt........Wtl+st....sspps.otphss+p-AlchApphu.+p.............p.ssplhl+pp.D..spht............................... 1 56 107 147 +9787 PF09955 DUF2189 Predicted integral membrane protein (DUF2189) COGs, Finn RD, Sammut SJ anon COGs (COG5473) Family Members of this family are found in various hypothetical prokaryotic proteins, as well as putative cytochrome c oxidases. Their exact function has not, as yet, been established. 22.80 22.80 22.80 22.80 22.60 22.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.81 0.71 -4.20 69 418 2009-01-15 18:05:59 2007-07-30 16:18:23 4 1 318 0 155 393 69 127.20 27 45.87 CHANGED llhshsuGFhLl.GPh..hAlGLY-lSRRhEp...Gcsssh..tpshts.....hstshsphhh........huhlLhllhlhWhhsAthlaAlhh.s.ss.shss.hsshht.lhhsspGhthlhlGsslGulhAsllauloVl...........ohPllL.D ................................................hlhshssGFhLl.GPh...hAlGL.Y-hS+chEp.......Gptssh..tpshps.....hs.shsphhs..........huhlL.hlhhlhWhhhuthlaAhhh.u...sh.shss....hsshht.shhsspshthlllusllGslhAhlsauloVl...........ohPhhlD.......................... 0 36 96 124 +9788 PF09956 DUF2190 Uncharacterized conserved protein (DUF2190) COGs, Finn RD, Sammut SJ anon COGs (COG5471) Family This domain, found in various hypothetical prokaryotic proteins, as well as in some putative RecA/RadA recombinases, has no known function. 
20.80 20.80 21.50 21.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.31 0.72 -4.27 30 380 2009-01-15 18:05:59 2007-07-30 16:18:52 4 1 318 0 62 202 18 105.40 45 95.92 CHANGED MKNYlQsGcslsls.Ass.uVsSGssVllGs..lhuVAh.sssssGpssphhspGVF.sLPKsuussh.shGstVYWDsospshTs..suouNshl....GhAhtsAussu..sossV+Ls ....................................AKNF.Vp-.GcTlslV..Aus...uIpSG-hV.V.Gs.............lhAVAl.TDIstG.....EsG-...G.....h...sEG.V......F.hLPK...h...p.s..D.Dh..psGppV.........YLKsu..t..lQLss........usu.ssh.l.....GVsWtsAusuu..ppVsVKlN.......................... 0 15 35 48 +9789 PF09957 DUF2191 Uncharacterized protein conserved in bacteria (DUF2191) COGs, Finn RD, Sammut SJ anon COGs (COG5450) Family This domain, found in various hypothetical bacterial proteins, has no known function. 23.40 23.40 23.60 23.40 23.30 23.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -8.03 0.72 -4.15 51 685 2012-10-02 18:44:02 2007-07-30 16:19:12 4 2 332 0 155 436 50 47.60 33 65.80 CHANGED MRTslslDD...pLlpcAhclsG.lpoK+plVchAL+sLlppcp....p+...th.cL .........RTsIclDD....cL...l..scA.hchsG..hpoK.+..s..sVchAL+p.llpttt......tc.....p....hh...................... 0 43 111 140 +9790 PF09958 DUF2192 Uncharacterized protein conserved in archaea (DUF2192) COGs, Finn RD, Sammut SJ anon COGs (COG4879) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.60 20.60 20.60 22.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.44 0.70 -5.13 12 48 2009-01-15 18:05:59 2007-07-30 16:19:24 4 1 46 0 29 44 0 223.30 36 83.96 CHANGED hs+plaRcRIcshhsLlpclLpt...thhoRpsllt.LpcsYcccsIcPhRGhu.spslY-KElAolYVVGKYGhGlhs-..t.FDclF.hEpph-EAhcll......lstphpEAhtchss.l..ts.ccspltRhLRllFTtslhGahsEptLlKsl+shp.s.hP-hpcphhsYutFYsAaKlAEsIAtGcIRs+.shcAhKhuhAlclGh..p+slPsDchIAhIAppVapVscphLs+lL ..........................................sKpla+cRIclhs-lhupllcp....psloRpsll-llcpsYcccsIcPhRGhs.ssslY-KELsolYVVGKYGLGLh..-.-...phF-+lF.hEpph--shclI........lsspspEAhcch..ss...t...sc-hlARsLRhsFTpslhuF.sE-chh+sL+slphu.hs-lccsspsau+FYsAaKlAEsIApGtlRs+.shpAhKpAlAlplGh..phshPpscYluhIAccVFpVscclLs+lL............................ 0 11 17 22 +9791 PF09959 DUF2193 Uncharacterized protein conserved in archaea (DUF2193) COGs, Finn RD, Sammut SJ anon COGs (COG4883) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 403.50 403.00 22.30 17.80 hmmbuild -o /dev/null HMM SEED 499 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.74 0.70 -6.24 9 32 2009-01-15 18:05:59 2007-07-30 16:19:37 4 1 31 0 23 33 2 497.80 65 99.61 CHANGED MpELYcKMlDEAMuAQ+ADV-slK+KRGpcFplpDAKPYVDsVpKMcssusQSpuVhsLHhsSVpuHa-lLsuLTcTVRPEDDPFVEHYQTPsILEILh-EDstFpKSl-tFlcuItKuEALIG+EulRRYGGFYGPTCVVDFALlPGSTSNVVNRILppsDIPhcHKQAILAAKSWGMNTSYGIG-hFAptlEtGsThuEAs+cEI-MlQhlYcpPlEAQAcLMDsAGppSFDsRKYMpsY+p+McssV+AAhD-GVHYGNIlTVPAYCVGDlAHHIuQSTFNMCKDDVVMAlIEAsTsVMEoTLpsAlspFKsEYslLoLATGuSAsAsEaILELDGFNAhMlVDLLTKRFHNYV.haPTRGAAAELHNsDFMDMIYRGW+hlDKAcRspsutpt.l.PhVuGasVDLSPIacNEVlMNPQRYAYPACAITVRFSALMRLADYPCLLTSEPVTAThMTNIIALHKEssuuPARsCKsCAsAsLlDaRHpYCQWKEAV ..MpElYcKMlcEAMAAQpADVpsIKcKRGpcFKIcDAKsYVDsVsKMcslssQupuVlcLHhcSVpuHa-lLsuLTcTVRPEDDPFVEHYQTPsILEILY-EDspF+KSl-KFIcsIs+uEALIG+EslRRYGGFYGPTCVVDFALIPGSTSNVVNcIL+psDIPtpHKQAILAuKSWGMNTSYGlG-sFApslEsGtThuEAlccEIchlphIY-pPl-AQA+LMDst..G....HpSFDVRKYMspYKK+McssVpAAh-ssVHYGNIVTVPAYCVGDIAHHIuQSTaNMCKDDVlMulIEAsosVM-sTL+ssl..sp..FKsEY-lLoLATGSoAsAsEYILELDGFNAsMVVDLLTKRFHNYVQLYP.TRGAAAELHNsDFMDMIYRGW+hlDKARRs+sGsts.ltPcVuuacVDLsPIccNEVlMNPQRYAYPACAITVRFSALMRLADYPCLLTSEPVTAThMTNIIALHKEssuuPARsCKsCAoAsLlDFRHpYCQW+EAV................ 0 8 18 20 +9792 PF09960 DUF2194 Uncharacterized protein conserved in bacteria (DUF2194) COGs, Finn RD, Sammut SJ anon COGs (COG4878) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
26.00 26.00 26.70 26.80 24.40 25.90 hmmbuild -o /dev/null HMM SEED 585 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -13.07 0.70 -6.47 8 111 2009-01-15 18:05:59 2007-07-30 16:19:48 4 3 107 0 24 106 3 535.50 29 88.40 CHANGED lhllLhh.hlu...........................................................................................................IshhhsKhGlpYshss+p.h..t.........s.sssslspsh-p....plh.................................LaDsNs-supcl.......................c..........pphcphLcYh+hphchls.u...pcshPSl.cY+sllIhhs-lstls-..pslhsalEsGGsllhAsshc+ssthssI...hGlp..p.-hsclcslhlspDlh..lGuscs.thpt.F.cpulslsLsscsplphlossc.pTPllWppchGpGphlhhNhsllpKp.lRGlassuhuhhsssslhPlINutshYlDDFPuPlPuG-tc.lp+-a.shoss-FYpKlWWPDlpKLuEcYsIKYTGlhIpsYpssTsss.shhctpspscalhaGppLL.ssuGElGlHGYNH.PLs.cs.s.hcc..tYh.Wts+EshtcuLcpLp+Flps.LhsshphssYVPPSNllscpGhcsLhcshPpIKoIuSsYhssc.ptsY.QEFplt-..cGhlclPRhTSG.hhsscphhhsshssLs.hGVhsHFlHPDDlLDtDRu.uphuWschhKsLcshlchl+cpaPaLRshTuSEsusulpcYtshcsphphscsulclslpshccpsthhlRts+sp..KlpGGp.....lh+h..susLYl 
...........................................................................h..............................................................................................................................hhp..s......t...t.....................tt..th.h..tth..tt.phh...........................................lhcsssp...upph.................................hpphcphhc.h+hthp.hs.s...pt.hsshtsYpslllhhsph.ptlsp..h.pl...hpaVppGGsllh.....utp.pp.ss.h...ssl..........hGlpptt..t.t.p.s.pslhhppshh...Ghsp.h..t...s..t...h.cou......h...p.......l...............sLs......pssp.lhh..ho........scs....hP.llWppshGpG+llhhNs.s.h.hpcp.sRGlhssshuhhssshhhPhlNupshalDDFPuPlPpspsc.lpc-Y.phosp-FYpplWWsDhhclucchsl+YTGhhI..tsYpspsps....p.hp..........t..p.s..h.........pph.hhaGppLl....ppGGElGhHGYNHQPLhhts.phhtth..sYhsWtsppsMttulppLhp..ahpp.hhPphphpsYVPPSNllsppGhpsLtcshPplcsIuShYhsspp....s..a.QEFthsp..c..s..lhphPRhoSG.hhhsc.p.hthhstlshhhlhoHFlHPDDl.........lD.............t-........Ru.tphsWpp.......Lhcshcphhp.lppphP.l+shTtSEhssthppatshphphphp-stlplt..htshhptshhhlRhpps.....plpsGp.....hpcl....ssplY.................................................... 0 11 14 20 +9793 PF09961 DUF2195 Uncharacterized protein conserved in bacteria (DUF2195) COGs, Finn RD, Sammut SJ anon COGs (COG4893) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 73.70 73.60 22.40 16.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.52 0.71 -4.32 6 124 2009-01-15 18:05:59 2007-07-30 16:19:56 4 2 121 0 8 50 0 122.80 52 92.21 CHANGED sllLuusshs.uPhhAsAus.h.l-NsLuAClch+sut.....psucshltl.hslph++ShG-CGChSALlpYpslstpt.....susphsLQcGlhsstpsssR...TLsLAsDssLst-pplslplGCsssc ....l.hhphsuGh.lslhA.sGp.l.IcNsLAtCVDh+st+.....pphsNllhlpsshpL+KshGtCGChSALspYsS.lsp......RtSthhLQpGlhsLtKpss+...oLsLAs-stLV+DtpltlpluCs.P.. 
0 2 6 8 +9794 PF09962 DUF2196 Uncharacterized conserved protein (DUF2196) COGs, Finn RD, Sammut SJ anon COGs (COG4895) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.20 25.00 24.00 22.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.88 0.72 -4.18 39 362 2009-01-15 18:05:59 2007-07-30 16:20:21 4 3 326 0 139 286 19 60.90 60 66.71 CHANGED hsGppRssIp.GhpVsI.VLKpDQRoGpLTcGlVp-ILTpSssHPHGIKVRLcsGpVGRVpcI .................MsGppRusIpsGhcVsI..VLKp..DQRT....G..+....LTcGlVKDILTNSssHPHGIKVRLp..D.G.Q..VGRVQpI...... 0 47 101 120 +9795 PF09963 DUF2197 Uncharacterized protein conserved in bacteria (DUF2197) COGs, Finn RD, Sammut SJ anon COGs (COG4896) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.20 22.20 22.60 22.20 21.30 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.96 0.72 -4.01 13 385 2009-01-15 18:05:59 2007-07-30 16:20:30 4 1 354 0 37 121 0 53.10 48 86.80 CHANGED M+VKCllCDpl-pID-cohhAKRLRNRPIHTYMCc-Cc-RIccpTppRhsTGpFph ...hpVpClICDpKshlD-coscuKRL+N.p.PI+TaMCc-CcpRlsp.p.c...utpa................ 0 13 22 31 +9796 PF09964 DUF2198 Uncharacterized protein conserved in bacteria (DUF2198) COGs, Finn RD, Sammut SJ anon COGs (COG4897) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 30.20 30.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.41 0.72 -4.22 9 297 2009-01-15 18:05:59 2007-07-30 16:20:41 4 1 297 0 21 75 0 72.70 63 95.13 CHANGED hphlhALhFPslLVlLFoRVTYN+aVGllLTsALlsAShhKGYTcohalIslDlsSLlAGaLahsphctct+pc ..IWYhSAAFFPClLVVLFSVITRSKWVGTlLTLILIGASlYKtYFHNEWIIFIDVVSLLAGYLIIDQLEFHK+Qc.... 
0 5 10 17 +9797 PF09965 DUF2199 Uncharacterized protein conserved in bacteria (DUF2199) COGs, Finn RD, Sammut SJ anon COGs (COG4899) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 27.30 29.80 21.10 19.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -11.08 0.71 -4.36 6 159 2009-01-15 18:05:59 2007-07-30 16:20:54 4 2 152 0 24 125 3 142.40 40 83.45 CHANGED YpCssCGEhHEs.P.SauFcAPssYhplsEEERps.pschuDDLChIp......DGpcaF.IRslLEIPIhGp-EsahWGVWVSlSEsSFc+Yh-oF..spssps.saFGWLsNhlPhY.s.Thu......LcscVHhpsDGpRPhlhLacus.Hs...LshD.ccGI ...................................CtpCst.H......tthP..uhGhpAP......Y...p....lstcE.R.s.....ps..c.....hs.....uDhCll.......DtpchF.IRusL.IPII.sp.....cE...sh.aGVWVSlSccSFsch...ppa...c..c..p..t..p..s..s..sYF.GhLssclPsY.s..Ths......L+s..sVphpssGpRPhlpLc.p.o.s.HP...Lsh-.hpGI......... 0 9 18 21 +9798 PF09966 DUF2200 Uncharacterized protein conserved in bacteria (DUF2200) COGs, Finn RD, Sammut SJ anon COGs (COG4898) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.00 22.00 22.90 23.20 20.20 19.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.39 0.72 -4.20 37 512 2009-01-15 18:05:59 2007-07-30 16:21:09 4 1 506 1 101 279 74 109.20 58 93.45 CHANGED RIasMoFASVYPhYlsKsEKKGRT+pEVDplIpWLTGYspppLpptlscpssFETFFspAPplNPstsLIpGVICGhRVEEIEDP..LMppIRYLDKLlDELAKGKtMEKILR ........+lYsMpFAuVYshhIsKsERKGRp..p-EVcpllpWLTGY-..ssltt.pLcccVTatsFFtpAPhhsPppthITGhICGVRlEEI-DP..LMQcIRaLDKLlDELAKGKshpplLR............ 1 45 82 95 +9799 PF09967 DUF2201 VWA-like domain (DUF2201) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4900) Domain This domain, found in various hypothetical bacterial proteins, has no known function. However, it is clearly related to the VWA domain. 
26.40 26.40 26.40 26.50 26.30 26.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -4.11 16 512 2012-10-10 16:07:06 2007-07-30 16:21:21 4 6 465 0 176 533 365 126.10 24 28.68 CHANGED lslslDTSGSIssttLspFtuElsuIt++h.tAp.lpllhsDsplpssphhcs..hct.lsclphsGGG.GTDasPll-tAsctcPsssVl..LTD.LpG.PschtP.thPVlWslst.psssss...P.FG+hlp......Ls ......hlslDoSGSh.......s...c..p......l......p........p..........hl..sElh....s....lhpp........h.....pt.....c.....l....pll.p.s.Ds.plpss.thlps...............t....ht.p.....h..p.....l...t..GuG.G.Tca....pssFc...a..s....p.....c....t......p...........p.....s.s..l.....l.....lhhTD..u......ps.......t..htP.....th.....s..ha.lh.t...........t...................P..au......h......................................... 0 70 116 150 +9800 PF09968 DUF2202 Uncharacterized protein domain (DUF2202) COGs, Finn RD, Sammut SJ anon COGs (COG4902) Domain This domain, found in various hypothetical archaeal proteins, has no known function. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.98 0.71 -4.57 12 134 2012-10-01 21:25:29 2007-07-30 16:21:30 4 4 106 5 72 171 17 151.70 38 65.03 CHANGED EtcsLlaMREEEKLARDVYlsLYchWt....lslFhNIup.SEppHhctVphLLc+Ysl.............pDPlhs-phGhFss.clQpLYNpLlpcGSpSh.DALpVGAhIE-LDIhDL.cchhpcosNp...DIphVYpNLhpGScNHhRuFsRslcphGhsYpPpYlSppcappIlpus ..........................EhcsLlaMhEEEKLARD..VYhpLYcpas....l.l.FtNIup.SEppHh.sA.lttL...lc+..Ysl..............s.DP..ss..s..p...t.h.G..hFsssc....lQpLYs..p....Llpp..Gp..p..S..h.h-ALpVGshIE-lDItDL.pchl.ppsssp..............DIphVYpsLhpGS.c.N.HLRAFhpt.L.p.p.h..G..h.sYpPphlsptthptllp..s......................... 0 23 45 65 +9801 PF09969 DUF2203 Uncharacterized conserved protein (DUF2203) COGs, Finn RD, Sammut SJ anon COGs (COG4911) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
20.80 20.80 21.10 25.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.89 0.71 -3.89 23 128 2009-01-15 18:05:59 2007-07-30 16:21:42 4 1 124 0 57 116 64 118.60 30 87.62 CHANGED phFolcEApcLLPhlcchhpph....tct+pplcch.......c...............htthcpph..cphtpplcttlcc....lpphGl.lKsl-.GLVDFPuhhss.c.laLCWKhGEs-ltaWHth--GFtGR+sIp ................................+hFTlcEApplLPhlcthlppl....tphppclpph.t.th...tt.tt.....................h.thctch..pphttplcphlpc....lpphGlhlKs..lc.GLlDFPuhhss..c.laLCW+hGEscltaaHth-pGFsGR+Pl........... 0 30 41 52 +9802 PF09970 DUF2204 Nucleotidyl transferase of unknown function (DUF2204) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4914) Domain This domain, found in various hypothetical archaeal proteins, has no known function. However, this family was identified as belonging to the nucleotidyltransferase superfamily [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.78 0.71 -4.81 15 161 2012-10-02 22:47:23 2007-07-30 16:21:59 4 2 147 0 66 166 23 151.70 25 59.21 CHANGED thpsLttlhppLp-+Gl..caVlIGusVlsLuhsp+hsssDVDLFlhshSsll-p-hac-lAcEpsW-hGposhGTsthlshls.uEplpVDh.aENIhDlalPtElLscucclsIsGlcl+sIslE-hlVLKA+tupcEsp-hlcclu...................chlh-..tplplshchl+chlphaPE.-scsIhcRL+p ..................t..pshttlhtpLp..cp..Gl......phhllGGs.shh.l.s......a...sp.c...hs.TcDlDh.....hh..t..s.s..us..h.....l.-t.....l...ccl.A.p..chuW...s..h...shh..s....t...s.....s.....t.hl......h......hs.t-sh..l......h...l.t......s..l.h...hs..t.phl.thp.....ht.h..t...uhc....shEDhh.lhhsphthpps....t...hc.lh....................hh.t.................................s.................................................... 0 26 44 54 +9803 PF09971 DUF2206 Predicted membrane protein (DUF2206) COGs, Finn RD, Sammut SJ anon COGs (COG4906) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
33.10 33.10 35.60 35.10 21.20 32.20 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.50 0.70 -5.64 9 29 2009-01-15 18:05:59 2007-07-30 16:22:19 4 3 23 0 23 30 1 355.10 19 51.06 CHANGED tp+tlLhllFhhullhSHYGsAYlhhhhlhhusLhhhlhthh..................p.ppt+hshpLlhlahllhhhW...YshlusuuhFpolssIhtplh......pol...hpp.LsPpsopGhtll.ss.sshhttLhKllplhs.hhIslGhltslhchhtpppp......lpYluhuhhhhhlLlAulhlPaFu.shNssRLaplohlhlAPahlIGh.shhchIsphhp+hh......sh+s.h....thh.ohFlllhhLhs.oGal.plhpspPhShulspts..............cshh.sp-VhuuKWluphpsps..lplhs-hhsshhhhh...............sYuslspphlssspph..................pssYlaLshhNlhpphhhlp.......hh.tthhhhN...hssshphltppNcIYDNtGStIYh .....................h.p+tlLhllFhhullhSHYuhuYlhhhhlhhshlhhhlht.h.h.........................p.pp.phshshlhlahshhhsW...Yhhlusushhts...lh...plhsp.hh........psl...h.p.hs.psspG...htlh....ht..shhthlhhhlphhh...hhlhlGhhthlhphhhtc.p...................hpY.lhhuhh.hllLlsslh..lPaa.u.shsssRLaplohlhLAPahllGhhthhphl.s+hhpthh..........t..h..............................phh.slalslhhLhs.oGhl.t.lhp.s.sho.hulspss...............tshh.s.spEltuspWlsp.psps............lhsshhph.hh.h....................a..ush...sht.htttpt..p................psuYlalsphNlppthhh.........hhstthhhhp....hsh.phlpspspIYsNusSplY...................................................................................... 0 4 12 19 +9804 PF09972 DUF2207 Predicted membrane protein (DUF2207) COGs, Finn RD, Sammut SJ anon COGs (COG4907) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
23.00 23.00 23.00 23.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.79 0.70 -5.92 46 1234 2009-01-15 18:05:59 2007-07-30 16:22:39 4 3 970 0 247 980 138 434.70 18 83.34 CHANGED sapIpphpsplplppcGshplpEplsYshcss..hcGlahshshsst...t.......hshpthslppss.......tssshplppssss..............phpltlasss.st.....sphphphpYplpssltha..pDh.uE..LpaplhGssWshslsplpsslphP......pshps..hchasasGshsspsp........tppsplthpsss.l.sssptlElchhh.Pp.....hsssstp.hptphhpph.t.tttht.p................tphhthhhslhhhlhhhh.hh.hhhah+hs+csphth.....................shhpa-hPtshsPshl.shlhptshs....................pc..ultAs.llcLhs+thlplpp................ccshhlplssps..........thsshEptllchlhspt....sspplshtpl...........................t.tpphppthppappslpcph..pptthhtthhhhpstt..............................hhhhshlhhlhuhlhh......................hh.....hhhthhhhhshlhhlhshlhhhh..........................tthhst..hotcGtphhtchcuF+paL..sDh.splphtsst..sltlW-chLsYAsALGlucc...Vt+phc ...........................................................................................tIpthpsphhlpt.-....ss....hphppplhaphppp.........hpG..hhslshhh.h.......s............sh.....htstpss.......................................hs.s.phpt.ptsh....................................................hhl.t.h..p......h..............s.h..phplpaplpshhhhh..............tDh...st................L.Wp.h...sp..s.....tl.tp.hphplphs.............ts...........ph.h.a..G..ttphp...............tp..t..t.....shp.hphps..h...tpt.....hplhhha..sp....................hsssts..t........h...t..p.h..h.............................................tthhhh.hh..s..hhh.h.l.s.hhh.............hh..h.h+p..s...p........h.......................................................................h.h.....ht.P...p..h..p..Phhh..t...h...l..h...p.ht.hp.......................pp....hht.As.llslhs+t.ltl.pt..............................................................tpthhl.ph.ptt...............h...hE....p.hlphhhttt...........p..hsthph.................
..................................................p.h.pphhpph..ppt.h..htph...p...................ht..t.................................................................h.hh.shh.hhlshhhh...............................................................hh..h.hh...h.....hh.h......h.hhh.l......h.hhhhh...............................................................h..h.sh....hs.ptGtphhh.hpuhpphL......p-h..sph.p.p.ttht...............shh.l.app..hLsaAhhhGhucc...h.p.h.............................................................................................. 0 89 175 205 +9805 PF09973 DUF2208 Predicted membrane protein (DUF2208) COGs, Finn RD, Sammut SJ anon COGs (COG4920) Family This domain, found in various hypothetical archaeal proteins, has no known function. 23.60 23.60 23.60 84.10 23.20 23.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.38 0.70 -5.12 13 43 2009-01-15 18:05:59 2007-07-30 16:22:49 4 2 43 0 27 35 1 234.40 30 94.20 CHANGED h+plLIS.l.lllaShVLohaPpaah.saILYFllahuIohshshRSh+..pshtshpEIusu+sLhE..EKcuscLhpKDcELhp.Ehpphs+uuhhshlhhhlalllhhllashl...hp....sthssslssths+FLsalshFEhhalluhh..lh+hlh.+.th.sshl..spsaKVoEKGIlsccphG...lthPhc...sschplNc-RKaVEl+.sspt.......plRLYs+-lc+L.slLpRl.KtLct ......................+hlLlStl.hllFShVLu...aa.PpY...a...h...haILYhllahuIohhhshRSt....+phpshtElspu+sLaE..EKcsscllpKDcEhhp.EhpchhKtshh.shhh.hhlhhI...lhhllasal..........................hthssshs.shhh+....FlsallhFphhaulshh...ht+h........lh..+h..pt...hshh..sp.uaKlpEKGllhscphu......lhhP.c...ssplphNc-+KaVEl+sssph.....sh+lRLYspDlc+lh-llp+l.+.lt.h......... 0 10 15 20 +9806 PF09974 DUF2209 Uncharacterized protein conserved in archaea (DUF2209) COGs, Finn RD, Sammut SJ anon COGs (COG4921) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
26.80 26.80 27.00 83.30 26.50 26.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.48 0.71 -4.05 4 22 2009-01-15 18:05:59 2007-07-30 16:23:06 4 1 22 0 19 23 0 123.00 38 95.52 CHANGED lAVDISGRH+.cDGaY.hVsAuVulEVsAs+I.pVcpVslhsVhp+-s.sLhDIVc.lccslsplu..F-hhIVsE+G-haNpPcWlspuhhupshKYtEohuEhcAIEhAH+lShSsR+LLhcELclp ...lAlDISGRHc.pstah.hVsAuVtspluus+lc+Vcplclh.httccs.slpDllphlc-slspls..h-h.IVsE+G-FaN.PchhVpuhhs+cFKYsEoluERcAIplAHHlShSsR+LLhct......... 0 3 11 15 +9808 PF09976 TPR_21 DUF2133; Tetratricopeptide repeat Anantharaman V, Finn RD, Sammut SJ anon Manual Family TPR repeat 25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.85 0.71 -4.25 350 1664 2012-10-11 20:01:01 2007-07-30 16:23:50 4 81 1626 0 351 1305 476 141.70 30 54.91 CHANGED pspQssptpt..AShhYpplh.pu.h.............s...sscssp...............l.....st............u..schtscassT.YAshAuLhhAKttV-ss-hstApspLph.sls.ps.pDs.sLp.slAplRLA+l.lpppphDs...ALptL....ss....stspua.suh.hs-l+GDlLlu.pGcpppA+sAYptAl ...............................................................................................................t.phppspp.AShtYpph.h.puh..................s.tscs.sp............................l...ss...........spcht..s.c.tt.s.o.....Y.u.s..hA..uLp.l...Apth............l..-p.s.....-............hc............p.A......tspLpp...u.hs....ss...p....Dc....s.L..p......s.....l..h...pL....R..L....A.c..l...l.p.......p...p......h..D..s.......A....L..pp.L.....cs...........l.p....s....p...u...a......s....u.h....s..t..-....l...+.G-hhhu...pGcppsA+sAaptu................................................. 0 100 205 279 +9809 PF09977 Tad_C DUF2134; Putative Tad-like Flp pilus-assembly COGs, Finn RD, Sammut SJ anon COGs (COG4655) Family This domain, found in various hypothetical prokaryotic proteins, is likely to be involved in Flp lius biogenesis. 
21.70 21.70 22.00 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.36 0.72 -2.97 75 253 2012-10-01 21:13:59 2007-07-30 16:25:29 4 5 204 0 106 358 18 100.40 25 18.09 CHANGED AuAtphsssss..........ss.ssAt...pssspsuh..........................ssst....shslpsGhass.....t.sstshhssss...................sssAVpVsssps.......ls.hhFsshhuh........sshs.lsApAsA ......................................................tuAtphsssss.............ts.ssAp.sss..tpNGh............................ssus............shsltsGpass........sssssppFs.s.uus.................................shNAVpVsssps.............VP..haFsu.h..........sshs..luAousA.............. 0 30 60 80 +9811 PF09979 DUF2213 Uncharacterized protein conserved in bacteria (DUF2213) COGs, Finn RD, Sammut SJ anon COGs (COG3566) Family Members of this family of bacterial proteins comprise various hypothetical and phage-related proteins. The exact function of these proteins has not, as yet, been determined. 20.90 20.90 20.90 21.00 20.80 20.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.96 0.71 -4.59 32 362 2009-09-11 16:51:15 2007-07-30 16:29:40 4 5 302 0 52 327 29 157.90 31 42.39 CHANGED ssRphss-GhLlscss.luRsGhphYsucElst.ts......pthlpVhRsPEpVFps-olASF-GhPlTlpHPc......c.VsscNW+slulGtlpNsRR....cu-hllADLllpDtsAIptI.-sG...hcElSsGYcs-a-.ss.......shcucQhsIpGNHlAl.VscGRAGspssl.u.............Dcs ...........................................t-GhLhsps.s..lu+s.G.h..Yhut-lsh.............t..hhl.hRsP-plFpspslsSapuhPlThtHP........thls..scNh.+.p..h.slGpltsstc.......pu.shlhuslhl.......hDps.............uIptI..psG....hcElSsGYpschp....ss.s....................shcu....hQhsI.t.s.NHlAl.V.scG..RuGspstl.tD............... 0 8 33 43 +9812 PF09980 DUF2214 Predicted membrane protein (DUF2214) COGs, Finn RD, Sammut SJ anon COGs (COG3556) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 26.30 26.10 21.40 20.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.91 0.71 -4.46 26 200 2009-01-15 18:05:59 2007-07-30 16:29:53 4 1 198 0 85 192 118 148.80 35 94.81 CHANGED hlssAllAalHaLuhhhsFuulssEphhLRss.hs..tspstplllsDhlYGluALslLlTGlhRlh.aauKGu-FYhpNPlFahKlsLalllulLSlhPTlsaIpWthslpcspsP..s.shsp+lthllplEhhhhslIPLhAshMARGlGh ....hhpAllAalHaLuhhhhhuslssEt.hhL...Rhs....hs......hsphpplhhhDtlYGluAlsl.LsTGlhRlh.hasK...GssaYlpsPlFHhKluLFlll...uLLSlhPTlshl+W+tsh..+p.....sthP....s...........ucspphtthlphphhLlhlIPLhAshMARGlG................................ 0 14 38 65 +9813 PF09981 DUF2218 Uncharacterized protein conserved in bacteria (DUF2218) COGs, Finn RD, Sammut SJ anon COGs (COG3553) Family This domain, found in various hypothetical bacterial proteins, has no known function. 24.30 24.30 24.30 24.70 22.70 24.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.77 0.72 -3.80 39 357 2009-01-15 18:05:59 2007-07-30 16:30:16 4 8 331 1 97 244 10 88.60 33 72.22 CHANGED SpuplsTspAoRYLppLCKHauHKhtVpaDsppGclshP....hGpspLpA..sscsLslplpusstpsLsplcpVlscHLpRFAhREs.lplsWp ..............pApl.TscusRYlppLC+Ha.u...HKhs.............sp....a..st..p..p..G.p..lphs..........su.s.spLss..csppLshslpu..spp.slsphcsVlssHlsRFAtREp.lslsWp................... 0 31 62 75 +9814 PF09982 DUF2219 Uncharacterized protein conserved in bacteria (DUF2219) COGs, Finn RD, Sammut SJ anon COGs (COG3528) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
19.00 19.00 22.30 20.50 18.10 16.70 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.86 0.70 -5.59 67 618 2009-01-15 18:05:59 2007-07-30 16:30:29 4 2 571 2 122 391 160 281.60 29 87.86 CHANGED hth-NDh.hh..u...sDp.tYTsGlplsastss...........thttthsphh...............t..thshpluQphaTPs..ch.....phpss.ssDRPYAGhLhhshshhthpsspt.......phshslGhl.GPsuhucpsQphhHp.llG.......sscsp..GW-pQlcN-hshplshctphphht............shth-ltshspsslG........NlhohsssGsthRhG...pLsssaGsstlp.ss.....ttthh.st..............shuhahasuscsphlspslh.l-Gssa........ppst..sl...shpthhscsphGlshth....t.shplsau....hshtopEFcsppc....hppaGolsluhta ............................................................................................Dp.sYTsGlhLuaopshh................................................shsphuh.pluQchaoPS..sh.........cp..spshhs...DRsau..uaL+sslp..h.u..hssshh...................chshslGsl.GP.sAhupcsQphsHc..lhG.......u-.c.p...uWssQlcN..chshslphhhphp.sh...........shhG..sshslhPpsssshG........NhhphluhGstlphG..pshssc.Ghuhls..st...........h.h.httpp.............shtahlFuGh-tRhlsps....hT..LpGpoh................psph......sV.....slpthVsphplGsshta...........s.shuholu....hsthTsEF+ssc-.......asahshslshhF........................................... 0 45 84 110 +9815 PF09983 DUF2220 Uncharacterized protein conserved in bacteria C-term(DUF2220) COGs, Finn RD, Sammut SJ anon COGs (COG4924) Domain This domain, found in various hypothetical bacterial proteins, has no known function. The family represents just the C-terminus. 
20.50 20.50 20.50 20.80 20.40 20.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.23 0.71 -5.12 31 376 2012-10-01 21:47:57 2007-07-30 16:30:42 4 4 352 0 131 417 28 166.90 19 45.82 CHANGED GL.......ppcsshlRhRhL-sshsh....sshs-lslshpphupl..plssppllIlENctsaLshP.shs....sulllaGuGauhsplss..hs........WLppp.plhYWGDIDTaGFtILspLRphh.PpscSlLMDppTl.pa.psthstE..sp.pttp....Lst.LossEtslYcpLhpsthtsplRLEQE+IshphlhptL ................................................................................................h....tthtth...th.s.s.pplll.lENpss...a...hshs...phs....................ssl..sla.G....u....G....h.sh.p..t.h.t.p....ht.................................hlps...p...plh..YWGDlDstGh....tI.....hppl+phh....s..........t..........h..p...s....h...h.Ms...tt..h....th....p..hs.p.....st..h.t...........h.t...Lp....t-.hth..hp...tl...ht..............thc.lEQEhl.........h...................................................................... 1 53 95 118 +9816 PF09984 DUF2222 Uncharacterized signal transduction histidine kinase domain (DUF2222) COGs, Finn RD, Sammut SJ anon COGs (COG4999) Family Members of this family of domains are found in various BarA-like signal transduction histidine kinases, which are involved in the regulation of carbon metabolism via the csrA/csrB regulatory system. The role of this domain has not, as yet, been established. 
20.50 20.50 20.60 20.50 20.20 20.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.56 0.71 -4.76 27 713 2009-01-15 18:05:59 2007-07-30 16:30:59 4 8 701 0 82 352 6 146.60 63 16.35 CHANGED hsRYp-L-cpLlspGsoIIEPLAIASEhGhppcsREsl++LIohsHR+pSslV+SIAlFDssNcLFVTSNaHpsFctLphscshshPphhplphpssolILRoPIluEu...ph....sss.stssssphLGYIAlELshsslpLpQYp-lh ....VHRYNDLQRQLEDAGASIIEPLAVSoEYGMsLQNRESI.GQLISVLHRRHSDIVRAISVYD-.s.NRLFVT..SNFHLDP.Sp..MQ..Lsss........sPFP......RpL.o....VsRcGDl...MILRTPIISES.......................YSP......DESssuDAKsopNMLGYIALELDLKSVRLQQYKEIF.............................. 1 11 26 54 +9817 PF09985 DUF2223 Domain of unknown function (DUF2223) COGs, Finn RD, Sammut SJ anon COGs (COG4945) Family Members of this family are found in various prokaryotic membrane-anchored proteins predicted to be involved in the regulation of amylopullulanase. 18.60 18.60 19.80 22.60 18.40 17.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.51 0.70 -5.03 16 159 2009-01-15 18:05:59 2007-07-30 16:31:13 4 21 101 2 90 164 135 215.70 27 30.19 CHANGED hlhphsDPhGDDpGsGsYsYPTsssF.tP..GhFDLhphplh-sussahFphphscl.sNPWsGPpGFShQhlplYlspppGupsssl......sGsNVph..ppsWDhslhls.GW.s..............ushlhsssGsth.....ssshpltssPs.spsIlsplsKphL.t.s.s..phthhllluu.DGYGPDp.hRP.Vu...spus......pWphGGus.......sst..ssssPhVlDlLlPpu..tsQtphLosa..tssphAll...hslsl ...............................h.lhphpDP.GDDpG.s.G.sYsYPssssFp....GhhDLhphplhpt..s....sshhhphphpcl.sNPWs..uP..............sGFShphlplYlc...h...s.p...u.u.ssshht............Ghsssh....ptsWchslhls..G....W.t...............................tshlh.ss.Gsth..............psshpl...tss.s...tssIhsplspphl.t.st......s........sht...hhVhssu.D...Ga...ussp.....hRs.ls...........spss...............tWphGGus.................tstPhlhDhLssps.........tQ...Lt....................h........................................... 
1 35 48 69 +9818 PF09986 DUF2225 Uncharacterized protein conserved in bacteria (DUF2225) COGs, Finn RD, Sammut SJ anon COGs (COG1655) Family This domain, found in various hypothetical bacterial proteins, has no known function. 20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.47 0.70 -4.88 23 252 2012-10-11 20:01:01 2007-07-30 16:31:21 4 3 242 0 98 280 16 223.70 28 87.25 CHANGED LYcKclpCPVCspcFpscpV+ouuhRlh........++DsDhph+Yps..lNPhaY-VhVCspCGYAAhcpcFs.plsstpp-hlppplss+aps+.......sast..RslspAlpsYKLALls.hplhcs+.S...ppAhssL+lAWlYRhtpc................cppEhhaLppALctaccAYpsEshshp.shscho...............lhYLluELsRRhGspc............-AlpWau+llssssssp....plh-hAR-tacll ...............................................hccplpCPlCsppFpppcl+outh+lh........cc-pDhpscY.ps.....lsPhhYsVhVCPpCGaAuhppc....Fs...p.l...s...stp...t...chlppp.lsp+.h.p.p............sast...RslppAlpsYtL.Al.ls.hph.hp...t.phs...phAhhsL+lAWlaR.hpc.................pppEphahppAhchYpcuhpp-.p.h..s.p..shsphp.....................................................................................lhY..L.lGpLph+hGshc............-Ahpahu+llsptpssp....hlhchAR-.ap.................................... 0 51 80 88 +9819 PF09987 DUF2226 Uncharacterized protein conserved in archaea (DUF2226) COGs, Finn RD, Sammut SJ anon COGs (COG1667) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.50 20.50 20.60 20.60 19.90 20.40 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -12.01 0.70 -5.29 4 41 2009-09-10 17:02:35 2007-07-30 16:31:33 4 5 23 0 31 41 5 233.70 16 84.24 CHANGED MhhP+s..pVVEschh.....GshcEIhc.luohs...GhlcIos+cG-tLh-uallVpsGKlVushlpclcotEchsuEEAlccLh.......uhpssVlDVYchsc-clphhhchps.....................psls..h.phcl-h.ht.................................................Es..t...s.pth-hsEEhlc-P-..........................................................pREElLKKhGIK.Ps.Ep.lEsILc-hhc.............sshEEhKppl.pph..plh+hpGhs-VhVh.csK.tEt..........sscpllthl++c.................slcEh.ochhR ...............................................................................................................................................................................................................uhl.h....t.....shhhh.tt..lh..................t.h.th..............hl-laphp.p.hphh..........................................................................................................................................................................................ps..t....t.pt.p.h.pphhp..c.p.t........................................................................lsR-cLhKphsl+..s.-p.l-pllcphht...................th...h.pp....h....t..................................................................................................................................................................... 0 9 13 23 +9820 PF09988 DUF2227 Uncharacterized metal-binding protein (DUF2227) COGs, Finn RD, Sammut SJ anon COGs (COG2389) Domain Members of this family of hypothetical bacterial proteins possess metal binding properties; however, their exact function has not, as yet, been determined. 
25.00 25.00 27.90 27.90 23.90 23.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.28 0.71 -4.62 28 198 2012-10-01 21:01:47 2007-07-30 16:31:49 4 1 192 0 62 150 38 162.70 39 96.84 CHANGED PSGRsHDRlTl..h.ulPhshlhshh........lsts..huLhs.uuuaLhuGLhhSPDLDlp.S..cthpRWGhLRalWhPYpcll.HRShhSHG.llGohlRLhYLtshshslshllshhh..h.hhshsh.s.h.....thlhphhppp.p.llshllGLEhuuhhHhluDhsssshchh................................+tR+ .................PSGRTHs+IsLh...uLPslhhhha............huhos....hLl......s.uhuaLhGshhLoPDLDha..S.....psap+WGhLRhhWhPYp+lhsHRShhoHshllGsllRlhYhhllhsshhhllshlh..t..............................splhphhpp..ap.hpllshlhGlhluuhLHlIuDtlsoptK+hh..........+...++c...................................... 0 16 42 57 +9821 PF09989 DUF2229 CoA enzyme activase uncharacterised domain (DUF2229) COGs, Finn RD, Sammut SJ anon COGs (COG3581) Family Members of this family include various bacterial hypothetical proteins, as well as CoA enzyme activases. The exact function of this domain has not, as yet, been defined. 
29.40 29.40 29.80 29.40 28.90 29.30 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.43 0.70 -4.88 84 1010 2009-01-15 18:05:59 2007-07-30 16:32:13 4 8 744 0 247 884 34 205.20 36 18.45 CHANGED plGIPRsLshYc.aPhWt.TFFscLG..ac...VllSstos+clhchGhcshsuEs.CaPsKlhHGHltsLl.c.....Ktl-hIFhPplst.pp.c........tpasCPhltuhP-hl+ssh.........ptshphlsPhlshps......cthtcphhc.h..............hp...l..spc...............-lppAhcpAhcp.cpacp.p...lcppucchlthhptptpp...................sIlLhGRPYpl.Ds.lNhGIschlsp.hGhsVlTtDsl ..........lGIPRsLNhY.EsYPaWt.ThFspL..G..ac...VllSscSo+pla-p.GlcolsS-s.CaPAKlsHGHltsLl..c.......+.slchIFh.....Pslsap....pp...c.........p...ss.spaNCPlVtuYP-sI+sNh........................ptslphhsPalshssp.....ctlhcplhc.h........................hp.h.s.l..stp....................................-hppAlptuh..pc..pta+p.c......lpp.tG..ccslthhpp.pstt............................................uIVLuGRPYHl.DPcINHGIschlss.hGhsVLTEDul............. 0 136 211 231 +9822 PF09990 DUF2231 Predicted membrane protein (DUF2231) COGs, Finn RD, Sammut SJ anon COGs (COG4244) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.00 22.00 22.00 22.10 21.80 21.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.39 0.72 -3.68 51 367 2009-01-15 18:05:59 2007-07-30 16:32:28 4 6 261 0 187 377 94 102.30 23 55.45 CHANGED ah-luaWslhsuslhshhAslhGhhEhhLs.shh...........tspsshhhHslsulsllulhss........palhRhcssp..pl.hhhLsluhlhssllslpuaLGupLsacaGlt .......................................................h.psuhW.lhsuhlsuhhAslsGhh.-hhhs....tt......................psppshhhHslhslshhslhss.....................phhhRtp...sst.....tl......h..........h..hL.hlu.hls..hs.llslsuaLGupLsapaGlt......... 
0 46 119 163 +9823 PF09991 DUF2232 Predicted membrane protein (DUF2232) COGs, Finn RD, Sammut SJ anon COGs (COG4241) Family This domain, found in various hypothetical bacterial proteins, has no known function. 24.90 24.90 25.00 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.72 0.70 -5.42 46 942 2012-10-03 02:46:00 2007-07-30 16:32:53 4 5 917 0 226 724 132 260.60 20 81.98 CHANGED ulhsllhl..lsha.lPlluhlhshhhPlPhllhhh+puhphulh..sllssh.llhhlhss.hhu..lhhhlhhulhGlllGhhl+cppshtps.....lhhushshlluhllhahlhhhhhs....ls...hhsthhphhcpshppshphhpph.Gh.....s.....tphpchhpph.hphlthllPuhlllsuhlhuhlshllst.ll..+Rhphp.h.thssFppaphPppllWhhllsllhhhhhpt.sh....phlhhNlhhllshlhhlQGlullhaah+p+.phspsltl...lhhllsllhs..lhhlltlLGllDl ..............................................................h..hhlhhh..lshh.lP.h.lu...h..l...hthhhslPhhllhh+.t.........t.......p...hulh....uhlssh.....llssl..l.u.....s.s..h.s..........lhhh.l..hhhl.hul....llG..h..h..h.....+...c.....p.pshtph........................lhhs....s.ls..h.h.l.hh...ll.hh.hl.ht.h.hht...........ls......hs.....h.h.p.hp..ps.hpp..s..h..p.h.httt................................tt.pp.hhpph....hp.hth..hhPuhlllhs....hhhshlshhls.h...ll.....p+h.t.h.....s...h.s..h.s......h...t.hph.s.....t.....h.lhhh....h..l....slh...hh.h..h....h.t.........s....................lh...h.N.h.hl.hshhhh.lpG.luhltah..hpt+...thsthh.hh...lh..h..lhh.hhhs....hh.llhllGlhDh...................................................................... 0 85 164 200 +9824 PF09992 DUF2233 Predicted periplasmic protein (DUF2233) COGs, Finn RD, Sammut SJ anon COGs (COG3698) Domain This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 25.10 25.10 24.80 24.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.76 0.71 -4.24 288 1904 2009-01-15 18:05:59 2007-07-30 16:34:37 4 62 1410 1 489 1560 149 177.80 21 44.80 CHANGED ppslhulN......u.....s...h....hs.ptt...................s.G.hhlp.......sG..phht...............sttttshhshp.........tsuthhh..............shpphth........tshpps.l..t.G...........P..hLl.p............sGp.......hh............t.sssst........................tsRoAlGhs.....pcG..pllhls....l-G.....t......tGh.olt-hup.lh...p......h.Gsh.s.....AlNLDGGuSoshh......................hth..h...spP....ssst.................Rslssslhlh ................................................................................................................................t..h.huhN..uu...h......ap.p.t..............................shG..hhlp............sG...phlt..................ttspststhhlp............................sGhhhl.................s.....h....................................................................pshp.u.l....suG...........PhLl.p......................................sGp.........ls..............p..tss............................................sppsRoulGhs..........ccG....pllhll...s-s........................uh.shh-hAp.hhps.......L..G.s...s.............AltLDGGuSoshhh..........................t..h.........s..............................R.l.shhh..t........................................ 0 212 355 421 +9826 PF09994 DUF2235 Uncharacterized alpha/beta hydrolase domain (DUF2235) COGs, Finn RD, Sammut SJ anon COGs (COG3673) Domain This domain, found in various hypothetical bacterial proteins, has no known function. 
21.10 21.10 21.80 21.70 21.00 20.80 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.00 0.70 -5.03 90 1245 2012-10-03 11:45:05 2007-07-30 16:35:17 4 29 586 0 602 1402 86 226.40 21 51.26 CHANGED +...pl.l.lChDGTs.....Nshpsstp.............................oNVh+la.chl..........spss............pQhsa...............YpsGlGT...............................tpthpphhssAh...Gtuhspplh....pA........YpF.........LhcpY....psGDc...Ia.....lFGFSRGAasAR........sLA.shl..sphGL.........lp.......................hhhhsacthtp..................at.ttht.t.pt...................phhtthppphs........................p.ssplcFlGlaDTVsul........................................stahhp...............th.....ssth...........sst.................VcpspHAlulDEpRttFp.s....Lhp.......................................................................................................................................sssttsp.......ppVWFsG..sHuDl...........GG...Gas..............ttpttLSc.lsLsWMlpcA 
..............................................................................h.lhhDGT........pp.ttt....................................................oNlhclh.phh...................tt........................................hh.h............Y............GlGo.............................................t....h.t.hhs.sh.......u..........s...htt...pl....tu.........................................................hth..................lhp.h..............ssp.......lh............................hhGF...SRGAhhsR..........hs..thl......................................................................................................................................................................................................................................................................................hplphl.GlaDTVsul...............................................................................................h..t..................h.....h.h........................s..................sphshHhlu..hc..EpR...ht.F..s..ht...........................................................................................................................................................................h.phhasG..sHu...Dl..................GGGh............................................hs...hsh...hh....................................................................................................................................................................................... 0 164 331 481 +9827 PF09995 DUF2236 Uncharacterized protein conserved in bacteria (DUF2236) COGs, Finn RD, Sammut SJ anon COGs (COG3662) Family This domain, found in various hypothetical bacterial proteins, has no known function. This family contains a highly conserved arginine and histidine that may be active site residues for an as yet unknown catalytic activity. 
21.50 21.50 21.60 21.50 21.40 21.10 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.35 0.70 -5.08 128 1684 2009-09-11 16:50:31 2007-07-30 16:35:46 4 16 655 0 676 1614 392 240.60 18 70.56 CHANGED hshcsatchssh...hhsussullhphhpPtlstulhcp................usap..............p..csht........RltcTspalhsss...................au.sspputthttcVRthHtpV+us.......hss............................G....tsYsAhssplhhWstsshshshl.tuhpt....h.sh.lsss-t-phapphthluphlGl..c.h..Ppotsphtphhpphh.pt.lpsstps+pl.sphlht................hsss..hhh.......t.hhthhh........hhshshLsstspc.hlGlshsshtpth....hhhhhht........tshthl.ttl ...............................................................................................h.......p.......hhhshtulhh..p..h.hhPtlstulhcp....................upht.......................p..cs...hp.........RhtcTstalhsss................................as..ssp.....utt..httpV+.thH.tplcus...p..........................................................G........h..a.ps.s.stlhhastsshhhshl...pshcp.......h..s....t..............l.o.t.t-.t-phap.p..h.thhuph.h.............Gl................c..h.............Ptoh..........t..........pht..........p..........ahcp......h..........h................p.....lp...s..sttscpl..hphlht.................hsh....h..................t.ht..thhh..............hhshshl.s..shh..pc...hhs.l....hsshtpth.......hhh.hht...............hh........h............................................................................................................ 0 164 398 587 +9828 PF09996 DUF2237 Uncharacterized protein conserved in bacteria (DUF2237) COGs, Finn RD, Sammut SJ anon COGs (COG3651) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 25.40 25.40 22.90 19.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.54 0.71 -4.61 79 426 2009-01-15 18:05:59 2007-07-30 16:36:10 4 2 408 3 165 373 1622 108.90 51 88.89 CHANGED shNVLGpsLpsCussPhTGFaRDGtCpTsspDhGsHoVCAhhTsEFLpaS+tpGNDLSTPtPcasFPGLKPGD+WCLCAsRWhcAh-sGhAP.VhLcATHppAL-lVsL-sL+paAh ..................................t.hNVLGtsLps..Cu...p..c...P.....h.....TGFaRDGhCpTsspDhGpHoVCAlhT...sEFLpao+u.h.GN.DLoTPhPpa..s..........FPGLcPGD+WClCAsRWhcA......h........p...........s...........G........h..AP.VhLpATHppsL-hlsL-hLppaA.h............................ 0 60 121 149 +9829 PF09997 DUF2238 Predicted membrane protein (DUF2238) COGs, Finn RD, Sammut SJ anon COGs (COG3647) Family This domain, found in various hypothetical bacterial proteins, has no known function. 26.30 26.30 26.40 26.90 26.10 26.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.88 0.71 -4.80 67 737 2009-01-15 18:05:59 2007-07-30 16:36:24 4 1 709 0 132 449 45 140.30 50 68.56 CHANGED sllshslLhhoh+.+F..hoshsYhLlhlahhlhhlGuH.YTYAcVPhhsW........lp-hhGhpRN..pYDRluHFh.Ghl.Ahsh+ElllRpphl......ps..thhhhhslshshulSAhYEllEWhsAlhs.G.csutAFLGoQGDlWDsQpDMhhA ...............................................................llllslLlsTt+.Ras.L.TsLhYsLIFhasllLhVGGp.YTYAcVPls...............lp-hl.Gh.o...RN....sYD+LGHFhQGLlPAlls...RElLlRthhl........+s.tthlsFLlsslsLAlSAhYELIEWWsAlsh.G.puA-s.FLG..TQGD.WDTQpDMhsA................. 0 36 83 113 +9830 PF09998 DUF2239 Uncharacterized protein conserved in bacteria (DUF2239) COGs, Finn RD, Sammut SJ anon COGs (COG3644) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 35.60 29.20 20.40 19.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.27 0.71 -4.68 37 209 2009-01-15 18:05:59 2007-07-30 16:36:37 4 3 197 0 74 206 12 178.10 51 91.18 CHANGED tsaTAFpGpRRlAoGsLh-VALAl+...tths.tsssuslLlFDDsTGRslDlDLRGoss-lhARhs.sss......................stsut...........PR...GRGRPKLGVVAREVTLLPRHW-WLuuQPGGASsALR+LV--ARRsssspDRtRtAp-AAY+FMoAhAGDLPGFEEAsRALaAsDtsphsphhtuWPsDlRsHAhtLA .......saTAFsGpRplAuGsLspVAlAlKpths....ttssuslLlFDcsTG+slDlDlRGosp-lhuRhs.s....................................tsttsR..GhGRPKLGVVAREVTLLPRHW-WLusQPGGASVsLRKLV-cARRspsst-+tRtAp-tAY+FMSAhAGDhPGFEEAsRALaAsDtsthtphIsuWPsDVR-ashtLA......................................................................... 0 14 32 54 +9831 PF09999 DUF2240 Uncharacterized protein conserved in archaea (DUF2240) COGs, Finn RD, Sammut SJ anon COGs (COG3612) Family This domain, found in various hypothetical archaeal proteins, has no known function. 20.00 20.00 20.10 21.60 19.70 19.60 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.50 0.71 -4.75 16 65 2009-01-15 18:05:59 2007-07-30 16:36:58 4 3 65 0 51 72 48 134.50 29 87.88 CHANGED pL+hsVAAPF+p..ptsspLscs-FlauLohDp+WhSP-pApcllctAtppGLLpp.csGslsssFDsuslplPhuF+Pscslh.................ppcssaEclLDtlsusuGls+p-lVuclNp.hp-pls..lsh-sAulllA+chGlDls....shhpcl .................LphslAsPFcp..cupsplscsEFlhsLohDhcWhSs-pAKcLl-hAhpcGLlpc.csstlhssFDssplplP.sFpPs..tphh.................pccssF-cll-hlss.tGhs+pEslutlNp.hpcchu..lsh-sAAllhA+cpGlDlschhpc.h....... 2 10 34 44 +9832 PF10000 ACT_3 DUF2241; ACT domain COGs, Finn RD, Sammut SJ anon COGs (COG3602) Domain This domain, found in various hypothetical bacterial proteins, has no known function. However, its structure is similar to the ACT domain which suggests that it binds to amino acids and regulates other protein activity. This family was formerly known as DUF2241. 
25.00 25.00 28.70 28.20 22.90 18.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.50 0.72 -4.25 48 273 2012-10-02 00:29:19 2007-07-30 16:37:16 4 9 265 4 97 252 79 71.00 38 48.18 CHANGED MsGEp-LspLLpoMpPpLpsupaVFCols......shst.hsLpsl.usF+EpEGlTllLpcppAcptGL.saphsh+h ........MsG.psLptLLpoMsPpLpsGsYVFsTls............shssh.sl..pPl..uoFREs..EGLTLllppcpApptGL..shphhht................ 0 19 48 76 +9833 PF10001 DUF2242 Uncharacterized protein conserved in bacteria (DUF2242) COGs, Finn RD, Sammut SJ anon COGs (COG4259) Family This domain is found in various hypothetical bacterial proteins, and has no known function. 25.00 25.00 80.60 80.40 22.70 17.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.53 0.71 -4.24 15 159 2009-01-15 18:05:59 2007-07-30 16:37:31 4 1 154 0 57 182 20 120.40 59 41.58 CHANGED CEAARRALLSQGYllssucsctV-GpKsFQ.PcsDsHlplphRVVCAscshcushollFssALQDRYALKKosNSASVGVGuLGSlSLPhuSscDoLVKVuSETIsuupFY-RFFpLlc+YL ..CEAARRALLSQGYllouuc.sctV-GsKsFQ..PssDsHlpIsF+VVCAss..s.....tDGspShsaVNAlQDRYuLKKosTSASVGluVLGSlSLPIGSoDDShVKVASETlsuutFY-RFFsLV-paL.. 0 5 21 41 +9834 PF10002 DUF2243 Predicted membrane protein (DUF2243) COGs, Finn RD, Sammut SJ anon COGs (COG4329) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 55.00 54.50 20.50 20.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.05 0.71 -4.41 22 165 2009-09-14 14:06:24 2007-07-30 16:37:41 4 1 162 0 73 156 1 137.90 42 79.08 CHANGED uGlLlGlGLuuFlDtIlhHQlLQWHHhhsp.......................shclshl.DGLFHuhoalhslsGlhlLh...sh+pchsass+thhGulLlGhGhFpLh-GllsHplLtlHpV+....ssphLhaDlua.s.hGhlhllhGhhLlpps.pp ......uGlLhGlGLs.uFlDtllhHQLLpWHHhhsps.......................shclslluDGLFHAhoahhslsGLalLh...sh+R...+tsash+thhuulllGhGsFpLh-GllpH+lLtlHplRh................ss.shLh....aDlsa.l.huslhllhGhlLhhpsts..... 
0 18 47 61 +9835 PF10003 DUF2244 Integral membrane protein (DUF2244) COGs, Finn RD, Sammut SJ anon COGs (COG5488) Family This domain, found in various bacterial hypothetical and putative membrane proteins, has no known function. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.45 0.71 -4.81 79 402 2009-01-15 18:05:59 2007-07-30 16:38:12 4 2 380 0 144 369 562 136.80 29 81.50 CHANGED LpPppSLs.cuahhhhsshsslshhsulshh.hhGu.WsllsFhGL-lhulhhAhphshRpupttEplslsstphhlh+psspGptpcachNshWs+lphctpst..h..tlsLpupG+cVclGsFLu.--RtsltppLppAL ............LpPppSLu.ptFhhhhsslsshshhhuhhhh.hhGA.WslhsF.hGl-.llslhhAFthsh..R.pApshEcIslst.phhlhchsssu.chppachNPhWsRlchtppsc................tlpltucGcpl.lG.p.FLs.c-RtphAptLptuL......................... 0 33 78 107 +9836 PF10004 DUF2247 Uncharacterized protein conserved in bacteria (DUF2247) COGs, Finn RD, Sammut SJ anon COGs (COG4304) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.40 25.20 23.40 24.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.82 0.71 -4.53 10 135 2009-01-15 18:05:59 2007-07-30 16:38:36 4 1 131 0 18 82 1 158.70 35 97.50 CHANGED Mppsh...........hhtpthchsW+slhhGhppph.....lupcsVspaAhchlshuspspp...ElhLtlstchsspclsplLssLss........p.ppphptsh+KWh.alhLshlacspp-hsDsLcclEcIYADFsYPE-ltpFlpYMPscs...s..hs.ppNpcRlhspaccaLcp-puc ...................................................l........-hpsp+l+LSWcDIh...WGYpp....Kh.........luassls..saA.ch...hohu-pspt.....hcL.uh.ts+.sh.Elp.lL--Lus.............cpcsho.cpWL.all...Ls...cl.F.pp..+..cpap.-PLtcVEcIYsDFDYPE-I-SFVpYMPspDt..h.PstaohcENhtRLascWccYLssts..t...... 0 4 12 16 +9837 PF10005 DUF2248 Uncharacterized protein conserved in bacteria (DUF2248) COGs, Finn RD, Sammut SJ anon COGs (COG4307) Family Members of this family of hypothetical bacterial proteins have no known function. 
22.50 22.50 22.70 22.70 22.20 22.40 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.21 0.70 -5.52 49 377 2009-01-15 18:05:59 2007-07-30 16:38:49 4 1 332 0 127 355 70 318.00 43 91.29 CHANGED pCssCup.laF-NstChpCustLGasP..pptplhslps.....ssps.....ap..................................................................................tha+hCsN.tshssCNWhlsscss...pshChuCchs+slPDLup...spNhttWpclEtAKRRLlYsLh+LGL.....Plhs+.....p..DsttGLsF-FLu-.....sssts.VhTGHssGlITlNluEADDucREphRpphsEPYRTLLGHFRHElGHYY...WspLlt.ss..shLttFRslFGD.EctDYutALppHYpsGP.P.ssWpppalSuYAouHPWEDWAETWAHYLHlhDsL-TAtuaGlplps...p.......th.........hshcPhpss.........shpp.llstWlPLohulNulNRSMGpsDhYPFVLsssVlcKLpFlHpll ...................................pC.pCsp...lhFENstChpCustLGa..pphtlhslts.....stts.......h....................................................................................t...........t.hphCsN.tthstCNWll..s.......ssss............ssh....ChuCphscphPs.ss...sts.htpWt+hEsAKRRLlhpLhcLtL.Plhs+....pp..DsptGLuFchLu....................sstpt.VhTGHssGlITlsluEuDDscREphR.hp.MsEPYRTLLGHFRHElGHY..Y.aspLlt....ss....s....hLptFR.plFGD.-ctDYstALp+HYpp.GsP..ssWp-sal.S.u.YAThHPaEDWAETaAHY.LHIhDsL-TAtuhGlsht................shp..........hshsshtss....sFpp.llp.tWlPLohulNplNRSMGpsDhYPFVLsssVlcKLcFlHpll.................................................. 0 35 76 101 +9838 PF10006 DUF2249 Uncharacterized conserved protein (DUF2249) COGs, Finn RD, Sammut SJ anon COGs (COG4309) Family Members of this family of hypothetical bacterial proteins have no known function. 
24.40 24.40 24.40 24.40 24.30 24.00 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.25 0.72 -4.40 163 821 2012-10-01 20:42:06 2007-07-30 16:39:03 4 21 465 0 266 571 35 68.20 30 60.34 CHANGED plDlRsl..sstp+astIhsshssLtsGcs....hhllsc+-PhPLhtplptc......uh.saphhppssst.aclplp+ ..........LDlRsl..sshp.astIhushssLpsGps....hhlls..s+..cPhP..L.htpLcpc.........Gt.paphhppusst...Wclpls.................................... 0 76 190 239 +9839 PF10007 DUF2250 Uncharacterized protein conserved in archaea (DUF2250) COGs, Finn RD, Sammut SJ anon COGs (COG4344) Family Members of this family of hypothetical archaeal proteins have no known function. 24.40 24.40 24.40 24.40 24.30 24.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.90 0.72 -4.08 18 100 2012-10-04 14:01:12 2007-07-30 16:39:16 4 4 63 0 53 98 6 90.80 35 76.76 CHANGED hcplhpc.hhLplLpHLccuslDYuKslschoclPLpcVpchLccLcchGLlE+ss.usolKpo-sKhKhupEVH+HHTYYpLoRcG-hlLRcl ...................h.....c.htlpILtaL.cc.h.s.sDY.u+h..lA+.....p.....h........chsLp-lpchlccLEchGLlERsp.up.......hl.Kps.....ct.......+h..K........pp.Es++...HHs....YYcLoRcG-hllRp................ 0 15 32 47 +9840 PF10008 DUF2251 Uncharacterized protein conserved in bacteria (DUF2251) COGs, Finn RD, Sammut SJ anon COGs (COG4316) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 33.70 41.00 22.80 16.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.21 0.72 -3.87 9 172 2009-09-10 23:17:17 2007-07-30 16:40:18 4 1 172 0 20 82 1 95.30 56 69.74 CHANGED hltupS..ps+huVVFEDDG-TGYFYALDhp.p.tpPIlDuLalYNVpslo..stctPpclpItWS-DGhpAhLlINGYPHAlFDFsphhGYs+otaP.P ........h..topu..cthhusVFEDDGpTGYFYAhD.p.....p......tssIlDALHIYNVE.DlS..Dt..H..I........PscVcIsWsEsuphssLLINGYPHAsFDFsppsGYCRsGFP.P......... 
0 6 14 19 +9841 PF10009 DUF2252 Uncharacterized protein conserved in bacteria (DUF2252) COGs, Finn RD, Sammut SJ anon COGs (COG4320) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.30 22.30 22.40 22.40 21.70 22.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.18 0.70 -5.56 62 553 2012-10-02 22:05:25 2007-07-30 16:45:36 4 4 397 0 238 582 38 375.30 28 85.90 CHANGED R....hspLlshRat+MusSPFuFaRGoAtlhtpDLss........ssssuhps.lCGDsHltNFGhau...ss-tpllFDlNDFDEshhGPapWDL+RLusSlslAucpp.uh.....................scppspphlpshspuYpcphpphuphshhphh..................hpspsspthlpchhc+App+.sptphhs+hophs........sthRhhp.psstlhtls................pstpptlppthppYhpols..sscp.hhsp....aplpDlAh+l.sGlGSlGhpsahlLlpuptps...c.sl..lLplKcAptSslstah.....tt.....tppGcRVVtuQRhhQusuDhaLGasphss..............+...s......FhlRplpshKsslsh..cp...h....st..cphtpauchhGtsLA+AHA+.us...css................hlsshhupscphs..pslscaAhpYAcQscpDattahcs .................................................................spLlslRat+Mus..SPFuFaR..GoAtlhtt.DLsp.............................tsssuhtl.lsGDsHltNFGhas...ssctpllFDlNDFDEshhGPapWDl+RLusSlslAucpp.uh...................sppptpphlpshspuYcpphpphsphsh.phh.........................................................htsppspthlpchlp.+Appc..s......ptphhs+hTphs.t...........sth+hh...t..psshhhtls.........................psptt.tltshh..p.p..Y.h...po..l.......tst...th.h..hsp....aplhDlsh+l.lGlGSlGhpsahlLlpupsss.....s.sl......lLplKEAt.s..ushs.ah...............t...............tppGcRVVtuQRhhQssuDshLGas.p..hss...............+.s......................ahVRphpsh+tslch..sp.....l......s.....pphtthuphhutsLApAHA+.us....sss...................hlssh.h..u..p..s.cphc....pslspaAhtYuspsctDattahp................................................... 
0 67 139 199 +9843 PF10011 DUF2254 Predicted membrane protein (DUF2254) COGs, Finn RD, Sammut SJ anon COGs (COG4325) Family Members of this family of bacterial proteins comprises various hypothetical and putative membrane proteins. Their exact function, has not, as yet, been defined. 22.30 22.30 22.50 22.70 22.00 22.20 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.94 0.70 -5.99 89 512 2012-10-02 20:27:15 2007-07-30 16:46:06 4 3 472 0 161 484 58 341.10 25 80.16 CHANGED lppplWhhPslhulhullhuhlshhl-t....hhs..........hshhhtsss-uscslLoslAuohluVsshohSlhlsulstASuphoPRll.phhhpDpssQhsLusFlGoFlYuLlsLhslcssh.t................sspls.h.hhslllsllslhsLlhalcHlsphhplssslppltppsppslpphhpp.thss....sspsth.ts.......................ltutpsGYlQtI-hpsLtchApc..psshltltstsGsFVhtupslshh.......................pth..sccptcpltps....hslGppRohpQDspFGlppLsEIAhRALSPGlNDPsTAlsslspLsplLsp....................h.sshhtspps...sRlhl.shs.h..pcllcpuFstlpp.uussht.....Vhh+lhpsLtpl ...........................h..pplWhhsshhslh.ulhhuhlh.hh.hct.hhs............thhhps.ss-sscslLssluuohlslsshshSlhlsuhs.AouphoPRhh.phhl.c.Dp.ssQssLuhFluoFlaullslhslptst.st..............hspls..hhhslhlhhls..lhsllhalp.+lsp.hplsphhsplpptsh.p.s.lpphhtp.s..phs...s..............s.t..t.......t.ths.....................................lhutpsG....YlptlchspLtp....hs.pp......pph.plhlhstsGsalh.utslhhl...........................................tt.s....p.p..h..pp....lhps....hhlutpRohpQD.pFulp.lsEIAhRALSPulNDPsTAlpslsplsplLsh...............hht.t.sp.hhh..ttt................plhl...hs.h..pchlcssFs.ltp.uuspht.....VhhplhpsLtt.............................................. 0 56 107 135 +9844 PF10012 DUF2255 Uncharacterized protein conserved in bacteria (DUF2255) COGs, Finn RD, Sammut SJ anon COGs (COG4334) Family Members of this family of hypothetical bacterial proteins have no known function. 
29.70 29.70 29.90 29.80 29.00 29.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.41 0.71 -4.13 14 98 2009-01-15 18:05:59 2007-07-30 16:46:22 4 3 93 0 40 99 4 111.40 34 86.82 CHANGED --Lc+IspA-DL+IAPaRcDGpThuTPTWIWsVhV-scLaVRuYpGpsScWapuAlsQ+AG+IpAuGhst-VsF-sl...Dssls-pIDsAYRsKYup.StYlsPMls.tcARuATl+lh ....................-Lsplspu--lpluPhctDG.ThtpsshIWsVhlsscLYVRuhpGpp.SpWYpuAhsppsG+IpsuGhphpVsFtss....DttlpsplDpAYRpKYut..s.hlss.Mls.ttsRsuTl+l.h............. 0 16 31 34 +9845 PF10013 DUF2256 Uncharacterized protein conserved in bacteria (DUF2256) COGs, Finn RD, Sammut SJ anon COGs (COG4338) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 25.20 26.80 24.60 21.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.61 0.72 -4.20 71 341 2012-10-03 05:12:49 2007-07-30 16:46:39 4 2 336 0 116 321 140 41.70 59 69.36 CHANGED t+KspLPp.KhCslCtRPFsWRKKWp+sW-pV+YCS-RCR..+p+ ......+KspLPp.KlCsVCtRPFsWRKKWt..+sWD-V+YCSERCRRp+..... 0 42 83 101 +9846 PF10014 2OG-Fe_Oxy_2 DUF2257; BsmA; 2OG-Fe dioxygenase COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4340) Family This family contains 2-oxoglutarate (2OG) and Fe-dependent dioxygenases. It includes L-isoleucine dioxygenase (IDO) [1]. 
23.10 23.10 23.40 23.30 22.30 22.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.20 0.71 -4.92 113 432 2012-10-10 13:59:34 2007-07-30 16:46:58 4 4 397 2 130 367 28 193.40 28 79.36 CHANGED htssass.LshDpahsst...aRhRRaupathpsss......lhth.......scpsahQssp..aNth..pGul.RpFpsls..sshhpssshppllphhhphhsthp...........pshplplHQhRlpsssst...upsoPEGlHpDGhDal.hlhhlsR.pNl...pGGpopl...........asssppthhphpltcsh-slllsD..pclhHtlTPlpshsssp.......uaRDlLlloa ..........................................................................................tssasp.LshDsahsst........aRhRRYuphthpssp..........lh.h.........scpsahQosp....hNth..pGslhRpFpslp..sshlpssshppllthhhphsshss............shplplHQhRltAs.sp....u.ssPEGlHpDGhDal.slhhlsR.pNl.....tGG-shl...............hps.s.p.ptshhhplhcsh-shllsD..pclhHssoPlpshsssp.......uaRDlhVlT............ 0 28 70 103 +9847 PF10015 DUF2258 Uncharacterized protein conserved in archaea (DUF2258) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4345) Family Members of this family of hypothetical bacterial archaeal have no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 61.80 61.80 22.80 18.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.44 0.72 -4.17 10 57 2009-01-15 18:05:59 2007-07-30 16:47:08 4 1 53 0 39 54 0 76.90 44 43.67 CHANGED ELsTGlVIAuuYADKLRRVlFAsl.....pstlssc-llRssuELN+pLF-hLVpchclsKhDVVRIpV-..Yslc-uKlha .ELsTGllIAutYADKlRRshaAth.....pphlsp-pllRssuELN+tLYcpll.chplsKhDVVRIsV-..aclcss+lha....... 0 9 18 27 +9848 PF10016 DUF2259 Predicted secreted protein (DUF2259) COGs, Finn RD, Sammut SJ anon COGs (COG5497) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 26.70 26.20 19.10 24.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.14 0.71 -4.59 13 87 2009-01-15 18:05:59 2007-07-30 16:47:21 4 2 83 0 35 83 3 188.10 26 82.36 CHANGED AFtpaGlpDGSGFPYAslahlDstpspFl.sushRsphsc-su.......sLsthhpcspppspshh.stthpscsshhss.sshoEh.........oucsaplhlhs+ss.sshctshpLplpp...hshs.st.htsht.cshGFpL..htt.suspsphlpts+ulP.uRsCshsYcIcpVhl...psspsshlhlltscphGFEG.PstR..alslss+ .........hFtpYGhp-G...h.sYuslYhlDlspssFlpu.ushppphpccht.......sLh.p.hhpptphtspphthsth..hpup.hhhhsp.s...lsEh.........................suspaplhlshch..s...tthsssaplpLp.....................................phssushpc.tlpss+s.sh.RtsshsY+Icclhl....sspsshlhllchh.hs.pG.tshR..ahs.sh+........................................................................ 0 11 22 26 +9849 PF10017 Methyltransf_33 DUF2260; Histidine-specific methyltransferase, SAM-dependent COGs, Finn RD, Sammut SJ anon COGs (COG4301) Family The mycobacterial members of this family are expressed from part of the ergothioneine biosynthetic gene cluster. EGTD is the histidine methyltransferase that transfers three methyl groups to the alpha-amino moiety of histidine, in the first stage of the production of this histidine betaine derivative that carries a thiol group attached to the C2 atom of an imidazole ring [1]. 
23.00 23.00 23.20 24.50 22.90 22.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.37 0.71 -4.25 314 818 2009-01-15 18:05:59 2007-07-30 16:47:50 4 20 692 0 412 836 262 124.80 34 32.13 CHANGED GlDLh....K-sshL.sAYsDutGVT.....AtFNhNl.Lp+lN+cLs...uc.............FchssFcHh......Aha..sspps...c.lEMaLhupcspsVpls......s........hsh..pFttGEpI+....TE.ShKash.....cphptlhpp.................AGhpstphWsDsps.hFulhl ...............................GhDLsKDsshLhtAYsDutG..VT.................AtFNhNl.Lpcl..NccLs..u-.............Fchcs.FpHh........Aha......ssppp...R.IEMaLhupcspsVpls.........t.........shthpFptGEplc....TE.ShKas......cphpthhpt.................A.GhphtphWoDsps.Fulh........................................ 0 120 255 343 +9850 PF10018 Med4 VDRIP; Vitamin-D-receptor interacting Mediator subunit 4 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4552) Family Members of this family function as part of the Mediator (Med) complex, which links DNA-bound transcriptional regulators and the general transcription machinery, particularly the RNA polymerase II enzyme. They play a role in basal transcription by mediating activation or repression according to the specific complement of transcriptional regulators bound to the promoter [1][2]. 
25.50 25.50 25.60 26.10 25.20 25.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.32 0.71 -4.89 33 292 2009-01-15 18:05:59 2007-07-30 16:58:54 4 8 248 0 212 284 0 182.90 23 60.52 CHANGED hphhppLlst-cpLppslcpLpcapchppclppLcp-spplDppl+pllcpLtshcppLpshs......t.pp........................tshsptppppls......sc-LLcYA++ISKaops........................................................................................htPssah........t..h.Pa.PtE-phRtGhLuphshtss...t..h.pstpp.....................................ssstsptpsptttpppss ................................................t.thhphLlptDpplpptlch.........h.tpptph.ppchppLcp-scphD.p....c.lppl.cpLpcscp.Lssss.......sppch..........................................pshtpspcttl..s......sc-llcYA+R.ISttsts....................................................................................................................tP.sah...........t..hhPa.Ps-.phRtGhLuphp...st.th.............................................................................ttssttthttt.............................................................................. 0 60 104 170 +9852 PF10020 DUF2262 Uncharacterized protein conserved in bacteria (DUF2262) COGs, Finn RD, Sammut SJ anon COGs (COG4296) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.60 25.60 25.60 26.20 24.70 25.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.99 0.71 -4.01 50 250 2009-09-11 11:06:32 2007-07-30 17:17:49 4 5 177 0 44 249 1 141.30 24 59.29 CHANGED p-phlGsFphs+pht..ta-uphphh.spp.lplph...............t.stpccpphpp...slp....thcc...................l.......hpphcchccpl+phsucc..ll-lAN-.Whps..................s-p................................loc-cFhpplpLs...slslpscu.........shphaasDs....DhFhGHslhVpsshstshp..susl .......................................................hGphhhs+php...hapsphphh...spp.lpl.h..................stppppthpp...hh.pthcc...........................hhtphcpac.pch+phhucc...LlchAN-.Whc.p..................c-pt...............................lTcEpFtpplp..lp...slslptss.........shshaacDs....DlFhGHsIhlshshctslhsApl.................. 0 24 33 34 +9853 PF10021 DUF2263 Uncharacterized protein conserved in bacteria (DUF2263) COGs, Finn RD, Sammut SJ anon COGs (COG4295) Family This domain, found in various hypothetical bacterial and eukaryotic proteins, has no known function. 19.30 19.30 20.80 19.80 19.20 18.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.92 0.71 -4.21 20 353 2009-01-15 18:05:59 2007-07-30 17:18:14 4 11 248 4 211 358 19 139.00 26 40.87 CHANGED ApETlsll...........ssGhYlsssustlslptphctu.hpsophasPs.h..............s..pTtlcVssssThsuA....ppLsp........ssppVslLNFASA+NPGGGalsGApAQEEsLsRsSuLYhsLhp.....h...Ythp+tppshhYoDth...IYSPsVPlFR-c .............................................................................................................p.hh...t.........................................tt.splpVh.s.tsolpuA..................hpLtt........................tttpluVLNhASsppP..GGG.ahsG.At.A.QEEsLC+pSsLhssLpp......h...Y......p...t..........t..t.....hYs.th..................IYSPsVhVaRs....................................... 
3 74 132 181 +9854 PF10022 DUF2264 Uncharacterized protein conserved in bacteria (DUF2264) COGs, Finn RD, Sammut SJ anon COGs (COG4289) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 30.60 26.80 24.40 23.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.98 0.70 -5.80 52 781 2009-01-15 18:05:59 2007-07-30 17:23:14 4 6 507 0 203 599 25 287.20 29 59.32 CHANGED psRpp.............hhpshpplhpslhshh.upspt+hsh.spossta....sppss..s.lEuauRsLhGlAPhLpt........tt........phh-hhhcultsGsDP...pss-YWshhpst.s...QplVEsAsluhuLhhu.ctlWpsLsptp+pplhpaLpphp.phphspsNWhhFplhlphsLpph.Ghth-p...tlcpslpclcp.aYl...........GDGWYsDGs.........................thphDYYs.uauhHhhhlhhsphhscpc.t.........hhpphppRtpcFuppa.+hhus-Gsh.saGRSlsYRhAshuhhuthuhtph....tsl.s.Gpl+shhppsl+hahsps.shasps......GhLolGashsp.thsEsYsusGSsYWuh+uF.lsLuLPtscsFWoutppshs .....................................t.sht.h.thh.phhtsl...h.s.tt.....thph.stttshh............sptts..p.hEuhsRshhshushht.......................................hhphhhpulhtGsc.P...pp.s.pY.h....s...ht.sh................QhhVEhu.luhsLhhs..p.hWp.Lstppppplhpah.t.p.hp.php...hs..sNWhhFplhlphhLpph.Gh.hst..............hp.thth.hc.p..aYh...........GsGWatDGs.........................th..thDYYs.uashp.h.h..hhphh.ct.t..................hthhhpRht.astt...h.hhhs.p.....G.th...hGRShsYRhA.ht.huthsht........th........h.s.G.h+..thhtttl+hahpp...thhstp......GhLolGa.h.....h...p.h...hu-sYsusGSsYWuh+sF.lsLulPtscsaWps.tpsh.................... 0 61 135 180 +9855 PF10023 DUF2265 Predicted aminopeptidase (DUF2265) COGs, Finn RD, Sammut SJ anon COGs (COG4324) Family Members of this family of bacterial proteins comprise various hypothetical proteins and putative aminopeptidases. Their exact function, has not, as yet, been defined. 
25.00 25.00 26.30 25.30 21.60 21.40 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.99 0.70 -5.30 26 162 2009-01-15 18:05:59 2007-07-30 17:23:26 4 1 160 0 64 186 97 334.50 43 92.38 CHANGED hhhlhhshhLuGCss.......luYYhQuspGplplMpttcPlcchlsDs..sssstL+pRLthupphRpFAsp-LtLPDNsSYRpYA-LpRPaVVWNVhAsPchSLp.+pWCFPlsGCVuYRGYFspssA+stAspLpppGhDstVtGVsAYSTLG.....WF....sDPLLsTFlt.as-s-LApLlFHELAHQslYlpsDTsFNESFATsVEp.GlcpWL...A............ppusssscspatthcpRRpQFpuLlhsTRpcLptLYtsshsssptptt....KtthhpphRpcYupL+sp.W..................uGhuuYDtWhspslNNAcLushusYsphVPAFpuLacpsstc............WsRFYsuVcpL..upLPts-RctsLp ...............h..hhhshhLuGCss.......luYYhQ.spGphpLlps+cPlsclluDP.....spsstL+p+L..tpupphRpFAoccLtLPDNpSYRhYA-LsRPaVVWNVhAsPEhSLpshpaCFPlsGCVuYRGYaspusA+upAstL+tpGhDVhluGV.AYSTLG.....WF....sDPlLSohlt.as-...t...cLAsLIFHELAHQphYlcs....................DTtFNESaAohVEppGscpWh....t...............tp..u..t.s..stpsph..pp...tRRpQFttLlLsoRpRLcsLYss.s..l..s..sstp...Rst....KsthFppLRpcYtpL+sp..W......................uG...ptaD.t.Whsp.P..h...NNAcLhshuhYcpaVPAFtsLF.cps.sGD............WspFYstVcpL..up..LPh.tpRptsL................... 0 16 35 51 +9857 PF10025 DUF2267 Uncharacterized conserved protein (DUF2267) COGs, Finn RD, Sammut SJ anon COGs (COG5502) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.20 25.20 25.20 28.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.64 0.71 -4.06 135 355 2009-01-15 18:05:59 2007-07-30 17:24:29 4 7 195 1 178 362 14 123.70 24 78.49 CHANGED chp.palpclppcssh.ss............pppAhpsscuVLpsLtcRLsscputcLAupL..............Ph.lpshhhp........stttssp.........hstccFlp+lup.h...............pt..spt........sucpsspAVhsslpctlstsch.cclh.spLP.....p.shcpLa .....................hppalpplppcssh..s............cppAhpsscuVLpsLt-RLssp.......pstcLuupL..............P..lpshhhp..............sttsstp..........hshccFlp+lup..h................ss....sst........ssctsspAVhsslpctlstsph.cclhspLP.....p.shptLa................... 0 41 114 159 +9858 PF10026 DUF2268 Predicted Zn-dependent protease (DUF2268) COGs, Finn RD, Sammut SJ anon COGs (COG5504) Family This domain, found in various hypothetical bacterial proteins, as well as predicted zinc dependent proteases, has no known function. 23.60 23.60 23.60 23.60 23.50 23.40 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.09 0.71 -4.93 29 528 2012-10-03 04:41:15 2007-07-30 17:24:48 4 1 428 0 84 353 10 181.50 33 68.64 CHANGED happhpcphpphtphapts.slslh.......lh.sstpp.hht.phGhpussuhsshlhlh..lssshsps.....clpullAHEaHHssRhphhphs.tshTLh-sllhEGLAEpastphaGcphhusWss.hspppLpphhpphlcpphchp...shtphsshLaGsthut........hPthhGYulGYplVppaLppsst.oltchshhsucpIlc .........................................................................apphpp.hchhhs.hpt..sls.........lLl.uss.tp...hh.........p.h.sht.ussu.hsshlhlh...lssp.ols........phpullAHEhpHshRhp..h...............l..chp.....................s..hoLh-hllhEG....LAEpaspplaGc....tt.h..uPWso....ths..............p......ph.............hhcp...hltpplplp...thhchtsaL..a.G.....hsp..............hPph...hGYAhGY+lV+talppssh...sh.th.hsls.ApcIl.................................... 
0 26 58 68 +9859 PF10027 DUF2269 Predicted integral membrane protein (DUF2269) COGs, Finn RD, Sammut SJ anon COGs (COG5528) Family Members of this family of bacterial hypothetical integral membrane proteins have no known function. 27.80 27.80 27.80 27.80 27.70 27.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.06 0.71 -4.43 56 382 2012-10-01 21:57:53 2007-07-30 17:27:15 4 1 316 0 152 427 95 142.60 26 94.20 CHANGED hlLKhLHlluuslLhGoGhGhAaahhhAp................................cotcstslAtss+hVVhuDal..FTssusllQPlTGhhLsals..GasL..spsWlhhSlsLYlluGshWLPVlh.lQh+hp...chApsAspssts.ls.tYhphh+hWhhhGhPAFh.uhlsIhaLMVsKP ...........h.hlchlHlluuslllGsshsh...A.hhh.h.h..Ap................................+su..s.s...ss..hu.t..s...t+h.ls.hschh.....h..h..ss..s.slh.PloGhhlsthh.....Gh....sl....sp..s..Wl....lhSl.sLasl.u.u.h.h.Wlsllh..hphcht..........chAtt..utt..tst.......ls.th.h..phhp..hhhhhuh....uh.h...shlslhhLMlhKP.............................................. 0 36 81 113 +9860 PF10028 DUF2270 Predicted integral membrane protein (DUF2270) COGs, Finn RD, Sammut SJ anon COGs (COG5530) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 40.70 68.60 22.80 21.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.06 0.71 -4.81 19 87 2009-01-15 18:05:59 2007-07-30 17:27:43 4 1 68 0 51 93 11 180.30 40 77.60 CHANGED uHLYRGElsRussWRoRLDpTTNWAVsssussLShuFuospus.hslLlshlhlhhFLhlEARRYRaa-laRuRlRhhEpsaaAshLsstths.css..WpphLApDhp+Ppa+lShhcAluRRLRRNYhaIhhlLhlAWhsKlslHPh.........stShsphlppAulGPlPGhhllsssshahhshlslul .hHhYRGElsRhssWRsRLDpTTNWAlsshAAhLShuhSossssHhsLLhuhhllhlFLhlEARRYRaaDlaRuRVRhlEcsaaAthLss.tss.sss..WpphLup-hcpPphplohh-AluRRLRRsYhalhhlLLlAWlhKls..s............stshsphhpsAulG.slPGhhVhsslshaYsshlsls.h.... 
0 9 29 42 +9861 PF10029 DUF2271 Predicted periplasmic protein (DUF2271) COGs, Finn RD, Sammut SJ anon COGs (COG3656) Domain This domain, found in various hypothetical bacterial proteins and misannotated lysozyme proteins, it has no known function. 27.30 27.30 27.50 30.60 27.10 27.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.78 0.71 -4.68 62 357 2012-10-03 16:25:20 2007-07-30 17:27:59 4 5 342 0 130 295 47 138.00 31 75.56 CHANGED Asshpl..slplsplsstpactPY.....VAlWlpsu.csphspTLtVWhpps........+Wh+-.lRpW..WR....csucptp..lDGloGAT+ss.Gphthshsssts.hssLhsGpYplhlEsuRE.sGscph.....hclshsl.....spststphpGpsElusl ...................thslslpl.pLssh.thctsY.....VAlalpcs.puphstoLhlhstps.................KWhcc...L+pW...aR....tsu..ts..sp...lD...GlTGAThsu.Gc...shch.shchscsLhsusYplhlEuAhE.ctscph.........sclshsL.....ssputshpspGpp.lus.............................................................. 0 37 87 110 +9862 PF10030 DUF2272 Uncharacterized protein conserved in bacteria (DUF2272) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4322) Domain Members of this family of hypothetical bacterial proteins have no known function. However, given its similarity to the CHAP domain it seems likely that this is an enzyme involved in cleaving peptidoglycan. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.56 0.71 -4.82 13 136 2012-10-10 12:56:15 2007-07-30 17:28:21 4 5 128 0 47 156 22 179.70 29 62.05 CHANGED EssshuhpplutYW...t-ussscshhsGs.su.....................asWSAAFISWVMRpAGlss.pF.hussHusYlssAh...psGhsshshhth-PssttPcPGDLlCssRGRsphlsasuhhssss......asuHCDIVVu....sDs..pplpsIGGNVpsSVuMcplsLscuGpLsss...............................sss+hsWhllL+s ....................................................t.hhhthsttYW......p.shht..hst.......h..h..s.....................sWSAAFISWVM+pA....G..lss...Fshu.ss.H.hpYlpsAh.......pss....p......sh.hhh-s..ss.htP.psGDLlChsRups....h...t.h.ss.h.tshs...........hstHCslVVu....scs...cplphIGGNV..t.soVs....hpphslspt...Gpl....................................shh.WhhlLc............................................. 0 8 32 40 +9863 PF10031 DUF2273 Small integral membrane protein (DUF2273) COGs, Finn RD, Sammut SJ anon COGs (COG5547) Family Members of this family of hypothetical bacterial proteins have no known function. 25.40 25.40 25.50 25.80 25.30 25.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.10 0.72 -4.35 33 978 2009-01-15 18:05:59 2007-07-31 08:34:00 4 1 886 0 111 282 4 49.80 39 73.55 CHANGED p-hhppaph.llGullGhllAllhlohGFaKslllllhsslGhhlGhhl.cp ...........phhcpa+h.IIGuLlGLllAlLhlolGFaKTlllllLshlGlhlGhhl-...... 0 47 72 97 +9864 PF10032 Pho88 Phosphate transport (Pho88) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4554) Family Members of this family of proteins are involved in regulating inorganic phosphate transport, as well as telomere length regulation and maintenance [1][2][3][4]. 
25.00 25.00 25.60 33.50 21.50 24.50 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.15 0.71 -5.08 27 200 2009-01-15 18:05:59 2007-07-31 08:46:54 4 5 167 0 148 182 2 178.70 39 92.04 CHANGED MsPtloNlhlhLlhMQlu++..lsh-D.PpllhhlRhhYlsspslhhslYhasph+IspKN.....DhTsLKYV-PusPhuutt.....cs+hssTTV+-YDlpplpph.h+uhhhGluMMuFMHlYhKYTNPLlhQSI.slKuAhEuNlV+IHlaGpPApG-..L+RPF...Ktssuhhuuhtt..upspoDKpol-sAEpsstu..GsKs- ......................hsPtlpNlhlhLshMQlu++..lsh-D.PsllhhlRshYlsopllhhslYhYlptpIspKp...........Dh.TsL...K.YVEPs.sh..uupp...............E.+hlsTT...Vp-YDhpplcph.l+u.hhGluM.MuhMHLYhKYTNPLllQSIhslKuAhEuNlVKIH.laGpPApGD..LcRPF...K..ts.suhhutht....t...stspoDKtul-tAEcshtuGhK............................................... 0 57 95 131 +9865 PF10033 ATG13 Autophagy-related protein 13 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4573) Family Members of this family of phosphoproteins are involved in cytoplasm to vacuole transport (Cvt), and more specifically in Cvt vesicle formation. They are probably involved in the switching machinery regulating the conversion between the Cvt pathway and autophagy. Finally, ATG13 is also required for glycogen storage [1][2][3]. 
25.00 25.00 26.90 25.50 21.20 20.80 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.60 0.70 -4.94 38 337 2009-01-15 18:05:59 2007-07-31 08:57:08 4 5 247 0 227 334 3 203.60 28 31.14 CHANGED IppFahKuutlIlpSRl...............................hsspssst+sN+W...................FNlcl...p-psthp-pL+hW+s.tshtp...............hPPllIEsaLDhptLssspslhh.Dttsp.h.s.tu.......................sp..-llLERW.hl-hc.............sttttssstssphsplYKc....sllLFRSLashs+lLPAa+lp+ph..........ttt.......................slslthRlh.........sGp..h.stsp.sLopPl...hst.p...............phc...phpFsslpTshGpLplsVsYRssscF ..............................................................lp.FhhKssplIlpuRh.....................................................................t...s...spsss.s.psscW....................F..Nltl...c-h.st.h.pcph+thhs..tth.s............................hh.shslElhLcs......................................................................sctsphlLEpW.plchs..........................................pp..s.sp.phpsh.slY++...................hhlLhRSLhshsRlhPAa+lt+ct..............s.......................t.phslhaRlh...............up........s.hsp...................................shp....phphusltTshG.plslSlsYRhshtF......................................... 0 67 122 185 +9866 PF10034 Dpy19 DUF2211; Q-cell neuroblast polarisation KOGs, Finn RD, Sammut SJ anon KOGs (KOG4587) Family Dyp-19, formerly known as DUF2211, is a transmembrane domain family that is required to orient the neuroblast cells, QR and QL accurately on the anterior-posterior axis: QL and QR are born in the same anterior-posterior position, but polarise and migrate left-right asymmetrically, QL migrating towards the posterior and QR migrating towards the anterior. It is also required, with unc-40, to express mab-5 correctly in the Q cell descendants [1]. The Dpy-19 protein derives from the C. elegans DUMPY mutant, Swiss:P34413. 
20.40 20.40 21.20 20.70 18.80 20.30 hmmbuild -o /dev/null HMM SEED 642 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.31 0.70 -6.35 12 344 2009-01-15 18:05:59 2007-07-31 08:58:36 4 6 92 0 200 352 2 453.40 30 90.58 CHANGED Alhsuhlah.altplaEN-paFSpluplEREhuFRTEhGLYYSYYKsll.cAPSFhcGlahlhpDshTEYPpsINsLpRFNlYPEVlLAhhYRsatthhphh..................................-PshFYltslFhLpGlhhshhahhuhhLS.GShLuGllsshhah..FN+s-sTRl.aT.PLREsFuaPFlhhQhhllThhL+p...ptspptphlhlhhoshh...FhLsWQFuQFllhTQlhuLFshashuhlsss...Khppllhhhh.......hShhlsalLhF....GNsMlLsShhhS.llulhslhtl.................ppp...hsthhh...clhhhllpshhhhshTlhLphhhpplL............shpDDtHlhshlcuKa..tsapsFcohlYsC.usEFshlptcs.h+lspThLlshhlhhhshhh.............hphhtsh...........................................................................hthlsppp.p.....................pcsp.t.csE................................................................................llYpslQhlsFshLAllIhRLKhhhTPahClhuuL.lCSppl............a.........lhppl+ht.............ssshhllshhhhphhsslppphshlsEass.sp.pELlpWIppsTp.sAVFAGuMsshAsVKLoTtRslVNHPHYEcsslRcRTchVYphYS++ssc-V+cpL.hphtssYhIl-tshCsp.Rs+s....GCsh.-lhDhcsscstsp.............s.hCpt..lttc..s+......P...aFspVFp.NppYpVhKl 
.....................................................................cpph.h.tc.uhYYsaac.hh....tu......sh.t..uh..lh.sp.o.........................................lNhlpph.lh.Elhhu....h.aphh.......t.h..............................................................pPh.FYl.hlahhtuhhh.hhahhu..........hhhS..so............h..............uGhlss..hhah..hN+h-sTRl.as.PLREsauhPahhhQhhhlThhl+........................t...h...........hh....h.h.hsshh...h.h.Wpaspahhhh.Qhhslahh..hsh......................ph..lhhh.h.......huhhhshhl.F.....N.hhL.o.h.s..h.hhhhhhh..........................................................................................p.............t..h.....ph.h......hh.h...hhhhhhslhlp.hhp.h.hh................htpp...+h..phlts+....h......htsFph.hh.hC.t.thth...th.....ph..po.lh.h.hhhhh..hhh.............h.hh..h...........................................................................................h.............................................st................................................................................................................hhaphhphhhhs.huh.h.th.Khhh..hshhChhuuh...ls.S.ph.................................................ht.ht............................h.hhhh..h.h..hph....sth.tph......htEa......s.sp..pLhpWIp.p..s.....t.s..............shuGs...hhusl+L.............s..................s.......hh.lsshPhYpptthht.Rs....h.Ythaupts.pplht.h..hthtspahllp..hC......t.............sCph....phhD.tp...s...t..tt..........................................hCt...h..t...........................F.hha..Nt.ahlhp.......................................... 1 55 69 126 +9867 PF10035 DUF2179 Uncharacterized protein conserved in bacteria (DUF2179) COGs, Finn RD, Sammut SJ anon COGs (COG4843) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.38 0.72 -4.36 189 5446 2009-09-10 22:22:21 2007-07-31 08:59:57 4 5 1792 2 766 3177 158 54.80 32 19.63 CHANGED RGlThl.pupGuYot....pc+plLhsVls+p-lhcL+pllpclDPpAFlsl.t-sp-VhG ...........RGlThl...pupGuYop......pc+pllhsVls.+.pEhscl+pllpplDPpAFlsl.t-spclhG............ 0 297 555 661 +9868 PF10036 RLL Putative carnitine deficiency-associated protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4380) Family This family of proteins conserved from nematodes to humans is of approximately 250 amino acids. It is purported to be carnitine deficiency-associated protein but this could not be confirmed. It carries a characteristic RLL sequence-motif. The function is unknown. 22.50 22.50 24.30 24.30 21.40 22.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.45 0.70 -4.95 15 168 2009-01-15 18:05:59 2007-07-31 09:07:27 4 4 123 0 110 159 3 220.70 42 95.44 CHANGED hh+.RtLssLpas.tssthshs-cc-FtsllhWLEDQKIRhYphEDRppLRplssuc...WscsapKYhpDlphPh.........clps+pptlsWLLshAl+L-YtDss...............ts.pstpch.pcpp+psp.pscpsp..sslshssscFptGlppLAstLsltt..............................Hs........DplVhLcuss+llpE+Lspcstscsslp...s.Ph..-p.tslGhss................tDssLccAApILRLLpIpsLRcLQocINEslVuVQslTAsP+TDs+LG.KVGR .......................................................................hc+KLpALsY....sshsh..p..DcpcFRshllWLEDQKIRtYpI....E-RssLRslpusc...WsphapK..YlpDlssPh............phpp+.p-tl-WLLuhAV+LEYsDss........................pphps....spptpss..sstspsspPl....sl.Dhs..s..s-F+sGVhsLAslLpIpp....................................Hs........Da..LlhLcA....lphl..lp-+Lstcu..lscssps.....ts.......h...shsh.c...c....t.lGFss............t.Dss.lspAApILRLLaIp-LR-LQTcINEsIVAVQslhA-PKTDp+LGKVGR.................................. 
1 42 52 82 +9869 PF10037 MRP-S27 MRP_S27; Mitochondrial 28S ribosomal protein S27 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4570) Family Members of this family of small ribosomal proteins possess one of three conserved blocks of sequence found in proteins that stimulate the dissociation of guanine nucleotides from G-proteins, leaving open the possibility that MRP-S27 might be a functional partner of GTP-binding ribosomal proteins [1]. 25.90 25.90 25.90 26.10 25.80 25.60 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.47 0.70 -5.56 8 150 2012-10-11 20:01:01 2007-07-31 09:08:46 4 3 86 0 85 150 0 316.00 26 84.07 CHANGED MtsShlppuhhhu+hh........uu+RhLLSuAYssuchWEuRcpEPhpLA.s...LAphh-psa-RKLPV....SSLsluRFlDNIuSR--..lDpAEYYLYKFRHSPNCWaLRDWTlHoWIRQCLcasupD+uLYTLcN+VQYGIFPDsFTFNLLlDsaIKcG-aKsAsSVVpElMLQEuF-hPSTphLSLYu.Lhpa...........LAspPsLo.......hpEERslGASLLlsGhKQ-solGhSup.LhGhALLGKVEhppGl+AVa+sMPLhWusGYLs+ulplhEtlAos..sltLuc-sLclhpslLc.....sLou.sDussptp.pcs.-sttpp.....plsEc...-psEpuK.......LspYucpFpch+spLpsts+l-scul.s.s.thlpE+LsssEp.DlchYEp+lptWphEpc...............pLIQREccpREpA-pE.....atstpss+su 
..................................................................................................................................................t....tt..h....s.hslsh...h........ls.hpsp-c..l-.sc.hla........+..............aRppspshhltshsh.p.shlR.hLchst.p-pAl.hLpsplpYGlFsDsaoaNlLhD.hlcctpa.csAhpVlh.clh.Q-sh.ps........sTphLulhs.ha+h......................t...........p..............p-.hp..htsthlhs.hhpcp.shhp.p.lhGhshh..hhsp..h..t...s.ttlhpt...s.hht.shhtch.pshphhtt.......t.t.ltp-...slphh....tthhp..........th.......t.....t.....t..p......p..tt.t.t............p..pp...pp...hct..h............l.p.hp.apt...hppphpthhp.p.pth.......h.pph.thpt...th.tpp...h..c......................h.h.....tcpp.......p...p..................t...................................................................................................................................................................... 0 20 25 52 +9870 PF10038 DUF2274 Protein of unknown function (DUF2274) COGs, Finn RD, Sammut SJ anon COGs (COG5639) Family Members of this family of hypothetical bacterial proteins have no known function. 23.80 23.80 24.00 25.20 21.60 23.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.00 0.72 -3.85 42 198 2009-01-15 18:05:59 2007-07-31 09:10:14 4 1 125 0 104 209 23 68.60 45 84.66 CHANGED Mo+LKLGsls.D-+PVKlTlELPAslHRDLsAYAclLuppsGps..scPs+LIsPMLpRFhAoDRuF..sKAR+ ..............sKL+LGsls.-ccPVKlTlplPAuL+....pDLstYAtlhup....p.hGps....s-sscLIs.MLcpFhAsDRuF..tKu+........ 0 15 61 84 +9871 PF10039 DUF2275 Predicted integral membrane protein (DUF2275) COGs, Finn RD, Sammut SJ anon COGs (COG5660) Family This domain, found in various hypothetical bacterial proteins and in the RNA polymerase sigma factor, has no known function. 
24.60 24.60 25.20 26.40 22.60 24.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.27 0.70 -4.44 7 40 2009-01-15 18:05:59 2007-07-31 09:10:48 4 4 40 0 13 40 0 204.20 22 55.95 CHANGED .cpt..sDhhcphhu..tc.s..p.p..hh.thahphh+thultulhlhhhAsshhs.h..........hplsphslsplt.cshhslpsp..............hp.thhsls.tuhut...hpssut.uppssssslcplps.shhththsssh....sh.lttshsspppsht.h.ththhssthtshsts..............hhhsuchccptttlhhut..................shs.slpc+ts.cph.t..........st ............h.......DLscthhu..t..s..sspt.thh.thh..p+.hh+thAlhslslhhlAsss.h..h...........hplsshshshloscshtshpspa.............hpcsst.lshhshu....ltsGsh.utssuhsplsthhuhshhshhlsssh..............shpls.tshltsc.sh.hs..Lt.........hshlsthh+shutT.................hh..tphpph..hh...s............................................hs...................................................................................................................... 0 4 9 12 +9872 PF10040 DUF2276 Uncharacterized conserved protein (DUF2276) COGs, Finn RD, Sammut SJ anon COGs (COG5551) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.00 25.00 24.90 24.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.53 0.71 -3.90 63 439 2012-10-01 21:23:39 2007-07-31 09:11:45 4 2 340 0 176 405 13 121.00 21 43.08 CHANGED lpFhTPTthp..............cspthh..hPps.ttl.hpSlhc+asshss.hth..................stphht.hhspslplhshp.l....+shphph.........pttthsGhsGpssapht.......tthhpthttLlphupahGlGppsuhGhGpsc ......................................................................lpFhTPsth+p....................puphh.h..hPss..thl.h...poL..hp+a...sshss.hth....................s.phht..thspplpltshc..l....cs....hthth...........pttphsGhhGphsaphp.............htphhthLLthupasGlGtpsuhGhGth............ 
0 82 133 161 +9873 PF10041 DUF2277 Uncharacterized conserved protein (DUF2277) COGs, Finn RD, Sammut SJ anon COGs (COG5552) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 64.60 61.40 22.10 21.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.58 0.72 -4.02 22 232 2009-01-15 18:05:59 2007-07-31 09:12:33 4 2 229 0 100 209 12 79.60 55 87.30 CHANGED MCRNI+sLhshEPP.....ATc-EIcAAALQaVRKlSGhscPSpANptAF-pAVt-IutsopcL.LsuL.sps......PP+sRtt.tA+ ..MCRNIppLRs.hp.PP.....ATs-EIcAAALQYVRKVSGhp+PSs.ANpEAF-pAVs-VsssTp+L.LsuLss+t......PPhcR.t.tA+............... 0 31 70 87 +9874 PF10042 DUF2278 Uncharacterized conserved protein (DUF2278) COGs, Finn RD, Sammut SJ anon COGs (COG5634) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 27.50 28.80 19.10 22.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.45 0.70 -4.60 27 254 2009-01-15 18:05:59 2007-07-31 09:13:42 4 4 223 0 76 227 3 209.10 41 82.83 CHANGED Msl...psYGVhKup.hphth-tttp..tsPHhplhlcssss...........................spaRsAlNlcSss......t.sucllYhhspph.p+Plhpp.LssLs.Gap.Lp...........................ssssthuLDYlRss.Lhsspsh+sls.hDhs...................GssNDlhchL-shlppuhtp.........................ssplYlFGph.............F......................psu.sG....lHslHMNQGss..p.......caps-NGlaQDGulllch.......sD..........pWsulFlAFtoQshhT.Ds...pGc ..............................MsL...psYGVLKGpslp.pht.....pst...toPHYpV+lpsp.ss...........................s-aRl.AINVcSps........h.sS-ll.Yhssp...sh...cp.shph.L..s..sLs.GaTclp...............................sspstsALDYlRus.....LaDspphhsLP..h-ts...................GscNDLs-hl-phlcpAhp.p.........................cuhlYsFGcp.............F.................................pPG...NG....IHDIHMNQGNs..p.......+apsDNGlWQDGulLlca......p-..............tp.W..hulFLAFpSQuasT.D-.pGp............. 
0 20 34 53 +9875 PF10043 DUF2279 Predicted periplasmic lipoprotein (DUF2279) COGs, Finn RD, Sammut SJ anon COGs (COG5544) Family This domain, found in various hypothetical bacterial proteins, has no known function. 28.50 28.50 28.50 28.60 28.40 28.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.01 0.72 -3.71 20 603 2009-01-15 18:05:59 2007-07-31 09:14:20 4 1 591 0 71 238 32 86.00 69 70.70 CHANGED usDsWsup...........D+ApHF........hsSAsLuAsusp.........tp.h.huhhhosulGhhKELaDoppuGSGaSapDLAhDhAGushGhslhpts ............A.NDuWSGQ...........DKAQHF........lASAMLSAAGNEYupH..QGhScDRSA.hFGLMFSlSLGASKELWDS.RP..EGSGWSWKDhAWDVAGAoTGYslWQh.s.......... 0 14 34 52 +9876 PF10044 Ret_tiss Retinal tissue protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4402) Family Rtp is a family of proteins of approximately 112 amino acids in length which is conserved from nematodes to humans. The proposed tertiary structure is of almost entirely alpha helix interrupted only by loops located at proline residues. Three sites in the protein sequence reveal two types of possible post-translation modification. A serine residue, at position 41, is a candidate for protein kinase C phosphorylation. Glycine residues at position 69 and 91 are probable sites for acetylation by covalent amide linkage of myristate via N-myristoyl transferase. Rtp is differentially expressed in the trout retina between parr and smolt developmental stages (smoltification). It is likely to be a house-keeping protein [1]. 
21.70 21.70 21.90 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.21 0.72 -3.45 7 126 2009-01-15 18:05:59 2007-07-31 09:14:58 4 3 87 0 79 121 0 89.70 44 69.00 CHANGED LlShEpLDRuSP-lWPEphPGhsEFho.sts.h...pssPpahssLsc-DhshlpcLGsLosspLhtKlKpLps.AYQLGLcEA+EMTRGKaLsIhs ......................................LlShEpL..RsSP-lWPEp....h......PGls.-Fss.p.p.sshp..sssscahsc.l.c.....p.-Dl..c...hlpELusLTsssLh-Kl+sLpshAYQLGL-E............u..+EMTRGKaLsIhp....... 0 24 31 56 +9877 PF10045 DUF2280 Uncharacterized conserved protein (DUF2280) COGs, Finn RD, Sammut SJ anon COGs (COG5556) Family Members of this family of hypothetical bacterial proteins have no known function. 24.00 24.00 27.40 24.50 23.60 21.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.90 0.72 -4.11 15 135 2009-01-15 18:05:59 2007-07-31 09:15:31 4 3 102 0 25 104 1 99.90 48 65.70 CHANGED MAsLss-VKuFIVQALACFDTPSpVscAVKcEFGlcloRQQsEoaDPTKtAG+sLuc+WtsLFccTRc+Fhp-sucIPIAN+AaRLRsLsRhupKAEph+NhuL ..........MAsLpppVKhFIVQuLACFDTPopVAcAV+pEFGlcloRQQVEsaDPTKsuG+sL.u+Kah-LFppTRccFppc.ltsIPIAN+AYRL+tLpRhhpcscp.+sh..h........................... 0 2 10 20 +9878 PF10046 BLOC1_2 Biogenesis of lysosome-related organelles complex-1 subunit 2 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4559) Family Members of this family of proteins play a role in cellular proliferation, as well as in the biogenesis of specialized organelles of the endosomal-lysosomal system. 
26.80 26.80 26.80 27.20 26.70 26.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.22 0.72 -3.86 17 169 2009-01-15 18:05:59 2007-07-31 09:16:01 4 4 142 0 123 176 6 95.60 35 58.97 CHANGED hpchhsshsphlpuphptospchpLLEphNpssst+YtchpphspsLpsphcpLptp.pphpsalppIcpI-pplspLEpssptLDtasppLEsKlpsl ...........pchFschupalpuELsuosp-Yc..LLEpMNchsuh+Yh-hcslutslspslpcLsp.Ka.tpL.pPalpQIstI-cpVspLEpsshcLDsYoppLEs.KhKp................. 0 40 60 98 +9879 PF10047 DUF2281 Protein of unknown function (DUF2281) COGs, Finn RD, Sammut SJ anon COGs (COG5559) Family Members of this family of hypothetical bacterial proteins have no known function. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.14 0.72 -3.68 19 250 2009-09-11 14:35:08 2007-07-31 09:17:16 4 5 143 0 89 275 17 56.90 21 71.76 CHANGED sppplhpplppLPpclhpElLDas-FLhpKt..............h.pptsppthhhshtGt..h..pp.pshs.sslElQ+chh .....................h..ttlhpplppL.PcphtpEVlDFl-FLhpKt.................t.t.................................................................................................. 0 26 63 83 +9880 PF10048 DUF2282 Predicted integral membrane protein (DUF2282) COGs, Finn RD, Sammut SJ anon COGs (COG5572) Family Members of this family of hypothetical bacterial proteins and putative signal peptide proteins have no known function. 22.40 22.40 24.00 23.30 22.20 19.40 hmmbuild --amino -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -9.04 0.72 -4.06 74 316 2009-01-15 18:05:59 2007-07-31 09:18:13 4 5 262 0 115 309 38 55.50 49 57.74 CHANGED pEKCY......GV.uhAGpNDCuA.GsGToCAGTSpsDhQGsAWphVstGTC..........ppl............ttGoLp ..........hEKCY......GV.AhAGpNDCAs.....usuooCAGoSphDhQusAWphVPtGTCspl...............stGoh.t...................... 
1 22 60 95 +9881 PF10049 DUF2283 Protein of unknown function (DUF2283) COGs, Finn RD, Sammut SJ anon COGs (COG5428) Family Members of this family of hypothetical bacterial proteins have no known function. 20.80 20.80 20.80 21.40 20.60 20.40 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.33 0.72 -4.11 63 354 2009-01-15 18:05:59 2007-07-31 09:19:17 4 2 242 0 146 369 32 50.30 28 59.25 CHANGED h+lpYDt-sDsLYlpl..spssht.......-ot-lss........slllDhDppGcllGIElLpAo ..............plpYD.-sDsLYlpl......psspht...........-op-lss...............slllDh......D.ppGcll.GIElhssp............... 0 47 102 126 +9882 PF10050 DUF2284 Predicted metal-binding protein (DUF2284) COGs, Finn RD, Sammut SJ anon COGs (COG5423) Family Members of this family of metal-binding hypothetical bacterial proteins have no known function. 25.00 25.00 28.30 27.60 22.00 21.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.38 0.71 -4.65 37 229 2009-01-15 18:05:59 2007-07-31 09:19:58 4 5 162 0 103 212 12 159.70 24 78.21 CHANGED chthlpstclsl.pppsth+Cp..sCssYGpphsCPPtssshcEh+chlpcYcpAlLhphphsspp.tp.............................hhthppplpphhhclE+phhttGa.hAhshhsGsCp..hCp..p.......Cshpp..tt...........C+hPchsRsShEAhGl-lhphscps.sh.hphht.............pphshhuhlLl ........................h.stclhhptchhth.Cc...tCssYGpshuCP..Ph.ss.....ssc-htthlppYcpuhlhphp..h.hpst.............................t.hh.....ptp.hpphhhchEcp.h..h......t..p.u.a...shsh.hsG.sCp..hCp....c.......Cshtp..sts............CRaPccsRsSlEAhGlDltphscph.shph.hst...........phhshhuhlh.................................... 0 56 88 95 +9883 PF10051 DUF2286 Uncharacterized protein conserved in archaea (DUF2286) COGs, Finn RD, Sammut SJ anon COGs (COG5399) Family Members of this family of hypothetical archaeal proteins have no known function. 
24.00 24.00 24.30 107.50 23.40 18.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.63 0.71 -4.54 12 37 2009-01-15 18:05:59 2007-07-31 09:20:22 4 1 37 0 21 27 0 143.70 41 98.34 CHANGED M..slVs+hccspVspc-VlcG-ls-lV+clAtchLc.EWsPpsSDFlllRDhhplphPhPL.p.ElhEcl..+paphp+scscs.lplPla.IsasspWh.tpsapsccshVVhPYlD-ptpcElhchshpshut.t............................cEEt.pc.E ...M.+llllKuEsGcVssccls-.G-lscVl+clAp-.ALc.EWN.hsSDFIIhRDs.ElclPLPL.pP-lYEtl..+pF....hht+scscA.hsclPVYhISa-NpWt.-ssapDc+laVVu.YIsD-hpcpllssAsphToppc............................pE..tc...-.p......................... 0 8 11 16 +9884 PF10052 DUF2288 Protein of unknown function (DUF2288) COGs, Finn RD, Sammut SJ anon COGs (COG5626) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 58.60 58.30 22.40 17.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.07 0.72 -4.27 36 217 2009-01-15 18:05:59 2007-07-31 09:20:50 4 1 217 \N 72 199 26 92.90 42 87.76 CHANGED ssL+scLtt-TuplsWp-LpsaaARGsllhVsssLDLl-VAhulApDspspVppWlssGplu+sosppApcW.hpcs.phWAVVVuPWVLVQ-c .....o.sLhs+LhuETApIsWs-Lp.FFA+GsLlhVstsLDLlpVAcAlApDDsppVspWLusGtlu+sssppAt-a.hscssp..LWAVVVuPWVLVQp.... 0 18 41 59 +9885 PF10053 DUF2290 Uncharacterized conserved protein (DUF2290) COGs, Finn RD, Sammut SJ anon COGs (COG5619) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 26.10 34.90 24.30 23.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.49 0.71 -4.99 8 37 2009-01-15 18:05:59 2007-07-31 09:21:39 4 1 36 0 11 36 0 185.10 31 87.28 CHANGED Lss-.Nhsshpp.s......ssscloasGtEcsulhht-hsYucl..YpulhcppuYshhLlDGuLlQhpYchcpspLlpHRLuYYPsPtLhs.............hps-s-lhhpDhLhh-hspctllshP....lRFDFDs..tttp...cssHPsSHlTlGsssuCRIPVsusLTPcpFlcFVlRNFYpoha+chlut.....hsstph-Fc....soIsspEpslhHls..hs .............................................N.s..pp.s......th..lshss.tp.....pshpYpcl..YptlhcppsYshhL.hDGullQh.Ychp..p..spllpHRLuaaPuP.hlps.............aps-s-hYhpDhlhh-.....Ihpcpl....ls..hP....lRFDaDs..stht.......chtHPpSHLTlGphpsCRIPVsuPlTPphFlpFlL+pFYpothcsh.st.....hsphp...pFp.....olh..EtthhHhs..s......................... 0 4 7 9 +9886 PF10054 DUF2291 Predicted periplasmic lipoprotein (DUF2291) COGs, Finn RD, Sammut SJ anon COGs (COG5618) Family Members of this family of hypothetical bacterial proteins have no known function. 20.10 20.10 32.50 32.50 18.90 18.20 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.21 0.70 -5.04 21 215 2009-01-15 18:05:59 2007-07-31 09:27:20 4 2 195 4 58 151 13 196.30 36 91.75 CHANGED sllhsAsslllLsthupcIl...h..up-ssutsus.tFsPcphusphW.sclhs.sppcAlshs-ltstlssstsuAsppYG...ht.ss..shhV+hoGsVsssc.....suthslclcGs.tpsslplQhGPslpGoulRDAsuhlpFs-F+NQI-asphupAlNspspspVlt.h.....scsslsG+plsVhGsasL..sssph...hlsPlplpl .....................................s.h.shssllllshsGtclh......p.s.sDss.s......hs..s....s..hsPs.shusshW.scVhs...plppcAlshsEltstl..su..st..s..uAscpaG....st.sss.shsV+hoG.sVsphc.....uuhhsl.c.l-.G....t.hslplQhGPslpGTsLRD.AsshIpFs-F+NQlpasphupAlNpchtppVh..h.....ssp.shsGcsVsVlGsFsl..sssp.......lTPlpLp... 1 10 24 39 +9887 PF10055 DUF2292 Uncharacterized small protein (DUF2292) COGs, Finn RD, Sammut SJ anon COGs (COG5583) Family Members of this family of hypothetical bacterial proteins have no known function. 
20.00 20.00 20.10 20.30 19.70 19.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.40 0.72 -7.45 0.72 -4.62 23 311 2009-01-15 18:05:59 2007-07-31 09:28:01 4 2 292 0 79 190 5 37.40 43 67.35 CHANGED chlccIpchLcsl+aGSlTlslQDGpVlQI-+sEKlRL ..................l.ccIpshLpsh+....aGolpIsVpDGpVlQlE+sEKhRL........ 0 27 55 67 +9888 PF10056 DUF2293 Uncharacterized conserved protein (DUF2293) COGs, Finn RD, Sammut SJ anon COGs (COG5586) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 26.40 25.40 24.90 23.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.98 0.72 -4.01 25 287 2009-01-15 18:05:59 2007-07-31 09:29:46 4 1 242 0 124 205 3 84.60 39 24.91 CHANGED pAl+sLaPthPtpc......tcsIlpHAh.cu......utRVG+.sushs.tt.+VpLAVhAHlRHpaTcYD..pLLcsGhsRcpARptVhctspslLpcWt .........................t.ulppLhPthPtsc......tpsIhp+Ah.cu........................pt+lut..sushs...t..tVpLAVsAHlRHhaTcYD..pLL..c.s.G.hs+csARthVhptlpshLscWR............. 0 26 68 98 +9889 PF10057 DUF2294 Uncharacterized conserved protein (DUF2294) COGs, Finn RD, Sammut SJ anon COGs (COG5609) Family Members of this family of hypothetical bacterial proteins have no known function. 21.40 21.40 21.60 22.00 21.00 21.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.25 0.71 -10.06 0.71 -4.44 23 741 2009-09-11 07:39:01 2007-07-31 09:30:33 4 4 437 0 101 280 9 110.00 38 92.03 CHANGED Mppo.+stlEpEISculppacK-hlGRGstplKThIlcsMlllpLcGlLTssEptlspo...tcGhthlKpsRspLh-st..ppclpcllpclhGpcVhuhaoDloscTGEplhVFhL-.......pslEK ..............................hEtchschlpphcK-hhG+Gs.p.l.+osh.hcsMs.IssLpGlLTPsEhhlspT....p-Gh.hl+tsRo-.hlcps..ppphp.chlpclsGtKl..hshaoDlsspssEtl.lFhh-....cslE......................................... 
0 46 70 88 +9890 PF10058 DUF2296 Predicted integral membrane metal-binding protein (DUF2296) COGs, Finn RD, Sammut SJ anon COGs (COG5415) Family This domain, found in various hypothetical bacterial and eukaryotic metal-binding proteins, has no known function. 21.70 21.70 21.70 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.81 0.72 -4.22 30 334 2009-01-15 18:05:59 2007-07-31 09:31:15 4 6 254 0 214 322 1 52.20 42 14.26 CHANGED ahDRllDhLlGsss.......psRhALICppCptHNGhu...pcchp.l.papCspCsshN ....................hDRll-hLlG-ss.......psRaALI.CppCttHNGhAh...pc-hphl.ta+CshCthhN........... 0 60 110 171 +9892 PF10060 DUF2298 Uncharacterized membrane protein (DUF2298) COGs, Finn RD, Sammut SJ anon COGs (COG5427) Family This domain, found in various hypothetical bacterial proteins, has no known function. 24.70 24.70 25.00 24.70 23.00 24.60 hmmbuild -o /dev/null HMM SEED 473 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.96 0.70 -5.65 18 117 2009-01-15 18:05:59 2007-07-31 09:33:29 4 9 68 0 64 119 65 412.70 21 56.82 CHANGED lllsassahluhl...hsa.......sllls...slhlhshuhhh...hh+pc........h........t.hlts-hlFhsuFhFhlhlRsapPtl.h.....Gs..EKFMDauahsulhRusshPPpDsWFAGtpls.YYYhGaLlsulhuhLoulssphuaNLAlAshhAhsssuhaGlutslspth..............h.hhhhsshhshhh........ushhshh..h.................hthhushhhttssshsa................................................aWsuSRsl.ss.....................IsEFPhFoalhGDlHuHhlul...sFhlLsluhshshatptsppp..............................thhhlh...hulsLGhlhssNo.WDaPlah...sLshhslhhh.tt.h.h...................ph..hhs.............t.ssllslluhllhlPFhls..hpstuhtt.....hshl..............ppohh.phlhla.GhFhhhhh..uaLhsplppt............hhhhh..h.hh.......hhshslhulhhPllshuhhthhppp.................sssFhsLLlhsuhsllllsEllYlp-s.hss...sRMNTVF..KFYhplWllh 
..............................................hhshhhahhs.h.....................hhhs....hh.hlh.shsh.hh.....hhptt...............................h.......ht.hh.h-..hlF.h..hhFhhhlhlRh.....hs...Psh.h............usE+.hDhuah....pulh+ushhPP.D.WauGt.ls.YYY.hGahhhuhhshlouhssthuaNLslshhhuhhhhssaulsht..lhtth................................................hhshh.h.shhh.........sh..hh..h........................................h.....h.h..h.....h.........................................hW....soRsl...s...................................IsEFPhFoalhuDhHsHhhsh......shhlh.hhshhh.th..h.hp.tttt..............................h.hhhh...hulhhGh.h.hhhNs.Wshshhh...hlh..h.h.slh.h.h.h.p..h.............................................................shhhhhhuhl.lhhPFhht..h.s.s..tt...............l.thh......................pto.h.phlhla.Ghhlhhhh.......hh.hhtthht..........................................................................hh......h..........h.h.h.hh.hhh....hhhhh......hhhhh..................................................t.hh.hl..hhhhuhhlhhhsEhhal.p-...................RhNTlFKhhhpsWhlh........................... 0 23 51 54 +9893 PF10061 DUF2299 Uncharacterized conserved protein (DUF2299) COGs, Finn RD, Sammut SJ anon COGs (COG5440) Family Members of this family of hypothetical bacterial proteins have no known function. 22.10 22.10 22.20 22.40 19.30 21.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.67 0.71 -4.73 13 64 2009-01-15 18:05:59 2007-07-31 09:35:12 4 1 45 4 34 53 58 135.50 29 87.04 CHANGED cItsWhpELGhhlp+...sssusthFHlssoPPt.sssslsllRssscosaYlluhulslcspH.phl.thphccRpchltplph-Ll+h.sV-FhhhPPsp-.PpsIQlu+.latDG..LTKNchlssltpV+NuulhVl.hlp ..............................ltsWlpEhGhhspc...sssupthFHhssssPt..uGsslsVl+PpscschYlluhultlc.pH.ptl.thphc-Rt+...hltplph-LLch.sV-F.hhh....P..s.sp-.PpsIpVu+.la.tDG..LT...pNchlsslhpV+Nuslhll.hh............................ 
0 8 19 29 +9894 PF10062 DUF2300 Predicted secreted protein (DUF2300) COGs, Finn RD, Sammut SJ anon COGs (COG5445) Family This domain, found in various bacterial hypothetical and putative signal peptide proteins, has no known function. 25.00 25.00 26.00 35.30 24.30 22.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.88 0.71 -4.43 13 657 2009-01-15 18:05:59 2007-07-31 09:35:38 4 4 320 0 34 314 12 129.20 50 47.26 CHANGED +uspssp......hhsspAstuhthDthLsp.....oluhahpPhthshsshshsphW.t+psPtWhtcLsshssappssshslsclhpshPhpDptRctlh........stslholtsRl.........slspphhahuFuuaPs ........GspspE.......hssQAGpu.phDphLsh.....ShuhahAPpp.phth.shuphW...t+....p.hPtWht.Lps.ps.s.pssVhulhc..lushPhps+..tQchLh...................Itsh.hLtsRl.........D.ppEhhphuFsGa.................................... 0 5 13 24 +9895 PF10063 DUF2301 Uncharacterized integral membrane protein (DUF2301) COGs, Finn RD, Sammut SJ anon COGs (COG5413) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 31.00 29.50 21.80 21.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.73 0.71 -3.96 38 200 2009-01-15 18:05:59 2007-07-31 09:36:44 4 1 188 0 59 175 0 123.50 38 62.52 CHANGED luLphIHIYl+sl+phLQhhWhlGsluh....hhh.h....t.h...lhspPh....hLulGshFsuLsGluFKEaFCFp+hEshhlshLlPlhlLupLhGlhshththuLlu..ulLhlllAlRKathPltsDIGDKSlFpa ................hshsslHIYl+sh+hllQhhhhlGllst........................lhs.hPh....LulGshFhsLsGlsFKEhFCFphhthph.shllshLhLualhshh.htthhullu.....ulLhllhAlpKaphPl+hDIGDKohaph......... 1 9 32 51 +9897 PF10065 DUF2303 Uncharacterized conserved protein (DUF2303) COGs, Finn RD, Sammut SJ anon COGs (COG5532) Family Members of this family of hypothetical bacterial proteins have no known function. 
21.90 21.90 22.10 22.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.51 0.70 -5.53 14 390 2009-01-15 18:05:59 2007-07-31 09:38:46 4 2 323 0 29 229 15 251.50 51 98.85 CHANGED M......s..coAlptlttshhsusphpp...uh..shl..hlP-...uh+lpsLEpahs......p.sRh+GshpspshsuFlpYspcptp..-ssphF..Ist-....shsssulhshtsts......s...........sGas-apAshshptTstapshhshsGcthsQtchu-alED.tsslls........hsssu.......................uchlplspshpAs+phshcuusptpsspspFshpcss.psscupp...hphPstFthphsPapshsphplphRlphhh..tsup.slhhcllp.-tl.EphspEhtshlpcthpst........lhlGohp .....................................................p.D..usAlppIp.shloAtsssp.lsthts.ssl..sLP-....sacl..c..sL..E..+Fth......sRFRFRGshoTsSI-DFs+YS...KchAs........Eu.......oR...CF..IDAD..........sMcAsoVlNlGT.l.-.p.................PG...HADNsAoLcLK..+TAPFpALL.s.lNGc+.sQKsLAEWlEDWADaLlu...............aDus...................................G-slphocAsuAVRKITI-AspouDaE-sDFSGKRSlMESVEAKoKD..lMPsuFcF+ClPFEGLc.ER.s..FcLRLSllT....GDcPVLVLRIIQLEAVp.E-hAsEFRDLLlEKFc-scl.....ETFIGTFo............................ 0 7 14 21 +9898 PF10066 DUF2304 Uncharacterized conserved protein (DUF2304) COGs, Finn RD, Sammut SJ anon COGs (COG2456) Family Members of this family of hypothetical archaeal proteins have no known function. 23.00 23.00 23.00 25.20 22.60 22.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.43 0.71 -4.23 15 588 2009-09-10 17:25:32 2007-07-31 09:39:02 4 3 561 0 136 408 84 113.90 23 92.15 CHANGED Mhh.lQlIuIllhlhsllhlhhph+csplshssslhWlhhalslllhslFP.plutpIAcllGlGRGlDsLhhluIhlhahLla+LYl+l-+LER-ITcLV+EIAIc..-t.p.hp+cc ................hphl.h.l.lhu.l.h...h.hhh.llpt.l+.+.sphphchulhWllhu...lshllhulaP.plhshlAphlGltps..sshlhhhslhhlhhh.hhphohplSchc..pclppLsQcl.Alh....pt.........pt............... 
0 45 79 111 +9899 PF10067 DUF2306 Predicted membrane protein (DUF2306) COGs, Finn RD, Sammut SJ anon COGs (COG5395) Family Members of this family of hypothetical bacterial proteins have no known function. 29.10 29.10 29.20 29.70 28.90 29.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.51 0.72 -3.90 47 499 2012-10-01 23:59:14 2007-07-31 09:40:11 4 1 398 0 212 456 145 104.70 23 44.00 CHANGED hptKssp....hHRhlG+lalshhl.lsAlouhaltth...thhu.....asshalLuhhhlhshhhulhss++tp.......lttH+thMltsYhhulsssshhsh.l..........ssRhhhsllhs ............Rp+thphHRhhG+lal.shhl..lsulou.h..h.lsh.h..s...hs.h.hs.....hhsh.tlL.u...lhhlhssh.hAlhsh+..ptp...............ltpHRpWMlRsahhshsss..shphh..h....................h.hhh............................................ 0 79 141 186 +9901 PF10069 DICT DUF2308; Sensory domain found in DIguanylate Cyclases & Two-component systems Anantharaman V anon Manual Domain DICT is a sensory domain found associated with GGDEF, EAL, HD-GYP STAS, and two component systems [1]. It assumes an alpha+beta fold with a 4-stranded beta-sheet and might have a role in light response [1]. 25.00 25.00 36.90 36.30 23.80 23.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.48 0.71 -4.61 93 183 2009-01-15 18:05:59 2007-07-31 09:43:40 4 11 104 0 87 191 1 125.80 24 28.34 CHANGED hs.spushppLhpt.h.t...s..p.thahKsoLhALs+ulEDpllp......sp.pshlhuuFQptchappEs...cR.YpclAppup.plalhus................................................s-ssh......sssphhpshh....lsssDsLspEWaLllluss.....asuhllspEh ....................................t..........p.thh.KspLhulS+slEDpslp......ss.ps..llhusFQctphappcs...cRYpclAppss.plalhus................................................s-ssh......sspthpslt.....hsssDs.LspEWallllusp......asuhLlspE.............. 
0 18 66 86 +9902 PF10070 DUF2309 Uncharacterized protein conserved in bacteria (DUF2309) COGs, Finn RD, Sammut SJ anon COGs (COG3002) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 26.30 26.20 23.50 23.20 hmmbuild -o /dev/null HMM SEED 788 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.47 0.70 -13.30 0.70 -6.43 67 634 2009-01-15 18:05:59 2007-07-31 09:44:26 4 4 569 0 133 484 108 793.90 40 94.76 CHANGED hlcpAschIsPhWPLcsalAsNPahGhpcpsFtpsusphtpltGschhhspshappthtpGcIspssLptsltt........................................................................................ttshshshsphhtthtp.................................................tt..ttl........slu-hhsshp......u....hshsshlscplspasuuaaDpGQuhWp...Pt+cpGhYsAW+phsppDhshtht...t.tthtphltphPpssppulspslppL..slspsthpsYhcphLhsLsGWAuhhpapphps...th.sppp.tslh-hLAlRLsh-hsLh............................................................hht..htthht...............................thtspttpshhs.............ppshthstlhpcAhEtuapcpLhpsl..........................t.....ssssssst..............+P..th.QusFCIDVRSEshRRtLEshsPt....hcThGhAGFFGlPltapshussstcsphPsLLsPphtl...........t.s...ttpthpppp...chptphpptapphKpsslouFshVEssG.hauhcLltcoLts.............................ptssshh.php....t..................................................................................uhohp-plshAtshL+uMuLT...psFA.LVllsGHGSposNNPasuuL-CGACGGpsGtsNARllAtlLNcspVRpuLtpc.GIsIP-DThFlAAlHsTTTDclphastct.lsts...tpsshcplcshLspAuphsptERutpLs...tssp...........p.hpplpcRupDWuEsRPEWGLAsNAuFIluPRphTpuhsLcGRuFLHsYDWpp.Dp..cuslLEhIloAPhlVupWINLQYYuSTVsschaGSGsKlhHNVs.GslGVhpGNuGDLRsGLPhQSVcc.spphhHcPlRLhVlI-APt-tlpcllpcp.............ssl+pLh-NtWL 
...............................................l.lppAp+lIsPl.PlstFsApNPa.GhEspsFcpsAphL+cltsscla.stuhhcpthtpGcIspsslpptlsphh................................................................................................................................hph.sptthphhhpshtp.............................................................................tshtt.sh..............shu-hl.spp.u.........pshs-.lscphh+WsthYhDpttusWs..hP..pR..-..p.uhYpAWhclspa...Dhuhsht..........+phlpsLPp.c..sp.slpps...Lspl..uIs.p.pphpsYlcupLLuLsGWAGhhhaRupQt.............pp-.thLpDaLAIRLshE.hLl..............................................................................tstht..hhtps.p...............................................................................................h......phssschpshhp............hhtphspthhcpLWlpAaEhsapppLhppl........................................tss...p.p..s.psss......ps...psQhAFCIDVRSEsFRRHLEshuP.....hETlGhAGFFGLPIphpslspphp+spLPVhl.sPsapl........................cphssc...phphh.pppp....pshpshhh.s.a.+..th..KpsshsohhLsEhoGsh.hulphlspolhP.............................cpsts..lp.phppphhpcs..p.sphplp...........p......................................................................................slGhT.pEplshAtpsL+hMuLT......csFAPlVVLsGHuSpSpNNPatAuL-CGACGGtoGuhNA+lLAhlsNcssVRpuLt.pp.GlhIP-cTlFhAA.HpToTDplphh.l.sc...Lsst....ttcuh-pLpshhstsuppAspERhspLs.htptt............pssp-spRhApDWSElRPEWGLApNAuFIIGpRpl.....TcshcL-GRsFLHsYDWcp.Dp-G..plLssIlouPslVApWINLQYYASTVsP+hYGSGNKsspsVs.uulGVhpGNuuDLhhGLsW.QSVhs.spphhHpPlRLhVVIpAPpthlpcllspp.............tphpchVpNpWl..................................... 0 43 87 115 +9903 PF10071 DUF2310 Zn-ribbon-containing, possibly nucleic-acid-binding protein (DUF2310) COGs, Finn RD, Sammut SJ anon COGs (COG5595) Family Members of this family of proteobacterial zinc ribbon proteins are thought to bind to nucleic acids, however their exact function has not as yet been defined. 
25.00 25.00 31.80 31.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.83 0.70 -5.42 23 268 2009-01-15 18:05:59 2007-07-31 09:45:21 4 2 257 0 54 193 9 249.40 52 99.43 CHANGED MalsElpFc..sapssslsssp+sINpll-thRYNGQILGREaPlshspsp....FtsRllCPEpsSLtscaNSspVppALppLscsGllhsphcllGcDlNS-tos....cpppPoWQlLYTTalcoCSP.....L+sGDsltPIPLY+hs.sphsu..pcsllKWQp-WQACDpLQMsGus..hEphALcEIu-ssScLh++GhcLspcIEthTpIPTYYYLYR.VGGpSLtsEppRpCPpCGu.-WpLspPLaDlFcFKCDcCRLVSNLS..W-ap ............MYllELpF-..sassTolsus-+AINsLhDshRYNGQllGREFPlshu-s......FhVRllCPEp-SLaPcapSthVptslpcLs-suLltPph+llGpDlNSEpsA....Ep.-pPSWQlLYTTYlcoC..SP.....L+SG-sLhPIPLY+ps..sshsuD.acslIKWQp-WQACDpLQMsGus..sEpsALcEIs-scSsLF++GaDLptRIEhlTplPTYYYlYR.VGG....pSLspEpsR.CPpCGu.-WhLc-PL.....aDlFaF+C-sCRlVSNlSW-a....... 0 10 20 38 +9905 PF10073 DUF2312 Uncharacterized protein conserved in bacteria (DUF2312) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG3750) Family Members of this family of hypothetical bacterial proteins have no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 21.70 21.70 21.90 22.40 21.50 21.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.28 0.72 -4.62 51 455 2009-01-15 18:05:59 2007-07-31 09:47:06 4 3 340 0 158 352 153 73.20 50 78.91 CHANGED lAu-pLRpaIERIERLEEEKKslu--IK-VYAEAKupGaDsKllRpllpL....RKpDps-+pEpEslL-lYhpALGM ...............As-pLRsaIERIERLEEEKKs........lu-DIK-VauEAKupGFDsKllRpIl+L....RKp-...ps...-htEpEul..L-lYhpALGM.................. 0 44 91 109 +9906 PF10074 DUF2285 Uncharacterized conserved protein (DUF2285) COGs, Finn RD, Sammut SJ anon COGs (COG5419) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
23.00 23.00 23.30 23.30 22.80 22.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.46 0.72 -3.70 33 292 2009-01-15 18:05:59 2007-07-31 09:47:52 4 2 153 0 155 286 28 100.70 27 57.06 CHANGED lhlPLcsshts....................RhcAhtRhhctLp.u+sssPssph..hsstp+tRhhphLpAlDuchsGAShR-IAtsLFGs..splstc..Wps..uulRsplpRLV+cGpuLhcGGYR+LLp .................................................................................................t......ss.stt.....hss.pct+hlthL+sLDu....ptsGA.oaR-lA.p.slaGt..csspts...ats..sthRsph..cph..lc+upthhcuGYRpLLp......... 0 23 94 128 +9907 PF10075 PCI_Csn8 COP9 signalosome, subunit CSN8 KOGs, Finn RD, Coggill PC anon KOGs (KOG4414) Family This PCI_Csn8 domain is conserved from plants to humans. It is a signature protein motif found in components of CSN (COP9 signalosome). It functions as a structural scaffold for subunit-subunit interactions within the complex and is a key regulator of photomorphogenic development [1]. 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.52 0.71 -4.42 48 621 2012-10-04 14:01:12 2007-07-31 09:48:56 4 7 256 1 376 1244 4 138.70 20 59.51 CHANGED hhptphhu.hLlthLspsch.ssFchthchlPsshpps...................stplpplhpLtphLhpssYspha....phlcps....sp..phpshlssh..t-plRcclsphltpuY.....psIshpthuphLshs..sc.......phppasppp.GWpl..-........ssshhl.h.sc.ptpsps ............................................................hh....phhu.hLlhhL.h...p....h.ssFphh..hphl...s....t.....p..h..h.pt.......................................................................p..l...pplh.tL.tphL.ps.pas...pha....................p.h...css....................p.............sht..hl..s.sh.....p-sl...RcclsphlspuY............p.p.I.shsthsphLshs....sp................ph.ppa....s....pc...p...GWph..s...........ss.s.h.h..h......t.p.................................................. 
1 132 192 290 +9908 PF10076 DUF2313 Uncharacterized protein conserved in bacteria (DUF2313) COGs, Finn RD, Sammut SJ anon COGs (COG3778) Family Members of this family of proteins comprise various hypothetical and putative bacteriophage tail proteins. 27.20 27.20 27.60 27.60 27.00 26.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.80 0.71 -4.88 60 660 2009-01-15 18:05:59 2007-07-31 09:49:48 4 5 514 0 114 541 6 159.10 22 80.70 CHANGED ptLhphLPssht.sc.....hptlhpu.usplsplptpupsllpphhPpous..hLscWEchhGlss....ssst.olppRRptlhsKhptpGuhohthhtplApsh.Gh...slpIpphp....................hpahapVphs..........shsssscslps......lcshlcch+PAH....hsYs .....................t..LhthLPsh.t.s.t.............hpth.hpuhu.phsplpppspplhpph.spTss....hlscWEchhGLss......sssp.olppRpptlhuKh....p..t.tGsh........ohtahhphhpsh.Gh...shplpphp........................................hpahhplphs............................shssss.sslpt..........lcshlpchpPuH.hhhh.Y................................................................. 0 38 77 97 +9909 PF10077 DUF2314 Uncharacterized protein conserved in bacteria (DUF2314) COGs, Finn RD, Sammut SJ anon COGs (COG3779) Family This domain is found in various bacterial hypothetical proteins, as well as putative ankyrin repeat proteins. The exact function of the domains comprising this family has not, as yet, been determined. 
21.10 21.10 21.50 21.10 20.60 20.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.79 0.71 -4.10 10 385 2009-09-11 08:32:28 2007-07-31 09:50:18 4 9 337 0 61 293 6 114.60 27 42.17 CHANGED DscMttAhs+ARpTF+aFWRELuW-pRRIlsuLDhAh.VKVsF.pD.t.....susssEaMWlsclsF..DGctlhGsLsN-PcploNlcp.GDplshsh--IuDWhalssG+saGuFTlsAMRupM.SccERscHDpAWGlDF ........................................................t......hh.pAppphthF..plh..pp.....h..hhh...VKlsa....-....................ptspsEHhWlp...hph..s.st.hhhGlLs.N-P...hhlpslc..Gpphplsh-.cIoDWh.hh.t.sst............................................................................................... 1 19 36 43 +9910 PF10078 DUF2316 Uncharacterized protein conserved in bacteria (DUF2316) COGs, Finn RD, Sammut SJ anon COGs (COG4367) Domain Members of this family of hypothetical bacterial proteins have no known function. 26.00 26.00 35.70 32.70 25.70 25.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.75 0.72 -3.99 7 273 2012-10-04 14:01:12 2007-07-31 09:50:44 4 2 268 0 23 100 1 91.20 54 95.69 CHANGED MSLshtph+sT+pELptNFpLssLolpplAp-Lpho.sclEtlLpLc...........pplpDsWhLRsYL.ctlpppGppPhPaotLsG-....tpaWFLs ....MSLNKEQRRITsEELQAHFEtSTLSlphIAccLNlToE-VEKlLuMpsP.GlFuppLQRF.IHLVWDVRDlINDNIKtNGQTPcPYTYLKGEK...EDYWFLc............. 0 9 14 19 +9911 PF10079 DUF2317 Uncharacterized protein conserved in bacteria (DUF2317) COGs, Finn RD, Sammut SJ anon COGs (COG4365) Family Members of this family of hypothetical bacterial proteins have no known function. 
24.20 24.20 25.10 24.70 23.70 23.20 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -12.61 0.70 -5.90 54 546 2009-01-15 18:05:59 2007-07-31 09:51:26 4 3 532 0 133 404 329 519.30 36 99.20 CHANGED Mphcplsh.pp...sphhtDYhspppp....lpsFaph..h.p.psapcplsphppph...t...RpsLschLppphpphss.upts.psI-tLtp.pohsVlTGQQhGLFTGPLYolYKllSsIsLA+chcpph.shshVPVFWlAuEDHDa-ElNahhh.t.tt...+Khphp....t..cs.psussphsspslpphlcphhppl....spopasptlhphhpcs...........Y.tpssohs-uFttlhpcLFtchGLlllDusDspL+plttshhpctlpppsshtpslpppppplc.phGhs..lQspscplNLFahp..c.stRptlp..tcsspF..hlpsschpaoccELlchlcscPERFSPNVlhRPlhQEhlLPslAaIGGPGEIuYWupL+psFcthslphPhllPRhohsllpp+hp+hlp+h..........plshpclhhpth.......phhpcph.t.tppsslptshppt+p.......plpptappLtshstphDsohtshlptsptphhctl-tLEc+hh+utcR+pcphlpphpclpspLhPpsu.QERhhNhh.ah..scaG.pllsplhp.hpshtsp.phhlpl .................................................Mchp.lslspp...sphlschhsuc........lpuhFpY.thhpp.puFccRhpc.sstc.......ctsLssslpcYhsc.....L.......p.h...o...ppp.tNIptL.sssohVVluGQQAGLFsGPLYThHKIhSlIpLuK-lp-ph.ptpVVPVFWIAGEDHDaDElNHTalhs.pptpl+Klpacshp....cs..osSchh.s..ptc.h+..phlcphFtphtE....TsaTpsllphhcch...........lcp.....hsoasDhFttLlpchF.....tsa.GllhlDuph.tLR+h..EsPhFKcllp+pptlpcuhcssQpphp.p.GhpshIpscss.lpLFhcp............-..s.RpLlp..h-stpF..hlpcsctsaoc-EllphhEppP.EpFSNNVVTRPLMpEalFsTlAFlGGPuEItYWuELKsVFchhslcMPhVhPRlplThlpc+lp+hLpch..........slshpclhh.s.s.l.......-t.+ppalppptspph.pchcthhc.......phpclapsLhp.ltt.psslp........hlpKNpphhhpQh-hL.++hhhslE+cp-lphcpFccIppsL+PhGu.QERlWNsh.hL..Nca.GhDhhc..h...PhsashpHhllp........................................ 0 48 94 121 +9912 PF10080 DUF2318 Predicted membrane protein (DUF2318) COGs, Finn RD, Sammut SJ anon COGs (COG4393) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 25.40 36.20 23.90 23.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.04 0.72 -10.75 0.72 -4.25 36 409 2009-01-15 18:05:59 2007-07-31 09:51:54 4 3 396 0 77 306 8 104.00 46 25.67 CHANGED plpDGKlH+ata.scsG+plRFFllpcsc...sphsssFDACplC..ucpGYh.pcGspllChsCss+hhlsoIG..psGGCNPlPl...saphcssplhIstssLpsGspaF .......................lcDsKLHRFsY...lus-G....K....slRFFlIp+hs..tsuhsssFDAC.lC..GDtGYh.pcssplIChsCsV+lhlsolG..psG..GCNPIPl.....pachc.spcIsIstpsLtuGssaF..... 0 28 54 70 +9913 PF10081 Abhydrolase_9 DUF2319; Alpha/beta-hydrolase family COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4425) Family This is a family of alpha/beta hydrolases which may function as lipases. This domain is the catalytic domain and includes the catalytic triad and the GXSXG sequence motif which is a characteristic of these enzymes [1]. 26.00 26.00 51.00 35.40 22.10 21.90 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.74 0.70 -5.71 25 395 2009-01-15 18:05:59 2007-07-31 09:52:33 4 5 288 0 111 304 47 266.90 43 52.72 CHANGED IRVYVGLsu.u-os-pRAcLAlcELcRsGAF-RpVLVlsssTGTGWl-PsuhcslEahasGDsAoVuhQYSYLsShLohLs-p-tuhcuAcALF-AVasaWppLPpspRP+LalaG.SLGAhuopsua..shhc.ltcs.hcGALWsGPPh.SphWpplsspRcsGSPthLPhacsGphVRF..ssp.sslst......sss.W.GthRllaLQauSDPlVaasscllaRcPDWlpEPtGs..........DVospl+WhPlVThlQlusDhssusssPsGHGHsYs.pcalDuWsAlssP.....pGWospctpRL+ ...................IRlYAGLsu.........ucsh........pupAchsltELtRTGuacRpVllltssTGoGWlsthsssulEYhhpGDsAhluhQYSaLsShluFLsD+-ssppuucALFpsVhphhp.pLP.p.pRPKLhlhG.SLGSaGGpuuF......sslsshh.up............sDGAlasGsPh.sslWpplptpRc.GSPphlPlhssGcs.VRF..hsps...p.clpp...........s..sPW.sps..RllYlQHsSDPlsWWoPchlapcP.DWh+.E..t..G................DV.PphpWhPhVTFhQVo.ADMs....hAs..ssP...sGH....GHsYt.sphhshWAAVhs......ss.T.............................. 
0 26 81 99 +9914 PF10082 DUF2320 Uncharacterized protein conserved in bacteria (DUF2320) COGs, Finn RD, Sammut SJ anon COGs (COG5338) Family This domain, found in various hypothetical bacterial proteins, has no known function. 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.78 0.70 -5.74 39 519 2012-10-03 17:14:37 2007-07-31 09:53:37 4 6 416 0 193 493 51 322.80 18 73.62 CHANGED cP.atshG.hhGuFhlpPulphussassNhhp.ssstpusshhplssclphpS-WsRHtlshchpushstatssp.................s..spsshsspssuRlDlscpsplshpsphphtpEspsussshts.s..ptP..l...hpshuushulppchschplsssushp+psYpssph.sus.shsspDRshsphshssRsuYclpPuhpsFsEsphspRhYD...phDpsG......hpRDSsuhtspsGsph-hophlpG-htlGYhp+sacDsphpsloGhshsAsltWpPpphTolphssspplp-o.sssssuuslppshslplcHphpppLsspuphsaspp-Y.psts.....RpDchh.sspsuhsYtl.sRhltlpsplcacppsSsh....sstsastsslhlulplph ............................................................................................................................................................t.............sh.lhPtlp.shthssNh....t....t.................s......p..........s.............t......p......ss.h...h...........thpstlthhsc.h.t.p...p.ph.p.lshph.phtpahssp..........................................p..s...sshth.phphp.h.....-h.s.p.p.p.p.l.sh.phphthtpp..stsps....................s....h.h.......s...................p..................psh..s..ht.hsht.p.p...s...h.h........p.h.p.hshs...hpc...hYts.......sp.....................................p......p.......p........shs..hththphhaph..sth.p..hhph.hsp..p.ac......s..s.............p+s..s...pth...hhhhG..hp.h.p..h.s..tt..hpu..phthG..a.pp....sa.c....s....s.....p....h........ts.h.............s.......uhs.hsstl..pWt..s.....hp.h.oshphhsppth...........pss.....s....s...............s............s..............sh...ht.....pthslsh..s+p....h.p...s.plshpht.....hshtp.csa..psss........................ppDpph..shshs..hsYth..pRhlsl.....phth.pappp.sSsh......sshsappshhhlshp...h..........................
..................................................................................... 0 61 127 157 +9915 PF10083 DUF2321 Uncharacterized protein conserved in bacteria (DUF2321) COGs, Finn RD, Sammut SJ anon COGs (COG4306) Family Members of this family of hypothetical bacterial proteins have no known function. 22.90 22.90 23.40 25.80 22.80 22.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.02 0.71 -4.73 5 34 2009-01-15 18:05:59 2007-07-31 09:54:32 4 3 30 0 8 25 0 131.90 41 91.49 CHANGED MGTYhsAQIChNGHphToussps.ELhpsFCs+CGutTIspCPcCsoPIRGcYYV-GVlulGp-Y-...sPoYC+NCGKsFPWTccuLEAAsELlE-s-sLSsDE+EpFpssLsDLllE..TP+TclussRFKKlluKhuououuulRDIlVDVASEolKKuI ............MshYchAplC.NGH.hssshsps.chhpsFCspsGttTIhpCPpCsssItGc....haV-GVhsh.u..tsap...sPoaCpsCGp.aPWscptltus.pllc.t.tpLss-EhpphppslscLhh-..s.P+TplAsh+aK+hhsKhtsssssuh+-IlVDVhSEssKKtl................ 0 4 8 8 +9916 PF10084 DUF2322 Uncharacterized protein conserved in bacteria (DUF2322) COGs, Finn RD, Sammut SJ anon COGs (COG4390) Family Members of this family of hypothetical bacterial proteins have no known function. 20.70 20.70 20.70 23.00 20.30 16.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.29 0.72 -4.28 17 258 2009-01-15 18:05:59 2007-07-31 09:54:53 4 2 251 0 58 181 35 98.30 50 92.36 CHANGED Fp-hLpsLPul-HLuulslhsupGplltpI.stsGphGSLpVYpALAppasu.lstsAApcGLElFAEHotDA+tpPGKHPNIDhLhpllpps.s.hplp.l ..................FpD.LApLPuIDHLuulclhDus.Gp.lVt.pI.shsGKhGSLplYptLA..ppFsu.LstpAAppGLthFA.EH.ssDA+spPGKHPNIDhLhpllpps.s.Lplcs.h................... 0 11 33 47 +9918 PF10086 DUF2324 Putative membrane peptidase family (DUF2324) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4377) Family This domain, found in various hypothetical bacterial proteins, has no known function. This family appears to be related to the prenyl protease 2 family Pfam:PF02517, suggesting this family may be peptidases. 
23.50 23.50 23.50 23.60 23.40 23.30 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.27 0.70 -5.15 28 607 2012-10-01 21:07:14 2007-07-31 09:56:08 4 2 500 0 78 421 23 198.90 38 86.09 CHANGED lhhhlPlh..lhhah+..+....+htlsh+shhlGulsFhlhuh...lL.......Etslphhllp.stssthh...ppPhlaslYuuLsAGlFEEsGRaluh+aLh++ppthss................uluaGlGHGGlE.AlLlGslshlshlhht..hhspGphph.h........tthhttl.t.ltshsshphhLushERlhAlslpluLSllVhhul+..p+.+hh....alhhAIllHAlhDh.......sAlhQsth..hsthhssEsll ...........................................l...llshl......hhah+...K......+hplshts.hhlGslsFhl..sp....lL..............Ep.lHhhllpsptssshh...ppPhla.h...lYG.hhhAulFEEsuRh.lhF+aL.K+hp.tps........................................ulAYGlGHGGlE.hlhl.G.hhohls....hhlhh..sslpsts.p.h...................h.pth....chl.ph...sshphhLhuhERIhAlshQlhLolhV.htAV+...pK....Khh.......aLlhAh.hlHAhhDhh......suLhQsth....lss.hlsEsh.......................... 0 16 34 51 +9919 PF10087 DUF2325 Uncharacterized protein conserved in bacteria (DUF2325) COGs, Finn RD, Sammut SJ anon COGs (COG4378) Family Members of this family of hypothetical bacterial proteins have no known function. 24.20 24.20 24.20 24.60 24.00 24.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -9.99 0.72 -4.16 68 671 2009-01-15 18:05:59 2007-07-31 09:57:41 4 4 610 0 191 488 34 92.50 23 49.82 CHANGED slLllGG..cphsphcphlcc...hGsphhhH....pts.pcppt.lsspl.tsDhVllhTshluHsshppl+ctu++psh..shlhs+.puhs.sltptLpphts ................................sllllGu..cphsph+pllcc.....hGschhha......ptshsppp..lsspl.t.DhllhhTshluHsshppl+ptAK+psl..sllhs..+.pShs.sltptlpph.t....................... 0 70 142 167 +9920 PF10088 DUF2326 Uncharacterized protein conserved in bacteria (DUF2326) COGs, Finn RD, Sammut SJ anon COGs (COG5293) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
26.80 26.80 27.10 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.92 0.71 -4.55 36 203 2009-01-15 18:05:59 2007-07-31 09:58:16 4 2 190 0 45 195 8 132.60 23 26.30 CHANGED plscplY..spsu.hlshpssppup...aphph.h.......ssstSpGpsptclhC..FDlulhh.stpcp.pt.cFLlHDu.la-slDs+phtphlpllpc.hp..hshQallTl.p-cLst.........phspcphll....lcL..s.spss+LFthph .......................................t.h.pplh...ptts...hphp..sp.p.s.........ap...hph.h.......ssstupGhpphclhs..FDlslhths.tpp.pt.tFLlHDu.lh-slDscphtshlplh.pc.tp........hs...hQhIlol.pDclsp.t.............thp.pp.hl..............lcL.....sp.pspLFthp.................................................... 0 15 36 40 +9922 PF10090 DUF2328 Uncharacterized protein conserved in bacteria (DUF2328) COGs, Finn RD, Sammut SJ anon COGs (COG5385) Family Members of this family of hypothetical bacterial proteins have no known function. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.86 0.71 -5.09 59 265 2012-10-11 19:05:54 2007-07-31 09:59:00 4 1 263 0 96 302 75 177.00 35 84.94 CHANGED VGAIsNGLELLs-sss....ptt..phsLlpcSupsAsARl+FhRlAFGAAGs.spplshs-spslhpshhpss.+.....hplsWpssss.hs+stV....+llLNhlllstsAlP+GGpl.pl......spsssthplpupGp+lplcsslhphLsGs...st.pslsupsVQhhhhhhlAppsGtplshphsssplhlss ......................................................VGAIsNGLE.LL--sus....sppt................AhsLI+pSAcs....A.o..ARLp....FsR.lAFGAAGo.....hG.....sp..ID....tu..-Ap....s.lspsah..p...s...p...+......splsWp..sscs....h.l....sKscV...........KLL...LNhlLlupsAlP.RG.GslsVt....h.s.sssss......p..ahlsupGphh..R...ls..schhchlsGs.....p..psl.cA.+sVQsaYs..hLLAccsG.hplslptss-plshs...................................................................... 
3 25 57 70 +9923 PF10091 Glycoamylase DUF2329; Putative glucoamylase COGs, Finn RD, Sammut SJ anon COGs (COG5368) Family The structure of UniProt:Q5LIB7 has an alpha/alpha toroid fold and is similar structurally to a number of glucoamylases. Most of these structural homologues are glucoamylases, involved in breaking down complex sugars (e.g. starch). The biologically relevant state is likely to be monomeric. The putative active site is located at the centre of the toroid with a well defined large cavity. 20.90 20.90 24.20 23.60 20.70 19.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.59 0.70 -5.29 79 731 2012-10-03 02:33:51 2007-07-31 10:00:16 4 22 543 4 234 693 90 215.20 30 13.28 CHANGED sah.hhsuEu+ls.hlAl......spucl.pptW.hthG...Rshsshstp..sLhoas.......GshFpahhshLhhc.hs.........ss.h.pss+tssthQhpY....utp.......huh.s.WGlSpSshss.shth.............YpAaGsP.hsh...........c-tVluPaAshhul.hs.PctuhtsLcpLh...t.shhG.......cYGFa-Ahsh.T.tt............t..shlscsahAhcQGhsllhlpNh.psshhhchFtssP .......................................................................................................hhshhhsEuhls.hlAl..........spssl.pptW..h.....phG.....cth....ss.h..stp...tsLho.hu..................Gs.hFpahhshLhhc.hs...............ss..h.pss+tsshtQhpY....ut.p.......huh.s.WGlotSshss.s.t................................Yp.ua.Gs.Pths................cssVIuPaAulhuh.as.PptultslcpLt...t..shhG..........pYGFa-AhshT.............................................shlspsahAhcQGhhllhlpNh.psuhhhchFpssP................... 0 76 157 192 +9924 PF10092 DUF2330 Uncharacterized protein conserved in bacteria (DUF2330) COGs, Finn RD, Sammut SJ anon COGs (COG4402) Family Members of this family of hypothetical bacterial proteins have no known function. 
18.70 18.70 21.40 19.40 17.20 17.20 hmmbuild -o /dev/null --hand HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.13 0.70 -5.59 26 145 2009-01-15 18:05:59 2007-07-31 10:00:36 4 6 120 0 43 106 19 275.80 34 66.64 CHANGED ssutAhCGhh...VusuDsplhssup.lllh+DG....pcTslsMp.sYpGc..s-cFAhVVPVPsl...spVplustclF-+l-phoAPRlschacp-sC............tt.t...sh..sssssuu.ussshG............lsV.schpVGtYElllLoup-usGLcsWLcpNGYplPsuApslLpPYlcpGhcFhssKlsssp.........suhspLcPLph+YcSschslPlRLuhlsA.......ssp...-lhlalLusp.RhpssNYppVtl..Nh.h.thhpp..p.h..Yttlhshsht...ssspAshTEYAhssussDPssh...shs.ptLtpLusssls..t.....................................................................................................................................hstpsaLTRLas+h.sspphsEDhhFp ..................................................................................shtshCGhh....Vsttssphh.stp.sllth.DG.......sppslsMp.shpu-......scs.hAh.VVPsPss...spVpsu-..t...phFscLDshoAPhl.c.h.c..s.p...........................t..sss.suustpssu.s..........spVhsphplGsh-sshLous-.sG....LpsW....L.s....c....NG.....Y..t.....l.........sussshLcPYl+pGhtFhsh+.lsssc.......................hhsssLcPlphsa.c..Ss+hhhPhRLuhhst.......psp.clhlahLoc+.R.phsshcssp.......................................................................................................................................................................................................................................................................................................................................................................... 0 19 35 42 +9925 PF10093 DUF2331 Uncharacterized protein conserved in bacteria (DUF2331) COGs, Finn RD, Sammut SJ anon COGs (COG4394) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 82.20 80.90 22.40 22.30 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.27 0.70 -5.67 50 370 2009-01-15 18:05:59 2007-07-31 10:01:19 4 2 355 0 113 360 39 352.20 41 96.25 CHANGED WDIFCpVVDNaGDIGVsWRLA+QLupEaGh.......pVRLWVDDLsuFs+lsPplcsshstQhhsGVplppWsssh..........shssuDVVIEAFAC-LPssalsuM..............stpspsPlWlNLEYLSAEsWV-sCHuLPSPpss.s...LpKaFFFPGFostTGGLL+...EssLhscRpuFpsssss+tshhppL.................G.....l.t.t....ssspllSLFuYEssuLsuLLctap...psspPlpLLVPpGRuhssl..t.....thhs...tshtsGsthp+GsLslplLPFssQpsYD+LLWuCDlNhVRGEDSFVRAQWAu+PFlWHIYPQ--DAHlsKL-AFLshYs.tslsstsssulpshapAWNsssssss.........Wtshtsths.....tlpp....+AppWstphhspsDLAspLlpFsps ....................................hDIFCpVlDNaGDIGVsWRLARpLsp.E.h.Gh.......pVRLWlDDlsuhtplsPshss...h..s.Qhh....pGlplppWpssh...........sshsssDVVIEAFAC-LPtsalpth...............tppptPlWlNLEYLSAE-WVpphHhhPSPpss.s...LpKaFFFPGFottTGG...LL+....EpsL..htpRps.a.pt.....s..ttppshhppl............G.....l........t.....ssshhlSLFuYcsss...lsshLcthp...........pu...s.......psstLLlstGphhssl...tp.h.hs...........t.htsGshhppusLplthLPFlsQpcaDcLLWssDhNhVRGEDSFVRAQWAu+..........PFlWHIYPQ--ssHLsKL-AFLst.hp...s.ths.......t.s..s....s....uhpphapsWNssts..s...........Wpshhpphs.....thpp....pApp.WsppLhsps.D..LuspLspFhp............................................................................ 0 20 56 86 +9926 PF10094 DUF2332 Uncharacterized protein conserved in bacteria (DUF2332) COGs, Finn RD, Sammut SJ anon COGs (COG4427) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 25.50 25.40 21.80 21.70 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.11 0.70 -5.47 37 294 2009-01-15 18:05:59 2007-07-31 10:01:55 4 2 278 0 75 245 44 317.90 35 96.49 CHANGED ltpsFppQActstphuSPhhucLhttlusphp.stshsupthhsatst..stupulPLRLhGuLHtLlLp.GpsstLsthYsst..pssstsh...sslppsltsapstlhshLcss.PQTNEVtRuAsLhsuhhhlupph.stPltLhElGuSAGLNLhhDRYtaphsss.p...h........GstsusltlsscWpGs...PP..hs...plspRtGhDLsPlDhsssssthpLpuYlWP-p.-RhtchcuAlulstp...ssp..l.cuDAh-hltptL...ttt.psshpllaHolshQYhPstpppthpstlpthGupAs.ppPLsalshE..spt..ssh.sttLp.......lchWssG....psphLucschHGpWlcWt ................................h..phhRp.uhhpspuuSshYptLshtlApD.-...shslhuth...........t.GpslP.hLhGulHhLl..Ls.G..+t..LtpaYsohs...p.........sspsh.........sthpchsppht-plhs.h.Lppt..sQTNEVtRsAhLhsuhthl.......hpph....ptPlsLlElGsSAGLpLhhDpYpYpY....s..ss..p.....a...........Gs.hsSsVp.l....sst..h...p...Gp......sP..hhp.......splVcRhGlDLpPhDlpss-chLhLpuhlWP-..pp.-.RlchhctAhuls+pt.......ssp.......LhcuDus.chLsshh....ttls...c-.AlhslaHohshp.lPt.ph.+ttlcttlptluAp....p..slh+Lh..............ss..cttl+.........................hcha.sG......htcslGcscsHGphhsWt................................................... 0 29 59 66 +9927 PF10095 DUF2333 Uncharacterized protein conserved in bacteria (DUF2333) COGs, Finn RD, Sammut SJ anon COGs (COG5345) Family Members of this family of hypothetical bacterial proteins have no known function. 
20.40 20.40 20.90 20.60 18.50 20.30 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.97 0.70 -5.79 23 372 2009-01-15 18:05:59 2007-07-31 10:02:50 4 2 281 0 102 291 132 236.90 34 85.16 CHANGED a........................tt...h+hlssshslhlllhhhlGhYWSpEPssFsVpspApptA..............ptpp.....pplVsGYTTTsTLIcls-TLLsKPGGYLSNDlhPPGLahDNMPuWEYGVLlQlRDLu+AhR+-FSRSQSQSsEDtDLshAEPpFNFDscSWhlPSoESEY+cGIctLppYhsRLs-sspssAQFYARADNLpsWLpcVppRLGSLSQRLSASVGpc+lNssLAG-ssu.ppuo.sssphtlKTsWhcIDDVFYEARGuuWALlHhL+AlEVDFuDVLcKKNApVSLpQIIRELEATQpolWSPhILNGSGFGlLANHSLVMANYISRANAAlIDLRpLLuQG ..............................................................................................psh....................................................................................................lsph.LL..K..Gha.ls.D.h.hP...a..hDNhsuaphGh..tlRchuhthhcshuR.pupSt.ct-LttAp.sp.h.......phsp......pu...Wh.......h.......P......p........s.u.YppulctLpta.scLt..........p..tpA.F.uRADNL.paltpltpcLG.ShSthLp................................................t......cssWhp..h..DshFa.uhGthaAh.thhpAhc.DFtpVLtcKph.s.shpph.+pLcss.t..hpP.hl.l.NG.s..s.u.h.h.ssH.hsMu.Yl.Rspusll-lpplLp....................... 0 26 55 78 +9928 PF10096 DUF2334 Uncharacterized protein conserved in bacteria (DUF2334) COGs, Finn RD, Sammut SJ anon COGs (COG5298) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.30 25.30 25.50 25.40 25.20 25.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.56 0.70 -4.95 40 444 2012-10-03 16:37:10 2007-07-31 10:03:24 4 6 384 0 94 382 96 229.30 23 48.78 CHANGED pshlRlEDl.os.....h.ssh.cpL+pls-hlhcpslP..ahlusIPsahsspsth.........ltpssphlpsL+ahps+GGpIh.....LHGYTHph......hsthoG.shEhht........................................pp.thhtp+lpp.ulphhsp.sl.hsssac....uP+Yshoppsh.phhpphFshhhtphthttssh.......................................lpps....t.hhhPpslshl-tsp.............tp..pttphtptspsshsuhFaHsah....h......suhp..........Lpcllsth ..................................shlplcDVsP........h..tshsplctls-hLhc.p.s.lP..ahluslPsatssptth....................ht.t.phsphL+hht.s.cGusIh.....hHGYsHph.................t.su..hc....h...ht......................................sp.phhpp+.lpp.ulp.hsc..t.....pl...hP...huap....APpYthsppsh..ch.h.tph.hs..s.hh.tp...h.tpsh.........................................htp..........hhsp.sht.lp.pp..................t........ht.hthhp.ps..h.shahash..............tht.........l.p.ht..h........................................................................................................... 0 32 54 75 +9929 PF10097 DUF2335 Predicted membrane protein (DUF2335) COGs, Finn RD, Sammut SJ anon COGs (COG5346) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 26.70 26.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.14 0.72 -4.34 15 168 2009-01-15 18:05:59 2007-07-31 10:03:47 4 1 163 0 32 110 0 48.60 33 35.58 CHANGED PsPslLpcY-pIlPGsA-RIlsMAE+EpcHRHch-ppt.......lctpp+cs+hGQ ..........PPPshLppYcpIhPusAc+lhpMAE+EQs..HRHphp.t............lc.ptp.s+hsp............... 0 10 16 23 +9930 PF10098 DUF2336 Uncharacterized protein conserved in bacteria (DUF2336) COGs, Finn RD, Sammut SJ anon COGs (COG5330) Family Members of this family of hypothetical bacterial proteins have no known function. 
27.80 27.80 27.80 27.80 27.60 27.70 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.19 0.70 -5.15 75 459 2009-09-11 00:20:12 2007-07-31 10:04:31 4 4 166 0 204 448 45 213.00 19 64.39 CHANGED VAtPlLppSshLsDsDLl-lspstu.psHhhAIApRtslotsVo-sLlctGstslltpLlpNsGAcloptuhppllcRutpssslttslstRs-LPsshhppLlstsucsl+pplht.ptt.hstpplppsh.....pcutspuss..thsststp.t.spthltpLcpsGc...Ls............suhlhphsppschs.hssAlutLuslshpplcplltcspsculhsLs+ssGLshsshtslhtthtthpph.sh.shpp.............hhspacpl.ssssApplLphh.......c ...............................................................................................................................................................lAtslLtpS..hLp-tsLlphhtpts..t.phhAlutR.tls..lssslhthst..sh..h...htN.tu..hs..sh..hhtt.tt........lt..hh.c.tls.thh..lh....hhs.ttht....t.h............t.htthh...........................tthtt....th...................t..t....h.tthlttlpt.ttp...hp.........................thlhphhhtt..pht.h.tulu.hsshs.thspthh.p.t.tslhhhhcthuhs.thh.hhhhhh....t....t.....thtt.............................h.t.htth...t.....h................................................................ 0 59 108 138 +9931 PF10099 RskA DUF2337; Anti-sigma-K factor rskA COGs, Finn RD, Sammut SJ anon COGs (COG5343) Family This domain, formerly known as DUF2337, is the anti-sigma-K factor, RskA. In Mycobacterium tuberculosis the protein positively regulates expression of the antigenic proteins MPB70 and MPB83 [1]. 
26.00 26.00 26.10 26.10 25.90 25.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.15 0.71 -4.53 90 784 2009-01-15 18:05:59 2007-07-31 10:05:33 4 6 638 0 335 801 62 181.70 20 72.39 CHANGED PssplaspI...ptpltspst...........................t.hh...phth...Wpthuh......uusAsAsllslshhhs.............................................ssssshlAslsss.......sussshllphc.....psppl.lhshssssss.ss+shpLWhlsss..ssPhSLGllssssss.h..slssshtsthss....LAlolEP.GGSP.sutPTGsVlhpGtl ...........................................................................................................................Psstlhpp.l......ttlt.ttt...........................................................................................hhp..........phph..............W..+.hhs.h.......susAsusllssshhhh.................................................................................ttshs.sss.h.h.s.ss.hsst.............ssss..s..h...ll..h..h...s...........................pspt.....l...lh.....s.hsh.s.s......ss...sspshplWhl...............sst....ssPhShG.l.h.ss.ssss.h........hlshshhssht..........lulTlEP..tGGSs....pPT.ushlh.h....................................................... 0 106 225 294 +9932 PF10100 DUF2338 Uncharacterized protein conserved in bacteria (DUF2338) COGs, Finn RD, Sammut SJ anon COGs (COG4408) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 40.00 40.00 24.10 22.50 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.54 0.70 -6.12 6 271 2009-09-11 14:21:54 2007-07-31 10:06:33 4 1 260 0 13 118 0 418.70 62 99.03 CHANGED hsplLllGoGPsAIQLAlshptHusscluhsuR..sSp+ScRla-AlsptsphhpVuhQNstHpphpGpssIDphaKchcslps-W-TllLuVTADuYhsVLpQlshcsLppVKslILlSPThGSshLVpphLsshsp.-uEVISFSTYhGDTRhlDcp.....................tPp+VLTTuVK++lYlGSo..cusSpplp+LstlhsplsIplpshsoPLcAEsRNlSLYVHPPLFMNDFSLpAIFpsppsPtYVYKLaPEGPIT.slI+cMtthWcEhMplLp+hslpslNLL+FMsDDNYPV+spolsRtDIEsFspLssI+QEYLLYVRYTALLIDPFSpPDEpG+YFDFSAVPa++VapscpulhcIPRMPpEDYYRlthIQulAptLslssPplDphLppYEsulppahDsptcpphSsuF.spsFcpDhslIsppl..psps .........MSKlLMlGTGPVAIQLAslChL+uDacIDMVGRupoScKSKRLYQAYKK.-+pFEVKlQNEAHQaLEGKF-Is+LYKDVKsV+GEY-TVVhACTADAYYDsLQQLSLETLQuVKHVILISPTFGSpMlVcQFhSKhsp.DIEVISFSTYLGDTRIlDKE.....................sPN+VLTTGVKKKLYhGST....HSNSshspRIoALhEpLKIQLEVl-SPLHAETRNSSLYVHPPLFMNDFSLKAIFEG.TcVPVYVYKLFPEGPITMTLIREMRLMWKEMMsILpth+VPSVNLLpFMVKENYPVRPETLcEuDIE+FEhLPcIhQEYLLYVRYTAILIDPFSpPDEsG+YFDFSAVPFKpVYKNEQcVlpIPRMPSEDYYRTAMIQaIG+hLGlpTPMIDpFLsRYEuospuYpDhH.p-QpLSuQFssshFEtDtALVTKaLc...lNch.................. 0 6 11 13 +9933 PF10101 DUF2339 Predicted membrane protein (DUF2339) COGs, Finn RD, Sammut SJ anon COGs (COG5373) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
29.50 29.50 30.70 29.50 28.50 29.30 hmmbuild -o /dev/null HMM SEED 745 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.61 0.70 -13.65 0.70 -6.25 47 995 2009-01-15 18:05:59 2007-07-31 10:08:10 4 4 648 0 278 982 83 443.90 16 80.54 CHANGED lutNhhlplGhlsLhlGssFLl+Yuh-psh..ls.thRlshuhlsGluLlusG.hLRc+............t.shhuhsLpGGGhuslaholhu..AhphYpllssssAF.slhsllsshsssLAlhppu.hLAslGllGGassPlll.SoGsushhsLauYhsllshuhhuluhh+tWchLshhuhshohhhhhhhh....h..ht....h.hssspshhhlaah..lalh.lslhhs..hptt.th.........................hs...ssLlhussllshsLththsp............hhuh.u.lhhuhlhhsls.hhlh+p..thphhhhshs.......................................hlslulsFssLulsluh.ssphpsh.......hWAlpushh........th...........s..lhllsuhhhhhs....................p..hhsshh...lshsshsusahhhthpttttttt................hhhhhuhhhhhhu..hhhtlppt.....................hhhhhsh.shhhhhhh....h..hth.hhhh.shhhh.h................hhh.h...............hth.hhsa..........shshhhhhhhhhhhtt...............................thhhhhhh.hhh.shhhhhth...........shhpthlhshhhhshhhhhh....................h...........shhhhhhh.lhh.hhhhhhhts.hhhs.....slshhPlhN.L.l.hhh.shshlhhhhtp...............hhthshhshhuhhsh..lhlsh.lt+hhpt.thh....................htt.h....phuaSlhWhlhulshhhhuhp...+p.pchlhhsGhuLlulsllKlFllDhuslsslhRllSFlulGllLLsluhhYpphhs 
....................................................hhhh.hhhuh..hh.h..t.....hs..h+h.hhhh.u.hhhhhu....h................................................h.s....h..uhuh..h.hh.hhh......h.......h..........h.....h.s..........h..hh.hh..hhuh....t..hs..h.s.....h.s........s..hh....................h..a..hh..h...h.............h...h.........h....h.....................................................................h.h.........h..h........................................................................................................hh...........................................hh..h.h.h.hhh.........t..............................................................h.hh.hhh.h..h.h..h.h.h........................................hh....h.shhh...........................................h.....h..h.....h.h.h........................................................h.............hhhhhhh...hhh..h.................................................................hh.hh..h.h.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.shhh.hh.uh..hhhhhu.....................hh..h....h......uhhhhhhshhKhhhh-h.t...h.+hh.ahhhG..hhhhu........h.................................................... 0 90 179 227 +9934 PF10102 DUF2341 Domain of unknown function (DUF2341) COGs, Finn RD, Sammut SJ anon COGs (COG5306) Family Members of this family are found in various bacterial proteins, including MotA/TolQ/ExbB proton channels and other transport proteins. 
The exact function of this set of domains has not, as yet, been determined. 23.40 23.40 23.50 23.50 23.00 23.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.15 0.72 -3.83 55 186 2009-01-15 18:05:59 2007-07-31 10:12:46 4 24 122 0 107 186 82 87.20 28 12.34 CHANGED scsuDlRFh....sssssp.LsYWlEphcs.....ssp.pAhlWV+lsth.......ssssIhlYYGNssAsssususss......F..hFDsFps...............shaphspssusss ...........suuDlRFs......sscpss..LsaalEpacs......tss..hAllWV+lPpl.s........spsslahYYG..N.s.....s.Ass..s..u..supss..........................FDs.ps................haphstt....s................................ 0 33 58 85 +9935 PF10103 DUF2342 Uncharacterised conserved protein (DUF2342) COGs, Finn RD, Sammut SJ anon COGs (COG5282) Family Members of this family of bacterial hypothetical and uncharacterised proteins have no known function. 25.00 25.00 30.50 27.60 21.90 24.50 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.82 0.70 -5.48 44 734 2009-01-15 18:05:59 2007-07-31 10:13:14 4 2 468 1 237 622 191 326.50 32 80.26 CHANGED V.sWcl.AtphApphs..us.......ssssspsststltpuschA-hhlcssTsLsssss......psplhsRscWlcsshssapplscPlspphspthts.....................................hhstluGthhGsQlGtsLuhLuscVLuphDhslshs...............ssGphhLVssNlsphtctLsls.c-hRlalsl+EssHpt.FsusPWLpsalhstlcpaupslshD..s....splpchscphp.s...sspphpph......tuhhcs.postQ+tALsRLpslhuLlEGas-hVhsssssthlPususlpcphccRRt.suGPs-phhtpLlGL-h+.+phccutshhctVsspsGhcstssVWscPDhLPsss-l-sPtsal 
..........................................VsashAtphApphs....us.............s...sss.....sttttstltcAschA-halcssTtl.....sssss.....................pstshsRspWlcsshssappls......sPlspp..hstshss.hs.t..............................................hhpthuuhhhGsQlGpuLu.LusplluphDhulsls.............................ssuphsLlssNltshtpsL.clssp-htlalsl+EsAHpp.FstVPWLtsplhutlptaupshplDh.............stlc-hhpphss..............sPp...t.h......pphhp...............tsl.hpshpTstQ.ctALpRL-sLhuLlEGas-tVhstsssshlPusstlpEthcRRRA..su......GP...s.......EphhtsLlGL-h+s+ph+-ussha.ctlspt.....h......GhcutstlWscPDhLPsss-l-sPttal.................. 0 72 179 222 +9936 PF10104 Brr6_like_C_C DUF2343; Di-sulfide bridge nucleocytoplasmic transport domain COGs, KOGS, Finn RD, Sammut SJ, Coggill PC anon COGs (COG5085), KOGs (KOG4503) Family Brr6_like_C_C is the highly conserved C-terminal region of a group of proteins found in fungi. It carries four highly conserved cysteine residues. It is suggested that members of the family interact with each other via di-sulfide bridges to form a complex which is involved in nucleocytoplasmic transport [1]. Brr6 in yeast is an essential integral membrane protein of the NE-ER, wit two predicted transmembrane domains, and is a dosage suppressor of Apq12, Pfam:PF12716 [2]. 25.00 25.00 31.90 31.00 21.30 20.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.86 0.71 -4.65 41 216 2009-12-22 12:56:39 2007-07-31 10:13:49 4 3 159 0 165 210 0 130.40 30 33.73 CHANGED lstYLQLhhNhllhslllallhthhpsl+pDlpp+hcppptplhtEhppCpcpYhpNpC.s....spp.lPAL-ptCppWccCM......sp...Dsphh.spsph.oAcslucIlNuFlcsloaKohlhllhhhhhhlhssNhshG.hR ............ttYlQLhhNhhlsslhlallht.hlhsl+pDlppphcptptclhtEhstCpcpYhpNcC..ts...spc.lPAL-phCspWccCM......sp.....DPstl...tpsp...h.oAchlA-IlNuFl-slSaKohh....hlhhhlhshhhsoNhshu.hR.............. 
0 49 97 145 +9937 PF10105 DUF2344 Uncharacterized protein conserved in bacteria (DUF2344) COGs, Finn RD, Sammut SJ anon COGs (COG5011) Family This domain, found in various hypothetical bacterial proteins and Radical Sam domain proteins, has no known function. 28.90 28.90 29.30 29.60 28.20 28.40 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.99 0.71 -4.80 74 639 2009-09-14 16:02:11 2007-07-31 10:14:24 4 5 634 0 218 549 66 181.20 28 45.89 CHANGED RlRl+auKpGph+alSHLDlhRhh-RAlRRuslPluaStGFpP+P+lohAsALslGspSpuEahDl-L....pp..sls.scphhppLstsLPtGlcllpsppl........hpssulsshlssupYplpl...........ss.pphpp..tlcphlstcpl.hp+p.s.K+G+p......phDlRstlhplplpstt...............h.l.h.thhpsssssl+P .................................+lRl+asKpGph+FlSHLDhhRhFpRAlRRAplPluaSp.G.Fs.PHP+lSa.AsALslGssSpuEYh..-lpL................p.c....sh.s.s...p.....c.......lhppLspt.hPs.Glcllcstch........tptssh.ts...hlsuupYtlph.......................ths.tphtp....tlps.hhst.c.p.l.l.p+to......K.pGp+.............phDl+shl.hp.lphtttp....................l.h.................................................................................... 0 113 185 210 +9938 PF10106 DUF2345 Uncharacterized protein conserved in bacteria (DUF2345) COGs, Finn RD, Sammut SJ anon COGs (COG4253) Family Members of this family are found in various bacterial hypothetical proteins, as well as Rhs element Vgr proteins. 
25.50 25.50 25.50 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.78 0.71 -4.62 93 1108 2009-01-15 18:05:59 2007-07-31 10:15:10 4 20 365 0 273 1206 46 145.80 28 19.47 CHANGED usssssssth.thspPhllluuPAGIuhsTspohphsAupp.lslsuGpsspluuGpshhssuucslSLFA...........ppsGl+LhAupGplplQAp.s-slplhApcslplsS......spsclplsAppclhLsuGGu.al+lp.uGsIphssPGphplKuAs+shsGPsu ..........................................................................t.....ttthsthtpshllhuuPtGluhsosps....h..phsA.upp..lthsuG.pshsl.ussps.hhhssu.pslolFu......................pptG.......l+lhAu.pG.......plplQAQ.s-.shpl.hA.pcs..lplsS.................s.psclpl...s.Apcc.IhLsuGGu.aIpl.p..susIphsss.GphhhKuuth.h.Gst.................................. 0 26 89 172 +9939 PF10107 Endonuc_Holl Endonuclease related to archaeal Holliday junction resolvase COGs, Finn RD, Sammut SJ anon COGs (COG4741) Family This domain is found in various predicted bacterial endonucleases which are distantly related to archaeal Holliday junction resolvases. 19.40 19.40 19.50 72.80 19.00 19.30 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.79 0.71 -4.53 15 76 2012-10-11 20:44:46 2007-07-31 10:32:13 4 2 76 0 29 67 31 142.10 41 87.63 CHANGED lLllsIlsLhhphhplptch-p+tpphacpWpp..tt............ct....ct-sthppWppccEcclR+-uVp+S+uVltG+loEQLsPaF..P-FcYsPpDARFLGoPVDaIVFcGLo-......G.shpcIVFVEVKTGKsupLocRE+tlR-sI-sG+VpaEll+hc ............................................................................................pphhuclch.+pptltpspKcuVspSRAVLpG+huEQhAPhL..PEFpY.PoDA+FlGsPVDaIlFDGho-.......u.cshpIlhl-VKSG.supLocsppuItpAIccG+VRaEslRl.... 0 11 19 26 +9940 PF10108 DNA_pol_B_exo2 Exon_PolB; Predicted 3'-5' exonuclease related to the exonuclease domain of PolB COGs, Finn RD, Sammut SJ anon COGs (COG3298) Family This domain is found in various prokaryotic 3'-5' exonucleases and hypothetical proteins. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.51 0.70 -4.80 36 607 2012-10-03 01:22:09 2007-07-31 10:32:51 4 3 575 0 155 1077 343 200.10 36 78.28 CHANGED s-FL.hhhH+lVuISsl.hc.....ctspF.+Vhsl...us.sts......EpEllppFachlc..chp.PpLVSaNGpuFDLPVLcYRAlhaulsAspaa-tG-........hKWN..NYhsRYpsc.HhDLhDlLutat.spAsssLDplAslhGhPGKhGhcGupVhchapsGclpc......I+cYCEoDVlNTYLlYLRaphhcGplot-sYtpplphlpshL..ppp....s.......ps.+..hptFLpsW ..........................................................-Fh.h..ha+IluIuslhhp..................s.spF....+Vts.l..........st....p..s............................Ecpllp...pF....a....ph....l-..........ca..p....P...p.....L.V..S.aNGpsFDhPlLhhR.....A..L......h.a...s.....l..s....As..p...a.a-.h.sp.........................................K.WN......NY.h.s..R....a......p..p....p.........H........hDL.....h-....h..L..u.....h.......a...t.......sp..t....shsLD..sl..s.p..h..hG..l...P...G...K.....h...s...hcGs....pV....h..c.h.a.h......p...s..c...lc.c.........I.c.sYCEoDVlNT..Yhl...aL+aplh+Gt.lstcp.Y...lt.hpphL...tp..pts..aht.ahts............................................................................................... 0 53 109 133 +9941 PF10109 FluMu_gp41 Mu-like prophage FluMu protein gp41 COGs, Finn RD, Sammut SJ, Bateman A anon Jackhmmer:Q1GH51.1(1-98) Family Members of this family of prokaryotic proteins include various gp41 proteins and related sequences [1]. 
25.00 25.00 25.10 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.72 0.72 -4.06 114 1089 2009-01-15 18:05:59 2007-07-31 10:35:28 4 3 785 0 169 674 13 81.60 26 78.61 CHANGED slpL.ppP...hph.sG...pp.....l...splsl.RcPpstDhcshp.........t.sp....ssc....tp.....htl....luplsu..ls.c-lppLshsDasplptthsshht ......................lpLppP...lptst....pp.........l...spl..sL...+cP..s.stsl+usp..........................shts........-ss....th.............hpl......lsplos...lstp-l..sphch.sDhtsltttlhsFh.h.......................... 0 32 80 130 +9942 PF10110 GPDPase_memb Membrane domain of glycerophosphoryl diester phosphodiesterase COGs, Finn RD, Sammut SJ anon COGs (COG4781) Family Members of this family comprise the membrane domain of the prokaryotic enzyme glycerophosphoryl diester phosphodiesterase. 28.90 28.90 28.90 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.67 0.71 -4.78 31 1075 2009-09-11 03:27:55 2007-07-31 10:36:16 4 4 932 0 95 639 6 147.50 28 26.66 CHANGED llYhpFuhLllshhplhtpp.ho....hcplhpp.shpplpplpssshlahlhYhlLllPFu..slshposLLsKlpIPpFI....h-alhpshhh.hlhhhlhhllhhalulRLlasLPlhllppp.sh+pAl+pSWphT+p.phhphlhhhlllhhhlsh.l ......................................llahphuhLlhh.shthh.pcp.hhp....h+plh+p.shth.l++.lhshph.lahhhYh.h.....lllPhh..shhhp.os..llspIhIPpFI......hs..L...hc.shhh...hl.....shhhhhlll........h.....YlslRLlF.s.LP.hhllccp.shpcAh+hShphTK+....pha+llhhhlhlhhhlh..hh....................................... 0 31 64 77 +9943 PF10111 Glyco_tranf_2_2 GlycosTransf; Glycosyltransferase like family 2 COGs, Finn RD, Sammut SJ anon COGs (COG4092) Family Members of this family of prokaryotic proteins include putative glucosyltransferase, which are involved in bacterial capsule biosynthesis [1][2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.65 0.70 -5.32 12 436 2012-10-03 05:28:31 2007-07-31 10:37:56 4 22 388 0 82 19422 6862 205.20 17 53.59 CHANGED SlIIPlthutt...l.c+l...Ltptpph..s.shpllls-ssssh...ppltplhsp.tth.hlpppstpphhu.utsRNtusphu....pochlhFhDlDshhs.shhpphlp.....tlpps.sthhhlPshYLsppuspthhp..t.h.cpthhcshhs.ttshht........hu..o.ushlhp+chFltlGGaDEsFhGaGhEDhEhhhR...Lthhthth..scshhh.chtphshh.........pGFRtaFthhuhshh...thhhh.HhaHppsptpsYh..pppttNcthh.pph+.h ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...............h......................t...........u....h.........h...p...N.........h....u...h.....t.........s...................p.s..p....h.....l.h..h.h..D....s....D...h....h.......p.......p......h...t...p...hhp.................................h.t.....t....t...............................h........h....h.......h................h........h..........h..............s......t...........................t........s..........p......................h......h.......p.................................................................t................................h....................p........p.....h......h.....t...................t.......h...h.t.........................................................hh..s........h......o.....u.......s.....h......h.....h..p....+.p.pa.....h....p.....l......G...........G.......a...-..E...s...a............h....s......a.....G.......h.......E...D.h...D..h..h..h..
R.........L..h.t..h..s...h.......h........................................................................................................................................................................................h........................................................................................................................................................................ 0 28 53 67 +9944 PF10112 Halogen_Hydrol 5-bromo-4-chloroindolyl phosphate hydrolysis protein COGs, Finn RD, Sammut SJ anon COGs (COG4915) Family Members of this family of prokaryotic proteins mediate the hydrolysis of 5-bromo-4-chloroindolyl phosphate bonds. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.96 0.71 -4.91 20 680 2009-01-15 18:05:59 2007-07-31 10:38:18 4 3 601 0 75 370 10 187.00 27 78.74 CHANGED M+hhhphll.hshlGhshhslshhsshhuhs.sFll.uhlluhuuhhssahsspphspt+.............alccpGLocc-YcYl+psLcEA+pKIppLpKshhpl+slpshcpsp-lh+lu+pIaptl+p-Pp+FacscpFaYp+LDslV-LoEKYshLsppPhKst-hppsLccTRhTlccLscslppDLpclls-Dl-sLDhEl-lAK .................................................................................................................................................................hu..hhhlh.hhhhh.............h.....sh.l..hhs.h.hh.h..h..h..h......h.pphtptK....................................ahpphGLo.cp.-.h..c.h...h+pplscs.+...pplptlpcshs.ph+sl+......s....h.c.plschhplu+uI..a.pplcpcP.pph.hcscpFhYs+lssslcLs-pYscltc..pshK.spphpppLcp.o.ctslDplpcsltsDhccLhp-Dh-cLDhEhclsK................ 0 24 48 61 +9945 PF10113 Fibrillarin_2 Fibrillarin-like archaeal protein COGs, Finn RD, Sammut SJ anon COGs (COG4018) Family Members of this family of proteins include archaeal fibrillarin homologs. 
25.00 25.00 534.40 534.20 18.10 17.60 hmmbuild -o /dev/null HMM SEED 505 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.56 0.70 -6.16 5 30 2009-01-15 18:05:59 2007-07-31 10:39:21 4 1 29 0 19 29 1 505.70 56 99.96 CHANGED M+DLI+EAlNDLDAAhEL+KLhhp.......tEpslpEVVDAVsDLScEEtpKLGusFRRFPLGCDLlEIuVGPCASDLohsslLuNClLADpMGhPIHVCAYAlADIAEsaGM+PIELh+EVhENVEVPLDLDHFGRYGPMRFP+-ITuCtGsCYhEGPPFKGCPR-RIH+RLIDKE+EtusDhEEWlKLASSVCVNVTsEQG..A-sHAAPL-EMc-VAEsARKaGKGVEAIhHVGDGYDDLIoGlKAul.DlcVDVFVlEGGPFNRAcDRLcAFA+AVAluRILVPGKVVATNGAYEDECRVGLRAGLNuILTGFP+NHHGYMCGYSPGTARRGpFGLPRVL+IMKEEl.uc.slTtsPlsK-pLEALARAs+FLGsN...hVYPpcIGshYlGDAHW.AuLssoslac+s+lsKTV--l....tst.suDTVALLGGRFlSWuIAcKLDtl.V-EllISDsDPWVE+ATV+lLs-ELcsslaussGDD+KAlEpADsSlITTsIPpIut+IupKhsDAlT..Ll M+-LIK-AlN.DhDuAhELpKh..........t-pDllElVDAVsDLShE-h.KLGusF++FPLGCDLsElsVGsCASsLsLhplltNClLoDhhGhPIHlCAYAlADIAE+cGhpPlEVM+cVh-sV-VPLDLDHFG+aGPMRFPK-ITtChG-CYhpGPPacGCPR-RIHKRLI-KEKEtusEh--WlKLSSoVsVNlspEQG..uEsHAAPL-Ehc-VAchA+KaGKGlEuIhalGDGYDDLIoGlcAsl.DlsVDVFVlEGGPFNRAKDRLcAFAKAVAsSRILV.GtVVATNGAYEDECRlGLRSGLNsIloGFPtNHHGYMCGYoPGTARRGNFGLPRVh+IhKEEl.ts.slshs.lsK-.LcALApusKFLshp....IYPp.pIGsaalGDAHW.sslpsSslYcplp..lsKTl--l....tpt..ss-KVulLGGRalSWuIAccLc...s-ElhISDsDPWVE+sTV+lLs-s.shssassNGsD+cAhcpADpolIoohIPpIshKIpsKh..-Ahsl......... 0 5 9 15 +9946 PF10114 PocR Hist_Kin_Sens; Sensory domain found in PocR COGs, Finn RD, Sammut SJ, Anantharaman V anon COGs (COG4936) Domain PocR, a ligand binding domain, has a novel variant of the PAS-like Fold [1][2]. Evidence suggests that it binds small hydrocarbon derivatives such as 1,3-propanediol [1]. 
20.70 20.70 20.70 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.01 0.71 -5.11 71 805 2012-10-04 01:10:46 2007-07-31 10:40:04 4 113 595 0 236 604 20 166.20 26 39.32 CHANGED lp-l.lDlptlQpl.-sFschs.tlshsllD.hcGpsl..tss..sapchC.chhRspspspcpChcSDtphu.puht...G.c.tlY+C.+sGlhDhAsPIhlsGpalGslhsGQhhhccps.-h.hhpptup.chGhsccp...altAhccVPhloccplpsshphhhplushluphuhpplphtp .....................................................................hls.chlpcltpsFupuT.slAhll.....Vc...hc.........Gp.........l...o..chs.......sa..s.sFC..phh..Rp..ps....p...tp.p.p.Cpp...s.Dtp.u.G.hc...As+..............ss...pP.hI.Y+C.HsG.Ls.DhulPlll.ss.p.hlGhl.ls...GQVh.h...p.s..s..sc.......t..h........h...p..h.........s........c........s....h.ptc........hhpt..hpplPhhshp.cltssuphLthlsp..hhptthp......t......................................................... 1 116 193 212 +9947 PF10115 HlyU Transcriptional activator HlyU COGs, Finn RD, Sammut SJ anon COGs (COG5453) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. One of the sequences in this family corresponds to the transcriptional activator HlyU, indicating a possible similar role in other members. 21.30 21.30 21.30 21.40 20.80 21.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.09 0.72 -4.15 31 240 2009-01-15 18:05:59 2007-07-31 10:40:28 4 1 238 0 48 133 8 92.50 49 97.45 CHANGED M...uhhs+LF....Guuptst.......pscP....-YcGFhIhspPhpEuGQaRlAupIpKcl.sGEh.KsHcFIRuDllsoc--Ap-hsl+KA+hhIDQpG-plF ................MuhhsRLF.......Guspt.sEss........psEPh.....EYKGFhIhspPhu.EuGQaRVAG+IoK.pI..sGEl..KsHRFIRuDlhs.ucpDAt-.lhL+Kup.hIDp.GspIF.......... 0 9 23 33 +9948 PF10116 Host_attach Protein required for attachment to host cells COGs, Finn RD, Sammut SJ anon COGs (COG5622) Family Members of this family of bacterial proteins are required for the attachment of the bacterium to host cells [1][2]. 
20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.28 0.71 -3.95 79 395 2009-01-15 18:05:59 2007-07-31 10:41:06 4 4 363 0 166 378 47 128.90 23 82.09 CHANGED sallVADup+Aplhpspus.....tt.s.h.tltph.c.psp.tsp-lss.....D.psGc.hpstst..t..............ut.tps-h+phpccpFAcplAshLpc..ttppsch.ccLlllAsPphLGtLRppLppplpptl.hsElsKDLTp.t.....ssp-lpptl ..........................................hllVsDuppAhlhpstst.........th...h...ht...ph.p.pt...p.t..spphsss.psGp..t................................sh.tpss.....pchp..ccpFA+plAcpLpc..t.spp.tca.cc.LlllAsPp....hLGtlRppLsptlp.phl.ht-lsKDLsp.h.shpclpctl........................................ 0 49 98 125 +9949 PF10117 McrBC McrBC 5-methylcytosine restriction system component COGs, Finn RD, Sammut SJ anon COGs (COG4268) Family Members of this family of bacterial proteins modify the specificity of mcrB restriction by expanding the range of modified sequences restricted [1][2]. 20.90 20.90 21.10 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -5.51 40 909 2012-10-11 20:44:46 2007-07-31 10:41:42 4 2 849 0 160 671 83 298.40 25 74.67 CHANGED l........+spsYVGhlphsss......plEILPK..hsppss................................................+phLhpMLp...........hstslp.hppsshsslpptch.sLhElhhhh..Flptlppll++Glhp-Yhphp-spphl+G+Lplsppl+pss..sppc+htscaD.Fs.Dps.NRll+suLchltphs............psssshc..hlpcltshh....p-lshsp.stpshpphp..hsRhhscYcslhthsclILpp..psshstpGcp.pshuLLFsMpplFEpYVu....thL+................tplssshplcsQsssha.Lssc.pspth......FpL+PDl..llpppspt...............................hllDTKWKhlps.....tpphu..lupu.DhYQhhAYuptYpssput......LlYP 
...............................................................................................................................................................................................................................htshhGhlt.h....t..t........lpl.s+hs..t..........................................................................p.hL.hhhLp...........................................h..hshh...hp.h...s.p..h.ss..h..tppc...pLhp.lh.hhh...F.ctl.pt.hl+..+G...Lh+-Yh..php.cs...s.h.l+Gclplp.pp.l.+p..sh..shp..tp.h..t..sphcEaohDsshNpll+ps.l.phlh.ppp...........................p.sp.pphp........plhc.l.h.ha................pt.l.s...h.h...p......h..p...ts..h.....sphh........hpphh....ppYctl.hphshhlLpt......tp..s...h..t...t..up...p......c.....h..h...u....h....L......F...s...hpt....LaEcaVh...........phLp...................thh..t.s..p....p.s..p...s.p...s...t..sha......hlsp......................h..plp..PDh....hh..c..p.p..t....................................lllDsK.a..Kthp...............spps...lsps..DL.aQlhuYsht......hpttps...........LlaP....................................................................... 0 51 99 130 +9950 PF10118 Metal_hydrol Predicted metal-dependent hydrolase COGs, Finn RD, Sammut SJ anon COGs (COG3687) Family Members of this family of proteins comprise various bacterial transition metal-dependent hydrolases. 
30.00 30.00 30.10 34.80 27.30 29.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -12.03 0.70 -4.93 54 983 2009-01-15 18:05:59 2007-07-31 10:42:16 4 4 332 0 205 849 96 251.60 30 85.30 CHANGED lpsRclcF-hss..hs..taWhss-PhsTphhNuLSlhhPtGEpaFlcul+phpstl.....pDscLpp-lcuFIuQEAhHu+pHpthNp..tl.ptpGhs.lpt.h-thhphhhphhtc................hs.htpLAhssAlEHaTAlluchlLp.p.phh.tusDPphtcLacWHuhEEsEHKuVAaDlattlsu......pYhhRstshhhsohhhhhhhhtssth..hlppD.........tthhp.h+.shhcsh..phhh.u.t........Ghhp..thhsshl.sYac..PuFHPhpc....ss ....................sRclcFshss....hs..hpW.h.ss.sshhTHhhs.sLShlFPsGEcaFlcosRphp...spI.....pD.s....p.L+pclpuFIGQEAhHo+tHpthNcth..pptGhs...sct....hc....phth...hh...hp.hhpc..............hhshhhpLuhssAlEHaTAh....luchlLs...s.ph............tssDsp.htsLahWHuhEEsEHKuVAaDVaptltu.........sYhhRhtshlh.shs....hhh.sh.hh.h.hshh....hhppD..........tthhp..hc..shhchh..phhh....t...........uh.hp...thhtphh.sYh+..PsFHPtpps.t........................................................................ 0 53 112 163 +9951 PF10119 MethyTransf_Reg Predicted methyltransferase regulatory domain COGs, Finn RD, Sammut SJ anon COGs (COG4797) Family Members of this family of domains are found in various prokaryotic methyltransferases, where they regulate the activity of the methyltransferase domain. 25.50 25.50 26.60 28.10 22.80 25.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.75 0.72 -3.49 20 365 2009-01-15 18:05:59 2007-07-31 10:42:41 4 17 288 0 96 333 26 85.60 31 15.60 CHANGED ppsctYLhHEYLpthNpPhYat-FssphuptsLpYlu-ushtcshsthhhspst..pthltthscshhp..EQahDFhssRpFRpoLls+ ......................tsctYltHEYlpt.NsshYapcFhcph.pcpp....LsYluDsslpsp.a.sshhsp.pst..pph..lptss.cshhp...EpYhDFlssRpFRpoLls+..................... 
0 29 55 72 +9952 PF10120 Aldolase_2 MethylPyrKinase; Putative aldolase COGs, Finn RD, Sammut SJ anon COGs (COG1992) Family Members of this family of archaeal and bacterial proteins are likely to be aldolases. 25.00 25.00 27.20 49.10 22.70 23.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.86 0.71 -4.88 52 250 2012-10-03 06:25:16 2007-07-31 10:43:11 4 10 148 6 156 245 49 164.60 30 46.24 CHANGED cplpcAlphltppst.hspLIPEVtoNluhuls...A+shpDVAulsGR.Isphts+stssusspFGuSpHlARllLsshchsschRuslNl+Ysccllcth....cchGhclsph-RppEPppsp.....oh.ahlcpshcph..sphPDl..IaDpGshGKEPhlhlhGcssh-Vlcplhpl ..........................................plcpAlph.l.tptpt.hhtLIPEVtoNluhuhs...AcshpDVAulsGR.Ih.p........h.tspshssusspFGuSpHlA+hlLsshchsPpl.RushNl+Ysccllcth....cp.hGhplsphccpp-Pppsp.....sh.ahlcpsh........pph..........sp..............sPDl..lh-pGshGpEPh.lhlhGcssh-Vlc+lhp............................. 0 38 83 123 +9954 PF10122 Mu-like_Com Mu-like prophage protein Com COGs, Finn RD, Sammut SJ anon COGs (COG4416) Family Members of this family of proteins comprise the translational regulator of mom [1][2]. 22.90 22.90 22.90 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.96 0.72 -4.91 6 135 2009-01-15 18:05:59 2007-07-31 10:43:53 4 2 114 0 27 112 2 45.50 40 68.66 CHANGED Mh+slRCGHCpKLLARhsshspLpIKCPRCtTLNHV+AsShp.p.Pp.p.pc .........+slRCspCsKLLu+h......s.shs...plpIKCPRCtslNp.................pt................. 0 10 15 22 +9955 PF10123 Mu-like_Pro Mu-like prophage I protein COGs, Finn RD, Sammut SJ anon COGs (COG4388) Family Members of this family of proteins comprise various viral Mu-like prophage I proteins. 
21.90 21.90 22.10 22.40 21.70 21.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.02 0.70 -4.98 21 423 2009-01-15 18:05:59 2007-07-31 10:44:10 4 6 304 0 68 385 21 247.10 23 86.85 CHANGED lpLlPtGpFpupDGRs.p......ssWhlssssuttllsphs...ttttclsIDYEHQTlh.ttppGpPA...PAAGW..lcplpaRssc...GLa.ucVcWTscAtphIcscEY+YlSPVFhaDpssGplhplhsAALTNpPuLssht..tlsAlss...............t.s..pcpp.sphlctLhthLGLsssuss-t..............hh...........sthsssttsshsshtsshsthsttptphsshss..t..........sDhupalPlsshptl.......psclssLpsphsshph....-thlssAlp-G+lhPup.+sWuppLuppss..suLpsalssssslsALsupQT....h.sssststsusLos--hulsctLGlot--ahK ...............................................h.P....G......h....s.....pu..p...........h.hs..t.htth.h.ttht...t......t...sl.h.lDa-Hpshh.........st.s.......sA.u.GW....hp..t..h.hpts......u.l.h.sp.s.c...as.pu.tthltstpatalSsl.h....h...st.t........G...th....h......p.....lh.........sAL..TNp..Psl.th.......lh..shtt..................................t..t..p.....p..hpt.lhthh.s....l....ssstt..............................hh.................t..t........t.t.....t........t.......t.........................................................................s.t.hh..h.t....t.h.pt.l..........ttph...t...shp...t...p....t...ttth..........pthlptAhppG+.l.h..u.....ctah.thstp.....tthpshl..pt.t......s..shst.p......................t..................................................................................................... 0 22 44 57 +9956 PF10124 Mu-like_gpT Mu-like prophage major head subunit gpT COGs, Finn RD, Sammut SJ anon COGs (COG4397) Family Members of this family of proteins comprise various caudoviral prophage proteins, including the Mu-like prophage major head subunit gpT. 
20.10 20.10 20.30 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.92 0.70 -5.44 23 696 2009-01-15 18:05:59 2007-07-31 10:44:45 4 10 447 0 86 552 204 140.20 27 35.41 CHANGED LpuLhsuFpssFpcGls.tAsopasplAhhVPSootsssYGWLGphPphREWl.GcRllpslpuaGYsIpNKsFEoTVuVsRsDIEDDplGlYuPlhpEhGcuAutaPDELlFuLLKsGFoThCYDGQsFFDTDHPV.spssGTsssVusss.............ussGssWFLLDsSRslKPlIaQpR+thpFsshscss.D-pVFhpscahYGVDuRsNVGFGFWQhAauSKp.sLsusNatuAhpAMpuh+uDuG+.LGI.....+PopLVVPPsLEpsAccllps-...hssuGu..o......Nsa+s.sscllVsPaL ................................................................................................................................................................................................................................................................................................................................pt..hs..htsh.tAhphMt.h.p....t.-.st+......LsI.....pPs..hllVPsuhEshApp.llput......htsus..............Nshps.hhplls.s..th............................. 2 32 59 73 +9957 PF10125 NADHdeh_related NADH dehydrogenase I, subunit N related protein COGs, Finn RD, Sammut SJ anon COGs (COG4078) Family This family comprises a set of NADH dehydrogenase I, subunit N related proteins found in archaea. Their exact function, has not, as yet, been determined. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.20 0.70 -5.18 8 32 2012-10-02 00:39:38 2007-07-31 10:44:57 4 1 32 0 21 166 53 219.20 57 99.39 CHANGED Mh...o..IuGplLslIPFGDIVhYhSsaohlhFlsAlhFTlllhlo+PE+QlEhpaGp.u.+hcpVsscEhRh+RFMAIlCGlATsGAhlTGDlFNFoLFlAlIGIsNIGIV.oAV+pcaVLsAAFpYGllAMlATLPLFGGAAlILAsTGTLSIaELuth...uts.hllauKlLLslGVlGEsGlAPFYAAKAEhFRAPGAPYILMIHLSSLLlhlRslEILLol .................................................................h....tsluGphhGhlPhGDIVhYhTsFollhFlsAllFThllhloKPE+QlEAphtchGs+hphVshc.EhKIRRF...MA..I...l...C..GlA.T.A..GA.M.l.TGDLFsFsLF..lAlIGIsNIGIV..SAV......K.p.c.a.VLNAAFpYGlIAMlu.oLPLFGuAAllLApTGTLS...laELu.ph........sss.....llat.K...l...Lh.slGhh..G.EoGlAPFY..AAKAEMFR.APGuPYI.LMIHLSSLLlIlRslEILLol................................................................ 0 5 11 17 +9958 PF10126 Nit_Regul_Hom Uncharacterized protein, homolog of nitrogen regulatory protein PII COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4075) Domain This domain, found in various hypothetical archaeal proteins, has no known function. It is distantly similar to the nitrogen regulatory protein PII. 22.90 22.90 22.90 74.90 22.30 22.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.23 0.72 -4.29 6 33 2012-10-01 21:59:08 2007-07-31 10:45:06 4 1 25 0 24 32 0 108.20 47 95.18 CHANGED M...........................................+lhl+LFVEsENlGKshNhLo-sGITGFYlhEY+GhSPscWKGFplc..EDPEhAIchlpDhSccAVlIsTVVsEEplc+IcchlcE+Lss-RYTlIclPlppIcVsts .....MKlLl+LFVEs-NlGKAINALoEuGITGFaLhEY+GMSPp-W+GFhlp..EDPEhAlcllp-hSpsAVlIsTVVsc-pltcIcchlcE+Lps-+YTllElPlhsIpVNt... 
0 5 10 18 +9959 PF10127 Nuc-transf Predicted nucleotidyltransferase COGs, Finn RD, Sammut SJ anon COGs (COG3541) Domain Members of this family of bacterial proteins catalyze the transfer of nucleotide residues from nucleoside diphosphates or triphosphates into dimer or polymer forms. 21.10 21.10 21.10 21.30 20.90 21.00 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.69 0.70 -5.03 47 840 2012-10-02 22:47:23 2007-07-31 10:45:28 4 4 657 0 203 769 31 217.10 25 84.92 CHANGED M+ppl.pcLpclEpcp....sl.........+lLaAsESGSRAWGFsSs..DSDYDV..RFlYlcsh-aYlul.......p.t..+DVIEhslscpLD.lsGW-L+KsLpLht+uNPsllEWLpSPll..YhpssshhpplpthspchFsscpshaHYluhAppsh+pa....Lpsc..pl+..........hKKYhYlLRPlLuspWlpptts.hPPhpattLhtth...ssslhsclspLLshKpputE.hphhst..hshlcsalcpp...lcphpphspt.pp........schptLsphhhc ............................................................................h.th.tp........th...........pllauspSGSRuaGhso.......-SD...h..Dl....Rh.lal.ps.h.cha.l.pl..................................pt.....c-sl....E...h...s...h...s...c...p......hD..lssa-l+KhlpLhtpuNPsllEhL.tSPhh....h..p..p..t.p..h..ht...th..h.th..s....thF...s.pts..h.h+Y...huhAp..pph+ph..............hpsp.....ts+......................................................hK+hhalL.Rs.llssphl.ttt....s....h......Ph...................t.........hhtphp.....tLLthKptt.p.ht..........h.....lp...t.....hlptp.......ht.ht......s.h.pt......sht.lsphhh................................................................................................................................................................................................ 0 79 146 177 +9960 PF10128 OpcA_G6PD_assem Glucose-6-phosphate dehydrogenase subunit COGs, Finn RD, Sammut SJ anon COGs (COG3429) Family Members of this family are found in various prokaryotic OpcA and glucose-6-phosphate dehydrogenase proteins. The exact function of the domain is, as yet, unknown. 
21.30 21.30 42.30 41.80 20.90 20.60 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.75 0.70 -5.49 65 561 2009-01-15 18:05:59 2007-07-31 10:47:06 4 3 535 0 172 439 195 257.90 38 76.06 CHANGED sssshSpp+PsRlIsl...shssspss......................................L-ApVps........GussusuEhllLRssussh.pcstulVhPLLlPDhPlhlWWsus.sPssshhc.LuphupRhIsDosthss.PhtsLpthtp.....shssussDLsWsRLosWRphLAtshDtPspcs....lsplslsst..ss.....sP.suhLlAuWLAsRLshslppttstt......................................slhulcLtsp.......ssslhLs.stsut.shplptsGtss......pslslscpsspphLAc-Lc..RhssD.lYtpul ..............................hAscAS+cHPsRlIsl......spssscss..........+........................................................LDApl+s............Gu-..uGsuEhlVLRhpGshu..p+ssSVVhPLLLPDsPVVsWWPss.u..P..s..ssu..pD..slGtlApRRIT.Dus.ss...pc..PhpsLtp..h.ts.......sassGDoDLuWsRlTsWRulLAuAlDQsPtp......lpus..pVput.....ss..................sP.sstLLAuWLutRLssPVp+ssstss........................................................ulhslpLsps..........suslhlpR.s.sup...hAslph..sGpss.............phluLs+Rs...st-hLuEELR..RLcPDclYstul.................................................................................. 0 53 128 162 +9961 PF10129 OpgC_C OpgC protein COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4645) Family This domain, found in various hypothetical and OpgC prokaryotic proteins. It is likely to act as an acyltransferase enzyme. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.37 0.70 -5.77 62 563 2012-10-02 17:00:17 2007-07-31 10:47:48 4 3 419 0 168 593 70 345.30 29 88.01 CHANGED RDhR.lDhaRGlALhhIhlsHl.....Psshhp.hlTh+saGFSDAAElFValSGhssuhsY.uphht.....cpGhhsust+lh+RsaplYhAalhlhhhhlshhuhhuhhh...psphlpph.....................slthhhppPht..slhshlhLpapPshhslLPhYlllhhhhPhllhhhhpt.sthslusShsLahsAthht..........hslssaP...sshtWaFNPauWQlLFVhGhhhuhthtpt....thh...hpthlhhlAhuhllhu.hhhthhhhh..h.t.h.......s.hh.thhhs..hsKopLu.hRllHhLALualssphls.tshthhp............t.hhpslhhlGppSL.VFssGshLuhhsphlhtth.ss....shhhp...hll.shsGlulhhulAhhhphh+p .......................................................................Rphc.lDhaRGLsLlhIhlsHl..........sssl...lp.....hh.....T...h+.....s..a.....Gh.s.....D.......A.....A..EsFVFlSGhssuhsY.uph..hh.....c.t.sh.hsus...t+hh+RuhplYhsalhhhhlhls.hhs.....h.......hsh......h.hh.......ps.hltph.....................sl.sshhppPht......slht.h.l..hLphpPshhsl...LPhYll.....hhlhh.PlhLhlhppp...h...hhLuhS.....sslWhh.uthht....................h..shsshs..ss.hW....a.FNPhuW.Qh....LFshGhhsshtstttt.....hh......hp.....h.lh.hl.usu.hl..l.hs..hhht......hhhhhs.h.................hh...h.hhs....sKspLuhhRll.phlAluaLhsthl.....hh.thhh............shhhcslshlGcpSL.lFshGshluhhs.p.hhhhhs..ss.........shhhshhlshsulshhhhhAhhhph...t........................................................................................................................................... 0 30 79 113 +9962 PF10130 PIN_2 PIN domain COGs, Finn RD, Sammut SJ anon COGs (COG5378) Domain Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases). 
21.90 21.90 22.00 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.69 0.71 -4.12 15 82 2012-10-03 20:43:45 2007-07-31 10:51:19 4 1 55 0 44 124 12 122.10 24 90.15 CHANGED VlDANIlluulLt................................cs+phhlhhsttlchassptshs....Elp+ahstlspppplstpphtshLp....tLhptIpllscslap.phpppAccRhtchDscDaPhlALuLhLsusIWT-D+DFhssGlso.................WTocplphhLs ...................................llDsNllhusllt................................t.hh.hhh...h.....ttphph.hss.phhht....Elccahttl...h....c....t.pp..l..st...cpht....p.hlp....hLhp..h.lplls..pphht..phhppAhphhtp..h..D....c..DhPalALuLtl.....ss...l.W.TtDpch.....................................p......................... 0 15 36 40 +9963 PF10131 PTPS_related 6-pyruvoyl-tetrahydropterin synthase related domain; membrane protein COGs, Finn RD, Sammut SJ anon COGs (COG5617) Family This domain is found in various bacterial hypothetical membrane proteins, as well as in tetratricopeptide TPR_2 repeat protein. The exact function of the domain has not, as yet, been established. 
22.90 22.90 23.10 23.30 22.80 22.50 hmmbuild -o /dev/null HMM SEED 616 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.99 0.70 -6.37 10 172 2012-10-03 03:08:05 2007-07-31 10:51:56 4 6 156 0 45 127 60 380.40 25 62.59 CHANGED hRYauPLuYalhAsLphLs.GslhpuatlhlhLshhlGAlu.Whhas++pGp..hluhlsulLalhhPtpl+lhahEGNlPpsss......huLsPhll.lhLahhhc++chRsllshsLhhsllsLoHhMhuAlssluhhlh....lLlhtlh.pp+hlpplhulluhslGls.............huSaWllPhL..+sGlssh-ssusutVhcshohshosuLsPhh......t.uhaYaGlulsllhluGlhhsp+++ssu....................hlssllhllhohsuslsllhpLPlspLlh.hRFsshA..huhllsuhhhhtph+K.........hhslhslhlul...LllDossshphlsassscstpstppl..pptcshscpRlull...DtoshGohsoaal...sshsspsplhGWsaQGusTupsIhhLNpAL-s.......tYatahFDsshphsspolllp+tllocpt..hptlcpss......thsGYphhpcsscthlYp.....ps...sspsatshsphhGlsIG+suh-lsLpasthcsussshl-....Dholp-Lp..+acllYLouhsh+scuphEt.................................................llh+lu+sGs...............pVll-hsch..p..scpstFLG............Vpupshshcs.chpshhasspchsssh.FtpcttpWpsshhsssp..hpshh.lsc+tlthhh+stpss........................plYFlGhNLhaH 
.......................................h...sahhhshLhhlh..u....hhu...h...hh.h...hhs.shl.u.sh.....u..hY.....hhu.+h.h....sh.p+ptuhLhulLYhhuhYphps.las.RssluEhlA........huhhPh.lL..h.sh...a.hl.h.p.ct..p...p+.h.hhhLsL..uMohlshu.H....hlohh.hs.s...lhlhla.......llh.h.h........pp......p.....hht..l....h..h..h.h.u...h..shu..ls....................................................hh..aLhPhl............hsss....pp...hp...s.........p..lac.s.h..s....p.sh.s..hh.p..h.......................sh.h.h....l.GLl.lhl..lt..L..hu.hh...hp..t..++.hlh......................lshl.lh.Lh..so.o..h.hPh....h...h....lp.....p..hPhpp..l...Q.F.....P..aRF....lslh.lh..huhh...lu..s....h....t.lh++..................hhhhhllhLsl...........sls.h.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p...pp.thhhhtt........................................................................................................... 0 20 30 36 +9965 PF10133 RNA_bind_2 Predicted RNA-binding protein COGs, Finn RD, Sammut SJ anon COGs (COG1532) Family Members of this family of bacterial proteins are thought to have RNA-binding properties, however, their exact function has not, as yet, been defined. 20.60 20.60 22.10 29.20 19.70 19.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.76 0.72 -3.94 20 129 2009-01-15 18:05:59 2007-07-31 10:56:29 4 2 114 0 73 120 3 60.10 34 92.39 CHANGED MCEuNlYL.hpsspcELlMEsVshlcsps-sl...hLpsIhG-pKhlcu.pIKclsLscH+Illcc ..MCEussYl..hps.spccllMEsVsplpsps.s.p.l...hLpDIhGcpKhlcu.cIcclsLhsH+Illc........ 
0 32 55 67 +9966 PF10134 RPA Replication initiator protein A COGs, Finn RD, Sammut SJ anon COGs (COG5534) Family Members of this family of bacterial proteins are single-stranded DNA binding proteins that are involved in DNA replication, repair and recombination. 21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.42 0.70 -5.08 32 404 2012-10-04 14:01:12 2007-07-31 10:56:48 4 2 282 0 162 499 117 222.40 33 63.88 CHANGED +DshssM-aPlFSLupp...chp..slpYctss..splpVpsusc..GhATIaDtDILIassSQlhcA.......tstG....cs.o...Rhl+hTsa-lLthssRsTuGcsYpRLctAh-RLpuTsI.....pTslc.sssp..cppcsF.ulIsp.aphl..sc.t..sGRh...hulElpLs-WlYculh...stp.VLTls.-YF.cLppsLERhlYcLA...RKH.sGcQst.WphslppLpcKSGSsush+cFptplRcllpsss...LP-Ypl ............................................................+Dt.shMtaPhFSLuKp....tcht......slcactss..........hplcVp.....us...sch..Gh..ATIa.DtDlL..Ias.sSpls-A..............pstG.....t..u......Rhhpho.s.a-lL.p.hhsR...s.s.uuc.sYpcLcsALcRLpu...Tsl................p.T.sIc....pssp........pc...pcpF..uhIsc..ac.h.........scp.........sG+h........hsl-.lhLs-Whapulh.....psh...VLT..lc...s.Y.F..pL.pts..l-RhLY+ls.....R.KH...sG.c.p...tt.........W..phshppLh..pKoGSh.u....hp...cFth.lRtlhttp.......LPsYt.......................................................................................... 0 34 110 139 +9967 PF10135 Rod-binding Rod binding protein COGs, Finn RD, Sammut SJ anon COGs (COG3951) Family Members of this family are involved in the assembly of the prokaryotic flagellar rod. 
21.00 21.00 21.10 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.28 0.72 -3.96 174 1787 2009-01-15 18:05:59 2007-07-31 10:57:32 4 5 1578 0 421 1174 304 49.40 36 20.68 CHANGED KuMRpus........ps....shh.su..s.sschapsMhDpphApphu.pp.s.GlGLA-hlhcQ ..............................KoMRcu.s.....c-....ulF...su.............p.pschapuMhDpQlAp.pho.pt..t..Gl...GLA-hlhcQ............ 0 114 241 325 +9968 PF10136 SpecificRecomb Site-specific recombinase COGs, Finn RD, Sammut SJ anon COGs (COG4389) Family Members of this family of bacterial proteins are found in various putative site-specific recombinase transmembrane proteins. 20.40 20.40 20.70 52.80 19.90 20.30 hmmbuild -o /dev/null HMM SEED 644 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.14 0.70 -6.48 31 317 2009-01-15 18:05:59 2007-07-31 10:57:55 4 2 305 0 84 286 27 626.70 36 93.99 CHANGED psshsschthLlcLhcWlR...........stt.ssstp...AhtRlphLlcsL-ppPshpsplpshltsllpphchhsLas-sGlhs+sGFhuElscRltt+lLPssP-ps-ls-LhsllFsctpDspWlsulscpthtRLhpLl..............sstscpph....ssphppsllsAlphLusplsuhGlsP-lhpph.sp...........................shcsoPFhsLpc-stshhsthtptppss...........chspLcshL-pCRsslsplap+hcps..GlSVsLsFpLcRl+ppLtRlctLLsllh.......ssstspthscLhspLlpsspc+pSlptLhssssphLAp+lsEpuucTGEHYITcs+pEYhtMh+uAAGGGhlhAhhshlKhhlsuLtLusFhcGlhsulNYuluFlLIahLHaTlATKQPAMTAsAlAuplcss..scsp...slcphV-.lspllRSQsAAlhGNlhlshPsAlhluhhhthhhGpPhloss+ApthLcols.h.usolhaAAhsGVhLFhSuLIAGahDNhhshc+lsptlthp.hLpthLGtptApRlAsahcpNluulsuNlsLGhhLGhsPslushFGLsLDlRHVshSouslGhAhsslGhpshphsthhhAlsGlslhGhlNlsVSFuLAhhlALRuRslphsstpplh+ulhpRlhppPhsFhhP 
.........................t...tpt.phLhtLhpaLR..................sst.pt......sttRlchllpsLcpsPphtsphuphlhthlsphchhsLhscsGlhu+puFhsEhspRlhp+hlPsh.-tspLtplFh.hl.F.sp.sDspWlpslspcphhpLhpLl.............stspppppt....ttphpcphLtAlphLohpluupuLpP-lhphh.sch...........................hpt-SPFlsLp+E...hhthhptht.pspts............................-sspLpVhl-QC+s.lsplp++spps..GsSlplsahLcRLcQpltRlchLlslhs....................tstsp.pthhpLhspLlpAstpppSlppLhcsshphLARploEpsucsGEHYIoRs+pEYhpMhtuAAGGGlllAhhshlKhhls.sh.t.lus.hhpulhsuhNYulGFhlIHhLHaTVATKQPAMTAushAppl-ps.......upst.....sh.p.clspLllslhRSQssAlhGNVhluhPlAhhlshuhsthhttsllssppAthpL+Sls.h.usolhaAAIAGVhLFsSGlIAGahDNhssatpltpRLthp.hL+thhu.tttp+hAsah+cNhuulsGNhhFGhhLGhssslGthhGLPLDIRHVsFSSuslGhAhsshsh......shshhhhulhuVhhIGllNlhVSFuLAlhlALRSRsl+hsph+tLhpslhpplhppPhshFhP.......................... 0 19 51 70 +9969 PF10137 TIR-like Predicted nucleotide-binding protein containing TIR-like domain COGs, Finn RD, Sammut SJ anon COGs (COG4271) Family Members of this family of bacterial nucleotide-binding proteins contain a TIR-like domain. Their exact function has not, as yet, been defined. 24.70 24.70 29.70 28.30 24.50 23.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.48 0.71 -4.50 28 258 2009-09-11 12:22:46 2007-07-31 10:58:20 4 8 238 0 79 235 14 122.30 31 42.51 CHANGED +VFIVHG.HD-sAKpcVu+Flc.pLGlcsIILHEQs.spGpTIIEKlEphus.VuFAlVLaTP-DlGsttspts........hp.RARQNVVFEhGahIGKLGRs+VssLl.KG.slEhPsDluGVVYsshDs.s.tuW .............+lFIlHG.+-p.thtp...p.ltphL.c.p.h.sh.cs.l....lhppps.......st...G....p.T...l...l.E...cl.Ep..t..s....sp....ssFAlllhTPDDhuhhpsptt.............................p.pARpN..VlFEhGhhhG+LGRp..+lhhL......h....cs...sl-..hPSDltGlshhphst..................................... 0 26 54 66 +9970 PF10138 vWA-TerF-like Tellurium_res; vWA found in TerF C terminus Anantharaman V anon Anantharaman V Domain vWA domain fused to TerD domain typified by the TerF protein [1]. 
Some times found as solos. 43.40 43.40 43.40 43.50 43.30 43.30 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.19 0.71 -4.49 77 225 2012-10-10 16:07:06 2007-07-31 10:58:34 4 3 174 0 64 274 6 199.20 36 54.93 CHANGED pA+VsLVLDtSGSMpshY+sGsVQclsERlLulAsphDD....D..Gsl-lahFuschpp...hs............slolsshps.al...........................cph.hs..s.h...........tt.hGt..ss.s.shccV.lspat..............s...ss...........................................PshV....................lFhTDGu..sp.cp.pt.hcchlp..cu..uphPlFWQFVGlGcs.p...............................FshLc+LDs...h...tsRhlD.NusF..Fslccls...plsDtcLY-pLL.sEFPsWlp.......tA+ttsll ................................................AtVsLVLDtSGSM..p...s.....Yp...c.G...s.VQ...p...lh-RllsLA...s...p...hD..D....D.G..plcVahFus...chcp..hs..............slTls.s....hp.s.al..........................pph.pss.....h...h...tthGt..ss.s...slMc-llcaat.pt.........s.ss.......h................................P..s..hVlFlTDGG..s..s..cp.pt..lcchlpcu.....uphPlFWpF....VGlGss.s....................ashLccLDs....h......t..s..RhlDNss..F..Fsh-chs.....plsD...pcLY-pLL.pEF..Wlpts+t.tl.h....................................... 0 14 37 55 +9971 PF10139 Virul_Fac Putative bacterial virulence factor COGs, Finn RD, Sammut SJ anon COGs (COG4458) Family Members of this family of prokaryotic proteins include various putative virulence factor effector proteins. Their exact function is, as yet, unknown. 
25.00 25.00 29.00 28.00 18.30 18.10 hmmbuild -o /dev/null HMM SEED 854 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.33 0.70 -13.22 0.70 -6.61 14 467 2009-01-15 18:05:59 2007-07-31 10:59:05 4 3 251 0 78 343 25 401.00 27 97.56 CHANGED AI-WlsssRppA.RLspEADuLhhcLRRs+Npu+pLucsutpshslGhFG.SQAGKSaLlSsLAusppGcLpsphs..GcplDFlpclNP...u+EuTGLVTRFo.ppssss.........ssuaPlpLpLloEs-lAKIlsNuaFpDhspppst.hplscppIsshlpphpttppstsssGlos--VlsLhDYhppphspt.p..LpstaWspAhpLAPtLslc-RApLFSlLWGchspLTcsYhphApsLppLupApclhAPLusLV....tt.h.tucuIhsVssLttLsssss..ppltV+PhppGphussVolshApLshLssELhhslsp.st.shhEpVDLLDFPGhRsc.ph.t........t.sssspPluphhlRuKlAYLhERYTspQchssLllCssssppp-VsolutsLcpWlcpTQGEostpRupRpPGLlWAlThaDtRlssphsht............h.......................................................................c-uVpRalucPsc.uWsshLsLs-uGhpRlspaLsphsp.-hKhsRlpEQLsclt+cl..hcshhstaapssssschtcKpphuctlh+sLp...spsth.aGELLctLhssp-plR....pLYhp..............t...sttshssstsss.sstsssshDhFu-sssssss.....s..ss...ttsp-ppaApsVhchWlsHLRpLscsssLlphLGlstpslphLlcELlTuupRLcltppLtpsLtspcps.u.pp-phspRQVhpstslluDFluWLGatplstppR...PsSclpcGptlFstssp..ss.....tpLs+LutpPsstsshYlhDWLVuLtphshcNsGauAupEIospppptLusIL 
.................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 4 21 52 +9972 PF10140 YukC essB; WXG100 protein secretion system (Wss), protein YukC COGs, Finn RD, Sammut SJ, Desvaux M anon COGs (COG4499) Family Members of this family of proteins include predicted membrane proteins homologous to YukC in B. subtilis. The YukC protein family would participate to the formation of a translocon required for the secretion of WXG100 proteins (Pfam:PF06013) in monoderm bacteria, the WXG100 protein secretion system (Wss). This family includes EssB in Staphylococcus aureus. 
25.60 25.60 26.10 26.20 24.70 25.50 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.02 0.70 -5.88 16 402 2009-09-10 16:26:00 2007-07-31 10:59:25 4 1 395 1 25 209 0 352.10 39 84.89 CHANGED php+ucl+hp-hhEhpllcphsPhFhc.plsts-DplhlshpsssphhsFppl+ppscp..pKl+huhs.Vtslpca.ppRlshllpP-NllFscsLpPthlHhGl+-slPPh-hs-EchLpphKshllslhstcasF--.Yhuhh-sh+hoshtKplhpucol-sLhtll.csa.cEp-ppp+shthVPK++a+l.Kalululhslll.hll.llYhhFhtpPhQ-shlpuspsFLsssYocVIosL-chsscphPpsspYpLAhSYlps-p...Ls-pQ+csI.hNslTLpSDcpYhLYWItlGRGph--AL-lA+pL-DspLIhaALsphh-pl+sDssLSGcERpccLcslps-lccY.h+ct ..........................t.lsKSpl+scchc.h..LL-pcusaFls.s.clsp.hcD...o.apIpYclscptps.F.-s..I..+phs+s..EKLRhLhN.ltsL.c.-h..po.RhTFhLtPDpLaFs+sthPhhtpRGlpsllsPhs.l.oEt-FLppYKALlIshFspK.sF-sLhpGsLElt+t..T..sFEppll-AtTL-.LsshL-EpYpKpcpchp.pNhshVpKhtaplFKalAluhssloVLLlshLua.hhF.hh.apE+l..cu.puFlKs.DYo.pVlssh--l-sccLsppuhY.hAhSYIpspK...Lpc-pK-sl.LNNlT.pSsccYLLYWh.lGpGch--AlsIAphLDDsslhhhALhpplpplKsNscLSs-cRscchKcYpccLp-hh-K.t................................... 0 6 12 19 +9973 PF10141 ssDNA-exonuc_C Single-strand DNA-specific exonuclease, C terminal domain COGs, Finn RD, Sammut SJ anon COGs (COG4199) Family Members of this set of prokaryotic domains are found in a set of single-strand DNA-specific exonucleases, including RecJ. Their exact function has not, as yet, been determined. 
22.20 22.20 22.50 22.30 21.10 21.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.01 0.71 -4.80 25 1020 2009-01-15 18:05:59 2007-07-31 11:00:12 4 3 1017 0 98 691 27 186.00 30 25.15 CHANGED QLhDhRoKp......thhtlspshshh.t..tppht...................tpptshpsctlVlhDlPsshspLcplh.p......tpphppIYhhh.pccshYhsGhPoR-pFtphYphlhppscFcl+pchpcLupaLslscchlsFMlpVFhELsF.VoI-sGllslNppspKR-ls-SpsYQp+pcphch-cpLlYuohpElpcWl ........................................................................hs.Rs.p................t..t...hh.................................................t........p.tps....ctl..VlhDlPps.hppLcslh.p......ppphsplYhhh.....phs.....c.....s.hY.l....sGh..soR-pFs+lYKhl...h..p..h...-.h.sl..+p..c..h.ppL..upaLs..lppphLhhMlpVFhELtF..VT..IcD....Gl..lplNp.ps..sKR..sls-SplYQphppplcspchhhhushp-lhsal...................................................... 0 23 49 75 +9974 PF10142 PhoPQ_related PhoPQ-activated pathogenicity-related protein COGs, Finn RD, Sammut SJ anon COGs (COG4287) Family Members of this family of bacterial proteins are involved in the virulence of some pathogenic proteobacteria [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.14 0.70 -5.98 26 293 2012-10-03 11:45:05 2007-07-31 12:39:29 4 11 226 0 99 405 154 341.20 42 71.85 CHANGED LssYhpp.tspshpapl..ssstphss.schhhhchTSQpWpspp.sp......WpH.lsIhlPps....shsppALLllssGs.................................t.t.sssssptpsphhttlAppopsslsslsslPNQsLtassp....+cEDshlAaoWptahc....pp-tshPLhlPMstuss+AMchspchhtph.....plcpFlVoGuSKRGWToWLouh.sDpRVtAIsPhVlDhLNhpsslp+hhcsYG.sWshultsYhtpGl......schl.sosthspLhplhDPhpYh.....p+L.slPKallsuosD-FFlsDusphYascLsG..pKtLphlPNssHshhs..ppslpolssFhppl.tpspslP...plphplptsp.....t.tlplpsupt.PppVpLWpAsssss..RDFR ......................................................................................................................LssYhpp.pspshpY.s.....l..h.s..oss.l..st..lhhppapLhSQpWps-s.hV......s.ss...........tWpHpVsIalPcs.......sppccALlllssGh.................................ssphs..st..s..s..-.hs....-...s.L.sslAppTpTlVlsloslPNQhLsFps...Dt.......p....s..............hpED.....-.VuhoWshFhct............sppcthhPLpl...PMspAlspAMclspctLsph...................s.......I..ppF.....lloG.hSKRGWTT.WLoAl..sD......scVc.............AIlPh..sID..l.L.s...h....css...L...cHh....Ypo.YG........N...W...P...l...sFhsY.appGI...................scpl.co..P..s..F..spL.hp.Il....DP.Lp.Yh...........................sRL..slPKYIlNASGD-FFlPD...sopaYascLP..G...s.K.tLRhl.P..Nhs.....H....sls..........phs...po.LlsF....l.s+.h...pp..p.p.........sLP...pl.sh..l..ppph.....................lsVhh..S....Et...PspVhhWoAsNPsu....RDFR.............................................................................................................. 
0 72 83 92 +9975 PF10143 PhosphMutase 2,3-bisphosphoglycerate-independent phosphoglycerate mutase COGs, Finn RD, Sammut SJ anon COGs (COG4255) Family Members of this family are found in various bacterial 2,3-bisphosphoglycerate-independent phosphoglycerate mutase enzymes, which catalyse the interconversion of 2-phosphoglycerate and 3-phosphoglycerate in the reaction: [2-phospho-D-glycerate + 2,3-diphosphoglycerate = 3-phospho-D-glycerate + 2,3-diphosphoglycerate]. 20.50 20.50 20.60 22.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.91 0.71 -4.61 92 556 2012-10-03 05:58:16 2007-07-31 12:40:27 4 7 462 6 268 542 95 172.10 32 41.38 CHANGED AppGhs.GlhcslssGhsPGSDsApLSlhGYD.PtphYsGRGPlEAhuhGlclpss.DlAFRsNhsTl...ccp......................hlhD+pAG+Ips.cEu...ppLhcsLsp..cl...ss..................phpahsusuaRsllVl....+...uss....h...u......spls.sosP..+.hpGp............ltchhPh.....sp.......s...sAcllschhpputclLpsHPlNt ................AppGhsGhhpslssGhsPGSDsApLulhG.YD.Php............hYp.G.RGsLEAhuhGlclpss...Dl.AhRs........NhsTl...cps................................hlhD+pAG.+Iss..EEu...ppLhptLsp....cl....ss.....................sclpF...hsuspaRtllVl.....+.....Gss...................s.......sp..ls..sosP..+..hssp.....................ltphhPh.....st................tut.oAcllschhpcutc.lLp.sHPlN..................................................................................... 1 107 199 243 +9976 PF10144 SMP_2 Bacterial virulence factor haemolysin COGs, Finn RD, Sammut SJ anon COGs (COG3726) Family Members of this family of bacterial proteins are membrane proteins that effect the expression of haemolysin under anaerobic conditions [1]. 
25.50 25.50 25.50 25.80 25.40 25.40 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.27 0.70 -5.22 5 788 2009-09-14 08:20:40 2007-07-31 12:46:41 4 3 783 0 95 345 12 204.10 52 86.28 CHANGED MlRAKLKFRLHRTAIlLICLALLVlLMQGASYFSpSHQhARs-QVEELA+TLARQVAaSLSPLM..GscssNup+IsAILcQLTssSRILDASVYppsGoLVA+SGEsVpVRDRLALDGK+uGSYFNaQIVEPIsGKsGPlGFLRlTLDTHsLATEu+QVDNTTNlLRLMILLuLAIGlILARTL....LQsRRoRWQQSPYLLTAspPVKEE-ES ..................Ms+sKLKFRLHRsVI..VLhC.LA.LL.VsL.M.Q.G.AS.a.F.Sp..s...HQ+tR....ssQL.E..E..LARTLA...........RQVsLslAPLM....p..s-o...sD.ccRIpAlLcQLTcESRILDAuVYDcQGcLlA+SGE...o.VpVRDRLA.L...DG.K.+..A....G.u.YFN..Q..Q..IVEP.I....tG..K.N.G.P.LGYLRlTL.D.T.H.s.L.AT.E.u.p.Q.V.DN.T.T.N.IL.....RLML.L.LSLA.I......GVlLTRTL..........LQG+RT...R..W..QQSPaLLTAspPV.EE--................................. 0 5 24 62 +9977 PF10145 PhageMin_Tail Phage-related minor tail protein COGs, Finn RD, Sammut SJ anon COGs (COG5280) Family Members of this family are found in putative phage tail tape measure proteins. 
30.00 30.00 30.00 30.00 29.80 29.40 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.33 0.71 -4.67 185 2672 2009-01-15 18:05:59 2007-07-31 12:56:46 4 25 1759 0 359 2411 301 209.40 19 20.43 CHANGED lpptshc..hus...p...............h.sh......oss..-hupuhttls..psG....hs.........spphh.shhtsshph.AsAsst..-hspsushhsshhssath.ss.......pp...ssphs.......Dhlstsupp.u.ssshpchupshp.thussupshGhsh.....pphsuhluhhspsGh.puspAGoul+shhtpl.........t..sppstpshpp..........l..G..l................................................................shtc.........upuphpsh.phltplppth.......tt..hsptpp..tshlp..........plF.........................G ..............................................................................................................................................hppthhp..hut..p........h...sh.....oss..-hupuhtths....ptG.......hs..............spphh.....shhtss.hph.....A..t..Assh.......s..h...s.p.s.uph.h....sphhssasl..ss..................cp.......hpplu.....................Dhlstss..sp.s.ss.sh...p...slucshp...thu.......ss.up.sh..Ghsh.......ccsuA.hhuhhtssGh..pustAuo..uh+s.hhtpl.................sts..sc..pstcshp.p..................L....G....l.........................................................................................................................shtc...........spsph...p.s.h.....phltpl....pth.......tt.....hsptpp.....thht.thhG.................................................................................................................................................................................................................................................................................................. 0 106 225 302 +9978 PF10146 zf-C4H2 Zinc finger-containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4451) Family This is a family of proteins which appears to have a highly conserved zinc finger domain at the C terminal end, described as -C-X2-CH-X3-H-X5-C-X2-C-. 
The structure is predicted to contain a coiled coil. Members are annotated as being tumour-associated antigen HCA127 in humans but this could not confirmed. 27.20 27.20 27.70 28.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.83 0.70 -4.33 7 142 2009-01-15 18:05:59 2007-07-31 13:15:50 4 3 99 0 93 126 0 206.40 47 91.22 CHANGED psItclRsKs.phcKl+sclhcEh-ss-spE+plp-hptph-tL.pEKhuHsEELR.IptDINshEshIKpocs-hp+phphhp+ha-Ehh.l+ttlschhcsh.....LulccL.phcEEE..hlo......................................pA+pshpPp.....hPss..........................PssP...sshhtuhhsstthp.sc...........................tsphc...sh..RpQsPP.......MKsC.SCaQQIHRNAPICPlCKuKSRS+NPKKsKRKp ....................................................................................................tltclRsKTlphEKlKsRlhtEh-shEsEE.+pLcEY+pEM-hLLQEKMuHVEELR.IHADINs......MEssIKQoEs-hs+.hh-ss.pRla.-.EYpPLKcclDth.Rhs......LGLpc..LPsLp...EEE..pp.lo.....-.hap...........................................................pt+t....hps.p................P.................................................................Ps..P......pshtAutssstphpssp.................................................................................p.p.sp..s...sshRQQPPP.........MKuCLSCHQQIHRNAPICPLCKAKSRSRNPKKPK+K............................................................................................................................... 0 35 43 73 +9979 PF10147 CR6_interact Growth arrest and DNA-damage-inducible proteins-interacting protein 1 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4848) Family Members of this family of proteins act as negative regulators of G1 to S cell cycle phase progression by inhibiting cyclin-dependent kinases. Inhibitory effects are additive with GADD45 proteins but occur also in the absence of GADD45 proteins. Furthermore, they act as a repressor of the orphan nuclear receptor NR4A1 by inhibiting AB domain-mediated transcriptional activity [1]. 
21.80 21.80 21.90 22.60 21.70 21.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.55 0.70 -5.02 9 102 2009-01-15 18:05:59 2007-07-31 13:21:45 4 4 86 0 67 101 0 192.60 32 86.81 CHANGED Mttshhp...pcshh...pLshThs............th..tssp.phhPp+RphhtshhPs.psp...pW.ccs.+YpRchFGRYGht.SGVsPctLaPot.EclcEh.tEchtah.oLppMhcphctpppcccp+tpAREpclAcshtKhPphlAcactphtcpcpctpscKpR+-RLlt-sp-+FGapVDPRssRFpEMLpphEKE-+K+hKttKR+cKcEchhAthsttsup .............................................................................................t.tt.ph.P..hph.tshhP...ps.....pW.phs.+Ytt+.aGRYGtt.SGVsP.t..h..WPo..Eplc.ch..tE..chtah...oLtpM.cplctppttccp+pptR............EppIAcpMsKMPphltca+pphpc+.............cpctpscKpR+tRLh.tEsp-+hGaplDPRss+FpEhLp..chEKc-+....K+hKttK++tKp.Ethhut....................... 0 22 27 50 +9980 PF10148 SCHIP-1 Schwannomin-interacting protein 1 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4847) Family Members of this family are coiled coil protein involved in linking membrane proteins to the cytoskeleton. 
21.70 21.70 21.80 23.70 21.60 21.30 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.54 0.70 -4.97 2 132 2009-01-15 18:05:59 2007-07-31 13:44:20 4 8 75 0 80 130 0 206.90 59 57.02 CHANGED shpAQ+NERESIRQKLALGSFaDDG.hhaTuCSKSGKssLSSRLQsGhNLQhCFVNDSuSD+DSDA-DS+TETSLDTPLSPMSKQSSShSDRDTsEE-.-S.-D.-FhphQ++LQtEA+hALAhA+PMA+MQVEVE+Q.p..++SPVADLLPHhPHISECLMKRsL+PsDhRDMolGQLQVIsNDLHSQIpuLNEELVQLLLhRDELHhEQDAMLVDlEDLTRHAcSQQ+HMhEK..uK ....................................................................................................AQ+NERESIRQKLALGS......F.a.....D....DsPslYTosS.+uGKPS..LSS.RLQSGMNLQICFVND.S.uS...D.....KD.S.....DADD.....S.....+..T............ETS.L..D..T..P...L......SP.......MSKQSS..Sh...S..D.R.DToE--.S.....ES..L-Dh.D...F.loRQ....+KL.Qu...EA+hALAhAKP.MAKMQVEVEKQNR......K.KSPVADL...L...P.H...M...PH..I.S..ECL..M..KRsL.+...ssD...LRDMTlGQLQVIVNDLHSQIESLNEELVQLLLhRDELHhEQDAMLVDIEDLTRHs..pupQ++.t-+................... 0 21 28 62 +9981 PF10149 TM231 NAcGluc_Transf; Transmembrane protein 231 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4838) Family This is a family of transmembrane proteins, given the number 231, of unknown function. It is conserved in eukaryotes. 
20.50 20.50 22.90 21.10 18.10 20.30 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.89 0.70 -5.44 7 132 2009-01-15 18:05:59 2007-07-31 14:10:04 4 6 95 0 88 132 1 236.80 31 81.00 CHANGED Mthaslas+sshlhY+spLCShAoLhlhhhlhLohh.P..LlshhpshuhW.cppshhEQPsVpFpYphlhluph-s.......stuhlAhSoFssFN.pLpsps.psstlpshpEDsNhDGppDtLphpl..pLP.pso.plhthpLllhF-hpLpphsshsh.ohhslphp.P.....hhuupl.hpG-LpLpQ....psPhshRslcopas.Vpl.hNuoss.hspaphcsIhtph.ctNsuha.hsspph.Wphspuuts..h.lchtl...ch.p.slhY+suhWpplt.hWlpYhSlLhI.Lalhp+lKpFlFpppllpohpl.lPaKc .....................................................h.hatpsh...h.Y.st.hohuslhhhhhhhhthh.P..llhhhpstGhWh+pssa.EQPsVpFpaphlhls.hs.........tthlshSo.a.sth..N.phtss.p...h.......ph.s.......hl...phh.........p......pDh.NtDG....ph..Dhlphpl..pl.shpst..plhthpll..hhFphpLpphsph.hpohhhhp...t.s......h.supl.h.G-LpLpQ....pts.h..ts....h.....p...a..........p.hpl.hstst...h.p.......aph...pplhtth.ptN.shh..t....h...ah.s....s.t.ts...h.lph.l..........ph..........h.............Y.....suhWEhlK.hWlQYhuhhhlhhalhpclp.alFppplltoh......................................... 0 31 40 66 +9982 PF10150 RNase_E_G Ribonuclease E/G family Bateman A anon Pfam-B_234 (Release 21.0) Family Ribonuclease E and Ribonuclease G are related enzymes that cleave a wide variety of RNAs [1]. 
22.70 22.70 22.70 23.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.64 0.70 -5.17 173 4570 2009-01-15 18:05:59 2007-07-31 16:14:19 4 14 3079 16 1101 3529 3482 263.90 41 37.13 CHANGED olsGRYlVlhP.......pssp..lGlS++Icsc..p-Rc+L+....plh.........................ppl...hsps.......hGlIlRTuupssspcclppDlphLhchWppIpcc.tpptt...........PsLlac-.slhh+slRDhhssclpclllDs....ppsapphppa.h..pphhPph.hp..pl..phap....sp.tslFctaslEppIpcsh.p++V.Lt...SGGallI-pTEALssIDVNoGph.sspps..h--Tsh+TNlEAAcEIARQLRLRslGGlIlIDFIDMpspccccpVhctLccslcpD+s+splssho.pLGLlEhTRKRhR .............................................................................SLsGRYLVhMP.......sss+..sGlS++I...-s-.ERpcLKchl...............................tpl.....hs.cs.................hGlIlRTAu....t.Gtst.--LptDlsaLh+hW.ppIpcp....tp..p...tt...s..............................PhLlapEss.lhh.RslR...Dh.h.s.p.-...lscIllDs...................ppsa.cphp.pa.h...p.h...hPch...ss.................+l...chY.p..............sc...hPlFstaplEspIpp..Ah..pRcVpL.....SGGhllIDpTEAhTsIDlNoGph...st.t..............t..s.............lE..-Tsh.pTNLEAAcEIARQLRLRsLGGlI.lIDFIDMs.stccp+tV.pp....Lc....cALpp.D.Rs+h.plstlS.phGLlEMoRpRh....................................................................... 0 343 700 917 +9983 PF10151 DUF2359 Uncharacterised conserved protein (DUF2359) KOGs, Finn RD, Coggill PC anon KOGs (KOG4467) Family This is a 450 amino acid region of a family of proteins conserved from insects to humans. The mouse protein, Q8BM55, is annotated as being a putative Vitamin K-dependent carboxylation gamma-carboxyglutamic (GLA) domain containing protein, but this could not be confirmed. The function is not known. 
19.80 19.80 20.00 20.90 19.50 19.70 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.50 0.70 -5.97 6 158 2009-01-15 18:05:59 2007-08-01 11:58:53 4 5 88 0 99 172 0 346.60 30 64.08 CHANGED HGYRlCIQAlLQDKPKIsTsNLscaLELLRS+QsRPhKCLTIMWALGQAGFssLopGL+VWLGIMLPVLGlKuLSsYAIAYL-RLLLhHsNLTKGF.GhlGPKDFFPLLDFAaMPNNuL.................................SsSLQEQLppLYPRLKVLAFGAKPESTLHTYFPSFLSRA.TPuCPs-MKKELLsSLT-CLsVDshShSVWRQLYsKHLuQSSLLLpHLL+oWcplPpKhpK.SLp-TlQSF+VTNpEh.tKGuuusp-lpsCsoACpuL.pKh+GsuhPWsRLLL.lLLVFslGFlsHDlRoHuSFQuSpTuphLppSGlhssuQQAhu+lpsYShQG.SWLpcshPtahSchlpVltPshp.uhs+hptsssFlst+stshlsahp-plsphhphl.tphP-sl.QhhthL+ELLLhhh+shlLPshthLhtsltth.pph.tuCpGcVohsClpspl.phup.oWhhLQcsTsshhs.tluh .............................................................hlh...Pphh...h...h.....psh..pp....hhl.slhWsluQAuhssLs.GLplWhtlhLPlL............s.....hK.....s.....h..u.....s.....hs....l....t.hl.-........Rl........L.....................hps........s........l.....p........c.u...........t....hlsPppFhsl...hc.hsa....s..p..ssh.................................s..shp-phptlYPpLK...luhu.u.ps..to......p....h.....pp...h..h.p....h..h...s...cu...ssps....s....s...p..h.....pcEh.h.s.shh..CLs...s..s..shphWcp.LY.cpLptS.....shLLp+LhpsWcphs.tKh.....p..p......s......L.pcTlpSh+hpNpch..hp..cu..p..s...ppp....sl....hs....s.......cthCc.....slhtp.............h.......p...u........t...........h..hsp.........h..hl...hllhhhs..G.hlhaD..h..t.t..s...httS.st.hLppsG.....hh.h.ppsh.hh..h..tu.phht...s.h............t.............ht........................h..................................................................................................................................................................................................................... 
0 23 39 65 +9984 PF10152 DUF2360 Predicted coiled-coil domain-containing protein (DUF2360) KOGs, Finn RD, Coggill PC anon KOGs (KOG4496) Family This is the conserved 140 amino acid region of a family of proteins conserved from nematodes to humans. One C. elegans member is annotated as a Daf-16-dependent longevity protein 1 but this could not be confirmed. The function is unknown. 24.30 24.30 24.60 24.30 24.00 24.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.79 0.71 -3.92 22 332 2009-09-14 15:15:59 2007-08-01 13:41:33 4 33 124 0 252 294 3 108.90 26 34.36 CHANGED lNpFllpTlpFLNRFushCEsKLsphppplQplEtphtlLEsKL...uSIs...................Ghppl.............................................sssssssssppsss.sssss................t.stssusssptsshtsss..t.................P.h...shhphp..pDPRYttYaKMLplsVPstAl+sKMpt-Gl-PshLDs ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s......t......s.hhthp....c-PcY.sKYFKMl.ph.GlPhtul+tKMht-..G..lDsslL-............................................................................................................................................. 0 124 163 225 +9985 PF10153 DUF2361 Uncharacterised conserved protein (DUF2361) KOGs, Finn RD, Coggill PC anon KOGs (KOG4484) Family This is a region of 120 amino acids that is conserved in a family of proteins found from plants to fungi. The function is not known. 
25.00 25.00 28.70 25.50 19.50 21.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.46 0.71 -3.89 28 193 2009-09-10 22:45:35 2007-08-01 13:50:37 4 4 165 0 142 189 2 111.10 35 37.80 CHANGED clK++IRDlERLLp+c...sLPsclRl-pERsLpuL+hcLps..pptcppp+Kh...hcKYHhVRFFERKKApRclKpLcKphcps.s...........tttchcphpcpLcpsclDLsYVlhFP+sEKYISLY ........................lKp+IR..slcRLLp+p......sLPsslRlppERpLpuLcpplpp....pptppt....cpph.......hpK....YHhV......RFF...........ER+KspRpl+pLp+phcpsp...............................tttph.pplppplpttclDLsYsh..........aaPpsEKYlSLa......................................... 0 44 84 122 +9986 PF10154 DUF2362 Uncharacterized conserved protein (DUF2362) KOGs, Finn RD, Coggill PC anon KOGs (KOG4506) Family This is a family of proteins conserved from nematodes to humans. The function is not known. 25.00 25.00 27.60 27.60 22.80 22.30 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.65 0.70 -5.83 7 140 2009-01-15 18:05:59 2007-08-01 14:11:51 4 4 99 0 101 136 2 381.70 37 82.64 CHANGED ppshscLptplhhl+t...Pph....h-psLcpsLppFl-p..EslschDccAEthlpthtptcs-hptlspsht+hYs-phLEaAc..tss.-EpFApsaHpLlHSss.pplLshE+sYhhslophhpcpD.Elcphpphpt.EhsK.hppLstolsspDlN.shhAppapsQp.lcp+auoELpphpshQKpEYpcWV...........sSpls-phpsQstp.ptsustsshh.ppppphEESFTIHLGuQLKpMHNlRLlpsc.hhDhCp....hcscto..phpRLphAhuLYSoSLCGlVLLlssp...hss.c+-FhpssppsT-FHFspl-cQLEhlppsl..L.sptp+sphpctp.spsspstusts+sc..........+p.sslhsG-FaITRHSNLScsHVlFHLslD-slpS.....upIsuRcPsIhGLRNILKssspHDITTIoIPlLLhH.-MSE-MTlsWCl+RAELVFKCsKGFMhEhsSWsGu.....pTVQFLlPpsISEphFhpLusMlPplF+VusshhL 
.....................................................................pl..ptLppFlpp..pp.p..scpsp..lpth.t..ptp.sl.pthhptht+.atpp........hlpasc....ts.....s...............--....FutsaHpLlHS..s...s.cslLphEptahhsls-hhtttc.plpphp.pp..............p..........t.EhpphhppL.......shshosp-lN.slhupp..hpppp...............l..cppapsclct.pthQ+pEYppWl.th.t..............ss...th.......sp........h.....h.p..s.....................p......s....pt......t....pp....t...........h-ESFTIa...L..G.uQLKpMHNlRllpss..hh-hCp.................hpspts...........pRLphAhuLYSssLsGlVLLs.......s.......s.p.......hsshpc...........phhphsppsT-FHFspl-pQL.chlpp.h......h...t.p......pth.ppt........p.ttttt.................................................pp..pl.....s.G-haIT+HSNLu...........p.....h.HVhFHLssD-.s.lpS.....spIsuRcPslhGLRNIL+ss.spaDloTloIPlLLhc..-...hsE.....cMTl...s....W........Cl+RAELVF..............KClKGFhhE..soh....sus...........polQhllPpslo..cphFhplsshlPplF+lsssh............................. 0 46 53 81 +9987 PF10155 DUF2363 Uncharacterized conserved protein (DUF2363) KOGs, Finn RD, Coggill PC anon KOGs (KOG4508) Family This is a region of 120 amino acids of a family of proteins conserved from plants to humans. The function is not known. 25.00 25.00 26.80 26.90 21.90 19.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.55 0.71 -4.20 16 204 2009-01-15 18:05:59 2007-08-01 14:17:11 4 3 134 0 149 197 3 120.10 58 30.15 CHANGED lEpNPplAh-lLh+lhpos.p........hp-YhcsLlsh-lolpShElVN+Loppl.......pLPp-FlphYIspCIpsCpslK..-+.hQsRhVRLVsVFlpoLI+sKllss..+-LhsElpuFClEFS+lpEAssLF+hlKs ............VEsNPllAlEhLlKLhpSs..p........................Is-YFslLVs..M-MSLHSMEVVN..RLTTuV..............-LPsEFlHhYISNCIuoCEp.lK.........DKYMQNRLVRLVCVFLQSLIRN.KIINV....pD...LFIEV........Q.......AFC..lEFS..R..IREAAuLFRLLK.......................... 
0 71 93 123 +9988 PF10156 Med17 DUF2364; Subunit 17 of Mediator complex KOGs, Finn RD, Coggill PC anon KOGs (KOG4512) Family This Mediator complex subunit was formerly known as Srb4 in yeasts or Trap80 in Drosophila and human. The Med17 subunit is located within the head domain and is essential for cell viability to the extent that a mutant strain of cerevisiae lacking it shows all RNA polymerase II-dependent transcription ceasing at non-permissive temperatures. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.42 0.70 -6.22 28 294 2009-01-15 18:05:59 2007-08-01 16:06:26 4 5 238 3 216 270 0 390.00 21 67.52 CHANGED Ms...............psh.lsLcP.....................tt..pp...........s..................................lslcphIspIhtEhG...sFtslsEcoLpccIppppsptptsptspp...........ppccspssp................................................................ppsph-ph.pt..+p-lhpplppAhsEssluLDFlSLLlSphpsp......................uhsohS.....PhLcphsPsuSLs...u-hlt.s..ttpttht...............ptlspGWKhcuLspAsshLhpuup+Lppplpp...........Epc.................................................................YWpcllplppps.....Wtlh......+h...tpspp.lGlcYGat-uGspF.+ccGlAsLRpssc.uslpl................s..........shthpts+hlRV+Ih......csthlsGp.Sphs........p.tssssslpspIppA+pslFEcELFapLpREAppLhshslplps..Nplhhpls.......sppltI-L..........lsh-cps.tstpt........sp.psthApslhtsL+lLLsthH+psLpp+tpsssshs.h.....................p.p.sss.hlLRPlluhh+Hpphhphhpphlsslspslpsss 
............................................................................................................................t...................................................................................................................................hslpt.htp..tphu...t.h.lt..shpppltp.ttp..s...tptttp............pppptttt......................................................................................tpptt.c....ph......hp..plh...pplp.......pAhsEhslhhDhl.Sllhsp..................................................................................................thhshs..............Ph.pp.....hs....tphs..................................................................thlphhhKhcuLst..uuphLhpuAp.cLpppltp....................ppc.................................................................aapcllplppp......Wplp............................phts.lh.sc..h...u...a...tpu..u.sha.pptshtsh+psp......tshth..........................................................................s...hpt..thlp...Vpl.........tsh.hss...pt.............................p.t.ts...p.hppp....lptA.......p.......pslhpcElFtpLsREAhplh.s.....h...sph...h....stlhhph..........s...pl.lpL............................h...sptt.tt.t.................................pths.hl.h.L+.L.lt.hactp.pph...s.ss.s.h....................................p.p.p.....lLc.llt.hpH..hhp...htthlpphst.l......................................................................................................................................................................................................................................................................................... 0 70 116 175 +9989 PF10157 DUF2365 Uncharacterized conserved protein (DUF2365) KOGs, Finn RD, Coggill PC anon KOGs (KOG4514) Family This is a family of conserved proteins found from nematodes to humans. The function is unknown. 
26.20 26.20 26.30 27.30 26.10 26.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.77 0.71 -4.32 6 109 2009-01-15 18:05:59 2007-08-01 16:13:18 4 5 96 0 81 116 0 135.70 33 51.13 CHANGED csGchTHFVAcNLEtKIR...............pSucpo.TPssusGPh........hsht.h.p...IP.....lDPsVLsDlEpcuQtLAsSVDsLLcsLsusLHulSulTs-slpsY+sAVsKLsDslDuNIKshYsLLAKsEEls+SMpPscpLAppIR-IKRLVDhLEolh .................................................t..st.h.FlspsLp.+l+...........................s.........s.......ts.tsh.......................t.h..p.t.lP.....lDss.lLp-LEpcupplussVDpLhcsLsuslpp.houlolsslpsa+cuVspls-.u.l.DtsIKu......hYpLlA+sEELs+uMpslptLAppl+cI++hl-hh-sh.............. 0 32 41 65 +9990 PF10158 LOH1CR12 Tumour suppressor protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4515) Family This is a region of 130 amino acids that is the most conserved region of hypothetical proteins involved in loss of heterozygosity and thus tumour suppression [1]. The exact function is not known. 23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.62 0.71 -4.36 5 152 2009-01-15 18:05:59 2007-08-01 17:47:16 4 5 94 0 69 138 0 104.40 47 44.96 CHANGED pFLPVL+GlLSupTs.pTNp....pLERLcSppllpLCoRhQ-HLspCAcAVAuDQNpLVcRIKEVDsSlsoLauthp-+QKpYAuYAEQluKVNplSspLsRIQhlL-QsVPhMEpLNshLs--ERLEPFsh..................+P- ..............................pF.PlL+tslout...Ts....s.sp.....hLE+....LsSppllplChRhQs....HLp.CAphVA....-QstLstphKph-.thtpl.....ht.h.ptpcpaspasp.pht+...lpphpt.lpchp................hlpp.l..h-tlNphLs.tcpL..hsh........................ 0 22 27 50 +9991 PF10159 MMtag Kinase phosphorylation protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4520) Domain This is a glycine-rich domain that is the most highly conserved region of a family of proteins that in vertebrates are associated with tumours in multiple myelomas. 
The region may contain phosphorylation sites for several protein kinases, as well as N-myristoylation sites and nuclear localisation signals, so it might act as a signal molecule in the nucleus [1]. 25.00 25.00 26.40 25.10 24.60 21.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.83 0.72 -3.94 16 182 2009-01-15 18:05:59 2007-08-01 18:10:29 4 5 163 0 133 175 1 77.70 55 27.33 CHANGED GsRGGpspFpW-DV+ssp..cR................ENYLGHSlh.APVGRWpKG+DLpWYsK..scsssuptst........ph+EElp+lKptEpcAhttALG .........GsRGGpspFpW-DVKs..sp..cR................ENYLGHSLh.APVGR.WQKG+DLsWYAK.........scss....s.s..s..t....................pccEElpclKcAEc-AhttALG............................................. 0 46 76 103 +9992 PF10160 Tmemb_40 Tmem40; Predicted membrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4536) Family This is a region of 280 amino acids from a group of proteins conserved from plants to humans. It is predicted to be a membrane protein but its function is otherwise unknown. 25.00 25.00 25.10 36.70 24.50 23.90 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.87 0.70 -5.16 7 145 2009-01-15 18:05:59 2007-08-02 10:45:22 4 3 83 0 90 124 1 232.70 37 73.71 CHANGED hc.luss+sphashlLllPslhFhhFLhachspuhsKLphspS..IhluaYhLlalVullslhhshlphh.shsst......pshWpllphF....hLhlElSllhFhLthsahsutpSlpRshllouhlshh.slsQshhphths..hsLhhs..s..pcGththWlhpphlhhhVYhhlhhh..hp.phR.+LPu+suFY.YhhhhhhLNhlphhusuLhsspst.GhhhhsloshhYaulahPLlYlTFLtcFhpp.....................D..L.ssahu .........................lsps+shhaslhLllPslLFhlFLhh+h.puhtKlp....hspS..IhloaYhLlalVullslshshlph.h..sssst......cllWplhphF....hLslElSllhhh.Lthu..al.p...utpSlpRsLhlosl.....luhs.olhpuhh.hhas.....s.L.h..c.....s...h....s.hh.sc....GthtaWlhpphhhhhVYshlhhh...h..+.R..p..LP...............u+.uFYhYsshhhhLNhlphhGssLls.h.s.hh...Ghh.hhshTshhYauhahPLl.YhsFLtsFFtpp....................c..L.............................................. 
0 22 42 68 +9993 PF10161 DDDD Putative mitochondrial precursor protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4542) Family This is a family of small conserved proteins found from nematodes to humans. The C-terminal region is rich in asparagine. Members are putatively assigned to be mitochondrial precursor proteins but this could not be confirmed. 21.00 21.00 23.40 22.30 20.90 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.53 0.72 -4.35 9 96 2009-01-15 18:05:59 2007-08-02 11:08:41 4 4 78 0 69 85 0 76.30 44 71.82 CHANGED ssphustssshulh.h.hposhshpSGAltP+PpphsFGLLtlhhsVIPuLaIGuhISKNhAsaLEENDlFVPsDDDDDD .............................ts..............h..RoshsspSGA..lhP+P..chsFGLL+l...........hslVI...Ph..LalGshISKNhAshLEEpDIFVP-DDDDDD............. 0 19 23 47 +9994 PF10162 G8 G8 domain He Q, Liu X, Bateman A anon He Q, Liu X Domain This domain is found in disease proteins PKHD1 and KIAA1199 and is named G8 after its 8 conserved glycines. It is predicted to contain 10 beta strands and an alpha helix. 21.00 21.00 21.10 22.90 20.90 20.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.55 0.71 -4.33 34 504 2009-01-15 18:05:59 2007-08-02 11:58:43 4 98 97 0 317 448 35 124.80 30 7.75 CHANGED soWtst.....pl.........Pps..ucsVhIssGpplllDss....ss.hpplhl........pGpLlFss.....pshsLpspsIhlpsG..plphGoppsPa.....psphsIsLpG.........................spsssph.......shGsKsIulhtsGsl-lHG...............csoWTcLss ...................................................Wt.st.................Pp..s..uspVhI..pGphlllDss.....sshhphlhl......sG...pLlFs-................pslpLps.c.hIllpsG................u.pLplG.o...cpsPa.....................ps..chpIsLhG...................................................................pts.ssph.................sshGsK..s.lu..........Vht.GsL-LHG...............hphs.WTpLs.t.................................................................................... 
0 146 187 233 +9995 PF10163 EnY2 Transcription factor e(y)2 KOGs, Finn RD, Coggill PC anon KOGs (KOG4479) Family EnY2 is a small transcription factor which is combined in a complex with the TAFII40 protein [1]. The protein is conserved from paramecium to humans. 20.50 20.50 20.80 20.70 20.10 20.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.71 0.72 -4.05 24 256 2009-01-15 18:05:59 2007-08-02 12:50:33 4 9 183 24 181 241 1 83.10 35 66.19 CHANGED plcstlppp...LlpS..G-pc+lpchL+t+LhEsGWpDcl+phs+chlpp...............tt.sslsh-cLhshlsP+AcshVP-pVKp-lLp+I+saLs ...................hpstlppc...Llco..G-p-+Lc-LL.+p+L..h....EC.GW+Dpl+ths+.chlcc...................puhp..sloh--LlsplsP+uRs.hVP-sVKpE.LLp+I+saL.h............................................................ 0 67 94 144 +9996 PF10164 DUF2367 Uncharacterized conserved protein (DUF2367) KOGs, Finn RD, Coggill PC anon KOGs (KOG4517) Family This is a highly conserved family of proteins which contains three pairs of cysteine residues within a length of 42 amino acids and is rich in proline residues towards the N-terminus. The function is unknown. Several members are putatively assigned as brain protein i3 but this was not validated. 22.00 22.00 24.00 23.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.38 0.72 -4.15 4 109 2009-09-11 06:36:34 2007-08-02 13:25:12 4 1 77 0 78 100 0 88.90 38 74.20 CHANGED hPsYGAh-ous.tsPlPlV....lpssPsutPph............hsp.IIVVGGCPsCRIGhLEDsaoChGIhCAIFFFPlGILCCLAh+pRRCoNCGs.F .........................................................................................................s..............s.......t..........................sp..llVl..G......G.CPsCRlGsL.E..Ds.F.TsLGIhh...AIhhFPlGll.....CChuh...Rp+RCsNCGAhF............. 
0 35 40 62 +9997 PF10165 Ric8 Guanine nucleotide exchange factor synembryn KOGs, Finn RD, Coggill PC anon KOGs (KOG4464) Family Ric8 is involved in the EGL-30 neurotransmitter signalling pathway [1]. It is a guanine nucleotide exchange factor [2] that regulates neurotransmitter secretion. 25.30 25.30 28.10 26.00 24.80 24.60 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.60 0.70 -5.66 23 341 2009-01-15 18:05:59 2007-08-02 14:02:37 4 3 198 0 221 310 0 380.40 31 83.45 CHANGED sLpsl+lluR-hpshsPhhocculph.LhpaAth...................................................................................................................psss.csthEALKCLsNhlh.sspsp..phhschthsstlscpLpps.t..................sp-scahthRLlFLhTu.sststppLhpchpslshlsttLppphpptspstpt....................-t.tlsEsLKllFNlThhhscpts.............p.uspht+lsslLpcl..................shsps.h-shhuphlNhLssL.sLpshcs.....hppssl................................tssshsslctLlphL-pplpph................pshc...............-plsPlLslLschschs...............................ctsR+al+ttlLPs...cDhcp...t.-pusoLpu+LlRLhTps..sslKcsuuEhLFVLCpcsss+hlKhsGYG.AAGlLAsRGl..sh......tp..t...............tauss.pcu-T--.............hpphp.sINPlTGph.s.-p....ssh-s...MT-EpKEtEA.+LhsLF-....+hpcpGllps.............tthhp-G+ 
..........................................................................hLptl+lhuR-.p.shsshhscps........hph.Lhphu.................................................................................................................................p......p...shhEuL+CLsNhlh.s..sp.........phhh..-h.t.hs....t.l..sphlpt.........................................sp-.p..hh.shRlLFLho..hhpst.h.hppLhpch..phhph..lsphl..pttht.....hh.ttt.t.....................................pp.thhEhLKhlFNlTh...c.t..............................p.stth.hlhslLpch..................s.stp.hpth.hsphlNhLssl.slp.shc..s................ptt.s.h.........................................................................................................................s.shpslphLlphh-pplpph......................................pt.c.......................ptlsPlLslLpchscht...............................cthR+al+......t........p........lLPs...cDh..pp.......................h.p..h..us..s..lps+LlRLhTpss..sl+phsuEhLFl..L...C.......pcs........s...sphlKhsGYGsAAGhLss+Gl.....sh.......tp.....t............................aSp...p.psoco--..............htphp...tlNPl.TGph.p...........ssh.t....MT-EpKEhEA.+Lhshh-....+Lppptll.p.s.................................................................. 0 72 106 168 +9998 PF10166 DUF2368 Uncharacterised conserved protein (DUF2368) KOGs, Finn RD, Coggill PC anon KOGs (KOG4544) Family This family is conserved from nematodes to humans. The function is not known. 
25.00 25.00 34.70 34.50 24.00 23.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.77 0.71 -4.54 13 98 2009-01-15 18:05:59 2007-08-02 14:20:32 4 2 76 0 72 84 0 127.10 39 88.52 CHANGED MGt..shu+u.p.pNh+cpQEa.....chQhERQLtMppph+pRQhAhplApuREhhpW..huuFaslssluhssuhhK+K.......+PshlsPllPLuFlluYQhDhuYGsplpRl+uEAEpIl-pEpp.LLphPtGh.TlcsI-c ...................G...hhscs.s...tsh+p.pQ...Eh..........chQhERQlhMQspMRERphAhp..IAhuREhhpa..husFaulAsluhsuuuh+.pK.......+s.shlhPllPLoFllsY...QhDhuYGohlpRh+uEAEpIhcpEpp..hLphPtGh.Thpsl-p...................... 0 28 32 53 +9999 PF10167 NEP Uncharacterised conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4523) Family This is the N-terminal 80 residues of a family of proteins conserved from plants to humans. It contains a characteristic NEP sequence motif. The function is not known. 25.00 25.00 27.60 26.20 23.40 24.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.41 0.71 -4.40 7 127 2009-01-15 18:05:59 2007-08-02 14:45:11 4 2 98 0 89 113 0 107.90 33 64.56 CHANGED hp-sEhth+h++so-KhoEslahlANEPSluLYRlQEHVR+slPpllp++s-lhphppphpGtsYDlEYuhpsVKohpc.....us.hFpsIpphL+puIthKpplpss..pu+p..cpptsssSs ..........................ph..thtc...so-+hoEslhllANEPSlALYRlQEHlR+ulPtlspc+schtphppp.pGthaslEYuhsAV+shtc.....us.hFcslpphL+puIth+pphpht...ps..............s.................................. 0 30 46 69 +10000 PF10168 Nup88 Nuclear pore component KOGs, Finn RD, Coggill PC anon KOGs (KOG4460) Family Nup88 can be divided into two structural domains; the N-terminal two-thirds of the protein has no obvious structural motifs but is the region for binding to Nup98, one of the components of the nuclear pore. the C-terminal end is a predicted coiled-coil domain [1]. Nup88 is overexpressed in tumour cells [2]. 
20.00 20.00 20.00 20.00 19.60 19.90 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.25 0.70 -6.69 5 211 2009-01-15 18:05:59 2007-08-02 14:58:37 4 6 126 0 142 223 0 392.10 22 73.25 CHANGED G-pWpppLscHcLFs+L+EGL+lppcTppct.t................+NLLsCLDG-LFlWDucESsFLVsNLRSssusuppsshSpYQTLLCoNPPLFEVscVLlSPoQaHVALlGsRGVsILELP+RWGKcSEFEGGKpsINCRThPlAERlFTSSsSLsLRQAtWYPSEspEPHLlLLTSDNTIRlYNLp-PpoPs+VssLSph.c-osl+ssupoatASLGETAVAFDFGP..............................LussPKsLsGp+..uKpEhlsYPLYILYENGETYLlYISL.+psuslGKLhGPLsMaPuAEDNYGYDACAVLCLPCVPNIIVIATESGhLYHCVVLEAEE-E-touNEsWDuss-psPSLY..VFECVELELuLKLAosE-E.h.-sDFoCPI+L+RDPICppRYHCTHsAGVHSVGLoWIs+LcKFL-S-EEDKDSLQELAoEQ+ChVEHILCTKPLsscpPuPVpGFWIlsDl.LGAoMICITuoaECIuLPLLool+PsSPPLLCo+hDs-Vspp..+hLAEossSFEcHIRoILQRSVsNPlLLpSu-KDSSPPPcECLQLLSRATQVFREQYLLKpDLA+EEhQRRVKLLpsQKEKQLEDlphCREERKSLoEsAE+LAEKFEEAK-+QEsLlNRsK+lLpSh+oQLPVLS-SERDM+KELQpIs-QL+HLuNuIcQVK+KhsYQ+++Ms.uscSP+KsolTLs-+Q+KsIpsILKEpGEHIccMlKQIKcI+NHVuF 
......................................................................................................................................................................................................................................................................................................................................................................l.ph.WaP.....tsphh..hL.sps.hc.aph..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 64 88 117 +10001 PF10169 Laps Learning-associated protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4811) Family This is a family of 121-amino acid secretory proteins. Laps functions in the regulation of neuronal cell adhesion and/or movement and synapse attachment [1]. Laps binds to the ApC/EBP (Aplysia CCAAT/enhancer binding protein) promoter and activates the transcription of ApC/EBP mRNA [2]. 
25.70 25.70 26.10 25.80 25.60 25.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.24 0.71 -3.90 11 105 2009-01-15 18:05:59 2007-08-02 16:02:28 4 5 86 0 69 106 0 123.20 42 88.89 CHANGED KSLRSKa+R.KMRA.KRpKstPKELtRLKpsLu.p-....ut.lMc-lp-lsphhsscchc.Eptct-sp.......t.....pcctKM-V-sc...h....NpKTLpDpaGpYPsWhNpRQ.pKKh+tKppupK.sKsKs.....sKsls .............KSLRSKWKR.KMRA.KRcKsAPKELsRLKphLt.hD.....uchl.MpDlp....-IATVlss+chc..cphph-tp.............ppt.....pcstKM-.s-tK...+....N+K..TLhD..paGpYPlWMspRQ.tK+LKsK+pppK..s+s+s..sh..th..................... 0 17 23 46 +10002 PF10170 C6_DPF Cysteine-rich domain KOGs, Finn RD, Coggill PC anon KOGs (KOG4543) Domain This is the N-terminal approximately 100 amino acids of a family of proteins found from nematodes to humans. It contains between six and eight highly conserved cysteine residues and a characteristic DPF sequence motif. One member is putatively named as receptor for egg jelly protein but this could not confirmed. 20.70 20.70 21.10 36.90 20.20 19.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.45 0.72 -3.93 8 86 2009-01-15 18:05:59 2007-08-02 16:22:26 4 3 73 0 60 75 0 90.60 44 66.03 CHANGED FcCQLCsLouPYoYaGQKPPsopulVLLE-sYVMKDPF.......oPD+E+FLlLGS+CSlCSKsVCVGs-CSLFYoKRFCLPCVpcplpsFP.EIQp-l-K+K ..........................FpCplCslpt.apY.hGp......+PP.....s.pullhLE-sYVM+DPF..........................o.s.c+ccaLlLGucCSlCu+hVClus........pCSlFYo+RFChsCsppphptFP.clptclpK............................... 0 16 20 40 +10003 PF10171 DUF2366 Uncharacterised conserved protein (DUF2366) KOGs, Finn RD, Coggill PC anon KOGs (KOG4545) Family This is a family of proteins conserved from nematodes to humans. The function is not known. 
25.00 25.00 35.20 30.50 17.90 16.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.78 0.71 -4.77 9 94 2009-01-15 18:05:59 2007-08-02 16:30:53 4 3 80 0 68 89 0 155.90 36 75.42 CHANGED sshthPp+acphh.pKhuhaa+uLlpDYpEssh-sspsu+sRPhKAulYLolLushhhsstpNPsEssF.spLhcssspLlLVsPpppNssSstalppL.chhNpG+LR+lSLGlhSllahssaDcssslYpAhC.alps.tahshacRllDVGFhG+WWhLcpKMhDYDlNp- ..................h......thatth.....stah+sLhpDYt-ssh-ssttspt+Ph+AslYsshLuushhshtpsPsEtsFpp.tLlcsospLlLlusthRN.pSptalppL.hhhspGpLRalsLGlhSLla.ssaDtpsslYpApCpYLps.pahsF.pRllDVGFhG+WWlLpt+McDaDlNp....... 0 27 31 52 +10004 PF10172 DDA1 Det1 complexing ubiquitin ligase KOGs, Finn RD, Coggill PC anon KOGs (KOG4816) Family DDA1 (De-etiolated 1, Damaged DNA binding protein 1 associated 1) protein binds strongly with DDB1 and Det1 forming a DDD complex which is part of the ubiquitin conjugation system [1]. 21.00 21.00 21.20 21.30 20.70 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.18 0.72 -4.15 11 148 2009-01-15 18:05:59 2007-08-02 17:01:35 4 2 101 0 93 145 0 62.20 44 51.23 CHANGED cFLpsLPs.sppNFS.h..ssss.psostRssVYlPT..cshP.s-QlIsT-ppNILlRaLpQph-cK ................cFLpGLPsascuNFSphpss...sss..ps....SspcsuV...YlPT..cshP.............s-Ql...IsT-p.TNILLRaLpQph-KK.......... 0 23 46 72 +10005 PF10173 Mit_KHE1 DUF2343; Mitochondrial K+-H+ exchange-related KOGs, Finn RD, Coggill PC anon KOGs (KOG4539) Family The members of this family function as mitochondrial potassium-hydrogen exchange transporters. The family is part of a large mitochondrial KHE protein complex. 
22.50 22.50 24.30 25.20 20.50 20.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.38 0.71 -4.44 47 180 2009-01-15 18:05:59 2007-08-03 11:32:18 4 3 162 0 133 170 3 185.80 32 61.97 CHANGED +lhl..lPlos...........+.+s.........hlY..spphp...t....................................phhphhs+lssKAucs....Wsphppucpu....a....pp+llsaGp+lLp+lPaEEhuLKSl.....Pshsp.h+phpp..........................................................ttp..........plplhaP....sslpspplhspL.+plupcptthH++hhhhshlshPlThPhsLlPllPNlPhFYlsYRuauHa+AhpGucHLphLlc ..................................RlhllPlos.................+.+s.....hlY..spphssh.h....................................tpt.shhctlssKuucs....WsphccupcG....a....p++lssaGp+lLpRIPa-EhuLKSl..........Pshss.h+p.p.t...............................................................................................p................tlpllaP....shlss..........pplhphL.+plspcppthH++hhhhsllshPlThPhsLlPl.......lPNlPhFYlsYRuaSHa+.....AltGuc+LphLl................. 0 43 82 117 +10006 PF10174 Cast RIM-binding protein of the cytomatrix active zone KOGs, Finn RD, Coggill PC anon KOGs (KOG4809) Family This is a family of proteins that form part of the CAZ (cytomatrix at the active zone) complex which is involved in determining the site of synaptic vesicle fusion [1]. The C-terminus is a PDZ-binding motif that binds directly to RIM (a small G protein Rab-3A effector). The family also contains four coiled-coil domains [2]. 
35.00 35.00 35.30 35.40 33.50 34.90 hmmbuild -o /dev/null HMM SEED 775 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.79 0.70 -13.90 0.70 -6.49 7 395 2009-01-15 18:05:59 2007-08-03 11:45:35 4 15 88 0 216 377 0 443.00 33 71.08 CHANGED LpupLcps.pEpE.L+pEhEhppocLspohsplcp..........................................................hhSsELc+-+.hRccEuuclshh+pphchsptpst..+hQ.phptLQpcL.+.Qt-lp+Lhpp..pusschtp..................h-hs.tt.pRlptE+-ttth-L.hlR.....c..ohEchpLphEp.pppLssts-plcKLhEhL.....................................................Kupppsppth+RhtEt-sphpcLEslL-p+EKpp.hh..........................plHp+hphp.DsAtTc.............A.QplI-.KDop.tphE+hLcc...hpsEl.hLpSpsshupt-Rpc...plEs.+SthtthKs+................hDpst.pLuRpc........cLEshspptsDh.+p+l-hLppsLpttppctshLQsEl-tLphcL-cpcshhsKpotplpshpEEtuphusEIcch+-hh-hpptclphLQcp.....-.phpccp+phpp.p-Rl+p............ppl-cslhEhE+hht+lcp.Rpptpp.ctEph-paKcchcplcpclpshptcLpEpphpL..h+tcsspLhSst.KpcochcphcIthEpt+-cppKL.pplc+........p.psphspphstlctsssthps-sttspsElDRLh-hLccs.pp+ssp-hchucLtp....sph+hscQpcssts.....hhhp.......c+hsAphhc.hcR......cc-slppuspp..........................................pplpcL.ushcplttEh-phhhpLppoQp.LhphppcLsshcsEhp+phEEshchK...QpAhlAAluppsuphAhhEhpu.pKchsp-chtthcpE+-..........................+lVcQ.K...p.......phQpRMpL 
............................................................................................................................................................p..ts..t.hptpht....p.t..................p.t.....ht.tLppcL..+.......ph......................-....t..th..t..-..-p...-h.hhp.......t.phcchphphpp.pp..ph.thppplcpL.-..................................................................................................................................................................................................................................................................................................................................................................................t.ph............p.....p...t..ptchth.p.p.tt.ptp.t.h.sp.............h-tht.chcptpt..tphtt...th.t-ht.h.t.clpc...hhp.hc.tp.ch..h..tp......p.h..t.........p..ctp.h.....th............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 59 80 165 +10007 PF10175 MPP6 M-phase phosphoprotein 6 KOGs, Finn RD, Coggill PC anon KOGs (KOG4531) Family This is a family of M-phase phosphoprotein 6s which is necessary for generation of the 3' end of the 5.8S rRNA precursor. It preferentially binds to poly(C) and poly(U). 
25.00 25.00 25.20 25.00 24.40 24.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.49 0.72 -3.65 6 144 2009-09-11 06:05:00 2007-08-03 13:18:23 4 1 114 0 97 135 0 102.40 40 63.94 CHANGED LSKulLcMKFMpRs+-pl-Kcts-Ecp+t...LhSsE...lstchLppoppall.EsSal.C......EsLl.GRlSFtGhNPElE+LM...........E....p.pAt+....puctt-cpscth.....DVsDpEMA ....LScsLLcMKFMQRsh...-pppKcp..h..-.E-c+c........l...huc..E.............l..shstl...p....ppps...a..ll...Ep.Sah.C........-sL.l.h.......GRMSFpGFNP-l.EKLM.....................php..sc.p........p.s..ct...p.cp.p..hth......DVoDpEMA.................................................... 0 29 40 69 +10008 PF10176 DUF2370 Protein of unknown function (DUF2370) KOGs, Finn RD, Coggill PC anon KOGs (KOG4812) Family This family is conserved from fungi to humans. The human member is annotated as a Golgi-associated protein-Nedd4 WW domain-binding protein but this could not be confirmed. 22.60 22.60 22.90 23.30 22.50 22.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.72 0.70 -5.02 26 493 2009-01-15 18:05:59 2007-08-03 13:29:02 4 7 220 0 283 488 0 134.60 24 66.36 CHANGED phstlssh.......p..sst.ut...thGtusDGVFsNLsAK...P-p.................tttppphPPoY-pAAADssPsYa-sshhssu.................hhtDElhl-GLPVGshhsFlWNhhlohsFQalGFLLoYlLHToHAA+pGSRuGLGLTLlpaGahM+sus.....................t.tts.thh...s...................Pssap...........p.....t....suaps.........................htsp.WluahlhhlGhhIhl+ulhsYh+l++hEchlLpsssppt 
..........................................................................................thh...................................................................................................................P.P.....sYp.st.t................P.Y.t.ph.ts..............................................................................s...hp..th.....l....Gs.h.Fhhshh.huh.Fp.hlGFhh.hhhppohAu+.Guh.GhGlohhtahh.hh.t................................................................................................................................................................................t..hh.hhhhhhGhhh.hpuh.ta..hhp.............t.................. 0 66 110 190 +10009 PF10177 DUF2371 Uncharacterised conserved protein (DUF2371) KOGs, Finn RD, Coggill PC anon KOGs (KOG4823) Family This is a family of proteins conserved from nematodes to humans. The function is not known. 22.90 22.90 23.10 24.20 22.80 22.80 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.88 0.71 -4.30 10 150 2009-01-15 18:05:59 2007-08-03 13:41:28 4 3 78 0 102 135 0 130.10 39 32.60 CHANGED RQDSLRSp.p...........tstc++......+s+++p+u..-VVVVRGKl.....+LhSsSGhhllLGlllllVGlAMAlLGYWP+................................................ttt.tsttss.opspssshpspst.Gshs+hhppaLHS-+hKhLGPllMGIGIFIFICANAlLHENRD+cTKlIshR .........................................................tph+.......h+h+.+s...cVlVV+GKl.....+LhS.SGhhhllGlLlhllGh.....uMAVlGY.WPp......................................................tth...s..ts..t.spt..hpsps...uhhh+.hhptahHs-+h+hlGPllMGIGlFlFICANslLaENRDccTKll.hR........................................ 0 19 28 60 +10010 PF10178 DUF2372 Uncharacterised conserved protein (DUF2372) KOGs, Finn RD, Coggill PC anon KOGs (KOG4828) Family This family consists of proteins found from plants to humans. The function is not known. 
21.20 21.20 21.20 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.79 0.72 -3.85 8 121 2009-01-15 18:05:59 2007-08-03 13:49:53 4 3 100 2 79 102 0 89.10 34 62.90 CHANGED lVlTQhGKhGTllslp........s.spu-.........lspPsaoscV......LLGp.D-Plh+lhA+pLlstlutEsus+sllLuluLKD+Ss-sLKsltcslcpspl ..............................VllTQhGKhGTl.lslp...............s...tts-....................hscPshsspV......LLGp...D.EPLlcl....hA+....pLlpal..ot......p......uu..s...+sllLuluLK.D+Sh-sl+slhpslpps......................... 0 21 34 53 +10011 PF10179 DUF2369 Uncharacterised conserved protein (DUF2369) KOGs, Finn RD, Coggill PC anon KOGs (KOG4806) Family This is a proline-rich region of a group of proteins found from plants to fungi. The function is not known. 20.00 20.00 20.00 20.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.28 0.70 -5.31 5 148 2009-01-15 18:05:59 2007-08-03 14:07:11 4 5 72 0 105 142 0 226.20 31 59.39 CHANGED sRhplsTIV......................ClGp+NpplVpG.L+sDoTYalDVFulcppRsoSSAYluootpT....................+cpsRouPltLp-GpLtQVcLcs++G....hKFFsFslPp..tssusQospLlVHsCsGs.VRlpLFRsGKLLt+o.tuFpGhRpFsVsul+PGc+YLlRap.sNDDEuh+TlRVh.AsSTcuspuPaPsLP-DToVKhls.hRoCSSATIAWhuopDc+.lKYClYp++psoNahE+pVsctsNhCpGslS+clh.....p.VsChYsHSPsps-.pscSlhppTIuNLtPuSTY...LLsVssstssG+uLPYRolhV+TssYC ..............................................shh...............................Chstpp..hl.t.h..pppY.hslashp..pt..shhh.st...p......................tp.......h.Lpptth..h.ltttts.....t.hpht.........p..hhhhl.sC...s...lp..lpl...ptp.lhpt...h.t.t.h.......................ps..stthYhlcl..............s.spc.t..sot.........hKlh...sTTt.scpsaPpLPpDsR.................lcshshhRpCoolTlAWh....so.-.....+....p....pYClhhpc.cs.p...........s.C.......pp.............h.C....p...................pth...hl.tL.sut.Y...hh.lhh.h......u.sl.Y..hhl+ht..C.................... 
0 15 27 66 +10012 PF10180 DUF2373 Uncharacterised conserved protein (DUF2373) KOGs, Finn RD, Coggill PC anon KOGs (KOG4829) Family This is the C-terminal conserved region of a family of proteins found from fungi to humans. The function is not known. 25.00 25.00 28.30 27.50 19.30 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.23 0.72 -4.43 26 219 2009-01-15 18:05:59 2007-08-03 14:32:39 4 6 203 0 158 217 0 68.90 35 21.80 CHANGED LpYLppatps+ps....................................WKFsKp+QsallKahhs.-clPpcatshLlpYLpsLpG.suRp+Lhcpucpll ..................................................LpYLppatps+ps.........................................WKFpKs+QsaLl+phas..-.clPsp.ahshLltYlpuLpG.suRpclhppApph........ 0 48 84 128 +10013 PF10181 PIG-H GPI-GlcNAc transferase complex, PIG-H component KOGs, Finn RD, Coggill PC anon KOGs (KOG4551) Family PIG-H is a family of conserved proteins that complexes with three other proteins to form the GPI-GnT (glycosylphosphatidylinositol anchor biosynthesis transferase) complex. It appears to be a peripheral membrane protein facing the cytoplasm involved in the first step in GPI anchor formation. 20.50 20.50 21.10 20.60 20.10 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.98 0.72 -4.34 33 259 2009-01-15 18:05:59 2007-08-03 15:55:35 4 8 226 0 173 243 1 69.60 33 28.05 CHANGED Sllll+slGlQlsop......hh.spt..............pcFIPhscIhDlVIsEuF.psa.pVhaYLslll.........+spsp...............lhllF.p .............................ollllculGlQhoop.......ahsupp..................................pspFIshscIpDlVINE..uh..phh.pVhaYLslll.........cspsc...............llllF............................. 1 47 89 139 +10014 PF10182 Flo11 Flo11 domain Linder T, Bateman A anon Pfam-B_18862 (Release 22.0) Domain This presumed domain is found at the N-terminus of the S. cerevisiae Flo11 protein. 
Flo11 is required for diploid pseudohyphal formation and haploid invasive growth. It belongs to a family of proteins involved in invasive growth, cell-cell adhesion, and mating, many of which can substitute for each other under abnormal conditions [4]. 25.00 25.00 27.50 27.50 21.70 24.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.81 0.71 -4.65 14 112 2009-01-15 18:05:59 2007-08-03 16:57:57 4 12 42 0 73 110 0 141.30 29 19.18 CHANGED stssphshs.phtstshPthslsVpsVpaVtsNhYpVTlpapsspshsLppL......ppLpIlGlsssp.....sshhLautsps.s.lIs...NsscaouThhVtup....sssshhChP.sFpIpa-aspssssph..sssWp..hhspsashhhuCss.ssptp.uppshssa ............................tppsh.ph.s.h.slpsVpalp-ssYplTlphpupps.l.sL.c..L......tuLKIhGlsuPp.....tsh.Lautspp.shhI.s...sPscaosoap.....Vhup.....spsspshhP..sFpIpa-ahpusu.sph..hpsWp...hGsssas..h.sGCpt.sp....uphsh...h......................................... 0 7 32 66 +10015 PF10183 ESSS ESSS subunit of NADH:ubiquinone oxidoreductase (complex I) KOGs, Finn RD, Coggill PC anon KOGs (KOG4808) Family This subunit is part of the mitochondrial NADH:ubiquinone oxidoreductase (complex I). It carries mitochondrial import sequences [1]. 22.70 22.70 22.90 23.00 22.60 22.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.45 0.72 -3.63 45 270 2009-01-15 18:05:59 2007-08-03 17:16:03 4 9 222 0 169 257 0 102.20 25 65.39 CHANGED htpspt.........st..ssssupts..u+tp...............Ps................Gh......LascpP...G.....pphchEsWEhsaahuhhhslVlhulshsaK.P..Dhol.poWApcEAt+R....LEscG ..........................................................hht....................pss.s.ps.....sctt....p.........................sp......................uh.........Laspp..P...G.....pphchcsWEhsaahuhshsllhhushhsap.P..Dh...........s...........h..psWApcEAhcc....hEttt...................... 
0 53 87 131 +10016 PF10184 DUF2358 Uncharacterized conserved protein (DUF2358) KOGs, Finn RD, Coggill PC anon KOGs (KOG4457) Family DUF2358 is a family of conserved proteins found from plants to humans. The function is unknown. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.71 0.71 -4.15 29 393 2012-10-03 02:27:24 2007-08-06 10:34:44 4 5 158 0 238 556 155 112.80 25 44.24 CHANGED tcllchL+pDhspha..scshsYuIYscDlhFpDPlspFcs..lppYphh...l+Fhsp.aFtslpl-lpclppssp....pIpsRWsl.pshshlP.....W+...scls............hsGhSphplspsG.hIhpH .........................t..hhchL+p-h..s..p..ha...hps...hs..h..s.lYspDlhFt.....DPl........t..sacG.............hppYpt.........l...chlsp.ha...........t....p..hpl-.lhpl.....p.......p......ts......cs.......slp..sRWpl..ps.h..s.t.hs........Wc......scht.........................hsGhSpahlsp.pu.hIhpH................................ 0 84 158 207 +10017 PF10185 Mesd Chaperone for wingless signalling and trafficking of LDL receptor KOGs, Finn RD, Coggill PC anon KOGs (KOG4357) Family Mesd is a family of highly conserved proteins found from nematodes to humans. The final C-terminal residues, KEDL, are the endoplasmic reticulum retention sequence as it is an ER protein specifically required for the intracellular trafficking of members of the low-density lipoprotein family of receptors (LDLRs) [1]. The N- and C-terminal sequences are predicted to adopt a random coil conformation, with the exception of an isolated predicted helix within the N-terminal region, The central folded domain flanked by natively unstructured regions is the necessary structure for facilitating maturation of LRP6 (Low-Density Lipoprotein Receptor-Related Protein 6 Maturation) [2]. 
19.20 19.20 19.20 19.90 18.50 18.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.92 0.71 -4.53 7 123 2009-01-15 18:05:59 2007-08-06 10:54:47 4 4 99 12 84 133 0 158.90 44 78.42 CHANGED tKKDIRDYsDADhtRLLEQW..E--DslE.s-LPEHhRPssslDhSplDs..ssPEslLKhSKKG+TLMhFVoV..oGsPTccE....TE-ITpLWQsSLaNsphphpRallssNRAIFMh+DGu.AWEsKDFLVpQ-RCtcVTlEsp.YsGKs..............................tp.tpsK-EL ......................................................tKKDlRDas-ADhtRLL-QW...EcD-.slE-s.-LPEH..hRPs....s....lD.h.Spl..Ds.........spPE.slLKh..oKK...G+T..LMhF.VoV.....sGs.P....Tc.....cE....oE-ITpLW.QsSLaNs..p.hplpRahVsssRAIFMh+DGu.AWEhKDFLlpQ-RCt-VTlEspsY.sGhs..............................t................................................................... 3 26 36 62 +10018 PF10186 Atg14 DUF2355; UV radiation resistance protein and autophagy-related subunit 14 KOGs, Finn RD, Coggill PC anon KOGs (KOG4398), Wood V Family The Atg14 or Apg14 proteins are hydrophilic proteins with a predicted molecular mass of 40.5 kDa, and have a coiled-coil motif at the N terminus region. Yeast cells with mutant Atg14 are defective not only in autophagy but also in sorting of carboxypeptidase Y (CPY), a vacuolar-soluble hydrolase, to the vacuole. Subcellular fractionation indicate that Apg14p and Apg6p are peripherally associated with a membrane structure(s). Apg14p was co-immunoprecipitated with Apg6p, suggesting that they form a stable protein complex. These results imply that Apg6/Vps30p has two distinct functions: in the autophagic process and in the vacuolar protein sorting pathway. Apg14p may be a component specifically required for the function of Apg6/Vps30p through the autophagic pathway [1]. There are 17 auto-phagosomal component proteins which are categorized into six functional units, one of which is the AS-PI3K complex (Vps30/Atg6 and Atg14). 
The AS-PI3K complex and the Atg2-Atg18 complex are essential for nucleation, and the specific function of the AS-PI3K apparently is to produce phosphatidylinositol 3-phosphate (PtdIns(3)P) at the pre-autophagosomal structure (PAS). The localisation of this complex at the PAS is controlled by Atg14 [2]. Autophagy mediates the cellular response to nutrient deprivation, protein aggregation, and pathogen invasion in humans, and malfunction of autophagy has been implicated in multiple human diseases including cancer. This effect seems to be mediated through direct interaction of the human Atg14 with Beclin 1 in the human phosphatidylinositol 3-kinase class III complex [3]. 27.10 27.10 27.10 27.20 27.00 27.00 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.95 0.70 -5.47 39 520 2012-10-03 11:38:54 2007-08-06 10:55:36 4 16 250 0 371 532 2 295.80 16 56.42 CHANGED pCslCp....sspp.hhCssCsps...........p...Lhch+hchtplttcp-slpp+lpphlp...............................sththpth+schtpppc+hpplppplppl+pclcpp+pclpph+pslpp++sslp...tp.thtpccsp.lpphpsshc+tcpphpt.lpshhtppRshlhcplsplaslpph....................cpss........ptsas.Iu...s...........hslhsh+shpp................................................................hssppIssSLsahspllsLluchLulcLPtplshs..ppsh......................................................................shsths.cp.......stshppFhculshLshNlsaLsp........op...ulsls...hssa-shsplhp.lhphhhsp 
.....................................................................................................................................................................t.....h.php.thtphh.pp...pp...lt...pplpphlp.....................................................tp.thpphptphtp.t...p.+ht........l.ppplpp.ppplppt.ppplp.chptplpp+pptht.......tt....th........p.pttp..thp..p..h.pp.p...h..p..pp......cph.h.......p.........p..hppp......htt...pppp.lhp.p.lst...laslp..............................................p.ts.............................ptsas..Is..............s............................htl..s.s...s...p....sh.ps......................................................................................hst.t...plu..s...u...LuasspLlpllu.hhL..slsL.a.lt...h....t.pup..........................................................................................................................................................................................................................................................................t..h......t................s.s.p.....pFphulhhLspslt.Lph...............pp...slph.......p..hpsh..hh..........h.................................................................................................................................... 0 111 180 293 +10019 PF10187 Nefa_Nip30_N N-terminal domain of NEFA-interacting nuclear protein NIP30 KOGs, Finn RD, Coggill PC anon KOGs (KOG4036) Domain This is a the N-terminal 100 amino acids of a family of proteins conserved from plants to humans. The full-length protein has putatively been called NEFA-interacting nuclear protein NIP30, however no reference could be found to confirm this. 
21.60 21.60 21.60 22.50 21.40 21.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.59 0.72 -3.84 17 264 2009-01-15 18:05:59 2007-08-06 10:57:05 4 4 215 0 181 250 1 97.20 39 42.18 CHANGED FVSpupl-E.+c.ttp.ht.......tcpscspsppp.cu+oLYEhLQppK.ctKptEa-Ep...hKlKN.h+uLD-DEsEFLDplpppc+ttEtphccEEscpLctF+ ......................................FVSpuplpEt+cpc.pEthcthpp..........scsspt.ps.pcthD...sRSLYEpLQcp..K..spKQpEaEEp...hKhKN...h.............RuLD-DEspFLDplpcppcthEcphccE-tcpLcpa+.......... 0 62 95 145 +10020 PF10188 Oscp1 Organic solute transport protein 1 KOGs, Finn RD, Coggill PC anon KOGs (KOG4033) Family Oscp1 is a family of proteins conserved from plants to humans. It is called organic solute transport protein or oxido-red- nitro domain-containing protein 1, however no reference could be find to confirm the function of the protein. 25.00 25.00 52.20 52.00 23.50 20.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.98 0.71 -4.44 10 139 2009-01-15 18:05:59 2007-08-06 11:10:40 4 3 104 0 83 133 3 170.40 48 48.65 CHANGED MlYlL-QRL+AQs...lsp-KuspVLcDIspslasscFlsELFpspt.....lhShstl+plhcslupCSIMRLspsSMsKLaDLMhMshKaQlhssp+PtEllp.lThsHL-ulpcllsp...sctpshlctshpplh-hassLsss-hhtlRpcLLsFLpshps+VSlhlcsshQs.psGsFhl .....MLYlLDQRL+AQs...IssDKu..........c+...........VlsDIlssMFscKFh-ELFKPQc.....LYScculRola-+LAHuSIMRLNpuSMDKLYDLMTMuFKYQlhlss+P+-lLh.lTaNHLDuI+s.hlpD......ssslhppVDp......shppL...h.........-hYu.sLosG-athIRpTLLhFFQDh+lRVSlFL+cplQs.ssGpFl...................... 0 29 41 63 +10021 PF10189 DUF2356 Conserved protein (DUF2356) KOGs, Finn RD, Coggill PC anon KOGs (KOG4262) Family This is a 200 amino acid region of a family of proteins conserved from plants to humans. Some members have been putatively annotated as being integrator complex subunit 3 but this could not be confirmed. The function is unknown. 
25.00 25.00 30.10 36.70 20.90 20.50 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.48 0.70 -5.30 8 130 2009-01-15 18:05:59 2007-08-06 11:12:43 4 3 105 0 95 130 0 216.70 53 24.43 CHANGED pLWRDllh...............pPpshsPta.GlhQLhch+TS++FLt.sRLoPEMEpKlhFlhusVKFGsQK..RYQsWFpcKYLsoPEScSLhsDlIRFIssVlHPoN-lLsSDIlPRWAIIGWLLooC.........................psssspuNuKLuLFYDWLFFDPcKD.NIMNIEPAILlMhaSl...............+paPtlTsoLL-FLC.+lhcsassptc-pIRpGVpNuh+hIh-KtVl.sLs.lh-ssKL-R-Lp.hlRcpFs-FLS ............................h.LW+Dllh....................NPpsL.s.P.pFsGlhQLLptRTSR+FLt.sRLTP-MEpKLhFh.sSpV+FGpQK...RYQDWFQ+pYLuTP-SpSLRsDLIRaICuVlHPoNElLsSDIlPRWAlIGWLLToC.........................Tosl.A.suNAKLALFYDWLFF...sPcKD.s.IMN.IEPAILVMtHSh...............+sHPs.lTsTLLDFhC.RllssFaPs..hcsplRpGVhsSLphIl-Kp.Vl........spLsPLF-s.P....KL...D...+-LRshlRcpF.EFh..................... 0 35 48 71 +10022 PF10190 Tmemb_170 Tmem170; Putative transmembrane protein 170 KOGs, Finn RD, Coggill PC anon KOGs (KOG4349) Family Tmem170 is a family of putative transmembrane proteins conserved from nematodes to humans. The protein is only of approximately 130 amino acids in length. The function is unknown. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.48 0.72 -3.97 8 145 2009-09-11 14:34:23 2007-08-06 11:13:35 4 3 91 0 98 132 0 98.90 53 67.44 CHANGED LssFsEMWYaVFLWALFSSlFlHssAullAFsTLR+HKhGRF.hSIhIllMGlLuPloGGllTSAsIAsVYRAuGhsMuPlcAllaGVGQTllohllSFoRILATL ..................psasEMWYt.lFLWALhSSlFhHss.AGlLAhhTL....R+H.KhG....Ra..hSlhl....llhGhlu...slTuGhlTSAAlAGVYRuAGKpMhPhcALshGlGQThhslllSFh.RILATL................. 0 22 33 63 +10023 PF10191 COG7 DUF2354; Golgi complex component 7 (COG7) KOGs, Finn RD, Coggill PC anon KOGs (KOG4182) Family COG7 is a component of the conserved oligomeric Golgi complex which is required for normal Golgi morphology and localisation. 
Mutation in COG7 causes a congenital disorder of glycosylation [1]. 29.30 29.30 31.10 30.90 29.20 29.20 hmmbuild -o /dev/null HMM SEED 766 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.31 0.70 -13.21 0.70 -6.63 8 242 2012-10-02 15:56:29 2007-08-06 11:14:02 4 7 143 0 166 238 2 481.30 21 88.41 CHANGED DhSpFuuDcFDVKcWlNAua+uspcssuss+sct..aluoLVhKLQLalQ-lNsulEEoStQALpsMP+VlRDlptL+pEAuhL+-pMttVpp-Ip+hEp-TupsMpsLsRlDsVKpRMQhAtcoLQEADGWupLSA-lEDsFcosDlsssupKLsuMQpSLuAls-lP-aoE+psQLEsLKNRLEALsSPplVsAhsspsl.......DQupcaVpIFpcIsRhPQLptYYpKlHKs.LhphWpchspppts.........................hshsphLopFYDpLLphhcsQh+WssQlFpc.hc.llsllll-TLusLsPShsuplspAlc+Ass-.c........................LpsLl-lassousFu+slEphlttt.tp....ssLh+lh-LlcAVauPacsFptpYGsLEcopLptpluulslctu..........ElsDsVpcL-cSlsKlhsLhpuAV-RClshTsGhulspLlpALculhtpYlopaspsL+SLRhspsl-shssss......................E-WSlhQsolplLsssG-LhppsusFEppLtsplhshuuph..p.assp..ts..p....tppuss+sshptaphlspschs-hsshhcsLhslpEcu..........................................ssshslLutsppsls+hsppsH-LsaDslF.pI+QpLtpl.s+hcsaootultE.hs.DLPsFShsPQEYITsIGQYLLTLPQHLEPLlpu.sssLchsLcssch.assppttt..-hpshADhWLuplAEuopthYp-pIhpIptLos+uAKQLATDIEYLuNVL-ALGLslstsLQpIlTLL+As.-pY+sluK.....uLsh+LuusIsphRs 
...............................................................................................................................................................................................................................................h..th..h........................................t.........h.t.......l.p...lp.hptchp.s.ts.Lp.cuttht....t....h.tpl......pt..hh.........t.......t.......t.......ph................................hsttl.thppsh.h..........t.hs.th.tt.h...ht..hppp...lct.hp......l....ht.ht....t.t...................................t.st.hh...tlh......th.tp..th...a.t..............h....W...t.....................................................................h....hht.hh..http..........hh........................h....h.......h....h......h....................................................................................................................................................h...............................................t.h..............t.s..tph..ho...t....h.h...ht..h..............................................................................................................................................ht.h.t...............................................................................................................................................................................................h.........th.......h.t....hh.p.h....h..h...h..h.....................................................................h..st..hs...............P.t.hpphuphlhsls..h-sh.......................................................................................................................................................................................................................................................................................................................................... 
0 85 110 141 +10024 PF10192 GpcrRhopsn4 Rhodopsin-like GPCR transmembrane domain KOGs, Finn RD, Coggill PC anon KOGs (KOG4290) Family This region of 270 amino acids is the seven transmembrane alpha-helical domains included within five GPCRRHODOPSN4 motifs of a G-protein-coupled-receptor (GPCR) protein, conserved from nematodes to humans. GPCRs are integral membrane receptors whose intracellular actions are mediated by signalling pathways involving G proteins and downstream secondary messengers [1]. 23.70 23.70 23.70 23.70 23.50 23.60 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.29 0.70 -5.19 23 259 2012-10-03 04:04:29 2007-08-06 11:14:50 4 6 114 0 206 328 2 222.60 26 48.39 CHANGED shplcaclplhNss.........t..spcFSh-ppsl.phhhlhhllallhlhhshhhh.pthpppp.hchshphhshslhlphlShhlhhhahhhYu.hsGhGh.hhplhupllphhuphlhhhLLlhLuhGWTIop.hphs........shhhhhslhsllshhpsllhlh.sphhh-stppha.Y-uhsGahllhl+lshhlaFhhultp.sh..p+ps..t+psFhhhFshhshlaFluhPllshluhh.lssahRtKllphsshhhphluhlhh ............................................s..plpachhlhNsp..........s..hpcFSh-c........slhph..hhhh..l.lahlhhhh...h..h...hlptpt.hHssh.+.h..hh.s.u.l.hlp...hhShhh.hla.hhp.Yu.tsG.lG..................hthhucl...hphhSphh.hhhhLl....hL.u.hG.aols+..hph.s.......................st...h.hhshh.shh..sh.h..p..hhLhla..........t...p.....-s..t.p..sh........atac...o.sGhh.ll..h......lpl.shh......l.h.F...hhslh........h.....c+.s..........KptF.....ah.F.h.h........sh.hlW..FhshPl.h.shl.u.h..hst..ahRt.+llp.hh.hhhph.huhhlh.................................................................................................................... 0 93 119 170 +10025 PF10193 Telomere_reg-2 Telomere length regulation protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4346) Family This family is the central conserved 110 amino acid region of a group of proteins called telomere-length regulation or clock abnormal protein-2 which are conserved from plants to humans. 
The full-length protein regulates telomere length and contributes to silencing of sub-telomeric regions. In vitro the protein binds to telomeric DNA repeats. 20.90 20.90 20.90 21.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.36 0.71 -3.82 31 272 2009-01-15 18:05:59 2007-08-06 11:15:43 4 8 242 4 200 275 5 111.50 31 12.69 CHANGED sPlYl+DLlphLp.-s.ps...h-+hchALpsussLIRpKss...assElpphup-LsphLlsLps.pachppFp-hRhpuhlAllVsp...............................P.phupahsphaas..u-aSlpQRhslLssLuhuA ............................PsYl+DllphLptsp...s........h-+hchALpss.s.LlRppss......-lpchut-LsphLlpLp.....s.....chshssFpphRhpuhlAllVsp....................................Ph.psupals.pthas..sshSlpQRhslLssLuhuA............................... 0 65 108 163 +10027 PF10195 Phospho_p8 DNA-binding nuclear phosphoprotein p8 KOGs, Finn RD, Coggill PC anon KOGs (KOG4319) Family P8 is a short 80-82 amino acid protein that is conserved from nematodes to humans. It carries at least one protein kinase C domain suggesting a possible role in signal transduction and it is thought to be a phosphoprotein, but the sites of phosphorylation and the kinases involved remain to be determined [1]. 21.20 21.20 21.60 21.30 20.30 20.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -8.92 0.72 -4.09 13 132 2009-01-15 18:05:59 2007-08-06 11:19:25 4 2 94 0 67 134 0 57.90 46 70.61 CHANGED MSEsaaD-aEaYNacpDKtlhoG.tSGKtRTK+EushHTN+hsPuGHsRKllTKLpNoEpK+ ............p-thhDpa-hYshst.ph.hhu.G..uuKtRTK.+EAut+TN+.sP..uGHpRKlloKL.N.oEp++.................... 0 18 22 40 +10029 PF10197 Cir_N N-terminal domain of CBF1 interacting co-repressor CIR KOGs, Finn RD, Coggill PC anon KOGs (KOG3869) Domain This is a 45 residue conserved region at the N-terminal end of a family of proteins referred to as CIRs (CBF1-interacting co-repressors). 
CBF1 (centromere-binding factor 1) acts as a transcription factor that causes repression by binding specifically to GTGGGAA motifs in responsive promoters, and it requires CIR as a co-repressor. CIR binds to histone deacetylase and to SAP30 and serves as a linker between CBF1 and the histone deacetylase complex [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.62 0.72 -3.93 39 495 2009-01-15 18:05:59 2007-08-06 11:23:05 4 9 268 0 353 481 1 37.10 38 10.07 CHANGED SWHPshh+Npc+VWcsEQcthpEp++lc-hpc.EhccE .uWHPtshcNhc+VWtAEQct....ptEcKKh--htp.chpcE................ 0 116 185 289 +10030 PF10198 Ada3 Histone acetyltransferases subunit 3 KOGs, Finn RD, Coggill PC anon KOGs (KOG4191) Family Ada3 is a family of proteins conserved from yeasts to humans [1]. It is an essential component of the Ada transcriptional coactivator (alteration/deficiency in activation) complex. Ada3 plays a key role in linking histone acetyltransferase-containing complexes to p53 (tumour suppressor protein) thereby regulating p53 acetylation, stability and transcriptional activation following DNA damage [2]. 24.50 24.50 24.70 25.00 24.30 24.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.71 0.71 -4.40 23 232 2009-01-15 18:05:59 2007-08-06 11:23:59 4 2 212 0 178 230 0 133.70 32 22.73 CHANGED phDasolEERLK+EL+alGlh...................................................Dss-h...ss+pDDElsAELRpLQscL+plophNptcKp+LlslscEchAaQEYpsIL--LDpQlppAYhKRl+shtK+K+...pc+pst.................ptpAssu..................sL+sLL-+ .........................................chtslEpRlKpELhthGlL................................................................................-...s.Dh....sscpDDElhucL.RphQsEL+p.sthNps+KpcLlc..ls+EchuhQEhpph.l-slDspV.pAYhKhhpshtK++p.....p+ccts.....................tt.tshpt.................tc.hhpLh-...................................................... 
0 58 94 145 +10031 PF10199 Adaptin_binding KYY; Alpha and gamma adaptin binding protein p34 KOGs, Finn RD, Coggill PC anon KOGs (KOG4273) Family p34 is a protein involved in membrane trafficking. It is known to interact with both alpha and gamma adaptin [1]. It has been speculated that p34 may play a chaperone role such as preventing the soluble adaptors from co-assembling with soluble clathrin, or helping to remove the adaptors from the coated vesicle. Another possible function is in aiding the recruitment of soluble adaptors onto the membrane [1]. 32.00 32.00 32.00 32.00 31.90 31.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.85 0.71 -3.89 38 207 2009-01-15 18:05:59 2007-08-06 11:24:57 4 5 184 0 156 211 0 139.40 22 39.17 CHANGED EppGhc..RlhEsL-sp.Wss.hc.tp.t...............................................................................................................hppcthshcps.pphp...................................................................tt.p.-...............shEplhp+lpth+cpspp..lsc......pcRccaAtchspc .............................................................................EppGlpRlhpALpup.Wss..hthcstpp.........p...t.........................................................................................................................................................................................................................hptphtshpts.pptt.....tt............t.......t..........................................................................................t.t.tp..s....psphcl-...........................shEpLhp+lpth+-puss..LPc.ppR+phAtKsstt.............. 0 52 82 125 +10032 PF10200 Ndufs5 NADH:ubiquinone oxidoreductase, NDUFS5-15kDa KOGs, Finn RD, Coggill PC anon KOGs (KOG4110) Family This is a family of short, approximately 105 amino acid residue, proteins which form part of NADH:ubiquinone oxidoreductase complex I. 
Complex I is the first multisubunit inner membrane protein complex of the mitochondrial electron transport chain and it transfers two electrons from NADH to ubiquinone. The protein carries four highly conserved cysteine residues but these do not appear to be in a configuration which would favour metal binding so the exact function of the protein is uncertain [1]. 21.20 21.20 21.30 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.22 0.72 -4.00 6 171 2012-10-02 15:44:21 2007-08-06 11:25:25 4 4 139 0 105 165 0 82.40 31 76.86 CHANGED MPFlDlQK+LGlNlD+WlhhQSuEQPaKhAuRCHAFEKEWIECuHGIGpTRAcKECKIEaEDFhECLhRpKTM+RLpsI+cQR-KLlKEGpYTPPP ....................................................sRCasFEpcal-C...s..c...uh...G...t..s.Ru...c....KcCphth-DahEClp+pKphpRhpslp....p....p+c+...t........................ 0 29 51 81 +10035 PF10203 Pet191_N Cytochrome c oxidase assembly protein PET191 KOGs, Finn RD, Coggill PC anon KOGs (KOG4114) Family Pet191_N is the conserved N-terminal of a family of conserved proteins found from nematodes to humans. It carries six highly conserved cysteine residues. Pet191 is required for the assembly of active cytochrome c oxidase but does not form part of the final assembled complex [1]. 22.90 22.90 23.10 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -9.76 0.72 -3.93 33 259 2009-01-15 18:05:59 2007-08-06 11:26:49 4 5 238 0 182 235 2 64.60 39 59.86 CHANGED hssSCpcl+psLttCL.cScClhhccposc-CLpss...tplPpcChtl+cuah-CKRuhlDh.RpRFRGN .....ssuCpsl+psLstCL.pS-Clhhp.c..+..oPp-CLcps....plP.cCptL+puah-CKRuhl....Dh.RpRFRGp............... 0 50 89 144 +10036 PF10204 DuoxA Dual oxidase maturation factor KOGs, Finn RD, Coggill PC anon KOGs (KOG3921) Family DuoxA (Dual oxidase maturation factor) is the essential protein necessary for the final release of DUOX2 (an NADPH:O2 oxidoreductase flavoprotein) from the endoplasmic reticulum. 
Dual oxidases (DUOX1 and DUOX2) constitute the catalytic core of the hydrogen peroxide generator, which generates H2O2 at the apical membrane of thyroid follicular cells, essential for iodination of thyroglobulin by thyroid peroxidases. DuoxA carries five membrane-integral regions including a reverse signal-anchor with external N-terminus (type III) and two N-glycosylation sites [1]. It is conserved from nematodes to humans. 21.20 21.20 21.30 26.90 20.80 21.10 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.83 0.70 -5.34 7 173 2009-09-11 16:49:30 2007-08-06 11:27:08 4 3 80 0 103 157 0 229.80 42 73.87 CHANGED Pa..ssRsshshsspllslhllFlshhluFLlILPGlRt+............................RhhhhhpllLSLFlGAVIlssphsssWtsup.hhhssYKuFSpphl.AclGhalGLhtlNlTLt.........uhPstphsE.sIsYNEtFsWctspshscpYccALE+GLP.PlLhVAEhFohs.sshuhhpQYRhAGaYASthLWsuFssWlLhNlLL..hsVshYGuhhhhh..TGhhhLhuhhsat.h...hpsssl+lu......suVlph+.aGhsaWLsLssGlLCl...lsGlslslhchshP+slpshlphu.cps .....................ah.ts+.shshssshhhllhlFlshhssFllILPGIRG+.............................Rhhhhl+VhhSLFlG.......AhIl...ss.phuspWtsu.p.hp.h.ssY+uFStthl..AclGhplGL.tlNlTLp.................................uhP.h...tp.........h.s....-....sIsYNEpFsW............p..........h...........s....p..........s..........hsppYtpALp+GLP.PlLhlAEhFo.p...pshshhtpYRhAG..aYsuhhLW................sAFhh.WlltNlhL..hss.hYGuhh.hhh..TGhh.lhuhhhas.h.s.h.....h..slpls................suhLphp..aG..sFW.ls.LssGlLCh...lhGhhhshhchh.Ppthpshhp...t..s.......................... 1 29 35 67 +10037 PF10205 KLRAQ Predicted coiled-coil domain-containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4421) Domain This is the N-terminal 100 amino acid domain of a family of proteins conserved from nematodes to humans. It carries a characteristic KLRAQ sequence-motif. The function is not known. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.24 0.72 -3.98 7 131 2009-12-22 15:39:10 2007-08-06 11:27:39 4 5 89 0 81 123 0 96.40 53 16.16 CHANGED QKLApEYSKLRAQspVLK+AVl-EQupssuL+EpLKpK-poLR+hpQEhDSLuFRNpQLt+RVp.LQpELt.....lscstspKs+..ssscpsu.huppspsVhcE-L .....QKLApEYSKLRAQspVLKKuVl...-EQApss........uL+EQLK.KDQSLRKhpQEhDSLoFRNpQLsKRVElLQ-ELs..........ls.Es+..uKKsK....csucs.suphstpptsVhtE-............................... 0 29 36 62 +10038 PF10206 WRW Mitochondrial F1F0-ATP synthase, subunit f KOGs, Finn RD, Coggill PC anon KOGs (KOG4092) Family This is a family of small proteins of approximately 110 amino acids, which are highly conserved from nematodes to humans. Some members of the family have been annotated in Swiss-Prot as being the f subunit of mitochondrial F1F0-ATP synthase but this could not be confirmed. The sequence has a well-conserved WRW motif. The exact function of the protein is not known. 26.50 26.50 26.50 27.40 26.40 26.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.38 0.72 -3.98 17 197 2009-01-15 18:05:59 2007-08-06 11:28:26 4 8 102 0 96 213 0 84.90 43 59.95 CHANGED MuhGthPucaNsKlpGsY...paYGp.DpPhhpVKLGELsuWhuRR-KoPpuhsuuhSRuaWRappKYhp..sK+sGIushhpllsuhhshhYhhsYs+lKHaRph ..............................h..............................tDpp.Lh-VKLGELsuWlhh...RD.hoPsulhuAhpRuaa.Ra.pKYlp..sK+uulushh...llsuhhhh.YhhsY.p+..lKHcR..h.................... 0 17 23 68 +10040 PF10208 Armet Degradation arginine-rich protein for mis-folding KOGs, Finn RD, Coggill PC anon KOGs (KOG4154) Family This is a family of small proteins of approximately 170 residues which contain four di-sulfide bridges that are highly conserved from nematodes to humans. Armet is a soluble protein resident in the endoplasmic reticulum and induced by ER stress. 
It appears to be involved with dealing with mis-folded proteins in the ER, thus in quality control of ER stress [1]. 20.60 20.60 20.70 20.60 20.50 19.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.17 0.71 -4.56 8 204 2009-01-15 18:05:59 2007-08-06 11:29:04 4 12 129 6 120 194 4 131.40 47 65.87 CHANGED -CEVClcFlsRhhpSL.scsVchcsDsIEctlhctCcss+GKENRFCYYlGAsp-SATpIhsEVo+PLSa+MPs-KIC.EKLKKKDuQICEL+Y-Kp.lDlsoVDLKKhKVK-LKKILs-WGEsC+GCsEKoDFIp+IpELtPKYs..tt...ps+TEL ..........................-CEVClphls+F...hpsL.sc..s.sp.h.s.ttI.Ep.thhchC.pps..+...s.........K..ENRhCYYlGuhp-uATtIls.Elo+PhoaphPspKIC.EKLKK+DuQIC-L+Y..-....Kp.l.DLs....o......VDL+KL+V+-LK+ILs-.WGEpCcGChEKoDal++IpE.LhPKYs................................................................ 0 46 59 86 +10041 PF10209 DUF2340 Uncharacterized conserved protein (DUF2340) KOGs, Finn RD, Coggill PC anon KOGs (KOG4147) Family This is a family of small proteins of approximately 150 amino acids of unknown function. 25.00 25.00 46.60 31.10 19.80 17.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.67 0.71 -3.95 18 127 2009-01-15 18:05:59 2007-08-06 11:29:24 4 5 113 0 92 121 0 111.60 43 71.62 CHANGED lTVRlIKSF.YRNlKNhlh+slDLpspTsc-Llc.l+pcIpTsuuh.RPFRsl..cYDoLKIYT+AHGSKTsNLVINh-cD-pWlL...............phpsts+oLh-hGVcNETElShFNhcDYhpaKtNPcpKW .....lTVRlI+SF.aRNhKslVh+slsL..spTsc-hhphlppcltpp.....suh..PaRNh.......pYDTlKIhppAHuuKTssLVlsh-cD...-.phlL...................p..pspoLtph.GltsETEluhFshcDYhpaKtNP..p.pW..................... 0 38 53 72 +10042 PF10210 MRP-S32 Mitochondrial 28S ribosomal protein S32 KOGs, Finn RD, Coggill PC anon KOGs (KOG4106) Family This entry is of a family of short, approximately 100 amino acid residues, proteins which are mitochondrial 28S ribosomal proteins named as MRP-S32.\ Their exact function could not be confirmed. 
25.00 25.00 32.60 32.00 24.80 24.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.25 0.72 -3.67 8 112 2009-01-15 18:05:59 2007-08-06 11:29:54 4 4 76 0 66 106 0 88.40 44 60.99 CHANGED NCsV-lAlTpDGRTIVCaHPuV-lPYEHTKPlPcsD.lpssspoHEplLKo+Lp..phKptc.tPspEELSKlFaTTKHRWYP+Gp..cR+tKpsPsccc .............hpsclAlTSDG+TIVCYHPss-lPYEHTKPlPcss.....hpsst....ps+-phL+splp...phpphcptshh-pLuKhFaTTKHRWYPpup..pc+tp.sP.cc................... 0 17 21 43 +10043 PF10211 Ax_dynein_light Axonemal dynein light chain KOGs, Finn RD, Coggill PC anon KOGs (KOG4001) Family Axonemal dynein light chain proteins play a dynamic role in flagellar and cilia motility. Eukaryotic cilia and flagella are complex organelles consisting of a core structure, the axoneme, which is composed of nine microtubule doublets forming a cylinder that surrounds a pair of central singlet microtubules. This ultra-structural arrangement seems to be one of the most stable micro-tubular assemblies known and is responsible for the flagellar and ciliary movement of a large number of organisms ranging from protozoan to mammals. This light chain interacts directly with the N-terminal half of the heavy chains [2]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.11 0.71 -4.46 30 362 2009-01-15 18:05:59 2007-08-06 11:30:38 4 5 126 0 212 328 14 174.70 30 36.75 CHANGED sc-lLsoIlPPRcap.cc.uphalQhVSssPuoRpDVlpLp-pLDppLppRpAR-oGI.CPIRcELYuQCFDElIRQVTIsCsERGLLLlRVRDEl+hTlsAYpsLYESulsFGhRKslpsEps+s-hcpclppLEc-ppcLcpplpcLct+h-shc+ptpEcpptcEKp+p-ElsaLK+s.sppL+spLcpl ......................................................................h....h...pt.t.hhhp.sS.pP...s...oR.DVlpLp-.....LD....phLp......p......p...p...........A.....+....p............o....G............I....Cs.l...R...........c......c...l..YspsF........cELIR......QVTlsCsERGlLLh+lR--........h.ph....sls....s........a....p..pL.hcu....ul....s....a...uh+c.slps...c...p.t.c...t.......chpp.c....l.tp.......L.cp.chpcLpppltchpt+.h-th.p..c..pttct...pt...........tp..cth.tpclt...h...hct.t..tphp......h.................................................................................................. 0 108 131 176 +10044 PF10212 TTKRSYEDQ Predicted coiled-coil domain-containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4421) Family This is the C-terminal 500 amino acids of a family of proteins with a predicted coiled-coil domain conserved from nematodes to humans. It carries a characteristic TTKRSYEDQ sequence-motif. The function is not known. 
19.90 19.90 20.10 20.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 518 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.79 0.70 -5.58 5 140 2009-01-15 18:05:59 2007-08-06 11:33:46 4 6 86 0 85 141 0 322.70 36 59.42 CHANGED LAGQALSFVQDLVoALLNFHTYTEQRlQIFPlDSAIDsISPLNQKFSQYLHENAuYVRPLEEGMLQLFESITEDTVTVLETsVKLKtFSDHFoSYVsFL+KILPYQLKSLEEECESSLCTuAL+ARNpELapDMK+lTAVFEKLcTYVoLLALPSTcP-GLLRTNYSoVFTQluuuLHuLHDllKElSKHYsQKAoLEQELPTATQKLlTTNDCILSSlloLTNuTGKIAoFFuNNLDaFTSSLSYGPKGuTsFlSP..LSAEsMLsYKKKAuAYM+oLKKPCP-SVPYuEALuNRRVLLSSTESREGLAQQVQQSLEKIuKLEQEKEHWMLEAQLuKIKLEKENQRIAsLlKso...puGpLsssspEsusLspuuEQsEusSscup+EPTpoTSLlGMLTlTs-sp.sPDcESRE-LIKoHYMARIAELTSQLQLADSKAVHFaAECRALAKRLuLAEKSKESLTEElKLAsQsISRLQDELhTTKRSYEDQLSMMSDHLCSMNETLTKQREEIDTLK.MASK ................................................................................................................................................................................................................................................................................................................................................................................................................................................hp...tp..p.....tt...............t.t.....t......tt...h.......s...t....p....t...s.t..t......ts.h...ssph.......t....h....h........t...s.-..ps+.E.pL...Ipsaa.t+l.-Lhpphphu-SKshpaht.EscsLtp+.Lthu.-c...p+cs....hp-c......hp.ssp...........slppLp-ELpTT+psYEcQLShhS-HLsohN-pLsppp-pIpth+...................................................................... 1 28 37 65 +10045 PF10213 MRP-S28 Mitochondrial ribosomal subunit protein Wood V, Coggill P anon Wood V Family This is a conserved region of approx. 125 residues of one of the proteins that makes up the small subunit of the mitochondrial ribosome. In Saccharomyces cerevisiae the protein is MRP-S24 whereas in humans it is MRP-S28. 
The human mitochondrial ribosome has 29 distinct proteins in the small subunit and these have homologues in, for example, Drosophila melanogaster, Caenorhabditis elegans, and in the genomes of several fungi [1]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.57 0.71 -4.05 21 297 2012-10-03 10:08:23 2007-08-06 14:08:43 4 2 268 0 222 400 31 121.40 28 36.00 CHANGED pa..p.ss..ppPlpacaooYhu-t....cPt.sRKVVlplpsssL...sL..sc+pp+KhhhLAGsRYssc........TD........ll+hSo-+aspttQNt+YLsslLptLlcEupc..s-sapDlPlDsRHpph+................pFPccWt .........................................p.slphphssYhupt...........cPt.sR+Vslp.lplpcL........sL..scctpcK.hh.c.L..s........G........sRYs.p.......................................oD............................llphss-+asp........ppQNtcYhhpllssLhpEutc....s....-sa..pc..h-.hchp...h................................................................................ 0 80 122 182 +10046 PF10214 Rrn6 RNA polymerase I-specific transcription-initiation factor Wood V, Coggill P anon Wood V Family RNA polymerase I-specific transcription-initiation factor Rrn6 and Rrn7 represent components of a multisubunit transcription factor essential for the initiation of rDNA transcription by Pol I [1]. These proteins are found in fungi. 
27.70 27.70 28.30 27.70 22.60 27.60 hmmbuild -o /dev/null HMM SEED 765 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -13.15 0.70 -6.44 19 144 2009-01-15 18:05:59 2007-08-06 14:14:48 4 7 116 0 108 148 0 591.40 16 77.63 CHANGED hDPphusLhsluphhshps.css.scshslhshsSGEsussLslphlpccthphpp.......hthshhhpppsstas.s......husPI+QIpau...cshcppsshlulpp.sphhlhps.htt.............h...ppppuplsss.Lhplptsph.ushsauDVsFNPWspp...QhAllDp+GNWolWcl........tpp+pppsshpsspshs.GSl........tDsp-l......ssWtpIpW.............................sschspllVhsRpshh.hchpss.p..h.....Lhpu+shspIhDl++ssp...ppsFlLTopcllWhclss..............shp.hLSW+HahDscDsoL+l..sshts....ppphhhhlaSphs.llhsathuhpsst.hshts..ssallclsspsstpt..........phpsh.hp.t.sc.spssspph...t.phhtlatlsspLslppphhsssspss..spst....ppsshcthtspp.p...t.pp..hp.........................h+hhtc..h.slp..upspps.t-hs....Yuhtlupths..................th.E.hp...t...sphspspshhphLsp.sslssh.pshp-hsshlppl.phhpspphthsshhphhhthhh..............tstspl.slashLhpsW.ss.sp.h........+-plh+p...lstplsLohlts.................................................................................pcpt.t.......splpphhphstpp.....hscsspslLspWc....ttssss.ph........pphhpsshpps....tpsphPsh+.pppssps................pt......................sspsts.ts.p.h......spo.shsp.....s...P..oQhpsst.tsRpttptt......t+KKKRtuGF 
..........................................................................................................................................................................hhshh.h.........p......p.h.hhshssGpssp.lpl..ht..pt........................t....ps....h..t...............tsPlpplphs....pt.tt.........p....h...hhlph.tphhlhp......................t...spl.ss.lh.tl..pph.ss..hsDhsF..sPh..p...phAllD.t.G..Wslacl.............h.p...t.t.h......h....t..spl..................c.t.h..........................sth.tl.a...................................sp.pthllhsppph..h.hp..ht..st............................l...tp.t.ph...lhDhtt..t.......p.hh.lL.To.pplhhhth...........................t...hlSh.H...hss.pD.olch..p...h.............tt.hhhhl.S....p...h.h.hh.h..........t...............t.hhlt.s.......................h....h.....................................hthh......th...hh.........t..................................................................................................................................................................................................................................................th.thh...................h.....................................t...hhp.h.p.a....................tt..htt....hh..hhhs.hhh..................................................................................................................................t....................ht.h..t........t......hst.ht.phl.spW........t.pst..t..................................p........tp..h.th+.pttt..t...................................................................................................s......................................s....t.................s..........p......t..t.p...................p+t++h.GF......................................................................................... 0 21 53 90 +10047 PF10215 Ost4 Oligosaccaryltransferase Wood V, Coggill P anon Wood V Family Ost4 is a very short, approximately 30 residues, enzyme found from fungi to vertebrates. 
It is a member of the ER oligosaccaryltansferase complex, EC 2.4.1.119, that catalyses the asparagine-linked glycosylation of proteins. It appears to be an integral membrane protein that mediates the en bloc transfer of a preassembled high-mannose oligosaccharide onto asparagine residues of nascent polypeptides as they enter the lumen of the rough endoplasmic reticulum (RER). 21.00 21.00 21.20 21.00 19.80 20.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.34 0.72 -4.46 12 156 2009-01-15 18:05:59 2007-08-06 14:22:28 4 2 139 2 105 156 1 34.50 44 50.21 CHANGED MIoDspLshlA.sLGhshhlLIVlYHalsssspct ...MIoDspLshhAshLGsshhlLlVlYHalpsNspp..... 0 28 56 83 +10048 PF10216 ChpXY CO2 hydration protein (ChpXY) TIGRFAMs, Coggill P anon TIGRFAMs Family This small family of proteins includes paralogues ChpX and ChpY in Synechococcus sp. PCC7942 and other cyanobacteria, associated with distinct NAD(P)H dehydrogenase complexes. These proteins collectively enable light-dependent CO2 hydration and CO2 uptake; loss of both blocks growth at low CO2 concentrations. 
25.00 25.00 125.40 125.20 21.30 20.50 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.02 0.70 -5.58 18 117 2009-01-15 18:05:59 2007-08-06 14:46:53 4 1 69 0 41 129 44 363.50 58 91.73 CHANGED lIpRLpuGtALLsDoP-NllEVVGlLKSYGlVLDAYS+NLlYIAEcQFLshFPFFKYF.....................................NGc..hohsKLl+HahHDRINaEYAEYCMKAMhWH....GsGGLDsYLDSsEFpppAc+AIpA+h+pNPlhhsLppLFP-FL.EhlRphuYYouLGQFWRVMSDlFlsLSDRYDpGEIpSIsDVVsHItsGLVAsAucPITYpVpIcG-sY-llPtpAGLTFLhDsAVPYVEAVFFRGoPFhGTVSYNAQApQIPs-QucFpYGALYADPLPlGGAGIPPTLLMQDMhHaLPcYLp-aYcppsRGEcDl+VpIChSFQKSMFCVToAAIpGLtP...aPLDossPpcQtANRAYh-uWhsRlhs ..hIcRLEuGsuLLsDoP-NLhpllGILKuYuVllDtY.+NLlYIAEppFLs.FPFFKYF.....................................pGc..hphsKLh+HLhHDRINaEaAEYCM+AMhWH.....GsGthDsYLDosEFptsuc+sIpA+a+tNPl...hhsLp+LFP-ah.EQlRphuYYSsLG.FWcVMuslFh-LSDhYDpGclpslP-shpalhsGLhAsAu+PIhapVpItGEsY-IlPKStGhTaLh-sAlPYVEAVFaRusPFhGThSYNAQApQlPsDQp-FpYGsLYADPhPlGuAGIPPTLLMQDMhHaLP-YLp-aYpppsRGE-DlhVQIshoFQKSMFCVToAsI.puLtP...YPLDsssPcctpANRsahEuWhsRFh.p. 0 8 30 39 +10049 PF10217 DUF2039 Uncharacterized conserved protein (DUF2039) KOGs, Finn RD, Coggill PC anon KOGs (KOG3241) Family This entry is a region of approximately 100 residues containing three pairs of cysteine residues. The region is conserved from plants to humans but its function is unknown. 22.80 22.80 22.90 37.10 22.40 22.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -10.69 0.72 -3.97 16 147 2009-01-15 18:05:59 2007-08-06 15:03:23 4 4 120 0 92 144 3 91.10 46 43.81 CHANGED sQKHQN+ssFcsshapp.s.....ppc+lps...hthssl.CpRCp-hIcWKl+YsKYKsls..psp+CsKCpp+oV+cAYHplCcsCupchclCAKCt+ssp ...s.QKHQNp.huFKsshac+.s......pp.c+lss...hhhsGV.Cp.RCK-l...........lEW+lKYsKYKPLo..pP+KCs+Ctp+oV+cAYHhlCcsCApch.clCAKCsKp..s......................... 
0 36 49 71 +10050 PF10218 DUF2054 Uncharacterized conserved protein (DUF2054) KOGs, Finn RD, Coggill PC anon KOGs (KOG3136) Family This entry contains 14 conserved cysteines, three of which are CC-dimers. The region is of approximately 200 residues in length but its function is unknown. 20.80 20.80 21.40 21.40 18.10 18.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.39 0.71 -4.31 9 153 2009-01-15 18:05:59 2007-08-06 15:05:51 4 5 112 0 107 132 3 121.10 44 50.44 CHANGED psps.s.sspCRsSlQG+pLlsD-cGaVCpphsl.LhsGCCs.......h...ssphasCcsC.hsspCCssYEYCVSCCLpPsppsLLEcVL.pusssp.tlhuss.pDtFEhCls+CRTsSpSV.HENcYRs.ctKHCa .....................ss....t.ttpCRNolQG+.tLlsD-h...GaVCpRpsl..hssGCCs........................hphssscpa..sC.cuC................hs.stCCssYEaCVSCC.LpPs+...ph...lLE..c.hL...........csss.uhp..slhh.ss..pDpFELClu+CRTSS.....pSVpHENsY+ss.tKaCY............................................................................. 0 38 57 82 +10052 PF10220 DUF2146 Uncharacterized conserved protein (DUF2146) KOGs, Finn RD, Coggill PC anon KOGs (KOG3692) Family This is a family of proteins conserved from plants to humans. In Dictyostelium it is annotated as Mss11p but this could not be confirmed. Mss11p is required for the activation of pseudo-hyphal and invasive growth by Ste12p in yeast. 
17.70 17.70 17.90 17.70 17.60 17.60 hmmbuild -o /dev/null HMM SEED 895 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.71 0.70 -6.56 7 293 2009-09-11 14:24:08 2007-08-06 15:07:12 4 6 126 0 207 273 0 348.80 18 75.81 CHANGED pp.hhphDsplhVVGllGKos........ssphNphlchslhsstspts-....csplpsaapp-splLaLhhsus.Dstslh..hscslpss....s..phh-ahpt.cspasRhLLahhpVCHhlVlVEsopsFDhoahplF+tlchlR...cKhlhph.....L.phl+ssslup.hppcsRhCsPRlLFhFptsss..............sKPcp+-.hcpLEpslE-pIhclLRpppllTNsSusuLhslPpsptFVah.ssc.hcpDslhc.l-hLhtthcps......p..-p-p...lttstshpths.s.t.hpF...ptphpstppch-polhphlhpHVp.sL.ctGh--uss..+pttpopF.l.s.pcWh-hhppLaplhl....pNspcP..s.chus..hK..s.hcsh-t.lch-pKF.tphCpcuL.huh.tYpp.hP.pYssshHcpphspAhpha.cpuRGsp.pthh.pLpp.CpphWpSutptCE..ShhupsCshthH.s.t..p............HsuushhlSoCsCGRpQshRp-Pasl+pANa-FYph.s.pC..CttL.+hpFPlapPso...sDhtsAch.......spshpsh.p.tptpcp.s-ttsp.ch.pshp..hpsspp.......S.s.ssslshph.......pssctstppsps.tpssspp...............t..tpppt+-......tt.tthth.spos-aL.GhspssSshGLLPhFPSWtLssLGsSSlYsHssGLpp..QsuFlsGusaLhPWDV.lR..............hchpsWsAshpplpsc+tu..ppu+pcc+sDhhplKlFVGhEYECuRGHRFhhs.uPDpll+s.Gsslh+-Sus+.....VlpsDMPlYhPCsC..Rs..sphAQLhRlHVVTPKAPVplhl-PKVpl..pph....saspG.......pP..............lcLspSuYWILRLPaVYpG-pGshhsPpEhss..shslhtGhLhsshh 
..............................................................................t................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 2 74 98 149 +10053 PF10221 DUF2151 Cell cycle and development regulator KOGs, Finn RD, Coggill P anon KOGs (KOG3711) Family This is a set of proteins conserved from worms to humans. 
The proteins are a PAN GU kinase substrate, Mat89Bb, essential for S-M cycles of early Drosophila embryogenesis, Xenopus embryonic cell cycles and morphogenesis, and cell division in cultured mammalian cells. 17.50 17.50 22.80 24.40 17.10 17.40 hmmbuild -o /dev/null HMM SEED 695 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.20 0.70 -13.23 0.70 -6.35 6 167 2009-01-15 18:05:59 2007-08-06 15:07:27 4 5 88 0 104 154 0 447.20 40 95.15 CHANGED M.pcspKTVFVLDHssYFuEuscphl-hDhlcus+sp......hs.luKSLWTCAVEuShEYCRIlaDlaPttKL.lRFlVSDotu+hLNo.WssssQshp-LhsAlshVGsP.sRtsPpssDhSllHGLptAIEALAc.T-.Q+E................Scsstlhc-hp......tltNcGRlIClTsA+oDscMcpLEDplspllhppNtlAAu.Sc+hh.sIspC+LVIlNhYPsG--ohVosRsLpElS..PlLpsEVHSsKAuc.lusKLspLllsHFDLASTTVTsIPMK........EEQ+AsoSANYDVELhHu+cAHs..lhpGsshhsssShK....cGusYETVTLKWCTPRtssSs-hpYCsuAhhlTPVDVsSRPSSCLTNFLLsGRSVLLEpP+KS......GuKllSHhLSA+GGEIFIHsLshsRSp.......L--PPSIu-GsGGRVsD.YRls-hGchM+ppRLhPlscp...sDslsEt.h-........ch+ppLpRhoRYaPhhhupTsIF.N.ch..tl.PLlsLIsKppLoEpDlhpCppsIasLhpMcp+p-sLshsss...GsRlKs.Kc.-EQYRlhasELEpllppasusS.+HKplhcslhssRutssptE.....-+ccuGcKh.sKtshhtK.......sstuuco-chpss.DpEc--..cs....................shsDSPsSPch.p......Kpspssl--.....................................................pththtscGshSLLslhsshlEpupSpKRpEFsGRl.sshGNhAcLYscLuEKpts 
..................................................................................................h.hslho..h-s.hEhpRlh.Dlas.t...p..lphhhsD...hhlss.W.....sh........t..p..lht.h..........hu.P..........t.t.....s.h.tGL..Alp.......sLs..o..Q.t................................................................................................NpGpllhh..........T...........php....p..ptph..l.t.h.p.l.p.N...................t.h..lp.hphhhl.hhs......t.........p.t.........t.s....h...lhp.................h..tl..Lh..papLuSTolosIPMK........E-pp..ss.t.SsNYDVplhH.ttsH.......h......t................p............................................s.....pshtlpWsTs......t.....................h.....s..u.h.hoss.stSRPS.CLhpFlhs...G+.sVhLp...p................stchhoHhL.sps......ucIFlHsLs.spsh.......h--sP..sI...p-...ssus+V....sD..YRhsph.t.hhp....hh........................................................................p.sp.hP.h.t.shhh.............l..h.pllhKpphspp-..pCpphlhpLhthttct-.Lshsph...stt...h.c..........pp.c-QYRhhatELpthlph..s.o.tHpplhpsh.thpsh..............................................................................................................................................................................................................................plht.h........t..t..............................h......................................................................................................................................... 0 38 45 79 +10054 PF10222 DUF2152 Uncharacterized conserved protein (DUF2152) KOGs, Finn RD, Coggill PC anon KOGs (KOG3778) Family This is a family of proteins conserved from worms to humans. Its function is unknown. 
21.30 21.30 21.40 22.00 20.60 21.20 hmmbuild -o /dev/null HMM SEED 604 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -13.19 0.70 -6.30 6 150 2009-01-15 18:05:59 2007-08-06 15:07:47 4 4 108 0 86 149 0 440.40 38 91.23 CHANGED RLKsh....sGhLSSpasRRLLslLhllVlhlaYlhus.hRhs.......Fsuus.s+s.sutChcsclp.Wpt.t-ch-sslps......PtE.p.h..Pal.GNGaluLDlsus.pLaVp.puR...shhh.ssFpPhlslc.tGstsEpcuosl.a+pGll+plRCh........................s.sp-ChhVpppaaAHRoRPsVaVQcIQIoNPpcpllsl-Lss.ph.sh.cthooolchhps...t.....ph.lhoGhl.sVs....sthlhlVslsspchss+LpVs.+sshc.phhhslasScPls.....spht..hph.pppApctMhclhp............hshtshcp-HhclWusLahoGhphusuKst+T.sGssINsThYhhlSpsRAPhl-...sshoppc+-slEpsLsYu-tCasuHuThp.AENLW.pchSslppLhpLhshWhLTLQKRGC+sLlpAGApGllQAMVLSFGGhpFo-sHLphphDP+sLHpsYshRGlp.YsN....spINlulllDpDsKPaLaVulcpp-p..plYAC-AGCLDEPVpLTss.+sH.FPVhVTpPlTsLLYIooDhpHLpDLRHsLHlKEll...AHEEHhhpha+......GLPhLFWhSVsulIslFHLFLhKLIasEY.......CsPusKshaRs+ ...................................................................................................................................................................................................................................h.uNGhhh...........h.....................l...h.......t..............h...s.h...h......t.............t.h...h.hh..pG..hhhhpCh.............................t.shhlp.phhAHRshsplhlQplplsNs.h.p.....thph......t............p.h..t.ps..h.h.t..............ph.l.sth.......................t..h..h...hslst.phstplplt.+sphp..h..sl.h.St.sht.........p........p.hp.p.....shpthhchhp.............s.tphhp-H..thWtp.La.sGhphp......p..ps.p.s...su.plNhT.hYhhLSps..u.h.hp.........thstp.c.+p.....phcpsLsYt-tCasGHsThc.A-NLW.tchoohtplhpLsshWhLTLpKpGC+sLlt.....sGA.GhhQuMVLSFGuLpFoppHLpaphDPc.sLHpsath+sIp.Ypp....shlNluVlhct-..s.KshLpVulc..ptp...plYAC-AGCLs-PVpLTpt.psp.FPVhlTpPlTslLYI.osDhpHLp-LRHslHlKtll...AH-cHhht.........GLPhLFWhSlsuLIslFHLFLhKLIhpE.Y.......ht.............................................. 
0 33 40 64 +10055 PF10223 DUF2181 Uncharacterized conserved protein (DUF2181) KOGs, Finn RD, Coggill PC anon KOGs (KOG3748) Family This is region of approximately 250 residues conserved from worms to humans. Its function is unknown. 25.00 25.00 30.20 28.50 18.80 20.60 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.62 0.70 -5.02 10 192 2009-01-15 18:05:59 2007-08-06 15:07:58 4 2 94 0 120 194 0 225.50 35 76.71 CHANGED DuhplTWhHAsNS+ptLs-sL..s..SslphIEADVslpthpp..usEpplPIMAHPPAhsSDlTLcEWLcpVl.........sppKGIKLDFKSlpAVcsSlDlLc.sltct....lphPVWINADILsGPsss..ossVDsopFhstlpphhPpsTLSlGWTTtassshssssYTpshVcpMhcllpt.....LsQslTFPVRAulstsShspLpWL...LspopRYSLTlWouAsDsl.sV-DLlhlRcshuhcRlYYDl.EsltsQ ....................................shh.lhW.hHAsNppt.httuL.....p.........ush.h.h....lEADVhlt..tt...sscps..............hP..IMA.H.P..P....th..s.SD...TLppaLp.plh....................ss..pK..G.I..KLDFKo.lpAlpPSlslLp..phppp.....................lptPVWlNADIltGPsss....ss.VsuppFLstVpp....haP........csTlS..GWT.T..a.h.p......h................sts....Yo.phl.ccMhplspt.......LsQsVTFPVRAu.hl.t..p..S.h.s.p.l.thL.........Lpp..S.....s..R...h....oLTlW.p..u..t...s..D..h...sl..psLlhltcshshpplaYDl.cs...................................................... 0 33 46 76 +10056 PF10224 DUF2205 Predicted coiled-coil protein (DUF2205) KOGs, Finn RD, Coggill PC anon KOGs (KOG3650) Family This entry represent a highly conserved 100 residue region which is likely to be a coiled-coil structure. The exact function is unknown. 
26.70 26.70 26.90 27.20 26.20 26.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.62 0.72 -4.41 8 221 2009-01-15 18:05:59 2007-08-06 15:08:21 4 2 190 0 155 220 0 77.70 41 58.26 CHANGED huschsut-lE.th-pEt+p+LIpQlLpLQsoLc-LupRV-uVKEEN.KLcSENphLupYIENL...MosSsVhpSoosp..tpK ....................phps.-hc.phphEt...+pcLl.....p.....QshpLQsoLc..s..LupRlDuVKEEs.KLcoENphLtpYIpNL....MS.s..SsVhpoos...................... 0 46 76 122 +10057 PF10225 DUF2215 Uncharacterized conserved protein (DUF2215) KOGs, Finn RD, Coggill PC anon KOGs (KOG3817) Family This entry is the central 200 residues of a family of proteins conserved from worms to humans. The function is unknown. 28.70 28.70 28.70 28.70 28.40 28.60 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.55 0.70 -4.93 16 259 2009-09-10 22:40:12 2007-08-06 15:09:00 4 6 118 0 156 265 0 218.00 28 54.65 CHANGED aslslppt.hphaRlhhhlhGllLhhhushLSpshsFYYsoGhulGllsslLlllahhh+lhP..++ohhhhhlhGua.uhuhYhlphhhpNlp.Ilhpa..........thaVlsalhhsGh......lphhlshp.ssshstpstphlpWslphluhshlhpSs..spluhuhllhshhh......thhhhslp.........hhhthtphh+...............tph...ssth+.LspcEhcpQGphcopcuLppLR...........passSPptssh.....pshu+lpsPpphuchhpuo.sHloss .........................................pl.l.pp.hshhhhhhhhhGlhLhhh..u..phL..S.c.s...hFYYssGhslGl....h.hslllllahht+hhP....++s...h...hh.....l.lhG..uh.sh........uhYhlp.l.....hpslp.lhhpa..................................h.allsallhsGh.................lshhlshh.hss.........s..pohpl............lpWslplluhshhh.u.s...thuhshl.lh...hhh...........th..hshp.........................hh..hhhhhhp.......................................................................hh....s...+hLop-EYp..psthpTtpuLppL+...........phspp.Ppht.W......hhsplp..sP...c.hs.p.h.ut..Hh............................................................................................................................ 
0 42 66 108 +10058 PF10226 DUF2216 Uncharacterized conserved proteins (DUF2216) KOGs, Finn RD, Coggill PC anon KOGs (KOG3819) Family This is the conserved N-terminal half of a proteins which are found from worms to humans. some annotation suggests it might be PKR, the Hepatitis delta antigen-interacting protein A, but this could not be confirmed. 22.40 22.40 22.40 24.40 22.10 22.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.45 0.71 -4.80 11 174 2009-01-15 18:05:59 2007-08-06 15:09:23 4 2 80 0 104 154 0 177.10 55 45.64 CHANGED usu--LScloDEELLpWSKEELVRRLRRsEAEKhulll-HGNLh+EVNRpLQhHLsEIRuLK-VNQKLQEDNQELRDLCCFLDDDRQKGK+lSREWQRhGRaoAulMRKEVulYLQKL+ELEt+QE-llRENLELKElC.......LhLDEE+.............sutusuGsRsSlDSQsuhs..s..u....ssRDVGDGSSTSSsGSsuSPDp ..............................s......thsD-EhhthuKE-Ll+pLR+tEu-+hshhlp+upLhpEVNRpLQhHLsEIRuLK-lNQ+LQ.......-DNQELRDLCCFLDD..DRQKG++.l.uREWQRhGRYoAuVM+cEVuhYhQKL+ELEs+Q..EEll+ENhELKELC.......lhLDEE+......................ss.usu.G.up.sShDuptsh...s.....st......hhRDlGDGSStSSsGSssSPDp.......................................................... 0 20 31 63 +10060 PF10228 DUF2228 Uncharacterised conserved protein (DUF2228) KOGs, Finn RD, Coggill PC anon KOGs (KOG3952) Family This is a family of conserved proteins of approximately 700 residues found from worms to humans. 
25.00 25.00 27.20 29.90 20.50 22.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.53 0.70 -4.83 9 139 2009-01-15 18:05:59 2007-08-06 15:10:22 4 6 101 0 86 138 0 213.50 42 61.87 CHANGED GLpLVGPF-lLuGcacssphtps.sahhHWRaaYDPPEFQTllhsspsothHhGYaRDpPsshss.hluhN-sKpusphshlG-NlFsAllhFLp+ph+pps.pKtphuAhcKl.t.LpctApphshtLpppschhKpRcKclVsKshHtuGlV..VPhs.Ksc.......lGYR.LscoD..AsLK+ll+slsc...spscpccpcshs.lQtlhThlphANDECDaGhGLELGhDLFChGs+thHclht.LLshAYshLpRspFspI ....................sLpLVGPa-lLuG+a+htpt.....tps..sa..hHWRFaY..DPPEFQ..........Tlll.....s.spp.....sthHhGYa.R..DsPschPs.hluhN-.u..p..pss.h...Gs.NlFsAlhhal.cchp....t.hs.p..p.thshhppl...........tt...Lpchspp..shpLppps.hh..........h+.pRp+.+.lVs+TFHtsGlV..VPhD.+sp..........lGYRpLsto..D..spL++lhctlhc....utscpp+.psh.........s.lQ.EhhohsphAsDECDaGh..GLELGhDLFsh...Gp..c....h........hpphhtpLLshAYpLLpRs.FhpI.................... 0 34 40 65 +10061 PF10229 DUF2246 Uncharacterized conserved protein (DUF2246) KOGs, Finn RD, Coggill PC anon KOGs (KOG3994) Family This is a family of proteins conserved from worms to humans of approximately 300 residues. The function is unknown. 
21.80 21.80 21.80 24.40 21.40 21.60 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.85 0.70 -5.11 9 180 2009-01-15 18:05:59 2007-08-06 15:10:35 4 5 127 0 98 165 1 216.90 32 67.15 CHANGED tLhp+sstsRuhshsssuuScpshhshhs.shhs+s.shPc-shGsFGspDpphpLsGslsassclsuhshphcs.h..ph.lP-sls.s.pscppphhhsp.lsE.......lh.pclpps..p..hssuplEsuhppCP-LL++-htplFP....shsssslTVlTlTQ+s......hupthEp-REpLhp+FlpuAKEIC.sLhotGYWADFIDPhSGtsaFushTssTLacTDsRaRpLGF+lEDLGsCpVIpHphhGTphFVGTIFTsAPscSslht+Lhu .................................................................................s.................................................t...t.ht....p..h.sp....s....t.............................t..h........t....th......t.lt......................ht...............ht.p..splEhslpsC..P.cLL+c..chcplF..P...................ph...........ss...splhllsl..oQ+o.....p.s.hs..hspp.sEtE+EhLhc..cFlpsAp-lChsLp........spGYWADFIDPpSGhsahu.s.h.s.ss.slh.-sDp...........p.a..p.p..L.......Gapl..ps.hGsC+VIpHshWG.o........psas...........GolFTsAssp...h.pl..s................ 0 36 50 75 +10062 PF10230 DUF2305 Uncharacterised conserved protein (DUF2305) KOGs, Finn RD, Coggill PC anon KOGs (KOG3975) Family This family of proteins is conserved from plants to humans. The function is unknown. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -12.07 0.70 -4.98 27 346 2012-10-03 11:45:05 2007-08-06 15:11:11 4 7 258 0 233 951 208 239.00 24 77.85 CHANGED schlllhIPGNPGLhsFYppFlptLtpph..............sspatlhsloauGa.shpsps................pchasLpsQlcHphshlcphls............hch+lhllGHSlGuYluhcllc+hs.................phplhtshhLhPTltchucSssGphhohh....hht.hhhhhhssh.hhhhhthLPttltphLl.phhhsss.................sh.ssshh....hhs.pslppslaMAtpEMppltp.c.....-phhpt....................+lhFaaupsDpWss.phhc-lhcths..p.........ss.....hpls-.cslsHAFs ...................................................................................s...llhhIs......GNPGlht.aYt.......FhptLhp..h.........................................................t.ph.lhshuhhGa.s..spp....................................p..tcha..s..L..p...tQl..c..a...phph..lpph..l...................................tphpllllGHSlGuYlshclhcch..........................................thplh.tshhL.....a...........P.......T..l..p.......c.hs..p.S..ss.Gph....hs.h.............ht.h.h..hhh.hhshh...........hhh......th.....lPt..hh...hp....hll..phhht.................................th.hshh......lhp..ps..ltps.l.h....hu.tpE......ht.p...ltp.p........pphht.....................h.......................................plhhha..up..pDtWss.phhcclhp.ths...............p......................h.lsp..psh.HuFs..................................................................................................................................................... 0 79 124 193 +10063 PF10231 DUF2315 Uncharacterised conserved protein (DUF2315) KOGs, Finn RD, Coggill PC anon KOGs (KOG4094) Family This is a family of small conserved proteins found from worms to humans. The function is not known. 
21.40 21.40 21.60 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.45 0.71 -4.26 6 120 2009-01-15 18:05:59 2007-08-06 15:11:38 4 6 88 0 72 116 0 115.80 41 54.95 CHANGED ppDaIGPPDs.SNLRPlhh+hs-NETcLE++LRthRpEsppWNs-FWucaNhpFpcEKE-Flcpc.......L+cEuGpppclsA--MucFYKsFLDKNaptHhhYNhcWY++NhsllhLuhtVsLpRlapth ........................DhlGPPDthSNLRPlhhhh..scsE.o..L....E....p.............cLRthRpEsppWNppFWsppNhpFpcEK--Flppp..........................l+ppsG..ptpl..sA-.-MucFYKpFLs+NappHhhYNhpWYc+NhslhhhhhtVtlpphht.h................ 0 29 35 54 +10064 PF10232 Med8 Arc32; Mediator of RNA polymerase II transcription complex subunit 8 KOGs, Finn RD, Coggill PC anon KOGs (KOG3583) Family Arc32, or Med8, is one of the subunits of the Mediator complex of RNA polymerase II. The region conserved contains two alpha helices putatively necessary for binding to other subunits within the core of the Mediator complex [1]. The N-terminus of Med8 binds to the essential core Head part of Mediator and the C-terminus hinges to Med18 on the non-essential part of the Head that also includes Med20 [3]. 
25.00 25.00 25.10 25.70 24.20 23.20 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.45 0.70 -4.84 7 254 2009-01-15 18:05:59 2007-08-06 15:45:29 4 6 216 8 171 240 1 224.60 26 88.67 CHANGED Mpp............spcph..........tsL-ulhpRLspLppSLsphltpLp......pchch.sWsoh.spFsll.upLsolophLtppp.shLcspslhPh.hhPs+scp..............sllsshLRTKssPcVE-hhttsctthts.tssputpp.................lhp.s+hhpph.............hshlochR-.Eh-..upppht.ppshp.tcsphllttht.upshp...................................t.p...sh....t.t...........ssslhp........hpou....s.pR ..................................................................p.h.............tsL-slhpRlspLppSLtshhtplp...................pp.th.sW.................sol.sphsll...sspLpolschL.........p.......c.......p....p.....sh........hc.shllhPhsh.Ps+sc-................................sl.lsshLRTK.-P-....VE-h.pphpt...t.u.s.ph.ss.c...s..utp.p..................................................ltphsch..hpph...................................hphlsp..tc.....-h.....-..tp...st.t.t......t....pp.p.......p.....p..........t-..........tphls.tt.h.t.utsht.................................................................................................................................................................................................. 0 48 83 135 +10065 PF10233 Cg6151-P Uncharacterized conserved protein CG6151-P KOGs, Finn RD, Coggill PC, Bateman A anon KOGs (KOG4085) Domain This is a family of small, less than 200 residue long, proteins which are named as CG6151-P proteins that are conserved from fungi to humans. The function is unknown. The fungal members have a characteristic ICP sequence motif. Some members are annotated as putative clathrin-coated vesicle protein but this could not be defined. 
26.40 26.40 26.80 29.90 25.20 26.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.49 0.71 -4.10 25 228 2009-01-15 18:05:59 2007-08-06 15:52:44 4 5 197 0 155 227 0 109.10 35 61.65 CHANGED hGllshlLChALGlANlFph...ssl.IlFullsllpGhlllFlElPhLL+ICPhospFssFl++hssNahRAuhYslMAllpalSlshtsTSLlusAlhLsloulhYuLAult+Q ...................................hGllshlhChhhG....lhN...lhsl.....ssl..Ishullpl..hsuhlllhlEsPhhhph...sshus.shsphlc+h.ssa.RAshYssMAll..lsls..h..s..hoSLlus....Alhhs.oulhYuLuult+p...................... 0 38 68 120 +10066 PF10234 Cluap1 Clusterin-associated protein-1 KOGs, Finn RD, Coggill PC anon KOGs (KOG3647) Family This protein is conserved from worms to humans. The protein of 413 amino acids contains a central coiled-coil domain, possibly the region that binds to clusterin. Cluap1 expression is highest in the nucleus and gradually increases during late S to G2/M phases of the cell cycle and returns to the basal level in the G0/G1 phases. In addition, it is upregulated in colon cancer tissues compared to corresponding non-cancerous mucosa. It thus plays a crucial role in the life of the cell [1]. 
24.20 24.20 24.20 34.00 23.90 24.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.61 0.70 -5.19 15 186 2009-01-15 18:05:59 2007-08-06 15:53:50 4 6 127 0 126 186 4 229.40 42 63.34 CHANGED RhLGYPRlISM-sFRsP.....NFtLVA-lLhWLlp.........................RYEPss-Isssl-oEp-RVhFlKulspFhsTKu+IKLNs+KLYtADuaAV+ELLKlTslLhsAhpos.....stp-E-s.....ottphslusKlsDLKtsRpLuS-lTs+GuuLaDLLsKEl.pl+-sRppslu..........RshElsplE+sl+puIpsspsclpphps.LsslpuDcssLEuKIc+KKpELERspKRLpuLpsVRPAaM-EYE+lEpELpclYppYlc+aRNLsaLEpQL-shs+tEptph- ..............................................RhLGYP.RhlSh-sFRs.P.......NFtLVu-lLhWLlp.........................+a-Ppspl.s..s.l-oEpcRVhFl+shsph...hh.....sKu+IKLNsKKLYtADGaAVpELLKlsslLhpAhpsp.....................................t.t-pp..........shhph.s.lss+.ht..-lKt...sRpLuS.-lTppGAsLa-LLucEh.ph+p..Rppslu..................+shElsp.hE+sh+..slpph.tplpphpp.lsslts-EssL-sKIc+++.ELERspKRLpsLp............slR..PAaM-EYE+hEpELpp.YphYlp+aRNLsaLEppL-phpchEpth.................................... 0 54 68 100 +10067 PF10235 Cript Microtubule-associated protein CRIPT KOGs, Finn RD, Coggill PC anon KOGs (KOG3476) Family The CRIPT protein is a cytoskeletal protein involved in microtubule production. The C-terminal domain is essential for binding to the PDZ3 domain of the SAP90 protein, one of a super-family of PDZ-containing proteins that play an important role in coupling the membrane ion channels with their signalling partners. SAP90 is concentrated in the post synaptic density of glutamatergic neurons [1]. 
23.70 23.70 24.20 25.80 23.00 23.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.70 0.72 -3.63 17 204 2009-01-15 18:05:59 2007-08-06 16:22:19 4 4 187 0 153 182 0 90.10 43 72.48 CHANGED oclsTPssh+cus............................+ssss.Gsptl.scNKLLSpttp...sPYush..ssC..chCKsplcQ.Gp+YCppCAYpKs..hCAMCGKpl..sspsh+poss .................sclhTPDsaKcus..............................pso.spu.G.uR+l.sENKhLosppp..............hsPYu...........p............h....sp..............C..+lCKspVHQ.Gu+YCQsC.AY+KG..lCAMCGKpl.hsTKsY+pos.......... 0 54 78 119 +10068 PF10236 DAP3 Mitochondrial ribosomal death-associated protein 3 KOGs, Finn RD, Coggill PC anon KOGs (KOG3928) Family This is a family of conserved proteins which were originally described as death-associated-protein-3 (DAP-3). The proteins carry a P-loop DNA-binding motif, and induce apoptosis [1]. DAP3 has been shown to be a pro-apoptotic factor in the mitochondrial matrix [2] and to be crucial for mitochondrial biogenesis and so has also been designated as MRP-S29 (mitochondrial ribosomal protein subunit 29). 
23.10 23.10 23.10 23.20 23.00 23.00 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.90 0.70 -5.49 29 361 2012-10-05 12:31:09 2007-08-06 16:22:52 4 8 283 0 251 385 6 276.70 23 66.47 CHANGED slVRctol-l...hctlppt.......ptsppsh+hllsGppGoGKSlhLsQshsaAhpp...sWlVlplPpspphlpusspat.ss..p.thasQPhhstphLpchhpsNp.phLpp..lp..locca.hh............tpsstsspoLh-LlphGhpcsptuh.ss...hpAlhpELpspus.............hPllhslDshsthhps..opY+ssch.......p.Icsc-Lslsphhhshlt.....spsshssGssl..LssTusspssp..sh.hsLtttts...................-Pah.....c..h.t.h.................shpVs.shoccEscshhcYatcsshlpcp............sscphspEchhhsusGNstEL.+h ..........................................hlRp..shcl........hp.hppt......................p.tt.sh+..h.ll.....hGtpGsGKohhLs.psh.p.auhpp....sWlllalPp.u.p.th.sp...s...spp.hh.ss...........p..........th.........asQPh.stphLp.........phhpsNc...phLpp...lp......lppc.a.hs................................................................tpthtts.psLh-llp....Gh.p.....p.....sp.....h..us.....ss....hthlhpELptpsp.................h.lLlslD...shsthht....oph+p.....ph.......................p.ltsp-Lslsphhh.ch............sppsh.sGshl....lstou.t.t.p..........h....s..lhttts.......................................ssah.........................................ht.ls.shs..pEhpshhpYahppthlpp....................................pp.h.cphhh.ustss..h........................................................................................ 0 96 144 210 +10069 PF10237 N6-adenineMlase DPPF; Probable N6-adenine methyltransferase KOGs, Finn RD, Coggill PC anon KOGs (KOG3350) Family This is a protein of approximately 200 residues which is conserved from plants to humans [1]. It contains a highly conserved QFW motif close to the N-terminus and a DPPF motif in the centre. The DPPF motif is characteristic of N-6 adenine-specific DNA methylases, and this family is found in eukaryotes [2]. 
23.20 23.20 23.50 23.20 22.70 22.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.21 0.71 -4.57 31 349 2012-10-10 17:06:42 2007-08-06 16:25:33 4 12 224 0 245 348 10 157.00 29 54.77 CHANGED QLSQFWYu--TAptLu...ctllpssscs.....stIAhlSAPSlYttl+p.............phsspplhLhEaDcRFsl.hutc..Fs.......aYDYNpPhclPtpl.ct..phDhllsDPPF......LsEEC.pKsuhTl+hLh+s................ph+llhsTGctMpchs.phl....sschssFhPcHppsLuNEFRCYuNF-s ...........................................................hSQFWYu-cTsphLs....ct.lhp.....ss..p.s...................splAslSsPolattl+p............................tts.ph..pshLhEaDpRFsh..a.st-..Fs..........FYDas.p........P..h........c..l..s..pp......h....pp....ph-...hllsDPPF......Lsc-ChpK...h...up...ol.chLhp.......................................cll....h....sTuth.tth..htphh............shp.s.ta.Ppatp..Ls.pahhassat......................................................... 0 78 127 198 +10070 PF10238 Eapp_C E2F-associated phosphoprotein KOGs, Finn RD, Coggill PC anon KOGs (KOG3395) Family This entry represents the conserved C-terminal portion of an E2F binding protein. E2F transcription factors play an essential role in cell proliferation and apoptosis and their activity is frequently deregulated in human cancers. E2F activity is regulated by a variety of mechanisms, frequently mediated by proteins binding to individual members or a subgroup of the family. EAPP interacts with a subset of E2F factors and influences E2F-dependent promoter activity. EAPP is present throughout the cell cycle but disappears during mitosis [1]. 
25.00 25.00 37.10 26.20 20.50 19.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.08 0.71 -4.27 15 144 2009-01-15 18:05:59 2007-08-06 16:52:27 4 2 111 0 99 138 2 130.80 42 54.62 CHANGED LY.DsctD-c-ccWVccph+tt....................tsssoDAlLsCPuChTslChcCQRHEpYpsQYRAhFshNCplsc-pll...p+phsspcp+csppcscpstptstt..............................................................................ssEhY+sVpCspCuTcVAVaD.c--lYHFFNVLsS ..............................................hY.DPchDscDptWVstpR+sh....................................................................sSDAV.LsCPuChTTLClDCQRHEpYpsQYRAMFVhN...Cpls.c.-clL..h.tt...ppc.p...pp.c..t...tpp.ppt..t.t.....t....................................................................................ttptp-hY+PVhCspCuTcVAVhD.cDEVaHFFNVlsS.............. 0 43 59 76 +10071 PF10239 DUF2465 FAM98AB; Protein of unknown function (DUF2465) KOGs, Finn RD, Coggill PC anon KOGs (KOG3973) Family FAM98A and B proteins are found from worms to humans but their function is unknown. This entry is of a family of proteins that is rich in glycines. 
25.00 25.00 25.90 25.40 22.70 22.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.01 0.70 -5.22 13 277 2009-01-15 18:05:59 2007-08-06 16:52:43 4 9 106 0 148 226 3 270.10 39 72.72 CHANGED lLDsLcsLGY.cGPlh--s...sLppAspuGhuSs-apsLstWLsscL+llssl...-Eplo...ss-Dh-o.FpLElSGhL+ELuCPYssLsoG.lssRlpspccpLhLLhaLsoELQAs+llts++sppttpcp.........sosshQtlpslshsLslscs.ssslshhthFspIps+l..pchlpph.........spsplupPLL..KcsLsspQWccLEplspsLpsEY-hRR+MLl+RLDVTVQSFtWS-RAKs+tsphschapstRpsLss....psslslAcLLAARpDl..upl.+TSSushRcpTssuIN+lLh.G+VP.DRGGRssEhtsPs.EMPsWpKRp-GG ...........................................................................................................t.........h.tuhpt.Ght..ss-aptLshaLss...plp..hspl.......-Eplp.........ts.s-...h-..p..F.hElSuhLtEht.............CPatsLhsGclppclhppps..pLhLLhaLhoELpAs+hlphpp..ppt...pt........................................ssphhp.-lpthh.sLuhscs....ssshs.h..thhstlcp.Kl..p-hLs.pl....................sssplucPLL...ptsLsstpW...Ep...lEpl...............s.psLtsEYcsRRphLlKR....LDV..T....lQSFtWSDRAKs.p..s-.plsplapPhRpsLss..........coslolAcLLA.....AR.pDL..uplh+TSSushRcp.T.ssuINKVlh.GpVP.....DRGGRssEhpsP..............MPsWppRptt........................ 0 39 53 95 +10072 PF10240 DUF2464 Fam125A; Protein of unknown function (DUF2464) KOGs, Finn RD, Coggill PC anon KOGs (KOG4000) Family This is a family of proteins conserved from worms to humans. Members have been annotated as FAM125A proteins, but their function is unknown. 
20.70 20.70 21.30 22.80 20.30 20.60 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.52 0.70 -5.04 12 190 2009-01-15 18:05:59 2007-08-06 16:53:13 4 6 92 1 109 163 0 219.10 34 81.91 CHANGED pPlTulshlushspsPpsassIspThD.ussAsLh+s...Fsp+sshYLChSps......sh.s.Vlsslplls-KsslPhGas.lscshDocppsh+KKplCl+hhP+soscsAlsDIplsu+sKpsP.sYphlG-IsuhhIahKpus.....P.slPcspsho........................................pshpshslsu.........sspPuPshPht.s.o.......psts.htts.hhpsuslYslSuh-GVPFhlpP+F....ssss.ssps.hthplcolsclcpEYsYsFssE .................................Ploulsh.sus..tspsPpsas....s.lupTsD.Gs-AsLa.+s....FtpKs.sR.YL....Cho+shs........sh.ssVlsDhplls...K.-.slPhGF.sl..p-ThDopptsh+......K.KRlClKhhP+.suscsA.....lsDIp..lh.u..+........o........Kp..sP..t.Y..phl.G.-lsuhsIWh+hup.............lPpsps.s..........................................................ps.ps..p..ss.........ss.pPu....Psh.s.t.s.s..................................ttts...ttts...h..ptu..sl.hshouh.DGVPFhl...p.+h.......t...t.t.....h.....hplpoht..clpccapYsF.hE................................................................ 0 29 39 76 +10073 PF10241 KxDL Uncharacterized conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3443) Domain This is a family of short proteins which are conserved over a region of 80 residues. There is a characteristic KxDL motif towards the C-terminus. The function is unknown. 24.00 24.00 24.20 24.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.86 0.72 -4.08 12 243 2009-01-15 18:05:59 2007-08-06 17:10:16 4 9 211 0 171 217 0 85.50 31 44.06 CHANGED ssRhtshhsstclsshlttQpph.u+Lptppc.LLphpsLupsRLppspscFtpth+hhp-hKcDL-ahh++lculKuKhtppYPpta .................pphtshlsstcl..sp.hlttQpph....s+hpppsc.Lhph.psluppRLpphpp....cF........tctt+slp-........hK.cDL-hlh++l.........cslKuKltppaPcta....... 
0 46 78 132 +10074 PF10242 L_HGMIC_fpl Lipoma HMGIC fusion partner-like protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4026) Family This is a group of proteins expressed from a series of genes referred to as Lipoma HGMIC fusion partner-like. The proteins carry four highly conserved transmembrane domains in this entry. In certain instances, eg in LHFPL5, mutations cause deafness in humans [1] and hypospadias [3], and LHFPL1 is transcribed in six liver tumour cell lines [2]. 25.20 25.20 25.40 25.30 25.10 25.10 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.63 0.71 -4.28 18 458 2012-10-03 00:20:40 2007-08-06 17:10:50 4 5 97 0 283 414 0 165.10 32 78.27 CHANGED hulLWslholshAhlplluFlpPpWlss.......ssp............sspsuphGlaphCh.......hhp..hthpCsshshsFtslsSus....a.psushFluluhhL.LsslshhuLhshCp.....spola+lCuhhQhluulhlhlGChlYPhGWcSscV+chCG.cuspaplGtColtWAahhAIluhhsuhlLoFluhlLus+ .....................................hGslWs...lholshuhhshlsFhpPhWlhs.......s..............................................tspss.FGlaphC...............................thhs..ht.pC.ts...h..........sFss.IPSss........a.+s.us.hh...luhuhhL.....lss.l.sh....h.u...L....h.hh...Cs..................s.t.ola+..ls..uhhQh.hu..u.......hhlhlG..sh.laP.GW.s...........upcl.+.p....hCG...tpss.tap.l.G.p.C.sltWAahhAlhuhhsuhlLshhshshu.p...................................................................... 0 69 96 171 +10075 PF10243 MIP-T3 Microtubule-binding protein MIP-T3 KOGs, Finn RD, Coggill PC anon KOGs (KOG3809) Family This protein, which interacts with both microtubules and TRAF3 (tumour necrosis factor receptor-associated factor 3), is conserved from worms to humans. The N-terminal region is the microtubule binding domain and is well-conserved; the C-terminal 100 residues, also well-conserved, constitute the coiled-coil region which binds to TRAF3. 
The central region of the protein is rich in lysine and glutamic acid and carries KKE motifs which may also be necessary for tubulin-binding, but this region is the least well-conserved [1]. 32.30 32.30 32.80 32.40 31.90 31.90 hmmbuild -o /dev/null HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.98 0.70 -6.04 13 257 2009-09-11 16:48:08 2007-08-07 14:41:36 4 16 131 1 160 235 12 349.50 20 82.10 CHANGED hcGLYTssEhcusslpDK-sKltFLQKhIDllphsoGcsLts+PuKIVAGpEPE+TNELLQtlu+suts.pLspcpAV++l...ttuppps.sshsp.sp-tcscssppcctcccpppcc+ccsc.c.ppppcttp.ppph...c-ppp.pE+c+p.ccKpppsstccpc.sccppp+ppsptcppctp..........................................................t.tpptpp..pppcs+ppstpp.sucsccttpstctcctpsct.s.-ps..t..tp....................................................t.p..hpt....s.p........t............t......t............................................................................................................................s......stssoAR.usPR.Kcpp.....t..sttt.uchhssVlh-shp......s-s--c-phhh.tttt.sp................st...s.....ppt-pcGtL.Vp+ILETpK-h-stsupsp....ttppt.................tsptppshssc-lppLRpplQpLs+SspPLGKlhDaIpEDIDuMppELphW+pEt+pptpthpcEpphT-sAlcPLpspLtpLEppIsDppsKIssl+usILpN-p+Ip+hltsl 
.......................................................................................................................................tt.htp+p.tKhtaLpphlthh....t..h.spst+lluG.EsctTN.hLQhhu.sh.......hp.t..thhpth..................t..t.t.........t...tt......tt...t..t.......t.p....p..t.t..tt.ttp.t..p..t............................t...................t.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 79 101 139 +10076 PF10244 MRP-L51 Mitochondrial ribosomal subunit KOGs, Finn RD, Coggill PC anon KOGs (KOG4045) Family MRP-L51 is a family of small proteins from the intact 55 S mitochondrial ribosome [2]. It has otherwise been referred to as bMRP-64 [1]. The exact function of this family is not known. 
25.00 25.00 35.40 35.40 24.10 24.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.19 0.72 -3.96 8 96 2009-01-15 18:05:59 2007-08-07 14:52:15 4 2 87 0 65 98 0 92.90 50 57.29 CHANGED hPc.KshDRWo-KRAhFGVYDNIGILGDF+hHP+-LIhG.PsWLRGa+G....NELQR.hIRK+pMVGsRMhh-D....hHNLpKRI+aL.....Y+RFNRpGKHR ..hP.h+shDpWsEKRAhFG..D.IsI.......LG......sh.chHP.pcllhu.PsWLRGapG....NELQ+.hlRK++Mlus+hasps....h+sLpKRIpYL.....YK+hNR+sKh........ 0 17 22 46 +10077 PF10245 MRP-S22 Mitochondrial 28S ribosomal protein S22 KOGs, Finn RD, Coggill PC anon KOGs (KOG3890) Family This is the conserved N-terminus and central portion of the mitochondrial small subunit 28S ribosomal protein S22. Mammalian mitochondria carry out the synthesis of 13 polypeptides that are essential for oxidative phosphorylation and, hence, for the synthesis of the majority of the ATP used by eukaryotic organisms. The number of proteins produced by prokaryotes is smaller, reflected in the lower number of ribosomal proteins present in them [1]. 25.00 25.00 25.00 25.00 23.30 22.40 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.59 0.70 -4.97 6 129 2009-01-15 18:05:59 2007-08-07 14:52:31 4 3 90 0 79 119 0 209.00 47 67.02 CHANGED pPlFhcc-VQpLLpshTpL-LD.KVFR+Rss+s.spscaKhMT--QL-cphhpslEpAcphLQMPPVlc.+p-ssc.VlAKDhuLKuh..uTsKaVFTDITaslscpcRpIVVREPDGTLpcAshEpRcRlNQlYFPLpGRplhsPhMFc.-cpLpchL-pscaEFVLDRsCVQFEP.-s-Yp+lotpVY-+lNE.................oppF-hLRSTRHFGPhAFaLAhp+tIDc.LLhDhIp+DhLcsuspLltLhptLH..P .............................................hFhctcVQplLhphTtlsLp.KsF+.th...t..p.....psspaKhMTptQLccthppshctA+hhLpMPP.Vlp.R...tshsc...VLucDthLcGh..-ouKaVFTDIoaulsc+-RhIVVREP.sGTLRcAoaEERDRhhQlYFP+cGRclhsPhhFp.-.Ep.....Lpphhppscat....lLshshsQFEPDs..s-Yh+lpppsY-clsc.................ptpa-lLRSTRHFGshsaahs.p+pIDs.LLh-.lpc-hlp-AspLlpLhphlH.................... 
0 31 36 59 +10078 PF10246 MRP-S35 Mitochondrial ribosomal protein MRP-S35 KOGs, Finn RD, Coggill PC anon KOGs (KOG4078) Family This is a family of short mitochondrial ribosomal proteins, less than 200 amino acids long. that are highly conserved from worms to humans. The structure has previously been referred to as MRP-S18 but the current numbering fits the preferred nomenclature from these authors. 24.40 24.40 24.80 29.10 23.30 24.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.12 0.72 -4.10 6 114 2009-09-11 12:55:10 2007-08-07 14:59:26 4 5 84 0 67 106 0 96.20 56 45.93 CHANGED ppsspSFuSLLRpShhlQLGsscGKlllGKIFHlV--.DLYIDFGhKFHCVCpRPAssGEcY.RGoRVRLRLcDLELooRFLGuopDlTlLEADAsLLGLlpspsp ...............s..sscSFAShLRpSsLhQ.hGs.u.csKlVlG+IFHlVp-....DLYIDFGuKFHCVC+......RPph.sG..c..p.Y.+...Gs+VRLRlhDLELoo+FLGuspDhTlLEADshLLGl.ps..p.................... 0 20 25 48 +10079 PF10247 Romo1 Mit_gmP; Reactive mitochondrial oxygen species modulator 1 KOGs, Finn RD, Coggill PC anon KOGs (KOG4096) Family This is a family of small, approximately 100 amino acid, proteins found from yeasts to humans. The majority of endogenous reactive oxygen species (ROS) in cells are produced by the mitochondrial respiratory chain. An increase or imbalance in ROS alters the intracellular redox homeostasis, triggers DNA damage, and may contribute to cancer development and progression [1]. Members of this family are mitochondrial reactive oxygen species modulator 1 (Romo1) proteins that are responsible for increasing the level of ROS in cells. Increased Romo1 expression can have a number of other effects including: inducing premature senescence of cultured human fibroblasts [2,3] and increased resistance to 5-fluorouracil [4]. 
24.10 24.10 24.90 25.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.19 0.72 -3.78 19 272 2009-09-11 01:43:06 2007-08-07 14:59:42 4 2 243 0 191 236 1 65.30 47 65.22 CHANGED sosa-+l....KMGhhMGssVGsshGhlhGsasshptGstspthhtslGphhlsSuuoFGhFMulGollRs ............PSsa-+h....KMGhhMGs..................sVGhshGhlFGsauhh+hG.......hts.ptlhpslGphhhsSu.uTFGhFMuIGosIRs.......................... 0 53 101 156 +10080 PF10248 Mlf1IP Myelodysplasia-myeloid leukemia factor 1-interacting protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4049) Family This entry is the conserved central region of a group of proteins that are putative transcriptional repressors [3]. The structure contains a putative 14-3-3 binding motif involved in the subcellular localisation of various regulatory molecules, and it may be that interaction with the transcription factor DREF could be regulated through this motif. DREF regulates proliferation-related genes in Drosophila [1]. Mlf1IP is expressed in both the nuclei and the cytoplasm and thus may have multi-functions [2]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.10 0.71 -4.66 9 258 2009-01-15 18:05:59 2007-08-07 15:00:23 4 2 114 0 123 239 1 155.10 36 64.12 CHANGED MRpMhtuF.u-PFu...hlSloDtph+u.sR.usstt..........ssa....hhuMsuhhhsMhshMpshhtsh-ph.ossssspoFSSSoVhoYSss.GDusPKVYQtTSpTRsAPGGI+ETR+olRDS-SGlE+MuIGHHItDRAHIlcRScN++TGDpEERQ-FINLDEu-AtuFD-EWppcs .............................................................................................h....tsh......s..s.t.............t.....t..t.............................s........h.shst.hh......s..h.......h..s............M.ph...............tp..h..tph....ss.s..s.s..spoFs..SS.oVhoYSps....sst....PplaQ..s..o.oppR.suPG....G..l..+..ETR+sh+DScoGl.ccMuIGHHItD..RuHllp+ppNp+oGcpEppQ-al.N..lsEs-AtsFDcEWppc.................. 
0 32 47 83 +10081 PF10249 NDUFB10 NADH-ubiquinone oxidoreductase subunit 10 KOGs, Finn RD, Coggill PC anon KOGs (KOG4009) Family NDUFB10 is a family of conserved proteins of up to 180 residues. It is one of the 41 protein subunits within the hydrophobic fraction of the NADH:ubiquinone oxidoreductase (complex I), a multiprotein complex located in the inner mitochondrial membrane whose main function is the transport of electrons from NADH to ubiquinone, which is accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space. NDUFB10 is encoded in the nucleus. 22.20 22.20 22.20 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.73 0.71 -3.99 8 155 2009-01-15 18:05:59 2007-08-07 15:00:34 4 2 122 0 94 150 0 111.50 40 70.28 CHANGED DtPVThFR-.lVEp.ps+sKasYYHpcaRRVPslspCtssDhlChaEA-hQaRRDhtVDpEIVplLppRhcsCpQhEupsHh...QsCuK.lcQaccsscsa..+YGDLGAYusARKshMKQKHRMlhER+ .................DhPlThhR-..hl-...spp+h.aYHppaR...RVPslspChpsDhlChaEAc.Qa+RD+hVDpEIlpIlp-RlcsCtp...h...EG.....ssah.......QpCt+.lcQapcsscsa..+Y..t-LGshh.ss+pshhKQKpRhh.ct....................... 0 32 46 70 +10082 PF10250 O-FucT GDP-fucose protein O-fucosyltransferase KOGs, Finn RD, Coggill PC anon KOGs (KOG3849) Family This is a family of conserved proteins representing the enzyme responsible for adding O-fucose to EGF (epidermal growth factor-like) repeats. Six highly conserved cysteines are present in O-FucT-1 as well as a DXD-like motif (ERD), conserved in mammals, Drosophila, and C. elegans. Both features are characteristic of several glycosyltransferase families. The enzyme is a membrane-bound protein released by proteolysis and, as for most glycosyltransferases, is strongly activated by manganese [1]. 
26.80 26.80 26.80 26.80 26.70 26.60 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.21 0.70 -5.34 62 1215 2009-01-15 18:05:59 2007-08-07 15:00:56 4 28 188 6 759 1192 4 302.80 22 67.15 CHANGED llh..s.G.GhNppRss..hhchlshA+hLNt...........TLVl...............P.hpp..hhWpcss.p.....Fsc...hF-l-p.........hhphl.sslclhchl.............Pphhsth....................................................phhhshphs....hsssphahpp..h..hhtch...........pVltlstssspLssphhshpl...Q+........uLpaspcIpphupphlp+hh................................psssalulHLRhp.Dh..hpsC.h.tht..ptt.htt.p............t.p..ps..hphpspsPhtspts.....shlLpslth...ps.......splYlAosp..........ht...th..psLpshhsphh...+pplsstc-ht.h.s..sphAtlDhhlsspu-hFlssp...ss.hsthlttcRchh..s.h........................ssF ...................................................................................................................................................................h.....s.G..Ghsp.+.t..........hhshlshA+hLNt.......................................................TLVl...........................................P....htt.....ha........p......c.s.....p........................Fpc........hF-h..ct................hh...p....l......t..slpl..c.l..............................P..h.t....................................................................................................................t....hph...t.....hss..ph.hhpp......hhshh.p............................tslt.l.....s.t.......h...s.spl...s....p..hs...ph..............p+h..........................................htuLpas.........p...l.pph.u...pph..lpchh.....................................................ts...s.alulHL.R..........h.c...D.h.........hptC....................t...h.tt.p...............................s..hph.psp.s.Ph.t..spts.....................shhhp..thsh...ps...................s..tl.Yl....Assp...................................hht..t.t.h....tsLp....phas.phh......p+ppl.h..s........p-...ht..h.t................sthA.t.lDhhlshpu-h.Fls
s....ss....hsthl.tcRphhsh.......phh....................................................................................................................... 0 207 476 627 +10083 PF10251 PEN-2 Presenilin enhancer-2 subunit of gamma secretase KOGs, Finn RD, Coggill PC anon KOGs (KOG3402) Family This entry is a short 101 peptide protein which is the smallest subunit of the gamma-secretase aspartyl protease complex that catalyses the intramembrane cleavage of a subset of type I transmembrane proteins. The other active constituents of the complex are presenilin (PS) nicastrin and anterior pharynx defective-1 (APH-1) protein. PEN-2 adopts a hairpin orientation in the membrane with its N- and C-terminal domains facing the luminal/extracellular space, and the C-terminal domain maintains PS stability within the complex [1]. 25.00 25.00 25.60 26.00 22.70 24.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.28 0.72 -3.76 11 135 2009-01-15 18:05:59 2007-08-07 15:39:15 4 2 126 0 86 119 0 92.60 43 78.83 CHANGED lPNEcKLpLCR+YahsGFAhLPFLWhVNssWF...F+EAFtKPuasEQppI+pYVltSAlGhhlWsllLoTWlshFQh..pRspWGshuDhlSFhIP..LG ...................ssEcKLsLCR+Yah......u.......GFAh....LPFLWhVNshWF...F+-AFhtP...sas..EQppI+pYVht....SAlGhllWsllLsoWlhlFQh...RstWGshuDhlSFsIPhG............................. 0 31 45 66 +10084 PF10252 PP28 Casein kinase substrate phosphoprotein PP28 KOGs, Finn RD, Coggill PC anon KOGs (KOG3375) Domain This domain is a region of 70 residues conserved in proteins from plants to humans and contains a serine/arginine rich motif. In rats the full protein is a casein kinase substrate, and this region contains phosphorylation sites for both cAMP-dependent protein kinase and casein kinase II [1]. 
25.00 25.00 25.90 25.70 22.80 21.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.03 0.72 -3.96 22 247 2009-01-15 18:05:59 2007-08-07 15:39:49 4 6 192 0 177 239 2 84.40 51 39.93 CHANGED sNPN+ssppp.pphpp.....h..s............s.spLSRREREtlEtQpA+cRY.KLHspGKT-pA+ADLARLAlIRcpREtsAtR+EsEKct+ ..............................................................................tNPN+st.pps..p.pssp......lshs.............................ssspLSRRERE.plEKQ...cA+...ERYhKLHttGKT-pA+A.........DLARLAlIRcQRE-AAt++EtE+ct+..................... 0 53 88 134 +10085 PF10253 PRCC PRCC_Cterm; Mitotic checkpoint regulator, MAD2B-interacting KOGs, Finn RD, Coggill PC anon KOGs (KOG3903) Family This family constitutes the major, conserved, portion of PRCC proteins. In humans this family interacts with MAD2B, the mitotic checkpoint protein [1,2].\ \ \ \ \ \ In Schizosaccharomyces pombe this protein is part of the Cwf-complex that is known to be involved in pre-mRNA splicing [3]. 
20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.47 0.71 -3.59 43 260 2009-09-11 16:00:24 2007-08-07 15:40:11 4 6 218 0 186 243 0 214.30 19 52.85 CHANGED lsLFuLs............sspptsssssss.............tsussYpPhhhsspttts.tsst..sts.pstpsspsssps.s.tt................................t..........slutup+Rphht+t...............pspllchss-pph.ssspchhhpt.t.ct.sspt...p.sslpsht.................t.uKHpLppLlptApsp+-tLE-paAsuRpN++pAuuKYGa ...............................................................................................................................................................................................................................................................................shFuh...................................................................................................t........t.....................t..t..........................................................................................................................................hts.............thstpth+phhs+tt.t...............phpll-lssDcph...sss..p....p.h...htpths.tct...shps...thp..s...sht...................t++KHQls.Llpp........A.pppc.cLcppaups+hs++pspuKYGa................................ 1 61 98 147 +10086 PF10254 Pacs-1 PACS-1 cytosolic sorting protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3709) Family PACS-1 is a cytosolic sorting protein that directs the localisation of membrane proteins in the trans-Golgi network (TGN)/endosomal system. PACS-1 connects the clathrin adaptor AP-1 to acidic cluster sorting motifs contained in the cytoplasmic domain of cargo proteins such as furin, the cation-independent mannose-6-phosphate receptor and in viral proteins such as human immunodeficiency virus type 1 Nef [1]. 
25.00 25.00 25.60 28.60 24.70 24.50 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.53 0.70 -5.84 8 272 2009-01-15 18:05:59 2007-08-07 15:40:32 4 4 94 0 174 211 0 351.90 47 46.84 CHANGED VYDQLNpILlS.DutLPE...sllLVNsoDWQGpa.....lu-LLQspphPVVCTCSsAEVQAlLSulloRIQ+aCNCNSpsPpPVKVullGuQpYLuAlLRaFV-pLupKo.PDWLsHhRFLllPL.GS.HPVAKaLGulDsRYSshFhDsuWR-LFsRsEsssosppt........................sDlluRIpQYlsG.AsssHQLPIAEAMLThKp+.............DEDSsQpFlPFlGVVKVGllEsspuo........G.D.-DusslS...........................tul.So.SPP...psoshuK-u.uTPP......................sSPShsuuhsu.uSPs....s-ulGLQVDYWsus.PsE+++.........-u-+......+Dt.ouKNTLKusFRSLQVSRLPpuG....puphssoMoMTVVTKEKNKK.....................................sshFLuKKsK-KEs-.SKSQsIEGIoRLICSAKQQQsh.LR........VhIDGsEWsDVKFFQLAuQWsTHVKaFPIuLFGtoK ..............................................lhDQLspILlS..Ds..tLPE...sllLlNssDhQGQ.h......lup...lLQpp........phP.l.VsT..sSss-lQAs.hssllo+IQ+aCNsNop.PtsVKlulsGuQpYhuulLRhF.V-pLupKs.sDWLsahRFLllPL...G................S...HPlA+YLuS.....lD.+Ys..shF.........D..sW+-LFp+.Es.ss..p...........................-...lssRltpYlsG....Ass..spQLPIAEAMLsh+pc....................DE-S..t.QpFlPFlu..s.VKVGhl.E.ssuss.......sDs-Dusss.s................................................................sl...So...oPP....pss..s...h....+.......t...s.so.PP.........................s.S.Puhs..suh..s..s...u..ps.........u-hhtLQVDY.Whst..s.s-c++....................cs-K.......cD..ssKNT.LK....ssFRSlQV.....SRLPpuG..........ps..t.h.s.ss.MuM.....TVVTKE......Kp.KK...........................................................hhhL.sKK.sK-K..-h-.......u.KSQsI-G.IuR..LICoAKpQpsh.LR..................................................V.IDGV.EWsDVKFFQLuAQWsoHVKaFPlslFuhs............................................. 
0 34 47 94 +10087 PF10255 Paf67 RNA polymerase I-associated factor PAF67 KOGs, Finn RD, Coggill PC anon KOGs (KOG3677) Family RNA polymerase I is a multisubunit enzyme and its transcription competence is dependent on the presence of PAF67 [1]. This family of proteins is conserved from worms to humans. 19.90 19.90 19.90 20.80 19.70 19.20 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.43 0.70 -5.59 26 467 2012-10-11 20:01:01 2007-08-07 15:40:55 4 6 266 0 232 338 15 296.80 40 76.52 CHANGED cpRhsSacNYssLFshll.......supss......................ssLpLPs..WlWDllDEFlYQFQshspa+s+htpps..t.................-cht.l...ttp.spsWsshslLplLpuLlp+SpIpp.hph.pts...hp..s.....hsssh...sspslhthLGYFollGLLRlHsLLGDYptALKsLcsI-l.s+cu.las+VsssaloshYalGFuYhMhRRYsDAIRhFsplLlalp+pK....p.hpppsaQ.stl.Kps-pMYtLlAIshsls...Ptp...lDEoltptl+E+.Yu................-chhp.lppu...shpsFc-lFshuCPKFls..Ps.ssshcss...h.........-shppQlplFhc-VppQptlsplRSYLKLYsolslpKLAuhh-l...................-s-.......................pl+s.LhshKpcs+pl.hhstu.slhcGchhss...u-lDahID............pDhI+ls-s+.st+pas-aFl+pltKhc ...................................................................................................h...s.....h.hthhtEalaQht.h..ap..................................................at......h.thh..hh..s.h.................................t...hhh.hGaauhlslh+hpslhGDa..ulphht....lph.t....................t...h...l..s.hsh.YahGas.hMhpcY.-u.p.h.thl.hh.p.p.............t..........K..-phh..LhsIsh.sht...........Ptt.............lD-slptthpEK.as.............................DKhh+.MppG...s..s...sac-LFsaACPKFlo..Ps.ss....sh-ts..sh.....................-shchQhplFh-EV+QQhhhssIR..SYLKLYTThslsK.LutFh-h.............s.p............................................................phph.LlshKp+hpph..W..s.u......u.........h..L-Gc...u.............s-hcahl-.....................pshlhIs-sc...tptatcaFhpplt+h................................................................... 
0 87 126 185 +10088 PF10256 Erf4 QRDY; Golgin subfamily A member 7/ERF4 family KOGs, Finn RD, Coggill PC, Bateman A anon KOGs (KOG4069 & KOG4101) Domain This family of proteins includes Golgin subfamily A member 7 proteins as well as Ras modification protein ERF4. 26.30 26.30 26.30 26.50 26.10 26.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.53 0.71 -4.23 32 473 2009-01-15 18:05:59 2007-08-07 15:41:10 4 7 230 0 323 429 0 117.60 28 53.06 CHANGED hVtI.Rca....tsshhspF........ps.taPsp..................................L.psh.lstp-FcphlsplNph.LtcAhsshshpshl-shlsslT..hhlh.hhh.....hshhc+..................pLpclppal......pphNp....phhptpGl...pllsP+cpG.Lplsh ...........................................................................h.....p.h....stGhsspFpo...cFPsp........................................L...su+..lstp-FcpolpplNsh...htcuhps......hstt..shhts.hhsChT...hhh.hhsh.........................hshhc+............................................s.h+plp+hl......ppp.Np.....cla.t..........hG..L.............phh.shcps.h.................................................................... 0 90 148 238 +10089 PF10257 RAI16-like Retinoic acid induced 16-like protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3695) Family This is the conserved N-terminal 450 residues of a family of proteins described as retinoic acid-induced protein 16-like proteins. The exact function is not known. The proteins are found from worms to humans. 
20.50 20.50 20.70 21.90 20.40 20.40 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.20 0.70 -5.52 27 432 2009-01-15 18:05:59 2007-08-07 15:53:27 4 6 190 0 274 417 1 336.50 29 42.16 CHANGED CLEahLpcpIL-sLhpLuhs...shss...Gh+ppslphaspLlup.pps...lLsatsltRPlhcL......................................lphssps....usss........................Ep-hVplLpslCsplppcPtLLshahpspppp...................................................................t.t.sh....ttpssasLFshLlsas+pp........................GRlGphARcuLLhllshu.....p.shu...................................................................................palsp.oslss.......................lhustLu.uLYspLPppl....p..........l.uhsh.......tpp..................-hsths....tL.tFhuhLpasssllpps+s....................tlsssLhcthpptFL.slltPuLLpsS-...tuhhsshshLptlLcplspssLlcphlpFLLu................................ppp.t.....tth.lhppLlppps+ho.clshssh...tlh-sllttPspthLhsLlLcsL .....................................................hhEahlpcpl..hpplhphuht.....chss......th+tp.lthaphLlsp.cps.....LLt.at.slhcslhpL.......................................lphsuts.......sssh.......................................Epch....V.LLst..lCsplt.p..c.Pt.lLphahpsp.p..t....................................................................................................................................................................................tstspa..LFshLlsal+pc........................GplGppAR-uLLhlhulu.....ts.shu......................................................................................................................................................................palsppo.hCs.......................ll...usG.Lu.uLY..opLPpplc...........................................l.u...s..W.....h.ttc...............................................Dh.ths..............tLstFhs.L-FCs...sllph.A.+s..........................................................l.tp.pLhchlp.ptFLhslhtPuLhpso.............................tphlssouYLch...
...........hL+.......pl.......s.......ps.sLlpph.lpFlL...................................................................p.pp................spts.......lhcpLlp+hsp.....c.lshsoL.......thFcpLlthsscplhhpLlLp.L......................................................................................................................................................... 0 79 115 189 +10090 PF10258 RNA_GG_bind PHAX RNA-binding domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3948) Domain RNA_GG_bind is the highly conserved U3 snoRNA-binding domain of PHAX (phosphorylated adaptor for RNA export) whose function is to transport U3 snoRNA from the nucleus after transcription [1]. It is characterised by having two pairs of adjacent glycines, as GGx12GG. 25.00 25.00 27.60 25.00 22.50 24.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.73 0.72 -4.29 17 161 2009-01-15 18:05:59 2007-08-07 16:04:49 4 16 122 5 121 157 4 81.30 37 19.68 CHANGED clsscluppLpE.+s-LlhRllpslG.phshpLhpEThpIppsGGhhsssGsR+RTsGGVFhtLlKp...p.plsc-phctIatc-+cpp .............................lss-lu..h+LpE...Kp...cLltRlVphlG.c...tsl-LhpE.....Ttc.lEpsGG.....hhl....hsG......s...R...RRTPGGVFhpLlKp...p.plspcph+pIa..-pph.................. 0 41 63 97 +10091 PF10259 Rogdi_lz Rogdi leucine zipper containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3992) Family This is a family of conserved proteins which have been suggested as containing leucine-zipper domains. A leucine zipper domain is a region of 30 amino acids with leucines repeating every seven or eight residues; these proteins do have many such leucines. The protein in Drosophila comes from the gene ROGDI. 
29.40 29.40 30.00 29.90 24.50 28.20 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.75 0.70 -5.29 24 229 2009-09-11 08:31:40 2007-08-07 16:05:10 4 4 203 0 164 223 0 258.20 25 80.33 CHANGED ElcWLlcp.lpssLsplh-sLpcCsthLh..............tt.s.phsLSos.........ps-...plKGllTRsGstIsphslpl+h.................schsp...hhhph.s.sps.hhLpQlpsspstlspulsllsshpths......................phpstsplhp.lptlhpplppu+ptLp..........hPscss....hhh.tpssssp.........................FsPsLssp..............lulclhIspscLslpl+sLctlp.......t.hst....sshshhsplh.th...pp......................................................................................tsh-.hscshsa..ssps.................................................VhhpcKhpVposDPhLhushsKLsultphlpphhssLss ..................................................................................................EhpWllpp.lpssLtpLpchLp-....Ctthhs..............................................s.tptpphsloss.....................................ps-.pl...KGhlTh.Gstlspu...clpl+h......................s+tsp.....hphs..hp.sp..htLpQlpsspsplspulp.llsshptsh.......................................................................................phpsusplhphlstlhtpL.pcu+spLp.............hPts.s.....h..htsst.sp...................................................................FsPsLPss.............................lslphhlppscLsltlhpLcshp........................ss.h.sh...........tt.........................................................................................c..sthhpa...tsp.......................................................................l.lpchhcVps...P.L.sh.shhss..t.htphhttlt............................................................ 0 40 73 126 +10092 PF10260 SAYSvFN Uncharacterized conserved domain (SAYSvFN) KOGs, Finn RD, Coggill PC anon KOGs (KOG3249) Domain This domain of approximately 75 residues contains a highly conserved SATSv/iFN motif. The function is unknown but the domain is conserved from plants to humans. 
25.00 25.00 28.10 27.60 19.40 18.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.42 0.72 -4.21 9 137 2009-01-15 18:05:59 2007-08-07 16:05:22 4 5 114 0 96 133 0 70.90 41 37.18 CHANGED lhlllWllhhsluhchs.FGslFllhShhhhIahNhtp...RppGEhSAYSlFNcsscpl.GThsAEph-R-lt .............h.hhllWllLhslhh..c.l-..FGhlahllShFhhl..ah.shcs.......t.+cpG........E..hSAYSVFN.sCcsl.GTLsAEphER-lp.......... 0 35 50 77 +10093 PF10261 Scs3p Inositol phospholipid synthesis and fat-storage-inducing TM KOGs, Finn RD, Coggill PC anon KOGs (KOG3750) Family This is a family of transmembrane proteins which are variously annotated as possibly being inositol phospholipid synthesis protein [1] and fat-storage-inducing. The members are conserved from yeasts to humans and are localised to the endoplasmic reticulum where they are involved in triglyceride lipid droplet formation [2]. 20.50 20.50 21.30 21.70 20.40 19.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.97 0.70 -5.33 40 321 2009-01-15 18:05:59 2007-08-07 16:05:44 4 6 218 0 223 312 0 213.20 27 67.70 CHANGED .tshsY...ass+cNllNhhFVKpG.WhWTolhhhhh..lhhh.............ststtps...st.phh...............................hptlhRallsTlhWhlhTp...hh.h.slhDplhshTGGpCphssst...............................................................................................................houtsC+ptGGp...........................................WpGGaDlSGHsFLLshsoLhLhpE.............................h..htphhtsh.th.t......................................hhphhhphshhlsssLlslWhahLlhTslY..FHohhEKlsGhlhGhlshhll.Y 
....................................................................sYhup+pNlhNh.hFVKhu.WhWTshhhh.h..lhhh........................shhhstt.....................................hhptlh+hslsThhWhhhTp.............lhstl.hhTG.pC..s.h...................................................................................................................t...hhoptpC+tt.GG.h...........................................Wp.............GaDlSGHsFlLshsshhlhpE..................................hth..htph.tt.t.....................................................................hphhhphhhhhhshlhslWhahLlhTsl.Y..FHshhEKl...hGhlhuhhshhhhY.......................................................... 3 58 101 169 +10094 PF10262 Rdx SelT; Rdx family KOGs, Finn RD, Coggill PC, anon KOGs (KOG3286) & COG3526 Family This entry is an approximately 100 residue region of selenoprotein-T, conserved from plants to humans. The protein binds to UDP-glucose:glycoprotein glucosyltransferase (UGTR), the endoplasmic reticulum (ER)-resident protein, which is known to be involved in the quality control of protein folding [1]. Selenium (Se) plays an essential role in cell survival and most of the effects of Se are probably mediated by selenoproteins, including selenoprotein T. However, despite its binding to UGTR and that its mRNA is up-regulated in extended asphyxia, the function of the protein and hence of this region of it is unknown [2]. Selenoprotein W contains selenium as selenocysteine in the primary protein structure and levels of this selenoprotein are affected by selenium [3]. 
22.10 22.10 22.10 22.20 21.90 22.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.81 0.72 -3.95 82 990 2009-01-15 18:05:59 2007-08-07 16:06:05 4 6 744 31 497 872 69 87.60 31 61.58 CHANGED sclpIpY........CspCpahhR............ushhsQ-LLpoF.ss.......t..clsl.....................................................................hs....us.sGs..F-ltls.....sp.....................llas+hpsu.....................GFPsscpLhphl+st ..........................................................................................plpI.Y...........C..tp....Cp.ahh+.................ushhupcLh.psFss............tl.t..pV..sL............................................................................pP.....so..uGs.FElpls................sp.......................................hlW...s+pp.sG.....................GFP-sc.tLpphl+s.h............................................................................ 0 167 270 391 +10095 PF10263 SprT-like SprT-like family Finn RD, Coggill PC, Bateman A anon KOG3931, COG3091 Domain This family represents a domain found in eukaryotes and prokaryotes. The domain contains a characteristic motif of the zinc metallopeptidases. This family includes the bacterial SprT protein. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.55 0.71 -4.57 148 2631 2012-10-03 04:41:15 2007-08-07 16:06:41 4 16 2330 0 664 2121 615 152.10 24 66.24 CHANGED lpphhpphspph.aps..htt.........plpas...........chppsAGp........shhppp..................................................................................pIcls.hllpp......spp..tlhp......................slhHElsHhhhahh..........tcspsHGs.ca+thhpplss.....................................h..phhcpass.p........................pahapC............................sCst.h..tpptpl..cppp.....................................................................atCt.......................pCp.uplthhp ................................................................................h..thhtphs.phFtp...hp..................pltas..........................................pht.o.ouGp..................hh..hcst.....................................................................................cIc.lNshlhpc................spp....hltp......................llhHELsH.hhlaht..............t+st.H.sc...-...........a+thhppVhu............................................................h.sp.hhcpath.p...........................tpa..YpC.....................................pC...p.................+p....tp....l....p.hpp.........................................................................................apCs......................pCt.tpLh...h.................................................................................................................................................................................... 0 194 353 536 +10096 PF10264 Stork_head Winged helix Storkhead-box1 domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3897) Family This is the conserved N-terminal winged helix domain of Storkhead-box1 protein which is likely to be a DNA binding domain. 
In humans the full-length protein controls polyploidization of extravillus trophoblast and is implicated in pre-eclampsia. 21.20 21.20 22.50 21.40 21.00 19.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.43 0.72 -3.80 8 151 2009-01-15 18:05:59 2007-08-07 16:06:58 4 4 78 0 92 140 0 78.10 53 9.84 CHANGED MoPIsQSQFIPLuElLCssISDhNusphsVTQEsLhp+LpppaPGhshPSp-lLYsoLusLl+ERKIYpTucGYFIVTPQ ........MoPIuQSQFlPLuElLChAISsMNuAppsVTQEuLhE+LsppaP.G..ls..sPSpElLhpTLspLl+ERKIY.Ts-GYFIVTPQ................ 0 20 29 55 +10097 PF10265 DUF2217 Uncharacterized conserved protein (DUF2217) KOGs, Finn RD, Coggill PC anon KOGs (KOG3831) Family This is a family of conserved proteins of from 500 - 600 residues found from worms to humans. Its function is not known. 25.00 25.00 36.90 36.70 24.70 24.00 hmmbuild -o /dev/null HMM SEED 515 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.62 0.70 -5.74 11 211 2009-01-15 18:05:59 2007-08-07 16:07:11 4 6 90 0 112 184 0 382.80 38 89.17 CHANGED hshushstl+los.utK+llhusAhGsluLhhhA+pL+RR+t++c.......t...hh..........op+s.ut+tspsoupR.p......sssscsNDshSsluSttsuKpSuSupSluShpshpSsussuss..ussW-sts..p-sh...sssssosEsLYlMGMELFEEslppWEpALsh+pctst..u.hs.................sspst-Fsc+LEsLLptAYpLQE-hphhh...cssshh...............sD....c+shthshhststshptscssSlsSsDSFhSAsElh....-ph-hp..shsh.shc.tshYEEALpLsc-GcVsCRoLRTEhLtChoDsDFLAKLHCVRQAFpsLLpDcsNphFlu-sGRQlLouLls+AcKsPKcFL-uYE-MLpalpps-sWssschELEuRGVpsMsFYDIVLDFIlMDAFEDLEsPPsSVhAVlpNRWLSsSFKETALsTusWSVLKuK+phLKsP....sGFhuHFYsloEplSPllAaGFLGP+ppLp-lCsaFKcQlltFL+DlFDh-KVRYoolEpLAEDILplh+RRs-llhsYh 
.........................................................................................................h.......h..s..sh+hlhhshshush...h...hhup.h+R+t..................................s.h....h.t.hstpp...................ss.ppss.p..hsh.hs.u...u+..suss........tShh......uh............h.S.t.ss.s...s.....hpt.s...t..h.......h..hssppL..hh...GMEhhEcslppWEpALsht.pp...t.t....s............................................................................................p..ptcFhcclpsLLptAYpLQ.Epht...h...h....ss.hh.......................D....cps......hshtt...s...t....ht..t.sp..shsSt-SFh..SAh....E.h....-.hpht..t...h.....p...hshYppAhphsc-spl.sR......sl............R...o-hhtC.uDt-aLAKLHClRpAaphlhp-....tsp.ahscsG+phlssLhhhup+sPKtF..sa--Ml.ah.pspp...a..sphp.ELtsRG......VhshsFaDllLDFILhDuF-DL-sPPsSl.sVlpNRWLssuFKETAlsouhWSl....L.K..tK+p.hh.hs....sGFhuHFYslsEplSPlhs.aGFLGP..p...pL.-lCthFK................pQll.aLtDhFshppsRaooh.thucDlhph..pRsp.l............................................... 0 32 39 75 +10098 PF10266 Strumpellin Hereditary spastic paraplegia protein strumpellin KOGs, Finn RD, Coggill PC anon KOGs (KOG3666) Family This is a family of proteins conserved from plants to humans, in which two closely situated point mutations in the human protein lead to the condition of hereditary spastic paraplegia. Strumpellin contains one known domain called a spectrin repeat that consists of three alpha-helices of a characteristic length wrapped in a left-handed coiled coil. The spectrin proteins have multiple copies of this repeat, which can then form multimers in the cell. Spectrin associates with the cell membrane via spectrin repeats in the ankyrin protein. The spectrin repeat is a structural platform for cytoskeletal protein assemblies. 
25.00 25.00 45.20 32.30 21.50 23.40 hmmbuild -o /dev/null HMM SEED 1081 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.63 0.70 -7.15 12 186 2009-01-15 18:05:59 2007-08-07 16:07:20 4 5 115 0 130 181 15 759.30 41 92.15 CHANGED soIIAElLRLupaIPp.F...s+t-ppcYucIl.hDFpYFptt-thEscIpps.cL.pLDE-h+poal.llpRFatlFcSlacYhhDLppal-ElppGhalppTlEolLpst-GKQLlCEulYLYGVMLLlhDhpIsGhlRERlLVoYhRY.....+usu-.sNhs-VscLhRuTGYsssss.s............................sYP.pYFsRl.lscphIshlIGRlRoDDlYp.ht.YP.P-HRSsALusQuuhLYVhLaFsPplLpspsuhMREIVDKaFsDNWlIsaYhGhTVsLhsAWcsFcAA+sAlpNTls.pslp.hhp+hppphsplhsphpplLpEGhLsEphlLDslp.plhshlR-sNVslRWlhLH.......................spppt+pspphl.h..phspcplhpLLLsouQhEhpl+phhpslLppKpp+Wpcs+cpusp+hpcLuchFSGs+sLscsppsppLppWFtplupcIppLshs-s..stuuRpI.pLlpALEpVp-FHQlssNlQVKQalt-TRphLppMh+hlNIc-csLhplthluDhSYAWclls...sasshMQppI+ppPphslplRusFlKLuShL-lPhlRIsQupS.........D......L.oVScYYSsELVsFlRcVLpIlPpohFslLtpIIpl.TNsl+EhPo+L-KcchK-aAQh-pRtpluchTtsIuhaopGILAMcoTLVGlIclDP+QLLEDGIRKELVppIscthapsLlFs..........t....ptpssphppcLspLupplcGhRpSFEYIQDYVNlpGL+IWp....EEhsRIlpapVEpECNuFl+pK.l.t..WpSpYQSp....IPIPpa.sh....stpuhsFlGRLspcllplT-P+solals.hsuWa-hcs.hpEVlus+.hauplpcslGshGLsuLD+Lhuahls+cLpphlpphpttl...-pthhsslpsltspLp....stsshscp..shchYtphhpphpthh......................splhthlhplGQhQllRppIutcLphpsKlcSspL.ssLcshNcAlLtD.lppHh...........ppspopPhPsp....llsslssaLpssGlp-PhpKlY..........................................hsscshPph.slhLhlhsLtpl..s+hpascplsshls.pctps....lDspsLhlGlhslL+QFpspppphaLsaluQal 
......................................................................psllAElhRLtphlP..a.........................ttt..cat.ll.hDFpYhp..-.h-t..plptp.tL.tl-cchhps...hlpRFa.hFpulhtYh.-l...palp-lt..p...Gh...alp.oh-slh.s.cu.pQLh......sEshaLaGshLLlhD.ph.G.lRERhlluahRh...................p..pss...sphcclstL..h+sTuh........................................................................s.........pYPpt...aFtRh.hsp...hlphllupl+sDD..lYpt....a.P.Pp.HR.SsALu.QuuhLaVhLaFts.phLpsp.shMREIVD+aFsDNW..l................lshahGhslsL.........t........WtsacAApsAlttslp.t.tlp...htp...t.....tp...p......hpthlpcG.....lppphl.....lp.ph..plh.hhRpsNsslRWhhLH...........................tt.pphtp.l.t.....thp..t.lhpL.LLpsuphEh.l+phhtphLtp+ptpWtt.+pps.pph.cLuphFuG.hs.L.sp..pstpL.tWF.phupp.........l.pLphtp....ttsuRhh.plhpALcpVppacplptshplpphlt-opt.LppMh+hlslp-chhhphphlsDhSaAWtllp...tah.hhQptI+.pPt.shtl+uhFlKhu......Sh.......L-lPlhRlsQspS.........D......L.sVSpaYSs-LssalR.pVLpllPpohFt.L.plh.l.sp.hhc.hPs+lpKcc................l+cauQht.Rhpl.AphTttluhaopGlLhMcpThlGllc.lDP+pLLEDGIRKELVppls.thpt...L.Ft.....................................t.t.h..pLtthut.hpuh+pSFcYlpDYlpl.GL+la.....EEhpRIlphtlptEsp..sFhppp...h.....p..app........hshP.h..........t.u....sFhG+LhppllthTs.sp..ohahp.h.sWa.........p.ps..........p-lh.s.p.hhs.ltpslu..GlsuL-plhshhlsppLp.h.lt..htt.l......h.thhtth.ttlt..................s.....th..tt.....phY.thhtthtt.h.......................s.hhphlh..plGphQllRp.Is.pLp.ss+hcup.L..sslpshNpull.p.ltt................................................................t...shstt......hl.plpthLphsGh.pPhpplY..........................................lp.s.p....h....h..shh.hlhhlspl..s+h.astp....hs......hh..tt..ts...hD..shhhGhhslLpQFp......hlthhs.h................................................. 
0 57 72 102 +10099 PF10267 Tmemb_cc2 Tmcc1; Transmemb_cc2; Tmemcc2; Predicted transmembrane and coiled-coil 2 protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3850) Family This family of transmembrane coiled-coil containing proteins is conserved from worms to humans. Its function is unknown. 23.30 23.30 23.30 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.27 0.70 -5.88 15 351 2009-01-15 18:05:59 2007-08-07 16:07:43 4 5 107 0 168 289 1 324.30 46 73.96 CHANGED +s+tsh-pLpQKILcloEQl+lEQsARD-NVsEYLKLsss........ADKQQsuRIKQVFEKKNQKSApoIuQLpKKL-pYH++LKElEpss...........tppP+-sl+-hppuLK-sssp.p..........httl+s.hsGhS........hVhSKPREFAsLIRNKFGSADNIsphcs...............................s..Lc....shtsEp..ss+uLuuuu.ohsspsKYsSD-..-CSSsouu.Sssususps........................psstpsspsslstlhEELpEI+csQupLc-sh-sLKsphp+DashlspoLQEERYRtERLEEQLNDLTELHQNEhsNLKQ-LAohEEKlAYQSYERARDIpEslEsCQTRISKLE..pppQQQslQLEuh-p.pAR..sLLsKhINllLuLhsVlLVhVSTlAshssPLh+oRh+lhsTllhlhllshhW+pWcpl .........................................................................................+.shspLpQKILKloEQI+lEQpuRD........sNVuEYLKLsss........A.DKQQ.suRIKQVFEKKNQKSApoIuQLpKKL-pY+++L+ElEpsG............cpsK-sh.+DhppuL+-stup................s+suhsuhS.s......................shVhsKsREhAs.LI..RN.KFG.SADNIspL+s.....................................................................s..h-....ph.s-t.......usps.hus.u..sh.s..psKYsS--...-C.SSsouu.Sss.us...uspu......................................tshh..tttthphlhcEl.pEl+cspspLp-shEsLKtp...hp+-.h...s....hh.psLQEERaRhER.LE-QlNDLT-LHQpEhhNLKQcLAshEEKlsYQuhERuRDIp.EslEsC.TRloKhE.......ppQQsl.........Qh.-shp.....A..+....sLLsKhINl...lL...slh.sVlLVhV.SThAphh.hPhh+o..........R.......+hhsThhhlhhlhhhh+pWtt.h............................................................................ 
0 31 46 87 +10100 PF10268 Tmemb_161AB Tmem161AB; Transmemb_161AB; Tmem161AB; Predicted transmembrane protein 161AB KOGs, Finn RD, Coggill PC anon KOGs (KOG3978) Family Transmemb_161AB is a family of conserved proteins found from worms to humans. Members are putative transmembrane proteins but otherwise the function is not known. 18.30 18.30 18.60 18.50 17.10 18.20 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.81 0.70 -5.98 8 239 2009-01-15 18:05:59 2007-08-07 16:08:00 4 7 107 0 142 235 2 324.50 35 90.47 CHANGED AllGlQLVVTLlhsSlhQKluPHaSFARWLLCNGSLhRYpHPTE-ELRsLAG..KQK..sK.u++-R+tNG....ssEsKPholPKDIDlcL-TpPlsshDsLsLRaFsEYQWLlDFuVYuslVYLlTElYahhhssscEhNISllWCLLVlhFulKlLhoLTsaYFpSEEG.GERSlCloFuFhaLLlAMlVLlVsEchLEhGLEsG...............asShssshpsFLcpQGLpsuu........PhoKLshKlhLAVhCuhLGAhLTFPGLRLAQMHLDALphspD+.hlQhLLHlSFLsPlllVlLWlKPIsRDaLpssshG+.SssLhSsssF-TLRLWlIllLCVLRhslhRhHLQAYLNLApcpV-QMKKEAGRIoslElQ++luRlFhYLCVVuLQYlAPllLsLahTLhLKTLGsaSW.GlhsEs...s..sh.ssscsuPlss....................t-s-t......phpsTstplpsuhuSL+slFTPllaRGlhuFLTWWlusC.FsoSLFGlaYHQYLstu 
.................................hhhh.hhp+h...hhshhchllsps.Lhha.hPo..-ppLp.hss..........p.........p.tt.+p.p........pt............t..hplP+.sh....hpL.pp..l..h..Dhl..h..ha.p..h.ahh.h..h.s...hhlhhh...s.th.h..hh...................tp.....h..Nls..h.h.a.hhhsh.................a.s.h..p..h...l...hhp......................ps..hEpshhlshshh.hhlhuhhh..hhtcphh-hslp.u...............hs.shspsh...hhpppGh...t.....................P.h.sc.l......hh+h..hL.Ah.hs.uhlGuhhsF.PulRhAphah.......D....u.......lp.......h.....p.t.p.hhp......h..lLphsal.PhhhlhLWh..................+Pls+p.hlh...............................ht..t...........................l.......h....s......t.apphRlhhllhhshh+hhhh....hLQuYLshA.tth.p.+p.cuGpltshp....hpp.hl...hahYlsllsLQahs.Phlhh.l..hs.hhh.p....t....h.s...t...hs.h......................................................................................................................................................s.................................................................................................... 0 45 63 105 +10101 PF10269 Tmemb_185A Tmem185A; Transmembrane Fragile-X-F protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3879) Family This is a family of conserved transmembrane proteins that appear in humans to be expressed from a region upstream of the FragileXF site and to be intimately linked with the Fragile-X syndrome. Absence of TMEM185A does not necessarily lead to developmental delay, but might in combination with other, yet unknown, factors. Otherwise, the lack of the TMEM185A protein is either disposable (redundant) or its function can be complemented by the highly similar chromosome 2 retro-pseudogene product, TMEM185B [1]. 
22.90 22.90 22.90 22.90 22.70 22.80 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -12.19 0.70 -4.76 13 397 2009-09-13 05:38:14 2007-08-07 16:08:33 4 14 158 0 224 366 7 149.00 22 56.90 CHANGED L+LDshlsWSaWhVFsPlW.haphllhhGuhlssss...........tspscahAhllulshpLlLlsFplLlC.pL.......tssshsWplVFlPL...a.hhpshulhthlhsh+a..............DcuhphphhahhshlphlF...........lhLKL......DsllshsWhlVFlPhaIshshthlhs................lahlhhsl.hhscs.plhsspccpphts.ulss......hhhslPhlsFpllLst+L-sss........plshhslFsPLhls ...............................................................................................................................................................................................................................................................hl.phlh.............lsL+L......-.p.hh.....h.....sWhh.l.........a.lPha.....lh.s..hhhl..hh.........................................hh..hh.h........................................................................................................................................................................ 1 74 105 163 +10102 PF10270 MMgT Tmem32; Tmemb_32; Membrane magnesium transporter KOGs, Finn RD, Coggill P anon KOGs (KOG3918) Family This entry represents a novel family of membrane magnesium transporters (MMgT) [1]. The proteins, MMgT1 and MMgT2, are localised to the Golgi complex and post-Golgi vesicles, including the early endosomes, suggesting that they may provide regulated pathways for Mg(2+) transport in the Golgi and post-Golgi organelles of epithelium-derived cells [1]. 
21.20 21.20 21.30 22.00 21.00 20.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.34 0.72 -3.61 35 278 2009-09-21 12:36:32 2007-08-07 16:08:55 4 8 246 0 200 267 1 99.40 28 72.23 CHANGED lhhlGhlhLhHuuYSuh..pa+phh+h.......sp......shssLPhDIllEsllulllhhhGllhsstph+.lph...............htclp....................pssaspl...............psRsuFhshpp+p+tht ..................h.hhlGhlhLhHAuaSuh..pa+shh+h....................spp......phpsLPlD.IllEslluhllshhGllhss.sch+slph.................ssclp...........................ppsassl...............pscsuFhshp++t+.h.................................. 0 59 106 163 +10103 PF10271 Tmp39 Putative transmembrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3828) Family This is a family of conserved proteins found from worms to humans. They are putative transmembrane proteins but the function is unknown. 19.70 19.70 22.50 21.90 19.50 19.50 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.59 0.70 -5.82 11 215 2009-01-15 18:05:59 2007-08-07 16:09:16 4 3 90 0 111 180 0 321.40 44 87.45 CHANGED uSuhGhSsPPlss..T...l+Hs.IP-lshs.upLlFEhhlFhapllALhlQYlNIY+TVWWhP.Sa..s+pSLNFHLIDapLhsFIslhLuRRLlasl......lpcsspuspsohhpplhhll....s+hslLThsshoLshohlpLF+saShlsLLFLsYPFshYlslhshph-.p.st...t..................ushhc.sp-ah.hLRchh+pp...ls.t..shPsHuCs......SPshIRsEV-tLKsDFNtRhKcVLFsSlLoAYYVuFlPshFVc..........................ss.YYDhhWoCpphlhVhluuhlhLhsaLLPs+YCDLLHRAAsHLGpWQ+l-......stssssspHsWS-pslWPQGVLV+H.s+slYKAlGhaNV....AlPu-sSHhRFahhFppPLRllNlLhslEsulllYQLaSLh.poccWppslSluLlLhsNYashFKLLRDRllLu+sYuh 
.................................................................................................................................................................hss..s......pHs.hP-lshp.upllFEh.hFhh.llALhlpYlNIY+TVWWhP.sa..sppulNFaLID..lhhhhhlhL.uRRhlhsl.............l.ps...pts...t...ph.p.hhhhh.......h+hslh.shsthsL....s.h...shhpLapsaohlpLLaLsYP.hshYl.hhthp.-.....................................................................................ph..hlhp.....p.hcp....h.....shPsHsC.sh.....sPs.IRpEVEhL+hDFNhRhKclLhsS..........hhoAYYsuFlPhhFV...........................ss.a.aDh.hWus.phhlhV.lss.shhht.L.hP.sp...YsDl....L.H+uAhHLGpWp+l-.............s.hsss..tp.WoctshaspGslV+H.spslY+AhG..hsl....AhPussSHh....RFa.............hhF......ppPhhll..shLhhlpsullhhQlh.Lh.topcWpphlShullhFsNYhshFKLhRDhllht+hYp.............................. 0 28 37 73 +10104 PF10272 Tmpp129 Putative transmembrane protein precursor KOGs, Finn RD, Coggill PC anon KOGs (KOG3899) Family This is a family of proteins conserved from worms to humans. The proteins are purported to be transmembrane protein-precursors but the function is unknown. 
21.80 21.80 22.30 23.20 20.90 21.70 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.12 0.70 -5.73 10 123 2009-01-15 18:05:59 2007-08-07 16:09:26 4 4 92 0 81 123 1 279.90 38 90.95 CHANGED hChVFPPsEFhS.AGLTVpsLhStaLGSEDhuFVpYHlRRTo...ssLLsHShLPhGY....althshhAsppshhpstpss-........sWphhhhh.....uVllsllsuslsaYWSp+sWspHPlsKsLuhauhspu...sW+AVASuINTEFRR.-KFspthsu.o+VlVT-oWllKsTsYplclApQpDl+L..........................oVscS+pH-lo..sDsspslQhlsIpVsshsPt.lcPFsIRLNus-Yt-LpE+LcsPIpssuNVslHpols-cFlEsF+upVctNssaphs...sspEhEPCluCMQspssIKltKpC.sp-p....pG.......cCQsCYCRPMWCloChuKWFASRQ..Dppc.-TWLuu+ssCPTCRA+FCIhDVChl ................................................................................................shlasPpEF.t.hGhTlpplh..utaLGpEphsFl.aHl+Ros...hslhhHohLPhuY....hhthp.hhstpp..h........t..............aphhhhh.....ulh.h.shhsshhha..h.Wp..ppWtpHPls...+sLthas..s..t...................sapsVAssINsEaRch.D+aththsu.u......pllsT-sWlhKsosY..plphApQpDspL..........................sVscucpaplo...-.....ss...slQh....lsIpVt..s.........hpst..lpsFsI..Rlsuh-atpLp-+lptPIt.s.tsl.h+.pol.-hFl-sFtp.VthNshaphs.........................t...ph-.ChuChp..ttsslKl.K.C.p.st........tG.........pCppCaCRPMWC.......lpChu+WFAuRQ..s..c.....-.hWLtu+ssCPhCRupFClLDVshl................................. 0 34 39 63 +10105 PF10273 WGG Pre-rRNA-processing protein TSR2 KOGs, Finn RD, Coggill PC anon KOGs (KOG4032) Family This entry represents the central conserved section of a family of proteins described as pre-rRNA-processing protein TSR2. The region has a distinctive WGG motif but the function is unknown. 
21.90 21.90 22.10 22.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.56 0.72 -3.89 34 320 2009-01-15 18:05:59 2007-08-07 16:09:44 4 7 273 0 236 319 3 83.20 33 40.01 CHANGED hFppulshllppWsuLplAVpNs.WGGscSp-Kp-hlsstlh-hFss....................................spsh-..tp-lE-hLhphMt-EFss.slEDsSshpVApt .........FptulphhlppWsuL.plAV-Ns.WGGs.....p..S..pcK...t-alss.slh-.hFtp..................................................sp.s.sc..hp-lE-hLhphMssEFcs..lEDsSh.pVAp......................................................... 0 78 130 195 +10106 PF10274 ParcG Parkin co-regulated protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3961) Family This family of proteins is transcribed anti-sense along the DNA to the Parkin gene product and the two appear to be transcribed under the same promoter. The protein has predicted alpha-helical and beta-sheet domains which suggest its function is in the ubiquitin/proteasome system [1]. Mutations in parkin are the genetic cause of early-onset and autosomal recessive juvenile parkinsonism. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.27 0.71 -4.38 21 338 2012-10-11 20:01:01 2007-08-07 16:13:17 4 16 140 0 236 318 11 152.70 38 43.48 CHANGED opFRthYsRGDlPlplpHuust....pc..ltW..+lsscpLDYc...hYLPlFF-GLpEpcaPYpFlAppGsh-LLpp............utpKIlPllPpLIlPlKsALsT+c.clhpssLclLQpLVhsushlGtALVPaYRQLLPlhNhapt..+p......hNhGDtlca.....cpppsluDlIp-TLchLEcpGGsDAaINIKYMlPTYpS ......................otFtthYp+GshPhtl.assht......pp..ltW...........cs......c..pLs...ac...hhLPlFh-GLpEhppPYp..FhAtpGhp-hLtt............uspK.llPllPpLIhPlKs...............ALsp+s.plhptsLpsLppL.........u...p..........hlG.tALlPa..aRQl....Lshhshhhs...hp....................................s...u.p.h........ptpppht-lIppTLphhE.hG.........G...s...uhh.IK.hlPTYpS............................................ 
0 104 134 186 +10107 PF10275 Peptidase_C65 Otubain; Peptidase C65 Otubain KOGs, Finn RD, Coggill PC anon KOGs (KOG3991) Family This family of proteins conserved from plants to humans is a highly specific ubiquitin iso-peptidase that removes ubiquitin from proteins. The modification of cellular proteins by ubiquitin (Ub) is an important event that underlies protein stability and function in eukaryote being a dynamic and reversible process. Otubain carries several key conserved domains: (i) the OTU (ovarian tumour domain) in which there is an active cysteine protease triad (ii) a nuclear localisation signal, (iii) a Ub interaction motif (UIM)-like motif phi-xx-A-xxxs-xx-Ac (where phi indicates an aromatic amino acid, x indicates any amino acid and Ac indicates an acidic amino acid), (iv) a Ub-associated (UBA)-like domain and (v) the LxxLL motif. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.95 0.70 -5.02 33 450 2012-10-10 12:56:15 2007-08-07 16:16:48 4 8 244 24 284 557 7 211.80 28 59.13 CHANGED pttsphPh.luph....tshssLtcEYspt.sssahpKh.........ptLppp..YsthRpsRGDGNCFaRAhsFuYlEhLLppps......chsc.hhpplp..phpppLh.....thGhschhhcDFh-thhpllcplpstspts.......pllptasctstSs..........llhahR..hlsSualpppsctapsFl.t.........holcpaCppplEshtpEuDcltItALupuL....slslcVhYlD..............................ps....ssspsspash..................p....................ttshI............hLLYRPGHYDILY. 
............................................................................h.lut..........l.t-a.t....s.......hh.tKh..................ptLtpp....ashhRps+sDGNCFaRAhhauahEtLlpptc......................chpc.hhthh.t...p.pptl............................thGa...tph.h..h.csF.hpthhpllppl...tpttp.s.................pLhp.h..Fs.cp.s.h.Ssh.............................llhahR...LlTSualppps..c...h......a.p...Flps...................hslcp..aCp...ppV...-shtp....Es..Dcltl..hALspAL.............slslpl...Y.hD..............................ps...........tssths.a.hs...............................ts................................ststl............hLLY+P.G.HYDILY.................................................................................................. 2 105 150 216 +10108 PF10276 zf-CHCC Zinc-finger domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3456) Domain This is a short zinc-finger domain conserved from fungi to humans. It is Cx8Hx14Cx2C. 21.20 21.20 21.20 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.29 0.72 -4.11 37 906 2012-10-03 19:45:42 2007-08-07 16:22:25 4 3 865 3 386 717 277 40.00 35 42.20 CHANGED ss+plsC-G..........us.....ss......hGHP+Valslsppst...hhCsYCup+ah ...................................h.CsG..........st.....ss......huHP+Valslsc.psp....stCsYCGphah...... 1 115 224 304 +10109 PF10277 Frag1 Frag1/DRAM/Sfk1 family KOGs, Finn RD, Coggill PC, Bateman A, Wood V anon KOGs (KOG3979) & KOGs (KOG4320) & Pfam-B_15139 (release 21.0) Family This family includes Frag1, DRAM and Sfk1 proteins. Frag1 (FGF receptor activating protein 1) is a protein that is conserved from fungi to humans. There are four potential iso-prenylation sites throughout the peptide, viz CILW, CIIW and CIGL. Frag1 is a membrane-spanning protein that is ubiquitously expressed in adult tissues suggesting an important cellular function [1]. 
Dram is a family of proteins conserved from nematodes to humans with six hydrophobic transmembrane regions and an Endoplasmic Reticulum signal peptide. It is a lysosomal protein that induces macro-autophagy as an effector of p53-mediated death, where p53 is the tumour-suppressor gene that is frequently mutated in cancer. Expression of Dram is stress-induced [2]. This region is also part of a family of small plasma membrane proteins, referred to as Sfk1, that may act together with or upstream of Stt4p to generate normal levels of the essential phospholipid PI4P, thus allowing proper localisation of Stt4p to the actin cytoskeleton [3-4]. 27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.49 0.70 -4.86 89 980 2012-10-01 21:22:51 2007-08-07 16:52:53 4 16 284 0 645 961 31 204.50 18 57.40 CHANGED shth....lslhtslhshsshlhshhhuhh..p.hhphs...........................hP.ISs..hGshsPcptlaphhhslsuhhthhlhhhtahh...............................................hhthhppt.....................lshhuhhhGllss.lulshluhh......sssptt.slHhhhhhhahshshlahhhpshh.hhht............................................ttphshth+hhhshhshshhlsh.................hhhhhhpphh........................hhshsAhhEWhlshhhhhah.hohhhDFtsh 
..........................................................t...hlshhhshhshsshhhs.......hhhuhh..p..hh.p..s...........................................................................hP...l......Ss..hG....shs.P.pphlathhhshsuh.h...........t.hhhhhhhath.....................................................................hht..h...pp.t........................lshhshhhuh.lss...hul.hhlu.h.....................................ppsp......h.....s..........l...H....h.hh.hh..hhssshhahh..hpshl.hhht.............................................tth..hs.hhh+...h...h..hhhht.hh..shlhh...........................................h.h.hh..httt...............................................hhshhAhhEWhlshh.hhah.hohhh-Ft..h...................................................................................................... 0 196 297 503 +10110 PF10278 Med19 Mediator of RNA pol II transcription subunit 19 KOGs, Finn RD, Coggill PC anon KOGs (KOG4043) Family Med19 represents a family of conserved proteins which are members of the multi-protein co-activator Mediator complex. Mediator is required for activation of RNA polymerase II transcription by DNA binding transactivators [1]. 25.00 25.00 26.20 26.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.35 0.71 -4.60 7 119 2009-01-15 18:05:59 2007-08-07 16:53:34 4 5 92 0 79 112 0 161.60 53 58.81 CHANGED uPFYLh+.E.Ps.sELTGspNLlscYsLEpuasKFsG.KKVKEpLSsFLPcLPGhIDhsGo..-NSSLRSlIEKPPIsGpph.PLTus.LsGFRLHsGPLPEQaRhh...+hp.Pp+KpKpKHKpp+sp-...shs............tpossDo......-+K+KKpK.+-DD.-RKK+KK-KK+KKpR+oP-p.ssG. 
...................................................tPFYLh+..E.P...u.p.sELTGuTNLloaYsLEcuY...sK.FCG....KKl.KEcL..S..sFLPcLP..GhIDhPGsp.D.NSSLRSLIEKP.....PI.hG..pshsP.lTushL....s.GFRLH...s.....GP...LPEQ..hRhh.........................chp.Pp+K..p..KpK...HKpp+ppD....sss...................p.ost.Ds.......-.+.K+KKpK...+c-D..-..RK++KK...-..KK+..KKp.+cosc..s...s......................................... 0 25 31 58 +10111 PF10279 Latarcin Latarcin precursor Bateman A anon Bateman A Family This family represents the precursor proteins for a number of short antimicrobial peptides called Latarcins. Latarcins were discovered in the venom of the spider Lachesana tarabaevi [1]. Latarcins are likely to adopt amphipathic alpha-helical structure in the plasma membrane. 25.00 25.00 25.20 33.40 24.90 22.60 hmmbuild --amino -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.80 0.72 -3.79 7 14 2009-01-15 18:05:59 2007-08-08 11:04:13 4 3 1 2 0 16 0 66.10 32 75.41 CHANGED TuYsVsE-LEs.ELD-L-............................ttuh.......................................................................................LtchsEsLc.l-Dh.p.EEARu.hhthhKcchKchhpphhc+h .puYsVsE-l-spEL--hpttuh...............................Lt-hsEpLctlpsh.p.EEARu...t.h+chh+phhtthhph.. 0 0 0 0 +10112 PF10280 Med11 HSPC296_Med11; Mediator complex protein KOGs, Finn RD, Coggill P anon KOGs (KOG4507) Family Mediator is a large, modular protein complex that is conserved from yeast to human and conveys regulatory signals from DNA-binding transcription factors to RNA polymerase II. Not only are the polypeptides conserved but the structural organisation is also largely conserved. One or two subunits are either fungal or vertebral specific but Med11 is one of the subunits that is conserved from fungi to humans [2]. Med11 appears to be necessary for the full and successful assembly of the core head sub-region [4]. 
25.20 25.20 25.40 27.30 25.00 25.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.40 0.71 -4.05 25 234 2009-01-15 18:05:59 2007-08-09 09:12:30 4 2 208 15 169 229 0 118.80 24 78.35 CHANGED sp-RLcuLs-IDpcIsplLppAupslppLup.pp........................ts+pthpppsppFhpoLspVsspLR+pIphL--ssss.scptssh....................................................................tsLsls.lsp+psshs+c+LpEhhpp.hcphls .......h.t-RlppLscI-+.....cIsp.lLppAupsltpLuppps.......................................pp..pthcpp...sppFh.pslp.p.l-scLpcQIphLscsshhpscpsssh................................................................................sshcs.s.hsh+p.hphs+p+hs-lhpp.hpph..................................................... 0 42 81 131 +10113 PF10281 Ish1 Putative stress-responsive nuclear envelope protein Wood V, Coggill PC anon Pfam-B_11056 (release 21.0) Repeat This family of proteins found in fungi is a putative stress-responsive nuclear envelope protein Ish1 [1]. 24.10 24.10 24.10 24.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.60 0.72 -3.86 71 642 2009-01-15 18:05:59 2007-08-09 14:52:20 4 9 141 0 466 616 0 37.40 36 30.74 CHANGED acsWos..scL+paLcp....aGl.sspst......s+-cLlpts+csh .....a-sWSc..ocLKpaLcp....pGlssspss......pR-cLlsts+cp............... 0 129 251 391 +10114 PF10282 Lactonase DUF2394; Muc_lac_enz; Lactonase, 7-bladed beta-propeller Wood V, Coggill PC anon Pfam-B_1372 (release 21.0) Family This entry contains bacterial 6-phosphogluconolactonases (6PGL)YbhE-type (EC:3.1.1.31) which hydrolyse 6-phosphogluconolactone to 6-phosphogluconate. 
The entry also contains the fungal muconate lactonising enzyme carboxy-cis,cis-muconate cyclase (EC:5.5.1.5) and muconate cycloisomerase (EC:5.5.1.1), which convert cis,cis-muconates to muconolactones and vice versa as part of the microbial beta-ketoadipate pathway.\ \ Structures of proteins in this family have revealed a 7-bladed beta-propeller fold [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.00 0.70 -5.37 81 4118 2012-10-05 17:30:43 2007-08-09 15:02:03 4 86 2565 28 1191 3490 508 268.50 22 71.36 CHANGED phalGoYTp..............ttupGIathpl...sspoGpLsshph.ss.....phssPoaLsh..ptssphLYulscts.......spGulsuaplsspp..upLphls......ph.s..tGssPsalulstssphlhsANYt.......uGslsVaslssc.Gslttssp.hhppp......Gs..GPp............pcQcssHsH.sshoPcscallssDLGsDclhhYch...sssst....Lst...hsph.pstsGuGPRHlsFps...s...................sphAYllsELuupVslh..pYs..tsG..ph.pplpslsslPt..s.hsup.....................s...tuAuI+losDG+FLYsSNRG...................psSIulFslstssGpLphlpthsotGph..PRcFsl.sssGcaLlsusQcSss.lslapRDspoGpLs.hsps.htsspPs.................sVhF 
.........................................................................................................................h...........................ttl..h.h.......t....t..t...t..ht......t...h................t.s.t....h.....h...h...........t..........t...t.h...l.ash..t.....................th..sh.t.ht..............t...hs..................t..................st......s.a.l..s..h...t....t..t.p..h...lh.....s.....u.s...Yt........................tu..h...h.h........htt...u...............................................................................................................s...ts....H..h....h.sP....s....tp..hl.h.s....s....s....L......t................D.p......lhh...ap.h....sttt.............h............h...th.......G.s....G.......PRHh...s.Fps.........s...................................................tph.hYl.h..s..E...l...s.....ss....lslh...........phs.......ps...............ph....p...h...p.p...hs...hhst......s..hsst.................................................p......tsu.s.l..p..l.....o.....s.....D.....G.....+.....a.....L.Y.s..us.Rs............................................tss...l...u..l..a....s......l............t....s...s........s.............p...............L....p.....h...l.....t..h..........h.........s......o.....t.....G..p......................P........R.........s......h.....s...l......s........s......s..............u.....c.....hL.h.....s......ss.p...p..o................s...s....ls..la..p..h..s.....pp.G.....lt.................ts............................................................................................................................................ 0 394 731 968 +10115 PF10283 zf-CCHH Zinc-finger (CX5CX6HX5H) motif Wood V, Coggill PC anon Pfam-B_93850 (release 21.0) Domain This domain is a zinc-finger motif that in humans is part of the APLF, aprataxin- and PNK-like forkead association domain-containing protein. 
The ZnF is highly conserved both in primary sequence and in the spacing between the putative zinc coordinating residues and is configured CX5CX6HX5H. Many of the proteins containing the APLF-like ZnF are involved in DNA strand break repair and/or contain domains implicated in DNA metabolism. 25.10 25.10 26.40 26.20 25.00 24.50 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -6.99 0.72 -4.10 22 261 2009-01-15 18:05:59 2007-08-09 16:02:15 4 30 96 6 185 282 0 25.50 48 7.07 CHANGED PcC.YGspCYR+NPtHhpcasHssps ...pC.YGspCY..R.KNPtHhpcasHPs....... 0 82 94 142 +10116 PF10284 Luciferase_3H Luciferase helical bundle domain Bateman A anon Bateman A Domain This domain is found associated with the the catalytic domain of dinoflagellate luciferase . Luciferase is involved in catalysing the light emitting reaction in bioluminescence. The structure of this domain has been solved [1]. This domain has a three helix bundle structure that holds four important histidines that are thought to play a role in the pH regulation of the enzyme. 25.00 25.00 26.60 58.10 18.30 16.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.38 0.72 -4.02 7 51 2009-01-15 18:05:59 2007-08-09 16:29:47 4 3 14 1 0 49 0 65.80 70 21.82 CHANGED CEK.GFEsusssKGGALsAtpVE+hG.-sFKsGLHpPpFHs-GLHKPhEuGGKsYETGFHYLLEsHE CEKTGLEuGGsu+GGALNAAQVtHLGc-sFKsGLHKPcacsEGLHKPHThGGKTY-TGFHYLLEAHE 0 0 0 0 +10117 PF10285 Luciferase_cat Luciferase catalytic domain Bateman A anon Bateman A Domain This domain is the catalytic domain of dinoflagellate luciferase . Luciferase is involved in catalysing the light emitting reaction in bioluminescence. The structure of this domain has been solved [1]. The core part of the domain is a 10 stranded beta barrel that is structurally similar to lipocalins and FABP [1]. 
25.00 25.00 46.90 46.40 20.40 19.70 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.82 0.70 -5.25 3 71 2009-01-15 18:05:59 2007-08-09 16:40:55 4 4 18 1 0 69 0 174.50 64 66.51 CHANGED PLCKDPYGPEVpuLVEsLL+EApDD+TLCF-NFpcPCPQLTKEQVAhCKGFDYGDKTLKLPCGPLPWPAGLPEPGYVPKTNPLHGRWITVSGGQAAFIKEAIKSGMLGAAEAHKIMADTDHHQTGGMYLRINQaGDVCTVDASVAKFARAKRTWKSGHYFYEPLVSGGNLLGVWVLPEEYRKIGFFWEMESG+CFRIERRAFpsGPYMFLRQATEVAGKISFVFYVKVSNDPGSKPIPLQSRDYTALAGpDNVPDNLGKPYsCTAKDLDYPpKRDuWLDpNKcpMLcQR-lVuoAF ...........PLCtDPautElpslspsLLp-AppD+TLsasNFpDPCPpLTKcQVt........sKthpLPCGsLPWPAGhP.PsYVPKTsPLpGRWITlSGGQttFIKpuIcsGMLGtuEApKIhADTDHcpTGGMaLRINQhG-.CTVDASVAKaARAKRTW+SGHYFYEPLVsGup..GVhsl.EEY+pIG................................................................................................................... 0 0 0 0 +10119 PF10287 DUF2401 Putative TOS1-like glycosyl hydrolase (DUF2401) Wood V, Coggill PC, Bateman A anon Pfam-B_11571 (release 21.0) Domain This family of proteins is conserved in fungi. One member is annotated putatively as OPEL, a house-keeping protein, but this could not be confirmed. It contains 5 highly conserved cysteines two of which form a characteristic CGC sequence motif. It has recently been shown that this family is related to known glycosyl hydrolases [1]. 
21.50 21.50 22.70 21.60 21.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.70 0.70 -4.88 39 173 2012-10-02 19:29:29 2007-08-10 10:30:32 4 7 100 0 120 172 22 219.60 41 50.78 CHANGED uusWsRsuYY...osuou-NlTFLNptGu.tuSGsaSssaGsSLSYAsocGsuuASSspsLs.-sh.............lsSspEasIaSsppCsss....sCGaYRpG.IPA.....YHGFuGssKhFlFEFpMPs.sssssu..hshDMPAIWLLNA+IPRThQY....ssssCSCWp....oGCGEhDlFElLsuusc...+LhSplHshQG.ss....ssGuGsssYFpRPTs..uohKuuVlFsusss....lplltl.s-shsFssolsussVpsWl ..................................................usWsRhuYY.ps.ssuos-NloFLNptGu..ttSGsho.sshGsuLS.YA..suDG..s...uuAuospsLs.ssh................lsSspEahIhSsppCsss.....sC.GaaRtG.hsA.....YHGFsGssKhFlFEFpMPp.spssou.........hs.DMPAIWlLNA+IPRThQY....spss..CSCWt........oGCGEhDlFElL....sussp...+hhSphHshQ........thusGspsYFpRPos..uohK.suVlFsusss.....lplltl.s-sh.sFs.s.olssssVsph........................................... 0 25 66 105 +10120 PF10288 DUF2392 Protein of unknown function (DUF2392) Wood V, Coggill PC anon Pfam-B_10085 (release 21.0) Family This is a family of proteins conserved from plants to humans. The function is not known. It carries a characteristic GRG sequence motif. 27.30 27.30 28.40 28.00 27.20 27.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.22 0.72 -3.71 35 268 2009-01-15 18:05:59 2007-08-10 11:46:09 4 8 242 0 195 274 0 104.40 28 23.31 CHANGED LAsclLutlsKGRGuulshpls.sssp......tclphlhPLRDlltpElphYsphttls.h.hs...hhhp................tpphscshoIpcLsppalpsLpps.aPSssSTVlRTusKL ....................LAhchlstlshGRGttlshp.....ss.sspp.......................tslhllhPlRDhhtpElshYsp.ht.s..l...phhhs....hh.t....................................................psscstSIpcLhppalpslppp.aPusluTVhRTusKL.................... 
0 57 100 160 +10122 PF10290 DUF2403 Glycine-rich protein domain (DUF2403) Wood V, Coggill P anon Pfam-B_11570 (release 21.0) Family This domain is found in the N-terminal region of members of DUF2401 Pfam:PF10287. The function of this glycine-rich region is unknown. 20.90 20.90 21.70 22.20 20.80 20.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.13 0.72 -3.82 26 164 2009-01-15 18:05:59 2007-08-10 15:40:59 4 7 97 0 113 155 1 64.50 42 14.41 CHANGED scplpas....NVG..aSGoYpsVopMspto....C...sC..spsshpFSGslAPlsEcLSVHFRGPLpLpQFAVYhPs ......................ptl.Ys....NlG..hoGoYpcVosMppts........s.....sC...sppshsa..SGsluPLsE......E..LSlHFRGPlpLhpFusYhs............ 0 23 62 99 +10123 PF10291 muHD SAFF; Muniscin C-terminal mu homology domain Yang H, Coggill P anon Yang H, Pfam-B_7632 (release 21.0) Domain The muniscins are a family of endocytic adaptors that is conserved from yeast to humans.This C-terminal domain is structurally similar to mu homology domains, and is the region of the muniscin proteins involved in the interactions with the endocytic adaptor-scaffold proteins Ede1-eps15. This interaction influences muniscin localisation. The muniscins provide a combined adaptor-membrane-tubulation activity that is important for regulating endocytosis. 
25.00 25.00 25.10 25.40 24.60 24.90 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.99 0.70 -5.20 32 429 2012-10-02 01:13:52 2007-08-10 17:13:25 4 5 231 1 244 384 2 258.90 34 33.14 CHANGED .GLsASlsEslsAhF+sGtlsc.............splsGElALsYsus.sss.......s..........tshslRlsNhpplEKVhsNppal..........pppss.......sc...aplshstlsspo.hsu.............hKYpl+hp.......s.sPlhlpssW+hEsppsulhlpYphNs....uhsss.....lsLpNlslsVsl-su..............psTus.o.cPp..usas+EppplsW+hs-.lslssps...t+LlARF.ssssss.........p.usVps+Fphc.ssshpt.......lslshh..............t...P....................s..ap.s.spRplsuG.p..............Yhup ..........................................................hslssuhsEsVsAaF+susso+.............l+ls..GEhsloFPuuhsphhss............sP...........ssLsFR.l.........p.Nhu.p..LE+..lhPNspLlhp........sspss...............sp..........Fhl.NhssLtst.LppsuptpPtu.................................aaNlshLKYQlp....sps................pssPLpLsshW+s.csspTslpl-Y+hNsp......uhsss...............ssLpNlpllVsl..-Gu...............sTshp.u.hPs...AsW..stEpp+lhW+ls................-...lo.pucst....GpLh...ARFphspuss..........pPusl..s....spFssc.....u...so..LSG.........hclpLs......................................................Gssa+h..s.hl+++hssG...+Yhu.s............................................................................................ 0 56 99 180 +10124 PF10292 7TM_GPCR_Srab Srab; Serpentine type 7TM GPCR receptor class ab chemoreceptor Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srab is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. The expression pattern of the srab genes is biologically intriguing. 
Of the six promoters successfully expressed in transgenic organisms, one was exclusively expressed in the tail phasmid neurons, two were exclusively expressed in a head amphid neuron, and two were expressed both in the head and tail neurons as well as a limited number of other cells [3]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.13 0.70 -5.74 17 186 2012-10-03 04:04:29 2007-08-10 17:28:29 4 4 10 0 183 384 0 249.20 17 86.59 CHANGED ppsCphMtpluoStaL+hoLhhpLllsllulPlhlhu...laphppspLFHhNh+lIhphHhhuhhlHshuRlhLHuhDLaNYhs....hssC-hhPshhRChllRhhYshGhalsssTslsLlIERhlAThpuspYEpptphhGlllshhQlsluhhhhhhhapphsF....sshh.YChshpsu...sh.hplshhlslhsQlluhlhFtaLh+hNc+lRs..tphpuoLSsRYQLEQNlpShpsL+hFushshlFlhhphhhhhhlhhhutphppspYhAllElstshPlYullhhhhlh+p.+c.+schppsLpsphphstsp...YF-papc...pls ..............................................................................................................................h..hhhs.hh.s.h....h.hh....hh......t.h.h..H.N.hphl.h..hhhh......h....h.lh..sh.................h....h...h...............h........h.h....hh.............p.s.p.h.h........t..h...............hC..........h.hhRh.h.hu..........h.h.h.......s.......th.o...hh..h..l.....h.....lERh..............lAThh...pp.Y..E................p.p.t..t....h.h...G...h.hl................s.h...............hp..hh.l.uh.h.h.h.h...h.hh..t...t.h.............t.hh...a....C....h.....................t..s..........h.....h....hh..h.h..h..hh.p..l.hs.h...h.h..h.........h.........h..l.h....t.h.N.p..................p....h....p....................t...sLop........+......aQ......lppNh..............p.shp.h.l...h........h.h..hp......h..hhh..h.....hh...h.h.h..h........h........................................h......................................................s...........h...h..................h.............................................................................................................................................
............................................ 0 64 90 183 +10125 PF10293 DUF2405 Domain of unknown function (DUF2405) Wood V, Coggill PC anon Pfam-B_12420 (release 21.0) Family This is a conserved region of a family of proteins conserved in fungi. The function is unknown. 20.20 20.20 20.30 20.20 19.90 20.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.75 0.71 -4.49 32 202 2009-01-15 18:05:59 2007-08-13 10:57:13 4 25 165 0 156 211 0 159.50 26 6.23 CHANGED hpschlRlhscuPplcp..hWsRllslpsh+lshc..phcpp....................lslps-sl+lslPHpalhaplhDNlssshKulKQLptph+s.....sssch....lhs+p..pstplP+lsl+octlhaplEDDPFEpcLuhIYclGllEQ+pRLp+.ptFct+spclppsspppt ........................................................................................................h...p.hRhhstsPthpt...hW..scllslpshclshc.tptpts.............................t.hslss-sl+ltlPachhha.clhDN.hlshhKulKpLhapaKs........ss.ch.......lhspt..tPptlP.clsl+s+phhh-lEDDsFEh+LuhIYclGl.EQ+pRLtR.ptFctKlpclpptp...p.......... 
1 42 83 139 +10126 PF10294 Methyltransf_16 Putative methyltransferase Wood V, Bateman A anon Pfam-B_19672 (Release 21.0) Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.08 0.71 -4.81 15 2584 2012-10-10 17:06:42 2007-08-13 15:02:31 4 72 442 2 1819 5419 1399 156.00 21 50.49 CHANGED hhcss.LpIpE-sus.ulutplWDAulshshYLtpt..tt............tspphsshslLELGSGsGlVGlulAth....h.ssssVhlTDl--.hh-lhcpNIpl..t...hlssclpscsLcWG-.cL.s-hhssp.hDLILsADClYhEcuhssLlpoLpcLssssss..lLhuaK+R.....+cu-ppFFphlcct ......................................................................ht...........................h...G.h.h.l.W.s....uuh.....hL....u...p.......a.l.p................................................................t.t....h.psp.p.l.l.EL.....G..uG.s.G......L..s..u.ls.suhh.................................ss.p.V..h...h.T..D.......h....s...p........h.....l..p....................l...p...t..........N.l....ph...N....t............................t...s....p.......l.......p............s........t...............t....L..........p.......W............s.......p............................h...............................................p...............h..............................t..............................t...........................h..........D.............l...........l..........l...u...u....D....s....l...........Y.....................p.......h.....h.............Lh...p...s...l...p...t.h...h......t.........t.t..................h.hh.s.........t........................................t........................................................................................ 1 693 1089 1500 +10127 PF10295 DUF2406 Uncharacterised protein (DUF2406) Wood V, Coggill PC anon Pfam-B_13850 (release 21.0) Family This is a family of small proteins conserved in fungi. The function is not known. 
21.10 21.10 22.90 53.60 17.60 16.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.61 0.72 -3.46 18 139 2009-01-15 18:05:59 2007-08-14 10:52:48 4 1 105 0 94 127 0 69.80 52 25.28 CHANGED AlpEAQPappAh.......htpppstphpstpc............+DhaGpsIs................................................pPDhSNPTRsR.ERPLDTIRuFEhAIsG .......................................AlpEAQPh.ppAh..................pppht.hps.p+..................+DhaGpsI..s................................................pPDlSNPTRsR.ERPLDTIRuFEhAIsG... 0 13 43 76 +10128 PF10296 DUF2404 Putative integral membrane protein conserved region (DUF2404) Wood V, Coggill PC anon Pfam-B_12178 (release 21.0) Family This domain is conserved from plants to humans. The function is not known. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.82 0.72 -3.85 32 588 2009-01-15 18:05:59 2007-08-14 14:30:48 4 15 262 0 414 579 1 90.30 24 12.53 CHANGED NAllGRlFhshh+opthpshltpKIpKKLs+l+...pPsFl.scltlpclDhGsssPhlosspl.plsscGphhhs.............hslpYpG.......shplplpTphsl ............................Nslluphhhphhpss.h.h.....ps....hlppplpcpLs.clp.....hPsal..sclpls-lshGsssPhl..pss.c.l...h..t....l......s........t....p.G........hhhc...............................h-lpYsG.......shplslpophp........................................... 0 130 232 350 +10129 PF10297 Hap4_Hap_bind Minimal binding motif of Hap4 for binding to Hap2/3/5 Wood V, Coggill P anon Manual Motif In Saccharomyces cerevisiae, the haem-activated protein complex Hap2/3/4/5 plays a major role in the transcription of genes involved in respiration [3]. Hap4_Hap_bind is the essential domain of Hap4 which allows it to associate with Hap2, Hap3 and Hap5 to form the Hap complex [2]. 
21.10 21.10 21.50 21.80 20.60 20.40 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.05 0.73 -6.22 0.73 -3.92 11 175 2009-09-10 17:07:33 2007-08-14 17:00:41 4 3 128 0 121 171 0 16.90 77 3.20 CHANGED sSKcWVLPPRPKPGRKP ...TSK-WVlPPRPKPGRKP... 0 29 67 104 +10130 PF10298 WhiA_N WhiA N-terminal LAGLIDADG-like domain Mistry J, Bateman A anon Manual Domain This domain is found at the N terminal of sporulation factor WhiA. This domain is related to the LAGLIDADG Homing endonuclease domain while the C terminal domain of WhiA is predicted to be a DNA binding helix-turn-helix domain [2]. 23.40 23.40 23.40 23.40 23.10 23.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.47 0.72 -4.14 74 1805 2012-10-03 01:41:40 2007-08-14 17:02:56 4 4 1794 3 299 986 93 85.90 33 27.65 CHANGED pssppA...ELuAllRhsGuLpls..spp.lslclpT-ssulARRlapLl+cha.slpsElhVppptpL+K.sshYlVpltps..spplLccl.sll ................t..pt+AELuAllRhsGuLsls.....sp.......p....lslslpTENuslARRlaphlcchY..slpsElhV+p.+hp.L+K..NshYl.VRlspp....spclLp-Ltlh............. 0 110 208 263 +10132 PF10300 DUF3808 Deme6; IML2; Protein of unknown function (DUF3808) Wood V, Coggill PC anon Pfam-B_15386 (release 21.0) Family This is a family of proteins conserved from fungi to humans. Members of this family also carry a TPR_2 domain Pfam:PF07719 at their C-terminus. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.47 0.70 -5.93 20 568 2012-10-11 20:01:01 2007-08-15 15:32:33 4 21 235 0 370 612 25 413.60 24 75.08 CHANGED hpAlsLhLsschp-AhshLc...h...sspShaH.uLGtusltalpAhlTFEscchppAsssL+cAppssp+.Rc+uphhs.............t..h....phht..thctplCYAEshLh+AsLsF.hsEshlshIKGuhKlR+uYphYc-hhphhpth....................................................................................................shtps.........psccalcuGVphGhGlFpLhlSLlPspll+LLphlGFpGsR-hGLptLhc.uup.scsl+usLusLsLLhYashlp.hhuh.............................tphshccs-plLt.hpppaPpuulaLFacuRlptlpGpl-pAlphhcpsh..tsppchKQl+pLChaElhashsappcacpAhphhhhLhcpScWS+AhYsYhpushhshhscp............-ttstt.-ptsplhcplssLtt+hth..KslPhE+FshRKsp+apup..p............shhssPshEhhYhWNGashhuccth..puhhphh.p...............p.pp........D-ps.....lhpLL+GlsL+pL ...............................................................................................................................................uhthhhssphptu.thhp................................tpps.............hh...s........hshuhl.hhp.Ahh.sF..-..................pphp..Ahpthpps...phst.t..pcpsphhp.....................................t...h.......tth.h.....t.hchplshA-s.l.p.Ahlsh...pps..h..hthlKuhhplRpuathh..p..p..hhp.lpt............................................................................................................................................................................................................................p...hhcuGlphshGhhpLhlShl.Psplh+llphlGFp.G...........s+phGLp.L.....h.....p...u...............s...............p..tpsh+usls.slhLLh..aashh..hhsh.........................................................................................thshptscplL...t.hhttaP......p.us...la...hh.puRhthhcsp....lp...pAlphhp.psh.....p.........................p.p......p......h+................Q..........h..p..t...ls.h.a.E.hhhs..hh.hpapt...A...h...h.....h.Lh.p....s.p.W..S..+....
...u.hYhYh......tu.shh..hhc.........................................................cph.plhp.ps............ssh........h.......t.+hth........+sh.Ph-pFsh.+Ksp........+a.tpt..............................hhh..shhEhhYhWsuhthhstp.......shh.h............................................................................Dp.s..........lh.lLhuhhh+.................................................................................................. 0 129 181 279 +10134 PF10302 DUF2407 DUF2407 ubiquitin-like domain Wood V, Coggill PC, Bateman A anon Pfam-B_17915 (release 21.0) Domain This is a family of proteins found in fungi. The function is not known. This domain is related to the ubiquitin domain. 25.40 25.40 25.40 25.50 25.30 25.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.50 0.72 -3.80 46 152 2012-10-03 10:59:06 2007-08-15 16:09:52 4 4 135 0 111 189 0 115.20 28 33.54 CHANGED hhlsIRFos......slPD...l.LsIs....s.ss.sTsstLKp...........................................hIRpcl..........sp.......hss+RLRLIa.......sG+h.Ls.-ssslssplphs........................................................ptpupts.p.................+h.YlHCslG-. ...................h.hlsIRFos......ulPD...l.LsIs......sssp..sTsttLKp...........................................hIRppl.......................................s.p.......ssp+RLRLIa.......sG+l.Ls.Dsoslusplph.....................................................................t..p.ptpupss.t....................................phYlHCslG-................................................................................................................. 0 27 58 92 +10135 PF10303 DUF2408 Protein of unknown function (DUF2408) Wood V, Coggill PC anon Pfam-B_16841 (release 21.0) Family This is a family of proteins conserved in fungi. The function is unknown. 
25.00 25.00 45.80 25.00 24.10 24.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.89 0.71 -4.04 22 259 2009-01-15 18:05:59 2007-08-15 16:25:05 4 4 128 0 202 255 0 114.90 31 47.36 CHANGED plcplss-LtshapcLlsIRRslhshsopsKhs.....................................ssclppLpppLccl-spR..hDG...cFhus-.....sphlps.QsllsGLLDcChphhpDl.tccs......................pls.phpshYcpLl-I+sp.LEpLhlT+RWTLRE ................................s..-.lotsL.PlappL.ol+RsLhplpppuths.......................................................................spELhshphcLppIDshR....sDG.............KFhsss.............tph..t.QuhlssLLscCaclsp-lptptt.................................................................................................... 0 58 117 174 +10136 PF10304 DUF2411 Domain of unknown function (DUF2411) Wood V, Coggill PC anon Pfam-B_15078 (release 21.0) Domain This is a 38 residue domain that is found in proteins at the extreme C-terminal end of some HEAT repeats Pfam: PF02985. the function of this domain is not known. 20.10 20.10 20.10 20.10 20.00 19.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.46 0.72 -4.31 21 177 2009-01-15 18:05:59 2007-08-15 16:51:16 4 5 166 0 127 191 0 35.70 31 3.74 CHANGED pslpclhcsLcaVtppDs.DslV+tpspssL-pL-sh ..........l.clh+lL+aVtsp-s..Dsll+tHAphsLEpL-s.... 0 28 59 100 +10137 PF10305 Fmp27_SW RNA pol II promoter Fmp27 protein domain Wood V, Coggill PC anon Pfam-B_15444 (release 21.0) Domain Fmp27_SW is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation [1]. It contains characteristic SW and GKG sequence motifs. 
25.00 25.00 26.00 27.30 24.90 24.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.00 0.72 -3.59 31 159 2009-01-15 18:05:59 2007-08-15 16:57:59 4 12 125 0 123 169 0 102.10 32 3.74 CHANGED pAhp+LpcphSpSWlp+lcthcphpppphpchpphhaGtcp.htpshs.scsllshsppPsLhshlhc-lDlsls+PpFslcclscFlachGKGhP+spcYSlL ....Ahp+Lpcp.upSWhcRlcpthphppsphcchp..........ph.............hhGssc...sspshp....p..sc..pll..sh.sppPsLhshllp-lclslsKP.S.Fslcc..lPcFlHclGKGhPcDhcYoLL.. 0 30 68 110 +10138 PF10306 FLILHELTA Hypothetical protein FLILHELTA Wood V, Coggill PC anon Pfam-B_18082 (release 21.0) Family This is a family of conserved proteins found in fungi. It contains a characteristic FL(I)LHE(L)TA sequence motif, where the bracketed residues are I, L or V. The function is not known. 25.00 25.00 32.00 30.70 24.20 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.02 0.72 -3.94 17 117 2009-01-15 18:05:59 2007-08-15 17:09:20 4 3 115 0 85 112 0 89.50 34 37.06 CHANGED +lLpRl...P+Fh+.asp.hhsuPhoalsSFLILHElTAIlPLhulWahFap..........................hshhs.htlPsa.......hlscuh............chhcchhp+h.Gathhstpc .................l.sRL...P+Fh+.Ysp.hhsuPl.oalsSFLILHElTAIlPLhuLahhFHh..........................................hsh...hs...hhlssh........hlscGs....................cpht+hhc+h..Gaht.t...ht..................................... 0 19 46 71 +10139 PF10307 DUF2410 Hypothetical protein (DUF2410) Wood V, Coggill PC anon Pfam-B_19378 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known.There are two characteristic sequence motifs, GGWW and TGR. 
22.10 22.10 26.30 22.30 21.30 21.20 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.30 0.71 -4.94 20 120 2009-09-10 23:56:53 2007-08-15 17:10:28 4 1 115 0 88 119 6 193.30 40 37.24 CHANGED pSPLPNPpLWsusTIGhLpu.-sFuN.GGWWaDsclLuA..TG-GhthEcs+AWcGWWNEcIVcLlcLShcpKD..uLoVLLTGRuEssFucLIcRh....lcS+pLs...............FDhlCLKPp.sstspcasoThcFKpsFLc-Ll.pTYp.ps-EIRIYEDRl+HlKuFRcFFcphscp.........sRpslss-VIpVs-tspYLsPlsEsApVpchINsHNtsl .............oPhPN...spLWsssslG.h.Lps.pshss.G.G..WWpDsphLuA.....ospuh.ch.c.cs+u.......hcu.............WWN-pIVpLl.chShppKD..........sLo........V...LLTGRsEssFsclIc+h..........lpS+tLp...............FDhlsLKPphs.....................tspcapoTMcFKQpFLccLl.pTYc..pucEI+lYEDRl+Hl+uFRcFFpphNpp...............sR.tsl.su-VIpVs-hsphL.sPlhEhApVpcMIspHNtth................................ 0 21 45 73 +10141 PF10309 DUF2414 Protein of unknown function (DUF2414) Wood V, Coggill PC anon Pfam-B_22455 (release 21.0) Family This is a family of proteins conserved from fungi to mammals. One mouse member is referred to as ELG protein but this is not a homologue of human ELG protein. The function is not known. 28.90 28.90 28.90 29.00 28.50 28.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.98 0.72 -4.29 19 181 2009-01-15 18:05:59 2007-08-16 10:01:07 4 6 164 0 138 181 0 61.80 39 12.76 CHANGED tplR.Esl+lpGVDshST-DlKsahstY............................hs.p.ss+IEWIDDoSsNllatopcsutpALhsl ...................plp.-plalpG..V..Dc....hoTpDlhsYhp-a.................................s.Ps.+IEWIDDoSsNlVatsptsAtcALhs........... 0 41 69 108 +10142 PF10310 DUF2413 Protein of unknown function (DUF2413) Wood V, Coggill PC anon Pfam-B_20450 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 
23.90 23.90 24.20 25.30 23.80 23.80 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.80 0.70 -5.67 21 142 2009-01-15 18:05:59 2007-08-16 10:03:45 4 2 132 0 109 146 0 405.10 31 92.26 CHANGED lL-hlDsLssspsssp...............stpspsss....sssps--DlLp.L--Ltppth.ppsp.....t................sp+pscsss........ssssssppppstscKsscospstpstshsssspsppp......tttsph.sshuShuuhWs..........WGshhusAp.h.psshcpAptsspcIpp...Ecsphhscplps.s.....stltsl.su...................cLt...hsTho....slhpslA.sl...socEhLpIHlsHDL..lsYsuL-.llhssFs+VMu.QVEGG....lhIthsp.sp......................t..tt..hp..ssss.+slshhpGhl.-GpKLs+ANhEshhccahpu+tsscpttcpupt..p......................-sssl+sSDIFluIQAlsh..........tustsp...t...s...h.hcsssssphsFslhL+D.spsIshpThSQuhPt+WhcWLDupt...............................tt.t-..-stslDPpEWVpEWlE-uLuLulGllAQcYVh+RMGl ....................ht.h-sLs.spsssp.................................tstsss...........ststs-pDlLs.L-p.Ltppp...p..st..........ts...........stppspsss....................ssstsp.p.ppt.s.tsp+sscssps.pttp...sspppsppt..........................tt.t.pshs.u....suuhW...............W.G.uh.huoAo.........tshcp..Apss...hpclpp.......Epspphscplpt.h.............s.lpsL.us..........................cLpt.s....hsThT....sllcslAPPI........uoHEhLpIHlsHDl..hsYsuL-sllassFs+VMu.QVEGGt...lslppspput.........................t..sts.hc..pssp.RslshhpGhs.-GpKLspAshEshspcahsscsshcptt.ppupp..s.......................csssl+pSDlFluIQsls.............ttssttp....t.ttsh....ht.pppspppls.FslhLpDshHsIsatThSQuhPt+WhcWL-uss...............................tt.tp..-ssslDP+EWVt..EWlE-sLsLulGllAQcYVs+RMGl....................... 0 30 62 93 +10143 PF10311 Ilm1 Increased loss of mitochondrial DNA protein 1 Wood V, Coggill PC anon Pfam-B_22448 (release 21.0) Family This is a family of proteins of approximately 200 residues that are conserved in fungi. 
Ilm1 is part of the peroxisome, a complex that is the sole site of beta-oxidation in Saccharomyces cerevisiae and known to be required for optimal growth in the presence of fatty acid. Ilm1 may participate in the control of the C16/C18 ratio since it interacts strongly with Mga2p, a transcription factor that controls expression of Ole1, the sole fatty acyl desaturase in S. cerevisiae responsible for conversion of the saturated fatty acids stearate (C18) and palmitate (C16) to oleate and palmitoleate, respectively [1]. 25.00 25.00 25.80 25.00 23.40 24.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.04 0.71 -4.74 22 113 2009-01-15 18:05:59 2007-08-16 13:16:10 4 2 109 0 85 103 0 171.60 30 86.20 CHANGED LSu+ollhu+shFLhsLAaahl+sPpslspsshVhlLGpuMpLsh..h.....hsp.pu.huhhulLhsh.AlsDLIsLht....sph....paapohsP.............................hRlhhaFhlssasYhspssh....................................................lpNplVFTYuFhEhhh.FhlassLR-ERpchhp+hs ..........lSopollhh+slFhhsLAhahhpsPpslsppshlhllupuMplPt..hp....hsp...sushuhhullhhhhulsDLlslhp....sph.....paapo.sP..........................................................................................lRhhlaFhlohhsYhhpssh...........................................................................l+NplVFsYsFhEhhh.FhlassLR-E+pp.hp+..h...................................... 0 14 41 70 +10144 PF10312 Cactin_mid Conserved mid region of cactin Wood V, Coggill PC anon Pfam-B_20647 (release 21.0) Family This is the conserved middle region of a family of proteins referred to as cactins. The region contains two of three predicted coiled-coil domains. Most members of this family have a CactinC_cactus Pfam:PF09732 domain at the C-terminal end. Upstream of Mid_cactin in Drosophila members are a serine-rich region, some non-typical RD motifs and three predicted bipartite nuclear localisation signals, none of which are well-conserved. 
Cactin associates with IkappaB-cactus as one of the intracellular members of the Rel (NF-kappaB) pathway which is conserved in invertebrates and vertebrates. In mammals, this pathway controls the activities of the immune and inflammatory response genes as well as viral genes, and is critical for cell growth and survival. In Drosophila, the Rel pathway functions in the innate cellular and humoral immune response, in muscle development, and in the establishment of dorsal-ventral polarity in the early embryo [1]. 22.50 22.50 24.00 29.20 19.90 21.20 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.34 0.71 -5.04 22 283 2009-01-15 18:05:59 2007-08-16 13:39:26 4 9 220 0 208 282 5 178.40 35 31.47 CHANGED p+tppstphccW.tcE-pFhLcQu+t+ucIRl+-sRuKPIDhLshtlchhp.................................pcsh-hph.h-P..slhpGL.shc-Lc-Ltp-Ichahp.LEpspt.ph-.aWpshhhlsccclpphcp.ptsts+uh..................ssVssDlpcllpsKohppLppLEtpIcpKlpSsps..lDhsYW.....EplL+pLpla..........KA+ApL+chap ...........................p.p-stphcpW.tpE-pFhL..cQAKh+ucIRl+-GRAKPIDhLuhhlph....................................p....................psshp.l-h...t-Ph..shhpGL.....ohp.....-hc-LhcDIcs.ahp...LEp.s...........p....Nh.............-.aWcshpslscDclp+hcp.htsps+uh...............................htsoVssDl..pplh..ps.KohppLpsL.ptpIctKlposps..........lDhsYW.....EpLLppLpsa...........hA+A+L+cha........................................................ 0 77 118 171 +10145 PF10313 DUF2415 Uncharacterised protein domain (DUF2415) Wood V, Coggill PC anon Pfam-B_25751 (release 21.0) Family This is a short, 30 residue domain, from a family of proteins conserved in fungi. The function is unknown. There is a characteristic DLL sequence motif. 
20.10 20.10 20.10 20.30 20.00 19.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -7.96 0.72 -4.28 15 152 2012-10-05 17:30:43 2007-08-16 14:57:37 4 4 138 0 111 166 0 41.20 42 6.54 CHANGED GAFRssKFS.tsshp...DLLllSEHpGRVHllDLRp....shps+QVIs .GAlRshpFS..tsh........DLLsasEcpGRltlsDhRp....sFsp+Qll......... 0 29 64 97 +10147 PF10315 DUF2416 Protein of unknown function (DUF2416) Wood V, Coggill PC anon Pfam-B_28778 (release 21.0) Family This is a family of conserved proteins found in fungi. The function is not known. 26.70 26.70 26.80 27.40 26.50 26.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.41 0.72 -3.80 12 63 2009-01-15 18:05:59 2007-08-16 16:18:41 4 1 62 \N 44 55 0 118.50 32 77.50 CHANGED sthspoPhPuhhhuusL...lt+s.hssps.....................shhtPoptoshhFGuAphLGuahIYDGDlpNGuGFshAWSsLYLlVsG+sSlpulhhG+lhPLsLoslAluNAslYG++Fh .................................................................s.....sP.st..h.s.....h..ps.h.tpps...........................shhtPoptoshhFusspulGuYhlaDGDhpNGuGFssAWSsLYLlVsG..+pShpul.h....hG+..hhPLsLoshuhsNuhlYGpcFh.................................... 0 14 25 38 +10148 PF10316 7TM_GPCR_Srbc Srbc; Serpentine type 7TM GPCR chemoreceptor Srbc Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srbc is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -11.88 0.70 -5.14 62 285 2012-10-03 04:04:29 2007-08-16 17:46:55 4 8 7 0 280 476 0 218.40 23 87.25 CHANGED M.hhh.hhsshlullhuhhshhlNlalLhpIh.....ppKs-hhLFYaRFhlDlhauhslshahsahllh.........ph.schh...h+sllhalshsspslushRuhlslhIolERllAsahPIhY+saRpplPshhIhllhlshulh-phlLFsaCshsl.slPhsCsshtCulNpCahpYWthpcpllashhhhhSllLsh+La..lasp......tppsspplS+..As+lALlDuhhlhlFshlPshhsshh....hhshpshGPhsslhKhhGssIEulllh+lLh+cp .................................................................................h.h..hshh.s.hshhhshh..hl.h............hh........pp.ph...Lh.h.h...+h....h.h.Dhh..h.u...............h..........h...h...hhhh.hh.............................h....s.p.h.....p...h...hh..h....h...shsh......hh.t..h..Rs..h.......l.sh..hI.sh-Rhh................AsahPlhapp...h.+..p.h..h....ss....h.h..l...h.h.h..h..h.h.hshh......-thlh..a.h....h..Cshth.phs.....s.CsshtChhsp..C..ah.pY..a.hh...c.l.h.hh..hh.....hhhoh.hlh..h+Lh......hhpt................t.tt...tspp........hp+...............ANp...lulhDshhhhhFshlPshhhs.h........................h.hp.hGPh.sh.+.hGhhlEuhlh...h................................................................. 0 91 105 280 +10149 PF10317 7TM_GPCR_Srd Srd; Serpentine type 7TM GPCR chemoreceptor Srd Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srd is part of the larger Str superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.76 0.70 -5.43 59 467 2012-10-03 04:04:29 2007-08-16 17:47:12 4 12 7 0 462 1891 0 209.10 18 81.09 CHANGED hplaaslhhlhulhhphhLlaLIlh+SP+tlcshphlLhspshsphhtshhshhsQ...........hRhl..sspholshlshG.C+ah.ushhCahsaphhhpshhtushslllohhaRYhhLpphp...pppphhhhhhlhYhhslhh.............lhhhhs.sst.tlpthpphhaPsY...shs.h.s.......lsGhhshpshsshhshhhhslsshhlslhhhhhR+KllphLpp..s.phSpso+shp+pLlpuLThQshlPhhhal.shhhahhsphshhph.hhpahlhshsslssllsPllolYFlsPYRptlhc .............................................................................................................................................................................................................h....s....p.hhhhhhh...........p.s..ht........ht....h....hl.h...........s..h.......p.h.h.......h....s..h.h...............................R..l.............t.........h..h.h..h............G...s......p..h.......h........t................................h...s....h...............h...........................h................................................h.........s....h........h.......h.s.h..hh...+..h......h......h........................................t..........................h......h......h...........h......h..h....h...h................h....................................................hh..........h.h................................................h...........................................h.........p.h..........t.h..p......................................h..G..............................p....h...................t.............................................................h................h......h..........h....h....h..h..............h............h.........h......h...........h..h..h.....h.....p...t....t....h........h..t..h...l.........................h...s...t..ptt....tpph..........h...............h..............sLsh.Q...........s...h...l.......P..h..h.h.h.h..............hh..........a...h....
...h.........................h........t....h.h....p...................h................p...hh.....h.................................h........s...hhsP...hh.hhh...l....sY+p.h................................................................................... 1 204 272 462 +10150 PF10318 7TM_GPCR_Srh Srh; Serpentine type 7TM GPCR chemoreceptor Srh Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srh is part of the Str superfamily of chemoreceptors [2]. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.97 0.70 -5.61 186 1040 2012-10-03 04:04:29 2007-08-16 17:49:53 4 23 6 0 1028 2479 1 244.00 20 84.56 CHANGED sSsphhshshHllshlslPlalaGhYsIlhKTPppMcsVKhsLlshHhasshhDlhlohlshPalhhPshuGaslGllphh.ulss.lQh....alhhshhs............hhssSllhlFENRa.h.lh.spt...hph+ph..Rhhahhhpahhs.hhhhhPshh.pl.....P.-Qppu+hhhhcph...P.C.....hspphhpt.sshallsh.-.t...hh.h..hshshhhhhhhhpllhFhhhhhhhlhpp.p....p.sh.SppThchQ+pFhhulhlQhslPhlllhlPhhhhhhshhhs..aas.QshsNls.hlhhuhHGlhSTlshlhlH+PYRchhhphhtt 
...................................................................................................................................................................h....h..h...h.h....t.hPh....h...h.s....h..ah.l....h....h....p.o....s........hp.p.h+h.hh.........h.........p...h...h...s...h...h...h...-...............h....h.....h.......s...h...............h..h......h.........P..h................h..h..............h....P......h....h..u...h....h......s......h.....G..h....h.....t.............h......t.....h.......s...........h.......h................h...h...h...h...h....h...h..h................................h.h.s...hu....h.......h.....h....h.....F.....-....s..Rh....h......l.h..t..............h.h.phh........+h....h....h....h......h...........h...h...h.....s.....h....h......h.....h...h..s..h..h...h.......h........................s.......p...Q......p..........t....c................h..........h....p...h.h.......................P..s..............................s.............h...h...p............t...h...h.l....h..s....s.h.......................hh..h......hhh.h...h.h....h..h.h...h................h...h......p...h...h...h..h.......h.h...h......h...h.....h........h..h.t.t.................ph..S..t..pT..h....p.....h..Q.+..............p.h........hh.u..l...h...............hQ...h..............h......l............sh.h.....h.h.hh.Phh..h.........h.h....h....s........h....h...ht..................hhs....p.......h..............h...s....s........h.....h...h..........hhh....s...hH.Gh...h...u...o.....lhh.lhhppsYRphhhphh..h....................................................... 0 287 375 1028 +10151 PF10319 7TM_GPCR_Srj Srj; Serpentine type 7TM GPCR chemoreceptor Srj Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srj is part of the Str superfamily of chemoreceptors. 
The srj family is designated as the out-group based on its location in preliminary phylogenetic analyses of the entire superfamily [2]. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 24.10 24.10 24.10 24.10 23.90 24.00 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.95 0.70 -5.31 35 202 2012-10-03 04:04:29 2007-08-16 17:50:14 4 9 6 0 199 1601 0 225.70 31 86.97 CHANGED Ma.hsWhahalP+lFusLSFllNPlFlYLIhoEpps.phGNYRaLLlaFAlFNlhYSlssllVPlslaua+YsFhlalscGhFh...-poc..ls.ahlusRCuhlusSYAlLllHFlYRYLllassphhpchF.shhlhhShhhhlhahssWp.hlCahhhtus.Eh.RpYIRcsFpcpYssDShclshluuLYt-uSscshhRSWhuIhlhTslSshSlshallluhhIhpKLp..phssshSppTp+hQppLh+ALlVQTlIPIhlSF.PCllsWYuPlFslsLuphhNYhpllAlusFPFlDPlAIIhhLPsaRp+l ..............................................................................................................................................................................................h..ha.....hPh.h..h.h.h.u.h...hh....N....s....h.h....l.a.llhppp....p...hGpY+aLLh.......h..FuhFsh.h..h....Sh..hp......h.l....h.....P............h..................s..............h....h......s..........t.......s..h....................h..h...h......t.........G...................F.................t.....................h.t...h..h..h...s.........R................C.......s.h......lu.h...o.Y........u....l....L.........HF.la.....Ra...h.......s.............l..h........t..................s...p..h.h...t............t.....hh.............h..h..h.........h....h.............hh.......hh...h...h..a.h..h......hW..h...............h..s......h..h....h......h...s................s.........E.....h..+...........p....Y................l................p.................t............s..F...c........a.....s.......s...s......p.......h................s...h....l......h....s...h........a...............................t..........s......o.............t.........h..........h...........p.............o........
...h.......h.....u..............h......h.....h....h.....o........h.....h.....u......................h.................s.........h..........h......h......h...h..................h.............u....h......h..............I...h...t....p.l.............t.......t........t..h.....S..t.p...o....p..hp...h...pLh...puL.................hl................Q...........o...............h..........I...............Phh....h...S.a...P...s....h...h.....s....a....a.........s.............h.......h....t....l..p..h..............................h...h...h...........t...s....h.sl...uhF.s.h......h.DPlA..lhhhlPshRpt....................................................................................... 0 50 68 199 +10152 PF10320 7TM_GPCR_Srsx Srsx; Serpentine type 7TM GPCR chemoreceptor Srsx Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srsx is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.69 0.70 -5.23 23 428 2012-10-03 04:04:29 2007-08-16 17:50:27 4 13 56 0 362 23544 3 174.00 18 62.30 CHANGED hllGlhGNhlhIhlhh+cKpL+S+sshL.slpClucllplsGplhhsh.hhh.thphspspCFhhlshhlauhshQuslhLhlslDhLIhVpFPhhY+plpppt..YlhhthhhPllaSshlhhhGalttss-..hllhCssPhALsspuh...phashSslhlslhllllYhhhhhl.....h+tpspp..............pssshp+lhKSLploVslalhuWhhsplsstlhlthststphtthlphasuhhlhluhopsaaVhhhpSsEYRpsa+pha ...................................................................................................................................................................h..hu.l.h.G..N...hh.h.l.h.h..h........h...p.....p.....................p..L...+.......p.............h...................h....h.............l.s...h.h....s......h.h.c....h....h.....h..............h.......s........p........h.....................h.......h.........h.....h............h..........t....................h.................t.............h...............t..................t........t..........C...........h......h................h............h...........h.........h............h........h.........h..........h....h............p.......u......s.......l......h...l.......h.......h...u...h..D..p.........h..l......u....l.....h........P....h...............p.........Y....t.......t......h.p........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 107 136 265 +10153 PF10321 7TM_GPCR_Srt Srt; Serpentine type 7TM GPCR chemoreceptor Srt Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srt is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
20.30 20.30 20.30 20.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.02 0.70 -5.67 39 386 2012-10-03 04:04:29 2007-08-16 17:51:09 4 7 7 0 383 347 0 231.10 25 87.42 CHANGED p.LhYs.hpuhsL....YsCs.....tp.hsps.GhpRPhhGhhhlshGlll......hLYlPshhslh+pc.h.phssYKlMlhLulhDhhulhlsSlhTGhLshhGssFCsYPphIalsGuluhusWhuuChssllLslsRss-ls.phhhphlFctphhahllhh.llYuhYhhhFTpPllFsopahuWFFDPhl..G+ssc.....hYhNhscshNNhllsssTshLYhhhshhlhtKht.s...sotthpKsppQIhhQusllChFpssuAhIYshMpFhtss.hllllGplsWphupGsssllYLThN+TIRpull+hlh..+sh+hpp ............................................................................................................................hhGh..hh.Ghhh................hh.Yh.hhh.h.h.h.p.p..p...h.....s..sa.........pl.M.hhL...................uh...........hDhh.s.hhh....s......s.......l...h..sGh.hh.h.......G...h....s....aCph.P.......h..h.h.hh.................G...hs...h............s...................W.hhsshhs..hhLhhp....Rh.h.pl..................hh.h.......t....t...hhh....h.............h.hhh..h...hYh....h.....hh...h.......h.....h......s..........s........h....h...a..s....s...th.....sa.......hh....s..Phh..........t....t...p.......................Y...sh.p....hh.N.N..h....hh...shhh.hhh..Yhhhh......hhl.h.h..p.............tt.....p...................h................p..h...............p.................l........hh.Qsh..hl.....Chh.ph........h..s.u...h.l..Y...sh..h.p.h.h.....s...s......h..hl.h.h.u..p..h..hWph..........spu.ssshlYlhhNpsl.Rpthhphhh........h............................................................... 0 132 152 383 +10154 PF10322 7TM_GPCR_Sru Sru; Serpentine type 7TM GPCR chemoreceptor Sru Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Sru is a member of the Srg superfamily of chemoreceptors. 
Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 24.70 24.70 25.00 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.16 0.70 -5.32 38 299 2012-10-03 04:04:29 2007-08-16 17:51:26 4 5 5 0 293 318 0 226.40 25 90.73 CHANGED oIaGsppYhsapasahs..hs.sllAhlPhlYllPThhllh+Ihhhahpshhpps.p..lNhplFhlIhlsQlhshhFFlsDalhlRLPsTGlhTSWCAs.pPNcaLpllhhhshYh.sYsshlaPhLlsllRLlllhhPppa.pclsp+Ih.chulPhlalaPhhhohhhlPAlGhC+QlthPa.FGulhlhapsshauhpN..shh.hlhsohhahhhsllhNhlLahKLcph.p....phsspppsptspKAElSLTlToluMllsalsNshhslsalh.....shhsYhlhlRPFGNDh-ssllPWlFYLTHPlF+++pp ......................................l.....a.paphp.......hs..hhhhhshhh...shhhhhp.hh.hhh.....t...t.......hp..lF..........h.h.............................h.........shh...........h..hh.-ahhhR.l.....P..o...u........h....h.Ts..aC.....u.....p.........ts..p......h.....l.....hhh.....hhh.hh...h...tY.s....hhs...hl...hshhRlhhhh...............stp....................p..............h...........th........lh..hh..hs.......hhhhhs.hh.hsh.hh..s.uhChph..Pa..aG.ul.hlh........................t.......h................h.s................h.h.............shh..hh..hhhh..hhhhhhshhhhhKltphp....................t...pp..p.+uEho....lohThh....hlhsh..l.hs.....hhhh.....h..hh...................shh...a..h....h..hhRshh.DhphhhhshhFYhTHPhF+pp..h.............................................................. 0 41 58 293 +10155 PF10323 7TM_GPCR_Srv Srv; Serpentine type 7TM GPCR chemoreceptor Srv Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srv is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.64 0.70 -5.28 25 258 2012-10-03 04:04:29 2007-08-16 17:51:59 4 9 8 0 256 5739 1 205.70 17 78.89 CHANGED aashsllolPlYlhllhsLlchRptshs.apTsFYplLlp+sIADlhshlsahhstsh.phshhcsFYaphQpaY.AuhhashhYhslhIRssGlshLohpRahsIshPp.phsphhpssphapllhlaWlsshllull...sLtssshtYcshcs..hshls-cshlppsohhAhlhlslsClhhllsYshLahhlR................ppops........ho+uhpREh+LshpVhlllhA.hshhsaahhpshhu.....pstsss..lFYhRhlYPlssGllSaINPaslLlhN+-lp+plhpplps .....................................................................................................h.....h...........h....................hh...............................a.a....l...h...h..t.hhDl.h.............h.....h..........h..h...............................h..................h..........h......h...h.................t............h.....h.......s...............t.....h...........h.............h....h...h.......h.h...h..p.h.....h.....s....h...........h.....l.....s...h...p.....Rh..s.....lh..h.......P...........................h.....p............................h.h....p.t...........................h.....h.....h.....h.....h....h........a......h.............s....h......h......h....s....h................h.h..h.......s........p...............a.......tt.................h..h....h.....h............s.....................h........h...............t..................h.........................h.......h.......s........h........h......h....................h.........s.......h........h..........................h...............h.....h....h.h....h....h...h.p..........................................................................t..............p..t........t.p..E..h....p...L....h...h..........s.........h.....h....h.h............h.......h.........h......h........h......h......h.....h...h............h................................................t.......h...h.....h......h...........h....a....s....h......h.......s....h......h........s
....h....h....s....shh....lh..hhsp.htt.hh.....h...................................................................................................................................................... 0 83 107 256 +10156 PF10324 7TM_GPCR_Srw Srw; Serpentine type 7TM GPCR chemoreceptor Srw Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srw is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. The genes encoding Srw do not appear to be under as strong an adaptive evolutionary pressure as those of Srz [3]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.16 0.70 -5.31 90 944 2012-10-03 04:04:29 2007-08-16 17:52:27 4 31 52 0 866 4349 0 230.20 17 73.85 CHANGED thph...hluhhulllNlhHlhlLTRKuMRssSlNllMlGIulsDlhshhhslhphh..hhhth............cC.h.....sss.oYhhhh..hphhhtslpchs+RsSsWLulhhAllRhLll+.Phss+hppLupsphuhhllhhlhhhshshohhha.hphp.lh............p.t.hh...........C........t..h...t...sthtYhhhhschh...pthhhp.hhhhhsulh.cllPsllhPIlTlhLlh-L+Ksp+p+pphp....ppt............cscpoT+LVlhhTIsFhluEhPhGl.shhl.............phh...hhpss......sl....hhlhpphshhhshlhslNussHhlIChhMSSQYRpTs+plhttp 
..........................................................................................................................................................................hh.hhshhhN.h.h.p.h.h.l..L...p.....t..h.t..t..s.h......hhhsl.shsDh.h.hh..h......h........................................................h.........h.h...h.....h.....h.................h....hhpth...u.....hals.lhh......Ah..h...Rhh....l...t...........p..........h........h..t......s.h.hh....h..h..hhh..h..h..........h.....h.........h.....h...ht...h............................................................................s..................................................a............................................................................h....h.....h...h.s..h..............h.................p...............h..........l..s....s...h...h..h...hhs.h..h.....L..h.....h.....t.........l.....hph......p........p......t...p..tth.....................t................t...................ptppoo.....h.h.l...h.h...h..hh...ahl....s.ph..P....uh..h.hh.................thh.......................................................h...h.........h..........h...h.........h.h..h.hss......p.hhlhh.hSppa.Rp.hh.hh...h..................................................................................................... 0 227 294 847 +10157 PF10325 7TM_GPCR_Srz Srz; Serpentine type 7TM GPCR chemoreceptor Srz Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srz is a solo families amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. The genes encoding Srz appear to be under strong adaptive evolutionary pressure [3]. 
21.30 21.30 21.30 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.98 0.70 -5.27 62 372 2012-10-03 04:04:29 2007-08-16 17:52:46 4 15 4 0 367 410 1 207.80 21 75.85 CHANGED hhhhhhhhhhlhhhllhPFYlY...VhKlNRc+D+phhlaP.lssHFYch...l+hsYhlhhhhlhh..hhhhhhhtt...................hhhhlhhhhhhhhhhh....LhllsplFalLlhLLAlp+FllYFF.PptE+hlhhspphh.+hlhhlYlhhllK.-lhhhhhhhhph.t..................phhthhahhh........................hhhhs....lllhlSulLYIPIhISlR.Khup.Lt.SsQpspPppYIaaQolhVhlhKhlhl.hhlhhhh...hsthhhh..........hhhhhDhlooPLIIQlSYLsCN+ ................................................................h................h.hahh...h...s..pcp..t.............................ha...lhp.hhath...hhh.hhhh.hhhhh...hhhh.hhh...................................h.hhh..h..hh....h.hhh...hhh........hhh....hs..psh..ph.l.l.llAl...p.....+hhlaF...h..P.p..cphh.h.pp....h...h.h.lh..hhYhhhhhh.p..hhhhh...h..h.h.h..........................................h..h.h....hh................................hh.hhp..hhhhhoshlYlP....l..h..h..pl.p.+.ht.....h..s.t.....sps.ppalhhQhhhl......h..hhKhh....h.hh..hh................h.....................hh.hhDhhh..hPhllQhoYlhCN.......................................................... 0 76 76 367 +10158 PF10326 7TM_GPCR_Str Str; Serpentine type 7TM GPCR chemoreceptor Str Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Str is a member of the Str superfamily of chemoreceptors. Almost a quarter (22.5%) of str and srj family genes and pseudogenes in C. elegans appear to have been newly formed by gene duplications since the species split [2]. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -11.93 0.70 -5.30 178 1469 2012-10-03 04:04:29 2007-08-16 17:54:00 4 34 10 0 1456 2149 6 223.40 21 82.72 CHANGED pphhphhuhhhuhhhNhlLlaLllt+ut+phGsYKaLMlhFolhulhaohl-hlspP...............hhHsass..ualhh.h..p.hth...spthtthhl...............s....las...uhauhsluhlulpFlYRYhulppsp.t..l.chFpGhtlhhWhhhslhhGhhWshshahhhtssp......sscal................+.pph..hcpYs.lshsclsahuhhaa............spsG..p....lphpshhshhhhshlhshsahlllahGh+hahph.pcltph....Spps+cLQpQLFpuLllQollPhlhhalPsshlhhh....Phhs....l...ph.shhushlshslulYPslDslshhhllpcYRpslh .............................................................................................................................................................................................h........h..s.......s.h.hh...hh..h..h...........h....u......Y.+....h....hh....h...h....sh....h......t......h.........h......a......s......h..h......p..h..h..h...P......................................................h.h..............t....hhhh........t......................h........h..h..........................................hh.s.....s.....a.s.........h.....h..h...........s...h..p....Fh......a.......R.a...h.sl...............t.......................................h...t.h.h..p.................h..h..h..h.................h.h......h....s........h.h...h..h..........a.....h....h.............h....h..h..h..h.h..........s...........p..t....hh........................................p.....h......t.....h...t.....h...p.........p...p....h......h.h.s........hha......................t.p.s...................h..p.h...........s.h..h.h..h.h...............h.................h.h...h...............................h...............p................h......h...h....h.....h.hu....h....t...........h.a...h.th....pph.t...........S...pplp.p..Q..................ha...h.s.....Ll..............h..............Q..............s.h..h............Ph.h....h...hahPh...h..h.h.h
.h.h....P.h.h.t....................h..ph....t...h....s.t.h.h.....h.......h.h.s.ha.Ps.h.Dsl.hhhhl.ppaRph..h............................................................. 0 456 563 1456 +10159 PF10327 7TM_GPCR_Sri Serpentine_Sri; Serpentine type 7TM GPCR chemoreceptor Sri Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Sri is part of the Str superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.80 0.70 -5.46 54 363 2012-10-03 04:04:29 2007-08-16 17:54:30 4 17 7 0 359 1625 0 222.70 22 82.08 CHANGED hslDFssPhaLlhaaalIGslSlllNhhsIYLllFcSsKlcsF+YaLLhFQlsshloDlplohLhQPhsLaPlhuGashG..hhsph...htlooHhthshhshlhshQltuLhhCFlpKHQuIuplh..ppp..hsp....hhhhhhhhhshhaPhhsshshhhsslsc-pphcalcpsaPphhspFtsLssFslYphs...ahhhhhlhshhsshhshslhshhshchhphLppl+pplSspsYp+H+sAlpSLlsQhhsoslhh.lPshhhhhllhhthstuQ.....hIsphhhslhssHSslNslVllhosPsYR+hl 
......................................................................................................................................................................h.h..h.h.....t..................t.....a.p.............h.h.h.......h.........h...Q.....h.........s.......h.h...s-....h.......h.s.....h.....l.........hp..s...h.............l.....a........P.........h...h.......u...G...a...s..h....G...hh.s.ph............................h.............s....a....h........h........s................h...h................h.......h......h...h...h...........h...p.............h.........t....s..............l.hh..C......F........h....h....K.Hp..s..l..s...t...h...............p.h.h...h...h...s...p.................h..h...h.h...h..h....h....h.....h.....h....h....h..h....s......h...h....h....h....h...h....h..............................t........h.............s....p...p.............p...p...h....t..h...................l......p................p......p............a.....P..............p........h....h............................h.......p.........h........t....h...........l........a......s...............h.h...h...h...h.......h...h.........h...h....h...h...h....h..h...h...h.....h...h.h..h...h..h.h.h...p..h...h............h...h.....t.........h...p.........p....h.....S......t...s..h....ph...a.pt.s.l........h..............o.....Lh..h..................Q................h....h.................h..s.....hhh..........hP...h..hh..h...h....h..h..hh.......h...........p.....................h.s.......h.h..h.hhs..p.u..h.s.h.hhh..s.aRth................................................................................................................. 0 104 134 359 +10160 PF10328 7TM_GPCR_Srx Serpentine_Srx; Serpentine type 7TM GPCR chemoreceptor Srx Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. 
Srx is part of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.91 0.70 -5.29 80 586 2012-10-03 04:04:29 2007-08-16 17:54:54 4 18 10 0 579 7241 0 219.70 18 82.16 CHANGED hhluhhGhlhNhhlhhthh+.h.shpsSFshlstspuluNslhsh.sFLha.hsPhslhshph..h.p..pohhsthhshhsYth.ushsplhlolNRFhAlahPhhYpplashphTph.......hlhhhahh.........uhhhhhlhhhhhsCthhas.phhsat..spt..hC.shh.sh.h.....hhhhhslshhsshlsllThhKlhthppph...thspt..pspp+pp+..phphhhQoshQshlahl-hlshahl.sph............hss.phhpFlssohsWhhlHshDGhlhlhFN..pclpphhhpphpt .......................................................................................shhG.hh.h...N...h...h....h....h......h......h.....h.....p.......................p...s...u.....F..........h..l......s..h........p.sh.s.s....h....h....h............h.....h......h....h......h......h......h.h.....P..h...........h......h...........t........................h..........................s......h.......h.......t.........h.....h...h.......h....h...h.......a.................h.....s...................h........p....h....h....h...uhNRh.hslh.h.s.....h....h....h..p......p...h..h...s.h.............T...h...h................h.h.h.h..h..h..hh....................................sh........h...........h...h...h............h.........h....................................t..............C......h.....h....h........a....s.........p......h......h..........a..........................t.....................C.......s.......h............h.........h.................h..h..h......h..h.....h...........h...h........h.............h......h..h......s.....h...h...s...h....h................+....l.h...h.......phth..........................ttt......t.t..p...p...pp......p+..............ph...t...h.h..........h...Q.............s.....h....h........Q.s..h........h..h........h..
..h......p...h...h.........h.h..h..h......h......................pp....h...h......F.....h.....h.s..s...h.....h.....hh...p..s..h..-.G....h.lh.hh.p...ph.........t............................................................................................................................................................... 0 177 228 579 +10161 PF10329 DUF2417 Region of unknown function (DUF2417) Wood V, Coggill PC anon Pfam-B_22799 (release 21.0) Family This is a region of a family of proteins conserved in fungi some of whose members also have the Abhydrolase_1, Pfam:PF00561, domain in their sequence. The function of this region is not known. 25.00 25.00 32.30 32.10 24.10 24.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.62 0.70 -4.95 21 122 2009-12-10 16:57:14 2007-08-20 13:09:06 4 4 112 0 92 118 0 228.20 36 43.81 CHANGED sE+p.LLs.................................ptsalsPDDPt....VSPaNLap.....lRhlRslsslhlhlohlhahhhLlS.Fl....u.PuhpsRGsuFhshshshlslhh.llsLhF..FulPSphtplhuhllusLLllDhlllluVstlRhc.GWlGluollWAslhulhssltDhhV.............................thGKpc-EERL.....TGR.......EoR+oLpEWlslhlcolhhlllhllshLhTlTLhLpAhDus..................hts.GphahVDssp.apVHLsCh ..........................................E+p.LLspp.............................................tspsaLsPDDPs.......VSPYNLap.....lRhhRhlsslhlhlshlWalhlLlS.Fh....o.Puh..psRGuuFhshshshlolhs.hhsLlF....FulPSp..plhshsluslLhlshhlhluV.phRhcEGaVGlsoslWAslhulaslhtshhV........................................thGKpcEEcRL...............TGR............EoR+oLpEWhslhlps.lhhhlhhllshLhohTllL+uhDup..................hss.GphaaVDss+.YplHltCh............................... 0 18 44 76 +10162 PF10330 Stb3 Putative Sin3 binding protein Wood V, Coggill PC anon Pfam-B_24989 (release 21.0) Family This is a family of the conserved N-terminal end of a group of proteins conserved in fungi. It is likely to be a Sin3 binding protein. 
Sin3p does not bind DNA directly even though the yeast SIN3 gene functions as a transcriptional repressor. Sin3p is part of a large multiprotein complex [2]. Stb3 appears to bind directly to ribosomal RNA Processing Elements (RRPE) although there are no obvious domains which would accord with this, implying that Stb3 may be a novel RNA-binding protein [1]. 25.00 25.00 25.60 42.90 22.60 17.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.10 0.72 -4.30 17 120 2009-01-15 18:05:59 2007-08-20 13:31:59 4 1 112 0 92 118 0 95.00 50 22.14 CHANGED lTPphLup.....lLLp+GPLAIRHlhsaLopplPuFuclssuKtRRLlhuALE.......uGs.cssVlFEKlGWGpWs.....A+chspPtphspphptos.u .........ITPuhLAph+Ls-ILLp+GPLAIRHIhuaLTssVPGFutlssuKtRRLlluALE...........uGsh-ssVlFEKVGWGpWs.....A++tspssphspt......s.................. 0 13 44 77 +10164 PF10332 DUF2418 Protein of unknown function (DUF2418) Wood V, Coggill PC anon Pfam-B_29723 (release 21.0) Family This is a conserved 100 residue central region of a family of proteins found in fungi. It carries a characteristic EYD sequence motif. The function is not known. 19.00 19.00 19.50 19.20 18.80 17.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.25 0.72 -3.70 19 132 2009-01-15 18:05:59 2007-08-20 15:52:43 4 2 128 0 103 132 0 101.10 36 22.07 CHANGED pcclapLpVW-Ps.aslplFshFSPsplllhhhh..........sssshpshhhhllhoh.hhhlhp.pFpphlcDcpllppEshpEYspKaVcP+hpshppDsslss ...............p.c-VWpLslWDPtsh..slpLFshFSPsHlllhhhhhs.......................Ssshhpslhlt.sllohph...hhLhs.pFsQphKDptllppEVhpEY-sKaV+PRhpshh+DVusp.h............ 1 28 58 89 +10165 PF10333 Pga1 GPI-Mannosyltransferase II co-activator Wood V, Coggill PC anon Pfam-B_50403 (release 21.0) Family Pga1 is found only in yeasts and not in mammals. It localises in the ER as a glycosylated integral membrane protein. 
It binds to the GPI-mannosyltransferase II subunit of the GPI and it is responsible for the second mannose addition to GPI precursors. The GPI-anchoring complex is a glycolipid that functions as a membrane anchor for many cell-surface proteins [1]. 20.00 20.00 20.30 20.30 19.90 19.50 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.06 0.71 -4.63 5 47 2009-01-15 18:05:59 2007-08-20 17:40:11 4 1 43 0 29 37 0 180.10 31 85.69 CHANGED lsllaLCslVlANTETapl+VP+-FPsposttpspuoY...sshlshIsL+NlshspIs....oTclssssssYV.ELscLp+sETYQVKICWTAIDPISIsDlsallIPHuTtFpGTlS-cA......RlhVpFcVpuDSYPlLsDsphVPVsVSllslKLGIPVDLYslLlYlllVl..uISlllshcPapLLc .............h..lhhhls.hlhANTEoh......Lh+VPp..sF..s......lp...ps.....tssssht.................h.sshls...h.o..lssps..h.ophh........sssh.....ph.c.s..Tpal.cLss.....L.....p.....ps-s.Yp......IKlCWsAhcPhshpslpphhhs+.sthttohsD.h............clh..lpa..p....hh..u-..SY........s..............ppp.shlslpls...l.ss...hL.....s...IPl-lYshllhhl.ls...hhhlh................................ 1 3 13 25 +10166 PF10334 DUF2421 Protein of unknown function (DUF2421) Wood V, Coggill PC anon Pfam-B_39020 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 
24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.17 0.70 -4.74 21 168 2012-10-02 19:04:43 2007-08-21 09:54:30 4 10 89 0 134 204 0 205.10 18 19.91 CHANGED PR.PsSu++tlcpsLupslcsluslastlhsahsp............sschclhuc............h.thhlpltt+LsuLpthlshLpaEhohpGhWPpppYtclpsLhp-lspLhspLhhlhspL..Pttah.cLhcpsGhhcpphhu-lhAVltMsppAL+oGssLPclhPssLsh+ph-hhcpphtstc...............................t.lshchlcspshppasVulshhhplhs+l...D-llllVKullGEta ................................................................................P.P.oupphlRpslupslhtlushYs.hlh.sthtt.............................................t.t..h...p...................................h..phh..h..p..h..ttpL.t.slpshlthhpaE.sl.tGpFP..p..p..p..Ypplhphhp.pllphl..stl.h..h...sh...p....pl...............s.............p........p....h...p..pt.l.hph.hth.........h.cp.........phh.up.lh.slhthlusulcstpsLPthhs...............h......p..h.h...h..t...........................................................h.p..thh.p.th...................hh..th...cphh..hp.hhG...................................................................................................................................... 0 47 81 117 +10167 PF10335 DUF294_C Putative nucleotidyltransferase substrate binding domain Bateman A anon Bateman A Domain This domain is found associated with presumed nucleotidyltransferase domains and seems to be distantly related to other helical substrate binding domains. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.37 0.71 -4.55 86 1001 2012-10-01 22:14:54 2007-08-21 11:02:28 4 14 661 0 348 952 154 140.10 29 24.93 CHANGED shaLtt.hApsulphpsP.LGhapplhsppps..ttttlDLK+tGlhPlVcssRlhALpp.................u.lpsssTh-RlctLtptGhlstchspsltcAachlhplRLppQlpph.psGp.....ssshlssspLsphERptL+-uh+hlcchQphlphca ...................................................................t.halsthucsAlphpsP..LG.hFpshhhc+..sG...ttttlDlK..ct..GlhPllchsRlhALtt.................G.hp.s...ss.Th-Rlcslh..c.t.s......lLspppupsLp-AachlhplRlcpQhtph..ppspt..........ssN.tlssc.pLsphE.Rc.tL+-uhpllpphQphlth+a.................................... 1 100 229 306 +10168 PF10336 DUF2420 Protein of unknown function (DUF2420) Wood V, Coggill PC anon Pfam-B_32350 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 21.00 21.00 22.00 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.23 0.71 -4.49 13 112 2009-01-15 18:05:59 2007-08-21 11:16:24 4 1 110 0 84 116 0 106.70 27 16.46 CHANGED shlhcssulhctols-hauslRp.h-pt.GphhssscELlLshspL-.LslsEDNlYsccIohsDllslFchLpppohpptc.slPcsLshpLo.spPRFlo+YNsLs-hscsstu ....................h..hcstulhppslsclhtuhRphlt...sphh.sssc....ELllplcsLs.LplsE..Ds...s..asp..ploLppllplaptLpppst.st....PpsLhlpLo.opPpFhscastLsphspps..s.................. 0 20 44 70 +10169 PF10337 DUF2422 Protein of unknown function (DUF2422) Wood V, Coggill PC anon Pfam-B_42729 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. This family is the C-terminal half of some member proteins which contain the DUF2421 Pfam:PF10334 domain at their N-terminus. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 459 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.39 0.70 -5.70 27 237 2012-10-02 19:04:43 2007-08-21 13:24:18 4 14 109 0 197 260 1 360.00 14 36.09 CHANGED hhhssWhh.pL.Dhtoh+lhhRshhsshhshhlhhhssshphhGsuuYLhslluhls.s.thshshhlhhhlhhhlulsluaAhu.llshhhuscsRtp..........................ssssshsssspsthhpu.hhsupsouVhslhLhht..lahhshhRs+....aphshIhs.IhsslhhsaushhPsh.tshslsttLlcPhulululshssulllFPhooshhhhpph.tuhlshL+sslphppsahpohcsss..h.................................pplcpshtplpshhstlcsshshhhhEhuhG+hsssDlpslhphlRplhhsssGLshhhphhpch........................................................sclspapp.pshphhc.......s.tc............phshpphsclh..ptlcc.hsshlcsscpuLppl....sphLphssph+...t.hh...hppptptp.phttph......tshtpphcpphpsFtsscphtLhthtp ..................................h.............s...hhhhh.........+s.ls.hlshhhh..s.shph...h.s.p.hsahhslhshhs.s.hh.hhhhl.hhhh.hhuhhhuhuhs.hlshhhu.tsh.t......................................................hpst..slhhlhhhh...hahhshhRsh.....h.hshl.hh....lhhs..l.hphushhsh....shph.sp.hhhshhhuhuluhssslhlaP...osp.thhhpth...hthlt..lpthl.ph.pht..hh.ts.t.....pt............................................................................ptl.ptth.ttltshhsthpsshthh.h-huhup..hsspDlptlh.p.h.h+plhh.h.uh..hhphh..h..............................................................................................................................................................................t.ht......hp...t......................................t.h.p.hh....htp..t.h.t.....ht.slthh.......ht.....t..........................................................................t............................................................................................................................... 1 55 107 167 +10170 PF10338 DUF2423 Protein of unknown function (DUF2423) Wood V, Coggill PC anon Pfam-B_46946 (release 21.0) Family This is a family of proteins conserved in fungi. 
The function is not known. 27.60 27.60 27.60 27.60 27.50 27.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.05 0.72 -3.98 20 136 2009-01-15 18:05:59 2007-08-21 13:32:42 4 4 128 0 108 129 0 44.80 43 29.28 CHANGED MAKSLRSpo+hps+ohKR...csVFthss-ARspRlusKLccphtppc MAKSlRups++ps+uhhR...psVFusstcARscRLSsKLp-hhtp.p...... 0 33 61 95 +10171 PF10339 Vel1p Yeast-specific zinc responsive Wood V, Coggill PC anon Pfam-B_50673 (release 21.0) Family This is a small family of proteins from Saccharomyces and related species. The function is not known but member proteins are highly induced in zinc-depleted conditions [1,2] and have increased expression in NAP1-deletion mutants [1]. The S. cerevisiae genes are named VEL by association with Velum formation in the wine making process http://www.ajevonline.org/content/48/1/55.abstract 23.00 23.00 289.00 288.80 19.70 18.50 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.48 0.71 -5.16 4 33 2009-01-15 18:05:59 2007-08-21 13:58:00 4 1 18 0 12 21 0 189.20 81 98.25 CHANGED llshF.shIsl..ssAlRFDLTNlTCptL+GPHCGTYLL+VsGpNuTaLGQphFVGhDALTpstsDhatRhLcpEsRhIPRLTTlAp.N-TsNFpPhhFTTshsTCNPQSIEsAhlPFlNTVTsEIpYDSWA.Tu.NAShITGLANQLhNuosYGVQVAoChPGFssslhsoPTVNlFNs--slPSWCpAIElcAVCPhDsGFs ......hhhh.hhls...ssAhRFDLTNlTCptL+GPHCGTYlMEVVGQNGTFLGQSTFlGADVLTESAGDAWARYLGQETRFLPKLTTIAS.N-TKNFSPLIFTTNI.TCNPQSIGDAMVPFANTVTGEIEYNSWADTADNASFITGLANQLFNSTpYGVQVASCYPsFASVILSTPTVNIFup--TLPDYCTAIQLKAVCPP-AGFs... 0 4 4 8 +10172 PF10340 DUF2424 Protein of unknown function (DUF2424) Wood V, Coggill PC anon Pfam-B_51256 (release 21.0) Family This is a family of proteins conserved in yeasts. The function is not known. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.39 0.70 -5.82 5 116 2012-10-03 11:45:05 2007-08-21 14:05:56 4 3 60 0 80 2905 696 294.20 23 74.90 CHANGED LpFLs+llslLPItll.....hcSIsllshs+R+LslDhLoRlFhRpShhLhDctICpYVLNPla-.lu.sLaK.ph.s.o..chslPp-Dpps.psslFsc+hlNpshhpSphaWhtph.PcsFcPclD.PlLLYaHGGGaALpLsPsoLlFLsNltKhFP.cM....AILlsDYoVTAspscopcYPhQlLpslslY-YlspohGCKNVslMGDSAGGNhVLslLLY...L+KpNK.llPKKAIAISPWlNsThhsEpE+saM+ts-clDulChKuLshFG+hYlsN...-supshpo-sFlNIE+Na-h-sWscI..lcKCcLLITYGDDElLphQIKlalcKIo-hptt.pa.pscNVLl-c...QGsHIG......P.ILsassNlD+WoKhsSlscILsF .......................................................................................................................hh..............................................................................................................................................................................p............h......Wh..h.p............t......ps..p...s.D....P...l...l.lYhH....G....G.....G.Y....h....l........p.........h....h....s....s....p....l.....p......h....L....h.....s......h....h....p.....h..l....s.....ch.............................u.I......L..l.....l.......DY.......s...L.......s......s............................p.........u...t...h........a........P......t...Q...l....h....p....h....l.....s......s........Y........p.............p......L........l......................s.........p.......G.........s............p.........N........l.....h......L.....h...GD.SAG....G..NL...s..l.s...h...l...pa........................L..p......p.........p........p...................s...........h...........P..............+........p.........s...l........LI......S.PW..l...ph...s.................p..pt...t.....t..p.....hp...t..s...p...ph.D..h.l.....s...h.....p...t..h....p..........h...t..c...hahss..............t..t..t...h.........s..h.ss.h.....tt............p...p...Wpcl.......hp....p...t......s...shlhh.G-cEhh+-plhpas.phh.....................................
........................................h.h............................................................................................................................. 0 15 37 72 +10173 PF10341 TPP1 Est3; Shelterin complex subunit, TPP1/ACD Wood V, Finn RD anon Manual Family TPP1 is a component of the telomerase holoenzyme, involved in telomere replication. It has been demonstrated that TPP1 dimerises and binds to DNA and RNA. Furthermore, TPP1 stimulates the dissociation of RNA/DNA hetero-duplexes [1,2]. Yeast telomerase protein TPP1 (Est3 in yeast) is a novel type of GTPase [3]. The key residues in Swiss:Q03096 are an Asp at residue 86 and the Arg at residue 110. The Asp is totally conserved in the family, whereas the Arg is not so well conserved. The N-terminal of TPP1 is likely to be the binding surface for TINF2, whereas the C-terminus probably binds to POT1, thereby tethering POT1 to the shelterin complex [4]. The complex bound to telomeric DNA increases the activity and processivity of the human telomerase core enzyme, thus helping to maintain the length of the telomeres [5,6]. This domain is conserved from fungi to mammals, hence family Telomere_Pot1 has been merged into the family [7]. The human shelterin complex includes six proteins: telomere repeat binding factor 1 (TRF1), TRF2, repressor/activator protein 1 (RAP1), TRF1-interacting nuclear protein 2 (TIN2), TIN2-interacting protein 1 (TPP1) and protection of telomeres 1 (POT1) [8]. 
21.90 21.90 21.90 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.51 0.72 -4.06 51 102 2012-09-26 10:57:07 2007-08-21 14:13:12 4 1 93 2 67 107 0 118.70 21 19.44 CHANGED sWIpphlhsthptp..................................hhtps.sspll+llca................sssssshtul............................lSDusapIhulFo.pculppac..pcpc....pchp.........tsspssllhlpchplpht...sppthssca....................................hLp.....l ....................................sWIppllhsshp....................................hhssstsspllcllph..................sssssshtsl............................lSDusapIpslho.cculpphc.....hcpc.....phhh.........psspspllhlpchplhhp...sptthssca.......................hL................................. 0 14 27 47 +10174 PF10342 GPI-anchored Drmip_Hesp; Drmip_MAPK; Ser-Thr-rich glycosyl-phosphatidyl-inositol-anchored membrane family Wood V, Coggill PC anon Pfam-B_42324 (release 21.0) Family Some members of this family appear to be serine- threonine-rich membrane-anchored proteins, anchored by glycosyl-phosphatidylinositol. In A. fumigatus these proteins play a role in fungal cell wall organisation. In Lentinula edodes this family is involved in fruiting body formation, and may have a more general role in signalling in other organisms as it interacts with MAPK. The family is also found in archaea and bacteria. 
26.00 26.00 26.00 26.00 25.70 25.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.48 0.72 -3.51 101 635 2012-10-04 12:14:07 2007-08-21 14:16:40 4 18 172 0 476 646 9 93.20 21 35.69 CHANGED os.P.shs.pslssG...p..hTlsWsss......sss.......ssholhLhpGss...s.hssh..tsl..ussls.su.....uo.aoass....ss..s..lssss....s..Ytlplh.s.ssss.......hsYSspFsl ..................tP.stssshssG....ps.hslpWpsst.......sss..........ssh.sl.hLh...sGss........tp..hs.sh.....tsl...us...s..ls....ss............uo..as.hss..............ss...s....lssss......s...Y....tlplh..s..ssss.......hsaSspFsl.......................... 0 169 295 400 +10175 PF10343 DUF2419 Protein of unknown function (DUF2419) Wood V, Coggill PC anon Pfam-B_35257 (release 21.0) Family This is a family of conserved proteins found from plants to humans. The function is not known. A few members are annotated as being cobyrinic acid a,c-diamide synthetase but this could not be confirmed. 19.90 19.90 20.00 19.90 19.40 19.80 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.78 0.70 -5.15 21 302 2009-01-15 18:05:59 2007-08-21 14:49:58 4 5 236 0 233 297 138 246.10 34 73.73 CHANGED HELHPps...cD......cpTVsalFlhDhLNFuFWo-ps.........tpaslpY+G+taTGYaSLsAAlpRAL-cGIsITsPpFa...pchT.chL+c.lF+Ssot..cclPLLp........ERlcsL+EuG+VLhcca-GSahNhlpsu.spSAssLlpLls-sFPsFRDcssacG+........+..VphYKRAQILVADLWusFpGcu....h.GcFcDIDpITMFADYRlPQhLppLGsLtYospL.pcl+ppchIssGuphElElRusSIasVEhlRctlp+pcsc.t........................................................................lNAILIDaaLWDht+chptcht.........plPaHRT.......RSIaY 
...............................................................................................................p........ts.......ttsspalFhhDhLNF.s..FWsptt..............h.ap.................p.hpGYhsLsAulp+Alc..p.shsl.hssphh......tpho.p.lpp.lhcs......s.t......h..Plhp......................ERhph.LpE...sGplL.......h.c.c.......a....tGs....hhshl.p.p......u......p...t....S...At......tLlpllsppFP.s......FRDtsh...a..c..G....+...............p...VthaKRAQILVADlWusa.p....G.p..u...................h..G...p..F.....p.........D.Is.p....l..T....MFAD.......YRlPQhLhphGsLpY.S.spL.ptl.....cp....t.p......l.sGsp.E..................l................ElR.u.......sSlhslEhlpct....l.........p..h..p.................................................................................................l.s.ul...hlDaaLasht+phtt..p........................lPaH+spsIaY.................................................................................................. 0 93 135 198 +10176 PF10344 Fmp27 DUF2425; Mitochondrial protein from FMP27 Wood V, Coggill P anon Pfam-B_54917 (release 21.0) Domain This family contains mitochondrial FMP27 proteins which in yeasts together with SEN1 are long genes that exist in a looped conformation, effectively bringing together their promoter and terminator regions. Pol-II is located at both ends of FMP27 when this gene is transcribed from a GAL1 promoter under induced and non-induced conditions [1]. The exact function of the Fmp27 protein is not certain. 
25.00 25.00 25.30 25.30 24.90 24.80 hmmbuild -o /dev/null HMM SEED 881 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.38 0.70 -6.86 29 253 2009-01-15 18:05:59 2007-08-21 15:54:39 4 23 161 0 164 252 0 580.20 17 30.91 CHANGED hhshhllhhllphhsGlsIppl...shhSl+tlshss+s.hpIpIctlthpl.....hspsphlplhshc.lclph........................pcpppppstpppsspttp................p.hthhphph.pphh+tlhphl.calphlslhlhpsslsh.shsptthshlphsspppphhhsp.......................hph.h..lpslhhthps.....tp..pllspsslslpshlphsh.......sLcshslslpluclplPhs..shhphhthhcp.cpptspt....t...t................................psppthpplp............p.hchhpplhchlp-lplplcphpl.ch..s.tsp..sh..............lsh+plshshp+hsppsPua+hhFppc.Dhshphhhshhslplths.t........ptpsscllplPssohohcoshhp.h..........ts.spspthps......................................sllphshslooPslDlphcplshlhshhtshhhh+ph.shsptppp.............................................................h.hhpchhPphshKhslc-Phlhlphs..t.s.......hshlhtShSslshslposc...........ttptplhashhophpltshphhhps..spth+pslhphcshphch.hshhsphplphpsslss....hshchsphcl.hpulppllhplstphp.c..hthptp............................................c.plh+tLPsaltphphphoslshplus..............spslp..-htcGlshcLcshsspacthphsh.sshpthppps.Sps.s.hps................t.pt.ths-hpphs.p.p......................................sh.s.h.psscph.hcshhslsph-ltlsshscp......sshhplphplpplhsphshaphashhsulshlpphhht..tshppppppsp.hphtt........................................th.chlplchchpslplhhpLPs-sshhlplhs 
....................................hhhhh.h.ph.stlpItph...thh.lptlshp.pp...p..lplctlhhp..........phh.lh..p...l........................................................p....................................................th....thhphhhp.h..ahp.lslhh.pssh....ht.......t.h.hhhspptp....................................lpph.h..............p...phlsph.shthphhh.hsh........shp.hshslplhphph.hp......h.......t...............................................................t.t.h.p.h..................hh....h.th.pphphthp.hth.........t.........................h.hp.lth.hp+.p.p.s.hp..Fp.p..shs......hhhp.sl.h..............ttp.pchh.ls.hphshpss..................................................................h.hphslsos.lchc.pphshhhthh.......pt....t..hp............................................................................................hhs.h.h............................hh..p.o.h.hshp.......................hhh...t.....t....h.......t....thh..p.hth.................ht................h.....p..h.......hp.hh.ph..............................................................hhp.hP.....h.ht.t.h...hs................t..p....p..pu....phtthshph.....t....................pt............................................................................................................h..h...p.h.h.s.st.........s...lp..h.th...hp...hhh.h.sh.hlpphh..........................................................................phhthsht.t.ht..h.hstt..hhhp................................................................................................................................ 0 21 55 113 +10177 PF10345 Cohesin_load Cohesin loading factor Wood V, Coggill P anon Wood, V Family Cohesin_load is a common cohesin loading factor protein that is conserved in fungi. It is associated with the cohesin complex and is required in G1 for cohesin binding to chromosomes but dispensable in G2 when cohesion has been established. It is referred to as both Ssl3, in pombe, and Scc4, in S.cerevisiae. It complexes with Mis4 [1]. 
21.30 21.30 21.30 21.70 21.20 20.70 hmmbuild -o /dev/null HMM SEED 609 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.82 0.70 -6.31 21 268 2012-10-11 20:01:01 2007-08-21 16:06:20 4 10 212 0 192 312 1 467.70 23 69.21 CHANGED ptph.hLltlA-cahpsApshsspl....pptphppYapLlshulpCLpslhp............shpLssch-AplphcluplLhpETcNhspA-shLs..+uhhlspp..sshh-lKapsphLL...splhhcos.ps..Ahptlsctlp.hps..................pptahasachLphpLslpp...t-.stAhphLppltphss.psppthhlhhhhhpshlphhps.spDslphlpph.httpth.....t..ph.pLtshhhllclhttlppsp.tsspppLptlpphhpphps.........ass..ptchtl.lth.........................hhhhpWLspcplhhLsYhloulsthhcppss..+upKahpculpphcc.h.............sl..hppphthhpslphhhhhahhhpthhhsshtpsppphp.htshspt..........pthtshhhhLtGlhtQtp.......uchcpAhthah..........................................phstspssps-LtlhusLNlhhIhpt.spppptp........................hsplhsplcshsspssspphthstshh..ltshpth.shphp......pp+pphppsLpth....p...................thspsphhshsLsllstpha.......tssst..........Epsphstp...uhphAppp................................................pstLWh.lssshhtp.hc.tGptsctcpstpphpplpp 
...................................................................p.....lhthAc.hhpts...ht.h....................lpCLpslh..............php..s..p.EuhspLphuplLhpcTp.....N...p.AcphLp......+uh.....lspp..........ph.-lKapht...LL...splhhpps.hs....Ah.hlcp..hlp..pt.........................s.hath+h...l...hpls..h.....p-hstAhp.Lthhsp..hup......ssthh...hshhhl.cu.hlh.h..h...p.t...t-.sh..hlp.s....sp.h.......p....ph.....pLp.shhh..hlpls.h.l.tt.phppsp.pLptlpp.hpph.p....................tp..h..t.................................................................hh...pahspcphhsLsahl....o.sht.h.t..........s..p......+up+ahpculh.lpp.h..........................................................h.pp...hhp.h.h.hh.hhhhhthshsp.hth.s.ptht.htp................................t.h.s.hhhh.uhh...sh.......sphptA.t.a............................................hh.p.s.p...p-lhhhhshNhh.lh.t.tp.tp.............................h.plhptlps...tp...s.......sp...h......hshhhh..htsh.t...h..................ptKp.lppsLphu......p.......................t.s.sphhshsLs.hhshhhh........ssht..........Ep.phss....uhthAp+.................................................tstLW...ssshh..tp..chtspt.ctpthht.h............................................................................................................................................... 0 54 99 157 +10178 PF10346 Con-6 Conidiation protein 6 Wood V, Coggill PC anon Pfam-B_35316 (release 21.0) Family Con-6 is the conserved N-terminal region of a family of small proteins found in fungi [1]. It is expressed at approximately 6 hours after the induction of development and is induced just prior to major constriction-chain growth [2]. 
25.00 25.00 25.80 25.30 23.30 24.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.16 0.72 -7.45 0.72 -4.54 23 178 2009-01-15 18:05:59 2007-08-21 17:03:05 4 5 51 0 145 192 0 35.80 43 41.08 CHANGED c-ssNlhtGaKA.............sL+NPpV........SccuKc+AcchLcchss ..................ts.sNVhtGhKA.....................slpNP....sV........S-cAKp+A+chL-pht....... 0 78 101 134 +10179 PF10347 Fmp27_GFWDK RNA pol II promoter Fmp27 protein domain Wood V, Coggill PC anon Pfam-B_5282 (release 21.0) Domain Fmp27_GFWDK is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation [1]. It contains characteristic GFWDK sequence motifs. Some members are associated with domain Fmp27_SW (Pfam:PF10305) towards the N terminus. 20.30 20.30 20.50 20.30 20.00 20.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.01 0.71 -4.25 33 294 2009-01-15 18:05:59 2007-08-21 17:12:47 4 35 229 0 216 303 1 141.10 33 5.80 CHANGED DYPLPllphP....sps.ph.shphpGslVluEphhps.cphRplhVPlsPsst.ptt...sshaulpl.RTlosVKhasDhphclsospsoplsWGtSYQPAlpphMtshDpFoKPslDPSs+lGFWDKlRLlhHG+hphphpp..phclthKGS...........+DPYpl ...............................................DYPhshhpl..................shplhushlhuEph.ps.cuhRphhl.ls.............pshhsltVpR....oh....sPlKhYpDhphclpo...splsWG.suapPuhpphhhsh.................-............t..ho.....K..........P....s....h....D...........PS.................t...luaWDKhR..LhhHG+hphshcp....hplph+uo...........cDPYph................ 0 64 115 183 +10180 PF10348 DUF2427 Domain of unknown function (DUF2427) Wood V, Coggill PC anon Pfam-B_52268 (release 21.0) Domain This is the N-terminal region of a family of proteins conserved in fungi. Several members are annotated as being Ftp1 but this could not be confirmed. The function is not known. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.24 0.72 -4.46 33 258 2012-10-03 10:28:09 2007-08-22 10:51:40 4 7 134 0 196 274 0 100.80 27 18.68 CHANGED shsssoaassspstcusLahHIhlhsluashlaPlullLuhs+.S+aalssphlthslslsGhhhutl.apuppPp...hYssNsHsphuhlLhhhhssQhshulhhpht ...................t...s.s.hh..tp.hcuh..lhhHIhlhsluasllaPl............u.h.l....L.....uhs.+...S.......R.......a....al..P...s..QhlthslshhuhhhG....t....l.....apupp...........hh..ssN.....sHsphuhllhhhhhsQllhGlhhth.......... 0 51 107 162 +10181 PF10349 WWbp WW-domain ligand protein Wood V, Coggill PC anon Pfam-B_5077 (release 21.0) Domain The WWbp domain is characterised by several short PY and PT-like motifs of the PPPPY form. These appear to bind directly to the WW domains of WWP1 and WWP2 and other such diverse proteins as dystrophin and YAP (Yes-associated protein). This is the WW-domain binding protein WWbp via PY and PY_like motifs. The presence of a phosphotyrosine residue in the pWBP-1 peptide abolishes WW domain binding which suggests a potential regulatory role for tyrosine phosphorylation in modulating WW domain-ligand interactions. Given the likelihood that WWP1 and WWP2 function as E3 ubiquitin-protein ligases, it is possible that initial substrate-specific recognition occurs via WW domain-substrate protein interaction followed by ubiquitin transfer and subsequent proteolysis [1]. This domain lies just downstream of the GRAM (Pfam:PF02893) in many members. 
21.80 21.80 22.40 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.89 0.71 -3.20 31 266 2009-01-15 18:05:59 2007-08-22 14:06:34 4 9 168 0 161 239 0 107.50 30 39.43 CHANGED PGGGa.Gp...spaKloFppGGAI-Fupthh+lsppsppu..ht.t...............h..shuhst.ss...Pshst......................................Ps.....................s.hssss...............ssssssa.sssssts.hhst...PP.PsYsus.....t .....................sGGGa.Gp....spaKLoFpsGGAIEFuQthhpsssp.uppu..h.t.s....................h.ss...hshs..ss...sshsh......................................................hss.....Ps..................uhs..ss.........................................sssssa.ssss....s..s..s.hs..t...PPPsYsss............................................................................... 0 36 61 113 +10182 PF10350 DUF2428 Putative death-receptor fusion protein (DUF2428) Wood V, Coggill PC anon Pfam-B_6748 (release 21.0) Family This is a family of proteins conserved from plants to humans. The function is not known. Several members have been annotated as being HEAT repeat-containing proteins while others are designated as death-receptor interacting proteins, but neither of these could be confirmed. 
21.40 21.40 21.40 22.80 20.70 19.80 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.63 0.70 -5.36 33 333 2009-01-15 18:05:59 2007-08-22 14:57:58 4 9 254 0 238 352 4 254.50 26 17.08 CHANGED hcphhscllshspplhphl+sllsp-o...PEGt..........................................hs-pht.......p.shssphllshuWRul+E..uShLLtsll...............................sshlotsplctlGphhhppLtpl+HRGAFpsVh.sFsshCpphhp.....spstplssLPppWLppslphlpsp...........sph....hTRRSAGLPahlsuILsucsss.......shhppshppLlclAchsst..........tsttt...p...............lPQVHAhNsl+sIFpsscLustsssaltcuLpLulcs.FsSssWu ................................................................................................................................hhpcll.hs.pl.ths.t.sll.sssu.......PEGh.....................................................................................spphp............pttsssphlLs..ssWRuhKE..s..uh..LLs..t.lht.h..h...................................................................t..s.sthl..shpp.l....cpl.Gphhh.ptLhphRH+GAFptsh.uFspl...spthhp.............sp..s.....sp..h.....p.p..L..PppWlpp.hlpt.l.psp..........................ssh..phTRRSAGlPhhltulLsu..-sppt..........sllppsh.ppLlp....lAp..sss......................................tt..p.....................................lPpVH..A..l..NhL+slFpco...pL...u...p..ps.s.a...ls.cu.h.phAlps.hsSshWs..................................... 0 85 128 197 +10183 PF10351 Apt1 Golgi-body localisation protein domain Wood V, Coggill PC anon Pfam-B_6317 (release 21.0) Family This is the C-terminus of a family of proteins conserved from plants to humans. The plant members are localised to the Golgi proteins and appear to regulate membrane trafficking, as they are required for rapid vesicle accumulation at the tip of the pollen tube [1]. The C-terminus probably contains the Golgi localisation signal and it is well-conserved. 
20.20 20.20 20.20 20.40 20.00 20.10 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.32 0.70 -5.63 41 368 2009-01-15 18:05:59 2007-08-22 15:48:38 4 35 242 0 260 388 2 417.80 21 19.93 CHANGED llp+sShhh....hYsphsshthp.............ssttphcs...........hcplslphPclhssssSpQYhs..lYsIltsLLhas-Phccph.pc+lc+lhhshDhp..Dlpuhs.phlppLQpcl+pLtplppphph+pphLspps....................................htchhhlph-htpstt-Lhhlhpslpssptc..........pp...sspsshpaplpsccllW+llc-sppPhl.-htLssspapRpcsscGSshNplplphlpshNLh.sAhYspllsPa.p.............s.psscpshlclpWphttslGGIsVh-chclslhPLplpl-cchsc+lhpYlFPsppsp....................cpssspctpppsthshsstsssptstps.........................................................................................pt.t..s.tph.spssppt.pspc.tht...................p--lscMhpRuppahsltpl+lsshhlplSYK.....Gpsptpl.sVpDhhhplPslcYcNpshohhDLhhplKKcll+sllpHsG ............................................................................................llppssh.hhY.phs....p..........................t.....t............t.hcphtl.h.p.l..phtssu..tQY.h..hh.ll.sL.Lhas-..P.cc.h.pc+hp+.lhht.-hp...shtt...p.l.phQptl+....p....h.thh....pp.....hphp..h.l..ppt..............................................................................................................ph..hptp.hthptt.....cL..hhhpshpptphp.....................................tp...ssphhhphphthpphhWc.hhpcp........sp...........h..l..-htlpph.a.........s+hppt-.........sos.p...hlplt.h.hhNL......h......s..ss..hY......p..l..ltPh.s................t.scp.hlclh..hp.......hh.sluG.I.s.lh-.p.FE...lslhPLplpLpcphhcchhpahFPs.t............................................ctppspptppts...h.h..ss.sssp.tt..................................................................................................................................................................................................................s......pt.st....s...ttt..pt..ph...tht............................s--lscMhpRupp.....hphh.lKIspl..lplSYK...
...............Gptp...........t.....sl.....p-h.hhhPslcapNpTaohhDhhhtlK+chh+sllpps.................................................................................................................................................................. 0 84 144 224 +10185 PF10353 DUF2430 Protein of unknown function (DUF2430) Wood V, Coggill PC anon Pfam-B_67886 (release 21.0) Family This is a family of short, 111 residue, proteins found in S. pombe. The function is not known. 24.60 24.60 24.90 210.00 24.10 24.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.50 0.72 -3.87 2 3 2009-01-15 18:05:59 2007-08-22 16:11:30 4 1 1 0 3 3 0 107.00 75 94.13 CHANGED MLLLhhICClFlKhlLApVsLTFV-YAKLPspYAELLANhhsQpGlMLFsTuDlRItAYNYLlNslTEhNsDTDAYLCQLLTGQYTTDCYIFssss.-tPEshNsSh MLLLFCICClFIKLVLAEVNLTFVDYAKLPPKYAELLANLTDQHGlMLFDTADVRIEAYNYLVNNITEINTDTDAYLCQLLTGQYTTDCYIFDDSVYEGPENINPST 0 3 3 3 +10186 PF10354 DUF2431 Domain of unknown function (DUF2431) Wood V, Coggill PC anon Pfam-B_6967 (release 21.0) Domain This is the N-terminal domain of a family of proteins found from plants to humans. The function is not known. 
24.70 24.70 25.70 25.00 24.40 23.80 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.31 0.71 -4.29 47 435 2012-10-10 17:06:42 2007-08-22 16:42:22 4 23 253 0 309 432 13 160.90 28 42.77 CHANGED LlVG-GDFSFuhuLhppht...sssLsATohDot.ppLppKYs.pht..pNlptL...cptGspVlaslDspphtpph...........................hpppp.FDpllFNFP.Hs.........G...tt.tp.....sppplp..............tppcLlhsFFpsupplhpt...................................tGclhlohtsGpP................Y.spWslc.tLAtc..sulplhcphcFphp..saP..GYcp+cs ........................................LllG-GsFSFuhuLh......p.......t.......ht........................st.....p..lhATsh-s..ppl...........htcYs..pst......psl.ptL.....cp......s....s..pl..h.aslDspphtpth..................................................................................ptt...aDpIhFNFP.Hs..................................G............pttlt...............................hpppLl.htFFp.s.stph.l.t.................................................................................tGplhlohhpsps........................a.s.Wpl.....thAtp............suhhl..pt.h.Fphptas....GYpptt.h...................................... 0 87 175 257 +10187 PF10355 Ytp1 Protein of unknown function (Ytp1) Wood V, Coggill PC anon Pfam-B_7247 (release 21.0) Family This is a family of proteins found in fungi. The region appears to contain regions similar to mitochondrial electron transport proteins. The C-terminal domain is hydrophobic and negatively charged. There are consensus sites for both N-linked glycosylation and cAMP-dependent protein kinase phosphorylation [1]. 
25.00 25.00 29.90 48.70 21.30 22.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.82 0.70 -5.19 36 273 2009-01-15 18:05:59 2007-08-22 17:18:04 4 5 134 0 199 267 0 266.30 38 48.98 CHANGED lhuaVpl...shGslshhGascGpclhssLAHaIhGulFhhYGll.hu.hhGhhhphGhAW....................................ch...t.................ptthStEFh-ShlIhhaGssNsFhEHhuutss....WotpDLQHsSlullaauuGLsGlhlpppt.........................................................s.thuhNshPullIhhTGhhMSpHpQps.lSThlHt.aGhhLhuuuhhRhlphhhlhhcss..................ss.shsspsshphlssFsLlsGGllFMtSo--hlpshcthGh.sthhhhslshuhshLlhsWhhlLlt.l+sa .....................................h.hualphhhGslshhGas+usclhpsLAHaIhGusFhhYGll.....hshhh.lG.sW...........................................c.............................p.sshStEFh-ShlIhhaGs..sNsFhEH..hhG.ts.....WotpDlQHsohu...llaauuGLsGhhlppcp...........................................................s.t..phN.lPullIhl.TGhhMSuHsQp.hlSThlHs.aGhhLhuuuhsRllphshlhhc.s.......................................ss.shssh...sshp.hlssFsLhuuGllFMsu......T--plphhpphGh.sthhhhhlhhuhshllhhahhlllt.lhs............. 0 50 107 167 +10188 PF10356 DUF2034 Protein of unknown function (DUF2034) Wood V, Coggill P anon Manual Family This protein is expressed in fungi but its function is unknown. 
20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.38 0.71 -5.12 11 178 2012-10-11 20:44:46 2007-08-23 13:34:52 4 3 128 0 140 242 59 117.60 28 61.05 CHANGED STlapGTLYEhpVtphLpppLtshsLc+sGGutDuGlDlhGpWslssh.............s......s..............................hpsLcVlVQCKuhps.KluP+hlRELtGoasphsst...p..pssTlsIlsSPp.hT+sulphhsphslPllah+lsh.p...................htDG....hphs.hNsstLptha.NshuctlL ......................................ossh.GshaEhhs.t.Ltph....hhpL.+hG..Gt.DtGlDlhG.W.ls......................................................................................t.lpllhQCKs.pt...+h..sPt.lREL..Gsh........th...t..........................t.th....h...hhhhs.p.hT.thh..h.p.t.....hPhh.....hh..h.........................ts..................................................................... 0 38 73 116 +10189 PF10357 Kin17_mid Domain of Kin17 curved DNA-binding protein Wood V, Coggill PC anon Pfam-B_7469 (release 21.0) Family Kin17_mid is the conserved central 169 residue region of a family of Kin17 proteins. Towards the N-terminal end there is a zinc-finger domain, and in human and mouse members there is a RecA-like domain further downstream. The Kin17 protein in humans forms intra-nuclear foci during cell proliferation and is re-distributed in the nucleoplasm during the cell cycle [1]. 
25.00 25.00 26.10 26.10 20.80 20.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.78 0.71 -4.49 30 331 2009-01-15 18:05:59 2007-08-30 09:51:48 4 10 284 1 245 323 2 125.20 44 35.57 CHANGED Rphtlhup..sspphl-saSppFppsFlpLL+ppaGpK+lpANplYpEaI.pD+cHlHMN................AT+WpSLTcFl+aLG+pGhC+V--o...-+........GhaIpYIDpssEslp+pcthp++c+t-ps-E-pppchlpc.Qlc .........................RQhhlhup..sspcalcpaSp-FppsFlpLL+ppaGpK+V.psNplYpEYI.u-+cHlHMN................ATpWtoLT-FsKaLGR.pGhC+V-ET-K.....................GhaIpaIDc..sPEslpRppt.hp++c+t-hsDEE+ptchIccQlc................ 0 81 133 200 +10190 PF10358 NT-C2 Eeig1; N-terminal C2 in EEIG1 and EHBP1 proteins Wood V, Coggill PC, Zhang D, Aravind L anon Pfam-B_7857 (release 21.0) Family This version of the C2 domain was initally identified in the vertebrate estrogen early-induced gene 1 (EEIG1) [1], and its Drosophila ortholog required for uptake of dsRNA via the endocytotic machinery to induce RNAi silencing [2]. It is also in C.elegans ortholog Sym-3 (SYnthetic lethal with Mec-3) and the mammalian protein EHBP1 (EH domain Binding Protein-1) that regulates endocytotic recycling and two plant proteins, RPG that regulates Rhizobium-directed polar growth and PMI1 (Plastid Movement Impaired 1) that is essential for intracellular movement of chloroplasts in response to blue light [2]. 
25.20 25.20 25.20 25.30 25.00 24.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.67 0.71 -4.70 76 744 2012-10-10 12:23:49 2007-08-30 13:20:29 4 14 252 0 483 711 0 146.10 20 19.43 CHANGED ppcphKaphplplcclpshst......................ssplhlph.+csspt..........................................ttpstph.lppspspap....pphphssplhhs.....tcst......hppKhhpl.lht.....................................tpspp......hlGpsslslupass.......................tpstspphhhpps.....tps.suplplslphp.hpts.pt .................................................................................pt.Khph.hphpcl..s.sh......................sshlhlph.phhstt...................................................................................................phpsshtp......l.t......p.s..plpWp......cp.hphs.splhts..........spst.....................h.csp...hphsltpp..........................................................................................tsu+p....thLGpsslNLucassts........................................................................................tss.shph..hLcsh......ts..suhLplslph.hhpts................................................. 0 138 264 379 +10191 PF10359 Fmp27_WPPW RNA pol II promoter Fmp27 protein domain Wood V, Coggill PC anon Pfam-B_8838 (release 21.0) Domain Fmp27_WPPW is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation [1]. It contains characteristic HQR and WPPW sequence motifs. and is towards the C-terminal in members which contain Fmp27_SW Pfam:PF10305. 
23.20 23.20 23.50 24.60 23.10 23.10 hmmbuild -o /dev/null HMM SEED 475 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.55 0.70 -6.08 29 179 2009-01-15 18:05:59 2007-08-30 16:16:13 4 19 134 0 139 186 1 450.00 27 17.85 CHANGED GLKu+h-shhlDLHQR+E.hpphp.....ctLs..+pp+lh+h+hptu-lchpshDlRslsAtFppsshsshhpst.p..pt.......................................asI.DsD...hsWhDhcDFlElchhtspss.P.chcIhPLhaoP+FoYhRcsstpsp.sspst...........................FGsEsoHsChh.tpscPtpsQhcLlc-RlppLccplpphpctltphp...........s.t.p.tphcph.pplppLtc+lchlpshlpchpppp....ptpptssssp..tp.t.t..............................ssstpssusFcNRFhlHNhpLKWNsslRshll+YlHplspR+uhsaahSp+ulchlp-llccppctppssppphpp............p......ppsppss...........................pphh-ph-chLp-spp...............................s.sapsppsYhlclIuPQIQLpS-csPcusVLloA.shcl+llsl.....hDppssssslss.llppRaslhhcsuphFVhp+c-h.sh..thhassssYGsp....tosWPPWLslEls ....................................................................................................GlKs+h-shhlDLHQR+E.hpths.....cths..pp.pcs.ph+hptuplchtssDlRslsuthttsshpphhp..st.t...t......t...s................................phsl.D.pD...hsWhDh-DFlE.lphh...sp.ps.P...chcIhPlhasPRhoYaRpsshs.th..sps..........................................................................FGsEsoHtChh.tpscPtplQhpLlpcRlppLctphpphpctltc.p.............t.t.t...tphc.h.pphp.Lpp+hphlpshlpchptp.....ttptt..t.ss......tp..t.....t..t........t...................................................ssstphtupFcNRFhlHNhpLKWNsslRslll+YhapsspR+uhhaahSpcAl+hl.-llc...cptpsppp.pttttpp........................t.......tp..tpppss...........................psh..-phcplLp-spp..................................................s..chssppsYhlcLIuPQIQLpS.-....cs.c....usllloApshpL+llsl........hDppphs.s.slss.lVppRasl.hcssQhFlhppcph.p........h.hassstYGs.....tstWPPWlshEh........................................ 
0 38 79 125 +10192 PF10360 DUF2433 Protein of unknown function (DUF2433) Wood V, Coggill PC anon Pfam-B_83000 (release 21.0) Family This is a conserved 120 residue region of a family of proteins found in fungi. The function is not known. 21.40 21.40 21.70 22.10 21.20 20.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.80 0.71 -3.83 10 111 2009-01-15 18:05:59 2007-08-30 16:29:41 4 7 104 0 85 111 0 122.90 51 18.13 CHANGED YGlSYNEFSVssoh.......EsYRuKLupu+ssFs-lW-sVKsEV-.ssls........pp-sQppLLchuLulh-Kl.Psusssu.shhhshsh.ps.......tsAhh..shWpasLs..DuuaGphlLp.IsDu+luuEhpupGFNhucR ....................YGSSYNEFSVNPoL.......DHYRGKLAASKASFNDVW-TV+uEVE.sAIs.........pN-uQpsLLcNALslV-KM.PosAsGGN..PF.GG.Pssss.su...t.....GplDESAFK..NhWNFNLA..DAAFGaLVL-.IpsGRIuTEMRAQGFNFuHR........................... 0 27 50 73 +10193 PF10361 DUF2434 Protein of unknown function (DUF2434) Wood V, Coggill PC anon Pfam-B_84994 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 
25.00 25.00 63.30 47.80 19.70 18.60 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.82 0.70 -5.35 10 63 2009-01-15 18:05:59 2007-08-31 08:48:38 4 3 61 0 51 64 0 270.70 46 54.05 CHANGED YSNGTLSNGopCYLuFspapPph....hpNGTFlNGTSCYuPlpsIGs+uulGlAaAlhFululhlTLlNLRKHG+paLPs-KRaphl..................GRRhpWaWhLFluACGhISsFhoIDVDRsYL.usPllLpSlFapLhhPGhhAAVWEAVRHWGSWQERQllD..+DPaAFscsupRp+tEhlLPllFYlFshlNFFLsVPRSWouIEhQRs.-QphppApPsATDsRFKAAuFltluuhLllsYSLcHSIY+Y+s+spushsp.llFalptsPspFllsl.....sLsulplGYulAuAFsaslSP...L+hs ......YSNGTLSNGS..pCaLsFp.YpPth....hsNGTFlNuToCYsPlpslusRuulGluFAshFulslhhoLlsLpKHG+haLPh-KRah.l..................GRRWQWYWhlFlsAsuhlShFhslDVDR.YL.thsllLpShFaaLhh.uhlAhVWEuVRHWGSWQERQhhD...Dsa..shp.pcspRp+.EhaLPLlFYlFhaLNFFlslPRSWstlphQRo.-QphshAtPsATDsRFKuuuhhhhsshllIsaSLtHSIh+Y+s+spuhhsp.hhhh.lphsPh+Fhl..sI.....sLsulhluathhsuFpashSP...l+h........................................ 0 8 22 39 +10195 PF10363 DUF2435 DUF2435; Pmp3; Protein of unknown function (DUF2435) Wood V, Coggill PC anon Pfam-B_7476 (release 21.0) Domain This is a conserved region of approximately 400 residues which is found only in eukaryotes. It is associated with HEAT domains Pfam:PF02985 in all members. The function is not known. 23.00 23.00 23.00 23.30 22.90 21.80 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.13 0.72 -4.09 40 249 2011-09-19 22:45:39 2007-08-31 11:17:15 4 8 223 0 180 258 0 93.40 27 9.87 CHANGED ssppthccAlpsLsDPLlPlRA+GLhhLppLlppcS...............slhslstlLslaLppLcDp.......DsFlYLNuI+GLusLs-hpsc.plLppLsphYsspspp ............................................s..pthpcslppl...p..D..s...sP.lRAtGLphLppLlcpcs............................slhp.h.ptlL.p.lhL.p.tLpcp.......DsalYLsuIps..l....ssLushh.Pc.plltpLhcpYhstp..p...................... 
0 57 93 146 +10196 PF10364 NKWYS Putative capsular polysaccharide synthesis protein Wood V, Coggill PC anon Pfam-B_99492 (release 21.0) Family Found only in Vibrio species, pombe and one other fungi, this is a the N-terminal 150 residues of a family of proteins of unknown function.\ There is a characteristic NKWYS sequence motif. 21.40 21.40 21.90 31.50 19.20 20.80 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.85 0.71 -4.44 6 61 2009-01-15 18:05:59 2007-08-31 15:01:44 4 5 39 0 17 63 6 135.80 35 42.11 CHANGED s-thhpDspshchERsphLhE.sF-cahNppYshpWFDNEIKplhGIDVaspsFspssGaQTapNcphSllVI+s-KLsp.tspslu-FLs.pshslVp-NpucNKWYSslhp-FKsoYp.ss.Fl-cMhsS+Lo+HFaops ......................h-thhpDs.thc.-hs..h.E.sF-p..ahNhpY.hsWFDNElKpshGIDVhppsFDpspGa.s..apscphslllI+sEpLsp.hspslu-F..Lsl.p...shslp.p.sNpucNKWYSs..lapcFKppaphsptahppMYsp.Lh+HaYop...... 0 4 9 13 +10197 PF10365 DUF2436 Domain of unknown function (DUF2436) Mistry J, Wood V anon Pfam-B_5683 (Release 22.0) Family This domain is found on peptidase C25 proteins and has no known function. 25.00 25.00 25.40 33.50 21.20 20.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.98 0.71 -4.61 3 36 2009-01-15 18:05:59 2007-09-05 10:47:55 4 9 9 0 3 38 1 156.60 60 14.28 CHANGED ARpVcGIu-ulhVolEDAs-.lRoGpAcIVLsAcsVWsDuSGYQFLLDADHDTYGsVIP.DTGPLasNsoVPSNLY.AsFEYpIPuNADPusoTQNhIssG.oAcVsIPuGTYDYsITNPpP.suKlWIAGsGGspPARhDDYVFEAGKKYTFTM+KsGSGDGT-L ................uRpVctIt-hlhlolcsAs-.lRA.spA+lVLtAcsVWGDsTGYQhLLDADHNpaGuVIP.sTGsLas.s.ossusLY.AsFEYhlPuNADPssTspNhIlsG.pupVsIPuGsYDYsIsNPpP.suKhWIAGDG.stPuRhDDasFEAGKKYpFTM++sG.GDGT-h.......... 0 3 3 3 +10198 PF10366 Vps39_1 Vacuolar sorting protein 39 domain 1 Mistry J, Wood V anon Manual Domain This domain is found on the vacuolar sorting protein Vps39 which is a component of the C-Vps complex [2]. 
Vps39 is thought to be required for the fusion of endosomes and other types of transport intermediates with the vacuole [3]. In Saccharomyces cerevisiae, Vps39 has been shown to stimulate nucleotide exchange [1]. The precise function of this domain has not been characterised. 21.40 21.40 21.40 21.50 21.30 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.46 0.72 -4.03 23 369 2012-10-11 20:01:01 2007-09-05 13:18:27 4 21 253 0 253 369 0 105.10 34 11.14 CHANGED lDTsLh+sYhhs.pssLlusLLRl..NaC-hchscphL.............ccpppap-Ll-hYas+phHccALcLLpcluc.............ttss.hpshppt.......lpYLppLssspl-Llhc.....aucWlL ....................lDTsLh+sYhts...ss......s.........l.lusLlR..........lt..NaC..clccs....pc.hL.............ccpp.+as-Ll.LYp..tKshHccALplLh.chup..........................pts.sshpshpcs.......................l...pYLppL.s..s..ppl....cLIhcautWlL............................................. 0 75 128 200 +10199 PF10367 Vps39_2 Vacuolar sorting protein 39 domain 2 Mistry J, Wood V anon Manual Domain This domain is found on the vacuolar sorting protein Vps39 which is a component of the C-Vps complex [2]. Vps39 is thought to be required for the fusion of endosomes and other types of transport intermediates with the vacuole [3]. In Saccharomyces cerevisiae, Vps39 has been shown to stimulate nucleotide exchange [1]. This domain is involved in localisation and in mediating the interactions of Vps39 with Vps11 [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.39 0.72 -3.88 24 612 2009-09-13 14:58:24 2007-09-05 13:18:58 4 31 283 0 445 603 4 104.40 22 11.03 CHANGED LclLspHus+lsshpsLpL.............LPsshslpp......lpsalppslRppsppp+psplhpsLhpucplpspcphh..............ptcspplhls-pph.CslCcK+lGs.SsFshaPsu.slVHatCtcc .........................plLppaus..pl.cst.plLpl.............l..Ps..shslpp...............lps...aLtpsl.c..p.ts.p..ptc..p..splhps....Lt.p.u..c..t..l..p..lp.tc..hh.........................pt.pp.pthhl.sp.pph.C.hCpc......tl......s...........s......s.....s.........hsh.a..Ps.s...hhh.Hh.C...p........................................ 0 180 269 381 +10200 PF10368 YkyA Putative cell-wall binding lipoprotein Coggill PC anon Gene3D, pdb_2ap3 Family YkyA is a family of proteins containing a lipoprotein signal and a hydrolase domain. It is similar to cell wall binding proteins and might also be recognisable by a host immune defence system. It is thus likely to belong to pathways important for pathogenicity [1]. 22.00 22.00 22.10 22.00 21.90 21.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.32 0.71 -4.98 17 458 2009-09-13 15:40:04 2007-09-05 15:38:52 4 3 358 1 37 198 2 195.40 42 93.56 CHANGED shhuhhlLsGC.hspcst-plasthEpAscpE+.shppptcpLppLEcccpcLYppllp.sh-c.cplhphs-pAlpsspcREchlppEK-ulccupcEhcsscphh-cI-Dcch+cpAcplscshccRYcuapphhcsYpculph-K-LYphLpccchshcpls-plcslNpsYcclpctpccFNchTccYNctK.sFYctutlc .................................................................................s.hhSssLLAGC..hscK.....csh...h...pth-phtcpEc.slhssuKKlpcL-cctpcLappl.....spc..cstshtKtlcphlcNsD-RtK.hcKEc-ulcKupp-hKpAcsal-pI-sc....th....+Kp....scpl-cshKc+YchaschscuYpKAl....spEKpLachLppp..-sp.pslsEKsKslspsYKclpc.p-caschhpchscEK.sh.p..................................... 
0 7 19 28 +10201 PF10369 ALS_ss_C Small subunit of acetolactate synthase Coggill PC anon Gene3D, pdb_2fgc Family ALS_ss_C is the C-terminal half of a family of proteins which are the small subunits of acetolactate synthase. Acetolactate synthase is a tetrameric enzyme, containing probably two large and two small subunits, which catalyses the first step in branched-chain amino acid biosynthesis. This reaction is sensitive to certain herbicides [1]. 22.60 22.60 23.00 24.00 21.80 22.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.13 0.72 -4.25 160 3802 2009-01-15 18:05:59 2007-09-06 10:36:43 4 13 3503 7 996 2241 1813 73.70 39 42.97 CHANGED cRELhLlKVpu.ss..psRsElhplsclFRA+lVDVstco.hslElTGsssKlcAhlcllcs.aGIhElsRTGhlAls.RG ..........pRElhLlKVpA..su.....psRs....Elhchs.clF....Ru..pllDVoss..s.hslp.l.........oGsscKl-Ahlphlcs..hu.I.hElsRoGssulsRG........ 0 309 658 854 +10202 PF10370 DUF2437 Domain of unknown function (DUF2437) Coggill PC anon Gene3D Domain This is the N-terminal 50 amino acids of a group of bacterial proteins annotated as fumarylacetoacetate hydrolase-containing enzymes. In most cases members are associated with FAA_hydrolase Pfam:PF01557 further towards the C-terminus. 21.30 21.30 21.30 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.24 0.72 -3.58 44 659 2009-01-15 18:05:59 2007-09-06 11:59:18 4 1 623 6 229 541 241 53.70 35 20.45 CHANGED M.+lsRF....sssss.spaGhlE.G-s........lt..lsGsP.F.sshp.TGpphsLu-..V+LLAP ............M.RluRF......stsss..ss...aGhl-..sss................lt..lsGcP..a..u.......ss..ph..T.......G..pp.asLs-..VRLLuP............. 
0 74 162 205 +10203 PF10371 EKR Domain of unknown function Coggill PC anon Gene3D, pdb_2c42 Domain EKR is a short, 33 residue, domain found in bacterial and some lower eukaryotic species which lies between a POR (pyruvate ferredoxin/flavodoxin oxidoreductase) Pfam:PF01558 and the 4Fe-4S binding domain Fer4 Pfam:PF00037. It contains a characteristic EKR sequence motif. The exact function of this domain is not known. 19.40 19.40 19.40 20.30 18.60 18.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.90 0.72 -4.40 113 1931 2009-01-15 18:05:59 2007-09-06 16:54:57 4 33 1656 20 330 1395 43 62.40 40 5.31 CHANGED ss....ttp..tts........hst..ps.........P.........cFVpplhtshhutcGDpLPVSuhs....tDGTaPsGTupaEKRsIAl ..........................................ssps..........hhss...ss.....P..........-FV+slstsh.AtpGDsLPVSAhs....sDGTaPhGTopaEKRsIA...... 1 159 261 304 +10204 PF10372 YojJ Bacterial membrane-spanning protein N-terminus Coggill PC anon Gene3D, pdb_2fb5 Family YojJ is the N-terminus of a family of bacterial proteins some of which are associated with DUF147 Pfam:PF02457 towards the C-terminus. It is a putative membrane-spanning protein. 20.10 20.10 20.20 23.20 19.80 18.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.31 0.72 -4.11 6 127 2009-09-11 10:46:57 2007-09-06 17:03:14 4 1 127 3 15 76 0 68.40 58 33.78 CHANGED McphphSEtpaKsphppalcpIpt-huhlhpTlDEcDpClLC-hE-LpHhhs-hQslASSaYLQoYlp.F ..........MpEWGLSE.ELKIQTKQMIElAE+ELS....lMRpAID..KEDECILCKMEDIHHhLuNVQTLAATYYIQAYLSPY 0 5 9 10 +10205 PF10373 EST1_DNA_bind Est1 DNA/RNA binding domain Mistry J, Wood V anon Pfam-B_24280 (release 22.0) Domain Est1 is a protein which recruits or activates telomerase at the site of polymerisation [1][2]. This is the DNA/RNA binding domain of EST1 [3][4]. 
21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.67 0.70 -5.19 69 696 2012-10-11 20:01:01 2007-09-07 14:14:08 4 18 255 2 489 771 76 252.20 17 29.44 CHANGED AhpYYphAtplhPssGpsaNQLulluhh.s...ssc.......................hpAl.YaahRSl.hsppPh.ssApsNLhthacc..........................................................................htphhptp.ht..........pttphhpphhhhFlpLhuhhaps..........hshpphspltppl..hpplphhLpp.........................................thhstphll+hlslslhshchhpp..................................t.......s...hphhhthhthlhpphsphh.............................pptsssspphLssl+l..hhsW..lttpsshhp...............ptpptphtt...hhthhsphlshhpth.....................h.sppts...pshhL.EDhp.......hpuFtPlttsh ...................................................................................AtpaYhpAhplhPps.Gp.sasQLAlLuh..p.........tpp......................................lpsl.a.aYhR....ul.ss.p..P....a...sAppsL.phhpc........................................................................................................t.p...............ptt..hpp..hhh..Fl.thpuhl.aht.........................shp.....p..h..t.t..htpph...............hpph.ph..hl.p.............................................h.st..h....hphhh....ls.hhshpthp..............................................................................p.....hthhht.hhthlhpth......................................................................................................t.......t..Ls.lph.....ta..hh............t...h..........................................................................h...h...hh............................................h.h.E-.........h.th.......h................................................................................................................................................................................................................... 
0 147 237 365 +10206 PF10374 EST1 Telomerase activating protein Est1 Mistry J, Wood V anon Pfam-B_39673 (release 22.0) Domain Est1 is a protein which recruits or activates telomerase at the site of polymerisation [1][2]. 21.70 21.70 21.90 21.70 20.90 21.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.52 0.71 -4.16 27 478 2009-01-15 18:05:59 2007-09-07 15:36:39 4 10 230 2 319 468 0 120.60 24 12.61 CHANGED ttpclcphLWcphaYplhpha+ph..................tpp.ph..phcplppth.palcpuhtFYpsllppltspYp.ls.hphh.......................................................................t....t.pth.thssph.phslhssaRsLlhLGDlpRY+s.htpss ...............................................h..pplEphLWppsahphIpt.h+t...............................................tpppps..sptp.clp..st..hppa.LctupsF....Ypp..Llpclps.pap..lpl.phh.................................................................................................................t......t.tt.p.t.hs.s.hsp.tt.hp.h...s.hh.ssp+CLlpLGDluRY+ppht...s............................................................ 0 90 150 233 +10207 PF10375 GRAB GRIP-related Arf-binding domain Wood V, Coggill PC anon Wood V Domain The GRAB (GRIP-related Arf-binding) domain is towards the C-terminus of Rud3 type proteins. This domain is related to the GRIP domain, but the conserved tyrosine residue found at position 4 in all GRIP domains is replaced by a leucine residue. The Arf small GTPase is localised to the cis-Golgi where it recruits proteins via their GRAB domain, as part of the transport of cargo from the endoplasmic reticulum to the plasma membrane [1]. 
19.70 19.70 20.90 20.80 19.60 18.30 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.23 0.73 -6.26 0.73 -4.00 14 129 2009-01-15 18:05:59 2007-09-07 16:52:08 4 3 124 0 96 131 0 19.00 51 3.40 CHANGED psccsVD+clloNlllsFL ...ps--sVD.R...cLVTNhlLpFL 0 28 56 83 +10208 PF10376 Mei5 Double-strand recombination repair protein Wood V, Coggill P anon Wood V Family Mei5 is one of a pair of meiosis-specific proteins which facilitate the loading of Dmc1 on to Rad51 on DNA at double-strand breaks during recombination. Recombination is carried out by a large protein complex based around the two RecA homologues, Rad51 and Dmc1. This complex may play both a catalytic and a structural role in the interaction between homologous chromosomes during meiosis. Mei5 is seen to contain a coiled-coli region. 21.10 21.10 21.60 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.41 0.70 -4.82 8 105 2009-01-15 18:05:59 2007-09-07 17:46:31 4 3 85 0 69 109 0 182.80 26 84.00 CHANGED ssslssppssh.h.pt.ssss.pstps+pPhusoL+E+L..+csRhpppshss.lKcl+l-spcscpshst.sts.............cph-SEspppps...h.c.+slCp.-ppphpsuosstls+..uhpchh+ccLcpcKt+Lp+Qlc-..........cpDpLRRLclV+hhchKNp.pcLppLI+KW++suQptLpcLpuhlu-..............pEs-..........chTLoELlspaGlD.sLlaaN ..............................................................................h................................h.tshtp.h..........pcht.p.pp...hp.hp.hp.cppp..p.t...........................hh.st..pt.p..p.h..p.hshs....p.p..h.ssshs..t.hp.p.h.s.pch.cpplpp.-Kt.......+L....cQlp-...........cc-hLR+LchVcha+.tKsp.ppLptLIcKWRsssQ.hLh...-Lp.pth.sp..........................................................-sc.................chohspLlcphslD.pLlaa.................................... 0 21 32 49 +10209 PF10377 ATG11 Autophagy-related protein 11 Mistry J, Wood V anon Pfam-B_21462 (release 21.0) Family The function of this family is conflicting. 
In the fission yeast, Schizosaccharomyces pombe, this protein has been shown to interact with the telomere cap complex [1,2]. However, in budding yeast, Saccharomyces cerevisiae, this protein is called ATG11 and is shown to be involved in autophagy [3]. 21.40 21.40 21.50 23.60 20.30 21.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.94 0.71 -4.25 28 284 2010-01-13 17:17:44 2007-09-10 14:03:15 4 6 233 0 202 291 0 141.20 27 11.99 CHANGED apsulh+RhcDlEph.....A+KlpK-s+stcpchpphtpc...KIuhcsFchGDLsLFLPTRpp..................................................................tt.ps................WAAFNl..usP+YFL+spstt.................................plcs................+-WhlGRlsclEcpsV.......................................s.cssNPFcLucGspWYhV-App. .........................................................................................cph.p.+p.hpt.pp..p....hp.phpp-...........KIuh+sFphGDLsLFl.scpps....................................................................................................................................asAFsl....usspYFL+ppsht......................................................plps.............+sWll.u+lhchEpphsc.........................................cspN.acLshGs+aYhlcss..................... 0 59 107 164 +10210 PF10378 RRM RMM; Putative RRM domain Griffiths-Jones S, Coggill PC anon Griffiths-Jones S, Domain This is a putative RRM, RNA-binding, domain found only in fungi. It occurs in proteins annotated as Nrd1 yeast proteins, which are known to carry RRM domains. It is not homologous with any of the other RRM domains, eg RRM_1 Pfam:PF00076. 
19.30 19.30 19.60 22.50 18.90 18.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.66 0.72 -4.38 7 104 2009-01-15 18:05:59 2007-09-10 15:38:39 4 10 104 0 79 99 0 54.50 47 6.11 CHANGED .hP....hP.TPFDhsYGtSLLPSpLLhGSPalssPtpsssh.usphusthshstpphp .........t.........PsT.s.FDMsa.sPLLPSQLLlGSPF..QPGoPuuF.sSPQFpshtshtptp..t.......................... 2 12 38 65 +10211 PF10379 nec1 Virulence protein nec1 Mistry J, Morningstar A anon Pfam-B_11405 (release 21.0) Family This is a family of virulence proteins that are found in pathogenic Streptomyces species. 20.00 20.00 20.60 24.50 19.30 16.50 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.28 0.71 -5.01 2 20 2009-01-15 18:05:59 2007-09-12 16:07:43 4 1 14 0 1 7 0 165.00 91 84.55 CHANGED hNLKIRTLGDLMRTSGVTPKTQSSSPKRRVLTSLATILAASGVVVANPSAAFANSTFTAVGYCDTNYQCAGGSGSSSRWSVNFDDGPTVSTIDLHELYRDQSDTMSSFRILGSVMSRANHPNETVTIHQQFYRDNGGQVPLGEYETRFRASSSNNAQRFNFDQGIPNLPWNDQVSSVAIWITRK .........VNLKIRTLGDLMRTSGVTPKTQSSSPKRRVLTSLATILAASGVVVANPSAAFANSTFTAVGYCDT.NYQCAGGSGSSSRWSVNFDDGPTVSTIDLHELYRDQSDTMSSFRILGSVMSRANHPNETVTIHQQFYRDNGGQVPLGEYETRFRASSSNNAQRFNFDQGIPNLPWNDQVSSVAIWITRK............ 0 0 1 1 +10212 PF10380 CRF1 Transcription factor CRF1 Mistry J, Wood V anon Pfam-B_25525 (release 21.0) Family CRF1 is a transcription factor that co-represses ribosomal genes with FHL1 via the TOR signalling pathway and protein kinase A [1]. 
25.00 25.00 32.80 32.80 21.80 21.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.81 0.71 -3.78 13 51 2009-01-15 18:05:59 2007-09-13 14:50:17 4 1 43 0 32 51 0 121.50 44 16.05 CHANGED sESTDE...............D-oLP...................ssss+s+h..hupKAKEVLSS.............................................................pssshRPPhLGTW.phDsKPFuIIDGLST+SL..................hs..ppppp.p-............p..........................ptst.sss.pppssssus-sssLs.....................LsELL.............................................Nh.............SELDD- ....sESTDE...............D-sLP...................sPss+pKp..hupKA+El..lSS.............................................................pslG.l+PPKLGoW..ph-s.KPFoIIDGLSTKSL........................ash.pptpp.pp................................................pppspss...pppppssuspsspLs.....................LsELL................................................Nh..........SEL-s..................................................................................... 0 6 18 30 +10213 PF10381 Autophagy_Cterm Autophagocytosis associated protein C-terminal Finn RD, Wood V, Coggill PC anon Pfam-B_10019 (release 7.3) Domain Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the vacuole. The small C-terminal domain is likely to be a distinct binding region for the stability of the autophagosome complex [2]. It carries a highly characteristic conserved FLKF sequence motif. 20.10 20.10 21.30 22.70 15.80 19.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.65 0.72 -6.81 0.72 -4.82 20 317 2009-01-15 18:05:59 2007-09-18 14:32:30 4 5 286 1 226 312 3 24.90 65 7.80 CHANGED Vc.YLhlFLKFlsSVlPTIEaDhTh ...Vc.YLllFLKFlsSVlPTIEYDaTh.. 
0 80 125 186 +10214 PF10382 DUF2439 Protein of unknown function (DUF2439) Mistry J, Wood V anon Pfam-B_19050 (release 22.0) Family Proteins in this family have been implicated in telomere maintenance in Saccharomyces cerevisiae [1] and in meiotic chromosome segregation in Schizosaccharomyces pombe [2] 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.89 0.72 -3.77 22 223 2009-01-15 18:05:59 2007-09-19 16:23:01 4 16 177 0 160 201 0 81.80 29 12.84 CHANGED VtE.apCLYTsplppKpKpWpDGhL+aa..phNs+l.Lasp.ssshlsstahppp.......p.hs.spEhcl.p.thLlpl...s-hhpphpp-ls ............c.apsLYT.cph.p+.Kp.KpW.p.DGhL+hp....phsp+hhLY..D-...pushlsshahpst..............................ttlpsGc-h.chpp..hLlpl...p-hht...................................................................... 0 43 78 124 +10215 PF10383 Clr2 Transcription-silencing protein Clr2 Wood V, Coggill P anon Wood V Family Clr2 is a chromatin silencing protein, one of a quartet of proteins forming the core of SHREC, a multienzyme effector complex that mediates hetero-chromatic transcriptional gene silencing in fission yeast. Clr2 does not have any obvious well-conserved domains but, along with the other core proteins, binds to the histone deacetylase Clr3, and on its own might also have a role in chromatin organisation at the cnt domain, the site of kinetochore assembly. 
25.00 25.00 26.10 27.80 24.30 24.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.93 0.71 -4.18 13 79 2009-01-15 18:05:59 2007-09-19 16:32:14 4 4 73 0 63 83 0 132.90 27 19.30 CHANGED lappGIFlGAEhLhVGDsVRL...........tsh.ht.t...psstssssDVMVI-....EIplclhpsssshpu.......pVRlsGclYTsscppA.h..p.sshs..sspP........hsh-EVhsphp.hlGMuuhucW.............apLhs.s........sslclopshVlGRhYE ....ahpGlFlGAEhlhlGDsVRL.............hsht..p.s...............ssts..pss-lhlIc....pIphchhs.sssshts..........slplhGclYshs.ppu.tt....s.....spP..................hh.cElhpphp.hl.....shtthupW............................athht.t.............psspls.splhGRhY...................................................... 0 14 32 50 +10216 PF10384 Scm3 Centromere protein Scm3 Mistry J, Wood V anon Pfam-B_19394 (release 21.0) Family Scm3 is a centromere protein that has been shown in Saccharomyces cerevisiae to be required for G2/M progression and Cse4 localisation [1]. The C terminal region of Scm3 proteins is variable in size and sometimes consists of DNA binding motifs [2]. 24.30 24.30 24.30 25.90 23.60 24.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.78 0.72 -4.48 35 154 2009-09-14 14:05:34 2007-09-19 17:11:47 4 9 141 3 104 165 0 57.60 34 9.22 CHANGED pcltpt+ppucp+hKstacsIh-KYup.-.cs.uDEIDL...pTGc...IVhDNGHL+slpscp ............cltpt+ptucp+h+stappIh-KYsp....-s.u.DEIDL...pTGp...IlhDNGHlcphpsp.t.............. 0 25 51 80 +10217 PF10385 RNA_pol_Rpb2_45 RNA polymerase beta subunit external 1 domain Coggill PC anon Gene3D, pdb_2a6h Domain RNA polymerases catalyse the DNA-dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared with three in eukaryotes (not including mitochondrial or chloroplast polymerases). This domain in prokaryotes spans the gap between domains 4 and 5 of the yeast protein. 
It is also known as the external 1 region of the polymerase and is bound in association with the external 2 region [1]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.04 0.72 -4.17 149 8806 2009-09-11 00:06:38 2007-09-21 14:13:17 4 34 7040 41 964 6923 2365 66.20 46 8.12 CHANGED YR+V.....ps.............GpV...oc-lhYLoA.-EcpahIAQAss..sl..........-ccG.phh....s.-hVhsR.hpu..-h...hhssspcV-ahDVS .....................YR+V......ts........................GhV....TDE.l..pYLoA.EEs.palIAQANu..sL..........D.-.c.G....pFs..........-.-hVssR..ppG.......Es................thhs...-cVDYMDVS........... 1 338 646 822 +10218 PF10386 DUF2441 Protein of unknown function (DUF2441) Finn RD, Coggill PC anon Gene3D, pdb_2aua Family This is a family of highly conserved, predicted, proteins from Bacillus species. The structure forms a homo-dimer. The function is unknown. 25.00 25.00 25.00 26.60 24.70 24.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.70 0.71 -4.15 2 90 2009-01-15 18:05:59 2007-09-26 11:48:52 4 1 90 2 4 56 0 137.10 76 70.61 CHANGED NTLaHFFFEREpLNusGcDuhQILpcHYpNpELHIpNENApVVMsYMDQTIRAhRETIVEMlRLQEaPpYPSRLSCLYAAKSYEDALKWKALFDSYNREVLQIVKLpVIGsSFEGDGNLLPKEDuIPFSQKhEQAREYWKG .....................................NTLYHFFFE+EQLNusGEDuhpIlppHYKN-ELHINNENApVVMNYMDQTIRAlRETIVEMVRLQEaP-YPSRLSCLYAAKSYEDALKWKALFDSYNREVLQIVKLRVIGsS.FEGDGN.LLPKEDGI...PFSQKhE..QAREYWKG.... 0 2 3 3 +10219 PF10387 DUF2442 Protein of unknown function (DUF2442) Finn RD, Coggill PC, Bateman A anon Gene3D, pdb_2auw & Pfam-B_2245 (release 23.0) Domain This family of bacterial and fungal proteins has several members annotated as being putative molybdopterin-guanine dinucleotide biosynthesis protein A; however this could not be verified. Hence the function is not known. This family also includes the DUF3532 that was found to be related and was merged into this family. 
Members of this family also fall into the NE0471 N-terminal domain-like superfamily, a family of proteins with a unique fold in SCOP:143880. 21.10 21.10 21.20 21.40 21.00 20.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.66 0.72 -3.96 153 848 2009-01-15 18:05:59 2007-09-26 12:29:25 4 11 504 5 290 765 94 71.90 20 66.29 CHANGED shsVphtss.....plhlphsD....Gcphtlsls.h...hppushppl..........pphplt.tstulpWs........shD.DluscuLhtstts .................hpVphhpsh.plhlphs-....Gpphhhshp.hh......ppsshppL.........h...ppspl...p.st..slpWs..............s.s.hDlsspsLht....t................. 0 92 202 258 +10220 PF10388 YkuI_C EAL-domain associated signalling protein domain Finn RD, Coggill PC anon Gene3D, pdb_2bas Domain In Bacillus species this highly conserved region of the YkuI protein lies immediately downstream of the EAL (diguanylate cyclase/phosphodiesterase domain 2) Pfam:PF00563 domain so that together they form a monomer which dimerises for its enzymatic action. The region contains three alpha helices and five beta strands and is the C-terminal half of the structure. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.13 0.71 -4.57 14 277 2012-10-01 23:40:40 2007-09-26 14:17:17 4 6 265 6 64 194 4 149.80 31 33.45 CHANGED hl-cshhK-+Lpp-hcpFIpaE+KKLps.aphsEphppclpph.lsclK+spp.s.Ehlhpluptls-hsFRlYlCDc-GaQhosNhhK.psGpWhlps-ahtKNWSWRPYFLENIh+Mppcp+G..llSDLYoDIETGEhIRTaSYPls-phYLFlDlsYpaLaEp-uLh ......................................................................................hptaht.chpphtt.hph.tp.h.ppltph.lt....p.cpst.p......s..p....hl.....t.....h.sp.t.l..s......ps..s..h..R..IahssccGhQpoGNs..h+.....p......s.....upW.....hlp....s..paht+NWSWRPY..FlcsIh..c.h.+..pp+s..hlS-hYtDlpTsch....hhThS..hs...l...s..s.p.hLhl...Dl.............................................. 
0 24 43 52 +10221 PF10389 CoatB Bacteriophage coat protein B Finn RD, Coggill PC anon Gene3D pdb_2ifo Family CoatB is a single filamentous bacteriophage alpha helix of approximately 44 residues. It is likely to assemble into a complex of 35 monomers in a Catherine-wheel like formation [1]. It is the major coat protein of the virion. 24.80 24.80 25.50 25.90 24.10 24.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.13 0.72 -4.10 6 39 2012-10-01 20:22:05 2007-09-26 17:16:45 4 1 34 1 11 37 4 46.50 34 62.44 CHANGED AuuuGlDVu-VssAIpuAtusIuoIGuuVLsVlVul+VaKWlRRuh .............usutuhDlssVsoulsuAtssluslGuAVLslhluItlaKalR+Ah.. 0 1 4 9 +10222 PF10390 ELL RNA polymerase II elongation factor ELL Wood V, Coggill P anon Wood V Family ELL is a family of RNA polymerase II elongation factors. It is bound stably to elongation-associated factors 1 and 2, EAFs, and together these act as a strong regulator of transcription activity. by direct interaction with Pol II. ELL binds to pol II on its own but the affinity is greatly increased by the cooperation of EAF [1]. Some members carry an Occludin domain Pfam:PF07303 just downstream. There is no S. cerevisiae member. 
21.60 21.60 23.80 23.80 21.50 19.90 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.86 0.70 -5.18 19 307 2009-01-15 18:05:59 2007-09-27 14:12:00 4 7 103 2 164 239 0 244.00 31 44.81 CHANGED pEstsYGLshuphsss..pholhaVKLT-oAl+AlppaQp.....p...sh+PsIpF.pGspG.................................................................hlpIPpsc.sss...................hppFsFhlSslup-sspG.SaDClpQhhtpsGsspLpsLGsIp-KlslpAosDSYpt....oRpphspsEE-p+s+soh.IKsssp.su++Vph..+ps...................................................................................ssshssssP...ppcpopPh...............................................................................p.sssh+pts............ssssppRshR-RllHLLAL+PY+KsEllhRLp+DGlspp-+psLsslLppVusls.p-ssasL+chhap-lQ.cDWPsYoEs-+phlcphLs+pLs .........................................................................................................................................................................................................................................................................................................................p.......................phh.l+Ls-sshpshpthpt...............ps.ItF..pG.pG.................................................................hlplPtss....sp...............................p.FsFhlSsh.sp-tspG.oh-....s..lpQh....stpGttpLpsLGslp-+l.slpA..s.sDSYph.......s+pphspsEE-.p+..spss...h..IK..sss...h....s.c.psph...pp.s..........................................................................................ths..ssss...ppppspPh.......................................................................................................................................p.t.thpptt..............................sslppRshR.-RllHLLAL+sa..+K.s.ELlh.R..L.p+.-G....l..s.p..-+stlss...hLppV.uphs...t....ct........oa...sL+c.hap-lp.cDWPhYo.-.-pp.lcphh.p...t.................................................................................................................................................
........................ 0 38 49 91 +10223 PF10391 DNA_pol_lambd_f Fingers domain of DNA polymerase lambda Finn RD, Coggill PC anon Gene3D, pdb_2bcq Domain DNA polymerases catalyse the addition of dNMPs onto the 3-prime ends of DNA chains. There is a general polymerase fold consisting of three subdomains that have been likened to the fingers, palm, and thumb of a right hand. DNA_pol_lambd_f is the central three-helical region of DNA polymerase lambda referred to as the F and G helices of the fingers domain. Contacts with DNA involve this conserved helix-hairpin-helix motif in the fingers region which interacts with the primer strand. This motif is common to several DNA binding proteins and confers a sequence-independent interaction with the DNA backbone [1]. 29.80 29.80 29.80 29.80 29.70 29.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.32 0.72 -4.41 58 544 2012-10-03 02:11:09 2007-09-27 16:59:55 4 29 240 212 323 547 116 52.00 38 9.68 CHANGED lphFsslaGlGspsApcaap.pGhRTL-DL...cp.pt.pLoptQplGlcaY-Dhpp ......hphFsslaGlG...spoApcaap..pGhRoL-...Dl.............+p....ps..pL..opp..QplGlcaY-Dht........ 0 92 151 228 +10224 PF10392 COG5 Golgi transport complex subunit 5 Wood V, Coggill PC anon Pfam-B_24958 (release 21.0) Family The COG complex, the peripheral membrane oligomeric protein complex involved in intra-Golgi protein trafficking, consists of eight subunits arranged in two lobes bridged by Cog1. Cog5 is in the smaller, B lobe, bound in with Cog6-8, and is itself bound to Cog1 as well as, strongly, to Cog7. 
29.00 29.00 29.10 29.30 28.90 28.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.38 0.71 -4.19 16 280 2012-10-03 17:31:52 2007-10-01 16:23:45 4 6 245 0 199 280 3 126.10 26 20.85 CHANGED -sFL-ssFsspsFuNslLh.............tspstssssLDlsoslc+lpaDlpElDp+lcphhssst.pLLsphsshpptpu.llsplcsslptLstSacRLcscVlpPaccAtplpssLp+lapTscLLRsshhaltLuppL ........................sFL.sssFssppassphlt.........................................................thslus.Ls+Lthslppl-pclcp.lsspttsL.Lspssshpphps.llptlpsplpsLptuhcRLcpcll-PapphpphpttLp+lptsscLLRpshRhLtLu++L......... 0 63 110 164 +10225 PF10393 Matrilin_ccoil Trimeric coiled-coil oligomerisation domain of matrilin Finn RD, Coggill PC anon Gene3D, pdb_1aq5 Domain This short domain is a coiled coil structure and has a single cysteine residue at the start which is likely to form a di-sulfide bridge with a corresponding cysteine in an upstream EGF (Pfam:PF00008) domain thereby spanning a VWA (Pfam:PF00092) domain. All three domains can be associated together as in the cartilage matrix protein matrilin, where this domain is likely to be responsible for oligomerisation [2]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.09 0.72 -4.59 14 256 2009-01-15 18:05:59 2007-10-02 16:16:30 4 52 50 3 93 230 0 44.00 40 7.85 CHANGED pls.pEDsCtCEulltFQppVpstlppLspKL-sVo+RLptLEp+lh ...........cDsCtCEulltFQspspu.tlp...p..LT.p....+.......L...tt...hopRlp.LEspl........ 1 5 12 33 +10226 PF10394 Hat1_N Histone acetyl transferase HAT1 N-terminus Finn RD, Coggil PC anon Gene3D, pdb_1bob Domain This domain is the N-terminal half of the structure of histone acetyl transferase HAT1. It is often found in association with the C-terminal part of the GNAT Acetyltransf_1 (Pfam:PF00583) domain. It seems to be motifs C and D of the structure. 
Histone acetyltransferases (HATs) catalyse the transfer of an acetyl group from acetyl-CoA to the lysine E-amino groups on the N-terminal tails of histones. HATs are involved in transcription since histones tend to be hyper-acetylated in actively transcribed regions of chromatin, whereas in transcriptionally silent regions histones are hypo-acetylated [1]. 21.40 21.40 21.60 21.70 19.70 20.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.45 0.71 -4.17 45 314 2009-01-15 18:05:59 2007-10-02 16:40:42 4 9 267 3 220 310 2 156.60 28 35.93 CHANGED sssuN-AlplplV..............................p....sstphp..t.......ss.FpPpFTa.IFG-sE..pIFGYcsLpIpLhasusohcsalplpYspK.......sshphsD....lpppLtc..hlPp......................s.hhpscp.....-Fhpsl..pp......................cpcs.ac.PsGp..hl.csa.......................s..ss....tp...ac......................................................................Iapsslts.ss...hpch+pRlQhFlhhFIE ........................ssuN-AlplpLV......................................p.........sspshtpt.............tsFpPpaoaplFG-sE..pIFGY+s..LpIpLhasAsshpshlplp........YspK......th..sshpssD....lpsplpp..hlPt...............................s..hhpsts.....-Fhptl....c..........................................cpts...Fp..P...Gp..ll..csa...................................s..ss......tp.............ap................................................................................Ia+s..shs..s...sshpchapRlQhFlhhFIE........................................................... 0 74 119 183 +10227 PF10395 Utp8 Utp8 family Mistry J, Wood V anon Pfam-B_24590 (release 22.0) Family Utp8 is an essential component of the nuclear tRNA export machinery in Saccharomyces cerevisiae. It is a tRNA binding protein that acts at a step between tRNA maturation /aminoacylation, and translocation of the tRNA across the nuclear pore complex [1]. 
21.00 21.00 21.40 21.60 20.70 19.90 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.20 0.70 -6.22 10 40 2009-01-15 18:05:59 2007-10-03 11:45:37 4 2 40 0 29 41 0 652.60 33 98.20 CHANGED MPSlopPFhLusLP+luSLsshcptsssspsus.s.p.cssplslGIStSoISpYlIsPTPKLlasaPlPsTsIVsuhsVhshss..............................ppclashGLosp+Kp+hLplppc...................stsssussEssspacl.Klcc+llslKlh..scsplIlVlhcsGhlchaca...............pLpht+sh..cl.Yo..pFVpchc.pstp-allllss.pscK..lsYKLlpL....sscssulhEL.sSsIlEshslssophsYphGpLYpL..ssscIplYolPsh.plppoIplP.hlscp....-hlSlpslusNRVLLossNKIYLlDlhasulLsph-.....op.pshQlLhsuVlsspspupso+.ThAlslshKNGsN.sotLcVINIsVGosoLc-.uLGKShppsss.....ppsthLcsLFs-csh...........spsElsslphhclLp-Lp..pppclpp............FDsIhhphLK.cK..........................Ea.sEsDRhl.DspFlsplL-LIFs.......pFtss-..a....P.+oLhYLLTHPLFPhs+T+GLLsphc..spscLhKQAIVTCPNLPLc-LLppLFo.pNsE.lhhDlshRlLp-ao+c-IppshK+L.....uplDlpshlshllpssss............phapLlsllIDusGLFuh-t-sl-cLsuhI-scVplhspNsphlsLl-phhLpspstspsupppps.....................................ccsspslstp...YoVEhL-l 
.....................................................PsLopsatlssLP+lssLs..ph.ss.....h.sus...p.sospIslGlStS.ISpYIlpPTPKLlasaslssTslVsshslhphps................................chashGLpspKp.phL.ltpp.....................st..ssssphhsphcl.K...h.cs+lhslKh....tpsphlhslhpNGhlphaca..................plphhpsh..clsYs..palpphp......t.tpcalhllss..pss+..lsa+Llpl......pssslhEl.sp.hhcshshpsuphsYp..GpLYpL.....spt..pI.haSlPph.plppsIpls.hlsc....pcllShpslusNRlLLossNpIYLLslhasSlLophc........ho+s+shplLpssVlss....c.pS.p..so+.ThAlslshKNtsN.sopLclINlDVGoNs.Lp-.uLGKShp.....ssss...............sps.lLcsLFs-cshs..........tss-lt.pl...chlccLp..pspDlsp............F-sIhhthLK.+c..........................Ea.s-pDRhl.D.sFlspVlcLIFs.......pFp.ss-...a......P.+TLTaLLTHPLFPhphTcsLLphLc..spPRLhKQAIVTCPNLPlp-LLppLhs......lcNpE.......l...hhDlshRllp-Fop-pIppphKcL..................splDlpshIphllshsps............phapLLslllDuhGL...Fs.h....ctshlcpLsphI-ppVchhppNophhsLl-pthhpp.thtpppscppss.....................................ppps.........hYolEhL-l................................................................................................... 0 5 16 27 +10228 PF10396 TrmE_N GTP-binding protein TrmE N-terminus Finn RD, Coggill PC anon Gene3D, pdb_1xzp Family This family represents the shorter, B, chain of the homo-dimeric structure which is a guanine nucleotide-binding protein that binds and hydrolyses GTP. TrmE is homologous to the tetrahydrofolate-binding domain of N,N-dimethylglycine oxidase and indeed binds formyl-tetrahydrofolate. TrmE actively participates in the formylation reaction of uridine and regulates the ensuing hydrogenation reaction of a Schiff's base intermediate. This B chain is the N-terminal portion of the protein consisting of five beta-strands and three alpha helices and is necessary for mediating dimer formation within the protein [1]. 
21.60 21.60 21.60 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.38 0.71 -4.11 270 4354 2012-10-01 23:12:28 2007-10-03 14:31:21 4 10 4206 9 1027 3258 2398 116.40 44 25.73 CHANGED TIsAlATs..sG.pGuluIlRlS.....Gspuhplspp...lh.............sp.....t.p.........s...+pspa...s..plhD....ts.........sp.......hlD-s.l.llhFhuP........pSFTGEDVlElpsHGGhsllptlLp.tllph..............G..............sRhAcPGEFTcRAFLNG+ ..........................................TIsAluTs...G...cGuluIlRlS.....Gs..p.A...h.p.l...u.pplh...........................sc.....p...............s..+phpY..s...plhD............ts....................................sp.................llD..c.s..l...lhh.ah.u..P.........pSF.TG..EDllElpsHGG.lllstlLphllph...............G...............................................sRhA..cPGEFTcR.....AFLNG+....................... 0 354 652 860 +10229 PF10397 ADSL_C Adenylosuccinate lyase C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1c3c Domain This is the C-terminal seven alpha helices of the structure whose full length represents the enzyme adenylosuccinate lyase. This sequence lies C-terminal to the conserved motif necessary for beta-elimination reactions [1], Adenylosuccinate lyase catalyses two steps in the synthesis of purine nucleotides: the conversion of succinylaminoimidazole-carboxamide ribotide into aminoimidazole-carboxamide ribotide, the eighth step of the de novo pathway, and the formation of adenosine monophosphate (AMP) from adenylosuccinate, the second step in the conversion of inosine monophosphate into AMP [2]. 
22.90 22.90 22.90 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.43 0.72 -3.97 148 3649 2009-01-15 18:05:59 2007-10-03 14:33:22 4 11 3192 22 1012 2751 1563 82.20 29 18.48 CHANGED tGllhopplhhsLs.p.pGluRppAaclVpcsuhpuhp......p.sps.........ht....-hLt...pDsplt...thlopc-.l.cplh.DPptalspssplhc+lh ..................................GLlhop+VhhsLl..c..pG.......hsRE.cAa-lVptpuhpuhc.......................................p..pss............hh....phLt.....sD.scls.......t.h.Lo.p......--.l..c.chh.....Dsp.haht....pssslhcRh.................................. 0 333 643 850 +10230 PF10398 DUF2443 Protein of unknown function (DUF2443) Finn RD, Coggill PC anon Gene3D, pdb_1zke Family This is a small family of highly conserved proteins from bacteria, in particular Helicobacter species, The structure is a bundle of alpha helices. The function is not known. 25.00 25.00 31.70 51.50 22.30 21.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.61 0.72 -4.18 3 57 2009-01-15 18:05:59 2007-10-03 16:39:50 4 1 56 6 5 26 0 77.80 81 99.60 CHANGED MFEKIDcIL+sIEDSR-EIEILLNlAKISLlDYIMIKRGSMDMPEHLshshLsQIDEEVEKLKEpI-ALNKIKKELLlF MFEKIRKILA-IEDSQNEIEMLLKLANLSLGDFIEIKRGSMDMPKGVNEAFFTQLSEEVERLKELINALNKIKKGLLVF..... 0 2 4 5 +10231 PF10399 UCR_Fe-S_N Ubiquitinol-cytochrome C reductase Fe-S subunit TAT signal Finn RD, Coggill PC, Bateman A anon Gene3D, pdb_1zrt Motif This is the N-terminal region of the E or R chain, Ubiquitinol-cytochrome C reductase Fe-S subunit, of the hetero-hexameric cytochrome bc1 complex. This region is a TAT-signal region. The cytochrome bc1 complex is an oligomeric membrane protein complex that is a component of respiratory and photosynthetic electron transfer chains.\ The enzyme couples the transfer of electrons from ubiquinol to cytochrome c with the the generation of a protein gradient across the membrane [1]. 
The motif is also associated with Rieske (Pfam:PF00355), UCR_TM (Pfam:PF02921) and Ubiq-Cytc-red_N (Pfam:PF09165). 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.99 0.72 -4.79 49 936 2012-10-02 00:19:25 2007-10-03 17:10:36 4 4 874 26 297 706 1269 39.70 41 20.61 CHANGED hssspsspsoRRDFLalATuusuuVGuuussWPhIsQMNPs ...........ppshsssRRcFLhhA.TussGuVGususAh.PFlsShsPS........ 0 73 167 224 +10232 PF10400 Vir_act_alpha_C Virulence activator alpha C-term Finn RD, Coggill PC anon Gene3D, pdb_1yg2 Family This structure is homo-dimeric, and the domain here is the C-terminal half of the structure, often associated with PadR upstream, (Pfam:PF03551), which is a transcriptional regulator. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.74 0.72 -3.62 37 838 2009-09-11 09:27:19 2007-10-03 17:40:19 4 3 630 1 235 647 7 88.30 21 48.18 CHANGED sslR-EhLlKlhusshhssssltsplpcphphppp+LspYcclc.pchhss...tpth.stpphhpaLsLctGlphEpthlcWs-cslthLsthp ............hRD-hhlKlhhs...sh.hs.s.sshht.lp...cphpt...ppcplpp.app...hc..pphass..............tt...sppphhphLsLct..ulthcpshlpWh-cslttLp...t................................................. 0 73 158 207 +10233 PF10401 IRF-3 Interferon-regulatory factor 3 Finn RD, Coggill PC anon Gene3D, pdb_1zoq Family This is the interferon-regulatory factor 3 chain of the hetero-dimeric structure which also contains the shorter chain CREB-binding protein. These two subunits make up the DRAF1 (double-stranded RNA-activated factor 1).\ Viral dsRNA produced during viral transcription or replication leads to the activation of DRAF1. The DNA-binding specificity of DRAF1 correlates with transcriptional induction of ISG (interferon-alpha,beta-stimulated gene). IRF-3 preexists in the cytoplasm of uninfected cells and translocates to the nucleus following viral infection. 
Translocation of IRF-3 is accompanied by an increase in serine and threonine phosphorylation, and association with the CREB coactivator occurs only after infection. 20.80 20.80 20.80 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.92 0.71 -4.70 43 649 2012-10-01 21:55:46 2007-10-04 10:54:49 4 8 78 11 242 858 0 168.80 31 40.48 CHANGED hspLclshaYpGchVt.phplssspGsRlhh....tsssststhht.............shpplhFPssst..l.....sppQpphsccLLssl-+GllLphssp.ulaupRLsps+VaWsusss.tss.....tPs.pLc+s.ppsplFshppFlp........................-Ltpa.p.pstssPp.a.plhLCFG-chss...pstpc+LIhVpl.PhhsRhlhEhsp ..............................................hpLcl.ph.hY+Gc..Vt..phslsssp.GCRlhh............tss.stst...s.................shppVt..F..Psspt...l............sp....pQ....pp....hspcLL.stl-+GllLthssp...u......la.u.pRL..Cps+VaWsuspsssts..........tPs...hlpRp.ppsclFshp.p.Flp.........................-Lhta...p.....pttt......P....p.....a.plhLCFGE-aPs.t.pshp++LIhV.pl..shhs+.lhp.h........................................................................... 0 25 43 115 +10235 PF10403 BHD_1 Rad4 beta-hairpin domain 1 Bateman A anon Bateman A Domain This short domain is found in the Rad4 protein. This domain binds to DNA [1]. 20.60 20.60 20.60 21.60 20.50 17.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.81 0.72 -4.45 56 421 2009-01-15 18:05:59 2007-10-05 13:00:34 4 18 274 3 311 451 1 57.10 35 6.66 CHANGED phscshPcol...........pshKsHPhalL-+pL++pEslhP..sspslGhhps.t...................EsVYtRssVh .....h..pcshPpsl...........psaK..sHPl........YsLcRaLp+pEslhP...ssp....lGhhps.....................EsVYpRssV....................... 0 91 161 258 +10236 PF10404 BHD_2 Rad4 beta-hairpin domain 2 Bateman A anon Bateman A Domain This short domain is found in the Rad4 protein. This domain binds to DNA [1]. 
21.30 21.30 21.40 22.70 21.20 20.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.27 0.72 -3.66 62 397 2009-01-15 18:05:59 2007-10-05 13:01:31 4 16 257 3 291 426 3 61.20 34 7.04 CHANGED L+St-pWh+p..GRslKhsEp.Ph...KhVpt.t.......p........p.tpp...............................phsLYuhaQT.......-.ahPP ......l+St-sWh+p..GR.sl+..hGE.....p..Ph...KhVpt.pst.....ttp.........pttppt.............................................................thsLYuhaQT.......-.YhPP............. 0 80 150 241 +10237 PF10405 BHD_3 Rad4 beta-hairpin domain 3 Bateman A anon Bateman A Domain This short domain is found in the Rad4 protein. This domain binds to DNA [1]. 20.60 20.60 20.70 22.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.52 0.72 -4.27 34 411 2009-01-15 18:05:59 2007-10-05 13:03:48 4 17 264 3 308 438 6 76.00 38 8.82 CHANGED lP+NsaGNl-latssMlPhGsVHlp......hsslt+lA+...........tLsIDaA.AVTGFcF......+stpspPVhsGlVVscEap-tlhpsa ....lP+NpaGNl-l..a..hss.MlPtGsVHlp............................hss...ht+lA+..............................pL.sID..aApAVsGF-F...............+sttu..hPlhpGlVVscEhc-hlhpsh................ 0 91 162 258 +10238 PF10406 TAF8_C Transcription factor TFIID complex subunit 8 C-term Wood V, Coggill P anon Wood V Domain This is the C-terminal, Delta, part of the TAF8 protein [1]. The N-terminal is generally the histone fold domain, Bromo_TP (Pfam:PF07524). TAF8 is one of the key subunits of the transcription factor for pol II, TFIID. TAF8 is one of the several general cofactors which are typically involved in gene activation to bring about the communication between gene-specific transcription factors and components of the general transcription machinery [2]. 
19.10 19.10 19.70 19.30 19.00 18.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.42 0.72 -3.91 25 316 2009-01-15 18:05:59 2007-10-08 10:18:23 4 7 242 0 219 302 0 49.80 39 13.82 CHANGED sYIPsa..LPsFPscHTYppTPhappslo.D.+plRc+hsc-uRhsE+uLh+Lh .........aIPsa..hPsFPssHTYhp.TPha..p..c.ho.....DhptlR..c....+tspppRpsEcALp+h............................ 0 60 110 174 +10239 PF10407 Cytokin_check_N Cdc14 phosphatase binding protein N-terminus Wood V, Coggill P anon Wood V, Pfam-B_23062 (release 22.0) Family Cytokinesis in yeasts involves a family of proteins whose essential function is to bind Cdc14-family phosphatase and prevent this from being sequestered and inhibited in the nucleolus. This is the highly conserved N-terminus of a family of proteins which act as cytokinesis checkpoint controls by allowing cells to cope with cytokinesis defects. These proteins are required for rDNA silencing and mini-chromosome maintenance [1]. 21.20 21.20 21.20 21.90 21.10 20.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.35 0.72 -4.36 16 70 2009-09-11 05:31:51 2007-10-08 14:49:30 4 2 54 0 49 73 0 73.10 37 7.39 CHANGED ppsKKFLah.TcsssoLhpLs-EIhs+hpKlYPs.p..clcIhoLQDpstCDLDP-FlVcDVFs.ssshVRVILcsch ..........s.h+KFLah.Tcs.ssoLhpLupEIls+acKlYPshp...sl-.IloLQDp..s..u..CDLDs-FlVcDVFs.ssshl+Vllcs-.h............... 0 15 29 44 +10240 PF10408 Ufd2P_core Ubiquitin elongating factor core Wood V, Coggill PC anon Wood V, Pfam-B_4085 (release 22.0) Family This is the most conserved part of the core region of Ufd2P ubiquitin elongating factor or E4, running from helix alpha-11 to alpha-38. It consists of 31 helices of variable length connected by loops of variable size forming a compact unit; the helical packing pattern of the compact unit consists of five structural repeats that resemble tandem Armadillo (ARM) repeats. 
This domain is involved in ubiquitination as it binds Cdc48p and escorts ubiquitinated proteins from Cdc48p to the proteasome for degradation. The core is structurally similar to the nuclear transporter protein importin-alpha. The core is associated with the U-box at the C-terminus, Pfam:PF04564, which has ligase activity. 20.40 20.40 21.50 21.20 19.40 20.10 hmmbuild -o /dev/null HMM SEED 629 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -12.77 0.70 -6.28 50 532 2009-01-15 18:05:59 2007-10-09 16:35:59 4 19 290 5 355 518 10 538.70 29 57.82 CHANGED LGslhslSshs............sphspta......h..tsstptsppplpsthpslptphpshhppLapllppll+suspsR..pphLpahupl..lphNptRpph..........................phc.ppluS..-GFhhNlshlLl+LspPhhc............hsKlc+IDssY..................................hhp..sshlcl.......p-ETclpus.ccscphhspptp..............................................pFlo-hFFLThpshahGhtshhpchpcltpplpchpcphpphptp...............p..htphppplcthhspphshcsh.lhcsshhppshpFhshsstalh+lh...........s.tp.hsp.pthphP......h.tt.........................................................................s.........................aphlPEahl-slhsah...hahhph.......sshhhst.phcphlphslhhhpssp.hlcNPaL+u+llclLhhhh.sht..tppthh.s........lhpsppls.....pcpLlhuLlchYl-lEpTGsp....sQFY-KFNhRapIstlhcplW.ppsta+p.pltc.scs...........................................................................s.s.hFlRFlshllNDssa.LLDEulspLpcI+ch..ppphpstsphtsh......................................sppp...cpc....................................................pppplpptcpps+ohhtLupcolphhphhT..pplscsFh.psEll-RlAuMLNYsLptLsGPKsps.L.KV+ssccYs..FcP+plLsplsclYlNLs..........ppcp...FltAVupDGRSas.phFpcAhpllpchs.lhstp.lpphpphspclcc 
............................................................LGshhp.hSsh.................stsspta...............a......ss.t.s.tph.ts.ttslp.hhp.hppplhplh.ppl..l....hs...sp...sR....pthLp.ahuth..lphNttRtth..........................php.p...h..huo....DuFhhNlhhlL.pLsp.h.................KltplcspY...................................hp..pttlph........pcETpl.ss.pt.p.thhtp...t...................................................pF.o-sFaLThtshahuhhs..hhpp....h..p.h..pplcchppt.hpphptpt..............................p.hhpphcpphcthhp.hhsh..csh..l.......h-...ph.ppshp.Fht.hhh.hll.plh.....................t............thphP...............Ls.t.............................................................................................................s...t.hthlP.Eahl-slsphh...hFlhph......................s..hh....t..t...hpp.hlthhlh.hhts.p....hl+NPaL+A+L.splL..hh...ht......pt.hh.p................................hhppp.hs.......tp.Lh.uL..hcha.s.-lEhTGsp..............spF..h-KFshRh.l..lhc.lW..ts..a+t..phhp.spp..........................t.....Fl+FlNhLhNDsha.LLDEu.....l.ptLtcI+ph..Qt.hpsp.tphtth......................................................................s.pt.....ppp.............................................................................cpppLtp.tp.s+.hhhLup-TlshhphhT..p..p..l..tpsFh..tPpls.p...........RluuMLNa.LppLsGPKpts.L.KVc..s...............p.cYs..FcP+pLlsplsslYlpL....................................sppp.........FhtAlupDtRSY..s.plFppshphlp.+h...s...h.....sst.l.tpaptlsp+hp.......................................................... 0 132 197 291 +10241 PF10409 PTEN_C2 C2 domain of PTEN tumour-suppressor protein Finn RD, Coggill PC anon Gene3D, pdb_1d5r Family This is the C2 domain-like domain, in greek key form, of the PTEN protein, phosphatidyl-inositol triphosphate phosphatase, and it is the C-terminus. This domain may well include a CBR3 loop which means it plays a central role in membrane binding. 
This domain associates across an extensive interface with the N-terminal phosphatase domain DSPc (Pfam:PF00782) suggesting that the C2 domain productively positions the catalytic part of the protein onto the membrane [1]. 25.50 25.50 25.50 25.80 25.00 25.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.88 0.71 -4.44 61 847 2012-10-10 12:23:49 2007-10-11 15:54:09 4 39 146 25 477 790 5 136.70 24 15.32 CHANGED hsspslhlpplhl......psl...........P.h....p.ts.....ss.........cPhlplhptpphl.................t.phpthppt.............ptpphhhhhs.sl..l.p..GDlhlch.hppp........hhtcph.....hFphhFNTuFlp...................................................................................................................................sshLhhs+s-LDt.stcs...cpaspsFpVclhFsc .............................................................................................psLhl+plhh.psl...........P.F...ptts................uC.............................cPhh.clh..ttpphl..............................................pst..hph.ht.ph....................................................................ptphhhh..h...h...s.....sl...hl.p...G....Dlh.lch.aHtp..p.........thpcph.............hFph.FpTsFlp.................................................................................................................................................................................................................sthLhhsKp-LDt.spc.....s.......cpasptFpVclhFt.................................................................................................. 0 147 223 334 +10242 PF10410 DnaB_bind DnaB-helicase binding domain of primase Finn RD, Coggill PC anon Gene3D, pdb_1dd9 Domain This domain is the C-terminal region three-helical domain of primase [1]. Primases synthesise short RNA strands on single-stranded DNA templates, thereby generating the hybrid duplexes required for the initiation of synthesis by DNA polymerases. 
Primases are recruited to single-stranded DNA by helicases, and this domain is the region of the primase which binds DnaB-helicase [2]. It is associated with the Toprim domain (Pfam:PF01751) which is the central catalytic core. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.22 0.72 -3.98 185 3150 2009-01-15 18:05:59 2007-10-11 16:20:54 4 23 3048 9 677 2386 736 58.20 24 9.81 CHANGED lh-Fhhcphtp..p.....h.sl..sss-u+sphlpp.shsllspls.sssh+phhhpcLucthulstppl ................L.pFhhpplht..p......h.sL...sss-G+sphhpt.shsllspls.stsh.RphhhppLupclGl..pt............................ 0 208 432 568 +10243 PF10411 DsbC_N Disulfide bond isomerase protein N-terminus Finn RD, Coggill PC anon Gene3D, pdb_1eej Domain This is the N-terminal domain of the disulfide bond isomerase DsbC. The whole molecule is V-shaped, where each arm is a DsbC monomer of two domains linked by a hinge; and the N-termini of each monomer join to form the dimer interface at the base of the V, so are vital for dimerisation [1]. DsbC is required for disulfide bond formation and functions as a disulfide bond isomerase during oxidative protein-folding in bacterial periplasm. It also has chaperone activity [2]. 21.00 21.00 21.10 21.20 20.80 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.57 0.72 -4.71 144 1562 2009-01-15 18:05:59 2007-10-12 11:30:55 4 5 1318 12 304 941 348 55.30 33 22.65 CHANGED lp..ppLp......phh..shplp..slpsoP...lsG.LaEVhs..ss...s......llYsstcG.callt.Gplh..-hpspp ....................lppsLt......ch....sl.p.ss..sIpsoP...lsG...hhpVho...su....s.......llYsos..DG....+allp.G.s.la..Dlssp.t....... 0 60 159 243 +10244 PF10412 TrwB_AAD_bind Type IV secretion-system coupling protein DNA-binding domain Finn RD, Coggill PC anon Gene3D, pdb_1e9r Domain The plasmid conjugative coupling protein TrwB forms hexamers from six structurally very similar protomers [1]. 
This hexamer contains a central channel running from the cytosolic pole (made up by the AADs) to the membrane pole ending at the transmembrane pore shaped by 12 transmembrane helices, rendering an overall mushroom-like structure. The TrwB_AAD (all-alpha domain) domain appears to be the DNA-binding domain of the structure. TrwB, a basic integral inner-membrane nucleoside-triphosphate-binding protein, is the structural prototype for the type IV secretion system coupling proteins, a family of proteins essential for macromolecular transport between cells and export [2]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.04 0.70 -5.84 37 675 2012-10-05 12:31:09 2007-10-12 13:00:04 4 14 429 36 140 6042 663 312.90 31 53.44 CHANGED shpluulPh.shpsEtpHhhlsGosGoGKoshlpcllsphRtR........G-RAllaDhsGsFhppFac..........ssp.DllLNPhDsRsssWshasEsps.hDacshApullP..t..ssss-sFWspuARtlFspssh+L.tpp..spposppLhctlhssslcpLcphLtsT.usslhutp..sp+.sstSl+uslsshlcslphLsstt..........ssFSIR-Wlpssp......suhLFlosptsptssl+PLlohWlslAhpslhu..hst......spc..+R.lWahhDELsuLp+LssLtpsLscuRKaGGshVlGlQuhuQLcclYGpc.tApolhuhhsT+lhhpss..sscsAchhuc..lG..cpElcchpEshSaGtss.hRDGs...........ohsppcpt...c.lVhso-Ihs.LssLpu.....alphssshPls+lplp 
.........................................................................................................................................................................................................................s...hsslsh....hp.t.E.......pp.h.hlhGos.GsGK..o..p..h..l..p....p..l...h....p.......h..h...p..+....................G..c..h..s...l..l..a..D..........p......s.....p.........a........h......p......p....a..a...p........................................t........D.....h.....l.....l........N...P...h.....D......t....R..........s.............t.....Ws.........a......p........-....h........h........p.........................-..........h..........p....p.......h..........u.p....sl...ls...............tst....tcsaW.tu......u....ct..lh........s.............p....h............h.....h.....h.h....t.....p...p.....................p..........t..........s..................t.........p........l............h...p.........h........h............h................t.........................p............h...........c..............p............l..........p....p.....h.....l........t........s.........o....................s...............t...........s.......h.........h.p....tp..................hp+.....s...s....h........S....l.....p.....u........s.......l....s..s...h.....l.......p......s.h...p.....h....ltt.......................................t.F.o....l...+....c...W....hps.t................tuhLF.l...o........p.....t.....p...p.....c....s.....s....l....p.P....l.l....u....h....h....l....s....h....s....h...p....t.l.hs........hs.................................sp...p.......p+.....l....a....h.h..h.DE.ls........o....L....t..+.........l......s..p....l........ph..l....s..p.uR.......caG.........ssh.l.hG.h.Qsh.s.Q..L.....c.....c.....h....Y....G..c.p...h.A....t....s....l...h.s....h.h...s.oph..h.h..p.ss.....s.t...p...hA.chhut....l.G.....c..p..-...h.......p...h.......p.....c...p....h......o.h....u....t..s...................t.s...u.s.........................sh.sp..p......ph........p....l..l.......sc.l..p.Lssh
ps.....alhhst.................................................................................................................................................................................... 0 27 81 120 +10245 PF10413 Rhodopsin_N Amino terminal of the G-protein receptor rhodopsin Finn RD, Coggill PC anon Gene3D, pdb_1edx Domain Rhodopsin is the archetypal G-protein-coupled receptor. Such receptors participate in virtually all physiological processes, as signalling molecules. They utilise heterotrimeric guanosine triphosphate (GTP)-binding proteins to transduce extracellular signals to intracellular events. Rhodopsin is important because of the pivotal role it plays in visual signal transduction. Rhodopsin is a dimeric transmembrane protein and its intradiskal surface consists of this amino terminal domain and three loops connecting six of the seven transmembrane helices. The N-terminus is a compact domain of alpha-helical regions with breaks and bends at proline residues outside the membrane [1]. The transmembrane part of rhodopsin is represented by 7tm_1 (Pfam:PF00001). The N-terminal domain is extracellular is and is necessary for successful dimerisation and molecular stability [2]. 21.10 21.10 21.40 21.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.73 0.72 -4.28 17 2285 2009-01-15 18:05:59 2007-10-12 14:21:53 4 2 1603 37 51 1764 0 30.00 80 15.25 CHANGED NGTEGsNFYlPMSN+TGlVRSPaEYPQYYLA-Paha ...............NFYlPMSN+TGVVRSP..FEYPQYYLA-PWpY... 0 1 10 28 +10246 PF10414 CysG_dimeriser Sirohaem synthase dimerisation region Finn RD, Coggill PC anon Gene3D, pdb_1pjq Domain Bacterial sulfur metabolism depends on the iron-containing porphinoid sirohaem. 
CysG, S-adenosyl-L-methionine (SAM)-dependent bis-methyltransferase, dehydrogenase and ferrochelatase, synthesises sirohaem from uroporphyrinogen III via reactions which encompass two branchpoint intermediates in tetrapyrrole biosynthesis, diverting flux first from protoporphyrin IX biosynthesis and then from cobalamin (vitamin B12) biosynthesis. CysG is a dimer of two structurally similar protomers held together asymmetrically through a number of salt-bridges across complementary residues in the CysG_dimeriser region to produce a series of active sites, accounting for CysG's multifunctionality, catalysing four diverse reactions: two SAM-dependent methylations, NAD+-dependent tetrapyrrole dehydrogenation and metal chelation. The CysG_dimeriser region holding the two protomers together is of 74 residues [1]. 20.40 20.40 20.40 20.40 20.00 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.93 0.72 -4.49 132 1426 2009-01-15 18:05:59 2007-10-12 16:12:34 4 8 1263 6 269 987 131 59.40 36 13.80 CHANGED tslGpLAshuuphRscV+pplsshstRRpFWEchh.pG.huptlhsGppppAcpthpptls ...........pLGplAchAuphRscVKpphs.shspRR+.FWE+hF.sspl..Aptltsscpptsppthcphl........................ 0 54 139 201 +10247 PF10415 FumaraseC_C Fumarase C C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1fup Domain Fumarase C catalyses the stereo-specific interconversion of fumarate to L-malate as part of the Kreb's cycle. The full-length protein forms a tetramer with visible globular shape. FumaraseC_C is the C-terminal 65 residues referred to as domain 3. The core of the molecule consists of a bundle of 20 alpha-helices from the five-helix bundle of domain 2. The projections from the core of the tetramer are generated from domains 1 and 3 of each subunit [1]. FumaraseC_C does not appear to be part of either the active site or the activation site but is helical in structure forming a little bundle. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.29 0.72 -3.90 258 5840 2009-01-15 18:05:59 2007-10-12 16:51:58 4 10 3710 79 1358 4033 1826 55.00 43 11.79 CHANGED LVTALNPhIGY-pAAcIAKpAhcpGpol+-ssl.chGhl.......oc-phDcllcPppMspP ................lVTALNPaI.GY-pAAcIAKpAtc.pGpol+Essl.ch.Gh.L.........oc-chDchl.cPppMhtP....................... 0 415 827 1129 +10248 PF10416 IBD Transcription-initiator DNA-binding domain IBD Finn RD, Coggill PC anon Gene3D, pdb_1pp8 Domain In Trichomonas vaginalis, thought to be the earliest extant eukaryote, the sole initiator element for control of the start of transcription is Inr, and this is recognised by the initiator binding protein IBP39. IBP39 contains an N-terminal Inr binding domain, IBD, connected via a flexible, proteolytically sensitive, linker (residues 127-145) to a C-terminal domain. The IBD structure reveals a winged-helix-wing conformation with each element binding to DNA, the central helix-turn-helix contributing the majority of the specificity-determining contacts with the Inr core motif TCAPy(T/A). The binding of IBP39 to the Inr directly recruits RNA polymerase II and in this way initiates transcription [1]. 25.00 25.00 25.00 26.90 24.80 23.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.39 0.71 -4.34 94 107 2009-01-15 18:05:59 2007-10-12 17:53:28 4 2 1 7 107 109 0 122.10 22 48.18 CHANGED ppD.pp.pahpLcphlss..p+pt+.............spphpsFsptLptlhpast+sss.p..RshlsGltWhsss...lslNscpLphLls+sKSo.....INGshpphGatth..t....tt.h...hs....hhps..shsph+.pWolRphss ...............D..pa.pLpphlss..p+pt+.............spphpsFsptLptlhpash+sss.p..RshlsGltWhsss...lslNscpLphLls+sKSo.....INGshpphGatth.tptp..tp.lhthhs.....htts..t.+.pWolR....s..................................... 
0 107 107 107 +10249 PF10417 1-cysPrx_C C-terminal domain of 1-Cys peroxiredoxin Finn RD, Coggill PC anon Gene3D, pdb_1prx Domain This is the C-terminal domain of 1-Cys peroxiredoxin (1-cysPrx), a member of the peroxiredoxin superfamily which protect cells against membrane oxidation through glutathione (GSH)-dependent reduction of phospholipid hydroperoxides to corresponding alcohols [1]. The C-terminal domain is crucial for providing the extra cysteine necessary for dimerisation of the whole molecule. Loss of the enzyme's peroxidase activity is associated with oxidation of the catalytic cysteine, upstream of this domain; and glutathionylation, presumably through its disruption of protein structure, facilitates access for GSH, resulting in spontaneous reduction of the mixed disulfide to the sulfhydryl and consequent activation of the enzyme [2]. The domain is associated with family AhpC-TSA, Pfam:PF00578, which carries the catalytic cysteine. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.89 0.72 -4.30 86 5521 2009-01-15 18:05:59 2007-10-15 10:44:00 4 14 3627 357 1733 3878 622 35.40 38 17.34 CHANGED ALQhs-cp.s..ssPAsW.ps..........Gccsl...ssotc..u..hpcahps .....AhQasppH..G-..VCPAsW.ct...........Gccsl...p.s.o.-............h...................... 0 559 995 1407 +10250 PF10418 DHODB_Fe-S_bind Iron-sulfur cluster binding domain of dihydroorotate dehydrogenase B Finn RD, Coggill PC anon Gene3D, pdb_1ep3 Domain Lactococcus lactis is one of the few organisms with two dihydroorotate dehydrogenases, DHODs, A and B [1]. The B enzyme is a prototype for DHODs in Gram-positive bacteria that use NAD+ as the second substrate. DHODB is a hetero-tetramer composed of a central homodimer of PyrDB subunits resembling the DHODA structure and two PyrK subunits along with three different cofactors: FMN, FAD, and a [2Fe-2S] cluster. 
The [2Fe-2S] iron-sulfur cluster binds to this C-terminal domain of the PyrK subunit, which is at the interface between the flavin and NAD binding domains and contains three beta-strands. The four cysteine residues at the N-terminal part of this domain are the ones that bind, in pairs, to the iron-sulfur cluster. The conformation of the whole molecule means that the iron-sulfur cluster is localised in a well-ordered part of this domain close to the FAD binding site [2]. The FAD and and NAD binding domains are FAD_binding_6, Pfam:PF00970 and NAD_binding_1, Pfam:PF00175. 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.62 0.72 -4.46 180 2835 2012-10-02 17:47:23 2007-10-15 12:37:55 4 37 1790 3 745 2221 217 38.60 43 13.13 CHANGED EphMsCGlGhChuCs..lpst..........hpl..CpDGPVFsspclt .....EppMs.C.GhGtChuCt..lpsstt...................hhV..ChDGPVFssppl........ 0 318 533 651 +10251 PF10419 TFIIIC_sub6 TFIIIC_subunit; TFIIIC subunit Mistry J, Wood V anon Pfam-B_14433 (release 21.0) Family This is a family of proteins subunits of TFIIIC [1]. TFIIIC in yeast and humans is required for transcription of tRNA and 5 S RNA genes by RNA polymerase III. Yeast members of this family are fused to phosphoglycerate mutase domain. 20.80 20.80 21.10 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.25 0.72 -4.49 25 235 2009-11-13 15:40:22 2007-10-15 13:05:59 4 6 207 0 162 231 0 34.40 35 13.12 CHANGED lplhGL-ocpPllplsspl.apGsWcchlGT-lhFs ....hpllGl-ocpPllplsspl.FpGpac.-slGTplhFp..... 1 46 78 128 +10252 PF10420 IL12p40_C Cytokine interleukin-12p40 C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1f42 Domain IL12p40_C is the largely beta stranded C-terminal, D3, domain of interleukin-12p40 or interleukin-12B. 
This interleukin is produced on stimulation by macrophage-engulfed micro-organisms and other stimuli, when it dimerises with interleukin-12p35 to form a heterodimer which then binds to receptors on natural killer cells to activate them to destroy the micro-organisms [1]. This domain contains two disulfide bridges, one of which serves to bind p40 to p35 and the other to hold the beta strands within the domain together. The cupped shape of the p35 binding interface matches the elbow-like bend between D2 and D3 in p40 [2]. The domain is often associated with family fn3, Pfam:PF00041. 25.00 25.00 30.30 29.20 21.20 22.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.07 0.72 -3.88 21 132 2009-01-15 18:05:59 2007-10-15 14:05:34 4 6 63 8 36 104 0 87.30 42 29.28 CHANGED scsaLcCpApNYuG..cFpCuWhht..pssh.hhpl+upR.....................................sSsssptVoCuhs................pchulpCp.--stCPhAEEotPIplslcstpc ......KsFL+CEA+NYSG..cFTC.Whss.hsssl..pFsl+..usR.....................................sSsss...psVTCusssh..........ppp.pcYolpCQ.-cssCPhAEEshPIclsl-shp...................... 0 1 6 15 +10253 PF10421 OAS1_C 2'-5'-oligoadenylate synthetase 1, domain 2, C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1px5 Domain This is the largely alpha-helical, C-terminal half of 2'-5'-oligoadenylate synthetase 1, being described as domain 2 of the enzyme and homologous to a tandem ubiquitin repeat. It carries the region of enzymic activity between 320 and 344 at the extreme C-terminal end [1]. Oligoadenylate synthetases are antiviral enzymes that counteract vial attack by degrading viral RNA. The enzyme uses ATP in 2'-specific nucleotidyl transfer reactions to synthesise 2'.5'-oligoadenylates, which activate latent ribonuclease, resulting in degradation of viral RNA and inhibition of virus replication [2]. This domain is often associated with NTP_transf_2 Pfam:PF01909. 
19.90 19.90 19.90 22.40 19.00 19.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.44 0.71 -4.81 49 511 2009-01-15 18:05:59 2007-10-15 16:12:05 4 20 58 2 142 448 0 173.60 46 48.00 CHANGED sscPs.splYspLIcpps..t..ptGEFSsCFTELQ+sFlcpRPsKLKsLIRLVKHWYppCpc+......htssLPPpYALELLTlYAWEpG.sspspFshApGFRTVL-L...ltcYppLCIYW....Th.YsFccphlppaLppQLc+sRPVILDPADPTsNVusu.sh.sWclLAcEAptWLppsChpststssVssWcV.sth .................................s..pPs.splYspLIptts......ptGEFSsCFoELQ+sFlp.pR....P.sKLKsLIRLVKHWYppspcp................t....ssLPPpYAL.ELLTlYA...WEpG.st..pspF...shApGF.R.TVL.-L.........l.ppYppL.....CIYW.....Th.YsFcs.hlppaLppQLp.......+s.......RPlILDPADPTtNluts.s...WchLApEAtthh.ptsChhptptssltsWpV...h.................................... 0 29 35 37 +10254 PF10422 LRS4 Monopolin complex subunit LRS4 Mistry J, Wood V anon Pfam-B_63451 (release 22.0) Family Monopolin is a protein complex, originally identified in Saccharomyces cerevisiae, that is required for the segregation of homologous centromeres to opposite poles of a dividing cell during meiosis I [1][3]. The orthologous complex in Schizosaccharomyces pombe is not required for meiosis I chromosome segregation, but is proposed to play a similar physiological role in clamping microtubule binding sites [2]. In S .cerevisiae this subunit is called LRS4, and in S. pombe it is known as Mde4. 
21.70 21.70 21.70 24.00 21.30 21.60 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.88 0.70 -5.02 5 29 2009-01-15 18:05:59 2007-10-15 16:21:14 4 2 28 2 18 21 0 226.90 38 72.84 CHANGED LQLlA-YYcSVL-sE+I...........YhEa..........sp.phpFhuspKTNAscs-.............sphlT-EsLpLQ+Q.......................IsQLssDLQlp+pEsE......................KL+clpKTQKAl......................LESKLpotKupVDphK.....cpossuscupsRssshuts-cppRupthppt+.o.........FHLLSP....IhsscpP.............................sSssupc+p...............uGLRplLcsGcsTIFDp.Sp..cD-.................................sDEDu-pssslpshphutsosR+.lsuL+sos- ................................................LQLLuNYYKuhl-uERI...........Y.EY..............psphphuu.s+hssspus.............sp+ls-ETLhLQRQ.......................lsQLsppLQ.hphpENE.........................................KL.hplQKsQKAL.................................................................hpSKLsopcthIDcLK.cLpspphsscpcspppssssups-pppsspttpss+so..........hH.LL..SP......lssRcps....t.....tsp...........................sSssupc+s...................pGLRplLooG+sTlFDs....Spp.DD-..................................s-sschppDssssp.t.................................................................................................... 0 2 8 15 +10255 PF10423 AMNp_N Bacterial AMP nucleoside phosphorylase N-terminus Finn RD, Coggill PC anon Gene3D, pdb_1t8s Domain This is the N-terminal domain of bacterial AMP nucleoside phosphorylase (AMNp). The N- and C-termini form distinct domains which intertwine with each other to form a stable monomer which associates with five other monomers to yield the active hexamer. The N-terminus consists of a long helix and a four-stranded sheet with a novel topology. The C-terminus binds the nucleoside whereas the N-terminus acts as the enzymatic regulatory domain. AMNp (EC:3.2.2.4) catalyses the hydrolysis of AMP to form adenine and ribose 5-phosphate. thereby regulating intracellular AMP levels [1]. 
20.90 20.90 20.90 22.20 19.30 20.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.90 0.71 -4.66 67 952 2009-01-15 18:05:59 2007-10-18 11:24:16 4 3 935 27 168 567 55 159.40 47 32.95 CHANGED ttAVs+LppLY-pusshLpsshpphl.sGs.PsschRA....hYPplRlsssshups......coR..uaG+VstPGsYuTTlTRPcLFcsYLtEQlpLLhpsHsVslpVGsSspsIPhpFslsssstlp.sstssstc......LpchFssPDLushsDcIssGh.apstsstshP ........................s.pAl-+LctLYEpulsALRsAlupYlpsGpl.....P.D.ppsRtt...........FsYPpLsVoaDuss............ps......c..........TR..AaG+hocsGsYoTTlTRPsLFRsYLpEQLsLLhp-YGspIsVpsSppcIPYPYVl-uup.Ls..l.Dcohu.A.s......LschFPTT-LAplsD-hADGlacPsphpP...................................... 0 21 76 119 +10257 PF10425 SdrG_C_C C-terminus of bacterial fibrinogen-binding adhesin Finn RD, Coggill PC anon Gene3D, pdb_1r17 Domain This is the C-terminal half of a bacterial fibrinogen-binding adhesin SdrG. SdrG is a Gram-positive cell-wall-anchored adhesin that allows attachment of the bacterium to host tissues via specific binding to the beta-chain of human fibrinogen (Fg). SdrG binds to its ligand with a dynamic "dock, lock, and latch" mechanism which represents a general mode of ligand-binding for structurally related cell wall-anchored proteins in most Gram-positive bacteria. The C-terminal part of SdrG(276-596) is integral to the folding of the immunoglobulin-like whole to create the docking grooves necessary for Fg binding. The domain is associated with families of Cna_B, Pfam:PF05738 [1]. 
20.50 20.50 20.50 21.20 19.10 18.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.02 0.71 -4.27 35 1380 2009-01-15 18:05:59 2007-10-18 15:44:03 4 52 266 14 22 1140 1 161.70 27 17.80 CHANGED sshsssthsslsu....pIps...lsppssp..asphlYVNPpppshppss.....lslpG............phspuushsssssplKlYcVsss.ppLscShhssh.s.sphcDVTsph.............phohssNs.phslsFs..slsps..YVl+hsGcasssusp...lshpopLsuhsppt........h..ssshsasN .....................s.thspppssslpu....plsp...lspsssp..appslYVNP.p..p....s...hssss.............l.lpG................phssuuplssss.splKlYcVsss..s.......p......LscSahsss.........ophcDVTsph..................ploassss..shslsFs....clsps...Yll..hVsu+a-ssuss..slshpsslpshssph...............hssthsas....................... 1 9 10 20 +10258 PF10426 zf-RAG1 Recombination-activating protein 1 zinc-finger domain Finn RD, Coggill PC anon Gene3D, pdb_1rmd Domain This is a C2-H2 zinc-finger domain closely resembling the classical TFIIIA-type zinc-finger, CX3FX5LX2-3H, despite having a valine and a tyrosine at the core instead of a phenylalanine and a leucine, hence CX3VX1LX2YX2H. The structure, nevertheless, contains the characteristic two-stranded beta-sheet and alpha-helix of a classical zinc-finger. The domain binds one zinc and, in complex with the zinc-RING-finger domain, helps to stabilise the whole of the dimerisation region of recombination activating protein 1 (RAG1) [1]. The function of the whole is to bind double-stranded DNA. 20.70 20.70 21.00 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.46 0.72 -3.98 53 4732 2009-01-15 18:05:59 2007-10-18 16:00:50 4 15 3377 1 20 4638 0 29.20 66 5.20 CHANGED LslRCPVK-CcEEVhLGKYs+HlSSHKEs+ .......LslRCPVK-CcEElhhGKYupHLSSHKEhK..... 0 1 2 4 +10259 PF10427 Ago_hook Argonaute hook Bateman A, Wood V anon [1] Motif This region has been called the argonaute hook [1]. 
It has been shown to bind to the Piwi domain Pfam:PF02171 of Argnonaute proteins. 23.00 23.00 24.30 24.30 21.50 22.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.80 0.71 -11.98 0.71 -4.13 36 202 2009-01-15 18:05:59 2007-10-19 10:48:31 4 10 53 1 85 153 0 133.70 41 8.72 CHANGED GpPosPh..ssV..............................cp..GsuhWG+sssS.ssh.tstssssssouWGs....sssuss....suspshpssW..s-csssshus...........suWp-ptssssGh...Ws..spsSp.ssuSh.suuWupt.sst....tthct....uhhu.uphttcuh.hu+ ...........................................................h..................................DN..GTSAWG+Ps....so....usu.....WG-...s.ssssssuW....Gs..s.....ssusssh.............KsuuKSMQ..D.GW..GscDhslsusR....suWEEE-..-GGh...WN..osuSQtS.sSS...hssuuWGpt.usK..........+phpp......uhts..Gs..s-uW.MN..................... 0 6 14 35 +10260 PF10428 SOG2 RAM signalling pathway protein Mistry J, Wood V anon Pfam-B_35594 (release 22.0) Family SOG2 proteins in Saccharomyces cerevisiae are involved in cell separation and cytokinesis [1]. 
25.00 25.00 87.00 42.90 24.30 23.90 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.40 0.70 -5.83 17 158 2009-01-15 18:05:59 2007-10-19 11:53:20 4 23 126 0 122 165 1 355.20 22 46.12 CHANGED ah+RLSsLP.....Ecctppps.......................h-sllEus+GlLFulhQlpstlp.tlhslspccssppsslc......lhYssssHl-pL.psLcph-stspss.........hpppslhctChohlsuap+lhs.LppslcthhssuDsRYlRshhh.lasShhElpNuhstlss..t..........ppt.........s...h....tp.t.tshhpspphosTts+.sshs.pp.ps................................................sslpps..........................................................phsssss...........................pspscssphs.......s...s.ssPposcoh.shsssssspls..................s..t.spst-cpl....cplappLpsssshshpslsplppphs+shtsuppspp...scsltt....................................hhpsLhcpCpsshclocsLppRLsshp............................................p-sh.shpsphpaW...chspuFlc.................uhlsllsph+plpssh.h..ss-lhshLtslp+uoK-sshllphSsaphl ........................................................................ah+RhS.LP.....Ep.p..hp.pth.................................................................hts..llcsu+tlLauh.plpstlp.thhshhpstp..hpt.s.lp......lhYsspsal-pL.ptlpph-p.stpt...........spslhctC.shlsuatplhs.Lttslphhhs...psDs+alR.l.hhhlasShhElpssht.lss..................................t.............................................h..............ssp...t.t.........t................................................................s.....................................................................................................................................................................s.......................................t..tts.......................sstssp.h...s.t.....t.t............................s..t..p.pph....cpla.tLppshphs.pshs.hppthsphh.t.....uttppt....spthh.....................................................hhppLhppCt.shphochLpp+Ls.hp............................................pps..s.tsp....htha...p.hpsFlp...................shhphhs.
th+th...s.h.h...s-hh..htslpcsh+-sshhlt.Ssap............................................................................................. 0 32 67 106 +10261 PF10429 Mtr2 Nuclear pore RNA shuttling protein Mtr2 Finn RD, Coggill PC anon Gene3D, pdb_1q42 Domain Mtr2 is a monomeric, dual-action, RNA-shuttle protein found in yeasts. Transport across the nuclear-cytoplasmic membrane is via the macro-molecular membrane-spanning nuclear pore complex, NPC. The pore is lined by a subset of NPC members called nucleoporins that present FG (Phe-Gly) receptors, characteristically GLFG and FXFG motifs, for shuttling RNAs and proteins. RNA cargo is bound to soluble transport proteins (nuclear export factors) such as Mex67 in yeasts, and TAP in metazoa, which pass along the pore by binding to successive FG receptors. Mtr2 when bound to Mex67 maximises this FG-binding. Mtr2 also acts independently of Mex67 in transporting the large ribosomal RNA subunit through the pore [1]. 23.70 23.70 23.70 24.10 23.60 23.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.18 0.71 -4.49 7 52 2012-10-03 02:27:24 2007-10-19 13:07:04 4 1 50 4 32 75 0 161.10 37 88.94 CHANGED ss.sQ.........s-sFlKKlLApLD......p.ps.sclppalp.F..........ppstIlhNupPhuss......stFLphW.ptsshTpHtlouhDhHlIP..GoGThlsNsssKVRF.DESGRD+hGpsusl.h..............sspshscsRPlWGoaaGlsLpLllD-plhpss.stsIsShNYphVa+P-DSllpI ....................s.s.sphhpsFlK+lLApLD........s.ss.splspalshF..........s.sspIIhNusPhups......stFhphWpsps.tTpHtLouhD.....h...H..sIP......GoG.....Thl..hNsssKVRF.DE..SGRs+hGpsAsl..tss..............ss.spsRPlWGsaFGlslpLllD-plhpss.stlIsuaNYphsY+P-DSllpl..................................................................................... 
0 8 17 28 +10262 PF10430 Ig_Tie2_1 Tie-2 Ig-like domain 1 Bateman A anon Ciani B Domain \N 25.00 25.00 49.50 48.00 23.40 16.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.32 0.72 -3.63 4 53 2012-10-03 02:52:13 2007-10-19 13:35:17 4 11 28 2 19 50 0 95.50 79 9.53 CHANGED AMDLILINSLPLVSDAET.SLTCIASGW+PHEPI.TIGRDFEALMNQHQD.PLEVTQDsTREWAKKVVWKREKASKINGAYFCEGRVRGpAIRIRTMKM .AMDLILINSLPLVSDAET.SLTCIASGW+PHEPI.TIGRDFEALMNQHQD.PLEVTQDVTREWAKKVVWKREKASKINGAYFCEGRVRGEAIRIRTMKM 0 1 2 6 +10263 PF10431 ClpB_D2-small C-terminal, D2-small domain, of ClpB protein Finn RD, Coggill PC anon Gene3D. pdb_1qvr Domain This is the C-terminal domain of ClpB protein, referred to as the D2-small domain, and is a mixed alpha-beta structure. Compared with the D1-small domain (included in AAA, Pfam:PF00004) it lacks the long coiled-coil insertion, and instead of helix C4 contains a beta-strand (e3) that is part of a three stranded beta-pleated sheet. In Thermophilus the whole protein forms a hexamer with the D1-small and D2-small domains located on the outside of the hexamer, with the long coiled-coil being exposed on the surface. The D2-small domain is essential for oligomerisation, forming a tight interface with the D2-large domain of a neighbouring subunit and thereby providing enough binding energy to stabilise the functional assembly [1]. The domain is associated with two Clp_N, Pfam:PF02861, at the N-terminus as well as AAA, Pfam:PF00004 and AAA_2, Pfam:PF07724. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.63 0.72 -4.08 772 19559 2009-01-15 18:05:59 2007-10-19 14:59:04 4 59 4882 83 4604 13942 5521 81.80 27 12.29 CHANGED LsccplppIlcl.....lp.clpp.+L..t.c...cp.l.pL..clo-sAhc.hlucp.G...aDstaG.ARPL+RhIQcplcssL.........ActlL....pGp..lt.p.Gspl..pl .............................................Lsc-slhpIlsh.....tLs..chh.p.p..L...........h.p....cs....lpL.cho-p.A..hc...h...l.Acp..u.......h.....-...........p.....h.....G.ARsL+pll..pc....tlpc.l...............schhl..ttp.h.................................................. 0 1527 2924 3867 +10264 PF10432 bact-PGI_C Bacterial phospho-glucose isomerase C-terminal region Finn RD, Coggill PC anon Gene3D, pdb_1tzb Domain This is the C-terminal half of a bacterial phospho-glucose isomerase EC:5.3.1.9 protein which is similar to eukaryote homologues to the extent that the sequence includes the cluster of threonines and serines that forms the sugar phosphate-binding site in conventional PGI. This domain contributes a good proportion of the active catalytic site residues. This PGI uses the same catalytic mechanisms for both glucose ring-opening and isomerisation for the interconversion of glucose 6-phosphate to fructose 6-phosphate [1]. It is associated with family SIS, Pfam:PF01380. 
24.50 24.50 24.90 24.60 23.70 24.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.68 0.71 -4.52 29 276 2009-01-15 18:05:59 2007-10-19 17:18:44 4 2 269 10 113 214 335 153.00 27 45.09 CHANGED hpphps.A+pLAttlts..tlPllaus.shhtssAhRaKsplsENAKhPAhhshlPEhsHN-lsuhpss...........................htphthlllpsp.-p.ps............thhhshspclh.tpsssshplcsp...usS.hLpclhtLlhlsDasSlaLAhhhGlDPhslshIsthKccls ..............................h...hsN.AKsLAttLss...thPllaus.sshssssucRhtp.hscsutp.Ahsushsch.......+shlsuhtts................................s.ts+h.lllltDc.s.-ttcs.................................tthc......hcpl...h......ts+ssslppl.ph......tu....o.slp+hsuLlhhusaAulYLAltht.................s........................................ 0 54 87 103 +10265 PF10433 MMS1_N MMS1; Mono-functional DNA-alkylating methyl methanesulfonate N-term Mistry J, Wood V anon Pfam-B_64607 (release 22.0) Domain MMS1 is a protein that protects against replication-dependent DNA damage in Saccharomyces cerevisiae [1]. MMS1 belongs to the DDB1 family of cullin 4 adaptors and the two proteins are homologous. MMS1 bridges the interaction of MMS22 and Crt10 with Cul8/Rtt101 [2]. Cul8/Rtt101 is a cullin protein involved in the regulation of DNA replication subsequent to DNA damage. The N-terminal region of MMS1 and the C-terminal of MMS22 are required for the the MMS1-MMS22 interaction [3]. The human HIV-1 virion-associated protein Vpr assembles with DDB1 through interaction with DCAF1 (chromatin assembly factor) to form an E3 ubiquitin ligase that targets cellular substrates for proteasome-mediated degradation and subsequent G2 arrest [4]. 
24.50 24.50 24.50 24.70 24.40 24.00 hmmbuild --amino -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.68 0.70 -6.36 38 910 2009-09-11 06:56:25 2007-10-22 15:39:32 4 19 291 34 656 897 17 467.30 21 40.61 CHANGED p...hLhluhcS...tclhhLthpp................................psss.......Fh......pshsssssphpphGpplslDPpuRshulsuhpshFtla.Lpp.............ht.phhttst.tsPl...hp........................-GhIhphsFLa...stsssss.hlhhLhhs..ppppschhsYc............Wpsspslppshs+hshs....lssphclP.....shlIPLsp..................ssuallVsspphhlap..........st.......phhshphs............hppsslhssasts............................hppptcclhLsc-sGplhhltlsptst..........php.lGp..sslsssFshLcss........t..lLhsuustGs.uhhlphs.......................................................hspschlpch.NWuPllDhsllc..pppssp....t...............classSGsu.pcGulpplRpGlpup.thphshpph.s.spslWsls.....tssts....ssallhShPhpotlLpl.......................Dhs--l..ht....ulshsspTLtsushs...sshllQVTssulplsshtstphtpphp...........sspplhsAsssssphLlsssspstptlplpht+h.........h.ppt.ph......h..spssslshp...Ph 
......................................................................................................................................................................................................................Dhlhlsocp...hphhhlpap.................................ttpth......t...........tshtc.shhp...sG.hhh.lDPp.uRhhhlthh.ct.hhlh.hpp....................p..tt.thtt..l........p.....................................................ph.l.hsh.hhl..................uhtpPhhhh.......l..s..............tpt..t........ph...h..hhc....................................................................................ht...h....sh.p.ch.p.s...........h....phs.............shllsVPt...........................................s..uG.ll.lh.spp.lhYp..........................t...............th......................................................ppshhhsh.....................................................hthsh.hh.L.lt..s.c.tGclahltlphspp..............................lppl+lphh.....s.....p...ss.ls..ss.hs..hLcsG...............hlFluSchGs.s.L.hphs.......................................................................................................................................................................hp.pltl..l...-phs.....sluP..l.hDhpl..s..ch.tp.p.sps...........................................plhsssGts...tc..uoL+ll.RpGltlp.......-hs.s...-LsG..spslWol+.......................................tp.pc..........hcsal..ll..S.....a..s..s....t.Th...lLpl..............................................p.s-.El......pt..GFhs.s.t..Tl.hsupls.....................pstll..........QVpsp.....u.lR...h.l..........................p...s..t.....t...h..hp..p....atss...................ttpsIstsu..ss.p..pp.lllshs.stp.l.hhhph.p.t................................................h........................................................................................................................ 
0 225 359 545 +10266 PF10434 MAM1 Monopolin complex protein MAM1 Mistry J, Wood V anon Pfam-B_58835 (release 22.0) Family Monopolin is a protein complex, originally identified in Saccharomyces cerevisiae, that is required for the segregation of homologous centromeres to opposite poles of a dividing cell during meiosis I [1][2]. MAM1 is required in S. cerevisiae for monopolar attachment [3]. 25.00 25.00 60.40 60.40 19.80 19.80 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.08 0.70 -5.20 3 26 2009-01-15 18:05:59 2007-10-24 10:37:39 4 1 25 0 15 26 0 249.80 48 85.97 CHANGED +cKRsLSsKDTNV.....lp.sNp.ppRpRsL+NKsoh....IsDSSslppPpKNscccpLsKassERpl+RhoNssNlssp-sNs.................pp..oQ+I-NNsAs+E...uG-sLTRssL+ELQppIh-hElssF.pCcHulCsQ.hshcsLcpsRTWFLFELEMoEstsc....NLRpSCYsKYVYoAIDpSW+hsNhLhcAs-supEaFPIEQLLIPc.plD.psppKth-....IEslSI-h-SIhETN+sss.pthV+KKpLPsSVLp+Rsc+clFDEhslDAcEVlNshSoSSS ...................................pKRsluNKDsNh.......l+.sNphpphSRhLspK.......IpsosspcpP+...KNhpccsLsSa+p.-+Sl+.KpNssNlssc-cK-...........................TQcLp...NNlsscE......uscs..LT+sNLKcLQccIF-pEhs.sI.sCcHsLCSs....ENR+..c..IKaSRLWFLFEL-MSt.Nhsc....NLRhSCYsKaVYsAIDcuW..phENILh...cEp-K+Y.........EaFPIuQLLIPN..sIDassc....pK+ccN...I.EDLTlEI-SIIETNHp..........cKRaLPpSlLhKRccchAFDDh.cLDA+KlLNDhSAoSp........ 0 1 6 12 +10267 PF10435 BetaGal_dom2 Beta-galactosidase, domain 2 Finn RD, Coggill PC anon Gene3D, pdb_1tg7 Domain This is the second domain of the five-domain beta-galactosidase enzyme that altogether catalyses the hydrolysis of beta(1-3) and beta(1-4) galactosyl bonds in oligosaccharides as well as the inverse reaction of enzymatic condensation and trans-glycosylation. This domain is made up of 16 antiparallel beta-strands and an alpha-helix at its C terminus. The fold of this domain appears to be unique. 
In addition, the last seven strands of the domain form a subdomain with an immunoglobulin-like (I-type Ig) fold in which the first strand is divided between the two beta-sheets. In penicillin spp this strand is interrupted by a 12-residue insertion which forms an additional edge-strand to the second beta-sheet of the sub-domain. The remainder of the second domain forms a series of beta-hairpins at its N terminus, four strands of which are contiguous with part of the Ig-like sub-domain, forming in total a seven-stranded antiparallel beta-sheet. This domain is associated with family Glyco_hydro_35, Pfam:PF01301, which is N-terminal to it, but itself has no metazoan members. 21.70 21.70 21.90 22.00 21.50 21.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.03 0.71 -4.93 38 264 2009-01-15 18:05:59 2007-10-31 12:16:33 4 20 163 6 155 264 1 166.00 27 16.75 CHANGED .LstTsttus.sosYo.ssssltso.Lhs......ssTtuuFYVlpHschoStssssapLplsTSs.GslTlPphsG.olsLNGR-SKIhVTDaslG.u.psLlYSTAElhTatphss.c.sVLVLYussGEpsEhAl......uspupshslcGpssslshpptsusllls.aspssshpllplss.....lclhLLDRssAYpa ......................................................s.....hs.sssslhsh.lhs......sto.stFalh.pp..tp.ss...ssts..aplpl..s...T.o.t.Gs.l.slPp.s.......u....slpLsGR-S.K.lhlsDasl......G..s......p........pLlYSTA-lhThtphss.c.sVll.LaGs.tG-tsEhsl....phs.s.t..s.p..s.slpG..sphsh.p.tps........s...t..l.hls..asp.sssh..p.hlplsss.....lhlhllD+psAhp....................................................... 0 48 88 127 +10268 PF10436 BCDHK_Adom3 Mitochondrial branched-chain alpha-ketoacid dehydrogenase kinase Finn RD, Coggill PC anon Gene3D, pdb_1gkz Family Catabolism and synthesis of leucine, isoleucine and valine are finely balanced, allowing the body to make the most of dietary input but removing excesses to prevent toxic build-up of their corresponding keto-acids. 
This is the butyryl-CoA dehydrogenase, subunit A domain 3, a largely alpha-helical bundle of the enzyme BCDHK. This enzyme is the regulator of the dehydrogenase complex that breaks branched-chain amino-acids down, by phosphorylating and thereby inactivating it when synthesis is required. The domain is associated with family HATPase_c Pfam:PF02518 which is towards the C-terminal. 20.90 20.90 21.20 21.60 20.80 20.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.12 0.71 -4.73 57 905 2009-01-15 18:05:59 2007-10-31 13:35:54 4 12 296 36 574 859 9 170.50 32 40.88 CHANGED sslSL+phlpF...Gp.ps.............sppp....hh.uupFlppELPlRLA++lp-lppLPhslsppPsltpVpphYhpSFcclhp..a...............................................................................................................pshc-....................spc...Fs.chLpplhpcHs..sl.lsolApGlhEh+..cth........................ssp.......plppFLDcahhuRIuh....RhLlsQH........................lsL.................t..t.psst................................ssa.lGhIss ..................................................................................................................slShcphlpF....Gp...s............................................sppp.....h.SspFlp.pELPl.........RLApplc-lptLP..tlsp...pP..........s.lptV.psh.Yh...pSFpclhp..a...............................................................................................................................p....c.s.hcc.......................t.p.c.Fs.chltplhpcHs..sV.lsshApG...llEh+.cth............................ssp.......................plphFLD+ahhu..........RIuh....RhLhsQH........................ltL...........ht..t.t.sst...........................................spa.lG.Is........................................................................................................................................................... 
0 171 289 439 +10269 PF10437 Lip_prot_lig_C Bacterial lipoate protein ligase C-terminus Finn RD, Coggill, PC anon Gene3D, pdb_1vqz Domain This is the C-terminal domain of a bacterial lipoate protein ligase. There is no conservation between this C-terminus and that of vertebrate lipoate protein ligase C-termini, but both are associated with the domain BPL_LipA_LipB Pfam:PF03099, further upstream. This domain is required for adenylation of lipoic acid by lipoate protein ligases. The domain is not required for transfer of lipoic acid from the adenylate to the lipoyl domain. Upon adenylation, this domain rotates 180 degrees away from the active site cleft. Therefore, the domain does not interact with the lipoyl domain during transfer. 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.81 0.72 -4.31 94 2280 2009-01-15 18:05:59 2007-10-31 14:37:17 4 6 1825 11 300 1274 29 85.30 33 26.19 CHANGED coPcFshpppcRFsh.Gtl-lplsVcpGhIpch+IaGDFhus.ts.lpclpptL.hGhpYctcslppsLppl..shp.pYhs.slph--lhphl ...................puPpFshppscRFsh.G.t.V-l..phsV.c.c.GpIpcs............+.Ia....GDFFu....t............-...l.p.slcptL....pGs..pY..c..t..-s..lpps.Lcsl..slscY.hs..slphcElhphh................................. 1 115 189 254 +10270 PF10438 Cyc-maltodext_C Cyclo-malto-dextrinase C-terminal domain Finn RD, Coggill PC anon Gene3D, pdb_1h3g Domain This domain is at the very C-terminus of cyclo-malto-dextrinase proteins and consists of 8 beta strands, is largely globular and appears to help stabilise the acitve sites created by upstream domains, Cyc-maltodext_N Pfam:PF09087, and Alpha-amylase Pfam:PF00128. Cyclo-malto-dextrinases hydrolyse cyclodextrans to maltose and glucose and catalyse trans-glycosylation of oligosaccharides to the C3-, C4- or C6-hydroxyl groups of various acceptor sugar molecules. 
22.20 22.20 22.20 22.90 22.10 21.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.30 0.72 -3.92 32 208 2012-10-02 20:10:03 2007-10-31 15:17:05 4 4 182 12 58 208 92 79.20 31 13.36 CHANGED cLhHa.sP.psGlYVYhR................................h.sscoVhVlhNpsscshsLsLsRFpEhltssssup-llo.spphsLsc....sLslss+ushllpl .............hh+F.hP..ppGlYVYtR................................phsscoVhVllNsscpp.tslsls+apEllssps.supDllo.G+pls.Ls.c....slsLss+sshlLEh................ 0 21 43 54 +10271 PF10439 Bacteriocin_IIc Bacteriocin class II with double-glycine leader peptide Coggill P anon Manual Family This is a family of bacteriocidal bacteriocins secreted by Streptococcal species in order to kill off closely-related competitor Gram-positives. The sequence includes the peptide precursor, this being cleaved off proteolytically at the double-glycine. The family does not carry the YGNGVXC motif characteristic of pediocin-like Bacteriocins, Bacteriocin_II Pfam:PF01721. The producer bacteria are protected from the effects of their own bacteriocins by production of a specific immunity protein which is co-transcribed with the genes encoding the bacteriocins, eg family EntA_Immun Pfam:PF08951. The bacteriocins are structurally more specific than their immunity-protein counterparts. Typically, production of the bacteriocin gene is from within an operon carrying up to 6 genes including a typical two-component regulatory system (R and H), a small peptide pheromone (C), and a dedicated ABC transporter (A and -B) as well as an immunity protein [1]. The ABC transporter is thought to recognise the N termini of both the pheromone and the bacteriocins and to transport these peptides across the cytoplasmic membrane, concurrent with cleavage at the conserved double-glycine motif. 
Cleaved extracellular C can then bind to the sensor kinase, H, resulting in activation of R and up-regulation of the entire gene cluster via binding to consensus sequences within each promoter [2]. It seems likely that this whole regulon is carried on a transmissible plasmid which is passed between closely related Firmicute species since many clinical isolates from different Firmicutes can produce at least two bacteriocins. and the same bacteriocins can be produced by different species. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.56 0.72 -3.85 35 1162 2012-10-02 23:56:30 2007-11-06 13:42:44 4 3 367 0 62 351 0 66.30 33 90.98 CHANGED M.........pphcsLspcpLusl.G.....G.........phspshsuhsuusssGshsGushus...........sGuhsGA......hhGussGuluGt .........pphM.....hpQFphMDsEMLusVEG..G........sths.s..shsu.ssu......uA.s....s.G.hth.Ghtsssh..............hsuhhGu......hhGuhhsuh...h....................................... 0 6 29 42 +10272 PF10440 WIYLD Ubiquitin-binding WIYLD domain Bateman A, Thorstensen T anon Thorstensen T Domain This presumed domain has been predicted to contain three alpha helices. The domain was named the WIYLD domain based on the pattern of most conserved residues [1]. It binds ubiquitin. In the Arabidopsis thaliana histone-lysine N-methyltransferase SUVR4, Swiss:Q8W595, binding of ubiquitin to this domain stimulates enzymatic activity and converts its activity from a strict dimethylase to a di/trimethylase [2]. 21.10 21.10 21.60 21.10 20.70 19.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.08 0.72 -4.04 8 79 2009-01-15 18:05:59 2007-11-08 17:56:48 4 5 15 \N 49 87 0 63.40 36 13.75 CHANGED M..ss....p....cRhDAAh-+M+phGhccshlpsslKpLLp..lYscN.WhLIE-DNYcsLlDtIFspc-cp ....................tp....pRhcsAhctMpthGhscppl+sllcpLLp..lY..s....pN..W.hIE-.-sY+sLhDslh-pp-p...... 
0 5 28 41 +10273 PF10441 Urb2 Urb2/Npa2 family Bateman A, Wood V anon Pfam-B_28626 (Release 22.0) Family This family includes the Urb2 protein from yeast that are involved in ribosome biogenesis [1]. 21.30 21.30 21.30 21.30 21.10 20.50 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.29 0.70 -4.75 26 228 2009-01-15 18:05:59 2007-11-19 11:21:27 4 4 205 0 167 223 0 220.80 21 16.86 CHANGED sssphlsthlphlp...phltpc..shhhsQhsl-hllsllsslst............................p.t.pspsslahphspllsslLthHRh+lss+hHllhsshspLLphLh......sptphssssss.........................ApthuRLlsphs-P.....................p.p.ttts..............Lssthsth+c.hs+ahshlLssYlphplphs.....lsssl+ptLpsGlYuIhDlho..........pp-.lphlsuuLDsuu....RshF+sLYs-Yp+huKWp.p .....................................................................................................................................................................................t.......hhpsl....hllppc..shthsp.hsls.hlshlsslss.................................sth.pspth.sslahtlppllt....sllpp.....Hpphh.tph.hllsshppL.lpslh.....ttptspt.stss.sps..................................................shphuRL...lpphhp.......................................................................hsttpcthsp.ah.al..lhpY......lph.hcss.......................lhspl+ptLpsGlY....sll....Dlhs..........cps.hphlpuuL...ssuu.......Rs....lFKpLYs-Yt+atKap.pt....... 0 46 84 131 +10274 PF10442 FIST_C FIST C domain Borziak K anon Borziak K Domain The FIST C domain is a novel sensory domain, which is present in signal transduction proteins from Bacteria, Archaea and Eukarya. Chromosomal proximity of FIST-encoding genes to those coding for proteins involved in amino acid metabolism and transport suggest that FIST domains bind small ligands, such as amino acids [1]. 
25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.74 0.71 -4.23 188 1182 2009-09-11 08:20:13 2007-11-19 11:45:35 4 33 821 0 440 1024 155 135.40 19 29.02 CHANGED AhphYpchlu.....t.t.t........pphst..........................hPlul........t.......................h.ssphh...........................................lRslhtls.tssul.shhsslppGtplphh.ts..spshhpshpphhpph..........................................t..tsthsl.hhsChuR...hht.pt.tppc.lpt.lpchhstt..sl..........sGFhoaGEhtsh .......................................................................................................thhtchlst..t..............pph...........................hslul....h.......................h..spp.h..................................lRshhth.s..s.sul.thh.s...s....l.p...G.pplph....tp..sssh...hcshppshpph.......................................ttttpstssl.hhsChuR..h.hht...t...t...sp-.hpt.lpphh..s....h.sh.........................sGFasaGElh..h................................ 0 126 291 379 +10275 PF10443 RNA12 RNA12 protein Wood V, Bateman A anon Pfam-B_18000 (Release 22.0) Family This family includes RNA12 from S. cerevisiae. That protein contains an RRM domain. This region is C-terminal to that and includes a P-loop motif suggesting this region binds to NTP. The RNA12 proteins is involved in pre-rRNA maturation [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.40 0.70 -5.99 18 149 2012-10-05 12:31:09 2007-11-19 12:57:18 4 12 135 0 119 171 38 416.20 37 49.99 CHANGED Rp-clcpLppWLtEsssTFlVlpGPRGSGK+ELVh-+sLpsccp.............sLhIDCcplhc.ARsDsthIsssAsQlGYhPVFSWhNSlSuhlDLAsQGLTGpKuGhSEopEuQlpshLpsospAL+cIuLppp...................ccsDcss..........................................................slp---YLptHPEt+.................PVlVIDpF......hp+ucp..suhlYcclu-WAAsLVpsNIAHVIFLTsD.VuhsK.LocALPNp...VF+slsLuDsS.csA+paVlspLtt.......................................................................................................p...................ptstpppptsp.t.......................plp-LDssl-sLGGRhhDLpshsRRl+s.G-oPccAlschIpQuus-IhphFLt........t.tssspsWospQAWpLIcpL...Spsssl.Ys-llhssLFKu.......ssEs..uLpsLEpuELIolsp.ssGpsscI+sGKPlYpAAFppLlsDcslpsthchthlsplIshEsscIcKhE-ELphL.ucl .............................................Rh-tlpplpt.WLhEsssT..FlllpGP....+GSGKcELVhcpsLps.ccp.............hLlIDCc.l.c.A.+uDsshIpshAsplGY.hPVFoWhNS..lSuhlD.LAs....QGhhGtK..uGhSEoh-sQlp.........pILpsospAL+pluLppp..............ppsccss........................................................................................................................................................plp---aL..ptH..PE.t+..........................PVlVIDsF......hp..K.u.cp...sshlYcclu-WAA.sLsps.N.IAHVIFLTsD.luhsKsLucALPsp...VF.+sl.sLuDsS.-su+paVlspLps...........................................................................................................p..................................................................................pphp-LDssIcsLGGRhoDLphhsRRl+s.Gp.oPppAVpcIlpQuus-I...h+halh...........t.ssp.sppWos.pQAWhLI+tL.................up..psslpYs-llhsslFKu.........ssEs..sLpuLEpuELIolpp.ppGpsppI+sGKPlapAAFppLhp.Dphlpsph-hthltpllphEsppIpKhEpELthLup.h............................................................................................
. 0 41 73 105 +10276 PF10444 Nbl1_Borealin_N DUF2455; Nbl1 / Borealin N terminal Mistry J, Hartsuiker E, Wood V anon Manual Family Nbl1 is a subunit of the conserved CPC, the chromosomal passenger complex, which regulates mitotic chromosome segregation. In Fungi and Animalia, this complex consists of the kinase Aurora B/AIR-2/Ipl1p, INCENP/ICP-1/Sli15p, and Survivin/BIR-1/Bir1p. In Animalia, a fourth subunit (Borealin/Dasra/CSC-1) is required for targeting CPC to centromeres and central spindles. Nbl1 has been shown in budding yeast to be essential for viability, and for CPC localisation, stability, integrity, and function [1]. The N terminus of Borealin is homologous to Nbl1 [1]. This family contains both Nbl1, and the N terminal region of Borealin. 21.20 21.20 21.20 23.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.41 0.72 -4.60 17 162 2009-01-15 18:05:59 2008-02-06 12:35:15 4 4 136 5 116 162 0 58.40 30 18.74 CHANGED pphpshlpshshE.lpsRhccl+uphp.hhpshcsth-hclh+lPpulRchpht-lhsch .........phpuhlcshphE.....l........ppRs+pl+uphp.hhpslcsphchcl.RlPpulRchsht-hhtph.... 0 37 59 91 +10277 PF10445 DUF2456 Protein of unknown function (DUF2456) Hartsuiker E, Wood V, Mistry J anon Pfam-B_97171 (release 22.0) Family This is a family of uncharacterised proteins. 25.00 25.00 32.10 31.20 24.00 20.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.31 0.72 -4.07 6 48 2009-01-15 18:05:59 2008-02-06 12:37:41 4 1 46 0 35 47 0 90.40 34 26.54 CHANGED tsLcuh.pspFht..sphhhhpalEWhl.pslRGhlLulhhahhlWPlThGILAuIGp+.tpHDYYFNs..h.hPQVhKLIYGsVlGhlosPllAhlhh ..................hp...h..h......t.phhpalphhltphlRuhllulhhFlllWPlohGILuuIGp+...tupDahaps..h.hhPQlhKLlYGsVluhlsTPlhshlhh.... 0 11 19 32 +10278 PF10446 DUF2457 Protein of unknown function (DUF2457) Hartsuiker E, Wood V, Mistry J anon Manual Family This is a family of uncharacterised proteins. 
30.00 30.00 30.60 32.00 27.10 29.80 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.77 0.70 -5.76 10 78 2009-09-13 16:18:35 2008-02-06 12:38:57 4 4 71 0 66 77 0 404.60 41 60.99 CHANGED RFHEFASS..EDD-Wlpcs.ssaKpKlTlsDsMKKEpAIRKLGEEAE......EEAhEEE....................-tD-DDspDsDp-Dt..tpt-s...........DsDSslt.hsp..DDGNEoDNEAGFA-S.DE.oDstS-YsFWAPsusTsATos...psl-ssRpshsR+sSsoSh-Shscppsp+p....s.....ppspRpPhKss..+hRPuTPcLPDSTDFVCGTLDEDRPLEsAYhSChEtRRhuKplhIPQDIDPSFPTSD...P-DE-D-.pchp..hshpsD............-usRsRttt.ts++pSPtsSP+RhhSPPP..R.........+ttttS..P++L.RSPPPPh+h+Ss..................sttuusssssoh...sscGlsh..upLspRPshT+TKSLPRTPNsF.......p+hsthsPh.pusE+Euoss+-sHsRGsIDIVcGLEKKRQ+RKEKFaRpHC.RKAsKEphpR.RPhPGKGAERM+ELGLtsAcphhua..ulG...............psuphVLSV ................................................RFHEFuSu..c.-DDWlpps..sshppKlT.lsDs.h+KE.sIRKLGcE.AE........EEA.pEE....................----.--pp..-p-..t..pDp...pp.--..--.ptD....-tp.....-t......s-.s-....p.sp..sD..G.coDsEsGFA-S.DEsD.t.s-hthWsPst..sssssps...tshshhR..s...tc...pt.S....soShtShps.ts.p...............pRp...h+......+h.R.ssT..PsLPDSTDFVCGTLDEDRPLEsAYhSChptRRppKph.IPQDIDPSFPTSD...s..EDE.----..tcp...........ptps-.p..h.pt.........................-t.+sRt......tp+po.ptSP+RhpSPPP..R.......................+.hhupS..P++L.cuP.ss.hRh+SP..................sths.hpshss.....tstu.hph..tsLut...RPsh..s+T+SLPRsPs.F..............................+hp.t..s...tptp.pttspsp-.hHsRGsIDIVtGLE+KRQ+RKEKFappaC.p+A.tK-p.h.pc.....+P...hPG+GAERM+ELGL.hAt...............thu...............pss..hVLSl..................................... 0 13 31 52 +10279 PF10447 EXOSC1 Exosome component EXOSC1/CSL4 Mistry J, Wood V anon Pfam-B_6887 (release 22.0) Family This family of proteins are components of the exosome 3'->5' exoribonuclease complex. The exosome mediates degradation of unstable mRNAs that contain AU-rich elements (AREs) within their 3' untranslated regions [1]. 
21.20 21.20 21.20 21.40 21.10 21.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.03 32 355 2012-10-03 20:18:03 2008-02-06 12:46:39 4 8 310 1 250 337 11 79.70 41 34.22 CHANGED lPchGslVlsRVoRls.+tApspILsV................................................sssshps........................sF+GlIRppDVRuTE+D+VclhcsF.+PGDIVRApVlSL ...............................lPpsGslVhs+Vocl.st+hAplpIlsV...............................................................ss...pslpp.....................................................sFpGlIR.ppDVR..uTE..+D+Vc.....h.....hcsF.RPGDIV+ApVlSL........ 0 84 134 203 +10280 PF10448 chaperone_DMP POC1; 20S proteasome chaperone Mistry J, Wood V anon Pfam-B_75798 (release 22.0) Family This family contains chaperones of the 20S proteasome which function in early 20S proteasome assembly. The structures of two of the proteins in this family (DMP1 and DMP2) have been solved, and they closely resemble that of the mammalian proteasome assembling chaperone PAC3, although there is little sequence similarity between them [2]. 22.10 22.10 31.80 31.20 19.30 18.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.79 0.71 -4.30 18 73 2009-01-15 18:05:59 2008-02-06 12:51:02 4 1 39 6 47 72 4 144.80 21 96.35 CHANGED Mphtphppphsst.s....................utcsclslp...thchssKhslsltlNuphDsohcs...sh............hsYhhsl..................pphtushpshlhlusus-hth.sl.spplucLls+phpsss.............hlolSS+hht......t.spssshphLhhlLcsl+ ............................M..hphppphstt.t...................sstshplhlp...sschssKl.lsltlssphDsohcss.ps..................hshhhuh...........................ppphushpslshlGsssDhph.sl.spQlucLltcphts.s...............hlohS.S+hht.......ptsssschthLhhlLcslK.................... 
0 5 23 44 +10282 PF10450 POC1 POC4; POC1 chaperone Mistry J, Wood V anon Manual Family In yeast, POC1 is a chaperone of the 20S proteasome which functions in early 20S proteasome assembly. 20.40 20.40 21.20 20.80 18.40 20.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.82 0.70 -5.64 3 40 2009-01-15 18:05:59 2008-02-06 12:54:17 4 1 38 0 25 39 0 242.90 33 98.06 CHANGED MLFKQWN-hstP+H.LD.P.Iu+N.pSLplhPVPcVahPp.hDlspYpssVlTTKIMsPLFPppLLphppIu-IpTTLplcpsp.st-SEcHSWNY-ENFPNEV..s.KpDosscplhuFSaPIauFcDTLIhhIEENFIKhSAIFoNhIoRslIsp.LAQhsPDIp.IsI.GTSDKIsslKpLTpscCoLpPPEFITGFIGSlLTQL...PSKELKVFpsIVAPSEGPIGFEKhoLsslcuLVDlCucLLshcPSc...YSsEChRLWRLDuAAIGAQSGLYI .........................................................................................................................MlhK.Ws-...P+H.lp..........psh..tshsp.l.h.s....ph..p.hp.hll......t...hhssLFP.cp.L...lphpplG..clpsols.lp...ts.p.......sps.sscc.sasaDEpa.spl........-...cpcst.pp.phhshphPI..aths..c..oLlhshp..-NFlphssI...hsN.hlo+pllsp...L.s.ph.....ps.....-..I...l....ll..us....S...........D+.Is..sh........K...s...ho......ps.....s.....oLpPPEFITGhluSlloQL.........sppshp..hpsLVssSEGP...GFEKlslsshssLlclhu...phLshpspp...Ysppsh+.W+httss...psGLYl............................. 0 3 13 23 +10283 PF10451 Stn1 Telomere regulation protein Stn1 Mistry J, Wood V anon Pfam-B_51291 (release 21.0) Domain The budding yeast protein Stn1 is a DNA-binding protein which has specificity for telomeric DNA. Structural profiling has predicted an OB-fold [1]. This domain is the N-terminal part of the molecule, which adopts the OB fold. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution [2]. 
20.90 20.90 21.50 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.60 0.70 -5.54 7 123 2012-10-03 20:18:03 2008-02-06 12:56:02 4 9 115 3 95 163 1 178.30 20 49.51 CHANGED atchtpt.....VsaahspLh+hsphastsp..shhlpDl+pphc.Shphsp....YhshhtshlaWhN+PlppI+llGsllGhpa+hlttp-ahhhplDDCos............hLpCpspcsplhShuhslssh.lGhTlpVhGhhs.......hph.ELpVpalc.hshsLppEIcaWchshph+cpLshPWclss.hltt.hptcpcht.tt...............coPpppps......pssaIEpL...cphcscLplhSPasspsp...........s..lhs...hphlsstspl-pt ........................................htt...................................................................tshhFatN+.PlphlplsGhlluh.p..h.....................t..........c....h..hlh.s..lDDuSG.t...................................slps.......h.....st....tp......................................................G.hl.....plhG.hp..................................t..php...hp.h....hh.........s...httEhthWp.h.ph.h.....L....s.......................................................................................................................................................ttttttttt................................................................................ 0 15 45 80 +10284 PF10452 TCO89 TORC1 subunit TCO89 Misry J, Wood V anon Pfam-B_61649 (release 22.0) Family TC089 is a component of the TORC1 complex. TORC1 is responsible for a wide range of rapamycin-sensitive cellular activities. 
20.20 20.20 20.90 20.50 19.80 19.90 hmmbuild -o /dev/null HMM SEED 613 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.16 0.70 -5.63 6 69 2009-01-15 18:05:59 2008-02-06 13:00:05 4 5 53 0 46 70 0 286.50 20 51.06 CHANGED RQFST+SRuK...SsASFKG...L+RVhoHDGThsps.sh.spaushKKoKSSDuLh+RRslSGLsMTALst................puPlp..................huusGL+PcRs+popsVLsL+-upt.hDs-STTDEEVEhFo--p.c.........-Apss......-spstst+stPpppp.h.php.hptp.sh..st.cp.psh.tt..sh+s....hs+.lDSssthh........sc.pI-t......................S+ppppsaDussussslppslsstph.pp............s-h..........................................pt.sHsstpcc.pED+hsssspssp.spssApohtpttccuuptspssppppp..lsD-........p..t.scsp......-pYlPDMILSQSTGVER+F-pplShQNSLu.......................c.tst.s.pthcstphpcs+aNhlppplstsl.ssp...............psptsFSouISSLTssLpRssPpSh.sss+hNss.hp+spQp.L.Rtpph....tt................hspssppssSssplNsFuQFLpSsshsu-SRTQpKLWLQREsSIhDLsuQs.Duu-AlFhASNlEsKREFERIS+EYosVRRFtNPLs-uLpRlp.....ptpshppppppcusouhpuuS.s........hFssY.ppsKohcEhhssupp..hclpplLsuIWpSpotpFNKDsNPL 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ps............+ppFE+lspEY.sVRRahNPls-uL.Rlp.....................th.phppptt.p...s......................................................................................................................................................... 0 11 28 45 +10285 PF10453 NUFIP1 Nuclear fragile X mental retardation-interacting protein 1 (NUFIP1) Mistry J, Wood V anon Manual Family Proteins in this family have been implicated in the assembly of the large subunit of the ribosome [2] and in telomere maintenance [3]. Some proteins in this family contain a CCCH zinc finger. This family contains a protein called human fragile X mental retardation-interacting protein 1, which is known to bind RNA [4] and is phosphorylated upon DNA damage [1]. 
22.30 22.30 22.80 22.30 21.90 22.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.02 0.72 -4.46 39 274 2009-09-11 12:31:40 2008-02-06 13:04:56 4 9 247 0 202 267 1 56.50 29 11.43 CHANGED htptphpshphpG.pplpL....p.T.sE-It+WhcER+KpaPTptplp....cKpptcctptcctph .......................h.......hth.G...pphpL........p.o.sE...-ItpWhEER+KpaPTpsplc........cKc.phpptp.pctt............................... 0 74 112 166 +10286 PF10454 DUF2458 Protein of unknown function (DUF2458) Hartsuiker H, Wood V, Mistry J anon Manual Family This a is family of uncharacterised proteins. 33.10 33.10 33.70 33.50 33.00 33.00 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.93 0.71 -4.32 9 64 2009-01-15 18:05:59 2008-02-06 14:04:39 4 3 62 0 52 65 0 144.90 32 61.78 CHANGED DsppITsassAL+YVh+plspssch.pcIRcLIpcQccapcpahppR-tLlp+.puph-pp+cL.....-slLpslst.hsp..tp.sp.c.c.ptcLpthDtphhht.pph..t.hthhcsLplPhFhhpp..........................ht..clpp.phhhlplL.D.ltp .............sphIssa.tAL+hlh+plupscshtpcI+cLIppQcc+E+pWapuRpsLltKQps+ttpp+pl.................cplLpslGs..lsp....p.sss......c........cpptELppa.D.pKVapA.ppMscthptcL+sLulPFFshpp.................................hstc-lpthph+hLplLpDhh................. 0 19 29 42 +10287 PF10455 BAR_2 Bin/amphiphysin/Rvs domain for vesicular trafficking Mistry J, Wood V anon Pfam-B_12557 (release 22.0) Family This Pfam entry includes proteins that are not matched by Pfam:PF03114. 
22.70 22.70 22.80 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.97 0.70 -5.49 10 190 2012-10-03 12:17:00 2008-02-06 15:55:33 4 5 139 0 139 628 1 273.70 37 82.41 CHANGED oI.olss+Tp+hlpEpLGpVp.............DISpLPt-YlpLEpKsDsLcKlYp+lLhlo.cTaEs-uYDYPP..shsESlsD..aspolutKhpphtNhoSspcsphhhhGpstp-tsptpKt..................p.sss.hP+TLstALS+sAtcuo..hpcL........................................csp-hssLupshtphSss.tcIupA+h-pDphIlKcFNpcLcclLsppFtKsccLRKKVpcoRLpFDhhRpclcp...................................scPEsEE.............................................t.t..LEshEDEFVSATccAVhhMpcllcsSchlsLLKlFtssQLpYacpuscpLcp.LssLst ...................................................................................................................shAt+T.phlpEpLGpsp.......................................................................Dh.opLPt-Yl-LEc+lDuL+tlap+hLtVT.spYpsEuYDYPs..NlpEShsD..hu+olupKlp...LopAo..SssEA.pshL.hu.Pss.sc.................................PKThsa.Alu+Auhsuot.hpph....................................................ppsssp.cs.L.up.uLcpau.s.p+lupARLs.QDs.Ipo+...F.tshp.ssLNs.s...l...thAs.......+sRKsV-suRLphD..ss+sp..hcst...................................................pp.sph.pp...................................................................................................phchclE..pAED.......E.FVspTE.-.AlslMcp.V..l.-..s.....s.....-.s.L..cpL.t...-L...lsAQLpYa+puhchLpph......t.............................................. 0 34 77 122 +10288 PF10456 BAR_3_WASP_bdg WASP-binding domain of Sorting nexin protein Coggill P anon Pfam-B_43522 (release 20.0) Domain The C-terminal region of the Sorting nexin group of proteins appears to carry a BAR-like (Bin/amphiphysin/Rvs) domain. This domain is very diverse and the similarities with other BAR domains are few. 
In the Sorting nexins it is associated with family PX, Pfam:PF00787.13, and in combination with PX appears to be necessary to bind WASP along with p85 to form a multimeric signalling complex [1]. 24.90 24.90 24.90 25.70 24.70 24.60 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.56 0.70 -5.11 8 753 2012-10-03 12:17:00 2008-02-11 16:57:52 4 10 512 8 150 521 1 143.90 60 42.00 CHANGED DEKsWKpGKRKAEKDEhVGusFF.TIs.Pph.ssLDLp-VEpKlEsFppFTKpMD-uVhpLpssusEah++psGsh+KEYQKlGpAFpsLupuFphDthstSusLNcAlutTGcsYEpIG-hFAEQP+pDLc.lh-sLs.Y+GhLuNFPDIIpVpKGAlsKVKESp+hstpu+lssp-tsshpcRssshSYAl.AEhpHFHspRlhDa+uhMQpYLcQQlpFYQcIupKLccALspYD ............................................................DDKQWKLGKRRAEKDEMVGAHFMLTlQIPs...EHQ.DLQDVE.ERlDsFKuFAKKMDDSVhQLTHV......AS.ELVRKHLGGFRKEFQRLGNuFQSISp..........A..Fh..L.D..P............P..h.t...S....ps....LspAls............................................................................................................................................................................................................................................................. 0 43 55 99 +10289 PF10457 MENTAL Cholesterol-capturing domain Coggill P anon Pfam-B_16187 (release 22.0) Domain Human meta-static lymph node (MLN) 64 is a late endosomal membrane protein, and carries this MENTAL (MLN64N-terminal) domain at its N-terminus. The domain is composed of four trans-membrane helices with three short intervening loops [1]. The function of the domain is to capture cholesterol and pass it to the associated START domain Pfam:PF01852 for transfer to a cytosolic acceptor protein or membrane. In mammals, the MENTAL domain is involved in the localisation of MLN64 and MENTHO in late endosomes, and also in homo-and of hetero-interactions of these two proteins [2]. 
20.50 20.50 20.70 20.80 20.00 19.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.43 0.71 -4.65 10 190 2009-01-15 18:05:59 2008-02-11 17:18:49 4 7 92 0 107 176 0 151.70 49 43.35 CHANGED ScVRRpFCLFVTFDLLFloLLWIIp.lssscuIppsL-pEVl+Ys...a+sShFDIhLLAVaRFhlLlLuYAlh+L+HWasIAlTTslooAFLIsKVll...shhSQssFsalLsIsSFlLAWlETWFLDFKVLPQEscsEcha.luu..........................psss-RsPLLsPu..........slScGpFYSPs-Shs ..........................................S-VRRTFCLFVTFDLLFloLLWI....Ip...ls..s.ssuIpps.LcpEllp.Ys...atoShFDIh..lLAhFRFtsLl..LuYAl..h...+.L+...H.....WWsI.AlTThlooA...FLls......K...V.ll......ph........h............op....s.....s..FuYlLsIh....SFlLAWlETWFLDF+VLPQEscpcphh.hhs........................................p.ssstRss..ll.ss..............shSp...upFYSP.-o.u................................................................................. 1 26 35 66 +10290 PF10458 Val_tRNA-synt_C Valyl tRNA synthetase tRNA binding arm Bateman A anon SCOP Domain This domain is found at the C-terminus of Valyl tRNA synthetases. 25.10 25.10 25.10 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.03 0.72 -3.89 31 4203 2012-10-01 23:07:44 2008-02-12 16:38:06 4 17 4133 4 940 3276 1648 65.80 36 7.28 CHANGED Dl-cEpuRLpKELt+lppElc+lptKLuNPuFlsKAPs-VVEpE+s+lt-hppphptl+ppLspLu ............sh-tElsRLpKEls.KlppEls+lptKLuNEsFVu+APptVlpcERcKhtchppphspl.pppltpl.......... 0 326 626 804 +10291 PF10459 Peptidase_S46 Peptidase S46 Rawlings N, Mistry J anon Manual Family Dipeptidyl-peptidase 7 (DPP-7) is the best characterised member of this family. It is a serine peptidase that is located on the cell surface and is predicted to have two N-terminal transmembrane domains. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 698 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -12.83 0.70 -6.11 61 519 2012-10-02 13:45:52 2008-02-15 12:22:33 4 2 290 0 157 649 484 661.60 34 96.35 CHANGED ADEG.MWh.ppl...p..tsphpp.hGlclssppLhslstss.....hsAlVp.sGGCouuFVSscGLVlTNHHCuauuIQppSos-+saLpsGFhApsts-ELss.PuhplphlpplsDVTcplpsulp..shssptphptlpsthppltpc.sppcs......sh+spVtsFasGspYhLhphpcapDVRLVaAPspulGcFGGDsDNWhWPRHTGDFohhRsYsstsspPAtaups.NlPhpPcpaLplospGlc-GDashlhGaPGpTsRahsssplcpphchshPtplchhptthsllcphhppssch+lpY..AuphsuhsNhhKshtGhhculpchshlspKptpEsplttalppssph.tpatsshsplpthhppppphtppphhhppsht....ss.plhshAppLh+hspctp........pRttuhp-cshstlppplpph..psassslDctlhtthLsphtptsstpc.hsslpp.hlstpt........tt.....hpphscphastopLss.cshhthhpt.stsshps..scDPhlphAhul.hsthtthcpppcphsuthppspttahcAlhthh...spshYPDANtTLRloYGpVcG..Y..ss+.DuhhhssaTTlcGlhcK..p.sGstsFslPpphl-hhpsp-a...Gta..........................t.sssPVsFlossDhTGGNSGSPllNu+GELlGLsFDGNaEuluuDahassshsRsltVDhRYlLahl-clssAspLlcELsl 
..................................................................................ADEG.MWh.ppl...p.....ttphpp.hGlplsspplas.stss.....hsAVVp.......h...sG.......G.CTuphVSscGLVlTNHHCuauuIQppSosE.+DYLpcGFhApohs-E.LPs.PshplphlpclpDVTcpVtttlp......st.s..tp...p...pt.....thlpshhpt.lspc.htpps...............shcsplhsFasGNcYaLhhhppapDVRLVhAPPpSlGKFGGDTDNWhWPRHTGDFShFRlYss.ts.spPAtYSt-.NVPh+Pc.paLtlShpGlc-GDasMlhGaPGpTsRYhsuttlcpthpht.ssthphpshphslh.ccthppssp..hRIpY..ASchAu.uNYaKNhhGh.culpchsllspKpttEpphttahpppsp.................t....p.atp.s.hsplpphhspppththpthhhppshh.......ss.....phhphA..hp...lhphttthp...................tct.......th.cpthtp.hppthpph..cs.a.s.plD+plht.thLptYtphss.t.pp.hs.sh.hp.hlspch........tss.....hpthl-t.haspSh.lsstp.........s.ht.......pah...........ptss.hpt..ppDPhlp...huhul..hsthhph.p...pphpphssphpttcphahpuhhchp.s....pshYPDANtTlRloYGpVpG..Y......sPp..Du.hhh.......s.aTThcGlhcK......c..sss...-FslPtc..lh-Lhpp.+-a...GpY........................s.t.GphPVsFl.ossDhTGGNSGSPlhNucG.ELlGLs.FDG.NaEulsuD....hs.F-.ssh.pRsIsVDlRYlLall-KhusAspLlcEhs................................................... 0 62 113 141 +10292 PF10460 Peptidase_M30 Peptidase M30 Rawlings N, Mistry J anon Manual Family This family contains the metallopeptidase hyicolysin. Hyicolysin has a zinc ion which is liganded by two histidine and one glutamate residue. 
20.20 20.20 20.20 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.16 0.70 -5.58 6 71 2012-10-03 04:41:15 2008-02-18 13:56:05 4 8 52 0 39 167 8 301.10 24 54.19 CHANGED DsosRsATLppQsTA...oDGssVNhWVEsuE.suuKISsshlcsLsspFAs.sGuIYDhLpslGG.hWGPpu.tuohIs.ssQPlDIVIL......NF-+sGQPaGtlGYFWupNsFppuu..tPhSNcSlSLYLDoETLYLGGpsG..hpslloTMAHEuhHMQNFYRRuVhhGupYua-sWLEEMTAMhMEDhtSppIsssYNsIRDsRF.sYhsYtu.GuYNCuLhpa.TsaGssC-SYuVSGSLGGFLNRQh.GLsFYpsLLopsotosShuVLssAI+sApPuuoLu-tltpaussssuLhPssuuPAGFGaPuRpDusFoLPhIDPs..hhsulRoLs....sulPsTLpuaGoaPVsRosssGTYSETV+VPAGs......TLSVVVc ....................................................................hh...................ts...hh.hWltss..................plop...t.hpplhpcFss........tlYs......hhsslhGp.....s..................sps..s...t....ss.hls......h.Illh........s.h....p....s.s...spshG..h.....h.GYFaups.a....s......t...............tshSNpu..hYl...Dstshhh.ss.s.ss............sp.....h....huTluHEapHMlNF..p+s.lh.....p...s..tt.h.h-sWLpEhhuhshEc..hh.......utph..........................h..s.....sRhs.t.a.p................shs...sslhta...ss.......u.....ss..h..s..Yuhuh.hhtaL..h....cQh.....Ghsh..hhphl...s....s..tssptslhsssh.thsss..s.hsphhtpaths........t...st.asa.t............................................................................................................................................................................................... 0 9 18 26 +10293 PF10461 Peptidase_S68 Peptidase S68 Rawlings N, Mistry J anon Manual Family This family of serine peptidases contains PIDD proteins. PIDD forms a complex with RAIDD and procaspase-2 that is known as the 'PIDDosome'. The PIDDosome forms when DNA damage occurs and either activates NF-kappaB, leading to cell survival, or caspase-2, which leads to apoptosis. 
20.10 20.10 20.60 20.60 19.70 17.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.60 0.72 -4.24 2 40 2009-01-15 18:05:59 2008-02-19 12:42:30 4 27 28 0 19 43 0 33.40 66 4.02 CHANGED WssL.TtLcctu.+R.hatRspVP+FSWFhVV.RP .......WuDL-TaLEEEuPpR.hWA+CQVPHFSWFLVVSRP. 0 1 1 7 +10294 PF10462 Peptidase_M66 Peptidase M66 Rawlings N, Mistry J anon Manual Family This family of metallopeptidases contains StcE, a virulence factor found in Shiga toxigenic Escherichia coli organisms. StcE peptidase cleaves C1 esterase inhibitor [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.07 0.70 -5.55 15 252 2012-10-03 04:41:15 2008-02-19 13:26:58 4 13 181 1 40 186 20 293.50 42 30.59 CHANGED EssslsYSsstWSshLPt-Wl+PGlslpF.........spsspsuclts...clGsss-Lhlpolshhhh......TpPhs....psphhc..sclscEhFQ+hPsS+Llsssasst+l-h...VhhP.cu.hhsstsPuhsshpuushR-uhu...l.sGl.NhshGIpuusG-upsuhphhushlh..AhsupGpYsss....................aGG.GGG..GhsThDtohsspFsHEhGHsaGLsH..uGpshu.....satpush.sSsWGaDus+pcFluNhhsssss...hpsCpss.......hss...cG+satpDsMpGGusspss.tsRaohassapsspIQsahcNphh .............................................................pttlsYupphWSs.lPhsWhpPGloLpl........tcpssppGhlps......IphGussELllpsIDlGML........h...P.Rs....Rs..p...hhpp...tphtt-YFQKlPsS+LlhssYsPhHhph..........Vs.hPsGslhT..-.t....ss.uhGGWHsGsMR-ulG..KthVSoGIsNANhGIsoouGhu.p.....p....a.....shhsspIs...AHss.hGhYs..st..........l............VHGGS..G..GG......GI...VT.L.-sT..h.uN.EaSHElGHNYGLG.HassG..uo......sH.....s.sS...sWGWDu.+pRFIsNh..h.ppss....spptss.........pls.PF.Du.apahhDAMs...GGtsppsu.hsRFThapPhsu.t.hhQcahpNth................................................... 0 24 30 34 +10295 PF10463 Peptidase_U49 Peptidase U49 Rawlings N, Mistry J anon Manual Family This family contains Lit peptidase from Escherichia coli. 
Lit protease functions in bacterial cell death in response to infection by bacteriophage T4. Following binding of Gol peptide to domains II and III of elongation factor Tu, the Lit peptidase cleaves domain I of the elongation factor. This prevents binding of guanine nucleotides, shuts down translation and leads to cell death. 23.90 23.90 23.90 23.90 23.70 23.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.53 0.71 -4.54 7 29 2012-10-03 04:41:15 2008-02-21 10:51:40 4 2 29 0 11 41 4 181.90 20 57.80 CHANGED Iphp.tsLchlWhhsahhhshhpp........l.phhcsss.phcL....................E.spshlpcupphhpahtphlpsh..psas.........ttlPpPth..ptu.p.....tss-lFLpAlualhhHEluHlhhpc.hh.......ssp.shpEEh-sDsaATpalLss.pp.s............+RtluIulAhhhlp.Lhlcpphshps..THPsspsRI.sslpt.p.pts-phh.hhs ..........................................................................h....hphhW.hs...h.hhpt........h......t..................................htht.t.h.hh....p.hp..sh....pt...............thsp.t...t.t........hssplhhhAls.allhHEluH.lhhtH.th...................................s.s.shp-EhpADpaAhchllsphtp.s.p..................hppthuIhhulhhh..lh....p.p.phshpp..oHPshppRl.s.lpt.....tp..h........................................... 0 5 8 11 +10296 PF10464 Peptidase_U40 Peptidase U40 Rawlings N, Mistry J anon Manual Family This family contains P5 murein endopeptidase from bacteriophage phi-6. P5 murein endopeptidase has lytic activity against several gram-negative bacteria. 
It is thought that the enzyme cleaves the cell wall peptide bridge formed by meso-2,6-diaminopimelic acid and D-Ala 20.80 20.80 20.90 113.30 20.70 19.40 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.37 0.70 -4.92 2 29 2009-01-15 18:05:59 2008-02-21 11:15:17 4 1 1 0 0 30 0 171.00 92 96.88 CHANGED VQYSLRALGQKVRADGVVGSETRAALDALPENQKKAIVELQALLPKAQSVGNsRVRFTTAEVDSAVARISQKIGVPASYYQFLIPIENFVVAGGFETTVSGSFRGLGQFNRQTWDtLRRLGRNLPAFEEGSAQLNASLYAIGFLYLENKRAYEASFKGRVFTHEIAYLYHNQGAPAAEQYLTSGRLVYPKQSEAAVAAVAAARNQHVKESWA VQYSLRALGQKVRADGVVGoETRAALDALPENQKKAIVELQALLPKAQSVGsSRVRFTsAElDSAVARISQcIGVPASYYQFLIPIENFVVAGGhETTVSGSFRGLGQFNRQTWDGLRRLGRNLPAFEEGSAQLNASLYAIGFLaLENKRAYEusFKGRVFTHEIAYLY............................................ 1 0 0 0 +10297 PF10465 Inhibitor_I24 Peptidase_I24; PinA peptidase inhibitor Rawlings N, Mistry J anon Manual Family PinA inhibits the endopeptidase La. It binds to the La homotetramer but does not interfere with the ATP binding site or the active site of La. 25.00 25.00 74.60 74.30 22.30 21.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.88 0.71 -4.44 2 11 2009-01-15 18:05:59 2008-02-21 15:18:23 4 1 10 0 0 10 0 139.90 77 89.06 CHANGED ThsKWF+Is+tDptLpshaPELptGTlhKV...ltpsp-Dh.sDpGIIEl.LssGchlsIYD+shohWChWpocSl-..ElEEl...sp.Vsptshu-FpGERISYALAKLAAQENNDGYEGNLMQAAAEYIEaLEpplS ..TVDKWFRINRADpGLCNYWPELSAGTVFKVRELuKECEDDIEPDTGIIE...IELSDGKIINIYDKPITYWCLWNTESVENGEIEEVVE..RTN..QVVQKPKA-FQGERISYALAKLAAQENNDGYEGNLMQAAAEYIEWLETQIS. 0 0 0 0 +10298 PF10466 Inhibitor_I34 Saccharopepsin inhibitor I34 Mistry J anon Manual Family The saccharopepsin inhibitor is highly specific for the aspartic peptidase saccharopepsin.\ It is largely unstructured in the absence of saccharopepsin [1], but in the presence, the inhibitor undergoes a conformation change forming an almost perfect alpha-helix from Asn2 to Met32 in the active site cleft of the peptidase. 
25.00 25.00 26.10 39.40 21.40 21.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.43 0.72 -3.91 2 7 2009-01-15 18:05:59 2008-02-21 15:24:18 4 1 7 3 2 6 0 68.40 82 97.96 CHANGED MNTDQQKVSEIFQSSKEKLQGDAKVVSDAFKKMASQDKDGKTTDADESEKHNYQEQYNKLKGAGHKKE ...MNTDQQKVSEIFQSSKEKLQGDAKVVSDAFKKMASQD.KDGKTTDADESEKHNYQEQYNKLKGAGHKKE..... 0 1 2 2 +10299 PF10467 Inhibitor_I48 Peptidase inhibitor clitocypin Rawlings N, Mistry J anon Manual Family Clitocypin binds and inhibits cysteine proteinases. It has no similarity to any other known cysteine proteinase inhibitors but bears some similarity to a lectin-like family of proteins from mushrooms [1]. 20.60 20.60 24.10 24.50 19.70 19.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.89 0.71 -4.28 2 19 2009-01-15 18:05:59 2008-02-21 15:42:18 4 1 3 6 10 23 0 128.00 42 92.90 CHANGED LEDGIYRLRAVTTHNPDPGVGGEYATVEGARRPVKAEPNTPPFFEQQIWQVTRNADGQYTIKYQGLNTPFEYGFSYDELEPNAPVIAGDPKEYILQLVPSTADVYIIRAPIQRIGVDVE.GsQtNTLsYKFFPVDGSGGDRPAWRFTRE ............GhYpLRA....sPssGlGG.YATspGspc.VpstPpoPPFFERQlWpl..T..+..s..p..-.GpYTI.p.hpshsssFt.auFSh..D.p..h..PpuPVI.su-..h.E.a.h.h..p.hP.ssspshhIpA.h.hlGhsh..sGs.p.t.pp...................................................................... 0 0 10 10 +10300 PF10468 Inhibitor_I68 Carboxypeptidase inhibitor I68 Rawlings N, Mistry J anon Manual Family This is a family of tick carboxypetidase inhibitors. 25.00 25.00 29.50 29.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.31 0.72 -3.80 2 8 2009-01-15 18:05:59 2008-02-21 15:58:10 4 1 5 10 1 12 0 85.50 43 83.72 CHANGED lVF.hhVLssupsN-CVS+GFGChPpScCP.EhRLSYsGCsTVCCDlS+LTGCcsKGGECpPh-+.C+EL.uEssSCuctQKCCVhL ..sh....LlllspupAN-CVSpGaGClPcScCPcEuRlsYu..GC..uTVCCDlS+lsuCcu+GGECpPhcpsCKE.LpupouoCs+GQKCCVal. 
0 1 1 1 +10301 PF10469 AKAP7_NLS AKAP7 2'5' RNA ligase-like domain Buljan M, Coggill P anon TreeFam_TF105406 Domain AKAP7_NLS is the N-terminal domain of the cyclic AMP-dependent protein kinase A, PKA, anchor protein AKAP7. This protein anchors PKA for its role in regulating PKA-mediated gene transcription in both somatic cells and oocytes [1]. AKAP7_NLS carries the nuclear localisation signal (NLS) KKRKK, that indicates the cellular destiny of this anchor protein [2]. Binding to the regulatory subunits RI and RII of PKA is mediated via the family AKAP7_RIRII_bdg. at the C-terminus. This family represents a region that contains two 2'5' RNA ligase like domains Pfam:PF02834. Presumably this domain carried out some as yet unknown enzymatic function. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.40 0.70 -4.63 50 426 2012-10-03 21:31:48 2008-02-21 19:00:45 4 20 246 0 280 884 46 205.50 22 57.73 CHANGED sTHFlulPL.spsp.lppphpchppplL....................................................................tpt.sl..pshhhsspplHlTLsshtLhspp.-lptAt.....chLppsp....pp.......................lhph......pslplpl+.Glchhss.................cVLYA..plp..t......spLpplss....plhcp.FtppGlhh.tspch...........................................phchHhTlhpspht...........................tppp...psh..sspplhccatcacFGphpl..splcLsph.hspss.sGaYcstuslpl 
........................................................oHFlulsl..s......p....sp....l.p..p....ht....ph..ppplh.........................................................................tt..s.....tl......tsh.h....h.sstpLHlTls..h..h.......p.Lhspp....clppuh..............phLp.php............tp.............................................................l.p.....h.t.....pslplphp.Glphhss..........................t.s...pVLau....tst..tt..........spLp.phsp.....tltcp...F.p..t.G....lhhtcptt....................................................................................................h.p.hHhTlhpshhh............................................................tpc.....tph.....sspplhc..pa.t.s...h....a......G...p...........l...pplplsph....t.....t..............t.Y.......................................................................................... 0 94 145 212 +10302 PF10470 AKAP7_RIRII_bdg PKA-RI-RII subunit binding domain of A-kinase anchor protein Buljan M, Coggill P anon TreeFam_TF105406 Domain AKAP7_RIRII_bdg is the C-terminal domain of the cyclic AMP-dependent protein kinase A, PKA, anchor protein AKAP7. This protein anchors PKA, for its role in regulating PKA-mediated gene transcription in both somatic cells and oocytes, by binding to its regulatory subunits, RI and RII, hence being known as a dual-specific AKAP [1]. The 25 crucial amino acids of RII-binding domains in general form structurally conserved amphipathic helices with unrelated sequences; hydrophobic amino acid residues form the backbone of the interaction and hydrogen bond- and salt-bridge-forming amino acid residues increase the affinity of the interaction [2]. The N-terminus, of family AKAP7_NLS, carries the nuclear localisation signal. 
22.70 22.70 22.70 23.20 22.00 22.60 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.90 0.72 -4.14 3 57 2009-01-15 18:05:59 2008-02-21 19:01:47 4 2 29 0 22 52 1 59.70 63 33.75 CHANGED uuEPDDAELVRLSKRLVENAVLKAVQQYLEETQNKpQPGEGsSVKAEuuDpNGsss-NNRK ...G.uEPDDAELVRLSKRLVENAVLKAVQQYLEETQNKp+P.G-G..SSVK.sEtu.Dp.NGsss-NNRK....................... 0 3 4 6 +10303 PF10471 APC_CDC26 Anaphase-promoting complex APC subunit 1 Buljan M, Coggill P anon Treefam_TF101057 Family The anaphase-promoting complex (APC) or cyclosome is a cell cycle-regulated ubiquitin-protein ligase that regulates important events in mitosis such as the initiation of anaphase and exit from telophase. The APC, in conjunction with other enzymes, assembles multi-ubiquitin chains on a variety of regulatory proteins thereby targeting them for proteolysis by the 26S proteasome. CDC26 is one of the nine or so subunits identified within APC but its exact function is not known [1]. The APC/C becomes active at the metaphase/anaphase transition and remains active during G1 phase. One mechanism linked to activation of the APC/C is phosphorylation. The yeast APC/C is composed of at least 13 subunits, but the function of many of the subunits is unknown. Hcn1 is the smallest subunit of the S. pombe APC/C, and is found to be essential for cell viability, APC/C integrity, and proper APC/C regulation. In addition, Hcn1 phosphorylation indicates a specific role for the phosphorylation of this subunit late in the cell cycle [2]. 
23.90 23.90 24.60 24.80 23.80 23.80 hmmbuild --amino -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.70 0.72 -3.24 42 164 2009-01-15 18:05:59 2008-02-21 19:08:13 4 4 145 8 112 156 0 79.20 22 53.31 CHANGED M.LRRpPTsIpLo.-.DltcaEct+pcppppppppppppp.....................tt.sttp.ssptp.lsscptphhpptspthu ....M.LRRpPTplpLph-.Dl.p-aEsh+pcpppppppppphpt.............................t...t.s.sttstspt..t..................t............................................................ 0 32 51 84 +10304 PF10472 CReP_N eIF2-alpha phosphatase phosphorylation constitutive repressor Buljan M, Coggill P anon TreeFam_TF105548 Domain This is the conserved N-terminal domain of CReP, constitutive repressor of eIF2-alpha phosphorylation/protein phosphatase 1, catalytic subunit. It functions in the dephosphorylation of eIF2-alpha under basal conditions in the absence of stress. In response to translation inhibition, there is reduced synthesis of the labile CReP that contributes to elevated levels of eIF2-alpha phosphorylation [1]. The C-terminus, family PP1c, is shared with the apoptosis-associated protein Gadd34 and herpes simplex virus [2]. 
25.00 25.00 54.80 54.00 17.70 17.20 hmmbuild -o /dev/null HMM SEED 411 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.40 0.70 -5.76 2 32 2009-01-15 18:05:59 2008-02-21 19:10:12 4 3 24 \N 15 35 0 362.50 65 55.21 CHANGED MEsGTttuRKR.GPRhG.hFh.PFh.RRSpAsSScFPsP.u.pNsGN.....SA.PEpRspYWTKLLSQLLA.LPuLhQKlLlWSQLFGGhhPTRWLDFAusYSALRAL+GREcsAAPTsQKSLSSLpLDSS-s.VsSsLDWLEEGlpWQhSssDLcLcLKAptpALDsAA.sFLLEQQLWGVELLPSSLQutLhSpRELsSSsSGPLslQplsNFpVVSYLLNPSYLDhhPpLtlphQsSsGsuphVGFpTLTPESshLpEDtCHPQPLpAEh.ssuWptCPPLSTEGLPEIHHLRMKRLEFL.QANKGQ-LPTPDQDNGYHSLEEEHsLLRMDPpHCpDNPsQhVssAuDhP....EsTEcK.ELlhpEV.....ppSPptus..sElPhEKEstEs+hsssDhS.htt ..............Es...t.sR.....t.t.hFh.P.F.h..hpSpsssSphPsP.ustp.ts....sSs.PpsRsp.W.hKLLSQLLAPLPuLLQKlLlWSQL.FGGMhPTRWLDFAGsYusLRAL+GREcsAAPTAQ.K.SLSSLpLDs.S.-sussSPLDWLEEGlHWQCSssD......LcL-LKAKGsALDPAApAFLLEQ.QLWGVELLPuSLQu+LaSsRELGSSPSGPLNlQRlssFsVVSYLLNPSYLDChPRLElSYQNSsGsGELVsFQTLTs.ESuCLpED.tCH.PQPLsAElosASWQGCP...PLSTEGLPEIHHLRMKRLEFLQQASKGQ-LPTPDQDNGYHSLEEEHsLL.RMDsKH......C......pD..sPTQhVPsAG.slP.GssQEsTEEKIELLTpEVPLALEcpuPo.EuCPSsE....lPhEKEPGEsplSVVD.S.lc.s........................... 0 1 1 3 +10305 PF10473 CENP-F_leu_zip Cenp-F_leu_zip; Leucine-rich repeats of kinetochore protein Cenp-F/LEK1 Buljan M, Coggill P anon TF101133 Domain Cenp-F, a centromeric kinetochore, microtubule-binding protein consisting of two 1,600-amino acid-long coils, is essential for the full functioning of the mitotic checkpoint pathway [1,2]. There are several leucine-rich repeats along the sequence of LEK1 that are considered to be zippers, though they do not appear to be binding DNA directly in this instance [2]. 
33.00 33.00 33.00 33.20 32.90 32.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.95 0.71 -4.32 30 150 2012-10-08 13:49:02 2008-02-21 19:16:05 4 14 53 0 66 118 1 136.90 33 13.97 CHANGED DEKKpL+lhEcLKESc+cuDsL+D+VEsLERELEhSpcNpEpslL-AEsuKAElET....LKscl-phsppLpsLEhDLsslR.......SEKEsLs+pLQccQp+VSELEthsoShcsLLcEKEp-...+lQhcE-oKsAlEhLQsQL+ELsE ...........ccKpLcltpcL+cscccpssLcc.+lEsLEp-Lphupcspppshh-uEsu+tElps....Lcscl-phsppLpsLch-Lsslp.......pEK..csLsppLpccpp+lpELEphp...ss...hp..phlp...ptEpE...chphtcp.psshp.LppphpcLpE................. 0 3 7 24 +10306 PF10474 DUF2451 Protein of unknown function C-terminus (DUF2451) Buljan M, Coggill P anon TreeFam_TF106152 Family This protein is found in eukaryotes but its function is not known. The C-terminal part of some members is DUF2450. 26.40 26.40 27.10 26.50 26.10 26.30 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.48 0.70 -4.61 12 199 2009-01-15 18:05:59 2008-02-21 19:22:53 4 8 130 0 132 188 1 211.50 37 25.45 CHANGED LYGLppRhlAsEShhhlAcphchhcshlpphLPtpptsh.........lppFasp............slus.ls-lRchlapsssuphlslp.............shlsthsslKW-lpEs.sspHssYVDsl.p-hppFsh+Lppht.ppssls.c..h.tsLW-pslplhtchL....V-GauplKKCostGRALMpLDhpphhstLEplost+..s.chpaV-sYIKAaY....LsEp-hcpWhppp................pEYohKplsuLVptssuu......s++tRpcLLshl-s ...........................hYGLsERlVAsES.lhhlupthch.h.pshLpslhsts.pp.sh.............LppFasp................hl.ss.s.s-L+c.lahhsusph.l.chp.............................................................phl.hhusl.K.W.-l+El..hspHs.YVDhLlp..-.hpp.F.spRLtphs..ppstlsh....lpplLW-pslclsscsl....VEGau.p.V...K.KCSsEGRALM..QL..DhQphh.pL.........E...+l...s.s....h+.......shP..c......tpaV-..sYIKAYY....Ls.E.s..-.hcpWl+pH................pEYStpQlssLlshshss......p+ptRpplLthl-......................................................................... 
0 56 76 109 +10307 PF10475 DUF2450 Protein of unknown function N-terminal domain (DUF2450) Buljan M, Coggill P anon TreeFam_TF106152 Family This protein is found in eukaryotes but its function is not known. The C-terminal part of some members is DUF2451. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.85 0.70 -5.51 12 433 2012-10-03 17:31:52 2008-02-21 19:23:24 4 20 221 0 301 888 8 203.70 22 22.10 CHANGED l-ulEtsYFpp......cFDssha.Lc+lssst..hshpp......l-cptsplppQtchluc+l.phlhppppshspthpplp-lcccLptusshCtsuR+pLstuppphTphsLtlhupp+K+psLhphL.pLsslKphpsh-hclcphlp-usYstAlplL.EspphhpsappasClp-lspplpshhthhtppLDshL.plstsF-scpYsplhpAYtLLs.+spthh-KlpphFlpslcopopsVL+shhp.t.....pscchp+hsYppLCtplsp-pah.CLhchhpsLaclhhSYaplhpa ..................................................................................................................................................................p.-+Lpp.h-hVp...hplhppI.ppppuah.puhsp.lp.pLpsp...lptu...st...tsp.......p.hR.c..p..Lpthcp..t.h...sp..s..uLp....ll.p.....pp+Rpsl.....h.t.lhp.....tLptl.p....pl......ppspsplpth...........l.....p................p.s....-.ass...A...l...p...ll.......p.sp..p..h..h....p..p..............h.t..t..h...p...s....h...p...p.....l...t...p...pl...pph....t.htp..h........................................................................................................................................................................................................................................... 2 109 163 246 +10308 PF10476 DUF2448 Protein of unknown function C-terminus (DUF2448) Buljan M, Coggill P anon TreeFam_TF106107 Family The family DUF2349 is the N-terminal part of this family. This protein is found in eukaryotes but its function is not known. 
19.60 19.60 19.60 20.30 19.50 19.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.39 0.71 -4.80 3 69 2009-09-10 19:42:20 2008-02-21 19:24:03 4 3 37 0 30 52 0 195.40 60 34.97 CHANGED aRRoREADKAFlK..STuuVuoPspVILLRILAFLACAFLlAsTLYGhs-..pS.sspQTLSGGVlPPK...suNsScASDpTssuupuWQDLhGLLPE+ATEsl+lsWQaGpsHQhAVVSVGLLTCLTAlLlAGPIRLRRIDAhASVLWhLlLCLYLAEsYLpTsuPSWLDTlKFuTTSLCCLVGFAAAVATRKSTGPRRARsRRs.sst .........+R.READQsasQ......s.SSuVKuPsQVILLRALAFLACAFLLssALYGsSc...........shss.Gus.lPPu..........GssuS.s.s.s..s.s.TssuA-GWpQLLuLLPEHssEKLpEAWAFGQSHQhuVVAlGLLTCLLAMLLAGRIRLRRIDAFuosLWALLLGLHLAEpYLpssoPSWLDTLKFSTTSLCCLVGFTAAVATRKuTGPRRaRPRRa....ss......................... 1 1 4 11 +10309 PF10477 EIF4E-T Nucleocytoplasmic shuttling protein for mRNA cap-binding EIF4E Buljan M, Coggill P anon TreeFam_TF101531 Family EIF4E-T is the transporter protein for shuttling the mRNA cap-binding protein EIF4E protein, targeting it for nuclear import. EIF4E-T contains several key binding domains including two functional leucine-rich NESs (nuclear export signals) between residues 438-447 and 613-638 in the human protein. The other two binding domains are an EIF4E-binding site, between residues 27-42 in Q9EST3, and a bipartite NLS (nuclear localisation signals) between 194-211, and these lie in family EIF4E-T_N. EIF4E is the eukaryotic translation initiation factor 4E that is the rate-limiting factor for cap-dependent translation initiation [2]. 
19.70 19.70 21.20 20.00 18.50 19.60 hmmbuild -o /dev/null HMM SEED 578 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -13.04 0.70 -5.72 5 194 2009-01-15 18:05:59 2008-02-21 19:26:45 4 5 79 0 92 179 1 427.30 31 63.88 CHANGED YSK-ELL-IKELP+S+cRPuCLo-KYD........................SDGVWDPEKWHuSLYPuSupSsPsEuhKK-pDs-RsoLKRRIsDPRERVK-DDLDVVLSPQR...............................................RSFGGGCpVsussuSRRssSPLE.K-s-uhRLh....................GuRRIGSGRIhs.......................................uRsFERDaRGsch-RcsEcsRDRE+-aKDKRFRR-aGDsK.........RVFu-RRRND....SYsE....EEPEWFSAGPTSQSETIELpGFDDKILEED............p+u+KRo++Ro.....pSlKEshVECNGGlu.c.c.slslspEsuADQEVPcsssLPEs.................sPG-FDFNEFFN......h.s.h...-psht.ss.usSRFS+WFp.............................................hEsttps.h..hc..t.tp.phhP..psL-psh.P+LsSh.ppsh..s.ss.h.....ht.ssspppshFpcLLs...............h.sN..sp..ph...............................hss.P..s.la.ptt..h....pph.s..s.hs..pt.htt..sshs.h..h.hhhph.shptps.slst.......t.phhs..p....p...pp.ppphhpp..sphshs.l.ph.............sS.hs.uFTPTSVlRKMhccKcKc+ 
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.st.h............................................................................................................................................................................................................................................................p.......................................c..pPEWhs..Ppo..-hI-LhGF-..-.....................................................................................................s..........................................t.........................................................................p.p.....t......hp..........................................S.h...ht....................................................................................t..................t....p.............................sh.t..p.....h.......p..lp..........p......s...ts......................t..............p......h..pc.hlt...........tt.............................................................................................................................................t......................................................h........................................t.....p..............................................t....................t.t..t..pt...................................................................................................................................................................... 
0 26 33 65 +10311 PF10479 FSA_C Fragile site-associated protein C-terminus Buljan M, Coggill P anon TreeFam_TF105915 Domain This is the conserved C-terminal half of the protein KIAA1109 which is the fragile site-associated protein FSA [1]. Genome-wide-association studies showed this protein to linked to the susceptibility to coeliac disease [2]. The protein may also be associated with polycystic kidney disease [3]. 18.80 18.80 23.30 21.70 18.70 18.20 hmmbuild -o /dev/null HMM SEED 615 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.01 0.70 -6.45 4 151 2009-01-15 18:05:59 2008-02-21 19:31:11 4 4 85 0 95 148 0 477.70 45 20.51 CHANGED sosSssusspQhTsWETLVlFAINascLNVQMNMuNVMGNssWhTpshpSpGRLSlGSssc+sh.hulsLGuSpLDAKGGlVGGTI-lNplchhhH..IpE-ssppPsHKhtIphhulEh+lDYMGoSVLMs+hSuhshplpDEWKssppsshssthspp...u.IhlHGDLoWDpLQlMISKSTTsDllKhhhKLpEFFoQQFKoSKRlhSsLp..PpLtspo.tAslc+Rppcp.......L..............DAtHHRHWp+sLc.ssGhhls.pLhs.LsccGshlGGohEL+GpsISLACFHG.NF+uKSWALFpL+-PsIsFsTEAph.....pSscp...lhlhQTLshpLGpsTtsQp.....ppshAsVsRlo...RN.haP.phcolpEWF...........cYuhA..NpEl-slcphPh.EsEpptso...solpRhRuuGSups..tph.thNHNpEsIFALPuLQL+FKopHlQGsssP-ht-s....KPcV.CSFlT-FpDHIhVTsDA-A.hhFLHDLloSYLpEKEKslus........Pp....hshpP..........................Gp.ssL..hpsSHospuss..........................................usSuTtsoVsupppp................DWRcFpCpTWHLEPTVRL...lSWsG+pI-PhGlDYILpKLGFpHARTTIPKWLQRGhMDPLDKV.AlhhlpLLhh 
......................h..........phssWETLVlFAlNhppLsVpMNMuNVMGNssWhTpshpopGRLolsSstc+ph.huluLutSpL-u+GGlVGGsl-lst..lchh..............hH...........Ip...E..csspp........P....tHpl.tlphtuh-hRlDYMGoulLMuhhSshshplpDEW+h...s....h.sshs.s...pc...........................u.IhlcGcLpWD.hQlhIo+STTsDllKhhhKLpEFFoQQFcoS+Rs.hSoht....shl........sts...ssh..cppptpp....................................-s..tHHRHW.tlL..................thh......sshhl.s...h.h.LPp..p..u..h.........hGGohpL+GpphoLACFHG.NF+.S.KSWALFpLc-PsIsFhTEAppl.........p...psppp..........shlsQTLshpLGps.....s.............hpp...............pp..sMAsls+lo....Rp...P.thtolpEWF............................................................................pYshA..spElsh.lcp.h.sh.pt..-pthss.....s.hpthRusu...S...........................hNHppE..sIFALPphQLchKo.HhQ.tspp.Pshp.-s.........KP..pV.-CShlTEFpDH.IhVThDA-h.hhFLHDLlouYlKEK..E+s.hhs............p....hs.ps.......................................................................tp.ss.h...p.tspsphtp...................................................................................t.s.s.psh..............................................................DWRcFhC..pTWHLEPTl.R.L...lSWsG+pI-PhGVDYILpKLGFpHARTTIPKWlQRGhMDPLDKllullhhpLh..s...................................................................................................... 0 39 46 75 +10312 PF10480 ICAP-1_inte_bdg Beta-1 integrin binding protein Buljan M, Coggill P anon TreeFam_TF105393 Family ICAP-1 is a serine/threonine-rich protein that binds to the cytoplasmic domains of beta-1 integrins in a highly specific manner, binding to a NPXY sequence motif on the beta-1 integrin. The cytoplasmic domains of integrins are essential for cell adhesion, and the fact that phosphorylation of ICAP-1 by interaction with the cell-matrix implies an important role of ICAP-1 during integrin-dependent cell adhesion [1]. 
Overexpression of ICAP-1 strongly reduces the integrin-mediated cell spreading on extracellular matrix and inhibits both Cdc42 and Rac1. In addition, ICAP-1 induces release of Cdc42 from cellular membranes and prevents the dissociation of GDP from this GTPase [2]. An additional function of ICAP-1 is to promote differentiation of osteoprogenitors by supporting their condensation through modulating the integrin high affinity state [3], 25.00 25.00 26.20 28.40 19.30 18.40 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.30 0.71 -4.54 2 77 2012-10-04 00:02:25 2008-02-21 19:31:49 4 2 49 0 42 66 0 168.10 64 91.50 CHANGED MFRKsKKRHSSSSSQSSEISTKSKSVDSSLGGLSRSSTVASLDTDSTKSSG..NssSDssAEFRlKYVGAIEKLphs.uKsLptPLDLINYIDsAQQDGKLPFVPs-EEhIhGVSKYGlKVuo.DQhDVLHRHsLYLIlRMlCYDDGLGAGKsLLALKTTDspppEhSlWVYQCsShEpAQAICKVLSouFDssLsSEKP ....................................................MFR.KGKKRHSS.S.SSQS.SEISTKSK..SVDSSLGGLSRSST.VA.SLDT...D..........STKSS.G..QS.NsNSDTCAEFRlKYVGAIEKLchs-uKsLEGPLDLINYIDVAQ...QDGKLPF.V.P.E..E.EhI.hGVSKYGIKVoo...sD.QaD..........VLHRHuLYLIlRMVCY..DDGLGAGKsLLALKTT..DuppEE...........hSLWVYQCsSh-QAQuICKlLusuFDslLss-.............. 0 11 13 24 +10313 PF10481 CENP-F_N Cenp-F_N; Cenp-F N-terminal domain Buljan M, Coggill P anon TreeFam_TF101133 Domain Mitosin or centromere-associated protein-F (Cenp-F) is found bound across the centromere as one of the proteins of the outer layer of the kinetochore [1]. Most of the kinetochore/centromere functions appear to depend upon binding of the C-terminal par to f the molecule, whereas the N-terminal part, here, may be a cytoplasmic player in controlling the function of microtubules and dynein [2]. 
25.00 25.00 28.50 25.00 24.70 23.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.24 0.70 -5.05 10 64 2012-10-08 13:47:38 2008-02-22 09:02:08 4 10 40 0 34 53 0 257.70 59 11.94 CHANGED MSWAVEEWKEGLPo+ALQKIQELEuQLDKLKKERQQRQFQL-SLEAALQKQKQKVEsEKoEuusLKRENQSLhEoCDsLEKuRQKlSHDLQVKESQVNaLEGQLNSsKKQIEKLEQELKRaKsELE....RSQpuht....su-spLsssoTPQKoFusPl.oPuphapDu+hE-LpEKYNKEVEERKRLEsElKsLQ....s.KKhsQs.lsQuTloHRDIARHQASSSVFPWQ.QEpTPS+tSusuhETPl+RssousahsWEpE-TPsKss.p.tptpt.soShtussssopLh-QLKsQNQ.EL+S+VsELEhRLQuQEK- .......MSWAlEEWKEGLPoRALQKIQELEuQLDKLKKE+QQRQFQL-SLEAALQKQKQKVEsEKsEsssLKRENQpLhEhC-sLEKs+QKlSH-LQVKESQVNh.EGQLsSsKK.......QIE+LEQELKRhKSELE....RSQQutt....uuDspLs.ssTPQK..FsoPL.TPs.....opaE-LcEKYNKEVEERKRLEtElKsLp.....s.K+hsps.lsQuThsHRDIARHQASSSVFsWQ.QEpTPS+.Stss.cTPh+Rshuuuah.hE.EsoPs+ss.p.tptst..uuh.ssspsspLhcQLKsQNQ.......EL+u+lsELEhcLQupEK-.................................. 0 7 10 18 +10314 PF10482 CtIP_N Tumour-suppressor protein CtIP N-terminal domain Buljan M, Coggill P anon TreeFam_TF106469 Domain CtIP is predominantly a nuclear protein that complexes with both BRCA1 and the BRCA1-associated RING domain protein (BARD1). At the protein level, CtIP expression varies with cell cycle progression in a pattern identical to that of BRCA1. Thus, the steady-state levels of CtIP polypeptides, which remain low in resting cells and G1 cycling cells, increase dramatically as Dividing cells traverse the G1/S boundary. CtIP can potentially modulate the functions ascribed to BRCA1 in transcriptional regulation, DNA repair, and/or cell cycle checkpoint control [2]. This N-terminal domain carries a coiled-coil region and is essential for homodimerisation of the protein [3]. The C-terminal domain is family Pfam:PF08573. 
27.00 27.00 28.00 32.70 25.90 24.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.66 0.71 -4.32 4 111 2009-01-15 18:05:59 2008-02-22 09:04:36 4 3 51 0 55 100 0 115.10 67 16.20 CHANGED FtEhLs+LKEhHDKEl.GLQsKlscLppERChDAQRlEEhFoKNppLREQQKlLpEsI+VLEcRLRAGLCDRCsVTpEhh+KKQpEhEss+.psLphIotLpNEhpoLpEEN+pLpEELc ..Fp-hhsKLKEhH-KEVpGLQsKlscLppERh..hD..AQRL.EEhFoKNQQLREQQKsL+EoIKVLE.............c.RLRAGLCDRCsVTcEphRKKQQEFENh+pQNL+LITELh....NE+NsLQEENK+LpEpLp........................ 0 2 6 17 +10315 PF10483 Elong_Iki1 Hap2_elong; Elongator subunit Iki1 Coggill P, Eberhardt R anon Pfam-B_26773 (release 22.0) Family This family is a component of the RNA polymerase II elongator complex [1,2]. This complex is involved in elongation of RNA polymerase II transcription and in modification of wobble nucleosides in tRNA [3,4]. 27.50 27.50 27.50 27.80 27.20 27.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.69 0.70 -5.04 22 253 2009-01-15 18:05:59 2008-02-22 09:11:26 4 4 198 3 176 260 0 240.80 20 82.24 CHANGED LLp+lLSL+-.sSPLhLlLDSLtQsup.LlpEal++u.....t.pp.cllalSFEThpc..PphsspFlssp.............stshppllppltst.........stspssp..cpLlllDSlN.lhsp.......plspFlushh.sPt.......sollusaHp-.............s......ahPuslslLpalAoTllclpsh.p..hccpshcpplsc..hsht..hsss.................shslplppRRKSGRult.pahlssss+.h..hhp.........c-....................sphsp...--sEt....hTFNLshocKQ+tAREpV.LPah-AQpt.u........usGGtIlYE 
.....................................................................................................phh.t......sshhL.h.Dslt..uh.llpthhp.p........t.t...lhhlu.hp.s..pp.........t.hs..htsp.........................................th.s..th.tt.hth..............................tt.sp..............thhlhlDS....ls...lhtp.............ph.phluphh..pst......................................................ss.llshhHp-l..................................a.P.t.s...hshLphlAsshlplps.......h.........t..................................................................th.hph.hR.....R+s.G......R..s..h....p..t...hhs..t...sp.......h...................tp............................................................................tt.st....psp.t........hTFNLtL..oc+p+ps+-plsLPah.upp.....................ttsutIhY-.............................................. 0 54 92 137 +10316 PF10484 MRP-S23 Mitochondrial ribosomal protein S23 Buljan M, Coggill P anon TreeFam_TF106116 Family MRP-S23 is one of the proteins that makes up the 55S ribosome in eukaryotes from nematodes to humans. It does not appear to carry any common motifs, either RNA binding or ribosomal protein motifs [1]. All of the mammalian MRPs are encoded in nuclear genes that are evolving more rapidly than those encoding cytoplasmic ribosomal proteins. The MRPs are imported into mitochondria where they assemble coordinately with mitochondrially transcribed rRNAs into ribosomes that are responsible for translating the 13 mRNAs for essential proteins of the oxidative phosphorylation system [2]. MRP-S23 is significantly up-regulated in uterine cancer cells [3]. 
37.50 37.50 37.80 37.70 32.20 30.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.61 0.71 -4.24 9 115 2012-10-03 14:45:55 2008-02-22 09:19:38 4 2 85 0 64 100 0 123.60 46 73.18 CHANGED Au..SRLEKlGTIFTRVpGLL+uGAhKh--+PlWYDVYtAFPP+tEPcYsRP....sPps..sVRpIaYtEDslRAKFac.ps+uptshsLhs.spposoQpFlphYppLcspG..sh-EEh.......h.cTupsLht-tlh.p ......AtSRLEplGolF..oR.scsLlRuGslK...-KPLWaDVYpAFPPhcEPhacRP.......tsct.......sl....p...cIaYpEDtlRAK.....Fap.....hssu.c.shsLh.......ssshp.SssQ+FVc+Yp-Lpp.G..th---p.......a.-TucsLLu-thh..p............................................. 0 20 25 45 +10318 PF10486 PI3K_1B_p101 DUF2447; Phosphoinositide 3-kinase gamma adapter protein p101 subunit Buljan M, Coggill P anon TreeFam_TF102035 Family Class I PI3Ks are dual-specific lipid and protein kinases involved in numerous intracellular signaling pathways. Class IB PI3K, p110gamma, is mainly activated by seven-transmembrane G-protein-coupled receptors (GPCRs), through its regulatory subunit p101 and G-protein beta-gamma subunits [1]. 
18.20 18.20 18.80 19.60 17.40 17.20 hmmbuild -o /dev/null HMM SEED 857 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.68 0.70 -6.97 4 166 2009-01-15 18:05:59 2008-02-22 09:24:22 4 4 47 0 94 139 0 485.90 28 92.84 CHANGED ToCTEDRIQHALERCLHGLStss.hSopWsAGLCLNhWSLpELVsRDsGpalILlEplLt+sREsQcpspY-LlhPLALhFYasVLpAPaIP.sS-LL.KAhuhaHpFLTWPsPYCsVhpEhLoFIssEhKAPGIoYQRLVRsEQGLPs+sapSSTsTVLLlNPsEVpuEFLS...lApcLSsspps.csshspLlpHhaQAohGs+CcssuLHppLpA+slccLp-IaosssEA.EhAuu.s.-su.uREtLps+Lp-l.....uttAGh.sGshssupPs+lpPIPhPsu+CYTYSW-QDsFDlLsplLhpEspL..lps.lhu-DEEs--EE...--c.EssGpsP-RDSlhSs...h.p.ss..................S.tss..phssLu++.hp.FVSuLSsshDSGYsEDS-EuSpEhsthsp.p.ERupp+.tp....+IhpLFKoKuplshR+....Lpss.s..........stsLPLRRAGShssP..p..p.PuRu+Ro+SLPQ.thGpph.s...hst.hs..RRPFLSsD-D.....sKluThRVVVFGSDRISGKVARAYSNLRh+EospPhLTRaFKLQFaYlPVKRS...ssu.hhsP....toPSPss...ht.s.u-......EtSTNDISHYlGhLDPWYcRNVhGLMpLs.ssLC.QShKtEAE...shEsSpTphPILADMlLYYCRFATRPlLLQlYpsElTF..loG-KpTElFIpsLEL..........GHSAspRAI+ASGPGuKRLGIDGDp-slPhTLQIsYSKuslSGRSRWsslEKVCTSVsLpKACpp.EELsSphE............CLNLslTEVVKRQNSKoKKSFNQlsTSpIKlDKlQIIuppus.FslCLDQDEpKILQsVlRCEVSPCY+PEcsDhsphs+sP..sLsuQstsEhpSL....LCLPIsTFSGAlP 
...................................................................................................................................................................................................h.h.........hh....thh.ph..hhh.h...hhhP.shtsh..phh....lp.E....ss..Gh...a.pc..hlhtEQtl........p.............p.p......hhlhhs....h.........h..............p.tt.....t............t.h..h..ll.p..hpsshG...th.h.tltt.Lp..........hh..h...h.p.........t.............h..h......................................................................................................................................................................................................................................................................................................................................................................................................................................................p......pttt.hh..hhphphahhPh.........................................................t..p......lu.hlu.hD.WYptsh.............t...................t.................hh.Dhh.aYhR.u...hhh.ha.hp......t......pphhh.tl..th............................................................................................................................................................................................................................................................................................................................................................... 0 9 19 49 +10319 PF10487 Nup188 Nucleoporin subcomplex protein binding to Pom34 Buljan M, Coggill P anon TreeFam_TF101106 Family This is one of the many peptides that make up the nucleoporin complex (NPC), and is found across eukaryotes [1]. The Nup188 subcomplex (Nic96p-Nup188p-Nup192p-Pom152p) is one of at least six that make up the NPC, and as such is symmetrically localised on both faces of the NPC at the nuclear end, being integrally bound to the C-terminus of Pom34p [2]. 
25.00 25.00 29.80 25.20 24.50 20.70 hmmbuild -o /dev/null HMM SEED 931 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.60 0.70 -6.63 4 230 2009-09-11 16:46:00 2008-02-22 09:32:58 4 8 183 0 149 233 0 642.30 25 42.33 CHANGED cphstlspFLppsKppLLE.Lshhp.psTppu-pscs+chlAp.lLthslpspcF.upsEc.LsphLph.l.Ec.phl.chhhsl..hss.c..pthh.cLcstsahpp-Rl....stllpp...llcllophlssps.hsupshpshppL-pEla.s.cpplpsLl........................-hhhspsllaClchhp.LLpLhllNtsVssphsppWF..h.sphlt.s+LhpsTshu........t+ahshp-hs...phhlu+lpuhholloI.hLsLsouh-pup.shso..shhtDspsFchVsssLhs..-sspp.........shVLYuWShILht+.hh.pp.s...............Fs.sp.tphIphhuppA-slsVhppLpcLschLp.D....slaTslhssahhhSlshIslTupTopshtslIsssPcslIEsFlsNssapsc........lshl+uKhPLl...LhPhIpL..A.IsschAtaphccltSa..lsKhshhs..Ychhs.s.hscpso......-Llchpu.lLVhPP.hEpss.ph.hslPpuTKupIls.......................usps.ps.......LlhF.YpYNGWoLlGRlLQNL.+la....sshDs.Q+.EhhIsIIsLlsNVlsscsuV-cShhlLp.hSs.hsps.......sIlpVIaclhEp..uLpp+shcllspCsshhTsLssphsahVWsYLs+SsLL................-phGKsGLuNlIhGSIEhssG-YsFTISllKLspsLIcssLo.....s.thsp+oKpsIlspllhHLlclaEoahhW+YNpph.Q+hEIuhhLhphFpsILt.VasID.tssspsKlppl......hocAAspllshFLlss-s.shsstolhshl.s.spspohh.hGsu..spLYspslppAaslssLlIsIRsss+.LpPSsLE+hLF...sposcLlslY.ph......Po...l+lpIlcLLssLssssWssth.P.LhSaLGps.utshLcullsDLsoslpDaplthoLY.FhssllEoKQ-GLulLFLTGcssss 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.....h..........................................h............lPtsThGplh..........................ssps..........llha.ap.asshshhsp.l.p..hh..p...sh................................sp.s..p....p.p.lh.ll....s..Lh.tpl............l..........p.t.....p.................s........h.......t.....p.......s.....h...........................................slhshlh.....lhpp........sl..............s...................s........lpl..lsuClphlssL..h..sh.....Ps+VWs.L.t.+os.hL.................................................ps..s.t.u.G.h..hshl.h.....s....h.E.spGcYshhluhlcLh...psLlc.s...tls......................................pp....t..........psl..........l............s....h....h.h....hh.h.-hh.shtpW+ass.t..+..plsthlhphhptl.L.hsa.....t.h...s....tt.s.....ps..plptl......................hspuu.ph.llshhh..hss.ss....thshs.............s.p..s.t...hpt......hh..phlp.shphsp.ll.p..l.p..............p..h..ss............
..L.EptLh............ttu..s.t.L..ltlhshh.....................ss......h..th.p...ll....c.h.........L.ts.....l...s......s.......................................SlhuhLGs.p.u.t.t.hhpth.lsplpp....h.ps.plp...h.lhcFloshl.p..sp..hLh.hhLshp..................................................................................................................... 0 41 72 112 +10320 PF10488 PP1c_bdg Phosphatase-1 catalytic subunit binding region Buljan M, Coggill P anon TreeFam_TF105548 Domain This conserved C-terminus appears to be a protein phosphatase-1 catalytic subunit (PP1C) binding region, which may in some circumstances also be retroviral in origin since it is found in both herpes simplex virus and in mouse and man. This domain is found in Gadd-34 apoptosis-associated proteins as well as the constitutive repressor of eIF2-alpha phosphorylation/protein phosphatase 1, regulatory (inhibitor) subunit 15b, otherwise known as CReP. Diverse stressful conditions are associated with phosphorylation of the {alpha} subunit of eukaryotic translation initiation factor 2 (eIF2{alpha}) on serine 51. This signaling event, which is conserved from yeast to mammals, negatively regulates the guanine nucleotide exchange factor, eIF2-B and inhibits the recycling of eIF2 to its active GTP bound form. In mammalian cells eIF2{alpha} phosphorylation emerges as an important event in stress signaling that impacts on gene expression at both the translational and transcriptional levels [1]. 
23.90 23.90 26.10 25.30 20.60 23.20 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.00 0.70 -5.23 5 184 2009-01-15 18:05:59 2008-02-22 09:35:22 4 6 97 0 70 171 0 131.80 36 35.04 CHANGED shPISshsuCSsc..sah.thAsc-lpoSSsspSI........Sas-EuE-uuuoSDSSphEu-hE.-sEG-+LW-shscSsDPYNPLsFTAslQTssT.sPKs.o.ptpshSspps..sSsuEuPlsSss-sSoSsDDSW-uSuDEEEst...cLWsSFCpNDDPYNPLNF+APhQoSucs.cus+hDSsosStssVuI........put+SoR+Sp+A.LlsKlpccCs+pLSsEoLSls.hptclh.s.spGs++cosKlKKV+FSPsVpVH+MssWSaAhctuRKGPWEEaARDRCRFQKRItETEcAIGYCLohpHREKMasRh .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h+...+V+F...s.....t....s.p...l+hlssWsh..uuctuR+G.sWEphAtDRsRFp+RIscsEthlusCLssptRtRhhsR...................... 0 13 17 39 +10321 PF10489 RFPL3_antisense Ret finger protein-like 3 antisense Buljan M, Coggill P anon TreeFamB_TF341410 Family This short transcript is purported to be the antisense protein of exon 2 of RFPL3 gene, however this was not confirmed. Since the RFPL3 (ret-finger protein-like 3) gene is expressed in testis the suggestion is that this may have a role in the antisense regulation of the RFPL genes. RFPL transcripts encode proteins with tripartite structure of RING finger, coiled-coil, and B30-2 domains, which are characteristic of the RING-B30 family. Each of these domains is thought to mediate protein-protein interactions by promoting homo- or heterodimerisation [1]. 
27.00 27.00 29.70 29.50 26.30 26.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.63 0.71 -4.34 6 31 2009-12-01 17:09:26 2008-02-22 10:15:53 4 2 5 0 4 22 0 71.80 47 51.62 CHANGED MGNLCGCIQGDSKKPSKKRVKRKPYSTTKVTSGSTFNENTRRYAVHTNQCRRPHGSRVKKKRYPQEDDFHHTVFSNLERLDKLQPTLEASEESLVHKDRGDGERPVNVRVVQVAPLRRESRsI- ...................t...D..htsS+chspcc..Sso..pVos.soh.t.hRRYul.TpQt.R..s...+Kth.s.E........................EEs.shKctGsGER.VpsRVhps............................ 0 4 4 4 +10322 PF10490 CENP-F_C_Rb_bdg Rb-bdg_C_Cenp-F; Rb-binding domain of kinetochore protein Cenp-F/LEK1 Buljan M, Coggill P anon TF101133 Domain Cenp-F, a centromeric kinetochore, microtubule-binding protein consisting of two 1,600-amino acid-long coils, is essential for the full functioning of the mitotic checkpoint pathway [1,2]. This domain is at the very C-terminus of the C-terminal coiled-coil, and is one of the key Rb-binding domains [3]. 19.20 19.20 23.40 34.30 18.60 16.10 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.44 0.72 -4.64 4 55 2012-10-08 13:53:08 2008-02-22 10:18:33 4 10 32 0 26 45 0 46.60 67 1.96 CHANGED sp.tpspEso-aEP-GLPEVVp+GFADIPoGcsSPYILRRTThuppsSP .....hpstEcpEso-FEPEGLPEVVKKGFADIPTGKTSPYILRRTThuTRTSP.... 1 1 4 10 +10323 PF10491 Nrf1_DNA-bind NLS-binding and DNA-binding and dimerisation domains of Nrf1 Buljan M, Coggill P anon TreeFam_TF105308 Family In Drosophila, the erect wing (ewg) protein is required for proper development of the central nervous system and the indirect flight muscles. The fly ewg gene encodes a novel DNA-binding domain that is also found in four genes previously identified in sea urchin, chicken, zebrafish, and human [1]. Nuclear respiratory factor-1 is a transcriptional activator that has been implicated in the nuclear control of respiratory chain expression in vertebrates. 
The first 26 amino acids of nuclear respiratory factor-1 are required for the binding of dynein light chain. The interaction with dynein light chain is observed for both ewg and Nrf-1, transcription factors that are structurally and functionally similar between humans and Drosophila [2]. The highest level of expression of both ewg and Nrf-1 was found in the central nervous system, somites, first branchial arch, optic vesicle, and otic vesicle. In the mouse Nrf-1 protein, Swiss-Prot:Q8C4C0, there is also an NLS domain at 88-116, and a DNA binding and dimerisation domain at 127-282. Ewg is a site-specific transcriptional activator, and evolutionarily conserved regions of ewg contribute both positively and negatively to transcriptional activity [3]. 20.30 20.30 20.30 26.00 18.40 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.48 0.70 -5.18 4 147 2009-01-15 18:05:59 2008-02-22 10:19:02 4 6 78 0 85 152 0 178.40 61 35.18 CHANGED GPVGVAAAAAIATGKKRKRPHSFETNPSIRKRQQTRLLRKLRATIDEYsTRVGQQAlVLssoPGKPNslFKVFGAsPLENVlRNhKuhlLpDL-sALAp+AP.Ppspss.sl.ELPPLlIDGIPTsV-KMTQAQLRAFIPhMLKYSTGRGKPGWGKESsRPsWWPpDLPWANVRSDsRoE-pKpKVSWTpALRpIVhNCYKaHGREDLLPtFtEp ..................................GPVGhAAAAAlAouKKRKRPHsFEoNPSI.RKRQQsRLLRKLRsTlD.EaoTRVGQQAlVL........shoPu....K..PNssaKVFGAtPLEsVl+ph+shlh--L-sAL..AppA...P...P......ss........ELPPLsIDGIPsSV-KMTQ..AQLRAFIP.MLKY.STGRGK.PGWG+ESs+P..WWPc-.l.PWA.N..V.R..Ds...RoE-.pK..Q.+.............V.S..WTpALRpIVhNCYK.HGREDLL.sFt-p............. 0 25 31 59 +10324 PF10492 Nrf1_activ_bdg Nrf1 activator activation site binding domain Buljan M, Coggill P anon TreeFam_TF105308 Domain In Drosophila, the erect wing (ewg) protein is required for proper development of the central nervous system and the indirect flight muscles. The fly ewg gene encodes a novel DNA-binding domain that is also found in four genes previously identified in sea urchin, chicken, zebrafish, and human [1]. 
Nuclear respiratory factor-1 is a transcriptional activator that has been implicated in the nuclear control of respiratory chain expression in vertebrates. The first 26 amino acids of nuclear respiratory factor-1 are required for the binding of dynein light chain. The interaction with dynein light chain is observed for both ewg and Nrf-1, transcription factors that are structurally and functionally similar between humans and Drosophila [2]. The highest level of expression of both ewg and Nrf-1 was found in the central nervous system, somites, first branchial arch, optic vesicle, and otic vesicle. In the mouse Nrf-1 protein, Swiss:Q8C4C0, there is an activation domain at 303-469, the most conserved part of which is this domain 446-469. Ewg is a site-specific transcriptional activator, and evolutionarily conserved regions of ewg contribute both positively and negatively to transcriptional activity [3]. The family Nrf1_DNA-bind is associated with this domain towards the N-terminal, as is the N terminal of the activation domain. 19.10 19.10 19.30 19.30 18.70 18.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.36 0.72 -3.54 7 86 2009-09-16 13:14:49 2008-02-22 10:19:52 4 5 61 0 50 83 0 60.50 45 11.56 CHANGED othlslPV....S..MYQ....ThlA.shtQh.........lsPh.QV..........psG............................husthHthhhpussu.ts...Qhl..pVloLcs ................GlVpIPV....S..MYQ....TVVT.SlsQs..........suPV.QVAhus...lssc.u-ph.Th.............................................................s..................................... 1 12 16 30 +10325 PF10493 Rod_C Rough deal protein C-terminal region Buljan M,, Coggill P anon TreeFam_TF101176 Domain Rod, the Rough deal protein, displays a dynamic intracellular staining pattern, localising first to kinetochores in pro-metaphase, but moving to kinetochore microtubules at metaphase. 
Early in anaphase the protein is once again restricted to the kinetochores, where it persists until the end of telophase. This behaviour is in all respects similar to that described for ZW10 [1], and indeed the two proteins function together, localisation of each depending upon the other [2]. These two proteins are found at the kinetochore in complex with a third, Zwilch, in both flies and humans. The C-terminus is the most conserved part of the protein. During pro-metaphase, the ZW10-Rod complex, dynein/dynactin, and Mad2 all accumulate on unattached kinetochores; microtubule capture leads to Mad2 depletion as it is carried off by dynein/dynactin; ZW10-Rod complex accumulation continues, replenishing kinetochore dynein. The continuing recruitment of the ZW10-Rod complex during metaphase may serve to maintain adequate dynein/dynactin complex on kinetochores for assisting chromatid movement during anaphase[2]. The ZW10-Rod complex acts as a bridge whose association with Zwint-1 links Mad1 and Mad2, components that are directly responsible for generating the diffusible 'wait anaphase' signal, to a structural, inner kinetochore complex containing Mis12 and KNL-1AF15q14, the last of which has been proved to be essential for kinetochore assembly in C. elegans. Removal of ZW10 or Rod inactivates the mitotic checkpoint [3]. 
20.30 20.30 22.00 21.40 19.90 19.40 hmmbuild -o /dev/null HMM SEED 551 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.84 0.70 -6.21 6 118 2009-01-15 18:05:59 2008-02-22 10:27:05 4 3 83 0 78 103 0 461.60 32 29.23 CHANGED cGhhhP.lA+hRLPF+Lll...ppshhsILssELo.Esa.oLLLlsphhps.........shDhhhhSAsKpsh.pchK.phhcpsssppaplhs+s.sthlQoIhphlcslsssphAhhIhahlTppsP...........................-GsDplpAhhhChchAccacpslssps-A+-K....hcKlph+Y.lptTpplLhsatLNDpchL+lltpPscLIsALYcHosl.pp.......shsDIpslVpEIAclpsLslppIpshLLpKWLshhhsu.......................lhc-pshs-sss-lEsss......sltRshYlLpuashs..puVhFLlu..souLhsssshosup+pLtlhssaphh.sDsssoahcphhpcphhsLKClhaLpuLthhNI..TlppFps..osK.ullKtLWpsHuppPpulcllAplCLtYclYsPQlWNGlLp+hltFNh..................................lsaLptlL-slSuhcpLaplsuFupAWctlIptPFhoAspshS.sQpusLt+oLVhL.KCPlstsLsLlsIAchhlplshPuhA.usLLhhsps-cpcQpIKphluossspslhQQItEL.ssG.luslsptlsstl.s .......................................................................................p....hs.huphRLPF+.lh.......tpshhp....Il....ssELs..-sh.phh.lsphhph.................shDphhhssspp....h.pphp......p...t..ptpp...sl.spp.....tthh....ps.l.phl.sl.ssp...husshhh.lspphP.............................Gs.+hpuhphChhlucc....a.h...psh......ss...p...s....cs+c..c..h...p..........hpKlch....pa....tpptTEtl..Lhsa.....t.Lss....tchLc....llspPscLIhsLYcH.sl.pp..........spshs.DIpssscEIAclp...plslpplh.hLLpKWLs..............................................................................................................hps.t.p.t.spph.php.p...cp............sLp.RlhYLLp......shshc......pthhal..h.........s...h.h..h..shp..p..ho.hsp+tRAlpsLhh..ls.sp-..........slpoh.........h.........cp..shpclp.hLKClh....aLsphEhLslsh.oh-hFps..ssKpuhlKu.LW+s..a.uc.psh....ulcLlspLCL-YclaD.pl.WNulLp..+lltFsM..................................l.shLpplLp.slo.sh.pLhpl.s..hspuWppllphPhhsAs.s.s.pQ.tth..cshlhl.tCPl..sLshhthApphhphphsthA.usLh.hspspcpppplpthlt.p....lhpQl.-h.phG.lshh.p.h............................................................
.......................... 1 24 30 55 +10326 PF10494 Stk19 Serine-threonine protein kinase 19 Buljan M, Coggill P anon TreeFam_TF105332 Family This serine-threonine protein kinase number 19 is expressed from the MHC and predominantly in the nucleus. Protein kinases are involved in signal transduction pathways and play fundamental roles in the regulation of cell functions. This is a novel Ser/Thr protein kinase, that has Mn2+-dependent protein kinase activity that phosphorylates alpha -casein at Ser/Thr residues and histone at Ser residues. It can be covalently modified by the reactive ATP analogue 5'-p-fluorosulfonylbenzoyladenosine in the absence of ATP, and this modification is prevented in the presence of 1 mM ATP, indicating that the kinase domain of is capable of binding ATP [1]. 21.00 21.00 21.00 21.60 20.40 20.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.77 0.70 -4.98 22 201 2009-01-15 18:05:59 2008-02-22 10:40:10 4 3 164 0 134 203 0 230.00 26 72.35 CHANGED h+sshhcplsh.hsshs.pphspllphRps.lPs.llshsplau.....lh..pssThV-+pl.pLl+pGtlR+hhlss..............tthhhhhcshctllppsshlps...................hp..................................c+Flphlpppssshhhs....pphFsttp............hstLlpsGh.Ls..........................sthssss...............................a.lSlPssGsal+LlsuGRpaLlshLp+...s+a+Ehh.spLpc+W.Ghs.........................+hp.hYGhshcWhLp-slGAGllElFsT.slG.uhRh 
....................................h.............tshp.hh.h..hthhps.lPP..llhhuplau............ll..ps.ThV-RpltpLhcpGtlRhhtlst...........................th.Dhcsllh.ppshhpp..........................hhp................................................................h..........pcFhphlh.s..s.t..s.shs......pphhotpp.........................lstLlpAGh.Lsh...........................................p..csts......................................................................ahlulPshG.al+hhspGRptllshlp+...s+a+Eh.hs.LhpR...............................+h.thhtlshpahlc-hlGuull-shpT.ssGhhlRh...................... 1 37 71 105 +10327 PF10495 PACT_coil_coil Pericentrin-AKAP-450 domain of centrosomal targeting protein Buljan M, Coggill P anon TF105408 Domain This domain is a coiled-coil region close to the C-terminus of centrosomal proteins that is directly responsible for recruiting AKAP-450 and pericentrin to the centrosome. Hence the suggested name for this region is a PACT domain (pericentrin-AKAP-450 centrosomal targeting). This domain is also present at the C-terminus of coiled-coil proteins from Drosophila and S. pombe, and that from the Drosophila protein is sufficient for targeting to the centrosome in mammalian cells. The function of these proteins is unknown but they seem good candidates for having a centrosomal or spindle pole body location. The final 22 residues of this domain in AKAP-450 appear specifically to be a calmodulin-binding domain indicating that this member at least is likely to contribute to centrosome assembly [1]. 
25.00 25.00 25.50 27.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.47 0.72 -4.05 18 211 2009-01-15 18:05:59 2008-02-22 11:00:32 4 5 142 0 139 223 0 80.90 41 3.77 CHANGED +hhRpEoFR+uLsaQK+Yl.LhlssaptCppssLphlscM............GspPs.......tphpcs+p+a+oslpsllAlhR........................M+hlsccWp ...........+YLRsESFRKALlY.QK+YLLL..hl..suaQpCEpssLphlAch.....................................Gst.Ps.......htpp.cshs+FRoAlpsslAlsR........................h+als+cWp.................................... 0 33 51 93 +10328 PF10496 Syntaxin-18_N SNARE-complex protein Syntaxin-18 N-terminus Buljan M, Coggill P anon TreeFam_TF105868 Domain This is the conserved N-terminal of Syntaxin-18. Syntaxin-18 is found in the SNARE complex of the endoplasmic reticulum and functions in the trafficking between the ER intermediate compartment and the cis-Golgi vesicle. In particular, the N-terminal region is important for the formation of ER aggregates [1]. More specifically, syntaxin-18 is involved in endoplasmic reticulum-mediated phagocytosis, presumably by regulating the specific and direct fusion of the ER with the plasma or phagosomal membranes [2]. 21.20 21.20 21.20 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.86 0.72 -4.15 33 245 2009-01-15 18:05:59 2008-02-22 11:01:51 4 3 213 0 167 252 0 81.90 26 24.35 CHANGED sDlTslFpp.Vtpsp......pp.ss.s............tptthtph.phpDpFlKEAhclhppIspLpsaLpplRpsYlshs......tsphopspcDph ....................DlT.hF+t.Vtthc.....................ptttt.s..................tp...htps....tspssFhccAhclhspIspLcpaLhphRpsYls...h.p.h.s.htp...hoctpcs................................ 0 43 84 132 +10329 PF10497 zf-4CXXC_R1 Zinc-finger domain of monoamine-oxidase A repressor R1 Buljan M, Coggill P anon TreeFam_TF101076 Domain R1 is a transcription factor repressor that inhibits monoamine oxidase A gene expression. 
This domain is a four-CXXC zinc finger putative DNA-binding domain found at the C-terminal end of R1. The domain carries 12 cysteines of which four pairs are of the CXXC type [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.76 0.72 -3.90 13 365 2009-09-11 09:20:48 2008-02-22 11:03:32 4 20 145 0 233 361 3 98.80 40 16.64 CHANGED IYDshpGpoCHQCRQKTh............Dh+ssC.....psppC...hp..Gp.........................FCGcCLhsRYGEcscEshtssc.WhCPhCRG........ICNCS...aCR+K+GhsPTGlLh+pA+tpGasSVtcaLh ...............................................................................hYs...GpoCHQCRQ.KTh............-t+.s...C................c...s..spC....tsp.....sp..................................FCssCLhsR.YG.E.c........lccs...h.............h..s......s........s...........WhCP.CRG...........ICNCS................hCR++....c....Gh...ss...TGhhhhhsp..GatsVtthL.......................... 0 60 121 173 +10330 PF10498 IFT57 Intra-flagellar transport protein 57 Buljan M, Coggill P anon TreeFam_TF106156 Family Eukaryotic cilia and flagella are specialised organelles found at the periphery of cells of diverse organisms. Intra-flagellar transport (IFT) is required for the assembly and maintenance of eukaryotic cilia and flagella, and consists of the bidirectional movement of large protein particles between the base and the distal tip of the organelle. IFT particles contain multiple copies of two distinct protein complexes, A and B, which contain at least 6 and 11 protein subunits. IFT57 is part of complex B but is not, however, required for the core subunits to stay associated [1]. This protein is known as Huntington-interacting protein-1 in humans. 
40.00 40.00 45.40 49.90 39.80 39.10 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.03 0.70 -5.71 18 178 2009-01-15 18:05:59 2008-02-22 11:11:11 4 3 124 0 127 178 4 298.40 38 81.42 CHANGED ME-Ll-KLKLLsYEccahcptp..hKPLoRaYFu....hssNPuEQFahFosLssWLhs..psG.psh-pPpEaD..DPNuTssNILs-L+shG..hss-FsPsKL+sGsGEtVshlLssLu-puLpppsF...sa++Ppasp.......-c..s-pcsh..-DDs-hhhpcl---...-sp-p--.............hhp.th.....s.tttspsptllcSplDstEWplElERVhPpL..Klpl+sDs.+DWRsHl-phpphpcslppthspspthLc+.....lps-ls.psLE+lpoREchlNsQlpphlp-.a+ptppphsphpcphpphstulpphpttLsclspcL-plKpch-E+ssshoDsuPlhplKpuls+L+pEIppMsl+IGVlp+olh .....................................................-plh-KL+lLpY-..pphh.t..p........h+s.s+aYFs.....ssN.....suEQFahFstLssWLhp....ptG.p.hptPp.E......aD...DPN.sshspIlttL+phs..hss-FsPsKL+pGhG-tshhlLstLuppALphh.ta...pap+.s.hs........--......p.tsh...-D-sE.h.hhpcl--ph...ttpcs--............p..hhp..t.........th.ttptptppl..hpopsDst-WplElERVhPpL........KVp...l+.s...D......s..+DWRhHl-QMcphpssIpsth..pcscs.Lc+.....Lps-lscsLEKItoREKalNs..QLcphlpc.Y+shptpLupspcpappsssslscpop.Lsc....ls-cLEplKpch-E+GsshoDu.uPllpIKpuls+L+pEhhpMsl+IGllppsl.............................. 0 62 73 104 +10332 PF10500 SR-25 Nuclear RNA-splicing-associated protein Buljan M, Coggill P anon TreeFam_TF105478 Family SR-25, otherwise known as ADP-ribosylation factor-like factor 6-interacting protein 4, is expressed in virtually all tissues. At the N-terminus there is a repeat of serine-arginine (SR repeat), and towards the middle of the protein there are clusters of both serines and of basic amino acids. The presence of many nuclear localisation signals strongly implies that this is a nuclear protein that may contribute to RNA splicing [1]. SR-25 is also implicated, along with heat-shock-protein-27, as a mediator in the Rac1 (GTPase ras-related C3 botulinum toxin substrate 1) signalling pathway [2]. 
55.00 55.00 55.50 55.40 44.10 50.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.98 0.70 -4.88 6 90 2009-01-15 18:05:59 2008-02-22 16:23:21 4 2 66 0 53 97 1 186.70 48 80.28 CHANGED spS+S+SpShsc...ptpcp+c+.......pp.psppR.pptt......................pcshKKt++Rt+.........ppSSSSSSSoSsSsSppp.ppcp+K+Rtt+KpK..t+KcKKct++.cKcupcct........E.ttlscAhsts.Lchhpppst.pt.sP.............shTDEQKu+lpsh+PhTKEEY-ARQSVIR+VVDPETGRTRLIKGDGElLEEIVSKERH+EINK.............QATRGDGttFQtRs.Ghh ..............................................t.................................................................................................pt...tptpp.p..t............up.sSSo.SS..S......SoS...S...S.....sS...s......p...p..spppt+KK+t..KcK........c.K............p.c+K.cKK+....c..KKhKK+...sKc+t..........ptpt.......sc....s.h..Pssslct.hpcps..tpcp.s..sP................lhTDEQKSRlpAM+PMTKEEW-ARQSlIR+VVDPETGRTR..LI.KGDGEVLEE...I.VoKERH+EINK............................QAT+GDGhhFQh+s.sh.h......... 0 15 20 36 +10333 PF10501 Ribosomal_L50 Ribosomal_S39; Ribosomal subunit 39S Buljan M, Coggill P anon TreeFam_TF105895 Domain The 39S ribosomal protein appears to be a subunit of one of the larger mitochondrial 66S or 70S units [1]. Under conditions of ethanol-stress in rats the larger subunit is largely dissociated into its smaller components [2]. In E. coli, in the absence of the enzyme pseudouridine synthase (RluD) synthase, there is an accumulation of 50S and 30S subunits and the appearance of abnormal particles (62S and 39S), with concomitant loss of 70S ribosomes [2]. 
25.10 25.10 25.10 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.29 0.71 -4.10 30 196 2012-10-02 01:16:24 2008-02-22 16:43:46 4 3 179 0 144 199 2 108.30 23 36.59 CHANGED pphssssplpph..l.ppstppsht...........................s.s.sahphsLsD.phKFtFlKphppphG+hlPDstlpplpTlpclhpah.......psss...cscp.sctl..ptcthshP.NVplhpct ..................................................................p..h.sspphpph..l............phhpphht.......................................s.sa.phsL.p.D.phKFphlppltpphG..HhlPsspLpph....pol............pclhsah........spss...pspphh-tl....pthsLP.Nl+lh.c......................... 0 28 59 106 +10334 PF10502 Peptidase_S26 Signal peptidase, peptidase S26 Mistry J. Rawlings N anon Manual Domain This is a family of membrane signal serine endopeptidases which function in the processing of newly-synthesised secreted proteins. Peptidase S26 removes the hydrophobic, N-terminal, signal peptides as proteins are translocated across membranes. The active site residues take the form of a catalytic dyad that is Ser, Lys in subfamily S26A; the Ser is the nucleophile in catalysis, and the Lys is the general base. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.88 0.71 -4.65 32 3046 2012-10-02 16:34:55 2008-02-25 12:55:21 4 13 2357 14 823 4403 991 93.00 32 34.94 CHANGED lhhNsosShPlGLYplpshsp........hpsGclVhsssPpssAhhhA.pRGYLspGl......PLlK+VhAlsGpcVshsssh.lsl-G..lshupshtpDphGRsLPsap.tstslssGElFLhu.ssssSaDuRY..FGPl..ssSul..lGhApPlaT .......................................................................................................s.......................................................................................................................................................................................................................................................................................................................................................................................................................................................p.......h...........h......VPtGp.Y.FhM........G.D.....N.....R..s..s....St.DS........R......Y.........a......G.....h.....V..P.cpsllG+A.h.lh.h............................................... 0 221 509 690 +10335 PF10503 Esterase_phd Esterase PHB depolymerase Mistry J, Fushinobu S anon Manual Family This family of proteins include acetyl xylan esterases (AXE), feruloyl esterases (FAE), and poly(3-hydroxybutyrate) (PHB) depolymerases. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.53 0.70 -5.10 2 1139 2012-10-03 11:45:05 2008-02-25 13:32:45 4 32 653 0 468 2296 946 182.10 23 48.03 CHANGED LAYtLYlPP.....Gt.R...PhVVMLHGCpQps--FAtGTRMNhLAcchGhsVLYPEQo.RApsptCWpWac.pppptGRGEssslAuLsculsstHthDsuRVYVAGLSAGuuhAslluhpaPDhFAAVulHSG.shGpAssshoAhssMRpG.t.sPussssAhssA.....hPsllhpGDuDpsVts+NA-pLssthhphsGhusupGA.tuu.RVpptpoG ............................................................................................h....hal.P..s..................s...h....t......s...h..s..Ll.V.h.LH..G.........C.....t...t........s....u.....p....s......h.....t...p.........s......o.....t......h....s.........p.........l...A....-...p....t...s......a.l......V......l....Y...P........p....t......s............t.....t........s.......t............s..........p...........t....C.......a......s.........h........h.s..............s........s.....p.......p.....p......s.......t.......s......-.....s.............s..............h.........l......t...s.......h.....l....p.....t...l...h...p......p...a....s.........l...D.......s.s..R..V..a..ls..Gh...SuGu..hM.s..s.h..hu..s.........p..........a.......P.-......l........F....A....A.......s....u..........h....h......u...........G.....h.......s......h.....s......s.....h..................s....................................................................................................................................................................................................................................................................................................................................................................tsttsts.................................................................................................................................................. 
0 165 303 401 +10336 PF10504 DUF2452 Protein of unknown function (DUF2452) Buljan M, Coggill P anon TreeFam_TF105992 Family This protein is found in eukaryotes but its function is unknown. 20.70 20.70 21.50 21.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.96 0.71 -4.96 10 150 2009-01-15 18:05:59 2008-02-26 12:47:45 4 1 134 0 83 166 179 138.60 36 80.19 CHANGED spVsLVEsNPsP.sGhplVSPYpouRhu-PhDLVsLApplQcADphI+sNACsKLTVIAEQIRaLQEQARKVLEEu+RDtDLHHAACNFsK+PGpIYaLYcR-SGQpYFSMLSPcEWGs.SCPHpFlGuY+LEHDhSWTPhE-lEcpDsclchs-+Llspss ....................................................t....................ss..sthh..s...DlltLApplppADp.hl+sssss..KLp.lIs-QlphLQpQA+clLc...-uccstcLHpusCNFhKtPGplYaLYpRp....s....GppYFShlSPcEWGs..ss.s.p..c.alGuY+LptDhSW..T..Ph-clpppptphthhpphh...s.............................. 0 30 42 62 +10337 PF10505 NARG2_C NMDA receptor-regulated gene protein 2 C-terminus Buljan M, Coggill P, Berhardt R anon TreeFam_TF106272 Domain The transition of neuronal cells from pre-cursor to mature state is regulated by the N-methyl-d-aspartate (NMDA) receptor, a glutamate-gated ion channel that is permeable to Ca2+. NMDA receptors probably mediate this activity by permitting expression of NARG2. NARG2 is transiently expressed, being a regulatory protein that is present in the nucleus of dividing cells and then down-regulated as progenitors exit the cell cycle and begin to differentiate. NARG2 contains repeats of (S/T)PXX, (11 in mouse , six in human), a putative DNA-binding motif that is found in many gene-regulatory proteins including Kruppel, Hunchback and Antennapedi [1]. This C-terminal domain belongs to the PD-(D/E)XK nuclease superfamily [2]. 
20.70 20.70 20.90 21.60 20.40 19.60 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.32 0.70 -5.16 5 96 2012-10-11 20:44:46 2008-02-26 12:56:43 4 4 76 0 65 111 1 201.00 35 25.27 CHANGED --cS-hlsPsNuNoshpLWoLQpts-.......DaplhlRhSlsslc...cocupcslts+F..l.l.lKLEYQs-aGsEtMSKSELl+tWscphL+suohuasuRISA+TaclhLcEKLTLtsLEcpL+caYsoShsNLLoHLhphLKlLsulPsGDYLLpHus..KDKFLLslhosDspsTPsSasLH.l.pTcoussDpPuloussWlPIDPoLlsphHcEpsLLPCoFPs .........................................................ppsphhts.pcuNhsYpLaoLp......................-lhLLVRsS......lptlc...csppccpl.pp.ph...slhlhPKlEYQssa.Gs.EsLTcSELs+hWspohL+ssspha.su+IsuhTu+lhhlEclo.ppL...ccp...lsshp...s..shhslLpcl..........LK....pL....s..uLptGs.YLLpHss...cDs.lllhKss-......p....s.o..csu....asLa....pscss.ssh.s.s..h...p.ssWlPlDPslhhshHhcptRlPCoFPs............................................. 0 20 25 43 +10338 PF10506 MCC-bdg_PDZ PDZ domain of MCC-2 bdg protein for Usher syndrome Buljan M, Coggill P anon TreeFam_TF105384 Domain The protein has a high homology to the tumour suppressor MCC (mutated in colon cancer; or MCC1 hereafter) and was named MCC2. MCC2 protein binds the first PDZ domain of AIE-75 with its C-terminal amino acids -DTFL. A possible role of MCC2 as a tumor suppressor has been put forward. The carboxyl terminus of the predicted protein was DTFL which matched the consensus motif X-S/T-X-phi (phi: hydrophobic amino acid residue) for binding to the PDZ domain of AIE-75. 21.50 21.50 23.80 22.30 21.30 19.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.99 0.72 -4.14 7 229 2009-01-15 18:05:59 2008-02-26 13:21:28 4 6 60 0 113 229 0 64.60 36 16.56 CHANGED ccLpuplEcLcshNchLstsLpthKtpsEphsh.lGctEupAsAL..+LALpapp+shEsashhhAlht ......c+LpuplEcLcStschLsts.E-p+ppstchshhlt+t-ushsAh.....+Lth..papp+scEh.shLhAlhE...... 
0 20 28 55 +10339 PF10507 DUF2453 Protein of unknown function (DUF2453) Buljan M, Coggill P anon TreeFam_TF105823 Family Some members of this family are purported to contain GAF domains but this could not be confirmed. The function is not known. It is likely to be a transmembrane protein. 25.00 25.00 25.00 25.60 23.70 23.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.34 0.72 -4.26 16 194 2009-01-15 18:05:59 2008-02-26 13:33:18 4 15 115 0 142 191 6 101.10 44 34.68 CHANGED hsuulPFlGFGFLDNulMIlAG-hIDpslGhhLGlSTMAAAALGNllSslhGlthtshlEphsp+ls.hphPsLT.tQhc.ptsphusplGsslGlhlGClLGMhPLLFhss .................hsulPFlGFGFlDNhIMIlAG-hI-holGhhhulSTMA..AAALGNllSDlsGlshush.lEths.p+l.......G..hph...........P....sLostQhc.....h.........hps+husphGpslGlslGClLGMhPLlFht.s.......................... 0 65 83 122 +10340 PF10508 Proteasom_PSMB Proteasom_PSMB5; Proteasome non-ATPase 26S subunit Buljan M, Coggill P anon TreeFam_TF106231 Family The 26S proteasome, a eukaryotic ATP-dependent, dumb-bell shaped, protease complex with a molecular mass of approx 20kDa consists of a central 20S proteasome,functioning as a catalytic machine, and two large V-shaped terminal modules, having possible regulatory roles,composed of multiple subunits of 25- 110 kDa attached to the central portion in opposite orientations. It is responsible for degradation of abnormal intracellular proteins, including oxidatively damaged proteins, and may play a role as a component of a cellular anti-oxidative system. Expression of catalytic core subunits including PSMB5 and peptidase activities of the proteasome were elevated following incubation with 3-methylcholanthrene. The 20S proteasome comprises a cylindrical stack of four rings, two outer rings formed by seven alpha-subunits (alpha1-alpha7) and two inner rings of seven beta-subunits (beta1-beta7). 
Two outer rings of alpha subunits maintain structure, while the central beta rings contain the proteolytic active core subunits beta1 (PSMB6), beta2 (PSMB7), and beta5 (PSMB5). Expression of PSMB5 can be altered by chemical reactants, such as 3-methylcholanthrene [1]. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.55 0.70 -6.23 9 249 2012-10-11 20:01:02 2008-02-27 11:21:59 4 12 149 0 143 379 6 365.10 21 83.06 CHANGED htshhpchlpplpst...phh-pLsthpsslstps.hptlspth.........lassLs..os....p.-ph.lthplLcplltshsscsh...hsphpshLpcGLspssssl+thshhp....ltRhlpppsushphlsspslh.hlhhsltspchssuptAhphLsplsppphthctl.......pspLpslhpp.s-hlRhRlY-lslcluuhSssshshptp...lhcplLpElps.-DVLlphssl-llscLAp.spaGhpaLtppulh-plsshlptscpDP...hushhlsGhh.....+FastlAsh.ss.plhpsaPphhpplh...phhsstD.shhssAhDolGhlutsl-GKphL....+sssshcphltthusttp.hshch+hphLpulsslh..tppptpppl........shscsWYpphuppshp....lhhsllppPFPEl+suuhchhpslssasWslpt.hlsssGFl-alLDcpoEpsK-hc.tKapll+plsp..p....usslhucsphl....+LpcYlp.GPaahps.sssAsp..ss- 
.....................................................................................................................................................................................................................l..hh..................ph.........l.hs.L..p..sp...tlp.hsh.t.......ltph....pp...t..t....s.....ht..hh.s...p......l....h...ll.hl.tt...php.l.sptA..hphltpls.t..p.t.............th.p...lh....h......htpLtpl.h.tp...ss....h.h.R.......h.R..lhp................l....lspl.sphS.......p...h.p.hhhp.t..s..l..ls.llp....plps..cDhLl...p...hssh-hlspL.u....ppps............h....pal...p.p.slhptl....sshl.t..ptss.............tt.hhl..uhh.........................................t..h.............h..h.......h.............h.......h................s..........u.hp.shu...h.s.ts..puh.hl...........h...h..h....t.........t...hpshs.h.........................................................hht.ht......................h.t.....pps..-hp.s.hthh..h.........h............h.......t........t.hhphhhs.t.t...............h+hthh..h..............................h..........thp.hht.G.a................................................................................................................................................ 1 59 77 114 +10341 PF10509 GalKase_gal_bdg Galactokinase galactose-binding signature Finn R, Coggill P anon PROSITE_PS00106, Pfam-B_2277 (release 22.0) Domain This is the highly conserved galactokinase signature sequence which appears to be present in all galactokinases irrespective of how many other ATP binding sites, etc that they carry [1]. The function of this domain appears to be to bind galactose [2], and the domain is normally at the N-terminus of the enzymes, EC:2.7.1.6 [3]. This domain is associated with the families GHMP_kinases_C, Pfam:PF08544 and GHMP_kinases_N, Pfam:PF00288. 
20.20 20.20 20.20 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.79 0.72 -4.69 183 3091 2009-01-15 18:05:59 2008-02-27 17:35:22 4 16 2627 25 790 2189 438 51.10 45 12.61 CHANGED tptFpptF....upp..sp..h.hhpAPGRVNLIGEHTDYNsGhVLPhAIshsshsu.sphp ..................p.phFtphF.....G.t......ss......h.shtAPG.......RVNLI.......G.EHTDYNsGaVlPsAI.shsThsust.R............ 0 262 473 658 +10342 PF10510 PIG-S Phosphatidylinositol-glycan biosynthesis class S protein Buljan M, Coggill P anon TreeFam_TF105857 Family PIG-S is one of several key, core, components of the glycosylphosphatidylinositol (GPI) trans-amidase complex that mediates GPI anchoring in the endoplasmic reticulum. Anchoring occurs when a protein's C-terminal GPI attachment signal peptide is replaced with a pre-assembled GPI [1]. Mammalian GPITransamidase consists of at least five components: Gaa1, Gpi8, PIG-S, PIG-T, and PIG-U, all five of which are required for function. It is possible that Gaa1, Gpi8, PIG-S, and PIG-T form a tightly associated core that is only weakly associated with PIG-U. The exact function of PIG-S is unclear [2]. 
20.40 20.40 21.50 20.60 19.30 20.30 hmmbuild -o /dev/null HMM SEED 517 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.50 0.70 -5.83 32 374 2009-09-10 22:53:53 2008-02-27 17:48:41 4 11 269 0 266 360 2 405.30 24 84.23 CHANGED luahhl..hlhl.......................GlPlWa+TTol...YRAsLPhpplpshspth.........................psplphslslhlpssp...........h.ssttcpl.pplpctlss.....p...t.taplplphhpts.pppppht....t.........................h..tt..th.phphp.tspph.lhhspssl..sp.....lsthlushLh...........tplhptchtplsplhsstshttsp........................................ulpaussYcl.sFSLLssssp..shs..WDI-........sulcc.YhpPllptLuslsNFol-oQl.Yaushs...hpshhspt.t............................................................................................................aslsps-LuphINssEhsLsss...hspsPs............lNFllYl.......Pss..ppuPlhlppps..............ssuFllPQWGGVhlhN.s..........tpp.sh.......lscptLtssh.sFspQLhpLLGlspsss.......................ssshcl-sLhRhpshpsltpussTLtSLs+LscslspIsIPcpVtspVpp..ulptlpp.uhctLpts.......phppALthuppAhshuE+AFF-t.sMltQhYFPsEHKhAVYlPLhuPlslsllhull+.hKchppcp+p 
.................................................................................................................................................................hhhlhhhlGlPhWapoTph...aRs.LPhpth.tht...................................h....h.l.l...t...................tptl...t..h.p.hp.....................hphph...........t...............................................................t.....t........h...hhhs...tt.....t...............ls.hhtt.lh..........................hht....th.h..hh...s.t...t.....................................................................hsht.s.thcl.shoLhssssp.....p...W-lp...................t.slpp.alpPhlp..tlusls..NF.o....lcoQl.has.hs.......hpsp.........t............................................................................................................hh.l...p..psLsth...l.ss...sE.h..Lsss..........hs.ts.s.................................................................lpallal..........P.....tpsPlhlt....ttp........................ssuah.P......pWGulhlhN.s................tt..h.................hsp.th....hh.hFh.s..pLh.Lh...Ghsps........................................h.hclstlhhhpshp.lhpussTLtSLspLh.tpls..phsI.ppluppV.p.......ulttlpt.shp.ltts..........................................htpuh.h.up.A.t.uEpAFF-..ohlt.hYFPp-pKhAlYlPLhhPhslsllhuhhp.hhth................................................................................................ 0 103 157 222 +10343 PF10511 Cementoin Trappin protein transglutaminase binding domain Finn R, Coggill P anon PROSITE_PS00313 Family Trappin-2, itself a protease inhibitor, has this unique N-terminal domain that enables it to become cross-linked to extracellular matrix proteins by transglutaminase [1]. This domain contains several repeated motifs with the the consensus sequence Gly-Gln-Asp-Pro-Val-Lys, and these together can anchor the whole molecule to extracellular matrix proteins, such as laminin, fibronectin, beta-crystallin, collagen IV, fibrinogen, and elastin, by transglutaminase-catalysed cross-links. 
The whole domain is rich in glutamine and lysine, thus allowing transglutaminase(s) to catalyse the formation of an intermolecular epsilon-(gamma-glutamyl)lysine isopeptide bond [2]. Cementoin is associated with the WAP family, Pfam:PF00095, at the C-terminus. 22.30 22.30 22.30 22.30 21.40 22.10 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.26 0.73 -6.52 0.73 -3.99 21 190 2009-01-15 18:05:59 2008-02-28 13:12:49 4 14 25 1 41 164 0 16.90 61 22.81 CHANGED GQDsVKGph.lKGQDss .GQDsVKGpsPVKGQDss... 0 2 2 2 +10344 PF10512 Borealin Cell division cycle-associated protein 8 Buljan M, Coggill P anon TreeFam_TF101077 Family The chromosomal passenger complex of Aurora B kinase, INCENP, and Survivin has essential regulatory roles at centromeres and the central spindle in mitosis. Borealin is also a member of the complex. Approximately half of Aurora B in mitotic cells is complexed with INCENP, Borealin, and Survivin. Depletion of Borealin by RNA interference delays mitotic progression and results in kinetochore-spindle mis-attachments and an increase in bipolar spindles associated with ectopic asters [1]. 19.50 19.50 19.70 23.70 19.00 19.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.66 0.71 -4.17 9 119 2009-01-15 18:05:59 2008-02-28 14:20:31 4 3 68 2 73 116 0 115.60 33 42.21 CHANGED RhSRuphtTPhsup...htsShs+.hshhTP.......+hsspss.hshLRpsphuEslYSho..GSPl.Ass...c.hslslPItsG....cshcL.As-l-oh.lppLDscsLppIKpLpspLspIssphc ................................................S+tthhTPtssp........thS.hs.p.psh...hTP...............+hs..sps..tts.h.L.R...pPtsuEhlhohu..GSPL.sss...............cph.lslPlssG.....csh+L...ss.-.lpph...slt..pLDs....csLspIKpLpspLtplss.h................................. 0 17 20 49 +10345 PF10513 EPL1 Enhancer of polycomb-like Mistry J, Wood V anon Pfam-B_3033 (release 22.0) Family This is a family of EPL1 (Enhancer of polycomb-like) proteins. 
The EPL1 protein is a member of a histone acetyltransferase complex which is involved in transcriptional activation of selected genes [3]. 21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.34 0.71 -3.99 83 1066 2009-01-15 18:05:59 2008-02-28 14:26:12 4 25 277 0 649 973 5 150.90 23 16.50 CHANGED hRh++lshp....ptl.lhp..t.c-l.shpt..............................................................................t.t..p.............p..ptplpthhstsp..........................................lPpPshp....hp...hpp.hsst.atpPp.sYl+ap.................................s--hs................tscYDhD-cDptWLcph...........................Npc...............t.shp.................lopppFEhlh-chE+p ......................................................................................................................................................................................................................................................................t....th.s......p.h.lhp....p-h.s.....................................................................................h..tp.......................t..ccp.hptshpttp..........................................................................................................................................lPpPphc...........lp.......hpp..ss....a.p..Pp..pa.h.+ht...ttt................................................st-hs............................tscYDhD-cD...ttW.Lc.h.h..............................................................................Npc.................................................t.h...t................................................lstppFEhlh-clE+........................................................................................................................................................... 
0 168 282 466 +10346 PF10514 Bcl-2_BAD Pro-apoptotic Bcl-2 protein, BAD Buljan M, Coggill P anon TreeFam_TF102001 Family BAD is a Bcl-2 homology domain 3 (BH3)-only pro-apoptotic member of the Bcl-2 protein family that is regulated by phosphorylation in response to survival factors [1]. Binding of BAD to mitochondria is thought to be exclusively mediated by its BH3 domain. Membrane localisation of BAD mediates membrane translocation of Bcl-XL. The C-terminal part of BAD is sufficient for membrane binding. There are two segments with differing lipid-binding preferences, LBD1 and LBD2, that are responsible for this binding: (i) LBD1 located in the proximity of the BH3 domain (amino acids 122-131) and (ii) LBD2, the putative C-terminal alpha-helix-5 [2]. Phosphorylation-regulated 14-3-3 protein binding may expose the cholesterol-preferring LBD1 and bury the LBD2, thereby mediating translocation of BAD to raft-like micro-domains [3]. 25.00 25.00 35.70 35.70 23.30 23.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.06 0.71 -4.29 4 66 2009-01-15 18:05:59 2008-02-28 16:01:59 4 2 34 2 24 48 0 140.30 56 92.03 CHANGED MFQIPEFE.SEQEDuSpsDRGLGPS.oGDtP.G.tKpahpAPGLLtphspQQ.GQssusSHHGGsGshEhRSRHSSY.AGsE-DEtM.tE-.uPFRGRSRSAPPNLWAAQRYGRELRRMSDEFcsSF.KGLPRPKSAGTATQMRQSsSWTRhlQSWWsRNLG+GuSsPSQ .........................................................................MFQIs-.-.oEpEDs........SsspRuLuPp.ststs.u.....htsPuhhhtssHpQ.tp..ssupsH+G.Gs.ushEhR..SRH..S.......Sh.uG..s--....DEuh.tEE.uPFRGRSRSAPPNLWAApRYGRELRRMSDEFpsSF.K.G.LP.RPKSAGTAoQMcQSsSWp+hh.uhasRp.ucGtstPs.............................. 
0 1 4 9 +10347 PF10515 APP_amyloid beta-amyloid precursor protein C-terminus Finn R, Coggill P anon PROSITE_PS00320, Pfam-B_2082 (release 22.0) Family This is the amyloid, C-terminal, protein of the beta-Amyloid precursor protein (APP) which is a conserved and ubiquitous transmembrane glycoprotein strongly implicated in the pathogenesis of Alzheimer's disease but whose normal biological function is unknown. The C-terminal 100 residues are released and aggregate into amyloid deposits which are strongly implicated in the pathology of Alzheimer's disease plaque-formation. The domain is associated with family A4_EXTRA, Pfam:PF02177, further towards the N-terminus. 19.90 19.90 20.50 20.20 19.20 18.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.50 0.72 -3.71 9 380 2009-01-15 18:05:59 2008-02-28 17:07:07 4 21 99 18 118 341 0 51.60 67 8.22 CHANGED lIllullhl+.R+shusluHGhVEVDss....hoP...EE++LscMQppGYENPTYKaaE .............................................................VIVIoLVML+.K+Q...Yso.I.pHGlVE.VDsh....lTP......EERHLsKMQppGYENPTYKahE........ 0 21 30 66 +10348 PF10516 SHNi-TPR SHNi-TPR Mistry J, Wood V anon Pfam-B_14727 (release 22.0) Repeat SHNi-TPR family members contain a reiterated sequence motif that is an interrupted form of TPR repeat [1]. 20.70 20.70 20.70 20.80 20.60 20.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.33 0.72 -4.71 25 326 2012-10-11 20:01:02 2008-02-29 10:58:00 4 17 229 0 195 330 10 37.60 37 7.83 CHANGED A-sashLGElSLEsEpFsQAspDhcpuLpL+pplhssc ..........Apsa.tLGElSlEuEpaspAlpDappsLslppphh.sc.......... 1 65 104 158 +10349 PF10517 DM13 Electron transfer DM13 Iyer L, Mistry J anon Manual Domain The DM13 domain is a component of a novel electron-transfer system potentially involved in oxidative modification of animal cell-surface proteins [1]. 
It contains a nearly absolutely conserved cysteine, which could be involved in a redox reaction, either as a naked thiol group or through binding a prosthetic group like heme [1]. 25.00 25.00 25.60 26.40 24.60 24.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.33 0.72 -3.67 66 749 2009-01-15 18:05:59 2008-02-29 15:11:02 4 14 535 0 284 582 71 101.70 27 42.30 CHANGED G....pFpt..pss...HhspGplpl....supphlthp.shss...G...PDh+laLuss......tsptsh.h.psph..lplGsl.Ksa..sshhhslPsslclscassVllWC-pFu.hhuuAphp .........................pFpt..pss.....ct.s.p.G.pspl..........hsucp.hlh.t.sacupt.G...PDhalaLsps........................sshcpt..........tclu....l....ct....ppc..shsLPps.l...c.l..sc..aspVslaCcchps..FGuApl..... 0 108 166 254 +10350 PF10518 TAT_signal TAT (twin-arginine translocation) pathway signal sequence Bateman A anon Bateman A Motif \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.85 0.74 -7.47 0.74 -3.49 64 1505 2012-10-02 00:19:25 2008-02-29 15:28:33 4 168 868 0 484 1326 535 25.60 34 5.54 CHANGED phoRRshL+uuuusuussshuussss ....loRRsFLKuuuusuAusuluush............. 0 160 342 422 +10352 PF10520 Kua-UEV1_localn Kua-ubiquitin conjugating enzyme hybrid localisation domain Buljan M, Coggill P anon TreeFam_TF106147 Domain This domain is part of the transcript of the fusion of two genes, the UEV1, an enzymatically inactive variant of the E2 ubiquitin-conjugating enzymes that regulate non-canonical elongation of ubiquitin chains, and Kua, an otherwise unknown gene. UEV1A is a nuclear protein, whereas both Kua and Kua-UEV localise to cytoplasmic structures, indicating that the addition of a Kua domain to UEV confers new biological properties. UEV1-Kua carries the B domain with its characteristic double histidine motif, and it is probably this domain which determines the cytoplasmic localisation. 
It is postulated that this hybrid transcript could preferentially direct the variant polyubiquitination of substrates closely associated with the cytoplasmic face of the endoplasmic reticulum, possibly, although not necessarily, in conjunction with membrane-bound ubiquitin-conjugating enzymes [1]. 21.70 21.70 24.30 23.40 20.30 19.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.44 0.71 -4.62 21 202 2009-01-15 18:05:59 2008-02-29 16:08:18 4 3 148 0 125 190 17 167.40 46 60.55 CHANGED hlhADFsSGlsHWusDTWGos-pPllGp.aIcsFREHHlcPpsIT+HDFlEsNussshsplssLhhhshphhhpsss.....h.......shps...FlhshshalshTNQlHKWSHthhu.lPshVhhLQchcllLsR+cH+hHHpuPa-shYCIToGWLNhsL-clsFW+phEtllphhTGhpPRucD .....................................lhADFhSGlVHW......uuD...TWGSs-hPllGcsFIRsFREHHlDPouI...TRH....D.FIETNGDNs.....hlslhsLhhhsaphhstss.t....th...h..............shps.Flhshh.lalshTN........QIHKWSHohhu..LPtaVhhLQch+lILPR+HH.R.lHHluPH-oYaCI..T...TGWlNhsL-....ph....tFapthE.llphhTG.pPRspD.................. 1 45 66 98 +10353 PF10521 DUF2454 Protein of unknown function (DUF2454) Mistry J, Wood V anon Pfam-B_82729 (release 22.0) Family A Schizosaccharomyces pombe member of this family is known to interact with Tel2. Tel2 is a component of the TOR complexes [1]. 
22.30 22.30 22.30 22.60 22.10 21.80 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.79 0.70 -5.40 29 199 2009-01-15 18:05:59 2008-02-29 16:15:39 4 5 178 0 142 207 0 256.50 20 52.36 CHANGED cllhsLusasss...ppshooppspshsptllp...............sss.hphlpp..........hLhppl+Phhh+s............p..thottG+.............shpP..h.tht.s......t.psWKhssshtshslsWhlh..pthspspls....sphshllPslLsLlDDp...ssshKhpGspLLpplLp.h.psp.........LppTsLssVap-ulhspLh.lPshostspolpLlpssaPsLhsLh.h.......................tsspsp.......phtplschlpcsILsshtthps...............a.pLsphLlppl.thltp.LGhpsstaLpcll.hltp.hlpsPhhsth ........................................................................................h..h.has.....p.sWs...o.ts..tphsttlhp...................pt..tt...htp...........hlhthl+Phh.+p.......................t...............................t..............psWKpps..sht.t.shtWslt..............psspshls.......................pph.hllPs.hLsllDDh...psphKhtGlphLpplltps.tsp........................LtppshspVh.culh.s.pLh........spshpLlphshsslhsLh.h...................................psstt.........c.pphschltphlLss.hthttp...................tlpph.hhpp.ls.hlpp..lGl.hss+aLpclh.hl.t.hlps..h................................................................... 0 37 72 109 +10354 PF10522 RII_binding_1 RII binding domain Bateman A anon Vijayaraghavan et al. Mol Endocrinology 13(5):705-717 (1999) Motif This domain is found is a wide variety of AKAPs (A kinase anchoring proteins) [1]. 14.40 14.40 14.40 14.40 14.30 14.30 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.55 0.76 -6.60 0.76 -2.98 14 161 2009-01-15 18:05:59 2008-03-03 10:00:48 4 11 27 0 47 185 0 18.00 44 1.91 CHANGED plcphAsplVptVIppAh .pLEatAspLlptlIQpAh... 
2 4 4 9 +10355 PF10523 BEN BEN domain Lakshminarayan I, Bateman A anon Lakshminarayan I Domain The BEN domain is found in diverse animal proteins such as BANP/SMAR1, NAC1 and the Drosophila mod(mdg4) isoform C, in the chordopoxvirus virosomal protein E5R and in several proteins of polydnaviruses. Computational analysis suggests that the BEN domain mediates protein-DNA and protein-protein interactions during chromatin organisation and transcription [1]. 21.30 21.30 21.40 21.60 21.20 21.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.34 0.72 -3.92 70 842 2009-01-15 18:05:59 2008-03-03 10:50:14 4 14 114 0 412 780 0 78.60 21 21.22 CHANGED pphsptLlptlFscphhsp..shssphp..................t.sp..tLDsptlphI+phlppthshpp.......phWt.pshptls..phhpstpppt ........t.phsppLLthlFscph.hssp.shsGphpp................ttss+p.tLDspclphl+.....palpthhsspc............p.a..phtppls..phhpp..p..h..................................... 0 71 102 212 +10356 PF10524 NfI_DNAbd_pre-N Nuclear factor I protein pre-N-terminus Finn R, Coggill P anon PROSITE_PS00349 Family The Nuclear factor I (NFI) family of site-specific DNA-binding proteins (also known as CTF or CAAT box transcription factor) functions both in viral DNA replication and in the regulation of gene expression in higher organisms. The N-terminal 200 residues contains the DNA-binding and dimerisation domain, but also has an 8-47 residue highly conserved region 5' of this, whose function is not known. Deletion of the N-terminal 200 amino acids removes the DNA-binding activity, dimerisation-ability and the stimulation of adenovirus DNA replication [1]. 21.50 21.50 22.20 22.90 20.80 19.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.18 0.72 -4.36 7 412 2009-01-15 18:05:59 2008-03-03 11:18:10 4 5 78 0 136 328 0 42.20 88 9.29 CHANGED sh...pQ--htPFlEtLLPaV+A.AYsWFpLQAAKR+aaKcH-K .....s...shhQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK... 
0 19 35 75 +10357 PF10525 Engrail_1_C_sig Engrailed homeobox C-terminal signature domain Finn R, Coggill P anon PROSITE_PS00033, Pfam-B_11539 (release 22.0) Domain Engrailed homeobox proteins are characterised by the presence of a conserved region of some 20 amino-acid residues located at the C-terminal of the 'homeobox' domain. This domain of approximately 20 residues forms a kind of a signature pattern for this subfamily of proteins [1]. 21.70 21.70 22.50 21.70 19.90 21.60 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.13 0.72 -7.54 0.72 -4.82 23 236 2009-01-15 18:05:59 2008-03-03 14:14:06 4 1 120 2 106 226 0 30.00 64 10.64 CHANGED AoGs+NsLALpLMAQGLYNHST.sshsccp-sp ....AoGtKNsLALpLMAQGLYNHST.sshppcp-p............... 0 27 36 68 +10360 PF10528 PA14_2 GLEYA domain Linder T, Bateman A anon Linder T Domain This presumed domain is found in fungal adhesins and is related to the PA14 domain. 19.70 19.70 19.70 20.20 19.50 18.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.37 0.71 -4.11 8 132 2012-10-02 01:24:23 2008-03-03 17:31:24 4 25 47 1 112 246 18 116.20 24 15.87 CHANGED shsIVaQhahhs.tT.sTYTLsV.sNsDDlFaGWFGs.KAhSGWSss.......NYDsYAhWp.......GppuhushssssLssGpalPlRFlhANGuuhGuFsFsFsussosslssT........oYsY...TuTC ......................hsl.hp.GYFhsspo.GsYoFsl........p..sDD...hshlhh.Gs......sA......as.s.....hs.ps......................Nhsh..hsh..h.........ssss..s..p.s..sh..shpL..puGhYYPlRl.ha.sNt.s.ssushsh.shpsP.sGshhhs......................t................................... 0 18 59 104 +10361 PF10529 Hist_rich_Ca-bd Histidine-rich Calcium-binding repeat region Finn R, Coggill P anon PROSITE_PS00328 Repeat This is a histidine-rich calcium binding repeat which appears in proteins called histidine-rich-calcium binding proteins (HRC). HRC is a high capacity, low affinity Ca2+-binding protein, residing in the lumen of the sarcoplasmic reticulum. HRC binds directly to triadin. 
This binding interaction occurs between the histidine-rich region of HRC and multiple clusters of charged amino acids, named as the KEKE motifs, in the lumenal domain of triadin. The region in which this repeat is found in many copies is long and variable but is the acidic region of the protein. There is also a cysteine-rich region further towards the C-terminus [1]. HRC may regulate sarcoplasmic reticular calcium transport and play a critical role in maintaining calcium homeostasis and function in the heart. HRC is a candidate regulator of sarcoplasmic reticular calcium uptake [2]. 17.50 9.00 17.50 9.10 17.40 8.90 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.44 0.73 -6.52 0.73 -3.76 25 173 2009-01-15 18:05:59 2008-03-04 12:26:43 4 10 17 0 51 157 0 15.30 56 11.48 CHANGED HRH..RGHc-E-D-D-D .....HRH..pGHccEED-Dss..... 0 6 6 6 +10362 PF10530 Toxin_35 Toxin with inhibitor cystine knot ICK or Knottin scaffold Finn R, Coggill P anon PROSITE_PS60029 Family Spider toxins of the CSTX family are ion channel toxins containing an inhibitor cystine knot (ICK) structural motif or Knottin scaffold. The four disulfide bonds present in the CSTX spider toxin family are arranged in the following pattern: 1-4, 2-5, 3-8 and 6-7. CSTX-1 is the most important component of C. salei venom in terms of relative abundance and toxicity and therefore is likely to contribute significantly to the overall toxicity of the whole venom. CSTX-1 blocked rat neuronal L-type, but no other types of HVA Cav channels [3]. Interestingly, the omega-toxins from Phoneutria nigriventer venom (another South American species also belonging to the Ctenidae family) are included as they carry the same disulfide bond arrangement. It has been suggested that CSTX-1 may interact with Cav channels. A calcium ion voltage channel heteromultimer containing an L-type pore-forming alpha1-subunit is the most probable candidate for the molecular target of CSTX-1 and these toxins [3]. 
25.00 25.00 27.80 30.10 20.60 24.70 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.80 0.72 -6.97 0.72 -3.86 6 67 2009-09-11 14:32:54 2008-03-04 12:49:05 4 1 6 0 0 71 0 26.30 76 26.11 CHANGED CIs++cSCppD.++GCC....ahhoCsC CIPKHHECTSN.KHGCCRGshFKYKCQC 0 0 0 0 +10363 PF10531 SLBB SLBB domain Iyer LM, Bateman A anon Iyer LM Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.64 0.72 -4.08 139 11758 2012-10-03 10:59:06 2008-03-04 15:21:41 4 110 4407 39 3040 9399 3756 52.50 24 14.22 CHANGED hlslsGp.Vpc.....Pushclth..Gsslp-llph..uGGhstp.ut.......t..........sGshhshs .......................htlsGc.Vpp......P.G.h.h.plsh......G.......h.........s...lt..-hlpt...AG.Ghsps..up..............p...............st......h.............................. 0 1080 2052 2579 +10364 PF10532 Plant_all_beta Plant specific N-all beta domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Babu and colleagues [1]. It is found associated with the WRKY domain Pfam:PF03106. 21.60 21.60 22.00 33.00 21.50 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.35 0.71 -4.01 10 22 2009-01-15 18:05:59 2008-03-05 11:41:14 4 4 3 0 1 22 0 105.40 36 16.50 CHANGED pKlulDEossKLKLSYhsh.spsp..REsYIsDDEDVhlYLTss-pEuhRsVLHVEllsc.t.s-ch.EQlshV-R+..SSlGpNasplsstsc-h..csssshhhsE.......ssEsllEs-sp ........pKlulDEsshKL+LSY....spsc..REsYIsDDEDVhlaLTpscpcuhhsVLHVE.lst.t.sEch.EplShs-t+..SSVGhNhtc.ss.p-.h..c.stshh.s-.......tsEsllt.c....................... 0 0 0 0 +10365 PF10533 Plant_zn_clust Plant zinc cluster domain Iyer LM, Bateman A anon Iyer LM Domain This zinc binding domain was identified by Babu and colleagues and found associated with the WRKY domain Pfam:PF03106 [1]. 
20.60 20.60 20.90 20.70 16.40 20.10 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.76 0.72 -4.32 21 226 2009-01-15 18:05:59 2008-03-05 11:44:15 4 2 43 0 90 230 0 45.10 53 14.48 CHANGED +++Csttspsputt....suuuu+CHCSK+.RKt..RlKRslRVPAISuKlAD ................+++Cpst.spstsp.........suuuG+CHC..S.K+..R......K...Rl...KRoIRVPAISs.KlAD........ 0 11 54 73 +10366 PF10534 CRIC_ras_sig Connector enhancer of kinase suppressor of ras Finn R, Coggill P anon PROSITE_PS51290 Domain The CRIC - Connector enhancer of kinase suppressor of ras - domain functions as a scaffold in several signal cascades and acts on proliferation, differentiation and apoptosis. 22.00 22.00 22.90 22.30 21.90 21.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.04 0.72 -3.93 9 212 2009-01-15 18:05:59 2008-03-05 11:48:43 4 14 49 1 128 176 0 92.00 52 12.49 CHANGED sL+oLspKLpAus+sLQshIpuRh+ssu.-utsopchPschLsuVV-LItAA+uLLuWLsRh.FotloDaoss+.cIhpLChELsssVpccssss .........................NL+oLscKLpAuu+NLQNaI..suRR+...ustYD.G....psS..+..Kh....PNDFLTSVV-LIuAAKuLLA....WLDRsPFsulsD.aSlo+NpllQLCLELTTlVQpDCsV........ 0 10 17 69 +10368 PF10536 PMD Plant mobile domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Babu and colleagues in a variety of transposases [1]. 
24.00 24.00 24.10 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.50 0.70 -5.46 44 1219 2009-01-15 18:05:59 2008-03-05 12:39:04 4 78 27 0 764 1162 0 222.00 17 35.00 CHANGED uhuhhtsls....t..phphDps..LlsuLs-+WpsETpTFhhPhGEhTlTLpDVshLhGLsltGp......sVsushssp....hpch.......htchhthp.t.....................................................tphpppss.phs.Wlpphh....thssp.....................hpptAFLlhhluthlFss...pssptlshthh.slstslsp.....ssphuhGsAsLAhLYppLs....pustps............ssslsGshh...LlQlW...saERhthh....RP...............................p.t...h..............................................ps..............Ph..........sthWps...p.....psps.............hppth.........-t.p.sph.WpPY......spshttt....hhst.s.....................ppth......shhp..pss..L.l.............sh.......s......tlEhahPpRVhRQ.FGhtQs.lP......hph......hphhtp...tshtsastphtsh...h..t.tts..sp...........stYhcWatp ............................................................................................................................................................................................hs.....sh.-h..lhth......................................................................................................................................................................................................................................................................h...h............hsht...h......h..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....... 0 15 235 306 +10369 PF10537 WAC_Acf1_DNA_bd ATP-utilising chromatin assembly and remodelling N-terminal Finn R, Coggill P anon PROSITE_PS51136 Domain ACF (for ATP-utilising chromatin assembly and remodelling factor) is a chromatin-remodelling complex that catalyses the ATP-dependent assembly of periodic nucleosome arrays. The WAC (WSTF/Acf1/cbp146) domain is an approximately 110-residue module present at the N-termini of Acf1-related proteins in a variety of organisms. The DNA-binding region of Acf1 includes the WAC domain, which is necessary for the efficient binding of ACF complex to DNA. 25.00 25.00 27.20 25.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.27 0.72 -3.85 28 339 2009-01-15 18:05:59 2008-03-05 13:56:12 4 46 223 0 226 350 0 96.40 36 8.79 CHANGED ccVahlptTpEhFpsY--Yh......pRhshYppchaoCc.............lTG+SsLTahEALcSE..................ccttcplcp.FPptL+pPlLchlp.aspho+l-pLV-claphhKscaFsGEpV .............pVahh..TtEhFpsY-..-..ah......pR......h....h...hsp...........plWoCp.............lTG+..ssLTa.EALcSE........................p.cst.cplpp.FPp.LctPlLchlp.asph.o...pLccL.......s.........-p.l............ath....h+scaasGEpV..................................... 0 67 116 180 +10370 PF10538 ITAM_Cys-rich Immunoreceptor tyrosine-based activation motif Finn R, Coggill P anon PROSITE_ Domain Signal transduction by T and B cell antigen receptors and certain receptors for Ig Fc regions involves a conserved sequence motif, termed an immunoreceptor tyrosine-based activation motif (ITAM). It is also found in the cytoplasmic domain of apoptosis receptor. 25.00 25.00 28.30 27.30 18.30 17.60 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -6.84 0.72 -4.39 6 300 2009-01-15 18:05:59 2008-03-05 14:30:42 4 5 137 0 0 316 0 24.00 71 2.81 CHANGED +pGCYRTLulFRY+SRCYVGLVWC ....sGCYRTLsLFRYKSRCYIhThWs. 
0 0 0 0 +10371 PF10539 Dev_Cell_Death Development and cell death domain Finn R, Coggill P anon PROSITE_PS51222 Domain The DCD domain is found in plant proteins involved in development and cell death. The DCD domain is an approximately 130 amino acid long stretch that contains several mostly invariable motifs. These include a FGLP and a LFL motif at the N-terminus and a PAQV and a PLxE motif towards the C-terminus of the domain. The DCD domain is present in proteins with different architectures. Some of these proteins contain additional recognisable motifs, like the KELCH repeats or the ParB domain. 27.80 27.80 27.80 31.50 27.40 27.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.70 0.71 -4.59 10 245 2009-01-15 18:05:59 2008-03-05 14:41:54 4 20 39 0 127 228 2 119.10 44 24.57 CHANGED LGGsIFsCNssThcECac+pLFGLPu+ahsaV+sIKPGLsLFLFNYss+pLHGIFEAoSpGthNI-spAatspt..s........opaPAQ....VRh+lthpChPLsEspF+ssIhpNYhs.......ss..KFRaELo+sQspcLlcLF ........................uGhIFhCNspThpEshc+pLFGLPtp.htc...V.c.sIcPGhsLFLYsass+pLaGlFEAsS.GGhNI.-PsAapspt.............ppFPAQ....V+hph.ct..Ch..PL...EspF+ssl.....pahs.......t...KFphELshtpshpLhpLF.............................. 0 20 76 103 +10372 PF10540 Membr_traf_MHD Munc13 (mammalian uncoordinated) homology domain Finn R, Coggill P anon PROSITE_PS51259 Domain Munc13 proteins constitute a family of three highly homologous molecules (Munc13-1, Munc13-2 and Munc13-3) with homology to Caenorhabditis elegans unc-13p. Munc13 proteins contain a phorbol ester-binding C1 domain and two C2 domains, which are Ca2+/phospholipid binding domains. Sequence analyses have uncovered two regions called Munc13 homology domains 1 (MHD1) and 2 (MHD2) that are arranged between two flanking C2 domains. MHD1 and MHD2 domains are present in a wide variety of proteins from Arabidopsis thaliana, C. 
elegans, Drosophila melanogaster, mouse, rat and human, some of which may function in a Munc13-like manner to regulate membrane trafficking. The MHD1 and MHD2 domains are predicted to be alpha-helical. 29.10 29.10 29.10 29.10 28.50 28.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.37 16 574 2009-01-15 18:05:59 2008-03-05 15:06:33 4 32 217 2 337 493 0 126.90 35 9.55 CHANGED -sslpPLMDaLDspLulhscsh.KpshpRlLp.tLWchVlsplcctl..s..pp........................................................htpp+sLosppsshlphsLpslcpaFHA......pGpGLp.........................hshLp+oschpsLcptLpLaspsT-pLIcpF ......................................................-ssltPLh-a.....LDs.sLslhspshpcss..h.p+VLp.cLW+h..Vlssh-chl...lPshsct..........................................................................................................ppt+sLoscpsshlchsLcsl.p.paFHA......tG..s..G.L..........................hshL.KSschp...........sLchtLpLYspsT-pLIcpa.................................................... 0 83 129 238 +10373 PF10541 KASH Nuclear envelope localisation domain Finn R, Coggill P anon PROSITE_PS51049 Domain The KASH (for Klarsicht/ANC-1/Syne-1 homology) or KLS domain is a highly hydrophobic nuclear envelope localisation domain of approximately 60 amino acids comprising a 20-amino-acid transmembrane region and a 30-35-residue C-terminal region that lies between the inner and the outer nuclear membranes [1]. During meiotic prophase, telomeres cluster to form a bouquet arrangement of chromosomes. SUN and KASH domain proteins form complexes that span both membranes of the nuclear envelope. The KASH domain links the dynein motor complex of the microtubules, through the outer nuclear membrane to the Sad1 domain in the inner nuclear membrane which then interacts with the bouquet proteins Bqt1 and Bqt2 that are complexed with Bqt4, Rap1 and Taz1 and attached to the telomere [2]. 
SUN domain-containing proteins are essential for recruiting KASH domain proteins at the outer nuclear membrane, and KASH domains provide a generic NE tethering device for functionally distinct proteins whose cytoplasmic domains mediate nuclear positioning, maintain physical connections with other cellular organelles, and possibly even influence chromosome dynamics [3]. 23.00 23.00 23.60 26.70 22.60 22.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.03 0.72 -4.12 20 319 2009-12-22 14:12:53 2008-03-05 15:18:45 4 33 80 2 156 310 0 56.40 58 1.68 CHANGED altRlhRsALPlQ.LLLLLLlslAsLlP.......hpt--aoCshsNNFARSFpPMLRYs......NGPPP.........h ...........alhRVlRA.ALPLQ.LLLLLLlsL.ACLlP........sEEDYSCshuNNFARSFaPMLRYs......NGPPPh... 0 26 37 87 +10374 PF10542 Vitelline_membr Vitelline membrane cysteine-rich region Finn R, Coggill P anon PROSITE_PS51137 Domain In Drosophila melanogaster the vitelline membrane (VM) is the first layer of the eggshell produced by the follicular epithelium. It is composed of at least four different proteins. VM proteins are similarly organised with a central highly conserved 38-amino acid domain which is flanked by unrelated regions. The domain contains three highly conserved cysteines. 20.30 20.30 20.80 26.10 17.00 16.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.06 0.72 -4.33 2 82 2009-01-15 18:05:59 2008-03-05 16:08:28 4 1 15 0 37 86 0 36.90 75 25.87 CHANGED uh.APPCPpNYLFSCQPNLAPsPCut.APuYGSAGAYo .....SIPAPPCPKNYLFSCQPsLsPVPCS...A.P..A..s.SYGS.AGAYS..... 0 5 5 21 +10375 PF10543 ORF6N ORF6N domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.73 0.72 -3.95 60 807 2009-01-15 18:05:59 2008-03-05 16:34:40 4 9 514 0 115 705 44 86.90 32 37.57 CHANGED p.plhclRGp+VhhsppLAclYss-ocplppshpRNt.......cRFs.scahFpLstcEhcp...l+sphs..............................thstpstt.hhaTEpGsshLupl ...............t..phhphps.pVhhsppLAclYGs-.spplppshpcNt.......pRFs.scahFpL.....ss..pEhcp....l.+.s.p..hsh.p...................................thut.pttt.hlaTEpGhuhhuph............................... 0 42 87 110 +10376 PF10544 T5orf172 T5orf172 domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.69 0.72 -3.61 204 1694 2012-10-01 19:55:08 2008-03-05 16:40:48 4 26 830 0 930 1844 598 92.50 31 29.87 CHANGED GhlYl.h...........tp.t..st.......hKIGhTps...lpcRlpphp..............sshshphph......hth.............h...sstcl.EphlHpphps...hcl...............cpEaFc.....ls.....hcplcp.....hlc..pht .................................................................GhlYl.l...................ppspshsp............ph..aKIGhT........hs....lppRhcphs....................................................................sclcV.......................................................hhss.Dhpsh....EspLh.cp..Fcc....t..h..h..p.......................................t+EhFc.........Vs..........plpphh....ph........................................................................ 0 747 823 896 +10377 PF10545 MADF_DNA_bdg Alcohol dehydrogenase transcription factor Myb/SANT-like Finn R, Coggill P anon PROSITE_PS51029 Domain The myb/SANT-like domain in Adf-1 (MADF) is an approximately 80-amino-acid module that directs sequence specific DNA binding to a site consisting of multiple tri-nucleotide repeats. 
The MADF domain is found in one or more copies in eukaryotic and viral proteins and is often associated with the BESS domain. It is likely that the MADF domain is more closely related to the myb/SANT domain than it is to other HTH domains. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.10 0.72 -3.79 116 1496 2012-10-04 14:01:12 2008-03-05 16:45:37 4 95 82 0 954 1699 0 82.30 20 23.59 CHANGED LIphl+ppPsLacppp..pYp......sps................t.+ppsWppluppl...s..hs....s..p.....s+p....+WpsLR...spap+ch.cphp...............tspthtspah....ahcphpFL .............................................LIphhcppPsLas...tpp...p.ap......spp..........................t.+p...psW..p...p...lu...ppl.......s.......hs............s..pp................hpp.+aps..LR.......sp.Yp+ch.+chp..................tttthhsph.....ahcphpFL..................................................................... 0 247 332 748 +10378 PF10546 P63C P63C domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 25.00 25.00 36.80 36.10 24.70 15.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.12 0.72 -4.05 12 137 2009-01-15 18:05:59 2008-03-05 17:02:07 4 6 134 0 16 88 3 89.00 38 30.96 CHANGED Ih-tFlucclpPahKpFPssaYcplFRLpGhpasscssp.RPthhGphTNcllYpRLAP...tlLEEL+cpssp.ttctt+.t+hHQhLTp-IGHPcL ............IhthFlhp-s.psWpKcF..sDshYctLaRhpGls..tcpss.+PhlauhloschIYs.lhP...plhs-lKtptsh.........tpKhHQaLssD.G........ 0 5 8 11 +10379 PF10547 P22_AR_N P22_AR N-terminal domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 
25.00 25.00 27.80 27.70 23.50 23.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.66 0.71 -4.28 13 394 2009-01-15 18:05:59 2008-03-05 17:05:07 4 6 279 0 35 249 7 115.70 53 47.21 CHANGED lssp..sVsFpGppL.l..sVppsG..psYsuMKPIVEuhGLuWpSQapKLhpscstsshlcIsh.VuuDGKpRcMlClPL+KLsGWLhoIsPsKVRPElRDKlIpYQcEChcVLa-YWp.pGtA.....R ............hshhsVPFHGss..L.a..lVs.aN..G..EPY.VPMKPlVpGMGL...s...Wp...S...Q...hsKL.+pRFts.sls.E....Is.........h..VA..........p.D......GKpRpMlsLPLRKLsGWLpTI....NP....N...K.V....+.PE.........IR-KVIpYQEECDDVLY-YWT.KGhVhNsR................ 0 8 21 32 +10380 PF10548 P22_AR_C P22AR C-terminal domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. It is found associated with Pfam:PF10547. 21.00 21.00 21.10 21.30 20.80 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.28 0.72 -4.11 16 459 2009-01-15 18:05:59 2008-03-05 17:07:46 4 11 251 0 11 261 0 73.00 45 29.70 CHANGED pcp...ashpFT-pELppLsWLahuhp+spshhpplh.sLctlsSshusplYuhuhEYtpshcps+plLp+hstchc ..............pc+hssphoAcEhsoLVWLWpausRuQsLh+cLYPALKpIpSsYoG+sYDhuaEasYllshAR-VLlpcTcclD...................... 0 2 6 9 +10381 PF10549 ORF11CD3 ORF11CD3 domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 25.00 25.00 26.70 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.72 0.72 -4.24 9 414 2009-09-11 15:21:10 2008-03-05 17:11:40 4 6 250 \N 11 158 0 56.50 43 32.33 CHANGED phohppphNphstEacpcKslAShsG+sLscW.+hcKPhL.AclcphcpchQlplshh .......ShppEhNtsCt-hc+-KslAShsuptLNcW.+hsKPsllu+IcpLtppuQlhlshh.............. 0 3 5 8 +10382 PF10550 Toxin_36 Conantokin-G mollusc-toxin Finn R, Coggill P anon PROSITE_PS60025 Domain The conantokins are a family of neuroactive peptides found in the venoms of fish-hunting cone snails. 
They possess a high content of gamma-carboxyglutamic acid (Gla) (4-5 residues), a non-standard amino-acid made by the post-translational modification of glutamate (Glu) residue. Conantokins are the only natural biochemically characterised peptides known to be N-methyl-D-aspartate (NMDA) receptor antagonists. 25.00 25.00 26.70 26.00 19.40 18.50 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.13 0.74 -6.50 0.74 -2.29 3 3 2009-09-10 23:18:32 2008-03-05 17:15:11 4 1 3 1 0 3 0 14.70 76 19.21 CHANGED GEEElQKMspELhRE GEEEVAKMAAELARE 0 0 0 0 +10383 PF10551 MULE MULE transposase domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Babu and colleagues [1]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.32 0.72 -3.82 126 2654 2012-10-03 01:22:09 2008-03-05 17:40:22 4 142 229 0 1688 6089 517 90.00 18 13.95 CHANGED us.....ah....h...........hhs.......hhGhst......phh....l........uhslh.ss.............Estcs.ap...hhhp.....shp..psh................hlloDtppulhpAlpp..la............Pt..sp..pphChhHlh...csht ................................................................................................hhuhstpt.....phh....sh........u.hs.l.l..ps.......................................Estcs..at...........ahhp..................................................phh....phhst...........p...sh..sllsD.p.pt...u....hhpAlpp..sa............Pp.....sp....pthCh.hHlhpsh.h............................................... 0 642 1072 1371 +10384 PF10552 ORF6C ORF6C domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 
21.90 21.90 21.90 21.90 21.60 21.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.26 0.71 -4.36 23 471 2009-01-15 18:05:59 2008-03-06 12:49:20 4 10 383 0 34 350 3 108.30 23 46.84 CHANGED ptltlhhps.cphccclpplcpclpcLc....pshsLhss-scplp+pVpppVsphLGG+tosAYpc..l+pKlapDIapplK+pFsVsoYssI++Kch-pAhchl.ssapssp..shptcIp .................................................h.hhhps..phppphpp.lcpc.......lptLc..................pp.sl...stsptp....plpptlpp+Vh....thhs.Gh.t..s.............sap.........c....hppp...lapp........h.hppl....KctFsV.spYspl.+pca-cAlpal...ppWpPsh..tht.pI................. 0 7 24 29 +10385 PF10553 MSV199 MSV199 domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 21.10 21.10 21.50 22.80 20.80 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.79 0.71 -4.29 16 37 2009-01-15 18:05:59 2008-03-06 12:58:01 4 10 5 0 0 37 36 134.10 32 36.17 CHANGED hlDIhp...FIctssa-hchh..hhpphWhshhs.......cpplhIosslLpaltYpsp...........app.+cs..FcchLcpNpIpa............................................pclpas-s.lpp..YsplpcEhcpls..sslspp+WlIlpscsFKhhIh+LNTpsuchIRcYYlplEcllp ...................hlDIhpFlchspaclshs..hFs.hW.slss.................ppphhlspslLcWhGYpGc...............hppQ+ps..Fp+hLcpNpIpY............................................pElshp-..lp...YsplpcElptls.ssslspp+alIhcscshKhuIMpLpTKsuchIRcYYlsLEcllp.......................... 0 0 0 0 +10386 PF10554 Phage_ASH Ash protein family Iyer LM, Bateman A anon Iyer LM Family This family was identified by Iyer and colleagues [1]. It includes the Ash protein from bacteriophage P4. 
21.80 21.80 21.80 21.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.35 0.72 -3.71 19 596 2009-01-15 18:05:59 2008-03-06 14:21:45 4 7 351 0 35 460 0 95.40 36 41.17 CHANGED YospAstKouAGhssPphstApscA....usFh..ssths............................thplM..........VupAGtPpuhPsSh....................loGhusPVphsT.clsss.G..GshhphshEAA..................hMsTl............ohspsph .................................Ysh.AshKsuAG.hts.p.......st.cA......ssFh..shthp.......................................hhp.M..........VupAGt..spshPsSh....................hsGhusP.Vphs.T.p.clssS.G..Gs..hphhhEAA..................hMA.Th.......s.spsp.................................................................................... 0 1 8 20 +10387 PF10555 MraY_sig1 Phospho-N-acetylmuramoyl-pentapeptide-transferase signature 1 Finn R, Coggill P anon PROSITE_PS01347 Domain Phospho-N-acetylmuramoyl-pentapeptide-transferase (EC 2.7.8.13) (mraY) is a bacterial enzyme responsible for the formation of the first lipid intermediate of the cell wall peptidoglycan synthesis. It catalyses the formation of undecaprenyl-pyrophosphoryl-N-acetylmuramoyl-pentapeptide from UDP-MurNAc-pentapeptide and undecaprenyl-phosphate. It is an integral membrane protein with probably ten transmembrane domains. This domain is located at the end of the first cytoplasmic loop and the beginning of the second transmembrane domain. 20.10 11.30 20.10 18.00 20.00 11.10 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.95 0.74 -5.85 0.74 -4.24 64 3122 2009-01-15 18:05:59 2008-03-06 15:12:43 4 4 3072 0 644 1899 1136 13.00 74 3.72 CHANGED KpuTPTMGGllll ..KpGTPTMGGllIL. 0 203 418 541 +10389 PF10557 Cullin_Nedd8 Cullin protein neddylation domain Finn R, Coggill P anon PROSITE_PS01256 Domain This is the neddylation site of cullin proteins which are a family of structurally related proteins containing an evolutionarily conserved cullin domain. 
With the exception of APC2, each member of the cullin family is modified by Nedd8 and several cullins function in Ubiquitin-dependent proteolysis, a process in which the 26S proteasome recognises and subsequently degrades a target protein tagged with K48-linked poly-ubiquitin chains. Cullins are molecular scaffolds responsible for assembling the ROC1/Rbx1 RING-based E3 ubiquitin ligases, of which several play a direct role in tumorigenesis. Nedd8/Rub1 is a small ubiquitin-like protein, which was originally found to be conjugated to Cdc53, a cullin component of the SCF (Skp1-Cdc53/CUL1-F-box protein) E3 Ub ligase complex in Saccharomyces cerevisiae, and Nedd8 modification has now emerged as a regulatory pathway of fundamental importance for cell cycle control and for embryogenesis in metazoans. The only identified Nedd8 substrates are cullins. Neddylation results in covalent conjugation of a Nedd8 moiety onto a conserved cullin lysine residue [1]. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.12 0.72 -4.15 109 1474 2009-01-15 18:05:59 2008-03-11 09:11:25 4 25 308 28 994 1441 15 66.30 42 8.55 CHANGED lpp-RphhlpAuIVRIMKsRKplsHspLlsElhpplpp...+Ftssss.IK+pIEpLI-+EYlcRs..psts .....................tpDRphtlpA.A....IVRIMKsRKp.lsHspLlsEl..............hpQLpt...........+Fh..Pp...sshI.K.........KpIEsLI-+-YlcRscss..t.................. 0 369 551 808 +10390 PF10558 MTP18 Mitochondrial 18 KDa protein (MTP18) Tolvanen M, Bateman A anon Tolvanen M Family This family of proteins are mitochondrial 18KDa proteins that are often misannotated as carbonic anhydrases. It was shown that knockdown of MTP18 protein results in a cytochrome c release from mitochondria and consequently leads to apoptosis [1]. Overexpression studies suggest that MTP18 is required for mitochondrial fission [2]. 
25.00 25.00 61.10 35.40 22.60 22.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.83 0.71 -4.34 24 236 2009-09-11 16:45:10 2008-03-11 10:46:53 4 3 206 0 171 223 16 155.30 37 70.83 CHANGED oslRYAu.h.Rhtp.........uYssDlGEuFRPls.shLV+uuYGVSauYlluDls...cuhKshhcscthhs..............................................................................hpchph.ssscpslaQulAShulPuFTIHolVpaSuhhhpps..+........shh.RpWsPsulGLuslPhLsa.hDcs......V-psl .....................................................h...RYluYss-lGEuFRsls.shlV.......puuYGVuhuYlluDss..pcGhKAhhpspt.hs....................................................................................pchth...ssscphlaQulAShulPuFTIp.plsthSthhhpss......+.............shh..RpWsP..sulGLuslPhl.a.hDcsV-th........................................ 0 59 93 140 +10391 PF10559 Plug_translocon Plug_Sec61p; Plug domain of Sec61p Coggill P anon Finn R Domain The Sec61/SecY translocon mediates translocation of proteins across the membrane and integration of membrane proteins into the lipid bilayer. The structure of the translocon revealed a plug domain blocking the pore on the lumenal side.The plug is unlikely to be important for sealing the translocation pore in yeast but it plays a role in stabilising Sec61p during translocon formation. The domain runs from residues 52-74 [1]. 22.10 22.10 22.70 22.20 21.80 21.30 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.42 0.72 -4.44 68 686 2009-01-15 18:05:59 2008-03-13 11:30:10 4 8 470 12 424 593 89 34.60 52 7.47 CHANGED llYhlhoplPLYGl.....sssssDsFhhhRslh.AuspG ........hIFLVhs.QlPLaGI........hSocouDPFYWhRsIL.ASNRG.. 0 141 242 351 +10393 PF10561 UPF0565 Uncharacterised protein family UPF0565 Coggill P anon UPF0565 Family This family of proteins has no known function. 
25.00 25.00 25.10 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.93 0.70 -5.53 12 182 2009-01-15 18:05:59 2008-04-16 15:38:41 4 5 101 0 127 176 0 192.10 24 72.60 CHANGED shpl.slsGhp..pRsNsllhhtPhhp...............spp.ssptllaFsGDhpsa.pp......hppsssssp.hpasLEslAhlLsp+Fsts......HIhVl+sSph..spFSsasNFlpussh............................................GsPcp...o.sthtuhpHLhpLL.shhpclhp...t.ph..p.h......................................................................................................................................................................................s.shtchpLhLlGFSKGCsVLNQllhEhp.......................ht..ssphphhlspIsshYaLDsGaststtsalTppssLcpLupt......ulplalHsTPaQlpDshRsWI++EhcpFlchLcphuhsh ...............................................................................................................................sht..h.hl..th..t.......hhhlcs..h.....t....husapshl.ss..............................................G.s..............u...th...lh.t.........................................................................................................................................................................................................................................................h.lhGFSKGssVlsphhhEht.................................................................th....h.ht.lpthaalDsGhs.t.tsa.s..thlpthsp.........th.hhhHhTPhphts...Rshlt.E.t.h.phLt......s............................... 0 47 64 100 +10394 PF10562 CaM_bdg_C0 Calmodulin-binding domain C0 of NMDA receptor NR1 subunit Coggill P anon Pfam-B_7118 (release 22.0) Domain This is a very short highly conserved domain that is C-terminal to the cytosolic transmembrane region IV of the NMDA-receptor 1. It has been shown to bind Calmodulin-Calcium with high affinity. 
The ionotropic N-methyl-D-aspartate receptor (NMDAR) is a major source of calcium flux into neurons in the brain and plays a critical role in learning, memory, neural development, and synaptic plasticity. Calmodulin (CaM) regulates NMDARs by binding tightly to the C0 and C1 regions of their NR1 subunit. The conserved tryptophan is considered to be the anchor residue [1]. 20.20 20.20 20.20 23.20 18.10 19.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.85 0.72 -7.01 0.72 -4.09 4 144 2009-01-15 18:05:59 2008-04-16 17:43:46 4 12 71 0 48 149 0 28.80 70 3.30 CHANGED IsYK+H+shKcKphELA+pAsD+WRuNIp .IAYKRHKDARRKQMQLAFAAVNVWRKNLQ... 0 11 15 32 +10395 PF10563 CdCA1 Cadmium carbonic anhydrase repeat Bateman A, Coggill P anon Bateman A Repeat This domain is the cadmium carbonic anhydrase repeat unit of the beta-carbonic anhydrase of a marine diatom [1], that uses both zinc and cadmium for catalysis of the reversible hydration of carbon dioxide for use in inorganic carbon acquisition for photosynthesis (thus being a cambialistic enzyme). Compared with alpha- and gamma-carbonic anhydrases that use three histidines to coordinate the zinc-atom, this beta-carbonic anhydrase has two cysteines and one histidine, and rapidly binds cadmium [2]. 
20.80 20.80 21.80 30.80 19.70 20.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.41 0.70 -4.83 18 25 2009-01-15 18:05:59 2008-04-16 17:46:55 4 3 10 6 16 27 2 195.00 32 65.05 CHANGED hWpuulsssssahplhs.+hshs-hD.....cssDG.s.ss..sphstP......................................+hLushh.hsspcGhRsp+h.cstsp.ss.................psGcs.pVHhsHSu...hhuCG....Ypp.ahsGhFsshs..................hssasu..GutsltsAsGV...l.slVs..-EpYhY.shlcGhhl-sctp.....................cpp.lssuauslpaplDtpsahIoAuu .............apuuhssVNlHaHlGsEHhShG-aD.....css-G.s.ss..sphsts......................................+hhs..uhtshs...hscGh+shchstssst.ss........YtapaC.thpVGcTYEVHWsHSu...hGACG....Ypp.ahsGVFsshs..................hsshsu.tus...tsltsulGVpuQVaslVNt.-E..p.hhY.shlcGhhl-ssts.......t-hshY.GSTTGpt.csN.-lCsua.usloWplDRpCHhloAuu. 0 11 15 16 +10396 PF10564 MAR_sialic_bdg Sialic-acid binding micronemal adhesive repeat Coggill P anon Chen Z Repeat This domain is a novel carbohydrate-binding domain found on micronemal proteins. Micronemal proteins (MICs) are released onto the parasite surface just before invasion of host cells and play important roles in host cell recognition, attachment and penetration. Toxoplasma gondii can infect and replicate within all nucleated cells [1]. This domain interacts with sialylated oligosaccharides; the protein in Toxoplasma gondii is a monomer but several MAR domains are carried on the protein. Each MAR domain contains one central sialic acid-binding pocket [2]. 
25.00 25.00 26.00 25.20 24.80 24.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -10.82 0.72 -4.23 17 151 2009-01-15 18:05:59 2008-04-16 17:48:43 4 12 14 13 55 153 0 94.00 27 32.00 CHANGED LDpaC.sphuppsspsshp.hh........hhhAR.t.ttptttsh.WRCYspsphphstst.....ClDsCGshhs..C.uslpt.ssphhs+s....th.plIsppt......................hC .........................lDthC.tph.tthstphhp.shsss......shVARhsh.s.psup..pst..WRCYstppLshstps.t...ClDsCGshhs..C.Gs.....lstsssp....ahTtp....pl.phIppt........................................ 0 34 36 54 +10397 PF10565 NMDAR2_C N-methyl D-aspartate receptor 2B3 C-terminus Coggill P anon Pfam-B_53396 (release 22.0) Domain This domain is found at the C-terminus of many NMDA-receptor proteins, many of which also carry the Ligated ion-channel family Pfam:PF00060 further upstream as well as the ANF_receptor family Pfam:PF01094. This region is predicted to be a large extra-cellular domain of the NMDA receptor proteins, being highly hydrophilic, and is thought to be integrally involved in the function of the receptor. The region also carries a number of potential N-glycosylation sites [1]. 
25.00 25.00 32.80 31.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 681 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.28 0.70 -5.85 11 229 2009-01-15 18:05:59 2008-04-16 17:49:41 4 14 86 4 78 194 0 403.00 39 42.23 CHANGED HLFYWpLRaCFhGVCSG+PGLlFSISRGIYSCIHGVpIE-K...thsSPoh....shssopSNhL+LLRoAKsMssloslNG....SP+sAh-Fh+p..cSulhDhupc+tshstsc.....s.sa.sc-shFuD.hS-lcpphuNlphccsNsYtcp.hpHthshst...........pPpSlGuuuSh-Gshs..................sopPRulh....KKslDhlttpsssssss.ptthssh.G+.sashKS.........sRYh.......st-..sh+SDlSDhSo+ssoYts.Eus..t+++pph+DoLKKRsuouK..R-hsElEL.......................shh+s.......spuus..cphYhh.pDKEslpsFalDQh+s+EGss..h-clDhs-hauscussh+pssss.........hthsoShlphc-h.....p....tpthspthshuspsps......sstussthuss..aCRSChS..............................phsuauuhs..tpsspusshRC-uCp.+sGNLYDISEDp.Lpct.........................tpsttupsshstthsQssstphp+pptttpLpRQHSYDsh.l-ht+Ets........................h.ssRSVSLK-K-Rah-Gsshh....................................h......utchhss+ssthhuucsttt.p.................sst..............hphSKSLYPD+sopNPFlsTasD.......D...QpLlHGupua.hhKpphs.............httpsps.tposhussSassssu..........Rhsp-lslu.....ppshshsssp...h.......ssPRshNuu..oNs+VYcKlsSIESDV 
...................HLhYWphRash...po.sp.sh..lh..uhSRGIYSChpGVt...tp.....tsPsh....shs.spuNhL+hLpsAKshssh..sshps....S.ppshchh..................................................................................................................................................................................................................DhS-hSs+sssahp.-s......+p.p.+ssh.KRs.su+..+-.s.................................................tt.p..pp.a...t-pt...sh.....p..Es.sh..Epl-hs-.ap-ts-sh......++ssss.........h..sps..p.ccs.............hhtpthshtthsts.......s-hppp.tts..aCRSC.S..............................ph.sYus............p.shpssh+C-uCh.+hGNLYDIsEDp.Lp-h................................tts.t..tthsps.s.phtp.tp...p.lpRQHS.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 3 13 31 +10398 PF10566 Glyco_hydro_97 Glycoside hydrolase 97 Coggill P anon Naumoff D Domain This domain is the catalytic region of the bacterial glycosyl-hydrolase family 97. This central part of the GH97 family protein sequences represents a typical and complete (beta/alpha)8-barrel or catalytic TIM-barrel type domain. The N- and C-terminal parts of the sequences, mainly consisting of beta-strands, form two additional non-catalytic domains [1]. In all known glycosidases with the (beta-alpha)8-barrel fold, the amino acid residues at the active site are located on the C-termini of the beta-strands [2,3]. 
28.00 28.00 28.10 28.10 27.90 27.80 hmmbuild --amino -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.74 0.70 -5.30 173 795 2012-10-03 05:44:19 2008-04-16 17:52:05 4 17 278 12 178 849 187 272.00 35 40.12 CHANGED D.s.SWIK.PGKhsas.WWshtss............................tu...s...sscshKpYIDFAAchGhcalLl.........DtGWt.................sh..ps.sc....h-lpcLspYA+pKGltlhLWhppps...............hcpp.h.-cthphhpchGlpGlKsDFhsp...................-sQhhlpaYpcllcpAAca+lhVshHG.sh+PsGhpRTYPNhlspEuV+GtE..pths.......sst.spHsshLPFTRhluGPMDYTPGhhp.tht..................pspspsT...hu+QLAhaVlh.SPlQMh..uDtPpsY.....pp.......shpFlcslP ..................................DsSWIKPG.Khshs.WWphhss.......................t.s.ptuh...sspshKcYIDFA.Ap.....pGh-hlLl....stGWp....................t..hshhp.shs-...........hDlppLscYA+pKG..lhLhhpac..s..................................h-pp...h.-cthphhpchGlpulKss..ahss..........................ssQhhssaYhcllccAAca+lhVshHt.sh+P.oGlpRTYPNhlspEus+GtEh....ptas........sspspHsshLPFTRhluGPMDYTPGhhppthp................spsts.poT.hu+QLAhaVlh.SPlQMh..ADhPpsYtct.........sshpFlcsVP.................................................................................... 0 71 152 170 +10399 PF10567 Nab6_mRNP_bdg RNA-recognition motif Griffiths-Jones S, Coggill P anon Pfam-B_57293 (release 21.0) Domain This conserved domain is found in fungal proteins and appears to be involved in RNA-processing. It binds to poly-adenylated RNA, interacts genetically with mRNA 3'-end processing factors, copurifies with the nuclear cap-binding protein Cbp20p, and is found in complexes containing other translation factors, such as EIF4G as in Swiss:P39935 and Swiss:P39936. 
25.00 25.00 31.10 31.10 24.60 19.60 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.09 0.70 -5.44 5 25 2009-01-15 18:05:59 2008-04-16 17:54:03 4 4 24 0 15 24 0 317.80 46 30.36 CHANGED INYKVLPKGcDsY+TRSLLFENV-..+-lDLHsFl+pFVKauPlESlYLIcsss...............csscsh-sDscN.............................pSILLSFLTKssCLDFYNNlLQRLSEFKTpL+SscLolSFVslpY.spp............sDEEush...p.hl..suLchNIlo+GATRSIsVEFpssVp..cpDhlpcKLPFLcsScNKRYILEuVDlINA--ssssFPpNYsILTFLNISMAlEVhDaL+t...p+shuIS+ChFVolss.h..................p+.Ss.Ssssssssp.psscosSVSshSt...................lSLossooslS..LscElDs....ht.lcL-tpsLplshS-Y.pPhIEpHscHL ........IsYKlLPp....GDDsYRTRSLLhENV-..cSIDLHShlcsFVK.sslESsYLIcus................psscsp-s-scs.............................hSILlSFLTKusCLsFYNNlLQR...LSEFKThLKS-uLsLpFVsLsYcscshsp..tt.......................t.p.NsEEsDl....s.hl.suSL+aNIsN+sATRSIhIEFc.osVc......KsD.LhcKKL.pFL.cpucNKRYILESIDLVNs-ss.sspFPcNYAlLTFLNISMAlEVLDYLKph..o+sLGISKChaVSlsshs.p..............ss++hSs.SsssusNscossssssspSshSs...........................S.hSLoSh.uSsVS..Ls-ElDhhs.pKLpulcLcsphLplshp-YpsPpIEpHosHL........ 0 1 6 13 +10400 PF10568 Tom37 Outer mitochondrial membrane transport complex protein Wood V, Coggill P anon Pfam-B_30563 (release 22.0) Family The TOM37 protein is one of the outer membrane proteins that make up the TOM complex for guiding cytosolic mitochondrial beta-barrel proteins from the cytosol across the outer mitochondrial membrane into the intramembrane space. In conjunction with TOM70 it guides peptides without an MTS into TOM40, the protein that forms the passage through the outer membrane [1]. It has homology with Metaxin-1, also part of the outer mitochondrial membrane beta-barrel protein transport complex [2]. 
21.50 21.50 21.50 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.44 0.72 -3.71 17 336 2012-10-03 14:45:55 2008-04-16 18:03:22 4 12 211 0 210 319 1 67.70 30 20.00 CHANGED LHlWGhshslssIsspClAshahhphshspp..........hplVtSsNsslSsoscLPsLhs..ssphls....GatsIlpaLp ..........lahWus..s..au.LPSlcspsLAlhsYh+hssss...........hclhtpsN.s.h.hSPoGcLPhLps.....ss....ph..lo......thtpIlpaL......................... 0 40 81 150 +10401 PF10569 Thiol-ester_cl Alpha-macro-globulin thiol-ester bond-forming region Finn R, Coggill P anon PROSITE_PS00477 Domain This short highly conserved region of proteinase-binding alpha-macro-globulins contains the cysteine and a glutamine of a thiol-ester bond that is cleaved at the moment of proteinase binding, and mediates the covalent binding of the alpha-macro-globulin to the proteinase. The GCGEQ motif is highly conserved. 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.30 0.72 -4.72 83 1638 2009-09-13 17:40:59 2008-04-17 09:17:48 4 94 646 55 611 1531 44 29.30 44 2.09 CHANGED sLspLlphPtGCGEQsMlphsPslhshpY.Lc ........tlppLlphPaGCGEQsMsthsPslhshpYLp......... 0 121 201 399 +10402 PF10570 Myelin-PO_C Myelin-PO_N; Myelin-PO cytoplasmic C-term p65 binding region Finn R, Coggill P anon Pfam-B_1437 (release 22.0), PROSITE_PS00568 Domain Myelin protein zero is the major myelin protein in the peripheral central nervous system and is essential for normal myelination. The family is a single-pass transmembrane molecule containing one Ig-like loop in the extracellular domain and this highly basic 69 residue C-terminal cytoplasmic domain which is the region that interacts with protein p65 [2]. 
25.00 25.00 29.80 29.30 22.10 21.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.35 0.72 -3.91 4 47 2009-01-15 18:05:59 2008-04-17 09:23:04 4 3 30 0 19 35 0 66.80 75 27.58 CHANGED VRaCWLRRQuhLQRRLSAhE+GKLp+.AKDuSKR.uRQsPVLYAMLDpSRSsKuASEKKuKGh.GESRKDKK ............lRYCWLRRQAALQRRLSAMEKGKLHKsuKDuSKR.GRQTPVLYAMLDHSRSTKAASEKKuKGL.GESRKDKK..................... 0 1 2 4 +10403 PF10571 UPF0547 Uncharacterised protein family UPF0547 Coggill PC anon UPF0547 Domain This domain contains a zinc-ribbon motif. 27.30 27.30 27.30 27.40 27.20 27.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.04 0.72 -7.45 0.72 -4.24 31 219 2012-10-03 10:42:43 2008-04-17 09:30:18 4 12 202 0 65 235 57 25.80 44 10.55 CHANGED KpCPpCsppl........shusphC....shCGasFpt ...+pCPcCpppl........stuscpC....PpCGatFh.p... 0 21 30 44 +10404 PF10572 UPF0556 Uncharacterised protein family UPF0556 Coggill PC anon UPF0556 Family This family of proteins has no known function. 25.00 25.00 31.70 30.30 21.10 16.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.09 0.71 -5.04 6 52 2009-01-15 18:05:59 2008-04-17 09:36:22 4 5 40 0 36 49 0 142.50 48 69.88 CHANGED MAs.............hs.lh.hhhhh.sspuhpcspohEFDlRPGG.laoFopslt...cYpCsFTYAuQGGTNEpW.MSlGLS-DsphFSCSlWRPQGKSYLFFTQFKAEl+GAKIEYupAYSQsussupuDVPLKsEEapVo-osVopRsGpF+upLuKLsllu+spHDEL ................h...............hh.hhhhh........tusp.p........spohtFDl+PGG..lcoFopslt......capChFTYAuQ..GGT.NEpWpMolGhS-DpphFoCo..lWR...P...Q.G.KSYLaFTQFKAEl+G.AcIEYuhAYSpsuhttpuDVsLKsEEFpVoc..o..sVspRsGtF+ucLSKLslVA+t..t+sEL......................... 0 13 16 22 +10405 PF10573 UPF0561 Uncharacterised protein family UPF0561 Coggill PC anon UPF0561 Family This family of proteins has no known function. 
25.00 25.00 25.40 25.90 22.80 24.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.65 0.71 -4.28 5 61 2009-09-10 19:05:18 2008-04-17 09:38:23 4 4 45 0 43 47 0 107.90 50 52.62 CHANGED MEs.u-upGcuVphKPGGRLDMSHGFVHHIRRNQIARDDYDKEVKQ.AKEKQRRRHTssPRRPRRPDLQVYpPRpRpGSp.ssss-sEEpsESoSo.poEsEssGspLFpLDYEADuG-ITSlIVaK ...........................thp.KPGGRLDMsHGFVHHIRRNQlARDDYDKcVK......Q..AKEchR+R+T...suPpR....P..R+P..D.....l.QVYhPR+R..cs.o...tt.s..ss-hEE...ssESoSS.to-.EspuppLFpL-YpADuGclTSlIVap........................................................ 0 10 13 23 +10406 PF10574 UPF0552 Uncharacterised protein family UPF0552 Coggill P anon UPF0552 Family This family of proteins has no known function. 22.20 22.20 22.40 23.70 19.90 22.00 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.40 0.70 -4.89 5 71 2009-01-15 18:05:59 2008-04-17 09:41:11 4 4 46 0 43 55 0 196.30 55 88.99 CHANGED MSRIYpsTALpNKsVHsE+asGoW-PusaQuG-GVLLEGpLlDaSRHoIoDsKs+.......KERYYVLYIRPSRIHRRKFDsKGNEIEPNFSDTKKVNTGFLMSSY............................KVEAKG-SDRLot-QLssLV...NKspLl.......KIT-+H.sP+ETaAFWhPEuEM-KTELEsGpEVRLKT+GDGPFlFSLAKlDSGTVTKCNFAGDupAGASWTDNIMApKSspssu.ScspuQGDGA-DDEW ..................MShIYpsssLpscsVps.phsusWsPs..t..aQuGsGVLLEGpLh.DlSRHsI.Dspsp.......KpRaaVLYlcPuplH+R+FDs+GpEIEPNFSsT+KVNTGaLMSSY............................KVEAKG-oDRLo.-tLpsLV....sKs-.LL.......ulTpph..oPspolAFWhPEuEM-thELElGstVRLKT+GDGPFl-SLAKL-uGTVT.K.CNFA.....G...DsKTGASWTDNIMApKsocsss..sE...h.+p.QGDGA-D-EW....... 
0 18 21 26 +10408 PF10576 EndIII_4Fe-2S Iron-sulfur binding domain of endonuclease III Finn R, Coggill P anon PROSITE_PS00764 Domain Escherichia coli endonuclease III (EC 4.2.99.18) [1] is a DNA repair enzyme that acts both as a DNA N-glycosylase, removing oxidised pyrimidines from DNA, and as an apurinic/apyrimidinic (AP) endonuclease, introducing a single-strand nick at the site from which the damaged base was removed. Endonuclease III is an iron-sulfur protein that binds a single 4Fe-4S cluster. The 4Fe-4S cluster does not seem to be important for catalytic activity, but is probably involved in the proper positioning of the enzyme along the DNA strand [2]. The 4Fe-4S cluster is bound by four cysteines which are all located in a 17 amino acid region at the C-terminal end of endonuclease III. A similar region is also present in the central section of mutY and in the C-terminus of ORF-10 and of the Micro-coccus UV endonuclease [4]. 20.50 19.60 20.50 19.60 20.40 19.50 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.58 0.73 -6.71 0.73 -3.58 505 3890 2009-01-15 18:05:59 2008-04-17 09:52:34 4 11 2745 20 789 2381 391 17.00 50 6.34 CHANGED Cps+.pPpC....stCPlp..sh.C CpAR.KP+C....shCslp..ch.C..... 0 236 509 651 +10409 PF10577 UPF0560 Uncharacterised protein family UPF0560 Coggill P anon UPF0560 Family This family of proteins has no known function. 
19.40 19.40 19.40 19.50 19.30 19.10 hmmbuild -o /dev/null HMM SEED 807 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.66 0.70 -13.35 0.70 -6.28 7 258 2009-01-15 18:05:59 2008-04-17 09:54:33 4 17 49 0 134 241 0 425.80 28 87.35 CHANGED lplKVQV.DsushpPLAsAsVElFuNpoul.ASGsTsuDGsshlthsY+LGo.llVoAoKpuYVsNSlPW+spRLPlauSVSLhLLPERsATLhlYEDlVpIluG..GARsQPhVpFQR+AlpLP.ssoYssLoAhLTsAuo..chcuFPahhGh-.uNuo.Gssshh-LsPlAAlSVHLhsusGspl.lsGPIpVSlPLPscss.LptsspVPAWRFD.phGsWl+sGhGhl+c-GsQLhWTaluPQLGYWlAAhsPspsG.lhsp....DIsoYHTlFLLuILGuhAlllLlLLClLLYYCRR+CLKPpppHRKLpLs.sL-..K+DQATShS+lNLl.....................................osts.ts..ss.L+...sopcDhspp..tphlpHpccspoth.....................tph-.a.hKuppsut.ppt.t.h.p--.ppuhsoh.sp....cp..tutusttphussss.sh.........t..tpths-u+ss-hhhopSlDpLpRPo..sh.opPGQlIhCuSlDplp-us.YRpshPTLVIPAHYh+LPuEtshsupshh.pspppp-htshpst.tpsa.........Q..stt.ptQthusppupsupuppW.u..ssshutSVoIPsshN-ushAQh.NuElQhLTEKpLhELGs...hPHPRAWFVSLDGRSNApVRHSYIDLQpuspstS......................sDASLDSGVDhNE.+suR+hc...............hpE+pt.pt.tssss....huhopLlYhEDh-.SuSEstsshs.SPEDsuLpslL-tuspsphsphsphtcpps+possSshps.sp.ppc.stpsptc.s-pst--tspsKKSPWQKREERPLhsFN 
.............................................................................................................................................................................................................................................................s........h.......................h...h....s..h.hphsh.......................ss.h.ha.ap.t...............G..W.pts.s.l............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 1 18 26 57 +10410 PF10578 SVS_QK Seminal vesicle protein repeat Finn R, Coggill P anon PROSITE_PS00515 Repeat \N 25.00 2.00 26.50 2.10 18.20 1.70 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.92 0.74 -5.80 0.74 -3.09 13 217 2009-01-15 18:05:59 2008-04-17 10:01:42 4 4 4 0 105 234 0 12.20 53 57.33 CHANGED uQlKSFGQhKSp ....uQ.Ko....FuQpKo.. 0 92 92 92 +10411 PF10579 Rapsyn_N Rapsyn N-terminal myristoylation and linker region Finn R, Coggill P anon PROSITE_PS00405 Family Neuromuscular junction formation relies upon the clustering of acetylcholine receptors and other proteins in the muscle membrane. 
Rapsyn is a peripheral membrane protein that is selectively concentrated at the neuromuscular junction and is essential for the formation of synaptic acetylcholine receptor aggregates. Acetylcholine receptors fail to aggregate beneath nerve terminals in mice where rapsyn has been knocked out. The N-terminal six amino acids of rapsyn are its myristoylation site, and myristoylation is necessary for the targeting of the protein to the membrane [1]. 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.47 0.72 -4.18 4 91 2012-10-11 20:01:02 2008-04-17 10:43:49 4 29 73 0 53 118 3 78.90 50 17.87 CHANGED MGQDQTKQQIEKGL+LYQuNpTpKALclWtpVLc+To-.sG+FRlLGCLITAHSEMGKY+-MLcFultQlssAREh-DP- ..........MGQcQs.KQpIE+.GLp.LY.puN..ppccALpsWp+sL...c...K...os.c.h...t.s.RF...p...lLGtL.hpAHsEhG+Y+-MLcFAlhQl-hAcELEDss................. 1 12 15 35 +10412 PF10580 Neuromodulin_N Gap junction protein N-terminal region Finn R, Coggill P anon PROSITE_PS00412 Domain \N 18.50 18.50 22.20 22.20 15.90 15.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.51 0.72 -4.30 3 72 2009-01-15 18:05:59 2008-04-17 10:48:03 4 3 46 0 26 57 0 28.50 80 13.39 CHANGED MLCCIRRTKPVEKNEEADQKIEQDG..IKPEDKA .......LCChRRTK.pVEKN--.DQKIEQDG..IKPEDKA.. 1 1 3 9 +10413 PF10581 Synapsin_N Synapsin N-terminal Finn R, Coggill P anon PROSITE_PS00415 Domain This highly conserved domain of synapsin proteins has a serine at position 9 or 10 which is a phosphorylation site. The domain appears to be the part of the molecule that binds to calmodulin [3]. 25.00 25.00 25.50 25.50 22.50 21.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.51 0.72 -4.13 5 136 2009-01-15 18:05:59 2008-04-17 11:41:42 4 5 42 0 56 160 0 31.30 77 6.20 CHANGED MNYLRRRLSDSuFluNLPNGYMsDLQRP-PPu .......MNaLRRRLSDSoFhANLPNGYMoDLQRP-ss....... 
0 5 11 28 +10414 PF10582 Connexin_CCC Gap junction channel protein cysteine-rich domain Finn R, Coggill P anon Pfam-B_1437 (release 22.0), PROSITE_PS00408 Domain \N 22.50 22.50 23.20 24.60 22.10 22.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.22 0.72 -4.16 76 1238 2009-01-15 18:05:59 2008-04-17 11:42:18 4 11 126 6 649 1009 0 66.10 49 21.43 CHANGED hEluFlhsQYhLY.GFplsslahCs.ptPC...Pps.VDCFlSRPTEKTlFllFMhslos...lsllLNlhElhaL ..........................................hEluFlhsQYhLY..GF.p..lssla.pCs......p.............PC.............Pp.s..VDCFlSRPTEKTlFllFMhslus...lsllLNlhElhaL............ 0 56 121 321 +10415 PF10583 Involucrin_N Involucrin of squamous epithelia N-terminus Finn R, Coggill P anon Pfam-B_7423 (release 22.0), PROSITE_PS00795 Domain This is the N-terminal three beta strands of involucrin, a protein present in keratinocytes of epidermis and other stratified squamous epithelia. Involucrin first appears in the cell cytosol, but ultimately becomes cross-linked to membrane proteins by transglutaminase thus helping in the formation of an insoluble envelope beneath the plasma membrane [1].\ Apigenin is a plant-derived flavanoid that has significant promise as a skin cancer chemopreventive agent. It has been found that apigenin regulates normal human keratinocyte differentiation by suppressing it and this is associated with reduced cell proliferation without apoptosis [2]. The downstream part of the protein is represented by the family Involucrin, Pfam:PF00904. 21.50 21.50 77.20 77.20 18.70 18.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.66 0.72 -3.67 9 42 2009-09-11 05:37:53 2008-04-17 11:45:25 4 19 29 0 11 37 0 68.00 69 15.11 CHANGED MSQQHTLPVTLPPALSQE.LKsVSPPssTQQEQhKQPTPLPsPCQKV.SELPscVP.cHtEKHso.VKG MSQQHTLPVTLsPALSQE.LKTVsPPssTQQEQhKQPTPLPsPCQKVssELPVEVPsKpEEKHhTsVKG.. 
0 1 1 1 +10416 PF10584 Proteasome_A_N Proteasome subunit A N-terminal signature Finn R, Coggill P anon PROSITE_PS00388 Domain This domain is conserved in the A subunits of the proteasome complex proteins. 23.00 23.00 23.10 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.30 0.72 -6.55 0.72 -4.61 74 2896 2009-01-15 18:05:59 2008-04-17 11:46:05 4 19 554 763 1805 2718 83 22.80 63 9.05 CHANGED YDpshosFSP-GRLaQVEYAhcA .......YDpslTsFSP-GRLaQV.....EYAhcA... 0 629 1011 1482 +10417 PF10585 UBA_e1_thiolCys Ubiquitin-activating enzyme active site Finn R, Coggill P anon PROSITE_PS00865 Domain Ubiquitin-activating enzyme (E1 enzyme) activates ubiquitin by first adenylating with ATP its C-terminal glycine residue and thereafter linking this residue to the side chain of a cysteine residue in E1, yielding an ubiquitin-E1 thiolester and free AMP. Later the ubiquitin moiety is transferred to a cysteine residue on one of the many forms of ubiquitin-conjugating enzymes (E2) [1]. This domain carries the last of five conserved cysteines that is part of the active site of the enzyme, responsible for ubiquitin thiolester complex formation, the active site being represented by the sequence motif PICTLKNFP [2]. 19.90 19.90 19.90 20.20 19.80 19.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.45 0.72 -4.57 65 1276 2009-09-11 07:00:39 2008-04-17 11:47:32 4 44 343 39 841 1263 19 45.50 39 6.15 CHANGED thTEsYssstcsspc..s...hPlCTl+shPpphpHCIpWA+.thFpphFs ............................hTpsYp..s..s.-..sspc...p......hPhCTl+shPpp..-HCI..pWA+..hFpphF............ 
0 310 475 688 +10419 PF10587 EF-1_beta_acid Eukaryotic elongation factor 1 beta central acidic region Finn R, Coggill P anon Pfam-B_9497 (release 22.0), PROSITE_PS00824 Domain \N 25.70 25.70 25.80 25.90 25.60 25.60 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.64 0.72 -7.26 0.72 -3.79 37 495 2009-01-15 18:05:59 2008-04-17 12:56:35 4 11 254 0 245 487 0 27.50 62 9.97 CHANGED LFGSD-E.EDcEAc+l+p.cRLAtYstKK ..LFG.S.D.-..E.-..ED.tEAt+l+E.ERLtpYspKK.. 0 77 120 191 +10420 PF10588 NADH-G_4Fe-4S_3 NADH-ubiquinone oxidoreductase-G iron-sulfur binding region Finn R, Coggill P anon Pfam-B_202 (release 22.0), PROSITE_PS00642 Domain \N 20.10 20.10 20.10 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.03 0.72 -4.72 191 4123 2009-01-15 18:05:59 2008-04-17 12:59:17 4 128 3222 15 1243 3421 1974 41.00 43 5.37 CHANGED R+sllElLLssH....sh-CssC-.psGpCcLQchuhchGlpct+a .......RcslhEhLLtNH....PL.DCslC-.puGcCcLQ.-huhthGhsppRa......... 1 463 862 1067 +10421 PF10589 NADH_4Fe-4S NADH-ubiquinone oxidoreductase-F iron-sulfur binding region Finn R, Coggill P anon Pfam-B_339 (release 22.0), PROSITE_PS00645 Domain \N 20.70 20.70 20.80 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.35 0.72 -4.81 299 3722 2009-01-15 18:05:59 2008-04-17 13:02:48 4 63 2661 15 1286 2965 2211 45.80 42 9.34 CHANGED Vc.hspphhc.Fh...tcESCGpCTPCRtGot.hhhcllc+.l.tp...G..cu....p....................tpDl ............Vphspph.c..Fa.....t+ESCGpCTPCR-Gos.ahhcllc+.l..pp..Gcup.tDl........................... 
0 477 891 1106 +10422 PF10590 PNPOx_C Pyridoxine 5'-phosphate oxidase C-terminal dimerisation region Finn R, Coggill P anon Pfam-B_685 (release 22.0), PROSITE_PS01064 Domain Pyridoxine 5'-phosphate oxidase (PNPOx) catalyses the terminal step in the biosynthesis of pyridoxal 5'-phosphate (PLP), a cofactor used by many enzymes involved in amino acid metabolism. The enzyme oxidises either the 4'-hydroxyl group of pyridoxine 5'-phosphate (PNP) or the 4'-primary amine of pyridoxamine 5'-phosphate (PMP) to an aldehyde. PNPOx is a homodimeric enzyme with one flavin mononucleotide (FMN) molecule non-covalently bound to each subunit. This domain represents one of the two dimerisation regions of the protein, located at the edge of the dimer interface, at the C-terminus, being the last three beta strands, S6, S7, and S8 along with the last three residues to the end. In Swiss:P21159, S6 runs from residues 178-192, S7 from 200-206 and S8 from 211-215. the extended loop, of residues 167-177 may well be involved in the pocket formed between the two dimers that positions the FMN molecule [2]. 20.30 20.30 22.10 21.20 19.50 18.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.45 0.72 -4.46 243 2399 2009-01-15 18:05:59 2008-04-17 13:57:12 4 8 2210 18 735 1792 1629 42.50 54 19.13 CHANGED WGGaRlhPpplEFWQGpssRLHDRhhY.............p.+..............................................p.ss.s.........WphpRLtP .....WGGaRltPcplEFW.......QGpt.pRLHDRhhYpR...........................................................p..ss.s.......Wpl.-RLuP................................ 1 211 437 608 +10423 PF10591 SPARC_Ca_bdg Secreted protein acidic and rich in cysteine Ca binding region Finn R, Coggill P anon Pfam-B_3882 (release 22.0), PROSITE_PS00613 Domain The SPARC_Ca_bdg domain of Secreted Protein Acidic and Rich in Cysteine is responsible for the anti-spreading activity of human urothelial cells. It is rich in alpha-helices. 
This extracellular calcium-binding domain contains two EF-hands that each coordinates one Ca2+ ion, forming a helix-loop-helix structure that not only drives the conformation of the protein but is also necessary for biological activity. The anti-spreading activity was dependent on the coordination of Ca2+ by a Glu residue at the Z position of EF-hand 2 [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.69 0.71 -4.05 28 665 2012-10-02 16:17:27 2008-04-17 14:18:31 4 31 135 6 337 1603 18 112.60 31 30.93 CHANGED hssCs-p-LscFspRhRDWlpslhtphhccpptp...........................t..ph.pcsppp.hphhppsltWpFscLDts.tDphLs+pELtsl+usL...hshEpChpsFhpsCDsspDphIohpEWspC ...................................................................................................................................h..sCsst-htphs.RhtDWhpslh.phhpptp.p...............................................................................hp..h.....tp.....pp..h..shppp.sltW.h.F...s.pLDps.tDt...hLs+........pE.L....t....s.....l...c.t...l................sp...E.....+.C......h.............c..........t.............Fh.cpCD.s....s..pDptIohpEWstC.......................... 0 74 101 207 +10424 PF10592 AIPR AIPR protein Iyer LM, Bateman A anon Iyer LM Family This family of proteins was identified in [1] as an abortive infection phage resistance protein often found in restriction modification system operons. 
25.00 25.00 25.90 25.10 24.40 24.30 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.03 0.70 -5.10 65 420 2009-01-15 18:05:59 2008-04-17 14:47:42 4 2 382 0 123 388 93 298.40 19 53.89 CHANGED h-pNVRsaLtsp.........s....VNpuIccTLp...ppPppFhhaNNGITllusclph..sssptt.......hplcshQ.........IVNGuQTosolaps.........tp.ptslsp.......lhV.l+lhhs......ps-....slhscIochsNoQNpVptpDhtupcshppclEphhpshh............haYERtcGpatstpstt....................p.hhspsthtphhssatpcPphsststpphapph.............t.hpphhp..tphst....hatphlshh.llhcthcphlpptt.....................tshstYsl...thhsthhttt.........................................shttlhppp.........plspphhptlhth.spphpchhppsstshtsh ..................................hppNlRsaLsts...........s....VNcsIpcTlp.................ppsptFahaNNGITlls.s..clph..pppst..........hpl.cshp............IlNGuQTssslhps..........................ttttplpp.......shV.l+llth.................psc....phh.spIocssNoQNpVpspDh.t.utcshppplcc.hhppht..............hhYcctcspht.spptph....................thhhsh.sthtp.hh..s..s.h..p.......p.........p.........Pp.h.....s.t.p..ttpphhpph.............t.hpphhs............sphst......hhhphlthh..hlhp....thpphhtptt...........t............................tphhhath......hh..hhttp................................................p..t..tt..............th.tth..p.h......................hh.......................................................................................................................................................... 0 40 94 115 +10425 PF10593 Z1 Z1 domain Iyer LM, Bateman A anon Iyer LM Domain This uncharacterised domain was identified by Iyer and colleagues [1]. It is found associated with a helicase domain of superfamily type II. 
25.00 25.00 25.10 48.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.68 0.70 -5.15 35 212 2009-01-15 18:05:59 2008-04-17 14:51:00 4 3 199 0 78 223 142 234.80 28 28.00 CHANGED pSLccAlpsFllusAlRplR.....Gt.sscHs................SMLlHsS+hssspcpltshl.pph..lpplcpslpttsstt.........hscL+placpch..................ps.sh.sas-ltstLhpsl....s................s..lclhllN.ussp..-s....LDYssspt.h......ssIslGGspLuRGLTlEGLssSYFhRso+...hhDTLhQMGRWFGYRtsYtDLCRlah..........ot-lhphFpplspssE-LRppl.cphttss.....hTPppaulpVpsps...sLtlT ..........sLccAlhsFllssAl+thR..........Gp...tpc+p.................SMLlHsSphpsspppltphl.pph....hpplppslpttpsts...........hschcplappcht.......................tt.ph.sa--l..h..p.tLhpsl...p.....................s..lplhhlN.ssss..ct.......l-appppt...........tsIslGGspLuRGLTlEGLssoYahRsop...thDTLhQMGRWFGYRts.YtDL..sRlah..........st-lhphFppls.pspp-lRppl.cphtt.ts......hsPhpashplpsp.....h............................. 0 24 58 70 +10427 PF10595 UPF0564 Uncharacterised protein family UPF0564 Coggill PC anon UPF0564 Family This family of proteins has no known function. However, one of the members, Swiss:Q22CP8, is annotated as an EF-hand family protein. 
21.10 21.10 21.10 21.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.63 0.70 -5.54 15 182 2009-01-15 18:05:59 2008-04-18 09:08:17 4 5 94 0 110 180 3 304.80 25 51.40 CHANGED .cITVPpPFphshREpc+pp.t.hpppclcp.hpchpcc..-csEhp++F+ApPVPppVhlP.LYcclhccpEcRRcpl+ccS+thhLps.pKPFpFhtR-cpKcphh.pp.............pcccsppF+A+P...lPcphhtshlpph.....................pEcEhhRppRsphRup-hlps.SphPschtpptpppp.t...............+ptpspppttcpshcsh+s+slPDFccLacsFpcphscp.............KppppsTlscPFshppS.p+sss+cphspts.pht.............................pcsl+tspc................................P................................hpt.p.h.phhcpph.c+.t+pp...cphp.RcKptpchuspsppphpstssspphpcppccphhph+cc.cKpcpcEYpppl ...........................plTlPpPFpMplREtp++t....htp...s..phE..pp.hpcp...t-..c......t....cpp..+pFRApPVPspValP.LYpclhcppEtRRp.hppcp+thLhup..KPFpFht.........+-cpccth...ttpp..th.............tppp.tpt.h+A.+.........l.P+shhtssht-+....................................hpEpEhhRpl+.hphRAt-hLpp..ushP.p.tt..tp.....................................pptc.hs....t..........t..p.h.phps+hpstsPDa-tLhc.papc..phtcp...............+p....+.sTh.scPFp....Lpsu....htt..sp.+tp.h.t..s.httc................................................ppp.p.sph................................................P.....................................s...s..ppttt.pth+pp....pch.t.pt..p.p..h.p.php.p.....h.cp....ht.p.tt.....s.t.p..t................................................................................................................................................................................ 0 37 50 76 +10428 PF10596 U6-snRNA_bdg U6-snRNA interacting domain of PrP8 Coggill P anon Chen X Domain This domain incorporates the interacting site for the U6-snRNA as part of the U4/U6.U5 tri-snRNPs complex of the spliceosome, and is the prime candidate for the role of cofactor for the spliceosome's RNA core. 
The essential spliceosomal protein Prp8 interacts with U5 and U6 snRNAs and with specific pre-mRNA sequences that participate in catalysis. This close association with crucial RNA sequences, together with extensive genetic evidence, suggests that Prp8 could directly affect the function of the catalytic core, perhaps acting as a splicing cofactor [1]. 29.80 29.80 30.70 31.80 28.70 29.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.01 0.71 -4.87 11 381 2012-10-03 10:25:13 2008-04-18 15:27:45 4 48 305 0 252 401 3 149.80 76 7.47 CHANGED FpcYpls+tssahWTpp+HDGKLaN..LpsY+sDhIpALGGlEsILEHoLFKuTtassWEGLFW-K.............sSGFEpoh.Kh++LTNAQRoGLsQIPNRRFTLWWSPTINR..........usVYlGFQlQlDLTGlFMpGKlPTLKISL...IQlFRuHLWQKIHESlVhDLt...QhhDsEh ..............................FKpYQlhKpNPFWWTpQRHDGKLWN...LNsYRTDhIQALGGVEuILEHTLFK...........GThFPoWEGLFWEK.............ASGFEESM.KaKKLTNAQRSGLNQIPNRRFTLWWS.PTINR..........ANVYVGFQVQLDLTGIFMHGKIP..TLK..ISL...IQIFRAHLWQKIHESlVMDLC...QVFDQEL................... 0 89 139 206 +10429 PF10597 U5_2-snRNA_bdg U5-snRNA binding site 2 of PrP8 Coggill P anon Chen X Domain The essential spliceosomal protein Prp8 interacts with U5 and U6 snRNAs and with specific pre-mRNA sequences that participate in catalysis [1]. This close association with crucial RNA sequences, together with extensive genetic evidence, suggests that Prp8 could directly affect the function of the catalytic core, perhaps acting as a splicing cofactor [2]. 
21.20 21.20 21.20 21.30 20.90 21.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.65 0.71 -4.57 20 335 2009-09-11 09:17:08 2008-04-18 15:56:36 4 42 290 0 237 321 1 133.60 72 5.89 CHANGED sppEusWsLhspsTKEpTApAa.LpVSccuIspFcNRlRpILMoSGSoTFoKIAsKWNTsLIuLhTYaREAllsTppLLDlLVKsEsKlQsRVKhGLNSKMPoRFPPsVFYoPKELGGLGMLShGHl.LIPpSDL+aS ...................s.+DGVWsLpNcsTKERTApAF.L+Vs-Eslp+FcNRlRQILMuSGSTTFTKIsNKWNTALIuLhTYaREAsVsTp-LLDlLVKCEsKIQTRlKIGLNSKMPSRFPPVVFYTPKELGGLGMLSMGHl.LIPpSDLRWS........... 0 88 134 197 +10430 PF10598 RRM_4 RNA recognition motif of the spliceosomal PrP8 Coggill P anon Chen X Domain The large RNA-protein complex of the spliceosome catalyses pre-mRNA splicing. One of the most conserved core proteins is PrP8 which occupies a central position in the catalytic core of the spliceosome, and has been implicated in several crucial molecular rearrangements that occur there, and has recently come under the spotlight for its role in the inherited human disease, Retinitis Pigmentosa [1]. The RNA-recognition motif of PrP8 is highly conserved and provides a possible RNA binding centre for the 5-prime SS, BP, or 3-prime SS of pre-mRNA which are known to contact with Prp8. The most conserved regions of an RRM are defined as the RNP1 and RNP2 sequences. Recognition of RNA targets can also be modulated by a number of other factors, most notably the two loops beta1-alpha1, beta2-beta3 and the amino acid residues C-terminal to the RNP2 domain [2]. 
23.50 23.50 24.70 24.20 21.80 23.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -10.18 0.72 -4.12 25 346 2009-01-15 18:05:59 2008-04-18 16:21:12 4 41 284 0 246 321 7 92.10 69 4.12 CHANGED EKlDhTLLNRLLRLIlDpNlADYITuKNNVslsaKDMsHlNpYGlIRGLQFoSFlaQYYGLllDLLlLGlpRAsElAG..PsstPNsFhpFpstps ............EKIDLTLLNRLLRLIlDHNIADYhTAKNNVslsYK......DMNHTNSYGlIRGLQFuSFlhQYYGLVlDLLlLGLpRASEhAG..PPQhPN-FLpFpDh.s........................... 0 97 143 207 +10431 PF10599 Nup_retrotrp_bd Retro-transposon transporting motif Wood V, Coggill P anon Wood V Domain This is the highly conserved C-terminal motif GRKIxxxxxRRKx of nucleoporins that plays a critical and unique role in the nuclear import of retro-transposons in both yeasts and higher organisms. It would appear that the arginine residues at positions 2 and 9-10 constitute a bipartite nuclear localisation signal, with two basic peptide motifs separated by an interchangeable spacer sequence, that is crucial for the retro-transposon activity [1]. 25.00 25.00 27.50 27.50 18.80 18.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.48 0.72 -3.39 6 66 2009-01-15 18:05:59 2008-04-18 16:42:08 4 6 41 1 25 66 0 97.30 60 7.65 CHANGED ppuTPsPuSlFsh.GsoNsNhs.........psssPS....osFuFus..shs.......................tstsssSuhssss....hSso..........Ps.....hshGssst.........shssRKIApMR.pR+R ....u.soTPNuSSVFQF.GSS.TTNFNF........TNNNPS....GVFTFGAssSTP.......................AASAQPSGSGuFs....FsQS..........PAu....FTVGSNGKNhFSSSG.TSVSGRKIKTAVRRRK......... 0 3 4 10 +10432 PF10600 PDZ_assoc PDZ-associated domain of NMDA receptors Coggill P anon Pfam-B_10923 (release 22.0) Domain This domain is found in higher eukaryotes between the second and third PDZ domains, Pfam:PF00595, of glutamate receptor like proteins. Its exact function is not known. 
21.20 21.20 22.00 22.30 21.10 20.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.44 0.72 -3.68 12 321 2009-09-11 14:48:04 2008-04-23 15:51:39 4 37 37 29 104 277 0 65.50 53 8.76 CHANGED KPoohahs..DsYuPPDITsSYS.thDNclS.su.lG.......ch.psLsP.sSPsRYSPlPKshLG-DDloREPR .....KPoohYhs..DsYuPPDITs.S.....aSt.....hDNHlSpsu.LGh.......c...shs.P..sSP..uRY.SPlsKphLG--D.hT.R.EPR................................ 0 4 14 41 +10433 PF10601 zf-LITAF-like LITAF-like zinc ribbon domain Protasio A, Bateman A anon Clustering of trematode sequences Family Members of this family display a conserved zinc ribbon structure [1] with the motif C-XX-C- separated from the more C-terminal HX-C(P)X-C-X4-G-R motif by a variable region of usually 25-30 (hydrophobic) residues. Although it belongs to one of the zinc finger's fold groups (zinc ribbon), this particular domain was first identified in LPS-induced tumour necrosis alpha factor (LITAF) which is produced in mammalian cells after being challenged with lipopolysaccharide (LPS)[2]. The hydrophobic region probably inserts into the membrane rather than traversing it. Such an insertion brings together the N- and C-terminal C-XX-C motifs to form a compact Zn2+-binding structure [3]. 21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.05 0.72 -4.11 46 755 2009-01-15 18:05:59 2008-04-24 10:49:46 4 16 214 0 542 755 1 70.80 27 40.68 CHANGED hpssPs.hhCPpCpppshTplphcsGssTa.........hsshlLhhhh..hsC.shlPashcssKDshHhCPsCsshlGhach ...........h...tPsphhCPpCpppl.......hTpl.phpsGhho.a.................lhsh...h.....l.....slhsh...h.....s.....C....sh...lPhhhss.hp.-spHhC..PsCpthlupap................ 
0 237 298 446 +10434 PF10602 RPN7 26S proteasome subunit RPN7 Mistry J, Wood V anon Pfam-B_4112 (release 22.0) Family RPN7 (known as the non ATPase regulatory subunit 6 in higher eukaryotes) is one of the lid subunits of the 26S proteasome and has been shown in Saccharomyces cerevisiae to be required for structural integrity [1]. The 26S proteasome is is involved in the ATP-dependent degradation of ubiquitinated proteins. 29.40 29.40 29.40 29.50 29.30 29.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.75 0.71 -4.89 63 673 2012-10-11 20:01:02 2008-04-24 12:50:55 4 12 331 0 453 671 11 171.80 30 40.18 CHANGED DpshhcphppcNppclpcL-pclcchc.cNhscp-l+puhhchu-aYtclGDhcpAlcsapc.sh-hssusup+lDhhhsllRlulahsDhshVpphlp+Acshh-p..uuD........................................W-p+NR..............................LKsYpGlhtlshRcacpAAphhL-ohsTFsu......sElhoapslshYuslsulhoh-Rs-L+pK ...........................................DpshhpphpcpsptclccL-pclc......shc..pNhhc.pplR............p..uh.chu-aahclG.Dh.............psA.................lcsap.............+..sh....-.h....s....s..uhsphl-hshpll+l....ulaht.Dhsh.lp.pt..lpKAc...shh-p..usD.................................................................................hc.p.+s+..............................LKsh..pGLhplu.h....R...sa+pAAph..........FL-s..h...ssass.................hELhshpslshYsslsulhoh-Rs-L+p................................................ 0 160 250 371 +10436 PF10604 Polyketide_cyc2 Polyketide cyclase / dehydrase and lipid transport Lakshminarayan L, Mistry J anon Manual Family This family contains polyketide cylcases/dehydrases which are enzymes involved in polyketide synthesis. It also includes other proteins of the START superfamily [1]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -11.10 0.71 -4.01 179 5359 2012-10-02 19:24:03 2008-04-24 14:10:18 4 67 1547 79 1946 8699 1599 142.00 13 81.08 CHANGED thplpps......h....pls.ussppVashlsD...h..sshs......p....W......ts..slh...ps..p......h.s...t...sss......................hh....tthp.hsu..........................lppplsp..h....c.s....tpthsach...............hsht...ph..tss.hplps...ss..s.......G.....sp..ls...ash......pht......................shh.............t.h........ht....thlpsh....hpt.sLppLppthc ...................................................hptsh.tl..s..AssppVa.s...h...l.sD.......h..p.ph.s.......p..........W............hs....tlp........ps.......c..........hh....p.......sss......................................th......t..php..hsu.....................ht..........................h.p..p..p......l..h...p......h.............c...s...........sp..t..l.s..aph............................hsht.....ph...t......sp..h..pl..ps........ts.......s................G..............op....lp...........hph........p.hp...............................s.h....................t..h.........ht......thhp...t...........hpt.s....lptLtt................................................................................................................................ 0 564 1317 1714 +10437 PF10605 3HBOH 3HB-oligomer hydrolase (3HBOH) FIGFAMs, Mistry J anon FIG094011 (Release 2.0) Family D-(-)-3-hydroxybutyrate oligomer hydrolase (also known as 3HB-oligomer hydrolase) functions in the degradation of poly-3-hydroxybutyrate (PHB). It catalyses the hydrolysis of D(-)-3-hydroxybutyrate oligomers (3HB-oligomers) into 3HB-monomers. 
25.00 25.00 27.00 25.30 23.40 24.90 hmmbuild -o /dev/null HMM SEED 690 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.16 0.70 -6.45 13 134 2009-12-10 17:34:17 2008-04-24 14:14:15 4 3 127 0 41 138 14 584.70 48 90.33 CHANGED lsluACsGussssss.....................hNs+PuFl..GsVphpuYDGsoDDLLTAGLGKoGLuSATAPuhAsPssPTAAELRRLAIasNYRALVDsousGGYGoLaGP..NVDAsGssTs.G-GKIAGsEYlAYuDDGoGppNVTlhVQVPsoFsPspPCIVTATSSGSRGVYGAIuouGEWGLK+GCAVAYTDKGTGsGsHDLsTsTVsLIDGTRsstsuAGpsutFsAsLuAss.LAsFNuuhPpRhAaKHAHSQQNPEpDWG+hTLQAlEFAaasLN-paGstsssGts..hplpPusslVIASSVSNGGGAAlAAAEQDTcGLIDGVAVuEPplNlssssulsV+RGusP.lsusG+sLhDYsThANLLQsCAAlAsuhss.APhtsshsss......sltsNRCsoLsApGLloGuTTusQAssALstLct.sGapPESslLpAShashpsssuIAVTYANAYuRASVpDpLCsaSFAuTsAsss.....ssshusushAolFusGNGlPPTsGlsLVNssutG.sPh.shsSlS.usup.DhNl-GALCLRsLhoGsDus............u.tlppGlsplptoGNLpGKPAlIVpGRuDALlPVNHTSRPYlGLNptsEGusS+LSYlEVTNAQHFDAFlG....lPGYssRaVPLHhYhhQALDhMYupLpsGsPLPPSQVVRTlPRGGssu..sAPslosANVPPIssoPuAuDtIslssu....sVsVPD .........................s..huuCtus.psst.....................hNthPu.al..GsVphssY.........DG.........soDDLLTAGLGpoGLuusosPuhAsPspPTAAELRRlAIauNYRALVDhossGGYGphaGP..NVD.h...s.GssTh.G-GhIsGsEYlAauDDGoGppNVollVQlPsoFssspPCIlTATSSGSRGlYGAIus.GEWGLK+GCAVAYsDKGsGsGsH-luossVsLlDGThsstssAGssuhFsAshosu.p.LAAFNuthPNRhAaKHAHSQpNPEpDWGp.TLQAlcFAaasLN..-pa.Gshs.......ss..st.p..hphpssshhsIAuSVSNGGGAulAAAEQDs....p...GhIsuVsVuEPplNl......ssshs.VppGGss..lsshG+PLhDYsThANLhpsCAAhusu..h..ss..uPh.huhh.shsh....tuhtspRCAsLt..AtGLloG..ussssQAs-.....ALtpLcs.hGa.s-SDhlpAshhs.ps.PulAsTYANAYsRupVpDNLCsaSFAsssssss.....ssssssushsshFusGNGlPP.TsGlpLl.Nsu.....tG........uls.phus.DhshsGAhCLRpLhTus...................thultsulstlthsuNLpGKPAIIVpGRuDALlPVNHuSRsYluhNptsEGtt.SpLsahEVsNuQHFDuFls...........lPGaDTRFVPlHhYt.pALshMasaL+s.GsPLPPSQVlRTlPRGGssG..uAPAlosANlPslstsPuA.stIssssu....slsVP............................... 
0 4 16 29 +10438 PF10606 GluR_Homer-bdg Homer-binding domain of metabotropic glutamate receptor Coggill P anon Pfam-B_17370 (release 22.0) Domain This is the proline-rich region of metabotropic glutamate receptor proteins that binds Homer-related synaptic proteins. The Homer proteins form a physical tether linking mGluRs with the inositol trisphosphate receptors (IP3R) that appears to be due to the proline-rich "Homer ligand" (PPXXFr). Activation of PI turnover triggers intracellular calcium release [1]. MGluR function is altered in the mouse model of human Fragile X syndrome mental retardation, a disorder caused by loss of function mutations in the Fragile X mental retardation gene Fmr1. Homer 3 (and to a lesser extent Homer 1b/c) has been shown to form a multimeric complex with mGlu1a and the IP3 receptor, indicating that Homers may play a role in the localisation of receptors to their signalling partners [2]. 25.00 25.00 26.50 25.80 18.30 18.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.73 0.72 -4.07 5 118 2009-01-15 18:05:59 2008-04-24 14:35:50 4 4 37 1 60 89 0 50.30 66 5.05 CHANGED ALTPPSPFRD.SVuSGSSsPu.SPVSESlLCsPSNspYsSlIL+DYoQSSSTL ....ALTPPSPFR.D.SVsSGSosPs.SPVSESsL..........C..........h.PssspYsolIlRDYsQSSSoL........ 0 2 8 30 +10439 PF10607 CLTH RanBPM_CRA; CTLH/CRA C-terminal to LisH motif domain Coggill P, Bateman A anon Pfam-B_3765 (release 22.0), UPF0559 Domain RanBPM is a scaffolding protein and is important in regulating cellular function in both the immune system and the nervous system. This domain is at the C-terminus of the proteins and is the binding domain for the CRA motif (for CT11-RanBPM), which is comprised of approximately 100 amino acids at the C terminal of RanBPM. It was found to be important for the interaction of RanBPM with fragile X mental retardation protein (FMRP), but its functional significance has yet to be determined [5]. 
This region contains CTLH and CRA domains annotated by SMART; however, these may be a single domain, and it is refereed to as a C-terminal to LisH motif [6]. 23.00 23.00 23.00 23.00 22.80 22.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.68 0.71 -4.50 132 1458 2009-01-15 18:05:59 2008-04-24 14:51:03 4 27 290 0 1000 1417 5 169.80 20 40.24 CHANGED hhtphpcIhcsl..ptclppAlpWssc..............................ppspLhch......p...........................ssLc.FcLph.paIELl+................p......................................................................................................................................sp..................................hhcAlpau+c......pl.s...hsp...........................t....hhp-lpphhuLLsats....................................................................pshpp.......................Lhss.pp..............hpplsppFspsh..hphh..shs.......p........ps........................LthhlpsGh.ulpshh ...............................................................................................h.ptpcItctl.tps.p.lppAlph.s..pp..........................................p.s.p..Lhcp..........s.....................................................................................................ssL...FpLpp.p.a.lELl+...............s......................................................................................................................................................................................................................................................................................................................................................................sp...............................................hp..cAlpaupp...........pl..s......hsp...............................phhc-.l.p.c.s.huLL.sa.ssstp.......................................................................sshtp......llss..pp.................hpplssthspsh...h.phh...shs...........p...........ps........................Lt
hhlptsh.sh....h............................................................................................................................................. 0 321 541 796 +10440 PF10608 MAGUK_N_PEST Polyubiquitination (PEST) N-terminal domain of MAGUK Coggill P anon Pfam-B_18049 (release 22.0) Domain The residues upstream of this domain are the probable palmitoylation sites, particularly two cysteines. The domain has a putative PEST site at the very start that seems to be responsible for poly-ubiquitination [1]. PEST domains are polypeptide sequences enriched in proline (P), glutamic acid (E), serine (S) and threonine (T) that target proteins for rapid destruction. The whole domain, in conjunction with a C-terminal domain of the longer protein, is necessary for dimerisation of the whole protein [2]. 21.60 21.60 23.00 21.80 20.80 21.10 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -10.03 0.72 -3.58 14 339 2009-01-15 18:05:59 2008-04-24 15:02:41 4 33 36 21 110 279 0 70.70 44 10.98 CHANGED p+YRYQD--os.P.E......pSssplsscshssELlclu-K..sluph-Nl.......HGaVsHsHISPh..................................................pAsssPllVNT-oL-sssY...VNGT-s-aEaE ..............................................tYth.spps...P.p.........s.hstt......................................sh..p..hosh...................................................pAsss..PllVNTDoL-sssY.........VNGT-u-hEYE................................................. 0 5 15 41 +10441 PF10609 ParA ParA/MinD ATPase like Mistry J, Wood V anon Pfam-B_177 (release 21.0) Family This family contains ATPases involved in plasmid partitioning [1]. It also contains the cytosolic Fe-S cluster assembling factor NBP35 which is required for biogenesis and export of both ribosomal subunits [2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.68 0.72 -4.10 250 4990 2012-10-05 12:31:09 2008-04-24 17:05:32 4 43 4013 3 1687 4768 2898 81.20 48 23.71 CHANGED LDYLllDhPPGTGDhp.LolsQpl..........s..............ls.......Gul.lVTTPQ-l..........AlhDscKulshh..c+hslPllGlVENM......ShahCPp.C.....ucppplFG..pG ..............................LDYLllDhPP..G..T.G..Dl.p.LolsQpl...........P..................................ls....Gu..llVTTPQcl.................AlhDA...cKuls.Mh..c..K...l......p.l..P.l.L.........GlV.E...N.M......S...h..a..hCss..C.....Gcc..ptlFGp........................................... 1 591 1076 1434 +10442 PF10610 Tafi-CsgC Thin aggregative fimbriae synthesis protein FIGFAMs, Mistry J, Coggill P anon FIG009025 (Release 2.0) Family Fimbriae are cell-surface protein polymers, of eg. E coli and Salmonella spp, that mediate interactions important for host and environmental persistence, development of biofilms, motility, colonisation and invasion of cells, and conjugation. Four general assembly pathways for different fimbriae have been proposed, one of which is extracellular nucleation-precipitation (ENP), that differs from the others in that fibre-growth occurs extracellularly. Thin aggregative fimbriae (Tafi) are the only fimbriae dependent on the ENP pathway. Tafi were first identified in Salmonella spp and the controlling operon termed agf; however subsequent isolation of the homologous operon in E coli led to its being called csg. Tafi are known as curli because, in the absence of extracellular polysaccharides, their morphology appears curled; however, when expressed with such polysaccharides their morphology appears as a tangled amorphous matrix. The gene agfC is found to be transcribed at low levels, localised to the periplasm in a mature form, and in combination with AgfE is important for AgfA extracellular assembly, which facilitates the synthesis of Tafi. 
The genes involved in Tafi production are organised into two adjacent divergently transcribed operons, agfBAC and agfDEFG, both of which are required for biosynthesis and assembly [1]. 25.00 25.00 26.80 26.60 22.20 20.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.53 0.72 -4.21 2 412 2009-01-15 18:05:59 2008-04-24 18:01:26 4 1 407 3 12 89 0 103.40 84 92.46 CHANGED LLLAALSSQITFNTTQQG-lYTIlPpVTLopsClCcVQIhulR-GpuGQSpTpQcpTLSLPANQPIsLT+LSLNISP-DpVKIlVTVSDGQSLHLSQQW.PSsEKS ...................LLLAALSSQITFNTTQQGDlYTIIPEVTLTQSCLCRVQILSLREGSSGQSQTKQEKTLSLPANQPIALTKLSLNISP-DRVKIVVTVSDGQSLHLSQQWPPSSEKS...... 0 1 3 7 +10443 PF10611 DUF2469 Protein of unknown function (DUF2469) FIGFAMs, Mistry J anon FIG004032 (Release 2.0) Family Member proteins often found in Actinomycetes clustered with signal peptidase and/or RNAse-HII. 25.00 25.00 29.00 137.90 20.50 19.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.36 0.72 -3.65 12 430 2009-09-11 15:00:03 2008-04-25 09:17:54 4 1 430 0 109 192 35 101.10 69 98.07 CHANGED MSAEDLEpYEoEMELpLYREYRDVVGlFpYVVETERRFYLsNpV-lpsRoss.G-VYFEVoMsDAWVWDhYRPARFVKsVRVlTFKDVNlEELsKs.-l.-lPc ..MSAEDLEpYEs-MELsLYREY+DVVG.FoYVVETERRFYLANsV-lpsRs.s.s.G-VYFEVphuDAWVWDMYRPuRFVKpVRVlTFKDVNIEE.lsKs.Dl.clPp...... 0 33 80 101 +10444 PF10612 Spore-coat_CotZ Spore coat protein Z FIGFAMs, Mistry J, Coggill P anon FIG014057 (Release 2.0) Family This family has members annotated as Spore coat protein Z, otherwise known as CotZ, It is a cysteine-rich spore coat family, and along with CotY is necessary for assembly of intact exosporium. 
19.90 19.90 20.40 21.00 18.30 17.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.82 0.71 -11.46 0.71 -4.34 11 223 2009-01-15 18:05:59 2008-04-25 11:28:31 4 2 119 0 21 136 0 148.80 56 93.76 CHANGED MSCsppcth.......tspNCVs-sVctIc-LQ-us..--..sCsTuCasshLu..sos...shuDThPFlLaTKpGsPFpA...Fu.ssuplsss...Ch.SsFFRVEslcD.sCAsLRVL..........pshststshLchs-ps..........lCps..............hcLcKTshCIpVDLsCFCAIQCLssphl ......................MSCNpNccH.......ss.sCVssVV+FIpELQ-Cu..TT....TCGSGCEl...PF...LG...AHNsA..SVANTRPFILYTK....sG.sPFEA.....FA..PSusL..suCc......SPIFRVESlDD.D..s.C.AVLRVL...................oVVLGDs...osVPPsDDP................ICTFLu.........VPNARLlSTsoCITVDLSCFCAIQCLRDVoI........ 0 4 11 13 +10445 PF10613 Lig_chan-Glu_bd Ligated ion channel L-glutamate- and glycine-binding site Coggill P anon Pfam-B_203 (release 22.0) Domain This region, sometimes called the S1 domain, is the luminal domain just upstream of the first, M1, transmembrane region of transmembrane ion-channel proteins, and it binds L-glutamate and glycine [2]. It is found in association with Lig_chan, Pfam:PF00060. 20.30 20.30 20.30 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.10 0.72 -3.99 82 1132 2012-10-03 15:33:52 2008-04-25 14:02:08 4 36 106 435 539 1967 2 64.60 46 8.17 CHANGED PasM.h+ps.......h...tuspp.aEGaClDLlcclAchl.sFsYcltlV.tDG..pYGshpp..sG.........pWsGhlGELlp ..................................PYVM.h+cs.......tp.h...pGN-R..aE...GYC...lDLlp........E..l......Ac..h......l......G.......F.......p...Yc.l.plV...sDG.............KYGup-s...ss...................pWN.GMVGELl................... 
0 155 196 354 +10446 PF10614 CsgF Tafi-CsgF; Type VIII secretion system (T8SS), CsgF protein FIGFAMs, Mistry J, Coggill P, Desvaux M anon FIG077109 (Release 2.0) Family The extracellular nucleation-precipitation (ENP) pathway or Type VIII secretion system (T8SS) in Gram-negative (diderm) bacteria is responsible for the secretion and assembly of prepilins for fimbiae biogenesis, the prototypical curli. Besides the T2SS that can be involved in the assembly of prototypical Type 4 pilus, the T4SS that can be involved in the biogenesis of the prototypical pilus T, the T3SS involved in the assembly of the injectisome and the T7SS involved in the formation of the prototypical Type 1 pilus, the T8SS differs in that fibre-growth occurs extracellularly. The curli, also called thin aggregative fimbriae (Tafi), are the only fimbriae dependent on the T8SS. Tafi were first identified in Salmonella spp and the controlling operon termed agf; however subsequent isolation of the homologous operon in E coli led to its being called csg. In the absence of extracellular polysaccharides Tafi appear curled, although when expressed with such polysaccharides their morphology appears as a tangled amorphous matrix [2]. CsgF is one of three putative curli assembly factors appearing to act as a nucleator protein. Unlike eukaryotic amyloid formation, curli biogenesis is a productive pathway requiring a specific assembly machinery [1]. 
25.00 25.00 29.70 29.60 24.30 18.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.69 0.71 -4.19 6 534 2009-01-15 18:05:59 2008-04-25 14:25:13 4 2 527 0 56 165 266 127.60 69 95.73 CHANGED h+hhGl..hAhLlhhtA..ussApAusLVYpPhNPsFGGsPhNuuaLhupANAQNphpcsstsss.............ps.Spu-hFApQLQoQLLSuLusQlspAIFG-ssQcuGshoFsupoloas+sssslslsIsDssTGpsTpIsVP..ss ......................hl...........l.hhhh....SP.LoWAGsMTFQFRNPNFGGNPNNGAFLLNSAQA.QNSYKDPSY.s-DFGI..........ETPSALDNFTQAIQSQILGGLLoNINTG..............K..P.....GR...........MVTNDaIVD.IA.N.R.D.GQ.LQLNVTDRKTGpTSTIpVSGLQs...................... 0 12 22 37 +10447 PF10615 DUF2470 Protein of unknown function (DUF2470) FIGFAMs, Mistry J, Coggill P anon FIG076093 (Release 2.0) Family This family is a putative haem-iron utilisation family, as many members are annotated as being pyridoxamine 5'-phosphate oxidase-related, FMN-binding; however this could not be confirmed. 21.00 21.00 21.00 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.98 0.72 -3.76 50 682 2009-01-15 18:05:59 2008-04-25 15:11:28 4 12 625 9 274 592 273 81.30 25 33.83 CHANGED tuDPlsst...susplspHMNsDHu-AlhtYAptauGl.....sss...ssA.......pMtul-spuhcLpl.............ss.tslclsF-ps.......Lsssp-s+psLVtMs .................................h............tppllpHMNs-Htcsltths.c..p.a..ush...........tps...........pss..........................phhulDhpGlclth...........t...............sp.pslRl.Fsp.........ss.-.ps.l+psllth................................... 1 87 181 241 +10448 PF10616 DUF2471 Protein of unknown function (DUF2471) FIGFAMs, Mistry J, Coggill P anon FIG076041 (Release 2.0) Family The function of this family is unknown. Members all come from Burkholderia spp. Swiss:A2WH83 is annotated as Serine/threonine-protein kinase, but this could not be confirmed. 
25.00 25.00 31.20 31.00 22.30 18.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.56 0.71 -3.93 7 81 2009-01-15 18:05:59 2008-04-28 11:07:54 4 1 56 0 27 68 4 122.40 49 95.15 CHANGED spp.slsslpFcpAAc....DLc+IVtpIAtRYlspt........VslTWphLhsIEtEAhuDLGFtSRpDushlshF.R.ushphPtsD..-hlDasp.ossLPAVathshsthEpsu.cA.pp................Ahss ...................p-psLAALpFcsAA+....DLEpIVppIApRYIpQp........VPLTWRLLHAlEAEALADLGFASRHDshlhsLFpRPu-hcaPETD..DsVDFGp.SsALPAVFuFAVuAYEtAA.c.sttp...sst................ 0 1 5 13 +10449 PF10617 DUF2474 Protein of unknown function (DUF2474) FIGfam, Mistry J, Coggill P anon FIG073099 (Release 2.0) Family This family of short proteins has no known function. 24.80 24.80 24.90 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.92 0.72 -4.36 18 344 2009-01-15 18:05:59 2008-04-28 11:30:32 4 1 323 0 76 225 3 39.40 44 83.46 CHANGED ppshW.pRLuWLllIWuuSVhALGlVAhlhRLhMsAAGLpo ......h...hW.+RlsWhlsIWsuSVLsLusVuhLhRllMsuAGhpo..... 0 6 24 51 +10450 PF10618 Tail_tube DUF2473; Phage tail tube protein FIGfams, Mistry J, Bateman A anon FIG030252 (Release 2.0) Family This bacterial family of proteins contains phage tail tube proteins related to the Mu phage protein Swiss:P79679 [1]. Bacteriophage Mu has an eicosahedral head and contractile tail. The tail is composed of an outer sheath and an inner tube. 21.60 21.60 21.90 22.00 21.40 21.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.01 0.71 -4.37 24 282 2009-01-15 18:05:59 2008-04-28 12:44:06 4 1 261 0 33 148 1 116.70 36 98.55 CHANGED Mut..plsGsshl+lsGt.plpspsu.sphs.ushpREslhG..stlhGapEp.p.sshlcsslhsstshs..ltclsshsshTlohEhsNGpsYllssAahs-s..sspsc-GplslpacGppsph. .............u..+luGTsal+lDGp.pLslsG.G.hchshsshtR-slhGh.sushsa.KEo.+.APalcsohpsstshs..lscIs.stsshTIs.sE.huN..GpsYlhosAWhtsps..shss--GTl-.lcFcGppush......... 
0 6 14 25 +10452 PF10620 MdcG Phosphoribosyl-dephospho-CoA transferase MdcG FIGfams, Mistry J anon FIG003211 (Release 2.0) Family MdcG is a phosphoribosyl-dephospho-CoA transferase that is involved in the biosynthesis of the prosthetic group of malonate decarboxylase [1]. Malonate decarboxylase from Klebsiella pneumoniae contains an acyl carrier protein (MdcC) to which a 2'-(5' '-phosphoribosyl)-3'-dephospho-CoA prosthetic group is attached via phosphodiester linkage. MdcG catalyses the following reaction: 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA + apo-[acyl-carrier-protein] = holo-[acyl-carrier-protein] + diphosphate. 26.10 26.10 26.40 45.10 22.10 25.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.39 0.70 -4.80 47 349 2012-10-02 22:47:23 2008-04-28 15:00:13 4 2 315 0 97 355 6 191.60 29 95.30 CHANGED M.....................p.HDLlalpssu....hh.......ssss.shltshh.....tsuhPlVVRRs.s....sssGtlslGlth.....tspcp..RhuhhlssssltcshsP.sLsph.......hst..tshsh.htslps.L.s.s.ssh....shshtlhGSsuap......................hhTGlshlpssSDLDLllps.sssh...phpplhphLt...ppts.hRlDsplphPsG.uVuh+Ehtssss.....pVLlKstcGspLhsps..a ......................................t.HcLlhhts.s....hh...................s.st.taltthh.....ttuhPllVRRs.s....sssstlslGlps.....ttppp..RhAh.hshssltphhsP.sLsph.........tt...tsh.sh..h.slpt..ltshh.pth....shshGlhGShuap......................hsTGlshlpss....SDLDLL.lps.spsh........phtthlphl...tt.h.tphs.hRlDspl.hs.p.G.uhuh+Eahtsst.............plLlKospGspLhtss.................. 0 13 40 66 +10453 PF10621 FpoO zf-Fe2-S2-FpoO; F420H2 dehydrogenase subunit FpoO FIGfam, Mistry J, Coggill P anon FIG062058 (Release 2.0) Family This is the FpoO subunit of F420H2 dehydrogenase, an enzyme which oxidises reduced coenzyme F420. Reduced coenzyme F420 is a universal electron carrier in methanogens. 
25.00 25.00 45.10 45.00 20.20 22.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.66 0.71 -4.03 3 11 2009-09-11 06:40:42 2008-04-28 16:08:27 4 1 10 0 9 10 0 103.40 43 94.51 CHANGED MTDCDLCG+AIPTVIPVRVFRPRLKFAYPEGVWKGLCEuCLDSAQKTYLElNKcpsSCR+GKCsLCGcKTpVaPVElQVPDFSKGlVlKcVclChKCL-ulsEoYIRaKKEQIE...CEHGH .............MsDCDLCGhulPTlhPVRVhtPhhchuYPEGVWKGLC-sCL-SApcTY.Eh..scspsus....ppGKCsLCGs+stlasVElplPsFpps...cshplChpCLc.spEsa...h+-.................... 0 1 6 7 +10454 PF10622 Ehbp Energy-converting hydrogenase B subunit P (EhbP) FIGfams, Mistry J anon FIG124174 (Release 2.0) Family Ehb (energy-converting hydrogenase B) is an methanogenic archaeal enzyme that functions in one of the metabolic pathways involved in methanol reduction to methane. This family contains subunit P of Ehb. 19.50 19.50 20.60 118.80 18.70 19.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.70 0.72 -4.03 6 26 2009-01-15 18:05:59 2008-04-28 16:16:51 4 1 26 0 17 23 5 84.10 58 91.35 CHANGED LhP+hhhuLGGYIhET...........pFPaRNlIVsNPTsEPIKIEVPsaD-pWIEcH+cLGLhVVPVs-DDDFluha+MsccKlc+u .LLPKhsMSLGGYIRETh.......t..pFPYRNlIVGNPTsEPIKI-VPsYDEsWIE+H+cLGLIVVPVpc-DDFVGlF+hVcpKlcc.... 1 3 7 13 +10455 PF10623 PilI Plasmid conjugative transfer protein PilI FIGfams, Mistry J anon FIG136100 (Release 2.0) Family The thin pilus of plasmid R64 belongs to the type IV family and is required for liquid matings. pilI is one of 14 genes that have been identified as being involved in biogenesis of the R64 thin pilus [1]. 
19.70 19.70 19.80 26.20 19.60 18.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.83 0.72 -4.05 2 72 2009-01-15 18:05:59 2008-04-28 17:04:57 4 1 62 0 0 21 0 80.10 63 92.57 CHANGED Mstp...+hplllhsspCc++l..h.sspD.s.hhh.FpTsDNsLllVhpssRhlhRhERhPGSpCcW+cso.u.+p+.ps.L ......MPQQHPGRLQlLVVDsHCKR+LFSTKTsTDPDELARRFCTPDNCLVVVLpsNRFLFRLERAPGSHCR..W+KGSpSRHQHLQDWL....... 0 0 0 0 +10456 PF10624 TraS Plasmid conjugative transfer entry exclusion protein TraS FIGfams, Mistry J anon FIG105028 (Release 2.0) Family Entry exclusion (Eex) is a process which prevents redundant transfer of DNA between donor cells. TraS is a protein involved in Eex. It blocks redundant conjugative DNA synthesis and transport between donor cells, and it is suggested that TraS interferes with a signalling pathway that is required to trigger DNA transfer [1]. TraS on the recipient cell is known to form an interaction with TraG on the donor cell [1]. 25.00 25.00 42.70 31.60 23.80 23.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.24 0.71 -4.68 2 81 2009-01-15 18:05:59 2008-04-29 14:53:22 4 1 66 0 1 44 0 147.20 75 98.09 CHANGED MKNLApIsLVTVlQFIACYLA-WGsAETshILhFllLWQGLFIWLF.QIRKKpplSDEFKFSKGlWYllMPVsSLLSPLLSLMlFIhGTLYELRRlSGClSl+EWhpsQls-QhsEDhtLDF-sVpac.ssaY..NPuTGhsMHGGhDSAGNTFGopWQD.sDR ...................M+sLsHIsLVTVIQFIACYLAsWGsAETlFhLhFIVLWQGLFIWLFSQI..RKKRsVSDEFKFSKGVWYIhMPVSSLLSPLLSLMVFIIGTLYELRRVSGClSl+EWMQSQVN.EQsNEDLHLDFDslp...sDFYRTNPATGLPMHG.GhDSAGNTFGosWQDYsDR..................... 0 0 0 1 +10457 PF10625 UspB Universal stress protein B (UspB) FIGfams, Mistry J anon FIG002192 (Release 2.0) Family UspB in Escherichia coli is a 14kDa protein which is predicted to be an integral membrane protein. Overexpression of UspB results in cell death in stationary phase, and mutants of uspB are sensitive to ethanol exposure during stationary phase [1]. 
25.00 25.00 61.00 60.80 18.10 17.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.47 0.72 -4.27 8 648 2009-01-15 18:05:59 2008-04-29 16:18:06 4 1 646 0 50 149 0 97.70 80 96.31 CHANGED MISGDsILhALhlVTslNhARYlSoLRsLLalMR-ucPLLYQQVDGRGFFTTHGNhoKQlRLapYI+opEYLcHHDPlFstKC-RVRcLFILoSuLlulsllshFhl .MISs.slhaALhlVshlNMARYFSSLRALLVVLRsCDPLLYQYVD.GGGFFTSHGQPNKQVRLVWYIYAQRYRDHHD-EFIRRCERVRRQFILTSALCGLVVVSLIAL..... 0 2 11 30 +10458 PF10626 TraO Conjugative transposon protein TraO FIGfams, Mistry J anon FIG055244 (Release 2.0) Family This is a family of conjugative transposon proteins. 25.00 25.00 26.10 25.90 20.20 22.20 hmmbuild --amino -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.94 0.71 -4.86 11 300 2009-01-15 18:05:59 2008-04-29 16:58:48 4 1 111 0 28 251 2 162.60 48 87.16 CHANGED LPGMKGlplTuGM..sDGh+h.pscs-hGYtFGlAhoTYsKssN+WVhGuEYLpKpaPY+shpIPluQFTuEGGYYashLSDssKshFlslGhSALuGYEoVNhGcpLL.DGusLpcpDuFIYGGAlTLEhEsYLoDRllLLlpsRERsLaGoDsG+FHsQaGlGlKhIl ................LPt.+GlElpuuh..s-Ghp...ststhsYthGhAlosYsKpuNpWlhGuEYlp+phsY.....+s.......hpI..PltQFTuEGGYaaplLSDspKslFlhhGhSALAGYEol.NhGcKlL.DG..uTLtc+DsFlYGGAlTL-lEsYLoDRlsLLhpsRERhLaGuDst+FHsQhGlGlKFhl................................ 0 7 24 28 +10459 PF10627 CsgE Curli assembly protein CsgE FIGfams, Mistry J anon FIG091002 (Release 2.0) Family Curli are a class highly aggregated surface fibres that are part of a complex extracellular matrix. They promote biofilm formation in addition to other activities. CsgE is a non-structural protein involved in curli biogenesis [2].\ CsgE forms an outer membrane complex with the curli assembly proteins CsgG and CsgF [1]. 
25.00 25.00 35.50 27.90 24.70 24.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.06 0.72 -4.23 20 509 2009-01-15 18:05:59 2008-04-29 17:28:52 4 1 505 0 44 135 11 103.40 76 77.48 CHANGED ElsGLllDRTloRhG+DFYhhFSsta+-ls..stshNLolcEpssupuGShlsVphspcsIYpTalu.pppslccpAppAlthVhptlsphphpt.....sosDLu..pDEh ........EVPGLLTDHTVSSIGHDFYRA..FSDKWES-Y....TGNLTINERPSARWG.S.WITITVNQDVIFQTFLFPhKRDFEKTVVFALlQTEEALNRRQIsQuLL..STuDLA+DEF............ 0 8 16 31 +10460 PF10628 CotE Outer spore coat protein E (CotE) FIGfams, Mistry J anon FIG006437 (Release 2.0) Family CotE is a morphogenic protein that is required for the assembly of the outer coat of the endospore [1] and spore resistance to lysozyme [2]. CotE also regulates the expression of cotA, cotB, cotC and other genes encoding spore outer coat proteins [1]. The timing of cotE expression has been shown in Bacillus subtilis to affect spore coat morphology but not lysozyme resistance [3]. 25.00 25.00 28.60 78.40 18.10 17.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.22 0.71 -4.91 9 180 2009-09-11 05:35:09 2008-04-30 14:11:34 4 1 178 0 35 94 1 178.80 59 98.44 CHANGED MuE......YREIITKAVVuKGRKFTpuoHTIoPsc+PoSILGCWIINHpYcA.+KsGKsVElpGpYDINVWYSYscNTKT-VVTEpVpYsDllpLpYRDcssl.sD-h-VlA+VlQQPNCLEAsISssGsKIhVpVEREFlVEVIGETKVsVuVNPpspp-D.s....hph-hpD-EhE-lsPsFLtsppEE .....MSEaREIIT.KAVVGKGR.K.aTp..STHTspsssc..P.T.SILGCWlINHpY......EA......+Ks......GKpVEI-GaYDVNsWYSacsNTKTEVVTERVsYsDpVslsYRDc.N.ap.uDDhElIARVIQ.PNCLEAhlSPNGNK..IVVpVEREFlsEVVGETKlCVuV..N..P-..Gss.-.sD-........-...apl..t.D..-EaE-LDPsFlV-s.E............. 0 13 25 27 +10461 PF10629 DUF2475 Protein of unknown function (DUF2475) Coggill P anon SWISS-PROT(UPF0573) Family This family of proteins has no known function. 
21.60 21.60 21.80 21.70 21.00 21.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.44 0.72 -3.73 19 290 2009-01-15 18:05:59 2008-04-30 14:19:30 4 7 77 0 189 287 0 56.90 28 33.47 CHANGED sPthlPGYpGasPthtaphGp..TYGpsTtphhps.pstt.tp.h.............h.p..t.ssphhlspR ......thlPG...YsGasPth..+..ap..hGp..TYGpsTtphh..tt......t...................................t..................................................... 0 73 91 134 +10462 PF10630 DUF2476 Protein of unknown function (DUF2476) Coggill P anon SWISS-PROT(UPF0572) Family This is a family of proteins of unknown function.\ \ \ The family is rich in proline residues. 21.70 21.70 21.70 35.10 20.40 21.50 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.88 0.70 -4.51 3 61 2009-01-15 18:05:59 2008-04-30 14:48:06 4 3 21 0 35 65 0 234.80 48 96.78 CHANGED M.GoRPsSPSADLAPhWu.QssGPoPAKRsRL.pEPAsPEsLAQPu.EDPA.....-shTShVhLsAGsALclPL-sVDLlLElpPhSVLpVSLQGHTlI..llPEsLtSSV.-hsht.tcuQPGh.s.s..GAtupD......VslppEoFCAsV.EhhhpE-ss-EDADsEFsEhWMsuPDDpAsGLasSusSl.SPhp-GQVPGPso.ussPuAEppSPRFIW-L-hsMLcPlPuSPLQPLPPSPSPNPQEQs.....LPsRPPC......KARRRL ...................................M.GoRPRSPSAp.sAPhWu.QPuGPu.PAKRhRL..cEP..AssEsh..ssPshEs..Pss..ssssLTS..lVVLuuGCALplsL-.-VDLVLEPtPTSlLpVSLsG..H..TLI..LlPEsLLuSs...tts.spssussGLEss.shLuA..tc-......Vllppt.FCuuVPElAsQE-A..h.-EDA..-spF.p.hhsussGpAuGL..asS..s..p..uh.uP..h.pu.hst.Pps...sssPusEppSPp.sha-L-h+LLEPhPoSPLQPLPPSP...S..PG.Pptcs...........hPt.pP......P...C......KARRRL....................................................................... 0 4 4 15 +10463 PF10631 DUF2477 Protein of unknown function (DUF2477) Coggill P anon SWISS-PROT(UPF0574) Family This is a family of proteins with no known function. The family is rich in proline residues. 
20.60 20.60 20.80 87.60 19.70 19.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.15 0.71 -3.96 3 22 2009-01-15 18:05:59 2008-04-30 17:09:40 4 1 20 0 12 26 0 135.90 75 92.45 CHANGED MDPHEMVVKNPYAHISIPRAHLRPDLGQQLEsuPosSSSSEoQPLPVGoCsPEPltLLQsTEAPGPKGsKG.pGsAP.psQQAWQQPCNPYSSGQRPAGLTYAGPPPAGRGDDIAHHCCCCPCCSCCHCPRFCRCHSCCCl MDPpEMVVKNPYAHISIPRAHLRPDLGQQLEsAss..oSSsEhQPLPsGsCssEPT+LLQPT.EsP.GPKG...s...KGspGAsPhQsQQAWQQPGNPYuSuQRPAGLT...YAGhPPlGRGDDIAHHCCCCPCCpCCHCP.FCRCHSCCCl. 0 1 1 1 +10464 PF10632 He_PIG_assoc He_PIG associated, NEW1 domain of bacterial glycohydrolase Naumoff D, Coggill P anon Pfam-B_97991 (release 22.0) Domain The English-language version of the first reference can be found on pages 388-399 of the above. This domain has been named NEW1 but its actual function is not known. It is found on proteins which are bacterial galactosidases [1]. The domain is associated with the He_PIG family, Pfam:PF05345, a putative Ig-containing domain. 25.00 25.00 25.90 25.10 22.70 21.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.56 0.72 -7.11 0.72 -4.62 4 47 2009-09-11 10:36:46 2008-05-06 14:04:35 4 5 32 0 17 47 0 28.80 58 4.97 CHANGED PpINsP+slGNYPuoPFLFYIPTSGpRPM P+INuPpVhGspPuoPFLahIPsoGpRPM 0 11 14 17 +10465 PF10633 NPCBM_assoc NPCBM-assoc; NPCBM-associated, NEW3 domain of alpha-galactosidase Naumoff D, Coggill P anon Pfam-B_97993 (release 22.0) Domain The English-language version of the first reference can be found on pages 388-399 of the above. This domain has been named NEW3 but its actual function is not known. It is found on proteins which are bacterial galactosidases [1]. The domain is associated with the NPCBM family, Pfam:PF08305, a novel putative carbohydrate binding module found at the N-terminus of glycosyl hydrolases. 
23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.62 0.72 -3.93 29 604 2012-10-03 16:25:20 2008-05-06 14:23:52 4 85 329 11 233 617 92 78.10 22 12.53 CHANGED spsGcpsslphshsssu.sssspslplslss.PsGWs...stssssphs...slssGpslpsshpVpsPssAsuGs....Yslpspsphs ..............................sGps.hshp.lsl.p.N.ps..ss......s..h..p..s...h..sls.h......s......s...P.sG.......Ws.........s....s..s...s....p.hs......sl.s............s..............G..........p.s.........ts....sshslss..Psss...sGs......Y..lshpst..s............................ 0 110 180 223 +10466 PF10634 Iron_transport Fe2+ transport protein FIGfams, Mistry J anon FIG005173 (Release 2.0) Family This is a bacterial family of periplasmic proteins that are thought to function in high-affinity Fe2+ transport. 20.50 20.50 20.50 21.20 20.10 19.70 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.86 0.71 -4.32 30 560 2009-01-15 18:05:59 2008-05-06 17:09:52 4 1 550 30 101 300 159 153.60 53 80.44 CHANGED AtAtEhPIGc...Ptph..sGMEluAVYLQPl-MEPp.Gh.....hssscoDlHLEADIHAsc..sNsNGFucG.-.WlPYLslsYpLpchsssc.spcGshMP.MVAsDGPHYGsNl.KL.......tGsGpY+lpapIpPPup......ttFGRHsDKETGV.........us..WFcPaslpY.-FpasG ...............................................................htEhPIGc.......Ppph..ssMEIAAVYLQPI-MEPc..Gh.....thsAucuDlHLEADIHAsc..sN.sGFu-G-..WhPYLs.lsYcLpppDoG......p.....h......pcGohMP.MV.A.s.DGPHYGsNl.Kh........GsGpYcLsahIpsPpp......tuatRHlDcET.G.V...........Gt....Wac..P..hslcY.-FpYsG................................. 0 26 56 85 +10467 PF10635 DisA-linker DisA bacterial checkpoint controller linker region Murzin A, Coggill P anon Murzin A Domain The DisA protein is a bacterial checkpoint protein that dimerises into an octameric complex. The protein consists of three distinct domains. 
the first, N-terminal region, from 1-145 is globular and is represented by family DisA_N, Pfam:PF02457; the next 146-289 residues is this domain that consists of an elongated bundle of three alpha helices (alpha-6, alpha-10, and alpha-11), one side of which carries an additional three helices (alpha7-9), thus forming a spine like-linker between domains 1 and 3. The C-terminal residues of domain 3 are family HHH, Pfam:PF00633, the specific DNA-binding domain. The octameric complex thus has structurally linked nucleotide-binding and DNA-binding HhH domains and the nucleotide-binding domains are bound to a cyclic di-adenosine phosphate such that DisA is a specific di-adenylate cyclase. The di-adenylate cyclase activity is strongly suppressed by binding to branched DNA, but not to duplex or single-stranded DNA, suggesting a role for DisA as a monitor of the presence of stalled replication forks or recombination intermediates via DNA structure-modulated c-di-AMP synthesis [1]. 25.00 25.00 26.20 74.70 23.60 24.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.81 0.71 -4.70 39 608 2009-01-15 18:05:59 2008-05-06 17:13:32 4 5 605 8 154 378 117 145.50 42 40.47 CHANGED LcDsusILs+ANQAlpTLEKY+ssLDcslssLssLEhEDlVTltDVspslQRhEMVhRIspEIcpYllELGsEGRLlshQLcELlss.l-p-thLlI+DYs.....pcshshpplhcplpslssp-Ll-hsslu+lLGYssssshlDs.l ....Lc-susILo+ANQAltTLE+Y+shLc-shssLotLEhEDhVTlpDVssVlQRlEMVhRIssEIptYlhELGsEGRLlpLQLsELlss.l-s-ttLll+DYh..................ppstshppsLppLppLoss-LL-hstlu.+hlGYssssph.Dp............. 0 67 124 144 +10468 PF10636 hemP Hemin uptake protein hemP FIGfams, Mistry J anon FIG024330 (Release 2.0) Family This is a bacterial family of proteins that are involved in the uptake of the iron source hemin [1]. 
25.00 25.00 30.90 30.40 24.10 16.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.53 0.72 -4.36 56 837 2009-01-15 18:05:59 2008-05-06 17:34:36 4 1 806 3 146 406 26 38.00 57 57.52 CHANGED stlsocsLhp.GspplhIpHsGphYpLRlT+sGKLILTK .......RplsSpsLLG.scu+llI-HsGQcYh..LRpTpAGKLlLTK... 0 25 79 112 +10469 PF10637 Ofd1_CTDD Oxoglutarate and iron-dependent oxygenase degradation C-term Wood V, Coggill P anon Pfam-B_18095 (release 22.0) Domain Ofd1 is a prolyl 4-hydroxylase-like 2-oxoglutarate-Fe(II) dioxygenase that accelerates the degradation of Sre1N in the presence of oxygen. The domain is conserved from yeasts to humans. Yeast Sre1 is the orthologue of mammalian sterol regulatory element binding protein (SREBP), and it responds to changes in oxygen-dependent sterol synthesis as an indirect measure of oxygen availability. However, unlike the prolyl 4-hydroxylases that regulate mammalian hypoxia-inducible factor, Ofd1 uses multiple domains to regulate Sre1N degradation by oxygen; the Ofd1 N-terminal dioxygenase domain is required for oxygen sensing and this Ofd1 C-terminal domain accelerates Sre1N degradation in yeasts [1]. 
20.70 20.70 21.50 20.70 20.30 20.60 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.94 0.70 -5.26 39 253 2012-10-10 13:59:34 2008-05-07 10:58:38 4 9 224 4 187 298 18 273.60 28 46.78 CHANGED s-lsaLhpYlsPpYLss-sh-clpcpFt-cSslpLccFLpschuspl+phlctp-pc...phPtpup-lpt..s...................WpsAtPPHKpRYhahcsps........t..pt..t...pEh.................................Lsslh.S.uF+KWLshlTul.pl..................................................sSpp..llsRRFR.G.DYTLAostc..........ttc.....stLEsoLsLT..Po..................................................sW-ss...............................................................EhGGYElYM...................ss-----...............................tDsAlY+u...................................................................s-pDDulLhopssuWNsLslVLRDpGlL+FVKYVStsAKGsRWDlospasVc..-p-.p-cpp ..............................................................................t.-hphLhcalsPsYLsschhpplpcpFp-p..SplpLppFLppchhpplpphlcpp-hp.....................................................WpsttPPpK++Yhhhptpp............................h.p.................................................h...h.hpLhpSpuFhphLu.hTuL.pl..................................................................................................................ssp.p...h.hRRa...+pG..cYTLssstp............................ttcstL-.hsLsls..ss..................................................uWps........................................................................................................-hGGaphYh....................spt.tt.......................................................t.th.pt..............................................................................................scs-Ds.Lhohsss.NpLslVhR.D.pusL+FVKYluppu.s....sp............................................................................ 
0 65 99 153 +10470 PF10638 Sfi1_C Spindle body associated protein C-terminus Wood V, Coggill P anon Wood V Domain This C-terminal domain of spindle-body-associated protein Sfi1 has an important role to play in the bridge-splitting during bi-polar spindle assembly, and this separation event possibly requires interaction with integral components of the nuclear envelope, such as the Mps2-Bbp1 complex [1]. Centrally to this domain is a region carrying centrin-binding repeats with repeating units containing tryptophan, family Sfi1_central, Pfam:PF08457. 20.80 20.80 22.60 63.20 18.80 17.10 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.54 0.72 -3.62 5 21 2009-01-15 18:05:59 2008-05-07 13:35:04 4 2 21 0 13 21 0 108.00 46 11.30 CHANGED +shslsc-LpTPhKoPs.ttusTIPGSERVK+a+MEslKs.RYSRARRA..IPSPIKSSsVLDSTlK++Ls.........ssstls-csTTststph.lccpsK+luSKV+cIcFs+IPp .......hshpp-LcTPlR..oss..psusTIPGSERlKpaRMEAMKS.HYSRARRA..IPSPlKSSSlLDSTAKKpIs.........hcss.sslssssopshPhh...c+ps+NhsSKlc+IcFsRIPt.............................. 0 1 6 12 +10471 PF10639 UPF0546 Uncharacterised protein family UPF0546 Coggill PC anon UPF0546 Family This family of proteins has no known function. Many members are annotated as potential transmembrane proteins. 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.52 0.71 -4.26 16 235 2012-10-02 19:55:49 2008-05-08 09:31:42 4 5 195 0 151 384 13 111.00 33 75.97 CHANGED LLlVullWGsTNPhl++uStGhppspps.......hhh-h+a.........LhpphcYhlPhllNQoGSVlFahsLscs-LSluVPlsNSLsFlhTllsu.hlLtEchhst+ohlGhhLVlsGssLCs ........................................................................hlhVuhhW.G.hT.sP..hl.++uu....ts.hpthpp...........h...p..hhtchth...........................................Lhhs.pYhl.PhllN......sG.S.lh.a.a..h..h...Lup.s............-LSLuVPlsN..SLuF.lFTllsu.hhltccl.h.u.p.p.s.hhGhhLhlhGlslC................... 
0 44 77 117 +10472 PF10640 Pox_ATPase-GT mRNA capping enzyme N-terminal, ATPase and guanylyltransferase Coggill P anon Karlin D Domain This domain is the N-terminus of the large subunit viral mRNA capping enzyme, and carries both the ATPase and the guanylyltransferase activities of the enzyme. The guanylyltransferase enzymatic region runs from residues 242 (leucine)-273(arginine) [1], the core of the acitve site being the lysine residue at 260 [2]. The ATPase activity is at the very N-terminal part of the domain [3]. 25.00 25.00 324.10 324.10 20.70 20.00 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.90 0.70 -5.64 14 75 2009-09-10 16:44:40 2008-05-09 14:14:22 4 1 47 0 0 67 0 314.60 67 37.27 CHANGED hspssssc-lpHEVELlalpPPLlTLoNlh..sluoppESYIhFolss.tccpsKlRs+lPhS+lHGLDlKNsQLV-slDsIlWE+KoLlpEpc..l.c.ptsllRaSTEE+alFlDY.....K+ahSuI+LELVNllps+lKsll.VDFKlKYFLGSGAQuK..SSLLasLNH...PKs+PssoLEFEIlsp.s....pplspstLhsELpslh+tlF.MusscslhL.s.sthcsPl+ThMLKKQ-l.sl-L-sLYlToKTDGVhshVplsppulaCaFoHLsYhI+Yshp+pl-splhL..aGEAlK..tsphhslalIKlhpPt.........lsDRlcEtcalp-pL .l-.QsSTAYEINNELELVFIKPPLITLTNVV..NISTIQESFIRFTVTN..KEGVKIRTKIPLSKVHGLDVKNVQLVDAIDNIVWEKKSLVTEsR..L.H.KECLLRLSTEERHIFLDY.....KKYGSSIRLELVNLIQAKTKNFT.IDFKLKYFLGSGAQSK..SSLLHAINH...PKSRPNTSLEIEFTPR-s....EpVPYDELIKELTTLuRHIF.MASPENVIL.S.PPINsPIKTFMLPKQDIVGLDLENLYAVTKTDGIPlTIRVTScGlYCYFTHLGYIIRYPlKRhIDo.EVVl..FGEAVK...DKsWTVYLIKLIEPV.........IsDRLEEScYVESKL. 0 0 0 0 +10474 PF10642 Tom5 Mitochondrial import receptor subunit or translocase Wood V, Coggill P anon Pfam-B_89651 (release 22.0) Family This protein family is very short and is only found in yeasts. Tom5 is one of three very small translocases of the mitochondrial outer membrane. Tom5 links mitochondrial preprotein receptors to the general import pore [1]. Although Tom5 has allegedly been identified in vertebrates this could not be confirmed. 
20.20 20.20 21.40 21.40 19.70 19.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.20 0.72 -4.32 11 73 2009-01-15 18:05:59 2008-05-09 16:07:55 4 1 72 0 57 61 0 48.30 39 89.28 CHANGED MFGs.stsQ.ScEEl+tpEcpAstTlppAshuAshLa..lSPhlhchl..pK..h ...MFGs.s..p.ScEEl+AtEtpAstTlppssssushLY......LSPhslchVp+..h..... 1 12 29 49 +10475 PF10643 Cytochrome-c551 Photosystem P840 reaction-centre cytochrome c-551 FIGFam, Mistry J, Coggill P anon FIG055090 (Release 2.0) Family A photosynthetic reaction-centre complex is found in certain green sulphur bacteria such as Chlorobium vibrioforme which are anaerobic photo-auto-trophic organisms. The primary electron donor is P840, a probable B-Chl a dimer, and the primary electron acceptor is a B-Chl monomer. Also on the donor side c-type cytochromes are known to function as electron donors to photo-oxidised P840. This family is thus the secondary endogenous donor of the photosynthetic reaction-centre complex and is a membrane-bound cytochrome containing a single haem group. 26.30 26.30 26.50 26.60 26.00 25.70 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.61 0.70 -5.06 7 19 2009-01-15 18:05:59 2008-05-09 16:46:09 4 4 15 2 16 20 1 172.00 39 84.60 CHANGED MDNKSNGKLIALAlGGAVLMGALFFGVSFLTGYplPAENlSslLTPL+SFhGWFLLIFhASLIIhGLGKMSS+ISDKWFLSFPLoIhsIVAlMFs.L...W.+.........GRTTTlDGcYIRoVspLcAFL...................scPAsussVPsA.............PA.......................GhDFsAAKcLhss+CNKCHolsSVtDtL+.KY+KpGps-hlVhcMpuhPsSGIoccDsssIh.alsE...KY ...............................D.ppphpLhulAhhGAhhMGsLhahlShLTGa..PA.NhS.hLsPLpSFhGWhhLIhhuSlhIhshG+MSutISspWFlShPlshhhIVhlMFh.L...W.+.........G.RTTh.-Gp.IRoVtpLpta....................ppsshstphs.s..st................................s.shsuAcpLhst+CNc.CHTlcoVt-th+.+YcKpGpl-hlVp+MpshPsSsIoccDshpIh.YLpppa......... 
0 1 7 13 +10476 PF10644 Misat_Tub_SegII Misat_Myo_SegII; Misato Segment II tubulin-like domain Wood V, Coggill P anon Pfam-B_7826 (release 21.0) Domain The misato protein contains three distinct, conserved domains, segments I, II and III. Segments I and III are common to Tubulins Pfam:PF00091, but segment II aligns with myosin heavy chain sequences from D. melanogaster (PIR C35815), rabbit (SP P04460), and human (PIR S12458). Segment II of misato is a major contributor to its greater length compared with the various tubulins. The most significant sequence similarities to this 54-amino acid region are from a motif found in the heavy chains of myosins from different organisms. A comparison of segment II with the vertebrate myosin heavy chains reveals that it is homologous to a myosin peptide in the hinge region linking the S2 and LMM domains. Segment II also contains heptad repeats which are characteristic of the myosin tail alpha-helical coiled-coils [1]. This myosin-like homology may be due only to the fact that both myosin and Misato carry coiled-coils, which appear similar but are not necessarily homologous (Wood V, personal communication). 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.64 0.71 -4.03 51 270 2012-10-03 12:11:42 2008-05-09 17:02:44 4 8 225 0 187 2002 14 111.20 33 21.93 CHANGED +EIlTlQlGpauNalGoHFWNhQEuha..sYsspsp.......stl.spDVlaRp...Ghs..h.psphTaTPRlllhDLKGuhGoLpptut.LYp......................ptsp.tsssssWs.......s...phshpc.ps...htpspapps.L-p .................+EllTlQlGph.uNal.usHaW.N.......h........Q.....-uhh.............shssppp..................s.l...sp.D..V.haRs.........Ghs......h.pGptTYTPRhllhDLK.G.u.h.G...o...Lpp...ss...LYp..........................tpt..sss.hhWp.............u...phshpp..p..hsps.Y..psLt.t.................................................................................. 
0 53 98 150 +10477 PF10645 Carb_bind Carbohydrate binding Wood V, Mistry J anon Pfam-B_63360 (release 22.0) Domain This is a carbohydrate binding domain which has been shown in Schizosaccharomyces pombe to be required for septum localisation [1]. 25.00 25.00 26.80 39.00 21.60 21.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.09 0.72 -4.15 5 22 2009-01-15 18:05:59 2008-05-12 12:10:23 4 4 14 0 20 21 0 52.20 46 19.55 CHANGED uoCGuApYDuupYVC.DsshLCPIlsGsPLpsCNGACYssShYuCoNGuLu.l t.pCGsu.YDPupYlC....ssphLCPIssG.shphCsGACYsthhYsCsNssLs..... 0 8 15 19 +10478 PF10646 Germane GerMN; Sporulation and spore germination Rigden D, Coggill P anon Rigden D Domain The GerMN domain is a region of approximately 100 residues that is found, duplicated, in the Bacillus GerM protein and is implicated in both sporulation and spore germination. The domain is found in a number of different bacterial species both alone and in association with other domains such as Amidase_3 Pfam:PF01520, Gmad1 and Gmad2. It is predicted to have a novel alpha-beta fold. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.35 0.71 -10.70 0.71 -3.93 160 1597 2009-01-15 18:05:59 2008-05-12 17:01:59 4 13 985 0 421 1297 99 116.80 19 37.80 CHANGED hslYa........hs...sssth..........Llspsphh...............shhptslppLl.pG.P.............hhsslPssspl...........h.slplp.....s.hshlshop.phhp....ssss.cph....h....ltsllhTLs.phs.t......lppVpl.hl-Gcshphht ..................................................................................h.plYa........hs.......pss.ph.............llspsptl.................sssps.hhptslptLl.pG....Ptst..............htssls..ss.spl............................h..shslp........ts.hspls.hsp..phhp.......hssp.tpph........h.ltpllh.TLs.phs.s.........lppVpl.hlsGp.h....h....................................... 
0 178 338 387 +10479 PF10647 Gmad1 Lipoprotein LpqB beta-propeller domain Coggill P anon Rigden D Domain The Gmad1 domain is found associated with the GerMN family, Pfam:PF10646, in bacterial spore formation. It is predicted to have a beta-propeller fold and to have a passive binding role rather than a catalytic function owing to the low number of conserved hydrophilic residues. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.71 0.70 -4.99 26 420 2012-10-05 17:30:43 2008-05-12 17:22:56 4 16 404 0 114 623 108 247.70 24 43.11 CHANGED sGul................sp....hsuspssslsGshu..pst.shsSsAlStsuppsA.............uVs.......sssssLhlGshGussspsh....sup.....slocPSashs.sslWsVss.usssscllh......sss......Gpss........................stV-ssshs..........slsslplSRDGsRsAhll....s..uplhluslp+sssG...thtlssPhclsss.tpsssuluWtssssllVhspsssssV.h...VslDGutsssh.susshssslhusuussst.....lhsssssulhp.....spssphWpplsshhss....PshP ......................................................................................................Gthht......hss.tphp.h.s.GshG......pss.s.susAlut..s..s..p.....hsA................................uVs.............sstps..Lh.s..s.s.h...G..u.p.s.hpsh...............sGt......sL.s.+.P..oa.......s......h.......s...ss.........lW..sss-..sssslplht.......................ss..............Gpht...............................................phsV-.h.suhst.............tlsslpl....S.DGsRsAhll.......s....sp..l.hl...uhlp+spsu.......phhhshsh.phu..s....hs...slssluWhss.s.p..Ls.Vh..s..ps..s.ss..sVth.......lsl.DGus.......sss....ut...hsss.hsulsusssp.............lhls...s..stsll............tpts.tWppl.thhss.....s........................................................................................................................................ 
0 37 82 109 +10480 PF10648 Gmad2 Immunoglobulin-like domain of bacterial spore germination Coggill P anon Rigden D Domain This domain is found linked to the GerMN domain Pfam:PF10646 in some bacterial proteins. It is predicted to contain an immunoglobulin-like all-beta fold. 25.70 25.70 26.30 27.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.78 0.72 -3.81 20 66 2009-01-15 18:05:59 2008-05-13 09:15:26 4 10 62 0 37 66 15 87.70 28 26.99 CHANGED hplplppPpssshlsu......slpVpGp..Ap.sFEuslshclp-usupll.tcthspAssussuhGpFpsslshsss..ss.pupltVhpssspsGs...h .................IhlhsPt.sshVsu......shpVpGp....Ap...sFEuslsh+l...hDusGpll.scshsp..As.su..usshGsFpsplshps.t.st..puhl..plaphSscDGo.h.................. 0 22 31 36 +10481 PF10649 DUF2478 Protein of unknown function (DUF2478) FIGfam, Mistry J, Coggill P anon FIG046046 (Release 2.0) Family This is a family of hypothetical bacterial proteins found in the vicinity of Molybdenum ABC transporter ATP-binding gene-products MobA MobB and MobC. However the function could not be confirmed. This family appears to belong to the P-loop superfamily by alignment to Pfam:PF03266. However, the characteristic P-loop sequence motif appears to have diverged beyond recognition in this family. 
22.10 22.10 22.30 22.10 21.80 22.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.80 0.71 -4.90 57 205 2012-10-05 12:31:09 2008-05-13 16:49:41 4 3 152 0 78 206 9 157.00 37 79.33 CHANGED lAAlshsss..sss.DtlLsshAtcLtup.GhRluGlV............Qtp.....tptssppsshp.lpsL.ssGppltIoQsLGsuupuCRLDsuuLspAsutlppAlsps..sD........LlllN+FGKpEupG+GhpshIspAlutslPVLsuVsppthcsWppFuuuhustLss-ppultsW .....................lAAlhh.s.ps.sss.DsLLuphAtchppp.GlRluGhl........................Qpp.h...tssppspp.chc.lccl.soGt.p..hhI......S......Q...s....LGs..GS.....p..G..C..R..L.....DPuu....LA.c.A....u.u.sltsslpsu...sp........LLllN+FGKtEs-G+GhR.shItt.Ahut.sIPVLTsVsp.phl-u.W.c.cFuGshushLsssppulpsW.................................... 0 13 38 55 +10482 PF10650 zf-C3H1 Putative zinc-finger domain Wood V, Coggill P anon Wood V Domain This domain is conserved in fungi and might be a zinc-finger domain as it contains three conserved Cs and an H in the C-x8-C-x5-C-x3-H conformation typical of a zinc-finger. 20.90 20.90 20.90 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.40 0.72 -6.75 0.72 -4.45 14 159 2009-01-15 18:05:59 2008-05-14 12:01:54 4 9 125 0 107 155 0 22.10 48 1.64 CHANGED hlC.aElsGthCNDcsCpaQHh+ ...hC.a-LsG.sCNDc-CpaQHh... 0 31 47 76 +10483 PF10651 DUF2479 Domain of unknown function (DUF2479) FIGfam, Mistry J, Coggill P anon FIG015005 (Release 2.0) Domain This domain is found in phage from a number of different bacteria. It is purported to be a putative long tail fibre (Bacteriophage A118) protein, but this could not be confirmed. 
21.10 21.10 21.20 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.66 0.71 -4.74 17 665 2009-01-15 18:05:59 2008-05-14 14:45:55 4 14 402 19 25 471 10 166.10 25 34.80 CHANGED M....lpKhss.l-hphssphpshhstsIpFaspDp.sTAhlpFpls+cshPLslSptpscstlhLhhsss........hhhtslclhDsh..pGhlpYslPschlKp..sGpVpuplalp.p......sspslshtp.FoFpIccuLlsphss.ht.hYIcpF-clcchlpcphpcl..........cpslspspshhpp ...................................M......K..t.lphp.psph.sl.s....hsI.pF.h.ptDp.sou.sLphs.l.s.c....ss........t........s............lsLosp.tlps.clslhtcss.......t.hhsc...s...l.p...IlDsh..cGhlpYhlPschlcp..sGplcAplh.lpp.........ss...ps....l...s.s....pp.FoFslp..cuh.lsshsutph.sY.l...hsh.....pcl.....hchlp....cphpph.............cpshpphpp....p...................................................................... 1 9 17 25 +10484 PF10652 DUF2480 Protein of unknown function (DUF2480) FIGfam, Mistry J, Coggill P anon FIG020045 (Release 2.0) Family All the members of this family are uncharacterised proteins, but the environment in which they are found on the bacterial genome suggests a function as a glucose-6-phosphate isomerase (EC 5.3.1.9). This could not, however, be confirmed. 25.00 25.00 114.20 114.10 24.20 19.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.00 0.71 -4.68 18 81 2009-01-15 18:05:59 2008-05-14 18:13:37 4 1 75 0 40 88 197 167.00 47 98.13 CHANGED M..t-...EIlNRVANSpLhTFDLEDaYPcGpRhhlDIppWLh-GllL+EK-FRptlKsHDWSpYpcpaVAlaCSTDAIlPuWAYMLlsspLpPaApKVVhGsL-sLEohlYp-llsplDlssacs+PVIIKGCScKPVPpsAYlhlhpKLQPVAKSlMYGEACSoVPLYK+ ........-EI.lN+VA.pSsLlsFDLE-aYPpG.cR.h.hhDIcsaLapGhlL+EK-FRptlKpaDWopYpspaVAlhCSsDAIlPsWAYMLlsspLpPaA+cllhGsh-pLEptLapchlsplDhopapDc.VllKGCoccsVPpsAYlhhsp+L.pPlsKSlMaGEsCSoVPlaK+......... 
0 17 36 40 +10485 PF10653 Phage-A118_gp45 Protein gp45 of Bacteriophage A118 FIGfam, Mistry J, Coggill P anon FIG0160151 (Release 2.0) Family This domain is found in bacteriophage and is thought to have a gp45 function within the phage tail-fibre system. 21.60 21.60 21.60 79.80 19.40 18.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.05 0.72 -4.11 2 34 2009-01-15 18:05:59 2008-05-16 10:25:53 4 1 26 0 1 17 0 60.50 87 100.00 CHANGED MsERVFRKpT.FGsSEIahssRTKMIANPAFpQKIPL.ETGC-pMsDYIEELKLKGYEEVTR MTERVFRKpT.FGsSEIahssRTKMIANPAFpQKIPL.ETGC-pMsDYIEELKLKGYEEVTR 0 1 1 1 +10486 PF10654 DUF2481 DUF2482; Protein of unknown function (DUF2481) FIGfam, Mistry J, Coggill P anon FIG020094 (Release 2.0) Family This is a hypothetical protein family homologous to Lmo2305 in Bacteriophage A118 systems. 25.10 25.10 25.90 83.80 25.00 25.00 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.61 0.71 -4.44 4 25 2009-01-15 18:05:59 2008-05-16 11:19:24 4 1 20 0 1 17 0 126.00 65 98.31 CHANGED sVMElTENKARQREIISYlsNpsLshs-hKcLQKELNpLMNcNTEEKpKTaWsKThcRlVtNKpWp-ITltEFl-LRHAGLos.AIADaFplS+uslFNaT..pcN+pEYa+hFshs.YpKsKEhWsD ..sVMEhTcsKARQREIISYIsNNDlpLs-LhcLQKELNpLMNENT.EKQKTYWoKTFDRIV++KcWsEITIpEFs-LRNAGLTsYAIA-HFKVSKulVFNYT..QRNKKEYYplFDMsEYQKNKEhWND...... 0 1 1 1 +10487 PF10655 DUF2482 Hypothetical protein of unknown function (DUF2482) FIGfam, Mistry J, Coggill P anon FIG030041 (Release 2.0) Family All the members of this very small, very short family are derived from bacteriophages, of the SA bacteriophages 11, Mu50B, system, and from the Staphylococcal_phi-Mu50B-like_prophages subsystem. All members are hypothetical proteins. 
25.00 25.00 45.10 45.00 23.50 23.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.21 0.72 -3.95 4 148 2009-01-15 18:05:59 2008-05-16 11:42:32 4 1 106 0 1 35 0 98.00 70 98.61 CHANGED MTKNYKDMTQ-ElRDLLuEKsGELYELspEIccETEFDlLLFSolGVssGDhluSSpsALGsshsLAsLLcNpssacDlsNVIpMpKLQKhLGlDDsKED ..MTKNYKDMTQEElKDLLSEKoGELYELAKEIKtEocFDILLFSoIGVhDGDaluu.SsSVIGcsFDLAsLLDsscuY+DIlNVlQMpK.QKhLGIDDsKED...... 0 0 0 1 +10488 PF10656 DUF2483 Hypothetical protein of unknown function (DUF2483) FIGfam, Mistry J, Coggill P anon FIG032091 (Release 2.0) Family This is a family of proteins found in bacteriophage particularly of the SA bacteriophages 11, Mu50B, family, homologous to phi-ETA orf16. 25.00 25.00 41.10 41.00 19.40 19.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.42 0.72 -3.90 7 131 2009-09-11 01:17:25 2008-05-16 13:49:18 4 1 122 0 4 32 0 71.50 78 98.11 CHANGED M.KpTV...TYlIKhKDt..sLYITN+PTstpsT.pYSss+pcAREFsGh--ssIDMstHpAIKKTVTETpEYEEVth ...M.KQTV...TYIIRHRDM..PIYITNKPTDNNSDISYSTNRNRAREFNGMEEASINMDYHKAIKKTVTETIEYEEVEH.......... 0 1 1 4 +10489 PF10657 RC-P840_PscD Photosystem P840 reaction centre protein PscD FIGfam, Mistry J, Coggill P anon FIG031038 (Release 2.0) Family The photosynthetic reaction centres (RCs) of aerotolerant organisms contain a heterodimeric core, built up of two strongly homologous polypeptides each of which contributes five transmembrane peptide helices to hold a pseudo-symmetric double set of redox components. Two molecules of PscD are housed within a subunit. PscD may be involved in stabilising the PscB component since it is found to co-precipitate with FMO (Fenna-Mathews-Olson BChl a-protein) and PscB. It may also be involved in the interaction with ferredoxin [1]. 
23.40 23.40 23.60 117.30 22.40 23.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.77 0.71 -4.35 8 13 2009-01-15 18:05:59 2008-05-16 14:15:59 4 1 13 0 11 13 0 142.40 66 98.98 CHANGED MQsQLSRP.TusNQVRsSs.................SGPWSGNAAHKAEKYFITSAKRD+pGpLQlpISPASGRRKLSPTcEMIsKlIuGEIELaVLTTQPDIuIDLsQKVLDNENRYVIDFDKRGVKWTMRDIPVFYcSL+ppLCVEIDRpoYTLDEFFK ...MQsQLSRP.TusNQVRtSs.................SGPW.SGNAsHKAEKYFITSAKRD+ps+LQlpIsPASGRRKLSPTpEMIsKlIsGEIELaVLTTQPDIAIsLppKVLDNENRYVIDFDKRGVKWTMRDIPVFYsohp+pLCVEIDR+sYTLsEFFK 0 1 2 8 +10490 PF10658 DUF2484 Protein of unknown function (DUF2484) FIGfam, Mistry J, Coggill P anon FIG030013 (Release 2.0) Family A role of this family in UDP-N-acetylenolpyruvoylglucosamine reductase, as MurB, could not be confirmed. 25.00 25.00 25.80 52.30 23.30 19.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.78 0.72 -3.86 23 57 2009-01-15 18:05:59 2008-05-16 17:16:29 4 1 43 0 11 57 5 75.80 44 91.47 CHANGED Ms....luLhLuslWslsAsllAhLPSRppHWptAal..LlAlGlPLLGaVsapsGPhhGlhsLsAGhSlLRWPVhYlh+hlR ......MshSLlLAslWslsAsllAhLPuR.pHWptAhl..LIusGlPLlGaVsappGPahGlhsLhAGhShLRWPlhYLh+hlR.. 0 1 8 9 +10491 PF10659 Trypan_glycop_C Trypanosome variant surface glycoprotein C-terminal domain Bateman A anon Pfam-B_1351 (release 3.0) Domain The trypanosome parasite expresses these proteins to evade the immune response. 
21.40 4.60 21.60 4.70 21.30 4.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -11.30 0.72 -3.70 103 238 2009-01-15 18:05:59 2008-05-17 14:12:18 4 6 9 2 67 241 1 95.60 29 21.67 CHANGED C.tt.tpspspCp.....p.ssCpas..ppppcsp..Cchppsp....tppps.tsstttt.........stppsp+C.ttcp.cscCtp.s...........CKW..-sppC..KDSShLlsK+hAL..huuAFlAL........LF ..............................................C..pthpppppCp...pttCpap..tptpptc....Cp.pppptppps.sssststs........spsssppCps+p..cscCcp.s.....................CpW...........-tppC......+-SShLlsKpFAL..huAAFsuL.L............................. 0 0 67 67 +10492 PF10660 MitoNEET_N Iron-containing outer mitochondrial membrane protein N-terminus Coggill P anon Wiley S Domain MitoNEET_N is the N-terminal region of the MitoNEET and Miner-type proteins that carry a zf-CDGSH, Pfam:PF09360, redox-active 2Fe-2S cluster. The whole protein regulates oxidative capacity. The domain is an anchor sequence that tethers the protein to the outer membrane. 21.30 21.30 21.30 26.70 20.60 17.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.95 0.72 -4.12 9 136 2009-01-15 18:05:59 2008-05-19 13:08:18 4 2 85 3 88 127 1 53.10 38 42.79 CHANGED M..pslSpllps.hPthLttlPlPcohtshh+Lshp-WLsLlshsussAulGYlsY+sFhsKc+pp ..................hp..hs.hhtthPhPpohtsahp.LshpEWltLl.PhhuslAslGYLAh+sFhsKc+p............... 0 20 25 48 +10493 PF10661 EssA WXG100 protein secretion system (Wss), protein EssA FIGfam, Mistry J, Coggill P, Desvaux M anon FIG043089 (Release 2.0) Family The WXG100 protein secretion system (Wss) is responsible for the secretion of WXG100 proteins (Pfam:PF06013) such as ESAT-6 and CFP-10 in Mycobacterium tuberculosis or EsxA and EsxB in Staphylococcus aureus. In S. aureus, the Wss seems to be encoded by a locus of eight CDS, called ess (eSAT-6 secretion system). 
This locus encodes, amongst several other proteins, EssA, a protein predicted to possess one transmembrane domain. Due to its predicted membrane location and its absolute requirement for WXG100 protein secretion, it has been speculated that EssA could form a secretion apparatus in conjunction with the polytopic membrane protein EsaA, YukC (Pfam:PF10140) and YukAB, which is a membrane-bound ATPase containing Ftsk/SpoIIIE domains (Pfam:PF01580) called EssC in S. aureus and Snm1/Snm2 in Mycobacterium tuberculosis. Proteins homologous to EssA, YukC, EsaA and YukD seem absent from mycobacteria [1]. 21.00 21.00 21.80 21.60 20.90 20.30 hmmbuild --amino -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.95 0.71 -4.52 4 267 2009-01-15 18:05:59 2008-05-19 16:20:43 4 1 265 0 8 83 1 130.50 47 86.05 CHANGED ItuuADSYLpcsGKhchKlDRlpcocpEKNc..cthcETELDKsuIpLFssEh-cclpcKppsEpc-h-clcsuLF.cphcss.sVK-TKcpLFSu-Y.ssu..schApo....EspTcsshS.sllhhhuGsllhlCsGlYslhR+lac ................tshh.slssp............................pcE...Ec+hss-Lsp.YDTTLFNKD.sKtVN-.s..htcpK.c-.pQpIKNchFQNQ.A.S+u..T.RL..s...ET.KKVLFSK...o.NlpKo...oESDKS..PYI..QNKQEKpIaP.Y.ILhSlGA.hLTLGhlI.........FsIa+t..................................... 2 2 4 7 +10494 PF10662 PduV-EutP Ethanolamine utilisation - propanediol utilisation FIGfam, Mistry J, Coggill P anon FIG047026 (Release 2.0) Domain Members of this family function in ethanolamine and propanediol degradation pathways, however the exact roles of these proteins is poorly understood [1-3]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.59 0.71 -4.70 20 1157 2012-10-05 12:31:09 2008-05-19 16:22:45 4 8 759 0 91 4581 2185 127.50 43 87.54 CHANGED MK+lhLlGtoGsGKTTLsQsLpG.....c-L.+Y+KTQulcapsss.IDTPGEYlEsRpaYsALhsous-ADlIuLlhsAsp.hssFsPGFuuhFsKPVIGIlTKlDLu.sppplphscphLppAGApcI.FclSulsspGl-ELhsYLpp ..........................................................................MKRlh.h..lGsstsGKTT.Lh.pu.L.pG....................p...h........hh.........+......K............T........Q...........A..........l...............E.............a.............s.............s.............p...........s.........I...............D........T.......P...G..........E........Y.........h........p........p..................p........h.......Y....p.....A........L........l.......s......o........h........p.........-.....s.........D......h.....l....h..h.V.h...s........A........s........s........................t.......o.....h.......h...s.......s.....G.......h......h.........s.......h.......h........s........+..........h.....I............u......l......l...o...KsD.h...s...............s.....t...c......l.....u....h....s.....+.....p...h.....L........p.......c........s........G......h........c........c........I........F.........l..suhsspuV.ppLhcaLt.s.......................................................................................................... 0 46 65 79 +10496 PF10664 NdhM DUF2485; NADHqo1-M; NADHdh-M; Cyanobacterial and plastid NDH-1 subunit M FIGfam, Mistry J, Coggill P anon FIG006356 (Release 2.0) Family The proton-pumping NADH:ubiquinone oxidoreductase catalyses the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. 
Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 sub-complexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1]. The cyanobacterial NDH-1 complex contains additional subunits, NdhM and NdhN, compared with the minimal set of the bacterial enzyme and these seem to be specific for thylakoid-located NDH-1 of photosynthetic organisms [2]. The three subunits of NDH-1, NdhM, NdhN and NdhO are essential for effecting cyclic electron flow around photosystem I, by supplying extra-ATP for photosynthesis in both plastids and cyanobacteria [3, 4]. 
19.80 19.80 20.90 90.80 19.20 18.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.31 0.72 -4.14 21 90 2009-09-11 07:47:26 2008-05-23 11:37:31 4 1 84 0 43 86 121 108.50 58 75.56 CHANGED LKsTTRHVRIFTAcVc.ss-Llsss..spLTLDlDPDNEFlWs-sulpKV.p+FcELV-utuGp-Lo-YNLRRIGSDLEtaIRpLLQsGEluYN.suRVLNYSMGLPRssp .LKSTTRHlRIasAclc.ss-Llsss...spLTLDlDPDNEFlWs--ulpKVYp+FcELV-shsG...t-Lo-YsLR+IGSDLEHaIRpLLQsGElSYNhsuRVlNYSMGLP+lt.t.................... 0 7 27 38 +10497 PF10665 Minor_capsid_1 Phage_Gp9; Minor capsid protein FIGfam, Mistry J, Coggill P anon FIG016324 (Release 2.0) Family This is a putative tail-knob or minor capsid protein from bacteriophages. 25.00 25.00 25.60 25.30 23.50 21.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.40 0.71 -4.24 5 90 2009-12-03 14:34:41 2008-05-23 13:52:04 4 1 89 0 7 71 0 111.50 28 97.94 CHANGED Mlht.PlPhchLIHoloYcEY..hGEDca...GpssYuKPllIE+VRVsPscchssuosucolhaNAVlFlDulNS.sPhhpFpc...pSKIsFcGK-asIpKVIPsYss.ScslHHaELEVl .............................shchLlcslphc..th...t.s.c.sca...spss.YscslpIcpVRhDh.......s.......pshsss......ssu..cptths..ul.....IFl.sshS..ssh.s-hpp.....tu+lh.....as.G.c.-Y.sIscl.ssYts.s.splapaElEV.......... 0 2 6 7 +10498 PF10666 Phage_Gp14 Phage protein Gp14 FIGfam, Mistry J, Coggill P anon (Release 2.0) Family This phage protein family is of unknown function but is expressed from within a cluster of tail- and base plate-producing genes [1]. 
21.30 21.30 21.70 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.80 0.71 -4.32 2 24 2009-12-01 14:53:38 2008-05-23 14:29:37 4 1 22 0 1 16 3 127.00 72 98.20 CHANGED MsQNNVINIQLEESYQEFQLGTELFRVGLGDcMRRKWIEADEKYKKKLEKLNKYNIDNTDEMSSE-YFsLEEDVKEALTEAYAlLLDDEcAFsKCYtQCKDILKMYQVYsQVAE.IVGSVEKQQNEIQKKYKAKMTKKAK ....MsQNNVINIQLEESYQEFQLGTELF+VGLGDEMRRKWIEADEKYKKKLEKLNKYNIDNTDEMSSE-YFsLEEDVKEALTEAYAlLLDDEcAFsKCYtQ.CKDILKMYQVYsQVAE.IVGSVEKQQNEIQKKYpAKMTKKAK........................... 0 1 1 1 +10499 PF10667 DUF2486 Protein of unknown function (DUF2486) FIGfam, Mistry J, Coggill P anon FIG008383 (Release 2.0) Family This family is made up of members from various Burkholderia spp. The function is unknown. 21.50 21.50 22.30 21.50 21.00 21.30 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.90 0.70 -3.83 10 64 2009-01-15 18:05:59 2008-05-23 16:06:39 4 2 63 0 15 70 3 213.10 43 94.62 CHANGED MopspssS...IPsLTDVLVPG+......Ps.ARuouuDss.........s+DsAAhPlLss..s.s.t.susut.....ccspsssc.lsscPlPoPclssVthPuc...................sDAPAcPu..uut+VluccAsAhpAPhPssLAsDsstsssuuAs...........hsAu-sA...PcussPuAsssussphutA............sspsAAshTs.DAppIAERLRsRlTsYLTG-GR-sIEARCRDALH-HouWLVGQITREVALALETEVhcWVR-AVcEEIARRsuG ..................................MspspssS...IPsLTDVLVPGp......Ps.ARssuusss............................pAu.P..............sssu...............pp.....sttpscss.sscPsssst..sssst.hPss...................pDAshtPu..st.cssA.tt.uhthPhsssLAs-.....s.s.shsAh.............h.AscsA...Pch...ssPAAhssssPtlsps....................................................Asu.hhss.h-AppIAERL+uRhTpYLTGEGRulIEARCRDALH-HuuWLVGQIsREVALALETEVhsWVp-AVsttLARRpss.......... 0 1 2 7 +10500 PF10668 Phage_terminase Phage terminase small subunit FIGfams, Mistry J, Coggill P anon FIG022212 (Release 2.0) Domain This family of small highly conserved proteins come from a subset of Firmicute species. 
Its putative function is as a phage terminase small subunit. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.94 0.72 -4.19 6 212 2012-10-04 14:01:12 2008-05-23 17:12:29 4 8 173 0 19 242 9 60.70 37 24.56 CHANGED MARpRsPcRDcAhchahESuGsh.LhDIAscLslssSQIRKWKupDKWs-.......p........hNuslT.......ppK ....................MsRt..RsP.p.....RDpAhcla...hc...ssGphpL+-I..AscLs......Vo.s.....upIR.....+WKopD.K...Wsp..............p.......hpussp............................... 0 5 13 15 +10501 PF10669 Phage_Gp23 Protein gp23 (Bacteriophage A118) FIGfam, Mistry J, Coggill P anon FIG018382 (Release 2.0) Family This is the highly conserved family of the major tail subunit protein. 22.60 22.60 23.70 26.80 22.40 22.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.54 0.71 -4.00 3 16 2009-01-15 18:05:59 2008-05-23 17:22:57 4 2 15 0 2 7 0 120.90 88 55.90 CHANGED MYEGLTKVFDYALAKEMFFAALFVALFIILLIITKRIWDDSKIVRVEMKEERDKMETEREKRDKESKEERDKFISTMNEQQRLMDKQNDMMGQQQQSIDSLSKSVGKLAHKVDLLEHKITK .......MYDGLTKVFDYALAKEMFFAALFVALFIILLIITKRIWDDSKIVRIEMKE.EREKVEEEREKRNKESKEERDKFISTMNEQQRL...MDRQNDMMKQQQQSIDSLSKSVGKLAHKVDLLEHKITK........................... 0 1 1 2 +10502 PF10670 DUF4198 NikM; Domain of unknown function (DUF4198) Coggill P, Bateman A anon Pfam-B_42996 (release 22.0) Family This family was previously missannotated in Pfam as NikM. 
34.00 34.00 34.00 34.00 33.80 33.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.44 0.70 -4.34 108 1021 2009-09-14 15:50:14 2008-05-29 08:59:34 4 7 729 0 303 893 241 211.50 19 81.58 CHANGED ApAHthWlhPssshl.ssst.sh.htlthspshsssh...........................shph.......hsP............cGp.sshhp.......shtts.ptt.h..............ptssaplsh....thptsshashhhpst.hhpppt.....................s....sphhhphsKshl.......stsssspshppsh..GhslEllPls+P.ss....lhsGpshphpll.hcG+Phss.scVplhhtstchpsp..................shtlpTDssGhhohshspuGhahhsAhhpsst ..................................................................................................................AtAHthal.ss......p.....h..pttt..........htlha.s.c..shptt.h..tt....................................................shph...hps................sGp....tsh.t......................phtphph.........................ppssatlsh..........thp..s.uhas..h.hhps.h..h.hphpp...........................................t...sphhhphsKshl........p..t...s..s...p..t..h...p..psh....GhslEllPlsc.......P...s......lhsGps.....hphpll.hcG+Pls..s..splplphtsh.h.p.t....................................................shphpTD.s.pGhhshshspsGha.hhts.hpt..h........................... 0 105 215 264 +10503 PF10671 TcpQ Toxin co-regulated pilus biosynthesis protein Q FIGfam, Mistry J, Coggill P anon FIG032035 (Release 2.0) Family The toxin-coregulated pilus (TCP) of Vibrio cholerae and the soluble TcpF protein that is secreted via the TCP biogenesis apparatus are essential for intestinal colonisation in the disease of cholera. TcpQ is part of an outer membrane complex of the TCP biogenesis apparatus, comprised of TcpC and TcpQ, and the TcpQ is required for proper localisation of TcpC to the outer membrane. The domain is found in other Proteobacterial species apart from Vibrio. 
21.50 21.50 21.60 21.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.74 0.72 -4.10 60 393 2009-01-15 18:05:59 2008-05-29 13:17:40 4 6 315 2 76 322 9 85.90 27 34.59 CHANGED stsaphp.sspsLcpsLpcWApps........GWplh...Wp.sst..Da.lsushsasG..sFpp.........Alpplhps..hpssshslpsshat..usplltlspht.............p ............s..tWph..sssTL+psLpcWApps...........sWplh...Ws.ssh........-apl-uslsapG..sFcs.........Alpplhph..h..p...sspt.s....lhsph.p......tppllhVss....pp................................. 0 19 40 58 +10504 PF10672 Methyltrans_SAM S-adenosylmethionine-dependent methyltransferase Coggill P anon Meireles D Family Members of this family are S-adenosylmethionine-dependent methyltransferases from gamma-proteobacterial species. The diversity in the roles of methylation is matched by the almost bewildering number of methyltransferase enzymes that catalyse the methylation reaction. Although several classes of methyltransferase enzymes are known, the great majority of methylation reactions are catalysed by the S-adenosylmethionine-dependent methyltransferases. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.82 0.70 -5.56 3 4405 2012-10-10 17:06:42 2008-05-29 15:15:01 4 12 3015 23 953 3434 475 251.00 27 55.36 CHANGED RRLFHGRGRpWPGLEQITCDWLQGQLLVNLFKEVDDAFLuuLKcGLsALsuuslWAoKQGRolVLQHRYADGAPSEVLlGELs-oPVVVEsGLKYQLDIGRNQNFGLFLDMRhGRcWVQENAKuKNVLNLFAYTCGFSVAAIAGGAcQVVNVDMARGSLSKGRDNHRLNGHDlspVSFLGHDIFKSWGKIKKuGPYDLVIIDPPSFQKGSFALTKDYKKILRRLPELLsEGGpVlACVNSPAVoPDFLIEoMAEEAPsLcFlERLDNPPEFsDVDs-AuLKVLLFR .........................................................................................................h.htt.............................................................................................................................................................................................................................t....t....h..h...h..G....c...h..s..p..h..h.....l....p....E...p..G....h....+...h.h.Vs...l.p..c..s..h.cTGlFLD.pR....t.s....R......p........h.......l.......t........p.....h............u.........c.......u........K...............c.......VLNh.F.oYTGu...FoV..t..A.u...h..G........G..A...p.p.ss.oVDh.SppuLch.u.c.....c.N.h.p.LN........u.......l.......s.....h......p......p...t..............c...............h....l...p...s.......D.....s.......F.................c...h....h.........p....p......h...........c....c........p.........t...............p...a.DlIllDPP...........s..F......s......c......s......p......h....s.....l.....p...+...s...Y....t...c...l........t...t....s....h....pl....L...p..s....s.G.h...l.h......h..s......ss......s.........h......................................................................................................................................tttp.hh.t.............................................................................................................. 
0 328 573 786 +10505 PF10673 DUF2487 Protein of unknown function (DUF2487) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 26.50 26.30 22.60 21.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.76 0.71 -4.39 15 163 2009-01-15 18:05:59 2008-05-30 11:09:48 4 1 162 0 32 124 0 137.50 48 89.88 CHANGED l-pYLpu+-YIDTAlIPLlslsh..spchKphsppGEFstlLupElERQhKGRlhLhPuFTYlsssppptth..cLpcWpsclpppuFcHVhalTuDpsWK...ttpshpsp.llW.lPulPLEp.....................hscshK+cllc-plpQllshLhp+W ......lEpaEQAR-YVDTulIPLlSISs..ucchKpsVEQGEFlcLLShELERpaKGRVlLLPAFTYLs-.sQ+scps..RLp-WoscLpppGFKHIsYVTSD..hu...WK..pthp-lpGc.LhW.hPolsLEp.....................hsDptKREll+s+l+plhshL.pKW.................................... 0 11 23 26 +10506 PF10674 Ycf54 DUF2488; Protein of unknown function (DUF2488) Mistry J anon PRODOM Family This protein is conserved in the green lineage and located in the chloroplast. 25.00 25.00 32.80 32.20 20.20 19.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -10.13 0.72 -3.86 24 115 2009-01-15 18:05:59 2008-05-30 11:33:57 4 2 101 7 50 116 96 90.40 50 67.26 CHANGED sTYaalhASc+FLhpE...EPLEEVL+ERpRaYtEpsKcIDFWLVppPAF..LpuPEhupl+schPpPuAAllSTsspFIsaLKLRLEaVhpGpFEAP .......pTYaallASp+FLl-E...EPh-ElL+ERhRpYtEpsKElDFWLVhpPuF..L.-ssphsclpt+l......sp......PusAlVSTstpFI.T.alKLRL-aVlpGpFEA....... 0 14 36 46 +10507 PF10675 DUF2489 Protein of unknown function (DUF2489) Mistry J anon PRODOM Domain This is a bacterial family of uncharacterised proteins. 
19.80 19.80 20.70 20.80 19.40 19.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.40 0.71 -4.47 51 301 2009-01-15 18:05:59 2008-06-02 09:38:11 4 1 297 0 73 216 44 130.00 39 84.44 CHANGED IluLusYAshLLhpL+cQpt.p........pppptstppRptplh-SIclIApA.hhpcpC-lSEGslRltsLh-hlshs.p...tspYsuhhplYchlcchPpt-sR+pLsKpcRh+hDhpR.phEschcctIhp-sppL .............IluLuuYAsaLLhpL++Qpt.p........pptphAhppRpspIh-SlpllspA.hl.psQC-LSEusIRlhsLh-hlttptp........tppYPAhhcLYclV+cMPpt-s.RppLsKpERM+.-Lp.RppAEucLpcsIhtElptL............. 1 15 35 59 +10508 PF10676 gerPA Spore germination protein gerPA/gerPF Mistry J anon PRODOM Family This is a bacterial family of proteins that are required for the formation of functionally normal spores. Proteins in this family may be involved in establishing normal coat structure and/or permeability which could control the access of germinants to their receptor. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.37 0.72 -4.13 32 769 2009-01-15 18:05:59 2008-06-02 11:22:48 4 1 146 0 83 320 2 70.30 37 91.63 CHANGED MPuhl...uslpIpslsusGslslGDshtISPpussKohuGuGuhNsGDhl....h.NhhshTsshDsDlsDQs.htNs ......MPuhV...sslhIpN..ssGshslGDsasloPhssoKuasGuGusNsG.hls...shstlSsTsshDsDlsDQs.hhs.s.................... 0 17 45 53 +10509 PF10677 DUF2490 Protein of unknown function (DUF2490) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. They appear to belong to the outer membrane beta barrel superfamily. 
29.80 29.80 29.80 33.60 29.70 29.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.78 0.71 -11.19 0.71 -5.03 62 262 2012-10-03 17:14:37 2008-06-02 12:42:47 4 1 180 0 128 283 185 196.50 20 83.77 CHANGED chssWhphthptpl...s......pchthph-hphRhtcs.............hs.chpphhlRsulsYpl.ssphplsh...GYsahhsps.tt.............thsEpRhapphhhphs..htphplspRhRhEpRahpt..........................ssphphRhR.plphphPlspptht......hhhhsElFhsl........stpsaspsRhhs.Glsaplspp..hplphGYhtp.......ttsspspchlth .............................................................thWhpht.hptcls.........pchphth-hphR...htss.............hs.phpphhl+sulsYpl.ssphplth.......GY...sahhsp.hts.....................ptEpRhatphp.hphp..hs...ph.....pls..pRhRhEpRahtt..........................................ssch.phRhR.tlphshsl.spphht.......hhh.sElFhsh........t.hsppthcpsRhhs.Glsaplspp...hpl-lGYhtQh.pt...ttss...p....h......................... 0 66 112 122 +10510 PF10678 DUF2492 Protein of unknown function (DUF2492) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. 19.70 19.70 20.90 20.90 18.50 16.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.50 0.72 -3.90 22 608 2009-01-15 18:05:59 2008-06-02 12:58:25 4 2 593 0 55 219 6 75.70 69 97.41 CHANGED hsSlHGHpVhplMltpspsho+tpLpphltpcFGppARFHTCSApshsA-pLlpFLhpKGKhl.scpGhsssts+lCp .....MDSIHGHEVLNMMIESGEQY.THuSLEAAIK.....A...RFGEpARF.HTCSAEsMTAuELVAFLAAKGKFI.s.s.E.-.GFSTcpSKICR............... 0 8 20 36 +10511 PF10679 DUF2491 Protein of unknown function (DUF2491) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. 
25.00 25.00 33.50 33.40 20.80 18.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.16 0.70 -11.18 0.70 -5.33 16 485 2009-01-15 18:05:59 2008-06-02 13:05:10 4 2 472 0 38 197 8 212.60 70 97.47 CHANGED M.......FpchFG.+csps..ssPps.........P..hGLtlGtulplDsLthc..LLsschtltls.sssphItAhGcVcLspuspLhRaYsDD-sal.QVlssGs.stsclcDlpLahaa-otsluucu-apchlts..plutspYch-Gh.papRhWsssts..pscsVshsEclhptsss...spclpQhsMLYtRplst.spcEhLLlssEEc.....pps-hslshulGlsLpssDlpl .........................h....FQRLFG..KcsKP..AlsRG................P........LGLHLNuGFTLDTLAFR..LLE-.pLLlALP..GE-..a.....TVAAVS+IDLG..GGSQIFRYYTS........G..DEFL.QINTTGGpDlDDIDDIKLFVYEESaGIocEsHWR-AIs.s......ps.....hGAM.TLN..W.....Q......E.K...RWQRFFNSEEP.GNIEPVYMLEKVENQscA...KW-VHNFTMGYQRQVT-....D....saEYLLLNGEESFN-.hGEPEWlFSRALGVDIPLTSLcI......... 0 9 17 24 +10512 PF10680 RRN9 RNA polymerase I specific transcription initiation factor Wood V, Coggill P anon Pfam-B_44021 (release 22.0) Domain Initiation of transcription of ribosomal DNA (rDNA) in yeast involves an interaction of upstream activation factor (UAF) with the upstream element of the promoter, to form a stable UAF-template complex. UAF, together with the TATA-binding transcription initiation factor protein TBP, then recruits an essential core factor to the promoter, to form a stable preinitiation complex [1]. This Rrn9 domain, which seems to be constrained to fungi, is the two highly conserved regions of proteins which form one of the subunits of UAF and appears to be the region responsible for the interaction with TBP. The family includes the S.pombe Arc1 protein, Swiss:Q10204, which is found to be essential for the accumulation of condensin at kinetochores [2]. 
25.00 25.00 31.70 29.40 23.30 22.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -10.06 0.72 -4.24 22 106 2009-01-15 18:05:59 2008-06-04 12:54:52 4 4 105 0 80 106 0 72.80 33 14.00 CHANGED L-sLc...tcDLulHLYsuahL+ph...A...ttp.hp.s.........................................sthhPp+pWouWPh.sspVP.sspphh....D ......L-sLc...ppDLulHLYsAahLK+t................ttp..p.s.................................................sthhPp+pWTAWPhs.sscVPtsspph.p............................ 0 11 36 65 +10513 PF10681 Rot1 Chaperone for protein-folding within the ER, fungal Wood V, Coggill P anon Pfam-B_27706 (release 22.0) Family This conserved fungal family is an essential molecular chaperone in the endoplasmic reticulum. Molecular chaperones transiently interact with unfolded proteins to inhibit their self-aggregation and to support their folding and/or assembly. Rot1 is a general chaperone with some substrate specificity, its substrates being the structurally unrelated Kre5 Kre6 Big1 Atg22, which are type I, type II, and polytopic membrane proteins. The dependencies of each for Rot1 do not share similarities. However, their folding does require BiP, and one of these proteins was simultaneously associated with both Rot1 and BiP. In addition, Rot1 may cooperate with BiP/Kar2 in the folding of Kre6 [1]. 
23.20 23.20 23.20 36.10 22.90 22.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.53 0.70 -4.99 28 127 2009-01-15 18:05:59 2008-06-04 13:13:12 4 3 108 0 89 128 0 202.10 44 86.02 CHANGED LhGTWSoKSspVhTGPGFYDPls-hLlEPsLsGISYSFTcDGaaEEAhYRssuNPpsPsCPpuhlhaQHGoYplp.sNGoLhLsPhtVDGRQLlSDPCsss.....hus...YoRYsQsEpFppapV.lDs.Ya.Ghh+LpLapF.DGoPhpPhYLsY+PPhMLPTpTL..NPostst........................tpcthR...........phhcpshps.tpp......shhpp...thh..sthaWhulhhhuhGuhshh ..........LhGTWooKSppVhTGP...G.........FYDPlc-hhlEPshsGISYSF....TcDG.aaEEAhYpshuNPpsPsCspuhhhaQHGoYplp.sNGoLhLsP..htsDGRQLlSDPCssp.........tup........YoRYsQs.EhF.........ppap.Vhl......Ds....Y.......H...uhhpLpLap.a..DGo.PhpPhYLsY+PP.MLPTpoLNPsspst........................ttp..t.hR.................phhppp.h.t...hpp......shhtp....t..p.shhWahuhhhhuhGuhhhh................................... 0 27 48 76 +10514 PF10682 UL40 Glycoprotein of human cytomegalovirus HHV-5 Mistry J, Coggill P anon PRODOM_PD113623 Family This is glycoprotein UL40 from human cytomegalovirus or herpesvirus 5. The signal sequence of the UL40 polypeptide contains an HLA-E ligand identical with HLA-Cw*0304. The first 37 residues of UL40, including this ligand, are predicted to encode a signal peptide. The virus thus prevents the lysis by NK (natural killer) cells of the cell it has invaded [1-2]. 
25.00 25.00 38.00 27.60 20.60 19.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.75 0.70 -4.83 2 171 2009-01-15 18:05:59 2008-06-04 14:21:54 4 1 6 3 0 168 0 212.50 94 96.35 CHANGED hRhtFhhssMAP+TLll...llhhtl.u.hs..ApTssTTsGAhhupsP+.....C.hVFpGWVYAhYHpGsMsLMTlDV.CCR.pssNo........tcspLLI-VGNpTRpt....oCpsH..u.Q.tDC.sphVHVpGlspStFhLopLpSCCLNp.SpLSE+VAYHL+hRPAsFGLETWAMYTlGlLuLGSFSSFYsQlh+sL....psaHYAhKt ......................................TRIGFTCAVMAPRTLILTlGLLCMRIRSLLCSPAETTVTTAGshSAHGPh.....CPLVFQGWAYAVYHQGDMALMTLDVYCCRQTSSNTVVAFSHHPADNTLLIEVGNNTRRHVDGISCQDHFRA.QHQDCPAQTVHVRGVNESAFGLTHLQSCCLNEHSQLSERVAYHLKLRPA.TFGLETWAMYTVGILALGSFSSFYSQIARSLGVLPNDHHYALKK........ 0 0 0 0 +10515 PF10683 DBD_Tnp_Hermes Hermes_DBD; Hermes transposase DNA-binding domain Bateman A anon PDB:2bw3 Domain This domain confers specific DNA-binding on Hermes transposase [1]. 20.50 20.50 20.70 23.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.29 0.72 -4.51 4 22 2009-01-15 18:05:59 2008-06-04 16:21:41 4 5 13 2 13 19 0 61.90 36 13.87 CHANGED pspEL+hVStpDKcEAIEKCTQWVVcDCpPFSAVoGuGFhchVKFFlKIGA.YGEpVDV-DLLPsPsT ............................pKpchhc+ssp....aslpDhRPFshVpGpGFhcLsphhlplGApYGppVsl-slLPpP.T..... 0 1 7 11 +10516 PF10684 BDM Putative biofilm-dependent modulation protein Mistry J, Coggill P anon PRODOM_PD064586 Family This is a family of tightly conserved proteins from Enterobacteriaceae which are annotated as being biofilm-dependent modulation protein homologues. 
27.40 27.40 27.40 90.60 27.00 27.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.12 0.72 -3.76 2 398 2009-09-11 08:44:46 2008-06-04 16:56:56 4 1 396 0 9 28 0 71.60 95 100.00 CHANGED MCFINLRSVADTINTQTRRITMFTYYQAENSTAEPALVNAIEQGLRAEHGVVTEDDILMELTKWVEASDNDILSDIYQQTINYVVSGQHPTL .....................MFTYYQAENSTAEPALVNAIEQGLRAEHGVVTEDDILMELTKWVEASDNDILSDIYQQTINYVVSGQHPTL.. 0 2 2 5 +10517 PF10685 KGG Stress-induced bacterial acidophilic repeat motif Mistry J, Coggill P anon PRODOM_PD027049 Family This repeat is found in proteins which are expressed under conditions of stress in bacteria. The repeat contains a highly conserved, characteristic sequence motif,KGG, that is also recognised by plants and lower eukaryotes and repeated in their LEA (late embryogenesis abundant) family of proteins, thereby rendering those proteins bacteriostatic. An example of such an LEA family is LEA_5, Pfam:PF00477. Further downstream from this motif is a Walker A, nucleotide binding, motif GXXXXGK(S,T), that in YciG of E coli, eg Swiss:Q8X7B4, is QSGGNKSGKS [URL]. YciG is expressed as part of a three-gene operon, yciGFE, and this operon is induced by stress and is regulated by RpoS, which controls the general stress-response in E coli. YciG was shown to be important for stationary-phase resistance to thermal stress and in particular to acid stress. 20.40 20.40 20.50 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.29 0.72 -6.39 0.72 -3.93 45 2185 2012-10-01 20:07:30 2008-06-05 11:36:42 4 12 679 0 402 1036 15 21.90 59 59.07 CHANGED sFAs.Dt-+tpEhupKGGcuSsu ......NFtp.D.c+A...SEAG+KGGQpSsG..... 0 82 192 315 +10518 PF10686 DUF2493 Protein of unknown function (DUF2493) Mistry J, Coggill P anon PRODOM_PD031789 Family Members of this family are all Proteobacteria. The function is not known. 
24.50 24.50 24.60 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.43 0.72 -4.43 34 308 2009-01-15 18:05:59 2008-06-05 14:06:06 4 3 207 0 113 326 118 70.40 36 28.33 CHANGED PpGs+lAFoGGtDap...Daph........IWssLDcl+A+h.....PDMl..LlHGGss+GAEpIAApWAcsR..sVsQlsF+PDWp+HG .............................Gs+lhloGGt-.as...Dpch....................last.L.D.p....l+s+t.....P-.hl...LlHG.....Gu..+GA-tIAupWA...cpR.....t..Vsp..l..s...FpsDWpc+............ 0 22 74 95 +10520 PF10688 Imp-YgjV Bacterial inner membrane protein Mistry J, Coggill P anon PRODOM Family This is a family of inner membrane proteins. Many of the members are YgjV protein. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.74 0.71 -4.81 34 708 2009-09-11 16:42:10 2008-06-05 17:27:52 4 3 624 0 127 358 26 145.00 42 89.78 CHANGED thhhuQhlGhlAhslslhuF.pKpcc+lhhhlhstsllhulHFhLLGuhsAAshhhluulRhhhulhspS.........phlhhhFlslsllh...shhshpshhsllslhGolluThAhFphcGlp.hRhhhlluossWlhpNlllGShGGsLhEshhlssNhlslYR...happptps ....................ahlAQulGsl.AFhlGIosFas+--+Rh+hpLslausllulHFhLLGshsAuhoslLsulRoh.l.o....l+Tc.S.........hhVMs.lFIlL...ohsh..............Gl.sph....p.c.l.ELLPllGTlluTaALFpscGls.hRs.V.MhhuTsCWVIHNhahGSIGGohlEuoFllhNulsIlRaaRhpppu................. 0 23 55 90 +10521 PF10689 DUF2496 Protein of unknown function (DUF2496) Mistry J, Coggill P anon PRODOM_PD581819 Family This family consists of proteins from Gammaproteobacteria spp. Many members are annotated as being like the E coli protein YbaM. 25.00 25.00 40.00 39.90 18.50 15.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.73 0.72 -4.37 25 670 2009-01-15 18:05:59 2008-06-06 09:53:35 4 1 667 0 66 195 1 43.90 73 79.54 CHANGED sL-sAP-ElKLAVDLIhLLEsN-I-PpsALuAL-IVppDappKL ..SLENAPD-VKLAVDLIVLLEENpIPAcTVLRAL-IVKRDYEpKL. 
0 4 17 43 +10522 PF10690 Myticin-prepro Myticin pre-proprotein from the mussel Mistry J, Coggill P anon PRODOM_PD189357 Family Myticin is a cysteine-rich peptide produced in three isoforms, A, B and C, by Mytilus galloprovincialis, the Mediterranean mussel. Some isoforms show antibacterial activity against gram-positive bacteria, while others are additionally active against the fungus Fusarium oxysporum and a gram-negative bacterium, Escherichia coli D31. Myticin-prepro is the precursor peptide. The mature molecule, named myticin, consists of 40 residues, with four intramolecular disulfide bridges and a cysteine array in the primary structure different from that of previously characterised cysteine-rich antimicrobial peptides. The first 20 amino acids are a putative signal peptide, and the antimicrobial peptide sequence is a 36-residue C-terminal extension. Such a structure suggests that myticins are synthesised as prepro-proteins that are then processed by various proteolytic events before storage in the haemocytes as the active peptide. Myticin precursors are expressed mainly in the haemocytes. The family Mytilin has been merged into this family. 28.50 28.50 28.60 39.60 25.70 28.40 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.82 0.72 -3.96 5 179 2009-01-15 18:05:59 2008-06-06 09:56:07 4 1 5 1 0 179 0 98.50 78 99.26 CHANGED MKATILLAVlVAVlVAVpEAcuhuCTSYaCuKFCGoAuCoaYlChlLHsGKhCtCLHCSRs+.PhthotcA+shNEt...hDhoPpMN-MENLDpGMDM...........l MKATILLAVVVAVIVGVQEAQSlsCTSYYCSKFCGSAGCSLYGCYhLHPGKICYCLHCpRAESPLALSGSARNVN-pNpEMDNSPlMNEhENLDQEMDMF.. 0 0 0 0 +10523 PF10691 DUF2497 Protein of unknown function (DUF2497) Mistry J, Coggill P anon PRODOM_PD475087 Family Members of this family belong to the Alphaproteobacteria. The function of the family is not known. 
20.90 20.90 21.80 29.30 20.00 20.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.48 0.72 -4.01 43 281 2009-01-15 18:05:59 2008-06-06 10:13:57 4 1 277 0 96 223 45 73.60 40 34.00 CHANGED pcsLlStssspssssshpsLupslp............ssp..tpTlE-lVc-hLRPMLp-WLDpNLPslVEclVcpEIcRls+p .........................................................................................................................sllSptotpplusuFpsLscslp.............ss..tppolE-lstEhLRPhLpsWLDcNLPsLVE+lV+cEI-Rls+.s... 0 29 54 67 +10524 PF10692 DUF2498 Protein of unknown function (DUF2498) Mistry J, Coggill P anon PRODOM_PD060534 Family Members of this family are Gammaproteobacteria. Many are annotated as like E coli protein YciN. The function is not known. 25.00 25.00 26.00 26.00 17.90 16.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.58 0.72 -4.37 12 633 2009-01-15 18:05:59 2008-06-06 10:33:02 4 1 631 2 51 152 1 81.20 75 98.44 CHANGED pspppPIscpsLLh.ANplI+-HEDYlpGMcATsVEQKssVLVF+GEaFLDEpGLPTsKTTAVFNMFKaLAHhLStKYpLhc ....p.KETQPIDRETLLtEANKIIREHEDTLAGIcATGVTQRNGVLVFoGDYFLDEQGLPTsKSTAVFNMFKHLAHVLSEKYHLVD................. 0 4 14 32 +10525 PF10693 DUF2499 Protein of unknown function (DUF2499) Mistry J, Coggill P anon PRODOM_PD077121 Family Members of this family are found in plants, lower eukaryotes, and bacteria and the chloroplast where it is annotated as Ycf49 or Ycf49-like. The function is not known though several members are annotated as putative membrane proteins. 
22.10 22.10 24.50 23.70 20.90 20.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.36 0.72 -3.94 39 145 2009-01-15 18:05:59 2008-06-06 11:16:05 4 4 121 0 85 138 93 85.40 48 53.36 CHANGED sLSlsTWhIHluSllEWhlAIhllhcau..phptppshphLulAMlPsLlSAhsAhTWHhFDNs.sLthLVsLQAhhTllGN..hsLshAAapl ..........ALSlPTWhIHlSSVlEWlhAhhLlapYu..phsspptW+tLuhuMlPtLsuAhCACTWHhFsNs..uLphLVsLQAshTllGN..hTLshAAah.......... 0 27 54 76 +10526 PF10694 DUF2500 Protein of unknown function (DUF2500) Mistry J, Coggill P anon PRODOM_PD076478 Family The members of this family are largely confined to the Gammaproteobacteria. The function is not known. 22.00 22.00 22.00 22.00 21.40 21.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.27 0.72 -4.06 28 788 2009-09-11 12:25:21 2008-06-06 11:50:33 4 3 759 21 73 338 5 111.30 46 90.70 CHANGED PlhhhllhhlllshhsFhah......phhpcatpspsAPhhslpspllsK+ppshscspspp............psp....cYalpFchpsGs...................+hEFpV.usc.YptLs.GDpGpLTaQGs+FlpFsh .............................................PLFFIllluLI.lVAAo.F+Fh......QQRRE+AsN-hAPlppc.VsVosKRE+slsDRRSRQpEV.ssA......uooh....RYEsoF+Pp.sGG...................EpsFRL..suppYHALssGD+GTLoY+GTRFluF..s......................... 0 17 33 49 +10528 PF10696 DUF2501 Protein of unknown function (DUF2501) Mistry J, Coggill P anon PRODOM_PD096667 Family Members of this family are all Proteobacteria. Several are annotated as being YjjA or YjjA-like, but this protein is uncharacterised. 19.80 19.80 19.80 23.00 19.30 19.60 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.54 0.72 -4.00 19 613 2009-01-15 18:05:59 2008-06-06 13:05:51 4 1 595 0 55 226 8 79.70 64 49.43 CHANGED KNNhLu.u.ssAssVKspLhuKLGhsstp.tspDssYtsGlpGlLssusGpplsLs..Gs.sLppplKpKACDhVLpQGpu..Lh .............KQKLAS...s..TssENIKNQlLpKLGLsopE.QccDTNYL-GlQGLLKTKDGQQLNLsN.....IGoTPLAEKVKTKACDLVLKQGLN.h.h.... 
1 4 20 38 +10529 PF10697 DUF2502 Protein of unknown function (DUF2502) Mistry J, Coggill P anon PRODOM_PD029719 Family Members of this family are all Gammaproteobacteria. The function is not known. 20.60 20.60 29.10 23.20 18.00 18.20 hmmbuild --amino -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.13 0.72 -3.88 13 798 2009-01-15 18:05:59 2008-06-06 13:44:39 4 2 515 0 54 211 7 83.10 63 77.09 CHANGED pIsL.l.......PuVpLQIGDRDpRGpYWDGhcWRDcsWW+pH............YpWcGsRWpc+ssttcpta.+cp...................chDc+p........s+Gss++H ............EITL.LPSIKLQIGDRDchGNYWDGGHWRD+caW+p+....................YEWRtNRWa+H-s..s..h.+.+.s.a.cK+pt...............thccRDDHR...G+GtG+tH.............................................................. 0 3 13 30 +10530 PF10698 DUF2505 Protein of unknown function (DUF2505) Mistry J, Coggill P anon PRODOM_PD099734 Family Members of this family are all Actinobacteria. The function is not known. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.53 0.71 -4.73 34 427 2012-10-02 19:24:03 2008-06-06 16:13:37 4 2 348 0 112 314 13 158.00 27 92.63 CHANGED h-hsspas.ssl-pVapshsccsYaps+hpphus...tsplsphsssusG.....hplshpp..sl.sc.pLPuhlpphhssclplpcpEpW.ssht..supspuphpsslsGsPsslsGshtLps......susuophplsuslcVcVPLlGGKlEphluspltchlssEpchsppWl ...................................................................p.sspas.sss-pVhthhsctsa...Wpshh.p.phus....splp..s.hss...s...s..cu.......lplshtp....hlssp.....L..Puhl..pp.hls.u...s.Lplc..pspoW..ss..hs..sus...tpuo...ls...ss..lt.....GsPsphsGptslps................susG.oclphsuslpVpl......P.....llG...uKlEphhusplsphhshEpchsspWl............................... 0 35 81 103 +10531 PF10699 HAP2-GCS1 Male gamete fusion factor Bateman A, Coggill P anon Billker O Domain The gene encoding Arabidopsis HAP2 is allelic with GCS1 (Generative cell-specific protein 1). 
HAP2 is expressed only in the haploid sperm and is required for efficient guidance of the pollen tube to the ovules. In Arabidopsis the protein is a predicted membrane protein with an N-terminal secretion signal, a single transmembrane domain and a C-terminal histidine-rich domain [1]. HAP2-GCS1 is found from plants to lower eukaryotes and is necessary for the fusion of the gametes in fertilisation. It is involved in a novel mechanism for gamete fusion where a first species-specific protein binds male and female gamete membranes together after which a second, broadly conserved protein, either directly or indirectly, causes fusion of the two membranes together. The broadly conserved protein is represented by this HAP2-GCS1 domain, conserved from plants to lower eukaryotes [2]. In Plasmodium berghei the protein is expressed only in male gametocytes and gametes, having a male-specific function during the interaction with female gametes, and being indispensable for parasite fertilisation. The gene in plants and eukaryotes might well have originated from acquisition of plastids from red algae [3]. 19.70 19.70 20.20 26.60 19.40 19.00 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.41 0.72 -4.00 22 100 2009-01-15 18:05:59 2008-06-07 15:35:39 4 1 68 0 69 107 1 48.30 38 6.85 CHANGED hllspshhshsGt...pCsKIGVShpsapsp.s...sh.Cst..GoCLpsQLtcaap ..........hlLs+shhslsGh...-CsKIGVSapuFpsQ.s...sh.Cst.huoCLpsQLtcah...... 1 30 46 62 +10534 PF10702 DUF2507 Protein of unknown function (DUF2507) Mistry J, Coggill P anon PRODOM_PD089657 Family This family is conserved in Firmicutes. The function is not known. 
30.00 30.00 30.80 30.10 29.60 29.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.66 0.71 -4.48 22 327 2009-01-15 18:05:59 2008-06-09 15:33:57 4 2 325 2 57 203 0 123.60 42 79.59 CHANGED ssFuhpLlR-hlLPslLGp-psslLYWAGKcLAR+aPlpohE-l.tFFppAuaGsLplhKpK+pphhFpLoGshlspRlpp..ppssFpLEAGFlAEplppppthssEuh..thp++..pppVhlpVph ...............shFuhpLlR-hLLP-lLGsDtssILYWAGKcLARKaPLpohE-lhpFFcpAuaGsLollccK+pchpapLpGsllspRhcp..p.ccssFpLEAGFIAEplQpQpshssEuh.tphc++..p.c.pVphhVp...................... 0 16 34 44 +10535 PF10703 MoaF Molybdenum cofactor biosynthesis protein F Mistry J, Coggill P anon PRODOM_PD122919 Family MoaF protein is essential for the production of the monoamine-inducible 30kDa protein in Klebsiella [1]. It is necessary for reconstituting organoautotrophic growth in Ralstonia eutropha [2]. It is conserved in Proteobacteria and some lower eukaryotes. The operon regulating the Moa genes is responsible for molybdenum cofactor biosynthesis. 22.40 22.40 22.70 22.60 19.20 22.30 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.64 0.70 -5.32 12 151 2009-01-15 18:05:59 2008-06-09 16:14:14 4 2 115 0 36 122 6 252.70 41 92.97 CHANGED sssstaIsVGALA-GFA.csplLsssssLuG+shsLphssGtshtpthssp........psLpWpthttt.................GpusYRAoplRsulYaVDalc.tpspt.SVSLVlDhpptphouVhGpLPsc..Atsp.sshsRshpsh.LTuVcspFhaGslss.htsus..huPTc-LIGhRshYpYSPoEsYEHIYLNssaYsWQCLpGsE+GLADsDRC+haKlA-pLYLFVWREKllPTLGVllIDLp...thRoDGKIFGYpsuDFushsNFslGAaupVLNpTpH ....................................................s.pssalpVGALA-GF..-sphLsssssLsG+slslt.....hssG..thh.ph.t.F..ss........psLpWcttpss.................GpssY+AoplRsslaFVDal.........c.sptss.olSLVlDhsptshouVhGplPsc..st.chsuhsRVtpsh.lTuVpstFhaGslss.h.tsus...u.Tc-LIGpRshYpYS..PoEsYEHIYLNssaYsWpCLsGsE+GLAD.V.......DRC+haKlA-sLYLFsWREKl.l.PTLGVllIDLp...ttRocGKlhGYpsuDhusluNFPlGAauplLNpTha....................... 
0 3 10 23 +10536 PF10704 DUF2508 Protein of unknown function (DUF2508) Mistry J, Coggill P anon PRODOM_PD057080 Family This family is conserved in Firmicutes. Several members are annotated as being the protein YaaL. The function is not known. 20.40 20.40 20.50 20.90 20.20 20.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.30 0.72 -3.80 23 434 2009-01-15 18:05:59 2008-06-09 16:57:29 4 1 431 0 88 236 2 72.40 30 89.37 CHANGED M......Fh++K....scl++....-hD-cLlphlpcs+ccappp+pl.ppoh-..ss.p-lhhptKlscAKYhFLh+EA+pRplphp .............................................hhp+p.....cl+c.........p.hD.....p.c..Llphlccs+pchppt.+phhcps..h-....s..pps...lhph+lAcAKYhaLh+EA+pRtl+h.p...... 0 36 65 75 +10537 PF10705 Ycf15 Chloroplast protein precursor Ycf15 putative Mistry J, Coggill P anon PRODOM_PD014917 Family In some species of plants the ycf15 gene is probably not a protein-coding gene because the protein in these species has premature stop codons. Most of the members of the family are hypothetical or uncharacterised [1]. 25.00 25.00 25.60 39.40 21.10 18.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.25 0.72 -3.93 3 64 2009-01-15 18:05:59 2008-06-09 17:56:30 4 1 62 0 4 51 0 72.20 68 98.05 CHANGED ETLVSSIFWTLAPWNNMLLLKHGRIEILDQNTMYGWYELPKQEFLNSEQPEPITHYIKKFPLMKcIGPpcpp+..htphSchlllotssoNHahN .....................M..LLL..KHGRIEILDQNTMY..GWYELPKQEFLNSEQP..IhTT.KKa.lhhclsP.cppK..h...................t............ 0 1 3 3 +10538 PF10706 Aminoglyc_resit Aminoglycoside-2''-adenylyltransferase Mistry J, Coggill P anon PRODOM_PD012767 Family This family is conserved in Bacteria. It confers resistance to kanamycin, gentamicin, and tobramycin [1]. 
The protein is also produced by plasmids in various bacterial species and confers resistance to essentially all clinically available aminoglycosides except streptomycin, and it eliminates the synergism between aminoglycosides and cell-wall active agents [2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.02 0.71 -4.65 3 186 2012-10-02 22:47:23 2008-06-09 17:57:47 4 1 128 4 26 138 5 140.20 38 79.89 CHANGED MNcLHIsLIHpIFAAADclNLPLWIGGGWAIDARLGRITREHDDIDLTFPGDR+AEFEsLlcthGGpITEQTDYGFLAplQGlLLDCEPAaasD-AYEIE-sPPGSCPhssEGVIuG+PVRCNSWEAILWDYFYYtDEVPpu-WPsKHlaSasLAC-SLG-ssVcsLRcQF+oR ............................llshh-ctslshWlsGGWulDAhLG+.TRcHcDIDlsFsuc+ps.cl..sllc.h.G....h..+.l......c...h..s...hh..ltc.t...th..lDhcPh.hss-.u...htts.st.u.u...p...........t.....p....s.hts+.l.C.s............................................................................................................................................................ 0 7 19 24 +10539 PF10707 YrbL-PhoP_reg PhoP regulatory network protein YrbL Mistry J, Coggill P anon PRODOM_PD110862 Family This is a family of proteins that are activated by PhoP. PhoP protein controls the expression of a large number of genes that mediate adaptation to low Mg2+ environments and/or virulence in several bacterial species. YbrL is proposed to be acting in a loop activity with PhoP and PrmA analogous to the multicomponent loop in Salmonella where the PhoP-dependent PmrD protein activates the regulatory protein PmrA, and the activated PmrA then represses transcription from the PmrD promoter which harbours binding sites for both the PhoP and PmrA proteins. Expression of YrbL is induced in low Mg2+ in a PhoP-dependent fashion and repressed by Fe3+ in a PmrA-dependent manner [1]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -10.95 0.71 -4.98 22 504 2012-10-02 22:05:25 2008-06-10 11:39:02 4 2 480 0 35 279 172 183.00 67 87.86 CHANGED lpLpppp.lupGspRhsYtHPtcssphlKVhpspt...................tsth+phtpElptYhplph.......+pth.hshls+haGhlpTshGhGhlh-hltDhsGshu.TLpphh...cpsthss.thtptLcphhphlh-scIlsp.-lpspNIVhtcpspup.....phhllDGhGstph...lPltshuphhs++plp+phc+hhpcht .............................................................I+LSEQoPLGTGRHRKCYAH...PED...A...p...RC..IKIVY+Ru...........................-.G.G.DKE.IRRE.LKYY.AHLu........RRLcDWSuIPRYHG..TVETDCGT..GYVYDlIs....DF..D.G....K.PS.ITLT.EFAt....QCRY.E..E.D...lA.....p....LRQ...LL....K....pL...KRYL....pDN+IVTM.S.LKPQN..ILC+RISESE....VlPVVCD.N...IGESTL....IPLATWS.KWCChRKpERlW+RFIAQP.A................................................ 0 11 18 27 +10540 PF10708 DUF2510 Protein of unknown function (DUF2510) Mistry J, Coggill P anon PRODOM_PD056443 Family This is family of proteins conserved in Actinobacteria. Many members are annotated as putative membrane proteins but this could not be confirmed. 20.90 20.90 20.90 20.90 20.30 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.26 0.72 -4.49 31 349 2009-01-15 18:05:59 2008-06-10 13:08:27 4 19 198 0 127 284 30 37.80 43 13.84 CHANGED sGWYPDPus....sp........phRaWDGppWTsph...pPh...Pussstss ..sGWYPDPsG....ss........thRaWDGspWTcps.............pPs.....st......t....................................... 1 48 96 123 +10541 PF10709 DUF2511 Protein of unknown function (DUF2511) Mistry J, Coggill P anon PRODOM_PD064657 Family This family is conserved in bacteria. The function is not known. 
20.40 20.40 21.10 20.80 19.70 17.00 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.07 0.72 -3.64 14 641 2009-01-15 18:05:59 2008-06-10 13:21:12 4 2 562 0 53 212 2 86.40 68 76.30 CHANGED slo+hcaGc.cWsFopEEVtLpC+sGsAL.ashNsuT.hpYPLN-lAppphct.Gp..upsIssIhlDDPs....pPG...................pKhS.LsPal-cA.pLC .........TVSRFEVGKDKWAFNREEVMLTC..R....P......G...........N......A....L...YV..IN..PSTLVQYPLNDIApppVAo.GKTcAQPIuVIQIDDPs.....sPG.......................EKMS.LAPFIERApKLC.......................... 0 7 16 31 +10542 PF10710 DUF2512 Protein of unknown function (DUF2512) Mistry J, Coggill P anon PRODOM_PD032002 Family Proteins in this family are predicted to be integral membrane proteins, and many of them are annotated as being YndM protein. They are all found in Firmicutes. The true function is not known. 23.70 23.70 23.90 25.00 23.50 23.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.93 0.71 -4.71 20 314 2009-01-15 18:05:59 2008-06-10 13:55:09 4 1 198 0 59 209 0 127.60 34 92.27 CHANGED M+..HltALslKalhshslLhllLshhassoFscllhlollLolloYhlGDLalLPR......huNhsAoluDFGLualllWlhuhhhhs..sshsluhuollSAllluluEhFFHtYhhcplls.....p.tp.....phpapTEhu-E .....................Mp.HhhsLllKhhhhhhllhlh...Lsl.hhs..ho.hspllhholhlohsuYhlGDhhILs+........hGNhsAohuDhsLualslWlhs.hhhs..sshpIuhuu.l.l.uA.l.llul.uEhaFHtahpppshp.....p.pt.t.....t.tathEhu-E.............. 0 21 44 49 +10543 PF10711 DUF2513 Hypothetical protein (DUF2513) Mistry J, Coggill P anon PRODOM_PD457411 Family This family is found in bacteria. The function is not known. 
29.40 29.40 29.40 31.40 29.00 29.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.39 0.72 -3.94 27 250 2009-01-15 18:05:59 2008-06-10 14:04:32 4 2 231 0 43 168 3 102.10 31 83.09 CHANGED KhDh-llRclLLplEst.h...............tt.thstasp-..pl.YHlthL..p-AGllpuphpth............st..........hltpLTasGH-FLDslRcsslWpcsK.phtppsu...uhol ....................................KhshDhlRclLLclEsptphspsl...................pshth.spash-......slhYplthL..p-Ashlssp.htht................st.hh.h........hlpclTasGH-FLDsIR-spsWpcsK.phtsKsssho................ 0 14 25 36 +10544 PF10712 NAD-GH NAD-specific glutamate dehydrogenase Mistry J, Coggill P anon PRODOM_PD017095 Family The members of this are annotated as being NAD-specific glutamate dehydrogenase encoded in antisense gene pair with DnaK-J [1]. 19.10 19.10 27.00 26.90 18.80 18.50 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.41 0.70 -6.24 21 144 2009-01-15 18:05:59 2008-06-10 16:57:01 4 3 117 0 30 146 1412 408.70 35 78.00 CHANGED hsLptshptscsthDthhshshcLVAhlhpcLL.........GtVcpsluLVhuhcpLssLLVhhGVthGlLcHhlDlhlspsstuL.DtDLLhLsGuLVLGtcVcDAVGVDVEGchDLRHuARstRsshpVELs-cLVVtpHhsLsLEcsDtHutLVVhsGtEcLsLLGRDpuVAlDQsG-Hssp+hDAcRQRuHVEQQHVLcV.....ALQssuLDsuAcucsFVRVcshVRLLA.EElhHhhhDLtHsGhsADpcclVDlsttpAuVLpptLsRL-psL-plhcpuFpLGAuphcscV..LcstsltpDctpVDhGLhttRphDLtLhspFLpsLQsphVlsQVDullhLELsspVVD-spVEVFTAcEtVAVGshHFEpA....lsDhpDGcVEusAAcVlDRDthshh..LVcslGpRupGRLVDDspchcsuDhAGVLGsLTLuVVEVuRpGDDpltchhApluhGuFLHLhQccstcLtRplhLA....hphDPsVAlsulsDh.tpphhVLhphtVscusADQALctcpGVhRVtcsLsLuRLscpshsllscscDRRtGutAFsVhDchplsAl.HDucAuVGsscVDTssFsH 
.........................................hp..hph.pthhs....hh.hphlu..h.....hhphhh..........tthpphhtLlhthtthhthhVhhulhhslhpHhhshhhhpsttsl.cuDllhhsuhhlhttplpDsVulDlEuphDLRpsu+pthss.plEhspphVlttph.shsLpch-t.ttLlVhtttEtLthhutDttVhhDphtccss.........tthsspt...pttpVpQp.lhtl.....shppssLpttspspshltVpshsthhs.cch.p.hhphhHsuhsspppphVsht.hpsslhpt.htthptshcplhtphhphtstph.hph..hts.t...tts.tplchGhhtttphshthhsthhp.sLptphlh..hplpshhhhchhtp.hDps.lplhssp.tlshut.phcth....hschpptplEtssspV.spc.hhh....h....lpsluptstuthVDDs.phpssshsulhGsL...sLtlV.cVst.tDstht.hhsplhhuthLchhpc.ttsLhtt.hLs......hphc.tlshh..tph.h..h.lhh.hhlh.hssspAhsttpushtVtctLshstlss.pshshht.ssctRtushshtlhpphthhsl..+st.stVutsplsss.ht................................................. 0 6 13 28 +10545 PF10713 DUF2509 Protein of unknown function (DUF2509) Mistry J, Coggill P anon PRODOM_PD077226 Family This family is conserved in Proteobacteria. The function is not known but many of the members are annotated as protein YgdB. 25.00 25.00 26.00 25.80 20.90 20.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.98 0.71 -4.40 10 534 2009-09-10 15:07:53 2008-06-10 17:00:33 4 1 521 0 38 192 1 120.60 58 89.88 CHANGED hLLlLG.LLLpGlppQLDuhhthsusEpptL+AastApSALsWGpuQsWutpst..........WpCpphsp.sh+uCLRhhSsuphlllcGput......slpLaQ.......pGssssssllhssHGW.DFCP.K-suhCp.hs ...................hLLlLGSLLLQGhsQQpcSaAuRVohESpuLRRQAlVQSALtWG.+.hpsWpspss....................hQCpp.Yu.....u.....osARVCLRlLu-sEslLlAGh-G.......VoLWR.........TGcV.I-GsI.VFSP+GWSDFCPLKEtALCQlP...... 0 2 9 21 +10546 PF10714 LEA_6 Late embryogenesis abundant protein 18 Coggill P anon Covarrubias A Family This is a family of late embryogenesis-abundant proteins There is high accumulation of this protein in dry seeds, and in the roots of full-grown plants in response to dehydration and ABA (abscisic acid application) treatments [1]. This LEA protein disappears after germination. 
It accumulates in growing regions of well irrigated hypocotyls and meristems suggesting a role in seedling growth resumption on rehydration [2]. As a group the LEA proteins are highly hydrophilic, contain a high percentage of glycine residues, lack Cys and Trp residues and do not coagulate upon exposure to high temperature, and for these reasons are considered to be members of a group of proteins called hydrophilins [3]. Expression of the protein is negatively regulated during etiolating growth, particularly in roots, in contrast to its expression patterns during normal growth [4]. 20.50 20.50 21.30 51.70 20.10 19.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.71 0.72 -4.17 6 28 2009-01-15 18:05:59 2008-06-10 17:33:36 4 2 14 0 17 26 0 80.70 48 71.36 CHANGED SEcc..psp.-sLPhEsSPYscYcDLEDYKppGYGTpGHQEPKsG+GuGuTDAPT.SGu.hpucutsou....TDAtNp+usP .........p.....ppscpptLPh-sSPYlpYcc...LEDYKh+uYGscGH.pPK.sG+GGGu.TDAPTlSGssh..........ucutsus.........sDAhNpps.................................. 0 3 9 13 +10547 PF10715 REGB_T4 Endoribonuclease RegB T4-bacteriophage encoded Mistry J, Coggill P anon PRODOM_PD091708 Family The RegB endoribonuclease encoded by bacteriophage T4 is a unique sequence-specific nuclease that cleaves in the middle of GGAG or, in a few cases, GGAU tetranucleotides, preferentially those found in the Shine-Dalgarno regions of early phage mRNAs. Phage RB49 in addition to gpRegB utilises Escherichia coli endoribonuclease E for the degradation of its transcripts for gene regB. The deduced primary structure of RegB proteins of 32 phages studied is almost identical to that of T4, while the sequences of RegB encoded by phages RB69, TuIa and RB49 show substantial divergence from their T4 counterpart. 
27.40 27.40 27.60 28.30 27.20 27.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.62 0.71 -4.10 14 136 2012-10-03 00:09:25 2008-06-11 13:56:22 4 1 74 1 0 99 0 142.10 24 90.69 CHANGED Mp..........c.hphaphpatph.sphtchpcs.........tthsp.Ft...........lhap.p.hsp.h.+phctcashthhpplhspl...ph.t.huh...........cthphpsthh........ElpcGshhlulsssp.s.sh...tulththtlhsptph.tphshhhl .....................p.t..h.hpp....c.hp.at.patph.sphtctscp........tslu.tFt...........lhappphhsc..sl...+..phccpashthFpclpspl..hph.....phluh...st....c....hlcshcapstph........Elp.....c....Gslhhu.h.osspss.ph...psuhphthsllpptph.tptphh................ 0 0 0 0 +10548 PF10716 NdhL NADH dehydrogenase transmembrane subunit Mistry J, Coggill P anon PRODOM_PD026898 Family The NdhL family is a component of the NDH-1L complex that is one of the proton-pumping NADH:ubiquinone oxidoreductases that catalyse the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. NDH-1L is essential for photoheterotrophic cell growth. NdhL appears to contain two transmembrane helices and it is necessary for the functioning of though not the correct assembly of the NDH-1 complex in Synechocystis 6803. The conservation between cyanobacteria and green plants suggests that chloroplast NDH-1 complexes contain related subunits [1]. 25.00 25.00 53.50 53.50 22.20 21.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.02 0.72 -4.21 24 91 2009-09-10 15:53:21 2008-06-11 14:19:09 4 1 85 0 38 89 84 79.60 44 78.07 CHANGED M..................................hhs.l..............psllVlhsYsuLuuhYLlVlPlhLahWhNpRWashuphERhhhYhLVFLFFPGhlLhAPFLNhR.psps. .....................h..........h...............phhhlhlhYhsLushYLLVlPhhlahahppRWYhtuphERhhhYhLVFhFFPGllLhuPFLNFR.ps+p.. 
0 6 24 35 +10549 PF10717 ODV-E18 Occlusion-derived virus envelope protein ODV-E18 Mistry J, Coggill P anon PRODOM_PD579825 Family This family of occlusion-derived viral envelope proteins are detected in viral-induced intranuclear microvesicles and are not detected in the plasma membrane, cytoplasmic membranes, or the nuclear envelope. The ODV-E18 protein is encoded by baculovirus late genes with transcription initiating from a TAAG motif. It exists as a dimer in the ODV envelope and contains a hydrophobic domain which is putatively acting as a target or retention signal for intranuclear microvesicles [1]. 23.10 23.10 23.10 62.90 23.00 23.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.20 0.72 -4.54 18 56 2009-01-15 18:05:59 2008-06-11 16:54:42 4 1 53 0 0 50 2 83.90 47 99.70 CHANGED MDshRss.......ssssth...phsslNPNhLMTILIsLVIIILLIhLFQ.SSsussSu....ssssptu..ahNPLNATMRsN.....PhV.NTsQRphL ....M-.hRss.......sssss..stplstlsPNhhMTILlsLVIIILLIlLFQSSSsussSu........s..s..........sssphuFhNPLNATMRsN.....PFV..NTsQRph.......... 0 0 0 0 +10550 PF10718 Ycf34 Hypothetical chloroplast protein Ycf34 Mistry J, Coggill P anon PRODOM_PD019546 Family This family is of proteins annotated as hypothetical chloroplast protein YCF34. The function is not known. 25.00 25.00 44.20 44.10 16.40 15.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.81 0.72 -3.59 23 81 2009-01-15 18:05:59 2008-06-11 17:13:16 4 1 79 0 31 81 84 75.60 54 91.41 CHANGED MCICVsCpaVDRCpTYHsVEpQHt.sHLopsPDFcPppPpIHVslh..s..ssthtlEWDVhuCpSFhp-.G+WsRLRP .MCICVNCcaVDRC.TYHAVEpQHpp..s...HLo-...sPcF-Pp..p..PsIp.VNI+..........s.p....sstlchEWDVluCpSFhpEhGKWu+LRP....................... 0 8 22 30 +10551 PF10719 ComFB Late competence development protein ComFB Mistry J, Coggill P anon PRODOM_PD066657 Family This family is conserved in bacteria. 
Some members, with three conserved cysteines, are annotated as late competence development protein ComFB. 25.00 25.00 28.70 28.60 20.30 19.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.71 0.72 -4.24 60 450 2009-01-15 18:05:59 2008-06-11 17:24:40 4 1 402 0 154 351 11 83.20 28 65.09 CHANGED l+NahEclVhctlpp......tphpp..t.tspcslsDlsslALNpLPP+Ylppchshh.ahh.spptcpphcpplhsAlpcAhphVpppspc ..........l+NhhEplVhph.lsp.h...........ph..p......t.hspcpls..DlsslALNpLPPhYlppchshh.hth..sptthtphcsclhtAlppAhthltps.p................................ 0 52 109 135 +10552 PF10720 DUF2515 Protein of unknown function (DUF2515) Mistry J, Coggill P anon PRODOM_PD131865 Family This family is conserved in Firmicutes. Several members are annotated as YppC. The function is not known. 25.00 25.00 28.30 28.20 21.40 20.90 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -11.93 0.70 -5.52 19 260 2009-01-15 18:05:59 2008-06-12 09:26:28 4 2 151 0 33 210 0 311.30 50 86.73 CHANGED llptIcccTchtNpDNISRTpAYhpaY.RaPEI+WuhLAoMVSRNuGWsMTDL+GphapplLsppppcthFLhYERANWlIFpDAYPQLLLYE.S++pspPLFHLLshFpVSpFMppEWp+FWcctspcRLhhALIINEQNhIQpPlIppsha++pVFcol.Fhlp-hhHFssVlFPshcG.......tLaGholpcFpslccRIpLGKpLupLLFcsch.hsphhcFAhpssHTGSRhDY.pahh.......................Gsp+htSPtLRpsasslsHphssp.pDWFpcpt..shhh.hppp.scphclT-hYh+Kpcplphhhhlpc ......llppIKcpTchhNtsNloRTpAYhpaYhRasEI+WAhLupMVSRNuGWNMTDL+GchYsplLscpsppphFhhhERuNWLIFpDAYPQLLLYEpSp++ppsLFHLLsahNVStFMEp.WphF..Wc.................ptst.ppLhhALIINEQNhlpK.VIpNsaFKKpVhpohhFKLp-hhphspllFPhh.Es...................hLaG.olppFpoLpcRItLGK+LhuLL.F+spa.huphhpaAtppsHTGSRtDY.salh..........................suh+haSPsLp.sa.shtHcEhc....h.cDWFscht..shha.hccE..phpspITE.Yp+phEpIthA.lsp.+................................ 
0 8 20 21 +10553 PF10721 DUF2514 Protein of unknown function (DUF2514) Mistry J, Coggill P anon PRODOM_PD034813 Domain This family is conserved in bacteria and some viruses. The function is not known. 24.70 24.70 24.80 24.90 24.60 24.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.01 0.71 -4.54 17 309 2012-10-02 17:03:51 2008-06-12 11:24:49 4 3 255 0 30 214 1 150.60 41 93.15 CHANGED WhhhhhlllLshs.hhsh....a...ttGpphpstsatpchA..........cccus....pt.stltspstARtcEQcRptAtscstpcAppctstApAsAssAsAuuspLRppsscL....suuppt.ssssuAsspupsAucsuhVLu-lLu+usppAtpLActuDcuplAG.sCE+tYDulpt ...........................................................h...hhhllhhshh...G.........a....hhGsshuDp..uWppK.WA..........-RDuu......t.oppl.ssphuA.Rh.hEQtRphApDEssKDAQpctAchp..AcAAs...hus.uss..pL.Rs-up+h....lsA..A+c..sushAAAspuKospsstshLsshLG-hstcApha.AchADcpa..hAGhTCpphY-olp.s............................................... 0 3 8 19 +10554 PF10722 YbjN Putative bacterial sensory transduction regulator Mistry J, Coggill P, Bateman A anon PRODOM_PD093695 Domain YbjN is a putative sensory transduction regulator protein found in Proteobacteria. As it is a multi-copy suppressor of the coenzyme A-associated temperature sensitivity in temperature-sensitive mutant strains of Escherichia coli the suggestion is that it both helps CoA-A1 and possibly works as a general stabiliser for some other unstable proteins [1]. This family was expanded to subsume other related families: DUF1790, DUF1821 and DUF2596. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.20 0.71 -4.10 185 1845 2012-10-01 22:01:34 2008-06-12 12:57:55 4 8 1501 2 429 1139 363 127.10 20 69.51 CHANGED sh-hlpphlpptshp.....hph.....tp-.spts..............ltsphpshthhhhhts..........pspthth.......shsh.hphs................pptsthhphlsphNpphhhs+h..hh.pppsthh.....hchslsl.....ttulospplpphlptshptsppahstlpt .....................................................................phlcphlpphshp.....a.p....sss.spuh............hhsphpsh.p..hhhshp............-h.s.hl....hhu.h.h.tcl................sssh.slhthLsthNt...pt.hhs+h...hl..ctp..scsh...........lptslsl......tGl..o....c..p.h..t..hhlpps.cthphhh.....t.................................. 0 128 285 369 +10555 PF10723 RepB-RCR_reg Replication regulatory protein RepB Mistry J, Coggill P anon PRODOM_PD763888 Family This is a family of proteins which regulate replication of rolling circle replication (RCR) plasmids that have a double-strand replication origin (dso). Regulation of replication of RCR plasmids occurs mainly at initiation of leading strand synthesis at the dso, such that Rep protein concentration controls plasmid replication [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.76 0.72 -4.25 15 388 2012-10-02 18:44:02 2008-06-12 13:18:45 4 3 310 2 20 199 17 75.20 41 85.33 CHANGED M.....SQhtNAVTSSsssKRtYRKGsPh........osAE+Q+suluRK+tTHKclpVFlpNtLK-pLhplCcccGlTQAEhIEcLIcpEhscps........p ...............................M.SQhtNsVTSSs..KRhYRKGpPl.........osuE+QphuluR..K....+so......+K.....clpVFlpsphKshLtphCcccGlTQAEhlcc.LIcpEhtth..s.............. 0 3 7 14 +10556 PF10724 DUF2516 Protein of unknown function (DUF2516) Mistry J, Coggill P anon PRODOM_PD057095 Family This family is conserved in Actinobacteria. The function is not known. 
25.90 25.90 27.00 26.70 24.50 25.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.35 0.72 -4.01 22 270 2009-01-15 18:05:59 2008-06-12 13:23:55 4 2 269 0 89 211 12 99.70 35 97.19 CHANGED M...............hlhsltshlhhlLtlsshssulaAhlcuAhpRsDAFsAAsKtTKshWLsILGlulhlhllhh........shlshhul.luslAssVYlsDVRPtlcplp.................hssW ........................h.thlhhlLhlssllsulhAhVcAAhpRsDAasAADKhoKshWlsILGsAs.hlshlh......................sslsh.Luh..lu...hlAsuVYllDVRPpltplp...............G.....h....................... 0 25 67 85 +10557 PF10725 DUF2517 Protein of unknown function (DUF2517) Mistry J, Coggill P anon PRODOM_PD055257 Family This family is conserved in Proteobacteria. Several members are annotated as being protein YbfA. The function is not known. 25.00 25.00 41.40 41.20 20.70 19.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.18 0.72 -4.29 11 540 2009-01-15 18:05:59 2008-06-12 13:48:29 4 2 535 0 42 123 1 62.40 80 92.11 CHANGED hYpsYPhapIlLRRhhVlLsGlLALPVMLFh..+DRARFYSYLHRVWsKTSDKPVWLpQuEpusp ...LY+-YPAalIFLRRoaAVsAGVLALPhMLFW..KDRARFYSYLHRVWSKTSDKPVWMsQAEKAT.s... 0 1 10 24 +10558 PF10726 DUF2518 Protein of function (DUF2518) Mistry J, Coggill P anon PRODOM_PD032672 Family This family is conserved in Cyanobacteria. Several members are annotated as the protein Ycf51. The function is not known. 25.00 25.00 25.20 133.60 21.30 20.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.70 0.71 -4.66 26 73 2009-01-15 18:05:59 2008-06-12 13:59:10 4 1 73 0 29 78 119 145.00 41 86.59 CHANGED M..shsphltpsstWluauulshulLTllAFlhpWGlRFRLVGlTuFolLLosusaAFulua.ps.stl-GAlphslVaDNGsshlVspsssshsspslpsTLcQlAuNL+usGRsus....pVplRlRplppspsGlScPllLGElpRs ........M.hst.hhshspWhuhuolshslLTllAFlh+WGlRFRLVGlTuFhhlLosuhauhuluh.hs+spI.GAl+aslVYDNGusplVlslssslstsplEATLcQAAsNLhShGRsus..ptplsIRhRslhHspsGlScPlhLGclpR.... 
0 5 19 27 +10559 PF10727 Rossmann-like Rossmann-like domain Bateman A anon Bateman A Domain This family of proteins contain a Rossmann-like domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.57 0.71 -4.41 8 557 2012-10-10 17:06:42 2008-06-12 16:45:08 4 5 547 4 188 1095 262 118.70 31 39.88 CHANGED MptP...........p.sRLpVGIlSAGRVGsALGtAL-RAGHsVsuloAlScAS+pRAppRLPssslhsl.-lsc+uELllLAVPDA.ELsGlVpGLAsstssRsGpIVsHTSGApGlsILAPLscpGsIPLAIHP .....................h.....................plGlIG.A.G+VGssLuhsL......p.p........s.......s....a..................l.....s.........u....s.....t........u....h....S.........p......s.........S......t.....p.....c.....A....t.....p.......h.....l..s....s............s.......s.......h............h.......s...............t....-...l.......s.......s..p...u..-.LlllsV.P.Ds...tl.ss.l.s....p....t...L...s.....t..t...t.....hp........s...G...p........l.ls.H..s..SG...A..h...u..ss.lLsP.hpptGu..hshulHP...................................................................... 1 69 144 174 +10560 PF10728 DUF2520 Domain of unknown function (DUF2520) Bateman A anon Bateman A Domain This presumed domain is found C-terminal to a Rossmann-like domain suggesting that these proteins are oxidoreductases. 
22.20 22.20 22.40 31.20 21.90 22.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.46 0.71 -4.70 98 840 2009-09-11 11:03:44 2008-06-12 16:53:23 4 12 824 6 260 779 206 129.80 28 45.49 CHANGED ssshulEu...sppshslhp.plspplGscshhl.sscpRshYHsAAVhuuNalssLhshutclhpps..Glst.......................chLtPLlcsolcsh.hphGs.pALTGPls......RGDts.....TlppHlpsLpp.........sphtplYptLucthh ...............sshsl-u...sptsh.th.hp....sLstplGscshtl...spp.pRhhYHAAushuuNalsslhs..uhclLppt....Gls.......................................................................chLhPLlpsslcsh.hp.....p............G......p...............ALTGPls......RGDts.....slppHLphLps............sphtphYphhupth.h................................ 0 101 200 242 +10561 PF10729 CedA Cell division activator CedA Pollington J anon PRODOM Family CedA is made up of four antiparallel beta-strands and an alpha-helix. It activates cell division by inhibiting chromosome over-replication. This is mediated by binding to dsDNA via the beta-sheet. [1,2]. 25.00 25.00 79.50 79.40 19.80 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.74 0.72 -4.43 3 438 2009-01-15 18:05:59 2008-06-19 16:21:05 4 1 434 2 15 70 2 79.30 91 97.33 CHANGED lMKPLRQQNRPIISYVPRVEPAPPEHAlKMDuFRDVWhLRGKYVAFVLMGEHFRRSPAFSVPESAQRWANQIRQEGEIpE MKKPLRQQNRQIISYVPRTEPAPPEHAIKMDSFRDVWMLRGKYVAFVLMGESFLRSPAFTVPESAQRWANQIRQEsEVsE. 0 1 1 8 +10562 PF10730 DUF2521 Protein of unknown function (DUF2521) Pollington J, Finn RD anon PRODOM Family Family of unknown function specific to Bacillus. 
25.00 25.00 115.60 115.00 19.30 17.80 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.90 0.71 -4.38 10 131 2009-01-15 18:05:59 2008-06-19 16:34:35 4 1 131 0 18 56 0 143.40 62 99.77 CHANGED MsVIsSFs-++REKQlcaEKplLRELSLcplppul+caFtsl.asFhpphpshlp-uCIDhAIEAYLLGu+aG+FGYYGEshpclptRstcEEccLscsLasaLpsWup.tpsptsp-sLYtAscpFIssWWpEGFpcucKRaKLRLH ...MNVIVSLpEKQKEKQLKYERKMLRELSLKTLRoNIRDAFp......MQELHRQYEDYCIELGIESYLLGARYSKFGYYGESFFDVKYRALEEEQQLTETLFQFLTSMThREIcLpDEELLFESCQQFIGhWWQEGYEKGERRYRLKLH. 0 3 10 12 +10563 PF10731 Anophelin Thrombin inhibitor from mosquito Coggill P anon Rawlings N Family Members of this family are all inhibitors of thrombin, the peptidase that is at the end of the blood coagulation cascade and which creates the clot by cleaving fibrinogen. The interaction between thrombin and fibrinogen involves two different areas of contact - via the thrombin active site and via a second substrate-binding site known as an exosite. The inhibitor acts by blocking the exosite, rather than by interacting with the active site. The inhibitors are from mosquitoes that feed on human blood and which, by inhibiting thrombin, prevent the blood from clotting and keep it flowing. 25.00 25.00 27.70 105.10 19.80 18.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.09 0.72 -3.87 4 8 2009-09-14 14:04:23 2008-06-19 16:55:34 4 1 5 0 1 8 0 64.60 58 71.31 CHANGED MAoKLlVIAhLClALlA.lVQuAPQYApG-EPoYDEDD.s-EslpPHSSSssD-s.--FDsSLL-c MAoKLFVIAhLClALVA.lVQuAPQYApG-EPoYDEDD.s-EsLpPHSSSsoD-s.-EFDsSLL-c. 0 0 1 1 +10564 PF10732 DUF2524 Protein of unknown function (DUF2524) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillaceae bacteria. 
25.00 25.00 73.70 73.50 21.50 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.92 0.72 -3.86 8 132 2009-01-15 18:05:59 2008-06-23 11:22:36 4 1 132 0 18 52 0 84.00 70 93.53 CHANGED MATRQSV-EaLQ+sEpAl-aApEQacpAp+QEHYN-pEYS-AQhhLEsAVN-Ls+LshSAN-QQREQLaRhRLQLQpLQNpMIL .MAERQSLEsYITQAEQAVEYAKEQL-pGMRQEHYNTMEYSDAQLQLEQAYNDLQsMQQHANDEQREQLNRARMAIRQLQHQMII. 0 2 10 12 +10565 PF10733 DUF2525 Protein of unknown function (DUF2525) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. The family has a highly conserved sequence. 21.40 21.40 21.60 24.40 21.30 18.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.64 0.72 -4.13 4 435 2009-01-15 18:05:59 2008-06-23 11:30:14 4 1 433 0 18 61 0 57.80 90 76.89 CHANGED DVDALLAAINEISESEVHRT.-Dsp+sslDGRchHTaRELAEAFELDIHDFSsSEVNR .DVDALLAAINEISESEVHR..S..QND..SE..HVSVDGREYHTWRELADAFELDIHDFSVSEVNR... 0 1 1 11 +10566 PF10734 DUF2523 Protein of unknown function (DUF2523) Pollington J, Finn RD anon PRODOM Family This is a family of phage related proteins whose function is uncharacterised. 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.32 0.72 -3.75 28 269 2009-01-15 18:05:59 2008-06-23 11:32:35 4 1 213 0 36 185 2 82.70 23 83.97 CHANGED tsLhshLtslht.lls+l..LsulGluhsohsGlsshhsthht.htshhsulPs....sllslluhhGlspuluIIhuAlshRlshpsl ....................................h..LhshLh.lht.llh.+l........lhulshhhhs.h....ssl.thhs........hh....shlpsthsulPs...........shhthlhhhGlspuLshlhuAhsh+huhp..h...... 0 8 20 28 +10567 PF10735 DUF2526 Protein of unknown function (DUF2526) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function is restricted to Enterobacteriaceae. The family has a highly conserved sequence. 
25.00 25.00 49.60 49.50 20.50 19.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.51 0.72 -3.99 3 445 2009-01-15 18:05:59 2008-06-23 11:37:21 4 1 444 0 16 60 1 76.60 90 99.99 CHANGED MSHLEEVpsRVDAAIAEuVIAHMNELLIALSDDAQLSREERYsQQQRLRTAIAHHGRQHKEDpE....ARREQLTKGGoIL MSHLDEVIARVDAAIEESVIAHMNELLIALSDDAELSREDRYTQQQRLRTAIA.HHGRKHKEDME....ARHEQLTKGGTIL... 0 2 3 10 +10568 PF10736 DUF2527 Protein of unknown function (DUF2627) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to a family of Enterobacterial proteins. It has a highly conserved sequence. 25.00 25.00 29.40 48.80 16.40 15.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.80 0.72 -4.34 2 354 2009-09-11 09:21:57 2008-06-23 11:38:56 4 1 354 0 9 18 0 37.80 95 81.88 CHANGED MCGIFSKEVLSKcVsVEYRFSA-PYluASsSNsSsLSM MCGIFSKEVLSKHVDVEYRFSAEPYIGASCSNVSVLSM... 0 1 2 4 +10569 PF10737 GerPC Spore germination protein GerPC Pollington J, Finn RD anon PRODOM Family GerPC is required for the formation of functionally normal spores. The gerP locus encodes a number of proteins which are thought to be involved in the establishment of normal spore coat structure and/or permeability, which allows the access of germinants to their receptor [2]. 
22.40 22.40 24.20 23.30 22.10 22.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.15 0.71 -4.50 8 149 2009-01-15 18:05:59 2008-06-23 12:10:43 4 2 142 0 20 99 0 167.80 53 85.78 CHANGED IppLE+plpELQpElspLKp+PuTsI-RIEYKFDQLKIEpLEGTLNIGLNP.oDsp.slEsFpV.ssssssluhhpQ-pssplhppIcQpV-tYLsEEsPplLcpLEppY-spLD-o.+paIlEDI+KQhDSRIcYYlpphtpcpsssPspc....t-cIAptVK+DIp+Al-pFLpHIPs ......................................IhsLEcQV+pLQcELNEL..KsR....P....So....SIsKVEYK..FDQLKVEsLsGTLNIGLNP..uc..sp...pIEDFpV..-sETl.cVs.............PEs...-ssPcaaQ........sIhQchacYL-EEAhscIh+hEpcp.cosLDEhYRQhhl-DIKKQM-cRlsYYLSQsp...shEsh........sossc..hlc-hIlQthKpDI-+Ah.uFIpHIPu................. 0 4 12 14 +10570 PF10738 Lpp-LpqN Probable lipoprotein LpqN Mistry J, Coggill P anon PRODOM_PD017067 Family This family is conserved in Mycobacteriaceae and is likely to be a lipoprotein [1]. 21.60 21.60 22.00 22.20 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.27 0.71 -5.01 19 257 2009-01-15 18:05:59 2008-06-24 13:21:36 4 3 90 0 49 152 0 179.40 35 72.00 CHANGED ssssP..................................lspaLcppGVshps.sPpshsuLslolPhPsGWpshssPNlssuhslI.....upuhhhssApllVaKLhGsFDPscAIp.HG.sDuQth.sac.phsAShAsasGFPSShIEGoYc.sGhphpo.pRhVIATu...usp+YLVpLoVT.ohsspAs.stusss-AIlsGFpVus ...............................................st..shP.........tholt-YlcspuVphpPlpsss.su.slslPhPssWp...hs..sss..lsssassIs.p...usus.....sPsA.hllVhKLp.G.DhD.PAcslp.+uss-.uppLsuap...sssuShAsasGFPSu..h...lpGoYcp.s.Gh....p.hpsupRpVlssu.....sssp.YLVpLs..lT.shsspus.shusAscsIspGhpls.s...................................................... 0 5 27 42 +10571 PF10739 DUF2550 Protein of unknown function (DUF2550) Mistry J, Coggill P anon PRODOM_PD031809 Family This family is conserved in Corynebacterineae. The function is not known though most members are annotated as either secreted, or membrane, proteins. 
20.60 20.60 20.70 22.10 18.00 20.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.59 0.71 -4.19 20 360 2009-01-15 18:05:59 2008-06-24 13:38:10 4 1 358 0 88 232 1 130.70 34 85.61 CHANGED sLslllssllhlh..lhRhhlhRphGuhssslR.....hsssss+GWphGlhRYssscLcWaRLhSLpstPchslsRpul-lhsRRsPpusEhhhlssssl.llc........lps....cs.sphElAhstsAhTuFhSWLESuPssp ..........lsslllhh.hhhh....hhRhh....hhRp.GuhssslR........sh.s.....uss.spG.WphGssRYs.ss.phcaYRlhSh+hhPshhLpRpulclh..s+..RsPp.s.-Ehhhhsssh..h..llp........lps...................+s.sshElAlDtsulTuhhSWLEuAPss.t....................... 1 28 67 84 +10572 PF10740 DUF2529 Protein of unknown function (DUF2529) Mistry J, Coggill P anon PRODOM_PD099482 Family This family is conserved in the Bacillales. The function is not known. Several members are annotated as being YWJG, a protein expressed downstream of pyrG, a gene encoding for cytidine triphosphate synthetase. 22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.08 0.71 -4.83 14 343 2012-10-02 15:05:26 2008-06-24 13:41:14 4 1 343 2 29 139 0 168.40 53 97.13 CHANGED M.KIFoTQLoGlFsRIt-KEp.uIEDuARLLAQAllG-GplYltGhsEhpulthpAhputEshsSutsLs.sss...plssoDRVLlFsphssDtEshthscpLh-pGlshVslS.sstpcssslsphsclHIDhplptsLlPsED.GsRhGaPushsuLYlYauLphslcEhL ...............................................MSKILsTQLhGIFNRl.EKQpL-IpMAAQsLlQ.AIGGEGaVYlKGYcDLpaaEoa.....lLaScE+LKSS++Lc.sl...pchpEIDSTDRVLLFuPFYsD.pVshDlpKLl-hDlDlVLIS....N.......pPKT-....DhP-HLsHaIDLSTPRPIVYTED..Y.DKIlQPHsMAhNYlYY-IYTQMlEMs......................... 0 7 16 23 +10573 PF10741 T2SM_b GspM_II; Type II secretion system (T2SS), protein M subtype b Mistry J, Coggill P, Desvaux M anon PRODOM_PD110875 Family The T2SMb family is conserved in Proteobacteria and Actinobacteria, and differs from the T2SM proteins in Vibrio spp. (Pfam:PF04612). 
21.50 21.50 21.50 21.50 21.40 21.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.08 0.72 -4.44 28 187 2012-10-02 17:03:51 2008-06-24 13:57:02 4 3 172 0 71 217 26 108.80 24 55.89 CHANGED tusssaltusssshAuAsL.p+lpshlsps...GsslsooQhhs..sspup.....hsplulplshcsshssLpplLhsLEsupPhLaVDpLslps.......shsssssups..Lplphsluuhh ........................t.t..al.st.ssshAuA.sL.pclpphlsps...Gs..ssttsphhs..hp...s.psp..............hsplslplplps....sh..tsLtslLhsLEstp.PhLhl-pLslpt..............phttssstts.tLplthtltuh........................... 0 24 42 58 +10574 PF10742 DUF2555 Protein of unknown function (DUF2555) Mistry J, Coggill P anon PRODOM_PD060530 Family This family is conserved in Cyanobacteria. The function is not known. 25.00 25.00 74.40 74.30 24.40 19.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.62 0.72 -4.34 21 69 2009-01-15 18:05:59 2008-06-24 14:57:06 4 1 69 0 28 62 107 56.90 55 77.10 CHANGED lot-plssFscpslApLApRLE-DDYssPF-GLpDWHLLRAlAhpRPELspPYlHLl ....otcclsuhTpp-VApLApRLEpDDYssPF-GLpDWHLLRAlAFpRPELscPYlHLL. 0 5 18 26 +10575 PF10743 Phage_Cox Regulatory phage protein cox Mistry J, Coggill P anon PRODOM_PD064131 Family This family of phage Cox proteins is expressed by Enterobacteria phages. The Cox protein is a 79-residue basic protein with a predicted strong helix-turn-helix DNA-binding motif. It inhibits integrative recombination and it activates site-specific excision of the HP1 genome from the Haemophilus influenzae chromosome, Hp1. Cox appears to function as a tetramer. Cox binding sites consist of two direct repeats of the consensus motif 5'-GGTMAWWWWA, one Cox tetramer binding to each motif. Cox binding interferes with the interaction of HP1 integrase with one of its binding sites, IBS5. This competition is central to directional control. 
Both Cox binding sites are needed for full inhibition of integration and for activating excision, because it plays a positive role in assembling the nucleoprotein complexes that produce excisive recombination, by inducing the formation of a critical conformation in those complexes [2]. 23.70 23.70 23.80 28.90 23.50 23.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.87 0.72 -3.96 6 218 2009-01-15 18:05:59 2008-06-24 16:14:00 4 1 186 0 11 94 0 81.20 49 85.59 CHANGED M.......ppQlophsoDhlsh-tFAphIGKTspAVppMlcAGKLPslcMpDPppssG+.GEhWlahsEWsphs+phh-otPsE....WhhWhsh ...............lhth.hDAl.hpcFAchlGKs.sAVppMlctuKLPlI-h.p.DPppssuRAGEhWValPtaNcul+pAa.sRPhE.RDuWLhWhGL....... 0 1 5 8 +10576 PF10744 Med1 Med1-Trap220; Mediator of RNA polymerase II transcription subunit 1 Wood V, Coggill P anon Pfam-B_51442 (release 22.0) Domain Mediator complexes are basic necessities for linking transcriptional regulators to RNA polymerase II. This domain, Med1, is conserved from plants to fungi to humans and forms part of the Med9 submodule of the Srb/Med complex. it is one of three subunits essential for viability of the whole organism via its role in environmentally-directed cell-fate decisions [1]. Med1 is part of the tail region of the Mediator complex [3]. 
21.10 21.10 21.30 21.50 20.40 21.00 hmmbuild -o /dev/null HMM SEED 393 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.24 0.70 -5.70 30 276 2009-01-15 18:05:59 2008-06-24 16:21:09 4 5 202 0 189 267 0 357.00 20 39.21 CHANGED l................s-hlphlhthh.tth..............sl-slp+hsphhuh-uhs-cl.....................clshtpshlsl-lDhspt.pshlhsstLhl........................................th.hss.psphsphphuh.tp...ps.stLhphhptpshscFppsLphLspl.phss...........................................p.ptchshFtAlcsLtpsLp........th.ph..pptsss.phpphhpsshGhhthc..pss....chthplpYahpst.lhctpp...........................................h..shphhtssthtshhNpssphpsssssthh.hhs..hs.........P.hhh.sppsssphssshshhlspshs...................................................................................pt.h..h.....st.ppapYths........sshps.tsph.lpclshscspplstllsl.............................................LRp.sh..hssLlposhpppt ..................................................................................................................phhphlp.hh.t................sh-sl.p+hhp.h.uht..shh-ch....................................p..htt.ph....ls.........clsh..stt....p....shlpsshhhh..........................................th..s.s.tsphs...sh.phth...tc..............ps...........t.h....spth...p.t...........pshspFptpLctLspl.pLst.................................................................................p.chpha.AlpultpsLp........................ph..h....ph..........................p.........t.s.s.........t.........t............hp.........p......h.h.pup.h.Ghht.+....pss....+l.hslpYahs.tplhptpp................................................................ht...h......ts.......sht...hstssp....tss.st....hh...hh.......s.hhts..............Psh......spsspsphs..s........sh.hphspshs...sht.lttht.sh.hsh................................................................h....tpt............tpp.t.h..h..sh...........s..t.....ppHsYhh................shpsh.Gph...lpclsFpHPtplshllsl...............................
..............LRp.sh..hssLltsshp...s............................................................................................................................................................................................... 0 46 83 151 +10577 PF10745 DUF2530 Protein of unknown function (DUF2530) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to mycobacteria. 22.40 22.40 22.40 23.00 22.30 22.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.55 0.72 -4.00 10 133 2009-09-11 11:12:09 2008-06-25 13:38:17 4 1 133 0 42 94 37 75.50 49 87.02 CHANGED sspPPPLPssLL-PhPVIsVGsLuWLV.AsVsAFsVsu.LcsWRPlTlAGLuVGlLGToIFLWQR+AARRGuRGAQsGL ...............h..psPsLPssL.l-shPVIsVGsluWLV.As..V.sAal.Vsu...l....s.....s.....WpPV..olAGLusGl.LGTsIFlhQhuAARRGuRGAQtGL......................... 0 12 31 38 +10578 PF10746 Phage_holin_6 Phage holin family 6 Pollington J, Finn RD anon PRODOM Family Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. 20.10 20.10 20.90 20.60 20.00 19.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.21 0.72 -4.25 4 52 2009-01-15 18:05:59 2008-06-25 13:44:29 4 1 39 0 1 40 0 62.70 52 94.44 CHANGED LpLDFsNEVlKAAPIVGTusADsAuRlFFGLoLNEWFYsAAIAYTVVQIGshVhcslhc.K+tsKs .....................LsLDFNNEll.KAAPlVGsusADsuARLFFGLSLNEWFYVAAIAYTVlQIGAhVhcplhcaK+ts+.......... 1 1 1 1 +10579 PF10747 DUF2522 Protein of unknown function (DUF2522) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus. 
25.00 25.00 43.10 43.10 24.00 23.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.84 0.71 -4.15 12 130 2009-01-15 18:05:59 2008-06-25 13:45:38 4 1 129 0 20 70 0 140.40 55 97.14 CHANGED RpY.lYLIc-EhApaYFGREphhFcLFp-hchss..tppcplhtKQlpYITcslPhh+hcptLpptL.s+.hphpphpshapl....stcuputlhlt-+hIpltspGshsAEoshFElLRKlsssFLAhDhpsp+aGWLs.Pl+tcp ......+TYELYLIpEDlA+uYFGREhLFF-LFscao-Su.ohSEKcVLhKQhhYIThPLpVh+IHH+LEQsLcsh.u+YcRs+aTHsL...aoGApauEIMVKs+YIchsoSGNlShETTFFEVLRKsEhTFLAMDYENsKYGWLN.PLKQs.+... 0 4 12 14 +10580 PF10748 DUF2531 Protein of unknown function (DUF2531) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 22.00 22.00 22.00 22.80 21.60 21.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.95 6 489 2009-01-15 18:05:59 2008-06-25 13:47:50 4 1 480 0 33 166 0 125.50 64 94.58 CHANGED pspRWlLLshsLshLTGMRDPF+PPEDRCRIucLspWRYQGhVucG-plsGlLKDuQpKWRRVcpsphL.sGWplspLTAsplsLpTGcGC-PspWRWpRpGspsE.AMDSpsssssssRtttu+usppDAsGG ................+spRhl.L.hsluLs.LLTGMRDPF+P.P.E.D.h.C..R..I..u..E.L..S..QWRYQGhVG+GERhIGlIKDGQ+KWRR.VppsDlL.E.N.G.WTIlQLTs-sLTLsT...GsN...C..EPPQWhWQRQ.Gc.s.NE.AMD...S...+sTssuD.sRRTGGKuucSDAsG............................... 0 2 7 20 +10581 PF10749 DUF2534 Protein of unknown function (DUF2534) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 21.40 21.40 22.20 53.20 20.40 21.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.59 0.72 -4.06 4 453 2009-01-15 18:05:59 2008-06-25 13:49:56 4 2 450 0 21 70 0 83.00 86 88.81 CHANGED MIhhtKLpotcGKKFLluLhlVFhlAhoVVuRAThsGVlEQYNhPhScWToSMFhlQuAMlhVYSlVFThLlAIPLGhaFLGuc- .........MIMAKLKSAKGKKFLFGLLAVFIIAASVVTRATIGGVIEQYNIPLSEWTTSMYVIQSSMIFVYSLVFTVLLAIPLGIYFLGGEE.. 
0 1 1 11 +10582 PF10750 DUF2536 Protein of unknown function (DUF2536) Pollington J, Finn RD, Eberhardt R anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. Structural modelling suggests this domain may bind nucleic acids [1]. 20.50 20.50 23.30 22.50 20.20 17.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.14 0.72 -4.39 8 132 2009-01-15 18:05:59 2008-06-25 13:52:36 4 1 132 0 21 57 3 67.80 71 93.71 CHANGED MNFpLDLIcDKVEFFEApcLpsLEKKIN-QIEpNKAILLpVHsVSHQspVs.csGRhhYSAVVHFKAKp ....MsFoF-MLEDKVEFFEAuDLsSLE+KIuEQID..NNKALMLEVHHISHQMlhDPESKRPYYSAVVHFKLKK.... 0 5 13 15 +10583 PF10751 DUF2535 Protein of unknown function (DUF2535) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 82.70 82.60 21.70 18.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.97 0.72 -3.84 9 124 2009-01-15 18:05:59 2008-06-25 13:53:25 4 1 123 0 16 49 0 82.80 65 95.60 CHANGED MLhKSLEFKpssGppVKloEIPVLcpDpPahFhlphRLphaltclapupsp+sVYSFR-YLKRplKWsDYpplappstLKHNA ..ITKSFYFTHSTGpCIKIFEIPVLQuQHPLuFLIQSRLQLFIAKIQKpK+PRFSYSFREYLQsCLKWNDY.NVYKTNTLEKNA 0 2 8 10 +10584 PF10752 DUF2533 Protein of unknown function (DUF2533) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 48.10 48.00 22.60 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.80 0.72 -3.68 10 130 2009-01-15 18:05:59 2008-06-25 14:56:50 4 1 130 0 17 59 0 83.60 69 94.30 CHANGED hpVH+AITAHS+KQ+pplKpFlpLDtcREtAIE-AVscC+pGcsFoTDsINcITccMN-LAKpGIVPpRKlVTsEMV+EYVuRh .MEVHKAITAHSRKQNEuVKAhLQLDAQREAAIEAAVSLASNGKcFSVDlINsVTKQINsLAKN.G.Vo.QRKYVTcEMVMEYVSRL. 
0 3 9 11 +10585 PF10753 DUF2566 Protein of unknown function (DUF2566) Mistry J, Coggill P anon PRODOM_PD060138 Family This family is conserved in Enterobacteriaceae. The function is not known. 22.10 22.10 23.00 23.00 22.00 20.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.91 0.72 -4.05 7 798 2009-09-11 10:22:14 2008-06-25 16:39:19 4 1 477 0 30 144 2 53.90 69 84.61 CHANGED hapthLlhYhlGhslohhlhahlS+D.phpIRhluAhllGlTWPhShPVsLLhSLF ......Lap+hLlFYslhssIuFlIhWFlSH-.Kp+IRhLSAFLVGhTWPMShPVALLFSLF............ 0 2 7 17 +10586 PF10754 DUF2569 Protein of unknown function (DUF2569) Mistry J, Coggill P anon PRODOM_PD088965 Family This family is conserved in bacteria. The function is not known, but several members are annotated as being YdgK or a homologue thereof. 25.00 25.00 25.10 25.70 24.90 24.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.84 0.71 -3.99 15 609 2009-01-15 18:05:59 2008-06-30 10:56:40 4 5 578 0 63 219 7 142.60 64 86.26 CHANGED ssshp+IGGWLlh.PhshLllo.lss.lsllhhh.ulhsstsaphLsupssshh.....hhhhhullhslsMa..........haTLalshLFF+R+pthP+haIlhlLloll.lslcuaAh........oPl.DshAl+plhhsLLuAslalPYh++ScRVKpTFlc ............T.TsPQRIGGWLLG....PLAWL.LV.AL......LSso......LA.L....L.LY...ssALuoP.QTFpTLuuQuhssp..........lLWGlSFITAI.....AMW..........YYTLWLTIAF..FKRR.RCVPK.HYI..I.WLLlSVL....L..AVKAFAF............SPVpD...ul.A..VRQLLF.....sLL..ATALIVPYFKRSuRVKsTFVN... 0 10 20 42 +10587 PF10756 bPH_6 DUF2581; Bacterial PH domain Mistry J, Coggill P, Bateman A, Ginalski K anon PRODOM_PD034109 Domain This domain has a bacterial type PH domain structure. This domain was previously known as DUF2581. This family is conserved in the Actinomycetales. Although several members are annotated as RbiX homologues, RbiX being a putative regulator of riboflavin biosynthesis, the function could not be confirmed. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.28 0.72 -4.09 95 695 2012-10-04 00:02:25 2008-06-30 11:00:18 4 2 302 0 232 583 18 75.20 24 44.92 CHANGED RsRltlsssG..lslRslh.us+hlsWs-Itslp...hspusph...........sclchsD....sphlsl.ulphs.st.ptshsuhsplt.s ..................RsRlts.sscG....lslRshh.ss+hlsWsplt.ulp..........hspu.sph........................................splchs-..................sph.lsl.u.lphs.sh.thshsuhtth...s........................................................................................ 0 71 179 218 +10588 PF10755 DUF2585 Protein of unknown function (DUF2585) Mistry J, Coggill P anon PRODOM_PD589556 Family This family is conserved in Proteobacteria. The function is not known. 25.00 25.00 38.50 38.10 17.70 16.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.22 0.71 -4.80 20 70 2009-01-15 18:05:59 2008-06-30 11:03:00 4 1 69 0 27 80 10 161.90 57 83.99 CHANGED lLhuMGRsPICsCGhVKLWcGsltSucNSQHluDWYT.SHlIHGFLFYuhsaLlhtRhs........huhR..LhlAlhlEuAWEIlENSshII-RYRsuTIuLDYhGDSllNSluDhlhMllGFlhAuRLPValTVslAIuFElhsuhlIRDNLTLNVLMLlaPlEAI+pWQuG ...............LahMGRlPICpCGh.VKLWcGsVsSStNSQHluDWYT.SHlIHGFLFYuLsaLlht+tP..........huhRLhlAhlIEuuWEllENSPhII-RYRsuTIuLDYaGDSIlNSshDTlhMhlGFlhA.tRLPVhlTVslAIsFElhsuhhIRDNLTLNVlMLlaPl-AIKpWQuG................. 0 6 14 18 +10589 PF10757 YbaJ Biofilm formation regulator YbaJ Pollington J, Finn RD anon PRODOM Family YbaJ regulates biofilm formation. It also has an important role in the regulation of motility in the biofilm. 
YbaJ functions in increasing conjugation, aggregation and decreasing the motility, resulting in an increase of biofilm [1] 25.00 25.00 35.80 35.80 21.20 18.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.63 0.71 -3.98 4 531 2009-01-15 18:05:59 2008-06-30 11:44:24 4 1 529 0 38 112 3 120.80 84 98.44 CHANGED MDEYSPKRaDIAQLKFLCEsLYDEuIAoLG-SpHGWVNDPTSAlNLQLNELIEHIAoFllsFKIKYss-u-LoEQlEcYLDDTasLFSSYGINs.-LQRWpKS+pRLFthFSspplCT.hpT ..MDEYSPKRHDIAQLKFLCETLYHDCLANLEESNHGWVNDPTSAINLQLNELIEHIATFALNYKIKYNEDNKLIEQIDEYLDDTFMLFSSYGINhQDLQKWRKSGNRLFRCFVNAT+pNPsS.L....... 0 1 8 23 +10590 PF10758 DUF2586 Protein of unknown function (DUF2586) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.04 0.70 -5.40 17 164 2009-09-13 14:48:51 2008-06-30 11:53:10 4 2 148 0 26 149 4 324.10 34 83.60 CHANGED uhGpVplNsLN.hQGshsplERphLFIGhus.p...N.hGpllslssQSDlDslLGtuDSsLKppltAAphNuGpNWsAhshslss-sp...a.sAlctA.ppshSsEuVV....lsssssspAplsshpstts-LhspaGRhVahlsAssGl......spspoWusYhsthssLppGlAuppVtlVPpLa..GsplGllAGRLCscuVoIADSPMRVtTGullG.LG....shPhDpsGh.LshAsLpuL-ssRaSVPtaYPDY-GhYWuDGphLDs.GGDYQsIEsLRVVDKAARpVRIlAIt+IuDRsLNSTPsSIAsscsaFu+PLR-MS+SsphsGppFPGEIcsPpDsDIsIsWho+ppVpIalhVRPYssPKpITssIhLD 
.............................................................................................................................hspVplsthN.h.G.htplEphhLFlG.us..p...s....sphh.lsspoDhsthl...u.tss.L+t.lhAA..Nusps...W.uhs.h.lstssp............a.pAlctA.ptshShEhVl....lstss.sspsplsthtshttcLhsphGRhlahlhsstu.........tpspsWs-YhsthsslppulAsptVtlVPp...La...........G..sp..........Gl.l.AGRLs..sp....u....Vo...........lADSPsRVtTGull..s.ls......phPhDtsG..t.t.ls...hA...sLpuL-ssRaSVPhWYsDY-GhYWuDGpTLDspGGDYQsIEslRVlcKAuR+VRlhAIs....+I.u.D.R.s....LNST.....s..s...S.lssppphFs+sL..RcMup.usplsu....FP..G..Elh....sPpDs......DlsIhW.spptVplhhhlcsapsPhpIslsl.LD............................................................................................ 0 6 15 19 +10591 PF10759 DUF2587 Protein of unknown function (DUF2587) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with no known function. 25.00 25.00 99.70 99.30 17.50 17.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.20 0.71 -4.87 15 199 2009-01-15 18:05:59 2008-06-30 11:59:36 4 1 198 0 76 162 39 164.30 68 89.78 CHANGED GPDG.....................sspusp-.sscstspslsDLVEQPAKVMRIGoMIKQLLEEVRAAPLDEASRsRL+EIHcoSI+ELEDGLAPELhEELERLoLPFo--ssPSDAELRIAQAQLVGWLEGLFHGIQTALaAQQMAARAQLEQMR..ptALPsGhs.s..........pGpsptsttuts.G..s...........GQYL ............................sssthhs..............sp..-s-sccpulTDhVEQPAKVMRIGTMIKQLLEEVRAAPLDEASRsRLREIHtoSI+ELEDGLAPELREEL-RLoLPFsED..us...PSDAELRIAQAQLVGWLEGLFHGIQTALFAQQMAARAQLEQMR.....pt...ALPP.Gsutsst........tG.......................................h....................... 0 23 58 72 +10593 PF10761 DUF2590 Protein of unknown function (DUF2590) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 
20.10 20.10 22.30 31.00 17.40 18.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.27 0.72 -4.29 18 127 2009-01-15 18:05:59 2008-06-30 13:00:03 4 1 114 0 21 86 3 99.70 49 93.49 CHANGED lDLhIhssDlsLDuuspPhllssRsoIAQDIKHtIhESGLshpLIuERushhpsDhhpplELlVE-DtRLVPGTlplsEsssGplhlTApTh-aG....lslclsh .lDLLIpssDhsLssGspPhhCssRtSIuQDIhHuIlESGLsscLlAERSPshRuDlhpphcLLlEsDcRllPGTlplsEp....s...splalTApTaDFG..slshpl.h........ 0 5 12 14 +10594 PF10762 DUF2583 Protein of unknown function (DUF2583) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as YchH however currently no function is known. 25.00 25.00 51.90 51.80 23.20 19.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.83 0.72 -3.85 5 543 2009-01-15 18:05:59 2008-06-30 13:04:58 4 1 530 0 40 129 1 86.10 83 96.79 CHANGED MKRKNAutLGNVLMGLGLVlMVsGVGYSILuclsQFNlPQFFAHGAllSIFVGALLWLVGARIGGREpVADRYWWVKHFDKRCRRssHR ....MKRKNASLLGNVLMGLGLVVMVVGVGYSILNQLPQFNhPQa.F..AHGAlLSIFVGAlLWLAGARVGGHEQVsDRYWWVR.HY.DKRCRRsDNR............................ 0 1 8 23 +10595 PF10763 DUF2584 Protein of unknown function (DUF2584) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins have no known function. 25.00 25.00 25.30 25.90 20.80 18.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.62 0.72 -4.15 9 175 2009-01-15 18:05:59 2008-06-30 13:07:09 4 2 175 8 18 82 0 78.70 58 95.31 CHANGED MGMPlEhNTMIVTKG+EpR..lEENlFpLEKEGYRlYPl-lPl-VRKTKcGEpoGoAhlpKLEWcsu+TpITYcLlSLpSTN .............................M.hEhpThIl...op...tKEhR...............I..-.-.NlFpLch-GY+laslp..lslhKopE.EplGoAhlpKLEWENGKTpIsYpLlSLpSsN....... 
0 2 10 12 +10596 PF10764 Gin Inhibitor of sigma-G Gin Pollington J, Finn RD anon PRODOM Family Gin allows sigma-F to delay late forespore transcription by preventing sigma-G to take over before the cell has reached a critical stage of development. Gin is also known as CsfB [1]. 21.00 21.00 21.10 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.66 0.72 -4.25 21 219 2009-01-15 18:05:59 2008-06-30 13:09:27 4 2 219 0 51 130 0 46.10 44 71.70 CHANGED hClICcpp+pc...GIhlhs+FICp-CE+cllsspss-spYpaYh++LK .hCIlCcpc+p-....GIhlhsphICh-CE+chVsT-ssDscY.aYl+pL+....... 0 21 37 42 +10597 PF10765 DUF2591 Protein of unknown function (DUF2591) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 21.30 21.30 21.80 21.70 20.80 20.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.60 0.72 -3.84 16 234 2009-01-15 18:05:59 2008-06-30 13:18:17 4 1 198 0 36 151 1 106.50 32 91.15 CHANGED McaScLSDhElNhhVAcApshp..h...........................s.h.hh..ps.sthttas.CssPuDAhPIIpcs+Iul..........tsssscWsAp...................ssssh.hh.spsPLRAAMIVFLh.p ....................................caupLSDhElNphVuth.htps..thh.........................t...s...h...hst...h..s.t.h....tshc.CssPuDuWPIIp+.t.+Iul................h.........sp.tspWsAp.............ssps.sh.h.h.sp...sPLRAAMIsFLh.Q........................... 0 0 9 22 +10598 PF10766 DUF2592 Protein of unknown function (DUF2592) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 25.20 25.30 18.90 24.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.88 0.72 -4.61 5 530 2009-01-15 18:05:59 2008-06-30 13:40:05 4 1 516 0 39 104 2 40.20 89 79.27 CHANGED LLKSLlFAVVMVPVVMAlILGLIYGLGEVFNIFS+lG+oK- .LLKSLVFAVIMVPVVMAIILGLIYGLGEVFNIFSGlG+KDQ...... 
0 2 8 23 +10599 PF10767 DUF2593 Protein of unknown function (DUF2593) Pollington J, Finn RD anon PRODOM Family This family of proteins appear to be restricted to Enterobacteriaceae. Some members in the family are annotated as YbjO however currently there is no known function. 23.50 23.50 149.90 149.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.87 0.71 -4.50 6 522 2009-01-15 18:05:59 2008-06-30 13:44:05 4 1 520 0 34 121 2 143.70 77 88.68 CHANGED hssPs.V.VAulAIIuhRsLslLhLhspLGlcGlu-FIpcSlQsWshTLlFhuSLlLlhlEIhCuFulh+GRNWuRWsaLlsQllssuYLhhASLGahhPElFoIsGES+t-IhHSLlLQKlPDlLlLsLLFlPupSRRFFtlQ .L.NVPALVQVAAlAIIhIRGLDVLMIhNTLGVRGlGEFIHRSVQTWSLTLVFLuSLVLVFIEIWCAFSLVKGRpWARWlYLLTQIsAuuYLWAASLGYGY..PELFSIsGESKREIFHSLhLQKLPDhLILhLLFVPuoSRRFFpLQ..................... 0 1 6 19 +10600 PF10768 FliX Class II flagellar assembly regulator Mistry J, Coggill P anon PRODOM_PD868899 Family The FliX protein is possibly a transient component of the flagellum that is required for the assembly process. FliX may contribute to the targeting or assembly of the P- and L-ring protein monomers at the cell pole. The family carries a potential N-terminal signal sequence and at least one transmembrane domain indicating that it might function either in or in association with the cell membrane [1]. 25.00 25.00 40.10 39.90 21.20 19.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.88 0.71 -3.95 19 70 2009-01-15 18:05:59 2008-06-30 13:46:07 4 1 70 0 38 74 23 137.40 39 97.41 CHANGED M.+l.Gssusosusuutsp..ppuGu.uuFulss...ssssspst..usuustuhuulD..ALLALQuh...-D...shER.R+RuVpRGcshLDsLD-LKhuLLsGplspusls+Ltssstph+suouDPtL-ulLsEIELRscVELAKhup...s .................M.+l.Gssusssssuspss...ctsuu..uuFuh..ss...ssssspsp...usuustusuulD..ALLALQul.....-D....ssER..R+RuVpRGcshLDsLD-LKhuLLuGplssspLtcLtssltph+ssouDPtL-ulLsEIELRscVELAKhstt.. 
0 15 24 28 +10601 PF10769 DUF2594 Protein of unknown function (DUF2594) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. 23.70 23.70 72.10 72.00 23.50 23.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.37 0.72 -3.90 6 530 2009-01-15 18:05:59 2008-06-30 13:52:11 4 1 527 0 36 97 0 73.80 83 99.99 CHANGED MSssDFoTuussppLAsEVoCLKAhLTLlLKAIGQADAG+VIlNMER.IAplEDspQAtVFsNTlpQIKpuYRQ ...MSTPDFSTAENNQELANEVoCLKAMLTLMLQAMGQADAGRVhLKMEKQlAhIEDEsQAAVFSpTVKQIKQAYRQ.. 0 1 8 21 +10603 PF10771 DUF2582 Protein of unknown function (DUF2582) Mistry J, Coggill P anon PRODOM_PD055730 Family This family is conserved in bacteria and archaea. The function is not known. 24.20 24.20 24.20 24.20 23.90 23.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.30 0.72 -4.12 23 279 2009-01-15 18:05:59 2008-06-30 14:03:51 4 3 136 4 47 185 10 65.40 34 76.42 CHANGED Mccph............IGtsAGclWphLs-..tschShspLp...+csuL.scc-lthAlGWLAREsKIthc.....cpstplhlpL ....................M.+pp........lGhsAGcVWphLsp..pt.p.hohppLp...+tspL..scc-lhhAlGWLuREsKI.php.....phpt.hhl............. 0 13 36 44 +10604 PF10772 DUF2597 Protein of unknown function (DUF2597) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 113.10 113.00 24.00 23.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.51 0.71 -4.25 29 118 2009-01-15 18:05:59 2008-06-30 14:05:21 4 1 113 0 20 81 2 133.10 52 88.93 CHANGED psFDhslhshhVHVcssoloITDsSssspTRGVPDGaVDGDVuA-GElELDspsFphlspAA+sAGSaRulcshDhhFYApsGp.-EhKVEsFGsKlhloDlLsIDPcGGscss+KlKa.VTSPDFV+INGVPYL ..................hsFDshlsu.hlHlEphoLsITDNSAsAQTRGVPDGaVDGDVuA-GElElsopsFphLsstA+sAGSWRul.PhDhlFYApsGs.-EhKVEsFGsKlplSslLsIDP.cGGutsT+KlKahVTSPDFV+INGVPYL...... 
0 3 11 13 +10606 PF10774 DUF4226 BssS; Domain of unknown function (DUF4226) Pollington J, Finn RD, Bateman A anon PRODOM Domain This family of mycobacterial proteins are uncharacterised. 22.00 22.00 22.20 22.40 21.30 21.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.40 0.71 -4.03 9 168 2009-01-15 18:05:59 2008-06-30 14:13:42 4 4 64 0 19 82 0 109.40 45 37.19 CHANGED cpsGsuA-AlpstcsALAcQpussu-sDtplssAlhsAHssss-Gtc+LstltpEI-sAVsppus.uLDTPAGAR-Fp+FLluKh+-ItpVVssAshpspuKuslhsuLsuhY ..........ptGsustslpsccuuLAppp..ussAEsDcplsssLtsAHsuhs-GtRRLcAIuAEI-sA...Vscpss.huLDTPAGAR-Fp+FLhuKt+-IppVVAsAst-upu+uAllcuLsupY........................ 0 3 8 16 +10607 PF10775 ATP_sub_h ATP synthase complex subunit h Pollington J, Finn RD anon PRODOM Family Subunit h is a component of the yeast mitochondrial F1-F0 ATP synthase. It is essential for the correct assembly and functioning of this enzyme. Subunit h occupies a central place in the peripheral stalk between the F1 sector and the membrane [1]. 25.00 25.00 39.30 38.50 23.90 17.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.12 0.72 -4.25 18 131 2009-01-15 18:05:59 2008-06-30 14:18:51 4 3 129 0 99 122 0 69.20 44 51.92 CHANGED RsFSo..sstpt-llQDLYL+ELKuaKssPlsssDucGpV+pashPssPssPp....s-l..uu-.LcuYc.spsVE ..........RsFhs..ssppsDhVQDLYL+ELKAYKssP..h.....pt..sDA-G.....pVppFshPpsPpuPE....s-L...As-.LKuYEspsVE............... 0 27 55 84 +10608 PF10776 DUF2600 Protein of unknown function (DUF2600) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins. Some members in the family are annotated as YtpB however currently no function is known. 
25.00 25.00 148.20 112.40 19.60 19.10 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.03 0.70 -5.60 29 211 2009-09-11 11:05:58 2008-06-30 14:30:12 4 2 203 0 50 164 0 326.10 55 92.33 CHANGED PlVH+ELuhW+c+Ah+IPscEL+pQAlASIccKTFHCEGGuIhALLAs-+p--sIRFIVAYQTISDYLDNLCDRSTSLDPpDFtsLHpSMhtALoscsE..susYYRaR--Q-DGGYLc-LVcTCQcVLpchcpY-tItPhLpELAsYYCDLQlHKHVc.-.ERcPRLpsWF-ta+csLPt.MoWaEFSACuGSTLGIFCLVAYAacs-Lp-c-stKIRpuYFPYlQGLHILLDYFIDQEEDRhGGDLNFCsYYpscpthh-RhpHFlEcA-cplusLPHscFHRLIsRGLLGlYLSDpKVsuQ+ph++hA++llKhGGhsSpFFYhNG+hYRK .........PlVH+ELshW+c+AhpIPssEL+sQAlASIccKTFHCEGGuILA.L.L.us.-c.+ccsI+FIVAYQTISDYLDNLCDRSTSLDPsDFttLHpSMh.ALoPcs-.....suNYY+aRc-QDDGGYLc-LVcTCQcVLpchp+Y-tItPhLcELAsYYCDLQlHKHVchE.EREPRLpsWF-tH+c.slP.-...MsWaEFSAC.uGSTLGIFC.LVAYAa.c.s-Lp-EchtKIRpuYFPYlQGLHILLDYFIDQEEDRhGGDLNFCoYYcscpthl-RhcHFlEcA-cslscLPHucFHRLIsRGLLGlYLSDpKVus.Q+.ph++hAR+llK.h.G.GhsShFFYhNG+hYRK.................. 0 23 39 42 +10609 PF10777 YlaC Inner membrane protein YlaC Pollington J, Finn RD, Eberhardt R anon PRODOM Domain Members of this family include proteins annotated as inner membrane protein YlaC in E. coli and Salmonella. The function of this family is unknown. 20.30 20.30 21.20 21.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.03 0.71 -4.67 8 536 2009-01-15 18:05:59 2008-06-30 16:19:27 4 2 531 0 39 134 3 153.20 75 97.48 CHANGED MsEIpRLLscsI-RlNpcEKRDN+PRFShsFIRcHPhLFlAMasualAsLsVMhhophh.hsSlhhhsVLFllhsAhhhhDlpPpYRaEDIDVLDLRVCYNGEWYsTRtVssphI-cILssPpVssshKsplcKhlssKGElsFYDVaoLAhspps ..............................MTEIQRLLoETI-sLNsREKR.DNKPRFSISFIRKHPGLF.IuMYVAaFATLAVMLQSETL.oGSVWLLVVLFILhNGFFFFDVhP...RYRYEDIDVLDFRVCYNGEWYNTRFVPssLlEsILNSPcVuDsHKpQLQKMIsR.KGELSFYDIFTLARAEuo...... 
1 1 7 23 +10610 PF10778 DehI Halocarboxylic acid dehydrogenase DehI Pollington J, Finn RD anon PRODOM Family Haloacid dehalogenases catalyse the removal of halides from organic haloacids. DehI can process both L- and D-substrates. A crucial aspartate residue is predicted to activate a water molecule for nucleophilic attack of the substrate chiral centre resulting in an inversion of the configuration of either L- or D-substrates in contrast to D-only enzymes [1]. 21.50 21.50 21.50 23.20 21.30 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.63 0.71 -4.71 12 73 2009-01-15 18:05:59 2008-06-30 16:26:33 4 4 42 4 8 69 11 122.20 37 74.63 CHANGED WVuhsh+shApaspFlstuWpthcsthtTRhhEcuA-plRtto...lhs..hshss.sttLhutGas.t-lccl+ssl-hFsYGNPKYLllhoAhpE.uhptRshuGts...hsut.ssphPhGhsp.h...hpLl-.ccAspcspplLpDItcshhphtsuSDa ..aVuFshRs.hupaPsFlstAWtsh+PslpTRaAEcuADtlRhpS...lssu...sshss..Ts.+.LhthGas-p-Iccl+ssLDlhNaGNPKhLl...................................................................................................................................... 0 4 7 8 +10611 PF10779 XhlA Haemolysin XhlA Pollington J, Finn RD anon PRODOM Family XhlA is a cell-surface associated haemolysin that lyses the two most prevalent types of insect immune cells (granulocytes and plasmatocytes) as well as rabbit and horse erythrocytes [1]. This family has had DUF1267, Pfam:PF06895, merged into it. 34.70 34.70 35.30 35.10 34.60 34.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.44 0.72 -4.06 47 409 2009-01-15 18:05:59 2008-06-30 16:30:38 4 1 232 0 46 250 1 70.50 30 88.24 CHANGED hpclppclsplcsc.cp.......htpthpshEhtsthp-ps..........ltslscpLpcIppNppWlhR...hllGullsu....llshlhK ...........p-lhpclsplcschcp.......htpclcsLEpp...sstp-pc..........ItslscpLc+IssNspWllR....llluullsu....llshlhK.............. 
0 21 36 40 +10612 PF10780 MRP_L53 39S ribosomal protein L53/MRP-L53 Pollington J, Finn RD anon PRODOM Family MRP-L53 is also known as Mrp144. It is part of the 39S ribosome [1]. 24.80 24.80 25.00 25.00 24.00 24.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.43 0.72 -3.99 20 175 2009-01-15 18:05:59 2008-06-30 16:32:23 4 2 165 0 124 165 0 51.80 33 44.15 CHANGED FNPFutsu+sARLFLuhl.....ssos+stuhplpsclLscso..sppPplcVtFKDG ........FsPFstpu.+ssRhFLshl.......sst+sptoslsspllscss...pptPplpVtFtDG.... 0 23 57 95 +10613 PF10781 DSRB Dextransucrase DSRB Pollington J, Finn RD anon PRODOM Family DSRB is a novel dextransucrase which produces a dextran different from the typical dextran, as it contains (1-6) and (1-2) linkages, when this strain is grown in the presence of sucrose [1]. 25.00 25.00 26.50 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -8.96 0.72 -4.48 5 506 2009-01-15 18:05:59 2008-06-30 16:43:54 4 2 504 0 33 111 1 61.60 90 97.22 CHANGED MKVNDRVTVKTDGGPRREGVILAVEEFSEGVMYLVSL-DYPAGIWFFNElDSpDGTFVEhts .....MKVNDRVTVKTDGGPRRPGVVLAVEEFSEGTMYLVSLEDYPLGIWFFNEuGHQDGIFVEKA.E.... 0 1 8 20 +10614 PF10782 DUF2602 Protein of unknown function (DUF2602) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 26.20 26.20 23.60 22.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.18 0.72 -4.13 14 323 2009-01-15 18:05:59 2008-06-30 17:10:47 4 1 304 0 25 89 0 57.20 57 82.95 CHANGED hs++phhtcls-L.spYCpuChlKcHhRKp.GKsaAHpFCIppCTlGcclKphGppLp ............scpphlscIsDLhsTYCppC.lKp+hRKhpGKTtAHpFCIscColGKpIKQlGspLp..... 0 4 10 17 +10615 PF10783 DUF2599 Protein of unknown function (DUF2599) Mistry J, Coggill P anon PRODOM_PD414589 Family This family is conserved in Actinobacteria. The function is not known. 
25.00 25.00 25.70 28.70 23.70 24.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.40 0.72 -3.34 12 152 2009-01-15 18:05:59 2008-07-01 12:49:54 4 3 148 0 39 108 0 94.80 43 49.17 CHANGED lD+spWspass......h.oLpVhPTpsGRpsstp.s.....tstAWsEllsLuP-............AsssGMRtQFlCHap..........aAchhtPuKsSWNLEPWRPsVssp-hlApGCNPG ...................l-pspWsphss......h.SLpVhPops.G.R..ss.sp.p.s.......h-sAWsEVluhsP...c............AsosGMRsQFlCHap..........aAc..........ssKsSWNLEPa.RPsVsss-hlAsuCNPG................ 0 6 22 31 +10616 PF10784 Plasmid_stab_B Plasmid stability protein Mistry J, Coggill P anon PRODOM_PD189613 Family This family is conserved in the Enterobacteriales. It is a putative plasmid stability protein in that it is expressed from the operon involved in stability, but its actual function has not yet been characterised. 25.00 25.00 25.90 25.30 23.70 23.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.15 0.72 -4.36 11 396 2009-09-11 13:35:50 2008-07-01 12:52:36 4 2 221 2 9 189 0 68.40 37 59.10 CHANGED R+hohYLpP-t.puDphApshl-olspptRGchhRsAhlsGhALaplDPRLPhLlushhscphossplspll .........R+hohYL+Pst.ps-t.Asthl-ols.ptRuch.RsAhluGhALhphDPR...hshlLushhs--hsssslsph.................. 0 0 2 4 +10617 PF10785 NADH-u_ox-rdase NADH-ubiquinone oxidoreductase complex I, 21 kDa subunit Mistry J, Coggill P anon PRODOM_PD104546 Domain This family is the N-terminal domain of NADH-ubiquinone oxidoreductase 21 kDa subunits from fungi, lower metazoa and plants. 
25.00 25.00 30.40 29.20 23.40 21.00 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.82 0.72 -3.74 35 183 2009-01-15 18:05:59 2008-07-01 13:39:47 4 2 163 0 125 165 0 84.40 35 51.07 CHANGED hpocYPlI.....DsDP...ahpRVlsYhRsSDYshhuussuuhPshhahhEphsPstst.h......ssshRhushlGhhGGFhhsYpRSstRFhGapE .......................hpscYPlI.....DscP............pap+Vlu.hRsuDYshhushuususshhahhtphssspht................usuMphuuhlGhhGGFhhsYQpSshRhhGapE............... 0 43 80 110 +10618 PF10786 G6PD_bact Glucose-6-phosphate 1-dehydrogenase (EC 1.1.1.49) Mistry J, Coggill P anon PRODOM_PD110134 Family This family is conserved in Firmicutes and Proteobacteria. Several members are annotated as being glucose-6-phosphate 1-dehydrogenase (EC:1.1.1.49) but this could not be confirmed. 25.00 25.00 25.70 25.40 24.20 23.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.66 0.70 -4.91 9 269 2009-01-15 18:05:59 2008-07-01 13:44:42 4 1 265 0 18 153 0 202.90 45 98.10 CHANGED hLospsh-lFshPhFpFtQlK.KasPE-IspIKA-YKtpWQhWKplp.pVupQLss..sFAcPHIEpWsNGWplRuHFaAhY+hph.pspuAhluVlLN++pLQVhL.appY+uD+pQholppYNphLspl....Dphchuca.lWctsEpEasDahslsph.....pppshshcsc-chaplGK.h.+sc.sh.DhtcFIhcTIccLhPLYE+h+ .......LTtpshsLFshPhFpFtQLK.pasP-cIspIKs-...YKppWppWKtl..pVAttLss...sFAcPHIESWsNGWpl.RuHFaAha+hEppp..spsAhL...ulLLN++pLpV.L.appYKu-cpthslspYNp.hLsph....cshchtsa.lWcts.EpEasDahslpph.....pppphphcsscchFpIGKhh.ps-.phpchtphhspslp-LhPLYpth................................................................................ 0 3 7 12 +10619 PF10787 YfmQ Uncharacterised protein from bacillus cereus group Mistry J, Coggill P anon PRODOM_PD089522 Family This family is conserved in the Bacillus cereus group. Several members are called YfmQ but the function is not known. 
20.90 20.90 20.90 36.20 20.80 19.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.78 0.71 -4.85 7 191 2009-01-15 18:05:59 2008-07-01 14:15:02 4 1 116 0 14 119 1 139.80 65 98.44 CHANGED MThWhIlhLVlFuhhKllVoslPouVVE.lluKFElHscLp--ssolohsG+pLEGppKpclIppFNEAlFL-+YYh.PusE.........GTPLlIpTKpGK+-VphalYpYDDHlDVVKQY....KKKllAYpLRScsLQsss.h.hstshh .........MTTWFIVhLhlFGAhKllVSShPsoVlESIISKFEhH.KL-EE.NsoloIcGpNlEGEpK.plIH-FNEALFLD+aYhPPHsE.........GTPllIcsK+GKK-lpF.lYSaEE.HVDVlKQY....KKKVVAYpLRSKsLQsps.hhlotDh.A........ 0 3 8 9 +10620 PF10788 DUF2603 Protein of unknown function (DUF2603) Mistry J, Coggill P anon PRODOM_PD098479 Family This family is conserved in Epsilon-proteobacteria. The function is not known. 25.00 25.00 74.70 74.50 20.60 19.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.74 0.71 -4.41 12 175 2009-01-15 18:05:59 2008-07-01 14:22:06 4 1 173 0 16 72 0 135.20 56 83.07 CHANGED lschupsLGlpccp.pTlhchhp..psNEhhLpLcsGshspsEPWFhlDEpsph+sllohp.lptLlpslKpup+ENFcL+LEKsIhQphPlDFsDVWsVAh-EI++httpst.....sIslcpLlccIK+EHPNLFhshc ......................IDEhSpsLGhcKc-.RsIFKhKpops-NE+sLh..LEsGSFDo.sEPWFlhDENDclHTLlSlpSLpNILEsLKpuQKENFELRLEKAIaQQIPlDFsDVWpVAMDEIKppAQpss.c..lsIDL-KLlpcIK+EHPNLFVDMp. 0 4 13 16 +10621 PF10789 Phage_RpbA Phage RNA polymerase binding, RpbA Finn RD anon PRODOM Family Upon infection the RpbA encode phage protein binds to the ADP-ribosylated core RNA polymerase and modulates function to preferentially bind T4 promoters.\ This is a non-essential protein to the phage life cycle. 
21.00 21.00 21.10 22.50 20.80 20.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.43 0.72 -4.20 8 36 2009-01-15 18:05:59 2008-07-01 14:49:10 4 1 35 0 1 35 0 103.20 35 85.48 CHANGED uDIpsp.hpo-us.cspNKIRKAWVLphsDsptcpLQul.Qc..sRFtLYupIDc-Vp-pWIcLMR++ps-uLssGuKhVhsh.GpchLtcpYphDsDEhLIsAAplV....huch ........tDIQsK.hpo-us.pspN+IRKAWVLphs.-sstctlQsh.pp..sRatlYptIDc-VpcpWI-LM+++ps-uLssGAKhlhsp.stchLEcpYphssDEhLl.AuplVhtp.h........................... 1 0 0 1 +10622 PF10790 DUF2604 Protein of Unknown function (DUF2604) Finn RD anon PRODOM Family Family of bacterial proteins with undetermined function. 20.90 20.90 21.20 144.30 20.30 19.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.23 0.72 -3.84 3 5 2009-01-15 18:05:59 2008-07-01 15:11:21 4 1 5 0 3 5 0 76.00 69 56.89 CHANGED VVVNGQPVsVEANVNAPLHsVlAKALEpSGNVGQPsENWELKDEuGsVLDlsKKVEDaGFTNGVKLFLSLKAGVAG VVVNGQPTQVEANPNQPLHVVRAKALENTQNVAQPAENWEFKDEAGsLLDlDKKVGDFGFANsVTLFLSLKAGVAG 0 0 2 3 +10623 PF10791 F1F0-ATPsyn_F Mitochondrial F1-F0 ATP synthase subunit F of fungi Mistry J, Coggill P anon PRODOM_PD068018 Family The membrane bound F1-FO-type H+ ATP synthase of mitochondria catalyses the terminal step in oxidative respiration converting the generation of the electrochemical gradient into ATP for cellular biosynthesis. The general structure and the core subunits of the enzyme are highly conserved in both prokaryotic and eukaryotic organisms. 
23.00 23.00 23.00 24.40 22.50 22.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.12 0.72 -3.75 14 131 2009-01-15 18:05:59 2008-07-01 15:39:51 4 1 128 0 99 115 0 92.00 53 70.67 CHANGED Mhh.....+RuLSTLI............P.PKlsosts.luuussAtRhtpVVsFYcpLPpGPAPthpss..uh.lu+YpAKYF..pGcNASG+PllHhlhullhhGYuh-YaFHL+ ............................M.alhRRuLSTL.I......P.PKlA....Ssps.lGuAssA......tRMpcVVsFYc+LP+GsAPt..sKssGh...lGRYpA+YF...G+NASup...........PllHhlhullhlGYoh-YYFHLR............................ 0 26 55 84 +10624 PF10792 DUF2605 Protein of unknown function (DUF2605) Mistry J, Coggill P anon PRODOM_PD070476 Family This family is conserved in Cyanobacteria. The function is not known. 25.00 25.00 46.40 46.30 18.70 18.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.33 0.72 -4.32 17 56 2009-09-11 15:07:39 2008-07-01 15:50:20 4 1 54 0 21 59 22 96.20 46 90.09 CHANGED ps.ststLLcslL-sLLcDFpaWFpRuccLLpsps.shhstc-ppsLhpRlcpupptltus+uLhpAostthuVsMpsMsPWHpLVsEsWtlAsRhppt ..sp.sps-LL+olL-PLL-DFpaWFsRucpLLEs-plsFhospEQpcLLsRV+pAQpEVpss+hLFpATstQsGl-hpshhPWHpLVsECWplutRaRp.h.. 0 2 14 19 +10625 PF10793 Gloverin Gloverin-like protein Pollington J, Finn RD anon PRODOM Family This family of proteins are Gloverin-like. Gloverin is a 13.8kDa inducible antibacterial insect protein which inhibits the synthesis of vital outer membrane proteins leading to a permeable outer membrane. Gloverin contains a large number of glycine residues [1]. 
25.00 25.00 35.90 63.40 23.70 16.70 hmmbuild --amino -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.93 0.71 -4.58 5 27 2009-01-15 18:05:59 2008-07-01 15:51:29 4 2 12 0 7 27 0 137.10 61 91.41 CHANGED QVShPPGYAc+YPpYhKaSKpsRHPRD....VTWDKplG.sGKVFGTLGQNDDGLFGKGGYp+-FFNDHRGKLTGQAYGTRVLGPuGDSTNaGGRLDWANKNApAALDVsKQIGGRoGloASGSGVWcLDKNT+LSAGGsLSKsFGHsRPDVGlQApIpHDW ....................t..p.Ys.....S+..RHPRD....lTW-+phG.sGKVFGTLGpsDpGLFGKuGYppphFNDcRG+LpGQAYGoRVLGPsGDSTsaGGRLDW..uNcNApAAlDls+QIGGpoGhoAoGSGVWsLDKNT+LSAGGslSK.pFGHp+PDVGlQAphpH-a...... 0 6 7 7 +10626 PF10794 DUF2606 Protein of unknown function (DUF2606) Finn RD anon PRODOM Family Family of bacterial proteins with unknown function. These proteins have been classified as membrane proteins 25.00 25.00 25.40 99.40 24.80 23.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.49 0.71 -4.72 4 45 2012-10-02 19:08:27 2008-07-01 15:55:09 4 2 45 0 2 34 0 112.50 51 95.78 CHANGED Ma.hIhp.G.lNKYsphlah....LsIlhhsuhhsss-pshpKsl.PVThHVcst-tpPlcshplhlhK..-ps.pPSpEIG..IGKTDccGclhW+ssRKGcYhVhLspsEsp.s....lhpD+cspplI.Isl .............................hhsushuss-pphuKsl.sVThHVcNKEKpPlKsFEIhLMK..DpsPpPS+EIGISIGKTDcEGKlIW+ssRcGcYIVhLPNsETphl....hlNDRctsclIsIs.h 0 1 1 1 +10627 PF10795 DUF2607 Protein of unknown function (DUF2607) Mistry J, Coggill P anon PRODOM_PD067168 Family This family is conserved in Gammaproteobacteria. The function is not known. 
20.50 20.50 20.90 20.60 20.40 20.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.51 0.72 -4.17 7 95 2009-01-15 18:05:59 2008-07-01 15:59:13 4 1 95 0 10 53 4 93.30 56 99.24 CHANGED M..athpth.pph+Rpsl...huVsLhlhhshAsltHplDlsPEHHspHHCQLFuuspHGlsp.....u.P.l.sPsappt..pslhtpshphtplh..hhARuPP.hhu ...........................p+hsl..MLSVVLuLWFNVAVIDHQLD.LHPEHHLQHDCQLFASAAH...GLKT......SQWlL...P...SWRQNPP..QARVEQPIQ.RsQVLa.SYFARSPP....AA. 1 1 2 6 +10628 PF10796 Anti-adapt_IraP Sigma-S stabilisation anti-adaptor protein Mistry J, Coggill P anon PRODOM_PD028886 Family This family is conserved in Enterobacteriaceae. It is one of a series of proteins, expressed by these bacteria in response to stress, that help to regulate Sigma-S, the stationary phase sigma factor of Escherichia coli and Salmonella. IraP is essential for Sigma-S stabilisation in some but not all starvation conditions [1]. 21.80 21.80 22.10 22.60 20.80 21.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.87 0.72 -3.60 8 516 2009-01-15 18:05:59 2008-07-02 10:59:42 4 2 476 0 39 130 0 84.70 75 97.83 CHANGED MKNLIucLLsKLAcKEtEuKpLsAQVEALEllloAlLpsh.csss.pcLIcsVEpAlssApssssss.+.DoElLpp.lp+LLphsps ......MKNLIAELLhKLAQKEEESKELsAQVEALEIIVTAMLRNM...AQN-QQcLI-QVEGALhcVKPD..A..SlPDc.DTELLRsYVKKLL+HPRp........................... 0 1 4 25 +10629 PF10797 YhfT Protein of unknown function Mistry J, Coggill P anon PRODOM_PD140382 Family This family is conserved in Firmicutes and Proteobacteria. The function is not known but several members are annotated as being homologues of E coli YhfT, a protein thought to be involved in fatty acid oxidation. 
25.00 25.00 79.40 79.30 20.20 19.60 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.30 0.70 -5.63 13 324 2009-01-15 18:05:59 2008-07-03 11:56:38 4 2 316 0 28 135 1 424.10 79 98.09 CHANGED lcIlLlAllGuhuAlLANpulAVFpDGlRPIlPEhlEGpMsR+ELuusuFALShGhVhGFGIPhoLusuIllsH.lhLsTDIIGlhssssh....lAullGulaGlhllhuLpsVlslFstLPVNFlsuLuplGsPVlsAFAlFPAlAluYQFGhKpGllshllshluRllls...+at.h.hu.s.....lsLsPEGIulllGMlhLlhaAh+c+hs-css.........ushsolFs-RlpRIpKNhhhLulsGAL..lAAsuuhtIlAGsslSltLLAcu............tlspAAlAshsRulGFlPLIATTAlATGVYGssGhTFVFssG.hluPNPhlAuILGAllIhlEVhLLssIu+aL-+aPulRsuu-NIRTAMspllElALLIGGshAupphuPshGFhllsulYlLNEshGRPll+hAsGPVAAIlsGIlhNlLhllGLF .....IQIIVVACLTGMTSLLAHRSAAVFHDGIRPILPQLIEGYMNRREAGSIAFGLSIGFVASVGISFTLKTGLLNAWLLFLPTDILGVLAINSL...............MAFGLGAIWGVLILTCLLPVNQLLTALPVDVLGSLGELSSPVVSAFALFPLVAIFYQFGWKQSLlAAVVVLMTRVVVV...RY............FPHLNPESIEIFIGMVMLLGIAITHDLRHRDEN.................DIDASGLSVFEERTSRIIKNLPYIAIVGAL..IA..AVASMKIFAGSEVSI.FTLEKAYSAGVTPEQSQTLINQAALAEFMRGLGFVPLIATTALATGVYAVAGFTFVYAVG.YLSPNPMV.....AAVLGAVVISAEVLLLRSIGKWLGRYPSVRNASDNIRNAMNMLMEVALLVGSIFAAIKMAG......YT......G.....FSIAVAIYFLNESLGRPVQKMAAPVVAVMITGILLNVLYWLGLF........... 0 8 13 22 +10630 PF10798 YmgB Biofilm development protein YmgB/AriR Pollington J, Finn RD anon PRODOM Family YmgB is part of the three gene cluster ymgABC which has a role in biofilm development and stability. YmgB represses biofilm formation in rich medium containing glucose, decreases cellular motility and also protects the cell from acid which indicates that YmgB has an important function in acid-resistance [1]. YmgB binds as a dimer to genes which are important for biofilm formation via a ligand. Due to its important function in acid resistance it is also known as AriR (regulator of acid resistance influenced by indole) [1]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.79 0.72 -4.16 6 700 2009-09-11 05:26:58 2008-07-28 14:30:18 3 1 416 2 58 197 0 55.90 43 68.79 CHANGED tpEuulluslVpplLtossaVoNKsIIhpLI+pLETpsDlVptDlhRpsLElVVtpTsDD..I .................EptsLGphVspLhpuGcslsNKsIIhpLIppLEoppDhhphDlhRpsLEhVl..TsDDh.................... 0 2 6 42 +10631 PF10799 YliH Biofilm formation protein (YliH/bssR) Pollington J, Finn RD anon PRODOM Family YliH is induced in biofilms and is involved in repression of motility in the biofilms [1]. YliH is also known as bssR (regulator of biofilm through signal secreton). 20.40 20.40 20.50 119.10 19.50 18.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.59 0.71 -4.44 4 440 2009-01-15 18:05:59 2008-07-28 14:32:18 3 1 439 0 14 59 1 124.30 86 99.55 CHANGED MsVDRL+pDLLNKLINARIDLAAYLQLRKAKGYMSVSESDHLRDNhFELsREh+s+A.RLp.HlDtEEhssLR+At-ALAsAAVCLMSGHHDCPTaIAVNADKLENCLToLTLsIpsLpcHuPLpps ...MhVDR.RhDLLN+LIsARlDLAAYlQLRKAKGYMSVSESNHLRDNFFKLNRELHDKSLRLNLHLDQEEWSALHHAEEALATAAVCLMSGHHDCPTVITVNADKLENCLMSLTLSIQSLQKHAMLEKA..... 1 1 1 7 +10632 PF10800 DUF2528 Protein of unknown function (DUF2528) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. Some of the sequences are annotated as ea10 however the function of this protein is unknown. 22.00 22.00 22.10 31.80 20.00 21.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.29 0.72 -3.63 4 191 2009-01-15 18:05:59 2008-07-28 14:32:53 3 1 157 0 4 72 0 105.50 89 89.24 CHANGED uacIlV-lDHslLTEEKLsELspFWS-u-hclE+HGs.LpAhLphhAs+hhuhslpp......lSscssaN..ust.EGaPshDGSpGlRls-hDph..F-uDDhcVhtl .....KASIEIE.IDHDVMTEEKLHQINNFWSDSEYRLN.KHGSV.LN....AVLIMLAQHALL.IAIS..SDLN.....AYGVVCEFDWND.GNGQEGWPPMD....GSEGIRITDIDTSGIFDSDDMTIKA.A....... 
1 0 1 2 +10633 PF10801 DUF2537 Protein of unknown function (DUF2537) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.30 20.30 21.10 31.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.04 0.72 -3.88 8 113 2009-01-15 18:05:59 2008-07-28 15:02:40 3 1 112 0 28 72 1 83.60 58 76.69 CHANGED TPWATGLTVAuFVAAVluVAlVVLolGLhRVHPLLAVGLNlVAVGGLAPTlWGWR+TPVhRWFVLGAAVGVsuAWlALLllAh.G ...sPWATGLsVAuFVAA...VlAVAVVVLSlGLl.R.VHP......LLA......VuLNl.VAVuGLAPTLWuWRRsPVLRWFVLGAAVGVAuuW...lALLsls............. 2 7 20 25 +10634 PF10802 DUF2540 Protein of unknown function (DUF2540) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Methanococcus. 25.00 25.00 82.90 82.80 23.90 19.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.21 0.72 -4.31 12 26 2009-01-15 18:05:59 2008-07-28 15:06:32 3 1 13 1 9 21 0 75.30 39 84.61 CHANGED phtLhcplDs+slRYaLHKL-sl.splss-lLtcuhcscKpa+poloLo-pEccIlcKYG.KuTNhLlNahIlpppp .pFtLh+slDsRslRYhLHKlEsl.cpIss-l..Lt+AhcscKpa++olTLo-cEccIlcKaG.KuTNlLlNhhIlppc.... 0 1 3 6 +10635 PF10803 GerPB DUF2539; Spore germination GerPB Pollington J, Finn RD, Eberhardt R anon PRODOM Family Members of this family are required for formation of functionally normal spores. They may be involved in the establishment of spore coat structure or permeability [1]. 25.50 25.50 25.60 50.80 25.10 25.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.56 0.72 -4.21 7 135 2009-01-15 18:05:59 2008-07-28 15:07:18 3 1 134 0 19 64 0 52.40 69 75.89 CHANGED MNFYlpQoIpINhlRltuloNSSVhQIGSAGSIKsLSpLYNTGuasEPAP.ssus ...MNFYlNQSIhINpl+I-SITsSSVFQIGTAGSIKuLSKFSNTGGFTEPhRPLpAK... 
0 4 11 13 +10636 PF10804 DUF2538 Protein of unknown function (DUF2538) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 20.50 20.50 23.80 23.50 20.20 20.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.02 0.71 -4.34 3 217 2009-01-15 18:05:59 2008-07-28 15:07:59 3 1 215 2 10 43 0 153.40 91 99.52 CHANGED MSRKTYEKlANINGMFNVLEQQIIHSKDMALFRSEFFYVNHEHRENYEALLIYYK-SslNPIVDGACYILALPEIFNKVDVFESELPFSWVYDENGITETMKSISVPLQYLIAAALEVTDVNLFKPSGFTMGMNNWNIAQMRIFWQYTAIVRKEAL ....MSRKTYEKIANINGMFNMLEQQIIHSQDMAHFRSEFFYVNHEHRENYEALLIYYKNSIDNPIVDGACYILALPEIFNSVDVFESELPFSWVYDENGITETMKSLSIPLQYLVAAALEVTDVNIFKPSG...FTMGMNNWNIAQMRIFWQYTAIIRKEAL.............................................. 1 3 4 10 +10637 PF10805 DUF2730 Protein of unknown function (DUF2730) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.24 0.72 -4.30 11 133 2009-09-10 19:13:11 2008-07-28 15:08:36 3 1 124 0 31 115 5 98.70 21 91.33 CHANGED hh.hlpspWsl.lhulhshssshhhhhhsppYA++c-ltpL.......-pRLsphEs+lcsLPTpp-VpcLclplscl+G-hKshssslpsloHQscLLLEpcL..pcc ............................................................t.ash.lhshh...hsshhhhhh.pp.p.a.sp.ccchpp...............L..............................cpRlsp.lEsclpslPspp-lpcLplplsplcG-l+slsspl....psl....s+..pLLlEptl.....pt.................................. 0 10 22 26 +10638 PF10806 DUF2731 Protein of unknown function (DUF2731) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 
21.40 21.40 21.40 22.30 21.20 20.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.71 0.71 -3.87 11 76 2009-01-15 18:05:59 2008-07-28 15:10:58 3 4 74 0 52 74 0 94.50 29 29.59 CHANGED hpVPsPlKplFDsFPLpsY..tslsppcsus.pslpp+pahF..............sssssp.sssssFpLGVaNVhphptst.....shLsoDPhuLhspLhLC+KNsLtLPo.........tsss...................spsssslhhLShhAusDppLPILlE ..hsVPsPlK+lFDpFPLhTY..ss.sttspshttp..hpp.+hasF..............ts.t.t.t..................................................................................................................................................................... 0 7 22 41 +10639 PF10807 DUF2541 Protein of unknown function (DUF2541) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. All proteins are annotated as YaaI precursor however currently no function is known. 21.90 21.90 24.00 23.70 21.70 21.10 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -4.39 2 436 2009-01-15 18:05:59 2008-07-28 15:11:09 3 1 433 0 6 84 0 132.60 88 99.68 CHANGED M+SlhplSVGLlhGluh.osAtANDHKILGVIAMPRNETNDLsLplPVCRlVKRIQLoAD+GDlpLSGAoVYFKsuRuASpoLNVPuuIKEGpTTGWININSDNDsKRCVpKIsFSGHTVpSSDMApLKlIGDD .............MKSVhTISASLA...I.S.LML.CCTAQANDHKILGVIAMPRNETNDLALKLPVCRIVKR...IQLSADHGDLQLSGASVYFKAARSASQSLNIPSEIKEGQTTDWININSDNDNKRCVSKITFSGHTVNSS.DMATLKIIGDD........ 0 1 2 4 +10640 PF10808 DUF2542 Protein of unknown function (DUF2542) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. The family has a highly conserved sequence. 
25.00 25.00 57.30 57.20 22.20 22.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.75 0.72 -3.71 3 411 2009-01-15 18:05:59 2008-07-28 15:46:52 3 1 410 0 6 41 0 78.50 81 99.60 CHANGED MDVQTIFVVlAFLLlPLFCFREAWKGWRoGAVDKlVKNAREPVYVYRAcsPlLYWSYlsLYlGhGlLolGMIIYLLFYR MDVQQFFVVAVFFLIPIFCFREAWKGWRAGAIDKRVKNAPEPVYVWRAKNPGLFFAYMVAYIGFGILSIGMIVYLIFYR. 0 1 1 4 +10641 PF10809 DUF2732 Protein of unknown function (DUF2732) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 25.40 25.30 24.80 24.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.57 0.72 -4.43 8 350 2009-01-15 18:05:59 2008-07-28 15:49:26 3 1 279 0 29 161 1 73.20 45 96.25 CHANGED M+NschtoTpous--shLspLLs-ARhEERKsRAhAlShRL-ALAlHIsp+phousEAAELLR+EAs+aEsESQ.ElH ......................................t....hslLLspARhEERpspApthuuRLDuLAsHITpcpLs+VEhsELLRppAEphpNput.-........ 0 0 7 21 +10642 PF10810 DUF2545 Protein of unknown function (DUF2545) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function is restricted to Enterobacteriaceae. The sequence is highly conserved. 25.00 25.00 68.60 68.50 19.10 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.95 0.72 -3.58 2 416 2009-01-15 18:05:59 2008-07-28 15:50:10 3 1 412 0 5 40 0 79.40 86 99.91 CHANGED MIYLWhFLAlsIlsVSGYIGQVhshhSAlSSFhGMVILAALIYhhshWLpsGs-lVoGlhhFLAPACGLhIRFMVGYG+R ....MIYLWMFLALCIVCVSGYIGQVLNsVSAVSSFFGMVILAALIYYFTMWLTGGNELVTGIFMFLAPACGLMIRFMVGYGRR.. 0 1 1 3 +10643 PF10811 DUF2532 Protein of unknown function (DUF2532) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
25.00 25.00 280.50 280.30 21.30 20.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.04 0.71 -4.42 11 41 2009-01-15 18:05:59 2008-07-28 15:51:57 3 1 41 0 6 23 0 158.00 85 95.55 CHANGED KLIhCFLILVSAVKVNADFNsIQDNFEYQE...EQLsIELPWSDCTEIHKLLEEKLSFSEQQIKKENKI+EKYKQFYLKHNNP.oNFSMQFLEKKSETNGVETLISGFLKFCEDNFQTSKSKSNSLNYaIKKQQDQWaNsIRNENYKIYYRKKY-DNIhRNN .KLITCFLILVSAVKVNADFNNIQDNFEYQE...EQLhIELPWSDCTEIHKLLEEKLSFSEQQIKKENKI+EKYKQFYLKHNNP.TNFSMQFLEKKSETNGVETLISGFLKFCEDNFQTSKSKSNSLNYYIKKQQDQWFNDIRNENYKIYYRKKYEDNIFRNN.. 0 1 1 1 +10644 PF10812 DUF2561 Protein of unknown function (DUF2561) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacterium spp. 20.50 20.50 20.60 21.10 20.30 20.30 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.42 0.70 -4.72 7 80 2009-01-15 18:05:59 2008-07-28 15:52:41 3 2 78 0 14 39 1 203.10 60 94.39 CHANGED Mss.p.SAhRc..GsD.shu.-slDRILlGACAAlWLshlGsuVAAsVALhDLGRGapphuusscTsWVLYuVIsVSALlIsuAlPlLLRARRhAcsEPsspshshscpsut..shtsutssspstpcpss....ssuht.ssth.............sstAVDRlWLRsTlslsushGsAhlAVusATYLMAVG+DsASWVuYGLAGlVTsuMPsl.WhalRpLRt ..............MVS.RYSAYRR..GPD.sISPDVIDRILlGACAAVWLVFsGVSVAAAVALhDLGRGFHEhAGsPH.TTWVLYAVIVVSALVIVGAIPVLLRARR..MAEAE..P...As.RP....oGA....u.s..RGGc......oluSGpPA...tRAsA..E.SAP...VpHAcAh..csAAEW...............................SSEAVDRIWLRGTVVLToAIGIALIAVAAATYLMAVGHDGsSWluYGLAGVVTAGMPVIEWLYsRQLRR.................................................. 2 1 8 12 +10645 PF10813 DUF2733 Protein of unknown function (DUF2733) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
21.20 21.20 22.60 25.70 20.70 19.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.92 0.72 -7.29 0.72 -4.39 12 28 2009-01-15 18:05:59 2008-07-28 15:54:04 3 1 27 0 0 22 0 32.40 43 45.92 CHANGED MGs.lhSlC+RRhsPlhDVcGp.IsltcDFE.h ..........MGh.lhSlC+RppNslhDVcGp.IslscDFE.h.. 0 0 0 0 +10646 PF10814 DUF2562 Protein of unknown function (DUF2562) Pollington J, Finn RD anon PRODOM Family This protein of unknown function appears to be restricted to Mycobacterium spp. 25.00 25.00 26.90 25.80 24.40 23.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.75 0.71 -4.21 7 75 2009-01-15 18:05:59 2008-07-28 15:54:39 3 2 75 0 13 36 0 131.90 70 92.93 CHANGED LTPRpRLoRGLpYosVGPVDVTRGl....lGLGlcSApSTAutLRRRYppG+LAR....ELAAApEsls.ElAAAQEVVAsLPpslQcA.....Rpt+RR.+RPhllAG.VAVsVLAGGAVsFSIVRRSsp...PEPSPhPPSVEVpP+P .......LTPRERLTRGLuYSAVGPVDVTRGL....VGLGLQSARS...TAAuLRRRYREGR....LAR....EVAAA.QETLAQELsAAQDVVANLPQALQDA........RTpRRs+++hWIFAG.IA........A.Al.LAGGAVAFSIVRR.SSR...PEP.SPRPPSVEVQPRP............ 0 2 7 11 +10647 PF10815 ComZ ComZ Pollington J, Finn RD anon PRODOM Family ComZ is part of a two gene operon. It affects competence regulation by negatively affecting the transcription of the ComG operon. ComZ contains a leucine zipper motif [1]. 21.00 21.00 21.10 21.60 20.90 20.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.67 0.72 -4.19 7 116 2009-01-15 18:05:59 2008-07-28 16:00:15 3 1 115 0 14 41 0 55.40 79 83.53 CHANGED EKoMcFhQIAMKalPEAKt.L-csGI-LohEhlQPhhsLhhpVMsEAYELG+sDAp ......EKSMQFLQIAMKHLPE.AKAILDDNGIALDMEKAQPVLELLMKVMNEAYELGKADpE.... 0 3 8 10 +10648 PF10816 DUF2760 Domain of unknown function (DUF2760) Gunasekaran P, Mistry J anon Pfam-B_001564 (release 23.0) Domain This is a bacterial family of uncharacterised proteins. 
25.00 25.00 116.20 116.10 19.50 18.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.49 0.71 -4.62 29 227 2009-09-11 06:04:09 2008-07-28 16:01:46 3 1 225 0 68 201 15 125.90 59 60.21 CHANGED psossuALQLLuLLQ+EARhIDFlpEDluuaoDA-lGAAARllHpGC+KlLp-aFolpPVRsE.....sEGoRlolstGFDusplRLTGNVsGpuPFsGsLhH+GW+sscV+LPKlusspDs...s.llAPAEVE .......sAosDuALQLLuLLQ+EARLIDFlpEDlusaSDtElGAAARVlHsGspKVLcEahTLsPlRsE.....pEtoRloltsGFssppIRLTGNVsGpAPFsGTLlH+GW+ssslpLPKLu-saD.s....o..llAPAEVE. 0 23 34 55 +10649 PF10817 DUF2563 Protein of unknown function (DUF2563) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacterium. 21.20 21.20 21.50 22.50 19.50 21.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.33 0.72 -3.59 3 55 2012-10-01 21:44:22 2008-07-28 16:02:45 3 1 51 0 6 23 0 102.30 73 98.01 CHANGED MFVDT-LLHSGGNESHRAGGHA+-GADQLAtGPLhSGMFGDFAAADAFHsAVsuAHAQHVRNLQAHpEALTuVGoKAHHAAsGFTsMDspNAsEL+AVRsSuuT .......MFVDVGLLHSGANESHYAGEHAHGGADQLSRGPLLSGMFGT.FPVAQTFHDAVGAAHAQQMRNLHAH.RQALITVGEKARHAATGFTDMDDGNAAELKAVVCSCAT................................. 0 1 3 5 +10650 PF10818 DUF2547 Protein of unknown function (DUF2547) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
23.10 23.10 23.80 46.00 22.90 23.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.36 0.72 -3.36 8 80 2009-01-15 18:05:59 2008-07-28 16:03:26 3 1 79 0 11 55 0 95.20 43 92.21 CHANGED KpsFWSQLLhShIAIFALPpsQshphtp....sNcs.QsollpQplsp.ssplspclpQQshaltph.h...t.hpIpPp......Fhssshpap....sPIRAGPhs ....KssFWSpLLLullAIFALPsuQuhp.ps.....ssEN.psol..Q..QhLp.slplsc-spp.Qs.h..sphshpsct.hphpPH......Fhscshshs....APIRAGPl.h 0 1 4 10 +10651 PF10819 DUF2564 Protein of unknown function (DUF2564) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 59.20 58.90 21.40 21.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.61 0.72 -3.86 7 123 2009-01-15 18:05:59 2008-07-28 16:06:46 3 1 123 0 17 47 0 78.60 67 93.60 CHANGED GasDh+QlEhAVETAQKhsGtAT+uhssshlcsAhQAlEsAR.Qhspupphts.lDp.sFltpppplLscspHQLcEucc .tVNDFEEVKFRVETAQKMVGSATISMDPDTLEHATTAVEAARSQLEIMKSVATDLDE.PFLMNEEKKLs+CEHQLsEA+H..... 0 3 9 11 +10652 PF10820 DUF2543 Protein of unknown function (DUF2543) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. The family has a highly conserved sequence. 25.00 25.00 103.90 103.70 21.10 20.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.60 0.72 -3.97 5 335 2009-01-15 18:05:59 2008-07-28 16:07:32 3 1 334 0 14 60 1 80.80 93 99.94 CHANGED MoNDIPLKYYDIADEYuTEoApPVSDuER-sLAHYFQLLITRLMNNEEISEEAQ+EMAsEAGIsEsRIDEIAsFLNQWGNE MNHDIPLKYFDIADEYATECAEPVA-AERTPLAHYFQLLLTRLMNNEEISEEAQHEMAAEAGINPVRIDEIAEFLNQWGNE....... 0 1 2 8 +10653 PF10821 DUF2567 Protein of unknown function (DUF2567) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.16 0.71 -4.73 19 131 2009-09-11 10:54:39 2008-07-28 16:08:36 3 1 131 0 34 108 0 156.80 42 71.54 CHANGED GLuusGsLlGuLWAWlAPPIHuVVAlTRuG-RV+tYLGuESppFFsAshhhlGLLSVLAVVAuVhhWQ.WRpHRGPhhVAuLuhGhssAAulAAGVGAhlV+LRYGulDhsssPlop.-+slsYVspAPPVFFu+pPLQIAhTLhaPAulAuLVYAlhAAuTuRDDLGG .........................GlussGlllGu.LWAalAPPl+u.VVs..hTRsGpplhtaL.G.uES.ppaFh.As.hhhlGLhsVluVVAuslh.Wp.hR.c+RGP.hVuuLulGhssAAu.lAA.uVGuhls+hRYus.....lDhsssPlst..sttls..hVspA..PPlaa...uctslph....Ahs.Lhhs.sslA.uLVYulhAuusuRDDLGs................................................. 0 7 23 30 +10655 PF10823 DUF2568 Protein of unknown function (DUF2568) Pollington J, Finn RD anon PRODOM Family One member in this family is annotated as yrdB which is part of a four gene operon however currently no function is known. 21.10 21.10 25.00 24.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.20 0.72 -3.76 12 114 2009-09-14 14:03:47 2008-07-28 16:14:56 3 1 105 0 44 113 0 90.70 29 80.50 CHANGED lhlRFlLELsslsuluhaGaphs.shhh+hslulshP...llhhllWuhFtuPpusp+.......lpGhsRhhlElllFuhushAhhhssp.hhulsaAslhll ............sltFlLELssLssluh....aGaphs...ts......hh....h+hslulhhP...llssllWGhFsAP+.uthp.......l.shh+lhlElhlFus.ushu.lhhhsphhhuhhhuhlhh................... 0 13 33 38 +10656 PF10824 DUF2580 Protein of unknown function (DUF2580) Pollington J, Finn RD anon PRODOM Domain This family of proteins with unknown function appears to be mainly found in actinobacteria. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.06 0.72 -3.66 56 605 2012-10-01 21:44:22 2008-07-28 16:17:35 3 2 159 0 185 723 2 97.70 19 80.66 CHANGED Mo....spLpVpPstL+phAupHspsAsplss...ussus..uhssplttoHGslsuphpsshpthhssRpsshsphtssssclApsLcsAAuhYppsDcssupslcs ................................lpVssstLcshA...up...hsphAsplss.......sssss........st.ss..t.s.ssshG.......h..s..s..t..hs.ss.hp.t...hhst.t....p...s...shsph....s...sshsshApsLpsuAstYppsDpssuttlt.t........................ 0 51 128 167 +10657 PF10825 DUF2752 Protein of unknown function (DUF2752) Gunasekaran P, Mistry J anon Pfam-B_001601 (release 23.0) Family This family is conserved in bacteria. Many members are annotated as being putative membrane proteins. 20.90 20.90 20.90 20.90 20.80 19.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.45 0.72 -3.94 79 584 2009-01-15 18:05:59 2008-07-28 16:18:56 3 1 475 0 185 535 65 51.50 32 38.74 CHANGED sCPh+tlTGh..CPGCGhpRuhhsLl+G-lsuAhphNshhhsshshlhhhhhh ................sC.h+tlT.Gh..CPGCGspRuhhsL.lHG-lsuAhphNshhlhs.h.shhshhhh.h.............................. 0 71 142 172 +10658 PF10826 DUF2551 Protein of unknown function (DUF2551) Pollington J, Finn RD anon PRODOM Family This Archaeal family of proteins has no known function. 21.70 21.70 22.10 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.80 0.72 -4.08 12 39 2009-01-15 18:05:59 2008-07-28 16:20:34 3 1 26 0 36 39 0 81.70 41 78.54 CHANGED LcpYLpRDpsGlR+slLplFLcscphThs-la-tLpcc.FsVSh+uVuuMVGllsS+LGILps..sshGspslYpLKEcYtslV ...LpcYLpRDpsGlR+slLplFLcscphTss-lach.Lpcc.FslSh+uVuuMVGlhsS+LGILps..pshsspshYpLKEcYtslV........... 0 7 28 32 +10659 PF10827 DUF2552 Protein of unknown function (DUF2552) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
21.70 21.70 21.80 117.40 20.80 19.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.79 0.72 -4.05 5 119 2009-01-15 18:05:59 2008-07-28 16:21:31 3 1 119 0 13 32 0 78.80 78 98.21 CHANGED M-QKLKsL+NTAQNKTWVSFLNpNHPYTLLHWSIGGs-SlKKDVWLLQDEMTFETcEFPTLEpAIsWIuENM-pITDVL .MDKQL+TLRNIANERTWASFLNDNHPYSLLHWSIAGVGQEuKDVWLLQDEVTFQTTEFPTLD-AhpWISENMEQVTDVL 0 1 5 7 +10660 PF10828 DUF2570 Protein of unknown function (DUF2570) Pollington J, Finn RD anon PRODOM Family This is a family of proteins with unknown function. 22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.48 0.72 -4.46 7 240 2009-01-15 18:05:59 2008-07-28 16:22:39 3 2 193 0 18 152 10 105.20 32 83.99 CHANGED h.palhu...slshl..ILGLsuWhWhQSppIsoL+AENpsQAQTIppQpcANppLs.tLpQERQAV.tQQchsNElcptsppstEplKoIltppsCA+scLPpuVlD...RLHp .............................hh..ha.hlul.uhl...llu...hhGh....h.phS..as.lu+.pAc....scsQspTlcspscs...h...shl.......ss.....ulQ........ph......pplltp.ptpsQ.Qhp.p.-....u-.tppE....pl+ssIucDc.CA+s.LPsu..........+ht.................................. 1 1 4 12 +10661 PF10829 DUF2554 Protein of unknown function (DUF2554) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 76.00 75.90 18.00 17.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.73 0.72 -3.88 3 416 2009-01-15 18:05:59 2008-07-28 16:23:53 3 1 410 0 11 60 0 67.60 84 99.76 CHANGED MlTKsLSVVLLTCALFSGQLMAGHpGHEFVWVKNVDHQLRHEADSDELRAVAEESAEGLREHHNWQKSRKPEoaFR .MhpKslSslLLsCALFSGQLhAtppGH-FVWVKNVDHQLRHEADSDELRAVAEESAEGLREHFYWQKSRKPEAGQR... 0 1 1 5 +10662 PF10830 DUF2553 Protein of unknown function (DUF2553) Pollington J, Finn RD anon PRODOM Family This family of bacterial proteins has no known function. 
20.70 20.70 21.00 54.20 20.10 20.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.70 0.72 -3.46 8 101 2009-01-15 18:05:59 2008-07-28 16:24:25 3 1 101 0 15 44 0 73.00 59 92.91 CHANGED pp+lDIT-+VTGRhcsGpLsLYc-NEhIGc..Msuts.QYELKsGYoacspKFY+hsDsssps-tKYVD.CD.EsGWC .o.hKIDITN+VluKF.+.sshLELYpsc.MIGKFYVYTEsKQYVLEDGYlYEsGKFYRIID.THR.GNspsAEuCD..LGWC... 1 2 9 10 +10663 PF10831 DUF2556 Protein of unknown function (DUF2556) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 71.90 71.60 22.90 22.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.85 0.72 -4.33 3 414 2009-01-15 18:05:59 2008-07-28 16:24:39 3 1 413 0 10 45 1 45.60 92 88.54 CHANGED MIRKYWWLVVFAVSVFLFDALLMQWIELLoTETDKCRNMNSVNPLKLVNCoDL MIRKYWWLVVFAV.VFLFDsLLMQWIELLATETDKCRNMNSVNPLKLVNCDEL 0 1 1 6 +10664 PF10832 DUF2559 Protein of unknown function (DUF2559) Pollington J, Finn RD anon PRODOM Family This family of proteins appear to be restricted to Enterobacteriaceae. The sequences are annotated as yhfG however currently no function is known. 20.50 20.50 20.60 20.50 19.30 18.90 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.81 0.72 -4.17 3 338 2009-01-15 18:05:59 2008-07-28 16:27:31 3 2 335 0 17 58 0 53.80 83 95.45 CHANGED KKLTDKQKSRLWEtQRNpNFQASRRLEGVEVPLVTLTAEEALARLEELRRHYER ...KKLTDKQKSRLWE.pRNtNFQASRRLEGVEhPLVTLTAsEALARL-ELRRHYER...................... 0 1 2 10 +10665 PF10833 DUF2572 Protein of unknown function (DUF2572) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
20.30 20.30 21.20 20.70 20.00 19.80 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.48 0.70 -4.93 5 87 2009-01-15 18:05:59 2008-07-28 16:28:22 3 2 84 0 12 71 0 200.60 36 82.57 CHANGED KGllTLTILhLLSulLlIhMLFDDDtL+aHpSlhuQRKlYVpQsLpLQ+hopEQKpslCpcl....PLNoopssppIoFcptutsDu...puaFlWCcRpoLFKppPKKuhNpGuLSpaINcEpluLFpH+FpusPt.Lsss+usYLYWlD-sQsElpIsGsIsAIlIAEGDLKIsGKGRI+GsVITGGsLoLE.uVpluY+KtTVssLVppYSpWQLAEKSWaDFss ...................................................................................................+GhhTLslLlhlSulLslhhLhcDshLphaputtsQRphYVppplpL.chotpccpptC.pl....shspstpshplsh.t..Lp..t...s-u...lpaalWCcR.sLF.p.p.pPp+st.p...s..tlppFls.t..p..tl...shFp..phu..psP.t....sspp.PplYWhss.sps-hplsts..lpul..lIAEGDLclpGpG+IpGslITsGpLoL-..s..l.plsYuKpsVstllppYSpWplAEpSW.DF......... 0 1 5 11 +10666 PF10834 DUF2560 Protein of unknown function (DUF2560) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 42.00 48.60 23.00 20.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.60 0.72 -4.02 2 125 2009-01-15 18:05:59 2008-07-28 16:28:58 3 1 115 0 8 42 0 78.70 72 95.74 CHANGED MAEIhshTE.QphpL-Ih+LV.spsAAAEcAhtFlusscLphELFK.ph..utupushhsRs.EAlRcuKEALDLFTsGA ...MAEIIPMTEEQKFQLEIYKLVMNQNAAAEEAFQFIGTDELKLELFKIHFQSG.GANSDITTRTIEAVRKSKEALDLFTTGA...... 0 0 0 0 +10667 PF10835 DUF2573 Protein of unknown function (DUF2573) Pollington J, Finn RD anon PRODOM Family Some members in this bacterial family of proteins are annotated as YusU however no function is currently known. This family of proteins appears to be restricted to Bacillus spp. 
22.80 22.80 23.00 80.70 22.40 22.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.75 0.72 -3.94 7 119 2009-01-15 18:05:59 2008-07-28 16:29:26 3 1 119 0 11 45 0 80.30 77 97.34 CHANGED tpKhcEQlDGLlEKYTELLLGEos-EhKEcVKtWllYSHIAKSMPPLAKHWNusYP-AK-tlKclIpcIKcLNEt+R..psK ....S.EKFNEQFDGLLEKYTELLLGESNEERKEQVQKWALYSYIAKTMPALVKHWNET..YPDAKEEMVQLITcIK+LNEEKRNE...Q.. 0 4 7 8 +10668 PF10836 DUF2574 Protein of unknown function (DUF2574) Pollington J, Finn RD anon PRODOM Family This family of proteins appears to be restricted to Enterobacteriaceae. Members of the family are annotated as yehE however currently no function is known. 25.20 25.20 26.30 48.90 23.80 25.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.03 0.72 -4.28 4 385 2009-01-15 18:05:59 2008-07-28 16:30:48 3 1 383 0 7 60 0 75.30 65 99.38 CHANGED MpKYhL.GIIhLAYGluSPsFuSDTATLTIsG+losPTCSh-VVNsQLQQRCGphhahssspppASoPs+GVTTEVlslsusSpRpIVLNRYD ........................................hthVNuQsQQ+CGQLhasVDTpa.sSSPsKGVTTEVVsssuDSKR+IVLNRYD..... 0 1 1 3 +10669 PF10837 DUF2575 Protein of unknown function (DUF2575) Pollington J, Finn RD anon PRODOM Family This family of proteins appears to be restricted to Enterobacteriaceae. Members in the family are annotated as yaaY but currently there is no known function. 25.00 25.00 29.70 29.70 19.40 17.80 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.47 0.72 -3.88 2 193 2009-01-15 18:05:59 2008-07-28 16:31:56 3 1 192 0 4 26 0 68.10 82 99.23 CHANGED hpHSLRSDGAGFYQLAsCEYShShhKIAhuGtFhsslCpMAMKShFFhhp.hNRRLTLTAVQGILhRFSLF ................hpHSLRSDGAGFYQLAsCEYShShRKIAhuGtFhsslCtM..AMKShFFhhs.hNpRLTLTAVQGILhRFSLF......... 
0 1 1 3 +10670 PF10838 DUF2677 Protein of unknown function (DUF2677) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as UL121 however currently no function is known. 20.90 20.90 21.00 31.50 20.70 19.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.06 0.71 -4.59 4 34 2009-01-15 18:05:59 2008-07-28 16:32:48 3 1 14 0 0 29 0 163.10 63 91.10 CHANGED hsllh.sstGstss.hCssspsplKlpChL.pLDpRLaW.lpDop....RVhsFc.-spp.hs...........apV-VRtsh.ssp.asl.LhhPLh.psTVuLLL.DhtpsR.EclLChuhlPph+tlpsCthDsDLulLYuVCllLSlSlVsAulhKlDYDpoht..hpuYKS ..............LMChALMARGTaGAYICSPNPGRLRISCALSV..LDQRLWWEIQYSSGRLTRVLVFH.DsGEcGDD.........LHLTDTRHCTSCTHPYVISLV.TPLTINATLRLLIRDGMYGR.GEKELCIAHLPTLRDIRTCRVDADLGLLYAVCLILSFSIVAAALWKVDYDRSVAVssKSYKS..... 0 0 0 0 +10671 PF10839 DUF2647 Protein of unknown function (DUF2647) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins are annotated as ycf68 but have no known function. 22.30 22.30 23.70 26.30 21.20 20.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.46 0.72 -3.90 4 31 2009-01-15 18:05:59 2008-07-28 16:33:42 3 1 25 0 6 19 0 67.80 65 51.29 CHANGED AYSSCLsRohhssKLLLRRIDGAIQVRSpVD.TFYSLVGSGRSGGsP.....susLhpp.aIsh.sh.uhLStst ..............AYSSCLNRSLKPNKLLLRRIDGAIQVRSHVDhTFYSLVGSGRSGGGs......st.L.SR.pIp.lSVautLShpp.................... 0 0 0 2 +10672 PF10840 DUF2645 Protein of unknown function (DUF2645) Pollington J, Finn RD anon PRODOM Family This family of proteins appear to be restricted to Enterobacteriaceae. Some members in the family are annotated as YjeO however no function for this protein is currently known. 
25.00 25.00 26.10 25.70 20.70 20.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.53 0.72 -3.86 5 170 2009-01-15 18:05:59 2008-07-28 16:39:21 3 1 157 0 19 68 0 99.80 65 98.68 CHANGED MS.+htlLsthYhIlChLhIhlhSsLDcEaMIDGp-IsNlC-VhRtl.sDDoRDFsuhhsLPLhhPFFasslh++hKShFLsllullLluYWlWpFFIRapFC ........Ms.pMFlLCCIWFIVAFLWIsITSuLDKEWMIDGRGINNVCDVLhYLEpDDTRDV.GVIMTL.PLF.FPFLWFA.....LWR.....KK.RGWFMYATA....LA.IFGYWLWQFFLRYQFC.... 0 1 8 10 +10673 PF10841 DUF2644 Protein of unknown function (DUF2644) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Pasteurellaceae. 20.90 20.90 20.90 23.00 20.00 20.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.96 0.72 -3.94 10 46 2009-01-15 18:05:59 2008-07-28 16:40:06 3 1 38 0 8 46 0 58.90 55 70.91 CHANGED pELITNsDGRLSTTuFIQFaGALlMAGILlYuVaLDRsYVsELFssFAlFCG.GusATKGh ..ELlTNs.DGRLSTTuFIQFFGhLlMAuILhauVYLDRshVs-LFhsFAhFCu.GusATKGh.......... 0 1 6 8 +10674 PF10842 DUF2642 Protein of unknown function (DUF2642) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillus spp. 20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.09 0.72 -4.48 10 100 2009-01-15 18:05:59 2008-07-28 16:40:24 3 1 82 0 24 81 1 61.00 45 78.44 CHANGED uPQlVSllDPYVYQTLQollGcclVVpTlRGolRGpL+DVKPDHlllEus.pslaaIRlQQIVhVhP ............phluhssPYVhpslppllGpplVlET.VRGsl.+G.pLcDVKPDHl.llE.s....sssahlRlpQIValhP.......... 0 8 18 18 +10675 PF10843 DUF2578 Protein of unknown function (DUF2578) Pollington J, Finn RD anon PRODOM Family This is a Saccharomycete family of proteins with unknown function. The protein in S. cerevisiae is strongly induced in response to many stress conditions and is repressed in drug resistant yeast strains. 
25.00 25.00 144.70 143.00 20.80 19.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.33 0.71 -4.63 12 62 2009-01-15 18:05:59 2008-07-28 16:41:18 3 1 42 1 37 43 0 168.30 55 97.50 CHANGED Mo.KKcKp.....................PKspohohpSsES...............lKsFEDLpsFEsFl+sET..Ds-FDahHs+LpYYPPFVL+EsH--.EKIKsTsNp+SKKFhRcLppHlcKHLlKDlccshth.-L+FcctucpEoFs+lsW+atDpo-a.........asR+a+lplsVoCpp-sAMVDVDYKohP ..........................hpc.........................pKtPKhpTloTcsGEo...............lKVFEDLpsFETal+sETE.D.s.-FDalHC+LpYYPPFVLH-uH-.DPEKIK-TsNSHSKKFVRHLHQHlEKHLLKDIKpAlshPELKF+-KsKcEoF-.+......IsW+Ys-ET-a.........asR+FKlpVpVsCsH-sAMV-VDYKThP.. 0 4 18 31 +10676 PF10844 DUF2577 Protein of unknown function (DUF2577) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function 21.50 21.50 21.90 24.40 21.40 20.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.26 0.72 -3.96 46 176 2009-01-15 18:05:59 2008-07-28 16:41:52 3 2 132 0 36 161 2 103.90 26 92.34 CHANGED chlcphutsshpsspPspl..hhGpVlossP.Lplplsp..p.llLspc...Lhlsc.hlpchphphphpt..tp....................................................hhhpssLchGDcVlllp...psG.QpalllD+l ...............hlpthuhpuhpsspPspl..hhGcVhossP.Lcl.plsp..p..llLsp-.....lllsc.plpphphphphpt...t................................................................h.h.ssLcsGDpVhllp......hpuG.QpahllD+l................... 0 18 30 31 +10677 PF10845 DUF2576 Protein of unknown function (DUF2576) Pollington J, Finn RD anon PRODOM Family The function of this viral family of proteins is unknown. 20.40 20.40 20.60 21.50 19.80 17.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.47 0.72 -4.80 13 18 2009-01-15 18:05:59 2008-07-28 16:43:07 3 2 17 0 1 15 0 44.90 69 27.79 CHANGED NAPVVsSpHDYDR-QI+RELNSLRRsVH-LCTRS.uTuFDCN+FLcSsD ....NAsVVsotpDYDR-Ql+RELNSLRRsVH-LCTRS.uTuFD.CN+FLcSsD.. 
1 0 0 1 +10678 PF10846 DUF2722 Protein of unknown function (DUF2722) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 21.00 21.00 21.10 21.70 20.20 20.90 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.57 0.70 -5.44 11 93 2009-01-15 18:05:59 2008-07-28 16:44:51 3 2 76 0 69 95 0 283.10 20 67.85 CHANGED M................poh.t..p..pphsspspsppppp...............................................................................................................suL.sLLGsNVssaPaSEsuhlculcL+sEQE+TKQpYYKLEssNKslpllcpAlpApIPsNhIPhLa..h.Nss.......................................................................................pspspspsss.spss.spssusshlsspp.s.............psPhsY+Fsssosss.............p.h..sspRRshSPA+IGAu.....AVAsLusssssh+............ptsssshR+...p.spsHpRphShPs................................ptspsssp.ps........................sTSslphps.sspslp+pstssspssp-.hToh.Hhl....h+.t.......................................p.p+cH.....+Rp+Ss.othtsIDLs..s.......t...............p.scss-ssp+pppspppps.......s....................................-Dpsho.soSh....................spsssposht+hPpsls..ph.N.s 
..............................................................th...........................................................................................................................................................................................ph+.EpE+T+Qphh+LEptphph-lL+huhpuGlPsshIPhlF....hsssst.hs.t............................................................................................................................................................tp.t..tpttp.tpp..th.tss....t..p..............................s..hth.s......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ss...................................................................................... 0 9 34 60 +10679 PF10847 DUF2656 Protein of unknown function (DUF2656) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 23.00 23.00 23.60 23.00 21.30 20.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.64 0.71 -4.23 12 19 2009-09-16 17:37:28 2008-07-28 16:45:57 3 1 18 0 8 18 18 129.70 41 90.86 CHANGED hFlLSHNLQlpSshVPulusp-LApGL...hupustlpsspsLsHPHWhVclc...SsLSsp-hAp-LVcuWcphRpsh..GHshsHslLALGGRKDot.usPGuP..LQpGpWGVDVVEssss-sFLpuINW-ALKuGRPsD .........hhlLSHNhplpss.VPslshp-lApsl...hsppstlpsspslsHPHWhVclp.....ushSspchupthsp.uWpphRpuh....ucshsHslLALGGRKDos.ussuuP..LQpGtWGVDVVETsss-sFLpuINW-uLpuGRPtD....................... 
0 2 3 6 +10680 PF10848 DUF2655 Protein of unknown function (DUF2655) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 41.10 40.60 21.30 16.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.96 0.72 -4.12 2 140 2009-01-15 18:05:59 2008-07-28 16:47:03 3 1 139 0 2 13 2 81.30 87 84.83 CHANGED hSVAphShGpTAQLStKQsGaYSPEhh.STGKDCNPQPANCLKsQYVLRHCCVDDRSsKMGYSsKhhVLTphssETASLFHC .....MSVAphShGpTAQLStKQsGaYSPEhh.STGKDCNPQPANCLKsQYVLRHCCVDDRSsKMGYSsKhhVLTphssETASLFHC. 0 1 1 1 +10681 PF10849 DUF2654 Protein of unknown function (DUF2654) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as a-gt.4 however currently no function is known. 25.00 25.00 73.00 72.40 21.30 18.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.42 0.72 -4.24 13 35 2009-01-15 18:05:59 2008-07-28 16:48:29 3 1 34 0 0 26 0 70.20 49 63.06 CHANGED AcKKApKhL+KNpREIcRL++HAEpAlhssN+-tYhYAIpKLRsIhKQ.Phsc-llpshWhToRpQlh-hl ...Ap++AsKlL+KNpREIpRLp+HAptAlhsNNhstYtYAIpKLRcIhKQ.Phs-ELlphhWpToRpQI.-hl....... 0 0 0 0 +10682 PF10850 DUF2653 Protein of unknown function (DUF2653) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 21.60 21.60 23.10 47.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.03 0.72 -3.68 4 127 2009-01-15 18:05:59 2008-07-28 16:49:31 3 1 127 0 16 75 0 81.40 66 89.06 CHANGED MppI.IsEQ-IIsAlClYIApp+pItPE-V.V.ELhYDDcoGFuAElpsshppp.LhosslIpALR.alc-.hpsNPausslcLpLDcccGIh .........l..p-llsulClahupcctltPE-V.V.ELMYDDDYGFSAEVEVN.GR.Q.QILIQANLIEALRLLLDREYNVNsFAARLQLELDDEEGIY.... 
0 5 12 12 +10683 PF10851 DUF2652 Protein of unknown function (DUF2652) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 22.10 22.10 22.60 32.20 22.00 21.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.45 0.71 -4.09 6 72 2009-01-15 18:05:59 2008-07-28 16:50:17 3 1 65 0 15 46 51 104.00 71 46.52 CHANGED lApLLEuVI-Au.psLKLAKLEGDAAFFaAsssssssh.lscphstMRpuFhpRREphc+D+.CpCcSCtQlcsLSLKFVAHtGEVApQ+VK+psELAGhDVILVHRMLKNpVPVsEY .......VAQLLESVIDAu.KGhKLAKLEGDAAFFWAPGuNsSVl.VC-RsspMRQ+F+sRREQIKKD+sCDC+SCpQt-sLSlKFVAHpGEVAEQKVKRNVELAGVDVILVHRMLKNEVPVSEY.... 0 4 9 13 +10684 PF10852 DUF2651 Protein of unknown function (DUF2651) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 28.40 28.10 24.30 24.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.94 0.72 -3.62 4 152 2009-01-15 18:05:59 2008-07-28 16:51:26 3 2 106 0 11 94 1 77.30 58 96.00 CHANGED MsEL.hsFhIhPLhIhIlSIsGThhhKshYlMPhlohslhLllshTlas.uFhhWsshYollSFhlSYITllhl..hc.spN ......MsELIFllhIhPLhIhIlSVlGTpKsKTaYVMPIVTFu..sF..LIlsVhsF..sPpFFFWVGMYSIhSFIVSYhTLLF...V+GYclsE........... 0 2 7 7 +10685 PF10853 DUF2650 Protein of unknown function (DUF2650) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Caenorhabditis elegans. 22.90 22.90 23.80 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.24 0.72 -4.50 7 51 2009-01-15 18:05:59 2008-07-28 16:52:14 3 1 9 0 51 41 0 37.20 38 31.25 CHANGED CPppolaaaacCCGphspECChpLpsWVhlhLhlhhls ..CPpsohaaaacCC....Gpts....p....-CCaplpsWlhlhLhlhhh............. 
0 19 24 51 +10686 PF10854 DUF2649 Protein of unknown function (DUF2649) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as Plectrovirus orf 10 transmembrane proteins however currently no function is known. 25.00 25.00 27.70 27.50 20.90 20.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.57 0.72 -3.86 4 27 2009-09-10 17:06:49 2008-07-28 16:52:44 3 1 6 0 0 26 0 66.10 69 99.22 CHANGED MQNDWIKLKEFFIaIFLFIDKTNVESIpMWNLTQNEYLTLMVGVWlVILFLTWFFLWMVFKIVGYFK ............MQNDW.KLKEFFIHIFLFIDKTNVESIThWNLTQNEYLTLMVGlWIVILFLTWFhLWMlFKIVuYFK................ 0 0 0 0 +10687 PF10855 DUF2648 Protein of unknown function (DUF2648) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillales Staphylococcus. 19.80 19.80 21.20 20.60 19.70 18.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.59 0.72 -4.52 3 72 2009-01-15 18:05:59 2008-07-28 16:52:57 3 2 72 0 4 21 0 32.80 78 72.31 CHANGED MKKLAVILsLuGAAFYGFKKYQN+VNQAPNIEY MKKLAVI....LsLsGuhaYuFKKYQ.p+VNQAPNIEY.. 0 0 0 4 +10688 PF10856 DUF2678 Protein of unknown function (DUF2678) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.10 25.10 28.00 28.00 24.30 23.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.72 0.71 -4.36 4 64 2009-01-15 18:05:59 2008-07-28 16:56:09 3 3 51 0 39 49 0 105.60 58 82.36 CHANGED M--apTRo.GT....ppPLFGETpsRDRIlNLslGGhTSlLVL.ThlSuhVFPp.PP.slNIFFslCIhhhs.osllL..............................IFWYRQGDL-PKFRsLIYY.hholVLLClCANLYFHDVt+ ...................M--asoRTYGTu.GhDN...RPLFGETSA+DR.IINLlVGuLToLLlLV.TlISAFVFPplP.P+PLNIFFAVCI.LssloshlL..............................IaWYRQGD..L-PKFRpLIYYhlhSIlhLClCANLYFH-Vt....................................... 
0 13 16 24 +10689 PF10857 DUF2701 Protein of unknown function (DUF2701) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 25.00 25.00 25.30 96.90 24.90 19.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.85 0.72 -4.20 7 13 2009-01-15 18:05:59 2008-07-28 16:57:47 3 1 13 0 0 12 0 63.50 42 74.66 CHANGED lslhllslshlsalLlYLl+WohlhshhsplKl+llph..TTRRSFppLDsVYYTsDspV.GlNlE .lslhlllIsIlsaLLlYLl+Woalh-hhNclKl+llph..TTRRSFscLDsVYYTDDspV.GVNVE 0 0 0 0 +10690 PF10858 DUF2659 Protein of unknown function (DUF2659) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 34.80 34.80 34.90 326.80 34.70 34.70 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.52 0.70 -4.94 3 43 2009-01-15 18:05:59 2008-07-28 16:58:51 3 1 43 0 7 30 1 220.10 83 99.91 CHANGED MTDILDEVLsDcNEEKRLIFFKKLLPIVIIISLIAITIMVINNNNKs+QIcNNQKNGDIFVKoVsLEospsNcELAlsTLEN.LVosSNTKIQEIAtLEQVAIKISsppaSEAKDLLNKIIENKEYSEIoTSYARIuWCSLVI.....DDcNLDIsDKEKLlKYLNYFDDEsKPFWATAoIhKAIWDIKNNMcscAEKNL+uLltSNNoSDLLKDQAKALLsNL-+ MTDILDEVLSDQNEEKRLIFFKKLLPIIIIISIIAITIMVVINNNKDKRIKNNQKNGDILVKTVGLETTKDNcELAFNTLEN.LVTTSNTKIKEIAALEQVAIKIScKKYSEAKDLLNKIIENKEYSEISTSYARISWCuLVl.....DDpNLDIQDKEKLTKYLNYFDDEKKPFWATATIIKAMWDIKNNMKsQAEKNLKNLLISNNVSDLIKDQAKALLVNLNK. 0 1 2 2 +10691 PF10859 DUF2660 Protein of unknown function (DUF2660) Pollington J, Finn RD anon PRODOM Family This is a family of proteins with unknown function. 
25.00 25.00 41.40 131.50 20.10 18.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.95 0.72 -3.76 4 44 2009-01-15 18:05:59 2008-07-28 17:00:12 3 1 44 0 7 28 0 86.70 82 63.36 CHANGED LhY.KIss+KKNhhsuptNNI--o.slALNsppp-NK...KLTLQE+IELSWpFLYDITEsILNKFSKEDlhpVNKCGplLaENGVRYEH LMYKKIAARKKNILPApGGNIDDSPNVALNSQKPENK...KLTLQERIELSWpFLYsITEVILNKFSKEDVIQVNKCGQVLFENGVRYEH 0 1 2 2 +10692 PF10860 DUF2661 Protein of unknown function (DUF2661) Pollington J, Finn RD anon PRODOM Family This viral family of proteins have no known function. 20.80 20.80 20.90 25.50 20.00 19.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.77 0.71 -3.90 7 19 2009-01-15 18:05:59 2008-07-28 17:02:46 3 1 18 0 0 18 1 103.30 41 36.17 CHANGED hsLVaVWYH.cspFlaNospaPFWHNlhYpuppYpsallYal-Npss..lPt....stslphlsFKchhscc.phppLpshhs...KIDYMKLpllhs..............schl..spphlLLMDMDCsl .hsLVaVWYH.cspFVhNTspaPFWHNlpYauppacshVlYhl-sps..sh..pl..Ps...stslphlNFKcshsph.phsplpslhp...KIDYMKlshlhs..............schl..spsalLLMDMDCsl. 0 0 0 0 +10693 PF10861 DUF2784 Protein of Unknown function (DUF2784) Gunasekaran P, Mistry J anon Pfam-B_001600 (release 23.0) Family This is a family of uncharacterised protein. The function is not known however it is conserved in Bacteria. 22.60 22.60 22.70 24.30 22.50 22.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.70 0.71 -4.28 45 266 2009-09-11 08:43:52 2008-07-28 17:03:21 3 1 255 0 81 218 171 116.00 35 92.57 CHANGED YpllADhllllHhhFllFVlhGGlLll..........Rh.thhhlHLsAluWushlphhGh.hCPLTsLEshLRptAGpsuY.suGFl-HYlhsllY......PstlssslphhlGslVlls....ashlhhRp ..........thhADhlllhHhhFllFVlhGGhLsh...........RhhphhhlHlsAlsWGsulth...h.sl..sCPLThlEshLRptAGtssh.ssGFlpHYlhsllY.......Psshs.ssspllhuslVlhs......ahhhh...h............. 
0 24 57 71 +10694 PF10862 FcoT DUF2662; FcoT-like thioesterase domain Bateman A, Pollington J, Finn RD anon PRODOM Domain Proteins in this family have a HotDog fold. This family was formerly known as domain of unknown function 2662 (DUF2662). The structure of Rv0098 from M. tuberculosis [1] suggested a thioesterase function. Assays showed that this protein was a thioesterase with a preference for long chain fatty acyl groups [1]. The maximal Kcat was observed for palmitoyl-CoA although longer and shorter molecules were also cleaved. In solution this protein forms a homo-hexameric complex. 20.80 20.80 21.00 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.09 0.71 -4.69 6 71 2012-10-02 20:54:35 2008-07-28 17:09:44 3 2 71 2 9 28 0 151.50 66 81.73 CHANGED +VLcPYp.csCRYLlcA..........ph+Ap.sulhAhGsFsIsESsYIcsTGHFNAVEl.lCaNQLuYshhApuVtNc.IssLcuWSl-DYhc+QLSshLItphSSpF+KPlNPpKFSGRlps+slphhp+o..h.aLhlssshcFWD-sGGtupGEs.LAh ..............RVLEPYSCKGCRYLIDA..........QYSATEDSVLAYGNFTIGESAYIRSTGHFNAVELILCFNQLAYSAFAPAVLNEEIRV....LRGWSIDDYCQHQLSSMLIRKASSRFRKPLNPQKFSARLLCRDLQVIERT...WRYLKVPCVIEFWDENGGAASGEIELAA....... 0 3 7 9 +10695 PF10863 DUF2702 Protein of unknown function (DUF2702) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 25.00 25.00 56.30 51.40 21.60 20.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.85 0.71 -4.47 8 43 2009-01-15 18:05:59 2008-07-29 08:49:20 3 1 41 0 27 41 0 140.30 44 69.44 CHANGED MSRupEIK-KpsLQA+lQhuFSsssuKVLuWLpsStppssussh...tp....s-hs-S+cuFhcLPVlQhGSGLohp...pstspssssIpTIGDFIcSDKclSoLuKcK+spps...pppsslaRlsK-DTKAMlALKpKMRsppRcplRcc ...MSRtKEIpEK.sLQAKLQ.oFSsNsutVLsWLcpsppss.s.sst...pp................p-lp-u+cuFacLPVlphGSGLpFt...pts..tsspc-IpTIGEFI...puD.KKlSoLuKKK++sp.ss.....pRs...shaRlsKDDoKAMlALKpKMRcsp+-slRpp..... 
0 3 13 24 +10696 PF10864 DUF2663 Protein of unknown function (DUF2663) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as YpbF however currently no function is known. 22.90 22.90 24.10 24.50 22.50 22.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.69 0.71 -3.93 10 52 2009-01-15 18:05:59 2008-07-29 08:50:26 3 1 51 0 18 47 0 129.20 42 87.62 CHANGED QMLpALIcRKcKaE+hc+QshhhphAullshshallalhsKshu...hphsthLutlhusssaLhhlLssuhuYssuhYaKKKcEKAEsEaHtLRCEIIQKSpDLWsppEpW+uRcpVFchMK+cYDINLYaE .....QMLpullcRKpKaEphtcpshhaphsullshslhhlalhhpshs....ts.phhlpthlupssaLhallhsuhuY.hsuhYaKKKcEKAEs-FHpLRCEIIQKSsDLW.ps-pWcsRcplFchMK+cYDINLYaE........ 0 4 11 14 +10697 PF10865 DUF2703 Domain of unknown function (DUF2703) Pollington J, Finn RD anon PRODOM Domain This family of protein has no known function, but it may be distantly related to the thioredoxin fold. It contains the CXXC motif that is characteristic of thioredoxins. 20.10 20.10 20.80 21.60 19.90 19.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.70 0.71 -4.18 14 60 2012-10-03 14:45:55 2008-07-29 08:51:33 3 10 50 0 45 62 2 107.50 29 63.03 CHANGED LsIcWp+Lsss.stTC-RCusTucslppAlppl+phLp.hGlpVplpcttlsspphuht...hESNpIhIsG+slEphl.uupVsposC....Chss.sDscCRslphssppYEslPtcLllcAu .................l.Ipa.hLshs...TCsRCtsTtpslpcAlpplpthLp.hGlclhlpch...plss...p..phsht.......pSspIhlsGpsl-.hh.shpsspohC....shss.ssspCRsh.thp.G....ppY-shPtthIhcAh......... 1 21 40 43 +10698 PF10866 DUF2704 Protein of unknown function (DUF2704) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
19.90 19.90 23.70 22.80 19.80 19.10 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.17 0.71 -4.87 12 19 2009-01-15 18:05:59 2008-07-29 08:53:22 3 1 19 0 0 16 1 161.50 41 78.97 CHANGED sss.sss-tpssRKsVcuss-EYTVDGLKLKstYVtYYKpLphlVDFhVMhlSKplsMKEY-pVYSLGRQLYElLRulFVDEPFKLWLEpNspchsss...+cpIhKhLQspLphslt....hKosTFKshlhNlLNocLs...s+YDsustYIKPNCIVsTaNCCsLsFc. .............tsss.......tpspp+slcs..cpaTVDGL+LKosYVtYYKQLKtLV-hlV..halSK.plshK-YcEVYoLuRQLYEllRulFV...DEPFKLWLEp.....NsppLsss..tth+D..cIaKpLpspLcsssss.......scssThKshllNVlNscLstp.scaDssstYlKPNCIV.TasCCsLsFc.............................. 0 0 0 0 +10699 PF10867 DUF2664 Protein of unknown function (DUF2664) Pollington J, Finn RD anon PRODOM Family This family of proteins is a viral family, annotated as UL96. Currently no function is known. 21.20 21.20 21.20 29.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.86 0.72 -3.36 8 32 2009-01-15 18:05:59 2008-07-29 08:54:00 3 1 29 0 0 24 0 91.60 35 73.95 CHANGED MRhsLE+pQ+cFL+pshGscHPLospQslpsh+stsRpps+psppslpsVustlh.cp+tpl+........cppppA+pLQ+.hDlD-hlDoLsElKDs .....MRhcLE+pQ+pFL+csaGspH.LoppQulpshcsss+pppc.sppssppVustlh.cp+uplp........p-hpps+pLpp.hc.VD-hLDoLsElKDs................... 0 0 0 0 +10700 PF10868 DUF2667 Protein of unknown function (DUF2667) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Arabidopsis thaliana. 
20.90 20.90 21.10 21.20 20.60 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.60 0.72 -3.61 5 19 2012-10-01 23:31:40 2008-07-29 08:56:03 3 1 4 0 17 30 0 84.20 29 94.17 CHANGED MGSLRLSTVAIA.VVVCLSILLISPTEVDGRtVCDhstGtCosh...STCs-sCpslc....usFpGGECtshuuhsGholCaCC+s...VpSuAEhESM ..................h.slh.lllClSlL.Ll....SPh.....c...lsG..p..h..C.D.h.......hGsCs.h......tppCscsC+php....ppatGGpChshst.ss....shChCCh...............t......... 0 10 10 11 +10701 PF10869 DUF2666 Protein of unknown function (DUF2666) Pollington J, Finn RD anon PRODOM Family This Archaeal family of proteins has no known function. 22.00 22.00 22.60 33.90 21.30 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.74 0.71 -4.12 9 43 2009-01-15 18:05:59 2008-07-29 08:56:51 3 2 30 2 32 46 0 127.30 38 94.58 CHANGED E-+IpFTAK+GKWhVlK+LhIDEpTspl-IARLLASIsETlstKI.-Fhs..hDhc+Ic-hhsthhchKK....EE-Iscslpth+S.tsophhst...ppEs+.hl+ch........Lp+LGlphcVsuK.lEKYlEKs ...................E-+IpFsA.....K+GcWhVsK+LhID-pTpsh-IARlLASIsETlstKIP-YLs..hDlctlpphhc-lhphKK....-c-Ispslp+LKSPuTo+Klsphhppc-tK.hLKchL.p...hlLpRlGlppclssK.lEKYlEK....................... 0 3 4 19 +10702 PF10870 DUF2729 Protein of unknown function (DUF2729) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 20.20 20.20 79.20 79.00 19.80 18.40 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -9.07 0.72 -4.24 8 13 2009-01-15 18:05:59 2008-07-29 08:59:52 3 1 13 0 0 13 0 54.80 65 84.38 CHANGED NLLsYCKLKLVKpVSKTVuuLLCKCV....APEDo.D..sGDRYlQINNNCNFIYINVVp .pLLTYCKlKLVKuhSKphuuLhC+CV...hus-Ds.D...GDRYhQhNNNCNFIYINVVK. 1 0 0 0 +10703 PF10871 DUF2748 Protein of unknown function (DUF2748) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
25.00 25.00 791.70 791.50 18.90 18.70 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.60 0.70 -6.07 2 44 2009-01-15 18:05:59 2008-07-29 09:06:05 3 1 44 0 7 27 3 439.80 89 99.98 CHANGED MoslYHILc+lPAI.+pDM.lEYEpLA.pLlpSGKLRlDs-sphNFsRhoEPuLNlslhlSpEELssP+L..cTpthh.NlY+p.hpc....pKlppIhssLpKphth..sVcp-lh.hLARlFVQSAHPIVI+WLLLp+sEVFloYSppIGDhMDhsoWphsGtNSGMQShNGpslAIaVSCGGNP..Fspp.p-pshYGsGasAhARLQIIAAQELGHaADIhRD.pup.lsRHSsN.ShTKApspVhhAR+sDl.+CaplLppL.psGhpp.lsYEpplKFYptNKVpGlKlhhh+hh.FhYK.+Lh.h.pppshIFV+haKs-pY.uLMlcAMhhDM.uNLpPtA-VYKpcsP-hEEAIAClEALARVPQQsIKWGalTThphMpDLYhIYYppVIPSLIspYphITGcsY.Rsh.....NahSph.hap.KKL..hhK....PsREl MTSIYHILDRVPAIYKQDMEIEYEHLAMQLIKSGKLRIDTDDCCNFARFTEPALNISLMVSpEELTSPHLIPETTKLFQNLYRNSASD....QKIKSIFDNLKKQIQKLQPVKKEVTEMLARIFVQSAHPIVIRWLLLNKTEVFLTYSHNIGDMMDMVSWQRVGGNSGMQSTNGKDVAIFVSCGGNP..FAENNKDHPTYGNGFAAsARLQIIAAQELGHFADIKRDDKGRQITRHSANFSGTKATDKVRIARKNDIIHCHNLLuKLLKAGMKKQLDYETKLKFYNANKVSGLKVYAIKFMIFIYKFRLLNYSSRNNLIFVRKFKTDcYMALMI-AMFKDMQANLSPsADVYKNKNPEIEEAIACIEALARVPQQTlKWGYLTTKETMHDLYKIYYNEVIPSLITSYNAlTGENYpRDFKKPKSNFFSKINIFSNKKL..VLK....PVREL. 0 1 2 2 +10704 PF10872 DUF2740 Protein of unknown function (DUF2740) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function has a highly conserved sequence. 25.00 25.00 30.20 122.00 24.10 23.70 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.46 0.72 -4.30 3 83 2009-01-15 18:05:59 2008-07-29 09:12:15 3 1 81 0 1 6 0 48.00 94 100.00 CHANGED MPKQLSPDQDKLHKNILRDRFLSSFKQPGRFRAELEKVKLMQKEKGHE MPKQLSPDQDKLHKNILRDRFLSSFKQPGRFRAELEKVKLILKRKGHE 0 0 0 0 +10705 PF10873 DUF2668 Protein of unknown function (DUF2668) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as Cysteine and tyrosine-rich protein 1, however currently no function is known. 
26.20 26.20 26.40 26.40 25.50 26.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.20 0.71 -4.06 5 49 2009-09-16 09:25:17 2008-07-29 09:12:45 3 2 35 0 26 38 0 129.00 62 85.86 CHANGED MDALR..LPRRPGVLL.KllLLFVYAGDCpAQCGK-C+SYCCDGSTPYCCSYYAYIGNILSGTAIAGIVFGIVFIMGVIAGIAICICMCMKNNRGTRVGVIRAAHINAIS..YPM.APPPYTYDHEMEYsTDL.PPPYSPAPQASAQRSPPPPYPGNSRK ....................R.....tshlhL.cllLLhlhA-cCLA.QCGpDC..+SYCCD..GoTPYCCSYY.AYIGNlLS..GTAIAGI...VFGIVFIMGVIAGIAICICMCMKNp.Ru.TRVGllRTo+INslo......oYPh...sPP.PYsY-aEMpassDL.PPP..YoPsP...ptssphSPPPPYPG.s+K....................... 0 4 7 13 +10706 PF10874 DUF2746 Protein of unknown function (DUF2746) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 21.90 21.90 22.20 22.20 21.40 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.90 0.72 -3.78 5 73 2009-01-15 18:05:59 2008-07-29 09:14:46 3 1 72 0 1 59 0 60.10 54 51.59 CHANGED I+pQVsNoHDTNlRDDLD-lt..........EhVp-GF+clc+DIstL+E-LsTERpERIEGDRRR- ..I+-QlsNTH..-TNMRDDLD-lt......................................-hV...+-GF+plp.......R.DIuGLREEL..RTERlERIEGD+R+........................ 0 0 1 1 +10707 PF10875 DUF2670 Protein of unknown function (DUF2670) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 29.50 39.60 19.40 19.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.65 0.71 -4.14 3 44 2009-01-15 18:05:59 2008-07-29 09:15:40 3 1 44 0 7 29 0 139.20 78 86.46 CHANGED MWQALRRLIAANPMGFFLWSIITKWYLIIAVASLITLYYTVLGLKKIGFIDYFGRTTVEILDTSKAVAQNCTsKLGPNWs+LVN.......FWNCLSDPGEYcHEEGTGAKVLEDEINKLhsKQAD......SluDscsPIINPYEcLsNsN .......MWQALRRLhAANPMGFFLWSIITKWYLIIAVASLITLYYTVLGLKKIGFIDYFTETTVEILDTTKAVAQNCTTKLGPNWN+LVS.......FWNCLSDPGEYKHEEGTGA+VLEDEINKLTPKQAD......SlADAE+PIINPYEtLENsN.................. 
0 1 2 2 +10708 PF10876 DUF2669 Protein of unknown function (DUF2669) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 34.40 34.30 21.80 21.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.57 0.71 -4.45 4 30 2009-01-15 18:05:59 2008-07-29 09:16:31 3 1 24 0 4 23 0 126.40 50 93.33 CHANGED hpIEslTYsMTPANAhsAWsuLKpAhtLLpusDlsslGcpps..huushLsslLuNLGDPulptlEslVLKaTosc.tDGppYRLS..-RFspHFNpaRuHLl.VLhEGLhYQaADFFhGGsuhhssh.s.hstsp .....hpI-slTYhMTPANAMsAWpuLKpAhsLLpuhDhsuluNsps.....huussLuslLupLGDPulpElEslVhcpTuhcssDGsp.YRLS..DRhspHFNs+RsHLl.VLhEGlhYQauDFFsGGhuuhpsl.P..sApp....... 1 0 2 3 +10709 PF10877 DUF2671 Protein of unknown function (DUF2671) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Rickettsia spp. 25.00 25.00 31.50 186.70 20.90 20.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.16 0.72 -4.19 2 42 2009-01-15 18:05:59 2008-07-29 09:17:29 3 1 42 0 6 19 0 90.00 93 99.82 CHANGED MQEKELSNNFLEEQpp.KEDsSPF.DlKYICQASLLITDSIRKGYDVTQLsNGDINVTElRIVNVHYNWNSEKGKFVKTNQIEFNNsKGG MQEKELSNNFLEEQE...KSKEDsSPFFDVKYICQASLLITDSIRKGYDVTQLPNGDINVTEVRIVNVHYNWNSEKGKFVKTNQIEFNNSKGG. 0 1 1 1 +10710 PF10878 DUF2672 Protein of unknown function (DUF2672) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Rickettsiae. 20.70 20.70 23.40 38.90 19.20 17.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.25 0.72 -4.24 3 43 2009-01-15 18:05:59 2008-07-29 09:18:14 3 2 43 0 7 19 0 67.80 79 64.98 CHANGED hIIKclKKIKAYF..IKS.hIKNIDcSLETEQlNFYLKKIINLEGYYaGNYDLTTIKEKYYTLI...INNDL ..........IIIKELKKIKAYLINIKSSIlKNIDEPLETEQlNFYLKKIINLEGYYHGsYDLTTIKEKYYTLI...INNDL....... 
1 1 2 2 +10711 PF10879 DUF2674 Protein of unknown function (DUF2674) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be conserved to Rickettsia spp. 25.00 25.00 150.20 150.10 20.60 19.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.16 0.72 -4.24 3 40 2009-01-15 18:05:59 2008-07-29 09:18:51 3 1 40 0 6 13 0 67.00 93 99.93 CHANGED MQNPTQKVISFSEHKADIERIKKuIEEGWAIVKLVPNKDRFIGLLEKISHAE.DETIYIPPRKKIIVN MQNPAQKVISFSEHKSDIERIKKSIEEGWAIVKLVPNpDRFIGLLEKISHAE.DETIYIPPRKKIIVN. 0 1 1 1 +10712 PF10880 DUF2673 Protein of unknown function (DUF2673) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Rickettsiae spp. 19.80 19.80 19.80 89.00 19.40 19.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.02 0.72 -3.93 3 42 2009-09-11 07:30:40 2008-07-29 09:21:59 3 2 42 0 6 19 3 64.20 87 78.54 CHANGED MKNLLKILLILAFSAPVFASS...QlP.DPASVTTTQIpAMSTsDQQAWVASLTADQYNMLSPDVQKW MKNLLKILLILAFASsVFASS..MQMP.sPASVTTTQIQAMSTDDQQAWVASLTAsQYNMLSPDVQKW.... 1 1 1 1 +10713 PF10881 DUF2726 Protein of unknown function (DUF2726) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
24.70 24.70 24.80 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.28 0.71 -4.52 35 880 2012-10-11 20:44:46 2008-07-29 09:23:18 3 9 678 0 167 661 25 119.50 22 36.87 CHANGED pc......tLhsssEpshhptLcpulss....ph.plhs+Vphu-llps......stpptppAhsphsu+phDFllsD.psh...pslssIELsss..sHp......psptppRDthhcpshcsAGlPllclphppshssspl+cpl.tsl .................................................t...lhs.tpEtthhptL.phlsp.........ph.hlhs..pVphspllp...........ttpphhph.hp.h.hs.p.tp......hDaVlhc.t..psh.................pslssIELDss..p.Hh.............ptp..ptcRDthhpplhcpAGlPll.Rhp.pt.....p............hh...................................... 0 45 92 132 +10714 PF10882 bPH_5 DUF2679; Bacterial PH domain Pollington J, Finn RD, Bateman A anon PRODOM Domain This family of proteins with unknown function appear to be related to bacterial PH domains. This family was formerly known as DUF2679. 22.30 22.30 22.30 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.93 0.72 -4.22 27 319 2012-10-04 00:02:25 2008-07-29 09:24:37 3 5 295 0 86 264 11 101.30 19 37.49 CHANGED lcYtl-cpsLhI.pthhu..phpIPh.....spIpplphssshh..tsl+lhGhuthtahhGphh.hpchGpsphasTp.scphlhlcTss.psYsISPcsh-pFhppLcp+t ...........................................htlpsppl.l.ps.hs..phpIs.h.......spIp.s...l.........p.....h..h.s...sls.....tth..Rs...Gsu..ssthhhG.pFp..hp.shu.p....hhhh.sTp..spsllhl.c.Tc...c..psh...h..l....oscp.pt.........t....................... 0 36 71 81 +10715 PF10883 DUF2681 Protein of unknown function (DUF2681) Pollington J, Finn RD anon PRODOM Family This family of proteins is found in bacteria. Proteins in this family are typically between 81 and 117 amino acids in length. 
27.20 27.20 27.20 28.30 26.30 27.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.91 0.72 -3.78 13 42 2009-12-08 16:02:13 2008-07-29 09:25:47 3 1 32 0 7 42 0 83.90 34 81.75 CHANGED M.hslhlh.ussulhuslhualha+sc+upcc........tpLppEppQlpsEtpstpspVKNhcl+QKNEEss+phSpcsV.-pLppcG.hR.D ...........hshp.l.lhuhsulhshlhuYlha+lcptccc.......stpLhppNpQLpsEpsstpspVKpapl+pKN-EsspphuRssll-pLpppG.hR-....... 0 1 5 7 +10716 PF10884 DUF2683 Protein of unknown function (DUF2683) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Methanosarcinaceae. 22.80 22.80 25.00 24.70 21.80 21.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.69 0.72 -4.05 7 17 2009-01-15 18:05:59 2008-07-29 09:28:00 3 1 13 0 12 17 10 74.50 35 88.79 CHANGED MVQAhlsIoDcoNpILsIlKAKYsLKDKStAI-hllppYpp.hLEP.EL+PEFlEchppIhc.cchl.lsol-sL+thhtt ...................MVps.lslsDcsNpIlpIhKApasl.cKS.AIshhlppapp.hhEs.El+PEFlcchpcl.KtpchIclsshcsLRccht.h... 0 2 4 6 +10717 PF10885 DUF2684 Protein of unknown function (DUF2684) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as yqgD however currently no function is known. 25.00 25.00 35.70 35.70 19.50 19.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.12 0.72 -4.12 2 241 2009-01-15 18:05:59 2008-07-29 09:29:20 3 1 239 0 7 33 0 76.70 77 95.94 CHANGED hstDsThoLpVtsTGSLSVNpYGWINIWhAILGpFFsQFPhFFEuph.lhps.h..hsDNAsIhRIYhL.F.sllGhK......p++ss ........MstDsThoLpVpsTGSLSVNpYGWINIWMAILGQFFTpFPLFFESCLILLKTWLEIFPDNAGILRIYLLQFSAIVGYKT..........RRAA....... 0 1 1 2 +10718 PF10886 DUF2685 Protein of unknown function (DUF2685) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as uvdY.-2 which is an open reading frame within uvsY. However currently there is no known function. 
25.00 25.00 31.50 31.30 23.00 21.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.65 0.72 -3.95 6 36 2009-01-15 18:05:59 2008-07-29 09:30:35 3 1 35 0 0 25 0 53.80 53 96.13 CHANGED splCVVCKpPlccA.LuVcTspGsVHsG.CtpalpEhslSESs.....-p.LpETQLLh ...cICVVCKpPI-sA.LVV-T-pG.PVHPGsChsYlp-h..P....lS..ES.s.....-cpLsETQLLl 0 0 0 0 +10719 PF10887 DUF2686 Protein of unknown function (DUF2686) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as yjfZ however currently no function is known. 25.00 25.00 69.60 69.50 18.10 17.40 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.75 0.70 -5.67 3 406 2009-01-15 18:05:59 2008-07-29 09:32:52 3 1 257 0 2 128 0 263.10 67 99.79 CHANGED MSMPLSNAhsoQasTsNHFLHHPpVDSElT+Ka.cYARhDLENIYL.PLsRGNNHNYDGKSsVEIRKLDISKpSW.PFNYVTssCREaDGITTTGRMLYRNLKITSALDEIYGGICKKAHAATELAEGLRLNLFMKSPFDPVEDYTVHEITLGPGCNVPGYAGTTIGYISTLPASQAKRWTNEQPRIDIYIDQIhTVTGVANSSGFALAALLNANIELGNDPIIGIEAYPGTAEIHSKMGYcVIPGDEDAPLKRMTLQPSSLPELFELKNGEWNYIGK .......MohP.....po.h.hssah.HaspsDp.lphKa.phARhDpENIYL.PLsRGNNHNYDGKSVVEIRKLDISKps.W.PFNYlTpsC+E.DGITTTGRMLYRNL+ITosLDEIYGGICKKAHAuTELAcGLRLsLFMKuPF-PVEcYTVHEITLGPGCNVPuYAGTTIGYISTLPASQAKRWTN.EQPRIDIYIDQIhTVoGVANSSGFALAALLNANI-LGNDPIIGIEAYPGoAtIauthGYcVIPGDEDAslKRMTLpPSSLPELFELKNGEWNYltp........ 0 2 2 2 +10720 PF10888 DUF2742 Protein of unknown function (DUF2742) Pollington J, Finn RD anon PRODOM Family Members in this family of phage proteins are the product of the gene phiRv1, however no function is known. 
22.70 22.70 22.80 22.70 21.80 22.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.36 0.72 -4.06 4 68 2009-01-15 18:05:59 2008-07-29 09:33:11 3 2 51 0 7 28 0 93.70 67 74.31 CHANGED ASRtVSWWsVHpaVtPhLstA..GsWPMAGTPAWstLDDsDPhKWAAlsDAucHWsLRVETsQpA.A-ASp-VSAAADWsuluRclpc+cshahtRshl ..............sSRtVSWWSVHEaVAPsLsAA...spWPMAGTPAWssLDDsDPhKW.AAICDAARHWALR..VETCQ.....sApA-ASRDVSAAA.DWPAluREI.pRRRssYItRssV................. 2 3 5 7 +10722 PF10890 DUF2741 Protein of unknown function (DUF2741) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as ubiquinol-cytochrome C reductase however this cannot be confirmed. 24.30 24.30 25.00 24.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.51 0.72 -3.84 4 42 2012-10-01 20:10:32 2008-07-29 09:36:22 3 2 24 0 29 45 0 69.90 60 74.83 CHANGED MGKpPl+LKAVsYALSPapQKVMPGLWKDlsuKItHKVSENWluAsllhu.PlsGThpYA.aYpEpEKLcHRY .........MGKt.PVRhKAVVYuLSPFQQKlMsGLWKDlPsKIHHKVoENWISAsLLls.PlVGTYpYst..apEpEKLpHRa........................ 0 8 21 25 +10723 PF10891 DUF2719 Protein of unknown function (DUF2719) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Nucleopolyhedrovirus. 21.40 21.40 21.90 21.50 21.20 20.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.98 0.72 -4.30 5 33 2009-01-15 18:05:59 2008-07-29 09:37:14 3 1 32 0 0 27 0 74.40 32 83.87 CHANGED MLRALKRRFKsAssEp+REED..VVLCPRCYFVAPGcISVADYTRMHIKFNEQFAD+CsNNFsVTQPKTWuNYoNCSALYYPL ..........................................cc..psp..VVhCs+ChFVAPhSlSaEEYlcLHcpFNphhsspC.........................h..t.................. 0 0 0 0 +10724 PF10892 DUF2688 Protein of unknown function (DUF2688) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as KleB however currently no function is known. 
25.00 25.00 25.00 59.10 24.70 17.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.00 0.72 -4.31 3 19 2009-01-15 18:05:59 2008-07-29 09:37:23 3 1 15 0 2 9 1 60.30 64 84.44 CHANGED MsKGKIE..IVETsCRRCGKSIRTLSHSLIGAD-LREKLGGICG-CITPEEDpcITEuhLuA ..MsKGKIE..IlcTsCRRCGKSIRTLS+SlIGAD-hREKhGuICGsCITPEEDpclpEhhLtA..... 0 0 1 2 +10725 PF10893 DUF2724 Protein of unknown function (DUF2724) Pollington J, Finn RD anon PRODOM Family This is a family of proteins with unknown function. 25.00 25.00 26.00 25.70 24.50 24.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.67 0.72 -4.11 4 172 2009-01-15 18:05:59 2008-07-29 09:38:01 3 1 164 0 8 86 0 64.50 68 94.77 CHANGED MLpsEPSFASLLVKQSPuMHYGHGWIhscDGKRWHPC...+SQsELLtGLuTK+.tpp.........allKul+plpR ...MLTKEPSFASLLVKQSPA...M...H...a...GHGWIMGcDGKRWHPC...RSQDpLLAELSTKKpGp..........WLLKuhhRLF+..................... 0 0 2 6 +10726 PF10894 DUF2689 Protein of unknown function (DUF2689) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as TrbD however currently no function is known. 25.00 25.00 30.10 29.70 20.10 18.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.10 0.72 -4.19 4 128 2009-01-15 18:05:59 2008-07-29 09:38:19 3 2 101 0 2 84 0 58.30 79 62.49 CHANGED MNMRNINlITAhSVPsKoVSDDFMHAVLSNCTTRIVLPAPKcFuSESLPHNFNMAAVGVMK ..MNMRNINVITAhSVPsKoVSDDFMHAVLSNCTTRIVLPAP+cFuSESLPaNFNMAAVGVMK......... 0 0 0 1 +10727 PF10895 DUF2715 Protein of unknown function (DUF2715) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Treponema pallidum. 
21.70 21.70 22.10 23.10 21.30 21.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.21 0.71 -4.77 8 104 2009-01-15 18:05:59 2008-07-29 09:38:40 3 1 15 0 11 46 0 146.80 34 86.65 CHANGED Chh......sshAupVFlSP+lGhsuhhhhGpsh.upthptst..p...........asPhlG.lsVulhAcNGhshssslDAuLTpLMFRuQsLlGYuhRhG..........................................shtalPsoGlsl....huop-ct........lhG......VPlpLshQaaFssahGl-sssouuVGlshp.sh.................................Dhpa......................................spa.........plPlolRlGPVFRl ..........................................................................shsspl.lSs+lG.ht.h.hG.ph.s.hh...........................sPhhG.lsluh.Ac.NGhhhthslDAuLTpLhFp.......upsLhGYuhR.G..........................................t..hhhs..hhsh....hspttt..............u......lslplthpahhsthhGlshshssuhsl.......................................s.th.............................tpa..........hhPhslplGPsFRh................................................................................... 0 11 11 11 +10728 PF10896 DUF2714 Protein of unknown function (DUF2714) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycoplasmataceae. 23.90 23.90 26.40 51.60 22.60 23.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.67 0.71 -4.39 7 55 2009-01-15 18:05:59 2008-07-29 09:39:24 3 1 52 0 16 37 0 145.60 37 85.27 CHANGED spYcclhps.s....FlsYppLhspsLLcss.shpSs.hpcahcphh.Aatp+h-lVFcsFVIoashN.+FShstLlPhlstsEsSNo-uhN.hpssNsppapphLsp.aNphhp.pLhppp+hVElhPslIlFhuppTcpLKllFscphlhs ...pYc-hhssss....hloY-pLhuoVLLcsplGFpSclYpcFhp+hphAhcp+h-IhFcsFlIoFNlNLKFSsshLlP.lLsspEsSso-AlN.Fpss..ps.paspFLts.aNphIp.pLlpps+hVEIhP.slIlF+Spp.ssoLKllFScchl.o...... 
0 9 15 16 +10729 PF10897 DUF2713 Protein of unknown function (DUF2713) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 65.30 65.30 22.90 22.20 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.59 0.70 -5.12 3 258 2009-09-11 09:11:29 2008-07-29 09:40:13 3 1 235 0 1 98 0 209.60 84 88.19 CHANGED VFLIEQINDLKMWVNKYIDDCTDEDLNDRDFIASVVDRAIFHFAINSICNPGDNKDATPIERCTFDVETKNGLPSTVQLFYEESKDNEPLANIHFQAIGSGFLTFVNACQEHDDNSLKLFASLLISLSYSSAYoDLAG..+VNINEYNENYLKAQFEELSQRDMKKYLGEMKRLADGGEMNFDGYLDKMSHLVNEGTL-PDILSKMRDAAPKLIDFAKSFDPNSKEKIKILTDTSKLIYDLFGVKSEK ..............................MWVNcYIsDCTDE-LNDR-FIASVVDRAIFHFAINSICNPtDNKDAo.IEpCTFDVETKNsLPSTVQLFYEESKDNEPLANIHhQAIGSGFLTFVNACQEHDDNSLKLFASLLISLSYSSAYsDLut..pV.INE.NEsYLpAQFEcLSQRDMKKYLGEMKRLADGGEMNFDGYLDKMSHLVNEGpL-PDILSKMRDAAPpLIsFAKSFDPsSKEcIKILTDTSKLIYDLFGVKSEK............................. 0 1 1 1 +10730 PF10898 DUF2716 Protein of unknown function (DUF2716) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 21.00 21.00 28.90 24.40 19.10 18.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.17 8 75 2009-01-15 18:05:59 2008-07-29 09:40:36 3 1 68 0 20 62 0 133.90 45 81.17 CHANGED NWhtLScpEh-plW-+lYcca+F+..PuhStaPSh+sPsPalTaDlSshFsstuthss.....--lEpcsL+AFppsTtssEahhALDWQH-CYhlsP+lshp+D...EFs.EWhlPVaPNGDYYFFlpcDF+WGhLGHPWEpSITIFGc-LI ..............tWh.ho-pE.-plWsclasch.cFp..Pu..h..s..taP.uFchPs.PalT.a..clSph...h..sc.ss.hss........h--L-ccsLpsFpc.....sTss..sEahhALDWQH-CYhhsPHhph..p+s............EFs.EW.......l...PlF.PNGDYYFFlpcDFcWGhLGHPWEpoIolFGctLI........... 0 6 14 16 +10731 PF10899 DUF2743 Protein of unknown function (DUF2743) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
21.10 21.10 21.60 22.30 20.90 21.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.28 0.71 -4.92 9 148 2009-01-15 18:05:59 2008-07-29 09:41:27 3 2 147 0 18 78 0 120.60 50 33.97 CHANGED pphh..pYssEchpa.h.s+pp.......lp.hshPMsCFsDIPLpclp.Hsp.....tYGpauIuhcKchulppshsPVhY....h.pss.hhpslhphhptl.s.........................................ptpspptl.psLthhhhhlKsa-ss..............pcsFYsE+EWRhlssh....t.....hh.cpp.....otpph..pt.hphp....+FsssDIcaIhls ............sSWSYRNGQRTVYGD..................................................SPVVCFTDMPIAAYLE.T.GVRRL..ERNEKIGLYAIVLPKEQM.FN..YGARPVIY.......GLDpHN.hp...............................................................................................................................................................................................suphppucph.c.hs............................................................................... 0 8 11 15 +10733 PF10901 DUF2690 Protein of unknown function (DUF2690) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 21.70 21.70 21.70 23.80 21.10 21.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.63 0.72 -4.03 6 208 2009-01-15 18:05:59 2008-07-29 09:43:39 3 5 124 0 29 142 0 109.60 43 54.09 CHANGED pG+ShsGKsP.h.uCstsh.....s+hlsusshshuhVEL+aSsoCKTAWA+lT...s.s.s.cs.A.lhRsoDGKclss.uAGGsGch.....ssGpTssYTPMVhshDsRpuhAp ...............................tYDGKsPh.h..s.......oCDssuhs.....tcsphlsp......ss..........t...h......u.h.VELRaSssC+sAWAKlsl.....s.ssshss.A.....Ahl.......s+...ts..sGp.thos.sSsuGNGsl.........ppGQTS..sY..TsMVaDLss...................................................... 0 9 22 27 +10734 PF10902 DUF2693 Protein of unknown function (DUF2693) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
21.40 21.40 21.40 21.70 21.30 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.70 0.72 -4.07 6 77 2009-01-15 18:05:59 2008-07-29 09:44:26 3 1 68 0 3 63 41 71.70 32 60.54 CHANGED GpHpllFcKuDGolRsMhATRDshLl.spppGc.hh..sscsp..RKEscES....lsVYDlcscuWRSFplD+LISlsGhsltcLltl ............................phhapKsDGolRptpGThcsshhs..hppttp.........ts....p+cspss....lsaa.Dl-tpsWRoF+h-pLlsl...........h...... 0 0 3 3 +10735 PF10903 DUF2691 Protein of unknown function (DUF2691) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 26.50 26.50 24.40 21.50 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.99 0.71 -4.39 9 107 2009-01-15 18:05:59 2008-07-29 09:49:18 3 1 92 0 9 81 0 144.00 44 95.65 CHANGED pRGloF-IPNtYsphLhcILpsl-IssasW..l.ustEuYhltcGpL.scpLFscs.plhpGtsh+chlcss.YYlIFsDLKAaPpGchls-IpTYE-FhcScCElllLlsDusYVslYsKDpchlEhLYpNAhppuFppl-YITDENDsRTpLoV ...KRGIoh-I..Ps..p..Y..ssh.Lh..+lLKP.lcIs..s...a....sWh.l.sspEuYllhp.spL..sptLFsc-spl.h-Gp-L+cllK..sNhYYlI..FsDLKAaPKG....c..hl.....h-IpT.....YE..EFhcScCElVlLlsDupYlpIYsKspchIEhhYpNAhspGFh.V-YlTDENDsRTpLoV........... 0 2 6 7 +10736 PF10904 DUF2694 Protein of unknown function (DUF2694) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacterium spp. 20.90 20.90 20.90 21.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.25 0.72 -4.01 3 64 2009-01-15 18:05:59 2008-07-29 09:49:44 3 1 61 0 7 23 0 99.50 77 97.97 CHANGED MTDANPAFDTVHPSGHILVRSCRGGYMHSVALSEuAMETDAETLAEGILLTADVSCLKALLEVR-EIVAAGHTPSAEVPTsRDLDVAIEKLLAHQLRRRs+ ..............MTDANPAFDTVHPSGHILVRSCRGGYMHSVSLSEAAMETDAETLAEAILLTADVSCLKALLEVRNEIVAAGHTPS....AQVPTTDDLNVAIEKLLAHQLRRRNR............. 
0 1 3 6 +10737 PF10905 DUF2695 Protein of unknown function (DUF2695) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 30.50 29.90 21.20 20.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.69 0.72 -4.24 6 82 2009-01-15 18:05:59 2008-07-29 09:50:16 3 1 77 0 8 46 3 53.30 52 46.29 CHANGED Llsa.lsc+Lss.sCDHTh+auppahpp+plsh...csllchLtcpGGaCDCEllhN ........LhDY.VDE+Lu.cp.sCDHThRaupcahps++l-h...EolhEtLpEhGGYCDCEIlhN.. 0 5 8 8 +10738 PF10906 DUF2697 Protein of unknown function (DUF2697) Pollington J, Finn RD anon PRODOM Family This is a eukaryotic family of proteins with unknown function. 22.80 22.80 23.40 22.80 22.70 22.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.30 0.72 -3.97 9 39 2009-01-15 18:05:59 2008-07-29 09:50:42 3 1 38 0 29 37 0 66.00 40 78.69 CHANGED EEWLYhKLlsSsuFppaVR+lasKlNtIp.pPh.cppss.sphh....Y+PTphpKFpAFRllaaDEhKpoF .....EEWLYh+LlsSPuFHRFVR+lapKVNtI+.s..scps.ss..phl......a+PTt.hpKhpAaRhLFhDEh+ssh........................... 0 3 14 25 +10739 PF10907 DUF2749 Protein of unknown function (DUF2749) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins appear to come from the Trb operon however currently no function is known. 22.00 22.00 22.30 22.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.31 0.72 -4.10 8 39 2009-01-15 18:05:59 2008-07-29 09:50:55 3 1 18 0 15 40 1 58.90 40 41.19 CHANGED MSptVlIALlls..VAuuuusATslIVpsc.......susssuhuEEQRssRE+FFGusp-.PPI+cGQEM+PRW .....................................................................s...tt.ps+REpFFuus.c..-IRGGQcMcPRW....... 0 2 6 11 +10740 PF10908 DUF2778 Protein of unknown function (DUF2778) Gunasekaran P, Mistry J anon Pfam-B_001575 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
24.10 24.10 24.20 24.90 24.00 24.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.82 0.71 -3.90 28 375 2009-01-15 18:05:59 2008-07-29 09:53:21 3 7 237 0 108 328 3 100.10 35 40.73 CHANGED tpl-AaSGhGphhDcPctsslc.hGshPPusYhlh.R-u..........................................hhcGVc..AhRLpPl....................G.pGtSpGClohcshscFhphhcthchp.ph..ssVPuh ...........t..h.AaSG..st.hhspPp..hss.t.tGshPstp.Y.lh...Rps..............................................................hh.....pGVp..shRLpPs...................G.hG.SpGClohp.....shs.....pF.phhthh.hp.ph..shlst....................................... 0 19 44 70 +10741 PF10909 DUF2682 Protein of unknown function (DUF2682) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 24.80 24.80 24.90 90.40 21.80 24.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.61 0.72 -3.70 7 13 2009-01-15 18:05:59 2008-07-29 09:54:05 3 1 13 0 0 12 0 82.20 44 87.91 CHANGED LhsVQcssLcLspEspsaLssssss..............pLEpcLhpLlhpsNsIsFD.....cppthp.LKsNlhsCINIhIDLIhIKphh LltlQssVLDlh+ElspaLNsssPsh.........cthshL--hLTKhLhcuNAIsFD.....cppsLchl+sNIshCLNhhIsLITIK+Yl. 0 0 0 0 +10742 PF10910 DUF2744 Protein of unknown function (DUF2744) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 
25.00 25.00 26.00 25.70 24.50 18.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.42 0.71 -4.30 6 89 2009-01-15 18:05:59 2008-07-29 09:54:53 3 1 88 0 4 50 1 133.50 44 89.78 CHANGED ch.opEssDPscPEEtFlWsLpslPshsGssu......lhsssaL+thSKHLW-CGht.sD..............S.lPtQpl.KaQsPhRGppphhNssupWVshDsP-Pp.hRlpDstshTsQEppA.Lt.a+phGhl.st.s ....h.oQEcsD.ccPEEthAWAhh.s.LPtsu.G..ssu......lopPshhctWS+HLa-hGht.HsD.LcpLADEsGNIHVSpLPpQph.KaQsPhRGsRppa.NsA.ApWVspDsP-P..hRl.Ds+..pLTpQEppA.l.p..a+phGhI.s....................................................... 0 0 3 3 +10743 PF10911 DUF2717 Protein of unknown function (DUF2717) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as gene 6.5 protein however currently there is no known function. 25.00 25.00 29.70 29.30 19.90 18.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.68 0.72 -4.48 4 32 2009-01-15 18:05:59 2008-07-29 09:55:47 3 1 29 0 0 19 0 75.70 44 90.28 CHANGED MLpPIp+hlpNPsDIPslPRAstEYLQVRaNtuYlhtSGhlstLRtsGaSEuaIAGFlpGLphA...SpslDEhE.lRKEQ ...........MLpPIpphhppPpDlPslPRustEYLQVcFNsuYhhtSGhlsth+ts.GhSEuaIhGFltGLtYA...SpllD-h-..hR+-..... 0 0 0 0 +10744 PF10912 DUF2700 Protein of unknown function (DUF2700) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Caenorhabditis elegans. 
21.80 21.80 22.00 24.40 21.70 21.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.80 0.71 -4.20 6 52 2009-01-15 18:05:59 2008-07-29 09:57:19 3 4 5 0 52 47 0 136.20 22 65.64 CHANGED lPARPLVssLulhGllRuhuphhhu.sshhpRlschhaLhLNLLLLFGAsKNstsALKWSQRlshhsVlLuVIpFMIaPVhFASasASG....h-pNtThh...clEplusKTp.Ec+..FVhGhLoGYslEFussLhIGlElLKYlLlNRLW .................................hR.hlhhhuhhGlltshh...hhhs...ssh..h.t...h.....hs.hh..h..lhl..phllLaGshppsthsL+hupplshhsl..lls.hl.hhlhPVhhuShhASG............hpts..tsh.........p.....h..........s..t....ppp..........Fh.Ghh..Gh.hEhhhh................hhlsh.hhpahhlp............................. 1 8 11 52 +10745 PF10913 DUF2706 Protein of unknown function (DUF2706) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Rickettsia spp. 25.00 25.00 107.70 107.60 17.90 16.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.10 0.72 -4.37 2 39 2009-01-15 18:05:59 2008-07-29 09:59:14 3 1 39 0 4 15 0 58.70 90 99.87 CHANGED MLK.hKhhllLIMLuQLLSCTPSAPYEIKSPCVus-IsDtuplshNPClRRPVNS.lsIs MLKSLKhLLVLIMLAQLLSCTPSAPYEIKSPCVSADIDDGSSlSVNPCIRRPVNS.VNIV. 0 1 1 1 +10746 PF10914 DUF2781 Protein of unknown function (DUF2781) Gunasekaran P, Mistry J anon Pfam-B_001738 (release 23.0) Family This is a eukaryotic family of uncharacterised proteins. Some of the proteins in this family are annotated as membrane proteins. 
21.20 21.20 21.30 21.20 20.90 21.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.98 0.71 -4.43 42 369 2009-01-15 18:05:59 2008-07-29 09:59:44 3 7 206 0 245 335 1 142.00 24 59.60 CHANGED Dhlhhlaalhphshsh.....llDs..hhP....thh.t........htsltpaYlspapDhlh.....p.sPs.........ahpsh..................................hhhEhhaplPhhlhslY.................ulhps....sshhhshsLlauspsshophsplsphhh...............h.h..spctp..lltlYhPah.....llshlhslchhhpht ........................................................Dhhhhhahlhthshsl..................hhDh..h.hs..........lh.sh.........tpltphY.hpphpDhLhs.......sss...........ah+sh..................................hhhEhhapl.PhhhhuhY............................................ul..hts.........s.sah.shsllaustssp...sphspluthht...............hshthspc..tp..hlhsh.hsYh.....llPhllhhchhhp....................................... 0 57 107 171 +10747 PF10915 DUF2709 Protein of unknown function (DUF2709) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 23.00 23.00 25.00 386.50 21.20 22.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.39 0.70 -5.37 3 41 2009-01-15 18:05:59 2008-07-29 10:02:31 3 1 41 0 8 17 2 238.00 79 99.72 CHANGED sVIosslKchLLQFLK+pKsAELLsTYLFFLEpKaQLQPVLFlRDKlIYQSAEDsIspLEcEGKLWRETEIpIusG+PuVNEpTKKIYICPFTGKVFGDNTHPNPQDAIYDWVSKCPENKERIuGlRVKRFFVSEDPEVIKoYI...KsRR-PIpKlVYSSuITGKLFsS+pAVIEDFK+SYLKsloLVEVQsQNKFKIEDsFLoFIQDpL-E-KIApFVEoLA-asEFcsYVcpWVEsE- .MNISGSIKQKLLQFLcKQKuPELLATYLFYLEQuLpLsPVVFVRDKIIFKSsEDAIplLEtDKKIWRETEIQIoSGKPEVNEQTKRIYICPFTGKVFADNVYANPQDAIYDWLSSCPQNpERQSGVtVKRFLVSDDPEVI+sYI...VPPKEPIlKTVYASAlTGKLFHSLPsLlEDFcoSYLRPMTLEEVQNQNKFQLEoSFLoLLQDALEE-KIAEFVESLADDTAFHhYISQWVDTEE. 
0 4 5 7 +10748 PF10916 DUF2712 Protein of unknown function (DUF2712) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillales. 21.70 21.70 21.80 22.50 19.40 21.60 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.93 0.71 -4.23 2 33 2009-09-10 14:58:50 2008-07-29 10:03:04 3 1 30 0 2 23 2 139.40 58 98.59 CHANGED hppFhppNhRhlhAhslGl.lhAss.a.KAussNhtFchhl..shuNuhSsstaRpTopssNPWKVpLppSsEGKGTIhoFWL.h.stNpphspuSpIhNVKQGuts+YhtA.p.us+shshLAsENNNYsupoYhlDGlWDEETW ..............hppFhp+.hRLlhALVIGlLVF.APshH.SKAA..DNsIGFDFKLKPNCANSGSoSRYRETSSVNNPWKVRLcsSTEGKGTIASFWLGTYNKNKsAspGSsIMNVKQGAKT..RY.CGAYKVANKNTTYLAAENNNYNSKTYYVDGIWDEETW.... 0 1 2 2 +10749 PF10917 DUF2708 Protein of unknown function (DUF2708) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Caenorhabditis elegans. 23.20 23.20 23.40 23.60 22.90 23.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.31 0.72 -4.16 6 26 2009-01-15 18:05:59 2008-07-29 10:04:11 3 2 5 0 26 18 0 43.70 55 53.18 CHANGED MNhYSlFVFAlLuISuVo......ssG++Ct.GGNG.YGuG....VlIGAtK ...MNVYSVFl.FAlLAISSVS.......tG++Ct..GssG.YGuG.......s.....IlIGAtK....... 0 8 13 26 +10750 PF10918 DUF2718 Protein of unknown function (DUF2718) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
25.00 25.00 42.80 42.30 19.40 19.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.01 0.71 -4.11 4 13 2009-01-15 18:05:59 2008-07-29 10:04:30 3 1 12 0 0 8 0 124.50 54 96.89 CHANGED MLCIFYLuRLCNLIIYSLYSLLMYPMpKLISFMFGELNPFc-VLPcscKKDD-ssl.........sIhPsEssslPpphP.plp-pt-pssls.pNs..................NGVFDFMKhPNPFKRY..YEYs.sYs.ppspspPp+Vp...cKuFlE+MlEMVE MLCIFYlARLCNLIIYSlYSLLMaPMpKLISFMFGpLNPFsss.s.spKhpDshss..........htPh-sppIsp-hPLslh-ptppsshs..ss..................NGVFDFhKIPNPFK+Y..YEh....s.pNshKpPs.......KGhh-.MMphlE..... 0 0 0 0 +10752 PF10920 DUF2705 Protein of unknown function (DUF2705) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 46.40 45.90 24.50 24.30 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.79 0.70 -5.16 2 30 2009-01-15 18:05:59 2008-07-29 10:07:08 3 1 28 0 4 15 3 206.10 66 99.79 CHANGED M+.pthhIhlVhhlF.tul...uas.shpshPhLDGhPluhus.h.hp.lLhWalPIluhSFhhSGsI+Dhh.SYt.LplsRpap+.hWlhpQFLhlhlhlllFT..QlAlhaIao.hShas.......st.FlhhhLhY.lhLhslFShQhhhELahcuQhA.L.IssYlIhSllhAchlhQ.sosph.aYhLlPNYu.GhRTGL..aopouThlIps.huLhIllll.lslhIlulhKFKphDhL ........MKNNKLIILVVIC..LFLQAILFMAFDFPFKTLPILDGFPVGLATPVVTRLLLYWYLPIIAFSFYISGNlKDLLSSYGFLQISRNaKKEYWLMKQFLKLhI+VILFTSLQLALIFIFTPYShas.......Tu.FlYLILGY.lMLFTIFSLQYLLELFIDAQKALLLINGYVIISILMADLIYQNTTVTWPYYLLLPNYGMGFRTGLI.FoNTSTlLIShPTSLIILLVVLLCVFIlAIKKFKTTDIL............ 0 2 4 4 +10753 PF10921 DUF2710 Protein of unknown function (DUF2710) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacteriaceae. 
26.20 26.20 26.20 27.40 23.60 26.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.26 0.72 -3.91 2 57 2009-01-15 18:05:59 2008-07-29 10:08:06 3 1 56 0 5 17 0 107.90 88 90.74 CHANGED MVStssuRuE...LSD+DLVESVLR-LSEAADKWEALVsQAEsVTYSVDLGsV+AVANSDGRLLcLTLHPGVMTGYuHuELADRlNlAlsALR-EsEAENcARYGG.LQ ......MVSGSDSRSEPSQLSDRDLVESVLRDLSEAADKWEALVTQAETVTYSV..DLGDVRAVANSDGRLLELTLHPGVMTGYAHGELADRVNLAITALRDEVEAENRARYGGRLQ.... 0 1 2 4 +10754 PF10922 DUF2745 Protein of unknown function (DUF2745) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 21.40 21.40 21.90 21.70 21.00 19.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.73 0.72 -4.07 4 21 2009-09-10 19:09:16 2008-07-29 10:09:42 3 1 18 0 0 15 0 87.50 50 98.76 CHANGED MGRLYSGNLNAFKAAssRL.p.hDlsVhh--a.-phspppChp....LRlEDRuGpllsopTFpH+DEDVLaNhsTsWLN+hasQLKcWK ..MGRLYSGNLssFKsAssRLhc.hDl...sVhhEsap.ppsu+tChp....LRl.sRuGpll.sop..TFp..HpDEDVhhNhpT-WLp+haspLKcWK. 0 0 0 0 +10755 PF10923 DUF2791 P-loop Domain of unknown function (DUF2791) Gunasekaran P, Mistry J anon Pfam-B_001611 (release 23.0) Domain This is a family of proteins found in archaea and bacteria. This domain contains a P-loop motif suggesting it binds to a nucleotide such as ATP. 
22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.91 0.70 -5.87 28 286 2012-10-05 12:31:09 2008-07-29 10:10:10 3 3 243 0 96 309 63 370.90 34 90.05 CHANGED Ips+cRssllsuL+uGVVP+hGLchIsVGRspElpALhp-l-plusGGuuFRhllGEYGuGKTFFlphlRshAhc+saVsucsDLoP.-+RLHustGpuculYpcLhcsLoT+T+PcGsALpsIl-+Wlsslpc-shspu.............sssEplIcp+Lutls-hstGa-FAplLcuYa+uptpG-Epl+susl+WL+GEass+s-A+psLGV+slIDDsuhY-aLKhluthl+puGasGLllslDElVNLhKlpsstsRppNYEplLpllNDshQG+s.sLGhlhuGTPpFl.DsRRGlaSYtALpSRLupstas.psuhp-hpuPhIRLssLo.E-lhlLhp+lRclaAtststp....thlsDptlpsFhcpssuRlGuphhlTPRphl+cFl.slLsll-QNPshchpclhstst..hss- ...............................................................................................l..+.tpsllpuL+uGV.VP+h.GlthltVGRptElpuLhp.D.l..-.h.l.s.c.G..Gu.u.FRhllGcYGoGKoFhLphlRphAhc+shVs....scsDLoP...-R.RLpustspu....hshYpELhpNluT+...s...cP-G.s.......ALt.ll-+alsphppp..shtps..............................ttscphIhphl.spl..s-..h.st.Ga-FA.pllptYhc....u.....h.ps......c-ph+.st....sl+WlRGEhss+o-A+.....p...tLG...V.c.t.lIsD..s..sh.Y-hLKhhutFl+tuGYsGLllhlDEhV.N.l.a.K.......ls.......s..s.sRptNY..EplLphhN..DshQGp.spt.LshlhuGTPphl.......DtR+G.laSYpALps......RLupsp.......hs.....p.....sshh-h.uPsl+.L..t..sL.....o..sE-..hh.hLhp+ltclaAtt.s.p........hhls-pt.l.ttFhpttht+lGsth.hhTPRphl+sFl.plLsll.Qssshshpplhtt..h..t.......................................................................... 0 40 67 83 +10756 PF10924 DUF2711 Protein of unknown function (DUF2711) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as ywbB however currently there is no known function. 
21.20 21.20 23.20 26.20 20.20 20.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.47 0.70 -4.86 6 65 2009-09-10 23:06:20 2008-07-29 10:12:01 3 1 63 0 5 62 1 198.10 52 90.20 CHANGED -cSPILtQLPpsFpSAAlLLHPFlpMPhGWEsuhRKpsYEHI.YPSsEEIlphG+sVSWpcVMossGLpShsELAlAlhTuIsA..LR-EYtRcDLAc+LasslctDLYYPoEDhTSlFLltsLLKVLGSKGAcslYYuEPIh-scGshpls-ssPhslhsLssuElIITDEphDaAFMSlaDSFoTLFLAK-psIccIlpuhshEAlICscsThIsWYh ............-cSPILpQlPtpapSAAILhpPFlQMP.GWEcuhRKpPYEHI.YPSsEEIIppGKuVSWKchMShoGL+SaA-LAhAh.hTSIuA..hp-EYpRcDLAE.+LasN..l.+.p.D.LYYPoED.aTShFLlppLLKlLGSKGucplYaS.EPIh-ssGlLplssToshD.lhDlu.ss.ELIITsEcp-aAFMSlYDSFsTLlLAK-cNIc.lVpuMNhEAlICDccThIsWY............................................................... 1 1 4 4 +10757 PF10925 DUF2680 Protein of unknown function (DUF2680) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as yckD however currently no function is known. 23.90 23.90 24.10 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.76 0.72 -4.10 14 166 2009-01-15 18:05:59 2008-07-29 10:13:36 3 4 89 0 53 148 1 60.30 27 44.87 CHANGED LT-pQKp-IpsLtpQlh-lpKpllsKaVchGllTp-Qu-pIKppIDpthphh....cpN...........GFhP ..................tplpshppphhph+KphlsKhVctGhlTp-QA-pI+ppl-pphphh....pps...........sh........................... 0 30 43 50 +10758 PF10926 DUF2800 Protein of unknown function (DUF2800) Gunasekaran P, Mistry J anon Pfam-B_001630 (release 23.0) Family This is a family of uncharacterised proteins found in bacteria and viruses. Some members of this family are annotated as being Phi APSE P51-like proteins. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.37 0.70 -5.34 27 496 2012-10-11 20:44:46 2008-07-29 10:43:57 3 3 403 0 62 832 284 331.50 38 92.63 CHANGED HAhLSsSuu+RWLpCPsSst.LppphsDpsSshAp.EGTsAHtluEhtL.............pptl.cshshhs..........hs.....................tt.hhs-EMt-hsptYl-hVhEphp.h.....sshlhlEQRlchuphlP-......uFGTuDslllu..ss...hLpIl.DhKYG+G.V.VsA...................ppNsQhhlYALGAlc.ashlYD.hcpVphsIhQPRh...cs.hSsap..lssp....-LhpWupphlcstAphA..............tspucapsGcaC.pFC+s+.ssCpARActshpls.ph-Fpsssh............................Losp-lup....lLsphs.lcpWsc-lcshAhpphhs.Gcph...sG..aKlVpGR.usR+as.D-csltpsLpstGhtpp.lYp.pcLlo.TphEKhh.....sc...........cpascll.tshlh+ssGKssLsspoD+RPul ....................................................................................................................HAhLSASuu++WLpCP..s...S........hp....h..p...p..t.h...s..-..p...s....S.....aA..t..EGThAHt.L...uEh..hl...................ptth...pt..st.h...............hs....................................................................p.thhs.c..E......M......t.....c......h..s.....c..t...Y.....s...s..h...V....-hh...pth........................sshhh..l...E...p....+..lDh..uc.aVsp....................uFGTuDslI.lu..........ss.......hLpIl..DLKYGpG...l....V..s..A...................t.pN..s...Q.hh..LY....A.L......G.....A.h.....c.....h....a....s..h.....l......Y.......D.......hcp........Vp......h....T.....I......h.....QP..Rh..............ss...hS.s.h.p...ls..hp.........c..L.h..p.W...upp..hlc...s..tAphA.................................htGpG..ca....ps......G...p...a..C..pFC+h+...sp...C+sRAch.hpls...p...ph.psPth......................................................Losp-luc.....lL.phstlppWsp-....lcpaA....h....s....pAhp....Gc.ph...sG....aK..L.V..EG..R.SpRtas...Dppsshp.h.l.h.p..sGa...cs...hhc....pcLlolTphEKLl......GK...........+tFsclh..tshI.KPpG..K.oLssp.oD+RPsh..................................................................
............................... 0 28 52 60 +10759 PF10927 DUF2738 Protein of unknown function (DUF2738) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 20.30 20.30 20.50 20.50 18.70 19.90 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.68 0.70 -5.32 6 39 2009-01-15 18:05:59 2008-07-29 10:44:37 3 2 27 0 12 40 35 219.30 26 63.81 CHANGED ssshphp+hhFsKs.hascltsuH......RINl+Y-ccsshs..PLsh+TslLFSFGlpp.sshQDpsc.sYShsLsha-tp.GPospEptahshLpplts+s+cHL+c.sl++shs.....Kh.hssLhssMsshYhKhp-sls..........sPs+uPsLYPKllhut+ss.phsT..hFaKcscGpslpIs....llpp+C+Vlssltl-SIFlGsKsSlQlKlh-VllsE.........sls.p++plhhsphPss.ppE.p+csssssE--Lspp...tp.Ecl ..............................................s...................tss+......hlslpYcpp...t..PLhl..pss..hha.S.FGlps.s.....ohpsuss..saShsLhh..h.shp...s.sspEs.sFhpslctIts+s+cal.hc.s.+csht..........Kh......p.cslh-hh....p.....h.hhpp.t-sh...........sPscsPthYs...KLlhsppss.chhT...hhhp.sctptlch........hltt+spshsslth-Sl.a.h..s..sK.uhQhKL.psll..hE..........ht.............................................pt........................................... 0 12 12 12 +10760 PF10928 DUF2810 Protein of unknown function (DUF2810) Gunasekaran P, Mistry J anon Pfam-B_001682 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 32.00 38.80 18.40 16.90 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.35 0.72 -4.23 12 654 2009-01-15 18:05:59 2008-07-29 10:46:18 3 2 649 1 55 192 7 53.90 79 45.01 CHANGED plpsLsFsRslTKAEQADMGKLKKSV+GLVVVHPMTALGREhGlcpVTGaAP+t ......KLMcMPFpRAITKKEQADMGKLKKSVRGLVVVHPMTALGREMGLpEMTGFuKo.s. 0 3 12 34 +10761 PF10929 DUF2811 Protein of unknown function (DUF2811) Gunasekaran P, Mistry J anon Pfam-B_001693 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
20.90 20.90 21.60 24.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.90 0.72 -3.96 31 108 2009-01-15 18:05:59 2008-07-29 10:46:54 3 2 64 0 50 111 199 58.20 47 69.16 CHANGED ShpsElPEsLtpuMppFI-sHPsWDQYRlhpAALAsFLlQN.G........sssRsloRhYlssLF ..Sl.sElPEsLapuhpsal-sHPsWDQ.RlhsAALutFLlQN.G........ss..sRslsRlYLssLF.......... 0 6 29 46 +10762 PF10930 DUF2737 Protein of unknown function (DUF2737) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 46.10 47.40 16.30 14.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.85 0.72 -4.36 5 174 2009-09-11 15:20:40 2008-07-29 10:47:25 3 1 162 0 1 62 0 54.20 81 99.29 CHANGED M..RGLSYNPuILPoEhI...IR++hKPMPo..REELLKRNSFPSVNcNKYLNAMLRKs..KK ...MRGLAYNPGILPAEMI...IRQRsKPMPS..REELLKRpSFPSVNpNKYLNAMhRSG.KK.... 0 0 0 1 +10763 PF10931 DUF2735 Protein of unknown function (DUF2735) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as glutamine synthetase translation inhibitor however this function can not be confirmed. 25.00 25.00 31.20 29.30 23.40 21.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.53 0.72 -3.72 11 50 2009-01-15 18:05:59 2008-07-29 10:47:42 3 1 50 0 27 53 0 51.00 37 76.29 CHANGED SA+IYQFPl..GGRsGhsp+p...tth.spttssssssslssuSWYH--AlpE-ps ............SApIYQFPl..uuRtuhsppc...pst.spptuss.hssssssuuWYH--AlpEpp.. 0 4 10 15 +10764 PF10932 DUF2783 Protein of unknown function (DUF2783) Gunasekaran P, Mistry J anon Pfam-B_001590 (release 23.0) Family This is a bacterial family of uncharacterised protein. 
19.80 19.80 20.00 22.60 19.60 19.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.81 0.72 -4.48 27 179 2009-01-15 18:05:59 2008-07-29 10:48:18 3 1 165 0 68 159 43 60.50 54 83.47 CHANGED LspsPNlssP...........DsFYptLIssHcsLo--pSptlNARLILLLANHIGDhsVLpcAlphA+pu ..................LsTpsNl.scP...........DsFYEALI-sHRsLo-ppSphlNA+LlLLLANHIGDhsVL+EAlshARp.s.. 0 10 32 49 +10765 PF10933 DUF2827 Protein of unknown function (DUF2827) Gunasekaran P, Mistry J anon Pfam-B_001771 (release 23.0) Family This is a family of uncharacterised proteins found in Burkholderia. 25.00 25.00 25.40 25.30 21.20 23.60 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.19 0.70 -5.98 9 211 2009-01-15 18:05:59 2008-07-29 10:49:20 3 4 68 0 56 172 41 347.10 44 93.20 CHANGED plGIolhl.RsssQSlW.NGItQNslaLshLLppSPhltcVslVNssDs.shssuL.hsthssslhshs-stcphDVlIEhuuQlss-ahchh+tRGsKlVohpsGs-YVhshEshhFs+s..shlFpsssYDplWslPpht+oshsaLpolhRuPVphVPalWcPhFl-pptpsL.ttG.tFGY+P.......G+.sthRlolhEPNIsVVKsslhPMLlu-pAYRtpP-hlcal+VsNoh+h.+-pPpFVthApuLDlVRcG+soF-uRashspFhApasDsVVSHpWENs.NYlYaDlLaGGYPLVHNSshLsDsGYYYPD.DspsGApsLh+AhpcHDsch-sYpp+uRshLtslsstsstNlutYsstLs ...................................lGIolhl.Rs.spulWtNGIpQNslaLshLLcpSshltcVhhVN.s.G..ss....sh..ssuL.hst.hsls.lhshs-s.hc.plDVlIEhuuQlss-ahpth+s+GsKlVshpsGp-YlhshEshlFs+ss..uplFsussaDtVWslPphtposhsaLpolhRuPVplVPalWsPhFl-+tttpL....t.tGhpFGYcP..........G+...tthRlohhEPNlsVVKoshhPMLls-EAYRtpPDhlpalaVsNoh+h.K-cssFVpF.s.p.sLDlV+puhsoF-sRashspFhApa.s.DsVVSHpWENs.NY.hYaDlLYGGYPLVHNSshLs..c..sGYYYPDFDstsGu+sLhcA.hccHDsph-sYpp+ucclLcplo.tNstNlstYsptl.h.......... 0 14 17 36 +10766 PF10934 DUF2634 Protein of unknown function (DUF2634) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as phage related, xkdS however currently there is no known function. 
31.80 31.80 32.00 32.00 31.60 31.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.25 0.71 -4.30 26 250 2009-09-11 16:39:40 2008-07-29 10:53:59 3 2 179 0 57 234 7 109.00 29 77.38 CHANGED oYplcFppschhsp.......l-slEAl+QhlhhsLpT-Raca..IY....SpsYG.sElpsLIGp..hspshhcsElpRhlpEALlhcs+IpsVcsFph.phpsspl..plsFsVhTh.Gph ...................pathDhcps..chhtp..........l-sh.EAl+QhlhKsLpTcRac......a..IY........opsYG.sElp.c.LlGp...spshhcsElpRhIcEALhh-s..RIpsVssFph...ph.p...t...c.sl..plsFsVpohhGp............................... 0 27 49 51 +10767 PF10935 DUF2637 Protein of unknown function (DUF2637) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 22.80 22.80 23.10 22.90 22.60 22.60 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.29 35 337 2009-01-15 18:05:59 2008-07-29 10:56:07 3 7 169 0 121 387 2 142.20 20 43.45 CHANGED tsshshshslussuaslS.F..suL+clAhttu.hsst..hualhPlslDusllsuohhhlhh...tpssttt+hhsashhshu..........................................suhSlsuslhashhssts..pt...t.t.............hlthhhhhl....sPlthhssh+h......h.h.......hhtpshss ..................h.hshssshsluslAhslS..a.......suLpslAhp...tG...hssh..............huahhPlslDuslhssshhhlhh....ht....h..s.h...h...hh....phh.sWhh...h....ss..u..................................................sshs.lsusl..h.ashhhtth..ph......................hh.hhhh............ssh.hhh.h.....h....................t.t.......................................................................................................................................................................... 0 33 90 114 +10768 PF10936 DUF2617 Protein of unknown function DUF2617 Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
19.50 19.50 19.70 19.80 19.30 19.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.05 0.71 -4.68 15 185 2009-01-15 18:05:59 2008-07-29 10:57:39 3 1 180 0 42 119 0 142.50 39 95.31 CHANGED hhppLsss.sDsuAusLuluLsusssssLAstclsh..ssuslpLsVLGASHtVslptss.uphsEpVuChssss....tsLPs.....psp.usGYchpopscslstucFpspAssLpscssccssuLsGsFPG-.stAlTALsAcss......ss..uhpW+TWHlY...Pps..GplVsT+S+hc...P ..............................hhpLths.sDspus.LuhsL.....s.t.s...sLAshcl...th.........sus..........tlpLsVLGASH.Vslct.........t...up...hsEpVuChstss.....ssLPt..................shp.s.t....Ychtuc.s...cshs.........ts.sFtthApcLht.......hs..s..c.....t.....st....hLsGsF.PGs....st...AlTAl...hup.p...............ts....thpW+TWHhYPpt...........GplVtTpoph...... 0 12 29 39 +10769 PF10937 DUF2638 Protein of unknown function (DUF2638) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 22.00 22.00 22.10 22.00 21.00 21.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.73 0.71 -3.05 16 190 2009-01-15 18:05:59 2008-07-29 10:58:45 3 5 164 0 134 177 0 92.30 28 77.69 CHANGED sPhI+FlGpRp..sppsspsstsHPsossshlP.........suhuuappphppasPhp......pss..tu..ustsuuuhusl......pPhp.GEhhshs-LPsRF+.ppPhsEAElEuIpoG.GA ...........................................................................I+FhG....................t..tts...sHPhsss.t.hs.................t.h..h.....h.t...P................t................tth.us.ps.uss.sss....................pshp..sphhsht-LPtRF+...R...ps...h..sptElEsIpoGGs.......... 0 35 67 108 +10770 PF10938 YfdX YfdX protein Bateman A anon Bateman A Family YfdX is a protein found in Proteobacteria of unknown function. The protein coding for this gene is regulated by EvgA in E. coli [1]. 
22.60 22.60 23.00 23.40 22.50 22.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.98 0.71 -4.56 23 564 2009-01-15 18:05:59 2008-07-29 11:02:18 3 2 501 4 62 218 7 153.00 55 68.75 CHANGED pAlpclppAttAlps.GpscpAhptlpcApuplchhtsc.sphs...t...................................hlPVsspltlh-t.hssssptcsulppApptlppGchptAhchLchhss-lshshshlPLsph.sulppAtpLLcpsKhpEAstsLptAh.solVlsp.shs .................................AMRDlQhARhALFc.GDs-..KAKcLss-AouLLsDDST-WsKFAKssKKsslssD...........................pYIsINuSlsluEs.Y....ls..TPEKcAAIchANEKMs+GDKKGAhEpL+LAGVuVhENQhLhPLcQTRsAlscAp+LLDcppYYEAsLALKuAc.DGIIVDSpu..l...................... 1 20 43 52 +10771 PF10939 DUF2631 Protein of unknown function (DUF2631) Pollington J, Finn RD anon PRODOM Family This is s bacterial family of proteins with unknown function. 25.00 25.00 28.30 27.90 18.00 16.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.25 0.72 -4.13 13 179 2009-01-15 18:05:59 2008-07-29 11:51:58 3 1 177 0 44 115 0 66.90 47 58.98 CHANGED hAuTElEhasG.......VDst-VPSAsWGWSphshRshpIuGlluuhFLLsMlhGNHpG........+VEDhaLIGF.AA ...................tsptsEhasG.................V-.T......t-sPSAAWG..Wpp.....lspRshpIsGhhssh.FLLuM.l.hGNHpG........HVEslaLlsFAs......... 0 11 33 41 +10772 PF10940 DUF2618 Protein of unknown function (DUF2618) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. The sequences within the family are highly conserved. 25.00 25.00 35.90 35.50 18.40 16.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.16 0.72 -4.18 3 84 2009-01-15 18:05:59 2008-07-29 11:52:30 3 1 84 0 9 19 0 34.40 68 89.18 CHANGED KGRSIMAHIRRTRHIMMPSYRSCFSYSlFsSQ.SoSHhAL .......ts+IMAHIRRTRHIMMPSHRshFDaSFFst.........h..... 
0 2 2 5 +10773 PF10941 DUF2620 Protein of unknown function DUF2620 Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 25.00 25.00 35.00 43.10 23.90 23.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.41 0.71 -4.06 12 324 2009-01-15 18:05:59 2008-07-29 11:54:02 3 1 316 0 27 96 0 113.90 80 97.87 CHANGED h+IsIGGtlpKcpIcchlcch.ussphpshlhuDh-AAMtVKsGphDYYlGACpTGuGGALAMAIAllGhs+ssTIuhPut.spcEcIcphVspGKhAFGFTs-HhEpAlshLlshLh ...KKIGVAG.LQREQIKKTIEAT.APGsF.E.VFIHNDMEAAMKVKSGQLDYYIGACNTGAGAALS.......IAIAVIGYNKSCTIAKPG.IKAKDEHIAKMIAEGK.VAFGLSVEHVEHAIPMLINHLK..... 0 7 12 21 +10774 PF10942 DUF2619 Protein of unknown function (DUF2619) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 55.20 55.00 19.60 17.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.91 0.72 -4.03 15 172 2009-01-15 18:05:59 2008-07-29 11:56:57 3 1 170 0 42 92 0 69.00 58 71.95 CHANGED RlLSusIEloAAlLML+hNDlcKAltINulLAlVGPsIhIlohoIGLhulAu+lShsKLlhIhhGVsLI RLhSGShEIhAALLMLhlNDs+KALhINuhL.AhVGPTVLIlTMTIGIsulA.uclSahKLhalslGlsCI. 0 19 34 37 +10775 PF10943 DUF2632 Protein of unknown function (DUF2632) Pollington J, Finn RD anon PRODOM Family This is a family of membrane proteins with unknown function. 
25.00 25.00 290.30 290.10 20.80 20.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.52 0.70 -4.85 2 43 2009-01-15 18:05:59 2008-07-29 11:58:42 3 1 4 0 0 24 0 180.40 93 100.00 CHANGED MF-TNaWPFPDQAPsPFpAQl-pLouTENVYIFLTTLFGILQLVYVhFKLLCTMFPoLHFSPIWRGLENFWLFLSLsSLAIAYWWLPSMTFTGYWALTlIATILVhlhLIMMFVKFlNFVKLFYRTGSFAIAIRGPIVLVALDVTIKLHCTPFAILVKElGsIFYLSEYCNKPLsAAQIAAL+ICVNGQWFAYTRSSTTSAA+VAAANSTAKYHLFlLQGVA-YTQLSSVKFE ......................EQLSSTENVYIFLTTLFGILQLVYVhFKLLCTMFPsLHaSPIWRGLENFWLFLSLsSLAIAYWWLPSMTFTGYWALTlIATILVLlMLIMMFVKFlsFVpLFYRTGSFAIAIRGPIVLVALDVTIKLHCTPFAILVKEVGNIFYLSEYCNKPLsAAQlAAL+ICV................................................. 0 0 0 0 +10776 PF10944 DUF2630 Protein of unknown function (DUF2630) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins have no known function. 21.10 21.10 21.20 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.86 0.72 -3.88 14 143 2009-01-15 18:05:59 2008-07-29 11:59:06 3 2 141 0 52 97 1 79.40 54 94.45 CHANGED ssDp-ILsplpcLVsEE+cLRpphppGcIDpupE+p.RLpplEspLDQCWDLLRQRRAhRsuGpDPD-ApsRPsspVEsYhp .....s.sDp-hLu+Ic-LVAEE+tLRuphppGtIspsEE.pp.RLRclElcLDQCWDLLRQRRAhRpsGsDPc-AtVRPsspVEGYp........ 0 15 37 49 +10777 PF10945 DUF2629 Protein of unknown function (DUF2629) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as yhjR however currently no function is known. 20.80 20.80 20.90 22.50 18.90 20.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.32 0.72 -4.61 7 472 2009-01-15 18:05:59 2008-07-29 11:59:37 3 1 469 0 27 83 0 43.90 84 69.07 CHANGED phQsDhhALSQAFSLP-lsYtDISppEpLstAltRWPLLAEhAc ...............h.FQNDIlALKQAFSLP-IDYADISQREQLAAALKRWPLLAEFAQ.. 
1 3 6 17 +10778 PF10946 DUF2625 Protein of unknown function DUF2625 Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as ybfG however currently no function is known. 19.90 19.90 20.50 20.10 19.70 19.20 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.29 0.70 -5.39 12 157 2009-01-15 18:05:59 2008-07-29 12:00:21 3 1 150 0 24 112 2 191.50 47 90.87 CHANGED Lc-Llshc-sAWsllp-WhspApN+hElLss.sspAppsLhsLQVoTRSshGAllYpoGGllIDtGWLRlLGSGp.+LsRslssWNtG+.tht..hptssaLLlADDslGGaFAlNGGuLG.....-csGplYYaAPDoLpWEsLplGYS-FLtWslsscLscFYcsLRWpsWps-VtpLsucps.asFhPhLWspps...slctpS+csVslpEha ........hppLls.-cSAW.llppWlpsApN+sElLsss.ps.tA.c.psLhphQloT+SPMGAllYpoGGlLIDpGWLRIhG..SGp.+.LP...Rshhs...WN.t....pFs......tsphLllADDVhGGhFAlNG.....GsLG.....ccsGplYYauPDoLpWEsLpluYSEFLtWALsGDL-sFYpslRWpsWp-DVtpLsusps.asFhP.La.ppt.....-tpp+p.lslppha.................... 1 7 15 23 +10779 PF10947 DUF2628 Protein of unknown function (DUF2628) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as yigF however currently no function is known. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.68 0.72 -4.25 71 909 2009-01-15 18:05:59 2008-07-29 12:40:52 3 16 750 0 179 662 139 105.40 19 55.47 CHANGED hshashhp....sts..............ttts................ththlp.pG..........asa..hAahF..uhh.........ahhhp+hW....hhs.hshlslslshslhhhhh............s.shhh..................hltlhl....slhhGhpusth...hhhcht+ ...........................................................................................pt.....................ththhc..u...................asa..hAhhF..shh.........ahhh++h..W............hhu...hhhhhl.sl.shs.l..lshhht.......................hhh..............................................................hltlhl....tlhhuh.uNth....ahpph.................................................... 
0 41 96 128 +10780 PF10948 DUF2635 Protein of unknown function (DUF2635) Pollington J, Finn RD anon PRODOM Family This is a family of phage proteins with unknown function. 20.10 20.10 20.40 20.10 19.70 19.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.29 0.72 -4.66 20 259 2009-01-15 18:05:59 2008-07-29 12:43:16 3 1 235 0 33 149 2 46.20 45 73.56 CHANGED lKPAp.G....hsV+DPptGchLss-GcpVscsu.aWhRRLpDGDVlhspsps .........................lKPst..G......+sV.DP....sp........uchL..PsEGc...pVscss.aWhRRhssGDlhplspp.s.... 0 4 11 25 +10781 PF10949 DUF2777 Protein of unknown function (DUF2777) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus cereus. 25.00 25.00 69.00 68.20 22.20 18.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -10.88 0.71 -4.61 10 135 2009-01-15 18:05:59 2008-07-29 12:51:55 3 1 134 0 21 80 0 182.20 56 96.18 CHANGED Ms....pR..pphLhppsRpashGTl.hIc-pasFh--EEs-thLlEslspsslElhcsscWppuphh.-pshlppusEphsLpsGEpIRhcKpLthuhhpLLs-LsDssFhtFlspLpSLGaSlYDClaCaNtLsF.......spppsspGVNFlpFsN--tlCulQHHasRtpsps......DRFEaTtusGcRhlhpps ..............M...hQR..KHILYNQPRAHTlGNVEYINNEWlFF.DDEN-EAFLLE-IAEDGFEILYNNNWLPARFY.EQslL.QIAsEQHpLQNGEMIRIRKKLLLSYpEWLEELPDSlFsLLTEsLQSL+YSLYDChYCHNaLSFL......PcEEssEGVNlLLFDN-EMICoLQHHFVR+soSNK.....NhFcFTKsNGEcLHI-u.T... 0 4 13 15 +10782 PF10950 DUF2775 Protein of unknown function (DUF2775) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 
22.50 22.50 25.40 22.70 21.80 21.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.75 0.72 -3.83 8 136 2009-01-15 18:05:59 2008-07-29 12:54:08 3 9 11 0 35 126 0 76.40 38 100.80 CHANGED pDlt.YW+hlMKDpDMPEtIpuLLs.p......phcs.pph..t..........pctulc-FEPRPpho..sYssN-lchpEKKpslc-FEP+PNlSAYGDN-IctpEK.KsslpDFEPRPNlSAYs ...................................................................................pph-sh....sslo...tYs-spl.cspp.ppp..hscDFEPRPNlotY.sDt..hchcpp.pshs............cDFEPRPslotYs.................. 0 0 31 35 +10783 PF10951 DUF2776 Protein of unknown function (DUF2776) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.90 20.90 21.10 83.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.46 0.70 -5.39 7 355 2009-01-15 18:05:59 2008-07-29 12:55:53 3 1 344 0 11 142 0 329.40 81 98.46 CHANGED MNYGISlLFRAIPLlMulhChuYGuFlhp.Gss..usthVAG.VlhSLuhICIALFsTAATIIRQlI+sYNphhpahLPllGYLuAslThlhGhsh.hspusssusFVAGHVlhGVGhIsACVATsAsuSTRFoLIPtNucss.ssthPtsAFopttuhhLIslsllholluaIWuahLLuco-ppss.aVAGHVhhGLAsICsSLIALVATIsRQlRNsaoc+ERhhWsthVllMGSIolLhGlaVL.usosssphusGYIhIGLGLVCYSISSKVlLLAtlWR+chKLANRIPLIPVhTALACLFLuAFLhEhusscssYFIPARVLsGLGAICFTLFSIVSILESGTS ......MNIYIGWLFKLIPLlMGLICIALGGFVLESSGQ..SEYFVAGHVLISLAAICLALFTTAFIIISQLTRGVNTFYNTLFPIIGYAGSIITMIWGWALLAGNDVMADEFVAGHVIFGVGMIAACVSTVAASSGHFLLIPKNAAGSKSDGTPVQAYSSLIGNCLIAVPVLLTLLGFIWSITLLRSADITPH.YVAGHVLLGLTAICACLIGLVATIVHQTRNTFSoKEHWLWCYWVIhLGSITVLQGIYVLVSSDASARLAPGIILICLGMICYSIFSKVWLLALVWRRTCSLANRIPMIPVFTCLFCLFLASFLAEMAQTDMGYFIPSRVLVGLGAVCFTLFSIVSILEAGSA......... 2 5 7 9 +10784 PF10952 DUF2753 Protein of unknown function (DUF2753) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
27.10 27.10 27.10 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.88 0.71 -4.19 7 99 2009-09-11 03:22:03 2008-07-29 13:02:31 3 2 99 0 15 59 2 134.70 69 95.28 CHANGED WE+HTLLA-pAhppss.htoIlHYQ.ALu.Sppl...psspt-L-DhlslpVlSCHNLApFWRttGDscYELKYLQLASEpVhsLlPQCPpppC-uFlsoLGCCpuALl-FhKRHPNPtIA+plpcIsosspCELIApF+Lp ....................................WE+HTLLADhAhQ-sDHLRSILHYQQALTlSppl..sEu-EI-hEDRhhISVISCHNhApFWRshGDspYELKYLQLASE+VLTLIPQ...C.pssCEAFlDSLGCC+KALlDFMKRHPNPcIA+hVQ+IDTAopCElIApFRLN.............................. 0 1 4 8 +10785 PF10953 DUF2754 Protein of unknown function (DUF2754) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. 25.00 25.00 91.90 91.80 24.10 19.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.57 0.72 -3.92 3 443 2009-01-15 18:05:59 2008-07-29 13:05:01 3 1 440 0 15 74 1 70.00 90 92.22 CHANGED MpLosKIRRDWHYYAVALGLIFILNGVVGLLGFEAKGWQTYAVGLVTWVISFWIAGFIIRRRPEEopsAE MNLPVKIRRDWHYYAFAIGLIFILNGVVGLLGFEAKGWQTYAVGLVTWVISFWLAGLIIRRRDEETENAQ....... 0 1 1 8 +10786 PF10954 DUF2755 Protein of unknown function (DUF2755) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as YaiY however no function is known. The family appears to be restricted to Enterobacteriaceae. 25.00 25.00 90.60 90.60 18.60 17.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.33 0.72 -4.29 6 443 2009-01-15 18:05:59 2008-07-29 13:09:54 3 1 440 0 15 59 2 100.70 89 98.89 CHANGED MADFTLSKSLFuGK.RssSSTPGNIAYALFVLFCFWAGAQLLNlLVHAPGVYE+LMQsQ-TGRPRV-IGLGVGTIFGLlPFLsGClIhuVlAlhLRWR+R+ ............MADFTLSKS.LFS.GKYRNASSTPGNIAYALFVLFCFWAGAQLLNLLVHAPGVYERLMQ..VQETGRPRVEIGLGVGTIFGLIPFLVGCLIFAVVALWLHWRHRR. 
0 1 1 8 +10787 PF10955 DUF2757 Protein of unknown function (DUF2757) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as YabK however currently no function is known. 22.50 22.50 23.10 34.70 22.30 22.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.75 0.72 -3.79 8 173 2009-01-15 18:05:59 2008-07-29 13:10:27 3 1 173 0 32 85 1 72.70 56 99.25 CHANGED MAlaYaCRHCGhcVGoLspsslpo.-pLGFacLT-cE+sDMIoYcpNGDlHVKTICEDCQEuLpcNP-YHpacpFIQ .....MthaYYCRHCGspVGSlsu..pp....Vho.-sL..apLTEpEhs-MIpa+E.NGs.I.alKTICEpCQEsLss.PEYaEY-pFlQ.......... 0 10 22 24 +10788 PF10956 DUF2756 Protein of unknown function (DUF2756) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated yhhA however currently no function is known. The family appears to be restricted to Enterobacteriaceae. 20.70 20.70 21.40 31.10 20.60 20.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.61 0.72 -3.78 5 476 2009-01-15 18:05:59 2008-07-29 13:14:17 3 2 469 0 27 111 1 101.90 81 74.37 CHANGED MKRLLlLAALLPFuuLAQPINThNNPNQPGYQIPSQQRMQTQMQTQQlQQKGMLNQQLpTQTRLQQQHLQoQlNNNsQRVQQGQPGphN.sRQQVLPNTNGGMLs .......MKR.LLl.LTALLPF...VG...FA..QPINTLNNPNQPGYQ..IPSQQRMQTQMQTQQIQQKGMLNQQLKTQTQLQQQHLENQINNNSQRVLQSQPGERNPARQQMLPN.TNGGMLN........ 0 1 3 15 +10789 PF10957 DUF2758 Protein of unknown function (DUF2758) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 20.50 20.50 20.60 20.60 19.80 20.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.93 0.72 -4.15 7 170 2009-09-11 11:48:29 2008-07-29 13:15:39 3 1 166 0 16 74 3 59.30 56 94.78 CHANGED MlKVtVFDcEHEKDLpp-lNpFLKplc-spllDIKYsVAAh.p-s-tEQlYCFSAhIlY+p ..............Ml+VKVF..DEpH.E...K...DL..E...DAVNsFL.KK.IcDs..phVDIKYQ..Vuso...hss...--sQIY.CF.SAMIlY+s................. 
1 1 8 10 +10790 PF10958 DUF2759 Protein of unknown function (DUF2759) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillaceae. 20.70 20.70 21.30 23.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.57 0.72 -4.38 11 150 2009-01-15 18:05:59 2008-07-29 13:16:40 3 1 147 0 29 71 0 51.80 62 84.06 CHANGED IFuLVolLAlaGslRuLKcKNlLullFuuuohhVFGWFoIMTllasGaP.ssH .......IFsLV.TLLAVFATLRTL..RE..KNhLAsGFAlATVLVFGWFTIMTVLasGYP.sA.s........ 0 8 20 23 +10791 PF10959 DUF2761 Protein of unknown function (DUF2761) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as KleF however no function is known. 25.00 25.00 26.40 92.20 20.20 19.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.07 0.72 -4.36 5 34 2009-01-15 18:05:59 2008-07-29 13:22:50 3 1 21 0 4 19 1 92.10 78 70.28 CHANGED PYPPGFVCPsTGRVAVLVRDYAsSDLNGDAPAYWYSAQSEEWGLDPWRLVEGVDPHTpGGSaDVCFAsGSoRTVGPLMTFFL..uAuDAARLsA+cGc .PYPPGFVEPoTGRVAVLVR-YAsSDLNGDAPAYWYSAQSEEWGLDPWRLVEGVDPHstGGSFDVCFAsGuoRTVGPLMTFFL..SAucAApLsstc.s... 0 0 1 3 +10792 PF10960 DUF2762 Protein of unknown function (DUF2762) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as holin-like protein BhlA however this cannot be confirmed. 22.60 22.60 22.90 22.70 22.50 22.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.44 0.72 -4.27 14 187 2009-01-15 18:05:59 2008-07-29 13:23:53 3 1 139 0 26 134 0 71.30 32 92.17 CHANGED M.Ep-llplhhoQGhaAlLFshLLFYlLKpsc......................EREp+YQplIpc....lT-pLshlpshtccVp-hhp+ ...................tlhphhhoQGsaAsL...FlhLLF....alhKpsc..........................cREc+hQphI-+....hs.-phpslp....c....hhp-lpch.t............................... 
0 11 24 26 +10793 PF10961 DUF2763 Protein of unknown function (DUF2763) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 21.60 21.60 21.90 22.30 21.10 21.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.16 0.72 -3.59 10 138 2009-09-10 16:34:48 2008-07-29 13:24:32 3 3 107 0 90 143 0 89.80 34 86.04 CHANGED VYlSs.GsVh-.ppRSPWRLShlsDFFhGllsFIshFFpTlls.ssscstppssuusp+ascGtGssGsssGtpthGpIs+GuGPsssP..hGG ......................sYlsp.GpVh-....pposWRLuhIsDhFaGIhpFlhhF.FpTlls..............ssscp......tp.h.s.s.......s..s.sp.....c....a.....s.s.....Gt..Gs....sG.s.....s........stthGths.th..tGst...................................... 0 27 43 67 +10794 PF10962 DUF2764 Protein of unknown function (DUF2764) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.50 20.50 20.90 20.50 20.30 20.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.56 0.70 -5.17 10 168 2009-01-15 18:05:59 2008-07-29 13:25:16 3 2 164 0 42 131 10 218.90 28 88.05 CHANGED MopYYaLluGLPslslEDsKLsYols-FcsELtssLSccDpKLIDLhhhcFDNtNlLtahcss-stscscGp...lop-Elsshlush+-u-....sscaPsYFp-FlppYht..................psucEp......lsaEDcLuAhYYsYAMcsuNcFlpsWFpFNLslpNILsAhsuRKhphDlu.hl....VGcs-Vs-sLRTSsARDFsLu-ElshLpsllcluEcp-lslREKpLDll+WcWlE-ss.F.cYFslEplhAaLLpLtMIERWlpLDKc+GpphFRcIIsslK+ul ........................................................sp.YYhL.uhLs..t.-s..l.aohtphps.lh.thotpD.p.hslhhh.hD.tN.hthh..httcs......u...............hs.ppl.thht....t.t...........th.sah.pFl.ta.......................spp..............h...stLht.ahta.h.p..tspFlpsaapFp.slpslLsuhpuRhht.h...D...hu....l....su.ss.lhphLhpp.su..satLst.p.h..lppl....hp...tppsh..ht.....EcpLshhcW....phlE-hs.hhcYFsh-tlhuallpLthlpRhhphcp-+Gtp.F+pllpphcpt................ 
1 17 30 35 +10795 PF10963 DUF2765 Protein of unknown function (DUF2765) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 25.50 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.52 0.72 -4.03 23 119 2009-01-15 18:05:59 2008-07-29 13:28:37 3 1 113 0 21 93 0 81.90 40 88.01 CHANGED IsLslsG..sDhpFssTsssYNcalNshspssKVsPA.+NaLhpsVcsEpK-sLccLl....psPGu..shplsutlhcEasPclpIsVKc ..................IsLslsG..s-lsF.pPs.suYNpalN-hshsNKVuPA.+sYLhRhVs.s.EsK-ALpcll....chPGs..uhQlsupVs-.YuPcl-IpVKp........... 0 4 11 14 +10796 PF10964 DUF2766 Protein of unknown function (DUF2766) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 21.20 21.20 21.30 86.00 21.10 19.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.66 0.72 -4.09 3 372 2009-01-15 18:05:59 2008-07-29 13:29:13 3 1 371 0 20 76 0 78.70 90 95.56 CHANGED MScPLNsDQELVSDLVACQLVIKQILDVIDVIAPsEVRDKMSSQLKNIDFoTHPAAADPVTRRAIEKAIALIEMKF...TPp MSQPLNADQELVSDVVACQLVIKQILDVLDVIAPVEVREKMSSQLKNIDFTsHPAAADPVTMRAIQKAIALIELKF...TPQ... 0 1 6 12 +10797 PF10965 DUF2767 Protein of unknown function (DUF2767) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 20.70 20.70 20.80 21.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.27 0.72 -4.13 4 459 2009-01-15 18:05:59 2008-07-29 13:29:51 3 1 445 0 22 84 1 64.50 87 98.89 CHANGED MGNcsK--tLYQEMCRVVGKVVLEMRDLGQEPKHIVIAGVLRTuLANp+IpRStLphQAMEpVI+uLst .........Msp.sK.-D-LYpEMCRVVGKVVLEMRDLGQEPKHIVIAGVL.RTALANKRIQRSELE...KQAMETVINALVK...... 
0 1 1 13 +10798 PF10966 DUF2768 Protein of unknown function (DUF2768) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillus spp. 20.00 20.00 22.20 22.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.73 0.72 -3.83 11 157 2009-01-15 18:05:59 2008-07-29 13:30:25 3 1 157 0 28 87 0 57.20 58 86.96 CHANGED KMWhulGuMGhMFluVssIYLSRaKlps+FL+hlsuhlAYhhMllSGlIlhhVVhSGP KMWhALGAIGFMFhAVuhIhLSRaKlKNKaLKuIsALVAYsLM.IVSGIlIFlVVFSGP.......... 1 9 19 22 +10799 PF10967 DUF2769 Protein of unknown function (DUF2769) Pollington J, Finn RD anon PRODOM Family This family of proteins have no known function. 20.70 20.70 23.30 21.20 20.30 19.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.90 0.72 -3.67 16 59 2009-01-15 18:05:59 2008-07-29 13:34:54 3 3 24 \N 52 56 1 55.10 30 45.26 CHANGED Ch..CssCPSastp.....tsctlFChpGpSptphhpc..GChCsp.CsVhpphpLpst.YYCh ..........Ch..C...Csoas.p..........................t.ttlaCspG.p.S.p.sphppcp.sChCss.CsVapc..a..pLpps....YaC......... 0 12 31 43 +10800 PF10968 DUF2770 Protein of unknown function (DUF2770) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as yceO however currently no function is known. 25.00 25.00 34.60 34.40 19.90 19.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.54 0.72 -4.28 6 426 2009-01-15 18:05:59 2008-07-29 13:36:04 3 1 421 0 15 52 0 36.00 77 92.93 CHANGED MRRLhshLlNNlREHhMLYlhLWhLLAllDllalaa .MRRLLHYLINNIREHLMLYLFLWGLLAIMDLIYVFY 0 1 3 10 +10801 PF10969 DUF2771 Protein of unknown function (DUF2771) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
25.00 25.00 28.00 27.70 22.70 22.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.08 0.71 -4.96 13 173 2009-01-15 18:05:59 2008-07-29 13:37:06 3 1 173 0 40 114 0 161.60 33 94.94 CHANGED KR.....llulLhAslslllsuusGhhsWhLsRcsuPppPEISAYSHG+hsRVGPYpYCslls.c-.......Cpsspsp..GELsVst+tPlQLSlPpsIucAPWpLLplY-DPssssssh....aR..PsophAVTIP.oVDP.....pRGRLsGltVQLlTlVhDc..sGE.pslPHAEWSlcss ............hlAhl.sllVlllus..ss..uts.h.Wh.h.s.....ts.p.s.P.ppPcIosh.S..cGchpcVsPa.aCsls.s.p............................C......p.s...s.....p.st......spLs.......Vstc.tslpLSVPctIucsPWpL.Lpl.Y.pDPussspsh.........ap..ss.s.phu.VTls.......olDP.......pRsRLssltVph.l..........sl.s..........h.Dc..sG-.pshs.spWSVph.h............................................. 0 8 28 37 +10802 PF10970 DUF2772 Protein of unknown function (DUF2772) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as spore germination protein GE however this cannot be confirmed. 25.20 25.20 26.00 66.70 24.10 23.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.61 0.71 -4.04 10 140 2009-01-15 18:05:59 2008-07-29 13:38:11 3 1 138 0 20 94 0 122.30 52 94.64 CHANGED sShVpsh+lsSlGISSVlQlGDopcIsh+o+sLAVQRphslFassEG...ctcpFplFs.cPIPhPtsposVps.shhHEsPsIcVpul+lhGlSuSSllpIGSTstVpu-SRlKHIRQLhs.....Psup ...lSlVQNVsIlSLGIuuVFQVGDuNQhELKSRALAVHRElPsYl+sEG...+hDAFcIFTDEaITIP+.R.T.....TD....V+l.NIlNE.CPFIEVNNVclRTlLNSusFQIGNVDYVFsNSRIhQIRQaIT..P...up...... 0 4 12 14 +10803 PF10971 DUF2773 Protein of unknown function (DUF2773) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 
25.00 25.00 40.00 40.00 24.40 17.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.69 0.72 -4.01 3 466 2009-01-15 18:05:59 2008-07-29 13:38:37 3 2 290 0 3 167 0 76.90 66 18.48 CHANGED AL+NAHTPsuLLTTLTEPcaRSLAMNNPQLAADVKTAWLKEDPSLLLFVEQPDLSLLRDLVKTGATRKIRSEARH+LEEKQ ...........ALpNtHTPsulLss.h-Pp.h.lAhNNPphssDVhpAhLKcDP.LhLhlspP-LsLlRpLshsGtTRtIRppAh++L-Eh.. 0 2 2 2 +10804 PF10972 DUF2803 Protein of unknown function (DUF2803) Pollington J, Finn RD anon Pfam-B_1049 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 33.30 25.00 19.10 22.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.32 0.70 -4.83 51 345 2009-01-15 18:05:59 2008-07-29 13:47:24 3 2 277 0 85 277 98 147.80 25 71.11 CHANGED hhtllsLlhlhhus......sAhA..................pphacVEVllFcpss......tpssEpWs.tps..hpsspshslhs..............................................................................................................................................................................ttt.hhLspsphpLsshtpcLpppssapsLlHhuWpQshh.scssu..........sl+lhuGcphsppa.............................................................................................................................................................hpl-Gslplalp+.....YLal-ssLplpp 
...............................................................h....l.llhh..hhu.......suhA..................tt.aplElllFcpss............t.ssEpWs.tt...ht.pt...h...........................................................................................................................................................................................................hhs.pt.tL.......ss.hppLpppssapsLhHhAWpQs.t....ppu..........s.htl.uGpchttpa.......................................................................................................................................................................tl-Gplplhht+.....alhh-s.h.lp.......................................................................................... 0 20 38 65 +10805 PF10973 DUF2799 Protein of unknown function (DUF2799) Pollington J, Finn RD anon Pfam-B_1111 (release 23.0) Family Some members in this family of proteins are annotated as yfiL which has no known function. 25.00 25.00 31.90 31.10 23.20 19.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.42 0.72 -3.96 46 616 2009-01-15 18:05:59 2008-07-29 13:48:22 3 4 513 0 80 303 17 85.20 45 64.46 CHANGED ssWtphGhpDGhpGpstps.sphtctssp.sht..sshttYppGYpcGttpYCs.s.uYplGhp........................................GptYhGlCs.s.....pFtppYppGpppa .........DWYslGhcDAloGss.h..+.sscuau-s......p..ADpuhYhcGYtEG.pcchCpsDhsYthGLSG+sa...usCsssE.pAsph+csWQ+GtcE.p........... 0 14 32 52 +10806 PF10974 DUF2804 Protein of unknown function (DUF2804) Pollington J, Finn RD anon Pfam-B_1045 (release 23.0) Family This is a family of proteins with unknown function. 
25.00 25.00 25.30 26.60 23.80 23.20 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.95 0.70 -5.59 56 302 2009-01-15 18:05:59 2008-07-29 13:50:15 3 3 259 0 114 274 21 317.20 26 94.31 CHANGED GpspaGthpu.spslshtchshhp.hcpshssht+hh+hKpapahslhssc..ahlulAluDlpYlusuFsYlaDhcssp..................hhppshlpPh.............shthphsspshpu..ps...papptphplph..p......................t...psthclp..............hsspslps-lplp..sststslslssPsuhs.G..a.saTpKpsulslpG.pl..plsscs................hs...hs.................puhAshDaotGah++cT..sWpWAohsuhhpt..........G..plGlNL.uuGhs-..TussENslWlsGchahL.sslpFcas+......ps.h.......t.........sW+lpS.....p.cscl...-LpFpPhspRp-+hNLhlltSsF+QhhGpFsGpl.hsss...GpplplcslhGhsEDHaA+ .............................h.s.hpt.spplshpch.hhp.hppslpu.......+hh+hKcapahslhssc..ahlulsluDlsYluhuhsYlhDh.pss.p.........................hh.ppshlpP...h......................ththphsps....shpu..ps.........ph.pp.tphplph...p...................................................p..puthplp.................hpst.slphchplp....ssstpsl...slssPhuhss....apYTpKps.slsspG..pl...phsspp.......................................hs....hs......tpuhAshDaupGhhphcT..tWpWu.uhsuhsp.....................G...tlGlNL.usGhs-....os..tsENsla.lsG...c..hphL.ssVpFchsp........................ts.h...............p................sWplpo.....t.sspl...-LpFpPhhpRp.pc..hNlhl...ltSsh+QhhGpFsGpl.thss...GpplplcshhGhsEcphA+.............. 0 52 83 103 +10807 PF10975 DUF2802 Protein of unknown function (DUF2802) Pollington J, Finn RD anon Pfam-B_1126 (release 23.0) Family This bacterial family of proteins has no known function. 
25.50 25.50 25.50 25.50 25.10 25.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.14 0.72 -4.22 44 303 2009-01-15 18:05:59 2008-07-29 13:52:17 3 1 299 0 84 222 66 70.00 42 48.15 CHANGED lshGpcltclcpplp.plpp+lpcl.......p.ppDPpsthYscAsKhVphGAsl-ELhppCsLs+AEAELlhpL+pp .....................................luhGp+lpEhpchlp.pLs-+l.pcL...........E.ppDs.suphYo+AsKhVpLG.AslsELhpcCcLs+AEAELhhpLpp.... 0 22 43 66 +10808 PF10976 DUF2790 Protein of unknown function (DUF2790) Pollington J, Finn RD anon Pfam-B_1206 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Pseudomonadaceae. 20.70 20.70 21.20 32.40 19.30 18.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.86 0.72 -4.19 48 247 2009-01-15 18:05:59 2008-07-29 13:56:06 3 1 62 0 76 187 5 82.00 36 89.77 CHANGED hhlsshussAhApssssps.......................s.lppYcYGMpLDlA+Vluhos.ss..ssC.pVVPupMsYcDSpGch+tlpYpthGs.uCpp ..........................h.hlhshu.hAhAtpstptstt.....................stlcpYpYGMpLDlA+Vluho...sss..ssC.tVVPupMsY-DSpGph+slpYpshGs.uCs...... 1 5 19 46 +10809 PF10977 DUF2797 Protein of unknown function (DUF2797) Pollington J, Finn RD anon Pfam-B_1162 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 25.50 25.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.62 0.70 -4.73 54 434 2009-09-11 12:03:15 2008-07-29 13:57:29 3 2 422 0 141 419 288 227.10 37 82.58 CHANGED G+plslpasGtI..pClsCG+cTKKSasQGaC.........asChpcLA.....pCDtCIM+PE...tCHactGTC.R-PpWucptChpsHhVYLA..NoSulKVGITRpoQ..lPTRWIDQGAspAlsIhcVssRhhuGllEstLpcp..luDKTNW...RpMLKups-.slDLhspcspLhphlspplppl.......................h.thst.......ht...p..thhplsaPV.paPpKlpShNhDKsPplp....GsLhGIKGQYLIhDsG....VlNlR+asGYpls .............................................................GpplplpahGtI..hCspCG.c+TcKSau.QG.aC..................asCh.p.c.lA......pCDhCI.h+PE.....pCHact..GTC.R-spaucphChp...sHhVYLA....NoSslKVGITR..pop..lPTRWlDQGAsQAlPIhcVsoRhhuGhlEstLtpp..luD+TsW...RphLKGc.sp...slDLhthpppLhtt.h..sp.tl..tpl............................ttphu..........slp..l..pt...t.hplpYPV...pa...PpKl..p.Sh.N.l-..KsPhlp....GhLhGIKGQYLlh.DsG.......VINlRKasuYpl............................................................................ 0 34 85 122 +10810 PF10978 DUF2785 Protein of unknown function (DUF2785) Pollington J, Finn RD anon Pfam-B_1219 (release 23.0) Family Some members in this family are annotated as hypothetical membrane spanning proteins however this cannot be confirmed. The family has no known function. 
25.00 25.00 25.20 25.10 21.60 24.50 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.76 0.71 -4.82 29 640 2009-01-15 18:05:59 2008-07-29 14:24:14 3 3 450 0 63 382 3 163.20 34 63.61 CHANGED LhhcIspttsD.ulFpRSFos...LllAhllptcpp.....h.hLosp......phpplhpphlpYhthEpDhRGalppcGWAHuhAHsuDhLspLsppsph.spschhhl..lpslh.phlpp.sthahssE--RLupslhshlpps.lspcplhthlpph.....tshsshpt..t...s.hhthhNhppFLpsLhlp ............................Lh.pl...p.tpt.ushpRoFss...LlhuhlLpscpp.....h.hLopcphpslhsQhlpYlshE+Dh+Gas.p.phGWsHuhAHuADhLsEllppsph.sppphcEl..hssLt.phh.+h.hohhaps.sED.RLspslhphl.ps.lpp-plhshl+sl......phs..cp......p.hhh.htNh+shLpplahp............................ 0 24 37 46 +10811 PF10979 DUF2786 Protein of unknown function (DUF2786) Pollington J, Finn RD anon Pfam-B_1231 (release 23.0) Family This family of proteins has no known function. 22.20 22.20 22.30 22.20 21.90 22.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.99 0.72 -4.24 127 678 2009-01-15 18:05:59 2008-07-29 14:24:43 3 4 601 0 146 563 51 41.60 35 15.60 CHANGED pKhLp+ItKLLthup.u..ss.pEAtsAhppApcLMtcaslcts ....pKhLp+Ip+LLthAp.u......ssttEAtsAhppApcLMtcaulst.......... 0 43 88 127 +10812 PF10980 DUF2787 Protein of unknown function (DUF2787) Pollington J, Finn RD anon Pfam-B_1249 (release 23.0) Family This bacterial family of proteins has no known function. 19.70 19.70 20.70 21.70 19.30 19.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.85 0.71 -4.19 38 284 2009-09-11 12:53:47 2008-07-29 14:26:16 3 1 121 3 38 170 13 107.90 36 89.71 CHANGED sppLtplLsphls......t.pt.ssuptlslNFRD.sYSA-pGGaHPVEIplp+.....psspWplsYITDFuYhG.shasELp+-LDFcFpst.sapthtG.hhslp...psp-LaplWppNFlsY.hshssac.lplos ............s...hpthLtthlp......p.ph.tpuctllhNhRD.sY.tcptGhHPVEltlpp.....pps.W.l.alssFuY.s..s.hspL-hpL.Fchtpt.haps.tGhhs.ltts.-st-LaplWpssFhta.lshpsap-lplT............. 
0 12 15 27 +10813 PF10981 DUF2788 Protein of unknown function (DUF2788) Pollington J, Finn RD anon Pfam-B_1255 (release 23.0) Family This bacterial family of proteins have no known function. 25.00 25.00 39.30 39.10 22.70 18.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.23 0.72 -4.17 33 319 2009-01-15 18:05:59 2008-07-29 14:26:59 3 1 319 0 80 181 25 51.90 46 72.55 CHANGED lhlsslhhaMuFIIaDLuK+SpAG+FGphllFhsLulGshGFlhKsllphhh .....hlsulhhFlGhhIhDlhKpupss+FGphIlaLVLhLGshGFlsKslIphhh... 1 16 42 63 +10814 PF10982 DUF2789 Protein of unknown function (DUF2789) Pollington J, Finn RD anon Pfam-B_1269 (release 23.0) Family This bacterial family of proteins has no known function. 20.40 20.40 21.50 69.20 20.20 19.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.49 0.72 -3.96 60 290 2009-01-15 18:05:59 2008-07-29 14:30:47 3 2 263 1 93 240 24 74.30 46 91.22 CHANGED M-psppshssLFpQLGLsssppuIcpFIspHp..LssslpLt-AsFWosuQtsFL+EplpcDA-WuEllDpLsstL .....M-psppshspLFpQLGLsusctuIcpFIspHp..LsschpLt-AsFWosuQusFL+EplpcDA.-WutllDpLshtL. 0 19 43 70 +10815 PF10983 DUF2793 Protein of unknown function (DUF2793) Pollington J, Finn RD anon Pfam-B_1370 (release 23.0) Family This is a bacterial family of proteins with unknown function. 21.20 21.20 21.30 26.60 21.10 20.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.98 0.72 -4.18 50 230 2009-01-15 18:05:59 2008-07-29 14:32:52 3 6 199 0 76 196 45 87.10 41 24.90 CHANGED HNEALphL...DuLVQLuVtutshssPPusPupGspalluuu.A.oG..A.WuGps.GplAhapsGu..WtFlsPpsGWtsaltsc....uthhl..ac..GusWt .......................HNEALphL...DulVpLuVpuhshssPPuoPA-GDRYIVuus.A.oG....A.WuGps.GplAta.s..Gu.....WhFlsPts..GWhuaVssE..............utlhl..ac..GusW............. 
0 19 52 62 +10816 PF10984 DUF2794 Protein of unknown function (DUF2794) Pollington J, Finn RD anon Pfam-B_1384 (release 23.0) Family This is a bacterial family of proteins with unknown function. 21.70 21.70 23.70 42.40 19.70 16.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.81 0.72 -4.12 33 238 2009-01-15 18:05:59 2008-07-29 14:34:32 3 1 235 0 81 190 1084 84.10 62 68.45 CHANGED VtFcRpELspILsLYGRMVAuGEWRDYAIDhh+DtAlFSlF....RRuuEhPLYRIEKcPcLup+QGtYuVlutsG.pILKRGp-LtpVL ..sFcRpELstILslYGRMVAsGEWRDYAIDaL+D+AVFSlF....RRuuEhPLYRIEKsPKLtpKQGtYuVlussG.hILKRGH-LppVL................. 1 21 51 61 +10817 PF10985 DUF2805 Protein of unknown function (DUF2805) Pollington J, Finn RD anon Pfam-B_001474 (release 23.0) Family This is a bacterial family of proteins with unknown function. 20.80 20.80 20.80 55.90 19.30 19.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.45 0.72 -4.03 58 227 2009-01-15 18:05:59 2008-07-29 14:35:38 3 1 217 0 75 214 1538 70.80 51 81.03 CHANGED lsRIIEMAWEDRTPFEAIctpFGLsEppVIpLMRppLKsuSF+hWRKRV.oGRpTKHtth+s....ths...RhhsssQ..h ....sRIIEMAWEDRTPFEAIctpaGLsEs..pVIplMRppLKsuSF+hWRKRV.oGRpTKHtphRs......pht.....Rhhs.pQ................ 0 28 57 68 +10818 PF10986 DUF2796 Protein of unknown function (DUF2796) Pollington J, Finn RD anon Pfam-B_1354 (release 23.0) Family This bacterial family of proteins has no known function. 
25.50 25.50 25.80 29.10 24.30 25.40 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.00 0.71 -4.27 46 343 2009-01-15 18:05:59 2008-07-29 14:37:18 3 2 245 0 71 281 191 133.20 31 85.86 CHANGED tsAH.HGtuclslAh-G.ssL.lplpuPuhslVGFEHAspocp-+ttlspAhspL.ppstpLFslssuAsCplppsplctshhtt.t.t...................+-+csHD....................+pp........................................pHu-hpupYpapCssssp...Lpplshs.hFppFPuscclpVphlsssuQpuscLosssspl ..........................tAH.HG.splsls.-G...ppL.hchpuPuhslVGFEHsspsstpcttlttAht.L.tps.tlFsls.tAtCphtp..lptshhsc.ptpc.t.c.....................+-+.c.tHD.............Hptc...................ccptpccttt.........................................pHu-hpspYphpCtt.t....Lptlpht.hFphFPshpplpsphls.ptQtuhplt.tps.................................. 0 19 39 58 +10819 PF10987 DUF2806 Protein of unknown function (DUF2806) Pollington J, Finn RD anon Pfam-B_001505 (release 23.0) Family This bacterial family of proteins has no known function. 24.50 24.50 25.10 24.80 24.40 21.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.22 0.70 -4.85 32 239 2009-01-15 18:05:59 2008-07-29 14:37:38 3 1 199 0 53 155 23 210.80 36 73.28 CHANGED tpRutpRtptpttppQpNlEsIhthAhsh...s.s-ssucs..hDsDWltpFhphAccIpsppMQpLWu+ILus.ElppPGoaSh+oLpsL+phTp+EAphhp+ssuhusphss-tp.cll.uhh.t.s.hphhpc.sspslsluphGlsauslLpL.-lGllptsE.....hEouplshsptlphphpspshpLps+pss.........lhhsYY+hTssGsELsp........Llss.........cssppYhctLts .......................................h.pRAhpRtpppptp+QpNlEpIhthAtsp......scs-..ssu-s...DpDWlh+FF-hApcIpNspMQcLWAplLtp.ElsNPGshSh+sLchL.psMT.KEApllp+ssuhusphGu.-pph+ll.Ghh.t.....................uhhshsc+hsspsl.slupatLsYuSLLhLh-lGLlpssE......hEoG.cl..phcssl.hls.Y.pGpsh...p.....Lpspscs.........lplhYY+FTssGsE.Lsp........LlGs.........KsstpYa-tl..s............................................... 
0 10 23 39 +10820 PF10988 DUF2807 Protein of unknown function (DUF2807) Pollington J, Finn RD anon Pfam-B_001516 (release 23.0) Family This bacterial family of proteins shows structural similarity to other pectin lyase families. Although structures from this family align with acetyl-transferases, there is no conservation of catalytic residues found. It is likely that the function is one of cell-adhesion. In PDB:3jx8, it is interesting to note that the sequence of contains several well defined sequence repeats, centred around GSG motifs defining the tight beta turn between the two sheets of the super-helix; there are 8 such repeats in the C-terminal half of the protein, which could be grouped into 4 repeats of two. It seems likely that this family belongs to the superfamily of trimeric autotransporter adhesins (TAAs), which are important virulence factors in Gram-negative pathogens [1] [2]. In the case of Parabacteroides distasonis, which is a cmoponent of the normal distal human gut microbiota, TAA-like complexes probably modulate adherence to the host (information derived from TOPSAN). 
21.90 21.40 21.90 21.50 21.80 21.30 hmmbuild --amino -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.03 0.71 -4.75 118 860 2012-10-03 02:33:51 2008-07-29 14:38:36 3 13 349 33 304 1003 203 162.60 17 72.10 CHANGED sFsplp.lssshslhlp.p...u.spp.plplpuscsl.hsplpspl..c...sspLpIphc......cshsssp..............h.......hlplshss.Lptlshsuu.uplpsps......................lpspphplph............sGuGslplp.lpspplpsphsGuuslp.ls....................Gpspphpl.pls......GuGplcupsLp.sppsplplsGuGshplt.sspplcupls..GuGsltht..GsP ......................................................................................................................................................aptlt.ht.s.sh.pl.hh..p....u....st...pl...l....p..s...t.p..p.h..hp.l....p.hth...p......sstLh.lthp.........................pthp..t..................h........lhl......t...h...p...l.ptlp.h....s.u...s..uslp..hps...............................................................l.p.s..s.p..h.p.lph................................sGu......usl...p.h...p...l..p.s.s...p.l.php.h..s.G..u.uslp..lp.......................................G.p..s.p.phph..phs..........Gsu...pl..ps...t.p..lt....sppsp.h.ph.pusuphph.....s...p...t..p..l...p..hp...ht....usupl.h.s........................................................................... 0 98 229 290 +10821 PF10989 DUF2808 Protein of unknown function (DUF2808) Pollington J, Finn RD anon Pfam-B_001529 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 
22.20 22.20 22.30 23.00 22.00 22.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.66 0.71 -4.58 57 175 2009-01-15 18:05:59 2008-07-29 14:42:16 3 6 79 0 83 185 160 141.50 22 69.84 CHANGED uhh.F.......s.ps.pplphhpspsttp..ptspahhhlhspcttpultplslsh...Pctacs..thpspplplpthp.s.........tpplP...stsphsp...s....spslplh.spPlssG.sslslshpslh.N..PptsGhYpFsspuhssGss..PhstalGohplsl .................................................F....sps.phlphhts.stht..tsphYhhhhhspstspsltplslsh...spthcs....hphptlpspsh..t....................spsls...tpsphsp...c....spslslhhspPlssG.sslslslpslp.N...PphsGhYhFsspshssGps..shshalGshhlph............ 0 16 57 79 +10822 PF10990 DUF2809 Protein of unknown function (DUF2809) Pollington J, Finn RD anon Pfam-B_001533 (release 23.0) Family Some members in this family of proteins are annotated as yjgA however currently no function for the protein is known. 23.50 23.50 24.70 24.30 22.90 22.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.96 0.72 -3.58 49 416 2009-01-15 18:05:59 2008-07-29 14:43:19 3 4 395 0 103 363 9 94.20 33 68.37 CHANGED GLhs+p........salpsahGDlLhshhlahhlthlhst............shhhluhhsllhuhhlEhhQlapsshl.sl+ssplutllLGssFsWhDlluYslGhh ..................................h.s+c........shlpsYlGDsLaslhl.ahhhthlhsph...................pstpluhhuLhFshhlEhuQLapsshl.slRss...slutllLGp..sFsWtDlluYslGs........ 0 38 73 87 +10823 PF10991 DUF2815 Protein of unknown function (DUF2815) Pollington J, Finn RD anon Pfam-B_002212 (release 23.0) Family This is a phage related family of proteins with unknown function. 
21.70 21.70 22.70 22.30 20.90 20.60 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.13 0.71 -4.74 20 443 2009-01-15 18:05:59 2008-07-29 14:50:00 3 2 386 0 55 299 47 172.00 42 92.86 CHANGED TKVlTsp.VRLSYs.plaEPcuh.sGp.....csKYSloll.......IPKuDspTlpuIcpAIcsAhcEGh.uKh...GsKhs..sslKhPLRD.GDh-ps.D-..tYss......saFlNAsS+p...+Ptll.....D+sspP...............l...h-psEVYSGCYuRsSIsFYAasssGNKGIusGLsNlQhl+DGEsLG.G+s.sAEDDF-slp..s-DD .................................................................................................hKVlTGp..VRhSYs.plaEP+oh....pGp.....csKYSloll.......IP...Ko.D...sp...Tl.....p.......tIcp.AI-sAhc-..Gh..uKh......Gtpls.........usLK.h.PLRD.GDh......E...Rt.....-..........D....sY....ts......saFl..NAsSpp.....tP.tll.........Dpstp....................l....h-puclYSGsYsRsSI..s..h..au...a.N..o..NGN....KGIAsGLsNlQhlc..DGEsLG.Gts...uA-DDFsshsps............................................. 1 28 46 52 +10824 PF10992 DUF2816 Protein of unknown function (DUF2816) Pollington J, Finn RD anon Pfam-B_002257 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.60 20.60 37.80 36.10 20.40 17.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.24 0.72 -4.38 12 70 2009-01-15 18:05:59 2008-07-29 14:53:14 3 1 3 0 60 73 1 63.00 55 23.64 CHANGED EYEppppp..VPhp+hlTDYYsVEYpTEYIPQshhEK.lEYVPV-+ht-RV-YhsVERpsshp .EYEEpppp..VP+E+hVTDYYAVEYQTEYlPQVh.EKhsEYVPV-RhpERVEYhsVERQVV+p...... 0 60 60 60 +10825 PF10993 DUF2818 Protein of unknown function (DUF2818) Pollington J, Finn RD anon Pfam-B_002280 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 25.20 25.20 22.80 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.20 0.72 -3.49 21 217 2009-01-15 18:05:59 2008-07-29 14:54:45 3 2 214 0 65 167 17 94.20 50 91.41 CHANGED uulWlllLLALluANLPF.lspRlhslh.Phts......KshhhRLlELllhYhlVGhluhhLE.pphGpltsQGWEFYAIThsLFllhAFPGFVYRYLh++ .....................uhahlllLALlsANLPF.LspRLFull.Pl+p............Ksh...hh+lhELlshYhlVGsLuahLE.uRuGshasQGWpFYAlThsLalVFAFPGFlaRYhh+................................... 0 12 36 51 +10826 PF10994 DUF2817 Protein of unknown function (DUF2817) Pollington J, Finn RD anon Pfam-B_002258 (release 23.0) Family This family of proteins has no known function. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.95 0.70 -5.36 32 141 2012-10-02 19:46:12 2008-07-29 14:55:34 3 3 121 0 72 727 173 323.40 32 89.81 CHANGED F.up.oYspARp+FLsAAcssGs.....plpsashsh..GssGEsLuhDlAhhGsscApp.lLlloSGsHGlEGasGSusQlshLp-t..hspshssssul.LhlHAlNPaGFuahRRssEcNVDLNRNFl.......DaspshssNssYsclpshLlPsphsss...tt..tptslhphhtp+GhtshppAlssGQYpaPcGlaaGGptssWSstsL+pllpcah.sss..c+lsaIDlHTGLGshGtsphltts..pcssshp+spphaGst....ltshhss......sususshpGhhhpu...h..hphhsshp.hsulslEF.....GThsshpVhpALps-pWLah.................asc.sss.sptttl.+pplt-AFYPssspW+ptVltp 
..................................................................................................................FupsYtpARp+FlsAA...pstut.....tlppa.pst.....G.....ssGEsLsh...Dl.Ah.......hGs.t.c..A..p+..lLllsS.GsHG...V...E..G......asGS.us..Q.....l.....s..h....Lpct..........hs...p...th....s........s...ss...ul....L.....hl.HAl..NPaGa..A..a..h..R..Rs.sEcNVDLNRNal........Da.s.p..shss....N...ssYtpl.p....shLhPsphsss...tt...stttl.tphhtpcG.suhppAlstGQYphPcGlFYGGptssWSptslcpllpcah..uss.......pclshIDlHTGLGshGtsph...lhts...psssthpcAt..p..h.aGss.............ltshhsu.......sususshpGhh...hps...h....hphh.s...ptp...hsslslEF.....GThss.......p...h..hpALp......s-pWLah.................a.sc.sts..sttttl.+ptlt-uFYs.ssssW+thll......................................................................................... 1 19 37 62 +10827 PF10995 DUF2819 Protein of unknown function (DUF2819) Pollington J, Finn RD anon Pfam-B_002304 (release 23.0) Family This bacterial family of proteins has no known function. 19.10 19.10 19.80 20.60 17.80 17.50 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.60 0.70 -5.67 20 554 2009-01-15 18:05:59 2008-07-29 14:56:58 3 1 539 0 47 328 5 305.80 70 59.78 CHANGED hstp+tlu.GLAsLpp.spsphtapVsaWpscpGVsuspphhLptsssshhhsssppsstpst.......tsDEcRlhsspssl-tus.lPppWplh-sNssLhstups..spAATllFuhsppspl-tLApplapLRRppGstLKIlVREs.pssLRtsDEpLLLssGANLllstssPhSRhLThIEulQGQhFoR.lPpDlcsLLsh.spshth+GYlssssFsptVpshhssohhsp.lptsLVpLphlPGlpstpsLpLC+hRRsGDlsTsssspLYLFLpACRhNDl-sALspIFclPlsDlFsschlatsptpIpuElppl .....................hpEYRSLF.GLASLRF.QGDQHLhDIAFWCNEKGVSARQQLslpQQsshWTLsppEEstIQP.........RSDEKRILSsVAVLEG.APPLSE.HWpLFsNNEsLFN-ART..AQAATlVFSLpQNsQIEPLARsIHTLR..RQRGS..AhKIlVREN.sASLRATDERLLLuCGANMVIPWNAPLSRCLThIES.VQGQpFSRYVPEDITTLLSM.TQPLKLRGFQp...WDsFCsAVpsMMsNsLLPAcGKGVLVALRPVPGIRVEQALTLCRPNRTGDIMTIGGNRLVLFLSFCRlNDLDTALNHIFPLPTGDIFS....NRMVWFEDcQISAELVQM...................................... 
1 5 11 29 +10828 PF10996 Beta-Casp Beta-Casp domain Mistry J, Wood V anon Manual Domain The beta-CASP domain is found C terminal to the beta-lactamase domain in pre-mRNA 3'-end-processing endonuclease. The active site of this enzyme is located at the interface of these two domains [1]. 20.30 20.30 20.50 20.80 20.20 19.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.83 0.71 -4.19 183 2338 2009-09-11 08:10:03 2008-07-30 13:33:19 3 44 1375 45 1245 2305 359 124.90 25 21.06 CHANGED sQEllhh.......Lpchhp...ptt...........................hslalDSshuhcssplap..pa..chhs...p.h....pphht.tt...................................h.ph.phh..............cshcpscp.........l..p..t.t.P........s...lIluuoGM.hsuG....hlhcalcphhscs+Ns.llhsG...aQupGThG......+pl .......................................................................................sQELlhh.......Lcphhpptt..............................................tphPlals..SshAtcssplap..pa..hp..hhs.ppht............cphhttpt........................................F...p..h..phl.........................................pshccppp...........l...tt.t...t..P............slllAusGM.h.puG...hshcah.c.ph....t....s....c.t+....Ns.llhsGYpspGThG+pl................................... 0 446 759 1049 +10829 PF10997 DUF2837 Protein of unknown function (DUF2837) Pollington J, Finn RD anon Pfam-B_002349 (release 23.0) Family This bacterial family of proteins has no known function. 
20.40 20.40 21.80 21.10 19.60 19.60 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.45 0.70 -5.22 23 271 2009-01-15 18:05:59 2008-07-30 15:56:18 3 3 243 0 64 198 15 241.70 42 92.79 CHANGED LThlIHlIsTLAYusRluGVRTt+lAsAhSLFNlhsLlSRsuNhhQuPLlupll-pulp.....................tpthusLtsphRh.llhuATlGTllGhlLlPTFlplFs+AIhth.cpsGSVP+Lhh+sh..ohpslcphcpplplPphp.lcpht...hppIP+cllllNhllouIYTlGVLuALYAGhLsPE.aRsTAutLSullNGhATILhslhlDPphSllTDcslcGcpspt-l+phshhlsho+llGTLLAQhlFlPuAhhIsalsc .........................hThlIHsl-TluYulRLuGVRsp+IAlAlSlhsllhLlSRTuNhlQuPLlGtlVDpuhh......................sss....hsL.t.hRl.lLluAolGTlluhlLhPThh+lFuhsIpph.-ssGS.h.+hhhpsh..sh.p.tl.+hh+c..hl+hPphphlcplp...htsIPK+lhllNhhsTuIYTsG.VLSuLYAuhLh.P-.atosAsshSGlINGhATILLslhlDPplullTDcslpucpstpshpphhshLhhoRlhGT.LLAQllhlPuAhhIhalh.p....................... 0 24 47 53 +10830 PF10998 DUF2838 Protein of unknown function (DUF2838) Pollington J, Finn RD anon Pfam-B_002381 (release 23.0) Family This bacterial family of proteins has no known function. 25.20 25.20 28.20 28.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.55 0.72 -4.18 23 224 2009-01-15 18:05:59 2008-07-30 16:17:06 3 5 177 0 166 216 8 108.90 41 24.04 CHANGED -Klsashulhslh....hsualhGthPphFhhaYTshhhhhMPlRaYoYpKpsaHYFLsD....hCYFlNhLsLlaI.....W.........laPpStpLFlssasluhGsLuhAlIsWRN.SLVhHSlDKlTS .........-KlsahhGVh..slh......hsualhGthPphhhhaYol.hhhhhPlRa.as.Y++..p...saHYFLhD....................hCYalNh...Lhllhl......W...................haPp..SppLFhs.sasluhGsluhAl.lhWRN.SLVFHShDKlTS.................................. 0 67 116 150 +10831 PF10999 DUF2839 Protein of unknown function (DUF2839) Pollington J, Finn RD anon Pfam-B_002396 (release 23.0) Family This bacterial family of unknown function appear to be restricted to Cyanobacteria. 
21.90 21.90 22.90 23.50 21.40 21.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.45 0.72 -3.94 37 107 2009-01-15 18:05:59 2008-07-30 16:21:36 3 2 77 0 38 103 267 65.90 40 86.86 CHANGED MGEAKRRppp.GLsP+ptcppp.........hlshlPlocpQsp.hhtloh+usWlGIuhLslhWlslphl....GPssuWW ........MGEAKRRcph.sLss+ttpppp...........llsWlPlT+sQsc.hhphos+uuWlGIuhhshhWlslRhlGPshGWW........... 0 6 25 36 +10832 PF11000 DUF2840 Protein of unknown function (DUF2840) Pollington J, Finn RD anon Pfam-B_002399 (release 23.0) Family This bacterial family of proteins have no known function. 25.00 25.00 25.50 55.60 21.70 19.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.87 0.71 -4.66 34 168 2009-01-15 18:05:59 2008-07-30 16:25:56 3 1 109 0 89 180 13 146.90 51 87.02 CHANGED LTcVELsWlEKRlEpWIRFG+sspEpllDRpRRlsuFtPuulFAFVRWAuNDaGTllSRlDIlRAlsPG-uaQTlPaV+PGG-lLL+lpGWPKVEcVLptIDAlEAlGIDPs-suPDaWRHVHNRLuAut.PRsYTt-RHtAWLtRRpl ................................LT+VpLsalEcRlEhalRFGcsucpphL.....DRpRRlssFtPGulFuhVRWtANDaGTlhSplDIlpAssPs-shQTlPhVcPGu-lLL+hcGWs+VcpVLptIDAIEAlGIsPs-VuPcaWRcltNRLuAs.s.psYTtERHtAWLtRRt.h...................... 0 9 54 74 +10833 PF11001 DUF2841 Protein of unknown function (DUF2841) Pollington J, Finn RD anon Pfam-B_002409 (release 23.0) Family This family of proteins with unknown function are all present in yeast. 
23.50 23.50 24.30 30.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.87 0.71 -4.27 34 181 2009-09-11 16:39:17 2008-07-30 16:29:38 3 2 115 0 128 184 0 134.50 39 28.24 CHANGED L.l.uss.ptlhpaYppsFcplQQhsC+tIAKAaIKllEP+KQspa.PYsttc..............poKPsWWP........ssVpH+EPDHLpK....................-RlpLLlaIlpphh.............hhsst+Lcpsstcstpt.hps.......t+hplLcEIacVtchE .........pluspptlpsYYcpuFcshQQhsC+tIAKAaIKllEP+KQspa.PYsGuc..............poKPsWWP........psVhH+EPDHLpK...................s-RlcLLlHIlpplht............htl.ssc+Lc-sstsspp..p...hps.......c+h.lLcEIacVtc..................................... 1 19 56 103 +10834 PF11002 RDM RFPL defining motif (RDM) Mistry J, Bonnefont J anon Manual Domain The RDM domain is found on RFPL (Ret finger protein like) proteins. In humans, RFPL transcripts can be detected at the onset of neurogenesis in differentiating human embryonic stem cells, and in the developing human neocortex [1]. The RDM domain is thought to have emerged from a neofunctionalisation event. It is found N terminal to the SPRY domain (Pfam:PF00622). 27.00 27.00 45.00 58.70 22.70 20.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.76 0.72 -4.29 23 72 2009-01-15 18:05:59 2008-07-30 16:34:42 3 3 20 4 24 79 0 42.00 65 13.91 CHANGED VVSQKsDI+PshQLGpLVS+IKELEPpL+slLpMNPRM+KFQ .VVSQKNDI+PshpLttLVS+IKELEPKL+plLpMNPRMRKFQ 0 6 6 7 +10835 PF11003 DUF2842 Protein of unknown function (DUF2842) Pollington J, Finn RD anon Pfam-B_002411 (release 23.0) Family This bacterial family of proteins have no known function. 
25.00 25.00 30.20 30.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.13 0.72 -4.11 51 214 2009-01-15 18:05:59 2008-07-30 16:46:55 3 1 213 0 71 172 148 61.50 37 83.93 CHANGED RKhluhllLllhlslYhllusslsshhh....p.shhlphlhYlllGllWl..LPhthlhphhsps- ...RKhlGsllLlshlhlYullAsslAsthlh....ssshWscLlaahlsGllWl..LPAhslltWMupP..... 0 17 44 53 +10836 PF11004 Kdo_hydroxy DUF2843; 3-deoxy-D-manno-oct-2-ulosonic acid (Kdo) hydroxylase Pollington J, Finn RD, Eberhardt R anon Pfam-B_002426 (release 23.0) Family This is a family of 3-deoxy-D-manno-oct-2-ulosonic acid 3-hydroxylases, which catalyse the conversion of 3-deoxy-D-manno-oct-2-ulosonic acid (Kdo) to D-glycero-D-talo-oct-2-ulosonic acid (Ko). It contains a potential iron-binding motif, HXDX(n)H (n>40). Hydroxylation activity is iron-dependent [1]. 25.00 25.00 38.70 74.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.69 0.70 -5.41 13 168 2009-09-10 20:14:51 2008-07-30 16:52:13 3 2 164 0 50 124 11 277.40 55 93.40 CHANGED cWsspss.sc.ttshhstLEpGKVLaFPcLsFsLsscEc.shLcPsh........sDsKpKNIShcPppGsl+G............ssscssstttlpullsRapppstsLlppLLPpYsssL+husTShR..PsphtsRssSWRKDDoRLHVDAFPSpPshGcRILRVFoNINPcstPRlWRlGEPFcshA+RFl.Ppsps.hP.hpuhL...........LptLtlTKph..........RSpYDHhMLpLHDtMKuDh-YQ+suPQ.pphsFPPGooWlsFSDQssHAsMSGQaMLEQTaaLPscAhtcsppSPLplLE+LsG+sL ..........t.Wstp...ss.tpphltuLEpGKVLaFP+LpFslpstEc.tLLDPsl........sDsKRKNIShcPptGsLpG............VsG.D...uss..utlRsLlsRatppAtsLVssLhPpYp...stL+sAsTSLR..PhpltsRpTSWRKDDSRLHVDAFPSRPNYGERILRVFTNlNPpGtPRlWRVGEPFpslA+RFL.Pplcs.sP.hsAWL...........hchL+lTKp...........RStYDHLMLpLHDtMKADL-YQKsuPQ.QshsFPPGSsWlCFSDQssHAsMuGQFMLEQTFaLPVpuMtpsppSPLtILE+LpG+sL... 0 9 26 37 +10837 PF11005 DUF2844 Protein of unknown function (DUF2844) Pollington J, Finn RD anon Pfam-B_002433 (release 23.0) Family This bacterial family of proteins has no known function. 
21.20 21.20 21.90 40.70 20.30 16.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.63 0.71 -4.36 22 138 2009-01-15 18:05:59 2008-07-30 16:55:01 3 1 72 0 46 122 8 136.00 42 85.09 CHANGED ApAsLGusssossusstshst.........................sttssssssuYol+phs.usGT..........slREYsssuGhVFAluWpGPshPDLssLLGoYhspahsuspt....tttsttshtVpssDlVlcouG+htsasG+AalPshlPuGVsss-l ........................ApAtLGGsshossAsststss.........................ssuussss..suYTl..RphshuuGT..........sl+EYsosuGsVFAluWpGPshPsLssLLGuYFPpYpuuspssc....tA+uspssstVssuslVlcoGG+MtuasGpAWLPsALPuGlossDI... 0 5 18 29 +10838 PF11006 DUF2845 Protein of unknown function (DUF2845) Pollington J, Finn RD anon Pfam-B_002437 (release 23.0) Family This bacterial family of proteins has no known function. 21.00 21.00 21.30 21.10 20.80 19.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -9.88 0.72 -4.15 40 206 2009-01-15 18:05:59 2008-07-30 17:05:35 3 2 125 0 59 181 22 83.40 32 79.66 CHANGED sssuohRC.s.sslVohGDsth-VLtKCG-Ps...............p+s.hststst...............tthsttspsplE......cWsYshGP.sphhphlpFcsG+LscIcstt ............tssohRC.G.spLVstGDsth-Vl....t+CG...pPh...............s+cshshshsh...........................sthh...ptptsplE...........cWlY..GP....sshhhhLpFcsu+LhcIcst.t........................... 0 21 34 50 +10839 PF11007 CotJA Spore coat associated protein JA (CotJA) Pollington J, Finn RD anon PRODOM Family CotJA is part of the CotJ operon which contains CotJA and CotJC. The operon encodes spore coat proteins. Interaction of CotJA with CotJC is required for the assembly of both CotJA and CotJC into the spore coat [1]. 
25.00 25.00 26.10 25.90 22.80 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.52 0.72 -4.30 50 348 2009-11-09 12:01:11 2008-07-30 17:14:02 3 2 319 0 62 234 1 37.80 39 49.19 CHANGED htlAhAYVPhQpap.s..hY.ssccALppGTlFP-Lst.........Pa ......p.sphYlsaQ..hs.s...a.sPc-ALp+.GTlaPsLhcPY........... 0 31 48 53 +10840 PF11008 DUF2846 Protein of unknown function (DUF2846) Pollington J, Finn RD anon Pfam-B_002451 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as lipoproteins however this cannot be confirmed. 34.30 34.30 34.30 34.30 33.90 34.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.35 0.71 -10.54 0.71 -4.26 42 198 2009-01-15 18:05:59 2008-07-31 09:09:09 3 2 175 0 66 183 26 121.40 24 62.95 CHANGED hloGC...............tstsssphpphtssp.....sp.utlYlY..R.suhhu......s..uhphsl.alsuphlGp.htsssahah-lssGp.aplus...cpthts................splslss-uGcsYalc.p.phss...............hsGsssl .............................................hsGC.............hhstt.utp.htthpssst..........sp.AslYlY....Rssohhu......t....shthsl.a......lsu..ch....lGp.htsssahah-lssGp.aplus......ppthts................pplslsscuGpsYal+.p.p.sh...............hsG................................................ 0 17 33 52 +10841 PF11009 DUF2847 Protein of unknown function (DUF2847) Pollington J, Finn RD anon Pfam-B_002453 (release 23.0) Family Some members in this bacterial family of proteins with unknown function are annotated as YtxJ, a putative general stress protein. This cannot be confirmed. 
23.10 23.10 23.10 55.60 22.50 23.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.18 0.72 -4.44 34 477 2009-09-11 05:12:55 2008-07-31 09:20:33 3 2 462 1 92 238 54 101.40 49 90.37 CHANGED hp.LpohpphcplhcpotppssllFKHSTsCsISphuhppacphhstppp..lssYaLclhptRslSNpIAccasVpHcSPQllllcNGpslacsSHhsIstssLp ............KLooI-paEpllEc...Nch..hhlhKHSpTCPIStsAY...cpFppah.Ecc...hcuYYLhVQppR-lSshIAcchsVKHESPQshYhhsGchVWNssHtsIshsuL.s..... 0 34 68 84 +10842 PF11010 DUF2848 Protein of unknown function (DUF2848) Pollington J, Finn RD anon Pfam-B_002463 (release 23.0) Family This bacterial family of proteins has no known function. 23.40 23.40 23.40 48.00 23.30 22.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.13 0.71 -4.83 37 154 2012-10-02 17:33:27 2008-07-31 09:27:34 3 2 143 0 64 156 32 191.50 44 83.57 CHANGED AGWTGRDpsAlp+HIsELstLGVs.PussPlaYRVusshLopuspl.pV.GscoSGEsEhlllps...puplaluluSDHTDRclEsauVuhSKQhCsKPlupphWchs-Vts.HWDpLhLRua.ht.s..G.ppsLYQ-GoLuulhsPs-Llsths...sps.......hss...GsuMhsGTlssh.Gsl...psAspachELpDPstsRolcHuY .............AGWsGRDtsAlpcHIcELt.t.LGVstPussPhaYRVusshLTQuspl.pVh.Gs.coSGEsEhl.Llps...sGchhVuluSDHTDRclEsauVshSKQhCsKPluppsWch.s-Vts.HWDplhLRoa.hs.s..G.-.csLYQ-GsLuulhsPp-Llpphs...tts.........thss.GsuMhsGTlus....h.Gul...csuspFchELpDPlhsRolcHsY............................. 0 14 28 48 +10843 PF11011 DUF2849 Protein of unknown function (DUF2849) Pollington J, Finn RD anon Pfam-B_002560 (release 23.0) Family This bacterial family of proteins has no known function. 
22.50 22.50 24.30 33.10 22.20 17.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.99 0.72 -3.74 52 228 2009-01-15 18:05:59 2008-07-31 09:33:50 3 2 227 0 69 182 40 89.40 38 80.45 CHANGED sKVlTANcLh-GsVVahsusspWoccls-AtlhscctpuphhLttu..ttpsspVVGsYls-sp.sssG.scPs+..hREphRspGPoshhctp ...KVlTANRLhDGpsVWLsAs...GpWscslspAhlhccs-ssssL.ttu...sspsstVVsshllDVc.pp.sG.....lhP..h+...lRE+IRt.pGPTlhs...s.... 0 19 44 52 +10844 PF11012 DUF2850 Protein of unknown function (DUF2850) Pollington J, Finn RD anon Pfam-B_002587 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Vibrionaceae. 21.00 21.00 21.30 21.20 20.70 18.30 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.48 0.72 -3.80 16 208 2009-01-15 18:05:59 2008-07-31 09:42:15 3 1 111 0 20 110 1 78.70 42 55.31 CHANGED plYGpWlE.ssssYAs-phsLospGVhhNsRlluTsF-FDGphlpapsGsshhhYphssstps.pl+phpPuaY.shFhh ...clYGpWlE.ssssYts-plplsppGVhhNsRLloTpF-FDGstlphpsGsthaha-l.sptps..QhKpppss.s...hFh......................... 0 2 5 15 +10845 PF11013 DUF2851 Protein of unknown function (DUF2851) Pollington J, Finn RD anon Pfam-B_002589 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 40.10 35.00 18.50 18.30 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.19 0.70 -5.73 27 226 2009-01-15 18:05:59 2008-07-31 09:46:59 3 2 217 0 67 232 79 357.70 37 86.43 CHANGED MpEchLHalWpa+hFstps....LpTTpGc.lpllssGhhNpp.uGPDFFNA+lcIsup.hWsGNVEIHh+SSDWahHpH-pDsAYDsVILHVVaEpDs...pI.hRpsss.lPsLhLpthlspplhcpYppL.....htpcp..hlsCtspltslsshhhpsW....l-+LhhERLEpKsphlppLLppspscWEslhaphLs+sFGhplNucuF.plApslsFpslcK.......ppps.hplEALhaGpAGLLp.....tc...pD....pYhtpLpcEapaLp+KapL..pshssp.hpFhRlRPsNFPolRLAQLAsLap+p........psLFSplh.pspohp-ltplhp.lps.............SsYWcsHYsFs+topp+sKp...lucshl-lllINTllPlhasYupptupcphp.phhphlpplpsEpNs .....................EphLHYlW+a+hFshps....LpTT....sGpslcllcsGhhNps.AGPDFFNA+l+I.ssp.lWsGNVEIHh+uSDWatHtHcpD.sYcsVILHVVh....pt..Ds.....pl.h.c......p.sGp.lPpl.Lp..l.....s.pplhppY..c..pL......htspp.......a.sC..hph..lss.lsphhhpsW....hstL.hERLEpKsptIpphLppspssWEpshFhhLA+sFGaslNu-AFcphAppl.Ph.pslpK.......p+s...sh..hQlEAlhhGpAGLLp..........pp.....pD.......sYhtpLp.cEapaLp+KapL...pshssphW+FhRLRPtNFPplRlAQLAtLatpp..........psLhSplh.pspslpplpplhp.sps................SsYWpsHYpFsp.St.ppp.Kp...lupstlslllINTllPhhasYupppspcth.p...pcshphLcplpsEpNp............................................ 0 32 62 67 +10846 PF11014 DUF2852 Protein of unknown function (DUF2852) Pollington J, Finn RD anon Pfam-B_002611 (release 23.0) Family This bacterial family of proteins has no known function. 
22.50 22.50 23.20 41.40 22.30 22.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.60 0.71 -4.13 35 197 2009-01-15 18:05:59 2008-07-31 09:51:09 3 1 178 0 58 151 19 110.90 47 81.94 CHANGED shLDscG+sAWIAuMVLGFIhFWPlGLALLuYhIWS....+RMh..sp................uspscptp.t.t+hth.ht...............hpoSGNsAFDsY+t-TL+RLEEEQc.......pF-sFLcRLRcAKDKpEFDpFMs-Rpp ...................s.hlcsthpsAhIAhMVLGFhl.FWPl....GLAhLAYhlau......p+ht.thpc................spttcph.h.th...cptptp.s..................hsooGNsAFD-aRp-pLcRL-EEp+.......-F-sahpcLR+A+DK-EFDpFMs-Rc.s.............. 1 11 31 41 +10847 PF11015 DUF2853 Protein of unknown function (DUF2853) Pollington J, Finn RD anon Pfam-B_002619 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 31.80 31.70 21.30 19.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.36 0.72 -3.56 29 185 2009-01-15 18:05:59 2008-07-31 10:25:34 3 1 180 4 55 137 20 101.30 48 93.30 CHANGED scYhsDlKccsut..sDh-llpKlstuhGPuIYNpDuuhVuuSDtpEL-pV+pNalhKKLGL.sD..sscLcsuIppVhEphGtopRsKaRsshYYhLsK+hsKpSs ...........s.cYhsDl+chsus..sD.sllpKlsptLG.AlhNpDuShVSuSDscELcpV+sNalhKKLGl.pD..sschDcuIscVsEshtss.RNKaRsshYYLlAKphGK.t.h.......... 0 12 40 48 +10848 PF11016 DUF2854 Protein of unknown function (DUF2854) Pollington J, Finn RD anon Pfam-B_002643 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 26.30 35.40 20.00 24.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.95 0.71 -4.67 23 116 2009-01-15 18:05:59 2008-07-31 10:33:03 3 2 100 0 54 115 127 144.00 44 69.27 CHANGED lGhlAYhs........ssssLSLsshFYGlPlLLGGLALK......uuELpP.shptssssplhtlR-p.ATsp.splhpDVTRaRYGQcAHLEsSLctLtL..h....s--p.PpLhtlcEhptpusYuLhLcF-ssuVsl-cWp-K.p-+lu+FFu.GLc............Ac.lsp ..............................hGFsAYhs........ssusLslsshFYGlPlhLhGlALK......suEL+PlP..psots-hhsLR-ppATsp.splRpDVTRaRYGpcsHL-cuLcpLtL.s.....sccphPhLptlcEp.s.h................-.G.pYsLlLcF-us.tlsLppWpc..+..QpKhspFFGPGlpAclt........ 0 12 38 50 +10849 PF11017 DUF2855 Protein of unknown function (DUF2855) Pollington J, Finn RD anon Pfam-B_002665 (release 23.0) Family This family of proteins has no known function. 27.00 27.00 27.70 27.10 25.90 25.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.78 0.70 -5.28 35 108 2009-01-15 18:05:59 2008-07-31 10:38:43 3 2 98 0 56 115 238 310.10 33 86.50 CHANGED FAlTANNlTYAshG-th.......sYWpFFPs..........ss...................uaGhlPVWGFAcVlpSpssslssGERlYGYaPhAoc...LslpPscVsssuFhDsusHRpsLsPlYNpYtRssuDstaps..sp-shphLlpPLFhTSaLlsDaLt-ps.......................a.aGA.......ppllloSASSKTAhGLAasLpt....pssuhcllGLTSssNhs.........FVculGsYDpVloYDclssLss........sssslhVDhAGsspllssLHp+hu-pLt....hsshVGsTHhcp.....ts.ssslPusp...sphFFAPsphp+Rhc-WGsssapp+hspuWpsFh..ppu......psalplpchpG.cAhppsYpchlsGclsP 
.................FulTANNlTYAhhG-..t..h.......tYWpFFPs..........pp...................saGhlPlWGaApVltSppssltsGpRlaGYaPhuoa...Lhl....ps.sclsttuFhDs...usHRtsLsslYNpYtp..sss....-s..ha..ps..pp-shphLhRPLFhTSahlsDalt-ps............................h.auA.......pplllsSASSKTAhuhAahLp....................tptshcllGLTSssNhs.........FscuLGhYDpVlsY..-plsslss...........ss.slhVDhuGsssl....hspL+p+huc..plt........hss.hlG...hoc.pph...........................ts.stshsusp.......sphFFAPsphpcRhp-hGsst.htp+hstuWppFh.tts......tsalplpphpG.puhppsatplhsGpssP............................ 0 16 35 47 +10850 PF11018 Cuticle_3 Pupal cuticle protein C1 Mistry J, Coggill P anon PRODOM_PD021041 Family Insect cuticles are composite structures whose mechanical properties are optimised for biological function. The major components are the chitin filament system and the cuticular proteins, and the cuticle's properties are determined largely by the interactions between these two sets of molecules. The proteins can be ordered by species. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.70 0.71 -4.12 14 170 2009-01-15 18:05:59 2008-07-31 10:41:37 3 7 37 0 121 192 0 118.90 26 69.21 CHANGED sssAVuuoppsllRSas..usVSpYSKuVDTPaSSV+KsDoRloNsshp.uhutshh........................................aAAPshs...........huAPshsphAaAAPs.hhttts.......................tPsht..ths.huAPs.........spssYAAsAshhtt.h..................................stslsY.....SPAssV...SHhoas.GhGspYua ............................................................................t.t.t..sshS..ph..p..h...h..........................................................................................................hhh.sAPsht...............hsAPs..ht...t...s.a.u.sPs.hh...tt..........s....................uPsht....p.h..httPs..............h.st.s.....................................u.............................................................................................. 
0 31 42 102 +10851 PF11019 DUF2608 Protein of unknown function (DUF2608) Mistry J, Coggill P anon PRODOM_PD862984 Family This family is conserved in Bacteria. The function is not known. 22.10 22.10 22.10 27.30 21.60 22.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.52 0.70 -5.39 27 271 2009-09-11 09:18:18 2008-07-31 10:42:32 3 3 120 0 52 180 6 238.10 23 89.29 CHANGED hhpspohpcV.tchlppt......pssLllhDlDcoLl.psp.pslups.ahphphpcl......thhts.pcshcplh.chl....hl.phhphchl-sshsphlsp.lpppphslhulophs........shpthphcpLpp.hslsFssss.........h.pps.h...hh.sthspsshahcGIlhosshs+u....tsLphhLsphsthPcpIIalDsspcsltshtpthpp..tsItahGhpYsstpt......s.hsphsps.h.pptphlpschsthhhpth.............sp ...............h.pspshpcl..chltpt......pshLllhslDpsll.ps.........t.p.htppshhph.hppl..........h.s..ppshp.phh.tchl....hh.pptphchlEs..phsshlsp.hpppthslhulophs................shpthhhctLpp.hslsFspps...................pp..........stttptshahpGIlaous..hs+u....tsL.hhLpphshhPc.....pllalssppcsl.shpphhpp...hsIsahGhcYsshph.........h.hspltp.hhh.p..tchlps-hthh..p......t........................................................... 0 12 17 27 +10852 PF11020 DUF2610 Domain of unknown function (DUF2610) Mistry J, Coggill P anon PRODOM_PD199303 Domain This family is conserved in Proteobacteria. One member is annotated as being elongation factor P but this could not be confirmed. This domain is related to the Ribbon-helix-helix superfamily so may be a DNA-binding protein. 20.70 20.70 20.90 21.40 20.60 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.63 0.72 -4.20 9 66 2012-10-02 18:44:02 2008-07-31 10:43:35 3 2 66 0 20 44 4 81.70 67 73.33 CHANGED hKKF....hlsC-FGGQpuPFs.lYIGpP+s-sHPlpaQssWLScERGGsIPpcVh-SLp+LacLAccNslsFt-LCVYALs............sApps ...............YKcF....EFDCDFuGQRAKFK.FYIGTPQEGHHPLQFQAKWLSDERGGTIPD-VMcAISQ.LNDLAKKNGVPLsDL....CVYALGsAQE.s............... 
0 5 9 10 +10853 PF11021 DUF2613 Protein of unknown function (DUF2613) Mistry J, Coggill P anon PRODOM_PD383784 Family This is a family of putative small secreted proteins expressed by Actinobacteria. The function is not known. 21.70 21.70 23.50 23.30 21.40 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.60 0.72 -4.26 8 144 2009-01-15 18:05:59 2008-07-31 10:44:20 3 1 135 0 33 86 0 57.40 51 94.35 CHANGED Ms........RhlsPAlASAVlGlsLGusAlhGlThhsppsopPsh.p.ussuDsulLspVEYGuR ..................M.RhluPAsASsVVGllLGuuAlFGlTLhsppDspPsl..s.uscssSS.VL.s..cVEYGsR. 0 5 20 29 +10854 PF11022 DUF2611 Protein of unknown function (DUF2611) Mistry J, Coggill P anon PRODOM_PD055124 Family This family is conserved in the Dikarya of Fungi. The function is not known. 20.40 20.40 26.30 25.80 20.20 19.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.52 0.72 -3.89 20 135 2009-01-15 18:05:59 2008-07-31 10:45:16 3 3 114 0 102 127 0 71.80 35 85.81 CHANGED MGssYpIhG+pVsSHhLAluTLGolhu.slshsstGss.....ps....ssPsIsASSc........DEEKFIppaLtch.............ppcp ..MsshYpIhG+pVssHhLAhuTLGshhu..sshh.ussGsp.....................ptt......ssPPIsASSs.........................DEE...c...FI...pcFLcphctt.......pp................... 1 27 58 90 +10855 PF11023 DUF2614 Protein of unknown function (DUF2614) Mistry J, Coggill P anon PRODOM_PD355753 Family This is a family of proteins conserved in the Bacillaceae family. Some members are annotated as being protein YgzB. The function is not known. 
19.90 19.90 20.00 20.20 19.80 19.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.66 0.71 -4.29 9 167 2009-09-14 14:03:03 2008-07-31 10:46:40 3 2 166 0 33 113 7 109.80 67 95.57 CHANGED thKYSsKINKIRTFALSLVFlGhllMYlGlFF+sp.llMslFMlLGhLslIASTsVYFWIGhLST+AV.VlCPsCtK.TKhLGRVDhCMHCcpPLThD+sLEGKEFDEpYNp.....K .............................u..KYSsKINKIRTFALSLlFlGhhlhY...l...G....l...FF...+...pshllMThFMhlG.hLulIASTVVYFWIGML.STKs....VQll.......CPSC-KsTKMLGRV..D.tCMHCNpPLTLD+sLEGKEFDEKYNKK.shp............ 1 10 23 27 +10856 PF11024 DGF-1_4 Dispersed gene family protein 1 of Trypanosoma cruzi region 4 Mistry J, Coggill P anon Pfam-B_187 (release 23.0) Domain This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. Other domains on this protein include DGF-1_N, DGF-1_2, and DGF-1_5. This domain is just downstream from the C-terminus, but not the C-terminus of proteins, also annotated as being DGF-1, that constitute family DGF-1_C. 20.90 20.90 23.60 23.10 19.90 19.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.89 0.72 -3.59 20 257 2009-01-15 18:05:59 2008-07-31 10:47:44 3 4 2 0 34 257 0 73.20 79 3.14 CHANGED VDGCNRTPuMP.LSHTATLTETRShTPT......W....TPuh..STs+YSPTp.....YusTETLQVTETVALsPTRTPTA..SVSSTLWWSD ......VDGCNRTPuhP..LSHTAT..LTETRS.......T..P..T......W........TPSh.....SssHYSPTp.....YGPTETL......QVTETVALsPTRTPTA..SVSSTLWWSD...... 0 0 0 34 +10857 PF11025 GP40 Glycoprotein GP40 of Cryptosporidium Mistry J, Coggill P anon Pfam-B_197 (release 23.0) Domain This family is highly conserved in Cryptosporidium spp. Many members are annotated as being a 60 kDa glycoprotein. 
19.70 19.70 19.70 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.91 0.71 -4.68 11 884 2009-01-15 18:05:59 2008-07-31 10:48:02 3 1 25 0 1 890 0 141.60 80 54.38 CHANGED EoTPKEECGTSFVMWFGEGTPVATLKCGuYTIVYAP.KDpTDPAPRYISGEVpoVTFEK..SDNTVKIKVsGpEFSTLSosSSSPTENsG.Sus.QspSRSRRSLoEEsuETsATVDLFAFTLpGGKRIEVAVPssc-suKRsKYSLVADDKsFYTGuNSGsosGlY+L ....EsTPKEECGTSFVMWFGEGTPsATLKCGsYTIVYAP.KDpTDPAPRYISGEVpoVTF-K..p-sTVpIKVsG.-FSTLSssSSSPTENsG..Sus...QspSRSRRSLo.EE....su..Es.ATVDLFAFTL.cGGKRIEVAVPssE-soKRscYSLVAsDKs.FYTGuNSGsosGlY+L.................... 1 1 1 1 +10858 PF11026 DUF2721 Protein of unknown function (DUF2721) Mistry J, Coggill P anon Pfam-B_520 (release 23.0) Family This family is conserved in bacteria. The function is not known. 28.30 28.30 28.60 28.50 26.80 28.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.53 0.71 -4.51 95 398 2009-01-15 18:05:59 2008-07-31 10:48:42 3 1 349 0 183 381 997 124.60 29 85.49 CHANGED shssPshLlsuIuhLLhshosRhspl...ssphRpLp....sp..hcs.pps.th......ptpl....ppLc+.RhpLIppshhhushohllsslshhhlFl...........shshhsshlFshuhlhLhhuLlhhLhElpluspuLclph .................hssPuhLhsAluhlLhuhoNRhhplsshlRpLp.............sp....hpp....stsshh..............ttp.lpsL++Rhpll+th.hhushShhlsslshhhlal............shphhsshlFshullhLlhoLhh.lhElploscuLclp........ 0 49 119 161 +10859 PF11027 DUF2615 Protein of unknown function (DUF2615) Mistry J, Coggill P anon PRODOM_PD288703 Family This small. approximately 100 residue, family is conserved from worms to humans. It is cysteine-rich with a characteristic FDxCEC sequence motif. The function is not known. 
20.80 20.80 21.20 34.70 20.70 20.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.84 0.72 -4.09 13 97 2009-01-15 18:05:59 2008-07-31 10:55:39 3 2 82 0 60 106 0 96.60 47 97.95 CHANGED MuDs......FDsCECIaSHEtAMRRLlsLLRQSQuYCTDoEC.pDlPu.P..ptsusus...shhhlhhsWhllAhsLYlhRPs.....ohRssp......sssKPpsspsssuss...PPsPsls ......Mu-s..FDPCEClhSHEtAMRRLIslLR.QSQuaCTDoEChp-.lsG.P.......uu..sss.........shhhlh..hsWhllAhhLallRPs.....shRssp.........hss.KPssspssps...........PPsPs..s............................ 1 23 28 42 +10860 PF11028 DUF2723 Protein of unknown function (DUF2723) Mistry J, Coggill P anon Pfam-B_590 (release 23.0) Family This family is conserved in bacteria. The function is not known. 24.20 24.20 24.30 24.90 23.80 24.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.09 0.71 -4.79 45 364 2012-10-03 03:08:05 2008-07-31 10:55:57 3 5 295 0 152 370 354 175.40 39 18.99 CHANGED DsGEaIusuhpLtVuHPPGAPLahhlGph.Fohhs...huplAhtVNhhSuluSAholhhhaaslshLltc.h..............tphstsphhshhuuullGALAauFSDoFWFsAVEuEVYAhuuhh.sAllhalhLpWccph.ps.............css+WLlllualhGLSh.GlH...hhsLLslPAluhlaaa+ca.tploh+s .....................................DsuEaIssuhcL.pVuHPPGAPhFhLl.up.......l..F.o.h.F....s.....pss....plAh...hV.N.hhSuLhSAsslhFLFaolo+Ls+c.hh.................t.pths.hsphlhlhuuGllGALsaoFSD.oFWFsAVE..uE.V.YAhuuhF.sAllhWL....hL+.....W.-c.tssps................cusRWllLIAalsGLSl.GVH...hlsL....L.s...lP...AIshlaa.a+ch..p.p...t...................... 1 64 106 129 +10861 PF11029 DAZAP2 DAZ associated protein 2 (DAZAP2) Pollington J, Finn RD anon PRODOM Family DAZ associated protein 2 has a highly conserved sequence throughout evolution including a conserved polyproline region and several SH2/SH3 binding sites. It occurs as a single copy gene with a four-exon organisation and is located on chromosome 12. 
It encodes a ubiquitously expressed protein and binds to DAZ and DAZL1 through DAZ repeats [1,2]. 25.00 25.00 33.60 31.60 23.60 23.40 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.06 0.71 -3.71 9 94 2009-01-15 18:05:59 2008-07-31 11:08:40 3 3 68 0 56 93 0 128.90 44 53.94 CHANGED YssAPPu...YpphYpsuashPPsA..ths.hsAsa....Pss.hhhP..stshslushuppsPMtaYPhG..........sVYPsGu..........TVhVpGG.aDAGARFGsGsu.s.oIPPPPPGpsPNAAQlAAMQGtNVlhTQRKsNaFhGGSsGGYTIW ...................................................................................................................PPs....Ypth..Ypss...as.h...ssss....thsthsush....Pu..ss.ha..hPh...spshsl....u..sh.up.s.....h.P..h...u.YY.PlG..................s....hY..PP...Gu..................olhVpuu..a.DAGARF.ss.u.us.sssIPPPPPGCsPNAAQLAsMQGtsVlloQ+KssFFhGGosGGYThW..................... 0 16 20 36 +10862 PF11030 Nucleocapsid-N Nucleocapsid protein N Pollington J, Finn RD anon PRODOM Family This is the N protein of the nucleocapsid. The nucleocapsid functions to protect the RNA against nuclease degradation and to promote it's reverse transcription [1]. The NC protein promotes viral RNA dimerisation and encapsidation and initiates reverse transcription by activating the annealing of the primer tRNA to the initiation site [2]. 
19.60 19.60 22.00 26.80 17.30 16.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.20 0.71 -4.44 2 41 2009-01-15 18:05:59 2008-07-31 11:15:03 3 1 5 0 0 33 0 133.30 84 100.00 CHANGED MNSMLNPNAhPhQPpPQVVAhPhQYP.GFpPtFRRpRNPGFRPMFpRR..NNuNQNRuRQsR.RlQNppRG..hNhpsstQRuNRRQ.NQ.S.slPFEQQLLMMANETAhuATaPPEhQslAPTKLVKIAKRAAMQIVSGHATVElSsGspDos++lATFTIKVshN ...MLNPNuhPFQPQPQVVAMPIQYPMGFQPRFRRRRNPGFRPMFQRR..NNSNQNRSRQsRsRIQNQRRG..lNoSRTQQRANRRQNNQQSLSLPFEQQLLMMANETALSATFPPELQSLAPTKLVKIAKRAAhQIVSGHATVElSsGppDos++lATFhIKVshN 2 0 0 0 +10863 PF11031 Phage_holin_T Bacteriophage T holin Pollington J, Finn RD anon PRODOM Family Bacteriophage effects host lysis with T holin along with an endolysin. T disrupts the membrane allowing sequential events which lead to the attack of the peptidoglycan. T has an usual periplasmic domain which transduces environmental information for the real-time control of lysis timing [1]. 19.80 19.80 19.90 22.40 18.40 19.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.30 0.70 -5.19 5 43 2009-01-15 18:05:59 2008-07-31 11:27:23 3 1 38 0 0 44 0 200.40 49 98.42 CHANGED scVStuh+u-lLhslLDRLFKDssoGclLlpRVhlllLLFlMAllWYsssElFuaYKco+YETYsEIlQsERsc+FEsAApEQLQIVHVSScADFSuVaSFRPKNLNYFVDlIAYEGKLPoTlsEKsLGGaPVNKTS-EYpVHLsG+HYsScp-F...AYLPo+ccohE...lsYMYSCPYFNLDNIYAGoIoMaW++KPc.Is-.....E+LssICNQAARILGRA+ .........................s-lLFGlLDRLFKDsuTGKVLhSRVhllllLFlMullWY+s-plhshYKpopapsYschlpp-+ss+F-ssAhEQLQIVHlSStADFSAVaSFRP+NlNYFVDllAYEG+LPsolscKsLGGaPlDKTSpEYpsHLsGppa.Sspch......saLPo+..c.s.c...hsYMYSCPaFNLDNlYAGoluhaWhc.c.Pp.lsp.......-+LpslCuQAuRhLGRs+... 1 0 0 0 +10864 PF11032 ApoM Apolipoprotein M (ApoM) Pollington J, Finn RD anon PRODOM Family ApoM is a 25 kDa plasma protein associated with high-density lipoproteins (HDLs). ApoM is important in the formation of pre-ss-HDL and also in increasing cholesterol efflux from macrophage foam cells [1]. 
Lipoproteins consist of lipids solubilized by apolipoproteins. ApoM lacks an external amphipathic motif and is uniquely secreted to plasma without cleavage of its terminal signal peptide [2]. 20.60 20.60 21.00 20.70 20.00 20.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.31 0.71 -4.72 4 77 2012-10-03 08:47:39 2008-07-31 11:35:15 3 2 40 5 37 102 0 164.30 37 93.66 CHANGED MhHplWsaLLYLYulhhsSlu.CPu.s.LsssGlctppFPp.aLGpWYFlAuAA.PspctLATFcPlDNlhFshttuuss.pL.LRAsIRhKsGhCVPRcWhYhLscGsT-LRhEG+Pch+TpLFS...usCPcsIILKET..spuYpRhLLYuRpPpsstcsVp-FpshsuChsao.hL.hPppQ-ACploS .......................................h..hh.hh.lhhts....C.....s.lss.shstpph...hLGpWaFlAuAu...spppLtphpshD..shhhshssuoss.pLplptslRhcpGhClscpW.h..Y.pLs..p..s..o.ss..L.p..h...E..G+.sc...h+spLas...ssCPss.IhLp.Eo.........spsapR....hL.LYsRsscss-cslEEFcuhspCLshc..thh.hP.ppthC.h........................... 1 1 3 18 +10865 PF11033 ComJ Competence protein J (ComJ) Pollington J, Finn RD anon PRODOM Family ComJ is a competence specific protein [1]. 25.00 25.00 25.90 25.40 24.70 24.30 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.37 0.71 -4.49 7 123 2009-01-15 18:05:59 2008-07-31 11:38:10 3 1 121 0 16 69 0 128.10 58 96.00 CHANGED M.......ELoISYpQhhlhph-upPPslDWTDEshE+GYApuDsAV.FEAlpshpspltlpLssphphtuh.RplTVPFpVtp-slhIpSlhSp+lphsIPpGcYpLsh.osP...spcs-Lat.pY.l.Fpsh .............MELTISYSQLMLMNYDGEQPYVDWTDEDFERGYAcADGoV.IFEA.LSDY..T..CEVKVTsGKH..I.E..K.EEV.lRTloVPFTV.c.N.E.sIslTSILSN.K.FpIPIP.NGEYTVVLQAsPLEEPTDDELYKIQYEFFFES.K....................... 0 4 10 12 +10866 PF11034 DUF2823 Protein of unknown function (DUF2823) Pollington J, Finn RD anon PRODOM Family This family of proteins are possible glucose repressible proteins however this cannot be confirmed. Currently, no function is known. 
25.00 25.00 34.90 32.90 24.20 24.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.23 0.72 -3.85 11 69 2009-01-15 18:05:59 2008-07-31 11:43:39 3 3 46 0 56 63 0 67.40 55 89.33 CHANGED METlK....NAuNYVuEoVQuAsusASKEANKpVAKDSsAuluTRssAAtDAlsDKt-EppHDAKA-sHKpu ..................M-olK....pAuNYVoEpVQuAousASKEANKpVAKDS...sA...ulGTRsoAAtDAluDKhcEppH-ucA-saKpt........... 0 18 32 49 +10867 PF11035 SnAPC_2_like Small nuclear RNA activating complex subunit 2-like Pollington J, Finn RD anon PRODOM Family This family of proteins is SnAPC subunit 2-like. SnAPC allows the transcription of human small nuclear RNA genes to occur by recognition of the proximal sequence element [1]. 21.50 21.50 22.00 23.60 20.40 21.40 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.03 0.70 -4.96 3 65 2009-01-15 18:05:59 2008-07-31 11:57:22 3 2 32 0 36 49 0 234.00 43 98.19 CHANGED MKPPQRRRAlPARYLGEVTGPAAWSAREKRQLLRLLQARQGQPEPDAAELARELRGRSEAEIpcFIQQLKGRVAREAIQKlHPGGhcGPRRpETQsPAPIEVWMDLAEKITGPhEEALTVAFSQVLTIAATEPVSLLHS+PuKPTQARGKPL.LLSAPGGQEDPAPEuSSPAPtAPu.........sscssGSsP+TPG.......PAPEAPSESLAGsSTEcDFAVDFEKIYKYLSSVSRGG+GPELSAAESAVVLDLLMALPEELS+LPCTALVEHMTcTYt+LTAPQssLAGGuLu.PGTEDuGAGS+GPEETsQASPQAoEsAtpSEP+SuWQAAGICPLNPFLVPLELLuQAsoPAR .................................................................................................................................................................ltsalp.LKt.sspEslpp...th..t.Rpppsp..APIElWhDLAc+lTGshEEulssAFSQhLTIuAsEPloLhaS......hPs+s.spsps+.h..hp.s.t.ppp.ss..sts...s......................................................................u.ss.s...hp....ph..........VDFEKIYKYLSphu+sspuspLS..s.s..ESAVlLcLLMuLPEELshLPCssLhcHhhpsYhpL..hu..Pp.s....ust....s.s....sss...Essssssp.s....pps.....s....s.......st.s.................p.....hptht......hs.PLNPFhlPLphLtp......................................... 
0 1 3 9 +10868 PF11036 YqgB Virulence promoting factor Pollington J, Finn RD anon PRODOM Family YqgB encodes adaptive factors that acts in synergy with vqfZ , enabling the bacteria to cope with the physical environment in vivo, facilitating colonisation of the host [1]. 20.10 20.10 22.90 50.00 18.50 15.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.18 0.72 -4.45 2 382 2009-01-15 18:05:59 2008-07-31 13:08:21 3 1 379 0 6 24 0 43.00 84 96.41 CHANGED MKKKPVAQhE+Q+.LLENPhsYGLLSphphAIVVNCFTLsph. MKKKPVAQhERQHoLLENPCAYGLLSQFQAAIVVNCFTLNKII... 1 1 1 3 +10869 PF11037 Musclin Insulin-resistance promoting peptide in skeletal muscle Pollington J, Finn RD anon PRODOM Family Musclin is a muscle derived secretory peptide which induces insulin resistance in vitro. It encodes a 130 amino acid sequence including a NH(2) terminal 30 amino acid signal sequence. Musclin expression level is tightly regulated by nutritional changes [1,2]. 20.70 20.70 22.40 21.60 18.20 17.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.61 0.71 -4.53 3 49 2009-09-11 15:35:46 2008-07-31 13:09:12 3 1 36 0 27 34 1 123.70 65 94.62 CHANGED MLDWRLASVHFILAlTLMLWuSGKVLSsDlAocsF-.StslulpuPPTAoEEKSAT-LAAKLLLLDELVSLENDVhETKKKRSFSGFGSPLDRLSAGSVDHKGKQRKVVDHPKKRFGIPlDRIGRNRLuNSRG ...........MLDWRLsusHFI....LAlo.LhhWSSGKVlSs-ss.o.E.A...FD....SullclQS.sPTsp..EEKSATDLsAKLLLLDELVSLENDVIETKKKRSFSGFGSPLDRLSAGSVDHK.GKQRKsV-hPKRRFGlPhDRIGhNRLssoRG.......................... 1 1 3 9 +10870 PF11038 DGF-1_5 Dispersed gene family protein 1 of Trypanosoma cruzi region 5 Mistry J, Coggill P anon Pfam-B_157 (release 23.0) Domain This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. Other domains on this protein include DGF-1_N, DGF-1_2, and DGF-1_4. 
This domain is just downstream from the C-terminus, but not the C-terminus of proteins, also annotated as being DGF-1, that constitute family DGF-1_C. 25.00 25.00 39.10 39.10 17.90 16.70 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.88 0.70 -5.19 3 286 2009-01-15 18:05:59 2008-07-31 13:12:31 3 5 2 0 52 286 0 248.30 89 11.79 CHANGED AGGSLT.sDIRso.....GuAVPc+LhVALPPPFR..WARDPQLGTHLoF.shsSpuQPsGauGPWGE.MLRNATWVRNATNPSTVLELAVPVHRGYFIGADETIVIRCDAVAVaGGCKGVLLGuFTIsSNTPPAlASALSAITGVVAGAAAVAVVVTGGLGSILEMQALGVFARMSCASAQERASTVALPYFLSVFAALDPLWMVVGNALLAAVFGCVHYGVTAAFQRWRGVDAASAWAAMRFPSLTYVVAHAMHLGIFFGSVFALAMPGARVQHYVIGVVGVLYG ....AGGSLTQNDIRGG.....GSAVPThLMVALPPPFR..WARDPQLGTHLSF.VPVSTAQPpGFGGPWGA.MLpNATWV.RNATNPSTVLELAVPVHRGYFIAADETIVIRCDAsAV.GGCKGVLLGSFTIRSsTLPAAASALSAITGVVA.GAAAVAVVVTGGLGSlLEMQALGVFARMSCASAQERASTVALPYFLSVFAALDPLWMVVGNALLAAVFGCVHCGVTAAFQRWRGVDAASAWAAMRFPSLTYVVAHAMHLGIFFGSVLALAMPGARVQHRVIGVVGVLYG............... 0 0 0 52 +10871 PF11039 DUF2824 Protein of unknown function (DUF2824) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. Some members in the family are annotated as the P22 head assembly protein gp14 however this cannot be confirmed. 25.00 25.00 25.00 25.20 24.20 24.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.87 0.71 -4.49 3 167 2009-01-15 18:05:59 2008-07-31 13:13:44 3 1 163 0 4 76 0 148.80 65 99.31 CHANGED MITFpPTRNIDLIEMVGNHPDIIAGSNNGDGYDYKPECRYFEVNVHGQFGGIVYYNEIQPLTFDCHAMYLPEIRGFSKEIGLAFWRYILTNTTVQCVTSFAARKFRHGQMYCAMIGLKRVGTIKKYFKGVDDVTFYSATREELIDFLNHGR ........................MIpFpPTRNIDLIEhVGNHPDIIAGSNNGDGYDYKP-CRYFEVNVHGQFGGIVYYpEIQPL...TFDCHAMYLPEIRG.FSKEIGL.AFWRYILTNTTVQCVTSFAARKFRHGQMYCAMIGLKRVGTIKK.YF.K.G..VD...DVTFYSATREELIDFLNHGR...... 
0 1 1 1 +10872 PF11040 DGF-1_C Dispersed gene family protein 1 of Trypanosoma cruzi C-terminus Mistry J, Coggill P anon Pfam-B_30 (release 23.0) Domain This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. This is the very C-terminal part of the protein. 25.00 25.00 49.90 48.00 20.30 19.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -9.84 0.72 -4.09 26 256 2009-01-15 18:05:59 2008-07-31 13:20:46 3 4 2 0 31 256 0 80.50 80 3.82 CHANGED WYAEDRHWQELREPRRGGLEALLRDDEESDE-TQKPH-hTSSSYASGTTsASSYRPPAP.....................QPMAGDTRSDALSLhDRASSASspIs .WYAEDRHWQELREPRRGGLEALLRDDEESD.EETQKPH.-.MTSSSYASGTTVAS.SYRPPAPP.....................QshAGDTRSDAhSLhDpuSoAut.l............ 0 0 0 31 +10873 PF11041 DUF2612 Protein of unknown function (DUF2612) Mistry J, Coggill P anon PRODOM_PD048079 Family This is a phage protein family expressed from a range of Proteobacteria species. The function is not known. 22.90 22.90 23.00 23.70 22.70 22.80 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -4.72 18 287 2009-01-15 18:05:59 2008-07-31 13:21:32 3 2 240 0 38 254 0 165.80 26 78.34 CHANGED shtshllhQYpspP+hhuhlpthtshh.sshhphlss.lsslaDlDsAsGhtLDllGchVG.luRhlps....t.aFuF..shtuhsa.......stu.ahs.hcstsssstLsD-sYRhLI+sKlhpNhhcsTlsslsshhp.lFuss...sallDshDMo..hsssVssphhoshhhtllpphsllP+PsGVplpa.ll ..........hhthlhspatspPph.shlphhsp.h..sstshhps.h.phaslDoA.GhtLDlhGphlG..hsRhls...s...t.ahua.....s...t..s.hsa..................spus..a...s.s.p.s..s.ss.......htLsD-tYRhll+sKhhtNhh.cuossslsthLchhassp...phhlh....Dst-Mo....hhlh.p....t..hssh...ph...thlpp....h.....lh..s+ssGV.h.....h.......................................... 0 3 17 27 +10874 PF11042 DUF2750 Protein of unknown function (DUF2750) Mistry J, Coggill P anon Pfam-B_609 (release 23.0) Family This family is conserved in Proteobacteria. The function is not known. 
21.60 21.60 22.10 21.80 20.90 20.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.36 0.72 -4.01 71 710 2009-09-11 13:47:04 2008-07-31 13:22:01 3 2 507 0 130 400 30 105.20 33 68.29 CHANGED RhphFl..pcltpppplWsLpcccG..alhhsos-t-s........hPlWsscchApt..asss-WpchpstsIsLscah-pWlssLpcDslhlul...stsp.pGhhlpPp-lsppLt .........RhphFl..p-ltcpppVWuLpccpu....alhlsss-p-p.........hhPlWsp+ptApt..hss..-..-...Wt.-h.cshsIshshFhEtWLssLc-Dsht.Vu.lsh.sssh..GhllpspcLsp-L.................. 0 27 54 99 +10875 PF11043 DUF2856 Protein of unknown function (DUF2856) Pollington J, Finn RD anon PRODOM Family Some members in this viral family of proteins with unknown function are annotated as Abc2 however this cannot be confirmed. 25.00 25.00 71.00 70.90 24.30 23.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.19 0.72 -3.87 3 202 2009-01-15 18:05:59 2008-07-31 13:23:26 3 1 181 0 3 78 0 94.10 88 99.85 CHANGED MPAPLYGADDPRRCSGNSVSEVLDKFRKNYDtIMSLPQETKAER-FR+sIWLAEKpEKERIpQTSIRPFRKATYTKFIE.IDPRLRNYRSRYGAISNN ................MPAPLYGADDPRRCSGNSVSEVLDKFRKNYDhIMSLPQETKEEKEFRHCIWLAEKEERERIYQTSIRPFRKATYT+FPE.IDPRLRNYRSRYGAISND 0 0 0 0 +10876 PF11044 TMEMspv1-c74-12 Plectrovirus spv1-c74 ORF 12 transmembrane protein Mistry J, Coggill P anon PRODOM_PD019016 Family This is a family of proteins expressed by Plectroviruses. The plectroviruses are single-stranded DNA viruses belonging to the Inoviridae. Except that it is a putative transmembrane protein the function is not known. 20.80 20.80 20.80 22.00 20.40 20.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.45 0.72 -4.41 4 35 2009-01-15 18:05:59 2008-07-31 13:24:13 3 1 6 0 0 33 0 48.60 76 87.95 CHANGED MPTWLTTIFSVVIlLulFhahGL.IYQKIRQIRGKKK-KKEIccKEspc ....MPTWLTTIFSVVIlLGIFsWIGLSIYQKIKQIRGKKK-KKEIE+KEspK... 
0 0 0 0 +10877 PF11045 YbjM Putative inner membrane protein of Enterobacteriaceae Mistry J, Coggill P anon PRODOM_PD079046 Family This family is conserved in the Enterobacteriaceae. It is a putative inner membrane protein, named YbjM, but the function is not known. 25.00 25.00 31.30 30.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.78 0.71 -4.36 6 477 2009-01-15 18:05:59 2008-07-31 13:25:27 3 1 474 0 30 104 2 118.90 78 99.49 CHANGED MtppphWhuhhsCFlLF.lVhL..thtspGuFc.uuG+sElGLLhFlLPGAVASahSpR+RlL+PLhGAllAAPlCLllh+LahsPsRSFWQELAWlhSAVFWCuLGALCaLFlpoLhpthRp+..pR ..........MKHKptWAuslCCFVLFIVVCL.Ls..hHM..KGAFR.AAGHPEIGLLFFILPGAVASFFSp....RREVLKPLFGAMLAAPCSMLIMRLFFSPTRSFWQELA.WLL.SAVFWCALGALCF.LFISSLFKsQHRKN.Q...... 0 1 5 17 +10878 PF11046 HycA_repressor Transcriptional repressor of hyc and hyp operons Mistry J, Coggill P anon PRODOM_PD091544 Family This family is conserved in Proteobacteria. It is likely to be the transcriptional repressor molecule for the hyc and hyp operons, which express, amongst others, the protein HycA. This protein may be harnessed for the reduction of technetium oxide, an unwelcome product of radio-nucleotide bioaccumulation. HycA produces formate hydrogenlyase, one of the key proteins necessary for metal compound reduction [1]. 25.00 25.00 91.40 91.30 21.50 19.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.85 0.71 -4.26 4 465 2009-01-15 18:05:59 2008-07-31 13:26:04 3 2 462 0 21 85 0 147.90 88 96.20 CHANGED MTIWEISEKADYIApRHRpLQ-QWHhYCNSLVQGITLSKARLHHAMSCAPpc-LCFVLFtHFtIaVsLA-GFNSHTIpYaVEsK-Gp-+pLIAQAQLshDGhlDG+Vs.RDR-QVLEHYL-KIAsVYDsLYsAlEpDhPVcLSpLlhuc ..MTIWEISEKADYIAQRHRRLQDQWHIYCNSLVQGITLSKARLHHAMSCAPDKELCFVLFEHFRIYVTLADGFNSHTIEYYVE..TK...DGEDK...Q.R...IAQAQLSIDGMIDGKVNIRDREQVLEHYLEKIAGVYDSLYTAIENNVPV..NLSQLVKGQ........ 
0 1 3 12 +10879 PF11047 SopD Salmonella outer protein D Pollington J, Finn RD anon PRODOM Family SopD is a type III virulence effector protein whose structure consists of 38% alpha-helix and 26% beta-strand. 25.00 25.00 27.20 26.00 24.00 23.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.79 0.70 -5.47 2 272 2009-01-15 18:05:59 2008-07-31 13:26:32 3 6 136 0 9 152 0 274.40 60 89.82 CHANGED MPVTLSFGN+pNYplNcSRLA+LhSsDKEcAlaMGsWD+hpDpFRscKKpcsLEVLaolIHGptRtp.uEhpVslpshpKIaAFppLpchAsPupQDhFsh+hDhsQTQhLh.lsspVIspsNl+clLNlS-sslhcsMpc-EcpLFLpIC.hhGtKhohaPELLpt.hspL+ctVstssplKstVYchMRsuEs.ch.hVEWpsoLTE-EKshLtClphGsFp.TTQFhKIGY.ElpGEVhFsMhHPslSYLLpsYpP...u-hh.TNoh.F.chLNpDYsDYpspKh.IDsILc+lYhoHppoLaIuccussRNhLl ..............MPVTLSFGN+p.NYplNcSRLA+L..hSuD.K.EcAlaMGsWD+hpDpFRscKKpcALEVLaolIH...Gp.GRtc.uEh-Vsl-shs..K....IaAFc+LpphAsP...up..QDhFsh+hD.ho..Q..T...QhLhhlsspVISpsN.l+..clLNlSDssVlcsMsc-EcpLFLpIC.hhGtKhoh+PELLQp.hspLRctVstsspIKstVYchMRPuEs.chshVEWpsoLTtDEcshLsClphGsF-.oTQFhKIGY.El.pGEVhFsMhHPsl.YLL+uYpP...s-Fp.oNoh.F..chLNpDYsDYpspKh.IDsILc+lYhoHppoLaIucsussRNhLl..................................................................................... 0 2 3 5 +10881 PF11049 KSHV_K1 Glycoprotein K1 of Kaposi's sarcoma-associated herpes virus Mistry J, Coggill P anon Pfam-B_38 (release 23.0) Domain This is a highly glycosylated cytoplasmic and membrane protein similar to the immunoglobulin receptor family that is expressed as an inducible early-lytic-cycle gene product in primary effusion lymphoma cell-lines. This domain would appear to be the cytoplasmic region of the protein [1]. 
20.90 20.90 21.30 43.00 20.40 16.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.59 0.72 -3.99 13 1340 2009-09-11 12:24:34 2008-07-31 13:28:52 3 4 3 0 0 1052 0 67.90 71 41.93 CHANGED YTLTCPSsASLPISWYCNsTRLhRL.Tsp.TlTl.ss.lTCNFTCVsQSGHpHSIWIpWasQPVLQTLCAQPSNT .YTLTC.SssSLPISWYCNsTRLhRLTpp.olTV...so.l.s.CNFTCVpQSGHRpSIWITWasQPVLQTLCAQPSNT................ 0 0 0 0 +10882 PF11050 Viral_env_E26 Virus envelope protein E26 Pollington J, Finn RD anon PRODOM Family E26 is a multifunctional protein. One form of E26 associates with viral DNA or DNA binding proteins, while a second form associates with intracellular membranes [1]. 25.00 25.00 69.30 69.20 20.30 20.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.53 0.71 -5.13 8 16 2009-01-15 18:05:59 2008-07-31 13:29:20 3 1 13 0 0 16 0 186.80 49 99.87 CHANGED MEosp.....lsssaAs.K..tuAlss.....hV+TVVTTTssSspstsp......cs+IsQlIAQLp+TRLsFsKLopLQ+KRVRNMQ+LlRKKNplIAsLAApLpsppp........+...sKaFAVshsKNllhThSGSEpFVRpRVA-LCAh.GGEQVFCuRRADCARDRpRlAcALssSLGuGVlspusNKRFEIh-s-KlVSAKLIlQQVLHDGhcuDssAa .........MEosp....h.sstaAs.K.pthAVss....ohs+TVsTTTssSshscp.....-ppp+IsplIupLppT+LsFsKlpplp+K+lRphQpLlRKKNpIIAsLstpLpstpc................hKaFuVshscNslhTh.GsEpFVRpRlA-LCsh.suE.VFCttRsDss+DRptlAphLssuhGutVlshtsN+RFEhlps-clsSuKhll.phLpDt.puDhsAa..... 0 0 0 0 +10883 PF11051 Mannosyl_trans3 Mannosyltransferase putative Mistry J, Coggill P anon Pfam-B_379 (release 23.0) Family This family is conserved in fungi. Several members are annotated as being alpha-1,3-mannosyltransferase but this could not be confirmed. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.95 0.70 -5.05 117 725 2012-10-03 05:28:31 2008-07-31 13:29:41 3 19 160 0 543 759 22 250.80 23 44.84 CHANGED u+GIVhsu.......Gs...th.hh.h.thl+hLRp.hG.....spLPlEllh.s..s-h..spchppplls............................................hls....spsllhsshhsp.hh...........................s..ap.......hKhlAllhSSFEcllhLDADslslps.P-p.hFp..scsappsG.hlhW...-hhppshsshhhplhs.....................h.....tt...t.......................th.hpph.sth.ptssESG.llls..Kp....pHhpsLLLuhaaN.hasss.......hha.hhs.G......D.....KEoFhhAshhh............................s..psYa.spp.ssslGhhst..........................................p.phs.usthuphD ............................................................................................................tcGIVhss.........us....phh.hsh..thl+hL.Rp.hG.....spLPlElhhhs..s.-h.......spphpptlhs................................................h.ls...........sp..s.l..hh..p......phhsp..hh....................................s.ap....h.KhhAll.hSoF-c.llhLDuDslshp....s.Ps.........h.lFp................sp.sap..ps.G.hlha........s.....hh..p..p...p..h....tshhhp.lhp.........................................................................................tth.tth...phptESG.llls..Kp....p..H.h..ts.Lllshahs..hht.p........h.h...h.hs.G.......DKEhFhhuhhhh............................s...psahhspp..s.ssh.Ghhpt.......................................................thpu.thhphs................................................................................................................................................................. 1 117 294 503 +10884 PF11052 Tr-sialidase_C Trans-sialidase of Trypanosoma hydrophobic C-terminal Mistry J, Coggill P anon Pfam-B_103 (release 23.0) Motif This is a highly conserved sequence motif that is the very C-terminus of a number of more diverse proteins from Trypanosoma cruzi. 
All members of the family are annotated putatively as being trans-sialidase but this appears to be a diverse group. 20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.89 0.72 -7.06 0.72 -4.06 45 797 2009-01-15 18:05:59 2008-07-31 13:30:14 3 9 3 0 143 798 0 24.70 62 3.66 CHANGED uuos+GsshL...LLLL..LLGLWG..hAAl ...uuTs+G..us..lL..PLLLL..L.L.GLWG.FAAL..... 0 0 0 143 +10885 PF11053 DNA_Packaging Terminase DNA packaging enzyme Pollington J, Finn RD anon PRODOM Family Phage T4 terminase functions in packaging concatemeric DNA. The T4 terminase is composed of a large subunit, gp17 ad a small subunit, gp16. The role of gp16 is not well characterised however it is known that it binds to double-stranded DNA but not single stranded DNA [1]. 25.00 25.00 30.30 29.60 19.80 18.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.91 0.71 -4.61 7 72 2009-01-15 18:05:59 2008-07-31 13:33:08 3 1 69 15 0 59 837 144.80 36 88.29 CHANGED MsD..L.DhspLhDlsu..lPG.lpuE-spV.YpPlVLp-VcSpPpsRshDL-cDYslVRcNhHFQpQMLMDhAKIhLEsAKNu-SPRahEVFusLMsQMTssNKElL+lHK-MK-IT.pphtp...uupsshpsplQNssl.....FhGSPs-Lh-.ElGD..p. .................................................htt....G....s.p.....apP......l..lpSp....P...p.s...cssDlcpDYphsRpshH.hQpphhh-AuchhLEsA+so-uPRthEVFupLhpphssssccLlcLpKcMK-los-p..t.p..ts.ssspsshpNs.Tl.....FhG....Sss-Lhc.plts.......................... 0 0 0 0 +10886 PF11054 Surface_antigen Sporozoite TA4 surface antigen Pollington J, Finn RD anon PRODOM Family This family of proteins is a Eukaryotic family of surface antigens. One of the better characterised members of the family is the sporulated TA4 antigen. The TA4 gene encodes a single polypeptide of 25 kDa which contains a 17 and a 8 kD polypeptide [1]. 
25.00 25.00 63.50 63.40 18.30 17.90 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.83 0.70 -5.00 12 29 2009-01-15 18:05:59 2008-07-31 13:51:46 3 1 2 0 0 29 0 240.70 41 97.62 CHANGED hshhsllpsSLLhlu.s..GputssttsspYTAshGtslpCLuElNuARcAAGLssFt-A.ossppLscPssp-h....psuopW+.clCcaLlP..ps-ssstusshpPFptGTYAFKsLTsspssCK-sV-YWKuAacNFoG.LPPocspuss..lYssQpNVSFVALYNPpusAoADCpVVTCTpssssss..........ussplpu-stpt.........spsGaAlICKThPuAF.s-sosPFTp-QWcKIVsSLTGS........sSsshPSlsshhIsshuhhuL ......................h.h.sllphSLLhl.s.....upptsptttsphTAs..tslcCLsEhNssRcAAGLssFtp.A.sssthLspsuspph.....psso.Wp.clCptlls..pscss.spssshs.F..GTaAatslTsspssCK-sV-YWKuAacsFsG.LPPohptsss..lYscppsVSFVALYNPpssss.AsCtllTCTpssss........................ussplpu.-sttt.........sppuhAllChT.PsAh.sssosPFTp-QWcKIlpSLoGS........sSsssPolsshhlsshuhhuL..... 0 0 0 0 +10887 PF11055 Gsf2 Glucose signalling factor 2 Pollington J, Finn RD anon PRODOM Family Gsf2 is localised to the ER and functions to promote the secretion of certain hexose transporters [1]. 
25.00 25.00 46.40 46.30 17.70 17.40 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.29 0.70 -5.94 9 45 2009-01-15 18:05:59 2008-07-31 14:01:25 3 1 44 0 29 42 0 367.10 46 95.97 CHANGED hElYlRhNDDhE+DYsFQlcpscThcs.lhKIFs.......oh.hsLRPSlFacscPlGFpKSspPGYLTEsGsLlFcY-Ascpcahcpls..cchlh-plWPGQLILPhWchsphshasFlslhlsWLYTDLPDhISPTPGICLTNQLSRhhh.lAppash.clAppLht-lp.s.sulsAQhLFFlhHllKllhIhhhhYhGhhNPlphN.aphhsh+pph..................hKcpLhslGWhGARRAThD-Yp-hYhpYtIccaGGhlpAaRA..Glhcphts.GlpLusGEGFpTPL-c+hThsThpshcpct.....KFhLS.-YFspLtpshcp.l-cpsG..uchsspI+pFRRaGlhcss-clpclVphRKthsspcc..............p.KhE .MEIYlRhNsDhE+DYsFQVss-cThps.lhKIFs............u..hsLRPSIFacpcPltFhKShpPGYLTEsGsLlFcY-Aspcc.lppls.csc.lh-plWPGQLllPcWchschshasashlhLsWLYTDLPDhISPTPGICLTNQlSRhh.h.lAcp.hshsclAs+LtpElp.s.sulsAQhlFFlhHlhKlslItLFLhhGlhNPISFNshphhs.l+s.s.......t.............s.hKppLpolGWhGA+RAThD-YpspYhsYhIcKhGGhVtAa+A..GhhcphtssGlpLssGEGFpTPL...-c+.aTtsTFpsl.cp-t.......KFlLS.EYFlcLppsL+c......l...-ch-G..-l....uchNtpI+cFRRaGlaEss-+ltplVphR+phscp.p.............pppt................................. 0 4 15 26 +10888 PF11056 UvsY Recombination, repair and ssDNA binding protein UvsY Pollington J, Finn RD anon PRODOM Family UvsY protein enhances the rate of single-stranded-DNA-dependant ATP hydrolysis by UvsX protein. The enhancement of ATP hydrolysis by UvsY protein is shown to result from the ability of UvsY protein to increase the affinity of UvsX protein for single-stranded DNA [1]. 
22.80 22.80 24.80 27.40 22.20 22.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.45 0.71 -4.01 9 67 2009-01-15 18:05:59 2008-07-31 14:03:14 3 1 65 0 0 59 954 129.50 31 92.29 CHANGED phccDhhID........sopLppEuhc.PhLauKWLphaoshpp.hhclEhpt+pslK-Rh.aYoG+u-..sEVsh-h.....hcpS.ElKhslsuD-cllclssplpYhphlhcFhppuLctlpsRGFsIKshIEhR+hEuGt ........................hcpD.hID........sspLp.-uhp.shLasKahchasshpphhhplEtchKphhKp+hpYYsG+us..sphhh-p.....hppo.ElchhlsADc-ll+lssplpYhphllcFhcssLctIpsRuapIKNhI-h++FpsG... 0 0 0 0 +10889 PF11057 Cortexin Cortexin of kidney Pollington J, Finn RD anon PRODOM Family In the middle of cortexin protein there is a single membrane-spanning domain which indicates that this protein may be a membrane protein involved in intracellular or extracellular signalling of the kidney or brain, since it is expressed specifically in the kidneys and brain only. The protein is highly conserved among species [1]. Cortexin is also thought to be important to neurons of both the developing and adult cerebral cortex [2]. 25.00 25.00 47.50 47.30 19.80 19.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.90 0.72 -4.38 5 92 2009-01-15 18:05:59 2008-07-31 14:10:38 3 1 36 0 63 63 0 80.10 57 97.67 CHANGED Msosasl.PSPss.............................................sSussluusSLoLEQKTuFAFVGlLhlFLGLLIVRCFRILLDPYSSMPoSSWsDclEGLEKGQFDYALs ........................Msus.sh.spsh...............................................ssssssssul....oLEQKTsFAFVhlLhlFLGlLIVRCFRILLDPYSSMPoSTWsDphEuLEKGQFDYALs...... 0 3 8 22 +10890 PF11058 Ral Antirestriction protein Ral Pollington J, Finn RD anon PRODOM Family Ral alleviates restriction and enhances modification by the E.Coli restriction and modification system [1]. 
25.00 25.00 84.80 84.70 21.70 16.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.53 0.72 -4.60 2 90 2009-01-15 18:05:59 2008-07-31 14:13:30 3 1 82 0 1 27 0 53.70 89 99.79 CHANGED MTTTIDTNQWCupFh+CpGCKLpuECMVKPEEMh.VhEDGKhVDKWAIRTTtMIARELtK...KAh ...........................MVKPEEMFPVMEDGKYVDKWAIRTTAMIARELGKQNNKAA..... 0 0 0 0 +10891 PF11059 DUF2860 Protein of unknown function (DUF2860) Pollington J, Finn RD anon Pfam-B_002667 (release 23.0) Family This bacterial family of proteins has no known function. 21.30 21.30 21.30 22.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.60 0.70 -5.37 29 262 2012-10-03 17:14:37 2008-07-31 14:23:33 3 1 230 0 25 207 14 288.20 40 91.00 CHANGED GFSGplolhsGhsuspSsh.....ss.psssphssh..ssuupocosslshPLGslpYTFupt.spQlFhGsscsDlhsGphthElGYcpphssssslshShlPslhp.sEsWpDPYhssssRppTDhsspuhRlphpplhs..usasl-hAaucp-lDpEpou.................ts.LcRsuphahhchsYphsL..spshhLpPulsYtppDADGcAhSaspaGsplohhhhhscHplslThuhupppYDu..sNPIFs+s.p--sphuhhhsYpYpshasacshuhsuhsGaspscSNIsFYDpsphllSlGlsYp ...............................................................................................GaSGsLSlssGhtcsKSNh.....sT..ssssh..losh...sus.sS-oshlsh.huplhYs.h.s.NsplFhts.sscslsG...htLGYc+ta.cthshohShlsSLh..pcsatsPYhhss.RppTDlst.GaRlu.hh..s..stFoh..sYsau..cpKlDc-p.u.................pupLpR...-upYaplchsYshuL.......Ls.ulsYs.pDAD...GcAp..SaopaG..aplGsph.h.F.u.p...s.hh.lT..suhuhpcY-s..scP...IFsKp.pDu..shhph.hthV...hpphhGapsl.hhu.hGlpcpsSs.I.s.FYDpp.pll.TGluYp.............. 0 6 14 20 +10892 PF11060 DUF2861 Protein of unknown function (DUF2861) Pollington J, Finn RD anon Pfam-B_002683 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 26.00 48.60 21.50 19.80 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.57 0.70 -5.04 43 183 2009-01-15 18:05:59 2008-07-31 14:27:32 3 1 112 0 17 107 0 251.20 41 89.45 CHANGED WFcs.TPLppuaQtLlEschsphappLlcshppsP...lppph.pLltpul..pspCG+uLsspshsDhh.uVThh+phsQo.tt.ph+Luh-shTps.lscls.sp..ssplluu-.uhs...........ttpYs.scphsLhuPhuAGlYpLolsspc.t.......lllusssupphlc.o...Spc-apIchsAh.supsP....SluhapahDtNap.l...............aSpThcsD.Psuh.ssshPSptahlolSlIppcaQGsItlEQhQplohshD ...WFcp.TPLppuaQpLLpschpphappLlcshphp....lpspL.phlhpul..pssCG+uLs.sp.shPD.WlpuVT..lhRp.lQSssp.th+lul-stop...s....l..sclphoph.sspllus-suhs..........ts.Ys.scphsLhsPhuAGlYplslsstctt...a.tWllluss.upphlcWo...Sc..c...-.apI.pp..sshhsupCPhPphSlula.sahDsNYp.l...............WSpoh...cs.DhPs..s..L...ssslPSspahloVShIppRaQGsltlEQ.QhIohshD................. 0 3 6 13 +10893 PF11061 DUF2862 Protein of unknown function (DUF2862) Pollington J, Finn RD anon Pfam-B_003005 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 26.50 33.20 23.50 21.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.90 0.72 -4.26 26 91 2009-01-15 18:05:59 2008-07-31 15:02:59 3 1 73 0 34 85 165 61.70 46 76.61 CHANGED IGpKV+..l..s+l+DRlspcllctltppshGplpsaKhsDGpGlGllVcLssGppsWFFEDElp. .IGpKV+..V..pRlRDRlsspllcpLsp..hGslpsFKhTDGp.GIGllVchs.D.ssssWFFEDElc... 0 5 22 32 +10894 PF11062 DUF2863 Protein of unknown function (DUF2863) Pollington J, Finn RD anon Pfam-B_002981 (release 23.0) Family This bacterial family of proteins have no known function. 
25.00 25.00 26.60 26.30 24.10 23.00 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.10 0.70 -6.38 12 116 2009-01-15 18:05:59 2008-07-31 15:34:25 3 1 112 0 44 96 40 389.90 56 97.80 CHANGED t+hRsppps+Lu.-ApcLlsLAhuLstSGSRlEDtaWEppLsttls+LL+suspssL-AALDHLaptsssAY-.LA-tsEohuESh.lppsGtcaDlLLlAAPlLAWoRYpIPoGsl.sshhpsLtspLQuHVLAsss+lALssaLaS.DQLPRoaspThpLspcLupAALsup.sl+lphpshPETushLuDsRYLlAulusPpGpPlFRWQEpsps............pR-tsLtpWppQutssLusLLPGCthElLLPDAYasusRpAD+psRPhSlRAuVsaLpssLshssspL+AVluuFsEc....plEEYRIGFTh+sps-VlYGlVWPLhGp-......................u-s-sssslppIpAlL+EsGVp-lhphsthFs.EYC-DCGAPLaPs.pGElVHAEhPE-.u.sss.ph ...............................................h...Rp+tupRLsPDA-+LVuLuLALhASG.SRlE.DpaWEs+LsshLuKll+NGsQosLDAALDHLtpscs-.AYssLA-hAETtSEShslE....+.-...G..p..YD........ALLlAsPlLAWTRY..hI..P..SGslKs-lspsLpsHL.QAH.VL.AssspVAlAPaLYSIDQLPR.pHs.......-TapLsp.........pLApA.ALusp.ss......KlshsDhPETuPILA.D....P.....RaLLAlVuAPt.GtPLFRWQE-pcs.........p+..hERspCLEQWssQusssLushL..PG.......CEFEs..LLPDAYasACR-ADcRlRPhTVRsAlpYLhsTlu.ssPpcLRAVlAGFGEc....RIDEYRVuFTtRGS.sDVIYGlVWPLYGREsGps............p.puEss.suPLEEIsuLL+EsGVoDlRRHuu+FEPEYCDDCGsPLYADPhGEIVHAEMPED.AssuQPHF...................................................................................................... 0 6 22 32 +10896 PF11064 DUF2865 Protein of unknown function (DUF2865) Pollimgtom J, Finn RD anon Pfam-B_002953 (release 23.0) Family This bacterial family of proteins has no known function. 
19.90 19.90 38.80 33.00 18.00 16.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.87 0.71 -4.22 28 141 2009-01-15 18:05:59 2008-07-31 15:47:56 3 1 83 0 72 148 4 114.00 35 34.84 CHANGED GuspshCVRoCDGhaFPlsttsssush......ptCpuhCPuucsclah....sssshctAsuhsG.csYuchPsAFtYRpphs...ssCoCpsts.s....uhu.hshpscsohct.GDllsopsuh ......shpshCVRoCDGtYFPluhssssuc..h.t....ptCpuhCPuucsplYh....s........upshctAsuhsG.csYssLPsAFtYRpphs...ssCoCpsts.s.....u.u.hs.hppcs.s.h.ct.GDllsstptt......................... 0 12 29 41 +10897 PF11065 DUF2866 Protein of unknown function (DUF2866) Pollington J, Finn RD anon Pfam-B_002950 (release 23.0) Family This bacterial family of proteins have no known function. 25.00 25.00 29.80 32.10 20.40 19.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.43 0.72 -4.05 9 112 2009-09-11 09:26:37 2008-07-31 15:52:26 3 1 56 0 28 74 3 63.30 54 73.21 CHANGED hslpuCRVSsPlppPWGtuhRlVEW.hctsuphpRRVVss-uTstElstsltpHV.GR+athsss ..slpuCRVS.shppPWGtshRlVEW.hchDuphtRRlVss-sTEAElstsltp.V.GR+Yhh...... 0 2 4 14 +10898 PF11066 DUF2867 Protein of unknown function (DUF2867) Pollington J, Finn RD anon Pfam-B_002931 (release 23.0) Family This bacterial family of proteins have no known function. 
20.90 20.90 20.90 21.00 20.70 20.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.65 0.71 -4.48 57 1086 2009-09-11 16:38:42 2008-07-31 16:01:40 3 3 983 0 243 790 164 137.50 35 34.25 CHANGED ltshhssscahDsasltls....s..ss.ph.htchhs....psPsWlstLhtlRstlVp.h.GLcs.....................shtsusp.........luhFslhspssp......ElllutcDpH.Lshclslthpstpt........lphoThV+h+Nhh.GRhYhhsltPhHt.lIV.shL ......................................hhhs.................................................................h..s..h..LWpsRuhhD+hl..Gtch............t+G.Rsptp..........hLpsGDs......................lDsW+Vl.hl-Pc.................p.L.L.hG.h..c.....ss...t......L.....G....R..L..s..h...s..lc..-cG..chcp..............lclp.....Aaa....HP.+Gh..GhhYWhhhhPsHh.aIF+GM.............. 1 87 158 214 +10899 PF11067 DUF2868 Protein of unknown function (DUF2868) Pollington J, Finn RD anon Pfam-B_002930 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as putative membrane proteins. However, this cannot be confirmed. 
26.50 26.50 26.60 27.60 26.30 26.40 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.05 0.70 -5.29 35 236 2009-01-15 18:05:59 2008-07-31 16:14:17 3 3 228 0 62 233 20 301.60 30 66.01 CHANGED phhhhht..p+htpsspss..hs.AL......hthhtptplt+WhlutlsHthWlshhhuuLhsLLlllsspcauFsWpTTL..LusssahpLspsLuh.ss...hlGhulPssphIpASchs.............sssssustttWusaLlusllsYGlLPRLlLhlhshhth+psttp..LslshPtYtpLppRhtssshphus.t-s-ssthspspsssshttts.....................................sslhsulElssp..............hs.shuttpsls-............sccthpplhtthsttsssplllsscsppsPDRGs.lshlspLsppssssphllLhsssu......................shssp+lppWpptlpphsls ...........................................................................tphh+stpss.phs.tL.......ht..hcp.hsRWhluthsHuLWLhsLlusLlslLhLlhsRpYsFsWEoTL..Lus..sshlplsphL.u.hlPu...hL..G..Fs...l..PDsphlhuoph...................ss.sstpuWushLlGsllsYGlLPRLLhhhhChhhh+puppt...LDLptPhYptLhcRhtsphtctss.p-ss..ssshspht.hsss................................................................................................................................................................................sthhlslEhc.p..............h.sthsptlhDts.....................hssRcpht..tL.ppLpph.P.....up..lll..us.csppsPDRGs.LthlscLuc..sAtust..lhhL.s.tu.....................ps..sp+ltpW+ptLpphth...................................................................................................................................................... 0 17 34 54 +10900 PF11068 YlqD DUF2869; YlqD protein Pollington J, Finn RD, Eberhardt R anon Pfam-B_002915 (release 23.0) Family The structure of a representative of this family has been solved (pdb:4dci) and found to form a tetrameric structure of prefoldin-like architecture with the beta-barrel core and helical coiled coil tentacles. This suggests that this family may act as molecular chaperones. 
27.00 27.00 27.20 27.60 25.90 25.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.52 0.71 -4.11 29 228 2009-01-15 18:05:59 2008-07-31 16:23:28 3 3 221 8 66 190 120 129.00 34 95.09 CHANGED lplhRslsVKslVT.phKc-htpplptplspl-pplpQL-hpsp+hlp-hppps.........pQltplppQhspc+schhEp+ppllpQlpQlppLplspEVtpGplEuhhclplGDslhppM.pspIll+DGllpEIR ....hplhpsVsVKtllT-p.Kpclhpphppphppl-p-hpQLchptp+h.pc..p...................pQ.pplppphspchschhEp+cpl..QhpQlchLpLGsElpptplEshhcVplGDshpcph.sspIll+DGllhEIR............. 0 20 49 60 +10901 PF11069 DUF2870 Protein of unknown function (DUF2870) Pollington J, Finn RD anon Pfam-B_002904 (release 23.0) Family This is a eukaryotic family of proteins with unknown function. 21.50 21.50 21.50 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.13 0.72 -3.96 21 183 2009-01-15 18:05:59 2008-07-31 16:36:42 3 2 128 0 101 164 1 83.20 47 31.03 CHANGED cApLWaAGKpL.tcscpLs-Y.lG+NEKTKllVKlpt+spGAPuREPhlsp-pQ+phMthha++QEEhKcLEps--csalsSpWusspuL+pphpGhts.l ................ApLWWAuKEL.tcsKpLsDY.lG.+NEKTKIIlKlpp+GpGsPuREPhlsp-pp+thMhaha++Q............EEhKcLtps-.-cshhsu.WusspsL+pphpGht............................................... 0 32 46 78 +10902 PF11070 DUF2871 Protein of unknown function (DUF2871) Pollington J, Finn RD anon Pfam-B_002884 (release 23.0) Family This family of proteins has no known function. 
26.10 26.10 26.50 26.70 25.30 26.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.59 0.71 -3.99 17 570 2009-01-15 18:05:59 2008-07-31 16:50:41 3 1 524 0 40 228 0 122.30 48 84.36 CHANGED MKKlh.suhhYhIlGLluGlFYREaTKhpsFs..GpTpLsllHTHhLVLGhlhFLIhLhL-K.FtLops..phFshFFllYNlGLllTlshhhh+GlhpVhGhsh....ssuluGlAGlGHIlloluhlhFhllLp+ul ....................M++Lh.uhhhYhIIGLhSGhFY.REhTKshsas..G.sTpLslVHTHoLlLGhhhFL...IlLsL-.K.lFpLoph..hh.FshFFhlYNlGlllTluhhss+GhhQVs..Gts.h...........spuhuGhAGlGHshhhsuLlhhhhLL+pu................ 0 19 28 37 +10903 PF11071 DUF2872 Protein of unknown function (DUF2872) Pollington J, Finn RD anon Pfam-B_002883 (release 23.0) Family This bacterial family of proteins has no known function. 23.20 23.20 23.90 41.70 21.20 23.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.70 0.71 -4.31 31 147 2009-09-10 21:27:12 2008-07-31 16:58:52 3 1 144 0 38 146 453 138.40 54 92.64 CHANGED YLSGEIHTDWR-cIhpGupt.sLslpFouPVTDHsuSDssG.sILGsEcspFW+D+KuAKlNuIRT+phI-cuDlVVVRFG-KYKQWNAAFDAGaAuALGKslIllHsc-htHPLKEVDAAAhAVscTP-QVVclLcYVhp .YLSGEIHTDWR-cItcuuct.hsLslsFsuPsTcH-uSDssG.sILGtpsssaW+D+puuclNuIR...........T+phlpcADlVVVRFG-.KYKQWNAAFDAGYAuALGKPlIll+sc-lpHPLKEVcAuA.AsscTscQlVclLpYVh.p......... 1 18 31 35 +10904 PF11072 DUF2859 Protein of unknown function (DUF2859) Gunasekaran P, Mistry J anon Pfam-B_001915 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
25.00 25.00 25.60 25.20 21.50 21.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.64 0.71 -4.73 34 268 2009-01-15 18:05:59 2008-07-31 17:10:28 3 1 208 0 65 232 4 133.20 41 84.96 CHANGED ltDhG.GtsshPaapuls.pssp............s.stss.sssspushLPVpSscLo...PGsVptRslp.......hPGh...pPlFLlGDDshSppWLpp+tspLcphpAlGLVVNVsohtsLppL+plA.PGLslhPsoG-DLAcRL...sLcHYPVLITsTG.lp ..........................ltDhG.utss.Phhpsls.p.p..............s..ssst..stushLPVposcLo...PGpVt.tR.slp.......hPGh...tPlFllGDDshSppWLpp+tstL+phpAlGLVVNVpohptLptLpphu.sGl.LhPssuD-LApRL...tLpHYPVLITssul....... 1 8 30 52 +10905 PF11073 NSs Rift valley fever virus non structural protein (NSs) like Gunasekaran P, Mistry J anon Pfam-B_001643 (release 23.0) Family This family contains several Phlebovirus non structural proteins which act as a major determinant of virulence by antagonising interferon beta gene expression [1]. 25.00 25.00 25.80 25.50 22.00 21.20 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.44 0.70 -5.11 15 202 2009-01-15 18:05:59 2008-07-31 17:12:38 3 1 47 0 0 155 0 225.20 55 91.26 CHANGED PsIspssuhschhsoVsYlsFsp.tspslSsapshEIPlcpaR.uhcpRspLScFhspuEhPtpWGsu...SpVpptssphFDstIpcLuchslcshpR.shPNlccALSWPLGaPohcFFphus.h-sappshppKsshuTpllRh.....Gs.upsLD-slVpsH++lLtEuppRGlsp-hhsGaDlhKEIAhlQhlRllsAlshDhss.....................................sssssslhshlhpp+tshpsp..thlGNppWhPl....s .........................................VSVEYI+GDGPPRIPYSMVGPCCVFLMHHRPSHEVRLRFSDFYNVGEFPYRVGLGDFASNVAPPPAKPFQRLIDLIGHMTLSDFTR..FPNLKEAISWPLGEPSLAFFDLSS.TRVHRNDDIRRDQIATLAMRS.....CKITNDLEDSFVGLHRMIVTEAILRGIDLCLLPGFDLMYEVAHVQCVRLLQAA.+EDISN.....................................AVVPNSALIALMEcSLh..................h...................... 0 0 0 0 +10906 PF11074 DUF2779 Domain of unknown function(DUF2779) Gunasekaran P, Mistry J anon Pfam-B_001581 (release 23.0) Domain This domain is conserved in bacteria. 
The function is not known. 23.10 23.10 23.10 23.60 22.80 23.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.99 0.71 -4.10 49 211 2012-10-03 01:22:09 2008-07-31 17:15:01 3 3 194 0 74 206 203 136.70 28 22.99 CHANGED halDFEThssAlPhacsspP.YpQlPFQaSlHl.ppss...tp...hpHh.paLs.t.s......hDP+pthlcpLhptlspp................................................................GsllsYNpu.FEpopLpE.huph.........................hs-htphlppIh..............pphlDLhchFppt..................aYcsphpGShS ...............hahDFEoh.p.ulPhacsspP.ap.QlsFQaSlcl.ppss...ts...h.pHh.talt.p.h......tDPRpt....hhppLhphlspp................................................................ushlsYNp.u.FE...ps..pLpc.lAph......................................................hschpptlppIh......................pphlDLhshFppt..................hhpsphpGshS........................ 0 34 61 70 +10907 PF11075 DUF2780 Protein of unknown function VcgC/VcgE (DUF2780) Gunasekaran P, Mistry J anon Pfam-B_001695 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.10 22.10 24.10 22.70 21.60 21.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.94 0.71 -4.19 32 234 2009-01-15 18:05:59 2008-07-31 17:15:52 3 1 220 0 62 185 18 154.60 35 91.16 CHANGED llhhhslhossupA.h.hsshstt.......................tt.htttsspssssspsssLlsslhupLsloppQAtGGhGuLLulApssLuss-aopLuptlPGh-sLhuus...............s..su.suhLsph...LG................sslpuhsslpsAFptLGl...ssshlstFssllhsY.LsppG...sushLhpuLu .......................................h.hhhhslsussuhA..as..tssts........................ss..ss.ss..tphsps.ttsssLlstlsoQLslospQAsGGsGuLLuhA....p....NpLsusphSpLsphIPGlssLtuss......................s..s................t....Lu................................sltshspVspAFssLGl...DsuMlppFsPllhpYLspQG....ASpsLLtSLu............. 
0 13 26 45 +10908 PF11076 YbhQ Putative inner membrane protein YbhQ Mistry J, Coggill P anon PRODOM_PD083760 Family This family is conserved in Proteobacteria. The function is not known but most members are annotated as being inner membrane protein YbhQ. 25.00 25.00 26.40 26.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.86 0.71 -4.55 3 433 2010-01-12 15:19:43 2008-08-01 11:37:37 3 1 432 0 22 82 1 131.30 87 98.04 CHANGED MKWQQRVRVATGLSCWQIMLHLLVVALLVMGWMSGoLV+VGLGLCALYuVTVVhMLsFQRHHEuRWREVGDFLEELTTTWYFGAALIALWLLSRVLHNNLLLALAGLVILAGPAVVSLLAKDKKRastsFuSKHG ..MKWQQRVRVATGLSCWQIMLHLLVVALLVVGWMSKTLVHVGVGLCALYCVTVVMMLVFQRHPEQRWREVADVLEELTTTWYFGAALIVLWLLSRVLENNFLLAIAGLAILAGPAVVSLLAKDKKLHHLo..SKH.RV...... 0 1 1 12 +10909 PF11077 DUF2616 Protein of unknown function (DUF2616) Mistry J, Coggill P anon PRODOM_PD264505 Family This cysteine-rich family is expressed by the double-stranded Nucleopolyhedrovirus, a member of the Baculoviridae family of dsDNA viruses. The function is not known. 25.00 25.00 81.60 81.50 22.40 21.80 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.54 0.71 -4.77 14 34 2009-01-15 18:05:59 2008-08-01 11:38:50 3 1 33 0 0 29 0 179.10 33 94.21 CHANGED ME...LIKPFlKYS+hYRsss.ssst..+phlacpWhp-hpppphth.......pshphts.th..........................CpFChs......sppps.......thhCppChFPL..s.......s.cpEhthYsLLSVCYaEssspss.sp.............................+sVWRp................Rl+hsW.p.cpp.+........lYplhhs..................p.ClQCpp......scpsssp.hhpFshchFCppChFPLFpI ...MELIKsFlKYS+tYRsss.sppt...+phlacpWsp-ltspphth.......pphhhpshtt..........................CpaChs........sspps.......thhCcpChFPl..h.p.....s.cpEhthasLLSlCYaEps..sst.....spst...........................+hVWtp......Rl+hsW.st-hstp........hYplhts...............p.ClQCppt.....sppsstp.hhpFshphFC.pChFPLFsI..... 
0 0 0 0 +10910 PF11078 Optomotor-blind Optomotor-blind protein N-terminal region Mistry J, Coggill P anon Pfam-B_595 (release 23.0) Family This family is conserved in Drosophila spp. Optomotor-blind is one of the essential toolkit proteins for coordinating development in diverse animal taxa, and in Drosophila it plays a key role in establishing the abdominal pigmentation pattern, in development of the central nervous system and leg and wing imaginal disc-formation of Drosophila melanogaster. This is the N-terminal region of the protein and does not include the T-box-containing transcription factor that plays a part in DNA-binding. 21.00 21.00 21.80 81.10 19.60 20.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.11 0.72 -3.63 3 125 2009-01-15 18:05:59 2008-08-01 11:41:54 3 4 24 0 11 132 0 86.40 95 25.50 CHANGED suPPsPPYFPAAALAALuGSsAGsHP.GLYPGsLlPKhPPH..huHPHP...HHPL.GuAYTTAEDVVLAAVAAHQHHPAM.RPLRALQ ....Q.QPPPPPY.FPAAALAALAGSPAG..PHH.PGLYSAAGGLRFPPH..PGHPHPHSHPHAHHPL..GSAYTTAEDVVLASAVAHQLHPAM.RPLRALQ 0 3 3 8 +10911 PF11079 YqhG Bacterial protein YqhG of unknown function Mistry J, Coggill P anon PRODOM_PD123329 Family This family of putative proteins is conserved in the Bacillaceae family of the Firmicutes. The function is not known. 
25.00 25.00 41.30 29.50 21.40 22.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.56 0.70 -5.57 11 186 2009-01-15 18:05:59 2008-08-01 13:12:26 3 2 165 0 35 146 0 223.40 52 94.96 CHANGED MpQp-IHpFLpRaFpANsCsIlEcSPuahTVQLTlEMDKcLMNRPFYWHYLEKTGGlPNPMpLTLITcppcss-slcGEhIHFGSPRLaQIFpus+chGualRLYEphssssstpsPLcPWLGhNlKlSYQCDRKKDhllSlGLpLIsGpllEsFa-+LpphsLoscIPDYCFTlSPlIKPcSGlpRlcphlcshhcs-sc-WAcpAhcRWpcDLcLL-pFYEcsEEKPEsYclEKpALcc.YEP+IplsllNGGLFYLp .................M..tpl.pah.pahts.ts.hhptt.thhpVQLoh-MDK.LMNRPFYWHYlEKTGGsPNPM+LTLITssEs.tps.-....GEhIHaGSPRLHQIFposKchGuaIRLY...Ecl.....c.......ps...u........us...c...sPLc..PWLGlNlKlSYQCDRKKDhLhSlGlHLISGThhtsFH-pLpplcLTP+IPDaCFTlSPlIKPpSGlpRlEshLcshlup-DHsWAcEA+hRWpcDLsLLs+FYE.....-.........s.........-Eh.....P.........Es....YclEKpA.Lpc...QYEP+IslpIINGGLFYl.p....................................... 0 10 23 25 +10912 PF11080 DUF2622 Protein of unknown function (DUF2622) Mistry J, Coggill P anon PRODOM_PD066031 Family This family is conserved in the Enterobacteriaceae family. Several members are named as YdiZ, a putative cytoplasmic protein. The function is not known. 20.80 20.80 21.10 20.80 19.70 19.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.00 0.72 -4.13 7 726 2009-01-15 18:05:59 2008-08-01 13:13:26 3 1 473 0 30 125 0 88.60 52 98.26 CHANGED Musu-lTRYVlTVpaHE-oLTEINELsNHLTRsGFlLThsD--GslHELGTNTFGLlSu.St-ElptLsuGLupuALsKcs-IsVsTaE-WtKs.p ......ups-lspYVlohhhpEssLTElNELsNhLTRsGFhLThsD--Gs.HELGTNTFGllSop.ut-EI+-LlsuLspsAss+Ds-loIsTa--a.pt..p.................. 0 2 3 17 +10913 PF11081 DUF2890 Protein of unknown function (DUF2890) Mistry J, Coggill P anon Pfam-B_629 (release 23.0) Family This family is conserved in dsDNA adenoviruses of vertebrates. The function is not known. 
23.30 23.30 23.60 24.10 23.00 23.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.72 0.71 -4.23 18 154 2009-01-15 18:05:59 2008-08-01 13:32:05 3 4 79 0 1 180 0 167.20 35 88.93 CHANGED MsP+tps.KpLtsp.sPsc.........D.EEpWD..SQA.......tEEthE-W............DSL-E-.pEtE.EVEEtsssp.....phssoSsu.utSpsstSsPspsst...h+sp...pRWDpTuphssPossusss...................................................thstpc.suhRph+NpIhssL....QpSpGp.............tShTRp.hLYH+u.s..p+sLc.hctLaspYCuhs ....................................................................................................hp.s...p.............-.EEpW-..SQA.......pE-phE-.............tS.t--tE.E.hE..El-Etpssp..............ss..ss...t..s..sssspssh.osPstsst.......+ss...pRWD...pstt..ssst...t..ssttt...........................................................thp.tpshpShRth+stIhssL.....QpstGp................hShTRp.hLaH+u.s..ppsLc.hctLas.aht.................................................................... 0 0 1 1 +10914 PF11082 DUF2880 Protein of unknown function (DUF2880) Pollington J, Finn RD anon Pfam-B_001492 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 25.00 93.20 20.10 19.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.71 0.72 -4.09 3 5 2009-01-15 18:05:59 2008-08-01 13:35:43 3 1 5 0 5 5 0 78.80 60 65.78 CHANGED lMAAulhhASuAsAAsslEhP+PRGKDEAPEAPVACMKAVKAALPNPDpFKWVuGTsRKVAEDAYSVVADVEYLupDGA ...lMALSlhsASuAsAAussEAP+PRGKDEAPEAPVACMKAVKAALPNPASFKWVGGTsRKVAEDAYSVVADVEYLAQDGA 0 0 1 3 +10915 PF11083 Streptin-Immun Lantibiotic streptin immunity protein Coggill P anon Pfam-B_43518 (release 22.0) Domain Streptococcal species produce a lantibiotic, streptin, in a similar manner to the production of nisin and subtilin by other lactic acid bacteria, in order to compete against competing bacteria within the environment. 
The immunity protein protects the bacterium from destruction by its own lantibiotic. In general, there is little homology between the immunity proteins of different genera of bacteria. 20.70 20.70 20.90 23.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.12 0.72 -4.01 12 173 2009-01-15 18:05:59 2008-08-01 13:38:42 3 2 152 0 7 87 3 96.80 46 22.38 CHANGED IAplDl+LsphpEKIATLNKMAEVLlNLpSc-.po++LA+Y-FSKLNLTEoloLEpVpcEIphLQppLshhlDcYEphlR+L-pFVclLN..hsctht.ca .............IAElDhcLppsQEKIATLNKMAEVLINLKS-sppo+KLA+Y-FuKhNhTESIpL-plscEIhcLQpELup.lscYEclsR+L-pFlKllN..hsK................... 1 1 3 4 +10916 PF11084 DUF2621 Protein of unknown function (DUF2621) Mistrey J, Coggill P anon PRODOM_PD086666 Family This family is conserved in the Bacillaceae family. Several members are named as YneK. The function is not known. 25.00 25.00 25.50 25.50 24.70 24.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.83 0.71 -4.50 12 164 2009-01-15 18:05:59 2008-08-01 13:46:17 3 1 163 0 32 112 0 136.90 71 96.97 CHANGED M....LpG..WFhhFILhWsllLlsLhuIGGFFMFRKFLKRLPKEDGKS.LDWp-aYI-pT+HLWs-EpKpLLpELVpPVPELFRDVAKpKIAGKIGELALcEcAspIsp-LlIRGYIlATPKRDHKFLhK+LpE+pIDhuPYEpLh .......LpGWF.WFIlhWsVlLlGLMSIGGYFMFRKFLKRLPKEDGhShLDWpEaYIsKTRHLWsDEpKQLLEELVSPVPELFRDVAKuKIAGKIGELALpEpAopITpDLII+GYIlATPKRDHKFLlKKLpEKcIDaosYpsLL........ 0 11 23 26 +10917 PF11085 YqhR Conserved membrane protein YqhR Mistry J, Coggill P anon PRODOM_PD101610 Family This family is conserved in the Bacillaceae family of the Firmicutes. The function is not known. 
26.00 26.00 38.90 37.50 25.80 25.80 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.25 0.71 -4.73 12 156 2009-01-15 18:05:59 2008-08-01 13:48:14 3 1 155 \N 28 109 0 166.00 51 98.23 CHANGED M......sppppphcpppppp.hohhs+slhhGFsGGVhWShluYlsahFsFoEluP.NhlLpPaslG-WKcshlGshluIlhIGllSIusAFLYashL+KlcuhWsGllYGlhLWhlVFalhNPlFPsl+slp-LshsTllTTlClYILYGlFlGYSISaEhNEhp..ppp.t....tppp .......................pphts.....................pphl.pIGhFGGlFWGuIhYhhalFsFTEsuP.NalLhPFAhGuWK-GshGNllGIVshGLLSIllAFLYpAhLtKFcGlhPGllYGLhWWuLLFauhG.lhPslKosh+Ls+-TIVTTICIFILYGVFIuYSlSatsNsp+tcpEt.t+s.....c.......... 0 6 18 20 +10918 PF11086 DUF2878 Protein of unknown function (DUF2878) Pollington J, Finn RD anon Pfam-B_001539 (release 23.0) Family This bacterial family of proteins has no known function. Some members annotate the proteins as the permease component of a Mn2+/Zn2+ transport system however this cannot be confirmed. 20.90 20.90 23.00 23.00 19.10 18.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.18 0.71 -4.22 59 319 2009-01-15 18:05:59 2008-08-01 13:54:18 3 1 313 0 84 276 229 151.30 30 86.58 CHANGED hthllNhlhFQhsWhhsVlhs.sphhhhhhhh.....lhhHhhh......s..phts-hphllhlsslGhhlDulhhthGlhpF.............ssshhPl..WLhhLWhhFuhslspuL.saltphsh.lhsllGuluGshSY.hAGt+luA.VphshshhhohhlLulhWullhPll ....h.hhlhsslhFplhWhhuVlut..p...p.hhhlhsll.........llhphhh......s..pttsphphhlhhsllGlhlDohhhhhGlhsF.............ssshhPl....WLlsLWhsFuhhhs+.L.shlpphsh.lhslhGulhGslSY.asGh+.huA..VphshsshhshlsLsltWsslhsl........... 0 18 34 64 +10919 PF11087 DUF2881 Protein of unknown function (DUF2881) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. Some members are annotated as p34 however this cannot be confirmed. 
25.00 25.00 115.60 115.30 24.60 24.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.65 0.72 -4.49 2 14 2009-01-15 18:05:59 2008-08-01 14:59:55 3 1 6 0 0 2 0 54.00 75 88.52 CHANGED Ms-FstsllTllTAIIGVAIlAVlVSppSNTAGVIpuuouGFSshLtoALuPlh Ms-FstsllTllTAIIGVAIlAVlVSppSNTAGVIpuuouGFSshLtoALuPlh 0 0 0 0 +10920 PF11088 RL11D Glycoprotein encoding membrane proteins RL5A and RL6 Pollington J, Finn RD anon PRODOM Family RL5A and RL6 are part of the RL11 family which are predicted to encode membrane glycoproteins. Two adjacent open reading frames potentially encode a domain that is the hallmark of proteins encoded by the RL11 family. 25.00 25.00 25.20 33.50 24.50 24.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.38 0.72 -4.18 4 33 2009-01-15 18:05:59 2008-08-01 15:20:20 3 1 5 0 0 28 0 87.50 44 92.09 CHANGED hpKLpsopGcNlTIscccD.hoTcWcph..ssGst.LCNVTupGssllNs..TlCVSSCoHTSLsLCNhTpts-ulaslG+hhs...DE.sGELWhloVS .................hp+LpsopGcNlTlscc+c.hootWcpa......D.sG..st.LCNVTupsssslNo..ohCVosCuHooLsLCNhTpts-uhaslu+hhs...DE.sG-lWhlpVp............ 0 0 0 0 +10921 PF11089 SyrA Exopolysaccharide production repressor Pollington J, Finn RD anon PRODOM Family SyrA is a small protein located in the cytoplasmic membrane that lacks an apparent DNA binding domain. SyrA mediates the transcriptional up-regulation of exo genes involved in the biosynthesis of the symbiotic exopolysaccharide succinoglycan. It does this through a mechanism which requires a two component system [1]. 25.00 25.00 25.50 25.50 24.70 24.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.81 0.72 -4.15 6 37 2009-01-15 18:05:59 2008-08-01 16:07:23 3 1 26 0 17 35 0 37.80 45 41.27 CHANGED AlAsYFsotShhsAhVsTLsCulLLQluYFluVLFLla .AlAoYahstShhsshlpTLhCuVLlQlGYFhuVLhLVh.... 
0 1 6 9 +10922 PF11090 DUF2833 Protein of unknown function (DUF2833) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function are found in the bacteriophage T7. Some of the members of this family are annotated as gene 13 protein. 25.00 25.00 26.70 26.30 24.60 24.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.78 0.72 -3.82 5 41 2009-09-11 15:53:06 2008-08-01 16:15:47 3 1 38 0 0 34 35 84.80 42 57.97 CHANGED ssVshslsGhVLAIGGNpGDpVWFVTSchVa+LocKpKREFRKLIhEYRDtML-Q.YsoIWNYVWVGNKSHIRFLKoIGAVFHcEaT ................lsh.hhGhslAIGGss....u.spsWFlTSspV...h....phstpt+hcFR+hlhcahDphLcp..Ysp.LWNaVWsGNpuHIRFLKolGAh..Fcp-........ 0 0 0 0 +10923 PF11091 T4_tail_cap Tail-tube assembly protein Pollington J, Finn RD anon PRODOM Family This tail tube protein is also referred to as Gp48. It is required for the assembly and length regulation of the tail tube of bacteriophage T4 [1]. 20.20 20.20 20.30 20.50 19.80 19.90 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.01 0.70 -5.74 8 48 2009-01-15 18:05:59 2008-08-01 16:25:33 3 1 47 0 0 41 153 313.60 32 90.19 CHANGED +VK-.Is.csschhtuh.uupssAGtsocsc..ssophhsAQFPspRAuGNDsst.a.lsDLYKNGLLFTAYshouRsos....sLRshRp.....ssssIhSptsusVpsphstho...........tshhsppAlANILLPRSpSDVDssSH+FND.....ls-SLls+GGuouoGsLSs.......hASTAlaGuLESITpGhhAD.........suEQIYssoRoMYuGu-sRTKsasWpLTPRShpDLhpIlpIYchFshaSYGpoutSphAtElKuplDshY+sThhc.hss.sshpNpT........LhEtIT.uFLoNVhVVSNPslWhI+NFGsooua.....-shp-sFGPsQIpSlRFDKTPDGpFNGLAluPNLPSoFsLElTFREIIsLsRuo 
........................................................................................h...hhs.htt..stt.suG..o.tt....pthhsAQaPstRsuusDss..h.hssLYpNGLLFoAashsup.os....phRs.Rp.....ttp..phhp.tttssl..th.t.s..............hspps.l...ssILhPRupoDs-ssSH+FND.....Vt-SLls+Guuouo.GhLSN.................hAST...AlaGulE.......Slo.......p.GhhAD........................pGEQlhssu+uMYsGs-sRTKsFoachoPRshpDLhpIlpIYchFp.....hhSYGpsGpSp..hAt-..l+s.lDshY+sThhp.h.....s.s.tsps...............hhEthT.uhloNV..hVlosP...slW...hl+sFu...pssph.........-shp-hF...GPstIpSIRhsKoP-GpFsuLA...huPNhPS...o...hsLElThpEllsLsRu....................................... 1 0 0 0 +10924 PF11092 Alveol-reg_P311 Neuronal protein 3.1 (p311) Pollington J, Finn RD anon PRODOM Family P311 has several PEST-like motifs and is found in neuron and muscle cells. P311 could have some function in myo-fibroblast transformation and prevention of fibrosis [1]. It has also been identified as a potential regulator of alveolar generation [2]. 25.00 25.00 56.20 56.10 20.10 16.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.27 0.72 -4.01 4 38 2009-01-15 18:05:59 2008-08-01 16:31:11 3 1 24 0 16 33 0 62.60 68 93.40 CHANGED MVYYPELhVWVSQEPFPsK-MEGtLsKGRLPVPKEVNRKKpsEstAASLsPlGusEh+SPtIuYLHsF .MVYYPELhVWVSQEPFPNK-MEGRLPKGRLPVPKEVNRKKssETsAASLTPl.GSs..EL+SPpISYLH.F...... 0 1 1 3 +10925 PF11093 Mitochondr_Som1 Mitochondrial export protein Som1 Pollington J, Finn RD anon PRODOM Family Som1 is a component of the mitochondrial protein export system. The various Som1 proteins exhibit a highly conserved region and a pattern of cysteine residues [1]. Stabilisation of Som1 occurs through an interaction between Som1 and Imp1, a peptidase required for proteolytic processing of certain proteins during their transport across the mitochondrial membrane [2]. 
This suggests that Som1 represents a third subunit of the Imp1 peptidase complex [2] 21.40 21.40 22.70 22.10 21.30 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.04 0.72 -4.18 10 77 2009-01-15 18:05:59 2008-08-01 16:33:09 3 1 76 0 57 72 0 83.00 31 76.66 CHANGED MAPPTPVhotcElppphssthpsstch......cCpLKSLTQaECsF+so.tts....sEhICLPFKRLFpcClhP.............c+hlNIElTDppTN .............................MuPP...sslhstppl.tphtp..pt.tph..............pCp.LhplsQapCsh..css.......t.......stll.ChPhpRLFc+C...........................ts.thslEsTshpt.................. 0 9 30 48 +10926 PF11094 UL11 Membrane-associated tegument protein Pollington J, Finn RD anon PRODOM Family The UL11 gene product of herpes simplex virus is a membrane-associated tegument protein that is incorporated into the HSV virion and functions in viral envelopment [1]. UL11 is acylated which is crucial for lipid raft association [1]. 20.30 20.30 20.50 43.60 19.80 19.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.80 0.72 -4.14 11 49 2009-01-15 18:05:59 2008-08-01 16:39:40 3 1 32 0 0 40 0 39.80 52 42.96 CHANGED MGQusStutss....CCR..pNhLlTcsGEsluLsA-sF-sF-L....- MG.uhSsupss....CCR..pNhLlTcsGEVVoLsAcsF-shDlE........... 0 0 0 0 +10927 PF11095 Gemin7 Gem-associated protein 7 (Gemin7) Pollington J, Finn RD anon PRODOM Family Gemin7 is a novel component of the survival of motor neuron complex which functions in the assembly of spliceosomal small nuclear ribonucleoproteins. Gemin7 interacts with several Sm proteins of spliceosomal small nuclear ribonucleoproteins, especially SmE [1]. 
21.50 21.50 23.30 30.60 20.90 19.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.50 0.72 -4.24 7 68 2009-01-15 18:05:59 2008-08-01 16:52:05 3 2 59 2 45 74 0 78.10 46 57.14 CHANGED popEQ+tRusLRERaL+SL.sMss+sssFThHEtsp..VsApFtAoDlsltNFhVSpLpTPIGlpsEAlLRsoDllSaTFcs ............pEQcARuhLRERaLRo...LluMs...G+p.VsFs..LHEslc..VsA.cFsAoDlDltNFhVSpLpTPlGV.Q.sEALLRssDIIuaoFc...... 0 15 19 34 +10929 PF11097 DUF2883 Protein of unknown function (DUF2883) Pollington J, Finn RD anon PRODOM Family This family of proteins have no known function but appear to be restricted to phage. 25.00 25.00 30.40 30.30 19.00 16.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.67 0.72 -3.86 3 35 2009-09-11 15:42:54 2008-08-01 17:04:17 3 1 34 0 0 22 0 73.20 86 100.00 CHANGED MLNNNVVYLGYPGLPPNKLEGLMLELRTVARCSGLEFRFQDTPRRGKNYTQMHILKQRSKTGAFVMHYKPRKEKF MLNNNVVYLGYPGLPPNKLEGLMLELRTVGPSSGLEFRFQDTPpRGKNYTQMHILKQRFKTRAFVMHYKPRKEKF 0 0 0 0 +10930 PF11098 Chlorosome_CsmC Chlorosome envelope protein C Pollington J, Finn RD anon PRODOM Family Chlorosomes are light-harvesting antennae found in green bacteria. CsmC is one of the proteins that exists in the chlorosome envelope. CsmC has been shown to exist as a homomultimer with CsmD in the chlorosome envelope [1]. CsmC is thought to be important in chlorosome elongation and shape [1]. 26.30 26.30 26.70 126.80 26.10 26.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.47 0.71 -4.47 5 13 2009-01-15 18:05:59 2008-08-04 09:12:01 3 1 13 0 11 12 0 138.80 73 99.18 CHANGED MSESYQKLRKDFKDLEFTDRLTFLAEGsLLTGQSAVVGGLELAGSVVETVAGTVGSLlDATGIG+LLGsTGGVVGETIDRVAITVKDVSRSAGELYSDAVKNVENVTDNAA+AIGDAGVSASEAVKNlsGSFQKosGKK MSESYQKLRKDFKEL-FTDRLTFLAESlLLTGQSAVVGGLELAGSVVETVuGTVGSLlDAoGIGslLGsTGGVVGETIDRVAITVKDVSRSAG-LYsDAV+NVENVTsNAAKAlGDAGVSASEAVKNlAGSFQKssuKK.. 
0 1 2 8 +10931 PF11099 M11L Apoptosis regulator M11L like Pollington J, Finn RD anon PRODOM Family Apoptosis regulators function to modulate the apoptotic cascades and thereby favour productive viral replication. M11L inhibits mitochondrial-dependant apoptosis by mimicking and competing with host proteins for the binding and blocking of Bak and Bax, two executioner proteins [1]. 25.30 25.30 25.40 42.40 24.70 25.20 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.96 0.71 -4.51 9 129 2009-01-15 18:05:59 2008-08-04 13:01:09 3 1 37 6 0 96 0 160.50 52 77.66 CHANGED +sllh........................hYlsctslsc....LsshEpslLshIppsC-hIppsYppshs.lss.hlchsshSh.sIpcIKsplhpsLhsDspPSVKLAolSLlShIhc+hh..scslhh.shlhs-IhstIotptcplIsFIpcppc.sss...h...cphlplhshhshh.hlsYhhlKahh .p..hVY........................YYhsKpRLD-hYRpLsopoRSalDlIshhCDKlNNDYs+DhNlMYD...hASscSa.slhDIsNEV.soILhsspulGVRLATISFIopluKRshNslcTI+MhoLLScsIsD......-pFlDYIschs..ssss..s.hpT.Rchl+lhGlssIh.FsTYtsLKYh....................... 0 0 0 0 +10932 PF11100 TrbE Conjugal transfer protein TrbE Pollington J, Finn RD anon PRODOM Family TrbE is essential for conjugation and phage adsorption. It contains four common motifs and one conserved domain [1]. 25.00 25.00 74.40 74.20 22.10 19.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.32 0.72 -4.12 4 124 2009-01-15 18:05:59 2008-08-04 13:04:03 3 2 103 0 2 68 0 65.30 77 78.99 CHANGED +llsFLlRLolTlIVISPslYWSWDsVKsTTA-DhlhAsllIhhsGlhhhlLYhFhslLTKlhptD ..RFIDFLIRLLITAIVISPVIIWSWDTVKETTADshLAAAFVILYSGVL.LFILYFCFSALTDLQKs.... 0 0 0 1 +10933 PF11101 DUF2884 Protein of unknown function (DUF2884) Pollington J, Finn RD anon Pfam-B_001481 (release 23.0) Family Some members in this bacterial family of proteins are annotated as YggN which currently has no known function. 
30.60 30.60 31.30 32.70 29.20 29.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.31 0.70 -4.98 39 793 2009-01-15 18:05:59 2008-08-04 13:22:51 3 1 717 0 94 390 15 209.20 48 88.49 CHANGED CsVslpt-lhlsspplplhpsss.......pphhIspsspLalsGcplsLsscQpptlppYppslpptlPplhplAp-ulplApsAlsplhsshhu..spshsplpphhsplptplpphhhppssphhhsspthst....hpp.appchEpthcphlppShGolhhslusph.....tpGs....phsshtpphssltppl-pphcppupsl-t+AcplCsplpsLsp.EppLpttlPpLtshplhp ........CSVoP+DDVIlSPQoVQV+GcNG........NLVIo.PDGNVhhNGKphoLsAAQRcQA+DYQucLRSsLPWID-GA+sRVEKARlALDKlIspphG...psSphRuRLTcL-AQLKpQMNRIIEpRoDGLTFHacAIDQ.............VcA....-GpQLVsQuMGGILQDSlNEMGsK..tshcuGG......NPLQ.slhGSLGGLQsu.IQsEWKpQE+DFQQFG+-VCuRVsoLE-sRKAL..............sst............ 0 12 31 60 +10934 PF11102 Cap_synth_GfcB DUF2886; Cap_synth_GfcC; Group 4 capsule polysaccharide formation lipoprotein gfcB Pollington J, Finn RD, Eberhardt R anon Pfam-B_1366 (release 23.0) Family This family includes lipoprotein GfcB (YmcC), involved in group 4 capsule polysaccharide formation [1]. 
20.20 20.20 20.20 21.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.13 0.71 -4.76 39 859 2009-09-11 10:59:46 2008-08-04 13:41:14 3 1 602 2 69 362 161 193.80 52 91.11 CHANGED LsuCopp.........hpshssolphuh.............hsstssslospplps.sshsshhhplsstspshhlLshs-p...............hpWhosDpthlshcsGhllpTpuLss..sLhus..............pssshhthhpt.....ssstshphphphsst........chshh.hpsphph.upcsltlstt.shpshchpEpsphs........stp....apNpYWl-sssGp....llpScQhluPshshlphphL ...............................LpuCoto.............ppplssolhsSL................FGssslploDp.p.IQs.hPYAS.Yhp.LNsGsplFVVLAasEs...................sQpKWlo.QDpAhLVTppGRLVKTl.hhss..NLlEVs...........N.usDPLhpshpI.......hDGusWTRshsWoEt.......pph..RhATspSsFp.asGs-Tlpluuc...cshspVhsEEVoos............ptpWpNpYWlD.S.pGQ....lRQScQhLGAshhPVchTh..................................................................................... 0 8 27 49 +10935 PF11103 DUF2887 Protein of unknown function (DUF2887) Pollington J, Finn RD, Bateman A anon Pfam-B_1330 (release 23.0) Domain This bacterial family of proteins has no known function. These proteins may be distantly related to the PD(D/E)XK superfamily. 22.40 22.40 22.70 22.60 22.10 22.30 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.13 0.71 -4.71 29 232 2012-10-11 20:44:46 2008-08-04 14:02:13 3 5 44 0 85 297 45 160.80 36 68.55 CHANGED TDslFY+LFpp.PshlFELls.pssspAcsYcFsSVElKppsFRlDGVFlPhtst.stPlaFsEVQFQpDpphYpRlFAElaLYltppp.p.scWpuVlI....YssRsl-.sshhsacplLsuspVpRlYLsELsshpp.slsluLltLh.lhspppssppA+hLlppsp.....pphhsttppppll-lIpTIllYKFsplS+cElcAML .........................TDplFYplFtp.PphhF-L..ls..sstpsp.YpFsShplKphtFRlDGlFhP.tp...s.PlahsEsQhQ.DtthY.RhFsElalYltp.p...psWpslll....ass+ph-......t.......apthlp....tp....l......p.RlYL....s....-.....Ltp.....t.s.lslu.h.l.pLl..h.sptps.ppuptLlppsp.....pp........ppllpLIEoIllYKhPphoccElptMh...................................... 
0 5 66 85 +10936 PF11104 PilM_2 Competence_A; Type IV pilus assembly protein PilM; Pollington J, Finn RD, Eberhardt R anon PRODOM Family The type IV pilus assembly protein PilM is required for competency and pilus biogenesis [1-2]. It binds to PilN and ATP [3]. 66.50 66.50 66.50 67.00 66.40 65.90 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.87 0.70 -5.56 37 952 2012-10-02 23:34:14 2008-08-04 14:34:40 3 4 881 1 318 846 348 315.90 25 86.32 CHANGED GlDIoSouVKllELoc.ps..spa+lEsaAhtslPcsuls-tsIt-h-uVucsl+plhcctssps+psAhAVsuSuVITKhI.hsusLs-pELEsQl.chEAspaIPasL-EVslDFpllG.s...tsssscVcV..LLuAsR+EsV-sRlssl.....-tAGLpscVlDVEuaAlpR.Ahphlhpp........Lsssspsp.....sVA.............llDIGAshTslsVlpsGphlYsR-QsFGGpQLTppIt+pYGhohEEApttKppGsLPc.............sYp.-lLpPFhpslsQplsRuLQFFhoooths.pVDtllLuGGsAsl.GLschlppclGhsThlsNPFtsMplusc.lptptLpp-usuhhlAsGLALRuF .............................................GlDIuspulKhlpLp......t....pt.....sp....hplppauhhslPts....sl.s....-s.p.....lh-h.pt...l........sptlpchhpph...s.h.p..s+.pss.h..ul..ss.s.s.V.Is+.hlp.hss.sh....s-.c-.lEt.t.l.phEs..sphl..P..a..s.l-E..lsl.Daph.ls.p..................s..ss..s...p..h.....pV..LlsAs++-.l-shhssh.....ptA..GLpshll..D.V..csaAlt.p..sh.p.....h....h..tp.......................hs.t.t.....t..tt.....hls...................llDlGu.shoslslhpsup.hl.a.pR....ph..s.hG.u.ppls.ptlt......pt..h.....s..l.s.h...ppA...........t.......h.h...t.....p.....t....s....l.....sp.....................................s..h..........p....s....lp.....h...hp.p...lspplpRsl...paa..h...s...s...s...t..tp...p................lsp..lhLsG.G.sutl...GLsphl.ppp.hshss.hhs.Ph..t....h.th.s.p...p..p.....ht.ptst..hhhuhGLAhRt..................................................................................................... 
0 121 226 283 +10937 PF11105 CCAP Arthropod cardioacceleratory peptide 2a Pollington J, Finn RD anon PRODOM Family CCAP exerts a reversible and dose-dependant cardio-stimulatory effect on the semi-isolated heart of experimental beetles. CCAP also increases free hemolymph sugar concentration in young larvae and adults of the meal-worm beetle [1]. 20.20 20.20 21.20 35.90 19.70 19.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.81 0.71 -4.43 4 47 2009-01-15 18:05:59 2008-08-04 14:41:47 3 2 33 2 22 50 0 102.10 32 82.57 CHANGED hpsohshLLhLlshlhC....l-CuhssppPRsacthssEs...ss......pKRPFCNAFTGCG+KRSpsssss.......PsshhpRp.........-.lppc..sNE...EuLusLlDLNoEPAVE-L.RQIMSEAKLWEAIQEAs+EIahQKptpK..p ..........................................h.hhhhhh.....h.ss.s...h..p..p..p...tp..............pKRPFCNAFTGCG+KRop.......................................p....tshtsh..hps-.t.-pl.+QhhopsKlaEsIpEAphEl..pppt................................................... 0 10 12 19 +10938 PF11106 YjbE Exopolysaccharide production protein YjbE Pollington J, Finn RD anon PRODOM Family YjbE is part of a four gene operon which is involved in exopolysaccharide production. The expression of YjbE is higher than the rest of the operon yjbEFGH. It appears to be restricted to Enterobacteriaceae [1]. 25.00 25.00 26.40 26.00 21.40 21.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -9.89 0.72 -4.01 2 379 2009-01-15 18:05:59 2008-08-04 14:48:45 3 1 368 0 17 71 1 79.80 92 99.09 CHANGED MKKlL.GlFAIsALuAsSspAAPVpVGEAAGSAATSVSsGSSSATusSTVuSsVGVALAATGGGDGSNTGTTTTTTTSTt ......................MKKVL.YGIFAISALAATS.AWA.....APVQVGE...AAGSAATSVSAGSSSATSVSTVSSAVGVALAAT.G..GG....DGSNTGTTTTTTTSTQ...... 0 1 1 13 +10939 PF11107 FANCF Fanconi anemia group F protein (FANCF) Pollington J, Finn RD anon PRODOM Family FANCF regulates its own expression by methylation at both mRNA and protein levels. 
Methylation-induced inactivation of FANCF has an important role on the occurrence of ovarian cancers by disrupting the FA-BRCA pathway [1]. 21.10 21.10 22.50 22.50 19.70 20.40 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -12.16 0.70 -5.19 7 69 2009-01-15 18:05:59 2008-08-04 14:58:34 3 3 50 1 39 61 0 285.20 27 85.35 CHANGED M-slLcplctFl-lLslu....postVtsWD.tsl+RAhpWAtYhcpla++hpspsslcpuLppcLpst.pp.uss..hPs.hp.hsFpsLupscpllhhpLLpN.sltstph.hll.pL.......s.pt-t-sLpsshsphssp+ushphL......................phsutppssphppsohhpsp.................uclLhchLpchhp..sps.cpstphLspLh..cth.pssahpllAssLLpssss.pp.p.............spchLlpWLht...psshhssFCp.l.utLLsploh+hsphpshYhshLpphupph..........................ph-l.cuhhlusEsp.hsa-tLhp+hpuL..hpus.sl..pptshssLcshptQD....Gs.pl.GhSlWsDlhLtLt .................................MEtlLpplctFs-lLslu....postVpsW-stslcRAlpWApYhcclac+htpp.stlR.pulpccL..cst.tp..sss.....hs...t.h......shpsLupsc.ll.hpLLpN.sL.....tstththll..ph..........s.p.p.ptl..tphsphhppc.s.phL.....................................ps.htsp.................uplLhppLpphhp.....hps.p.stphL..splh...pth.ps..s..hhp.hAssLL.........s.....p.........................pp.llpWLht.................thhtshCh...shlLs.hs.+hstht.hhhshhpphhphh...........................h-l..cthhlss...tsp....appL.hp+h.sL..hpus.sl..ppts.ptLpthptpD.......ps......pDhh.th............................................................................................................................... 0 7 13 24 +10940 PF11108 Phage_glycop_gL Viral glycoprotein L Pollington J, Finn RD anon PRODOM Family GL forms a complex with gH, a glycoprotein known to be essential for entry of HSV-1 into cells and virus-induced cell fusion [1]. 
It is a hetero-oligomer of gH and gL which is incorporated into virions and transported to the cell surface which acts during entry of virus into cells [1] 25.00 25.00 42.00 41.60 20.70 20.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.70 0.72 -4.09 9 36 2009-01-15 18:05:59 2008-08-04 15:34:02 3 1 25 16 0 30 0 109.10 32 71.22 CHANGED PCCpl.sLstsp.lPulasIssIalssspo.CsGhslApL...+ptsspsTh...phCuNGFNlhSFhlulLp+ls.ss.tEphcLLstLpp.hsuF.lsshpsssssu.thp....uhpG .PCC+I..psh..sspp.hP..thasIssIaLssspp.CsGhslApL...+ppssphsh...phCsNGFsLhuFhlullp+h.s.ss.t--lcLLstLpp.hssF.hpsFpssssNuSth.....hss..... 0 0 0 0 +10941 PF11109 RFamide_26RFa Orexigenic neuropeptide Qrfp/P518 Pollington J, Finn RD anon PRODOM Family Qrfp/P518 has a direct role in maintaining bone mineral density [1]. Qrfp has also found to be important in energy homeostasis by regulating appetite and energy expenditure in mice [2]. The c-terminal 28 residues are the functional 26RFa [3]. 25.00 25.00 39.20 39.20 23.70 21.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.78 0.71 -4.26 3 30 2009-01-15 18:05:59 2008-08-04 15:46:31 3 1 27 0 16 31 0 123.40 60 97.11 CHANGED hRPYsLlYhLFLPLGACFPLLDRREPTDAlGGlGAcMsWADLAcG.RPas.WGSPsWlRAPQPQALLVlARELQASGREHAGhpFRLGRQD-GSEATGFLPA-uEKsSGPLGTLAEELNGYSRKKGGFSFRFGR ...........tsasLshL.LhLPLGsCFPLLDR+pPsDshGshGu.thsWAcLAtG.+..P.as.W..Gus.pWhRAspPpALLVlA+tLQsSGRE+AGhpFRFGRQD-GSEAsGFLPAs.uEKsSG.PLGsLAEELNGYSRKKGGFSFRFGR 0 1 1 4 +10942 PF11110 Phage_hub_GP28 Baseplate hub distal subunit Pollington J, Finn RD anon PRODOM Family These baseplate proteins are also referred to as Gp28. Gp28 is the structural component of the central part of the bacteriophage T4 baseplate, which possesses a hydrophobic region and is membrane bound [1]. Gp28 forms a complex with gp27 which is another structural component of the baseplate [1]. 
25.00 25.00 36.10 35.50 23.00 17.40 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.90 0.71 -4.53 6 31 2009-01-15 18:05:59 2008-08-04 15:49:07 3 1 30 0 0 24 0 148.30 46 84.65 CHANGED IPKLGlKHh+LLKDh+GsD-sh+lLlDSIpPGLoAAEsDhVhLHLLtFNsKlpohpphDGashclsDlYlCp+hEFpapGpTFaFKsPthh.-pFlohsDhLo+...phsD-ps..-hsFh-hPAFVlcWA--IhoTIAlssPsGsIpGhusIlGll IPKhGLKHapllKDhKu.P-csL+lLlDSIpP.sLosAEsDFVslHLLEFNGKlpsppplDGasYclsDlYlC.Q.+LEFpapGpTFhF+sPthh.-pFhsls-hLpp....lplsD-sh....c..sFh-MPAFVhcWAs-IhoTlAlsGPNGsIpGlhsIlsl.......................... 0 0 0 0 +10943 PF11111 CENP-M Centromere protein M (CENP-M) Pollington J, Finn RD anon PRODOM Family The prime candidate for specifying centromere identity is the array of nucleosomes assembles with CENP-A [1]. CENP-A recruits a nucleosome associated complex (NAC) comprised of CENP-M along with two other proteins [1]. Assembly of the CENP-A NAC at centromeres is partly dependant on CENP-M. The CENP-A NAC is essential, as disruption of the complex causes errors of chromosome alignment and segregation that preclude cell survival [1]. 25.00 25.00 25.10 25.40 24.00 24.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.95 0.71 -5.04 6 65 2009-01-15 18:05:59 2008-08-04 15:53:17 3 1 51 0 36 61 0 158.40 50 95.51 CHANGED MA...lLRPaDKLPpLNsATlLLVGsE-uhppQLApuML+ccpsFplplHLApSLPLPs-ppphRPRIDLIVFlIsL+SKhSLpsVctSLsHLDssFFLGKVCFLlTGAGpssHCsVchsoVhKLAcoapSPllhs-hchEDhpsAhApRLLphLQICAGhVP.GVSALhLsoLhRsots ...................................MulLpPhsKhPtLN...sAolLLVGsE-thhppLA-uML.+..E.-ss...p..lpVHLAp.SLPLP..sp.ss....RPRIDLIVFVlNL+SK...a..SLpssEpSLpHVDusFFL.GKVCFLsTGAGptspsSl+hssVhKLApoYpSPlLas-Lcsc..shRsshAQRLl+hLplsAGhVP.GlSAL.L.shh+so..s................ 0 9 12 20 +10944 PF11112 PyocinActivator Pyocin activator protein PrtN Pollington J, Finn RD anon PRODOM Family PrtN is a transcriptional activator for pyocin synthesis genes [1]. 
It activates the expression of various pyocin genes by interaction with the DNA sequences conserved in the 5' noncoding regions of the pyocin genes [2]. 18.40 18.40 19.60 18.40 17.50 17.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.26 0.72 -4.15 26 221 2009-01-15 18:05:59 2008-08-04 16:51:16 3 2 194 0 36 165 0 74.50 34 80.92 CHANGED TsahLhApa.upsllPl-cVsp-YF.pl.oscphhcKlpsGcIsLPlh+hc.sSpKus+hVplpDLAsYlDc+tptAc ...............phhhLhtca.ssshlsLptVspcYF.tl.ospshppKssuscl.slPshRls.sSp..Kuth...hVplpDLApYlDc+ppp............ 2 6 10 24 +10945 PF11113 Phage_head_chap Head assembly gene product Pollington J, Finn RD anon PRODOM Family This head assembly protein is also refereed to as gene product 40 (Gp40). A specific gp20-gp40 membrane insertion structure constitutes the T4 prohead assembly initiation complex [1]. This protein in T4 stimulates head formation [2]. 25.00 25.00 26.50 29.70 24.20 23.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.70 0.72 -4.08 10 33 2009-12-03 14:07:40 2008-08-04 16:51:44 3 1 32 0 0 25 0 55.70 46 51.23 CHANGED VlQEllIh.c-GpsHLVYIaclpac...DGplplDauTss-t....K-ELtPHVccslphQI .......VlQEIlIpLcDGssHIVYlpclcas...cG+lslDFuT.s--c...KsELuPHVEKClshQl... 0 0 0 0 +10946 PF11114 Minor_capsid_2 Minor_capsid; Minor_capsid-2; Minor capsid protein Pollington J, Finn RD anon PRODOM Family Most of the members of this family are annotated as being minor capsid proteins. The genomes carrying the genes usually have three similar proteins adjacent to each other, hence this one being named as No.2. 
26.40 26.40 26.50 26.40 26.10 26.30 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.51 0.71 -4.21 5 120 2009-12-03 14:36:31 2008-08-04 16:51:54 3 1 116 0 15 97 1 109.60 29 94.67 CHANGED M.h+IpVDLuGhKcKlSspuh+RG+lAlsNQhhhDMEQYVPhR..-GhLRuSu+lsSsGptIsYoTPYARAQFYGss....staph+NYTTPGTGKRWDhK..AKuhahuDWp+....AFlK.GMG ...........................tlhlcLs...t...hct...pl.p.p.uhp...+u...phtlsspshtshs.YVPhc.......sGpL.+s.o..u...p...l.sss..G...t..lhasssYARtQaYGss..................hphpphssPs...sGt+WDp+..ApsphhppW.c....sh.p............................................................. 0 10 13 15 +10947 PF11115 DUF2623 Protein of unknown function (DUF2623) Mistry J, Coggill P anon PRODOM_PD065638 Family This family is conserved in the Enterobacteriaceae family. Several members are named as YghW. The function is not known. 25.00 25.00 33.80 33.50 21.90 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.03 0.72 -4.05 6 450 2009-01-15 18:05:59 2008-08-04 17:16:34 3 1 449 0 21 54 0 94.90 86 99.75 CHANGED MNNHFGKGLMAGL+AspAcoAsclspFCuDYKRGFVLGYoHRMaEpTGDRQLSAWEAGILTRRYGLD+EMVMDFF+EssSshAlRFFhAGYRLEs ....MNNHFGKGLMAGLKA............THAD........SAVNVTKFCADYKRGFVLGYSHRMYEKTGDRQLSAWEAGILTRRYGLD.KEMVMDFFRENNSCSTLRFFMAGYRLEN................... 0 1 4 12 +10948 PF11116 DUF2624 Protein of unknown function (DUF2624) Mistry J, Coggill P anon PRODOM_PD060869 Family This family is conserved in the Bacillaceae family. Several members are named as YqfT. The function is not known. 
20.80 20.80 20.90 21.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.73 0.72 -3.47 11 138 2009-01-15 18:05:59 2008-08-04 17:17:48 3 1 137 0 21 76 1 84.00 58 88.95 CHANGED M.hlhQplVpQKLNplTsc-LL+YuKQYGlslTpsQAcplhsll+GKsINIFscsERp+llKclppITuPpTAppVNcLFpQFs.........s ......M.NLI+QlVNKKLNpIosKELLKYSKEY-VPITsuQA-QIVhLMKGKNINIYDssERLcLLKQIAKVTSPuTAQQVNsLFQQLl..K.... 0 3 12 15 +10949 PF11117 DUF2626 Protein of unknown function (DUF2626) Mistry J, Coggill P anon PRODOM_PD060869 Family This family is conserved in the Bacillaceae family. Several members are named as YqgY. The function is not known. 25.00 25.00 50.50 50.20 20.60 20.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.77 0.72 -3.57 8 157 2009-01-15 18:05:59 2008-08-04 17:19:28 3 1 157 \N 28 77 0 79.60 73 98.46 CHANGED MDRMFRVLuFWTGIFAVMFYlGDMpssuLLFFGQTuhFlhLuYLpLSERMYlYIFGAYLTVFFlGFTYYSTFlhVPGhGc .M-RMFRVLGFWTGIFuVMFYlG.............DM........ps.....sALLFlGQTGFFVLLSYLKLTERMYIYlFGAYLTVFFlGFTYYTTFlhVPGuGc..... 0 9 19 22 +10950 PF11118 DUF2627 Protein of unknown function (DUF2627) Mistry J, Coggill P anon PRODOM_PD058321 Family This family is conserved in the Bacillaceae family. Several members are named as YqzF. The function is not known. 21.70 21.70 22.20 39.70 21.40 18.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.50 0.72 -3.88 13 153 2009-01-15 18:05:59 2008-08-04 17:19:58 3 1 153 0 29 89 0 77.20 61 95.06 CHANGED MtRllALllLLIPGslAAlGIKLMRDolFGIlhsPFshL.......WLQFLuGllhFshGlhhlAGFILaRDRKRNKVssRF++ ..MpRhlALLlhLIPhulAshGIKLMRDTlFGILh.s.....P.h.u...hL.......WLQFLlGhlhFulGhYlhGGFlLHRDRKRNKVQsRFR+......... 0 10 20 23 +10951 PF11119 DUF2633 Protein of unknown function (DUF2633) Mistry J, Coggill P anon PRODOM_PD053432 Family This family is conserved largely in the Bacillaceae family. Several members are named as YfgG. 
The function is not known. 20.90 20.90 21.50 20.90 20.40 19.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.91 0.72 -4.25 7 453 2009-01-15 18:05:59 2008-08-04 17:21:01 3 3 451 0 33 99 7 54.10 73 72.81 CHANGED hR++hssphT+IlLLISFlhhFGRhlYuuIsAh.HHQp+.pu.phs.olp........pthpp ...KRHRFNoRMTRIVLLISFIFFFGRFlYSSlGAWQHHQsKKE.AQ.QSoLSVE....oP.......sQR................. 0 1 4 19 +10952 PF11120 DUF2636 Protein of unknown function (DUF2636) Mistry J, Coggill P anon PRODOM_PD053231 Family This family is conserved in the Enterobacteriaceae family. Several members are named as being YhjT, but the function is not known. 21.90 21.90 21.90 23.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -8.99 0.72 -4.47 8 456 2009-09-11 14:18:15 2008-08-04 17:21:37 3 1 452 0 25 86 0 61.90 80 97.67 CHANGED MslSDIlQLlllCALIFFPLGYLs++shRRlRsThRhhhh+PRYVKPAGsL+Rs........o+V+uscp ...MTISDIIEIIVVCALIFFPLGYLARHSLRRIRDTLRLFFAKPRYVKPAGTLRRT........EKARATKK......... 0 2 4 15 +10953 PF11121 DUF2639 Protein of unknown function (DUF2639) Mistry J, Coggill P anon PRODOM_PD049467 Family This family is conserved in the Bacillaceae family. Several members are named as being YflJ, but the function is not known. 21.70 21.70 21.70 25.80 19.80 18.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -7.90 0.72 -4.45 8 197 2009-01-15 18:05:59 2008-08-04 17:22:01 3 1 123 0 16 79 0 41.90 69 77.77 CHANGED HaGSKGWYVcELKKhGIppaE..GRKLESYKsHhLuNLL..cph ............YaGoKGWYVtELKKLGlRhaE..G+KLESYRsHlLpsLLt........... 0 2 9 10 +10954 PF11122 Spore-coat_CotD Inner spore coat protein D Mistry J, Coggill P anon PRODOM_PD057197 Family This family is conserved in the Enterobacteriaceae family. CotD is an inner spore coat protein that is expressed in the middle phase of mother cell gene expression. 
Along with CotD, CotH, CotS and CotT it is assumed to assemble into the loose skeleton of the matrix, between the shells of SpoIVA and CotE. Coat proteins do not share much sequence similarity between species, but this does not imply they do not share secondary, tertiary, or quaternary features [1]. 19.60 19.60 20.50 20.40 18.60 18.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.48 0.72 -3.67 15 144 2009-01-15 18:05:59 2008-08-04 17:22:21 3 1 131 0 25 95 0 100.70 52 82.33 CHANGED sslVHPT+pssscshscslVPHIHPsHTTpVN+phhcHhHYaPpTpSshspss..................ppahts..sss............................................G+sss ......APVlHPTKQCVsHoFSsTVVPHIaPTHTTHVaHQplK..sQpaF.PQTsSNVNsVs.........................................................................HsHpluPhs..P..ssssushGs..s.................h...........sss............................................................................... 0 6 15 20 +10955 PF11123 DNA_Packaging_2 DNA packaging protein Pollington J, Finn RD anon PRODOM Family This DNA packaging protein is also referred to as gene 18 product (gp18). This protein is required for DNA packaging and functions in a complex with gp19 [1]. 21.00 21.00 21.60 50.20 20.30 19.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.62 0.72 -4.10 5 41 2009-01-15 18:05:59 2008-08-05 09:05:58 3 1 31 0 0 31 0 78.70 63 93.67 CHANGED DKoLIKFLEMLDTEMAQRMLuDLpDDERRSPQLYNAIGKLL-RHKFQISKLpPDEsILGGLAAuLEEYsclVGssGLTDD-h ....hsL.phLEMLDTEMAQpMLtDLpDcE+RoPQLYNAIsKLLDRHKFQIuKLQPDhpILGGLAuALEEYpphVGssGLT-D-............................................ 0 0 0 0 +10956 PF11124 Pho86 Inorganic phosphate transporter Pho86 Pollington J, Finn RD anon PRODOM Family Pho86p is an ER protein which is produced in response to phosphate starvation. It is essential for growth when phosphate levels are limiting [1]. 
Pho86p is also involved in the regulation of Pho84p, a high-affinity phosphate transporter which is localised to the endoplasmic reticulum (ER) in low phosphate medium. When the level of phosphate increases Pho84p is transported to the vacuole. Pho86p is required for packaging of Pho84p in to COPII vesicles [2]. 23.70 23.70 29.40 133.80 20.00 23.60 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.85 0.70 -5.43 10 45 2009-01-15 18:05:59 2008-08-05 09:16:28 3 1 43 0 28 40 0 287.50 41 91.77 CHANGED QKDssLNcPLDt-APPTltpoSLpPELApAuLsLpuDah+QtQuhhN+alFaHPlslollslslsshluhpL...as.hs.hSsols-.LYphhlhsK+-hlhsllhslsssuhlFuhluh.sahVoDthtchsschltpspsEpIFGhNL+cau............................spchsscs+cl.t........pscNTaIIlYR-oPIAllolssshspSoc-shlh+ITGltlR+ValKushh-sLIDWAhlRoRplhp-a.psK........upSlplllDsYSFDpphcKhLtp+GFphlp.ShcLss............clL.....ssLFGIo+-TaGlph .QhDssLscPLDh-APPTIhsssLcPEhuoAALNLsuDal+QpQulsNKalhaHPlslsllslslhlals.+l...shPlp...o.s.Slst.hYplhhhNK+shlsullhohhssuhlFollut....loDsahppp.s.lstspuEplFGhsLpchs............................tpcpspcsh.............spNTcIIVYR-TPIAlISLssshs..lSoc.-shVhslTolGsR+VYlKSGIlEDLIDWAhl+o+slhpp...sK.......hupoh+LLlDlYSFDsshcchLcc+GFshlp.Shcls-.......................+LL..............GuLFGVp+-hWGlph.. 0 3 14 25 +10957 PF11125 DUF2830 Protein of unknown function (DUF2830) Pollington J, Finn RD anon PRODOM Family Several members in this viral family of proteins are annotated as lysis proteins. 
25.00 25.00 46.90 46.40 17.40 15.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.76 0.72 -4.26 3 70 2009-01-15 18:05:59 2008-08-05 09:18:49 3 1 15 0 0 56 0 52.50 83 73.16 CHANGED FKHEEYPCpcQQRSSTLYVLIsLAIFLSKFTNQLLtSLL-LLIRIVcTLQQLLT .FKHEDYPCRRQQRSSTLYVLIFLAIFLSKFTNQLLLSLLEAVIRTVTTLQQLLT 0 0 0 0 +10958 PF11126 Phage_DsbA Transcriptional regulator DsbA Pollington J, Finn RD anon PRODOM Family DsbA is a double stranded binding protein found in bacteriophage T4 which is involved in transcriptional regulation. DsbA, along with other viral proteins, interacts with the host RNA polymerase core enzyme enabling initiation of transcription. DsbA acts as an enhancer protein of late genes in vitro. The protein consists of mainly alpha helices [1]. 25.00 25.00 65.90 65.40 20.20 18.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.21 0.72 -3.89 7 36 2009-01-15 18:05:59 2008-08-05 09:23:25 3 1 35 0 0 26 10 68.50 52 76.26 CHANGED hIpEASs.+hp.Euat-hlK-I+stAKpEhGl-GKhFNpLh+lYH+QpR-pFEsps-ElsplYDplFpt .hIKEASDpKhplEuYs-hIKDI+p+AKcELGVDGKhFN+LlsLYHKpsR-pFEsEs-EllELYDslFs.t 0 0 0 0 +10959 PF11127 DUF2892 Protein of unknown function (DUF2892) Mistry J, Coggill P anon Pfam-B_604 (release 23.0) Family This family is conserved in bacteria. The function is not known. 21.10 21.10 21.20 21.20 20.90 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.31 0.72 -4.14 172 1924 2009-01-15 18:05:59 2008-08-05 09:28:24 3 11 1526 0 602 1414 277 64.10 27 55.37 CHANGED Mp...............NhGshDRhlRlllGlsllshshhsh.................thhhhhlGhs..hLhTulhuaCPhYtll.G...lsT.....sthp .............................hpRhlpl.s.u..Gslll..huslh.uh..hst.......................hhhlsuhlGss..LlhsGloGaCsh...ttlL.p....h......p.h........... 
0 191 418 532 +10960 PF11128 Nucleocap_ssRNA Plant viral coat protein nucleocapsid Mistry J, Coggill P anon Pfam-B_645 (release 23.0) Family This family of nucleocapsid proteins is from ssRNA negative-strand viruses of plant origin. 25.00 25.00 107.60 107.30 20.00 19.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.13 0.71 -4.80 5 129 2009-01-15 18:05:59 2008-08-05 09:33:54 3 1 10 0 0 80 0 178.00 68 65.42 CHANGED RsLSKFIRETlF+-uDlcossLC-aLSSADPShFPASVFLKIsLDNLPTEVSSRCKMuIAGNKAIRYAlFApKF-.KDplusPTsussEslpEYlpKpEKLEKA+AIV-hLCSLuSNF-AQKKMHPLSPERoSRKNFTLQLTCAIVaSLSpcGRlDMRctI-ocKIEAFKRDENlYGctNu .RKVPEFVKSKLY.DGDVSLSQISEELSHAPTKKFPARVFLKIDIDNLPSAVCSRCKLNIAGNRSVRYAGFASSFQTKQKLSPAVGATPESLMPLLETNQKIEKSIAIRDFLKTMEGQWKNQKRLHPLSDEKPTIKNFTLKLTCAIIYSLTPDGRIDMAERIITDKNKGFQNDRNFFGDGE.G.. 0 0 0 0 +10961 PF11129 EIAV_Rev Rev protein of equine infectious anaemia virus Mistry J, Coggill P anon Pfam-B_124 (release 23.0) Family The sequence of this family is highly conserved and carries a nuclear export signal from residues 31-55, and RNA binding/nuclear localisation signals of RRDR at residue 76 and KRRRK at residue 159. Rev is an essential regulatory protein required for nucleocytoplasmic transport of incompletely spliced viral mRNAs that encode structural proteins. Rev has been shown to down-regulate the expression of viral late genes and alter sensitivity to Gag-specific cytotoxic-T-lymphocytes (CTL). Equine infectious anaemia virus (EIAV) exhibits a high rate of genetic variation in vivo, and results in a clinically variable disease in infected horses. 
25.00 25.00 121.80 121.70 18.40 19.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.76 0.71 -4.17 2 322 2009-09-11 15:12:54 2008-08-05 09:34:35 3 2 4 0 0 217 0 132.90 91 96.22 CHANGED PQtPL-sDpWCRlLRQSLPEEKIPSQTCIAR+pLGPGPsppssuRRDpWlRtQl.pAEsLQEpLEWRIRGVQQsAKEL-cVN+tIWRELpapccQ+GDauuassYpRtpEc+WGE.SSPRVL+PGDSKRRRKHL .PQGPLESDQWCRVLRQSLPEEKIsSQTCIARRHLGPGPTQHTPSRRDRWIREQILQAEVLQERLEWRIRGVQQsAKELGEVNRGIWRELHFREDQRGDFSAWGsYQRAQER+WGEQSSPRVLRPGDSKRRRKHL. 0 0 0 0 +10962 PF11130 TraC_F_IV F pilus assembly Type-IV secretion system for plasmid transfer Mistry J, Coggill P anon Pfam-B_678 (release 23.0) Family This family of TraC proteins is conserved in Proteobacteria. TraC is a cytoplasmic, peripheral membrane protein and is one of the proteins encoded by the F transfer region of the conjugative plasmid that is required for the assembly of F pilin into the mature F pilus structure. F pili are filamentous appendages that help establish the physical contact between donor and recipient cells involved in the conjugation process [1]. 
21.80 21.80 21.80 22.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.23 0.70 -11.46 0.70 -4.93 71 713 2009-01-15 18:05:59 2008-08-05 09:37:15 3 7 493 0 136 633 29 242.30 22 29.40 CHANGED stsphschLPa.....tpYsscsplal.....sspS..hGhhaElsPlsstscp......htctLpshlpp..sh..ssss..slQhhhhsssslsphlpp.hpshh...................ts............hhpphhtpphpaahcuht.p.t....sshsh......plRchRlhlhhphs.......tspsshpplpphpcpltusLpssGlt.spphsspslhsalhchh..NPpsshthss............Yst.p.................slspplhhssschcs.pp.shhhhs.....................t.scphlhshsl+phPct ......................................................s..sphschLPa.....hpY.......pp-..........splah.........sspo...hGhhh-hhPlsssscs......lh-slpphLps...chPcss....slQhhhh.s.s..s..plsshlcp.hhphhp.p................up................hhpchhttphtaahcuss.phth......pshsh........plRchRlhl.......hphs..............sthps.....pss...ht..phpplpcplp..u..uLtssulp.spphsupshhshltchh..N.sssthh.ptt..............phs..p..................slspQlh.ss.ch....cl.ct...shhhhs...............................t.spthlhshplpc.Pp........................... 0 28 67 108 +10963 PF11131 PhrC_PhrF Rap-phr extracellular signalling Pollington J, Finn RD anon PRODOM Family PhrC and PhrF stimulate ComA-dependent gene expression to different levels and are both required for full expression of genes activated by ComA, which activates the expression of genes involved in competence development and the production of several secreted products [1]. 
25.00 25.00 26.10 43.40 23.60 17.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.64 0.72 -4.38 4 36 2009-01-15 18:05:59 2008-08-05 09:44:28 3 1 21 \N 6 17 0 37.00 62 95.28 CHANGED LKSKLhlhCLAhusVFsussl.ApAspppFcVApRGMh .LKSKLFVICLAAAAIFTAAGl.AsA-tt-FHVsERGMT 0 2 2 2 +10964 PF11132 SplA Transcriptional regulator protein (SplA) Pollington J, Finn RD anon PRODOM Family The SplA protein functions in trans as a negative regulator of the level of splB-lacZ expression in the developing forespore [1]. 25.00 25.00 29.20 29.10 21.70 20.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.57 0.72 -4.23 7 78 2009-01-15 18:05:59 2008-08-05 09:46:18 3 1 57 0 13 43 0 66.80 50 92.81 CHANGED Mph....psapsGD.VYVIYRNPHstsVApIpEAtlVpHP.c.sELALFLaETYaPlspD.AlFso.-pAEphYpphFc ...............p.apsGD.VYlhYRNPHstsVApIppAElVsHPp+cGELALFLaETYHPLu--DAlauo.-EApphYp.hat...... 0 3 7 9 +10965 PF11133 Phage_head_fibr Head fiber protein Pollington J, Finn RD anon PRODOM Family This head fiber protein is also refereed to as Gp8.5. Gp8.5 is a structural protein in phage. It is a dispensable head protein. 
20.60 20.60 20.80 20.80 20.00 20.10 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.77 0.70 -5.20 3 66 2009-01-15 18:05:59 2008-08-05 09:51:36 3 3 62 1 12 64 7 123.30 22 66.00 CHANGED MhsSFTAhANSsIlAY+LLsalEuE-pIEISaAcEcTIP-YVul+DLcsGDhTslshYPLAAWpVIAsSDIslG....D+lTTGKNGTlKhTcDspssFGYAVupApcGQLVTlI..IS+uFDplIpsDDlG-ssDsGphLhchssssGspshlI...DsKAhlpuNoTssNKKpLcD.LLlSsLsVKAF......LsusToD-NKANL............cpLhVSNPclLuhLsGsPSoEsKssLRoMIGAGsPYTLPAATTTTLGGVK+uAAVusSTATDVssuVKDFNuLLTALKNAGIIu ........................................................................................................................................................hs...................................................................................................................................................................................................................................................................................................................K..ph.st..........ss...uo.u.ssstshhtphNsllstL+suGhh................................................ 0 3 7 8 +10966 PF11134 Phage_stabilise Phage stabilisation protein Pollington J, Finn RD anon PRODOM Family Members of this family are phage proteins that are probably involved with stabilising the condensed DNA within the capsid [1]. 
19.70 19.70 21.50 21.10 17.20 16.60 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.71 0.70 -5.92 3 145 2009-12-03 14:24:52 2008-08-05 10:02:23 3 2 137 0 6 107 10 449.10 82 97.02 CHANGED hQLPLhKGLGKDhKsADYIDALPVNMLATPKEVLNASGYLRSFPGIsKKsDVsGVSRGVcYNT+pNAVYRVCGNpLYKG-KsVADlAGpGRVSMAHSRsSQAVsssGKlpLYRYDGTVKTLSNWPKDKtYTQYDLGsVRDlCRLRGRYlWsKDGoDpFGVTDLEDESHPDRYpAhYRAESQPDGIIGIDSWRDFIVCFGSSTIEYFSLTGAADsuuALYlAQPALMVQKGIAGTaCKTRauDuYAIISHQATGAPSVYLIGuGQAosIATATIEKIIRSYTADELAouVMETlRFDSHELLLIHLPRHVLCYDASASQNGsQWSLLKTGFYD-PYRAIDFMFtDNQITCGDKsEuLLGQLpFsuSuQYEpQQEHLLYTPLFKADNARlFDFELEASTGVAQIADRLFLSATTDGINYGREQMIEQNuPFsYDKRILWRRlGRVRKNIGFKlRVITKSPVTLSGCQIRME ...........................................QQLPLMKGVGKDFRNADYIDYLPVNMLATPKEILNSSGYLRSFPGIAKR.SDVNGVSRGVEYNMAQNAVYRVCGGKLYKGE....S.....E..V....GDVA..GSGRVSMAHGRTS...QAVGVNGQLVEYRYDGTVKTVSNWPs...DSGFTQYELGSVRDITRLRGRYAW.SKDGTDSWFITDLEDESHPDRYSAQYRAESQPDGIIG.IGTWRDFIVCFGSSTIEYFSLTGATTVGAALYVAQPSLMVQKGIAGTYCKTPFADSYAFISNPATGAPSVYIIGSGQVSPIASASIEKILRSYTADELADGVMESLRFDAHELLIIHLP..R..HVL..VYDASSSANGPQ..WCVLKTGL..YDDVYRAIDFlYEGNQITCGDKLESVTGKLQFDI..SSQYDKQQE.....HLLFTP.LFKADNA..R.sFD.LEVESSTGVAQY....AD.RLFLSATTD..GIN.YGREQMIEQNEPFVYDKRVLWKRVGRIRKNlGFKLRVITKSPVTLSGCQIRIE...................................................................................................... 0 2 2 2 +10967 PF11135 DUF2888 Protein of unknown function (DUF2888) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins with unknown function are annotated as immediate early protein ICP-18 however this cannot be confirmed. 
21.50 21.50 21.60 262.10 21.20 21.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.87 0.71 -4.48 2 13 2009-01-15 18:05:59 2008-08-05 10:27:34 3 1 13 0 0 11 0 143.20 81 92.36 CHANGED GcPapppGshh.lPFupsFTIDLVNh.hpoEhpV+lpMoPphslGTFVVtPKphFSI+RAspGDAuFKVtRutGW.spT.QsLohhhYcR...l-hcsGs..pplETDG..GTVlVPGcsTGQRFupAhAh.hhFLap+.FllpGV GEPYTCKGDLCEIPFuRNFTIDLVNLSVSTEFQV+ITMTPHHDLGTFVVEPKKVFSIKRAsKGDAAFKVtRAAGWLPDTPQVLoLFVYERLpPVEWHStChYENLETDG..GTVIVPGEATGQRFGTATAVPThFLFKRMFVVKGV 0 0 0 0 +10968 PF11136 DUF2889 Protein of unknown function (DUF2889) Pollington J, Finn RD anon Pfam-B_001473 (release 23.0) Family This bacterial family of proteins has no known function. 20.80 20.80 20.90 25.00 20.00 16.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.27 0.71 -3.89 49 270 2009-01-15 18:05:59 2008-08-06 10:06:50 3 2 200 0 113 274 147 120.40 34 54.07 CHANGED HpRplphpuat+.p........DGLa-l.EuplpDsps.s.s........h.sspslHchplplTlD.sshpIhsspAphctsPas.hCspsssshppLlGhslu.Ga+cplpcpluGspGCTHLpELLt.slussAhQs ..................HpRtlphcuYtR..s........DGLa-l.EApLpDpKsh-hs..t...thh.sutslH-hhl+lTlD.pchslhcscA.shchsPas.pCssussuhptLlGlslt.tsaR+plpctLuGssGCTHLsELht.slsTsAhQs......... 0 25 62 89 +10969 PF11137 DUF2909 Protein of unknown function (DUF2909) Mistry J, Coggill P anon Pfam-B_764 (release 23.0) Family This is a family of proteins conserved in Proteobacteria of unknown function. 28.30 28.30 28.40 32.20 27.00 28.20 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.07 0.72 -4.23 51 339 2009-01-15 18:05:59 2008-08-06 10:14:11 3 1 327 0 118 287 343 64.30 35 88.64 CHANGED hKl.llllhllsIlsSLsSALaFLh+D...pu...........cupRhs+uLshRVuLSlsLhllllluhhhGhl....pP ...h+l.llllhhlhIlsSLsuALaahh+D..cG............popRhlpuLshRVuLSlsLhlhllhAhhhGhI......ss..... 
0 22 62 92 +10970 PF11138 DUF2911 Protein of unknown function (DUF2911) Pollington J, Finn RD anon Pfam-B_001491 (release 23.0) Family This bacterial family of proteins has no known function. 20.90 20.90 21.80 23.40 19.60 18.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.90 0.71 -4.57 69 234 2009-01-15 18:05:59 2008-08-06 10:24:24 3 10 92 0 137 269 303 145.40 35 62.05 CHANGED SPtspstt..........pl.uhsc......lplpYSRPuh.....+GRp..IF.......Gs...LVPYG...........cVWRTGANpsTplshscDltlsGcpltAGsYuLaTIPsccs.WslIhNp......sts.pWGs.........pYc..ppcDll.Rlplssp.phs..pshEphohshsshpsst.uhlpltW-pshVslsl .......................SP.spspt......ph.uhsp...lplpYSRPuh......+GRp..IF.......Gs.....L..VPYG..............clWRTGANpuTplsFscDVpluGctltAGsYuLaoIPscsp..WslIh.Np.......shs.pW.Gs........hpYc..pspDllRlpVtsp.phs..p.hEsholsh...sshssst...upltltW-pstVslsh.......................... 0 68 117 136 +10971 PF11139 DUF2910 Protein of unknown function (DUF2910) Pollington J, Finn RD, Eberhardt R anon Pfam-B_001487 (release 23.0) Family Some members in this bacterial family annotate the proteins as cytochrome C biogenesis proteins however this cannot be confirmed. Currently no function for this family is known. 
31.00 31.00 31.00 31.20 30.90 30.90 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.33 0.70 -5.20 46 566 2012-10-02 18:22:22 2008-08-06 10:24:54 3 2 322 0 120 408 26 215.20 21 94.89 CHANGED lLshAlslulsPhplshslLlLpp.....s+PhhphlsFlsGhhlsshslsslhlhlhcsls......shpts.......sstthshlplllGssllhlushhhhpt..................................................tststhsphhsphp.....shs.stuhhhuhlhslhps.tshl.ahsAhuslsuuuhssssphsAllsFsllussslhlPLlualluspR..spshLtpl+sWhpsppphlluslhsslGhhllhp.Gls ....................................................llshAlshulsPhhlhs..s..l...l..h.....lpp.........scsh...p..h..hsFlsGhhlshsu..lshsslhhhsshs........shstt.........sphssshlp.lllGllLllhushhhtpp.................................................................................httsthsph..hpphp..........sht..sss.s.hh.huhlhsl.sps.tshh..hluAhshIsuushshssphhshlhFsh.l.shss.shlP.llual..ht..sp+....h..pthhtthp..sW..hp..p..p.pth...hh.shlhhhlGhhhlhpul......................................... 0 35 83 107 +10972 PF11140 DUF2913 Protein of unknown function (DUF2913) Pollington J, Finn RD anon Pfam-B_001499 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Gammaproteobacteria. 
20.40 20.40 20.80 20.50 20.20 19.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.19 0.70 -5.03 24 383 2009-01-15 18:05:59 2008-08-06 10:47:32 3 1 273 0 51 246 3 190.00 28 91.67 CHANGED sYsptlhclspsuLspLptpp.psupshpsssscsHaLspWlspALKppRFs+hluc..sLptW.+puRShGssApLctlhp+Ipt.Yt....sstp.tpshstsplcuhLsplcptsWhVtT....-p.lss..KlplpoDGpsSLl.lsupphpppFss..spLlKPlohalRG...scpthhptAhppGhhlaphschpShVKaHt-YhlaPtNptstLspL..s ..........hp..lppll.sALstLpttp.cst.p.t.h.s.u.stpscaLlcWlspAlKpp+Fp+hlsc..cLpthhctu.Rsh..ust..upLtshhph..........sspp.tt.hshhclcuhLsphcptsW..p..lts......sp..lsp....plphh.ssGpss.Lh.lptsph-psFss.....schlpPlshhlpu...ph.t.thhpthhtsu.hh.p..c.t..lhhthph.h..tt.......l........................... 0 3 12 32 +10973 PF11141 DUF2914 Protein of unknown function (DUF2914) Pollington J, Finn RD anon Pfam-B_001640 (release 23.0) Family This bacterial family of proteins has no known function. 20.70 20.70 20.70 21.90 19.80 20.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.48 0.72 -4.45 39 174 2009-01-15 18:05:59 2008-08-06 10:47:56 3 2 158 0 77 178 90 68.50 36 22.50 CHANGED pu..pt.lhHhWh......pssc.ssclsLslp.GsR...aRsaSpKphh..stsGcWcVcVhspsGpllushcFpls ..................ppplhHlWh..........tsGcphs+Is...LsIp.GuR...uYRsWopKpshs...sssGc.WpVcVloE..s..Gp.hIuslcFpV.... 0 29 52 69 +10974 PF11142 DUF2917 Protein of unknown function (DUF2917) Pollington J, Finn RD anon Pfam-B_001647 (release 23.0) Family This bacterial family of proteins appears to be restricted to Proteobacteria. 
25.00 25.00 25.10 25.00 24.40 24.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.97 0.72 -4.78 40 253 2009-01-15 18:05:59 2008-08-06 10:49:20 3 3 129 0 98 236 21 65.20 30 55.56 CHANGED tapLs.sGpshsh+stpssp.LpVt..sG...plWl...Tt.-u.......s.....spDaWLpsGpoLpLtpGp+lhluut...hssu ..................atls.stpshsh+..ssp.ssp...LpVp...sG..........tlWl...Tp.ss........c.....scDaaLpsGpslcltcGp+..lhlout..h...ss............... 0 9 40 68 +10975 PF11143 DUF2919 Protein of unknown function (DUF2919) Polllington J, Finn RD anon Pfam-B_001684 (release 23.0) Family This bacterial family of proteins has no known function. Some members are annotated as YfeZ however this cannot be confirmed. 22.70 22.70 23.00 23.60 22.60 22.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.71 0.71 -4.37 29 662 2009-01-15 18:05:59 2008-08-06 11:06:50 3 1 644 0 64 283 15 137.30 54 96.12 CHANGED as.ls.aDc+GplKsPlhhahsLlaLARuWslFlhusssRpstssLLplFYP-+ssFaluLssGlsAlLhh.llhucRp....ct.phlt..laphh+hhLlhslllphshhhhtlhhpthhashshulphllhhWhhlYLhpS++Lpthhpshpp ..............at...pYDspGhL+hPhhhWh..sLLh.ARsWVLF.VIAGuS.REQGosLLNLFYPDHDNFWLGLlPGlPAVLAF.LLSGRRs....shPp......lW+hLh.hLLLLAQlV.LCWpPa.lWLsG....-....uVoGlGLALllADIVALIWLLTNRRLRACFst...c.................... 0 6 16 41 +10976 PF11144 DUF2920 Protein of unknown function (DUF2920) Pollington J, Finn RD anon Pfam-B_001778 (release 23.0) Family This bacterial family of proteins has no known function. 
24.70 24.70 24.70 24.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.35 0.70 -5.45 20 738 2012-10-03 11:45:05 2008-08-06 11:26:01 3 2 129 0 22 553 3 187.50 29 98.00 CHANGED NpTapIDSCDDVELsIKR..pSKLEaRloYDDpK-IKAIVhIIsGhGuDss.sahcahpcalA+pacVsslsVsYHChssR....sphuAphhh-c.DhhllcssLcslsl....shtslss.cphpphhphLspplpphKppshlspsapLp.LSsohhPscsEYQNFGIMtAlDllNAlhalh+c..........hsphus......lPpIhsGuSYGGYLApLsAKIAPWhlDuVlDNSuhAh...shhchI.hG+El-a.ph.ptush...hp..slplthasKTaWTp.scsSsaaFussphhIRslLNpcHLpIQupa..pshYloYHShpDphs.Psc-KpphachLcpLGFDssLplIpcEspIDG+FIKsL-HGhGlohKtLh+KcLPhlLEKl.shpsch..pc.cpISYPCc-hlYpFc-pscKlpLcI ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 10 14 19 +10977 PF11145 DUF2921 Protein of unknown function (DUF2921) Pollington J, Finn RD anon Pfam-B_001920 (release 23.0) Family This eukaryotic family of proteins has no known function. 
18.80 18.80 19.30 18.90 18.70 17.90 hmmbuild -o /dev/null HMM SEED 909 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.63 0.70 -13.66 0.70 -6.83 7 137 2009-01-15 18:05:59 2008-08-06 11:43:06 3 5 21 0 96 115 0 572.50 24 90.89 CHANGED Y......pp+Cps..stus.h...s.sststhhuppl.h.pssah..uGssslhshssspsh...........+.soFssppl.....t+s+sst.hlpVsuhLsLt....uspsshhshsht.pp.ph.hlst.........ph.hpGhYoE............upt.LCMVGsu...................sshus-ssslhh.tshlLhLphPKs.oLssphVpGpLcSssthsp...F-slpLluh.pp..pY.a.....pLs.tt..C.....cPhs.t.cthh.tupusps..h.tthCclLcch..t...shpl..shc.Csuoc.hsshhsshph............tshthhhpsl+Cps.t.......sss+s.lsFpshos.Eph..hs...t.t....ptoLsAEGhWcsosupLChsAC.lup.s.....sthhsts.CphRlSLpFPssaSIRsRShllGplhssss......sptshuFcplh.s.....pshppph.phh.h+YsYTcl-cAtchhtps.c.ush.....++...spaPts.t..........ShSDht.choh+hsut........upu.........sshslGshhhp....h..hutsphsshss.h....scshslpppp..LLNVShcIoh.................ossh.hppstlShEGlYDpcsGphhhlGCRcl.ph..p...s......tsuhDCpI.lplpasslsu+..spsph+spIpSpR.tcsD.LaFcthclpspshhh....c.ph.-sl.RhslEslhsllohThShshhshQLhah+pps.-shPhlSlsMLsl.ALGahhPLlhshEALFh....stppp.h...........hptsthhps.-hhl+llTllsFLLpLRLhQhVWpuRtps.pppstc.t..su-++sLhlsLslYlhGhlluhhlphstsph.hstt.ps..............u.p.tshhp-ltpYsGLhhDhFLLPQlIhNuh.pscs+.PLushFYhGsThlRhhPHsYDhhRupshsshhp..ssahaAssph....DaaupAhDVllPlsAhhLAhllFlQQRashthlh.t 
...................................................................................................................................................................................................................................................................................................................................................h.s.h.s............................ta.a..............t...............................................C....h.......................................................................................................................................h..s-G.ap...ts.h.h.uC..h..................t.Ct.thph.h....hohptts..hG.h.s.t...............h..........................YpY..o.hp.stt.h.t......t......t......aP..................h...h..t.......................................h..l...h..................................................................hth..sh.h......................................h.hEGha....G.h.hluC..h...................t.DC.l...l.hths..t.........h..h.ItS.R..t.sD..hh...hth....h............................t..t.h.+...h-.hh.hh..sh.hhh.h.Qlhahpt.s..p..s...hSlhhhhh.shGh.h.Lhh.s.t.hhh........................................t........p.hh+h.hhhthhh.hplh.hsht.+.......t.......................sc.t.shhh...hahhhhhhhh..p........................................h.tahGlh.DhFLlPQllhNh.h.....t..p.p..slt..aYhGhThhRhhP+hYchhp........................s.t.....shash..hhDlhlshhsh.huhhlahQQphs.......p.................................................. 1 4 55 82 +10978 PF11146 DUF2905 Protein of unknown function (DUF2905) Mistry J, Coggill P anon Pfam-B_542 (release 23.0) Family This is a family of bacterial proteins conserved of unknown function. 
22.20 22.20 23.90 23.90 21.70 20.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.18 0.72 -4.03 79 498 2009-01-15 18:05:59 2008-08-06 11:50:21 3 1 488 0 217 396 105 63.30 39 91.19 CHANGED hu+hLlhhGll.llllGhlh....hl.t+.....hs.l..GRLPGDIhlcctshsaYFPlsTsIllSllLSllhhlh ........hs+hLlshGll.LlllGlhh....al.t+..........hs..h...GRLPGDIhlc.+s..NhsFYFPlsTslllSllLSl..lhhlh.... 0 80 152 189 +10980 PF11148 DUF2922 Protein of unknown function (DUF2922) Pollington J, Finn RD anon Pfam-B_001999 (release 23.0) Family This bacterial family of proteins has no known function. 22.70 22.70 22.80 23.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.89 0.72 -4.32 49 773 2009-01-15 18:05:59 2008-08-06 11:52:37 3 1 650 0 145 467 0 68.00 28 90.76 CHANGED KpLpLsFpsssG...Kphslslsssp-slotsplcssMspllspslFtssuG.sL.sshpuA+lV-psssslh. .........KsLcLsFpssts....Kssplplspspss......lT....pp.lcssMssllstslhpss....s.u.sl..sphpuApll-+ssosl................................... 0 71 119 135 +10981 PF11149 DUF2924 Protein of unknown function (DUF2924) Pollington J, Finn RD anon Pfam-B_002010 (release 23.0) Family This bacterial family of proteins has no known function. 21.80 21.80 22.00 24.10 21.30 21.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.59 0.71 -4.15 31 178 2009-01-15 18:05:59 2008-08-06 12:38:05 3 5 96 0 88 183 81 132.10 37 82.95 CHANGED lhs....clAtLsshshscL+thWpclas.stssphs+saLppRLAYRlQEhsaG....GLsppscp+L-plucptp.tphht......................ppsspphssGTpLlREWpGhpHpVtVhsDG.F-apG+pa+SLSAlA+tITGTRWsGPtFFGL+ ..........................................................................................h..tplAtLtshshscLpthWpclas.ptssp.hs+paLpp+lAYRlQEhshG....GLstpspp+Lctlucphsttthht..............................ttsstt.hsGThLlREWpGppHpVhVhs-G.F-apG+hacSLSAlA+tITGT+WsG.hFFGL+............... 
0 43 68 79 +10982 PF11150 DUF2927 Protein of unknown function (DUF2927) Mistry J, Coggill P anon Pfam-B_739 (release 23.0) Family This family is conserved in Proteobacteria. Several members are described as being putative lipoproteins, but otherwise the function is not known. 20.40 20.40 20.90 21.30 20.20 20.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.15 0.70 -4.65 65 243 2009-01-15 18:05:59 2008-08-06 12:55:41 3 1 191 0 58 212 104 206.30 34 67.68 CHANGED sssplscsFhclAhttEYsps............hstlpRacsPl+ltlp...usssuspstDhstl...lsRL...uclsGhsI.shsss..........pANlplhhssccc.......l........pchlPpsustsls.h.shsctthChststsshsp.hptspsllhl.s-psshhhtsClHEElAQuLGLhNDo.clhsSlFNDD.s.aulLTsaD.lLL+hLYcPcL+sGMohs-ltshL.tl .........................................................................................................s.ss.ltcsFhplAhpsEYstu...........tphlp+Wc..tP.lRlhhc...........ttssc+shctphltsalp+L...uplTGhsI.phsss...........pANlhllhsppsc...ht.tl............................cchhspsustsh........csshChsshps.s.s.s.s.phstuplllsVcpspsct.chluClhEElsQslGLsND..Stp...s...hPSIFNDc....s.pslLoshDhlLL+lLY-PcLcsGMotsplpslLp..h.... 1 11 31 45 +10983 PF11151 DUF2929 Protein of unknown function (DUF2929) Pollington J, Finn RD anon Pfam-B_002101 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 21.90 21.90 22.90 22.30 21.70 21.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.78 0.72 -3.89 28 624 2009-01-15 18:05:59 2008-08-06 12:57:35 3 1 618 0 61 231 0 57.20 33 90.30 CHANGED M+Y....llshFWohlLsphlsalluuLsus.sas...hhsssIluVlhulllhllss.llstcss .........M+a....llohhWuhlLsphlsalluSLsGG..oas.........hsps.hIhuVlhsllhhllus.slsspp.s......... 0 14 33 49 +10984 PF11152 DUF2930 Protein of unknown function (DUF2930) Pollingtin J, Finn RD anon Pfam-B_002135 (release 23.0) Family This family of proteins has no known function. 
23.10 23.10 23.40 23.20 22.60 22.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.10 0.71 -5.15 30 160 2012-10-02 14:34:25 2008-08-06 13:06:36 3 4 101 0 74 152 122 170.80 32 78.48 CHANGED lsLhsGsLshsLhllNp.lss....ssloPu.pRA-VLuslhussLlLlulLWpphpP+.ss-tVsLpG..cpGhtlsssLspul+pELAWuSchLLTsTsssolLlaa.cGplLLRRGll............sss....phpPGsIspRshcppp........sl.LVNhtLYPGRs....EF-...hlPsNh.uVllQPLGsc...........GhlllGuhosRsFo+pDEpWlsGhA-KLc ...............................................LPlhsGshshhhlLlN+..lss..........sphos.uQsRu-lLulhLuss.lLsullWp.plp..........P+.u.......s....ssVsL.G........ppsh.hsssl..sp..shppELAWuo.hLLpsT.sstulllhh..ps..phllphGhh......................s...........phh.sthhppshpp.tt..................Lssh.haP...u+.........Eht........hlP.sh.ull.hQ..Plspp............Ghllluups.RuaoppDctWltshupKl........................................................... 0 20 50 66 +10985 PF11153 DUF2931 Protein of unknown function (DUF2931) Pollington J, Finn RD anon Pfam-B_002146 (release 23.0) Family Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. Currently, there is no known function. 
22.20 22.20 23.10 23.70 22.10 22.10 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.28 0.70 -5.00 30 307 2009-01-15 18:05:59 2008-08-06 13:16:21 3 2 183 0 64 250 1 193.90 24 84.36 CHANGED hphlhhll.hsL...lsuCssssh...............p.hpWphusshPpthsshVspsthhstscps..h.ts.sshs.....pp.ttt..tWsst........t..suh.sscspslPchlhlpWhSlh-pKtYppplpl..PcslpppMtpstphpspts.h......hRsslllGLAPGGhVpVWLps...tstsslhls+hpApplsss..phh.......phshshsph......pcpppsalcpcslPhG ......................................................hh...hh...h.uCtt.......................tW.hsh.hhPphhP.s.VThshh..hsspt.h....h.p...s.......p..tt...hspasth.............t...hstspphPpplhhhW.sSlhDpKhYpTchtl..sps.ltphhhs......hshsptss.hh............ahsphlhGLsPsGplpVWLps....stpslhl.s.pht.......pplpscc.shsK........t.p.h.h..............tthtshlc.cshPhs................................................ 0 9 34 45 +10986 PF11154 DUF2934 Protein of unknown function (DUF2934) Pollington J, Finn RD anon Pfam-B_002301 (release 23.0) Family This bacterial family of proteins has no known function. 19.80 19.80 20.50 20.20 19.60 18.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.88 0.72 -4.59 64 414 2009-01-15 18:05:59 2008-08-06 13:46:20 3 11 258 0 181 424 25 39.60 35 34.70 CHANGED tpp.p-cpIRcRAYplWEpcGpP.pGpspcaWhpAEpplpup ..........ttcccpIRctAYplWE.pcGpP..pGc..sp..caWhpAcpplct...... 0 44 82 116 +10987 PF11155 DUF2935 Domain of unknown function (DUF2935) Pollington J, Finn RD anon Pfam-B_002056 (release 23.0) Domain This family of proteins with unknown function appears to be restricted to Firmicutes. The structure of this protein has been solved and each domain is composed of four alpha helices. A metal cluster composed of iron and magnesium lies between the two domains. 
21.20 21.20 21.60 21.20 20.80 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.68 0.71 -4.03 73 699 2009-10-15 12:33:13 2008-08-06 13:51:02 3 4 223 52 132 510 4 123.20 30 85.59 CHANGED hhppslcpchFWhcIhtEHuhFIpssLsspE....pcllppAcpFtpp.F-pLhtcuhphssth.............................................................tlpplspcshp.tspplpsFKpplhcthlss+l.hshhhPLhhDHhlREupaYlphLpp ..............................................................ptlppphFWh+lht-HuhFlptsLcspE..........pcLlcpAppFtpp.F-.plhtpuhphps.h.................................................................tlpph.spcspt.tspplppFKpplhphh.lps+l...tshhhPhhsDHhhREsphalphLp.h.................................... 1 53 112 117 +10989 PF11157 DUF2937 Protein of unknown function (DUF2937) Pollington J, Finn RD anon Pfam-B_002314 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.30 25.30 44.30 30.20 25.20 25.20 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.92 36 166 2009-01-15 18:05:59 2008-08-06 14:10:53 3 1 162 0 56 163 20 162.70 29 95.05 CHANGED hlhchLclhhhs....sGslhusQhPtFsppYtQRLsuplpEhppslssFptsAppa.ssshpphltpappss-.hhpscupshpphhsRhptLppshpth..pus.ap+hhhhhphschclhppThpsap.ulsLospulsaulssullhs.hlhthhhtlhthhht...+pppp ....MltphlcLllFs....hGhlhulQlPtFhspYtQRlsutl.EsppslpuaptoAppahtsshpthlp+apsssDsshpscupshpphhpRhphLppphtsh..pushat+shahhpsschclhppThssap.tl.Lss-ulsaGllsuLlls.hlhchhhhlhshhhp..tt............. 0 12 28 39 +10990 PF11158 DUF2938 Protein of unknown function (DUF2938) Pollington J, Finn RD anon Pfam-B_002317 (release 23.0) Family This bacterial family of proteins has no known function. Some members are thought to be membrane proteins however this cannot be confirmed. 
23.00 23.00 23.10 23.10 22.70 22.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.96 0.71 -4.87 36 258 2009-01-15 18:05:59 2008-08-06 14:19:11 3 1 249 0 69 220 189 145.00 41 88.88 CHANGED lhIGlGATllMDLWullh++lhGlsshsaAhVGRWlsHhh+G+hhHpsIupussV..tuEphlGWhsHYhlGlhFAslLlslhG.sWltpPTlhPAlhhGllTVssPahlhQPuhGhGhAAS+TPpPhpsRlpS.....LlsHslFGLGLYluAhhl ...................................lhlGlGATllMDlWuhl...hphh.Gh..sshsauhVGRWhh.al.h+Gp....lhHs..sIupusPh....ttEhslGWhuHYulGllauhlhhhls.Gss...WhspPshhPAll.hGl.l.T.lsAsaFlhQPuhGhGhAAS+sPpPsts.R..lho......LluHssFGlGLYhsAhh.h................... 0 16 37 50 +10991 PF11159 DUF2939 Protein of unknown function (DUF2939) Pollington J, Finn RD anon Pfam-B_002321 (release 23.0) Family This bacterial family of proteins has no known function. 21.50 21.50 21.70 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.87 0.72 -3.81 35 245 2009-01-15 18:05:59 2008-08-06 14:27:23 3 2 210 0 84 227 37 94.70 26 44.55 CHANGED slsshsshhhuSPYl....ulaplcsAlcspDstsluphlDaPuLRpSL+sQlssthhpph....ssphtssshusLuthhus.ullssh....VDshlTPpGltslh .....................lhssh.hhh..hu...oPah.......sLtplppAlcs+Ds....sslspaVDaPu.lRtSL.+...pQlsstlhpph.....ssphpsssh....s.tlut.hlus.....slsssh.......VDshlTPpulsth......................................... 0 24 48 66 +10992 PF11160 DUF2945 Protein of unknown function (DUF2945) Pollington J, Finn RD anon Pfam-B_002448 (release 23.0) Family This family of proteins has no known function. 
20.60 20.60 20.60 20.70 20.30 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.68 0.72 -4.15 31 280 2009-01-15 18:05:59 2008-08-06 14:57:41 3 4 259 0 131 297 20 61.40 36 61.29 CHANGED GD+VpW......sScsGcspGplhchpTc-schpG..+phcASp--PQYclcSDKTs+hAsHKssuLp+h ..........GDcVpW.............sSpsGpspGplhc....h....h..T..c..cs....c..h..p..G..+..s.....h..c..ASpD-PpYp...lcS-+os+.AsH+spsLpc.h............... 0 35 77 107 +10993 PF11161 DUF2944 Protein of unknown function (DUF2946) Pollington J, Finn RD anon Pfam-B_002487 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 21.60 21.60 21.60 22.80 21.20 21.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.50 0.71 -4.62 41 142 2009-01-15 18:05:59 2008-08-06 15:05:40 3 1 141 0 58 133 19 185.90 47 96.43 CHANGED MD-IV+pAlAKWPNVPsCaGWLuLDtRGpWhhRD-t.sQ.....ttG............h.Gs.lpHssLlsFIsRNYpsDtcGpWFFQNGPQRVYVEL-hTPalhRlps......t.s........hslpsHTGt..sht.spusalDEpGplhLs...................sshtluLlHD+Dlshhustsp...t...s................................ttplslps.ltpu-lstRFGFVtsPtst ...............MDDIV+QAlAKWPNVPpChGWLhLDcRGpWRhRD-tAQ.....AsG.............t.GsPlRHsuLlsFIsRNYppDtcGpWFFQNGPQRVYVELshTPWllRLsst......ss.......................huLssHTGt..sh..p.sstsaLD-tGsllhs...................ss.plAhLHD+DLshhuctscht.t....ut.s........hhth.s........................stshsl.psltps-VstRFGFVssPAtt................................... 1 7 30 46 +10994 PF11162 DUF2946 Protein of unknown function (DUF2946) Pollington J, Finn RD anon Pfam-B_002520 (release 23.0) Family This family of proteins has no known function. 
23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.01 0.71 -3.91 87 687 2009-01-15 18:05:59 2008-08-06 15:45:01 3 3 389 0 224 685 20 122.20 19 86.11 CHANGED suW....lul..hAl...Lhthl.APhlupuhh........thstssh...ssshCssssst....tsshstststptsss.t.................................sC...saCshhstss......sLssshsshhshhhthtths.......sshhtshsstthassups.RAPP ..................................................h..luh...hAl...l.h.h...hl...uPhlupshs..............................stssh......ttsh.C.ssssst.............tsshstsss.t.pts..st.t..t.tt.....................................sC...saCs..h.hst.s......sLs.ssh..ssh.h.s.hh.hhhst.hs.......sshhhshh..tt.hast.sps..RAPP................................. 0 35 85 163 +10995 PF11163 DUF2947 Protein of unknown function (DUF2947) Pollington J, Finn RD anon Pfam-B_002524 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 26.30 25.80 23.40 23.30 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.96 0.71 -4.64 22 169 2009-01-15 18:05:59 2008-08-06 15:50:15 3 1 168 0 34 105 7 151.10 56 96.65 CHANGED YlPL-pYpRKWIFsHpshPVss-DhApIKPhoptRAsplWpcaISspSscs-pFscsDWss+sssWhps...cWpuAWDS.--ssLP-tlhtalc.WpD-ssVYFCYEKYplIET+WsVFhRsWKNFLFaDDGPlLlG+K+pQAlhFppsGphpLGp ........YlPLDpYpRKWIFTHtSMPVP-tDLApIKPMsptRAAQhWKENISspSPDAERLSSpDWPtK..sssWst-..ssWhutWES.D.-.spLPEtlssals.WQDDVTVYFCYEKYNVlETKWuVFKRaWKNFLFY.D.DGPhLlGRRRKpALWFsocGpVKhG......... 0 5 12 25 +10996 PF11164 DUF2948 Protein of unknown function (DUF2948) Pollington J, Finn RD anon Pfam-B_002527 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 
25.00 25.00 47.70 47.50 23.70 22.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.48 0.71 -4.55 42 207 2009-01-15 18:05:59 2008-08-06 15:56:21 3 2 205 0 71 164 1063 138.90 46 92.73 CHANGED L+LhAhDs-DLtVISAhlQDAVh.su-hpWctpp+RFuLllNRFcWEss........tptpssERVposLth-sVhsVpSpGlc+sctDt.VLuLLulsFcP....uEssuGplhLshAGsGslRL-VEslEspLpDlotsatusuc..PsH .........LKLlALDtEDLpVlSAHlQDAVlcluDlpahscc+RFsLshNRFsWEcs.......................hcptshcRhpouLpFscV.ts+upGIs+p.spD.sVLSLLulcF.s....ups.....PuGslpLsFuus..u..AIRL-VEClEspLsDlGssWpstut..PcH.......... 0 20 43 51 +10997 PF11165 DUF2949 Protein of unknown function (DUF2949) Pollington J, Finn RD anon Pfam-B_002571 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 22.20 22.20 22.70 24.70 21.90 22.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.68 0.72 -3.79 37 156 2009-09-10 15:59:59 2008-08-06 16:07:02 3 1 70 0 69 147 81 57.80 39 82.18 CHANGED ssspLlpFLpp-LuLspsulslul+........ptctppuPLPhlLWpYGLloLpQL-plhDWlpp ....h..ppLlpFLpp-LulsssulslAlR........ppp..tp.......us...LPMlLWQYGLloLpQL-plaDWL-.t.. 1 10 50 67 +10998 PF11166 DUF2951 Protein of unknown function (DUF2951) Pollington J, Finn RD anon Pfam-B_002585 (release 23.0) Family This family of proteins has no known function. It has a highly conserved sequence. 24.90 24.90 25.40 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.39 0.72 -4.14 6 299 2009-01-15 18:05:59 2008-08-06 16:12:50 3 1 192 0 5 56 0 95.60 72 98.34 CHANGED ht.hpR..hEp-hRIpRLE-NDKphFsoL-cI+cGQ+sQEhVNpKhDhTLDuIpRERELDccsKEcNpKNI+DlKMWlLGLlGTIhuSLlIAlLRTlFGI ......FGFTKR..HEp-WRlpRLEENDKTMFpph-cIc-u.+TQEpl.pKLD+sh-p..lpR-...+E.DEKNKccNsKNIRDlKMWILGLIGTIhSTlVIALLRTlFGI........................ 
0 4 4 5 +10999 PF11167 DUF2953 Protein of unknown function (DUF2953) Pollington J, Finn RD anon Pfam-B_002617 (release 23.0) Family This family of proteins has no known function. 21.70 21.70 22.30 22.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.47 0.72 -4.06 60 397 2009-01-15 18:05:59 2008-08-07 09:14:46 3 1 368 0 104 330 4 50.90 29 23.20 CHANGED sphGhtDsAhTullhGhlaultuhlhshh........pp.plplpPsFpcphh.cs .......hphGhsDAAhTGllsGhhaulhuhhhshl.phhph...ptsphplsPsFppphh.t......... 1 45 85 92 +11000 PF11168 DUF2955 Protein of unknown function (DUF2955) Pollington J, Finn RD anon Pfam-B_002614 (release 23.0) Family Some members in this family of proteins with unknown function annotate the proteins as membrane protein. However, this cannot be confirmed. 21.90 21.90 21.90 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.72 0.71 -4.51 42 253 2012-10-02 19:04:43 2008-08-07 10:34:55 3 2 195 0 82 281 23 138.00 25 40.93 CHANGED +sLRluhGssluhsluhhhuashuhhsslhshhlL.shssshshphhhplhhsslhssh.shllsshlppaPlhhslllulhhhhthhhhhcssthLhushhllsholl.thushs..sssh.sllhulhhuhllulhlshls ...............psLRIuhGssluhslsh.hhshshuhahslhPhhlL.uhssshshcshhphlhssslsslpssllsshhtpaPhlhs.l...llhh..ha..hapat..h..h..scs.shhLFGshs..lls..ho...lhltFu...o...as...ss..shpslhhu.hhusllulhlshlh....................................... 0 7 21 53 +11001 PF11169 DUF2956 Protein of unknown function (DUF2956) Pollington J, Finn RD anon Pfam-B_002632 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 82.50 82.20 22.90 22.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.57 0.72 -4.07 27 155 2009-01-15 18:05:59 2008-08-07 11:07:38 3 1 154 0 34 105 7 105.30 60 91.04 CHANGED SsETQpEAh+lAKuTQKPGQTKEQTKLIAQGIEKGIApYKKQQKsKuR-tDKt+Kpph+sKpp........sppppspp..hspsp.shLPWs...LLsLSWlGFhuYlhh ....SppTQQEAlKIAKATQ+PGQTKEQTKLIAQGIEKGIA.YKKQQKEKpRQADKhRKKslKAKppsspt.....ts--.s...sph.s.ssspsppupLsWl...LLsLSWlGFluYhh.h. 0 5 13 24 +11002 PF11170 DUF2957 Protein of unknown function (DUF2957) Pollington J, Finn RD anon Pfam-B_002671 (release 23.0) Family Some members annotate the proteins to be putative lipoproteins however this cannot be confirmed. Currently no function is known for this family of proteins. 25.00 25.00 36.40 39.30 17.70 17.50 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.21 0.70 -5.83 12 197 2009-01-15 18:05:59 2008-08-07 11:20:59 3 2 60 0 48 160 10 358.20 48 79.41 CHANGED TaQlpalESsVPposGoVsPTR...sG..hshoGohsp.Ts.LPTsc.NpCAFhLpsuo.........lDsscPshlFlGpGVsGGuIPGATIpFsGl.....lGlGslPspTFPaYPFIuFupTETDhoKlAGsYNtlGaH.lPSt..........satPsssshp.TLNADGS...Csst...ssuoCpsoGssas.+....ususssF.Sssh..tGp.shPohupsts.h......A+GlhIVGKLpstLVPllIRsGYApsssssh.....sssADDEsGIulLuPssAlAssSlpGtYIGssSshsYtsoslsGs......pushLDPh.sspssLsusaphDaoQss.sGslossshsus.sushTGplhFsGusauaL-..osusoP.Fslus .TaQlpal-SPVPtoTGTVsPTR....AG..ssloGTlspt.Tu..LPTsc.NpCAFhLs..suS...........................ls.scPshlFlG.GVsGGuIPGATIQFsGl..........lslGpVPspoFsaYPFIGFospETDloKlAGsYNpLGYHpVPSt..........Naussulsup.TlNADGo...Cssosh.......suG....uCh....soG....ssas.p....ususssFpSpsh...tsQ....lhPohuphs..h....t.A+GhhIVGKLpsQLVPllIRTGsAN..s..sssssh....sssADDESGISlLuPtsAlAuGS.sGtYhGsDSsFcYpsTulsGu......puThlDPh.sspsuLssuhsLDYsQss.PGslTsspssuo...us..ss...TGphlFoGGlauaLD..ssssss.Folsh................................ 
0 4 8 22 +11003 PF11171 DUF2958 Protein of unknown function (DUF2958) Pollington J, Finn RD anon Pfam-B_002712 (release 23.0) Family Some members are annotated as lipoproteins however this cannot be confirmed. This family of proteins has no known function. 20.30 20.30 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.44 0.71 -4.23 21 220 2009-01-15 18:05:59 2008-08-07 11:26:14 3 10 148 0 82 208 67 105.00 36 22.38 CHANGED LlTsp.RspLLANGptp........pchDPhPVVKLFsPsusuTWLlTELD.tDGD.phFGLsDLGhGhPELGsVSLuELtul+GPhGLslERDLaFpuppsLSsYActActsGuIls ....................................................hs.t.+t.Lhtpstt..tt......thD.hPV.V+hFsP..ussTWLLsEhD..P.t.Ds..........D...tuFGLs..D.l..GhG..h.PELGhhuLsELtul+...s.hs...ls........lcRDlaFp...........sp+.l..a.t.u...t....s........................ 1 9 48 70 +11004 PF11172 DUF2959 Protein of unknown function (DUF2959) Pollington J, Finn RD anon Pfam-B_002747 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 31.50 31.50 36.30 36.00 31.10 30.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.31 0.71 -4.56 27 148 2009-01-15 18:05:59 2008-08-07 11:40:00 3 2 143 0 54 137 29 200.00 51 92.68 CHANGED llhLoG..CQSAYYuAMEKVGlHKRDIhVDRV--A+-oQp-ApEpFpSALEpapullsa-GG-LEctYspLscpYEsSpsuAccVpsRIcpVE-VAcALF-EWppELcpYsssoLRcsSpp+LcpT+ppYppLlpuM++AEsKMpPVLssh+DpVLYLKHNLNApAIuuLpGEhsslps-lspLlp-MppuIsEuspFIpphp .....................h.hhLoGCQ.S....A...YYuAME+.V.G.hHKRDI..hVDRVE-A+-SQp-AQcpFsSALEphpuLssaDGG-LEssYsplNDcYEsSppAAp-VpsRIsslEDVA-ALF-EWpsELstYosA.oLRRsScpKLccT+ppYppLlpuM++AEsKMsPVLssh+DNsLYLKHNLNApAIuuLpGEFsoLcpDIstLIppMNpuIsESscFIppL.......................... 
0 16 29 45 +11005 PF11173 DUF2960 Protein of unknown function (DUF2960) Pollington J, Finn RD anon Pfam-B_002756 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 20.60 20.60 21.10 99.90 18.50 20.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.66 0.72 -3.68 20 165 2009-01-15 18:05:59 2008-08-07 11:57:25 3 1 165 0 35 76 9 78.50 62 89.53 CHANGED MARpItYTaKspsKcIsFSY-caHslaEAVAAAEGIDLTsaLtMEpQlthsS.+uspAV+saRcscFt+hGFocIpal+- MARTIlYTYKsQ-KsLsFSY-KHHNIaEAVAEAEGIDLT-FLKMEtQlEulS..DTKuVRNFRDNaF+KLGFupITLt.... 1 5 12 24 +11006 PF11174 DUF2970 Protein of unknown function (DUF2970) Mistry J, Coggill P anon Pfam-B_713 (release 23.0) Family This short family is conserved in Proteobacteria. The function is not known. 20.20 20.20 31.30 31.00 20.00 20.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.58 0.72 -4.38 68 334 2009-01-15 18:05:59 2008-08-07 12:00:31 3 1 266 0 122 275 95 55.70 35 81.05 CHANGED hhpshpuVlhuhhGVpppps+p+Dhsphs...PlhhIlsGllhshlFlhsLlhlVphVl ....hhpsl+uVhhuFFGVp.pscs+ppDhsphs...PltlllsGllhsslFlssLlhlV+hVl... 1 19 56 89 +11007 PF11175 DUF2961 Protein of unknown function (DUF2961) Pollington J, Finn RD anon Pfam-B_002770 (release 23.0) Family This family of proteins has no known function. 
20.30 20.30 21.60 20.30 18.20 20.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.64 0.70 -5.05 37 285 2009-01-15 18:05:59 2008-08-07 12:01:15 3 5 193 0 83 254 31 241.70 34 57.17 CHANGED hhRp.lllRhYWDspptPoVpsPlGDFFusGau......hsthsShslss..ssssu........hssYa.MPFp+pA+IplpNpsstshtthaaplsY.s.h....ttlsccssYFHApW....+cppsh.......................shspsashl-.lcG+GpYVGshl...ulps.............hpss.....WaG..EG-.+halDG-..phPolpGTGoEDYFsuuWs...........h.hppassPatGhshhppsssh.t.....................hhuhYRaHlhDPIhFpcsL+lTlp...........ctch.p....phpsDhuSsAYW...YQs ......................................................................................................hRc.lll+hYWDspppPoVpsPlGDFFssGau.............................tsphsSlslss.....sPstu..........hNsYa.MPFp+pA+IslpNppspsh..t....ha..apIs....Y..s.h.......plspc......shYFHApW+Rpp.s.....................................phtps.a.sllD...lcG....+GpYVGshl...ulps......................pph........WaG..EG-.+ha..lDG-.....phPolpGTGoEDYFsuuWs...............................hppassPahGhsh...hpppssh.t.....................hhuhYRaHl.DPlhFpcsL+lolp................csph......ptpsDhuSsAYWYp................................................................. 1 33 56 69 +11008 PF11176 DUF2962 Protein of unknown function (DUF2962) Pollington J, Finn RD anon Pfam-B_002773 (release 23.0) Family This eukaryotic family of proteins has no known function. 
20.60 20.60 23.30 23.80 20.50 19.80 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.81 0.71 -4.65 31 261 2009-01-15 18:05:59 2008-08-08 09:11:59 3 6 220 1 177 255 0 150.00 30 79.84 CHANGED KlpKplu....tthcslHPpSRKsppLsRsshR-cKlpptKttptc+ps..hhp+ltaFpctlpp......................pppsaohp-hpplIc...................palsRp..DsELcplcpc....RRtuRP.ssRpphLcpphctEpc-ac..sGa.lPDLocscslchL+p.W....s.Gshss..lsslKhl+lop .........................ptht....tpppslHstSR+utpLpRtshRpc+hpch+p.t+t.t+pph.......hh......c.........+ltaFppplss......................pptshotp-hpplIp...................p.alpRh..cpEL-plchc.......+Rp...GR.....ssRcsllcpph-pEpppac..sGh.lPDlssspslchl+p.W.......s.Gshtt..lssl+hh+ls.t.................. 1 51 86 131 +11009 PF11177 DUF2964 Protein of unknown function (DUF2964) Pollington J, Finn RD anon Pfam-B_002804 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 20.80 20.80 20.90 21.70 20.60 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.97 0.72 -4.04 8 102 2009-01-15 18:05:59 2008-08-08 10:27:27 3 1 52 0 27 87 3 59.70 48 91.07 CHANGED MlRtchRlVLATlAVFIALAGlssslHGlLFD.ssshhYGlluLllGlAuFVlhLNPsPsDc ......MlR.phRlVLAsIAlFlsLAGhhsAl+GLLFDtsssltYGshAlslGVssFVlhLNstspD...... 2 2 4 9 +11010 PF11178 DUF2963 Protein of unknown function (DUF2963) Pollington J, Finn RD anon Pfam-B_002790 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Mollicutes. 
21.70 21.70 22.40 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.96 0.72 -4.54 42 215 2009-01-15 18:05:59 2008-08-08 10:44:50 3 8 18 0 139 213 4 48.20 33 56.38 CHANGED Yp..DGc.pIphI..p....EYsspTGphIKcThY.p.DGcsIphIp..EYs..pssphIKpT............h ...............................a..cGp..pIphI..p........Eassp.TuchIKpThY.p.D.G.p.sIp.Ip..-as...ptp.h.p..................... 0 33 35 98 +11011 PF11179 DUF2967 Protein of unknown function (DUF2967) Pollington J, Finn RD anon Pfam-B_002838 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Drosophila. 25.00 25.00 37.20 37.20 21.70 21.10 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.88 0.70 -4.89 2 40 2009-01-15 18:05:59 2008-08-08 11:05:57 3 2 15 0 8 40 0 242.90 79 53.62 CHANGED PSTAPPPTRhREHFTFDPPQSPKSARoSEKupSHFoFK....EsAppAthuSsphhuuG...................AAoEusEtp-ER...uluNRsKKLRsRERDs...................N+ISPSVSPSpSpRsSPKRE++RTTP.sSTGAIuKlsSAPPTMKDuNFFGSStpQKQR.SQp.Ps...pQLSPSS....QQRKYSSSSSSGSS-RCLR-sTusGTMFPFDREALDYERIQRECFAPSSsTASsSSDS.EAENCSVYERK.uADIFQ ...................PSTAPPP.TRGREHFTFDPPQSPKSARTSEKARSHFTFK..........EDAQQARRASNSYY.AGG..........................................AAoEAAE.AQEER......AVANRNKKLRARERDSMAGNANANANANGGS.NSoRNRISPSVSP..SSSNRTSPKRERKRTT....PSVSTGAIAKINSAPPTMKDGNFFGSSQNQK.QRPS.QQ.QPSPQQQQLSPSSQA.AAQQRKYSSSSSSGSS-RCLRD.V...A.AAGTMFPFDREALDYERIQRECFAPSSA.TASTSSDSDEAENCSVYERKLSADIFQ............... 0 1 2 5 +11012 PF11180 DUF2968 Protein of unknown function (DUF2968) Pollington J, Finn RD anon Pfam-B_002850 (release 23.0) Family This family of proteins has no known function. 
27.00 27.00 27.40 79.90 25.60 25.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.24 0.71 -4.92 12 142 2009-09-11 05:56:16 2008-08-08 11:12:26 3 1 95 0 44 135 4 192.10 43 81.57 CHANGED pPsst.......sstsssss.ssAsutuu.....spusVsELpphlpsppLoELRTTYNGuYGASLLFpsc-hTYYVALFQpKsFWRVIKTsshspAEtlY+sFucQospLA-lElRRspLpAQKA.hER.lAhupsRAppLQADlpltppQcttVsscQpQsRpEsssLptp+ppspsQLcphQRQlctLQtptpp...ulP .....................s..........t.ssssss..ssAsusus.........upusVsELQphlpu+cLoEhRTTYNGoYGASLLFssp-hTYYVALFQpKsFWRVIKTtscs+AEtlYcsFucQotpLAssElc+scLEuQKAhh-RtIAhsppRAppLQADLslsRpQpAtVusRQcsspsEssALpsp+pttQsQLRpLQpQlRpLQ+Qsps......s........... 0 4 14 28 +11013 PF11181 YflT Heat induced stress protein YflT Pollington J, Finn RD anon PRODOM Family YflT is a heat induced protein. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.90 0.72 -3.87 12 479 2009-01-15 18:05:59 2008-08-08 11:33:15 3 3 229 0 85 298 1 95.40 28 57.40 CHANGED .hlctapNppclhpslppLptpGhspcDIYVluHDc-Roccls-sT.....csssluhcE.pGhhsslhshFp.cpGDELRs+hpclGlocsEAcpaEccLDcGKlllhV .......hVt.apscpEshpslpcLpp..cGhppc-I.hVlo.+....-...c.......cch........-cl.t.-so................ssN.....s.h.sscc...pulhsphhshF......p........tpt...D...p.....c.sth.....pp....hs..hscpEhp.YcpclppG.+hllh....................... 0 28 65 76 +11014 PF11182 AlgF Alginate O-acetyl transferase AlgF Pollington J, Finn RD anon PRODOM Family AlgF is essential for the addition of O-acetyl groups to alginate, an extracellular polysaccharide. The presence of O-acetyl groups plays an important role in the ability of the polymer to act as a virulence factor [1]. 
25.40 25.40 25.40 26.20 24.60 25.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.81 0.71 -4.60 13 100 2009-01-15 18:05:59 2008-08-08 11:43:13 3 5 83 0 31 89 3 173.80 41 71.82 CHANGED slhLuluuhsAtA..u-ssLYsssAPpsSAFVRhhNuosuplslsl.GssshpslusspsusahahssGuts.....hplGupslsscl..suspahTlVspssu.....pshLlE-PshpscpKA.LthhNLoss..ssoL+TADGcs-VVcsVussupucRpINPVKlsLAlhsusppVushcsl.sLc+G-sho .............u.huLulhuht....A.A..u-...uALYussAPcGSsFVRhhNAusup..lss.o..V.GssslppVushu.uosa..pFhPtGsYo.......splGups..l.PVcL...ss-cYYTlVspsuG......pspLlE-.P.s.FcN+pKuLlRV.NLosp..pLoLKTADGKT-VVcsVuspupGcR-I.NPVKVsLALasGs+KV..ucl+.sV.sLcRGEsh................................ 0 4 12 24 +11015 PF11183 PmrD Polymyxin resistance protein PmrD Pollington J, Finn RD anon PRODOM Family PmrB forms a two-component system (TCS) with PmrA that allows Gram-negative bacteria to survive the cationic antimicrobial peptide polymyxin G [1]. The TCS is linked to another one via the polymyxin resistance protein PmrD. PmrD is the first protein identified to mediate the connectivity between the two TCSs. It binds to the N terminal domain of the PmrA response regulator which prevents its dephosphorylation, thereby promoting the the transcription of genes involved in polymyxin resistance [2]. 21.00 21.00 22.10 27.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.93 0.72 -4.17 3 441 2009-09-10 18:00:43 2008-08-08 11:46:37 3 1 427 2 10 87 0 79.60 76 92.99 CHANGED MEWLVKKSpsNKpcu.CHVllLCDuGGALKMIAElcS.cltL+sGDlLSPLpDApYCINREKpQTLKIlsAoCYSsDEWpRtsK MEW..LVKKSppsKpcs.pHVLMLCDAGGAIKMIAEVKS.DFAVKVGDLLSPLQNALYCINREKLHTVKVLSASsYSPDEWERQC+.... 0 1 3 5 +11016 PF11184 DUF2969 Protein of unknown function (DUF2969) Pollington J, Finn RD anon Pfam-B_002861 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Lactobacillales. 
25.00 25.00 29.40 28.90 20.80 19.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.39 0.72 -3.89 19 584 2009-01-15 18:05:59 2008-08-08 11:53:44 3 2 582 0 56 191 0 74.00 46 97.69 CHANGED MSKK-KcIElplp-tcs....ss.sshplhI.GK+hIGpItph.-pcF..sshpssssphhhKsh-pAlptllcpaNLp ....MS.KK-KcIEIplsDsKsplstcsh-ua..pLhI.GKKlIGEIs-l..DspF..AIlpsuss-uFaKpLEcAlEhlIcsYNLp........ 0 10 25 40 +11017 PF11185 DUF2971 Protein of unknown function (DUF2971) Pollington J, Finn RD anon Pfam-B_002776 (release 23.0) Family This bacterial family of proteins has no known function. 21.80 21.80 21.80 21.80 21.60 21.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.31 0.72 -3.27 145 706 2009-01-15 18:05:59 2008-08-08 12:00:33 3 24 537 0 185 600 22 90.70 22 28.45 CHANGED MWspYuss.t.....pGlslta.....................................................tttt............thhhtpVpYsspp..tht.hphhttt....................................................pthhhh.......KppsapaEpEhRl ........................hWu.tYuss.p..........pGhslta..........................................................................t..pt........................tthhh..t..p..V..p..Ypsp....pht..hhph.h..t..........................................................................................h.pthhhhKspsWpaEpEhRl................ 0 76 122 149 +11018 PF11186 DUF2972 Protein of unknown function (DUF2972) Pollington J, Finn RD anon Pfam-B_002895 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as sugar transferase proteins, however this cannot be confirmed. 
21.90 21.90 23.40 23.30 21.20 20.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.51 0.71 -4.42 21 237 2009-01-15 18:05:59 2008-08-08 13:53:51 3 1 88 0 9 179 0 178.40 36 43.56 CHANGED alDhpEIhsc+sFpThppLucta..pFs.Pp..s-cphappth.huch.hslLPlsLhlp................sshslhh...........s.h.h.......................hhphpcphhsIspplh....pps.hh-pl.......hlhlcpp-achlhp...spplhpplKpYLpcF...l.tLccplchccs+hlpEcDlLpYL..Kcp+plph+hKplLDcE.LsaIKppRPDIVsSWKYYQEFE+hCcELc ..YlDhp-IptppAapThphLuhpa..sFs.P....pp.phFp.p....hs.l..hhlh..PlpLhhsp................phpI.h...........p.l.l..s....................h.phppp.hlcIh.pplh.....pp.hhssh..hlhIcppcap.Lpp...sphLhppl+pYl.cF...l.tlcppls.pcpphh+EcDlLsYh..pcspslhhphppIh-pE.LpalKpp+PcIlsSWcYYpEFEchhpt.................... 0 3 8 9 +11019 PF11187 DUF2974 Protein of unknown function (DUF2974) Pollington J, Finn RD anon Pfam-B_002933 (release 23.0) Family This bacterial family of proteins has no known function. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.36 0.70 -4.98 24 1013 2012-10-03 11:45:05 2008-08-08 14:07:40 3 19 778 0 142 1032 52 195.30 27 47.94 CHANGED hupSsRFpslcltsalsplstc..ppQFuAhsapls..........ssohhluFRGTDsollGWKEDFpMuap..lsAQctAhcYLpplhpphss........s.lhLsGHSKGGNLAhYAAhp.hpsslp.......................pRItclYoaDuPGh.cphhp..sstapplts+lpchlPpsSllGhhLEpspph....hlVpSss..ulhQHcsaoWpl.psspFlpt.cslsssSphhppslppWlsslst-pRcthh-sLFs 
................................................................................h.....Ratthph.thhp.th.....p........p......tp...Q.Fu.A..h.s.a.pls........................s.s.h.hl.s.FRG.....T.D........s........o.....l...l......G.............W.....+..E.....D.h.....p...h.....s......a.h..t......p......h...s..u......Q........c.......p..A......h.....p.Y..L..c.p...h.h.t.p.a....ss...........................................p.lhlsGHSKGGNL.A.h.YAAsp....hp.tp.h.p...............................p.p..I..t....t.lasa........Du.P....Gh...p..p..h..hp.......p...t....h..p..t..h...h..t+.h.h.hl.Pp.tul.lGhhhp....t..............hl...V..p..S.....p......hh.....Q....H..hp.Wph....t......ttph..........t..s.....u...htt..h.th.tths.tpht.hht.ha.t....................................................................................................................................................... 0 35 74 101 +11020 PF11188 DUF2975 Protein of unknown function (DUF2975) Pollington J, Finn RD, Bateman A anon Pfam-B_2875 & Pfam-B_3379 (release 23.0) & JH:B0MX27 Family This family of bacterial proteins have no known function. These proteins are likely to be integral membrane proteins. The proteins contain a highly conserved glutamic acid close to their C-terminus. 
26.10 26.10 26.10 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.65 0.71 -4.45 99 988 2009-01-15 18:05:59 2008-08-08 14:12:43 3 1 759 0 233 831 18 140.00 17 82.57 CHANGED hhhsthhtth...........sphhsthshhhhshh.................................lh...shhsslshhhulhphh+Llpplpcs.psFot..pslctL+hIshshlshsllhhhshhhhhhh.st..t..................hhhhhhshsshhlu.llhhllpclhtpAl.......cl+pEpDLTI ...................................................................................................h................................h.hh....................................lh.....hhhssls...h...h..h...slh....ph.h+....Ll..ph.l..pc.......s..psFop..tslptl+plths..hl...s...hu....ll....h.h....lh....h....sh.h..h.hh.....ht........s...h................hhhhsh.h.s.shllu.lhh..hllp.c.lhppAl.......clcpEsDhTl...................... 0 90 171 199 +11021 PF11189 DUF2973 Protein of unknown function (DUF2973) Pollington J, Finn RD anon Pfam-B_002929 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently they have no known function. 22.20 22.20 22.30 30.30 20.10 21.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.18 0.72 -4.20 34 106 2009-01-15 18:05:59 2008-08-08 14:16:03 3 1 95 0 32 106 363 66.70 34 72.64 CHANGED hhsLlYhhsFhhLhh.AapshhpGhhsssht...............p.p.tt......................s+ssphTsHPELLDpsGclscE-LLsVR ...........................hh.LlYhhAFsllhhlAhtshh+uhhsssht...............ppp..s......................sppsphssHPELLDcsGpllpEsLLslR. 0 4 18 29 +11022 PF11190 DUF2976 Protein of unknown function (DUF2976) Pollington J, Finn RD anon Pfam-B_002963 (release 23.0) Family This family of proteins has no known function. Some members are annotated as membrane proteins however this cannot be confirmed. 
26.10 26.10 29.80 40.90 25.30 25.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.89 0.72 -4.46 22 218 2009-01-15 18:05:59 2008-08-08 14:22:53 3 1 179 0 63 180 9 86.90 41 72.80 CHANGED LPshpsPosG.......sGsshhpplpsYhhDGhhLlGLlluAhuFlsVuhsulpsasEl+s..GKtsWucFGshllVGllLllhslaLlscAssI ......LPshEsPopG.......sGsGlhpTlpsYuhDhlhLluLllsAsuFlsVuhpuluoYsEl+s..G+tsWupFGhslsVGslLlllsIWLlTcAssI...... 0 6 27 50 +11023 PF11191 DUF2782 Protein of unknown function (DUF2782) Gunasekaran P, Mistry J anon Pfam-B_001700 (release 23.0) Family This is a bacterial family of proteins whose function is unknown. 21.20 21.20 21.60 21.60 20.80 21.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.61 0.72 -4.06 41 297 2009-01-15 18:05:59 2008-08-08 14:24:31 3 2 290 0 102 233 41 111.00 24 91.26 CHANGED p....llhss....hLussshAtsst.pss........ss..................p.-P.............-lsI..hpcss.pplpEYRlsGplhhlKVpPcsG...hsYaLlcs-Gp.upascpst....s.shhsPtW.............................plapW ..........................................................................h.....h....hhsh.s...usss......ps.s............s..................................cs..................-hhl......hp-ss..splpEYRlsGphaslcVpPtpG....tsYallcssus.ssascppt.....schhhstW...............................plapW........................... 1 20 64 87 +11024 PF11192 DUF2977 Protein of unknown function (DUF2977) Pollington J, Finn RD anon Pfam-B_002980 (release 23.0) Family This family of proteins has no known function. 20.60 20.60 20.80 25.10 19.50 17.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.05 0.72 -4.14 7 236 2009-01-15 18:05:59 2008-08-08 14:27:35 3 2 170 0 4 81 0 61.20 58 49.41 CHANGED MplhlN...........D+stIhGasshGuh-pu...l-lc.splPtpFpcpFcPctahh.su..pIlhNpsYhc.p. .........MpIhVN...........c+NElIuYs.hGGhcpu...lDl-...NlP.pFpp.FcP+tFhaoNG......EIhaNpsapcEc.s..... 
1 4 4 4 +11025 PF11193 DUF2812 Protein of unknown function (DUF2812) Gunasekaran P, Mistry J anon Pfam-B_001697 (release 23.0) Family This is a bacterial family of uncharacterised proteins, however some members of this family are annotated as membrane proteins. 22.10 22.10 22.20 22.30 21.90 22.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.43 0.71 -3.94 34 805 2009-01-15 18:05:59 2008-08-08 14:30:02 3 2 587 0 77 554 3 113.20 26 57.20 CHANGED hsh-+.-EpaLpcMupcGatLpchshhh......YpFc+.scPpchhYclDap.........sppcpp-Yhphac-sGWchl.....sph..sphp.......................aFpcttsptt.....-IaoDppo+hpphpp.hhphhhhhhhhhl ............h.sh-+-EpaLccMth+GWphpp..h.phhh.........apFcp.sp.Pccs...h..Y+l-hp............p.ppchpcYhphac-pGWchl.......s.h....ushp................................ha+K....psssh......plao..-.pts+....hphh+p..hhp.h.hhhhhh...h................................. 0 35 59 69 +11027 PF11195 DUF2829 Protein of unknown function (DUF2829) Gunasekaran P, Mistry J anon Pfam-B_001848 (release 23.0) Family This is a uncharacterised family of proteins found in bacteria and bacteriphages. 25.00 25.00 25.10 27.30 24.70 24.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.81 0.72 -3.50 24 638 2009-09-11 11:01:00 2008-08-08 14:31:38 3 2 599 0 49 240 8 75.90 53 85.74 CHANGED MsFpEAl..tlKpGcKhsRp.GWsGtphaltl...........................psssh..psalhlpss....psph....hs.WsPossDlLA-DWhlV .............MTFEEILP.uL.KAt+KhVRs.GWuGsEpYVpL................................................hDsl.ctsGhsLp.hTPYFLIsso.........cGEG....FSMWuPTsCDVLAsDWl.V.. 0 13 27 37 +11028 PF11196 DUF2834 Protein of unknown function (DUF2834) Gunasekaran P, Mistry J anon Pfam-B_001850 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
21.90 21.90 22.50 23.10 21.70 21.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.33 0.72 -3.83 47 198 2009-01-15 18:05:59 2008-08-08 14:32:12 3 1 185 0 55 167 264 99.00 30 76.31 CHANGED lYLhLAlhGslhPhht.ltal..tpsG.......h-ltshlss.hhuN.suupulshDLhluAlslslallsEu.+hph+thah...slhsohslulusGhPLaLaLRp+p ............................lYhllulhuh.slsahhslpal...pp.u............hshst.alph..hhsN.sAusu...huhDlhIuslslh.lahhs-uhR..hsl.+phWl.....hhluohhluhuhuhPhYLhhRER..... 0 14 32 46 +11029 PF11197 DUF2835 Protein of unknown function (DUF2835) Gunasekaran P, Mistry J anon Pfam-B_001851 (release 23.0) Family This is a bacterial family of uncharacterised proteins. One member of this family (Swiss:A4VM42) is annotated as the A subunit of Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV). 25.00 25.00 56.50 56.40 21.60 19.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.89 0.72 -4.20 42 218 2009-01-15 18:05:59 2008-08-08 14:34:21 3 1 218 0 59 150 18 64.60 43 91.10 CHANGED hhFslslShpcals.aYpGtAppV.VhspsGcplplPApphRPFlTppGl+GpFclphspps+alsLp+ ......hhhplslShpphls.aYpGhAspVhVpocpGh+lpLPAp+LRPFLop.GV+GpFRLshDpss+FlpLE... 0 15 31 47 +11030 PF11198 DUF2857 Protein of unknown function (DUF2857) Gunasekaran P, Mistry J anon Pfam-B_001886 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
31.00 31.00 32.70 38.20 30.70 30.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.00 0.71 -4.76 32 266 2009-01-15 18:05:59 2008-08-08 14:35:22 3 2 219 0 73 222 8 177.30 33 90.10 CHANGED sLNpAlLsphlpsL+pGp.lppCcshGhs.--lptLpphshpcl..LssusVshss....lslsp-sht+llppsccppp....cpphl-RhLcLGAShpMhppaFGL.ospEVutRRclL.ulstppGRhshhsE.ppcptlWctWpt..pptslt.h-ssphL-hhMhlAEphsl.............sLoslWshlppW.pps ........................lNpulLsphLhsL+sGp.l++CcshGhs.-ElshLppholpcl..Lu.susVshss....lsls+-sLp+lLspuccppp....c.ppl-RALcLGAShphhpca.FGL.ospEVutRRcll.Gls...h...+.pG.RpsshsE.-p-stlWcpWp.....ctslpt.DssphL-hhM.lAE....phsl.............sLosVWstlppW.pps...................... 0 7 33 54 +11031 PF11199 DUF2891 Protein of unknown function (DUF2891) Gunasekaran P, Mistry J anon Pfam-B_001921 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 45.90 45.60 22.40 21.90 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.14 0.70 -5.85 33 328 2009-01-15 18:05:59 2008-08-08 14:37:19 3 1 324 0 81 223 116 319.90 46 94.86 CHANGED As+FApLsLssls+EYPNKlsHshsussDlp.sP+pLHPsFYGsaDWHSuVHGaWhLV+LL+pa...P-hspsspIpusl-cplTsEslAsElAYhppststu.FERPYGWAWLLpLstELppac.........sstupcWupsLpPLsphlspRhpsaLP+LsYPlRlGsHsNTAFALuLAhDaAcsstcpsLpphItppupcaahpDpssPstaEPuGcDFLSPsLtEAcLMRRVLs....sspFspWhctFLPplsstt.sphlpPusVoDRoDGKluHLDGLNLSRAWshculAsuLPps..ttstlpssApcHlsuulsplsut.cYhGsHWLuSFAlhA ......................ppaAplsLsslpREaPN+hsHphsussDlp.sP+tLHPhFYGsaDWHSSVHuaWhlV+LL+pa.....schspt..-pIhssLcsphTcEsstsEhtYLph....t..p.+........GFERPYGWuWhLtLuspls.hs..........stuppWApslpsluphhhcphhpaLP..KhoYPlRsGTH.NTAFALuLsLDaARt...hp...csp.lttsIlppA++aahsDts...h.u.........aEPuGs....-FLSPuLhEAcLMpRVls....ss-FssWhcsFLPpLspp-.s.sLhsPs....pV.....o..D+..oDGK...l.....A.HLDGLNLSR.AWsh+uLAshLsEs..t+.ttlcpAAscHlspulsplss..DYhGsHWLuSFALhA......... 
0 24 50 66 +11032 PF11200 DUF2981 Protein of unknown function (DUF2981) Pollington J, Finn RD anon Pfam-B_003040 (release 23.0) Family This eukaryotic family of proteins has no known function. 21.00 21.00 23.30 23.20 20.20 18.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.15 0.70 -5.35 2 8 2009-01-15 18:05:59 2008-08-08 14:59:40 3 2 5 0 8 8 0 120.40 34 56.25 CHANGED YLILFsFSLVVVSMVALDMAElVHFFPKAPELHshoLLGhTuPFVTRFKsPNASVLFPFNFFKLPNDTTGVLPQL.H...............................DphVVTKFThoplDpHsHKHCNKhsot....RFDDDlSuVLDNhDssDDLpsLRNALKQRLtspN.......TstssoGssD...NYNAYDNPDGKV.uuKDlNS.................spsSsNsssSsspKpptsDSFYDHLPFMPHPGETEGESEEVSKDEFPPETNDLTPEGKSEVVVLYKLQKRs.phhhS.....alhhhlY ................................................................................................................t...........................................................................................................................................................................................................................................................Y-clPFhPH.spTEGEoEElStsEhPPEspsLT.pt.pEVsslhKhQKR............................ 0 4 6 7 +11033 PF11201 DUF2982 Protein of unknown function (DUF2982) Pollington J, Finn RD anon Pfam-B_003059 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 25.70 38.90 19.50 17.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.78 0.71 -4.86 24 166 2009-01-15 18:05:59 2008-08-08 15:05:09 3 2 160 0 37 120 8 148.40 48 67.74 CHANGED SlpLotptlpYaH+pGsWhlcWpNIpRlsls+hppshpphpLPaIGIKLKphsshLssIsPRLAotLLhEQRsLLhhusppcp.shpshEshLhs-tsalsspGppY+GllAMaupRhphLpchLGYclalstssLDRsspEFluLLRcaptts .......................s.aTLTsoHlQ.HahpGGWVl+WsNIppIGlsphc.p-GWaQPLPWlGI+LKcYsPaLsuIsPRLuTpLLlpQRuLLhLuApQps..c...tpp.......aEDhllDspsYls.scGcpYpGL.AMLANRMpYpRcahGYDlFISpp-LDRsu-EFVGLhRRYLAAA......... 0 6 14 27 +11034 PF11202 PRTase_1 DUF2983; Phosphoribosyl transferase (PRTase) Anantharaman V, Pollington J, Finn RD anon Anantharaman V Family This PRTase family has a C terminal RNA binding Pelota domain [1]. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 31.70 31.70 37.40 37.30 26.20 25.20 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.58 0.70 -5.41 68 350 2012-10-10 14:25:38 2008-08-08 15:38:30 3 7 335 0 73 275 5 247.50 50 60.57 CHANGED sl...p...........GSYtssDVpFLL+sl.s..hpp.sss--+EchIQSG.phHYSEMLshEhpPoptYhcLFcpALppsutRlAtsVuhlActlhp......c.ps.ssllLVSLARAGTPlGlLl+Rhlpp........Ghs.....ssHYuISIIRDRGIDpsALcaI..hp+Hs.spslVFVDGWTGKGAIopELpcAl.t.p.h.ppth.............s.......hss.cLsVLADPutCutlhuTc-DaLIPSuhLNuTVSGLlSRTVLssch.l.....ssssF...HGshaYccLtstDlSphFlDslssth ............................h..GSYtPsDVpFLLKsl.s..hp.sssEc+EclIQSGptHYS-MLs.E.tPoptahcLFccALcpsupRlAptVstLAptlhp.......c.ts...p..slVLVSLsRAGsPlGVLl++hl+t.........Ghs...s.HYuISIIRDRGIDtsALcaI..hp..+..H..s..spsIVFVDGWTGKGAIotELtcAL.phpsth.........................s.....hpscLsVLADPsuCuhLhuoc-DaLIPSuhLsusVSGLlSRoVhss-hl..........tthHGshhaccLpphDsSphalDsVsph.h........... 
0 20 44 61 +11035 PF11203 DUF2984 Protein of unknown function (DUF2984) Pollington J, Finn RD anon Pfam-B_003006 (release 23.0) Family Some members in this bacterial family of proteins are annotated as membrane proteins however this cannot be confirmed. 21.30 21.30 21.30 21.40 21.20 20.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.14 0.72 -3.89 22 356 2009-09-11 11:43:42 2008-08-08 15:46:26 3 2 151 0 77 242 0 95.90 31 21.98 CHANGED PAsupRpTWlsLRhcsppshtAlptR..............sts..ultpslssuspRlAspLtspGhsApshpusEhspssstlstt....................................+EsWcthcstsu.assuYtls ............ssupRcTWlsLRhs.spcs..htAlptR.............................sts.hultcshhsuupRlAscLcspGhcAps.s.sus-l.schssshu..s..........................................................cEpW+sl+.ttsu.ahsuYth........................................ 1 18 50 70 +11036 PF11204 DUF2985 Protein of unknown function (DUF2985) Pollington J, Finn RD anon Pfam-B_003090 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.00 20.00 21.00 27.60 19.90 19.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.85 0.72 -4.23 20 240 2009-01-15 18:05:59 2008-08-08 16:59:53 3 12 109 0 186 235 0 78.80 38 14.21 CHANGED h+alpTPhGFhlTlYuh.lshWGuhlhLhLlshlsh........ssup+chWIElssQILNALFslsGhGLAPaRh+DhYahlhht ..................h.palppPhGhhlslYuhhlssaGuhlhLlLhGhls.h................ssup+phWlElssQlLNALFslhuhGlt.PaRhhDhahhhph............. 1 43 96 150 +11037 PF11205 DUF2987 Protein of unknown function (DUF2987) Pollington J, Finn RD anon Pfam-B_003120 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 25.30 25.30 20.30 24.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.78 0.71 -4.54 25 171 2009-01-15 18:05:59 2008-08-08 17:07:43 3 1 169 0 42 124 3 144.00 43 67.67 CHANGED stAps.lslpYssFYs+LKpssKtsaphVcluFhlscsps.t..CpIpsuplssEccptsLslosspcL.hlPhDcsL+ss+ALlsls.s..spuppCsluhQVhAKp.....hptphotscLtplhspMcshLschuGhhs+....aFtPclsGlshcFs ..............................................App.hhhoYScLaophKsNs.KEsHsDV+huhaFsDspopp..CsIcKAaMEKE.-HYEpLslssupEL.hlPlDssL+pANPLVhVs.o..sp-ccCDhSMhVhAKcs....hpuclohpclpsLlsQMpshL-pLGGMFuu....aFsPsVpGVTlEFs............ 0 6 16 29 +11039 PF11207 DUF2989 Protein of unknown function (DUF2989) Pollington J, anon Pfam-B_003139 (release 23.0) Family Some members in this bacterial family of proteins are annotated as lipoproteins however this cannot be confirmed. 20.90 20.90 20.90 54.50 20.50 20.50 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.47 0.71 -4.87 26 162 2009-09-10 14:55:00 2008-08-12 09:52:50 3 1 160 0 37 114 8 199.60 47 74.36 CHANGED hLsGCtp..stsssplCcssPpL.CsDLs.cDuaCRhcRs-LIhpRaplpc..sPospppYppLphlEsYpKClELAutIpaIppp-+pscRhcAhhpShpplpcLpppT+sSp-PtllYY+WophsDpsAhpphLthEspspl.pssElQhuLAoYYh+pDspKshpLLh+sLpLhpts................phN....................s-llpuLAolappppshcpAYlWuhl .....LsGChE..pcsT-plCtsNPpL.CppLN.cDGQCRl.RT-LIW+RF-lhK..sPoDcpplcEhphlpsYcKCLELASQ.IQsIcpochKpsRhsALlpSscE.cRlst-L+pSpsPpsLYahWSQhGDpsAp+tFLplEGpscL.cT.AEMQaALATaYo.sRDp.KTlpLLp+sLcLsstp................slN....................s-lLcuLAShapthpch-cAYlWAhl...... 0 6 13 25 +11040 PF11208 DUF2992 Protein of unknown function (DUF2992) Pollington J, Finn RD anon Pfam-B_003142 (release 23.0) Family This bacterial family of proteins has no known function. However, the cis-regulatory yjdF motif, just upstream from the gene encoding the proteins for this family, is a small non-coding RNA, Rfam:RF01764. 
The yjdF motif is found in many Firmicutes, including Bacillus subtilis. In most cases, it resides in potential 5' UTRs of homologues of the yjdF gene whose function is unknown. However, in Streptococcus thermophilus, a yjdF RNA motif is associated with an operon whose protein products synthesise nicotinamide adenine dinucleotide (NAD+). Also, the S. thermophilus yjdF RNA lacks typical yjdF motif consensus features downstream of and including the P4 stem. Thus, if yjdF RNAs are riboswitch aptamers, the S. thermophilus RNAs might sense a distinct compound that structurally resembles the ligand bound by other yjdF RNAs. On the ohter hand, perhaps these RNAs have an alternative solution forming a similar binding site, as is observed with some SAM riboswitches [1]. 23.80 23.80 27.80 27.60 22.90 22.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.82 0.71 -4.25 29 465 2009-01-15 18:05:59 2008-08-12 10:41:15 3 3 432 0 61 366 9 127.70 37 93.45 CHANGED LTlaFDs.FWlGlhEphp.sschcls+lsFG.sEPpD.ElhcFlhpch.s.Lphspshtsph......pp++hNPKRhQRpsp...Kchcp.tsluTKAQpAlKhphEppKpc+KptsKpcKcptpcp+aplKppK++pKH+GH ..........LTVaa-s..tFalGlhEpp.c...p..........s.......p..........hpss+hsFG.sEPpDtElhcFlppp.h......hp.Lpassthtsph...........pp+phNPKRlQ..RpstKp..hpp....tthu.TKAQpAl+hphEtpKpc+Khpp+pc+-tp+p++htlKppK.t+pKH+GH................. 0 24 41 48 +11041 PF11209 DUF2993 Protein of unknown function (DUF2993) Pollington J, Finn RD anon Pfam-B_003144 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 
27.30 27.30 27.40 27.40 27.00 27.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.45 0.70 -4.44 148 794 2009-09-11 14:20:34 2008-08-12 10:49:44 3 4 452 0 220 612 29 216.40 16 79.06 CHANGED sssts...................hsthhpptlsptl.psph..ph........spss.pVplsuhP.h..pllpGcls.............plplsspsshh.....sht.....ht...phplpsps.lplshsth...........h.st..hphhp.....sspssspltlstsslsptlss..t.........hpth...................................hspsplplsu.....................................lshpsph......pl......tpt.............h.lphsshp.ht.s........h..............hsthhpths..h..sht...t..........hshsl.plp..plp.lpssslplps.p ..........................................h....shthhscptlsppl.ppts.......t..........ssss..pVslt.s.hP.hlh.plh...sGcls.............plslpstssph.....sht.......ht...........phphshcs.lclshssh..........h........phhp......ssplsusstlst.splschlst.........t...........htth..........................................sts.s.plpltu....................................h.lslpssh..............pl...............ssst.............thhlphssss.huhst.....................h.......................................................................................hpslhpshs.tph.....shtp.............hshsl.phs..slp.sssssl.hph............................................................................................................................... 0 56 159 205 +11042 PF11210 DUF2996 Protein of unknown function (DUF2996) Pollington J, Finn RD anon Pfam-B_003176 (release 23.0) Family This family of proteins has no known function. 
21.90 21.90 23.60 28.90 21.20 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.57 0.71 -3.83 21 102 2009-09-11 16:37:34 2008-08-12 14:04:55 3 1 88 0 45 104 110 115.30 41 66.98 CHANGED tlE-KPFsEFlppchlPuLppuLpsc.GlsshsLshtcsphPlsG....spCWhVhGchsts....RpFWLsFspcsIsusKshuLA-sGscPShlESFLIDE++lTLsLLlsRllQRLNGQKWLGs .....hEcKPFsEFhppchlPuLppuLppp.GlsslcLsFpcs.hsh.h....t.h.plpGsa.pst...+pFhlaFsptsIpG..KuFohussGspPSTlEsFL.IDE+...+lTLcLlVhhllQRLNGQKWLs.h................ 0 7 30 41 +11043 PF11211 DUF2997 Protein of unknown function (DUF2997) Pollington J, Finn RD anon Pfam-B_003181 (release 23.0) Family This family of proteins has no known function. 20.80 20.80 21.90 21.30 20.60 18.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -7.91 0.72 -4.58 41 147 2009-01-15 18:05:59 2008-08-12 14:10:29 3 1 124 0 60 136 151 48.10 38 65.72 CHANGED lcapIpsDGpVcEpVpGlpGpuCpchTctlEssLGsVp.pcchTuEaat ...........lcahItsDGcVptpVpGltGpuCpchTptl.EstLGsVh..spchTsEaa.......... 0 22 47 58 +11044 PF11212 DUF2999 Protein of unknown function (DUF2999) Pollington J, Finn RD anon Pfam-B_003194 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 26.70 134.00 22.90 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.78 0.72 -3.94 10 145 2009-01-15 18:05:59 2008-08-12 14:26:43 3 1 145 0 26 66 7 81.80 78 97.98 CHANGED MNPIIAlLKEHNVSDEQIsELFQsLT-NPhhAMAhIsQLGIPsEKLQtLMulVMsNPuLIKEAVEELGLDFuKVEAAKA+Lp MNPILAhLKENNISsEQINELFQTLTQNPLAAMATluQLGLPQDKLQhLhAQVMQNPALIKpAVEELGLDFuKVEAAKAKLQ. 0 3 7 17 +11045 PF11213 DUF3006 Protein of unknown function (DUF3006) Pollington J, Finn RD anon Pfam-B_003197 (release 23.0) Family This family of proteins has no known function. 
21.50 21.50 21.50 21.50 21.40 21.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.21 0.72 -4.22 42 334 2009-01-15 18:05:59 2008-08-12 14:37:12 3 2 303 0 107 256 3 68.80 33 87.57 CHANGED psllDRF.E....schAVlEhpscp.....hsls+shLPtsu+EGD.VLpl...........plplD.cpTcpt+...cclpchhccLhc ..........+sIlDRh.E.....sphAVlchcspp.......hsls+spLPp.ss+.E.GD.Vlhl......................ptsphplDtp-T-cc+...ccIpchhccLh........................... 0 52 85 101 +11046 PF11214 Med2 Mediator complex subunit 2 Wood V, Coggill P anon Wood V Family This family of mediator complex subunit 2 proteins is conserved in fungi. Cyclin-dependent kinase CDK8 or Srb10 interacts with and phosphorylates Med2. Post-translational modifications of Mediator subunits are important for regulation of gene expression [2]. 21.10 21.10 21.50 22.00 21.00 21.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.42 0.72 -4.00 10 58 2009-01-15 18:05:59 2008-08-12 14:40:30 3 1 35 0 39 58 0 122.50 45 41.27 CHANGED M.........................shcsKLssshDDIL+sSu.lh..p.l+sschpSsllTGhNspL.....ppsLsc+IspFauILD-s.spLNcocahl....Dslhp.+pcpc................EhK...EEEE+p++...........E-EE+K ..............NLps+Lp...ssLD-ILKoSGalh..ElIcpN++QSNlIousNNpLIp..lpppLsscIppFHuILDpTloKLNDAcWCL....sshlEp..KKKh-...........................EhK.lKEEtt...+K+c........EpEt..tcp.tct......................................................................... 1 18 27 38 +11047 PF11215 DUF3010 Protein of unknown function (DUF3010) Pollington J, Finn RD anon Pfam-B_003238 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 77.50 77.40 19.00 18.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.52 0.71 -4.25 31 135 2009-09-11 10:24:04 2008-08-12 16:16:03 3 1 126 0 48 117 14 136.00 43 95.65 CHANGED M.+lCGVELKGsEAIlsLLshcsshaslP-sRsp+hslscsssspsl+pFphsFscLhpDYpV-pVlI+cRtpKGKFAGuAsuFKlEAAIQLl..sslpVsLlSssslKppLK+sPh.lsFptsGLKpFQ.csAFpsAauhL ..M+lCGVElKGuEAllsLLo.hcsssasls-sRsp+hslscsssspsl+cFphsFtKLhpDYpV-cVVI+cRppKGKFAGuAsuFKlEuAIQLh..ssls..VpLlSPssIKpphK+sPh.lsFcpssLKpaQ.cpAFpsAYAh..... 0 8 17 36 +11048 PF11216 DUF3012 Protein of unknown function (DUF3012) Pollington J, Finn RD anon Pfam-B_003254 (release 23.0) Family This family of proteins with unknown function is restricted to Gammaproteobacteria. 20.40 20.40 21.70 21.00 19.90 19.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.56 0.72 -4.27 16 235 2009-01-15 18:05:59 2008-08-12 16:33:21 3 3 142 0 40 118 13 31.90 62 64.02 CHANGED VGSEsWCccM+-KPKGDWTANEAs-aAKHClh .VGScuWCEDMcEKPKG-WTAp-As-aAKHClh. 0 7 14 29 +11049 PF11217 DUF3013 Protein of unknown function (DUF3013) Pollington J, Finn RD anon Pfam-B_003257 (release 23.0) Family This bacterial family of proteins with unknown function appear to be restricted to Firmicutes. 25.00 25.00 35.40 35.30 18.90 18.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.98 0.71 -4.50 12 479 2009-01-15 18:05:59 2008-08-12 16:37:24 3 2 473 2 34 218 0 155.00 58 98.53 CHANGED MuKhshLshL-Epl-Kp.FsaDhtlsWDKKNHAlElsFhLpApNssulchsDspGpsSsE-Il.FEDtlLFYNPsK.SpFD.t-DYLsslPY-sKKGlu+phLshFspaLp-sh-pG.sDLMDFL.sDsss-pFtLpW-spsFEpshsp..LpEs-...aYsYP+ ...MApaGFL-lLEEELDKp.FsaDaplsWDK+NHAVEVoFlLEAQNs.u.uVEhlD--GElSs-DIl.FE-uVLFYNPuK..Sshs.tEDYLssIPY.PKKGhSREFLAYFAhFLKDsA-.GLcsLMDFL.pDs-AEEFshcWspEsFEcG+ss..LcEsE......FYPYP+........... 
0 4 14 23 +11050 PF11218 DUF3011 Protein of unknown function (DUF3011) Pollington J, Finn RD anon Pfam-B_003246 (release 23.0) Family This bacterial family of proteins has no known function. Most members belong to Proteobacteria. 25.00 25.00 25.00 26.20 24.10 24.00 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.02 0.71 -12.00 0.71 -4.54 5 96 2009-01-15 18:05:59 2008-08-12 16:42:20 3 3 52 0 30 104 9 167.40 36 75.04 CHANGED VVRCES+DpcRVHCDhsscsGlQLVRQLSuoSCI+GSpWGoDRuGVWV-pGCRAEFsuu.....supGuuaPusst....hRRlVRCESNGs+p.SCPVtLRGAPVRLLRQLSlhPC+EGRoWGp++sEIWVScGC+GEFElAc--GpsFVDs...........PaoLTCESKsR+RRoCGsSlcpG.sVLppQLSuTPCEEG+oWGWS..RssVWVssGCRAEF ................................................lpCpSpc.tphcCsh.httts.LVcplStspC.cGppWs.c..ptt..lWVppGCRucFstt.......ts...shst............ttslRCcSs.stt...C..sshh.Rts..pLlRQhS.hPChcspsWs...ctsplWVscGCcGEFt.ut.....ts.G.....s.hssh.......................shhlsCpScppphptCshss.p..ht..shlhpQhSpo.C.EspoWGas..pstlWVssGCRAcF.............................................................. 0 8 20 25 +11051 PF11219 DUF3014 Protein of unknown function (DUF3014) Pollington J, Finn RD anon Pfam-B_003267 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 127.80 127.20 19.80 19.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.68 0.71 -4.35 31 126 2009-01-15 18:05:59 2008-08-12 16:46:51 3 1 121 0 57 133 68 160.20 37 55.15 CHANGED -llR+hVVhlDNLApGphsp+htPlpsPptpFsVpc.......sspth....IssssYpRYDsasshlsulsspphsshYpchpPLhppAYsELGYPcttFsspllpAI-plLss.PhscsPlpls...........psuVhYpFADPpLEuLssuQKhLlRMGPcNs++lKutL+cl+p ..cllR+hVVhVDNLupup.lsp+h.Plpsst.....ppFpVsph................ssphh....lss.ssapRYssYVshlsulDspshsphYpchhPLhppAasELGYPc.tt.FN-RllpAI-hLLss.PtspsPltlh...........pspVpYpasDPpLEuLssuQKhLlRMGP-Ns++lKutLRcl+.s.. 
0 12 30 46 +11052 PF11220 DUF3015 Protein of unknown function (DUF3015) Pollington J, Finn RD anon Pfam-B_003292 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 26.80 26.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.81 0.71 -4.59 16 196 2009-01-15 18:05:59 2008-08-13 09:16:49 3 1 165 0 52 142 74 138.20 38 88.21 CHANGED AhAsustGs.GCGhGs.MlFpGpsGhssHlLAuTTNGTSGNpTFGMTSGTsGCsosu..slshsupshlhhss.h-pLucDMA+GpGEsLsuhAslLGlpspD+s+Fstlh+cpFspIFsst-s.TucclhsuhhslhppDspLuKYst ....................................h..A.Assss.GCGhGs.hl...FpG...Q..cGhss+lLAuTTNGT.GNt......TFG.hToG.TsGCpssu....tl.shsucshhhhss.h-pLucDMAhGpGEuLsshAslhGltspD+scFstls+ppFu.pIFsssss....outpVhssh.slhtpD.hLttYs............................ 0 17 31 44 +11053 PF11221 Med21 Subunit 21 of Mediator complex Coggill P anon manual Family Med21 has been known as Srb7 in yeasts, hSrb7 in humans and Trap 19 in Drosophila. The heterodimer of the two subunits Med7 and Med21 appears to act as a hinge between the middle and the tail regions of Mediator [2]. 28.80 28.80 28.90 29.10 28.40 26.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.78 0.71 -4.22 30 284 2009-01-15 18:05:59 2008-08-13 10:00:02 3 4 250 3 206 269 0 134.00 30 86.37 CHANGED MuDRL.......TQLQssLDQlspQFsuolsalspspsss.hsssp........................phustpsss...........................................................sspsFpsphcELupDlllKs+QI-hLIsSLPGlssStccQh.cpIccLpcELpplEp-+tctl+ct-pLLccl-pllsslspu ...............................................MuDtLTQLQssls..p...lAppFssslshlpppts.sphss.p...............................................thsptps.................................................................spshppttp.hApslhhps+..pI-...hLIsoLPu....-..s..otptQh..pplpcLpc.E.cpstpphpcslpct-.tLLcclpshltslsp.s................................ 
0 67 112 168 +11054 PF11222 DUF3017 Protein of unknown function (DUF3017) Pollington J, Finn RD anon Pfam-B_003304 (release 23.0) Family This bacterial family of proteins with unknown function appear to be restricted to Actinobacteria. 25.00 25.00 25.30 25.60 23.30 24.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.54 0.72 -4.19 44 367 2009-01-15 18:05:59 2008-08-13 13:05:21 3 1 325 0 76 203 15 71.80 41 71.31 CHANGED PILlVshlhssuhsLssusaWRRGALlIGIGVGlAAlLRLsLo--RAGLLsVRS+sIDFlTTsTVuAAMlYIAhTI ...............hhhhh.hhhhuhl.ls....h....s..s....aW..RRGshllGhullhAAlLRlVL.s-pp..AGLL.uVRSRhlDhlhhss....lGAsMVhlAhTl..... 0 25 60 73 +11055 PF11223 DUF3020 Protein of unknown function (DUF3020) Wood V, Coggill P anon Wood V Family This family of fungal proteins is conserved towards the C-terminus of HMG domains. The function is not known. 25.00 25.00 27.60 27.60 16.50 17.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.42 0.72 -3.56 11 47 2009-01-15 18:05:59 2008-08-13 13:24:13 3 3 35 0 35 50 0 48.80 43 5.16 CHANGED NRcRKK+.WRctNuc+NK-NDL+SRlh.....KKApthaGpcso.tcKpsWh--EF .....N+cRK++.WR-tNsp+N+-sDLRuRlp.....KRAsthFG.cp-o.tcKppWl--EF.. 0 12 25 35 +11056 PF11224 DUF3023 Protein of unknown function (DUF3023) Pollington J, Finn RD anon Pfam-B_003319 (release 23.0) Family This bacterial family of proteins with unknown function appear to be restricted to Alphaproteobacteria. 
20.30 20.30 22.90 22.20 19.80 17.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.58 0.71 -4.49 35 63 2009-09-10 17:53:36 2008-08-13 13:36:20 3 5 5 0 38 59 0 122.90 21 52.07 CHANGED shCIGpT.s.ssspLplhlsps.....pppphhsh...spGpSLFhlpsplssphlppst..pLpphhthsppp............spslphshYhLVsppslppFhp.stp..................ptphhshpslspaGsllhs+.....tpsshhch............FsEpcsLp ...........................hhCIGpT.t.ssspLhlhhsps.....p.pphh.....spGpSLFhlpsplssphltps...tLpp..hhthsppph...........pthlphphYhLVpppplppFhp.h.p...................tthh.hpslsphuphlhsp.....httt.hph............hsEtphL.................. 1 8 8 8 +11057 PF11225 DUF3024 Protein of unknown function (DUF3024) Pollington J, Finn RD anon Pfam-B_003325 (release 23.0) Family This family of proteins has no known function. 21.20 21.20 23.00 22.60 20.00 19.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.80 0.72 -4.09 30 192 2009-01-15 18:05:59 2008-08-13 13:40:33 3 1 180 0 35 131 2 57.50 36 48.42 CHANGED thpsslA+lpaspspstWplahhcps......hcWp.Yp...cspsLcthLchlpcDspusFW ......hppslAKlhapcu.....pppWtLYhhcpp......ttWh.Ys.hspspsLoshlctVccDPcuhFW.. 0 14 20 32 +11058 PF11226 DUF3022 Protein of unknown function (DUF3022) Pollington J, Finn RD anon Pfam-B_003318 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 25.30 25.10 21.70 19.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.35 0.72 -4.19 13 102 2009-01-15 18:05:59 2008-08-13 13:42:54 3 1 53 0 25 82 4 109.60 39 85.85 CHANGED -hsEL-pslsD.Fs-posssVHsDspoGRlplpluWltss.suss+thRpuVsLsh-ssslcRYAsLDsusRhRV+AhL+DhVcssL-p....LEDtptc.psVsl-lTss.LDsA ....chsELs+llsDhFP-Q.ophs.+sD.psGR.l.lpVpWluhRhuuos+..RpolslRhssssLtRYhuLcshtRuRs+AhL+AhscAsLsu....LE-p...cAsupsVsh-sT.htLs-t......................... 
0 2 4 12 +11059 PF11227 DUF3025 Protein of unknown function (DUF3025) Pollington J, Finn RD anon Pfam-B_003327 (release 23.0) Family Some members in this bacterial family of proteins are annotated as transmembrane proteins however this cannot be confirmed. Currently this family of proteins has no known function. 25.00 25.00 59.80 47.00 18.40 18.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.46 0.70 -4.83 21 157 2009-01-15 18:05:59 2008-08-13 13:46:56 3 3 155 0 60 158 30 208.90 45 74.55 CHANGED PlRFVsQst..LPsGpAYEsaIapTGpVPTR.-NLHDFFNALlWLpFPcsKstLNtlputsIupt..Glt.sh..RGslRDAlTlFDENGllll.uss.....sLtss.................Lcs+pWppLFlcpRstWt........ppspshhFGHALhEKLlpPaKulTAHshhl.l..sst.h..shsp.puhL-tpLs.....httushhss+sFuPLPlLGlPGWas-NpssuFYcDopVFRPtRR ...........lcFlsQss...LP.sGhAYEsaItpTGpVPTR.cNL...HDFFNALlWhtaP+hKutLNApQuss...Istt.....Gl....u..ss......RGslRDAlTLFDENGslhssu-.s.....s..Lt..sA.................Lpu.a-WppLhl.ppRstWs.........................s.pscshlFGHALhEKLl.sPaKulTAHuhhVpV..ssshh.th.....stsppp...............uhLDtpls......................tspthss+sasPLPlLGlPGWhstNtssuFYsDstVFRsuRp............................................. 0 10 35 48 +11060 PF11228 DUF3027 Protein of unknown function (DUF3027) Pollington J, Finn RD anon Pfam-B_003334 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
25.00 25.00 26.80 25.90 24.90 18.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.32 0.71 -4.63 20 441 2009-01-15 18:05:59 2008-08-13 13:56:36 3 3 428 0 110 316 124 189.60 48 66.63 CHANGED G-HLGspsEu-+lsTHhFtsphPGYpGWpWAVTVu+sscucplTVsElsLlPGssALlAPcWVPWp-RlRPGDLGsGDLLPsssDDPRLssGa.......................................ssssDstl-.................psshELGLGRpRVLSttGRppAAcRWY-G-tGPcothA+sAP..ssCsTCGFalPLuGuLsphFGVCANEhuPsDG+VVuhDHGCGAHSEs.l ...........................................G-al.GshsE...s-plsTHhFtsphPGYcGWpWuVslupss...c..u..cp.sTVsElsLlPGt..cALLAPpWVPWc-RlR..P..GDl...us.....GDlhssss-D.PRLtPGa..........................................................................................s.s.stD.st......................tsstE.lGLGRp+lLSt.GRspsApRWhsGphGPcsshu+...pAs....tsCso.CGFalsLsGoLst.hFGVCuNEaS.uDG+VVphcaGCGuHS-s.s........................................................... 0 32 80 102 +11061 PF11229 DUF3028 Protein of unknown function (DUF3028) Pollington J, Finn RD anon Pfam-B_003337 (release 23.0) Family This eukaryotic family of proteins has no known function. 
19.00 19.00 20.50 25.00 18.20 18.40 hmmbuild -o /dev/null HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.87 0.70 -6.24 3 60 2009-01-15 18:05:59 2008-08-13 14:00:20 3 4 42 0 35 71 0 530.00 60 38.40 CHANGED MNKLRLLVENSQQTSGFALALGNIVHGLSVCGHGKAEDLGNRLLPAWIKlVLTEGAPTMLCLAALHGMVALVGSEGDVMQLKSEAIQoSHFQGRLNEVIRTLTQVISVSGVIGLQSNAIWLLGHLHLSTLSSNQSRTSVPTDYSYLPEuSFIRAAIGFFITGGKKGPESVPPSLLKVVMKPIATVGESYQYPPVNWAALLSPLMRLNFGEEIQQLCLEIMVTQAQSSQNAAoLLGLWVMPPLIHGLSLNIKKYLLlSsPLWIKHlSDEQIlGFVENLMVAVFKAASPLuSPELCPSALQGLSQAMKLPSPSHHLWSLLSEATGKIFDLLPNKIRRNDLELYISIAKCLSEMTDD-ANRVAQITKSNIEKAAFVKLYLVSQGRFPLMuLTDlLSVAVQHREKETLAWMILHSLYQARIVSHANTGVLKRMEWLLELMGYIRNVAYQSTSlQNVALDEALDFLLLIFAAAVVAWADHsAPLLLGLSASWLPWHQENGPAGPAuSLLGRSPMHRVTLQEVLTLLPsSMsLLLQKEPWKEQTQKFIDWLFSIMESPKEALSAKSKDLLKATLLSLRVLPEFKKKAVWTRAYGW .....................MNKLRhLsEssQQTSGFALALGNlVHGLSVCGHGKAEDLus+LLPuWI+lVLsEGsPTMhCLAALpGhVALVGSEuslhQLKS.EsIQoSpFQuRLNEVIRTLTQVISlSGVIGLQSNAhWLLGHLHLSoLSSsQSRsSVPoDYSYLPEsSFIRAAIsFhlsG...GKK..G..PEuVPPSLlKlVMKPIAoVGESYQ..YPPVNWAALLSPLMRL.NF..G...EEIQQLCL.EIhVTQA.....QSSQNAAsLLGhWVhPPLIHuLSLshK+YLLlShPLWhKHVSDEQI..uFVEsLhVslF+suS....hssPELC.SALQGLSQ..AMKLPSPu+HLWSLLs-ATtKIFDLLPNKIRRsDLELYIulAKCLSEMTD--.ssRlsQlTKuNlEKA.AFV+LYLVSQG......RhPLhsLsDlLosAlp...HpEKEsLAWMlLHSLYQAR.I.VSHA.NTGVLK....RM.EWLLELMGYIR.NVAY..QSTShQNs.sLcEALDFLLLlFAsAVVAWADHs.APLLLGLSASWLPWHpEsGPuG...ss.suhLG+SPhcRVTLQEsLTLLPsSM.LLLpKEPWKEQTQK..FIDWLFSIMEuPcEuLSApS+DLLKATLLuLRsLPEFKKKAVWTRAYGW......................... 1 6 8 17 +11062 PF11230 DUF3029 Protein of unknown function (DUF3029) Pollington J, Finn RD anon Pfam-B_003347 (release 23.0) Family Some members in this family of proteins are annotated as ykkI. Currently no function is known. 
23.00 23.00 23.10 23.00 22.90 22.70 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.60 0.70 -6.31 19 631 2012-10-01 23:28:04 2008-08-13 14:10:42 3 1 584 0 51 368 5 448.40 70 94.14 CHANGED sLoh+QKpphLAhEAEssLshhplsstspcth....cpGlICDhFEGaAPa+PRYlLPDYA+FlppGScaLELsPspDlDEALNsLhIlY+HVPSVTshPVYLGpLDsLL.PFlpths--plYpKLKhFhlhLDRTLPDuFhHsNIGPsDshhsRhIL+l-uELKQssPNLThhYD.-lTPD-hlp.AspshhpCSKPahANashaspsas.psaGIVSCYNuLPluGGu.TLVRLNL+plAc+Apu.h--FhsplLPcYsphhhplI-sRtpFLhEESpFF-o.FLV+EGLIccsRFssMFGIaGhAEAVNhLlpp-shpp+..............YG+scpANpLGhpI.ppLsclVsspsstY..uhssRhLLHAQuG..I.shDpsVTPGsRIPhGcEP.-.lsHlpssAtaHpYasSGISDIhslDcTl+pNP-AhlslsKGAhshGhR.FTsNlussDLlRVTGYhVKhSDltKacp.cuuRpNTThLGutAscNoplhpRps.R ...........................................................V.LoPEQKRHFLALEAENs..LPY.Pp.LPs-ARcAL....DEGVICDMFEGHAPYKPRYVLPDYARFLANGSEWLELEGAKDLDDALSLLTILYHHVPSVTSMPVYLGQLDALLQPYVc.ILTQDcIDlRIKRFWRYLD.......R..TLP.DA.....FMHANIG....P....uDoPlTR.....AI.....LR.....ADAEL....KQVuPNLTFIY......DsEIT......PDDLLLEVAKNICE.CSKPHIAN..GP.V.aDKIF.......T......K...G....tYG...........IVSCY.........N.........S..L.........PLuG..GG..STLVRLNLKAlAERS.c........S..lD...DFFo.RTLPHYCpQQIAIIDuR...........CE...........FLYp....pSHFF..ENSFLV.......KEGLIsP..E......R..F.......sP.......MFGMYGLAEAVNLLCEpEG.lsA.R..............YGKpssANElGYRIS....AQLA-FVsNTPVKY.................GWppRAh..L..H.AQS..G...I...SSD.I.G.TTPGARLPYGDEP.D.PITHLQTVAPHHAYYaSGISDILTLDETIKRNPQALVQLCLG..AFK.AGMREFTANVSGNDLVRVTG..YMVRLSDLEKaRA.EGSRTNTTWLGEEAARNTRILERQPR...................................................................................................................................................................................................................................................................................... 
0 14 27 38 +11063 PF11231 DUF3034 Protein of unknown function (DUF3034) Pollington J, Finn RD anon Pfam-B_003362 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 27.80 26.70 19.90 23.60 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.44 0.70 -5.02 28 135 2009-09-11 03:37:16 2008-08-13 14:21:12 3 1 128 0 44 144 8 247.80 49 87.40 CHANGED ATGGloslEGuAGGGIsPWAlluGYuocsEhGuouhhTcVslsDYsLsshGAuluhtsRlELShA+QsFDlss....hust........................s.clpQDIhGsKlRLhGDllYsp...hPQlSlGlQaK+.c-.ts...............lspulGA.p-DpGoDhYluAoKLaLsuhhGpNlLlNsTlRuT+ANQhGLLGFGGs...pps.....sapl.hEuSlAllLs..+phAlGsEYRpKPDNLu..........hspE-DWtDlFlAaFPNK+LSlTuAYsDLGsIAsh.......ccQpGhYLSlQu ......ATGGsopIEGSuGGGIsPWAsluGYuopsEhGs.oAasT+lsls.DY..pLsVtGsul..uhsNRlELShA+QpFDlss..........htt.........................................s.plsQDlhGsKlRLhGDllYsp...hPQlSlGlQaK+.pD.hs................lPphlGA.ccDpGsDhYluAoKLahsuhhGhNlLlNuTlRhT+ANQhGLLGFGGs...pps............uhplhhEuSsAllls.....phAlGsEYRpKPs.NLu........................hupEsDWtDlFlAaFPNKHlSlssAYscLGpIAsh........csQpGhYLSlQ................ 0 10 27 38 +11064 PF11232 Med25 Med25_PTOV1;Med25_PTOV-SD2; Mediator complex subunit 25 PTOV activation and synapsin 2 Coggill P anon manual Domain Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-active part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [4]. 
The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA domain, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This family is the combined PTOV and SD2 domains. the PTOV domain being the domain through which Med25 co-operates with the histone acetyltransferase CBP, but the function of the SD2 domain is unclear [3]. 25.00 25.00 29.30 41.30 21.40 24.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.99 0.71 -4.54 7 184 2009-01-15 18:05:59 2008-08-13 14:21:37 3 11 64 4 76 182 0 147.60 62 31.02 CHANGED .ppssp+.LhWSGsLEWpEps+s....sps+lp+oL.Cplhls.t-.p..lcT-pWPpKLhMQLIPppLLsslGshF.+sS+hV.F+.T..ssEsLcuLhphM...usGFAGClHF...PppssC-l+VLhLLYSsc+phFlGhIPpDQusFVptlRpVlspp+ps ...............ptlsNKhLAWSGVLEWQEKs+P...-sssKlpRoLPCQVYVNp..GE.s...L+T.-QWPpKLIMQLIPQQ.LLT.TLGPLF..RN.S..+h.VQFHF.......T...+.....Dh.EoLKuLhRIM....sNG.F.AGCVHF...Pap...u...sCEVRVLML...L..YSScKKIFhGLIPaDQSuFVNuIRpVIss+KQ.s......... 0 20 23 39 +11065 PF11233 DUF3035 Protein of unknown function (DUF3035) Pollington J, Finn RD anon Pfam-B_003372 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Alphaproteobacteria. 
25.00 25.00 25.30 28.10 23.40 22.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.91 0.71 -4.18 39 170 2009-01-15 18:05:59 2008-08-13 14:37:10 3 2 168 0 49 144 739 125.90 27 84.80 CHANGED slsL...uuCut.....shpslttspssPDEFsllsptPLshPssas..LssPsPGssp.sD...sPpspAlsALsG..tt.......shssu-tuLlspAuphussssIRphlssE-tphtc+ssthsphhhhspsshhtshtpt..lcs.t-htthcp.sGstssuusP ......h.hhlouCut.....phtcsluhspssPDEapVhpptPLphPPsat..LssP..ssGt.s...p.p-....ssttpuh....tsl.s..............tshstupthllttu.st..t.tsshp......phthhtttt.h...hhh..t....s.....................................t........................................... 0 18 37 39 +11067 PF11235 Med25_SD1 Mediator complex subunit 25 synapsin 1 Coggill P anon manual Domain The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA, domain, this SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This The function of the SD domains is unclear [3]. 25.00 25.00 53.10 52.60 21.70 21.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.54 0.71 -3.94 3 61 2009-01-15 18:05:59 2008-08-13 14:48:58 3 6 30 0 22 54 0 153.70 77 22.21 CHANGED LPVGGu.SVPGsh.sKQ.VSLPPhsVlPPQluuAPQpPLPPV..pPPQMQVPQNsoLoAAHsAAQsAVEAAKNQKsu.uNRF..sslsPh.p.APuVGPPFSQsPAPsLPPG...P.hsPKPS....PASQsSLVTTVoPGsG.APV.hQQQssPQQPssPSMssTsAlGGV..QuPQPSQ.Q .................LPVGGG.SAPGPLQsKQPVPLPPAsPSG.AoLSAAPQQ..PLPPV...PPQYQVPGN...LSAAQVAAQNAVEAAKNQKAGLGPRF.....SPIsPLQQAAP.GVGPPFSQAPAP.L.PPG...PPGAPK.PP.......PASQPSLVSTVAPGuGLAPs........AQP.GAPS.MAGTVAPGGV..SGPSPAQL.G........................................ 
0 1 3 7 +11068 PF11236 DUF3037 Protein of unknown function (DUF3037) Pollington J, Finn RD anon Pfam-B_003382 (release 23.0) Family This bacterial family of proteins has no known function. 21.30 21.30 22.30 21.70 21.10 19.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.27 0.71 -4.04 37 274 2009-01-15 18:05:59 2008-08-13 15:41:15 3 1 266 0 105 270 22 115.80 32 66.30 CHANGED YAllRlVP+VERpEFlNlGVlLastpptaLts+h.plDc.pRLpuhssclDhchlpttLpuhptlCsGspsuGs..........luthshtcRF+WLoAsRSollQsStsHsGhss.DsstpL-+LhcphV ........YAllRhVP+VERpEFlNlGVllast.p...ppalps+h..plcp..sRLpshs.s.ch......D.hsslcttLcuhcplstGsppuGs.............hut.s.h...spRF+..W.LsAsRSo..llQsS.sHsG..hss...D.ttpLc+LhcphV..................................................... 0 46 87 101 +11069 PF11237 DUF3038 Protein of unknown function (DUF3038) Pollington J, Finn RD anon Pfam-B_003387 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 25.00 25.00 58.10 58.00 20.30 19.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.74 0.71 -4.45 26 109 2009-01-15 18:05:59 2008-08-13 15:46:52 3 1 71 0 43 118 119 162.30 35 84.26 CHANGED hs++spp+LDLLLLAlEuLslsuuEuhLtsuppLsLpshhssRVpLW+hRssNPlR+us.pRpsLshtEscALlhllsthAcphpshlRplL......supp.s.hspthhupahcRFpshhpcRMN.+Rutl.hh.s.spphspLthcLLhpLhhsoGssGhpRLhhSLhD ......cphpppLDLlLLAlEuLshsuuEshLtsuppLsLpshlssRlsLW+hRssNPlR+ut..tRppLslp-A+uLlhIlsahA+phpshIRphL.h.pQhpppphs.tpshhLupYl-+FpshappRMN.c+stl.hh..s.p-plspLAlpLLhpLLFsoGssGhpRLWhoLhD... 0 6 30 41 +11070 PF11238 DUF3039 Protein of unknown function (DUF3039) Pollington J, Finn RD anon Pfam-B_003408 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
21.80 21.80 34.50 32.20 20.90 20.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.87 0.72 -4.52 19 435 2009-01-15 18:05:59 2008-08-13 15:55:13 3 1 428 0 112 273 89 58.20 56 62.78 CHANGED psGDp-RFuHYV+K-KIhESAlsGpPVlALCGKlWsPsRDPc+.aPVCPcCKEIYEshp ....s.suDs.+FhHYVpK-KIs-SAVsGs.VVALCGcVassoRsPc.....PVCP-CKcIY-uhp..... 0 33 82 104 +11071 PF11239 DUF3040 Protein of unknown function (DUF3040) Pollington J, Finn RD anon Pfam-B_003409 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.67 0.72 -3.93 28 478 2009-01-15 18:05:59 2008-08-13 16:00:21 3 1 392 0 158 361 191 86.20 37 67.41 CHANGED MPLSEcEpRhL-plERuLhs-DPcFAsslpu........sshptsstRph....lhulsshVhGlslLlsGlshphhh......lulhGFllMhuuslhu ..................MPLS-cEQR.hLcplEpuLhs-DP+FAsulps..........................sthct.ss.tRph.........lhu..s...s.h..h.l.lG.LuhLVuGVshspsh.........LuVhGFllMhuushh............................. 0 67 123 152 +11072 PF11240 DUF3042 Protein of unknown function (DUF3042) Pollington J, Finn RD anon Pfam-B_003420 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 25.00 25.00 32.60 31.20 20.00 18.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.51 0.72 -4.17 17 543 2009-01-15 18:05:59 2008-08-13 16:08:46 3 1 531 0 49 136 0 53.90 62 94.27 CHANGED h+pFspGhlsGsluTluAlAGulhshKKslI-P.EcKtshIEENRK+AsRKphu ..KsFuKGllTGVsuTVAAVAGAVauhKKpVIEPEEp....KsAFIEENRKKAAR+RVS...... 0 8 22 36 +11073 PF11241 DUF3043 Protein of unknown function (DUF3043) Polligton J, Finn RD anon Pfam-B_003428 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins. 
This cannot be confirmed. 25.00 25.00 79.90 79.80 23.10 22.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.16 0.71 -4.67 28 434 2009-01-15 18:05:59 2008-08-13 16:14:09 3 1 431 0 114 317 91 175.10 37 78.01 CHANGED sKGRPTPKRcEA-s..................ARpR.......shs.p.-RKsu+ptpRsptpcccp+.ptuMhsG-EpYL.sRD+GP.RRalRDaVDuRhsluphhhPluLlllllhhl....ss.......splphh..sshshhllh.llhllDuhlLuR+lp+tlsp+FPc...psthulshYAhtRAhQhRRhRhP+PpV.cRGpp .....KGRPTP+Rp-Acu.....................sph+.............pLs...p..-+Kts++ppR...tpt+c+ppc........uM.sG-...-...taL.sRDRGP.RtYlRDhVDSRh.slupahMPlALsllllhhs........s...........stlthh....hoh.shl.lll.llhlIDuhhLsR+lp+tlcp+FPs.......t.s.thul.shYAhsRAhQhRRhRsP+PpV.cRGc.t............ 0 35 84 106 +11074 PF11242 DUF2774 Protein of unknown function (DUF2774) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 26.50 26.50 26.50 29.10 26.30 26.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.02 0.72 -3.98 7 26 2009-09-10 16:41:49 2008-08-13 16:41:06 3 2 25 0 0 18 0 60.70 49 66.55 CHANGED hc.cVpVahh.+EtGhsFl-IA+th.slpup-suclWspV-pA+...tcFcs+EKVVYRK+hlN.Kh+p ...Mc.cVhVahL.HEoGhuFIEIA+cl.slpA+-sAhhWscscsAK...t+FcsREKVVYRKRhlNpKlKp.. 0 0 0 0 +11075 PF11243 DUF3045 Protein of unknown function (DUF3045) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as gene protein 30.1. Currently no function is known. 21.40 21.40 21.90 73.90 18.50 17.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.02 0.72 -3.75 2 40 2009-01-15 18:05:59 2008-08-13 16:59:00 3 1 39 0 0 16 0 88.30 84 99.19 CHANGED MFVVHsIh-sc.NTTRDYGHVN.FFRspP.FRu.KDtcIFKcCVEQGFIYl.caahpG.thR.TTYp+oLppL.DElhYNRsVsph.c.c MFVVHTIYENEGNTTRDYGHVNQFFRCNPE...F..RAQKDERIFKKCVEQGFIYV....KHWM.....QGNKVR.TTYHRSLTELNDELIYNRAVNQTLKDE... 
0 0 0 0 +11076 PF11244 Med25_NR-box Mediator complex subunit 25 C-terminal NR box-containing Coggill P anon manual Domain The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA, domain, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and this C-terminal NR box-containing domain (646-650) from C69-747. The NR box of MED25 is critical for its recruitment to the promoter, probably through an interaction with pre bound RAR [3]. 28.40 28.40 28.60 28.90 26.10 28.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.34 0.72 -3.53 7 67 2009-01-15 18:05:59 2008-08-13 17:52:11 3 7 31 0 14 67 0 76.00 58 13.24 CHANGED pppptstsshs...pQsht..p.............h......sstp...pQh.Q.hus..GQhhhpusspus....thp...ttpshtst....shhh.hh .......................HLQPPGAPALLPPPHQGLGQPQ............LGPPLLHPPPAQSWPuQLPP.RAPLP.GQhhhuGGsRuPlspsGLQP....SVMED.....DILMDLI......... 0 3 3 4 +11077 PF11245 DUF2544 Protein of unknown function (DUF2544) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
25.00 25.00 25.50 70.20 19.10 18.20 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.63 0.70 -5.02 8 347 2009-01-15 18:05:59 2008-08-14 09:01:38 3 1 334 0 4 148 0 222.80 56 84.50 CHANGED FaLsVsTPcuVtYGpYK..uhhhps-pLsllSWoG..uusAPolsLpDhsp.lspSsCPGLps.......osWs.CshhshsVhV-uDl..a.GCPWlVShassSpsPh......uhssYsGPss+sSlCPoVsVAsYDlSWsENYVsHsKsLpLQSTGGslEpTLSTYLMEsGKLCDGSpa...D-RGAYCRaVuQhlTFoooGCDsAcVTVTPscHPITDKQLHDMVVRVDTSS..RQPIDSTCRFQYlLNEL .....F.loVlTPppVtYG.p.Yp...o.h..s.hpG-sLpllSWS...G...sssAPoLsLpDacs..lspSsCPGlst.......phap.CuhhThclTV.so.D.s..Y.GCPWlsSh..ashosss.......uhsoYouPTs+solCPolPVuoYDISWsENYVsHsKsLpLQSTGuslppTLSTYLMEuGKLCDGSpF...DsRGAYCRhVSphlTFoo.GCDpupVTVTPsRHPlTD+pLHDhVVpVsTSS..pQP.IDSTCRFQYlLNEL............. 0 1 1 2 +11078 PF11246 Phage_gp53 Base plate wedge protein 53 Pollington J, Finn RD anon PRODOM Family The baseplate of bacteriophage T4 controls host cell recognition, attachment, tail sheath contraction and viral DNA ejection. The structure of the baseplate suggests a mechanism of baseplate structural transition during the initial stages of T4 infection. The baseplate is assembled from six identical wedges that surround the central hub. Gp53, along with other T4 gene products, combine sequentially to assemble a wedge [1]. 
20.60 20.60 20.90 67.30 20.10 19.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.21 0.71 -4.84 12 70 2009-01-15 18:05:59 2008-08-14 09:05:47 3 1 68 0 0 63 1097 187.90 33 87.24 CHANGED M.lFoaFsslcYtup.s.................ssIFRsa+hYastshhsaphcsYhlsGu.RPEplApplYGNspLYWllLMhNslhDPaasWlpsp-ssYphAtp+Ysps....splhaHhstpuc+a...aslVphPcsstsWYDh...............GspsppalpapGslts............VsshEtphhcNEp+RpItIlsssslpsFls-hhR.MEts ...............M.hFsaFssl.Ysspss.........tp....h.hpsIFRph+hhhchhthshhhcsYhIpsu.RPEplApplYGsspLaWllLhhNslhDsahsW.psp-shaptstp+Ysss....splhYalspc.spca...hsllphscs.tshY-h...............GshspphhphpsuLss...........................VssaEttlhcNEpKRpIpIlsss.lssFls-hh+.hEh.s............ 0 0 0 0 +11079 PF11247 DUF2675 Protein of unknown function (DUF2675) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as Gene protein 5.5. Currently no function is known. 25.00 25.00 30.30 43.10 20.40 20.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.22 0.72 -3.93 5 48 2009-01-15 18:05:59 2008-08-14 10:08:16 3 1 22 0 0 46 0 89.70 54 82.40 CHANGED MAMTK+FKVSFDVTAVlsSEsEcphc-sLl-LAKKAGAGEcl.oPh-REhLVQuLTHGsEGAsuFslKpGlREuIK-tasEhS..Dps.hKlSPATVREVF ...MshsKph+VoFphthhh.pp..tlLcK-hL+LsKpVGuGthV.Nu+Q+EhlVQhLTHGhEGlhoFVVRoShREAIKDMHEEYu..-KcoFK.SPATVREVF........ 0 0 0 0 +11080 PF11248 DUF3046 Protein of unknown function (DUF3046) Pollington J, Finn RD anon Pfam-B_3651 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
25.00 25.00 25.70 25.20 21.10 16.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.82 0.72 -3.99 23 439 2009-01-15 18:05:59 2008-08-14 11:27:04 3 2 408 0 109 273 72 60.60 40 73.10 CHANGED MRLTEFaphlp-cFGsuYucslspDHVLusLGGRTsspAl-sGs-sRpVWpALCs-a..DVPtcc .......MR.TpFaphls-tFGssaup.lstDpVLsuL.uuRTstpAl-cGl-s+tVWpAlCssh..-lPtp...... 0 33 80 100 +11081 PF11249 DUF3047 Protein of unknown function (DUF3047) Pollington J, Finn RD anon Pfam-B_3654 (release 23.0) Family This bacterial family of proteins has no known function. 21.30 21.30 22.90 22.20 19.40 17.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.15 0.71 -4.90 39 127 2009-01-15 18:05:59 2008-08-14 11:30:24 3 2 110 0 59 135 779 187.20 27 71.98 CHANGED ssuWp.hthss...tpTcYplh...pp....sspt.sl+AposuuASsLhpchslc.....ppssh..LpWpW+lsphhs..ssDtp............p+suDDhsuRlhVsFc....thshh.pht...............sLsYlWus.phshGsh..hsNPao.spsphlslpoGss.psGcWhscpRsltsDYc+sFG.........cc....sstlsululhoDoDNTsspupAaYGDIpl ..................................................tsWp.hth.t...ttopYphs..pp.......tsttsLcupucsuuSsLhp...p.hs.lc......hppssh..LpWpW+lpphhs..ssD..tp............p+suDDhshRlaVsFc.......thshh.pst................sLsYlWus.p.....tshGsh..hsNsas...s+sphll........lcoGss.psGpWhsccRslhtDac+sFG.........cc....sstlsululhoDoDNTtspupAaauDIth............. 0 18 40 53 +11082 PF11250 DUF3049 Protein of unknown function (DUF3049) Pollington J, Finn RD anon Pfam-B_3659 (release 23.0) Family This eukaryotic family of proteins has no known function. 
25.00 25.00 29.00 27.60 24.70 21.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.83 0.72 -4.20 24 167 2009-09-11 15:04:28 2008-08-14 11:41:50 3 2 18 0 118 149 0 55.40 38 19.82 CHANGED psFPPPlsolsttt......sthhhRscRc-GRLVlptVpsss.tp..hhpApRpsGRLpLphs ..........sFPPPlsolu..pts.........s.lhhRspRp-GR.L..l..LptVp...lss.pp...hhcApR.psGRLpLphh........ 0 11 68 95 +11083 PF11251 DUF3050 Protein of unknown function (DUF3050) Pollington J, Finn RD anon Pfam-B_3661 (release 23.0) Family This bacterial family of proteins has no known function. 22.20 22.20 25.70 23.60 22.00 21.30 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.61 0.70 -5.12 32 145 2012-10-02 21:56:19 2008-08-14 11:51:00 3 1 136 0 65 178 375 227.80 46 88.88 CHANGED pLhpHsLYpplpol--LphFMEpHVFAVWDFMSLLKuLQppLTssssPWlP.stssphtpLINEIVluEEoD.shpG.....thtSHFEhYLcAMccsGAsTptIcpFlphlppG.sl.tAlppsslPtsstpFlphTaclIpsupsHplAAAFsaGREcLIPsMFppllcph...shsppphspF+YYL-RHIELDuD-HGPhAhpMlppLCGsDspKapEstpsuppALcpRltLWDuI ...............................................LhpHPlYsplpolcsLphFMEpHVFAVWDFMSLlKpLQpcLTCsplPWlP.ssss.pstRLINEIVLuEESDpt.sG.......tthSHFELYLcAMc-sGA.sTptIcpFlphlp......pGhsl............tpALpp.s......s......lstssppFlptThchltputsHp.......lAAAFsaGREclIPsMFpsllcch...shs.spphspFpYYL-RHIElDu--HGPhAhphlscLCusDsp.+hpE.shpsuhpAlpsRlpLWDul......... 1 30 51 58 +11084 PF11252 DUF3051 Protein of unknown function (DUF3051) Pollington J, Finn RD anon Pfam-B_3671 (release 23.0) Family This viral family of proteins has no known function. 
21.30 21.30 21.90 299.10 19.20 21.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.33 0.71 -4.81 5 73 2009-01-15 18:05:59 2008-08-14 11:55:00 3 1 1 0 0 64 0 189.00 94 84.08 CHANGED WLEHIRLIVVTMVPILLLPDTTIGRHIHIKRQYLQSVLLWLARNKLYYRVKRCRSKRFVLEQDHIGPELFRTQLLQYAGLDVFQDVARLQDFLERGYDTFRSVLGVHLLEHERFVSRILEQPIMEPDQAVRLCFHYHNTLDHDDDDSGLETDFNNLSLGRPYINGLPPNEKIIFIHSNLVGHSTsSHDI WLEHIRLIVVTMVPILLLPDTTIGRHIHIKRQYLpSVLLWLARNKLYYRVKRCRSKRFVLEQDHIGPELFRTQLLQYAGLDVFQDVARLQDFLERGYDoFRSVLGVHLLEHERFVSRILEQPIMEPDQAVRLCFHYHNsLDHDDDDSGLETDFNNLSLGRPYINGLPPsEKIIFIHSNLVG+STYSHDI... 0 0 0 0 +11085 PF11253 DUF3052 Protein of unknown function (DUF3052) Pollington J, Finn RD anon Pfam-B_3674 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 20.90 20.90 20.90 20.90 20.70 20.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.37 0.71 -4.54 19 439 2009-01-15 18:05:59 2008-08-14 11:58:01 3 1 432 0 121 286 61 125.90 48 86.46 CHANGED Ap+LGhpsGtlVQElGaD-DsDp-lRcAlE-shGs-LlDEDs--VVDsVLLWWR.--DG...DLsDuLVDAhssLu-sGhlWlLTPKsGRsGaV-Pu-IuEAAsTAGLsQTousslGstWsGoRLlt.P+u+ ......................................................Ap+LGhpsGplVQEhGWD-DsD--lRpslE-hhGs-LlDE.DsD-slDlVlLWWR.--DG...DLsDsLVDAh.ssLu.-cG.hIWllTPK.sG+..s..GplpPu-..luEuAsTAGLstToohsluptWsup+LVp.+..t.... 1 38 85 108 +11086 PF11254 DUF3053 Protein of unknown function (DUF3053) Pollington J, Finn RD anon Pfam-B_3678 (release 23.0) Family Some members in this family of proteins are annotated as the membrane protein YiaF. No function is currently known. 
20.70 20.70 21.00 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.55 0.70 -5.10 8 635 2009-01-15 18:05:59 2008-08-14 12:50:38 3 1 566 0 51 216 3 224.70 65 95.30 CHANGED sRhFtP.....LLAlLlsh.LuuC.GDKEPEQRKAFI-FLQoplhcS.Gs+lPsLTs-QKcsFGsYssDY.AILosFSQphspAlsuu.pssltQhssIRospDhls+RDsLcpupsuLshlupplpssKupADsA+usLKQP-DLKsVaDpAas+TVosPAssLhsslPhssuhlpsLlpVGDaLpsQGsQVpFsuuu......lQFpsspQsspaNphlus.....LssQtpshhsAQpplsslt .................SRWFAP.....lsALLMVlSLSGC.FDKEGDQRKAFI.DFLQNTVMRS.GERLPTLTADQKKQFGPFVSDY.A.ILYGY...SQQVNQAMDSGLRPVVD.SVNAIRVPQDYlT.QpsPLRphNGSLGVLAQQLQNAKLQADAAHuALKQ.s.DDLKPVFDQAasKVVTsPADALQPLIPAAQsFTQQLVhVGDYIAQQGTQVSFVANG......IQFPTSQQASpYNpLIuP.....LsAQHQAFNQAWTsAVss.s............................................ 0 2 11 30 +11087 PF11255 DUF3054 Protein of unknown function (DUF3054) Pollington J, Finn RD anon Pfam-B_3684 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 25.00 25.00 27.60 26.90 24.90 24.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.56 0.71 -4.23 34 368 2009-01-15 18:05:59 2008-08-14 12:51:26 3 2 352 0 112 243 15 107.70 39 76.76 CHANGED hlluDllslllFsslGRtSHups....hsssslhtTuhPFLhGhhluhhls...tuapt........pthhssulhlWlsslslGhlLRsh..putusshoFll...VuhsssullLlGWRulst ......lsuDVlsVllFuslGR.t.SHucs....LsssGlhpTuhPFlhGhslGWlls........tshpt......s........sthh.sGlllWlsTlllGMllRth..oupGsu...s...s...Fll...VAsshsAlhLlGWRAls.u........... 1 33 85 106 +11088 PF11256 DUF3055 Protein of unknown function (DUF3055) Pollington J, Finn RD anon Pfam-B_3685 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Firmicutes. 
25.00 25.00 26.80 54.90 23.30 18.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.61 0.72 -3.85 9 448 2009-01-15 18:05:59 2008-08-14 12:57:36 3 1 339 0 39 150 0 80.60 55 88.16 CHANGED LYDDsEps+sRFVuFhGEppRaDLullaTsRaaGKsLVlshQosRFAIlGtDDlcEsGYLp+lapl.sEE-ApELppFLtEl ..LYDDsEpopVpFVGFhG.E.c.oRYDLhLlaTsRHYGKTLVLsMQTNKFuIIGsDDLcE.GYltHlhul.stEEu-ElppaLpEl..... 0 12 23 32 +11090 PF11258 DUF3048 Protein of unknown function (DUF3048) Pollington J, Finn RD anon Pfam-B_3658 (release 23.0) Family Some members in this bacterial family of proteins are annotated as YerB. However currently no function is known. 25.00 25.00 72.10 30.40 24.20 19.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.65 0.70 -5.27 32 284 2009-01-15 18:05:59 2008-08-14 13:08:54 3 3 253 1 96 284 560 276.90 28 78.69 CHANGED PLTGhss...pch..ptRPlAVhlsNt.ssApPQuGLspADlVYEslsEGulTRhhAlat.....ophsctlGPVRSAR.Yalchu.ta.cuhasatGuS.....spAhshlcsss...lsslsu.....h....sushF...hR.sssRpAP....HNhYsohcpltpsspp.puhphptt.t..........t.ssssssGptspplplpa....stspssapYD.psptYhRhpsGpsplDtsT....GppltspNlllhcsphpsh...DstG+hs..h-lh......GuGc..uhlhpsGphhclsWc+..sstpshhthhstsGpplshssGpoWl .........................LTG..ht..tth..ptRslAVhlsNp.....ssA.RP.QoGLspADlVYEhh...s...EG.s.l.......TRhhAlap.....sphscplGPVRSuRsYalplupta.culhlatGuo.....st.sh.shlppss.....lsplss..........hp......sushF............aR..sss.RpuP...............HN..hYsshpplt.pshpp..pGh.......ph..stp.phh...........................tts......sss...s...s..uptsp..plplpa............sttps.papYst.psptYhR......h..t.s........G......p......s......p........hD.tps...........uptlsspNlll.tsshpsh.......DstG.p....hs.....lchh..................uuGp..uhlhpsGchh.cspWp+........ss..sps.hhhhh..s.ts.G.p.......l.lssGpTWl........................ 
0 51 81 89 +11091 PF11259 DUF3060 Protein of unknown function (DUF3060) Pollington J, Finn RD anon Pfam-B_3702 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. 20.70 20.70 21.10 20.80 19.90 20.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.06 0.72 -4.35 29 317 2009-01-15 18:05:59 2008-08-14 13:35:01 3 2 141 0 67 209 0 62.90 38 39.12 CHANGED sVuGssNslslsGsCssVTVsGssNpVss-s...lcp.lolsGhsNoV...................oYcsG.....sPpl.sspGus....Ns ....................sVsGssNolsls.GsCtsVTVsGssNpVssDs...lc..s.lslsGhsNsV...................sa+s.G.....sPpI.sstGssN................................................................................. 0 13 34 55 +11092 PF11260 Spidroin_MaSp Major ampullate spidroin 1 and 2 Pollington J, Finn RD anon Pfam-B_001419 (release 23.0) Family Dragline silk is composed of two proteins, major ampullate spidroin 1 (MaSp1) and major ampullate spidroin 2 (MaSp2) [1]. MaSp1 contains five alpha-helices [2]. Only the C-terminus of the proteins are shown. 25.00 25.00 25.90 25.20 22.30 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.79 0.72 -3.90 26 173 2009-01-15 18:05:59 2008-08-14 13:44:16 3 9 43 3 0 180 0 82.40 52 17.94 CHANGED susSRLuSsuAuuRVSSslSoLlSu.....GssssuuLussISslsSploASssGLSuC-llVQsLLElloALlplLuSAslGpVNhuus .......suuSRLSSPsAuSRVSS...AVSsLlSu.......GssNsAALuNsISslsSQlSuSsPGLSuC-VLlQALLElloALlpILuSSsIGpVNhuus.................. 0 0 0 0 +11093 PF11261 IRF-2BP1_2 Interferon regulatory factor 2-binding protein zinc finger Pollington J, Finn RD, Bateman A anon Pfam-B_1430 (release 23.0) Domain IRF-2BP1 and IRF-2BP2 are nuclear transcriptional repressor proteins and can inhibit both enhancer-activated and basal transcription. They both contain N-terminal zinc finger represented in this family and C-terminal RING finger domains [1]. 
25.00 25.00 25.20 39.60 24.60 23.90 hmmbuild --amino -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.75 0.72 -4.51 11 146 2009-01-15 18:05:59 2008-08-14 13:46:33 3 2 75 0 86 140 0 53.40 76 10.11 CHANGED o+RQaCYLCDLPRMPWAMlaDFSEsVCRGCVNYEGADRIEhVI-sARQLKRsHG ...SRRQpCYLCDLPRMPWAMIWDF.....o..EsVCRGCVNYEGADRIEhV.I-oARQLKRuHG................. 0 21 31 55 +11094 PF11262 Tho2 Transcription factor/nuclear export subunit protein 2 Pollington J, Finn RD anon Pfam-B_002604 (release 23.0) Family THO and TREX form a eukaryotic complex which functions in messenger ribonucleoprotein metabolism and plays a role in preventing the transcription-associated genetic instability [1,2]. Tho2, along with four other subunits forms THO [2] 23.10 23.10 23.10 23.50 22.80 22.80 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.84 0.70 -5.37 35 353 2009-11-24 09:31:40 2008-08-14 13:48:43 3 7 271 0 252 345 7 279.40 35 17.64 CHANGED pshshlsss.hassFWpLSLhDlthP..pchY-p..plp+lcpplpp..............tpsthpppp+chcplp..pphppLtsEtppH.hp+sppspppLpcppsp.Wh......ttsspplp........pFlppClhPRhlhSshDAlasu+FlphLaphpsstFshhshhcp...lhpsphLhshlhssTppEApNlGhFhsclLphlppWppcpp.hacc.s.......................hp.......p.pphhsac...papphla+Waphlhpslhp.........................................................................................................................sL..ppp-ahpI+.NulhhLppllshFPhlpphupplhptlcpltpp-............pRcDlplsusullupLp 
........................hWpslosp.FYsTFWpLohYDltlP..pptY-c..Els+L+tphpsl................................................p-.pphs.s++c+..-+..cchp..sh.c+LhpEpccp.hc+hppshp+Lpp.c..Kc.p.Wh..................................tsscspplp..............phLQhClhPRslhSshD....AlYCu+Flc.hlH.p.pTPsFs.TlhhhDp.................lFp..s..lhhhlhsCTppEApphGRFLsphLcplt+W+s..-cs..ha-+Es...................................tshPGFhhhhc.................p.ss.lsa-...sF..++lhaKWHhplspuhhp.........................................................................................................................CL..pss-YhpIRNslhlLpcll..s..haPh..l...tph...uptlppplpclpppE.............pR.DlhhhAtuhhupL......................................... 0 96 149 211 +11095 PF11263 Attachment_P66 Borrelia burgdorferi attachment protein P66 Pollington J, Finn RD anon Pfam-B_003349 (release 23.0) Family P66 is an outer membrane protein in Borrelia burgdorferi, the agent of Lyme disease. P66 has a role in the attachment of Borrelia burgdorferi to human cell-surface receptors [1]. 25.00 25.00 29.50 39.60 23.50 19.50 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.44 0.70 -5.33 7 107 2009-09-11 05:11:33 2008-08-14 13:50:47 3 1 47 0 3 91 0 182.80 58 57.03 CHANGED SDFSIhGHISKKAN........Tsccsp.FsPpsN+LpFsppRosNFAFSlGsGIGhAWNpD-GEKESWuIpGusSYSpRIFGpQDKKSGIGlGIoYGQNLY+PTSSN.plIQpIAtKoFpTLNAEISTYEDNKKGIIPGLGWIASIGlYDLLK-pPpSD..sIIssLTssTssp...sssps....loFscAsKl.GGALYIDYAIPlESIS.sTaIlPYVGsHhLG...SLpuScKolYLKsGLEL-pLIKLT ...............................................P.tNtLpFspppp.shtFShGsuIGhAWNpDpGEcESWtlpGupSYspRlFGtQDKKSGluhGIsYGpsLY+s..psop.phlppIutpuFQoLNsElSoYEDNKKGlIsGLGWIsSIGlYDlLRpKShEN...shhos.hosNppst.....oTssps.......loFp-AhKL.GhALYLDYAIPlcShSspsYllPYlGAahL.G...plp.sspplYLKsGLpL-pLI+hT..... 
0 1 1 1 +11096 PF11264 ThylakoidFormat Thylakoid formation protein Pollington J, Finn RD anon Pfam-B_003380 (release 23.0) Family THF1 is localised to the outer plastid membrane and the stroma. THF1 has a role in sugar signalling [1]. THF1 is also thought to have a role in chloroplast and leaf development [2]. THF1 has been shown to play a crucial role in vesicle-mediated thylakoid membrane biogenesis [2]. 19.80 19.80 20.10 55.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.27 0.70 -4.72 27 122 2009-01-15 18:05:59 2008-08-14 13:52:47 3 3 108 0 59 137 144 204.00 37 80.16 CHANGED TluDoKRsFapsaP+sIsulYRRll-ELLVElHLLShpssFphDslFAlGlspsF-pFhpGYpPppchsslFsALCsussh-sppl+psApphtphspspshpplhpaLppt...stts.t.lts.htthttp.pa+YSRLhAlGLasLLppups...ttc.pphpchlcclupsLsLstc+VcKDLslY+SNL-KhpQAhELh-EhltuER+K+Ecptt .........TVuDoKpsFhptap+PIsolYppllpELLVphHLlphppsFpYDslFALGlVTsa-phMpGY.Ppp-+suIFpAhhpA..lstDPcpYRpDApplpphA+ut.sssslhphhsp....ttphps.LtshhpthtspspFpYSRhaAlGLapLL-.uss........pcsphlcc.lspuLslspc+lp+DL-lYRusLpKhsQAhcllcEhlptE+KK+cppt.t........................ 0 16 42 54 +11097 PF11265 Med25_VWA Mediator complex subunit 25 von Willebrand factor type A Coggill P anon manual Family The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA domain which is this one, an SD2 domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This VWA or von Willebrand factor type A domain when bound to RAR and the histone acetyltransferase CBP is responsible for recruiting Med1 to the rest of the Mediator complex [3]. 
20.40 20.40 20.50 21.40 20.00 19.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.45 0.70 -5.32 5 158 2012-10-10 16:07:06 2008-08-14 13:59:46 3 13 86 0 84 159 1 182.70 42 27.72 CHANGED Msss.Scu.hpt...VuDVVFVIEGTANLGPYFEoL+ocYILPsIEYFNGGPlsEo-..FGu-hGuTQYuLVVFNTsustPEshVQsauPTpssa-FlpWL-uIpFsGGGuESCSLlAEGLusALQhFDDhpcMRpplGQTshHRHCILICNSPPYlLPo..............sESsoYsG+TsDsLsslhs..ERuIaLSIIuPRKLPALRtLF-KAsssssl.s.possDYAK-PpHMVLl+Gls ...................................................s.lADVVFVIEGTAsLGPYapsL+ppYllPslEYF..suGs.s-pc...h.tp.h.sss.YulVVaposs.sh..spshlp.s.hu.Tpss.phlphlct......l.........pFhG.GGhEssu.lAEG......LusAL.thF-...-hpph.R.pt..h.......sps..p+hC.lLIsNS....PP..Y.hPs..............s-.sh..pa.pu.t..os....-p..Lst.ht..cpsIphSlluP.R+lPsLhhLap+Asss..h.......hshups.pahVLlps..s...................................................................................................... 1 24 44 62 +11098 PF11266 DUF3066 Protein of unknown function (DUF3066) Pollington J, Finn RD anon Pfam-B_3735 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.60 25.60 26.20 89.80 25.10 25.50 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.40 0.70 -4.64 15 78 2009-01-15 18:05:59 2008-08-14 14:55:13 3 1 74 1 27 84 174 214.60 68 92.46 CHANGED DFsS-sYKDAYSRINAIVIEGEQEAHDNYIsLucLLPDct-ELp+LAKMEtRHhKGFpACG+NLpVsPDM-FA+cFFutLHuNFQpAhAEGKlVTCLLIQuLIIEuFAIAAYNIYIPVADPFARKITEGVVKDEYoHLNaGEpWLKtNF-osKtELpcAN+pNLPLVW+MLspVssDAclLtM-KEuLlEDFMIAYuEALssIGFoTREIMRMSAtGLs .DFsS-sYKDAYSRINAIVIEGEQEAa-NYIsLupLLP-pp-ELt+LAKME.RHhKGFpACG+NLsVssDMsFA+cFFusLHsNFQpAhtE..GKlVTCLLIQuLlIEuFAIuAYNIYIPVADPFARKITEGVVKDEYoHLNaGEcWLKsNF-sSKsEL.cAN+tNLPLlhpMLsQVssDApVLuMEKEsLlEDFMIuYtEALssIGFooR-IhRMuAhuL.s..... 
0 5 18 25 +11099 PF11267 DUF3067 Protein of unknown function (DUF3067) Pollington J, Finn RD anon Pfam-B_3740 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 29.80 29.00 21.50 18.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.22 0.72 -3.92 16 113 2009-09-11 02:36:25 2008-08-14 14:58:43 3 1 93 1 61 110 105 98.80 42 66.27 CHANGED ucElhpLLhc+WGtSYDlQLp+p+sR.....laLQVMWtYLEQtSFPLoEt-YhtHLsplsphLsthGtuptV+salpsT+-+PRL....GKAVSlPLcl...spRhsEal ...........................p-LtpLlhpKWG+SYDlQLh+p....c..tp......lalQ.......VMW.+YLEQpSFPL.oEpEYht+LsslAphLpsWGssppV+salpp.......o.......+-.......R.PRl.....G.K....A..VSI.l-h.....s.t+.sEah................ 0 19 46 57 +11100 PF11268 DUF3071 Protein of unknown function (DUF3071) Pollington J, Finn RD anon Pfam-B_3805 (release 23.0) Family Some members in this family of proteins are annotated as DNA-binding proteins however this cannot be confirmed. Currently no function is known. 24.10 24.10 24.70 53.90 24.00 24.00 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.98 0.71 -4.52 27 435 2009-01-15 18:05:59 2008-08-14 16:07:47 3 1 433 0 113 326 73 167.20 38 47.77 CHANGED Mp-L+lVulssDGppllhpss....sG..ccaplslDDcLRuAlRts.............................................................hst.......splchEhss...................tLpPR-IQu+IRAGtosEcVAptuGhslp+VcRFttPVLtERs+sAchAptstssps............tsPu........TLuEllsptlss+Ghs.ssspWDAWRcs-spWhVplpapsutpsps.......AcWpas ......................M.cLplls.lpssuppllhpss.................u....ppahlslD-pLRsAltts...................................................................................................................phphphcs................................................sLsPREIQuRIRuGAosE-VAttuGlsls+VcRFAtPVLtERs+hsEhApsstsh+s............s.u.Push..........TLuEllspshsA+Gl.s.sslsWDuWRc-Ds...pWtlplpW.tuut..s..s..pp.........Acapap............................. 
0 34 83 105 +11101 PF11269 DUF3069 Protein of unknown function (DUF3069) Pollington J, Finn RD anon Pfam-B_3783 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Gammaproteobacteria. 25.00 25.00 71.90 71.70 21.30 18.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.76 0.71 -4.03 12 138 2009-01-15 18:05:59 2008-08-14 16:08:23 3 1 138 1 25 67 4 120.10 63 83.47 CHANGED Vlsh-clPEpLh....shhsplhEssEtphpcuWsuLPASApslLsp.EpFHuhh.lupAahtlshlup.phs-hs-s.ss.pcpEYpu.lhs+lhcsslK-hlKpLKKARpDtshhpsh+pVht VlEFDpVPEuMa....cMVTSIHEVSEEsVREAWDoLPASAQNlLDNFEQFHALlSVSQAFAGLNVMEEFPTLNLPcpMoEE-K-tYRAQLLDQVLHNCVKDMVKQlKKARRDPILKR-FK-VF.A.. 0 2 6 17 +11102 PF11270 DUF3070 Protein of unknown function (DUF3070) Pollington J, Finn RD anon Pfam-B_3804 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.60 20.60 28.60 28.60 16.90 15.70 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -7.18 0.72 -4.18 20 37 2009-01-15 18:05:59 2008-08-14 16:09:35 3 2 1 0 37 37 0 32.60 43 23.32 CHANGED hAPsccPP...sE-sHEAP.sschssustpssssu hAPscpP....sE-sH.uP..Schtsu.tsstspu. 0 37 37 37 +11103 PF11271 DUF3068 Protein of unknown function (DUF3068) Pollington J, Finn RD anon Pfam-B_3769 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. 
25.00 25.00 25.60 25.40 23.90 21.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -12.03 0.70 -5.36 22 301 2009-01-15 18:05:59 2008-08-14 16:10:06 3 3 204 0 84 243 5 319.50 26 82.78 CHANGED Rh...hushLhulGshhllhAlllshalsutltclPlDlssss..lstsssssh.s.stss.pt..thps..s.............ppphsstcstssDhlshpsspslhc.............scspphlpshssphslDRpouhslss..............psthssssh..shpGl.a+FPFcsE+coY.aaDshhppshshsasup...-slsGh..psY+FpQpls.sshs.................sh.hh.tt................sstssshshhYsssR.shW........V-PhTGslVphpEp.pchhsssstp................hshl-h.chshs-colps.lspAcctpsplthhschlPhshhhlGllshlsGlhL ....................................................................hushhluLG.ssLlh.hAlllstasssc.ltclPL..DlstTh....hop...s......s...s........s........shss......s..h.h.st....p...h..s.s...ssshsp.............................ppphsl.tssus...sD..s.lslplus..oltRtp.................................tpssssll.Ahs-phs.lsR.couhslsssst......................stpsss..ssl.....t+-Glp.Y+FPF.cTE.+.+..o.Y.aFDshspcshs....hsass-......ccl..sGh...ssY+FpQs....l.s.sslut.............................................tst.sh.hshssth.........................................sspphshshaYs..spR..shW........V-PhoGslVctpEchpcaaupsstp..........................chshhph.phphs--TlpptlstAcst..........pcp..lthhuchlPhshtslGllhLlsGhh........................................... 0 20 56 75 +11104 PF11272 DUF3072 Protein of unknown function (DUF3072) Pollington J, Finn RD anon Pfam-B_3823 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 25.60 25.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.83 0.72 -4.31 13 81 2009-01-15 18:05:59 2008-08-14 16:19:39 3 4 79 0 40 88 5 56.90 53 46.16 CHANGED NsEKDPc-WsTGDEPMTGAQASYL+TLsEpAG..EshsssLoKA-ASchIDcLpppsGR .............hpKDPs-WsTGDEPMTGAQtSYL+TLuccAG....Eshss..sLTKA-ASc+IDcLpppoG+...... 
0 15 28 34 +11105 PF11273 DUF3073 Protein of unknown function (DUF3073) Pollington J, Finn RD anon Pfam-B_3852 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 25.00 25.00 30.80 30.50 21.70 21.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.42 0.72 -3.44 29 430 2009-01-15 18:05:59 2008-08-14 16:25:40 3 1 424 0 111 261 50 67.00 49 84.81 CHANGED GRGRAKAKQTKVAR-LKYpSPsTDhspLpREL..uG.........sspspssss-shsDc.st......................cpYuc.s-t.c .GRGRAKAKQTKVARcLKYpSssTDh-pLpREL..uu........tstssp.sc.s.....-.-...s..hs..D.-.h.s-.................................tDcYsc.st..p...................... 0 34 82 103 +11106 PF11274 DUF3074 Protein of unknown function (DUF3074) Pollington J, Finn RD anon Pfam-B_3858 (release 23.0) Domain This eukaryotic family of proteins has no known function but appears to be part of the START superfamily. 27.50 27.50 27.50 27.50 27.30 27.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.31 0.71 -4.66 28 152 2012-10-02 19:24:03 2008-08-14 16:30:32 3 2 107 0 125 153 0 197.00 26 41.03 CHANGED WhuR+SlHc..................uhsapcacptLpsp.......+s-sEcpa.............lcslsutchlpph..................tst.spsphplhplpapFPh..sho...sRsFsslllshchs.................................ptsscpahllShPlsc....................pss.tpshlpGpYtSVEhlp.hs.................................................................................tpssslEWhMsTpSDsGG..slP+Whpcp....uhPpuIspDlstFLcWs 
..........................................................WhuR+SlHc..................shsapcappsLppp.........+s-sEppa.......................htslsuhcplpph..........................................tssphplhplpaphsh.....shssRsFssLllosphs...................................ts.cpahllShPhpt.........................................ts.s.ppshlhGpYtSVEhlcpl........................................................................................................................................tpsstlEWhMsTpSDsGG...slPcaltcp....uhPsuIspDsshFlpWh................................. 0 33 68 106 +11107 PF11275 DUF3077 Protein of unknown function (DUF3077) Pollington J, Finn RD anon Pfam-B_3820 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.61 0.72 -4.02 19 142 2009-01-15 18:05:59 2008-08-14 17:03:34 3 1 33 0 59 171 1 73.60 36 84.52 CHANGED TsGtssFhpttt.pst.sLFRlpPGlPhpcAh-puShLhGhl+cLThcu.M...-sc.hhhhAu+YLSuMAKALhDDhElGhp ................t.Fh...t...tt..sLFplpsGlPhccAhppAStLhssspcLshcuhh....cs.ppp.hha....AuhaLutMAKALlDDhths........................ 0 0 0 15 +11108 PF11276 DUF3078 Protein of unknown function (DUF3078) Pollington J, Finn RD anon Pfam-B_3846 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 29.80 28.90 20.60 19.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.09 0.72 -3.81 36 294 2012-10-03 17:14:37 2008-08-14 17:08:44 3 2 198 0 84 291 141 93.90 33 25.50 CHANGED hsQsuFs.NWtuGGsssluus...hslshptNYc.+cchpWcNplphpaGlsptc..spc.......h+KosDplclsSphGh..c.ttspWYYShhhsF+TQFssGY ..............hsQstho.NWh.tGGps.shuhh...sslshptNYp...+p+htW-NplphchGhspspucs........................h+posDtlplsSphGh...p..ssppWYYohthpFpTQFssGY............... 
0 31 75 84 +11109 PF11277 Med24_N Mediator complex subunit 24 N-terminal Coggill P anon manual Domain This subunit of the Mediator complex appears to be conserved only from insects to humans. It is essential for correct retinal development in fish. Subunit composition of the mediator contributes to the control of differentiation in the vertebrate CNS as there are divergent functions of the mediator subunits Crsp34/Med27, Trap100/Med24, and Crsp150/Med14 [2]. 25.00 25.00 25.80 28.00 23.60 23.60 hmmbuild -o /dev/null HMM SEED 990 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.89 0.70 -13.65 0.70 -7.05 6 199 2009-01-15 18:05:59 2008-08-14 17:12:07 3 4 83 0 82 183 0 558.40 40 94.03 CHANGED hKsspLKQhILpAWKERWoDhQWuINlKKhhP+GVohDlhNLA-ALLpQAhIGsusNPLlLSYLKaulsuphVSauuVLssISKaDcFsRchClpALLEhh-.hsDsloC+GKuEEsI.Ls+AhlShVaWLLQhhstss-+hp-st-h..ssttEphLptshpsL-+llpSsh.hullaIuKhEEsp.asslccphLclsp.lss...........lsssplcsphccssoL.psh.pMhslcucpl.cshhsolpsLIhlEshhN.ssDTQ.hVtQLhhlKRhp+hPhshhhhEIh+AChluL.pss-socEhhWsAFTFlKlPQlLtpL+thspstp.pD.......ao.DVspAFEhLLp.TPLLDhhDp+CsCsslphLLpEhsKhtLLoEspscpLsAKRsu...hsspLKps-.Nus.pP.NsshILRAEPslosILKTlsuDaSKs.EuLLGVLsphLSGpSLDLlLusAuspGKLKoFsp+hIphNEaoKp.lsGEhuKsAslRAhLFDlSFLMLsalsQTYGSEVILSEuusS....FFEpWlpsCMsEcsKshNPcp.shp.sDssKVEpLlshLNsSs....phKhsphKWcElCLoIPAslhcVL.AWENtsLSstsIpKIhDNIKu+lCShAVCAsuWLsAahpMlt.DEp.KPhsMIpQLsoPlsu...EphhQ..ah+ERlslMspIlc+MptDVhppsssp.Khtsth.p.....ppLsu+tPlcEthc-sa+sVLEKGWlss+uhahL-oLLphGGsaWhsspLVccLL+pphhcchsRsh-llaAIhpLDhppsTlsLLuallPhLLhcpuphcslsDP.uRsLAKLsVaChloohpop.tstsSutptp++RpchcDhspL.sLDshpsSthMRhl.....ussp--sshhuusGsc.....uhsSShSASpL+s........lsh+EPLppsLtsLFhlhSphluocphuP+s.FV.pFlp.hVEsucp.phuslLphhP.uhVppLlKlsuhsc.KV..lLplhDLslPhGRphAspsls 
.............................................................................................................................................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 26 32 56 +11110 PF11278 DUF3079 Protein of unknown function (DUF3079) Pollington J, Finn RD anon Pfam-B_3866 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
21.50 21.50 24.90 42.80 20.70 19.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.91 0.72 -4.42 10 130 2009-01-15 18:05:59 2008-08-15 10:29:55 3 1 129 0 38 95 4 50.40 70 69.48 CHANGED MAKKFPLHPuHPERICWGCDRYCuADALACGNGS-RTQHPsELFG-DWhtau ....MAKKFPLHPpHPERICWGCD+YCssDuLuCGNGSsRTQHPsELhG-DWYch.... 0 5 18 29 +11111 PF11279 DUF3080 Protein of unknown function (DUF3080) Pollington J, Finn RD anon Pfam-B_3870 (release 23.0) Family Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. Currently this family has no known function. 25.00 25.00 48.50 48.30 20.40 19.90 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.10 0.70 -5.28 24 161 2009-01-15 18:05:59 2008-08-15 10:30:32 3 1 160 0 29 132 67 301.60 36 90.76 CHANGED hhcsYhpRLAslLDss...s.sh.shsshsl...Ps+Rp....hhhslsclsluLL-h.hpLppC.pLtpLIuE+NS.LGKVtsshpchpYphphlpuhppCl......p.t.tspslpspLpphhppKpppLshthhNslhsscphcpthohusphLshst.tsphuchhsAlpplsthhtt.........tttpsphssppltph.cpLpppchhGpLhhShpptsthLsssTphLppp.stpslCssspssschphLpNlFhphYltplQPYlApLspthppLts.ltpltpphs.h...hss.hptah................................sshappa+pustpHVphWQpLacpC..uhssu ........s.F-cYlpRlANV.pts................shs.ss..hss.sL.......PcKR-....Lhls.lssloIuLLDu.YpLRpC.uLFpLIAERNShLGKVQDthpcacYQlsLLpulpsCL..................ss..splspsL+spLhslps.KpppLsspaaNhLasS-shRpQLo.uopahstph....shu-ll.ALcpLsslppp.................hhsp...pslsopsLs..stQEsLcKp+llGclhaSLspuosaLcssTptLcsp.sssllCusp+ss........s+hpYL+NVFpp.YlcclQPYLApLDptYhQLsstLshhpp.ps.....hs......................................hpssappFRhAsppHspaWQpLFtRCtlslG........... 0 7 14 23 +11112 PF11280 DUF3081 Protein of unknown function (DUF3081) Pollington J, Finn RD anon Pfam-B_3884 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 64.20 64.10 22.50 19.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.60 0.72 -4.45 22 143 2009-01-15 18:05:59 2008-08-15 10:44:01 3 1 139 0 27 89 11 79.40 54 89.94 CHANGED cl-hpphLpla-pIpppGcp..p-st.....YhhpGlpAapDaDGYTlaLpspsVsLslhFHNoYcl-Y..................-pccphspFh++lpslh ...L-ssKlLpAYEsVMpNGoP....TEaG.....KlYEGIEAauDYDGYNlahRGNGVELKlGFHNTYHLsY..................EQEHL+DSFLKKLuhL.A.. 0 6 15 22 +11113 PF11281 DUF3083 Protein of unknown function (DUF3083) Pollington J, Finn RD anon Pfam-B_3898 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 21.00 21.00 22.70 22.40 19.80 17.00 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.09 0.70 -5.31 12 45 2009-09-11 05:40:26 2008-08-15 10:44:43 3 1 40 0 16 36 16 301.00 55 87.65 CHANGED ssYQpLSctlFpLsEcc-LpNlHlIANDKLPhVRapsEuYshpTscQllFFYNPtYHEupphahsssh+A+KlRllFLATG--lRuNSApFHp+VpphlpcLtspLslpp.plKlRDHQHLoYDlFAKuKGsKpoYGaKLRuIssRYpuRpCsLPpsaouloYsTlslPlsR+l+ppl.sp.psp-.assLYpplpDsFhpAspsppLs+sAMlANGlsPlVRNSch-plssssElQMlGFDPptppsphls+WsuscLV-olpFllsAuppDps-tGaGRFhNpVEpAl+shss-lsl-tp+p-lhlRFHQHISYp ......................sCYQsLSRhlFsLu-pa-L+NVHVIsNDKLPVVRaHsEAYChpTsEQlLFFYNPtYHEAppLasp-saRARKlRIlFLATG-DIRuNSAsFHh+VpclLscLhspLPlpp.plKlRDHQHLoYDLFAKuKGsKEoYGYKLRuIsPRY+ARpCpLPcshuuLTYVTVoLPLSR+LKptlhs-.sssD.FsPLYQ+LpDsFlcAsss+pLs+lA....MlANGLTPLVRNSKa-+l-upsElQMlGFDPsssEpQllp+W-uspLVEssHFsIVAuscDpcDtGaGRFMNpVEsAL+sFssElulD.-R-DLlVRFHQHISYp................ 0 4 6 10 +11114 PF11282 DUF3082 Protein of unknown function (DUF3082) Pollington J, Finn RD anon Pfam-B_3896 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 25.90 25.60 24.20 24.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.65 0.72 -3.92 20 104 2009-01-15 18:05:59 2008-08-15 10:45:45 3 1 98 0 52 104 104 81.20 36 44.12 CHANGED cpuPLshLSGuloSuhlualsahLopplsshFAt+PsphSsslsQsIuoAl+TLllGhshLATFsFAFlulGLhllhlpsLh ...............................sPlpsLhGulhuusluhhhYtLosslsssFut+sls...s.s.h.sppIusslRTLlhGlshLATFlFuhlulGLhllslQlhh...... 0 13 36 48 +11115 PF11283 DUF3084 Protein of unknown function (DUF3084) Pollington J, Finn RD anon Pfam-B_3912 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 50.80 49.90 19.60 19.30 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.63 0.72 -4.12 16 146 2009-01-15 18:05:59 2008-08-15 11:08:44 3 3 145 0 52 150 102 78.60 44 18.38 CHANGED uGalLIlulLlLGGlIATlGDRLGoKVGKARLSlFsLRP+pTAVLlTllTGolISAhTLulLhssS+pLRpGlFpl.-pI .....ualLllslllLGGlIAhlGD+lGo+lGKpRLoLFsLRP+pTAlllTllTGslIuuhT.lullhhsScslRpuLFph-p........ 0 16 36 50 +11116 PF11284 DUF3085 Protein of unknown function (DUF3085) Pollington J, Finn RD anon Pfam-B_3922 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 25.10 25.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.92 0.72 -3.79 24 126 2009-01-15 18:05:59 2008-08-15 11:13:49 3 1 107 0 43 108 2 88.70 37 66.35 CHANGED sslhLVtDcGVYlhusst..hs..sGp...hlsYApGCsPcts..--WachtRpthGGDDhsEhl-s..shlpphlpss.c...Lplphossplplhs ...stllLVKDpGVYlhuptu.cts......sGp.....hlAYAtGCsPcsD...--Wa-hucpphGGDD..FuEhlcs..shhppllpssts...Lhlph.osoplplt.s......... 0 4 22 34 +11117 PF11285 DUF3086 Protein of unknown function (DUF3086) Pollington J, Finn RD anon Pfam-B_3929 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 
23.60 23.60 24.20 96.50 23.00 23.50 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -5.59 18 73 2009-01-15 18:05:59 2008-08-15 11:19:56 3 2 71 0 28 88 163 274.90 53 76.67 CHANGED llplALp-LppRRpuLptEIEcLEpRKcplEpEl+ooFuGpSDsIA+RVKGFQDYLsGALQDLApSsEQLELVsQsshVpPSPLD....ppusssssss......tssssusupsFps-pcLIRptLppapppPDaYAsPWpLRRSlEshcsEhl-DWFFNQGGRGA..ShGSR.+NlLVuuAlIuILu-LYGDpFQsLVLAupPERLGEWRRGLQDsLGLuREDFGPsSGIVLFERuDALlERADRLEEcGElPLIlIDsAEcsV-lslLQFPLWLAFAusPp..Ehhp--c ..................................lp-uLp-LppR+psLphplEpLE+R+ccIcpEh+ooFAGtSpslAhRVpGFpDYLsGuLQDLutuAEQLELls..psshhpPu..Phs...........................ptttsstt............ssss.hsspsFpsppchI+phL-papppPDaYusPWpLRRohEshcsEhlpsWFFsQGGRGAlpShGSR.pNlLluSAlISILscLYGDchpsLlLAssPERLGEWRRGLQDsLGluRpDFGPspGIVLFEps-ALlp+ADRL.ccsplPLIlIDsuEcplslslLQFPLWLAFAssPpph.p..t........................... 0 3 18 26 +11118 PF11286 DUF3087 Protein of unknown function (DUF3087) Pollington J, Finn RD anon Pfam-B_3938 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 22.60 22.60 22.70 22.80 22.50 22.20 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.82 0.71 -5.01 25 105 2009-01-15 18:05:59 2008-08-15 11:24:09 3 1 103 0 37 106 10 163.40 38 96.62 CHANGED MpLppIDKppYR+phNhlhlshlusLslLSLshushLIuLFG.................stusuNF+aNLlGVlluhllsuslLp.ph+s+saMpElhYVWcLKQhhN+IhRKL++lKsAAspsD...hsAlhlLpFYYsu.+QlapLDsNTlTlsslpp-lspLppthtphuLslstcpa-sphL ..MpLppIsKphYR+phNhlhlshlssLslhSLshushLIsLFG.................spusuNFHhNLlGVllulhhssullp.plKs+PaMpElhYVWcLKQhhN+IaRKlpplKAAAp.psD...ssALhhLpFYYsu.+QlapLDsNTlTh.s.sls..p-hspLpphhtphuLsLshspacsshL.............. 
0 4 14 26 +11119 PF11287 DUF3088 Protein of unknown function (DUF3088) Pollington J, Finn RD anon Pfam-B_3952 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 30.90 64.70 24.60 24.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.28 0.71 -4.61 19 111 2009-01-15 18:05:59 2008-08-15 11:28:02 3 1 87 0 36 105 3 111.40 45 85.00 CHANGED s+DhLFLLcPGFtDsut.PGt.FaCscssslEGLLusaPsLtsplDVc+lsasRPRtslltLlG-spQSlPVLlLusspssssss...tstsspRFlsDsccIhchLupRaGhPcs ...........+DhLFLLcPGFtDstp.Ps..FhCsculslEGLLushPsLpsplDVc+lsasRPRcsllAhlGEscQShPVLVLus..stss...ssu...pshsspRFlsssccIhchLuppaGhs+.h............... 0 10 17 28 +11120 PF11288 DUF3089 Protein of unknown function (DUF3089) Pollington J, Finn RD anon Pfam-B_3953 (release 23.0) Domain This family of proteins has no known function but appears to have an alpha/beta hydrolase domain and so is likely to be enzymatic. 23.40 23.40 23.40 23.60 22.80 23.30 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.58 0.70 -5.17 25 100 2012-10-03 11:45:05 2008-08-15 11:32:33 3 2 78 0 52 106 398 198.30 29 55.10 CHANGED tssVFaVaPToa...hstu..tWNuslsssps...thtchhltspAusF.spssclaAP+YRQAoltAa.....h.sspsp..ustA...hchAYsDVtcAFctaLsphssuRPllLAGHSQGuhhLhcLL+ccIuGcP.lpcRllAAYlIGhs.lsl...-ts.sshtslPsCpsssQsGClloasoa..t........ttt..s.su.c..upsts.spphlCsNPlshsssss .............sDVFalaPTs....h......ssss......h..Ns....sl...ssttt..tthspthlttpAusF.ssssplaAPhYRQssltsh...................h.tttss......spps.....hp.hA.YsDVtpAFchYLpph.NpG.RPhILAGHSQGuhhlhcL.L+..c...phssps.lpcRlVAAYllGhs.lss.....pth.tth.slshspssspoGClloasoh...s........................s.t..ths...........ttthhChNPhshtss.s...................................... 
0 23 42 45 +11121 PF11289 DUF3092 Protein of unknown function (DUF3092) Pollington J, Finn RD anon Pfam-B_3988 (release 23.0) Family This viral family of proteins has no known function. 18.70 18.70 20.00 19.80 17.60 17.50 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.92 0.70 -5.18 4 77 2009-01-15 18:05:59 2008-08-15 13:33:13 3 1 69 0 0 37 0 267.00 88 99.20 CHANGED MDLFMSIFTLGuITRQPuKIENASPASTVHATATIPLQAShPFGWLVlGVALLAVFQSASKVIALHKRWQLALYKGlQLVCNLLLLFVTIYSHLLLLAAGMEAQFLYIYALIYILQIlSFCRFIMRCWLCWKC+SKNPLLYDANYFVCWHTpNYDYCIPYNSVTDTIVlTSGDGhopPKLKEDYQIGGYSEDWHSGVKDYVVlHGYFTEVYYQLESTQloTDTGhENATFFIasKLVKDsspVQIHTIDGSSGVVNPAMDPIYDEPTTTTSVP ................MDLFMRFFTLGSITAQPVKIDNASPASTVHATATIPLQASLPFGWLVIGVAFLAVFQSATKIIALNKRWQLALYKGFQFICNLLLLFVTIYSHLLLVAAGMEAQFLYLYALIYFLQCINACRIIMRCWLCWKCKSKNPLLYDANYFVCWHTHNYDYCIPYNSVTDTIVVTEGD.GISTPKLKEDYQIGGYSEDRHSGVKDYVVVHGYFTEVYYQLESTQITTDTGIENATFFIFNKLV.KDPPNVQIHTIDGSSGVANPAMDPIYDEPTTTTSVP................................... 0 0 0 0 +11122 PF11290 DUF3090 Protein of unknown function (DUF3090) Pollington J, Finn RD anon Pfam-B_3954 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 19.50 19.50 19.70 19.70 19.00 19.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.18 0.71 -4.79 15 303 2009-01-15 18:05:59 2008-08-15 13:34:29 3 1 301 0 84 177 117 172.20 51 88.62 CHANGED asFDsP-RFVsGTVG.PGsRTFaLQARsGuRlVSVuLEKsQVulLAE+ls-LLDElsR+tG..sslPss.ssshsDscPL-tPl-EEFRVGThuLuWDscsppVVIEshuls-..............-sDsc....shs-ss-ssDsLRVhLssssARAFAcRApcVVuAGRPsCPLCGpP.LDPcGHlC ........hacpPDRFVsGTVGQPGsRoFaLQA.tc.s.uRVVSVhlEKpQVshLA-+lspLL-Elsp+hG..s.VPPs.ssp.hcDLu..PLcs...Pl..-tEFRVGTMuLuWDsEspplllEh..hAlo-s.................thDsu........llhs-sEpuPD.ulRVhlTsppARpFusRuppVVu.AGRPsCPLCupP.LDPE.GHlC.................... 
2 30 66 80 +11123 PF11291 DUF3091 Protein of unknown function (DUF3091) Pollington J, Finn RD anon Pfam-B_3979 (release 23.0) Family This eukaryotic family of proteins has no known function. 25.00 25.00 31.00 31.00 21.10 21.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.25 0.72 -4.05 11 57 2009-09-11 07:39:49 2008-08-15 13:35:06 3 1 1 0 0 77 0 93.00 33 58.13 CHANGED IcpLp-EIc+DYssaSccslEclp+ppspLcphasp.pSctpNhTCs+PcNIstcDlpsLpshItcpppshhshshacL++caLhplpcsLpNsscc.SE ........hcphccEIc+cYssasccVhEclhppsspLcphasphQSctpshTCs...........cPcNIstc.DlssLpshItp.phshh.hshhpLcpphh.plhcphpN.spp.u.............. 0 0 0 0 +11124 PF11292 DUF3093 Protein of unknown function (DUF3093) Pollington J, Finn RD anon Pfam-B_4007 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. Some members are annotated as alanine rich membrane proteins however this cannot be confirmed. 21.30 21.30 21.30 22.20 21.20 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.73 0.71 -4.39 23 347 2009-01-15 18:05:59 2008-08-15 13:41:23 3 2 338 0 90 226 64 143.40 38 85.77 CHANGED shYcERLWsPhWWWlhushl...sulhAh.lshus.slhsh.lshAllhslusslhLhh.....upscIcV.......ssspLhsGcA+LPsshlucstslsssstputhGppLDstAallhRuWlsshVhlsLsDPsDPTPYWLlSTR+P-cLluALp ......................hYpERL..WsPhh..WWlluhul...hALh..sh.shh.uhs.u.Lssh.lshsll.ssl..ssssllth.....Gp..s+IpV.......ssttLhsGcAplPhshluRstsls.upstpushGpphDsuAFllpRsWltshlhlhLsDPsDPTPYWlVSTR+Pc+LhuAl..................................... 0 26 66 86 +11125 PF11293 DUF3094 Protein of unknown function (DUF3094) Pollington J, Finn RD anon Pfam-B_4017 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
20.90 20.90 20.90 21.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.57 0.72 -4.50 13 47 2009-01-15 18:05:59 2008-08-15 14:02:50 3 1 47 \N 13 42 25 53.60 58 92.01 CHANGED oSRLSPEDQp+V-pYLuuPQHQVERpPFRPWhLhllVLAVsIGLGLLSRLLShLs ..oSRLsPEDQp+V-pYLpuPhHQVERtPFRPWhLhhllLAVVIGLGLLSRLLShLs...... 0 1 5 9 +11126 PF11294 DUF3095 Protein of unknown function (DUF3095) Pollington J, Finn RD anon Pfam-B_4020 (release 23.0) Family Some members in this bacterial family of proteins are annotated as adenylyl cyclase however this cannot be confirmed. Currently no function is known. 20.40 20.40 23.20 22.50 18.00 17.30 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.20 0.70 -5.43 25 117 2009-01-15 18:05:59 2008-08-15 14:20:26 3 2 97 0 48 132 14 331.50 38 95.68 CHANGED FYssLPs..hstFpplsDsstYpPLP-DWslsluDIVsSTtAIusGRYKsVNMsGAusIuAlhNuh....ssh-hPFVFGGDGAshAVPsshhcpuRpALussptascp-asLsLRlulVPVuslRspGh-V+VARausSsslsYAMFuGGGLuWAEsphK.......sucatlssssssst.PDLoGLSCRWspI..PucpGpIlSlllhPsssssstsattlhccllshs-p...sscuuHPls.ps.plphsspuLshEA...RhptG.....t.hhhppltllh.slhuhlhh+p....shphssasscpYhpplspNoDFRKaDDuL+MslDsss-ptcplcshLppupspGhl+YGLHtQspAlMTChVsoshpccHlHFlDGAsGGYAtAAppLKs ....................Fattlsh..hptF.ptlhDss.YpsLP-sWhlulsDIVsSTtAIupGRYKsVNhsGAusIuAlhNuh......sthchP..FVFGGDGAshAlPsshhttAcpALussttasppphsLpLRsulVPVsslRspGhDl+lARatsStphsaAMFsG....GGlsaAEtthK.......tupahl...sss.sst.PDLoGLSCRWssl..suppGtllSlllhPsssssstthttlhpcllshhct....p.ctu+Pl.st.p...t.thphs...p..slthEu......+stts......h..hh...hthhhlhhtshhhhhhhph....th...hsth..s..sppYhpplstsoDFRKaDDuL+hslDssspphpplpthLptutttGhhpYGlHppspAlMTChVsssh.pcHlHFlDGAsGGYAhAAttLK............ 0 9 22 27 +11127 PF11295 DUF3096 Protein of unknown function (DUF3096) Pollington J, Finn RD anon Pfam-B_4028 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
20.70 20.70 21.00 20.90 20.00 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.73 0.72 -4.45 19 138 2009-01-15 18:05:59 2008-08-15 14:25:07 3 2 126 0 65 135 5 38.60 59 76.26 CHANGED PlluLIAGILILlhPRLLNalVAlYLIllGLlGLhshth .....PlluLIAGILILlhPRLLNYIVAlYLIllGllGLhsh..h..... 0 12 33 47 +11128 PF11296 DUF3097 Protein of unknown function (DUF3097) Pollington J, Finn RD anon Pfam-B_4031 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 25.00 25.00 46.70 46.60 20.80 20.00 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.73 0.70 -5.38 18 374 2009-01-15 18:05:59 2008-08-15 14:29:13 3 2 369 0 98 278 7 265.80 56 95.51 CHANGED DcYG.pDVLAsstRsttht...ss-lPsEhGlVVE-suoGFsGAVVpsEput....VpLEDR+G+pRsFPLusG.FLl-GpPVsLstP..tsusAp.......PsR..TASGSlsVtutcARVApASRIaVEGpHDAELVE+VWG-DLRlEGVVVEaLcGVDDLsuhlAcFpPGPGRRlGVLVDHLVsGSKEoRIA-uVst.....sHVLVsGHPYlDIWQAVKPpplGlcuWPsVPRGp-WKpGlCptLGW.tu-pAD...uWpRlLupVcSapDLEPsLLGtVEcLIDFVTs ..............YutDlLu........st.......h..Rp......p..................ss-hPsEhGhVVEDss....oGFVGAVVthEpuh....VsLEDR+G+pRsFPluPG.FhlDGpPVsLstP....pt..usAu......................pR..TASGSlAVsutcARVAtsSRIaVEG+HDAELVE+VWGcDLRlEGVVVEaLsGlDDLsslVA-FpPGPGRRLGVLVDHLVsGSKEoRlA.-p.Vpp.....spaVLVsGHPalDIWQAVKPpRlGlpsWPcVPhspDWKpGlCctLGWPtu......stuD...uW.p+lLupV+sa+DL-PsLlGcVEcLIDFVTt................. 0 30 74 94 +11129 PF11297 DUF3098 Protein of unknown function (DUF3098) Pollington J, Finn RD anon Pfam-B_4061 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 29.70 28.00 24.60 23.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.27 0.72 -4.26 28 232 2009-01-15 18:05:59 2008-08-15 14:34:55 3 1 213 0 70 203 188 67.00 45 81.64 CHANGED hsFsKpNYlllhIGhslIAlGFhlMuGtsSss......tlauah+lplAPhlllhGasl.laAILhpP+ ......hsFsKpNallLsIGhAllllGFlLMoG.......suSs.......-.ssF.p.s.-.IFShRRI+lAPlVsllGFlhhI.YAILh+P+... 0 28 55 67 +11130 PF11298 DUF3099 Protein of unknown function (DUF3099) Pollington J, Finn RD anon Pfam-B_4064 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 23.20 23.20 23.40 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.61 0.72 -4.23 24 391 2009-01-15 18:05:59 2008-08-15 14:43:59 3 2 356 0 114 278 53 69.10 35 54.99 CHANGED hITsAstohp--h+sRh++YslhMulRhssllLAslsh.....ualpllhls..sulsLPWlAVllAN.spssppppcs ...........hITsA.t.u.t.phct.R...+cYhhhMulRsssllhA.slsh.h.....shl.ulshls..sulsLPalAVllAN.sts.+psp..t............ 0 34 89 109 +11131 PF11299 DUF3100 Protein of unknown function (DUF3100) Pollington J, Finn RD anon Pfam-B_4068 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 
20.20 20.20 21.80 22.80 19.90 19.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.52 0.70 -5.15 27 247 2009-01-15 18:05:59 2008-08-15 14:51:52 3 1 215 0 74 233 4 231.70 44 74.12 CHANGED llluE.hIGshplslGsu.sllLLPhlaAlllGhhlshthh........phlsccptphAusllhlulh.LlA+hGsslGPsl.pllsAGPALlLQEhGpl.GTllluLPlA.llLGh+REuIGATaSIuREPslAlIu-KYGhcSPEG+GVLulYlhGTlFGslFholLAuhlushsh.FcPhALAMuuGVGSGSMMuAusGuLsuth.P.chscpIhAaAuASNLlosssGhYhslFluLPLsphhYphl ..................lluE.hIGhhphslG.u.pllLLPhlaAlllGhhlu.hthh........phlsccphphAuslltlulhhhhAKhGhslGsslspllpuG.........ALllQEhGpl.GTllluLPlA.llLGl+REAIGATaSluREPslAlIu-+YGh-SPEG+GVLuhYlsGTlFGslahullAuhluu.hsh.FcPhALAMuoGVGSuSMMuAAsuuLsshh...P.-...hucp.lhAhAAASNLloshhGhYhslFluLPLspahYch.... 0 23 50 62 +11132 PF11300 DUF3102 Protein of unknown function (DUF3102) Pollington J, Finn RD anon Pfam-B_4016 (release 23.0) Family This family of proteins has no known function. 20.10 20.10 20.10 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.57 0.71 -4.12 14 260 2009-01-15 18:05:59 2008-08-15 15:05:59 3 5 129 0 69 230 14 117.60 29 44.05 CHANGED RTPhlIAuEINhI+cQopKhLLtsAlEIGRRLpEAKuLlPHGEWhcWLcESVuYSppTAspLMplacEYGphh................so.ssp-u..N..............ttshssLsYTQALlLL...GlPEEER-pFls-pDVps.MopRELpQAVcE+ ................................EIp.h.pp....stp....hh.shlEIG+RLtcsK.....p.h.l..sHG.....-atcWLcpplsaSppoAp+hMplhcca.us.....................t...........t.........................t.h.pL.hppslh.Ll........sl..s-t.-c...phh........t..................-........l.tp...Moh+ELptslpp................................................................................................... 0 28 52 58 +11133 PF11301 DUF3103 Protein of unknown function (DUF3103) Pollington J, Finn RD anon Pfam-B_4046 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 
25.00 25.00 30.40 29.20 22.30 19.70 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.08 0.70 -5.75 15 133 2009-01-15 18:05:59 2008-08-15 15:14:23 3 1 126 0 20 86 3 341.20 63 88.23 CHANGED KRpLAhpLScpYsplcssLpppIsphpLssslspLlppstsss....h.pphppA-pslRphKGlssho-s...............LLplRLADssMLsuWQpGc.sPLFAFEPsGDDcsWpYIEAYDltGplHhLDVYplP-pPVhVVDssupc-l+AGLtsM+sEhs............t...sspp.phtstusts....ppt.spupspsIsTTlLKKIRLpDDpEPWISGKAEIYAIVTGVsPSRDEPsLDlVEMPYLDYDcpsYYPNQllIaWsRYRWGAADllLMEpDDGTNYKpLAphLlcAAEplLKsIPDPEVQGYAIIsQITscIIcslPDuhhTNDDDFVDVaYTLhpspsYsDHsGAuGNAssTFsPLTIsPT ...........................................................................................KRpLApphSpsYAshtpoLKoQIospsLSlslS-Llcs.sPss-......hSpQLppA.DpslRolKGIs..paT-p...............LLQLR..LADsoMLppWQpGp.SPLFAFEPS.G.sDcsWQYIEAYDVYGQIHQLDVYQLPDVPVFVVDsDSuhELKAGLQAMRAEMp+L..........ttssplsspcSs.uhcsuspo....hspuusu-ssPISTTVLKKIRLpDD+EPWISG+AEIYAlVTGVDPSRDcPTIDLl-MPYLDYDcQDYaPNQllIHWsRYRWGAADhILMEQDDGTDYK-LAKpLVcVAEEVLKhIPDPEVQGYAIIsQITuKII-AIPDGVLsNDDDFVDVFYTLMQDTpYsDHPGAsGNAsATFEPLTI.PT................................ 0 3 5 12 +11134 PF11302 DUF3104 Protein of unknown function (DUF3104) Pollington J, Finn RD anon Pfam-B_4053 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 20.00 20.00 20.00 22.10 19.40 19.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.34 0.72 -4.60 20 52 2009-09-10 14:59:42 2008-08-15 15:35:57 3 1 25 0 24 55 153 68.90 44 75.02 CHANGED shFLuV+sGDhVlVpsss..tsspttpp.sWWMGpVlpspGG....ARsPpssoLFQVADVDoGhI+WVNADpVo+llhshc .....PhFLtV+sGchVlVpppp..........t.sWaMupVlhspGG....ARsPcssoLFQVADVDoGhIpalNAD.Vo+Il.p..s...... 
1 1 7 14 +11135 PF11303 DUF3105 Protein of unknown function (DUF3105) Pollington J, Finn RD anon Pfam-B_4062 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 25.00 25.00 25.20 38.20 23.40 24.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.80 0.71 -4.31 37 224 2009-01-15 18:05:59 2008-08-15 15:41:28 3 4 181 0 135 235 28 128.10 34 48.60 CHANGED ssH...lpts..lsYs.....psPPsuGsHsshWhsCu..hY..spsl.sppsVHsLEHGAVhltYcPs.lsssplppLpphsps.ts............hsllSP.....hss.hssPlslsAWu+pLpl.css--splppFlppa.........hpuP..ptPEsuAsC .....................t..H.hphs..lsYs.....psPPsuGsHss....hWtsCst.lY..spsl.sEphVHuLEHGAVhlsYcPs..hsssp...lppLpphlpu.ts.......................hhlhSP..........h.s.....cs..PlsLsuW...........G+p..............Lpl.cssc.....D...tclspFlppa............hpss..phPE.su.C............................................ 0 43 90 126 +11136 PF11304 DUF3106 Protein of unknown function (DUF3106) Pollington J, Finn RD anon Pfam-B_4069 (release 23.0) Family Some members in this family of proteins are annotated as transmembrane proteins however this cannot be confirmed. Currently no function is known. 23.10 23.10 23.10 23.70 23.00 23.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.83 0.72 -3.63 30 277 2009-01-15 18:05:59 2008-08-15 15:48:36 3 2 230 0 106 262 41 94.00 27 48.62 CHANGED sWscLossQppsL.uPLupcWss.hsstp+c+WlplAppasphoP--Qp+hppRMpcWspLoscQRppARpsapph+pLs....Ppp+p...ppWctYQpLssEc+ctLAtpt ...........................WtpLs..pp.hL.tsht.pWst.hs...p+p..+hh.p.hutca.phoPppppc.hptRhs.cWtphoP-pRctsRpp.ap.p.h.+pLs....spp+p......ppap.ta.ppLs.tp+pthtt..t..................... 
0 26 67 86 +11137 PF11305 DUF3107 Protein of unknown function (DUF3107) Pollington J, Finn RD anon Pfam-B_3881 (release 23.0) Family Some members in this family of proteins are annotated as ATP-binding proteins however this cannot be confirmed. Currently no function is known. 21.00 21.00 21.00 25.60 20.90 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.37 0.72 -4.10 26 426 2009-01-15 18:05:59 2008-08-15 15:52:41 3 1 423 0 109 263 78 73.40 43 94.06 CHANGED MElKIGlpsssRELslpSsQos-EVpphVu-ALssss.GlLsLoD-KGR+hlVPssplAYVEIGssssR+VGFus .MElKIGlpsusRElslsos......p...os-..-lcptlspALuss.s....ulLs.LTDcKGR+hLlPuspIAYVElGss.ssRpVGFG....... 0 33 80 101 +11138 PF11306 DUF3108 Protein of unknown function (DUF3108) Pollington J, Finn RD anon Pfam-B_3856 (release 23.0) Family This is a bacterial family of putative lipoproteins. The structure for Swiss:Q64U78, PDB:3fzx, the first structural template for this large family including several homologues in the human gut microbiome and in metagenomic datasets, folds into a beta barrel that topologically looks like a small-scale porin (such as FepA). Swiss:Q64U78 is a putative exported protein, and this fold is of the YmcC-like type, with a predicted signal peptide SpI cleavage site AGAMA|QNQDC, and a Phobius server prediction of non-cytoplasmic localisation for amino acids 21-236. The possibility of it being a membrane protein can be ruled out by the hydrophilic nature of the solvent exposed surface outside the barrels. Analysis of sequence conservation suggests that an area near Glu172/Trp206 is potentially interesting. These two residues are also conserved in Dali hit PDB:2in5, a hypothetical lipoprotein classified as a new YmcC-like fold in SCOP (SCOP:159271, with a 12-stranded meander beta-sheet folded into a deformed beta-barrel) despite large structural differences between the two structures, suggesting similarity in function. 
27.00 27.00 27.00 27.10 26.80 26.60 hmmbuild --amino -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.52 0.70 -4.71 135 1119 2009-01-15 18:05:59 2008-08-15 15:57:36 3 3 918 1 417 1035 746 234.00 15 85.25 CHANGED hlshhssssssusps........................................................stphpYplph.tsh.....thu...pushph......hpss....ttaphpspspssul.......h..hptthts....hhhst.tth....stpattptppsshppptp..hpFshp.ptpsh.................hptt.tp.sh.tsh...D..huthhhhphthtt.sh.........phshslhsscch.hphphph.........hup.Eplp.sstGp.hcslphp..................ttthhpp.ppshphWhu.......ss..hl.P.V+lcttt.........hG.shphpLsphp ........................................................................................................................h.......s..s...ttt..................................................................t................shphpYphph..tsh..........phu......pushph........tpss...................tsaplshp.sps.sth...................thhs...thth.ss..tslh...Ptpappptpt.tttpp.t.p....hpFshs.stpst................ttptpt.tphshtt.ssh.....Dh..hohhhpl.tt.htsssh.............shshtl....h.csc.ph..ph.p.hph.........hGp..Eplp..s.....s..s.G....p...hc..sl.....+hp.......................thhpp..ppp..hphW.hu........sshshl..P.V+lphhpt.......G.phph.l.t..t.......................................................................... 0 129 280 358 +11139 PF11307 DUF3109 Protein of unknown function (DUF3109) Pollington J, Finn RD anon Pfam-B_4077 (release 23.0) Family This bacterial family of proteins has no known function. 
20.90 20.90 22.80 20.90 20.60 20.70 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.26 0.71 -4.78 28 212 2009-01-15 18:05:59 2008-08-18 12:42:32 3 1 210 0 65 204 121 181.80 49 92.84 CHANGED MlplGcslVSc-llcppFlCsLssCKGsCCVEG-hGAPLcpcEstlL-clhscV+shLpcculcsIccpGs.lpstpG-h.Tshl.su+ECsassatccGhshCuIEpAYppGhlsacKPlSCHLYPIRVp+hsshsAlNY-+WplCcsAppLGcEhpVPlYcFlKcALlR+aGcsWYp-Lcph ....................MlQlscslVS.DllcccFlCs..LssCKG...tCC.lEGDAGAPl-.-ElthLEclhstlh.s.Lsscutt.sI-cQGsshpD....p.-GD.lsTslV..s.s+-ClFssat-..........c.....G.....hshCAIE.+.A.Y.cpG.cscah.KPlSCHLYPI..Rlp..ca.s.s.ap.AlNYc.RWc.lCcsAsthG+chslPVY+FLKEPLIR+FGc-WYpELc..h.............................. 0 23 50 62 +11140 PF11308 GHL1-3 DUF3111; GHL; Glycosyl hydrolases related to GH101 family, GHL1-GHL3 Pollington J, Finn RD, Naumoff D anon Pfam-B_4091 (release 23.0) Family This family of bacterial and lower eukaryote glycosyl hydrolases is related to CAZy family GH101, and is made up of sub-families GHL1-GHL3. In the example Swiss:C02A26, the substrate-binding Asp is residue 596, the nucleophilic Asp is residue 706, and the proton donor Glu is residue 747. 
25.60 25.60 25.60 28.20 23.40 24.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.10 0.70 -5.09 24 306 2012-10-03 05:44:19 2008-08-18 13:05:20 3 2 258 0 36 239 5 281.20 36 43.87 CHANGED sLpp.hGlc+halth.DsWttuha...pP-hlssuccuG................YLhusaDpYtshh....pDtahsAp......htcptulpptDGohhtthp..usGhhh...sstshsaV+pphsclhp....ththsuhFLDV.uss.sp-sa........sscHhhscppshps+ptphpaltpc.sllhGSE-GsshsspslsFsH.........asst-hhtpcpus.ahG........hP...........................lPLYphVaHDslIs..........s.........Whhsp.ph.....ss.ppchhLhslLasssPhlphs.ts.hth....p.......htphhctpphhpshHcplsppchssachlststhlQ............pop ....................................hpp.sGlp+hWlGL.ssWt.uhh...pPphVspAcphG................YLlGsYDSYpohht.u.s.csW.TAph......ssphh-pssl.pptsGph.tGFh....upGhhLN.........P..s..hths.V+pRhpcIlp....hspaNShFlDsDuTu.hh-DY........pscc...hssppp.lpAt.pRhpalsp.p.shllGSEsGNshsspslsFAHGhpo.s.htWsDpcM+ps+pSsYYlGtaa....stthPth..ah.K.s.lKt.a+......plhhsP.YpVPLYphVapDplIo..........oa.......+Wt.solKh.....ps.hssR.LhthLaNsPPhhHLs.csphpp..........+.............hpplpca.psat.hHcphhccthssFpaLscpt.VQpo.............................................................................................................................................. 0 16 23 29 +11141 PF11309 DUF3112 Protein of unknown function (DUF3112) Pollington J, Finn RD anon Pfam-B_4107 (release 23.0) Family This eukaryotic family of proteins has no known function. 
23.30 23.30 23.80 23.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.06 0.71 -4.44 32 153 2009-01-15 18:05:59 2008-08-18 13:10:15 3 2 86 0 121 150 3 176.40 30 42.87 CHANGED PhsG.pphFthhhhslYsllsslllh....llsusl..hYhLs.ptappt+plhhsuulhlhlashsslsllslua.hlPpp.......................h.IESFu.h...................................................phssphhllhhoolllhluuhh.......................RssssFp..........t.hsps.u.htutsshYlh.hshElllsllYllsRlDLRFY .............PhhG.pphhthhhhslYhhlsssllh....llsusV.thahL.s..psh.pth+sl..hhuush.lhlhuhhslsllsluh.hh...Ppc..............................lEpFu..h.......................................................................phpsphhllhhoohLlhlGAsh.......................RssssFt..........p.hsps.uhhps+ssaYlh.aslEllVshhYhluRlDhRFa.................... 0 25 60 105 +11142 PF11310 DUF3113 Protein of unknown function (DUF3113) Pollington J, Finn RD anon Pfam-B_4134 (release 23.0) Family This family of proteins has no known function. It has a highly conserved sequence. 25.00 25.00 56.30 56.30 23.10 22.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.00 0.72 -4.29 3 239 2009-01-15 18:05:59 2008-08-18 13:28:53 3 1 165 0 3 60 0 59.70 73 98.36 CHANGED MQQQAYINATIDIRIPTEVEYpHFDDVDKEKEsLADYLaNNPDELLKYDNIsIRslslEV ..MQQQAYINATIDIRIPTEVEYpaasDVDKEKEsLADYLaNNPsELLcYDslpIRslslEV 0 3 3 3 +11143 PF11311 DUF3114 Protein of unknown function (DUF3114) Pollington J, Finn RD anon Pfam-B_4178 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as cytosolic proteins. This cannot be confirmed. 
25.00 25.00 26.20 25.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.77 0.70 -5.02 10 265 2009-01-15 18:05:59 2008-08-18 13:51:55 3 3 200 0 27 196 0 253.50 36 72.05 CHANGED lGSstFppLWpttptpsss....KLLphlLshlcMPpELoG-LccsppL.....lscFSs-LuP+csFW+phuplVQpAFPsssLop...psp..lp++lHQFRYlISoQQAQaVRcHa+..ptGMTDupALApYLpt.............p+.sssYshhESARLHNKht...h..cssphl.YPDspsp.h.....NhKlLlsFHoEFILDppGpFLNplDs....................EthopNGllNGASFNYus.........+....NsspHtpLDVcPsphaDPtFRccshcu....F+SP..........p.pchphuahspKuhY ..........................................................................lGSssapplaphpthp.pt...........clL..hhthlth..cLuuplcppthL......lt+FusslsPcssFWc.huphVppAaPspphup...................spp...hs+plHQhRYhIspQphpa..lRsa....a+.....ppGt...TDtpALttYlp..................t..shchshtpSuRLHNKh......h........ptphh.aP-sts..h........NhKl.h..s...F..H.oEFILs.pp.GpFlsphDs.............................ps.......pppsllNGsSFNYus...............p..........N.scpHp.LDl.sPs........t.......haDsphR+psh+s....ahS.P..................chp...h.......................................................... 0 1 9 19 +11144 PF11312 DUF3115 Protein of unknown function (DUF3115) Pollington J, Finn RD anon Pfam-B_4191 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.10 21.10 21.50 21.60 20.80 20.60 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.06 0.70 -5.57 20 129 2009-01-15 18:05:59 2008-08-18 13:57:06 3 3 119 0 96 125 2 302.80 36 84.10 CHANGED LF+ssFpspL....................................tusc.......LpphlQsVKucLYNRDahuAFss..--hhtAYAsRWSPSRALuYuolFssLt...........phhp.lssstss...................................................s+VLCIGGGAGuELVALAula.........stsppttustsus.................................lslslVDIADWSsVVc+Lssslpos.....................h..........pscsFslsFtcsDlLshsps........................phhslhps................hsLlTLhFThNELFops.hucThcFL.+LospscsGoLLLlV-SsGSYSclplG..................pK+aPhpFLlDphLlussssp.......sssWEhlpppDSpWaRh-tp.....tlcYslt....LENMRF.h+LYR .....................................................h.t.h.....s.tp.......lpphlQtlKspLYsRDa.sAFss..pphhtAYAhRWSPuRALuYuulFtp..l............chht.htsstt....s............................................................................................................tpVlClGGGAuuElVALAuhh....................pph.st...tt.......................................................tlslshlDIAsWusVVppLssslpot..................h.............p....spphsspFtppDlLphspt........................phhtl.ht......................t.sLlTLhFThNELFops.hscThpFLhpLss...htsGslLLll-SsGSYSplslG..........................................p++aPhpFLlDphLlustttp................Wch.l..pp-ShWaRhstp..........LcYslt.........LENMRa.h+LYR.................... 0 15 45 81 +11145 PF11313 DUF3116 Protein of unknown function (DUF3116) Pollington J, Finn RD anon Pfam-B_4194 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Bacillales. 
23.20 23.20 23.30 23.80 22.90 23.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.72 0.72 -4.37 6 124 2012-10-04 14:01:12 2008-08-18 14:10:29 3 1 42 0 3 60 0 84.90 44 97.09 CHANGED ME+ssccLlhpVL.hs+sssssIpcLohphlphssspsaTKNELLhslYWLEhpGYlpRsppssppR.YohTtcGchLLp+lpsph .............................MEcPscpLIhpVLphscsssssIccLol.E.hlpFsslssaTKNELLaslYWLEppGFIhRs..sps..s..p.....pR...Yo.hTpKGchLLp+lcp..l..... 0 3 3 3 +11146 PF11314 DUF3117 Protein of unknown function (DUF3117) Pollington J, Finn RD anon Pfam-B_4211 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 25.00 25.00 52.10 52.00 21.10 20.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.65 0.72 -4.44 9 364 2009-01-15 18:05:59 2008-08-18 14:14:43 3 1 361 0 92 169 30 50.70 73 87.12 CHANGED MKPRTGDGPhEVTKEGRGIlMRVPLEGGGRLVVElsssEAtpLussLssVs .MKPRTGDGPhEsTKEGRG.IVMRVPlEGGGRLVVELssDEAstLGstLKsV....... 0 32 75 88 +11147 PF11315 Med30 Mediator complex subunit 30 Coggill P anon Pfam-B_28118 (release 23.0) Domain Med30 is a metazoan-specific subunit of Mediator, having no homologues in yeasts. 25.70 25.70 30.10 28.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.89 0.71 -4.08 6 107 2009-01-15 18:05:59 2008-08-18 14:37:18 3 4 82 0 74 96 0 135.60 50 66.20 CHANGED NslsLsRlGQETVQDIloRhhEl...FthL+shQhssssTpppusup-+huKlQEphRol+lLF++L.RllY-+Cs-.s.sshp.sslEsLIPYtsEshsph-sp.hutph+hllQE+cEllE....pV+tKNcQL+cIlD+hR.hlW-INoM..LuMpRs ..................NssoLCRlGQETVQDIV.RThEl...FQ..hL+shQ...lPNG.sT.ppsshpDRhsKlQ-pLRplplLF++L.RLlY-KCNEss....uGh-......h..ssE..........p..LIPYh-E.sts.+p.-sc....u...Rh.sspE++E..lhE......pl+.KNpQLKpIhDphRp.lIW-INsM..LshR.................................................................. 
1 24 30 52 +11148 PF11316 Rhamno_transf DUF3118; Putative rhamnosyl transferase Pollington J, Finn RD, Eberhardt R anon Pfam-B_4218 (release 23.0) Family Most members of this family are uncharacterised, but one is a putative side-chain-rhamnosyl transferase [1]. 20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.40 0.70 -5.16 30 99 2012-10-03 05:28:31 2008-08-18 14:48:30 3 4 64 0 36 148 27 203.30 22 69.03 CHANGED pllGLCRFSYsu....hG.GFp....assh.tcRtAhLYAPsRL-cRFphFEslsLPSLtuQTD.-FphlllhGcshPcta+sRLccLstshP...Qh+lhhpsPtp.pRpsh+csl.ptsppsssssslQFRlDDDDAlulDFVtRlRpsspsh.slh..sppsplulDFs+Galh.phssc.Glsht.thpsahssuLuhhhps.ssppolhsasHc+lhpphP.slohsstsMalRshpspNDSppphstp .......................................hhh.hRFsh.......s..................ttp.thhhs.thlp...pRhtlFEphsLPSlttQTs...s....FphllhhssshPt.hpp+Lpplh..tshs...phpl.h.......h....hs...sht....pt...p.....hhpphh....tt.............hh.....s..sss.hhphR..lDsDDAluhcFltpl+pth.t.th......tppp.hslsaspGhhh...t.t...s.......h....h...hhshhhuhh..............tt.......shht.h..s.Htph.phhs......hh......p..t.....shalpshpt.Nssp.....h................................................................................ 0 15 32 33 +11149 PF11317 DUF3119 Protein of unknown function (DUF3119) Pollington J, Finn RD anon Pfam-B_4223 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 30.70 30.20 20.80 20.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.72 0.71 -4.06 23 112 2009-01-15 18:05:59 2008-08-18 15:54:12 3 1 99 0 51 110 104 120.40 40 74.53 CHANGED VpLsPSaplPlllllhulsLLhl....sW...sulllulFGLFLLlQohoLRLcFTscsL.VaRuscplR...........RFPYs-WlsWRlFWshlPsLFYFRElpS.....IHFLPILFDsppLcppLcp+sss ...........................V.lpPsaplPllllhluhsLhh....lp........h.....sul.sl.uLhGLFLLhQosplRlpFsss.sL-VhpusphlR........................RFPYspWhNWclF.........Ws..hPlLhYF+EspS............................IHFLPIlFsscpLpspL.c+ss......................... 0 14 36 48 +11150 PF11318 DUF3120 Protein of unknown function (DUF3120) Pollington J, Finn RD anon Pfam-B_4230 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 27.60 134.60 20.60 19.70 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.65 0.71 -4.09 22 71 2009-01-15 18:05:59 2008-08-19 09:01:02 3 1 71 0 28 77 136 201.10 47 85.40 CHANGED AuhLVslPVFlQAPWVRhpPhuuhLhThlllsluh.Lthhpppp.hthhGsLLlGFShSWLAGslaWGWLRhcPlhHLPVEAlALPlAlsGLps+W.+lGssFYLuSLlGTAhTDlhhhlTGlMshW.pVlpAs..spA..s.lLppAutplhpPhulshlhhhAsllltluphhhptup....p.psWuhuuAVL.oTLlVDuLFLlsAll .AuFLVslPVFlQAPhVRhhPhhSllhThshlhlu..lhLhpp.sc.pthWG-LLlGFShSWLAGulYWGWLRhcPlhHLPlEAluLPhAlhGL.tppW.+lGshFYLGSLlGTAlTDlYhaLoGLMsaWRQlhps-..ss.A..s.lLpsAltpltTPhuluhsllLuhlLlhlGhhshp.pp.......ptchWuhuGAVL.oTllVDuLFhluAh.... 0 4 18 26 +11151 PF11319 DUF3121 Protein of unknown function (DUF3121) Pollington J, Finn RD anon Pfam-B_4233 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as phospholipase proteins however this cannot be confirmed. Currently this family has no known function. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.11 0.71 -4.78 17 451 2009-01-15 18:05:59 2008-08-19 09:15:55 3 4 419 0 48 252 42 169.90 40 71.10 CHANGED hstsppCpp.supLpRLuCFDplhsTPhphsttt.t......psstWpRAhspEtpRsscsshhhhppst.....ssshllTs.....................sAlG..ss..p.PlLhlSClssIoRlpLhLscslpps+Vplsl......t.sshsp.Whsc-sGhlLcuGRGLPAIcplKphluuppLplco.sssslDsLpFDhssLspulcPL ...................................................................................l.pthpsCRpEsusLERLsCYD+lh..s.P...hpssshssuhst..........................hutuWpRAhppE.p.cRpuss.s.tlLlTps.Gc...tsollITo.....................PAlG.....psss+.PV.LhhSClDNITRhpl.....AL...+..s..l..c..s....pcIsV..ol......cpRslcspWh..l.R.-sGsLL-SuRGLsuI-pIKpLhuucpLhlco..sssuAspLTFslcGLscAlsPL..................................................................... 0 12 22 34 +11152 PF11320 DUF3122 Protein of unknown function (DUF3122) Pollington J, Finn RD anon Pfam-B_4242 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 25.00 25.00 32.80 32.50 22.20 21.30 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.74 0.71 -4.24 21 64 2009-01-15 18:05:59 2008-08-19 09:22:56 3 1 59 0 24 73 130 133.00 37 78.06 CHANGED Atl+ppppsssphhhR.....ShpoLRDhctpoWQllha+cscs....tpslpLRlVGFPGphclsHPpsLhlpsuppphhhs....ssshh..tpssssssuEaDLsslLspLspspPL+LpLP...sshspLsVPPaVVpEWppL .........A.l+ppp-tsGphhh+.....ShpoLRD.chpoWQlVhaKcsps....spslsLRlVGaPGphclsHPpsLplpsuptphlhs....ssshh....tpsstsssupasLsslLspLspspPLcLpLP...uh.splsVPsaVVpEW+sl........ 0 3 16 22 +11153 PF11321 DUF3123 Protein of unknown function (DUF3123) Pollington J, Finn RD anon Pfam-B_4246 (release 23.0) Family This eukaryotic family of proteins has no known function. 
25.00 25.00 44.10 44.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.45 0.71 -3.79 4 54 2009-01-15 18:05:59 2008-08-19 09:36:50 3 1 4 0 31 42 0 100.10 64 58.26 CHANGED VSVRTRVGKLsss...pRpLVLWLuAVVVSsA-..E..GaLsVlYKGsFP.-DPF+sVRV.ARc-sKhhsssAAsssuss.s.................st.susAP.RPTTAGKSltlLKt.h.Et ..........................VRVRT.VG+LGTo...sh+LVMWLGAVVVSDAD..D....GHLEVIYNGNFPRDDPFRTVRV.AVKDVKL.uPRPAPTPA...........................NhAAP..RPTTAGKsLPRLKMhhLE................. 0 0 4 16 +11154 PF11322 DUF3124 Protein of unknown function (DUF3124) Pollington J, Finn RD anon Pfam-B_4248 (release 23.0) Family This bacterial family of proteins has no known function. 20.40 20.40 20.60 99.30 20.30 20.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.44 0.71 -4.21 30 107 2009-01-15 18:05:59 2008-08-19 09:39:30 3 1 102 0 63 113 21 125.30 39 72.35 CHANGED LspGpTlYVPlYSplYoss.....cppshsLssTLSlRNTc.spslhlsplcYaDTsG+Ll+pYlspPltLsPLuosclll-EcDssGGoGANFIVcWpuspslstPllEuVMIGstusQGlSFsopGpsI .....h..GpolYVPlYSpIYpts......cppshsLosTLSlRNTs.spslhlsplcYYDosGchl+sYlcpPltLtPluohchhlscpDspGGoGANFlVcWpusptlspPllEuVMIus.tu.sQGlSFsopG+sI............ 1 20 45 60 +11155 PF11323 DUF3125 Protein of unknown function (DUF3125) Pollington J, Finn RD anon Pfam-B_4250 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Staphylococcus. 19.20 19.20 19.20 19.40 18.60 18.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.75 0.72 -4.18 5 369 2009-01-15 18:05:59 2008-08-19 09:44:08 3 3 69 0 3 125 0 40.30 58 77.09 CHANGED MIFSQNLFRRPTPThIVCRNWESNFSLLGP+.QLAp.scasF..chhLlauP ...........MIFSQNLFRpPTPs..hhTRIEK..SLLQAHF+SVNYCQYNFlccpTLIasP........ 
0 2 2 3 +11156 PF11324 DUF3126 Protein of unknown function (DUF3126) Pollington J, Finn RD anon Pfam-B_4268 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Alphaproteobacteria. 25.00 25.00 29.60 48.30 24.40 19.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.71 0.72 -4.30 18 204 2009-01-15 18:05:59 2008-08-19 10:47:42 3 2 195 0 74 130 111 62.70 53 85.20 CHANGED ElcKL-sYL+cpFtssplpVpsRP+psDSAEVYlG-EFIGVla+D--EGEhSYsFpMsILDlD .....El+KL-AYhKRsFsNsclpVcARP+K.sDSAElYlu-....EFlGlla+D---G-lSYsFsMAILDhD... 0 22 45 55 +11157 PF11325 DUF3127 Domain of unknown function (DUF3127) Pollington J, Finn RD anon Pfam-B_4273 (release 23.0) Domain This bacterial family of proteins has no known function. However, it does show distant similarity to Pfam:PF00436, with proteins such as Swiss:D1W984 being similar to both families. This suggests that this family may have a DNA-binding function. 20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.80 0.72 -3.97 29 271 2012-10-03 20:18:03 2008-08-19 10:48:04 3 1 225 0 71 260 272 86.50 36 67.45 CHANGED ElpGK.Iplls-ppshGp.sG..a+KpEhVlc.....TcsQYPpclplphhp.DKssh.sshp.sGpclcVuhslcuREas........s+aFsslpu..W+l- ............ElpGKlItllssppus.op.sG...a+ppEaVlE.........Tc-QYPp+lsh.....-hhs.DKlsp...slp.hG-clpVSFslcuREWs........s+aFNolpA..W+l-.................... 0 28 57 70 +11158 PF11326 DUF3128 Protein of unknown function (DUF3128) Pollington J, Finn RD anon Pfam-B_4309 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.20 21.20 21.20 21.20 21.00 20.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.12 0.72 -3.86 24 194 2009-01-15 18:05:59 2008-08-19 11:52:37 3 1 184 0 150 191 0 86.90 29 48.69 CHANGED hPs.....shSChpAFDphhhCa...................olGGQh+shYRYGphssCscphccFhaClpppo.....................cspclpEha+cchhpp.........ptpsSSEDlWchR ........................p.phSCtpsFDthhhCh...................ShsuQhpshYRYGchc..sCspphsDahhChch+s..h.........................................+tptlp-ha+c.+thpt.........pttssS-DlWchR........................................................ 0 41 82 124 +11159 PF11327 DUF3129 Protein of unknown function (DUF3129) Pollington J, Finn RD anon Pfam-B_4316 (release 23.0) Family This eukaryotic family of proteins has no known function. 25.00 25.00 25.00 28.20 23.90 23.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.35 0.71 -4.46 23 214 2009-01-15 18:05:59 2008-08-19 11:59:31 3 3 57 0 193 217 0 197.00 32 62.76 CHANGED VsAHGslhsspGssGusss.uLulssuo...............................PRDssst.sssQtDToIhRstch.sstusshGRT.uuGsscsupsspthM...........................................................................................................uussLPpl.osGGslphThHQVNsDGAGPaos.lDsTusGs..tsapphpVTpslPGhs.t.....................................................................uhopupspDFPlsVphPushoCTGTVuGtpNVClVRspNsAtAGPFG 
....................................................................................................................................tuHuslhss..pG.....s...sG..s..s.hs..ululs..ss..s...............................sRss.sp.....sshQtDoslh+ptph............s.sts......s.....s.hG+T..t.s...G..sh...s..h..sphspshh..........................................................................................................................sssslPpl.ss..sGp..lshT.hH..........QVN..t...DGA.GP..asCtlDsousGs....sap..shp..Vs.p.sl..PGts......................................................................shs.....t...u...p.sp-a.slplphPsshsCsGs.su...G....p......N....VClVRspN........sAhu.GPFG................................................... 0 84 133 176 +11160 PF11328 DUF3130 Protein of unknown function (DUF3130 Pollington J, Finn RD anon Pfam-B_4322 (release 23.0) Family This bacterial family of proteins has no known function. 20.60 20.60 20.60 21.80 20.20 20.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.96 0.72 -3.94 2 128 2009-01-15 18:05:59 2008-08-19 13:10:10 3 1 49 0 2 91 1 82.80 54 94.66 CHANGED MpEIKVcEcThppauochtppupu.sYLPhKsGNMAaSRANSIsQLRoALh-LV-sVEsFQhVscpDAoRLKphG.uaAhpDphhtphhs .MpEIKVpEsThppHATKLtScusu.pYLPhKsGNMAYSpAN..SIsphRoALh-LVDsV-sFQsVsppDAsRLKchGhuas+pDQthupchs............ 0 2 2 2 +11161 PF11329 DUF3131 Protein of unknown function (DUF3131) Pollington J, Finn RD anon Pfam-B_4335 (release 23.0) Family This bacterial family of proteins has no known function. 
20.30 20.30 20.30 20.60 20.20 20.20 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.14 0.70 -5.78 24 249 2009-01-15 18:05:59 2008-08-19 13:20:03 3 11 171 0 76 236 7 243.20 30 16.30 CHANGED hphApsAWpYFcpNhp.spTGL....VNolss...asosTMWDhuSalhALlAAccLslIsppEF-pRlp+hLssLuplPLhpspLPN+sYsTpTtphssYtspPst..lGWSAlDlGRLLlsLpllpppaPpasstlspllt+Wphsphlp.cGpLaGuphtps..thphhQ...EGR..lGY....EpYAApuapLaGhsshpAhph......tsa.p.hspl.GlslPhDsRcstph.ts.s.lso-PYlLpulEhGhD.st.............htthAcplapsQcpRacpTGhlTAhoEcslspuPYFlYssl..aupGpsWsoloc...sGpphsphp....slSoKuAFuhasLa.cssYocpLhptl.ppLhs..pcGaYpGhYEssutsscuhT.hNTNullLEuLhYptpGp ...................................h.phA+psW+YFtp.sp.tp.s.sL.........s.N...p.......ttpTo.hslGsYLhullAAR-hGhIshcEh.cRlptsLsoLs+hpha.pG.cL..phYpTpThp.......h.p....Ph.................h.SAlD.GpLhshL....hhlppt............................................................................................................................................................................................................................................................................................................................................................................... 0 16 35 58 +11162 PF11330 DUF3132 Protein of unknown function (DUF3132) Pollington J, Finn RD anon Pfam-B_4348 (release 23.0) Family This viral family of proteins are 55kDa. No function is currently known. 
19.70 19.70 20.50 268.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.63 0.71 -4.29 3 62 2009-01-15 18:05:59 2008-08-19 13:35:40 3 1 2 0 0 38 0 124.00 96 74.21 CHANGED SSSHYFFSKNITPTSVERNFGGVAQLEVERAKLSFETFGNKFLLKDVFMFSDQSLGDNILSYTLLKEEGHIDGMRTAGDDVLLEKDGEVVMILDSRDEGRMWIKDDVWAEVTEHGSKSAREYCM SSSHYFFSKNITPTSlERNFGGVAQLEVERAKLSFETFGNKFLLKDVFMFSDQSLGDNILSYTLLKEEGHIDGMRTAGDDVLLEKDGEVVMILDSRDEGRMWIKDDVWAEVTEHGSKSAREYCM 0 0 0 0 +11163 PF11331 DUF3133 Protein of unknown function (DUF3133) Pollington J, Finn RD anon Pfam-B_4400 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.46 0.72 -4.39 13 112 2009-01-15 18:05:59 2008-08-19 13:43:25 3 2 20 0 70 97 2 45.10 40 5.96 CHANGED uGGAPFlsCssChELLQlPpchhlsp+p.pp+lcCGuCSplLphSls .......hGGAPFllCpsChcLLQl.Ptchhls.p+t.tp+lp...CGuCScllphsh....... 0 10 32 55 +11164 PF11332 DUF3134 Protein of unknown function (DUF3134) Pollington J, Finn RD anon Pfam-B_4408 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 25.80 25.40 24.50 24.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.53 0.72 -3.84 16 82 2009-01-15 18:05:59 2008-08-19 13:50:37 3 1 74 0 34 81 101 72.70 40 82.45 CHANGED NPuLpc.sRppPAsVlPl+pEssLLsWLcsoGRLlsc-.p-...c..osEEEElS-Lhus--sh.c..--ss--h ...NPuLpc.sRp-PAsVlPl+cEssLLsWLEsoGRLlsc..-pp-t....chsssEE..EElStLMut--sapsc.--ps-.......... 0 5 23 32 +11165 PF11333 DUF3135 Protein of unknown function (DUF3135) Pollington J, Finn RD anon Pfam-B_4409 (release 23.0) Family This family of proteins with unkown function appears to be restricted to Proteobacteria. 
20.70 20.70 20.70 23.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.44 0.72 -3.82 23 146 2009-01-15 18:05:59 2008-08-19 13:53:26 3 1 145 0 36 92 4 80.70 49 70.31 CHANGED ppLPsFD-LhtLAcssP-th-pl+cchscphIsssscphptRLcuhQh+IDthhp+sKNPhpsslhltphhpcphhchpp...sLs ........sLPsFDELstLAcccP-AF-Qh++-Ms-EMI.SAScsM.QpRLaAQQSHIDRVlup.CKNPsHsNVsLMpELppQhl+F+sAL.p.......... 0 6 16 30 +11166 PF11334 DUF3136 Protein of unknown function (DUF3136) Pollington J, Finn RD anon Pfam-B_4419 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 20.90 20.90 21.90 29.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.10 0.72 -4.55 15 46 2009-01-15 18:05:59 2008-08-19 14:02:40 3 1 27 0 17 48 116 63.50 56 84.37 CHANGED LoIGELEAsYslYCKALRhLlt-G+s.pcIcRTlCWc+LppLHpSLPppY+SP-cLhthhQ+chp .LTIGELEAGYPlYCKALRhLltcG+ohp-IcRTVCWs+LEoLpRsLPsRYKuPshLhsllpR-lp......... 0 1 6 12 +11167 PF11335 DUF3137 Protein of unknown function (DUF3137) Pollington J, Finn RD anon Pfam-B_4422 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 25.00 25.40 24.70 24.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.79 0.71 -4.51 34 227 2009-01-15 18:05:59 2008-08-19 14:23:45 3 1 152 0 68 230 31 142.20 21 43.07 CHANGED hhTlFcGllhthshsKpFpupTllhpc.........sthhspltt.ttt.......p+lcL-sPpFEctF-VYosDQlpARYlLoPshME+lltLppph.....ssslphuFhs........sclhIslsspp......shFE..sslhpslsptpplpp.hhp-lpthhsIlcpLpLs .............................................p..s.FpG.hhhhph....+p...hp...upshlhss...........tthhst.htshttth..........p+lchEs.s....pFpcpFpVausDplpARhlLoPshME.pLhplppph.....tsslphsFh.s........sclhlslsstp......shFp.......sslh...p....sl......s...p......p....tlpp......hhpclt.hhsllctLp.......................................... 
0 22 42 60 +11168 PF11336 DUF3138 Protein of unknown function (DUF3138) Pollington J, Finn RD anon Pfam-B_4423 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 21.90 21.90 22.10 28.50 21.80 21.80 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.70 0.70 -6.32 4 54 2012-10-03 17:14:37 2008-08-19 14:28:28 3 2 46 0 17 61 7 491.40 62 97.68 CHANGED MK+KhlsALlAhALPGhAs.....AuosAspI+ALQAQlssLQpQhsELpsu.LAApsu.......AAuG....QuuuAAAuusP.sD........ussshTpDDlsph+pQlANtpLKVDuLs-AAsTGPIAGLSVTGYlDPTYlaNRussTSuFQFhNH-.usYsYaNSTFGDVYLDIKKTFGVGPhAPSAEITlMPNRGsG.olhssu.GusGNNIlNTAVVsVPLSsTpTF.uGLhsSFGGYEVQQSNQMLTLTHsLLYDFSDPGShIGs.GhNa..spssWAWKFllGNEQaRTtGulspTGsNA.hGpP.TpSNpsPTFTARVDYshSSALDlGGShNlGRQTLhSusspAGG...YGhtssuuuPYGsaFFsEADATYTLuDhQYNAElDYGQQQ+uAaNGGpAQWYGlSLLuHRKasssslGRMGATLRYDaLsNsKNGGGGuuIuLsusG....hDsssGFGIDssCLAsSpANGs...ECKGAsR.AlshDLLFYPTQQlTVKlEYRHDWAsptVFLRsDGSYuKSNDLLusQhIYoF .........................MKKKLICLLVAGALPGhAh......AuSTSApIKALQAQlsALQtQhKthpst.LAup.u..............uA...suGAtutss..u..sAsuAsP.ss.............G+ApAsLTsD-VopMKQQIAN.QQLKVDuLTDAAsTGPlA.GLSVTGYIDPTYlYNRAAGTSSFLFANH..E.......s.......s.....YNYFNS...TFGDLYLDIKKTFGVGPMAPSAEITLMPNRGNGITLLQNSRGsIGsNlLNTAVVNVPloA..oT.T.LVAGLlPSFGGYEVQQSNQMLTLTHNLLYDFSDPGSYlGh.GsNY..TKGsWAWKFhLGNEQYRTYGSVTQTGTNA.LGDPITTSNKVPTFTARsDYTWS..SALDlGGShNIGRQTLs.SAh..stssus.....YGsGGtAsSsYGsFFFuEADATYTLADlQYNAElDYGQQQHAAa....N...GG.h...AQWYGLSLLAHRKFNsPVlGRMGsTLRYDhLsNsKNGGG..GGGI.AL.NGNG....MD.s.usG...FGlDADCLAsSKANGGLGFEC.KGAsRQDVALDLLFYPTQQITVKVEYRHDWANNKVF.LR.NDG.S.......YuKSND..LLATQFIYSF........................ 1 2 6 10 +11169 PF11337 DUF3139 Protein of unknown function (DUF3139) Pollington J, Finn RD anon Pfam-B_4425 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.79 0.72 -3.70 12 437 2009-09-11 16:35:17 2008-08-19 14:32:33 3 3 346 0 28 180 0 81.70 40 65.70 CHANGED MKKh.hhhhllhllllo....llluhhhaFsh...ptcpshchlssYlscptlppspl...Kphc.hphshKpG..haYttV.sFKD-PshpY.Yp ...............MKKhK.h.hhlIlllhIu........lllu.....h..saaF....uhcshpt+cs..lph..lDs.....YL.p.-.Ks.lK.ccI...Ko..cK....spYSsKcG.....ha..YpcV.sFKDEPs.lTYshp............................................ 0 12 19 25 +11170 PF11338 DUF3140 Protein of unknown function (DUF3140) Pollington J, Finn RD anon Pfam-B_4435 (release 23.0) Family Some members in this family of proteins are annotated as DNA binding proteins. No function is currently known. 25.00 25.00 27.50 27.10 21.70 20.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.09 0.72 -3.87 24 152 2009-01-15 18:05:59 2008-08-19 15:01:41 3 3 123 0 77 151 3 90.50 41 67.61 CHANGED -Fp-lVNMospELccWLcT-cSpSsGpspc....GEosGHpSGRRIV-ILc....Kc+sDLoD-Dh-HM++VVuYl+RHLuQ.....pPcuc..hpcocWRaSLM ......-FpchVNMTssELccWLcoccSpssGppptt.........uEosGHcSGR+IlcILc....K++sDLo--DhcHMRKVVuYl+RHlAQ........cPsss....sp..cocWRaSLM.............. 0 26 49 63 +11171 PF11339 DUF3141 Protein of unknown function (DUF3141) Pollington J, Finn RD anon Pfam-B_4443 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
29.70 29.70 29.70 29.70 29.60 29.60 hmmbuild -o /dev/null HMM SEED 581 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.94 0.70 -6.18 28 171 2012-10-03 11:45:05 2008-08-19 15:06:39 3 4 134 0 86 231 61 521.50 42 76.93 CHANGED DAhQRolLahDlhRpRGspah-HptpshPsVLtFcaEllhDGRcLP+PVNYuLlRIlPPss........hssDs...sK.RPFVVlDPRAGHGPGIGGFKsDSEIGlAL+AGHPCYFluFhPcP.PGQTlEDVs+AEutFlccVtchHPcus.KPsVIGNCQAGWAlhhlAAhpP-LhGPIllAGAPLSYWAGh....pGcNPMRYuGGLLGGoWlouLsSDLGsG+FDGAWLVQNFEsLNPANTLWsK.YsLYuclDTEusRaL-FERWWGGahhLNstEhpaIVDNLFlGN+LusGclhsuDG.pplDLRNIRSPIlVFsSaGDNITPPQQALsWIsDLYsss--I+upsQTIVYslH-slGHLGIFVSuuVA+KEHsEhsuslDhIDlLPPGLYEhsIs-ttssssssphh.spYhlchctRpl-DItu.hscssc-DERtFssVARlSElNhslYcpFlpPaV+AhsTs.sAchhRphHPhRlphphaSDpNPhhshlsshApplRpsRpPVusDNPFlphQpthSctl.puLDtaRDhRDshtEphFhslYuuPhlpALsGhpttpcs.t+......s.ss-cRphhphchtplpssl ..................................................................................................................DshQRulLahDshRpRGsphhpH.ttshPslLpFphEhlhDGRph.+PsNYuLlRIh...ssts.....................h.h.Ds.........tK.RPalllDPRAGHGPGIGGFKt.DSElGhAL+.s.G..H.PsYFl..sFhPpP.PGQ..Tlt..DVhpActtFlccVtt...hH..P..ps...+Ps.l..lGNCQuGWtlhhlAAhpP-.lsGPlllsGuPlSYWAG.....pGpN.PMRYsGGLhGGo..W.h...s...t...Ls.uD....L.....Gs.....G+FDGAaLVpNFEsLNPusshWsKhYsLYupl....Do..E..s..t..RaLc....FERWWGGahhLstp.EhphIV-NLFlGN+LspGplhh.......ssG.pplDLRsI+uPIllFsShGDNITPPp..QAL...sWIsDlYtssp-ltstsQ.pIVYhhHpplGHLGIFVSupVA++EHpphhsslchI-hLsPGLYEhplsp.ss........ssspt.p..........pahlphctRpl-Dlpt.....t.ps-phFtsVtplS-hN.thYctahpPhlpuhsss.sAchhcthHPhRhph.hhS.c.t.NPhht.lt.hAp.lRt..pRp.sss....tcNPa.hthpphhup.lttuL-haRchRDshhEthFhslYus.shht.hh.........................................h........................................................................................... 0 21 45 70 +11172 PF11340 DUF3142 Protein of unknown function (DUF3142) Pollington J, Finn RD anon Pfam-B_4454 (release 23.0) Family This bacterial family of proteins has no known function. 
30.00 30.00 30.50 30.00 28.80 28.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.33 0.71 -4.62 16 151 2009-01-15 18:05:59 2008-08-19 15:13:56 3 1 145 0 46 134 20 188.10 38 62.79 CHANGED ssuR.hhu.sVhLssphps..Lchs-plhppllp.ltcWQupGsplsGlQIDFDAsot+LssYstFLccLRppLPtphtLSITuLhsWssou-...LssLhs.VDElVlQsa......pGhashsthtpal.pls+ls.lPF+lGLsphG.............Eh-sshp..............ppLhssP......................WFR..............................................................shslaLhNs .............................s.ssR.shs.slhlssRhsp..Lchssplhttllph.hpcWpsuGsplsGlQIDFDAsTt+LscYssFLcpLRppLP..............s.shtLSITGLhDWspous...LssLht.lDElVlQsa...............QG.htsssphtpYLstlspLp....lPF+luLsphG.....................Eh-s.h...............p.LtssP..htthh..l........................................................................................t....................................................................... 0 4 16 28 +11173 PF11341 DUF3143 Protein of unknown function (DUF3143) Pollington J, Finn RD anon Pfam-B_4460 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 28.70 69.00 22.20 16.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.08 0.72 -4.09 23 90 2009-01-15 18:05:59 2008-08-19 15:17:38 3 1 85 0 41 88 79 64.50 43 57.69 CHANGED lGsp+ssp-.ssWtlppPsWsAplhL-pE-LtVpatpsG.....pcppRuFsYuLSRpDVEsAlhuG .LGspQspp-hssWhlc+PsWpApL.LDh--LtVpYhpuG.t..tp-hpRsFpYuLSRpDlEsAlhuG. 0 6 26 37 +11174 PF11342 DUF3144 Protein of unknown function (DUF3144) Pollington J, Finn RD anon Pfam-B_4465 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
25.00 25.00 25.40 25.10 21.60 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.52 0.72 -3.91 20 130 2009-01-15 18:05:59 2008-08-19 15:19:34 3 1 113 0 41 104 45 79.40 41 72.66 CHANGED sFachADpFIpLANcpsppp..c.......sGcVuuuhhaAAARFNAahuussststsphts-K-pAlcaFsspY+cMLc-NL-Dal .....Fa-RADpFIpLANphspst..c.........hGpVuA.AhhaAuARFNuahAAtshtpts-htscK-pslcaasppappMLp-NlD-Yh............ 0 5 20 31 +11175 PF11343 DUF3145 Protein of unknown function (DUF3145) Pollington J, Finn RD anon Pfam-B_4467 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Actinobacteria. 25.00 25.00 27.00 26.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.94 0.71 -4.83 17 330 2009-09-10 15:28:37 2008-08-19 15:23:36 3 1 327 0 91 220 102 153.90 50 92.65 CHANGED VLaVHSAPsALCPHlEWAluusLuttVsLcWosQPstsGshRAEhsWpGs.sGouApLASuLRGWptLRaEVTE-sosGsDGtRasaTPsLGlFaAsTsssGslllsEsRLRshhp.utt....ssh-lt.cElchsLGpAWD-ELEsFRaAG-..GAsVpWL.+pV .............VlalHSuPsALCPHlEWAluusL....sst........s....sLcWTsQPAtP.GphRAEssWsGs.VGTuupLAoALRuWphLRFEVTE-sosG.....s-GpRaSaTPsLGlapAshsssGsllVsEhRLRuhlstut.....sutpLssElsplLGpsWDsELEPYRtuGs..uu.VsWLpp.h...... 0 33 74 87 +11176 PF11344 DUF3146 Protein of unknown function (DUF3146) Pollington J, Finn RD anon Pfam-B_4468 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 22.00 22.00 22.00 71.70 21.90 21.70 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.61 0.72 -4.55 18 70 2009-01-15 18:05:59 2008-08-19 15:27:15 3 1 70 0 28 68 130 79.20 60 91.76 CHANGED PpTTAalRVppQSasptpl-GEVpAGsFcWpFpWpFcpGc.LhVcPSLGRALIpDuLhRFLh+sDYpLEPGGDYsFTlRA+ .PpTTAalRlpcQSWppGplEGEVpAGsapWpFpWpFcpGc.LpVcPSLGRALIp-PLtRFLE+pDYpLEPGGDYpFTlRA+. 
0 4 18 26 +11177 PF11345 DUF3147 Protein of unknown function (DUF3147) Pollington J, Finn RD anon Pfam-B_4475 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 21.80 21.80 21.90 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.50 0.72 -4.04 11 537 2012-10-01 21:19:26 2008-08-19 15:39:40 3 2 300 0 60 163 5 109.80 44 87.60 CHANGED hlL+FllGGhAVsluhIluphlsuK.hGGIFAsFPAVaLAAllhsGhpasupp....upplSpGAlsGMlusllClLsshhhltt.ptWthullhullsWFVuussIaplhph .........................lh+FhlGGhAVhLu.IlusplshK....hGGIhAThPAVFLs.u.l.hhh.u.hpatsp..............uhpl.SpG..Alh.Ghhu.s.l..l.slhl.T.hhl+..t.+.ta..hhuh......Ihuhl.....sWFl.ulsIFphhch............ 0 24 34 51 +11178 PF11346 DUF3149 Protein of unknown function (DUF3149) Pollington J, Finn RD anon Pfam-B_4495 (release 23.0) Family This bacterial family of proteins has no known function. 20.60 20.60 22.10 21.10 20.50 19.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.89 0.72 -4.40 14 230 2009-01-15 18:05:59 2008-08-19 16:01:56 3 2 230 0 48 142 6 38.60 43 21.33 CHANGED chWhpL.FusslGLhShhVIhhslGlhuahsthFlhKhhpssp .....hhhpL.FusslGL.SMlVIlsslGlhhaahuaFhYKhhp-pp.......... 0 5 22 35 +11179 PF11347 DUF3148 Protein of unknown function (DUF3148) Pollington J, Finn RD anon Pfam-B_4488 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 34.60 33.40 18.50 17.70 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.98 0.72 -4.36 23 92 2009-01-15 18:05:59 2008-08-19 16:04:34 3 2 88 0 41 92 86 62.20 48 59.37 CHANGED slGspV+Lhst.PYLKTADPMPMLRP.PDLVslsE.GpVluh+PtshhuVRFcRGoFLlssphL ...lGspVpllthPPYLKTA-PMPMLRP.sDllplu-.GtllsR+PtshWuVRFs+GoFLl-upYl.. 
0 7 26 37 +11180 PF11348 DUF3150 Protein of unknown function (DUF3150) Pollington J, Finn RD anon Pfam-B_4471 (release 23.0) Family This bacterial family of proteins with unknown function appears to be restricted to Proteobacteria. 23.50 23.50 23.70 25.50 23.30 23.40 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.42 0.70 -5.29 25 147 2009-01-15 18:05:59 2008-08-19 16:09:22 3 1 122 0 27 96 19 250.10 31 72.16 CHANGED lLcsllllsLs.lslWoG+K+Lps.cDlth......splPPpplAoLGoK+lhDPcsLpsFsplKpcAp+hhpchGl+F..LG....GaAlPpccssplspcLcsIcscFtpt............KpsFLssY-ptlc-Whsp.s.sc.......auslI+ssss.stcplcpplsFsap..hhclpsss.......thscuLsppVsuLsspLhpElAppAcc...haccsht.........G+..pplop+sLss.L+slpsKlsuLuFlcP.plsslsphlcplLsslPc..sG.slcGsphhplhulls.hLscs ..............L-plllhcl-.hslWSGc++Lps.pDhphs....suplPPcclASLGSK+IsDPcsLpsFppLKpcApRLh.pphGlRF..hu....GaAVPps+h-plsscLscIpp-Ftpt............Kpp.FlssY-pslp-Whsc.p..Pc........auc.hIRpuh..sh-sVcpRlpFsat..hhphpPs............ttstsLsccVss..hucsLhpElAp-Apc...haccshs.............G+..splspps.lss.L+pl+-KlsuLoFlcp.chpPll-hlcphhsplPc....sG.slsss.htplhuhlhhhSD................................................................................................................... 0 10 19 22 +11181 PF11349 DUF3151 Protein of unknown function (DUF3151) Pollington J, Finn RD anon Pfam-B_4506 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
25.00 25.00 37.00 36.80 23.80 18.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.63 0.71 -4.13 18 342 2009-01-15 18:05:59 2008-08-19 16:16:08 3 1 337 0 92 227 37 130.20 54 86.91 CHANGED LhGPPPohLPs-ssApttLssGsss......ssVAAcHPsuSluWAt....LAEpALssG.............csVpAYAYARTGYHRGLDpLRRsGWKGaGPVPWuHEPNRGFLRuLuALu+AApsIGEs-EhsRCtphLcDuDPsAsspL ............................huPpPlhLPs...D...s...ssc.thhttupss......ssVAspHPouSluWAh....LAEsALts.s.................psVsAYAYARTGYHRGLDQLRcsGWKGaGPVPauHpPNpGhLRsltALA+AAptIGEsDEhsRCpshLcDs..sPpAhtsL...... 0 29 70 88 +11182 PF11350 DUF3152 Protein of unknown function (DUF3152) Pollington J, Finn RD anon Pfam-B_4512 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 25.10 25.10 25.30 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.34 0.71 -4.80 17 313 2012-10-03 04:41:15 2008-08-19 16:23:17 3 2 261 0 93 280 119 198.50 43 61.58 CHANGED LssG.ushspsGsGTacsVsususplGpGt.+hapYhVEVEsGlsssu..sssuFAthV-tTLusP+uWo..pssphuFp+V-sus......PDFRlpLsSstTscphC...GhchthE.sSChs.......ssRVhlNpsRWlcGAhsFpGDluuYRQYlINHEVGHulGa.sHpsCstsGtLAPlMMQQThulss..................ss..CchNsWPaP ...........................................l..sssasptGssTacsV..s...u.s.s..sps..G....pGt..+lapYsVElEsGlssss...s.suFAphV-pTLss.P.+.uWs.........cssphuFpRlsuup......................PDFRIpLsSPs...Tsc..p..hC....GhchphE....sSChss.............ssRVhINtuRWl+GAss..F...p.G...D......luuYRQYl..INHEVGHul.G...a..sH-sCstsGtLAPVMMQQThulsst.....................c.....shhCp.NPWPYP......................... 0 28 67 89 +11183 PF11351 DUF3154 Protein of unknown function (DUF3154) Pollington J, Finn RD anon Pfam-B_4516 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
24.80 24.80 25.40 24.80 24.60 23.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.63 0.71 -4.37 32 105 2009-01-15 18:05:59 2008-08-19 16:45:58 3 1 98 0 38 117 312 117.70 25 71.28 CHANGED ssltcss-sFhsNsEptuttptphpptulpQhutEFts...tuhacphhsulNRlP...........RPshuhushuLhlhA.hhcPlhFustMtuhul.............VPEPLWWLhGslVuFYFGAR..p.sKspshp ...................................................sltphh-hhh..ss-.ttsttphphpt..tthppht.t...Ehth.....t.....sh.....hcphhstlNchs...............RPhluass.hul.......hlhu..hhsPlh.....hstthtuhsl......................lP.-.sL..ahLlu..ssl.hY.huu.R..p.tKtpt.................................................... 0 13 27 33 +11184 PF11352 DUF3155 Protein of unknown function (DUF3155) Pollington J, Finn RD anon Pfam-B_4534 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 21.00 21.00 24.00 109.80 19.40 18.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.75 0.72 -3.76 6 71 2009-01-15 18:05:59 2008-08-19 17:19:52 3 1 69 0 28 65 125 88.50 74 75.98 CHANGED ARRRKRKSRRR.EGRRIL-hVPQYuIESGE-KPVTAAR+FIcupGItPPAlLlV+RNEHTTDRYFWAEKGLFGAQYVEENHFLFPSL+lL ............u+KRKRhSRRR.tGpRlLtpVPpasIEoGEcKPVTAAR+aItppGIhPPALLhV+RNEHTTDRaFWuEKGLFuAQYsEENHFLFPSLRh... 1 5 18 26 +11185 PF11353 DUF3153 Protein of unknown function (DUF3153) Pollington J, Finn RD anon Pfam-B_4513 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. Some members are annotated as membrane proteins however this cannot be confirmed. 
20.80 20.80 21.10 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.28 0.70 -4.89 26 198 2009-01-15 18:05:59 2008-08-19 17:20:04 3 4 196 0 50 139 128 206.20 24 80.70 CHANGED lh.lllhLolLLoGCVchcsslshsu.c......+lpLuhplsShosphh.PWppphcpph+phtsphp.....psspsch..plpssshsup-hpphhpphhsssup..........suuls.hsss................plplpcpNaLlulcppLplslDLcsLs.lss...........LsLplsLssshshpthptss.s.st.t.......pt.plsWpLpsGclNcL-hsh.........WhhS.LGlGullIllllhluhhLpth+hph ..................................................................................................................shlLhLl.LLoGCl+lcsolslsscc.................................tpIs...hs..h..........sh...s.S.....h..s..st.s..shtsshpp+.l..p..tts.hssc.......p.s.h...thp..hsh.....s..ht-.l.phh...hs.ssp..............sssls......................lslcpss.ll.h....LphchDL.pslss.ss......................-lphslshPht.spsh.....susp....t..................p...lpW....pLpPG.lsphpsp.........................................................s................................................................................................................................. 0 12 30 44 +11186 PF11354 DUF3156 Protein of unknown function (DUF3156) Pollington J, Finn RD anon Pfam-B_4555 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
19.10 19.10 19.40 19.30 18.10 17.50 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.96 0.71 -4.86 12 216 2009-01-15 18:05:59 2008-08-19 17:21:06 3 1 212 0 21 130 0 157.70 53 85.54 CHANGED psLuu..p-t.ut...puthhphssGlplclpERscppFLhHlVSscaplpsssshsspuph+l+psGWLRRpGlsstsppGss.t..tllstLp.s.PsLtpsLtsLDa+chslstc.....supWplslE.aGASEVVsRhPuaRRYlRLssEQRhhLhuuhhthpphLp .....................RDLuGahCE+LoE...+SAlL+Lssh.spVhs+.ppKRLFMASIpSCEFcVcGsls..hPlpG+IRlHQsGWLKR.hPVlFsuuKuou....uLlsaLN.paPsLQQALSELDaRRFoLVlH.....c+cWhCSIEhaAASEVVC+MPPlRRYLRLEccQRlLLLSVlsMlsQAhs.... 3 3 4 12 +11187 PF11355 DUF3157 Protein of unknown function (DUF3157) Pollington J, Finn RD anon Pfam-B_4561 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 21.70 21.70 23.40 22.30 20.90 20.30 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.30 0.71 -5.02 15 126 2009-01-15 18:05:59 2008-08-20 09:00:52 3 2 125 0 23 84 4 187.60 46 94.64 CHANGED h+shs..llALlLls.....soAhAA-......thlTLEsGtpVpL+DDFTWEYlhhcots.................p.ss.sstssssssusssssssPsss..........ssp..hTshslupsc.ltphuKuGlcVsLssupa-us.cLsLshslospSocsVlhVclclolas-sGplLcpcclplWpuIhRMP-TYLRttppccucslhl-ss-+spap...lslcIhElco ....................................................MKphlLlAsllsh......SsaAuE.......slpLcDGR.lpLNDDFTWpYV.pETps............................tpsophuPllhusPVls...........psT..GsolsVusc+PlhQLSDSGVDVlLuuspYEpG.pLtLoouITNQSSQSVItVcltlpV.DspGsh.ccpcVTlWQSIKRMAETYLRPppuhcGKslcLsls-puQYp...lpApIppI-T................................ 1 2 4 14 +11188 PF11356 Pilus_PilP Type IV pilus biogenesis Pollington J, Finn RD anon Pfam-B_3750 (release 23.0) Family Type IV pili are required for auto-agglutination, twitching motility, biofilm formation, adherence and DNA uptake during transformation [1]. 
PilP is an inner membrane protein, required for pilus expression and transformation [1]. PilP interacts with PilQ which suggests that the two proteins may have coordinated activity in functions such as pilus extrusion/retraction [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.67 0.72 -3.75 91 1330 2009-01-15 18:05:59 2008-08-20 10:47:43 3 3 966 2 259 918 191 88.20 22 37.51 CHANGED lauphssttt..ss..............sssuspopLsl...pLhGll..ssssppuhAlIt..ssupppsatlG-plsG..ssplppltsD..+Vll.ppsGchcsL .....................................................................................................sssssp.sphs.l...sLtGll....st...sppuhAlIp...psspQp...shthG-p.lsu..ssplppIptD....pVll.pppG+hpp.............. 0 73 139 197 +11189 PF11357 Spy1 Cell cycle regulatory protein Pollington J, Finn RD anon Pfam-B_3875 (release 23.0) Family Speedy (Spy1) is a cell cycle regulatory protein which activates CDK2, the major kinase that allows progression through G1/S phase and further replication events [1]. Spy1 expression overcomes a p27-induced cell cycle arrest to allow for DNA synthesis, so cell cycle progression occurs due to an interaction between Spy1 and p27 [1]. Spy1 is also known as Ringo protein A. 25.00 25.00 29.30 25.60 19.50 23.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.65 0.71 -4.07 6 228 2009-01-15 18:05:59 2008-08-20 11:48:45 3 6 51 0 119 278 0 110.50 46 48.09 CHANGED p-hpAFh+LLEDslVQcFLuhDpsh+lSDKYLLAMVlsYFpRAGLhstpYs+IpFFLALYLANDMEEDppssKp-IFsahLGKs.WpphhPpFLKLRcphaspMsaRAhVSRcpCEEl.AhsPpHWlWsRDR ........................pchpAF.+Lh-.D.sllpcFLhh....Dhsh+lu...DKYLLAMVh..sYFp.RAt....h...p.Yp+lpFFlALYLANsM.E.EDpptsK.pIF.ahhG................+..........s...+sph.s.FhKhR.phappMsaRAhVSpcphEEl.Ahs...PpHalWtR-R.................. 
0 37 40 55 +11190 PF11358 DUF3158 Protein of unknown function (DUF3158) Pollington J, Finn RD anon Pfam-B_3964 (release 23.0) Family Some members in this family of proteins are annotated as integrase regulator R however this cannot be confirmed. This family of proteins with unknown function appear to be restricted to Proteobacteria. 21.10 21.10 21.70 21.30 20.40 19.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.98 0.71 -4.74 14 141 2009-01-15 18:05:59 2008-08-20 11:49:37 3 1 114 0 44 138 10 148.80 45 91.21 CHANGED FpsLpQssFppLp+uA....sLKGLLpPFKGKGsL-thApQCpsLRssLhsLAp.plLsQspphPFs.LLsVcLsp.QsTuAGTTFLRWR.....phDputMGVulWppllssstTPssLlcDLauhEhQRIsLNMQISLlHoluRQAt-CAsKhApAEssYhpRhtthsss ..........psLppssa.pLpHuu....sLKGLLKPF..KGKGphpthAppCtuhR-tLhsLAp..plLtQ.AptaPap..LLPlcLsp.QsTuAG...TsFLR........WR.....phspspMGVulWpplhsss......pTP.sLlp-LauhEhQRIsLNMQISLlHoluRQAtECApKhupA-ssahpRhtt...s................................... 0 5 21 34 +11191 PF11359 gpUL132 Glycoprotein UL132 Pollington J, Finn RD anon Pfam-B_4015 (release 23.0) Family Glycoprotein UL132 is a low-abundance structural component of Human cytomegalovirus (HCMV) [1]. The function of this protein is not fully understood. 
24.40 24.40 24.90 33.70 22.80 24.30 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.57 0.70 -5.03 3 84 2009-01-15 18:05:59 2008-08-20 12:50:24 3 1 12 0 0 73 0 190.80 78 86.68 CHANGED MTSSTssPooTsoosTVToATSsssosSTNlTTAa-uSTpsc.ss.lpclLuIllYCVoGoSILSFLlVLlAVLYSSCp++PGRha+FoD-EuApLLD.sDDsGupo.h...GuGSRRG.pIPAu.SSSShYQRL-stDaDE..-usSAAREoMc+DP-.NVIYF+KDGNLDTSFVNPNYG+GSPMTIESHuDDDEc..IRYYMSVYDELTASEMEEPScS.sWQIPKLlKVoTpPVTLKEPEY ......hTSSTsVPTSTSSRNoV-sATSSs.PTTuhNMTTsHESS..VH.ssRNDEIM...KVLAI...LFYIVTGTSIFSFIAVLIAVVYSSCCKHPGR.FRFA..DEEAVNLLDDTDDSGGSSPF...GtsSRRu.......ssSS.hpRLpspDapp..pptStscpp.ccDs-.N.VlYFcKcGNL.sSFVNPpYG+tSshhIESphsDsEp..I+YYhSlYDELsAptMt-PSpS..WpIPKlhKVuhp.VoL+-PEY......................................................................................... 0 0 0 0 +11192 PF11360 DUF3110 Protein of unknown function (DUF3110) Pollington J, Finn RD anon Pfam-B_4086 (release 23.0) Family This family of proteins has no known function. 20.20 20.20 20.30 20.20 18.70 20.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.92 0.72 -4.17 25 139 2009-01-15 18:05:59 2008-08-20 13:01:11 3 4 125 0 55 137 647 85.00 34 47.06 CHANGED VaVLLasAGo-sEGIHSLclsu.........cslVLMFEscDDApRYAsLLEAQDFPsP...oVEsl-c-EI-tFCpcAGY-sclVtuGFhPts..-RLLluPP-p ...............haVLlap.s.p.-sEGIa.olp.hss.........+stVLhFEscDDApRYssLL.EAp.Da..sss...sVptl-sc-ltphCppss.Yphplltts..........lhh.PPp........................ 1 15 39 51 +11193 PF11361 DUF3159 Protein of unknown function (DUF3159) Pollington J, Finn RD anon Pfam-B_4163 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. Currently this family of proteins has no known function. 
25.20 25.20 25.20 25.70 24.30 25.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.03 0.71 -4.82 29 387 2009-01-15 18:05:59 2008-08-20 13:48:20 3 2 327 0 139 362 262 187.60 35 79.02 CHANGED hGGhpGllcSslPsllFlss.s...........htsLhhulhuAlulAsllllhRLlpRcslpsAluGhhGVuluAhlAhhoGc........A+saFLhGIhhshsaullhhlSlllRhPllGhlhuh....lpucst........sWR.ccpthlRAashuThsWsslassRhlVQh.LYhu...spsu.........hLusARlsMGaPLhslulhVoahsl+putpt. ........hGGhpGllp.SsLPsllFVls..s...........s.tuLts..ulhsAluhAsll.hlhRLl++c.slp.s.A.luGhh..GVuls.Ahl..Aaho.G.p........A+saFLhGlhhshs...hullhhlSlllRhPllGllhuh.....lpup.st.................................sW.........R..ps......s...h...h...+A...ashAThsWshlFssRhlVQh.L.Y.u...spss.........hLGsAR...lsMGhPLhuLulhloahhl+ps...h......................................................... 0 44 101 127 +11194 PF11362 DUF3161 Protein of unknown function (DUF3161) Pollington J, Finn RD anon Pfam-B_4173 (release 23.0) Family This eukaryotic family of proteins has no known function. 25.00 25.00 25.40 26.20 23.00 23.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.01 0.72 -4.08 7 30 2009-01-15 18:05:59 2008-08-20 13:54:41 3 2 18 0 17 31 0 77.50 33 35.88 CHANGED pchLpsslc.ssh.SssAhsctRuhlss.Nt.aKalt-+shp+GusVPVhIsEhhp-GLpspsQTt...s+KlsshhpYhppoaFc..llcVls ..........slp.stF.slssLh+tRu+lsslNhlaKhlh-.....+shp.K....Gu.......s.lPlhls-hhphGLpstuQTs....tKLssh......pahc..llpl................. 0 8 10 15 +11195 PF11363 DUF3164 Protein of unknown function (DUF3164) Pollington J, Finn RD anon Pfam-B_4249 (release 23.0) Family This family of proteins has no known function. 
23.70 23.70 23.80 25.60 23.50 23.60 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.01 0.71 -4.91 30 235 2009-01-15 18:05:59 2008-08-20 14:24:56 3 1 198 0 53 219 8 187.30 33 91.65 CHANGED YhpDAcGpLlP.stlKslDph....RDchVpclVspActlppplscFKscsFsDluAFlsLSAccYus.+lGGpKGNlTLhSFDGpYKlphtls-plsFDEcLQAAKsLIDEClp-WopGucsEl+ullscAFpVDKpGplssu+lLuLRRlcIsDc..+WpcAMcAIuDSlpVsGSKsYlRhYER.ss..supapsIsLDlAs ..........................................ah.supGpLhP.t.l+shDth....cD-hVpclhspAttlpppltpFK.tpshs-lsuFlsL.A-cYus.ch..GGp..KGNlTLh..oaDGph+lphuht-plsFD-.c.l.ps.A.KsLI....D...-ClpcW.oc....G.u.c....s.c.l.psllppAFps...D.KpGplsssclLtLR+l..c..Ip...D...p...cWpcAMpAIs-ulpVssoKsYlRhacR.sp...sspap.IsLDlu............ 0 16 37 45 +11196 PF11364 DUF3165 Protein of unknown function (DUF3165) Pollington J, Finn RD anon Pfam-B_4331 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 25.00 25.00 27.50 27.00 23.50 22.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.88 0.72 -3.85 7 341 2009-09-10 15:00:51 2008-08-20 14:42:00 3 1 339 0 21 93 0 81.80 62 96.35 CHANGED MhYLIluILllhaYlFhAPcSIKsThNhluhVhllshLllLhsLuhl+IhQhPsEhFlGluMllluYaAL+DIhphspcs+. ..MVYLllGILLLLLYlFATPcSIKGTVNIVuMVslLVALLILLVLSFLKIFQLPoEIFluIAMLlLAYFSlRDIoLMPlKKp+... 0 1 4 12 +11197 PF11365 DUF3166 Protein of unknown function (DUF3166) Pollington J, Finn RD anon Pfam-B_4333 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.90 21.90 21.90 22.20 21.60 21.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.26 0.72 -3.53 4 340 2009-01-15 18:05:59 2008-08-20 14:45:14 3 3 45 0 193 276 0 94.50 47 14.45 CHANGED EDsuDLRCQLQFs+EEuuLMRKKhAKlscEp-chcpELpKY+ShaGDlDS...huchsuGuPcosRpttLcLcLKhsc.puNhLutKlsEL-sENR..sh+ ....Essu-L+ppLQFVcEEApLhR+phAcl-ccNcplpp...ELpK.Y+o..h...a.G.-lDu.....................sp....p.....t..u.u..ss..ss.......+p.s...t..L.p.cLKh...sc.phN.LStKlhcLphENR..hLp.................... 0 20 34 85 +11199 PF11367 DUF3168 Protein of unknown function (DUF3168) Pollington J, Finn RD anon Pfam-B_4337 (release 23.0) Family This family of proteins has no known function but is likely to be a component of bacteriophage. 21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.80 0.71 -3.96 123 1027 2009-01-15 18:05:59 2008-08-20 15:06:50 3 4 854 1 179 692 99 112.10 16 88.23 CHANGED slhttLtss.s...slssllss........la.....-tsP.......ts...s.....shPalslutspspst.ssh...ssts..t..phplplcl...a...u...pss.......tps.s..ppl....ssslpsAL................t.hthhthphttspsh..pcsssh.....hh+.....ss..lph .........................................................ltthhss.........la........ch.hP........ps.......s....shPal.shs.h.p.s.hss..ssh...ssps..t..phtlplcVa.....u......pss....sps.h..ppl...tptltpsl..........................t.phh.tsh..p-.pt.......hh+h............................................ 1 46 110 141 +11200 PF11368 DUF3169 Protein of unknown function (DUF3169) Pollington J, Finn RD anon Pfam-B_4342 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 
25.60 25.60 27.20 29.40 25.50 25.20 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.57 0.70 -5.17 11 562 2009-01-15 18:05:59 2008-08-20 15:09:01 3 1 550 0 31 240 2 235.20 36 99.32 CHANGED MKpt+p......hhRhlhhlLluullGGllGhhsuth.......hpplshsshtslthlphlupllllllhshshhhhhpshKapphhppphD-Dtsc..ph.ppt.+phphuoIlhslshllshlsllls..hllshhsssss...Lhhslh.hlhhlhhlhhplhhhKhhphl+sh+hsthsshc.hcchhtuhDEuE+ptphcpuachhhslNthlL.slhllLhlluhhTs.splhulLllshIal.....Ylslthhhhs++aa ............................MK.............hLhalhhllLG.G....hlGhh..lGh.hhuph-.........cplhhs...sh.s..hsh..ISh......lshlI.hh.l...shh.l.shh.hh.+.+.uhKa+pLhp...c...Eh.D.-Dhs-..pY.lph.Rp......lth...GoIhhslps..lhh...hlslhI.....Vl.hh..tsssth...............hah....h....h...l...hhl...hhhhhphh..hhKh..t....h..p.c.hs.hhAs.c..scchlp.t.h.DEGERphpLptsF+hhht..s.hlL..hhhlhlh.lhSh.hT.G..Q.hhuh.L....Llh..AIal.....Y.shh.L.hs+Raa............................ 0 4 12 24 +11201 PF11369 DUF3160 Protein of unknown function (DUF3160) Pollington J, Finn RD anon Pfam-B_4384 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 38.00 34.70 21.10 20.80 hmmbuild -o /dev/null HMM SEED 631 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -12.86 0.70 -6.20 17 97 2009-01-15 18:05:59 2008-08-20 15:47:45 3 6 73 0 39 99 5 586.80 29 79.38 CHANGED sLspsshshLp+NGFVVl.........ts..ssppcplsph........YcsLc.p.........slPlFIToDSlLHlYHIQFD-TL+plEcccFastLWclspsLLssSlcsYs.....suoGc...+EAARRNsAYFuVAhsLLp..Pc..pht........................tpaphElPuhV+p-VEAELsLI-AppGhshSPIF....p......YpE...DYSQYlPRGHYT+S-pLpsYF+AhMWaGRhShLLcss...........p....................stp.....-AplQTlp....AhLIosphc.pcpcLhccWcRlYslTAFYVGhSDDLGPYEYhcALcsVFGsp....tsshsspslpcL+scLtchcs...PcIYGGTGthh....hss-ttsppLcsTcGFRhMGQRahPDSYlhpsLl.........................s......RhaPpGLDlMulLGS-RAtphL.cphs..SsapsYstpappL-sEFsuhsst-WN+NLYWuhLYuL..pPLhpsassGYPTFMQTpAWpDKpLsTALASWTELRHDTILYAKQsYs....psushs.pE........psshGYVEPsP-FYsRhLALT+MTpsGLs-hclLDcpuc..........pchppLcshLp+LhpISpKELENKcLT-E-Y-aI+sFGpplpsh..............htsVDtcspposlVADVaTss.....tstVLEEGsGhlDhllVAYc.sDGRlhlusGPVhSYYEFhQPhu-RLTDEcWR-ML.pspsPE..+PEWs 
.........................................stt.hthLtpNuFslh.........................s...p.pplhph........Yc.ptchp.....................phP.FlToD.hLchaHlhFDphL+plEcpphhstLhclsp...shhptp.hpphp.....p..pspp..hcpsAtpshs.aFulAhtLLp....................................p.plPtthppt...sptElph.Iput....ps...t.Sshh.s............hp-.....DYS.apPRGHYT+s-..pLppYF+uMMWhGphsFhhpsp......................................tphhp....Ahlls.phhp.....p.phhchWpplapshsFhhGpoDslshhchtphlppshstt.......h...hsppth.p......phh...tpl.tp..h.tt...spI.s................................p.ppstsaR....hMsQRah.Du.lhppLs...................................................ststt..RshP.pGLDl.hAshGocpAtplL.cph.t...ppap....pYptph.pplcpphtshstp.t.hppslYtsWlhsL....tsh.hpp......hs..pshPsFMpotsWppKpLsTuLuSWsEL+HDTILYuKQshs.......EhG.sss...t..............shGY..VEPssphap+hh.......sLsphstps.....Lp....p.....hsh......ls-ptp..........tphpplpphhphLhpIucKELp.sp..pLop--YchIp.h.Gs.pl-.h.................................hps.sts.spphslVADVhTss........ts..tlLc.GsGhstplhVsh..hsGpl..hlspGsVhSYYEFhpsh.s.c.RLTDEcWpchL....ptpsst.....hPtWh............................... 0 23 28 33 +11203 PF11371 DUF3172 Protein of unknown function (DUF3172) Pollington J, Finn RD anon Pfam-B_4527 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 55.60 54.20 20.20 19.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.78 0.71 -4.68 10 83 2009-01-15 18:05:59 2008-08-21 09:45:08 3 2 78 0 41 83 146 136.60 53 66.78 CHANGED FNhuTlA......lLGGVhVLGIGIGluhSSTsohsPpNVASpEhIDpuAPssElCVQaGASAhVhDhRlFlTLNPFsVaVoQPsMQPGCVLRRsNWslLEQcpLlou-QlR-CKsRMNTFGYsGsL-u.cPcIcClYQN-uApNLF ........hNhsTlAlLAGlhVLGIGlGhuhoSTsshsPpNlASp-hlDpusPsPElChQaGASAhVhDhRlFlTLNPFslaVoQPshQPGCVlRRsNW.ulLcp.ctlloscQlR-CKpRMNTFuasGsLcs.pPplcClYQs-supNhF.................. 
0 12 30 39 +11204 PF11372 DUF3173 Domain of unknown function (DUF3173) Pollington J, Finn RD, Bateman A anon Pfam-B_4543 (release 23.0) Domain This family of proteins with unknown function appears to be restricted to Firmicutes. These proteins appear to be distantly related to HHH domains and are therefore likely to be DNA-binding. 25.00 25.00 25.20 29.10 21.60 24.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.66 0.72 -4.36 20 326 2012-10-03 02:11:09 2008-08-21 10:35:08 3 1 232 0 31 166 3 61.50 41 86.57 CHANGED ptTlo+pDLIclGappppApsII+pAKplhVp.+..............GasaYsNKRLshVPsulVEE.lLGlpl ........tTlopcDLlpl.GaspppApcII+pAKplhVp.+..............GasaYss+RLshVPtplVEE.lLGlpl................................. 0 5 15 23 +11205 PF11373 DUF3175 Protein of unknown function (DUF3175) Pollington J, Finn RD anon Pfam-B_4566 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 21.00 21.00 22.40 22.40 18.10 17.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.80 0.72 -3.96 14 130 2009-01-15 18:05:59 2008-08-21 10:43:47 3 2 128 0 55 117 6 83.60 60 71.96 CHANGED +WSpcVTE+SsALDLEtsVFp.cDPccIAtSLKRSA-pScRRKuoPFpSAMSMLsFYINRAG+sLscspRpsLEcAK-ELRctFGR ...............+WSpcVTcpSD.ALDLEtslFcpcsPccIAtSLK+SA-cScRRKusPFQSAMSMLsFYINRAG+sLscsRRpsLEcAKccLRcsFGR......... 0 12 29 40 +11206 PF11374 DUF3176 Protein of unknown function (DUF3176) Pollington J, Finn RD anon Pfam-B_4567 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.40 21.40 21.50 23.40 19.70 19.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.31 0.72 -4.19 34 177 2009-01-15 18:05:59 2008-08-21 10:46:33 3 4 49 0 146 180 1 104.10 27 17.65 CHANGED llulLhhhcs+shss.Wsh...hloLNsllSlLoTlspushhhslupuluQLKWhaFp.ppp...p.......LsDhphhDsASRG.saGulh.lL.....hphps+.pluslGul....lhllu.luhsPFsQQllp .........................lslLhhhcspslsp.Wsh.........hlohsslluhlsslspuslhhslupuluQhKWhhap...pt......p.......LtDhphhDpASRG..shGuhh.lL..........hphps....t...t........lusluul....l.hl....hs.hhhsPhhQphl.s................... 0 21 72 113 +11207 PF11375 DUF3177 Protein of unknown function (DUF3177) Pollington J, Finn RD anon Pfam-B_4580 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 25.00 25.00 74.10 74.00 21.10 20.60 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.43 0.71 -4.85 22 74 2009-01-15 18:05:59 2008-08-21 10:54:46 3 1 72 0 29 77 136 186.90 44 95.59 CHANGED hcsLVWLsYRLAshFslulPLlLLIWAhh++tsAl.RLLsIYW+VuSLLsIolhLhhsph............PlualouhlA.lLhslSl....WFWVDLNEELtDhPs.pPLsLsh+hWRWulTsaulluhhhshssLsCshshs....tss.CpsWLEsP.........lFchlFtushotuhhuFlGhluLlhYllsLlpalllRLP+QGRsAst ......psLlWhDYRLAllFslhlPLlLLIWuhhc+.culsRLLhIYW+VASLLhITlhLhlsth............slualouhhAplLIslSl....WFWVDLN-ElcDhss.psLtLshpsWRWAlThausluhlhplshLsCuhshs....sss.CplWL-sPh........hac.hFtushssuhhuFlGhluLllYllhLh.alllRLs+QGRsAh.t...... 0 5 19 27 +11208 PF11376 DUF3179 Protein of unknown function (DUF3179) Pollington J, Finn RD anon Pfam-B_4591 (release 23.0) Family This family of proteins has no known function. 
19.30 19.30 23.00 22.10 18.70 17.50 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.62 0.70 -5.22 35 131 2009-01-15 18:05:59 2008-08-21 11:11:21 3 4 102 0 49 143 88 243.70 28 77.52 CHANGED P+DGIPAls.....sPpFhssspssh..lcss-sVlslplsGp..sRAYPh+ILsWHEIVN.Dpl.uGtPluVTYCPLCsouhs.F-cplss..tslsFGVSGpLhNSshlMYDR....pT-ShWpQhhGpAlsGsh....sGppLcplPshhpoWspa+pcaP-u.hVLupssuapRsYup........sPYss.........YDsssp..h......lassthsscc..lssppcVlGl.shtst.thAashspltpts.....t.plsspslslsacsstsS.....................thhsups....................................pclsphssF..WFAWt.....A..FaP-osl .....................ssIPuls.............pPpahs.stpsp...ltsp-.Vhsl.thsGp..s+AYPhphlhaHElVN..Dpl.ushslslTaCPLssouhs.a.cpph.ss.............t.hpFusoGhLhpushlhaDR....pTpShWpQhhGp..AlsGsh...........tGppLp.lP.shhpoWtpappt..aPcs.hVlsps......p..s.h..tRsYsp...........sPYts................Ytt.tt..............hhsst..spp..hsshphVlult.tt...th..........Aashstltptt.......plsttslhhhhpss.ts....................................h.................................................................................t.l..t...h..hFua........s..FhPps................................ 0 16 36 49 +11209 PF11377 DUF3180 Protein of unknown function (DUF3180) Pollington J, Finn RD anon Pfam-B_4592 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 
24.50 24.50 55.10 55.00 24.40 24.40 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.77 0.71 -4.31 30 431 2009-01-15 18:05:59 2008-08-21 11:16:20 3 1 428 0 112 293 37 137.30 29 82.88 CHANGED LlusAlsuAlluall..lhtha.tshPs.lshhsuloLhllAllplhhAhhlRp+l.....husspptlcPltuARslsLAKAoAhsGAlluGhasGhhlalL.ptspltsuus-shsusluuluulALllAGLhLE+sC+lPs ...........lssslsuAsluahh..lhthh.sshPs.lPhhsslslhllAlsphhhAhtlRs+l...............hsssppt.lc.PhtuApsLsluKAuAhsGAllsGhahGhllhhl.tphplssstpcshsssluuluulAhssAGlaLEpsC+lP... 0 35 82 104 +11210 PF11378 DUF3181 Protein of unknown function (DUF3181) Pollington J, Finn RD anon Pfam-B_4595 (release 23.0) Family This family of proteins has no known function. 19.70 19.70 20.70 27.90 18.40 17.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.92 0.72 -3.97 23 76 2009-01-15 18:05:59 2008-08-21 11:23:09 3 1 75 0 34 78 96 87.90 43 84.25 CHANGED lccLsusIuD+lYlplupWHLYLGDAtLupsLA.Eshshl-pG..upsAAcpuL-ulpVslGGGpspLPLu+LlPsuQlp-LEEILEpa ..lcsLAupIG-plYIDlApWHLYLuDA.........+......LppsLAcchhshlpps...hscsslppsLcslpVtlGGG+pplPLh-LlPspphhcLh-lLEca.. 0 7 23 32 +11211 PF11379 DUF3182 Protein of unknown function (DUF3182) Pollington J, Finn RD anon Pfam-B_4440 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
26.00 26.00 26.70 44.20 25.40 25.90 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.98 0.70 -5.76 15 90 2009-09-11 05:33:34 2008-08-21 11:34:51 3 1 85 0 36 90 3 331.40 45 93.94 CHANGED hVlspsscstssp..HEtsTpttLucplApLhGhpauGshc......sstptsu..sYlVPscTLVs.spAppLGlpuscDLaGGlVP+uFlATKAIoHsLspssAssPsGWopsFupplpssVLPGaosFShsDARpAut+LLtsGsVRlK.spusGGpGQpllcssspLDstLuuhssspLtspGLVLEccLcpspTaSVGQlpluGllsSYaGsQpLThsssGpcVYGGScLhVVRGua-ALLtLsLsspsRpAVppApsYDpAAttsY...PuhhASRRNYDVA....pGhDupGtsR..sGVLEQSWRlGGASuAElAALcuFppDPslptVpAuT+ElYG.-stlPssApllYRGsDspsG.LoKYsp ............h.Vlsasscsts..sp..HEhtspttlAcplApLhGhpauGthc........sshttsu..hYhVPscTLss.tpAtpLGlpuspDLhGGlVPHuFlATKuIsHPLstss.ussPtGWsttFAppltsslLsGaosFShsDAppAutpLLt.pGsVRlK.spusGGpGQhllpDhspL-thLsuhs-ptLtppGlVLEcsLppshTaSVGQlplsGhhhSYaGsQp.TtsspGpcVYGGScLhVsRGuapsLhsl.sLsp.thRtAlppAphaDpAsttsa...PuhhASRRNYDlA.......pGhDupGt.+..sGVLEQSWRlGGASuAElAALpAFttDPuLptV+AuohElYt.s..hPssAplhY+GsDspsG.lhKYh.......... 0 4 13 24 +11212 PF11380 DUF3184 Protein of unknown function (DUF3184) Pollington J, Finn RD anon Pfam-B_4192 (release 23.0) Family This eukaryotic family of proteins has no known function. 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 691 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.36 0.70 -6.59 4 258 2009-01-15 18:05:59 2008-08-21 11:50:57 3 9 118 0 96 260 13 177.60 23 32.61 CHANGED lssR....DRETDELRHSLRSVEQHVRWHRGRVVhVSPGHHPsWVDGAKNFLAGhCGuARVQALRSSGTHLRVTTVHQDAVMPYuhRLTVDSHsIEQpLWRVRNhTsVHVYMNDDYFVNRDVAITDLhNEYGGTIVRTE+GhltcGhpGssS.uoWsEGVtNTpLFNhhELDlpHEDaLPcsLl+pWpp......t.sQ.sspulhp.....hpc.lPs.............................luhschlssA....aspsshSpP..PTuhP+.R.R.....aYATHAPFVYCTNMaRaLssRYptEhAtsphp+RtRSApDLaVPFlYNAFIMARPWQASP+FLPYLhpL+puh+ttcscAh............................s..sPPhp...............IhL-N-DGCAPATLhts..ASEslauKFssNlctNcchlccVppssPLaFNINAGFooscAADQLRcFLHGhFPTPVYLE......................oSuuGsA...pptsLSRLFGDLMALPVVsVVSYEEGVCPLVRSLALAFAGHHRGGV+VpV-p+G....sAsLtEsRtsLsHRVhSAMPssACTYtcpVoVcsutRGESlAElARRAhsth....tGGVELPuTCGuGGAGLRVRGFVVDARTRusPlRSssALhcALAVPAQTLSLEDFRAVAVGPSEtDVVLVVSREDA-AKAVHWVNGASESDLLlTYPLPVEAYEDMsAEVRWS 
............................................................ts...............................................................................................................................t..h...s..............Th.sS.sIE..lapl..shst.alYhNDDhhh.p.l..tphh........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t................................................................................................................................ 0 54 64 83 +11213 PF11381 DUF3185 Protein of unknown function (DUF3185) Pollington J, Finn RD anon Pfam-B_4606 (release 23.0) Family Some members in this bacterial family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 21.40 21.40 21.70 25.70 21.20 21.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.83 0.72 -4.21 19 111 2012-10-03 00:20:40 2008-08-21 13:37:02 3 2 105 0 46 85 6 58.90 45 81.59 CHANGED KllGlsLllsGlsLhhaGaphhsShuSphochhTGssTccshhhhlGGsVusllGlhhl ..+hIulALlVuGlVLLYFGh....QuacSlsssloRhFT........GoPo..sKTlhllsGGsVAsllGLhtl..... 1 13 27 37 +11214 PF11382 DUF3186 Protein of unknown function (DUF3186) Pollington J, Finn RD anon Pfam-B_4607 (release 23.0) Family This bacterial family of proteins has no known function. 
24.30 24.30 24.40 24.70 23.60 24.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.82 0.70 -5.29 25 294 2009-09-11 00:08:05 2008-08-21 14:08:03 3 3 288 0 100 240 10 292.40 28 96.48 CHANGED MIshRaHllSlsAlFlALAlGlllGssh............LpsslhssLpschspLcpctspLpsptpslpppssss-satstlsstllsspLsu+sVsllphssusssssculpchLppAGAsVsuplsLp-pah..sssss-clpolsss.hhssustlsptss-sus...sthlu.slh.s........stssss-csslLssLpcss..hls..hp...sssshsusssllls.......Gsssss...tsssttstslschspuLsstGtus.....Vl...sGtptousssuhluhhRu....tssVSTVDslDsshGclsshLALtptl.sGt.sGpYGsussAsulhPs .................hIshR.HslSlsAlFLALAlGllLGush............hssslhssL...p...pphp.s..LppphspLps.ptsth.ppplssucsa.sppluspllpssLsu+sVsllph.ss.uscsDhsulschltp..AGuslsuplsLs.ppFh...sssps-pLpsllss.hhPs..GspLssphh.-tus.s..Gph..Lu.sLh.s...................................stsspspcsslLssLp..-sG...als....ap........ssthtsAsssVlls............Gss.ss........sssttstslschutuLsstus..us...................ll...sGp.t.ou.spsu....hlushRu.........sstlSTVDslDpt..sGplsshLALtp.l..sGt..sGpYGsutsApulss................................................................. 0 46 80 94 +11215 PF11383 DUF3187 Protein of unknown function (DUF3187) Pollington J, Finn RD anon Pfam-B_4660 (release 23.0) Domain This family of proteins with unknown function appear to be restricted to Proteobacteria. These proteins are likely to be outer membrane proteins. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.90 0.70 -5.31 18 149 2012-10-03 17:14:37 2008-08-21 14:09:55 3 2 139 0 32 161 75 297.70 39 89.81 CHANGED hhhllhhhsshss.sssssshshuPhhshsQoPlpshtLoPpLRSuhsLssuphEhhhotohASlWu....po-phhhDY.psphsluhcaphssphplclphpahhtusspLDuhlpsFHDhFGlsQsGRscssccpaplph..s.ptGhphpsFpGcol.tuulolh....lpaplhpstpc..ulSlGuoLhasssssupFpss.uh-.ulplsauhpts.pash.......auslGhsahssspshhs....h+stshthuhGhcaphhspasLlsphchapGhh-ss..schucsSsElshGaRYhh.pssAlElullENlhNhDNSsDluFplGhRaph ....................................................................................................h...h.hhhhhhshsshssphpaGPhhsYAQuPltutuLoPpLRsGFoLP.sp.Elauuh.ohASlWu.........cossYthDaYpNQhslGl+WQhsscWQhELsYRas.AhsNpLDulThuFHDhFGlsQNGRDpVc+cRFpIsh..P.cptlthcDFsGsTL.suuholY....spYQlasscpH.....GLSL..Gu..oLYYNp..sGhhchs.....pFE..QuLQLNYoYpp..s....sHpl....auhlGl.s.a.+sssss..s....lsh+.cs...oh...s...husGYcYplsspH.pLhspa+..a.Y..QGss.-us....pEhp...cs...us.Ehl..L.GYRYhh.ssSALEluls....ENl.hssDNSTDIAhpluYRa+.................................................... 0 10 20 27 +11216 PF11384 DUF3188 Protein of unknown function (DUF3188) Pollington J, Finn RD anon Pfam-B_4573 (release 23.0) Family This bacterial family of proteins has no known function. 24.60 24.60 25.00 24.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.14 0.72 -4.39 33 165 2009-01-15 18:05:59 2008-08-21 14:10:22 3 1 164 0 20 53 76 50.00 44 76.71 CHANGED luAPhLIhlul.luhh...pRpGsD+ltulPsllsGh..uLllpusVsRth+R++ ..NuLFLsSIGhlIlLa...ohsspsthhsL.uLsTGl..hllslGshhhh+t+K.......... 
0 5 9 17 +11217 PF11385 DUF3189 Protein of unknown function (DUF3189) Pollington J, Finn RD anon Pfam-B_4499 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes 20.40 20.40 25.90 22.30 19.40 18.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.56 0.71 -4.64 23 202 2009-01-15 18:05:59 2008-08-21 14:20:44 3 2 152 0 63 128 0 145.10 44 89.91 CHANGED hlIYpsaGGsHSSslAAuIHLspLPhc+hPspcElhplPha-php.pp-hGplhYhGpDEhGNcVYslGhts.tphltsslcshh.plhphp...ppcllllso..hlshhh+lGGahSR+htls.hGRPllshGhppsY.plsplVcpsKpp .......................................hhIYpsaGGoHSoslAAAhHLscLPsD.Rh.oKEEILslsaFsKLp.scDhG+llFhGhDEtGN.VYolGsts.u+lVlPAh+cLh.clLppp...pEcIlhssToPsVsl.MphGGhhSRRh+lshIGhPLllaGsphshcsl.cLVphsKc.t................................ 0 37 50 58 +11218 PF11386 VERL Vitelline envelope receptor for lysin Pollington J, Finn RD anon Pfam-B_1349 (release 23.0) Family VERL, the egg vitelline envelope (VE) receptor for lysin, is a giant unbranched glycoprotein comprising 30% of the vitelline envelope. Lysin binds to VERL and creates a hole as VERL molecules lose cohesion and splay apart. These proteins are important in the mediation of fertilisation [1] 25.00 25.00 26.50 26.80 23.60 22.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.65 0.72 -4.01 5 193 2009-01-15 18:05:59 2008-08-21 16:03:27 3 6 11 0 0 191 0 76.70 70 39.52 CHANGED VPITRESGINMMhIHYspNcosDSPGMCVFtGPYSVPKNDTVVLYTVTARLKWSEGPPThLSIECYMPKSPV..APEPEA ......VPITpEpGINMMLIQYoRN..ch..L.DSPGMCVFWGPYSVPKNDTVVLYTVTARLKWSEGPPT.LSIECYMPKSPs..APKPE....... 0 0 0 0 +11219 PF11387 DUF2795 Protein of unknown function (DUF2795) Pollington J, Finn RD anon Pfam-B_1395 (release 23.0) Family This family of proteins has no known function. 
21.00 21.00 21.10 21.00 20.70 20.20 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.80 0.72 -4.09 58 486 2009-01-15 18:05:59 2008-08-21 16:11:08 3 5 367 0 217 407 134 44.80 39 51.93 CHANGED L..pshcYPusKppLlppA+cssAss.c.llcsLcplP.D+c..Ysoss-V ......LpsssaPAoK--Ll-hAt+sGAs..-.Vl-sLppl..Dcs...Y-ohp-l...... 0 58 141 184 +11220 PF11388 DotA Phagosome trafficking protein DotA Pollington J, Finn RD anon Pfam-B_001493 (release 23.0) Family DotA is essential for intracellular growth in Legionella [1]. DotA is thought to play an important role in regulating initial phagosome trafficking decisions either upon or immediately after macrophage uptake [2]. 25.00 25.00 37.20 33.00 20.20 18.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.32 0.72 -3.99 2 138 2009-01-15 18:05:59 2008-08-21 16:11:59 3 1 11 0 2 137 0 103.60 83 27.67 CHANGED FDosTGLDcSsFDPsQLTKPFGKTCQssYuLLCsWFQNKSDKLlQIQSLIsGsPALuQDGVKQPDLSssPpRt.VpGPhSSTVYGFlNNSMMVQLPGQPGIKPLT ........................................FDSsTGLDsSsFDPsQLTKPFGK.TCQGTYALLCTWFQNKSDKLVQIQSLIN.GsPALSQDGVKQPDLSPsPpRthVEGPLSSTVYGFlNNSMMVQLPGQPGIKPLT. 0 1 1 2 +11221 PF11389 Porin_OmpL1 Leptospira porin protein OmpL1 Pollington J, Finn RD anon Pfam-B_001515 (release 23.0) Family OmpL1 is a member of the outer membrane (OM) proteins in the mammalian pathogen Leptospira. Specifically, it is a porin [1]. 
25.00 25.00 49.90 48.90 19.00 18.20 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.82 0.70 -5.34 3 83 2009-01-15 18:05:59 2008-08-21 16:12:22 3 1 36 0 4 74 1 247.50 83 87.46 CHANGED ITKDGLDAATYYGPVRSTDTCTVGPSDPTCVQNPuKPAGEGNYlGVAPRKAIPAENRLITLDRTTGGlINARSTKGAMVGGNLMVGYESDFGKYFFWRVAAEYTQKISGGITKADIAGYNIVDMTWGFSSIVIPATVGIKLNVTEDAAVYMGAGLNYFNGGWSLNGSNNIKGGHDILAAAGAGSVANLLSDGTDPITTREHVRFRTSGIAPNFLIGTQARVTDKGHVFLELETIMSAAYAVGKTQSlGGAsTLuPFPAYPIVVGGQI .........................ITKDGLDAATaYGPVRSTsTCT.VussDPTCVQNPuKPsGEGNYlGVuPRKAIsAENRLITLDRTTGGhINARSTKGAMVGGNLMVGYESD..FGK.YFFWRVAAEYTQKISGGITKADIAGYNIVDMTWGFSSIVIPATVGIKL.NVTEDAAlYMGAGLNYFNGGWSLNGsNNIKGGaDILAAA...Gss..oVANLLuDGTD........PlTTREHlRFRsSGIAPNFLIGTQARVTDKGH.VFlELETIMSAAYuVGKTQShGGAosLuPFPuY........................... 0 2 3 3 +11222 PF11390 FdsD NADH-dependant formate dehydrogenase delta subunit FdsD Pollington J, Finn RD anon Pfam-B_1352 (release 23.0) Family FdsD is the delta subunit of the enzyme formate dehydrogenase. This subunit may play a role in maintaining the quaternary structure by means of electrostatic interactions with the other subunits [1]. The delta subunit is not involved in the active centre of the enzyme [1]. 20.70 20.70 21.20 44.80 20.50 19.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.05 0.72 -3.88 52 250 2009-01-15 18:05:59 2008-08-21 16:12:50 3 2 245 0 113 242 44 61.60 45 72.72 CHANGED +LlcMANQIusFFpo..scsculsslAsHlp+FW-PRMRcpLhsh.lp......t.uuts.LsPlVhcAl .....+LlcMANQIusFFpoh.s+c..-AlsGlAsHIp+FW-PRMR+pLhshl-......s..uut...s..LsPlVhcAl.............. 0 21 62 82 +11223 PF11391 DUF2798 Protein of unknown function (DUF2798) Pollington J, Finn RD anon Pfam-B_1194 (release 23.0) Family This family of proteins has no known function. 
30.40 30.40 30.40 30.50 29.90 30.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -8.73 0.72 -4.22 84 363 2009-09-10 17:28:17 2008-08-21 16:15:59 3 2 310 0 117 347 466 59.20 27 71.18 CHANGED llMShhMuhl..hoslhohhsh.Ghs.s..sFhtpWhpuahlAaslAhshsl.llsPhlp+lstpls ............hhhuhhMuhl...hShlhohhsh.Ghss.....talttWh.p.uahlAaslAFsssl.llhPll++lsthh....... 0 24 71 99 +11224 PF11392 DUF2877 Protein of unknown function (DUF2877) Pollington J, Finn RD anon Pfam-B_002434 (release 23.0) Family This bacterial family of proteins are putative carboxylase proteins however this cannot be confirmed. 25.00 25.00 26.70 26.50 22.60 23.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.31 0.72 -3.85 21 929 2009-01-15 18:05:59 2008-08-21 16:23:37 3 3 528 0 50 363 13 109.10 43 39.83 CHANGED hlGhG.GLTPSuDDhLsGhh.uhhhh.t...pshpphht.htphlths..pTTtlStphLppAhpGphspsltpLhtslhpst..spstpslcplls.lGuoSGsDhLhGlhhuhph ................hlGhG.GLTPSuDDhLoGhl.sshahsu...........tsucphptp..htp..s..ph.p.sTTtlSsthLchAhQGhhspslh+F.....lHsl.ts......t...............ps......s.pAI-...plht.lGpoSGsDhLhGhhhGppL.......... 0 24 36 42 +11225 PF11393 IcmL Macrophage killing protein with similarity to conjugation protein Pollington J, Finn RD anon Pfam-B_002787 (release 23.0) Family IcmL contains two amphipathic beta-sheet regions, required for the pore-forming ability which may be related to the transfer of this protein into a host cell membrane [1]. The icmL gene shows significant similarity to plasmid genes involved in conjugation however IcmL is thought to be required for macrophage killing. It is unknown whether conjugation plays a role in macrophage killing [2]. 
21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.11 0.72 -4.11 23 289 2009-01-15 18:05:59 2008-08-21 16:26:36 3 3 199 0 37 173 10 101.70 27 48.53 CHANGED su+Ih.l.Ph-pPthspstlhsaupcslhpuashDassYcsplsphtsp.FTppGats.ahsuLppSshLcsl+sp+hslsusss..usspllppshh..tshhtWplphPlpl ...................................shppshhspsthhpauspslspuFshDa..spacppl.ssltst.aos.cGatt.ahs.AL.p.s.SsllcslKcc+h.sls.sss..Gsullsppsph....tshhhWphphPlhh........... 0 19 23 32 +11226 PF11394 DUF2875 Protein of unknown function (DUF2875) Pollington J, Finn RD anon Pfam-B_002814 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 20.70 20.70 21.70 22.10 18.00 17.90 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.49 0.70 -5.92 8 205 2009-01-15 18:05:59 2008-08-21 16:27:43 3 2 67 0 20 260 0 232.10 34 83.25 CHANGED AppuppYsLElRGlGlslsp.pQpcIW+cIccKssNauSlhSQcPcDYssS.soRps-hclsspsAFKauApcuV-YWPlPVhllGPP+shccua..RAAusIsusRppAoLGVTLFLWQ-DtNTscupuMlE+LFsFFDsHPDVPpALlhSpDGslsRshhRsPG...oss..psuphVPshPDSMsAlLVoRSDRVDRhIRPYAV-psEtlspssTpaDlsKLWNFaW-psc.....uFssaYE..AspKptGscsP.ususMSosaWQupLPsLWKTIuNKGPGpFcPSPalPVRWspWQVKpFDsAPlLGYLHRPI+VpLsD-HGKPLKsAtQAcAL+AGWtQAlsTLP-GpK.PsRVFYDTTsspthtIsLsQALHsls..upul-LuDV+EGYDIGRRl.GNTGVSSsLVQIuLAlIASYcDGGsSAsVptpssGoATIhMVSPPDtApKAttsQsp.GscPF 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 4 8 11 +11227 PF11395 DUF2873 Protein of unknown function (DUF2873) Pollington J, Finn RD anon Pfam-B_002845 (release 23.0) Family This viral family of proteins has no known function. 25.00 25.00 57.00 57.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.32 0.72 -4.39 2 75 2009-01-15 18:05:59 2008-08-21 16:30:08 3 1 73 0 0 10 0 43.00 96 97.46 CHANGED MsELTLIDFYLCFLAFLLFLVLIMLIIFWFSLElQDlEEPCsK MNELTL..IDFYLCFLAFLLFLVLIMLIIFWFSLEIQDLEEPCTK. 0 0 0 0 +11228 PF11396 DUF2874 Protein of unknown function (DUF2874) Pollington J, Finn RD anon Pfam-B_002962 (release 23.0) Domain This bacterial family of proteins is probably periplasmic and of unknown function.\ There may be between one and six copies of this domain per sequence. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.79 0.72 -4.00 181 1589 2012-10-01 23:09:26 2008-08-21 16:38:52 3 9 279 21 355 1152 36 57.30 21 63.99 CHANGED psl.shsplPss....lpstlppp..asstp.lpc....hphpp........phYclclp.......tpphclhasppGphl .................h..hstLPps.......lpshlppp...asssp.ltc.....lcpcp........stYclcls.........sphclhFstpGphh..... 
0 136 252 283 +11229 PF11397 GlcNAc Glycosyltransferase (GlcNAc) Pollington J, Finn RD anon Pfam-B_002901 (release 23.0) Family GlcNAc is an enzyme that carries out the first glycosylation step of hydroxylated Skp1, a ubiquitous eukaryotic protein, in the cytoplasm [1]. 20.40 20.40 20.40 20.60 20.30 20.00 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.12 0.70 -5.33 25 216 2012-10-03 05:28:31 2008-08-21 16:46:00 3 11 113 0 109 246 398 269.10 25 60.42 CHANGED oIFVslASYRDsEhhsTLcsLhspAspPcRLalGVs.Qpts..s-ss...........................................................................................................ssFhsc...........................tht.........h.tsplphhslssspu+GssaARahspthYpsEcYhLplDSHpRFl.sWDppllshlppLp.........ssKsVLSsYPsGYps...tsppphpcpsssththspF.sspGhlplp..........................uphhss.pp.ttP.....l.ssalAuGFlFucGp.Fs+-VPaDPal..hFpGEElhhosRhaT+GaDlYsPs+sl..laHhYsR.....sspsKhWs-pp....................tpWhtppppScpRsphLL..................................shtsspss........hhthshYGlGspRolspa.pauGlshtp 
......................................................................................................................lFlsluuaRD...p.hh.Tl.shhppAttP.ppl.hulh.Q.....t.s.........................................................................................................................................................................................................................t..h.tsplphhphph.pupGsshARahsp....t....ha.....ts...EpahhplD.SHhpFh.tWDp.hlt.hptht.................................s.pslLotYP..s...hp...............ttt....t.p.p.h...h.hhhht.h....ttp..Gh.hphh..........................................u.t.ht..............P...........h.t..ahu.uG.a..Fupup.hhpcV.P.hDPph...hF.GEE.hhh.......usRhaTpGaDh.YsPpp..s..l..haH..Ypp.........tptsphaps.........................th....t..pphu.hpRhhhhl..................................................................................h..ht.ashGp.Rohtpa..h.sls...t.................................................................................. 1 60 86 105 +11230 PF11398 DUF2813 Protein of unknown function (DUF2813) Pollington J, Finn RD anon Pfam-B_002207 (release 23.0) Family This entry contains YjbD from Escherichia coli (Swiss:P75828), which is annotated as a nucleotide triphosphate hydrolase. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.24 0.70 -5.38 21 824 2012-10-05 12:31:09 2008-08-21 16:54:35 3 5 785 0 109 1485 191 326.10 57 61.92 CHANGED MaLERIEIsGFRGIpRLSLslcp.sVLIGENuWGKSSLLDALSLlLss-scLYpFshpDFHhs.u.ppsps+cLpllhTFpEpc.s.-ppstRY+pl..pslWs.scs.GhcRIYYRlpuphs.sssVpTphuFLDtpGps..lslcch-pLsppLlplHPVlRlRDA...RRhtptph.tt.t.p........Rlp+clcphhRcLhspPtplsc..sEl+puLpAhppLl-HYFu.pstppsp.+ph+cp.ppsppsaph..hpslsphlcpsss..+phRllLlGllsshLpA+Gshp..Lc+tARPILLlEDPEsRLHPhMLulAWpLLshLPhQ+IsTTNSuELLS.VPLcpIpRLVRpoc+stuapLs.psLop-DhRRIuFHIRaNRstA ...............................................................................................................................................................................................................MhLERV.EIVGFR....G.I.....N.R.......L..S...L.h.L...-...........p.......N...s.....V...L..I...GENAWGKS...S...LL.D...A...L.......T.L.....L......L.......S.......P..........E.......s.....c....L.....Y......H......F...p..c...c......D.....F..a...a....Ps.....G....D......h.........p......u....+.E..+...H......L.p....I...I....L.........T..Fc......E.o...sG.....+.....a........+.s.....p..R.Y..R.sL...........css...W...........s.s.s.p.D.....G...hH...R..IaYRl..E....GE.....pu...t....D....G.......o....V....h....Th..R..o...F...L....D...p...-.....GpP.......lsl...-..c.....I.....s......c...s......+.cL....l..RL.h.....PVL....R....L...RDAR............FhRRlRs.us...s.s.N.sss.s-.........................sss+QLD...L...s......REL.ss.p..P.Q..p..L.oc.........GpI..R..Q..G.L.S...A.M.sQ.L.LEHYF......................SE....Quu.....u.....p......s..c.h.R...hh...Rc..+..s....p.s...c...pcu.W+h.............LD.I..N.RMI....c....cP.s..u..........RS.h..RlIL.....L.GLF..uTL......L.Q.AK...G..o.lc......L.c+...c..A...R.P....LL.L.lEDPETRLHP...I.M.......L.S..V....A.W....p....L.L......s........L....L........P......L....Q....R....I.....sTTNSGEL..L..S..L.s..PlE.pls..RLVRESu.R.V..AAaRL..G
PsG..............LS.sEDuRRIu..FHIRFNRsSu............................................................................................................................................................................................................................. 0 15 46 81 +11231 PF11399 DUF3192 Protein of unknown function (DUF3192) Pollington J, Finn RD anon Pfam-B_002991 (release 23.0) Family Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. 21.80 21.80 21.80 22.40 20.20 21.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.21 0.72 -4.09 32 113 2012-10-01 23:09:26 2008-08-21 16:59:05 3 1 64 0 48 96 32 102.80 40 79.35 CHANGED susVlhlh-ssssphs.......W-c.........+pthN+pplucLpLGt..ohspVhslhGsuDFoEAhpps.......cpplpVLFYRTp+h+uDGhTTKDECTPLlFcNspL..luWGpsu.Ypph .........................................susVl.lh-spspths.......Wcc..ppthN+pplscLsLG...ohspVhshhGssDFoEAhtpp.......cpplpVLFYRTp+....p+SDGhTT.KDECTPLlFcNspL..luWGpsAYpph............... 0 8 20 34 +11233 PF11401 Tetrabrachion Tetrabrachion Pollington J, Finn RD anon pdb_1fe6 Family Tetrabrachion forms a parallel right-handed coiled coil structure with hydrophobic interactions and salt bridges forming a thermostable tetrameric structure. It contains large hydrophobic cavities. No function is known for this family of proteins [1]. 
20.50 20.50 21.60 114.20 18.10 17.00 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.60 0.72 -4.17 2 2 2009-01-15 18:05:59 2008-08-22 10:30:29 3 1 2 8 1 5 0 49.00 100 3.22 CHANGED IINETADDIVYRLTVIIDDRYESLKNLITLRADRLEMIINDNVSTILAS IINETADDIVYRLTVIIDDRYESLKNLITLRADRLEMIINDNVSTILAS 0 1 1 1 +11234 PF11402 Antifungal_prot Antifungal protein Pollington J, Finn RD anon pdb_1afp Family Antifungal protein consists of five antiparallel beta strands which are highly twisted creating a beta barrel stabilised by four internal disulphide bridges [1]. A cationic site adjacent to a hydrophobic stretch on the protein surface may constitute a phospholipid binding site [1]. 25.00 25.00 41.10 40.80 23.90 23.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.05 0.72 -3.77 6 27 2009-01-15 18:05:59 2008-08-22 10:34:21 3 1 23 2 9 23 0 52.00 53 56.43 CHANGED ApYsGKCaKKDNhCKYKspuGKTsIsKC.s...K+Cs+DGsKCEaDSY+sKsh l.cYhGKCTKu-NpCKYKsDpGKsslppCPphs..NKKCsKDGNpCcaDShs+...... 0 0 2 6 +11235 PF11403 Yeast_MT Yeast metallothionein Pollington J, Finn RD anon pdb_1aoo Family Metallothioneins are characterised by an abundance of cysteine residues and a lack of generic secondary structure motifs. This protein functions in primary metal storage, transport and detoxification [1]. For the first 40 residues in the protein the polypeptide wraps around the metal by forming two large parallel loops separated by a deep cleft containing the metal cluster [1]. 
25.00 25.00 93.70 93.70 21.20 19.80 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.76 0.72 -3.81 2 7 2009-01-15 18:05:59 2008-08-22 10:37:10 3 1 6 6 2 4 0 40.00 100 65.57 CHANGED QNEGHECQCQCGSCKNNEQCQKSCSCPTGCNSDDKCPCGN QNEGHECQCQCGSCKNNEQCQKSCSCPTGCNSDDKCPCGN 0 2 2 2 +11236 PF11404 Potassium_chann Potassium voltage-gated channel Pollington J, Finn RD anon pdb_1b4g Family Fast inactivation of voltage-dependant potassium channels occurs by a 'ball-and-chain'-type mechanism. It controls membrane excitability and signal propagation in central neurons [1]. Inactivation is regulated by protein phosphorylation where phosphorylation of serine residues leads to a reduction of the fast inactivation [1]. 20.00 20.00 20.20 23.80 17.90 19.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.18 0.72 -4.16 2 56 2009-01-15 18:05:59 2008-08-22 11:11:04 3 3 29 3 24 54 0 28.50 76 4.63 CHANGED MlSSVCVSSh.+GRKuGNKsssKsChpt-Ms ..MISSVCVSSY.RGRKSGNKPPSKoCLKEEMA... 0 2 7 13 +11237 PF11405 Inhibitor_I67 Bromelain_inhib; Bromelain inhibitor VI Pollington J anon pdb_1bi6 Family Bromelain inhibitor VI is a double-chain inhibitor consisting of a 11-residue and a 41-residue chain. This protein is the 41-residue heavy chain which is joined to the 11-residue chain by disulphide bonds. The inhibitor acts to inhibit the cysteine proteinase bromelain [1]. 
25.00 25.00 298.50 103.10 18.80 17.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.45 0.72 -3.91 2 3 2009-09-25 10:05:36 2008-08-22 11:27:54 3 1 1 2 0 4 0 41.00 95 50.00 CHANGED -EYKCYCsDTYSDCPGFCKpCKAEFGKYICLDLISPNDCVK EEYKCYCTDTYSDCPGFCKKCKAEFGKYICLDLISPNDCVK 0 0 0 0 +11238 PF11406 Tachystatin_A Antimicrobial peptide tachystatin A Pollington J anon pdb_1cix Family Tachystatin A contains a cysteine-stabilised triple-stranded beta-sheet and shows features common to membrane-interactive peptides. Tachystatin A is thought to have an antimicrobial activity similar to defensins.Tachystatin A is also a chitin-binding peptide [1]. 25.00 25.00 39.90 39.90 22.80 18.70 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.57 0.72 -4.10 2 3 2009-01-15 18:05:59 2008-08-22 11:36:20 3 1 1 1 0 4 0 36.30 96 82.58 CHANGED YSRCQLQGFNCVVRSYGLPTIPCCRGLTCRSYFPGSTYGRCQRa YSRCQLQGFNCVVRSYGLPTIPCCRGLTCRSYFPGSTYGRCQRa 0 0 0 0 +11239 PF11407 RestrictionMunI Type II restriction enzyme MunI Pollington J anon pdb_1d02 Family Type II restriction enzyme MunI recognises the palindromic sequence C/AATTG. It makes contact with the DNA via the major groove [1]. 25.00 25.00 85.10 84.90 18.90 17.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.61 0.71 -4.70 2 9 2012-10-11 20:44:46 2008-08-22 12:45:49 3 1 8 2 3 11 4 168.90 46 99.93 CHANGED MGKpELpsR.sWQthuGLhuptAEpph.sVF.A.FpGTcYVlhccPKcLKslYup............NP.......paGVS.DaAIpNpcT+KhLasEIKpQ-GaV.Gcs.PpsGRGNAHERSCKhFTPGLLKshpplusl.s.thLPFWlVapGcITRDsKRsREITaWaDcYtspaFhW+ss.Sup.LlpHFpcpL++hLD ....Mupp-LptRtsWQs.SG..h..tAtsAEpshhssFptsFc...sTcYhlpp+Pp-hKslYspV.Lstp.httIasP......p........hhp......aGlS.DaAIpNpcTtKhlasElKRQDGWVEGtp..SsGRGNAHER.CKhFTPGLhcshRphutl.s.thLPFWlVh.GDITRDPpRsREIsaWappYptpaFhWRss.stpsLlpHFpp.Lh.hL...... 
0 3 3 3 +11240 PF11408 Helicase_Sgs1 Sgs1 RecQ helicase Pollington J, Finn RD anon pdb_1d8b Family RecQ helicases unwind DNA in an ATP-dependent manner. Sgs1 has a HRDC (helicase and RNaseD C-terminal) domain which modulates the helicase function via auxiliary contacts to DNA [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.46 0.72 -4.15 5 29 2012-10-03 03:05:55 2008-08-22 12:54:14 3 2 28 1 20 63 15 77.40 40 5.87 CHANGED ElscLshuYERLRslulslGNRMNPPlssaM.PDslLKKlAshLPATE-EFlsLlGsN-ss.sRKYKYFKcTltcLRK+Rpc .......EhsslphsYc+LRElSlslGsRMsP.P.l.ss.Fh.sDslLKKhAshLPhs-ppFspLsslpcp..pc+F.KY..FKsTlhcLp+cRp.p.............. 0 1 10 18 +11241 PF11409 SARA Smad anchor for receptor activation (SARA) Pollington J anon pdb_1dev Family Smad proteins mediate transforming growth factor-beta (TGF-beta) signaling from the transmembrane serine-threonine receptor kinases to the nucleus [1]. SARA recruits Smad2 to the TGF-beta receptors for phosphorylation [1]. 25.00 25.00 27.00 27.00 20.50 18.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.10 0.72 -4.04 3 74 2009-01-15 18:05:59 2008-08-22 13:01:21 3 6 56 3 44 72 0 40.10 79 3.24 CHANGED uRSPNPNNPMEYCSTIPPaQQAssSP..uSPPPSVMVPVGV ...pSPNPNNPAEYCSTIPPLQQAQ.A..SGsLuSPPPTVMVPVGV. 0 8 15 28 +11242 PF11410 Antifungal_pept Antifungal peptide Pollington J anon pdb_1dkc Family This peptide has six cysteines involved in three disulphide bonds. It contains a global fold which involves a cysteine-knotted three-stranded antiparallel beta-sheet along with a flexible loop and four beta-reverse turns. It also has an amphiphilic character which is the main structural basis of its biological function [1]. 
21.00 21.00 21.60 21.30 20.90 20.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.43 0.72 -3.86 8 33 2009-09-11 11:38:32 2008-08-22 13:09:29 3 2 21 2 11 33 0 33.30 43 33.95 CHANGED CIsNGutCpsDuu.ssCCSGFCap.p.PG..hshGhC+sR .......IssGtsCptDGShG.CsSGaChp.t..s...s.GhC+........ 0 1 4 9 +11243 PF11411 DNA_ligase_IV DNA ligase IV Pollington J anon pdb_1ik9 Family DNA ligase IV along with Xrcc4 functions in DNA non-homologous end joining. This process is required to mend double-strand breaks. Upon ligase binding to an Xrcc4 dimer, the helical tails unwind leading to a flat interaction surface [1]. 21.20 21.20 21.20 21.90 20.80 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.59 0.72 -4.21 7 68 2009-01-15 18:05:59 2008-08-22 13:42:40 3 9 55 4 47 66 1 35.80 55 4.08 CHANGED ST+cHFAcEYDpYGDSYhsDTsttpL+-VFpplpss .....ST+cHFA+EY.DsYGDSYasDTDh.sQLKEVFstI+s...... 0 14 18 30 +11244 PF11412 DsbC Disulphide bond corrector protein DsbC Pollington J anon pdb_1jpe Family DsbC rearranges incorrect disulphide bonds during oxidative protein folding. It is activated by the N-terminal domain of DsbD, a transmembrane electron transporter. DsbD binds to a DsbC dimer and selectively activates it using electrons from the cytoplasm [1]. 
21.60 21.60 21.70 21.60 21.20 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.96 0.71 -4.27 187 2597 2009-01-15 18:05:59 2008-08-22 13:54:41 3 21 2012 10 617 2080 1106 118.10 24 21.34 CHANGED pschLssppA.h..........tsssp...pltlphp.lssGaalYhcph..th.....ph.....pssssh....hsphpaPss.p...hhpD.th...ps.sYc.spltlsls..lssss............tth...plplsaQuCs-..slChPspsphhhhhs ....................................t...hl..ppA.F.hph.........ppspp....plslphp..lp.s.Gaa.lYp.cph.........h.......................ph.ssspsh.....hup.h.p.h.P.tu..p.....hh..p...D.....pha.u...p...s...phYc.sp...l.slsls....lptss......................ssh...plplsa..QGCu-...u..hCYPPpsphh....s............ 0 170 352 492 +11245 PF11413 HIF-1 Hypoxia-inducible factor-1 Pollington J anon pdb_1lqb Family HIF-1 is a transcriptional complex and controls cellular systemic homeostatic responses to oxygen availability [1]. In the presence of oxygen HIF-1 alpha is targeted for proteasomal degradation by pHVL, a ubiquitination complex [1]. 17.80 17.80 17.80 18.10 16.40 17.70 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -7.85 0.72 -4.77 17 330 2009-01-15 18:05:59 2008-08-22 14:00:38 3 13 102 4 83 283 0 34.00 61 4.79 CHANGED .sspshsDLDLEMLAPYIPMD.DDFQLsslst.s. .......s.hspshs-LDLEMLAPYIPMD..DDFQLpshs..p......... 0 4 15 35 +11246 PF11414 Suppressor_APC Adenomatous polyposis coli tumour suppressor protein Pollington J, Finn RD anon pdb_1m5i Family The tumour suppressor protein, APC, has a nuclear export activity as well as many different intracellular functions. The structure consists of three alpha-helices forming two separate antiparallel coiled coils [1]. 
22.70 22.70 23.30 23.50 22.60 22.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.58 0.72 -4.20 13 215 2009-01-15 18:05:59 2008-08-22 14:08:54 3 43 72 1 114 197 0 80.70 35 6.60 CHANGED ssashL+phcpLEcE+-hLLtGL-tlE+u+-WYhsQLpslQcp.cplGphusth-hho-hppcpLshphs+lpclNcsLttLhp ...................ht.hphhcELEpE+....slLLtsL-t.E+t+-WYhs.QLpslpcRhc......pLstscs.......h...........shtoDhppcp.Lphpttplpps.cphhshh................... 0 20 29 61 +11247 PF11415 Toxin_37 Termicin; Antifungal peptide termicin Pollington J anon pdb_1mm0 Family Termicin is a cysteine-rich antifungal peptide which exhibits antibacterial activity. A cysteine stabilised alpha beta motif is formed due to an alpha-helical segment and a two-stranded antiparallel beta-sheet [1]. 25.00 25.00 41.00 41.00 19.80 16.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.18 0.72 -4.33 8 150 2012-10-01 23:31:40 2008-08-22 14:24:34 3 1 18 1 0 139 0 34.90 64 56.29 CHANGED CsFpSCWAoCQtQHGIYFRRAaCDGSpCpCValNG ..CshppCWAsCQApHGRYFRRAYC-GShC+CVFNNG.... 0 0 0 0 +11248 PF11416 Sed5p Integral membrane protein Sed5p Pollington J, Finn RD anon pdb_1mqs Family Sed5p interacts with Sly1p , a positive regulator of intracellular membrane fusion, allowing SM proteins to stay associated with the assembling fusion machinery. This allows for participation in late fusion steps [1]. 21.30 21.30 22.70 21.40 18.60 17.90 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.64 0.72 -7.24 0.72 -4.51 11 45 2009-01-15 18:05:59 2008-08-22 14:31:23 3 1 44 1 30 42 0 28.10 50 8.41 CHANGED h.sIpNRThEFQQsVsoYcKhN+Kpsspt .M.sIpsRThEFQQsVhoYcKpNKp.ppp....... 0 5 17 29 +11249 PF11417 Inhibitor_G39P Loader and inhibitor of phage G40P Pollington J anon pdb_1no1 Family G39P inhibits the initiation of DNA replication by blocking G40P replicative helicase. 
G39P has a bipartite stricture consisting of a folded N-terminal domain and an unfolded C-terminal domain. The C terminal is essential for helicase interaction [1]. 20.40 20.40 20.80 20.70 20.10 20.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.42 0.72 -3.99 2 43 2009-01-15 18:05:59 2008-08-22 14:38:22 3 1 41 3 6 44 42 67.20 27 43.94 CHANGED MI.c-slpILphlpthYP...tchpPsDhKshVphWpphLt-Y.hp.l..slpchhtsNKFPPoVu-llcA .........................Mhcp-shcllthlpssYP....php...p...s........c..t....hlslWhphL.cDhsaphlttslcpal..t..ss.p..aPPolA-lh..p............ 1 4 5 5 +11250 PF11418 Scaffolding_pro Phi29 scaffolding protein Pollington J anon pdb_1no4 Family This protein is also referred to as gp7. The protein contains a DNA-binding function and may halve a role in mediating the structural transition from prohead to mature virus and also scaffold release [1].Gp7 is arranged within the capsid as a series of concentric shells [1]. 25.00 25.00 28.00 80.70 22.70 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.27 0.72 -3.67 3 7 2009-01-15 18:05:59 2008-08-22 14:45:34 3 1 5 11 0 9 0 91.00 65 97.55 CHANGED PLchEEHE-ILNKLsDPELscSERTEALQQLRssYGSFlSEYsELTcApEKLsAEK-DLIVSNSKLF...RQlGLTcKKEE..EcKQc-lSETITIEDLEup .Phc.-pHE-ILNpL.DPELspSERTEALQQLRssYGSFlSEYsDLTcopEKLsAEK-DLIVSNSKLF...RQlGlTccpEE..-hKpt-lSETITIEDLEtp 0 0 0 0 +11251 PF11419 DUF3194 Protein of unknown function (DUF3194) Pollington J anon pdb_1pu1 Family This family of proteins has no known function however the structure has been determined. The protein consists of two alpha-helices packed on the same side of a central beta-hairpin [1]. 
21.50 21.50 21.90 22.00 20.40 21.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.77 0.72 -3.78 4 34 2009-01-15 18:05:59 2008-08-22 17:01:13 3 1 34 1 25 37 2 83.60 34 86.78 CHANGED IGLPcLoEE-LIElG-luQ+lIIcaIF-+Lu+SEV+DlEVTsRINpGETLDLELEVYlEVPlFV+VDVEuLIDEAlDKAYculEcaL ..................h.cLsptpl.clu-hAtcsh.thlFs+l.spS-VcDl-VTlplpc...s.psLsLEl-VYlpsP.hsc.sDs-pllD-AlcpAhpsV-ch.... 0 4 11 19 +11252 PF11420 Subtilosin_A Bacteriocin subtilosin A Pollington J anon pdb_1pxq Family Subtilosin A is a bacteriocin from Bacillus subtilis.The protein has a cyclized peptide backbone and forms three cross-liks between the sulphurs of Cys13, Cys7 and Cys4 and the alpha-positions of Phe22,Thr28 and Phe31 [1]. 25.00 25.00 33.90 33.80 18.10 16.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.79 0.72 -3.92 2 17 2009-01-15 18:05:59 2008-08-26 12:13:47 3 1 16 1 2 6 0 33.70 94 77.33 CHANGED NKGCATCSIGAACLVDGPIPDFEIAGATGLFGLWG NKGCATCSIGAACLVDGPIPDFEIAGATGLFGLWG. 0 1 1 1 +11253 PF11421 Synthase_beta ATP synthase F1 beta subunit Pollington J anon pdb_1pyv Family The NMR solution structure of the protein in SDS micelles was found to contain two helices, an N-terminal amphipathic alpha-helix and a C-terminal alpha-helix separated by a large unstructured internal domain. The N-terminal alpha-helix is the Tom20 receptor binding site whereas the C-terminal alpha-helix is located upstream of the mitochondrial processing peptidase cleavage site [1]. 
20.90 20.90 22.10 39.10 20.20 20.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.61 0.72 -3.30 12 41 2009-01-15 18:05:59 2008-08-26 12:47:47 3 3 21 1 19 40 0 45.40 55 8.25 CHANGED MASRRlLSSLLRSouR+p...uu+.......sPthssst.u......RsSPsGaLLNR MASRRlLSSLLRSuSRtp...uu........sPthssPp..hp.....RsSPsGaLLNR 0 3 10 14 +11254 PF11422 IBP39 Initiator binding protein 39 kDa Pollington J anon pdb_1q87 Family IBP39 recognises the initiator which is solely responsible for transcription start site selection. IBP39 contains an N-terminal Inr binding domain connected to a C-terminal domain. The C domain structure indicates that it interacts with the T. vaginalis RNAP II large subunit C-terminal domain. Binding of IBP39 to Inr recruits RNAP II and initiates transcription [1]. 25.00 25.00 184.80 184.10 21.00 19.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.34 0.71 -4.77 7 7 2009-01-15 18:05:59 2008-08-26 13:06:39 3 1 1 5 7 8 0 181.70 26 53.63 CHANGED hcslpIhths.pppppF+ppslphWpcllp.stth.shshp.FIcthsppF...................ptspQshcN........uhpsIp.lLsspssshlThsDFhpFhAtFGP.-olhhKItphL..s.t.tpWL.h..pPph.pphs....slsuaFsps..NChlhph.sGhpa+saNhPhl.sususYLhDEssppapSWcphhp.h ..cslpIhths.pppppF+ppslphWpcllp.stth.shshp.FIcthsppF...................ptspQshcN........uhpsIp.lLsspssshlThsDFhpFhAtFGP.-olhhKItphL..s.t.tpWL.h..pPph.pphs....slsuaFsps..NChlhph.sGhpa+saNhPhl.sususYLhDEssppapSWcphhp... 0 7 7 7 +11255 PF11423 Repressor_Mnt Regulatory protein Mnt Pollington J anon pdb_1qey Family Mnt is a repressor which is involved in the genetic switch between lysogenic and lytic growth in bacteriophage P22. The C-terminal domain of the protein consists of a dimer of two antiparallel coiled coils with a right handed twist, which is both stronger and has closer inter-helical separation compared with those found in left-handed coiled coils [1]. 
20.90 20.90 25.70 34.00 19.30 17.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.69 0.72 -7.10 0.72 -4.63 3 59 2009-01-15 18:05:59 2008-08-26 13:11:58 3 2 55 6 1 38 0 29.60 66 36.09 CHANGED RDDAERhADpQSElVKKMVFETLKDhY+K+ .DDAERhA-pQS-hVKKhVFDTLKclY+K.s. 0 0 0 0 +11256 PF11424 DUF3195 Protein of unknown function (DUF3195) Pollington J anon pdb_1rki Family This archaeal family of proteins has no known function. 25.00 25.00 64.20 64.00 19.80 18.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.18 0.72 -3.80 4 9 2009-01-15 18:05:59 2008-08-26 13:26:27 3 1 9 2 6 10 0 82.00 44 90.33 CHANGED alIl+TlPKKEtIVARDLCDClYYYDppVhC+slusuRVYlhTpl-hLcpCLph+YF+KLlKslElaD.Vpp.cPs.CscChllpIG-VY ...lllpTls+KEthVARDLCDCLYaa.DptVhCcsluPG+VYVpT.p.hphLcpCLsM+YFKpLlKtlElaDcVSp.pPs.ps.shh................. 0 1 2 3 +11258 PF11426 Tn7_TnsC_Int Tn7_TnsC; Tn7 transposition regulator TnsC Pollington J anon pdb_1t0f Family TnsC is a molecular switch that regulates transposition and interacts with TnsA which is a component of the transposase. The two proteins interact via the residues 504-555 on TnsC. The TnsA/TnsC interaction is very important in Tn7 transposition [1]. 20.30 20.30 20.70 23.30 19.30 18.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.19 0.72 -4.25 10 45 2009-01-15 18:05:59 2008-08-26 14:47:22 3 2 43 2 12 38 1 46.90 38 8.70 CHANGED IKps-WcTLcS-DLRFlaSQssssp.shYptLcssGLlFDlpuhh+csG ....lK.s-WcoLsssDLRalYSQppspp.sha-pLKpp.Gl.IlDhpslhpt..... 1 3 6 7 +11259 PF11427 HTH_Tnp_Tc3_1 Tc3_transposase; Tc3 transposase Pollington J anon pdb_1tc3 Family Tc3 is transposase with a specific DNA-binding domain which contains three alpha-helices, two of which form a helix-turn-helix motif which makes four base-specific contacts with the major groove. The N-terminus makes contacts with the minor groove. 
There is a base specific recognition between Tc3 and the transposon DNA. The DNA binding domain forms a dimer in which each monomer binds a separate transposon end. This implicates that the dimer has a role in synapsis and is necessary for the simultaneous cleavage of both transposon termini [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.24 0.72 -4.33 2 160 2012-10-04 14:01:12 2008-08-26 14:57:22 3 12 12 2 152 76 7 47.00 39 24.22 CHANGED RGouLSDhEpu.l.sM+..ssplpEhup+lsRpRpCIpcaLKsPspYGso ...............lo..EpAplDlM.hQLG.hSlptMS+plsRSRsslcpY..lsDPlsYG..t....... 0 8 136 152 +11260 PF11428 DUF3196 Protein of unknown function (DUF3196) Pollington J anon pdb_1td6 Family This proteins is the product of the gene MPN330 and is thought to involved in a cellular function that has yet to be characterised. The proteins has 11 helices and a novel fold [1]. No function is currently known for this protein. 
26.70 26.70 26.80 26.80 26.50 26.50 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.81 0.70 -5.11 5 47 2009-01-15 18:05:59 2008-08-26 15:32:04 3 3 47 1 11 28 1 224.10 27 86.95 CHANGED Ml....KK.sshlKphsshKppFtshppl.pshpp...tN.DpLTpFFhslLpKlcsLVKcKDFKpAh-plpEELsoPYlPhsLluaFcuhhhlIs+sL.....aEsENstLAsLs+c-llsplLssaPs..sLshlcYLLsKpcsFlcss-LphhsalLTsKclh-lcKhshhpALspIssFlsppF-YYNSKLKQpFslTLscFslhtp.sopsYFsQLlcplpphFhKEPSppEFAsEIIssllVsYFPhHPsa.slscLApsIaQYVpNslpNclsshKs.ElpKlIVcslhcpLDc .............................................h........................................ppYa--ILcplcpllcc+castAhslIspELshPYIPh.....hhppFcphhhclK+ph............h.pp.t.....sp.Lsppphhphltpshsp..plshhph.hh.+.ht.hp.....l..t..hpphL.spphtt.sKh.ll.hLt..p..IsppFphhpphhcpoh.ls.hp.s.....pt...h.pl.s.lpp..h...K.s.Pshhphsppll..hhh.haPhh..h.p.p.lAhsIhtYhppthts..hphp......p.l.......................................................................................................................... 0 6 9 10 +11261 PF11429 Colicin_D Colicin D Pollington J anon pdb_1tfk Family Colicin D is a tRNase which kills sensitive E.coli cells via a specific tRNA cleavage. It targets the four isoaccepting tRNAs for Arg and cleaves the phosphodiester bond between positions 38 and 39 at the 3' junction of the anticodon stem and the loop [1]. 21.70 21.70 22.90 23.70 21.50 19.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.24 0.72 -3.83 8 39 2009-01-15 18:05:59 2008-08-26 15:48:53 3 10 37 3 7 52 0 87.90 39 12.70 CHANGED QLpKKFK.HAsDFGlsspstNppTLscFcDsIpcHlusssTVpKGTYRptpsSKVYaNssTthsVIlcpsGsFl..SGW+lsPto-phphYlcss .....QLpKKaK.HAsD.FGlsspptNppTL.spFccuIppHlsst..sT.lp.cGTYRt...tpsSKVaaNss.TspsVllcpsGpFl..SGWKlsPtspphp.ahps.h.......... 
0 3 5 6 +11262 PF11430 EGL-1 Programmed cell death activator EGL-1 Pollington J anon pdb_1ty4 Family Initiation of programmed cell death in C.elegans occurs by the binding of EGL-1 to CED-9 which disrupts a complex involving CED-4/CED-9 and allows CED-4 to activate CED-3, a caspase. It is the C terminal domain of EGL-1 which is involved in the formation of the complex with CED-9. The formation of the complex induces structural rearrangements in CED-9 and EGL-1 adopts an extended alpha-helical conformation [1]. 20.50 20.50 23.50 22.70 19.90 15.70 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.40 0.72 -6.46 0.72 -4.32 3 13 2009-09-11 00:00:58 2008-08-26 16:23:42 3 2 5 2 13 13 0 20.90 62 19.47 CHANGED Y-IGoKLAAMCD-FDAEMMSY .a-IGoKLAsMCDDFDAcMMSY.. 0 2 3 13 +11263 PF11431 Transport_MerF Membrane transport protein MerF Pollington J anon pdb_1waz Family The mercury transport membrane protein, MerF has a core helix-loop-helix domain. It has two vicinal pairs of cysteine residues which are involved in the transport of Hg(II) across the membrane and are exposed to the cytoplasm [1]. 21.20 21.20 21.50 22.00 19.40 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.13 0.72 -4.31 13 68 2009-01-15 18:05:59 2008-08-26 16:37:18 3 2 60 3 14 54 6 45.20 58 57.94 CHANGED TPlLVILLGsVGLSAlsGaLDaVLLPALAlFIuLTlYALW++pppp ....TPlLVILLGsVGLuALsG..YLDYVLLPALAlFIGLTlYAlhR+cpt.s...... 0 6 10 13 +11264 PF11432 DUF3197 Protein of unknown function (DUF3197) Pollington J anon pdb_1wn9 Family This bacterial family of proteins has no known function. 
20.60 20.60 21.00 106.70 20.50 19.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.41 0.71 -4.07 3 15 2009-01-15 18:05:59 2008-08-26 16:40:03 3 1 15 2 9 15 0 113.80 55 85.09 CHANGED hQAlpA+Lscl-LoEARLlLITDRQDERsQARYAALLThG+....EALLoAPAFGPAYGPuGAcALAELVRWApupGh.RspETVLSuGDFsRVLAEPDA-EV+RLlAAoNPoDPAIY ..LcALKsAL...cGl+hsEAKVhLITDWQD+R-pARYAL.LL+sGK....+cLLssDAFGPAF.PuGEcALuELVuhLlppGAR+FYEAVVSPGEhsuLL-LPPEEll+RlhAhANPTDPuIY 0 2 5 9 +11265 PF11433 DUF3198 Protein of unknown function (DUF3198) Pollington J anon pdb_1x9b Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently, this archaeal family has no known function. 25.00 25.00 58.40 58.00 21.40 18.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.44 0.72 -4.09 3 4 2009-01-15 18:05:59 2008-08-26 16:42:54 3 1 4 1 4 6 4 51.00 50 41.21 CHANGED L+DTu+FEShINSsSKSVFVRNLsELERLAKRLGKSYcIQLEpAKEKWKVK L+Dph+FEphINSpSKphFVcNLsELEcluh+LGcuYp.pLEpAKcKWKVK 0 2 3 3 +11266 PF11434 CHIPS Chemotaxis-inhibiting protein CHIPS Pollington J anon pdb_1xee Family The chemotaxis inhibitory protein, CHIPS, is an excreted virulence factor which acts by binding to C5a and formylated peptide receptor (FPR), blocking phagocyte responses. A fragment of CHIPS, which contains residues 31-121 comprises of an alpha helix packed onto a four stranded anti-parallel beta-sheet. Most of the conserved residues of CHIPS are present in the alpha-helix [1]. 25.00 25.00 43.10 42.30 20.00 18.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.14 0.72 -4.37 3 98 2009-01-15 18:05:59 2008-08-26 16:52:19 3 1 97 2 1 14 0 89.50 99 60.91 CHANGED NSGLPTTLGKLDERLRNYLKKGTKNSAQFEKMVILTENKGYYTVYLNTPLAEDRKNVELLGKMYKTYFFKKGESKSSYVINGPGKTNEYAY NSGLPTTLGKLDERLRNYLKKGTKNSAQFEKMVILTENKGYYTVYLNTPLAEDRKNVELLGKMYKTYFFKKGESKSSYVINGPGKTNEYAY.... 
0 1 1 1 +11267 PF11435 She2p RNA binding protein She2p Pollington J anon pdb_1xly Family She2p is a RNA binding protein which binds to RNA via a helical hairpin. The protein is required for the actin dependent transport of ASH1 mRNA in yeast, a form of mRNP translocation. ASH1 mRNP requires recognition of zip code elements by the RNA binding protein She2p. She2p contains a globular domain consisting of a bundle of five alpha-helices [1]. 25.00 25.00 36.30 196.30 21.20 20.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.42 0.71 -4.93 6 29 2009-01-15 18:05:59 2008-08-26 17:02:09 3 1 26 2 17 23 0 208.20 61 83.75 CHANGED -hlcpllslaSsYISsYlclLNKaIshLRRVSTLRFERsTLIKaVKKLRFaNDsLhoash.......psphshcscsLpcslpslGSaFlKsLEhlDLLNYYLTQsLQsElISKTLNpDLlls--sIssl-DTYsaFVKFoQWhlESLu...lsDsLLslEllQFolKCAlEDslDls-T-sIhLQEVhPVcsppEapsLhtpWpslLssKhut ......-llEQIlsLhS+YLSSYIHlLNKFIuHLRRVuTLRFERTTLIKFVKKLRFYNDslLSYNsp......cs-h-spuDohc.........clllPIAShFlKClEThDLLNYYLTQSLQKEIlSKTLNEDLTLosEoIlAIDDTYNHFVKFoQWMIESLp...IsosLLsLEVVQFAlKCAcEDGT........slsETDNIFLQEllPVsSEEEFpoLostWpuILcuKLss.. 0 1 7 14 +11268 PF11436 DUF3199 Protein of unknown function (DUF3199) Pollington J, Finn RD anon pdb_1xn8 Family Some members in this family of proteins with unknown function are annotated as YqbG however this cannot be confirmed. Currently the proteins has no known function. 25.00 25.00 25.40 27.60 24.60 24.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.68 0.71 -4.27 6 44 2009-09-10 23:59:37 2008-08-26 17:06:38 3 1 38 2 7 46 0 124.00 47 96.62 CHANGED .hITP-ELhsYSVaEpVKsRssEhLctDILEAEs-lhplsG+cFoDtch.P.....lPEplRLALL+LAQaaAhhNsDEShhKGapSEKl..GDYSYTlusus.SlsKPDVhtLLtDYl....sphstscs+h+hR ...LIsPs-lhsYSVa.-pVKsRPppLLpQDIlEAEuEhtplsGH.cFsDpsh.P......LP-cl+LALlKLAQYaALl.Nu...D....E...oshcuYpSEKl..GDYSYTlussu..ulpKP-VapLLp-aI....sths.tpsph+hR............ 
0 2 5 6 +11269 PF11437 Vanabin-2 Vanadium-binding protein 2 Pollington J anon pdb_1vfi Family The Vanadium binding protein, Vanabin2, contains four alpha-helices connected by nine disulphide bonds. Vanadium accumulates in Ascidians however the biological reason remains unclear [1]. 25.00 25.00 39.00 38.40 19.20 18.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -11.94 0.72 -12.75 0.72 -4.00 10 14 2009-01-15 18:05:59 2008-08-26 17:19:14 3 2 3 1 8 11 0 92.40 33 55.68 CHANGED CtspCposCssl+sC.+tpCtpsCtuspstp......hCp+sCt+spChss.......sCcsChpc.C.ht.hctCRsssCuppCP.t............tKshKsssC+cCMcpNC ..ChspCpssCssl+sC.ptpChpsCtuspp.t......hCp+sChhspC.ss.......sCcpChts.C.ht.hctC+pspCuppCP.t..........t.tpth+sssC+pCMhpNC 0 5 5 8 +11270 PF11438 N36 36-mer N-terminal peptide of the N protein (N36) Pollington J anon pdb_1qfq Family The arginine-rich motif of the N protein is involved in transcriptional antitermination of phage lambda. N36 forms a complex with boxB RNA by binding tightly to the major groove of the boxB hairpin via hydrophobic and electrostatic interactions forming a bent alpha helix [1]. 25.00 25.00 27.90 36.70 17.60 16.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.82 0.72 -4.44 2 67 2009-01-15 18:05:59 2008-08-26 17:27:41 3 1 62 1 0 41 0 34.90 97 32.86 CHANGED DAQTRRRERRAEKQAQWKAsp.LhsshptpsspR. DAQTRRRERRAEKQAQWKAANPLLVGVSAKPVNRP.. 0 0 0 0 +11271 PF11439 CesA DUF3200; Type III secretion system filament chaperone CesA Pollington J anon pdb_1xou Family This family represents a chaperone protein for the type III secretion system - TTSS - translocon protein EspA, to prevent the latter's self-polymerisation. The TTSS is a highly specialised bacterial protein secretory pathway, similar in many ways to the flagellar system, that is essential for the pathogenesis of many Gram-negative bacteria. 
The twenty or so proteins making up the TTSS apparatus, referred to as the needle complex, allow the injection of virulence proteins (known as effectors) directly into the cytoplasm of the eukaryotic host cells they infect; however, the injection process itself is mediated by a subset of extracellular proteins that are secreted by the needle complex to the bacterial surface and assembled into the type III translocon - EspA. EspB and EspD. EspA polymerises into an extracellular filament, and, as with other fibrous proteins, is apt to undergo massive polymerisation when overexpressed. CesA is the secretion chaperone protein that binds to EspA. CesA is dimeric and helical, and it traps EspA in a monomeric state and inhibits its polymerisation. 25.00 25.00 28.10 140.30 22.90 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.30 0.72 -4.06 2 64 2009-11-23 17:28:21 2008-08-27 09:06:07 3 1 63 3 1 15 0 91.60 95 88.57 CHANGED MsIVpQT+NKELLDKKIRSEIEsIKKIIAEFDVlKEsVN.LSEKAKTsPQAAEpLNKLIEGYTYGEER+LYDSALSKIEKLIEThpPsRStSQ.T M....SQTRNKELLDKKIRSEIEAIKKIIAEFDVVKESVNELSEKAKTDPQAAEKLNKLIEGYTYGEERKLYDSALSKIEKLIETLSPARSKSQST. 0 0 0 1 +11272 PF11440 AGT DNA alpha-glucosyltransferase Pollington J anon pdb_1xv5 Family The T4 bacteriophage of E.coli protects its DNA via two glycosyltransferases which glucosylate 5-hydroxymethyl cytosines (5-HMC) using UDP-glucose. These two proteins are the retaining alpha-glucosyltransferase (AGT) and the inverting beta-glucosyltransferase (BGT). The proteins in this family are AGT. AGT adopts the GT-B fold and binds both the sugar donor and acceptor to the C-terminal domain. There is evidence for a role of AGT in the base-flipping mechanism and for its specific recognition of the acceptor base [1]. 
25.00 25.00 31.00 30.10 18.90 16.90 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.14 0.70 -5.57 2 14 2009-01-15 18:05:59 2008-08-27 09:18:47 3 1 11 9 1 15 23 335.00 57 79.25 CHANGED CGVTKFSLEQRDWFIKNGHEVTLVYAKDKSFTRssuHDaKSFSIPVlLAKEYDKsLKLVNDCDILIINSVPATSVpEsTINNYKKllDNIKPSlRVVVYQHDHSsLSLRRNLGLEETVRRADVIFSHSDNGDFNKVLMKEWYPETVSLFDDIEEAPTVYNFQPPMDIsKVRSTYWKDVSEINMNINRWIGRTTTWKGFYQMFDFHEKaLKPAGhSTlMEGLERSPAFIsIKEKGIPYEYYt.+plDphplAPN.PsQILDpYlNSEMLERMSKSGFGYQLSKLspKYLQRSLEYTHLELGACGTIPVFWKSTG-NLKFRVDNTPLTSHDSGIIWFDENDMESTFERIKELSSDRA .......CGVTKFSLEQRDWFIKNGHEVTLVYAKDKSFTRssAHDaKSFSIPVlLAKEYDKTLKLVN.DCDILIINSVPATSV.EE-.TINNY.KKIIDNIKPSlRVVVYQHDHSsLSLRRNLGLEETVRRADVIFSHSDNGDFNKVLMKEWYPETVSLFDDIEEAPTVYNFQPPMDIsKVRSTYW.KDVSEINMNlNRWIGRTTTWKGFYQMFDFHEK.aLKP.AGhSTIMEGLERS.P.AFIsIKEKGIPYEYYph+plDphKlAPNhP..sQILDpYVNSEMLERMSKSGFG...YQLSKLsKKYLQR...SLEYTHLELGACGTIPVFWKSTGENLKFRVDNTPLhSHDSGIIWFDENDMESTFERIKELSSDRA................................................................................................................................................................................................................................. 0 1 1 1 +11273 PF11441 MxiM Pilot protein MxiM Pollington J anon pdb_1y9l Family MxiM, a Shigella pilot protein, is essential for the assembly and membrane association of the Shigella secretin MxiD. MxiM contains an orthologous secretin component and has a specific binding domain for the acyl chains of bacterial lipids [1]. The C terminal domain of MxiD hinders lipid binding to MxiM [1]. 
25.00 25.00 259.70 259.40 19.40 17.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.70 0.71 -4.01 2 8 2009-01-15 18:05:59 2008-08-27 11:11:16 3 1 8 3 0 4 0 115.00 99 80.35 CHANGED uSSNSEKEWHIVPVSKDYFSIPNDLhWSFNTTNKSINVYSKCISGKAVYSFNAGKFMuNFNVKEVDGCFMDAQKIAIDKLFSMLKDGVVLKGNKINDTILIEKDGEVKLKLIRGl SSSNSEKEWHIVPVSKDYFSIPNDLLWSFNTTNKSINVYSKCISGKAVYSFNAGKFMGNFNVKEVDGCFMDAQKIAIDKLFSMLKDGVVLKGNKINDTILIEKDGEVKLKLIRGI 0 0 0 0 +11274 PF11442 DUF2826 Protein of unknown function (DUF2826) Gunasekaran P, Mistry J anon Pfam-B_001753 (release 23.0) Family This is a family of uncharacterised proteins that is highly conserved in Trypanosoma cruzi. 20.20 20.20 22.50 22.50 18.60 17.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.43 0.71 -4.57 2 60 2009-09-11 10:16:18 2008-08-27 11:20:43 3 1 2 0 2 60 0 131.00 79 68.08 CHANGED pRERtRE.LLL.LASLLPhVASHADYh..ADHGWCASTSDVVCRHFTAPVKHTSRRMLWLWIWCRtCSRHhFARLLFTALR.IDASP.ESFsAAPCVVLstphohll................Csh.LTV.tpsVhhp+s...+pRhasshhtCspaspuphlt ...t..cRERE.LL.LLLAS.LLPFVASHADYCLSADHGWpASTSDVVCRHFTAPVKHTSRRMLWLWlWCRcCSRHYFARLLFTALRQIDASPPESFTAAPCVVLPAQ.ShVVhRLsDupsPLRWsMQGCGLPLTVLGTAVWMR+P...HERMYCGphKCVKYAESQhLQ... 1 0 0 2 +11275 PF11443 DUF2828 Domain of unknown function (DUF2828) Gunasekaran P, Mistry J anon Pfam-B_001814 (release 23.0) Domain This is a uncharacterised domain found in eukaryotes and viruses. 
29.00 29.00 29.20 29.40 28.00 28.80 hmmbuild -o /dev/null HMM SEED 534 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.85 0.70 -6.30 19 303 2009-01-15 18:05:59 2008-08-27 11:21:35 3 3 139 0 139 295 228 343.40 21 87.77 CHANGED TENsusTahoS....ssssLDhFhplss.pss..........cplhsLhttAWs....cDs.hslKllhplRslR.sG+u-+pu...FhpshhaL.tcp..aPpTlttN....................................................hpplspaGha.KDlhpllhphlc.....................................................................................................................................................pshcLauctLtpDhp.lt..tt..............hpplSLAAKWsPo.spphsctshhsptlut..........................hthsccchRK.sLsPLR+tLplsEhhMuA+pWsplsYs+VsSlAMtpYpchFh++DspR.............FptYLpslppGc.......sKlsAuulhPa-llpphhs.sspst......................A-hQWcshscplpppGp..lpNslAlsDVS.....GSMsu............PMcVsluLGLLlSEh..sssPa+sclITFSpsPphcpl..pGc.sLt-+sphlpphsWG.hsTshptVF.-hILpsAlcspLs.-cMl+clFVFSDMEFDpAssttt................................hcTsaEslpc+apcsGYt..lPclVFWNLpsssu...sPVsuscpGVALVSGFStslh+hhLps...........................................phsPhshMh..................................culs 
.....................................................................................................................h......t.....t.h...h.....s................h.....................................................................................hGhh.c.h...........................................................................................................................................................................................................................................................huKh...........................................................hpt.....hp....h.-..ht.t...........h..h.a...st.s.........hF..p...t.....................h...a..th..t...................h..t........h...h.............................................................................................................................p.hsh.shs.....tp................h..s.uhshhhst....t..att.hh.att.......h..................t.........h..........thph.tsh.p.ll.........uht..th......t.hhlhoDh.hp.s............................................................t.h...a.t.t.s.......h.........PphlhWslt.............................h........sh.hhsG.s.thht.h.t........................................................................................... 0 40 86 114 +11276 PF11444 DUF2895 Protein of unknown function (DUF2895) Gunasekaran P, Mistry J anon Pfam-B_002001 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
22.60 22.60 22.80 50.90 21.60 22.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.18 0.71 -5.10 26 239 2009-01-15 18:05:59 2008-08-27 11:22:14 3 3 191 0 62 214 10 196.20 50 89.80 CHANGED +ptlsspcpHIhoLRlshuhLsllhlshhhuhhpAPpcLslHlPPDLRu.GSTRph.....W-VPspsVYuFuaYIFQQlNRWspsGcpDYtpsIt.tLpsYLTPuCpshLppDhc.RtpsGELppRsRsVa.EIPGRGYusp....+VhlhopcsWsVpLDlsscEhap...uEsVK+sl.lRYPL+VVRhDlDsE+NPaGLAlDCYsus.PpRl ................ptlsptpsHIpTLRlusu...hLsllhlshshGWhpAPccLTIHlPPDLRS.GSTRhW....WEVPPpoVYuFuaYIFQQLNRWPpsGEpDYspNlp.pLusYLTPuCpsaLppDachRpssGELRpRVRslY.EIPGRGYu-s..........+VpshSpcsWhVpLDlsuDEYat........uE.VKRAL.lRYPl+VVRh-lDPppNPFGLALDCYsus.PQRl................. 0 8 29 50 +11277 PF11445 DUF2894 Protein of unknown function (DUF2894) Gunasekaran P, Mistry J anon Pfam-B_001968 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 59.20 59.10 22.50 21.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.78 0.71 -11.41 0.71 -4.05 44 142 2009-01-15 18:05:59 2008-08-27 11:23:51 3 2 136 0 46 157 12 178.40 44 83.45 CHANGED LDAW....REpGADRLDPVRF+hl-ALpRRAAApsGsARcLLDsRLAsLl-uYAsplpcApsssssspss......................................................sussstPA+GsLAuLlchlu...+p.......A.scpRus..............sth..hsE.c....hL.DY...................FRcT...WS+lss-pQlRQSL-QVP+NAGPLNSSsLVHRSLoLMRELSPcYLpQFLSYVDALSWLEQhsuss ................................L-AWRppGADRlDPVRF+hl-ALt+RAAupsGtsRplL-sRLusLlcuYAttlppstsssts.t...............................................................ss......ssstsspusLusLlp.lu.................u.sthht............................................ssth.hhsE.t....hL.Dh...............................FRco...Wo+lps-pQlRQSL.ppVPcNAGPLNSssLVHRuLsLMRELSPtYLppFLSYVDALuWLEQhsuts................. 
0 7 21 34 +11278 PF11446 DUF2897 Protein of unknown function (DUF2897) Gunasekaran P, Mistry J anon Pfam-B_002015 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 21.70 21.70 22.50 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.79 0.72 -4.14 32 221 2009-01-15 18:05:59 2008-08-27 11:24:52 3 1 220 0 49 148 6 61.10 35 93.54 CHANGED .shpsWlIIllVlGVIsuNLAsLKaoA+hKhsphsc.hpphppps........t.cppsst- ..........apsWlIlllVluVIVGNlAsLKaTAp.hKhsph.-cppcpsc.pt..........t.........t.............................................. 0 7 14 34 +11279 PF11447 DUF3201 Protein of unknown function (DUF3201) Pollington J anon pdb_1yb3 Family This archaeal family of proteins has no known function. 25.00 25.00 26.60 38.10 24.00 22.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.01 0.71 -4.80 4 14 2009-01-15 18:05:59 2008-08-27 11:29:45 3 1 14 1 12 17 0 150.90 53 88.93 CHANGED EFLN+hWE-sFcLREEL+EELc..GFcVEEVsEVFNAYLYlDGcWcEMKYPHPAFsl+PGGEVGATPQGFYFVFAFsKE-LocEFlccFlcsFcK.SFIYGhENFLEDFYN...ssPhSh-EVac+IhpScEchINFEVDhshs+EELK+cLh+.F .....paLN+hWt-lFcLpEELKEELp..GFcVE-VEEVFNAY.IalDGEWccMcYPHPAFplKPtGEVGATPpuaYFVhAlsKEc.loc-FlptFlctF.+.SFIYGuENFLpDhYN..ccsPhsscEVhccIccScEclhpFEsshs.sh-cLKctLh+............... 0 2 2 7 +11280 PF11448 DUF3005 Protein of unknown function (DUF3005) Gunasekaran P, Mistry J anon Pfam-B_3492 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
25.00 25.00 45.50 45.50 19.40 18.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.45 0.71 -4.47 11 108 2009-01-15 18:05:59 2008-08-27 11:30:58 3 1 68 0 27 98 3 119.10 48 72.35 CHANGED usssRRAssR.lpLDNssTc..DsTVDTDGKshEAA+hAotht...DplhhSNASLsNAls-ss-G..lAGhDSRPGGNpPtlAhRsGapVlcpGhlsss..........................sspGpRsp+VIplpcs ..........ssstRAtsR.lpLDNssTt..DsTVDTDGKshEAuRhASth....DplhhSNASLsNAhPEssDG..hAGhDSRPGGN+PtlAhRsGatVlccGhssss............................st.s.pRst+llplp.s.. 0 2 5 15 +11281 PF11449 DUF2899 Protein of unknown function (DUF2899) Gunasekaran P, Mistry J anon Pfam-B_002023 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 24.40 24.40 25.80 24.50 22.80 24.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.94 0.70 -5.24 19 285 2009-01-15 18:05:59 2008-08-27 11:31:55 3 3 211 0 48 243 397 219.80 29 79.21 CHANGED AAlhGAlPGCGGAllVsotassGpluFGAlVAVLsATMGDAAFLLLAspPpsulh..llsluhllGlloGhlVshhatcch..h+st.tptsttspts........................pthphthh........ha.hlllPuhllulh..tuhpl-hsphh.th.thslthhGshh.uh..hhlWAhop.htshpshsspssth....p....lp+ssp-TsFVosWVlsAFLha-lslthsGl-..LtshatthuhhhPLIullIGLlPGCGPQIlVToLYlsGslPhSulluNuISNDGDALFPAIAluPKuAlhATlYoulPAllVGYGaYal .............................uullGslPtCGhulhsss.ahpthlshGsllAlhhuT.s-Ah.lllutt..s...hshh..lhslthlhuhlsGhllshh.hht.h...hp............................................................................................................................................................................................................p.p.ptth.......t...htpuh.tcT.hl.salhhshhhhphhhhhhG.-..lss.htthuhh.PhhuhhlGhlPsCusplllTpLY.lpGslshuu.hushlossGsuLhshht.hs.....ct.t.h..hpl..hs...l.....Ahhluhshh........................................... 
0 18 29 42 +11282 PF11450 DUF3008 Protein of unknwon function (DUF3008) Gunasekaran P, Mistry J anon Pfam-B_3521 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 21.50 21.50 22.00 45.70 19.00 18.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.69 0.72 -3.83 21 166 2009-01-15 18:05:59 2008-08-27 11:37:14 3 1 163 0 47 105 274 57.50 69 82.60 CHANGED AKSKAQQ+AAGAALuAKRG-scpS-L+GAS+pMh-SMSEKELE-hAST++KGKPcHts .AKSpAQQKAAGAALuAKRGEhKhSELpGASKpMa-SMoEKELEEhApTKRKGhPpHhS........ 0 12 24 32 +11284 PF11452 DUF3000 Protein of unknown function (DUF3000) Gunasekaran P, Mistry J anon Pfam-B_3481 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 41.60 41.50 18.10 18.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.02 0.71 -5.03 38 408 2009-01-15 18:05:59 2008-08-27 11:38:31 3 1 407 0 108 282 65 181.80 45 89.57 CHANGED upPstF+cAVsAMpAsslRPEIcLGsIRPP.QRLAPaSaALGA-Vp....cs-...t.hstpu-ssAFGRLILLaDPsGs-AW-GT.hRLVAYlQADLDss.AsDPLLPEVAWSWLs-ALcuR..ssphsALGGTVTuTTSVRaG-IuGPPcAaQlELRASWTAos.....s.DLuuHVpAFs-VLppsAGLP..PtG....VTcL ..........h.Ps.FppAlpuhpu..s....p...hRs-lcltpI.sP.pRLAPaShA..luu-..Vs.....ts-.................................-s.AhGRLILLHDPsGp-AWp......G........T.hRlVAalpADlDsuhAsDPLLPcVAWSWLsDuL...............-u+.............sss.......apAluGTVTussStpFGsluus.....PcupplElRASWTsss.............................DluuHlpAas-lLtpsAGLP...Psslss........... 0 35 80 100 +11285 PF11453 DUF2950 Protein of unknown function (DUF2950) Gunasekaran P, Mistry J anon Pfam-B_002484 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
20.20 20.20 22.20 23.30 20.00 19.50 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.56 0.70 -5.41 42 260 2009-01-15 18:05:59 2008-08-27 11:39:24 3 3 231 0 69 187 20 245.90 54 89.42 CHANGED QpsFsoP-tAspAhssAlss..sDpssLtplLGsshccllssss..hsppshtcFlptapppH..pl.tc.....scscAhLtVGsssWshPlPlVKpusG.WpFDstAGpcEllsRRIGRNELsAIpshhAYlDAQp-YApp...ttstshcYAQ+llSosGK+DGLYWssss.GcstSPLGPhhupus.tpsh..........spsYaGYaaRILs.uQGssAsGGthsYllpGphhtGFALlAWPAcYGsTGVhoFhVspcGpVYp+DLG.cTsphApshppFsPD..tsWphV ........................................................................................QppFsoPEsAAsAFusA..lAs..psEspLssLLG-DWRpaLPs-G..sDs-uVsRF.RDW+cuHcIVpc......-ssAaLsVGc-sWpLPlPhVKcssG.W+FDhAAussEILTRpIGRNELuslQAh+AYVDAQpDYhhp.......sp+aApRIISS-GpKDGLYWPocs..G.-...........sPSPLGPsFSsAu.................ss-GYHGY+FRIlo.spssc...................GhALlAWPh+YG-TGVMSFMVNQDcclYQuDLGc-Tcu+spAITcFsPD..spWQs.............. 1 12 25 46 +11286 PF11454 DUF3016 Protein of unknown function (DUF3016) Gunasekaran P, Mistry J anon Pfam-B_3517 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 27.30 73.70 23.20 19.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.70 0.71 -4.60 32 121 2009-01-15 18:05:59 2008-08-27 11:41:40 3 2 116 0 55 121 21 139.00 37 77.50 CHANGED sG....tVpVpapcPcpapDlc.tusthpsphcpphhppLscplschAp+hLsssQpLclplTDlDLAGchcP......ttss.s-lRlV+-laPPRIshsYpLh.psGpVltp.Gcc+LsDhuF.hppsshhss...csh.tYE+phLsDWhccp ..............G.VpVpWp-PppFpDl+..osptpsthcpchhppLspphpc.As+.LssGQpLclslTDlDLAGchcP...........htusshsDlRlV+.DIYP.PRIshsYpLh.tsG+Vltp.G-c+LsDhuF....h.s.hs..thss...-sh.tYEKphLsDWh+cp.. 0 13 29 42 +11287 PF11455 DUF3018 Protein of unknown function (DUF3018) Gunasekaran P, Mistry J anon Pfam-B_3532 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
21.30 21.30 21.40 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.16 0.72 -4.04 24 130 2009-01-15 18:05:59 2008-08-27 11:42:13 3 1 117 0 50 103 9 63.50 43 76.54 CHANGED stpRV++HR-+LRstGLRPVQIWVP.DsRsPpFtsEsRRQsthlsp.-tp-sclhtFl-cs.sDh.......-sW .........s.ppRVp+aR-pLRttGLRPlQIWVP.DsRpPcFstEs+RQutLVAt.ctt-s-..sFlDts..sDh.-s........................ 0 10 21 37 +11288 PF11456 DUF3019 Protein of unknown function (DUF3019) Gunasekaran P, Mistry J anon Pfam-B_3539 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 50.70 50.60 24.20 19.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.55 0.72 -3.70 30 88 2009-01-15 18:05:59 2008-08-27 11:47:29 3 1 49 0 36 69 8 100.50 29 77.97 CHANGED LploPchCls.ccupsCphslplpWQsss.tshClh.sppsh.hhCapssppsphslphcsspslpFhLhcpcssps....LAssplcVs.s..hpsR+RhRssWsL ..hploPchCls.ccspsCphslplpWQtspstphClh.sppph.hhCapstpptphshphcsppshpFsLl....s.pcsspsLApsclcVs.s..hpsR++hRssWsL... 0 4 13 24 +11289 PF11457 DUF3021 Protein of unknown function (DUF3021) Gunasekaran P, Mistry J anon Pfam-B_3526 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 23.10 23.10 23.10 23.20 23.00 22.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.96 0.71 -4.28 33 949 2009-01-15 18:05:59 2008-08-27 11:48:11 3 2 811 0 85 479 2 136.00 30 95.57 CHANGED M.....h+phlptshhGlslGhhl.llhhhh.ts.....psshss..p.hlshh....................llGhlhuhhohIFcp..-chSlhtphlhHFhsshhshhshshlss.Wh.ht...shlhhhlh..Fl.llYllIWhhhahphppclc+INppL .................................................M.....Kphhp.s.hh..h....G.lhIGhhls....llhs..hlhsst.....hs.hsshos.........hhp.hh......................hIGhlash.us.hlFch....pc.aS...h.hp.t...slhHFhlh...hssals...l...uhlsG.WF.Phph....hhlhhhlh..Fl.llY.ll.IW.hIhahpptpplcpINp.L.............. 
0 17 41 63 +11290 PF11458 Mistic Membrane-integrating protein Mistic Pollington J anon pdb_1ygm Family Mistic is an integral membrane protein that folds autonomously into the membrane [1].The protein forms a helical bundle with a polar lipid-facing surface. Mistic can be used for high-level production of other membrane proteins in their native conformations [1]. 22.30 22.30 22.40 134.60 20.30 18.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.58 0.72 -3.58 3 23 2009-01-15 18:05:59 2008-08-27 11:50:52 3 1 23 0 3 19 0 84.00 77 88.55 CHANGED MKVTDQEKEQLSsAID+MNEGLDsFIpLYNESEKDEPLIQLEDETAELl+cApEpYGp-clNpKLNAIIKEILS.lSLc--GcEc MKVTspEKEQLSsAIDRMNEGLDAFIQLYNESEhDEPLIQh-D-TAELh+QARD.YGQEpLNEKLNTIIKQILS.ISLScEGcc.... 0 1 1 2 +11291 PF11459 DUF2893 Protein of unknwon function (DUF2893) Gunasekaran P, Mistry J anon Pfam-B_001947 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.00 22.00 23.60 38.50 20.70 19.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.01 0.72 -4.18 32 244 2009-01-15 18:05:59 2008-08-27 11:53:01 3 2 227 0 72 189 24 65.70 41 26.50 CHANGED SsPERAlLEhLsclscppoh.ccscplhpGLssLRPchLppLLppCpulKspRLFhhhAcctsHsWtp+L ........SsPEhAhhEllssls....pthSF.EcsspLhpGLssLpPctlppLLppspSVpspRLFLahAchhsHtWhpcl...... 0 13 39 62 +11292 PF11460 DUF3007 Protein of unknown function (DUF3007) Gunasekaran P, Mistry J anon Pfam-B_3514 (release 23.0) Family This is a family of uncharacterised proteins found in bacteria and eukaryotes. 
25.00 25.00 33.60 40.10 21.90 21.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.21 0.72 -3.95 30 109 2009-01-15 18:05:59 2008-08-27 11:58:10 3 1 100 0 54 107 95 98.30 43 74.84 CHANGED MsRhcVlh....IGlulhllGGluYhshp.hhGl-uhsAGIhupulLVlsl.luWsuSYla..RVloGcMTahpQR+cYccsa............LQcRh-uLosEEhptLht-lpp .MpRhDVlh....IGlGlhlhGslhYhsLp.hhGlDuhpAGhhuQs.lLVlG.l.luWluoYlF..RVsspcMTYtpQh+-YEcth............lpKRl-pLo.-ElptL.t-lE.......................... 0 16 38 50 +11293 PF11461 RILP Rab interacting lysosomal protein Pollington J anon pdb_1yhn Family RILP contains a domain which contains two coiled-coil regions and is found mainly in the cytosol. RILP is recruited onto late endosomal and lysosomal membranes by Rab7 and acts as a downstream effector of Rab7. This recruitment process is important for phagosome maturation and fusion with late endosomes and lysosomes [1]. 21.20 21.20 21.30 21.30 21.00 20.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.03 0.72 -3.82 14 188 2009-01-15 18:05:59 2008-08-27 12:46:05 3 2 72 1 109 162 0 59.20 43 18.62 CHANGED TLQELR-VLpERNcLKuplhllQEELthY+st.hppcpt......sss..t.....t.hsphppEsst+ ...TLQELR-VLpERNELKuplhlLQEELthYK..s.....p.hppcpt............st....h.....p.hsp.p.tp................................. 0 21 29 59 +11294 PF11462 DUF3203 Protein of unknown function (DUF3203) Pollington J anon pdb_1ywy Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 87.60 87.40 20.00 17.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.28 0.72 -4.07 5 34 2009-01-15 18:05:59 2008-08-27 13:11:16 3 1 34 1 9 28 1 74.00 55 97.07 CHANGED Msl-IDs...sopsCslslEss+c+usssslcItTDuEtRMSVlsl-GcRlaITEsEADALTVAGAsDuRRHLKADD MoIEIDs...ppthColplEGsRc+u..sls..sl+IsTDsEtRhSVlhlDGcRlaIoEp-AptLsVAGApDpRRHLhADD. 
0 2 2 7 +11295 PF11463 R-HINP1I R.HinP1I restriction endonuclease Pollington J anon pdb_1ynm Family Hinp1I is a type II restriction endonuclease, recognising and cleaving a palindromic tetranucleotide sequence (G/CGC) resulting in 2 nt 5' overhanging ends [1]. HINP1I has a conserved catalytic core domain containing an active site motif SDC18QXK and a DNA-binding domain [1]. 25.00 25.00 27.40 27.00 23.00 22.40 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.22 0.71 -4.92 3 24 2012-10-11 20:44:46 2008-08-27 13:12:27 3 1 23 6 3 21 5 187.10 54 83.72 CHANGED MGYDL-cIEpVKAsVLSG.YKADVNVlILVphK..csLDlcNIQVKLVSNp+GFNQIDKRWL+oYQEMWNFPcNIYcILpYFTGELcPp+pcsK.DKRRMFhsEFSQEEQsclLDWL--NKlLILoDILKGRG-FAAEWVLVIQKlsNNh+WlLKNINEVlQHY.suG-VplSP+GSLKIGRVTIQRKGGDNGRESANMLQFKIDPTcLL ...............MGYsLspIcpVKAslLsG.YKADINl.lhlhhK..pslDlcNIQVKLVS.N.p+GFNQIDKRWlKsYpEhWphscsIhplLpYFTGELp......t.sp+RhhhsEFoppEQ.hlLsWFppNKlLlLoDIL+GRG.-FuAEWhLVhQ.Klsp.N.+WsLKNINEVlQHY..ssG-V.ISP+GSL.KIG+...lThQRKGGDNGR.oANMLQFKIDPsELh....................... 0 1 2 3 +11296 PF11464 Rbsn Rabenosyn Rab binding domain Pollington J anon pdb_1yzm Domain Rabenosyn-5 (Rbsn) is a multivalent effector with interacts with the Rab family.Rsbn contains distinct Rab4 and Rab5 binding sites within residues 264-500 and 627-784 respectively [1]. Rab proteins are GTPases involved in the regulation of all stages of membrane trafficking [1]. 20.60 20.60 20.80 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.88 0.72 -4.39 6 155 2009-01-15 18:05:59 2008-08-27 14:31:52 3 5 98 4 111 146 0 41.40 48 9.63 CHANGED LLQQIsNI+uYI+QA+tutRhDEVcsLpcNLRELQcEhtcQQ .........LLQQIpNIcuaIcpA+pssRhDEVcsLpcNLRELpcchtpQp...... 0 25 38 74 +11297 PF11465 Receptor_2B4 Natural killer cell receptor 2B4 Pollington J anon pdb_1z2k Domain 2B4 is a transmembrane receptor which is expressed primarily on natural killer cells. 
It plays a role in activating NK-mediated cytotoxicity through its interaction with CD48 on target cells in a subset of CD8 T cells [1]. The structure of 2B4 consists of an immunoglobulin variable domain fold and contains two beta-sheets. One of the beta-sheets, the six-stranded sheet, contains structural features that may have a role in ligand recognition and receptor function [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.36 0.72 -4.13 4 44 2012-10-03 02:52:13 2008-08-27 14:39:07 3 2 25 6 18 121 0 106.40 44 31.50 CHANGED CsDSuEEVVGVSGKPVpLRPSNIQTKcVSlpWKKph..SHsphc.IlsWsNsuspssshsSSDIYGF-scsFALSIKSAQLQDSGHYLLElTspsGpVCTKNFQlLIhD ........................C.sSu-cVVulSGpPlpLp.Ps..slQTK.h.h.S..lpWKhph.....Spsphc.I..LpW.cNsus...sp........s...s............s...c....pau..F.p....hc....shuL.....IKuAQ.QDSGhYhLElTs.psGpV.pstpFQV.lFD................ 0 1 1 3 +11298 PF11466 Doppel Prion-like protein Doppel Pollington J anon pdb_1z65 Family Dpl is a homologue related to the prion protein (PrP). Dpl is toxic to neurons and is expressed in the brains of mice that do not express PrP. In DHPC and SDS micelles, Dpl shoes about 40% alpha-helical structure however in aqueous solution it consists of a random coil. The alpha helical segment can adopt a transmembrane localisation also in a membrane [1]. The unprocessed Dpl protein is thought to posses a possible channel formation mechanism which may be related to toxicity through direct interaction with cell membranes and damage to the cell membrane [1]. 
25.00 25.00 27.40 27.40 18.20 17.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.19 0.72 -7.10 0.72 -4.38 7 74 2009-09-11 05:32:31 2008-08-27 15:40:05 3 1 51 1 14 79 0 29.70 73 16.75 CHANGED MRKHLGshWLAllChLLhScLSsVpARGIK ....MRKHLGGCWLAIVCVLLFSpLSuVKARGIK 0 1 1 2 +11299 PF11467 LEDGF Lens epithelium-derived growth factor (LEDGF) Pollington J anon pdb_1z9e Family LEDGF is a chromatin-associated protein that protects cells from stress-induced apoptosis. It is the binding partner of HIV-1 integrase in human cells. The integrase binding domain (IBD) of LEDGF is a compact right-handed bundle composed of five alpha-helices. The residues essential for the interaction with the integrase are present in the inter-helical loop regions of the bundle structure [1]. 24.00 24.00 24.30 24.80 23.50 23.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.24 0.72 -4.34 9 180 2009-01-15 18:05:59 2008-08-27 15:40:40 3 4 78 27 106 162 0 106.50 42 20.50 CHANGED EccLQ+LHsEIK.uLKlcssDlcKClcAL-ElusLpVTophLQKNs-slsTLKKIRRYKus..................QslMcKAstlYs+FKshFl.scsEohhshs.phsppEpc.p-pscp .........-p+LQ+lHu-IK.uLKlDssDVp+CLpAL-ELusL..QVTpQhLQKpo-hlsTL.KK......IRRaKus.......................................p.slM-KAshlYs+aKshFLhscs-sh.st..shs.ts.t-pttcc.t..p..................... 0 19 25 69 +11300 PF11468 PTase_Orf2 Aromatic prenyltransferase Orf2 Pollington J anon pdb_1zb6 Family In vivo Orf2 attaches a geranyl group to a 1,3,6,8-tetrahydroxynaphthalene-derived polyketide during naphterpin biosynthesis [1]. In vitro, Orf2 catalyses carbon-carbon based and carbon-oxygen based prenylation of hydroxyl-containing aromatic acceptors of synthetic, microbial and plant origin [1]. 
22.00 22.00 175.70 175.40 21.40 20.90 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.80 0.70 -5.20 12 25 2009-01-15 18:05:59 2008-08-27 16:02:56 3 1 21 8 8 28 0 292.20 28 93.44 CHANGED hDhp+hhuslctsAthlssPhu+cpshslLssFp-sFsp..Gslla+sss..phssplsYRahs..sh.DshuhAlstGLl.scssHPlssLlschssLhs.ssp.usDhssstGhpKhWsaFss..hpsluclhulPuhPtulttptchFtchGL.-+VphlulDYpp+ThNlYF....tutGslstppstuhh+.sGhssPup....thltaspc.....uaslhsTLsa-oucI-Rlsahshph...sssphPA.lpsclc+F...lcssP.t..tph...shuhsausptcYlKhps.Yphshsphhsh.s.tshs ...hshschhuslccsAthlssshu+cpVhslLssFp-sFsp..uV.lhaRsTo..psscsLsaRFh.s..ssh.DPhuhAlspGLl.scssHPlusLls-lpshhP.hsttulDFulspGhpKsWsaFss..hpsluclhulPuhPtultsptchFt+aGL.-+VphlulDYpp+ThNlYF.......hssGshstcslpuhlp-sGhs...sPop....phlthspc.....uaslYsTlsWDSscIcRlsasshsh...sst...slPupl..pspl-+F...scsAPhtspscp..hlhuhuhus....pscYhKlps.Yphp.h.h.........s.............. 0 2 4 7 +11301 PF11469 Ribonucleas_3_2 DUF3204; Ribonuclease III Pollington J anon pdb_1ztd Family This is a family of archaeal ribonuclease_III proteins. 25.00 25.00 155.20 155.00 23.90 17.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.47 0.71 -3.90 3 14 2012-10-03 08:45:47 2008-08-28 09:06:36 3 1 14 2 12 17 0 118.40 62 88.42 CHANGED KGLAKLGDSLINFLaSLALTEaLGKPTG-RVPNASLAIAL-hoGLSKll+PRsDKHAKGDhAEALlAYAWLcGpISpEEAVEILppNLss-VhcFoRKKEsIG+ALAsLh-hIuERLsuc KGLuKFGDSLlNFlaSLALoEaLG+PTGsRVPNASLAIAL-hAGLp+hl.PRsDKHGKGDhAEAlIAYAWLEGtITlEEAVEIl+pNhopDVhcFSRKKEAIGpAhA.LL+lIuERLs..h. 0 2 2 7 +11302 PF11470 TUG-UBL1 TUG; GLUT4 regulating protein TUG Pollington J anon pdb_2al3 Family TUG is a GLUT4 regulating protein and functions to retain membrane vesicles containing GLUT4 intracellularly. TUG releases the GLUT4 containing vesicles to the cellular exocytic machinery in response to insulin stimulation which allows translocation to the plasma membrane [1]. 
TUG has an N-terminal ubiquitin-like domain (UBL1) which in similar proteins appears to participate in protein-protein interactions [1]. The region does have a area of negative electrostatic potential and increased backbone motility which leads to suggestions of a potential protein-protein interaction site [1]. 23.30 23.30 23.30 24.30 23.20 22.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.69 0.72 -4.05 28 216 2012-10-03 10:59:06 2008-08-28 09:53:01 3 9 197 1 158 213 3 62.90 38 12.64 CHANGED sls.ss+ptplKVosusslppVLppuCcKaslcss.....casLp+ps.Kh....lDLSLsaRhusLsssu+LEL ...........slssstRRtplKVoPsp.hLhpVLp-u..CpKhs..lsss.......pasLK..app..Kh....l.DLSlsaRhusLsssA+LEl...... 0 41 75 126 +11303 PF11471 Sugarporin_N DUF3205; Maltoporin periplasmic N-terminal extension Pollington J, Coggill P anon manual Domain This domain would appear to be the periplasmic, N-terminal extension of the outer membrane maltoporins, Pfam:PF02264, LamB. 21.60 21.60 21.60 21.60 21.50 21.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.00 0.72 -4.32 19 821 2009-01-15 18:05:59 2008-08-28 09:54:01 3 6 505 0 61 363 7 58.20 36 11.80 CHANGED Mp.phh.lslhslLhhssh.....tAhAstLTlEQRLtLLEccLppscpELppscpchpc.hcp ..................pp..luhhlhL.hssss....shpu.hA.p...p.L..T......lEQRLtLLEpcLppsc.pchpcscsch+p.hh.p...................... 0 4 15 34 +11304 PF11472 DUF3206 Protein of unknown function (DUF3206) Pollington J anon pdb_2au5 Family This bacterial family of proteins has no known function. 
23.60 23.60 23.60 243.30 22.70 23.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.73 0.71 -4.36 2 13 2009-01-15 18:05:59 2008-08-28 09:57:28 3 1 13 1 2 3 0 128.00 91 94.76 CHANGED IlSTpKtPNFpYp-.hcpFL.NhLAFohGhhTtDhSpFsP.VLt.MEc-PsWLpEusuhhQullVtSLl-stNassstpLhsEhspLlpLY.thtpcpLTpNp-sLalslaDKFhhLLLssDEhIh.L IISTNKAPNFQYTDEMDRFLMNTLAFSVGLVTEDYSTFDPEVLKIMEEEPDWLQESVAWCQSLVVGSLVDSGNYDDTGELMDEFNCLLNLYDRARQRELTSNEDNLFLNIHDKFLALLLTDDELITNL 0 1 2 2 +11305 PF11473 B2 RNA binding protein B2 Pollington J anon pdb_2az0 Family B2 is expressed by the insect Flock House virus (FHV) as a counter-defense mechanism against antiviral RNA silencing during infection. In vitro, B2 binds to dsRNA as a dimer and inhibits the cleavage of it by Dicer. B2 blocks cleavage of the FHV genome by Dicer and also the incorporation of FHV small interfering RNAs into the RNA-induced silencing complex [1]. 20.50 20.50 20.70 21.20 20.40 19.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.56 0.72 -4.39 2 10 2009-09-11 16:34:22 2008-08-28 09:58:13 3 1 6 6 0 12 0 74.50 55 62.45 CHANGED M.SKLALIQELPDRIQpAVEsshuMSYQ-APNNVRRDLDNLpACLNKAK.TVsRMVTSLL-KPShsAYLEGKs ............SphtLlQ-LPc...pIQpAVcsAhuhshpssPspVt+DLDNh+ACLsKhctTshRhssSLLsKPpVVAhLcGcA.. 0 0 0 0 +11306 PF11474 N-Term_TEN Telomerase reverse transcriptase TEN domain Pollington J anon pdb_2b2a Family This is the N terminal domain of the protein telomerase reverse transcriptase called TEN. The TEN domain is able to bind both RNA and telomeric DNA and contributes towards telomerase catalysis. The TEN domain has a structure that consists of a core beta sheet surrounded by seven alpha helices and a short beta hairpin [1]. 
25.00 25.00 417.10 417.10 19.70 18.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.42 0.71 -4.91 2 2 2009-09-11 09:41:36 2008-08-28 10:51:22 3 1 2 3 1 3 0 188.00 100 16.91 CHANGED INNINNNKQMLTRKEDLLTVLKQISALKYVSNLYEFLLATEKIVQTSELDTQFQEFLTTTIIASEQNLVENYKQKYNQPNFSQLTIKQVIDDSIILLGNKQNYVQQIGTTTIGFYVEYENINLSRQTLYSSNFRNLLNIFGEEDFKYFLIDFLVFTKVEQNGYLQVAGVCLNQYFSVQVKQKKWYKNN INNINNNKQMLTRKEDLLTVLKQISALKYVSNLYEFLLATEKIVQTSELDTQFQEFLTTTIIASEQNLVENYKQKYNQPNFSQLTIKQVIDDSIILLGNKQNYVQQIGTTTIGFYVEYENINLSRQTLYSSNFRNLLNIFGEEDFKYFLIDFLVFTKVEQNGYLQVAGVCLNQYFSVQVKQKKWYKNN 0 1 1 1 +11307 PF11475 VP_N-CPKC Virion protein N terminal domain Pollington J anon pdb_2bai Family This is the N terminal domain of a family of virion proteins which contains a zinc finger domain. Currently no function is known. 21.10 21.10 81.60 80.60 20.50 17.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.72 0.72 -4.41 5 30 2009-01-15 18:05:59 2008-08-28 11:34:25 3 2 7 1 0 33 0 32.00 93 1.67 CHANGED MATTMEQEICAHSLTFEECPKCSALQYRNGFY MATTMEQEhCAHSLTFEECPKCSALQYRNGFY 0 0 0 0 +11308 PF11476 TgMIC1 Toxoplasma gondii micronemal protein 1 TgMIC1 Pollington J anon pdb_2bvb Family TgMIC1 is released as part of a complex by Toxoplasma gondii prior to invasion. The complex which consists of TgMIC4-MIC1-MIC6 participates in host cell attachment and penetration and is critical in invasion. This is the C terminal domain of TgMIC1 which has a Galectin-like fold which interacts with and stabilises TgMIC6 providing a mechanism for an exit from the early secretory compartments and trafficking of the complex to micronemes [1]. 
25.00 25.00 29.50 252.00 19.10 17.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.57 0.71 -4.64 2 5 2009-01-15 18:05:59 2008-08-28 11:35:37 3 1 3 2 3 5 0 137.00 66 29.94 CHANGED chclhGDShpAhLctGQQLhlTa.SspLcVuVGSCHpLssNF.DhaLpFpTsSpSu.D.VEl--sAGsu.LTIGLGppGRlsVVhpYsp.suu.t.sAYsVtDSGCpo.EtV.hpGlssGApLVhsTlG-sssu.St KTEIHGDSTKATLEEGQQLTLTFISTKLDVAVGSCHSLVANFLDGFLKFQTGSNSAFDVVEVEEPAGPAVLTIGLGHKGRLAVVLDYTRLNAALGSAAYVVEDSGCSSSEEVSFQGVGSGATLVVTTLGESPTAVSA 0 2 2 3 +11309 PF11477 PM0188 Sialyltransferase PMO188 Pollington J anon pdb_2c83 Family PMO188 is a sialyltransferase from P.multocida. It transfers sialic acid from cytidine 5'-monophosphonuraminic acid to an acceptor sugar [1]. It has important catalytic residues such as Asp141, His311, Glu338, Ser355 and Ser356 [1]. 25.00 25.00 28.50 39.90 21.00 20.80 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.21 0.70 -5.64 4 21 2009-01-15 18:05:59 2008-08-28 11:52:36 3 3 19 18 3 32 0 347.70 37 81.47 CHANGED TlElYlDhAoLPoLpQhhchIQpp--h.ss.RlhuhuRasIssshlsc..hNhpFa..hsN+spstllstL.DphstspphhplplahNhhHulsll.sIhshh.pt.cKloh.cLsLYDDGStEYVsLaphpchs-hppp..Iptpcs.LtphLssspsphsNshhsRYsWpplasopYHhLptDaF-pp..LpsLKchLupshppMcWstasphsspQpshahsllGFssEh.ppQhhts.pssFIFTGTToasus.-hh-hhAQQQlslLNctppssSshalG..psYcLaFKGHPsushlNchIhsph.shIpIPApIsFElLhMTshLPDpVGGhASSlYFolPs-plN+llFhoSDp.p.+p-hLsss....LlpVMlpLsIlsEs ............................................lplYlD.AoLPsLpQhhchhpppc-h.pp.RlhuhuRatls-s.lpp...Nh.phh.hhsN+ss.....ptLhshl.cphsts...lplclahNhuHShplltPIhthh.pths+lpIppLsLYDDGShEYVcL.p.pcstshptp.............lptucppLpphL.ssp.phsN.shhspYsWpphaPspYHhLppDYF-pt..LpsL+cYLupshppMcWssapp..LospQpshahslVGFs.sEh..pphhpsppssFIFTGTToapus.c.h-hhAQQQlNllNchhptpushalG..spYclaFKGHPp..uu.lNchIhssh.shhpIPApIsFElLhMTGh.LPDpVGGlASSLYFSlPpE.KlsaIlFToscp.p.+cDhLpss....hlpVMhpLsIlccp......... 
0 0 1 2 +11310 PF11478 Tachystatin_B Antimicrobial chitin binding protein tachystatin B Pollington J anon pdb_2dcv Family Tachystatin B is an antimicrobial chitin binding peptide and consists of two isotopes B1 and B2.Both structures contain a short antiparallel beta sheet with an inhibitory cysteine knot motif. Tyr(14) and Arg(17) are thought to be the essential residues for chitin binding [1]. 25.00 25.00 106.00 105.90 18.70 18.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.35 0.72 -4.23 2 2 2012-10-01 22:06:18 2008-08-29 08:54:07 3 1 1 2 0 2 0 42.00 95 100.00 CHANGED YloCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF YloCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF 0 0 0 0 +11311 PF11479 Suppressor_P21 RNA silencing suppressor P21 Pollington J anon pdb_2cwo Family P21 is produced by Beet yellows virus to suppress the antiviral silencing response mounted by the host. P21 acts by binding directly to siRNA which is a mediator in the process. P21 has an octameric ring structure with a large central cavity [1]. 28.90 28.90 29.00 351.60 21.10 28.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.10 0.71 -4.53 2 5 2009-01-15 18:05:59 2008-08-29 08:55:10 3 1 2 4 0 6 0 173.80 91 100.00 CHANGED MKFFhpDGETSRAlSRSESLLRRVKELGTNS.QSElSEClsEFNELApFNHLLVTVEHREWMEpHPpQSScLRsPSRlGEMLKEIRAFLKVRVVTPMHKETAS-TLNAFL-EYCRIsGLsREDALREKMRKV+SsVLFHHSELLKFEVTENMFSaTELLKLNLSLRVISSQILGhAl MKFFhpDGETSRAISRSESLLRRVKELGTNSPQSEVSECINEFNELARFNHLLVTVEHREWMEKHPKQSSELRsPSRLGEMLKEIRAFLKVRVVTPMHKETASETLNAFLEEYCRITGLTREDALREKMRKVRSTVLFHHSELLKFEVTENMFSFTELLKLNLSLRVISSQILGIAV 0 0 0 0 +11312 PF11480 ImmE5 Colicin-E5 Imm protein Pollington J anon pdb_2dfx Family Imms bind specifically to cognate colicins in order to protect their host cells [1]. Imm-E5 is a specific inhibitor protein of colicin E5. It binds to E5 C-terminal ribonuclease domain (CRD) to prevent cell death. 
The binding mode of E5-CRD and Imm-E5 mimics that of mRNA and tRNA suggesting an evolutionary pathway from the RNA-RNA interaction through the RNA-protein interaction of tRNA/E5-CRD [1]. 25.00 25.00 31.30 31.30 20.60 17.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.64 0.72 -4.16 5 26 2009-01-15 18:05:59 2008-08-29 08:55:51 3 1 21 2 5 23 0 78.30 50 81.21 CHANGED MKLSsKAAIEVCpcAAK+GLsIu+IEGGIWHpP..GFEARlDsIWDGhDsPlDh.oclscNNppAtEsI+-D.SsGHDAFIVTlsu ..........MKLSsKAAIEVCpcAsK+GLhIhtI-GGhWhsP..GFchc.ss.Ws..hD.P.ch..splscNNplAhEsI+-DhpsGasAFIlTlt... 0 0 2 4 +11314 PF11482 DUF3208 Protein of unknown function (DUF3208) Pollington J anon pdb_2ebe Family This bacterial family of proteins has no known function. 25.00 25.00 104.60 104.30 16.80 16.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.52 0.72 -3.92 4 19 2009-01-15 18:05:59 2008-08-29 09:07:12 3 1 19 6 13 20 2 107.40 58 95.15 CHANGED htAVRLhQGYLWHP+uhslDLcuhLPtEls..........sA+lLWDpVsPPhsFFEsGpPTtoQpFYQhTlLhlh-E.P..EuL+s.AEtAupALG.lLEGhPPGVGW.LhEDLRsL ....hpAVRLhQGYLWHP+-hslDLcshLPtEls...........sA+lLhDpVsPPhsFFEDGTPTtoQpFYQlTlLhls--tP..-uL+PhAptsupuLsslLEuhPsGVGW.LhEDLRsL. 0 3 8 13 +11315 PF11483 DUF3209 Protein of unknown function (DUF3209) Pollington J anon pdb_2ehw Family This family of proteins has no known function. 25.00 25.00 25.70 82.10 23.90 22.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.55 0.71 -3.78 5 29 2009-09-11 16:33:41 2008-08-29 10:10:54 3 1 29 4 14 28 0 122.20 47 99.94 CHANGED MoCHEIEALRLGLMNVLGTsD-uARpHAccELEGcL..cGPIEALAEA-oLAAlcRHLDAALVDLEEElAAT-c--PEYDYLRGRLVAVRDAEpALpRLsscGEAlLsDLGEoHcsLHEsFPVDE MuCaElpALRLGlMNlLGhcD-ssppHtcpELtstL....pGPIcuLA-AcoLsulpRaL-uALlDLEEclAst-s-DPchsYhRGhLlAV+csEtpLppLptpu-ulhcDLsEhHchLHEhFPscc. 
0 4 10 13 +11317 PF11485 DUF3211 Protein of unknown function (DUF3211) Pollington J anon pdb_2ejx Family This archaeal family of proteins has no known function. 21.60 21.60 22.30 22.80 21.20 21.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.62 0.71 -4.13 11 43 2012-10-02 19:24:03 2008-08-29 10:24:31 3 1 17 1 16 38 2 130.40 35 98.28 CHANGED M.chplpIpTsH-h-ulhpILSDPpFslPplhPshKplp.hpssoFpu.Gc....ahhhsach+Gslalu.sclsYsaplsss.................sstGsG+Lphsh.ppsclplplEY-GhhE+h.u.shlc+hlpchtccl-EcIRhERIKRKI ....M..phsh.TpHDhpslhcILSDP.Fhl.tlLss.cplp.spsspFcs.ss.....huhhsllhpGshYlGss.ploYshphttt...................ssuG+lphph.pcs...cIplhl.-a-Ghhtphst.hhlpp+lpchhcph-EclRLERIKRKI.................................................................... 0 2 2 14 +11318 PF11486 DUF3212 Protein of unknown function (DUF3212) Pollington J anon pdb_2euc Family Members in this family of proteins are annotated as YfmB however currently no function for this protein is known. 21.50 21.50 24.40 23.40 19.90 19.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.69 0.71 -4.35 4 24 2009-09-10 16:55:32 2008-08-29 10:36:29 3 1 24 2 5 22 0 116.00 57 76.49 CHANGED MaYFSPEQQaNAWlVSDLVKQlFp+cstppssh+cLssFAEc+F+IsIDaVFSIIhNIGDIEp.hlsp-hEchLSSYLssLhPaVTtDMhcsS+pNAppYL.+E+ssDVY+LFhs.sshhph .........hpYFSPEQQaNAWlVSDLVKQlF+++stCssGI+ELssFAE-+FHIsIDFVFSIIhNIGDIE..sLspEIEspLSoYLouLhPhlTADMhcoSKsNAatYLp+E+ss-lY+LFh..sshhph 0 1 2 3 +11319 PF11487 RestrictionSfiI Type II restriction enzyme SfiI Pollington J anon pdb_2ezv Family SfiI is a restriction enzyme that can cleave two DNA sites simultaneously to leave 3-base 3' overhangs. It acts as a homo-tetramer and recognises a specific eight base-paid palindromic DNA sequence. After binding two copies of its recognition sequence, SfiI becomes activated leading to cleavage of all four DNA strands. 
The structure of SfiI consists of a central twisted beta-sheet surrounded by alpha-helices. 25.00 25.00 206.60 206.30 18.30 17.30 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.74 0.70 -5.22 2 6 2012-10-11 20:44:46 2008-08-29 10:52:14 3 1 6 4 3 7 0 235.80 54 95.61 CHANGED hcpcYhc.s.-clEplEKpTLRhlVQAl.pYppcAppIFEppss.sSs....luEDITp.ALEhhtha.IspRhhGhIDYKpspaL.pPchhl.QsLhVDuKAppttsps.TLQhSQlsM..chRpppSGcslshputl.P...L.phNDs...hlTToIFV+a.YR....VpuR.+ELpSIhVhulPpuhLpppYNPDsssohahAG+cuPsRtEhhRlRl.FDRLKctssWRlQ.l.hssDs.a....W..........st ..........LppsL-+LEEIEKsTLRLVVQAIYDYRcpAlEIF+cEuDLsSD....IuEDITREALDRLGMsRIDpRLFGKlDYKRACYLFHPDYAl+QALFVDSKAEKsuspu.ATLQhSQLSM..tV+QpRSGpsVslpGchPT...lIolcsp..+YLTTTlFVKYNYcc....-uts+pLKSIlVAAVPNGhLQDRYNPsPpDTIWtAGRNAPohGE-FRVRLSFSpLKcKAAWRVQsIPMPP-s.a..................... 0 2 3 3 +11320 PF11488 Lge1 Transcriptional regulatory protein LGE1 Wood V, Coggill P anon Charwan C Family This family of proteins is conserved from fungi to human. In yeasts it is involved in the ubiquitination of histones H2A and H2B. This ubiquitination step is a vital one in the regulation of the transcriptional activity of RNA polymerase II. In S. cerevisiae, Rad6 and Bre1 are present in a complex, also containing Lge1, that is required for H2B ubiquitination. Bre1 is the H2B ubiquitin ligase that interacts with acidic activators, such as Gal4, and recruits Rad6 and its binding partner Lge1 to target promoters [1]. In S. pombe the equivalent protein to Lge1 appears to be Shf1. 
21.80 21.80 21.80 22.20 21.10 21.30 hmmbuild --amino -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.64 0.72 -3.79 20 157 2009-01-15 18:05:59 2008-08-29 11:04:03 3 1 85 0 81 150 0 100.90 37 32.56 CHANGED sPal.............tlhplc.............Dpspppc..............lcptap-hs......plDpcLcpLptphh+hE.hphshLpspsp+-uLpVQLT.pEK........L-shhhh ...........................................................................................................................s.ah..............h.ch-ttsp............Dpsppsc.....................lpsRsKAIs......oKsKEIEpVY+QcsETFGMVVKMLlcKDPsLE+sIQhsLcpN........L-Elhh..................... 0 7 20 48 +11321 PF11489 DUF3210 Protein of unknown function (DUF3210) Wood V, Coggill P anon Charwan C Family This is a family of proteins conserved in yeasts. The function is not known. The Schizosaccharomyces pombe member is Swiss:O94497 and the Saccharomyces cerevisiae member is Swiss:P40563. 17.50 17.50 22.90 20.60 17.00 16.60 hmmbuild -o /dev/null HMM SEED 711 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.48 0.70 -5.84 15 113 2009-01-15 18:05:59 2008-08-29 11:05:14 3 5 100 0 90 116 0 597.80 28 77.07 CHANGED 
P+IPPRP.s+Rh-RSsSP....ssaAPSPLN-.s.............ptp.s.p...sPpRPsSV.slPSlGEEGtEYpslsssshoss........soPsp..oRsVusDL+LHAP+PSlPsuoAKu+lpAVTRTDSppAsAsGhG.tsuuPttcshtcpsocSLpspsosopssSusspstp..Sht..s-EHGI.PEIGQpVPM.hsNAGDVQAPSPuPhp.............pt.tptHtRppSuRt..uLPPGSYGLHGHGl.ss-+FEKAaYEKHP--hs+E-pupatsslGpsRs.DaAlSuDDLN+IV+toAspGsGLus..thsuTPpEplGahAs-EhopRhuoPsPcSsp.................hcsPl+.p..s.t...............................hIHlDpPh+p...............tp..tt.sttt.s.......-pPILAuD.....EVphcsuhpp...PAVSPohc+c............p...ptp..SsscSRssS+ssshptshst.hc...........psPLEDVcEYEPLFPED-pcsh.ctshspss..........................+.c.+sp.hp++FPSpDlWEDoPsShphp.usVSTP-..................tpcshEsPcQpssRcoptsshcs.....................pps.....psc.....ptpcstsRsph.pQRFPS...RDlWEDuPESppLsTTlpss...................................--hpspSPtss.sKPslPu..RPp...+.............s.csttps.s.spcc+pPPslPsRP..................KPQIPARPuKstsppsu-.......pstp.s............sKsKPsVPuRPtGSKIAA..L+AGFluDLNuRLpLG........PQuPp+.....ppppct...............ppptsssEKuPLSDA......RKGRARGPtRR+PAst...............sst+hsshsplshspsassWpls 
.............................................................................................P.lPsRP..pt..ps..s............SPhs.......................................+..s...h..hs.p..-ht.....................s.p...t.htp-..h.AspPslPstpsptph.sVTps-sp.utthh.....s.s...t.........................................t..t...................tpcpuI..plu.pVPh...phGsVQAPoPus.t.................t.p.p..St......Psuohuh+sHt......psth-pshhpKHPc.hth.pts...........c..phuhSspcLNclV+pst......s...t..usPp-pluh.t.-chspRhsss.sts................................................................lHlc...p.........................................................t.............................ptPlLAsD.....Elt.pst..t...PAlsPthtpp...........................u.s...ps...+.ps+s....t.....s....t........................ttslcclcEhEPLF..---tp....p...tttp.................................................................p....p.........pcFPS..pDlWE...D.sPsuhphp..spVoosc...........................t..psPt....pt.tppppt.t.p...................................t.......t.t.p...tt.c..t........pp+..FPS...+DlWEDs.P-Stphtsplp.s.......................................pp..t.pss..t....pp.P..tlPt...RPp..................................................s.p.s.....t..s...ttp+tsPslPc+P..................KPp..lPsR.sp.tpppstp.........p....s...................s+sKPslPsR.P....s..GuK.....IAA..lpAGFhscLNs+LpLG............Ptssp...........tpp.t....................................pttp.tc.+..sPLu.DA......RKGRARGPtRRtP..sh...........s...ssstc..hstssp.hph..shshWpl............................ 0 15 45 72 +11322 PF11490 DNA_pol3_a_NII DNA_pol3_alph_N; DNA polymerase III polC-type N-terminus II Finn R, Coggill P anon Pfam-B_853 (release 23.0) Domain This is the second N-terminal domain, NII domain, of the DNA polymerase III polC subunit A that is found only in Firmicutes. DNA polymerase polC-type III enzyme functions as the 'replicase' in low G + C Gram-positive bacteria [1]. 
Purine asymmetry is a characteristic of organisms with a heterodimeric DNA polymerase III alpha-subunit constituted by polC which probably plays a direct role in the maintenance of strand-biased gene distribution; since, among prokaryotic genomes, the distribution of genes on the leading and lagging strands of the replication fork is known to be biased [2]. It has been predicted that the N-terminus of polC folds into two globular domains, NI and NII. A predicted hydrophobic surface patch suggests this domain may be involved in protein binding [3]. This domain is associated with DNA_pol3_alpha Pfam:PF07733 and DNA_pol3_a_NI Pfam:PF14480. 22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.30 0.71 -4.48 63 1162 2012-10-02 15:09:17 2008-08-29 11:09:41 3 24 1157 0 139 793 71 115.20 23 8.07 CHANGED sYappllpphsp.tssshpshLpppphph.cssclh.lhstschhtshl+ppthshlhpphhphGFs....hphpsplpp.p.spp.h....ppappp+tpc-p...phspp...shpphpppppptpppppp ............pYappslppsst..to..s..hh+uh.hppppsph..ps.spLh.l.sssslppsahccpt.hsslhcpapphGFs.....phphphchsc.t.s....ppph.............psactpppppsp...ptspp...........Ahcthcphcttts..tp.p........................................ 0 44 85 113 +11323 PF11491 DUF3213 Protein of unknown function (DUF3213) Pollington J anon pdb_2f40 Family The backbone structure of this family of proteins has been determined however the function remains unknown. The protein has an alpha and beta structure with a ferredoxin-like fold [1]. 20.30 20.30 21.30 86.90 19.80 18.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.01 0.72 -4.11 2 12 2009-01-15 18:05:59 2008-08-29 11:11:46 3 1 12 1 9 16 0 87.70 47 91.88 CHANGED h.lKF.ssls.EcApIhQYELppc-chaRlFlNsYA+su.VlhD-phhshEEL...hEcLKsEVltEKplTlQELIEtShSWNNVh.S+u .l.pl+F.GpIs.EEAphhQYELppctuVaRlFlNGYA+sGhVlFD-pplscEclLchLcchcPEVlpE+clTlpELIEpShSWpNlh....t.. 
0 1 1 6 +11324 PF11492 Dicistro_VP4 Cricket paralysis virus, VP4 Tate JG, Finn RD anon pdb_1b35 Family This is a family of minor capsid proteins, known as VP4, from the dicistroviridae. The dicistroviridae is a group of small, RNA-containing viruses that are closely structurally related to the picornaviridae. VP4 is a short, extended polypeptide chain found within the viral capsid, at the interface between the external protein shell and packaged RNA genome[1]. 22.60 22.60 23.40 40.80 22.30 20.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.83 0.72 -3.96 11 50 2009-01-15 18:05:59 2008-08-29 11:12:18 3 7 19 1 0 54 0 57.20 45 6.17 CHANGED uusEhpps....GsIScsAosVuplAssls..plPllGshA+s....spWlustVuslAplFGa ......soEssp.....GsISclAouVssVANsl-..pIPllGpIAKP....lpWVushVusVAulFGa 0 0 0 0 +11325 PF11493 TSP9 Thylakoid soluble phosphoprotein TSP9 Pollington J anon pdb_2fft Family The plant-specific protein, TSP9 is phosphorylated and released in response to changing light conditions from the photosynthetic membrane. The protein resembles the characteristics of transcription/translation regulatory factors. The structure of the protein is predicted to consist of a random coil [1]. 25.00 25.00 43.60 43.40 21.10 20.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.78 0.72 -3.43 5 29 2009-09-11 09:35:24 2008-08-29 11:52:16 3 1 16 1 15 31 0 77.20 43 68.38 CHANGED AT+G.GoAss+EEKuhhDalhGhlhKcDQhhETDPlLpKVDtKuPSGSTsu+KAsu+uPAsSA.....Ac-EGGsGGFsLGuLFAK ..........stt.tuusucEEKGlhDaIhGtlhKc-Q.h.lETDPlLpKV-pKs.u.Gs.....osup....Kus..ssustpts......tcccGGsGG.....hG...GLFuK... 0 1 7 11 +11326 PF11494 Ta0938 Ta0938 Pollington J anon pdb_2fqh Family Ta0938 is a protein of unknown function however the structure has been determined. The protein has a novel fold and a putative Zn-binding motif. 
The structure has two different parts, one region contains a beta sheet flanked by two alpha helices and the other contains a bundle of loops which contain all cysteines in the protein [1]. 22.80 22.80 22.80 23.00 22.70 22.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.33 0.72 -3.92 4 21 2009-01-15 18:05:59 2008-08-29 13:09:29 3 1 20 1 8 17 1 99.50 61 93.18 CHANGED M...KIlVNG+EAGTKEKGCALCGuTWGDYYE-lDGE+LFFCCDICAhEFlNMlsEVKK+TuWs+lDELlINGNY.pGRsC.uKsGs+ch+FYVKFs--AsIcTF+.hs .....MKIlVNG+EAGTKEsGCALCGGTWG.DYYEEIDGEKLFFCCDlCA.LEFVNMlNEVKKRTsWSRIDELlI..NGNYYTGRTCsAKsGs+EYKFYVKFNDDAsIETFKEls.................. 0 3 4 7 +11327 PF11495 Regulator_TrmB Archaeal transcriptional regulator TrmB Pollington J anon pdb_2f5t Family TrmB is an alpha-glucoside sensing transcriptional regulator. The protein is the transcriptional repressor for gene cluster encoding trehalose/maltose ABC transporter in T.litoralis and P.furiosus [1]. TrmB has lost its DNA binding domain but retained its sugar recognition site. A nonreducing glucosyl residue is shared by all substrates bound to TrmB which suggests that its a common recognition motif [1]. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.31 0.70 -5.39 30 611 2012-10-02 13:01:53 2008-08-29 13:10:22 3 5 362 2 163 568 67 147.60 19 52.42 CHANGED tl.hl+uhpsllpphp-lIpsAcpElhluls.chLcplpctLhsuhccGVpVhLllhs.....sss.s.hchhsssstlRhpcsss.shl....lluDtppulhsspp........pscscpYulhhpcpsLhhhlsthFhsshWtpuphlh...spshshPhpasshRhulpslpphhppuhslpuplpGhshp.oscphslpGcl..ss.............hsthst.hsshhlcoscu.plsVGGhsAhlEDlEupplplptt ....................................h.lhsl+uppsllpphpplIppAcpcl.hl....u.....s....h....p....-.....l.....p.p.....l....c...s....t.....LppstccG..Vplh.hl..hhs..............hpssh.p..h.c...h....h...t.h....s.p..............s..c..ht..tt..t..c...hh.l..................lssDs.cchlhu.s......................ppp..u..hhs...p.....hhhhh.............................................................................................................................................................................thhh..................................................................................... 0 39 99 140 +11328 PF11496 HDA2-3 Class II histone deacetylase complex subunits 2 and 3 Wood V, Coggill P anon Chahwan C Family This family of class II histone deacetylase complex subunits HDA2 and HDA3 is found in fungi, The member from S. pombe is referred to as Ccq1 in Swiss:Q10432. These proteins associate with HDA1 to generate the activity of the HDA1 histone deacetylase complex. HDA1 interacts with itself and with the HDA2-HDA3 subcomplex to form a probable tetramer and these interactions are necessary for catalytic activity. The HDA1 histone deacetylase complex is responsible for the deacetylation of lysine residues on the N-terminal part of the core histones (H2A, H2B, H3 and H4). Histone deacetylation gives a tag for epigenetic repression and plays an important role in transcriptional regulation, cell cycle progression and developmental events. 
HDA2 and HDA3 have a conserved coiled-coil domain towards their C-terminus [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -12.01 0.70 -5.60 26 146 2012-10-05 12:31:09 2008-08-29 13:22:22 3 6 101 6 102 1604 22 269.30 22 34.08 CHANGED sssu-ahlPssMsphQK-Lh-pllslatpsIlca.h...cspspppsh.................................pphchhhpphphlusHP..L.Ll-HaMP+ph........hht-.stchttoSuKFtlLscLlshl..............pppp......................hpllllscssKph-LlEulLhG.....+tlphpRhsuphlhscs+chs............................................................................................t.thusslaLhsocslhpp..sss..tp.phshlIuhDssl-spsPslpplRppptp................................................t............sPllRLlshsSsEHhtLpasp.....tt.p.h.th..t.......................LRchlGslssD...shslhpp..lchlssal .........................................................................................................................t..stcahlPhshsthpK-hh-pll.s.h.ctps.lhca..h...........csp.s..pps.h...................................................................................................pph..ph..h..hpplp.lssHP....L...l........l.....-....H.h..h..Ppph......................................................h.t..c..........s.....t......p....h..t...t..s..SuKFphLppLl.shl.........................pphp......................hcllllscs.s+phDLlEuhl..lG...............pphp..hpRhsu..p..p..h.hsp.ppths............................................................................................................................................................................thshtlp.L.h.ss.p......thhp..........p.s.....phsh.lIshDs.s..h.......-spt..ssl...phlcppt.pp...................................................................................t.hPll+Lls.soh-HhtLhhsph.........pp.p..l..phhtt.......................hps.hGpl.ss....h....p.................................................................................................................
................................. 0 21 53 89 +11329 PF11497 NADH_Oxid_Nqo15 NADH-quinone oxidoreductase chain 15 Pollington J anon pdb_2fug Family This protein, Nqo15, is a part of respiratory complex 1 which is a complex that plays a central role in cellular energy production in both bacteria and mitochondria. Nqo15 has a similar fold to Frataxin, the mitochondrial iron chaperone. This protein may have a role in iron-sulphur cluster regeneration in the complex. This domain represents more than half the molecular mass of the entire complex [1]. 25.00 25.00 26.50 114.90 19.00 17.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.51 0.71 -4.40 3 17 2009-01-15 18:05:59 2008-08-29 13:32:55 3 1 17 15 11 16 2 127.70 62 96.40 CHANGED MAsAs-ccLYcQWVELLGWLc-EApoRGLuFEKVADFPDYIYRMERPYDLPTTVMSVuLSsG.GQPLLLAAVSPRHVDLKGISLRLMGGSKHWHLHA...Gu+GLLEGKRPFTRERLuVLLDGAhRGlAsV ......Mutup-ctLYcAWVELLuWM+EYApt+.GVpFEKEADFPDFIYRMERPYDLPTTlMoASLSDuhGEPFLLAsVSPRHAcLK+IuLRL..s+tHhHLHA..EsG+G.L.l.sGKlPLTKERhasLADRARculuh..... 0 3 7 11 +11330 PF11498 Activator_LAG-3 Transcriptional activator LAG-3 Pollington J anon pdb_2fo1 Family The C.elegans Notch pathway, involved in the control of growth, differentiation and patterning in animal development, relies on either of the receptors GLP-1 or LIN-12 [1]. Both these receptors promote signalling by the recruitment of LAG-3 to target promoters, where it then acts as a transcriptional activator. LAG-3 works as a ternary complex together with the DNA binding protein, LAG-1 [1]. 
23.30 23.30 23.70 24.90 22.50 23.20 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.93 0.70 -5.74 2 17 2009-01-15 18:05:59 2008-08-29 13:48:28 3 5 8 1 16 20 0 259.40 31 60.52 CHANGED MKPSTSK...ospSPPPEEPsusaVNspLPsP-DEPhlu-hssappGpE.tR.RSp.AhtpapKsR.E.IusQRAVTApLapRaTEDEERKRhEpQKNKEAMNASsu..ouSRNG..pl-NRKRRND..Vus.ouEEEW+RA.QQQHWMGQttP.h.paQMQQQYH.QQQ....httQHHphh................u.+SVPTP.uShHpPSPutMpssC...................PhsDENsLsVPpGEWFDKLAlhVAEpYsssTILGPDTYDsaLsELDh.ps.u.ThpoP.Ehs.....poAs.P..NPQp.tQhtQQQNKMRhhQQQp.....hEQQRpQQhMpQpQQQ....HppQQMLL.QQQQ.pQhQQppQMN.GGQF.TQApQpAsYhQpMQ+M...pQ.ppQQQQAQpHQQA.QQHQQ.tQpt.MGYulPNGY.tp.phasP.YG.HHMPp.TsFANIN .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..ppQ...QpphhhhQQQp............p.hppQR.pQQh.Qtp...Q....p.pp.QQh....p.......ppt..........pt...psshhp.....h..pph...p................................................................................................... 0 4 5 16 +11332 PF11500 Cut12 Spindle pole body formation-associated protein Wood V, Coggill P anon Chahwan C Domain This is the central coiled-coil region of cut12 also found in other fungi, barring S. cerevisiae. The full protein has two predicted coiled-coil regions, and one consensus phosphorylation site for p34cdc2 and two for MAP kinase. During fission yeast mitosis, the duplicated spindle pole bodies (SPBs) nucleate microtubule arrays that interdigitate to form the mitotic spindle. 
Cut12 is localised to the SPB throughout the cell cycle, predominantly around the inner face of the interphase SPB, adjacent to the nucleus [1]. Cut12 associates with Fin1 and is important in this context for the activity of Plo1 [2]. 21.50 21.50 22.80 22.80 21.30 20.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -10.99 0.71 -4.52 12 71 2009-09-11 11:20:04 2008-08-29 14:19:16 3 3 70 0 60 71 1 150.90 34 21.61 CHANGED -ups+PRo+LTpALhDSR......cp.....st.t.....stpct...css.sscss..........t.pshp..tc-sD.TINLs-PRSpSGKYWKuEF...DsY+s+optEl+KLIp...............Y+phAKsYA+cKDpEApcLucKLKEEE...tKVupMEcclTpLsSsMsscsspss.....+EpLhp- ..............................................t..p.p.Ro+LopuL.c.uRp.............t..p........sp.................shppp.ppstpscp.................................pspspc..ps-sDsTlsLs-P+SQSG.KYWKuEF.........-sY+scsctEhcKLlp...............Y+plAKSYA+cKDsEAhcLs...cK.LKcEp...tKVtpMEc+lsc.hAuphssptsp...ts.....p.............................. 0 10 27 46 +11333 PF11501 Nsp1 Non structural protein Nsp1 Pollington J anon pdb_2gdt Family Nsp1 is the N-terminal cleavage product from the viral replicase that mediates RNA replication and processing [1]. The specific function of the protein is unknown however the structure has been determined. The protein has a novel alpha/beta fold formed by a 6 stranded beta barrel with an alpha helix covering one end of the barrel and another helix alongside the barrel [1]. Nsp1 could be involved in the degradation of mRNA. 
25.00 25.00 41.00 38.30 19.90 18.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.49 0.71 -4.25 3 172 2009-01-15 18:05:59 2008-08-29 14:20:12 3 4 67 2 0 150 0 115.00 98 1.95 CHANGED HVQLSLPVLQVRDVLVRGFGDSVEEALSEAREHLKNGTCGLVELEKGVLPQLEQPYVFIKRSDALSTsHGHKVVELVAEMDGIQYGRSGITLGVLVPHVGETPIAYRNVLLRKNG ..................HVQLSLPVLQVRDVLVRGFGDSVEEALSEAREHLKNGTCGLVELEKGVLPQLEQPYVFIKRSDALSTNHGHKVVELVAEMDGIQYGRSGITLGVLVPHVGETPIAYRNVLLRKNG 0 0 0 0 +11334 PF11502 BCL9 B-cell lymphoma 9 protein Pollington J anon pdb_2gl7 Family The Wnt pathway plays a role in embryonic development, stem cell growth and tumorigenesis. BCL9 associates with beta-catenin and Tcf in the nucleus when the Wnt pathway is stimulated leading to the transactivation of Wnt target genes [1]. 25.00 25.00 28.60 38.20 24.40 24.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.95 0.72 -4.43 7 124 2009-01-15 18:05:59 2008-08-29 14:29:02 3 1 64 6 72 108 0 39.90 55 3.04 CHANGED GLSpEQLEHRERSLQTLRDIQRMLFP-.-+shuhts.st.p ..GLSpEQLEHRERSLQTLRDIpRhLFP-.Ep-.shtt.................. 0 9 15 39 +11335 PF11503 DUF3215 Protein of unknown function (DUF3215) Pollington J anon pdb_2grg Family This family of proteins with unknown function appears to be restricted to Saccharomycetaceae. 25.00 25.00 26.30 46.00 22.00 19.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.32 0.72 -4.35 4 29 2009-01-15 18:05:59 2008-08-29 14:51:24 3 1 19 1 17 22 0 80.50 36 91.50 CHANGED hslsEhlssslGolsFDENtNll-ooGlG....ppRlpDIscLSpscLDppGaulhpDssl.spla+css+TlsVYTs..............up .........tEhlPcslGTLsFD-NtNll-ooGlG.....cs+lpDIhclSpscLsppGa.ulhpD...splllplaKc..ss+TlslYTs...pp.............. 0 2 9 17 +11336 PF11504 Colicin_Ia Colicin Ia Pollington J anon pdb_2hdi Family Colicins are toxic molecules secreted to kill other bacteria in times of stress. 
Colicin Ia kills susceptible E.coli cells by binding to the colicin I receptor leading to the formation of a voltage-dependant ion channel. The protein can be divided into three domains, a translocation domain, a receptor binding domain and a channel forming domain [1]. 25.00 25.00 25.90 53.30 24.00 16.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.35 0.72 -4.08 3 56 2009-01-15 18:05:59 2008-08-29 15:50:56 3 2 40 2 0 53 0 70.40 98 12.45 CHANGED KNTPDGKTIVSPEKFPGRSSTNDSIVVSGDPRFAGTIKITTSAVIDNRANLNYLLSHSGLDYKRNILNDRNP .....KNTPDGKTIVSPEKFPGRSSTNHSIVVSGDPRFAGTIKITTSAVIDNRANLNYLLTHSGLDYKRNILNDRNP 0 0 0 0 +11337 PF11505 DUF3216 Protein of unknown function (DUF3216) Pollington J anon pdb_2hjm Family This family of archaeal proteins with unknown function appears to be restricted ton Thermococcaceae. 25.00 25.00 72.60 72.20 24.10 23.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.17 0.72 -4.19 2 14 2009-01-15 18:05:59 2008-09-01 09:09:30 3 2 13 4 11 16 0 95.10 49 60.44 CHANGED MDhsEKVKtLs.cLtE-pLtcAI-RFlTLscGIEKTRGEtFAKAuIYGFLEGILTTLKhKapsEcIppLLs.lKpARE.pEAhLRKspPPlh.ppsL ........lE-VKuLscELGE-sLlpsIDuFlsLNcGLEsK+GE-FlcVuILGFLEGILTTLKhKa.p--+lscLL-cV+ppRtEL-thFRKs+sPlh-......... 0 1 1 6 +11338 PF11506 DUF3217 Protein of unknown function (DUF3217) Pollington J anon pdb_2hql Family This family of proteins with unknown function appears to be restricted to Mycoplasma. Some members in this family of proteins are annotated as MG376 however this cannot be confirmed. 23.60 23.60 23.80 108.70 21.90 23.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.19 0.72 -4.33 2 7 2009-01-15 18:05:59 2008-09-01 09:20:58 3 1 7 6 3 5 0 99.70 69 100.00 CHANGED MLNpVFLEGEIESopWShKKTGFLVTIKQhRhFGER.FTDaaVhYANGQLAaELEtaTpKaKTISIEGILRTYLE++StIWKTTIElVKIhpPpsclhIDYpE. 
MLNpVhLEG.I-S.pWShpKTGFhVTIpQhRhFGpphFTDYYVIYANsQLuhELEKashcachluIcG.LpTY.-+KSclWKTpI.h.KIh................ 0 2 2 2 +11339 PF11507 Transcript_VP30 Ebola virus-specific transcription factor VP30 Pollington J anon pdb_2i8b Family VP30 is a nucleocapsid-associated Ebola virus-specific transcription factor [1]. It acts by stabilising nascent mRNA in Ebola virus replication. The C terminal domain of VP30 folds into a dimeric helical assembly. VP30 assembles into hexamers in solution by an N-terminal oligomerisation domain which activates the transcription function of the protein. The oligomerisation is mediated by hydrophobic amino acids at 94-112 [1]. 20.00 20.00 22.50 22.30 19.30 17.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.62 0.71 -4.32 3 31 2009-01-15 18:05:59 2008-09-01 09:42:50 3 1 20 4 1 29 0 130.00 60 46.56 CHANGED LTLclLlcIAEaWAsp-IsclDDoKLRALLTLoAVLlRKFSKSQLupLCEoHLRHENLGQDQA-SVLEVYQRLHSDKGGsFEAALWQQWDRQSLoMFISAFLaVALQIPCESSoVVlSGLcpLhPPQsNus ...LTLcsLs+IhpYhpRp-IsclD-opL...RAhLoLosshlRKh..s+S.lsshsEhHlpHENLsQDQssslhpsYptlH.DKGGpFEAALWQtWD+pSlohFlpAhLaVhpsIPCESShsV.uuhcphl.Ppspu....... 0 1 1 1 +11340 PF11508 DUF3218 Protein of unknown function (DUF3218) Pollington J anon pdb_2igs Family This family of proteins with unknown function appears to be restricted to Pseudomonas. 
25.00 25.00 30.30 134.00 21.50 17.40 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.31 0.70 -4.83 2 9 2009-01-15 18:05:59 2008-09-01 09:51:37 3 1 6 8 1 8 0 187.90 63 96.74 CHANGED pINIYQNPGQSluslYKGhApQCsPG.sFPEsQhlEAWDIPLhLHPEFlPsGDlSKhDppYuTlLAtE.ApslhlthpMsp-Kt+sCs.ElhsLloShupNLspIKuRaGusYLshhKtSPNhYPTsVGhph.uuGu.sQ-SGl.VSYGssLtpLT.uphQAMsLPtslKtLls.GlGl+LstPpa.ssaNsItoGlRYTTuVslhLAYaAsl ..pINIYQNPGQSluslYKGhApQCsPG.sFPEAQhlEAWDIPLhLHPEFlPsGDlSKhDppYuTlLAtE.AphlhlthpMsp-Kt+sCs.ElhsLloShupNLspIKuRaGuuYLshhKtSPNhYPTsVGhph.uuGu.sQ-SGl.VSYGsNLtpLT.uphQAMsLPtslKtLls.GlGl+LstPpa.ssaNsItoGlRYTTuVslhLAYaAsl....... 0 1 1 1 +11342 PF11510 FA_FANCE Fanconi Anaemia group E protein FANCE Pollington J anon pdb_2ilr Family Fanconi Anaemia (FA) is a cancer predisposition disorder. In response to DNA damage, the FA core complex monoubiquitinates the downatream FANCD2 protein. The protein FANCE has an important role in DNA repair as it is the FANCD2-binding protein in the FA core complex so it represents the link between the FA core complex and FANCD2 [1]. The sequence shown is the C terminal domain of the protein which consists predominantly of helices and does not contain any beta-strand. The fold of the polypeptide is a continuous right-handed solenoidal pattern from the N terminal to the C terminal end [1]. 
22.20 22.20 24.00 23.50 20.40 20.00 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.86 0.70 -5.25 2 95 2009-09-11 15:02:58 2008-09-01 11:06:15 3 2 65 1 56 91 0 198.80 32 49.62 CHANGED u....LupShpLPcthpstLPhlppLLcs.pE....h-DussstLplLHECsPsph-lLCu.LpLsplS-.sL.phCohLLtLuPDLShSsAosLhppLhLt+ILSLspsASRhLsTAlTShCu+YshPsCpALl-PlLQtsthGsAQs-LLCpLV.h-sLEPctplLhhtphLthsWcEtsh.VlpuLL-pplEho.EcFslhhE+LCppu.thopSMtaAKhhLoVhTKYQuNls.sp+hsLu.sLp.NpTFL+KSLpAALK+lu ............................................................................................................................ht.lst.Lph.phs-.slhplCs......lhs..L.o...sshSh....ss.....u.ss.lh.psLhLs+lluL..sp.sASRh..LhsAlsshC.t..+ashssspullhP.l.L.p....ts.t.......h.Gss.Qs-LLspll...t-..sLc...............sch.....p...h..hh...l....t...p..h.....L................p........h..s...........W..pEth.h.VlpslLppp.....hs....p.ht.hhp.l.....t.....tt..u.t....hspShtauphhhshhsphttplt..p.....l..hlt.stThhp+sh.shl....t.......................... 0 15 22 37 +11343 PF11511 RhodobacterPufX Intrinsic membrane protein PufX Pollington J anon pdb_2ita Family PufX organises RC-LH1, the photosynthesis reaction centre-light harvesting complex 1 core complex of Rhodobacter sphaeroides [1]. It also facilitates the exchange of quinol for quinone between the reaction centre and cytochrome bc(1) complexes. In organic solvent, PufX contains two hydrophobic helices which are flanked by unstructured regions and connected by a helical bend [1]. 25.00 25.00 30.80 30.70 23.10 22.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.92 0.72 -4.58 5 18 2009-01-15 18:05:59 2008-09-01 12:54:39 3 1 18 3 3 14 33 66.20 38 84.78 CHANGED MscaNDhLss..NsKoRLRADlshLMLKGAGYAAVFVlulWFlIuuhtlIG+hLPEQSRpTPDPsppuA ..........ho..sDaLss..ssKspLRlWlshQMhKGAGaAussFhuslhllsshtslGphLP.ppppAPuPs.h..... 
0 1 2 2 +11344 PF11512 Atu4866 Agrobacterium tumefaciens protein Atu4866 Pollington J anon pdb_2jmb Family Atu4866 is a protein with unknown function from Agrobacterium tumefaciens however the structure has been determined. Atu4866 adopts a streptavidin-like fold and has a beta-barrel/sandwich which is formed by eight antiparallel beta-strands [1]. Atu4866 has a potential ligand-binding site where is has a stretch of conserved residues on the surface [1]. 25.00 25.00 25.70 25.30 24.50 23.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.57 0.72 -4.24 14 114 2009-01-15 18:05:59 2008-09-01 13:19:10 3 2 95 1 47 122 1 77.40 61 65.27 CHANGED tpp+sYlGMWVTuDGaIRHELLPsGRYDEARGsRcSAYpGRYclTGsHI-YhDDTGFTADG-Fh.ssVLaHuGMVLaRc ..................tHPYVGMWVTsDGpIRpELLPsGRYDEARGsRcSAYpGRYpVsGsHI-YhDDTGFTADG-Fh.-slLaHuGMVhYRp..... 0 11 26 33 +11345 PF11513 TA0956 Thermoplasma acidophilum protein TA0956 Pollington J anon pdb_2jmk Family TA0956 is a protein from Thermoplasma acidophilum which currently has no known function however the structure has been determined. The protein has a two-layered alpha/beta-sandwich topology and is a putative Elongation factor 1-alpha binding motif [1]. 25.00 25.00 181.50 181.30 22.20 18.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.27 0.72 -4.01 3 3 2009-01-15 18:05:59 2008-09-01 13:27:54 3 1 3 2 3 7 4 110.30 51 97.07 CHANGED MsLCAMYNISMcspHPTTICVVMDKFLDSFuELlDVl--sDpDE.LMDFISRYARTDEIMPEDKTVGFVVINuDKKlMSVSFSDIDENM...KcsI+EIlKKYKDpGYKVEsDh MsLCAMYNISMcspHPTTICVVMDKFLDSFuELlDVl--sDpDE.LMDFISRYARTDEIMPEDKTVGFVVINuDKKlMSVSFSDIDENM...KcsI+EIlKKYKDpGYKVEsDh 0 1 2 2 +11346 PF11514 DUF3219 Protein of unknown function (DUF3219) Pollington J anon pdb_2jn9 Family This family of proteins with unknown function appears to be restricted to Bacillaceae. Some members in this family of proteins are annotated as YkvR however this cannot be confirmed. 
25.00 25.00 82.10 82.00 16.80 16.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.37 0.72 -4.10 2 29 2009-01-15 18:05:59 2008-09-01 13:38:47 3 1 29 1 7 31 0 92.60 55 97.71 CHANGED KplhLNsspl-hhsYpEEo...cscptI..uFshpVopppYHDIssLLY-hhFsVpVPEcsLtFRG.hspY.TSLTNLYE.sAVuEFalElpEhDtptD ..................lhLNsVplchhsYpEEs.......cstRpI..uFsLcVTSETYHDIAVLLYEKTFcVcVPE+sLsFRGpITNYSTSlTNLYccspVu-FalElTEl......... 1 1 5 5 +11347 PF11515 Cul7 Mouse development and cellular proliferation protein Cullin-7 Pollington J anon pdb_2jng Family The Cullin Ring Ligase family member, Cul7, is required for normal mouse development and cellular proliferation. Cul7 has a CPH domain which is a p53 interaction domain. The CPH domain interaction surface of P53 is present in the tetramerisation domain [1]. 25.00 25.00 25.10 28.80 20.10 18.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.81 0.72 -4.08 8 206 2009-01-15 18:05:59 2008-09-01 13:50:29 3 43 71 2 103 188 0 76.00 52 2.97 CHANGED RcDFpSsDsYAhYVR-slpsGMhVRCCc-YEElppGDhGpVh+l-s-GlpDLNVQVsWps+GpTYWV+ahHVEllGsss .........Ru-FtSsssYAhYVR-slpsGMpVRhhc-YE.Els.tGD.Gc.h+.sssGl.s..VQV.WpspGpTYWV+ahhlEllGh.s............... 0 19 23 51 +11348 PF11516 DUF3220 Protein of unknown function (DUF3120) Pollington J anon pdb_2jpf Family This family of proteins with unknown function appears to be restricted to Bordetella. 
25.00 25.00 240.50 240.50 20.50 19.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.34 0.72 -4.06 2 4 2009-01-15 18:05:59 2008-09-01 14:06:47 3 1 4 1 1 4 0 106.00 98 16.14 CHANGED KQQLpEpAPSHANLDVKWLDGLRAGSMALQGDVKVWMQNLEDLHTRRPDEFsARLQQSTDALYSHLEAQWAKQHGTPPTASDVsGMPQWQEYTAMLRERFAGLDTI KQQLHEQAPSHANLDVKWLDGLRAGSMALQGDVKVWMQNLEDLHTRRPDEFAARLQQSTDALYSHLEAQWAKQHGTPPTASDVAGMPQWQEYTAMLRERFAGLDTI 0 1 1 1 +11349 PF11517 Nab2 Nuclear abundant poly(A) RNA-bind protein 2 (Nab2) Pollington J anon pdb_2jps Family Nab2 is a yeast heterogeneous nuclear ribonucleoprotein that modulates poly(A) tail length and mRNA. This is the N terminal domain of the protein which mediates interactions with the C-terminal globular domain, Myosin-like protein 1 and the mRNA export factor, Gfd1 [1].The N-terminal domain of Nab2 shows a structure of a helical fold. The N terminal domain of Nab2 is thought to mediate protein protein interactions that facilitate the nuclear export of mRNA [1]. An essential hydrophobic Phe73 patch on the N terminal domain is thought to be a important component of the interface between Nab2 and Mlp1 [1]. 25.00 25.00 32.40 30.40 23.10 22.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.44 0.72 -3.96 4 26 2009-01-15 18:05:59 2008-09-01 15:41:18 3 4 26 4 17 26 0 101.20 57 19.60 CHANGED MS..p-pIspsLKslVAEKLpsl.NFsEDVpYVAEYIVLLhSNGGoh-SlVQELsoLFDoVSppsLssVVQTuFhAL-hLppG-slpslhuKlp...hhuupssupusos .......MS..pEQhoENLKVIVAEKLssl.NFNEDlcYVAEYIVLLIsNGGTlESVVpELuoLF.DoVSp-sLssVVQTAFFALEALQQGEosEsIVuKIR...hM.supuhupt...s.............................................. 0 2 8 14 +11350 PF11518 DUF3221 Protein of unknown function (DUF3221) Pollington J anon pdb_2jqo Family This family of proteins with unknown function appears to be restricted to Bacillus. Some members in this family of proteins are annotated as YobA however this cannot be confirmed. 
YobA is a protein with unknown function. 20.80 20.80 21.00 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.45 0.72 -4.39 9 132 2009-01-15 18:05:59 2008-09-01 15:47:07 3 2 109 1 15 95 1 105.10 40 84.54 CHANGED lhL.lhhhlhotsssptsE...spphohEGYlIl+.NspshhlsDcshpsK.-hpphhEpplhpcasusIVL....thpshsshcpLpsGpKIKVWasplhES.Pu+hhlpKaEll ..............................hh.lhLhlhu..s.sssppsE..........sccsshEGYlIl+.N-.s...s..a...hlsDcshpsK.ELppY.hEp...php..p-aPucIlL.....hc-c..-.u..acpLKsGDKIKVW..S.p...h.hESYPu+hhVpKaEl.......... 0 2 9 11 +11351 PF11519 DUF3222 Protein of unknown function (DUF3222) Pollington J anon pdb_2js3 Family This family of proteins with unknown function appears to be restricted to Rhodopseudomonas. 25.00 25.00 141.40 141.30 19.50 17.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.56 0.72 -4.02 3 7 2009-01-15 18:05:59 2008-09-01 15:50:14 3 1 7 2 6 7 0 74.70 74 100.00 CHANGED MT-FAAEDVRKIAAALVKTAIETVSEEDGGARNQCKLCNASVPWLQTGDEIpHpPDCAVAIAQRIL.AKs+LHSV MTDhAAEDVRKIAsALlKTAIEIVSEEDGGAHNQCKLCsASVPWLQTGDEIpHAPDCPVVIApplLuu+P+LHuV 0 0 1 2 +11352 PF11520 Cren7 Chromatin protein Cren7 Pollinton J anon pdb_2jtm Family Cren7 is a chromatin protein found in Crenarchaeota and has a higher affinity for double-stranded DNA than for single-stranded DNA. The protein contains negative DNA supercoils and is associated with genomic DNA in vivo.Cren7 interacts with duplex DNA through a beta-sheet and a long flexible loop. The function has not been completely determined but it is thought that the protein may have a role similar to that of archaeal proteins in Euryarchaea [1]. 
39.00 39.00 60.50 60.20 38.80 37.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -8.87 0.72 -4.33 7 51 2009-01-15 18:05:59 2008-09-01 16:18:15 3 1 43 5 34 42 0 58.70 53 95.26 CHANGED M....sscc..shcs+ss.sGKEhcLhPhKsW.LsP+GRKGVhlGLF+sPp.GKYFRt+Vs-t.P ............tcc..sl+l+ss.sGKEh-LhPpKVWsLsPKGRKGVKIGLFKsP-oGKaFR++lP-sYP.. 0 10 19 24 +11353 PF11521 TFIIE-A_C-term C-terminal general transcription factor TFIIE alpha Pollington J anon pdb_2jtx Family TFIIE is compiled of two subunits, alpha and beta. This family of proteins are the C terminal domain of the alpha subunit of the protein which is the largest subunit and contains several functional domains which are important for basal transcription and cell growth. The C terminal end of the protein binds directly to the amino-terminal PH domain of p62/Tfb1 (of IIH) which is involved in the recruitment of the general transcription factor IIH to the transcription preinitiation complex. P53 competes for the same binding site as TFIIE alpha which shows their structural similarity. Like p53, TFIIE alpha 336-439 can activate transcription in vivo [1]. 21.10 21.10 21.20 21.40 20.90 20.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.99 0.72 -4.04 8 111 2009-01-15 18:05:59 2008-09-01 16:44:00 3 5 89 3 80 101 0 84.30 38 19.99 CHANGED SsSDTSESDcD.ssscsPstt..........spch--p--DD-.E-ssD-PsVhVuGRPasYcEVSQpPcLVuQMTPQEKEsYIphGQchFpclY- ......................................spS-oS-s-c-.s....sc.ss.ss...............phc.tc-p-----hE-s...sD..-P..sVhVAG+Pashs-V.o...P-LV.A.QMTPpEKEsYIp.hsQchFpcha-...... 0 28 35 58 +11354 PF11522 Pik1 Yeast phosphatidylinositol-4-OH kinase Pik1 Pollington J anon pdb_2ju0 Family Pik1 is a regulator of membrane traffic and participates in the mating-pheromone signal-transduction cascade. The protein is localised to the nucleus and cytoplasm in the Golgi. 
Pik1 is thought to have an actin-independent role in membrane transport [1]. 21.40 21.40 21.40 24.60 21.10 21.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.62 0.72 -4.22 20 95 2009-09-10 18:11:33 2008-09-01 16:58:07 3 4 83 1 64 94 0 50.50 41 5.27 CHANGED SauFQlsRRllN+lQpllFsss...................pps+ps+h+ENltPALVLsuhVhoSlA ......ShuFQssRRlhN+lQpIlFsss....................sts+pp+h+ENlhPuhVLuShlhuSlA................. 0 16 39 60 +11355 PF11523 DUF3223 Protein of unknown function (DUF3223) Pollington J anon pdb_2k0m Family This family of proteins has no known function. 21.00 21.00 22.60 21.90 20.10 20.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.61 0.72 -3.69 30 196 2009-01-15 18:05:59 2008-09-02 09:24:51 3 16 68 1 129 193 9 74.50 34 13.45 CHANGED h+thL+..+YssGcpls..sp-tpsllptlLpaHP.....cs-pKlGsG.l.cthpVtpasthp.t....S+CFalVRs..DGop.....-DFSYpKCl ...........h.ptlL+..catssppls..tp-pphllcplLpaHP................csccKIGsG....l.ctlpVshHPtap..t.....SRCFallRp..DGot......pDFSYpKCl.............. 0 38 90 114 +11356 PF11524 SeleniumBinding Selenium binding protein Pollington J anon pdb_2jz7 Family Selenium is an important nutrient that needs to be regulated since lack of the nutrient leads to cell abnormalities and high concentrations are toxic.\ SeBP regulates the level of free selenium in the cell by sequestering the nutrient during transport. SeBP acts as a pentamer and delivers the selenium to the selenophosphate synthetase enzyme [1]. Each subunit is composed of an alpha helix on top of a four stranded twisted ss sheet, stabilised by hydrogen bonds [1]. 
27.90 27.90 28.00 58.30 27.10 27.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.74 0.72 -3.85 3 13 2012-10-01 20:55:33 2008-09-02 09:31:10 3 1 13 5 6 11 0 83.00 52 98.99 CHANGED MlFEDKFIITTADEIPGLpLYYlGIVSslSD..NVDcIVEsL+EKVKAKGGMGLIAFRITs...ADG.KaLGYGTAVKADEGQFTMA ...MlFEscFIITTAc-IPGlpLYhhGIVSssSD..NVDpIlcsLcEpVpAKsGhGLluFRITs....uDG..KhlGYGTAVKADEuQFsMs. 0 0 1 4 +11357 PF11525 CopK Copper resistance protein K Pollington J anon pdb_2k0q Family CopK is a periplasmic dimeric protein which is strongly up-regulated in the presence of copper, leading to a high periplasmic accumulation [1]. CopK has two different binding sites for Cu(I), each with a different affinity for the metal. Binding of the first Cu(I) ion induces a conformational change of CopK which involves dissociation of the dimeric apo-protein. Binding of a second Cu(I) further increases the plasticity of the protein. CopK has features that are common with functionally related proteins such as a structure consisting of an all-beta fold and a methionine-rich Cu(I) binding site [1]. 19.90 19.90 19.90 69.40 19.80 19.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.62 0.72 -4.33 5 22 2009-09-11 07:29:11 2008-09-02 09:51:31 3 1 19 9 13 18 18 71.80 56 78.22 CHANGED VDcuslcKSl-LKDGSTVHlFKDGKMuMEDKhG+uhpMKcGpVMET+DGQKIhM+GDEVhRLDshL+KcH.p+G ..A..spuscchl.LtDGuTlYIFKDGKMA.Es+aGRAVhhphGsshpTKDGppIshpusEVARLsSLLp+cH....Gs.. 0 1 10 13 +11358 PF11526 CFIA_Pcf11 Subunit of cleavage factor IA Pcf11 Pollington J anon pdb_2npi Family Pcf11 is a subunit of an essential polyadenylation factor in Saccharomyces cerevisiae, CFIA. Pcf11 binds to Clp1, another subunit of CFIA whose interaction is responsible for maintaining a tight coupling between the Clp1 nucleotide binding subunit and the other components of the polyadenylation machinery [1]. 
21.10 21.10 21.30 21.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.13 0.72 -3.84 10 54 2009-01-15 18:05:59 2008-09-02 10:47:12 3 4 53 2 37 53 1 83.30 29 14.42 CHANGED s...............ps.spNIQSRNWYLDD.-WVpFKDD-llphoossss.t....................pphppshss.p.ts..hs.pph+spYVVVPsssosMs.+ ...tsp....ssstKNlQSRsWYLcDpcWlp...F..+-..--Isuss.pssst.p.ts...................................pptspstss..........................hptpYVlVPpstpsMs.p......................... 0 6 20 34 +11359 PF11527 ARL2_Bind_BART The ARF-like 2 binding protein BART Pollington J anon pdb_2k0s Family BART binds specifically to ARL2.GTP with a high affinity however it does not bind to ARL2.GDP. It is thought that this specific interaction is due to BART being the first identified ARL2-specific effector. The function is not completely characterised [1]. BART is predominantly cytosolic but can also be found to be associated with mitochondria. BART is also involved in binding to the adenine nucleotide transporter ANT1 [2]. 20.90 20.90 20.90 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.62 0.71 -4.32 32 294 2009-01-15 18:05:59 2008-09-02 10:47:57 3 6 116 4 198 272 4 113.80 29 40.86 CHANGED hs..cpchlhpplhpaltSs..WpsslpsFh-ppC..hhF--p...-EsplphpplapcYppll-phlpphlp........phuls.cpatpsl........................p.tpppphtpslhp.lhshpDFphF+chMlppNh-l-hpu ....................................tt....c.chlhttl.shlhsst.aph.hpsFh-pps..............hF-.cp...............-E....sKLpYopIapc.YppLlEchl-phLp........t.Ghs..ppFtpsh.....................................ttcschst...s.lhp..llshsDFhhFKpMMlphphEhph.......................................... 0 84 104 150 +11360 PF11528 DUF3224 Protein of unknown function (DUF3224) Pollington J anon pdb_2ooj Family This bacterial family of proteins has no known function. 
25.00 25.00 27.50 29.70 21.10 18.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.47 0.71 -4.65 11 250 2009-01-15 18:05:59 2008-09-02 12:59:52 3 1 238 4 66 200 15 131.80 34 97.93 CHANGED Mphp..hhGsFplohWsE..oshs-ssss..hspsplspsa.pGDlcGpSplcaLhsY......pu.tuuAsaVGhEphpGslsG+pGoFVLQHpGphspGssssshs.lV.sSGTupLtsLsGshshshss.ssp.u.cFphshtsu ...............................hp.......puoFoVspWsE..............ps.l..ss...t...s.......c..s.h.s...........l...........spAslshph.sGsLpG.p.u.ps.EYLhsY.......su..hpusAphl..Ga.+....FcG.shtG.+pGoFs.hpcp..Gsa.s.pG.t.h...c.s..s..hp..ll..puTG-LtGL...sGshshphsp..spc..hhpath.....h............ 0 28 48 56 +11361 PF11529 AvrL567-A Melampsora lini avirulence protein AvrL567-A Pollington J anon pdb_2opc Family AvrL567-A is a protein from the fungal pathogen flax which induces plant disease resistance in flax plants [1]. The protein has a novel fold [1]. 19.30 19.30 20.30 69.80 18.00 19.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.57 0.71 -4.16 3 13 2009-01-15 18:05:59 2008-09-02 13:09:21 3 1 2 2 1 14 0 126.90 81 84.57 CHANGED MEDVPAELTGVSEGYTRFYRSPTASVTLSGLVcVKWDNEQMTMPLFKWIGGEQAEELHFCVHIAHSSGRRLNpARTLGTVNSNMDQHWVEAYRSSGVTRCTIQDCHLFANDIPNFPDYIKIKLVPKT .........MEcVPAELTGVSEGYTRFYRSPTASVsLSGLVcVKWDNEQMTMPLFKWIGGEQAEELHFsVHIAHSSG.+LNpARTLGTVNSNMDQHWspAYRsSGsTRpTIQDpHLFANDI...PNFPDYIKIKLVPKT..... 1 1 1 1 +11362 PF11530 Pilin_PilX Minor type IV pilin, PilX Pollington J anon pdb_2opd Family PilX is a protein from Neissaria meningitidis which is crucial for the formation of bacterial aggregates and adhesion to human cells [1]. The structure of PilX is similar to all pilins as it has the common alpha/beta roll fold. PilX subunits have surface-exposed motifs which are thought to stabilise bacterial aggregates against pilus retraction. 
It also illustrates how a minor pilus component can modulate the virulence properties of pili which have a simple composition and structure [1]. 25.00 25.00 35.30 34.70 23.90 23.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.47 2 59 2009-01-15 18:05:59 2008-09-02 13:32:49 3 3 58 6 2 56 0 126.50 71 79.05 CHANGED SYIEKGYQSQLYTEMVGINNl.KQFILKNP.DDNpTlKSKLcIFVSGYKMNPKIAcKYsVSV+FVstEKPRAYpLVGVPpsGTGYTLSVWMNSVGDGYKCRDAsSApAa.-TLSuDsGCEAFSNRKK .SYIEKGYQSQLYTEMVGINNlhKQFILKNPh.DD.NQsIKoKLEhFVSGYKMN..PKIAcKYsVSV+...F..V..s..tE........KsRAYpLVGVPKsGTGYTLSVWMNSVGDGYKCRDAASAcAapETLSuDsGCEAFSNRKK.......... 0 1 1 1 +11363 PF11531 CARM1 Coactivator-associated arginine methyltransferase 1 N terminal Pollington J anon pdb_2oqb Family CARM1 is an arginine methyltransferase which methylates a variety of different proteins and plays a role in gene expression. This is the N terminal domain of the protein which has a PH domain, normally present to regulate protein-protein interactions.A molecular switch is also present on the N terminal domain [1]. 20.40 20.40 20.50 21.20 19.90 20.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.60 0.71 -4.35 3 78 2009-01-15 18:05:59 2008-09-02 14:17:24 3 4 42 10 39 69 0 91.60 59 17.53 CHANGED M...AAVSVFPGVRLLSIGDANGEIQRHAEQQsLRLEVRAG.DuAsIALaNsE-VCVFKCTVoRETECSRVGKQSFIITLGCNSVLlQFATPADFCSFYNILKoCRGpcuERSVFSE ................................................................................................h.stE-V....CVFKCSVSRETECSRVGKQSFIITL.GCNSVLlQFATPsDFsSFY.N.ILKsCRGHshE+SVFSE.................... 0 8 12 22 +11364 PF11532 HnRNP_M Heterogeneous nuclear ribonucleoprotein M Pollington J anon pdb_2ot8 Family HnRNP M is a splicing regulatory factor that binds to the auxiliary RNA cis-element ISE/ISS-2 which promotes splicing of exon IIIb and silencing of exon IIIC in the fibroblast growth factor receptor 2 (FGFR2) [1]. 
By binding to ISE/ISS-3, HnRNP M plays a role in the regulation of alternative splicing in FGFR2 as it induces exon skipping and promotes exon inclusion [1]. 20.70 20.70 22.70 23.10 20.30 18.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.44 0.72 -3.85 3 73 2009-01-15 18:05:59 2008-09-02 14:30:45 3 4 35 2 30 54 0 29.50 80 4.60 CHANGED -psoQNEKRKEKshKR.GGNRFEPYSNPsKR ........ERPsQNE..KRKEKNIKR..GGNRFEPYuNPTKR. 0 1 3 11 +11365 PF11533 DUF3225 Protein of unknown function (DUF3225) Pollington J anon pdb_2owp Family This bacterial family of proteins has no known function. 20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.69 0.71 -4.45 24 251 2012-10-03 02:27:24 2008-09-02 14:42:48 3 5 240 6 86 307 27 123.10 51 80.95 CHANGED hpINtPcVlAEVsAAFtcYEpALssNDVssLDtLFWssPcTlRYGsuENLYGh-tI+AFRtsRsusuLsRplh+TslTTFGcDhAssssEFpR..cGus.RlGRQpQTWlRh.s-.GW+lVAAHVSlh.ps ..........................................................pIshPtllAEVoAAFhcYEpAL....luNDlssLDtLFWpss+.TV...R.a.G..........s.u..E.N.LYG.h.-sI.+.A.F...Rt.u.R...sus.......G...h...s..R...p.L.......t........c....T..sITT......FG..c.....D.h.....A....s...s..o..T..E..FpR........cG..os....+l.G..R..Q........Q.....T..WlR....h......ss....G........W+lVAAHVSLh..s...................................................... 0 14 41 60 +11366 PF11534 HTHP Hexameric tyrosine-coordinated heme protein (HTHP) Pollington J anon pdb_2oyy Family HTHP is from the marine bacterium Silicibacter pomeroyi and has peroxidase and catalase activity. HTHP consists of six monomers which each binds a solvent accessible heme group and is stabilised by the interaction of three neighbouring monomers [1]. The heme iron is penta-coordinated with a tyrosine residue as proximal ligand [1]. 
25.00 25.00 25.70 32.10 24.00 23.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.56 0.72 -3.91 9 36 2009-01-15 18:05:59 2008-09-02 15:55:30 3 2 34 8 21 45 5 69.40 51 80.69 CHANGED hs-sWLPoLhTsTPpEGapLAVKhARluVKhTQPDs-lRppLRssYucDAsuLIAsSpVlAsaFATlAAANsYW+ ..........hsoLhTsTPpEGhpLAlKluRhulKhTQPDs-lRcpLRs.YupDustLIAsSQVVAhcFtTlAAANNYW+...... 0 6 12 18 +11367 PF11535 Calci_bind_CcbP Calcium binding Pollington J anon pdb_2p0p Family CcbP is a Ca(2+) binding protein which, in Anabaena, is thought to bind Ca(2+) by protein surface charge. When bound to Ca(2+), the protein becomes more compact and the level of free calcium decreases. The free Ca(2+) concentration which is regulated by CcbP is critical for the differentiation process [1]. Calcium signalling is widespread in bacterial species, and prokaryotic cells like eukaryotes are equipped with all the elements to maintain Ca2+ homeostasis [2]. 21.50 21.50 21.90 21.50 20.10 20.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.32 0.72 -3.79 17 53 2009-12-03 17:50:01 2008-09-02 16:15:08 3 2 45 3 25 55 5 97.80 26 66.48 CHANGED cRIthEIlVDAY........stcEpthGWYpaL--sLphPFpAhhht...............tpVpVluhu.p-ps.tphhVplph....spcphslsLppLtshcsDspopp........AltDW+YWls ..................................................th..chhVDsY........stpEphhuWhtaLp-plphPFpuhhht.......................s....tcpVpVluhssppsp.tthhVplch....sccphslsLs-Lcsh-ssspspp.............slsDapaWh.................. 0 7 19 22 +11368 PF11536 DUF3226 Protein of unknown function (DUF3226) Pollington J anon pdb_2p62 Family This archaeal family of proteins has no known function. 
20.80 20.80 20.80 21.00 20.00 20.70 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.51 0.70 -4.92 4 17 2009-01-15 18:05:59 2008-09-02 16:23:43 3 2 15 2 12 23 0 205.50 28 76.19 CHANGED h+lLllEG+TDsuFFhsllKKLYGF+Eu+..scsl.hhE...Kht-hs+slsLcKDshtLlVhHupGKsplh+sLpshLcAlch.hh.slchlGlARDVDp-c-lhsWspShl+puuFEs+hsssalllp..slKIhPhGIGpl...sFst..l-hKK.ELEhlhshLAKt-ulLEKh+sSlpuLppDhGRKLpPKDlMHlLuIApsasGDshSGLYcpFItc.I+cN+chV.chLs.hslL.hLshhh ....plLhlEG+TDspFFpslhKKLatF..+Eucthstpl...hh-.............phhEhsph.hL..c+..-sshlslhsupGpsslh+sLtshlcu.hcl.hthplptlGlshDlDcsc-s.shtp.hhp.ttap.t.tshhhhl...th.lhshhhG.h...shp.t..l..-.+K.plE.hhhhLhctpuhLp+hc.ulp.Lp.shtcKLpPK-lhalh..A.tahGs.hpGhYc.al.h.hh.php.l.p.hs..t.........th........................................................................................ 0 2 2 8 +11369 PF11537 DUF3227 Protein of unknown function (DUF3227) Pollington J anon pdb_2p9x Family This archaeal family of proteins has no known function. 21.20 21.20 23.90 23.80 20.80 20.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.19 0.72 -3.91 2 9 2009-09-10 21:06:52 2008-09-02 17:19:47 3 1 9 4 8 11 0 96.40 31 95.49 CHANGED hp.s+sllstThhphLpphuPth.ssLEAaLpAphNtslElAhEDPtKFhcAVpcLFGEauAphh.hsll.cLp.h.sKp....shEpllt.lKKhh....G .............p.sc-llspslpthl+cluPsLcslLEh+L+uphsK.Gh.ElAhEDPpcF+-AVS+LFGEa.SA+LlthhllscL+.hlt.p.phpsLEpllp.l+.............................. 1 2 4 6 +11370 PF11539 DUF3228 Protein of unknown function (DUF3228) Pollington J anon pdb_2pd0 Family This family of proteins has no known function. 
25.00 25.00 207.50 207.30 21.00 19.10 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.37 0.71 -5.21 11 64 2009-01-15 18:05:59 2008-09-02 17:22:04 3 2 62 6 40 71 54 192.10 49 89.79 CHANGED hplsL-sFsh+Qacpp.hpsshIs.hspEpFlp+lNchhc.sshcllDGYAPFCKHlFlcNFT-sp......stslcITscNc+LL....+ouY.ARs-pELPVLsRWFshpsVp.ppl..ApYLDlILYS+EQlpKEssthtp...........ssDauIlSIKsQspsaElPMtPITMhRNAL.l-EGGSGVsLDREcYhcSVcaWpcaAslhs .........plsLssFAhpph.sps....hpsstIp.hs.-pFlp+lN-t.....s...shcll-GYAPFCKhhhlcN.aT.us+......shslsITscNcHLL....RSGYpARsspELPVLsRWF..cs..Vc.ssl...............ApYLclILYSREQlsKEsssh...............-ucWGIlulphpsEs.ElPMsPITMhRNALGlEEGGSGVPLDR-tYpcSVtaWcppAshhs. 0 21 28 36 +11371 PF11538 Snurportin1 Snurportin1 Pollington J anon pdb_2p8q Family Snurportin1 is a novel nuclear import receptor which contains an N-terminal importin beta binding domain which is essential for its function of a snRNP-specific nuclear import receptor [1]. Snurportin1 interacts with m3G-cap where it enhances the m3G-cap dependent nuclear import of U snRNPs in Xenopus laevis oocytes and digitonin-permeabilized HeLa cells [1]. 20.80 20.80 20.80 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -8.09 0.72 -4.18 6 77 2009-01-15 18:05:59 2008-09-02 17:22:04 3 3 55 15 45 67 0 40.00 63 12.48 CHANGED HPRLSpYKsK.tsuh-QucRRR+hLElQKp+R..LDahs+uR+ ....HPRLS.Q....YKuK..aSuL.EQSERRR+LLELQKsKR.....LDYVNHARR......... 1 12 17 28 +11372 PF11540 Dynein_IC2 Cytoplasmic dynein 1 intermediate chain 2 Pollington J anon pdb_2pg1 Family Intermediate chain IC 2 forms part of the complex cytoplasmic dynein 1 along with a heavy chain (HC), two light intermediate chains (LICs) and three light chains (LCs). The complex is responsible for hydrolysing ATP to generate force toward the minus end of microtubules [1]. 
IC binds to the HC via the N terminal binding domain on the HC and ICs contain binding sites for the LCs. The ICs are responsible for binding to kinetochores and the Golgi apparatus through an interaction with the p150Glued subunit of dynactin which is another complex [1]. 25.00 25.00 26.60 26.90 21.00 20.10 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.25 0.72 -4.58 10 218 2009-01-15 18:05:59 2008-09-03 09:13:31 3 4 80 7 86 257 0 32.80 60 5.67 CHANGED u++sh+LuhScVsplDFsPKEsVoYSKETQTss .....tRtshKLGhuKlTQVDFPPRElVoYoKETQTPs.. 0 24 30 53 +11374 PF11542 Mdv1 Mitochondrial division protein 1 Pollington J anon pdb_2pqn Family Mdv1 is a component of the mitochondrial fission machinery in Saccharomyces cerevisiae. The protein is also involved in peroxisome proliferation [1]. Mdv1 along with Fis1 is also involved in controlling Dnm-1 dependant devision, a GTPase involved in the mediation of mitochondrial division. In this role, Mdv1 is the linker between Fis1 and Dnm1. Mdv1 plays a key role in the regulation of Dnm1 self-assembly [2]. 25.00 25.00 28.00 27.00 23.40 22.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.48 0.72 -4.04 2 12 2009-01-15 18:05:59 2008-09-03 11:17:30 3 2 12 3 4 11 0 49.60 90 7.29 CHANGED DADGKLLTEGGENENLRKNASKKETSLFQGFKSYLPIAELAIENTERLNY ..DADGKLLTEGGEDENLRKNASKKETSLFQGFKSYLPIAELAIENTERLNY... 0 1 1 2 +11375 PF11543 UN_NPL4 Nuclear pore localisation protein NPL4 Pollington J anon pdb_2pjh Family Npl4 is part of the heterodimer UN along with Ufd1 which is involved in the recruitment of p97, an AAA ATPase, for tasks involving the ubiquitin pathway. Npl4 has a ubiquitin-like domain which has within its structure a beta-grasp fold with a helical insert [1]. 
21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.37 0.72 -3.70 9 177 2012-10-03 10:59:06 2008-09-03 11:20:48 3 14 147 2 124 200 2 78.60 30 14.25 CHANGED MstphllRVQSs-GhKRlphopppThsphhcKVtcphuFspp.tFulah-RNposEl.uSpspplp.ltl+HGDhL.aLh.s .................hllRlpSs-GhcRl.p.s..s.t..pp.T....huplh....ccltcp.h..s.hsss...u...h.sl.a..hs........c........s...........t......s...........sc.......l.t........u.......s...s.......s.p..o....L....s........LtlpHGDhl.aLh............................... 0 46 72 100 +11376 PF11544 Spc42p Spindle pole body component Spc42p Pollington J anon pdb_2q6q Family Spc42p is a 42-kD component of the S.cerevisiae spindle body that localises to the electron dense central region of the SPB [1].Spc42p is a phosphoprotein which forms a polymeric layer at the periphery of the SPB central plaque. This functions during SPB duplication and also facilitates the attachment of the SPB to the nuclear membrane [1]. 21.10 21.10 21.30 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.69 0.72 -4.05 5 30 2009-09-11 12:37:10 2008-09-03 12:45:58 3 3 26 2 21 27 0 72.90 38 16.96 CHANGED NcLIKQNKELpsKLcEKQ-EI-RLNlLlGSLRAKLIKYTELNKKLpc-tQstQpp.sslscsto-stsDulhhsc+u .......NchlpQNK-LphKLc-KQsEI.cLpplspoLRuKL.KYs-lsKK..LEcpshshphp.sshppphs-..ss........................................ 0 3 9 18 +11377 PF11545 HemeBinding_Shp Cell surface heme-binding protein Shp Pollington J anon pdb_2q7a Family Shp is part of a complex which functions in heme uptake in Streptococcus pyogenes. During which, Shp transfers its heme to HtsA which is a component of an ABC transporter. The heme binding region of Shp contains an immunoglobulin-like beta-sandwich fold and has a unique heme-iron coordination with the axial ligands being two methionine residues from the same Shp molecule [1]. 
Surrounding the heme pocket, there is a negative surface which may serve as a docking interface for heme transfer [1]. 25.40 25.40 26.10 31.60 23.20 25.30 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.96 0.71 -4.40 3 63 2009-01-15 18:05:59 2008-09-03 13:00:48 3 2 61 2 4 59 2 148.50 42 44.61 CHANGED ADKGplYoClIpRsYRHPVSGQIEDSGGEHuF-IGQGMVEGTVYSsGMLEVTDAG-lYLTFRMSLADYSGNYQFWVQPGGTGuFQAsAYulTpsGTDTNGTTtDIAIuLPoVNoVVRGSMYVEPMGREVVFYLSPSELpEGYSGDMlAohVT ......................tuplYsshlppsYcHPloGpIED...uG...Gptuh....sIGQGMVEusVausuhLEsoDuG+lhLThRhuLADa...sushp...FhlQ.s.sGsG...uFpuVshslTppGoDs...NGTT........tD.ltIplPohNslIRuSMaVEPMGR-VlFYl..ssuph...pt.ossh.s.h........................ 0 1 2 3 +11378 PF11546 CompInhib_SCIN Staphylococcal complement inhibitor SCIN Pollington J anon pdb_2qff Family SCIN is released by Staphylococcus aureus to counteract the host immune defense. The protein binds to and inhibits C3 convertases on the bacterial surface, reducing phagocytosis and blocking downstream effector functions by C3b deposition on its surface [1]. An 18 residue stretch 31-48 is crucial for SCIN activity [1]. 21.30 21.30 22.10 21.80 21.10 21.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.37 0.71 -4.23 4 473 2009-01-15 18:05:59 2008-09-03 14:17:44 3 1 163 22 5 62 1 111.40 56 97.12 CHANGED MKhKKYIlAGTLAlLLuoTulssl-tNcAsASo..ph.hpspapcc+lA-EL+oLLsposVNcLAsGSLNsYYKRsIhhspY+AKuALKopsFspMu-AKhtLEpIYcEIDEhlpp ......................MKIKKYIlAGTLAlLLuoTulssl-KNEAsAST..ph.hpspYQcc+LA..-EL+oLLsp..osVN.cLAsGSLNsYYKRsIhhupY+.AKuALKoKsFcpMo-AKhpLEpIYsEIDEsL+S....... 1 4 4 5 +11379 PF11547 E3_UbLigase_EDD E3 ubiquitin ligase EDD Pollington J anon pdb_2qho Family EDD, the ER ubiquitin ligase from the HECT ligases, contains an N-terminal ubiquitin-associated domain which binds ubiquitin. Ubiquitin is recognised by helices alpha-1 and -3 in in the UBA domain. 
EDD is involved in DNA damage repair pathways and binds to mono-ubiquitinated proteins [1]. 25.00 25.00 46.70 45.50 23.00 22.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.53 0.72 -4.47 3 94 2009-01-15 18:05:59 2008-09-03 14:33:28 3 7 74 4 61 100 0 52.30 84 2.12 CHANGED suIPAuhVPEELIuQAQVVLQGKSRNVIIRELQRTNLDVNLAVNNLLSRDDED .........slPAoslPEELISQAQVVLQGKSRsVIIRELQRTNLDVNLAVNNLLSRDDED.. 0 21 26 44 +11380 PF11548 Receptor_IA-2 Protein-tyrosine phosphatase receptor IA-2 Pollington J anon pdb_2qt7 Family IA-2 is a protein-tyrosine phosphatase receptor that upon exocytosis, the cytoplasmic domain is cleaved and moves to the nucleus where it enhances transcription of the insulin gene [1]. The mature exodomain of IA-2 participates in adhesion to the extracellular matrix and is self-proteolyzed in vitro by reactive oxygen species which may be a new shedding mechanism [1]. 20.50 20.50 21.20 21.10 20.40 19.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.85 0.72 -4.07 6 114 2009-01-15 18:05:59 2008-09-03 14:53:12 3 6 52 8 47 134 0 83.70 52 9.80 CHANGED -EpaGYIlT-pcPLSs-cGl+LhElLAchl+lsoosFhslpVlGPAVTF+lRsNtQNloTADVsctAsssKspLEppoGL+ILQoGlup+s .............t.EcaGYIlTcpcPL.oh.ttGh+LlEhLAchlchsousFhsISVVGPAlTFRlRpNp.QNlohADVsppAs...s...KscLEspTGLpILQTGVGpR.......................... 0 6 10 24 +11381 PF11549 Sec31 Protein transport protein SEC31 Pollington J anon pdb_2qtv Family Sec31 is involved in COPII coat formation as it forms through the sequential binding of three cytoplasmic proteins: Sar1, Sec23/24 and Sec13/31. Sec13/31 is recruited by the pre-budding complex and polymerisation of Sec13/31 occurs to form an octahedral cage that is the outer shell of the COPII coat [1]. 
Sec13/31 is a hetero-tetramer which is organised as a linear array of alpha-solenoid and beta-propeller domains to form a rod in which twenty-four copies assemble to form the COPII cub-octahedron [1]. 25.00 25.00 25.30 48.00 23.70 19.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.58 0.72 -4.32 6 24 2009-01-15 18:05:59 2008-09-03 15:43:24 3 6 22 1 15 26 0 50.20 66 3.95 CHANGED sssPssssNVhSGQTPHLN+KANDGWNDLPLhVKEKPoRAKPVoVAPsuhh ......ss.PPh..NshSGQTPHLN+KANDGWNDLPLcVKE...KPSRAKAVSVAPssl... 0 1 7 14 +11382 PF11550 IglC Intracellular growth locus C protein Pollington J anon pdb_2qwu Family IglC protein is involved in the escape of F.tularensis live vaccine strain [1]. It has been shown that the expression of IglC is essential for F.tularensis to induce macrophage apoptosis [2]. IglC adopts a beta-sandwich conformation that has no similarity to any known protein structure [3]. 25.00 25.00 55.40 55.30 21.20 17.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.27 0.70 -5.02 2 33 2009-01-15 18:05:59 2008-09-03 16:03:29 3 1 32 2 2 17 0 203.70 95 100.00 CHANGED MIMoEMITRQQVTSGETIpVRTDPTACIGSHPpsRLFIDSLTIAGEpLDKNIVAIEGG-DVTKADSATAAASVIRLSITPGSINPTISIshGsLIKSsVRsKlpEKlSsILQASATDMKIKLGNSNKKQEYKTD-AWGIMIDLSNLELYPISAcAFSISIEPTELMGVSKDGMpYHIISIDGLTTSQGSLPVCCAASTDKGVAKIGYIAsu ..MSEMITRQQVTSGETIHVRTDPTACIGSHPNCRLFIDSLTIAGEKLDKNIVAIEGGEDVTKADSATAAASVIRLSITPGSINPTISITLGVLIKSNVRTKIEEKVSSILQASATDMKIKLGNSNKKQEYKTDEAWGIMIDLSNLELYPISAKAFSISIEPTELMGVSKDGMpYHIISIDGLTTSQGSLPVCCAASTDKGVAKIGYIAAA. 0 1 1 1 +11383 PF11551 Omp28 Outer membrane protein Omp28 Pollington J anon pdb_2r2c Family Omp28 is a 28-kDa outer membrane protein from Porphyromonas gingivalis. Omp28 is thought to be a surface adhesion/receptor protein. Omp28 is expressed in a wide distribution of P.gingivalis strains [1]. 
23.90 23.90 24.50 24.70 23.80 23.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.15 0.71 -4.74 4 100 2009-01-15 18:05:59 2008-09-03 16:20:42 3 2 61 2 16 103 108 175.20 19 45.94 CHANGED Dshhhc.tsshpptatloGhPsshls.RcthhhS.shsh..hsashslhp....pstssslAIsSthsGpplsVTVpVth-pGso.sh+lV.lYlLENGLlhsQss............hGssl.sasHNcVLRtuhouhh.GDhhsshtshtthohuVslhhstuaNuENhslsAhVsDsss.pshss.+stlsspsDa ............................................................................................................sshPpshhs.R......p........th..h.......p...................t...tsh....t...t.pl...hp..............h.ss..s.....s..lsl.p..s.p..hs......s..s....p..hpls..lps.p...h...t.p.s.s...s.....t..h.+Lh..laLlEDulhu.Qt.s...................sssh..h..ps..Y...sHNHVlRs.....sl.s.u.sh....G-c.l.s.h..ts..ss....p..pshp..hshsls..p..s.h.s.....s......ps...hp...lVAalh....s..sss.....s.pshptth.......................................................................... 0 8 15 16 +11385 PF11553 DUF3231 Protein of unknown function (DUF3231) Pollington J anon pdb_2rbd Family This bacterial family of proteins has no known function. 27.70 27.70 28.10 27.90 27.60 27.60 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.75 51 448 2009-01-15 18:05:59 2008-09-03 17:01:37 3 3 139 2 115 456 0 159.20 22 94.95 CHANGED hthhh.....ppsLsusElutlassh.ssshshslhptFhpsscDc-l+phlpcuhcl.upcalctlpplhpc-slslPpua..-s-Vssssss....FoDphhlhalphhspsulssYuhuhuhshRpDlthhatchhtchhphhpcshclhlc+Ghh.pPPhhsspccl ............h........sppcsLpssElhplWshhhssphshshhphFhppspDc-l+p.hlp.p.s.h.c.h.s.ppplcplpplLpppsl..sl.Ps...s...s...cssssh.s.s.ss.....asDthhhthl.shhh.t.tulsshutuhuts.hRpD.l.thhasphhhcphphspchhclhhcK.GWl...PPhh..................... 0 45 112 113 +11386 PF11554 DUF3232 Protein of unknown function (DUF3232) Pollington J anon pdb_2rdc Family This bacterial family of proteins has no known function. 
20.90 20.90 21.00 21.80 20.00 20.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.72 0.71 -4.30 3 14 2009-01-15 18:05:59 2008-09-04 09:07:04 3 2 14 2 7 16 0 125.40 28 65.53 CHANGED MhQ.p..usVlSaVupahKuoEpsM-RYK+VlsIsKuDEVAl+LLEGLIDAuTRYFuKVVEMEpRLQTARFRLEGEELR-LTE-LDRSRRhAHEAhISSLHVFNRYlVKEYGE-LpEAGapGGIFP+PEAsRDRIAIAD..WAGELLoGIYE.sR++ .........................................t.h...........................................llpshhpphpcYshhVlch..-spltsh+.chsGp-YR-hsEpLD.+pRoshHssslSsl+ILNRhA-ppt........atss...ps-hsc.t.sIsc..at...............h................. 0 3 4 5 +11387 PF11555 Inhibitor_Mig-6 EGFR receptor inhibitor Mig-6 Pollington J anon pdb_2rf9 Family When the kinase domain of EGFR binds to segment one of Mitogen induced gene 6 (Mig-6), EGFR becomes inactive due to the conformation it adopts which is Src/CDK like. The binding of the two proteins prevents EGFR acting as a cyclin-like activator for other kinase domains [1].The structure of Mig-6(1) consists of alpha helices-G and -H with a polar surface and hydrophobic residues for interactions with EGFR. A critical step for the activation of EGFR is the formation of an asymmetric dimer involving the kinase domains of the protein. Since Mig-6 binds to the kinase domain it blocks this process and EGFR becomes inactive [1]. 25.00 25.00 25.20 25.20 22.60 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.65 0.72 -3.58 6 119 2009-01-15 18:05:59 2008-09-04 09:35:41 3 9 40 6 49 99 0 64.70 64 8.47 CHANGED +PPpVPPR-PL.SpssSRTPSP+u............................Lso..stlMPsTQSFAuDPKYsosKslQtQus-uu ...................pPP+VP..P...RE.PL.S.psSRTPSPhs.....................................LsS..stsMPsTQSFAsDPKYsosps..lQt.uscuu........... 
0 2 8 18 +11388 PF11556 EBA-175_VI Erythrocyte binding antigen 175 Pollington J anon pdb_2rji Family EBA-175 is involved in the formation of a tight junction, a necessary step in invasion. This family represents the region VI which is a cysteine rich domain essential for EBA-175 trafficking. The structure is a homodimer that contains a five-alpha-helical core stabilised by four disulphide bridges [1]. 21.50 21.50 22.80 22.80 19.60 18.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.18 0.72 -3.88 11 128 2009-01-15 18:05:59 2008-09-04 10:54:31 3 10 18 2 19 138 0 77.90 58 8.31 CHANGED lccTREcIIphSppNhCsNchS.cYCshhccchss.uTCSc-cpKsLCCSISDYCLKYFshsSpcYYsCh++EFpDPsYcCF .lKsTREpIIhhSph.pKCNNslSlcYCsolcDKISS.sTCS+E+oKNLCCSISDaCLpYF-h.ShEYasCMK+EFcsPpYpCF. 0 5 7 16 +11389 PF11557 DUF3233 Protein of unknown function (DUF3233) Mistry J anon Pfam-B_5068 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.50 22.50 22.70 43.60 21.40 22.40 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.98 0.70 -5.61 6 107 2009-12-15 10:43:27 2008-09-04 10:56:04 3 2 103 0 13 62 1 315.40 57 98.66 CHANGED Mlphsh.....+l......s...huhlLlhstuspAcsh.ch.....lpsclEpsFooulVLoDS-sIThGIsDFDPNchlchcpps....hGos-SIphRpphoVYoLPa...ThsLo--.su.............apHpLpsRLShlpt-s-lplhsssu.oDsh+-psaGuhltYshpYploEsWTlssuhGsHLMaY+NsaoYNsch.pphtslLDGhhVNsSAhAhllEPNlchpYppcpsWGKWcasSoh+YFhGpuaGsAss.upsuNPcGWhlsNulphaYshschtctsQulYsphKRVDlGGDss-sLGTcHYYEhulGWLhssPphhphlDNlGIGlohNhGSAL+GGSIVLaFNc 
..........................................................h.Sho...uoA...cS...h.Dh.....IQcaLEQAFSSSVVLSDSDVFTsGFNNFDPN-WFcsDN-N....LGTsESIEpRK+aKSSTLPh...TluLSE-.-A.............haQHQLFFRLSAsVID--LsIus..h.s.u...co-+aRpSVLGGulhYRYQY+LT-HWTLTPAIGTHLlYYRNohTYNNPphKh.L.hSsLDGLLVNThAWAsLlEsNlKlQY-EEKSWG+W+ASSuWHYFsGaGWGcANN.G-VGNPEGWYlANoLTGhYDF..TQlGRSVQSIYuSIKRVDVGusspEPLGToNYYEAShGWLMTPPFEh-hVDNIGlGLTFNYGSAhKGGSIVLFFNQ.......... 0 2 4 9 +11390 PF11558 HET-s_218-289 Het-s 218-289 Pollington J anon pdb_2rnm Motif This family of proteins is residues 218-289 of Het-s, a protein of Podospora anserina. Het-s plays a role in heterokaryon incompatibility which prevents different forms of parasitism [1]. This region of the protein is the C-terminal end and is unstructured in solution but forms infectious fibrils in vitro which has a structure consisting of a left-handed beta solenoid which contains two windings per molecule [1]. 25.00 25.00 30.00 28.90 20.20 19.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.86 0.72 -4.00 5 15 2009-01-15 18:05:59 2008-09-04 11:24:25 3 1 9 18 14 19 0 63.00 33 22.32 CHANGED KlssIsVRN.s+cIpopEsAKV+LGNsaosuALusAIthsDRToNcs-oVpuKGsSsVHIGN+YG ...KlpsIsu+NpA+cIpsE-pA+lclGNsaSpssLspuht...lsDpTpNpV-sVsA+ssS+VpIGNpYG 0 0 7 13 +11391 PF11559 ADIP Afadin- and alpha -actinin-Binding Wood V, Coggill P anon Pfam_B017401 (23.0) Family This family is found in mammals where it is localised at cell-cell adherens junctions [1], and in Sch. pombe and other fungi where it anchors spindle-pole bodies to spindle microtubules [2]. It is a coiled-coil structure, and in pombe, it is required for anchoring the minus end of spindle microtubules to the centrosome equivalent, the spindle-pole body. The name ADIP derives from the family being composed of Afadin- and alpha -Actinin-Binding Proteins Localised at Cell-Cell Adherens Junctions. 
29.90 29.90 29.90 30.10 29.80 29.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.66 0.71 -4.31 24 277 2009-01-15 18:05:59 2008-09-04 11:38:10 3 9 197 0 184 254 1 144.40 26 27.58 CHANGED cpshpaINppLhohGasp.tthhsss............................phshstllNslasLlttpc+shctpEslppphpphpu-hpphpsshp+Lc.sphpthp+Ehs.thpppc+plpppl+shpppl+sp+--lp+hpshlpshcoQhsp-h+++-pEhpKLKc+Lpp ...........................................ps.pYlsppLh.shGh.p.hp..p.t..............................thshspllNsl.cLl.htp..c......+shptpEslpsphpplpu-hs+hpsphp+L........c.pplpptcRchs.thppp-pphpsph.....+..shpppl+pt+-...Elp+hps......hlpphps...Qhsp-h+++-pEhp+LKc+Lpp......................................... 0 61 101 146 +11392 PF11560 LAP2alpha Lamina-associated polypeptide 2 alpha Pollington J anon pdb_2v0x Family LAPs are components of the nuclear lamina which supports the nuclear envelope.LAP2alpha is a non-membrane-associated member of the LAP family which is unique. This family of proteins is the C terminal domain of LAP2alpha which consists of residues 459-693 and constitutes a dimeric structure with an antiparallel coiled coil. LAP2alpha is involved in cell-cycle regulation and chromatin organisation and preferentially binds to lamin A/C [1]. 
25.00 25.00 31.50 30.70 21.90 19.00 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.61 0.70 -5.18 2 25 2009-01-15 18:05:59 2008-09-04 11:50:26 3 3 17 2 9 47 0 209.20 60 33.77 CHANGED AKoVVSHSLTTLGlEVuK..sQHDKI-ASE.SFPhHESILKVlEEEWQQlDRQLPSlAC+YPVSShEAspILSVPKVDDEILGFISEATPhuuhQAuSTESCsppLDLALCRuYEAAASALQIAsHTAFVAKuhQADISQAAQIlsSDPScspQALtILs+TYDAASYlC-AAFDEV+MuApsMGsuThGRRYLWLKDCKIs.ASKNKLsssPFKGGTLFGGEVpKVIKKRGNKp ..............AKTVVScSLTTLGlEhSK.QSQHDKIDASE.SFPhHESILKVIEEEWQQlDRQLPSLACKYPVSS+EATpILSVPKVDDEILtFISEATP.sGlQAuSTESCsKpLDLALCRsYEAAASALQIAoHTAFVsKAhQADISQAAQILSSDPo.cttQALsILS+TYDAASalC-AAFDEV+MuA+oMGsSTsGRRaLWLKDCKls.ASKNKLsssPFKGGTLFGGEVpKVIKKRGNK....... 0 1 1 1 +11393 PF11561 Saw1 Single strand annealing-weakened 1 Wood V, Coggill P anon Pfam_B03980 Family This family of yeast proteins is involved in single-strand-annealing, or SSA. SSA entails multiple steps: end resection and ssDNA formation; annealing of complementary ssDNAs; removal of 3' single-stranded non-homologous tails; gap fill-in synthesis; and ligation. Saw1 in combination with Slx4 catalyses the 3' non-homologous tail removal during recombination. Saw1 interacts physically with Rad1/Rad10, Msh2/Msh3, and Rad52 proteins, and works by targeting Rad1/Rad10 to Rad52-coated recombination intermediates [1]. 
20.50 20.50 21.80 21.00 19.20 18.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.34 0.70 -4.95 9 45 2009-09-11 06:58:03 2008-09-04 13:11:48 3 2 41 0 30 45 2 210.00 28 91.69 CHANGED Mssslshl+lusshllPlRlFlpR+plLt........sptshhp...lsppsIlpLpp.shplhLSpsDlpuLlspl+c-Ll.llhp.............................hppss.......hhshplpshcchs+hchpl+hhhphclslhl.shcclthlpph..h.h...................hsptstsLhhhspph.h............................p.ppstcp-...cK..lpaph+ssh...sltcsIclYVh ...MssplshlplspshlLPlRIFlNR+plLp.....................sp.o.suohhpt......P.lsspSIIsL+s.ss+IhLSppDhps.Lss-I+c-LLhIla.-hss....t.....................................................................................t...phl.c...l.hssspsp...phhssplpshpchsKhphpL+hctphclcIh.lsshccl.s.plRchlhh......................shs.ps.pL.hh.pph.h.....................hl.-stp.t..................t.pts.pp-.....K.....lpahhps.h....slschIclal.............................................................................. 2 7 17 28 +11395 PF11563 Protoglobin Protoglobin Pollington J, Eberhardt R anon pdb_2veb Family This family includes protoglobin from Methanosarcina acetivorans C2A. It is also found near the N-terminus of the Haem-based aerotactic transducer HemAT in Bacillus subtilis (Swiss:O07621). It is part of the haemoglobin superfamily. Protoglobin has specific loops and an amino-terminal extension which leads to the burying of the haem within the matrix of the protein. Protoglobin-specific apolar tunnels allow the access of O2, CO and NO to the haem distal site [1]. In HemAT it acts as an oxygen sensor domain [2]. 
25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.62 0.71 -4.57 104 1086 2012-10-01 21:46:00 2008-09-04 14:07:44 3 53 846 19 390 1089 31 155.20 20 36.20 CHANGED tpsclpthhpalshs...pcDhphLpphpshlpstlssll-taYpclhph.sphschhss......................psplp+h+pthppalpcl......hsus...hcttahcthpc.lGthHs............clslc.pahhushshlhptlhptlhpp............hshpchtthlpAlsKllhl-.slhhpsYhcstpp ...............................h.........thhths...ttphthltp.ht.thh.ts.phctlsppFY.ch.lt.p.p..Pc.h.tp.hlss.............................pps.hpcLK.pshpp..al.h.p.l......hsup........hD..p..c.a...l.p.h.p...pp...luphHs..............+IGls....schhhsuaphlhchlhthlhsp........................................................hs.htchhphhpshh+hl.lshplhhpsY.t....h.......................................................... 0 140 270 329 +11396 PF11564 BpuJI_N Restriction endonuclease BpuJI - N terminal Pollington J anon pdb_2vla Family BpuJI is a restriction endonuclease which recognises the asymmetric sequence 5'-CCCGT and cuts at multiple sites in the surrounding area of the target sequence. This family of proteins is the N terminal domain of BpuJI which has DNA recognition functions. The recognition domain has two subdomains D1 and D2. The recognition of the target sequence occurs through major groove contacts of amino acids on the helix-turn-helix region and the N-terminal arm [1]. 
25.00 25.00 112.60 112.60 24.50 24.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.03 0.70 -5.46 2 14 2009-01-15 18:05:59 2008-09-04 14:28:20 3 2 13 1 3 15 0 256.30 42 61.35 CHANGED M..sPEc..FRhph.Rs+hKN.l-NlLshhAs.Is.ls.hsKtpFspphNshlhphhGp.-pTcKTlsNaRTEIu.pLFGMhhEc-thlasusRspchhEDpD..tFFKshhapFpaPsGh.K.scllchsthtlQh+.h.YlLplhh...pA-ppsI.Loc--lAahlhNsLpshps+..slElhp+IhEsRusclshcs+..Gc.hpYu....+-.LsYh.lAsLlplpGshhKlN.hEtpsIN...pFhtscspFsuYt..h....To.-Dh+sFhpsW.pY...VNpchu ...Y.sP-cYaaRlHH.RPRFKs-lEsVLlahAspIS.ls.LscppFspphNphlppah....s.N....scps.pKTIsNWRTEIu.uLFGhhh......EcpG.hlh.suspApcLs-sQDL.cFFKpFhasFpYPuGHhKsppIlchhp.slpFK.....PspalLpLht...pu.cp...+..shhLTc-EluahlaNDLRsspc+pssh.Elhp+IhcNRtsclta-sp................GDVhRYA....pDILDYM.lAsLl.plpG.s..p....ahlNs.EppuIs...cahpssshFcsYsphht.tpsohc-l+phcptWF-Y...VNpt......... 0 2 2 2 +11397 PF11565 PorB Alpha helical Porin B Pollington J anon pdb_2vqg Family Porin B is a porin from Corynebacterium glutamicum which allows the exchange of material across the mycolic acid layer which is the protective nonpolar barrier. Porin B has an alpha helical core structure consisting of four alpha-helices surrounding a nonpolar interior. There is a disulphide bridge between helices 1 and 4 to form a stable covalently bound ring [1]. The channel of PorB is oligomeric [1]. 21.10 21.10 21.10 30.60 20.90 19.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.33 0.72 -3.78 4 21 2009-01-15 18:05:59 2008-09-04 14:58:00 3 1 13 16 6 20 0 103.70 35 77.37 CHANGED AtsAslsSo..sp-lsshhDhhsCsILcsuLstsGLscEspppsELAAsLcspus.........lG-hs.......suhuuclADRAQTCGIVcsD......TtLppLSSNLSS ...................u..Aslsss..spthsshlsshsCulLcs...uLtss.....G..lhcEsoTRsELAtsLcspus...ss..hsplsshs.......sshuuplADRA.TCGIVKsD.......o.Lp...tLSSNhSs.................. 
0 3 4 6 +11398 PF11566 PI31_Prot_N Inhibitor_PI31; PI31 proteasome regulator N-terminal Pollington J anon pdb_2vt8 Family PI31 is a regulatory subunit of the immuno-proteasome which is an inhibitor of the 20 S proteasome in vitro.PI31 is also an F-box protein Fbxo7.Skp1 binding partner which requires an N terminal FP domain in both proteins for the interaction to occur via the FP beta sheets. The structure of PI31 FP domain contains a novel alpha/beta-fold and two intermolecular contact surfaces [1]. This is the N-terminal domain of the members. 18.50 16.60 18.70 17.10 18.20 16.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.94 0.71 -4.63 42 340 2009-01-15 18:05:59 2008-09-04 15:45:36 3 5 214 2 203 338 1 146.10 22 42.76 CHANGED hh.htshpsslpsstDslshhlHhhhhps.sFchh...t........................lscspphpppp..........................t..lPttWst.s.ssYshpYtpsts..shpalLpshthssphllps...lshs.spphsshplsspcalst..................pptsshsshhpp.......hpcLhshhcpp................................................llp.lhsshpptth ..............t.......psshpsspDulhhhlHhhhlps.Gahhh...s........................hscssphpstp...........................................hhP.ppW..st..spssYshpYtcs...........s...stphllpshh.hss.tlllss.........lsht..scplpplpl.psccalsp..............................pp.sshsph.hts........................hpcL.phhcsp................................................ll..lhsthpp..................................................................... 2 59 93 148 +11399 PF11567 PfUIS3 Plasmodium falciparum UIS3 membrane protein Pollington J anon pdb_2vwa Family UIS3 is a membrane protein essential for sporozoite development in infected hepatocytes. This family is 130-229 of the Plasmodium falciparum UIS3 protein which is compact and has an all alpha-helical structure.PfUIS3(130-229) interacts with lipids, phospholipid lysosomes, the human liver fatty acid-binding protein and with the lipid phosphatidylethanolamine. 
The interaction with liver fatty acid-binding protein provides the parasite with a method to import essential fatty acids/lipids during rapid growth phases of sporozoites [1]. 25.00 25.00 118.90 118.00 21.00 20.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -3.86 2 9 2009-01-15 18:05:59 2008-09-04 16:22:46 3 2 8 6 6 11 0 97.60 57 46.07 CHANGED INKlNlpt.lhENpNslDlslKRaN.FhD.s+LuhQ+HFpcLSN-Q+c.hlNsh-YhpKhVQsLpEsRslslSKhQEshAVhslcaaLpc.Y.....Qpc. INKVNlKG.LhENsN-lDVPlKRFNhFhDNs+LAhQHHFscLSN-QpcYhlNDhDYlRKlVQoL-EsRNlslS+hQEDhAVLslEaFLpc.Ytp............ 0 1 2 5 +11400 PF11568 Med29 Mediator complex subunit 29 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-active part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Med29, along with Med11 and Med28, in mammals, is part of the core head-region of the complex. Med29 is the apparent orthologue of the Drosophila melanogaster Intersex protein, which interacts directly with, and functions as a transcriptional coactivator for, the DNA-binding transcription factor Doublesex, so it is likely that mammalian Med29 serves as a target for one or more DNA-binding transcriptional activators [4]. 
25.00 25.00 27.50 27.10 24.50 23.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.99 0.71 -4.15 7 108 2009-01-15 18:05:59 2008-09-04 16:40:37 3 2 83 0 61 99 0 135.60 45 69.25 CHANGED p-ch....Dsls+VKsLl.ssLR-Sl.phhKpuAphLpQNp..Ds...pptpst-ssh.RF-KsLE-FaAlCDQlElsL+TAhpChpQtss........Ss+alPs.Vhss.....spsssh..sslsYspYLssVpspIppAKDIHcsLlssupplsst...........- ..............................................pch....Dslp+hKhLl.s.L+ESLp...shhKsAApsLhQNsslDNG.....tKuuD.s.slp........RFDKsLEEFYAlCDQlE.....LsL...+hAhECLo.Qsss........Ss+a.Ps.Vsss.....spsssspsssLsYsQYLssl+uQIssAKDIHssLlssupplssp...................................... 0 19 24 45 +11401 PF11569 Homez Homeodomain leucine-zipper encoding, Homez Pollington J anon pdb_2ys9 Family Homez contains two leucine zipper-like motifs and an acidic domain and belongs to the superfamily of homeobox-containing proteins. The presence of leucine zippers suggests that Homez can function as a homo or heterodimer in the nucleus [1]. It is thought that the first leucine zipper and homeodomain 1 (HD1)of Homez is responsible for dimerisation and HD2 has a specific DNA-binding activity. Homez is also thought to function as a transcriptional repressor due to the acidic region in its C-terminal domain [1]. Homez is involved in a complex regulatory network [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.89 0.72 -4.91 9 166 2012-10-04 14:01:12 2008-09-04 16:47:08 3 12 50 1 72 289 0 52.90 41 7.20 CHANGED sh.ssscsl....Lp-YYhpH+hLpEpDLDsLspKSpMShpQV+-WFAp+.pcpscs .....................h...sspth.....LccYY.h.p.H.+...h...L...pE.pDLspLss+SpMShpQV+-WFup+..c.s..t.......... 0 6 10 31 +11402 PF11570 E2R135 Coiled-coil receptor-binding R-domain of colicin E2 Pollington J anon pdb_2ysu Family E2 is a DNase which utilises the outer membrane receptor BtuB to bind to and enter the cell. 
This family of proteins is E2R135 (residues 321-443) which is the part of E2 which is responsible for binding to BtuB in a coiled coil formation [1]. 22.80 22.80 23.00 23.30 21.90 22.70 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.88 0.71 -4.07 2 37 2009-12-15 10:45:41 2008-09-04 17:03:58 3 11 21 6 3 46 1 126.60 56 19.71 CHANGED HP.EuhcRpY-+A+AELsttscslAp.ppR.Aps.pshsut+utlptAsKplt-..AEhpp.DhhsasP.tthtphWQ....psphhppDlpNpptthcAAtpphs-hs.......AALSuA.EpRKQKEpKtKDAcsKLs ...............HP.EuhcRsY-+A+AELspAsc-lApsQpRpApAlps.......hsuR+SELDtAsKslt-h.AElKph-RFA+-PMAuGHRMWQMAGLKAQRAQTDVNNKpAAFDAAAKEKuDAD.......AALSuA.EpRKQKEsKc+DAcsKLs....................... 1 1 1 2 +11403 PF11571 Med27 Mediator complex subunit 27 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species {1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Mediator exists in two major forms in human cells: a smaller form that interacts strongly with pol II and activates transcription, and a large form that does not interact strongly with pol II and does not directly activate transcription. The ubiquitous expression of Med27 mRNA suggests a universal requirement for Med27 in transcriptional initiation. Loss of Crsp34/Med27 decreases amacrine cell number, but increases the number of rod photoreceptor cells [4]. 
25.00 25.00 25.70 26.20 22.90 24.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.38 0.72 -3.86 18 176 2009-01-15 18:05:59 2008-09-04 17:06:26 3 3 146 0 120 170 0 91.90 36 25.59 CHANGED shpths.apsSc............hplapclospsppAlhphhs.sp.tthsltphhsaIpSY...pslFssPCs+Cs+hl.....ptt.LP.....PshR..shss........h-saHpsCh ........................ts..thshastSp............apVFp+lo-+ApsAl.laah.....sphPc...hs.....l+shhsWlpSY...hpLFpsPCp+CG+hL......pstLP.....PsaR..chpo................................hEAaHpsCp................................ 0 38 55 93 +11404 PF11572 DUF3234 Protein of unknown function (DUF3234) Pollington J anon pdb_2z0r Family This bacterial family of proteins has no known function. Some members in this family of proteins are annotated as TTHA0547 however this cannot be confirmed. 20.20 20.20 22.30 45.80 19.40 18.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.39 0.72 -3.90 2 11 2009-01-15 18:05:59 2008-09-04 17:08:03 3 1 11 12 5 9 0 103.50 64 98.53 CHANGED MAPDLSGTWYVLEGDPGEHLVVEALGERLSGIWTSRELAEAFLAHHPHLGMRVSALESRALKEAaLRALGMLQVEAVMVDYRPGTHRAQVARVKDLLEEVRRA .....MtPDL.SGTWYVLEG..-..PGEHLVlEALGpRLSGIWTScELAcuFLA+.HPc.L.GMRVSsLESRALKEAFLRALGMLpVEAVhVDYRPGoHRAplARVc-LLEEVRRA......... 0 1 3 5 +11405 PF11573 Med23 Mediator complex subunit 23 Coggill P anon manual Family Med23 is one of the subunits of the Tail portion of the Mediator complex that regulates RNA polymerase II activity. Med23 is required for heat-shock-specific gene expression, and has been shown to mediate transcriptional activation of E1A in mice. 
20.60 20.60 28.40 20.80 18.80 20.00 hmmbuild -o /dev/null HMM SEED 1341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.02 0.69 -14.13 0.69 -7.23 7 204 2009-01-15 18:05:59 2008-09-04 17:13:51 3 8 102 0 133 209 1 793.90 37 84.96 CHANGED Ms........ppllch.-p..........hlcsp..p....s......ps.psphtphst.....hsshhuthtpcp+pphl+.hlhhltthsp.p.........phchhhp.Lhchsttthl.uphLCtphh..pp..h.ppphh.Epa+hl+csIstlDYKGlRpIh+hhhE.phhphP.slSsphlsplhtlp-ll.+Ih-RsusLLPAYhslsEIh+shshps.h...W+Lsphlush.pcFRslAphhoIhG+sshLPIVpHs.....uatsthhs.Wclc.sshph.hpup..LPYcP-h.hpsQhhLLpaVLcQPYS+DMlsshhshp.pp+pppsshEp.LshllhpAMEpoEspspp.................pstspalW.HLos.lIYhlhapasshtshlpsL......+pKlstpplpRuRDclMWllLQalSuuhphstIss.hh.lhcLas.LYs-c..psl.lP-.sssphs+thAssClWhHL.KcAtscp.phscsIPpsLKhpaEhLpp.....ss.sh..ts-aplAlLsNAYSTsschFspsMssLh-sl.usscssps......uh.h.As.shsshshphLDuLTh+uKhSLl+pIlshhhp.upsKtssP....husALlETYsRhLVYpElESLuh+uhhuphh.......P.....pshpspAhs.LahLhEhhuYRhHHlpsah+hQh.....LoahpsLsulspssphQlapslEsssLRLI.............ptlGSsphpsp.shhlpc.c.PtslsSs........sEELN+sLIholARuh+lT....stss.upshsc-hLtsIhphTPasWsp+oLppFP.shp-hhpQpshsp.tsp.t.hhos--EhtphsoMpsEN-lIt+Fups...sssPlFLCllFKhl..h....pTpsIoshhYclLE+hus+slssplRphsDallhElusotsst.plpKsl.cslspMlW+aNllshDRllLsLhhRst-us-AQlsh.IhQLLLLp......ss-FppRlp.Fhpp.ss.-HappssahctHlsaHppFPEph.h-tssppsp.........LPlYFGNVChRhLPVlDlhlHRaLEh..hplsp....LtslLDplu.LYKFHcpPITalYsTLahacthhps+s..KKpLVsslhuplc-sRP..WsLoEtYpta.hppppsp..W.P-.sYahpLlcRhssolDuhs.....sFsupDWRasEFsNPsApALYloClELhuLPsuPptVsNulIsVllps.sllP.t.l.sahNslGLlhsALPpsYhssla-chhpslsssphtshp..h..sF-.FsFcshcpuhL-....pshssllslupuhapHhulupLsshscphh.plh.hV+TEhpLlYlh+lVGPaLpRh..........hpptcshuslslhhh.h.c.sshsp.uh.l.Y.s.lCDhhYphKYhFsG 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................W..h.s.hh.h.hh.t..hs..phh..l.......thh................p.p.lhWhhhphht....h.....................................h..sp.t..........h..h...s.....h.....h...t........................................................t..................t...hhhh....p.........h.hh.p...h..........................h.shshthls.....shohp.p..h...h.......h..........t...s............s.thh.Thsh.hhh.....phh.+.hhsth...............h.p.t...t..hhlhEhhsaRh...........h.....+.ph......L.h..hp.h..s..h...s..p.ht.p.pl.hhhEshshpLh.......................shhss.h...ts....hh.p...stthhp..................pLNRhhlholAhsh+hp.........t......hh..phLtplht.o...at....Ws.pohthFP..h..pt.hh..t...........p..........s................hts..p..p.hh.p.h.................tt.thl.pa.......................hLChhhhhh..h....p........s.....h.h..phl......pt..........hssptls.tl.hhsDhll.....ch....t.........t.t.........h....psh....t....ls.hlaphpllshDhhlLsLh.+s..cs..............ps.h...lhp..llp.......s-h.p.Rlp.ahp...s.cc...a......h.s..ph.pp..th.t..s-....p...t................................................................hPhaasslshRhLPlhslhlaRhlE.....h.p.......lLthhu.hhta.H.s..hoalhshL.hha...h...th..............Ll.thh.s.hp...p........hop.a.......t..........................pahhtLl.th.
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 62 75 111 +11406 PF11574 DUF3235 Protein of unknown function (DUF3235) Pollington J anon pdb_3b4q Family Some members in this family of proteins with unknown function are annotated as RpfA however this cannot be confirmed. 21.10 21.10 21.90 22.10 20.90 20.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.28 0.72 -3.75 4 59 2009-01-15 18:05:59 2008-09-05 09:48:56 3 1 57 2 13 35 0 89.20 47 42.43 CHANGED GLNSAPTPRslsAsst.PtPAsAAPA.stEYsAssshNoN.l.......sVcuhYsAlsspLAphGlsVPsElpuaYNA..................hlu GLNSAPTPRDss.Ass..PA.......Ps.pssss..sQphA.Asuu.su-cL...........................AVDAlYsAlcsRLA...uhGluVPsplcuaYpAN+s.sFsuFYtANRtsIDhhh.h.................. 0 2 8 13 +11407 PF11575 FhuF_C FhuF 2Fe-2S C-terminal domain Moxon SJ, Bateman A anon Pfam-B_11690 (release 9.0) Domain This family consists of several bacterial ferric iron reductase protein (FhuF) sequences.\ FhuF is involved in the reduction of ferric iron in cytoplasmic ferrioxamine B [1]. This domain is the C-terminal domain that contains 4 conserved cysteine residues that are found to be part of a 2Fe-2S cluster [1]. 20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.89 0.72 -7.19 0.72 -4.27 60 1049 2009-01-15 18:05:59 2008-09-05 10:52:59 3 4 988 0 178 640 12 22.30 54 8.86 CHANGED RcsCClhYpl..sus.ph..Css.CP..hh ..RRoCC.cY+l....Pss..pp......CGD.CsLh.... 
0 35 93 140 +11408 PF11576 DUF3236 Protein of unknown function (DUF3236) Pollington J anon pdb_3brc Family This family of proteins with unknown function appears to be restricted to Methanobacteria. 25.00 25.00 121.40 120.20 21.90 20.40 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.79 0.71 -4.81 7 27 2009-01-15 18:05:59 2008-09-05 11:05:31 3 2 27 2 19 28 0 154.70 53 91.72 CHANGED lE-hIppAahEShpstRhGDctEElctIpphIpsA++lVVsspNpcKhcVlpcllpchs.scsphLpIsTN.ADLTRhPAloKuLhAlDlocADllIARGRLGlPGSGShLlIMDp+GRlLTuuhSPSplIHppslc-ulppEhpcALpRIGhp ..hEchIKpAYhESlpspRhGD+hEElctIpphIhsAK+IVVsTpNpKKFcVlp-llpcl..........s.......splphLpIsTN.ADLTRMPAlsKuLhAlDhscADLlIARGRLGlPGSGShLlIMDsKGRILTuulSPSplIHKpslEcsVcpEhhcALcRIGl.. 0 4 9 15 +11409 PF11577 NEMO NF-kappa-B essential modulator NEMO Pollington J anon pdb_3brt Family NEMO is a regulatory protein which is part of the IKK complex along with the catalytic IKKalpha and beta kinases. The IKK complex phosphorylates IkappaB targeting it for degradation which results in the release of NF-kappaB which initiates the inflammatory response, cell proliferation or cell differentiation [1]. NEMO activates the IKK complex's activity by associating with the unphosphorylated IKK kinase C termini.The core domain of NEMO is a dimer which binds to two fragments of IKK [1]. 21.00 21.00 21.00 23.10 19.90 20.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.28 0.72 -4.22 9 160 2009-01-15 18:05:59 2008-09-05 11:12:20 3 2 60 4 67 138 0 66.90 46 14.09 CHANGED .tpslpphpcLlpENppL+EAlKQoNptMK-RaEELttWpE+Q+EER-Flpp+hcEA+phlptLshEN ......pth.pphpchLpENppL+...-Al+QsNQhh+cRhEEL.taptpQ+EE+-Fh.p+FpEA+chlppLohEp.................... 
0 9 17 37 +11410 PF11578 DUF3237 Protein of unknown function (DUF3237) Pollington J anon pdb_3c5o Family This family of proteins has no known function 21.60 21.60 22.10 22.20 20.70 19.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.79 0.71 -4.91 66 476 2009-01-15 18:05:59 2008-09-05 11:23:30 3 11 281 12 270 484 95 145.20 24 71.09 CHANGED PsL..chhasl........pl.......clsssh.....slGp..................sstG........pRpl.lsls..GGphpGs......plp.......................GplLs.sGuDathlp.s-G...........................hscl-sRasl..cTc..........DG......shIalpspGhhpss......thhp.thtt...upslss......s.phah................+ss.pFETu.s.s+Y.pWLsptlhVG....pupptss.t.....VhhcsapV ...........................................PtLphhhph........pl.........pl...ss.sh.....tlGp...............................ss.t.G........pRph..lslh....GGphpG..........tlp.....................................G..plls...sGuDath...hp..sss............................hscl.cs+Yhl....pTs................DG.....shIhlpspGhhp.ss...t.hht..htt.......spshs.................s..phhh..................pss...pF..ETu..s...tcY.paL.sp.tlalG..puphh.ss..t.....Vhhchap........................................... 0 46 135 217 +11411 PF11579 DUF3238 Protein of unknown function (DUF3238) Pollington J anon pdb_3c5p Family This family of proteins with unknown function appears to be restricted to Bacillus cereus. 
19.80 19.80 20.00 19.90 19.40 19.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.29 0.71 -4.67 2 116 2009-01-15 18:05:59 2008-09-05 11:29:37 3 2 59 6 10 87 1 181.60 58 86.79 CHANGED MspIVKlRuSVFhPhuhhEPhKDstTG+lhpatGDuREFTPaAsNshRSRlEQEVslDFYKcElFoYAsssIsT.KlTNPDGSsphppGchSTENIVCTsIsWspDtVpFcMpASASNPLNshAPssDYLLslpVNKsGolclpG.HDGFPCaEFYKQVDFGsFEpIYhHDFRETsDTPtALuGEM-YSFpppl ..................................MspIVKlRuSVFhPhuhh.....E.shKD.t.TGplhpatGDuREFTPaAVNo..hRSRlEQEVllDFYKcElFoYAsssIs..T.KlT......N....P.DGSlphcpGcs.STE.N.IVCTsIlW.ss-.tVpFcMpASASNP.....LNshA.PssDYlLslpV.p+s.GolclpG.HDGFPCaEFYKQVD..FGs.FEpIYTHDFRETsDTsAALuG-M-YSFpppl......................... 1 1 7 7 +11412 PF11580 DUF3239 Protein of unknown function (DUF3239) Pollington J anon pdb_3c8i Family This bacterial family of proteins may be membrane proteins however this cannot be confirmed. Currently there is no known function. 25.00 25.00 25.30 27.10 23.90 23.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.57 0.71 -4.11 7 82 2009-01-15 18:05:59 2008-09-05 11:49:43 3 3 81 2 19 66 0 124.90 42 54.56 CHANGED P+phGshppLYsphpLsPAlluElsPRslVLLuLVsssssssspspaALssRslsplsGhs...++lGpRVPuVAlp....utcshcst-pW-plSPMPIsWGTsDssVlcRAEpsIsps.WspLppslspl-c ......P+pVGsAppLYssYsLsPAhIAEVNPRDhVlhALVNsssD.spss.P.....paALssRsloslsGhc...RpVGpRlPsVAVs....Gppospsp-pWppISPMPIuWuTsDssVlp+AtcsIPpcpWphLp+sls+l-.p... 0 3 13 18 +11413 PF11581 Argos Antagonist of EGFR signalling, Argos Pollington J anon pdb_3c9a Family Argos is a natural secreted antagonist of EGFR signalling which functions by binding growth factor ligands that activate EGFR by forming a clamp like structure using three disulphide-bonded beta-sheet domains [1]. 
25.00 25.00 28.00 26.70 18.80 23.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.92 0.71 -4.19 3 39 2009-01-15 18:05:59 2008-09-05 12:56:11 3 2 23 8 25 50 0 93.60 46 41.30 CHANGED HSlKDlRILYQVGsSEcDLPV...........................CAPNAV.....................CSKI....................DLYETPWIERQCRCPcuNRsP.plIlHHHc+ssuo ......................................................................................................Cp.asVpp+..-.hh-ps.Ipp.CpCPcuaRCP......pHHopsu......... 0 7 8 19 +11414 PF11582 DUF3240 Protein of unknown function (DUF3240) Pollington J anon pdb_3ce8 Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.13 0.72 -4.07 11 148 2012-10-01 21:59:08 2008-09-05 13:09:45 3 2 146 1 47 234 49 95.10 29 92.06 CHANGED hopphh.LsLlhs+-l+s-LsDhLhph.DhlSGFTlhcssGaupchphhshuEpV+GttctltlpllhsppshpplLspL+pthscssltYWltPVtshGpls ...............................h.....h.LplhhshsL+DsllDh.Lh.cp..s.h.ssFhhhpshuaus..p...p...h...h....hS.......pE.QVpG+...p......chsphclhlscptstsLlpsLcpph..sspth...hah...........t.................... 0 10 34 40 +11415 PF11583 AurF P-aminobenzoate N-oxygenase AurF Pollington J anon pdb_3chh Family AurF is a metalloenzyme which is involved in the biosynthesis of antibiotic aureothin by catalysing the formation of p-nitrobenzoic acid from p-aminobenzoic acid. AurF is a non-heme di-iron monooxygenase which creates nitroarenes via the sequential oxidation of aminoarenes [1]. 
25.30 25.30 25.30 25.30 25.10 25.20 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.88 0.70 -5.50 33 580 2009-01-15 18:05:59 2008-09-05 14:47:50 3 1 308 10 194 533 184 282.80 21 88.21 CHANGED M...........................spppshppLhcu.sc+uh...cPh....p..........sss+ahhPsphssLhssshapphspppphclsppchhshhssslhFEshlh....ptlh+shhstsssssptphshptlsDEshHslMahchhppls............ss.hphpRhhphlhphhsshh.pshstahsshluEphlssh.+shhcDsp.lpPhl+plhplHlh-EuRHhpFuc-hh+phhsph..sttpRthhushlshshthahss.lp.thhtts......slcstcultpsh..sssptt.phh+shhuslhphhccsGlh ........................................................................................................phhtpL.cu.usp.csh...sPhh.-lDWpss..........sssphhhssph.s...Lhs.pshapphsccp+hclspachushhshslahEp.h.Lh....phlhpphht.t.s...s.s.s.s.php.a.sh.s.p.hsDEspHshMFtch.l.p.+hs................sshhth..chhp..h.l..ht.h.............h.s..s......sh..hs......hshass....h.ll...uE-h.ls...ph...p..+...ph....hc....Dsp..l.pPhh+plhplHlh-EARHluFu+....chl....ccth.sph......sphpRt....h.hphh...hsh.sht.hh.hps.h..hs.st.hhhhh......shs..cshtpth...tssp.p..phhpt.hhtplh.hhcchGlh.................................................................. 0 49 126 168 +11416 PF11584 Toxin_ToxA Proteinaceous host-selective toxin ToxA Pollington J anon pdb_1zld Family ToxA is produced by particular Pyrenophora tritici-repentis races and is a proteinaceous host-selective toxin. It is necessary and sufficient to cause cell death in sensitive wheat cultivars [1].ToxA adopts a single-domain, beta-sandwich fold which has novel topology. The protein is directly involved in recognition events required for ToxA action. It is thought to be distantly related to FnIII proteins, gaining entry to the host via an integrin-like receptor [1]. 
25.00 25.00 25.10 256.70 18.40 17.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.72 0.71 -4.25 2 16 2009-01-15 18:05:59 2008-09-05 16:00:25 3 1 6 4 2 14 0 117.80 97 67.67 CHANGED QGSCMSITINPSRPSVNNIGQVDIDSVILGRPGAIGSWELNNFITIGLNRVNAsTVRVNIpNTGRTNRLIITQW-NTlTRGDVYELFGDYALIQGRGSFCLNIRSDoGRENWRMQLEN QGSCMSITINPSRPSVNNIGQVDIDSVILGRPGAIGSWELNNFITIGLNRVNANTVRVNINNTGRTNRLIITQW-NTLTRGDVYELFGDYALIQGRGSFCLNIRSDSGRENWRMQLEN 0 1 1 2 +11417 PF11585 Stomoxyn Insect antimicrobial peptide, stomoxyn Pollington J anon pdb_1zrx Family Stomoxyn, localised in the gut epithelium, is an insect antimicrobial peptide which functions in killing a range of microorganisms, parasites and some viruses. Stomoxyn has a structure consisting of a random coil in water however in TFE it adopts a stable helical structure. Stomoxyn is thought to have a similar function to cecropin A from Hyalophora cecropia due to structural similarities [1]. 25.00 25.00 26.20 102.30 23.30 18.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -8.13 0.72 -4.16 2 2 2009-01-15 18:05:59 2008-09-05 16:08:59 3 1 1 1 0 3 0 42.00 81 62.69 CHANGED RuFRK+FN+hlKKlKHTISETAHVAKDsAVIAGSGAAVVAAs RuFRK+FN+hlKKlKHTISETAHVAKDsAVIAGSGAAVVAAs 0 0 0 0 +11418 PF11586 DUF3242 Protein of unknown function (DUF3242) Pollington J anon pdb_1vr8 Family This protein from Thermotoga maritima is a hypothetical ORFan protein, TM1622, whose structure has been determined. The protein is composed of seven beta strands and three alpha helices [1]. 
25.00 25.00 134.20 134.00 22.70 20.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.81 0.71 -4.60 4 11 2009-01-15 18:05:59 2008-09-05 16:31:12 3 1 11 1 5 12 0 129.20 42 81.71 CHANGED PpuYSlcoAlhlLps.pYhLssVtcl.-uYGsl..ucGhsA..lF-shsGhFalFKYhsEp.AKp.WKKlsKchGhs.phsYh..shhshGhFos+h-upcIlsWWKDNWLFllpGcs.s--FspaV.clYtclK PcuYSl-oAIhlL-s.cYhLsDlcEI.DuYGDV.phKG+VA..lF-scpG.hlalatYcuE-.AKphWKKlsK+hGhsShRohL..-LsshGhFSThh-GKcIluWWKcNWLFllEG+sslE-FVcaVhcVYpclK. 0 2 3 5 +11419 PF11587 Prion_bPrPp Major prion protein bPrPp - N terminal Pollington J anon pdb_1skh Family This family represents the N-terminal domain (1-30) of the bovine prion protein (bPrPp). The proteins structure consists of mainly alpha helices. BPrPp forms a stable helix which inserts in a transmembrane location in the bilayer, with the N -terminal (1-30) functioning as a cell-penetrating peptide [1]. 20.80 20.80 20.80 21.60 19.00 20.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.96 0.72 -7.02 0.72 -4.28 8 461 2009-01-15 18:05:59 2008-09-05 16:47:14 3 6 190 1 19 408 0 28.10 83 11.54 CHANGED Mu+..luCWlLVLFVAsWSDVGLCKK.PKP ..MVKSHlGuWILVLFVAhWSDVGLCKKRPKP. 0 1 1 4 +11420 PF11588 DUF3243 Protein of unknown function (DUF3243) Pollington J anon pdb_3d0w Family This family of proteins with unknown function appears to be restricted to Firmicutes. 20.00 20.00 20.00 21.90 19.50 19.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.58 0.72 -3.99 17 247 2009-01-15 18:05:59 2008-09-05 16:59:24 3 1 189 4 63 156 0 80.70 49 91.39 CHANGED MslL-s.a-pWKsaLucplppupptGhsccslschAhplG-YLAppV-PpNpcE+lLp-LWcVAs--Ep+sLAshhVKlVpp ...........................MoVL-N.FDpWKsFLG-RlcpApptGLsptsluchAaclGD.YLAscVEs+NcpE+LLtELWcVADEpEQHsIAslMVKhVpp.... 
1 25 49 53 +11421 PF11589 DUF3244 Domain of unknown function (DUF3244) Pollington J anon pdb_3d33 Domain This domain adopts an immunoglobulin-like beta-sandwich fold and structurally is most similar to fibronectin. 22.00 22.00 22.00 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.26 0.72 -4.33 10 185 2010-07-04 00:38:26 2008-09-05 17:09:39 3 9 75 3 38 154 25 101.30 21 65.17 CHANGED s.ohpphs+......slshcGcW....t-cttRSI.sslP.oASI..DGslLsIcFpsslsslTIslpc.ppGsVlYEsslssusuptholSlsshssGcYplclopstG.aLhGpFhlE ..................................t......................h.....t..t....RSl....hPhp.s...l...-..s..s..h..l.pl.pF.t.p.shss.l.s....lplps...psGpll.Ypsshss.s.ss.t.h.sIsL.ssh.s.sGpYpLplpssps.hh..hGpFp............... 0 18 33 37 +11422 PF11590 DNAPolymera_Pol DNA polymerase catalytic subunit Pol Pollington J anon pdb_1dml Family This family of proteins represents the catalytic subunit, Pol, of the Herpes simplex virus DNA polymerase. Pol binds UL42, making up the DNA polymerase. UL42 is a processivity subunit which binds to the C-terminal of Pol in a similar way that the cell cycle regulator p21 binds to PCNA [1]. 20.00 20.00 21.90 34.10 19.90 19.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.79 0.72 -4.28 8 108 2009-01-15 18:05:59 2008-09-08 09:10:44 3 2 21 4 0 170 0 36.70 78 3.03 CHANGED c-sAtRLsuAGFsslpuGA....spEEETRQ+L++AFcILA .DDVAARLRAAGFGslGAGA.....TAEETRRMLHRAFDTLA 0 0 0 0 +11423 PF11591 2Fe-2S_Ferredox Ferredoxin chloroplastic transit peptide Pollington J anon pdb_1fct Family The structure of chloroplast ferredoxin in water is unstructured however in a 30:70 molar-ratio mixture of 2,2,2-trifluoroethanol, residues 3 to 13 form an alpha-helix. The rest of the peptide remains unstructured [1]. This family is the N-terminal of the [2Fe-2S) ferredoxin from C.reinhardtii. 
This protein catalyses the final reaction in a pathway which allows the production of H(2) from water in the chloroplast [2]. 25.00 25.00 62.90 62.20 19.80 19.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.64 0.72 -4.31 2 6 2009-01-15 18:05:59 2008-09-08 09:34:00 3 1 4 1 3 4 0 33.00 80 25.98 CHANGED MuMAhRSoFAARV.GA+PAVRuARPuuR.hSs.A MAMAMRSoFAARV.GA+PAVRuARPuuR.hSs.A 0 1 3 3 +11424 PF11592 AvrPto Central core of the bacterial effector protein AvrPto Pollington J anon pdb_1r5e Family This family of proteins represents the bacterial effector protein AvrPto from Pseudomonas syringae. This is the central core region of the protein which consists of a three-helix bundle motif. AvrPto is part of a type III secretion system from P.syringae which is involved in the bacterial speck disease of tomato. In resistant plants, AvrPto interacts with the host Pto kinase, which elicits an antibacterial defense response. In plants lacking resistance, the Pto kinase is not present and AvrPto acts as a virulence factor, promoting bacterial growth [1]. 21.80 21.80 24.10 61.20 20.40 15.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.24 0.72 -4.26 2 21 2009-01-15 18:05:59 2008-09-08 09:57:52 3 1 19 2 2 19 0 105.30 53 65.34 CHANGED DNVTSsQLLSVR+QLAESAGLPRDQHEFlSSQAPpSLRspYNNLYSHTQRTLDhADMQHRaMTGASGINPGMLP+ENVDDMRSAIoDWSDMREALQHAMGIHADI .....s.sVTusQLLsVRHQLA-uAGLPR-QHcFIoNp.APpoLRsRaNsLYs+TQRTLchADhQHRYMTGsSulNPGMhPHENVssMRoslScWSDMREALQHAMslHssh... 0 0 0 1 +11425 PF11593 Med3 Mediator complex subunit 3 fungal Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species. 
It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [1]. Mediator subunit Hrs1/Med3 is a physical target for Cyc8-Tup1, a yeast transcriptional co-repressor [2]. 27.60 27.60 27.70 27.70 27.20 27.50 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.49 0.70 -5.22 5 30 2009-01-15 18:05:59 2008-09-08 10:35:47 3 3 26 0 19 34 0 319.90 39 85.24 CHANGED DsILsssloL--Lc-hLu.csEu.o+DsVsc+IpcA+DuILPLRLQFNEFlpllSsI..EchuspTsQEKFLhIRSKLL-Lpc+lQsLSpDFcpLQPLFsTVsEYScTh..+-KKFQlLETL.............................GoYscsussuASsShspsSs+SsAATTuSTAsTPuA..AslssApoAus.........PssTssl.............Gos.ssss.h.soTsssshstsKKPRKPRQTKK.....sAsAAKsQ..............ASAsAsAsAuA.sp............................................ssh.tss.NuuMtsslPN...sTPs.Mt.....lsusSP.........s...............NuMuSPL......NsMSPMpNhsQ....MGt.s.h...GQhs.sss.GstsspsspsshpussTPStSM...hN.NNITPANILNMS.........s.uF-p.sQsQsPQQttsQsQ.pshNMsM.sDsNN....hD.lDLNNLDLuSLNMDFL ....................................spllssslpL--LpshLA.p..........s-s.s+DpVsppIpcsccpILPLRLpFN-FlphMusI.......-t.ppssp.tKaLhIRsKlLpLss+hQsLSpchptLQPLFsTlsEY.coh..ps+pFp.LEsL.............................uohscsusAssSsu...tshsos+SsAAo.TsoTssTPtA....sshsp..upohss.........Psoossh.............uos.sTss.stsoTs.ssshsssKKPRKPRpTKK..............ApsQ......................spApApApAQs.tpp......................................ss..psshsuuMsuslPN...sTPs.M........lNusSP..........p..............NsMuo.PL......NhMSPMsNs.p....hGt.s.t...uphp..t...us.hss.s..sshpts.ss.ps....hs.NslTPANILsMs.............................shpp.pQ............p...p...th...s.Ns.......lDLNsL-LuuLNMDFL......................................................... 
0 2 7 16 +11426 PF11594 Med28 Mediator complex subunit 28 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Subunit Med28 of the Mediator may function as a scaffolding protein within Mediator by maintaining the stability of a submodule within the head module, and components of this submodule act together in a gene-regulatory programme to suppress smooth muscle cell differentiation. Thus, mammalian Mediator subunit Med28 functions as a repressor of smooth muscle-cell differentiation, which could have implications for disorders associated with abnormalities in smooth muscle cell growth and differentiation, including atherosclerosis, asthma, hypertension, and smooth muscle tumours [4]. 23.40 23.40 23.40 26.50 23.30 23.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.50 0.72 -3.80 3 97 2009-01-15 18:05:59 2008-09-08 11:35:36 3 1 79 0 66 90 0 102.40 48 51.22 CHANGED EIRstVDQsopKF..LDIARQpEsFFLQKRhpL..SVhKP-plLKEEsp-LK...pEl..........QRKDpLl.............pKHhoKI-aWcNL...LoDtpshaKshs-lPs-GRtslsE.uo...............uslPus.c ..........EI+stV-Qshp+F..LDlARQhEsFFLQKRhpL..Ss.KPEpllKEDlsEL+...sEL.........................QRK-sLl....................QKHhsKLcpWQpl...LpDlpsttc..tp..h...h..t..............................h................................... 0 17 22 45 +11427 PF11595 DUF3245 Protein of unknown function (DUF3245) Wood V, Coggill P anon Chahwan C Family This is a family of proteins conserved in fungi. 
The function is not known, and there is no S. cerevisiae member. 26.40 26.40 26.60 27.20 26.10 26.30 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.03 0.71 -3.93 12 68 2009-09-11 06:21:26 2008-09-08 13:01:24 3 3 59 0 51 64 0 134.70 31 67.63 CHANGED KssVuLu+oQ+LluSWLss.osstpups.ps-pELQp...c.hpsVPppLGlGAslPppuscu..o.pp.cLsS.....sDcL++QLLGKshp+hhutttt.....s.s..pppuss.ssptsspsstht--sD-DD--.EuRouhhu+pt...........+KR+ ........................tsVuLu+up+LluSWlsshossc.tsss.csEpEhp...tthh..ps.hPc+LGLGAs.lPppttcu....s..t...tS.....sDcLc+pLLGK.............................pppsst.tttppppst..sscps---DE-.EuRouhhu+pp....................t........................................................................... 0 13 25 40 +11428 PF11596 DUF3246 Protein of unknown function (DUF3246) Wood V, Coggill P anon Chahwan C Family This is a small family of fungal proteins one of whose members, Swiss:A3LUS4 from Pichia stipitis is described as being an extremely serine rich protein-mucin-like protein. 27.80 27.80 96.00 30.60 27.70 27.70 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.24 0.70 -5.08 5 14 2009-09-10 21:41:12 2008-09-08 14:01:14 3 3 10 0 9 15 0 190.90 37 28.88 CHANGED DDDDDETDC--..ET.............llPTsosslTTlsGoGuVTlTsGGSGu..sshttou..........oILPTtoup.......DDDD..sD--TDsEssT.sh.ssGoVTpsPTGoTotTllsoc...uTTssDDDDssssEooIs..DsTssTssT.hTssGsPTs.TVTTNusATTTs+Tpscss.....lTYTuTGpspTTtsTp--ED...CDEThshThThhsPoTTVh.sstlhsNsVTVIupho.h.pshEsDutpssGSGSGSsGSGGS..GSGSS ..........................................s.h.s..s....h.h.sss...hssssto...........s.lsThssp..................ssD-...D-ETDCETsh.sh.PsGss........ThhsTsosohTllsTc...sTTos----psssEssh...psTs.sp.T.hT.sts.TT..lssphssssp.....chs.....lTYTusGpshTThhoppsE....C-EThhhThThhsPpTTVh.sstl.sssVTllsp.o.h.ps.psDsh.......pGpssuuuS..uSGuS....... 
0 3 5 9 +11429 PF11597 Med13_N Mediator complex subunit 13 N-terminal Coggill P anon manual Domain Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med13 is part of the ancillary kinase module, together with Med12, CDK8 and CycC, which in yeast is implicated in transcriptional repression, though most of this activity is likely attributable to the CDK8 kinase. The large Med12 and Med13 proteins are required for specific developmental processes in Drosophila, zebrafish, and Caenorhabditis elegans but their biochemical functions are not understood [4]. 
25.00 25.00 34.70 33.50 24.70 24.70 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.33 0.70 -5.79 18 237 2009-01-15 18:05:59 2008-09-09 14:16:19 3 6 144 0 154 218 0 328.70 28 18.84 CHANGED Ms..st.......................sthoNhhsl........sthssIha+ha..............s...s.hhp.hp+h.phc.LplhhRp.ss...................................tthtLWlF.hssp.p.ht...............ptts.pstlshpstoLhhpA.tsl.tpsLhcs.hsphspttsss.sssp+s......ssssp+........................................hushashFlousosslot.hl+ppssI.LsoRohlshspp..........................................................phh.sPhslssshou.sh..Los........................................................................................................................................................hpsplsusGcLsVuhpshshPulhsLss.s-sst.h..h..tsslhhuPouuhuphhusssssssssssssssssttsssps...............sshpphphtshphLpthGhs.sss.pspsWlplpshpshhsplssphhptsppsppss....shchlhWPtphCF .......................................................................................................................................splsuIpW+ha....t...................s..s..tsshh.u.h.cshps.phltshhRp.t........................................st+pLWlF....ah..s.p.-..ssh.t.ht.................pp..s.hp.st..lsh...ps.p.oL.lhcA.hts.lhp...+.s.Lhpc.shs+hGcahl..pP.hp..psc+.......hspSp+..................................................LSsuFsFFlHG-SsVCoSl-lppHpslhLlscpHlph..Apps............................................................tshpVhluPaGls.GsLTGpuh.chos....phhtc....................................................................................................................................................p....hc..ss..ssVcVlVu....u.s+hhaPuthlLl....stsD.s....................ssssuth.......u....t...h...s..spps..sus.psss.ps....s...hssss.s.t......................................p..ptpth.ph.s........t....tt...p...t.ts.....thh.hsht.C................................................................................................
........ 0 30 56 106 +11430 PF11598 COMP Cartilage oligomeric matrix protein Pollington J anon pdb_1fbm Family This family of proteins represents the five-stranded coiled-coil domain of cartilage oligomeric matrix protein (COMP). This region has a binding site between two internal rings formed by Leu37 and Thr40 [1] 20.80 20.80 20.90 21.10 20.60 20.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.27 0.72 -3.89 12 203 2009-01-15 18:05:59 2008-09-09 14:26:53 3 22 60 15 94 168 0 44.50 45 5.26 CHANGED DhscpLl...spLpphsplltEL+-.l+p.......................Ql+EhshLRsTItcCpuCG ...........spthl...pplpphNplLsEl+-hlRp..................................................QVKEhoaLRNTIhECpuCG... 0 10 19 44 +11431 PF11599 AviRa RRNA methyltransferase AviRa Pollington J anon pdb_1o9g Family This family of proteins represents the methyltransferase AviRa from Streptomyces viridochromogenes. This protein mediates the resistance to the antibiotic avilamycin. AviRa methylates a specific guanine base within the peptidyl-transferase loop of the 23S ribosomal RNA [1]. 26.10 26.10 26.60 132.50 24.60 26.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.73 0.70 -5.35 2 11 2012-10-10 17:06:42 2008-09-09 14:39:40 3 1 11 2 6 13 1 242.50 39 98.23 CHANGED Mp.Y+a.scp.s.pDhAsGhVLapssG.PsFPVRLAhEIapRsLth..tcu.loLaDsCCGuuYhLolLGLL+ppoltplhuSDlsstslpLAscNLuLLo.uGlstRc.Eh+t..ppatKsSal-AhpuhcRlcEhLTtE.......I+Th.lFcspsLpss.hs.sPDlllTDlPYGphscWEt..sutslsthhpuLuushsucslIsV.hD+ppKI.ss.hptLER.hlGpRph.lh..+Au-lh ....p.Y+atosptsYpDhASG+VLasAsGhPuFPVRLASElhQRshphL.p...p...uPhTLaDPCCGuGYLLTVlGLLHscplpplluSDlsscsLplAt+NLuLLotpGLppRpcpL+chhcpasKPSah-AlcSAcRLcchlsts.ss.s.tshtpADlhcsps.tsh.stshss.cIVlTDLPYG-hosWpG...suss...PlsphLsultslL.P.spuVVsV.os+ucKlp.p.sch+tlc+lKlG+Rpssla..h......... 
0 2 6 6 +11432 PF11600 CAF-1_p150 Chromatin assembly factor 1 complex p150 subunit, N-terminal Pollington J anon pdb_1s4z Family CAF-1_p150 is a polypeptide subunit of CAF-1, which functions in depositing newly synthesised and acetylated histones H3/H4 into chromatin during DNA replication and repair [1]. CAF-1_p150 includes the HP1 interaction site, the PEST, KER and ED interacting sites. CAF-1_p150 interacts directly with newly synthesised and acetylated histones through the acidic KER and ED domains. The PEST domain is associated with proteins that undergo rapid proteolysis [2]. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.26 0.70 -4.96 13 305 2012-10-03 14:45:15 2008-09-09 14:49:06 3 48 200 0 206 304 4 187.60 19 26.44 CHANGED ssEucshEsss-s-.SVLSpSSh.SShSso.SSPEussss.chpss..........sSPhsuSTslc+hopKpl+pptEKpKh+hp+....ttth..............hctE+pE+ccl+...pEtKtttc+tK......EEt++cKcEEK-l+E+ER..+E..KKE+-EKEKAEK.RlKEE+RKE+p..Es.t.....AKhEEKRK....KEEEKRL+EEc.......cRIKAE..KAEITRFFQKPK.TspAPK.........TLAuuCGKFAPFEIKEsMlL ..........................................................................................................ts........................................................t...............................ps.....ps.....hp.p.h..p..pp.hp...t.t..t...cp.p..p..p...pc............t.tt.........................pp.pc+p.c.c.c.ct+..ccp..+p.p.c....c....p..pK.......cpcccc..+.c.c....c+.c...tcc....cE.+......cc.....c+p..ccE.c-+..tc.....+pc.....+cccc.cc.cp...............t+..cp.cpt.......+ccp....cphtpct.............................ptp.....t.....h.t.h.................................................................................................................................................... 
0 66 109 166 +11433 PF11601 Shal-type Shal-type voltage-gated potassium channels Pollington J anon pdb_1s6c Family This family of proteins represents Shal-type voltage-gated potassium channels which interact with Kv channel-interacting proteins to modulate cell surface expression and function of Kv4 channels. The interaction of the Shal-type protein Kv4.2 and the Kv interacting protein KChiP1 forms a structure which is like the structure between calmodulin and its target peptides when they interact. Interactions of an N terminal alpha helix in Kv4.2 and a C terminal alpha helix in KChIP1 are essential for the modulation of Kv4.2 by KChIPs [1]. 20.50 20.50 22.60 22.10 20.10 19.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.43 0.72 -4.43 4 194 2009-01-15 18:05:59 2008-09-09 15:44:02 3 6 79 11 112 179 0 29.00 79 5.01 CHANGED uuVAAWLPFARAAAIGWhPlAspPLP..P ...AGVAAW.LPFARAAAIGWhPlAssPMP.AP... 0 19 28 59 +11434 PF11602 NTPase_P4 ATPase P4 of dsRNA bacteriophage phi-12 Pollington J anon pdb_1w44 Family P4 is a packaging motor which is involved in the packaging of phi-12 genome into preformed capsids using ATP. P4 is located at the vertices of the icosahedral capsid. ATP drives RNA translocation through cooperative conformational changes [1]. 
20.40 20.40 20.90 28.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.02 0.70 -5.44 3 7 2009-01-15 18:05:59 2008-09-09 16:09:10 3 1 5 60 0 10 1 299.70 30 90.31 CHANGED s+LhDAKPsAl.+lApPAAlcTsussoah...AAVhE.Sl+DGGs.......LhAVGlc...PlVlDKDAtphlAsKtKSuDsESs..........uVshVsVpNssl+..........osPLLus+PVsu..usHRWsscshhSGlslVsGuTGSGKS.......csLNuK.cPDVTIRWGEPuEuYDp.DouVaVuDLsEMLhVslhLAsLsaRV.VIDSlRNLlFulsGAAouGGISsuhYuLLTDIuNlsAphGClVVAulNPMSsDDK.VELVassltASosGhllssDussVSQTlhRTGcGRIFsG.tPLsRsTHslphccPcHopLs-HTShuSsu+LESG....oVDsDDEN-SsPRRGA ..........................................................................................c.hDApshuh.hhut.sshpsss.ts.....uuVhc.u.cpGs........lhuls.......Vh.Kss..hht.t.huscs.ss..............V...sshhh..........ss.hluppPVs.....HRas.....SGlhslhGtssSGKo........tLstc.cs.shIRaGEshpuYsp.Dhsltlsslsc...sslhLu.shsapV.slDSlRsllhpltGsAouGGIstshauhLTDluslhsphsC.VVhslNPhsss-K....lE.hVatpshASssuhhlssDss...pshhRTscGhIhsu.tsLstssc..phphspphphh.+oo.susu.ltps....oV.ssDc..shsRRhs.................. 0 0 0 0 +11435 PF11603 Sir1 Regulatory protein Sir1 Pollington J anon pdb_1z1a Family Sir1p interacts with the BAH domain of the Orc1p subunit of the origin recognition complex (ORC) resulting in the establishment of silent chromatin at HMR and HML in S.cerevisiae [1]. The amino acids from the ORC interaction region of Sir1p are presented on a conserved, convex surface that forms a complementary interface with the Orc1 BAH domain, critical for transcriptional silencing [1]. 25.00 25.00 25.70 25.10 24.80 24.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.64 0.71 -3.99 2 55 2009-01-15 18:05:59 2008-09-09 16:23:57 3 3 21 4 21 61 0 126.40 31 27.48 CHANGED h.p....lssRhhVhDGaLlDhsccKPIN.+sPclhhLL.pDcphthlsQ.NLVcWschKK.pss..lslKuhpLFKahK.lh+cFhhhtDh+lhhh.hcupL+Yc.sc+hT..CpCs.l.Lsc.ss.Ylas. 
......hh........lSsRasVl.DGaLlDhsc......+Ks.IN.c...sPclhp....hL.sDc-tcplsphsLlDWschK...c.s...pss..lslcuppLFchlcthh+cFhpstD.G+l..hlhsscsph+hc..scchT..p+Ch.lhLschsshYla................................ 0 2 12 19 +11436 PF11604 CusF_Ec Copper binding periplasmic protein CusF Pollington J anon pdb_1zeq Family CusF is a periplasmic protein involved in copper and silver resistance in Escherichia coil. CusF forms a five-stranded beta-barrel OB fold. Cu(I) binds to H36, M47 and M49 which are conserved residues in the protein [1]. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.30 0.72 -4.12 186 1350 2009-01-15 18:05:59 2008-09-09 16:47:19 3 19 804 6 364 993 141 68.20 33 32.48 CHANGED Gplppl-.tssplTlpHuslss..lsh.P.uMTMsFs.ls...sss..lsslpsGspVcFphpp.ssss.hs..lsplp..p .......GhVcslDhpspplTlpHsPIss.........ls...W..P.uMTM.sFs.ls...sss..thsplKsG-cVpFshhp.psst.hh..lpslp...s............................. 0 72 191 266 +11437 PF11605 Vps36_ESCRT-II Vacuolar protein sorting protein 36 Vps36 Pollington J anon pdb_2cay Family Vps36 is a subunit of ESCRT-II, a protein involved in driving protein sorting from endosomes to lysosomes. The GLUE domain of Vps36 allows for a tight interaction to occur between the protein and Vps28, a subunit of ESCRT-I. This interaction is critical for ubiquitinated cargo progression from early to late endosomes [1]. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.79 0.72 -4.17 22 246 2012-10-04 00:02:25 2008-09-09 17:02:47 3 7 224 4 174 261 0 86.50 33 17.27 CHANGED phssSspPlLppsEp.lhlpcsVGLYpGcp..KhhsppsG+laLTo+RlIYsDstcspp.ulsltLphlpps....chsuuFh.ppSsKIhlhLcp ..................t.hpshL....sEshlhhp..csVsL.Y...-Gct..........Kh.............p.........apsGplhLToHRllah.Dpp.p...s....p..p.........s..lulsLppltph.........Eht.uuhl..tpSsKIhlalc............................... 
0 52 88 142 +11438 PF11606 AlcCBM31 Family 31 carbohydrate binding protein Pollington J anon pdb_2cov Family This family of proteins represents the family 31 carbohydrate-binding module of beta-1,2-xylanase. This protein is from Alcaligenes sp. strain XY-234. The AlcCBM31 module makes a beta-sandwich structure with an immunoglobulin fold and contains two intra-molecular disulfide bonds. AlcCBM31 shows affinity with only beta-1,3-xylan [1]. 25.00 25.00 35.20 108.70 19.20 16.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.18 0.72 -4.02 3 5 2009-01-15 18:05:59 2008-09-10 10:58:01 3 2 3 6 0 6 0 92.00 48 19.81 CHANGED EPPENCoDDFNFNYVSDsEIEVYHVDK.GWSAGWNYLCLDDYCLSGTKSNGAFoRoFSAsLGQTYKlTFKVEDhsGQGQQIlD+slTFTsQVCN ...psPEsCo-DFGaNYVSDoEIEVFHpDp.GWSAGWNYlCLDDYCLsGsKSNGuFsRoFsAsLGQTYKITFKVED..GpGQaIlDKslTFTNpsCs. 0 0 0 0 +11439 PF11607 DUF3247 Protein of unknown function (DUF3247) Pollington JE anon pdb_2e12 Family This family of proteins is the protein product of the gene XC5848 from Xanthomonas campestris. The protein has no known function however its structure has been determined. The protein adopts a Lsm fold however differences with the fold were observed at the N-terminal and internal regions [1]. 25.00 25.00 66.70 66.40 21.00 20.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.17 0.72 -3.77 7 26 2009-09-10 23:21:42 2008-09-10 13:05:27 3 1 26 2 6 28 0 98.30 64 94.18 CHANGED MPKYAPHVYoEQsQIATLEtWVsLLDGQERVRIELDDGSMIuGTVAVRPolQTYhDEpccEGsNGQLRLDpLDASQEPQWIWMDRIVAVHPhPLGA.PpVMP ...MsKaAP+VYoEQsQIATLEpWVsLLDGQERVRIELDDGSMIuGTVAVRPolQTYhD-pcsEGlNGQLRLDpLDASQEPQWIWMDRIVAVHPL.LGAsPpVMP...... 0 1 3 4 +11440 PF11608 Limkain-b1 Limkain b1 Pollington J anon pdb_2diu Family This family of proteins represents Limkain b1, which is a novel human autoantigen, localised to a subset of ABCD3 and PXF marked peroxisomes. 
Limkain b1 may be a relatively common target of human autoantibodies reactive to cytoplasmic vesicle-like structures [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.92 0.72 -4.23 5 78 2012-10-02 20:46:34 2008-09-10 13:06:52 3 15 49 1 42 105 2 86.10 62 6.32 CHANGED CpSLLaVaNLPsN+D...uKAIuNRLRRLSDNCGGKVLuISGsSAlLRFuNQEuAERApKRMENEDVFGNRIoVSaoPt.R-ss-scschp.ss .........................CHTLLYVYNLPsN+D...uKSlSNRLRRL....SDN.C..GG.KV.L.....S....I......o...G..s.S...AILRFlNQ-SAERAQKRMENEDVFGNRIhVSF.o.P+s+Ehs-scsp....shs............ 1 12 14 25 +11441 PF11609 DUF3248 Protein of unknown function (DUF3248) Pollington JE anon pdb_2e6x Family This family of proteins is thought to be the product of the gene TT1592 from Thermus thermophilus however this cannot be confirmed. Currently there is no known function. 20.20 20.20 20.20 73.70 18.20 17.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.83 0.72 -4.27 2 19 2009-01-15 18:05:59 2008-09-10 13:13:55 3 1 19 4 13 17 0 63.20 63 84.51 CHANGED L-tLGtpLVWRhG+sE.pDslVVRlGhASATPRFtcLPRLhshs-AEhpchsQpGRlhlEWV- L-tLGpHLVWRIG+uEsE-l...LVVRVGLASATPRFtcLPRLhNlsDAElccLlpcGRVRVEWVE. 0 3 8 13 +11442 PF11610 Ste5 Scaffold protein Ste5-Fus5 binding region Pollington J anon pdb_2f49 Family This family of proteins represents the Fus5 binding region of Ste5. Ste5 functions in the yeast mating pathway and is required for signalling through the mating response MAPK pathway. Ste5 has separate binding sites for each member of the MAPK cascade. This region of Ste5 allosterically activates autophosphroylation of Fus3, a mitogen-activated protein kinase. Auto-activated Fus3 has a negative regulatory role, and promotes Ste5 phosphorylation which leads to a decrease in pathway transcriptional output [1]. 
20.60 20.60 21.60 68.50 19.20 18.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.31 0.72 -4.42 2 10 2009-01-15 18:05:59 2008-09-10 13:47:16 3 1 9 0 1 10 0 30.00 98 3.34 CHANGED TPVERQTIYSQAPSLNPNLILAAPPKERNQ TPVERQTIYSQAPSLNPNLILAAPPKERNQ 0 1 1 1 +11443 PF11611 DUF4352 TRF2; Domain of unknown function (DUF4352) Pollington J anon pdb_3cfu Family Members of these family are poutative lipoproteins that fall into the Antigen MPT63/MPB63 (immunoprotective extracellular protein) superfamily. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.52 0.71 -3.98 49 1093 2012-10-03 03:07:29 2008-09-10 15:00:56 3 22 747 2 203 850 26 123.50 19 50.75 CHANGED hplu-ssp.h.sshplsVsss.....ths.s.sphhp............pssspalllslolcN.puccshshss..pFpLh.cspupphc.hshsspptt......s................h.hsslsPGppssGplsFclspssp....hpLpa.....ssh.asst......tl ...............................................................spssp.......s.s.h.cl.....olsss..........hs.s.sp..h.t..ps.....................pstspa...lhls..lsl.....c.N...puccsl..s...h....s...s...t...pa.pLh....s...sc.sp...p...a..c...hs.ssst.t......t................hhhpplsPspshpG.p.......l.sF.-l..sp...ssp.....hplph....ssh.......t.................................................................. 0 69 147 184 +11444 PF11612 T2SJ GspJ; Type II secretion system (T2SS), protein J Pollington J, Desvaux M anon pdb_3ci0 Family The T2SJ proteins are pseudopilins, which are targeted to the membrane in E. Coli. T2SJ forms a complex with T2SI (Pfam:PF02501) and T2SK (Pfam:PF03934) which is part of the Type II secretion apparatus involved in the translocation of proteins across the outer membrane in E.coli. The T2SK-I-J complex has quasihelical characteristics [1]. 
20.40 20.40 20.40 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -11.27 0.71 -4.52 87 934 2009-09-11 07:38:19 2008-09-10 16:15:32 3 4 804 11 192 656 242 157.00 26 74.61 CHANGED c-tspppspclppLQRuhshl-pDlpQh..................................ss....RssRst...Gt..sshhhtssp.................ltFsRsGWtNP.th............PRusLp+VsY..pl............p........ssp..LpRhhashhDs.stst.tPthp...slLss.........Vpshplca..h..........sss..............pWpcp...Wss.............sptLPpulclplphpsh....G.plpRhahlssts ..............................................................................................p..hsttcspclstlQpshshlppDlpQh..................................hs............R.sRss....ut..sthh.htss...h...t...............hltFsRtuhhNP..h..............sRusL.p.VsYcl.................c......................ssp.......Lp.R.hsa.s..hsDs....sss....pPthp............sLlst..................Vcuhplpa.....a.................................sss.........................pWppp.....Wss...........................sptlPh.AVclsLphpph....G.plpRhahl.t.s................................................ 1 54 108 151 +11445 PF11613 UCN2 Agonist of corticotropin releasing factor R2, Urocortin-2 Pollington J anon pdb_2rmg Family This family of proteins represents urocortin 2, a member of the corticoliberin family which is a selective agnonist of corticotropin releasing factor 2. The backbone of the protein is mainly alpha-helical but it contains a helix-loop-helix motif [1]. 22.00 22.00 40.90 40.50 20.70 20.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.74 0.72 -4.00 4 62 2009-01-15 18:05:59 2008-09-10 16:49:57 3 1 35 5 36 68 0 37.80 66 26.12 CHANGED lsLSLDVPhslhplLh-.A+AKshRsQAAsNA+lLA+l hTLSLDVPTNIMNILFslAKAKNlRApAAANA+LhAQI. 
0 2 3 10 +11446 PF11614 FixG_C Bre5; IG-like fold at C-terminal of FixG, putative oxidoreductase Pollington J, Coggill P anon pdb_2r39 Domain This domain is part of a transmembrane protein, FixG, itself part of the FixGHIS operon closely associated with the FixNOPQ operon that is the symbiotically essential cbb3-type haem-copper oxidase complex. FixG expression is induced by oxygen-deprivation. This C-terminal domain adopts an E-set Ig-like fold. 25.60 25.60 25.60 25.90 25.50 25.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.69 0.71 -3.99 345 1156 2012-10-03 16:25:20 2008-09-11 09:45:12 3 18 966 1 346 998 117 120.90 25 25.60 CHANGED hlhslspRsslclsVl+DRssLah.phscGp....IcNsYsl+lhNpsppspp.aplslp...G.............shpl...............s...........pplplsssps......tphslhlps..sspt...........ht...pssp.slphpl....p........t........s.stthp.sp.....pcopFhsP .............................hhhslspRpshtlsVl+DRs.hhh..pp..ssGp....IcNsYsl+lhNpsppspp.aplslp..G..hs............slpl.............................t.....sppslpl.tsuchhplsVhlps..s.pt............hp...pssp...slphplp.......p.....s.ssphs.hp......pcopFlsP................................... 0 95 216 281 +11447 PF11615 DUF3249 Protein of unknown function (DUF3249) Pollington J anon pdb_2pqr Family This family of proteins represents the gene product of the protein CAF4, the yeast protein YKR036c. This protein contains seven WD40 repeats in its C terminus. The function however is unknown [1]. 
20.70 20.70 20.70 151.10 19.50 17.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -8.99 0.72 -4.23 2 13 2009-09-13 21:11:59 2008-09-11 10:25:08 3 1 13 2 4 10 0 60.00 100 9.25 CHANGED QKGQVGIFSFQNNYADSATTFRILAHLDEQRYPLPNGAAEKNLPSLFEGFKATVSIIQQR QKGQVGIFSFQNNYADSATTFRILAHLDEQRYPLPNGAAEKNLPSLFEGFKATVSIIQQR 1 1 1 1 +11448 PF11616 EZH2_WD-Binding WD repeat binding protein EZH2 Pollington J anon pdb_2qxv Family This family of proteins represents Enhancer of zest homolog 2, (EZH2) a 30 residue peptide which binds to a WD-repeat domain of EED by residues 39-68. EED is a component of PRC2 complex which is involved in gene expression [1]. This interaction is required for the HMTase activity of PCR2 [1]. 25.00 25.00 32.00 30.80 18.50 17.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.14 0.72 -7.19 0.72 -4.08 6 144 2009-01-15 18:05:59 2008-09-11 10:59:13 3 3 38 1 55 146 0 29.80 68 4.73 CHANGED KuhFsSNRQKI.E+T-ILNpEWKphRIQPl ...KoMFsoNRQKIhERT-ILNQEWKpRRIQPV. 0 2 6 21 +11449 PF11617 Cu-binding_MopE Protein metal binding site Pollington J anon pdb_2vov Family This family of proteins represents a unique protein copper binding site that involves a tryptophan metabolite, kynurenine in the protein MopE. The production of kyneurenin by modification of tryptophan and its involvement in copper binding is an innate property of MopE [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.57 0.72 -4.17 36 222 2009-01-15 18:05:59 2008-09-11 11:40:36 3 34 44 0 161 228 684 29.10 42 8.14 CHANGED DCsDsssslpPGA..sElC.DulDNsCDGtsD-s ...........DCsD...tsstlhPuA....sE....lC.s..G.l.DssCsGthDp....... 0 78 87 128 +11450 PF11618 DUF3250 Protein of unknown function (DUF3250) Pollinton J anon pdb_2yrb Family This family of proteins represents a protein with unknown function. 
It may be the C2 domain from KIAA1005 however this cannot be confirmed. 27.70 27.70 28.70 36.30 27.60 27.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.27 0.72 -3.85 11 173 2009-01-15 18:05:59 2008-09-11 13:07:17 3 8 74 1 82 129 0 107.00 44 9.07 CHANGED TFCThsFaDFETpoTPlspGhpshYsFTopYsVphDsLhlpYLposulhl-LHpsh.upE..acTlAuutlshctll.....-psu.+ltuosshsususp..shGsl-Yhh+lphP .............TFCTYuFYDFEhpsTPllp.G.pPtY.sFTSQYlVcscsLFLpYlQ...............psol.pLElHQAh..us-...acTlAAsplphcclL...................Epss........+.lauos...sL..s.Gsp......G-.....saGslEYWhRL+hP.................................... 0 23 31 51 +11451 PF11619 P53_C Transcription factor P53 - C terminal domain Pollington J anon pdb_2rp4 Family This family of proteins is the C terminal domain of the transcription factor P53. While the rest of the protein is quite conserved between the different transcription factors such as p53 and p73, the C terminal domain is highly divergent. The DM-p53 structure is characterized by an additional N-terminal beta-strand and a C-terminal helix [1]. 25.00 25.00 56.50 55.20 21.80 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.46 0.72 -4.23 2 16 2009-09-11 05:01:43 2008-09-11 13:16:24 3 1 12 4 7 17 0 70.20 66 16.58 CHANGED DDSAAEWNVSRTPDGDYRLAITCPNKEWLLQSIEGMIKEAAAEVLRNPNQENLRRHANKLLSLKKRAYELP ........A-WsVSRT.DGDYRLuITCPpKEWLLQSIEGMIKEAAAEVLRNPNp.-NLR+HAN+LLSLKKpAaELP...... 0 1 1 4 +11452 PF11620 GABP-alpha GA-binding protein alpha chain Pollington J anon pdb_2juo Family This family of proteins represents the transcription factor GABP alpha. This alpha domain is a five-stranded beta-sheet crossed by a distorted helix termed an OST domain. The surface of the GABP alpha OST domain contains two clusters of negatively-charged residues suggesting there are positively-charged partner proteins. 
The OST domain binds to the CH1 and CH3 domains of the co-activator histone acetyltransferase CBP/p300, a direct link between GABP and transcriptional machinery has been made [1]. 21.90 21.90 21.90 34.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.92 0.72 -4.00 5 88 2009-01-15 18:05:59 2008-09-11 13:47:53 3 5 69 1 55 81 0 86.80 60 19.93 CHANGED s--lIVQcIDI+EPIusLKKLLEsRLplS.LctY-IaLQDhQ.L-PD+SLFDQGVKs-GoVQLSVQVQTt.s.ccKLNIlEIVKPs--Vcs .......s.s-hlsQsIDINEPlGNLKKLLEPRLQsS.LDua-ICLQDIQ.L-P-+SLFDQGVKTDGoVQLSVQlh.oht.................G..-.KLNILEIVKPsEsVc....... 0 14 18 35 +11453 PF11621 Sbi-IV C3 binding domain 4 of IgG-bind protein SBI Pollington J anon pdb_2jvg Family This family of proteins represents Sbi domain IV which binds the central complement protein C3. Sbi-IV interacts with Sbi-III to induce a consumption of complement via alternative pathway activation [1]. When not interacting with Sbi-III, Sbi-IV inhibits the alternative pathway without complement consumption. The structure of Sbi-IV consists of a three-helix bundle fold [1]. 25.00 25.00 34.20 34.10 19.60 19.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.36 0.72 -3.82 2 163 2009-01-15 18:05:59 2008-09-11 14:00:45 3 2 160 4 2 45 0 68.00 96 16.01 CHANGED VSIEKAIVRHDERVKSANDAISKLNEKDSIENRRLAQREVNKAPMDVpcHLQKQLDALVAQKDAEKKVA ...........VSIEKAIVRHDERVKSANDAISKLNEKDSIENRRLAQREVNKAPMDVKEHLQKQLDALVAQKDAEKKVA.. 0 1 1 2 +11454 PF11622 DUF3251 Protein of unknown function (DUF3251) Pollington JE anon pdb_2jwy Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. Some members if this family are annotated as putative lipoprotein YajI however this cannot be confirmed. 
19.90 19.90 20.00 22.50 19.50 19.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.77 0.71 -4.84 7 846 2009-01-15 18:05:59 2008-09-11 14:18:11 3 1 485 1 36 261 2 165.70 41 85.84 CHANGED CA.QpEl.ph+pslSpLNQchTpLsppssKlpQQNtLNAcSspGVYLLPuApTsuRL-SplGpLphSLtsIpstAsGopssL+Ips.SscsLPAFsApVEaGQlpuTotshtpsssQsQ.hsAPsSlLssS-ssIsLpLsGloP-QLGFlRlHslQP......lsp .............................................................................................Qh+ppluTLppphTp.......lpppTspI...Tpps+.L......s......tcppps.la....L.P...utpp.hhLp.upIGsLthpllsIsPsssGoslpLcl.s..ssPls.shuhsspaGphpsTpsph.th.hQ...spL...h....N....ushplLsso.sDIsLpLKGlSPspLGal+Ip................shp.................................... 1 1 6 20 +11455 PF11623 DUF3252 Protein of unknown function (DUF3252) Pollington J anon pdb_2jz2 Family This family of proteins has no known function. Some members are annotated as Ssl0352 however this cannot be confirmed. Currently there is no known function. 20.80 20.80 21.10 69.80 19.80 20.40 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.56 0.72 -4.56 16 94 2009-01-15 18:05:59 2008-09-11 14:25:39 3 1 88 3 43 88 112 52.90 62 50.51 CHANGED hILPGssV+VpNssshYatapGhVQRloDGKAAVLFEGGNWDKLlTFpLsELE ...ILPGssVpVpNssssYatYpGhVQRloDGKAAVLFEG.GNWDKLlTFcLsELE. 0 7 28 39 +11456 PF11624 M157 MHC class I-like protein M157 Pollington J anon pdb_2nyk Family This family of proteins represents M157,a divergent form of MHC class I-like proteins which is the protein product of the mouse cytomegalovirus. This protein is unique in its ability to engage both activating (Ly49H) and inhibitory (Ly49I) natural killer cell receptors. M157 is involved in intra- and intermolecular interacts within and between its domains to form a compact MHC-like molecule [1]. 
20.60 20.60 21.10 21.20 20.20 20.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.79 0.70 -5.27 5 132 2009-01-15 18:05:59 2008-09-11 14:50:58 3 1 6 1 0 116 0 237.50 23 63.71 CHANGED TVTRoGaHKRTISVsNGRPVVVWDsGDKNPKICKICPAVoSINsEYlFLDIQKMRLsNLLuQuLh-sQRICVRYsCLFL..+FDV.......lCDVYHTTD+VRlTYp+QTsKINIQGSGTFshSDAKshGlaMLpsNVcEIKp+WRsTVQpLKQLGYMNcTEVEFWYNT...TGLTTCVVTSRSNsPFTVELSLNTNSSAIVT.....EESTVDsQsVTVKAPGSasQRCYVTSSLGWKGVVT..PPSQYRTK......RsPVN.ISSSKhTGI .........................................................................................h.......................................s.so-h...ssEhhFhtpQppaLpp.hhphl.tt.stt.........l.slcYpCpah....hhsh........tCpV..hhsss....psp..l.....hhh...........hs....sp.t..s.......ss..ps..........s.s........s...............h........s....s......scuhulphLpspsppIppRWpssspclhphup.scs-hpFhhsh......pGhspCslpSpuslhaplpl...s..s...s..s..s....s..tshss........shssh..hps.ss....sps.s.s..s..t....s....tChlpSohG.Wpsslp.........................................t............................. 1 0 0 0 +11457 PF11625 DUF3253 Protein of unknown function (DUF3253) Pollington J anon pdb_2ns0 Family This bacterial family of proteins has no known function. 20.60 20.60 21.10 22.00 20.00 20.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.83 0.72 -4.10 28 140 2009-01-15 18:05:59 2008-09-11 14:57:13 3 4 137 1 72 146 38 83.60 37 82.99 CHANGED hsssspplctsILcLlupRusspolCPSElARAl.......us-s..WRthMssVRpsAtcLsptGclplppKGc....sV.DPssh.+GPlRlth..ts ...............................................h....ttplctsIlpLlspRus....spolCPSElARAl......................................us-s....WRsLMtslRcsAhcLspsGclpIhQ+Gc......sV...DPssh....RGslRlph...tt......... 
0 26 49 61 +11458 PF11626 Rap1_C Rap1_C; TRF2IP; Yippee-Rap1; TRF2-interacting telomeric protein/Rap1 - C terminal domain Pollington J anon pdb_3cz6 Family This family of proteins represents the C-terminal domain of the protein Rap-1, which plays a distinct role in silencing at the silent mating-type loci and telomeres [1]. The Rap-1 C terminus adopts an all-helical fold. Rap1 carries out its function by recruiting the Sir3 and Sir4 proteins to chromatin via its C terminal domain [1]. Rap1 is otherwise known as TRF2-interacting protein, as it is one of the six subunit components of the Shelterin complex. Shelterin protects telomere ends from attack by DNA-repair mechanisms [2,3,4,5]. 20.10 20.10 20.10 20.20 19.90 19.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.93 0.72 -4.10 81 175 2012-10-03 10:46:08 2008-09-11 15:24:49 3 12 150 7 118 164 1 88.60 26 15.59 CHANGED lpp.hpphGh...spphls.pALtsoot-hthstp.llph.........hpt......sps.....................P.....sh.GlWTpcDDphLpss...................................sspshcp.............LhpKH...G.ppplptRhcFLpt ..............................................h....hpphsh...ststls.pALhpsSu-hthssphlhsh.................hpt..........sps.................................h..........P..sh.GlWTccDDpsLpts...................................-pcshcp.............LlcK+...G.spplptRhcFlp.t................................ 0 25 52 88 +11459 PF11627 HnRNPA1 Nuclear factor hnRNPA1 Pollington J anon pdb_2h4m Family This family of proteins represents hnRNPA1, a nuclear factor that binds to Pol II transcripts. The family of hnRNP proteins are involved in numerous RNA-related activities [1]. 
25.00 25.00 25.00 25.00 24.40 24.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.23 0.72 -3.79 8 294 2009-01-15 18:05:59 2008-09-11 16:01:28 3 6 38 0 124 141 0 36.70 76 13.49 CHANGED GssYNDFGNYsuQ.pSNYGPMKuGsaGGh.R..sSG.PYGGG ............GGuY.N.....DFG.N.YNN....Q.SSNFGPMKG.GN.FGG..R......SSG.PYGGG............. 0 1 3 7 +11460 PF11628 TCR_zetazeta T-cell surface glycoprotein CD3 zeta chain Pollington J anon pdb_2hac Family The incorporation of the zetazeta signalling module requires one basic TCR alpha and two zetazeta aspartic acid TM residues [1]. The structure of the zetazeta(TM) dimer consists of a left-handed coiled coil with polar contacts. Two aspartic acids are critical for zetazeta dimerisation and assembly with TCR [1]. 25.00 25.00 30.20 42.30 19.50 18.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.46 0.72 -4.49 43 108 2009-01-15 18:05:59 2008-09-11 16:17:06 3 4 42 2 36 124 0 32.80 62 22.10 CHANGED DPKLCYlLDGILFlYGlIlTuLaLRhKFp+uts .DP+LCYlLDGILFlYGlIlTALalRhKhpcut.... 0 2 4 7 +11461 PF11629 Mst1_SARAH C terminal SARAH domain of Mst1 Pollington J anon pdb_2jo8 Family This family of proteins represents the C terminal SARAH domain of Mst1. SARAH controls apoptosis and cell cycle arrest via the Ras, RASSF, MST pathway. The Mst1 SARAH domain interacts with Rassf1 and Rassf5 by forming a heterodimer which mediates the apoptosis process [1]. 22.70 22.70 22.70 35.10 22.50 22.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.58 0.72 -4.14 11 152 2009-01-15 18:05:59 2008-09-11 16:32:35 3 4 93 2 86 143 0 48.80 64 9.94 CHANGED -a-FLKshoh-ELppRLtSLDspMEpEIEEl+pRYpuKRQPILDAI-AK ....Da-FLKsholE-LQpRLtuLDPMMEpEIEElRQ+YpuKRQPILDAh-AK.. 
0 25 30 54 +11462 PF11630 DUF3254 Protein of unknown function (DUF3254) Pollington J anon pdb_2job Family This family of proteins is most likely a family of anti-lipopolysaccharide factor proteins however this cannot be confirmed. 25.00 25.00 26.50 45.80 21.50 17.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.16 0.72 -4.04 10 69 2009-01-15 18:05:59 2008-09-11 16:40:43 3 1 21 1 0 84 0 94.50 46 83.19 CHANGED CpAQuhpsL.lsullsKlssL..Wcsucl-hLGHpCpapVcPsl++acLYa+G+MWCPGWT...sIpGEu+TRSRSGslp+ul+DFVpKAhppGLITEE-ApsWLs ..............CpAQ.apsl.suulssK.ls.sL..W+ssps-hLGHpCpashpPplKRapLYa+G+hWCPGWT...sIcGc.upT.RS+S.Gsstcss+DFVpKAhppGLITpp-Ap.WLp. 0 0 0 0 +11463 PF11631 DUF3255 Protein of unknown function (DUF3255) Pollington J anon pdb_2joz Family Members in this family of proteins are annotated as YxeF however no function is currently known. The family appears to be restricted to Bacillus. 25.00 25.00 32.10 223.80 18.90 18.50 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.64 0.71 -4.50 2 18 2009-01-15 18:05:59 2008-09-11 16:44:32 3 1 18 1 3 17 0 121.40 77 86.84 CHANGED lMloGCpQpK.-EsPFYYGTWDtGhtPGPhDGV+StTVTFTcDpVlpppVhpGRGEVthP..sYKVISQsTDGoIEIpYLG..aPlKSTLKRGcNsTLIWc..GppKTMTRI..KTGtE-tcEK lMloGCQQpK.-ETPFYYGTWDtGLpPGPMDGV+StTVTFTKDcVlpppVIcGRGEVphP..AYKVISQsTDGoIEIpYLG..aPlKSTLKRGcNsTLIWc..GpTKTMTRI..KTGtE-tcEK.. 0 1 1 1 +11464 PF11632 LcnG-beta Lactococcin G-beta Pollington J anon pdb_2jpk Family This family of proteins is LcnG-beta, which with LcnG-alpha constitute the two-peptide bacteriocin lactococcin G (LcnG). This family of proteins represents the N terminal domain which has an alpha-helical structure and is amphiphilic. Both peptides have a GxxxG motif which they use for interaction through a helix-helix structure [1]. 
19.90 19.90 78.50 78.00 18.90 17.00 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.68 0.72 -4.21 2 5 2009-01-15 18:05:59 2008-09-11 16:57:19 3 1 3 2 0 5 0 35.00 70 63.87 CHANGED t.htWLsWlpPAh-FlpGhuKGhhKEGNKsKWKNl KKWGWLAWV-PAYEFlKGFGKGAIKEGNKDKWKNI 0 0 0 0 +11465 PF11633 SUD-M Nsp3; Single-stranded poly(A) binding domain Pollington J anon pdb_2jwi Domain This family of proteins represents Nsp3c, the product of ORF1a in group 2 coronavirus. The domain exhibits a macrodomain fold containing the nsp3 residues 528 to 648, with a flexibly extended N-terminal tail from residues 513 to 527 and a C-terminal flexible tail of residues 649 to 651. SUD-M(527-651) binds single-stranded poly(A); the contact area with this RNA on the protein surface, and the electrophoretic mobility shift assays confirm that SUD-M has higher affinity for purine bases than for pyrimidine bases. 22.40 22.40 23.10 83.20 21.70 22.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.65 0.71 -4.91 6 195 2009-01-15 18:05:59 2008-09-15 09:39:56 3 10 84 11 0 175 1 142.70 83 2.41 CHANGED ssp.lhss.ussssssllsslshuapuMlpauKcpGhLhPlshDYsAhsKlLKR....hcspEGlhTs-GhcFYhYSpccPLp-V.pALNuhs+sllhhPFGalspGhsLA.SAssMRsLTVPasVllsScssVPlYpuYhsus .........S.AFYVLPSEAPNAKEEILGTVSWNLREMLAHAEETRKLMPICMDVRAIMATIQRKYKGIKIQEGIVDY.GVRFFFYTSKEPVASIITKLNSLNEPLVTMPIGYVTHGFNLEEAARCMRSLKAPAVVSVSSPDAVTTY.NGYLTSS.. 0 0 0 0 +11466 PF11634 IPI_T4 Nuclease inhibitor from bacteriophage T4 Pollington J anon pdb_2jub Family This family of proteins represents IPI from bacteriophage T4. This protein is a nuclease inhibitor which is injected by T4 to protect its DNA from gmrS/gmrD CT of pathogenic Escherichia coli into the infected host [1]. The structure of this protein consists of two small beta-sheets flanked by N and C termini by alpha-helices. The protein has a gmrS/gmrD hydrophobic binding site [1]. 
25.00 25.00 25.30 93.50 23.90 18.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.57 0.72 -4.10 2 6 2009-09-11 14:10:27 2008-09-15 09:51:27 3 1 5 1 0 5 0 75.70 85 79.37 CHANGED ATLTSEVIKANKGREGKPMISLVDGEElKGTVYLGDGWSAKKDGATIVISPAEETALFKAKHISsApLKIIAKsLL ATLTSEVIKANKGREGKPMISLVDGEEIKGTVYLGDGWSAKKDGATIVISPAEETALFKAKHISAAHLKIIAKNLL 0 0 0 0 +11467 PF11635 Med16 Mediator complex subunit 16 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function.\ Med16 is one of the subunits of the Tail portion of the Mediator complex and is required for lipopolysaccharide gene-expression [4]. Several members including the human protein, Swiss:Q9Y2X0, have one or more WD40 domains on them, Pfam:PF00400. 
25.00 25.00 25.60 25.10 22.40 24.80 hmmbuild -o /dev/null HMM SEED 753 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -13.32 0.70 -6.57 18 264 2009-01-15 18:05:59 2008-09-15 10:38:09 3 7 176 0 170 275 0 534.40 21 81.27 CHANGED pphts..scsssssIluhpWLssp+.slhhpptshhs........................tt.sssshappplp.h.PhGshHPsssKtuhlslp+sGtlc.laYQ........K.ssplsshp.p..............spphlsaAuIs.ssssp.llls....saSthspplphY+lplsW.............s.....sptsp....hcs........p...........PSlplpchps.h...csh...s.............................p.lhpLo+Lcllstss.......ts.susp...I..Llsass.s..............t.sSllpRapLspps............phlpshFtpls.ppsss....t..p.h.phchhsclhhpppltsIssthhsh...............................................................hlshhacDGoIphhcpsohplhsss................scssshloshhssGFpa.........Ppls.......................htlSPohsullhhctpGp........th.....hthhpsshss.pp.t...h....susshAhta..shuhphshuuDDlslllphpl..lpplsccp...........tppahssllsthhshhsh.......shDt.scthlDKlls.ppsl.KshsLQhtLG..pht......sscsss.c.....luhllLpL+shsh........h...h..hs.....................hsc.-hlhSLlssh+Wll-hhsYlhppLh.l.........h.p.t.............p.s..polshhlhhuphsRthlhphh.ptlppl....pthltth........phaslh.spus...........p.....hh...h....phh..sss.Plshsha.....EpaLsclsshhpt.......................................................................t.pttsp.thE.pLllpuplPsp......................ltthsphllsps.......sssltschchsslahhDhshLplupsccsp..hhth....ttt.lh....................hDslpph.lthp................................................................tsplRRCsRCs..slotsspss........ps.......tshtt...Whhha....hRpClCGGhWh.p 
.....................................................h..........t..hth.ah...t.t................................................s.hh..ht...s.hs.as...h.uhlhlp.sG.lt.h.h................................p.p..ph.p.......................thlthushs..tts......hlhh....s.s....tp.....lphh+l.lphs...................p..............................s..sh.h..h.................................................tlsplphl............t...t.....l...lh...hhp................................tohl.pap.ltpt...............lp.hh.phs.........................hph.t.h...t..l.tlt...hs................................................................hlhhhhpDuslphhpt.shp......................p....h.p..tsuhpa.....t...........................hS.s.h.hh.hp.ptp...............h.hht.........p..............................hthhh.......hthh........s.-.h....hhh.............p.hs.p................h.tphht.h.p.hph.................-h.........tl.p.....l....+s.h.lt...t...t.............tp.hs...phshlh.lpl.+p..h...............................................c....hl..l.t.hpWhhchhhhhhtpl..........................................................tp.....h....s.t..h...h...lt..h........th.h...............................t.t.................................................h.........h.....ps....ht.h......phL.th....................................................................................................................................................................t....-..h...ts..s........................h......hh.....................................h..............l.t....t.....................................................hD.hpp..h..............................................................................................................................................h+tCs.RCs....s...............................hh....h..h........C........h............................................. 
0 32 68 132 +11468 PF11636 Troponin-I_N Troponin I residues 1-32 Pollington J anon pdb_2jpw Family This family of proteins represents the cardiac N-extension of troponin I. This region of the protein (1-32) interacts with the N-lobe of cTnC and modulates myofilament calcium(2) sensitivity [1]. 25.00 25.00 26.70 25.50 18.30 17.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.93 0.72 -4.26 5 46 2009-01-15 18:05:59 2008-09-15 10:53:55 3 2 30 1 15 41 0 32.20 66 15.79 CHANGED EA..EEEEsVsc.PKP.....PPsssPP.lRRKSSANYRSYAT ..................sc....pp-sstps.PAP.....uP......lRRRSSANYRAYAT. 0 1 1 3 +11469 PF11637 UvsW ATP-dependant DNA helicase UvsW Pollington J anon pdb_2jpn Family This family of proteins represents the DNA helicase UvsW from bacteriophage T4. The protein is a member of the monomeric SF2 helicase superfamily and shows structural homology to the eukaryotic SF2 helicase Rad54. UvsW is thought to have a role in recombination and the rescue of stalled replication forks [1]. 25.00 25.00 71.20 70.40 19.10 18.00 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.62 0.72 -4.29 7 32 2009-01-15 18:05:59 2008-09-15 11:21:53 3 2 31 1 0 24 0 54.20 63 54.58 CHANGED ltuCpTl-GLpELEKYYsKRhKEl..-lpsoDDISlRDAluG+RpEhEsps-stcE IuSCQTlEGL-ELEtYYcKRsKEs..ELKDoDDISlRDALAGKRsELEspD--pEE... 0 0 0 0 +11470 PF11638 DnaA_N DnaA N-terminal domain Pollington J, Bateman A anon pdb_2jmp & Jackhmmer:B3CS04 Domain This family of proteins represents the N-terminal domain of DnaA, a protein involved in the initiation of bacterial chromosomal replication. The structure of this domain is known [1]. It is also found in three copies in some proteins such as Swiss:B5V0X4. The exact function of this domain is uncertain but it has been suggested to play a role in oligomerisation. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.12 0.72 -4.40 116 3333 2012-10-02 15:09:17 2008-09-15 11:28:02 3 9 3077 2 743 2291 1830 62.70 28 14.06 CHANGED sshWp.pltppLcppluppp..asoWlpslph.th.sss...............lhltsPspFhtsWlppp..........YhcpIpphhpp ...............plWppsLp.pLp..c..c..ls..tsp...........assWl+.s.lps...pl.pcss...............lhlhuP....Np.Fs..h.-Wlcs+..................YhshIpphh..t....................... 0 255 491 630 +11471 PF11639 HapK REDY-like protein HapK Pollington J anon pdb_2jdj Family This family of proteins represents HapK, a protein of unknown function, with two homologues PigK and RedY. The monomer structure of the protein contains a four-stranded anti parallel beta-sheet, three alpha-helices and a short C terminal tail which it uses for dimer formation [1]. The surface of HapK has a deep cavity with consists of a kinked helix and a beta-four strand. HapK could be involved in prodigiosin biosynthesis, specifically the binding of a bipyrrole intermediate such as HBM or MBM [1]. 25.00 25.00 30.20 32.60 21.90 21.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.38 0.72 -3.40 4 20 2009-01-15 18:05:59 2008-09-15 11:51:59 3 2 19 2 9 20 14 102.90 37 68.24 CHANGED McsIlHKIRLhDlucssAFEsWVpssDYATCPcLPSVRuFDVaRVSspt-APFHYlElIplou.ctFc+DMpTssFtsLVpsFspMAEVVEEluGEpLusGYpA .....MpsIlH+I+L+DusssssFEpWVcsoDYssCspLsSVpuFpVh+l.S....s...s.s-....A.......P.aH...YlElIcVsuhctFs.p-MpTpsFpcLsscFsphA-VVpEhsGc.ltsGYt............... 0 6 7 7 +11472 PF11640 TAN Telomere-length maintenance and DNA damage repair Wood V, Coggill P anon Pfam-B_6865 (release 23.0) Domain ATM is a large protein kinase, in humans, critical for responding to DNA double-strand breaks (DSBs). Tel1, the orthologue from budding yeast, also regulates responses to DSBs. 
Tel1 is important for maintaining viability and for phosphorylation of the DNA damage signal transducer kinase Rad53 (an orthologue of mammalian CHK2). In addition to functioning in the response to DSBs, numerous findings indicate that Tel1/ATM regulates telomeres. The overall domain structure of Tel1/ATM is shared by proteins of the phosphatidylinositol 3-kinase (PI3K)-related kinase (PIKK) family, but this family carries a unique and functionally important TAN sequence motif, near its N-terminal, LxxxKxxE/DRxxxL. which is conserved specifically in the Tel1/ATM subclass of the PIKKs. The TAN motif is essential for both telomere length maintenance and Tel1 action in response to DNA damage [1]. It is classified as an EC:2.7.11.1. 21.40 21.40 21.40 23.20 21.30 20.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.79 0.71 -4.49 24 182 2009-01-15 18:05:59 2008-09-15 12:47:21 3 6 154 0 117 188 0 151.90 24 6.10 CHANGED Msshth....t.hhshLpSsKl+-Rsculpp.....Lppllpss...................pplssKshctlhcuLhphlppE+pta.........ppsssttuss...tpRLspsupslRhhlcpulc............ph+h+shpsllspIhchhhsss......tslhpslshDas+sL.sslLphssahccLs.tppWpp ..............................p..l...shptLpss+hp-Rpctl-c..................hcpllppspt...................pphppt..sa..cslFc.sLh+hlppEppsh.....................htpsssssust..ttp+lpchuuhlRhhl....cpusc............+htphpsptLlsalh-slhsss........hhtshut.DasphL.pslLshphahpclu.pppW..p................................... 0 24 52 89 +11473 PF11641 Antigen_Bd37 Glycosylphosphatidylinositol-anchored merozoite surface protein Pollington J anon pdb_2jo7 Family This family of proteins represents the core region of Bd37, a surface antigen of B.divergens which is GPI-anchored at the surface of the merozoite. The structure of the protein consists of mainly alpha folds and has three sub domains [1]. 
25.00 25.00 180.60 180.40 20.70 20.20 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.60 0.70 -5.06 4 14 2009-01-15 18:05:59 2008-09-15 13:13:05 3 1 2 1 0 15 0 205.90 60 73.07 CHANGED VKoLE....slRpELKGQREsFLScIIcSsGshTILQLVsaLRVlDTDLLLpVDusKV-cAGtKVKAYL-pIGI+GpsVEEuLDNLMhKVh.clT+GTVEuSsQuTD.SEELKsLLLKFSEDLKAEQEhHu-Kctoc.-LLp...sLpsp+DELlKKFsthsPoFLToEDISuFLTVP-YGsPhsuuKWKpVE+KIpD...KLESSDlsT..-LKoLlAcLIEQREpMMDLLYGPhGca ..VKTL-....-LRpEL+uQREphLSpIIcSDGPFTILQLlsaLRlIDTDLLLKVDpshVccAGcKVKsYLEpIGIsG-SVEtuLDpLMhKVY.clT+GTVcSsspuTD.SE-LpoLLLKFSEDL+AEQEhHupKctuc.cLlp...ohtpp+-ELlKKFsslsPTFLTsEDlSuaLsVP-YGhPhsuschKpVEthIpt...KLEoS-l.s..pL+sLlAclI..REphMDLLYG..Gc.. 0 0 0 0 +11474 PF11642 Blo-t-5 Mite allergen Blo t 5 Pollington J anon pdb_2jmh Family This family of proteins is Blo t 5, an allergen protein from Blomia tropicalis mites. This protein shoes strong reactivity with IgE in asthmatic and rhinitis patients. The structure of the protein contains three alpha helices which form a coiled-coil [1]. 20.70 20.70 20.70 21.70 20.30 18.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.66 0.71 -4.34 7 41 2009-01-15 18:05:59 2008-09-15 13:22:55 3 2 9 8 0 46 1 113.40 44 85.15 CHANGED hststhsspD-aRpEFDRLLlpphpc+hcclE+tLhpLScQlpELEKoKSK-lKtpILRElolu.sFlcGApGahERELKRTDLNhhE+aNFEtALuTuplLhKDLppLtK+VKulcoc .........s.....spcDc.aRNEFD+LLhpphpcphcchEctLLtLocQls-LEcoKoKEhK-pIL.RElsls.shIcGu+sahpRELKRTDLNlhE+aNaEtAlsosclLhKDLcchpp+VKslc.... 1 0 0 0 +11476 PF11644 DUF3256 Protein of unknown function (DUF3256) Pollington J anon pdb_2p3p Family This family of proteins with unknown function appears to be restricted to Bacteroidales. 
25.00 25.00 40.40 39.40 20.80 19.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.07 0.71 -4.81 9 96 2009-01-15 18:05:59 2008-09-15 14:03:42 3 1 96 2 13 81 0 195.30 36 92.44 CHANGED hhsshulpAQEhKTlFlsMPDSlsPLLopVNRcDhI....DFlpSpMKApVcNpFGtpSEhpcLosDYlplQhospSohpMKlLsLsDoTpVlCsVoTVCuPACDSpl+FYTTDWK.LssssFl.olPshsDFlt.ssDoss.Ysap-ApppADM.LMKscLSc-sspLohThTTP-YhuKEstEKLcPFLR+.PlVYpWcsG+Fp .......h....hulpAQ-h+slFlsMPDSlsPhLo+ssRpDhl....Dalp.uc.MKApVcNpFGspS..chpcLos..DYlplQhospSohpMK.lLslssosp........lICsVsTssu..PssDSclcFYos-Wc.Lssspah...shPshssF..lt.....ssDohs..cap-hppphDhhLhpscLSt-sspLohohoTPpYhucEstE+lcsaL+c.PllYpWpsspF.................. 0 5 11 13 +11477 PF11645 PDDEXK_5 DUF3257; PD-(D/E)XK endonuclease Pollington J anon pdb_2ost Family This family of endonucleases includes a group I intron-encoded endonuclease [1]. This family belongs to the PD-(D/E)XK superfamily [2-3]. 21.50 21.50 21.50 21.60 21.20 21.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.92 0.71 -4.52 2 21 2012-10-11 20:44:46 2008-09-15 14:40:57 3 1 20 4 4 13 7 133.70 33 94.51 CHANGED STKLKGDIAEQAAILRALKLGWGVLKPLGDRLSYDLVFDVEGILLKVQVKSSWFSEKTGNYVVDNRRTRTNRRNIVRSPYRGNDFDFAVAYVEELELFYVFPVDVFISYGSEIHLVETDKRQRKPRSFGYREAWHLILQKGAAQKETSA .........p.TK.KGDlA.ttAhlchlchGasVLpPhuD+.sYDLVh.............-.t-GlLh+VQVKouhhsppsGhh.VcpRsoh.oscpslsppphpsp-hD.ahlsYstph-.hYhFssclF.............................................h...h..pt............................. 0 2 4 4 +11478 PF11646 DUF3258 Protein of unknown function DUF3258 Pollington J anon pdb_2oq8 Family This viral family are possible phage integrase proteins however this cannot be confirmed. 
25.00 25.00 27.20 70.10 19.70 17.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.31 0.72 -3.88 4 7 2009-01-15 18:05:59 2008-09-15 14:47:15 3 2 7 4 2 11 0 101.90 39 21.51 CHANGED P..hhuclYhchL.tph.K.sLpshpppDYARa.llWstls.DttI--..........ah.+DIuh.lDpsapLsK.hphcP.Yp+huWsEc.NhslsEDDlhhscolt+Y .......hScVYh-FLpaKlpKtsLssKhppDYARhallWphlscDtsI-p..........Yps+DIGhFIDpCapLPK.hNhtP.YsKMuhsEclshDVsE-DhlsPKoVp+Y 0 0 1 1 +11479 PF11647 PMT_C C-terminal region of Pasteurella multocida toxin residues 569-1285 Pollington J anon pdb_2ebf Family This family represents the C terminal region of Pasteurella multocida toxin (PMT) which displays a Trojan horse-like shape with three domains, C1, C2 and C3. The C3 domain possesses the Cys-His-Asp catalytic triad. PMT is an enzyme toxin carrying the cysteine protease-like catalytic triad which functions on the cytoplasmic face of the plasma membrane of target cells [1]. 25.00 25.00 26.40 25.10 23.20 21.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.00 0.72 -4.36 14 184 2009-01-15 18:05:59 2008-09-15 15:09:40 3 27 97 6 23 185 1 66.70 40 3.18 CHANGED osppLhcp..ups.GhsIGESapplhuchtllcsh..L....tpppVpsha.....l-chls-p.sushssh .........oVsELl-s..ApVsGKlhGESYQpVlDtLs.hH....suhh.....t.-sVcpLh.pLp+pIEuYlhsHPsSGRspA...... 0 1 5 16 +11480 PF11648 RIG-I_C-RD C-terminal domain of RIG-I Pollington J anon pdb_2qfb Family This family of proteins represents the regulatory domain RD of RIG-I, a protein which initiates a signalling cascade that provides essential antiviral protection for the host. The RD domain binds viral RNA, activating the RIG-I ATPase by RNA-dependant dimerisation. The structure of RD contains a zinc-binding domain and is thought to confer ligand specificity [1]. 
25.00 25.00 26.20 25.00 24.60 24.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.25 14 227 2009-01-15 18:05:59 2008-09-15 15:56:30 3 13 67 45 119 217 0 117.10 32 14.61 CHANGED cphpLLCtsCpshsshusDlRpVEsoHaVslsssFp.chapspppPhs.tKhac-ap.supIpCtp..CucsWGhthhYKusp.LPsLKIcuhl....hEs.stsphphpKWpclsFphspF.Dhs-hstts ..............hpLLCp..pCpshsCpusDI+h.l.EpsH+Vsl.sss.Fp..phah.spt.p...p..s.t+pat..-ap..hs..upIhCpp.....Cup.sWGhhhha+s..hc...L..P..sLKI+sFV..h.hps....sts+p.ph.+KWpcls..hph.s.h.Dh.phs..t................................ 0 33 40 72 +11481 PF11649 T4_neck-protein Virus neck protein Pollington J, Finn RD anon PRODOM Family This family of protein represents gene product 14, a major component of the neck in T4-like viruses along with gene product 13. Gene product 14 is rich is beta-sheets. The formation of the neck to the head of the bacteriophage is crucial for the tail attachment [1]. 25.00 25.00 26.90 26.50 18.40 17.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.39 0.70 -5.09 12 78 2009-01-15 18:05:59 2008-09-16 09:11:18 3 2 77 0 0 70 1199 236.50 35 80.71 CHANGED M......................s.psphhNPYl..........NhppasspQsLt-sLVsESIphpGl-hYYlPREaVshD.lhsEDhpoKFp+uaphtAYlsoaEGYpGpssahSKFGhplsDEhThsls.chFppps........DGp..........cPppGDLIYF.....Ph...sNsLFEIsaVEs.cPFYQhGcshhhclpsppFhYSuE-l...pPt.....lpc.-sl..-ph.t.slt.lhslsGhsDhsltp....htcssphpscstchhpph.slsscu...................osFssh ..............................................tpp.hssYl..........Nh..stapspQpLh-sLVsESIphhGh-lYYlPRphVshDhlauE-.psKFspA..aphtAYlssaEGapGttshhSKFG.hplpDElThsls.phFccpl............ssp................cP.pEGDLIYF.....Ph...sspLFEIsaVE.....s.cP.....FYQhG+shlhclpsphF.YS.sE-l...sss.....lpc.-sI......p....ph.t.slp.lhslsGhs-hshsp....htpssphpscstp.h.....l.sts........................s............................................... 
0 0 0 0 +11482 PF11650 P22_Tail-4 P22 tail accessory factor Pollington J, Finn RD anon PRODOM Family This tail accessory factor of the P22 virus is also referred to as gene product 4 (Gp4). The proteins structure consists of 60% alpha helices. Gp4 is the first tail accessory factor to be added to newly DNA-filled capsids during P22-morphogenesis. In solution, the protein acts as a monomer and has low structural stability. The interaction of gp4 with the portal protein involves the binding of two non-equivalent sets of six gp4 proteins [1]. Gp4 acts as a structural adaptor for gp10 and gp26, the other tail accessory factors [1]. 19.40 19.40 19.40 22.70 18.60 18.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.79 0.71 -4.68 6 137 2009-01-15 18:05:59 2008-09-16 09:28:17 3 1 135 24 8 65 4 158.50 51 97.54 CHANGED MsplhTKGDlVLhALRKhGlASNATLTDVEPQShEDGlNDLEsMMAEWh....upGI-lGYtFAssEps..PsPDD-oGLssphpsAVuapLhlRlhsDYulEPTs+lluoActuY-sLhhshlplPulcRRs..........DMPsGpGNpassasss+YYsc+t.....hssDss......ss .......................M.plhTKGDlVLtALRKhGlASNATLTDVEPQShEDuVNDLEsMMAEWh....upGIsl.GYhFAssDps..PhPsD-pGLsspAhsAVhapLAsRlssDYuLEsospllosAchuh-tL....hKlsuhc.Rtp..........cMPsGSGNphssast.+Ya.tct......stD.sss............ 1 3 3 5 +11483 PF11651 P22_CoatProtein P22 coat protein - gene protein 5 Pollington J anon PRODOM Family This family of proteins represents gene product 5 from bacteriophage P22. This protein is involved in the formation of the pro-capsid shells in the bacteriophage. In total, there are 415 molecules of the coat protein which are arranged in an icosahedral shell [1]. 
23.70 23.70 24.30 23.80 23.50 23.60 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -11.94 0.70 -5.87 13 302 2012-10-03 06:22:39 2008-09-16 10:33:18 3 5 281 0 45 236 443 327.50 22 91.19 CHANGED MANshLT..hslIscEslcslpspsshupslc+tYsspF....suhphGsolslhhPsphpststsshs.........sspslsEtpVslslsp.csVshphoucEhtLcls-hpcRhl+PAhppLAspl-uslht.............shtppsoNhVup...ssush..s.hsshtsAsstLspttlPps.sRpshhsPpspschuss.sshhtsu.ptspsAaccusl.sphuGhDhhhspslhstssGu.usuhTVsGusphss...................Tlslsuhss..........lptGDhhohAGVhtlp.lTKpsh...sp.ppFpVst.....usuusslpIhPshlssssss.sp....atsVssssAsssslohlsssssshh.NLhapcsAhsLsssPL.hPhtuuspshssoa....cGlSlRlhsthDtpscsspsRlDlLYGs.sslcP-h.ultlss .........................................................................................t....h.hhpthl.th..t.hs.hsp..t..lp+.htsst......s.p.tssssl..hh.Pst.ps.....p....tp....shshss......ssssl.ptps.s.sphsp.cps.hphps..-....l..p.....t....p.hh.p...phl....p....sutptLA....ssl-ptl.u...p.......................hhhp.t.u..u..h.s.l..s.................s.....s.....s..sst.ps.......as.l.upstphlpstslsps..sphhlh..sPtshtp..ht.ts.st.h.ths..s...phsppA..h.c.supI..spl..uGhchhho.....sshh.....h.pts...u.s..s.hhlpst.p.t....................hlslssss.........hthGp.hphss..................................................................................................hht...h.hh......s...........................sh.h...................thth..u.................................................................. 0 14 29 36 +11484 PF11652 DUF3259 Protein of unknown function (DUF3259) Pollington J anon PRODOM Family This eukaryotic family of proteins has no known function. 
25.00 25.00 34.20 48.80 21.00 18.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.89 0.72 -3.83 4 92 2009-01-15 18:05:59 2008-09-16 13:06:44 3 1 42 0 59 80 0 82.20 64 46.83 CHANGED MplQDpQLAtQLhRLRu-Ip+LKl-QsC+hH+EMLsDATaELEEps-.uDL.CDlPhuuuhuLSTPLKhlGlTKMNINoRRFSLC ....MphQDpQLARQLhRLRu-Ip...+LKlEQsC+LH+cMLsDAoaEL.EEp....sE........uDL..hC-h...Phs...u.uhuL..SsPL+hIGlTKMNINSRRFSLC. 0 6 12 28 +11485 PF11653 VirionAssem_T7 Bacteriophage T7 virion assembly protein Pollington J anon PRODOM Family This family of proteins represents the gene product 7.3 from T7 bacteriophage. The protein is localised to the tail and is thought to be important in virion assembly. Particles assembled in the absence of the protein fail to adsorb to cells [1]. 25.00 25.00 26.00 25.60 20.40 19.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.30 0.72 -3.23 5 48 2009-09-11 12:45:27 2008-09-16 13:30:53 3 1 31 0 0 44 0 92.00 51 99.01 CHANGED MG..KKIKKAVKKVsK.VpKl....s+.sttAuGGhLG..GGcstsp.V.......EAssP.As....AQlVEVP.cDcV-oED-uQTESu+KKARAuGKKuLSVARSSGGGINI .......................................K.lpKshKpVsK................ht.st.pssGGhhG....GGpscsphs.............psPQAA.....AQlV....-VP.cc-VssED.-AQTESG+KKARAGGKKSLSVARSSGGGINI............. 0 0 0 0 +11486 PF11654 DUF2665 Protein of unknown function (DUF2665) Pollington J, Finn RD anon PRODOM Family Some members in this family are annotated as Non classical export proteins, however no specific function is known. 21.90 21.90 23.10 22.40 21.30 20.80 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -8.15 0.72 -4.39 8 66 2009-01-15 18:05:59 2008-09-16 13:38:40 3 3 66 0 48 65 0 46.30 35 57.78 CHANGED YLISRslDPlLAlulGssSYYLaEp..Rs..GRPpGHoLNELlt++ascps ....YLISRhhDPlhAlhlGssAaalhpp..Rp....t+.pG+.plpp...ll..h.h................................. 
0 16 28 44 +11487 PF11655 DUF2589 Protein of unknown function (DUF2589) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 21.00 21.00 22.30 21.90 20.90 20.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.98 0.71 -4.12 34 199 2009-01-15 18:05:59 2008-09-16 14:19:00 3 3 97 0 60 181 27 153.20 21 77.55 CHANGED shssLIsG.hpAsspAQthluppthchlpphh.............pssp.............hshphphslssppt.............php.lplPLlsllshssLtlcclslcFshclss..spsppsppstshph......psp.t................sspppssp......psssssphplplphpppssPEGlt+ll-hhsps ....................shppllsu.htAsspAQthhup.pthshlpphhh............psst..................hhhthphphslsspp.........................p.t.lplPLlsllshssL.tl-cssl..sFphcl..ss.spsppspsshphph......................thp.thh................hthpsphuspppssp..........psspssphp..lplphptpt.PEult+ll-hhsp............................................ 0 9 25 47 +11488 PF11656 DUF3811 ComReg_Spx; YjbD family (DUF3811) Pollington J, Finn RD anon PRODOM Family This is a family of proteobacteria proteins of unknown function. This family is unrelated to Pfam:PF03960 which contains a set of transcription factors that are also named YjbD. 21.50 21.50 48.00 47.90 21.40 21.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.98 0.72 -3.92 6 505 2009-01-15 18:05:59 2008-09-16 14:21:29 3 1 502 0 25 113 1 86.90 83 97.82 CHANGED M.+LT.KDMTESEQRElKTLLD+ARhstGRsLTNuEsN+IK-EhI-KlMspREthAKtAR.......tE+KppphpP.opsTFSW..SAss.usRu+R ......LPRITQKEMTEREQRELKTLLDRARIAHGRsLTNSETNSIKKEYIDKLMsEREAEAKKAR.......QLKK..KQAYKPDsEASFSW..SANT.STRGRR......... 0 1 6 14 +11489 PF11657 Activator-TraM Transcriptional activator TraM Pollington J, Finn RD anon PRODOM Family TraM is required for quorum dependence. It binds to and in-activates TraR which controls the replication of the tumour-inducing virulence plasmid. 
TraM interacts in a two-step process with DNA-TraR to form a large, stable anti-activation complex [1,2]. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.66 0.71 -4.71 11 77 2009-01-15 18:05:59 2008-09-16 14:26:03 3 1 62 0 11 52 8 141.40 40 96.89 CHANGED MsDcI-ElI+-IAsKHGIAVuRDDPILlLQTINsRLMpDSp+AQQEhL-pFKEELEuIApRWGsDAKsKAERlLNAALuASKEAMs+shp-uspuuAcAl+pEl-suLuc.lsu.lpcuR+luhhNllAuuhslhAAslslhshL ........sDcl-ElIcEIAsKHGIAluRDDPILlLQTINp+LhpDo....tpAQ...p-lL.-pFKpELEuIupRWu..-D....AKsKAE+hLNAALs......ASK-A...Mscs.hp...-us....ptuuc..ul+c...E...l.-sshsp.l..ts...lccu..R+luh...hNhlAushslhAuslslas...................................................................... 1 2 5 10 +11490 PF11658 DUF3260 Protein of unknown function (DUF3260) Pollington J anon Pfam-B_003054 (release 23.0) Family Some members in this family of proteins are annotated as YhjU however this cannot be confirmed. Currently this family has no known function. 
24.80 24.80 38.30 38.30 24.70 24.70 hmmbuild -o /dev/null HMM SEED 518 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.78 0.70 -6.36 23 576 2009-01-15 18:05:59 2008-09-16 14:50:18 3 1 549 0 49 313 9 502.60 71 95.59 CHANGED hWNlYFllKFuLaatGhlsFpsLhNLsFsshLll..Pls...pptl+hlRphlAlslulsLhaaDoaLPPhsRLhuQsupltsFShuYLlELluRFlshphlluhhllllsYhhlupWlRlosaV.........lsuLlhlslhshsssshshhsusss..........................sstspssstsssssssshsshLssFappEupRpssFss..sssupsFDlLVLsICSLuWDDL-sstlpsHPLhs+FDllFcNFNSATSYSGPAAIRLLRASCGQpuHssLYpPAs.pQCaLFsNLApLGFpspllhNHsGcFDsFLp.l+cp.GshpsPlhspsulssshpuFDGSPIacDhsVLspWhp.pR.psssstsAhaYNoIoLHDGNRlssu.th.sohsSYthRtp+Lh-DlspFhcpLcpSGR+lhVllVPEHGAAL+GD+hQluGLREIPoPsIsHVPVGlKllGh..tssppGsslpIspPoSYLAlSpLlu+llssssFpsss.shtpLspsLPpTphVuENssslVMphsu+.Yl+LsG.usW .................G.WNFYFLVKFGLLWAGYLNFHPLLNLVFAAFLLM..PIP...+YpLHRLRHWIAlPlGFALFWHDTWLPGPpSIMSQGoQVAuFSosYLlDLlsRFINWQMIGAIFVLLVAWLFLSQWIRlTVFV.........VAILlWLNVLTLuGPuFoLWP.....AGQPTsTVTTTG............GNAAATVAssGssPV.lGD.hPAQTAPPTTANLNAWLNsFYsAEuKRKoTFPusLPu.DAQPF-LLVINICSLSWSDlEAAGLMSHPLWSHFDI.FKNFNSATSYSGPAAIRLLRASCGQsSHTsLYQPAs.N-CYLFDNLuKLGFTQHLMMsHNGpFGGFLKEVREN.GGMQoELMs.QosLPVhLLuFDGSPVYDDhAVLNRWLs.spE.+-p.....NSRSATFYNTLPLHDGNHaPGV....SKTADYKsRAQKhFDELDAFFTELEKSGRKVMVVVVPEHGGALKGDRMQVSGLRDIPSPSITcVPVGVKFFGM..KAPHpGAPIlI-QPSSaLAIS-LVVRllDGKlFTE.D..s.......VsWpKLTSsLPQTAPVSENuNAVVIQYQsKPYVRLNG.GDW.................... 0 6 14 32 +11491 PF11659 DUF3261 Protein of unknown function (DUF3261) Pollington J anon Pfam-B_003077 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
25.00 25.00 26.00 25.70 23.40 22.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.91 0.71 -4.63 32 248 2009-01-15 18:05:59 2008-09-16 14:52:44 3 1 246 0 60 186 11 157.50 39 79.73 CHANGED lhsL.lLsuCup.p..pps.s.....lsls.ssplsLssssths............................................hshsQhlohphssp....p.......+pLhspLcl-sp.clsLsGhushGptLhsLsYDspslpsppt..hLsssl...scplLusl.LshWPhpuhpstLs...GhpLtsps............ppRp.lhspssph.............llcIcYpp ...............................................h.hsL.hLsGCSp..p..ppps.....pshLpsss..plsLPsPuhsss...........................................lstQQLlousasuc.....o.........poLLVhL..psDsp..+lsLAGLSulGhRLFhlsYD.u.pslcsEQu..llsspL.PPApQVLADlMLoaWPhuAWpspLs..sGWpLpDsu....................spRp.LhsssGpl.............ls-IpY..t............................................ 0 9 19 40 +11492 PF11660 DUF3262 Protein of unknown function (DUF3262) Pollinton J anon Pfam-B_003096 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 23.50 23.50 23.50 23.50 21.90 23.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.76 0.72 -3.88 24 210 2009-01-15 18:05:59 2008-09-16 15:02:28 3 2 173 0 64 172 6 74.10 33 90.38 CHANGED MouAQtsAFpAu.SG.lsPsthsslhlGhllulLhLWusWAlhssYpGWusp.plsptshhtsslRhslLLlVlsaahh ......MssuQhuAFpAu..oG..hsPushshlhlGhlhulLLLWusWAhhosYpGWusp..plsptphhthslRhlhLhllhsFhhh....... 0 4 30 52 +11493 PF11661 DUF2986 Protein of unknown function (DUF2986) Pollington J, Finn RD anon Pfam-B_003109 (release 23.0) Family This family of proteins has no known function. 
21.30 21.30 23.70 49.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.27 0.72 -3.63 42 221 2009-01-15 18:05:59 2008-09-16 15:04:49 3 1 191 0 59 167 4 44.50 59 68.34 CHANGED MNRKKKIsphLpp+sK+sNAKlps..ss...KP+YISKAER....tKltupsp .MNRKKKIsQhLpp+tKKtsAKLcs...SN.....KP..+YISKAER....AKLsAcpt....... 0 6 14 39 +11494 PF11662 DUF3263 Protein of unknown function (DUF3263) Pollington J anon Pfam-B_003189 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 22.30 22.30 22.30 22.40 21.80 22.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.33 0.72 -4.52 18 368 2009-01-15 18:05:59 2008-09-16 15:51:34 3 1 342 0 112 259 77 76.40 49 76.45 CHANGED ssLocR-ppILcFERQWW+asGA..KEpAIR-pFulSuTRYYQlLNsLIDsPtALttDPMLV+RLRRLRssRp+sRuAR ......s.tLop+-+sILsFEcpWa+.......hu.Gu...KEpAIR....-phGlSusRYYQhLNtLlDcscAlstc..PhLVpRLRRlRu...pRp+uRuAp..................... 0 39 89 107 +11495 PF11663 Toxin_YhaV Toxin with endonuclease activity YhaV Pollington J, Finn RD anon Pfam-B_003231 (release 23.0) Family YhaV causes reversible bacteriostasis and is part of a toxin-antitoxin system in Escherichia coli along with PrlF. The toxicity of YhaV is counteracted by PrlF by the formation of a tight complex which binds to the promoter of the prlF-yhaV operon. In vitro, YhaV also has endonuclease activity [1]. 
22.80 22.80 23.20 51.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.86 0.71 -4.44 17 374 2009-01-15 18:05:59 2008-09-16 16:06:56 3 1 357 0 48 161 27 138.30 65 91.19 CHANGED plaAHshFl-pL-tLhspVEph+t+DPpsYtp+sssKhLAAlh+Lhh-sIPtDPs+spaRQGsTLGs.c++HWhRA..KFhp.paRLFFRa..copu.KlIlhAWVNDEsThRsYGu+sDAYsVFp+MLspGpPP-DWssLLptupsp .........................A.LYAHPCFQETYDALVAEVEsLKGK.DPENYQRKAATKLLAVVHKVIEEHITVNPSSPAFRHGKSLGS...GKNKDWSRVKFGA..GRYRLFFR.Y..SE.KE..KVIILGWMNDENTLRTYGKKTDAYTVFSKMLKRGHPPADWEoLTpETEEs........ 0 12 30 42 +11497 PF11665 DUF3265 Protein of unknown function (DUF3265) Pollington J anon Pfam-B_001494 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Vibrio. 20.60 20.60 20.80 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.72 0.72 -6.86 0.72 -4.10 16 241 2009-01-15 18:05:59 2008-09-16 16:28:22 3 5 69 0 28 244 0 26.70 45 50.19 CHANGED +sAhHFhaAlsFsscssstphulushsP .+sAWHFaYALshVhKlVCuuhGIAhlTP........ 0 0 0 15 +11498 PF11666 DUF2933 Protein of unknown function (DUF2933) Pollington J, Finn RD anon Pfam-B_002197 (release 23.0) Family This bacterial family of proteins has no known function. 20.30 20.30 20.30 20.30 20.20 20.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.08 0.72 -4.43 40 286 2009-01-15 18:05:59 2008-09-16 16:47:17 3 2 243 0 91 187 35 55.20 43 68.32 CHANGED hlshhslAuaaLlsEHpAHl...hGhL...PaLLL.LACPLMHlFM.HGGHGsHupccsssps.s ..........................lshhulAuahLlpEHhuHl....hGhh...PaL..LL.LlCPL....MH...L....Fh...H...G.GH.GsHpccsspsp.p.............. 0 15 45 74 +11499 PF11667 DUF3267 Protein of unknown function (DUF3267) Pollington J anon Pfam-B_002418 (release 23.0) Family This family of proteins has no known function. 
27.10 27.10 27.30 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.50 0.72 -3.86 44 858 2009-01-15 18:05:59 2008-09-16 16:53:05 3 1 553 0 83 438 6 109.10 28 60.12 CHANGED lslllhl.hlHEhlHhlhahhhttp..pht.hthphthhhhash.sspsls+ppahlhhlsPhlllohlhhhlhhh....h..htsshhhhlhs....hpsuhsssDhhhhhhllpp..spsshlp ....................h.hhllhh.hlHEhIHhlhFhhapcs....Ksp.hth.phchl...h.hats.ssp..shsKhpFp.IhhlhPh..llloshhlhLhhh.................hhh...p..hhh.F.lhu....lpsuhshsDlhhltlllpp..spsphl............... 0 30 56 74 +11500 PF11668 Gp_UL130 HCMV glycoprotein pUL130 Pollington J anon Pfam-B_002736 (release 23.0) Family This family of proteins represents pUL130 from Human cytomegalovirus, a glycoprotein secreted from infected cells that is incorporated into the virion envelope as a Golgi-matured form. The protein promotes endothelial cell infection through a producer cell modification of the virion [1]. 25.00 25.00 54.70 54.30 21.20 20.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.03 0.71 -4.71 4 61 2009-01-15 18:05:59 2008-09-17 09:14:16 3 1 11 0 0 55 0 141.30 64 69.22 CHANGED aCP.haPSPPRshu.FouYcphssGPECsNpTLYlLaNRpGQpLIERPSsWVpKlsaYLSGRspsVFQ+Fs+sAoosoc.slpIos-DtKlFGAHMlPKpoKhLRalVpDGTchphC.M+lpTWA+shpsY.luFQsRlphTpAspps.ohCT+PNLlV ............YCPFlYPSPPRSP.QFSGFQpVsTGPECRNETLYLLYNREGQTLVERSSTWVKKVIWYLSGRNQTILQRMP+TASKPSDGNVQI.SVEDAKIF.GA....HM..VPKQTKLLRFVVNDGTRYQMCVMKLESWAHVF...RDYSVSFQVRLTFTEANNQ...T...YTFCTHPNLIV............... 0 0 0 0 +11501 PF11669 WBP-1 WW domain-binding protein 1 Pollington J anon Pfam-B_003402 (release 23.0) Family This family of proteins represents WBP-1, a ligand of the WW domain of Yes-associated protein. This protein has a proline-rich domain. WBP-1 does not bind to the SH3 domain [1]. 
22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.04 0.72 -10.85 0.72 -3.94 9 225 2009-01-15 18:05:59 2008-09-17 10:46:17 3 5 71 0 128 194 0 96.00 42 32.88 CHANGED YhC.-oGaCCGEotCC........................oYYYELWWFWLVWsllIlLuCCCsh+HRRsKpRL.....QpQQRQ+EINLlAYptApp......os.sh.hRhLssahLPsYEEVss+PsTPPPPYo .........YhC..-s.GaCCG..p.o.....tCC.....................sY..YYELW.W.F.......W.......L...lWsl..l...I..l...L...S.CCCsh....+HR...Rs.KhRL.......QpQQRQ+EINL.hAYptApph........ss..h.....h......ph.hssah.PsYE-Vspp..P....s..oPPP.PYo........................................................ 0 21 33 76 +11502 PF11670 MSP1a Major surface protein 1a (MSP1a) Pollington J, Finn RD anon Pfam-B_001532 (release 23.0) Family MSP1a is part of the A.marginale major surface protein 1 (MSP1) complex and exists as a heterodimer with MSP1b. The complex has adhesive functions in bovine erythrocytes invasion [1]. 21.30 21.30 21.30 23.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.44 0.71 -4.38 3 306 2009-09-10 23:09:43 2008-09-17 11:25:55 3 4 4 0 1 364 0 102.80 79 77.43 CHANGED MS.EYVSSQPADSSSAGGQQQESSVSSQSDQASTSSQLGsDSSSAuGQQQESSV.SQSDQASTSSQLGsDSSSAuGQQQESSV.SQSsQASTSSQLGTDWRQEh+SsVASVEYhLAARALISVGVYAAQupIApSpGCASLSVAEVEEI ............................sDSSSAuGQQQESSV.SQSsQASTSSQLG..sDSSSAuGQQQESSV.SQSsQASTSSQLG..sDSSSASGQQQESSV.SQSsQASTSSQLGs.D............................................................ 0 0 0 1 +11503 PF11671 Apis_Csd Complementary sex determiner protein Pollington J anon Pfam-B_4678 (release 23.0) Family This family of proteins represents the complementary sex determiner in the honeybee. In the honeybee, the mechanism of sex determination depends on the csd gene which produces an SR-type protein. Males are homozygous while females are homozygous for the csd gene. 
Heterozygosity generates an active protein which initiates female development [2]. 25.00 25.00 28.30 27.90 22.10 22.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.21 0.71 -4.40 5 496 2009-01-15 18:05:59 2008-09-17 11:44:59 3 6 18 0 8 464 0 126.60 68 61.76 CHANGED TSRKRYSRSREREQKSYKNENSYREYRETSRERSRsRptRERSRE+KIISS.ss..NNYNYNNNYNN.N..pYNKh...........YYNINYIEQ..IPVPVYYGNFPPRPIMVRPWVsMQEQVPRFRYIGPLTPFPPRFIPPNtYR.RPPLNPRFRPMY ............TSRKRYSRSREREQKSYKNENSYRc.YRETS+ERSRDR.pERERS+E.+.....I....SShs.......s.......p..h....N...................N........h...p..N....s.....Y......Np.....p............p.ph................YY.N..IN.Y...IEQ......IP..V....P....l...YYG.NFPPcPh..tPWls.M.QcQlP...R.FR.a...IGP..T.sF.PRFIPP.NhYR..RP.P.LNPR..FtPh......... 0 2 2 8 +11504 PF11672 DUF3268 Protein of unknown function (DUF3268) Pollington J anon Pfam-B_4693 (release 23.0) Family This family of proteins has no known function. 24.70 24.70 26.00 25.70 23.80 24.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.39 0.72 -3.94 13 115 2009-01-15 18:05:59 2008-09-17 12:50:20 3 2 104 0 24 97 7 101.90 37 68.03 CHANGED tPlpCPYCuupspLsssphlYG.+phsshPhl.....YlC..ss...CcAYVGsHP......tTshPLGTLADtshRptRppAHcsF-pIWc..sppho.RocAYtWLApcLsl.s.ccsHlG .......hCsYCGu.tuhL.sp.u....sth.Ys.hp.s..p...s..h.....alCss...CsAalGsHs.......ps..c..hPLGpLAsspLRpt+ppsH.tsh-slhp...............ss...thp...RscuYpWL...A...ppLGl.sht.sHhG.............................. 0 4 10 18 +11505 PF11673 DUF3269 Protein of unknown function (DUF3269) Pollington J anon Pfam-B_4706 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 25.70 25.40 24.50 24.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.40 0.72 -3.54 3 181 2009-01-15 18:05:59 2008-09-17 13:34:48 3 1 153 0 5 62 0 72.80 80 98.93 CHANGED Ms.hEKYYLYRsDGTE-IKVtKcKDNVNsV+oLTGAHFS-EcKhMTDS-LK+FKAsHGLLYEEELGLQuTIFDI ....MP.KEKYYLYREDGTEDIKVIKYKDNsNEVYSLTGAHFSDE.KKIMTDSDLKRFKGAHGLLYEQELGLQATIFDI.... 0 2 2 5 +11506 PF11674 DUF3270 Protein of unknown function (DUF3270) Pollington J anon Pfam-B_4714 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Streptococcus. 25.00 25.00 25.70 25.60 23.70 21.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.14 0.72 -3.97 8 347 2009-01-15 18:05:59 2008-09-17 13:38:56 3 1 344 0 25 115 0 89.60 53 94.35 CHANGED hsL++hpsa.c..DhpYQc...cphPcYQ-aQshsppss..KLcELlFFupIAsFClhTVLhoFlhLuhpLsohaAFslAhslS....LulhpshcphIK+ .............MPVRKLQS..Y.....Es-YQE...ppQhP+YQsYsPEApocA..sLKElLFFlNIAVF.CIshslFSFIFLALKLuTsLAFshAIuhS....LllLpltRslIK+....... 0 2 6 15 +11507 PF11675 DUF3271 Protein of unknown function (DUF3271) Pollington J anon Pfam-B_4697 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Plasmodium. 
20.40 20.40 20.60 21.50 19.50 19.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.75 0.70 -5.25 12 39 2009-09-10 14:53:48 2008-09-17 13:49:26 3 1 5 0 39 40 2 169.30 39 85.08 CHANGED NIILSFFILVIFSNVK....AATFQDs.NsssPKsIuYsSVuQPhssFs+pccpHspYLDlINslh+DpScNhKYAYpGuNYHWlITDFDISIDNSSptLK+phScKtpEuLhhGosYFIuYIKDpIKaLlSpaMHKYDFEpNYtssLphLucDLKsLIYDcF-pchKpDLIKYEstPEscKh+cpAKchhcsLVpNSuhplpGYFIKlpcDusYhcLscspslYFsIsIsKs.uphsathKh.cs-lsEhlss ................NIIlSaFILsl.sNV+....uAoFQst.Nssps.........p.IuY.slspPhshhph.cppahpaLshINphhpcpscs.h.cYsapsssaHalITcFcl.IsNus..lpthh.ppt.p.h..us...h.hh...h.......h.pYsF.....t....h.ppLKs.h.spF..ph...hlp.p....p...................h.hps........................................................................................................ 1 0 6 38 +11508 PF11676 DUF3272 Protein of unknown function (DUF3272) Pollington J anon Pfam-B_4726 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Streptococcus. 25.00 25.00 41.60 41.50 22.90 22.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.78 0.72 -3.96 7 324 2009-01-15 18:05:59 2008-09-17 14:07:26 3 1 321 0 18 94 0 56.30 61 95.59 CHANGED shpQFlhhAlhTshETYFFNtSlhoGpahhAhFauhLLhRslphsYhls+hsctl.K.....p+K ..hpQFlhhAlhTAhETYFFN-uhMTGRYIMAAFWAILLFRNFRVSYVMGKIVDlIDQ...chs+K.......... 0 2 5 9 +11509 PF11677 DUF3273 Protein of unknown function (DUF3273) Pollington J anon Pfam-B_4727 (release 23.0) Family Some members in this family of proteins are annotated as multi-transmembrane proteins however this cannot be confirmed. Currently this family has no known function. 
25.00 25.00 25.40 25.20 20.10 19.90 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.80 0.70 -5.13 9 59 2009-01-15 18:05:59 2008-09-17 14:11:00 3 2 17 0 51 59 0 236.90 35 81.42 CHANGED ssEsspsRGPpapPhhsFF.sh.LRhGFhlQhluhh.LhhluhashGt..huIhsF.LpAhPE...phussha.hlhhhhuhaLlGTLhIh.FplhssDDo..KtsRGaRAGoKhLttAoolshlShhLphVpa..lhusaahstpWhschttstusWlhahhupllsuFuLhlYGuuhFaLEsYHsEGTuEphuWlshhhFhhAGlsElL..hsFhssGshhoLhhL......lulhsuolWAhhFEPlhc+asspLpposlpN-.....hhKscpshuY .........................sps.psRG.ph.PhhsaF......ssh.LRhGhhlQhhuhh.hhhh.hhshGs..hulhsF.Lpuhs-...phupuhhhhhhhhhuhallGslhlhsFQ.hlsDDo...p.sRGaRuGoKhLphAshlshluhhLphl.a..hhs.hahstpWhtchttstSpWhhahhuplhcuhuLhhYGhuhFhlEsYHsEGsuEtauahshhhFthuGlsElh..hhFhshush.sLhhh......huLhssslWAhhFEPlhp+hsspLpppslps-.....h.+.pph..Y............ 0 17 27 43 +11510 PF11678 DUF3274 Protein of unknown function (DUF3274) Pollington J anon Pfam-B_4733 (release 23.0) Family This bacterial family of proteins has no known function. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.94 0.70 -5.19 10 137 2009-01-15 18:05:59 2008-09-17 14:28:57 3 5 81 0 29 183 0 241.10 28 36.78 CHANGED sohPAMssLtshRFaQRMWT+RpRsG......csVtVG..sPPtphsLRtcsEs+YPGuGss...........sshhupuslsps-pRhINGEsLpPPacPcM.asGEs........tphsGsso...........tuG....hDsPDDVstslALGNstAch+hhtlssp....ssstchppsctp...tuaN.pG+s.sDQTpsaRssssss.s............sthllhRcETPsEsRtRMppsscuhp-NSYHSulL+SsENHRWVTAMDVAIGQAcoLDDPsWRclLlAhADW+h........stcphcphp-Lss......ap+LStcsQcLlcAospYYcpGsFPupp ..............................................................................................................................................t..h.sh..L...tFaQRhaThh.R.t.........hlG...sst...hh...p.hh.ssth............hshh.pssh...p.hhlsu.tL..shtsph..tGp...........h.Gpss...........huG....h.t.Dshshshulus..h..p.....h..t......................t...h.t.tht.....hN...t...tppo...hph...tt...................thhhlpRpcT.Pp..EsRtphtp..s.ssthttsoaHSull..pssEspRhVsAhDlAIGpup.olDDs..s.ahphLhthADW+h..............s.t......p..t........at.t.p..tsp.hhpts..Yh.pG.hP...t........................................ 1 3 12 18 +11511 PF11679 DUF3275 Protein of unknown function (DUF3275) Pollington J anon Pfam-B_4743 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 
25.00 25.00 58.10 33.20 19.80 22.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.49 0.70 -4.70 5 108 2009-01-15 18:05:59 2008-09-17 14:35:44 3 2 82 0 33 99 2 189.60 49 92.06 CHANGED IsLPG.pLslRTIs.GRNG-FsVG+LtTsIGEFcVKDALLDQacEGKY+G-FVIocIaP+pYsuGG+hVsEIRApLDuMsLDuhspLScD-scchussEsDPLDE.Ptl.usossuSAPsPuPSPpsspPuSpRAo+D.h.chuPFGhcs.SsPAppsspASssc-sDA.......ELFGslWP...lGEsVKLDoTVDR+pLRpQssRLGpLG.YcLDF+pQhWshc .........IslPG.pLslRTIp.GRNGsFsVGRLuTsIGpFslKDs.L-QY.EGKYcG-FlIppIhPpsYss.GGthhhEIRApLDuMTLsslDpLScDEA.cph.usp-lD...PLDE.....t..t.ssssuts..ttsp...t........tss.pussD.....sPFGhss.stPstts..s.ss..p-sDA...................tLFG.hlWP....LG-sVKLDoTlD.R+...tLRtQhsRLGp.LG.YthDhppQpWph.p........................... 0 4 20 28 +11512 PF11680 DUF3276 Protein of unknown function (DUF3276) Pollington J anon Pfam-B_4744 (release 23.0) Family This bacterial family of proteins has no known function. 22.20 22.20 22.20 23.90 22.00 22.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.66 0.71 -4.32 27 215 2009-01-15 18:05:59 2008-09-17 14:53:19 3 1 206 6 67 189 126 117.10 36 92.57 CHANGED M..p-pc.h......-pE-lFS.Ksl+AG+RTYFFDV+uT+usDYYLTITESKKhspcDG...phpacKHKIaLYKEDFppFtEsLp-hhsaIhpcph.....p-Vho-capcs.pccppp.tt...pt...........spthpDlsF ..........................t......-+--lFS.Ksl+AG+.RTYFFDVKcsRt.sDhaLsITESKKhs...ss-G.....pF-KHKIFLYKEDFpcFhpsLp-slsa.Ipppph.....pcshpcp.tpps.pcpt..t.................................................................. 0 32 60 65 +11513 PF11681 DUF3277 Protein of unknown function (DUF3277) Pollington J anon Pfam-B_4749 (release 23.0) Family This family of proteins represents a putative bacteriophage protein. No function is currently known. 
25.00 25.00 25.10 25.10 24.50 24.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.80 0.71 -4.71 7 159 2009-01-15 18:05:59 2008-09-17 15:46:29 3 1 142 0 23 117 4 138.20 38 96.58 CHANGED Mu.TYSFhDVoASlsGPsGuhsLGYGsAsAEEGIslAhussKNTMTlGADGEsMHSLHADKSGpITlpLLKTSPhNpKLMAhYsAQSlsStLWGpNlIslpppsuGDlssuRusAFpKtPDhp.AKDGshlpWVFDssKIDphLGoa ......................................shthsh.ts..h...l.s.Gtusu-p.sIslshsuspsTMTlGADGEVh+SlpADKSGTlTlsLLpTS...PsNppLSh.hYNs.Q.p.t.Ss.s.saGs.sslsI+spsuuDlh.TApsstFpK.PD.stupsusshsW..sFcshclD........................................................... 0 1 9 16 +11514 PF11682 DUF3279 Protein of unknown function (DUF3279) Pollington J anon Pfam-B_4753 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 26.60 25.90 24.60 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.57 4 650 2009-01-15 18:05:59 2008-09-17 17:06:46 3 1 218 0 13 267 1 116.50 50 91.63 CHANGED phlKsaLA.sups+hlsAcpAtptshtchhC.hSCGssLpLp.tsDsQ.sWFEHDphslsE+thtpCsalcPEc+E.phl.+LpphshsshslscstpWaCVhCppcY.GcKhCsuCtTGlYShssspp .........................hhhpsahA.DupG+hssA+pAtpt..shpchhC.h.CGssL....php.thDsp.PWFEHsp....sl.sE.+t..pCsalcPtc+Ehphl.....+LQphsss.shPVs...+htpWaCh.CccDY.G.E...+hCs+CtTGhaScths.................... 0 0 1 4 +11515 PF11683 DUF3278 Protein of unknown function (DUF3278) Pollington J anon Pfam-B_4766 (release 23.0) Family This bacterial family of proteins has no known function. 
24.40 24.40 24.90 25.00 24.30 24.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.15 9 428 2009-09-10 23:48:43 2008-09-17 17:07:30 3 2 313 0 30 252 1 124.50 34 72.54 CHANGED MK.KEThs-KlIKpFYGIoGPLDEaKRppss+lGNpsFIhLahhhlhuslluhhLutpaPp..lAhhh...shlhlhshhsusYlhaps++ptlsslEl-htsp.ptp+phhhtulKhGlaaslhhahlhsL ................MK..KEshopKLI.K+hYGIsGPLDEa++pEs.s+lGNpsFIhLFalhhhusLIsLl..L...uh..ca...sp..plAhhYs...llhllsh.lh..usYl.s.hph.+.+ptl...st...hE.....p...h...h.s.t.....Kpp+.p....h....+....h.....u.lphGlhaulhhahh...h............................... 0 2 12 17 +11516 PF11684 DUF3280 Protein of unknown function (DUF2380) Pollington J anon Pfam-B_4754 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 27.70 27.70 29.30 34.20 26.20 27.60 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.39 0.71 -3.98 26 121 2009-01-15 18:05:59 2008-09-18 09:19:21 3 3 79 0 53 129 9 129.80 30 79.71 CHANGED s..tplAlFshphlDoSspssh.Gspss-tuRLshlsctlRctlsppGtapllDluPss...tpls+hsshtpCsGCctchApcLGA-hulsGpVQKVSNLILshNlhl+Ds.coGchlputsVDIRGNTD-SWpRGhpYl...l+splh ................plAlhshphhDsotpst...s.pss-ttRLthhpstl+ptlsppG.apl..lsh.s.sht...tthtp.......s.....s....htpCsuCthphAcphGA-hslsGhVpKlSsLIlslslhltDs....coGchltstss-lRusoD-oWpRuhpal...lcp.h.......................... 0 15 30 36 +11517 PF11685 DUF3281 Protein of unknown function (DUF3281) Pollington J anon Pfam-B_4757 (release 23.0) Family This family of bacterial proteins has no known function. 
21.10 21.10 25.00 23.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.81 0.70 -5.22 6 95 2009-01-15 18:05:59 2008-09-18 09:36:49 3 3 24 0 4 63 1 178.40 40 67.00 CHANGED KKKlLIGuslISSssLLuSCGKoETAsELRIVDpCNssNDLCcFELsDAlVSRYTNlLGKTIERVESQTPL+..cIpGTITWNsPAGAoLADNo-VpTcLGsuCQsDsCTANSNPTAYNLssGoNoISVSGhVTVsGKplDLAo-V.PslIDTttVtsS.HVFPTGsLPsGLTLpsLVssLNhsSc.....sAHGTFSusGSNL+ITCsoGYEWLDstsPsYG.phThucssRuVAhspW.p.....-psshhstptDshs.hTpNG.hsss..sshsWpAGCWs .........................................KcKlLIuhsllSusslLtuCuKoETsp-LRIV.pCsss......pD..........hCchEhsss.sppYss.LsKpl.h.psppslp..tlptolpW...tsuphhs.ssl.sphs..tCpsssCssstNPsAaph...Gpp.lo..V.GphTVssK.......phslspph...s.sh..h..o..hhF................................................................................................................................................................. 0 4 4 4 +11518 PF11686 DUF3283 Protein of unknown function (DUF3283) Pollington J anon Pfam-B_4778 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 26.00 35.80 24.90 24.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.77 0.72 -4.17 10 120 2009-01-15 18:05:59 2008-09-18 09:52:53 3 1 119 0 13 56 3 60.80 64 84.46 CHANGED NLSLLPssEKN+IELDKQASFhVW+lKpAKsGPEsItcphpKlsDssE+saFcQulEKYK+ NLuLLsAsEKN+lELDKQASaLVW+LKpAKuGPEcIscQhuKIusEuEKpWFQQSlEKYKR. 0 1 4 9 +11519 PF11687 DUF3284 Domain of unknown function (DUF3284) Pollington J anon Pfam-B_4781 (release 23.0) Domain This family of proteins with unknown function appears to be restricted to Firmicutes. 
23.70 23.70 24.80 24.20 23.50 22.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.34 0.71 -4.37 15 343 2012-10-02 19:24:03 2008-09-18 10:54:25 3 1 293 0 39 178 1 119.40 28 84.05 CHANGED M.cIspp...LshsscchFsplhpSshhDlcptTG+clthtpL.sGhsYpKpasstt....puplc..IT..chttsphYphpTsos+ssapscYplpsls-spoclpasEphpspshhtphNshlsshlhuh .................................................McIspp...LplsucphasplhpSllhDl+psTG+p.l.ph.cpL.pGapY..hKph....up.st.....puplc....Is..chhtsphYphphsoscsp.hslpYplps.l.s-s.p.sclpYsEphpshs..hhp+hNshlhuhhhs..................... 0 11 25 28 +11520 PF11688 DUF3285 Protein of unknown function (DUF3285) Pollington J anon Pfam-B_4791 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 82.00 81.70 19.20 17.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.12 0.72 -4.34 13 68 2009-01-15 18:05:59 2008-09-18 11:37:32 3 1 68 0 29 68 90 45.00 62 71.33 CHANGED ssPsPSaVKLAMRNMVRKGupSLhHFuLTslGLLGhllhlAaLsR ...pPsPSaVKLAMRNMVRKGupSLhHFsLTslGLLGhLlslAaLsR. 0 5 19 27 +11522 PF11690 DUF3287 Protein of unknown function (DUF3287) Pollington J anon Pfam-B_4801 (release 23.0) Family This eukaryotic family of proteins has no known function. 27.00 27.00 27.70 27.70 24.60 24.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.42 0.71 -4.00 15 46 2009-01-15 18:05:59 2008-09-18 12:48:28 3 5 6 0 16 44 0 106.90 28 35.22 CHANGED sp-h.s............t.........sPtah.sscsFpPu.........sVhhPpslh...phssc-+ccshsFhcpVtspH+chscc+pcLthKhQsl......hERch+tLEScPtcW.chshsplAcIPchLh.alctpspss ......................ps.sss.............ss.a..sscsF.Pu.........slhhPpshh...phssc-pccshsFhcplsspH+chpcc+pcLthKhppl.......ERch+tLEucPhpW.hhshsphA.lP..hh.hh..tst.s................... 
0 15 15 15 +11523 PF11691 DUF3288 Protein of unknown function (DUF3288) Pollington J anon Pfam-B_4815 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 28.00 37.00 22.50 16.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.11 0.72 -4.25 18 73 2009-01-15 18:05:59 2008-09-18 13:06:01 3 1 73 0 29 74 110 93.60 43 93.36 CHANGED p-QsHPhappDRphVDpLLsps..P....s-.sLsELARLhIRYcsFPGAcDlppDL-+lLppWpLoc-cLap+TRplaucG............tVhsutssp-p-Dh ....-QpHP.appDRshVDpLLspc....P....s-hsLs-LARLhIRYpsFPGAc-IQpDL-+lLppWpL.Tc-cLFpKTRpIaupG............tVapstss.p-pps............ 0 5 19 27 +11524 PF11692 DUF3289 Protein of unknown function (DUF3289) Pollington J anon Pfam-B_4824 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 38.70 29.90 21.40 21.40 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.97 0.70 -5.13 6 334 2009-01-15 18:05:59 2008-09-18 16:00:26 3 6 270 0 35 192 3 246.80 54 93.64 CHANGED Muss.......LuFPhslFpTQ++hNDYuADDM+sGDLo-ppLppsFsLpcVSs+lD......PY...pLp+hssFs..p.pah....st+hchlT+pcCscILFDEhRshShsFShaGsY+pLIs+hIsHMQpssGusFpDhpLspAh+-hIhsDpopsSolttIKpIlsctlDacptlaP.phpsphthsls+.shLPKFsRhpDpFNGLGIoVHDlaATpIoIcSLplppcpYpAlV+YpsQDHFGLDcpDI.ph+F+sFpFFRlWFVLQRaNcFGaKPFMTNMcAslpIsGs 
........................................................h.s...........lshPCTLFETlshFDDaSADDMQYGDMsEpDF.LSLGLSDISAKVDPY...RLIK.Y.c..h....uP...s.shuh.....ssoSGsKISppECsDILFuEMK-LAK.MF.SFF.G...pYKoLIc-LI-HFRYG....NG.ssF+.Sp.................pLNhu.F+E...+Ipp..h....s.h.N.......S..slp....lIK....csI.-ss.I...sps.s.s.....h.Y..p......s...h....hhpp..I+......s.pLL.....u....SRL.......sKF........N.......cF.cDs.FNGLGISVHDIsAp+IoLhShQ.h........Y......s.h.uao.ATlHFhuQDHFGLDsTDI....KN......ph......Ys.+a....RF.FRIWFhLQRa+c..F..A.........FKPFhTNhpsh......ct......................... 0 10 16 23 +11525 PF11693 DUF2990 Protein of unknown function (DUF2990) Pollington J anon Pfam-B_4826 (release 23.0) Family This family of proteins represents a fungal protein with unknown function. 21.10 21.10 21.40 22.00 20.70 21.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.96 0.72 -4.11 6 16 2009-09-10 22:19:26 2008-09-18 16:02:46 3 2 16 0 12 19 0 63.40 49 26.64 CHANGED lusLsAsShAAPshFDNhYDYSDDLAEFLGRVSKaI-su.KDlhsuosTCDTSpIALPAaASGLP ..usLsAsuhAAPs..hF-slYDaSs-LAEFhu+VSKaI-su.+-lhs..uosoCDsSKIuLPuaASGLP.... 0 1 3 9 +11526 PF11694 DUF3290 Protein of unknown function (DUF3290) Pollington J anon Pfam-B_4838 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.74 0.71 -4.47 10 692 2009-01-15 18:05:59 2008-09-18 16:08:16 3 2 519 0 48 304 4 107.30 33 97.65 CHANGED sFYoYsYlpoQtsppsahthlLhllhllshlhFshhYlRc+hsTKYRDLuIIulLhLlLhlGIQIssapsspsscsQspphlsFIcsVAcDhsVccs-VaVNoouhsDGhIV+Is...sphYplshss-sssYhLpKscLhs....chV.VcK ..........pFYshpYl.sp.s..pshlhhhhhhhhlhhhhhhhhhhhpt..pht.caR-l.hh..h..h..hhhphppap..pstpsp....pt.htFlc...lupp.tVs.pplhlNoot.t-G.hll+ls...p.aYch..ss-.p..pYhLpch.Lhp..p.lchl.................................................... 
2 11 24 31 +11527 PF11695 DUF3291 Domain of unknown function (DUF3291) Pollington J anon Pfam-B_4872 (release 23.0) Domain This bacterial family of proteins has no known function. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.99 0.71 -4.76 34 150 2012-10-02 00:20:33 2008-09-18 16:51:27 3 2 130 0 59 154 71 131.50 34 84.44 CHANGED LAQlNlu+.ltu.shDsPpls-FhssL-plNuLA-puPGFVWRhp...s-susu...T.shpshsDPt.....hllNhSVW-sl-uLpsFlacoh.Htphh+RR+EWF.cphscsphVLWWV.PsGHp.PolsEAhcRLpaL+ppGsostAFsa+psa ....................................LAplNluh.hht.Ph-s..P.....ht-FhssLs.lNuhA-pusGFlWRhp...s-.s.ssu.........sslphh.s-st.....................hlsNhSVWcsh-uLpsFsapsh.Htphh+..R+p-WF.pph..t.csthshWWV.ssGch.P.ohpEAhpRLphLcppGsoshAFsapp.......................................... 0 17 43 53 +11528 PF11696 DUF3292 Protein of unknown function (DUF3292) Pollington J anon Pfam-B_4874 (release 23.0) Family This eukaryotic family of proteins has no known function. 
23.70 23.70 24.70 23.70 22.30 23.60 hmmbuild -o /dev/null HMM SEED 642 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.75 0.70 -6.65 8 115 2009-01-15 18:05:59 2008-09-18 16:53:43 3 4 60 0 98 119 2 426.50 31 86.56 CHANGED GPTDSHsLSQl-s.-E..KGLAQKAGsos..cVoDlGWs.pSDpl-EclVsGLSNEDLWMLIRRFNKQIYaVKAVP-uPLQcLDLNRAEDEpFSPDKLRATLERFYTTVlVGLTsFhKHIARLRSWREPcRTssFCu......VYFlAWLLDhLlPTIFGhLlsLVlaPsoRpaLFPPAPlALVD..osTGGVQKPKAGVLGSHDSlTGAPEKYKGEAAEQEASNLlsSlAoVAVGSAAGKHDQGsP-s.....APMEuSVPDAhDlsu+uADAQuAAtG-sPs-sHDKTRcPMKcTVhNuANpuMRVlSDIsDsaERFuNALSPTPPFsusTPpLRLuGlLuuGhLsSLlTSSYVFlKhuTFhlGhuFFGDPllQRslsYLN+caPpWp+LL-LQNoLLKGVPTNAQLTLTLLRIGEuNuAPLPPPPSS.......SL+KsPopPAolsccplsL.sASppEIspAAAP-Ppsssscpcppsc..K...KKshGuRlluFF+GTTATGIEoKLAlDRsRAtAGSpHAKN+lGlL++KGccTL.PhGPlcFDARYKGKRGsVVIDSSpcPPlLYFTTD.oshLsDhRLEsRKKGoVLFsIPVTDIpEhRKIGGLGWKGKLlsGWAVG.oKEVVDGLVIsG.ccscQpYQLTAMpsRNpLFNRLVAIDGQVWESa .................................t....................................................................hh.sh.sp...ph........a.h...................s.tthDh..up..ppphs..spplRuplERhYhslh.thhthh..pclhRLhoW..p..-..RTthhhh...................sY.huWhhshlhshh.hhhhhLl.h.s...R.hhFPsss.uhhs....psu.shtps.ts.huspsshT..Gus..cphc.GEusEp...EA...pshltuh.sslsh......tshs.up..s.....t.s.....t.........................t...tt....t......t.............................................tscsc..h.........tth.........h.t.th.t..hphhtth.D.hE+hus.................hh.....................a..........h.h....hh...hh.h..hhhs..hhh+..shhhGhshFG.pPll.......hhthlsp.hspWpp.h.....hphpp..o....l.h+GlPTsuQLslTLLRlGEhstsPlP.sPP..............................................................t..t..........................s.tt.....httt.t.........t................t......t...............................t.t...phhthh+t.s..htts..hpsththsphht..h.s.....t........s+p.hluhl...................t..............................................GPhp.......F.s+hptppGhhhlst..t....t............s...l.a...............stt.........................
...........................ts................aphslt-l.phpKht......G...hGh.pt+hh...sths...tp.......chhDGh.l.....p..tpth.......hsuh..RptlFNRLhuhst.ph.Wp.......................................... 0 30 49 83 +11529 PF11697 DUF3293 Protein of unknown function (DUF3293) Pollington J anon Pfam-B_4879 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 26.50 25.30 24.20 24.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.37 0.72 -3.86 40 218 2009-01-15 18:05:59 2008-09-18 16:58:22 3 6 213 0 70 177 62 70.30 36 47.65 CHANGED ITAaNPhS.phhosppNphtsppLtpcL...tphshshh..sshG....tssst...sW.hEcSahl.slshp....pAhpLGpcFpQNAI .....ITAaNPtS.....phlSsp-NphpsppLtppL...hph.shs..hh..ps.s........sssph...pW.hEcSasV.s..hshs.......pAhpLupcFsQNAI.................... 0 17 37 57 +11530 PF11698 V-ATPase_H_C V-ATPase subunit H Mifsud W anon Pfam-B_2481 (release 6.5) Family The yeast Saccharomyces cerevisiae vacuolar H+-ATPase (V-ATPase) is a multisubunit complex responsible for acidifying organelles. It functions as an ATP dependent proton pump that transports protons across a lipid bilayer. This domain corresponds to the C terminal domain of the H subunit of V-ATPase. The N-terminal domain is required for the activation of the complex whereas the C-terminal domain is required for coupling ATP hydrolysis to proton translocation [3]. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.44 0.71 -4.18 34 388 2012-10-11 20:01:02 2008-09-24 15:29:57 3 8 307 1 258 379 5 116.70 42 25.79 CHANGED +phooFDcYhuElpSGpLcW.SPsHps........ppFWpENsp+h.c-ssacll+pLsclL..............ppop..DshsLAVACpDlGchl+h.aPpG+sll...pphssKtclMpLhsc..sDppV+hpALhulQ+lhspsWc ................t.pLooFDEYssElpS.G+L.cW..SPsH+s........ppF..W+ENAt+h..p-csacll+hLsclL................psSp....DsplL.A..V.As..aDlGcaV+..............p.....a.Pc.G..+pll...-pLG...uKphVMpLhs..c..c..DppVRapALhAlQ+lhhpsap......................... 0 97 150 216 +11531 PF11699 CENP-C_C Mif2; Mif2/CENP-C like Mistry J, Wood V anon Manual Domain CENP-C_C is a C-terminal family of fungal and eukaryote proteins necessary for centromere formation. CENP-C is the inner-kinetochore centromere (CEN) binding protein. In the budding-yeast, Mif2, the yeast homologue, binds in the CDEIII region of the centromere, and has been shown to recruit a substantial subset of all inner and outer kinetochore proteins [1]. Mif2 adopts a cupin fold and is extremely similar both in polypeptide chain conformation and in dimer geometry to the dimerisation domain of a bacterial transcription factor [1]. The Mif2 dimer appears to be part of an enhanceosome-like structure that nucleates kinetochore assembly in budding yeast [1]. This C-terminal domain is the region via which CENP-C localises to centromeres throughout the cell cycle 2,3]. 29.00 29.00 29.00 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.83 0.72 -4.00 11 190 2012-10-10 13:59:34 2008-09-25 10:52:40 3 10 165 2 133 209 11 84.50 36 12.58 CHANGED F+htKhhsp.s.FhuoGhlcLPs.uu.KpspsS+csphlFaVhpGtlpVolpcspFhhupGshFQVPpGNpYsltNht.sc-ApLFFsQ ..................h+htKhhsp.s.FhuuG.hlcLPP.sutKp.ps.oppsp.h.lFaVhpGplpVTl.........pc.....spFh.lspGshF.VPp.GN...hYslpNhh.spputlaFsQ............. 
0 37 73 106 +11532 PF11700 ATG22 Vacuole effluxer Atg22 like Wood V, Mistry J anon Pfam-B_14077 (release 22.0) Family Autophagy is a major survival survival mechanism in which eukaryotes recycle cellular nutrients during stress conditions. Atg22, Avt3 and Avt4 are partially redundant vacuolar effluxes, which mediate the efflux of leucine and other amino acids resulting from autophagy [1]. This family also includes other transporter proteins. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.52 0.70 -6.02 50 1392 2012-10-03 03:33:39 2008-09-25 16:14:42 3 4 1200 0 527 2826 2253 417.40 25 93.84 CHANGED so++ElhuWYhYsauspsaslsshuoalPhhlpphucp.tGh.hssp..sC.........................................................tsspCllhhhG..hplsssSashYshulSshlQullhlsluuhAD.....ausaRKplLlsFuhlGuhsshhahhl.ssppa....hluulLhllus.ssa....us.shVhhNSaLPhLscspsphptttt..............................................tt.......t..t..h........................................................oplSuhGhulGaluullltllslhllhshpss...........shsl......plslhlsGhWWhlaolPshhhL+s+su.......................tthpshhsh..lhhua+pLhpsh..+psppL+pshlFLsuWFlluDulsTlsususlaupspLshssss.....LshlsllshhsuslGuhhas.hlsc+hslpsp.........psllshlhhhtllslaG........hlGhh.t......hhGhppsWE...........hashushYGlshGulpShsRShaupllP......GpE..utFFuLauls-KuSShlGPhllGhIhDtT..G...................slRsu..........FhFLslLhllslslh.hhlsl-+G+c-ut 
...........................................................................................................................................................................................t......huWhhaDauspsa.shhl.h.o.h.h...h.shah.t.s.h.s.......................................................................................................s.....h.ps.s.u..hh...a.shu....luslllul.luPllGshAD..........ht.uh.++...h...hl.h..h...h....s...h.l..s...s..h...ss..h.h...h.....h....hl.....s..ssth...................hhsl..sl...h....h..l..u....t...luh........ss..u.l..hYs....uh.L..splsssc..ph........................................................................................................................................................................................s+lSuh.Ga.uhGY...l.Gu.hls.....h.l..l.s..l.h...h.h.h.t.spts.............................t.shs.h..............R.ls.h.hh.sAlWa...h.l..F.s.lPh..hh..hl..cp.t..........................................................................................h.t.h.tph.....lt.tu.a.p...c...L.....h...p.......o.....l.......p..p...l..p....c...h..+..s........l....h...h....F..L..l.A...h..hh...a..p..DGl...s...s....l..hs....hu.s.ha.u.t.........t..t..h..........G.hs...ssp...............lll.h...h.l.l.s...p....l..s..A...h......h.uul....l...hG....hl.sc...+.h.Gs..+................ph.l....h..h...s.l....h..hh..h.h.l.s..h.h....u..............h.h..............................h.p..s.s.ht.....................F.a.l.h.u..h...l...l....G.h.h........h..G..s...h.Q...u...h...S...R...Shhu...cl.hP..........spp.......upaFGh.Ys.ls.G..Ks.suhlGPhl..h...uhh.sp..ho.....G.............................s.h..p.....h.u....................hh..s...l..h.l..l.h.l..l..Ghhlh...hhl...........t................................................................................................................................ 
0 195 351 455 +11533 PF11701 UNC45-central Myosin-binding striated muscle assembly central Wood V, Coggill P anon manual Family The UNC-45 or small muscle protein 1 of C.elegans is expressed in two forms from different genomic positions in mammals, as a general tissue protein UNC-45a and a specific form Unc-45b expressed only in striated and skeletal muscle. All members carry up to three amino-terminal tetratricopeptide repeat (TPR) domains towards their N-terminal, a UCS domain at the C-terminal that contains a number of Arm repeats Pfam:PF00514 and this central region of approximately 400 residues. Both the general form and the muscle form of UNC-45 function in myotube formation through cell fusion. Myofibril formation requires both GC and SM UNC-45, consistent with the fact that the cytoskeleton is necessary for the development and maintenance of organised myofibrils [1]. The S. pombe Rng3p, is crucial for cell shape, normal actin cytoskeleton, and contractile ring assembly, and is essential for assembly of the myosin II-containing progenitors of the contractile ring. Widespread defects in the cytoskeleton are found in null mutants of all three fungal proteins [2]. Mammalian Unc45 is found to act as a specific chaperone during the folding of myosin and the assembly of striated muscle by forming a stable complex with the general chaperone Hsp90. The exact function of this central region is not known [3]. 
25.00 25.00 25.10 29.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.77 0.71 -4.58 31 280 2009-01-15 18:05:59 2008-09-26 10:22:29 3 10 207 3 188 266 1 194.20 28 23.10 CHANGED sspplcslLppLs.......................................................................................phstssRshAslhhs+h...L...csppcshpchhscalpshl.....pcsch-..................................shlhshsslsslaPsss-lsuslhhsc.....shhthlhshss............tcscphphssLELLsuAChscssRphlscphhphLcphhppspppt..........................hphhAslsLsKl .............................................................................................................t..cplchllspLh...........................................................................................h.lssssRhhAolhls+l...................L.....-st+-p...a...pchhpcalpsph.....ppts.h-..................................splpAhpsloslh.ussDlusplhttc.....Glh.ph.llsLss............scpcp.phsslEhLhtAssctsptshIhppslslLcclhppscs-p...........................l+lhAhVuLsKL.................. 0 46 82 138 +11534 PF11702 DUF3295 Protein of unknown function (DUF3295) Wood V, Coggill P anon Wood V, Chahwan C Family This family is conserved in fungi but the function is not known. 
21.40 21.40 23.90 23.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.92 0.70 -5.51 9 124 2009-09-14 14:02:36 2008-09-29 10:57:28 3 5 88 0 107 121 0 354.50 33 70.17 CHANGED pptuuDlPELSuSV-SssS-pApchppphcp.......................Phs.th+PslhRpDSh.uShuRG+E+HlTShsLE+MVloIp..EKKsLEPLusshssluP....ht..sppspPp.ssSpoospc.s...........p.hpsoESsooousssNsS-.uu..slssusTSsouothhsp.......sSllRGF..SPS.lSoShRSpsphussPsPs.......psusshK.t..KKKtuhFTLGGSSGs-.EsSF-DRh...............shpss++poL..............Ss-hp+usssp................KKssoFccpV.sh+shp-ps.-sD.sthps---.VsESAI..-DD-DSDW.EDSlpESGpuSl--+phFQRVDS+PN.LsS..RRSLLThMhHQss+h....s.sSRSosALpRSR.ToPNGPSlsASPs-sDEtsLhM.RG...........splPRScshlh+sos.pSsuH....SPRTTRRNMLATELTESLRRpLLWERpQKuAT..ANAhLKRRHTupD.hANLppYPp......................sssscDK......Dst..ssSaN+Ys.chG.hpYHspGW ..............................................................................................t......hP..StShtS..s............................................c..h.t.............phhss.thtphh..l......pt........h.................................................................................................................................................................................................................t....p.p...hasl.GsS.ss..t.........t......................................................ott..tps...............................................h.sshtp..p............pshpt......p............p..spp-.l..s...ESAI..t...-D-....ss-W..EDShp.-SGpuSh--c..hFpRV.-S....+s..s.LsS.............R.SLlT.hhht.psp...+............s.sSpSosAl.p.s.R.....sssshssSP....c..s--...ssLhM..+u.................chP+Sps..hhshs....tthtt...........SPRTTRRNMLATELTESLRRpLLWERQQK..sss.....ss...AhhKRRHTupD.hssLppaPt..................................s.ht.tsp......p........thsphh...p...s..tYHspGW........................................................................... 
0 21 50 91 +11535 PF11703 UPF0506 UPF0506 Bateman A, Ranganathan S anon Ranganathan S Domain This uncharacterised family is found in Schistosoma genomes. Although uncharacterised it appears to belong to the knottin fold. The sequence is composed of two repeats of a 6 cysteine motif. 21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.82 0.72 -4.01 11 31 2012-10-01 22:06:18 2008-09-30 15:03:58 3 2 6 0 18 81 0 56.50 41 53.64 CHANGED CRo.GQhCSRThF+RCCGNLVCQL+GaFNGsCVpCLAEt+FChhSSECCScRCRhFRCR ...........C+phGp.Cs+ThF.p+CCG.shlCpLpu.FpGpCV..p........CLstsphChpss-CCSt+Cphh+C+....... 0 17 18 18 +11536 PF11704 Folliculin Vesicle coat protein involved in Golgi to plasma membrane transport Wood V, Coggill P anon Pfam-B_3276 (release 23.0), ADDA_17305 Family In yeast cells this family functions in the regulated delivery of Gap1p (a general amino acid permease) to the cell surface, perhaps as a component of a post-Golgi secretory-vesicle coat complex [1]. Birt-Hogg-Dube (BHD)4 syndrome is an autosomal dominant disorder characterised by hamartomas of skin follicles, lung cysts, spontaneous pneumothorax, and renal cell carcinoma. Folliculin is the protein from the BHD4 gene and is found to have no significant homology to any other human proteins. It is expressed in most tissues. These same symptoms also occur in TSC or tuberous sclerosis complex, suggesting that the same pathway is involved, and it is likely that the target is the down-stream Tor2 - an essential gene. Folliculin appears to bind Tor2, and down-regulation of Tor2 activity leads to up-regulation of nitrogen responsive genes including membrane transporters and amino acid permeases [2]. 
21.10 21.10 22.40 21.40 20.70 20.70 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.94 0.71 -4.67 20 292 2009-01-15 18:05:59 2008-10-02 11:05:11 3 3 213 0 195 274 0 170.90 24 32.06 CHANGED stsaVSopaPssspcYshLpphsh+oLSsEs..............shsusPlhFGDshcGasluhsFKlpDspARGscR+Yullllscsc........hp.LhpsWsalsptFsclIshIpppspp...tppptpppppsspps...................................sth.ss..........hhRtpshpps+SLsELTsD-tIFl+lHtahshlLpslst ....................................h...alShp.s..st.athlpp.shR........sLStEh.............................................tsscsuslhFGDs.ppG.a.shuatFplhD.pARGhpRhYullhlsscc.........t.lhppashlhsthp.c...lhpplpstsc+hh..tpppttppp.ts.p.s...........................................................t...s..............p........t.....t.........tsRsLsplssp..hahplH..hs...lL.th.s........................................................................ 0 45 80 143 +11537 PF11705 RNA_pol_3_Rpc31 DNA-directed RNA polymerase III subunit Rpc31 Wood V, Coggill P anon Pfam-B_203281 (release 23.0) Family RNA polymerase III contains seventeen subunits in yeasts and in human cells. Twelve of these are akin to RNA polymerase I or II and the other five are RNA pol III-specific, and form the functionally distinct groups (i) Rpc31-Rpc34-Rpc82, and (ii) Rpc37-Rpc53. Rpc31, Rpc34 and Rpc82 form a cluster of enzyme-specific subunits that contribute to transcription initiation in S.cerevisiae and H.sapiens. There is evidence that these subunits are anchored at or near the N-terminal Zn-fold of Rpc1, itself prolonged by a highly conserved but RNA polymerase III-specific domain [1]. 
28.90 28.90 28.90 28.90 27.40 28.60 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.97 0.70 -4.42 17 327 2009-01-15 18:05:59 2008-10-02 11:50:45 3 7 238 0 222 301 0 213.80 25 92.96 CHANGED MS.......hRsuutsshs...........shsslsashs..tth..stssss.aPph.slPls..t.ssscEchhsthhhsatst.h+-uPaYTssh..t...............t.tt.............t...pcGlpRYSDKYppp++hssolcp..cP.aphchFPcELatshthsppt.pth.......s+h+pcssl.ph.......p.D.s.thphLE.Khpp..sp-.-ctpptct...........---cp-E-.D-Da....D---D..sDY.AEpYFDsG-t......DDhsD...................................tssa ....................................................................................................Mu........RGtu.tut.hsh................shptls.h.sts...t...................sh...pP.......s....slFP.........sh...hP.ls.....h.hsp.p-chhlthttphppt.h.+puPaahtst...t.....................................................................ppslcRYSc.+.Y.pt..t.p............t.....ph.ph........ps..hs..h..chhPp.ELhtphph...pp.p.t.........t.h..h....................sphttp..sh..t.............................t-....p..ch...p...t....L..EtKt..p.p...t.s.....p.-p-ptpptpp......................cccpc--...t..t---h.....-.--.p.-..tsDY.hppYFD.sG-s......D.D..-..............ts.................................. 0 59 105 164 +11538 PF11706 zf-CGNR CGNR zinc finger Bateman A anon Pfam-B_19432 (release 10.0) Domain This family consists of a C-terminal zinc finger domain. It seems likely to be DNA-binding given the conservation of many positively charged residues. The domain is named after a highly conserved motif found in many members of the family. 20.80 20.80 21.10 20.90 20.50 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.56 0.72 -4.45 80 961 2009-01-15 18:05:59 2008-10-03 09:25:53 3 5 409 1 366 972 15 43.60 41 22.67 CHANGED Rl+tC...ssssCshhFlDpS+sssRRWCSM.......spCGNRsKsspaRtRp ............RL+tC.....ts.ssCphhFlDpS+stpRRWCoh.......psCGNRt+sttaRsRp........ 
0 143 279 331 +11539 PF11707 Npa1 Ribosome 60S biogenesis N-terminal Wood V, Coggill P anon Pfam-B_2493 (release 23.0) Family Npa1p is required for ribosome biogenesis and operates in the same functional environment as Rsa3p and Dbp6p during early maturation of 60S ribosomal subunits [1]. The protein partners of Npa1p include eight putative helicases as well as the novel Npa2p factor. Npa1p can also associate with a subset of H/ACA and C/D small nucleolar RNPs (snoRNPs) involved in the chemical modification of residues in the vicinity of the peptidyl transferase centre [2]. The protein has also been referred to as Urb1, and this domain at the N-terminal is one of several conserved regions along the length. 22.10 22.10 22.40 23.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.91 0.70 -5.44 42 265 2009-01-15 18:05:59 2008-10-03 11:36:46 3 8 231 0 187 273 0 302.70 22 18.51 CHANGED sths-lhpsWshhspsss..shhshlsplLsthlthlssphp....hpphG.sl................s+hlLpp..c......hchl.h+uLsutc.............plhp..ssLcLLsplVp.suuthAcclhssh..ch..shphh.phh..pppphtp.ptt..............olRpshlcFhLuhLphs.ssth+pclLpp+ch.hsshhctlpp.DshpslhplLpslcscllp-p.sls+stKsclasphsLp+lss.LYthcsp........................slschs+chLhtlsTssphGlhhssstha.tt......................................h.ppphplpNphL.hpllp.tL+PtpsshptpL.......llpIlcssPELlusYhsch.tphp....P+hossWhuts 
......................................................h....phhphaphhtt.tpp.....h.hl.thLthhlhhhssphp......ht.hu.tl....................s+hllpp.c......hchl.csLsupp...............phhp...ssLpLLsth...Vs.pusthAcplhpph..ch..shpslsphhphpctphtt....................................slRpshlpFhLuhLth.....s..ssthhpplLpp...+ph...hsshhct.lpp.Dshphlh.lLpsLcppll.............p..........s.............p.sls+..stKsphFspt..sL.pplss..LYshpst...................................................................t..tltchsHphLhtlC...o...s.p.pGlhh.ss....................................................t.tp.th.tht.NthL.hphlp...hl.+.shp...ss.hptcL.......llpIhcssP-LlssYhpph.hpht....P+ho.stWhs........................ 0 56 98 151 +11540 PF11708 Slu7 Pre-mRNA splicing Prp18-interacting factor Wood V, Coggill P anon Pfam-B_999 (release 23.0) Domain The spliceosome, an assembly of snRNAs (U1, U2, U4/U6, and U5) and proteins, catalyses the excision of introns from pre-mRNAs in two successive trans-esterification reactions. Step 2 depends upon integral spliceosome constituents such as U5 snRNA and Prp8 and non-spliceosomal proteins Prp16, Slu7, Prp18, and Prp22. ATP hydrolysis by the DEAH-box enzyme Prp16 promotes a conformational change in the spliceosome that leads to protection of the 3'ss from targeted RNase H cleavage. This change, which probably reflects binding of the 3'ss PyAG in the catalytic centre of the spliceosome, requires the ordered recruitment of Slu7, Prp18, and Prp22 to the spliceosome. There is a close functional relationship between Prp8, Prp18, and Slu7, and Prp18 interacts with Slu7, so that together they recruit Prp22 to the spliceosome. Most members of the family carry a zinc-finger of the CCHC-type upstream of this domain. 
22.90 22.90 24.80 23.10 21.90 21.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.99 0.70 -4.61 41 344 2009-01-15 18:05:59 2008-10-03 11:54:42 3 8 276 0 246 345 2 231.90 39 50.39 CHANGED phsaDuKRDRWsGYDsspacpVl-ca-ph-pt+cphptpp.................................sctttcpspst-ptpthhccs-h...................spppps..ssRsLRlREDTAKYLhNLcssSA.......hYDPKoRsh+-ss........t.ssthhts-sFlR.toG-....utchpchptaAWEstc+Gs...............................slclpAsPTphElhtKchppc.+....-pt+ppp+pslhcKYGspp...h.t..hp.hlspoEphhchpt.......httpths+SpatEDlhhpsHspVWGS .......................hsaDuKRDRWNGYDs.p.-a.p.p.ll.--.Ypch-tt++ph+tpp.......................................................................................sptpt-p.spps-p.cpphs-c.-hs...........................phssppphssRNLRIR.EDsAKYLhNLDs.sSA...........hYDPKTRuM+-ss..h..............tts.sts.asu-NFhR.hoG-....shphtphQhaAWEu.c+Gs..........................................................................clHlpAsPTphEhLhKphc.c+.......-ph+pp....p+psll-KYGupc...thp.s.s.pc.....h...hl.spoEpaVEYsctGt.....................lKGt.+thspSKYtEDlhhsNHTslWGS.................................. 0 88 139 206 +11541 PF11709 Mit_ribos_Mrp51 Mitochondrial ribosomal protein subunit Wood V, Coggill P anon Pfam-B_4972 (release 23.0) Family This family is the mitochondrial ribosomal small-subunit protein Mrp51. Its function is not entirely clear, but deletion of the MRP51 gene completely blocked mitochondrial gene expression. 
21.80 21.80 22.10 21.80 20.10 19.80 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.85 0.70 -5.21 20 145 2009-01-15 18:05:59 2008-10-03 12:51:24 3 4 133 0 108 143 0 327.90 26 79.88 CHANGED Mutt.......husLLRsSRlAplspsh.sh.psstp.............hPTHQlItT.sSShtR.u-WGLKpsLPs+hso.....palphN-lDThcclsDaEssu.saphsph+FQEhGlslsts................sppsPlFpstsspo..sppsshpshhs...........ph.hhsshosuchpchLpp....lssh+scFppaLhcptsct.................................................hsspsltchltcFLs..............................tsspphsspusschpuouGLSYs.pGpL.........psoPNGVptphlsPGRhl.....psscp.hsAlGGhVAsssss..................tsshhpthsschsRptph.aclpcAplsp.sGplhltssss .....................................s.......p.hupLLRpSRlhtlspsh...ststtp................aPh+QsItT.sSohtR.u-WGLKpslPtK.ss...............shlpls.p.lDThEpl.T-Fcssu.spthsht+aQEhslPlshsh..................................................................................spp.pshFcsp.css...sts.shts.ht..................................................................phsh...hsshots-hpchLp+....l+ph+scFpphLhcphspp.....hp....................................................................tssttltsh.....lhcFLsh.s.................................................tsps.hs..ppssspspsouGLSY.pssth.................ppp.....psl...s+hl......hPtphh......tttpt..hhuluGhlsps..t........................t.stht..th.ssh..tttph.hps.pAplsp.pGpl.ltst.s.................................................................................................... 1 29 59 91 +11542 PF11710 Git3 G protein-coupled glucose receptor regulating Gpa2 Wood V, Coggill P anon Pfam-B_11657 (release 23.0) Domain Git3 is one of six proteins required for glucose-triggered adenylate cyclase activation, and is a G protein-coupled receptor responsible for the activation of adenylate cyclase through Gpa2 - heterotrimeric G protein alpha subunit, part of the glucose-detection pathway. 
Git3 contains seven predicted transmembrane domains, a third cytoplasmic loop and a cytoplasmic tail [1]. This is the conserved N-terminus of these proteins, and the C-terminal conserved region is now in family Git3_C. 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.37 0.71 -4.76 19 259 2012-10-03 04:04:29 2008-10-03 13:01:14 3 4 124 0 213 414 3 195.20 24 38.37 CHANGED sht.RlluIsuSshSlhusllulYhhhsIs.RR+.............lFRHp............LIhhLlhhDhlKAllLllYPshlLs...psssYssssFCpllGFFTAhuIEGu.................DlAIlsFAlHhulLIF+Ps............pssphEGGLY+aRhalYslsh.llPlllASLAFIs.............................tsGYpshssWCYLPscPhWYRLVLSWlPRYhIhlsIhsIYhsIYhaVp+chcpl ............................................................................................................hh.......l..shu..shu.lh.u...h..h..h.s...h...hh..h.h.h.h.....p.h.h...................haR.+...p.............................................................LIhs....Ll.lu...Dhhpulsh.l.l........sh..hhlt....ps..t..l..h..s..s...s..s..h.C.hpGahhp...hu.s....uo.................s.hhllsIAlH.T.a.lhlhps.......................ht.h....p....hh.l....as.h.sh....hl......lh..hs..s.L.s..h.ls.............................ttts..Y...s..s.s.Gs.WC..Wl..ss.....c....h...................h....h...RLh..lpal...h.alh..hhs.hl.hl.Yshl.ahhl++pht..h................................................................................... 3 53 120 185 +11543 PF11711 Tim54 Inner membrane protein import complex subunit Tim54 Wood V, Coggill P anon Pfam-B_3533 (release 23.0) Family Mitochondrial function depends on the import of hundreds of different proteins synthesised in the cytosol. Protein import is a multi-step pathway which includes the binding of precursor proteins to surface receptors, translocation of the precursor across one or both mitochondrial membranes, and folding and assembly of the imported protein inside the mitochondrion. 
Most precursor proteins carry amino-terminal targeting signals, called pre-sequences, and are imported into mitochondria via import complexes located in both the outer and the inner membrane (IM). The IM complex, TIM, is made up of at least two proteins which mediate translocation of proteins into the matrix by removing their signal peptide and another pair of proteins, Tim54 and Tim22, that insert the polytopic proteins, that carry internal targetting information, into the inner membrane [1]. 32.50 32.50 32.70 33.40 31.70 32.00 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.24 0.70 -5.87 17 146 2009-09-10 21:07:40 2008-10-03 14:14:01 3 5 135 0 114 155 0 370.20 34 85.84 CHANGED NPAL+hhGl.Ps....h..+LPSRNWMIFaolsuuhsuulhYD+tEp+chppKWschVc+lucEthsssp....hsRKlTVFlAsPPs.DhLcsuhchF+cYVKPlLsAAuLDY-llptcRpG-IRAtlAERIRclRRcth......................................tspst.sh-c..sppccslhshRphhGlh.....................-.ssstGslsIGRtsaKEYlpGLHEGhLGPL-.P.........................tspsttssst...pss.............tt.t.c..tssp...ptp......-s.-ccpp...........ssstP.aIsss-YssssLsPph..................P.hh.pPshslPhPpllGFLNhPh+lYRFhs+RhhA--huccsAulV.ss.hRPap.ts.................................tphhpcEEp-W.Kp............V+ptcE.........KcpEWlp............-lVhDsRlssRM+hap 
....................................................................NPAh+..hhGl.Ps.......h..+LPS....RNW..hIFholsuuhsuulhYD++cp++hpp+atchVtplucEsl.ss.sp............hPRKlTValusPPu.D.t.LcsuhcaF+cYVKPlLsAuuLDaEllpuc+pG-lRstlAp+IRchR+phtt..............................................................-.....ptpp....h.phR.ph.hGlh.....................................-..hstGslslGRtsaKEYlpGlHEGh..LGPLp.P............................................................t.s.t..............................................tt.t.p........t...tp....p.tt....pt.pccpp....................p.pp.s.stP..alpsspYsst.pls.sph..............................................................P..h...pPshslshPpllGFhs.hPhRlaRF.hs+RhhA-plucpssslVhss...hRPaptsst..............................................................................................................................................................tphhp.pEEp-.W.Ks..............Vcptp-............................+pppWhp..............lshDsRlsp+Mpha............................................................................................................................ 0 32 64 97 +11544 PF11712 Vma12 Endoplasmic reticulum-based factor for assembly of V-ATPase Wood V, Coggill P anon Pfam-B_2410 (release 23.0) Family The yeast vacuolar proton-translocating ATPase (V-ATPase) is the best characterised member of the V-ATPase family. A total of thirteen genes are required for encoding the subunits of the enzyme complex itself and an additional three for providing factors necessary for the assembly of the whole. Vma12 is one of these latter, all three of which are localised to the endoplasmic reticulum [1]. 
25.50 25.50 25.90 25.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.75 0.71 -4.48 35 291 2009-01-15 18:05:59 2008-10-03 15:33:25 3 9 252 0 209 275 2 136.80 23 54.40 CHANGED ppLLcuo..cl.alPs.s.psc.............................................osEahApht+L+tttcpccYp+hh........t................................st.t...sp.stthcpl+pQlssllNlllSVlusshulahhsp..h.............................shshssRlLLulhsullVhlAEVslYst.....Ylp+lccA+ ..................................................................pLLcsocl.hlP.........s.c.............................................ssEhhAphp+L+tptppccYpchs+ssss.pp.........................................p...sp..s.pphcsl..+p...plhhlhNhllolhushhssahhsp.h.h................................hshssRlllulhsuhlVhlAElhlhst.hhpp.t................................... 0 66 110 171 +11545 PF11713 Peptidase_C80 Peptidase C80 family Rawlings ND, Bateman A anon Rawlings ND Domain This family belongs to cysteine peptidase family C80. 19.70 19.70 19.70 20.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.92 0.71 -4.42 23 303 2012-10-03 02:24:44 2008-10-03 15:44:17 3 64 136 14 38 286 6 155.10 33 7.28 CHANGED spastplIlQL..................psDslstcAAt....sLhuK+PssSlllQhss-......Gsh+slaus...............sstL.....sGpl+hQlVGHGcs....t.ssspLuGhsscpLAstl.....pphppthsssh......pPc..+IoLVGCshsus......ptpsuaspchhptlpsp.Gl........pssVSAppucVpV............-tpG+Kcsl ............................+achQlIlQh..................psDslst+.AAs....sLAuKHsp..sSlllQhDs-..........usaRlhYGs.....................sspL......sGK.lRWQlVGHG+s.........ptspspL.u.Ghss-pLAscL.....pphppttphss......pPc..+I..slVGCulsus..........spppuFupphhpthptp.uh........phslsu.ptcltl............stt.G++.h.h................................................................ 
0 18 28 35 +11546 PF11714 Inhibitor_I53 Thrombin inhibitor Madanin Rawlings ND, Bateman A anon Rawlings ND Domain Members of this family are the peptidase inhibitor madanin proteins. These proteins were isolated from tick saliva [1]. 25.00 25.00 26.30 26.10 19.10 18.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.75 0.72 -4.00 3 5 2009-01-15 18:05:59 2008-10-03 16:12:31 3 1 2 0 0 5 0 78.20 59 98.99 CHANGED MKHFAILILAVVASAVVMAYPERDSAK.DGNQEQERALhVKVQERoDs.DADYDEYDEDGTTPTPDPTAPTARPRLRGNQ MKHFAILILAVVASAVVMAYPERDSAK.DGNQEKERALhVKVQERs-G.DADYDEYDpDuT..TPTPDPTAPTARPRLRtNQ.... 0 0 0 0 +11547 PF11715 Nup160 Nucleoporin Nup120/160 Wood V, Coggill P anon Pfam-B_1841 (release 23.0) Family Nup120 is conserved from fungi to plants to humans, and is homologous with the Nup160 of vertebrates. The nuclear pore complex, or NPC, mediates macromolecular transport across the nuclear envelope. Deletion of the NUP120 gene causes clustering of NPCs at one side of the nuclear envelope, moderate nucleolar fragmentation and slower cell growth [1]. The vertebrate NPC is estimated to contain between 30 and 60 different proteins, most of which are not known. Two important ones in creating the nucleoporin basket are Nup98 and Nup153, and Nup120, in conjunction with Nup133, interacts with these two and itself plays a role in mRNA export [2]. Nup160, Nup133, Nup96, and Nup107 are all targets of phosphorylation. The phosphorylation sites are clustered mainly at the N-terminal regions of these proteins, which are predicted to be natively disordered. The entire Nup107-160 subcomplex is stable throughout the cell cycle, thus it seems unlikely that phosphorylation affects interactions within the Nup107-160 subcomplex, but rather that it regulates the association of the subcomplex with the NPC and other proteins [2]. 
19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 547 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.63 0.70 -6.38 25 339 2012-10-05 17:30:43 2008-10-03 16:27:08 3 28 235 9 234 658 13 407.00 17 36.15 CHANGED lslss....................tstsst.pssspsEcshtpchhsosu.....hhppshsRhlhWRhh.....ssscsLplpslslspsh.p..........sl+lpFspslh...ssslshs-ppsp......slhlhshssohplasLpltsshapp...............sshs-pshp-hspphsssshshptsthhsss........sssplhluhssGullclp+ps......pustapphphsstsh.ttlpshh.h........tttpht.ssssuhshss..htspsalh...slshcppL+lWslsotph....h..h............................st....tp.hphhhsssp.....sshhplh.....h.p.tpsstphhslhhus.psupFthaplp.ss....sss.hhps.hst........t.........slhshphp.hchs.sshclWsLW.cssssh.lpplphs.p................................tshpsshs.pssss.thhps.t..sDst.-palchlhpsu+aotpsl......cpALphapcsht.hpsps.s........sLpcclssslsuplphct..pt.h..........s.ppatptssppWp+FathstchccptpcPluLsl.sh....sss......shllpsssluhl..RsssshEpl .....................................................................................................h.....................................................................h.hp..........tphltl....shtht..t........................................lphph.ts.h.....sh.h....tp.ttt...................hh.lhhhs.ps....h.hhhh.h....th..t.....................................th...t..p..p......shsh..s..hhh....................................s.t...hhls..h.t.....sGslhhl.p..h.......................s.hh......hhptt.h...t.lp..shh.h.........................tt....shshshsh..p..........p.p.s............a.........lh..............sls.sppL+hWshpstph...............................................................................................h.s..t....................t...h.h..........................................hh.hhh.h........t...tu.p.hhhhth..s................t..............................sl.t..t...t.....hp..........t............p.t.....p.....lasLW..p...s.pst...h..l..h.hph...t.p............................................................
.......................t.................t.............cs..phahp.lhts.sh..as.thl....................tpuL....hap.p...........................................ltpplh.slttph..p.p.............................tth.t...t.W.pahthh.php.thtts.h..ul.h..............th........................................hllhtt..huhh..h..t.h-................................................................................................................................................ 1 67 116 193 +11548 PF11716 MDMPI_N Mycothiol maleylpyruvate isomerase N-terminal domain Bateman A anon Bateman A Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.85 0.71 -3.75 100 2030 2012-10-02 14:44:17 2008-10-08 12:55:26 3 16 384 2 776 2315 367 128.60 18 55.18 CHANGED ltsttpplsshlssLs.csp..hssPos.ssGWTltclluH...lststphhsth.............................h....ts...sh.t...........................stthpttsstssssllsthcpstsphtsshtshsspthhhhh....hssts......hhttphhEshlHthDls ..........................................................h.....tthsshlssls..ss..p.......h...s.s...P.....os..s...s.G..........W..........o..........lpcllsH.....ls.t.s...t.....p..hht..s.h..........................................h...........ts.............................................................................ttthp..t..t..s..s..h..s.s.s..p..l...h......st..hp.p...s..h..t...t...h.......h..t.t....h.....tt..h...........s...t..t.....................h.............s..h.........s..........hhhhhhh-hhhHt.Dl.................................................................................................................................. 0 295 606 732 +11549 PF11717 Tudor-knot RNA binding activity-knot of a chromodomain Wood V, Coggill P anon Pfam-B_4165 (release 22.0) Family This is a novel knotted tudor domain which is required for binding to RNA. 
The know influences the loop conformation of the helical turn Ht2 - residues 61-6 3- that is located at the side opposite the knot in the tudor domain-chromodomain; stabilisation of Ht2 is essential for RNA binding [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.87 0.72 -4.26 56 1173 2012-10-02 16:56:36 2008-10-08 14:59:09 3 25 331 25 766 1153 19 57.70 30 10.62 CHANGED ltlGs....+lhsp+..............csph+hAcIlpt+h.....ppsp.....................hcYYVHYhsaN+RLDEWVstcRlsh .............................................................................h...Gp..+lhshc...........sshhacAcllp.hc..........ppsp.....................................................hpYa.VHYh.GaN.+R..........hDEWVstsRl..h...... 0 255 376 601 +11550 PF11718 CPSF73-100_C Pre-mRNA 3'-end-processing endonuclease polyadenylation factor C-term Wood V, Coggill P anon Pfam-B_2254 (release 23.0) Domain This is the C-terminal conserved region of the pre-mRNA 3'-end-processing of the polyadenylation factor CPSF-73/CPSF-100 proteins. The exact function of this domain is not known. 
19.90 19.90 21.50 21.50 18.80 17.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.29 0.70 -4.98 35 365 2009-01-15 18:05:59 2008-10-08 15:00:35 3 12 290 0 246 354 0 216.70 26 31.03 CHANGED cGp..hl...SGlLVpps..FshpLluPsDL+E.YosLoToslsp+Qslplsus.h-LlpatLcphF.G....tl-.lsccc......................................thlVMsslslhhcppp........lplEWpu.NhhNDslADoVhAlLlsl-SSPAuVKtouppp..p.............................ptp.s.ppphcplhhhLcspFGss.hs.hppsc.....................hhlphspppuslsh......pshss.-sp.......scsLcpclpshlpRhpphstPl .....................................................................................................sp.hlSG.l.L.Vpcs..Fshp.lhuPpDLpp..YosLsso...slp.......Q+Qslshsus....hs..L...lp..a..tLppha.G....slEplpppc...................................................................................................t.shhVh.t........s.lslphpsst...........lhlEWtu.s.hNDhlADuVhslllplpus.PtssKtssp...t...t........................................................................................t.tp.....p.p.....hhcpl.hhLps.FGcs..h..htpsp.......................................................................................................h.lpl.stppAplsh.................pshpl...csp........spsLcphlp.hhp+hhphhhP............................................................................................................................ 0 86 140 205 +11551 PF11719 Drc1-Sld2 DNA replication and checkpoint protein Wood V. Coggill P anon Pfam-B_1966 (release 23.0) Family Genome duplication is precisely regulated by cyclin-dependent kinases CDKs, which bring about the onset of S phase by activating replication origins and then prevent relicensing of origins until mitosis is completed. The optimum sequence motif for CDK phosphorylation is S/T-P-K/R-K/R, and Drc1-Sld2 is found to have at least 11 potential phosphorylation sites. Drc1 is required for DNA synthesis and S-M replication checkpoint control. 
Drc1 associates with Cdc2 and is phosphorylated at the onset of S phase when Cdc2 is activated. Thus Cdc2 promotes DNA replication by phosphorylating Drc1 and regulating its association with Cut5 [1]. Sld2 and Sld3 represent the minimal set of S-CDK substrates required for DNA replication [2]. 28.00 28.00 28.20 29.10 27.80 27.90 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.87 0.70 -5.39 23 196 2009-09-11 16:31:55 2008-10-08 17:09:09 3 11 170 1 145 183 0 323.50 21 46.24 CHANGED hspL+sELKpWE+sFtspN.GRcPsRsDIKpsPEIutKYKpYscLKs..supts..............pspKRcp...............sh....psspTPpKs.............................t.p...t....p....ps.................shcstlGPTPQtsGKsLuLF.Dhh.ps.t......................ssuoPoss+hsospssss...........................pTPoKpctsh...............tt.tsptc.ssp...tcTPhssscchhlst.h........p...............................................hpTPsaL+R..............pp.sh.s.t.................................hSPpPl+..p+.hs....KuLSplsp.....Ec-plc--h-lLcEhEsEpt.................................p.tppphsDspss..................................................tsp.spshKKK..sQKRoTR+V+M+Psh.tcspspsphspsDhccE...........h..................................................................shhcpcsho-hhsph...t.p..sppcctpcscsptcpp.............tpspt.+K.............lpssup...Na+RLKlps+..tptp..hpuRa.pRR 
......................h.tpl+hcLKpWE+sFtppp.GR+Ps+pDI.K.p.s.s.c.ltthYKp.Ypp....L+p..ttps...............................................t.pctpt..............................s.psPtpt......................................................................................hss..s.........................................................................................................os.p................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 39 70 114 +11552 PF11720 Inhibitor_I78 Peptidase inhibitor I78 family Bateman A, Rawlings ND anon Bateman A Family This family includes Aspergillus elastase inhibitor and belongs to MEROPS peptidase inhibitor family I78. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.87 0.72 -4.24 8 389 2012-10-01 19:32:51 2008-10-13 17:23:26 3 4 345 0 161 477 26 62.50 29 58.57 CHANGED CsApshQsLVGQPhssspssph.....spslRVl+PGDhlTMDYpssRLNlhlDtsssIosl+CG ........................C.st.hpthlGp.s..h...os...t.h.p..ph..........hsptsR..llp...P..G...sh.lTMD.......apscRLNlplD.s.s.spl.pplpCG.......... 0 32 86 132 +11553 PF11721 Malectin Di-glucose binding within endoplasmic reticulum Finn R, Coggill P anon Pfam-B_783 (release 23.0) pdb_2jwp Domain Malectin is a membrane-anchored protein of the endoplasmic reticulum that recognises and binds Glc2-N-glycan. It carries a signal peptide from residues 1-26, a C-terminal transmembrane helix from residues 255-274, and a highly conserved central part of approximately 190 residues followed by an acidic, glutamate-rich region. Carbohydrate-binding is mediated by the four aromatic residues, Y67, Y89, Y116, and F117 and the aspartate at D186. NMR-based ligand-screening studies has shown binding of the protein to maltose and related oligosaccharides, on the basis of which the protein has been designated "malectin", and its endogenous ligand is found to be Glc2-high-mannose N-glycan [1]. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.30 0.71 -4.49 62 826 2012-10-01 23:47:32 2008-10-15 13:30:42 3 160 245 3 487 1418 44 162.90 26 22.05 CHANGED hshtlNsGGsph....tsssGhhYpsDpt..suss..ph..t................................ht.stshhtspsp.....pLYpotRh.......us..o..hpYhh...........hlpsGs..YslpL+FAElhass......tthGcRlFDlhlpu.phllcsaDIhp..cusGhs.tsshcpa.ss.sl.ssstLp.......Iphhhhtpsp..l....ushs.Ph.IuAI .................................................................................htlNsGutth........s.ts...h...hapsDt.....ssst.......h..t...........................................htp.st.th..s.stss.......tL..apTt...Rh..........u.t...o.....hp.Y..hh................h..ht..sG..s...YsV.........pLpFAElhass................tthGp.........R..l.........FDlhl...p.........G..p.............h.....lhc-...h...D..I.....hp.......psG.....uts......sshcp.h.....ss..sV..ss.s.t.Lp.......lc.hh......ht..st........uhhs.PhlsAl.................................................... 0 154 331 413 +11554 PF11722 zf-TRM13_CCCH CCCH zinc finger in TRM13 protein Andreeva A, Bateman A anon Andreeva A Domain This domain is found at the N-terminus of TRM13 methyltransferase proteins. It is presumed to be a zinc binding domain. 20.90 20.90 20.90 22.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.61 0.72 -4.57 27 181 2009-01-15 18:05:59 2008-10-20 16:55:17 3 11 158 0 116 172 2 30.90 40 6.89 CHANGED thpCpaal.+K+.R....hC+Mtt.ttGspaCspHts ........pCpaalt.+K+.R....aC+Mss..tsGp...paCGEHt.... 0 38 62 97 +11555 PF11723 Aromatic_hydrox Homotrimeric ring hydroxylase Radauer C, Mistry J anon Pfam-B_24837 (release 22.0) Domain This domain is found on aromatic hydroxylating enzymes such as 2-oxo-1,2-dihydroquinoline 8-monooxygenase from Pseudomonas putida and carbazole 1,9a-dioxygenase from Janthinobacterium. 
These enzymes are homotrimers and are distantly related to the typical oxygenase [2]. This domain is found C terminal to the Rieske domain which binds an iron-sulphur cluster. 20.50 20.50 20.60 20.90 20.40 19.00 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.59 0.70 -5.07 7 55 2012-10-02 19:24:03 2008-11-07 17:10:43 3 3 44 49 16 58 6 216.30 38 59.09 CHANGED aVGDu-...PsPLtcDlPPs............FLD-Dhtlh.G.+RhlpuNWRLulENGFDsGHlaIHpsS.hVhusDhAlPLGF..tP.uDcsth..shs-s.suPKGlhchh......scHh.PlFEusl.GpsshtGphh..pthVu...............lplSlWLPGVLKV-sFP.PshhpaEaYVPIDEsoa.YaphLu+hssocE-tcsFcsEa.cpcW+chAlpsFNDDDlhAR-uMpsFYusDpGWscEhLapsDtsIltWR+LsSp+sRGIQ ......................alGDt-....PPPLscDlPsp..................hLD-shslh.GhpchltuNWRlusENGFDsuH.l.alH+sShllcspDhsLPLG..h..hP..sspst...ts..-t..stPKGlhshh......s-phhP.lh-shh.G..shtss.t.....hhs...............hp....hSlWLPGVL+VpsaPtPchsQaEaY.VPID-spH.Yaphls+hssscc-tpsFctca....cpt...ahsh...sLpGFNssDlaAREuhpsFYs-spGWspE.LhcsDhuIltWR+LAScasRGI........................ 0 7 13 15 +11556 PF11724 YvbH_ext YvbH-like oligomerisation region Bateman A anon Bateman A Family This region is found at the C-terminus of a group of bacterial PH domains. This region is composed of a helical hairpin that appears to mediate oligomerisation based on the known structure. This elaboration of the bacterial PH domain is only found in Bacillales. 21.50 21.50 22.50 26.10 20.10 21.40 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.92 0.72 -4.29 7 123 2009-01-15 18:05:59 2008-11-09 20:00:03 3 2 123 6 16 66 0 60.40 65 30.21 CHANGED -aAcpSLshAsslhsphR.t.csslsppFKchschAFsWLssshppashKDFG.VF-KYIpN ........QYAHKSLDMASNAFSR.ISNA.QVNLAEQFKEMNEIAFNWLVDTKKQYNVKDYGFVFEKFINN... 
0 9 12 14 +11557 PF11725 AvrE Pathogenicity factor Mistry J, Coggill P anon Pfam-B_735 (release 23.0) Family This family is secreted by gram-negative Gammaproteobacteria such as Pseudomonas syringae of tomato and the fire blight plant pathogen Erwinia amylovora, amongst others. It is an essential pathogenicity factor of approximately 198 kDa. Its injection into the host-plant is dependent upon the bacterial type III or Hrp secretion system [1]. The family is long and carries a number of predicted functional regions, including an ERMS or endoplasmic reticulum membrane retention signal at both the C- and the N-termini, a leucine-zipper motif from residues 539-560, and a nuclear localisation signal at 1358-1361. This conserved AvrE-family of effectors is among the few that are required for full virulence of many phytopathogenic pseudomonads, erwinias and pantoeas [2]. 18.10 18.10 19.80 19.70 17.80 18.00 hmmbuild -o /dev/null HMM SEED 1774 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.70 0.69 -14.34 0.69 -7.55 9 200 2009-01-15 18:05:59 2008-11-11 16:24:21 3 3 58 0 14 220 0 959.80 27 91.93 CHANGED 
Mp....h.slspspsssltssssssssssssLpQ.pspsssQpuutSLsu.G.+shtphsplpQ..........pspussppsAs.....................sPps.ss.tts......tt.phtp.ht...p....p...st....p.t.........................................ph.ts.lsp.u................t.Phsptpssssshstpt.ssst.ts.stssQpsssuc.ct.pppthttpcsssPt...........t.hpstLstss...tuphchpspph.pshpspss............IoLDscGK.pFushpsPAlssLLcppLGKssQsYLAHpusssup....pHhLL-spGHLhcltps-suhull+SSpssshh...utscsssVpLppcssplpls...pupspts.sLPGcAHhAhLoGl.......phsssG-plRlHDDKlaphss.huhWp...ssc-ssaSpLospGDGplYA.+sscsLsNLSSsp..hschhlcclpSFSVu.cGpVAlLospDops.pLs.Mssl.us.ts+hphoLcLss.......Gcu..pApuVuLos-RLFlADo-G+LYSAsRsphcss.-s.LchhPpps.....spthGtccploGFhosDcGQlpALlKsp.GphHupsLs.pssphcsGWNLoDuLVl-NphGLssh.sPssccpLsLsRtGplGLp-G+lphaDusTcsWc-Aul.DhcpLpRGhDusAYlLK-Gcl+cLsls.cpsshsaucssshA.sthpsKsEhGsAlpGL-.scslpAhAhlusppFlALDDpGclpuHp.KsGp......PhsLo+cGLsG-lKslsLDccpNLaAlTpsGcLFphsREsWQssc.us.....................................................upWptlshP.sppslcuLtssscpplssplcD.......uhhpL+utpWpshps+.....sstpsuhpslFsRLspuhKshpIPGTGlTl+hssphhG+oGhEppp.+SphuEhlRAalapsThpsPRPhKNsu.plQHRapGR-GLptVYchpuhLhKQL-...pssupssssupDLpo+lspLD..LG.cGtpLlc-LcpFR-ELEspuppAlhhlG..........QppGVLNppGplscptK.....tt.tthspthNhppSG+DLsKtLpsAlppsuPSspssstpLLpphhssGlchSHpKuDhPhGppRDssDcpuLoKuRLsLDlVTLs-LtcLlDKAchlospss..stlppLcpcLssLR-ppYG-NPVKphTDMGFTcNtALEusYDuVKuFlNAF+KtcHAVSVshRsshtopsQu-LAcKhKssLhpL-pG.-pIuhoRSYGssLoTsFlsh..schsssshPuAGhTssRNYsLshpRs-GGlsVhhhR-GusssosuVuuG+DlhPhhsscspsAc..Ss.lusc++hsPshRlGusloushptoppsulsFsls---lsuFlDsLhpGpLsPLplLcKGh-Hps+pu++hsFslssuuss-LRsGlslo-.......ssScPhoAssRsusGhssosNLhohoctp.TQcs-pophppuScNRPRFhNssshGAphpsplusuHss.pts.............sootst.psGo.sAhssssVssslulDsRTspplphchKpApPlTspslscLopoLGcsFKDsAopthLucLtc.psuc.....Pc-....pLcuLpchFus+sspsDcQYcALRsLK+hsscpcAu.scpusLDsARapTsYTNLS+LsppulhshIhsplpuAhsPoNAsRluchMspDPtLKullKphQuSsuT.ApVpLE.KDsLhD+l-cG.h-GplspcElusLhpDRNNhRI+ulsV.posspoEGFsoPTsLlSsssSAulShsKsLGpINFhYGpDQ-pPpuaTl-GElu+ssssltsAhscLKK-GhEhKS 
........................................................................................................hh.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sth.............t..tt.hp..tttla.hp........Wt.......t..t..hs.l.hthpGphh..psst.h.p.ss...............h..s..stssh..t..............................................................t..........h.thsh...t.t..p..lhhhstpsthh.h........................................................................h.hsh.........s.......t.t.pstWphpp..hh..p.Gh....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................... 0 0 4 8 +11558 PF11726 DUF3296 Protein of unknown function (DUF3296) Mistry J, Coggill P anon Pfam-B_768 (release 23.0) Family This family is expressed in Gammaproteobacteria. One of the E coli members is annotated as yagK, but otherwise the function is not known. 22.30 22.30 22.30 23.00 22.10 22.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.31 0.71 -4.71 36 1241 2009-01-15 18:05:59 2008-11-11 16:48:00 3 3 492 0 89 791 27 153.40 27 81.94 CHANGED ppltpslcpslscasRlhslRlDL+hPp..ph.sp.....s..........sslocFhpsLcsplptp.................phhpspltYlWs+Epsp.ss..phHYHlsLhlN.p.a.t.Gphppps.slsspIppAWtpuht.hth....cspsllphs....cps.....tYhls...htpts.ppshc..slhhRlSYLAK..tcoKph.spt..t+sFGsSp .........................................hpphlpthhp.as+......lhshRlDlth.c....s.ssh......st...........................tsls..phhptLps......clp......................p.h.shltYhWshEasp..pt..thHhHhhlhls...............Gph.p.t..........s.........pl..uthIppsWp.psht.....sh..........phtshsp.hs...tps......................................thhlp..............t.s.stpshp..pl.h...p.h.sYLAK......tc..s....Kth...s......psaGpS.................................... 0 20 37 61 +11559 PF11727 ISG65-75 Invariant surface glycoprotein Mistry J, Coggill P anon Pfam-B_785 (release 23.0) Family This family is found in Trypanosome species, and appears to be one of two invariant surface glycoproteins, ISG65 and ISG75. that are found in the mammalian stage of the parasitic protozoan. 
the sequence suggests the two families are polypeptides with N-terminal signal sequences, hydrophilic extracellular domains, single trans-membrane alpha-helices and short cytoplasmic domains. they are both expressed in the bloodstream form but not in the midgut stage. Both polypeptides are distributed over the entire surface of the parasite [1,2]. 22.70 22.70 22.80 23.80 22.60 22.60 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.08 0.70 -5.35 20 223 2009-01-15 18:05:59 2008-11-11 17:19:18 3 4 11 0 71 209 0 254.70 29 57.37 CHANGED thlhhhhssllhhslshApttph.......pus+KLsp-GApuLCph+cLucpV.scpuDplhccspphsstlptthcplp.hhpplt............c.s-ucpccl+phhpcAKpclpctltp.ucphsccsccptpclKcAApcAht.t.p..tt.pSsGLpcVLsWaCtsp....scst..s..spNC.-slshppch......ssst+ssIsC....sttspss.hpssosssMcpAl-pWspsKP.......K.....c.t.psssstCpp.ts.stspPCTlhEp.WpscYcsuhpclccLEcstcpuppstct .....................................hshhlhsshhslslshupp.s.hs.t.......hss+pLstpuAtsLCpMK+Ll-sV.sspsDph.c.ppshpa.l.ssspt.thp+l..............................S-pcphpsshsps..+hpthpthpp.usphhctht...cptccAKpAAssA..................Ss.u.LpclLcWHChsc....tcs..t.s...ssNC.-sssa+cca........cspphsI....C....phpshssspspsThu.sMctAL-tWsps.KP..........K........hph...susstssp.pusssscPCTlhEs.WhhsYcsstphltcLEpshp.uhtshc.h............................. 0 45 71 71 +11560 PF11728 DUF939_C DUF939 C-terminal domain Bateman A anon Bateman A Domain This region is a presumed intracellular domain found in a set of bacterial presumed transporter proteins. The region is about 160 amino acids in length. 
21.50 21.50 21.60 21.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.95 0.71 -4.44 30 916 2009-01-15 18:05:59 2008-11-13 17:09:13 3 3 912 2 73 425 1 166.60 34 52.19 CHANGED h-cclccapppIEpph+pILhchuthLpssctp..hpstplscLcphlccA+slAa+chcNph.h+pcshYh+YF-MRcpQh-lLc+MhsplsslphsscQsphlAchhcclusslppsNsuphhLccLpch+cpFcchsLPpTREEFEsRAsLaQhLp-hEpaLhlKsp ........-cclccY+pplEpph+cILpchs.aLpptps.........c....sp........u.......t.htcLcph....lccAppluap-hcNa.h.h+.....p.s......sYah+...YF-.MRp..pQsclLcpMtt.lsslphss...pputhlApLhpclut....slspsNsusphLcc.l.pchhpha+ph.sLPpTRcEFEsRAsLlQlLpEhcpalplKs..... 0 25 44 59 +11561 PF11729 Capsid-VNN nodavirus capsid protein Mistry J, Coggill P anon Pfam-B_805 (release 23.0) Family The capsid or coat protein of this family is expressed in Nodaviridae, that are ssRNA positive-strand viruses, with no DNA stage. These viruses are the causative agents of viral nervous necrosis in marine fish. 35.30 35.30 35.30 92.30 35.20 35.00 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.16 0.70 -5.48 3 443 2012-10-04 01:49:40 2008-11-17 14:19:14 3 1 133 0 0 217 0 193.10 74 99.75 CHANGED MVRKG-KKLAKsATTKAANPQPRR..RsNNRRRsoRsDAPVuKASTVTGFGRsTNDVHL.SGMSRIuQAVLPAGTGTDGaVVlDpIIVP-LLPRLGHAARIFQRYtV-TLEFEIQPMCPANTGGGYVAGFLPDPT....DuDHTFDALQATRGAVVAKWWESRTVRPQYTRTLLWTSoGKEQRLTSPGRlILLCVGNNTDVVNVSVLCRWSVRLSVPSLEpP.EplAsPILTQGsLYNDSLostYN...................HFcSlLLGSTPIDlAPTGTIF.QLDRPLpIDYSLGTGDVDRAsYWHF+KhuGNls+PAGYhpWGlWDuFN+TFTsGlSYYSDsQPRQILLPV......GTlpsRhDSEN 
...............................................................................................................................ANTGGGYVAGFLPDPT....DNDHTFDALQATRGAVVAKWWESRTVRPQYTRTLLWTSo...GKE.QRLTSPGRLlLLCVGsNTDVVNVSVhCRWSVRLSVPS.LETP.E-TTAPIhTQGsLYNDSLuT..N...................DFKSILLGSTsLDIAPDGAVF.QLDRPLSIDYSLGTGDVDRAVYWHLKKFAGsAuTPAGWF+................................................................................ 0 0 0 0 +11562 PF11730 DUF3297 Protein of unknown function (DUF3297) Mistry J, Coggill P anon Pfam-B_797 (release 23.0) Family This family is expressed in Proteobacteria and Actinobacteria. The function is not known. 19.10 19.10 19.10 100.60 17.40 19.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.18 0.72 -4.29 30 320 2009-01-15 18:05:59 2008-11-17 14:20:50 3 1 318 0 106 230 31 70.90 61 82.61 CHANGED PDRLSssPcSPaass-lLpRslGIRFpGpE+TsVEEYClSEGWVRVsAGKAhDR+GpPhTIKL+GsVEsaa .PDRLSsNPRSPaastEsh-+..sIGIRFNGcERoDVEEYCISEGWlKlsusKAhDR+GNPhhlpLKGsVEAaY. 1 15 52 79 +11563 PF11731 Cdd1 Pathogenicity locus Mistry J, Coggill P anon Pfam-B_826 (release 23.0) Family Cdd1 is expressed as part of the pathogenicity locus operon in several different orders of bacteria [1]. Many members of the family are annotated as being putative mitomycin resistance proteins but this could not be confirmed. 25.50 25.50 25.50 25.70 25.40 25.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.91 0.72 -4.06 25 195 2012-10-03 02:11:09 2008-11-17 14:34:06 3 3 188 0 48 189 25 90.70 35 88.25 CHANGED M..........s+s.scs..................................thpcLpcLPNlGcAsApDLphLGIppsppLtGccPhchYpcLschoGpppDPCVlDVFhulspFhpGp..-....spPWWpaTcERKpt ...................................................................................ppLpcLPslGcuhstDLp.hlGIcplssL+GcsPh-lYcch...Cphp.G...pphDPClL.latssVhahpspp......hp.WWpap-cp+t............................. 
0 20 33 40 +11564 PF11732 Thoc2 Transcription- and export-related complex subunit Mistry J, Coggill P anon Pfam-B_819 (release 23.0) Family The THO/TREX complex is the transcription- and export-related complex associated with spliceosomes that preferentially deal with spliced mRNAs as opposed to unspliced mRNAs. Thoc2 plays a role in RNA polymerase II (RNA pol II)-dependent transcription and is required for the stability of DNA repeats [1]. In humans, the TRE complex is comprised of the exon-junction-associated proteins Aly/REF and UAP56 together with the THO proteins THOC1 (hHpr1/p84), Thoc2 (hRlr1), THOC3 (hTex1), THOC5 (fSAP79), THOC6 (fSAP35), and THOC7 (fSAP24). Although much evidence indicates that the function of the TREX complex as an adaptor between the mRNA and components of the export machinery is conserved among eukaryotes, in Drosophila the majority of mRNAs can be exported from the nucleus independently of the THO complex [2]. 20.40 20.40 22.10 24.10 19.10 20.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.41 0.72 -4.19 22 297 2009-01-15 18:05:59 2008-11-17 15:09:49 3 5 248 0 207 292 3 75.90 44 4.48 CHANGED luKlsasNPhhshpshLpQIcsYDNlhphVV-ssKYhosLuaDlLsasllppLs.ssRsphppDG.hs.uhWlpuLus ...luKluauNPsllhch....lLsQIpsYDNLIssVV-.sh.KYh.TsLsYDVLsa..........sl..lpuLu....p.s......p.....+p+hpcDs.hshSpWLQuLAo............... 1 67 114 171 +11565 PF11733 NP1-WLL Non-capsid protein NP1 Mistry J, Coggill P anon Pfam-B_837 (release 23.0) Family This family is the non-capsid protein NP1 of the ssDNA, Parvovirinae virus Bocavirus of cattle and humans. 
25.00 25.00 96.70 96.00 21.50 19.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.32 0.72 -3.73 3 376 2009-01-15 18:05:59 2008-11-17 17:06:10 3 1 14 0 0 133 0 93.50 90 83.10 CHANGED MSScsTKD+HRSpKRpsSPhR-ERKRsWc.p++SRSRSPIRR+GE+ShGSaRppsQ-s+QSSsTASKsSDpATKTKEsTSG..sTRTNPYTVFSQHR MSSGNMKDKHRSYKRKGSPERGERKRHWQTTHHRSRSRSPIRHSGERGSGSYHQEHPISHLSSCTASKTSDQVMKTRESTSGKKDNRTNPYoVFSpH..... 0 0 0 0 +11566 PF11734 TilS_C TilS substrate C-terminal domain Sammut SJ, Bateman A anon pdb_1ni5 Domain This domain is found in the tRNA(Ile) lysidine synthetase (TilS) protein. 21.00 21.00 21.10 21.10 20.80 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.34 0.72 -4.75 140 2601 2012-10-01 21:04:40 2008-11-18 16:14:21 3 9 2576 4 480 1909 354 70.80 26 16.01 CHANGED lp.lRh+...tuGpchp...hs....+stsRpLK+laQEhslPsWhRs+lPLlah....s-.pL.lsss.Glhlspt.....h.s...........ttt..h....plp...W ................lplRh+...psG-hhph..hG....pssp.+KLK+la.-tplP.ht.R.c.phPl.lh........s.p..pl..lsl....s..Glhhspt.....h................................................. 0 178 312 403 +11567 PF11735 CAP59_mtransfer Cryptococcal mannosyltransferase 1 Mistry J, Coggill P anon Pfam-B_916 (release 23.0) Family The capsule of pathogenic fungi is a complex polysaccharide whose formation is determined by a number of enzymes including, most importantly, alpha-1,3-mannosyltransferase 1, EC:2.4.1.- [1]. 
25.00 25.00 25.00 25.00 24.70 24.20 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.54 0.70 -5.15 37 382 2009-09-11 16:30:58 2008-11-19 09:33:18 3 7 108 0 245 384 4 212.70 30 50.86 CHANGED +lFIAhsLa.pstplltstastull....pLhchLGs.....-NVFlSIYEssSsDsopssLp.tLc..LcplslppsIhh..s.tschs..th......................h+RIsaL.............AclRNhALcPL.p....................ttpshtaD+lLaLNDVhFsspDllpLLhsss.................sp..........................................YstsCulDF...sp..s.pF.................YDoassRDhp.Gpth............shshaPaFpss....sspcshstppsl.VpSCW.........sGhsuhcAp.Fhtsp.........................slRFRu....sth....h-uSECsL....IpsDh.....ss ...............................+hahAhsla.pstpll.shhu.sllplhphLG.........................cN.lalS.lhEss.SsDpotshLp..hc..hp.....phsh.ph.lhh.......ppht........................................cRItaLAplRNtAlpP.L.p........................................t.t.s.ta...cpllF..lN.Dlh..hsst..DhlpLlhppp....................tp..........................................................hthsCu..hDa..............t..s.hF.........................................................YDsashRDhp.G.th.......................h.has..hh....t.st.............tppphh...t.t.sh.VhuCW.........sGhsshsst.h.....................................................................lpFRt.....t.......h.tuEspLhh.D.....s....................................... 0 69 141 197 +11568 PF11736 DUF3299 Protein of unknown function (DUF3299) Mistry J, Coggill P anon Pfam-B_876 (release 23.0) Family This is a family of bacterial proteins of unknown function. 
21.20 21.20 21.30 21.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.30 61 436 2009-01-15 18:05:59 2008-11-19 10:02:20 3 3 323 0 135 407 201 144.10 36 78.53 CHANGED lsWp-Lh.P..ts.s................t.hspltsts.................thpps.sth.......................................pp.hsuslsppLsGptV+lPGalVPL-.sssttlTEFLLVPYaGACIHVPPPPPNQIVaVphscG.htlps.........LasPlWVpGpLpscphss.-lA........psuYshpusplpsY....c .................................................................sW.-Lh.Pts..p.....h....................p.ts.s.................h.ppt..th.......................................p...suslsppLsGp..pV+lPGallPLE.....s-psplTEFLLV..P.YaGAC.IHVPPPPPNQIlaV+hs.cG...ht.lp.p.........las.slaVpGpL+scph.ss..-LA........psuYphpusplt.Y...................... 0 38 74 105 +11569 PF11737 DUF3300 Protein of unknown function (DUF3300) Mistry J, Coggill P anon Pfam-B_886 (release 23.0) Family This hypothetical bacterial gene product has a long hydrophobic segment and is thus likely to be a membrane protein. 
25.00 25.00 26.80 30.50 20.70 24.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.72 0.70 -5.10 71 636 2009-01-15 18:05:59 2008-11-19 10:38:50 3 1 598 0 129 443 35 275.10 57 50.70 CHANGED hopupL-QhlAPIALYPDsLLoQlLhASTYPL-VVpAsRW...psNspLpG-.........shhpAlpsps....WDPSVKuLlAFPplLphMu-clsWTppLGDAFLuQpp-VMsulQpLRppAppsGsLposppQpVsppp.ps...................................IhIEPAsPpVVYVPhYsPslVYG.sW.a.ssYP..Ph.Ya......s.Psh..........sshs..suhuaus.Glsl.uhsh.......a...uu.......hc..Wssc.....................................plslshpphpphstp...........sstpWp..Hs ....FSTAQIDQWVAPVALYPDuLLSQVLMASTYPsNVsQAVQW..S+..D..NPhhQGD.........AAIQAVu..sQP....WDsSVKSLVAFPQLMALMGENPQWVQNLGDAFLAQPQDVMDoVQRLRp..LAQQTGSLKSospQKVlossKpsssst.ssssss...........sshstsssTVIsIEPANP-VVYlPNYNPsVVYG.sW..sN....TAY..P..PV.YL......PPPsGpsF......sDSFV+GFGYSh.GV..ATTYAL......F...SS........ID....WDDDDHcHHccDpcs.t...............htpsussapHNGDNINIsVNNFNRIoGppLsspshs.........W...pss..................................................................................................................... 0 28 54 94 +11570 PF11738 DUF3298 Protein of unknown function (DUF3298) Mistry J, Coggill P anon Pfam-B_854 (release 23.0) Domain This family of bacterial protein C-terminal regions is highly conserved but the function is not known. Several members are annotated as being endo-1,4-beta-xylanase-like, but this could not be confirmed, and the structure can be defined as a heat-shock cognate 70kd protein 44kd ATPase. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.83 0.72 -3.47 347 1368 2009-11-03 10:59:44 2008-11-19 11:12:14 3 22 951 4 301 1129 32 88.50 19 30.96 CHANGED LpD.........l.hps..t.................hhptl......pphlppph.......t.....................pptt..t.................................ptht.shtt.......................................spsah..lssc......u.ls...lhas....Y-l....u........Pauh......G.hh....phplPhpp ................................................................................................................................................sl.hps...s............hhphl.pphlpppl....t.....................pp.pt..............................................................thh..tht....t................................spsFh......lssc........u.ll...lhas.YElu............Pauh...............G.hsphtlPht................................ 0 106 205 258 +11571 PF11739 DctA-YdbH Dicarboxylate transport Mistry J, Coggill P anon Pfam-B_935 (release 23.0) Domain In certain bacterial families this protein is expressed from the ydbH gene, and there is a suggestion that this is a form of DctA or dicarboxylate transport protein. Dicarboxylate transport proteins are found in aerobic bacteria which grow on succinate or other C4-dicarboxylates [1]. 
23.00 23.00 23.10 25.50 22.80 22.90 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -10.98 0.70 -4.88 59 772 2009-01-15 18:05:59 2008-11-19 11:34:00 3 4 734 0 150 542 34 200.70 45 24.22 CHANGED lplsplss.Glslsshphphphphst..........t.lplp.................................ssssshhuG.pltlsphshsh.pst..pshlplpslsLscllpl.shpsltuoGplsGslPlhlsss.thplpsGpLsucss.Gplphsssss..cshu..tss.shphshssLcshcYspLsuslshsssGchhlslplcGpN.....ss...........htsspPlplNhshc-NlhpLh+SLphu..splpptlc ............................................LRIAplhN.h.lss+NITADLQGpYPa...................oEppPLhLo....................................-VSlDVL.GGpl.hhcpLRhPQ.HD......PAlL..RLpNlSuSEL..loAl..s.P..KQ..FAMSGshSGuLPLWLNNc.KWll+DGWLsNsGP..h..TLRlDKDsA...DA....ls...............+s................N.h...oA...GsAI....s....WLRYMEIo+SpTcI.NlDNLGh.LThpAsIsGpS.+s........................csKsssVNLNYpHEENlFsLWRSLRFG..DNLQsWLE......................................................................................... 0 31 66 113 +11572 PF11740 KfrA_N Plasmid replication region DNA-binding N-term Mistry J, Coggill P anon Pfam-B_844 (release 23.0) Family The broad host-range plasmid RK2 is able to replicate in and be inherited in a stable manner in diverse Gram-negative bacterial species. It encodes a number of co-ordinately regulated operons including a central control korF1 operon that represses the kfrA operon. The KfrA polypeptide is a site-specific DNA-binding protein whose operator overlaps the kfrA promoter. The N-terminus, containing an helix-turn-helix motif, is essential for function. Downstream from this family is an extended coiled-coil domain containing a heptad repeat segment which is probably responsible for formation of multimers, and may provide an example of a bridge to host structures required for plasmid partitioning [2]. 
24.10 24.10 24.30 24.30 23.90 23.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.43 0.71 -3.66 107 609 2009-01-15 18:05:59 2008-11-19 13:15:57 3 4 377 0 187 552 51 116.60 23 36.79 CHANGED lsppplhpA.ustLhspGp.......pP...........TlcsVRptlG......pGShsTlsphLccacppptsts.........stst...hP.ssltpshsphhtplhppApptAppplpptcpphptpttphptptppttpphpphppphpp .................thpplhpA.sptLh.st.Gp.......pP.................ospsVRptlG.......sGShosl..sphlccapppppsts.................tss...lP.tsltp.....thsphhstLhpp...........Ap...p...tA....ppt..........lstt.c..........p....t...hptph...tp...hpp.phppt...ttphpphptpht.t......................................................... 0 27 74 132 +11573 PF11741 AMIN AMIN domain Finn RD, Coggill P anon Pfam-B_11438 (release 23.0) Family This N-terminal domain of various bacterial protein families is crucial for the targetting of periplasmic or extracellular proteins to specific regions of the bacterial envelope. AMIN is derived from the N-terminal domain of AmiC, an N-acetylmuramoyl-l-alanine amidase of Escherichia coli which localises to the septal ring during division and plays a key role in the separation of daughter cells. The AMIN domain is present in several protein families besides amidases suggesting that AMIN may represent a general targetting determinant involved in the localisation of periplasmic protein complexes [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.31 0.72 -4.03 172 2653 2009-01-15 18:05:59 2008-11-19 13:52:53 3 59 1806 0 715 2042 505 99.40 24 20.71 CHANGED lclhss..ttth.clhlpos.tshphpshtl......ssPsRlVlDltssphshshp.........phpsssss...lpplc.ssp...pssssRlVlcl.ct...ssps........h..tppsptlllsl ...................................s...sp.s..+lslcts..p.....h...p....h.ptFsL.......ss.P.s..Rl.V.l...Dlpssshssshp.....................phph.s...ssh...........lppl..R..suph.......sss...s.sRlVl-L.cp.........ssps..........ss..tt.tpplhhs.................................................... 0 182 458 606 +11574 PF11742 DUF3302 Protein of unknown function (DUF3302) Mistry J, Coggill P anon Pfam-B_953 (release 23.0) Family This family of unknown function is expressed by proteobacteria. 21.00 21.00 21.00 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.85 0.72 -4.13 8 755 2009-01-15 18:05:59 2008-11-19 18:01:27 3 1 490 0 69 251 11 74.40 78 65.64 CHANGED LDYFALGlLlFVslVlFYGlIlIHDIPYEIAK+RNHPHQDAIHVAGWVSLFTLHsIWPFLWIWATLY+P-RGWGhssh .........LsYFALGVLIF.VFLV.IFY...G...I..IhlHDIPYLIAKKRNHPHADAIHsAGWVSLFT.LHVI.WPFLWIWATLYpPERGWGMQs.t............... 0 8 18 42 +11575 PF11743 DUF3301 Protein of unknown function (DUF3301) Mistry J, Coggill P anon Pfam-B_952 (release 23.0) Family This family is conserved in Proteobacteria, but the function is not known. 25.00 25.00 39.00 47.80 21.60 21.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.07 0.72 -4.46 55 323 2009-01-15 18:05:59 2008-11-19 18:02:21 3 1 323 0 92 257 40 94.80 33 82.77 CHANGED sLh...hlhll.shsshhaWpt+phpEhAhtts+ptCcchslQLL-tuhshp+hphtt...pGphphpppatFEF.SssG-sRYpGplhhhGt+ltplcLssa .....sLhlllhl.shsshhaWpt+phtEhAhthscppCpchclQLLs.uhst++l.phsh...sGphphpphYtFEF.SssG-spYpGplhMhGt+ltplplsPa.... 
0 26 52 73 +11576 PF11744 ALMT Aluminium activated malate transporter Bateman A anon Bateman A Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.26 0.70 -5.82 18 320 2012-10-02 19:04:43 2008-11-20 17:12:55 3 13 54 0 205 1758 29 327.50 31 66.11 CHANGED achG...+cDPRRVlaulKVGlALoLVSllYahcs.acslGssAlWAlhTVVVVhEFSl................GATLsKGLNRuLGTLhAGuLAluhphlu.shoGc...hEsllluhslFlhuhhATah+haPthKs.Y-YGhhlFlLTFsllsVSuYRscph.lphAhpRhhpIslGuslClhlslhlhPIWAGEDLH+LlspNhpplupuLEG..........................CVspYhcshEh-........hps.s--slapGY+olLsSpupEEoL............hsFApWEPsHGta+.a+aPWppYlKlGuslR+CAaplhAL..........HGCl..poEIQuP....p-lRstF+csht+luhEuuKlL+-Lucsl+pMc+sos..p.llpcl+hAscpLpppI.cspshLhlsop..t........t..........................................................................st...p.................hcshE.upuLslATFASLLIEhVARL-sll-ul-ELu .................................................................................utpDs++hhauhK..hGl..uL.s.L.l...S..hh..h.h.h...p..........h.p.t...h....u.......p..u.h..W.Al.h.T.V.V.VV.h...E..a...o..l...........................................GuTlsKGhNRuhuTlh.AGhl..u.hs.h.t..lu...th.s.Gp.......h.ps.h.l..l..s..h...l..F...l....h............uh...h.....so.......a.h.+..h..h.Pt.......h.K.......a.-...Y...G...h.h.l..F.lLT.a.s.....l......l....s...V.......S........u.....a..........p.........s......p........p...............h.........h...p....h..A.........h......p...R..h.............hI.slGsshslhlslhlhP..l....WA....G...p-....LH....p.ss...tshpt..l....Ap.l.-u............................................hsppY.hp.t.t.t..............................ttp..hhp..sa+...ss.lpSp..s.p.--sl.............hthA..hWEP..HG.at...hpaPW.p.pY.+..l..............us.hhRp.h..u......h.Ah.............tu.hl.......s.p....p....ss......php....h....htt.h.phu.p.supsLct.hu.t....lpphph........t.hh.th.t.Ah.ppLp..l.p.....................................................................................
..........................................................................................................................t.h.hh.hs..l.ch...hp.l..th..h.................................................................................................................................................................................................................... 1 50 144 177 +11577 PF11745 DUF3304 Protein of unknown function (DUF3304) Mistry J, Coggill P anon Pfam-B_956 (release 23.0) Family This is a family of bacterial proteins of unknown function. 20.60 20.60 21.10 20.70 20.20 18.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.67 0.71 -3.89 44 431 2009-01-15 18:05:59 2008-11-24 11:52:26 3 5 195 0 98 376 5 114.60 29 62.93 CHANGED sLuuCspst...................sshtuhs........lhuhNaTshsItpFhl..sGt......hGusshss.......huGGGGtsCChslPppW+.GhplpVcWp.......................ptphtp.p.tsphtptt.VslPphsth........sshlpV+FLPscclcl ........................................................................h...sssps............................sshsuss...lpshNahc.h..s..IhtFsV...sG..............tGusshu.........sGGGuhsCChsls.....GhsscVpWp......................................hpptphtt...thph...pp...pphs.lslPchpht........pshLpVHFhPsc+Vc.................. 0 10 27 46 +11578 PF11746 DUF3303 Protein of unknown function (DUF3303) Mistry J, Coggill P anon Pfam-B_958 (release 23.0) Family Several members are annotated as being LysM domain-like proteins, but these did not match any LysM domains reported in the literature. 
21.10 21.10 21.40 21.90 20.50 20.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.92 0.72 -3.61 33 128 2009-01-15 18:05:59 2008-11-24 11:53:10 3 1 72 0 67 134 549 85.20 24 86.48 CHANGED hallpapF.sspsp..s.pshtcahcsG..t.t-p..-GacllsRhHsPtsupGhslscA-sspslacahtsWpcthslph-lsPsloDpEhls .....................halhphpF.stppp....pshtcahcsG.....tsh..-GhchltRhtssssupsahlscu-ssptlacaht.Wtch..hch.-lsPVlosp-hs.... 1 7 33 49 +11579 PF11747 RebB Killing trait Mistry J, Coggill P anon Pfam-B_983 (release 23.0) Family RebB is one of three proteins necessary for the production of R- bodies, refractile inclusion bodies produced by a small number of bacterial species, essential for the expression of the killing trait of the endosymbiont bacteria that produce them for attack upon the host Paramecium. R-bodies are highly insoluble protein ribbons which coil into cylindrical structures in the cell and the genes for their synthesis and assembly are encoded on a plasmid. One of these three proteins is RebB. 18.80 18.80 19.50 20.70 16.40 17.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.23 0.72 -4.10 32 294 2009-01-15 18:05:59 2008-11-24 15:51:45 3 1 94 0 104 256 12 66.90 41 67.18 CHANGED ITDuVT.QsNscVlGpAPAhAMGslaQusup.uh.....uluhpNAVsuQQptphhupAusspGVstlhohsosus ..............ITDuVT.QsNsKVlG.-APAhAhGsLYQuhup.uh.....uhshcNAs..suQQQtphhupAuostGltplhulssss.s.......... 2 31 51 92 +11580 PF11748 DUF3306 Protein of unknown function (DUF3306) Mistry J, Coggill P anon Pfam-B_984 (release 23.0) Family This family of proteobacterial species proteins has no known function. 
25.00 25.00 42.50 41.60 20.80 20.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -3.43 58 273 2009-01-15 18:05:59 2008-11-24 16:08:42 3 3 268 0 74 236 87 114.20 32 54.64 CHANGED uFluRWSR.RKtss..................ts-psst.t......................ssssstssssptsptsphsspchht...................sLsDs-sl.s.ssDhssFhsc.sVsppl+psALR+Lap.sPtassh.DGLs-Ys-Da.ossssls .........FhSRWSp.RKhps..................ps-p.hpst.....................................s.sstss..s.ts.tspsshp.sppshp...................sL.ssscsl.stpsshusFlss..slspplK+AALRKLFh.sscFsth.DGLc-YstDYossssl.s........ 0 13 36 54 +11581 PF11749 DUF3305 Protein of unknown function (DUF3305) Mistry J, Coggill P anon Pfam-B_976 (release 23.0) Family Several members of this family are annotated as being molybdopterin-guanine dinucleotide biosynthesis protein A; however, this could not be confirmed. The family is found in proteobacteria. 25.00 25.00 26.40 31.60 23.20 23.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.91 0.71 -4.31 51 269 2009-01-15 18:05:59 2008-11-24 16:10:00 3 3 264 0 73 213 96 134.40 34 80.71 CHANGED htluVlhc+ps....ssRWsshpWphsullP.......sts.......phLppc...st..pahhsshslcLa+s-spuYhhNLsupsPslaVlh+tspss................hpshhlTAS.t-Atsa....hDsG-p.V-plPMP.sltuWlpAFlspHa..E...hKR+...Rc .......................htlhhpct.....suRWtshpWplpuhh.........................pshtps...p.......sshsLpLa+DEpssYRhNLoopsPpLFllhcpst-s..................hpsstlTAS.s.Aupa....hD.G-phV.sssMPhslpAWhcAFlscHt..E..th+RK+c.... 0 12 35 53 +11582 PF11750 DUF3307 Protein of unknown function (DUF3307) Mistry J, Coggill P anon Pfam-B_1017 (release 23.0) Family This family of bacterial proteins has no known function. 
20.60 20.60 20.90 21.00 19.70 20.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.07 0.71 -4.40 37 380 2009-01-15 18:05:59 2008-11-24 16:32:56 3 1 364 0 129 369 88 125.40 23 62.06 CHANGED hlllhLllAHlLuDFhhQspphlppKpp.....+sphLhhHhhIhhlhhhllhhhhth............hhhhlllsloHhlID...hhK.hh.pt......................shhhFllDQlhHlhl..llslhhhh..................h..h.hhhhh.phlhh ............hhhhLl.luHlluDFhl.Q........opphu.ctKpp..................pht.hLhhHhhl.hsls..hhll.h.h.h.h.s....................hhhhhll...hlo.HhlID.........hhKsthtpph.............................phhhF...l....l....DQhlHlls...lhhlhhhh............................hhhh............................................. 0 40 87 112 +11583 PF11751 DUF3308 Protein of unknown function (DUF3308) Mistry J, Coggill B anon Pfam-B_993 (release 23.0) Family Some members of this family of bacterial proteins are annotated as being one of the several TonB-dependent siderophore receptors, but this could not be confirmed. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.76 0.70 -5.10 103 599 2012-10-03 17:14:37 2008-11-24 16:45:15 3 4 144 0 319 828 889 278.90 23 82.68 CHANGED stuppcspaoQYhh.NhhhlNPAaAGspss.hplthhhRsQWsGl-......u..APpT.shohss.lsc..ps..............GlGlslhsDphGs...hspsshtssauYplpl....ucpsp...........L........oFGlpuGhh.hsl...chsphs..........sssDs.thtt.......sphhsslusGhhahs....c......paahGlSsssll....ppphspsstt.........sp.thpahhhuGYsFshs................phphcPuhhh+hstss....sh......plDlssphhhpt..............phhhG...suYR..................hs....pulushlGhphs.....shhlGYuYDhs..hss.lst..hssGoHElhlta .............................................................................................h..uQpsspaoQYhh.N.hhlNPA.....h..uG.t.....p..ss..hplthh.aRsQWs.Gl.c..................s..uP...pThshuhstslsp...pp................................GlGlpl..hsD.phGs.....hspsshtssauYp.lpl...........spp...tp................LuhGlpsGhh..phsl...Dhsclsh........................sssDs.hhsss..............sphh.s.husGlhahs........s...............paah..GhSs.plh........s.phs.sstt......................sp.thpahhtuGYhhsls....p...................phph.c...Puhhh+hs.tss.sh..........thDlssp..h..hhpp..............phhhG.....suYR....................hs........sulsshhGhphs...........phtlGYuY-hs..hss..lst......hssGo..HElhlta............................................................................ 0 176 283 319 +11584 PF11752 DUF3309 Protein of unknown function (DUF3309) Mistry J, Coggill P anon Pfam-B_1113 (release 23.0) Family This family is conserved in bacteria but its function is not known. 
20.90 20.90 21.10 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.76 0.72 -4.18 41 288 2009-09-10 20:38:52 2008-11-25 14:50:30 3 1 240 0 131 241 23 49.00 56 91.77 CHANGED uTILLIlLILlLlGuLP..sWsaSRuWGYuPSGhlGllllllllLlLhGRI ................GTILlIlLILlLlGulP..sWPaSRuWGYuPSGhlGllllIlllLlLhGRI.... 0 22 65 91 +11585 PF11753 DUF3310 Protein of unknwon function (DUF3310) Mistry J, Coggill P anon Pfam-B_1078 (release 23.0) Family This is a family of conserved bacteriophage proteins of unknown function. 20.10 20.10 20.10 20.40 20.00 19.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.28 0.72 -4.02 30 564 2009-01-15 18:05:59 2008-11-25 15:45:59 3 7 485 0 27 404 1185 61.10 40 51.67 CHANGED sVsp..PsHYp..psslEsI-hI.t.ht..hpphhuah.hGNslKYlhRhp+K.NG...hEDL+KAtpYlch ..................................lpp..PuHYs........h.......ssl...EhIDhI.p.hs.........h.tthuFs.hGNulKYl...sR..s....st....K..sG...................pEDLpKApaYlp............. 0 6 17 23 +11586 PF11754 Velvet Velvet factor Mistry J, Coggill P anon Pfam-B_963 (release 23.0) Family The velvet factor is conserved in many fungal species and is found to have gained different roles depending on the organism's need, expanding the conserved role in developmental programmes [1]. The velvet factor orthologues can be adapted to the fungal-specific life cycle and may be involved in diverse functions such as sclerotia formation and toxin production, as in A. parasiticus [2], nutrition-dependent sporulation, as in A. fumigatus [3], or the microconidia-to-macroconidia ratio and cell wall formation, as in the heterothallic fungus Fusarium verticilloides [[4]. 
25.00 25.00 25.80 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.52 0.71 -4.72 28 414 2009-09-11 07:54:36 2008-11-25 15:51:15 3 6 103 0 314 445 0 200.10 30 48.62 CHANGED shpYcLpVhQQPhRARACGtGs................tDRRPl..DPPPllpLplh-tss....ppt-hshlpssha.hlhssLhssc....shspsph.t.sssss.................................................................................................pshLoGshVuo....hthLc-ssst..................................usaFlFsDLSVRpEGpYRLpFpLhchstts..........................stsssshhs-lhS-sFsVaoAKcFPGht-STsLS+slA-QG..s+l+lR+....-sRt ..................................................................................................................................................t...apL.lhQpP.tARhsG.st..................................t-R+..Pl..sPPPllpL.h...lhc...tp...................tt.s.s....h.ps......a.hhhssL.ssp.......s.tp..sth...............................................................................................................................................................................................................................................................................................................tpsLhGshsuu....hhhL..pc.ssst......................................usaFlFsDLSVRpEGpaRLpFsLhchst.t............................................ttsshhtpshSpsFpVasuKcFP...Ghhp.S...T.Lo+thupQ........G..h+l.lR+.cs....................................................................................................... 0 113 182 260 +11587 PF11755 DUF3311 Protein of unknown function (DUF3311) Mistry J, Coggill P anon Pfam-B_1042 (release 23.0) Family This is a family of short bacterial proteins of unknown function. 
30.00 30.00 31.30 31.00 28.80 28.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.18 0.72 -4.44 56 487 2009-01-15 18:05:59 2008-12-03 10:22:49 3 2 384 \N 172 357 10 61.80 30 83.95 CHANGED hhllhlPhlshlhs.PhhNcscPhlhG.hPFhhaathhWlhloullhhlsaph.................................ttttpc ..........h.hLlllPhlsllh.s.shhNclcPhlhG.hPFhhaa.llWlhloullhsllYhh.........................................................tttt..................... 0 36 94 133 +11588 PF11756 YgbA_NO Nitrous oxide-stimulated promoter Mistry J, Coggill P anon Pfam-B_1030 (release 23.0) Family The function of ygaB is not known but it is a promoter that is stimulated by the presence of nitrous oxide [1]. It is regulated by the gene-product of the bacterial nsrR gene. 25.00 25.00 26.90 26.50 21.30 18.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.83 0.71 -4.05 25 846 2009-01-15 18:05:59 2008-12-03 11:49:24 3 3 813 0 106 453 8 107.60 52 93.30 CHANGED LsG+LtpEacTlpAMlclYC+pH...Ht..............tsthCp-CpsLhcYAcpRLDRCPYGc.sKPTCs+CPlHCYKPp.+cph+pIMRauGPRMLLpHPIhAIRHLlsE++sVPppsstpsS .................ss.+RIsREKhTIccMIcLYptcp....p................................uuu.spchcuLhsYApKRLD+CsFG..E...-..KPuCKpCP..VHCYpPupREcMKpIMRaAGPRMLa+HPl.LolRHLlD-..+.+sVPchPpc..+.................... 0 26 60 83 +11589 PF11757 RSS_P20 Suppressor of RNA silencing P21-like Mistry J, Coggill P anon Pfam-B_1073 (release 23.0) Family This is a large family of putative suppressors of RNA silencing proteins, P20-P25, from ssRNA positive-strand viruses such as Closterovirus, Potyvirus and Cucumovirus families. RNA silencing is one of the major mechanisms of defence against viruses, and, in response, some viruses have evolved or acquired functions for suppression of RNA silencing. 
These counter-defencive viral proteins with RNA silencing suppressor (RSS) activity were originally discovered in the members of plant virus genera Potyvirus and Cucumovirus. Each of the conserved blocks of amino acids found in P21-like proteins corresponds to a computer-predicted alpha-helix, with the most C-terminal element being 42 residues long. This suggests conservation of the predominantly alpha-helical secondary structure in the P21-like proteins. 23.30 23.30 23.50 24.40 21.70 23.20 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.62 0.71 -4.04 3 159 2009-01-15 18:05:59 2008-12-03 13:32:31 3 1 7 0 0 135 0 143.10 82 81.60 CHANGED +VGAVs+R........TDsLcphIp-FNsSLAllsAMKsDANoD+hsG+aS.AccKL+lLssIEAplRILLcllRR+llRDcLGlRShpDTFcFhlpKYSSL.ocVPaSEVhRpKlKhVuptVIc-LSREHKLDlSERuFPGa...GIL ............................................KVGuVVERLCDPSVTLTEVMDE.INDFNSFLALVHSMKSDMNGDHQDGHHEMGEHKSRLLCNIEAKLRlLLDIIRRRFTR-KLLCTSATDVhGFFVhRYMSS.SHTSFEoVMRTELKLVV.KAVLS-LSRsHKLDFSERAFsAY...GIL.... 0 0 0 0 +11590 PF11758 Bacteriocin_IIi Aureocin-like type II bacteriocin Coggill P anon Lonsdale D Family This is a small family of type II bacteriocins usually encoded on a plasmid. Characteristically the members are small, cationic, rich in Lys and Try, and bring about a generalised membrane permeabilisation leading to leakage of ions. The family includes aureocin A, lacticins Q and Z, and BhtB as well as an archaeal member. 25.00 25.00 25.90 52.70 24.50 23.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.57 0.72 -4.35 5 21 2009-01-15 18:05:59 2008-12-04 11:40:09 3 3 20 0 3 13 0 48.30 59 3.59 CHANGED MA.aL+ll+alAKYGoKAVpWAWcNKGKlL.-WLNlGpAl-WVspKI++IhGl ..AKYA.......IRYGAKYGKKAVKSGWDYGKKVAKSGWNKGKS...IAQKIPRIHKV. 
0 0 2 2 +11591 PF11759 KRTAP Keratin-associated matrix Coggill P anon Lonsdale D Family The major structural proteins of mammalian hair are the hair keratin intermediate filaments (KIFs) and the keratin-associated proteins (KRTAPs). In the hair cortex, hair keratins are embedded in an inter-filamentous matrix consisting of KRTAPs which are essential for the formation of a rigid and resistant hair shaft as a result of disulfide bonds between cysteine residues. There are essentially three groups of KRTAPs, viz: the high-sulfur (HS) and ultra-high-sulfur (UHS) KRTAPs (cysteine content: 16-30 and >30 mol%, respectively) and the high-glycine/tyrosine (HGT: 35-60 mol% glycine and tyrosine) KRTAPs. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.58 0.72 -3.72 22 198 2009-01-15 18:05:59 2008-12-04 14:46:09 3 2 24 0 66 249 1 53.40 52 89.57 CHANGED Ms..YYusY.aGGhGYGh........GaG.GLGhGY.GCGhu................G.u...........................sGYG.....GsGYG.Ga..GYGs........hpP.sYGtY .......Ms...YYusY.YGGhGYGh...hG.GLGhGY...G...CGhG........h..G..hG.s......t.GaGY......Gs.GaG..GY..GYGs........hts.hhtth........... 0 17 17 17 +11592 PF11760 CbiG_N Cobalamin synthesis G N-terminal Enright A, Ouzounis C, Bateman A anon Enright A Domain Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process [1]. 
Within the cobalamin synthesis pathway CbiG catalyses the both the opening of the lactone ring and the extrusion of the two-carbon fragment of cobalt-precorrin-5A from C-20 and its associated methyl group (deacylation) to give cobalt-precorrin-5B [2]. The N-terminal of the enzyme is conserved in this family, and the C-terminal and the mid-sections are conserved independently in other families, CbiG_C and CbiG_mid, although the distinct function of each region is unclear. 25.00 25.00 25.40 26.30 24.90 24.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.57 0.72 -4.28 150 1206 2009-09-10 21:45:10 2008-12-09 13:07:16 3 21 1181 2 319 1016 221 83.40 43 18.54 CHANGED htshlpphap...p.hcsllhlhAsGIsVRtlAPllp...cKpp.DPAVlVlD-sGpalIsLLuGHhGGANpLApplAphLGupsVITTAo- ............h..phlpphap...p.hcslIhlhAsGIsVRsl.....AP......l.lp...sK.ts.DPAVlVlDEpGpaVIsLLuGHhGGANpLu+plAth...L.....Gu.sPVITTAoD..... 0 108 216 280 +11593 PF11761 CbiG_mid Cobalamin biosynthesis central region Enright A anon Enright A Domain Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process [1]. 
22.70 22.70 22.80 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.90 0.72 -3.75 143 622 2009-01-15 18:05:59 2008-12-10 15:34:47 3 14 614 0 166 505 48 88.40 20 21.68 CHANGED hhulDhhupt.hGapl...........................sshpshpplsutllsGctl..tlht-........................hh.thphhssp.stp................................................stlhlo....................thttstlhh+P ..........AlDhlApp.hshph.........................................................................................csh.ps.sKplsuhlVstcpVulh.-................................th.ht..h..h..h..p......p.s...phc................................................shlhlo.................h...........h.............................................................................. 0 63 122 149 +11594 PF11762 Arabinose_Iso_C L-arabinose isomerase C-terminal domain Bateman A anon COG2160 Domain This is a family of L-arabinose isomerases, AraA, EC:5.3.1.4. These enzymes catalyse the reaction: L-arabinose <=> L-ribulose. This reaction is the first step in the pathway of L-arabinose utilisation as a carbon source after entering the cell L-arabinose is converted into L-ribulose by the L-arabinose isomerases enzyme [1]. This is a C-terminal non catalytic domain. 25.00 25.00 26.90 31.50 19.90 19.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.35 0.71 -4.52 38 1016 2012-10-02 11:40:13 2008-12-17 11:58:04 3 3 992 9 186 624 33 114.70 58 23.32 CHANGED sKPplEVHPLuIGGK-DPARLVFsutsGsAlssSllDhGsRFRLllN-V-sVcPppshPpLPVA+sLWcPpPsLcsuupAWIhAGGAHHTsaShulosEplpDaAchsGlEhllI ........-KPhL-Vp.LGIGG.K.-.DP.ARLlFs.sp.s.G.P.AllASLIDL.G.-RaRLLVNslDsVcss+sLPKLPVApALWKspPsL.TAuEAWILAGGAHHTVFSaALshsphcpFAEhtsIElsVI.... 
0 60 122 157 +11595 PF11763 DIPSY Cell-wall adhesin ligand-binding C-terminal Wood V, Coggill P anon Pfam-B_85585 (release 23.0) Family The DIPSY domain is characterised by the distinctive D*I*PSY motif at the very C-terminus of yeast cell-wall glycoproteins. It appears not to be conserved in any other species, however. In fungi, cell adhesion is required for flocculation, mating and virulence, and is mediated by covalently bound cell wall proteins termed adhesins. Map4, an adhesin required for mating in Schizosaccharomyces pombe, is N-glycosylated and O-glycosylated, and is an endogenous substrate for the mannosyl transferase Oma4p. Map4 has a modular structure with an N-terminal signal peptide, a serine and threonine (S/T)-rich domain that includes nine repeats of 36 amino acids (rich in serine and threonine residues, but lacking glutamines), and a C-terminal DIPSY domain with no glycosyl-phosphatidyl inositol (GPI)-anchor signal. The N-terminal S/T-rich regions, are required for cell wall attachment, but the C-terminal DIPSY domain is required for agglutination and mating in liquid and solid media [3]. 25.00 25.00 71.40 71.40 23.40 18.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.72 0.71 -4.33 4 5 2009-01-15 18:05:59 2008-12-18 10:57:59 3 1 2 0 5 5 0 126.40 31 15.91 CHANGED NDsIsPSYV.Yh-oN...........G.hhASopGss-G..NlFYYDSTlcRllTCss.RPhYplhp-Ds......souFpIhKssDGp..Fphs.usssE.hcl+l.ssGplahTo..hDscsushss.pshRAssVhLpA NDSISPSYV.Yh-SN...........GauhASosGss-G.lNlFYYDSTlcRIlTCCsVRPlYplhp-Dst.....uoSFsIhKNuDGs..FTasEuusuEPhcL+lLsDGRlahTSs.hDscs.ushs-.cshRAsNVsLpA 0 4 4 5 +11596 PF11764 N-SET COMPASS (Complex proteins associated with Set1p) component N Coggill P anon Pfam-B_7375 (release 23.0) Domain The n-SET or N-SET domain is a component of the COMPASS complex, associated with SET1, conserved in yeasts and in other eukaryotes up to humans. 
The COMPASS complex functions to methylate the fourth lysine of Histone 3 and for the silencing of genes close to the telomeres of chromosomes [2]. This domain promotes trimethylation in conjunction with an RRM domain [4] and is necessary for binding of the Spp1 component of COMPASS into the complex [3]. 25.00 25.00 26.00 25.40 24.60 24.80 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.14 0.71 -4.35 31 286 2009-01-15 18:05:59 2008-12-18 13:08:07 3 12 199 5 190 257 0 151.20 32 11.68 CHANGED cpphsthhsL.sl..pshlhDcEDhphL+pshpch..t-hs.......NhphWsW+ppphpt.p...tp.s................spp..cpp......h.sthlpssTGsARoEGa+KIscp-KucYLPH+h+sppshpphphcspt.................................ssttp.suppsSRssRussRRhlusIsspp..h...us-SDlL ......................................................................p.pthh.Lhsh...shshDpEDhpaLphsh....cch.......h.tpc.s...s..h...hNsohWla+pt.hp.th...........................................pp..ppp......hpsthhppsTGsARoEGah+Iscp-Ks+YLspph.hspptt-p.tspspt...................................sspsp.ssppsupppRuppRRLluslssp.......usD..SDLL.................... 0 43 78 141 +11597 PF11765 Hyphal_reg_CWP Hyphally regulated cell wall protein N-terminal Coggill P anon manual Domain The proteins in this family are all fungal and largely annotated as being hyphally regulated cell wall proteins, and several are listed as the enzyme EC:3.2.1.18. This enzyme is acetylneuraminyl hydrolase or exo-alpha-sialidase, that hydrolyses glycosidic linkages of terminal sialic acid residues in oligosaccharides, glycoproteins, glycolipids, colominic acid and synthetic substrates. 
25.00 25.00 28.60 35.30 22.80 24.60 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.93 0.70 -5.68 8 149 2009-01-15 18:05:59 2008-12-19 16:57:30 3 11 20 0 117 153 0 315.40 33 36.77 CHANGED llslhhllssshAlsITps+VDRGslsLslGDITIpuGuaWSIIDNulosFsGsLpVpsuAGLYIoSTSsLLALQVTLsShLuSIpNsGlIoFNShsSLTuosYsLlGlSFsNsGEMYLuASG.shssshulTuuoWoNsGLLsFYQNpRooGsVSLGsPhGoIsNNGQICLaNclYpQTTpIsGoGChTANpsSoIalSNsLLslsosQsFYLADSpSSlhspAlSsspTFNVYGFGNGNpIGLsLPLlu.s.hssAYsYDsoTGILTLR.sGhLuQcFNIGhGYssuhFpIVTDsusGLsSs.hGuloYSGPVPsRsLPusCQ.PCKPlPsuP ..........................................................................h..hhhhht..hh.u...h.pls.p.spl..shGsh..shsh..G.Dl.pIpsG.uhaSIl.sN...s...h....ssahGsl..pVppsuuhYIooss.s..h.l.uLpVs...l.h......s.hhts..lpNsGllsh.suh.p.ohs.ussasl.hGh..o.FpN..sG..-hah..uu..sG....hs..ss.s.hs...l...s.u.s.s.W...pNs..GLlsa.aQ..s.p.+..o.s.....Gs.....l.p.L..G.t........s.......h....s.....oI...sNsGpICLpNps.apQ.s.o.sIpG.oGClo..sspsSsl..a..l....sN..s.hh...s..lsssQshYLssspS..Sl.h............s..p.uh..u.s....sp.Ta..pVhGFG.....N.....G..N+..I...GL.......shsL..h...s......h.................ssasYsssoGILTL.......p......s..sh.........h..s.pp...FsI....GhGY-sshF.p.l.s.o.D.s.ut.Gl.sshhGu..lpYsGssPs.t..sh.Ps..Cp.sCpshPthP........................................ 0 12 44 117 +11598 PF11766 Candida_ALS_N Cell-wall agglutinin N-terminal ligand-sugar binding Coggill P anon Candida_ALS Domain This is likely to be the sugar or ligand binding domain of the yeast alpha-agglutinins. 
25.00 25.00 28.10 27.70 20.00 18.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.73 0.70 -5.47 24 165 2009-01-15 18:05:59 2008-12-22 09:42:18 3 36 42 5 84 174 0 240.80 38 24.30 CHANGED sWsl.DGsssssGDTFTLpMPsVFKFpss..psolpLss....ssssYAoCsh.sG-hhssaSpLpCosssslss..spps.GolshPlsFNsGGSusssDLpsuppFsuGsNTVoFsDGspplSsoVsF-uus.......ssssshlhtuRhsPolsphphahluspCsNGYsSGs...lGaossss..tlDCSslpstIosplNDWhaPpoucs.hsaohoCousu..lhIoapN.lPAGYRPFl-uhh...sssssshphsasscYpC ......uWsl.DGossssGDTFTLsMPCVFKFpos.....psolsLss......sussYATCshpsG-.hssaSoLpCTlss....slss..stpAhGolohPlsFNlGGSusssDLps.SpsFsuGsNTVoFsDGs...pplSssVsFpsss..........ssssshl.h..tuRhhPolsc.hps.hhluspCsNGYpSGs......lGas..ss..ss....sssID.CSslclu..Io..NslNDW.aPh.oucs....hoaohoCousu..lhIoapN.lPAGYRPFlDAhl....sss..ss..shshsYpNpYsC........... 0 10 23 82 +11599 PF11767 SET_assoc Histone lysine methyltransferase SET associated Coggill P anon Pfam-B_8752 (release 23.0) Domain SET domains are protein lysine methyltransferase enzymes. SET domains appear to be protein-protein interaction domains. A subset of SET domains have been called PR domains. The SET domain consists of two regions known as N-SET and SET-C. SET-C forms an unusual and conserved knot-like structure of probably functional importance. Additionally to SET-N and SET-C, an insert region (SET-I) and flanking regions of high structural variability form part of the overall structure [2]. This domain is found in fungi associated with SET and N-SET domains. 21.00 21.00 21.10 45.10 20.10 20.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.16 0.72 -4.66 20 114 2009-01-15 18:05:59 2008-12-22 13:49:53 3 4 113 0 88 118 0 69.40 38 5.99 CHANGED aIsccaVPspssosp-lK+pL+pYsWsclhs-+oGaYIlFsshc....EAc+CapspsspthFsac.....hhMc ..FIu+paVPlhssolsHlK+RLKtasacclR.sD+TGaYIlFpsoc.G...p.EsERCacssstpthFsYp.....MsMc........... 
1 15 43 75 +11600 PF11768 DUF3312 Protein of unknown function (DUF3312) Mistry J anon Pfam-B_5984 (release 23.0) Family This is a eukaryotic family of uncharacterised proteins. This family shows similarity to WD40 repeat proteins. 26.20 26.20 26.20 26.20 26.10 25.20 hmmbuild -o /dev/null HMM SEED 545 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.64 0.70 -6.28 7 103 2012-10-05 17:30:43 2008-12-22 14:09:06 3 6 72 0 71 106 0 404.70 39 68.62 CHANGED KppasEuRGh.Wol+N...K+Pp+LRDsLKcLEEhLppppslhs+W+sphhhQLhhusGlLlp..ls..sspLp+VhhD+.LVGKLhu-pIoDAllTcoaIllSa.spNplshVpLpK.sshs....ppLEKlShhD.KIpas.I...PGsssR+lsR+LslNupQ-lslsWhtssps-V.aPWpPssp-pDRANlhlhphsus.pLElLsYhhTEhDPlsVpFSh.psaQlhTVEpplutps-...shhDsCIYEhs+sKhcRVuVTpIPLpoplsssAhsssc-KLhLGC.DuSllLaDttRtlThLs.pAslhPshluWHssuullhVusppGpLQsFDlALuslphQLluE..-stPpusLpLppaF....sss.sLsclpWu.tP.lspptcshspssshLhLhF-pGPLuslphhhGshh+GpltsstLhu.....pYI+hsQV-cAlslLtShNWsThGt.ChhuhptIsNalh+t.Lss-REsQLEtsLGoFhsPs+PLs-pThLEapD.VschsRRFFHaLLRhppFpKAF.LAlDIsstDLFMDlH.lAtshG-lsLAc.A .........................................................................................K.ph.pt...Rsh.hh.cN...+pst+L+csL+.lE-hhpppphlhspWpst....h..lhhssGllhp..ls..sspl.phhh-+.LhGKL..p.lsDshhoppallhsa.tpsplshl.hp+..t........pt.-Klu.h-.+l.hh.l...sG.s..s+phtR+LslNt.tDhh..lsWh..sp..s-s.aPWtP.h.p-p-RANlhlhthtth.pLElLs.hhTEhsP..lsspFs.hpps.plhTlEppluhct-..........s.s-.ChYEh...........s......p.s...K.hp.p.h.......ulTpl....Pltopshssshsssc-+LlLGC..D.uolhLa-............hpp..sTh...hs.ps.......t.ll.........PshluhHssuullhVusppGpLQhFDhALuslphQLluE...sh.Ppph...Lphs..phF......thp.sLhphpW..ts.h...t.t.sph..shL..hLhacpGP..luslhhhh.GhhhpG..plt..tlh.......pYl+hsph.cAlslLtuhNWsT....h.Gt.ChhshptIs.NaLh+t.LsspREsplEtsLGsFasPs+PL.-ssh.EapD..lpchsRRFFHaLLRhphatcAF.lA.DltshDlFhp....................................................................... 
0 29 34 52 +11601 PF11769 DUF3313 Protein of unknown function (DUF3313) Mistry J anon Pfam-B_1303 (release 23.0) Family This a bacterial family of proteins which are annotated as putative lipoproteins. 20.70 20.70 21.50 20.70 20.60 20.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.00 0.71 -4.95 64 738 2012-10-02 17:45:13 2008-12-24 10:50:44 3 4 658 0 152 414 39 195.50 44 87.44 CHANGED lhluGCuus.sp.......uuhlss...astLppsp...sstsshpahssshc........hspYsplhl-Plph...............h.t.spsshp.lsp.psh....pplssahsptlppplupp..hpls..spP.uPsslhl+sAlTslssss.slpsh.cllPhuhlhsssptu....sGtpstssplshEsclhDusoscllutsVc+ttupsh...stsspsh.shsss+ssl-paApchsp ...........................LALoGCASKlspP-p..............YSGFLss...YScLpEoTS.AoG+PlLRWVDPsac........tSp.YD.s.IlasPlTY......................YPs.PKPoo.Q.V.uQ...psL....-clLsYsNschKcAIupR....pPLV.....TTs..GP+oL..IhRuAITGVDTSK......E....G....LQFY....EVlP....VA.LlVAGTQhA.............TG+RTMDTcLaaE..uELIDAATNKPVlKVVRpGp.GpcL.................sNpoTPh.u.h-slKpVlD-hAsDs..p.............................................................. 1 30 68 109 +11602 PF11770 GAPT GRB2-binding adapter (GAPT) Mistry J anon Manual Family This is a family of transmembrane proteins which bind the growth factor receptor-bound protein 2 (GRB2) in B cells [1]. In contrast to other transmembrane adaptor proteins, GAPT is not phosphorylated upon BCR ligation. It associates with GRB2 constitutively through its proline-rich region [1]. 
20.80 20.80 22.60 21.10 19.90 19.60 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.96 0.71 -4.55 3 26 2009-01-15 18:05:59 2009-01-05 11:29:25 3 2 23 0 15 20 0 149.50 60 96.31 CHANGED MLKsCGNosVAVSVGISLLLLLVlCGIGCVWHWKHRsoTRFTLPKFLQRRSSR+KDsTKTFSLSPplIGPRHKsSVETQDHKSAuKcNNhHDNYENVEVCPPKAKGcTDKcLYENTtQSNhEEHIYGNETssDYYNFQKPSPSEsPQDEDIYILPDSY .....MLKSCGNs.sAISVGISLLL.LLVlCGIGCVWHWKH...+suT.RFTLP+FLQRRSSR.+K.ssTKTh.LuPclIG.+HKhSVcTpDH+Sus+ssslHcNYENVEsGPP+sKtcTDKELYENTpQoNFEEHIYGNETuS-YYNFQKPpsScsPQDEDIYILPDS... 0 1 1 3 +11603 PF11771 DUF3314 Protein of unknown function (DUF3314) Coggill P anon SwissProt (UPF0575) Family This small family contains human, mouse and fish members but the function is not known. 25.00 25.00 150.50 42.80 19.80 19.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.41 0.71 -4.48 3 44 2009-01-15 18:05:59 2009-01-05 12:59:39 3 2 29 0 22 33 0 150.10 65 57.07 CHANGED MFASFuFssLEE.sPLSVSHFuIGQssIc.uh+VSIFRYCsPTPYLASuhTG.LYK+MRWNVEtssEusGcGcsaDu..........SlsEYYFLCYEDThEsAc........EutsossDSsuclsRlWSIGRWVpotPpssTDDllDWlLCPlPsGsYKQLLsLG.EEPSSshATDLLVQlL ...MYASFGFVsLEETsPL....SISCFFCGRFSIS.SH.-VSIFRYCsPsAYTAS+FPRYLYKKMRWNLEsTs.E.ssupGp-.............ShVDYYFLCYRDTWE............DsGpoPAN..SCPQIQKLWSIGRWVPLGP...s.-DDLhSWILCPQPsGDYQQLLTIGFEEPSphLATDLLVQlL............. 0 1 3 7 +11604 PF11772 EpuA DNA-directed RNA polymerase subunit beta Mistry J, Coggill P anon Pfam-B_4675 (release 23.0) Family This short 60-residue long bacterial family is the beta subunit of the DNA-directed RNA polymerase, likely to be EC:2.7.7.6. It is membrane-bound and is referred to by the name EpuA. 
21.30 21.30 21.60 23.40 20.50 19.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.24 0.72 -4.52 20 529 2009-01-15 18:05:59 2009-01-07 11:27:24 3 1 523 0 56 207 0 47.00 48 72.40 CHANGED lll.VllLhllsLslGLMlGYullGsG.sPhslLp.cpWpcIlshhptp ........lll.VllL.uhLsLslGLMlGYGllGcG.sPauILSPspWpcllpKFTGp... 0 14 30 42 +11605 PF11773 PulG Type II secretory pathway pseudopilin Mistry J, Coggill P anon Pfam-B_4690 (release 23.0) Family The secreton (type II secretion) and type IV pilus biogenesis branches of the general secretory pathway in Gram-negative bacteria share many features that suggest a common evolutionary origin. Five components of the secreton, the pseudopilins, are similar to subunits of type IV pili. Pseudopilin PulG is one of the secreton pseudopilins, and is found to assemble into pilus-like bundles [1]. PulG interacts with proteins H, I and J within the multi-protein complex as well as blocking extracellular secretion and reducing the amount of PulE protein as well as the amounts of PulL, PulM, PulC and PulD when G is over-expressed [2]. In Klebsiella the pilus-like structure is composed largely of PulG [3]. 25.00 25.00 28.90 28.90 22.30 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.44 0.72 -4.25 9 340 2009-01-15 18:05:59 2009-01-07 11:56:40 3 1 333 0 23 146 0 79.20 58 88.03 CHANGED ILLESLlAlAlLssIsoLlLoplspsRpctspphpppElLslApMAlQTpQccLolNGlsIplpcopptlhlacpucEllcl .ILLEAlVALAIFASIATLLLGQIQ+NRQEEAclLQKEEVLRVA+MALQTGQsQloINGVEIQVhuSEKGLEVYHGoEpLLuI.... 0 1 5 13 +11606 PF11774 Lsr2 Lsr2 Mistry J anon PD026347 Family Lsr2 is a small, basic DNA-bridging protein present in Mycobacterium and related actinomycetes. It is a functional homologue of the H-NS-like proteins [1]. H-NS proteins play a role in nucleoid organisation and also function as a pleiotropic regulator of gene expression [1][2]. 
21.00 21.00 21.30 21.00 20.30 20.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.38 0.72 -4.14 69 720 2009-01-15 18:05:59 2009-01-07 13:29:32 3 16 402 5 243 584 10 101.80 37 66.89 CHANGED MAcKVhVp..LVDDlDG.utA-...ETVpFuLDGVsYEIDLSscNAs+LRpsLp.alssuRRlG...ut....c+ut....................sssps.pssts+cpsssIRcWARpNGapVSsRGRIPu-ll-AYctA .............................................................................................MAp+hhl..pLlDDlDG..s.A-....-oVpFuL..D..GhsYpIDLospNA.pcLRp...s...Lt.a..lttuR+su.tp.......................................ttsts...pssssptpsstlRpW....A+p.sGap......V.....SsRG..RIss-lh-AYctA........................................................................................................................ 0 94 195 223 +11607 PF11775 CobT_C Cobalamin biosynthesis protein CobT VWA domain Bateman A anon Pfam-B_10956 (release 9.0) Domain This family consists of several bacterial cobalamin biosynthesis (CobT) proteins. CobT is involved in the transformation of precorrin-3 into cobyrinic acid [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.29 0.70 -4.92 2 462 2012-10-10 16:07:06 2009-01-07 13:44:22 3 6 430 0 134 465 1894 195.60 50 32.61 CHANGED sFh+cc-tphhsssVp.llD.SGSMtGR.IplAhssA.hlAcsL-RhsV.s.IhGFTTh...Gu.shEsh.ttGhst..uhh.slh+.h.cpAsuP.hRAR..hGhhhcph.LhpNlDGEuls.htphhhGR.Ep+KIhhhhSDGAPs.......sAGshhcpHLRplhcEIEThS.l-LhAIGlhpDssR.YY+phshlsssEELGsulspcLuclh.t ...........................................saKpE+-.scF+DTVVTLLlDNSGSM.R......G......RPI....slA.AhsADlLARTLERC.G.VKlEILGFTT......+....A......W.K.G...G.p.u.REtWltt....G..+..P......t.t..PGRLNDLRHIlYK..s..ADsPWRRA....RpsLGLMh+.EGLLK.ENIDGEAL..WAapRL.huRsEpR+.ILMlISDGAPVDDSTLSsNsus..YLEc.HLRtV.I....ptIEs.....+u.sVE..L.lA..IG.I..G..H....D...V...TR...YY..p..+.AVTIscs-pLuushhppLutLF..p........................................................................ 0 34 83 104 +11608 PF11776 DUF3315 Domain of unknown function (DUF3315) Mistry J anon PD456999 Family This is a Proteobacterial family of uncharacterised proteins. Some of the proteins in this family are annotated as being putative membrane proteins. 23.30 23.30 23.40 23.60 23.20 23.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.49 0.72 -4.44 112 1010 2009-01-15 18:05:59 2009-01-07 13:51:14 3 4 799 1 169 569 18 51.00 42 38.50 CHANGED Gc+lP.spYRsc.pYslcDW+tapLssPs+Gh+Wlths.GcYlLlshuoGlIlpl ................G-plP.phYRsc.cYsIcpWp.h+.s.LPAPstG..p+WshhG..GsYVLIssssGpIlc......... 0 21 61 115 +11609 PF11777 DUF3316 Protein of unknown function (DUF3316) Mistry J, Coggill P anon Pfam-B_4718 (release 23.0) Family This family of bacterial proteins has no known function. Several members are, however, annotated as being putative acyl-CoA synthetase, but this could not be confirmed. 
24.40 24.40 24.40 24.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.35 0.71 -4.31 23 216 2009-01-15 18:05:59 2009-01-07 14:36:55 3 1 189 0 28 150 0 126.10 28 67.67 CHANGED MKplhlls.....hhlhhussuhA...............shtpp.pspolpssshso+psAYsh........Ghshlpshptpos....ppLstcLplhsp..hshpsh+lpD.upVoVpchhpss...............GplpYpshlclcac.....................YphRsuN ..............................................................Kphllhh.....hhlhhu.ssuhA...............th..t.pscol.p.us.hhsoppphY.us........Ghspl.ph.h.Pp....phhup-Lpl..hpp...hs.+s.s+lps..spVs.VQphhpts...................GhlpYpshl+hsap................................YphRpoN............................................................. 0 4 14 24 +11610 PF11778 SID Septation initiation Wood V, Coggill P anon Pfam-B_41015 (release 23.0) Family This family is required for activation of the spg1 GTPase signalling cascade which leads to the initiation of septation and the subsequent termination of mitosis. It may act as a scaffold at the spindle pole body to which other components of the spg1 signalling cascade attach in pombe [1,2,3]. In S.cerevisiae it is both required for the proper formation of the spindle pole body outer plaque and may also connect the outer plaque to the central plaque embedded in the nuclear envelope [4]. 25.00 25.00 27.80 25.80 21.30 18.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.72 0.71 -4.27 9 50 2009-01-15 18:05:59 2009-01-08 14:46:36 3 1 38 1 31 52 0 130.50 28 20.46 CHANGED slpplpGNssphlT.-sltpph.phs+Ep..YssLtl-plDplshVphQNhlKslllhLcIPasKLpp+lPLluIpLpaEpphLhpFANpLHhhlYsc.lshKphTp.Ahs-alps+shtplcHPLc.CL-sLacpltpp ........................htpl.Gssuptlshcsltpsh..clspcp...YspLtl-plDslshlpLQNllKslllhltIPatplhcplPhlsIpLphEhtlltpFsscLHhplYs..pphshK...p...hss...Ahppahps.pp.hsplcH.Lc.CLptLacplh..t................................... 
0 6 18 30 +11611 PF11779 DUF3317 Protein of unknown function (DUF3317) Wood V, Coggill P anon Pfam-B_3618 (release 23.0) Family This is a short family of proteins conserved from fungi and plants to human. One each of the human and mouse members is annotated as being androgen down-regulated protein expressed in mouse prostate, with a potential signal transduction function, and all appear to be membrane proteins. 25.00 25.00 25.00 25.50 24.20 24.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.71 0.72 -4.53 27 282 2009-01-15 18:05:59 2009-01-08 14:49:58 3 3 177 0 194 250 0 56.70 29 53.89 CHANGED thtcalphhhapYplThulYhls.PhE+hlhNshl....hhllthllhuhhha....lPspltthhp ........h..hpalphhhYpYplshulYML-.PWE+hlF......Noll....lslluhlla.ssala....hPpalhhhh.t................ 0 44 78 133 +11612 PF11780 DUF3318 Protein of unknown function (DUF3318) Mistry J anon Pfam-B_1341 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.30 22.30 24.30 23.80 21.50 21.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.67 0.71 -4.63 18 198 2009-01-15 18:05:59 2009-01-08 14:52:58 3 1 158 0 68 175 98 138.50 29 75.94 CHANGED hp+LpshhPsuhRhplhls.Apscptpllpuhh.h....sptphhlshshWhshshspRslhhh+psuhhps.......ppa.tlshh.ulslsGhhssslphsptcsltlslAuGLuuhushplWppspu...spsplsADp.tAlchAscpGY .......h.pcLpshhPhtlRp.lhlhtusspp.pLhpstt.hR.....splphhhsLpha.shs.hspRsLhhh+p.suplps..................spaPh.uh.h.uhlhhGhstulhchhspcuLthshAlGLuuhtuYRlWppspu....ctshtADt.tA.t.u.c......................... 1 11 37 56 +11613 PF11781 RRN7 RNA polymerase I-specific transcription initiation factor Rrn7 Pollington J anon Pfam-B_4705 (release 23.0) Domain Rrn7 is a transcription binding factor that associates strongly with both Rrn6 and Rrn11 to form a complex which itself binds the TATA-binding protein and is required for transcription by the core domain of the RNA PolI promoter [1,2]. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -8.02 0.72 -4.45 25 186 2012-10-03 10:42:43 2009-01-08 17:32:32 3 6 161 0 119 197 6 34.60 32 6.39 CHANGED hphh.cs.t..Cuh..Cput....hhhsc-GhaaC.pCGsht- ..........h.ct....Csp..Csup....hhhtc-GthaC.psGphhE... 0 24 50 90 +11614 PF11782 DUF3319 Protein of unknown function (DUF3319) Coggill P anon Pfam-B_4745 (release 23.0) Family This is a family of short bacterial proteins, a few of which are annotated as being minor tail protein. Otherwise the function is unknown. 25.00 25.00 28.40 46.60 19.40 22.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.06 0.72 -4.18 9 129 2009-01-15 18:05:59 2009-01-09 10:19:37 3 1 128 0 20 64 2 77.90 55 71.29 CHANGED MtpslY.RGa.LpsusussshW+spIKs+llpGsLsAVKKSIDWWhDTuolIDP+EFsulsppp...tsouspoEsapGapIKNDTGEPNtW .Mth.hY.pGF.Lpsstspsp.WplpI+sp.hsGsLuAVKKSIDWFCDTASIIDPKEFpSlGpK+...psuuuspEpFNGaTIKNDTGEsNcW..................... 0 2 5 14 +11615 PF11783 Cytochrome_cB Cytochrome c bacterial Mistry J, Coggill P anon Pfam-B_4681 (release 23.0) Family This is a family of long bacterial cytochrome c proteins, found in Proteobacteria and Chlorobi families. 
21.00 21.00 21.10 22.90 19.30 20.80 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.46 0.71 -4.70 41 134 2009-01-15 18:05:59 2009-01-09 14:18:37 3 13 96 1 78 132 23 171.90 31 31.52 CHANGED KhhWDWSpAGp..........................p.....t.sp.sahppKGsF....patc.sltPpYtWaNG.phphhhhs-tlc.s.............sps...ltlstPhGshsDst.u+IaPFKlapG+QPYDtptppllsscha..G.................tuhWss.....aDaspAlptGhc.............................................ts......Glta.....SGca.....................................sFlcTphaaslsHhVuPK-cA.L..pCs-CHspsuc .........................................ch.WDWupAGp.t................t....................tt.sp.sY.thKGsFtatc.slhPtYtWaNG.phph.hhhs-ths..s...............sts....splstPhGshsDsp.u+IaPFKlapGcQsaDttpppllss..cha.................t...suaWps.....aDaspAlptGhc...................................................ts......Gl.a.....SGca.................................................................................................sFlcTphaaslsHhVuPKc.cA.L..sCs-CHssss.... 0 23 48 71 +11616 PF11784 DUF3320 Protein of unknown function (DUF3320) Coggill P anon Pfam-B_4770 (release 23.0) Family This family is conserved in Proteobacteria and Chlorobi families. Many members are annotated as being putative DNA helicase-related proteins. 25.00 25.00 25.20 26.00 22.10 24.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.52 0.72 -4.44 41 216 2009-01-15 18:05:59 2009-01-09 14:23:31 3 10 211 0 81 219 9 51.20 28 3.04 CHANGED ssthhpts.pshLtphlpcllphEGPlpcshLscRltpuaGhpRsG....sRIppt .........t.hptp.tshLtphlppllcsEuPIppshLspRltpuaG.lpRsG....s+lcp........... 0 28 46 61 +11617 PF11785 Aft1_OSA Aft1 osmotic stress response (OSM) domain Mistry J, Finn RD, Wood V, Wahls W anon Manual Domain This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. 
The OSM domain has been shown to be involved in the osmotic stress response [1]. 21.60 21.60 22.00 22.00 20.80 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.01 0.72 -3.43 10 122 2009-01-15 18:05:59 2009-01-14 09:32:12 3 6 114 0 86 120 0 51.10 40 9.37 CHANGED sosu-.sspuLAPPsRsusptp.......ssTPDYFuulps..shuLEPNPFEQSF......Guuss-......TP ...................................s.............s.p.s.............ssss.shhst.ts.....shsLEPNPFEQSF.........Guss.-...s................................ 0 23 45 72 +11618 PF11786 Aft1_HRA Aft1 HRA domain Mistry J, Wood V, FinnRD, Wahls W anon Manual Domain This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The HRA domain is involved in meiotic recombination. It has been shown to be necessary and sufficient to activate recombination [1]. 25.00 25.00 25.00 26.50 24.60 23.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.77 0.72 -3.72 5 90 2009-01-15 18:05:59 2009-01-14 09:44:44 3 6 87 0 65 91 0 79.20 61 14.87 CHANGED PuuusP..asW.usuSLRoG....PLSPAMLsGPTG......SsDYFSssu+hpu........GFPTPNESSLRTGLTPG.GuG.............SMFPAPSPNTQAlLsp ............sso..suss..asW.........ssSLRoG....PLSPAMLuGPst......ssDYFssht+..............GFPTPNESSLRTG....LTPG...GuG........................SMFPA.P.SPNoQAlht................................................. 0 16 33 52 +11619 PF11787 Aft1_HRR Aft1 HRR domain Mistry J, Wood V, Finn RD, Wahls W anon Manual Domain This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The HRR domain is involved in meiotic recombination. It has been shown to be necessary and sufficient to repress recombination [1]. 
25.00 25.00 34.60 33.50 19.20 22.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.47 0.72 -2.93 9 76 2009-01-15 18:05:59 2009-01-14 09:46:02 3 4 72 0 57 73 0 75.40 49 14.56 CHANGED uATPuTI-FHRTAlsAA...................cpss..usTSpP.pstsptsp....sshshpPsps....PFs.HDssDAANGLaMLApGs ....GATPoTlDFHRTAlsAA.........................pp.....s.....t.............u...sT...SpP.p-.sst.hpt......sph-hcssss.....PFs.HDssDAANGLFMLApGt. 0 9 26 45 +11620 PF11788 MRP-L46 39S mitochondrial ribosomal protein L46 Wood V, Finn RD, Coggill P anon Pfam-B_1897 (release 23.0) Family This is the L46 subunit of the mammalian mitochondrial ribosome, conserved from plants and fungi. 21.90 21.90 22.70 22.10 20.90 21.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.41 0.72 -3.40 37 276 2009-01-15 18:05:59 2009-01-15 13:02:55 3 6 247 0 192 272 0 110.20 24 36.02 CHANGED hplpuullLpR.PllotchsshEppa..a.........pcL.c+l.hpasphhhachsohs-h.ca.t.ptp.hppp.t..h..sh.ph.tpphpcp.cpplt.........................................pPssRlT..-AD ..........................clhuullLpR.PllopplsshEppa............pcL.c.plphphsth..achts.t-......ch.....ptph.h.pp...p..............s.tth.h..hp.......pht.p.c.thtt..............................................................pPtsRhTcAD....................................................... 0 54 98 153 +11621 PF11789 zf-Nse Zinc-finger of the MIZ type in Nse subunit Wood V, Coggill P, Finn RD anon Pfam-B_1696 (release 23.0) Domain Nse1 and Nse2 are novel non-SMC subunits of the fission yeast Smc5-6 DNA repair complex. This family is the zinc-finger domain similar to the MIZ type of zinc-finger [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.23 0.72 -4.45 22 399 2012-10-03 15:03:13 2009-01-15 16:14:09 3 17 271 2 265 1659 117 59.30 29 18.81 CHANGED D-ltlpp..sphsLpCPlThpshcpPVps+.........pC.sHsFE+puIhphl.....................pptppscCPl.uCu ........................................-l.hpt....t....hshpCP.lT...tt.......h...c...p...P..Vpsp........................pC...sH..s..a.-+.c.uI..hphl.............................................p.p..p..pp.h..pCPhhGC............................................ 0 102 155 226 +11622 PF11790 Glyco_hydro_cc Glycosyl hydrolase catalytic core Wood V, Coggill P, Finn RD anon Pfam-B_1680 (release 23.0), IPR013781 Family This family is probably a glycosyl hydrolase, and is conserved in fungi and some Proteobacteria. The pombe member is annotated as being from IPR013781. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.93 0.70 -4.94 51 454 2012-10-03 05:44:19 2009-01-15 17:13:44 3 34 246 0 280 584 106 211.60 20 50.29 CHANGED Gluas......sss.........spth....ttsusloWh.YNWs.hssushsst.........EFVPMlWGsp.st.......shhsslpss..............tsspalLuFNEPDh..ss...QushsP.psAAphahphhpP........tss+lsuPul..ssu..........sh......WhppFhss...Cs..................sC......plDalulHaY.......ss.sss.hpsalsphhstas........cPlWlTEFu..t..........sssssppp....pppFhppslsal-sp...shVpRYuaFs.............s..ssssstLlstp......G.sLTslGphY 
....................................................................................................................................................................thsWh..Ysat.......................palPh..has.t..........t.htt.h.th.....................t.phlh..sa...NEPDh....ss..........tush.ss..ttAstha...phh.t.s.................thpls....uPuh..sss.........shs.........W.l.p..p.Fhps...sp.....................ss......phDalslHaY..............................tss..hps...h..hs...hl...pph....h...s...t.as......................cPlWlTEau.......................................stsstpp.............ttpa.hppshs..hh.-t..t............shV.t+Y.uaFt..........................hhp...t...........u...tho.hG..a.................................................................................. 1 99 179 242 +11624 PF11791 Aconitase_B_N Aconitate B N-terminal domain Bateman A anon Pfam-B_2605 (release 10.0) Domain This family represents the N-terminal domain of Aconitase B. 21.70 21.70 21.70 25.90 21.50 20.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.91 0.71 -4.40 94 1508 2009-05-07 16:09:31 2009-05-07 17:09:31 3 5 1476 2 324 1082 761 149.10 63 17.65 CHANGED sY+pHltER.us..GlPPhPLs.A-QTupLlELLKsPPsu.-cc..................................aLL-LLppRVPPGVD-AAaVKAuFLsuIspG-sssslIottcAlcLLGTMhGGYNlpsLl-hL....cDsc.................lAstAAcsLppTLLlaDA.FcDVt-hucs.NsaAKpVlcSWAcAEWF .....tYR+HVtER....A.A...GIsPhPLs.ApQsAsLVELLKN.PPsG...EE-..................................FLLDLLpNRVPPGVDEAAYVKAuFLAAlAKG....-spSPLlo.........sc+AlELLGTM.GGYNIcPLI-hL....DDsc..................LAslAAcALp+TLLMFDs.FaDVpEKAKAGNta....AKpVlQSWADAEWF..... 0 75 179 264 +11625 PF11792 Baculo_LEF5_C Baculoviridae late expression factor 5 C-terminal domain Bateman A anon Pfam-B_5141 (release 7.6) Domain This C-terminal domain is likely to be a zinc-binding domain. 
20.30 20.30 20.30 20.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.38 0.72 -4.57 24 63 2012-10-03 10:42:43 2009-05-07 17:12:19 3 2 61 0 0 99 2 42.90 50 16.91 CHANGED pLpslNGholct.C.pHcal..shE+QhRAGDEhVSFI+YCphCshh ................L.slsGhoL+t.C.pHcFl..T.lE.+QhRAGDEhVSFI+YCphCsh.h. 0 0 0 0 +11626 PF11793 FANCL_C FANCL C-terminal domain KOGs, Finn RD, Coggill P anon KOGs (KOG3268) Domain This domain is found at the C-terminus of the Fancl protein in humans which is the putative E3 ubiquitin ligase subunit of the FA complex (Fanconi anaemia). Eight subunits of the Fanconi anaemia gene products form a multisubunit nuclear complex which is required for mono-ubiquitination of a downstream FA protein, FANCD2. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.86 0.72 -4.01 10 241 2012-10-03 15:03:13 2009-05-07 17:18:36 3 10 152 2 176 501 25 65.60 32 9.50 CHANGED tph-CGICaAYRL.........sGplP-hsCDNP+Cup.FHpsCLhEWL+oLhsSRQSFslhFGpCPYCsc.lolcho ....................-CsIC....a.uh.h.h..............................ss..sl..P.c...hs..C...cs.pCsphFHpsCLhc...Wh...pu.....t....s...o...p...p..........h.........h..G...p...CPhCpp.lt....h.................... 0 50 77 128 +11627 PF11794 HpaB_N 4-hydroxyphenylacetate 3-hydroxylase N terminal Bateman A anon Pfam-B_3148 (release 6.5) Family HpaB Swiss:Q57160 encodes part of the 4-hydroxyphenylacetate 3-hydroxylase from Escherichia coli [2]. HpaB is part of a heterodimeric enzyme that also requires HpaC. The enzyme is NADH-dependent and uses FAD as the redox chromophore. This family also includes PvcC Swiss:O30372 may play a role in one of the proposed hydroxylation steps of pyoverdine chromophore biosynthesis [1]. 
30.00 30.00 30.50 43.10 29.10 29.60 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.52 0.70 -5.00 80 1009 2009-05-07 16:38:44 2009-05-07 17:38:44 3 4 755 14 223 742 285 248.40 39 52.63 CHANGED TGc-Yl-SL+.ct+..pVYltGE+V.t.-VssHPsh+sslpuhAphYDht...a...........cschp-......hhThhospsup.tssphaplscot-DLht+pchhcthuchotG.hh..sR..usDhhsuhhsshtst..chaus.............atcshpcahchsp-pDLhhstAhssPpsDRu...pss......pps.Dlhl+VVccs-c..........GIlVcGAKhhsTuushocplhlhshts......h.stt-...c-aAlsFulPhssPGl+hlsRpuhpsstt....ssaD.Pluu+a-..EtDulllFDcVhVPWE+VFh .................TGp-YlcSLp..-s+..clYlhGE+Vc..DVosH........P......uFRsuhtolAplYDhh.a..........csph..p-..........hhshs..ospsut.hsp+.a.FchscSs-DLhppRcuhtpWu+ho.h.G.hhGR....oPDahsuhhsshtsss.thaGp.............atpNhcsaYpchp-ssLhhsHAllsP.hDRp...css.........pcstDlal+l.cEs-s..........GIlVSGAKslATsusloc..hlh.sss.........hhscs....tDaAlhFssPhDu.GlKlIsRtuap.sssh...tosaDaPLSSRF-..EpDAllVhDcVhIPWEpVhl.................... 0 83 142 186 +11628 PF11795 DUF3322 Uncharacterized protein conserved in bacteria N-term (DUF3322) COGs, Finn RD, Sammut SJ anon COGs (COG4924) Domain This domain, found in various hypothetical bacterial proteins, has no known function. The family represents just the N-terminus. 
21.60 21.60 21.60 22.00 21.20 21.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.05 0.71 -4.65 30 182 2009-09-11 10:44:22 2009-05-07 17:43:26 3 3 174 0 63 162 20 178.50 24 46.76 CHANGED PsDl+tpl.pRhWccuplhtuhhts..pshaPhpLsL+sPsupplupchs.pVRcWl.pplpss........ups.......clcW+plsaRh.hGpsplPsclhlcohpcAlsllG+p.....pphpp.acthlphspsphPpLlshltpcPhpsl..chustW........spllsllcWhppHscsGlYLRplslsGV.coKFlE.p++ulLscLLDl .................................................plttpl.t+.apptphhtthhhs.......tsh.aPh.p..lslthP.sstthstphp..sl+pal.pshpph...........utt......plcWcphph+h..hu.ssplPsclhlss...s....phlshhGcp......tchpp..hp....phhtt.httphs.t...Lht.h..h.........thh....phtpth................thtpllsllpalpt.p..ss.sGh.h...lRplslsGl.DoKalE.p+pullspLls.................. 0 22 46 60 +11629 PF11796 DUF3323 Protein of unknown function N-terminus (DUF3323) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Beta-proteobacteria). 
25.00 25.00 28.50 27.90 23.70 19.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.32 0.70 -5.02 17 132 2009-05-07 16:45:11 2009-05-07 17:45:11 3 5 126 0 50 132 0 205.00 23 50.04 CHANGED sphsGslpLss.ot.--+puLusLhGRsht..stshplslsch-ss.Lpss...thsshsLtcsL-thhGshlpppppctttppphtphaht.....t.p.hht......psttttWhpplhsttt.........lpphhtss.ctstpllppsspsLttL........s......th..lulhAuplsG..DsHuhDsspssupLlLpAL..............httsptsssss-t..................+pplatpsGllhD-lSsssLshulhshs ...................................................................................................................hsGtlpLp.shst.pE+psluthhG..+sht..tpphplslsph-ps.Lppo....thtshsltplLpthhG...l..p.p.pccpttp.ttppptaht............th.th.ht......pshh..tp.Wlppl.hptth..........ltphh...tps..pt.hp..phlthshpslsp.L...........................Ph.t......p...LslhAspls.u..DsHuhDpsp..supLllpuL..................thhhst.p..s.psucp..........................ppcLahpsGllh.D.-lSshlhshsLhs..t..................... 0 22 39 46 +11630 PF11797 DUF3324 Protein of unknown function C-terminal (DUF3324) Moxon SJ, Bateman A anon Pfam-B_7106 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. 24.10 24.10 25.10 24.20 23.90 23.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.67 0.71 -4.46 23 905 2009-05-07 16:47:03 2009-05-07 17:47:03 3 3 246 0 66 605 3 142.20 31 41.83 CHANGED sspslsIpNcYuYsIullLppsssp...lpPcLcLscVpsuphNt.+ssltAsLQNspsshlsplsl-ucVhppsscchLapscccslphAPNSsFsa..slsh.psp.......pLcsGcYpLchsspuscp..................pWcas+cFTIsucpAKclNcp ......................p.ppuhulpNcY..u..YslulhLpps.cpt.......lpPpLpLtcVpssphNu..+sslpsslQNspsshlsplplpuplhccspp.cslhpppppshphAPNSsFsa...slsh.ptp.........................tlcsG.pYplphpsp..sspp....................pWpap+.-FTIsscp.AcclNp.................................................................................................... 
0 26 53 57 +11631 PF11798 IMS_HHH IMS family HHH motif Bateman A anon Pfam-B_1349 (release 2.1) Motif These proteins are involved in UV protection, eg (Swiss:P07375). 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.25 0.73 -7.56 0.73 -3.60 92 4693 2012-10-03 02:11:09 2009-05-07 17:52:39 3 18 3144 158 842 3371 466 32.10 35 7.97 CHANGED .psh.tphl..tsl.slpcl.GlGpphtpp.Lpp.hGl .........tch..phl....tsL.PlpclaGlG+toscK.Lpp.hGI........... 0 246 500 691 +11632 PF11799 IMS_C impB/mucB/samB family C-terminal domain Bateman A anon Pfam-B_1349 (release 2.1) Domain These proteins are involved in UV protection (Swiss). 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.64 0.71 -4.20 106 7945 2009-09-16 12:11:34 2009-05-07 18:00:12 3 45 4183 220 2026 6312 2268 123.60 19 27.86 CHANGED sp.lp...................pttt+olupppoh.st..sh...pshpplp......th.ltplspclsp+Lp....ppphh...............spslslp................l+........ht..........shp...phsc....phslsh.ssss.spp....lhph...shplh.......tph.hp.....t.....lch.lGlphsplt.....ptsptphsla ...........................................t............pt.pKSlusppo..a.sc.....cl.....pshp.phc.........th.l.p.p.ls.....pclsp+Lc...............ppp..hh......................................scpl..s..lp.................l+.............hs.............................................................................sap........s.ho+................phs..l......t...s....o....ps...sps............lhph........Ahplh.............tch.hp.............sts........lRh..lGlp..l..spl..........t.................................................................................................................. 0 644 1225 1687 +11633 PF11800 RP-C_C Replication protein C C-terminal region Finn RD, Bateman A anon Pfam-B_4463 (release 6.6) Family Replication protein C is involved in the early stages of viral DNA replication. 
20.80 20.80 21.50 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.29 0.70 -4.62 53 475 2009-05-07 17:07:56 2009-05-07 18:07:56 3 4 187 0 99 454 17 181.40 26 51.77 CHANGED GsWpslpppapslhsplP.Rpsshsp..LcslhsclphLpp-ltshLctttp................spp.suN........-sps-+HhQNSsP-..uh.E.E......Ps..ppppt...........................spss..psppspspPh..............................+sh...PLuhVLcACP-ItsYussG.It...sWRDLhsAAshVRsMLGlSPSAap-AscsMG.csAAlslAsILpR..uspI.sSsGGYLRsLTcRAppGcFSlGPMlMALL..+sputs ..................................................................................................................................Wtthpthattlh.ttls..R..p...sshtp..Lt.lhsphpt.l+...ppl...phLctp.p.......................spphssN..........tspscp.phpsSpsc.....th.-.p..st...ppppt.....................................................................................................h...sl..shlhp.uCPpht.t...as.....s..t...ht........tWp-lh.tsA..hlR.hlGls.psaptAtphhG.ptAuhsluhl.hp+..........htpl..posGGYLR.hst+uttGthpht..hhuhhtt...s.............................................. 0 12 45 63 +11634 PF11801 Tom37_C Tom37 C-terminal domain Wood V, Coggill P anon Pfam-B_30563 (release 22.0) Domain The TOM37 protein is one of the outer membrane proteins that make up the TOM complex for guiding cytosolic mitochondrial beta-barrel proteins from the cytosol across the outer mitochondrial membrane into the intramembrane space. In conjunction with TOM70 it guides peptides without an MTS into TOM40, the protein that forms the passage through the outer membrane [1]. It has homology with Metaxin-1, also part of the outer mitochondrial membrane beta-barrel protein transport complex [2]. 
27.00 27.00 27.10 27.20 26.90 26.80 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.11 0.71 -4.39 17 336 2012-10-03 01:14:49 2009-05-07 18:10:42 3 12 206 0 199 332 0 110.30 24 36.99 CHANGED Ahhuhlpp+lpslhpYsLYlsscNYpphTR+hauphl.FPh.a.tP.ph+spApcpschlsl............................ptppp.ppc.sptsspls.Shhpchpth......Kppppp.l+p.t.shchhshLpchLsph................phlhuss.....oSs-hLhhualhlhhh.pLPss.hlhsaL+ .........................uhhuhlcpch.shh.ashalsscNY.phT+.hauphhsFPhpahhPsphpptthp+hthhth.............................................................................................................................................................................................................................................................. 0 38 77 135 +11635 PF11802 CENP-K Centromere-associated protein K Wood V, Coggill P anon manual Family CENP-K is one of seven new CENP-A-nucleosome distal (CAD) centromere components (the others being CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S) that are identified as assembling on the CENP-A nucleosome associated complex, NAC. The CENP-A NAC is essential, as disruption of the complex causes errors of chromosome alignment and segregation that preclude cell survival despite continued centromere-derived mitotic checkpoint signalling. CENP-K is centromere-associated through its interaction with one or more components of the CENP-A NAC. 
22.90 22.90 23.50 23.10 22.60 22.80 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.86 0.70 -5.16 5 82 2009-05-20 17:23:14 2009-05-20 18:23:14 3 3 60 0 52 68 0 210.10 40 81.50 CHANGED MSt.pp-L.Ps..pDssssh-scEELL+ECEslWK-ME-CQSKLoLlGsETLs-SDAQLSLLlMQhKsLTAElcQWQKRsP...................EIIsLNcDVLL..sLGKEElQKl+pDLEMVLSolQuKNEKLKEDLEREQpWLDEQQQIl-oLsslpcElKNpVsThSESRIFsELpsKhhclKEaKEKLLosLG-FLEEHFP..LP-cptSscKKRKupp.-sSlQLITLH.........EILEILIN+LhslPHDPYVclcDSFWPPYlELLLRsGIALRHPEDPoRIRLEAFHQ ................................................................................................thpt.ll.ppCEp.at.hp..c..hQpcl..h.tsEshs.....pps....t...lhh.phps..LpuEhpphpccpP...................c.hhs.s..tlLh..tlu+cchp+lpppLEhlLSshpuKpccL+csLcREQpWlpEppplhp....ulp.h.pclp...p.ph.pho-.p.phh.p.....-hppK...hhphc......phcccLh.tLuchL-cHaP......L..Pct..sspKK+...+s..hp..csssp...hhslc..................-hlE..hLlN+hhcsP+...D...PYVpIsc.oaWPPYlEhLLRsGIAlRHP-Dss+IRLpsF................. 0 14 20 32 +11636 PF11803 UXS1_N UDP-glucuronate decarboxylase N-terminal Coggill P anon Pfam-B_36254 (release 23.0) Family The N-terminus of the UDP-glucuronate decarboxylases may be involved in localisation to the perinuclear Golgi membrane. 20.50 20.50 20.90 20.70 20.40 20.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.53 0.72 -4.22 5 56 2009-06-11 15:38:14 2009-06-11 16:38:14 3 3 38 0 26 47 0 67.40 63 18.26 CHANGED Mlppthppll.oGlNRRMMKlLlALALIAYIASVWGsYlNM.RSIQEsGElKIE....QKI-EsVuPLREKIR-LEpSFTQKYP .............pt...hlh.suhNR..hhKllhulAhhA.YlAoVWG.....NFVNM.....RSIQENGElKIE....SKIEEhVEPLREKIRDLEKSFTQKYP....... 0 2 3 9 +11637 PF11804 DUF3325 Protein of unknown function (DUF3325) Assefa S, Bateman A anon PFAM-B_2004 (release 23.0) Family This family of short proteins are functionally uncharacterized. This family is restricted to Alpha-, Beta- and Gamma-proteobacteria. 
26.80 26.80 27.70 27.40 25.40 26.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.39 0.72 -4.10 44 248 2009-06-16 09:39:29 2009-06-16 10:39:29 3 2 191 0 82 247 7 104.10 29 92.30 CHANGED hlsshhL.uauuasuLuLAMp+Hacplh.s+ssss.sptphL..R....hhGWhhLslohhhsltshGh...uhGslhWhGhlohuulllll.hLsYp.......P+...hlhhhu.hsuslhu.........slhhhh ..........lhshsL.sasuassLuLuMs+Hacpl.........h..s+s..ss...tpp..phL..R.....hhGahhLslulhhsltspGh...uhGsVhWhGhLohuAhllsh...hLsap.......PR....hhh.hu.shsslhs.hh...h................................. 0 13 33 62 +11638 PF11805 DUF3326 Protein of unknown function (DUF3326) Assefa S, Coggill PC, Bateman A anon PFAM-B_2030 (release 23.0) Family This protein is functionally uncharacterized. It is about 300-500 amino acids in length. This family is found in plants and bacteria. 25.00 25.00 56.30 38.10 23.80 23.00 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.93 0.70 -5.72 25 128 2009-06-16 09:50:57 2009-06-16 10:50:57 3 3 111 0 50 129 189 315.40 48 87.33 CHANGED hs...slL.lVPTGIGCs..................IGGYAGDALPsARLLAussssLITHPN......VlNGAsLYWsssslhYVEGYuLDRFApG-huL+PV+ppRlGLlLDAuIEsEL+pRHLQlA-AsRATLGLslsshlhTDtPLtVpLptus.SGuSWGsl-pPDuLLRAucpLh.csGAsAIAVVuRFPDD.sotthptYRpGpG.VDslAGAEAVISHLlVRcLplPCAHAPALuPlsl.....sspLDPRuAAEElGaTFLsCVLlGLSRAPclls........sstspssslpus.plsAVVsPtGAlGGpuVLAsh-+...sl.P.lIuVtpNsolLpVoscsLGls..........lltlssYhEAAGlllAlRcGls.suLpR ...............hsslLIVPTGlGAt..................IGGaAGDALPlA+hluulsDpLITHPN......VlNGApLYWs.sNshYVEGYuLDRFAtGphuLpPV+p..N+lGLlLDpuIEs-Lt.RHlQsA-AsRAoLGLslschlhTDsPLplphphus.SGtShGoltsPDuLLRAscpLhppssApAIAlVuRFPDDsssthhptYRpGpG.VDslAGsEAlISHLlV+pFplPsAHAPALtPhsl.....ssplsPRuAAEElGaTFLsCVLsGLSRAPphlp...............tt.s....stsss.lhss.pVsulVlPtsAsGGpulLuhhpp...ph..s..lIsVp-NposhpssscpLslp..........shtVssYhEAhGllsAh+sGls.sulp........................ 
0 10 35 46 +11639 PF11806 DUF3327 Domain of unknown function (DUF3327) Assefa S, Coggill PC, Bateman A anon PFAM-B_2060 (release 23.0) Family \N 27.10 27.10 27.40 28.10 26.10 26.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.89 0.71 -3.81 35 840 2009-06-16 13:03:42 2009-06-16 14:03:42 3 6 641 13 81 482 3 128.80 46 31.19 CHANGED pt..................hVTF............lWR......uss..............sshtlasshsultpp..........tpphpRLsuT.........DlW...................................ahohplPusaRuS................Yshhsssss.................hcsstcpthRtlhsp......upsDPLN.pshhsst........hhpttSsLcLssAsspsths ............................................................tsp.h..cVTF............hWR....s-ptS..............sl++ValhlsGVTD++psu......spsMpR.lsGT.........DVW...................................phThpLsAsaRGS................YsFlPstp-s.......................PDct.t..LRcGWRplLsp......AhADPLNspsatsGR..........GpssSsLchPpAPhQstW.s................................................. 0 13 32 60 +11640 PF11807 DUF3328 Domain of unknown function (DUF3328) Assefa S, Coggill PC, Bateman A anon PFAM-B_2062 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in eukaryotes. 
25.00 25.00 25.20 25.00 24.50 24.50 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.96 0.70 -4.66 101 775 2009-06-16 13:12:53 2009-06-16 14:12:53 3 5 79 0 677 817 0 184.60 18 76.48 CHANGED ptptpppttahhh.............hhhhhhhhhhshhhhhhthhhht..t.h.t.........................................h.h.........................s.....aps.sss...-h.-puW.............tshhlstpt....h.phshs.........h....st.............shhhtlp.saH....pLHCL.....................phlRptha.ph....................thtpss.pt.....................htHhpHCl-hLRQslhCpuDsslh....s..h............................stshss...............................hss...p+pC+sa..-tlhc..............Wspcp ................................................................................................................t......................................h...h....h.......................................................................h.....h...................................p.......a.....t...ss................ph.-ts...W.............................t....hh.h.stpt...............h.ths........................ss...................thhhtlp.saH.....pLHCL...........................phlRptha.th.............................t.t..t..tt.t....................................hhHh..pHCl-hLRQ......slhCpuDssl.......sh.hht...........tt.hss..................................hss.....HpC+.s..aptlhp..ahtt......................................................... 0 107 349 527 +11641 PF11808 DUF3329 Domain of unknown function (DUF3329) Assefa S, Coggill PC, Bateman A anon PFAM-B_2082 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. 
27.10 27.10 27.30 27.60 26.90 27.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.06 0.72 -3.90 33 1029 2009-06-16 13:53:43 2009-06-16 14:53:43 3 10 1017 \N 175 598 65 89.50 47 20.74 CHANGED hphsap.phltcLslhhlsslllGhlhGplshhLhlushshLsWHhhplhRLppWLapc+phs.PPpupGsWptlFsGlYRLQpRsR++Rpc .......................E.RLoWK.+LlhELlLhslP..A..hl...lG....hhh...G....a.l...P...W..h..L..L...A..u..l..s..ul..L...l...W...Haa...sLl...RLShW..LWs-R..shT..PPsG..pGuWEsLhaGLaphQhRN++RR+E................... 0 29 76 127 +11642 PF11809 DUF3330 Domain of unknown function (DUF3330) Assefa S, Coggill PC, Bateman A anon PFAM-B_2077 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. 25.00 25.00 26.70 26.60 23.70 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.79 0.72 -4.11 9 204 2009-06-16 14:09:36 2009-06-16 15:09:36 3 2 158 0 20 76 4 67.30 73 44.19 CHANGED sssMNssssosTSCCVCCKEIPLDAAFTPEGAEYVcHFCGLECYQRFpARA...pssscssstPs.ss.....sspP...os ................s.ssMNAN-PS.TSCCVC.CKEIPLDAAFTPEGAEYVEHFCGLECYQRFQ.ARA...pTATETslcPs.ACDS.PSs......... 0 6 10 15 +11643 PF11810 DUF3332 Domain of unknown function (DUF3332) Assefa S, Coggill PC, Bateman A anon PFAM-B_2104 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. 
25.00 25.00 43.10 43.00 19.10 18.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.03 0.71 -4.61 25 244 2009-06-16 14:14:27 2009-06-16 15:14:27 3 1 223 0 47 189 10 171.90 36 95.19 CHANGED +pphhtssshshssstLoGClGphuloshlh.chNl.....puVDNRYuRuGla..hlluPVYGlsssADhllFNSIEFWoGpNPl.......stc.PulsDs.s.cshh....clNsp.lctsLscsPlshh......pphcpuphphlDspshphplshssGpptpLhGh+.psspVshYl.DGchlsh............sohspLtshtpss .........hhhhshhhhhuss..houChGphuloshlt.caNh......psVDN+as+EhlF...hlluPVYGlss.hADhhlhNSIEFWTGpNPl.......stt.ssVsDh.h..cslh....clNsp.lshphpcsshphp........+.hEpush..c.lpscshph.lshssGppph.Lhshp.ssspVshaL.sGchhss....lstptltuhhpss................................................................ 0 10 21 37 +11644 PF11811 DUF3331 Domain of unknown function (DUF3331) Assefa S, Coggill PC, Bateman A anon PFAM-B_2106 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family vary in length from 96 to 160 amino acids. 25.00 25.00 37.00 37.00 20.20 16.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.37 0.72 -4.41 24 164 2009-06-16 14:22:22 2009-06-16 15:22:22 3 2 48 0 66 160 2 97.10 39 75.80 CHANGED ttpsts.hsutssss...........thtlpllER.osoolhVpWsDss+C+YuEQpW+hshARpsGhCALSGpsIchGDsVa+Pph.R..shPsNusAMILAuslsphhst ..........................htss.........sshp........tsplpllER.SsoslsVpWs-ss+C+YGEQcWRtthAcpsGpCALSGpsIptGDsVa+Ptt.R..ssPuNusAMIhAusltt...st.... 0 1 7 30 +11645 PF11812 DUF3333 Domain of unknown function (DUF3333) Assefa S, Coggill PC, Bateman A anon PFAM-B_2108 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. This presumed domain is typically between 116 to 159 amino acids in length. 
26.50 26.50 26.50 26.50 26.20 26.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.59 0.71 -4.23 62 456 2009-06-17 11:52:16 2009-06-17 12:52:16 3 4 432 0 116 348 598 144.50 29 36.69 CHANGED hctpl++R.tAE+RF+haGluAIslulhhLslLLsoIlupGhsAFppThlplslplstpt.l-.ps.t.......ptltstsYtsllpsAltp....hshp.tst.st+thtt.llSpsAttpLR-hlhssPpllGpThshhlhAsucl...DshhKGps.s..tpshtps ............ctthK+R.tA-+pF+hhuhhAlhluLhFLsllLsSlhspGasAFpQThlhl.lphspts.ht.........................shlttu..sh.hlhpsultc....hsss..s.sp...t.p.chsp.hlS.pptthtlcchlhts.sthlu.pspshhl.usuch...D.hhKut..........s.......................... 0 37 80 95 +11646 PF11813 DUF3334 Protein of unknown function (DUF3334) Assefa S, Coggill PC, Bateman A anon PFAM-B_2118 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family are typically between 227 to 238 amino acids in length. 25.10 25.10 25.70 45.10 22.00 25.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.52 0.70 -4.81 19 187 2012-10-01 19:50:22 2009-06-17 13:02:51 3 1 184 0 41 122 9 223.30 68 99.13 CHANGED hppsplloT-DILLpLCpSVopVLosATpSpVpYSuMVQ+Is+TsLKPDlGCFVLFDGGFSGLVVINFSApAAhElYppYMLsMGMPcpELAhSHTSDEVuNVMGELMNQlVGDFTuKlp+ELQTsITQNQPKMLslNKQlhLSVDTNLDcPpARRVoFpTtpNpIFYLEhAMDKTEFIpLp-FEhcEc..DPDsLltppt......ttttspssssts..ttc............................s-sDDLLcpLGl .........MpKsplVTTEDILLhLCpSVSsVLoSATsS.lpYSAMVQKIsKTSLKPDhGCFVLFDGGFoGLVVINFTAcAAlElYssYMRNMGMPE-ELAl.HTSDEVGDVLGELMNQlVGDFTNKVRKELQTsITQNQPKMLoLNKQVhLSVDTNLDRPQARRVTFoTsNNNIFYLELAMDKTEFIQL-EF-hp.E-p.sPD-ILttsp......ppp.p.spssssps..tpc............................ssusDLLDpLGI............................. 
0 6 15 29 +11647 PF11814 DUF3335 Peptidase_C39 like family Assefa S, Coggill PC, Bateman A anon PFAM-B_2152 (release 23.0) Domain \N 25.00 25.00 25.20 29.10 24.80 24.40 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.17 0.70 -5.19 25 210 2012-10-10 12:56:15 2009-06-17 13:04:35 3 5 202 0 49 172 23 201.10 47 55.40 CHANGED lPaYpQTT-FTCGPAsLhMAMusLcspht.spppELplWREATTIFMTSGHGGCuPaGLALAAh+RGacsclalsssusLFlDuVRsppKKpVhpLVccsFtpplpppsVslphpshohpclcptlspGttslVLISoYRhsGcKsPHWVllouhD-calYlHDPcl-.tpppcsshDstalPIu+p-Fs+MupFG+s+.LRAAVllppp ...hPaYhQTTsFTCGsACLLMAhus..L......c....t..shp.oRspElpLWREATTIFMsuGHGGCuPpGLALAAtR......R..Gac..V-lhsssp.u....s.FlD...uVRsssKK.......pllcLVHpcFsppLsppsVshh.tssstspLcphlpp.GupsLlLI..SoYRhsu..c.K..tPHWVllouh....s-.cFhalHDPc............s-................pc.....cpsh-stalPVu+ushsphhsFG+p+.hpAsVllt.p............. 0 15 30 42 +11648 PF11815 DUF3336 Domain of unknown function (DUF3336) Assefa S, Coggill PC, Bateman A anon PFAM-B_2157 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in bacteria and eukaryotes. This presumed domain is typically between 143 to 227 amino acids in length. 
25.00 25.00 27.90 27.00 24.80 24.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.68 0.71 -4.68 82 500 2009-06-17 12:07:04 2009-06-17 13:07:04 3 5 249 0 353 491 21 145.30 26 20.75 CHANGED thhcpp...hhp...........pLhcphcs........................upoYc-WhpsAtpLDch.....hGtstW+.psspss..hYDaphlppplppLcpsRtps-h.........................................ppLhhllcsslpRNhuuh...ssspLYp+oah..GTKpLI--YlcElttslphlscs....pplssp...phhthhppspps ..................................h.............php....pLpp.phcs............................Apo..Yc-WtpsAtpLDch.....hGt.stW........+.....psspos.......hYDhpllppplppLcctR...p.ps...-h............................................pplh...h...ll+s.s....lhRNhu..sl......sss.pL.Ypcoah....G.T......KpLI-cYlsE..lt..psLchlspt............pphs.p...thhthhppht................................ 0 100 200 307 +11649 PF11816 DUF3337 Domain of unknown function (DUF3337) Assefa S, Coggill PC, Bateman A anon PFAM-B_2058 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in eukaryotes. This presumed domain is typically between 285 to 342 amino acids in length. 
25.00 25.00 26.00 25.30 21.20 24.60 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.04 30 297 2009-06-17 12:16:39 2009-06-17 13:16:39 3 23 247 0 215 309 1 296.90 26 35.92 CHANGED ptshssppphhspp...........sssssshhshsps..tthpssupss.ptt.t...........stsPst.shs..tppsphssohhs.cchch....s.h.tc.ttt.pt......................ptstppspppp.hc.shuuhltpshppa..pphtssst........................sphpPs.ttcss.hh......plPscoslllpE..sstusspslaRt..plsshspct..........-hlp...cslPhWluchLLcNphP.K.-s.sKlsFhLpPa..............c.....................sthPshscs-.................................sssRLsAscMLRs+KIhsYVsE+l......sps.....pp.pspt...............................................................hcPc-aLELhCps..................pl..Lss...sMTLuTlRshlWKouuDllLpY+tpsc .......................................................................................................................................................tpp...........stttsthhsh.ps......hhhps.up.ss.ppp.pp.....................s.hPshhsh-....cpuhhshsh....cc.ph......h.s.h..........................................................ttst.sstptp..s..c.Nhsshl.phhhca....pht.ps.p................................................................................tsphp...s.ss.....pp......tss...hh..................plP.....scosllhtE..........uust.slaRh..hlpshss-t.......................-...h.........cshP.Wlh-hl..l..psp.h........P...c.....sKlsFhLpPa................................s.............sshsphh.pts................................................RLsAs....cMLpl+K.lhtaVhE...+l.....st.ps............................tp.ptst..........................................................................................................hhs--hlELhCps....................pl..Lss...sMsLtTl+palWKs..us..DlhLpY+tp..p.............................................................. 
0 69 121 179 +11650 PF11817 Foie-gras_1 Foie gras liver health family 1 Coggill P anon Pfam-B_4417 (release 23.0) Domain Mutating the gene foie gras in zebrafish has been shown to affect development; the mutants develop large, lipid-filled hepatocytes in the liver, resembling those in individuals with fatty liver disease [1]. Foie-gras protein is long and has several well-defined domains though none of them has a known function. We have annotated this one as the first [1]. The C-terminus of this region contains TPR repeats. 23.30 23.30 23.30 23.30 23.20 23.10 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.44 0.70 -4.90 22 379 2012-10-11 20:01:02 2009-06-17 18:05:52 3 16 226 0 279 399 0 215.80 20 20.99 CHANGED +h.-s+hluDhlsh+I................hRhhLhsupss..sAlppaptHhp+hp-h......lsp.h.ut......t..tatuW.uR....htlhA-Llcpsshsthsshp...................................sGaahppAAc.ahhtR+phtpph.............P........pssuuplhsphahhcsh............sspsppthshsts..pthsHSt.IlshLppAhtpFpphpp...........sRhsptLsh...chAcEYh+hs.sascAhphlcsl..shsaRp-sWhsLhpchhhtL+cCAhphtDscshlpsshELhsh 
......................................................................................................................................................................................................................................................ph+.hhthhth+h.................h.chhh.....tps....tAhtphptHhphhtph.........................hs.....p............t..th.sW.up....h.hhuplhpps..................................................................................................suhhh..uA..hh....+pthht.h........................................................................................................................t.h..............................................t...th..............t......h..psthhlphhp....pAhtpapphtp.........................Rhtphlhh.....phucpa.ht.ht..pap...pAhphhp.h....hhpa+.........pEtWh.tlhpphhhthhcCthhhsphtshlphshchhs.t............................................. 0 81 141 219 +11651 PF11818 DUF3340 C-terminal domain of tail specific protease (DUF3340) Assefa S, Coggill PC, Bateman A anon PFAM-B_2330 (release 23.0) Family This presumed domain is found at the C-terminus of tail specific proteases. Its function is unknown. This family is found in bacteria and eukaryotes. This presumed domain is typically between 88 to 187 amino acids in length. 
28.60 28.60 29.40 29.20 28.30 27.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.63 0.71 -4.23 98 1144 2009-06-18 09:33:45 2009-06-18 10:33:45 3 5 1125 0 222 767 468 142.30 45 20.88 CHANGED ohl-st-hGEss.-NALPWDpIssApYsphsshssh...lspLpppHppRlspss-Fthlpcclphhcpp+-c.pslSLN.spRctEpcpt-pp..pLpth.......NpRhp......t.tG..p....cslpsh.........................-.........................................-h..sc-h.........tt.Ds....aLcEuspIhhDhh.......phpp .....p....tsEsGEphEDNALPWDoIc.u.As..Y.s+s...sc..ls...sh....hscLhccHpsRIAcDPEFphIhcDIAcapsh..KD+...shVSLNhutRccEssccDup..RLsRl..................N-Rh.K.........p..cG......K...s.LKcL........................D..............................................................................Dl....PKDY...............pcPDs......YLDEolpIslDhscl..p............................ 0 60 117 178 +11652 PF11819 DUF3338 Domain of unknown function (DUF3338) Assefa S, Coggill PC, Bateman A anon PFAM-B_2474 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in eukaryotes. This presumed domain is about 130 amino acids in length. 34.60 34.60 35.40 35.30 34.10 34.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.59 0.71 -4.72 8 236 2009-09-11 14:35:59 2009-06-18 10:36:45 3 8 64 0 127 216 0 132.30 48 17.22 CHANGED hEuKsclIsuSSGolluSGStsu-sS.csKKE+lpsLKcKpcsLc-+Lpt+LcELKKlCLREA........................ELTG+LPsEYPLpPGE+PPpVRRRIGTA......FKLD-..lLs.sE-stLpsLEschAlQQQIsEAA++LusEscLSKsl .................................tuKsplI.sSs.GolluSG.u...pu-s.ut.psK...+.-hltsL+p+pcsLpEpLpp+lEELK+lCLREA........................ELTGcLPtEY.....P..LcP.G...Ec.....PPpVRR.RlGTA.................FKLD-...pl...L.....s.t..tE-...stLp...pLEpchulQppIsEAA++LAs-PsluKp...................................... 
0 17 30 65 +11653 PF11820 DUF3339 Protein of unknown function (DUF3339) Assefa S, Coggill PC, Bateman A anon PFAM-B_2694 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in eukaryotes. Proteins in this family are about 70 amino acids in length. 25.00 25.00 36.80 25.60 24.40 23.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.39 0.72 -3.68 25 199 2009-06-18 09:40:15 2009-06-18 10:40:15 3 5 29 0 126 175 1 66.80 52 81.93 CHANGED MsDWGPVlluVlLFVlLSPGLLFQLPG.+sRhVEFGshpTSGhSIlV.Hslla...FullsIhllAlplH....lYs .......MuDWGPVllulVLFlLLSPGLLhQlP.G..+.sRhV.EFGshpTSGhuIlV.Hulla...FuLlsIhllAlslHlY.................. 0 16 74 100 +11654 PF11821 DUF3341 Protein of unknown function (DUF3341) Assefa S, Coggill PC, Bateman A anon PFAM-B_2731 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in bacteria. Proteins in this family are about 170 amino acids in length. 25.00 25.00 25.20 26.50 24.60 24.50 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.02 0.71 -4.79 37 213 2009-09-11 12:21:10 2009-06-18 10:43:59 3 3 197 0 114 226 320 170.60 31 77.66 CHANGED tptslhA.Fscs-sLlpAs+plRt...tGa..phh-saoPaPlHGLDcAhGl.t.oplshhshhhGlsGsssuhhhphahshhD..........aPhsI.GGKPhh..Sh....PAFlPlsFElTVLhAAhuhlhuhh.hhstLs.hp+Phh.ss...+socD+Fhltlsssss...-tpchpphLcphGAhclphlp ......h..hslhA.Fsss..-sLlpAs+pl+p...tGa..chh-...saoPaPlHG..L-cA.hG....l..tco+....lsh..hshhhG.lhGhssuhhhthashhhD.................aPhsl.GGKPha..Sa....PuFlPlhFElTlLhA.Ahshllshh.hhspLs.h.t+shh.ss.....R.s.ocD+Fhltlpspss....stpphpphLcphGAhclp...t................................................................................................................. 
0 54 88 104 +11655 PF11822 DUF3342 Domain of unknown function (DUF3342) Assefa S, Coggill PC, Bateman A anon PFAM-B_2751 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in bacteria. This presumed domain is typically between 170 to 303 amino acids in length. The N-terminal half of this family is a BTB-like domain. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.32 0.70 -5.29 9 124 2012-10-02 01:20:04 2009-06-18 10:46:24 3 4 99 0 82 233 4 273.00 41 42.62 CHANGED s.IpVpDcspNppRsFpCspcLLhocMpYFssllph.................................................................sspch............lsIpV+CDlpIFsWLMpalct.................ctPplsPsNVVSIhlSSsFLpMppLl-psLtYh+t+LsplVtousNhsClsspLlsRLusMhscs-Lst....l+Dc+schps..+lhspLIp+.LC-..sps..............................ts..uph.....suLhhCthCtpLhsppp.p+lpp.........sPu....................+hslspRGclhhoHsts+..................shsspta..lsshpcELcuWt...WRlhGuhpalhCpRCpphhslh-ho..pCphtPtshsassssup.Dstu.suhashC.tppshca- ................................................hlIHVCDEsK.shpcDFhCPpcLLlscMpYFA-hl...................................................................s.u.Q+h-E..........lDIS.VH.C.D.lpI...FsWLhpal++s...............................................................tttchPp.L..-ssNVlSILlSSpFLpM-.sLl-pClpYC..Hpp.hs.....tIVt..ossNhsClsssLloR...........lAshFoptEl-t....l+DKK.DKhpS..+LasKhIpp.Lh-sp..s...................................cu..ssh.....usLa..RCshC..tpllspphpp..plsC..........hPu....................phplsp+Gplh..hHh.RD.p.............................................................sWslppY..l..sLacEL+SWRc.....VYWRLWGshpaL.hCspCtphF.sh-hs..aCpaHscss.a.ssssp........s.sGhYsCC.sppshRa-............................................................................................................................................................. 
0 32 41 62 +11656 PF11823 DUF3343 Protein of unknown function (DUF3343) Assefa S, Coggill PC, Bateman A anon PFAM-B_2956 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 78 to 102 amino acids in length. 25.00 25.00 25.10 25.00 21.70 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.04 0.72 -4.72 58 857 2009-06-18 09:51:36 2009-06-18 10:51:36 3 6 709 0 160 521 6 70.90 27 76.57 CHANGED ptphlllFpospcuhpsE+.....lLKcpsl.shcllPsP+pl.psu..C.Glulchst.pp.tptltphlpctslthpt....laphp .....................calhhFpoTstslpsc+.........................hLps..t..uh.sh+lh..slPR.cl..puG...C.Glslhhs.....s.ss.t-clphll.ttt..hpsla..................................... 0 90 133 147 +11657 PF11824 DUF3344 Protein of unknown function (DUF3344) Assefa S, Coggill PC, Bateman A anon PFAM-B_3041 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 367 to 1857 amino acids in length. 
27.20 27.20 27.50 27.50 26.20 25.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.88 0.70 -5.02 42 118 2009-09-11 16:28:30 2009-06-18 10:53:44 3 42 17 0 94 122 4 276.20 22 40.86 CHANGED ssshsss..a.Gst.....s....Lsohtp..ssV.....pGslhhss..........hhGhssps..............spsh....slP.su........slchA+LYls.sWst.........ppsh.sphslsFNG..........ppht.........................ssts.Ysshps......assh.p....shas.YDVTs...hlss....G..psssslss...........sshcG.....plhussLlllYc.ssstsphp.YWlN-GsDhlph...............sscpssups.Fsus.ss.s..lpsAsLhohhhoustt......sslhFNu............sslhsus............................pushhshp.tasVsshlpsspNpshh.s............tsssYhpsshulLslch ......................................................s.....tt..a.us......s....lsohtp..ssl.....pGslhhss............h.uhssps...............spsh.....slP...ss......s.....slphA..+LYls.sWst.........pssh.sshslsFNG...........................pth.................................sssstYhshss...........ass.hhs.sh.shhh.YDVTshlps.....u.pssh.slss............tsshcu......plhshsLllsYp.ss.sssphp.halN-GpDhlsht.t...............tscpssups.sFsss.hs.t..tlpsApLhshhhuu.s..s.........ushhFNu....................pslhssss.................................pus.hshp.taDV..ssh...lps..ss..spshhts................tsushhpsh.uhLslp................................................... 0 50 62 72 +11658 PF11825 Nuc_recep-AF1 DUF3345; Nuclear/hormone receptor activator site AF-1 Assefa S, Coggill PC, Bateman A anon PFAM-B_3322 (release 23.0) Family Nuclear receptors (NRs) are a family of ligand-inducible transcription factors, and, like other transcription factors, they contain a distinct DNA binding domain that allows for target gene recognition and several activation domains that possess the ability to activate transcription [1]. 
One of these activation domains is at the N-terminal, although there are two distinct motifs within this domain, between residues 20-36 and between 74 and the end of this domain, which are the binding regions. One of the co-activators is TIF1beta, which appears to bind at the first motif [2]. 27.30 27.30 27.30 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.83 0.72 -4.15 14 198 2010-01-08 15:29:35 2009-06-18 10:54:52 3 5 47 0 73 155 0 100.60 44 24.06 CHANGED uoSlssssths.hss........Hssh......sshss.t.h....osluSP.lNulGSPaSVIo.SolGssShulPuT.PulGassh.SPQhN...shs...uVSSSEDIKPP.GLpsl......phsspusGuh .........................................ss.ts......s.HPSh......sshss.t.l.....SsluSP.lNulGSPasVIo.SuhGs.uh...slPuo.PuluaushsSPQl...NS.....shN.........uVSSSEDIKPP.h.GLpGl.....hphsupssGsh............. 0 2 10 30 +11659 PF11826 DUF3346 Protein of unknown function (DUF3346) Assefa S, Coggill PC, Bateman A anon PFAM-B_3462 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 231 to 659 amino acids in length. 25.00 25.00 37.30 36.50 20.00 16.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.47 0.70 -4.57 4 170 2009-06-18 09:56:55 2009-06-18 10:56:55 3 2 126 0 11 121 0 220.10 88 43.69 CHANGED FTDFQLHELTGRWLSEN.MPEGFKSDRFRFLARTITASEEAPpEGpDGEIRIKPNLYILVWEPSFa-ELLTRDYFF.LFPPEILKQHTLVFQLYSFFRSRMuRRhTDsMLLSELNQKLARNI-WRRFShDLIRELK+LuEGKsoE-lFlVNLWGYHLTIpuh-pstKlsDYQIDI+CDs-E.VlRYSRA+TTNsGKRsM.APThPNPLRNElhsKQcL-pLSuIIDGEF .............................................FTDFQLHELTGRWLSEN.MPE...G.FKSDRFRFLARTITASEEAPsEG.SDGEIRIKPNLYILVWEPSFaEELLTRDYFF.LFPPEILKQHTLVFQLYSaFRSRMuRRHTDsMLLSELNQKLARNIEWRRFSMDLIREL+R........LS-G.KGoEDLFVVNLWGYHLTIcoh.EcGKVhDYQVDIKCDVEE.VLRYSRA+TTNAGKRNM.APTLPNPLRNEhVoKQpLsELSuIIDGEF............................................ 
0 1 3 8 +11660 PF11827 DUF3347 Protein of unknown function (DUF3347) Assefa S, Coggill PC, Bateman A anon PFAM-B_3580 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 169 to 570 amino acids in length. 25.00 25.00 38.70 38.70 22.70 22.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.35 0.71 -4.27 46 213 2009-06-18 09:58:49 2009-06-18 10:58:49 3 7 83 0 98 245 40 172.30 25 54.87 CHANGED hshh...hhhsusppppcpp.h.....................pthtttt....................tphthsphtspphpplhssYhplKsALVss-sppApssAppLtpshpshshp.......phhspttpphtphh...............slcpQRctFptLSpphhshlcts...hsssslYhpaCPMA.sscGA.WLSpsccI+NPYaGcpMLsCGpVpc ....................................................s............tt.t........................................................................................tt.thstttppphptlhssYhplKsALspsDsptApssApplhpslpt.lshs................thts.ptt..pphhphhpp.h.tt................tclcpQRp.tFptLSpshhsllcth.....tsspslYhpaCPMsp.sspGu.WLSp..sc..c...l+NPY.......aGspMLsCGplp......... 0 47 84 95 +11661 PF11828 DUF3348 Protein of unknown function (DUF3348) Assefa S, Coggill PC, Bateman A anon PFAM-B_3615 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 244 to 323 amino acids in length. 
25.00 25.00 25.10 98.20 23.90 24.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.52 0.70 -4.95 19 133 2009-06-18 10:49:21 2009-06-18 11:49:21 3 1 131 0 44 153 14 243.60 46 94.84 CHANGED MsQsPpRsshuGPsLlRLLARLsss-sspSppuLu-RLSQWLsWoDAlALSuA......LsupsPssssuspssusstc.....-tARlRusLspuIsscts.ts.tRtts....h.hsshssssp.stsDaAsaRQpYLuhQpsMcssIusLRGRLRstLAuposs.hARLAhlDAlMEpsLusRERsLLAslPsLLts+FERLRpsccts..ststss...........................hstsGuWLssFRpDMQSVLLAEL-lRhQPV-GLLAALRsp ..........................Mhpss.RssLuGPsLlRLLARLscs..Dls....tStpuLuDRLSQWLuWTDAIALSuA......LsussP.us..s..s.....u...s..cst.utstc....tpsARVRsuLApAIssssshhs..sRtts.........pssshss.s..ss...sss....DaAsFRQ+YLuhQQsM-sslGpLRGRLRptLAsposs.hARLAslDAlMEpsLusRERoLLusVPsLLus+FERLRcAcptsh.t.ts.t.st.sts............................................................sssPsuWLDsFRcDMQSVLLAEL-lRFQPV-GLLAALRsp. 0 6 19 32 +11662 PF11829 DUF3349 Protein of unknown function (DUF3349) Assefa S, Coggill PC, Bateman A anon PFAM-B_3716 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 99 to 124 amino acids in length. 25.00 25.00 28.80 28.80 24.70 23.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.25 0.72 -3.56 24 263 2009-06-18 10:51:17 2009-06-18 11:51:17 3 2 128 6 68 170 2 97.70 43 87.82 CHANGED hsshlsslluWLRAGYPpGVPssDhhsLLALLpR+.Lo--EVptVAppLhc..pup.s.......spsDIsshIsplTcc.PsP-DlcRVpA+LAutG.....WPLssscp .........spalpSllsWLRAGYPEG.VPssDphsLLALLpRp.Lo---lppVs..pcLhc....pGs.s...................Dp.-IushIoclTcc.PuPEDlpRVtu+LAAtG.....WPLsss.c.............................. 0 14 42 58 +11663 PF11830 DUF3350 Domain of unknown function (DUF3350) Assefa S, Coggill PC, Bateman A anon PFAM-B_3789 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. 
This presumed domain is typically between 50 to 64 amino acids in length. 25.00 25.00 29.50 30.70 22.00 18.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.84 0.72 -3.94 11 178 2009-06-18 10:57:16 2009-06-18 11:57:16 3 7 70 0 79 164 0 57.80 56 5.47 CHANGED -lsPpusLSPshttt..........tssttct+RoscELRcLW+pAI+QQILLLRMEKENtKLp .............................EL.PhSPLpPshE-t..............ss.t..pcc+RsScELRpLW+KAIcQQILLLRMEKENQKLp. 0 13 18 42 +11664 PF11831 Myb_Cef DUF3351; pre-mRNA splicing factor component Assefa S, Coggill PC, Bateman A anon PFAM-B_3985 (release 23.0) Family This family is a region of the Myb-Related Cdc5p/Cef1 proteins, in fungi, and is part of the pre-mRNA splicing factor complex. 27.00 27.00 27.60 27.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.63 0.70 -5.14 29 298 2009-06-18 10:58:48 2009-06-18 11:58:48 3 12 252 0 217 288 1 227.90 29 30.07 CHANGED TPNPlhos.hppst...............thGhTPhpsh..........GtTPh..pTP...RDphslN...........tt.....htpospph+hp.pppt+ppL+suLuuLPpP.cN-aEl.lP-pt......pp-.scptEp.....h...EEDuu-t-tRc+ttcptpcptchc+RopVlQRsLPRPs.lsht.Lh..............phssp..s.sccllpcEhhtLlppDsh+aPh...sssp..ttp............................t.th-phs-stlppA+thlptEht..h.................t..sppppphc............sasps .................................................................................TPNshh..os.hRp.s...................t.uhTPtts............utTPh.....pTP..........hRDphsIN............ppt...........stss......P.p....+.t..ppph+pp....L+tuLuuLPtP.cN-aElslP-pp...............ppE...pptct..................h....EDsu-h-tRppttc-tpcttEh++popslQ+sLPRPst.ls.phLh...................................................p.s...st..hppu-cLIpcEhhphltaDsh+.a.Ph...sssp...ttp.................................h..h-phscppLp..pApphltpEht.h............................t.t...................................................... 
0 74 117 174 +11665 PF11832 DUF3352 Protein of unknown function (DUF3352) Assefa S, Coggill PC, Bateman A anon PFAM-B_2160 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 538 to 575 amino acids in length. 26.80 26.80 27.40 26.80 26.60 26.60 hmmbuild -o /dev/null HMM SEED 536 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.62 0.70 -6.20 32 156 2009-06-18 11:54:13 2009-06-18 12:54:13 3 5 102 0 59 174 55 465.90 18 84.34 CHANGED slhuushlLlshusuhaahhtpp.psl.hhpps.hshPhuAphlPpsA.lslplhssPscl.shtphsss.pp+pthppthpp........h+cshhAtsGlcapp-.lpsWlGsplolAllss....s.sp..................tpsuhlhsLssc-sptA+pFLpph.appcshsG.sslphpsY+Glplhpspsshht.............shusAll..s-p.hlLlAss.tllcpAlDshQhss.shtus.thppslppL..scs.lAhlhh.s.ssht.......pa........hpl.spslsphp.slpullsulshpspuLthcuhhhhpsph.....sss.sps.....Lpphsusshhhlu..........sLpp.appls........psss.hsphlp.lhpphhpthshslspslhsh...pGchhh.hlsts..........suWlhsspppsssss............p..LDphhp.spG.hphsslshsspslssWop.........................................L.h..t.t.........ltsp........lsss...........+uthsspthaussLsulspthss.....tpsLtss.phppslssh.....tsstt.hhLshptstshLsp.hhPh........hhh.hthsupslhsslpululss...........................tss...ssshhphchhLplt 
.......................................................................................h...hsshhlhshuhushhhh.tt........ts..p....sshhlPppA.hhhph.sss..pphtth.....tphhs.......ppppt.......hppthpp........hppph...hst.ss....lsapp-..lpsWlGp.clslA..lhs.shpt..........................tpsshLlslshp..ctptucphl.pphhppps.ts.hplp.pppYpGlsl..hthpsstst......................h.ssAll......scp..allhuss.pl.lcpAI-shpsss.slsps..sappshppl......sps.luhlah.shssl....................................phh................hsh.sp.hs.ph.p...thp......shs.....hulshptpulthcshh....hhpss.......................s...st.......hlph..hsspsh.hh..hs........................................slsp.a.pt.htp.h......ttss.h..phhpphhtphpp.hslsltpslhsW..hpGEauh.hl.pt.t............................tsshlhhsptp..stt.............t..lcthht..ptt..h.htph.h.tt..l..Wp....................................................................tt.....................h.hh...........hs..st..hhht.htshtth..........sl.tp.th.p.h.............t.hhlshp.....h..t.....s...................t.hh.thpulshs...............................ttt..t.phhh........................................................... 0 17 45 57 +11666 PF11833 DUF3353 Protein of unknown function (DUF3353) Assefa S, Coggill PC, Bateman A anon PFAM-B_2231 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 205 to 258 amino acids in length. 
28.40 28.40 29.30 28.60 27.70 26.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.22 0.71 -4.85 34 209 2009-06-18 11:57:17 2009-06-18 12:57:17 3 4 100 0 110 207 132 176.70 25 71.90 CHANGED sp-ASFEElQpARsphLpph.usDtpspsplEAAYDulLMppL+pRQpG+ltlsptlphscp...........tsssstsstsssshlpch................s.lshPpspslhhphsh.GsLullhllh.........s..ssssspLhLuluhhuslhhhh++...tp+hhpuhhholssLhlGlllGullsshl.s.hls..hshospplpulsshllLaluuhhL .............................................psASa-EIp..tA..+shllpph..sscppshtp.........lEuAYDtllMppLpp.RppG....Kl....plspcl+hsc......................stssssWlpph......................hp..h.Pstp.sl.hhphhhauhlsshslh............s...stssssh...lA..luhh...uslY.Flp.c.+............tp.phh+uhhhshssL.hhGhhhGshl..shl.s.........hs.p.h.uhhshhhhalss.aL................................................................................ 0 26 73 98 +11667 PF11834 DUF3354 Domain of unknown function (DUF3354) Assefa S, Coggill PC, Bateman A anon PFAM-B_2265 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 60 amino acids in length. 29.60 29.60 29.70 29.90 29.20 29.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.41 0.72 -4.36 43 290 2009-06-18 12:00:32 2009-06-18 13:00:32 3 39 100 0 145 279 0 67.50 36 10.74 CHANGED hhRVoIa...sspc....spts.GKLlhLPsSlcELlcIuupKFGh.s.s.ocVhsp-.GAEIDDlclIRDGD+Lall ......................tRVola..tsspp.......t.....GKl.lhlP..s.olp-LlplAupKhGh..t..s....opl.hstc.GucIDDIslIRDsD+Lal...... 0 25 77 113 +11668 PF11835 DUF3355 Domain of unknown function (DUF3355) Assefa S, Coggill PC, Bateman A anon PFAM-B_2268 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 111 to 177 amino acids in length. 
26.70 26.70 49.10 49.10 23.00 22.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.13 0.71 -4.43 5 57 2009-06-18 12:04:15 2009-06-18 13:04:15 3 2 3 0 21 51 0 152.20 44 28.56 CHANGED TPTKCSTPVPSsAITKuVAESTPTslEHVFPATsusSsPSIuSAAtsTPVSLTtTKEAEADMsKVE-Ko-cTlpDLCsK....INpMLEsp.....E.hhusDSTtsVslhussTsssshsLEhsp-..App..h.phsppp.hs.hps.............ssc..hspc-hltthclsoK ...............hPTKCSs.sPSsATT+ssAcSosss.E+VFPAThus.sPSssSushsTsss.sETccs-usMDKs..ps-cThQDLCs+...............IsphLEshRshK.-.ohShD.stslsshSsNsssss.........hhhEVSsE..AsshphVsoschshssshps...tp..p....ssst....sMsp--hhchhcVsoK......................... 0 0 0 5 +11669 PF11836 DUF3356 Protein of unknown function (DUF3356) Assefa S, Coggill PC, Bateman A anon PFAM-B_2406 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 104 to 119 amino acids in length. 26.90 26.90 29.00 28.20 26.30 25.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.07 0.72 -4.21 42 205 2009-06-18 12:07:32 2009-06-18 13:07:32 3 2 175 0 72 166 41 99.10 39 86.19 CHANGED apGEV...........slslsGpppsh+LTLGALAELEsthus.....ssL.......suLspR...Fcsup.hSu+DlltllsAGLRGGGhsssps.-lhpspltGGsh..ss.ucsAApLLspuF..shs ................hRGElshplsGccasLp.L.TLGALAELEsshts.....ssL.......sALltR...FssG+.hSuRDlhplIsAGLRGGGpsss-c..-lushps...cG.G.ss..uh.AplsupLLsssF.ss..s.......................................... 0 17 48 58 +11670 PF11837 DUF3357 Domain of unknown function (DUF3357) Assefa S, Coggill PC, Bateman A anon PFAM-B_2464 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 96 to 119 amino acids in length. 
27.80 27.80 27.90 27.80 26.20 26.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.59 0.72 -4.20 35 152 2009-06-18 12:11:01 2009-06-18 13:11:01 3 4 62 6 22 158 0 103.30 29 16.65 CHANGED hsshshp.YsP.LPssstsss.s...........+RRsh...pshsslhsullhLhhllull.............sspssssst...sss.ssspssss..............SRGsspGVSEKSsss........hsus.ssuFsWoNuML ...................s.....t.YsP.LPsssssutt............++R.......tshsslhsushlLhslsshh................stssss.ps............tss.pssts...............SRGsspGVSEKosss............hhuu.ssuFsWoNuML........... 0 2 11 17 +11671 PF11838 ERAP1_C DUF3358; ERAP1-like C-terminal domain Assefa S, Coggill PC, Bateman A anon PFAM-B_2558 (release 23.0) Domain This large domain is composed of 16 alpha helices organized as 8 HEAT-like repeats. This domain forms a concave face that faces towards the active site of the peptidase. 23.90 23.90 23.90 24.00 23.70 23.80 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.01 0.70 -5.19 123 3372 2009-06-18 12:17:03 2009-06-18 13:17:03 3 33 1415 15 1516 3073 300 295.80 18 35.56 CHANGED hlhlNssshuah+lpa.sscphsplhp............tltpht....sshsRshlhusshshsps..........GphsspshlsLlp.....th.....ssEs-.......hsVhspllsplsshttshh.hpspttpp.hpp...h..plhst.hhcpl.uhp.s...........sshphthl+shhhssus.........ssts.hphhpshls...G.sps............slss-LRhslhsslstt.Gs..tpt.....hpplhp.hppss....................o.....sstcptAlpuhuthsssplhpc.shshlhs.....s....s.lp....sps.lthshtGh.............t.sspp..phlhsahpppa.-pltph......hsstss..hhshhltlhssth.......s..........stctlpplcpah..p...........sct...stuhc.Rsltpsh-slp 
..............................................hlhlNssph..uaY+lpY..cspth.pt.lhp.........................tltt.p.................l.s.s...h..sR.stlls-hhslscs..........upls.hsp.hlsl.lp.....hh......tp.Ess..........hhsh.ps.hh.....p..t.l......s..h....ht.hhh......t....s....s..t..hpp.......hpp...........................hh.p.l..hpt.....hh...p....p.....l...uhp.st.................................................psh..t.pt..hhR..sh.hh..shss....................t..ts...hppu..tph.h.p.t...ahp...........s.....s.......................................slss.s...lR...h..l.htshhpp...us....tpt..........hshl.hpph.p..p....ss...................................s.....ss.t+pph.htAL.....u....s.s........p...........s............st.h..l.pp..h.......l..p.hhhs......s............p.....lp.....sp-...h.hthh..t.s.h..................h...ssths.phhhsah..p..p.pa.p...h.l.ph........h.stts........hsthl......t.h.hsph.h.t...................stp.p.htphctah...p............t........................................................................................................................................................................................................ 0 434 694 1236 +11672 PF11839 DUF3359 Protein of unknown function (DUF3359) Assefa S, Coggill PC, Bateman A anon PFAM-B_2625 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 80 amino acids in length. 28.40 28.40 28.40 28.40 28.00 28.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.26 0.72 -3.65 2 169 2009-06-18 12:27:18 2009-06-18 13:27:18 3 1 117 0 29 87 0 84.90 74 98.60 CHANGED MpplL.hSulshuulLAsGCuSlocp..............sEttlouspssAspu.uRAsEAYtKA-EALAAA.tAQpsAsEANE+AhRMLE+AShK ..................MNNVLKFSALAL.A.A.VLATGCS.S..sS..K..E........................TEARLTATEDAAARuQARADEAYRKADEALAAAQKAQQTADEA...NERALRMLEKASRK.............. 
0 8 13 23 +11673 PF11840 DUF3360 Protein of unknown function (DUF3360) Assefa S, Coggill PC, Bateman A anon PFAM-B_2754 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 489 to 517 amino acids in length. 25.60 25.60 26.40 27.10 25.00 25.50 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.75 0.70 -6.16 11 257 2009-06-18 12:32:27 2009-06-18 13:32:27 3 2 199 0 48 167 14 447.20 65 98.72 CHANGED MSss........................ppsSYpclH+PuSEFpTR--YL-HELpIMpPKRW+lNLPhRDaRFEhEDhVPAhAATIGKlVMVuAlsAAFAush.......GLusEFVlENVRYELLIAulLFVILhSGFLNPsANLAGTHGPhIPLIPlIsAAGGHPLALGIhlGlFGLlLuloKGGShLspLTScGVsGGLLlYLGFlGhhSQlpKLhuWAsuh........uMualuFVVIhsTIllYAhLt+hpKRWLAIPLsullAullAFuMGAPF-..FsTpPGLPNhsPhYWWGE.sTGWpLGLPslpHFIAVlPFAlLAVAMWSPDFLGHRVFQcLNYPK+u-+VLMslDDTMTssSlRQhVGSlLGGGNluSSWGTYMIPAAIAKRPIPGGAlLTGlhCllAAlhGYPMDLAlWpPVLsVALlVGVFLPLLEAGMQMsR-sKsoQSAuIslFuSAlVNPVFGWALTMLLDNhGLIGsKERuspLohhcRhlIPsssFllhsuuMuhVGMLP...GIPAlL ..........................-h.cspoYc-LHRPuSEFtoRs-YL-HELQIMpP+RatlNLPhRDaRFEhEDhlPAh.AGTIGhlsMhuAlhhuaAssL.......sLu.cpFslEssRhEhLIsAl.FsllhSGFl.PpANLAGsHGPhIPLIshlshAGuHPLAhuILlGVFGLlLuhhKGGShLssLTSpGssGGLLlaLGFsGThuQlppl.pWAsGl........sMuYIuhlVlhlsIllYAhLt+hpKRWLAlPlsshhuhllAhALGAsF-..FpTp.GLPNhsPsYWWGp..ppGWhLGhPshpHFlsslPFAlLAVAMWSPDFLGHplFQclsYP++TEKVLMslDDTMThCSlRQhVGohLGGuNhTSSWGTYhlPAAIAKRPIPuGAlLhuhLshhhAlhGaPMDlAlW.PVhpsALlVGVaLPLLEAGMpMs+csKs.oQuAuIslFuSAlV..NPVhuWALTMLLDN.GLIGsKERuupLShhs+llIPusshlIhhhAMhAVGML....Gl.Ahl........ 0 10 18 34 +11674 PF11841 DUF3361 Domain of unknown function (DUF3361) Assefa S, Coggill PC, Bateman A anon PFAM-B_2780 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 154 to 168 amino acids in length. 
27.20 27.20 27.60 28.50 27.10 26.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.92 0.71 -4.52 9 228 2009-06-18 12:35:40 2009-06-18 13:35:40 3 3 88 0 122 212 0 153.90 50 23.12 CHANGED hTFAtEFIpccGLthLhpllEsusc............hu............-hLsasLoAFlELM-HGhVSW-hLosoFI++lhuaVs.psth.csollppuLuILEshV.sSsslathVtpElTltpLlphLps.sspplQspAIALlNALFlKAs-s+Rp............phAsslupKpl....RthIhssllt ........hTFApEFIsh-...GltlLsp..hVEsGoc..............hu.....................................-hL.uaoLTAFlE.LMD..H.G.IVSW.D.hlSlsFIK+IAuaVs....pshh..DsSIlQ....pSLAILEShVL.N..........SpsLYppVup.EITlspLIsHLQs....osQp..IQThAIALINALFLKA.s-.s+RQ........................-MAshluQKpL....RslILsplI+................ 0 26 35 68 +11675 PF11842 DUF3362 Domain of unknown function (DUF3362) Assefa S, Coggill PC, Bateman A anon PFAM-B_2839 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 117 to 158 amino acids in length. 27.60 27.60 29.10 28.30 27.20 26.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.01 0.71 -4.30 9 1200 2009-06-18 12:37:54 2009-06-18 13:37:54 3 3 1181 0 232 851 107 136.70 46 19.49 CHANGED DQVQsFYPSPMATATAMYHSsKNPL+KVsRc.....sEsVDIV+GEKpRRLHKAFLRYHDPsNWPlLR-ALKpMGRADLIGsGKpHLIPsaQPh.sDG......uYQSAR+KNSo.......................................ssG..shps...........spspKG+hLTQHTGLPPRsss .DQVQsFYPSPhAsSToMYYTGpNPLt+lt.a.c......oE.cVhV.PKG-+QR.RLHKALLRYHDPsNWPLlRpALctMG+c.cL....IGsp+cCLV...Ps..........ss....hc.......................ph.pp...u..R..+..p.spp...........................s.t.st...h.ttt.................................................................st.sttttttt........................................................................... 
0 66 143 189 +11676 PF11843 DUF3363 Protein of unknown function (DUF3363) Assefa S, Coggill PC, Bateman A anon PFAM-B_2310 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 323 to 658 amino acids in length. 28.40 28.40 42.40 30.30 25.60 24.80 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.27 0.70 -5.82 40 413 2009-09-14 15:03:38 2009-06-18 13:59:04 3 5 192 0 169 399 41 221.20 32 58.09 CHANGED LhlGRLp+LEchGLApphuPGpWplpscsEssLRcLGERGDIIKpMHRAh.stcGh-..Rssusaslcspt.s..sPllGRLlsRGLc.DELpspuYsVVDGlDGRsHHlclsch-ssucsss.GuIVElRsh.s.......................................................................................Dsptppp..lslssRSDLsLppQlsAsGATWLDRphluc-s.sLut..uGFGtEVRpAhcpRs-+LhspGLAcR.pupRllasRsLlsTLRcRELsssupcLuucTGLsaptussG-pVuGsYRp+lsLASGR.FAMI.....D.....cGLG.FpLVPWpPsLE+pLG+cVoGls+ssGGl-WshGRcR.GLGl ......................................................................................................................................................................................................................................................................................................................................................................t...h.lthho.hslttQht..usTWLDpphlttt....shs......GF....Gt..psp..tAhttR.thh.tpt.App...ptttl....hhttshlthLctpEltthutphu.t.u..h..sttGp.ltGhhptph.LuSG+.aAhl.....-.....puht.FpLVPWps.l-pplGppl.uhhp.ssulsWph..uRt+.u.............................................................................................................. 0 24 107 141 +11677 PF11844 DUF3364 Domain of unknown function (DUF3364) Assefa S, Coggill PC, Bateman A anon PFAM-B_2336 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 60 amino acids in length. 
27.40 27.40 27.40 27.40 27.10 27.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.77 0.72 -3.94 40 333 2009-06-18 13:02:42 2009-06-18 14:02:42 3 3 292 45 92 315 8 54.70 45 14.47 CHANGED MsQss-.......+lhDHhsLF+cPEYp-hhpsKpcpFEssh.ssccVscltEWTKTWEYREKN ............MsQ.osE.......+lLDHspLF+-sEYpchFpsK.ppFEsst...sspcVpclhEWT+ohEYREKN. 0 24 57 72 +11678 PF11845 DUF3365 Protein of unknown function (DUF3365) Assefa S, Coggill PC, Bateman A anon PFAM-B_2563 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 198 to 657 amino acids in length. 27.80 27.80 27.80 28.00 27.50 27.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.67 0.71 -4.44 153 684 2009-06-18 13:07:17 2009-06-18 14:07:17 3 87 394 0 307 670 98 181.10 17 39.40 CHANGED lhhshlh........hhhhhhhhh.........................h...p..pts..tppshpp.Acshhpph...............shRp..........asspphtsh..h...................................shthshhs...........P.uhhsppl...tc.......thsp.tt.s..hhph+hsoh..pspNPp.NpsDs.aEppsLcpFcpss............tphhph....su..pphh+hhp..P..lhh.pps..CLpCHu......t................t.thsaclG-lpGuhslsl.Ph .............................................................................................hhh......h....h.hhh...h....................p.....t.h.....t..hh..pp..uphhhpth...............t.h+p..........ahsphht.h..h........................................................................s..hth.s....s.shhshpl...tp........hsppt.s.......hph+hsuh...ps...pNsp..stsc.....s..aE.pchlcpFppst.................t........ph.p.....ss...pshhRhhpP..lhh..pps..CLpCHu....t.t.......................t.saphG-ltGshslphs..................... 
0 130 249 287 +11679 PF11846 DUF3366 Domain of unknown function (DUF3366) Assefa S, Coggill PC, Bateman A anon PFAM-B_2678 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 200 amino acids in length. 27.90 27.90 27.90 30.10 27.70 26.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.36 0.71 -4.43 29 415 2009-06-18 13:09:44 2009-06-18 14:09:44 3 5 380 0 90 371 29 192.40 21 34.03 CHANGED lhhhullhslslHShLEYPhaYuhFhlshhh.hLhhh.s......sth+...hshphs......hsLt...shsllsshhhshhhhp..shppsphL.ha.....tspsss.....hppsssshhatp..sas.hsshh.hsh.p.tssshhhshhphuhclh...P.....psshhpph..hhuhshhGcpscA..............pphhppsphlas......................hppapsh...hpphstssss ........hhhhulhss..lhlHuhLEYPhaashhhh.hhl.lLhhhss............pth+..........hhs.hsth..................hhLh...s.shhlhsl.lshhhhhp....sh...p...p...s.phL.st..ap....tspshs.ph.....hppsp...shlh.t.pp..tashaushh.l.sh..tpppsphhps.hhchuhphlp.pP.....psshhpph..hlhhth.GcpscA............................cphhpphphlaP..........................................pth........................................................................................ 0 15 42 65 +11680 PF11847 DUF3367 Domain of unknown function (DUF3367) Assefa S, Coggill PC, Bateman A anon PFAM-B_2726 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 667 to 694 amino acids in length. 
26.00 26.00 26.00 33.70 25.90 25.40 hmmbuild -o /dev/null HMM SEED 680 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.20 0.70 -13.27 0.70 -6.58 14 220 2009-09-11 09:45:00 2009-06-18 14:11:38 3 6 191 0 63 201 18 617.40 44 51.49 CHANGED llhhhlsFtQsPGplusDTKlDLslNPhtFLsRAhahWssshshGQlQNQAYGYLFPpGsFFhlschLtlPsWVsQRLWWulLLsluFhGsltLscsL......slGuss.sRVlAAlAaALSPRlLTsLGulSuEshPhhLsPWVLLPlltsh+.....................................ssRRhsA....hSuLAlshhGuVNAsATlsullsAsLalh..stt..pRhhph.huh.Wh.....hulhlAohWWllPLLlLG+YuPsFhsaIEouusTTshhohsElLRGsssWssals....sthsAG.tpLVssshhlhsTshVAulGLhGLshhthPtRthhsshLhsGlslluuuahst.suPhup.VpsaLDG..sGssLRNlHKh-sllRLPLslGLApLL.stl........slsh.ushtth...hhp.th...........t+hAsslhslhllssusuPAaoGclhss..GsapcVPsYWp-AA-WLssc...ssu.o........RsLllPGusFup.sWGpo.hDEPhQsLh-sPWuVRDulPLssPtsIRhLDul-ctLpsGtsssGLussLtRt.GluhVllRsDLsscsstsshsh.hh+pslt..coPGlspsts.FGsshtsssh........hlsDsshcs..hh.ul-lap....Vsss..........sssss..hthssscshhhVsGusEuLhpLsusshhtst.....................usslhsu-.t.ppss...ssss....ssoDsstspcssaGp.....sssssushhsss..................-....spsshshlss 
................................sshhlsFsQsPG.lusDTKhDLssNPhpFLucAhphWssphshGQhQNQAYGYLFPp...GsFFhluchLt...l.PsWlsQRLWWulLLsluFhGhlRlA.ctL........ulGusu.oRlluAsuaALSPRlLTT.LGuISSEshPhhLAPWVLL.Phlhsht...t............................................sstthAAtuulAVAhMGAVNAsATlAuslsAslahh....................s+t...........s.Rh....hhRh...huh...Wh......huhsluohWWlhsLhhLuthSPPFLDaIE..S...usVTTpWhSLsElLRGTsSWTPFVs.....sptsAG.tsLVTsshhl..luTshlAA...hGLsG.L...s...h.t............hPtRt...hhlhhLhlG.lllhss.uahut...suPhut.lpsaLDu..sGssLRNlHKhsPllRLPLsLGlAphL.u+l...................sl.stssst.t....hhpspps............................+plAssllslhslhsuso.AWoG+lhPs..GsastlPpYWpcAA-WLspc..ssss.................RsLVlPuAsFApQsWGho.+DEPLQsLhs.s...PWuVRDulPLsPPtsIRuLDuVpphhssGtsss.....GLus.sLtRt.GluhVllRsDLDscsspostsh...lh+pslt..tsPGlt+l.spFGsshssssl.ts......hl.-sshcs...thsAl-lap...Vsss...............sssus...shhssscthshVsGuP...EslhpL..sp..p..tth...u............................uPslhsuD.u...ptsuh.....ssst.....sloDsshsR-s-aGp......lsspsSshhuss......D....hptshsh......................................................................................................... 0 14 44 58 +11681 PF11848 DUF3368 Domain of unknown function (DUF3368) Assefa S, Coggill PC, Bateman A anon PFAM-B_2745 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is about 50 amino acids in length. 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.93 0.72 -4.40 50 241 2009-09-14 14:59:35 2009-06-18 14:13:08 3 2 142 0 94 302 28 47.90 28 32.24 CHANGED plpGTLGlLltAKpcGlIs..pl+sl..l-pL.ppsGaalupplhpplLppss ....lhGTlGlLltAKppGlIs..pl+sh..lcpL.ppsG.ahlupplhpplLphh............... 0 32 74 88 +11682 PF11849 DUF3369 Domain of unknown function (DUF3369) Assefa S, Coggill PC, Bateman A anon PFAM-B_2927 (release 23.0) Domain This domain is functionally uncharacterised. 
This domain is found in bacteria. This presumed domain is about 170 amino acids in length. The domain appears to be related to the GAF domain. 25.00 25.00 25.10 26.00 24.40 23.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.79 0.71 -4.32 63 346 2012-10-02 14:34:25 2009-06-18 14:16:17 3 20 234 0 123 289 44 170.90 27 29.85 CHANGED lhouLRSYcsIpsI-psRp.......GLE+llcAousLh..phcolppFusGlLsQlusLLsls.ssulhs.................sspss..........tpp......tt...hpllAuoGp.asshhsps......htt....hsspltphlppulpp.+psla.pcsthlhYhssp..s.spphllalcst.psLs-h-ppLl-lFspNlulua-NltLhpclccs ............lhouLRoYcplptI-ps+p.......GLp+llcASuslh..p.t+olppausuVLsQlsuLLslp..ssulhs............hshpss....................spp.....tt..hplluusGp.apt.hhsps........htt....lstcl...tptlcpslsp.....+pshh..tssthlhahpsp..p...uhps..llYl...pst...p..pLs-..h.-.cpLl-lFstNluluF-NltLhpcLcc............................................. 0 38 66 86 +11683 PF11850 DUF3370 Protein of unknown function (DUF3370) Assefa S, Coggill PC, Bateman A anon PFAM-B_3037 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 452 to 532 amino acids in length. 
25.00 25.00 67.30 67.30 22.70 22.50 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.38 0.70 -6.02 31 109 2009-06-18 13:19:48 2009-06-18 14:19:48 3 3 55 0 43 130 73 441.60 43 92.32 CHANGED shhlsss.......................shhtsstssssthss....p........QclRPLPGpLssVPVlNSNsPElVpu-GILlSTFPs......ttptpPtspL......sasFsGRFDlFuHHlatspsspch.....cs.LaLullstNPuspPVTlplhpGuopLS......ps-APFlsLPshhscshG..slhuGPGsRsus-lL.RGcpstcLPsp.lsIPPtptphLlsLPIPlpGls.......................................NGRshhlRhcSsGPlhhAslAthu........................psPo.pcWhslLpsGpLu.sR-ahPoPh.....thpsthIYSRVAGVuhGopWpAploD...ttt.L..shsstslSaPloSLp+GphGTsQlQoA.LhsRhPDTAasAHGNYGVcYDLsLPLpNsusp..slpLsLpoPlKssc....tttLhF.t..ps.shFRGolclcht-sttt.....+hlHLs.RpGQ.G.sLsplsLpPGcp+pVcVsLlYPADATPPQVLoVhPlp .........................hhhhhhh...........s...hssp.ls...psQplRPLPGpLDslPVFNSNSPElVpsEGILLSTFPs......pscthPsAHL......NashpGRFDlFuHHlspspsspch.....co.LYlGllstNPu.spPVTlclLpGuSaLS......psDAPFlsLPshsc.sspG..sVauGPGSRlssDlL.RG.......pppsphPsp.llIPPspschLlshPIPlpsls.................................................NGRSshhRLp..Ss..G......s.......lYhAsLAhau................................p..sst+sPoLp-WpsLLppGpLu.PRDhhPTP......tsssphIYuRVAGVuhGSpWpAplsD.s.ttp..hLt..lPtsGpuhSYPlSolptGshGTsQlQoAshhsRYPDTAYpAHGNYGVcYsLoLPLhNsospspoVslsLpTPlKpsp....pstLhFhpsssstlFFRGoVclpYpDcput.pp..RahHLVQRpGQ.GpsLlplslpPGcpR.VpVchlYPPDATPPQVLTVpT..p..... 0 2 25 39 +11684 PF11851 DUF3371 Domain of unknown function (DUF3371) Assefa S, Coggill PC, Bateman A anon PFAM-B_3115 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 125 to 142 amino acids in length. 
25.00 25.00 26.60 26.70 23.30 24.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -11.07 0.71 -3.46 20 373 2009-06-18 13:21:43 2009-06-18 14:21:43 3 4 107 0 112 312 0 132.00 43 32.44 CHANGED hQARAHGL.sshsSsuls...os-Lss+l..lKQpssh-c.............................................ssp....c......lhs.p.tppPphssss.............sLDls...........................-GshsFsDsLss.sspshshu..hptpp.cL--lLMDD..........sLSPl..uoDPLLSosSPs..uSKsSSRRSShShEEs- ......................................................................................................................hQARAHGL.shhs.SsGls......os-Lss+l..lKQEsslEp...............................................sspp.................hh...p....ts...shssss...........................sLDLs.................................-.Gshs..Fs.ssLu.stss....s...s.hshu...p.tp.cL--.lLhDD...............oLSPl..soDPLLSohS....Ps..ASK.s..S.SRR...S.Sh.ShEEs.................... 0 4 14 44 +11685 PF11852 DUF3372 Domain of unknown function (DUF3372) Assefa S, Coggill PC, Bateman A anon PFAM-B_3259 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This presumed domain is about 170 amino acids in length. 29.70 29.70 30.50 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.88 0.71 -5.11 37 294 2009-09-14 08:08:02 2009-06-18 14:23:36 3 29 264 8 103 277 30 163.90 36 13.66 CHANGED ohpsNN.WsVGLPhtp+stssWshIpsll..sssshcPsts-IttspphFpElLpIRpSSsLFRLsous-I.pRVsF+NsGssQhsGLIVMSI-D.G.sts...........tslDsph-ulVVlhNuosppho.hsls....shs.u...apLHslQtsus..Dshltpu.sasss......pGpFTVPAhTsAVFVpsp ......................t.hpsNNaslGLP.tscst...ssWslhcsll..sss..shcPsss-Ittssshap-LLplRpSSsLFpLsouscl.pRVsF+...NoG........s.s.p.hP.GlIVMoI.....-D..u.tts...........sslDsthsulVVVhNAospsho.hsh.........shs..u.....apLHslQs..suu....Dsslp.tu..shssu......sGphTVPAhosAVFsp...................................... 
0 29 62 92 +11686 PF11853 DUF3373 Protein of unknown function (DUF3373) Assefa S, Coggill PC, Bateman A anon PFAM-B_3442 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 472 to 574 amino acids in length. 28.00 28.00 28.50 40.80 27.70 27.90 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.81 0.70 -5.96 18 144 2009-06-18 13:25:15 2009-06-18 14:25:15 3 2 135 0 39 127 11 496.60 45 99.04 CHANGED MKphhphhlsush....hshohshtutss-hsh.p.cl-pLppclppLcpQht.................thppplpchEc..............+ohuchLphuGDaRa+lDulch+..s.....................................................h.th...s.....................................................ssutcscN-slaTNRhtLsh+AKsocclohpuRLsMYKsaG.....t.ssssspsaauspsh.hhDGshscsPo.sshLpV-cAYhsWpN....IuDpP.hahSlGRRPSTsGsPupLRpNpp+sus..sPuhLlcasFDGhTlGas....s-.lsulsG.sasKlCYGRGF-su........assst......NshcDTDhhGhsllPhDssshplphpa......s+uaN..lhshPs.hss.F.................stsssssplGsl.sahGhuhh.phcslGsG......shshFsShuhSpTpPsss.........h.hssh.....puhLsus..............ss+TGausYlGscash...ssssKhGhEYN+GSKaWhoFs.PAtDDhhsSKluTRGssaEsYhIpEL........hs+hFh+LuapYYDa-YTGSNsalGA....PhKIs-lpus.h...hhs.......................................slcpApDlhuohcl+F 
..........................................................MKThhSLlLuAsL....LooSLhAc.ss-DElspLQpQLAplpuELApI+cEp-spscpsp...............................ssptclADLNDR.s.DcTEh..............pAALs+lKFGl-FpTuVsNhsYK.......................................................................................................................................................................................VsGQDhssN..shahNcL+LNMsAclNDcscFaGRLSMsKNWu............QhGaSussh..sLDuspNspo.S.GssLhVDRAYhDYh.....Issp.WhsolGRpPuTDG..PsSNLRsNuLRpST..hPA.LsINshhDuAslsYc....PEsL.p...-ac......sslRhsYG+saphspt...............hhcWhusppsuD......sNlhhsss.....GtLsI-uhsDshlhh..slsa......hssFs.......l.shsshh..ss.h............................hpsssssNLGDl.sluslpFp.shc..uhGs.......NFNaFsSLGaSpusssph...............sshh......puh.Lpsp............t.s-cDGYAVaVGuRYDh.....ocuhKlGaEa.aGS+YWhTMo.PuhsDPlsh+h.TRGsAa-hYlIapL........-chtFlRLSYTpIp..auspGhPF.Gu.....sKh...DcucAssh......MhMh.......................................sVK........................................ 0 14 32 36 +11687 PF11854 DUF3374 Protein of unknown function (DUF3374) Assefa S, Coggill PC, Bateman A anon PFAM-B_3548 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 665 to 712 amino acids in length. 
20.70 20.70 20.70 20.80 20.60 20.50 hmmbuild -o /dev/null HMM SEED 637 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -12.94 0.70 -6.27 30 127 2012-10-03 17:14:37 2009-06-18 14:29:26 3 2 73 0 59 117 34 615.40 28 90.44 CHANGED Gas-sDDh+..tuNsa.......Gs.psGhsutlsADlpapscs...GYpsplpAcpLGh-suphplpsG+.GpYplsL.sYcpls................pYpsssshos..............a..............WhssG..........hsthpssl....ssh-LslcRc........+hulGhp....Yps........shapsalp....Yp+EcKpGp+...p.......uShshh.s.ps...................s......lucPlD.toTcplsAGlphpG-s.....WhsslsYpGShacNchssLs.....hssshssh...............................s..AhsPDNpAHplulsGQYphs.pophsGRlshGpMoQDpshlshs......................ssPh..pshDGpVDTlshslphoo+lspcL+LsuuaDYsDRDN+oss.hpasQhph..........ssls.Gtst.....NsshDpp+pphclsusYRls+sh+LpuGY-acps-Rott-............REpTsEsslau+hph+shsshshtlKuuausRsGScYpss.phou........upsNsLLRKYY...L.ADRcRsplchcssa.....ssh-sLolshsscYupDDYsc.Tp.lGLTEucDhGYDlsluatls-clslpAFhspQaI-.......SsQu..GSsshusss..........W...puslcDchsslGsGhsYs...sLh-s+LsLGhDYoaSsSpScTpl.............spshsssYGDYaupsHs....lphaupYplo-phu..........L+LsYpaE+YpDsDaup..lss......suIs.....slhohGslsHsYsAHhlhLohSYpL 
..............................................................................................................................................a.st-s.p..tsNth.......us...psthhutlsuDlp...h...pspp.......GYpsplpAppLGh-sshhplpsG+.Gpaplpl.sYppls................pap....sssshos..............a....tt.......h..........hhss..u..........t.....................hshh.ssl.............ssh-LulpR-........+hslGhp...aps........tshapshls....Yp+Ec+sGt+......p...........sShshh....s..ps...................h..........lscPlD.toTcplpAGlphtGcp.....W.sslsYp.s..Sha+N.chs..s.Ls.....apsshsss...............................h................st.uhsPDNpuHplu...lpuph...shs...psphsu+lhhupMoQD....pshlshsh.........................thPs.....pshDGcV-hhths...lphsu+lop..slplsuuac...YpDRDNposh..tasp.h.h................................................sshs.Gtst......NssaDhp.ppphc..lsusYRlspshpLpuGYcacps-Rsht-................................Rc..pTcEsslau+hphpshsshshhlKspaupRcGSpYpss..thop.........................upssshLRKa....L.ADRcRpplchcssa........................sshpsLolshssphs.DDYsc..ot.....l.GLocscsh.u.YDlshsa.lssslphpAahspphh-.............ScQu...Gussh.usss............................................................W....hsphc.D.cs..sslGhGhsap...sL..h-s+LplGhDY....oYSsup.SsTpl......................ttuhsssYsDhhup.Hs......lphaupYphs-phu..........l+lsapaEcYp-sDatpp.hs.......ssls......sllshGs.hspsYsAphlhlohsYph.................................. 0 9 23 38 +11688 PF11855 DUF3375 Protein of unknown function (DUF3375) Assefa S, Coggill PC, Bateman A anon PFAM-B_3589 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 479 to 499 amino acids in length. 
25.00 25.00 26.50 30.10 23.60 22.90 hmmbuild -o /dev/null HMM SEED 478 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.43 0.70 -5.75 28 246 2009-06-18 13:31:11 2009-06-18 14:31:11 3 2 231 0 95 249 38 458.70 24 95.80 CHANGED lptlR..pcpPAWRLLtuppAPllluhLcphFh.ssspslPps-LtptLccpLtsLccpttp.........tthPps....ApsYLs-Wst..p...GWL++hhsp.sss.-spa-LTsssccAlpalssLpppp..hsuTESRLtolhptLcpluptspsDPctRltpLccc+sclctEIsclcuG.clshhDssplt-RhpplhphucpLhuDFRcVccsa+pls+plRcclhs.pus+G-lLcplhsphDs.ltsS-pG+SFcuFachLhssspppclsphlcplhsh...stpLcsctR..Lpplhtchhctu-cVpRshp+hucpLR+alcspshhEsR+lhplLpphpstAhsh.....pct...sthhhshsthusshphshpp.hhpsPs.......psthsshsh.sssssch...shsshhststlDhpclttt.....lppsLt.pps...pholucllpphP.hppG..LuclluaLplAt.........ptsh......................lscppppplpapst..sGh....hRpsplPplhhsc ...........hR..pcpssh+LLtuppuPhlluhLpphF..stpttlspscLhppLpcpLttLpppttt............t.hsps.......ApshlpcWsp..p...G...WLp+hhst..sss..c.........hacLTstucpAlcalpsL...p.ppp..hsuTtSRlpslhptLcplutpsssDPppRlttLccchpclctEIpclct....G..p..h....shl-spphh-chpplhphucpLhsDF+cVppshcpls+plRcclhs.....sct....spG-lLcplhsth-t.ltcS-pGRoFpuFaphLhssppppplsptlcplhph..hht.tLssp.Rt.l+plhtclhctuppVpcshpphucpl+palpspshhcpRplhplLpph.stAhsl.....ttt........t.thhh.ht.ths.ssh.h..s.ltp...hhpsss.......pst.h.s.sh.h..tsssssl.s...shsslhspstlDhtpLtpt.......lpphlt..pps........lolupslpphP..pps..LuclluaLslut....ptsh.................................sspsp.tphlpaps......-Gp.........RphplPtlhh................................................... 0 24 63 86 +11689 PF11856 DUF3376 Protein of unknown function (DUF3376) Assefa S, Coggill PC, Bateman A anon PFAM-B_3667 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 770 to 1142 amino acids in length. 
25.00 25.00 25.90 25.90 21.90 21.90 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.73 0.70 -5.13 23 113 2009-06-18 13:33:06 2009-06-18 14:33:06 3 3 102 0 35 96 6 449.00 32 49.25 CHANGED hsslhuuluslPRppsh+spLttlpt.spphp+hcplhsuhRschppthpp...........thu...shhtsp.pstcLpthcscstptt.....shttshsatshhpsclst...hlctlspllsths.ttsssp.tttthpshhtph.t.........thsslsshps.sstsshtthhpttc.....hsh+hRRLphlsccLs.................csh......csssstc..........ssslsttcpslhshl.t.ulhpstcthtt...............hucshpc.lsssshsh...........ss......plhchlsphth..............t..pshtthctpu..-phLus.hltthsp........sh+tpl....LhsYL.u....asaaDlsshPhh..tstsh..sphp.lplsRISP-DAso..lpsssst........spLtGhphssFGAFhsRuaRcNDalWGRLcGA-Rllcllhsstc......................................................................................................stls......-hshhs......................lc+..ptFhslLcEEhspLh...........................................................................................................phs..............shlusLcsp.....ss 
.....................................................hpsIhup.ssh.tcpss+.cLtplttp.c....pthphlssuhhschcpp.sp...........t.u...slh.shhphhphss.cstssppu......h.tuauhphhst.schst............lscp.hshhh.hhsh.....ttsssp.shsp...stltph................stsshshs....pu..sshsshtshhphtc......shclttLt.lsctlp............................psh......css.ssts............sslst.hhssh.shht..sl.phtcthtt...........................................usshpp.lssshhss............s......sltphhsshss....................................................t.sL..hthcths..st.Ltp.hlshLss........p.t.ph.......hhthh.s....hsh.hphshhPh.....tt.c.......slclsplSs-stsh..Lssshps........pKLsGhpLtpFGAFapRuWRtNDahWGRLDGAshLVclLLsspc......................ht.................hhtthspptt....t.ttphhhs.hpp.tt...h.t.t.................................hhs......-hshhs....................st..ltt...th..plht-Ehstlh................................................................................................................................................p.............................lupssstst.hht.h......................................................................................... 0 13 30 34 +11690 PF11857 DUF3377 Domain of unknown function (DUF3377) Assefa S, Coggill PC, Bateman A anon PFAM-B_3829 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 70 amino acids in length. 25.50 25.50 26.00 29.40 25.10 25.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.53 0.72 -4.35 20 249 2009-09-11 16:26:36 2009-06-18 14:39:46 3 11 47 0 110 190 0 73.20 44 12.91 CHANGED sssscccccthspDs..s-lllcl..D....-s.suossAlAVVIPhlLsLClLsLlYsllp.F++KGTPR.................+lLYCKRSlQ-WV ..................s...ttc.-tttspDc.....scIllcl..D....-s...uuoVsAlAVVl.P.slLhLClLsLlYslhQ.F+RKGsP+.................plLYCKRSlQEWV. 
0 4 15 44 +11691 PF11858 DUF3378 Domain of unknown function (DUF3378) Assefa S, Coggill PC, Bateman A anon PFAM-B_3989 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 80 amino acids in length. 27.00 27.00 27.00 27.70 26.90 26.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.64 0.72 -3.92 33 937 2009-06-18 13:41:57 2009-06-18 14:41:57 3 2 910 4 78 516 5 78.90 34 26.11 CHANGED sshVl.pssppplpphpppYtshhssphsssshFtAKhsssoIosYpSGKVlFQGpsAEp.Au+a.th.spppttsssssspsh .........................sIll.phopcsIpshhppY.ps..h.s.s.sts.shhpahuKhsssTlolY.pSGKVhFQGp...pAEthAspa..th.st.ppptt.t.....s..................... 0 18 35 60 +11692 PF11859 DUF3379 Protein of unknown function (DUF3379) Assefa S, Coggill PC, Bateman A anon PFAM-B_2469 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 234 to 251 amino acids in length. 26.50 26.50 26.80 29.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.45 0.70 -5.19 25 166 2009-09-14 14:58:51 2009-06-18 14:53:41 3 2 162 0 38 115 10 236.60 48 98.12 CHANGED MDELEFRR+hhuDPpspDs-hLpthppsspcppFls-lppLDt+lppALc.VDVPDDLADKlLLpQ.....phppcp..........spR+p.s+h.tlAlAAS...VAFssGlhh..u.lphus...............ssLu-pALuHVaHE.s.hshphDcslshppVNAKLushusphstcF...PG+VaYssaCcFpG.s+uLHLVhQGcpG.KVTLFIVPlcschsh.t..sFsDsphpGhshtsssAshlLVGEpupDLshlpcclppshh ............................................MDELEFRR+lhSDPKp+Ds-hLshhsuS-uNsKFlDDlLpLDppIspAhK.VDVPDDLADKILF+Q....lc-.c+.................llRPpFsR+.AMAlAAS...VAFsAGLll..GQlpWGNhhl........sPApASLu-hAlpHVhHEcs..FVpcl.DEpsshpQINAKMtPFuhphpucF...PYHVYYLNHCGFGc.sNAlHMVFQGE+G.KVTLFlsPIcStpss......tFppcGMsG...llpPl.usASlILVGEcsEsLsslAp+Lhshlp................................ 
0 6 14 26 +11693 PF11860 DUF3380 Protein of unknown function (DUF3380) Assefa S, Coggill PC, Bateman A anon PFAM-B_2757 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 194 to 284 amino acids in length. This protein is found associated with Pfam:PF01471. 41.90 41.90 41.90 42.20 39.00 40.60 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.24 0.71 -4.23 35 230 2009-06-18 13:56:43 2009-06-18 14:56:43 3 11 203 0 58 211 23 167.30 36 58.52 CHANGED lphAslhAVscVESsGt.GFh...s....ss+PtILFEtHlFaRpLtt...........t..stphssphssls...s........p.tGtYts................usuca..............pRLcpAtulsp.....cuAhpSsSWGhaQlMGa.paptlGY.sSVpshscsMpt.uEspQLcuhl+Flct....ss.LhcsL+s+c......WssFA+tYNGPuatcN..pYDtKLspAYp+as ............................................................-.AslhAlscVESsGp.uah........sstcPtILFEtHhFhRplss............................................thhs.pts..sls...ss................p.tGtats..................tstca...............+LppAhslst.....cuAlcSsSWGhhQlMGh.pa....phhG.Y.sSVpshVsthpp.u-stQlchhl+FIct...............st.LhpAL+..s+c......WssFA+tYNGPuascN..pYDt+l..scAap+a................. 0 12 32 44 +11694 PF11861 DUF3381 Domain of unknown function (DUF3381) Assefa S, Coggill PC, Bateman A anon PFAM-B_2792 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 156 to 174 amino acids in length. This domain is found associated with Pfam:PF07780, Pfam:PF01728. 
25.00 25.00 34.50 33.80 22.80 23.20 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.10 0.71 -4.73 39 321 2009-06-18 14:02:02 2009-06-18 15:02:02 3 10 278 0 238 322 5 161.80 31 20.48 CHANGED lhps....c+K++pRpGY--GDhhl.a+phslh-Fl+s-..cPl...shLuphNclsh............cD.thchlpchcpTTsElhpChcDLKVLG+K-F+tLL+WRpplRchluhspt................ppcpt.......phElcthsEEppl-..cElp...p..hhE+pptct+.+c++cpscpKp.....K-ll+.Qhphh ......................ppKKpKt-GY-EG.D.hsl..a+phsss-Flpss..sPl...shLup.hsclsh.....................................................sD..t....ptlpcp.tTTcEI+tCCcDLKVLG+K-h.+t..LL+WRhcl.Rchhuhphcpptt................pttpp................................tt-s.pshc--t.plp......c-lp.......p....hhccp..t...tc.hK..+..cc++..c.pc+Kp.....Kchh+.php................................................................................................. 0 84 133 200 +11695 PF11862 DUF3382 Domain of unknown function (DUF3382) Assefa S, Coggill PC, Bateman A anon PFAM-B_2882 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 100 amino acids in length. This domain is found associated with Pfam:PF02653. 28.90 28.90 29.40 29.40 28.70 28.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.22 0.72 -4.10 45 897 2009-06-18 14:04:04 2009-06-18 15:04:04 3 5 808 0 148 478 130 101.60 41 23.71 CHANGED hspsL+pAlhuAllshllhssllGlpLp.t.GspLslp..sp.s.hhhhVuhuslshFlhpLa......+shls.tshsssp..hsslss......shsptpRhllhsLllhAlla .......................h...plt.ALhSAshhhlLAulhhGlQL-...LDGT+LVV-sAuslpWpaVhIusAlVFhFQLl......RPhhp..+ulKsVS..us+alLPu........h-Guos.+QKlalhALLVlAVsW.............................................. 0 18 50 96 +11696 PF11863 DUF3383 Protein of unknown function (DUF3383) Assefa S, Coggill PC, Bateman A anon PFAM-B_3017 (release 23.0) Family This family of proteins are functionally uncharacterised. 
This protein is found in bacteria and viruses. Proteins in this family are typically between 356 to 501 amino acids in length. 27.00 27.00 27.20 27.00 25.80 26.90 hmmbuild -o /dev/null HMM SEED 481 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.33 0.70 -5.82 26 463 2009-06-18 14:05:44 2009-06-18 15:05:44 3 3 282 0 49 410 14 282.30 18 97.36 CHANGED slsclVsVslslsstuusst.shsshLlhs......psshssspphpsaoohpsVus.sFGssoscYpsAthaFuQ.....sspPsplhluRasp......s..s.l.u..ht......s.hpth.sGshplshsu...h..stlshst.hshsssAt.hpstht...........ss.hs..tttahltssssGs.....................................................................................................................hs.s.sss.huthlthpss.t....sh.s.uhss-ohspAlsshts.hsssWashthust..hsssphlAlAsahp.....ttshp+lashsspcssslsss....ssslhttlhs.sshppohshYsss........ssY.sssuhhuthhossasuspstlohta+p..sGlsu......-.slssopusALcspssNhasph......ssshuhhpcGhhsuG....pahDphtshsWLpstlpsslhslLhss..sKlPhs-sGhshLhusl.psshspulsNGhlusGh..........stshGshssGchlt.tGYalhs.ss.hsp.upssRpsRptsshphshphuGAIHpl-ltssls ...............................................................................................t....hh............................h..h....t.h...............h.hs..hh.............h.h...........................................................................................................................................................................................................................................................................................h.t..t......tah.h.........h.................tth..huth.t..............t.thh...hh..st..t...........................t....h..h.............t........hhuhh.h.sh..tt.................s..hhhpt...ul.........t....h....s.sthtth.............t...t..h.hh..........tth....httGhh.ss.....ahD.hh...ah.t.hp.th.th...h.p.....tpls.sttG.shl..s.h...ttshtthht.Ghh....................................s...a.h.........t.s.t.httR....h.h.h..usulp.h.h........................................... 
0 9 28 40 +11697 PF11864 DUF3384 Domain of unknown function (DUF3384) Assefa S, Coggill PC, Bateman A anon PFAM-B_3114 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 422 to 486 amino acids in length. This domain is found associated with Pfam:PF02145. 28.10 28.10 28.50 29.10 27.30 26.70 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.63 0.70 -6.02 26 254 2009-06-18 14:07:29 2009-06-18 15:07:29 3 12 181 0 161 269 0 408.70 28 26.91 CHANGED pps.lspRhcthcplscu.lppa....sh....ssl.tlWpsup...DLlpsppsscs.R+suhpLLptslpppct...............................ssstRthaFcsI...........sss.sps-s..ls.......hpLpuLpsLT.....ccG+cl..sa.hppslhshlspal.shh...............t.h..t+...tpt.........ts.s....-sshlsslLphls...sll+astphhsppplttllpplhpl.CppToss.............................................................................................................................................s-l....pssLplhDulIp..YuslPspohsshlplLC...................................................................................................................................................................................................................................................................................................oh.hshs..............slspssh+.....slcNLhco+htpphlpsLpshL.......tssppp....hp........shslLR.........................GAltllphllhss...tppthst...............lphs.......lhsuhhsslc....pssstlsh-llphhpsll......tphsph.hhppsWs....hhslhsp...................................hhsphtshttstss.tp............................pplhsphpplhsp.lEpLhcp..hphp.p+c.chhpFlhpstphlssussphllthhpspphssPsss......tWlpshppllcpFh.spspssslRlpslpslpcs 
.......................................................p..s.lspRhchhtplsch.hpph....ph....psl.t..lWpssp...DLl.ps.pp..s.s...-s.RpushpLLpsllptQs.p...............................hs.hhR....thaFpsI.............ps...sss-D..lp...................pLcshpsLT.....-pG+cl..sh.h-pclh.h..lhpWh.............................................................................................................................sh.sphl.hll...sllKFNtshhc.phlsth.lp..lhhl.ChpTsss.............................................................................................................................................sDl....css...LpllDAlls..YsslP.scsLs.hl....sLC..........................................................ph.hsht......................................plsps.sW+.....hh+NLht.o+hG..pssl.hshpplh...........................psps.........hc.................sssllR.........................GAVhhlth.hLhus........cth.s......................lp.s.......llsuhhp.Ahp....ssst.h.Vsh.Ell.hslppLl......tchtpp.ltth.sWs...hll.slhpp.....................................lhpplpsh.pt.p........................................................lp.s.lpcllss.lEpLhpp..sphp.sspcchhpllpps.tcphP-ushh.ll.....hctpphpPscs.tWlpsl...phlhcpFa.pppp...psslRlpsLpsl...s................................................................................ 0 46 76 129 +11698 PF11865 DUF3385 Domain of unknown function (DUF3385) Assefa S, Coggill PC, Bateman A anon PFAM-B_3188 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 160 to 172 amino acids in length. This domain is found associated with Pfam:PF00454, Pfam:PF02260, Pfam:PF02985, Pfam:PF02259 and Pfam:PF08771. 
27.60 27.60 27.60 30.90 27.50 26.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.31 32 388 2012-10-11 20:01:02 2009-06-18 15:09:47 3 34 278 0 259 390 7 159.30 37 7.12 CHANGED VlpPYhcYPpLLslLhslL+sE.pp.hslRpEsl+llGlLGALDPa+a+....plppstp........s......tppss.ssshhLhh.uhts......ss--aassVsl.psLhp..ILpDsSLspa+s.sllpuI.......hpIF..ps.hulcC.lsaLspllPshlpslR.ssss.shhEh.happLupLlslV+ ............................VlpPYhcYPpLLslLlshLKoE..ps..tslR+Esl+lLGlLGALDPYKa+........lpppts.......................................................stp..t..tspss..s.ss..hL...l...u.h.s.....................st-EaYPsV..sI.ssLhc..IL+DsSLupaHs.tV...lpAI.......hhIF...+o.LGl+...C..VsaLspllPshlsVlR....ssss.....shhEh.hFpQLuhLlslV+.................................. 0 97 151 223 +11699 PF11866 DUF3386 Protein of unknown function (DUF3386) Assefa S, Coggill PC, Bateman A anon PFAM-B_3390 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are about 220 amino acids in length. 25.00 25.00 30.80 30.60 24.70 21.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.15 0.70 -5.14 27 98 2009-06-18 14:28:42 2009-06-18 15:28:42 3 3 89 0 35 100 142 208.80 36 90.83 CHANGED hssssssc-lFRsAYENRYTWDssFsGYpucshap....ps-cphpGphclu.sDLKspVpGI-Dpclt+ultuQLaEVsIHRVRRoFEpsHGcNoFsh.GcsspsG.lEl.lVuGKutGD+Y+l+sshlshVaR+IHGsllTIpThsspDTGpG.YLS+pYsS.YtDPpTG-tpuu+spFcDpat.l....ushWlLspR.sIcs.cspuppss....pp.......FpFss..........lphL ...h.hpphsAc-lFRsAYENRYTW.DtsFPGYpAclthp....pssphapGphpls..sD..hcscVp...sl-D..EclpculpsQLh-lslHRlRRsFE.psHG...cNs.Fsh..GcscpsG.lEl..lV.uGc..u.GspY+l+ssplshVpR+lcusslTIpTtssh-TGpG.YLS+pYsuhapDPpTs-h+s.....s+ppFcDpatpl....GsYalLsp.R.sIcs.pspupp.h....pp.......FtFpslphL.............. 
0 9 24 33 +11700 PF11867 DUF3387 Domain of unknown function (DUF3387) Assefa S, Coggill PC, Bateman A anon PFAM-B_3465 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 255 to 340 amino acids in length. This domain is found associated with Pfam:PF04851, Pfam:PF04313. 26.80 26.80 27.50 27.10 26.40 26.70 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.89 0.70 -5.48 52 900 2009-06-18 14:30:44 2009-06-18 15:30:44 3 11 772 0 213 818 195 283.60 26 33.78 CHANGED osutG+GcsslDtpcAl.slhhEKh-ll+shh......+GFDYpsahsuss..pplphlssAhsalL..............u......c..........s..uK+RFhcsVhsLoKAauLsssp-E.Atsl+-EluFFpAl+utLhKhssssp..t..ssp-...hphsl+QllspAlso-t.VlDIFssuGlc+P-ISlLSD-FLt-V+phc.cKNLAlElLcKLLs--I+sRp+sNlVps+pFoEhLcpslp+YpN+uI...........soApVI-ELIphAK-hppstpRu-cLGLop-ElAFYDALAs.N-SAlc.hG--pL+pIAtELspplRpssol.DWs..tREolRA+lRlhVKRlLR+YsYPP.DhpppAscpVLcQA.Eh...lu....ppas 
...................................................................................................................................................h...h.pth.ph.ht..thh.........thsh..t......h.......t............t.h...thh.......t...s.phlh.........................t..........p..hc...ppFht.shthtpuhsls...................s............p...t.......tt.......h.pp-.l.ta...httl+shl.h+h......ttst...........s.....tp............hpttl....ppllspul..h.........u.p..t....l........p.....l........h.t....h..........t............p.....l..sl..h.s............p.c.F..l.t.c....l.pphp.....pps..hthph..Lc+h..lppplp.hhcpN.sptppap-hlpphlpcYpspt..l.....................pstphlpc.Llp.h...u.p.chp.p.t.t.p.p...s...p...p.h..G..L....s..t..-.E...h..AFY....-.hLsp...s.............c..sh.....h......p...h...s..........c..........c..........p...L...p..t...l...Ac-ls..ptl....+...p...s...h..s..l...D.Wp....p+-ss+A+h.R.hhl++..l....L+...+...a...t..Y..P..P.....-h......t..ctAhptVlpQAE.ht....t................................................................................................................ 0 79 150 185 +11701 PF11868 DUF3388 Protein of unknown function (DUF3388) Assefa S, Coggill PC, Bateman A anon PFAM-B_3650 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 261 to 275 amino acids in length. This protein is found associated with Pfam:PF01842. 
25.00 25.00 39.00 38.20 20.70 19.70 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.15 0.71 -5.16 8 391 2009-06-18 14:32:00 2009-06-18 15:32:00 3 2 390 0 45 120 0 192.80 73 72.19 CHANGED VTKhRpPKLRDRLAlRHGRYIpRDADDKKTFRFVR-ELGLLVDFMAELFK+-GHKLIGIRGMPRVGKTESIVAASVCANKRWLFlSSTLIKQTVRSQLhcDEaSc-sIFIIDGIVSsRRusE+HhQLlRElMRLPusKVVEHPDlFVcpSEYThDDFDYIIELRssssEEIpY-hsEcsphtstssF.......uuFsF ...lTKL+pPcLRDRLAVRHGRYI-pDAcDKKTFRFpR--LGLLVDFLAELFKcEGHKLIGIRGMPRVGKTESIVAuSVCApKRWLFlSSTLIKQTVRSpLIcsEYs.sN.pVaIIDGhVosRcuN.+H.pLVpElMsLPohKVVEHPDlFVcsSphTh-DFDYIIELRcs.spEIpYEchcc.p.ht.opNNh.....s.......... 0 14 28 37 +11702 PF11869 DUF3389 Protein of unknown function (DUF3389) Assefa S, Coggill PC, Bateman A anon PFAM-B_3739 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 80 amino acids in length. 25.00 25.00 28.30 48.20 21.40 17.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.39 0.72 -4.09 17 137 2009-06-18 15:30:29 2009-06-18 16:30:29 3 1 136 0 26 81 4 69.60 55 99.37 CHANGED MllsFSpGKlIso.pElhlRLssss.VsLQApsDslpLlu.sAsVllAsGuss+WSlKLDs-pQLpslupplGlslt .........................MVIpFStGKlIsT.+ElVlRLsttt.lTLQAps-sIpLhu.GANVhlANGSEsKWSlKLDsE-QLpsIAppLGhDl...... 0 2 7 18 +11703 PF11870 DUF3390 Domain of unknown function (DUF3390) Assefa S, Coggill PC, Bateman A anon PFAM-B_3832 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 90 amino acids in length. This domain is found associated with Pfam:PF02589. 
26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.22 0.72 -4.06 34 1109 2009-06-18 15:32:31 2009-06-18 16:32:31 3 7 1031 0 194 665 49 93.20 28 19.55 CHANGED +LRpEshpps....tp..hpupsuptsphEphsaKsauhhsosPslY+.........hhsahssch...pshhP..s.ltsWTpsRshPcPAtcoL+-lh+t+tptp .................................................shs.ttpuhpsts..EphAhKhFuhssopPs..la+huhth.................sthhsa..h..h...ssh.........tsh..sP......u..l...psWscsR-hPpsstco..FRpWa+c+tt..p...... 0 51 126 163 +11704 PF11871 DUF3391 Domain of unknown function (DUF3391) Assefa S, Coggill PC, Bateman A anon PFAM-B_2190 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is typically between 122 to 139 amino acids in length. This domain is found associated with Pfam:PF01966. 27.80 27.80 28.70 28.00 27.60 27.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.31 0.71 -3.93 121 695 2009-06-18 15:45:03 2009-06-18 16:45:03 3 4 415 0 262 656 53 130.20 21 32.31 CHANGED hhh...........+IslscLplGMaVptls..sWp..scsal.h......sphhlcspppIpp.lpppGlppVhlDsp+u......s.s.......................thptssss....ph...thttpph.............t..............hshppch.......ppAp......phhscupshhpplhschptG.pslshpsspt .......................h+lslspLp.GMalpths..sWh..c+PFh.h.......ssFhlcsppplpt....l+p.Glp.pValD..ss+u....t.shs............................................s.ttt.sts...th.......t.tspth....................................................t.sshcccl...........ppup......phhpcutshhpshhsch+.s..thshtth.......................................................................................................... 0 57 149 212 +11705 PF11872 DUF3392 Protein of unknown function (DUF3392) Assefa S, Coggill PC, Bateman A anon PFAM-B_2322 (release 23.0) Family This family of proteins are functionally uncharacterised. 
This protein is found in bacteria. Proteins in this family are about 110 amino acids in length. 25.00 25.00 63.30 63.20 21.60 21.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.19 0.72 -4.07 21 196 2009-09-10 17:12:07 2009-06-18 17:11:28 3 1 195 0 46 106 9 104.90 45 96.17 CHANGED -hllsLlsplupalpPaLs-IulAlVAClLVlaGs-IN+hL+phlushsFllRThsFlLlsAFGYGllllaloPhlspsLtplsshaLsslllssFllIGhhApRp .....lhshLAshuphltPaLSEISlALVAChLVlhGu-INuaL++tLpshpFllRTlsFlllsAFGYGLllVhAoPaluRsLuphssthhhslllssFllIGlhApRp.. 0 8 16 33 +11706 PF11873 DUF3393 Domain of unknown function (DUF3393) Assefa S, Coggill PC, Bateman A anon PFAM-B_2361 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is typically between 188 to 206 amino acids in length. This domain is found associated with Pfam:PF01464. 31.20 31.20 34.20 31.70 28.70 28.60 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.26 0.71 -4.81 30 814 2009-06-18 16:15:39 2009-06-18 17:15:39 3 2 791 0 95 402 21 192.00 64 53.05 CHANGED pKhlhlhhhlhl.....LsuCo............................tp.spptsshsssh............h.+Dppu....lctLhsphu.....pplcchWGpcEhhhAu++cYVKYoDsYpoRuclsF-cGtIhlETlus..pP...pppL+pAIlpTLLhssDPstlDLaS..s.pclslsu+PFLhGQVlDpcGcsIpapWRAsRaAcYLlpN+Lps+plpptp.laaVpIsMVssHhchRut+ .........................................................................KKhLALAlIAPL.....LlSCS........................................ooKKG..ssYNEAa......................VKDTNG.........FDILMGQFA.....HNIENIWGh+EV.lI.AGPKDYVKYTDQYQTRS..HINFD-GTITIETIAG..T-P.......sAHLRcAII+TL.LMG.DDPuSVDLYS..DVcDIpI..SKEPFLYGQVlDNTGQPIRWEuRAosFADYLLpN+LKoR..os....G....L+....l....I..YSVTIsMVPNHLDKRAHK...................................................................... 
0 13 40 70 +11707 PF11874 DUF3394 Domain of unknown function (DUF3394) Assefa S, Coggill PC, Bateman A anon PFAM-B_2758 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 190 amino acids in length. This domain is found associated with Pfam:PF06808. 28.20 28.20 28.50 28.50 28.10 27.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.02 0.71 -4.74 44 269 2009-06-18 16:26:19 2009-06-18 17:26:19 3 4 259 0 78 243 664 173.50 39 20.60 CHANGED culllhlsAslAMLlFuAATQGaFls+o+haEoslLLLlAFoLFRPGFWhDhlhPsapphsssclhphhtphssGpslRlplpG.sh..pGc.hppTlhLsls........-su.sutpRLpsh.GLpLhp..........-ssphllDtltFGSs.A-psGl-FD.a.......pIstlphs.s-RPsKEhhaIPALLLLullshhQ..RRRtpps .....phhLshloullAMLlFoAATQGWalT+s+aWEslhLLllsFohFRPGFWhDhlhPsh..hhsuscltphspphssGpslphhVsG.sh..pG-.h.p+...TVtLPht............-tu...sup..-...Rl.....t.uh.GLt.Lh..p..........ps.s+.h.ll-hV.pFGSP.ActuGl-FDa............cItpVhhs.s-RP.KEa.halPAlLLhhllsh.Q+RRhpp.s.................... 0 24 51 66 +11708 PF11875 DUF3395 Domain of unknown function (DUF3395) Assefa S, Coggill PC, Bateman A anon PFAM-B_2767 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 147 to 176 amino acids in length. This domain is found associated with Pfam:PF00226. 
27.10 27.10 27.60 28.30 22.80 23.10 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.84 0.71 -4.58 28 266 2009-06-18 16:27:41 2009-06-18 17:27:41 3 6 206 0 180 262 3 142.70 35 24.75 CHANGED Phpcppppcthtcp+cppcpplspc+pcAppslplhpstsp+phppEpp+sGLlIhpAhYGphss..................................................thstppshpst...........slDVTlslQshV.ccSpLhlsp.....................ssKupLhGFaDP..........................s....spcKhL+IpYpa+sphHplhlsDs-slplP ......................................Ph.ptppcpphtcp+cpttppltp.++.pEAppAlpLMpps..spRhhptE..c.p+...tGLlIlpAhYGphss................................................................................................ptptppps....................pslDVTlPLQsLV.......cc..S......p......LhLsc........................................ssKus..L.GFaDP...................................................ss...G-pKpL+.VhYpF+uhhHpVhstDp-sltlP.................................................. 0 63 99 146 +11709 PF11876 DUF3396 Protein of unknown function (DUF3396) Assefa S, Coggill PC, Bateman A anon PFAM-B_2995 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 302 to 382 amino acids in length. 
25.00 25.00 26.80 26.40 24.30 23.70 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.41 0.70 -5.21 23 202 2009-06-18 16:28:56 2009-06-18 17:28:56 3 1 102 0 71 241 2 193.20 27 58.49 CHANGED utYpFphh..........uhpp..hp.thspc.ulsslsFolPhtalpp.s.Psh..aptLhhphAppLssppGaAGhuhsl.shshctspspEahhupRa.sGlDVssstps.....tshpls..sc.......................IKsVsWLThlssshlcpLG...GspuL+stL..s.s.hslpshss.GllIpAGsh.PphGsspcss.....hPssYlhlN+sL+slhs..pphstLphtshsupu.h..sptusptWL+RF .....................................a.h.hh..........s.pt.....t.stp.shssLphslPhtalpp..t..ssh..atthh...hthsppLps.pGauGh...uhs...l...sts..hp......p.....t.s...h...E..ahlu.p...+a.sGLpVs.sssth.............tshphh...sp.......................I+slsWlThlupthlppLG................Ghstl+ttL...s.s.hshpsa..........ss.GllIpAGth.PphGshsts.......hP.sYhhlNphL+PlRh..pph..ttLp.h..hsuts.h...sptsottWhtRF........................... 0 8 24 48 +11710 PF11877 DUF3397 Protein of unknown function (DUF3397) Assefa S, Coggill PC, Bateman A anon PFAM-B_3446 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 114 to 128 amino acids in length. 25.00 25.00 26.60 26.40 23.20 22.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.81 0.71 -4.09 22 609 2009-06-18 16:30:47 2009-06-18 17:30:47 3 1 607 0 69 290 1 112.90 36 95.07 CHANGED hhhhshhhllls.lhshllsth...hplp+p....th.ph...sDluh.hhhlulhhl.tphaspShlshlllhl.llulhlslhhhhpppphha++hhKhaWRhsFLlshhhYlsL....llhshhh ..............hlh.hlhllLshllshIl.sph.....FpLp+h.....th.pF....sDLAhPhLlhthalloscsascsh.L..Ph.l.hL..sl.lLuI.l.ls.h.ahhhKccs.....h...hYs+FhKhFWRhsFLLThlhYlth....llhhhh.h........... 0 18 39 50 +11711 PF11878 DUF3398 Domain of unknown function (DUF3398) Assefa S, Coggill PC, Bateman A anon PFAM-B_3712 (release 23.0) Family This domain is functionally uncharacterised. 
This domain is found in eukaryotes. This presumed domain is about 100 amino acids in length. 28.30 28.30 28.50 28.90 28.10 28.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.29 0.72 -4.16 20 472 2009-06-18 16:32:43 2009-06-18 17:32:43 3 9 84 0 212 372 0 93.00 36 5.31 CHANGED hp+tsplpssPLR-LlcFPsDDlplphlsRctRTlpssVPc-s.pph..sha............V+-ClcsYsp-WplVph+Y.cphSushp.hhsppphchp+.....LspQsFE ...............h.p+.splps.ssLR-Ll.FP.DDlplshlsRctRTlpsoVP--u..pch..sha.............................................................V+-..Cl+oYspDWtlVshKY..cph.Su.s.a.p..p......h.s.scphchpK.....LPppsFE................................. 0 39 55 115 +11712 PF11879 DUF3399 Domain of unknown function (DUF3399) Assefa S, Coggill PC, Bateman A anon PFAM-B_3857 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 100 amino acids in length. This domain is found associated with Pfam:PF02214, Pfam:PF00520. 25.00 25.00 30.60 30.00 24.50 24.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.69 0.72 -3.80 11 201 2009-09-10 20:21:33 2009-06-18 17:34:18 3 7 78 0 106 192 0 103.70 45 19.28 CHANGED LppK+NG.LsptLp.sustt-p.hhsKspShhEpQHHHLLHCLEKTT...................sH.EFlDEph.a-pshhEsuhtshs.S+SsSlSSp.....ulosoCCoRRsK+..shpLsNushsu ........................lppK+NG.h.p.uLp.s.....Gs....t--p..hhsK..s.pS.h...FEpQHHHLLHCLEKTT...........................sH.EFlDEph.FppsshcsuhtshsoSRSsSlSS...ps.........u.lsooCCuRRtK+..sh+lsNushs...................................... 0 18 27 59 +11713 PF11880 DUF3400 Domain of unknown function (DUF3400) Assefa S, Coggill PC, Bateman A anon PFAM-B_3996 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 50 amino acids in length. This domain is found associated with Pfam:PF02754, Pfam:PF02913, Pfam:PF01565. 
25.00 25.00 33.90 32.40 23.40 22.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.31 0.72 -4.74 20 251 2009-06-18 16:36:00 2009-06-18 17:36:00 3 7 245 0 83 236 35 45.40 65 3.53 CHANGED SRYs-Ds.s..l-ADYIVVEMA+HlLGENWhs-YVt+ANsGGIERVLl .SRYs-Ds.s...lpADYIVVEMA+HlLGENWhs-YVp+ANsGGIERVLl.. 0 15 52 69 +11714 PF11881 DUF3401 Domain of unknown function (DUF3401) Assefa S, Coggill PC, Bateman A anon PFAM-B_2478 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 231 to 250 amino acids in length. This domain is found associated with Pfam:PF02145, Pfam:PF00595. 25.00 25.00 40.40 34.60 24.90 24.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.96 0.70 -4.97 11 199 2009-06-18 16:41:43 2009-06-18 17:41:43 3 5 38 0 83 145 0 232.40 45 15.83 CHANGED K+hpusussh.upsRhRAoLRDlc.SPptstKSol--DlKKLIh..DSPsstpp+ch................s..sssSs.RRSLaRTLSDESlh.SGpRpsSauoScu.hL-QuLPsDlLFo......sSTLP..........Pp..shPhpp.uht.uhpsl+u-hSASDsSLsDh.-p.+th.h.DPGLMPLPDTAus..L-WSpLVDAA+AFE...................sQRss.Fsohs-s.ppupshpsstp......spphsspsspsh........hsuccSPss.....................LsGKVsQLEshLK ............................Kphpossppl.upspLRASlRDl+.SP+ts.hKSTlE-DLKKLIh.hDSPssEpp+ch................................s..ss.Ss.RRuLpRTLSDESlh.SupR-sSFuSstS.hL-puLPsDlLFo......sSThP...............Pp...shPhpp.u.ht...Ght..sl+...u-hSAS-sS.LsDh.-p.+....h.DPGLMPLPDTAus..L-WSsLVDAAKAaE.........................................................sQRus.h.su.s-s..pps.sh..tssps............ptssss.+sh..........ssu.c-u.Pss.....................LsuKVsQLEshL+............................................... 0 3 11 31 +11715 PF11882 DUF3402 Domain of unknown function (DUF3402) Assefa S, Coggill PC, Bateman A anon PFAM-B_2702 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. 
This presumed domain is typically between 350 to 473 amino acids in length. This domain is found associated with Pfam:PF07923. 25.00 25.00 26.30 26.90 23.10 24.90 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.35 0.70 -5.42 22 336 2009-06-18 16:43:09 2009-06-18 17:43:09 3 9 231 0 228 328 0 384.80 36 46.72 CHANGED PhulhEAsclaus+l+hohuh+QLWcER...-cFh+h-RG..........tsspp..s.s.-lpp.h.p.p.p.hhphst.p...pt..........ssppplp......hlptVEshYppsLspLpohlhVLL+hllushs.........................sphptts......shhsss.scp.s.t...................sh..pl-hhRt+EIssKAlSulLlLLLKWFKlSHlLKFEYhoQLLhDusalsLlLKhFsh.............................pshsphlss+sch...phsFaphC...........hh...........................................................................hs-.lsphuhssst.sptsht...a.shRNF.FSsINhL+IhpK.lsKpKspRhhhLspaKSSsILK+hL.KlspspLphYsLKlhKpQlPYpGRKWRp..uNMclIouIYLpsR.cL+D-WLsGsDl-u-hp-uhspEpALRuLlcaaNhRRY................................c.M........................sht...thtthhp....................................................................................Ept-hFhpph 
....................................................................................tu..ass+l.+.pchpp.L.ppR...p+FhtaphG..........................................tssps..ssh..s...psl..p.cthpphppch.hhshschphp..pcth.................................................ttpc-lp........sssEhhYps....hLPsLsphhIsLLKllL..usss............................................................sspscspu................hNhhscshspphs...psh.......................psh..p.tlDlsRpKEIhsKAlSulLLLLLKaFKlsH........lhpFEYhuQhLl.uNhlPLlLKhFs..............................................................................Qsl.phlssKssh.........s.hsa.ptshtp.....................................................................................................hsE.lst.uh.sssssp........a.saRNh.FSsIN.hLRILpK.loKhKppRs.h................h.....LV.aKSusILK+sL.KV................p.shhpLYlLKLlK.Qs.YhGRpW..Rp....SNM+shoAIY.psRhcLpD.DWh...hG..........s...D..l.....Ducs...-h.s.p.EpsLRu.lcha....N.RRY.......................................................sc.h..................................................................shp......thpthht........................................................................p....................................................................................................................... 0 68 106 173 +11716 PF11883 DUF3403 Domain of unknown function (DUF3403) Assefa S, Coggill PC, Bateman A anon PFAM-B_2739 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00069, Pfam:PF08276, Pfam:PF00954, Pfam:PF01453. 
27.30 27.30 27.40 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.48 0.72 -3.69 31 401 2009-09-14 08:07:09 2009-06-18 17:45:01 3 35 35 0 184 445 0 45.40 38 7.07 CHANGED SEsssIPQPK.PGaCltRush-s-sSSSpph..s-ohTVNQhTsSVIDAR .......o-ssplPpPKpPGaslt+s..sh-s.....s..sSSopp.....-shoVNphTholl-uR......... 0 18 139 156 +11717 PF11884 DUF3404 Domain of unknown function (DUF3404) Assefa S, Coggill PC, Bateman A anon PFAM-B_2879 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 260 amino acids in length. This domain is found associated with Pfam:PF02518, Pfam:PF00512. 25.00 25.00 27.80 26.90 19.90 19.10 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.61 0.70 -5.50 10 181 2009-06-18 16:46:17 2009-06-18 17:46:17 3 3 113 0 17 103 2 254.20 48 54.22 CHANGED hlhus.AaAD..SLPE..RIDsFpphFshpsAspoYDlRplQusYPTpLLoPsShLPQTucYPLK-IQpLYplApTCpGphPL..SPLlTEPLVFTRAlC+GopLsscWFuRSGLIHPGGGSYAsRYlpKaP-pcppLtpaMHI+ERslAspspLLu+L.QpMss-uIsALluGushFls.s-ELWLR+GstYalaspssWpssssstsLshslhopsssCalppGNICWcl.ED+SclLpauhIhLllANlhLllG...WulYRWNsKRcEM+ .........................hlss.sphD..sLPE..RhslFhphh..ppuos.hs.polp.pYPptLLoscShhPphopYshpDIptLaphAppC......pu+hPh..SPhls-sl.FphALC+t.osLuscWFsRsuhlHPuGGoYAt.RYl.E+aPsp.tsL....hsahHlpcps.Aucs.....plL.ap.L.QphupsulsALluGhphalu.ss-LWLppss.......taalhspppWps.ssphslohs.hp.Ac.psChhphuNlCas...cspSclL......hhShllL.lhh.slhLlhG...hSlYphppc+pEhR........... 0 4 6 13 +11718 PF11885 DUF3405 Protein of unknown function (DUF3405) Assefa S, Coggill PC, Bateman A anon PFAM-B_3057 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 636 to 810 amino acids in length. 
25.00 25.00 27.20 25.40 24.80 23.70 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.97 0.70 -6.26 25 217 2009-09-11 13:24:11 2009-06-18 17:47:24 3 5 82 0 180 215 1 433.40 36 68.33 CHANGED lCF-RauRaGPY.....GhG..ht...................................................ps-+puhppsht......................pVD..W.cpV...........sWuphQpcChp+NppR.Fts..................pssphpshshsp...ttt.t.....tt...........................phsRTAlllRsapsapaT-cDlhhlRuLIsELSLpS.GGEYsValLlcVKD.sslt..Iaus..--sYp+sLc-s.lPtEF+uMssLWsEpphphhYPs.lpc.h..................+tsaputh.hPlQhFuhpaPEaDaaWpWEhDsRaTGHaY+hhs+lspaA+pQPRKuLWERNuRFYlPuhHGsa-.sFpphVcht...............................................................tu.ccslWGP.......p....tshss.PPss....cD..cYcWGVGEEADLIshsPlFDPp.sTsWlhcs-lpGY.stp...s.tsPRRAulITtuRlSR+LLtsMHcEshtt+HthhSEMaPsTsALa........HG..............hKAVasPHPVahD+pWssctlsphaN...uG.sssoGGs+sSsau.s+Epsh........+..GsoWaYsusaussLacRWLGa.chss....sGGc...t........hc.................stcGRhCLPsMLLHPlKc 
.......................................................................Ca-RhsRhGPY.....Ghu.................................................................ppts.tt.htt..............................lc..a..ppl...........sWuphQppChp.tN.ttRFt...............................................t.t..h..ht.......................t..............................................................................................sRoAlllRsassh...pasppshh.lRuhIsELSLtS.GGEYpVahLlcV+-psh....I.asD..pcsYpphlpcs.lPtEFpuhshLWs-tththhYst.l.c..........................+slaputa.hshQhFu..hpHPEaDahWpWEhDhRaoG+aYchhs+ls.pau+pQPR+tLWERstRaYlPshH.Gsa-.sFpphlc...................................................................s..ppslWGP....................p...........t..shPPs..s.....pD.....papWGVGE-ADhIshsPlacP...sosWlhps.clhGY.ptp.....................sPRRssIlTtoRhSR+LL.sMHpEshhtpH.hhoEMhPsosALh........HG......................hKAVasPHPlahD......RpW.ss..p...htphaN.............sG.su...touu.tsSsau.tp....Epph..........p..GhoWaYputhsspLappWhGh.p.ss...tGGp.t........hE..............................ttpGRhCLPshLlHPlKp............................. 0 44 95 152 +11719 PF11886 DUF3406 Domain of unknown function (DUF3406) Assefa S, Coggill PC, Bateman A anon PFAM-B_3286 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 270 amino acids in length. This domain is found associated with Pfam:PF04548. 
25.00 25.00 25.60 25.10 20.30 19.70 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.23 0.70 -5.55 9 103 2009-06-18 16:48:28 2009-06-18 17:48:28 3 3 27 0 72 111 2 243.70 45 26.62 CHANGED sstPssVuVPhPDMsLPsSFDSDs.PsHRYRhL-ssspaLsRPVL-sHGWDHDsGaDGlslE+thslppphPu..osssQVoKDKK-sslph-uuhShK.HsEstoohsGaDlQolGKcLAYolRGET+FKNh++NKTsuGlSsTaLGDslusGlKlEDplhlGKplpLlsSsGsMpupGDsAYGuslEApLRcKDYPlupphooLGLSlhcW+uDLAlGsNLQSQh.lGRsSphss+hsLNN+toGQlolRssoSEplpIALlullPlhtulhpphps .............s...ttssVPhPDhsLPsSFDSDs.PsaRYRhL-s.ssphLsRPVL-scGWDHDsGaDGlslEpshsltsp......hPu..uhssQloKDK+-hslph-susuhK.as-stoohsGhDlQo.lG.+pLuYslRuET+F+Nh++NpTsuGlShThlGcphssGhKlEDplhlGKRltLVsssGshpup....GDsAY..GushEspL+t+DYPlsps.uoLuhSlhpW+.t-huluuNLQSQhplGRsoph........ss+ssLNN+tsGQlol+ssoSEplQIALlullPlhttlhpph......... 0 13 41 56 +11720 PF11887 DUF3407 Protein of unknown function (DUF3407) Assefa S, Coggill PC, Bateman A anon PFAM-B_3559 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 360 to 454 amino acids in length. This protein is found associated with Pfam:PF02470. 
26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.60 0.70 -5.11 43 1339 2009-06-18 16:50:23 2009-06-18 17:50:23 3 3 164 0 385 1189 0 215.70 19 52.68 CHANGED psLsslLcplDPsKLNuTLoAlApALcGpG-clGpulsshNslLtplNPphPslpcDlpsLsslu-sYu-AAsDllssLcsuoTTSpTlssppssLDulLlussGhusoGs-lLssspssLlcusssLtPTspLLscYSPthsChlpGh.tthssthtcshG.G.NGhultlsss...............................................hlhGss.PYpYP-sLPplsA.+G.G.tt.PuCh...sLPc..sscs.....a......Ps.hLV.ssTGh.........ssssa.p..............s.sl..................GpPhhtpal................h.Gst.sGsss.........................................................................ssP .........................................................p......lsthlssh....upul.s...G.p.G.spls.psl.......sphsplhspLs..s...ph..s.s..lt..p..s...lcsLsslssshucsssslhphlsshs.s.sspslssppspLsshLt.......shsshuss.s.s......shlspspspLhp....sls..pL.ss.sh.phLsphs.s.pl.p...h...lt.h.......h................................................................................................................................................................................................................................................................................................................................................h................................................................................................................................................................................ 0 62 221 326 +11721 PF11888 DUF3408 Protein of unknown function (DUF3408) Assefa S, Coggill PC, Bateman A anon PFAM-B_3594 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 128 to 160 amino acids in length. 
50.10 50.10 51.50 51.20 49.70 49.70 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.72 0.71 -4.24 32 653 2009-06-18 16:51:41 2009-06-18 17:51:41 3 1 126 0 52 483 10 133.40 20 94.25 CHANGED sppplccctlhchhttts............................spsssptsssscpsppppssts......................t.ptpppcts.p-YcppFL.pssplssRps..lYlsc-l+-+ls+llpsluttcholuual-NlLpcHhcpap-pIpphhppphp...........a .....................................................................................t.................................................tt.tt.....t.p...tp.tttt.................................tp.tttpp+tphp-YcptFL..p..ssc..hp..sRps..lYlsp-h+c+lppllpslGp..pcholuual-NlLccHl-pap-cIpphhtpp....ph...... 0 20 47 52 +11722 PF11889 DUF3409 Domain of unknown function (DUF3409) Assefa S, Coggill PC, Bateman A anon PFAM-B_3824 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in viruses. This presumed domain is about 60 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF05550, Pfam:PF05578. 32.40 32.40 56.90 39.50 28.30 27.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.84 0.72 -4.27 8 239 2009-06-18 16:53:22 2009-06-18 17:53:22 3 16 46 0 0 263 0 55.50 75 2.50 CHANGED Dcsus+p+cKKPDRl+KGsMKIsPKEoEKDSKTKPPDATIVVEGVKYQVKKKGKV+ ........-cGAsspKppKPDRlcKG+MKIsPKEsEKDSKTKPPDATIVVEGVKYQVKKKGKVK........ 0 0 0 0 +11723 PF11890 DUF3410 Domain of unknown function (DUF3410) Assefa S, Coggill PC, Bateman A anon PFAM-B_1956 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 90 amino acids in length. This domain is found associated with Pfam:PF02826, Pfam:PF00389. This domain has a conserved RRE sequence motif. 
27.20 27.20 27.40 27.40 27.10 26.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.84 0.72 -4.33 64 939 2009-06-19 10:12:49 2009-06-19 11:12:49 3 4 934 10 136 570 33 80.40 53 21.55 CHANGED slLPss.lsplslstph-..ps.sLtpLs+hlYDlRcDDuhh...R.csh............tpssuFDtLRKpY.....RREaSuLpl.t......spspss..hphLptLG .....................t.LLPsPphu+lsLc..u..sLD....ps.TLK+LsHLVYDVRRDDAsL...R+su...................uhPGpFD+LRKNYh...-RRE.WSSLhV.h......s-ctss....AulLscLG................. 0 28 58 100 +11724 PF11891 DUF3411 Domain of unknown function (DUF3411) Assefa S, Coggill PC, Bateman A anon PFAM-B_1986 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 168 to 186 amino acids in length. This domain has a conserved RYQ sequence motif. 25.00 25.00 30.40 26.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.01 0.71 -4.58 14 218 2009-06-19 10:19:12 2009-06-19 11:19:12 3 6 31 0 146 212 4 165.50 34 39.91 CHANGED sRhLADPpFLaKluhEpslshsssshuEhppRt-p.Fh..sEh-hshsshlsuslsshhlValhAPshuhsssus..su................thpthhtslP.sNsFptuh.shppaoltpRluolhhKGsphusVGhsuGllusuluNsLhst++thp.....pp......sspsPPlhpouhsaGsFhGlSuNlRYQllsGl..- ........................tRhLADPp.FlaKlshEpslslsssshu-hppRtcp.Fh...pEhDhshssllhuslsshhlVaL.APshuhtssss.......................thtthhtshP..sshFptuh....shp.p..asltpRlushhh+GsphusVGhsuullGs..............uloNsLhsh+.+php.....ps.....................sspssPlhpsAhsausahulSuNlRYQllsGl....................... 0 38 100 128 +11725 PF11892 DUF3412 Domain of unknown function (DUF3412) Assefa S, Coggill PC, Bateman A anon PFAM-B_1106 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 120 amino acids in length. This domain is found associated with Pfam:PF03641. 
25.00 25.00 32.00 58.30 22.50 20.80 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.63 0.71 -4.29 49 901 2009-06-19 10:27:43 2009-06-19 11:27:43 3 3 892 10 132 465 51 123.00 71 27.74 CHANGED uMppV+paR+spuDAYpFNWSL+Ip.-FQhPF.PTHEsMAsLsLHhsQsscpLAANLRRAFSGIVAGNVKp-GI+tIEcaGPFcl+GDspLMcthDpLLpuFVpQpRMKLP.GotY.PCYcI.ss .......MPhVKEpRR-TGDAYSFNWSh+IsPDLQhPFEPoHENMANLcLa.D.QPsElLAAsLRRAFSGIVAGNVKEsGIRAIEcaGPYKIpGDt-lM++MDcLLQuFVAQHRMKLP..GSAYlPCYEIs.s............................... 0 23 53 95 +11726 PF11893 DUF3413 Domain of unknown function (DUF3413) Assefa S, Coggill PC, Bateman A anon PFAM-B_1403 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 250 amino acids in length. This domain is found associated with Pfam:PF00884. 27.10 27.10 28.60 27.80 26.60 25.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.56 0.70 -5.50 39 977 2009-09-14 14:01:40 2009-06-19 11:32:37 3 2 864 0 128 567 33 247.20 52 42.40 CHANGED Mlpptp..pa+-clSphlsWGHWFuhFNlllAhllGsRYl.hhssa........PsThlGhh...YhhlShlGHFoFLsFhhaLlhlFPLoalls.p+hhRslusllATlulslLllDT.laspashHLsshVacLlhsstp.spls....tpWthhal...shPl.Illlphhhupah...Wc+lcplp+p.+.hG+hluhhhhhsFluSHllaIWADAshY.pPIThQcssaPLSYPhTA+oFhcK.aGlLsppphppphptptpsps.........plpYPlpsLphs ..........................................................................MVsppp..pYRE+VSQhlSWGHWFALFNILLu....hllGSR.YL.FluDW...............Ps.TL..sG+lY.hl.S.hlGH.F.SFLVFAsYLLlLFPL.TFI...V....h.....Sp.....R...LhRF.LSsIlAT.AGhTLLLlDoEVFsRF+LHLNPlVWpLllNP-p.sEhu.....RDWQ..L..hFI....ulPl..IL.LlEh..lFAsWu...WQ.K..LRSLsR+.....+...auR..P..L..A..Ahhhlu...Fl..A..S...H.llYIWADAsaY.RPITMQRANLPLSYPMTAR+FLEK.HGL..LDuQ....EYpRRLhE..QGsP-Al.........ulpYPLspLca.p........................................ 
0 26 53 93 +11727 PF11894 DUF3414 Protein of unknown function (DUF3414) Assefa S, Coggill PC, Bateman A anon PFAM-B_1638 (release 23.0) Family This family of proteins are functionally uncharacterised. The family is found in eukaryotes and has a conserved LLG sequence motif. 26.40 26.40 27.00 26.90 26.30 26.30 hmmbuild -o /dev/null HMM SEED 1691 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.42 0.69 -14.40 0.69 -7.68 32 424 2009-09-11 06:03:47 2009-06-19 11:35:43 3 13 249 0 291 403 2 1089.40 19 83.73 CHANGED hWss..hcpLapslp....hhsppp.psl..ppL..pLcpht.shhsLLcpPs+sspsRpplp..............pGtlphuctschplsp-hlppulhLuDtLsLcEltAs-Llh..supppp....th.uhshh.sAllhYaptRphlLssL+hlhptt.t....phs.....p-l.pchhphh..........................pahcphltshshlcphLshls-.ls..............tsplh..sphp...shs..cphchh+s..........hlhcp+chLuthLashspp...shhstsshhpllptLpp..l.......sshDtlhlthlsulltsh.............................................tss.pthshhsscphtpslpsclpspt........................Wth.shpuslhhhahshhtuhsptssstts...shshpccs-...phhppAlp.............sGuhcaLhthshcs........p..t.....thh..L.pt.s.................................................................sshh.....cp....................................hhhpphcshlpshI.sshs.hlpcL+scE--sths..st.p.s.s.s....................................................................shptcLEcFhlhluhhYpscP...-huhp.aWssp-..ssh...............................htFlpaAuchtssslh.ssahcMLsuLos.G.psAptsaphLppss...........tt.tsloWcphFpsLphYhpplp...........p..spspsl.hspss.psh.................stcphhhLsuaLpLlsplspp..s.pssR.tl.......hcpssap..ssllhsLhps..........slP.tL+As.....................................lhpsLuulh..........s+s.ptutplWptlDphhhtsshtsss.t.....................pshshptc............hpthpshtppaststuFlpLlpsLlpst.....t.......h.aPpsLGsu..........................hR...hsGlpPYlc....Flhsplhhc..spshtcsspphplt.ssLchhhpsLpsas.s.....hl.thspsts.......................sh.salt.p.Puhtlh.phLhspchhphlhsll..ppussplsphst.....p.ltpslhhuLpllshsLphQssahchlpsslp.........................
................s.hhtshshhhp..ulssh.cth.hplsllshh.LahusspspluhsSl+ILpplupp.phs...............s+llslhsp...tspupcIptuFlppl-sp.sp.tt.................................................................tt..hplKhpILshL.ssLstts..pPslAHhLLGFcsp.tshhphs.p..shhsstpohL+ulLslLpphhsshps..........lshh.scLsphshpllhpLCp..sshoSthsLpalRs.....ssalhphlpp.Phls..hhhps...............................ussshhshLphRu....hlhphhulEl+s..huhpuphoptpplhshLls.............tphspsstsh..sp.........................................................lhslLDhlshsh....pphs..plshacs.hslp.shpp..........hpspt..uht.......lhs.hphLppllphchpshps.h..........p.sptp.l.....pEhptllpahsphNptpphttsphphLcSWsQLlplllssss...hsssp+pshIl-slphllsKlp...h-....shthu.plsplshsLhstlcpshh........................................ttphsshhss.............+La...lhpshlcuIhpss.us.pLRsshYshhhpYLptlhcsc...................................................hpcpshpllcshGccLl-slCsDAhsGcshsRhtAlhlL-sLlpl....s..............psshllphlspssaLplllcSl+pp-ttLps................hhs.sshhhtLh...........hacuplshLhRlApo..+pGAptLlpsslhphltpsphhshcP.Dlslc..hhcp.................................sss.hppaaplLhPsLplhssllhSh.................G.p.....scpshtpshpaLt...pppchlhull+csshht.........................................t..tthspht..pptl.plspthhllpshpp 
...................................................................................................................h....l..p............t....h........p.....tp.+t.l..........................tt.h............................h.ls...t....sh.lus.hplsEh.shthhh...........tu.tt...........................s.....hh.a.a...tpt.hht.h..hh............................t....................................hh.thh.t.t.h.t.h..htt....ht...........................tt.........................t...................................................h.p....ls..lh...............h..hht.htt...................................................t..p.h.......h.hh......hh..h..............................................................h...h.............................................................h...h.h.h.h.h..h.....................t.............h...h........................tsh..h...............................................................................................................................................................hh...............ht..c...t................................................................................................................................ht........hh...h..ha..................p.s.....hh.......................................................t.Fh.............t...............h..shh.hltsls.........s..su..haphh..t...........................hs.atthht.h..a...hp.........................................................................................t..............................................................tp..hl.shhtlhttlh.............t...................................hhthht................h...lhut.....................................hh.hltth.................t..hW....l.c..................................................................................................................t..h..p...pth....shhthh..Lh...........................t...........................................t.......h..hhp.....hh..h.h...................h...t..p.h...........l....hphh.....l
..h.....................................................................................................hh.ph..tt...hht.hh.h..........h...................................................sh.hh........hh.hp....h.t.h..........................................................................................h.....................h...shtl.h..h.........................................hh..h..........tl..sh..t.h.............................................................................................................................................ht..lhthh..sht........shshhLLthp..........................t.......................ohhpsll.h..h................................h.t..htlh..Lh...p...ss..hh..hp........thh...h.........................................................h......t.....hhphhsh.h...............t................hht.l......................................................................................................................................................h.phhp.h...........................h.....ht.....................................................................hp...h..h...hh.........................................p.....h....h..................hht.....sW.phh.h.h....................................hh.p.h...h..........................h.t....h..h...h............................................................................................................................................h....hh...h..h..................h....+...Y..h....hh.hh..................................................................................t....h..t..s.tthhphl.pDs...stt....ph..uh.hlt.l..h........t...................t.hh..h.p.shh..hhp..l........h...................................................................hh.s.h.hh.phst.....G...lht.thh...h...thht.......p...........t......................................................................................................ph.thh..hhplh...shh...................................................................................
...................................thh.hh..........h..hhp...................................................................................................t.................................................................................................................................... 0 108 169 246 +11728 PF11895 DUF3415 Domain of unknown function (DUF3415) Assefa S, Coggill PC, Bateman A anon PFAM-B_1962 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00141. 25.00 25.00 25.00 27.40 24.50 23.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.76 0.72 -4.11 35 127 2009-06-19 10:41:03 2009-06-19 11:41:03 3 2 39 55 5 183 0 77.90 45 21.94 CHANGED sKhpssFpsshpKLulLGpstssLlDCS-VlPhPtPsssp...AaFPAGpohpDlE.tACsssPFPoLsoDPGPsToVsPVPss ....chtssFpsshtKLulLGpctssLlDC..SDVlPsPhshssp....uaFPAGhohsDlE.pAC..sp.oPFPoLsoDPGPtTolsPlPs......... 0 3 4 5 +11729 PF11896 DUF3416 Domain of unknown function (DUF3416) Assefa S, Coggill PC, Bateman A anon PFAM-B_601 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 190 amino acids in length. This domain is found associated with Pfam:PF00128. 
25.00 25.00 25.00 26.10 24.40 23.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.28 0.71 -4.34 88 711 2009-06-19 10:45:08 2009-06-19 11:45:08 3 10 684 18 232 658 95 186.60 32 25.33 CHANGED stsRlsIEsVpPtlDsG+assKtlsG-.lsVpAclFpDGHDtluAsLhaR...........tsss......ps....Wp...................pssMp....h......uNDRWpupFsssp.Gpapapl-AWhD.auTW++slp+KlpAGt...-lsL-Lt-GutLlpcAst.pst...us.....ppttLpp.hsstLps..t..ssspp...luhhLssclsplhspts.R.shsTp.sp.hslhV-Rc ..............h..sRlsI-sVpP.l-s.....GpaPsKtllGE.lsVpAslat-GH-tluAsl.h.h+...............................sstt......pt....hp..............................phsMp..............sh......GsDpWpuphs..s.................cp.GpapapV-uWsDsauTW++shptKlsAG.............-lpl-LtEGstLlpcAsp.pss.....ts......................stp...sL....p....t...s.......sssLps..ts..........ssss+...luhhlssplsplht.pt....s.........R.phlop.utshs....lhV-R...................... 0 62 140 191 +11730 PF11897 DUF3417 Protein of unknown function (DUF3417) Assefa S, Coggill PC, Bateman A anon PFAM-B_724 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 145 to 860 amino acids in length. This protein is found associated with Pfam:PF00343. This protein has a conserved AYF sequence motif. 25.00 25.00 25.60 43.50 22.70 21.70 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.79 0.71 -4.15 79 640 2009-06-19 10:53:20 2009-06-19 11:53:20 3 8 576 0 243 588 45 116.40 36 14.10 CHANGED shpphs........Vhsp.LP....cpLptL....p-LAhNLaWSWpscspsLFcplDspLWcpss+NPVthLtplsppcLcplupDtsalschctlhpchcpYhss..spWapp....sptt.spslAYFShEaGlp-oL ..............t.pphsVpspLPcpLptLs-LApN..LaWSWs.cspcLFpplD.spLW.c.p..ss+NPVtlLtpls.pRLcpLupDpsFlpchcplhschcsYhst....hWapp..................sstt..........ssslAYFShEaGlscsL....... 
0 92 186 230 +11731 PF11898 DUF3418 Domain of unknown function (DUF3418) Assefa S, Coggill PC, Bateman A anon PFAM-B_1028 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 582 to 594 amino acids in length. This domain is found associated with Pfam:PF07717, Pfam:PF00271, Pfam:PF04408. 25.00 25.00 25.60 25.50 23.30 23.10 hmmbuild -o /dev/null HMM SEED 586 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.63 0.70 -6.12 105 1852 2009-06-19 11:18:30 2009-06-19 12:18:30 3 22 1658 0 364 1658 366 534.70 42 45.93 CHANGED K+pGpVhAhE+lTLYGLslVscRpVsYupIDP...ttuRElFIRcALVpG-h.....pT+t....sFhpcN......pcLlc-lEcLEcKuRRRDlLVD--sLasFYDpRlPsclssspsF-pWaKptp....cppPc...LLhhs+-pLhpcpustlostpaP-thph.uslcLsLoYpFEP......GpscDGVTlpVPlslLNQlsspth-WLVPGhhcE+lhALlKuLPKslRRphVPsP-aA...cuhltp..........h........pst..................ps.....sLhcuLscpLp+ho.Gl.....plst-sach..sp....LPsHLphsF+V...lD-cG.+.......h.............lupGRDLstL+pph.tspsppsls....psu.......................................................................s.s.....hcppulps.........W..s.FGsLPcphphc...p.u.G.....hpl.puYPALV....Dps...s.......uVulclF-sttcAppsp+tGlpRLlhLpls.s.l..KhLc+...pLPst............................scluLha............ssh.........Gps.ptLh-D.............hlssulcphhh....pt...s............................hs....psppsFpphhccsR........ucLsstspclsphlpplLsthppl...........p+pL+....u.phshsh.shuhs..............Dl....c...pQLspLlhtGFlspsshppLtchsRYL+AlphRL-KlstsP.sRDptphtclpsl.ppap.phhsp..h.t.up.h......sspl.pchRWMlEELRVSLFAQpLGTshPVSsKRlpcthppl 
.........................................................................tpGuVhAhE+VTLYGLPlVstRtVsYupIDP.........s...huRElFIRpALVEG-a.................................pT.+a......sFapcNh+LhtElE-LEcKoRRRDlL..VDD-sLFpFYDpRI....sp-lhSu.........ppFDsWWKpsp............................cc..s....Pc......LLshp+s....Lh....p...c.s.A..p..p....lop.saPshWcp.....G..........s.....l......cLtLoYpFEP................Gs.s..s...DGVTVclPLslLNQl....p.t....psF-W.lPGLRcELlhuLIKSLPKslRRshVPAPsaAcAhLspl...................ssh..................ph.................sLlcsL.t+pL++hT...GV..............plst--W...p...h....sp....lPsHL+hsF+l...lD-...+s.+.........t............................................Ltcu+sLttL+ppL...ts...p.s...p.pols......tsA............................................................................................s..s...h-pp.s.l.ph........W..........s...F..GpLPc...p...hptc.....+.u..s.....hpl.puaPALV..........Dct.....c...............uVul+.lF-s.h......-t..ppuhhpG.l+RLlhLsls.sPl..KaLpc....pL..Pst.......................................................s+Lu.Lha.......sPa.................................Gpl..pLl-D.............slss.ulDpllt....tp..u.............................................................l.hsctsFttLt-c.lR........ucLsssshclu+plppILsthhsl................................................p++Lc.....u...phshsh....shuls.....................................................Dl...+...sQlssLlapGFlstsshpp..Lsch.RYLpAlphRL-KL.s.s..s....P...pRDpt...ph...hclcp...ltp...tap...phhsc........hs......stt..........stcl....p-lRWMlEELRVShFAQpL..GTshPlSsKRlhpshpp................................................ 0 97 210 297 +11732 PF11899 DUF3419 Protein of unknown function (DUF3419) Assefa S, Coggill PC, Bateman A anon PFAM-B_1329 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 398 to 802 amino acids in length. 
28.20 28.20 28.30 28.30 22.10 25.20 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.08 0.70 -5.74 42 261 2009-06-19 11:23:06 2009-06-19 12:23:06 3 16 242 0 167 284 73 367.10 30 64.16 CHANGED tshhcchashhap........shlYsphWE.........DPclDhpsLpls....sscpllsIsSuGsNhLuY..LstsPsclpAVDLNPspstLlcLKlAAh+sLs.apsFachFGcupp.......ps.pplhtppLsPpLssputpYWpp+......t+phs..hhs+GhYcpGhhGp.hlths+.hls+lhGl....plcpLhpApolcEQRphapp+.lpslh.s..pllphls.........sphhhhhuL....GlPssQhshl.................................................tsss..lhphltc+L-tlhssh.l.p-NYFha.slstpYst.........sshPsYLptcsappl.......+sss...-plclassslp-hLtphsssolsthlLhDu.DWhss..............................splss.hpp.lsRsspsGuRVlaRoAutp....................shh.tph............cphsapsctusthpst.......DRsshYuuhalhp..ct .................................................................................h......hat.hhs.......thlYs.sWEDPclDhchLpls....ss-..pllsIsSuGsNhLsY..L.....psP.t+lcAVDLNPsQstLL-LKlAuhp..tLs.ap-ha..............chFG..cupp................s.shpplh.p.c.LuP..pLsspuhpYW.p+......hphhs......hhsculYcpGh.tp.h.lphs+..hlh+lh..G..l...........plcclhpup.olpEQRphapp+......lpshhhp.........hhlphhh............sphhhhhuL....GlP.sQhs.hl.........................................tsts..lhphlhspL-.lhpph.l.p-NYFhahsltG..cYsp..........pshP....sYLp.csatpl............+sss...-plclHsssls-h....lt....p.h.ssso....lshhllhDshDWhss....................................pphsp.hpt.ls+s...h.p.s.GuRVlhRoAuhp....................s..hh.tph............cp..hsapsctssthtst..............D...RsshYuuhalht..h............................................... 0 55 102 139 +11733 PF11900 DUF3420 Domain of unknown function (DUF3420) Assefa S, Coggill PC, Bateman A anon PFAM-B_1362 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00023. 
25.00 25.00 25.90 25.50 22.90 24.80 hmmbuild --amino -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.33 0.72 -3.86 26 221 2012-10-02 12:10:21 2009-06-19 12:25:11 3 20 51 0 50 251 0 49.20 42 12.95 CHANGED DLDsloLEKpLP.-VlcpIcplRhp.t..........phsth.........sshp-K+l+RIH ..slDsloLEKpLP.EVlcKIctLRtpst...............sppsphsth......................sshc-K+IRRI+............ 0 6 27 37 +11734 PF11901 DUF3421 Protein of unknown function (DUF3421) Assefa S, Coggill PC, Bateman A anon PFAM-B_1420 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 119 to 296 amino acids in length. 26.20 26.20 26.80 26.60 25.80 26.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.51 0.71 -4.37 49 531 2009-06-19 11:28:17 2009-06-19 12:28:17 3 18 87 0 352 549 3 104.40 31 58.48 CHANGED huGpD.su..tslYVGRuh+pGshlPuKVlPs.+ttsalsaGGpEhttps..aEVLssssh........pWl.sssGslP.....sAl.sGpotp.GEsLYlGRupapGslssGK.lp.SHtshYIPasGtElp.hs ................................h.......t...haluR.s.h.ap.s.sh..lPu+lhsp.pt.hsahsa.s.utEht.....hpp..aElLsstth................pWl..sss.s.G.p.l.Ps.....................sAl.uG.....pss.s.....G..E.sLYlGRuh.a...p.G..slhsGK..lp.s...u.+.t...shalsasGtEht........................ 0 119 159 290 +11735 PF11902 DUF3422 Protein of unknown function (DUF3422) Assefa S, Coggill PC, Bateman A anon PFAM-B_513 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 426 to 444 amino acids in length. 
25.00 25.00 38.00 37.70 22.70 22.30 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.31 0.70 -5.60 78 308 2009-06-19 11:40:50 2009-06-19 12:40:50 3 3 279 0 144 330 270 407.80 35 95.89 CHANGED hpsHPL..RtsLtsElHARPa.hlssPtplsalAhh.......sspppss.-tspLssLtpphGhs.P.........sssus+aths.hGt...hpL+WEpHTEFsTYThht......sssspsFssss.....hshhP.....tsWhtphPG.....phlsusclplt...stssssptt.....h.ph.F.........................sspoLsuSpltsGsAhlhoDFRl.cscGasRhlV..hspslsstphGRlVQRLlEIETYRhhALLuLPhA+plssplsph-ppLuplspphsss...ssss-.............plLscLoplAAclEshsAposaRFuAopAYttlVppRlppLREp+ls.GhpThsEFhpRRhsPAM+TCpuspcRhcsLScRlsRAusLLRTRV-lphppQNppLLpSMs+RuclQLRLQpTVEGLSVsAISYYsluLluYlhculptht.....ls.l.s..hslhsul..ulPlVlhsVWhslRRl+++ht ....................................................................................................................h..tHPhRttLhsElHARPa.hlssPtplhalAhh.......................sstptst.cpstLtpLspphGhs..P.........ptsusHathp.hGt....hpL+WEpHTEFsTYTahts...........sssstsFssss.....hthhP...tsWhtphPG....phlsulclplh....stsssstth........ph.F.............................ssssLssSpV....t.....sG.....sA.........tlhoDF+l.cs-.....GasRhLl.....hs.pshsspphGRllQRLlEIETYRhhALLuLPhA+phsspLsph-pp..L...sp..lspphsss...ssssc.........................pLLscLopLAAclEshsApotaRFuAopAYtplVppRlppL+Ep+lt.GhpThs-FhpRRhsPAh+TCpuspcR.psLopRlsRAssLLRTRV-lphcpQN.......p.......pLLpSMscRAclQLRLQpTVEGLSVsAIoYYsluLhuYhhculpt....ht.......hs..l.s.splhsul..hlPlllhsVWhslRRl++ph.h................................................. 0 33 80 106 +11736 PF11903 DUF3423 Protein of unknown function (DUF3423) Assefa S, Coggill PC, Bateman A anon PFAM-B_670 (release 23.0) Domain This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 73 to 118 amino acids in length. This protein appears to be related to ribbon-helix-helix DNA-binding domains, suggesting these proteins may also bind DNA. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.31 0.72 -4.22 57 485 2012-10-02 18:44:02 2009-06-19 12:44:36 3 2 458 0 138 370 49 72.40 32 83.02 CHANGED huhVKIsD-La-plRpsupshsRSIsuQhEaWh+lGhlsE..hsPsLsas.............................tltphhhpts........thphsph ....sll+Iu-pLa-slRttupshSRSlNuQhEaWh+lGhhsE...pPsLsap.............................tltphLhptt..............sh.................................................... 0 33 66 106 +11737 PF11904 GPCR_chapero_1 DUF3424; GPCR-chaperone Assefa S, Coggill PC, Bateman A anon PFAM-B_942 (release 23.0) Domain This domain, and the associated ANK family repeat Pfam:PF00023 domain, together act as a chaperone for biogenesis and folding of the DP receptor for prostaglandin D2. 25.00 25.00 26.20 25.90 22.10 24.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.10 0.70 -5.28 39 534 2009-06-19 11:46:30 2009-06-19 12:46:30 3 13 131 0 299 479 1 286.90 32 59.12 CHANGED RlDoTLhGF-.sh.......phpRucpSalF........cG-ssss...........plh.ls+cc+hlhsthhph............sttttcpplssh..............hpsslhsstlcspphshpcs................huW..Rp-+sEhl.s...sacu+lYsh.psV.l.h+pR..............................p-+Loc-pptphcs......................tpsshpshhs....................................-pph..........................................s.phpp.....shssss.sshs.c.phhsst...pht..spshG+...............sh.c.......sppppph+sslWloc-FPLp...............h.-pllPllDllA..sspthp+L+-h........................................loh...cLPs.GFPVKl...............................pIPlh.slpAtlTFspapthp..........................................................................schppshFpIPssYph 
.......................................................................................................................RhDsTLhuFp.ph..........phpRGphSalF........pGpssss..............slh.lsHcp+hlhsthht............................sttp.ctplshh......................hsos.lhsstlcscp.l.sFp+s.....................huW...Rp-Ks.............EhV.s...sac.AK...........VYsh.s..sV.l.s+pR...........................................................p-HLocc-pt+.cu.........................hpshpphhs.......................................ppph...s...........................................................................................................t..hpt...........sh...ssss.s...slo.-..-Yhsst...........p.t...s.phG+............................h..th.........ppppp.pF+..AslWhsp-a..P.Ls..................................l.-plhPll-lhA...sstths+LR-F........................................lph...cLPs.GFPVKl...............................-....IPlh.slsAplTFtphphh.................................................................................t.p.p.FtlPtsY......................................................................... 0 80 134 207 +11738 PF11905 DUF3425 Domain of unknown function (DUF3425) Assefa S, Coggill PC, Bateman A anon PFAM-B_1128 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 120 to 143 amino acids in length. 
27.10 27.10 27.80 27.50 26.40 24.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.10 0.71 -4.27 62 451 2009-06-19 12:01:03 2009-06-19 13:01:03 3 17 80 0 392 468 1 130.20 22 31.52 CHANGED sthlu.a................h.sstts.tp.lPs.tLpPTt.Qh.....plPHtsh.lDhl.PaPphRDpLlpthpp...............h-ps.....chhp-hhss........................................hsttshtt.........................................psshlhh...tc...hc.psWEloptFhc+as..............hlhcut...phh.........ospa...Rt ..............................................................................................................s...t...tp..lP..tLpP.T..hQh.....................ph.PHtsa.lDhl.PaPphRDpllt....thst.....................hsps.....phhtshhss.........................................................................st..................................................................................................pss.hhh...tc..shchpsWclsttFhc+as..............hlhpst.................................................................... 0 75 180 332 +11739 PF11906 DUF3426 Protein of unknown function (DUF3426) Assefa S, Coggill PC, Bateman A anon PFAM-B_1212 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 262 to 463 amino acids in length. 
27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.78 0.71 -4.54 92 481 2009-06-19 14:23:17 2009-06-19 15:23:17 3 4 476 0 190 502 91 144.30 22 40.45 CHANGED hsshhlhss.LhuQhsah..RsplsphhPphpshhpt..h...C..lGCpls...hslpslp..pt..shcphs.......sssslhlpuslhNpuchstshPtlcLslpDtpsp.lhp+sh..............P.tpalt.t........pst.....lsssps....hplpl......plp...ssss........pAssaclphh ..............................................................hshhhhhsLssQh.saht.ts..plstt.Pthp.shhpt...h........C...l.s..Cp......lss..hshptlp.h..ps...slcphs................ttsshlhlpuhlhNpushs.shPtlcLsltDtssphl.sp+hh.........................................tP.tpYltst.....s.......pst...................lsssps....hphtl.......plt...ssss........psssaplph.................................... 0 50 121 156 +11740 PF11907 DUF3427 Domain of unknown function (DUF3427) Assefa S, Coggill PC, Bateman A anon PFAM-B_1236 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 243 to 275 amino acids in length. This domain is found associated with Pfam:PF04851, Pfam:PF00271. 
27.70 27.70 27.80 28.50 27.10 27.40 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.81 0.70 -5.45 60 723 2009-09-11 10:09:40 2009-06-19 15:27:28 3 13 675 0 139 602 88 256.20 25 28.44 CHANGED ph.lsppl....st.p.hth.hlt.hh.........................t.hthtph.p.lpp........t.........tphhphLphpaasspt......................hpht........hhplspphpphlpss...........thpp.lt-llphsl......pp.......t.......sslplapcYoRc-lhthls.hspspss.....htGhhhhps....pspslFlThpKs-c..hSsospYcDhhlspphF+WpSpsspshcSscspphlpppcp...Ghp...lhLFVRcps....uhsts.FhaLGps.phss......pp.................uppslshpacLc...pPlssslachhsp ....................................................................................................hhotphhsuh+.h-.hlLc.lh...........................................pp.p.hs..h.pp.h.p.h.pt...h.hspt.........shp...s...shphLshsFasss...........tph...........hlphpt........hhplspthpph..Lpsp.................tFpthlpDllchuh....pt........p..ttp.....ssLhLap+YoRc-hs+lhs...asps.tsus.........hhG.Yhhtpp.........phslFlTacKp-c..hssss.......pYcDpals..ppph+Who+ss+.slc.Spcsppllppctp...shp....lhlFV++pc.........sput..FaYLGps...phlp...ts..................tupssVshshpLc...pslpsclachh................................... 0 49 95 121 +11742 PF11909 NdhN NADH-quinone oxidoreductase cyanobacterial subunit N Coggill P anon Lonsdale D Family The proton-pumping NADH:ubiquinone oxidoreductase catalyzes the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. 
Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 subcomplexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1]. The cyanobacterial NDH-1 complex contains additional subunits, NdhM and NdhN, compared with the minimal set of the bacterial enzyme and these seem to be specific for thylakoid-located NDH-1 of photosynthetic organisms [2]. 
25.00 25.00 60.10 59.90 21.10 16.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.59 0.71 -4.89 20 98 2009-06-19 15:22:00 2009-06-19 16:22:00 3 1 89 0 45 90 139 149.60 50 87.57 CHANGED M...sLLh....oGppFh+........DLEpsGuLAlasPLEGGhEsRhLRRLRAuGYpohhhSARGLGDPpsaLhplHGVRPPHLG+pslGpsuAlGcV.hV.P.ls....pL..ssuKuLVLWlLEGpVLSpuELphLssLsppEPRLKlVlEhGGsRslRWpPLpph .......................MsLlhoGptFlcDLEppGuLAlYsPLEGG....aEGRYhRRLRusGYpshplSARGLGDspsaLhplHGVRPPHLGKpslu.ptuAlG.laalPP.lshpLppLsssuKGLVLWllEGpVLScuELpaLstLsph-P+lKVVlEhGGtRpFcWpPLpp.h. 0 8 29 40 +11743 PF11910 NdhO Cyanobacterial and plant NDH-1 subunit O Coggill P anon Lonsdale D Family The proton-pumping NADH:ubiquinone oxidoreductase catalyzes the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 subcomplexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. 
PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1, 2]. The three nuclear-encoded subunits NdhM,NdhN and NdhO are vital for the functional integrity of the plastidial complex [3]. 25.00 25.00 31.40 47.00 22.00 19.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.12 0.72 -4.34 21 95 2009-06-19 15:36:07 2009-06-19 16:36:07 3 2 89 0 43 93 99 69.00 52 67.02 CHANGED lKKGuLVRVsREthpsSlEAtASDsphP.....sYlFEusGElLsl+....GDYAQVRa.phPsPsVWLRlDQLEsh ...lKKGsLVRVs+EphpNSlEAtASDs+hP.....sYlFEspGElL-l+.....G-YAhV+a.tlPTPslWL+lDQLct............... 0 7 27 39 +11744 PF11911 DUF3429 Protein of unknown function (DUF3429) Assefa S, Coggill PC, Bateman A anon PFAM-B_1072 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 147 to 245 amino acids in length. 
39.10 39.10 41.90 41.30 34.60 37.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.95 0.71 -4.21 83 410 2009-06-19 16:34:30 2009-06-19 17:34:30 3 3 382 0 189 393 578 145.90 27 71.46 CHANGED tpsPphshh...LGhuGLlPFlhsshhhhhs....shtths...........................shhhhlsYuAlILSFLuGlhWGhuhptt..............st.shhthshullsuLhu......W.....sshhh....sshh......uhhhl.hhGalslh.hhDhthhpt....shsPsWahpLRhhLTsllshslhlshhh ...................p...Pp.shh..LGhAGLlPFlssslhhhhh.....sh.h.t............................hh.h.lsYGAlILSFLGGl+WGhuhstt........................sphsh.hphshu.llssl..hu......Wssllh.........ssth......ullhl.hhuFshlh..hhDtthhtt................thhPsWa.hthRhhLThlsshslhhslh.s........................... 0 58 110 145 +11745 PF11912 DUF3430 Protein of unknown function (DUF3430) Assefa S, Coggill PC, Bateman A anon PFAM-B_1305 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 209 to 265 amino acids in length. 
28.60 28.60 28.60 29.00 28.30 28.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.36 0.70 -4.51 51 133 2009-09-11 05:55:51 2009-06-19 17:37:34 3 6 4 0 133 133 0 208.80 16 78.26 CHANGED MKl.....hh.llll....llhlhshh.................hssph...........lshh.Pap....sspCs....................................sss.GhG........as........hsp.Chths..................hhhhptssssshhphphhp..........ttsCt.....ts.hss..hshp....hssCht..................................................st..s.............hshhhhsss.ps.h.....sssoh....lthhh................................ss................pCsss.......shhhhtahssspp....l........ssshohpahCss......sh.shhphC............sssCpstsh......ph.sCs..tssshhtp.spt ...........................MKl......llll.lhhhhshh.........................ssspa......................lshp.sap...sspCs....................................sss.GhG..............ash...............hsp.Chsht................................h.hhtss.sssshh.php.hs............sCp.....ss.hss..psap....hspChp......................................................sst...........hhhhh.s.ss...ps.h......Pssoh....lhsha...............................................ss....................pCsss..................thhhhpahsssps.ht............ss.s.p.hhCss...s..sh..hs..........sssCps.sh........h.pCt............................................................ 0 98 133 133 +11746 PF11913 DUF3431 Protein of unknown function (DUF3431) Assefa S, Coggill PC, Bateman A anon PFAM-B_1346 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 291 to 390 amino acids in length. This protein has a conserved NLRC sequence motif. 
33.10 33.10 33.30 33.20 32.70 32.80 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.70 0.70 -4.90 48 316 2009-06-19 16:41:07 2009-06-19 17:41:07 3 6 70 0 237 321 32 220.90 36 63.07 CHANGED spslVlAphpsED.ssWlpp...l...s-....appsIYsVD...........cs..sush........psPtNKG+EuM...........sYLoYIIDaY-pLPss.hlFlHup+....hsWHNDs..saDssshl+pLphs.hVpppGYsNLRC.....pasPG.CPspl+...Php.....t.s.........ppsscthhsp......uappLFssss.................................VPcslussCCuQFAVSR-plpcRP+p-YlcaRp.......Wllc.......................TcLsDpl.SGRVhEYlWHllF........utps..........laCPctppCYCcs ...............................s.tlVlAphpp-.s.ssWlpp...h.......s-.......aptsIYsVD.................ssps.h..........psPtNKG+EuM....................sYLoYI...IDpYDs....LPsh..hlFhHucc....................htWHNDs....thc.ss....hl..pp.Lphp..hlpc...pG.....YsN..LRC........pas.PG....C.P.spl+......Pht............t.t...........................pttpp.th...hsp..............satcLFssss.........................................................lPch.lussCCuQFAVoR-pl..pp+P+pcYhchRc........Wllc.......................TpLsDth....SGRlhEYhWHllF.................stps.................lhCPpt.t.CaCp.h...................... 0 33 85 167 +11747 PF11914 DUF3432 Domain of unknown function (DUF3432) Assefa S, Coggill PC, Bateman A anon PFAM-B_1326 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 100 amino acids in length. This domain is found associated with Pfam:PF00096. This domain has two conserved sequence motifs: YPSPV and PSP. 
26.70 26.70 28.00 27.10 25.10 26.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.43 0.72 -11.15 0.72 -3.97 6 489 2009-09-10 16:59:27 2009-06-19 17:47:26 3 11 427 0 32 454 0 94.20 81 24.48 CHANGED A-KAussSosSs.l....ouYsssssoSYPSPl.TSYPSPV.TSYSSPsuSsYPSPVHooFPSPSlATTYPSsosTFQTQVhTSFPoSsVTNsaSS.VoTuLSD ...............V.EKAAPVSTASP.l....PAYSSSVTTS.Y.PSS...I.ATTYPSPVRTsYS.SPAPSSYPSPAHTTFPSPSIATTYPSG..TATFQTQVATSFSSPGVTNNFSSQVTosLo... 0 2 4 10 +11748 PF11915 DUF3433 Protein of unknown function (DUF3433) Assefa S, Coggill PC, Bateman A anon PFAM-B_1502 (release 23.0) Family This is a family of functionally uncharacterised proteins. The family is found in eukaryotes, and represents the conserved central region of the member proteins. 20.40 20.40 20.90 20.80 19.00 20.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.05 0.72 -3.82 166 434 2009-09-14 14:57:32 2009-06-19 17:52:00 3 9 77 0 373 464 0 90.50 22 15.65 CHANGED .pahhsalPsllushlthhapsl-hphptlpPahpL...pp.......susAppSlhls...Ysuphs.hhsshpAlp......pp.Hahlshsohssll.sh.hlsllsu.ul ..............................................h..ahhpalPsllushlthhaphl-hplphlpPahtL.upt..................sssAppolh..ls...Y..huths.hhsshpAlp......pt...Hahlshsshssll..s..hlsllsuu.................................................. 1 57 180 310 +11749 PF11916 Vac14_Fig4_bd DUF3434; Vacuolar protein 14 C-terminal Fig4p binding Assefa S, Coggill PC, Bateman A anon PFAM-B_1661 (release 23.0) Domain Vac14 is a scaffold for the Fab1 kinase complex, a complex that allows for the dynamic interconversion of PI3P and PI(3,5)P2p (phosphoinositide phosphate (PIP) lipids, that are generated transiently on the cytoplasmic face of selected intracellular membranes). 
This interconversion is regulated by at least five proteins in yeast: the lipid kinase Fab1p, lipid phosphatase Fig4p, the Fab1p activator Vac7p, the Fab1p inhibitor Atg18p, and Vac14p, a protein required for the activity of both Fab1p and Fig4p. The C-terminal region of Vac14 binds to Fig4p. The full length Vac14 in yeasts is likely to be a protein carrying a succession of HEAT repeats, most of which have now degenerated. This regulatory system is crucial for the proper functioning of the mammalian nervous system. 25.00 25.00 36.10 27.40 19.00 19.00 hmmbuild --amino -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.02 0.71 -5.13 38 357 2009-06-19 16:55:15 2009-06-19 17:55:15 3 16 279 0 254 343 2 162.60 47 22.35 CHANGED -cpLLcpRushIIRpLChhLss.........E+lY+sluplLpp........c...sDlpFsshMVpsLNslLlTusELtpLRppL+...t.t.pp....tshsLFssLapsWC+NsVuslSLCLLupsYEhAasllphhu.....-h.ElslshLlQlDpLVQLlESPlFshLRLQLLEPc+a....PaLhKsLYGLLMlLP.QSsAFphL+sRLpsVs .....................................................ppLLEtRGshII..RpLChhLss.......EpIa+shuslLtp..............................-.pDlcFAShMVpsLNhlLlTusELh...........pLRppL+....................shpspp....upslFssLa+SWCHNsluslSLCLLs.QsYcpAhsllphhu.......-l..EloVshLhplD+L.....VQLlESPlFsh.................LRLQLL..-sp..p..h.........PaLhKsLYGLLMLLP..QSsAFthLppRLpsVs................. 0 94 148 215 +11750 PF11917 DUF3435 Protein of unknown function (DUF3435) Assefa S, Coggill PC, Bateman A anon PFAM-B_1788 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 435 to 791 amino acids in length. This family is related to Pfam:PF00589 suggesting it may be an integrase enzyme. 
25.80 25.80 26.50 26.00 25.50 25.50 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.21 0.70 -5.75 20 446 2012-10-02 14:09:14 2009-06-19 17:59:53 3 11 52 0 340 472 0 248.00 16 52.15 CHANGED lEDlhphh+shlsTsctpFhhGhpRlQLsLhhhLushTusRPsAlLp...........Lpa+DltloLh+sPcG.usshhhl-lpscasKpah.Gtpph....Ns...............FhlPElIa...-PoLlLsP+saLlulLFttpAFps......slsosccLhp.LpVsssptph.L.L+schhDhalFp+s.hpstthcI...spshohsshpshl+phGEIsGFppshpsYphRhGuuctlspSt.lo-upcNllhpH.AsscTF.caYhspplcpDhQulhpGhsspctlhRhssphSRolDsRRPpcLosppctslccpPclpchtccpccL+p..................paGpstcsptss......................hhpchp+tppclpssRpRhpcch++ch+ccFspcQsllDIERQLS..Gpslcp-.hppshppsphhPPpph+LlcpLhshP.upol--E..hpRRhpulcAVsth ....................................................................................................................................................................................................................................................................................................................t..hh............t.t..h......................l.....hc.ph...ph.lhp........t...........h..t.......t.s...s.....h..h.hp...h..h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 20 91 252 +11751 PF11918 DUF3436 Domain of unknown function (DUF3436) Assefa S, Coggill PC, Bateman A anon PFAM-B_18 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. 
This domain is found associated with Pfam:PF03572. This domain has two conserved sequence motifs: DPRL and SYEP. 27.00 27.00 27.20 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.67 0.72 -3.73 47 2173 2009-06-19 17:05:20 2009-06-19 18:05:20 3 4 1540 0 25 2173 0 53.40 58 14.49 CHANGED ApVLTAGVQoSLNDPRLhISYEPohlp.P.ttsPthssLo.EpLlAhLQpuI+a-l .ApVLTuGVQoSLNDPRLhISYEPSsl.E.sP...p.Q.s.PsLosLTpEELLAhlQcsI+aEV.... 0 1 3 10 +11752 PF11919 DUF3437 Domain of unknown function (DUF3437) Assefa S, Coggill PC, Bateman A anon PFAM-B_1910 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 142 to 163 amino acids in length. 21.20 21.20 21.50 23.10 21.10 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.99 0.72 -4.27 32 338 2009-09-17 10:53:11 2009-06-19 18:08:46 3 8 258 4 240 340 5 89.90 40 5.22 CHANGED pphhphHuulLGLuAlVtAFPYtsP.................WlPphLsp.Luphup.c.ss.ltpos+cslu-FK+s+...pDoW.phcpptFoc-..pl-sLc.sl.............lh.oYas ..........t..lhp+HuuVLGLuAhl.uhPYssPs................WhPplLhp.Lus+s......s..D...P........s....sltpolKcslu-F++TH.pDsW...p.cpptFTp-..QLpsLp.sl....l.ssYaA....................... 0 75 131 205 +11753 PF11920 DUF3438 Protein of unknown function (DUF3438) Assefa S, Coggill PC, Bateman A anon PFAM-B_1942 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 276 to 307 amino acids in length. 
25.00 25.00 25.30 25.20 24.90 24.70 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.84 0.70 -5.56 25 270 2009-06-19 17:10:54 2009-06-19 18:10:54 3 3 195 0 68 269 20 253.70 42 93.79 CHANGED h+hhlhhhslhhshsss.ApAsElh+WERlPLslPLpVGpERlVFlD+NVRVGhPss..lsu+LRVQSsGGAlYL+AsEshssTRLQLQDscoGElILLDIsApsttsspsshEPVRIVhusssss.p.t..........sssussusssuss......ts.pts+hpsPlPVVLTRYAAQsLYAPLRTVEPVsGIppVsl+hshcLoTLh..PohPVcAssLuuWpL-shhVTAl+LpNpuspplsLDPRtLQGpFluATFQHshLGPtGsscDTTsLYLVTcG+uhspALlP.pht.hc..........shstp..................sspp .........................................................hh.....hhhh.hhh.hhs.hu.pA....sElh+WERhPLslsLpVsQERlVFlD+...NV.RVGhPss..Ls..s+.L.R.lQSs.G.GAlYLpAppsh..sTRLpLQsspsG-lILLDlsApt....spt.hE.PV+lV.sspsss...t...............ttttttt.st..t.............t..ps..sphpsPlPVsLTRYAAQpLYAPLRTVEPlsGltpVslchshslsTLh......PshPlpussLuuWp....lsshhVTAl+LpNpu.uptlsL...........D.PRtLp....G....pFhsATFQHth..LGstGs.spDTTslYLVTcG+s.spuhls.t...................h............................. 0 8 32 55 +11754 PF11921 DUF3439 Domain of unknown function (DUF3439) Assefa S, Coggill PC, Bateman A anon PFAM-B_1105 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 46 to 94 amino acids in length. This domain is found associated with Pfam:PF01462, Pfam:PF00560. 27.10 27.10 27.10 27.10 27.00 26.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.87 0.71 -4.36 20 546 2009-06-19 17:20:45 2009-06-19 18:20:45 3 27 7 32 0 565 0 67.30 55 33.02 CHANGED oCPGIcYLScWIpKNSull..h.ts.s..ssPDSAKCSGSGKPVRSIICPTTTTTTTTTTTTMPTTTTLPTTTKMSMVKVPLVPPEAFGRVMNACAYFPSYIFLHLVHGLAAVPLVYLVCHASQLL ......uCsDIhYLScWIupHsGlV...hp.t...s..sh....ssPDSA+CSGTNTPVRAVh........ps....oh..............................................................................s.............. 
0 0 0 0 +11755 PF11922 DUF3440 Domain of unknown function (DUF3440) Assefa S, Coggill PC, Bateman A anon PFAM-B_1674 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 53 to 190 amino acids in length. This domain is found associated with Pfam:PF01507. This domain has a conserved KND sequence motif. 25.00 25.00 34.30 26.10 24.30 21.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.13 0.71 -4.72 18 1399 2009-06-19 17:24:49 2009-06-19 18:24:49 3 7 655 0 74 628 8 100.50 38 40.47 CHANGED GVslppMRVspPFtsptpcuLpLY+lIEP-TWu+hluRVsGsNFuulYGsopu..hGa+s.lpLPcua.TW+pYshFLLcTL...Pcph+ppYhcKlpl.lpaW+p+.G...GsLs-...................................ch.cDl.tl.phR-lPSaKRhChsILKNDahC+hhuFu.TKp-ttc+ .............h..t..RlspPa...t.psL..ht.l-sthW.thhtRVtGs..sshYstp......huh.t...h.hP.th.sWppa..hLLpsh...s..h.tt..Y.p+hth.htaatpp.......h.......................................................sa+thshsl.ppDh.hp.htas.sp.......................................... 0 15 36 47 +11756 PF11923 DUF3441 Domain of unknown function (DUF3441) Assefa S, Coggill PC, Bateman A anon PFAM-B_1795 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in archaea and eukaryotes. This domain is typically between 104 to 119 amino acids in length. This domain is found associated with Pfam:PF05833, Pfam:PF05670. This domain has two conserved residues (P and G) that may be functionally important. 
26.30 26.30 28.40 26.80 26.10 25.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.55 0.71 -4.49 39 309 2009-06-19 17:27:43 2009-06-19 18:27:43 3 16 254 0 231 326 2 108.90 35 11.46 CHANGED p.stsppstsphhphlspLsupPpssD.plhtslPVsAPasAlp..caKYKlKlpPGss.KKGKuspphlphF....................................hpstp.....................hspEtcll+sl+...sp-lstsls.ucl+....lshs .........................t....tttttpphshlssLsGpPhspD.plLhAlPVCAPasulp..pYKYKVKLpP.G.ss.KKGK..A...sKpslshF................................................hpspc..................h.ss+Ep-Ll+ul+...-p-lhpsls.GKVKlsh.................... 0 87 138 194 +11757 PF11924 DUF3442 Protein of unknown function (DUF3442) Assefa S, Coggill PC, Bateman A anon PFAM-B_890 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 334 to 948 amino acids in length. 29.20 29.20 29.20 29.20 29.10 29.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.79 0.70 -4.82 50 2024 2009-09-11 14:47:36 2009-06-19 18:35:19 3 70 725 2 124 1347 552 264.00 39 30.81 CHANGED phhspts..pphssthpsp.....ssptspphths............hhsspsssplpphL...sthups....plslshss.chphpssplshl.....hshh...-sspp.LhFsQhuhpp....p.sscsssNlGlGhRp...h.t...ssah.lGhNsFaDh...-hsps.ppR.hGlGuEh.apcahchsuNhYhslosh..+sstshp.thp......................E.RsA.sGaDlphphhLPthPp.....husplhappahGcp..VslFsp...sphppsPpuhshulsYsP.lPllolss....ph.ppupuspspsphs....lplsaphGt.shppQls.stls.......tcsl.tspRa.-hVcRsN 
.......................................................................................................................hAp.u..tshGphhtsp......su-tA+shthG...........hsospsspplpsWL....utaGsA..........pVsLplsp..phphcs.S.ph-hh.......hPhh...Dspp......LhFo..Qhuhpp.....p.DcRhhuNlGlG.Ra..h.t.......ssWh.lGhNsFhDp...............Dhsps..ppR...hGlGuEh..WpDYL+hSuNhY..h...houW...+cu.shc...ap....................................p..RsA.pGaDlcspuhL..Ps.aPp......Lu.uplhaEQ..YaGDp.....VsLFsp...............sshp....+....sP..tAlohGl....s....Y.......T.....P.lPLlTlss........pa..+pGpuupscsphu....lplsYphGp..PhpcQLcsppVs......t....tcoL..tGSRY.DhVpRNN............................................................... 0 27 46 86 +11758 PF11925 DUF3443 Protein of unknown function (DUF3443) Assefa S, Coggill PC, Bateman A anon PFAM-B_1634 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 400 to 434 amino acids in length. This protein has two conserved sequence motifs: NPV and DNNG. 
25.00 25.00 25.60 28.20 23.20 24.60 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.10 0.70 -5.78 24 153 2009-06-19 17:37:27 2009-06-19 18:37:27 3 2 74 0 49 152 4 356.40 44 86.77 CHANGED ssssssuNssslTVs.sussssh...NhPhVSVTlCsPG......o.o..pCpTlDsVLVDTGShGLRlhuoAl.sul.ssLPtpos......uuuslA.ECspFso.uaTWGsV+pADVplGuE.p.........AuslPlQlIuDss.ssslPs..sCsssGs..stsosssLGANGILGIGshstDC..GssCss...ss..hsusYYsCssu..soCsssplPlupQVsNPVstF.usDNNGVll.phPslu.ssGtsSsoGpLlFGIGTQsNNslsu..sshlsoss...sG..hos.sapGp....shss..FhDS...GSNuhFFs.ssulss.....Cu...s.ssa..YCP....soshsloAolsussG.s.ousssFslu.NAssLhuss..shAassLuGs.h....s.sshaDhGLPFFY.........GRsVYhulcpss.ssG......sGP.alAF .......................s.sssuuNshslTVs...sGssssh.....NhPhlSVTlCsPG.........T.o........pCQTIsNVhVDTGShGLRllsoAl.uul..uLPssss........uGusls.ECutFso.ua..TWGsVRpADVpIuGE.h..........AuslPlQlIuD.u....osssPs.........sCosuuu..shsTsusLG.ANG........ILGIGsh.shDC..GssCss.........os....h.usYYuCssu.....soCss..TosPluQ...QVsNPVstF.AsDNNGVIl.phPslu.ssGuu.SAoGoLlFGIsTQuNNsLsu..ssll..soso...sGs....lou.shpGp.....shss.......s....FhDoGSNuhFFs..su...u...hst..............Cu....stuaYCP....uSopohouTlsutNG.u.ssssohslu.sussLFush..shAhssLuGsh......s.ssh....hDhGLPFFa.........GRslYhuh-tsu.suG......suP.YhAF........ 0 6 21 32 +11759 PF11926 DUF3444 Domain of unknown function (DUF3444) Assefa S, Coggill PC, Bateman A anon PFAM-B_1267 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 210 amino acids in length. This domain is found associated with Pfam:PF00226. This domain has two conserved sequence motifs: FSH and FSH. 
26.10 26.10 28.70 28.70 25.40 24.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.60 0.70 -5.13 29 318 2009-06-19 17:39:28 2009-06-19 18:39:28 3 14 19 0 231 311 0 192.20 32 39.01 CHANGED ppssssssphssPDs-FaDF-c.RsppsFtssQIWA.lY.DspDGMPRhYAhIc+Vhss.sF......+L+loWL-sps..psE......pWhstshshu.CGcF+l..upspthcshshFSH.h..hshcpsp+sthpIaP+KGElWAla+sWs.sWsss...t.scphacY-hVElLs.sassp.tGlsVs.LsK........lcGFpolFppptpps.....hhhIPpcEh.hRFSHplPua+hoGcEtp ..............................tt........hshs-s-FasF-ppRs..ppFtssQlWA.hY..D.s.DsMPRhYuhIp+Vhs.....sF.................clploaLcsps..psc....h..........pW.hpp.shshu.CGpFch....sp.p.p.........h.ps.hs...h.FSH.l...............thpts..t............p............t............h............hpIa............P++G-lWAl....Y..+s........Ws...p.Wstp.......s.....phhhcY-hVElls.saspp...tGlpVs.LhK.............................lp.Ga..tolFppt.ptt............hhhIsppEh.hRFSHplPuaphotpc..t............ 0 30 117 172 +11760 PF11927 DUF3445 Protein of unknown function (DUF3445) Assefa S, Coggill PC, Bateman A anon PFAM-B_501 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 264 to 418 amino acids in length. This protein has a conserved RLP sequence motif. This protein has two completely conserved R residues that may be functionally important. 
25.00 25.00 28.70 28.40 22.90 24.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.64 0.70 -4.99 75 510 2009-06-19 17:41:17 2009-06-19 18:41:17 3 8 251 0 334 538 505 230.50 26 64.60 CHANGED hulps.....hc..h.ppWlcl....D.ppYhsphsp+pclLpc.cscpshsh..........................PcupsAshELh-hlhp.aLsp+YP.phFph.......ptss.............hpNphsGpphshshs.........................s......................P.....Lphluc.lpEDhsllhpc.................................sspahLpAuslsFsus.WslspKlGhs......LstIH..sPVPtYc..pclppuh..................p+aFp+lpsscPlpRhNWslps...cs.pLhhss.......tphtphs.t..........ttptpssscphalRsERQoLpRLPcSsAllFoI+TYhhsls-...lpp ...............................................................ul..ht....pphlpl...D.ppYhppht.+tplltp.csppshth....................................Ppsh.A..shEl.hchl...........hp..aLst+YP.ph.Fph.............pts.s.......................hpNphhupp.hshs.t..........................................p...............................................P.......Lphluc.lp-Dhsllhpp...........................................sspahltAuslshsus.Wshpp+hGhs......ltpIH..sPV.Ptac..pplttu..h..................p+Fh.p+lps...sp.slpRhN.....Wslps.................ss..pLhhs...................tthtp.s..................p.ptp.shpphalRsEhQoLhRL.Pp...SsAllFsl+sahhslpcl...................................... 0 69 177 275 +11761 PF11928 DUF3446 Domain of unknown function (DUF3446) Assefa S, Coggill PC, Bateman A anon PFAM-B_833 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 80 to 99 amino acids in length. This domain is found associated with Pfam:PF00096. This domain has a single completely conserved residue P that may be functionally important. 
25.00 25.00 28.80 28.10 22.90 22.30 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.15 0.72 -3.79 13 977 2009-06-19 17:42:26 2009-06-19 18:42:26 3 16 615 0 79 862 0 79.10 48 22.95 CHANGED G+Fol-Pss..ssusShW..sEsllSLVS.GllGhsssPuo................ossSSSS.......oSuSotSssL.......Ssolppopss..sIY.S...usPsYSossuDIas- .........G+Fuh-ss......us..ohW...PEslhSLVS.GllGMsssPso.....................oosSSSS.........sSuSs.t.SssL.........SCo.sp.tS.-ss.....sIY..S....AsPsa...sss.suDIaPE...... 0 3 10 30 +11762 PF11929 DUF3447 Domain of unknown function (DUF3447) Assefa S, Coggill PC, Bateman A anon PFAM-B_10 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00023. This domain has a conserved SHN sequence motif. It seems likely that this region represents divergent Ankyrin repeats. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.96 0.72 -4.17 607 1243 2012-10-02 12:10:21 2009-06-19 18:47:56 3 305 1 0 1242 1262 1 73.30 47 16.44 CHANGED Fs.s.c.....IT.p.pCL.phSFLGGNs-IhsECL.K.....h..p..cPs....p.......pC.McaAI.lSHNIDF.VoF..L........hN..E..Y....sl.....c...I.s..........lp....Cs...h......apNLc.....uF....llY .....................ap.scIT.ppCL.phSFLGGN.-IhsECL.K..............h.p....cPs.........p..pC.McYAIISH.NIDF.VoF..L........................hN.E........Y...sl.........c....I..s........................Lp...........Ct...t.apNLc.uFlla................................................ 0 1242 1242 1242 +11764 PF11931 DUF3449 Domain of unknown function (DUF3449) Assefa S, Coggill PC, Bateman A anon PFAM-B_769 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 181 to 207 amino acids in length. 
This domain has two conserved sequence motifs: PIP and CEICG. The domain carries a zinc-finger domain of the C2H2-type. 49.60 49.60 73.20 72.20 44.10 43.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.47 0.71 -4.80 37 347 2012-10-03 11:22:52 2009-06-19 18:56:47 3 14 298 1 237 326 6 185.00 48 37.88 CHANGED uhtEatlpphsphLspphppThpplcRKpuhTtcEppt.................................................tt..spssppss............................pscsc.p-p-p.laNP.hpLPLGaDGKPIPaWLYKLHGLshpapCEICGNhsYhGR+AF-+HFsEhRHsaGh+CLGIs..sottF+pITpIc-AhpLac+lp.............ppppppph..........csppt..EhEDscGNVhscKsYpDLK..+QGL ........................................................................................................................u..hEhplhc.hschLs.cphptT+pNVpRKQuhTtcERpt.........................................................................pp.pppts.............................pscs-..--..--chlYNP.hpLPLGWDGKPIPYWLYKLHGLslpasCEICGNasYhGp+AFc+HFs.........EhRHsaGhRCLGIP.....NTspFtsITpI--Al.p.LWcKlc...............pppppp..ch...............ps-p-..EhEDupGNVhscKsYpDLp+QGL........... 0 83 133 197 +11765 PF11932 DUF3450 Protein of unknown function (DUF3450) Assefa S, Coggill PC, Bateman A anon PFAM-B_773 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are about 260 amino acids in length. 
26.90 26.90 26.90 26.90 26.60 26.60 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.38 0.70 -5.11 59 359 2009-06-22 08:13:35 2009-06-22 09:13:35 3 6 256 0 117 325 524 242.50 31 91.02 CHANGED hphsh...........sll....sshhhssus.sst.........................sssls..pshpttsptspsutpSQp+I-phs-pspshht-hcpltccl-sLcl...YssplpchlssQppclsuLpppIsplpphcptlsPLMhcMl-sL-pFlphDlPFht-...cRtpRl.ppLcshMscu-losu...EKaR+lLEAYQIEh-YGpplpsYpupl.sl..sGpphpV-hL+lGRluLlhpohDuppshhWsspsp....pWps.Lss.phtsslscAlclAccQtsssLlpLPl..................ssE ............................................................hhh......hhhhhhsss...sst........................As..sLs..psp.shpsp.tspuu.sp.SQp.+IDphs-pstphhsEhcplpcElcsLpl....Y.pspLpp..hVs..sQpp-hsuL.......ppQlpplcpocpslVPLM.hcMl-u.L.cpaVt.Dh.Phphp.......pR....ppRl....pcL.cphhscADVosu...EKaR+l.LEAYpIEh-YGpplssYpupl.sl....supplpsDhLpLGRluLlhpohDupphhhWssppp......tWpt..lss...s.tpslspuhplApcQhsPsLlpLPl......t.................................................................................................. 0 40 72 94 +11766 PF11933 DUF3451 Domain of unknown function (DUF3451) Assefa S, Coggill PC, Bateman A anon PFAM-B_877 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 199 to 238 amino acids in length. This domain is found associated with Pfam:PF06512, Pfam:PF00520. This domain has a conserved ADD sequence motif. 
27.50 27.50 27.80 27.80 27.30 26.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.66 0.70 -4.55 27 544 2009-06-22 08:19:43 2009-06-22 09:19:43 3 20 95 0 211 695 0 216.20 37 12.31 CHANGED SchSphS.cshpppc..............ptppcppccchp.tpEcscs-ph.+pcspss.pp.t...........ph.++h.......pps.LSlsGSsFssRRsS+u.SlFo..hR....sRt+psGs-s..................-aADDEpSsst.....S-p+........GulhlPhh...p+poutsShs.....S+uS+l....hsspG.....chh.u.sh-sNGhsshhttsthhs.......s.tphlPt...t.......sshp-sphpp....p.psh.hs..shlpp..sttc..p.pshstssh ................................................................S-hScLSSKSsKEcR....................st.pc..c.++.p+c.ptt.pEc.t.-s.-.c.h.K.S-S..--uh++psah...........................................p.sht++hs......................pQo.LSl.....GS....F..ss...R.RsS+s.SlFS...FR.....GRs+DlGSEs......................................-FADDEaSsht..........-sEuR.......RsSLFlPhth.tcRRsSp.....S.s.........S+uS.+hh..........hshsG......Khp....S.sVDCNG.VVSLlsussshs....................ssupLLPt..............................ssos-sph+p....h.sshtho.h-hhpc...ttRp.pshp....h................................................................................ 0 15 25 102 +11767 PF11934 DUF3452 Domain of unknown function (DUF3452) Assefa S, Coggill PC, Bateman A anon PFAM-B_1048 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 124 to 150 amino acids in length. This domain is found associated with Pfam:PF01858, Pfam:PF01857. This domain has a single completely conserved residue W that may be functionally important. 
26.30 26.30 26.30 26.40 24.90 26.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.69 0.71 -4.85 35 334 2009-06-22 08:24:09 2009-06-22 09:24:09 3 6 142 2 170 321 0 135.60 30 14.91 CHANGED .stssh..........t..ttss......loLoplL+us..+lslhcFFcclppas.chssl.sshass................plccLcpsashhtsLh++acchapplFhss.............stp..p.............psssphscla+FuWlLFLhh+sc.shphhcDLVsshpLLlClLslllhp ...................................................ttsss.......t..psss...loLTclL+ss..clS............lhpF.Fpc.h+ca....chssls.phhpp..............................................................phccLc+sasV.slla+.Kac..hFpc...lFtss.......................................pp..........................hptssssp.clacFsWhLFlhsKup..hh.htcDLV..sS...apLLLCsL-llhtp.......................... 0 42 67 113 +11768 PF11935 DUF3453 Domain of unknown function (DUF3453) Assefa S, Coggill PC, Bateman A anon PFAM-B_1335 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 239 to 261 amino acids in length. 
25.00 25.00 28.40 26.70 24.00 23.30 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.46 0.70 -4.96 30 316 2009-09-16 16:45:01 2009-06-22 09:26:07 3 9 245 7 222 311 2 219.90 24 24.22 CHANGED Lhcsp..DssVhKplI.sssslYtllhcal........hssssspphWsphstlKspIlsh.acos.hP......................GlKlsslKFlpplllsQopsss.st...........p.t.ps-hSLupVPssHsllphp.sLEtEuppLL-.tLLshhtpss.....................hsusllsAslNs...............................Luhlh+pRPph...ss+llsulhsass.sh......tss..h+lph+sV-+sl+hhlhphLK.pphs...phsuplpphls...chupshtclscps.p..cpsh..sccspcshc .........................................................pp..sstVhKpsI.shsplY.hshphh.....................ss.tpthWphhsthKspIlph...hcss.ss......................Gl+hpslKFlptllls.o.tssss............................p.pp-hSLs.hlP....t.s.H..shlpht.tLptEup...t.hL-..hLLphhhpss.........................hsuh.lhsslss..........................................................................Luhlh+pRP.h...hs.cllp.ul.shpssh..........ss..t+splpSlc+sh+hhlhpllK....psss..........hpspl..pphlt.............clutstt-..lhcph.p...tpth...tp......p.......................................... 0 68 123 184 +11769 PF11936 DUF3454 Domain of unknown function (DUF3454) Assefa S, Coggill PC, Bateman A anon PFAM-B_1847 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 60 amino acids in length. This domain is found associated with Pfam:PF00066, Pfam:PF00008, Pfam:PF06816, Pfam:PF07684, Pfam:PF00023. 29.10 29.10 30.00 30.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.79 0.72 -3.77 19 235 2009-06-22 08:32:58 2009-06-22 09:32:58 3 173 80 0 93 184 0 64.00 58 3.08 CHANGED sppa.TPPSQH..ss..ut.-sT...PsH.lp.....VP-.HPaLTPSPESPDpWSSSSPHS.hSDWSEGloSPss ....................stpa.TPPSQH.....uuuh....-sT........PsH..p.lp..........lss.HPYLTPSPESPDpWSSSSPHS.sSDWS..-us.oSPs.. 
0 12 21 51 +11770 PF11937 DUF3455 Protein of unknown function (DUF3455) Assefa S, Coggill PC, Bateman A anon PFAM-B_1386 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 174 to 251 amino acids in length. 25.00 25.00 27.70 25.20 24.40 24.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.63 0.71 -4.36 40 358 2009-06-22 12:19:23 2009-06-22 13:19:23 3 4 227 0 214 360 21 143.70 28 72.58 CHANGED Puup.plthsshGcGsQsYpCps.....sssssh.............................shl..GshAs................................LaDs...uupt.....................................................................................lG+Ha....st....sP........sachp.....sG..SplsGpphu.........sAPsss.........uslsaLhLsupst........Gshssls.V.RlsTsGGsuPs..sCsst...spphpVpYsAcYhFas .....................................t.phhh.shuhGhQsYpC.......stssth.............................hhh..us.As................................Lhct....s...............................................................................................lGpHa.........s....sP.............sachp.....ss......st..hhupths.........................sAPsss............sslsaLhLpsps.........Gshsslphl.Rl..sTtGGs.sPs.....sCsst......s....tthtV.YtApYhFa................................ 0 52 118 175 +11771 PF11938 DUF3456 TLR4 regulator and MIR-interacting MSAP Assefa S, Coggill PC, Bateman A anon PFAM-B_1799 (release 23.0) Family This family of proteins, found from plants to humans, is PRAT4 (A and B), a Protein Associated with Toll-like receptor 4. The Toll family of receptors - TLRs - plays an essential role in innate recognition of microbial products, the first line of defence against bacterial infection [1]. PRAT4A influences the subcellular distribution and the strength of TLR responses and alters the relative activity of each TLR. 
PRAT4B regulates TLR4 trafficking to the cell surface and the extent of its expression there [2][3]. TLR4 recognizes lipopolysaccharide (LPS), one of the most immuno-stimulatory glycolipids constituting the outer membrane of the Gram-negative bacteria.\ This family has also been described as a SAP-like MIR-interacting protein family. 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.16 0.71 -4.22 23 696 2009-06-22 12:34:13 2009-06-22 13:34:13 3 19 141 0 392 667 1 101.80 23 50.13 CHANGED ssKCpsC+hlssELc.ulscss.++c...l..tuh+lsscGcpcs+ph................ppSEhRLhEllEslC-+hp-Yslpp.csspp+hhhths.s.tphtt..s..th..............ss.sssLcttCcpllEcaE-plt-aatpcp.t..tslpchLCpcp......sptCp ....................s...Ct...sCph....ls.ph....t.thtph....t.pp...................thp.t.p.pt.th...................t.SEhclhEhh-t..lCp.p...............................................................................................................h.tCpthl.Ep..E-..l.phhhp.....tt.........pl.phlC.p........t.C................................ 2 109 147 264 +11772 PF11939 DUF3457 Protein of unknown function (DUF3457) Assefa S, Coggill PC, Bateman A anon PFAM-B_1822 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 162 to 197 amino acids in length. This protein has a conserved CSL sequence motif. 
25.00 25.00 25.20 26.50 21.70 24.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.77 0.71 -4.71 38 586 2009-06-22 12:36:17 2009-06-22 13:36:17 3 2 578 1 62 233 16 150.70 64 90.57 CHANGED thsshsttLtusaccltpspM+-lPlhN.suLsVcAlGFp.........hap...G.ph......lGlllTPWFMNLVhlP.....s..sspsass.hssGs+tsls.hPuGsa-Flsuphs.......slGsapuCSLFSPMhcFssptsAhssAcAsLssLhs........Ptppss...................................................s.........st.hsR.RshLpGth.s ................Fp.TuPtApVQAAFEEIA+RSMHDLSFLH.PoMPVaVSDFT............................LFE.G.QW.......TGsVITPWMLSAVIFP..............G.PDQlWPh.RcVuEKlGLQ.LPYGTMTFTVGEL..-....................GVSQYLSCSLMSPLS+S.hShEEGpRLsDDCARMlLS.......LPVoNPD............................................................s..PcsuR.RALLFGRR.S............. 0 12 32 45 +11773 PF11940 DUF3458 Domain of unknown function (DUF3458) Assefa S, Coggill PC, Bateman A anon PFAM-B_160 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 402 to 419 amino acids in length. This domain is found associated with Pfam:PF01433. This domain has a conserved FSAPV sequence motif. 
20.90 20.90 20.90 20.90 20.60 19.90 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.24 0.70 -5.61 177 1678 2012-10-11 20:01:02 2009-06-22 13:40:18 3 12 1582 20 408 1377 673 363.90 42 42.45 CHANGED sspp....asLslpQpsPsTssQ......s..p....KpPhhIPlsluLl.ss.s.G..pt....hshp..........................p.pVLplsp....scQoFsFp..sl...s.......pcPVsSL..LRsFSAPVpLch.shosc-LhaLhtaDoDsFsRW-AuQpLhtphlhphlsphp...........tt..t......lss.....t.hlsAhpslL...tc...ssl..DsAhhA.hLsL...Pocsplupth....p..ssDPsulatuRctlppplAptLpsphhtlYp..ph......tt..sap......sspshGpRuL+NhsLsaL....sttss.sts...phshpQapsA....s....NMTDphuALssLsps..p........sp..ppp.....tLssFhp+Wpc-sLVhDKWFulQAsssp..s.....ss...lppVcpLh.pHPsFshpNPNRlRuLlGuFu.sNsssFHstDGuGYpFlu-hllcLDshNPQlAARlh ...............................................p.sppYoLTlpQ...p........s...P...s.....T.....s.sQ......s.....-.........KpP...LHIPhsltLh.ss.c....G.cs...lsLphsG........................pssssV..LslTpuEQTF..sFc....sV.......h.........t..p...P.V.PSLLRpFSAPV+L-a.....c.aS-ppLsFLhpHspssFsRW-.AuQpLhsphl...ttslschp....................pG..ps..........lsl.ss..........c.lh-Ah+sVL...h.D........cpl......DPAhtAplL....oL....PSts.-lA-hh...............chlDP.AItpsR-sl..pcsLAs.pLt.--hhulYp.....st............t..ps..tYp...........s-ttshu+RsLRNssLpaL.......uh.s.-...........s..t..us.............s.LspcQacpA...........s.............NMTDplAALuAhlsu....p......hss......R-t....hhtcFpc+W+..pDuLVMDKWFhLQ.AosPs....s............s.s.....L-.pV+.tLh.pH..u.F..ohpN..PNR.................lRSLIGuFusuNPssFHAtDGSGYpFLs-hl...hcLsshNPQV...AuRLl............................................................................................................................................... 0 111 235 329 +11774 PF11941 DUF3459 Domain of unknown function (DUF3459) Assefa S, Coggill PC, Bateman A anon PFAM-B_897 (release 23.0) Domain This presumed domain is functionally uncharacterised. This domain is found in bacteria. 
This domain is about 110 amino acids in length. This domain is found associated with Pfam:PF00128, Pfam:PF02922. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -10.14 0.72 -3.86 120 2445 2012-10-02 20:10:03 2009-06-22 13:42:40 3 16 1643 30 652 2023 110 89.70 22 15.94 CHANGED haRpLLsLR+pc..PtLsss....phsssp.s.hhussslhspa+h.......sss.......tLt.lhhNLus...pssshs...........ssuclLausssstt..................sspLss.....tusshh ......................................YpcLlsLR+pp.......PhLs.tu............php.s..lt....s................s........s....s...s..h....l..hhhcch.............psp...............sLl.llsN...los.............pshshs.................tsstp.l..l...h.ss..ttst.....................shpL.s.hphhh.h............................................... 0 172 395 542 +11775 PF11942 Spt5_N Spt5 transcription elongation factor, acidic N-terminal Bateman A anon Bateman A Domain This is the very acidic N-terminal region of the early transcription elongation factor Spt5 [1]. The Spt5-Spt4 complex regulates early transcription elongation by RNA polymerase II and has an imputed role in pre-mRNA processing via its physical association with mRNA capping enzymes. The actual function of this N-terminal domain is not known although it is dispensable for binding to Spt4 [2]. 24.20 24.20 24.30 24.20 23.90 23.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.89 0.72 -3.37 23 279 2009-06-22 13:40:14 2009-06-22 14:40:14 3 21 236 0 194 277 0 95.10 31 9.27 CHANGED FlDhEAEVDD-E-E--.-p-t..s-..ptt.hhtph.............pcc+pp+ph-cccppppctssccls-hl.+..cRY.......tcp.....sstpstpsssshlspp...hLLP ..........Flt.EA-VDDE..-E-E-..-..E-.tsp-l..hp.t....tt..lcpst..............................................................hppD-pppR.chs..pphpp.pp.c.stEcluchh..+..c+Yucpp...........shttuust.usslsQp....hLLP.................... 
0 53 97 162 +11776 PF11943 DUF3460 Protein of unknown function (DUF3460) Assefa S, Coggill PC, Bateman A anon PFAM-B_2362 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 70 amino acids in length. This protein has a conserved WDK sequence motif. 25.00 25.00 52.00 51.90 22.90 22.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.00 0.72 -3.89 22 216 2009-06-22 14:40:06 2009-06-22 15:40:06 3 2 207 0 69 136 18 60.20 49 91.95 CHANGED s.YpS-hTpFlspLKtc+PpLtpcQ+tGRALLWDK.slDh-tptchcsu+VsQ+PYVYpsp ....YpS-hTQFLspLKppKPpLEpcQpcGRuLLWDK..slDhE.Eppp.hptA+VsQ+PYVY.pp.. 0 9 37 53 +11777 PF11944 DUF3461 Protein of unknown function (DUF3461) Assefa S, Coggill PC, Bateman A anon PFAM-B_2125 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 130 amino acids in length. This protein has two conserved sequence motifs: KFK and HLE. 25.00 25.00 58.30 50.50 24.60 24.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.65 0.71 -4.28 11 664 2009-06-22 14:41:40 2009-06-22 15:41:40 3 1 661 0 62 158 6 125.00 83 97.46 CHANGED MYssLcolGIssscpIE+YTLRpEuspDILKIYa+KpKG..-hFuKSlKFKFPRQpKoVlsDuGu.psY+ploEIussLphll-ELDplsp..+p+p-sDlKpclLcDL+HLE+VVssKIsEIEccLEcL ..MYDNLKSLGITNPEEIDRYSLRQEANNDILKIYFQKDKG..EFFAKSVKFKYPRQRKTVVAD.GlG.QGYKEVQEISPNLRYlIDELDQICQ....RDRS.....EVDLKRKILDDLRHLESVVoNKISEIEADLEKL.. 0 6 18 40 +11778 PF11945 WASH_WAHD DUF3462; WAHD domain of WASH complex Assefa S, Coggill P, Bateman A anon PFAM-B_2071 (release 23.0) Domain This domain forms part of the WASH-complex of domains and proteins that activates the Arp2/3 complex, see Pfam:PF04062. The Arp2/3 complex regulates endocytosis, sorting, and trafficking within the cell. 
The WAHD domain attaches to the FAM21 proteins via its N-terminal residues and to the microtubules via its C-terminal residues. 24.20 24.20 24.20 25.80 23.30 24.10 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.69 0.70 -5.12 6 199 2009-12-01 10:25:16 2009-06-22 15:43:54 3 8 109 0 106 172 0 237.80 44 56.81 CHANGED EuQsYSVPlIpPDLRREEul+QlsDALQYLppVusDlFsRIspRVEcNRs+LQAIssRlplAQAKIcKI+GSKKAhKVFSSAKYPAsDcLQ-YsSlFsG.ss-shp.K+s+a+lQoKhhsLD-+ulQEKLpYasVplNs+pccc.....Dss-EGLGSLP.RNlsSVSSLLLFNTsENhYKKYVhLDPLAGsVTKT+sslEsEcEEKLhDAPloITpRpQLERpsAENYFYVPsLGpVPEIDVPshLPDLPGlADDLhYSADLGPGIAPSsPus..IP-LPohs..T-sl.-sutscppttt.sPP ...............................hh..t.t...sh.phhpsLt.Lpth.tplF..p.plptplcps+pplp........sIsp+lshAQAKl-.pl...pG.S..+K..AIpVFSuA....KY.P.A...s....cph..p.c..asul..F.........s...s...spc.....s...h...........p.p....s.........p.h.........c....l.......p...s.............c..t......p......hs.c..psl...p.Ecl...p.......ha.......Vpspp.p..p.t........sptccGLGsLP.ps.lsSloSLLLFNTsEN.h.Y...........cc.....Y.....s..h....l.D...sL...t.....u....s....hs.c..oc..hhlt....p...p..c.....c...cLh-APhSIs.+R.-pLcpps....s-s..Yh.Y..lPsLG..pV.P-I..D..VPshLP..DLPGIAsDL..hYs.s.Dh.s.....su..IAPSssss....lP-LPsht..sp............................................................. 0 41 51 78 +11779 PF11946 DUF3463 Domain of unknown function (DUF3463) Assefa S, Coggill PC, Bateman A anon PFAM-B_2277 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 140 amino acids in length. This domain is found associated with Pfam:PF04055. This domain has two conserved sequence motifs: CTPWG and PCYL, plus a highly conserved CxxCxxHC motif. 
25.60 25.60 25.80 27.60 25.40 24.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.04 0.71 -4.58 67 326 2009-06-22 14:45:43 2009-06-22 15:45:43 3 8 316 0 145 297 55 138.20 51 37.89 CHANGED hhlSPGYuYE+AP-..........Q-pFLsRpcT+pLFRplht....tspp++.....WpFNpSPLFLDFLtG...pp.sYcCTPWGsPohslFG.WQ+PCYLl.sE.GYspTFc.......ELMEpTcW-pYG....supsP+CusCMlHCGYEsTAVh-shtp.hphhcssht .................hhlSPGYAYE+APD..........Q-HFLs.RccT+pLFRclFt....tGc.t.++.....Wp.FspSsLFLDFLsG....pp....sYcCTPWGsPshslFG.WQ+PCYLl...uE.GYspTF+.......ELMEsT-WDpYG....sGp..KCAsCMsHCGaEsTAVhcohspPhcshps...ht.......................... 0 44 98 125 +11780 PF11947 DUF3464 Protein of unknown function (DUF3464) Assefa S, Coggill PC, Bateman A anon PFAM-B_2676 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 137 to 196 amino acids in length. 25.00 25.00 52.10 52.10 24.70 24.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.06 0.71 -4.76 29 127 2009-06-22 14:52:24 2009-06-22 15:52:24 3 4 99 0 66 129 115 147.80 32 82.77 CHANGED .tss+pshsFc....Pcppppt.tt............................pstptptppppp........................tttt........psIPchVusRMhRRluhhoGlPohlGhusFlhuYhllspslh-.lPshlsllsShhhFhlGllGLSYGlLSASWDpp.sGSL...........LGhpphp.Nl...................sRhhpuh+stpp .................................................................................................................tp........................t......................................tptp.ptttttt.........................................tpppptsIPpsVupRMh+RhuhhsGlPshlGhusFss.Yhlhsp.thh-.lPshsshhsShhhFGlGllGlSYGlLSASWD.pp.GSh...........LGhpEhptNh...................schhpuh+tt........................... 
0 16 44 59 +11781 PF11948 DUF3465 Protein of unknown function (DUF3465) Assefa S, Coggill PC, Bateman A anon PFAM-B_2827 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 131 to 151 amino acids in length. This protein has a conserved HWTH sequence motif. 25.00 25.00 25.30 25.20 22.70 21.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.63 0.71 -4.33 31 258 2009-06-22 14:56:44 2009-06-22 15:56:44 3 2 187 0 36 166 4 137.10 50 92.21 CHANGED hpthhhh.h...hslhss...................huhs......htus-s.............................tlppAFpscQSslQVpG..........pGsVh+lLPDDscGSR..HQ+FIL+L...so............GQTlLlA....HNIDLA..PRlssLphGDpVpFhGEY...........EWNs.KGGVlHWTH+DPp..s+H..sGWLKHsGphY..p ...........................................................................................................................ssss.............................sLpQAYps.....pQ.SDl..QVQG..........hGpVsKV..LP..DD.N-GS+..HQKFIL+L...sS...............GQTLLVA....HNIDLA........PRIPsLchGDpVEFYGEY...........EWNc.KG.GVlHWTH+..DPp..sRHs.cGWLKHNGplYp............. 0 10 25 32 +11782 PF11949 DUF3466 Protein of unknown function (DUF3466) Assefa S, Coggill PC, Bateman A anon PFAM-B_2541 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 564 to 612 amino acids in length. 
25.00 25.00 25.60 26.10 24.90 23.60 hmmbuild -o /dev/null HMM SEED 599 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.22 0.70 -5.95 28 183 2009-06-22 14:58:19 2009-06-22 15:58:19 3 3 170 0 42 145 7 542.80 34 98.04 CHANGED hspplpLsslulLsusu....ssusA.........slYcllpls..............shssshpss....YuhAlssssp....................................................hsssucs+hph..t.........csssGlsh...t--s.ash-.ppapY.......hpshstsph..sasssps.aAph........................t.sWhs.h..pslssssssss...ssh..sps...................hsohsshlpul..sssGht..VGstosstp................t..h.hpsFppRuhspstst.........lsP...............sptspsGuassshs......hNG..s..lsVG.sSpshusss.....sp............hptsh.s.......ts.s...sh..............................sh......ap.s+AhlW.hDsss.............lsp...pph.shs.psssDssh..hhpupuhulsssG.........hshGsuassh.sscssh.......hpsulahspp..sps.sht.....sWssp..............DpshpsShAsDINcN.....................sllVGpt+p.......sGshss+hFhhDss...sss..spal.........s.....ss.FasGu...uocstuINNpsplVGhl-spssp-.....+sRt+pGFlYshss................pp.hsh.sstsWhhc..-lpsu.s........sshspssca+IlpAssINDsGsIsATAhhppsu......Yc...spup.sh..........susGt..s.phsVsVKLhPhsu....us.pss.lpscss-s.pshERp........GAu.hu..WhsLhsLs.Llhh......RR+ 
.......................................h.shphpllAhulLlusp......sAsA.........AlYpVhEls..................ssstshtss.......aGlAIpssss...............................hshsucs+hphsss.........css-GlsY..........p-Es.s.F.uh-..psasYs....-....hpsacpYChsph..hY..o..o..Cps...WAssph..........................s.tWps.......-.lsuspsssu....hAa...lts.........t.ssh........spStssllNul...sssups.......lG.pststphs........................t.hh.spshptR...ps........lsP.............t.spsl.stuWp...........ssu..s..hsVGShSps.sssp.....u....................................................................................sh.........ato+sslW.hsusu................ssp......hsh..stu.s..s.scs.sp.....hhpuSht..s..lspsG........splhuVGYsoa..ssssph.......hpAul.F.hss......sss.s.ts........sWssp.l...sshp.p.ss....DhhaoNSlsoDlNcN.....................hlslGsAK+tt.............sGus..sN+hFlh-ss.....oss....usal.........o..........sslFFoGu.....uucusuINshNplVGpl..Dssss+E.....tGKPRRKRGFIYshusst..............................ss.u+..hthhss+uWhLD..DLT.sss........sssssNspaRIl-AoDINDu.....GlIuATAh+sssu.......Y-..osupsuh.........................sussp....-plVAVKLhPIss......us..sss.Ip....p+....us-p..pssERp........Guu...hGhhhLhsLu.l.l.GF.....RR+............................................. 0 8 18 30 +11783 PF11950 DUF3467 Protein of unknown function (DUF3467) Assefa S, Coggill PC, Bateman A anon PFAM-B_2299 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria, archaea and viruses. Proteins in this family are typically between 101 to 118 amino acids in length. 
25.00 25.00 25.50 26.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.17 0.72 -3.97 52 326 2009-06-22 14:59:57 2009-06-22 15:59:57 3 3 305 0 126 294 210 88.80 37 81.29 CHANGED Mppppttt...........................................p.....lplc.lsp.......pshpus.....................YuNhshlsp.SspEFhlsF....ht....hh.ssts.psc..........lpsR.....llloPppAK+LhpsLpcslpca....EptaG.......................................................................................pIct .....................................p.................................................ptp....lp.I-..Lsc...........-sApGs.....................YuNlAlIsH.SsoE.FllDF...............lp.......hh.PGhP..cup..............VpSR.....llloPpHAKRLlpALp-Nlt+YEptaG.Ip........................ 0 59 106 118 +11784 PF11951 Fungal_trans_2 DUF3468; Fungal specific transcription factor domain Assefa S, Coggill PC, Bateman A anon PFAM-B_2291 (release 23.0) Family This family of are likely to be transcription factors. This protein is found in fungi. Proteins in this family are typically between 454 to 826 amino acids in length. This protein is found associated with Pfam:PF00172. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.30 0.70 -5.86 90 3818 2012-10-01 23:57:08 2009-06-22 16:07:12 3 72 166 0 3117 4021 1 286.50 11 52.41 CHANGED hll.paa...hsplss..hhsshsps................tssatphllshuhpst.................slhpulluhuuhah.......................pth...p........pppthpattp..ulptlppslpphpppts.........................pphlss....l...Lhlshh................-.....lspus....tt....Wth.HLpssppllpphtt...........................t.ptsp..............thpahhphhsah-llushsts...........................................................................ptshhshpthpphhps...........................st.t...........hc....hhGhsss......lhhl.....luclspLspptcptptp...............................................phtppspplppplp.php.p.sshptthssttthtpht...................................hph.aphushlYlh..phl.................hshsss.........................s.plpphlpplhphlptls....sssh............tsLh...a.........PlFlsG...spshss................ppRphh..........pph.hppht....pph.....ushppshpl..lpclWcppc..................................st............................phc..W..hclh..............c................................................hshslhlh 
...............................................................................................................................................................................................................a........h.......................................p...h.t..t.h.h..........h.shpp...........................................................................hlhp.u.ll..u.h..u..u..h..ah........................................................t.....t......................tttsh.pahtt.......u.l.ph.l.p.pt.l.t..p.ptt.p...................................................................tt.h....l.ss....h........hhL.hhh..................................................c...............hht.sp.....tp....................h.....H.h.pu..s...h.tllp...h.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................... 1 361 1347 2518 +11785 PF11952 DUF3469 Protein of unknown function (DUF3469) Assefa S, Coggill PC, Bateman A anon PFAM-B_2159 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 108 to 439 amino acids in length. 27.60 27.60 28.00 28.50 26.30 27.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.77 0.72 -4.29 18 183 2009-06-22 15:08:59 2009-06-22 16:08:59 3 7 89 0 117 169 0 88.40 34 28.79 CHANGED l-shRshhEo-cpWphR+cFlhpphssa........................thDcLlsLuhlauNhsFLGC+Ysp.clh-+lhcMAEGIsltcs.ppp...pRsplhtppp ....................l-phRs.tEo-cpWchR+cFlL+phsca......................t.ph..-pLluLShlWuNtlFLGC..pYsp.clh-KlhpMu-GItlpptsphp...pRs.l.tp..t..................... 0 33 45 80 +11786 PF11953 DUF3470 Domain of unknown function (DUF3470) Assefa S, Coggill PC, Bateman A anon PFAM-B_2503 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00037. This domain has a single completely conserved residue N that may be functionally important. 
23.20 22.70 23.20 22.70 22.60 22.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.01 0.72 -4.36 133 1106 2009-09-16 09:23:58 2009-06-22 16:12:12 3 11 970 39 322 784 1301 43.00 43 39.76 CHANGED ppp...alcLNu-LAc..tWP....sITp+K..-shsDA-cWc.....shpsKlp.h.Lpc ............ppalcLNAELAc..tWP....sITc+K..sshsDA--Wc.....GhtsKlp.h.Lpc........ 0 86 190 252 +11787 PF11954 DUF3471 Domain of unknown function (DUF3471) Assefa S, Coggill PC, Bateman A anon PFAM-B_2961 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 98 to 114 amino acids in length. This domain is found associated with Pfam:PF00144. 26.60 26.60 26.60 26.60 26.40 26.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.28 0.72 -4.16 67 608 2009-09-14 14:36:04 2009-06-22 16:15:38 3 30 431 0 263 599 75 102.60 19 21.22 CHANGED thpttttsshsstsLspYsGpYpps......h.........Gshslshcssp....Lhl.phssps.....t......hpL.hths..tspFhhchss...hp................hpFphs.ssGplsphshp.....ts..................hpshthtphs ...........t...t..tssssstsLpsYsGsYpps....h................h.......GslpV...p..t....c.s.sp.........Lhl.phssps....t............hpL.pa.hs...tcp.F.hhp..hss......hp.......................htFph..s..s.Gp.hsphphphs.............................t............................................ 0 85 167 217 +11788 PF11955 PORR Plant organelle RNA recognition domain Moxon SJ, Barkan A, Coggill P anon Pfam-B_1780 (release 8.0) Family This family, which was previously known as DUF860, has been shown to be a component of group II intron ribonucleoprotein particles in maize chloroplasts. The domain is required for the splicing of the introns with which it associates, and promotes splicing in the context of a heterodimer with the RNase III-domain protein RNC1. 
All of the members are predicted to localise to mitochondria or chloroplasts [1]. It seems likely that most PORR proteins function in organellar RNA metabolism [2]. 20.50 20.50 20.70 23.50 19.70 20.40 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.02 0.70 -5.69 27 336 2009-09-11 12:40:23 2009-06-23 17:10:07 3 12 23 0 211 323 0 298.50 30 73.17 CHANGED hh+-.sLDph..lp+p+cl+hshpl+cllhupPs...ph...........lslptLsch.pcpLsl........+sh.sFlc+aPplFclah...........tspt.hthpLTststsLhpcEtplhpppp..sshss+LpKLLMhohs+.plsLcpltpl+t-LGLPcDFcssll.caPshF+lsp.......tsstphLELlsa...cssLAV..oslEctt..p................ttthppthsFs.lpaPpGaplp+phpptl.........pcaQchPYlSPYp...ssp.pls....sso..tchEKRsVuVlHElLSLTlEK+sphc+LscF+ccaslsp+hpchll+HPGIFYlStK...spppTVhLREAYccs.pLl-+..sPlhhh+-KhhcLhphsthtppt ..............................h+s.thDth..hppp+ph.phhhtlhphlhppss...th................hslp.ltph.tp.ltl......hthhtalc+aPplFphh..............tt..hhplTsphttLhtcEtthhpp.pp...s.hsp+LpKLLMhoh.s+.plslpplttl+h-..LGLPpDF.pss.ll.pa.Pph..Fplsp...........pttthLcLssa....sspLAl..oshEpttt..........................t..t.tthsFs...hphP.pGap.hpcphpph.l.........pcaQ.clPYhSPYp........csp...tlp.....so....tphEKRsVullHElLSLTlcK+h.hcplspF+cthsls...pp...hpphlh+.HPslFYlSh+......spppoVhLREuY..p........p.........s..p..Ll-p..sP..lhhh+c+hhpLht.sh....t......................... 1 21 125 171 +11789 PF11956 KCNQC3-Ank-G_bd Ankyrin-G binding motif of KCNQ2-3 Cooper E, Coggill P anon Pfam-B_10256 (release 23.0) Motif Interactions with ankyrin-G are crucial to the localisation of voltage-gated sodium channels (VGSCs) at the axon initial segment and for neurons to initiate action potentials. This conserved 9-amino acid motif ((V/A)P(I/L)AXXE(S/D)D) is required for ankyrin-G binding and functions to localise sodium channels to a variety of 'excitable' membrane domains both inside and outside of the nervous system [1]. 
This motif has also been identified in the potassium channel 6TM proteins KCNQ2 and KCNQ3 [2], that correspond to the M channels that exert a crucial influence over neuronal excitability. KCNQ2/KCNQ3 channels are preferentially localised to the surface of axons both at the axonal initial segment and more distally, and this axonal initial segment targeting of surface KCNQ channels is mediated by these ankyrin-G binding motifs of KCNQ2 and KCNQ3 [3]. KCNQ3 is a major determinant of M channel localisation to the AIS, rather than KCNQ2 [4]. Phylogenetic analysis reveals that anchor motifs evolved sequentially in chordates (NaV channel) and jawed vertebrates (KCNQ2/3) [5]. 20.50 20.50 24.00 67.00 18.60 18.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.45 0.72 -3.96 4 127 2009-06-24 10:53:54 2009-06-24 11:53:54 3 9 37 0 50 111 0 100.90 62 12.78 CHANGED s.pDpsushshpussR-SDTsLSl.SVsHEELERS.SGFSISQs+-sh.shGss..............Auuus..ps+sRP.alAEGEoDTDoD.aTPsGs..PlSuTG-G.hu-ssWsu .............pDp.usR.pcushRDSDTslSl.SVsHEELERS.SGFSISQs+Esh...shss.s.................uusu...hschRP.YlAEGEoDTDoD.hTPsGs..PhSuTG-G.huDssWs.... 1 2 6 19 +11790 PF11957 efThoc1 THO complex subunit 1 transcription elongation factor Wood V, Coggill P anon Pfam-B_2059 (release 23.0) Family The THO complex plays a role in coupling transcription elongation to mRNA export. It is composed of subunits THP2, HPR1, THO2 and MFT1 [1]. The THO complex is a nuclear complex that is required for transcription elongation through genes containing tandemly repeated DNA sequences. The THO complex is also part of the TREX (TRanscription EXport) complex that is involved in coupling transcription to export of mRNAs to the cytoplasm [2,3]. 
21.00 21.00 34.60 25.00 19.80 20.60 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.76 0.70 -6.00 21 319 2009-09-11 16:24:37 2009-06-24 18:08:11 3 16 247 0 226 332 6 432.10 28 70.86 CHANGED tplhsLLDllhhhsc......pEp..spsuhlFa.Llc-llDh.TlssCcclFs.YlESRh-phpptphhp..+sh..............lLRpCN-LLRRLS+.....s.ccssFsGRlhlFLsphF....Plu-+SulNl+G-aps..ENlTsas.........................c.s.pppssssshclDhs..........................................................hYshFWuLQpaFssPspl...asst.phssFcpslcsslssFpphpp....-h.tp.ttptpt...-sptusc+ptss.-s..................p-hssu....FssKYLTuccL.....h-LplsDss.............FRRplLlQhLIhhpalhslsppppsp..............psh......hhLspccspahpphcptltphLppsh..p.......GthFhRhVcplLsp-csWlpWKsEGCP..shccs......hshtsh.ssccs......hpKhhhs...h............h.hGstsLshLhp.cspsulcpLKctpR.hpl......Pshcsahpslh-schchDhupsp....ppp.thcsctspsW+sLRlhu+ppLttFs+ls-s.pslcslh.........-t.sspcsshh.pshsp.ppcch ......................................................................................h.thlslh..hsp..................pt...spssh.ah..Llt-lh-s.s..lstCcplFs.alEpphph..hpt..p.hh...p..Kph.................................................lLRhCN..-...LLRR.LS+.............s..psslFCGRl.lFLuphF....PLu-+S.u............lNlp......upash...-NhThas...............................p.tptptsss..pth.lDhs................................................................................................................hY.hFWuLQchFpsPsph...apt...........patsF...t..pth..ptsLssFp..phph.........p..........................ptpt.pcch.tt..t.....................................................t..tsp........a.s.KaLTopcL............hcLp....Ls...Dss.............FRRplLlQhLIlhpaLhs.sphpptp....................................................hLsppp............ppah..pphpp.tl...hphLppss...c............GthFhchlcplLp.p-csWspW.....K.t-.u.Cs..shp+.............ts......t.s.ppt.................hpKhh.........t........................................h.hGs...pLspLhp....p.sp....s..
..hpthcs......pR....h................Psl.ppahpth.h-...phD.tp..........................cpphth.sp....tshsW+sLRhhu+p.....p..thF..p...h...s....p.....ptl....hh......t...............p.........................t................................................ 1 81 129 187 +11791 PF11958 DUF3472 Domain of unknown function (DUF3472) Assefa S, Coggill PC, Bateman A anon PFAM-B_2598 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, eukaryotes and viruses. This domain is typically between 174 to 190 amino acids in length. This domain has a single completely conserved residue G that may be functionally important. 25.00 25.00 26.50 25.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.28 0.71 -3.94 23 181 2009-06-24 17:14:14 2009-06-24 18:14:14 3 13 133 0 72 188 25 176.30 25 40.36 CHANGED YhGhQssu.......pocRplL.FSVWustsocss..h..-.........upsshss..pFGsEGo.Gspsh.hsYPWpuGpsY+Fplpst....psGtohasuahpDss..s...p..cahhIAShphP+ssshlsssto...FlEsFs....scpGshsR.puaas.spauhstts.phhclhpsshosssssspt..phsa.suGsss.sthalpssGhh .............................YhGhQsps............pspcplL...FSlWsshsssss.....h....h..........ussspss....pF.GsEGo..Ghpst.hsas.Wpsu.p.tY+hhlpst......tss....pTpa...ssahpDss..s............s..phphIuohchPpsssshstshu..FlEsas......ppGptt...R..tuhhp..stauh..shss.....ph.phpptp....hohs.ssp......pa.thuss..s....thhhhpssG..t................................................. 0 35 53 61 +11792 PF11959 DUF3473 Domain of unknown function (DUF3473) Assefa S, Coggill PC, Bateman A anon PFAM-B_3065 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 130 amino acids in length. This domain is found associated with Pfam:PF01522. This domain has two completely conserved residues (P and H) that may be functionally important. 
29.60 29.60 30.30 31.30 26.20 29.30 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.83 0.71 -4.73 51 224 2009-06-24 17:18:14 2009-06-24 18:18:14 3 3 209 0 97 225 89 131.50 30 46.29 CHANGED SSlaPlp.HD...hYG....hPcu.PRhP.aphsssst.....................lhEhPloThplh....stplPsuGGGaF..RlhPYs.....lh+hslpplNpps.pPslFYhHPWEl..DPsQPR....l..shshcs+FRHYhNLspsptRLcpLL.....p-Fcasphsplhtptt ............................SSl.Psh...s...pYG.....h.ps..PRhs.athtpt.........................lhElPsoss.hh.....hplPhuGGhah..RhhPht......lh+hhlppl.cp-.ts..hlhYFHPWEh....Dst.pP+......h...phshh..p..phRpassh.pphppRLcpLl.....psapasphpphh...th......................................... 0 34 79 92 +11793 PF11960 DUF3474 Domain of unknown function (DUF3474) Assefa S, Coggill PC, Bateman A anon PFAM-B_3095 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 126 to 140 amino acids in length. This domain is found associated with Pfam:PF00487. 27.80 27.80 27.80 27.80 27.70 27.70 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.02 0.71 -4.15 30 528 2009-09-16 09:00:21 2009-06-24 18:21:57 3 3 223 0 164 530 0 79.10 35 20.44 CHANGED MAoWVLSEC.GL+PLP.+lY.s+PRsuh....tpssss..plRhhssspshs.s.th....hs.s..t.....RN....WuLpVSAPL.Rlsolsc--cc...................ppspcFDPGAPPPFpLADIRAAIPKHCWVKDPW+SMSYVVRDVAlVFGLAA ......................................................................th................................................................................................................................................s....P....t....s.P.PF.olu-I+sAIPtHCap+sshRShSYVl+Dlsls.....h........ 0 36 99 143 +11794 PF11961 DUF3475 Domain of unknown function (DUF3475) Assefa S, Coggill PC, Bateman A anon PFAM-B_3098 (release 23.0) Family This presumed domain is functionally uncharacterised. 
This domain is found in eukaryotes. This domain is about 60 amino acids in length. This domain is found associated with Pfam:PF05003. 29.70 29.70 32.00 30.90 23.70 23.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.49 0.72 -4.14 20 207 2009-09-11 16:23:54 2009-06-24 18:24:45 3 4 18 0 134 194 0 55.90 43 11.06 CHANGED ILAFEVAsshoKsssLhc.SLS-pslppL+ppsLpS-GVcpLVSsDpstLLpLusAEh .....ILAFEVAsshsKsss.Lap.SLS-pplp+L.+pclLpS-GVppLl.....SsDps.LLpLAsA-h...... 0 11 78 109 +11795 PF11962 Peptidase_G2 DUF3476; Peptidase_G2, IMC autoproteolytic cleavage domain Assefa S, Coggill PC, Bateman A anon PFAM-B_3119 (release 23.0) Domain This domain is found at the very C-terminus of bacteriophage parallel beta-helical tailspike proteins. It carries the enzymic residues that induce autoproteolytic cleavage to bring about maturation of the folding process of the helix in a chaperone-like manner. The domain thus mediates the assembly of a large tailspike protein and then releases itself after maturation. These C-terminal regions that autoproteolytically release themselves after maturation are exchangeable between functionally unrelated N-terminal proteins and have been identified in a number of bacteriophage tailspike proteins [1]. 
25.00 25.00 25.30 25.40 24.60 24.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.53 0.70 -5.24 19 176 2009-06-24 17:27:32 2009-06-24 18:27:32 3 11 141 9 16 159 78 206.70 42 34.28 CHANGED ssstIhuStuspss..tsYslshGhGsssss..stslKhp.....uhsGslphsGslssuu..uDYAEhFEShsGpsIcs...GhlVTL-..G-KIRhAptuDp...hlGVlSpTuullhssushpWpsRYLpsEFGshlYcphp......D-........pus........hhchPh.NPcacPshc..YhsRpcRsEWslVGLlGQlhVRsDpTVpsG.chlp..ups.G......IuTpss..pG..hpVMclTssastp+GhulALs....hl+ ....................usptIhup.u.pu-....puYShhhutGsspssp...shssKW.......upsGphhhsGAshsuS.hoDaAphFEohDsphI-s...GYhVThs...t-KIRhAsssDs...ILGls.....SuTsAlIusuuuhpWpcRYhpDpFGth.Y.psp.....................................up...........phtPllNs-aDPspc...YluRc-RsEWhsVGLIGQhhVRsDsTspst.sYhc..sss.G........IATKu-..sG..ahVhKhTut.......s.h.....h........................ 0 7 9 10 +11796 PF11963 DUF3477 Protein of unknown function (DUF3477) Assefa S, Coggill PC, Bateman A anon PFAM-B_3147 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in viruses. Proteins in this family are typically between 246 to 7162 amino acids in length. This protein is found associated with Pfam:PF08716, Pfam:PF01661, Pfam:PF05409, Pfam:PF08717, Pfam:PF01831, Pfam:PF08715, Pfam:PF08710. 
25.00 25.00 30.80 30.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.19 0.70 -5.46 4 151 2009-09-16 08:59:33 2009-06-24 18:29:47 3 10 56 0 0 167 0 295.00 47 7.14 CHANGED MuKhuKYGLGFKWAPEFPWMLPsAuEKLuNPp+SEEDGhCPSsAQc.cstGhshsNHV+VDCpRL.u.ECCVQSulIRDIFVcEDPpcVEs.ThhALQ.GoAVLVKPshthSlpAhtsLGshPKshsMGLF+RhslCNTtcCssctHVAapLahlpPsGVChGsGcFlGWhVPlshhPpYu+pWlQPWslhLRKsGpKGuhsScHhRpuhphsVYDFsVEDAht.VHsEPKGKaSpKAYALl+sYRGlKPlLaVDQYGCDYoGsLAcsLpsYG.-hTLp-M+th.slWtssh-h-VsVAWHVsRDsRhsMRLQohATlpulsYVApPTEDlVDGsVVl+EPsHlLussuIlL+hPs ............................M.KhsKYGLth+WAPEF.Whh.sAtEcLssP.pS-.shhCsosuQt.cshGhs..NHVhlDCppl.t.EhpVQSslIR-Ihhppc...lEs.h..Alp.tpAlLlpssh.......hshGs.PpshsMGLF+ph........tsptaVsapL.hhpss.sshGt.pFlGWhlPhshhP..s+pah..h.hhlcctGpc.sh.ps+hhtshp..sYsFpVEDAh-.V.HDEPKGKaScKAYALl+sYRGlKPlLaVDQYGCDYoGsLAcsLpuYu..-hTLp-M+th.slWtpshsh-VsVAWHVsRDsRhVMRLQohATlpulsYVApPTEDlVDGsVVl+EPsHlLussuIlL+hPp. 0 0 0 0 +11797 PF11964 SpoIIAA-like DUF3478; SpoIIAA-like Assefa S, Coggill PC, Bateman A, Eberhardt R anon PFAM-B_640 (release 23.0) Family These proteins adopt an alpha/beta SpoIIAA-like fold, similar to that found in STAT (Pfam:PF01740). They adopt open and closed conformations arising from different arrangements of their alpha-2 and alpha-3 helices. They may be membrane associated and may function as carriers of non-polar compounds [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.48 0.72 -3.76 151 602 2009-09-14 13:55:16 2009-06-24 18:34:40 3 3 394 6 265 584 59 107.30 20 83.94 CHANGED sll.slphs..GcloppDhcp.lhstlcph.hp..ppsplplLh-hs.sh..puhsh.tAhhpph...h.....hp.phpc...hc+lAllussp.ahphhsphhsh.h.....hssph+hF..pppspAhsWLpp .............................................hlshthp..Gcls.pp-hcp....hhsh.lctt.hp....ppsplp.lLhchs..ph....pGh..sh..sA..hhcsh.......pht.....hp..phpc....hc.+l....A.llu.ssp.Whphh.s..plhsh..h.......hstcl.+hF..scppcAhtWLp.t..... 0 106 181 232 +11798 PF11965 DUF3479 Domain of unknown function (DUF3479) Assefa S, Coggill PC, Bateman A anon PFAM-B_1065 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is about 160 amino acids in length. This domain is found associated with Pfam:PF02514. 27.30 27.30 27.80 30.40 26.90 27.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.66 0.71 -4.44 61 302 2009-06-24 17:37:06 2009-06-24 18:37:06 3 6 209 0 139 323 268 160.50 33 12.73 CHANGED h+lVhloh-sphsuultpAstpLppc..hsuLclshasss-htcsstshpphppslspADllhuohlFhE-hlpslhstLpstp.pscshlsh.Shsplh+LT+lGpFsh....spspuuhhshlK+hts...pppusspspt.......phphlRplP+lLKalP.GKAQDlRsah ..................+lVhlsl-spapuulppAsp..pLspp...h.ul-lssahspEL...........ccstsh...pphpp-lspADlhluSLlFhE-hsp.hlhpslpspRpphcAhllF.ShPElM+LsKlGsFsM...........upspuuhsphhK+ht.......p.p..p...u..s.shtsu....................hlKlL+plP+lLKalP.sKAQDhRsah............ 0 38 82 118 +11799 PF11966 SSURE Fibronectin-binding repeat Pallas J, Coggill P anon Pfam-B_5998 (release 23.0) Repeat Streptococcal surface repeat domain - SSURE - is a protein fragment found to bind to extracellular matrix protein fibronectin but not to collagen or submaxillary mucin in Streptococci. 
Anti-SSURE antibodies recognised the corresponding protein on the surface of streptococcal cells. The full-length proteins are thus fibronectin-binding surface adhesins. 25.00 25.00 29.60 47.50 20.30 19.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.76 0.72 -4.39 6 697 2009-06-24 17:48:00 2009-06-24 18:48:00 3 17 253 0 24 516 0 80.90 74 34.13 CHANGED DNGsAKNPAL.PLcG..LTKGKYFYpVsLNGNTsGKcGQALLDQlRANGo+oYpATV+VYGs.KDGKsDhsNlVATKpVsIsIN .......DNGsAKNPALsPLtp..hTKGpYFYpVsLsGNssGKpcQALIDQhRANGTQTYpATVpVYGN.KDGKsDLsNlVATKpVTININ....... 1 6 6 12 +11800 PF11967 RecO_N Recombination protein O N terminal Mian N, Bateman A anon COGs Domain Recombination protein O (RecO) is involved in DNA repair and Pfam:PF00470 pathway recombination. This domain forms a beta barrel structure. 21.00 21.00 21.10 21.30 20.40 20.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.43 0.72 -4.12 154 4019 2012-10-03 20:18:03 2009-06-26 14:16:15 3 8 3998 7 873 2670 1561 78.10 27 31.53 CHANGED M....h...pspulVL+spsasEsstllslhTccpG+lssls.+Gu.+p.+..S+htu.hhpPhshlchhht......tpps.Lts.lsps-hlpsh ......................ppculVL.+sp.sa..sEsshllslhTcctG+lphl.A.+Gu...+p..t+......Sphtu..hlQPFshlphphp.........t....p.....p.....s.lps.lpps-hlp............................ 0 293 578 740 +11801 PF11968 DUF3321 Putative methyltransferase (DUF3321) Wood V, Coggill P anon Pfam-B_6141 (release 23.0) Family This family is broadly conserved, being found in fungi, plants, arthropods and up to primates. it may be a methyltransferase. 
19.80 19.80 19.80 20.20 19.60 19.60 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.65 0.70 -4.72 25 219 2012-10-10 17:06:42 2009-06-26 16:42:37 3 2 191 0 159 216 23 192.80 31 60.29 CHANGED upIcucIcphGGL-uYQhASphGQspcRGGDSSKlLVcWLpsthhphctp..p...................................L+sLElGALSspNshSpsshFc.VsRIDLNS..QpsuIhcQDFM-RPLPps-sE+FclIShSLVLNFVPsstpRG-MLpRhspFL+ssp......................t..s.LFLVLPLPCVsNSRYhspppLppIMsSLGFthspp..+popKLhYaLach...........psp........hpspha+KcplpsGss+.NNFsI ...................................................................................................t.l.tthtt.GGhptYQ.uS...up..p+t...ou+hlhphlt.......................................................................lphL-lGults..............N.hs.p.......thhp.shtIDLps..pp.tlhppDFhc................p........P....l..............s........p......t....p...............s....-.......p.................FcllshSLVLNaVPsstpRGpMh++spchLp.suh................................LFll....lPts.....C.l..p..NSRahs......pphpthhpuLGaphhp...cpss+lh.ahhach..................t................................................................................................... 0 50 81 129 +11802 PF11969 DcpS_C Scavenger mRNA decapping enzyme C-term binding Moxon SJ anon Pfam-B_9894 (release 8.0) Family This family consists of several scavenger mRNA decapping enzymes (DcpS) and is the C-terminal region. DcpS is a scavenger pyrophosphatase that hydrolyses the residual cap structure following 3' to 5' decay of an mRNA. The association of DcpS with 3' to 5' exonuclease exosome components suggests that these two activities are linked and there is a coupled exonucleolytic decay-dependent decapping pathway. The C-terminal domain contains a histidine triad (HIT) sequence with three histidines separated by hydrophobic residues. 
The central histidine within the DcpS HIT motif is critical for decapping activity and defines the HIT motif as a new mRNA decapping domain, making DcpS the first member of the HIT family of proteins with a defined biological function. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.73 0.71 -3.86 99 1192 2012-10-01 23:45:21 2009-06-26 17:03:18 3 21 724 20 661 5559 3144 113.70 25 50.60 CHANGED phla...shlhptpp..sppl....la..cD........-thlshpDhp........Pp.........ushHhLsIP.++..................cIpolpD......L.....spc.chsLLtchhphupclhppph.t..............h.ps.hphshHhtP..ohaHlHlHllu......h.sptht .............................................................hla.s.hl...ttp.........ssph......l.a..cD................-phlshp.Dlp......Pp.......................AshH..hLllP..+.c.........................................cIto.lp.c.......L....................st.c....c.h....s....L...L.t.c.h.h..p...h..u...p...c..l.h.p..p.p.h.t............................................s..p.p...h..ph....G.h....H....h.ts.............oh.h.H.LHlHllu..............t............................................................................... 0 218 363 534 +11803 PF11970 Git3_C G protein-coupled glucose receptor regulating Gpa2 C-term Wood V, Coggill P anon Pfam-B_11657 (release 23.0) Domain Git3 is one of six proteins required for glucose-triggered adenylate cyclase activation, and is a G protein-coupled receptor responsible for the activation of adenylate cyclase through Gpa2 - heterotrimeric G protein alpha subunit, part of the glucose-detection pathway. Git3 contains seven predicted transmembrane domains, a third cytoplasmic loop and a cytoplasmic tail [1]. This family is the conserved C-terminal domain of the member proteins. 
21.00 21.00 21.00 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.48 0.72 -4.22 19 170 2012-10-03 04:04:29 2009-06-26 17:08:30 3 3 114 0 132 185 0 74.00 30 13.66 CHANGED +cRRppIp+Qh+uIFIYPhuYlhlWlFPhlhcshpas.ap.......h..pptPhhWlshlushhpPhsshVDshVFhhRE...+PW+h .........t..php+lpR.l+hhhlYPluYlllhL.Plsstthths.............p.spsPshhhhhluushhshsGhVDsllaslpc...+....h...................... 0 28 68 110 +11804 PF11971 CAMSAP_CH CAMSAP CH domain Bateman A anon Bateman A Domain This domain is the N-terminal CH domain from the CAMSAP proteins.i 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.84 0.72 -4.33 27 382 2012-10-03 10:10:54 2009-06-26 17:12:11 3 57 114 0 173 3124 9 79.90 31 5.82 CHANGED hspscpsshhs.lcslhcDhsDGssLstllHaYpPphlplcslshppshShtcslhNlpllpphspcpL..psh.h.sh-Dllhs. ...................stpssh..t.s.tV..psLs..p.shpDGshL.C.tLlpp...Y..pPp.hl.s...L.....c...-Is.........h..+sp.h.S............h..slp.Nlphh...s..hs.ppth..p...h.p..Dhh.h................................................... 1 40 54 97 +11805 PF11972 HTH_13 HTH DNA binding domain Fenech M anon Pfam-B_8688 (release 14.0) Domain This is a helix-turn-helix DNA binding domain. 24.20 24.20 24.20 24.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.56 0.72 -4.12 12 171 2012-10-04 14:01:12 2009-06-26 17:21:52 3 4 128 0 53 149 5 53.90 53 15.22 CHANGED LPpLl-LlloRPlVSuuMlscpLclTspuAlpLVpELGl...REhTGRGRaRAWGll .........LPpLhELlLppPLVSuthltKcLpVT.puAh+...L...l...sE...LsL....RElTGRGRFRAWGll...... 0 7 28 39 +11806 PF11973 NQRA_SLBB NQRA C-terminal domain Bateman A anon Pfam-B_3622 (release 8.0) Domain This family consists of the C-terminal domain of several bacterial Na(+)-translocating NADH-quinone reductase subunit A (NQRA) proteins. 
The Na(+)-translocating NADH: ubiquinone oxidoreductase (Na(+)-NQR) generates an electrochemical Na(+) potential driven by aerobic respiration [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.20 0.72 -3.63 74 761 2012-10-03 10:59:06 2009-06-26 17:24:09 3 6 729 0 165 657 475 51.30 46 11.49 CHANGED VlALuGPpVpp..PRLlRTplGAslspLssGc..lpss.psRlISGSVLsGcpstu .....VlALAGPpVpp..PRLlRThlGAsLsp..Lss..u-....ltsu....-......sRlISGSVLoGppup.s..... 0 44 94 137 +11807 PF11974 MG1 Alpha-2-macroglobulin MG1 domain Bateman A anon PDB_2a73 Domain This is the N-terminal MG1 domain from alpha-2-macroglobulin [1]. 28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.78 0.72 -3.43 168 1084 2012-10-03 16:25:20 2009-06-26 17:28:18 3 42 1060 0 261 942 44 99.60 36 5.76 CHANGED lsslu.lt.spps.....................phhlps.ss...upshss...pVpl......hs.pps.t.........sht..psp..os......spuhhph.........................................................lph....psshshs.......phslsupss ....................lSDlG.lo.s+ph........cspl.........clFspSLcs..Gts.sGl.cVpL..ls.cpGp.............sLs..pus..oDupG+lplcss.................................cssslllA+.....c...................csphohLcL....phsuLDLu...-FsluGt..s................................................... 
0 75 154 208 +11808 PF11975 Glyco_hydro_4C Family 4 glycosyl hydrolase C-terminal domain Mian N, Bateman A anon IPR001088 Domain \N 21.90 21.90 22.80 21.90 20.90 21.60 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.45 0.70 -4.57 8 2959 2012-10-02 13:39:36 2009-06-26 17:33:48 3 5 1463 33 464 1786 410 216.50 29 50.24 CHANGED -clclphhGlNHhsWhpclphp.GsshhspLh-tluphucss.phcs..h.tssascshh+utsh.ths..hlssoatpahhhs...cahlchphssh...........hsRsppllcchpKplhpp...........htchhKppp.hlpspcl-c+.uuhhu-tthshl...aNsccthhhlNl.NsGtIsNlPssusVEVsClls+NGspP.hhlGsLPppltu.LhcpplsspchssEAhlTtchpplapAhhhsPpV ..............................................................................................plphchhGlNHhsahppl..hc..Gps.h....h.s.....p.......Lhc...tl....t...p............t...t........................h.........s..........c...........s............hh............t...s..........h..s.............p.....s..h..l.+.........h....t....................hlP.s..s..Y.h+Yahhs.............c...h..lp.....th..tc.h...............................hsRu.....ps...l.p.........chc+p.l.h.............................................ph..h..c.s.......p.....h..t...h...p....s..p...c..l.-..t.....+....s...us..h..Y...u.c...........s..shphl...................as.s.c.t..s..h.h.h.lNl.N......p............G..........t.......IsNlP...s-ssVElsClls.tsGspP..htlG.pl..Ps.t.htG....Lhps.hshEpLsl-Ah....lstshpcshpAhshsPhs................................. 0 162 297 368 +11809 PF11976 Rad60-SLD Ubiquitin-2 like Rad60 SUMO-like Wood V, Chahwan C, Coggill P anon Wood V, Chahwan C Family The small ubiquitin-related modifier SUMO-1 is a Ub/Ubl family member, and although SUMO-1 shares structural similarity to Ub, SUMO's cellular functions remain distinct insomuch as SUMO modification alters protein function through changes in activity, cellular localisation, or by protecting substrates from ubiquitination [1]. 
Rad60 family members contain functionally enigmatic, integral SUMO-like domains (SLDs). Despite their divergence from SUMO, each Rad60 SLD interacts with a subset of SUMO pathway enzymes: SLD2 specifically binds the SUMO E2 conjugating enzyme (Ubc9)), whereas SLD1 binds the SUMO E1 (Fub2, also called Uba2) activating and E3 (Pli1, also called Siz1 and Siz2) specificity enzymes. Structural analysis of PDB:2uyz reveals a mechanistic basis for the near-synonymous roles of Rad60 and SUMO in survival of genotoxic stress and suggest unprecedented DNA-damage-response functions for SLDs in regulating SUMOylation [2]. The Rad60 branch of this family is also known as RENi (Rad60-Esc2-Nip45), and biologically it should be two distinct families SUMO and RENi (Rad60-Esc2-Nip45). 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -4.29 42 1068 2012-10-03 10:59:06 2009-06-26 17:36:11 3 36 345 50 689 8130 273 69.50 32 32.96 CHANGED lplhl+stssc..clthclctsoplpcLh.ptappcpslst.....pplcLlFDGcplpsspThp-hslcDsDh..l-lll ................................................................plpVt.s..p.-ss...pl.p.F..+.l...K..c....s....T......Lp...KLh...cu....Ys......c....+.....p...G...l..sh..................psl+Fh...F.D........G..p......p.l......s....t...s....p...T.P...ppL.-.ME.D.t.Dh..I-V............................... 1 230 363 528 +11810 PF11977 RNase_Zc3h12a Zc3h12a-like Ribonuclease NYN domain Bateman A anon [1] Domain This domain is found in the Zc3h12a protein which has shown to be a ribonuclease that controls the stability of a set of inflammatory genes [1]. It has been suggested that this domain belongs to the PIN domain superfamily [1]. This domain has also been identified as part of the NYN domain family [2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.90 0.71 -4.39 28 659 2012-10-03 20:43:45 2009-06-29 14:20:43 3 19 191 6 414 616 25 146.30 36 23.62 CHANGED phRhlVIDGSNV.Ah......sH......sppch......FSs+Glthslpa..FhpRGHcslslFlspa..phpt...................ssthp-pchLpcLhc....tshlshsP.....Spph.Gpp..saDDRaILchApcpcGllV.SNDpaRDlhp.......cpsch+chlcp..........................RlLhaoFs.........tDthh.ssD.PhGRpGssLpc ............................................................................s.h+.lVIDGSNV..Ah.................sH............Gppch.......................FSs.+.GItlAVpa...Fh.c..+....G..+.p.s...l..s..VFlPpa..Rpcp...........................................ss..hs-..pchLp...cLpc....ptl..Lsh...TP..........o+ps.G.pc..hs..s.aDD.Ral..l..clA..h.c..s..c............G..lIV.SNDpaRDhhs..........-pspa..+.....c.....hlcc...........................RLL.aoFs....................sshFM.PcD...PhGR.GPpLp.p.................................................................. 1 101 165 282 +11811 PF11978 MVP_shoulder Shoulder domain Bateman A anon Bateman A Domain This domain is found in the Major Vault Protein and has been called the shoulder domain [1]. This family includes two bacterial proteins Swiss:A6FXE2 and Swiss:A1ZGE7. This suggests that some bacteria may possess vault particles. 25.00 25.00 25.50 30.60 22.90 21.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.49 0.71 -4.21 20 152 2012-10-01 22:02:33 2009-07-05 15:15:55 3 8 82 41 76 140 4 117.60 58 14.85 CHANGED GPDFhTDllpVETuDHARLpLpLuYNWaF-V.stsc.t...tsp+hFoVsDFVGDuCKsIASRlRuAVAuhsFDcFH+NSu+lIRpAVFGhcpt..........Gps+...spLpFssNsLVlTsVDlQSVEPVD .........GPDFhTDlIslETuDHARLpLpLuYNWaF-l..spp.csp...psu+LFSVPDFVGDACKAIASRVRGAVASlsFD-FHKNSu+IIRsAVFGh-ps..........scsR...cphhFstNsLVloSVDlQSVEPVD............. 
0 41 49 61 +11812 PF11979 DUF3480 Domain of unknown function (DUF3480) Assefa S, Coggill PC, Bateman A anon PFAM-B_2031 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 350 to 362 amino acids in length. This domain is found associated with Pfam:PF01363. 25.00 25.00 29.40 28.90 19.40 23.10 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.16 0.70 -5.83 8 179 2009-07-05 15:08:20 2009-07-05 16:08:20 3 9 82 0 101 161 0 292.60 50 28.68 CHANGED slPssPYLhGlLIpRhEVPWAKlFPLRLMLRLGApYRYYPCPLhSVRtRcslYGEIGpTIhNLLsDFRNYpYolPsVpGLlIHhEsp+ToIpIPpsRYs-lhKslNsS.s-HlLAhGusF...ochADuHLVClQN.-u......pYpTQAIsIcsQPRKVTG...ASFhVhsGuLKoSSGhhAKsSIVEDGLMVQIsPEpMptLRpALRsMKDapIsCG.l......-usDsQ.EhVsIpWV-s-pshNhG..VlSPIDs+uh-GlsSh+l..apuo-Ypsss.+IIRWTEVFhlps--c.pstsusstshoRLuEplA+usCtALsPHLcLLtpsGhsKLGLRlolDo-pVuYpAGSpGp.LPspYhNsLDstLlPll ...........................................lPssPaLhGlLIp+hEhPWAKVhPhR...LhLRLGAcY+.hYPsPLhSlRhRcslaG.EhGc.T.IhNlLsDhRNYpYoL.slptLhlchE.tposI.p.IPpp+as-h...............h.Ks.hNpS.N-HVluhGAsF...spcADSHLVClQN.sG........................sYpTQA.S..h.pspPRK.VTG..............ASFhVFsGALKoSSGalAKSSIVE...DGlMVQIssE..sM-uLRpALR-..KDFp.IsCG+s........Dut-.....p..Ehl.pIpWVDs.-.cp.hN.t..G..VhSsIDGpSh.....-.....u.....lssh+l......ht...ts.-a..c.s.s..t..+hlRhTEV..Fahtp-....pp.....hu......ssttphscplApAhshALsPHLchL+psGhsKlGLRVolD.oDp.VtapAGS..pGp.LPtpYh...NsLDssLlPVl................................................... 0 22 32 63 +11813 PF11980 DUF3481 Domain of unknown function (DUF3481) Assefa S, Coggill PC, Bateman A anon Pfam-B_2819 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00754, Pfam:PF00431, Pfam:PF00629. 
This domain has two completely conserved residues (Y and E) that may be functionally important. 28.50 28.50 35.10 48.00 25.10 24.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.08 0.72 -4.16 6 219 2009-09-16 09:17:33 2009-07-05 16:10:11 3 9 43 0 64 168 0 83.80 51 9.77 CHANGED -sos-T.lShpPsshLhoLDPILlTIIAMSulGVLLGAVC.GVVLYCsCh+suhocRs..hSsL.............ENYN......................FELsDGlKLKK-.hssQssh ...............t.............ps........LDPILlTIIAMSuLGVLLGAlC.GllLYCsCtasGhSpRs....hosL.............ENYN......................FELhDGlKhKhc.hNpQpsh........ 0 2 8 23 +11814 PF11981 DUF3482 Domain of unknown function (DUF3482) Assefa S, Coggill PC, Bateman A anon PFAM-B_3168 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 289 to 301 amino acids in length. This domain is found associated with Pfam:PF01926. THe central region of these proteins contains a hydrophobic region that is similar to Pfam:PF05433. 
26.40 26.40 26.40 27.20 25.40 26.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.61 0.70 -5.18 27 234 2009-07-05 15:13:17 2009-07-05 16:13:17 3 4 229 0 64 219 15 277.10 36 60.69 CHANGED RcALAchsLHAhhp..FDoVAssh-sEt+LacslutLLcp.tps.LpcLlsspppptptRpptAsplIA-LLl-sAuhR..tplssptst....ppshpphppplRppEppshcsLLpLYpFcppDhpsscLPLhsttatpDLFsP-sL+thGlchus....GuAuGAusGsGlDLhsGGlTLGsuAslGAlsGGs.......hpsspphup.cl......huKl.......pGp.+cLsl....DsssLpLLhlRphpLlsuLppRGHAupcslpLts..sp....p..............pth.....cp..spLschLpcA....Rs+PpWSsLs.....................st..chtptpRpphlppLsspl ..................................................................................................................phLuRhsL.HshstFDoVA.sh-GEh+LacsLAhlL..p.hc.stLcRLhstpppphpphpppAtchIA-hLlDsAAhR..........pplspstsst..............................pshpphppplRphEpph...ptLhtLYpFhcs-hsssc..hhsttatpD.hFssEhL+phGl+hGs..........GsAsGAhhGhGl....DlhshGsoLG.hushlGulhGGh........h.ssps.......l..hsKl.............pGt.ppLps.....DstsLpLLAhRth.pLltsLppRGHAAps.slcLps...p...............tsW.............pt..s+LPp.Lp+A.Ru+..pWS.oLs.....................st...c.pptcRtthhptL....hh........................................................................................... 0 19 35 56 +11815 PF11982 DUF3483 Domain of unknown function (DUF3483) Assefa S, Coggill PC, Bateman A anon PFAM-B_3204 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 230 amino acids in length. This domain is found associated with Pfam:PF02754. 
26.80 26.80 27.40 43.00 23.40 26.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.62 0.70 -5.05 14 145 2009-07-05 15:16:29 2009-07-05 16:16:29 3 4 143 0 52 139 9 221.60 55 35.17 CHANGED hLstlLPhLlahuLuLsslGAsRRsthWRpGpsopVsalu..GLhAhP+RYLVDLHHVVtRD+YhS+THVATAGGhVluhsLslLsashuLtsp.....hLsahLLsAshs.MhlGulFVupRRh...sPPuRLS+GPW.RLP+SLhuFuluhhlsoLssA.G.hlssshGGWsLsslLuhul.hhGssEhhhGhshGGPMKHAhAGALHLAaH.RsERF...........sGGRSTuLKslDLs......ss.L .......shLlssLLahulAhhshuss+RsshWRhGRuopsshhG..sLhAhPKRYhVDLHHVVARD.YlA+THVATAGGhluAhsLshlsaGhulau......hLshhlLlAshs.MhVGAlFVa+RRh.....ssPARLS+GPW.pLPh.LhAFAluhhLhsLssA..G.....t....h.G.....uaslhshLhlGl..GshphhhGhshGGPMKHAhAGhLHLAaH.RtERF...........uus..sTuLKs.DLpc...h..... 0 11 23 37 +11816 PF11983 DUF3484 Domain of unknown function (DUF3484) Assefa S, Coggill PC, Bateman A anon PFAM-B_3216 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 65 to 81 amino acids in length. This domain is found associated with Pfam:PF02491. 27.40 27.40 27.40 27.80 27.00 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.35 0.72 -3.14 15 535 2009-07-05 15:18:48 2009-07-05 16:18:48 3 4 530 0 32 179 0 75.30 36 16.84 CHANGED V-hLAQtAVsG-phlc++P.s...........phcpsssstpsss.ppPtppsstsppp...........spcscpcls-Rl+uhFGshFD ....................VshLAQsAl+GEpsLp+QPlsh............hs.pa.spss.sspss.s..s...-.t.....s..hts.scs.s-................tsspsKsKls-RhRulhGsMFD. 0 3 11 20 +11817 PF11984 DUF3485 Protein of unknown function (DUF3485) Assefa S, Coggill PC, Bateman A anon PFAM-B_3236 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 223 to 526 amino acids in length. This protein is found associated with Pfam:PF09721. 
26.10 26.10 27.60 26.10 25.90 25.70 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.23 0.70 -11.15 0.70 -5.24 59 191 2009-07-05 15:21:24 2009-07-05 16:21:24 3 2 160 0 96 206 48 204.30 17 52.21 CHANGED thhhssslllh..hussshhhp.t......ptssssppshthhPtp.husWptssss............stht.phhsssphlh.ppY..tssp.....spsVsLaluYapsQ...pssphlHpPps.Ch...sus..GWphhspp..ptslshsst...shplschhhptts.....ppphlhYWathssph.h......ssshps........KhhthhstLhspts...Duull+lsss............ppsstsAtptlppFl.pphhsslsthl. ...............................................................hh...hhhhhhlh...hsshshhhtst.........tt...thss....tp.......ht.thPtp.husWpttstsh...........stht.phhsssphlt.psY......tsss........sptVsLalu..aapsQ...ptspt.lHsPps.Ch.....sus...GWphhspp..tt.s..lshsst.......thplschhhpt....ss.......ppphlhaWathsu.ch..h......ssthtt...........Khh.hhstlttpts...-ushlplss.............tts.ttu.thhppFh.pphhs.l............................................................. 0 36 81 91 +11818 PF11985 DUF3486 Protein of unknown function (DUF3486) Assefa S, Coggill PC, Bateman A anon PFAM-B_3271 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are about 190 amino acids in length. 
25.00 25.00 25.90 25.00 24.70 24.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.95 0.71 -4.22 34 265 2009-07-05 15:23:11 2009-07-05 16:23:11 3 2 219 0 56 231 5 173.50 25 95.02 CHANGED Ms++SpV-hLPts...lRphLpphLpspshothclLppl.phh.t.......sh..........plS+SulpRYup+hEphhtpl+pupEhAchhspphu-.spschuchlhphlpohhFcshhsht-stcpp........hchLsplApslsplppAustpc+hpt-lRpphttpsss........ts-phsppsGl......Ss-shspIRcplL...G .............................huRtSpl-.hLPp...s....l+p...hLpphLp-pshoth......c.hl.ppl.phh...................sh.......plS+SulsRY...up.......+h....-p....hhtpl+pspEh...uchhspphu.-...spscho.chlhphlps.hacthhphpEstpss..............thlsplutsh.tpL...ppAuh.tpc+hcp-.....l.......+tphttthsp.........s-ch....s...tp..sGh......ot-hhptl+p.lLG....................................................... 0 24 45 51 +11819 PF11986 PB1-F2 Influenza A Proapoptotic protein Mistry J, Gavin OL anon pdb_2hn8 Domain PB1-F2 is a protein found in almost all known strains of Influenza A virus - a negative sense ssRNA Orthomyxovirus [1]. It originates from translation of the viral polymerase gene in an alternative reading frame [1]. PB1-F2 consists of two independent structural domains, two closely neighboring short helices at the N terminus, and an extended C-terminal helix [1]. Although the protein has originally been described to induce apoptosis, it has now been shown that PB1-F2 more likely acts as an apoptosis promoter in concert with other apoptosis-inducing agents [1]. PB1-F2 promotes apoptosis by localising to the mitochondria where it destabilises the membrane. This will cause release of cytochrome C which activates the caspase cascade of apoptosis through the endogenous pathway [1]. In this way it acts like the Bcl-2 protein family which are physiological apoptotic regulators in cells [1]. 
19.50 19.50 19.50 24.30 19.30 19.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.24 0.72 -3.95 24 10057 2009-07-06 12:30:56 2009-07-06 13:30:56 3 1 10020 2 0 3113 0 83.60 71 99.95 CHANGED MEQEQDTPWTQSTEHINIQKRtsGQQTQ+LEHPNSTQLMDHYL+TMsQVsMHKQTVsWKQWLSLKNPTQsSLKTRVLKRWKLFsKQEWTN .MEQEQsTPWTQSTEHhNIQKRGsGQQhQ+LEHPNSTQLMDHYLRhMSQVDMHKQTVsW+.WhSLKsPTQtSL+T+sLKpWK.FNKQtWTN. 0 0 0 0 +11820 PF11987 IF-2 Translation-initiation factor 2 Mistry J, Gavin OL anon pdb_1z9b Domain IF-2 is a translation initiator in each of the three main phylogenetic domains (Eukaryotes [1], Bacteria [2] and Archaea [3]). IF2 interacts with formylmethionine-tRNA, GTP, IF1, IF3 and both ribosomal subunits [2]. Through these interactions, IF2 promotes the binding of the initiator tRNA to the A site in the smaller ribosomal subunit and catalyses the hydrolysis of GTP following initiation-complex formation [2]. 20.20 20.20 21.20 20.30 19.20 20.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.32 0.72 -4.18 243 5391 2009-07-06 13:34:25 2009-07-06 14:34:25 3 51 4886 6 1587 4288 2608 108.80 42 13.02 CHANGED s.....oL-shhsphp...pu..p...hcpLslIlKADVQGSlEAlpsuLpKls..s--VclpllcuGVGuIoEoDVtLAsA.S..s......AlIlGFNVRssspA.+chA-pcs...V-IRhYslIYcll--l+t ......................................spL-shhpphp.....pG..-.....hcp..lslllKADV.QG.SlEAlpsSLtKls......s..-....-.............V....+V....pllcuuVGuIoEoDls.LAsA.S.s............AlIlGF.NV.R.s..s.s..p.A.+c...h.A...-......p....-s......V-lRhYslIYclI--lc........................................................ 0 547 1002 1339 +11821 PF11988 Dsl1_N Retrograde transport protein Dsl1 N terminal Mistry J, Gavin OL anon pdb_3etu Domain Dsl1 is a peripheral membrane protein required for transport between the Golgi and the endoplasmic reticulum [1]. 
It is localised to the ER membrane, and in vitro it specifically binds to coatomer, the major component of the protein coat of COPI vesicles [2]. It is comprised primarily of alpha helical bundles [1]. It complexes with another subunit of the Dsl1p complex called Tip20 which forms heterodimers by pairing the N termini of each protein [1]. A central disorganised region between the N and C termini of Dsl1 contains binding sites for coatomer [1]. The C terminus of Dsl1 contains a binding site to the Sec39 subunit of the Dsl1p complex [1]. 25.00 25.00 43.40 43.20 20.30 20.10 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.97 0.70 -5.58 7 25 2009-07-06 14:57:28 2009-07-06 15:57:28 3 2 24 3 13 24 2 341.10 49 47.20 CHANGED plhsspspllptltpDP.lh.csss+.psp.....hDhpslLpp-spLsc-LptLppLKhlusLlhEapsNh-lhELENCYYSLQNL+KKh+ssss.htpuhpFQQSlsoalDsLHlpLls+lhcllsphFWpIstsohpFppplphtcDcsp...h-Y-shhpFlppsaFscshlD.ppWhItshshu-.p-pVpppLsoIhpsYlphsplhctlKphlFscphphohps....p+LplspS...Gt.hlscplpSatsLssFl.sslo.cDpttlhhcLGsslssEhhKhlKpNupplLpppss.hKp.ls.lN-pLppLSpcot..sWsYsGp-lpcLLNDcplapsLhLDplhpppl.cIR ..u.LhsNKsEIIcpLhKD....PLll.cs-o+..coNt............phcLDss-LLpREuhLusELphLssLKTh.sLIcEhKTNlplLELENCYYSLQSLRKKh......R..N.NuuaL.KQSFsFQQSluTYVDTLHL-LVooLYcILTstFW+ITEN.SIpFsPoVEaG+D...+V+...IEYDTFM-FVsQQYF..PcGsLDspuWFIhDMs.uDuQEpVRuKLNTIhK-YhsLSpllshIKsuIFhsG+cISYEs..ppshLlFuKS..SS..+GpcslossLsSFcuVsDFhLDsLuh+D++oLuhcLGPLhsTEFTKFVKsNASlILcshc.SPLKsLVSsINsKLs+......Lsu+Sc...sosWoauGcEIpDLLhNKplYaNLLLDKlLEuHIo-IR. 0 1 6 12 +11822 PF11989 Dsl1_C Retrograde transport protein Dsl1 C terminal Mistry J, Gavin OL anon Manual Domain Dsl1 is a peripheral membrane protein required for transport between the Golgi and the endoplasmic reticulum [1]. It is localised to the ER membrane, and in vitro it specifically binds to coatomer, the major component of the protein coat of COPI vesicles [2]. 
Binding sites for coatomer are found on a disorganised region between the C and N termini of Dsl1 [1]. The C terminal domain is involved in binding to the Sec39 subunit of the Dsl1p complex [1]. The N terminal complexes with another subunit of the Dsl1p complex called Tip20 which forms heterodimers by pairing the N termini of each protein [1]. 30.00 30.00 30.30 30.50 29.70 29.90 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.78 0.70 -5.25 5 58 2009-07-06 15:01:51 2009-07-06 16:01:51 3 4 55 1 39 61 0 287.90 29 38.41 CHANGED DGWDEEWDID..ID-lcppsspt...........p-cIpITpLPctFtpIhpcFEpuscslucupV-spYhtYKFNLLQTuFFAMsosKasN-WaQLYpDMRYlhoc..NscLhRLpELstRhhEsNLshp+KhVppllpcQLspL+cNE+sPsWDsTIcsLLPFIccEllssLp+Itt.-up+aLLsFLsFLaNDCllsNILpWcIISEKNSENLSELIpLllNuT-IsuLsspPcYR+hREKluIluKlLPLHLKDIMEMFYNGDFYLFuTEEIIQWIlLLFADTPLRRDAIDDIpEIRpEupD ....................................................................................sWs-..s.Ws.-......h-.ctt.pptppt........................t...ppplplTplPp.hhtl.ppap...p..sh.sp..s....t.hp.p..p....h.h......ths.......l..L..t..o..hhAh....sps........pa..s....p..s...h.LasDhpYlhp..c......spp..L.chp.......-hs....h......p.h....lp.............pphp.phchV..h........pl..lpt..p..hp.p.hp...c.s........-cp..s...sh..ct.shppl...l.shl.h..ppl....h.p....h...ppl.pt...ppt.....phlhp.hlshl..hNshllppIlphp.ISEhpSppL.......ucLls.......ll..h..s.........s..o..c..l....t..tL..s....p...ps....s....Yh...c...s..hpKFshltplLssHLK-IM-hFYpG-ha..FuT-ELlphIchLFA-osLRcssIs-Ih-hRt........... 0 10 22 38 +11823 PF11990 DUF3487 Protein of unknown function (DUF3487) Assefa S, Coggill PC, Bateman A anon PFAM-B_2242 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 121 to 136 amino acids in length. This protein has a conserved RLN sequence motif. 
25.00 25.00 29.00 28.80 23.90 22.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.62 0.71 -4.72 25 230 2009-07-06 16:44:55 2009-07-06 17:44:55 3 2 188 0 64 200 9 119.40 36 94.45 CHANGED -sTlsFLP..cRLNp-PlVhRGhTssEhhlssshuhshG.lllGlsLuhl..suhlt..hlPshhllsslhslhlGushLpRlKRGRP-saLaRp.lph+lstph......lGsppLIh+SGhWolRRo....................tpt ..............tTlpFLPpRLNpcPl.Vh+GLTssEh.hlsshh.us.ssG.hllGlP.luhl..hs..h.h.h.hhPss...s..llsshlslhlGu....th..LpRl...KRG+P-saLaRp.Lp.h.p...lsphh.........lGsppLIh..+SGsWohRRo........tp.................. 0 7 28 51 +11824 PF11991 Trp_DMAT Tryptophan dimethylallyltransferase Assefa S, Bateman A anon PFAM-B_2054 (release 23.0) Family This family of proteins represents tryptophan dimethylallyltransferase (EC:2.5.1.34), which catalyses the first step of ergot alkaloid biosynthesis [1]. Ergot alkaloids, which are produced by endophyte fungi, can enhance plant host fitness, but also cause livestock toxicosis to host plants. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 390 to 465 amino acids in length. 
25.00 25.00 28.60 25.50 23.80 22.30 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.09 0.70 -5.24 53 313 2009-07-06 17:00:22 2009-07-06 18:00:22 3 10 118 12 161 321 0 311.10 24 78.37 CHANGED t-pch..WWcpouPhluplLpsAs.YslcpQapaLthapphllPhLGPaP.........ttttpahShl...optGhPlEhShNa........ppstp.sVRhshEPls...hsGTs.tDPhN.ptustchlpcLsph.hssl.DlphacaFtpplslspp..-pttlpppt..................h+s.....pthluhDLc....sup..hslKsYh......hPthKuhsoG.h.ssppllhcul+pls.t......thtsuhphlcsalssps........................sthpsth.luhDh.lsPs...coRlKlYltp.p.sohsplc-laTL.GGRhs..s.ssshcGLc.hl+cLW.pllsl..........sh......................ps.thstss..stphs.......hhhsa-l.pP........Gp.shPp.......splYlPl.h..............uhsDttlApuLspF.acphG..apchApp.YtssL ........................................t.Wh.ptsushhsphhttus.Ys.ptQhphLhhhtphl....lPhLGshP............tt..hahShl...ottusPhEhShsh.............spst..hlRhshEPls..hsG..st..tDshN.ptshtphlppltt................ssh...chphapthtpplhlstp...ptthh.tt...................hhs...............p.hluhDhp.......ssp........hshKsYh...........hPth+uhssu.............s.......tplh...hpulcpl.........thtsuhphlppahtpts...............................ththth..luhDh.lsss...........coRlKlY.hhpt..p.sshsplpchhTL.GGRh.p....s....ts.culp..hlcplW.tlhsh.....t........................t.....htt..t.ths..........hhhsapl.ps........sp..hPp........splYhssh..............s.sDhtlspslspahpphG..ht.t.sp..Y............................................................................... 0 28 75 132 +11825 PF11992 DUF3488 Domain of unknown function (DUF3488) Assefa S, Coggill PC, Bateman A anon PFAM-B_3123 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 323 to 339 amino acids in length. This domain is found associated with Pfam:PF01841. This domain has a conserved PLW sequence motif. This domain contains 6 transmembrane helices. 
25.00 25.00 25.10 25.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.05 0.70 -5.79 60 397 2009-07-06 17:02:56 2009-07-06 18:02:56 3 6 379 0 159 408 140 317.90 25 47.85 CHANGED ptlsR..psh..ha.LL.hshshllhPhhtplPhWhsu...lsshsllWRshlhhtph............sh...PsR....................allsslsls...ussslhhshsp.hhuh-sslsLLllhhuLKhlEh+spRDshlllhLuaFllhssFlhsQulhhshhhllslhhlhsuLlsl....pt.stsp.......................h..tpsh+huuplhL.ulPLhllLFllFPRl.sPLWtlP.sstsupTGLSDsMsPGsIupLspSsplAFRs.pFsus............P..spspLYWRGhVLppaDGcsWpts...........tttthtss.stsss.................t.tupslc...YplhhEPopppWLauL-......hshsss....sssths..sDhp.hhsp+P..lspph+YpspShsph .........................................t.........h..hh.lL..hshhhsl.hPh....h.hplPh...hh...hs...lh.s.h...shhhp.h.h.h.hht.ph.............h..P.sp....................hl.h.shlhls...shhsl...h...hp..h.ss..h.......h.......u...h-susslLlhhhsLK...hlEh+st.RDhhhllhlu.h..FhlssshlhsQu.....h....hhshhhl....ls..l....hh..hhss..L.l..tL.......pt..sptp.....................................................h...ttsh+h.ust.lhhhu.l.P.L...hl...lLFlhhPRl...s.....PL.Ws.l........P.......s.s........s...p.....u.p...T..G.LSD.p..hsPGslupLspssplAFRs.pF.sss...............P..s.p........ph.....YWRuhVhppa...D.....G.p.pWpts...................................thts..sts.p....................................................tspslp...Yplh..hEPs.p.ppa....LhuL-........hshsts........ts..s..tht.....s-hp.lhtpps..lsp.h.hpYphpSh...................................................................................... 0 48 110 140 +11826 PF11993 Ribosomal_S4Pg Ribosomal S4P (gammaproteobacterial) Assefa S, Coggill PC, Bateman A anon PFAM-B_3290 (release 23.0) Family This family of proteins are ribosomal SSU S4 p proteins. This protein is found in gamma-proteobacteria. Proteins in this family are typically between 162 to 178 amino acids in length. 
25.00 25.00 28.70 33.10 19.60 24.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.91 0.71 -4.47 19 166 2009-07-06 17:08:16 2009-07-06 18:08:16 3 1 165 0 38 120 7 163.90 44 97.59 CHANGED MQlcslt...LINEhQhGspLNpAVcpuRRu-.FuLlLuMLSpDsRDhs.hch.pstps.......pcptLRppFpls...psQsLtustsshphustpAphap....ptGhsuh+LpptLsPEsLshpsp-stsls-plhpNhShpsR++ltsp............pshphs.tphYspLssspptsplttp ........................................MQlHTLDKAslIsElphGpulspAV.pc..GRRAD.FALLLuhhSsDVRDsoPl-plcssss.......oEpsLRpcFELu...pPQ.tLcsDpSSYpluAcQAslFH.........puGhsSAKLuHYLpPEsLsaRPpDTpsLPEEVYtNLSGHpRR+Lus+............psspl.stchYcpLssAhRpsplps.t....................... 0 6 14 26 +11827 PF11994 DUF3489 Protein of unknown function (DUF3489) Assefa S, Coggill PC, Bateman A anon PFAM-B_3545 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 84 to 211 amino acids in length. This protein has a single completely conserved residue W that may be functionally important. 28.30 28.30 29.20 30.50 28.20 27.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.47 0.72 -4.03 26 127 2009-09-11 08:24:30 2009-07-07 12:41:14 3 4 70 0 54 132 26 73.80 42 46.18 CHANGED scsspsRp...................soKQsplIshLp+PcGA...TlspIscsTGWQsHTlRGsluGsl+KKLGLslso-Kss...u.sph..YRIs ............................................tsRt................soKQAplIsMLp+PpGA...TlspIspATGWQsHTVRGshuGshKKKL...GLslsScKss.......uttRs..YRI................................. 0 20 40 48 +11828 PF11995 DUF3490 Domain of unknown function (DUF3490) Assefa S, Coggill PC, Bateman A anon PFAM-B_3558 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 160 amino acids in length. This domain is found associated with Pfam:PF00225. 
This domain is found associated with Pfam:PF00225. This domain has two conserved sequence motifs: EVE and ESA. 25.00 25.00 31.50 28.70 24.40 23.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.13 0.71 -4.68 12 129 2009-07-07 11:50:29 2009-07-07 12:50:29 3 9 24 0 81 127 0 153.30 50 17.22 CHANGED -Fc+QpppII-LWpsCpVSLlHRTYFaLLF+GD.uDpIYhEVELRRLoaL+pohupss....A..ss.pslolsSSlKALpREREhLuKhhsp+hotEER-pLYhKWGlsLsoK.+RRLQlsp+LW.osspDhpHVcESAslVAKLlGFsEsGpts.KEMFsL.................sFss.s .............pFc+pppcII-LWcsCpVSllHRTYFFLLF+GDsuDsIYMEVELRRLoaL+poaupss............tsh...ss..sh.....ohsSSh+sLpRER-hLu+pMp++LotE.EREpLYhKWGlsLsoK.pR+LQlsp+LW.ocspDhpHlcESAslVAKLlGahEsGps..KEMFuL.sFss.s.................. 0 9 48 65 +11829 PF11996 DUF3491 Protein of unknown function (DUF3491) Assefa S, Coggill PC, Bateman A anon PFAM-B_3393 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 286 to 3225 amino acids in length. This protein is found associated with Pfam:PF04488. This protein is found associated with Pfam:PF04488. 
25.00 25.00 25.50 25.40 24.20 23.60 hmmbuild -o /dev/null HMM SEED 936 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.47 0.70 -7.05 8 259 2009-07-07 11:54:01 2009-07-07 12:54:01 3 4 123 0 7 238 0 528.00 32 36.34 CHANGED AVh-ssSRaacPPLospNcpuTVIAGcoPLTVIPlRLL---osERlcpAhuYKDYKIplpGGpGGLTVQIGGAGaYslTusP.uscNsISFRAIPtsFuVsFsLS.+tpQsVPLh+PNG.TclplLKIpQKGFsTIlGSuuGpDpLTGN.+DT+FYlSoGGGslaSGuGpNpYaIP+.LpssLsIsLosNSssHplhLs.po....hEh+ssussLoLI..h.thtsssIal.shDspsphspasssFpV+hsDGITlpAlc+tsst...........spLuVpoCD.ptWptpaPEEsuas-sIlcaL+chsWhLAPcVplhtpcupssYhshp+pLVYp.Pc.aSElplpupcsYpTtVpGssGsoYIl.s..sssspspslcIhLA-DsspPQTlDLSsllPoLVpG+hss....ssSIsLpl..SSs+YslsLolSWpscs..lPppThlplpPpcphpLG-ha+hL..ppssupWssLa+suhlIPcch.slLSlNNTshLMls+spps..sEHlLulENpuslshKlhGpLhSGaIKGu.....Wcs.spshsslp+hsloIPsHshpYLsFcGc.......cNlLF+ShLcutsLcs+spsphplS++pWppYD-IpVp....AToLpLpcFpRYpIuotscsLSRpLMYAQshVpIssRDlslKLFYl.REssGIGAlRLlFKNFFpESM-shscpTLEKEsKPlLsusPcphIssuY+sHLclhLG-cpLNLAplVpEFusop+IlshpcDhssHtllh.psppspsLsllTaTlssps-ssptt.p.p.hahDsh.pcY+.LPhsThs-s.YYLsPsoGDLYlTplls..scspsQAFll+LKsaKppWtcapphllSusHpphpp.l..ssTsLpFsGPElp+hEIDaspsss.h.h.sscllShSshlF.ss-QVlpYsP+hupQFaSh.-YMLW-L+-Rsptupc......A+saDsYLh-ushphhc+NspWKIssslLcaAlG.YYR 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 1 1 5 +11830 PF11997 DUF3492 Domain of unknown function (DUF3492) Assefa S, Coggill PC, Bateman A anon PFAM-B_2107 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 259 to 282 amino acids in length. This domain is found associated with Pfam:PF00534. This domain has two conserved sequence motifs: GGVS and EHGIY. 
23.30 23.30 23.40 23.40 23.20 23.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.73 0.70 -4.85 39 322 2012-10-03 16:42:30 2009-07-07 16:23:34 3 7 290 0 121 316 12 260.00 28 41.01 CHANGED s-lsLllEGTYPYVpGGVSoWlcplIpshP-hpFsllhlGuptcchtphpYplPsNVsclcphaLhst.........t..............tppup..ppshpthcplpchhcpsst.......stthhpph......hhtltpss..shs.psFLtScps.......WchlschYpcht.....spsSFsDaFWolR.hhtPLhtl..lupsl.PtsclaHslSTGYAGhLGulhptppspPhlLTEHGIYT+ERcl-lhpupWI...............hshhRchWl+aFctlu+hsYptA-pIluLactNRphQlptGAss- ............................................................................hclsLlhEGoYPYVpGGVSuWspplIpu.hs-hcFslhhluuptpp.s.phtYplPsNVsclcphhL.st..............................................tpttpptthphhpphhphhpp.hts...................stt.h..pt.h......h.tLtptt..thshtshL.tScph..........................ac..hl..pc.....hhpphh.....................spssFs-...haWsh...Rp..hht..sl.h.l..................lspsl..Pps..clhHulSTGYAGllGshhp.t.ppstPhllTEHG.....I.....YsRERch-lhpupWl......................shh+phWlpaFttluchsYptAc.lhsLap.t.spthQhphGAs........................................................................ 0 35 73 104 +11831 PF11998 DUF3493 Protein of unknown function (DUF3493) Assefa S, Coggill PC, Bateman A anon PFAM-B_3788 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 79 to 331 amino acids in length. 25.00 25.00 25.20 25.40 24.00 24.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.27 0.72 -4.09 21 125 2009-07-07 15:28:10 2009-07-07 16:28:10 3 7 83 0 70 127 107 77.70 35 29.61 CHANGED hssptcs+LhsEspsPaRGLR+hlalAhuASuslGhhlhhhRlhu.......Gssl.tslsNLulQlGuluLhshLhhhEpp .....h...pptt+LhuEspuPaRulRpFhYlAFsASuslGshlhls+Llu............ussl.....pslsNhulplGulslhshLahh-pp........ 
0 22 52 66 +11832 PF11999 DUF3494 Protein of unknown function (DUF3494) Assefa S, Coggill PC, Bateman A anon PFAM-B_3080 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 243 to 678 amino acids in length. This protein has a single completely conserved residue G that may be functionally important. 25.00 25.00 25.80 25.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.50 0.71 -4.69 43 181 2009-07-07 15:31:23 2009-07-07 16:31:23 3 33 84 5 76 193 13 198.30 33 42.53 CHANGED lLutoul.TssGs..osls.Gsl.GsSP.susu..lsGFs....hss..............Gslasssh.............shspAhsDhtsAYs...........sAsups.ssshstl..ssGsl.uGhTLsP....GlY.....ch.suu...lslos.slTLDutG....sssuVaIFQlu...usLosu.uuu.......plhLs....sGApApNlFW......Vuuu..solGssosFpGsllups...u.Iol....sTG.....uols.GRhLAps..AVTLsssslsp ...............................................................hLutsul.ossss....osls.Gsl.GsSP..uou..lTGFs...shss...........................................sGplauush....................hhspAhsDhtsAYs...........sAAups.......s............s.s..httl......ssGpl..GGhT......LsP....G..lY.....ph..ssu...lslou..slTlDutG......ssssVaIhQhu...usLssu.uus.......pVhLs......sGAp.ApNlFW......pVuuu....solG...s...suphpGslLuts....u.lshsTG..........uo.ls.GRhLups..AVTLssssl..t........................ 0 20 68 75 +11833 PF12000 Glyco_trans_4_3 DUF3495; Gkycosyl transferase family 4 group Assefa S, Coggill P, Bateman A anon PFAM-B_3335 (release 23.0) Family This domain is found associated with Pfam:PF00534. 
25.00 25.00 25.10 27.10 24.50 22.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.98 0.71 -4.63 53 208 2012-10-03 16:42:30 2009-07-07 16:34:35 3 4 180 0 79 213 126 168.00 33 40.39 CHANGED ppGHclshls.tpsptshs.....GVcllpYps..pctsstssas.ahpshEspsh+GpushcusppL+pp.GapPDlIluHsGWGEsLFL+-laPcupllsYhEa.aYpspGs.DssFD..PEa....sss...psth+........lRh+Nhs.hhphppuDhuloPTpW.......QpupFPs.a+s+IpVlHDGlDTsthp ..........................................t..upclshls.ppsptth.......GVphltYps...scts...sspsa....hhpshEptsh+GpushcsstpL+pp.GFtPDlIluHsGWGEsLFl+-laPcs.llsYhEa.aYcspGs.DssFD..P-h....shs..pshhp........lRh+Nss.llslpt....sD.hGloPTpW.......Q+spaPs.h+.s.+IsVlH-GlDTshh.t... 0 17 57 68 +11834 PF12001 DUF3496 Domain of unknown function (DUF3496) Assefa S, Coggill PC, Bateman A anon PFAM-B_3407 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 110 amino acids in length. 30.70 30.70 30.90 30.90 27.10 27.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.24 0.72 -3.81 7 174 2009-07-07 15:42:55 2009-07-07 16:42:55 3 17 36 0 67 132 0 100.90 43 13.79 CHANGED h+oQMELpIKDLESchu+hKTuQtDhspoELEcYKpLYl-ElKlRcSL..SscLsKosE+LAElsTKLhlE+cQ..scShhoohsTRPslEsPCVusLp.shshNRhhIPR...-slhl ....................psphELplKDLEscl.S+hK......T......upc-s.............poE.LEcYKphahcELKsppSL..SpKLs...K..o..sc+lA-lsTKLLhEKpp..p+shhoolssRPs.Es.PsVtNLs.ShslsRphhP+t.................................. 0 8 10 19 +11835 PF12002 MgsA_C MgsA AAA+ ATPase C terminal Mistry J, Gavin OL anon pdb_2r9g Domain The MgsA protein possesses DNA-dependent ATPase and ssDNA annealing activities [1]. MgsA contributes to the recovery of stalled replication forks and therefore prevents genomic instability caused by aberrant DNA replication [1]. Additionally, MgsA may play a role in chromosomal segregation [1]. 
This is consistent with a report that MgsA co-localises with the replisome and affects chromosome segregation [1]. This domain represents the C terminal region of MgsA. 21.00 21.00 21.00 23.60 20.70 20.20 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.93 0.71 -4.57 278 4662 2009-07-07 16:02:56 2009-07-07 17:02:56 3 19 4329 28 1089 3504 745 165.20 43 37.81 CHANGED GSDPDAALYaLARMl-uGEDPhaIARRLlhhAuEDI.GhADPpALtlAhuAhpAhctlGhPEuclsL..AQAllYLAsAPKSNusYtAhspAhpsl+p...ssshsV.PhHLRNAssphh+clG....aGpsYcYsHDhs....sua.ssQpY..LP...-p.lps......t.......aYpPs.ppG..hEpclpccLpplcp .............................................GSDsDAALYa.hARhlcA.GtDPhalARRLlhhAuED...IGhA-PpAhtlAluAh.puh..pc.lG.....hP.E.u..c..l.sL..Ap.All.aLAhuPKSNusYpAhstAhssl.+p....s...s....s...h....sV.PhH..LRsA..hph.h...K....c.LG...................h.G.ps..YcYsHDhs.........sua...ss...........QpY..hP-clts.......pp....aYpPs..spG...hEtpltpphthlt.p....................................... 0 359 698 920 +11836 PF12003 DUF3497 Domain of unknown function (DUF3497) Assefa S, Coggill PC, Bateman A anon PFAM-B_3419 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 213 to 257 amino acids in length. This domain is found associated with Pfam:PF02793, Pfam:PF00002, Pfam:PF01825. This domain has a single completely conserved residue W that may be functionally important. 
27.20 27.20 27.60 27.20 27.00 27.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.31 0.70 -4.98 32 930 2009-07-07 16:18:38 2009-07-07 17:18:38 3 137 93 3 416 750 0 222.40 24 14.93 CHANGED tp+hpsscss..p.lspcLtchT.....ps.shauuDlhssscllpplsphhs.pttthhsspccst................................................................pshVpsssNLLc.pstppWcplppscp....suopLLcslEchshhlApsh...hth..pphhhsssNlhLplthhssps.hps..hFPp..........hphstsplplstpshptssp........................................sG....shlshllY+sLGtlLsspssshth...tp.....hpsssp....hlsosllssslspp ..........................................................phhsGcsss..plsppLtchT......ps..sh..auuDlhsolclhppls..s.hhs...p....htth....h....su....s.p-sthp..............................................................pshVpslssLLpscshptWc....chppsct........sushLLcslE-huhsLApsh............hth.........p.ht.....hso...sN...l........h....L.pl.t.h..lssps..hps..hpFPp.................................stsstspl..plst.sshp.ssp...................................................................su......hhlshllY+..sLGthL.ss..p..p..ss..hth........t...............t.stp..................hlso.llsssl..................................................................................................................................................... 0 73 107 227 +11837 PF12004 DUF3498 Domain of unknown function (DUF3498) Assefa S, Coggill PC, Bateman A anon PFAM-B_3438 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 433 to 538 amino acids in length. This domain is found associated with Pfam:PF00616, Pfam:PF00168. This domain has two conserved sequence motifs: DLQ and PLSFQNP. 
29.50 29.50 31.40 31.40 28.80 29.20 hmmbuild -o /dev/null HMM SEED 495 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.91 0.70 -5.36 7 291 2009-07-07 16:20:56 2009-07-07 17:20:56 3 9 66 2 120 241 0 410.40 38 42.45 CHANGED LpDlotALsNPsslppps.......spc....+sss........P.susulSoGlQp.hh.pDls..............u.sDhsRLPSPT...........................tsKDlFaVo+sshhpsSPuhsS.S.S-hsEs-h..thssG............+SlShhDLQ-...............spstp........uhs......s...........hstpsspushsss..lpp..........ssph....css.sp..thLhPLSFQNPVYHhusshP...ssst.....cuuopscuosSSH.........................SssE-h.h..........h..pt......c-hst+ss...-hoccphshst...ttp.....slPppsosG.t..R+h.............sututpuph......Pssh.....t.h..........pssshhss..................p.spsG.u+.+QpopSppt-sss.+stuh..tts...........SPss......ssl-RTAAWl.N.N....-...........c...spc-hppsEK.....YppEIshLpE+LRhSsp+L-EYEtRLhsQ-pQhQKhL.EYQtRL--SEpRLRp.Q.-KD.QhpuIIsRLMuVEEEL++DHu-MQtVl.-uKQKIIDAQc+R...hsuLpusss............................RlhsuLs.lpE+a ................................................................................................................................................................LtDlphuLpsPt..p..............t-..h..s............s.h.tu.SuthQt.hh..pshs.........................Dht+L.SPo...........................tspD.aahs+s....hh........psu.po.o.S-hsE.-...ph...ss............+SlShhDLQs.............................stshph..ts..s.......s..s.................................h..t..ss......thsth.....plpp..........ss.h.......css.....sp..thh.PLSFQNP.laphsssh.P...h.Ptu............cuu......t...sh..us.SS+.......................................S.pSE-hth..........................h..ps..................--hspp.s.s.......-hsR.R..phs..p.....ph............h.Pt...Q..so.sG.t.....Rpht............................ss.....s.s...pups............Pssh.........tph............................SoGs.h..u.......................................................t.s......t.G..sR..R.QQ.SpSpc.........p.ss....p......p.....s.h.tt.tts................
........................oPss................ss..-RT...s..AWl.s...st...-................ch.p.pcph.p..psc..........................................hpp..-IthLp...-+L+hSs++LEEYEpchhsQEp.phpKllhpYp..AR...LE-...uE.....cRLRpQQt-KD.QhKuIIuRLMsVEEEL++DHs.......c.Mptsl.-sKQ+lIDA......QE+p...IsuLcuANs............................RLhsAlttlpt..................................... 0 16 27 63 +11838 PF12005 DUF3499 Protein of unknown function (DUF3499) Assefa S, Coggill PC, Bateman A anon PFAM-B_3439 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 125 to 163 amino acids in length. 25.00 25.00 27.20 30.20 23.10 22.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.86 0.71 -3.95 18 376 2009-07-07 16:22:24 2009-07-07 17:22:24 3 1 375 0 101 250 99 115.10 47 91.41 CHANGED ss.RRCSRsuCucsAVATLTYVYADSTAVlGPLAshuEPHoYDLCspHAcRLTAPhGWElVRlsst.t.....sstPssDDLhALA-AVREAup..stt.s.s.........................ssshssP........pssR....RGHLRVl..........s-s ....R.C.+suC.t.AsATLTasYu-STAVlGPLAs.t.EPHuaDLCspHAp+lTAPpGWEllRhsu......................sscPssDDLhALAsAVREuGhstst.hhsts.....................................................................ss.....tstthsR........RGHLpVlsD.s..................................................................................................................... 1 33 78 97 +11839 PF12006 DUF3500 Protein of unknown function (DUF3500) Assefa S, Coggill PC, Bateman A anon PFAM-B_3479 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 335 to 438 amino acids in length. This protein has a conserved GHH sequence motif. This protein has two completely conserved G residues that may be functionally important. 
27.20 27.20 44.20 29.00 27.00 23.30 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.11 0.70 -5.49 36 209 2009-07-07 16:26:39 2009-07-07 17:26:39 3 7 142 0 112 226 111 297.20 27 81.26 CHANGED stpspsssssshhsAApshLssLsscQ+ppshhs..........h..Dssptps.........W.s.h....sh......RsGlslsphsscQ+phshslLpssLS...spGapcstslhth-.p.......hL...........phtttt.........................hs.tpYalslFGs...Pus...s..psWGapatGHHlulNhsh..ssspl.shoPhFhGupPsthpp........hsG..hp.....sLtpEcshuhpLhpuL....sscQpppAhltpth...............................................sc.hphsushtc.....................h..pGltsupLsssQpchLhsllpp.Ylshhspphsc..tphscl..ptthspsaFuWhGs....htsscsaY..aRlpuPshllEassps..................sstsHlHolhRssss.DaGts ......................................................................h....t..htthhtAupshlssLsspQ+ttsth.......h......ss.phpp..............................................W.s..........h.............+tGlpl.sp..hss..t..pppsshplLpssLS....spGYpcshshhths.p.....hLt..........ph.st.......................................................................hsttsY.hslFGp.....Pus....................s...psWuhphtGHHlslNhhh...sspl.shoPhFhGspPshhst..................htG...hc.....hhttEpphuhpLhpuL....ssppptpAhl.tth...............................................p-.hphsus.tc.....................hs.pGlhsu..phospQpphlhsllpp.althhspphtp..tphppl..ptthscTaFuWhGs....htssc..s.hY..aRlpuPslllEaspps..................sst.HlHolhRs.ss.DaG..t................. 0 49 81 103 +11840 PF12007 DUF3501 Protein of unknown function (DUF3501) Assefa S, Coggill PC, Bateman A anon PFAM-B_3488 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are about 200 amino acids in length. The structure of protein Swiss:Q63J81 from B. pseudomallei has been solved. 
This protein contains two domains, domain I (1:31, 46:81) is a helical domain, domain II (32:45,82-193) is a mainly beta protein with a beta barrel. According to crystal contacts the proteins probably functions as a dimer. The gene neighbourhood analysis suggests that this protein may be functionally related to rubrerythrin and ferredoxin. The wedge surface between the two domains might be functionally important. The fold of this protein could best be described as a circularly permuted C2-like fold (details derived from TOPSAN). 25.00 25.00 104.10 103.70 23.20 23.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.09 0.71 -4.63 22 127 2009-09-11 15:56:04 2009-07-07 17:34:23 3 2 125 2 46 112 664 190.20 41 94.10 CHANGED hplshsslhs.hpsYu+hRtph+spllthK+pRpVplGstlslhFEsptTlRaQIQEMl+lE+hhc-tsIpcEl-sYssLlPsuusLpATlhIEhsc.s-RcptLscL.hGlccplalclsstt.tlhuls-tDhtRpss-.+sSoVHaL+F-lsss.hsth+s...uss......ltlGsDHssYshpst..lsssstpuLhsDLt ........................plTRssLLo.lEsYuKlRtph+s+llshK+cRtVplGs+lpllFEscsTlpYQIQEML+lE+lh-cpsIppEL-AYsPLlPcGssLKATlhIEhps.scR+ttLs+L.hGIE-claLcVssct..VhAIA.....-EDh-R-su-.KTSuVHFLRF-Lssshhsth+s...Gss...lplGsDHPsYshpst..lsssltsoLsuDL..... 0 14 30 40 +11841 PF12008 EcoR124_C Type I restriction and modification enzyme - subunit R C terminal Mistry J, Gavin OL anon pdb_3evy Domain This enzyme has been characterised and shown to belong to a new family of the type I class of restriction and modification enzymes. This family is involved in bacterial defence by making double strand breaks in specific double stranded DNA sequences, e.g. that of invading bacteriophages. EcoR124 is made up of three subunits, HsdR, HsdS and HsdM. The R subunit has ATPase and restriction endonuclease activity. This domain is the C terminal of the R subunit [1]. 
21.60 21.60 21.70 21.70 21.40 21.50 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.93 0.70 -5.20 79 1157 2009-07-08 08:20:11 2009-07-08 09:20:11 3 9 1042 10 158 990 58 263.60 21 28.79 CHANGED hlLhcsapchhcpapc............slp-Lpphhsss.......thspl..ts-psccpFlchFpch.+htstLpsas-.as.........................sptpht................hsppphp-acutYhsltpcl+cpp...ppppt.........s....pDlcF..El-LlcsscI...........NhDYIhpLltch..........ppspppppt..hcplhchlps..s.thcsccclltpFlpplph...........ph.sppslpctappatppcppcplpplspppsL....stctlpphlsshh...tptph..ptsplsc......hls.hphth......tpphp.....hcpplhpcltshlc+a ..............................lLhcsac-hhptapc...........................shpcLpphhsss....................hspl...tsEppp+cFlchFtph.+hhphLpsa--.ap...............................s.ttht........................hsppph..p-Yc..utY.slhcpl+...cppt..pppp.................sh...sDl..sF..El-h.h+stp.I...........NhsYIhpLltph.....................pppt..pppp.hc.....plcc.....hlpp........s.p.+tcccllppFl.p.php..................ph.psp...s.ltpta.pah.ppcpppchpthhpcpsl.....sppth+phhsphh...tt..hp.p.tt.tsplsc..........h...phhh.............phhp........h+ptlhptltthhcc...................................................................................................................... 0 53 104 135 +11842 PF12009 Telomerase_RBD Telomerase ribonucleoprotein complex - RNA binding domain Mistry J, Gavin OL anon pdb_2r4g Domain Telomeres in most organisms are comprised of tandem simple sequence repeats [1]. The total length of telomeric repeat sequence at each chromosome end is determined in a balance of sequence loss and sequence addition [1]. One major influence on telomere length is the enzyme telomerase [1]. It is a reverse transcriptase that adds these simple sequence repeats to chromosome ends by copying a template sequence within the RNA component of the enzyme [1]. 
The RNA binding domain of telomerase - TRBD - is made up of twelve alpha helices and two short beta sheets [2]. How telomerase and associated regulatory factors physically interact and function with each other to maintain appropriate telomere length is poorly understood. It is known however that TRBD is involved in formation of the holoenzyme (which performs the telomere extension) in addition to recognition and binding of RNA [2]. 21.20 21.20 22.20 22.60 20.60 20.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.70 0.71 -4.25 54 413 2009-07-08 09:22:28 2009-07-08 10:22:28 3 9 237 6 156 402 1 125.40 26 14.05 CHANGED ppVstFlhshlpp....ll........P.ph.aG.........spcNppthh.pplppalph.t+aEphslpclhptl+lschp.WLtttttsppht.......................chpppppllt.......palhWlhp.pllhsll+shFYlTE....p..ttppsclhYaR+slWpplsp.slsp..htp ...................................pVhtFlhthlpp....ll..............P.ph.hG..........................sp.pN.pphhh.pplppalpl.t+atphslppl.hpth+l..p.........sht.WLp.t.th..tt.p.h.............................................................................phphpppllt.........palhW.lhssh.ll.LlpshFYlTE....s..thp+p..plhaaR+slWpcL.p.tltph..t.......................... 2 52 83 123 +11843 PF12010 DUF3502 Domain of unknown function (DUF3502) Assefa S, Coggill PC, Bateman A anon PFAM-B_3448 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 140 amino acids in length. This domain is found associated with Pfam:PF01547. 
47.30 47.30 47.90 47.30 47.00 46.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.51 0.71 -4.19 18 679 2009-09-11 16:21:44 2009-07-08 11:40:33 3 5 494 0 91 398 5 134.70 42 27.71 CHANGED GIEGpHY-+lscs................pI+hhss..psY.shss.Wsh..GNhhl...phshEs-s..D+W-pacchsppApsSPhLGFpFDsssVcopIuulsNVhpcapssLhTGoVD.P-ctlschhpKLcsAG.lDKVhcEhQ+QLD-a.ptpsp .....................G.EGcpaEKlssp................+l+....lL.....c....u...h...p..s..s.......h.....c.h.u.u....Wst.....GNshI....l.Yhp...Es...s...s-....p...plcp.p.c....c......h.tc....A.+..p.S.P..hLG.FhFsocsVKoEIoulsNlhppatsulsTGTV...D..P-cs...lPcLhcKLKsuG..h-KVhpEhQKQhDEa.lpsp........ 0 56 77 82 +11844 PF12011 DUF3503 Domain of unknown function (DUF3503) Assefa S, Coggill PC, Bateman A anon PFAM-B_2686 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in viruses. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF00271. 25.00 25.00 54.30 52.00 21.40 21.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.18 0.71 -4.53 12 81 2009-07-08 10:43:55 2009-07-08 11:43:55 3 4 41 0 0 79 0 166.00 60 25.25 CHANGED M-NcLPsIFaFPNCVslFPacYSQpEh-cMppp-+ctFShAVFPlIKHRWpcuallhc.spsaKLssE.pp.ph..p+ls.ssl.s.....Psplshph+pYhhss.h+IoFECYSYLpCpphs.clpshs-...llRGLlEGGNpLpIFSssh.....G.phssoIGIhGNspPFsKlPLtSLpP .......MEKNLPDIFFFPNCVNVFSYKYSQDEFSNMSc....sER-uFSLAVFPVIKHRWHNAHVVKH.KGIYKVSsE..A.RG..+KVSPPSLGK......Ps+INLouKQY.IYSE.+sISFECYSFLKCITNs..EINSFDEY..ILRGLLEAGNuLQIFSNSV.....G.KRsDTIGVLGNKYPFSKIPLASLTP................ 0 0 0 0 +11845 PF12012 DUF3504 Domain of unknown function (DUF3504) Assefa S, Coggill PC, Bateman A anon PFAM-B_2196 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 156 to 173 amino acids in length. 
29.70 29.70 29.70 30.20 29.60 29.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.91 0.71 -4.49 23 439 2012-10-02 14:09:14 2009-07-08 11:46:23 3 21 79 0 263 356 1 158.10 35 17.41 CHANGED uhlh.Scl-E-hLWps+p.LGspSPhsLLpoLhaasT...KYFsl.+Tl-pHtcLuFuplh+ps+pt.........tsc.shlRahssp.t.pstpsh...ucppc...................ptt.h.hp.h-ssssPh+CPVpha-hYL.Kps..slcptpssFYLpPc+sssssuslWYsppslucpsLpphls+lhhs .......................................................................lh.S+l-E-hLWcs.KQ.LGsaSPhsLLsTLhaFNT...KaFsL.+Ts-pHhcLuFuplh+pp+p....................tsc.shlRah...s.s.h....ttp.s..t..p..t........scpp+...................ppt.h...hE.hENsp.NPh..RCPV+.....la-hY.LuK.sP..psl+pRs..DsFYLpP......E...p.....s.s...........ss........s.S.....P........l....WYospslscspLppMLsRlhh.......................... 0 90 109 169 +11846 PF12013 DUF3505 Protein of unknown function (DUF3505) Assefa S, Coggill PC, Bateman A anon PFAM-B_2856 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 247 to 1018 amino acids in length. This region contains two segments that are likely to be C2H2 zinc binding domains. 26.60 26.60 26.60 26.60 26.50 26.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -10.86 0.72 -3.82 16 197 2009-09-11 05:54:46 2009-07-08 11:50:44 3 15 33 0 172 211 0 103.00 23 12.06 CHANGED pLFha.spapVhICRp..CcauVhP..pplpsHL+t+H+ph........psttt.....plppsl.........psas..tpcspssphPs.s.s.PlPtLPla.sGhtC..s.spCpYlspshpsh+cHhpppHs .........................................h..ph.llICpp..Cpaulhs.....splpsHLpp.cH+th.............tt.tp..tltptl..............ppht......l..htss....p...s...l....p...h..Ps....t.stPlstL..shh....sG.ht.C.................stC.ta.hstshpshppHhpppH................... 
0 42 80 167 +11847 PF12014 DUF3506 Domain of unknown function (DUF3506) Assefa S, Coggill PC, Bateman A anon PFAM-B_3293 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 131 to 148 amino acids in length. This domain has a conserved KLTGD sequence motif. 39.70 39.70 41.00 39.80 38.80 39.00 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.79 0.71 -4.35 12 152 2009-07-08 10:53:22 2009-07-08 11:53:22 3 10 108 0 101 146 1 130.20 41 25.46 CHANGED pslosaspl-...hoPoppcPapGlalGsausaGsEhlhLpp+.Gp.ptscssppp.................psh.hhthlEAlKLTGDPNVPtGploFhAc.IGcpshlp..cc.sha....Gs..thh+upG+lAs.GF+sscal-u-Llhlssc ...................................p.t.otaspl....hsssspcPapGlaVGsausaGsEhl.lp......p+.sph.......tps.................................................................ssh.ahthlEAlKLTGDssVPtGploFhAc....lGp....s.uhlp..pc.shas.........Gs...thhKupG+lApsGF..+ss+al-ucLlllssc................... 0 23 58 84 +11848 PF12015 DUF3507 Domain of unknown function (DUF3507) Assefa S, Coggill PC, Bateman A anon PFAM-B_3482 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 180 amino acids in length. This domain has a conserved ENL sequence motif. 
25.00 25.00 25.30 25.80 24.60 24.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.04 0.71 -4.78 10 67 2009-07-08 10:56:31 2009-07-08 11:56:31 3 2 65 0 45 66 0 169.00 29 11.77 CHANGED lsslph.htols.sphhstps.hp.tpp.ppcWspl...FspuslFpGhD-llaGphlshVYccscos+lssh.lsKaGlosacNlslsspSRFaPAsENLpPcYpcSsVRRsLAlohLKp.......YsLLsssshphl.sphs...saDpTaAG-LAsshpLlss.psPp-hGppLlphGLLQs..+slpShllDVV .............................h......................................tphshapu.Dsll......aG.hl.....sss.pssp.o.....s+lpuhllsphGhpsasplslSssStaYsAlpNLs.ccQpspVp+uLAVshLKh.........aspL.ssst...hppl.....t....s.........phs.....ptaDEhaAG-LAspM.phlps.t.........s.........chspplh..GhhQp..+h.l.sh.lDVl....................... 0 7 20 39 +11849 PF12016 Stonin2_N Stonin 2 Mistry J, Gavin OL anon pdb_2jxc Family Stonin 2 is involved in clathrin mediated endocytosis [1]. It binds to Eps15 by its highly conserved NPF motif. The complex formed has been shown to directly associate with the clathrin adaptor complex AP-2, and to localize to clathrin-coated pits (CCPs) [1]. In addition, stonin2 was recently identified as a specific sorting adaptor for synaptotagmin, and may thus regulate synaptic vesicle recycling [1]. 
96.90 96.90 125.00 124.30 96.60 96.30 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.26 0.70 -5.29 2 50 2009-07-08 10:57:23 2009-07-08 11:57:23 3 3 30 1 27 46 0 287.10 77 35.28 CHANGED MTTLDHVIATHQSEWVSFsEEP.FPs..pGGTEEHhPGLSSSs-pSESSSGENHVVDtGSQDhSHSEQDDSSEKMGLISEAASPPGSP.QPsPDLASAISNWVQFEDDTPWuSTSPPHp...ETALsLTMPCWTCPSFDSLtRCPLTSESSWTTHSEDTSSPShusSYTDLQLINsEEQsSGpASGsDSTDNSSSLQEDEEVEMEAISW.AuSPAMNGHP.APPVToARFPSWVTF-DNEVuCP.PPVsSPhKPNTPsuAoshPDVPaNShGSF.KRDRPKSTLMNhsKVQKLDISSLNRsPSVhEAPPWRATNPFLNETLQDVQPSPINPFSAFFEEQER ................MTTLDHVIATHQSEWVSFsEEP.FPs.SpGGTEEHLPuLSSSsDpSESSSGENHsVDGGSQDhSHSEQDDSSEKhGLISEAAS.PPGSP.QPsPDLASAISsWVQFEDDTPWuSTSPP.Hp.s.E.TALsLThPCWTCPSFsSLtRCPLTSESSWTTHSEDTSSPShusSYTDLQLIsAEEQsSGpASGADS.TDNSSSLQEDEEVEMEAISWQASSPAMNGHP.AsPVTSARFPSWVTFDDNEVSCPLP.PlTSPLKPNTPPsASVhPDVPYNShGSFKKR-RPKSTLMNFSKVQKLDISSLN+s.PSloEAPPWRATNPFLNETLQDVQPSPINPFSAFFEEQER.... 0 1 2 6 +11850 PF12017 Tnp_P_element Transposase_37; Transposase protein Assefa S, Coggill PC, Bateman A anon PFAM-B_3357 (release 23.0) Family Protein in this family are transposases found in insects. This region is about 230 amino acids in length and is found associated with Pfam:PF05485. 
34.00 34.00 34.30 34.10 33.30 33.90 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.61 0.70 -5.13 6 118 2009-07-08 11:00:53 2009-07-08 12:00:53 3 6 45 0 59 139 0 205.90 34 36.85 CHANGED uNuSTQTEssllsp....ENcoLRpKIRsLEpEl+pLRQQLE-uppLEpSLspIFT-TQIKILKsGGKRusFNSsDhSsAICLHTAGPRAYNHLY+KGFPLPSRsTLYRWLSDV-IpTGsLDVVIDLM-N--MD-ADKLCVLAFDEMKVAAAFEYDSSADllYEPSsYVQLAhVRGLKKSWKQPVFFDFsTtMDsDTLpsIIpKLH++GYsVVAIVSDLGsGNQ+LWpELGISEp.K ..............................................................t....p.hcp....plp.p......hchchp....pLc...p....pl.....c.p...........p....p...lc.....c....sL...pph......Fop.s..Q.l.+..h...L.......p..ss.s...p.+..s.p.ass--hutAlsL.+...s.s.uP+uYpaLh.c.+.saPLP....SppTLh+a.lsslplp.G.hh..c.s.llcl....h....c..s..c...t..h...s...ptD+..lC....lLsaDEMp......lssth-YD...............s..........s.......t..........D.....h........l............h...........c.........s....u...sa.....lt.....ls...hlRGlpcsWKQPl...aa.....sF...s.o...t.Ms....s..ssL....p....p....I....l....p....+....LpphGh.VlAlVSDhGssN.phhpcLGl................................................... 0 23 28 57 +11851 PF12018 DUF3508 Domain of unknown function (DUF3508) Assefa S, Coggill PC, Bateman A anon PFAM-B_3527 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 280 amino acids in length. This domain has two conserved sequence motifs: GFC and GLL. This family is also known as UPF0704. 
25.70 25.70 26.20 26.10 25.40 25.60 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.70 0.70 -5.42 10 142 2009-07-08 11:03:06 2009-07-08 12:03:06 3 5 95 0 95 138 2 241.40 28 45.54 CHANGED ulLccslPsosp+I-ppLpsspctshcYTulLEchst....ssthspcl...h..LK-ALYNlRQaEsFLphlLoDlhssApp......V.EhhppchtuplEpLKpsl+sKoAVPTsQVF.....PhFsALupLWsuLp-EphLlssLsNLhspLpsFlssacLhhP..tpsh.slLsstsV+oDtsRhcp.........shtcclslu-apspEa...LhPEsTAsFpcL.lQYpGFCuaTlss+DGLLLPGNPulGlLKa+-KaYsFuo+cAAh.cFupc.P-pYIstltEpA++ssELIpLLcLcQpFpol ............................................................s..lp.s...h.ptlptplp...tstp.shpa.sullcphht...............sshhttph...h.....hp-tLhphRQaphalphlhpsl.thtpp.......l.p.hhpphtsthpplp............s..........l..........p...........+s..........ulsotpVa..............PhFhtLuplWs.shpc.Eh.hls.lsplhtplpsh..ht.s..pt.hhs....t.h.h.thL.p.....th.lps.c..p.chpc..................................p..h.....hphs-h...pp....ph.........hhs..ps...sssh...p..c...h..l.papGFCuhohstp....s.G...LL......l...........PGs............P...sl..Gllpa....pp....+hYsFsoc-ush.pFupp.P-palthlhchs+ppscLlpLlpLppph...h................................... 0 37 49 74 +11852 PF12019 GspH Type II transport protein GspH Mistry J, Gavin OL anon pdb_2qv8 Domain GspH is involved in bacterial type II export systems [1]. Like all pilins, GspH has an N terminus alpha helix [1]. This helix is followed by nine beta strands forming two beta sheets, one of five antiparallel strands and one of four antiparallel strands [1]. GspH is a minor pseudopilin; it is expressed much less than other pseudopilins in the type II secretion pilus (major pilins) [1]. The function and localisation of minor pseudo-pilins are still to be fully unraveled [1]. It has been suggested that some minor pseudopilins may assemble either into the base or the tip of pili, or both. 
They function as initiators or regulators of pilus biogenesis and dynamics, and/or as adaptors between various pseudopilin component and other members of the T2SS [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.61 0.71 -3.93 214 1652 2009-07-08 11:47:00 2009-07-08 12:47:00 3 3 953 3 399 1257 143 122.00 15 68.53 CHANGED ssppLtsslphARscAlp..p.spslslsstss..stth................tt...Wssuhhlhht..tssstt....t...........................thssshphshs...........................................ssplpF..s.ssGhs..........................tshs..ht.......hsssspspt..............l.hl.ss.....sG .................ApclhstlphA+scAlh..p.s...ps.lslphs...ss..uhph..........................tsp...W..pss.htshhs....ssstt..........tth.h........................h.ssssththp.t.....................................................sspltF..p..spG.s................................sshs....lp.........htssstt.....................th........................................................................................... 0 100 222 319 +11853 PF12020 TAFA TAFA family Assefa S, Coggill P, Bateman A anon PFAM-B_3899 (release 23.0) Family This family of secreted proteins are brain specific and thought to be chemokines [1]. These proteins are found in vertebrates. Proteins in this family are typically between 94 to 133 amino acids in length and contain a number of conserved cysteines. 25.00 25.00 25.90 41.70 16.90 16.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.24 0.72 -3.88 3 241 2009-07-08 13:15:59 2009-07-08 14:15:59 3 2 38 0 140 188 0 84.20 68 71.79 CHANGED +sGTCEVIAAHRCCNKNRIEERSQTVKCSCLPGQVAGTTRA+PSCVDASIVlQKWWCcMEPCLEGEECKVLPDpSGWSCSSGNKVKTTRV ..............sGTCEVlAhHRCCNKNRIEERSQTVKCSChPGQVAGTTRApPSCVD..............A.SIVhpKWWCcMpPCL-GE-CKlLPDhoGWoCoo.G.N.KlKTT+l..................... 
0 5 20 68 +11854 PF12021 DUF3509 Protein of unknown function (DUF3509) Assefa S, Coggill P, Bateman A anon PFAM-B_2180 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 92 to 110 amino acids in length. This protein has two completely conserved residues (G and R) that may be functionally important. 25.00 25.00 25.20 25.00 24.50 24.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.93 0.72 -4.24 19 201 2009-07-08 13:16:45 2009-07-08 14:16:45 3 1 60 0 39 130 2 93.10 31 94.84 CHANGED cph.phls-sFss.YpVshu..csDGullLTLpsssG.hlh+RhlotsQhs-tppLppllsul+R-LAlctGchs.plluthppt..............schts..........httt .....p..phls-sFss.YpVshp.sRsDGulLLTLpsppG..hlhcRslossQLs-.ppLppllpulRR-LAlptGch..plluthppt...........tp.........h.................... 0 2 6 26 +11855 PF12022 DUF3510 Domain of unknown function (DUF3510) Assefa S, Coggill P, Bateman A anon PFAM-B_2857 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 130 amino acids in length. This domain is found associated with Pfam:PF06148. 29.10 29.10 29.70 30.10 23.90 28.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.45 0.71 -4.02 21 198 2009-07-08 13:21:08 2009-07-08 14:21:08 3 8 161 0 133 191 2 126.70 34 17.21 CHANGED lpplhppsh.ptLcplpslsphYRhTN+psPop......sSsYVssllpPLpsatptt......thlspthhcchlpcllsplocpYhptss-llsoVcKpp-SLpRl+ptptpsuss...............shsD-DKI+hQLhlDl ..............................ppLscpCh.p.L+p.spslPphYR.tTNKp..lPop.............sSsYVssh..LcPLpp.hhstp............................pshltpthh....pphlptslspsTccYhphls-VLsoV+KhEESL..........+..RLKpspppssussss................................sssuhSD-..DKIRlQLhLDV............................. 
0 56 76 109 +11856 PF12023 DUF3511 Domain of unknown function (DUF3511) Assefa S, Coggill P, Bateman A anon PFAM-B_3314 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain has two completely conserved residues (Y and K) that may be functionally important. 25.00 25.00 33.70 32.60 17.70 17.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.32 0.72 -4.64 14 157 2009-07-08 13:24:58 2009-07-08 14:24:58 3 4 20 0 99 137 0 46.60 52 35.52 CHANGED hthsDPEhKR+RRVAuYKsYuVEGKlKuSlR+ua+WIKs+ho....p........llcG .......hsDPEh+R++RVAuYKsYuVEGKlKuSlR+uF+WIKs+ho....plhaG................. 0 12 58 82 +11857 PF12024 DUF3512 Domain of unknown function (DUF3512) Assefa S, Coggill P, Bateman A anon PFAM-B_3525 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 231 to 249 amino acids in length. This domain is found associated with Pfam:PF00439. 
33.70 33.70 37.20 34.40 28.00 30.30 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.52 0.70 -5.08 13 193 2009-09-11 13:40:57 2009-07-08 14:46:16 3 5 85 0 107 170 0 213.90 36 37.66 CHANGED ppKc.s....+p.h+ht........cscspspshssshstE+lLtplcpsucEAps+ls++hssuKhGFLR+psDGoTohslltss-sputEt..sp+sVsLGshsGKLpsGsssLtGFKED+RNKVTsVs.LsYGsaoSaAPpaDSsFuslup--oDLlYuTYG--oGspsAhSlp-FlcssspaupthscsLLDhlTsG-HS+oltplppppppp.p.sc......................pspstttppusshlDatuL+oloslGlDsphL .........................................................p.p....+p...phh........ps.hptp.hsssh...E+..t.lcphscE.uts+ls.phhssuphta.++psDGohh.hsllp..ss-s.st-..........thpsVcLu.hou+L.s..Gh..s.oL...G.FK..-D+RN+VT...lp.h..YusaoSaAPpaDSsFuslsp--o-LlYSsYG-.......-oulps.uhS..lpEFltss..ssYsht.hsDsLLD.lTsG-HS+ol.plcppp..s.t...c.................................pht.s.h....tss...s..chhuhcshs.shulshphh....................................................... 0 26 34 66 +11858 PF12025 Phage_C Phage protein C Assefa S, Coggill P, Bateman A anon PFAM-B_3530 (release 23.0) Family This family of phage proteins is functionally uncharacterised. Proteins in this family are typically between 68 to 86 amino acids in length. 25.00 25.00 61.90 61.70 24.50 21.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.26 0.72 -4.02 3 67 2009-07-08 13:51:34 2009-07-08 14:51:34 3 1 54 0 0 34 0 65.40 71 83.07 CHANGED sLSL+sSRSSYFATFRHQLslLoKTD.ALDEEKWLNMLGsLLKDWFRYE-HFVHG+cSLlDILKERGLL sLSL+sSRSSYFATFRHQLslLoKTD.ALDEEKWLNMLGshlKDWFRYEsHFVHG+cSLlDILKERGLL 0 0 0 0 +11859 PF12026 DUF3513 Domain of unknown function (DUF3513) Assefa S, Coggill P, Bateman A anon PFAM-B_3541 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 192 to 218 amino acids in length. This domain is found associated with Pfam:PF00018, Pfam:PF08824. 
This domain has a conserved QPP sequence motif. 25.00 25.00 28.20 28.20 21.60 20.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.38 0.70 -4.78 14 329 2009-09-16 12:09:40 2009-07-08 15:01:52 3 10 100 3 138 289 0 197.70 44 27.92 CHANGED ssst.p.phsttthhEDYDYVHLpup-php+pptplhc+t..............................................t.pp.EpLcppstcstpps...............oPsp.hs.........lsspD+pLLhaYucQCptahssLhpAlsAFhuSlsssQPP+lFlsHuKhVIluAHKLVhlGDTLsRpspst-lRscVhpsSstLCphLKslVLuTKsAALpYPSsuAlp-MsscVpcLsppsppF+shLt ....................................................................................s......p.sttshh-DYDYV+LpG..+-c..hp+ppcplh-pt.sh.....t.......................................phppachLcp.phs.p.s.h-ps..h.tt...........................................pPspshs..............sssluspD+QLLhF....Yh..-QCcsahssLhsAlDAFao.........sVussQPP+...........IFVAHSKFVILSAHKLVFIGDTLsRpsput...DlRscVhptSshLC-hL+slVhsTKtAALpYPSssAhQ-MVcpVp-LuppsppF+psL.h........... 0 22 34 72 +11860 PF12027 DUF3514 Protein of unknown function (DUF3514) Assefa S, Coggill P, Bateman A anon PFAM-B_3570 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 368 to 823 amino acids in length. 
25.00 25.00 27.30 33.70 16.20 20.30 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.71 0.70 -5.14 26 49 2009-07-08 14:03:17 2009-07-08 15:03:17 3 2 7 0 32 48 0 222.90 18 45.07 CHANGED lhh+s-l.shLpph.ssphchsshllKluhlhp.hhhhhst.ph...sptlhhs.t.phhc.phlD.lLhclhssspss......thshpthhphhhshptpph.spshthasssF..appllslhpcll.ts+hsssh......h...pllshlllSCslplGsMY......t.hthhsptsss.tp......................ptthhshhshplhpshtahYssssp...slphl..sslshPphllchCSpph.hsplpppltpsph..sFslhhpclspplsphlspps ......................hhp.ph.phl.p..hpp.phthhllKhshl.p.hh.hhsh.th...sp.lhhs.t...h-..hl-.lLhplhsh.pps.......h.hphhhthhhshhtpth..tshthhsssF......Ypplhplhtplh..h+.sps.......h....plhshlllussh.lGtha........hhhhtp.tht.tt..........................t.h.hshhshplhhshhahYssssp.....l.hl..sshshPthllphsSpth.hs.lpp.htpsph..sFslhhpslspplsphh.p........ 0 9 9 9 +11861 PF12028 DUF3515 Protein of unknown function (DUF3515) Assefa S, Coggill P, Bateman A anon PFAM-B_3590 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 166 to 214 amino acids in length. This protein has a conserved RCG sequence motif. 
25.00 25.00 37.50 32.40 20.20 19.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.28 0.71 -5.00 26 415 2009-07-08 14:05:35 2009-07-08 15:05:35 3 2 352 0 114 292 45 149.20 30 92.10 CHANGED tssllussslsslLssAusht.....tslulushPAPpAssst.CpsLhssLPppLushpRuthh-PsstGsAAW...Gss.tsllLRCGl-p.Psp.hsssush.sV..cs...VpWhtsssts...sus.........................so.has.VsRsshVtlTls.sss.......Gs.oshpsLoslhsss......hthtshpsss ..........................................................h.........hhhhhhhsh..........................................s..sh.s.sss....ust............Assst...Ctslh..ssLPpp.l.....u.....s....ht+....t...s..hh...ps...s...ss.usA.......uW.....Gss...sl.l.LRCGV-p.Pu.....t....h.........p.........s..s.......ush.ps.........V.........ss...........VsWFp.psss.....G..............................................ts.hao.lsRtshVtlTlP..ss.t.......us..tshss.Ls-lhsshhths..t.......................... 0 34 87 110 +11862 PF12029 DUF3516 Domain of unknown function (DUF3516) Assefa S, Coggill P, Bateman A anon PFAM-B_3601 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 460 to 473 amino acids in length. This domain is found associated with Pfam:PF00270, Pfam:PF00271. 
25.00 25.00 31.60 30.80 21.60 20.50 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.60 0.70 -5.68 23 417 2009-07-08 14:07:34 2009-07-08 15:07:34 3 5 397 0 120 382 22 445.70 48 54.35 CHANGED APEH.lENt+thAKA....GDDP...KKpRKlhRKKsPE.GaVsWucsTF-RLlpAcPEsLsSpFcVopuMLLNVl..........uRsGDsasuh++LLp-NH-sRspQp+hl+RAltlaRuLlsAGVVEc.l..-pPD....spGR.psRLTlDLQtDFALNQPLSPFALAAL.ELLDsESsoYALDVlSVlESTL-DPRQVLhAQQcpARGEAlAtMKA-GlEY-ERMtlLEEVTaPKPLtElLcsAa-hYRpuHPWlsDapLuPKSVVRDMaERAMTFs-aVspYGLuRSEGlVLRYLuDAY+AL+pTVP--tRTEELpDlI-WLGELVRQVDSSLLDEWEpLssPss...stssp........shsspsPpslTANpRAF+VhVRNAhFRRVELsAtcch.stLupLDs......tsG....hssssWp-AL-sYa-EH-cIGTGPDARGPpLhhl-cs..........s......................c......hWpVRQllcDPsGDHDWuIsApVDLsASDEAGcsVlcssslsp ..................................APEH.IENtKAlAKA........GDDP.....KKh+K...l....tRKKsPE.GFVsWuEpTFp+LlpupPEsLpu+h+lTcuMLLNll............sRs....G.....D...shtshc+Llcsspp......shtppp+hhcRAlpIacoLlsusVVc+l......-pP.s...........ssG.R.....phtlsh-LpcDFALNQPLSsFALAAl.-LL......D.....P......-S......s.................o......Y..ALDVlSVlEATL-cP+plLhAQp+pARGEAlAtMKA-Gl-Y-ERMttL--loaP+PLc-hLpsAF-hYtpupPWls-htLpPKSVVRDMhE+AMTFs-alupYtlsRSEGlVLRYLoDAYRuLcpTVP-st+o.-ELpDlIpWLGElVRplDSSLlDEWEpLssPts........ttspp.........................spsspslTuNpRAFpVhVRNAMFR+Vp.LhAh-ch.-tLGt...L-s.......................thu...........hsss.sWp-sLssYaDEa..--lssGs-ARuPtLhhl.-pp................s.....................................c...hWpVRQI..lcDPsG-HDWuIsApVDLsu.o.D-sGcsVlcshsh.s.................................. 0 42 88 112 +11863 PF12030 DUF3517 Domain of unknown function (DUF3517) Assefa S, Coggill P, Bateman A anon PFAM-B_3933 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 340 amino acids in length. This domain is found associated with Pfam:PF00443. 
25.00 25.00 39.90 27.20 23.30 22.90 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.04 0.70 -5.89 7 66 2009-07-08 14:28:27 2009-07-08 15:28:27 3 3 65 0 54 70 0 333.30 29 14.32 CHANGED DYFEhLh.LAsFGshEsthlLppGFLLKCLEIlWLD+EDSKRL+RQYhsYh+LlEKGR+FSHRKLh-LLuhLLs+IDhTssPos-DcR+sLPsG+aoLThoEsshlRPL.GRspELslL+KlLpQ.ssPpAsRsIVulhlDAEPEAGLhDPICKVLE-GLRluP.AcLCAPFLEATLIFCpRSPDc-RIVuLIDaVAKGV-SINDSGG+EHLAFFTulhssRNERLuL-EsWFLSpllD+IPDWAPTLLhaPDRsVRNMTh-hL+pILFosEAp-hs..-DaQsRau-lAKELV+ASl-+LRKsaLssPGssVEs+sl..Eol+sVI-HCLsoYF.sDSE-DQ- ......................................................................................................DYFthlltlAphGthEsthlLpc.GFLh+.sL.cllhhD..pcs......o.p...cLpc.pYssh.h+.ll..p...K...sRphS.....atp.LhpLLshLlsp....l....D....h....sh..s.........P....s......s....s....s..pp.....c.....p....h....t.....s.....s..+...a..........sl...........Ths..E.spll.p.s.h.s+p.p..s.h.hhlcKlLp.ppsstso+....pIlshllst..p...sph..sL..t..-sl.h+sLccGlpspP..upL.ssPFLc.A..sLlFCptusshsplpsLIcalscsscS...lss...u......s......GcpaLsFapslh.ss.c.Npp.h..ttsc..t.hhhstl.l-plPcWAPsLLtasDc...sVRssT.caLpplLF...........s........t.....p.h......p....-.h...s...-.....-.......h......pp......h..h....pc....h....u+pL......upsClp.....hL+csals..s.s...s..p..l.pspsh.pslphVlppChthYF.sptp....................................................................... 0 7 24 41 +11864 PF12031 DUF3518 Domain of unknown function (DUF3518) Assefa S, Coggill P, Bateman A anon PFAM-B_3830 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 260 amino acids in length. This domain is found associated with Pfam:PF01388. 
40.20 40.20 58.30 43.20 29.50 23.90 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.77 0.70 -5.58 6 191 2009-09-14 14:01:10 2009-07-08 15:30:20 3 4 82 0 96 173 0 241.10 64 14.63 CHANGED DSLAKRCICVSNIlRuLSFVPGNDsEMu+HPGLlLILGKLlLLHHEHPcRpptPtTYp+EE-c-pGluCS....K-EWWWDCLpsLRENsLVTLANISGQLDLSsYsEoICL..PlLDGLLHWhVCPSAEAQDPFPTsuPsSsLSPQRLVLEsLCKLSIpDsNVDLlLATPPFSR.EKLassLVRalGsRKs.VCREMuVsLLSNLAQGDstAARAlAlQKusIGsLluFLEDulshAQaQQStHSLhH.Mt..sPsh-PsSlDMM .......................DSLA+RClCVSNIlRSLSFVPGNDhEMSKHPGLlLILGKL................lLLH.HcH.....PE....RK...pAP...TY...E.KE..E...-....p..DpGVuCs.........................KsEWW...W.DCLEhLRENTLVTLANISGQLDLSsYs.ESICLPlLDGLLHWhVCPSAEAQDPFPT..lGPNus..LSPQRLVLETLsKLSIQDNNVDLILATPPFS..R..EKLYuoLVRaluDRKssVCREMulsLLuNLAQGDoLAARA.IAVQKGS.IGNLluFLEDulshsQaQQSQpuLhH.MQ..sPshEPsSVDMM.......................... 0 25 33 60 +11865 PF12032 CLIP Regulatory CLIP domain of proteinases Mistry J, Gavin OL anon pdb_2ike Domain CLIP is a regulatory domain which controls the proteinase action of various proteins of the trypsin family, e.g. easter and pap2. The CLIP domain remains linked to the protease domain after cleavage of a conserved residue which retains the protein in zymogen form. It is named CLIP because it can be drawn in the shape of a paper clip. It has many disulphide bonds and highly conserved cysteine residues, and so it folds extensively. [1] [2] 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.19 0.72 -3.87 120 622 2009-07-08 14:42:14 2009-07-08 15:42:14 3 13 68 4 294 670 0 53.10 30 15.22 CHANGED CpsP..ssp.sGpClslppCpslhpl.hpp.....p.hsspctpaLcp..upC..Gh.....tss.pshVCC .............ChsP..s..sp.sGpCltlppCs..lhpl..lpp.....p.hss..pptpaLpp...upC....Gh...............tss.pshVCC.................. 
0 55 100 249 +11866 PF12033 DUF3519 Protein of unknown function (DUF3519) Assefa S, Coggill P, Bateman A anon PFAM-B_2444 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 117 to 1154 amino acids in length. This protein has a single completely conserved residue Q that may be functionally important. 27.50 27.50 28.20 27.70 26.70 26.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.53 0.72 -3.85 10 158 2009-09-11 06:57:10 2009-07-08 16:12:16 3 3 48 0 3 164 0 96.40 26 22.41 CHANGED ll-phsKl-cpst.hAhcapN....l+lslpcppsspclhsaa..puacchh-ss.hhpsSsh.......hsttphpshsuscsNPTsKPLTSQEsL....LKsoENLNEsTsEsspLSP ..........................llcph..+.h.cp.tht.hAhchtN.....h+l.s.lpsphssppL.saa..puac....hp.-sphhhphpsh..........htttt.hps..hs.scs..NsTpKsLpoQEsL.........hpppct.tp............................................. 0 2 3 3 +11867 PF12034 DUF3520 Domain of unknown function (DUF3520) Assefa S, Coggill P, Bateman A anon PFAM-B_3604 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 180 amino acids in length. This domain is found associated with Pfam:PF00092. 
25.00 25.00 34.10 31.40 19.50 17.70 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.16 0.71 -4.16 49 418 2009-07-08 15:16:07 2009-07-08 16:16:07 3 13 396 0 115 369 71 179.40 45 30.57 CHANGED uTLhTIAKDVKlQVEFNPApVucYRLIGYENRhLscEDFsNDpVDAGEIGAGHsVTALYElsssusts........h-sLRYtt.......stts..........sstssELAhl+lRYKtP....suspSpLlphsl...t.tsshspuSs-h+F..AuAVAuFGphL.+sucah.......sshsasplh.sLApsutGp..DthG.........YR.sEFlpLlc...hApuLp .............................tTLhTlAKDVKhQlEFNPuhVsEYRLIGYEsRtLpsEDFN.NDpVDAG-IGAG+pVTALYEls.sGppup.........lDcLRYt..................sptss.........sspssELAalKlRaKtP.............pup..cSpLl-.hPl......s.ssh.spsS.cDhRFuAAVAuaGQhL.RuScah........sshoasplhphAppupGp..DstG.........YR...uEFl.cLlchAcsh.s.......................................... 0 48 77 96 +11868 PF12035 DUF3521 Protein of unknown function (DUF3521) Assefa S, Coggill P, Bateman A anon PFAM-B_3612 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 42 to 74 amino acids in length. 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.57 0.72 -4.11 23 1098 2009-09-11 15:50:18 2009-07-08 16:24:58 3 4 252 0 1 353 0 30.30 39 57.64 CHANGED MPDAhphuu..hpthspllsh...ttaVGphRRs+php.+ ...........MPDAhhhsu...hpsh...p...phhp.......ht.hRps+thp.c...... 0 0 0 0 +11869 PF12036 DUF3522 Protein of unknown function (DUF3522) Assefa S, Coggill P, Bateman A anon PFAM-B_3665 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 220 to 787 amino acids in length. 
25.00 25.00 27.10 26.90 22.40 24.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.25 0.71 -4.50 24 294 2009-07-08 15:30:14 2009-07-08 16:30:14 3 4 111 0 167 260 2 162.00 32 30.55 CHANGED .ttthhtslhhslSslshlPslhhsh++pahhEuslhhFshhhShhYHuC-shss.hh.....hshhpachLp.hshluuhhuhaVplhshsthsp.hcpslphhshhhhhlhttts.hshh.hlsPlhhulhhhlstahhc.t+tpphhstphh...........................hhhhhhssulhhhhhuLs.....-spDsYthhHo..hWHh ..........................h....hhtsLlLsLSNLhFlPslhlul+ppa.hhE...uslYhaoMhFSshYHACD.tss..h..........hChhpachL......p....a......hshh...uohhuhaVol.h..shup..hpthhctshhhhshhhhuhhhthst.h...s.hhshhhPh.hhuhhlhhs..t......ahhc..hc..p.+phassphh..............................................................................................................................hhhhhlhs.Ghhhshhu.l.h...hp..sp.....-sYhhhHS..hWHh........................................... 0 45 67 110 +11870 PF12037 DUF3523 Domain of unknown function (DUF3523) Assefa S, Coggill P, Bateman A anon PFAM-B_3746 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 257 to 277 amino acids in length. This domain is found associated with Pfam:PF00004. This domain has a conserved LER sequence motif. 
25.00 25.00 29.40 29.30 23.30 23.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.84 0.70 -5.42 14 270 2009-07-08 15:33:07 2009-07-08 16:33:07 3 5 136 0 167 233 5 216.40 41 43.44 CHANGED PsPPussussssssp.............t.tshsusFDPoALERuAKAL+pL-pSsaA+cAhELhKhQEpT+QtEhpschcchpAthuphpsE+tRl-t-E+RKslpppsppcptpupYcDcLuRcRhpccLppQcppNpE.L+pQEEush+QEthR+pT............ccEtcLc+cNhht+lcAEscuRh+tcRcNcDlphchl+.+usEcRcThlEuIpTshuhlGsGhpshLoDpsKlshsVGGlThLAhGlYTs+pGstVshpalEppLG+PSLlR ..........................s............................t.t.t..tFDspuLERuAcAh+plppS.pA..+c.............shpl.phQEtTh.QhE.pschpchcsthtphc...-ph+hptEEc............RKshpppspppptpApYpDpLuRpRhp.............c...........phtt.............Qphh.pp-.l+hQE.ESs.+QEthR+tT......................hccchcLc+cs.hh+hpAEscu+A+tpRpNtDl.hcpl..+h+AtEcRpThl-uIpTshsh.hGt............GhpshloDhsK.lhssVu.GlThLAsGlYou+pustVstpalEttLG+PSLVR.......................... 0 62 89 134 +11871 PF12038 DUF3524 Domain of unknown function (DUF3524) Assefa S, Coggill P, Bateman A anon PFAM-B_3749 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF00534. This domain has two conserved sequence motifs: HENQ and FNS. This domain has a single completely conserved residue S that may be functionally important. 
25.00 25.00 25.20 25.10 24.70 24.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.07 0.71 -4.70 31 155 2009-07-08 15:36:23 2009-07-08 16:36:23 3 6 115 0 73 154 105 154.80 47 44.42 CHANGED M+ILlL-sahuGSHppatctLhpp...pa-hplloL.PuRtW+WRhRGuAloau...pp.hhtpppa...DlllAToMlDLAslhuLpP.pLup....lPtllYFHENQhsYPhss......spp+chpashlNlhSALAADtVlFNSsaN+cSFLsulpshLc+hPDhts.tshlcpIttKupVLsssl ....................MpILllEsFaGGSH+plh-hLtcpl....t....-hslhTL.PA++W+WRhRsuALhFu....pp..lshsppa......clLhAoShLsLspLhuLpP.cL.up....hcpllYFHENQLsYPVpc......spcRDhpauasplhSsLsADhVlFNStFNh-SFLsulspFh+hhPDa+P..psltphIcsKspVlahPl.................................. 0 26 37 53 +11872 PF12039 DUF3525 Protein of unknown function (DUF3525) Assefa S, Coggill P, Bateman A anon PFAM-B_3833 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in viruses. Proteins in this family are about 360 amino acids in length. 25.00 25.00 334.60 132.90 18.50 17.70 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.57 0.70 -5.98 2 191 2009-07-08 16:05:37 2009-07-08 17:05:37 3 4 4 0 0 133 0 144.50 43 73.73 CHANGED LlFEPVTRGKaTF.YPFGHWCLRDTNSMIlYEG+FVs.ctTSlGs.FKLoKShRPl+sGtshHLVPFHlQKLLDSMD-cs.PYSAsHNCTTVILcuIMYRSslGFlFAYulSWAVYhVLRPPQhAATsYpWhaPERoWDpS+hYphLGFAAGGTlPMEhlDpE........P.E-c.S................................DsuRo...s-ND.................................................cQ+cs-h.pEWWhSpDSlcsVpNDlhYhLSFL+sTsIPE-l+L-lVELsYsQhscDEccRIPEP.GT+ILsMPsW+PsNWA+LIDETHRVLSQFopYsPRlLNEhssWL+GLupNLYRVsEPIL.LLlRAMRAAhoVSpRAsRSlYpChCHWLDVMYGGSAPpRlKTVWGLTGhIsSGMTSQ 
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 0 0 0 +11873 PF12040 DUF3526 Domain of unknown function (DUF3526) Assefa S, Coggill P, Bateman A anon PFAM-B_3851 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 149 to 170 amino acids in length. This domain has a single completely conserved residue P that may be functionally important. 25.00 25.00 35.80 34.00 22.00 21.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.69 0.71 -4.08 38 152 2012-10-03 10:13:34 2009-07-08 17:13:32 3 4 74 0 62 180 55 155.60 23 34.11 CHANGED oph-hshsl+...csh......csssDhthpthtpphlppas.ptscs......LPh.saphhh...hhtscphssphhsphhcphhpthtpQpplschhuhlSPslulpphspslAGTDhtpahcFhppsEpa+tphtphhst.................stshpptphssspaptlPpFpap. ...............................................th-hshthc...csl......pss.D..htphhpphhtpas.ptscs......Lss..sachhh...hhttcphssphhsthstphtpthtpQptlspthuhluPslAlpph.hpLAuTDhtsphpF.pps-ta+tpltphh...................................................sthhpcpthss.scapplPpFpap............................................ 0 24 41 48 +11874 PF12041 DELLA Transcriptional regulator DELLA protein N terminal Mistry J, Gavin OL anon pdb_2zsh Domain Gibberellins are plant hormones which have great impact on growth signalling. DELLA proteins are transcriptional regulators of growth related proteins which are downregulated when gibberellins bind to their receptor GID1. 
GID1 forms a complex with DELLA proteins and signals them towards 26S proteasome. The N terminal of DELLA proteins contains conserved DELLA and VHYNP motifs which are important for GID1 binding and proteolysis of the DELLA proteins. [1] 20.70 20.70 20.70 26.70 20.20 18.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.64 0.72 -4.32 20 463 2009-07-09 08:13:55 2009-07-09 09:13:55 3 4 232 2 35 475 0 60.40 62 14.67 CHANGED DELLAVLGYKVRSSDMA.-VAQKLEQLEhVMuss.p-DG.lSpLuoD.TVHYNPSD..LSsWlpSMLoELN.P.ssssstss ......................s-hh.-VAQKlEpLE.sh.s....................t-DG...lS.......pLASD....TVHYNPSD..LSoWLESMLSElN.s.ssp....s.s....... 0 7 24 29 +11875 PF12042 RP1-2 Tubuliform egg casing silk strands structural domain Mistry J, Gavin OL anon pdb_2k3n Domain Spiders use fibroins to make silk strands. This family includes tubuliform silk fibroins which are used to protect egg cases. This domain is a structural domain which is found in repeats of up to 20 in many individuals (although this is not necessarily the case). RP1 makes up structural domains in the N terminal while RP2 makes up structural domains in the C terminal. [1] 21.10 21.10 24.00 21.30 19.30 20.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.27 0.71 -4.60 17 225 2009-07-09 10:00:34 2009-07-09 11:00:34 3 18 29 3 0 228 0 156.70 38 69.36 CHANGED susupuuSuuuuuus.......uSAFAQu.hSuuLusSSsFusAFuSsoSsSuuuslAhpluhssApoLGl..ususALAuAlupAVuuVGs.GASusuYAsAlusAluphLuspGlLs..uuNAu.ulASShAsAlououuosu................utuputuuuSuhtpuuopouupoAus .....................................sutuuutuuu.tuu.......uSuFupu..uuuLusSosFs...osFuSu....sS.tSssusluhphuhssAsoLGl..ssusulAsAluQA.VuuVGs.GASutuYAsAlusAhuphLuspGlLs..suNAu....oLASShASAlouSAuSsuush............ssu..tusupu....tuuuu.uhtpsuopSuutut..t............................... 
0 0 0 0 +11876 PF12043 DUF3527 Domain of unknown function (DUF3527) Assefa S, Coggill P, Bateman A anon PFAM-B_3945 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 120 amino acids in length. This domain has a conserved CDCGGWD sequence motif. 25.90 25.90 60.30 44.70 25.40 25.20 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.49 0.70 -4.91 12 128 2009-07-09 12:13:05 2009-07-09 13:13:05 3 4 19 0 91 118 0 219.60 24 44.32 CHANGED SsophpuhLphshK.NGhPhF.Fslcs.c....-lhsAshh+ss.......sshshhYTFaohtt..++psuuhhsttp...p......lVGQMpVSs.hs.p.ttpt......hhpEFVLas...t.upp................p..ps.h.pt.t............p.....t...p.hptp..........sh.ps-LpssLElAAlVl.p.s..hpppps...................................h.tpSstsspV..llPuGhHGhPpst..uPSsLIpRW+SGGuCDCGGWDhuCsLtVLssttpps..pp.h.......tppsh-LFhpGspcps...PuLshsslc-GhYtVcFcupLSsLQAFSIClAhlHspc ....................................s..tuhlph..c.ps..hF.h.htt.p.....t.hsAp..p.s........tt.p..Yhha......p+tpt...t.............hluphpsSs....p.t...ttt.......h.pEaVLhs.......t....................................................................................................................................p.Eh.Ahl.t............................................................s..phpl..lhPsG.Huhsppt....tPssLlpRW+pGGt.CDCGGWDhuC.lhlLts...tt......t...................p.hcLhh............puttpt....shhphh.htpG.a.lpFpup.lo.LQuFuhslshlpsp......................... 0 12 57 73 +11877 PF12044 Metallopep Putative peptidase family Assefa S, Coggill P, Bateman A anon PFAM-B_3942 (release 23.0) Family This family of proteins is functionally uncharacterised. However, it does contain an HEXXH motif characteristic of metallopeptidases. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 625 to 773 amino acids in length. 
25.00 25.00 25.90 25.50 24.90 22.80 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.25 0.70 -6.02 28 198 2009-07-09 12:24:12 2009-07-09 13:24:12 3 6 159 0 150 210 5 375.90 31 58.75 CHANGED shclhNlp-sph.VppsslllcGpssstp.....tssslpVppssp...t.aPshs..a.....................sVssup.........FKAllhLs..PGt.Nplphp.........h..........sssspsphlslpYpPhhps.Pl+Lslllu+DSstpaDusstphp..pptsssL-tAI+KlRhuuhLhQAFTsEpMhcsGaGpRoFpFpEEhphssh..........ps.h.+sph....KlHllRSc+TltElRc.slAQQNspupcsstLFshsh-slcphst......psspthpsushhLDo+aDs...ptp.............hIpGHAALGGGssp.lpLAIFGSHuLaSWPsshEpl.sssFpDsTpsssscVANDsNcsGThWEshslslGAahHElGHhhGsPHQps..GlMLRsYsp.h...NRoFhs+EshuhRocopG.....s.hhPpscC.....pWpRLDhLRFhhHPsFpLPtD...................sh.s.ssssha......shssst..shlsussGIh ....................................................t..pl.shppsph.l.p........hlllpGphsp.p................hsthlplpp.ps..........t.hPshs...a.......................Plp..s..up.........FKAllhLs..PG...Nplphp............h..................ss.psphlslpa.h.Phhps.....PlpLslllu+DSstp..aDss.tp.t..pp.sssL-sAl+KhRhuAhL.hQAaTuEpMhcss...............hGpRsFphpEEhphssh...............ps.h.+pph.....+lHllRop+TlsE.lRs.p....hsQQt...p.....upsps.tLaphshcslcphht......tstpp.hsushhLDo+aDsptp.............hlpuHAALGuussp.lpLAl.FGSasLaSaPssh-cl.sssFhDsT.pssss...VuNDsscsGo.WEshslslGAahHElGHhFGsPHpps...GlMhRsYsh.h...NRoFhs+EshshRspppG.......................t.hh..tpps............tWpRLDhlRFh.hHPsFplPtD....................h.sstt.pha......shtsst..hhhhs.sGl...................................................................................... 0 38 74 127 +11878 PF12045 DUF3528 Protein of unknown function (DUF3528) Assefa S, Coggill P, Bateman A anon PFAM-B_3981 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 185 to 298 amino acids in length. This protein is found associated with Pfam:PF00046. 
29.50 29.50 37.60 37.00 27.60 26.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.12 0.71 -4.06 29 255 2009-07-09 12:25:28 2009-07-09 13:25:28 3 3 89 0 82 221 0 140.80 51 50.90 CHANGED sDFSSlPSFLs..psSSpPhTYsYS.SNL.PQVQPVREVsFRDY.GlDsSsKW.HaR......................GshupCY...........uuE-.lhHRDsLsssssh...uEhlhKN...su..ssh......Hsuos....uo.osFYusVGRNGVLPQuFDQFF-TAYGsu-s.sss-.hsssKsss+h...sssss ...............sDFS.olsSFLP..psoSp.hTYsYS.SNL.sQVQPVREV..o.FR-Y........ul-s.usKW.HaR.........................G.NhupCY...........S.uE-.lhH.RDCLsss.os................u-hLhKN.su..shh............H.uust.......so.osF...YosVGRNG...VLPQuFDQFF-oAYussps....sss-...stpKsttph.....sst...................................... 0 3 11 38 +11879 PF12046 DUF3529 Protein of unknown function (DUF3529) Assefa S, Coggill P, Bateman A anon PFAM-B_3346 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 176 to 190 amino acids in length. 25.00 25.00 27.80 27.30 22.60 21.60 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.88 0.71 -5.24 26 113 2009-07-09 12:27:30 2009-07-09 13:27:30 3 2 104 0 56 112 148 165.90 36 87.39 CHANGED .ssshhSTLlLTlLLuIGLhFFlRAusKDRTshhclh....Ssps.......shplhstlppahcpRuaphsssDscppllpFcGtVtsShhLAlhLohLuulGhsCLGLVlp.LhPphuhW.lh.LslLu.PLAGhaYhp+ApR.Eplcl+Lhsss.pss.............sSplplcAHRDELh....pLtcsLpLpucG ..........ssh.oTLhLslL.slGLaFFl+uSsK-Rhpphphh....ssts..........tplhsplpsaFppRuaplss.p.ppphlsFEGhVtsShhlAlaLohLsslGhssluLVLshhhPs.....hu.......hhhh.LshLu.PLAGlaYWp+AuRhEp..lpl+lhsss...sts.............tStlplpuHRDElh....pLpcpLpLppp........... 0 15 40 52 +11880 PF12047 DNMT1-RFD Cytosine specific DNA methyltransferase replication foci domain Mistry J, Gavin OL anon pdb_3epz Domain This domain is part of a cytosine specific DNA methyltransferase enzyme. 
It functions non-catalytically to target the protein towards replication foci. This allows the DNMT1 protein to methylate the correct residues. This domain targets DMAP1 and HDAC2 to the replication foci during the S phase of mitosis. They are thought to have some importance in conversion of critical histone lysine moieties. [1] 20.70 20.70 21.20 20.70 19.80 20.60 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.64 0.71 -4.50 38 305 2009-07-09 13:01:57 2009-07-09 14:01:57 3 35 122 5 168 317 0 136.40 27 11.89 CHANGED tcshPp+pLssasla-.scuchpsl-hh.hc.sh-l....ahoGhh.tsht......-sp..............sp.pt.............tuhpspu....lup......IcpWtIshh-..sut.shlhlsTshApYcLhKPuppYt.ha-hhhc.+splshtlhchLpcss....choh--llstltpt .....................................................pthPp+plssaslas..pc.............u.chhsh-hh.hc..sh-l.....ahSGhh.pslh..........cpp..............sp.pt.................Ghps.cs......hus................IppWh.I..shhc........sut.sllsloTs.....hA.t.YhLh........cPoppYtsha..shh.c.+hhluhhllchLpcss.....choac-lls+ltp.s............................. 0 57 94 132 +11881 PF12048 DUF3530 Protein of unknown function (DUF3530) Assefa S, Coggill P, Bateman A anon PFAM-B_2450 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 272 to 336 amino acids in length. These proteins are distantly related to alpa/beta hydrolases so they may act as enzymes. 
29.80 29.80 31.40 30.30 29.30 29.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.88 0.70 -5.44 36 175 2012-10-03 11:45:05 2009-07-09 14:03:12 3 2 157 0 59 189 50 266.50 22 94.97 CHANGED hhhthhLhhhhhhshh.......tt.........stptss.tss........................tt...............hpchLPs...................sEhptLps...sscphLsLhcstpsup.pGsllllsstspsADhsshlu.LRcpLsctGWsolSlo.Ps..........t...shh...ttsttsssssstp.ss.spp...........s..t.t..........................................phhptpp.......pplhsplsshhs..hspptsup.llllupGpuAshshchLupppssp........ss...................sLlhlssh.......hPsptt......tsLsphlup.lplPlLDlatss.pshshpsAt.....pRcptup+tpp.sYcQhtLht.....ss.ssppp...lhpplcGaLpphs .................................................hs...................................................................................hpc.lPt............................................pp.p.lps.....ssppalsLapstpssp..GslIllsstspssDhsthlu.LRcpl.sshGWsoLolshPs.................t....h..........tsttsssssttt.ts.ttt..............................t...............................................................................................p..pspt.......ptlhsplpushs..hut.ppt.sc.llLlupGsuAhhshchluppp.st....p...................sLlhlss.........stptt.........sL..ph.lsp.lplshhDlh...hts.tsh.....stpsAt.....tRhphup+tptssYpQhsl.sh........s..s..psppp...lhpplcGWlpt.................... 0 14 27 45 +11882 PF12049 DUF3531 Protein of unknown function (DUF3531) Assefa S, Coggill P, Bateman A anon PFAM-B_2583 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 149 to 199 amino acids in length. 
25.00 25.00 34.50 32.00 21.20 20.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.73 0.71 -4.53 21 130 2009-07-09 13:05:26 2009-07-09 14:05:26 3 3 98 0 64 130 123 144.70 44 69.87 CHANGED McVpFREhsPFssWIWLcFussPopsE+pYl-plFcSWalLG+LGGFNAENLQlpEsGsDlSaMsYDs-ps....sushsALMHNMG-hEYpspWARCWhDLGTSDulALDVLINALpQlsp-hVpIccllIGG.N.EDWPlp..-pt-sh......Fsp ............McVpFR-FsPhs.sWIWlEFts.sPoptEhphlcplhpSWallG+LGGFNupNLQlp.p.suhD..h..uahsYDs-pu.....ssshsuhhHNhG-lEaQssWuRsWlDLGTSDhluLDVLINuLpQlSs-aVsIcplllGGpN..pDWshp.spp-....s.............. 0 13 42 57 +11884 PF12051 DUF3533 Protein of unknown function (DUF3533) Assefa S, Coggill P, Bateman A anon PFAM-B_2348 (release 23.0) Family This family of transmembrane proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 393 to 772 amino acids in length. 27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.16 0.70 -5.71 39 601 2012-10-03 10:13:34 2009-07-09 14:10:01 3 6 468 0 205 969 15 280.00 21 43.58 CHANGED shllLslhhlulhu.lYWGuhYppss+h+slphllV.tDs....s........tt..shlGssltshhsph.s............thss..aplhs.scat...........hspclhchlacp+YWuAlhlpsNATpsLhsul..ssusssasso..shhphlapouRc.oshsohlhs.lptlpphhht..hssphl..plhpshsshspsshht......hlssshsashhDh+PhsssshhussplGlIYhlIloFFp.........hshhssl+tph.....tphl....+htph......llaRhlsShlshFhlSLhashlo.huFplsF...................shsa...G+.uGFlVaWMhsaLsMtAlGhssEshshll....sP.aluhaLlhWlIlNlus..shhPhsLs....PsFYRaGYAh.PlHsshclh+sI...aasspp.splGhshGlLhAWlslshh 
.........................................................................................................hh.................h.s...........h...s.....u....h.....a.....c....s...h.....s...phpp...l.lAVVNpDpG.............tt....s...s...lG..s..sh..s.shl.pp.ps..............hcaphh.......................................spccs.pctlp.pucYauulh..IPs.s.........hopplts............sl.......pst...p....s..s....hphp..................sssph.l...h..s.sup........t...s.s...h.......p.....p.........h.h.t.....p.h.h....p....slppshhp.....phsp..th..h.........s....l.psh..ss..................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................... 0 50 115 181 +11885 PF12052 VGCC_beta4Aa_N Voltage gated calcium channel subunit beta domain 4Aa N terminal Mistry J anon pdb_2d46 Domain The beta subunit of voltage gated calcium channels is coded for by four genes 1-4. Gene 4 can produce two types of beta4A domain (beta4Aa and beta4Ab) according to how the gene splicing is carried out. This family is part of the beta4Aa N terminal domain. It is made up of an alpha helix and a beta strand. It is thought to regulate the channel properties through protein-protein interactions with non Ca channel proteins. [1] 25.10 25.10 32.50 35.50 25.00 25.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -8.10 0.72 -4.09 8 450 2009-07-09 15:44:57 2009-07-09 16:44:57 3 7 87 11 136 407 0 42.20 78 8.59 CHANGED GSADShsS+PS-SDlSL-E-REu.................LR+EsERQAtsQLE+AKsK ......GSADSYTSRP.S.DSDVS.LE..E.DREA.................lR+EsE+QAtsQLE+AKoK.. 
0 16 28 69 +11886 PF12053 DUF3534 Domain of unknown function (DUF3534) Assefa S, Coggill P, Bateman A anon PFAM-B_2753 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 150 amino acids in length. This domain is found associated with Pfam:PF00595. This domain has a conserved GILD sequence motif. 25.00 25.00 25.20 25.00 24.50 23.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -4.32 7 242 2009-07-09 17:29:17 2009-07-09 18:29:17 3 9 78 1 93 240 0 108.90 49 11.13 CHANGED MKV.TVsFG+TtlVVPht-up.pVppL.........lppAs.RYt+spsptst.WlpVa+LE.asDGGILD.DDlltDVs-.D+DpllAlaDE...Q-s..tpGG-usSusSs......sppSP-.apsEhusp..huAFpPh..tuEI-VTsos.L+h...uhPLhVR..RSSDP .....................................................................................st......hlpl.p+Lp.....spG.G.ILD.DDhltDVs-.D+-.......p.L.lAVF-E.....Q-P........ppuGDu.sS..uSSo.......GTQSP-h..FtsElusp..........huA.FpPh.......suEIEVTsSs..L+h...shP..LhVR..RSSDP............................................. 0 25 34 62 +11887 PF12054 DUF3535 Domain of unknown function (DUF3535) Assefa S, Coggill P, Bateman A anon PFAM-B_2858 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 439 to 459 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF02985, Pfam:PF00176. This domain has two completely conserved residues (P and K) that may be functionally important. 
26.20 26.20 26.30 27.90 25.70 26.10 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.27 0.70 -5.86 32 289 2009-07-09 17:31:25 2009-07-09 18:31:25 3 20 241 0 212 284 4 412.50 26 24.30 CHANGED VWpsLlpph.....s.pslssthssalpshlpLhhsPhGh.h.shsh-sslll+PSut...................thpstcpcptpptptt.....s.ts...ph.DsthhtG-lpllGhD......sllRsRltAA+AlGhhhuhh...spssltshhps...........hLhstLsSstusp+hluulllscaspstt.ppsts.................phhsttLhphLp.....t....p.shYcElsshl.ptlRspCpsLlsohtssuh.h....pls.lshhspupspssspuFol-sAcclssph....apchtpthsssp+l........uhpsLE-sRpplhsulpps+p..scpphcsplhuuhAuAllshpsLPcKLNPlI+sLM-SlKcEEsthLQp+SApulupLlp.hst...st+ssPssKll+NLCsalCsDooETPchs........................ssphppsILoLt......+cpspts..........................hssssphpcpscpApIpR+GuphulpplsppFGsplhsplPpLtshh .........................................................................lW.tLlpph.....s.thl.....t..th.tsa.htshlpLhhpP.tl.....shchs.hhhcssutsh........................................t.tstphp.t.pttttt................t.....stp......h.h.ush......p.hl..s.-...........slh+uRlhAAcALGhlhshh.......spsshtshhp...........................................hLl.tL........sS.uuhp+lssuhlls-aAp.tttppshs............................................................................hlpttLhp.hLp.....................................c..hYcElss.h.pthpspCppLlsshtstsh.........................psp...sps......sss...sholc..pAp.clssp.........apchppshs..s..+h................................hp.LpspRpp...l.hslppspp...ppthphplpushAuAlls..........h..p..........tl..P.cK...LsPlI+slM-olKcEEs.t..LQphuAp.slApLlp.hs.........spp.sPssKllpNLsshhCsDsspTPphs................................................hpsthppsILoLh..............+cppttt......................................................................ts..sp.h..pctt.ct..ut.lpRRGAp.ALpplsppFGsplhpclPpLhphh.......................................................... 
0 67 112 174 +11888 PF12055 DUF3536 Domain of unknown function (DUF3536) Assefa S, Coggill P, Bateman A anon PFAM-B_3129 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 274 to 285 amino acids in length. This domain is found associated with Pfam:PF03065. 25.00 25.00 39.30 39.10 18.70 17.90 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.78 0.70 -5.24 23 127 2009-09-11 10:41:18 2009-07-09 18:40:01 3 5 114 0 74 140 17 241.30 31 32.54 CHANGED WsQp........WRsPLRcALDWLR-pLAthaEppusthhpDPWtARccYIcVlLcR.s.-shpsFltc+stc.lstc-plpsL+LLEhQRpuhLMaTSCGWFFDElSGIEoVQllcYAuRAlpLAc-lss.sh.EscFlp+LppAhSNlsphusGsclYcthV+PuhlsLhc.VuAHaAlsSLapshscps.hasYplt.cphchhtuGpsplAl...G+htlpSphThEptphhasll+hGsashpsGVp.apsppsatphhpplhttappushstll+hlsccFutpsYoLccLFp .........WpQp........WRtPLRpAL-hLR-plstha-ptutth..h.p...D.PWtARc...cYlpVl........hsc......s........p......shppFltc.attp.Lstpcph..psL+LLEhQRtuhhMaTSCGWFF--lStlEslQhlpYAtRAlp.Lupch.......ss...........p...l....-tpFlptLtpA.SNh.p.htsGtplapphVhsshhsh.p.luuphA.lsslhpt...h................p..................t..thhs...apht..thp.....st..thhh..Gph.h.s.hT..pt..h.hssh.hGt.p..s.h................h.....t................................................................................................................. 0 33 59 68 +11889 PF12056 DUF3537 Protein of unknown function (DUF3537) Assefa S, Coggill P, Bateman A anon PFAM-B_3199 (release 23.0) Family This family of transmembrane proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 427 to 453 amino acids in length. 
25.00 25.00 32.10 26.00 21.30 24.60 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.39 0.70 -5.76 9 121 2009-07-09 17:42:07 2009-07-09 18:42:07 3 3 18 0 83 119 2 332.00 35 86.54 CHANGED psscELt....pFcshLpWhshDpSs.hssslSW.lFhlLullVPhlu+hLLsCt....sC-.hppRsa-shVQlSLShhAuluFlsLSchhR+aGlR+FLFlD+LhscSp+VRhsYssclppSh+lLuhalLPCFsscusY+.IaWYhou.sphPalhpshhupslAChlph.sSWhYRTolFllsClLF+LlCaLQlLRh--FA+hF.....ptps-ltslLtEHl+IR+pL+hISHRFRtFILhsLllVTASQFsuLLhTT+sputlslhpuGELALCSlsLVoGLhICL+SAsKITH+AQulTulAo+WHsssTlsSh-ph........poPsus........hss..shp.ssstsssssEpsst....-DshcsTcI..hP.asp.shSaQKRQALVoYLENNpAGITVaGFhlDRoaL+TIFhl ..........................cpLp....pFcthLpWhsh-p..Ss....hshhhSahhFhhlulslPhhs.t.hhl.s.............s...p.csaph.Vphs.ohhAsluahsLophh++...........h.GLR+h..LalD.p.ltsc.otpV+tsYttp......l....ptuh+lLuhhllPshhscssa+.laaa...........t..h.P.........a....h.p....s.......h...pslAhhhth.sSWhYRTslahhsClLF+LlCpLQlLRhcsau+hh.....ptp.u-ltsllpEHhRIRcpLphISHRaRhFllhsLlhlTuSQhsuL......lhsst.........ps.hshhpuG-LAlsShs.lsGlhlsLpuAsKITH+AQulsulAu+WHshsThss.st.........tsPpss............t.......t.......s..pp..ps.-..t......ts.hs.ssph..hs.h...h.SapKRQAL...........VsYLppN.uGITlaGahlDRshLpslFhh................... 0 18 54 70 +11890 PF12057 DUF3538 Domain of unknown function (DUF3538) Assefa S, Coggill P, Bateman A anon PFAM-B_3373 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 120 amino acids in length. This domain is found associated with Pfam:PF00240. This domain has a conserved SDL sequence motif. 
25.00 25.00 34.40 32.80 22.60 19.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.57 0.71 -4.49 9 155 2009-07-09 17:48:44 2009-07-09 18:48:44 3 2 64 0 55 144 0 110.50 61 11.73 CHANGED LsEVlQchRpVppRLpPFlpRYa-ILpsssTh-.pssTptREpsQRlhshVuEuLHhLupA.hAlSDLhhsLpsssPRHL.ssRPh.........S.ahssPshhssuhh.lPh.hslus......sVs...housGs ....hsEVLQELpRlEsRLpPFLQRYaElLssAsTs-Y.sNN.p.-GREEcQRllNLVGEuLRLLGNshVALSDLRCNLussPPRHLHVVRPM.........S.H.YTTPMVLQQAA.....IPIQINVGT.......TVTMTGNGp..................................... 1 11 16 35 +11891 PF12058 DUF3539 Protein of unknown function (DUF3539) Assefa S, Coggill P, Bateman A anon PFAM-B_3564 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 90 amino acids in length. This protein has a conserved NHP sequence motif. 25.00 25.00 45.40 45.40 18.90 17.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.07 0.72 -4.16 17 75 2009-07-09 17:50:22 2009-07-09 18:50:22 3 1 72 6 29 75 90 86.10 51 97.33 CHANGED ssEpYLNHPTFGhLYtVsPss.....-u+-laATLYAQRhFFLVshpspG.........hpFEsIshtDARhhsEhpLpphRRss.opEhpphppl..FcQTFl .s.sEpYLNHPTFGhLYpVs.ss.....-sp-lasTLYAQRlFFLVshpscu..........spFEsIshsDARhhsE.+LpphRRsG.sp-apphppl..apQTF..... 0 4 19 27 +11892 PF12059 DUF3540 Protein of unknown function (DUF3540) Assefa S, Coggill P, Bateman A anon PFAM-B_2948 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 212 to 238 amino acids in length. This protein has a conserved SCL sequence motif. 
27.10 27.10 27.10 28.30 26.70 26.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.19 0.71 -4.68 14 188 2009-07-09 18:02:22 2009-07-09 19:02:22 3 2 93 0 49 145 5 162.70 28 85.87 CHANGED phs.upVtt...tts-shhlhpssGh.hps++AsSCLltPpsGDhV.Ll...........ssss-spphYllAlLpRssssss.tLphsG.clsLcs.tttlpl.usstlcLputp.plshpspchphpupphththpchph.upthpsphtpsphlucphcshhpRlhQpscpuhRpVcpl-pscAtplshpscpshph+u+psslTAcsllKlDupQIHhG ........................h..hupVht...hhss.hhhhht.ts...hpsc+AsSCLltPphGDhV..l............ss..ctpphallAlLtRssssts.tlth.sG..tlsLch........tsttlpltutcplslcst..phshpupp..........................hphlucphpshhpplhptstps.RpVpss-pl+Auphphtscpphth+u+pshlsAptllKIDutQIHhG........... 0 18 30 39 +11893 PF12060 DUF3541 Domain of unknown function (DUF3541) Assefa S, Coggill P, Bateman A anon PFAM-B_2172 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 230 amino acids in length. 28.20 28.20 32.50 31.90 19.80 17.50 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.47 0.70 -5.06 17 126 2009-07-10 10:48:53 2009-07-10 11:48:53 3 1 122 0 23 93 2 222.40 66 59.53 CHANGED sssp.oappsAphI+sshEspLYoLPshhpGHYGlRMYR.ThDsKYusAshsDhhpVssp.shaAsplcpPc.Itthuppclssht.....cs.RuptRhpAhcshPEaLahussLLt.huRhDEaGLpt.scs+LpplL++hDhcphhTD.tMIcAWAAQLAN.VYWh+QLG.tDhVpcahpAFpcsYPDspDpcLsppQatNKlYGMTHhIFAsSEYYQH.Vcpp-aQW .......sss...hoaptoAsLI+pTYEoQLFTLPuFKEGHYuLRMYRQTLD-KYAAAIhSDLuRVAStLNaFAAEVsTPEQIppYupKRLppYp...cs-DERoQRRasATpsMPEYLYLGlDLLGsMARANEYGL..pH..Kp....DsKLRpVLRRYDFo.YATDcsMIEAWAAQLANQVYWLRQLGEQDVV-sFIpAFRcTYPDp+DscLscQQYGNKLYGMTHIIFADSEYYQHsVSpppHQW. 0 1 6 18 +11894 PF12061 DUF3542 Protein of unknown function (DUF3542) Assefa S, Coggill P, Bateman A anon PFAM-B_2032 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes and viruses. 
Proteins in this family are typically between 516 to 1283 amino acids in length. This protein is found associated with Pfam:PF00931. 27.20 27.20 55.50 30.90 26.60 25.80 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.61 0.70 -5.50 6 85 2009-07-10 10:50:28 2009-07-10 11:50:28 3 7 5 0 0 85 0 331.00 57 41.90 CHANGED hcEhIppTK.EhRApYS.FPpssLsuNcV........shcSPcFVMEhIDsVVtNLssLVKIsDssSh.FV.t..KcpIppVhKELKLLR.FVCFVSN+s.IEPQ.p+pTFas+sLhtAS+hAMVsWLY..hP....uNss.DLsPuEsspLLSDahcMKIKsIpPsIp+..IYIDVLQALKSTh.PpsQpcH......As..uGhVET.pHsL...........hVuLoDQMAshpEMLsLLRDNLIHLPILDLEFHLQDhDoVIlDuGLLlYSLYDhctp.EDsoLE-lNptLthDLP+sIE.IKhhlYLVhQKAFQsNLPRIHGLGYVDFLL+NLK-FQDRYSDS.LAFlKsQLQVIQpEhESLQPFLKsVsEEsHNKa-+..EcCss.lItKAYEVEYlVDAC...IsKcVPcWCLcRWLlDIIcEIssI.KtKlpE ............................................................................................................................................WKclIWKTKQEFRApYS.FPKosLAsNKV........s.tsPcFVMEhIDshVtNlNVLVKINDPsShhFVPG..KEQIEQVLKELKLLRFFVCFVSNKC.IpPQYppoTFYoHALIEASHIAMVVWLa..LPl.YGNtNQDLsssE..VSpLLS....DFhEMKIKsIpPslsp..IYIDVLpALKSTI.PpAQpKH.....sh...uuhVEhPsHsL...........hVGLSDQMAsLQEMLCLLRDNLIHLPI...LD.LE....FH.LQDMDSVIlDAGLLIYSLYDhcGphEDTsL--hNptLGFDLPRNIE.sIKsMVYLVMQKAFp...sNLPRlHGLGYVDFLLKNLcDFQsRYSDS.LuFLKNQLQVIQpEFESLQPFLKsVsEEPHNKhKpLNEDCAsQIIRKAYE..VEYVVDAC...INKphPpWClERWL.DIIEEITCIKAcIQE........................... 0 0 0 0 +11895 PF12062 HSNSD heparan sulfate-N-deacetylase Assefa S, Coggill P, Bateman A anon PFAM-B_2134 (release 23.0) Family This family of proteins is are heparan sulfate N-deacetylase enzymes. This protein is found in eukaryotes. This proteinenzyme is often found associated with Pfam:PF00685. 
25.00 25.00 31.30 26.80 18.20 23.00 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.48 0.70 -6.10 7 310 2009-07-10 10:56:38 2009-07-10 11:56:38 3 9 87 0 174 264 0 383.60 61 55.88 CHANGED hshsllSlhh.saaLaos..s+t.tpstcsst...phsC..s..hh......P.p.h.s+shc.....sss+TDPpVLVFVEShYSpLGp-IltILpSpRFpY+sElAsG+.slPsLTsps+G+YsLIIaENlhKYlNMDpWNRpLLDKYChEYuVuIlGFhpu.sEcsLhshQlKGFPLalasNltl+DhslNP.SPlL+lT+suchcpGsLPG.sDWslFQhNHSTYEPVLhuph+oscp..s..s.......h.hATVlpDLGLcDGIQRVLFGpNLsFWLHKLlFlDAluaLoGccLoLsLDRaILVDIDDIFVGccGTRhpscDV+ALlsTQNplRshlsNFTFNLGFSGKaaHsGT-tEDtGDDhLLpsscEFhWFPHMWpH.QPHLaHN.ohLhppMhLNKtFAl-HsIPsDhGYAlAPHHSGVYPVH.QLYcAWKKVWslpVTSTEEYPHLKPARhR+GFIHsuIMVLPRQTCGLFTHThFacEYPGG.pcLDKSIpGGELF ......................................................................................................................................................................................................h..hshh.hhh.saal..s......p........................ps....t...........................................s.p.......t..+o-..PhVLlFV.ES.YSpLGp-IltlLEusRFp..YphplAst+sshPsLT.-.....ps....+G..+asLlIaENlhKYlshDuWNRpLLD+YCh-.Y.uVGlI...G.Fh.+s.sE....poL.u..h...Q....LKG.FP...L.lasNl.tL.+DhplNPpSPLLhlT+sschc.GsL...PG..pDWTlF...Q.NHSoYpPVlh.Aph.......pstc.....s...h..s..t...........shhsTV.lpDhGLaDGIQRVLFG.ssLsFWLHK...LlFlDAlu.aLosc+LsLsL..-RYILVDIDDIFVGKcG..TRMpspDVcA...LlpTQp.LR..s...lssFTFN.LGFSGKFaHs.GT-t.EDtGDDhLLp.hcEFWW..FP.HMWSHMQPHLFHNt...S.....sLs-QMhLNKpFAlEHGIPsshGYAVAPHH.SGVYPVH.QLY-AWKpV.WsIpVTSTEEY.PHL+PARaRRGFIHpsIM........VLPRQTCGLFTHTIFYpEYPGGspELD+SIpGGELF.............................. 0 34 48 107 +11896 PF12063 DUF3543 Domain of unknown function (DUF3543) Assefa S, Coggill P, Bateman A anon PFAM-B_2213 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 217 to 291 amino acids in length. This domain is found associated with Pfam:PF00069. 
This domain has a single completely conserved residue A that may be functionally important. 26.30 26.30 27.90 27.00 25.50 25.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.41 0.70 -5.15 32 333 2009-07-10 11:05:01 2009-07-10 12:05:01 3 7 232 0 217 306 0 231.20 29 26.39 CHANGED pss..cE-pc..slpplEphAo+u..cslhtFA-VKapQLhPhsP...............ts.s.spssh............psssLsscshhol........ScEuLVLYVKuLuLLuKuMclAusWWppppp......................sshts..Ss+lNplVQWlRsRFNEsLEKAEhl+L+L..pspphLspccsspsp........................sssloAEKLhYDRALEMSRsAAlsELsscDLtG.......CEluYsTAlhMLEAlL-s-..........................stlss-D+phlp+h......lsuIppRLssL+pKht .................................................................................pEcschlptLp.hhths..cslhthAth+hsth..shss...................................................thttt.s......................................ss.hsphshph...............psEtLVLYlKuhpLLupuh.p.lAtt.hpp.................................................sphss.....Ssplpp.V.Vphlpp+appslppschlph+L....pphh.ccp..hsp.............................................psloAEKLlYs+Al-MspsAAl-Ehhp...p..h..ps........C.hpYppAlhhLEuLhcph......................................................................hctpDpp.lpKh.........................hpsIcpRLssLpp...t.............................. 0 55 95 166 +11897 PF12064 DUF3544 Domain of unknown function (DUF3544) Assefa S, Coggill P, Bateman A anon PFAM-B_3553 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 198 to 216 amino acids in length. This domain is found associated with Pfam:PF00628, Pfam:PF01753, Pfam:PF00439, Pfam:PF00855. 
27.00 27.00 91.00 58.30 19.00 19.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.52 0.70 -4.53 4 139 2009-07-10 11:09:03 2009-07-10 12:09:03 3 9 40 0 32 136 0 190.30 79 18.22 CHANGED uKPVLuGus....GRRISLTDMPRSPMSTNSSVHTGSDlEQ-sccK......soSSHaSAuEESMD....STASPASsKsG.uGshusSPKPFpspsus...oKQE+ousTGSILNLNLDRSKAEMDLKELSEoV......QpQussssLhSPKRQIRSRFQLNLDKTIESCKAQLGINEIS-DsYsuVEHSDSEDSEKoDSSDSE.hSD-EQKsKNpp-ssps+Eu ...........SKPlLSGGs......GRRISLSDMPRSPMSTNSSVH..TGSDVEQDsEKK......AsSSHFSASEESMDFLDKST......AS....P..ASTKsGQAGSLSGSPKPFS.PQhssP..lsoKt-KT..S..TTGSILNLNL.DRSKAEMDLKELSESV........QQQSs..PVPLISPKRQIRSRFQLNLDKTIESCKAQ.L.GINEISEDVYTAVEHSD....SEDSEKSD...SSDSEYlSD-EQKsKNEPEDsEDKE.G....... 0 1 5 13 +11898 PF12065 DUF3545 Protein of unknown function (DUF3545) Assefa S, Coggill P, Bateman A anon PFAM-B_3270 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 60 to 77 amino acids in length. This protein has two completely conserved residues (R and L) that may be functionally important. 25.00 25.00 30.70 45.40 21.60 21.10 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.12 0.72 -4.22 22 162 2009-07-10 11:21:43 2009-07-10 12:21:43 3 1 162 0 38 93 1 59.60 52 94.82 CHANGED MDphph....D-llth.-ps...ppo+u+.s.pKRKWREIEAlKD+pRLpKELp-hDhsh-.hsl--...lch ......MDpLca....DEllch.Ehsp.hcpSRoK.P.sKRhWREIEAI+DR+RLcKELh-hDsshD...hD-Ic.............. 0 7 14 26 +11899 PF12066 DUF3546 Domain of unknown function (DUF3546) Assefa S, Coggill P, Bateman A anon PFAM-B_3237 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 93 to 114 amino acids in length. This domain has two completely conserved Y residues that may be functionally important. 
25.00 25.00 26.10 26.10 24.40 24.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.46 0.72 -4.05 19 273 2009-09-11 14:59:35 2009-07-10 12:24:31 3 13 212 1 194 287 0 104.60 30 13.50 CHANGED oaKpFhts..-Dslscs-uhp+YscYKh-apppQhpcFF.tHK-E-WF+pKY..HPcphsphpccppphhpsphpsFhp.hcsGhhsplpl-ht.t....chhsssllch-sGspp- ................hKtFh.p..--shscs-s.p+YscYKt-apppphpsFFptHKcEE....................WF+pKY......cP-..p..hs....thppctpph...hpsthp.Fhp.lcsGhhDph.l-h..t.....p.hssshlchEtGpt................................... 0 67 109 159 +11900 PF12067 Sox_C_TAD DUF3547; Sox C-terminal transactivation domain Assefa S, Coggill P, Bateman A, Eberhardt R anon PFAM-B_3310 (release 23.0) Family This domain is found at the C-terminus of the Sox family of transcription factors. It is found associated with Pfam:PF00505. It binds to the Armadillo repeats (Pfam:PF00514) in Catenin beta-1 (CTNNB1), which is involved in transcriptional regulation [1]. It functions as a transactivating domain (TAD) [2]. 
25.00 25.00 25.30 27.10 22.80 24.20 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.68 0.71 -4.14 12 166 2009-07-10 11:26:27 2009-07-10 12:26:27 3 4 45 0 80 139 0 182.20 34 53.20 CHANGED hsshua.aR-htshG..spa-s...YuLPT.PEhSPLDsh-...s-suFFss............ss.pE-sphh.......s.hsYpssas.........pp.pssshh+phshsps...ssst.............shhss.sssthY..uphhss.pst.has.......phG.QhSPPPEut...ph-sl-plpps.-Lhu-lDRsEFDQYLs....ts+s-ht....hsaps..............ss.ssspp...ssL.SlLSDAooAsYYsshss ........................................................................s...tpaR-h.shG......sph-s...YuLPT.P-hSPLDslE......s-.sFFss..........................sh..--pth.............................s..sa..p..s..t...hs......................p..tss....h.tt.hshs.s...ss..........................h.ts..s.ss..thY..sthht............s..............tst..hps..................thG..QLSPPPEs.........sh-uh-php.s..ELLu-hDRsEF-QYLs....tpp..s-hs..........Lshps..............tp............................ss.ssspp.....sul.SlLuDAouA.YYsshs.s................ 0 5 10 32 +11901 PF12068 DUF3548 Domain of unknown function (DUF3548) Assefa S, Coggill P, Bateman A anon PFAM-B_3247 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 184 to 216 amino acids in length. This domain is found associated with Pfam:PF00566. This domain is found at the N-terminus of GYP7 proteins. 
27.00 27.00 27.00 27.40 26.90 26.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.31 0.70 -5.04 9 226 2009-07-10 11:34:16 2009-07-10 12:34:16 3 4 118 0 123 196 0 178.70 30 28.80 CHANGED ssshKVlFEK-GValHosst+..p-pDshIsGhlRll.EKcu-lhl-WpPlE-.shssstlhhttcsupus............ctscassuYEs-WshloTVshcc+.ttpt....stssspu+W.uFohsLs-L+Sl+ps+.uh...GasYLlhss+cGssLPsLHFHpGGo+thL+sLp+alhLspSPpDu+lhLVhsps.pALSpSF-pL.....--sohsllp+ahpD .....................................................s....pllapppsValHssstc.....cs.IsGhlpll.....p.....cc......s......p...sh.lcat.....Ph-.-.....s.s..ssthhhs.tps..sps.h.............................................p...sph.ssh-s-W...shlso.....sp.....p..pp.t...p....st.......s.....s.....s..t....s...s..t.u..pa.....uF.s.hsLs-L+Sl+ppc.uh...GW..sY..LVhsh+.sG.ssL.PALaFHpGso+.hlcsLppal.hLspSspDtphhLV.sps......puLspSFppL....pps..s.shh............................... 0 31 49 85 +11902 PF12069 DUF3549 Protein of unknown function (DUF3549) Assefa S, Coggill P, Bateman A anon PFAM-B_2034 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 340 amino acids in length. This protein has a conserved LDE sequence motif. 
57.80 57.80 86.90 86.80 27.30 20.30 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.11 0.70 -5.66 29 187 2009-07-10 11:52:14 2009-07-10 12:52:14 3 1 184 0 44 152 13 331.00 50 98.51 CHANGED MspIsTLophLpsussQaplaDlGRRVpcIstppFtpIEptptPYPaPlQpHAphAIsF.W..sts..........pQsaIWFLKLPLDE+GLLp.AsRspFIchllEALGppLspsho--.QQ..ppLusNPYsFKPsp-KhAhFpAhl+tpLspssSpaYEhAtpYLSuph....s......h-sWQslGLQGIADlssRlpppsp.tshltcAlsp....hPtpVhhALstsLEHhs.lscpLu-tlhpphppt......cshhhshhlRALuu..usspshpsh.h.slLsptphhcs-..lLlsIAu...RsWpsL.p-spthphaL-pLAppp..Q....shFsQlFADLVtlPtLRstlLshLRpsspSstLupAIupLFp ....M-sIpTLopLL......psossQYQlFDLGRRVptlDsppFtplEtuQpsYPaPlQpHAphAIsa.W..spp..........+QPaIWFLKFsLDERGLLpQAshupFIphllEAhGscLsp........slo.EE...QQ.....pKLANNPYTFKPo--KlAhhHuplRtpLsLssSQYYEHAQp.YFSGsLG..........W-pWpoVGLQGIsDlCAR............Lpp-pN.ushLc....KALs+.............LPspPLaAlhusLEHls.Ls-cLAc+L.EhspsplttppsDlhLLuAllRALAu....ussshhpsh.hsulL.............sS...stls+sE...VLIulAG...RsWhsL.psssptppFLhRLApsts.Q.....sLFNQLFADlVhlPsLRsshLsLL.cussSspLApAltpL.p................... 0 8 18 32 +11903 PF12070 DUF3550 Protein of unknown function (DUF3550/UPF0682) Assefa S, Coggill P, Bateman A anon PFAM-B_2472 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 249 to 606 amino acids in length. 
25.00 25.00 27.50 26.70 17.50 20.60 hmmbuild -o /dev/null HMM SEED 513 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.76 0.70 -5.76 6 181 2009-07-10 11:59:48 2009-07-10 12:59:48 3 4 103 0 130 194 0 376.60 40 82.71 CHANGED CaLL-KS+pLFNuLRDLPQaG++QWQsYFG+TFDVYTKLWKFQQQHRphLs.chGL+RWQIGEIASKIGQLYYchY..LRTSETsaLsEAFsFY.pAIhsRuYaspss+E-..s-Lhs..K+LRaYARFllVCLLLs+h.chVt-Lspclcphl--hppsapscDptEWpLVlpElsuFlcADs.ltVLpsDphslhhsp.....c.sPhst.Pshtpsshpp..LSLp-uLlhusspsplKFoELTlDhFRMLQsLEtEPssuh.ph...............htpth...t.stpsu.....ltEsuccPshRsNPHKYLLYKPThSQLhsFLusuFKELPsNuVLLlYLSAsG...........................shsos+pst-uPY-hGGlhTusp.+shssGsslphRspu...tKEs.HCLYPGDLhPFTRKPLFlllDSssShAFKshs.shFGQPLlhLLSP..sohPsuhp-p..sp+GSLFThFLpuPLhAFsalsGlu..clctcLW-+sQchLc+hhs-huplLsc.SRs.........lD.salQFhGD-FLRhLlLRFVFCpush+l.H+sF+ .....................Ll-+ucphFstlRDLP.aG....ptp.....pta.Ft+sFcsYT+LWKaQQppR...thL...s.c..hGL+RWpIGEIAS+IuQLYYthY..hRTS-spaL.Eua.FY.tAIhtRtYapp.......s...t.....t...........................cLhl..KpLRahARFllVsLLLp+h.chVppLlpclpt..lp-hpp.pap...p..-.hEWphVlpElttFlcA-s.hh..hps.p....hhhsp....................................t..sh...s.htp...h.t...LpLt-hllhu.tp.pp......lKFoELTlDhFRMLQsLEhEP.t.h.p................................................t.t...........h.p..s...pp...sh..NP+KhlLY+PohophhshLus.hc.....E..Ls.suhlLlYlSAsG.....................................................................................s..pt.p..t..p....s.......p...s..t......t...p................t......tsp.t..........tt....p.laPtDLhPFTR+PhFlllDSpsShsap.................t..h.....t.hGpPhhhLhSP.............t.h..s..hts.........ttpGs.FThFLpsPh.Ahh.h.sls.......ph..thappspthl..tp..t-htt..hlhp.st............l..p.sahphhsD.FLRhLlhRalFCpsshth.............................................................. 
0 47 68 99 +11904 PF12071 DUF3551 Protein of unknown function (DUF3551) Assefa S, Coggill P, Bateman A anon PFAM-B_3610 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 79 to 104 amino acids in length. This protein has a single completely conserved residue C that may be functionally important. 26.50 26.50 34.20 28.80 21.60 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.19 0.72 -4.43 37 127 2009-07-10 12:00:59 2009-07-10 13:00:59 3 2 24 0 54 129 3 83.00 30 89.04 CHANGED MRphhhsshuh......................................shshhhsssssAp.A+D.YPaCLp...usthuas.GcCsasoapQCpsoASGpsApC.......shNPhhs..h..spsppttptp ...........................................hhhhsh.....................................hssshhhss.sssAp..A..t.-..a....saClp........utthuhs..ssCsasohtQCpAoASGp.sAtC.......shNPhas....h..st.tt.....t................. 0 4 14 28 +11905 PF12072 DUF3552 Domain of unknown function (DUF3552) Assefa S, Coggill P, Bateman A anon PFAM-B_3508 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is about 200 amino acids in length. This domain is found associated with Pfam:PF00013, Pfam:PF01966. This domain has a single completely conserved residue A that may be functionally important. 
29.00 29.00 29.00 29.00 28.70 28.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.39 0.71 -5.10 58 2218 2009-07-10 12:04:02 2009-07-10 13:04:02 3 6 2166 0 441 1381 168 199.80 32 38.76 CHANGED hhllhs.llull.VGhslGahl.....RKpluEp+lssAcppAcpIl--A.....................c+-AEshKKEAlLEAK-Eh++hRsEhEcEl+-RRsElQ+.EcRLlQ+EEsLD+Ks-sL-c+EppLpcKEpplttppp.lcppcpcl-pllpcQppcLE+ISGLopEEAKphllcpsEpElp+EtAhhIKEhEpcAKEcADK+A+cIluhAIQRsAADa .............................................................................................h..llhhllshl..lGh.s...lG....ahh..........................t+...ph....t....pp...p....htpAcppA.c.pI..lppA..............................c...+...EA-....sh....K...KEt..lLE.......AK...E.E....h...........p....chRp.......-.......hE...p.......E.......h....+.......p+...Rp......E....lp....c........E.p....R...Lh....p.......+.......E....c.s....L.-+....Kp-....p....Lcc+.......EppL.......-p+.......cpplt.......p.......c.......tpt.......l.......-p..+cpclc...........clhpppppE......LE+I.......uuLop-EA+phlLpplEc-lpc-hAhhl+-hEp-AK-.cu-KpA+cIlspAlQRhAu-h............ 0 198 340 403 +11906 PF12073 DUF3553 Protein of unknown function (DUF3553) Assefa S, Coggill P, Bateman A anon PFAM-B_3361 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 60 amino acids in length. This protein has two conserved sequence motifs: GQVQS and TVNF. 20.30 20.30 20.30 20.40 20.20 20.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.59 0.72 -4.65 22 88 2009-07-10 12:12:10 2009-07-10 13:12:10 3 2 88 0 32 90 325 50.90 50 47.26 CHANGED LEPGhhVRHPspPDWGlGQVQSNIuuRlTVNFcctGKlVIDus+VsLhhVh- .......LtPGhhV+HPspP-WGhGQV...QS.....slus+l.TVNFcctGKhVI-uspVtL.hV............ 
0 10 24 28 +11907 PF12074 DUF3554 Domain of unknown function (DUF3554) Assefa S, Coggill P, Bateman A anon PFAM-B_2029 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 287 to 356 amino acids in length. This domain is found associated with Pfam:PF02985. 25.00 25.00 25.10 25.40 24.50 24.60 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.88 0.70 -5.01 30 248 2009-07-11 10:07:29 2009-07-11 11:07:29 3 46 204 0 185 264 0 312.50 20 13.21 CHANGED s-p+slhsphLttl.s.tt.............lSpclspulhsllsKE....uNEsuLsphlsshspahhtl.......psclscclsstltK...Ghs-K+.s..lR+hWhhphsshhh............pstshshsspFhsthhphhccssssPhssspssslsuAalllu.........t..........t...h..hhhs.+sshLhss+haoK.ssps-lpahhcsLpslhsstp.p.ttpt....saupAhlaslsus..phssRcpAhphLpplhtppsth.......lucsllsulhshltshchh............stssshchlphlhpsls.s.sshst..........hcp.hhpLLllup.thls.t.....sWhuLs.+s...plDPupllpcpsc ....................................phl.tls..ts.............lutplsphhhshltcE....spEssLhttlpshshahshh.........pspls..cplhphhtK...GhspKpss..lR+salhphhthhh........................ss.shthhts.llshLhphhccshsp.sh..t.ss..t..ss..th...suAhllhs..............ths..htt.........t...h..pl.h.....h.s...K..ph.hhs..+hhpp.....ssp-shhhh......hchhp.tlhhph.tph.ttth.....pthtpAhlhhlhut..shp.lRcpA.hp...slpplh.tp.sth...........lups..ll.ptlhph.lpshchh..pt...............ttssptt..p.h.hh...s.pslp.s...s.hpt...........t......hcp.hhphLllu+p..l..sts........Whs.Lhh+h....tlDPtphlpc...t....................................................................................... 0 49 92 149 +11908 PF12075 KN_motif KN motif Assefa S, Coggill P, Bateman A anon PFAM-B_3795 (release 23.0) Motif This small motif is found at the N-terminus of Kank proteins and has been called the KN (for Kank N-terminal) motif. This protein is found in eukaryotes. 
Proteins in this family are typically between 413 to 1202 amino acids in length. This protein is found associated with Pfam:PF00023. This protein has two conserved sequence motifs: TPYG and LDLDF. Kank1 was obtained by positional cloning of a tumor suppressor gene in renal cell carcinoma, while the other members were found by homology search. The family is involved in the regulation of actin polymerization and cell motility through signaling pathways containing PI3K/Akt and/or unidentified modulators/effectors [1]. 19.50 19.50 20.50 19.80 17.80 17.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.95 0.72 -4.16 15 233 2009-07-11 10:36:48 2009-07-11 11:36:48 3 29 67 0 116 195 0 39.40 60 4.24 CHANGED PYSVET....PYGYpLDLDFLKYV-DIE+GsTIKRl...slpRRs+ ...........PYSVET....PYGacLDLDFLKYV-DIE+G....s....Tl+Rl.......slpRRs+............. 1 15 25 59 +11909 PF12076 Wax2_C WAX2 C-terminal domain Assefa S, Coggill P, Bateman A anon Pfam-B_3756 (release 23.0) Domain This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF04116. This domain has a conserved LEGW sequence motif. This region has similarity to short chain dehydrogenases [1]. 
27.00 27.00 36.20 36.00 26.70 26.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.95 0.71 -4.51 10 156 2009-07-11 10:43:14 2009-07-11 11:43:14 3 5 27 0 77 166 5 157.20 47 28.21 CHANGED VlLpGssoKVARAIAhuLC++sl+VsMh..sKE-acpLppcl.sschpcNLVhSpo...aos....KlWLVGDGLoscEQh+ApcGTLFlPFSpFPPpchRKD.CsYpsTPAMtlPKohpNlcSCENWLsRRVMSAa.Rsu..GlVHALEGWscHECG.sph...ul-+l.....WEAAL+HGFpP ....................VhLpGsh..SKl..upAlAhhLCp+tlpVhhh..sp-cappl.ppch.ss-hpphLV.......s.op...aps.........KsWlVGchlsscEQthAPtGThFh.Fs....P.h....p.hR+D..CsYtphsAMtlP.cs.hpslcoCE.hhsRtVhpAh.+su..GllHsLEGWstHEsG.....sl...cl-hlWpAALpHGFpP........................ 0 10 46 66 +11910 PF12077 DUF3556 Transmembrane protein of unknown function (DUF3556) Assefa S, Coggill P, Bateman A anon PFAM-B_2567 (release 23.0) Family This family of transmembrane proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 576 to 592 amino acids in length. 
25.00 25.00 238.70 31.50 23.10 22.90 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.75 0.70 -6.50 14 188 2009-09-11 14:10:01 2009-07-11 12:04:22 3 2 108 0 38 132 5 566.10 57 98.52 CHANGED MGFlpPchPsVDhsEWpctsRu-Rl+sMAcHWAEhGFGTPhhlHlhYllK.IhlYlLGuWlls.htTsGlsGhscluuWWsEPIVaQKlVLaThLFEVlGLGCGaGPLssRFaPPhGuILYWLRPGTIRLPPWPc+VPhTpGspRTshDVsLYuulLssllsALhosGs.....GPlsth.....sstlGllssWplssllslLuVLGLRDKlIFLAARGEhYushhlsFhF......G...usDh......IlAsKllhlslWhGAATSKLN+HFPaVlusMhSNNPlh+s+...hlKRpha+caPDDLRPStLuchlAH.huTslEhLVPLlLhFupuGas.TslAAsl.MlsFHLsILSshPMGVPLEWNVFhlFullhLFlua...uslGls-lpsPh.lulLhAslssssllGNLaPc+lSFLPuMRYYAGNWsTolWCh+P.uA--Klcp.sllthuuhstsQLp+h.Yus.EsAplhhahuhAFRuMpoHGRALhoLhhRAl.ustDED-YslpEGEhlCusslGWNFGDGHLHNEQLIAAlQpRCsF-PGElRVllL-uQPIH+QpQpYRlVDAATG.hERGYVpVuDMlscQPWs-...........-...lPVcVt.s ..MGFLpPcLPD.V.DhspW.ptsRpp+lpshupHWAEhGFGTPhhlHLhYlsK.IhL.YlluGhhIl.hhTsGlsGh...oclusWWspPIVaQKVVlaThLFEVlGLGCGaGPLssRFaPPhGuhLYWLRPsTIRLPPWPD+VPaTpGspRTsVDVALYAllLhhllhALhos.Gs.....sPlsth........GstVGLlssh.lVshlllLuVLGLRDKsIFLAARGEhYhshhlsFhF.............s...hsDh......IlAhKllhLslWhGAATSKLN+HFPaVlusMhSNNPLlRsR...hIKRhha+caPsDLRPuhLu+lhAH.suTslEhlVPllLhhu..psGaP.ohlAssh.MVsFHLsILoslPMGVPLEWNVFhIFullhLFlua......uslulsDlcsPl.LAlllAVlAhlVIhGNLhPcKISFLPuMRYYAGNWsTolWCh+P.uAEsKlpp.ulVt.uuhsssQLt+h....Yss.-pApI.h.hs.hAFRAMpoHGRALhoLhhRAh...cDEscYsIp-GEhlCushVGWNFGDGHLHNEQLIAAlQcRCsFpPGElRVllL-uQPI..HpQpQpYRlVDAATG.FEcGYVpVuDMlsRQPWs-...-.lPVHVt..p.............. 0 7 22 33 +11911 PF12078 DUF3557 Domain of unknown function (DUF3557) Assefa S, Coggill P, Bateman A anon PFAM-B_2154 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 150 amino acids in length. 
26.20 26.20 26.30 26.30 26.10 26.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.91 0.71 -4.50 33 542 2009-07-11 13:46:58 2009-07-11 14:46:58 3 7 5 0 539 545 1 145.30 20 38.35 CHANGED Kl+-Ah+YLlp+Lh..uuRss.lplpp...Lplss....tslL.RlP...psl+l+lppLcl...sspsspllcsl..psllcsoohPLpolplsss........phscsllpouchLhltsthh.............ptLhplpsp+Vclt......hshpphhcL.lcpW.lcpu......+p.lGThaoFshpp ...........................................pshchlhphhh....ss.R...p.t...l.pVpp...lt..h.t.t..................tt.h..h..phP......sh..+h.c.l.p.pLph..............hp..........phhptl..........s.hl..c.s.ohPLcp..l.phtht..................tth.cp....sl.lp...s.AcpL.hlpt.h.................................phltpL.p.Npp..lhlp.......ththsh...p..s..h.htl....lcpW.hpst......+p.lGTpaph....t............................................... 0 42 43 539 +11912 PF12079 DUF3558 Protein of unknown function (DUF3558) Assefa S, Coggill P, Bateman A anon PFAM-B_3489 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 177 to 195 amino acids in length. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.49 0.71 -4.61 27 517 2009-07-11 13:51:31 2009-07-11 14:51:31 3 3 154 0 133 423 2 165.90 21 85.88 CHANGED sssluGCu..s.sGsssstsssss....sss.....phss........hhtt..CssloDspl.schh......Gsssh.h....ssshsus......sCpWptss......h....shs.loh.aacsuslspERphtpththslp-h....sIsG+puh.hupssss.....sssCslsl.stusshls.....W.l.phps....tshs..Ds.C...shApcLspholsps. 
.............................................h..hlsGCs....s.s..Gs..s.ss..ussss...................sps.ts.sptp.ss...............................hhc...C.ts.losptl.sc.hh.................Ghssh....h........psshsus...........tCpWpuss......h................ths..loh.....h.hp.....ts.s...l....s...p...p...c....p.....ssctht.hp...s.psh..........sIsGp.......su..h..hhtssps.......susC.tVsh.....tsu.s.....s...h..lp.....hsl....shps......sts....Ds..C...s.Atclhp.sltp..p............................................................................................................... 0 28 94 120 +11913 PF12080 GldM_C GldM C-terminal domain Assefa S, Coggill P, Bateman A anon PFAM-B_3275 (release 23.0) Domain This domain is found in bacteria at the C-terminus of the GldM protein. This domain is typically between 169 to 182 amino acids in length. This domain has two completely conserved residues (Y and N) that may be functionally important. GldM, is named for the member from Cytophaga johnsonae (Flavobacterium johnsoniae), which is required for a type of rapid gliding motility found in certain members of the Bacteriodetes [1]. 29.00 29.00 35.80 34.80 28.40 26.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.00 0.71 -4.33 26 164 2009-07-11 14:11:47 2009-07-11 15:11:47 3 3 148 0 57 170 155 176.40 31 35.45 CHANGED sIuuDKMNVlYRGlsNPhoIulPGlssspVs...uous..G..Lp+tuss.tahhpPs..p.Gpc.lpIsVoup.ss.Gpsls.sppsFRlKslPsPh.........Gslpspt....ssphs+psLpsus.lsAsh.pDFsF-lshpVsuFplplssp.....ssltlsGschsups+shlp+sc+G-tl.ItcIcsphss ........sluADhMNVLYtGh..sNPloIShPGlssspVs...sohs.....GupLsppGsu...palsp....Ps....s.Gpc.lsIsVoup.....Gpshp...uphsFRV+plPcPs.........ushpthp.....ssthsKssLhsss.lsAsl.-Dh.hD.lsapVsuFch.hh..hcp.......sslsssGsphos..pt+stl+php+GcphhIsclpspss.s.................. 
0 28 51 57 +11914 PF12081 GldM_N GldM N-terminal domain Assefa S, Coggill P, Bateman A anon PFAM-B_3275 (release 23.0) Domain This domain is found in bacteria at the N-terminus of the GldM protein. This domain is typically between 169 to 182 amino acids in length. This domain has two completely conserved residues (Y and N) that may be functionally important. GldM, is named for the member from Cytophaga johnsonae (Flavobacterium johnsoniae), which is required for a type of rapid gliding motility found in certain members of the Bacteriodetes [1]. 25.00 25.00 29.70 25.60 24.30 24.10 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.17 0.71 -4.78 30 171 2009-09-11 04:40:41 2009-07-11 15:15:05 3 4 147 0 54 178 163 186.50 24 38.17 CHANGED EVLsuFshhN..cpLcposssscppNcpshssL..ssKss-sstchtphhppAppl+ptoschhsalpslKpplhpsss..ctp....hpshc+sstl-..pthhhGst.hsscGp..clhpclspY+stlhthh..stphpp.hh.ttlptphssss......ppps.tppcahphpFps.hPllAulshLTphQs-l+psEu-llspLluut .........................................................................................-VLsuFshlp..csLppostsssp...pNpthhpsl..ptp..hppsst..+htthhp.cAppl+pho.cslhsalppLKtplscp...u-st..csp...........htslc.pp-sl-.s.......sthhh.us....hsscGp..cLhpplssY+..pplhphl.............t..hpt..hh..phlp.sphsTpss....................ppps.ts+sW.phhFcs.hPssAulThLoplQuDl+tsEu-llpsLlss.h........ 0 26 48 54 +11916 PF12083 DUF3560 Domain of unknown function (DUF3560) Assefa S, Coggill P, Bateman A anon PFAM-B_2138 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 120 amino acids in length. This domain has a conserved GHHSE sequence motif. 
27.20 27.20 27.40 27.40 21.90 21.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.68 0.71 -4.08 24 275 2009-09-10 16:38:42 2009-07-12 10:48:23 3 5 224 0 27 224 3 124.50 54 30.84 CHANGED sshtpRtpsRspRapshAs+Asscucptaptucphs-slPh.GQPILVGHHSEpRcR+hh-RhcsshtKAlthp-KAc.......aapp+A.tusspsshspcsP-s.lc+l+thhAsh+pspcph+sts+hl+pt .....os..LsERQEARAERFouYSuKRAuESsQAl-cV-RLAuhIP..GQPILVGHHSERRARRDAQRIENGMKRA.VMLaERAE.......YWEERAc..S.AL.hHAKYKERPDV..RaRRIKKIEADLRKAEKoIApSpKYLsha............ 0 6 17 26 +11917 PF12084 DUF3561 Protein of unknown function (DUF3561) Assefa S, Coggill P, Bateman A anon PFAM-B_2401 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 110 amino acids in length. 25.00 25.00 27.40 27.20 22.90 21.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.51 0.72 -4.20 10 531 2009-07-12 09:49:57 2009-07-12 10:49:57 3 1 525 0 37 121 1 104.80 72 98.87 CHANGED MRNopNl.hlopsDs.hppcDEsTaShsGAVVGFlSWLLALuIPFLlYGuNTLFFh.LYTWPFFLALMPVuVlVGIALasLLcG+LlYSlhuTlloVsslFuhLFhWLhG ......................MRNSHNI.TlT.s.NDu.hsEDEETTWS.LPGAVVGFlSWLh..ALAhPh.LlYGS.N.TLF.FF.lYTWPFFLALMPVAVVVGIALHSLhcGKLhYSIlFTLlTVGlMFGALFhWLLG...... 0 1 8 21 +11918 PF12085 DUF3562 Protein of unknown function (DUF3562) Assefa S, Coggill P, Bateman A anon PFAM-B_3549 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 62 to 84 amino acids in length. This protein has two completely conserved residues (A and Y) that may be functionally important. 
26.30 26.30 32.40 32.20 25.00 23.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.04 0.72 -4.15 14 97 2009-07-12 09:51:43 2009-07-12 10:51:43 3 1 71 0 22 65 2 69.50 39 94.74 CHANGED hs.ssh.............-tlcphApcpuhss-.Vcphhs-phcsLuu-ARVHDYlplFAhR+VRcphR....................spsstt+ ................MhQssl.-.lpulApcpshss-tVpchh--plspLusGARV+DYLpVhshRRVREplR........sR.....st................... 0 2 4 13 +11919 PF12086 DUF3563 Protein of unknown function (DUF3563) Assefa S, Coggill P, Bateman A anon PFAM-B_3639 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 50 amino acids in length. This protein has conserved AYL and DLE sequence motifs. 26.90 26.90 26.90 27.20 26.50 24.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.92 0.72 -4.22 5 136 2009-07-12 09:54:44 2009-07-12 10:54:44 3 1 79 0 42 82 23 51.90 45 93.77 CHANGED MYL...lS+LFLFLTKshDpusKEcp...DAYLAEATDLYDLEFRMRKID.R-Ash+pPSshss+ .....................huhlh..hLop.hE..p..s.....t+ERp...-AYLAsuoDlhDLEhRhRpL-...pshu..........h................ 0 1 10 22 +11920 PF12087 DUF3564 Protein of unknown function (DUF3564) Assefa S, Coggill P, Bateman A anon PFAM-B_3736 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 118 to 142 amino acids in length. This protein has a conserved WSRE sequence motif. 
25.00 25.00 44.20 99.20 21.90 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.71 0.71 -4.53 7 93 2009-07-12 09:56:13 2009-07-12 10:56:13 3 1 57 0 26 75 2 119.20 52 95.53 CHANGED MRlTl+lsT.-sss.tuaAlLWLDpsp+tWSREuHtGl-LPpWGsltsspGsstl...suADsspslCpLpGLclsshpGshEu.EpGpAhWh..ut+sPhhGtW+lQsVDpssspPEat.Fsttp MRLTI+ls.u.-uss...puaAVLWlDTDEphWSREuHpGIDLPsWGcV+sspGsstL...CuADsscuLCpLcGLshust..............pt...Es.EpGsAhh......us+sP..sGAWRLQsVDpssspsEHc.FTsht.. 0 2 5 17 +11921 PF12088 DUF3565 Protein of unknown function (DUF3565) Assefa S, Coggill P, Bateman A anon PFAM-B_3033 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 30 to 78 amino acids in length. This protein has two conserved sequence motifs: WVA and CGH. 25.00 25.00 29.40 29.30 20.40 19.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.18 0.72 -4.20 19 114 2009-07-12 09:58:00 2009-07-12 10:58:00 3 4 112 0 40 105 12 56.00 50 70.94 CHANGED lGaHpD-EsHWVApLuCGHsQHVRHpPPWpsRsWVlT.pGRpshLGt.LsCtKC-puustD ..hsacpD-EsHWVA.LuCGHsQHlRHpPPWpsRsWVlottGRpphlGphlsCt+Csputs........ 0 10 19 32 +11922 PF12089 DUF3566 Transmembrane domain of unknown function (DUF3566) Assefa S, Coggill P, Bateman A anon PFAM-B_3331 (release 23.0) Domain This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 136 to 304 amino acids in length. This region represents a transmembrane region found at the C-terminus of the proteins. 
45.90 45.90 52.10 51.60 38.50 38.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.53 0.71 -4.09 24 434 2009-07-12 10:02:00 2009-07-12 11:02:00 3 1 432 0 113 310 175 118.90 40 54.26 CHANGED ssRRscLplsRlDPWSshKloFllSlulhllhlVAsslLahlLsuhGVFsplsshlsslsssssu......sltshlohGpVhuhusllullsVVLhTALuTluAhlYNlsusLlGGlcVTLu-c ......psR+ApLplsRlDPWSshKlShlLSlAhhllhhlAsslLahlLsuhGVasplNpslsslhsssuu......phsp..hlohGpVluhssLlGslNsVLhTALuTluAFlYNlsusL..lG.GlEVTLu-c.. 0 34 83 105 +11923 PF12090 Spt20 Spt20 family Assefa S, Coggill P, Bateman A anon PFAM-B_3386 (release 23.0) Family This presumed domain is found in the Spt20 proteins from both human and yeast. The Spt20 protein is part of the SAGA complex which is a large cmplex mediating histone deacetylation. Yeast Spt20 has been shown to play a role in structural integrity of the SAGA complex as as no intact SAGA could be purified in spt20 deletion strains. 32.00 32.00 34.90 35.10 30.30 28.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.21 0.71 -4.93 32 305 2009-07-12 10:47:42 2009-07-12 11:47:42 3 4 214 0 208 305 0 201.60 29 23.78 CHANGED ssppl.L++at.................................................p.......sSlhl+lasspa+a..................................................................s.ppshhhsYss.h.+thLcalcptplP.pthh-lL..........................pp..s...............slpFY-GCl.IlplhDa+s...................................................................t..............tt...........t..t.tp.tt...............pPcsapslL+PTstolhhDlhhhsss...................................................tphssp.hhphEucllt.sTsts................LsL-ss.s.......stpttshLp.p 
.......................................................................h...tl.LcKh............................................................ssLllpLaPsphtas.....................................................................................s.scshphPYcp...pplL-alcscplP.s.Ll-lL................cc..s......................sl..Fas............GCl.Iscl+DaRp.......................................................................................................................................................................................st.psp+llL+PT....poLhsDlp.h.sss..............................................................................ttpaopc-...tLtlESpllh.ATstP................LsL-Pshs.........stpstshh...hh................................................... 0 58 93 145 +11924 PF12091 DUF3567 Protein of unknown function (DUF3567) Assefa S, Coggill P, Bateman A anon PFAM-B_3056 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 90 amino acids in length. This protein has a conserved EIVDK sequence motif. 40.90 40.90 50.00 47.60 32.30 30.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.88 0.72 -4.01 11 117 2009-07-12 10:52:43 2009-07-12 11:52:43 3 1 116 0 45 74 12 87.40 57 98.69 CHANGED MQMlYsS-sasVVch.........sPDssh................ttLsptGaEIVDKpup+ElaLDGuhAEhFppclpshhpspPo.EEVDDhLspYsuLhppPVllH ..MQMIYNSsNYCVVEF................sPpss+..............tshsuGGYEIVDKNupREIFlDGplAE+FRccVcpLIps.EPol-EVD-FLGpFDoLMpQPVVlH. 0 2 20 33 +11925 PF12092 DUF3568 Protein of unknown function (DUF3568) Assefa S, Coggill P, Bateman A anon PFAM-B_3573 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 130 amino acids in length. 
37.00 37.00 39.00 38.70 32.50 32.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.51 0.71 -4.43 8 164 2009-07-12 10:55:55 2009-07-12 11:55:55 3 1 45 0 26 90 3 129.80 34 95.07 CHANGED +KlhllhLhsh.slsLsuC.hlsAllsGsus.tuGsstY..lsGpYhs-l.stsacsVYsATlpAIpsspsaslpspshs.....p..psAsIsus........sKsss-slplclpKlscNsTclpIKhGshGDpttSusLhspIpcNl .............Khhlhhlhsh.s..lsLsSC...ssullsGhu..sssuu.......s.stY..lsG...pYhhpl.stshpsVYsATLpAlpssssaslp.spshs.......s..pcApIsus........scssssshplplp+lssssT+lpIKaGshG..DpttSusLhspIppsl.......... 0 15 19 19 +11926 PF12093 Corona_NS8 Coronavirus NS8 protein Assefa S, Coggill P, Bateman A anon Pfam-B_2038 (Release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in coronaviruses. Proteins in this family are typically between 39 to 121 amino acids in length. This protein has two conserved sequence motifs: EDPCP and INCQ. 27.00 27.00 52.50 52.50 18.40 17.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.79 0.71 -3.74 2 88 2009-07-12 11:00:27 2009-07-12 12:00:27 3 1 70 0 0 20 0 68.90 56 96.56 CHANGED MKLLIVhshlo.s.CI..csslQcCspNpPa.lEDPCPhtY.s-W.I+htoR....poAhLstL.....GKhlPhH..hatshs.oCpP.lpINCQsPssGuLIsRC.Y.a-...hsta+DVhVVL ....MKLLIVhshlo.s.CI..psslQcCspNpPa.lEDPC.lp....................................................................................... 0 0 0 0 +11927 PF12094 DUF3570 Protein of unknown function (DUF3570) Assefa S, Coggill P, Bateman A anon PFAM-B_3745 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 396 to 444 amino acids in length. 
25.00 25.00 170.60 170.20 19.50 19.50 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.54 0.70 -5.74 41 141 2009-07-12 11:02:17 2009-07-12 12:02:17 3 1 118 0 71 143 85 412.60 24 92.13 CHANGED tlstsLshs..........ssullussstsps.....................................cl-...hthhhYpE............ps.psssstshsshc...............pshu-cp.slshchslDsloGASssssss..................h.tspt...sssapstuGtssh.t...................DTRhphssshsp..hspshphshusphSpEhDYhShuhsh.ulspshspcNTolohuhuht.Dpl....thshthpt.....t....................................s-s+pshshhlGloQlls+phlhphshshstssGYLosPY.+hlphhDss..............s.s.............................................t.hh.hEpRP-sRh+tul...hhpspatls..sssh....chpYRYasD-WuIsSHTh-scahhtlssph..h........lpPphRYYpQssAcFYp......shh.psp.s..h............s..........pahouDhRLushsuhohGlp...........hs...........hphtsthplshphthYppssshs.........G.....................................................thpshhlplshpF ......................................t......hhhshshhsts.t..sts.th.tct......................cls...hthhhYp-....................ps.psssssshsshp......................pshucct.slshchslDslouASssssss...................................t....ssths...h.uG.ts.......................................................DsRhphshshsh..hppstphshGsshSpEhDYpShGhsh.uhspshspcNTololGhuhhhDpl.psh..hssshps...................h........t......................scs+pohshtlGloQllspphhhphshshstpsGa........LosPY.+hlp..hhDss..................s..........................................................t.hh.hEphPDsRh+hul...hhpsphhls....sssl..chpYRYapDDWGlsoHTh-hchhhhlss.pa..t..........lpPphRYYsQ.ou.AcFYp.........shh.ts...s.....h...........................................................s............pahouDacLusasuhshGlt.................hs........hphttthphshphtaYppssshs........................................................................h.h........................................................... 
0 28 54 65 +11928 PF12095 DUF3571 Protein of unknown function (DUF3571) Assefa S, Coggill P, Bateman A anon PFAM-B_3506 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 85 to 97 amino acids in length. 25.00 25.00 25.10 59.60 21.60 17.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.79 0.72 -3.75 23 90 2009-07-12 11:05:05 2009-07-12 12:05:05 3 2 87 1 41 92 116 83.50 41 82.07 CHANGED MuDP...LlRspDpYVVLE.PspsEphLTssEhLtaLpphLpp..h-sL.PtDLpchsolsstAp+Ll-TAC-LEl.uPGhslQWaAVRLE ......Msss...lhh.ppDpaVlLE.PspsEpFLospELht+LcshLpp..tssL.P.DLpchsolsstApaLl-osCEL-l.GsGt.hlQWYsVRLE. 0 7 27 37 +11929 PF12096 DUF3572 Protein of unknown function (DUF3572) Assefa S, Coggill P, Bateman A anon PFAM-B_2130 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 100 amino acids in length. 35.20 35.20 46.70 61.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.82 0.72 -4.14 41 202 2009-09-11 14:10:56 2009-07-12 12:08:04 3 1 199 0 63 161 66 88.30 42 89.90 CHANGED hst-sAEslAlpALuWlAus--LLshFLutTGhusssLRpsAsDPsFLuuVLDFlhhDDAhVhAFs-utGls.ppshtARtsLPGu....st. .........sptsApslAlpALuWLAsD.c-lLsRFLAhTGlpssslRpuAp-PuFLuuVLsFlhscEssLltFC-psGhcPsthtpAhthLsGu..s..... 0 14 37 46 +11930 PF12097 DUF3573 Protein of unknown function (DUF3573) Assefa S, Coggill P, Bateman A anon PFAM-B_2286 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 372 to 530 amino acids in length. 
26.60 26.60 26.60 27.30 26.40 24.90 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.37 0.70 -5.85 5 128 2009-07-12 11:17:35 2009-07-12 12:17:35 3 1 33 0 12 96 0 354.20 51 73.37 CHANGED hFpKphLIloslLshuhhsahspS.thhs...........................s.QsssSpplD+psIocLQpQIppLQuQINpL-pp.......psoNtsupFsTYSSKVsss....olsulGpoKsLussphNNphsoDIhsNlss-ssIIsLuupslGGlFspcGuIDVGNAPAITTQGQlTYLGSYSGNNTIPIGQISSNLFASTIlGQR-KFDsYSIFFGGaIEADAQsWFGSuIo+u....ssuosFsSNGQNIYLToAsLYFLANlGHYVTAsaDFsTsEsNNFuLGNAFVIFGNLDTSPaFVTVGKs.RlSVGoFGGGGPWTSGITtNFLsPGRVTNluLNYKoDThNANVTVFuopNpHAsFSsAlFYAsKhTsNIA.VGFNhGYlaDlAGAsNsSls+hh.phsahsKoVGpFN .....................................h.Kh.llh.hhL....h.aht.S.th.s.................................Qsstup..-pptl.pLppQIppLQupIspLcpp............p.s....p.pFsTYSSKVssp.....hh..t.Gpspsl.sp.s..ss..tsclhsN.s.pps.lsLusp...tlFspsGuIDVGsAPAITTQGploYLGSYSGNNolPIGpIsSNLFASTlltQRsKFDsYSlFFGGaIcADAQhWFGosIs.s....psu.sshsuNGQNIYLToAsLY..FLuNlGHYVTAphDFsTs..-sNsFsLtsAFVIFGNLDTSPaFVTsG+s.+lSVGoaGG...GGPWTuGITtsFL.sPs+VTNl.SlNYKssshNANl.sVFuop....sp+AsFSsulFYAsp........hTsslA.lGFNhGYlaslAGAsNs.Slsp.h.phsh.scslGshN............................................. 0 7 7 7 +11931 PF12098 DUF3574 Protein of unknown function (DUF3574) Assefa S, Coggill P, Bateman A anon PFAM-B_3542 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 144 to 163 amino acids in length. This protein has a conserved TPRF sequence motif. 
25.00 25.00 25.00 27.10 24.70 24.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.34 0.72 -4.44 21 194 2009-07-12 11:23:53 2009-07-12 12:23:53 3 1 194 0 67 196 59 103.70 42 69.59 CHANGED lps-LhFGhshssss.......sVS-A-appFlcc-VTPRFPDGLTVhDApGQW+s.ssGp..lsREsoKllhllasssssp.csslpsIRpuYKp+FpQpSVhhsspPsCVSF .....................popLaFGhsps.sGs.......sIoss...-WppFlDcpVT..PRFsDGLTVaDAcGQWhs.ssGp..lsREpSKslhllas..s.s..s.ss..cssl-AlRpsYKpcFtQpSVhhlppsVCVuF................ 0 14 27 46 +11932 PF12099 DUF3575 Protein of unknown function (DUF3575) Assefa S, Coggill PC, Bateman A anon PFAM-B_2229 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family are typically between 187 to 236 amino acids in length. 27.90 27.90 28.50 28.80 27.40 26.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.18 0.71 -5.21 39 605 2012-10-03 17:14:37 2009-07-12 12:36:53 3 4 114 0 65 504 70 197.20 28 78.01 CHANGED h+phhhlh................................................hhllhhsshh............................................tuQplAlKoNlLYDA.sss.NlGsEhslu++WolslsspYNsWp..a.ucs........+ph+tahspPEhRYWh.......scpa.sGHFlGlauphGpYNh..ush...hs............phpcpR.YpG..hhaGu.GloYGYpahLuc+WslEhslGlGYsphcYcKYsssc.Cush.hspscp.sahGPTKAulSLlY.ll 
......................................................................................hhhh..........................................hpspphAlKoNlLa.u.s.t.........s.........sNlGlEhs.l....u....c....+....h...olsls..s..sas.s...Wp....h....s.c...s.........+..p..h+hahl.pPEhRYWh............................................scph..sG..aFlGlau...tsup.aNh.......................ph.p.cp+Y.p.G.......h..h...Gu..GloYG.....YphhL.....u..c....+..Ws.....lEhslG...lG....YhphcY.......c.+....Y.tsh.......s......sst...h......htpt.pp...s...a....h....G....P....TKsulSllYhh.......................................... 0 21 53 65 +11933 PF12100 DUF3576 Domain of unknown function (DUF3576) Assefa S, Coggill P, Bateman A anon PFAM-B_2102 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 100 amino acids in length. This domain has a single completely conserved residue G that may be functionally important. 25.00 25.00 25.60 42.20 24.50 16.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.37 0.72 -3.98 34 167 2009-07-13 12:12:39 2009-07-13 13:12:39 3 1 167 0 52 147 1293 101.70 45 60.38 CHANGED ssluV.NpYLWpASL-sLsFhPltosDPFuGVIlTs.YusPsusscpa+.sTVaIhDssLcAcuLpVul..QspsG......tsVsssTspslEsAILoRA....RQLRlp ....s.huVNpYLWpAoL-TLSFhPlsSsDs.GGVIlT-WYusPsssscRaK.lslaIhDssLcuDulcVslhcphpps.ttWh.ssssusTspslEDtILoRA....RpLRl.s............ 0 21 39 42 +11934 PF12101 DUF3577 Protein of unknown function (DUF3577) Assefa S, Coggill P, Bateman A anon PFAM-B_2116 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 143 to 307 amino acids in length. 
25.00 25.00 27.00 26.40 24.10 17.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.51 0.71 -4.28 18 264 2009-09-11 09:40:19 2009-07-13 13:15:57 3 2 181 0 85 234 9 132.20 44 83.97 CHANGED ssssspspYFNLHssGlGYLNcIRcVp.........s++GpsFhACsIAALpGsoDsspYsaFDssVsGpEApcLIcRCpcAVcs-+KVLluFpluDLasDsFTasp.....Gc+pG..-suVSLKuRLlhlsWIKVDGchlYpApspssssss .........p..tsspppYFsLpTsGIGYLsRlR-Vs.....s+cGpsFLuCsIAALs.Gss...Ds...sp.....YphFDspVuGsEApcLlpR.C.pA........l.....-.t.....c+KV.LluFRL.uDlhsDsFhhsK.....G-+tG..-....susSLKuRLl+lshIKlsGphVYpstttptt..s........... 0 8 42 68 +11935 PF12102 DUF3578 Domain of unknown function (DUF3578) Assefa S, Coggill P, Bateman A anon PFAM-B_2328 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 177 to 191 amino acids in length. 25.00 25.00 26.40 26.40 24.40 24.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.06 0.71 -4.69 18 298 2009-07-13 12:20:20 2009-07-13 13:20:20 3 10 279 6 54 186 75 180.50 34 35.53 CHANGED pclhspYhpt+spp.Fppp......thsphlcpcl.cplpppshlspptah.VcGSsGQGsWAclPWlulhscplTsospcGhYlVYLFssDhsslYLoLsQGhTchppph....tspsplcphsphh+shlpp..ppt..hsspIpLtsph.........hspsYEpusIthKtYshs.sl..PsccpLhpDLcpllshYpphhsph ...........................................plhp.hhpt+Tpp.http...........htpll+p.cls...cp..lps......alccc..cY...Vs.GplupG.N.aupVPWIulhDcsITp.......pTpcGaYlVYL.Fp..s-h-sl.YLoLsQGhochsc.a.........psKsthc.phu..plpspl.sp.....spp.hsssphhhupspt........hspGYspusIta+hYDls..ch.............s-..-chlpDLcchLchapplhs..s............................................... 0 17 37 44 +11936 PF12103 Lipl32 Surface lipoprotein of Spirochaetales order Mistry J, Gavin OL anon pdb_2zz8 Domain Lipl32 is an outer membrane surface lipoprotein of Leptospira like bacteria. 
25.00 25.00 88.20 87.90 19.70 19.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.28 0.71 -4.82 4 121 2009-07-13 14:59:56 2009-07-13 15:59:56 3 1 47 9 6 90 0 179.70 90 77.15 CHANGED shcltlPYssslsYaGYlcsuspPDuhlcG.KcAYYLYlWVPAVlsElGVRMISPsu..upPscuDhVpcuFpsshpu-.p..+aFDTaIplERhsllps-cIs.KuAKsh.hptLsssDDus-h.cp.ptupYNSLhRhcSplusPsKA............LVRGLYRIuFTTYKoGcVcGSFlAoVG....sslPGVsh .TVKThLPYGSVINYYGYVKPGQAPDGLVDGNKKAYYLYVWIPAVIAEMGVRMISPTGEIGEPGDGDLVSDAFKAATPEEKSMPHWFDTWIRVER.MSAIMPDQIA.KAAKAKPVQKLDDD.DDGDDTYKEERHNKYNSLTRIK..IPNPPKSFDDLKNIDTKKLLVRGLYRISFTTYKPGEVKGSFVASVGLLFPPGIPGVS.P... 0 1 3 5 +11937 PF12104 Tcell_CD4_Cterm T cell CD4 receptor C terminal region Mistry J, Gavin OL anon pdb_1q68 Domain This domain is the C terminal domain of the CD4 T cell receptor. The C terminal domain is the cytoplasmic domain which relays the signal for T cell activation. This process involves co-receptor internalisation. This domain is involved in binding to the N terminal of Lck co-receptor in a Zn2+ clasp structure. 20.60 20.60 20.60 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.87 0.72 -7.13 0.72 -4.36 5 89 2009-07-13 15:23:31 2009-07-13 16:23:31 3 15 45 9 23 90 0 27.50 74 6.42 CHANGED RRRAcRMSQIKRLLSEKKTCQCsHRhQK .........RRpAcRMSQIKRLLSEKKTCQCP..HRhQK.... 0 1 1 5 +11938 PF12105 SpoU_methylas_C SpoU, rRNA methylase, C-terminal Assefa S anon PFAM-B_2024 (release 23.0) Family This domain is found in bacteria. This domain is about 60 amino acids in length. This domain is found in association with Pfam:PF00588. This domain has a conserved LFE sequence motif. Some members of the Pfam family SpoU_methylase, Pfam:PF00588, carry this very distinctive sequence region at their extreme C-terminus. The exact function of this region is not known. 
25.00 25.00 25.60 26.50 24.20 21.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.72 0.72 -4.41 53 711 2009-07-13 15:25:21 2009-07-13 16:25:21 3 2 708 1 104 324 122 58.50 70 25.78 CHANGED RQRpsAGhYspp..tLss-php+lLFEhuaPhlAchC+c+GhPYPtLc.-pGpIlsctsa .....RQRQNAGMYLRENSMLPEsEQQRLLFEGGYPVLAKVAKRKGLPYP+VN.QQGEI-ADA-W............ 0 14 41 76 +11939 PF12106 Colicin_C Colicin C terminal ribonuclease domain Mistry J, Gavin OL anon pdb_2dfx Domain Colicin is a protein produced by bacteria with Col plasmids. Its function is to attack E. coli through actions on its inner membrane ion channels or through ribonuclease or deoxyribonuclease actions. The C terminal domain is the ribonuclease domain. It specifically cleaves tRNA anticodons which recognise codons in the form NAY (N:any nucleotide, A:adenosine, Y:pyrimidine) which corresponds to Tyrosine, Histidine, Asparagine and Aspartic Acid. E5-CRD can be referred to as an RNA restriction enzyme that specifically recognizes and cleaves single-stranded GU sequences. [1] 25.00 25.00 37.10 29.80 22.10 15.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.21 0.72 -4.37 12 86 2009-07-13 15:32:11 2009-07-13 16:32:11 3 14 21 10 69 94 0 34.60 49 16.18 CHANGED GsVssshscpssscT.PDulsppsSusVhs.psssY .GsVlhVls-ss-scThPDSVoo.PSuoVVV.pVVVY... 0 3 69 69 +11940 PF12107 VEK-30 Plasminogen (Pg) ligand in fibrinolytic pathway Mistry J, Gavin OL anon pdb_2doh Domain Pg is an important mediator of angiostatin production in the fibrinolytic pathway. Pg is made up of five subunit kringle molecules (Pg-K1 to Pg-K5), of which the first three make the protein angiostatin. VEK-30 is a domain of the group A streptococcal protein PAM. It binds to Pg-K2 of angiostatin and activates the molecule to mediate its anti-angiogenic effects. 
VEK-30 binds to angiostatin via a C terminal lysine with argininyl and glutamyl side chain residues known as a 'through space isostere'. [1] 20.80 20.80 20.90 21.10 19.20 20.60 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.87 0.73 -6.24 0.73 -4.10 8 126 2009-07-13 15:37:18 2009-07-13 16:37:18 3 14 3 12 0 125 0 16.50 71 9.92 CHANGED DDsELcRLKNERH-HDc ..-sELERLKNERH-HDc 0 0 0 0 +11941 PF12108 SF3a60_bindingd Splicing factor SF3a60 binding domain Mistry J anon pdb_2dt7 Domain This domain is found in eukaryotes. This domain is about 30 amino acids in length. This domain has a single completely conserved residue Y that may be functionally important. SF3a60 makes up the SF3a complex with SF3a66 and SF3a120. This domain is the binding site of SF3a60 for SF3a120. The SF3a complex is part of the spliceosome, a protein complex involved in splicing mRNA after transcription. 19.10 19.10 19.40 19.10 19.00 18.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.79 0.72 -7.02 0.72 -4.40 20 244 2009-07-13 15:39:18 2009-07-13 16:39:18 3 8 212 1 178 228 0 27.60 47 5.54 CHANGED sulu..ss-.asEFYcRLcpIK-aH++aPN ..........slo..uss.FsEFYsRLKpIK-FHR+aPN.... 0 61 94 144 +11942 PF12109 CXCR4_N CXCR4 Chemokine receptor N terminal Mistry J, Gavin OL anon pdb_2k03 Domain CXCR4 and its ligand stromal cell-derived factor-1 (a.k.a. CXCL12) are essential for proper fetal development. CXCR4 is also the major coreceptor for T-tropic strains of human immunodeficiency virus 1 (HIV-1), and SDF-1 inhibits HIV-1 infection. Additionally, SDF-1 and CXCR4 mediate cancer cell migration and metastasis. The N terminal domain of most chemokine receptors is the ligand binding domain and so the N terminal domain of CXCR4 is the binding site for SDF-1. 
[1] 20.30 20.30 20.50 26.60 18.90 19.70 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.39 0.72 -4.15 4 102 2009-07-13 15:43:43 2009-07-13 16:43:43 3 2 77 15 21 91 0 32.40 82 9.34 CHANGED I.huDNhSEE.lGSGDY-shpEPCFpcENucFNR .....IYTSDNYTEE....hGSGDYDShKEPCFREENAHFNR. 0 1 2 6 +11943 PF12110 Nup96 Nuclear protein 96 Mistry J, Gavin OL anon pdb_3bg0 Domain Nup96 (often known by the name of its yeast homolog Nup145C) is part of the Nup84 heptameric complex in the nuclear pore complex. Nup96 complexes with Sec13 in the middle of the heptamer. The function of the heptamer is to coat the curvature of the nuclear pore complex between the inner and outer nuclear membranes. Nup96 is predicted to be an alpha helical solenoid. The interaction between Nup96 and Sec13 is the point of curvature in the heptameric complex. [1] [2] 25.00 25.00 29.20 28.40 23.50 22.20 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.65 0.70 -5.30 36 380 2009-07-13 15:55:46 2009-07-13 16:55:46 3 13 307 11 219 392 9 259.90 30 20.01 CHANGED s.hEphhhhLsupclpcAschAlpupsh+LAsLlo.l.usssss+p.hppQLppW+ppss..s.lspshhclYcLLuGsshhspss.t....................tlsWhpshGL+LaYupssp....slccslppatcslsp............s.....................pt..hpshhatLL+lY......sspst..........shpth...lts.shssssLch+LsWhlhplLpshshhpt.t..........................................t........DpLshsaAspL...cspGhap.AlFVlhHlssssp....Rcpsl+plLtRphptlss.t..............sp.palhpcLplPpsWIapApAlhs+sps 
........................-thh.hLsspclpcAsphuhpstsh+LAhlluph..s.........s..s.......t.h+p.hptQL.tWpptph...p.............lpp..hhpla...tLLuGpsh.hptuptt.....................h.tlsWhpslul.pLWYhhssss...............olscAlptapcshps.......................t.st.PhP...Yhpt................................sstpsh.DlhapLL..pLa.................upppt................................slpph......LpP...h...sh...o................sssh..D..aRLuWh..LhpsLpulshpphs..............................................tt...s.lphsaAtpL................ps..uhap.A..lFVlh..H.lp.pttt..............................RppslpphLt+p.s.t.hhtp........................................t.phl..hppLtlPtpWlhtAtAlhtt...................................... 0 79 126 184 +11944 PF12111 PNPase_C Polyribonucleotide phosphorylase C terminal Mistry J, Gavin OL anon pdb_3gcm Domain PNPase regulates the expression of small non-coding RNAs that control expression of outer-membrane proteins. The enzyme also affects complex processes, such as the tissue-invasive virulence of Salmonella enterica and the regulation of a virulence-factor secretion system in Yersinia. In Escherichia coli, PNPase is involved in the quality control of ribosomal RNA precursors and is required for growth following cold shock. This family contains the C terminal protomer domain of the PNPase core. The function of the C terminal protomer is to catalyse phosphorolysis through its two active sites. [1] 25.00 25.00 27.20 27.20 24.70 23.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.79 0.72 -4.76 16 611 2009-07-13 16:01:04 2009-07-13 17:01:04 3 3 604 16 52 368 0 38.90 67 3.77 CHANGED Essphu-WQcspahFcGKGuAGtHSAsspspussT+sps ...EuP+HSDWQ.RPoFsFEGKGAAGGHoAT+HASAsPsRPQP.. 0 2 13 34 +11945 PF12112 DUF3579 Protein of unknown function (DUF3579) Assefa S, Coggill P, Bateman A anon PFAM-B_2027 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. 
Proteins in this family are typically between 98 to 126 amino acids in length. This protein has a conserved FRP sequence motif. 25.00 25.00 28.40 28.10 19.70 19.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.98 0.72 -4.08 19 284 2009-07-13 16:43:08 2009-07-13 17:43:08 3 4 249 1 88 221 35 93.20 46 62.29 CHANGED hpsp....scchhIpGlTppGKsFRPSDWAERLsGlhupF....psst........+hsYSPastPss.hsGlKCVhVctcL+-hEPhAacFlhNFA+DNcLplh-.tshh ...........h...pspchhIpGlTp.sGKpFRPSDWAERLsG..l..husF....psss...........+lpYS.aV.+Psl..lssl+CVhVDp+LcshpP.AFcFlhsFApDNsLpVh-s..h............ 0 18 51 68 +11946 PF12113 SVM_signal SVM protein signal sequence Assefa S, Coggill P, Bateman A anon PFAM-B_2829 (release 23.0) Motif This region is presumed to be a signal peptide sequence found in Sequence-variable mosaic (SVM) proteins [1]. This domain is found in phytoplasmas. This presumed signal sequence is about 30 amino acids in length. 31.70 31.70 34.10 34.10 28.70 27.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.46 0.72 -4.12 32 70 2009-07-13 16:58:34 2009-07-13 17:58:34 3 1 15 0 53 69 0 32.70 50 22.02 CHANGED MFKLKsphhll..pIhLFlhLGLhhIsNN.ppVMAM ..MFKLKNphpll..sIhLFhhLGLhhIsNN.ppVMAM.... 0 16 17 31 +11947 PF12114 Period_C Period protein 2/3C-terminal region Assefa S, Coggill PC, Bateman A anon PFAM-B_2037 (release 23.0) Family This domain is found in eukaryotes. This domain is typically between 164 to 200 amino acids in length. This domain is found associated with Pfam:PF08447. 
25.00 25.00 49.90 27.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.43 0.71 -4.70 24 250 2009-07-14 11:56:36 2009-07-14 12:56:36 3 7 74 0 85 198 0 189.60 38 17.03 CHANGED ssDuhSsSS-LhshlLpEDutS............usuSusSsS..s............tpsS................sussoSpoSpYFGShD.SSEsscps+pstssp.....................ptphh+hVhpDPlWlhhssss-plMMTYQlPsRsh-pVL+ED+EKL+php+.QPpFoc-QKcELucVHsWlpptslPptlslpsClsCpspsss..............shctchpphphsch ...................................................................................ppDu.SsSS-LLplL.......Lp..E..DupS....................uT.GSusSuShssstst....................................uoG.Sts.sssost.............................ssuSpSSpoSKYFGShD..SSEssppsppststp....................tcphl+h.VLQDPlWLlhAss-ccVMMTYQlP.s.RshpsVL+ED+E+L+shQ+pQP+Fo--Q+cELupVHsWl...ppssLPptlslp.sClsCpspsps.............................s..................................... 0 10 19 41 +11948 PF12115 Salp15 Salivary protein of 15kDa inhibits CD4+ T cell activation Assefa S anon PFAM-B_2039 (release 23.0) Family This is a family of 15kDa salivary proteins from Acari Arachnids that is induced on feeding and assists the parasite to remain attached to its arthropod host. By repressing calcium fluxes triggered by TCR engagement, Salp15 inhibits CD4+ T cell activation. Salp15 shows weak similarity to Inhibin A, a member of the TGF-beta superfamily that inhibits the production of cytokines and the proliferation of T cells. 
27.80 27.80 28.00 30.10 27.60 26.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.39 0.71 -3.87 44 93 2009-07-14 12:43:31 2009-07-14 13:43:31 3 1 6 0 7 99 0 128.00 23 94.54 CHANGED lphhh..........lhlhhslsh..................................sptpspsss.........................slttphsphhsssc...phtpplhphCpp.......................ptlssphls..apsCoahCt.........ssttsphphp....Ls-GhPCG....pspsC.ppGpCss..........h.sC ......................................lhhhhshsh........................................s...ttpp................................s.shphPshhsshp....shhspltptCspt...................p.htsIsshphc...hpsCphtCp.......tpspp.ssplohs.............Ls-GpPCu....sppsC..pscCstp.......ssC.............. 0 7 7 7 +11949 PF12116 SpoIIID Stage III sporulation protein D Assefa S anon PFAM-B_2045 (release 23.0) Family This stage III sporulation protein is a small DNA-binding family that is essential for gene expression of the mother-cell compartment during sporulation. The domain is found in bacteria and viruses, and is about 40 amino acids in length. It has a conserved RGG sequence motif. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.64 0.72 -4.05 19 411 2012-10-04 14:01:12 2009-07-14 14:22:57 3 3 388 1 93 317 10 77.20 61 88.65 CHANGED +-Y....IE-Rsl-lupYIlEs+ATVRpsAKsFGVSKSTVHKDlTERL.pINPpLApcV+plL-hNKAERHIRGGpAT+pKYpctc ..............+-Y..IcERslclupYI.l-s.+..tT.VRthAKtFGVSKSTVHKDlT..E..RLPcINPcLApEVKcILDh.....p.....Ku.RHlRGGcATK.KY+c..p................................ 2 48 76 82 +11950 PF12117 DUF3580 DUF_B2046; Protein of unknown function (DUF3580) Assefa S, Bateman A, Coggill P anon PFAM-B_2046 (release 23.0) Family This domain is found in viruses, and is about 120 amino acids in length. It is found in association with Pfam:PF01057. 
25.00 25.00 56.30 56.20 22.50 17.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.97 0.71 -4.16 6 117 2009-07-14 14:37:12 2009-07-14 15:37:12 3 3 31 0 0 122 0 114.40 62 17.73 CHANGED t+WGKVPDWSENWAEPKlpTPINSlGSh+Ssppos+STPLSQNYALTPLsSDL.tDLALEPWSTPsTPVAsTlpspNTspuGspu.QsuQtSPTWSEIEsDLRACFopEpLcoDFp-..sLD ........HHWGpVP-WsEpWtEPKlQTPINoPss.pslpTos+ooPtspNYshTPlppDL.lsLALEPWSpPsTPshpssptpsst.sssh+....psSPTWSEIEsDlRAhFsppplpps........s.. 0 0 0 0 +11951 PF12118 SprA-related SprA-related family Assefa S, Bateman A, Coggill P anon PFAM-B_2057 (release 23.0) Family This protein is found in bacteria. Proteins in this family are typically between 234 to 465 amino acids in length. There is a conserved GEV sequence motif.Most members are annotated as being SprA-related. 27.30 27.30 27.40 27.40 26.80 27.20 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.99 0.70 -5.09 40 267 2009-07-14 15:26:22 2009-07-14 16:26:22 3 4 261 0 76 193 40 227.00 38 76.53 CHANGED sl.ssuhsshsstssss.sttshtt.t.......................................................................sststsssssspsss..........................................................................s...........ssssstpscp.......................spppsppppttss...........................pp.s..tppphp.p..plpcLppRDpEVRsHEQAHsulGGpaA.GuPoasYppGPDG++YAluGEVsIDlS.lsGDPpATIpKMppVpAAAhAPA-PSutDhpVAApAtpphspA+uELsppcpppssptppspt...........ttpsp.pshp..pppp.phsspsptst................t.lp 
............................................................................................................................................................shshhsspsthsp......................................................................................................................................................................................................................................................................................................pppsppppp.p.s.pp.ppspppps.pps.....................p.hs..t.tscL.s.p.pphpQVcELpuhD+pV+AHEtAHtAAG.Gsh.A.GusSFoYp+GPDsphYAsuGEVsIchpp.ussP..EtTIspucQltAAAhAPADPSsQDh+VAAs.AtphphEARAE.ht..c...+s..pcsp..cppppsp.........ttppp.p.ps.t.hpp.p..............s.p....................................... 0 31 54 66 +11952 PF12119 DUF3581 Protein of unknown function (DUF3581) Assefa S anon PFAM-B_2081 (release 23.0) Family This protein is found in bacteria. Proteins in this family are about 240 amino acids in length. 25.00 25.00 47.40 47.10 20.70 18.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.34 0.70 -5.12 22 187 2009-07-14 15:30:18 2009-07-14 16:30:18 3 1 183 0 36 129 22 211.70 61 90.92 CHANGED MFLssYaspps..splslospQAScFAKplAsDFNPIHDsDAKRFCVPGDLLFAllLscaGLSppMpFcFsGMVucsltLpF.spssssphslsDppsKsYLcVpRuG-sopspshIEshl+sYVsFSGhNFPHlLlPLMcp+pVMINPsRPLVIYESMSh-Ls...pLDhs.pssLcLsssslcl-GKRGsVsLpFpLhsssclVGoGhKcLllSGLRsY-pcs ...MFLosYaSpps.....p.....phpFTRpQAS+FAKtVAGDFNPIHDcDuKRFCVPGDLLFAllLpK.GlSQKM+FcFoGMVuDGVsLplps.c.spcE.uVsDssGKpYL+hSRcG-Vs+ssAhIEpllpsYVpFSGhNFPHIMVPLME-pQhMINspRPLVIYESMpl-Fo...R...LDls..cP-V-FsGAoh-V-GKRGhVTLsFsh+-sGplVGcGlKRMVsSGL+PYDQt.t............... 0 6 15 27 +11953 PF12120 Arr-ms DNApol_Rpb2_rif; RNApol_Rpb2_rif; Rifampin ADP-ribosyl transferase Mistry J, Gavin OL anon pdb_2hw2 Domain This protein is found in bacteria. Proteins in this family are typically between 136 to 150 amino acids in length. 
The opportunistic pathogen Mycobacterium smegmatis is resistant to rifampin because of the presence of a chromosomally encoded rifampin ADP-ribosyltransferase (Arr-ms). Arr-ms is a small enzyme whose activity thus renders rifamycin antibiotics ineffective [2]. 25.00 25.00 41.70 41.30 22.50 20.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.17 0.72 -4.07 15 184 2009-11-19 18:25:00 2009-07-14 16:38:57 3 4 154 1 54 158 11 99.60 62 64.61 CHANGED PFaHGTKADLpsGDLLpsGapSNYpcs.+hhNHlYFTuTLDAAsWGAELAs....G-G+sRIYlVEPTGsFE-DPNlTDKKFPGNPTRSYRSpcPLRlVGElTcW ..PFaHGTKAcLtlGDLLssGahSNacss.+hhsHIYFTAhh-uAsWGAELAh............u-G+.sRIYIVEPTGsFEDDPNlTDKKFPGNPT+SYRopcPLRIVGElpDW.......... 0 20 40 51 +11954 PF12121 DD_K Dermaseptin Mistry J, Gavin OL anon pdb_2jx6 Domain This protein is found in eukaryotes. Proteins in this family are typically between 30 to 76 amino acids in length. This protein is found associated with Pfam:PF03032. This domain is part of a dermaseptin protein which is used as an antimicrobial agent. The full protein is almost completely defined in an alpha helical domain. It creates high levels of disorder at the level of the phospholipid head group of bacterial membranes suggesting that it partitions into the bilayer where it severely disrupts membrane packing. 20.90 20.90 20.90 22.10 19.80 20.60 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.06 0.72 -6.41 0.72 -4.01 14 62 2009-07-14 15:43:20 2009-07-14 16:43:20 3 2 15 2 0 61 0 24.50 56 52.07 CHANGED uLWssI....Kphu.pA...AGKAALsAVs .uLWpslh...KphG.cA........AGKAALGAVs....... 0 0 0 0 +11955 PF12122 DUF3582 Protein of unknown function (DUF3582) Assefa S, Bateman A anon PFAM-B_2087 (release 23.0) Family This domain is found in bacteria, and is approximately 130 amino acids in length. It is found associated with Pfam:PF01694. There is a conserved ASW sequence motif. 
This domain has a single completely conserved residue F that may be functionally important. 22.00 22.00 22.00 22.20 21.50 21.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.48 0.72 -4.03 32 749 2012-10-01 21:59:08 2009-07-14 16:54:12 3 2 745 15 96 340 17 98.80 56 35.72 CHANGED MlclhslsNsRhAQAFlDYhtsps.Ichplhs.....pspu.hslalh.ccpphspsptElppFlpsP...tcs+Y.tASW-sGsTps.phc.Yps..o......hlst..h...hspAGPlTLs .............MhhITSFuNPRlAQAFVDYMATQG.VlLTIQQ.................csQo...DlWLA.DESQAERVRuELARFLENP...uDPRYLAASWQuGcTsS.GL+.Y+Ra.P......FhAsL.....RcRAGPVThl..................................... 0 16 36 66 +11956 PF12123 Amidase02_C N-acetylmuramoyl-l-alanine amidase Mistry J, Gavin OL anon pdb_2ir9 Domain This domain is found in bacteria and viruses. This domain is about 50 amino acids in length. This domain is classified with the enzyme classification code EC:3.5.1.28. This domain is the C terminal of the enzyme which hydrolyses the link between N-acetylmuramoyl residues and L-amino acid residues in certain cell-wall glycopeptides. 21.40 21.40 21.70 26.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -7.91 0.72 -4.32 16 260 2009-09-10 17:34:38 2009-07-14 16:57:45 3 4 95 2 9 151 0 44.60 42 18.36 CHANGED phshpu+llhps..sGLsYh.Tcshss.spLc+hptWLDp+...GWaYEh ...pLthsuphIlps..-GLsYhlT-spss.spLcthTsWLDp+...GWaYEh. 0 0 4 4 +11957 PF12124 Nsp3_PL2pro Coronavirus polyprotein cleavage domain Mistry J, Gavin OL anon pdb_2kaf Domain This domain is found in SARS coronaviruses, and is about 70 amino acids in length. It is found associated with various other coronavirus proteins due to the polyprotein nature of most viral translation. PL2pro is a domain of the non-structural protein nsp3. The domain performs three of the cleavages required to separate the translated polyprotein into its distinct proteins. 
25.00 25.00 29.60 100.60 24.50 18.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.02 0.72 -4.41 5 173 2009-07-14 16:00:46 2009-07-14 17:00:46 3 5 66 2 0 152 0 65.90 97 1.12 CHANGED SEEHFVETVSLAGSYRDWSYSGQRTELGVEFLKRGDKIVYHTlESPlEFHLDGEVLPLDKLKSLLS SEEHFVETVSLAGSYRDWSYSGQRTELGVEFLKRGDKIVYHTLESPVEFHLDGEVLSLDKLKSLLS. 0 0 0 0 +11958 PF12125 Beta-TrCP_D D domain of beta-TrCP Mistry J, Gavin OL anon pdb_2p64 Domain This domain is found in eukaryotes, and is approximately 40 amino acids in length. It is found associated with Pfam:PF00646, Pfam:PF00400. The protein that contains this domain functions as a ubiquitin ligase. Ubiquitination is required to direct proteins towards the proteasome for degradation. This protein is part of the WD40 class of F box proteins. The D domain of these F box proteins is involved in mediating the dimerisation of the protein. Dimerisation is necessary to polyubiquitinate substrates so this D domain is vital in directing substrates towards the proteasome for degradation. 25.00 25.00 26.20 26.30 22.50 21.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.00 0.72 -4.35 6 176 2009-07-14 16:02:24 2009-07-14 17:02:24 3 10 86 3 91 159 0 39.60 79 7.41 CHANGED hcYFspWSEu-QVDFVEaLLSRMCHYQHGpINuaLKPMLQ .....lKYF-QWSESDQVEFVEHLISRMCHYQHGHINSYLKPMLQ 0 20 27 54 +11959 PF12126 DUF3583 Protein of unknown function (DUF3583) Assefa S anon PFAM-B_2092 (release 23.0) Family This domain is found in eukaryotes, and is typically between 302 and 338 amino acids in length. It is found in association with Pfam:PF00097 and Pfam:PF00643. Most members are promyelocytic leukemia proteins, and this family lies towards the C-terminus. 
22.70 22.70 23.50 24.40 22.20 22.60 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -12.04 0.70 -5.36 3 98 2009-07-14 16:17:51 2009-07-14 17:17:51 3 7 36 0 23 120 0 236.60 58 40.35 CHANGED LDTMTQsLQEQDRTFGAAHAQM+SAIGQLGRARADTEEhIRERVRQVVAHVpAQERELLEAVsARYQRDYEEIAGQLGRLDAVLQRIRTGSALVQRMKLYASDQEVLDMHuFLRKALCpLRQEEPQSL+AsV+TDGFDEFKVRLQDLlSCITQGpDAAVS++ASPEAASTPRD.....PlsTDsPE-l.p......sQhQA.tL......AhVpsVPGAHPVPVYAFShQsPohREEuSNTTTuQKRKCSQT-CPRKlIKMESEEGcEuRLApSSPEQPRPSTSKAVSPPHLDGPPSP+SPVlccElhLPNSNHVTSDsGETEERV..................VVISSSEDSDAENss ................LssMTQsLpEQ-tsFsusHAQM+uAluQLs+sRu-TEELIRtRVRplVsHlpAQE+ELL.EuVpsRYQRDYcEhAucLuRL-AVLQRIRsGusLVpRMKpYASDQEVLDMHuFLRpALs+LRQ.EEPQsLpAsV+TDG..F-EFKlRLQDL.SCIT..QGpsuuss+p.AsPEusssPp-.....shss-....................................................................-sosTssspKR.Ksspsp..CsRKlIKMESpEt.pEsRLApSSPEQPtPSTS+AsSPP+LDG.sss.cSss.tp-h...sssNHssu.s.s...u.Estcpl..................hllusSccsct....s................................................................. 0 3 3 7 +11960 PF12127 YdfA_immunity SigmaW regulon antibacterial Assefa S anon PFAM-B_2114 (release 23.0) Family This protein is found in bacteria. Proteins in this family are about 330 amino acids in length. The operon from which this protein is derived confers immunity for the host species to a broad range of antibacterial compounds, unlike the specific immunity proteins that are linked to and co-regulated with their antibiotic-synthesis proteins. 
26.70 26.70 26.90 39.30 25.10 26.60 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.00 0.70 -5.53 22 608 2009-07-15 08:13:42 2009-07-15 09:13:42 3 2 588 0 119 350 208 312.90 69 96.50 CHANGED lhhlllsslhllhlhlhFs.FlPhs..LWIoAhuuGVclulhsLlGMRLRRVsPppIVpshIpApKAGL.....c.lssspLEAHYLAGGNVspVVcALIAAp+As.IsLsFcRAsAIDLAGRDVLEAVQhSVNPKVI-TP........slsAVAKDGIpL+sKARVTVRANIc+LVGGAGEETIIARVGEGIVooIGSussHKpVLENPDpIS+sVLsKGLDuGTAFEILSIDIADlDVGcNIGAcLQsDQAEADKclAQAKAEcRRAMAVApEQEMpAhlpEMRAKVVEAEAEVP+AhApAhRcGpl.GVMDYYphpNlpADTpMR-uIu+tu......cs .......ls.hlllslllllhL.llh.Fo.F.V..P..lG..LWISALAAGV+VuIh.TLVGMRLRRVsPp+llsPLIKApKAGL...........s.losNQLEuHYLAGGNVDRVVDALIAApRAs.I-LsFERAAAIDLA....GRDVLEAVQMSVNPKVIETP........hluuVAhsGIE.VKAKARlTVRANIsRLVGGAGEETIIARVGEGIVSTIGSScpHpcVLENPDsISKTVLsK.GLDuGTAFEILSIDIADVDIG.....KNIGAcLQT-QApADKpIAQAKAEERRAMAVApEQEMKA+VQEM+AKVVEAEuEVPlAMAEALRsGNl.GVhDYYNhKNIpADTsMRsuIuKts.ct........................... 0 66 99 112 +11961 PF12128 DUF3584 Protein of unknown function (DUF3584) Assefa S, Bateman A anon PFAM-B_2142 (release 23.0) Domain This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 943 to 1234 amino acids in length. This family contains a P-loop motif suggesting it is a nucleotide binding protein. It may be involved in replication. 
28.70 28.70 28.70 28.70 28.40 28.50 hmmbuild -o /dev/null HMM SEED 1201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.17 0.70 -13.81 0.70 -7.23 10 142 2012-10-05 12:31:09 2009-07-15 13:32:20 3 7 123 0 45 146 29 971.80 22 91.34 CHANGED ILIDoa..lp.GpV.ELsLsGHTplsGTNuuGKTTLhRLlPlFYGEtPS+lVsps.tRcsFs-aYLP+-SShllYEYpR.sG.QLCpsVhspt.uDG+GVpYRFIcuGYcpshFlsupptthhtshossElt+phRpsGhssSphLs.hccYRuIIQN-Rohh...GpcppELRsLAppFSLssu-pslpHIDKlssushsKptch-slKsMlsAIhp-...Dsssssppp...lsppclpsWlpDhpthpulpplpschcclpp-aspLpSlhh+LptL+stlhucppplppcQp-ppstppchcpplcpL--phpEspcchsp-lSstcuclstpcscL-tl-cp+tsapDsDlEphptDh-plPphcs-hpplptplctlpsptpclppca-cppptlcppppcclpshcsc.tpslREp+sct...+sstccshpuLEuphppQhcttppchsccphclcpplschctphsssshosEphtslpts-pclccAcEphppupsshppLpscppph+pcRDpApctLpcusppLpch+sphp-lcpQLhPpusSLlcFL.RsE..AssWccsIGKlIssELLcRTDLsPphspus..s.s..sLaGlpLDLpsl-sP-astsEp......pLRp+Lsth-csLpoApspppptEcpLsplNspl-ptpcslscAppuhcpscpcLpRLpsppcshpc+lppAlpc++ppsppplspLcpplKtLpccppsalcphc-chpEtphppptthpsV.ushDsQlspLpptlputcsss+Aclcphccthcs-LpspslDspsltcLcpclcsLcppIppspsccpclpcYcsahpcsWhp......c+spLttplpslcsuhp-LppcLsptppsh+pctpcLppppcAs-cthsplsEslp.pl+shhcpL.AplphstssspAs....hplu.phcth--hpppccRLtsslcpalE+Fcsll....sc+SGSsLhEsWp+h.ppcsshlu-+ul...p.hch..l.hlpplhsll..ssQpppulh-hsslhusslscFYpsLsshsRRIcohS++Lo+cVssptphEulu-lsVclpo+l-EL-aWssLctFsc...sFcpWpcp..-Ghs-lPc-phssuMpRlsshLss.uuhshulssLl-lElsLpEsGpclhIRN-+QLpcsSSpGhoYLIlhphhlGFsphLRG..cusspIpWPVDELGpLctsNlc+LhshhssNpIolLSAhPssDsclLsLFp++Yhlcc 
...................................................Eltlc.GpsphhGTpusGKoTl.Rhl.hFY.st..p+l.l.pp..+cuFpcaYhPhssSaIlYEh.+tsG.....hhpslht.t.spu..tlp.aRFls.tsap.p.....alpppp.....hshs.tphhpphh.t..t.slthothls.hpcYRsII..sstp............t.chh.h..tpaulhp..utpphppI.+.hh.psl.hspchch-hlKp.hlht..h.p-.......sh......ppp.........p.pchptWhp...c.p......h.....p..t..h...........hp.ph.ppltpt........hppl.th.p.plt.thttthtts.p.ttpp...hppp.pchptphpphpcp...hp...p.ppchppchsthptslsthpppLctlpppctta.tpts.Ipphhtc.h.pph.....phppch....pplppphphhspphp-lpstapt...h...hppltpph....p.......p........hpt....hptp.....sph....ppphtpt........tpttcpphpthppph.......ppphp..ttht.ph...p..ppp.ph.......p..p.htph..ch.....p...h....p...phph..pp-th..phth.......hc......ttl..pp....t.p.p....ptp....ph.ptt...h..p...p...h..p....tp.....pph.ptp....hp........pt.t...p....tl.pt....h....p.phpp........h....ptpltplpthL.sp..psoLhpaL.ppp....t.sWcps.lGKVls.c.l.Lhp..p-LpPphstt.....s....t.........shaGlp....l..s....Lpsl.....p.....h.....s.....phhtpcp.......phptp...hpthpp.th.tp....h.p.....p.....t....pp.h....cppl.tt....h.pt....p....lpp....hpp.......ph....s.tp.......t...thc.p..p.t....ch....pphhspp....pp.ppchppthptphpphtpplpphptplpphppphpphhp.t....ccphtphc.....phpt.hpph.sthcpphttlptphpp....tppphppphpthcp.hp...p-L.pupGsD.ttpltphcppltplppplphhcppcs.lhc....Ypc.hppha.........tp.phppphtth.cpthpphppphpthtpphptphpph.ppphpt.ctt.tphp..p.hp.......t.p.hh.p.....tp...h.........s....pt...t.php..........ht...phh.pphpph...h.ph...tt...hhtp...hpphht.phpt.h.......st...th.c....h.t...........t...sct...........hph........h..l.p.hhp.........sp..p.l.p.hp..h..h.pl.tt.hthhtphptcltt.stclsp...th.t.....hthlt....p...thp.h..sp.p..h.t.....h...h..lp.h.t.......th.....t.h..t.......t..thst........hh..htphhp..h.t..tt.....thtltphhclphplhEssp.c.hhhh..cpltpsuSpGhshLlhshh.lhhhphh+t.....ts.....phtlph.hDElGpLp.pNlptlhphhppppI.llsu.P.s.................................................................................................................................. 
0 8 23 38 +11962 PF12129 Phtf-FEM1B_bdg Male germ-cell putative homeodomain transcription factor Assefa S anon PFAM-B_2140 (release 23.0) Domain This domain is found in bacteria and eukaryotes, and is typically between 101 and 140 amino acids in length. Phtf proteins do not display any sequence similarity to known or predicted proteins, but their conservation among species suggests an essential function. The 84 kDa Phtf1 protein is an integral membrane protein, anchored to a cell membrane by six to eight trans-membrane domains, that is associated with a domain of the endoplasmic reticulum (ER) juxtaposed to the Golgi apparatus. It is present during meiosis and spermiogenesis, and, by the end of spermiogenesis, is released from the mature spermatozoon within the residual bodies [1]. Phtf1 enhances the binding of FEM1B -feminisation homologue 1B - to cell membranes. Fem-1 was initially identified in the signaling pathway for sex determination, as well as being implicated in apoptosis, but its biochemical role is still unclear, and neither FEM1B nor PHTF1 is directly implicated in apoptosis in spermatogenesis. It is the ANK domain of FEM1B that is necessary for the interaction with the N-terminal region of Phtf1 [2]. 21.10 21.10 21.10 21.20 20.70 20.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.92 0.71 -4.88 3 198 2009-07-15 12:34:29 2009-07-15 13:34:29 3 2 71 0 83 159 0 143.60 60 22.68 CHANGED S+lcDAluWYQKKIGAYDQQIWEKSIEQ+pIKGL+NKPKKsGHlKPDLIDVDLVRGSTFAKAKPElPWTSLTRKGlVRVVFFPlFu+WWlQVTShRIFVWLLlLYhLQVIAlVLYsh.......sQp.HIVslSEVLGPIsLMLLLGTVHCQIVSTphs+PssssG ..................p.p-hlsWYQKK..IGAYDQQIWEKSlEQ.ppI.........K......Gh....+NKPKKhuHlKPDLIDVDLlR..GSsFAKAKPE....PWTSLTRKGlVRVVFFPhF.pWW.lQVTSh.hIFhaLLlLYl..hQ.VhAlVLah...........................................hs......lsloEVlGPlhLMLLlGTVHCQIVSTphs+sshss............... 
1 14 21 46 +11963 PF12130 DUF3585 Protein of unknown function (DUF3585) Assefa S anon PFAM-B_2156 (release 23.0) Domain This domain is found in eukaryotes. This domain is typically between 135 and 149 amino acids in length and is found associated with Pfam:PF00307. 23.20 23.20 23.30 23.70 23.10 23.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.74 0.71 -4.33 25 600 2009-09-11 08:12:16 2009-07-15 13:54:22 3 17 92 0 329 593 0 135.50 35 15.02 CHANGED hhsspplpp-LpplEpp.csLEtc..GVtlE+pLRst.t.............sc......p--pLhh-WFpLlpcKshLlRREuELhhhtpphpLEccpspLcpELRthhuht-tpKTpt-cp+EctLlpcllplVppRstlVcsL-pcRlREpcEDcplcp ..................................................tp.lptplptlEpp.ctl-tc..ustlEctLRstht................................spp......p--plht-WFpLlpcKssLlRc-sE.............Lhhh.t.c..-..hcLE.c+.tpLppELRphh.sh..t.......-.......t.p..KTptc....cp+Ep.tLlpcllplVppRDtLVppL-p.....pRh..p.-tEEDp.ht.t............................................. 0 63 93 196 +11964 PF12131 DUF3586 Protein of unknown function (DUF3586) Assefa S anon PFAM-B_2164 (release 23.0) Family This domain is found in eukaryotes. This domain is about 80 amino acids in length and is found associated with Pfam:PF08246, and Pfam:PF00112. 25.00 25.00 27.50 27.20 24.00 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.91 0.72 -10.06 0.72 -3.93 16 127 2009-07-15 13:00:10 2009-07-15 14:00:10 3 3 28 2 21 137 0 77.40 43 21.85 CHANGED supuPoPsP.oTts......PtssslhQhhChDhtCpcGCpcsslPsspCl.hsGGGSshspCssppVhhpsY.pSscCoGs ....................s.s.usTPsPoTpopppA....Pp.sshhsQhhCTDttCppGCcpssls.TspCl.ssuGuSuhssCGspplp.psY.oospCoGs........ 0 16 21 21 +11965 PF12132 DUF3587 Protein of unknown function (DUF3587) Assefa S anon PFAM-B_2181 (release 23.0) Family This protein is found in viruses. Proteins in this family are typically between 209 and 248 amino acids in length. 
25.00 25.00 40.90 40.40 23.50 23.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.48 0.71 -5.08 52 74 2009-07-15 13:11:26 2009-07-15 14:11:26 3 1 7 0 0 73 0 182.50 31 81.44 CHANGED ppstsss.s.s...........pt.hlPhchl..hhhucFlpFt-atphlpshhPptt.s....lptpLap.hST+phpssFlNGKpLpIcYNFDspRhcc-RlLIsl-oLlPlFGGlh...ss.shcpFsolscLpsFVchcVHLstCS.shpa.ASCsC..HLhsspt.t..........pthspP....sssCpps.HFHHaCupHVttWLphaLtssIlh+Esp.ph.ascphsc .....................................htt....................s..phh..h.hsphhph.ph.phht.hhsptt.s....lptpLap..hST+phpssFlNGKpLclcYNFDssRhcc-Rl.LIslcoLhPlFGGlh.......ss...shcpFsolsclpsFlctcl+LstCo.shpa..A.oCsC..+hhspptp....h.....pthhps..........sstCpht.HFHHaCspHVttWLphaLtshIhhpEsp.thhtp................... 0 0 0 0 +11966 PF12133 Sars6 Open reading frame 6 from SARS coronavirus Assefa S, Bateman A, Coggill P anon PfamB-2188 (release 23.0) Family This family is found in Coronaviruses. Proteins in this family are typically between 42 to 63 amino acids in length. 27.00 27.00 63.50 63.30 17.90 16.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.93 0.72 -4.29 6 86 2009-07-15 13:35:43 2009-07-15 14:35:43 3 1 82 0 0 15 0 57.60 96 98.29 CHANGED MFHLVDFQVTIAEILIIIMRTFRIAIWNLDVlISSIVRQLFKPLTKKNYSELDDEEPMELDY MFHLVDFQVTIAEILIIIM+TFRIAIWNLDVIISSIVRQLFKPLTKKNYSELDDEEPMELDY. 0 0 0 0 +11967 PF12134 PRP8_domainIV PRP8 domain IV core Mistry J, Gavin OL anon pdb_3enb Domain This domain is found in eukaryotes, and is about 20 amino acids in length. It is found associated with Pfam:PF10597, Pfam:PF10596, Pfam:PF10598, Pfam:PF08083, Pfam:PF08082, Pfam:PF01398, Pfam:PF08084. There is a conserved LILR sequence motif. The domain is a selenomethionine domain in a subunit of the spliceosome. The function of PRP8 domain IV is believed to be interaction with the splicosomal core. 
25.00 25.00 34.00 33.00 19.10 18.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.53 0.70 -5.01 14 350 2009-07-15 15:36:11 2009-07-15 16:36:11 3 45 291 12 240 338 7 220.60 76 10.05 CHANGED EsaLNSpNYuELFuNps.paFVDDTNVYRVTlHKTFEGNlsTKPlNGslFILNP+oGQLFLKVIHTSVWAGQKRLuQLAKWKoAEEVsALlRSLPhEEQPKQlIVTRKuMLDPLEVHh.LDFPNIsI+sSELpLPFtuhhKl-KluDlVLKAsEPpMVLFNlYDDWLc..slSsaTAFSRLILlLRuL+lNtE+s+hIL+PshsllTpspHlWPoh.oDppWlcVElpL+DLILsD ........................EPaLsSQNYuELFSNQI.hWFVDDTNVYRVTIHKT......FEGNLT.T.KPINGAIFIFNPRTG.....QLFLKIIHTSVWAGQKRL.GQLAKWKTAEEVAALIRSLPVEEQPKQlIVTRKGMLDPLE...VHL.LDFPNIlI+GSELQLPFQAshKlEKhGDLILKATEPQMVLFNLYDDWLK..oISSYTAFSRLILILRALHVNs-+sKlIL+PDKTslT.psHHIWPoL.oDE-WIKVElpL+DLILsD............ 0 89 137 200 +11968 PF12135 Sialidase_penC Sialidase enzyme penultimate C terminal domain Mistry J, Gavin OL anon pdb_2vo8 Domain This domain is found in bacteria and eukaryotes, and is about 30 amino acids in length. The protein from which this domain is found is a sialidase enzyme which is used by virulent bacteria as a toxin. It is the penultimate C terminal domain. 20.40 20.40 21.20 21.80 20.00 18.50 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.65 0.72 -6.69 0.72 -4.22 23 64 2009-07-15 15:38:49 2009-07-15 16:38:49 3 6 14 1 18 69 0 24.80 37 4.57 CHANGED hsDsElass-pphppNcEIYsLspI ....sDsElYtsEphhphNpElasLspl. 0 2 6 18 +11969 PF12136 RNA_pol_Rpo13 RNA polymerase Rpo13 subunit HTH domain Mistry J, Gavin OL anon pdb_2waq Domain This domain is found in archaea, and is about 40 amino acids in length. It has a single completely conserved residue E that may be functionally important. It is found in the archaeal DNA dependent RNA polymerase. The domain is a 'helix-turn-helix' (HTH) domain in the Rpo13 subunit of the RNA polymerase. 
This domain is involved in downstream DNA binding, and the entire subunit has also been implicated in contacting transcription factor II B. 25.00 25.00 33.50 33.50 18.50 17.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.13 0.72 -4.26 7 23 2009-07-15 15:39:56 2009-07-15 16:39:56 3 1 23 4 11 17 0 46.10 56 46.09 CHANGED cEtth.EE-EEsEhss....lslpslEhhh+sTElW-pLlpGcl ...............t.EEpcVE.....EssE-EhPp....LSIQDIELLMKNTEIWDsLLsGKI 0 5 5 9 +11970 PF12137 RapA_C RNA polymerase recycling family C-terminal Assefa S, Bateman A, Coggill P anon PFAM-B_2201 (release 23.0) Domain This domain is found in bacteria. This domain is about 360 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF00176. The function of this domain is not known, but structurally it forms an alpha-beta fold in nature with a central beta-sheet flanked by helices and loops, the beta-sheet being mainly antiparallel and flanked by four alpha helices, among which the two longer helices exhibit a coiled-coil arrangement. 
26.80 26.80 27.80 27.80 26.40 25.90 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.95 0.70 -5.62 50 1007 2009-07-15 16:36:59 2009-07-15 17:36:59 3 5 973 2 150 632 98 352.20 58 38.01 CHANGED pDIpIHVPYhcsoAQphLhcWYccGLNAFpcTCPsGpslacpatppLpshLt..ss....spssh-pLlppopptpppL+spLEpGRDRLLELNSsus-pAppLs-sItppDs.sspLssFhhplFDhlGlcQE....Dpu-sullLcP.....o-+Mhsss.aPGL..s...E-GhTlTF-RcpALuREDhpFloWEHPhlpsuhDllhou-hGsoululLcNcuLPsGTlLLEhlasl-upAP+sLQLsRFLPsTPlRlLlDppGssLusplsa-shscpLpsls+phApclVpthpsplcpllppu-phupsphptllppAppphpppLssElpRLpALpsVNPsIRp-EI-tLcppppphhshLspAplpLDAlRllVss ................+DIQIHVPYLEcTAQulLsRWYHEGLsAFEcTCPTGRslYDuhhscLIsh...LAuP......pps-uaD-LIcpsRcp+-uLKAQLEQGRDRLLElHSNG.GE+AQtLA-u......Ip.................EQDs...DTsLlsFAhsLFDIlGIsQ-....DRG.-N...hIVLTP.....SDHMLV..PD.FPGL...s..EDGhTI..TFDR-sALuREDsQFITWEHPlIRsGlDLILSGDsGooslSLLK.N..KALPVGTLLlELlYVVEAQAP.......K.........pL.........QLsRFLPPTPlRh.L.L.Dc.....NG.NNLAuQVEFEoFNRQL..su..VNRHouSKL.....VsAV.Qp-VHsllphGEsplEppAcuLI-sA+p-A-pcLouELuRL..c.AL..+.AVNPNI.R.DDElsAl-spRpplhpsLsQ.AuaRLDALRLIVVs.......................................... 0 35 65 113 +11971 PF12138 Spherulin4 Spherulation-specific family 4 Assefa S anon PFAM-B_2198 (release 23.0) Family This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 250 and 398 amino acids in length. There is a conserved NPG sequence motif and there are two completely conserved G residues that may be functionally important. Starvation will often induce spherulation - the production of spores - and this process may involve DNA-methylation. Changes in the methylation of spherulin4 are associated with the formation of spherules, but these changes are probably transient. Methylation of the gene accompanies its transcriptional activation, and spherulin4 mRNA is only detectable in late spherulating cultures and mature spherules. It is a spherulation-specific protein. 
28.50 28.50 28.60 36.00 25.90 28.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.97 0.70 -4.95 44 221 2009-07-15 17:01:16 2009-07-15 18:01:16 3 11 122 0 141 236 0 229.90 25 69.40 CHANGED tplllPlYl.YP....tssss.Ws.L.................t......................lsshPsltaslllNP.ssGPGtt.....sDssYssslscL....ptss...spllGY..............................VpT....sY..usRsls-Vtp-lspY.......................................ssW.t....................................................................................................................................................................................................................................................................................................................................................................................................thslcGIFlDE..ssssht...sphsYhpplsshl+s.................................htssthVlhNPGs.hP......tssahs...h..u.DhhVsFEssassa...sshsst....t..hlsshs..ts+hst......llaul..................P.............ssspltshsp.phpptu..uhlalT........shsss....Ypshs..shWpp.........hlsslss 
...........................................................h..lhlPhYh.aP...........hsss..Ws.L...........................................hss..ssh..hhlllNP.ssGP........Gtt......sDssahpslspL.................phss....scllGY..............................VpTsY............usRshsplhp-lppY.......................................tsW..................................................................................................................................................htlcGlFhDE....sssph...............stl.sahpplsshl+s.................................hts.s..t.hVlhNPGs.hs..........ssahs........h....uDhh.Vs..FEssassa........tts.sst.........t...hhsshs.....p+hstllasl.....................P.............................t.pphttlhp.thtp.ts..uhlahT......................s....sss.....atshs..shapt.ht....t.................................................................................................... 0 41 82 114 +11972 PF12139 APS-reductase_C Adenosine-5'-phosphosulfate reductase beta subunit Assefa S, Bateman A, Coggill P anon PFAM-B_2202 (release 23.0) Family This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 112 to 142 amino acids in length. This family is found in association with Pfam:PF00037, and has a conserved FPIRTT sequence motif. The whole beta subunit has the enzymic properties of EC:1.8.99.2. 25.00 25.00 25.50 39.80 23.00 18.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.87 0.72 -3.84 43 261 2009-09-11 09:55:26 2009-07-16 09:44:11 3 10 227 18 53 254 806 83.70 48 62.12 CHANGED lRGYADFsPhGuoshPhRuo-c..IMWTlKFRNG.slKRFKFPIRTTsEG...ohsPhtGhst...ssDL.cophLhoEst...................tthclsssc .lRGYADFsPhGuolhPhRus-s..IMWTlKFRNG.slKRFKFPIRTTPEG...oh.ssa.s.uhst...sscL.csp.lLasEst...................ht...................... 
0 25 44 50 +11973 PF12140 DUF3588 Protein of unknown function (DUF3588) Assefa S, Bateman A anon PFAM-B_2205 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 866 amino acids in length, and the family is found in association with Pfam:PF02820. The exact function of this family is not known. 25.00 25.00 25.40 30.90 24.30 24.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.58 0.71 -4.02 15 369 2009-07-16 09:36:54 2009-07-16 10:36:54 3 15 82 0 178 312 0 113.30 42 17.18 CHANGED ssssVslYlNK+sssGPaL-+p+ltpLPsaaGPGssslVLppslQuslssAYpsppVhphLp..ps...GGE..slpApa-GKsaplpLPs...VsosshVh+FLcplCppLp....C-NLFSspPho ..........................................ssp.tlYlN+pstsGPaLs+p+ltpLPpphGPupsslVLpps.lpthIssAacsppVhphLp....ps.....sGp....slpAp.acGcpa...p...hslss...VposshVhcFlcplCppLp....CsNLFusp.h.................... 0 23 35 88 +11974 PF12141 DUF3589 Protein of unknown function (DUF3589) Assefa S anon PFAM-B_2240 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 541 and 717 amino acids in length. 
The function of this family is not known, 25.00 25.00 50.60 44.50 17.60 17.10 hmmbuild -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.94 0.70 -6.27 31 145 2009-07-16 10:10:52 2009-07-16 11:10:52 3 2 27 0 113 176 2 461.20 29 74.19 CHANGED ppCpp.lphptslplopspshs....ssLcplhspllpp.p....shap-lpshF.tclpcphccphl.cK+Wa+FuGSSVWL-pYGVHaMlSRllYS.pGh+spPhlSLhYsQlFDcNWpElps.hpLllP...................h..sctsac.hp...FPsFLPIPhaashc...ppppaYGPEDPRllLhc.....NphGh-EPllVaNua+cc.ls......................p........ahRoMFhsaPap.p................tp..as+shEL+lcsppctppEKNWTPF..ls..........pt.pshDpalaFlYpassLclL+CsL....ssus......Ccaha+......phsspsplGsLRGGTpLlslsphh...........................thspsRclWlGaARuHlcpCGCGpsMYRPNhllLs+s..................spaclualSShlsFslsl.sWshp.............csh....Cts..tsVLIPNuISpWslpp.........................................t.t..hpDhhTlolShADposcllal+GlLppIhsls................................t...htppppttttsshtlcCAlptSpca ...........................................pCpp.lphptp..lpl.op.t.h...h.p....ssLtplhpplhpp.p...........hhpclpshF....clp.p.............p............pphl.....cK+Wa+FuGSSVWLcpatVHhMlSRllYS.pGt+spPhlSLlauQlFDcsWpElp.s.hpLh.lP................................................................tphphp..hp......aPphLsIPhhhs.p.....tptpahGPEDPRllLhc.....Np.Gh-EPlllFNhhcpp.hp.......................................hRuMFhhhPap.......................................sclhcLplpsp.pht.phEKNWoPFhs................t.tpttcsplaFlYpapsLclLKCsL....ssu.................Cphhac.........t.pppspsGslRGGTpllslsphhs.........................................h.ps+plWlGFsRsHlppC.GCGpphYRPphhllscs......................p.spaplshlSs.lsFslslhsWs.t.................pshCss..tNlLIPNuIu.Wpltp........................................................................p.thpDYhslolStuDtsspllpl+GlLphlhpl...........................................pttpt...pp..hpCsh..u.pa....................................................... 
0 17 53 113 +11975 PF12142 PPO1_DWL Polyphenol oxidase middle domain Assefa S anon PFAM-B_2219 (release 23.0) Domain This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length, and the family is found in association with Pfam:PF00264. Most members are annotated as being polyphenol oxidases, and many are from plants or plastids. There is a conserved DWL sequence motif which gives the family its name. 20.60 20.60 20.60 21.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.59 0.72 -4.45 39 355 2009-07-16 12:13:27 2009-07-16 13:13:27 3 8 124 8 85 375 0 52.10 52 10.25 CHANGED DhTDsDWLsupF.lFY.DE..NsphVRV+V+DsLD.sc+LGYsY.pcVs..lPWlsuKPssp ....DhTDsDWL-u.uF..hFY.DE..pucLVRV+VRDsLD.spp..LtYsY.Q-Vs..lPWLss+Pss............... 0 16 67 78 +11976 PF12143 PPO1_KFDV Protein of unknown function (DUF_B2219) Assefa S anon PFAM-B_2219 (release 23.0) Domain This domain family is found in eukaryotes, and is typically between 138 and 152 amino acids in length. and the family is found in association with Pfam:PF00264. Many members are plant or plastid polyphenol oxidases, and there is a highly conserved sequence motif: KFDV, from which the name derives. This is the C-terminal domain of these oxidases. 20.60 20.60 20.60 20.60 20.20 20.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.70 0.71 -4.26 32 337 2012-10-01 19:33:20 2009-07-16 13:34:11 3 12 111 0 86 438 1 119.10 40 22.90 CHANGED FPl.sL-cs.lp.spVpRP+.poRoc+E.K--EEEVLVI-GIEh-psh.h.lKFDValN.D-scsss..ssspsEFAGSFlslPHt.+pp.......t+p...hpTsh+LGlo-LLEDLsA-sD-.ollVTLVPRsGs..s.l.pIsGlcI-h ......................FPh.sL-ps.lp..ssVpR.Pp.tsR....occE....K--E..EEVLV.I-GIEhccc..h..VKFDValN..s-sstss..sssssEaAGSFsplPHt..ppt..........ttp......hpoph+huls-LL.-Dlss-s...Dc..ollVolVP+hGs..t.l.slsulpIth................................. 
0 9 70 79 +11977 PF12144 Med12-PQL Eukaryotic Mediator 12 catenin-binding domain Assefa S anon PFAM-B_2250 (release 23.0) Domain This domain is found in eukaryotes, and is typically between 325 and 354 amino acids in length. Both development and carcinogenesis are driven by signal transduction within the canonical Wnt/beta-catenin pathway through both programmed and unprogrammed changes in gene transcription. Beta-catenin physically and functionally targets this PQL (proline-, glutamine-, leucine-rich) region of the Med12 subunit of Mediator to activate transcription. The beta-catenin transactivation domain binds directly to isolated Med12 and intact Mediator both in vitro and in vivo, and Mediator is recruited to Wnt-responsive genes in a beta-catenin-dependent manner. 25.30 25.30 25.90 25.90 19.70 17.40 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.72 0.71 -4.94 4 106 2009-07-16 15:03:35 2009-07-16 16:03:35 3 6 38 0 42 87 0 181.20 53 11.28 CHANGED PP-LhQsts.G...RlsYtQpuMNMYTQNQPLPPGGPGLEPPYRPsRsP.MNK.MPsRPNYTuhMssMQ.usMsslMGL-K...QYsMsaKPQPsMPQGQhLRpQLQsp..sQuhIGQ.QlRQMTPNpsYouMQsS....QGYToaGSHMGMQQHsSQsGGMsPsSYGsQsFQuTHPAsNPslVDPpRQLQQRPSGYVHQQAP.uYsHshQsTQRFs.HQs ............................hhpp..........phsY....t..t.p.GhasQNQPLPs..GGPRlD...s..aRP.sRhs..hpK.hssRP..sYsuhh.P.............ssM...su.......lhul-s...p..pp..h.hp.ppQPshsQGQp......L.R.pQ......LQs+..............pQ.......GhhG.Q...p.......l....+Q.hsPs...s...p.Y.GLQsu.......Q....GYTsYsSHhuLQQHs..u...uus..hV..sPSYsupsY.usH..oNPsLhDshRplQQR.PSGYVHQQAP...sYsHsLsuoQ.Rhs.HQ.... 0 2 5 16 +11978 PF12145 Med12-LCEWAV Eukaryotic Mediator 12 subunit domain Assefa S anon PFAM-B_2250 (release 23.0) Domain This domain is found in eukaryotes, and is typically between 325 and 354 amino acids in length. The function of this particular region of the Mediator subunit Med12 is not known, but there is a conserved sequence motif: LCEWAV, from which the name derives. 
20.30 20.30 20.30 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.94 0.70 -6.01 7 175 2009-07-16 15:05:44 2009-07-16 16:05:44 3 10 89 0 108 157 0 397.50 46 22.82 CHANGED s-FVQSthLuRRLAYhso++Lu........Lls...............................usppspsIpspsssshsssssspsssppsPhphsFo-ahpCspHRslVhtLSsILQsITLpCPoALVW.............shutscopsph.GSPLD+LPlAPSsLPMPst..ssthNppVRtpLhpsEppIK.RuptsEsRWshDKhQpu......AG.shs+VLpTL-sLDsHsF-RhDpsNSlDoLYs+IFsstpscstcpsp.........................................ppD-ulVplLCEWAVSppR.GcHRAMVVApLL-KRQs-l.sphss-S.....ps.D-K-SlsSGuhsssuhPVFQcVLhpFLDspAPsLs-..soppp+spFsNLVhLFsELIRHDVFSHsAYMCTLISRGDLhs...................Tushop.sususpsssstthc-pshtss.chc..ths-S...D..sp.................hhpchpEctpsS.-tssssp..cstpp-.shphshstshs.sRHh.YspHFPlsQ--s..upH..-CNQRhllL ..........................................................t.EFVQSthLSRRLAYhCsR+Lu.......h.Ls...............................................................................................................sspssch.h.s.........s.ss..shsssss.....stssss.sP.phsFsD.hh....CspH.+.s.lVauLSChLQslhLsCPSALVW...................pYuhscs+............ts.GSPLDhLP.lAPSsLPMPtG..sosh...sQQVRs+lhElEpQIKpRGpAVEsRWS...FDKCQEuT........A.GhTIuRVL+.T.LEVLDpHsF-..R..sD....oNS..l-oLhp+IFh.s.s.K.....DspEhs.........................................ssD.-AV.VsLLCEWAVSsKRsGcHRAMsVAKLLE.KRQuElEAE+.sG.-.S..................EshDEKtSl.uSuSluusoh.PlFQsVLlpFLDT...QAP.Lo-...spo.-..p...E+sEFhNLVLLFsELIRHDVFSHshYhCTLISRGDLuh.....................................................sus.spP.oP....sc..ss--...............K....-.t.cht.........t...t...t..........hs..p....h....h-..sp..............................................h..scs....pspsS.pp..s..pp...c.s..c-p........h.p.hh....s....hs.sRH..lpYsoH.FPI...P...--s.....sSH...ECNQRhllL........................................................................................................ 
0 35 41 73 +11979 PF12146 Hydrolase_4 Putative lysophospholipase Assefa S anon PFAM-B_2243 (release 23.0) Family This domain is found in bacteria and eukaryotes and is approximately 110 amino acids in length. It is found in association with Pfam:PF00561. Many members are annotated as being lysophospholipases, and others as alpha-beta hydrolase fold-containing proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.62 0.72 -4.13 137 1506 2012-10-03 11:45:05 2009-07-16 16:11:07 3 13 1223 0 398 8635 1896 80.60 30 22.07 CHANGED GhpLh.hpp.Whs...s..ss......+uslll.h.H.Gh.s.EHus..+..Ypc.luppLst.pGaslauhDpRGHG.pS.....s......G.....p..Ru...h..hssas..ph..l.c..D.lpph...lp .........................................htLahpp.W.s...........ps........s+u.l.lhl.h.H...G.....h....t.....E..H..uu.....R..........Y...p......c.....l....s..c....pL...s..p.....s..G......a..s..V..a..u..aDtRG...HG.+S......s........................G...........+G......h......h..s..s.h.s......ph....V.p.Dltphh.t........................................ 0 130 239 317 +11980 PF12147 Methyltransf_20 Hydrolase_5; Putative methyltransferase Assefa S, Bateman A, Coggill P anon PFAM-B_2243 (release 23.0) Family This domain is found in bacteria and eukaryotes and is approximately 110 amino acids in length. It is found in association with Pfam:PF00561. The family shows homology to methyltransferases. 
20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.83 0.70 -5.55 16 478 2012-10-10 17:06:42 2009-07-16 16:16:36 3 6 461 0 55 393 49 296.00 64 53.99 CHANGED s+hRcFI.ppFspssppsuLhcAD+tGsop-Eu-tLuuPhst.S.psLaWphsRtul+hGGhhScGl+LGhcTGFDSGSTLDYVYRNpspGpG...slG+hID+sYL-AIGWRGIRpRKhHlpELlptAltRL+upGpPVRllDIAAGHGRYVL-ALsststt.....s-uIlLRDYSslNVppGpsLItp+GLssIA+FEpGDAFDtsoLAulpPtPoLAlVSGLYELFsDN-hVtpSLuGLApAlpPGGYLlYTGQPWHPQLEhIARALTSHRsGpAWVMRRRSQAEMDpLVctAGFcKlsQRIDpaGIFTVSlApR ...............................................-KMpsFIsRLYAsps......p+FDYpHEDRsGPSADcaRhLuusPsPhSPhDLsYRhhRtuMKL.hGsaSsGl+LGhsTGFDSGSSLDYVYpNQPQGSs....uFGRLIDKhYLN.SlGWR.GIRQRKs.HLQhLI+Q.AVAcL..+u..cG...hsVRVVDIAAGHGRY...VLDALsscPu.......ls.-I..LLRDYSELNVsQGQtMIAQRGhSups.RFEQGD....A....Fs...sE....-L.uAL......o.P.RPT.L..A.IVSG.LYELFP-.N-.t.V.KNSLAGLAsAIEPG.GlL.IYT..GQPWHPQLEhIAtVLTSHK.D.GKP.WVMRVRSQuEMDoLVccAGF-..KCTQRI..DEWGI..FTVShAVR.............................................................. 1 7 25 38 +11981 PF12148 DUF3590 Protein of unknown function (DUF3590) Mistry J, Gavin OL anon pdb_3db3 Domain This domain is found in eukaryotes, and is typically between 83 and 97 amino acids in length. It is found in association with Pfam:PF00097, Pfam:PF02182, Pfam:PF00628, Pfam:PF00240. There are two conserved sequence motifs: RAR and NYN. The domain is part of the protein NIRF which has zinc finger and ubiquitinating domains. The function of this domain is likely to be mainly structural, however this has not been confirmed. 
20.90 20.90 21.60 20.90 20.20 17.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.05 0.72 -4.37 8 130 2009-09-11 05:19:17 2009-07-16 16:52:31 3 11 53 7 70 130 0 88.80 47 12.11 CHANGED hlDARDsshGAWFEAplVsVo+cssscs............-sllYHVKYDDYPEsGVlplssKslRPRARTlLpWc-LcVGpVVMVNYNsDpPKERG ...............................VDs+DpphGAWFEAplspls..pcs.sst-........................................EDlIYHlKYDDYP.EsGl.lphss+..DlRsRARTll+Wp-LpVGpVVMlNYNs-sPcERG....... 0 14 18 36 +11982 PF12149 HSV_VP16_C Herpes simplex virus virion protein 16 C terminal Mistry J, Gavin OL anon pdb_2k2u Domain This domain is found in viruses, and is about 30 amino acids in length. It is found in association with Pfam:PF02232. This domain is the C terminal of the HSV virion protein 16. This protein is a transcription promoter. The C terminal domain is the carboxyl subdomain of the acidic transcriptional activation domain. The protein binds to DNA binding proteins to carry out its function. Such proteins include TATA binding protein, CBP, TBP-binding protein, etc. 20.10 20.10 20.60 61.80 17.70 18.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.05 0.72 -7.38 0.72 -4.27 2 19 2009-07-16 15:54:40 2009-07-16 16:54:40 3 1 10 3 0 46 0 30.10 85 6.14 CHANGED DsssYuuLDhADFEFEQMFTDALGID-FGG .DuAPYGALDMADFEFEQMFTDALGIDEaGG 0 0 0 0 +11983 PF12150 MFP2b Cytosolic motility protein Mistry J, Gavin OL anon pdb_2bjq Domain This domain family is found in eukaryotes, and is approximately 50 amino acids in length. These proteins are found in nematodes. They complex with MSP (major sperm protein) to allow motility. Their action is quite similar to the action of bacterial actin molecules. 
18.80 18.80 22.30 21.50 18.50 16.00 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.36 0.70 -5.75 3 68 2009-07-16 15:56:21 2009-07-16 16:56:21 3 4 10 3 60 63 0 286.80 43 80.89 CHANGED NPPAKEDTWAFQPIGSPFP-uPVKshGQQNMYVALWYKHGKPIHGRAWNNGGVVECSFPYNKAELTGKpDLGGQIQVLQYKGDHNSLGYWYEWIKYKDRaEKhDct+ElVRCGDSFPILWpcRsGGsLLGYVDNKTElAhFSasGKV-KhpGuALANMhIIsRNLhGGPPaC.Ct.CtusPPsP..llRVpcDEWhDlRpGDPWPTptssV+ALGRoLDTLPGsNPDQYVALWYppGEPVMGRlW.N-GGKlAAsFGWsNpEYs.pcVGSIQLLhhLP-SlRGFDYcWhPFPEAAQFGsKtWpPVHVspHKGNISPuVLplscGKEILGKlDlRNEKAohGYGGKEKlhTGsA.VcsshVLCRKAKA ................................EDpWAatsIGuPFPssPV+s.sppNhYVALWYKHGK.PIHGRu.W..NN.sGVVE.CSFsYpcs..ELosttpLtGQIQlLpY...h.....Gsaps.GFWY-WlpaKc..R.hp.p..-tttph.l.RCGsShPl.hh.p+.ttslLGalD.pTE.AhFu..sGps.p..Gst.hsshhllsRNhhst.st.s.p..t.t......sPPs.s.................+lh..hD-WhDhR.GDsaPstt....V.Ahs+sLsT.sGsp..QYVALWYh.pG.c.P.VhGRsW.sps.GKltAsFuh.spEas.tplGSlQlLhplsspshGa-YsWhsa...............s-uspast..K.ahP.VHl....G.hussll.....tG+plLuplDh+pEphs.huatGKEp.h.Gss.stshhVLCR+th................................. 0 22 28 60 +11984 PF12151 MVL Mannan-binding protein Mistry J, Gavin OL anon pdb_1zhq Domain This domain family is found in bacteria, and is approximately 40 amino acids in length, There is a single completely conserved residue G that may be functionally important. The domain occurs in two types of proteins. In mannan binding proteins [1], it forms a homodimeric molecule which complexes into a homo-octamer. In thiamidases it occurs without repeats but in the presence of other domains. MVL is distinct amongst other oligomannoside binding proteins in that it exhibits specificity for certain tetrasaccharides. Each molecule of MVL has four distinct carbohydrate binding sites. 
25.00 25.00 28.20 27.20 21.50 19.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.29 0.72 -7.89 0.72 -4.29 16 75 2009-07-16 15:56:42 2009-07-16 16:56:42 3 5 57 32 26 83 0 38.30 38 13.12 CHANGED sspApussPshCushGup..WsGQWssTsssuh.....oVsshp .......tApAssPohCushGGp..WsGQassssssu.t...puspshp.... 0 5 13 21 +11985 PF12152 eIF_4G1 Eukaryotic translation initiation factor 4G1 Mistry J, Gavin OL anon pdb_1rf8 Domain This domain is found in eukaryotes, and is about 80 amino acids in length. It is found in association with Pfam:PF02854. This domain is part of the protein eIF_4G. It binds to eIF_4E by wrapping around its N terminal to form the eIF_4F complex. This complex binds various eIF_4E-BPs (binding proteins) to regulate initiation of translation. 18.80 18.80 19.40 19.70 18.10 17.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.59 0.72 -4.08 20 155 2009-07-16 15:57:30 2009-07-16 16:57:30 3 6 127 1 110 160 0 75.70 36 6.16 CHANGED stpstsspsss.p.ohSphLppLcpApsI...-DlhshpYPpslpuPDschppss....hKYcYsPsFLLQF+cllphpsDsp ...................t..t..sspssp.sspsSthhpsLpoA+hl...-.DltphsYPt..uIpuPsstlNtss....tKa+YstpFLLQFpsVhp-+Ps..h.... 3 33 64 96 +11986 PF12153 CAP18_C LPS binding domain of CAP18 (C terminal) Mistry J, Gavin OL anon pdb_1lyp Domain This domain family is found in eukaryotes, and is approximately 30 amino acids in length, and the family is found in association with Pfam:PF00666. CAP18 is a protein which is derived from rabbit granulocytes. It has two domains, an N terminal DUF and a C terminal Gram negative LPS binding domain. This domain is the C terminal domain. 20.50 20.50 20.70 22.80 20.40 19.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.10 0.72 -4.30 11 48 2009-07-16 15:57:40 2009-07-16 16:57:40 3 1 33 6 9 49 0 28.10 49 16.48 CHANGED uchLpKutEKIuctlK+IGQKIKDF..Ftp .GshhRKupEKIGcclK+IGQ+IKDF..htp.. 
0 1 1 1 +11987 PF12154 HCMVantigenic_N Glycoprotein B N-terminal antigenic domain of HCMV Assefa S, Bateman A, Coggill P anon PFAM-B_2260 (release 23.0) Family This domain is found in viruses, and is approximately 40 amino acids in length. The domain is found in association with Pfam:PF00606. There are two conserved sequence motifs: SVS and TSS. This family is the amino-terminal antigenic domain of glycoprotein B of human cytomegalovirus. 25.00 25.00 25.40 35.10 17.70 16.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.85 0.72 -4.05 3 111 2009-11-24 12:46:55 2009-07-17 10:42:19 3 2 5 1 0 89 0 36.30 76 11.90 CHANGED HHSSHTosAA+SQSGSVS.QHVTSSQTVSHcsNETIY .HpuSHTopsspuQotSVSSQ+VTSSEAVSHRANETIY. 0 0 0 0 +11988 PF12155 NADHdh-2_N NADH dehydrogenase subunit 2 N-terminal Assefa S, Coggill PC, Bateman A anon PFAM-B_2270 (release 23.0) Family This domain is found in eukaryotes, and is approximately 90 amino acids in length. It is found associated with Pfam:PF00361. All members are annotated as being NADH dehydrogenase subunit 2, and this region is the N-terminus. 25.00 25.00 26.10 26.10 18.40 18.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.08 0.72 -3.81 2 221 2009-07-17 10:13:53 2009-07-17 11:13:53 3 1 205 0 0 181 0 85.30 93 47.06 CHANGED MELTLGLIILIVLhYGLKAPTLRLAhlhhGhl....LLhtspLLshTQAIKMLVhLSGLAlLCMLDHphSaRSSSLLILLVILGNLLL MELTLGLIILIVLTYGLKAPTLRLAMLLAGA......V.....G.AAGLL.AEPHLLCWTQAIKMLVMLSGLAILCMLDHRTSHRSSSLLILLVILGNLLL 0 0 0 0 +11989 PF12156 ATPase-cat_bd Putative metal-binding domain of cation transport ATPase Assefa S anon PFAM-B_2287 (release 23.0) Family This domain is found in bacteria, and is approximately 90 amino acids in length. It is found associated with Pfam:PF00403, Pfam:PF00122, Pfam:PF00702. 
The cysteine-rich nature and composition suggest this might be a cation-binding domain; most members are annotated as being cation transport ATPases. 27.00 27.00 27.30 27.00 26.80 26.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.55 0.72 -3.45 88 725 2012-10-03 05:12:49 2009-07-17 13:04:01 3 7 717 0 192 664 62 83.30 30 10.60 CHANGED tsCaHCGtsl.s..tstphpsplpGpp..........+thCCsGCpuVspsIhcsGLssYYchRspsutpspt......tp.hp...phphaDps..-lpppFVppp..ssts .......pCaHCs.sh......tp..t.hphchtGpt..........+.hCCsGCpuVhphlh-u.GLssYYctcsssuhp..stt....h.pp..hp....phthhD....pl.tpalp.t.t..tt......................... 0 60 126 165 +11990 PF12157 DUF3591 Protein of unknown function (DUF3591) Assefa S, Bateman A, Coggill P anon PFAM-B_2298 (release 23.0) Family This domain is found in eukaryotes and is typically between 445 to 462 amino acids in length. Most members are annotated as being transcription initiation factor TFIID subunit 1, and this region is the conserved central portion of these proteins. 
25.00 25.00 37.20 25.90 16.40 24.30 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.52 0.70 -6.09 20 386 2009-07-17 12:30:32 2009-07-17 13:30:32 3 36 271 0 294 397 1 387.00 32 30.51 CHANGED FNI...SNDctY-hLKcsapsKVRuTluslslEHShPAh+LQhPaYKlcLs+p-hRuFHRPshshp.......Pssphs..Fs+lKhpK+K+c+......uKslpElFp..sopDLohsD.susllLhEYSEEhPhhLSNFGMuoKlINYYR+pstcDssRPKhc..lGETplLtspD+SPFh.FGpV-PGchVPTLaNNMhRAPlFKHcscsTDFLllRSos...sssuscaYLRsIsplFsVGQpaPs.sEVPGPHSR+VTshsKNRL+MllYRllp....+stp++.....lplcsls+HF.PDps-hQNRQ+LK....EFhcY.pRcu..t..pGaWclKps-slPsE.cslRsh.....loPE-lChl-SMpsGtppL.cDuGYspssch............tc-tct............thslEppLAPWssT+NFlsAopGKAMLplHGtGDPTGpGEGFSFl+T.SMKGGFh.psscsspspstpshstpch.p.puuH.YNVupQQ+hY...--EIc+lWptQppSL ....................................................................aNl...SNDphY..h..pp........t...l....+...s...shu.s....lpHShPAlcLp.Paa.s.chs..chRpFHRP.hphh....................st.t.h......h.thhc+.Kt.p.........................................upsstphFh....pspD.LohpD..usllLhEYsEEtP.lhsphGMus+lhNY.........Y...........+...............+c.s.s..cD.............s........s.....h..s.chc...............h..GEs.hh..ht..........s...SPF..hG...p...lcPGph..l.......slpNshaR.A...........Pla................H................c...............p.....TD..FLllRopp......................................taa...lRpls.....pla.slGQph......P...hEVPuP.pS++ssshh+shL..ph.hhYRlhh...........ps..p.pc..............lphp-lpct..F...Ps..p...s..-.p.Rp+LK................hhpa.....p+..s.s......t..........pshWhl+.s..s....plPsE.-plRsh...lsPEp..........sChh.SM.suppcL.cDsGasppt..h............................................pp...ttpp......................................ths.E.phAPWssT+sFlt.....Ah...p...u..........K....s...h...LplpG.uDPT.G.p..G..pG..FSal+h.s.Kss........t...t..tp.....p......................t.............t........l...ss..pchc.tthp..sstp..pha...--pIp+l.phph.s..................................................................................
.... 0 118 175 242 +11991 PF12158 DUF3592 Protein of unknown function (DUF3592) Assefa S, Coggill P, Bateman A anon PFAM-B_2016 (release 23.0) Family This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 150 and 242 amino acids in length. 29.00 29.00 29.00 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -11.43 0.71 -4.41 88 745 2009-07-17 13:18:40 2009-07-17 14:18:40 3 7 584 0 211 607 46 147.30 21 83.87 CHANGED hhhhhhh......shhhhshuhhhhhhshhhh.......................................hth.psh................................................................................hpspGplhshp.....................t.tptppsstapsh.lpaphs......sG..........p.......php....atps.......ssp.st...........ht.....hGp.pVsVh.........YsPssPpp..................uhlp..............................................................................................h...........thashshlhhh.........................................................................................................................................................................................................................................................................Gh........hhh.....hsGlhhhht 
.....................................................hh.......hhhhh..sl.G...lh..l.llhu.shhhhtp.........................................hphscss................................................................................hcspG.hl.l.css................pp.pp.ps.s.....p.ss.sah..P....s..ltapss......ss.............p.....plt...........asss...........hsushhp.........................................s......tGc.pVs.VaYsPscPpp..................Ahls.....................................................................................................................psh....h.............htashhslhhh..............................................................................................................................................................................................................................................................................................................Gh..........lhh...hhGh.....hh.............................................................................................................................................................................................................................................................................................................. 0 73 134 179 +11992 PF12159 DUF3593 Protein of unknown function (DUF3593) Assefa S, Coggill P, Bateman A anon PFAM-B_2028 (release 23.0) Family This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 98 and 228 amino acids in length. There is a conserved LHG sequence motif. 
25.00 25.00 43.70 43.70 18.80 18.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.10 0.72 -4.20 34 137 2009-07-17 13:37:51 2009-07-17 14:37:51 3 4 116 0 75 140 115 91.60 48 58.03 CHANGED hs.ssLFuLSLlPYLsFLaaL....p+spp.hPpLslhGFthTLlFVulTIssulhApphaG.spLusVDhLHGuAEuFLTLSNhllllGh..hpthpptp ......h..ssLFAlSLhPYLuFLaaL.....s+opp..hPpLslhGFahlLlFVusTIsAuIhAphtYG.psLusVDaLHGuAEuhLTLoNlllVLGhppultp.h..................... 0 24 48 68 +11993 PF12160 Fibrinogen_aC Fibrinogen alpha C domain Mistry J, Gavin OL anon pdb_2jor Domain This domain family is found in eukaryotes, and is approximately 70 amino acids in length, and the family is found in association with Pfam:PF08702. This domain is the C terminal domain of fibrinogen in mammals. The domain lies in the C terminal half of the alpha C region in these proteins. The function of the domain is that of intramolecular and intermolecular interactions to form fibrin. 20.50 20.50 22.10 20.90 20.20 20.00 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.46 0.72 -4.51 12 67 2009-07-17 15:11:56 2009-07-17 16:11:56 3 4 38 2 24 74 0 68.70 47 10.34 CHANGED usEKV..TSu...uToTT++SCSKTlTKTVl.GPDG++EslKEVlsS-DGSDCu..cs.cls.....hsHsF...pGuLD-hp ...........upEKVTSu...uToTTR+SCSKTlTKTVl.GPDG++EVsKEVlsS-DGSDCs..-sh....-Ls.........hhcsF...tGshDth...................... 0 1 2 8 +11994 PF12161 HsdM_N HsdM N-terminal domain Bateman A, Assefa S, Coggill P anon PFAM-B_2036 (release 23.0) Domain This domain is found at the N-terminus of the methylase subunit of Type I DNA methyltransferases. This domain family is found in bacteria and archaea, and is typically between 123 and 138 amino acids in length. The family is found in association with Pfam:PF02384. Mutations in this region of EcoKI methyltransferase Swiss:P08957 abolish the normally strong preference of this system for methylating hemimethylated substrate [1]. 
The structure of this domain has been shown to be all alpha-helical. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.20 0.71 -3.74 142 4604 2009-07-17 15:36:28 2009-07-17 16:36:28 3 25 2890 12 944 3684 555 134.60 20 25.85 CHANGED lpstlWshA.-hLRss..h...ssscY...tchlLsllFl+hls-hhptp....ppph................................................hh...t............hh.......ss............phpa...sp............lh.p.....................ss......ltptlpp.............hhptlppp........................lpslh..p...ph.t.t...........psphLtpllpth ...............................................ppplWphs.-hLRus.....h.sssca...pphllsllFlKhls-phppt...........hpch..................................................................................................ht.th.t.........................ah..l......st...................phpa.......ps.....................htpp......................................ttps......lhphlsp................hhptlcpt....s.............................s.........hculh..c...ph.p.....t.t..........tps..phLtpllph............................................................................................................................................. 0 338 667 826 +11995 PF12162 STAT1_TAZ2bind STAT1 TAZ2 binding domain Mistry J, Gavin OL anon pdb_2ka6 Domain This domain family is found in eukaryotes, and is approximately 20 amino acids in length, and the family is found in association with Pfam:PF02865, Pfam:PF00017, Pfam:PF01017, Pfam:PF02864. This domain is the C terminal domain of STAT1. This domain binds selectively to the TAZ2 domain of CRB (CREB-binding protein). In this process it becomes a transcriptional activator and can initiate transcription of certain genes. 
21.60 21.60 24.80 28.00 19.40 19.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.35 0.72 -6.82 0.72 -4.48 8 87 2009-07-17 15:38:57 2009-07-17 16:38:57 3 8 48 1 30 65 0 24.20 65 3.35 CHANGED SRLQ..-NhhPMSPD-acELcRhVu ..SRLQoTDNLLPMSPEEFDElSRhVG........ 0 5 7 12 +11996 PF12163 HobA DNA replication regulator Mistry J, Gavin OL anon pdb_2uvp Domain This family of proteins is found exclusively in epsilon-proteobacteria. Proteins in this family are approximately 180 amino acids in length. The structure of HobA is a modified Rossmann fold consisting of a five-stranded parallel beta-sheet (beta1-5) flanked on one side by alpha-2, alpha-3 and alpha-6 helices and alpha-4 and alpha-5 on the other. The alpha-1 helix is extended away from and has minimal interaction with the globular part of the protein. Four monomers interact to form a tetrameric molecule. Four calcium atoms bind to the tetramer and these binding sites may have functional relevance. The function of HobA is to regulate DNA replication and its does this by binding to DNA-A, but the exact mechanism of how this regulation occurs is purely speculative 20.20 20.20 20.80 79.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.12 0.71 -4.99 15 195 2009-07-17 15:52:56 2009-07-17 16:52:56 3 1 194 6 22 95 3 176.60 56 99.59 CHANGED Mp-FlpWoLEsIRc-..puhhu....WhEE+RhEWsPLlAspLcaLL-.GtohIVlTD-cRcWFpsYhlspINpsp.psRP.hLPhhsL+ulas.hcslpss..EsIsLlpDMLsIuFsNsYhaaYIGK.usct+upIAKs+-sSahWlFDEphQNSFhLsSpD-tLDhKLLpLa+LF-cSLsAsLFu+VsL ..................MsDFLoaTLEsIRss..Gs.FMA.....WLEpRRlEWAPLhAuRL+aLLE.G+TFlLhsDEpRuWaEEYhLpNIN.o+.ssRP.hLPFVS.LsSLhc..++.lpsp...E..DIALLsDhL-ISFPNGalaFYIGp.uoDs+ShIAKS+DDShLWLFDEQ.LQDSFYLsS+Dc.cLDlKLIoLY+LFDsSLDAlLFuKVpL.... 
1 5 18 22 +11997 PF12164 SporV_AA SporV_proteinAA; Stage V sporulation protein AA Mistry J, Gavin OL anon pdb_3g74 Domain This domain family is found in bacteria - primarily Firmicutes, and is approximately 90 amino acids in length. There is a single completely conserved residue G that may be functionally important. Most annotation associated with this domain suggests that it is involved in the fifth stage of sporulation, however there is little publication to back this up. 25.00 25.00 28.10 27.70 24.10 23.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.96 0.72 -4.00 17 224 2009-07-17 15:54:56 2009-07-17 16:54:56 3 1 222 8 32 178 4 92.40 35 44.56 CHANGED MppplYl+h++clpls.ppplpLtDlApltusp.slhp+Lpshslh+l...opcc+phhVlslhclIptI+pp.hPsl-lQslGtspsIVElphpK+ps ......ppslYl+hRpRlcVp.shcVpLGDlAQltuss.slhcpLpsh....hlY+l................sscD+s..+lVI.DlhKlIchIp....pp.hsplplp..hlGsupTlVElhhcK+ps................. 0 9 22 24 +11998 PF12165 DUF3594 Domain of unknown function (DUF3594) Assefa S, Coggill P, Bateman A anon PFAM-B_2040 (release 23.0) Family This presumed domain is functionally uncharacterised.This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00628. 21.00 21.00 22.10 21.20 20.60 20.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.94 0.71 -4.54 8 251 2009-07-17 16:40:26 2009-07-17 17:40:26 3 8 37 0 129 263 0 126.50 65 52.68 CHANGED RTVE-lF+DFKuRRuGIl+ALTsDVE-FYpQCDPEKENLCLYGaPNEsWEVNLPAEEVPPELPEPALGINFARDGMpEKDWLSLVAVHSDuWLLSVAFYFGARFuFs+sDRKRLFsMINDLPTVFEVVTG.AcKQsK .........................RTVE-lF+DF+uRRuGll+ALTsDVEcFYp...CDP.E.K.ENLCLYGhPsE.sWEVsLPAEEV.PPELPEPA.LGINFARDGMpcKDWLSLVAVHSDuWLLuVAFYFGAR........hs+s-R.....KRLFsMIN-LPTlFEVVTGt.pt.......................... 
0 20 83 110 +11999 PF12166 DUF3595 Protein of unknown function (DUF3595) Assefa S, Coggill P, Bateman A anon PFAM-B_2166 (release 23.0) Family This family of proteins is functionally uncharacterised.This family of proteins is found in eukaryotes. Proteins in this family are typically between 578 and 2525 amino acids in length. 26.90 26.90 27.10 28.80 24.60 26.80 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.30 0.70 -5.66 24 389 2009-07-17 16:45:50 2009-07-17 17:45:50 3 9 130 0 250 313 3 308.60 33 22.84 CHANGED ptlPFLaELRslhDWshocToLslhpWhplEDIaspla.hpsthps.cph.t.pGpphshhsKhhhGshhlhhllhllahPLhlaSshsss.spsN.lhssshplslssh........shaph........ssphphhsssthtpl.....tt.htppptthp........hhtshstpslptsplsspSsshWslSssuhppllppLpssst.....slphphsap...hpRshsttsshtths..p...............hths.stptsLtphLps..............................sstsl.lsphaspalps.s...sstsp.hpt..............ttpshhshshthhpsptst...............sstaWsls.tpss...t....................t.sttsppsslhhhlhs-cssss.h..uhlsuh..GIlulYsohlLslG+hlR.uhhsstspplhhpchsps-cllplCpsIhlsR......p...ss-htLEptLahcLlplhRSPEpllchTtst ............................................hhlPFLhELRslhDWshos.Tolsl.pWhphEDlaspla...hc..s..ht...pp.......t............h...u.........h.K.hh.Gh.hhhhllhhlhhPlhhhu.hpss..s..N.shpss..hplpltsh.................shath.............t.pl...hp.p.hpph.................h........shp...................altt....a..t.p.-l.h........hphpspSsthWtlssssptphhppL.pst.................th.hhhpap...hpRph...s...tt...h..ht..........................h....t........ttt.lhp.hltt.....................................................ttshhl.phhP.alps.s...tspsp.hp.........................t.thh.shph.....h.tt.........................................taW.hp............................................tttt.l..hhhhs-cs.ss.h....shlssh...GIhuLYhoh..VLslu+.hlR.thhst.shpIha--LPssDcllplCp-IalsR......p...tt-htlEctLahcLlhlaRSPchhlchTp................................................ 
0 95 131 192 +12000 PF12167 DUF3596 Domain of unknown function (DUF3596) Assefa S, Coggill P, Bateman A anon PFAM-B_2234 (release 23.0) Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00589. It is likely that this domain acts to bind DNA. 21.90 21.90 22.40 22.50 21.20 21.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.93 0.72 -4.44 60 686 2012-10-03 08:51:45 2009-07-17 17:52:51 3 6 484 0 96 500 13 62.00 47 16.03 CHANGED hsulcsR.....ssp.....LplsFpYpGhRsREph....s..L.sTstNhKpApplhspIptcIphGsFs...YucaF....PpSp ...............PsGVEs+.....Gup.....LRIhF...hY+G.....tRsREsL......G..lsD..Tsc......N......RKhA.G.c.LR.splsatIc.....tGsF-...YucpFPsS.............. 0 16 51 80 +12001 PF12168 DNA_pol3_tau_4 DNA polymerase III subunits tau domain IV DnaB-binding Assefa S, Bateman A, Coggill P anon PFAM-B_2296 (release 23.0) Family This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. 25.00 25.00 29.50 28.00 21.50 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.21 0.72 -3.58 16 635 2009-07-20 12:56:15 2009-07-20 13:56:15 3 5 631 0 50 312 0 82.10 63 13.02 CHANGED Rsthp+p.........ptsossKKucsAuup+ht...sSsLERlApl....oppspsp..uspssE.ppstccEsYRW+sopss-.ppspsl.sTPpsLK .........RpQLQRs............QGuTKsKKSEPAAAoRARP..............VNNuALERLASV..............T-RVQARP.....sPSALE..cAPsKKEAYRWKATTPVh....QpKEVV.ATPKALK.......... 
0 2 12 31 +12002 PF12169 DNA_pol3_gamma3 DNA polymerase III subunits gamma and tau domain III Assefa S, Bateman A, Coggill P anon PFAM-B_2296 (release 23.0) Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.51 0.71 -4.44 52 3954 2009-07-20 12:56:32 2009-07-20 13:56:32 3 19 3800 30 939 3190 2267 136.90 21 21.86 CHANGED losspV+sMLGLuD+uplhcLh-slhcGDsssuLsthcstYstGADPhslLpsLh-hhHhlohhchss.s.sps.t...hspsEpp+htshAppLuhtsLsRhWQhLLKGhpElppAscPhtAAEMsLlRLsasusLPsPc-hl+pl ..........................phptlpphLGslspshlhpllcAl.hp......s-s..t...pshshlpplh.spG.h-hpphlp-L.......lph.l.+clhl..hph.......s....P....s.ht..................t..s.ttp.p...h.p.ph........A...pp...l...s...s.ppltthh.p.h...l..h...pu.p.p...-lp.h........u..s.s......+hhlEhsllRhht........................................................................................... 1 304 617 796 +12003 PF12170 DNA_pol3_tau_5 DNA polymerase III tau subunit V interacting with alpha Assefa S, Bateman A, Coggill P anon PFAM-B_2296 (release 23.0) Family This domain family is found in bacteria, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. The extreme C-terminal region of this domain 5 is the part which interacts with the alpha subunit of the DNA polymerase III holoenzyme. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.67 0.71 -4.25 5 839 2009-07-20 13:12:31 2009-07-20 14:12:31 3 7 835 1 114 517 50 140.40 57 22.10 CHANGED KALEHEKTPELAAKLAAEAIERDPWAAQVSQLSLPKLVEQVALNAWKEESDNAVCLHLRSSQRHLNNRGAQQKLAEALSsLKGSTVELTIVEDDNPAVRTPLEWRQAIYEEKLAQARESIIADNNIQTLRRFFDAELDEESI .......................................................+ALEHEKTPELuAKLAsE.Al...ER...D.s.WAApVupLuLP..KLVEQlAL.N.A.a.K.Ec..s.s.s.s..l.s.LH..LRSoQRHL.N.spuA.QQcLucALSpL..pGssVE..LTI.lED.DNP.As.+TP.LEW.RQAIYEEKLAQ.ARc.SIIADsNIQTLRRFFDAELDEESI...... 1 17 44 81 +12004 PF12171 zf-C2H2_jaz Zinc-finger double-stranded RNA-binding Assefa S, Bateman A, Coggill P anon PFAM-B_2302 (release 23.0) Family This domain family is found in archaea and eukaryotes, and is approximately 30 amino acids in length. The mammalian members of this group occur multiple times along the protein, joined by flexible linkers, and are referred to as JAZ - dsRNA-binding ZF protein - zinc-fingers. The JAZ proteins are expressed in all tissues tested and localise in the nucleus, particularly the nucleolus. JAZ preferentially binds to double-stranded (ds) RNA or RNA/DNA hybrids rather than DNA. In addition to binding double-stranded RNA, these zinc-fingers are required for nucleolar localisation. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.24 0.72 -3.71 140 2095 2012-10-03 11:22:52 2009-07-21 10:15:49 3 234 343 2 1311 3181 32 26.70 31 6.11 CHANGED paaChhCs.+hFpsppsh..psHhpo..KpHK .....aaCthCs..+tFpsppph..psHhpo.....+pHp...... 0 368 611 979 +12005 PF12172 DUF35_N Rubredoxin-like zinc ribbon domain (DUF35_N) Bashton M, Bateman A, Krishna SS anon Pfam-B_1390 (release 4.2) Domain This domain has no known function and is found in conserved hypothetical archaeal and bacterial proteins. The domain is duplicated in Swiss:O53566. 
The structure of a DUF35 representative reveals two long N-terminal helices followed by a rubredoxin-like zinc ribbon domain represented in this family and a C-terminal OB fold domain. Zinc is chelated by the four conserved cysteines in the alignment. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.06 0.72 -3.94 158 1253 2009-07-21 15:56:27 2009-07-21 16:56:27 3 17 452 2 560 1184 271 36.80 28 19.70 CHANGED hpustcscLhhp+CssCGph....h.aPPc.shC..spC.t.ut...shp ............tthtpscLhhp+C..ssCGph....haPP+.shC......PpC.t.ut.p.............. 0 123 340 478 +12006 PF12173 BacteriocIIc_cy Bacteriocin class IIc cyclic gassericin A-like Coggill P anon PfamB_170026 (release 23.0), manual Family This class of bacteriocins was previously described as class V. The members include gassericin A, acidocin B and butyrovibriocin AR10, all of which are hydrophobic cyclical structures [1]. The N- and C-termini are covalently linked, and the circular molecule is resistant to several proteases and peptidases [2]. The immunity protein that protects Lactobacillus gasseri from the toxic effects of its bacteriocin, gassericin A, has been identified. It is found to be a small positively-charged hydrophobic peptide of 53 amino acids containing a putative transmembrane segment [3] - a structure unlike that of the more common immunity proteins as found in Pfam:PF08951. 
20.40 20.40 91.60 91.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.07 0.72 -3.95 3 5 2009-07-23 10:40:23 2009-07-23 11:40:23 3 1 5 0 0 6 1 89.00 48 98.02 CHANGED MVTKYGRNLGLsKVELFAIWAVLVVALLLsTANIYWIADQFGIHLATGTARKLLDAVASGASLGTAFAAILGVTLPAWALAAAGALGATAA ............hs.....huhNhsLsKIEpFsIaAlLV.VALLlssuNIYWIADKFGIHLAPGWYQDlVDaVSAGGSLuTAFAlIlGVTLPAWIlsAAuAFGAsSA 0 0 0 0 +12007 PF12174 RST RCD1-SRO-TAF4 (RST) plant domain Overmyer K, Coggill P anon Pfam-B_8550 (release 23.0) Domain This domain is found in plant RCD1, SRO and TAF4 proteins, hence its name of RST. It is required for interaction with multiple plant transcription factors. Radical-Induced Cell Death1 (RCD1) is an important regulator of stress and hormonal and developmental responses in Arabidopsis thaliana, as is its closest homologue, SRO1 - Similar To RCD-One1. TBP-Associated Factor 4 (TAF4) and TAF4-b are components of the transcription initiation factor complex TFIID. 21.20 21.20 21.20 25.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.01 0.72 -4.18 16 135 2009-11-10 14:39:24 2009-07-24 10:38:49 3 7 24 0 80 130 0 69.30 31 11.44 CHANGED scsPp...SPahPFshLhsslspplspsch.llpppYpch+cp+IoRp-hl+plRpIVG.DpLLtpslpplppp .................t..hPp...oPhhs..FshLhshlppplspsc...hphlpphYpch+.....cp+IoRcchl+plRtIVG.DclLhpslpplp....... 0 10 44 65 +12008 PF12175 WSS_VP White spot syndrome virus structural envelope protein VP Mistry J, Gavin OL lg7 pdb_2edm Domain This family of proteins is found in viruses. Proteins in this family are approximately 210 amino acids in length. There is a conserved NNT sequence motif. These proteins are structural envelope proteins in viruses. This is the beta barrel C terminal domain. There is a protruding N terminal domain which completes the proteins. 
Three of four envelope proteins in white spot syndrome virus share sequence homology with each other and are present in this family - VP24, VP26 and VP28. VP19 is the other major envelope protein but shares no sequence homology with the other proteins. These proteins are essential for entry into cells of the crustacean host. 20.40 20.40 23.30 143.10 20.20 19.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.30 0.70 -5.24 3 37 2009-07-27 13:17:05 2009-07-27 14:17:05 3 1 2 13 0 26 0 186.50 49 100.00 CHANGED M-hu.hhuVsuAILAILol.IlVlIVIhVhpIslNK+l-p+s+slhsNhDEhlRlPIsucA+usaF+hs-sSasTcsLGKVtl+.NGpSDu-MK-ccADLVIToVpG..sRTLsVssGpspohEhThKlaNNTuRDIsIsulolsPsl.......NssS+sFsuSKslpSFoPsllccDcluTFVsGATF-AsIssTssupLlsMhhcss.s..clccK .Mch....uVsuAILAhhsl.IhVhIsIhhhp.slsKpl-p+scsl.sNhDE.lRlPlsucstusaF+hs-sSasocsLGKltl+.NGpSDupMK-ccADLVITsVpG...RsLpVssGpshohEhThKVaNNTuRcIsIsuhphsPpl........ssS+sFsuSpsspSFoPsslccDcluTFVsGsTFsAsIssTssup.h..hhp......thtp.................. 0 0 0 0 +12009 PF12176 MtaB Methanol-cobalamin methyltransferase B subunit Mistry J, Gavin OL lg7 pdb_2i2x Domain This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 460 amino acids in length. MtaB folds as a TIM barrel and contains a novel zinc-binding motif. Zinc(II) lies at the bottom of a funnel formed at the C-terminal beta-barrel end and ligates to two cysteinyl sulfurs (Cys-220 and Cys-269) and one carboxylate oxygen (Glu-164). The function of this protein is to catalyse the cleavage of the C O bond in methanol by an SN2 mechanism. It complexes with MtaA and MtaC to perform this function. 
25.00 25.00 184.40 28.20 21.40 21.00 hmmbuild -o /dev/null HMM SEED 461 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.49 0.70 -6.07 9 44 2009-07-27 13:17:44 2009-07-27 14:17:44 3 2 29 8 32 42 3 442.20 49 99.39 CHANGED ulK+aTcMAYsSADDMlFGpuhaPVKsGhslcIGAGhshPElNhA..PRstutpSh-cLl+EYc+IssDshpRhlplGhPulllEsEHVpQMopsP-WGuclA+sQ+shME-aHDcYGlKsuLRpTluDIREsc-hhtLR.GDphsshhEuFEpCApNGADhLSlEShGGKEVaDYuIlRNDlsGlLauIGlLGolDMEalWscIucIAKKpssluuGDTDCAQANTAMFlAGGLhsKNluHTlAslARAISAsRSLVAYEsGAlGPGKDCGYENsIlKAIuGVPIutEGKoSTCAHSDlMGNLshpsCDlWSNESVEY+tEFGGTTstsWsEoLuYDCALMNTAhphGppKsL..RDhhhlSD+YRDPQuYlLAYDNAY+lGcAIVK-GcDIYLRuKsAAlcsspIlcEGh.cG+LcLoRFE+cuLsssppslEuLsDDpDcFhp-ClpKYtpcVKsF+PEsYs ..h.hp+aTcMAYssuD-MlFGpuKpPVKsGhGhpIGuGhshPElNhA..P.t..tutpSh-KllpEYccIspslhpRhlplGhPulllEpEHVtQhopNPcWGu-lspsp+slME-YHDcYGlKsuLRpT.uDlREpcch.pLR.G-passhhEuFEtCApsGADhLSlEShGGKEVaDaullRsDhtulLauIGVLGohDMEhlWp-IscIAcKssslsuGDTDCuQANTAMFlAGGLhcKsluHThAslARAIuAsRSLVAaEsGAhGPsKDCGYENsIlKAIsGlPIutEGKousCAHSDlhGNLshtsCDlWSNESVEa+tEhGGsTspsWhEsLuYDCuLMNsAlppGppK.L..RDhhshSD+YRDPQuYlLuYDNAY+IGpAIl+sGcshYLRuKsAAlc.shpllc-us.sG+LpLoRFEpcsLspsppslEuLsDDp-cFhs-slp+YppcVchFcPpsYs.......... 0 8 24 27 +12010 PF12177 Proho_convert Prohormone convertase enzyme Mistry J, Gavin OL lg7 pdb_2ke3 Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF01483, Pfam:PF00082. There are two completely conserved residues (Y and D) that may be functionally important. This protein is the C terminal domain of a prohormone convertase enzyme which targets hormones in dense core secretory granules. This C terminal tail domain is the domain responsible for targeting these dense core secretory granules. The domain adopts an alpha helical structure. 
25.00 25.00 28.20 59.10 19.90 18.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.96 0.72 -4.37 3 51 2009-07-27 13:18:22 2009-07-27 14:18:22 3 1 38 2 24 52 0 41.00 76 5.49 CHANGED phKDsEDSLYNDYlDsFYNsKPYKHRDDRLLQALl-Ils-D ..QLKDSEDSLYsDYVDVFYNTKPYKHRDDRLLQALVDILsEE.. 0 1 3 10 +12011 PF12178 INCENP_N Chromosome passenger complex (CPC) protein INCENP N terminal Mistry J, Gavin OL lg7 pdb_2qfa Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. INCENP is a regulatory protein in the chromosome passenger complex. It is involved in regulation of the catalytic protein Aurora B. It performs this function in association with two other proteins - Survivin and Borealin. These proteins form a tight three-helical bundle. The N terminal domain is the domain involved in formation of this three helical bundle. 20.60 20.60 21.90 20.80 18.30 18.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -7.65 0.72 -4.41 7 80 2009-07-27 13:21:03 2009-07-27 14:21:03 3 2 45 1 33 72 0 37.40 46 4.94 CHANGED spuhtpLhplhspKlt-FlsplDshchlWL-EIppEAt ..t.Gsh+LLElC-pKLhEFlsshDNKDlVWLEEIpEEAt.. 1 2 6 17 +12012 PF12179 IKKbetaNEMObind I-kappa-kinase-beta NEMO binding domain Mistry J, Gavin OL lg7 pdb_3brt Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00069. These proteins are involved in inflammatory reactions. They cause release of NF-kappa-B into the nucleus of inflammatory cells and upregulation of transcription of proinflammatory cytokines. They perform this function by phosphorylating I-kappa-B proteins which are targeted for degradation to release NF-kappa-B. This kinase (I-kappa-kinase-beta) is found in association with IKK-alpha and NEMO (NF-kappa-B essential modulator). This domain is the binding site of IKK-beta for NEMO. 
25.00 25.00 25.90 25.70 24.50 18.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.65 0.72 -4.13 9 110 2009-07-27 13:23:03 2009-07-27 14:23:03 3 3 39 6 48 84 0 37.60 47 5.40 CHANGED SE-LltEuhsLCspLEoslp-sh+EQ-pSlhuLDWSWL .....tplltEshshhupLpohlp-....sscEQspShMsLDWSWL.... 0 2 5 14 +12013 PF12180 EABR TSG101 and ALIX binding domain of CEP55 Mistry J, Gavin OL lg7 pdb_3e1r Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. This domain is the active domain of CEP55. CEP55 is a protein involved in cytokinesis, specifically in abscission of the plasma membrane at the midbody. To perform this function, CEP55 complexes with ESCRT-I (by a Proline rich sequence in its TSG101 domain) and ALIX. This is the domain on CEP55 which binds to both TSG101 and ALIX. It also acts as a hinge between the N and C termini. This domain is called EABR. 25.00 25.00 25.00 25.00 19.50 23.70 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.68 0.72 -4.60 6 110 2009-07-27 13:23:54 2009-07-27 14:23:54 3 1 43 2 55 98 0 34.90 50 8.56 CHANGED cppl+cs.-hNppW.pYDppREsYV+uLhs+lKEL ...cpplpc.s.-hNtpW.pYDtpR-tYV+GLhAplhEh.... 0 4 8 21 +12014 PF12181 MogR_DNAbind DNA binding domain of the motility gene repressor (MogR) Mistry J, Gavin OL lg7 pdb_3fdq Domain This domain family is found in bacteria, and is approximately 150 amino acids in length. MogR is involved in repression of transcription of the flagellar gene in Listeria bacteria. This allows a phenotypical switch from an extracellular bacterium to an intracellular pathogen. MogR binds AT rich flagellar gene promoter regions upstream of the flagellar gene. These regions follow the pattern 5'-TTTTNNNNNAAAA-3'. This domain is the DNA binding domain of MogR. 
25.00 25.00 83.10 82.90 21.60 20.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.88 0.71 -4.43 4 125 2009-07-27 13:24:16 2009-07-27 14:24:16 3 1 120 2 5 94 0 143.50 71 47.21 CHANGED LLQNMSNsKpNDhpLEu.EFKKIEKQFQVcYEELlDLYNRMVLFQIDIEKpGGMcsYEKSsITWLKSELELLYtVYQFsQRHsLNIhNISKYlSKpELNLF.KTESQLQNTYYKLKKpEIPFENIcKQKPGRKRKYssVKET.sEhKpE .LLQpMSNNKhNDhpLEs.EhKKIEKpFpVcYEELlDLYNRMVLFQIDIEKHGGhRAYEKSsITWLKSELELLYEVYQFsQRHGLNIlNISKYVSKNELNLFPKTESQLQNTYYKLKKcEIPFENIEKQKPGRKRKYhPVKETlVphKpE............. 0 1 3 3 +12015 PF12182 DUF3642 Lipoprotein; Bacterial lipoprotein Mistry J, Gavin OL lg7 pdb_3ge2 Domain This domain family is found in bacteria, and is approximately 60 amino acids in length. There is a single completely conserved Y residue that may be functionally important. This domain is from a bacterial lipoprotein, a major virulence factor in Gram negative bacteria. 22.70 22.70 23.90 23.00 22.30 19.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.60 0.72 -3.55 10 315 2012-10-03 08:47:39 2009-07-27 14:26:22 3 1 274 1 16 161 0 78.20 62 47.50 CHANGED spsoslDGoYsusD.-s....DQlTLploGsoGThTclEsDG-cElcQVss-usNQ+LII...GD.DsKpYphsGNQLsl-DhDc-s ......AQPT-IDGTYTGQD.DG....D+ITLVVTGTTGTWTELESDGDQcVKQVTFDuANQRMII.....GD.D..VKIYTVNGNQllVDDMDRDP................... 1 1 3 7 +12016 PF12183 NotI Restriction endonuclease NotI Mistry J, Gavin OL lg7 pdb_3bvq Domain This family of proteins is found in bacteria. Proteins in this family are typically between 270 and 341 amino acids in length. There is a conserved CPF sequence motif. The type IIP restriction enzyme, NotI, is a homodimer that recognizes the 8 bp DNA sequence 5'-GC/GGCCGC-3' and cleaves both strands of DNA to create 5', 4 base cohesive overhangs. 
25.00 25.00 62.30 62.10 19.30 18.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.90 0.70 -5.30 13 53 2012-10-11 20:44:46 2009-07-27 14:38:24 3 1 49 4 17 50 1 226.60 26 80.31 CHANGED ElFGaps..ss.u.hsspppupct.CPFh.spps.KsoKspst.shGVCSl...h..........scsupslIlCPhR.atpchpIhs-suchhhs...................h.tuhhhthtpththssshGcs...sGsEh.ls...t.....lDalLsphss....G+lhDaVulElQslshoGsh+shhpph.............hppspuphuhsW............osstKRllPQLhaKGsllc+.....hstKhshllpcshathlsc.h................lspht.hpsushsa.hlh-hc.sss....t.thpL ........................EhhG.th..s..t..h..hthhtp..CPFh.sppC.K.spKsps....GsCol.....h...................tpssp.hIlCPhR.hhp....chphhhpshphhhs............................................t+p...sGsElpls......t.sth.lDalLsshss....sclhDaVulElQslspTGss.ts.ppphht...........hppstsphuhs.W............psstKRllsQlhhKsplhcp.....hstKhshllppthathhsp.h................htphp..hpttshsh.hhhshc.stp............................................................... 0 3 7 16 +12018 PF12185 IR1-M Nup358/RanBP2 E3 ligase domain Mistry J, Gavin OL lg7 pdb_1z5s Domain This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00638, Pfam:PF00641, Pfam:PF00160. There are two conserved sequence motifs: TFFC and EDF. Nup358/RanBP2 is a nucleoporin involved in ubiquitination of many different protein targets from various cellular pathways. It complexes with Ubc9, SUMO-1 and RanGAP1 to perform this function. This is the ligase domain which binds to Ubc9. 25.00 25.00 25.00 25.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.07 0.72 -4.25 4 96 2009-07-27 15:58:16 2009-07-27 16:58:16 3 30 40 4 59 87 0 62.00 51 4.32 CHANGED Ds.IVaE+cPTsEpcAhAcpLpLPPTFFChhspsD.ospDpscsEDF-otV+KlptcLhspssEt ...-slIVaEtpPTsEpKAhAcpL+LPPTFFChhsts...D..ssE-p..pssEDFpotl+Klptt.h.pspc.t........ 
0 8 12 29 +12019 PF12186 AcylCoA_dehyd_C Acyl-CoA dehydrogenase C terminal Mistry J, Gavin OL lg7 pdb_2oku Domain This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF02770, Pfam:PF00441, Pfam:PF02771. There is a conserved ARRL sequence motif. The C terminal domain is an alpha helical domain. The flavin ring of Acyl-CoA dehydrogenase is buried in the crevice between the two alpha helical domains and the beta-sheet domain of one subunit, and the adenosine pyrophosphate moiety is stretched into the subunit junction of a neighbouring subunit, composed of two C terminal domains. 19.80 19.80 20.50 21.20 19.30 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.37 0.71 -4.22 7 120 2009-07-27 15:59:57 2009-07-27 16:59:57 3 2 119 2 18 101 7 112.90 46 19.94 CHANGED TsGoYlApIR.EapslsssPEhpsLps+LtcMss+a-AssstVpEspDQELlDFhARRLhEMAAchlhuHLLlQDAsKs.D.FtcSApVYlpYupAEl-KapsFIppFst-DLAh .............TNGoYlupl+.E.apphss..usE.hpsLhsRlpcMss+aEsssstVK..EspsQE...l...hDFhARRLhEMAAclIMuaLLlpDAo.KusELFuKSApVYlpaAEuEVpKHtsFIcsFcsEDL........... 0 9 16 18 +12020 PF12187 VirArc_Nuclease Viral/Archaeal nuclease Mistry J, Gavin OL lg7 pdb_2w8m Domain This family of proteins is found in archaea and viruses. Proteins in this family are typically between 211 and 244 amino acids in length. These proteins are nucleases from fusseloviruses and sulfolobus archaea. 
19.70 19.70 19.90 20.90 19.60 19.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.27 0.71 -4.89 4 13 2012-10-11 20:44:46 2009-07-27 17:00:09 3 2 13 2 3 15 1 174.20 44 76.43 CHANGED sFK.+YWuNps.-sa.lPs.aLGREYLlhGhLhIpLtpW+.KGLlcFDlYl+.TGlGTLTsVINcpYYpsl.DKYDLTlYh+hK....spYaPhlaIDlTGSSWTEEQSccR.....YGEulYAILSsKVpVApph-VhG.RshFIHYsDpEDKLKhIsALpILNLE+QGK...I+h-caEhsAtSpYYlIPlphWKNls-LRV ....................................FKLKYWGspt.-sYlLPs.hLGREYLllGKL.hIpLupW+.KGhl-aDlYl+.oGlGTLT.NsIN.pYYcslpDKYDhTlYl+sK....cpYaP..LlaIDITGSSWTEEQSccR.....YGESlYAILSsKV-VApcas..VhG.RVaFlHYsDsEDKLKsIoALQILNLE+psK...IKhDcFEpsAtSpYYLIPlphWKNlhELR........... 1 1 1 3 +12021 PF12188 STAT2_C STAT2_Cterm; Signal transducer and activator of transcription 2 C terminal Mistry J, Gavin OL lg7 pdb_2ka4 Domain This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF02865, Pfam:PF00017, Pfam:PF01017, Pfam:PF02864. There is a conserved DLP sequence motif. STATs are involved in transcriptional regulation and are the only regulators known to be modulated by tyrosine phosphorylation. STAT2 forms a trimeric complex with STAT1 and IRF-9 (Interferon Regulatory Factor 9), on activation of the cell by interferon, which is called ISGF3 (Interferon-stimulated gene factor 3). The C terminal domain of STAT2 contains a nuclear export signal (NES) which allows export of STAT2 into the cytoplasm along with any complexed molecules. 20.00 20.00 76.60 75.30 19.70 16.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.65 0.72 -4.17 5 42 2009-07-27 16:00:45 2009-07-27 17:00:45 3 5 23 1 11 33 0 56.00 62 7.09 CHANGED SQPlPEPDLPcDLQQLNT--M-IFRNslsIE-IMPNGDPLLAGQNo.VDEAYlSppS ......SQslPEPDLPpDLRHLNTE-ME...IFRNs.h+IEEIMPNGDPLLAGQNT.sDEsalsp.S... 
1 1 1 1 +12022 PF12189 VirE1 Single-strand DNA-binding protein Mistry J, Gavin OL lg7 pdb_3btp Domain This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved IELE sequence motif. VirE1 is an acidic chaperone protein which binds to VirE2, a ssDNA binding protein. These proteins are virulence factors of the plant pathogens Agrobacteria. VirE1 competes for the ssDNA binding site of VirE2. 25.00 25.00 26.10 101.90 17.80 16.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.00 0.72 -3.85 4 9 2009-07-27 16:02:35 2009-07-27 17:02:35 3 1 5 1 3 6 0 62.30 61 95.08 CHANGED MAIIKLpsNKsRsssssEcPpEl.tEEhShsH.ssGFTsLDLcMIELEsFVL+CPLPE-NLAG MAIIKLpANKNRsssssEcPpcl.hEEhSssa.ssGFTSLDLcMIELEsFVL+CPLPE-NLAG 0 1 3 3 +12023 PF12190 amfpi-1 Fungal protease inhibitor Mistry J, Gavin OL lg7 pdb_3bt4 Family This protein family is found in eukaryotes, and is approximately 50 amino acids in length. These proteins are fungal protease inhibitors. 25.00 25.00 25.20 29.30 21.60 21.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.44 0.72 -3.48 2 9 2009-07-27 16:04:19 2009-07-27 17:04:19 3 1 6 1 2 11 0 85.00 68 91.29 CHANGED Csht..YGsLVCGosYCcppPChpP.h...C.pso.aRA+HAGKCACCPACVThLGEsAsCKhYSKELGETPSAlC+-PLKClptVCT+ls .Clhs..YGsLVCGTsYCcpNPChpPshs..C.pso.aRA...pHAGKCACCPACVTLLsEsAsCKhYSKELGETPSAVC+EPLKClptVCTKls. 0 1 2 2 +12024 PF12191 stn_TNFRSF12A Tumour necrosis factor receptor stn_TNFRSF12A_TNFR domain Mistry J, Gavin OL lg7 pdb_2eqp Domain This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 184 amino acids in length. This is the stn_TNFRSF12A_TNFR domain from the tumour necrosis factor receptor. The function of this domain is unknown. 
21.40 21.40 21.40 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.79 0.71 -4.10 2 45 2009-07-28 09:54:12 2009-07-28 10:54:12 3 2 30 4 23 46 0 108.70 54 87.92 CHANGED MsP...RsL.phhV.hhhLVL......ppAsupu.CspGpuaStDLsKCM-CusC.sp.+SDFC.sCsuts.t..F.hlWsIhhuAhulhLllsllShhlhhp+CRR+pKFTTPIEETGucus.ul.LIp ..........................L.thLVhshhLsL.tusuG.ppsPGsuPCucGoSWSuDLD.KCMDCuSC.uRP+SDFChGCuAAPPAs..FpLLWPILGGALSLs..lV.LuLLS.GFLVWRRCRRREKFTT.PIEETGGEGCPuVALIQ...................................................................... 0 7 7 10 +12025 PF12192 CBP Fungal calcium binding protein Mistry J, Gavin OL lg7 pdb_2jv7 Domain This domain is found in eukaryotes, and is approximately 60 amino acids in length. There is a single completely conserved residue C that may be functionally important. This is a calcium binding domain from the fungal protein CBP (calcium binding protein). This protein is a virulence factor with unknown virulence mechanisms. CBP complexes as a highly intertwined homodimer. Each monomer is comprised of four alpha helices which adopt the saposin fold, characteristic of a protein family that binds to membranes and lipids. 25.00 25.00 59.60 58.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.86 0.72 -4.02 5 12 2009-07-28 09:54:30 2009-07-28 10:54:30 3 2 11 2 8 14 0 57.20 47 25.99 CHANGED QPuVscAhs.aNpsVssFpKAouAAuCsWlsCluSLuuoSAACAAAluELGLcPPsDLA .......tPultcAhspaNpsVssFpcAouAAuCsWlsClpSLuuoSAACAAAluELGLs..hDhA...... 0 1 2 5 +12026 PF12193 Sulf_coat_C Sulf_coatprot_C; Sulfolobus virus coat protein C terminal Mistry J, Gavin OL lg7 pdb_3f2e Domain This domain family is found in viruses, and is approximately 70 amino acids in length. It is the C terminal of a coat protein in sulfolobus viruses. 
25.00 25.00 38.40 136.40 22.50 17.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.44 0.72 -4.16 3 5 2009-07-28 09:55:13 2009-07-28 10:55:13 3 1 5 1 0 5 0 69.00 86 54.25 CHANGED VSAVLTQYGITGPNRAIYQGFGLKVARALNRIGSGPALVNMINGLKuYYISAFNANPQVLDAVVNIITG VSAVLTQYGITGPNRAIYQGFGLKVARALNRIGSGPALVNMINGLKAYYISAFNANPpVLDAVTNIITG 0 0 0 0 +12027 PF12194 Ste5_C ste5minscaffold; Protein kinase Fus3-binding Mistry J, Gavin OL lg7 pdb_3fze Domain This domain family is found in eukaryotes, and is approximately 190 amino acids in length. This domain is the penultimate C terminal domain from the protein ste5 which co-catalyses the phosphorylation of fus3 by ste7. It is involved in the MAPK pathways. This domain is the minimal scaffold domain of ste5. It binds to the mitogen activated protein kinase fus3 before it is phosphorylated. 25.00 25.00 185.80 185.00 22.50 17.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.08 0.71 -4.67 5 22 2009-09-14 13:25:02 2009-07-28 10:55:46 3 2 21 1 12 24 0 192.70 54 22.33 CHANGED hTSISSILSLKRE+P--LsLlLQIDppKlp.-DshI..lItNoLpALshKFsshphClVDusGhVlshGols+hI.sLsSIus.hc+ssusp+FSPshL+splY..spsIpcNLGIVllSNSuMEpuKSlLFpDY+sFsShGRRRPNELKIKVGYLNsDYSDpIsELVEIuoWsalLEALCYSFuLSFDDDD..DD-Es h.TTISSILSLKRE+PDsLsIlLQIDFpKLKp-csll..llYNSLKALolKFscLphChVDpsshVlsaGslhctIssl-ulss.LcppsuuppFSPhWLKNoLY..PcsI+cpLGIlslSNSsMEscKSlLF.DY+sFsuhGRRRPNELKIKVGY..LNVDYSDKIsELVEsuSWshlLEoLCYSFuLuFD-DD..DD--..... 0 1 6 12 +12028 PF12195 End_beta_barrel Endosialidase; Endobetabarrel; Beta barrel domain of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is approximately 80 amino acids in length.This domain is the beta barrel domain of bacteriophage endosialidase which represents the one of the two sialic acid binding sites of the enzyme. 
The domain is nested in the beta propeller domain of the endosialidase enzyme. The endosialidase protein complexes to form homotrimeric molecules. 26.50 26.50 28.40 34.90 24.10 26.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.85 0.72 -4.25 3 30 2009-07-28 13:31:40 2009-07-28 14:31:40 3 9 26 18 2 27 19 83.00 73 8.81 CHANGED SRSLHlTGGITKAANQpsATI+IsDHGLFAGDFVNFSNSAVTGVSGNMTVATVIDKNTFTVTTsNsQsSDlNNAG+sWsFGTS .........SRSLRlYGGITKAANQQVAYIRITDHGLFAGDFVNFSNSGVTGVTGNMTVTTVIDKNTFTVTTQNT.....QDVDQNNEGRYWSFGTS....... 0 0 0 0 +12029 PF12196 hNIFK_binding FHA Ki67 binding domain of hNIFK Mistry J, Gavin OL lg7 pdb_2aff Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00076. There are two conserved sequence motifs: TPVCTP and LERRKS. This domain is found on the human nucleolar protein hNIFK. It binds to the fork-head-associated domain of human Ki67. High-affinity binding requires sequential phosphorylation by two kinases, CDK1 and GSK3, yielding pThr238, pThr234 and pSer230. This interaction is involved in cell cycle regulation. 25.00 25.00 29.00 27.90 20.40 19.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.11 0.72 -4.57 5 73 2009-07-28 13:32:34 2009-07-28 14:32:34 3 2 40 1 31 53 0 39.30 63 14.58 CHANGED SVsSQDPTPVCTPTFLERRKSQlhEl..sDD-cDDEIVLKLPl ......oVDSQGsTPVCTPTFLERRKSpVA-h..NDDDcDs.EIVFKpP.... 0 1 2 9 +12030 PF12197 lci Bacillus cereus group antimicrobial protein Mistry J, Gavin OL lg7 pdb_2b9k Domain This domain is found in bacteria, and is approximately 40 amino acids in length. This domain is found in bacillus cereus group bacteria. It is an antimicrobial protein. 
20.30 20.30 20.70 20.40 19.90 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.12 0.72 -4.37 4 37 2009-07-28 13:32:52 2009-07-28 14:32:52 3 1 31 1 9 30 0 38.90 38 34.23 CHANGED s.hhlpSssGsFANSFspsGsTWYhKGK.hpsspGsWsuaYE..s+ ...............pssG.FAssF.t.s....G..hpWYhKG...t.hc..scGpWsGaYE....t.... 0 2 4 5 +12031 PF12198 Tuberculin Theoretical tuberculin protein Mistry J, Gavin OL lg7 pdb_2g6r Domain This domain family is found in bacteria, and is approximately 30 amino acids in length. This protein is a theoretical model of the tuberculin protein from Mycobacterium tuberculosis. 25.00 25.00 83.90 64.50 18.90 16.30 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.30 0.72 -4.06 2 6 2009-07-28 13:34:07 2009-07-28 14:34:07 3 2 2 0 0 6 0 34.00 77 21.36 CHANGED ohtplspsuGutS.u+NGsQhRhlAssGPtSuTh SLASVSSVSGGGSVSRNGNQARFVAPTGPASSTF 0 0 0 0 +12032 PF12199 efb-c Extracellular fibrinogen binding protein C terminal Mistry J, Gavin OL lg7 pdb_2gox Domain This domain family is found in bacteria, and is approximately 70 amino acids in length. There is a conserved VLK sequence motif. It is the C terminal domain of bacterial extracellular fibrinogen binding protein. It contains a helical motif involved in complement regulation. This motif binds to complement and changes its conformation to a form which cannot activate downstream components of the complement cascade. 21.40 21.40 21.50 21.80 21.20 19.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -9.08 0.72 -4.12 2 315 2009-07-28 13:34:20 2009-07-28 14:34:20 3 1 161 12 2 47 0 64.70 69 47.90 CHANGED TshphphppKllpAQphVp.Fc+T+TVusHRKAQ+AVNLlpFpaphcKhhLQcpID.VLK.shl+ ..TshphphppKllpAQphVp.Fc+T+TVusHRKAQ+AVNLlpFpashcKhhLQcpID.VLK.shlK..... 
1 2 2 2 +12033 PF12200 DUF3597 Domain of unknown function (DUF3597) Mistry J, Gavin OL lg7 pdb_2gqb Domain This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 126 and 281 amino acids in length. The function of this domain is unknown. The structure of this domain has been found to contain five helices with a long flexible loop between helices one and two. 23.50 23.50 23.50 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.75 0.71 -3.75 24 206 2009-07-28 13:34:35 2009-07-28 14:34:35 3 7 187 1 69 167 11 119.50 50 74.91 CHANGED IFupIhstIF..........ucutsAsPsusssssssssuss...................suP...ussPsu................ssVDVuslL-thssp.pup+LsW+TSIVDLMKhLslDSSLsuRKELApELGYoGD.psDSAsMNIWLHKQVMpKLAtNGGKlP ...................................................IFspIhstIa.............ucups..A.s.ss...utssss.s...sss.s.sssss.....................uAs......ussPsu............................ssVDVsAlL-shstp...suptL...NW+TSIVDLMKhLGLDSSLptRKELApELGYo.GD..hsDSAoMNlWLHKpVhpKLt-NGGKlP........ 0 8 33 51 +12034 PF12201 bcl-2I13 Bcl2-interacting killer, BH3-domain containing Mistry J, Gavin OL lg7 pdb_2ipe Domain This is a family of pro-apoptotic Bcl-x proteins, B cell leukaemia/lymphoma 2, or BIKs. BIK proteins rely for their activity upon an intact BH3 domain lying between residues 48 and 80, as in UniProt:Q13323. 
25.00 25.00 41.10 40.70 21.00 19.40 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.01 0.71 -4.76 2 21 2009-07-28 13:35:07 2009-07-28 14:35:07 3 1 19 0 10 21 0 135.00 52 98.40 CHANGED MSEsR.huRDl.hcTl.a-Ql.pPPsh.Es.uMp-......PhcDhD.hEChEGpstlALRLACIGDEMDlsLRuPRLsQLstlAhHp..LAhhYspT.slRslhRShhcuhTsL+ENIhp.WRs.sPGuWVSs-Qs.htLh.hlhLlh.LLuGuhaL.L ....MSpsRPloRDhhh-TlLaEQl.pP.sh.Es.G.ho-.....tEshsPhc..Dhs.hEChEssDtlALRLACIGDEMDVsLRuPRLAQLstlAMHS..LAhhYsQT.slRsVLRShhcGhTsL+ENIhp.WRs.sstuWVo...........p.s...h.lh.hlhLlh.hh........................................... 0 1 1 2 +12035 PF12202 OSR1_C Oxidative-stress-responsive kinase 1 C terminal Mistry J, Gavin OL lg7 pdb_2v3s Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00069. There is a single completely conserved residue F that may be functionally important. OSR1 is involved in the signalling cascade which activates Na/K/2Cl cotransporter during osmotic stress. This domain is the C terminal domain of OSR1 which recognises a motif (Arg-Phe-Xaa-Val) on the OSR1-activating protein WNK1. 25.00 25.00 26.60 26.10 23.10 22.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.49 0.72 -4.32 16 453 2009-07-29 08:07:16 2009-07-29 09:07:16 3 6 84 3 231 395 0 35.70 50 2.76 CHANGED lpLhLRlc-sKKhtt.+.K-NpAIcFpFslt+DssE-VA .......lpLhLRlcDsKK.tt.+.+-NpuIcFpFslt+.DssE-VA.... 0 35 56 140 +12036 PF12203 HDAC4_Gln Glutamine rich N terminal domain of histone deacetylase 4 Mistry J, Gavin OL lg7 pdb_2o94 Domain This domain is found in eukaryotes, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00850. The domain forms an alpha helix which complexes to form a tetramer. 
The glutamine rich domains have many intra- and inter-helical interactions which are thought to be involved in reversible assembly and disassembly of proteins. The domain is part of histone deacetylase 4 (HDAC4) which removes acetyl groups from histones. This restores their positive charge to allow stronger DNA binding thus restricting transcriptional activity. 25.00 25.00 26.20 26.20 22.70 24.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.60 0.72 -4.05 6 235 2009-07-29 08:07:40 2009-07-29 09:07:40 3 4 46 8 78 202 0 86.40 69 9.97 CHANGED ssDPslREpQLQQELLLlpQQQQIQKQLLIAEFQKQHEpLTRQHpAQLQEHLK.QQELLAhK+QQELhE+cRKLEQQRQEp........ElE+HRREQQL ........sDPslREpQLQQELLhlpQQQQlQKQLLIAEFQKQHEpLTRQHpAQLQEHl.K................QQ..Eh.........L.A.........hKpQQ.ELL.E..+ppKLEQpRQEQ.........ElE+p+REQpL......................... 0 5 12 36 +12037 PF12204 DUF3598 Domain of unknown function (DUF3598) Mistry J, Gavin OL lg7 pdb_2o62 Domain This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 230 and 398 amino acids in length. These proteins are formed entirely from B sheets which form a barrel structure similar to those seen in the lipocalin superfamily. 
21.20 21.20 21.30 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.69 0.70 -5.15 18 117 2009-07-29 08:08:10 2009-07-29 09:08:10 3 3 73 2 52 123 46 228.00 22 76.76 CHANGED spW-shh.pNLGE...WpGSFTchSPpG...-llpshsSlloLpuhsc.sphl+hslthas............pts.sp-hspcasulu......pphlaFss.GAFSpGshQLuPaucFuuEhGFlpcsRRhRlV.lascsGpLscloLIREahuus....susEpPsLps-p....LLGpWpGcshohhPDh..p.sph..hph.l.......................tsuslpusthha.tspp..spl.hhLPDGsusssPpplph.....cpsFhlElGWLlpssp...hpRllRpYsspGpWhuloLls.p ...............................................................................................................................hh..ph...tt....W.G.ah.hs.tt....p....pph..shhphp..........t..hp.....h.h...............................p........pth...........thhhh.s.Gsh.s.s...s.tht.....t.....................h.hh.....E.hhh..h..pt..s..pRhRh.shh..a........stpG...p.........LpplshlcEppsst..............sstpps.lp...pp.......h.hGpWpGpthohhs.sh.........h...............................................psthpsphh.hh.tt........ph.hhLPsGh.hssP....tplth.....tpsF.lphsWl.pssp...hpRllRsYsspG.hhu.ohhh..t................................................................................ 2 11 33 45 +12038 PF12205 GIT1_C G protein-coupled receptor kinase-interacting protein 1 C term Mistry J, Gavin OL lg7 pdb_2jx0 Domain This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF01412, Pfam:PF00023, Pfam:PF08518. GIT1 plays an important role in cell adhesion, motility, cytoskeletal remodeling and membrane trafficking. To perform this function, it localises p21-activated kinase (PAK) and PAK-interactive exchange factor to focal adhesions. Its activation is regulated by interaction between its paxillin-binding C terminal and the LD motifs of paxillin. The C terminal folds into a four helix bundle. 
25.00 25.00 25.20 26.70 22.70 21.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.54 0.71 -4.28 5 199 2009-07-29 08:09:01 2009-07-29 09:09:01 3 11 78 1 86 202 0 120.70 62 17.57 CHANGED -u-sDssLPSTEDVIRKTEQITKNIQELLRAAQEsKH-SFVPCSE+IalAVsEMAuLFPKRPssEsVRsSLRLLsSSAsRLQsECKKAsP.-..tssDhphlTQQVIQCAYDIAKAAKQLVTlTT ...............s....sPsLPSTEDVIpKTEQlTKNIQELLRA...AQ...E.s...K.HD.S.al.P.CSE+IHlAVTEMAuLF.P...K+.P..t..p.-.sVRoS...LRLLsuSAYRLQSEC....+Ksl...Ps-s........usssDhQLl..TQ...QVIQCAYDIAKAAKQLVTITT............................................ 0 21 28 49 +12039 PF12206 DUF3599 Domain of unknown function (DUF3599) Mistry J, Gavin OL lg7 pdb_3f3b Domain This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. This domain is the phage-like element pbsx protein xkdh. 25.00 25.00 37.60 119.80 24.20 22.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.54 0.71 -3.83 7 38 2009-07-29 14:17:48 2009-07-29 15:17:48 3 1 33 1 5 32 0 117.30 49 97.21 CHANGED MSYpphLs+RCDIYHLtppcsptt.+aGIPucclp..hpYsDTPDlpslsCYFhEKsp....ollQpEPppslhpSaLlHFPhssDIRhpDKslhc..GhsahLppP.+tl+sHHhEVhAlRcpp .MSYpphLsHRCDIYH.ttptspsG.+FGIPu-clpP.hoYP-oPDhpclPCYFsEKsQ....plIQpEPspslYQpaLVHFPhsADlRlNDKllWc..shsYhLchP.+pl+s.HHhEVhAVR--.p. 0 2 3 4 +12040 PF12207 DUF3600 Domain of unknown function (DUF3600) Mistry J, Gavin OL lg7 pdb_3fgg Domain This family of proteins is found in bacteria. Proteins in this family are approximately 230 amino acids in length. This domain is the C terminal of the putative ecf-type sigma factor negative effector. 
25.00 25.00 25.70 40.90 23.30 22.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.06 0.71 -4.70 6 85 2009-07-29 14:18:26 2009-07-29 15:18:26 3 1 85 4 6 63 0 158.60 80 75.11 CHANGED SIMADGIYGSFENLKKHAGsMTLEuYMRFsAKLSEAKDEMGTKEYE.FTKELKKLTNAKLAYGDSNGNIDYDuLSPAKREEMKKVSMuLQPYFDKLNGHKSSKEVLTQEEFDRYMEALMTHEIVRVKTKSTGAIKVEEVPEAYKERFIKAEQFMEYVDEKVR ..SIMADGIYGSFENLKKHAGsMTLEAYMRFsAKLSEAKDEMGoKEYE.FTKELKKLTNAKLAYGDSNGNIDYDtLSPsKREEMKKVoMsLQPYFDKLNGH..KSSKEVLTQEEFDpYMEALMTHEIVpVKTKSTGuIKVEElPEAYKERFIKAEQFMEYVDEKVR........ 0 1 4 5 +12041 PF12208 DUF3601 Domain of unknown function (DUF3601) Mistry J, Gavin OL lg7 pdb_3g1j Domain This domain family is found in bacteria, and is approximately 80 amino acids in length. 21.00 21.00 21.00 24.30 20.50 18.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.55 0.72 -4.04 6 50 2009-07-29 14:46:14 2009-07-29 15:46:14 3 1 32 2 6 48 0 82.00 55 54.34 CHANGED s..phLhsGshV+VIKsF+DYhGhTapsGEpaaFAsshahPY-sGaTLYI..up-K....sIhLpsps-sptcIhsps.cEYFE.lp ..sF.cYLYSGDYlKVIKsFKDYYG.FTHpsGEcaYFAstYaL.aEsGhsLYI...SpDKlN...........lssIYLpDRt-s........p.clhsHs.EEYFEIlE................. 0 6 6 6 +12042 PF12209 SAC3 Leucine permease transcriptional regulator helical domain Mistry J, Gavin OL lg7 pdb_3fwc Domain This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF03399. This domain is a helical domain in the middle of leucine permease transcriptional regulator. 
25.00 25.00 32.80 31.80 23.10 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.49 0.72 -3.84 10 37 2009-07-29 14:46:34 2009-07-29 15:46:34 3 1 36 5 26 41 0 77.90 28 6.27 CHANGED llpQlhsplhp-sIcsplpshlpshlpctpt+pp+ppl.IsoLScELacAFl+EplYhshLES+A-.ahs+hlKK+hhc+ .........h.hcplhspllppsVpsplpsllpp..hlpctptc..pt+tpl.IcoLucELYsAFl+EplYhhhl-o+A-.hhppplK++hhp..... 0 4 15 26 +12043 PF12210 Hrs_helical Hepatocyte growth factor-regulated tyrosine kinase substrate Mistry J, Gavin OL lg7 pdb_3f1i Domain This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00790, Pfam:PF01363, Pfam:PF02809. This domain is the helical region of Hrs which forms the core complex of ESCRT with STAM. 22.40 22.40 22.70 34.20 22.30 22.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.09 0.72 -3.73 5 116 2009-07-29 14:46:51 2009-07-29 15:46:51 3 7 82 1 70 114 0 93.20 61 13.22 CHANGED EcFlsNlRouVElFsNRM+SNpuRGRSIuNDSAVQoLFQolosMHPQLLchlcpLDE+RhaYEuLQDKLuQI+DARAALDuLREEHcEKLR.RtAEE ......-pFlcuLpsuVosFVNRMKSNphRGRSIoNDSAV.oLFpoIssMHPQLLchlppLDE+RlYYEGLQDKLAQIRDARuALsALR-EHcEKLR.RtAEE....... 1 23 27 52 +12044 PF12211 LMWSLP_N Low molecular weight S layer protein N terminal Mistry J, Gavin OL lg7 pdb_3cvz Domain This family of proteins is found in bacteria. Proteins in this family are typically between 328 and 381 amino acids in length. There is a conserved LGDG sequence motif. Clostridial species have a layer of surface proteins surrounding their membrane. This layer is comprised of a high molecular weight protein and a low molecular weight protein. This domain is the N terminal domain of the low molecular weight protein. It is a structural domain. 
25.00 25.00 48.70 42.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.82 0.70 -4.54 19 115 2009-07-29 14:48:39 2009-07-29 15:48:39 3 2 9 4 1 110 0 234.00 34 61.87 CHANGED spsYTVVQscYcKslKplQcGlpcssIosIsVhFD.Gp.ls..sVssttsssp.......AAspLhshV-sKLDsLGDGcYVDFpIoYssssphhT.ps-ls...shtstls....sclllssAss..sss...Gllh...tssssssssuAssslphSDlhchsh...ssspsuhpLohsspt.sshphGpl...........sssshssupslohsss..htlshucu.cplDlspShhhssssu.........hhsspssssssssspspVRVINAKEpTIDlDuSShpoAE ......................................sYTVVQscYcKslKplQcGlpcssIopltV.F-.Gp.lu..pVsstsssuc..t....AAcpLhshVcspLDsLGDG-YVDFpITYs....s....tschhT.pu-hc...shtspls....s+llIssAos.....ost.....GhVp...tssssspt.uAssslphuDhhphsh..sssspsuhploh.sstt.sshphGpl............tss.os.u.tslshsss.th.hthucs.chlDhs.....pShhhsssss.........hhsstsss.sssssspspVRVINAKEpTIDlDuSShpoAc..... 0 1 1 1 +12045 PF12212 PAZ_siRNAbind Piwi/Argonaute/Zwille siRNA-binding domain Mistry J, Gavin OL lg7 pdb_3da5 Domain This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. There is a conserved LKDIL sequence motif. There is a single completely conserved residue L that may be functionally important. This domain is part of an Argonaute protein. It is an siRNA binding domain. 21.10 21.10 21.10 21.90 20.80 19.80 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.15 0.72 -4.34 10 42 2009-07-29 14:51:18 2009-07-29 15:51:18 3 6 8 5 16 37 0 45.80 56 46.23 CHANGED AYAIlp+DSpsIEElL+luKENuILKDILsATtTIKasDucEtThTP ..sYAIhpKcShslEchL+luKENuILKDILsATsTIKYsDupEtThTP.... 0 1 15 15 +12046 PF12213 Dpoe2NT DNA polymerases epsilon N terminal Mistry J, Gavin OL lg7 pdb_2v6z Domain This domain is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF04042. There is a single completely conserved residue F that may be functionally important. 
This domain is the N terminal domain of DNA polymerase epsilon subunit B. It forms a primarily alpha helical structure in which four helices are arranged in two hairpins with connecting loops containing beta strands which form a short parallel sheet. DNA polymerase epsilon is required in DNA replication for synthesis of the leading strand. This domain has close structural relation to AAA+ protein C terminal domains. 21.50 21.50 21.80 22.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.10 0.72 -4.06 10 117 2009-07-29 14:53:03 2009-07-29 15:53:03 3 4 92 1 78 118 0 70.90 35 13.88 CHANGED csc+LRp+lsusFKl+GLhLRuEAoKYLs-sLtulscsEhEDhl-+Il-sVcKQPLSSshlEcullEsAVQEC ..........s...tlRp+lhssFKL+GhhLRsEAhcYL...sctL............psh.s..ct.Eh-chL-pll-sl-KQs.LsSshl-+sslEtAlpEC....... 0 24 32 57 +12047 PF12214 TPX2_importin Cell cycle regulated microtubule associated protein Assefa S, Gavin OL lg7 PFAM-B_2368 (release 23.0) Family This domain is found in eukaryotes. This domain is typically between 127 to 182 amino acids in length. This domain is found associated with Pfam:PF06886. This domain is found in the protein TPX2 (a.k.a p100) which is involved in cell cycling. It is only expressed between the start of the S phase and completion of cytokinesis. The microtubule-associated protein TPX2 has been reported to be crucial for mitotic spindle formation. This domain is close to the C terminal of TPX2. The protein importin alpha regulates the activity of TPX2 by binding to the nuclear localisation signal in this domain. 
20.30 20.30 20.50 20.30 19.50 19.70 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.40 0.71 -4.19 10 167 2009-07-31 14:33:35 2009-07-31 15:33:35 3 9 75 0 104 164 0 147.30 34 25.41 CHANGED s+t+lTsP+EPsLpTupRsRslRsKSsA-LEpEplppl..YKFKAR.lN++IhE.sussl.KKsssK.Ppch.uF+LcTpcRApcRuSsspppspphphpp+ss...hh.DssssPphpshssssP+sstsstcs+pchspp.cc..Kt+PLs.KI...........hcs+tp.u.hsct++ppssPhpFcs .....................................s...hlTpPppP.LpTppRsRss...p.....sK..Ss.u..E.hEtEpltcl..........KF..K..AR.lsp+IhE..su......h.p+sss+..s.p..tFcL.chp....cRhp....p+pspt.p..t..ppt..........p...p....sp.s..............hh.s.st.s.h..h.hs.sppsthh.p.p.c...tt.......c.t.pPh.....hh............htst.........pt...hs.tF.................................................... 0 26 56 72 +12048 PF12215 GBA2_N beta-Glucocerebrosidase 2 N terminal Assefa S, Gavin OL lg7 PFAM-B_2416 (release 23.0) Family This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 320 to 354 amino acids in length. This domain is found associated with Pfam:PF04685. This domain is found in the protein beta-Glucocerebrosidase 2. It is found just after the extreme N terminus. This protein is located in the ER. The N terminal is thought to be the luminal domain while the C terminal is the cytosolic domain. The catalytic domain of GBA-2 is unknown. 
20.70 20.70 20.70 21.20 20.60 19.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.20 0.70 -5.27 38 367 2009-07-31 14:39:48 2009-07-31 15:39:48 3 14 220 0 196 361 64 289.80 26 35.26 CHANGED GhPLGGhGuGsIuhus+GchshWpl..csGc.......aha..t.h.........pFula.p.psspstshsLssps.pss...................LssWpa.h........susYpAtYPhuahpac.s.shtsplphEtFSPllPtshpcSShPVAVFpaplpN.ssspslplSlhlSapNslGh.....hs.sptssthhhts.....................h.pssthpGllhps.pstp......spupGphslAs.ss..th...clhtpspWsssu.........sst-lWpsFst...DGpl.....................ptsttscphuuAlulchsltPGpscclsFsluWchPs..h...tFussspth...........+tYocFF..uttsp.sAhslAshALpphppa .....................................................................GhPLGG.......lGuGsIspu.hcGpFppapl...psu......................................h................sspFslhhp..ps....tt.t.hhh...hs.sps.p.t................................................sltsWpa.hst......................tpspYpuLYPpuWh.Yc........s.....tlplshcthSPllPpsYpcS.ShPsulFhaplpN...s.s.p.hplolhhohp.N.slG...........t.stts.......................................................ttst.hpGllhpp.p........s.............s.......shsluspps...sh..........plshpst.as.su.............supphW..pphtp.....sGph......................................ttsstts..ppl....uuAlssp.htl.........Ptt.spp..lpFsLuWshP......h.......tFststpha............................RpYTcFa..upptp.su.pl....schALpphtph................................................................ 0 63 110 161 +12049 PF12216 m04gp34like Immune evasion protein Assefa S, Gavin OL lg7 PFAM-B_2496 (release 23.0) Family This protein is found in archaea and viruses. Proteins in this family are typically between 265 to 342 amino acids in length. The proteins in this family are or are related to the m04 encoded protein gp34 of pathogenic microorganisms such as murine cytomegalovirus. m06 and m152 genes are expressed earlier in the intracellular replication phases of these microorganism' life cycles. 
They function to inhibit MHC-1 loading and export. gp34 is theorized to prevent immune reactions from NK cells which would ordinarily recognise and attack cells lacking MHC. 25.00 25.00 29.40 29.10 23.20 24.70 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.87 0.70 -5.04 11 182 2009-07-31 14:41:40 2009-07-31 15:41:40 3 1 5 0 0 165 0 261.40 22 87.96 CHANGED hShssR......................hhLlhllhhsshlhssssssspp..................Cp....p.ppthpphhphtps............htCahKctss...sphsss..sss.plhsCpLP...sVhVNAoWolEWll.sslpuSlsshuYapSossSsPpFptshhsah..phh.h....pp.ltsKsGF+VDpSs.....GNLaVasNAo.......ssscuV+C+LphChW..Tossshs..ss.........DcshhpshSsVLsLPDYssP.ths....h.+....ssasasptpps.sss..............lssLoVlVsLlaVsshslLhYhas..........spLhRRhhopDssspa .............................................................................h.hhhhs.h....hs...h.ssspt..................ss......p..htt.....p...s..........htC.htppss......pphspp..sss.p.hasCpls...slplNAoWpscWll..sshss.......lsstshapSTsoSsPpFpthhsshhs.............sssssGFpVDpss.....GpLalhssss..........tsstslpCpLplClh...........sps.shh..ss..............................................................ppth.pshothhpLssat...t..........................s.ts.h.s.s...tp..thp........................t..hsslslhlshlh..ssshhh..LhhhYt..............s.hhcph............................................................................................................. 0 0 0 0 +12050 PF12217 End_beta_propel Catalytic beta propeller domain of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is typically between 443 and 460 amino acids in length. This domain is the highly conserved beta propeller of bacteriophage endosialidase which represents the catalytically active part of the enzymes. This core domain forms stable SDS-resistant trimers. There is a nested beta barrel domain in this domain (Pfam:PF12195). 
The endosialidase protein complexes to form a homotrimeric molecule. 20.30 20.30 21.10 308.50 19.80 17.60 hmmbuild -o /dev/null --hand HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.39 0.70 -5.77 3 30 2012-10-02 00:45:24 2009-08-03 10:19:26 3 7 24 18 2 29 0 420.90 83 47.65 CHANGED NGELaKITDTPaYNAWTQDKTFVYDNVIYAPFMAGDRHGVsNLHVAWVRSGDDGQTWSTPEWLTDLHsDYPT......VNYHCMSMGVCRNRLFAVIETRTLSsNKLpsAELWDRPMSRSLHlTGGITKAANQQsATI+IsDHGLFAGDFVNFSNSAVTGVSGNMTVATVIDKNTFTVTTsNsQsSDlNNAG+oWsFGTSFHcSPWRKT-LGpIPS.ssushsVTElHSFATIDDNuF..AVGYHNGDVuPRELGlLYFSDAFsSPGsFVRRpIPuEYEsNASEPCVKYYDGVLYLTTRGTLuTpPGSSLHRSoDlGQoWsSLRFPNNVHHSNLPFAKVGD-LIIFGSERAFGEWEGGAPDsRYcGSYPRTFMsRVNVNsW..SLDDVEWVNITDQIYQGcIVNSAVGVGSVCVKDuWLYYIFGGEDFFNPWSIGDNsuKhPYKHDGHPADLYCYRlKIc ..QGELFKITDTPWYNAWTQDKTFVYDNVIYAPFMAGDRHGVNNLHVAWVRSGDDG+TWTTPEWLTDLHENYPT......VNYHCMSMGVVRNRLFAVIETRTVSGNKLQVAELWDRPMSRSLRlYGGITKAANQQVAYIRITDHGLFAGDFVNFSNSGVTGVTGNMTVTTVIDKNTFTVTTQNTQDVDQNNEGRYWSFGTSFHSSPWRKTSLGTIPSFVDGSTPVTEIHSFATISDNSF..AVGYHNGDIGPRELGILYFSDAFGSPGSFVRRRIPuE.YEANASEPCVKYYDGILYLTTRGTLSTQPGSSLHRSSDLGTSWNSLRFP.NNVHHSNLPFAKVGDELIIFGSERAFGEWEGGEPD...NRYAGNYPRTFMTRVNVNEW..SLDNVEWVNVTDQIYQGGIVNSAVGVGSVCIKDNWLYYIFGGEDFLNPWSIGDNNRKYP..YVHDGHPADLYCFRVKIK.. 0 0 0 0 +12051 PF12218 End_N_terminal N terminal extension of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is approximately 70 amino acids in length. This domain is found in the bacteriophage protein endosialidase. The two N-terminal domains (this domain and the beta propeller) assemble in the compact 'cap' whereas the C-terminal domain forms an extended tail-like structure. The very N-terminal part of the 'cap' region (residues 246 to 312) holds the only alpha-helix of the protein and is presumably the residual part of the deleted N-terminal head-binding domain. The endosialidase protein complexes to form homotrimeric molecules. 
22.20 22.20 22.40 27.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.26 0.72 -4.50 5 30 2009-12-16 16:06:22 2009-08-03 10:28:11 3 9 25 18 2 27 0 66.00 69 7.08 CHANGED AlGDGVTDDTsAlousLuuossGpKIDGpGLTYKVSoLPDlSRFKNARFVaERIPGQPLaYVSE-FI AlGDGVsDDToAlSshLusuosGpKIDGtGLTFKVSTLPDVSRFKNARFLFERIPGQPLFYsSEDFI...... 0 0 0 0 +12052 PF12219 End_tail_spike Catalytic domain of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is approximately 160 amino acids in length. There are two conserved sequence motifs: VSR and YGA. This domain is the C terminal domain of the bacteriophage protein endosialidase. The endosialidase protein forms homotrimeric molecules and this domain complexes into a tail-spike stalk. The stalk region folds in a triple beta-helix that is interrupted by a small triple beta-prism domain. The tail-spike is a multifunctional protein device used by the phage to fulfill the following functions: (i) to adsorb to the bacterial polySia capsule (ii) to de-polymerise the capsule to gain access to the outer bacterial membrane, and finally (iii) to mediate tight adhesion to the membrane, a prerequisite for the initiation of the infection cycle. 
25.00 25.00 26.40 113.20 24.00 22.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.86 0.71 -4.39 5 36 2009-08-03 10:56:41 2009-08-03 11:56:41 3 8 24 24 2 34 2 136.90 78 17.33 CHANGED sDsRVSRDFsYGATPNRAIPTFMGTDGVRTVPAPLcFosplss.DlsVsHLTl+ASTSuNIRSEhhMEGEYGFIGKoVPoDsPTuQRLIlSGGEGTSSooGAQITLHGSNSSTuRRAVYNAsEHLFQuuslhPYlDNVsALGGPSNRFTTlYLGSsPIlT .p-paVSRDFsYGATPNRTlPTFMuTsGVRTVPsPlcFoD.....DlsVpuLTl+AuTSupVRAEVphEGsYulIuKpVPoDssTuQRLIVSGGETTSSADGAMITLHGSsSSTPRRAVYNALEHLFENGDVKPYLDNVNALGGPGNRFSTVYLGSNPVVT 0 0 0 0 +12053 PF12220 U1snRNP70_N U1 small nuclear ribonucleoprotein of 70kDa MW N terminal Assefa S, Gavin OL lg7 PFAM-B_2533 (release 23.0) Family This domain is found in eukaryotes. This domain is about 90 amino acids in length. This domain is found associated with Pfam:PF00076. This domain is part of U1 snRNP, which is the pre-mRNA binding protein of the penta-snRNP spliceosome complex. It extends over a distance of 180 A from its RNA binding domain, wraps around the core domain of U1 snRNP consisting of the seven Sm proteins and finally contacts U1-C, which is crucial for 5'-splice-site recognition. 21.90 21.90 21.90 22.60 21.50 20.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -3.66 37 328 2009-08-03 10:59:28 2009-08-03 11:59:28 3 7 272 0 222 311 0 93.50 35 26.69 CHANGED MsptLPPsLLsLFtPRPPLpalsPh-psscc+ppst..loGlupaL..sthpchpc......p.s....ps..sEohhp++tRc+cEKppphppplpcplppasPppDs .......MTphLPPNLLuLFsPRPPl.alPPh-+hP..c..c++pst............hsGlAsal......sph..cc.p-........................sss.......st..sEoppp+pcRc+.cEKh-ptp.pcl.cpplpp.acPppDP................................. 0 74 122 185 +12054 PF12221 HflK_N Bacterial membrane protein N terminal Assefa S, Gavin OL lg7 PFAM-B_2550 (release 23.0) Family This domain is found in bacteria. This domain is typically between 65 to 81 amino acids in length. 
This domain is found associated with Pfam:PF01145. This domain is the N terminal of the bacterial membrane protein HflK. HflK complexes with HflC to form a membrane protease which is modulated by the GTPase HflX. The N terminal domain of HflK is the membrane spanning region which anchors the protein in the bacterial membrane. 20.50 20.50 20.60 20.60 19.90 20.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.22 0.72 -4.21 70 1238 2009-08-03 15:45:40 2009-08-03 16:45:40 3 3 1224 0 245 716 1088 56.70 53 14.02 CHANGED MuWN-Ps......ss.......p..pDPW....................Gp..sp..G....PPDLDElhRchpc+lsuh...FG....Gp...u ................................MAWNpPG................NNs......pcpDPWGssp..........................pss+GG+...-QG...............PPDLD-lFRKLs+KLGGh....hG..GKG............. 0 46 115 184 +12055 PF12222 PNGaseA Peptide N-acetyl-beta-D-glucosaminyl asparaginase amidase A Assefa S, Gavin OL lg7 PFAM-B_2578 (release 23.0) Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 558 and 775 amino acids in length. There is a conserved TGG sequence motif. PNGase A is a protein which cleaves glycopeptides. 
19.70 19.70 26.80 21.00 19.00 18.70 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.53 0.70 -5.61 9 253 2009-08-03 15:52:16 2009-08-03 16:52:16 3 6 146 0 168 266 3 347.50 26 62.87 CHANGED EsFplh.P.ssTs.......hp.hlhhstVFs.ShhpPaVs..lYhPPs....sashlhlNloV.pSpGpQaDRhAhhalssl.VFhsSTtE.ss.....Th.pDhohapsLhpts.ph.hsLuNhhscs..hTG.assslThhh..t...ssphsshhlPl.spto.shNh..uall..Psssshsp.h.lPssT.Rull.l.tpG.us.-EFWaSNl...................shREIQLYhDGlLAGVV.PaPlIaTGGls.haWRPlsuIsshsh+p..pIDlTPhLslhpcus...hplpVssLpsut......lososu..WsloGslhLals..............pus.lhuspPhhshsRhl...sh.ssGhsushpYp.suphplsh..........u.hpacpulcaSssu.hsp.shsQs.....hhptshlspphtt.s.....hpssplhash..Pl ............................................................................P.......................................sthhlhptsF....u.oh.s.p.P.hh...............hs.P..s.s............s.........s...a.....s....h...lllphps..p....spGpQaD.Rl..shla...ls......ss-lh....RoSTsEP...p.s.t..u...l..h.Wsh.KDlopY..sLh.....p...t...s..t..p..lhh.L.sNllssp........hTG.apsslohtha................................t......t................................................................t................................u.........Dhll................P...lstt.............s..........h.l.................s.tpp..h.......t...ph....lP.....p.NshRshlplhs.ssp..us..-.EF.W..Ys..Ns.................................................u...u.......saREV.lh.l.........Du....plsGshhPaPlIaT.GG..l...s.....Ph.hWcPlsulsuFsl.s.hcl-lTPaLsh.L...hD.up.......HphthpVs....s.h....................................................................Whlsutlhla.s......................t....h..........................t..h..................t...t.........h...........h.........h...t...h........................h.p..thth.....p....tt...h...................................................t................................................................................................................................. 
0 42 102 147 +12056 PF12223 DUF3602 Protein of unknown function (DUF3602) Assefa S, Gavin OL lg7 PFAM-B_2582 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 78 and 89 amino acids in length. 20.60 20.60 21.60 20.60 19.80 20.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.41 0.72 -3.59 33 384 2009-08-03 16:04:58 2009-08-03 17:04:58 3 4 131 0 289 349 0 70.60 22 57.83 CHANGED NIh..ssp......tspsssttls..pt..osshpsst......a.oTGRGGuGNht.....ssp..thsRphpDh-..h.css.cpsph..............sGRGGsGN ..................................................................t...p..ss.tsst...............a.soGRGGAGNht........t.p..st.pR.t.psh-.....p.t..t...........................hs................................... 0 77 157 240 +12058 PF12224 Amidoligase_2 Putative amidoligase enzyme Bateman A agb Iyer L Family This family of proteins are likely to act as amidoligase enzymes [1] Protein in this family are found in conserved gene neighborhoods encoding a glutamine amidotransferase-like thiol peptidase (in proteobacteria) or an Aig2 family cyclotransferase protein (in firmicutes) [1]. 
23.60 23.60 25.70 23.60 23.50 23.20 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.69 0.70 -4.77 53 463 2012-10-02 17:21:26 2009-08-04 10:24:11 3 7 264 0 248 457 70 231.20 19 63.15 CHANGED pppphGlElE.hs.......GlshppsuphlsphhGs.psppt..............................thtthpl...................tssph............spaclhhDuSlhppppttt...................................................hshElVoPslphpp....lpplpclhctL+csGAhs.ssS..sGhHlHlsspshs...spslpshlphhhthpchlhcthplsh...R+hs.as..cshspphlcp..........hh....ssshpplhchah..tsscsps.......sth.ttphhpt.lshp.hhp.+s...TlEaRh.s.........uslcsschpthlphshtls .................................................................................................................t...phGlElE.hh..........................................shs.tp.ph.sp..htphhss...ph.................................................................hth..............................................................................................t..ph..................................tpWplhtDu.Sltstpt.........................................................hshElVSP.hlthpp......hp.plp.pl............hp............tL....cpt.s.shs.spo..........CGhHlHl...s..........s..............s.s...............h.....s........hps..l+s.lhph...h...hhhE.sh...lhphhshs............c+...p..a.....s.......ps.hs.pt.hhpp............................h......sht..tl...p.....hhh........p....t...................h.tt......ptath...lNhpthhp.cs...........TlEaRh.s..............ushptsphpthlthshhh......................................................................................................................... 0 90 169 223 +12059 PF12225 MTHFR_C Methylene-tetrahydrofolate reductase C terminal Assefa S, Gavin OL lg7 PFAM-B_2600 (release 23.0) Family This family is found in bacteria and archaea, and is approximately 100 amino acids in length. There is a conserved NGPCGG sequence motif. This family is the C terminal of methylene-tetrahydrofolate reductase. 
This protein reduces FAD using the reducing equivalents from reduced FAD, subsequently reduces tetrahydrofolate. The C terminal of MTHFR contains the FAD binding site and is the catalytic portion of the enzyme. 20.20 20.20 24.20 23.50 19.60 19.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.58 0.72 -11.03 0.72 -4.49 33 210 2009-08-04 10:43:33 2009-08-04 11:43:33 3 6 173 0 95 202 48 93.30 36 30.57 CHANGED lhsuhssh.hhusppththhhpp..CpsCGpChLspTGslCPhs.CPKuLhNGPCGGs.psG.+CEVss-.tcClWphIYcRlcphsph-p.lpplhPPt-appp ..............h...t.sphhhthppththhh.t..CpsCGpChLspTuhlCPhs.CsKsLhNGPCGGs..psG.pCEls......s.....p..tcCsW.hhac+hcthsph-t.htpl.hsshshp.h................ 1 52 74 83 +12060 PF12226 Astro_capsid_p Turkey astrovirus capsid protein Assefa S, Gavin OL lg7 PFAM-B_2608 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 241 and 261 amino acids in length. These proteins are capsid proteins from various astrovirus strains. 25.00 25.00 29.10 28.40 18.70 16.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.45 0.70 -5.12 3 174 2009-08-04 10:47:39 2009-08-04 11:47:39 3 3 7 4 0 161 0 208.40 83 61.93 CHANGED YFVYDFQGcRVSTTETGVFCLuSIPAADSKSRYNNQIToPSIGYRN-SGTGTSFALDuASWWNILDVTQTGVIFGQPRLGVGVIQTMKTLKQHIKDFTEPAVKKYYPGTTNLDQpLKsRLNLAEGDPVISMGDTTGRRAALFYRTSDERFILLFSTTDDPGuQYpSLKMLsFWNWSLSDsKsAFLA+LRTVQFANLs....-SEsupCDSDDDDLSDVTSLFEQADLGDETDFK ......................YFVYNFQG-RVSTTETGVFCLAAIPAATTTSRYNNQITTPSIGYRNASGTGTSFLLDAASWWNILDVTQTGVLFGQPRLGVGVhQTMKTLKQHIKDYTEPAIQKYYPGTTNLDE..Q..LKQRLNLAEGDPVISMGDTTGRRAALFYRTSDEKYILFFSTTEDPGAQYQNLKMLYFWNWSYSDTKQQFLDHLRTVQFANLD....DSQPAPYDSDDDDLSDVTSLFEQADLGDETDFK.. 0 0 0 0 +12061 PF12227 DUF3603 Protein of unknown function (DUF3603) Assefa S, Gavin OL lg7 PFAM-B_2609 (release 23.0) Family This protein is found in bacteria and eukaryotes. 
Proteins in this family are about 250 amino acids in length. 23.70 23.70 52.10 51.80 23.50 23.50 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.37 0.70 -5.20 9 135 2009-08-04 10:58:43 2009-08-04 11:58:43 3 1 135 0 20 77 0 214.10 77 86.22 CHANGED LLDQVPLLKVss.LacYIEN-LS-LPppLLp-VaQKuYlRKNHERhQL-YCFVVTDGpsIlAVDTlGYpIPIRKSRLIPRQEQLVYEMlcstcs.pYphp.cphp..KEYHILSPsPphhsGLTRKERQLKQLLFMALDQL+oopNpAEIRYWhTEWsPppYspIQpMsFE-sWppLYcEs+pGWS-+H.phCEsLIKGQPFFEKLWEhEptsKVN .LLDQVPLLKVDuTLYHYIENELLELPQKLLEDVHHKAYIRKN..HERL..QQEYCFVVTDGKG..IIAIDTIGYNVPIRKSRLIPRQE.....QMVYEMVENVQAE..KYEFQlEEhE...KEHHIL....SPSPalMNGLTRKERQLKQLLFMALDQLHTTKNTAEIRYWaTEWDPSAY..GhVQHM-FEDIWs+LY-EAKsGWS-KHEQLCERLVKGQPFFEKLWEMENEpKVN.............. 0 3 12 14 +12062 PF12228 DUF3604 Protein of unknown function (DUF3604) Assefa S, Gavin OL lg7 PFAM-B_2610 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 621 and 693 amino acids in length. 
20.40 20.40 20.80 20.60 19.80 20.30 hmmbuild -o /dev/null HMM SEED 592 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.00 0.70 -6.35 34 171 2012-10-03 00:45:34 2009-08-04 12:52:57 3 2 97 0 20 168 1789 367.20 26 62.25 CHANGED ssscpsYaGDhHlHTshShDAhhhGs.phsP--AYRaA+G-sl......tps.sG....h.spLscPLDFhsVoDHA-hhGhhpthhssssth.....tpshtp.h.thhhpss.pssttshhhhsshsss.hspsht...tss.t....................htpssWpphlpsA-paNcPG...pFTshlGaEWTutss..usNLHRsVIaRsssphs.phlPFos...hpSssPccLWsahcshcppsuup.....hLAIPHNuNhSNGhMF..thsshs...GpslDtsYAcpRt+hEPlsElTQlKGsSE....sHPhLSPsDEaAsFEh..achushshssttssphh........sYsRsALpcGLplEpphGsNPYKFGhIGSTDoHTuhso.s-EsNFaGKhus.spss..ptphs............tt.............ppthph......uASGlAuVWAc-NTREAIaDAhcR+EsYAToGPRlpVRFFuGasaspsshpss-hsptuYucGVPMGusL........ssup.APsFLlhAh+D.....P.puAsLDRlQllKGWl-.ssGpspEpVYDVAh..Scspphssssph.sslGsoVDlsssoaosslGAsELpslWpDPDFcssQpAFYYlRVLElPTsRWosaDAl+hGhp...........ssPt..TIQERAaoSPIWYsP ............................................s...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...........huuGLsuVaApcpoR-ulh-Ahtp+csYAToGsRlh..lpF.............t...............................................................................................................................................................................................................................................................................................
.................. 0 8 10 14 +12063 PF12229 PG_binding_4 Putative peptidoglycan binding domain Bateman A agb Bateman A Domain This domain is found associated with the L,D-transpeptidase domain Pfam:PF03734. The structure of this domain has been solved and shows a mixed alpha-beta fold composed of nine beta strands and four alpha helices. This domain is usually found to be duplicated. Therefore, it seems likely that this domain acts to bind the two unlinked peptidoglycan chains and bring them into close association so they can be cross linked by the transpeptidase domain (Bateman A pers. observation). 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.30 0.71 -3.85 117 1823 2009-08-04 15:04:53 2009-08-04 16:04:53 3 16 849 4 342 1540 49 115.10 15 32.20 CHANGED hshsspcls.hph.shsphlppshshstssshh.phhtphht.........phs....hplshccpt....lpphlspl......spphsptsp..sAphph......ssss...hsls.s.pphG....hpl.ctcph...hpplhpslpss.ppshp ...........................................................................................t.pp.lt..hth..phpphlpt.hhsh..pht.t.ph.h....phhtp...phtt..............phs.........hphshDp.pt....l.p.sh.ls.pl......spph.sstsp...suphph......ss.ss......hslt..s...tp....G....hpl.Dtcph.tptlhpslpst.pt................................. 0 165 288 322 +12064 PF12230 PRP21_like_P Pre-mRNA splicing factor PRP21 like protein Assefa S, Gavin OL lg7 PFAM-B_2642 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 212 and 238 amino acids in length. The family is found in association with Pfam:PF01805. There are two completely conserved residues (W and H) that may be functionally important. PRP21 is required for assembly of the prespliceosome and it interacts with U2 snRNP and/or pre-mRNA in the prespliceosome. This family also contains proteins similar to PRP21, such as the mammalian SF3a. 
SF3a also interacts with U2 snRNP from the prespliceosome, converting it to its active form. 24.50 24.50 24.60 25.90 24.20 24.40 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.53 0.70 -4.85 16 364 2009-08-04 15:43:10 2009-08-04 16:43:10 3 14 282 1 262 366 5 219.60 28 38.44 CHANGED spshppplcpt..ps.....lLccshpRu-atcpp+ppcpcpc-tpEcc+hpaAuIDWpDFslVtTl-Fsct-.ht-LP.P.lshspLhttoLpp+ppshp..tpss......................pstpcccs-psspsssss..........pthphtsuspuplKlpcshpspspp.ts...........hhhCPIoGchIPtschspHh+l.LlDP+...a+cQ+cph.t+ppp...ophssspVhpNlKpluc+ ...................................................................h......plppp..ps.hp...lL-psh.Rs-Wt+apcpp+p+c--ctEpE+ltaAp..IDWHDFVVVET.lcFp.........s-...psphPs...........P.ho..p..-l.ttt.L...p....ct.......h........t....t..tptht.....t.......t............................................................................p..p.pt..c..ct.tpspt.t..................................t..h...s..s..s....s....t...s.th......hlpcsYssptpp.h..............ss.thhlsPlsuppIPhschpcHMRItLlDP+...WhEQ+c+....tch.tp....s.hss.-ltpsL+pLAp...................................................... 0 89 148 218 +12065 PF12231 Rif1_N Rap1-interacting factor 1 N terminal Assefa S, Gavin OL, Eberhardt R lg7 PFAM-B_2647 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 135 and 146 amino acids in length. Rif1 is a protein which interacts with Rap1 to regulate telomere length. Interaction with telomeres limits their length. The N terminal region contains many HEAT- and ARMADILLO- type repeats. These are helical folds which form extended curved proteins or RNA interface surfaces. 
23.00 23.00 25.60 23.50 19.20 20.50 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.00 0.70 -5.49 34 226 2009-08-04 15:44:22 2009-08-04 16:44:22 3 4 185 0 157 234 1 323.80 22 19.96 CHANGED QLAGsscsS+lDAYhp..LhsuLpuhsslPsptsltpKhsLhspaIpRDls............tp.spus..-spLsppALpLLshhlapspIuuplssDFsh.....Fll-+slsshpssshPKslspchhpllupQsFss+lhTssRls+llssLcsIspp.lpGpullhpRLsIYp+LlpQstphMhscus.Whs......pllsshLpshK-lRpp.AlsLuhphuhslG.....................pppplu+slh-lhspslpsp...phhp.htp+LppMhss..cs...ushVPpIWulllLLL..Rsth..lcpWpahppWlhllQtCFNss-...sKhpAhhAWs+hla.....shpssppsspchlphLhpPl.hsQLc++pssc.t....phpphslsuhtsLL..YYuF+Pss..s.phLDhhWsphl ...........................................................................................................hcsa.....h.t.hhph..pt.......tl...pphshhhphhpp..clt..................s........s.pLsptALphlshhlhpsplsstlspp.h...................l.pslp...sh..ts......sKslhpthhhl.....luh.........QpFs....sc........lh..s.pt..h.....sp..h.lhshhh.h.pp...hputsl.hptlt...............lhh.......pLlpph.t...Mhpp...ss...hWht............................llshhl.p....s..spclp.p.....A.....hslt....hth.s.h.hlt...............................ppt.l..uphh.t.phhspt..h......sp............lpphhts..ps...tshs.plWshhlhLL...tpph.........hcshsahsphLpl..phsFps.s...ss..h+t.AhhuWctll......................shp.....s.h..s..s....t..chlch.LhpPl...p.lcpct.s.h.t....phh.hllh.....pltshl.....h..sF...c...ss...s....t.h.................................................................................................. 0 31 70 124 +12066 PF12232 Myf5 Myogenic determination factor 5 Assefa S lg7 PFAM-B_2654 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00010, Pfam:PF01586. There is a conserved CSD sequence motif. Myf5 is responsible for directing cells to the skeletal myocyte lineage during development. 
Myf5 is likely to act in a similar way to the other MRF4 proteins such as MyoD which perform the same function. These are histone acetyltransferases and histone deacetylases which activate and repress genes involved in the myocyte lineage. 21.70 21.70 21.70 21.70 21.30 21.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.87 0.72 -3.37 28 259 2009-08-04 15:48:27 2009-08-04 16:48:27 3 5 107 0 76 227 0 63.60 47 25.66 CHANGED .paYuus.....S-uoSPp.SsCSDGMsDhsu.P.shopcpssa..sus.Yhspsss.sspssKsssl.SSLDCLSSIV-RI ..............................paYShs.....S-soSPp...SNCSD.G.Mh..........-hsu...P.shspcpssa......cus.Yhspsss...s....tss.+sssl..SSLDCLSSIV-RI.............. 0 9 13 35 +12067 PF12233 p12I Human adult T cell leukemia/lymphoma virus protein Assefa S, Gavin OL lg7 PFAM-B_2655 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are approximately 100 amino acids in length. p12I binds to the immature beta and gamma-c chains of the interleukin-2 receptor retarding their translocation to the plasma membrane. p12I forms dimers which bind to these chains. 25.00 25.00 138.40 138.20 18.10 18.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.66 0.72 -3.72 2 51 2009-12-01 16:56:24 2009-08-05 16:42:23 3 1 3 0 0 51 0 97.90 96 100.00 CHANGED MLFRLLSPLSPLALTALLLFLLSPGDVSuLLLR.PPAPCLLLFLPFQILSNLLFLLFLPLFFsLPLLLSPSLPITMRFPARWRFLPW+APsQPAAAFLF MLFRLLSPLSPLALTALLLFLLsPGDVSGLLLRPPPAPCLLLFLPFQILSNLLFLLFLPLFFSLPLLLSPSLPITMRFPARWRFLPWRAPSQPAAAFLF 0 0 0 0 +12068 PF12234 Rav1p_C RAVE protein 1 C terminal Assefa S, Gavin OL lg7 PFAM-B_2692 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 621 and 644 amino acids in length. This family is the C terminal region of the protein RAVE (regulator of the ATPase of vacuolar and endosomal membranes). 
Rav1p is involved in regulating the glucose dependent assembly and disassembly of vacuolar ATPase V1 and V0 subunits. 20.80 20.80 20.90 20.80 17.50 20.30 hmmbuild -o /dev/null HMM SEED 631 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.08 0.70 -6.36 13 405 2009-08-05 15:44:38 2009-08-05 16:44:38 3 26 270 0 281 395 1 467.30 29 21.95 CHANGED Ghl+oac.Ahl.....sscppclcWhhs.pplpTGIpNsoLhoGSS.hsKsulV...Dps+ppLTIWDs+puh....LEYcccF......c-pIpDLDWTSTscsQSIlulGFsp+VlLhoQLR.....YDYsNpsPoahsI+clsIpshTsHsIGDSsWhssGslVVuoGNQLFlhD+pl-........spsolsophhhss......DLhclsShLNGPL.PVYHPQFLhQslluGKlsLV+cILLpLappL+hhp..ps.hc-l-SsLsls.pcFh...pss..htth.........................sc.hpshscsluusLsEpLo.chsLPhLopHpQhpLhsllEsVspVpKpccshD.NuhRFlLsh+.....ah.++sp....psSlohR-lsWAhHScsp-ILhshlspphp..h.WpcAREstlhhWhp-.ssLhspFEs.lA+tEaoKs-c+DPscCulaYLAL+KKpVLpuLWRhAshH.EQsphh+FLuNsF.sEsRW+TAALKNAaALLSK+RYhhAAuFFLLADsL+DAVNVlhpQLcDlsLAIAVsRVYEGD.sGPlLtElLcpclLPpAhp-usRWhsSasaWhL+++-hAlRALloshhsLhpspshssp..s....hsKSFLs-DPALllLYppLRpKsLpplpGu...cVss+hEa-hlL+suclYsRMGCDhLALsL 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................th..................................................................................................................................................t....t.hh.s......ph.s.t.lpt.Lh..p..tLP.LophpQh.Lhslh....-sltt.h..........p............p....p...........p...........c.........o.....lD-sGh..R.al.....Lsh+...................ah........hhpsp.....................h.p..p..s.....l..shp.....c.....hsWAhHSp..u.p.-.L.......ls.hl.st.tcs.............p.pWpp.hRthGlsaWlcs..ps.L..................Rpph......Ep...lA+....st...ap.....c..........................t....p....DP..l.D.s........ulaYLA.h+.K.K.sllhGLaR................spcp...p+htpF.h.u.psF....s-sRWRpA.ALK.NAauLLuKpRFc...................huAAFFLLA..s..s..L+DAl........pVCl..pplpDlQLAlsIuRl..YE...................u......-......s.................u.......s.......s...h..p...........p...lLppc.lL........s.......................s.............p............p...................s...........s...........ahtShsaWhlpchs.A..lcsLlp......p.........p....................................................................................................................................................................................................... 
0 95 142 225 +12069 PF12235 FXR1P_C Fragile X-related 1 protein C terminal Assefa S, Gavin OL lg7 PFAM-B_2701 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 126 and 160 amino acids in length. The family is found in association with Pfam:PF05641, Pfam:PF00013. This family is the C terminal region of the fragile X related 1 protein FXR1P. FXR1P contains two KH domains and a RGG box that are characteristic motifs in RNA-binding proteins as well as nuclear localization and export signals. FXR1P is thought to regulate mRNA transport and translation. 26.60 26.60 28.30 27.70 23.80 26.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.45 0.71 -4.11 9 307 2009-08-05 15:45:56 2009-08-05 16:45:56 3 10 42 1 87 266 0 115.40 58 24.25 CHANGED VEQLRLERLQIDEQLRQIGhG.RssssRs......-+E+GYhoD-u....ssol+soR....sYssRGRGRRGsshsp...GTNSEhSNsSETES-++cE.Sshuluup-.-..Rps..ptt.stRR..tGtGRG..uGRGR.Gss+ssssohuSshcs.DsNPY.SLhcssEss ...........V-QLRhERLQIDEQLRQI.G.u.....ps.st.........tptth...-ts........p.u.oR....sYts.R.G+GR...RGss..hs.o..........................GTNS-..hSNsSETES-++-ElSDhSlusp-.-....R-....s....h.pRs.stRR.....GGtGRu...uuRGR.....G...s.+sspsp...Shhcs.spNPh.uhhcss-s.p.................... 0 6 12 33 +12070 PF12236 Head-tail_con Bacteriophage head to tail connecting protein Assefa S, Gavin OL lg7 PFAM-B_2709 (release 23.0) Family This family of head-tail connector proteins is found in bacteria and viruses. Proteins in this family are typically between 516 and 555 amino acids in length. This protein is found in Phage T7 and T3 among others. 
19.90 19.90 20.20 20.70 17.70 19.80 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.11 0.70 -6.12 28 301 2009-09-11 16:20:59 2009-08-05 16:50:28 3 2 253 0 33 273 923 417.40 20 87.51 CHANGED appl+scRpsacs+hc-hucaslPthhshtss......ptsppphpphapususculpsLAupLhtulhPsspsWF+Lslss......pt.spttsplcthLsplp+h...hhphhps..ssahssltpshpcLlssGsuhlhhtp......p..shsthpshslspaslpcDs.pGs.l-hlh+.......t..pchuh..cslspplppthpst........cspcplclhptlh.c..............spsh.asshh.spstts..hlpcushcphPalssRapchsGE....sYGcu..st-hLsDl+sLpplpcthlcutphsspshhllsssuhhpstsltsuutshh.stsstsshshhsl.pptsc.hssuttslpslcpRlppuahhs.h.......psupchTApElphpspEhtphLGslhuthspEhlpPllpRshthht..+suh...lPs.....hPp...th.......lcsshhosLstht+tt.shsulpphhshlutluthtsslhstlshsphhctlusthGhs.ssllpoppp .................................................................................................................................................................tthcstR.psacsphpchtchhhPphh.h.pt.......................tppp...pphhpssustuhp.Luutlhtslhs.sp.ahclph.p...............p..tt..lc.hlptlpph...h.phhpp.....sshh.th.phhhplhshGsuhh.hl.p..........t.th.thp.hslsphhltpss.pu.p.lshlh+..........pc.hsh..stlscphp.pthpp..........................p.pcpl...cl.hphlh.p.................tpshsatshh.......tshp........hhpp.sshpthPhhssRa.hhss-....sYGpu..s.phlsshctLp.hppthhphhthshp.s.hh.sssshhhp...tsltsushshh...s........s....ttt...thh.h..........ss.hsssht.hpphcpplppshhhs.h.........tsspphTApElh.htpEhtthLGshhphhpp-hhtPllpphhthh...cts.............ls........Pp.......h..............h...c.ph.oshstht+t..thssltphh....th...h...utl..hth...s.p..h.h.s..tlshsthhpthsth.Ghs..thhhs.t................................................................................................ 
0 12 23 30 +12071 PF12237 PCIF1_WW Phosphorylated CTD interacting factor 1 WW domain Assefa S, Gavin OL lg7 PFAM-B_2805 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 180 amino acids in length. This domain is the WW domain of PCIF1. PCIF1 interacts with phosphorylated RNA polymerase II carboxy-terminal domain (CTD). The WW domain of PCIF1 can directly and preferentially bind to the phosphorylated CTD compared to the unphosphorylated CTD. PCIF1 binds to the hyperphosphorylated RNAP II (RNAP IIO) in vitro and in vivo. Double immunofluorescence labeling in HeLa cells demonstrated that PCIF1 and endogenous RNAP IIO are co-localized in the cell nucleus. Thus, PCIF1 may play a role in mRNA synthesis by modulating RNAP IIO activity. 25.00 25.00 25.20 25.20 23.90 24.20 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.32 0.71 -4.98 14 172 2009-08-05 15:54:26 2009-08-05 16:54:26 3 8 108 0 110 164 19 167.70 39 24.91 CHANGED KLcpLYcps.......s.c..stc.phFhtplaslLhRY.pshh....st...usGhQuALstsVF-sL+ppasVohECFASPLNsha.....tpaCSAF.......sDsDtaFGShGsFhs..apPspG....SFEsNPPFscElhsthsp+htplL...........ss.uppsLSFlVllP.tWtpssh..hpphppStappps.hllsutpHtYhcG ........................................................t.....stt.chF.h.+lasLLhRY...pshh........Gs...t.............t...upuhQuulPspVF-sL+ctFs.........Vs..hECFASPLN..saa...........................ppYCSAF.......sDTDsaFGShGs...hh-......FpPhuG....................SFEsNP...........PFscELhcthssHh.c.cLL.................ts.ospPLSFl..VhlP.pWcps.s...........hpphcpShapRpp....hll.shp.HtYhpG....................................................................................... 1 49 62 89 +12072 PF12238 MSA-2c Merozoite surface antigen 2c Assefa S, Gavin OL lg7 PFAM-B_2755 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 263 and 318 amino acids in length. 
There is a conserved SFT sequence motif. MSA-2 is a plasma membrane glycoprotein which can be found in Babesia bovis species. 20.60 20.60 56.90 20.60 20.20 20.20 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.41 0.71 -4.65 14 159 2009-08-05 15:54:32 2009-08-05 16:54:32 3 2 1 0 4 148 0 182.00 36 75.26 CHANGED VsMPSDsSpDALsAhh-ILcslK-clPFcTSLFDptVLpsL-hQ-sDplFcSLl.RVsLIKphLotFNuFLN..DNPt+hLssppsEMTKYYKKHIsscDspVKDYshLVKFCNDFLDSESPFM+hYKthNpY-EL...VcKtPupssSPs.SSsQusoss.pPupsssu.soossstsus.s.............psscPAcospss.............uuSFTFGGLTVA .....................................VsM.ussupcshcALhtlhtllKpcsPFpTS.FDs.slc.hshQss--lFKhLl-ulhhhchhlpchNuFls...pssch.sphcschpcYYhcpIhscpup...h.shstLsphhpsFlsspushhhh..........cth.........ppa..........-...........s+.Kt.tps..p.h.sSspspsps....pPupspso............................................AspPs+PApospss.................GSSFTaGGLTVA....................................................... 1 4 4 4 +12073 PF12239 DUF3605 Protein of unknown function (DUF3605) Assefa S, Gavin OL lg7 PFAM-B_2795 (release 23.0) Family This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 161 and 256 amino acids in length. 
23.30 23.30 23.60 23.90 21.80 23.20 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.13 0.71 -4.39 21 242 2009-08-05 15:54:41 2009-08-05 16:54:41 3 9 139 0 185 240 3 143.70 31 61.71 CHANGED hsWppl+cIlpsNcL-hhpR.PStLc+YlcapccltupY.GSlhcallpc+Lt..W.s.sp.cth........tstPFt.spDh+lLhNDaPYulE.slsHLVVWoKhtL..sDsso..sDlsscuRpcI-cFVccsFhp+h........ss-pVlWF+NWpuLpSV+ulEHh.HVhlhp .................................sWpplppllt....p..sp...LphhpRtssphc+Y.ht.aptplctp.a.uolhpalltp+Lt..W.t..s..t......................................shs.F.tp....s......sD...h+lLhNDWPYul..p.sl.sHlVVWoKh.l....s.c.ts................scho.psRt.lppalpcp.Fht+h...............stpplhWF+NahsLpSV+ulpHhHVhlhs.............................. 0 53 101 154 +12074 PF12240 Angiomotin_C Angiomotin C terminal Assefa S, Gavin OL lg7 PFAM-B_2808 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 197 and 211 amino acids in length. This family is the C terminal region of angiomotin. Angiomotin regulates the action of angiogenesis inhibitor angiostatin [1]. The C terminal region of angiomotin appears to be involved in directing the protein chemotactically [2]. 
23.20 23.20 23.30 24.20 22.50 23.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.51 0.71 -4.82 8 207 2009-08-05 15:55:09 2009-08-05 16:55:09 3 5 58 0 103 167 2 196.00 58 24.48 CHANGED YVEKVERLQpALsQLQAACEKREQLEhRLRTRLEpELcSLRsQQ+Qupssuuosus.....hssssLpEpLREKEERILALEADhT+WEQKYLEESsMRpFAMDAAATAAAQRDTT..IIsHSPssSh.soSh.......pEElhsAN+RsQEMEsRIKsLaAQIlEKDAhIKVLQQRSR+-suKs-psS............LRPA+SlsSI.su...tsoshpu+spoLoss .....................YVEKVE+LQQALsQLQAACEKREQLEhRLRTRLEpELcuLRhQ.Q......R......Q...u........sst.ss..s.ssp................hsAssLhEh.LREKEE+ILALEADMTKWEQKY.LEEssMRpFAhDAA...........ATsAAQ.R..DTT.....lIsHSsssSh..soSh...........pEEllhAs+Rpt-MEsR...IKsLHAQIlEKDAMIKVLQQRS.....RK-s.uKss..psS............hRPA+Sl.SItsA.....ssGh.uhpoo.p..s............................. 0 15 25 53 +12075 PF12241 Enoyl_reductase Trans-2-enoyl-CoA reductase catalytic region Vella Briffa B, Coggill P pcc Pfam-B_10602 (release 10.0) Family This family of trans-2-enoyl-CoA reductases, EC:1.3.1.44, carries the the catalytic sites of the enzyme, characterised by the conserved sequence motifs: YNThhhFxK, and YShAPxR. In Euglena where the enzyme has been characterised it catalyses the reduction of enoyl-CoA to acyl-CoA in an unusual fatty acid pathway in mitochondria. the whole path performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. 
30.10 30.10 30.30 31.70 30.00 30.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.32 0.70 -5.38 49 655 2012-10-10 17:06:42 2009-08-06 14:20:28 3 4 531 5 162 543 120 236.20 56 59.63 CHANGED ++sGoAGWYNssAFcctAcptGLaA+SlNGDAFSsEhKppsI-hIKp-h.Gp.lDLVlYSLAoPhRpcPc....TGclapSsLKPIGpsaospslD.hccc...plt-sol-PAo--EItsTlpVMGGEDWphWlcALpcAsVLA-Gs+TlAYSYIGs-lTaPIYhcGTlGcAKcDL-psAtslspcLus.hsGpAaVoVhKAlVTQASuAIPlhPLYluhLaKVMK-cGhHEGCIEQhpRLFpcpLY ......t.KsGTAGWYNsAAFc+hAcptGLYA+SlNGDAFSsEhKppsI...-hIKp.....DL...G...p.VDLVVYSLA....SPhRpp.Pc.........TGEla+SsLKPIGcs......h.oupulD..Ts+-....slhEsolEPA........T.-p.EIpsTVsVMGGEDWchWI-ALtcAGVLA-GsKTlAaSYIGs-lTaPIYacGolG+AKhDLDpsupuLsp+Lus.hG....GsAhVuVLKAVVTQASS..AIPsMPLYluhLFKVMKEcGsHEGCIEQl.RLFp-+LY............... 0 43 84 122 +12076 PF12242 Eno-Rase_NADH_b NAD(P)H binding domain of trans-2-enoyl-CoA reductase Vella Briffa B, Coggill P pcc Pfam-B_10602 (release 10.0) Family This family carries the region of the enzyme trans-2-enoyl-CoA reductase, EC:1.3.1.44, which binds NAD(P)H. The activity of the enzyme was characterised in Euglena where an unusual fatty acid synthesis path-way in the mitochondria performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. The full enzyme catalyses the reduction of enoyl-CoA to acyl-CoA. The binding site is conserved as GA/CSpGYG, where p is any polar residue [1]. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.64 0.72 -4.19 44 648 2012-10-10 17:06:42 2009-08-06 14:30:23 3 3 537 5 165 901 265 78.60 60 19.95 CHANGED lIcP+l+GhICposHPhGCptsVppQIsYl+s...pstl.sG...PK+.......VLVlGASoGaGLASRIssAF.GssAsTlGVhFE+sso.- .....................................IIKP+lRGFICsTsHPsGCEtsVccQIsYlKs..........pGsI..tsG............PK+...............VLVlGASoGYGLAuRIuAAF.GuGAsTlGVFFE+suo......................... 0 43 86 123 +12077 PF12243 CTK3 CTD kinase subunit gamma CTK3 Wood V, Coggill P pcc Pfam-B_12814 (release 23.0) Domain The C-terminal domain kinase (CTDK-1), is a three-subunit complex comprised of Ctk1, Ctk2, and Ctk3, that plays a key role in regulation of transcription and translation and in coordinating these two processes. Both Ctk2 and Ctk3 are regulated at the level of protein turnover, and are unstable proteins processed through a ubiquitin-proteasome pathway. Their physical interaction is required to protect both subunits from degradation, and both Ctk2 and Ctk3 are required for Ctk1 CTD kinase activation [1]. The mammalian P-TEFb is mirrored by the combined complexes in yeast of the CTDK1 and the Bur1/2 [2]. 33.70 33.70 33.70 33.70 33.50 33.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.55 0.71 -4.54 9 129 2009-08-07 14:05:10 2009-08-07 15:05:10 3 5 116 0 105 123 0 134.20 37 32.69 CHANGED hDPFEVRhpFoshLp+LsAShpShpKAApaALKa.pDh-EDLasCILEpLEcs..shNsRsNIhaFl-p.hsE.shpps......pssYlchlpRDlh+lVDsVsPcsssst.ANlpsVR+lLpslpp+phls.pplp-l..........tsLcsRc ........hDPFEVRhpFos.Lp.+LsAShpShQKAApaALKa..+-hsEDLasCILEpLE+s..................shNs..RsNIhYFI-p.lh-huppps.......................p.sYlchl....pRDlh+lV.-uVsPsssu........u...h....sNl+ps++.....VLpslps+phLss.phlpcl..........t.lpt+.............................. 
0 32 62 90 +12078 PF12244 DUF3606 Protein of unknown function (DUF3606) Assefa S, Gavin OL lg7 PFAM-B_2813 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 85 amino acids in length. There is a single completely conserved residue G that may be functionally important. 20.80 20.80 21.30 23.80 20.50 19.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.19 0.72 -4.34 28 202 2009-08-07 15:46:44 2009-08-07 16:46:44 3 2 117 0 106 199 9 55.80 27 86.33 CHANGED sssps+tsstDpsplsls-phElpYWs+chslopspLcpAVcpVGssspsVcpcLsp ..................tpppttstD+splshs-paElpYWs+chslotppLcpAVcpVGss.sppVctaLt........... 1 18 48 69 +12079 PF12245 Big_3_2 DUF3607; Bacterial Ig-like domain (group 3) Assefa S, Gavin OL lg7 PFAM-B_2816 (release 23.0) Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 27.00 5.00 27.00 5.00 26.90 4.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -8.71 0.72 -4.41 11 1694 2012-10-03 16:25:20 2009-08-07 16:47:49 3 301 461 0 392 7792 931 42.20 24 4.75 CHANGED sustps.....tpsstSshhPsushc-.aolpFhlhDpAGNpsphshpphhhDshhss.....Pstsas ............................t................................................................t..........a.....p....lp.....h..h..s..sDpAGN....p........s.s..p...t.h...t.h.h.h.cs................h........................ 0 165 256 330 +12080 PF12246 MKT1_C Temperature dependent protein affecting M2 dsRNA replication Assefa S, Gavin OL lg7 PFAM-B_2862 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 231 and 255 amino acids in length. There is a single completely conserved residue P that may be functionally important. 
MKT1 is required for maintenance of K2 toxin above 30 degrees C in strains with the L-A-HN variant of the L-A double-stranded RNA virus of Saccharomyces cerevisiae. MKT1 is a 93 kDa protein with serine-rich regions and the retroviral protease signature, DTG. This family is the C terminal region of MKT1. 19.40 19.40 20.60 20.40 18.50 16.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.29 0.70 -5.37 24 169 2009-08-07 15:47:53 2009-08-07 16:47:53 3 8 150 0 118 167 0 233.90 25 31.59 CHANGED RhLpLhGals..........cppphTtaGcsLtpsssh.............htEshllhlELlRhslLssp..............s....sshpss..sc-pphlhLlSRlholhplpppshsYpGPls+plLsF+shlphl+pslppLlEsllsshlhpGcssp......stsphtplstpLPFhhss.ssshGlhschaLpchhpptpsppps.p...h.hst.......................a.pssslpc-LspuhpFWcslhpsspplsppphhsts..........hphascAscalppth ...........RhLpLhG..als..........pppphTtaGpsL.pshp...stp..........hpEshhlhlELlRhslLssc.................................s....tss.hpss....s--pp.hlLlSRlholhpl........pp.....c......s.....hsYs.GPls+pLLsF+shlphlppsLRpLhEs..llsuhllsGcssR............................spschtplshp..LP.Fhtss.ssshGlhs+haLpch.p..p.tpsp..s.pt..ph..hh.tsp...........................a.ps..shppsl.chuhpFac.tlhpshpplsppth.stp...........t.appAspahpt................................... 0 42 69 102 +12081 PF12247 MKT1_N Temperature dependent protein affecting M2 dsRNA replication Assefa S, Gavin OL lg7 PFAM-B_2862 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 231 and 255 amino acids in length. There is a single completely conserved residue P that may be functionally important. MKT1 is required for maintenance of K2 toxin above 30 degrees C in strains with the L-A-HN variant of the L-A double-stranded RNA virus of Saccharomyces cerevisiae. MKT1 is a 93 kDa protein with serine-rich regions and the retroviral protease signature, DTG. This family is the N terminal region of MKT1. 
19.40 19.40 30.80 29.30 18.90 18.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.99 0.72 -3.66 25 157 2009-08-07 15:51:14 2009-08-07 16:51:14 3 9 139 \N 111 153 0 88.80 33 11.26 CHANGED PsDhH-llGp+LPtElYaYhStGLlusplhsslopG.lh-psP..Lsst.....ocpYR+Llt.cplt.l....+spslsLlsppL...s+haphKpIpshhWF ..PsDlH-llGp+LPpElYaYhSpGLlusclhsslosGplh..p...P.............Lsst....tSppYR+Llt.cslh.h....+spslsLLsp.L...pRaaphKpIphhhWa................. 1 36 64 96 +12082 PF12248 Methyltransf_FA Farnesoic acid 0-methyl transferase Assefa S, Gavin OL lg7 PFAM-B_2872 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 110 amino acids in length.Farnesoic acid O-methyl transferase (FAMeT) is the enzyme that catalyses the formation of methyl farnesoate (MF) from farnesoic acid (FA) in the biosynthetic pathway of juvenile hormone (JH). 21.40 21.40 21.70 21.50 20.90 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.15 0.72 -4.00 23 286 2009-08-07 15:52:06 2009-08-07 16:52:06 3 50 107 0 180 312 1 98.80 28 18.05 CHANGED HltLoss.......hPh-ss.hhEIVlGuapNTtSsIRppht........ssslscspoPslL.sshc.phahlp.apsGplpVsps...G.cstPFlpap..Dsp.....thslpYhuF.osWssssha ....................ltLoss........sp-..ssshhElhlGGapNspSsIRpsp..................................spslsp.s.pTssl.l.s.spEh+pFWIs..hpsGhlpVGps.....u...c..t.pshhpap..-sp.....shsl.pahGh.ouWss.s............................................................... 0 77 104 162 +12083 PF12249 AftA_C Arabinofuranosyltransferase A C terminal Assefa S, Gavin OL lg7 PFAM-B_2900 (release 23.0) Family This domain family is found in bacteria, and is typically between 179 and 190 amino acids in length. This family is the C terminal region of AftA. 
The enzyme catalyses the addition of the first key arabinofuranosyl residue from the sugar donor beta-D-arabinofuranosyl-1-monophosphoryldecaprenol to the galactan domain of the cell wall, thus priming the galactan for further elaboration by the arabinofuranosyltransferases. The C terminal region is predicted to be directed towards the periplasm. 25.00 25.00 86.80 86.20 19.80 18.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.08 0.71 -4.85 9 161 2009-08-07 15:52:31 2009-08-07 16:52:31 3 2 159 0 34 126 0 178.10 55 27.40 CHANGED sIPcsLcssIslAYTD..TDGpGpRAD+cPPuAspYYsplDcslpcpTG+sRscTVVLTsDhoFLSaYPYaGFQuhTSHYANPLA-FspRAstI-.....sWSpLpssschlAALcpsP....WpsPssFlhRpuu....ps.....uaTlcLApDlYPNpPNVRcYsVpFssulF..-sPtFslpplGPFVls.sRp .....................DIP-hLps-lslAYTD..TDG.GpRuDRRPPGuppYYspIDtsIpchT.G.+.tccTVVLTADauFLSYYPYaGFQuLTSHYANPLApF-cRAspI-.....SW.....up...L...p...os--FltALDphP....WpsPsVFlhRtuu....ps......................uYoLRLApDVYPNpPNVRRYsVphssslF..sDP+.FsVpp..lGPFVlslRp....... 0 6 22 31 +12084 PF12250 AftA_N Arabinofuranosyltransferase N terminal Assefa S, Gavin OL lg7 PFAM-B_2900 (release 23.0) Family This domain family is found in bacteria, and is typically between 430 and 441 amino acids in length. This family is the N terminal region of AftA. The enzyme catalyses the addition of the first key arabinofuranosyl residue from the sugar donor beta-D-arabinofuranosyl-1-monophosphoryldecaprenol to the galactan domain of the cell wall, thus priming the galactan for further elaboration by the arabinofuranosyltransferases. The N terminal region has been predicted to span 11 transmembrane regions. 
22.80 22.80 116.60 36.00 22.70 22.70 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.58 0.70 -5.76 5 168 2009-08-07 15:53:10 2009-08-07 16:53:10 3 3 160 0 37 136 0 421.50 50 67.31 CHANGED LGQMllulVVAulVAsVuLhAIARVEWPAFNSSNQLHALTTVGQVuCLAGLsuuGLLWR+..G....R.R...plARLGohshlSAFoVVTLGMPLGATKLYLFGISVDQQFRTEYLTRLTDTAGLRDMTYIGLPPFYPAGWFWLGGRlAALTGTPAWEMFKPWAIlSIAIAlsVALVLWusMI.RFEYALVVolATTAlsLAYuSsEPYAAlITVLlPPsLVLAWuGLuu....pcpGGWAAVVGsGlFLGlAAoFYTLLlAYAAFTlslMALLlAuu.......hRRuaDPLlRLlVIAVlSGAIALlTWuPYLLtAhRGsPA-SGTAQHYLPc-GAcLsFPMlpFSLlGALChLGTlWLVsRARoSsRAGALAVGVlAVYLWSLLSMLsTLuGTTLLSFRLQPTLTVLLuAAGAFGFVEssts.....lutRhps..uRRVlusAsAVGAlGAl .....................................htlssAlllAssVulVuhhAlspVpWPAFsoSN.h+ALTTVGQlssL..sslsuhshlW++..s..............................R..R......hLspl.suhlhsuu...hsVsTLGhPLuATKLYLaGISVDQpFRTpYLTRLTDosuLpDMoYlsLPPFYPsGWFWlGGRhAsLhGhPuWEhFKPWAIhSlAlAsslulsLWtRhl.phchAhhlslATs.AlsLsauusEPYAA.lIs.lh...lPPhLV....LshpuLpu............................ttuWuAllGsGlFLGhuAoaYTLhsAhsAholllhAlllAuh.......................hct..u..h..cP.L........h.....RLsVlGsluhAIAhhsWhPYLltshps.s.susouoApHYLPs-GAtLshPMlphSllGslCLlGhlWLVhRu+sss.AsALulGVlulYLWsLLSMlsTLutTTLLuFRLpPsLolLLssAGshGhsEhsts.......hstp.......uR...t..lh.shsusl.uhhGul............................... 0 6 23 34 +12085 PF12251 zf-SNAP50_C snRNA-activating protein of 50kDa MW C terminal Assefa S, Gavin OL lg7 PFAM-B_2919 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 196 and 207 amino acids in length. There is a conserved CEH sequence motif. SNAP50 is part of the snRNA-activating protein complex which activates RNA polymerases II and III. There is a cysteine-histidine cluster which contains two possible zinc finger motifs. 
23.30 23.30 23.30 25.70 23.10 23.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.89 0.71 -11.46 0.71 -4.93 20 210 2009-08-07 15:54:04 2009-08-07 16:54:04 3 7 163 0 144 212 4 183.30 30 46.58 CHANGED tpcahlLuoQsLs-L+DshhChsstph.h..............spsphpuuaFFIcGsFYsDhR.....sscshDhSpsIhpa......sppps.............shuc.hpstpM-csphsDLplplGpPhhahHQGsCEHhllhoslRllsptc...shccstYPhhshcsphppptC.hC+httAphllhs.sphtscsPuahCssCFcha+asssGccls......pFpsatYhcc ...............................................................................h.tphhlLusQpLs-L+Dtl.C.s-h.h................................................................hstphhpSuaFalcssF..Y..sDhR................spshDhSpsIhcW......tpppp............................................thsp..hp.....st..pMcps..pasDL.p.l+l..G...P..YlYhHQ.............GsCEHhllhsDl..R...h.......hp.pc.............s.s........pt.YPhhh..h.p.........h.t.tppC.lCphh.A...phls.s.cphtspsPsahCc.Cachh+hs.tpGphlh......pa.sh.h.................................................................... 0 62 85 120 +12086 PF12252 SidE Dot/Icm substrate protein Assefa S, Gavin OL lg7 PFAM-B_2926 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 397 and 1543 amino acids in length. This family is the SidE protein in the Dot/Icm pathway of Legionella pneumophila bacteria. There is little literature describing the family. 
18.50 18.50 18.50 18.50 18.40 18.20 hmmbuild -o /dev/null HMM SEED 1439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.27 0.69 -14.42 0.69 -7.26 7 111 2009-08-07 15:56:26 2009-08-07 16:56:26 3 4 15 0 19 89 0 795.40 33 87.88 CHANGED MPKYVEGVELTQEGMHAIFsRMGHusIoSGoIYNG.PTIDptALspQGFMPVLTGVGP+pDSGHWIMLIKGPGNQYYLFDPLGKoSGEGYQNILAsQLPhGSTLSVIPNsssLNhGLCGYWVASVGLRA+AtLNpcsPPsLhNLGQTITsEMRNELscDGYcKITsWLRAVuDEFP...cGDsQP.DAKALREsTEKs.L+I-IPs.V.PsKDpoPKElslcPTsPQ.s.VPsWNGFSLaTD-sV+sAApYAYDNYLGKPYTGsVEusPAshGGph........h..RQpHGLAHTLRTMAYAElIVEEARKAKLRGETLtKFKDGRTlADVTPEELKKIMIAQAFFVsGRDDEtSstcp......YtKYHEQSRDAFLKYVcDNcSsLIPDlFKDpc-VsFYApVIEDKsHDWsuoPAHVLINQGHMVDLVRVKQPPESaLEpYFsohpsWIGopAsEAVFuhQRQFFHATaEVVsGF......DS-NpEPHLVVsGLtRYVIGEDGpPIREt.cpupK..cG-LKsFsQsYKLK-NERFMRVDEaLKLPEVQpTFPGuGK+LEGGhPGhs.hsYapRLNShpRARCENDVsFCLtQLQpAHcKsKIDPIKpAFQSSpcKsRRQPNhDEIAAApIIQQIhANPDCIHDDHVLINGQKLEEpFFRDLLAKC-MAVVGSLLNDTDItNIDTLMRHEKDTEFHSTsscAVPVK.IG-tW..cpRIpspusslT...QhKHDLIFLMQNDAWYFSRVNAIAQNRDKGSoFKEVLITsLMTPLTsKuLhDTS+u.........sPp+LaRGL.sLsEEFsptLINQANshIANTTppLFTDhSsEAFKQIKLNDhSphSuRTsASTTTshpLspphWs.....SNVIFEMLDPDGLLHPKQVGpHssGoEsEFSVYLPEDVALVPlKVThD.GKTppGcsRalFThVAVKSPDFIPRHESGYAVEPFLRMQssKLsElpsuIEK..sp.t..................PphEsIFs.LQscluh.ph.sclSstYKsFLpppVsPVLEpCLsulhpsssshLsKALAsFPoDpQWSAFN.sEAhpAKtQMDAlKQMltpKVVL-.....ALTQCQ-ALEKQNIsGAL-ALKpIPuEKEhup...IupELREQIQus+Q-....LESLQRAssTPlVoDccKV+.RY-sLIpssoK+lT-LEKupLssLDslKKuIusLsNLpQElTlLRNEKlRMHosoDK.VDFSDIEpLEpQlQ.lcTKLsDAYLlplTKplSAL-p.hPKs.o..-lKohlupFhshhs-IEhLRNERIKKHGuSKDPLDhSDLDKLpGpLQtlNQSLVssLlpsIRsSlsQMc.spTFchQcctIppNh-hLscLEKoLDcScsucK.+EDlsKLpsLLlsKQ.KAYPpMlQLQh+SEshIpQLRElCpsHaDsLsKsRpARLQEL-+...puGI...lGNlhasl...TshlGLTsDEpl-I+hKpQoLARFKs-LhNDK.DhDpLIspLAcKpPSELQEuLGISc- 
.........................................................................................................................................................................................................................................................................D..VhtshpYsapphLtpsYs..Gs.....h..c.t..ss....t...t.h..........................R..HGLuHThRThhhuplhhEtu+.........tthtsGpolADlo.p-l+Kl.IAQhFFVsGR-sEtS.hs....................YtcYH..utptF.cYsccp.....sclFp.pc-lphYuthI.Dc.tp.atsostt.hLlp.uHMlDLhRsKtP.Eshlt.........h.ph...hG..ss.slhthtRthFhAThtsVs.h......ssp.........sa.s..s................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pp.p...p.Ll.th.pplh..tp.t....t..pp.h......................................................................................................................................................................................................................................................................................................................................................................................................................... 0 8 8 19 +12087 PF12253 CAF1A CAF1B; Chromatin assembly factor 1 subunit A Wood V, Coggill P pcc manual Family The CAF-1 or chromatin assembly factor-1 consists of three subunits, and this is the first, or A [1]. 
The A domain is uniquely required for the progression of S phase in mouse cells [2], independent of its ability to promote histone deposition [1] but dependent on its ability to interact with HP1 - heterochromatin protein 1-rich heterochromatin domains next to centromeres that are crucial for chromosome segregation during mitosis. This HP1-CAF-1 interaction module functions as a built-in replication control for heterochromatin, which, like a control barrier, has an impact on S-phase progression in addition to DNA-based checkpoints [2]. 21.20 20.90 21.30 20.90 21.10 20.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.90 0.72 -3.86 32 308 2009-08-11 15:32:32 2009-08-11 16:32:32 3 13 258 0 216 310 1 80.70 39 11.05 CHANGED hKhlpFtEssRPPYhGTao+..........hh.h..+sPhppchsshDY-YDSDtEW.....EE..-E-G...E...-l...-.s........----.....---...-.-...p--...........-hDs..F ........................KhLpF..p.EshRPsYaG..Tas+................p...sthlhsRsPhsp.....-.......s....lDY......-hDSDtEW.....EE......-.E.G...E.....sl.c.s........--.--.....-.--......c..-....s--.......-.Ds.......................................... 0 70 115 177 +12088 PF12254 DNA_pol_alpha_N DNA polymerase alpha subunit p180 N terminal Assefa S, Gavin OL lg7 PFAM-B_2966 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00136, Pfam:PF08996, Pfam:PF03104. This family is the N terminal of DNA polymerase alpha subunit p180 protein. The N terminal contains the catalytic region of the alpha subunit. 
20.60 20.60 21.30 20.60 19.80 20.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.36 0.72 -4.16 37 311 2009-08-11 15:36:44 2009-08-11 16:36:44 3 14 261 0 202 307 3 65.20 42 4.65 CHANGED LpcL+thRputppttsth..csc...-tp...pIYDEVDE--YcclhcpRhtp.DDFlVDD...s...G.GYsDpGt--.W-c ................................LtpL+thRputpppt.ph.........-V-......-.p...slY-EVDE-pYp+lVcpR.pp.DDFlVDD..................s...G..GYsDDGcE..a-.............. 0 61 103 161 +12089 PF12255 TcdB_toxin_midC Insecticide toxin TcdB middle/C-terminal region Assefa S, Gavin OL lg7 PFAM-B_3032 (release 23.0) Family This domain family is found in bacteria, and is approximately 150 amino acids in length. The family is found in association with Pfam:PF03534. This family is the C-terminal-sided middle region of the bacterial insecticide toxin TcdB. 25.00 25.00 29.90 28.90 23.60 22.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.77 0.71 -4.54 22 147 2009-08-11 15:37:11 2009-08-11 16:37:11 3 14 112 0 49 132 6 142.80 38 8.34 CHANGED hhtRALKGplLRoElYGhDso.........tptshPYoVopsRhpVR.lpstss..p..slhhshslEshoapYE.RlssDPpsspplsLpsDpaGpsLcolslsYPRRspst.........s.a....ss.hspthhss..ShD-pQptL+lstsppohpHlspsps.......hhLGLP .....htRALKGplLRoElYGLDso.........p.sshPYoVspsRYpVR.lQssss.....p......................s...VhhshslEploYpYE......R.ls............sD.PpsspplsLpsDpYGpsL+oVsIsYPRRspss.........ssa.....s.hspthhssphDppQptl+lspppppahp..ssp.....ahLGLP................................ 0 15 27 41 +12090 PF12256 TcdB_toxin_midN Insecticide toxin TcdB middle/N-terminal region Assefa S, Gavin OL lg7 PFAM-B_3032 (release 23.0) Family This domain family is found in bacteria and archaea, and is typically between 164 and 180 amino acids in length. The family is found in association with Pfam:PF05593. This family is the N-terminal-sided middle region of the bacterial insecticide toxin TcdB. 
This region appears related to the FG-GAP repeat Pfam:PF01839. 24.80 24.80 24.80 25.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.20 0.71 -4.82 29 294 2012-10-05 17:30:43 2009-08-11 16:37:22 3 55 209 0 101 298 21 169.80 24 8.84 CHANGED spsGstau..pshplshPsssphs.t.pplphADlpG.Ghsslll.ohsth..........ts...tpahhthstt......p....PhLLsslsNshGspsplpYt....oSsphhLc-ptt.t.t....ssspLPhshpllpchshpDtl....s.Gsp...hsppapYppGhaDshEREFpGFuplpppDssss...................ssh.spsaatTG ......................................................................................................hs.hht.s......phplsDlpGpGhssllh...sssp.............p.......phhhh.shsss...............................+PhLLsslssshGupsplpYc............................s.Ssph....h..hs.-p....tt.tt...........................sspLPh.s.lpllsp.......s.pp..p.Dtl...........s..Gss.........hsppapYpp.G..haDstpR.-FhGFupVpppDhsst...........................psssh.spphah..................................................................... 0 40 59 86 +12091 PF12257 DUF3608 Protein of unknown function (DUF3608) Assefa S, Gavin OL lg7 PFAM-B_3083 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 280 amino acids in length. The family is found in association with Pfam:PF00610. 
25.00 25.00 25.30 25.30 23.20 24.10 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.71 0.70 -5.50 9 344 2009-08-11 15:40:12 2009-08-11 16:40:12 3 8 247 0 223 351 9 242.40 40 18.31 CHANGED VElalKDs.LsRssMWshSopLlsoCVahspRlsaLsshpusVKuIY+NGKKlhSGYIs-NTKIlFRSESA+llFllQlocEMWHFEEsGEhhFHKlVNSLFPKIF++W+-hsTHHoITIVhhsSlDhos.sassLs.GER.pNpcDYFRlVVDQVslhaW-cIMtsLRhEFhphp+Dlh.pp....pDssthshcGphhPslKuNlLEsINluhTllsD.F+ssDL+HTssHlIlloPGoGLaDV-YDLLh.To+KhhSl-hulDlICLSpPPLHlVPLFRY ..................................................................VElsh+DpalsRuDMWRl.t.p.p.LsspssYhs..p+l.......a.h....s...l+.sps.......t.pla..h...p.....s.........c.+V........h........sGah.s.ppT+.lFRSpSA..hhhlFIQ.........hSpEMW-....F.........-......................G................-lhFpK...slNuFLsc.LF.........p+Wtphs.....s..p..H.lT.lVL.....F.......s........p...h.....t...........................s...................t.............................t.............h........t.........p.........l.........t.......................s...................p.........t..........p.................h........c........DFY+V.V.Vp..p.t.p..t...cW.s...s.lLhpl++.F.hpa..h.............lhh..........p.............................................................................................h......t......ups.......Ss.....u.h.....p.GNhLEAl..........Nluhs....hhsccalsRshsRTGp..l..VITPGsGlF..-V....D.hpLh....hlTpp+hl.sp.Gl.G.lDLlChuc.PLHsVPLF+a.............................. 0 86 125 187 +12092 PF12258 Microcephalin Microcephalin protein Assefa S, Gavin OL lg7 PFAM-B_3105 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 384 and 835 amino acids in length. Microcephalin is involved in determining the size of the brain in animals. It is a protein, which if expressed homozygously causes the organism to have the condition microcephaly. 
Organisms expressing the mutated form of this protein in a homozygous manner develop a condition called microcephaly - a drastically reduced brain mass and volume. Microcephalin is predicted to contain three BRCA1 C-terminal domains, the first of which is the probable microcephaly mutation site. 25.00 25.00 26.10 26.10 19.30 19.20 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.34 0.70 -5.48 6 148 2009-08-11 15:40:18 2009-08-11 16:40:18 3 5 90 0 19 138 0 368.30 56 71.39 CHANGED ESFAuGLpSSFDDLCGtouCGsQEcKLtt.ls-hpoDsChSSPVLKsuSlaSSAussaLsQLoPQKssusLSKpElstQRDsAGclVTPDpKQutGlupEshDEKasLSPThSuoKs+.hsHSps+uSSAKRKRsu-hSpSuPcE+LK.KRspt+ushPplQLaKS-spLppsstsAscu.usttSSYDDYFSPDNLKERsSEsL.PtsQ.suuPAtFpC.RuLSKpERpsIL-MuDFSClGK+sRols.loslhAKosSSLpKPupscusAshuClhS.tTuAs--oPGsCsQAGs.tt-DspPtGsutspTh-s.Ahs.....cG.cGDloPLcGuSpEh+EssDs+STQKEGssscspsSutGEsQs-sc.uFusDsss-pSsp-KEslupGhSt ....-.FAGsLHSSFDDLCGsSGCGsQERKLGG.ls-hKSDsChSShVLKsssl+sSsS.saLsp.oPQKhhusLSKEElshQR.shAGclVTPDpKQAtshSptsF-EKhpLSPThSuTK.s+hhh+S.p...PpSSSsK......RKRVStsSpS.PPKE+hK+KRSlR+shhPRLQLh+SEsuhppsuusA.lEsLusGESSYDDYFSPDNLKERs...SEsL.Pt.sQ.Po.uPA.hpC.R.SLSK+ERTSlhEMuDFSClG+psRol-.hTs.hTAKohsS..QKsuN..scucsshSsVTSccTsAsE-o.tsCtQAsspt+-DA.pP.tGssh.saTl-s.sh.............KGpcGDhTP.LcGS.cE....h....KEhlslKST.....Q.p.c.GTs.SKhsNSuEGE.....AQS-cc...sFlsDsshEpSsE.E+EsLPtG......................... 0 1 1 4 +12093 PF12259 DUF3609 Protein of unknown function (DUF3609) Assefa S, Gavin OL lg7 PFAM-B_3173 (release 23.0) Family This domain family is found in eukaryotes and viruses, and is typically between 348 and 360 amino acids in length. 
24.10 24.10 24.30 25.60 23.80 24.00 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.01 0.70 -5.78 7 125 2009-08-11 15:40:30 2009-08-11 16:40:30 3 5 70 0 28 124 0 295.00 28 53.39 CHANGED LthLGspL+popsAIhEAlhSAaQspLSPhVLolpQLptch..h.ucLspthtL.hpp.oISDIYplAols.stQhsNalVFpIpVPLlDsEpFNlYRLTPIPR.lsNGpIQLl-TETPYLGIsDHLDRYFPLQN..LDDClcLssERalCc.spITYGssDcohsCoLAAIRNpoSpsCThRpVpcpSlWT.hlAPNSWMVALoKELoLhGVCSs-cQEL+INGSGILoIpSDClVRSssVTLQGpspKthPS+puYASLQhssposccsshh-SFspLhpIlspLchpQcp.psltsh.hshlsVCPsllLIsLLlShsWhY+saRp+t.....sQpPh..Vssh.sspNcspTsshPLLEKpEl .................................................................................lpcAl..sA.pp.s+Lsshllo.ppL.sch..l...s.cLs..................t..p.....s...h...+sl...h.h..lss.....sh.....h.pt....ss.plhF.IpVPLl-s..pp.FslY+lsPIP..p....hsNs.....p.........h.p..ll.sco..Y..lG........l..ssc..h..c..p..Yh.L..ps..hs..sChcls.p.cphlChps.p.lhassssssh...s...Cs..l.......t......hh..+.s.....p....s.....s.......psCs.l.R.t.sc..p.p.p.lahphss..sN.sWh.h.slsc.-lsLhss..Cu..s..p...p.....p...pl.p..l..s.GoG.lL..o.l..pssChl+.......ostss......lp...............................................................................................................................................................................................................h.......................................................................................................................... 0 14 17 25 +12094 PF12260 PIP49_C Protein-kinase domain of FAM69 Assefa S, Gavin OL, Coggill P lg7 PFAM-B_3196 (release 23.0) Family This is the C-terminal region of a family of FAM69 proteins from Metazoa and Viridiplantae that are active protein-kinases. The family members have a short transmembrane helix close to the N-terminus, and thereafter are highly enriched with cysteines. FAM69 proteins are localised to the endoplasmic reticulum. 
Many members also have a short EF-hand, calcium-binding, domain just upstream of the kinase domain. The exact function of the more N-terminal family is uncertain. 25.20 25.20 25.20 25.40 25.10 24.90 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.47 0.71 -4.88 36 397 2012-10-02 22:05:25 2009-08-11 16:45:28 3 7 89 0 254 354 0 186.10 26 48.34 CHANGED lWsLLp....ps.Eal...lh.lhpc.p.-...hhP+lLGsCGchassEplshsphht..................hpphhpssWtt+u+lAltLL-hl-cl.pps..p..hahCDlshpsFGls.schc...lp..hlDhcplhscsphcphlpp.ppCpp.....s........pDCsa.hDCh.o.tCsh.pppCssphh..psN..LthlCp.p.l......hshh..........................LpGs........Ps...plppc...LpchLppCsp. ............................................hasLh.....s.E.l.lh..h.p.pc.......hs+hlG.C......G....c....hh..ssphls..p.h........................................................h.ph.h.s.ss.Wpp+s.clAlpLl-hlcpl.tpsshs..hhhsDh.....s.....hcsFuls....sc....hc.....lhhhDhcplhscsp.pphlp..t.ppCps.....s.........................pDCsh...hsCh....u...h...C..s...ptpCss.sh..ppN..LhtsCp.h.l...hshh...............................L..ss........Ps......plppp...Lpp.lppCht..................................................... 0 58 79 148 +12095 PF12261 T_hemolysin Thermostable hemolysin Assefa S, Gavin OL lg7 PFAM-B_3198 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 200 and 228 amino acids in length. T_hemolysin is a pore-forming toxin of bacteria, able to lyse erythrocytes from a number of mammalian species. 
20.70 20.70 21.40 21.10 20.10 19.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.92 0.71 -4.73 33 192 2009-09-11 08:17:19 2009-08-11 16:45:50 3 2 185 0 57 157 33 174.30 36 83.54 CHANGED hthhpsscstRpplEpFIpppatpsasAclppFhPtLLul....sspupl..hAssGhRhAsppsLFLEpYLDpPlEphluphhsts.lsRspllElGsLAoh.s.sGsuphlhhslsthLhspGhcWllFTuTctLpshhpRLGLpspsLusAcss+L.scssspWGoYYcppPpVhuuslsp.uhptL ............................h.hhtsscsphscs.phlcpcYppsasAclptaMPshLuL....hs.cst...h..tussGhRh.A....p....p.pPLFLEQYLDpPs-pllupthups.lsRspllEhGpLASh.u..pGhu...thhFhhhsphLsshGa-WslFTATcsL+shhp.RhGLp.phlApAsssRl.ssssphWGoYYpppPplhAGsLspGhpt.h............ 0 13 26 43 +12096 PF12262 Lipase_bact_N Bacterial virulence factor lipase N-terminal Assefa S lg7 PFAM-B_3205 (release 23.0) Family This domain family is found in bacteria, and is typically between 258 and 271 amino acids in length. There are two conserved sequence motifs: DGT and DGWST. This family is the N-terminal region of bacterial virulence factor lipase. The N-terminal region contains a potential signalling sequence. 
20.10 20.10 20.20 20.10 20.00 19.80 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.89 0.70 -5.17 8 221 2009-08-11 15:51:27 2009-08-11 16:51:27 3 6 188 0 50 183 24 250.40 33 33.22 CHANGED MK+p.hhhhllsSALhLuGCGD-opooGssTpss...pltpuLptETpIsFsLpu..ussslshPoYLhMDspDGTLpls...os..sssuloNPcsAMGphDGWSTo.PIslsF..pGssLssushsuulhlIKloschT....D.toss.p.lLs.ssDaslssoGsS..LhIlhLKsLssSSpYIFAVTsclpDhNGsPVGTSsSYAsLKSsshs.ssuuLsssQKVTQuoEulFutu...GVspssIVYSoWFoTQSlGDsLaAsKusTAsul..us...phssVW .............................................MKph.lhhshlsS..AlhLuGCG.s....pos.s.p.usssp..........t..l..t..op.lpFshtu.....tsup....lPlPs.hLh....s...sp...DG....T...LplP..........ss.........sss..s.......h.sNP.h.sA.hu..t..l..DGWus.o.hP..l.l..sh................pG....s......sL...s......ss....h..............h....s....u......u.....lhlhE..l.o...s.hs........................ss..s....s..lt........t......h.......Ls.G...sD.as.......s.s.u....Su.ss........lh.IhPhKsLssuSpYlhAlTsp....lpDusG.pslusSsoYuslK..s..c............sh.....h..p..pu.......s....t...slQ.tlsp.u....hEp.l....h.t.hu..............uV.s.p..spIlYSsaFoTQS.lussLh..As+ushAssh.......................................................................... 1 14 24 39 +12097 PF12263 DUF3611 Protein of unknown function (DUF3611) Assefa S, Gavin OL lg7 PFAM-B_3207 (release 23.0) Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 180 and 205 amino acids in length. There are two completely conserved residues (W and G) that may be functionally important. 
24.90 24.90 25.20 25.00 24.80 24.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.13 0.71 -4.89 21 164 2009-08-11 15:51:34 2009-08-11 16:51:34 3 4 109 0 74 156 131 175.60 31 80.30 CHANGED ssss.slp+hApslRhhGWIuFWhQlVLulVou.llLlFuslhsspstsspps.Gh...s.GlhhshhullsLhhSlahuatYsRlucpLp.t..ssstpPs+u-ss+hl+hGLllsLlGhhhullGhpAhsGhLluKulo.pssGh.....shssssphlp.uLDlhllhANssslhAHFlGllsSLWLLpplpp ................plpphutth+phGhluFWhQllLulVuuslLhFu.s...h.hsspsss...............st...........s.................s.ulhhsssGllslhhSlahuatY.hRlucpLp...ts..ssst.p.P...s+u-sh.+hl+hGlllNllGhhhullGhpAhlGsLluKulo..pssuh............shssssp..lp.uLDlhlV..AsssslhuHFlGllhSLaLLppl......................... 0 18 50 69 +12098 PF12264 Waikav_capsid_1 VSL_protease; Peptidase_C03; Waikavirus capsid protein 1 Assefa S, Gavin OL, Eberhardt R lg7 PFAM-B_3208 (release 23.0) Family The rice tungro spherical waikavirus polyprotein is cleaved into 7 proteins, including three capsid proteins, by the tungro spherical virus-type peptidase Pfam:PF12381. This family represents the capsid protein 1 [1,2]. 24.40 24.40 25.50 80.00 24.30 23.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.29 0.71 -5.02 3 54 2009-08-11 15:52:48 2009-08-11 16:52:48 3 5 5 0 0 49 0 192.40 90 27.27 CHANGED pEAFQDsEsRssDPNhSDMaNAlpuEYLVKSFoWKVSDGQDKVLuslsLPpDLWsoNSRLsDIMSYFQYYcATGlTFRITTTCIPMHGGTLhAAWDAsGCATRQGIATsVQLTGLPuhhIEAHSSSlTTloVcsPsIQSsICLSGSEHSFGRlGILKICCLNVLNAPpAATQpVuVNVWVKFDGVKFHaYSL+KsPV .QEAHQDSSVAAAGP..TDEHNAMLQKIYLGSFKWKVSDGGGSILKTFSLPSDIWAANDRMKNFLSYFQYYTCEGMTFTLTITSIGLHGGTLLVAWDALSSATRRGIVSMIQLSNLPSMTLHASGSSIGTLTVTSPAIQHQICTSGSEGSIANLGSLVISVANVLCADSASAQELNVNAWVQFDKPKLSYWTAQHoI.. 
0 0 0 0 +12099 PF12265 CAF1C_H4-bd Histone-binding protein RBBP4 or subunit C of CAF1 complex Wood V, Coggill P pcc Pfam-B_318 (release 23.0) Family The CAF-1 complex is a conserved heterotrimeric protein complex that promotes histone H3 and H4 deposition onto newly synthesized DNA during replication or DNA repair; specifically it facilitates replication-dependent nucleosome assembly with the major histone H3 (H3.1). This domain is an alpha helix which sits just upstream of the WD40 seven-bladed beta-propeller in the human RbAp46 protein. RbAp46 folds into the beta-propeller and binds histone H4 in a groove formed between this N-terminal helix and an extended loop inserted into blade six [1]. 22.70 22.70 23.80 23.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.65 0.72 -4.08 93 1002 2009-08-11 15:53:19 2009-08-11 16:53:19 3 13 342 12 648 938 11 71.70 33 16.24 CHANGED p-chc.lWp....cssshlY-hlhstslpWPoLohcalPDth........sssp........tsppllhGTps......ss.......ppNhlhlhclphssp .................EcaclW+....cssPhlYDhl.hspsLpWP.oLospalPDhp.....................sspt...........t...tsphLllGTpT.....ust..........cpNpLhlhplph.p.p............................ 0 212 360 531 +12100 PF12266 DUF3613 Protein of unknown function (DUF3613) Assefa S, Gavin OL lg7 PFAM-B_3240 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 126 amino acids in length. 25.00 25.00 25.10 25.10 24.90 22.80 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.34 0.72 -4.46 18 138 2009-08-11 15:54:42 2009-08-11 16:54:42 3 1 135 0 44 132 2 67.80 34 59.51 CHANGED Asps........ssuuc.hscuTcuWLtLQuss+AAu.ssQshsusptsLuhQRYL-SaKacIP-.haspptu .........................ss..........ssssssp.hscuTcshLslQuS.G+tAu.ssQPhpGspusLuYQRYL-SFcpcIP-.aatpt..h...... 
0 6 15 29 +12101 PF12267 DUF3614 Protein of unknown function (DUF3614) Assefa S, Gavin OL lg7 PFAM-B_3244 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 162 and 495 amino acids in length. 25.00 25.00 35.60 35.50 20.10 19.80 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.03 0.71 -3.98 7 151 2009-08-11 15:54:47 2009-08-11 16:54:47 3 3 3 0 0 100 0 156.20 75 20.85 CHANGED sFsuRhP......+ht+chsAohhcsuhuhVspuph....s.sps.shhp+Scsu....phP.sphssspsssAh.uss...VchsssSsssss...Rts.S.Pps........l+sush.lsVuuss+c.hsKlR-shhp.phGt.hpFS.F.SuT.....VhNsRssppsah...................hVhhLR.souupF.hlLh ...hsuRhP......RYEHHYDASMKGAGLSWVRKSQF....PNVEV.IRRRRSRASGSSASFPDANLQVSSDRP...SES...VQVVPMLDEGSSPP.RGVsS..PRR......DPVEGSGW.YSVGSPSR+.SSTsRGQRSA.ERGKIGDFS.FPSGT.....VYNARVDERAYK...................RVLKLRDTSASSFLRIV... 0 0 0 0 +12102 PF12268 DUF3612 Protein of unknown function (DUF3612) Assefa S, Gavin OL lg7 PFAM-B_3220 (release 23.0) Family This domain family is found in bacteria, and is approximately 180 amino acids in length. The family is found in association with Pfam:PF01381. 25.00 25.00 91.20 91.00 20.00 16.80 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.28 0.71 -4.49 6 144 2009-08-11 15:56:56 2009-08-11 16:56:56 3 6 142 0 29 92 29 177.60 80 36.16 CHANGED TsVSPYPHWHYFDAYuPGKLKAVYRGNGIPLPWGNMRtVcDPCQHWAVFRMIupsssGSSAQISILsVuspPRIYCCESIKVpD.AGNsHVLCAGIDLNPAI-AQGsDAlulAcpLKshCVusGGSusIPppIKp-LhSlA+ILNINWIERGIpspARLICSRGAVCPRpPSCYt....sCs ...TVVSPYPHWHYFDAYG.GKLKAVYRGNGIPLPWGNMRKVNDPCQHWAVFRRLSpPp....pGSSAQISILsVGDEPRIYCCESlNVhDPAGNNRVLCAGIDLNPAIsAQGGDALuIApELKhuCVppGGoosIPtsIK+DLpTIAKILNINWlERGI-s-ARLICSRGAVCPRpPpCYucC.G..... 
0 4 9 21 +12103 PF12269 zf-CpG_bind_C CpG binding protein zinc finger C terminal domain Assefa S, Gavin OL lg7 PFAM-B_3253 (release 23.0) Domain This domain family is found in eukaryotes, and is approximately 240 amino acids in length. This domain is the zinc finger domain of a CpG binding DNA methyltransferase protein. It contains a CxxC motif which forms the zinc finger and binds to DNA. 21.70 21.70 23.40 23.40 20.80 21.30 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.66 0.70 -4.88 6 134 2009-08-11 15:57:41 2009-08-11 16:57:41 3 9 86 0 79 130 0 213.60 55 40.04 CHANGED AspRIhplLPQRlQpWshoPshAsEps+ctL-cIR++Qp.sRspLtELE++apcL-slIpRA+cpolspppsp......-ss-psD.-.sIaCVTCGHpIssRsAl+HMEKCatKhESQsSFGShaKTRI-G.ssLFCDhYNsQu+TYCKRLRVLCPEHoK-PKlPsT-VCGCPLVpNVFE.T.....G-FCRAPK+pChKHYsWEKlRRAEIDLERVRQWlKhDELhEQERtl+pAhosRAGlLGLh .........................................................................................................................AssRIaplLPQRIQpWp.o.PClAEEps++hLE.cIR+cQptsRhcLp-hE+RacEL-slI.+AKpptlp..pp.c-s......s-s-.scD...s-hplaCloCGH.slss+sAl+...HME+CasKYESQsSFGSha.TRIEG..splFCDhYNPpSKTYCKRLpVLCPEHo+DPKV.........s.s.sEVCGCPLV....p...s....V.....F-hT.................G-FCRhsK+pCs+HYsWEKLRRAElDLERVR...hWhKLDELhEQERslRpAMosRAGLLuLM............. 0 27 32 57 +12104 PF12270 Cyt_c_ox_IV Cytochrome c oxidase subunit IV Assefa S, Gavin OL lg7 PFAM-B_3280 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. This family is the fourth subunit of the cytochrome c oxidase complex. This subunit does not have a catalytic capacity but instead, is required for assembly and/or stability of the complex. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.94 0.71 -4.42 24 381 2009-08-11 15:57:58 2009-08-11 16:57:58 3 3 366 0 107 227 117 134.00 43 95.16 CHANGED M+spu+lFhhlssFFhhsullYshhoth..........EWsGosALhLusuLuhhlGsYLthsuRRl....shtPEDpp-AEIsDGAG-lGFFSPtSaWPlhlAsusulsslGlA.hhh.WllhlGsshllhulsGhlFEYapGscp+ .....................MKhEu+lFthluhFallsullYhhhop...................EhAGTsuLsLouuhuLhluhYlpFsA+Rl.....-sRPED.c-AElsDGAGplGFFSPpShWPlhhAlusAlhslGls.hhh....WLhhlGls.h.llhussGhsFEYahGsptH........ 1 34 84 102 +12105 PF12271 Chs3p Chitin synthase III catalytic subunit Assefa S, Gavin OL lg7 PFAM-B_3298 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 288 and 332 amino acids in length. This family is the catalytic domain of chitin synthase III. Chitin is a major component of fungal cell walls and this enzyme is responsible for its formation. 
25.10 25.10 35.60 35.00 23.80 23.40 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.81 0.70 -5.44 15 224 2009-08-11 15:59:44 2009-08-11 16:59:44 3 4 136 0 173 213 0 274.80 39 90.17 CHANGED MsFGsFsoICccAuLPLCulVGs.s.p.ssspGIhPsCYuRsIELANTh.IFQhusshlHlsALlholIhIl+VRuKaTAVGR+ElloFFYlYhhLplsSLllDuGVsPPuSssasYFsAVQhGlsSAlChCLLlNGhVGFQLaEDGTslSlWhLRlsShlhFlloFhIuLsTFKuWsG......LusspTlGLFVlhYllNAlhLslYlVhQllLVlpsLp-pWPLGtIuhGlFFFlAGQVlhYsFSssICcussHYlDGLFFATlCNLLAVMMlYKaWDSITc-DLEFSVGshpssa ...........................tFGsFpshCpc.....s.....sLPlCsll.............ss........t.................................................t......t......................Gh.ssChhR........sIplus.sh...lhphushhlphhAlhhsllhlh+scpKhsAV.GR.......+EhhhFhhhYhllplhplhlsuGl.P.suusshsaFsAlphGhhuAsshsLLlNGhVGFQ..lhEDGTslSlhLlphouhshFlsohhluLsThh.uWsG..........husspsluLaVlhhllshlhLhlYhlhQllLVlpsLp-hWPhu......tlshushhFshGQVhhYshSspICpusp+YlDGhFFuTlhsLLuVhMVYKaWcSITc-DhEhsVu.................... 0 48 94 146 +12106 PF12272 DUF3610 Protein of unknown function (DUF3610) Assefa S, Gavin OL lg7 PFAM-B_3173 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 146 and 160 amino acids in length. There are two conserved sequence motifs: FNN and IDS. 25.00 25.00 55.00 50.50 22.40 21.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -4.57 10 53 2009-08-11 16:00:08 2009-08-11 17:00:08 3 2 17 0 4 53 0 156.20 59 30.09 CHANGED MpupohhFlLlslLI...uLSHuosVcp-css..uPMuM+GFNNSLGTFVEYuGQASlAocDWplpsSFNL-SLhsuIpsFp..plYKsLhDhCchp.pshCP-hh..DlTcFADoILpDGLlcsccsL-a+.....ltRLS.u---hup.hpNsoSsIDSol..NllsVulhE ...MKAIuLsFILlPlLl...ALSHGSPVsR.-AIG..GPMAMRGFNNSLGTFVEYSGQASLAS+DWcLCASFNLESLYsAIhAFN..uVYKslVD.C-cQ.ps.CPEIh.....sITcFA-SILHDGLhDLEsALcaR..........................AGRLs.u.-.DDVupELtMuTSCIDSSI..NVINVuL........ 
0 1 1 2 +12107 PF12273 RCR Chitin synthesis regulation, resistance to Congo red Wood V, Coggill P pcc Pfam-B_2935 (release 23.0) Family RCR proteins are ER membrane proteins that regulate chitin deposition in fungal cell walls. Although chitin, a linear polymer of beta-1,4-linked N-acetylglucosamine, constitutes only 2% of the cell wall it plays a vital role in the overall protection of the cell wall against stress, noxious chemicals and osmotic pressure changes. Congo red is a cell wall-disrupting benzidine-type dye extensively used in many cell wall mutant studies that specifically targets chitin in yeast cells and inhibits growth. RCR proteins render the yeasts resistant to Congo red by diminishing the content of chitin in the cell wall [1]. RCR proteins are probably regulating chitin synthase III interact directly with ubiquitin ligase Rsp5, and the VPEY motif is necessary for this, via interaction with the WW domains of Rsp5 [2]. 25.10 25.10 25.10 25.10 25.00 24.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.25 0.71 -3.59 29 193 2009-09-11 15:13:23 2009-08-12 12:50:30 3 4 119 0 142 179 1 129.30 23 66.82 CHANGED RWlhaslhlhslllhhhhhhphNpRRR+p.GhpPhhGTuWhs...PPsY.tQppp................pYst............hsP..sttss.t.s.hGY.Yspputa.ss.p.................h.lt.P.psh........ths.tth..hts..............................PsGPP...P ................RWhhhslh.l.lshllllhhhhph.s+RR++....p...Gh...p.....PhhG..TuWhs........sPsa..tps..pp..........................pYsts.t..st...............thsPt.Ystsss...tts.......hGh.Yst.p.sphtsssps.................................ht.P.....ts....................................................................................................................................... 
0 27 69 121 +12108 PF12274 DUF3615 Protein of unknown function (DUF3615) Assefa S lg7 PFAM-B_3306 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is typically between 86 and 97 amino acids in length. There is a conserved FAE sequence motif. There is a single completely conserved residue F that may be functionally important. 21.00 21.00 21.90 21.00 20.90 20.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.74 0.72 -3.65 18 304 2009-08-12 14:43:54 2009-08-12 15:43:54 3 10 8 0 163 247 0 88.00 22 24.14 CHANGED Yspsps.......shpY-Lhclhppshhh-ptth..apHhNFhupp+s...pt.shs.phFFAElc........stpsppVhsCs.lcsscs.....GpCauCtspsh....LcHP ...............................................t.thch.ph.ht..p..h..........p....p..tth.....a.tHlNFhApscs.............s......ss.........pLFFAElp.........................ptppt.th....hCs..lpssps.................stCh.hC..tt.....hhHP.......................................... 0 0 25 96 +12109 PF12275 DUF3616 Protein of unknown function (DUF3616) Assefa S, Gavin OL lg7 PFAM-B_3338 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 335 and 392 amino acids in length. There is a conserved GLRGPV sequence motif. 
20.60 20.60 22.80 21.50 20.00 19.60 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.12 0.70 -5.43 18 148 2009-08-12 14:49:12 2009-08-12 15:49:12 3 15 126 0 70 162 13 258.00 25 53.17 CHANGED -hSusshssctpLWl.uuDEs.sslERLshhsss....hhssH.ppFsLu-hh-LPss..st.....EhDlEGL..Al.usGYLWlsGSHSlKRKss+spcsp.cshpcLuclph-sNRtlLuRlPl.spsssh.psspshcstp.......htAthlthpptt.s.LtphLusDPpLusFl...sIPuKDN..GFDIEGLAVps.p....RlhLGLRGPVLR.GaAlllElcsppssss..hLtLtshttpGp...hY+KHFLDLsGLGIRDL...phsGD.DLllLAGPTMc..L-Gsl+la+Wpssss...tp......tstlpphhsLPatpGsD+AEGlshhs......tsssplLlVYDSPstpRcss.ssslhuD ................................................................................uh....t..hhl.stD-...th.plh...t..........h..p....h.h.t...........tt.........DlEuh..sh..tt.......shlahhsSHu.p+pt.c....................p.sR..hlh.ph.l...t..............................................L.hphlttc..tht.hh...........thPs...ccs....G.hsIEGl..s..hss..p....plhlGhR....uP....l....h...............c....G.....hAh.ll.lps.t..ph.p...........htht.h................hchhhLcL.s.G....h.....GIR-l...phs..ss..chh.llAG.P....shs........s..Gs..hplapWsus.s.........t...........t........h.l.....s.......t.t......psEuls.h.......tt..phllh.Dts....................................................................... 0 20 49 63 +12110 PF12276 DUF3617 Protein of unknown function (DUF3617) Assefa S, Gavin OL lg7 PFAM-B_3343 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 155 and 179 amino acids in length. There is a single completely conserved residue C that may be functionally important. 
20.70 20.70 21.00 21.10 20.00 20.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.24 0.71 -4.78 62 339 2009-08-12 14:49:20 2009-08-12 15:49:20 3 2 307 0 107 263 20 153.30 24 93.08 CHANGED Mpphhhhhhhhhhh........................sssssssstpl.....csGLWEhsspht.........................................sth.....shptClotppsct.pt.........hspsppsCphpshppsusphshshsCsssth....ssspstssshssssassphshpsssst.t...hshphsh..pu+alG.sCt .......................................................Mpphh.shhhsshh..........................huhssshA.sl.......................pPG..Wchpshph.....................................pth....t.shhs...........p...shssthChTsctscssct........hsssssuCops..hscp..o.ssplhhchsC...sssss...t..upuh..spssthSss..cFshshpusssusutt......tsst.ch..cupalG.sCs........................... 0 26 70 91 +12111 PF12277 DUF3618 Protein of unknown function (DUF3618) Assefa S, Gavin OL lg7 PFAM-B_3350 (release 23.0) Family This domain family is found in bacteria, and is approximately 50 amino acids in length. 21.70 21.70 21.80 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -7.93 0.72 -3.93 61 612 2009-08-12 14:49:29 2009-08-12 15:49:29 3 6 483 0 202 459 7 48.20 30 32.03 CHANGED hscs.ssplcp-I-psRscLupTlDpLss+hs.Pcplsccttsps+spssp ...........t+s.sspIcp-I-psRspLAsTl-pLsp+ss.Pppls....c-stspspshht.t......................... 0 71 147 182 +12112 PF12278 SDP_N Sex determination protein N terminal Assefa S, Gavin OL lg7 PFAM-B_3366 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 168 and 410 amino acids in length. This family is the N terminal end of the sex determination protein of many different animals. It plays a role in the gender determination of around 20% of all animals. 
25.00 25.00 34.60 34.40 23.70 23.70 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.22 0.71 -4.62 3 123 2009-08-12 14:53:08 2009-08-12 15:53:08 3 6 14 0 16 123 0 157.10 69 42.76 CHANGED MKRNhSSYhHRDEKFKQLRSEDSESGLRSR...TEDERLQHRREEWhIQQEREREHEKLKKKMILEYELRRtREhchhLSKRS+TKSRSPESQDRNNAS..NTSKTlILFEKLESSDDTSLFRGPEGTQINATELRKIKl-IHRVLPGKPTTTTuEVKRDIINPEDVlLKRRT ....................MKRNhSsYSH+DE+F+QLRsE..DsE..h..sLRSR...TcEERLQHRREtWhIQQEREREHE+LpKKMILEYELRRt...R.....EhpK.......hh.......Sc.......R........S........KSRSP.-.sR.DRsN.s.S..NTSKThILS-KLE..SS..D.shS.LFRGP.EGhQINATELRKIKl-IHRsLPGKsT.......T.TTsElKRDIINPEDVhLpRRT....... 0 5 5 16 +12113 PF12279 DUF3619 Protein of unknown function (DUF3619) Assefa S, Gavin OL lg7 PFAM-B_3353 (release 23.0) Family This protein is found in bacteria. Proteins in this family are about 140 amino acids in length. This protein has two conserved sequence motifs: AAR and DDLP. 21.90 21.90 22.10 22.40 21.80 21.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.54 0.71 -4.07 24 136 2009-08-12 14:53:12 2009-08-12 15:53:12 3 1 134 0 61 127 16 128.70 42 91.66 CHANGED hpEcca.A+clpttLccuss....sLPssls-R..LtAARptALup+Kspss...hhssthuhsGtuutsh............ttsahp+luhshPLhALl.s....GLlsIshapsppphsElA-lDAAlLoDDLPssAYhDpGFhtaLcps ............pEhcF.Ahcl+pALsEsuu....sLPssss-R..LAAARctALA++Ks-ss...lhss.....AhuGsuushh...........sp.tsuhh+RL...u.hshPLhsLl.s.....GLlGIshaps.pRs.sELA-lDAAhLoDDLPlsAYhDpGFstYLpp.s........................................ 0 7 34 48 +12114 PF12280 BSMAP Brain specific membrane anchored protein Assefa S, Gavin OL lg7 PFAM-B_3398 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 285 and 331 amino acids in length. BSMAP has a putative transmembrane domain and is predicted to be a type I membrane glycoprotein. 
25.00 25.00 27.30 27.40 21.90 20.50 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.63 0.70 -4.44 11 161 2009-08-12 14:53:24 2009-08-12 15:53:24 3 2 62 0 76 147 0 172.00 43 59.27 CHANGED RGCRLFSICQF.VscspchNsTchEC-uACpEAY.scssEQhACshGCtsQ...spsEh...cpcpLtuL.s+.t..LpslphVpsahsDlhsSAQsaI..oSoWTaYLQsDsGKlVVhQocsplphhs.........p.p..p...pt...pt.t.pss.ttshsphRs.u.p.+s...cpspsu.ht.p..t..tpsFLpChS.......+psGhspWlLsssLhLSVllhLWl ...............................................................RGCRLFSI..CpF..Vs.cu.chNpT+..hECEu.ACsEAY.sppsEQhACphGCtsQ.....P.sEh.............ppcplhsL..sp.+h.L.sLsLlpohhsDhhsSAQSFI..oSoWTaYLQADsGKlVlFQopPplphhs.........php....shpt............p......hh..sh.p.h+p..s.t.hRs...ctppps..................FLpChS.......hp...stWI......LsssLhLSVhVhLWl...................................................................................................................................................................................... 0 14 18 38 +12115 PF12281 DUF3620 Protein of unknown function (DUF3620) Assefa S, Gavin OL lg7 PFAM-B_3384 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 281 and 358 amino acids in length. There are two completely conserved residues (G and P) that may be functionally important. 
20.70 20.70 21.50 21.30 19.80 20.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.43 0.70 -4.62 29 119 2009-08-12 14:56:28 2009-08-12 15:56:28 3 5 97 0 66 129 4 203.20 28 62.83 CHANGED sphsucllcAlupuGhFRhtuhllGTtAapsYsuhLGl+l.tssh.tTGDlDlAtt.tplSlsl...scshpsslhsl.Lps.h-hsFpslsphsttttsptpps....ahV-FLTspptssch.....st.hsuh.sssApsLcaLcaLlscP.lcslsltcsG.....lhVplPsPcpaAlHKLI.lus+Rp..tts.sKppKDhtQAshlhchl.spcpsppL.tp.uatcAhsR ................supllpslspsGhaRhtssllGThAatsYtshhGlch..s.sh.tTsDhDlA....phshtl.....tsph..slhpl.Lpp...t.sFc...lsphst.t..t...tspttps.........hhV-FLTsptussp...........p.h.ssh.sssApsLcaLs.aLltpP..lps.sh....ltcsG.....l.l...plPsPtpaAlHKLl.luscRp..t.s.hKptKDhtQAthlhchl.tpph.t...t.s............................................................. 0 23 43 52 +12116 PF12282 H_kinase_N Signal transduction histidine kinase Assefa S, Gavin OL lg7 PFAM-B_3401 (release 23.0) Family This domain is found in bacteria. This domain is about 150 amino acids in length. This domain is found associated with Pfam:PF07568, Pfam:PF08448, Pfam:PF02518. This domain has a single completely conserved residue P that may be functionally important. This family is mostly annotated as a histidine kinase involved in signal transduction but there is little published evidence to support this. 
22.90 22.90 25.00 23.00 22.10 20.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.45 0.71 -4.58 64 599 2009-08-17 12:16:09 2009-08-17 13:16:09 3 6 591 3 127 400 134 145.90 30 30.13 CHANGED sLs-ll..tppTsLsss-l-aLcpllu-WQllADLoFADlllhl.........sscus.........hlsVApsRPsTusTsahsDl.VGphs...............tspcPtltcshpsGphsRstcsthtssh....slc....ppslPV+ps......s...cl...IAVls..cc..sslsss+psutLEhsahpsAs ...........lpclhtppTsLs-sDl-aLcplls-WQllADhuhADlllhV....................sccss..........hlsVAps.RP..s..TssolhpsDl.VGphs.....................................................htssc.PhVtcshpouthscs.tp...sthptsh....slc...pshPlppp..........s..cV.....IuVlh....pc....ssltstcpsuphEpsaht..................................... 0 48 97 117 +12117 PF12283 Protein_K Bacteriophage protein K Assefa S, Gavin OL lg7 PFAM-B_3455 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are approximately 60 amino acids in length. This family is a protein expressed by bacteriophages which has an unknown function. There is evidence that it is non-essential for in vivo production of a mature phage. 25.00 25.00 78.10 78.00 19.80 19.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.72 0.72 -4.09 2 51 2009-08-18 10:28:34 2009-08-18 11:28:34 3 1 36 0 0 29 0 55.50 78 100.00 CHANGED Mp.KhhLlhQELLLLsYELNRSGLLsENEcIps.LtpLEhlLLpsLSPSopcusKs MSRKIhLIKQELLLLVYELNRSGLLsENEcIRsILApLE+lLLCcLSPSopcosKs. 0 0 0 0 +12118 PF12284 HoxA13_N Hox protein A13 N terminal Assefa S, Gavin OL lg7 PFAM-B_3464 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 149 and 306 amino acids in length. The family is found in association with Pfam:PF00046. This family is the N terminal of the Hox gene protein involved in formation of the digital arch of the hands and feet as well as in correct genital formation. 
Mutation of the protein is associated with hand-foot-genital syndrome. 19.90 19.90 20.50 20.20 19.10 19.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.86 0.71 -3.95 12 395 2009-08-19 10:03:12 2009-08-19 11:03:12 3 4 191 0 88 358 0 114.00 42 47.88 CHANGED hD-hsKNMEGFs.GGNF....uAsQCRNLhAHPssLAP..SusYsSSEV.ssuGhuEP.uKQCSPCS.AsQuSu.uASLPYGYF.GuGYYPCRMSHH....sulKSCAQ..s..uSa.u-KYMDTSso.GE-F.oSR ..................................................h..................s.ssh...........sAspC.RsLh.......uH......P.........u......s...hs.s...........u..u.....u......h.....s.....o.......u..p.............s...s.....s...s..u....h....................uE..........s....sKQC.....u..P..Cs...A.s.....Qu..SS...s.A......uLP...YGY..F...GuGYYsC.R.huHp.......sulKSC.uQ.............su...uua.u-KYMDTS....s....s.....s...uE-a.sSR....................................... 2 5 13 40 +12119 PF12285 DUF3621 Protein of unknown function (DUF3621) Assefa S, Gavin OL lg7 PFAM-B_3468 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 49 and 62 amino acids in length. There are two conserved sequence motifs: QPLDLS and EQQ. 25.00 25.00 30.70 30.70 24.60 16.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.60 0.72 -3.81 3 129 2009-08-19 10:07:37 2009-08-19 11:07:37 3 6 13 0 0 96 0 49.00 78 16.18 CHANGED KPCPEPs......TEsQPLDLSQKKEKQoEHEQQVsKPlKsQKNEPQPYSQTYGK ......KssPEPE......sEuQPLDLSQKK.....EKQPEHEQQVVKPhKPQKsEPQPYSQTYGK. 0 0 0 0 +12120 PF12286 DUF3622 Protein of unknown function (DUF3622) Assefa S, Gavin OL lg7 PFAM-B_3490 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 72 and 107 amino acids in length. There is a conserved VSK sequence motif. 
20.30 20.30 20.30 21.10 19.30 19.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.21 0.72 -3.67 15 92 2009-08-19 10:16:05 2009-08-19 11:16:05 3 1 91 0 24 68 4 70.60 58 53.13 CHANGED MocsKKa-aRlsppcsuWsAEITR+lTuR+TlVSKpcsGFuoEAEApsWuEpELpuFLpN.tcRNcRKA-p ...MocsKKFshRlopc+sGWsAEIT..RphTSRKTlVSKRcsGFsTEAc....AQsWuEpELtuFlps.s.RNERKucp................. 0 2 7 16 +12121 PF12287 Caprin-1_C Cytoplasmic activation/proliferation-associated protein-1 C term Assefa S, Gavin OL lg7 PFAM-B_3504 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 343 and 708 amino acids in length. This family is the C terminal region of caprin-1. Caprin-1 is a protein involved in regulating cellular proliferation. In mutated phenotypes, the G1 phase of the cell cycle is greatly lengthened, impairing normal proliferation. The C terminal region of caprin-1 contains RGG motifs which are characteristic of RNA binding domains. It is possible that caprin-1 functions through an RNA binding mechanism. 
25.00 25.00 25.80 25.80 20.60 19.40 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.29 0.70 -5.05 6 155 2009-08-19 10:17:39 2009-08-19 11:17:39 3 3 40 0 55 136 0 264.70 48 39.45 CHANGED QGsYNFMQDSMLEF-GQsLDPAIVSAQPMKPsQsMDh.QMVCPPsHsESRLuQPsuVPVQPEsTQVPhVSsss-saoooPPlYQPSHTsEPRPQs-slD.IQASMSLsoEQoPssSSLPuAsQPQVh.....KPLHSSGINVNAAPFQSMQTVFNhNAPVPPsNEs-oLK.psQYQuSYuQuFSSQstHsVEQoElQpEQLQoVVGuFHopDQshppus....uuHQQPopQGsGFsRpuQSFYNSRG.MuRGGPRNsRGhlNGYRGsSNGFRGGYDGYRPPFuNTPNoGYG.QsQFST.PRDYSNssYQRDGYQ.NaKRGuGQGsR ....................................................................................................................................QGshNFhQ.............-ShL-F-p.s...........sAIs..SuQ..P.pss...............ssss......p.....tE..ppL.up.sph....p..s.psTp..ss.hsss.s..s...s.h.......s.ss..................h.ps................ocs..t..p...p...tt..s.......-..p..ss...h....s...spp..o....shuSs....P..sh.u..p...QsFQ..........pP...p.S.S.uls..lN..sA.PFQ.uMQT..VFNhNAPlPPhpE...Ehhc.ps.pYpuuYNQSFoo...........ts...........ppspQs..pL............p.....p.pl......Qo..s.....V.......ss.....hcs.....s.....ssp..p...ss....Gsht..PtQsssFPRssQPahNSRG.ssRGssRGuRulhNuYRuP..sGa+..tGaDuYRs.......shs.sss.Nus..Ys.Q.QFsA..R-YSu..Y.QRD..s.aQQsaKRGuspus.s.......................... 0 3 7 19 +12122 PF12288 CsoS2_M Carboxysome shell peptide mid-region Assefa S, Gavin OL lg7 PFAM-B_3507 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 430 amino acids in length. This family is annotated frequently as a carboxysome shell peptide, however there is little publication to confirm this. 
20.00 20.00 21.10 20.00 16.80 19.90 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.81 0.70 -5.31 13 104 2009-08-19 10:28:48 2009-08-19 11:28:48 3 3 69 0 42 110 359 315.70 37 60.72 CHANGED NsSRAlVLARREA.SK+GKoA.......upQsoouAuVARQuNPDlSSRElAQ+VRELRSKsGAs....ssppsGsoRPsGPNRs.....GuKQ.uuAuDAHWKVGtSETsoGQsVTGTQANR.ShKTTGNEASTCRoITGTEYLGAElFpoFCQ.SpPthtQPuKVsVTsTSHGN+VTG.NEVGRSEKVTGDEPGTCKslTGTEYISANQSssYCGs.sssSP+KVGpShTpsGppVSGVhVGRSp+VTGsEuGus+pLTGDQYLGu-P.s.sGRuspKVuShpTL+GsGVTGTsVGRo-pVTGsEsGSC+pVTGDEYIGuQQYcuFCus+PpPEAsKVGhSlTN+uQhVSGTpTGRSphVTGDEPGTCKAVTGTPYAGLEQuspaCsssulp-l+pRTPpphuTsu.sphTGlQPGIGGVMTGA-KGACEslTGTPYVGuDQhutAC ................................sR.hs..hRtA.stttt.t......................ths..........t.......t........h.ss+phu.t.hRt..s..Gt.......tt....s+Psu..+t..................................h..KVt.upT.tGphVTGTpspc.s.+sTGsEsuoCRslTGTEYhG..sE.hp.p.hCp.ttPt...tP...+..VtlotTsp..uppVoG.splGRSp+VTGDE.GsC+tlTGTEYluupp...hC....s..s......t.s.t.plh.s.ottsp...loGs..tps..........ttVTGsE.....Gs.spplTGspYht.............t................tup...s.....stK.Vu..pTlpGps...VTGT.Vu+usplTGsE.GuC+.VTGspYluspphpshC...ss.pP..s.p...stKVuhshT.csp.VoGTtsGRSttVTGsEsGsCpslTGTsYh.G..ppht..thCss....tt.t...h...t........h.oG..p..G.ss...hTGstpGtsp.loGo.Y.G.tt..................................................... 0 11 25 38 +12123 PF12289 Rotavirus_VP1 Rotavirus VP1 structural protein Assefa S, Gavin OL lg7 PFAM-B_3516 (release 23.0) Family This domain family is found in viruses, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF02123. VP1 is a structural protein of the inner core layer of the rotavirus virion. It complexes with VP2 and Vp3 to form this layer. 
19.40 19.40 20.80 35.70 17.50 15.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.57 0.72 -4.34 2 470 2009-08-19 10:32:54 2009-08-19 11:32:54 3 2 231 10 0 377 0 51.00 94 5.17 CHANGED ScGItlTEKAKLNSYAPlhLEKRRsQhShhlohLQpPsoFKSpp.lTINDlL ..SKGIAVTEKAKLNSYAPVYLEKRRAQISALLTMLQKPVSFKSNK.ITINDIL. 2 0 0 0 +12124 PF12290 DUF3802 topoisom_IIA_B; Topoisom_IIA_B; Protein of unknown function (DUF3802) Assefa S, Gavin OL lg7 PFAM-B_3547 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 114 and 143 amino acids in length. There is a conserved KNLFD sequence motif. 20.80 20.80 21.00 94.70 20.60 20.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.39 0.71 -3.77 17 157 2009-12-04 10:35:14 2009-08-19 11:36:19 3 1 156 0 31 85 9 112.40 60 96.12 CHANGED MVs-pDGYhpLIpYLT-pLuLFE.tspssssus-TVh-lhE-plAsplhhVCsQNspLshspR.hllRElDAIVhDLEElLuuVhsppsTsEQhtFIsEFsuLlKNLFDptluch MVVETDGYLALIEHLo.NhslFs...ppGDTGsEoVEDVlTDMlASNIMuIFEQNPELHSSVRFpLLKEADuVVtDLGEVLAGVWt+sATNEQIuFLDEYIuLVKNLFDoAVAKY. 0 4 9 20 +12125 PF12291 DUF3623 Protein of unknown function (DUF3623) Assefa S, Gavin OL lg7 PFAM-B_3578 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 261 and 345 amino acids in length. 
18.90 18.90 19.70 19.40 18.40 18.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.72 0.70 -5.42 30 93 2009-08-19 10:44:53 2009-08-19 11:44:53 3 4 86 0 33 90 126 256.20 37 85.67 CHANGED MsshhlssLhAlFlWWFSTGllLhll+h.sc+....sathuhlhussllshGlhGlssotsssoluuuYlAFhuALhlWGWhEluFLsGhlTGPppssCPssspGhcRFhpAhtTlhaHElsLlssslslhhlohGusNphGlWTFhlLahhRlSAKLNLFLGV.plNsEFLPstLtaLtSaF+ppshshlFPlSVoshTsssshhhppAhss.ssstptlGasLLusLsALAlLEHWFhVLPLPstpLWpWhLPutpssspsst ........M.shhlssLhAlhlWWFSTGhlLhLsph.sc+.....sathohlhusslhshulhGlssoupssossuAYhAFssulhlWGWhEhuFLsGhlTGP+ppsCPssspuhpR.FhtAltThl.aHElullsssshlhhlohs..usNplGhWTFhlLahhRhSAKLNLFLGV.slNs-aLPppLtaLpSYFp+cshNhlFPlSVTlsThssshhhppAhss.ssshptlGhsLLusLhALAlLEHWFhlLPLPsttLWpWsl.sptt.....s....... 0 8 20 23 +12126 PF12292 DUF3624 Protein of unknown function (DUF3624) Assefa S, Gavin OL lg7 PFAM-B_3603 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There is a conserved GRC sequence motif. 21.30 21.30 36.80 36.80 20.60 19.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.66 0.72 -3.58 18 129 2009-08-19 10:46:14 2009-08-19 11:46:14 3 1 129 0 25 82 4 77.20 60 92.95 CHANGED MuCpsC.....ppshFppKIGRCpRCMhQ..LTlLSlluWslWaahatDsPpslpSIALLhhshAFoGLLhLHL....llhsaRphps ..MACs-C.....ppchFWpKIGRCpRCMDQ..LTVLSVlsWIlWaauF+-cPtSIESIuLIhAGFAFNGLLFLHLWh+aVILPWR+Rp.G............. 0 2 7 16 +12127 PF12293 DUF3625 Protein of unknown function (DUF3625) Assefa S, Gavin OL lg7 PFAM-B_3621 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 327 and 360 amino acids in length. 
18.30 18.30 18.50 18.70 18.20 17.90 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.41 0.70 -5.25 15 185 2009-08-19 10:51:41 2009-08-19 11:51:41 3 4 145 0 18 138 16 235.40 47 72.93 CHANGED PLoP-Q.IcpLRphh--sp+Atutssss...ssPRlSSlsVNLSPGAo.PllRsusspsoolsFoDuTGuPWsluAssl............suss....ssFslpahtsSsllslQAtp.YssGNloVhLcGLssPVllsLpoGc......sptVDhRlDLRlPtRGPsAps.h..shspspluhhsssLpuFLDGlPPcc..A+RLKssGs...-spsWths-cLYlRTphs.lpstFcpohSSsDGT+lY+LPhT.PhVshShpGpsssLpI-h .....................Lss-p.I+pLRplhs-s-+Ahsushso...ssPRlo.SloVNLSPG.A.SlPllRsuhs.lSsloFsD.sGuPWP.ussPh.............................susP....phFsl..Q.a........s.....sNhlslpsh+sausGNloVaL+GLusPVlLslsSG-sss............s.opphDuRLDLRlPppGPsuss..s...uhs...............ss........+.......luLaDssLQuFLDG..lPPc-..s+RLKhoGss..sDTplWQp.GD....-....LhVRoRsh.LpspF-pTloSA....DGTHla+LPsT..PlLoFS.l..sGpslplpsE.h..................... 1 8 11 14 +12128 PF12294 DUF3626 Protein of unknown function (DUF3626) Assefa S, Gavin OL lg7 PFAM-B_3627 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 294 and 374 amino acids in length. 
20.10 20.10 20.60 20.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.89 0.70 -5.38 7 296 2009-08-19 10:53:28 2009-08-19 11:53:28 3 4 251 0 60 210 3 194.40 35 57.46 CHANGED RlALHFHPDRlDs+GhoVsEGLLc-GlY+sQFETplSNGplSs..GGsRDaWEppLFGsuYp....shcpRPKYGAL-LhhhPsGPAPRFGSCYhlh+spVhuRsTFoahDSa..psP+-KGTlppFDslLAALLsEoFpRpaA...........LGppslpPspllc+LsppLppsh.sth-tssutNLDHYIEAQIHGslSLscDlthLVADPSF+GTsIGc.LpplCc+YtIsLpWHsGhpLsVpcVPsDFRGsuMPsLApRI.....AhstslDAhhIGtAAtcLp+pPppWS-RGshtcshp-LKhLWHVLV+a .......................................................................................................................................................sspRPhYuALshh..t.sGsA..s..taGpsaF.LsspV......pspsTao.hDoa......p.sh-puphpp..F.h.........h.p.hA............spss.hhs......hphh....ppLht.....hpt.hh.......h.hs.p....ss.t..spYIEApIHG-lsLhcDlctl...l.-sSa.pto.....plht.........................................................................................................................h............................ 1 20 37 49 +12129 PF12295 Symplekin_C Symplekin tight junction protein C terminal Assefa S, Gavin OL lg7 PFAM-B_3634 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 180 amino acids in length. There is a single completely conserved residue P that may be functionally important. Symplekn has been localized, by light and electron microscopy, to the plaque associated with the cytoplasmic face of the tight junction-containing zone (zonula occludens) of polar epithelial cells and of Sertoli cells of testis. However, both the mRNA and the protein can also be detected in a wide range of cell types that do not form tight junctions. Careful analyses have revealed that the protein occurs in all these diverse cells in the nucleoplasm, and only in those cells forming tight junctions is it recruited, partly but specifically, to the plaque structure of the zonula occludens. 
25.00 25.00 36.20 31.70 24.40 21.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.17 0.71 -4.37 34 216 2009-08-19 10:55:50 2009-08-19 11:55:50 3 7 161 0 151 211 2 171.50 39 15.68 CHANGED Dlchll.PllstLcKc-l.hpllPcllph.......s.phhppsls+lLp..............stspts.slossElLlhlHsl........sshKpl.lcAhshCFspps........h..asp-llutsLspls-p.....ssl..PhLahRTllQshptaPsLpsFlh.plLs+Ll.pKplWcp.splWcGFl+CspphtP.....pSatlllpLPstplpphlpch. .......................................................Dl+hLIPllsuLpKcEl.lthLP+llpL.......s..hlcpshsRlLt........................tsspshssLsPsElLlA.lHsI.....-stc..ssh..........Kpl...lcAsshCFsp+s..........laTp-VLusshpQLh-p..........sPLPhLhMRTVIQuls.....haP.pLssFVh.slLsRLl.hKQ.............lW+....splWcGFl+CspphpP.......pSatl.lL.Q.LPstQLttshpp............................................ 0 65 92 126 +12130 PF12296 HsbA Hydrophobic surface binding protein A Assefa S, Gavin OL lg7 PFAM-B_3635 (release 23.0) Family This protein is found in eukaryotes. Proteins in this family are typically between 171 to 275 amino acids in length. Although the HsbA amino acid sequence suggests that HsbA may be hydrophilic, HsbA adsorbed to hydrophobic PBSA (Polybutylene succinate-co-adipate) surfaces in the presence of NaCl or CaCl2. When HsbA was adsorbed on the hydrophobic PBSA surfaces, it promoted PBSA degradation via the CutL1 polyesterase. CutL1 interacts directly with HsbA attached to the hydrophobic QCM electrode surface. These results suggest that when HsbA is adsorbed onto the PBSA surface, it recruits CutL1, and that when CutL1 is accumulated on the PBSA surface, it stimulates PBSA degradation. 
22.40 22.40 22.40 23.40 22.20 22.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.36 0.71 -4.27 71 311 2009-08-19 10:58:29 2009-08-19 11:58:29 3 9 66 1 199 305 0 119.30 19 53.41 CHANGED +cssslhsslss.IssplsslssslpsYs.....GG.......shslhsstsslhsslppuhpssps.s..ssloss-uhslhsslt...sLpspltsslssllsKKsthss.......uGhushlhssLpp.pssssshusslpsK ...............................thsshhpslss.lssplp..plssslp.sas.....Gu........sh.tlhssssslhsslppu.hpslps..s...ssL.ost-uhslhss.lp...sLp.splpsslssllsKKs.hsp.......sGhsshlhtsLpp.psssppLuptl.tK............................... 0 28 86 153 +12131 PF12297 EVC2_like Ellis van Creveld protein 2 like protein Assefa S, Gavin OL lg7 PFAM-B_3668 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 571 and 1310 amino acids in length. There are two conserved sequence motifs: LPA and ELH. EVC2 is implicated in Ellis van Creveld chondrodysplastic dwarfism in humans. Mutations in this protein can give rise to this congenital condition. LIMBIN is a protein which shares around 80% sequence homology with EVC2 and it is implicated in a similar condition in bovine chondrodysplastic dwarfism. 
25.00 25.00 50.80 30.80 20.60 19.70 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.23 0.70 -5.84 5 63 2009-08-19 11:04:06 2009-08-19 12:04:06 3 2 35 0 29 54 0 376.80 49 36.14 CHANGED GEpauVSYTApL-u+cltsGES...LcLPApLTFpSuSQNcTs...QL..........ossFTITlEEpspVhPNHGlHAAGFllAFLVSLVLTsAuhFlLsRTpsLpsshLoRsRpp+cE....SKLEsusFoSu-sluEDluhNDQllDILThEEPGsMLQALE-LEIATLsRADADLEAsRhQIsKDlIAlLLKNLlSuGHLSPQlE+RMuulFKKQFLsLEsElQEEY-RKMlALTAECsLEoRKKMEuQ+QREpsApEEAEELlK+ssE+sAlECSsLL-pLHuLEQE+Lp+pLtLcQEE-FAKA+RQLAVaQRsELHsIFaTQIcsAspKGEL+.EsAKpLlc-YSKsQ-slEELMDFhQAoKRYHLu+RFAaREYLV+sLQupEo+lpuLLNTAATQLssLIpKhERAGaLcE-Qh-MLLERAQsElhSlKQKLDcDLK ..........G-tasVsYsApLputclh.sG.Es...LpLPApLTFpSsSpNco.......pL..........pAhholsspEphpVhPsHGlHuAGFhlAFllSlVLThsshF.hhs+hpslptshls..cpp...h.p.-........sKL..E....FsSu-uVsED..luhsDQhlDILo.E-PGsMlQALE-LE.....lATLsRAD.....ucLEAsRhQIsKD..lIulLLK.sLsSs.GHLSsQsE++huulFKKQFLhhEsElQ.EEY-RKMlALTAECsLEoRKK.hEsQaQREhsAhEEAEELhK+.suEcsAhECs.sLLcpLHtLEQ-cLp+pLhlppEEshAKApRQLAla........pRsELHpIFasQlcsAlhpGEL+.EsAKhLlpcYuchQ..-slEELMDFhQAsKRYHLu+RFuaREYLVpslQS.-o+lpuLLSoAusQLspLIpKpERAGaLcE-Qh-hLLERAQsElhSlKQKLDpDLK................... 0 7 7 12 +12132 PF12298 Bot1p Eukaryotic mitochondrial regulator protein Assefa S, Gavin OL lg7 PFAM-B_3672 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 168 and 381 amino acids in length. Bot1p localizes to the mitochondria in live cells and cofractionates with purified mitochondrial ribosomes. Bot1p has a novel function in the control of cell respiration by acting on the mitochondrial protein synthesis machinery. Observations also indicate that in fission yeast, alterations of mitochondrial function are linked to changes in cell cycle and cell morphology control mechanisms. 
21.90 21.90 22.10 22.00 21.50 21.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.94 0.71 -4.32 23 175 2012-10-04 14:01:12 2009-08-19 13:06:09 3 4 149 0 139 193 1 150.20 25 45.85 CHANGED slpPFP.NPphhoshllSE-L+pcIaccltppuh..SlppVSpcaGlcl.RV-AlV+LhclE+cW.......pschps.hpsaucslhcMhPh..........htsptthtcE.................sls-lPV.ptshsphFhsluEScsFs.lDAA+lhph..tPAs-php+hpshstctpstpp..tpp.pp...hchlhsptpcuE ...............t..pPFP.Ns.hhop.lLS-ph+ppIacplh........ppsh.............olctlStcaslchpRVtAll+LhplEcpa............tpptp.hhhshscshhpMhPh...........tst..h.tE................................................sls-l.s.ph.....s..phFhslsESc.Fs..DAu+hht....ss..t....p.ht.....t..tt.......................p................................................................................................. 0 51 86 122 +12133 PF12299 DUF3627 Protein of unknown function (DUF3627) Assefa S, Gavin OL lg7 PFAM-B_3687 (release 23.0) Family This domain family is found in bacteria and viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF02498. 20.90 20.90 20.90 22.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.74 0.72 -3.94 56 290 2009-08-19 12:19:57 2009-08-19 13:19:57 3 9 69 0 2 291 1 90.20 23 28.40 CHANGED hhp+h.ph.spcslthPsssppp.pLslhpp......s.spathlpuQcpplcpphp+hp......shsllh-.shcPNshsshNplp-pl....ppc..phchp+p .................h..p+hs.h.sp-tlhhPs-spppppLslhpp..............tsspashl+sQcpthcpphc+hp......stcllhc.shpPNshshhNplK-pL....tpc..phphpt................. 0 0 0 1 +12134 PF12300 DUF3628 Protein of unknown function (DUF3628) Assefa S, Gavin OL lg7 PFAM-B_3692 (release 23.0) Family This domain family is found in bacteria, and is typically between 153 and 183 amino acids in length. The family is found in association with Pfam:PF00270, Pfam:PF00271. 
25.00 25.00 110.40 109.60 20.40 19.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.37 0.71 -4.24 3 35 2009-08-19 12:23:46 2009-08-19 13:23:46 3 1 34 0 8 33 1 174.10 59 30.83 CHANGED EQKIPVEPVTsELLTPLPRPER.AsstGEEu-DssGDSVGsIFREAREsRAA-EtRRGGGRSGPG..uRSGust.G+GsRDG.cGG-GRsRsP.RKPRlEGEpDuAAsuAsTPVsuAuAscsP........ulsAA-GE+sPRKRRRRR+GRPVEGAEuV.sus..sssAuPhpPsQVVAKPVRsAAKoGupPSLL .EQKIPVEPVTuELLTPLPRssR.ssstGEEA---.sGDSVGsIFREAREQRAA-EpRRGGGRSGPG...uRSGu...GtGRR-....G..sGu-G+sRs..RKPR.VEG....Et..s.uAAsustsPVsuAuAs.psP.........usssA-GERsPRKRRRRRpGRPVE..GAEsu.Aus..sssAuPhpPsQVVApPVRsAAKsuupPSLL........................... 0 1 4 6 +12135 PF12301 CD99L2 CD99 antigen like protein 2 Assefa S, Gavin OL lg7 PFAM-B_3714 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 165 and 237 amino acids in length. CD99L2 and CD99 are involved in trans-endothelial migration of neutrophils in vitro and in the recruitment of neutrophils into inflamed peritoneum. 
23.50 23.50 23.70 23.70 21.90 23.40 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.23 0.71 -4.57 13 175 2009-09-13 06:10:09 2009-08-19 13:25:43 3 7 56 0 65 166 0 141.50 29 62.51 CHANGED FDLuDAh......................................ccssssss..........pcPssPKKPusG...............t-FDLtDAlssssshs.t.+sss+spspst............uGuhSDsDLtDssspsshpsDtstsGuus................................sscsstpppstsGhIAGIVuAVusAllGAlSSaluYQKKKLCF........ppu.st.........+ssssEP ...................................................................................................................sppst........................ps.hssp.tPsss................ssFD..Lt.DAlssps.s..............t.s...........................sp.p.................suGsauDpDLtDhhtssth+scts..tussst...t..............................ppss..sttsstsGhIuGls.uAlshAllGAlSSYluYQpKK.hCF..........tt.............................................. 0 5 10 25 +12136 PF12302 DUF3629 Protein of unknown function (DUF3629) Assefa S, Gavin OL lg7 PFAM-B_3723 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 256 and 292 amino acids in length. 
25.00 25.00 28.20 25.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.73 0.70 -5.00 3 44 2009-08-19 12:28:29 2009-08-19 13:28:29 3 3 6 0 2 44 0 239.70 74 86.97 CHANGED NSWKNSGFElRSGVAuACRLPKCPTLQRIPTN+DVNcK.EKPYSTFKIDHD..RIQ-MWQRSRIGAKEQNSLRGYQPANSDVNTDS+EVISALDKVRRIEERRKIQNKMRALMEEQHM+DERIQEEpDRVIRKEVLVERDPARKQTspEYSPVRNREKCKPsoWVSATVLAATGKMQQHRYMQTPP.KhNPIKVEAEVQpPSKSVP-NPAoGTTSGQPNQT+T..DMoSRAKWAKSlLELHKTRAKEMDDILSLsGYSN ..........................NSW+pStFt.RsGlAusscLPpCPTLQRIPTNcDVNcK.-pPYSTFKIDHD..RlQpMWQ+SRIGAKEQNSLRGYQPANSDVNTDScEVISALDKVRRIEERRKIQNKMRALMEEQHM+DERIQEEpDRVIRKEVLVERDPARKQTspEYSPVRNREpCKPsoWVSATVLAATGKMQQHRYMQTPP.KhNPIKVEstsQpPspSs.tpsAssTTSGQPNpTpt..sMspRscWsKuhLELHKpRAccMDclhSLtGYSN........................... 0 1 1 1 +12138 PF12304 BCLP Beta-casein like protein Assefa S, Gavin OL lg7 PFAM-B_3744 (release 23.0) Family This protein is found in eukaryotes. Proteins in this family are typically between 216 to 240 amino acids in length. This protein has two conserved sequence motifs: VLR and TRIY. BCLP is associated with cell morphology and a regulation of growth pattern of tumor. It is found in adenocarcinomas of uterine cervical tissues. 21.00 21.00 21.10 32.60 20.60 20.90 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.26 0.71 -4.87 9 123 2009-08-19 12:34:45 2009-08-19 13:34:45 3 2 38 0 53 105 1 160.80 47 83.93 CHANGED MKhGLsLllVGHVNFlLGAlVHGsVLRHlus.ppulshpYsluNlluVsSGlLuIhsGIsAIlhSRNLtphsLpWsLLVlSllsuLLSsACslGLllulshTluspG+sLlusCs..hsss....cphslus-CPFDPTRIYuToLsLWsPslLhSssEushSsRChhlshplhtLtPCtccthptphppts- .............M+sGLsLlllGHVsFlhuAllHGoVLRaVusspsAls.pYsssNllSVsSullslssGlsAllhSR.L.....p..L+WslhshulhssLLShsCulGLLhululThAstG+tLlusCp..h.ss......c.hshss-CPFDPTRIYsosLsLWh.ullhsssEsshushCs.hshpLhtltPhhtct.pt.h.....p........ 
0 2 7 22 +12139 PF12305 DUF3630 Protein of unknown function (DUF3630) Assefa S, Gavin OL lg7 PFAM-B_3766 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a single completely conserved residue D that may be functionally important. 25.00 25.00 27.60 26.90 19.20 19.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.18 0.72 -4.10 23 171 2009-08-19 12:35:54 2009-08-19 13:35:54 3 1 171 0 37 112 8 94.20 45 93.84 CHANGED hplsphphscssL.Lshsu.lDF-sFtthAEsLlttLsspVlE+QWuADRHpWLL-FEGspLhLpaEaYu-hsWLsstpts-.hEVLsaLAsLhpp ....................................FuLs-YhsscG+Lllpsps.FDF-sFsphAppLlslLuApVlEKQhDADlHoWLlDFEGspLhL+....uEHYSpulWLEsLsss.p.ucEpLsalApLhp............................. 0 7 13 26 +12140 PF12306 PixA Inclusion body protein Assefa S, Gavin OL lg7 PFAM-B_3786 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 173 and 191 amino acids in length. PixA is thought to be specifically produced in Xenorhabdus nematophila. It is an inclusion body protein. 20.80 20.80 22.30 21.60 20.10 19.50 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.19 0.71 -4.41 33 128 2009-08-19 12:40:52 2009-08-19 13:40:52 3 2 61 0 31 100 3 167.10 25 93.80 CHANGED hsphIDlllslDs-p....Ihp.......thsphSpsssp..PTsI.s..ppt.......lYhlsspsphh............supusucLplpsplGDslRWRsoSlot.ss-hslllYchhthss.......hhs......hpsshhphsssspP..sPss......sshsspppshp.haapusltspGp.tsathsFulasp.....pppLhGYapWDPhIslt .....................phlDlllslDs-t.Ihp.........ph.sssSpsPst...PTsl.s.....pct..........lYhlssss.hs..................supusucLplpAslGDhlpapsTSlst.su-pusllY..phhttsu.......h.s......hsssh.phssu.spP..sPss.....ppshsstppshp.hhapupVtp...sGp...tsathsFulash..................ppsLhGYahWDshIpl................................. 
0 4 9 16 +12141 PF12307 DUF3631 Protein of unknown function (DUF3631) Assefa S, Gavin OL lg7 PFAM-B_3787 (release 23.0) Family This protein is found in bacteria. Proteins in this family are typically between 180 to 701 amino acids in length. 20.80 20.80 20.80 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.46 0.71 -4.38 22 155 2009-08-19 12:47:29 2009-08-19 13:47:29 3 10 132 0 41 145 32 176.80 39 38.04 CHANGED +MRRRsssEpVtsaRhRpststhpsL+p+LspWhpsphsplcs.Ac.........PshP......t.lpDRAADsWEPLlAVADhAGGsWPp+ARtAulslss.ptspstt....sShsscLLtDlR.ssFtt.tst...ptlsos-LLstLpt.pEuPWus.hs.spsLssppLuphLpc.Y......sI.....cSpslRh..susp.sKGYp+...ssFtDAWsRY .............................+MRRRtssEpV.csaR.RhstsphctL+-+LAsWssshtstlpp.uh...............PsMPtsVpDRsADsWEPLlAVADsA.GGcWPcpARsAshphss...u.scstp.....sSlGlpLLpDIR.clF.....sst.........-ph.Tu-lLstLpp.-EuPWus.hp..GcsLssRtLuphLtc.Y......uI.....psp.h+...uups.hKGYpp.........spFtDAWsRY............................................................................. 0 13 30 35 +12142 PF12308 Noelin-1 Neurogenesis glycoprotein Assefa S, Gavin OL lg7 PFAM-B_3817 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF02191. There are two conserved sequence motifs: SAQ and VQN. Noelin-1 is a glycoprotein which is secreted mainly by postmitotic neurogenic tissues in the developing central and peripheral nervous systems, first appearing after neural tube closure. It is likely that it forms large multimeric complexes.It has a divergent function in neurogenesis. In animal caps neuralized by expression of noggin, co-expression of Noelin-1 causes expression of neuronal differentiation markers several stages before neurogenesis normally occurs in this tissue. 
Finally, only secreted forms of the protein can activate sensory marker expression, while all forms of the protein can induce early neurogenesis. 25.00 25.00 49.30 49.00 23.20 21.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.33 0.72 -4.21 8 216 2009-08-19 12:51:08 2009-08-19 13:51:08 3 4 46 0 85 186 0 96.70 65 23.41 CHANGED asuPEEGWQVYSSAQDsDGRClCTVVAPsQshCSRDsRSRQLRQLhEKVQNlSQShEVLDLRTtRDLQYVRshEslhKsL-sKL+ssp-s.+sLsAKuaQE ...........hsPEEuWQVYSSAQDs-GRCICTVVAPpQshCSRDARo+QLRQLLE.K.VQNMSQSIEVL-hRTQRDLQYVc+MEsQMKGL-uKF+QlE-s++phhu+pFp..................... 0 5 16 38 +12143 PF12309 KBP_C KIF-1 binding protein C terminal Assefa S, Gavin OL lg7 PFAM-B_3821 (release 23.0) Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 365 and 621 amino acids in length. There is a conserved LLP sequence motif. KBP is a binding partner for KIF1Balpha that is a regulator of its transport function and thus represents a type of kinesin interacting protein. 
20.10 20.10 20.30 20.50 19.90 20.00 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.16 0.70 -5.49 21 192 2009-09-12 21:52:06 2009-08-19 13:56:09 3 6 137 0 122 185 3 298.00 31 58.76 CHANGED .h-WAlNAATLSpaYhuptpatpARHpLuAAshlh........sp..p.........t...stptct-pp..............-shcpphAclApsWsKYsLplLpsup.p.....hhc-s.sc.t.php.phpthp........h.....tp...................c..................h.tl........c.ltuhEsplssphshsF--A+tlFhhuppalscAK-aaph-GasT-HlcIlQDhupLa+tLAFFE.psh-.................ppsKMHKRRschLEslhs-LNspaYhtlsRplhaElucsat-Mh-l...................+lt.tc....stptsss+slpK.lNplsppulcaappFl-oh+..............pss.tchPcp..l-pD.h+PhhhA+F+lARLatKllos-spppl-slssSLphYchhscaspppsp...stt.h.psElpls+EMlpLLPhKlpclp ................................................................................................................................-auhNshpLupaahs......pa..put..ppLsuuphlh.....................................................................................t................p.hppt.uplshsah+atl.hhptsp.h........................htp.p.t...p.t.t.......thtt.......................................................................................................................h..h................lts.hcp.pl.sh.hhshsFc-ARplFhhupcalp......cAKcaapl..........-uasT-HlcllQDhStLa+hLAFFE.psh-.................+psKMHKRRlshLEs...lh.s.pLN....s.paYh....h....l.....sRp.l.aElucsah-hh-l...............................+lthhp....p......ppsssch...lpK....lNplsppulcaaphFlcohp.....................................psp..t.h.tp...htp-.hcshhhAhFplu+lh.Kl.hss.sstpplcpltpulphYphh.t....hht.................h......hth..ph..h...p.............................................................................. 0 47 57 98 +12144 PF12310 Elf-1_N Transcription factor protein N terminal Assefa S, Gavin OL lg7 PFAM-B_3835 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. 
The family is found in association with Pfam:PF00178. There is a conserved PAVIVE sequence motif. Elf-1 is an immune cell specific transcription factor. It is found in T cells, B cells, megakaryocytes,and mast cells and is involved in the control of transcription for various immune proteins. These include IL-2, GM-CSF, IL-5, IL-2 receptor alpha chain, and CD4 in T cells, IgH, blk, and lyn in B cells, TdT in T and B cells, IL-3 in megakaryocytes, and SCL and Fc-epsilon-RI alpha chain in mast cells. 22.90 22.90 23.90 22.90 20.50 19.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.56 0.72 -4.11 11 183 2009-08-19 12:57:33 2009-08-19 13:57:33 3 4 40 0 81 167 0 91.20 44 17.08 CHANGED MusslQss-LlFEFAS......ssh.-ctpQhs-sssaPAVIVEpVPpAcLhphYuG..Lsh--p.shhhpDss...tEcpl.....-slhLo.......spsp.ssscpT.cTlpAAEALLsM-SP .................shslp..st.lhEhsS................ssh.-p.pp.t....csu.aPAVIVE.VPsuc...l...p.Yuu.....Lshs-t.s.hhp-ss....tEp.pl.p...ssh.s.lp.................spsps......ssscsTscTIEAAEALLpM-SP...................................... 0 5 12 36 +12145 PF12311 DUF3632 Protein of unknown function (DUF3632) Assefa S, Gavin OL lg7 PFAM-B_3839 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 170 amino acids in length. There is a conserved ALE sequence motif. 
20.70 20.70 22.50 22.40 20.20 20.40 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.35 0.71 -4.48 45 228 2009-08-19 12:58:27 2009-08-19 13:58:27 3 6 62 0 179 234 0 178.10 17 56.69 CHANGED WphlhphApphshsp.t.........Q-+LVplltplpphssh......................t.hhspptplWp..-LPhhu.......php-tWs................ssssspptpcalNhsuFhA+Lsupshssh.............shaulhsl+suLE..................c.sss.phpstls...sAuhWlhhuGpplhphstth.t.t..tt.st...............hpup...sshshcRWphWccRh ............................................................................................a..hhphupths.pp...........pscLlphlhtlpphssh................................................hh.pttplWp......-.LP.hhs.......thp-h.at...............................................tttsstptppahNhsuFhA+Lhstshhsh..............shhulhhh+puLE................................................................p..ss..th..sshl.sAstWlh..hu..uptlhp.s...tt.t...t.......t........................ts....tsh...s.pRW.hWppph.............................................. 0 22 73 140 +12146 PF12312 NeA_P2 NeA_polyprotein; Nepovirus subgroup A polyprotein Assefa S, Gavin OL lg7 PFAM-B_3843 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 259 and 1110 amino acids in length. The family is found in association with Pfam:PF03688, Pfam:PF03689, Pfam:PF03391. This family is one of the polyproteins expressed by Nepoviruses in subgroup A. 
25.00 25.00 159.40 25.30 19.80 19.40 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.88 0.70 -5.15 4 141 2009-08-19 13:02:51 2009-08-19 14:02:51 3 6 6 0 0 143 0 229.50 76 29.65 CHANGED MsKFYaSsRRLssashups.+..GohEQWLtshE.DsuFRtpVKpcV.hsRth.ss.RhFpa.lGhGPl-sPthhsWtallpG.....s..........PTpPVKAcEVVssPQPK.VVIPSPPPsPp.PYFRsVGAFAPTRSGFIRATVERLoREREESRAAALFAELPLEYPQGAPL+Lolu.KFAMl+...HTTWR+WYDTSDERlhcsHPGGPsL...PPPP.IQpPPSFpERlpphCphpsCuRAFALETSLGLNhAWVshVsIPSISlCC .........................MsKFYaSsRRLAsas.ups.HLGGSVEQWLAAIpTDPSFRQTVKEDVQtNRpcPsAIRMFSWKlGaGPIDNPppCsWHFVLTGE..RPsQ...........PocPVKAcEVVVsPQ.........sK.KVVIPSPPPsPs.PYFRsVGAFAPTRSGFIRATVERLTREREESRAAALFAELPLEYPQGAPLKLSLAMKFAMLK...HTTWRKWYDTSDERLhcAHPGGPCL...PPPPPIQpP.PSFpERVREhCRhKSCARAFALETSLGLN+AWVGlVDIPSTSVCC.. 0 0 0 0 +12147 PF12313 NPR1_like_C NPR1/NIM1 like defence protein C terminal Assefa S, Gavin OL lg7 PFAM-B_3871 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 251 and 588 amino acids in length. The family is found in association with Pfam:PF00023, Pfam:PF00651. There are two conserved sequence motifs: LENRV and DLN. NPR1 (NIM1) is a defence protein in many plant species. 
25.00 25.00 33.80 33.20 19.80 19.20 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.27 0.70 -4.90 21 173 2009-08-19 13:19:32 2009-08-19 14:19:32 3 15 50 0 40 169 0 153.10 51 34.03 CHANGED +RLTRstDYpspTEpGp-osKsRLCI-ILEp-..hR.....sPhsu.-sSso..shhuD.DL+M+LLYLENRVAhARLLFPtEA+lAM-IAps-uTuEFssh.s...spsosuphppVDLNETPhh.pc+hhsRhcALoKT............VEhG+RaFP+CS-VLDKhh....pDDLs...DlhaL-+sTsEEpp.KRhRahELp-sVpKAFoKDKt.Ehs+SuhSSSSSSo ..............+RLTRstDYpspoEpGpcusKDRlCI-lLEpE..h+.....p...h..p.s..s..hhs-.phphcLL.LENRVuhA+lhFPhEAchsMpIAphcsT.EFsh...s....t..sttphppsDLN.sPhh.....pcchhsRhpAL.KT............VEhG+RaFPpCStVLs+hh....t-...D.s...phh.htpsTs-cp..K+.RahELp-...l.+AFscDpt.p.sh....tss.......... 0 5 24 32 +12148 PF12314 IMCp Inner membrane complex protein Assefa S, Gavin OL lg7 PFAM-B_3921 (release 23.0) Family This domain is found in bacteria and eukaryotes. This domain is about 120 amino acids in length. This family is the inner membrane complex of parasitic organisms. This is a cytoskeletal structure associated with the pellicle of these parasites. 21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.72 0.72 -3.84 22 202 2009-08-19 13:20:31 2009-08-19 14:20:31 3 6 25 0 158 206 7 80.50 33 25.71 CHANGED sK.hhpE+lcpVsKhlhc..EKlVEVPcl+aV-KIVEVPph.hhpK..hVPKh.lpEpll.hP+...h.pc.h.....+hV-lspl+-ls+ac-hEtsEElhchl .............hhpEplhc..V..PK.hhp..EKlVE........VPp.lch....V-KlVE..VPphhhhpK.h.......VP+s.lp....Eplh.hs+...h........h........................................... 0 62 92 140 +12149 PF12315 DUF3633 Protein of unknown function (DUF3633) Assefa S, Gavin OL lg7 PFAM-B_3949 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 210 amino acids in length. The family is found in association with Pfam:PF00412. 
20.30 20.30 20.70 21.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.48 0.70 -5.02 17 151 2012-10-03 04:41:15 2009-08-19 14:21:26 3 12 38 0 112 165 9 180.60 46 40.97 CHANGED uLsMKl-QplPhLLVERpALNcAh-uEKpGpa+h...sETRGLCLSEEQhlsol......ccs+hG.sppll.shhTEPp+LsR+.CE..VTAILlLYGLPRLLTGuILAHEhMHAWLRLpG.apsLss-VEEGICQVluHhWLEuphhsuss...........ttuuussssuusoSKKu.tpSpaEKKLG-FahHQIcoDsSssYGDGFRpu.tAVp..caGL++TLcHI+hTG .....................................................................shsM+l-Qpl...PhLLVERpALN-AhpsEKpu....h.ah...h................scTRGLCLSE.E.Q.hlpol............+pPphu.st..p...hh.shhops.+l....s.Rc..CE...VTAILlLYGLPRLLTGSILAHEhMHAaL..RL..p.G...at...sLss..cVEEGICQVluahWLE...uclhssss..........................................ssssss.sssssso...++u..thophE++LucFhhcQIcsDsSssYG-GFRsu.pAVt..p.aG..LcpTLcHl+hT............................................ 0 34 77 95 +12150 PF12316 Dsh_C Segment polarity protein dishevelled (Dsh) C terminal Assefa S, Gavin OL lg7 PFAM-B_3969 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 177 and 207 amino acids in length. The family is found in association with Pfam:PF00778, Pfam:PF02377, Pfam:PF00610, Pfam:PF00595. The segment polarity gene dishevelled (dsh) is required for pattern formation of the embryonic segments. It is involved in the determination of body organisation through the Wingless pathway (analogous to the Wnt-1 pathway). 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.98 0.71 -11.52 0.71 -4.21 8 185 2009-08-19 13:22:41 2009-08-19 14:22:41 3 12 44 6 82 166 0 185.80 54 29.69 CHANGED NMAsLSLNDpDGSS.GASDQDTLAPLPHPuAAPWPh.h.uFPYQY.PsPHPa.P..PuYp-..uaSYGuGSAGSQHSEGS+SSGSsRStuttp+pt..cp...............uucpsuGSGSES-aoo+uuhtRt.tccuPstptusssScsSh+u...................uhupppopsShusPGlP..YssshLh.hsssssss..........PPGAPP.VR-LASVPPELTASRQSFphAMG ..................NhAsLsLNc.-GSS..GASDQDTLAPLPp.P.uA.uPWPh...h...uFP...........Y.Q.Y.PsPh.Ph.P..Pu.Y...p-..ua..SY..G.uGSAuSQHS.....E....GS+SSG.SsRSsuttp+ptc.ct.................tspppuGS..G.S.ES-hos+uu...p.s.hcpsssp.ptu..sp....u.ts......................................thpp.ttpssh.ssPGhs..Yss.hhhh.sss..s..s...............................PPGuPP..sR-LuuVPPELT..uSRQSFphAMG........................................ 0 7 14 35 +12151 PF12317 IFT46_B_C Intraflagellar transport complex B protein 46 C terminal Assefa S, Gavin OL lg7 PFAM-B_3974 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 298 and 416 amino acids in length. IFT46 is a flagellar protein of complex B. Like all IFT proteins, it is required for transport of IFT particles into the flagella. 
21.10 21.10 31.90 31.90 20.40 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.51 0.70 -4.97 13 166 2009-08-19 13:23:45 2009-08-19 14:23:45 3 3 118 0 110 161 7 199.20 49 58.91 CHANGED GuYcPt-YspLsVos..El+-LFpYIs+YpPpsh-LssKLKPFIPDYIPAlGDlDAFIKVPRPDucP-sLGLpVLDEPsspQScPsVlsLpLRp..........hucptststsshVsslccA-pN..PKtI-sWIsslpElH..+sKPssTVsYo+sMPDIEsLMQEWPt-hE-lLps.ltlPou-lDhDLppYschlCuILDIPVa.....sshI-SLHVLFTLYpEFKsNQHF ...........................tYsPs-YccLsVos..ElKELFpYIs+....YoPQhl-..L-tKL+PFIPDaIPAVGDlDAFlKV.....PRP..............D...........s........c.....s......D......sLGLh.lLDEPu.spQSDP.oVLsL.Lpp.........................ss+pt.s.h.s..p..phhV....+SlE.s..A-..+.s..PKsIDpWIpsIp................-LH..+oK..PssoVpYs+.sMPDI-sLMQ-Ws.cF.....EE.....hLsp...lpL...Ps.u.....clDssLscYlchlCsl..LDI..Plh..............pshIpSLHlLFoLY.EF+sspHF............. 1 50 61 87 +12152 PF12318 FAD-SLDH Membrane bound FAD containing D-sorbitol dehydrogenase Assefa S, Gavin OL lg7 PFAM-B_3647 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 168 and 189 amino acids in length. There is a conserved ALM sequence motif. This family is a membrane protein (FAD-SLDH) involved in oxidation of D-sorbitol to L-sorbose. 
23.00 23.00 23.00 23.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.03 0.71 -4.57 25 189 2009-08-19 14:35:08 2009-08-19 15:35:08 3 3 141 0 74 162 5 156.60 31 88.15 CHANGED uloRRplL......hsshussushsstshs.t.hsss.sssss.tsFhslSptLTs...+psL-stlupRlasALhppss.........pFssplspLtslls......ssp.htphpsts.s..lpssspsIlsAWYpGsVtcs.......cuplluYcsALMapsspDsLhlPTYCsssPhaWssp.Pstlss ............................hoRRphL..........sshuhs....us..shsss...h...sht..shs..ss.ssssslssFhsLSptLTu......+psLs...tslGpRlhpuLtcsss.........phsspLsp.Ltshls...............th..sss.s...psluhpIlpAWYhGlV..................cstll....sYcpALMapslpDsLll.oYssstPsaWspp.P.t..s.............. 0 12 28 55 +12153 PF12319 TryThrA_C Tryptophan-Threonine-rich plasmodium antigen C terminal Assefa S, Gavin OL lg7 PFAM-B_3676 (release 23.0) Family This protein is found in eukaryotes. Proteins in this family are typically between 254 to 536 amino acids in length. This family is the C terminal of a surface antigen of malarial Plasmodium species. It is currently being targeted for use as part of a subunit vaccine against Plasmodium falciparum, the main species involved in causing human malaria. 22.10 22.10 22.10 24.00 22.00 21.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.44 0.70 -5.15 52 134 2009-08-19 14:41:50 2009-08-19 15:41:50 3 3 9 0 89 162 0 199.00 31 46.96 CHANGED cWpsWhpcLEp.-WcpFptpl-pcK.pcWlppK-c-WppWlcphcpKWh+Yscphcp.-apsslhcpu.sWs-ppWcpW.lco-s+phhct-accWlppscpplspahhccW.pWKspK...lhpWLhp-WKpcEschWpcacptph.hchhphtc..+cpWhcWpcRlpcEpp-WppWlctK-phhlps.chspWpcWKs-KpthaspWhcsFlpcWlscKpWphWh ......cWppWhppLEp.-WctFptpl-pc+.pcWhcpK-c-asp.Wlpph-sKWhpaspsh-p.Eapsshh+cs......sWs-.p.pWcpW.lpTcuKphhct-accWlpppcstlspahhpcW.pWKspK...hhpWlhs-WKpcEpphWtcapptph.hphhphtc..+ppahpW+pRlp+Etp-WppWlphK-phhlps.chspW.cWKp-KphhaspWhcsFlspWIpcKpWp.Wh................ 
0 4 9 65 +12154 PF12320 SbcD_C Type 5 capsule protein repressor C-terminal domain Assefa S, Gavin OL, Bateman A lg7 PFAM-B_3790 (release 23.0) Domain This domain is found in bacteria and archaea. This domain is about 90 amino acids in length. This domain is found associated with Pfam:PF00149. SbcD works in complex with SbdC (SbcDC) which is a transcription regulator. It down-regulates transcription of arl and mgr to inhibit type 5 capsule protein production. It acts as part of the SOS pathway of bacteria. 24.30 24.30 24.30 24.30 24.10 24.20 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.26 0.72 -3.97 203 1897 2009-09-13 17:34:05 2009-08-19 15:46:33 3 3 1883 8 311 1271 34 98.00 23 25.01 CHANGED sshp.sltplc........usl...cclhptl..........tt...........t.ttpt...................salclpl..pscphh..schhpc.lcphhssh.....lclphphtp.pptt.......tpptpslpcl.ssp-lFpchhpppptp ............................shpshthlc.......Gsl....splhpph............tp.......................................tpps........................saLclcl...pscphl..hDhh..p+.lcplhssl..........Ltlcp.sptp..ppp..........pppptslp.c.h..ss.-lFpchhtt.t..s................................. 0 103 188 252 +12155 PF12321 DUF3634 Protein of unknown function (DUF3634) Assefa S, Gavin OL lg7 PFAM-B_3607 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 103 and 114 amino acids in length. 25.00 25.00 40.00 39.90 20.40 20.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.24 0.72 -4.10 19 141 2009-08-19 14:56:50 2009-08-19 15:56:50 3 1 140 0 30 92 5 101.20 51 97.32 CHANGED hlhhIlluhh.llahLlhss+...slhch+Fc-G+lsppKG+lP.pFp+ss+cIA+pp.h.oGpl+s.+.psss+LchStsVscplpQRIRNlF...Papta..Kstssp..+pu ......MLYVIllAsh.lIFWLlhlDR...PlLhVpFKDG+lspsKGHhPPoF+HNlh-IAp+-PF.oGElKVYppRoG.sKLsFSKpVPKKlQQRIRNVF...PHQGFs+pSsshKKt.t.......... 
0 5 10 21 +12156 PF12322 T4_baseplate T4 bacteriophage base plate protein Assefa S, Gavin OL lg7 PFAM-B_3861 (release 23.0) Family This protein is found in viruses. Proteins in this family are typically between 208 to 249 amino acids in length. This protein has a single completely conserved residue S that may be functionally important. This family includes the two base plate proteins in T4 bacteriophages. These are gp51 and gp26, encoded by late genes. 24.50 24.50 24.50 24.90 24.40 23.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.21 0.71 -5.07 18 130 2009-08-19 15:13:34 2009-08-19 16:13:34 3 2 86 0 0 114 1244 193.00 25 91.02 CHANGED hchKlssGs+..+a+sFTVpEYpsll......pspt.pc.pphlscll.sph......htclsppppEalFlplastSlGKs.lchphpCspCuc.ptph.lslpptpl......ps.hhpsuslpIph+aPch................cshtchlscCI-plhts.cp.ashcDLs-pEpppll-hlohcshccIhcthps.s.lhhsltlpC..sspppshslpGLtshFc ................................hchcl.sss+.lph+sFTlc-...aptLl........pscp.tphpphlppll.ssh.......hpcLspt-tEhlalplhspSlGcs.lchpapCs...................p...................Csp.ch.h.lsLpph.pl.....t...cs.hh..h.ss.lpIph+aPp.h.t.................................cshtchlsssIcslhss.sp.hs.hc-ls-............p............-....hpclh....chls.pphppIt.c.hhpp..Pplh.hsl.lpC.psGpppshslcGLtphFt................. 0 0 0 0 +12157 PF12323 HTH_OrfB_IS605 HTH_14; Helix-turn-helix domain Bateman A, Gavin OL lg7 Pfam-B_1210 (release 3.0) & Pfam-B_4602 (Release 7.5) Family This is the N terminal helix-turn-helix domain of Transposase_2 Pfam:PF01385. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.29 0.72 -4.63 231 3827 2012-10-04 14:01:12 2009-08-25 09:44:16 3 12 1203 0 846 3330 126 44.80 31 12.81 CHANGED M.........h+...uaKaRlaP.spcQpphlt+phGssRaVYNhsLstppctYc..p.stc ...........M......pua+h+lhP.stpQpphlp+thGssRalaNtsLshppctacttt............. 
0 214 582 720 +12158 PF12324 HTH_15 Helix-turn-helix domain of alkylmercury lyase Bateman A, Gavin OL lg7 Pfam-B_3505 (release 6.5) Domain Alkylmercury lyase (EC:4.99.1.2) cleaves the carbon-mercury bond of organomercurials such as phenylmercuric acetate. This is the N terminal helix-turn-helix domain associated with Pfam:PF03243. 21.30 21.30 21.30 21.90 21.20 20.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.43 0.72 -4.32 6 128 2012-10-04 14:01:12 2009-08-25 11:23:53 3 4 102 13 18 80 9 73.50 39 34.60 CHANGED ptpplusplspthspGptspuhshLaRPLLp.LApGcPVohEslAstTG...+ss-cVtpVLpptPusEhD-cG+IlG.hG ..............................phus.l.pphspsp.ssuht.LhhP...LL+.LAc.GcPVohpsLAuthG...hPsEcVtpVLpph..s..u..sEYDcpGpIlG.aG........... 0 10 14 17 +12159 PF12325 TMF_TATA_bd TATA element modulatory factor 1 TATA binding Wood V, Coggill P pcc Pfam-B_97264 (release 23.0) Family This is the C-terminal conserved coiled coil region of a family of TATA element modulatory factor 1 proteins conserved in eukaryotes [1]. The proteins bind to the TATA element of some RNA polymerase II promoters and repress their activity. by competing with the binding of TATA binding protein. TMF1_TATA_bd is the most conserved part of the TMFs [2]. TMFs are evolutionarily conserved golgins that bind Rab6, a ubiquitous ras-like GTP-binding Golgi protein, and contribute to Golgi organisation in animal [3] and plant [4] cells. The Rab6-binding domain appears to be the same region as this C-terminal family [3]. 
28.70 28.70 28.80 28.90 28.60 28.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.48 0.71 -4.38 78 241 2009-08-27 10:02:35 2009-08-27 11:02:35 3 5 216 0 179 241 2 116.50 33 13.47 CHANGED hsshhuhsssstussspllE+huuslRpLEuElssh+pElu+LpspRsphppElVcLhcEsEch..+ptspclppLcpplcsLcpRapssLphlGEKoEpVEELcsDlsDLKchY+pQV-phl ................................h......pssthussspllEphpupl+phE.............uElsphpp.ElupLpppRsphppElVc....Lhpps-ch.........cptspclt.pLcpplccLcp.RYpohLphhGEKsEclEEL+tDltDlKphY+pQl-phl.......... 0 52 92 144 +12160 PF12326 EOS1 N-glycosylation protein Wood V, Coggill P pcc Pfam-B_29822 (release 23.0) Family This family is not required for survival of S.cerevisiae, but its deletion leads to heightened sensitivity to oxidative stress. It appears to be involved in N-glycosylation, and resides in the endoplasmic reticulum. 21.50 21.50 22.70 21.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.14 0.71 -4.56 19 118 2009-08-27 10:17:50 2009-08-27 11:17:50 3 6 105 0 85 125 0 130.70 34 35.05 CHANGED psRuSEahLsulWClVSuYLoYulLDuLMVRWIlpYSTsAAIlRhhSMSh..lhIThEhhLluuhSsp.tcYhLHsWIlISChLThsYIhQsalTSNLphh.......................................................................................thpppRphDlYpIsVFsVVPVGlASFlTMlGLLRsLh .......................p....sEhhLs.lWChsohYLoa.hhDsLMsRWllpYos.AsIlRlLohss.......h..h..lThplLhLouhS..s-..chhL.sWIhIussLThhYhh..........................................................................................................................................................................................................................................................................................................h........................................................................................ 
1 20 45 71 +12161 PF12327 FtsZ_C FtsZ family, C-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR pcc Prosite Domain This family includes the bacterial FtsZ family of proteins. Members of this family are involved in polymer formation. FtsZ is the polymer-forming protein of bacterial cell division. It is part of a ring in the middle of the dividing cell that is required for constriction of cell membrane and cell envelope to yield two daughter cells. FtsZ is a GTPase, like tubulin. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.94 0.72 -3.92 26 5666 2012-10-03 12:11:42 2009-08-28 16:26:11 3 7 4977 45 1192 3761 2518 93.00 47 24.25 CHANGED GhAMMGhGpAsG...-sRAt...cAscsAlssPLL-..lslpGA+GlLVNITGG.DhoLtEs-ssuptIp-tlD.s-AslIaGsslDsphpsplRVoVVATGlc ........................GpAhMGhG.A.sG.......EsRA.h...cAActAIuSPL.LE...ss.lpG.A.c..GV..LlN.I......T........GG..........D..........l.....oL..hEhppAus....h.lp..pt.s.....s........p........s.....A..s.l.....I.aG.s.s.l.D...p...s.h...p....D..c..lRVTVlATGh.................................... 2 392 791 1014 +12162 PF12328 Rpp20 Rpp20 subunit of nuclear RNase MRP and P Wood V, Coggill P pcc Pfam-B_33537 (release 23.0) Family The nuclear RNase P of Saccharomyces cerevisiae is made up of at least nine protein subunits; Pop1, Pop3, Pop4, Pop5, Pop6, Pop7, Pop8, Rpr2 and Rpp1. Many of these subunits seem to be present also in the RNase MRP, with the exception of Rpr2 (Rpp21) which is unique to RNase P. Human nuclear RNase P and MRP appear to contain at least 10 protein subunits, Rpp14, Rpp20, Rpp21, Rpp25, Rpp29, Rpp30, Rpp38, Rpp40, hPop1 and hPop5, although there is recent evidence that not all of these subunits are shared between P and MRP. 
Archaeal RNase P has at least four protein subunits homologous to eukaryotic RNase P/MRP proteins [2]. In the yeast RNase P, Pop6 and Pop7 (the Rpp20 homologue) interact with each other and they are both interaction partners of Pop4 [4]; in the human MRP Rpp25 and Rpp20 interact with each other [2] and Rpp25 binds to Rpp29 (Pop4) [3]. 24.60 24.60 24.60 24.60 24.40 24.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.70 0.71 -4.54 44 133 2012-10-03 06:24:24 2009-08-28 16:42:55 3 3 129 1 93 213 7 130.40 29 71.76 CHANGED phtpslYVsopTPahSulKRlpKhLcptp+..........................pt.........tt..................tt..t...............c.Vhl+uhG+AIpKslslutaF...pcp......shcVclpTuoVpVlD-lh...........................................................................................................s-tc-cpscsRpRhVSsVElp .........................................p...pslaVsopTPahStlKRlpKhLctst+.....................pt..............................................t...............tppt.Vhl+GhG+.AIpKslslutaF.......pppp......shpVplpTuTVplVD-lh.tt........................................................................................................................................................scscscpspsRhRtlSslclt......................................................................................................................................................................................... 0 19 39 65 +12163 PF12329 TMF_DNA_bd TATA element modulatory factor 1 DNA binding Wood V, Coggill P pcc manual Family This is the middle region of a family of TATA element modulatory factor 1 proteins conserved in eukaryotes that contains at its N-terminal section a number of leucine zippers that could potentially form coiled coil structures.[1]. The whole proteins bind to the TATA element of some RNA polymerase II promoters and repress their activity. by competing with the binding of TATA binding protein. 
TMFs are evolutionarily conserved golgins that bind Rab6, a ubiquitous ras-like GTP-binding Golgi protein, and contribute to Golgi organisation in animal [3] and plant [4] cells. 35.00 35.00 35.00 35.20 34.90 34.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.44 0.72 -4.17 51 411 2009-08-28 15:49:32 2009-08-28 16:49:32 3 20 270 \N 165 357 0 74.30 39 10.97 CHANGED sshpcplpEKDppIspLhcEGppLS+p-hcppspIKKLRtchp-t-..pplsph.p...p+hpch-pclpplcpchcpt- ...................t..htppLttKctplAph.pE...hEKL.cs...cupo.ccLcpcscEsE..+phscLp....pKls-LEcElssLchhLsus-............ 0 41 75 128 +12164 PF12330 DUF3635 Domain of unknown function (DUF3635) Wood V, Coggill P pcc Wood V Family This family may be a potential Haspin-related leucine-zipper. A leucine zipper was proposed to be present towards the C-terminus of human Haspin, (up-stream of the current family) [1]; however, as this domain would appear to span several helices and be largely within a loop structure [2], the actual zipper might be further downstream, and be this family, which is the very C-terminal part of the Sch. pombe sequence. 21.60 21.60 21.70 21.60 21.00 21.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.47 0.72 -3.92 6 219 2009-09-03 10:35:38 2009-09-03 11:35:38 3 7 187 7 156 232 1 85.70 24 13.60 CHANGED c-LFsGcG.........DYQF-IYRLMRp.hps..sWupFcPhTNlLWLaYLocplLpKhshKohs..sssshRphhcpI...........pch+phhhshcSto.hpCuc.Lh ...........................................hFpG.p.s..............chQa-lYRhM+p.hps.............sWp..papPhTNllWLpYLhcpLlpphp.hpph......pt.........p............................................................................................................. 0 61 91 130 +12165 PF12331 DUF3636 Protein of unknown function (DUF3636) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 160 amino acids in length. 
25.00 25.00 75.40 46.00 21.60 17.20 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.78 0.71 -4.63 17 75 2009-09-03 15:34:27 2009-09-03 16:34:27 3 2 70 0 58 70 0 145.80 35 19.84 CHANGED suTcsLpLLYhhAsuChp................pspslppFW+plpaDFlLhMLssp.QslsDIhLhLslLSTSlhssSFGsl.so.......cpcQhssEN...aIlDRlusLLsEhPpsD.......EGttPaoshclhphRLEslthLTpluFssh.....tptuuhhlApHPoALuRLlRshaDEL ................ssTcsLplLYhhAhuChp.......................spps.ppFW+phphDFlLhMLssp.Q.hsDIhhhLpLLsTShhssShGPl.ss.......cspphsspp.............hIIDRVushLsEhP...........sptshoshclsslRltAhphLsshAhosh..........uuhhlApHs..sAlsRllpslpspl...... 0 8 25 44 +12167 PF12333 Ipi1_N Rix1 complex component involved in 60S ribosome maturation Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 91 and 105 amino acids in length. This family is the N terminal of Ipi1, a component of the Rix1 complex which works in conjunction with Rea1 to mature the 60S ribosome. 21.50 21.50 21.50 22.40 21.40 21.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.26 0.72 -4.05 43 316 2009-09-03 15:39:04 2009-09-03 16:39:04 3 6 274 0 202 292 0 102.20 27 18.54 CHANGED sssslpsahphllhalpuAMTHls..scIRtsohchLshLlchsspp.....sptsahK..hLpsahslLuhs.....tst.........s.shs....hstst............pstcthsphLpsLtpFL .......t..ppltsahsllhsalpuAMTHls..tsIppDSlclL-lLLcthPs.................hs.s..thsp........lLps..Flplluap.......tpp..................ht.shs.ttp....................pstphphphL.tLtpFL...................................................................... 0 59 101 160 +12168 PF12334 rOmpB Rickettsia outer membrane protein B Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 220 amino acids in length. The family is found in association with Pfam:PF03797. 
This family is the middle region of one of the outer membrane proteins of Rickettsia which is involved in adhesion to eukaryotic cells for uptake. 25.00 25.00 115.00 115.00 19.70 18.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.40 0.70 -4.97 9 158 2009-09-03 15:40:00 2009-09-03 16:40:00 3 2 90 0 6 136 0 200.30 77 15.45 CHANGED INFAu.s.ussDTlLNVGcGVNLYATNITTsssNsGSFsFsAGGTNIVSGTVGGQQGNKhNsVpLDNGTTspFLGNAhFNGtTTIcuNSTLQIGGNYTsDalp..SADNTGIVEFVNscsITVTLNKQAssVssLKQITVSGPGNVVINpIGNAuNshGhsTsTIuFpN....ASLGAsLFLPSulPhDs......LTIKSTV...GNGssGs.FNsPhVlVSGlDSsIAsGQl .IpFus.u.spsDolLsVGcGVNLYAsNITTssANsGSFhFpuGGTsIVSGTVGGQQGNKFNTVsLDNGTTVKFLGNATFNGpTTItANSTLQIGGNYTADFVA..SADGTGIVEFVNTsPITVTLNKQAAPVNsLKQITVSGPGNVVINEIGNAGNsHGAsTDTIAFEN....SSLGAsLFLPSGIPFNDAGNhhPLTIKSTV...GN.cTAsG.FDVPtVIV.GVDSVIADGQV 0 1 1 1 +12169 PF12335 SBF2 Myotubularin protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 220 amino acids in length. The family is found in association with Pfam:PF02141, Pfam:PF03456, Pfam:PF03455. This family is the middle region of SBF2, a member of the myotubularin family. Myotubularin-related proteins have been suggested to work in phosphoinositide-mediated signalling events that may also convey control of myelination. Mutations of SBF2 are implicated in Charcot-Marie-Tooth disease. 
22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.67 0.70 -5.03 11 260 2009-09-03 15:43:16 2009-09-03 16:43:16 3 40 96 0 156 235 0 186.70 35 14.02 CHANGED PhGPsls.......slh-cpsssh.NSA...RRLEVlRNCISaIF-NKhhEA+KohPAVLRuLKu+sARlsLscELsh+sQpNRs.lLDHQQFDhVVRhMNsALQDsSuhDEauVAAALLPLoouFCRKLusGVpQFAYTClQDHsVWsN.QFWEssFYp-VQspI+uLYL...........................t.t.sppp.pstsstscstEpoAl-lAA-QhRhWPTlsc-cQpELlppEESTlaSQAIHaANpM ...............................................................hht.....h.sSA...pRLEVlRsCl....shlF-sK.....hh-scKhhPAshRsL+s+sAR.sLspELth.a........lp.p.N.......+A..hLDcQQF-..hllRhMNssLQ-..sos..h-Easl..AAuLL.PLs.osF...hR........K.L....us...G...l...pQ.FAY..o.slQ........-H.slWps.QFWEuhFYs...sVQp.pl+...sLYLp...............................................pp.t......t.........t..t.p...t.t.......sp.tp.os.h-lss...pQ.t.....s..ht.t.ptphhppEEshlhuQAhHaAshM............................................................... 0 45 61 105 +12170 PF12336 SOXp SOX transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00505. There are two conserved sequence motifs: KKDK and LPG. This family is made up of SOX transcription factors. These are involved in upregulation of nestin, a neural promoter. 
22.10 22.10 22.10 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.16 0.72 -3.37 6 314 2009-09-03 15:46:08 2009-09-03 16:46:08 3 3 110 1 118 281 0 77.80 43 26.18 CHANGED YRPRRKTKTLLKKDKYSLPGsLls..ssusss..sVusG..hDsY.sHMNGah..NGuYShMQ-.QLuhspHsshss..............ppl.HRY ............YRPR..RKTKTLhK.K.D....KY...s...L.P....u..s..lls...................ssu...sh...t..........sVu.....u....................h.s........th..shh.NGa.......suu...t............hhtc..Lu......Qh.u.hsu........................................................................... 2 18 32 68 +12171 PF12337 DUF3637 Protein of unknown function (DUF3637) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00073, Pfam:PF08935. 25.00 25.00 134.10 133.10 20.30 19.30 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.32 0.72 -3.99 2 82 2009-09-03 15:48:34 2009-09-03 16:48:34 3 4 13 0 0 84 0 67.00 90 4.17 CHANGED MACLKhFSLKsKsKsHSYSPRtlELKaN.DFsFKPRPLAsLLRLEPSDoKhRRVECsEVEhDSWYPN MACLKIFSLK+KsKSHSYSPRtlELKYNSDFAFKPRPLAPLLRLEPSDTKTRRIECAEVEYDSWYPN 0 0 0 0 +12172 PF12338 RbcS Ribulose-1,5-bisphosphate carboxylase small subunit Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00101. There is a conserved APF sequence motif. There are two completely conserved residues (L and P) that may be functionally important. This family is the small subunit of ribulose-1,5-bisphosphate. 
25.00 25.00 28.00 26.30 22.50 20.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.37 0.72 -4.19 46 352 2009-09-03 15:49:56 2009-09-03 16:49:56 3 4 116 0 42 353 0 40.00 53 24.28 CHANGED ASShh.So.AsVAo....RsssAQA.oMVAPFsGLKSsuuFPlTRKsNss ..............uuhh..so.sshss............ssupA..ohVAPFsGLKSoA.uFPVo.RKsss.... 0 6 22 34 +12173 PF12339 DNAJ_related DNA-J related protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 130 amino acids in length. The family is found in association with Pfam:PF00226. There is a conserved YYLD sequence motif. Mostof the sequences in this family are annotated as DNA-J related proteins but there is little publication to back this up. 25.00 25.00 42.80 29.00 24.20 15.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.72 0.71 -4.27 41 208 2009-09-03 15:51:57 2009-09-03 16:51:57 3 2 207 0 51 141 15 125.50 47 61.22 CHANGED tpsPLlhsllplLpp.pspuhp.apLhppLpppshh..splc.ssptsLF+ppFLLhNALYpLQppLhscp..hLplpshsIpLh..sh..tsssts..h.....t....sDsLRcYYLDWpsa.csspspVccLLssFWpcap .......................................-NPLlWPIhplL+c.pPpGWKlHsLAscLpchGhh....ssL..D.pPccDLFKRNFLlMNALYQLQ-pLaP-p..WLQVcAMsIhLhshhp......sp.tp..l......-....sDPLR-YYlsWpNY.-ssEu-V+RLLspFWoRYp............. 0 13 25 40 +12174 PF12340 DUF3638 Protein of unknown function (DUF3638) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 230 amino acids in length. There are two conserved sequence motifs: LLE and NMG. 
25.00 25.00 25.60 27.30 23.00 21.80 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.41 0.70 -5.37 26 218 2009-09-03 15:54:27 2009-09-03 16:54:27 3 22 110 0 186 233 13 197.50 30 6.43 CHANGED GHsNWcPh-aP-WLLLEIEusIhIRpsQhpVAtphIsPsuGsNuVLQhNMGpGKoSVIlPMVAusLAssspLlRllVPKsLhpQstQlLpo+LGGLlsRclhHlPFSRcTpsststlpthpplaccshppuGllLs.PEHlLSFKL.GLpplh-u+hs.ApshlchppWhcphuRDllDESD.sLulKhQLIYs.GoQpsVDGpPcRWplsQsLLuLVppashsLppca ..................taps......PcaLlhEh-sslhlRttQh....p....lApp.h.h.s.....s...t..ps..pss.lh.QhpMGpGKooVIhPhluh..h..L......A....ss..p........p...LlpllVscs.Lhpph.hphLpp+l.u.u.l.l.sR.plhplPF..........sRph..p..h.s................p.......t.h..ph....h.t..phhppsh.p...ptG.ll.l.spPE..p...lhSh.....p......Lh.s.....l......p.p.h.....h......p...t..p..........t..............s..........pt........hhp....htp.hh.p.p...sp......D...llDEsD..ht.h+hpLlYs.Gtt.slp.t..s.RW.hhptlhthh...........h.................................... 0 70 126 164 +12175 PF12341 DUF3639 Protein of unknown function (DUF3639) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00400. There are two completely conserved residues (E and R) that may be functionally important. 21.60 21.60 21.60 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.76 0.72 -6.62 0.72 -3.94 35 247 2009-09-03 15:55:19 2009-09-03 16:55:19 3 17 204 0 161 240 3 26.90 40 2.85 CHANGED GEplpulAsussalsVsTStsalRlFo ....sEplpAlulupu...alAssTSspalRlFo. 0 38 75 125 +12176 PF12342 DUF3640 Protein of unknown function (DUF3640) Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 25 and 211 amino acids in length. 
25.00 25.00 45.10 44.30 17.50 16.00 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.59 0.72 -6.63 0.72 -4.41 5 29 2009-09-03 15:56:06 2009-09-03 16:56:06 3 4 2 0 0 24 0 25.10 79 5.26 CHANGED MSLLTNRhCRRVDKDQWGPGstGRTP MSLLTNRhsRRVDKDQWGPGshG+sP. 0 0 0 0 +12177 PF12343 DEADboxA Cold shock protein DEAD box A Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 68 and 89 amino acids in length. The family is found in association with Pfam:PF00270, Pfam:PF00271, Pfam:PF03880. This family is the C terminal region of DEAD box A, a protein expressed under conditions of cold shock which is involved in various cellular processes such as transcription, translation and DA recombination. 22.50 22.50 22.90 22.90 22.30 22.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.90 0.72 -3.72 6 500 2009-09-03 15:56:51 2009-09-03 16:56:51 3 4 494 0 21 135 1 63.20 88 10.05 CHANGED GGERR..GG...GRs.FuGERREG.....GRp..G-RREGuss........uGERR.u...RDGQR..APRRDDuAA.......ssRRRFG.DA .........GGERR...GG.....GRG.Fu.GERREG........GRNFS.GERREGGRGDGRRFSGERREG....R........APRRDDSTG............RRRFGGDA.. 0 1 5 13 +12178 PF12344 UvrB Ultra-violet resistance protein B Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00271, Pfam:PF02151, Pfam:PF04851. There are two conserved sequence motifs: YAD and RRR. This family is the C terminal region of the UvrB protein which conveys mutational resistance against UV light to various different species. 
21.20 21.20 21.30 26.80 20.90 18.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.14 0.72 -4.43 509 4463 2009-09-03 16:03:57 2009-09-03 17:03:57 3 9 4402 11 984 3212 2153 43.90 56 6.48 CHANGED hYADc.lTcSMpcAI-ETpRRRphQhtYNccHGITPpo.lpKplp- .......hYAD+..ITsSMpcAI-ETpRRRchQhtYNccHGIsPps.lpKclp-..... 0 335 664 843 +12179 PF12345 DUF3641 Protein of unknown function (DUF3641) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF04055. This family consists of proteins which are commonly annotated as Radical SAM domains but there is little annotation to back this up. 21.50 21.50 21.50 24.70 21.40 20.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.86 0.71 -4.31 83 232 2009-09-03 16:04:57 2009-09-03 17:04:57 3 10 220 0 99 252 228 133.40 46 40.40 CHANGED LPPsQtpLEt-YKctLtpcaGIpFNpLaTITNhPIuRFtphLpppGpapsYhphLpssaNssslpslMCRsplSVsWpGplY..DCDFNQMLslPs.......pspt.hplp-Lhp.ss...ltsppItsusHCYGCTAGsGSSCGGu ...........LPPsQtsLEt-YKcpLtccaGIpFNpLaTITNhPIuRFtphLt........ppsphpsYhphLtssaNssslpslMCRsplSVsWpGpLY..DCDFNQhLslsh...................supt.hplp-lhpts...htsppItsusHCYGCTAGsGSSCGGu.................... 0 36 73 93 +12180 PF12346 HJURP_mid HJURP_repeat; Holliday junction recognition protein-associated repeat Coggill P pcc manual Domain Vertebral Holliday junction recognition proteins carry an SCM3 domain at their N-terminus as do the eukaryotic fungi, but they also carry this central, conserved region. The function of this family is not known. Further downstream there is also a repeated domain, also of unknown function. Investigation of Scm3 and associated proteins is likely to be directly relevant to understanding the mechanism of HJURP-mediated CENP-A chromatin assembly at human centromeres. 
25.00 25.00 68.70 48.10 19.10 19.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.46 0.71 -3.96 4 36 2009-09-04 12:32:37 2009-09-04 13:32:37 3 6 24 0 14 39 0 105.50 61 17.80 CHANGED MSRLLSTKPSSIISTKTFIMQNWNSR+RHRtKSRM..NKTYC+GuR+SQRSuKEsFlPCSEPVK-TGALRDCKNlLDVus+KTGLKLEKAFLEVNKPQIHKLcPSWKELKVTPSKYSS .MSRLLSoKPSsIISTKT.FIhQNWsSRRRH+pKsRM..N+TaC+Gu+hSpRSu+-s.ssCSEPsKEsGsLRDCcNlLDVSs+K...TGLKLEKAhLEVNKsQlHKLsPoWKELp....shPpK..s.......... 0 1 1 1 +12181 PF12347 HJURP_C HJURP_repeat; Holliday junction regulator protein family C-terminal repeat Coggill P pcc manual Domain Although this family is conserved in the Holliday junction regulator, HJURP, proteins in higher eukaryotes, alongside an Scm3, Pfam:PF10384, family, its exact function is not known. The C-terminal region of Scm3 proteins has been evolving rapidly, and this short repeat at the C-terminal end can be present in up to two copies in the higher eukaryotes. 21.60 21.60 22.10 22.10 21.20 20.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.97 0.72 -4.39 13 404 2009-09-04 15:55:35 2009-09-04 16:55:35 3 7 88 9 135 350 1 63.80 39 14.80 CHANGED +phs.s..ppsscP-hph...s+sEc+Y+EIcEEFDphaQpYpLust..PtphshsssssVSshpuus ................G....C-SP.-PD.sp.sl....oPpoE-KY+KIsE-hDhhhpch.p.h..ss........lP.....s.N.......F......sMPVolPVos.suh.s............................ 0 16 26 59 +12182 PF12348 CLASP_N CLASP N terminal Mistry J, Wood V jm14 Manual Family This region is found at the N terminal of CLIP-associated proteins (CLASPs).\ CLASPs are widely conserved microtubule plus-end-tracking proteins that regulate the stability of dynamic microtubules [1][2]. In yeast, Drosophila, and Xenopus, a single CLASP orthologue is present. In mammals, a second paralogue (CLASP2) exists which has some functional overlap with CLASP1 [2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.32 0.70 -5.00 57 1343 2012-10-11 20:01:02 2009-09-07 16:10:40 3 32 291 1 840 1391 6 203.10 18 24.51 CHANGED l-pphpphhshFpt.+EoEtNWthRppslhplRpll+Gs.sspsh.sshlssl+..th.puIh+shsSLRTolsssuCpLlp-luhhhtsp.l-sh.s-hlh.sLl+hsussKclsupsAshslssllssss..ss+l.hphlhhAhp-KNspsRthustWLphhlp+hs.............pppthptss.....hh....chhccslt+sLsDAsssVR-shRssaWpa..................hphaPs+Acplh..spL-sps++hlc .....................................................................................t.tp.Wp.+.p....sl.tl....pt..h...h...............tt...........................tt.hh.ttl+.........shht.p.hp.shp..st...ls....ppuh....thl....s.p.Ls....p...t..........h...tp..........p...hcph....sct....ll..s............sLlphh...s..s.....s..p.c.h...lppsuptslpt.lhp...p.....ss......s...pl.....h.thl.ts..s.h...ps.K..s.stlRpp........sht.aLtt.hlpphs.................................h..t...pph........h...........phlht...tl...t...ptlpDsssplRptAppshht.h..........................hp.h..hs.t............................................................................................................................... 0 270 417 646 +12183 PF12349 Sterol-sensing SREBP-CAP_SSD; Sterol-sensing domain of SREBP cleavage-activation Wood V, Coggill P pcc Wood V Family Sterol regulatory element-binding proteins (SREBPs) are membrane-bound transcription factors that promote lipid synthesis in animal cells. They are embedded in the membranes of the endoplasmic reticulum (ER) in a helical hairpin orientation and are released from the ER by a two-step proteolytic process. Proteolysis begins when the SREBPs are cleaved at Site-1, which is located at a leucine residue in the middle of the hydrophobic loop in the lumen of the ER [1]. Upon proteolytic processing SREBP can activate the expression of genes involved in cholesterol biosynthesis and uptake. 
SCAP stimulates cleavage of SREBPs via fusion of the their two C-termini [2]. This domain is the transmembrane region that traverses the membrane eight times and is the sterol-sensing domain of the cleavage protein [4]. WD40 domains are found towards the C-terminus. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.75 0.71 -4.76 34 952 2012-10-02 18:57:54 2009-09-07 17:54:24 3 33 349 0 545 2789 1133 148.90 25 14.06 CHANGED hlcS+hsLulsGllhlhholssohGhhuhhG..lshohlshpVhPFLlLulGlDNhFlllpuhppsspshs.....cpRlucuLucsGsSlhloulo-hlsFh.lGshoshPAlpsFslaAulAlhhDFhhQlThFsAlLulDh+Rp.tpschchhsphps ..........................................................h..tS+hhL.uluulhhl.hhS...h...hhu.l..u..l.....h.....s...h.....l....G.......ls..hs.h...............l.h.t.ps.l..P.F.......L...l....L...............s..l......G.......l..-...p...h..h.....l.L....s....p....s.......h......h.....p....s.......s...p.p.tph.........................ppp...l...u......c.ulsp...hGso.lhh.s............sls.....ch..ssh.............h....l...G......s...h..o..s.............l..s.u..l..c.tFs......hh..A.uh....sl.lhs.ah.....l...........h.o...h...F.s.u.l.L.o...l.clchp...ttt..........t............................................................... 0 161 245 407 +12184 PF12350 CTK3_C CTD kinase subunit gamma CTK3 C-terminus Wood V, Coggill P pcc Pfam-B_12814 (release 23.0) Domain The C-terminal domain kinase (CTDK-1), is a three-subunit complex comprised of Ctk1, Ctk2, and Ctk3, that plays a key role in regulation of transcription and translation and in coordinating these two processes. Both Ctk2 and Ctk3 are regulated at the level of protein turnover, and are unstable proteins processed through a ubiquitin-proteasome pathway. Their physical interaction is required to protect both subunits from degradation, and both Ctk2 and Ctk3 are required for Ctk1 CTD kinase activation [1]. 
The mammalian P-TEFb is mirrored by the combined complexes in yeast of the CTDK1 and the Bur1/2 [2]. It is not clear what independent function this C-terminal domain has. 25.00 25.00 33.20 37.90 18.30 18.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.78 0.72 -3.95 31 130 2009-09-09 12:11:31 2009-09-09 13:11:31 3 4 127 0 99 127 0 67.90 39 23.79 CHANGED lsKppI.hpRhE-DRERHKR.+EshWslst................psEF.cphWschsshs-sp.psshc-hpEhpphspt .....sK+QI..pRlEEDRERHKRhREshWslstss...........................................psEa.cphW-cso..shs--D.htsh--ht-hpphh..t.................. 0 26 54 84 +12185 PF12351 Fig1 Ca2+ regulator and membrane fusion protein Fig1 Mistry J, Wood V jm14 Manual Family During the mating process of yeast cells, two Ca2+ influx pathways become activated. The resulting elevation of cytosolic free Ca2+ activates downstream signaling factors that promote long term survival of unmated cells. Fig1 is a regulator of the low affinity Ca2+ influx system (LACS) [1], and is also required for efficient membrane fusion during yeast mating [2]. 
25.00 25.00 25.10 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.38 0.71 -4.40 32 158 2012-10-03 00:20:40 2009-09-10 11:12:12 3 2 113 0 119 187 0 182.80 33 65.11 CHANGED hhppssss......sstp.lplRsGYhGlClspss..............ss....hhC.....ptshssLsshlp..................ssss.sLNLltlApsFpspllaPhllhssllLshlsFlllsh...............h..lhPh.st...lpphshsluhls..slhhhlushapHhuss.AusshstshohGh...lpuphGspAhshsW..huFsh.lhl.sslulhhhhhp ..................................................................s....ssh...sssp.lplRlGYhGlClpsss......................ushlC.........spsussLup.lp................................spp-.sLNL..lhlAppF+-tlVFPYLl.hs..ulllsh..ls..hllLshhs...................................h..lpPFssp...VsphsLulhhlu..slhhhl.ushWQHsuus.Ausplspshu.Gs....lp.utsGssAhlhuW...huFsh.lll....lslGLhhhhLp...................... 0 22 58 98 +12186 PF12352 V-SNARE_C Snare region anchored in the vesicle membrane C-terminus Coggill P pcc manual Domain Within the SNARE proteins interactions in the C-terminal half of the SNARE helix are critical to the driving of membrane fusion; whereas interactions in the N-terminal half of the SNARE domain are important for promoting priming or docking of the vesicle Pfam:PF05008. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.74 0.72 -3.90 20 1258 2012-10-01 21:14:52 2009-09-11 17:34:43 3 20 319 2 844 1215 13 65.10 23 27.89 CHANGED -tlh+pss+lcsSpRhhsEs.shGtullpsLppQRpsLpssps+lpssssslstosplLpshpRRh ..........................h.cppp.plpp.op.p...hhs-s.p.hGtsshpsLtpQRppLppspp+....lhssss...plstuppllptlp+R....................... 0 290 470 692 +12187 PF12353 eIF3g Eukaryotic translation initiation factor 3 subunit G Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 130 amino acids in length. The family is found in association with Pfam:PF00076. 
This family is subunit G of the eukaryotic translation initiation factor 3. Subunit G is required for eIF3 integrity. 22.00 22.00 22.00 22.00 21.90 21.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.80 0.71 -4.22 47 378 2009-09-14 13:49:11 2009-09-14 14:49:11 3 11 284 0 262 367 3 119.60 34 40.69 CHANGED sPssplhps.sGsKslhpY+hs--GcKlKlspph+..hhpc.+Vs+slAcRKpWsKFGsspsssP.GPsstTT.luE-lhhphspshcppppccppp.......ttpsssshlpCRlC.p.G-HaTs+CPaKsths ........................................s..phhss.p..GhKslhpY+...hs-.-.G.c.......K.lKl.s.+ph+..h.pc.+ls+tVA...c.R+........s.W..sKFGpsptc.s.s.GPs.ss....TT.tl.uE.-lhhp.........hssshcptpppcppp...........httthspphlpCRlC..p..G-HaTs+CPaK-sh.s........................ 0 86 144 213 +12188 PF12354 Internalin_N Bacterial adhesion/invasion protein N terminal Gavin OL lg7 Prosite Domain This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00560, Pfam:PF08191, Pfam:PF09479. There are two completely conserved residues (I and F) that may be functionally important. Internalin mediates bacterial adhesion and invasion of epithelial cells in the human intestine through specific interaction with its host cell receptor E-cadherin. This family is the N terminal of internalin, the cap domain of the protein. The cap domain is conserved between different internalin types. The cap domain does not interact with E cadherin, therefore its function is presumably structural: capping the hydrophobic core. 24.10 24.10 24.50 24.30 23.70 24.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.70 0.72 -4.35 32 845 2009-09-14 13:50:09 2009-09-14 14:50:09 3 116 41 39 12 749 0 55.90 39 10.30 CHANGED h++p.hl+thlhhhLlhhlsh.hlshs.ssclpA.ssIspPsPINpIFPDssLAcsl+p ...........++pshhK.hLlsh.Llhhhul..WIsh.usGscsQA..to...IspPTPINQIFsDsALA-slKs.. 
0 12 12 12 +12189 PF12355 Dscam_C Down syndrome cell adhesion molecule C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00047, Pfam:PF07679, Pfam:PF00041. The Down syndrome cell adhesion molecule (Dscam) belongs to a family of cell membrane molecules involved in the differentiation of the nervous system. This is the C terminal cytoplasmic tail region of Dscam. 25.00 25.00 29.90 37.40 19.60 20.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.78 0.71 -3.86 2 83 2009-09-14 13:51:33 2009-09-14 14:51:33 3 25 26 0 21 106 0 122.90 86 5.83 CHANGED tGuuhass.uPEYDDPANCAPEEDQYGSQY.G.YGtPYDHYGSRGShGRRSlGSARN.PssGSPEPPPPPPRNHD.sNSSFNDSKESNEISEAECDRDphsptNYGsstRu...KDthTTEEMRK ........QGGSLYSGPGPEYDDPANCA....PEEDQYGSQYGGPYGQPY.DHYGS....RGSMGRRSI..GSARN.PG.NGS...........PEPPP.PPPRNH.D...MS......N...SSFNDSKE...SNEISEAECDRDHGPR.GNYGAVKRSPQPKDQRTTEEMRK............ 0 7 9 18 +12190 PF12356 DUF3643 Protein of unknown function (DUF3643) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 217 and 4852 amino acids in length. There is a conserved TLA sequence motif. 
19.40 19.40 19.70 19.50 17.40 15.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.05 0.71 -4.91 5 94 2009-09-14 13:52:16 2009-09-14 14:52:16 3 6 62 0 49 100 0 143.50 50 4.10 CHANGED pVApL+coushsaRCs....s.S.hE.sL.sPSPuHlHCVAAILW+SYEL.VpYDLPsLLop-LFEolYcWSMsL-pspPLKKSVDSLLCSLCYI+PcYFohLLsWMGIpLpssus.......phs.TDDsKcpp...ulTDDSKsAp...hsu.h.p.sspshosLlLsESaLsTLAhACQ ..........................lhth+.s..hthhss..............sh..l.PusualHCVAuILW+oach..lpYDLssLLs....p-LFEhLapWohoLssN.......sLKKAlDSLLCShCaI+Ps.hFolLhsWMGIhsP.s.lpsc.....hphShoDDsKcp-.......uhTDDSKssp.........................usLsLo-SpLuTLAuuSQ.................................................. 2 10 14 31 +12191 PF12357 PLD_C Phospholipase D C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00168, Pfam:PF00614. There is a conserved FPD sequence motif. This family is the C terminal of phospholipase D. PLD is a major plant lipid-degrading enzyme which is involved in signal transduction. 21.30 21.30 23.80 27.40 19.20 18.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.45 0.72 -4.44 66 288 2009-09-14 13:53:00 2009-09-14 14:53:00 3 8 64 0 147 296 0 73.40 46 9.38 CHANGED l--sFhcPESlECV++VNplA-c.WchYuu--l...s-h.G.HLL+YPlpVspc.GpVssL.PGtEhFPD.stu+llGspo .....--sFhcPESlECV++VNplA-cpW...chYuu...--l......p-l.G...HLLpYPlpVs.......p-.G.cVstL.PGtEt..FPD.stu+lLGs+..... 0 22 86 118 +12192 PF12358 DUF3644 Protein of unknown function (DUF3644) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 65 and 80 amino acids in length. 
21.80 21.80 21.80 22.00 21.50 21.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.46 0.72 -3.71 43 219 2009-09-14 13:53:48 2009-09-14 14:53:48 3 5 217 0 50 180 6 70.60 22 23.26 CHANGED sl.hhspuWphhL+uhhhpcthc......................hhth.tsGss+shpLppslcp.......s.sps..lt.pNlptLh-hRcp.h ......h.h.ssuaphhh+uhhhpctss...............................hh...ppss.s+shpLppslcc...........scsph.hptsLptlhchRspu...... 0 12 31 42 +12193 PF12359 DUF3645 Protein of unknown function (DUF3645) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. There is a conserved HPD sequence motif. 25.00 25.00 27.30 26.20 22.40 17.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.53 0.72 -4.82 27 194 2009-09-14 13:54:36 2009-09-14 14:54:36 3 20 105 0 170 208 11 34.80 53 1.11 CHANGED ls.sR...phLAVPahuKDsPotpSEFuHPDVlIsLT ...........p.sR......shlAVPacAKDsPStpSEFuHPDVsIlLT.. 0 65 114 150 +12194 PF12360 Pax7 Paired box protein 7 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00046, Pfam:PF00292. Pax7 belongs to a family of genes that encode paired-box-containing transcription factors involved in the control of developmental processes. Pax7 has a distinct role in the specification of myogenic satellite cells. 21.60 21.60 24.10 33.20 19.00 17.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.31 0.72 -3.68 12 179 2009-09-14 13:55:18 2009-09-14 14:55:18 3 6 54 0 53 178 0 44.40 70 9.66 CHANGED uuuDuuSAYsLuosRHuFSuYSDoFhu.ssusuNtMNP.luNGLSPQ .....suDoSSAYsLsSsRHuFSSYSDSFhs.PouPSNtMNP.luNGLSPQ. 0 2 8 22 +12195 PF12361 DBP Duffy-antigen binding protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 449 and 1061 amino acids in length. The family is found in association with Pfam:PF05424. There are two conserved sequence motifs: NKNGG and QKHDF. This family is part of the Duffy-antigen binding protein of Plasmodium spp. This protein is an antigen on these parasites which enable them to invade erythrocytes. 25.00 25.00 274.00 273.10 18.50 17.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.99 0.70 -4.90 4 71 2009-09-14 13:55:58 2009-09-14 14:55:58 3 7 5 0 3 82 0 294.20 88 38.23 CHANGED SpshhQsVcSSpuE.KVQGDSAHGNlNsGttpSTTscAssGsGQNGNQTPAcSsVpcoDhsEuhSAKNVDspK.VoERusDTsuVTsIsEAGKEsLssopupsSEoTV-ususGDGhVpsAshPVhsSENPLVT.aphht.SKDNSD...ssGS......spssEohAssDSsuK..GEstcsQcstpsKus+DoSspSsuTSu...uoGD....shssl-pt.stusP.pt.......s..VGu.......KctpsE-opspKDutTV..........AGGoTss.pcN...DT-NsNlPsssNKQsEGATALStsESLESNESVH+ThssTTHuLENKNGGsEKDhQKHDFhN SNPISQPVDSSKAE.KVPGDSTHGNVNSGQDSSTTGKAVTGDGQNGNQTPAESDVQRSDIAESVSAKNVDPQKSVScRSDDTASVTGIAEAGKENLGASNSRPSESTVEANSPGDDTVNSASIPVVSGENPLVTPYNGLRHSKDNSD...SDGP.........AESMANPDSNSK..GETGKGQDNDMAKATKDSSNSSDGTSS...ATGD....TTDAVDREINKGVPEDR.......DKTVGS.......KDGGGEDNSANKDAATVVGEDRIRENSAGGSTNDRSKN...DTEKNGASTPDSKQSEDATALSKTESLESTESGDRTTNDTTNSLENKNGGKEKDLQKHDFKS 0 0 0 1 +12196 PF12362 DUF3646 DNA polymerase III gamma and tau subunits C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00004. The proteins in this family are frequently annotated as the gamma and tau subunits of DNA polymerase III, however there is little accompanying literature to back this up. 
23.60 23.60 24.20 25.00 23.00 23.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.25 0.71 -10.10 0.71 -4.15 77 299 2009-09-14 13:56:33 2009-09-14 14:56:33 3 3 296 0 103 270 1082 115.70 35 19.43 CHANGED sthsoFc..cllpLhpppRDltLtsplcpslRLVpa..pPGRIEhp.sssAspsLsscLuptLppWT.GpRWhVolssp..sGt.PTltEpcpstcpthpscAppcPhVpAlLstFPuAcIs-.lR ................s..hsoap..-lVsLsccpRDhtl+htlcssVRLVph..pPGR..l-lphss..sAPpsLss-Lup+LppWT.Gt.RWsVslu..p-....sGt..TlsEtctttcpphhscAcscP.hVtAlLstFPGA+Ih-VR............................ 0 30 66 78 +12197 PF12363 DUF3647 Phage protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 119 and 164 amino acids in length. The proteins in this family are frequently annotated as phage proteins, however there is little accompanying literature to back this up or to describe the nature of these phage proteins. 21.30 21.30 22.30 22.90 21.20 20.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.32 0.71 -3.97 39 387 2009-09-14 13:57:27 2009-09-14 14:57:27 3 1 317 0 24 178 4 110.40 28 83.37 CHANGED plpIsGK....ph-hKFshtFlpclsKphuh..ptt...t..h....phssGhs.tlhsslhp.tss.slschlcsuhhptppph.opccl-phlcp...h-pp..p.hpclhpcllpcl.ppushhKptscphhp ..............lcINGK....ph-lpFshtFlREl-c+hsh....cst....t..h.....phu.G.ls.tlh.ssLhp.tssssls-lhpsAsut.ppph.oppcI-chl-p.....p....pt...p..lcpLhs-lLpEl.sppshs+thlcph.................... 0 8 16 21 +12198 PF12364 DUF3648 Protein of unknown function (DUF3648) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 53 and 3115 amino acids in length. There are two completely conserved residues (A and F) that may be functionally important. 
20.60 20.60 22.90 22.90 18.40 18.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.59 0.71 -4.18 9 103 2009-09-14 13:58:36 2009-09-14 14:58:36 3 3 12 0 3 85 0 108.20 64 10.89 CHANGED hSKltuSahSSuhAssusYohSRhWRphsshtsht.............haGs............sthspll+-hsAshs.sshRco.....lFPaolcAhh+t.sthupshspstpsshshhsl.pspusspPphhsu.shhsLCcPPL..........ossA++ulRpuK .................MSKLGAsF+SSoFAVRSDYhISRIWRshNTlGVLH.............aFGo............tshTpVlKEaSAshPIVPLRKS.....sFPFSVRAAVRAIcAAu+PhssP+usshsshPVhssuGsspPp.lAGh.RhuLCcPPL................................ 0 1 2 3 +12199 PF12365 DUF3649 Protein of unknown function (DUF3649) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. 21.20 21.20 23.20 23.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.84 0.72 -7.01 0.72 -4.41 39 161 2009-09-14 13:59:17 2009-09-14 14:59:17 3 1 142 0 52 149 2 28.00 39 24.75 CHANGED sFusRSAhRA.WhGlllsuslLuslhhhs ..sFAsRSAWRA.WhGlhlsuhlLuulshh........ 0 11 23 37 +12200 PF12366 Casc1 Cancer susceptibility candidate 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 216 and 263 amino acids in length. Casc1 has many SNPs associated with cancer susceptibility. 
21.30 21.30 21.80 21.30 20.80 21.20 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.44 0.70 -4.87 25 204 2009-09-14 13:59:59 2009-09-14 14:59:59 3 4 60 0 109 201 0 216.40 19 39.55 CHANGED hsp.c.lphsphshph+slphchD+LuhauhuFcphhchs...............hpcWthQ.s.ps..Ellhsl-.....oh+sch.haIo..........spuh+uhspchtKphsu+shKhlcI.cP.p-hppLpcthhscslslFsEp-sphYl-ph.hs...KHluh...E.hshcthslpstlhphhRschs+hs.....s+tsIlL............th+..........ssKs.pcss.sp...........h+loP-sssFlcVo.tss-plslht.shts ...........................................................................................ph.h..hp.shth+hlphchD+lu.h.shu......h..p.h.phs.......................hppaphp...s.pp......phl.hphp........pt.hhphp.hhp........................ttht....t.tt........thsst..c..p.h....c............p.t.L.pthhssshplF.pp-t..hhph...h....cphth...p.hshcphshlGulaphshhchs.ps......t+tphll.............h.................................p.ht.t....t.sh......................hh.hs..s.t...h.s.....hltlT.phspplhhht.shh..................................................... 0 25 32 75 +12201 PF12367 PFO_beta_C Pyruvate ferredoxin oxidoreductase beta subunit C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF02775. There are two completely conserved residues (A and G) that may be functionally important. PFO is involved in carbon dioxide fixation via a reductive TCA cycle. It forms a heterodimer (alpha/beta). The beta subunit has binding motifs for Fe-S clusters and thiamine pyrophosphate. 
21.40 21.40 21.40 21.50 21.30 21.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.45 0.72 -4.05 69 635 2009-09-14 14:37:33 2009-09-14 15:37:33 3 2 596 0 197 390 60 70.40 42 24.41 CHANGED CVoFNKlNTapWa+E+lhpL...-.saDPoD.+ttAhppuhE....s-p.lPlGllYcsc.+soaE-plshh.....ppsPL ..................................................CVTYNK.lNTYDWF............K...........E+Lsplp...........shE..sY.....D.....s...........o...........D....+p....hA.....h..c......plhE........p-s..LspGllYQsc.ppPoYEpplpth...pcps.................... 0 74 139 176 +12202 PF12368 DUF3650 Protein of unknown function (DUF3650) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00581. There is a single completely conserved residue N that may be functionally important. 20.90 20.90 21.70 21.40 20.10 19.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.73 0.72 -6.96 0.72 -4.45 23 786 2009-09-14 15:50:37 2009-09-14 16:50:37 3 2 782 0 48 308 0 27.80 55 8.66 CHANGED ECRsHPRNRYVpcHsLS-EElpcRLpsl .ECRsHPRNRYVpcppLoppEhtcRLssl..... 0 13 23 35 +12203 PF12369 GnHR_trans Gonadotropin hormone receptor transmembrane region Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00560, Pfam:PF00001. There are two completely conserved C residues that may be functionally important. This family contains the transmembrane region of Follicular stimulating hormone and leutenizing hormone - the two major gonadotropin hormone receptors. These receptors are G protein coupled receptors involved in development and maturation of germ cells in both fecund genders. The transmembrane region is conserved between the two different receptors while the extracellular ligand binding domains are less well conserved. 
22.10 22.10 22.60 22.10 20.40 21.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.73 0.72 -3.53 14 158 2009-09-14 15:52:14 2009-09-14 16:52:14 3 24 102 0 24 139 0 60.60 56 13.56 CHANGED +.+.QhotLHPICN+S.h+pD.D-hspspt....pRposh.D..ssaups.s...s......E..a.DYDL.CpEVVsVsCSPc ...........h..p..othpslCNtS...hpp-hs..spspt........pc.ShAEDt..SuYupGFDhhhs.......E..F.DYsL.CNEVVsVsCSPK.. 0 1 3 6 +12205 PF12371 DUF3651 Protein of unknown function (DUF3651) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. This family is frequently annotated as a membrane protein but there is little associated literature to back this up. 21.70 21.70 21.90 22.40 21.60 20.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.99 0.72 -3.97 26 305 2009-09-15 14:06:27 2009-09-15 15:06:27 3 5 99 0 183 275 1 75.30 24 8.61 CHANGED slsFlPp..phGthcuslhlhT.shGs.hhhtl.........pu.uspsPa+L.....pshhthplshssshs..lslaNPaspsLplscl ..........llFlsp..ppGsl-sslhlpT.shGh.hph.V............................................huhssp......pl..........hhlshsss....lslhNstst.ltl.p.............................................. 0 32 49 98 +12206 PF12372 DUF3652 Huntingtin protein region Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF02985. This family is in the middle region of the Huntingtin protein associated with Huntington's disease. The protein is of unknown function, however it is known that a polyglutamine (CAG) repeat in the gene coding for it results in the development of Huntington's disease. 
20.90 20.90 21.10 21.30 19.90 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.85 0.72 -4.35 16 142 2009-09-15 14:07:37 2009-09-15 15:07:37 3 5 63 0 66 142 0 41.40 43 2.12 CHANGED hcslhths+.....lh.chlsGphsuuppsssht......psllpDhhhhp ..u.+tllthPK.....Ih.pLs-GlMASGpcusThAl..sshpsllcDLhVlp.... 0 11 13 36 +12207 PF12373 Msg2_C Major surface glycoprotein 2 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF02349. This family is the C terminal of major surface glycoprotein 2 of virulent bacteria. It is a virulence factor antigen. 25.00 25.00 29.60 28.60 21.50 20.00 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.94 0.72 -7.17 0.72 -4.73 17 82 2009-09-15 14:08:36 2009-09-15 15:08:36 3 7 3 0 0 84 0 29.70 54 3.13 CHANGED ph+LV+Rsh.....ssstVoEtElKAFDhsA.Ahp .+ltLV+Ruh.....scsuVoEsElKAFDhsuhAhp. 0 0 0 0 +12208 PF12374 Dmrt1 Double-sex mab3 related transcription factor 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 61 and 73 amino acids in length. The family is found in association with Pfam:PF00751. This family is a transcription factor involved in sex determination. The proteins in this family contain a zinc finger-like DNA-binding motif, DM domain. 21.20 21.20 21.60 21.50 21.00 19.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.45 0.72 -3.62 46 292 2009-09-15 15:40:47 2009-09-15 16:40:47 3 4 110 0 38 292 0 63.10 31 39.59 CHANGED ELGIspPlsL.uus-shVKsEssus.shh...h-.ups.o.s.....ssuSsusssu+.shpssP.uhsuRsas-usSD .........ELGIspPlsL.suu-hhlKpEssus.sph..hh-..u.......sS.ops..........s.ssssoss..ss....sh..hps.s.uhssph...tt.s............................ 
0 1 3 18 +12209 PF12375 DUF3653 Phage protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 112 and 194 amino acids in length. 22.20 22.20 22.20 22.30 21.90 20.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.72 0.72 -3.76 20 109 2009-09-15 15:41:35 2009-09-15 16:41:35 3 5 94 0 19 77 0 76.70 28 50.50 CHANGED GFph.....ptsc.LlTPEG+plpPp-lthhulhhs.Apc.acphhc.ttts............h....................usllthscshh+p+ ........GatF..cpu+.LlTPpG+clsstplcshphphs.sRt.acp...hhp.tcts.............th......................h.lhhlt.shhph.h................................. 0 5 13 17 +12210 PF12376 DUF3654 Protein of unknown function (DUF3654) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 193 and 612 amino acids in length. 25.00 25.00 30.80 30.80 23.80 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.72 0.71 -4.35 15 56 2009-09-15 15:42:41 2009-09-15 16:42:41 3 2 7 0 27 53 0 127.60 42 32.99 CHANGED IpphPusLScEEK++ccchLppIKEYGEpLsTcEKQ-cIlcAQKIlCDACc.IWt+.cE-RhthlhttaSp+LpLKhhtus....sch.chchsLhthlDHchLlsAacEYt.hDVsuELV+QhhL..csc-IDcc.lscs...V ..........t.ssthsp-E+ch..c.hpphKcaGtpLpspE+hcpIlEsEKIVCsAC+cIChsL+EEELhGLhAEGuh+KsLKtph.u-......cchpcttY.....LEhthlDpplLLDAH.+EaG.t-Vs+ELV+QhLLGKcG+-ID++Yls+VAssV............ 0 27 27 27 +12211 PF12377 DuffyBP_N Duffy binding protein N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF05424. This family contains the N terminus of the Duffy receptor binding domain. 
25.00 25.00 28.70 73.30 21.90 21.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.99 0.72 -3.76 3 77 2009-09-15 15:43:41 2009-09-15 16:43:41 3 6 5 0 3 84 0 61.50 92 9.21 CHANGED cEEKDGEHKTDSKTDNGEGsNNLVMLDYDTSSNGHPAGTlDNVLEFVTEHEGNSLcNSSKGGN.PYD ...........GEEKDGEHKTDSKTDNGKGANNLVMLDYETSSNGQPAGTLDNVLEFVTGHEGNSRKNSSNGGN.PYD 0 0 0 1 +12212 PF12378 CytadhesinP1 Trypsin-sensitive surface-exposed protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 67 and 79 amino acids in length. This family contains trypsin-sensitive surface-exposed proteins called cytadhesins. Cytadhesins are virulence factor proteins which mediate attachment of bacterial cells to host cells for invasion. 25.00 25.00 25.10 31.60 23.50 22.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.01 0.72 -4.03 8 115 2009-09-15 15:45:13 2009-09-15 16:45:13 3 3 12 0 13 113 0 70.30 42 13.09 CHANGED hpoTshas.ct+PsW.........NshsGFssssoppptpu.pcps-..sSuPIuLPa-sYhsNlGNlhhhspuVllFGGN ...oTsphs.stcPpa.........NshsGFGlsGs.....uPpcasE..cupsPlpls.psa.sNlhsllhlspsVlhatt...... 0 11 11 11 +12213 PF12379 DUF3655 Protein of unknown function (DUF3655) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF08716, Pfam:PF01661, Pfam:PF05409, Pfam:PF06471, Pfam:PF08717, Pfam:PF06478, Pfam:PF09401, Pfam:PF06460, Pfam:PF08715, Pfam:PF08710. 25.00 25.00 56.30 55.20 24.80 23.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.47 0.72 -4.05 3 173 2009-09-15 15:46:42 2009-09-15 16:46:42 3 5 67 2 0 150 0 68.80 93 1.17 CHANGED MYCSFYPPDEEEDCEEYE-EEElsEETCEHEYGTEDDYKGLPLEFGASTEIlQVEEEEEEDWLDDAlEAc MYCSFYPPDEEEE.DDAECEEEEIDETCEHEYGTEDDYQGLPLEFGASAETVRVE.EE.EEEDWLDDTTEQS..... 
0 0 0 0 +12214 PF12380 Peptidase_C62 Gill-associated viral 3C-like peptidase Coggill P pcc MEROPS_C62.001 Family a positive-stranded RNA virus of prawns, that has been called yellow head virus protease and gill-associated virus 3C-like peptidase. The GAV cysteine protease is predicted to be the key enzyme in the processing of the GAV replicase polyprotein precursors, pp1a and pp1ab. This protease employs a Cys(2968)-His(2879) catalytic dyad. 18.80 18.80 19.60 643.90 18.20 18.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.85 0.70 -5.58 2 13 2012-10-02 13:45:52 2009-09-15 16:57:05 3 3 2 0 0 15 0 284.00 95 6.70 CHANGED FLVGLVTHEVpTGNsTpVEDLNKHPYNKYRpNIVRVYGERGDLNGFLSGK.LaFPRHIFDoCTDNTLTRHIRVTKGEtTHDIELLSEEYDATPFIKs-SPFAEAT.LKFuKLQRTQasYFVTADDIRlGSMSsDGYHNISTKDGDCGSllFDHLtNVVGAHIVGIusIPPVNGALTWNsEpEhLCGPNsDYDaDPpKVsPPKVWPVEslTALSTlLNQLNYVTGDAFTTPKLPTNYQLlGCETLDQYVNApNLVTGQFPQIKEALDDFINGYVANLQRGsEAYN FLVGLVTHEVNTGNNTRVEDLNKHPYNKYRSNIVRVYGERGDLNGFLSGKFLYFPRHIFDSCTDNTLTRHIRVTKGEETHDIELLSEEYDATPFIKI-SPFAEATVLKFGKLQRTQYAYFVTADDIRVGSMSsDGYHNISTKDGDCGSLLFDHLHNVVGAHIVGIASIPPVNGALTWNAEKEMLCGPNDDYDYDPEKVGPPKVWPVESITALSTILNQLNYVTGDAFTTPKLPTNYQLIGCETLDQYVNARNLVTGQFPQIKEALDDFINGYVANLQRGTEAYN 0 0 0 0 +12215 PF12381 Peptidase_C3G Tungro spherical virus-type peptidase Coggill P pcc MEROPS_C03.024 Family This is the protease for self-cleavage of the positive single-stranded polyproteins of a number of plant viral genomes. The protease activity of the polyprotein is at the C-terminal end, adjacent to the putative RNA polymerase. 
20.70 20.70 22.20 21.20 20.10 19.30 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.40 0.70 -5.11 3 29 2012-10-02 13:45:52 2009-09-15 16:57:36 3 4 11 0 0 48 0 225.10 36 9.79 CHANGED cKGLDKDPNMVoILos+LssISCVIlNlTPsRpAYhNVlRLCGTaVVCPAHYLEALEE-DTLYFICFSllIKLpF-PcRVTLVNoaQDLVVWDLGNSVPPSIDsLcMIPTl-DWD+FQDGPGAFuVTKYNu+aPTNYINTLDMIERIRANTQNPTGhYcMhGSpHTITTGLRYQMaSL-GFCGGLILRAsTsMVRKIVGlHVAGSpNHAMGYAECLlQEDL+pAIs+LuhD ..........................................h......D.sh.pLlss+.hssh.pl.hlst.ssh.h.hhsGLFlpushhLhPuHhlsh.lsppcp..I...hh.h..c..sl..h+....V.h.cs.+K.V.slV.N.....u.......p.Esslhsh.s.p.Vst+pDllc.aFtsu-shspa+t..s.ps...sLssh+a...Stc..s.spalps.L.ts-+h..-A.....D+Phohs-sppGpYhlRQGLEYphsohsGDCGuPllls-opshRKIsGlHVAGhst..p...u...+..uYAEoITQcDLhpAhppl...sso.................. 0 0 0 0 +12216 PF12382 Peptidase_A2E Retrotransposon peptidase Coggill P pcc MEROPS_A02.051 Family This is a small family of fungal retroviral aspartyl peptidases. 21.10 21.10 21.60 22.40 20.50 21.00 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.47 0.71 -4.37 2 4 2012-10-02 15:32:34 2009-09-15 16:58:21 3 1 2 0 3 7 0 137.00 90 10.28 CHANGED KpNsTpsRp.+KTNVSRIEYSSppFhpap++RYEMVLQAcLP-FKCSIPCLIDTGAQsNlITEETVRAHKLPTRPW.pSVIYGGVYPsKINRKThKL.IsLNGISIKTEFLVVKKFSHPAAISFTTLYDNNIEISSS ........................KNNTTNSRNLRKTNVSRIEYSSNKFLNHTRKRYEMVLQAELPDFK.CSIPCLIDTGAQANIITEETVRAHKLPTRPWSKSVIYGGVYPNKINRKTIKLNISLNGISIKTEFLVVKKFSHPAAISFTTLYDNNIEISSS.............................. 0 3 3 3 +12217 PF12383 SARS_3b Severe acute respiratory syndrome coronavirus 3b protein Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 32 and 154 amino acids in length. This family contains the SARS coronavirus 3b protein which is predominantly localized in the nucleolus, and induces G0/G1 arrest and apoptosis in transfected cells. 
25.00 25.00 59.30 59.30 18.50 18.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.11 0.71 -4.29 3 70 2009-09-15 16:01:23 2009-09-15 17:01:23 3 1 66 0 0 16 0 118.70 96 99.22 CHANGED MMPTTLFAGTHITMTTVYHITVSQIQLSLLKVTAFQHQNSKKTTKLVVILRIGTQVLKTMSLYMAISPKFTTSLSLHKLLQTLVLKMLHSSSLTSLLKTHRMCKYTQSTALQELLIQQWIQFMMSRRRLLACLCKHKKVSTNLCTHSFRKKQV MMPTTLFAGTHITMTTVYHITVSQIQLSLLKVTAFQHQNSKKTTKLVVILRIGTQVLKTMSLYMAISPKFTTSLSLHKLLQTLVLKMLHSSSLTSLLKTHRMCKYTQSTALQELLIQQWIQFMMSRRRLLACLCKHKKVSTNLCTHSFRKKQV 0 0 0 0 +12218 PF12384 Peptidase_A2B Ty3 transposon peptidase Coggill P pcc MEROPS_A02.022 Family Ty3 is a gypsy-type, retrovirus-like, element found in the budding yeast. The Ty3 aspartyl protease is required for processing of the viral polyprotein into its mature species. 21.20 21.20 21.40 21.20 20.80 21.00 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.05 0.71 -4.94 2 18 2012-10-02 15:32:34 2009-09-15 17:03:42 3 6 11 0 13 32 0 140.50 27 17.31 CHANGED VLsDhELESKDpppL.IpohPIVHYIAIPEMDpTAEKpIKIpNTKlKTLFDSGSPTSFIRRDhVpLLph.ha-TPPLRFRGFluTcSssTSEAVTlDLplssLQIslAAYlLDpMDYQLLIGNPILRRYPKlLaTlLNT+psssu.KPKsY+SEsVN.V+shSAGNRGNsRNhp.SF ...................t........................ll..h....htpsh.hplphpssplcs.LhDoGSPTSFIRpDllpcLcLphapsPshch+...Ghlus.p.st.ospAsplslphsshphslsAYVh-t..hchpllIG.PhLc+aPpl.........h.s.hhp................................................................................ 0 3 6 13 +12219 PF12385 Peptidase_C70 Papain-like cysteine protease AvrRpt2 Coggill P pcc MEROPS_C70.001 Family This is a family of cysteine proteases, found in actinobacteria, protobacteria and firmicutes. Papain-like cysteine proteases play a crucial role in plant-pathogen/pest interactions. On entering the host they act on non-self substrates, thereby manipulating the host to evade proteolysis [2]. AvrRpt2 from Pseudomonas syringae pv. tomato DC3000 triggers resistance to P. 
syringae-2-dependent defence responses, including hypersensitive cell death, by cleaving the Arabidopsis RIN4 protein which is monitored by the cognate resistance protein RPS2 [1]. 18.80 18.80 18.80 18.80 18.70 18.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.07 0.71 -4.55 4 20 2012-10-10 12:56:15 2009-09-16 12:43:07 3 5 19 0 6 44 0 158.90 28 25.46 CHANGED sPhhGGhF.........-ouGSSuopuuSphh.............VPYlsQtspR.GCWYAsspMlua.hpsGP..RLGLPtLY-u.pG.PptLpt.pDl.chh+sEsLtplslPsscpaos-cLuALLscHGPIhhu...hcoP..sDsh..Hh.VLTGID+.sssIpaHDPctGPshsMsLshhNpthsWp.sH ...................................................s................................................h.Qpspt..uCWhAsssMlhtahc...s.....t..s..........+lG.s.L..Ycu.p...G.hPtthpt...p-h..........tchhpstuL.ps.ls.P.......tsps....aospphtsLLpcaGPlhhu...hcoP..uss.a....HshVl..TGIcp...-...s......c..ul.hhDP.....pG.p.h...shp.h..spt.................................... 0 1 2 4 +12220 PF12386 Peptidase_C71 Pseudomurein endo-isopeptidase Pei Coggill P pcc MEROPS_C71.001 Family This peptidase has the catalytic triad C-H-D at the C-terminal end, a triad similar to that in thiol proteases and animal transglutaminases. It catalyses the in vitro lysis of M. marburgensis cells under reducing conditions and exhibits characteristics of metal-activated peptidases. 20.80 20.80 20.80 22.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.09 0.71 -4.75 3 4 2012-10-10 12:56:15 2009-09-16 12:58:37 3 2 4 0 1 24 6 145.50 33 55.48 CHANGED ssssc-TT-l.....p-KlGsF+DATSLYs+V++RCKYKYYYNDQsPN+EAlpKMsT.sGINCTDACQLFp+VlEuLGYsV+IEHVRVRCNDGKWYGHYhLRVuGpELss.......GTlWDY..VSATKTGRPLGuPCCosGh...QHLGWGIVSPlYD ...................s..hsp.spch.....hchh.Gthp.shsphhshlp+RptYpaYYNsQhss+cslp+hhs.pGlNCTDusQLFh+lhcuLGYsVph.HV+.sR...ssGp..GHhh..LR..l..ttp.chs...........GssW.Y..suAstsGps..luushCpsGh....c.uW.IhoslYp.................................. 
0 0 1 1 +12221 PF12387 Peptidase_C74 Pestivirus NS2 peptidase Coggill P pcc MEROPS_C74.001 Family The pestivirus NS2 peptidase is responsible for single cleavage between NS2 and NS3 of the bovine viral diarrhea virus polyprotein, a cleavage that is correlated with cytopathogenicity [1]. The peptidase is activated by its interaction with 'J-domain protein interacting with viral protein' - Jiv. [2, 1]. 20.90 20.90 20.90 79.00 17.90 20.10 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.23 0.71 -5.15 6 178 2009-09-16 12:02:53 2009-09-16 13:02:53 3 22 28 0 0 196 0 181.60 71 6.17 CHANGED WLG+lsYKRVsoVY-lDpStEGVYLFPS+QKssutsuhlLPLL+AlLISCISSpWQhhYLhYLllElsYYhHRKIIEEIAGGTNhlSRLlAALIElNWuhD-EEoKGLKKFFlLSuRV+NLIlKHKVRNEsls+WatEEElYGMPKllollKAATLSKsKHCILCTVCEsR-WKGssCPKCGRpG.PIoCGMTLADFEE+ ...............WLhthsYKplssl.Y-lDpuGEGVYLFPS+QKssp.huhhLPLl+AhLISCISsKWQhlYhhYLhh-h.YYhH+KlI-EIuGGTNhlSRllAALIElNWuh-pEEsKGLKKFaLLSuRl+pLIIKHKVRNEsVspWaG-EElYGMPKllolIKAuTLSKNKHCIlCTVCEuR-W..+Gus..CPKCGRaG+PITCGMTLADFEE+. 2 0 0 0 +12222 PF12388 Peptidase_M57 Dual-action HEIGH metallo-peptidase Coggill P pcc MEROPS_M57.001 Family The catalytic triad for this family of proteases is HE-H-H, which in many members is in the sequence motif HEIGH. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.37 0.70 -5.00 5 69 2012-10-03 04:41:15 2009-09-16 14:03:59 3 10 40 0 35 188 15 198.00 31 60.86 CHANGED IhIEuDIshTcKELNcLp..u.ss-sKQYRTsNLVssusRTITIlGYTGssQ.uLoupupTALscAVNNYNsl.GhsIsF.RLTFGTNaQNuDMVVYsNolNNPSGo....GGuAGFPsusGcPsKFVQI....YsLcssSTNVsEH.VITHEIGHSIGFRHTDYasRp.SC..GpsGNEGoGGlGAVaIPGTPTGpDsoSlMQACFSsGsDGsFNoNDITALLsMY ........................................................................................................lttDhhho.pt.pp...h............t.........s.....t..cQYRTsslV.....s.t.s.p....sI.slhs.s.............uhs.s.p.hpsuLpp....AlspYNsl..sLslsF...pht.h.....ss.s....h.t....s.t.sh..s..l.h..p.s.sss...sGs.....G..us..A.G.F.P........o..s..Gp..P.....a....p..h.lpI...........h.u.h..s....s....h....s....s....s....s..h..p+....VlsHElGHslGhRH.T.D....ahs.Rp...SC....Gps..s.sEGsu...ssGAltIPGTPos...h..s..s.sSlM.u.Chss.s..psstFsssDhhALphhY....................................................... 
0 14 22 31 +12223 PF12389 Peptidase_M73 Camelysin metallo-endopeptidase Coggill P pcc MEROPS_M73.001 Family \N 21.40 21.40 21.40 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.35 0.71 -4.85 2 432 2009-09-16 15:08:38 2009-09-16 16:08:38 3 3 167 0 49 281 4 171.20 43 87.42 CHANGED M..oLKKKLGMGlASAsLGhuLlGGGTFAaFSDKEVSNNTFAsGTLDLsLsPpTlVslpsLKPGDoVcKEFhLcNpGoLsIKcVhLtTcYsVcDVKtDNt.-DFGKHlKVpFLhNhDKppp.VhETsLscLptss.shlspDlhA..WsEK.GlpAGppDhhhVpFEFVDstKDQN.FQGDpLpLpWTFsApQtsGEpK .................M...olKKKLGMGlsoAs.L...Glu..L.l..GG...GTaAaF..S.Dp.E.s.S.s.N..TFAAGT..LDL.sl.................s...P....p.....s.....l.....l..s.............l.........p.........sL..K.......PGDpl.cK.pFpLpN.pGo.Ls.I.c.cV.hL..tTcY...s.V....pD..sK...t..s..N.t.........-DFGcHlcV.pFLh..Nh......D..K...........p......p......p.l..hpTT..Ls...cLpt..s.p...h...h..tp....cl......t......h..t.E.t....G.ltsGpp.DhhhV.FpF...DstpD......Q.......N.FQGDpLpLpWTFpu.QttGpp+................................................................................................... 1 12 34 41 +12224 PF12390 Se-cys_synth_N Selenocysteine synthase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF03841. There is a single completely conserved residue P that may be functionally important. This family is the N terminal region of selenocysteine synthase which catalyses the conversion of seryl-tRNA(Sec) into selenocysteyl-tRNA(Sec). 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.68 0.72 -3.86 134 925 2009-09-16 15:44:25 2009-09-16 16:44:25 3 5 916 0 183 611 49 39.90 44 8.67 CHANGED hRplPulDplLppsthtsh......................lpphuRshVhcslRphl-phR ...appLPulD+LL+-s..shhsL......................h-pYG+otVl-hLRphLDcAR................. 
0 66 115 155 +12225 PF12391 PCDO_beta_N Protocatechuate 3,4-dioxygenase beta subunit N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00775. There are two completely conserved residues (Y and R) that may be functionally important. This family is the N terminal region of the beta subunit of protocatechuate 3,4-dioxidase. This enzyme utilizes a mononuclear, non-heme Fe3+ centre to catalyse metabolic cellular reactions. 21.60 21.60 21.90 24.20 19.80 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.66 0.72 -4.52 79 491 2009-09-16 15:49:27 2009-09-16 16:49:27 3 3 474 172 150 420 62 35.90 41 14.71 CHANGED ahtRDtsh.HPPthtPsY+oSlhRuPppsLlsl....ssol ...............tRDhsh.HP.sthsPsYKoSVLRuPppsLlul...ppo......... 0 29 77 115 +12226 PF12392 DUF3656 Collagenase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF01136. 
22.10 22.10 22.60 22.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.29 0.71 -4.05 199 1248 2009-09-16 15:51:28 2009-09-16 16:51:28 3 4 1215 0 283 989 60 120.40 32 17.09 CHANGED la.+spDpphtcpl.cp..........stp++lslph.plp..hc.....hsps........hplplp..............s.tp.u...h.....psp..........................sps....shshptApppshsp-plccpLu+lGsTsFph..............pp..lplp.h..............stshFlPsStLNplRRcul-pLppthhtsh ..................................................................................................................RshDpphpptL.+p..........sucR+lsVcl..pls......t........ht-t...............LhLols................s...-c.G....s.......slo..............................................h.s.h....-.s...h-pApssths....hssL....cctLu.KLGpT..ahs..............cc..lpls...l..............ss.slFlPsShLNphRR-Al-hLctsRltt................ 0 108 198 246 +12227 PF12393 Dr_adhesin Dr family adhesin Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF04619. This family is the Dr-family adhesin expressed by uropathogenic E. coli. 25.00 25.00 28.00 28.00 20.40 19.80 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.39 0.73 -6.50 0.73 -3.65 5 56 2009-09-16 15:52:41 2009-09-16 16:52:41 3 1 2 0 0 56 0 21.00 67 13.14 CHANGED MKKLAIMAAsSslhssSTAHA MKKLAIMAAsShlhAsSoAHA 0 0 0 0 +12228 PF12394 DUF3657 Protein of unknown function (DUF3657) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF05057. 
21.80 21.80 22.40 22.70 21.50 21.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.98 0.72 -3.90 39 350 2009-09-16 15:53:31 2009-09-16 16:53:31 3 8 98 0 200 384 1 62.30 29 8.76 CHANGED lpl-Lhas-hhpssp.............................hptlustshpl..pstptl+ca.sslhF......DhhHhshlss.slassll ............................lpLhasph.psss.................................................hhus+sLtlp.+hphG...l++a.sslhF......Dh.Hhshlul.slatsLh...... 0 52 75 126 +12229 PF12395 DUF3658 Protein of unknown function Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF08874. There are two completely conserved residues (D and R) that may be functionally important. 20.60 20.60 22.30 22.10 19.60 20.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.13 0.72 -4.51 40 322 2009-09-16 15:54:13 2009-09-16 16:54:13 3 3 272 0 72 276 16 110.90 28 38.04 CHANGED lhcthpptp.lo.pctppLtpEWpplpppsu.LRlWcss.plpSspc.saaDshIlc.........psss-atpus+llGplhup....h...pph.lu......DsFltaRl+pLlppGhlchcGsht.shcphpl .....................h..tth.tht.lostchtphttEWpplpppss.LRlWpss.plpSs.pc.shaDphIlc..........ts.s.p-ahp..AscllGpshut....s.......cth...lu......DsaltaRl+pLlppGhl-hpGshs.thp.hp............................ 0 20 41 54 +12230 PF12396 DUF3659 Protein of unknown function (DUF3659) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 70 amino acids in length. 
22.10 22.10 22.20 22.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.23 0.72 -4.30 116 553 2009-09-16 15:55:19 2009-09-16 16:55:19 3 17 61 0 480 572 2 65.10 35 43.00 CHANGED husLcG.hpVs+tGpllD.ssGpslG+llEG..Dscc...Ls..G+p...V.D-cGcIlDcsGsllG+sEhl..s........pptp .........ssLpG.hpVs+sGpVlD.psGphlG+llEG....Dscc...Ls.....G+p.......l.D-cGcllDc.sGsllG+sEhl.t....tt..p............ 0 80 213 374 +12231 PF12397 U3snoRNP10 U3 small nucleolar RNA-associated protein 10 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF08146. This family is the protein associated with U3 snoRNA which is involved in the processing of pre-rRNA. 22.00 22.00 22.00 24.40 20.80 21.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.48 0.71 -4.16 67 281 2009-09-16 15:56:45 2009-09-16 16:56:45 3 8 238 0 201 271 1 121.50 21 6.72 CHANGED -shl.plL.Phl.cult.ppss........DhphuuYhllshLus+ssLsspllpuLhcslspshptpst........pp..............uLhsLs..............hlh.....ppp.sstp.............lsppshctlhph.s.....hsphLspl.spphpl..sphlhslltuh.l ...............................sllspllPhltcuLp.sphs........-hphusYMlls.Lus+ssLpcsllsuLhcplhpshsppst..........ps..............uLhs.Lh..............hLh............Qpp..p.sp.p................Lspcshpt.Lh..ph.s.......lhshLppl.upphsl....sphltshh.sh........................................................................ 0 62 106 165 +12232 PF12398 DUF3660 Receptor serine/threonine kinase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00954, Pfam:PF01453, Pfam:PF00069, Pfam:PF08276. There is a conserved ELPL sequence motif. 
25.00 25.00 27.20 26.10 23.60 22.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.94 0.72 -4.09 34 150 2009-09-16 15:59:03 2009-09-16 16:59:03 3 12 24 0 15 168 0 41.50 50 5.90 CHANGED ussIs..ppRsQsLl.MNthVl....SS++p...lS.tEN+o.E-LELPLhEa ......ATsIss.ppRsQsLl.MN.....thVl....SS+Rp...lS.tEN+s.E-LELPLhEh. 0 4 8 10 +12233 PF12399 BCA_ABC_TP_C Branched-chain amino acid ATP-binding cassette transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00005. There is a conserved AYLG sequence motif. This family is the C terminal of an ATP dependent branched-chain amino acid transporter. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.19 0.72 -6.41 0.72 -4.21 827 8057 2009-09-16 16:01:50 2009-09-16 17:01:50 3 14 2852 0 2574 6299 2877 22.90 46 7.99 CHANGED chIAcGsPp-lpssPcVlcAYL.G ........llAcGoPp-lpssscVhcAYLG..... 0 633 1531 2091 +12234 PF12400 DUF3661 Vaculolar membrane protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 123 and 138 amino acids in length. 
22.00 22.00 25.00 24.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.85 0.71 -4.05 49 275 2009-09-16 16:04:10 2009-09-16 17:04:10 3 5 208 0 206 258 1 124.00 35 39.14 CHANGED CsaYhlNlllDo...TlGlhllah....hlphlsplhph....hthpplc.SGpY.....................................t.PphpsahpQhhlYhhslhhhKhhlhllhhhh....hlthluphlLsa..hcttsp.lplhhVMhlhPllhNslQaallDshI+pp .....................................................................CshYhlNhllDs........TlGl.llal....hl+hlstlsph.................thp..s.....lc..GpY.................................................................................GsPPpstsWltQshlYhhhlhhhKhslhlllhhh....hhtpluphlLsa..hc...s..Pp.lplhhVMllhPhlhNslpaallDshlpp..................... 0 66 107 164 +12235 PF12401 DUF3662 Protein of unknown function (DUF2662) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00498. 25.00 25.00 27.90 27.50 20.70 19.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.26 0.71 -3.93 76 515 2009-09-16 16:04:50 2009-09-16 17:04:50 3 3 496 1 146 383 83 114.90 32 36.99 CHANGED Ls+hEptlEphlpusFu+sFpup..lpPsElsptLcREh-spsthluts+hl.sPNtaslpLussDacplss...h.tssLspElsshlpcaupppsashs.GslpVphpp..sssLcsG.pa+lpu .............lp+hE+plEssVsssFA+sFp.up..lpPsElsutLpREh-spsphlups.+sl..sPNcahlpLSspDa-+Lssh...sctLssEluspLpcaspcptashh..GslhVca-t..sssLpsG.paclp............. 0 54 113 138 +12236 PF12402 nlz1 NocA-like zinc-finger protein 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 42 and 57 amino acids in length. There is a conserved GAY sequence motif. There is a single completely conserved residue G that may be functionally important. Nlz1 self-associated via its C terminus, interacted with Nlz2, and bound to histone deacetylases. 
21.20 21.20 26.30 24.40 19.80 18.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.77 0.72 -3.62 7 173 2009-09-16 16:05:48 2009-09-16 17:05:48 3 2 89 0 44 126 0 51.50 68 10.89 CHANGED AuhsYPGSlsGAYAGYPppals...uLDssK..uSLVuuQh.....uohGss.K.suuushsG ...AGMoYPGSLAGAYAGYPppFLPHG.shD.tK..uSL.VsAQL....AuSLGC.S....AGSSPLAG..... 0 3 7 18 +12237 PF12403 Pax2_C Paired-box protein 2 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00292. This family is the C terminal of the paired-box protein 2 which is a transcription factor involved in embryonic development and organogenesis. 28.10 28.10 42.50 29.80 21.90 20.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.73 0.71 -4.43 11 295 2009-09-16 16:06:57 2009-09-16 17:06:57 3 5 51 0 109 199 0 101.30 53 28.58 CHANGED suspsssshsuhuchusshuphpS.slhsGR-hu.STTLPGYPPHVPPTGQGSYsoSoLAGMV.PGu-FSGNPYSHPQYooYNEAWRFoNPuLLuSPYYYSsAuRss.PPTuATAYD+ ...............................st......s.sssschGushst.p.oh.sl....loGRDMA..STT...LPGYPPHVPP...oGQGSYso.SoLu.G...MV..PGS-FSGsPYSHPQYoo.Ys-u....W...RFs.NP..u..LLu.SPYY..YSussRsuss....sssAs..AaD............................................ 0 5 11 57 +12238 PF12404 DUF3663 Peptidase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00883. There is a conserved WAF sequence motif. 
25.00 25.00 33.40 31.60 21.20 20.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.62 0.72 -4.20 55 805 2009-09-16 16:07:38 2009-09-16 17:07:38 3 3 792 0 98 412 11 76.90 63 18.01 CHANGED MplpLSppsAsupWGcpAllSFsssuAsIH.....L....ssscsLppIQ+AARKLcuQGIppVpLsG-sWcLEppWAFhQGFtss ..M+ITLSTQPADARWGEKAshShNs.DGlTlHL.....NGsDD..LGLIQRAARKIDG.GIKpVpLoGEGWDh-+CWAFWQGY+uP.. 0 9 32 66 +12240 PF12406 DUF3664 Surface protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 131 and 312 amino acids in length. 23.00 23.00 23.50 121.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.71 0.72 -3.40 5 53 2009-09-16 16:09:15 2009-09-16 17:09:15 3 3 4 0 1 50 0 100.20 81 54.80 CHANGED DRQLNPIDFDPNDDQQPLDPNQLIDQtEQSQEPTQQEPIEPQQPTQPuT.EPEELEPETVTVEVP.EPVTSEE...PKE.........SoQTEEsTETQDsKpE.PTpQPVDEPP DRQLNPIDFDPNDDQQPLDPNQLhDQhEQS....QEs....TQQEPIEPQQPTQPST.EPEEL-PETVTVEVP.EPVTSEE...PKE............SDQTEE.QKHEEPEAS.PsPEPVDEPs 0 0 1 1 +12241 PF12407 Abdominal-A Homeobox protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00046. This family is a homeobox protein involved in differentiation of embryonic cells to form the abdominal region. 25.00 25.00 26.20 33.70 19.80 18.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.06 0.72 -4.42 5 126 2009-09-17 12:02:55 2009-09-17 13:02:55 3 2 103 0 24 91 0 23.90 83 12.42 CHANGED ELRAVKEINEQARREuc..EcE+c++ .ELRAVKEINEQARRERE...EQDhMK+........ 0 8 11 21 +12242 PF12408 DUF3666 Ribose-5-phosphate isomerase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF02502. 
There are two completely conserved residues (D and F) that may be functionally important. 25.00 25.00 31.10 30.20 21.00 16.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.34 0.72 -4.86 37 553 2009-09-17 12:03:37 2009-09-17 13:03:37 3 2 544 19 66 225 3 48.30 52 22.79 CHANGED sssh+.shlssL+slDQ-LlKsAluGc+FQ-hFFsNCpspcIssYV+sl ....thsh+.shlshL+sID.-hlKsslSGt+FQchFFENCQ-cEltAal+pl.. 0 14 33 52 +12243 PF12409 P5-ATPase P_ATPase; P5-type ATPase cation transporter Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 110 and 126 amino acids in length. The family is found in association with Pfam:PF00122, Pfam:PF00702. P-type ATPases comprise a large superfamily of proteins, present in both prokaryotes and eukaryotes, that transport inorganic cations and other substrates across cell membranes. 21.70 21.70 21.90 24.20 21.50 21.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.38 0.71 -4.25 66 485 2009-11-19 17:32:02 2009-09-17 13:04:37 3 23 233 0 321 463 0 118.50 21 10.19 CHANGED -D..hhl.sIsGY+sshh+..hhlahhhslhThGlhaLlh+WhP+a+lphhsptssLtcA-..aVl..lE..........spas...phplhpVpp.....phascshSshhs.spp...................h.hspppss....tlsplRhhpYR ..........-.ph.plhGYcpshh+..hhlhhhhslhohG......lhh.L.lh+WhP.cWpVphpsp...s.....s.Lpc.A-.....hVl..lp......................................spap...............phhh.tpVpp................h.h.sp....th....t.......................................................................................................................... 0 115 158 251 +12244 PF12410 rpo30_N Poxvirus DNA dependent RNA polymerase 30kDa subunit Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 193 and 259 amino acids in length. The family is found in association with Pfam:PF01096. There are two conserved sequence motifs: GIEYSKD and LRY. 
This family is N terminal of the 30 kDa subunit of poxvirus DNA-d-RNA-pol. It has structural similarity to the eukaryotic transcriptional elongation factor SII. 25.00 25.00 53.40 52.80 21.10 19.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.51 12 114 2009-09-17 12:05:32 2009-09-17 13:05:32 3 2 83 0 0 76 0 133.60 70 60.24 CHANGED pDlccllp+YVs-pscspcLlpWAp-pAs+aYl+NIsNTK.SNIEETKF-PtNNIGIEYSKDsKNKLSYRNKP.It..TNh-YpDlCshI+sTNGsEK-hLRYlLFGIKCl+tGVEYsIDclpDhsY.ccYFNVLDcKaN ...DIc-llh+YVc-.uplc-llcWAh-KuSKaYI+NIhNTK.SNIEETKFEs+NNIGIEYSKDSKNKLSYRNKP.Is..TNh-Yp-lCshI+sTsGTEKEhLRYlLFGIKClpKsVEYNIDcI+DlsY.p-YFNVLDcKYN. 0 0 0 0 +12245 PF12411 Choline_sulf_C Choline sulfatase enzyme C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, eukaryotes and viruses, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00884. There are two completely conserved residues (R and W) that may be functionally important. This family is the C terminal of choline sulfatase, the enzyme responsible for catalysing the conversion of choline-O-sulfate and, at a lower rate, phosphorylcholine, into choline. 21.30 21.30 29.60 24.20 21.00 20.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.74 0.72 -4.14 37 292 2009-09-17 12:09:02 2009-09-17 13:09:02 3 6 284 0 123 283 111 54.00 51 10.51 CHANGED cpcVlpSQpRR+lVas......AL..ppGphtsWDaQPh.pDuSppYhRNHhc..LsslEttuRa ...cppVltSQRRRRlVap......AL..pp..Gpht.sWDaQPh.pDASpcYMRNHh-..LDsLEppuRF.......... 0 18 54 88 +12246 PF12412 DUF3667 Protein of unknown function (DUF3667) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. There is a single completely conserved residue P that may be functionally important. 
21.80 21.80 21.80 21.80 21.50 19.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.99 0.72 -4.65 78 186 2009-09-17 12:10:03 2009-09-17 13:10:03 3 5 129 0 78 203 27 46.20 33 13.02 CHANGED Du.+hh+TlhtLhh+PGplsccYlsGc..Rt+YlsPhphalhsuhlh........Fl ...Dsphh+TlhtLlh+PGplsccYlsG+..RhcYhsPhphalh.huhlhh.......... 0 39 65 76 +12247 PF12413 DLL_N Homeobox protein distal-less-like N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00046. This family is the N terminal of a homeobox protein involved in embryonic development and adult neural regeneration. 25.00 25.00 26.50 25.60 23.50 22.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.41 0.72 -2.94 20 283 2009-09-17 12:10:48 2009-09-17 13:10:48 3 2 148 0 84 168 0 74.10 45 33.67 CHANGED p-SPTLPESTATDsG.YYSspts......HsYhss.....psY.upslN.sY.......Qa.phsGlsus.usYu.sK.sYs....Y.suuY...+QY.GsYsRssssssps .....p-SPTLP.SosTD.s..YYss..........tasuu......uPY..tpls.sY........QY.phsuhuu.........ssYs.AK..uYs.h...a.suoY...t.pY.GuYupssSsss..s............. 0 4 11 37 +12248 PF12414 Fox-1_C Calcitonin gene-related peptide regulator C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 69 and 99 amino acids in length. The family is found in association with Pfam:PF00076. This family is the C terminal of Fox-1, a protein involved in the regulation of calcitonin gene-related peptide to mediate the neuron-specific splicing pattern. Fox-1, with Fox-2, functions to repress exon 4 inclusion. 
25.00 25.00 35.30 30.60 22.20 16.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.47 0.72 -3.58 6 257 2009-09-17 12:11:22 2009-09-17 13:11:22 3 4 42 0 88 228 0 87.20 67 26.35 CHANGED sVPuFPYPs...sss...suuAaRGutL.RGRuRsV....YsshRAAsP...PsulPuYsGVV.YQDGFYGA-.lYGGY.AAYRaAQPAo........AssAAYSDuYGRVY.sA.DPY ............................................................h.lPGFPYPs...Ass.......sAAAaRGAHL.RGRGRs.V....YsshRAA.P...PssIPAYsGVV.YQDG.FYGA-...lY....GGY...A........AYRYAQPss.............AsAAA......YS...........D.uYGRVY..sA.DPY................ 0 4 11 37 +12249 PF12415 rpo132 Poxvirus DNA dependent RNA polymerase Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF04566, Pfam:PF00562, Pfam:PF04567, Pfam:PF04560, Pfam:PF04565. This family is the second largest subunit of the poxvirus DNA dependent RNA polymerase. It has structural similarity to the second-largest RNA polymerase subunits of eubacteria, archaebacteria, and eukaryotes. 25.00 25.00 26.40 40.80 19.80 17.10 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.71 0.72 -7.02 0.72 -5.00 13 82 2009-09-17 12:12:17 2009-09-17 13:12:17 3 5 49 0 0 76 0 32.80 64 2.89 CHANGED -FI+psLoYDMPsEllYLVNulIESTKpllss.p .EFIRRSLSYDMPPEVVYLVNAIIDSAKRlsES.I.. 0 0 0 0 +12250 PF12416 DUF3668 Cep120 protein Gavin OL, Bateman A lg7 Bateman A Family This family includes the Cep120 protein which is associated with centriole structure and function [3]. 
21.80 21.80 22.20 22.40 20.90 19.40 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.11 0.70 -5.24 16 109 2009-09-17 12:13:09 2009-09-17 13:13:09 3 8 80 0 73 121 0 295.20 39 33.78 CHANGED hlVlsIlEG+sFspc.......+pplllpAphs........ucsLpTD...............Plspt-sstFsocLsWE........hD+psl+..........+h+sp+sPlKLpCaAsc.tsst+EsIGYllLslRus....ptsp.p....ps+WapLLus...capphKPEl.hlhlslEscsh.t.ts.p..h........psppsPsppu.....ss.h..s.tplhshhhs-pGhhQlGsssh.ssDhallslpltpsppLspL.hs.s..l.t+..t.sshhhhYslLGNDVTsc.pFpp.hsssa.hpcs..lRl+SSlpsL+hahspp.stL.Iplpp....t.......ppulusoplshssLlstssh.th.....sp+hsshpGsFshp..stscs...............ss-.htPplslsloLchc .............................................................hlVlpllE..G+pFspp.......cp.lllpApFsGEpLtTD.....PlpHs-pPpFsTELAWElD++sL+ppRhQRTPIKL......pCFAlc.....sho.st+EslGYllLDLRss....pps..p....ts+WapLLus...+.......Ys+aKsEl..lulslEs-sp....ssc.th................KuptsPPcpu...ps.s.....tlsspsl..hshLtp-tGaaQIGP..s-h..ssD.FlhSloluaAp.pLcpL.lsss....Lstc....spFaFhYs..LLGN.-VTs-.sFpsLhs....P......sFpsERsS.lRlRSSlcsL+hYLshp.stLpIaLCs....G...........spu.LGssclsLssLLtpssh.ph.......sp+sssh-GuFhlpsss+scp...............sh-..htPplslolsLph.p..................................... 0 29 34 53 +12251 PF12417 DUF3669 Zinc finger protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 64 and 80 amino acids in length. 21.60 21.60 22.30 21.90 21.40 21.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.22 0.72 -4.25 23 311 2009-09-17 12:13:48 2009-09-17 13:13:48 3 38 80 0 172 301 0 75.70 43 14.96 CHANGED hWhlDFstsps......hphs.css.ltp......hlsAahcNDsaaPRPt.......t..stpLWssFcppYl.suptlhpshhh...hcs .....................h-hshhsshss.h.slE+K.h-up.us+............LlsLEGRTGssEKKLA................-CEKTusEhuNplEuKWsVLGTLLQEYGLLQRRLEN.............. 
0 16 39 68 +12252 PF12418 AcylCoA_DH_N Acyl-CoA dehydrogenase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF02770, Pfam:PF00441, Pfam:PF02771. This family is one of the enzymes involved in AcylCoA interaction in beta-oxidation. 21.20 21.20 21.20 21.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.23 0.72 -4.26 232 1684 2009-09-17 12:14:12 2009-09-17 13:14:12 3 9 805 0 589 1505 340 32.30 33 5.44 CHANGED ssYpuslRDhpFlLpElhshcthh..shssas-hs ........pYpAPlRDhpFlLpElLshcthh..shstas-hs................. 0 150 332 482 +12253 PF12419 DUF3670 SNF2 Helicase protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00271, Pfam:PF00176. Most of the proteins in this family are annotated as SNF2 helicases but there is little accompanying literature to confirm this. 27.60 27.60 28.10 31.00 26.50 25.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.51 0.71 -4.74 105 583 2009-09-17 12:16:04 2009-09-17 13:16:04 3 5 516 0 181 587 58 139.80 27 14.45 CHANGED L.psstPpshp..Lssp-shpFLppsu.tLpsuGlsVlLPsthpt.t...tp+lplclps............................sss.sst......uhlulcsLlsFcWclulGs....psLoppEhppLsppppsLVcl+GpWVplDspclppstchhpp.ttpt.........loht-sLchshs .............................shp..Lssp-shpFLspsuspLpsuGlsVlLPshWppht...ts+hplclps..........................................ttsss.ssp........uhhGhss..LlsFcWclulGs.............tsLocpEhpcLs...c....pppsLlcl+GpWltlDsptlccspchhpc.ttp.t..........lsht-hLphth.t.................. 
0 61 131 163 +12254 PF12420 DUF3671 Protein of unknown function Gavin O lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 96 and 116 amino acids in length. 22.10 22.10 22.30 23.40 21.80 22.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.60 0.72 -4.05 42 76 2009-09-17 12:17:00 2009-09-17 13:17:00 3 2 4 0 39 97 1 108.20 37 46.74 CHANGED KKcGL..................c+LDsYCEKKlFsplcplpclscshshcKKpaKphlhKKYG...hhlll.sLhs.llGlIlslLht.ht.th......................................h..hphhFhhlhsslhlLhllYlhhKllKY- ................KKcGLcKLDCYCE+KIFsslD+lcKhtcstNhDKKsaKpl.llKKYG...htlIhssLhs.LlGlIlsILsh.tpsth.......t...........................h.thlsslshsFah.assIhlLsllYIhhKllKY-........................................ 0 0 0 34 +12255 PF12421 DUF3672 Fibronectin type III protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is typically between 126 and 146 amino acids in length. The family is found in association with Pfam:PF09327, Pfam:PF00041. There are two completely conserved G residues that may be functionally important. Many of the proteins in this family are annotated as fibronectin type III however there is little accompanying literature to confirm this. 
23.60 23.60 23.90 23.70 22.80 23.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.65 0.71 -4.02 25 1261 2009-09-17 12:19:22 2009-09-17 13:19:22 3 19 403 0 22 1470 7 135.60 56 15.88 CHANGED lspsGphphpNuslpG..............slsAsSGshsssshspssphtGslpAppIc.G.....Dll+..........hh.tt.................h.hpsh..ss.sasRplhl..........................ttt.p.....hhh.pshshtlppstthlhstssssh....hssh.......hshPs.sp ..............LTPDG+LTAKNADISG..............sVNANSGTLN.....NVT.....INENCplhGKLSANQIE.G.....DlVKTVuKuF...........ss........................pasSGTlTVpl.DDQ....sFDRQIlIP....................ustapstpppppps..shYSoC+L.VpKNGsEIaspsshDsp...hlaSuV.......IDMPAG............................................................................................................................................................................................................................................. 0 3 5 12 +12256 PF12422 Condensin2nSMC Condensin II non structural maintenance of chromosomes subunit Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 150 amino acids in length. This family is part of a non-SMC subunit of condensin II which is involved in maintenance of the structural integrity of chromosomes. Condensin II is made up of SMC (structural maintenance of chromosomes) and non-SMC subunits. The non-SMC subunits bind to the catalytic ends of the SMC subunit dimer. The condensin holocomplex is able to introduce superhelical tension into DNA in an ATP hydrolysis- dependent manner, resulting in the formation of positive supercoils in the presence of topoisomerase I and of positive knots in the presence of topoisomerase II. 
21.70 21.70 22.50 21.80 21.30 20.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.94 0.71 -4.47 26 201 2009-09-17 12:21:28 2009-09-17 13:21:28 3 7 99 0 119 184 1 148.20 29 21.30 CHANGED Llpshhssthlps.-Gp+hluhlhs.hpt.hhcchtshl+spl...phs+uhlptau-lhapuWKpu...............ptshppplEpshlpshhpsulcs.....ts.hhusphRphLpsFtpp+.hpttV-phL..hclhcPlLaRuLpssNspV...RtNuhtlhhp.hFPlpss ......................hLtsFhssshlts.p..u.phlShLhs.hphshh+thpshlcs.l...tths+slhphhu.-lhacuWppu................tphhpplcpspIQDhh.tulcL...............cp..sas+sRcsLthFhppK..h..tlcchL...hcl.hcPlLa+uLpssNupV...Rssuthlhlc.AFPlcs.................... 0 35 52 84 +12257 PF12423 KIF1B Kinesin protein 1B Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00225, Pfam:PF00498. KIF1B is an anterograde motor for transport of mitochondria in axons of neuronal cells. 22.20 22.20 22.20 22.40 22.10 22.00 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.43 0.72 -3.93 42 516 2009-09-17 12:24:20 2009-09-17 13:24:20 3 40 167 0 305 465 1 46.90 37 3.03 CHANGED hpNRlhtMR.-hYpph........................................hptt............pp...........h.ptpDPFa-s..-paslIGs ..........................LcpRL..MR.-hY.pch......................t............................pps.sst......................cp............hhptuDPFY-p..c.apLlGs........... 0 90 120 213 +12258 PF12424 ATP_Ca_trans_C Plasma membrane calcium transporter ATPase C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00689, Pfam:PF00122, Pfam:PF00702, Pfam:PF00690. There is a conserved QTQ sequence motif. This family is the C terminal of a calcium transporting ATPase located in the plasma membrane. 
22.90 22.90 23.70 23.10 22.00 22.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.28 0.72 -3.61 19 534 2009-09-17 12:25:56 2009-09-17 13:25:56 3 22 95 2 238 453 0 56.10 47 6.66 CHANGED GQILWhRGLsRlQTQIRVV+AF+SsL..tcshcpspSpsu.l+shhs.sph..slpcphpp.shlsppc ................GQILWhRGLNRIQT.QI....+VVpAF+Suh....h.p.....u..l..c+..pS.hsS....lHshhst.ph...h.t............s.................................................. 0 32 49 134 +12259 PF12425 DUF3673 Protein of unknown function (DUF3673) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. 22.30 22.30 23.20 23.60 22.20 22.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.47 0.72 -4.12 19 52 2009-09-17 12:28:21 2009-09-17 13:28:21 3 4 3 0 29 47 0 51.70 42 28.82 CHANGED Rhssc-hhssspthshl-sc+t.h.......pc-suHApssssaspcchs.h....sspsph.+ts ......hhsscDhhToscchuYlEccKGYs.......E-EsuHAQsssVYANKKhl.Y.....usKsphs+..h......... 0 0 0 3 +12260 PF12426 DUF3674 RNA dependent RNA polymerase Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 40 amino acids in length. There is a conserved MFNLKF sequence motif. There are two completely conserved residues (E and P) that may be functionally important. 22.00 22.00 22.80 47.90 19.90 21.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.02 0.72 -4.55 8 125 2009-09-17 12:29:39 2009-09-17 13:29:39 3 2 47 0 0 100 0 40.30 63 3.25 CHANGED LIKs-ERuALEAMFNLKFHlus.KspsYhIP-Y+slp.spP ..........LIKsDERuALEAMFNLKFHloGsKsRsFsIPsY+PQsLCpP....... 0 0 0 0 +12261 PF12427 DUF3665 Branched-chain amino acid aminotransferase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 23 and 35 amino acids in length. The family is found in association with Pfam:PF01063. There is a conserved TRT sequence motif. 
25.00 25.00 43.40 42.30 23.80 20.50 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.30 0.72 -6.73 0.72 -4.51 3 14 2009-09-17 12:33:33 2009-09-17 13:33:33 3 1 12 0 3 14 0 26.30 72 7.06 CHANGED M............L-YTVTRT-sPTSP-RLKEILA ..............L-aTVTRTENPTSP-RLKEILA. 0 1 2 3 +12262 PF12428 DUF3675 Protein of unknown function (DUF3675) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00097. There are two completely conserved residues (R and L) that may be functionally important. 21.10 21.10 21.10 21.10 19.40 18.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.46 0.71 -3.84 30 175 2009-09-18 09:02:28 2009-09-18 10:02:28 3 3 21 0 96 161 0 110.10 34 44.96 CHANGED PGYTAPs.phsphscsslslp....usWp..ssth-.c-schlAhstuppphhpsth..s-hssssspuusaCRSlAllhMsLLLLRHsLslsh..sus-chuhsl.ho...LhhLRsAGILLPhYlhh+ulo ...............................PGYTAPP...hhp....schslslp............ssap....hsth-.c..cschlAhstuppphh.ps-a.s-hssssssuushCRSlAlI.......hMsLLLLRHslslsp...sss-c.shs...l..Fo...lhlLRsAGhLLPhYlhhhul....................................... 0 8 52 75 +12263 PF12429 DUF3676 Protein of unknown function (DUF3676) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 230 amino acids in length. 
25.00 25.00 34.40 34.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.53 0.70 -4.75 27 92 2009-09-18 09:05:25 2009-09-18 10:05:25 3 8 2 0 40 93 0 193.10 63 27.25 CHANGED TDsSVASES+SEESs.sSaEcLsEsDTDcQ.EEpsVcs.VPAAssSTVsAGSSVsEPAhAAESAtNS+sEDNAQLSEGcTuQQuT.sEspcSMQRDSDVQsQD.QSpELTEVsDVEtSSES.DsEpPEEEGcANDRSGGoTSsVuASLSM-TATusVsGEHQVQQSsELuAENcDVRSTGTGTTGAEpSLSLEAGDuNSERTMuSDSSLTPSKSDAEPTSAEsTDslSRTEG ..TcsSVASESpSEES..sSaEcLsEsDs-cQ.EEpsVcs.VPAAsSSTssuGSSVsEPAhAsESAtNSh.-DNAQhSpGETuQQsT.pEspcSMQRsS-VQsQD.QS...tc.TEhsDVEtSuES.Dpp.PEE-ttss-tSGtoTSsVuAS.sM-TsstsV.uEHQVQQSsEhusENsDVRSTGTGTTGAEpSLSLEAGDtNSERTMsSDSS.TPS+SDAEsTSAEsTDslShTEt.................. 0 0 0 40 +12264 PF12430 ABA_GPCR Abscisic acid G-protein coupled receptor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 177 and 216 amino acids in length. This family is part of the abscisic acid (ABA) G-protein coupled receptor. ABA is a stress hormone in plants. 
22.10 22.10 23.30 30.10 21.10 21.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.22 0.71 -5.17 44 301 2009-09-18 09:06:13 2009-09-18 10:06:13 3 10 234 0 216 311 1 172.90 36 36.74 CHANGED p.ptthupThhG+hhphh.shhhulYCla+lhhshlphlhhh...............................ssssDslstslshhlp...........hhshpl...Dhp.hhspplShlLsGslhlsShpsllhohpp...hhphhsushs..ss.....................................hhsLlluplhGhYhlSolLLlRssLPt............ph.pshlsc............lLu............sslch.tFhcpWFDhlFllSulhohlhlhhtcphs ..............................t.thupThhG.+hhsh.h.GahFSlYClaKlhhshlsllhpp................................supsDPlopslplhlp....................ahs.hph.......Dlt.hhoppIShlLlGhlllsSlRulLhTlp+...hhhh.luushu..ss.....................................hllLlluplMG.hYFlSolLLlRhshP..........................-h.+shloc......................lLG..............plpa.sFac+WFDslFll.......Sulhollhlhltc+..s.................... 0 82 125 181 +12265 PF12431 CitT Transcriptional regulator Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00072. There is a single completely conserved residue G that may be functionally important. CitT is a transcriptional regulator which allows transcription of the citM gene which codes for the secondary transporter in the Mg-citrate transport complex. 21.60 21.60 21.60 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.40 0.72 -3.80 48 854 2009-09-18 09:08:13 2009-09-18 10:08:13 3 6 644 0 81 293 7 29.40 49 13.06 CHANGED lsQptVDphhptts....pspst..sp..LPKGIDpl ..AsQcplDcMFNshA......+s-sp...sc..LPpGIDt... 0 17 32 57 +12266 PF12432 DUF3677 Protein of unknown function (DUF3677) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. 
25.00 25.00 28.00 28.80 24.90 23.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.66 0.72 -3.54 19 104 2009-09-18 09:13:42 2009-09-18 10:13:42 3 3 83 0 79 115 0 81.20 45 4.34 CHANGED l...L+hLstssGhtplRhhss.....p+lEhWlpNsKlp+.sp-L.LhhlhhNsss.......pstpDh-.slstLl+lth+sps...........lhshahtsl+cl ......lL+hLosoCGhtEVRhhul.....pRLEhWLpNPK..Ls..........R...sAQ-L.LhplChNCso.......+uscDh-.VlupLlKlRLKsKs..........LlNaahhCl+El....................... 0 29 37 60 +12267 PF12433 PV_NSP1 Parvovirus non-structural protein 1 Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 109 and 668 amino acids in length. Parvoviral NSPs regulate host gene expression through histone acetylation. 25.00 25.00 67.70 67.00 22.30 19.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.73 0.72 -3.91 5 227 2009-09-18 09:15:31 2009-09-18 10:15:31 3 3 33 0 0 240 0 65.90 56 16.25 CHANGED lLTYpHKQTKKDYsKsVHFGNMIAYYFLNKKKIsT-..+-cGYFLSoDSGahsNFLKppER+lVSKLYTDEpKPETVETTVT .hpYhHKQTKpDYsK.VphtshlhhYhhNKcKIsc-..p.cGYahuusuGhh................................. 0 0 0 0 +12268 PF12434 Malate_DH Malate dehydrogenase enzyme Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00390, Pfam:PF03949, Pfam:PF01515. There is a conserved AAL sequence motif. There is a single completely conserved residue R that may be functionally important. Malate dehydrogenase is one of the enzymes involved in the citric acid cycle in mitochondria. It converts malate to oxaloacetate using NAD as a cofactor. 
21.50 21.50 21.50 21.50 21.40 21.00 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.85 0.72 -7.06 0.72 -4.24 5 120 2009-09-18 09:25:20 2009-09-18 10:25:20 3 5 117 \N 46 104 24 22.60 67 2.97 CHANGED s......osSc-DLK...-QQRAALRKAALEYHEFP .............ss.................t+tpLRcAAL-YHEFP 0 3 20 33 +12269 PF12435 DUF3678 Protein of unknown function (DUF3678) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. 20.40 20.40 22.10 21.80 19.40 19.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.70 0.72 -4.39 16 44 2009-09-18 09:28:31 2009-09-18 10:28:31 3 4 4 0 30 39 0 35.00 34 14.18 CHANGED hhso-lhlpusus..........soSSushsHpppc+hFlchsol .....hho-lhltAsus..........sooSsshhHpppc+hFlcasoh. 0 0 0 5 +12270 PF12436 USP7_ICP0_bdg USP7; ICP0-binding domain of Ubiquitin-specific protease 7 Gavin OL, Coggill P lg7 Prosite Family This domain is one of two C-terminal domains on the much longer ubiquitin-specific proteases. This particular one is found to interact with the herpesvirus 1 trans-acting transcriptional protein ICP0/VMW110. 
27.00 27.00 27.40 27.50 26.80 26.80 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.54 0.70 -5.14 52 419 2009-09-18 09:30:54 2009-09-18 10:30:54 3 25 258 2 276 415 4 231.30 28 22.05 CHANGED phRhWhhsp.RpN..........+ThRP.......sp.l..p-.shTlpp..ltsp.ss.pt.th+lalElh.......................tt.....ss.pspp..slLlFlKhaDsppppLphlG+lalptspKls-..llPhlpchhua....sssTplpLaEEIK......sMh-.l...........c.ctohpp.....uElpDGDIIsFQ+shs......................t.psphthssshpaa-aLhNRl.VpFtsh...tpPpc.s...........sFsLpLSc+hoYDplupcVup+Lsh.DPp+LRFsssts.h....stpP+...h+...s...hspoLp-l..L ...................................................RhW.h.p.RpN..........tThRP..........sh....t-....s....oh..tp........hhp.....p...tp....................hplalEh..................................................................t.t.h..ss.s..ppp.....slllFlKh...a..Ds.p....p...p...p.LphsG+lal.ptss+.lp-..lhshlpchhGa.......sss..spl.LYEEl+.............................shh-.l...........c.c.ohpp.................sEltDGDIIsFQ+ssst.........................................psps...phssstpaachLhpRh.........Vh.Fpsh.p.spc.s.........................................................tFsltLSpphsYp..p..lscpVup....pLs.........h..DP.....hl..phhthts........pttPt...h+.......p....tpl.ph............................................................................ 0 88 151 228 +12271 PF12437 GSIII_N Glutamine synthetase type III N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 160 amino acids in length. The family is found in association with Pfam:PF00120. This family is the N terminal region of glutamine synthetase type III which is one of the enzymes responsible for generation of glutamine through conversion glutamate to glutamine by the incorporation of ammonia (NH3). 
25.00 25.00 31.10 30.80 19.30 18.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.83 0.71 -4.85 106 750 2009-09-18 09:35:47 2009-09-18 10:35:47 3 3 601 6 222 705 101 162.80 58 23.02 CHANGED ppls-hFGppVFscpsM+cpLPKssY+pLpcoIcpGptL.DhplA-sVAsAMK-WAl-+GATHYT.HWFQPL....T..GhTAEKHDoFls.sss-.G....p.slhcFSGKpLlpGEPDASSFPsGGLRuTFEARGYTAWDsTSPAFIhcp...s...sLCIPTsFsSYoGEALDpKTPLLRS ...........p.hs-hFGppVFscpsMccpLPKplYKplpcsI-pG.ptL.DhplADsVAsuMK-WAlE+GATHYT.HWFQPL........T.GhTAEKHDuFls.sssc.G.......p.sltcFSGKpLlpGEPDASSFPsGGlRuTFEARGYTAWDsTSPAFlh-........sTLCIPTsFhSYTGEALDpKTPLLRS............... 0 108 185 212 +12272 PF12438 DUF3679 Protein of unknown function (DUF3679) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. 20.90 20.90 21.30 23.10 19.80 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.61 0.72 -4.61 14 131 2009-09-18 09:40:58 2009-09-18 10:40:58 3 1 130 0 16 88 0 51.90 54 47.82 CHANGED +sllhshlhhhGVLhGMQpANcGhppMKGYcDsohpsshplscscssphEAulLGp ...hh.hhlhhhMVlAGVuLANHGLKSMKGYpQhSYEQIAHMTGTcupssEoEILGp....................... 2 2 8 10 +12273 PF12439 GDE_N Glycogen debranching enzyme N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is typically between 218 and 229 amino acids in length. The family is found in association with Pfam:PF06202. Glycogen debranching enzyme catalyses the debranching of amylopectin in glycogen. This is done by transferring three glucose subunits of glycogen from one parallel chain to another. This has the effect of enabling the glucose residues to become more accessible for glycolysis. 
21.30 21.30 31.50 23.50 19.50 15.40 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.36 0.70 -5.11 94 351 2009-09-18 09:42:53 2009-09-18 10:42:53 3 6 336 0 118 343 25 219.70 31 32.83 CHANGED +EWLlTNGlGGYAuuT.lsGs.TRpYHGLLlAulps..PhsR.hlllocL-Eplth.ssp...........pasLuspca..........sss.htspGap.aLppF.ph-.shPhWpaph....sssh.lcKclhM.pu..............................................................pNsshlpY..p......hhpu..sp......shpLplcPhlshRsaHshspss..h.........................thphpht.h.tp..s.............lplpshs..........thssltlh..hs.t.....u..ph.httss............WahshpYshEpc.RGhssp-DhasPGhhphsLps...Gpslhl.s ............+EaLhTNthGuYusuT.lsssNTR+YHGLLlsslss..sss.R.alLLSp.L-Eslh..pGt...........................pasLuhp.....+a..........tss.hpPpGa+..alpcF..ph-..tlPshhYcl.....usll.lcKclhh.pt..............................................................pNpl.hlc.Y.p.........lhsu......ps............sspLcl+PhlsaRsh+tho+ps.th...........................shph..s..ps..G..............lphshhs..........shspLhhp.....hsp.......s..ph..hptss...........WYpshpYstEp-.RGhshpEDLasPGhFchslpt.G-slhh.s................ 0 48 89 109 +12274 PF12440 MAGE_N Melanoma associated antigen family N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 82 and 96 amino acids in length. The family is found in association with Pfam:PF01454. This family is the N terminal of various melanoma associated antigens. These are tumour rejection antigens which are expressed on HLA-A1 of tumour cells and they are recognised by cytotoxic T lymphocytes (CTLs). 
21.90 21.90 22.00 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.18 0.72 -3.70 33 530 2009-09-18 09:45:57 2009-09-18 10:45:57 3 8 23 0 267 572 0 89.70 29 28.27 CHANGED +uQKSptpstEcchQAptEspsL.ssQsssu-cp..tusuSs.sh..ss.ppssu.uGssssPQ.usQtAsossoshsu.s.........stspSsEuups.ptEE.ssuso ...............................tQ+SphpthEcp.hpup.....sEsp.......uL..suQs...sssccp..........pssS....S.......u...s...s.......hht..ss.......ppss..A.uss.ss.....PQ.u.PQtuss.ssoshss..s....................shs.pS.sEuupu..QpEc.ssss................................ 0 24 24 36 +12275 PF12441 DUF3680 Protein of unknown function (DUF3680) Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is approximately 40 amino acids in length. 27.70 27.70 28.20 28.90 26.70 27.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.80 0.72 -4.11 33 91 2009-09-18 09:47:12 2009-09-18 10:47:12 3 2 76 0 37 92 16 40.70 32 46.20 CHANGED hp.KplP.cFco-tEtcpFW-pHDso-YhD......phpplpFs.h+. ..........ctlP.phco-pEtccFW-o..pDhT-Yhs.......phpslph............ 0 7 24 32 +12276 PF12442 DUF3681 Protein of unknown function (DUF3681) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 112 and 212 amino acids in length. There is a single completely conserved residue G that may be functionally important. 
24.30 24.30 24.50 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.59 0.72 -3.85 22 90 2009-09-18 09:49:35 2009-09-18 10:49:35 3 6 6 0 54 67 0 96.20 30 49.03 CHANGED uspsh.ph.hALhshGlsTsusAhhshh.........hpsP..sGhhs.t.....hhYhlh...LsushlsGlstshsuhWVu.ssstt.....RRusG..+pllhsulsPLlh.s..........sh........uuhs.......l ..................s...s..th.tALhshGlsssuhAhslAh.......apsP..uGhht.h....hh...hYals...LsushlsGluplssu.h.WVu...sssts...........R+usG..+pllasulsPLlh.s.hulss...l............................... 0 0 10 27 +12277 PF12443 AKNA AT-hook-containing transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. This family contains a transcription factor which regulates the expression of the costimulatory molecules on lymphocytes. 26.20 26.20 26.60 32.90 25.80 26.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -4.02 4 80 2009-09-18 09:52:05 2009-09-18 10:52:05 3 1 26 0 32 83 0 103.60 40 11.37 CHANGED QpQIps..pl+ssptsLpu.E..puCpc.pPssQhssSpuoshhFphhpchct.h.+Ltpph-pLKs+lcphp+chc.susspLQDpp.shppL..psspLstP.Gss ................................ppQlph..pl+htpthLpt.E..puhpc.psspQhssSpuoshhFp.hpchct.h.+Ltpph-pLKs+lpphppc.c.sussph.....Dpp..sL.pLp.p.phL.ts.ht.s.................................. 0 3 3 5 +12278 PF12444 Sox_N Sox developmental protein N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 69 and 88 amino acids in length. The family is found in association with Pfam:PF00505. There are two conserved sequence motifs: YDW and PVR. This family contains Sox8, Sox9 and Sox10 proteins which have structural similarity. Sox proteins are involved in developmental processes. 
22.30 22.30 22.80 24.80 21.60 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.27 0.72 -2.87 24 321 2009-09-18 09:55:34 2009-09-18 10:55:34 3 2 129 0 110 284 0 74.70 53 17.54 CHANGED ssSPuhS--.sshSPs.s.usuGu....-o.......psst.tp........s.............................phcp-s-......D-+FPssIREAVSQVLcGYDWTLVPMPVRV ..........................................s..SPshS--.....pshSPs.s..Susuu........Do.................psststp..t......t.ts.............................chcp-s-.......--KFPlCIR-AVSQVLKGYDWTLVPMPVRV..... 0 14 22 59 +12279 PF12445 FliC Flagellin protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 125 and 147 amino acids in length. The family is found in association with Pfam:PF00669, Pfam:PF00700. There are two completely conserved G residues that may be functionally important. This family is the flagellin motor protein which confers motility to bacterial cells. 21.90 21.90 21.90 22.50 21.70 21.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.99 0.71 -4.02 27 245 2009-09-18 09:56:18 2009-09-18 10:56:18 3 4 177 0 2 121 0 140.00 37 25.15 CHANGED shpssuss......sussuoIThss....Gsohshsuu.........sss.......hsstssolSA-shtusspssu....aTs...ssus...tsassu..sssVshsu....................sssYscsDG.pLTTsss..s..sYahpsD.GsVTs....................ssGpslYhsADGclTT-Aso ......hh..tsuusuucsuolThs........GTpaohusus.........s..s............ssshsAoVSt-slhutsKusu.....sh.....ssuo.....hTa.ssGh.otslshsust..................sssYlDscG.slTsssshss..sY.lpcD.GoVTsst.................ssusGssVYVsusGKlTT-sTS... 0 0 1 1 +12280 PF12446 DUF3682 Protein of unknown function (DUF3682) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 125 and 136 amino acids in length. 
25.00 25.00 97.10 97.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.07 0.71 -3.86 33 59 2009-09-18 10:02:37 2009-09-18 11:02:37 3 1 2 0 1 59 0 133.60 61 40.95 CHANGED usGG..pGoG.GsuSuuuu.......ss...Puusssusussss..sAsuVDSSAGSSsGpAGSSGoNsSNTTGDSsTGDQos.AAAAApsSSPPEGPAGTTSGTGHTRQEEE......EEE-pEKQQQSDEsQVQ.QHQQHEHPAEsGE..ESA ................................................................................sspusGGsuuGuus.............sus..suusssus..u.sssss.......sAssVDoSAGSSsGpAGSSGoNsSNTTGDS.....sTGDQTs.AAAAApsSSPPEGPAGTTSGTGHTRQEEE......EEEEpEKQQQSDEsQVQ.QHQQHEHPAEsGEESA.... 0 0 0 1 +12281 PF12447 DUF3683 Protein of unknown function (DUF3683) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF02754, Pfam:PF01565, Pfam:PF02913. 21.40 21.40 59.10 57.90 21.00 20.30 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.46 0.71 -4.26 42 271 2009-09-21 10:06:30 2009-09-21 11:06:30 3 6 267 0 96 256 43 122.00 63 9.53 CHANGED .ssRlREIPYNYTSFSDREIVlRLLGp-sWphLs-LRsERpTGRSARMLaEVLGDIWVVpRNPYLpDDLL-sscRRptLl-AhpHRLspIccRps.............ssp+VtpLlsuA+pAVptFppp ...s.sRlREIPYNYTSFSDREIVIRLLG--AWplLs-LRucR+TGRSARMLaEVLGDIWVVpRNPYLpDDLL-NPKRRthLlcALpHRLsEIcKRRs...................csppVphLlsAAccAVccFspp....... 0 19 55 79 +12282 PF12448 Milton Kinesin associated protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 143 and 173 amino acids in length. The family is found in association with Pfam:PF04849. This family is a region of the protein milton. Milton recruits the heavy chain of kinesin to mitochondria to allow the motor movement function of kinesin. 
22.30 22.30 27.40 23.70 21.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.31 0.71 -4.05 16 296 2009-09-21 10:09:17 2009-09-21 11:09:17 3 7 65 0 163 225 0 164.70 29 31.66 CHANGED sssptpsSsssssts..su....u..........u........h.hs...u..hhLpp+slSsts..sspS......st.tp.s...............................PuoPssp-LptAL+pLo.......h.......pRcNaLup+.hhshppEtphpthupt..tst.........P.....s-SlhSsss.....sth..............tphhLP-KLQ.IVKPhEuS .....................................................................................................................p..........s...s..GSspo.Ss..o...............................hossc..S....ohphos..u...shlLpp+s..hSstlh.psts........tst.pp.us..............................P.uTPssschpsALppLo..........h.......tp-NaLu.p.....+..hhp...pEh+hpphs..................P......o-S..hh.Slus.................................t+hhLP-KLQ.IlKPhEu............... 0 13 27 87 +12283 PF12449 DUF3684 Protein of unknown function (DUF3684) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 1072 and 1090 amino acids in length. 
19.50 19.50 32.70 23.20 18.50 17.90 hmmbuild -o /dev/null HMM SEED 1093 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.96 0.70 -13.85 0.70 -6.98 22 109 2009-09-21 10:18:36 2009-09-21 11:18:36 3 7 94 0 90 113 0 917.70 39 61.55 CHANGED LTFVu.LcsIElalDDasll+LpKKsuPshslsIP+clcT+TspGLM+lpslpppssQlDAshMphV..thpssust.h..tt.c..sss.......uoL+SFFS+housussp...cstpsppss..pttsptD...........ltphspuolFL+IsTAsIpsslupuhusELERATKKPPPKpoplAlLTsuYsph.AStts.................tspss..IFuulLPo+..uG..RlFIGFPTpQTTGlsuHlSAPSlIPTVERESIDLNsRalupWNhEhLRsAGllCRlAapsEMuslpscl..t.....t..ttuthccpcltsllscAlHshppFsF+-STPSotlGphIE-uFWoCs+sss.IElLSTpGVlsScpVRhsscsLS.FhcuIPVlPc-hhssApcFlp+Lp-hGLls-lTlsDIhpELcupsLspsQlh-FLpWl.......uccusuupl-.sshppLLssAVAs.ps.s.us.ss+llsLusIssalNPp+I.....Ps-LPlPPsllPapaoKsLspppLpul.GWpELpllsWLpaLlp...susscs.....-pDlTpSssFuspVLsVLSKpW-s.LusuoKpsVlshLpspTlIPTKhGM++PsEsYFsoV+LFDDLPlV.....pGlsulKEKhLsALGVRKTV-LslIF-RLLtssststt......ptcWSHVDLI+YLuSVcsDIPspDlcRL+pTslCsAE........ssppsptpRYKlS-LaEPc-uLRsLtLPllcW...PGcapssSsEu+FLhtLGL+saPss.-llclhu...tusDhpL+sKAhsYalspat.NsYusach..utsshsFLPl.p.....spp...pLusPppCFTscsAulhGFslLRp-L+s..HAsKhGVppHPshspClshLlppPPpocp-A+tlFpYlAuRlu-lsssc..lc+lupAtIVPV.pcphs.........pptpsh..phl......sPppCYL....Gcup-YpcIFDFVDFGtcANhFLhusGuKcEPTptElAphLl+EPARl.uthQSs-................+YLpLLRslA-shshL++c..+pLhpcM+pusFLLuS+-lspptpptst................pps-----psl+EWsLspApDhVlVDDhpSapLF+-plLu...APQEEhL.EsFYhsLGu.sLSuLVcEchchGs..hssDQc.AtcLcKlIhERo+LFLH 
.....................................................LTFVu.LppIEhalDsapllpLpKKsuPshplslP..+slpo+Tt..pGlM+ltslpppssQlDAshMpsl.....tWps.sss..tt..ttt..s.t...................soL+o.FFS+housusts.................psts.p.t.t..p.t.tc-...............lhthpsuolFL+lsoAplpsplspuautELERATKKPPPKpTplulLTsS.....asph.uStss....................tpstclFuullPs+....uG..RlFIGFPTtQTTGhshHlSA.SlIPTVEREuIDLNsRalppWNhEhLRsuGIlsRlAassEMuslpsch..t.........t.tttst...hptctltthhscAlHhhppFsFpcSTPoutVuphlE-uFWsC.pp.ss.l-lhSopGll.sppVRlss...c-lo............FhcsIPVlPcphhsss..Flp+Lh-hGLlp-lTlsDlhpELcs+sLsppQhhpFlpWh.........sppuhssplc.sshppLLpsA.......V......As..........pp......................s....ss.....sspl...lsLuslpsalssp+I.....PsclPlPssslPhth..o+slsttpLpul...GW.p.tLplssWlpaLlp....sstsps.....-pslTpospFuhpVLsllSKpW-s.ls.ss.+psl....hshL.pshsslPTKhG........M++PsEuaFsoV+L..F-DLPsl.........puhpslKEKFLsALGVRKTV-LphIFpRLls...t..t...........ptcWSH..h-LI+YLsSVpsDIPspDhp+L+pothCsAE......................t......tstpsptphY+ss-LaEPp-slRsLt.L.PllpW...Pupaps.sS.Eu+FLh..LGL+paPss.pllphh........supc.....tL+spAhsYalspah.s.....tYssaph..ut.shshLPl.p.........sp.....t....tLssPppCFTs.tAslhGFtlL+p-Lps....HAsKFGVtpcPshttClshLl....tpP...............PpspppAthlFtYhusRlu-ls.sp..ht+ltpu.IlPl.ppt.s................ts.h...thl......sPppCYl.......GpuppYtsI..FDFV.DFGtpANhFLhtCGuKpEPTp.ElAthhsp-PsRl.shhpos-................+YLpLLRslA-shshl+..+s+tLhpcM+pusaLLus+-lssttpt...................tts.----tsl+papLspApchlllDDh.oapLF+-plls...APpE-hL.EshYhtLGu.tLSslVpEchph.Gs..hstcpp.AhpLp+hlhERo+LFLH.......... 0 34 55 75 +12284 PF12450 vWF_A von Willebrand factor Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00092. There are two conserved sequence motifs: STF and DVD. There are two completely conserved residues (E and N) that may be functionally important. 
In hemostasis, platelet adhesion to the damaged vessel wall is mediated by several proteins, including von Willebrand factor. In solution vWF becomes immobilized via its A3 domain on the fibrillar collagen of the vessel wall and acts as an intermediary between collagen and the platelet receptor glycoprotein Ibalpha (GPIbalpha), which is the only platelet receptor that does not require prior activation for bond formation. 25.00 25.00 42.90 41.80 20.80 17.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.15 0.72 -4.29 102 412 2009-09-21 10:23:44 2009-09-21 11:23:44 3 14 391 0 114 363 52 102.50 46 17.29 CHANGED ssshss.....spEpatphppNshhssuppPlSTFSlDVDTASYuslRRhLspG.pLP...PssAVRlEEhlNYFsYcY.stPs.........s.s.pPFulssEhussPW.NspppLl+ .................sstt.....s.sspcYpphs-Nsl+psup..sPLSTFSlDVDTuSYuNlRRaLN.p.G.pLP...PsDAVRlEEhlNYFsYDa.shPs..........s..s.tPFulphElussPW.NppppLl+......... 0 48 76 95 +12285 PF12451 VPS11_C Vacuolar protein sorting protein 11 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. Vps 11 is one of the evolutionarily conserved class C vacuolar protein sorting genes (c-vps: vps11, vps16, vps18, and vps33), whose products physically associate to form the c-vps protein complex required for vesicle docking and fusion. 21.70 21.70 21.70 22.30 21.20 20.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.37 0.72 -4.10 39 245 2009-09-21 10:32:19 2009-09-21 11:32:19 3 9 213 0 176 248 0 49.00 36 5.09 CHANGED ssc.pslcsl+csQpc.......stcpa-lFpptL...cpopDpFpllo-ahGRGlhpp ..........scscplhshh+sQcp.......st-pHDhFpppL...cpSpDpFullA-aFGRGVhs....... 0 48 91 145 +12286 PF12452 DUF3685 Protein of unknown function (DUF3685) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. 
There are two completely conserved residues (L and D) that may be functionally important. 25.00 25.00 47.00 27.40 20.20 19.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -10.89 0.71 -5.14 37 133 2009-09-21 10:33:30 2009-09-21 11:33:30 3 3 97 0 60 145 115 156.80 29 37.26 CHANGED l.hsclPhs.pLhshLlhppsLhlDsthhssssscAhtphEhLLpNhlIplANslhp.lLNphuchppl.....+phlappphlSoR-lERhRNpLsWphRhppahpcPpsIaESpapLahlsspuIpph.lasPRppELppLsulthhVTLlLEhRDAlAPplcullthlGpslValLTpVlGRuIGLlGRGIlQGlG ........................................................................thllpshllphAshlht.hlp.hu...pl.....pp.hhp.ph....lSoRplp+hRNplshp.hhpphhppshtlaEsphpLhhlptt.t..Isph.l..sRppELppLphhph.lolhLEhpDshsP.lpshlphlGshhsahLspllGRulGLlh+GIhQuhG.......................... 0 10 38 52 +12287 PF12453 PTP_N Protein tyrosine phosphatase N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00041. There is a single completely conserved residue L that may be functionally important. This family consists of various protein tyrosine phosphatase haematopoietic receptors, e.g. CD45, which dephosphorylate growth stimulating proteins. This limits growth signalling in haematopoietic cells. 21.90 21.90 21.90 22.50 20.20 19.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.56 0.72 -6.82 0.72 -4.23 7 91 2009-09-21 10:35:08 2009-09-21 11:35:08 3 16 27 0 5 93 0 27.30 48 5.26 CHANGED LKLLAFGF.AFLDstsaVsGpussssssG ..LKLLAFGF.AhLDo-sFVTGpospsssos... 0 2 2 2 +12288 PF12454 Ecm33 GPI-anchored cell wall organization protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. Ecm33 is an essential cell wall component and is important for cell wall integrity. 
23.20 23.20 23.40 23.40 22.60 22.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.88 0.72 -4.08 18 53 2009-09-21 10:36:24 2009-09-21 11:36:24 3 3 53 0 38 48 0 40.10 45 10.10 CHANGED MthlKYhLPA.LAAusushAs....sCu..tsTtTIpsQuDAsuLu .........M.hhKYlLPA.LAsAGsAhAtt...sCs..tuohTIpsQuDAsuLu. 0 7 19 32 +12289 PF12455 Dynactin Dynein associated protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 280 amino acids in length. The family is found in association with Pfam:PF01302. There is a single completely conserved residue E that may be functionally important. Dynactin has been associated with Dynein, a kinesin protein which is involved in organelle transport, mitotic spindle assembly and chromosome segregation. Dynactin anchors Dynein to specific subcellular structures. 25.00 25.00 25.40 27.20 21.30 24.70 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.61 0.70 -5.48 30 281 2009-09-21 10:37:31 2009-09-21 11:37:31 3 11 178 0 166 278 0 267.40 33 22.25 CHANGED -L--hcsspp...pEs-ssphsspspshhslNhKLQsosuKApsKsIDlELp+hEuppuspHLpllphaLP-....ahc..u-+DulL.slLhhpRlutKusLlsstlpc+..........htpps.....hsupht-phh..hsscllppLshlsslsc+FhshlspCosEpFschsshh.EhpslE+tlDtaI-hLK+sclsEppssspLp+slshhscLtpsh.hs.....sph.shspphhtpsthhpstlDpsssshuhl+.shlpsthssssp............hhphhpslhspscusKhhspKlh...........Rpl .....................................cLp-hppp.ps..ppptusch..ppp.s.tshshKlphutoKApsK.........sI-hELRphEstpAspHlslLpuF.hP-s...Fhc.........uGD+DslL.sLLLh.RlhtKAcLltppspE+.....................htcps......hpGsss.-phs......austllhpLshLpushc+atpulspCos-.hap+hushhsEhss..pERsLDhhI-LL++....DpLDEssss-.sLp+sItahpHLhpla.ls....pp.tsps..pltsphphhposLDshuspsupl+.shlQ.ss.....ppss-............................lhhhlcsL.spspsh+phspKlpRph............................................................ 
0 61 85 128 +12290 PF12456 hSac2 Inositol phosphatase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF02383. hSac2 functions as an inositol polyphosphate 5-phosphatase. 21.80 21.80 22.10 22.50 21.60 20.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.30 0.71 -4.70 32 312 2009-09-21 10:38:54 2009-09-21 11:38:54 3 8 175 0 215 308 0 111.30 27 15.56 CHANGED tst-....hphtphhppAl-ps.pphllsc.s....EchluuWhLh.....sscp.s.....s.p-plLLLTcpulalspaDats-+lspapRlsLsslppIphGhh.tsshp....sppcshshhlpaptss ..............h...c....hphtthhppslcsh.pp.hllss..s........-ph..hGuWhLh.....ps.ct.s..........s-p-plLLLTc..pulhlscYDhphc..Kl..spapRlsLpslppIphGt..a.hssht..........sc.pt.shplpap...s........................................ 1 67 94 152 +12291 PF12457 TIP_N Tuftelin interacting protein N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 99 and 114 amino acids in length. The family is found in association with Pfam:PF08697, Pfam:PF01585. There are two completely conserved residues (G and F) that may be functionally important. TIP is involved in enamel assembly by interacting with one of the major proteins responsible for biomineralisation of enamel - tuftelin. 
22.20 22.20 23.60 23.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.66 0.72 -3.98 32 238 2009-09-21 10:39:50 2009-09-21 11:39:50 3 7 182 0 141 228 0 99.20 30 12.68 CHANGED s-s-...-hE...cFEloDhDlcsE...........aNssRpRp..+poKcptlYGIaAppc-s-ppp.ttt.t...........ts++tpDYot.......PlsFVuGGlppsupc....cccpptppsp.....cpsccpst....ss .....................p...-t...-h-pa-h.s-hDhpsE.............aNPpRpR+.....hpoK-pAhYG.laA-cssD-ptst.h...............ss++s+DYot.......PV...sFlSuGlppuupcctpppcppp...pppt.......................ttt.................................................... 0 51 73 110 +12292 PF12458 DUF3686 ATPase involved in DNA repair Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 450 amino acids in length. There are two conserved sequence motifs: DVF and SPNGED. 25.00 25.00 118.90 39.80 18.10 17.70 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.31 0.70 -6.22 31 104 2009-09-21 10:40:47 2009-09-21 11:40:47 3 5 101 0 36 112 9 438.60 47 26.48 CHANGED cLpp+sppLNppRh-hFGus-hcLlGstRlRTENNClsRDIVpVG.................shlLFGYNVahGL+p-s.plsDVFSlach...........tpss.......sFcht.hshst....hLsDspFhpDFp-LY+YY+ss+LhpLthh.cs+LLAlFQlG-phpDlRVFRWplsscGt..pYlDsRGERDhshPspHDF-WpcsoR-s+VtG+HPHlSI..hDcVFVETlGGDLTIKlENNT-oGcGIYuEPV--.sQSLDDAclcYAclGsLILL+lhPYpEppaRYlVFNs+pppVhRlDuIGpAClpLP-DpGIIFPGGYYL.poG-hKsF-pshs..s....hcFcRslRSPNGEDVLYVFaptppGphlLhsYNlIc+plpsPlhCHGaulh-DGchllF+.up.-EPTRlHPhQlWQTPahS-pa.Aust..ssssuhLt+IGNs-LVRGlS-shulsphlpcp..ssosthYpsLhpsspclhDsYa 
............LpppsppLNtpRh-.FGusphcllGp.RlRTENNClsRDIVpVG.................chLLFGYN.V...alG.L+.pEs.plsDVFoLY+h.....................t..spta-ht.hshsss...h.LsDssFlpDFs-LYpYY+ss+LlpLthp.-u+..LLAsFQlG-chsDlRVFRWplss.....DGp..pYlDsRGERDhshPst..aDF-WpcsTR-.pV.GRHPHlsI..h.DpVFVETlGGDLTIKlENNT-sGpGIYpEPVp-.sQSLDDAplcaAclGsLlLL+lhPY+EcpaRYLVFNohTppV.RlDuIGtuClpLPEDpGIIFPGGYYL.QsG-hKsF-...pshp......s....hcFcRplRSPNGEDVLYlFap.ppGchlLhsYNlIc+plpsPlhsHGaulh-DGchllFc.u.-....s.cEPoRlHPhQlWQTPFso-ca...AApp......ss...tsuhLsRIGNA-LVRGlS-hhsls+hlppp..ssotthYptLspsspclhDsYa............................. 0 12 25 33 +12293 PF12459 DUF3687 D-Ala-teichoic acid biosynthesis protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are two completely conserved residues (L and Y) that may be functionally important. 25.00 25.00 30.80 29.90 22.20 22.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -8.16 0.72 -4.72 33 811 2009-09-21 10:41:44 2009-09-21 11:41:44 3 2 808 0 47 183 0 43.50 43 89.74 CHANGED h.....hhp+....ssspFlh+TlFYFhILlsLlYLYuYpGhspusFIYNE ................tpphhpFlh+TllYhhIhluLlalYua.pGpspusFIYNE.... 0 8 19 30 +12294 PF12460 MMS19_C MMS19_N; RNAPII transcription regulator C-terminal Gavin OL, Coggill P lg7 Prosite Domain MMS19 is required for both nucleotide excision repair (NER) and RNA polymerase II (RNAP II) transcription [1]. This C-terminal domain, along with the N-terminal, MMS19_N, form part of a silencing complex in fission yeast that contains Dos2, Rik1, Mms19 and Cdc20 (the catalytic subunit of DNA polymerase-epsilon). This complex regulates RNA polymerase II (RNA Pol II) activity in heterochromatin and is required for DNA replication and heterochromatin assembly [2]. This domain apparently shares homology with some HEAT repeat sequences. 
27.00 27.00 27.00 27.20 26.90 26.50 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.33 0.70 -5.75 60 284 2012-10-11 20:01:02 2009-09-21 11:42:55 3 10 227 0 192 312 0 381.10 21 40.43 CHANGED lsthl...stLscsssst.........................tphpphLcsLspluspppl.....hcslsh.pLls+lshhhp.......................ssphshslLpolhplhppptppp.....shshahpp.llshh.hphshsssspsps...........psLphluplhshllppLstpcppph.hp...plhshFh.........................h..ttt....tppphlhlhstlLuuLs+sssh......ptsp....LLpplhphshspss......thh+huhhchluhLlNK.hs...psp..........hpshLpphhpph........tt.tp.p.pslcllhWlsKALllRspstusphlsp..Lls......LLss.........pph...uptsAcuFplLls-...cslhst...............pptssl+lLaKQ+hFsplhPhl...hptacssss.................K.sY...LpALStlLp.slP..psllhscLspLLPLLLpoL........shsss...p...........lph..usLpTLtshlc-s..spllpc..alp..oLlspLLplu .......................................................................................................................................h.phLphlstlshp.pl.....hp.h...hlhptl..h..........................................p.t.hhhhhtsl..hhpptt.t.........p.thhhpp.hh.hh.hthhhts...ttt.................................p.thLshhsplhshhsppls.phptp...hp..........ph.hslFh.....................................................h..t.t............t...ppphlhlhh.shl.suLs+ssphs........phtp.......................lhppllphs..hspss...............htphshhc......hhu.sLlNK.h.ssp..................thp.phlphhhpph..................................t.t..p.psh....pllhWlsKuLll....R.h..p..sh.ssplhsp..Lhp..............LLss.............................sph......u.tsAcuFslLhs-.....ss.lLsp...................................tstssl+lha+Q+hFsphhPhl.......lpthcssspt.......................................h+ssa...Lp.uLutlLp.plP....psl.lhs...clss.Ll...sL...LlpuL.........shsss.....s............................lph..usLpsLpsll.cs....sphhpt..alp....sLlsphLph...................................................... 
0 57 96 156 +12295 PF12461 DUF3688 Protein of unknown function (DUF3688) Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is typically between 79 and 104 amino acids in length. There is a conserved YRW sequence motif. There is a single completely conserved residue Y that may be functionally important. 25.00 25.00 25.70 25.70 23.90 24.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.69 0.72 -3.60 38 86 2009-09-21 10:44:18 2009-09-21 11:44:18 3 2 6 0 0 86 0 91.90 31 26.06 CHANGED DNKaYallh+spps.......ssWcIhK.FpNsppth........hh...tph............................................shhK.ulYRWsGssEP.phPs....ID.ssGpIpsW..........p ......................................................................................................................DNKaYhllh+ppps.......ssWpIhK.Fppsppt..h...........hh..t..t..h.......................................a...........shhKulYRWsGs.sEP...phPp.....ID.sTGpIpsW.....p............. 0 0 0 0 +12296 PF12462 Helicase_IV_N Nucleolin_N; DNA helicase IV / RNA helicase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 170 amino acids in length. This family is found in bacterial DNA helicase IV, at the N-terminus of Pfam:PF00580. 21.40 21.40 21.50 27.40 21.30 20.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.57 0.71 -4.60 36 672 2010-01-05 17:03:59 2009-09-21 11:51:44 3 8 665 0 62 352 3 164.40 57 24.00 CHANGED McLpuoshuphlsQpsYpplcL.ssulploucp+phhIPFsplt.slps+RGllWGcLpFthss....ppslpl+GhpWp-sppFh............cplhpsappWspchschtsphLsphhppIpchpptspalscppltslhcplcpthpsLshslschtph.sspt...apphttWLpc ......MELKAToLGKRLA.QH.PYDRAhILNAGlKVSGDRHEYLIPFNQLL.AIHCKRGLVWGELEFVLP-....-KVVRLHGTEWuETQ+Fa.....................HaLDAHWpRWSsEMS-lAutVLppQlchIupRTGcN+WLTREpssGlpppIRQuhuALPLPV..sRLEEF-sCREh..WRKC.AWLpD.......................... 
0 4 19 40 +12297 PF12463 DUF3689 Protein of unknown function (DUF3689) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 399 and 797 amino acids in length. 21.70 21.70 33.90 33.40 18.70 21.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.78 0.70 -5.64 8 115 2009-09-21 10:58:33 2009-09-21 11:58:33 3 4 60 0 63 117 1 265.30 55 34.69 CHANGED Vps.ltchtLl.sLsphF-pL.Whhptsp...hth.s.sssCosc.uLKIQFLRLlpsFsD+cpsp............tt.p..h.h..s.t..ss..s+shttcuppGLls+lLpshtp-st-ShaRFWlApuVEuFLRGssshtDQhalhpRGLlEHlLppIlcusscspcsLQhpFDLLGELhKFNpssFcRhsphlss-.KFph..FlctlsosLVDSNhFlRsVlLSL-pFcppts-hs..................tpshpcs+hlhhhp............spNph+hLpcLIshIplpslsQ-NlsCLNTuLlILhhAc+sGcL.hYLpuLRpt-hp .......................................VH+MlAEF+LIPGLNNLFDKLIW...RKposS.s.hV.lH.uHNpNCDCSP-loLKIQFLRLLpSFSDHH-s+hlLhs........................................phsphSAlshc..sslPElcullNo......cRoLV..CDGK+GLLTRLLpVMK.+EPs-SSF....R..FWQARAVESFLRGsTSY..ADQhFLLKRGLL.......EHILaCIl....DScC.+.SRDVLQSYFDLLGELMKFN........lDAFKRFNKYlN.......T-.tKFQl...........FL.........pQINSSLVDSNMLVRClsLSLDRFEsQs.DhK......................................VscVL....SEC+LLuYhu.................ps.sphoFLFRLINIIpVQTLTQENVSCLNTSLVILMLARR+t+LPhYLphLpchEh.s........................................... 0 26 33 45 +12298 PF12464 Mac Maltose acetyltransferase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00132. Mac uses acetyl-CoA as acetyl donor to acetylated cytoplasmic maltose. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.54 0.72 -3.95 351 3057 2009-09-21 11:01:47 2009-09-21 12:01:47 3 32 2089 58 602 2037 48 54.50 30 25.99 CHANGED cEKMluGcl..........Yps.t..........Ds...E.LhptRtcu+clhtca.N.........ps...p...p............p.tp........cRpplL+cLhGpsGc ..................hEKMlAGch..........Yps.t..........Dt...p.LhpcRh+A+plhtcaN...............ps...t..sp................-.tp............cRppllccLhGpss................................ 0 166 343 483 +12299 PF12465 Pr_beta_C Proteasome beta subunits C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00227. There is a conserved GTT sequence motif. There is a single completely conserved residue Y that may be functionally important. This family includes the C terminal of the beta-type subunits of the proteasome, a multimeric complex that degrades proteins into peptides as part of the MHC class I-mediated Ag-presenting pathway. 20.30 20.30 21.40 21.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.70 0.72 -4.69 63 391 2009-09-21 11:37:50 2009-09-21 12:37:50 3 3 243 90 207 367 0 37.50 38 13.88 CHANGED sNc+up+ptsY+atpGTTuVLscpl..hpl-lscp.pVps ..............sNp+sp+..ptpY+atpGTTAVLscpl..hpl-.lh-E.pVp................. 0 55 93 153 +12300 PF12466 GDH_N Glutamate dehydrogenase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF05088. There is a conserved ALR sequence motif. Glutamate dehydrogenase (GDH) is a homohexameric, mitochondrial enzyme that reversibly catalyses the oxidative deamination of L-glutamate to 2-oxoglutarate using either NADP(H) or NAD(H) with comparable efficacy. 
25.00 25.00 25.80 38.20 22.40 22.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.85 0.72 -3.98 4 31 2009-09-21 11:39:53 2009-09-21 12:39:53 3 1 30 0 6 34 2 55.10 60 3.31 CHANGED MpupKuASpSspsssKPsAEpuVsssAp..ptloLEPVFAALRKRYPAAtQuEVQtFAAD .....MsuKKuhSpSspsssKssAcpulsshAp..puVoLEPVFAALRKRYPAAtQuEVQtFAAD. 0 1 3 4 +12301 PF12467 CMV_1a Cucumber mosaic virus 1a protein family Gavin OL lg7 Prosite Family This domain family is found in viruses, and is typically between 156 and 171 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01660. 1a protein is the major virulence factor of the cucumber mosaic virus (CMV). The Ns strain of CMV causes necrotic lesions to Nicotiana spp. while other strains cause systemic mosaic. The determinant of the pathogenesis of these different strains is the specific amino acid residue at the 461 residue of the 1a protein. 25.00 25.00 43.70 42.60 21.20 20.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.12 0.71 -3.90 14 89 2009-09-21 12:27:24 2009-09-21 13:27:24 3 3 22 0 0 90 0 156.90 59 16.17 CHANGED Whusho+sh..RsFlpshlpohFPoLR....+DcpEFLsKLSshsoF.NEpspsDhucphDVhusAAsl.......sshsVpsuKphts-KcKpht.stppPV.p.s.t.............s.ssc.sPtsssssps...spssp.pslospTcss-oRlApRusAMhEYssYpppLHsNsVSNL .......WFAuhTRPl..RVFFSoV.V+sLFPTLR...PREEKEFLlKLSTFVTF.NEECSFDGGEEWDVISSAAaV.......AsQAVsDGKlLAupKAcKLA-+LApPV.ElSsp...............s.ssSsTPcDsussCG.cEpEsSELDSLSuQTRSPITRlAERATAMLEYuAYEKQLHDTTVSNL 0 0 0 0 +12302 PF12468 TTSSLRR Type III secretion system leucine rich repeat protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. There are two completely conserved residues (Y and W) that may be functionally important. This family consists of leucine-rich repeat proteins involved in type III secretion. 
21.20 21.20 23.00 21.80 20.30 19.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.31 0.72 -4.42 25 427 2009-09-21 12:29:20 2009-09-21 13:29:20 3 37 178 4 17 313 0 45.10 35 7.66 CHANGED shuhsssSh.st.......tssposs-YculWscWc+sAsss..EpRspAVp ...................t..psS...t......sssspssssYhslWs-Wc+pAsst..EpRppAVp...... 0 0 7 9 +12303 PF12469 DUF3692 CRISPR-associated protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is typically between 101 and 138 amino acids in length. The proteins in this family are frequently annotated as CRISPR-associated proteins however there is little accompanying literature to confirm this. 21.90 21.90 22.80 23.10 20.50 21.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.54 0.71 -3.87 86 200 2009-09-21 12:31:19 2009-09-21 13:31:19 3 1 170 3 93 221 4 122.50 24 16.65 CHANGED hllsloluPVQsFIspuR+s+DLWuGSalLShLstpshptl..hcp.............sst....llhP......t................t.................................................httsl.P.....................Nphhhhlsstt................hpthtcp......................scpshpctac..plscpl .....llhholuPVQsFIspuR+s+DLWuGSalLShLsttshptl..hcp.t...........sss......llaP.......s.hptp........h...hhpphtt......................................................tsuh.P.....................................N+hhhh..lsssp................hpthtpp...................scpthpptapplhp..h...................................................................................................................................................... 0 56 74 85 +12304 PF12470 SUFU_C Suppressor of Fused Gli/Ci N terminal binding domain Gavin OL lg7 Manual Family This domain family is found in eukaryotes, and is typically between 192 and 219 amino acids in length. The family is found in association with Pfam:PF05076. There is a conserved HGRHFT sequence motif. 
This family is the C terminal domain of the Suppressor of Fused protein (Su(fu)). Su(fu) is a repressor of the Gli and Ci transcription factors of the Hedgehog signalling cascade. It functions by binding these proteins and preventing their translocation to the nucleus. The C terminal domain is only found in eukaryotic Su(fu) proteins; it is not present in bacterial homologues. The C terminal domain binds to the N terminal of Gli/Ci while the N terminal of Su(fu) binds to the C terminal of Gli/Ci. This dual binding mechanism is likely an evolutionary advancement in this signalling cascade which is not present in bacterial homologues. 25.00 25.00 32.70 28.80 23.70 22.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.18 0.71 -4.72 6 111 2009-09-21 13:09:09 2009-09-21 14:09:09 3 6 78 4 67 106 0 191.20 52 43.07 CHANGED hcpl-cslE+-GSsLuGVsAchsacELs+ss.scp.sc.............cpsT-p.ppshpcshphppcus.sss.uppssshpps.............sshshspphsRspsLsGlcLphu.puAphLsLAlRsRlRHGRHFTF..pspchAlTFVopuVsGuhsoc-cPYushGsWlQILIss-hV.+Ml--hpDLooscs..LKlPhpYcWP-+sLKlhl ................................................................E+V-cGIEp-GSNLSGVSAKCsW--LocssEcc--ocshslup..........................................tp-TE.QIREsLp+GLchsscPlhP.slss.p+pNuhp+s........................ppsssshlPpELlR...TRpL-uVHLKFNtEuGuLlPLs..lRGRLhHGRHFTa.KS.IsGDhAITFVSoGVEGAFATEEHPYAA+GPWLQlLls-EhVp+MhcD....hp.sLsss-c.......hphPhpapWP-+pLpl.l................................. 0 20 24 45 +12305 PF12471 GTP_CH_N GTP cyclohydrolase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. This family is the N terminal of GTP cyclohydrolase, the rate limiting enzyme in the synthesis of tetrahydrobiopterin. 
25.00 25.00 50.80 35.80 22.10 21.50 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.23 0.71 -4.70 39 190 2009-09-21 15:39:55 2009-09-21 16:39:55 3 7 177 0 131 193 27 187.10 54 40.49 CHANGED hss+IhLToaPsptshsPlPlpWGA.ssstcRGPVlso..hsshs+RNAIGsHuGSYulY+ALAVAuGpLsscH+sDhTNTpPshsIsPpPpWuDPpKIVShDPaGHlssphFschh.ppGhDIRPTIAlT+AHlplsElp-AlppGRLtsDGcllhs.................sG-ltVTKsAlEPVWYLPGVAcRFGlsEstLRRsLFEc ................h.s+IlLToaPsp.t..uhs.PlslcWGu.ssstpRGPVlso..ssshp+RNAIG...............uHuGSYSlYpALAVAuGtLss-HRPDhTNTpPsssIGPaPpWuDspKIVShDPWGHhlsphFtc.l..ppGl..DIRPTIAlT+A...HhpLPElp-AlppGR.....LhsDG+llls.....................................sG-lsVTKsAVEPVWYLPGVA-RFGlsEusLRRsLFEp..... 0 40 75 109 +12306 PF12472 DUF3693 Phage related protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is approximately 60 amino acids in length. 21.00 21.00 21.10 21.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.72 0.72 -4.37 31 83 2009-09-21 15:41:09 2009-09-21 16:41:09 3 4 62 0 13 83 0 55.90 28 40.57 CHANGED lltl+u-+uco.pt+shWpsIhK+hsuhuhs......sluhs.hsuhuhuhststpslspssls ......LlslcA-+ucsspt+shWpsIsKKhsuhuhs......sluhs..hsuhuhshstsppslhpssl.................... 0 3 7 9 +12307 PF12473 DUF3694 Kinesin protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 131 and 151 amino acids in length. The family is found in association with Pfam:PF00225, Pfam:PF00498. There is a single completely conserved residue W that may be functionally important. 
21.80 21.80 21.90 22.20 20.60 21.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.76 0.71 -4.52 38 863 2009-09-21 15:44:23 2009-09-21 16:44:23 3 52 182 0 460 731 1 128.10 25 11.09 CHANGED pYhPV.h.pssp...s.GsFpL+QGlp+Rlslslsppsupph....hpchhtlhluth+.h..ss..h........s..h.Lpl.lspt.......ss......................................shshhupWDSShH.sShhLNRhT.sppp+lhlTlshslhhsc.hscPlhFph-lslpIh.uRsth .............................................................pahPs.h.pttt.....s.shF.L+Q...G.h.p++lslpl.pp.ps.s.ph................hcch.h.tlhl.....u.hp...........................s....h..htlhstp.................ps.........................................................................................................................................hphpusWDouhH.sp.hLNc.T..s..................s..c+l.ahtlts.l.............lpp.s.........hhhphchsh.hhsppt.t.............................................. 0 114 158 301 +12308 PF12474 PKK Polo kinase kinase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00069. Polo-like kinase 1 (Plx1) is essential during mitosis for the activation of Cdc25C, for spindle assembly, and for cyclin B degradation. This family is Polo kinase kinase (PKK) which phosphorylates Polo kinase and Polo-like kinase to activate them. PKK is a serine/threonine kinase. 
25.00 25.00 27.80 25.70 24.40 24.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.08 0.71 -4.46 23 413 2009-09-21 15:49:36 2009-09-21 16:49:36 3 10 84 0 193 333 0 135.60 35 24.19 CHANGED +aplt+cQh+cpFh.p+ppLh++a-p.ElEplpRhppcplEchcpcQptE++chsKplRsEpcpchthF+-SL+lptp...pscp-hE+l...cpQc+cchKtc+pchppKHppp.c-hhuppcsslcp.Lpplpsc+++tLh..EpE .........KcQht+pQl+cpa..pppphh+pa-p.EhEphpRhppphlEcLcpcQsp-++c.sKphR....uEtcpchuhF+csL+hppp.....psp...........p-h-+.l....p.pQcccp.K..p.c+.p.ppK+p.p.p.+-hhtppcps....lct............Lpplps...c+p+.Ls-pE................. 0 43 63 120 +12309 PF12475 Amdo_NSP Amdovirus non-structural protein Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 50 amino acids in length. This family contains proteins of each of the four types of Amdovirus non-structural protein. 25.00 25.00 40.50 39.70 19.40 17.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.36 0.72 -4.06 7 138 2009-09-21 15:51:29 2009-09-21 16:51:29 3 3 3 0 0 147 0 46.60 63 33.50 CHANGED YhDKspcPpcsp.sL....+phspDLtlhaoshcCshps.p-scscsp .FDKNEDPKDVpKSLGWhlK+LN+DLA.lIaSNHHCD.QsIKDPEs+Ac...... 0 0 0 0 +12310 PF12476 DUF3696 Protein of unknown function (DUF3696) Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. 21.70 21.70 22.60 21.70 21.60 20.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.55 0.72 -3.99 58 201 2009-09-21 15:55:04 2009-09-21 16:55:04 3 4 191 0 67 169 38 49.90 25 12.02 CHANGED lsscclulaah.....ppspttsplpplp.lscpGcl..spWP.....cGFFDphttph.pLh ............................ppltlhah.....ppssssoplpplp.lsppGpl..spWP.....cGFFDpt.h-h..h............. 
0 26 46 57 +12311 PF12477 TraW_N Sex factor F TraW protein N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. There is a single completely conserved residue G that may be functionally important. The traW gene of the E. coli K-12 sex factor, F, encodes one of the numerous proteins required for conjugative transfer of this plasmid. 21.30 21.30 21.80 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.03 0.72 -3.91 21 169 2009-09-21 15:57:01 2009-09-21 16:57:01 3 1 137 0 29 121 8 30.20 55 14.12 CHANGED hhhhhlhshlhhussApApsLGshGssaPIu ......hpspuLlALLlaGQS.VsAADLGTWGDLWPV.p. 0 5 12 24 +12312 PF12478 DUF3697 Ubiquitin-associated protein 2 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00627. There are two conserved sequence motifs: AVEMPG and QFG. 22.20 22.20 23.10 23.00 21.50 19.70 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.51 0.72 -4.31 7 185 2009-09-21 15:58:24 2009-09-21 16:58:24 3 3 70 0 80 193 0 33.20 77 3.25 CHANGED .sopIPuoAVEMPGuu..slsuLslQFGAL-FGSE ..PsSKIPAoAVEMPGSA.....DloGLNlQFGAL-FGSE...... 0 15 21 45 +12313 PF12479 DUF3698 Protein of unknown function (DUF3698) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 89 and 105 amino acids in length. 
25.00 25.00 34.50 34.40 24.70 22.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.48 0.72 -3.95 11 45 2009-09-21 15:59:39 2009-09-21 16:59:39 3 3 1 \N 45 47 0 98.90 31 34.17 CHANGED TltsGITLRSA.IAGFFYpsEs-uh-Tppphtpsasl.upKT.....ppsDPhhNVhP+shcps.....sh+..p.t........s.hhsltEsssp-.aps.....IcssTas ............hlcssls.h+sh.hsshhahspscu.soppp.thsFuI.shKTt.h.hppu...hplt.cs.ppp..phpphR.opp.s...............hushlhht.shpp..apA......Rs.h............ 0 0 45 45 +12314 PF12480 DUF3699 Protein of unknown function (DUF3699) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. 20.40 20.40 21.20 21.70 20.00 19.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.27 0.72 -4.34 17 310 2009-09-21 16:01:11 2009-09-21 17:01:11 3 9 30 0 145 276 1 75.50 33 13.82 CHANGED tss.slpLophlPLph..VclplHstpphpLcl+hsouRsaYLpLsssscp.-slFstWl+Llp...lLp.shpthspsspl ...................t..s.plpLo...RhLPL+F..VcLplaDpppppL+l+hsT......sRsaYLp.Lsssscp.-slFshWh+Ll....lLp.shsthsps.t............ 0 10 12 26 +12315 PF12481 DUF3700 Aluminium induced protein Gavin OL lg7 Prosite Domain This domain family is found in eukaryotes, and is approximately 120 amino acids in length. There are two conserved sequence motifs: YGL and LRDR. This family is related to GATase enzyme domains. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.46 0.70 -5.15 33 220 2012-10-03 21:14:07 2009-09-21 17:03:10 3 4 52 0 86 1785 730 191.40 46 86.72 CHANGED LAlFcKulAcsP-ELpS.Psss.s.usth............................c...............ssptllpcFhSspP.suholshGs..uuhlAYotspps.....slhPRLFush.DDIaClF.GpL-NLssL+pQYGL.uKssNEshlVIEAYRTLRDRGPYPADQVV+-LpGpFAFVlaD.spssolFsAsDsDGpVPLaWGlsADGslVhSDDh-llKtuCGKSaAPFPp.GChFoSs.GGLpSFEHPhN+lKu...........hP......RlDSE...G.hC..........GAsFKV .......................................................................................................LulFpttlspsPptL.u.st.t.........p.sttlhppFhstts..suhohphGs...uhhA..a...ot....tpps.......hp..R.Fush.D.-.Ia.C.l.F.GtL-NL....u....t....L.......p....p.....Q......Y....G.....L....u...K....s...s..N.....E....sh....h.VI.E.A.Y.+T..L.R...DR.u...P.....Y..P..A...sp.......V....lpcL...pG..pFAFVl.aDsp.......s.....p..olF.sA.t................sss.G.p....VsLa..WG.l..s............u.......D..G...p...l.shS..D....-.h....-...l..l.......K.....t.....u...C.u.K..S.hAP....F.Pp.G.Ch.apot.....uG.....L.......p...S...a.Ep..Ph..N+.l.pshPth-pc..hCGusFKV............................................................ 0 12 61 73 +12316 PF12482 DUF3701 Phage integrase protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00589. 
21.50 21.50 21.60 28.90 21.20 20.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.28 0.72 -4.24 38 151 2009-09-21 16:05:01 2009-09-21 17:05:01 3 5 74 0 61 147 10 96.20 37 16.67 CHANGED ptLcshs....sssPthsDslstWLssplstsLputGIp.......TLu-LssRlsRRt.+WWpulPGLGssuARpIEAFhAsHsslst+u.....................psllhhs.pusllP...h .........t..LcphssssPthsDsVshWLs..spss..tsLpAtGIp.......TLu-LssRlsRRt.pWWpulsGLGsuuARpIEAFhusHsshst+s..............puhlsh..ps.lsP................................. 0 3 11 45 +12317 PF12483 GIDE E3 Ubiquitin ligase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is typically between 150 and 163 amino acids in length. There is a single completely conserved residue E that may be functionally important. GIDE is an E3 ubiquitin ligase which is involved in inducing apoptosis. 21.20 21.20 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.87 0.71 -4.87 31 280 2009-09-21 16:07:55 2009-09-21 17:07:55 3 6 190 0 162 283 43 144.10 22 45.07 CHANGED ppphtccppp......hhpppochlspspcpsPFhLcD....s...........o.GcVhV..s.suutlshhhshcp.F....cs.ss.stsshthuhh..th.....................slGh+hhEclLPsspplhVlGps.pDstG..lpIppPppt.....FhlS.coc-pLhpphtpts+hhhhuulsh.......sllG ..............................................t....thhppppchlp.ptsp.psP.Fh...L...p..D...s.................s..stVhV..s..s.u.ss.l..s..l..p..sshcp.a......c.sst..shss.hh.hs.hhsutp.....................................hG..h..p.psEchL.sGssloslG-h..................h....tc.......s.....st....lp......l.p...sppt...........ah......l..o...s.pplhtp.tt.hphh.hhhshhh..h.......shhh................................................................. 0 44 89 131 +12318 PF12484 PE_PPE_C Polymorphic PE/PPE proteins C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 90 amino acids in length. 
The family is found in association with Pfam:PF00823. There is a conserved SVP sequence motif. There is a single completely conserved residue W that may be functionally important. The proteins in this family are PE/PPE proteins implicated in immunostimulation and virulence. 21.80 21.80 22.10 22.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.16 0.72 -3.05 86 1529 2009-09-21 16:10:05 2009-09-21 17:10:05 3 5 63 0 165 799 0 84.80 32 22.29 CHANGED V.uAu.l...GpAusl.GsLSVPsoWusu.....sPsssssus..........Lsssshssssssss.......sssh...h..GhP.........hsuhuutttsu..hs...........sRYGh........Rss..VMs.R....PP .....VuAul.Gp.Au.sV.G.sLS.VPsuWusu........sP.u....s.sssuss.........lsssshsussssus.........ssh....hs..GhP.....huuhuu..t.usuu..su..................RhGh.....+.hVhs.+........................................................... 1 35 69 134 +12319 PF12485 SLY Lymphocyte signaling adaptor protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 144 and 156 amino acids in length. The family is found in association with Pfam:PF07647, Pfam:PF07653. There is a conserved LGKK sequence motif. SLY contains a Src homology 3 domain and a sterile alpha motif, suggesting that it functions as a signaling adaptor protein in lymphocytes. 25.00 25.00 59.20 59.20 24.50 24.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.16 0.71 -3.98 14 178 2009-09-22 08:22:33 2009-09-22 09:22:33 3 9 39 0 84 140 0 148.20 42 25.16 CHANGED c.p.pRSoSFGcFct.+..sSPlps-cchsscpt......tttsscsssp...SuhsLGKK.h+u.IShTM++KMGKch.KAlSEEhs-ss-tcshssssss...t...h-Kssl+uusShESLaS.hSGQSSoSu.sVsSsSsGsSNRDSl+LE....-psP..YsGPF ......................................phtRS.oSFGsFDtp+.spS..ssps-cphpsc-s..h......htts.csssp...Su.tuLGKK.h+u.IScTM+KKMuKKY.K.uLSE-ht...-sst.tsuhssss.ssst.-s.ph-KssLKuusShESLhsshSGQSSoSu.sloSsssGo..SNR-..Sh+.E....--sP..YsGPF................ 
0 3 10 33 +12320 PF12486 DUF3702 ImpA domain protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 207 and 469 amino acids in length. The family is found in association with Pfam:PF06812. 25.80 25.80 31.70 31.50 25.70 25.70 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.86 0.71 -4.53 26 515 2009-09-22 08:23:51 2009-09-22 09:23:51 3 2 347 0 39 278 0 138.20 40 33.88 CHANGED hppQLspLpplsPhaslphGppllcpApplWPss.phpthsppWppplpspAhsssplsuWpputspLppLu-+Lsth-cp+Gp.hTlS.LKoslashppsh.spshPlEEhLRQLptp.tpspsss.uhhppl-p+LptLLsRYhhLpp ...............pppLpQLhchsPl.sLcpGhphh+.AcshWP-s.Q.pphsspWpctlcspAtsss.pLpGWhQsppcLcthA-hl.phEcct...lTlS.lKoslaphcpuL.spE.sP.lEpLLpQhp-s.+s.pppsss.sLpKQIspRLptlLuRahlLpp....................... 0 0 10 19 +12321 PF12487 DUF3703 Protein of unknown function (DUF3703) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 113 and 135 amino acids in length. 21.10 21.10 21.70 21.20 20.70 20.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.49 0.71 -4.40 51 103 2009-09-22 08:25:31 2009-09-22 09:25:31 3 5 84 0 62 114 13 106.70 36 81.37 CHANGED hsppl+thaptEhptAcps.ttushstsWpHLERAHIluQ.hshhHstsHhtMLthuh+p+DtREshGQllRllsussGShhG+hPhGNTGtusVushpPMPlPpDLtsllp ...............ppl+thaptEhptupps.ttsshptuapaLERAHIluQthshh...Hs....psHhtMLphuh+p+Dt+EshGQlhRllsus..stohhGhlPhGNTGtusVushpPMPlPp-Lttll.s...... 0 17 34 50 +12322 PF12488 DUF3704 Protein of unknown function (DUF3704) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. 
21.90 21.90 22.80 22.30 19.50 17.70 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -6.78 0.72 -4.32 8 22 2009-09-22 08:30:11 2009-09-22 09:30:11 3 2 7 0 16 27 0 27.10 65 20.88 CHANGED hL.VG.......ASaGYMP+ssIAGSSsSshSNF ..hLPVG.......sSSGYMPRRGIAGSSuSoMSNF.. 0 0 1 16 +12323 PF12489 ARA70 Nuclear coactivator Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 127 and 138 amino acids in length. This family is ARA70, a nuclear coactivator which interacts with peroxisome proliferator-activated receptor gamma (PPARgamma) to regulate transcription and the addition of the PPARgamma ligand (prostaglandin J2) enhances this interaction. 22.10 22.10 23.50 30.80 21.70 21.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.96 0.71 -4.19 17 169 2009-09-22 08:56:56 2009-09-22 09:56:56 3 2 39 1 59 142 0 127.10 39 44.89 CHANGED spspLp-.hhts+sthtsp..t.h.s.pshpsWL.....lhpphpppohp.pttph....pphhGphpsL.p.L.p................spops.ssphShphp+lusLsLpspEs................ochLhpssshphhpphsshG.hpspppch+.hh .............................t.plp-.LhtsKst.tsp..t.h.s.pshps....WL......h.hQh.pEp...o......pu..pph.......lhGphpsLpp.L.p................sps+shssphSlphE+lusLpLhspDp................oshLhpsspsthhpp.sshu..cshp...+hh.............................. 0 3 7 21 +12324 PF12490 BCAS3 Breast carcinoma amplified sequence 3 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 229 and 245 amino acids in length. The proteins in this family have been shown to be proto-oncogenes implicated in the development of breast cancer. 
21.70 21.70 21.80 22.40 19.00 20.40 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.68 0.70 -5.07 19 188 2009-09-22 09:23:55 2009-09-22 10:23:55 3 6 103 0 117 200 0 227.20 26 26.76 CHANGED PsPloLoVVSRIKsus.sGW..........tsoVs...sAAuoA.oG.+sshhuGAlAusFH.....ss..hstsspsssst.sssh-pLhVasPoGpll.QYhLcPusus....pss.t.shsptsthpp.......s-l+lhVEslppWslsR+ssh.Ep--.phsshstsspts..h..hhtssssct..............p.tts.pt........phpscEppchalSpAE.lphapsp.hPlWtcschpFpsh.ssps.p.p....ts.hu........sEhEI..EclsscplEhRpK-LlP ...........................................................................hsl.slupIKps..hGh..........................................h...p.........phusu...t..oh.t.hsluushusshp.........ss....hppptsppp..p.hsl-pLhlhos.GsLl.pahLcPpshs................ssp..hs--..............................oslclhspPhtpWslsRp.ph..-hps....shstsp.hhh.t............th.......................s.....thssht....................................sscstpccc..WLSplE.lhTHsGP...t+.LWhsPQ.......FpF+shp.sss.psh.....sss.u...................................................................... 0 36 62 91 +12325 PF12491 ApoB100_C Apolipoprotein B100 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. There are two conserved sequence motifs: QLS and LIDL. ApoB100 has an essential role in the assembly and secretion of triglyceride-rich lipoproteins and lipids transport. 21.20 21.20 21.20 38.90 21.00 18.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.76 0.72 -4.33 4 81 2009-09-22 09:27:57 2009-09-22 10:27:57 3 4 63 0 21 76 0 44.40 75 2.02 CHANGED Qh+hKLQDFSDQLSDYYEKFIAEocRLIDLSIQpYHhFL+YIhELLKcLQssTssshh ..QFRYKLQDF.DQLSDYYEKFIsESKR.................................... 
0 1 2 7 +12327 PF12493 DUF3709 Protein of unknown function (DUF3709) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. There are two conserved sequence motifs: RCLMK and LIEL. 21.20 21.20 21.20 21.20 20.40 20.40 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.87 0.72 -4.25 9 226 2009-09-22 12:07:19 2009-09-22 13:07:19 3 2 48 0 7 117 0 29.50 76 37.05 CHANGED hRCLMKpQCVCRCKFQpaCLIEL.psCVVSpFV ...hRCLMKRQCVCRCKFQpaCLIELSp.CVVS.FV....... 0 7 7 7 +12328 PF12494 DUF3695 Protein of unknown function (DUF3695) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 157 and 192 amino acids in length. There is a single completely conserved residue D that may be functionally important. 20.30 20.30 22.00 37.20 19.00 16.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.35 0.72 -4.29 14 77 2009-09-22 12:08:13 2009-09-22 13:08:13 3 3 63 0 46 79 0 102.70 35 46.11 CHANGED .ssappss+hupp.cPapRLa.ptThuSsRRsstahs......................sphPpDSLDFpLpotYDHpc-hFhsKs-sllQpETlst...................tphRhL+Nst.h.p..pDsl....t+PL+l ................t.satpss+hspp.-PapRLpspsTlsShRRsshahc......................PpIP+DsLDFpLsulYsHHsshFpsKs-lLlppEThpc...................pp.h.p.hlp.stchh....ss............................................ 0 16 19 30 +12329 PF12495 Vip3A_N Vegetative insecticide protein 3A N terminal Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 170 and 789 amino acids in length. The family is found in association with Pfam:PF02018. Vip3A represents a novel class of proteins insecticidal to lepidopteran insect larvae. 
25.00 25.00 85.90 85.40 21.30 20.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.24 0.71 -4.59 12 67 2009-09-22 12:19:40 2009-09-22 13:19:40 3 2 11 0 0 62 0 166.50 96 26.51 CHANGED ALPSFIDYFNGIYGFATGIKDIMNMIFKTDT.GGsLTLDEILKNQQLLN-ISGKLDGVNGSLNDLIAQGNLNTELSKEILKIANEQNQVLNDVNNKLDAINTML+lYLPKITSMLSDVMKQNYALSLQIEYLSKQLQEISDKLDIINVNVLINSTLTEITPAYQRIKYVNEKFEELTF ....ALPSFIDYFNGIYGFATGIKDIMNMIFKTDT.GGDLTLDEILKNQQLLNDISGKLDGVNGSLNDLIAQGNLNTELSKEILKIANEQNQVLNDVNNKLDAINTMLRVYLPKITSMLSDVMKQNYALSLQIEYLSKQLQEISDKLDIINVNVLINSTLTEITPAYQRIKYVNEKFEELTF. 1 0 0 0 +12330 PF12496 BNIP2 Bcl2-/adenovirus E1B nineteen kDa-interacting protein 2 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 119 and 133 amino acids in length. There is a conserved HGGY sequence motif. This family is Bcl2-/adenovirus E1B nineteen kDa-interacting protein 2. It interacts with pro- and anti- apoptotic molecules in the cell. 27.00 27.00 29.50 29.50 25.20 25.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -11.01 0.71 -3.97 17 319 2009-09-22 12:21:50 2009-09-22 13:21:50 3 8 68 0 141 251 1 129.20 46 19.61 CHANGED cpp+++LsAPp..........................................lsLoL.DpS..................................................................EtShhSD-h.-oss-.........lDls.....lDDL..DTPs-sD.h-h.s...-h-............WEDDhPts.pusptssps...l.phos-.EEcpDs.RhWRslhIG-.QE+RIDM+lIEPYp+VISHGGY.YG-G 
..................................................................................................................................................................................................................................................................................................................................................p..hR++LsAPp............................................lsLoL..DtS.....................................................................................-GSlLSDDh.-o.....s...................................Dls..........lD-l..-TPsEs-ph-...s.......ch-.............WE..DDhP..ps..p.sss..tc.......hs-ho...u..-...E.E.p.cDs..RhWRshhIGE..QEpR..lDM+sIEPY++VlSHG.G...Y.YG-G.......................... 0 19 31 67 +12331 PF12497 ERbeta_N Estrogen receptor beta Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00104, Pfam:PF00105. There is a conserved IPS sequence motif. There are two completely conserved residues (Y and W) that may be functionally important. ERbeta binds estrogens with an affinity similar to that of ERalpha, and activates expression of reporter genes containing estrogen response elements in an estrogen-dependent manner. ERbeta acts as a transcription factor once bound to its ligand and it can dimerise with ERalpha. 25.00 25.00 27.50 27.50 22.20 17.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.64 0.72 -4.22 29 224 2009-09-22 12:23:36 2009-09-22 13:23:36 3 4 124 0 31 209 0 107.90 43 20.22 CHANGED sPhhhS...ssLsh-.spslCIPSPYsDtuHDas..........slsFYSP...olh...uYut.Pulo-sPo...l+poLSPSlFWPuHuH..hssLsLHs.sQshshp-t.spoPWsEhps..-pslssSppsl ...........Ps.Ys...s.LshE.ptsIhIPSsYs-spH-Ys..........sh.sFYSP...ulhsYu...h.Pu...s...sssss....s+QohSPslhWPo.GH....lSsLslHp.ppuhlY.sEs..+SPWsEs+s.h-Hs.Ls.spps.h.................... 
0 1 4 12 +12332 PF12498 bZIP_C Basic leucine-zipper C terminal Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 174 and 411 amino acids in length. The family is found in association with Pfam:PF00170. There is a conserved KVK sequence motif. There is a single completely conserved residue K that may be functionally important. Various bZIP proteins have been found and shown to play a role in seed-specific gene expression. bZIP binds to the alpha-globulin gene promoter, but not to promoters of other major storage genes such as glutelin, prolamin and albumin. 22.70 22.70 22.80 24.60 21.60 22.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.78 0.71 -2.85 28 167 2009-09-22 12:46:35 2009-09-22 13:46:35 3 3 36 0 44 171 0 111.40 34 37.25 CHANGED TLRAKVKMAE-oVKRlTGhsshh.sh..phsohshs..ss.Ss.ts...sssPhQsssppaat.pssss.shs.........................tptsst.su.shsuschsposuh....p+lA.....uLE.alQ+.RhpuGsssss .............................TLRAKVKMuEDolKRVhthss.........o.Shshsh.ss.Sss..s..sssPlpDshssaFs.s...sssssshssts....................t.psss.shsu.shsus...cMspsuu...h....p+sA.....uhE.hlQp.thtuh.sss.u.......................... 0 7 30 38 +12333 PF12499 DUF3707 Pherophorin Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 147 and 160 amino acids in length. The proteins in this family are frequently annotated as pherophorins however there is little accompanying literature to confirm this. 
20.60 20.60 20.70 20.70 20.30 20.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.70 0.71 -3.87 78 350 2009-09-22 12:49:21 2009-09-22 13:49:21 3 15 4 0 307 345 0 144.00 20 42.98 CHANGED FP...hC........ppstssoPapl....shsssstss.........................................spaCFslpss.........s.ssss....sCC.sss.....LpK..lEhhs.........s....ss..stpssl....sGhsstssssstps.s...s...........sL..................+l....ssLs.hshspssuu.....plCls.lp.................sssolsplC...............sss..sCthu.................................ps..C......CP ............................................................P....p.....pppp.t...sP.apl......s.s.tstss.........................................spaCFplpsh...................................ssssss......sCC..sss.....lpK..lEh.sss..............sC..p....sslt.sssl........sG.h..sthth.t..t.h.t...s........................sl............................+l.ssLs...hshs..pssuu..........plClp.lp..................sssolpphC...................sss...hC.hs.hhs..................tstpC..CP................................................................................ 0 97 307 307 +12334 PF12500 TRSP DUF3706; TRSP domain C terminus to PRTase_2 Anantharaman V, Gavin OL lg7 Anantharaman V Domain This domain occurs C terminus to PRTase_2 and has a highly conserved GXXE and TRSP signatures [1]. It is found in bacteria. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 
25.00 25.00 58.20 33.50 19.80 18.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.21 0.71 -4.75 67 331 2012-10-09 19:35:50 2009-09-22 13:50:58 3 6 324 0 70 262 6 151.80 33 34.43 CHANGED hstsss...............................................ts...................................hshtsptsa........................GRhGhpssp....s........h....shutp.lphtt......................................c+lLVLGTsEFhahPhhlAcpLppth.............s..VhapSTTRSPIts.....GYAIcsulsFss...s.s.sl.........NalYNVs.............tt.............................................aDc....lllssEs............ssss.s........ssLlptL ...................................ssstt.h.htsttsh.......................................GRhGhhs.t...t.......h.clucplpstt......................s.c+lLVLGTuEFhatPhhLAccLEptu................s..plhapSTTRSPIts......GYAIcssluFsssY.shul........sNalYNVs.t........tp......................................................aDc.lllssEosscshs........stLlptL.............................. 0 20 44 60 +12335 PF12501 DUF3708 Phosphate ATP-binding cassette transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 143 and 173 amino acids in length. The family is found in association with Pfam:PF00528. There is a single completely conserved residue P that may be functionally important. 
21.70 21.70 21.70 21.70 21.60 21.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.00 0.71 -4.41 84 285 2009-09-22 12:53:06 2009-09-22 13:53:06 3 3 283 0 89 250 468 152.80 29 32.47 CHANGED lll.lllhLushuahhGRpRAhsh......u..sus.t....LHSLPsYYGhhlAlasslPAlllLslWhhh....pPhllpphVhuplP.......sstshsssphsL........lhucl+slApG................................thslsups.....ssthhsAAppapshpstuphhhssllls..lAluGhsa ......h.ll.hllsluhluahlGRpRAhsl......A.t.GssstplHShPsYaGhaluLhsulPALllLslWsls....pshhlpp.lhuplP.......tst.hstsptsL........hhuplcslAsG.............................................hslusps......pshhlshApthpphpshsphhhsslsls...lAluGhh.......................................... 0 30 62 74 +12336 PF12502 DUF3710 Protein of unknown function (DUF3710) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 237 and 284 amino acids in length. There are two conserved sequence motifs: DLG and DGPRW. 25.00 25.00 54.70 54.60 19.40 18.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.07 0.71 -4.96 59 439 2009-09-22 13:18:44 2009-09-22 14:18:44 3 1 430 0 116 319 51 179.10 38 69.44 CHANGED GPaDhtpsp......sssssstlDLGulhlPhspGhplplEhs......ps.tt...shulplshspuplpltAFAAP+osGLWcElRtElspulppp.G...........upspt.psGsaGtElhuph.h............................sssh.tshRhlGVDGPRWhLRuVlsGsu...utssp....tAthlcclhppsVVsRGspPhPsR-sLPlp...LPpphs...pphtts ...............GPaDls-hs......ssssssRlDLGSlhlPhhcuhplplchs.........ps.Gs...spulhllpssutlplsAFA.AP+o.....s.....GlWcEVppEl..scuhptp.G............upssh.tsGPaGtElhshl.h...................................................tG..tsu...tssRhlGVDGP.RWhLRuVlsG.u.......uhscp............tuphlc...-hhsshVVcRG-sPhssR-slPlc...lPpshttt....s.................... 
0 34 84 108 +12337 PF12503 CMV_1a_C Cucumber mosaic virus 1a protein C terminal Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01660. There is a conserved GLG sequence motif. 1a protein is the major virulence factor of the cucumber mosaic virus (CMV). The Ns strain of CMV causes necrotic lesions to Nicotiana spp. while other strains cause systemic mosaic. The determinant of the pathogenesis of these different strains is the specific amino acid residue at the 461 residue of the 1a protein. 25.00 25.00 32.20 110.20 18.30 17.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.01 0.72 -4.26 11 89 2009-09-22 13:36:50 2009-09-22 14:36:50 3 3 22 0 0 90 0 88.00 68 9.08 CHANGED RlWshAGGcscsss.cS....Vh-TY+pVDshVNlHassGpWha..PpthcYoVGYN-pGLGPKh-sELYIVspsCVIuNscsLAcuoc .RIWsMAGGDsKRNSLEGNLKFVFDTYFoVDPMVNlHFuTGRWM+PVPEGlVYSVGYNE+GLGPKtDuELYIVNu-CVIsNS-uLSslT.+. 0 0 0 0 +12339 PF12505 DUF3712 Protein of unknown function (DUF3712) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 130 amino acids in length. 
21.60 21.60 21.90 22.20 21.40 21.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.67 0.71 -4.10 72 289 2009-09-22 13:43:24 2009-09-22 14:43:24 3 10 90 0 237 309 1 125.60 19 24.44 CHANGED sshupltlsthph..sst......shhh.hspphtlsshs......saspaspsllhpcp.hslslpGp...s.p..hphGsl..shpslsh.sKslshp....................G................Lsphp.uhs.................lsshplhhss....ts.........GhNh.husshl....................sNPSslol.pl..G.......slolsl ...........................................................hh.h.hs.hph..t.t........t..h.hst.lplsshs.......sFspa.....sptlh..tspp..hslpl.p.up......s..p.sp....husl...thpslsh.s+slslp....................G................h..ss.hp..uhs...................lsshclstss.....ts....................................uh.sh.pus.ssl....................sNPSs...hol.sl..Gslshp................... 1 97 152 213 +12340 PF12506 DUF3713 Protein of unknown function (DUF3713) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 92 and 1225 amino acids in length. There is a single completely conserved residue S that may be functionally important. 21.90 21.90 21.90 37.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.57 0.71 -4.02 10 36 2009-09-22 13:46:33 2009-09-22 14:46:33 3 1 8 0 10 38 0 97.20 36 13.00 CHANGED cLssYF..p.lIsc...sKVsscso.........soocpss.sclhTpst-......lcKl+-c.pscl-sKlccaV.sKLKsostPpssYSplILlsspsD....pshossu.huLhuLlslposs.LsNtl++paFssss .cLspYY..cslI.....sKVuspso.........sospsosssph..pptp......lcKL+-p.Kccl-stlKcaV.scLKlol......Sp.h.lp.p.........p....h...s..t....p.....hh..hh.ps.. 0 7 7 7 +12341 PF12507 HCMV_UL139 Human Cytomegalovirus UL139 protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes and viruses. Proteins in this family are approximately 140 amino acids in length. 
UL139 product shared sequence homology with human CD24, a signal transducer modulating B-cell activation responses, and the sequences in the G1c variant of UL139 contained a specific attachment site of prokaryotic membrane lipoprotein lipid. 25.00 25.00 25.20 35.00 24.10 24.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.53 0.71 -3.95 8 69 2009-09-22 13:50:58 2009-09-22 14:50:58 3 1 17 0 13 66 0 103.10 56 63.02 CHANGED hs.hpuppphpls+splLAu........pLhhlhohphpCahLhRKlh....................................................psspGpspEtEp+ppchtRahps+tsp.sshhshussh.Sho.s.Qssohts.sp ...Clpu.uGosWsssQLALLAASGW..TLSGLLLLFTCCFCCFWLVRKI.....................................................CSCCGNSSESESKsTH..AYTNAAFTSSDATLPMGTTG.SYTPP.QDGSFPPPPR. 0 1 7 10 +12342 PF12508 DUF3714 Protein of unknown function (DUF3714) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 200 amino acids in length. 20.90 20.90 23.00 22.50 18.40 17.60 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.85 0.71 -4.85 44 464 2009-09-22 14:34:16 2009-09-22 15:34:16 3 4 143 0 56 408 7 192.20 34 48.13 CHANGED cshshssptsppphssshtp.hs-..pshhssh.spspsttFpThstsp....ppNsIpAslcpspTl..ssGspl+lRLLEshtl.ssthlP+sThLhG.suphpGpRlplpIsSlphsGsIlPVcLsVYDs.DG.pGlaVPsShct-ssKEhuushssuh.....ssshshup.suusQlusshupuslQusSphluK+hRplKVpLKuGap ...........................................................t......ht..pcphlosLt.pshss...tthhtth.sptpshsF...pTssusst......pcNoIpAslcps....p.sl..p-GppV+LRLLEsh..pl....sshhI..P+sohlhG.su+lpG..pRlclpIsSlch.sGpIlPVcLulYDs.D...GQcGlalP..s....S.-hs.As+EluAshuuuh......soShshus..sAss....Qlus-hs+ulhQGsSphluKKhRplKVpLKuGYp.......................................... 0 18 48 56 +12343 PF12509 DUF3715 Protein of unknown function (DUF3715) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 170 amino acids in length. 
20.90 20.90 25.40 22.00 19.20 19.20 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.94 0.71 -4.39 12 131 2009-09-22 15:00:10 2009-09-22 16:00:10 3 11 42 0 67 140 0 157.60 27 12.42 CHANGED lpNphLppcastpR..pph+pct+pscEL.-shsFLhhcs.tcstsl......s.........ppGLpss...puKsshLGcsphGVhls..RhuDlhpppshpsu...shs.lhlhKlh+G+hKpl.-s..........spstlsPsPsa-sHl......uc.shsplssphh...ta.pothYhhEh...shs.shhcPpphhP ..............ps.hLppphoppc..pphthstpsspEL.Es.s..hL..hcp.hpsppl...............t...............p+GLpht...ppKhs....hLspPp.shh.s..ph..uDlhpspshpsu..........s.splhh..c...s.hptch.cplt-s.............spsslsssPsa-phl......uc.stsplsshhs..hhsattsthhhhEh...uhs.sphcP+ph.P.................................. 0 13 22 31 +12344 PF12510 Smoothelin Smoothelin cytoskeleton protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00307. Smoothelin is a cytoskeletal protein specifically expressed in differentiated smooth muscle cells and has been shown to co-localize with smooth muscle alpha actin. 21.70 21.70 21.90 21.90 20.40 18.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.58 0.72 -4.11 16 284 2009-09-22 15:02:46 2009-09-22 16:02:46 3 6 61 0 105 292 0 50.10 44 9.26 CHANGED sptps.ppspsssp.hst-l-pIhDEplLc.pLLEssosaEERRhIRAtlRcl+ppc ...............p.....tt.ptt.s..upcl.sh.D.thLc..ph..L-pss-aEER+hIRAAlR-LRppc.. 0 16 23 53 +12345 PF12511 DUF3716 Protein of unknown function (DUF3716) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. 
21.30 21.30 22.50 21.70 21.00 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.84 0.72 -4.36 50 170 2009-09-22 15:04:55 2009-09-22 16:04:55 3 7 56 0 133 167 0 57.30 30 11.75 CHANGED Ahhl..QspGppt..sps.CspCp.p...sp.....GP.F..psCV........hhsst.........htsuCuNChasspupp.CShpp .........AhhhpspGp.s...tpsCppCp..p....sp............GP..F.spCl.........lhsup...........htsuCuNChas....spust.Cohh.t........ 0 18 64 110 +12346 PF12512 DUF3717 Protein of unknown function (DUF3717) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 75 and 117 amino acids in length. There is a conserved AIN sequence motif. There are two completely conserved residues (L and Y) that may be functionally important. 25.00 25.00 33.00 32.80 21.80 19.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.28 0.72 -4.33 29 154 2009-09-22 15:21:54 2009-09-22 16:21:54 3 2 113 0 59 128 14 70.90 44 76.54 CHANGED M.ss..lpIs-lEuAINaWRs+pPS....s-thsLssEupALA-lYAlMIhp+pspls.ssLsststsAhtuahpsp ........ssIoIp-lEAAINaWRuRuPu...ssD-ltLCsEAsALAcsYALMIsp+psslsh-uLsscARsAapuah...p.. 0 3 17 40 +12347 PF12513 SUV3_C Mitochondrial degradasome RNA helicase subunit C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00271. The yeast mitochondrial degradosome (mtEXO) is an NTP-dependent exoribonuclease involved in mitochondrial RNA metabolism. mtEXO is made up of two subunits: an RNase (DSS1) and an RNA helicase (SUV3). These co-purify with mitochondrial ribosomes. 
21.60 21.60 21.80 24.70 18.90 21.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.32 0.72 -4.52 83 441 2009-09-22 15:25:28 2009-09-22 16:25:28 3 7 383 2 272 424 7 48.20 30 6.89 CHANGED LppLEshachlslYhWLShRa.sshFs-tphspch+ptlpphIpptLpph ...LhcLEshachlsLYlWLShRF..sh..FsD.tphspch+ptlsphIpptLpp.h........ 0 83 158 225 +12348 PF12514 DUF3718 Protein of unknown function (DUF3718) Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is approximately 70 amino acids in length. There is a single completely conserved residue C that may be functionally important. 22.50 22.50 22.70 31.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.95 0.72 -3.92 41 115 2009-09-22 15:28:03 2009-09-22 16:28:03 3 1 56 0 58 113 27 68.90 32 56.60 CHANGED lCcsstosshhpL++phK..........cp+l..............ph+plastlsCNGpSlhpFAhsps..Ascsuphl..........htphshp-lussp ..lCchltusc+scLR+plK..........-pRl..............+h+plasulsCNGpSLlcaAhtss..Ascsupal..........spplshp-Lu........................... 0 7 19 40 +12349 PF12515 CaATP_NAI Ca2+-ATPase N terminal autoinhibitory domain Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00689, Pfam:PF00122, Pfam:PF00702, Pfam:PF00690. There is a conserved RRFR sequence motif. There are two completely conserved residues (F and W) that may be functionally important. This family is the N terminal autoinhibitory domain of an endosomal Ca2+-ATPase. 
21.70 21.70 28.50 27.80 19.90 18.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.31 0.72 -4.45 28 163 2009-09-22 15:29:38 2009-09-22 16:29:38 3 12 30 0 103 162 0 46.50 48 4.79 CHANGED hhpcsF.-ltu.........KN....uStEuLpRWRpuss..lVhNspRRFRhssDLsKcs-s ....hp-sF.-l.u..KN.............uStEsLcRWRpAs...lVhNspRRFRaTsDLcKcpEh...... 0 13 62 85 +12350 PF12516 DUF3719 Protein of unknown function (DUF3719) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved HLR sequence motif. There are two completely conserved residues (W and H) that may be functionally important. 20.60 20.60 20.70 20.60 19.30 17.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.21 0.72 -4.41 14 139 2009-09-22 15:30:52 2009-09-22 16:30:52 3 2 57 0 72 128 0 64.50 40 12.14 CHANGED hLaEG+sss..t..spsLppECppWss...+hPHLRlhGsQhhtPpccGapah.....susshp.pshhutssssss ............hLaEtKlus..p..TpuLppECppWss...paPHLR..lLG+QllhPtsEGapha..ussssSssp.cs..s.ttp...s...................... 0 16 24 39 +12351 PF12517 DUF3720 Protein of unknown function (DUF3720) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. There are two completely conserved A residues that may be functionally important. 25.00 25.00 25.00 31.40 22.80 24.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.39 0.72 -3.30 38 82 2009-09-22 15:31:52 2009-09-22 16:31:52 3 1 2 0 19 82 0 93.20 33 25.57 CHANGED KAVEAuEss..ELuussss.pc..p-sossstl.ssTpusuAPGsGG..tGVAGtsss......hP.ssPGGSsTtspptp..pslsspGscpss.cpspp-upsops..Q .....KAVEAu-uu....pLuussss.ps..pcp.sstpl.ssTpusuAsGs..GG..suVAGt...ssss.....hP.ssPGsSsTttpp.tc..pslsspsscpsstcpsppssptsts.............. 
0 0 0 19 +12352 PF12518 DUF3721 Protein of unknown function Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. There is a conserved WMPC sequence motif. There are two completely conserved residues (A and C) that may be functionally important. 21.30 21.30 22.80 27.80 18.50 20.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.68 0.72 -4.35 41 73 2009-09-22 15:33:12 2009-09-22 16:33:12 3 4 30 0 26 80 320 33.70 46 32.13 CHANGED asTctEAEppApc.hGCpGuHpM..Gs.pWMPCss...atp .....asT+tEAEppAtc.hGCpGsHpM...Gs.pWMPCspHt...... 0 2 10 20 +12353 PF12519 DUF3722 Protein of unknown function (DUF3722) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 415 and 473 amino acids in length. 25.50 25.50 34.20 35.60 23.20 25.40 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.77 0.70 -5.17 31 140 2009-09-22 15:34:49 2009-09-22 16:34:49 3 2 135 0 105 160 0 252.20 37 58.90 CHANGED l+lplSshuTPphtooasLushsh..lsGSloYLaoossLpp..stsSpplsLpchlpsYRhlp..sshs.ppp..p..htsh.t.......................................oLLYGRhal.PsopLEAhhl+RlSPssQlhlpslSs.t..........................................................................sssuslhshlQ+DoG+aspEalaSTs-uLhGaRsLaNFGssssptt.....................s.hssp.....................plSsGuEhaauslspSsGhSTulRasThsssTsp................................PhTlTLohNPlhGplSoT.Yol+sSsshshsSRaDFN 
........................................l+lplSshuTPphsoSa....sLushsh......lsGSlSYLaSohsLpp......stpSsplsLpchlpuYR.lp..ssht.t...tt...t...t......ts...s.tt........................................................toLLYGRhal.PsopL-Ahhl+RloPshQl.ltslSs.t..........................................................................tstuslhs.lQpDs..G+aspEhlaSTssuLhGaRsLaNFGssspp.ss.....................................p.p..............hs.....hlSsGuEhYYuslspSsGhSTGlRFsTlPsssup...................................................PhThTLolNPLhGplSoT.YolpsusshuhsoRa-FN......................... 0 27 56 88 +12354 PF12520 DUF3723 Protein of unknown function (DUF3723) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 374 and 1069 amino acids in length. There is a conserved LGF sequence motif. 21.30 21.30 21.60 21.30 21.00 21.20 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.35 0.70 -6.01 20 202 2009-09-22 15:35:55 2009-09-22 16:35:55 3 6 44 0 161 209 1 265.00 18 50.16 CHANGED C+RL-spN+lsAllScpsLstALppuslotssLh..sspp.PpLpFstG.plpCLHG+HRlpAup-h....Lssu.D+WWsVDLYhD.................slup-L+suLhEEYuNp+psoDGEIYR+IRpYp..........p-sNtthppRWhuRL.osppt+hhp...................h.thht.uhulspLhchhuLhss.......................EllpYLs.pltchWu.llss-.st.....ht+lDscTVctLEhhuPthSctDuptlpslltuGplFssFsp.sERpslhccLp...shcGlIPSLaTFFcDhcYLEsCAcsl++Lls.s..p.olppshpthas.tssss..pptllQooEsshpp...ppsusspph-luY+QlWLaAMRaasphspsst....pcshhs+sspppuDppsla-hAsLAp+LGFpSspIcsLhspuscRp.hApthLhpARps-tYpYsssph-slVp+IV-shssA.h.ppht.sschhsspssph+s.....RCGhPptpupcpD+chLFlDclautpssssshl.TohhVRRslaFAFFG 
.................................................................................................................................................................................................................................................................................................................................................................................h................hs..sl..lp.............h.....h.tt..hh........h..h...h.....h.......l.oh..h.tsh.hht.....ht.h..................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 12 47 136 +12355 PF12521 DUF3724 Protein of unknown function (DUF3724) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF00073. There is a single completely conserved residue Y that may be functionally important. 22.00 22.00 22.50 22.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.66 0.73 -7.06 0.73 -3.91 5 178 2009-09-22 15:37:19 2009-09-22 16:37:19 3 11 6 0 0 111 0 22.90 88 6.99 CHANGED RupGcVVcDYSRYosAosssst- .RSQAYMVKNYPTYSQTITNTATD.. 0 0 0 0 +12356 PF12522 UL73_N Cytomegalovirus glycoprotein N terminal Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF03554. This family is an envelope glycoprotein of human cytomegalovirus (HCMV). 
20.30 20.30 22.80 22.80 19.10 19.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -6.99 0.72 -4.40 5 115 2009-09-22 15:38:45 2009-09-22 16:38:45 3 1 5 0 0 82 0 25.90 67 19.32 CHANGED LGLLVLSVAAGSSGNNSSTSTSATTsS .LGLLVLSVsAuS..NNoSTuoososSS. 0 0 0 0 +12357 PF12523 DUF3725 Protein of unknown function (DUF3725) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF01577. There is a conserved FLE sequence motif. 20.80 20.80 21.10 112.00 20.60 17.50 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.48 0.72 -3.89 4 55 2009-09-22 15:39:49 2009-09-22 16:39:49 3 5 5 0 0 51 0 73.90 79 5.45 CHANGED ahoEFclScGAKILQLV.IGsAElGRsFLEGs+hlRAsIFEIl+KTMVG+LGYDF-sELWhCHsCspTS-KYFK YsSEFcISKGAKILQLV.IGNAEVGRTFLEGNRFlRANIFEIIRKTMVGRLGYDFESELWhCHNCscTSEKYFK 0 0 0 0 +12358 PF12524 GlyL_C dsDNA virus glycoprotein L C terminal Gavin OL lg7 Prosite Family This domain family is found in viruses, and is typically between 55 and 80 amino acids in length. The family is found in association with Pfam:PF05259. This family is the C terminal of glycoprotein L from various types of double stranded DNA viruses (dsDNA). 20.10 20.10 28.70 26.80 18.60 17.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.08 0.72 -4.06 10 48 2009-09-22 15:41:28 2009-09-22 16:41:28 3 1 20 1 0 45 0 70.40 44 32.42 CHANGED uuStsPVssGCVNh-hS+sRpChGpsshshFscsphhpP.susDD...............hs.uRtssc+s+ssRupss+ ..........AuSpsPVtuGCVNh-YSRsRpChGppcLGhhNtspsppPshssDDEAu.Qs.s.ss.ss.hA.ScssP+RssAs+u+pp+.................................. 0 0 0 0 +12359 PF12525 DUF3726 Protein of unknown function (DUF3726) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 80 amino acids in length. 
There is a single completely conserved residue E that may be functionally important. 22.10 22.10 22.10 22.40 21.70 20.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.42 0.72 -4.18 34 82 2009-09-22 15:42:57 2009-09-22 16:42:57 3 1 79 0 18 89 671 78.10 31 33.98 CHANGED hhShNEltuhspKAhcGhGhshG.A--sAphsthLphhG....LsGsptLsssLphhsspssssl.s.h................hpsss.hhsstpshss .....hShNEl.AhspKAhhGhthshGpA-thAphls.LphhG....LsGlpphspshphhshpsspsl.s...................tsss.h.hsh+t.S................................. 0 5 9 15 +12360 PF12526 DUF3729 Protein of unknown function (DUF3729) Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 145 and 1707 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01661, Pfam:PF05417, Pfam:PF01660, Pfam:PF00978. There is a single completely conserved residue L that may be functionally important. 25.00 25.00 43.00 25.30 22.40 24.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.55 0.71 -2.72 39 273 2009-09-22 15:48:27 2009-09-22 16:48:27 3 10 14 0 0 277 0 99.30 46 9.34 CHANGED HhW..........ESANPFCGESTLYTRTWSsSGFSSsFSP.Essssssssssshsps....sPP.ssslhs.PPssEpssssss.ssssspP..ss.............ssuss..sPss...PsppsspPs.......u.pRRLLaTYP ...................HhWESANPFCGEuTLYTRTWSsSGFSSsFSPPEsuhsssssssthsps....sPs.ssshhs.PPssEpstsssssssssscP..ss.............ssuss......sP.s....PspcsssPs.......uppRRLLaTYP....................... 0 0 0 0 +12361 PF12527 DUF3727 Protein of unknown function (DUF3727) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 100 amino acids in length. 
21.50 21.50 21.90 21.90 20.10 20.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.39 0.72 -3.91 34 108 2009-09-22 15:50:23 2009-09-22 16:50:23 3 2 94 0 49 109 114 97.90 35 44.90 CHANGED pcLpLh+oAh.TLTVsGELs.h----l.ph-.-c-.....tc.-sEp..hphLso.Fhh--pEYulYsPLDPhhhlA+..hss.spspLlss-Ehc....clpPll...EppLh-ch ...........................cLpLhcoAh.sLTVpGELs.hp---l.phc.-s-t....sc.-sEplElLss.Fh.h-spcYulYTPLDPlLhlA+..hsp.sps.Ll.ss-Ehp....plpsll...EppLhpt.h....................................... 0 13 33 44 +12362 PF12528 DUF3728 Prepilin peptidase dependent protein C (DUF3728) Gavin OL lg7 Prosite Domain This family of proteins is found in bacteria. Proteins in this family are typically between 106 and 121 amino acids in length. The family is found in association with Pfam:PF07963. There are two completely conserved C residues that may be functionally important. This family is frequently annotated as prepilin peptidase dependent protein C. 22.50 22.50 31.20 30.60 20.20 16.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.97 0.72 -3.59 24 542 2009-09-22 15:52:35 2009-09-22 16:52:35 3 2 530 0 39 167 1 74.80 63 69.32 CHANGED YpQsLhpuFsphaQ.RQsW+hhaQph-hhs.s...............sssWQhpphpsp......pusCsploVplhoPtsppuploRhaCssp ..aQRsLMsShASRsQYpQLWRauWQQTQLpuhS....................PPssWQVNRMQTS......QAGCVSISVTLVSPuGRpGEMTRLHCPNR.. 0 2 8 23 +12363 PF12529 Xylo_C Xylosyltransferase C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 169 and 183 amino acids in length. The family is found in association with Pfam:PF02485. There is a single completely conserved residue G that may be functionally important. Xylosyltransferases are enzymes involved in the biosynthesis of the glycosaminoglycan linker region in proteoglycans. 
25.00 25.00 25.60 25.10 23.50 16.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.04 0.71 -4.90 24 190 2009-09-22 15:54:46 2009-09-22 16:54:46 3 6 82 0 97 161 0 172.10 45 21.16 CHANGED -paLaGpYssssPsL+uYWpNlYcp..D.shsuhsDstlohhpShhRluhpp..t......tptts+hcsht...shplphYhpcDpFpG....aLV+acu............t..sstpt.pLEsaltPpsshplspts..ptupRLpsl..-..VGT-aD.KEplhRNaGtllGPpsEslhht+Wutut...........shssTllWIDPhthlAss ...............................................D.aLY.GsYPsGTP.uL+uYWENlY-p.sD.GhsuLSDlhLThYpuFuRLuL++stssht...sptpC..RatPhG..hPsSVHLYFhsD+FQ.G....aLl+pps.............ps..AsuphEoLEsWlhPppshKlssss...sphsRLQph...E..VGT-WDsKERlFRNFGGLLGPhDEPVuhQ+Wu+Gs...........NlTsTVlWIDPssVlAs....................... 0 20 26 57 +12364 PF12530 DUF3730 Protein of unknown function (DUF3730) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 220 and 262 amino acids in length. 25.00 25.00 29.80 25.10 24.80 23.80 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.49 0.70 -4.44 25 180 2009-09-22 15:55:45 2009-09-22 16:55:45 3 7 96 0 122 185 0 222.30 20 24.00 CHANGED hlhhLhht.......htcsssschhhphLpsLssLuspp............hth.hllpslssLspssstph..................................hthcLlsplatpss+ta.....shLpthLt.......tsspssppphchtluhssoh+slCptpPp............+us-llthlstsL.........pppssl.....................................spuhul-ulpslC............pscll-hhssWplltpcl......s.p.cP.lhp...........................phh.ll..hthsstphsp........hpppslpllWchhstpc.......spsphhshpuL 
...................h....hhht........tc.ppsph.hthLphLsphsspc.......................................sshshhlpslthLsspsphph........................................hh.pLlstlhptpspha.....shLpth...hth...p..p..t.s.p.....p.p...........ph....cphhuht.ssh+slCptpsp............pus-hlttlothl.........ppssts.....................................................................spuhslphlpsLh............pupllshtstW.psLtt.pl.......t.t.ps.h..............................phh...Lhsththsp.tphpt...............hp.pllphhWphstp.p.......h.s..hh..................................................................................................................... 0 30 47 81 +12365 PF12531 DUF3731 DNA-K related protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 250 amino acids in length. There are two conserved sequence motifs: RPG and WRR. The proteins in this family are frequently annotated as DNA-K related proteins however there is little accompanying literature to confirm this. 25.00 25.00 145.50 144.60 24.00 22.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.65 0.70 -4.85 38 220 2009-09-22 15:58:22 2009-09-22 16:58:22 3 2 219 0 66 205 15 247.40 51 26.58 CHANGED SAcHERhWLNLAGaCLRPGFGsslDsWRlpQlWslaspGlQasp-sQsWs-WWshWRRlAGGLspspQtplhc....slA.hLpss.stpptph...ts...pttuhp-MlRLuAuLERlssppKhcLupalLpR...Lp+ss...............tstptW.WALGRlGARpPhYGSsHpVlPscpsppWlstL...LphD.W+c.t......suFAssplARhTGDRsRDLs-shRppVlc+LcsspAspsWlphVpEl.spL-ps-ppRlFGEuLPsGLpLl ............StsHE+sWLpLAGaCLRPGFGcshDuWRIEQlWsLYpQGIQas.ssQsWo-WWlhWRRlAGGLsp-QQpplLs....-IApaLpPuuh+...sstts....pcu...pctGY-sMVRLuASLE+LtVEcKs.LupWhLs+...Lp+sp................ppspW.WALGRLuARsPhYGStHsllPtEpsppWLspL...LcpD......Wpcps.....hsAFAsV.hsRhTGDRsRDls--hRppVlc+L+po+ss-uWlshVpEV..l-L-cu-opRsFGEoLPsGLpL......... 
0 20 31 53 +12366 PF12532 DUF3732 Protein of unknown function (DUF3732) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is typically between 180 and 198 amino acids in length. There is a conserved DQP sequence motif. 25.00 25.00 35.00 34.80 24.80 22.40 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.24 0.71 -4.78 50 147 2009-09-22 15:59:45 2009-09-22 16:59:45 3 5 141 0 42 122 9 187.10 30 30.98 CHANGED pplppLcppls.tpshpp+h..psshsplsph.hschupplchEps...stslchshcphsl......hhsspscphhL......pchGSGuNWluhHlshaLALHcaFhpp.....ppslP..shLhlDQPSQVYFPs........tpshstppl.........................................pDpDhtuVpchFphLsphspchpt......phQlIVh-HAs.pt....p.htsshh.ppWR..pscuLI .................t.plptLctpls....pc...hpc+h...ctthsplsph.hschhtpLchEps...ssslphshc.phsl......hhspsscphhL......hclGSGuNWLuhHlulhLALHpaFspht....sssVP..saLlhDQPSQVYFPs...tttts.cp.pclt........................................pD-DlhsVcclFphlsphhpctpp......shQIIVh-HAspp.htth.....pshh.pcWR..psptLI.............. 0 16 23 32 +12367 PF12533 Neuro_bHLH Neuronal helix-loop-helix transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found C-terminal to Pfam:PF00010. There is a single completely conserved residue W that may be functionally important. Neuronal basic helix-loop-helix (bHLH) transcription factors such as neuroD and neurogenin have been shown to play important roles in neuronal development. 
25.00 25.00 26.20 25.20 22.40 22.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.83 0.71 -3.30 21 229 2009-09-22 16:01:00 2009-09-22 17:01:00 3 3 65 0 114 175 0 119.40 49 36.79 CHANGED G+pPDhluFVQsLCKGLSQPTTNLVAGCLQLNsRshLs-p.tsctusp...ssusasspsasY......psP....tLsoPs....tsoh.....susH.h+.....stsYtush-shat.....ssss-tsoPpa-G..sLoPP.lslsGsFS .....GKpPDLloFVQsLCKGLSQPTTNLVAGCL......QLNsR.sFLh-Q.st-tss+..........ssusassHs.asY.......pSP....tLsoPP........aGsh.....ssoHsh+.....sasYs.u.u.hEshat.........usss-ssSPpa-G..PLSPP..lslNGNFS................................................ 0 12 22 56 +12368 PF12534 DUF3733 Leucine-rich repeat containing protein 8 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00560. There are two completely conserved residues (W and Y) that may be functionally important. Many of the proteins in this family are annotated as leucine-rich repeat containing protein 8 however there is little accompanying literature to back this up. 25.00 25.00 50.80 35.60 24.90 24.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.23 0.72 -4.68 22 480 2009-09-22 16:01:50 2009-09-22 17:01:50 3 39 44 0 283 366 0 62.00 36 15.95 CHANGED Mhsls-hppas.tpQssY+lLKPWWDVFhsYLsllMlhlulhuushth.t.phh.ChPs.pspthppp ...........hssh-hpQas..sQssY+lhhPWashahsYLsllhhhIhlhuush.h.......phPs.psuphpp........ 0 16 46 119 +12369 PF12535 Nudix_N Hydrolase of X-linked nucleoside diphosphate N terminal Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 847 and 5344 amino acids in length. These enzymes hydrolyse the molecular motif of a nucleoside diphosphate linked to some other moiety, X. 
20.60 20.60 20.60 28.30 20.30 20.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.86 0.72 -4.51 85 803 2009-09-23 12:17:57 2009-09-23 13:17:57 3 4 787 4 107 492 28 56.60 37 27.66 CHANGED ppphLcaAtclQulAQuGLsYu+DsaDhERY-clRcIus-hhuptoslshcplcsLFs ....t..calcahpcl.ulupsGLsYo....K.DsFDpERYccLRplss-Mluphoch.sh-pltplh............... 1 39 72 90 +12370 PF12536 DUF3734 Patatin phospholipase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF01734. There are two completely conserved residues (F and G) that may be functionally important. The proteins in this family are frequently annotated as patatin family phospholipases however there is little accompanying literature to confirm this. 22.00 22.00 22.00 24.10 21.60 21.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.29 0.72 -3.93 64 300 2009-09-23 13:23:11 2009-09-23 14:23:11 3 7 189 0 108 296 13 105.30 29 24.71 CHANGED IpYSSRTRhpo-thcphpphRcslpcLhp+lPtph+s.-Pthptltphus..ssthsllHLIYpppshEsps.KDa-FSptohc-+WpuGhcDscpsLp..c.pWhptsssspG ...........................IpYSSRTRhsT-hhpphpchRpslccllp+lPt..p.+p..c.........s.....h..pthtphus..ssphsllHLIY..ppcshEspt..KDY-Fotsohp-+WpuGhcDhcpsLs..p.phhthsp.................. 0 18 49 76 +12371 PF12537 DUF3735 Protein of unknown function (DUF3735) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved LSG sequence motif. There is a single completely conserved residue G that may be functionally important. 
22.10 22.10 23.10 27.40 21.90 21.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.06 0.72 -3.93 46 266 2009-09-23 13:55:43 2009-09-23 14:55:43 3 8 220 0 188 278 2 71.80 38 14.39 CHANGED tshhhhcphls+lullGlTlhAlLSGauuVssPYphhs.........hhhc.ss............ct..clpshpppltpotshltpK+p..clp .........shhshcphloRlGVIGVTlMAlLSGFGAVssPYshhs.........hFhRsls............-s..DItshE+pLhpTh-hlhsKK++l......................................... 0 64 103 153 +12372 PF12538 FtsK_SpoIIIE_N DNA transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 107 and 121 amino acids in length. The family is found in association with Pfam:PF01580. The FtsK/SpoIIIE family of DNA transporters are responsible for translocating missegregated chromosomes after the completion of cell division. 21.50 21.50 21.70 22.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.19 0.71 -3.97 18 321 2009-09-23 14:46:06 2009-09-23 15:46:06 3 4 319 1 22 218 0 107.70 40 7.71 CHANGED hLhlhYppplp+hcLss...pcsloIGsshcsslTlssL...cpsIpLchcps.......hhphptpsLthscshplshttp.hhhhahspsppsplYsluscpplsluspct..sDIslpssps ........hLIIpYscpL+hhsLcs...sKohTIuc--+ADITLpSL...uEsIcLcpNN........pGshQsscsslNK.slshcs..sh-shpL.LYopsshsuhhasuhp-ohTIGsNsY..DDhsIpuh.hs..... 0 5 11 16 +12373 PF12539 Csm1 Chromosome segregation protein Csm1/Pcs1 Mistry J, Wood V jm14 Manual Family Saccharomyces cerevisiae Csm1 is part of the monopolin complex. Csm1 forms a complex with Mde4 and promotes monoorientation during meiosis [1]. Csm1 also plays a mitotic role in DNA replication [1]. This family also contains the Schizosaccharomyces pombe homologue to Csm1, Pcs1. Pcs1 forms a complex with Mde4 and acts in the central kinetochore domain to clamp microtubule binding sites together [3]. The two complexes (Csm1/Lrs4 and Pcs1/Mde4) contribute to the prevention of merotelic attachment [3]. 
22.60 22.60 22.70 23.10 22.40 18.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.09 0.72 -3.50 29 121 2009-09-23 14:54:00 2009-09-23 15:54:00 3 4 118 16 91 114 0 91.60 39 22.85 CHANGED hplpcDLap...pLTGlplpssccc..-sshhFDshQoupN..G.......slca+Lsls+sts............tp...........sEhpYhPhL.......cppp-c...............pLhchLP-YLp-sloFPhpp ....................u.plpcDLYpcLTGLhl+ssccc..-sshlaDClQTGpN..G......................sl+aKLulspsps............ps................sEhpYhPhLcpsc-c......................pLhclLP-YLs-sloFPhp................. 0 17 45 77 +12374 PF12540 DUF3736 Protein of unknown function (DUF3736) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 135 and 160 amino acids in length. 22.60 22.60 27.30 23.30 21.40 19.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.72 0.71 -3.95 10 166 2009-09-23 15:32:06 2009-09-23 16:32:06 3 3 65 0 89 153 0 140.60 30 24.34 CHANGED psccshuSclshEup.........+lppPtpcauusspss+cSs...s.lhVssss+uVssoPPlsp.sss+plphlhcsstps.ppscph.cup.hhRcRhl..........scss.sp+.......psttss.........sPu...hspshshthoPs..shsQ-schQs.p.....lsLs+ ...........................................t..p.hsSclshEpp.........+hppstEc..u.h.s.ccss...s..tV.sth+sVs-pPPlsp.sso...cplphlhh.t..p.ppt-ph.pspptt.RcRhl............................csP.stp...........htst.s.................sPu.....t.sh..shthoPs..phspssphp........hsL........................................................................................................................................... 0 12 22 50 +12375 PF12541 DUF3737 Protein of unknown function (DUF3737) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 281 and 297 amino acids in length. 
22.10 22.10 22.60 32.90 21.20 22.00 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.00 0.70 -5.42 36 223 2012-10-02 14:50:22 2009-09-23 16:35:19 3 2 209 0 39 195 4 258.30 45 96.08 CHANGED MppIcpphasGERALFss+shplpsshFs...DGESsLKEo+NIplpsshFchKYPhWascslplcssphp-huRAulWYopslphpsoplpAPKtFRcspslpLcssshssAtEThWsCcslplcssps.pGDYFhMsSpNlhl-slpLsGNYuFphs+NlEl+NuclhSKDAFWNsENVTVhDShIsGEYLGWpS+NlThlNCpIpusQsLCYh-sLslcNCchl.sTcLAFEY.SsV-AsIsusIsSVKNPhSGpIpAcpIGclIhDcsplss.spspIhsp ....Mp.IcpphasGERsLFstcDhplpsssFs...cGESsLKEspNIphpsshFchKYPhWascshplcsshFpphuRuul.WYopslphpsohlpAPKhFRcspslpL-NVphssApEThWpCcslplcNlph.pG-YhhMpSpNIhlDslp.sGpYsFphs+NVEl+NuplhoKDAFWpsENVTlYDS.lsGEYLuWpS+NlphlNCpIpupQsLCYhcsLshcNCphh.ssDLAFEY.SslpAsIpusIpSlKNPhSGpIpAcplGplIhDcsthss.spspl..t............. 0 12 23 31 +12376 PF12542 CWC25 Pre-mRNA splicing factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF10197. There is a single completely conserved residue Y that may be functionally important. Cwc25 has been identified to associate with pre-mRNA splicing factor Cef1/Ntc85, a component of the Prp19-associated complex (NTC) involved in spliceosome activation. Cwc25 is neither tightly associated with NTC nor required for spliceosome activation, but is required for the first catalytic reaction. 
26.10 26.10 27.20 27.20 26.00 26.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.59 0.72 -3.54 62 293 2009-09-23 15:39:42 2009-09-23 16:39:42 3 5 251 0 220 284 0 95.10 32 23.98 CHANGED c..cctp+l-........WMYpss.................ttttpppE-YLLGK......pclD.phlppppttptpthsptpt..............................sstp-thsKl.p-DPLhtI..KppEppthpthhpss ..........................p+pc+L-........WMYpuP...................ssttppE-Y..LLG+.....plDph.lt....ppp...pp..ctptsuspssh...h..ss....................................sottDhtsKl..+EDPLhhI..++pEppthcthhpsP................. 0 73 122 182 +12377 PF12543 DUF3738 Protein of unknown function (DUF3738) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 251 and 457 amino acids in length. 21.90 21.90 23.40 24.90 21.50 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.18 0.71 -4.73 49 107 2009-09-23 15:51:44 2009-09-23 16:51:44 3 9 14 0 98 138 12 196.00 27 60.42 CHANGED LpsllthAYslp....shQ.lsGPsWl........ssc+..aDlsA........Kh.....Psuss........pc......ph.tMLQsLLs-RFpLshH+Eo+chslYsLsluKs....GsKl+tsssssssstss.............................................tttphpspphshstLAc....hL.....u..phhs....pPV.lDpTGLsGpYDhsLpas.pt........................................................sssssslhs.AlpcQL...GLKL.cscKtPl-slVlD+sc .............................................ttllthAYslp......s.ph....h.suPsWh........ssp+..aDl...A...........+s...........ss.sss..............pp................phptMLQsLLs-RF.pLphHpEs+chssYsLsluKs....GsKLctsps.sssss.s.s..............................................tstphpspshshsplup............hL.........s.....thls...cPV.lDpTGLsGpYDhsLpassps........................................................sssssslhs.AlpcQL...GLKL.cspKssl.-slVlD+h.................................. 
1 74 98 98 +12378 PF12544 LAM_C Lysine-2,3-aminomutase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is typically between 111 and 127 amino acids in length. The family is found in association with Pfam:PF04055. LAM catalyses the interconversion of L-alpha-lysine and L-beta-lysine, which proceeds by migration of the amino group from C2 to C3 concomitant with cross-migration of the 3-pro-R hydrogen of L-alpha-lysine to the 2-pro-R position of L-beta-lysine. 26.70 26.70 26.70 26.90 26.50 26.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.52 0.71 -4.09 14 526 2009-09-23 16:00:04 2009-09-23 17:00:04 3 8 452 4 212 447 50 90.00 46 21.48 CHANGED IEuLRGHTSGYAVPTFVVDAPGGGGKIslpPNYlISQSs-KVVLRNFEGVIToYPEP-sYhs.....tps-shFtph....tpcpp.hGluuLhs-..c.phuLsPcsLpRh-RRcth.tp.ttpohKcpRc++- ......hEuLRGHTSGhAVPTaVlDAPGGGGKlPlh.P.s.Y.l...lS..p.....u...ss+.l...lLRNaEG.hlssYs-..P..t................................................................................................................................ 0 97 161 189 +12379 PF12545 DUF3739 Filamentous haemagglutinin family outer membrane protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF05860. 20.50 20.50 21.20 48.10 18.30 19.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.48 0.71 -3.95 24 96 2009-09-24 09:08:12 2009-09-24 10:08:12 3 4 63 0 46 104 5 111.90 55 3.05 CHANGED GGDIhhWSupGDIsAG+GuKTslsssPsph..l.......hDsp.Gssp...ls.sussoGuGIusLpshss.......s.tsGsVsLlAPpGslDAG-AGIRsuGNlsluAhpllNAsNIp.VsGsssGlPss .GGDIhsWSupGDINAGRGSKTollhsPP.+t..l.......YDsh..GNVo....LsspsPuoGAGIATLsslP-........l..ssGDlDLIAPhGTIDAGEAGIRVSGNlNlAALpVlNAuNIQ..l.pGpooGlPs....... 
1 3 17 26 +12380 PF12546 Cryptochrome_C Blue/Ultraviolet sensing protein C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 113 and 125 amino acids in length. The family is found in association with Pfam:PF03441, Pfam:PF00875. Cryptochromes are blue/ultraviolet-A light sensing photoreceptors involved in regulating various growth and developmental responses in plants. 22.80 22.80 23.20 23.10 19.80 22.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.09 0.71 -3.42 20 114 2009-09-24 09:12:39 2009-09-24 10:12:39 3 3 39 0 22 110 0 116.80 39 20.11 CHANGED sPIuFPp-lp.MEhc+p..PlR.ss.........sshs..RRhcDQMVPoM.T......SSlhRs.p-pEhSu-h..pssu..tD.oRAEVPsph..h...........tspsppcphhspssspss.............hppppsh.p..pphhttt.htsSTuESSSShp ....sPIsFPp-lp.ME.scp..PsRsss.........hsss..RRhcDQMVPSh.T.......SSllR...s...p-pEhSsDh..pN.s...s-SRAEVPpsh..h...........psptppc-slspshspss..............ppppsh..t.phppphtttshtsSTuESSSShp.... 0 3 10 16 +12381 PF12547 ATXN-1_C Capicua transcriptional repressor modulator Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 49 and 781 amino acids in length. There is a conserved IQT sequence motif. ATXN1 directly binds Capicua and modulates Capicua repressor activity in Drosophila and mammalian cells. The polyglutamine expanded mutant type of ATXN-1 does not bind Capicua with as high affinity as wild-type ATXN-1. It is associated with spinocerebellar ataxia type 1 (SCA1). 25.00 25.00 25.80 27.40 22.90 23.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.50 0.72 -4.00 5 77 2009-09-24 09:15:23 2009-09-24 10:15:23 3 2 60 0 27 69 0 39.00 78 8.02 CHANGED GLpLGKPVSRSsShosppS.th.....p..p.tu.VsolSPHTVIQTThsuoEuLP ..GLHLGKPGHRSYA...........................LSPHTVIQTTHSASEPLP..... 
0 1 4 9 +12382 PF12548 DUF3740 Sulfatase protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 144 and 173 amino acids in length. The family is found in association with Pfam:PF00884. 25.00 25.00 30.50 30.50 21.00 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -3.92 21 188 2009-09-24 09:16:27 2009-09-24 10:16:27 3 4 75 0 95 159 1 142.50 43 16.51 CHANGED hptpas+pRppRul.hch....-splhclshE....cc.hph.pshslhccH..............pccsc+-spp.ssss........sssssshhss.pplpsTh............................................RCalhsN.DpVpCssslYcstcuWKcp+spIDt.IcsLpsKIpsL+El+tpLKcp+Pppssss+t ....................................................................................................+spas+sRptR.S.lu..lEh....-GplYclsL-...ptt.....psRslsKRH..............tcpscp.ch...t..s...sGss.............hhsss.sthussssl+VTH...............................................................................+CaIL.N.DTVpC-p-LYpShpAWKDHKhaID+EIEsLQsKIKNLREVRGHLK++RPEECsCsK......... 0 12 19 56 +12383 PF12549 TOH_N Tyrosine hydroxylase N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. There is a single completely conserved residue G that may be functionally important. Tyrosine hydroxylase converts L-tyrosine to L-DOPA in the catecholamine synthesis pathway. 21.10 21.10 23.20 21.20 20.70 20.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.35 0.73 -6.60 0.73 -4.23 7 141 2009-09-24 09:17:42 2009-09-24 10:17:42 3 8 42 0 40 141 0 23.40 47 13.09 CHANGED PTPshuoPphpGhRpAVSEhDsKQA ................shsSPphhGhRpul.E.stKpt 0 3 5 14 +12384 PF12550 GCR1_C Transcriptional activator of glycolytic enzymes Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. This family is activates the transcription of glycolytic enzymes. 
21.70 21.70 23.30 27.10 21.30 21.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.81 0.72 -4.11 69 275 2009-09-24 09:19:23 2009-09-24 10:19:23 3 11 71 0 192 269 0 79.80 27 13.55 CHANGED paphs+shpoVt-lacEahpG..htGpPulpph......-pcaG.spWR....ssppp..phap+RKhlhchIpp............shp.cshstppslchlEphR .......achs+s.poVh-lacEahtG.......ltGpP..olcpl......-cpYG.spWRt...sspcp......+has+.....R+hlhchIpp.................s.p.pshshppslphlEphR....................................................... 0 38 111 186 +12385 PF12551 PHBC_N Poly-beta-hydroxybutyrate polymerase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF07167, Pfam:PF00561. There is a single completely conserved residue W that may be functionally important. PHBC is the third enzyme of the poly-beta-hydroxybutyrate biosynthetic pathway. 25.00 25.00 31.30 31.60 21.60 21.30 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.24 0.72 -4.43 58 186 2009-09-24 09:22:42 2009-09-24 10:22:42 3 3 155 0 73 185 19 45.40 41 7.67 CHANGED ssssshptlD+th+Asl.A+hTuGlSPsulthAahDWuhHLAsSPGK ......s...s.hphlD+thcAsl.A+hTuGLSPAuLshAahDWhhHLAsuPGK........... 0 14 40 58 +12386 PF12552 DUF3741 Protein of unknown function (DUF3741) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. 21.80 21.80 22.30 35.20 21.20 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.17 0.72 -4.24 30 140 2009-09-24 09:27:59 2009-09-24 10:27:59 3 5 19 0 93 127 0 45.00 40 5.10 CHANGED lppKhh-u..K+Lus...DcphppSKEFh-AL-lLsSNK-LFlKhLQ-Pso ................lppKhh-u..KcLuo...cpphppSKEFt-AL-lLsSN+-LFLKhLQ-Pss... 
1 11 52 76 +12387 PF12553 DUF3742 Protein of unknown function (DUF3742) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. There is a single completely conserved residue Y that may be functionally important. 20.90 20.90 21.10 21.00 20.10 19.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.06 0.72 -3.51 20 78 2009-09-24 09:30:26 2009-09-24 10:30:26 3 1 60 0 28 76 3 51.80 29 43.65 CHANGED LALL.LsFsllAAWhspssss....................DpsEtt...sEaRpG.uGaGLYcss.hRlD .........................................hALl.lshlslAuWhspsss..............................c.s-...........phRpG.sGaGhYsss.hRlp......................... 0 5 19 26 +12388 PF12554 MOZART1 DUF3743; Mitotic-spindle organizing gamma-tubulin ring associated Gavin OL lg7 Prosite Family The name MOZART is derived from letters of 'mitotic-spindle organizing proteins associated with a ring of gamma-tubulin'. This family operates as part of the gamma-tubulin ring complex, gamma-TuRC, one of the complexes necessary for chromosome segregation. This complex is located at centrosomes and mediates the formation of bipolar spindles in mitosis; it consists of six subunits. However, unlike the other four known subunits, this family does not carry the conserved 'Spc97-Spc98' GCP domain, so the TUBCGP nomenclature cannot be used for it. MOZART1 is required for gamma-TuRC recruitment to centrosomes [1]. 19.90 19.30 21.30 24.10 19.20 18.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.04 0.72 -4.50 48 221 2009-09-24 09:35:15 2009-09-24 10:35:15 3 7 188 0 167 215 0 47.20 48 44.27 CHANGED upcsl-llaclSsLLNT.GLD+csLslslpLlEsG.VsPcALApllpcl+ ......c-shDlLaEISplL....NT.uLD+cTLSlClpLhEpG.lNPE.ALA.....sVl+ELR..... 
0 49 82 127 +12389 PF12555 TPPK_C Thiamine pyrophosphokinase C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. The proteins in this family catalyses the pyrophosphorylation of thiamine in yeast and synthesizes thiamine pyrophosphate (TPP), a thiamine coenzyme. 22.70 22.70 24.30 26.70 22.30 22.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.42 0.72 -4.24 50 284 2009-09-24 09:39:24 2009-09-24 10:39:24 3 3 282 0 98 221 0 53.40 32 13.78 CHANGED +uVupLY+sRlSst.tlhhllLAuLlAlssslhlossu..pshhphlssphpshht ...KuVupLYRs+lSsu.ulhhLlLusLlAlhlslhlosss..tshlphlhspWsphh...... 0 44 78 92 +12390 PF12556 CobS_N Cobaltochelatase CobS subunit N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF07728. There are two completely conserved residues (P and F) that may be functionally important. This family is the N terminal of the CobS subunit of cobaltochelatase. Cobaltochelatase belongs to the AAA+ superfamily of proteins. CobS and CobT form a chaperone like complex. 21.50 21.50 21.70 23.20 21.30 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.70 0.72 -4.52 79 346 2009-09-24 09:46:14 2009-09-24 10:46:14 3 3 337 0 109 304 959 35.70 48 11.07 CHANGED spshhstPDpplSVR-lFGIDoDhpVPAFoc.ts-+V .......shsshPDppVSVR-VFGIDoDhpVPAFop.sss+V.... 0 28 67 82 +12391 PF12557 Co_AT_N Cob(I)alamin adenosyltransferase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF02572. Cob(I)alamin adenosyltransferase adenosylates Co(I) in an ATP-dependent manner in the conversion of aquacobalamin to its coenzyme form. 
This is the third step in this process, after two steps involved in the reduction of Co(III) to Co(I). 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.64 0.72 -6.66 0.72 -3.98 89 1083 2009-09-24 09:49:35 2009-09-24 10:49:35 3 3 996 1 172 551 72 22.10 48 11.01 CHANGED tsstcspRHptRMp+hKpshDp+l ...........sERappRQQ+lK-pVDARl... 0 23 74 122 +12392 PF12558 DUF3744 ATP-binding cassette cobalt transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00005. There is a conserved REP sequence motif. There is a single completely conserved residue P that may be functionally important. The proteins in this family are frequently annotated as ABC Cobalt transporters however there is little accompanying literature to confirm this. 22.50 22.50 22.70 24.00 22.40 18.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.28 0.72 -3.53 55 750 2009-09-24 09:52:23 2009-09-24 10:52:23 3 4 746 0 68 428 1 72.30 36 12.82 CHANGED poslLpchGlREPLYloALKtuGhslsphpclsslssLshsp.hpttlppWhp.ptshhppppppc.............sLLclcsLoa ......oslLppsGIREPLYlTuL+thGlDlsptcpLusL.cslshsc.h...hppa....psp....scpp.ctp.............sLLcLcpVoa.............................. 0 19 34 49 +12393 PF12559 Inhibitor_I10 Serine endopeptidase inhibitors Coggill P pcc MEROPS_I10 Family This family includes both microviridins and marinostatins. It seems likely that in both cases it is the C-terminus which becomes the active inhibitor after post-translational modifications of the full length, pre-peptide. it is the ester linkages within the key, 12-residue. region that circularise the molecule giving it its inhibitory conformation [1, 2, 3]. 
20.50 20.50 26.20 22.20 18.00 18.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.17 0.72 -3.69 18 45 2009-09-25 12:15:30 2009-09-25 13:15:30 3 2 25 1 16 49 1 49.40 44 91.94 CHANGED Msps.........p..p.cAl.PFFARFL-pQ..................................t.tstsstpsp.s.h.ThKYPSDhE-s ..................psppsKAl.PFFARFLpspp...............................t.ps.psssscssh.sThKYPSDWE-.. 1 2 7 16 +12394 PF12560 DUF3745 Protein of unknown function (DUF3745) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00097, Pfam:PF10426. 21.20 21.20 21.20 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -7.87 0.72 -4.29 99 1647 2009-09-25 12:31:12 2009-09-25 13:31:12 3 7 890 0 1 1687 0 38.20 45 8.84 CHANGED ANs-KGKsAASLDKVs....DshsLhsQc.PFcpcppLNsuhQo ...ApppptcsAASLDKVsEpp.-ssslhsp..PFcoDp-LNpuhQT...... 0 0 0 1 +12395 PF12561 TagA ToxR activated gene A lipoprotein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF10462. There is a conserved GAG sequence motif. This family is a bacterial lipoprotein. 25.00 25.00 41.50 40.70 18.80 16.70 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.80 0.71 -4.17 9 172 2009-09-25 12:32:18 2009-09-25 13:32:18 3 8 73 0 7 89 0 135.30 45 17.05 CHANGED a-h.ts.p.th.spGW.passslssppltpsp...WpThplsspp.hlC+Fsa.sssGcptsFVGaV-...sssthCpuuc-hpap.suppp.h.Sp.sDYpLLS.hGc..GpV.oYsPss-lGEssLCoLs+sGss....GAGFlsss .......F-L.pPcp.EhpLcGW.Qtsssls.tp...h....Nsp......WpTMhls...sppphICRFsYhussGcphpFVGals...tpcplCoGGR-l+ahp.Dtpp..I-SphsDYELLShhGc..GpV.oYsPsspIGEspLCoLspsuss....GAGFl+s.t. 
0 3 3 5 +12396 PF12562 DUF3746 Protein of unknown function (DUF3746) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF04595. 21.80 21.80 32.70 37.40 18.20 17.30 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.19 0.72 -7.77 0.72 -4.10 9 53 2009-09-25 12:33:09 2009-09-25 13:33:09 3 1 36 0 0 43 0 36.10 65 9.39 CHANGED SoFssPpL+ssspsp....sC+KpsFsNSpaTTRTssal ....SSFPVPTIKSVTNKKK...ICKKHCFVNSQYTTRTLSHI 0 0 0 0 +12397 PF12563 Hemolysin_N Hemolytic toxin N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF07968, Pfam:PF00652. This family is a bacterial virulence factor - hemolysin - which forms pores in erythrocytes and causes them to lyse. 22.50 22.50 25.00 24.50 17.50 17.40 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.08 0.71 -4.86 7 120 2009-09-25 12:33:59 2009-09-25 13:33:59 3 4 81 15 5 93 2 176.30 68 28.55 CHANGED .KlNpptssh.lslhSulusssshAsIspPsGtAlphhSplpssptlhYhNAu.W.sEcsph.......oLsplR-pVlNQphRhhlDFStIpspspp.sphpsphRpphGluFussFllIoEHKGELLFTPhDss-D.........lsstLLEAsttpRsh..ot.sott...sossETsoLPHVAFYlNVNRsISDpECTFsNS ..........................Kh.ppt.sl.hTILSAlS.SsTlhANINEPSGEAADIISQVADSHAIKYYNAADWQAEDNAL......PSLAELRDLVINQQKRVLVDFSQISDAEGQ.AEMQAQFRKAYGVGFANQFIVITEHKGELLFTPFDpAEE.........VDPpLLEAPRTARLL..uRSGFAS..PAPANSETNTLPHVAFYISVNRAISDEECTFNNS..... 0 1 2 4 +12398 PF12564 TypeIII_RM_meth Type III restriction/modification enzyme methylation subunit Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF01555. There are two completely conserved residues (F and S) that may be functionally important. 
This family is a bacterial phage resistance protein. It functions in a type III restriction/modification enzyme complex. It is part of the methylation subunit of the complex. It binds DNA and methylates it. 25.00 25.00 29.50 29.20 19.80 19.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.66 0.72 -3.99 21 216 2009-09-25 12:35:04 2009-09-25 13:35:04 3 4 186 0 19 219 15 56.10 43 11.50 CHANGED hDcpLlphLLpspp....lKppFFpcl.sshhlFchpcFlchlc.Kca......Lss..SaT+apNKIGL ......D.pLlshLLps-p....lKcpFFhcl....sssLVFchpcFh.hL-p....+p.......lss..SYT+YsN+IGL.. 0 7 10 14 +12399 PF12565 DUF3747 Protein of unknown function (DUF3747) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 215 and 413 amino acids in length. There is a conserved DSNGYS sequence motif. 25.00 25.00 141.80 141.50 20.50 20.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.03 0.71 -5.21 38 86 2009-09-25 12:38:19 2009-09-25 13:38:19 3 4 48 0 36 105 45 180.40 40 57.40 CHANGED hthhhphsslsshslsshhs..ssupAuh.FspptlsQscalslApP...hG..sspapLLllEQI...pspptCWpEpussPs.lV-PLLLsFD...FTGICsRuoDSNGYSlRhuGpDlGhsYpLRlhppss-LlLh....uhs.t.ssssst.lllGRopGh.s...sG.FlKlpLpPGWchs+RoYpG+sLGHlYhusspsss ............hh.....hsslsshslsuhhs..ssspAuhFspptl-QscalslAtP...hG..sspapLLllEQI...ssp+.CWs-sGusPs.hV-PLLlsFD...FTGICsRusDSNGYSlRlsGpDLGhcYhLpllppsu-LlLl....usspt..ssstsp.lllG+opGhu....sG.FlKlpLpPGWphs+RsYpG+sLGHlYhsssps.s.... 0 2 21 32 +12400 PF12566 DUF3748 Protein of unknown function (DUF3748) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 120 amino acids in length. 
21.70 21.70 22.00 27.50 19.90 19.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.20 11 511 2009-09-25 12:39:24 2009-09-25 13:39:24 3 7 505 0 47 269 3 117.80 83 28.83 CHANGED AaVGVVTVSPspP.RYsFIHGPEpPDspWpYDFHHRRGVlVt..psGpApsLDAhDITsPYTsGALRGGSHVHVFSPDGsRLSFTYNDHV..MHEhDsphDhRNVGVAlPhpsVss.sKpHPREY ..........AHVGVVTVHPcuE.....K.YVFIHGPENPDETWHYDFHHRRGVIs...EuG....+VoNLDAMDITAP.YTPGALRGGSHVHVFSPNGERVSFTYNDHV..MH.....EhD..........PA....LDLRNVGVAAPaG......P.VNV....QKQHPREY................... 0 4 15 30 +12401 PF12567 CD45 Leukocyte receptor CD45 Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 77 and 1130 amino acids in length. The family is found in association with Pfam:PF00041. CD45 plays a critical role in T-cell receptor (TCR)-mediated signaling. CD45 interacts with SKAP55 which is a transcriptional activator of IL-2. 25.00 25.00 25.60 30.40 24.90 17.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.26 0.72 -4.12 10 83 2009-09-25 12:40:21 2009-09-25 13:40:21 3 19 34 0 13 87 0 60.40 47 7.86 CHANGED TVDYLYscpTKLFTAKLNVNEsVcCsNs.....sCTNNElpNLsECcp.toVolSHNSCTsPsKpLp .oVcYhYsppoKhFTAcLNVs.-sVcCsss......sC.sNElpNLsECcp.tsVolSHsSCssPtKpl..... 0 1 1 2 +12402 PF12568 DUF3749 Acetyltransferase (GNAT) domain Gavin OL, Bateman A lg7 Prosite Domain This domain family is found in bacteria, and is approximately 40 amino acids in length. The proteins in this family are acetyltransferases of the GNAT family. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.53 0.71 -4.54 21 619 2012-10-02 22:59:21 2009-09-25 13:41:38 3 2 602 1 54 719 54 121.30 65 95.94 CHANGED KLTIppLs....phSsQDhIDLuKIW.........Ppps.ptLpttl......sssppLaAARFN-RLLuAscVplpsp..pupLscLpVREVTRRRGVGhYLl....-EshRph..PplppWhhsttshp..chsshssFhpAsGF..stpsstWp ..........................................................................KLTIlRLE....pFS.c.QD+ID..Lt...K.I..W.................P.EYSsS.SL..p..V........D.-.s.HRI.Y.A..A..R.F..N..E...R.L.....Lu..A.V......R..V......T......L......S......G.....T........p..GA......LDSLRVREVTR.RRGVG....QY....Ll....EEV.....LRsN.....P.s.V..Ss.W.W.M.A.D..sGVE..DRuVMsAFMQALGF..TAQpsGWE....................................... 0 7 19 36 +12403 PF12569 NARP1 NMDA receptor-regulated protein 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF07719, Pfam:PF00515. There is a single completely conserved residue L that may be functionally important. NARP1 is the mammalian homologue of a yeast N-terminal acetyltransferase that regulates entry into the G(0) phase of the cell cycle. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 517 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.82 0.70 -5.75 32 545 2012-10-11 20:01:02 2009-09-25 13:43:38 3 42 309 0 357 564 23 365.10 30 57.91 CHANGED hEaSEhlLYpsplltEuGphEcALc+L..cphpppllD+LAlhEh+uchLlcLGRppEAptsYcsLLcRNP-NhpYYcuL.csh..........t..................hspt...p.pshhslYcphsppa.P+usAs+RlPLcFhp.................G-cFcphs-tYlcphLpKGVPSLFsslKsLYs.cspKhsllp-LlptYhp...shpssuphstps....p.ptcssoshhashYaLAQHYsa....hts.......p-..KAlcaI-pAIppo.Pohl-hahsKARIhKHuGDltcAAchM-cARpLDttDRYINoKsAKYhLRsscsccAtchhuhFTRpts..ushssLp-MQChWa.hEsucuahRpt+hGhALK+apsVp+hFsphhEDQFDFHoYslRKholRAYVchLRaEDpLRsHPaYh+AAtsAlcsYlpLaDpP...hsps.......ph..u.........shsssEcKKhtc...Kt+Ks.pp+tcc-ptct.ttppt....tp..ttttttKttsscsct.Ds...DPhGpKLhpop..-PLc-AhKFLpPL.phuspslEsplhuFEVahR+pKhLLALpslppA ....................................................................................................................................................E.SEhhlYp.s.lh......c.....t.......s.hpp....uLphl.....pp...ttphhDthth.Ehp..u.phhhpLs..ph......p-.AtthaptL..phNs.-s..Yattl.psh............................................thhth...ap...ph..t.h....s...p..s..ss+.h.ls.Lphh.......................s.p.p..F.pphhptal.....h..p+.......G.hPshFssl....+....L........Y.......p.........t........K....h....t..h..lt......plh.th.t...................p..t.pt....................p..ss..hh...s.hhLAQHash......htp..........p..hAhph...lp...tAIt..........s...Ps....h.l...-..hahhKu+IhK......+hGph.pAs.phh-pAptLDhtDRalNsKs.sKYhL+s............s....................p...........cA.phhshF.T+tt...................sshtsL.-hQshWa.hcsutua.t..............................t.p..h.................u.ALK+hht.l.pha..h.-DQaDFHsa.shRKhphRuYlphlchpD.lhtp.hah+us.....h.shph.Yhtlh-...p............ttt................t......................t......p...cch.p....p.ppt.t.p.t...tt........................................................t............p...s.t....l.......t.............t.htt.
h...p.hh..h...s......t.............s..h..hc....h..t...tt.h..h..................................................................................................................................... 0 119 195 289 +12404 PF12570 DUF3750 Protein of unknown function (DUF3750) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 175 and 265 amino acids in length. 25.00 25.00 36.40 98.40 19.70 17.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.90 0.71 -3.88 33 129 2009-09-25 12:44:47 2009-09-25 13:44:47 3 1 127 0 59 131 32 131.70 45 55.96 CHANGED tpAlVpVYAA+shuWRGhaAlHoWlls....KtsuuspYsRa-V........lGWGh.....sl+p.sthsPDutWaGutPcllhshcGctAcplIspIcsAlpsYPass..pYcsaPGPNSNTFlAalh+pVPpLplsLPssAIG+DY ...pAlVpVaAApshuWRGhhAVHsWIlh....KctGuspYsRY-V.............luWGt.....slRp.sshsPDuhWaGupPcllsshcGtpApplIPcIcsAlpsYPass..sY+sWPGPNSNTFlAalhRpVP-LplsLPssAlGKDY 0 13 26 39 +12405 PF12571 DUF3751 Phage tail-collar fibre protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is approximately 160 amino acids in length. There are two completely conserved residues (K and W) that may be functionally important. The members are annotated as being putative phage tail or tail-collar proteins. 
23.00 23.00 23.10 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.80 0.71 -4.40 47 961 2009-09-25 12:45:44 2009-09-25 13:45:44 3 33 662 0 136 906 8 148.70 39 30.54 CHANGED M...u.....sllTptGcphlAppsutGpslplsphlhAsssG.....s.hssspshsshsphVacsslsph..uhhNsNtVlhshllssslGsF.ashlGLh..sssusLhulspsPpphKhpstpGs...Gpslscshllpao..sspplTtlplssssW.hshtAch.pshD ..........Ms..cahollTshGts.+l.AsAs.A.h..G.p...l.p.lopMAVGDGsG.............shss..P.s..ssQ...T.t.Llpc.hhRuslNpl.....lDsp.N..s..s.tl...l..A.EhlI.P..p..s......h..G...GFalRElGLa....D-sG..sLIAVuN.s.P.E...o.YK...Pt.ltEGS....G+spThRhllhlS..ssssls.LplDsss....lhATpcYVD......................................................... 0 19 59 98 +12406 PF12572 DUF3752 Protein of unknown function (DUF3752) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 140 and 163 amino acids in length. 22.30 22.30 22.30 22.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.04 0.71 -4.09 41 249 2009-09-25 12:46:56 2009-09-25 13:46:56 3 8 209 0 181 244 1 139.20 25 38.15 CHANGED PPpppshtuhhsss.....RpFspu....+sut.........ssSsWT-TPt-+tcRhppphhuhpstst..........sttttttpsppstc.......ttlcpYscp.pRucSLlppHpccpppptpp..................................pppsssstRsFD+-+Dhplspp..sptpp+chlp+u.tshusRFus ................................................................................RpFpptt......tttts..............spShWT-TPt-+t+....+hp.pp..uhps.s.....................pttpp.thhpt.pcpc................htpplppascp...pRu.cSLh-hHpcchcppttp.....................................pppss.p....p+s..FDR-+Dlpssph....spsp++pllp+u.pplsoRFup...................... 0 62 95 141 +12407 PF12573 OxoDH_E1alpha_N 2-oxoisovalerate dehydrogenase E1 alpha subunit N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. 
The family is found in association with Pfam:PF00676. There are two conserved sequence motifs: VPEP and RPG. This family is the alpha subunit of the E1 component of 2-oxoisovalerate dehydrogenase. This is the enzyme complex responsible for metabolism of pyruvate, 2-oxoglutarate, branched chain 2-oxo acids and acetoin. The E1 component is a heterotetramer of alpha2beta2. The homodimerised beta subunits are flanked by two alpha subunits in a 'vise' structure. 25.00 25.00 28.60 28.20 19.60 18.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.06 0.72 -4.41 28 200 2009-09-25 12:48:08 2009-09-25 13:48:08 3 1 198 5 52 156 12 42.80 58 10.39 CHANGED tssLpLHVPEPssRPG-pPDFSplcIscAGul..RPslsssPt- ...h..sLpLHVPEPssRPGspsDFSal+lstAGsVR.RPsIDspPt..... 0 9 26 39 +12408 PF12574 120_Rick_ant 120 KDa Rickettsia surface antigen Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. This family is a Rickettsia surface antigen of 120 KDa which may be used as an antigen for immune response against the bacterial species. 
21.00 21.00 22.70 21.60 20.20 19.80 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.66 0.70 -5.28 7 128 2009-10-28 17:27:32 2009-09-25 13:49:09 3 1 98 0 11 123 1 227.80 70 29.18 CHANGED lL+-hhs.hs.-LAEphthc.....-cD++hcsFh....tNsspRphlspAhEssEhKKtLEsIEIsGY+NlhsoaSA........................................................................tstYtGGF+PlQWcNpl.SAusLRuTVlcNDAG-ElCTLsEpThKTpPhhlAKQDGTpVplsSYRpIDFPIcL-c.AsGoMHLShVAhcsDGppPucs+AVYFTAHYEps..PNGpPpLKElSSPpPLKFhGsss-AlAYIEHGGEIYTLsVTRGKYcEMMKEVtlNpGQuVDLSQ..tpDlhcVQGpup ...........................................................ILKDLAALTDRDLAEQKRKEI..EcEKDKTLSsFF....GNPANREFIDKALEsPELKKKLESIEIAGYKNVHsTaSA........................................................................AsGYsGGFKPVQWENpV.SASDLRATVV....KNDAGDELC.TLNETTVKTKPFTlAKQDGTQ.V.QIoSYREIDFPIKLDK.ADGSMHLSMVALKADGTKPSK.DKAVYF..TAHYEEG..PNGKPQLKEISSP.p.P.LKFAGTGDDAlAYIEHGGEIYTLAVTRGKYKEMMKEVcLNpGQSVDLSQ..A...EDlhhsQGtSp................... 0 1 2 2 +12409 PF12575 DUF3753 Protein of unknown function (DUF3753) Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are approximately 70 amino acids in length. There is a conserved YLK sequence motif. There are two completely conserved residues (D and F) that may be functionally important. 21.60 21.60 21.60 69.00 21.40 21.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.56 0.72 -4.18 14 56 2009-09-25 12:50:08 2009-09-25 13:50:08 3 1 43 0 0 36 0 71.00 54 97.91 CHANGED MDKLYsulFGVFhoSsD-.DFppFI-lV+SVLoDcps.tp.pss.s..t..hlllsllhlllllll..hFlYLKlh MDKLYAAIFGVFMuSs.-D...Dhs-FIEIVKSVLSD-Kosss.osssss.ashaaLIIlhhlVlIllLL..laLYLKVV. 0 0 0 0 +12410 PF12576 DUF3754 Protein of unknown function (DUF3754) Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is typically between 135 and 166 amino acids in length. 
There is a single completely conserved residue P that may be functionally important. 21.50 21.50 22.50 21.80 19.60 21.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.87 0.71 -4.55 22 186 2009-09-25 12:53:38 2009-09-25 13:53:38 3 4 103 0 108 188 16 150.90 23 30.01 CHANGED a-clllhh+...........................................hcssph.tt......................................tppphshtssplhl+lFcslPcsDL-hlFPNt+lshphhDplhlslsulhushslllphlht.hlhlsshh.hh........................hhtslhuhssLuuaha+passYKs+ph+apptlscsLaFKsLssNuG ...............................................................................................................................................................................................................................................ttpttslhlKtFKsIPhsDLEhlLPcp..c.lphs.hDtlhlslsslluhlslhsshhthh.......s..............................hhsslhllssh.s.hhsh+th.h..sFppphspap.hhlscsLY.KslssspG............................. 0 31 67 86 +12411 PF12577 PPARgamma_N PPAR gamma N-terminal region Gavin OL lg7 Prosite Family Peroxisome proliferator-activated receptors (PPAR) are nuclear hormone receptors that control the expression of genes involved in lipid homeostasis in mammals. This sequence region is found at the N-terminus of these proteins. The family is found in association with Pfam:PF00104, Pfam:PF00105. It is not clear if this region is a separate protein domain. 
25.00 25.00 27.10 26.10 21.70 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.07 0.72 -3.67 8 132 2009-09-25 12:56:48 2009-09-25 13:56:48 3 3 66 0 30 125 0 83.90 64 18.98 CHANGED MVDT.QhhuWP.VGFGLSulDLsEL-DcSHSLDlKPFoTlDYoSISS...................hcY-ssPsps-.hs+sMDhophYuYch.....................................+hppsQsSIKL ..MVDT.EMPFWP.sNFGISSVDLSsMDDHSHSFDIKPFTTVDFSSISo...................PHYEDIP.........FsRsDPhVsDYKYDL.....................................KLQ....EYQSAIKV....................................................................................................................................................... 0 1 3 10 +12412 PF12578 3-PAP Myotubularin-associated protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 115 and 138 amino acids in length. Myotubularin is a dual-specific phosphatase that dephosphorylates phosphatidylinositol 3-phosphate and phosphatidylinositol (3,5)-bisphosphate. 3-PAP is a catalytically inactive member of the myotubularin gene family, which coprecipitates lipid phosphatidylinositol 3-phosphate-3-phosphatase activity from lysates of human platelets. 20.40 20.40 20.50 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.57 0.71 -4.33 21 204 2009-09-25 12:59:09 2009-09-25 13:59:09 3 9 92 0 111 195 0 124.90 25 18.37 CHANGED p.pth....s.........hcslssttpph.tss..hht.h.ph....ssspsp.tstspph.ts..ths.......p.pphLpPtptshplplWsQCYhRWhPhtplpsGG.spl.hphphhhsc..l...pclpchlcppchtplsssphc ........................................................................................hP.s.p....tpt.hh..pp.h.hp.......sp......p..sp.tss.cphhcphauhP............ss..cGlLLPt...........hpGscl+lWppCY.LRWlPEsQI.ttGG.sts..p.lp.Lh-E.....l....ppLpctlcptp.t........t................................... 
0 19 30 63 +12413 PF12579 DUF3755 Protein of unknown function (DUF3755) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. There is a single completely conserved residue N that may be functionally important. 25.00 25.00 27.80 27.10 24.90 23.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.40 0.72 -4.56 27 109 2009-09-25 13:00:09 2009-09-25 14:00:09 3 2 21 0 64 100 0 34.90 40 12.41 CHANGED ushp.lp-NlsLhppsRcNIhslLpchs...phPslMsp .....shp.hp-NlsLFp+sRcNIpslLsc.Ms...phPGIMsp........ 0 11 34 49 +12414 PF12580 TPPII Tripeptidyl peptidase II Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF00082. Tripeptidyl peptidase II (TPPII) is a crucial component of the proteolytic cascade acting downstream of the 26S proteasome in the ubiquitin-proteasome pathway. It is an amino peptidase belonging to the subtilase family removing tripeptides from the free N terminus of oligopeptides. 
25.00 25.00 29.30 28.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.14 0.71 -4.99 24 162 2009-09-25 13:02:15 2009-09-25 14:02:15 3 9 122 1 112 157 4 190.60 42 15.09 CHANGED l-lp.u.LtsEclpPslsLcphthsh+Psp.uKIpPLu.sRDllP.sG+QlaplhLTYphp.........................ls.cuuEVssphPhlschLYEucFESQhaMlFDsNKphlusGDuYsp................................hKL-KG-YTl+LQlRH-ppplLEKlK-hslhlppKL.s.s.lsLclasshssslsG..stcassttltsGpspshYl.sslss-KLPKsshPG .........csp.osLphE-.lsPsloLKs............hspsLRPsp.uK....lpPL...............u..sRDl.LP..ssRQlYp............hl.....LTYsF+..........................s.KuuEV.sPpsPlLs...-hLYESEF-SQhWhlFD.p.NKphhusGDAYPc.............................pas.h.KL-.KG....-.YTlRLQlRHEphshLE+L.K-l..shllsc+L..u.ssloLDlapshs..............tAlhG......ppKh..s.shsLsPt............hspshal.ssl.s.cD.KlPKsssP....................... 0 42 59 89 +12415 PF12581 DUF3756 Protein of unknown function (DUF3756) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 40 amino acids in length. 25.00 25.00 36.30 34.80 17.80 16.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.98 0.72 -4.27 18 76 2009-09-25 13:03:18 2009-09-25 14:03:18 3 4 5 0 0 68 0 40.50 88 2.91 CHANGED lASGYRTN..ALVAPQAKISIGAYAAEWALSTEPPPAGYAIVR .LASGYRTN..ALVAPQAKISIGAYAAEWALSTEPPPAGYAIVR 0 0 0 0 +12416 PF12582 DUF3757 Protein of unknown function (DUF3757) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 154 amino acids in length. 
21.10 21.10 25.60 24.40 20.60 20.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.02 0.71 -3.92 25 155 2009-09-25 13:04:14 2009-09-25 14:04:14 3 2 95 0 19 82 0 109.00 29 78.44 CHANGED sspsCPulssI.pts........GsYoA..ssssscWhGs..pG.hsptp..lppFppAhhl.tss.t.....t.Gph.pCoYpl.spstplDhhap......spshhholps.pst....Wcpppssh...h.hh.Co..sssscpCpFc .........t..ppCPshstIp..t......GsYsu....sstpWhG...pss.spts..lppF..Ahhlstst.......s.sth.pCTYtl..tstplDhhat....................hspshhholhs.psh....Wctppssh...hp.hhCo..ssusppCpFs..... 0 2 7 12 +12417 PF12583 TPPII_N Tripeptidyl peptidase II N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF00082. Tripeptidyl peptidase II (TPPII) is a crucial component of the proteolytic cascade acting downstream of the 26S proteasome in the ubiquitin-proteasome pathway. It is an amino peptidase belonging to the subtilase family removing tripeptides from the free N terminus of oligopeptides. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.97 0.71 -4.04 7 69 2009-09-25 13:04:59 2009-09-25 14:04:59 3 7 30 1 23 76 0 126.70 54 20.09 CHANGED sGuuNsuususuTsAAAA.AsTssusKPKssus..ttsuh..sssAuGDGVssQo-sPsps.sssPuSPKKGKosuD-YuEuLRDFQCoaIsKs-hEhAEKIYp-VltAHPKHLtAHLhLIQNIESspLK.spLPLsFssupcsp ...........................P.PAKKoSSTANATTGAAs.AlTNsATNGN.VANA........GSNGT....GNNVITATNGAA..NGSlPNGTAVK.ENRSKW.DEYCEGLRDYQTAQISKLDA.ENA....ENVYQALLKDNPNHLA...AHLAMADHFDS.TDLK.Q..NLPYTFTAShD.u....... 0 9 12 21 +12418 PF12584 TRAPPC10 DUF3758; Trafficking protein particle complex subunit 10, TRAPPC10 Gavin OL lg7 Prosite Family This domain forms part of the TRAPP complex for mediating vesicle docking and fusion in the Golgi apparatus. 
The fungal version is referred to as Trs130, and an alternative vertebrate alias is TMEM1 [1,2]. 21.60 21.60 21.60 21.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.90 0.71 -4.70 32 265 2012-10-04 00:47:01 2009-09-25 14:07:17 3 10 233 0 196 280 1 163.40 21 14.07 CHANGED slslPslphlhsssht.hppssh........................................................................tVGp.lshplplcp.pphWss.t.t..............................p.sh....chhYcl..sss-sWhluG+++Gphph.................ppssphp......hslhllPLtsGaL.hPplplpshs...........................................................tt..sscl..psuuppllVl ........................................................................................................................................h.hP.lphhasspht....p.tt............................................................................hspsGphhshplplpp.sp.hpst.....................................tttst.......chhYEl...hsssssWhlsG+p.pGshsh................................tpssptp............lslhllPLpsGaL.shPslclhphh..............................................................t...ssps..pshu.plhV......................................... 0 49 96 159 +12419 PF12585 DUF3759 Protein of unknown function (DUF3759) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 107 and 132 amino acids in length. There is a single completely conserved residue H that may be functionally important. 
25.00 25.00 49.70 48.60 23.30 22.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.07 0.72 -4.08 39 128 2009-09-25 13:08:28 2009-09-25 14:08:28 3 3 85 0 89 132 0 92.90 46 72.33 CHANGED psacpVh..st........pHcu+hSHELlAGAAuFEAhKsaE-+pc+....................pGKPsSHAhAKElLAGhAGAtlD+lhET.KGLD...alD+-cAK+cAccpscchhcpcY ..............t.tapplh..st......pHcu+hSHELlAGAAuFEAhKAaE-Hpc+....................pGK..PsSHAhAKElLAGhAGAtlD+llET.KGLD...alD..+-+AK+cAccpAcchhsppY................. 0 33 54 76 +12420 PF12586 DUF3760 Protein of unknown function (DUF3760) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 46 and 64 amino acids in length. 25.00 25.00 26.80 25.40 23.90 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -8.25 0.72 -4.12 15 44 2009-09-25 13:09:30 2009-09-25 14:09:30 3 2 3 0 13 37 0 51.60 31 15.26 CHANGED oL.................sPL.ssVpclIh-cLutlsP..lphLplS...........................................+haY.....................+cllPplY+sVsls ....................LssL.tsVpclIh-pLstlsP..lphlplS...........................................+haY.....................+chlPhlY+pVsl.. 0 13 13 13 +12421 PF12587 DUF3761 Protein of unknown function (DUF3761) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 100 and 157 amino acids in length. 
22.00 22.00 22.10 22.00 21.90 20.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.51 0.72 -3.87 9 145 2009-09-25 13:10:56 2009-09-25 14:10:56 3 3 109 0 45 146 5 71.50 43 53.39 CHANGED +tsslsuuLhAshhhhshsAaAhs.............stusLspcspYhN+DGtsVHuPA+sh..psPsGATA+C+DGoYSFSpH++GTCSGHGGVspWh ..............................................................................................t............................................................hpAPA.....p....s.P.sGs.T....A.....hC+DGoaShutp..+.+GsCSGHGGVssWh... 0 8 21 32 +12422 PF12588 PSDC Phophatidylserine decarboxylase Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF02666. Phosphatidylserine decarboxylase (PSD) is an important enzyme in the synthesis of phosphatidylethanolamine in both prokaryotes and eukaryotes. 23.40 23.40 23.40 23.60 23.30 23.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.88 0.71 -4.57 45 205 2009-09-25 13:12:22 2009-09-25 14:12:22 3 6 143 0 118 199 5 133.40 35 31.88 CHANGED LtPslp-hpcLI.......EscstlhhLhspMFpp.........tsssGp..plcsacchLpllNtllspAPpass...........suLlGhPlNAlLDWPMuTsuGashFhcPplNtpLK+lLstWupFLsoPcSs..pVLsss.pt........GWFussAhpphpp ..............tsslp-hppLl.......EssshlhMhhspMhc-...............tsssG.p...t..lcsapchLpllstlh.T.p.APpasp...........sGLlGhPlNAlLDW.................PMsTsuGauhFhcspVNtplKclLshW..s.p.FLp....oPcSt..tsLsss..s........GWhuspAhpph..t........... 0 34 64 91 +12423 PF12589 WBS_methylT Methyltransferase involved in Williams-Beuren syndrome Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 72 and 83 amino acids in length. The family is found in association with Pfam:PF08241. This family is made up of S-adenosylmethionine-dependent methyltransferases [1]. 
The proteins are deleted in Williams-Beuren syndrome (WBS), a complex developmental disorder with multisystemic manifestations including supravalvular aortic stenosis (SVAS) and a specific cognitive phenotype [2]. 21.00 21.00 26.60 25.20 19.90 19.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.29 0.72 -3.50 79 336 2009-09-25 13:14:03 2009-09-25 14:14:03 3 6 300 0 234 315 1 79.90 30 28.36 CHANGED LhsG...sh.pt.spsts..p..................t.pph.httpp.pttpppct+th.cp.....u.K-WIh+KKEphR++..G+.cVtsDSKYTGRKR+s+ ......................................................G.........sth.s..........................p.pph.t.hs.t.pct.ptp.+pt.c.tcth.cp......................o.+sWIlcKKEphRRp..G+.cV+s..DSKYTGRKR+s.... 0 82 131 194 +12424 PF12590 Acyl-thio_N Acyl-ATP thioesterase Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is typically between 120 and 131 amino acids in length. The family is found in association with Pfam:PF01643. The plant acyl-acyl carrier protein (ACP) thioesterases (TEs) have roles in fatty acid synthesis. 22.00 22.00 50.00 47.30 20.20 19.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.99 0.71 -3.68 24 113 2009-09-25 13:15:00 2009-09-25 14:15:00 3 2 47 0 24 123 0 119.70 58 29.72 CHANGED M.VAouAuSAFFPVs.Sssssusu..........t.....GphssSLs..GlKuKss.souuhQVKANA....pAsPKlNGopVuhpss.cshcp-s...sss...SssPRTFlNQLPDWSMLLAAITTIFLAAEKQWhMLDWKP+R..PDMLlD ....MsAo..sAs.SuFFPls.osussssu.............tp...hGphssolshtGltuKss..ssGuhQVKAsA....QAsPKlNGopVslhss..phpp-c...sss...ussPRTFlNQLPDWSMLLAAITTIFLAAEKQWhMLDWKP+R..PDMLlD......... 0 1 12 19 +12425 PF12591 DUF3762 Protein of unknown function (DUF3762) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF05533. 
21.90 21.90 22.20 51.60 21.50 17.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.61 0.72 -3.95 2 72 2009-09-25 13:15:54 2009-09-25 14:15:54 3 3 1 0 0 63 0 78.90 86 6.53 CHANGED .PSMVAIPVPIuFGshssTAWCS.uDAAVLRCRLsYHAAETsFpspcKHVRYVYNDVSSAANRPRTVSsRKCGpVFPStS ..PPSMVAIPVPISFGshPTTAWCS.A.DAAVLRCRLDYHAAETSFRNE.GKHVRYVYNDVSSAANRPRTVSPRKCGRlFPSGS.... 0 0 0 0 +12426 PF12592 DUF3763 Protein of unknown function (DUF3763) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF07728. There is a single completely conserved residue F that may be functionally important. 21.80 21.80 21.80 23.50 21.40 20.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.56 0.72 -4.34 26 649 2009-09-25 13:17:07 2009-09-25 14:17:07 3 2 645 1 56 296 0 56.90 57 11.46 CHANGED tssphhppLschcpcl+cpRp.......tFpppQPplFIsschLstIEuSLhplscplcphppp ...........h.PuEIKQQLEcLEsDWR+QHs.......hFSEQQ+CLFIsuDWLGRIEASLQDVGtQIRQAQQC... 0 3 14 34 +12427 PF12593 McyA_C Microcystin synthetase C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF08242, Pfam:PF00501. There is a conserved YAN sequence motif. Microcystins form a large family of small cyclic heptapeptides harbouring extensive modifications in amino acid residue composition and functional group chemistry. These peptide hepatotoxins contain a range of non-proteinogenic amino acids and unusual peptide bonds, and are typically N-methylated. They are synthesized on large enzyme complexes consisting of non-ribosomal peptide synthetases and polyketide synthases. This family is made up of the C terminal of microcystin synthetase, one of the proteins involved in this synthesis pathway. 
25.00 25.00 53.30 52.40 17.40 16.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.02 0.72 -4.20 4 77 2009-09-25 13:18:40 2009-09-25 14:18:40 3 12 60 0 1 68 0 39.10 66 5.19 CHANGED DVIFspcQspt..+hsLpsFTsTp.Qt....DWQhYANpPLQP+L DVIFs.hpsp...+tshhsFTPTH.pAKP..-WQhYANHPLps+L 0 0 1 1 +12428 PF12594 DUF3764 Protein of unknown function (DUF3764) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 89 and 101 amino acids in length. 25.10 25.10 25.20 29.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.71 0.72 -4.25 25 55 2009-09-25 13:20:22 2009-09-25 14:20:22 3 1 33 0 17 59 799 83.20 36 88.41 CHANGED hpToVhTFclossFsEWsthaDupc.pthpcphGIpsLYRGVSc-DPpKlhVlhQA.E.GshppFhpssp..-hIcuuGHlh-oT.hos .....EToVhsFclossF-EWtthaDup-.pthacphGlpsLaRGhSp-DPp+lhVlhQus-.Gshpphhpssp..chlpuuGHlh-oThho.h.. 0 1 7 13 +12429 PF12595 Rhomboid_SP Rhomboid serine protease Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 210 amino acids in length. The family is found in association with Pfam:PF01694. Rhomboid is a seven-transmembrane spanning protein that resides in the Golgi and acts as a serine protease to cleave Spitz. 
25.00 25.00 61.90 56.20 22.80 22.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.56 0.70 -4.73 31 130 2009-09-25 13:20:38 2009-09-25 14:20:38 3 5 50 0 58 113 0 199.70 59 25.43 CHANGED VSKDs-ST.Q+WQRKSlRHCStRYG+LKsQVhREL-LPSQDNlSLsSTETPPPLYlsspp.....hGMQKIlDPLA...RGRAFRhs--.sD..G.SsPHT......PlTPGAASLCSFoSSRSGasRLPRRRKRESVA+MSFRAAAALlKGRSlhDuThpRsp..RRSFsPASFLEEDosDFsD-LDTSFFuR-shhp..EEhSohPD-VFESPu-uAhKphtps..s-pssLTGuALD.....+ .......VScD.-up.Q+WQR....KSl+HCS.RYGKLKsps.REL-LPSQ-ssShpuTEoPsPhhls................KIlDPLA...RGRAFRhsD-.sD.....t.psP...Hs..............PlTPGshSLsSFoSsRSGas+LP.RRKRpSVA+MSF+AAAALlK..GRS..Vh-..u..T.h.pRsp...+RSFs.sSFlEEDsVDhsDphDoSFF..u.......+..hh......EEhSohPD-VFESPs.uA.h.hths...sp.ss.sGstl.................. 0 3 7 21 +12430 PF12596 Tnp_P_element_C 87kDa_TransP; 87kDa Transposase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 78 and 110 amino acids in length. The family is found in association with Pfam:PF05485. There are two completely conserved residues (D and G) that may be functionally important. This family is an 87kDa transposase protein which catalyses both the precise and imprecise excision of a nonautonomous P transposable element. 22.60 22.60 23.10 22.60 21.70 21.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.77 0.72 -3.71 11 61 2009-09-25 13:22:20 2009-09-25 14:22:20 3 3 21 0 20 62 0 83.40 39 15.90 CHANGED ssscpspp-pals....oopp.cspspppp.........lph.................t.s-Esps.......th.ss.ss.h-hsE....pcDulEYlsGYlh+Kh+.......Lu-hsppsso ...........................................................NltpDNs-sWLN.hs...o.psppcsc.sp.......................................shscEs.-..........phhsNl-h.h-hDE.........LTEDAhEYlAGYVl+KLR.......luspsppp..hs............... 
0 3 3 20 +12431 PF12597 DUF3767 Protein of unknown function (DUF3767) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 112 and 199 amino acids in length. 25.00 25.00 30.40 26.90 23.40 22.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.54 0.71 -4.43 31 226 2009-09-25 13:23:46 2009-09-25 14:23:46 3 2 207 0 158 219 0 110.60 25 72.96 CHANGED Psc.ss.....................sps.......tt.pc..olp-Ahcols.hsDF..tphhphPChR-uhlsGhuuhhslGulphlhtt..shhpA.sNWuVGuFhLuulsuaE.CphpRcpphpthppAhchhtc+ctcphccp.pp ........................................................................hpthp.h..sh...hsltphPChR-uhLhGhuuuhshGslpFlhsu..phtpu.ssauVGuFhlsolusW.hCphp...ptppp.thp.shchhtph..........t............................. 0 46 80 124 +12432 PF12598 TBX T-box transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 77 and 89 amino acids in length. The family is found in association with Pfam:PF00907. There are two completely conserved residues (S and P) that may be functionally important. T-box genes encode transcription factors involved in morphogenesis and organogenesis of vertebrates and invertebrates 20.10 20.10 20.60 25.40 19.10 20.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.30 0.72 -3.61 15 119 2009-09-25 13:25:01 2009-09-25 14:25:01 3 2 42 0 52 109 0 84.90 42 13.25 CHANGED RhYEEppK.t+-susSDcSSuEtssh+..cpss..SPsuussush+l+c.............sS+D-+.tsssss........schpsss--csspssSPutpt ........RlYEE+.pK..+-susSDtSSsE.ssh+..spss..SPh.s.ussus.pL+c..................ps+-E+.stusss........sc.pppopEcsstshu.....s................................................................... 1 2 7 20 +12433 PF12599 DUF3768 Protein of unknown function (DUF3768) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. 
Proteins in this family are typically between 108 and 129 amino acids in length. There are two conserved sequence motifs: NDP and RVLT. 25.00 25.00 37.60 37.60 18.80 18.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.91 0.72 -4.07 41 95 2009-09-25 13:25:45 2009-09-25 14:25:45 3 1 59 0 32 104 44 83.50 41 66.99 CHANGED hhTpGltuL........upp.h.....tpllptVpsFDsFss-NDPaGEHDFGsl................chtGppl..aWKIDYY..DhshpauSsDPuDsslTtRVLTlMLAsE ........................hTtGltuL........utth.........tpllctVtsF-sFss-NDPaGEHDFGsl................ch....pGppl..aWKIDhY..Dhsh.....paGSs-PuDsshTtRVLTlMLAsE....... 0 9 26 29 +12434 PF12600 DUF3769 Protein of unknown function (DUF3769) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 560 and 931 amino acids in length. 20.50 20.50 20.60 20.90 20.40 19.80 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.44 0.70 -5.67 39 99 2009-09-25 13:26:29 2009-09-25 14:26:29 3 5 90 0 50 103 251 383.70 26 53.78 CHANGED tssGplsRhRFpAs+lphsscG.WcApchphTNDPFoPsplclcAcssphpp.ss..sc..lpsppsRLll-p+lslP..l.pcphl..pcp...c-.......shhshGaDscDRs....GhFltRshsslp.ssshpLplpPQahlQRAlpsts.s............sppsssh..s-hFGLpucLsuphs..shphpspuslooh.sh-c.hpsshRhpsclppsls..h.sshphsh.hsYRpRlaNGSLG.psV.uuhGuhl............p.................sssh...sssulphsYp......huhthsshps-ph...........psss..........hssLhRhphhuSLspsasLWpGc.shs.TsppuhR.YoPtPllPhlsl.....sTslsushuhYu...sGss.QsoLshosG.phplGpFS+sahDYTthslshutslpsGt.SPFhFDRhlDhtsLshGloQQIYGPlhlustsulNlDs.....GchlsophtLcapRRoYslslhYNPhpplGulphRlsDFN 
.................................................................................................................................................................................................................................................................s..stlpphRhputplph.sps.WpupphthoNDPasPsphclputps.h.t.ss...c..l..ptspllh-pthshP..h..pp.h..ppp.p........h.hGhDst-+s....Ghaltpthp.lp.stsh.hplpPQhhlQ+uh...............................tt.tsh..sshaGl.schpsphs..phphp.psp.lssh.s.sp.htst..Rhphphppp...........hs............hphph.hs.YR.RhaNGoLG.psl.puhGshlt.................p......t.sshthsa.......htht.hphps-p......................pspphhshhRhphhsolspth.lWpup.shs.ssppuh+.YoshslhPhltl.....................ssslpushuhYs...sGpp..Qss....ltuolGhshQhGpFo+sahDYTt....hs........ls........h..........utshh........s.....Gs.S........P....F.hFD+hsD.......ht.........s.......L.......shulsQQlhGPlhhssposlslDs..........sp.hsop.h.lpap+...R...oYtlhhhYsPh.phGuhphRls-Fp........................................... 0 11 33 45 +12435 PF12601 Rubi_NSP_C Rubivirus non-structural protein Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF05407. The rubella virus (RUB) nonstructural (NS) protein (NSP) ORF encodes a protease that cleaves the NSP precursor (240 kDa) at a single site to produce two products. 21.60 21.60 80.10 80.10 20.80 17.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.19 0.72 -4.08 3 69 2009-09-25 13:27:13 2009-09-25 14:27:13 3 2 10 0 0 72 0 55.10 100 3.83 CHANGED VCAVGGGPRRVSDRPHLWLAVPLSRGGGTCAATDEGLAQAYYDDLEVRRLGDDAMARAALASVQRP VCAVGGGPRRVSDRPHLWLAVPLSRGGGTCAATDEGLAQAYYDDLEVRRLGDDAMARAALAuVQRP 0 0 0 0 +12436 PF12602 FinO_N Fertility inhibition protein N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 62 and 102 amino acids in length. 
The family is found in association with Pfam:PF04352. The FinOP (fertility inhibition) system of F-like plasmids consists of an antisense RNA (FinP) and a 22 kDa protein (FinO) which act in concert to prevent the translation of TraJ, the positive regulator of the transfer operon. 25.00 25.00 49.70 48.80 21.00 20.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.57 0.72 -3.68 3 274 2009-09-25 13:28:13 2009-09-25 14:28:13 3 2 214 1 3 135 1 61.60 89 33.64 CHANGED MTEQKRPVLTLKRKst.EGTAPVcuo....suPGhVpRKKlVVVoTPPAWKVKKQ.........KLsEKAARcAE.................AAARKAAPcP ..MTEQKRPVLTLKRKT..EGETPVRS............RKTIINVTTPPKWKVKKQ.........KLAEKAAREAE.................LAAKKAQARQ............. 0 0 0 2 +12437 PF12603 DUF3770 Protein of unknown function (DUF3770) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 250 amino acids in length. The family is found in association with Pfam:PF04196. 21.50 21.50 21.90 66.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.53 0.70 -5.26 6 138 2009-09-25 13:29:41 2009-09-25 14:29:41 3 2 49 0 0 162 0 244.00 40 10.83 CHANGED ss--scEllaRaRLARslh.EL..+pl.Pth.cuDEsls+ppRElpuh.hpuIpI...DWplTEuKF.sPFo+chF-+Fss..hpsDp-Ylu+Ilpcssccuhccl-+spahs-slspptRh-+NuEpuhstlppKhsphpuapphc-hpc..HKSTVQhPshls+....sussspsLpsLpsls....spGsHPhpclWcp.lsssss.lpcIERh+-Ds.hELchAhuuloc+s.E.....RNKYHRssLshs.--+lYlAhlGVs .....ssEAEEllYRaRlAhplhscL..R........ssDpELsKoEcELLAh.lpuIph......NhshsEusF.PPho+EMa-+FhS..oPsDs-YIT+llStp.lpop-cLhsspahtcssstthRhp+Nu-Es..thpcsLpphput.c.s.+shss..pKuTlQLPPWLsh....hss-upDlsshpGh-....shtDH.PhspLWcc.slssss...LppIEchH.sDsAtEL-hAlSsst.-+s-E.....Rs+Y+Rs+LshuSc-plYhAthGVs.. 0 0 0 0 +12438 PF12604 gp37_C Tail fiber protein gp37 C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is typically between 49 and 166 amino acids in length. 
The family is found in association with Pfam:PF03906. In T-even phages, gp37 and gp38 are components of the tail fiber that are critical for phage-host interaction. 21.70 21.70 21.80 22.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.86 0.71 -4.42 9 109 2009-09-25 13:31:37 2009-09-25 14:31:37 3 18 66 6 4 116 1 139.60 26 22.38 CHANGED tssh.s.lp.......lpss.th.......pus.pt..a....cssstpthalu.tGussssDsoh..........hpss.h.hsc..h.sts....sGshpsscWt...ppWLsspL.p.....t.hh............................................hhs..th.t.sshs.hph...us.....sushhlsshuGhh .........................................................sthsGphplpsspsh......hpuusstutalhu.+ssstssWYlGpGussssDhs.Fashh.h..so.tlt.........lpp..shhshN.c.htlGtAh.ltssGsI.Gohht...staLsshlps......t.h................................................................................................................................................................................................................................. 0 0 2 2 +12439 PF12605 CK1gamma_C Casein kinase 1 gamma C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 54 and 99 amino acids in length. The family is found in association with Pfam:PF00069. CK1gamma is a membrane-bound member of the CK1 family. Gain-of-function and loss-of-function experiments show that CK1gamma is both necessary and sufficient to transduce LRP6 signalling in vertebrates and Drosophila cells. 
21.20 21.20 21.40 21.30 20.40 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.22 0.72 -3.42 17 249 2009-09-25 13:32:50 2009-09-25 14:32:50 3 3 74 6 101 230 0 69.50 50 17.15 CHANGED +...............hsTPsGshps...-sssos......sR-pp.hhpp.....................t..ppN...............................................t.QsloSTNG-LN.sDDPTuGHSNsPI ...................................hsTPVGulp....-sshSs......sR-tHthps+..............................pN...........................................t.t.............sps.QVV.SSTNGELN.sDDPTAGHSNAPI..................... 0 15 25 51 +12440 PF12606 RELT Tumour necrosis factor receptor superfamily member 19 Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 49 and 288 amino acids in length. There are two completely conserved residues (K and Y) that may be functionally important. The members of tumor necrosis factor receptor (TNFR) superfamily have been designated as the "guardians of the immune system" due to their roles in immune cell proliferation, differentiation, activation, and death (apoptosis). The messenger RNA of RELT is especially abundant in hematologic tissues such as spleen, lymph node, and peripheral blood leukocytes as well as in leukemias and lymphomas. RELT is able to activate the NF-kappaB pathway and selectively binds tumor necrosis factor receptor-associated factor 1. 28.90 28.90 29.30 29.20 28.70 28.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.41 0.72 -4.82 17 207 2009-09-25 13:33:46 2009-09-25 14:33:46 3 1 110 0 84 175 0 48.90 35 22.33 CHANGED YhhhllVslFhlhGLLGlhICplLKpKGY+Cos-s.-sp..pccppttc......p .....hhhhllVslFhlhGLLGlhIC...plLK+KGY+Cos-c.Esps.tstp......p................. 
0 6 17 30 +12441 PF12607 DUF3772 Protein of unknown function (DUF3772) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00924. 25.00 25.00 28.20 26.80 23.60 22.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.85 0.72 -4.33 69 289 2009-09-25 13:35:58 2009-09-25 14:35:58 3 3 288 0 87 297 69 63.60 29 7.86 CHANGED phActhhscAssLhspIsplhRsphsppLhpRusSPLsPshWssshpshspshpplts.ssphh ............ppApthttpApsLsspIsplRRshhpspLstRusS.LuPsFWsslhpshscDhp+Lpshtsp..h....... 0 12 37 61 +12442 PF12608 DUF3773 Protein of unknown function (DUF3773) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are approximately 110 amino acids in length. 19.90 19.90 19.90 26.00 18.70 17.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.27 0.72 -3.95 5 53 2009-09-25 13:37:22 2009-09-25 14:37:22 3 1 51 0 6 43 6 101.50 63 75.38 CHANGED h-l++plchlAcshsAcF.................sLcuRploYDEV.FSDTGLLPA...........LsRRADQLsS.LCLGYGlGsoa--AEsALLGV+ssFDEsTP-uLRLhChhDVlsELMpuusuh ..........t.DIpKsMuhIAAuhNAKF.................YLNDRFVSa-EV.FSDTGLLPA...........IA+RADQLCS.LCLGYGLGATaDEAEsALLGlRVVFDEVTPNsLRLLCMTDVlNELIQGGPS.R.. 2 3 4 6 +12443 PF12609 DUF3774 Wound-induced protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 81 and 97 amino acids in length. The proteins in the family are often annotated as wound-induced proteins however there is little accompanying literature to confirm this. 
27.30 27.30 32.80 31.10 21.70 21.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.91 0.72 -3.13 44 190 2009-09-25 13:38:21 2009-09-25 14:38:21 3 2 24 0 125 175 0 75.20 42 81.28 CHANGED lAuSluAVEu..lKDQ...thsRWs.shR.Slpppucsph.tshststt....tsutt............sssttspptcpsEESLRpVMYLS.CWGP ................................VAhSlGAVEA..LKDQh...GlCRW.NaALR.SlpppA+s...ss....tu.h....upup+.....hsuus...............ussspcc+tcpuEEuLRsVMYLS.CWGP.......... 0 6 76 103 +12444 PF12610 SOCS Suppressor of cytokine signalling Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF07525, Pfam:PF00017. The suppressors of cytokine signaling (SOCS) family play important roles in regulating a variety of signal transduction pathways that are involved in immunity, growth and development of organisms. 20.60 20.60 20.70 20.60 19.60 17.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.06 0.72 -3.64 17 118 2009-09-25 13:39:11 2009-09-25 14:39:11 3 5 41 0 58 93 0 55.20 45 12.19 CHANGED pspsstp.......+p-Rppussultshstthp.....pt.st+uhusRSLRQ+lQDAVGpChPl+oppppp .......................scssLp.......ppER+ausSSl.....th......Dpssu+phsuRSLRQ+LQDsVG.CFPl+spupp......... 0 5 12 25 +12445 PF12611 DUF3766 Protein of unknown function (DUF3766) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 20 amino acids in length. There is a conserved FTNID sequence motif. There is a single completely conserved residue T that may be functionally important. 21.10 21.10 21.90 21.10 19.00 20.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.42 0.72 -6.76 0.72 -4.42 43 199 2009-09-25 13:40:01 2009-09-25 14:40:01 3 1 191 0 76 173 5 23.90 55 18.69 CHANGED TVITAhDppphc.sNVFTNIDSAVl .TVlTAhDppphK.-NlFTNIDuAVI... 
1 40 68 73 +12446 PF12612 TFCD_C Tubulin folding cofactor D C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 182 and 199 amino acids in length. The family is found in association with Pfam:PF02985. There is a single completely conserved residue R that may be functionally important. Tubulin folding cofactor D does not co-polymerise with microtubules either in vivo or in vitro, but instead modulates microtubule dynamics by sequestering beta-tubulin from GTP-bound alphabeta-heterodimers in microtubules. 21.70 21.70 21.80 23.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.23 0.71 -4.88 48 307 2009-09-25 13:41:32 2009-09-25 14:41:32 3 9 222 0 228 309 4 174.30 25 17.26 CHANGED ssphsppllsslh+QusEKlD+lRtpAspsLpplh.ptst.........................................t.Lpchhs.pp.t.............................................assssphF...splh.pL..Lsls......................happs...............llpGLssSsGu.hoE.ulh+suppALlpalpt........pppppthttlhssllpllp......cptps-Rlsl.PhlchlshLLss.thht.h..pp.t....hhppLhphlppphhp...op.....sht+ .........................................p....hpplhsslhp.usEKlD+hRttAtpshhpll.ptp.s................................h..........l..th.........ttLpp.lhs.stht...............................................asssppsF.stlh.pL...Lt.l..s........................sYpht................lltGLlsSlGu..loE.Sll+top.tuLhpahp............................ppp.pthtthspsllplhp...................pph.ts..-R.......lhl...PhlchlshLlss.shhp.........h.t.t.............h.plhthhppthht...ttsh........................................................ 1 108 145 192 +12447 PF12613 FliC_SP Flagellin structural protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00669, Pfam:PF00700. This family is the bacterial flagellin structural protein. 
It is involved with cell motility. 25.00 25.00 61.00 61.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.70 0.72 -3.75 9 66 2009-09-25 14:21:04 2009-09-25 15:21:04 3 3 42 0 11 43 1 57.00 69 15.32 CHANGED suTuGTGTAVs.........................sLoLsouAT....uuhoAAptoAhsNulAQINAVNpPsTVSsLDIST .u.oTAGTGTAso......................TLuLSToAT....SuLSAsDQAsATAMVAQINAVNKPQTVSNLDIST. 0 1 2 8 +12448 PF12614 RRF_GI Ribosome recycling factor Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 130 amino acids in length. There are two conserved sequence motifs: LPS and LKR. Overproduction of ribosome recycling factor (RRF) reduces tna operon expression and increases the rate of cleavage of TnaC-tRNA(2)(Pro), relieving the growth inhibition associated with plasmid-mediated tnaC overexpression. 24.70 24.70 24.90 38.00 24.30 24.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.76 0.71 -4.33 21 130 2009-09-25 14:24:29 2009-09-25 15:24:29 3 1 128 0 22 78 3 124.00 55 96.85 CHANGED hcpsIoIsLPSLIHRIGp-ssKpApslAtphsC-LKRVRRSRNWplsGcAhplQuFtppL+sp..............pspphpaLIpKl-suLhpHuDKLEPLpsKLhRLlppNPsITLAELMptTpCTlsEARsARFsu-s .............p.cpsIsIoLPSLIHRIGu-sVK+hKh.AppacCELKRIRRSRNWQLlGEAtshQpFlhplKpp..............Ehp.shcYLI++lEsuLth.uDKlEPlEspLtRLlpQNPuITLAELMutTcCSLhQARsARFstE.... 0 3 5 15 +12449 PF12615 TraD_N F sex factor protein N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 96 and 107 amino acids in length. The family is found in association with Pfam:PF10412. TraD is a cytoplasmic membrane protein with possible DNA binding domains. It is part of the bacterial F sex factor complex. 
25.00 25.00 26.00 33.90 24.60 24.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.00 0.72 -3.66 24 278 2009-09-25 14:28:23 2009-09-25 15:28:23 3 7 186 0 20 265 0 98.40 51 15.94 CHANGED hhhhhhhlllsslhhahchstp...shhsuhhYahsp.....hhthl..s.p.hhpltap......spphptohtphLps.ahltss....sphhptlhhuulhuhllshllhhl ....YsLFIhFWILlGLlLWl+ISWQ...TFlNGsIYWWCT.TLEGMRDLI..+.SQPVYEIQYY......GKTaRMNAAQVLHDKYhIWCG....EQLWSAFVLAulVALVICLITFFl........ 0 0 2 12 +12450 PF12616 DUF3775 Protein of unknown function (DUF3775) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 80 amino acids in length. There is a single completely conserved residue G that may be functionally important. 21.90 21.90 22.10 27.40 20.90 19.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.44 0.72 -4.19 44 175 2009-09-25 15:18:47 2009-09-25 16:18:47 3 1 164 0 55 130 14 75.40 46 51.75 CHANGED hsppElcshIssLs--Eps-LVALhWlGRG..-aps-EascAhppAtppt......ssssucYLlGpPhLuDaLEpG........L-u.LGh .....scpELtuhIssLsEDEph-LlALMWlGRG..DasssEW--AlspApcct........sscsAcYLlGpPhLuDaLE-G....LsA.LG........... 0 14 33 39 +12451 PF12617 LdpA_C Iron-Sulfur binding protein C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is typically between 179 and 201 amino acids in length. The family is found in association with Pfam:PF00037. LdpA (light-dependent period) plays a role in controlling the redox state in cyanobacteria to modulate its. circadian clock. LdpA is a protein with Iron-Sulfur cluster-binding motifs. 
20.60 20.60 24.10 23.20 19.60 19.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.22 0.71 -5.02 45 96 2009-09-25 15:24:46 2009-09-25 16:24:46 3 8 89 0 42 102 157 186.40 40 51.44 CHANGED sLltphssDAlEIHTpsG+tpsFppLWpslssstspL+hlAlSCst.u.............csLlchLhphapllp..s..............s..hhlWQhDGRPMSGDIGs.GTo+uAlpLup+l.....hstt...PGalQLAGGTNspTlshLcp.sh.........................................pthlAGlAaGuaARpLlpPlLcphcppt.p...............................Lc-hP-hltpAlphApuLVsPh ...............LlpphslDAlEIHTpsG+tptFppLWpplssshspL+hlAlSCst.s..............csllchLtphapllp.s....ls..........t...h.lWQhDGRPMSGDIGp.GTT+sulpLup+l.......hstt.......PGalQLAGGTNsaTlstLcp.sh................................................................pshluGlAaGuaARpllsPlLcphpppttp...............................Lc-aP-hlhpAlphApuLVsPh...................... 0 7 28 39 +12452 PF12618 DUF3776 Protein of unknown function (DUF3776) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. 25.00 25.00 30.90 29.70 20.80 20.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.26 0.72 -3.51 10 168 2009-09-25 15:27:22 2009-09-25 16:27:22 3 5 35 0 61 137 0 105.10 32 20.21 CHANGED sssppcDc.shshh.tchscK.-psoS...ucsh.slSppspscsuh.-sEs.lucpcppppGsshQhc+SRuuclTushuoc..hh.s.pssccK-cspthssssscul.S.p.p ................................hssKK-ctstsh.tss-t.K...K.-p.sos.....SEsF...ulutcsl...PKsshspsEs.locpc+pppGsuhQhc+uRhsclTushssc...hh.s.pspc+K--s.pthssh.s.pp..ul.S.p.................................. 0 3 6 15 +12453 PF12619 MCM2_N Mini-chromosome maintenance protein 2 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 138 and 153 amino acids in length. The family is found in association with Pfam:PF00493. Mini-chromosome maintenance (MCM) proteins are essential for DNA replication. 
These proteins use ATPase activity to perform this function. 21.30 21.30 21.60 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.58 0.71 -4.24 58 331 2009-11-26 14:18:40 2009-09-25 17:02:30 3 9 280 0 227 319 0 142.50 31 16.94 CHANGED ---.-t...............-..t-l.t-h...p-h-c.t...E-.--G.DLauDshEc.DYcsp......c.DpY-ts.s..lD.D....-t-hc-hshusRRthEtpLscRD+.httt..............h..thah..s--....---Dsphp.............h.....RRRR+pa-c....-.-sh..sht..-........................hp-ElslEsL....sDlKupol .....................................ts.......................p.......h.p...t.t...pE-E-G--Lhu.Ds.h....Ec.DYRshs......-hDpY-sp.s........lD.D....-.-h.-.-lshusRctsEtphpcRDRphupt.......................phh.shhh.....ss.-...........----tp.t...........................pR+R+ph-c..........tp.st.....th-..-..........................h.-..olEsLtDhKupo............................................... 0 76 121 188 +12454 PF12620 DUF3778 Protein of unknown function (DUF3778) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 48 and 61 amino acids in length. There is a conserved LRF sequence motif. 22.10 22.10 23.70 22.40 17.60 15.60 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.09 0.72 -4.41 19 53 2009-09-25 16:06:54 2009-09-25 17:06:54 3 4 3 0 34 41 0 51.00 35 17.06 CHANGED st.shhtsuhthtssshhh.sS.p.+sthllRVE...s.......s..LhLLRFNsELRGsh..LL ....................hh.ssh..t.s..hh.sS..shHsthslRVE...........s..LhLLRFNs-L+Gs.hL........ 0 0 0 12 +12455 PF12621 DUF3779 Phosphate metabolism protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF02714. There are two completely conserved residues (W and D) that may be functionally important. 
This family is likely to be involved in phosphate metabolism however there is little accompanying literature to confirm this. 22.10 22.10 23.20 22.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.16 0.72 -4.10 64 338 2009-09-25 16:07:58 2009-09-25 17:07:58 3 13 128 0 268 348 0 94.10 28 9.75 CHANGED FhcPchatsactl+phlPpshth......phs.cht.cpAYhpPul.supsPhlWIPRDs..hGlScpElpcspcs.....lsloD-sAthsE.....K.G+lha.......tspPP...ac ...........................................hpPth..sat.h+thh.t............phssphh.ppAYhpPul.puppPhlWIPRD.s..hGlScpElpcspch............l.loDEsutlDc.....K.s+lha.......sttPP.a.................... 0 57 138 226 +12456 PF12622 NpwBP mRNA biogenesis factor Coggill P pcc Wood V Domain The full-length Wbp11 proteins carry several copies of a PPGPPP motif throughout their length. This motif is thought to be necessary for folding of the molecule as it helps to bind the WW domain, Wbp11, Pfam:PF09429 [1]. This domain together with Wbp11 may function as components of an mRNA factory in the nucleus. 25.00 25.00 27.30 26.20 23.70 22.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.80 0.72 -3.48 27 69 2009-10-16 11:54:12 2009-10-15 17:08:00 2 4 67 0 48 62 2 63.90 30 21.31 CHANGED sp+SlYYcPphNPhGtsPsG..........hP.hh+shtt......sp.sp...............hsp............................lshPp ..sp+SIYYcPshNPhGssPsu..........hP.hh+stpt.t.....sp.ss.........................................hpp............................I.hP..................................................................................................... 0 22 32 45 +12457 PF12623 Hen1_L RNA repair, ligase-Pnkp-associating, region of Hen1 Coggill P pcc Gardner P Domain This domain is the N-terminal region of the bacterial Hen1 protein. This protein forms stable hetero-tetramer with Pnkp. The hetero-tetramer was able to repair transfer RNAs cleaved by ribotoxins in vitro [1]. 
This domain provides the ligase activity of the hetero-tetramer. 20.70 20.70 20.80 27.40 20.40 20.50 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.57 0.70 -5.60 27 120 2009-10-15 16:15:24 2009-10-15 17:15:24 2 5 118 0 59 128 2 241.90 54 50.93 CHANGED MLLTloTT.....cpP.ATDLGaLLHKHP-RlQoFshuhGpAHVFYPEAotcRCTsALLL-VDPlsLV...Rsp+ut...su.s.huLuQYVNDRPYAASShLuVALucVFpTAhsG+CcsRPELAusslPLclclssLPs.R..GGtpLlc+LFEPLGWp.VsApslsLDpsaPpWGcS+YlsLsLpG.slRLu-hLsHLYVLlPVLDssKHYWVusDEVDKLlRtGcGWLusHP-+-LIsRRYLt+ppsLscpA.....LsRLs-s ...........................MhLTloTT.....ppP.A....TDLGaLLHKHP-+sQsFshuaGpAHVFYPEAos-RCTAALLL-VDPlsLV......Rspctt........ssss..suLuQYVNDRPYAASShLuVAlupVFpoAhsGcCcsR.PELAspslPLclclPsLPs.R.........G..G.....spll..c+LFpPLGWs.VsupslsLDtpF........Pp...WGcSRYlpLsLpG.plRLuDALpHLYVLLPVLDcsKHYWVusDElDKLLRtG-GWLssHPE+cLITcRYLp++tuLscpAhpRL...h......... 0 22 46 54 +12458 PF12624 Chorein_N N-terminal region of Chorein, a TM vesicle-mediated sorter Coggill P pcc Pfam-B_PB000002 (release 24.0) Family Although mutations in the full-length vacuolar protein sorting 13A (VPS13A) protein in vertebrates lead to the disease of chorea-acanthocytosis, the exact function of any of the regions within the protein is not yet known. This region is the proposed leucine zipper at the N-terminus. The full-length protein is a transmembrane protein with a presumed role in vesicle-mediated sorting and intracellular protein transport. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.32 0.71 -4.56 121 1194 2009-10-16 09:35:15 2009-10-16 10:35:15 2 56 295 0 818 1223 15 109.70 25 4.37 CHANGED hE...ulls...plLspaLGp....Yl..cslssppL.plu.lasG..cVpLcNLcLKp-....uLcpL..pLP..lplptGhlGc..Ls.lplPWps.L..tsc.PVhlpl-slal.lssPps....tpchst-c.ppppptth....KtpplpphE .......................................thlt.lLppaLup...al..c..s..l....s......cpL.pl.u....l.h....p...G......sl.pLpNLpL+.....pc.........................s....L.p...........p...h........cLP.............lplppGhl..s..c..........ls..lp..............l.P.Ws..p..L..............hsc...Plh..lplcslhl.lhtstt.........t.p.p.t........................................................................ 1 329 455 664 +12459 PF12625 Arabinose_bd Arabinose-binding domain of AraC transcription regulator, N-term Coggill P pcc Pfam-B_PB000001 (release 24.0) Domain AraC is a bacterial transcriptional regulatory protein with a DNA-binding domain at the C-terminus, HTH_AraC, Pfam:PF00165, and this dimerisation domain which harbours the arabinose-binding pocket at the N-terminus. AraC positively and negatively regulates expression of the proteins required for the uptake and catabolism of the sugar L-arabinose 1,2,3]. 
22.60 22.60 22.80 23.40 22.40 22.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.12 0.71 -4.47 221 2712 2009-10-16 10:27:52 2009-10-16 11:27:52 2 8 847 0 796 2533 293 180.40 18 52.86 CHANGED Gh-spslLpps..Gl..ss...sh..L..ps..thp.lshpphtpLhptu.hptssc.sslGLchu.pphphsshGhluhshhsusTLtpAlpphh+ahtlh...hhshph......ttttt....stlth.t....................pt.......h-hhhushhphh..ph..lh...sp.hs.hplphp..assP...s.t....ap..phF.s.ss.lpFststs.s.lhhssph..Ls.tPl ..............................................................GhssttlLtts..Gl.....s......t....l..t.....p.....s...psp...lsh.tp.hhplhpth...hp.hh....s....c....ss..h....G..l.p.....hu.p.p.h..ph.s.sh.G.h.luhsh.h.s.us...oLtpAlpthh.cah....plh..ssh.hphpl..........................ptpssh..stlph..p.tt..............h..............pth...h...h-.h.h.lsshhphh.........ph.lh.....upp...ht....h..plphs.......h..stPs.....thp......tYp.phF...s....ss.lpF..s.psts...t.....lhh..stph..Lshs................................................................... 0 161 340 592 +12460 PF12626 PolyA_pol_arg_C Polymerase A arginine-rich C-terminus Coggill P pcc Pfam-B_105 (release 24.0) Domain The C-terminus of polymerase A in E coli is arginine-rich and is necessary for full functioning of the enzyme. 22.00 22.00 24.00 23.10 21.80 20.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.63 0.71 -4.49 70 1295 2009-10-19 10:32:25 2009-10-19 11:32:25 2 4 1278 7 247 869 313 125.80 44 27.24 CHANGED pppG..hsshsAhppAsscllspQspphAIP+RFohshREIWpLQ.RLs..+RpG+RshpLlpHPRFRAAYDFLlLRspu.G-......phtpLupWWscaQpsssp..p+pphlpphstpttt.........p+RRRRs+++ps ......................csGLs.aDAh.shAhN-VLD-ts+s..lA.IP+RhTshhRDIWpLQhRhs..RRp...G............KRA.a..+LlEHPKFRAAYDhLtLRA-l.tss.......-hpcLupW....Ws-FQsusss...pp.csMl.splspcss.s..........................+RRpRRsR+p..s............................................. 
0 50 124 193 +12461 PF12627 PolyA_pol_RNAbd Probable RNA and SrmB- binding site of polymerase A Coggill P pcc Pfam-B_105 (release 24.0) Domain This region encompasses much of the RNA and SrmB binding motifs on polymerase A. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.53 0.72 -4.40 312 6138 2009-10-19 12:08:10 2009-10-19 13:08:10 2 37 4516 22 1489 4542 2033 63.10 27 14.21 CHANGED sFpI-tcTtpuIpchus..hLpplusERlhcElhKllhus.psptshchLhctGLh.phlh..Ppl.sth .......................sFplss-Thp.sh..p..p...h...ss.......hL.p...p....l.o.....s.....E....R..lhpEhpK.l..L.h.........us..psptshphL.p.c.hslh.phl.h...Ppl....hh.................. 0 481 941 1254 +12462 PF12628 Inhibitor_I71 Falstatin, cysteine peptidase inhibitor Coggill P pcc MEROPS_I71 Family This family of peptidase inhibitors is expressed from plasmodial protozoal species. Falstatin is found to be a potent reversible inhibitor of the P. falciparum cysteine proteases falcipain-2 and falcipain-3, as well as other parasite- and non-parasite-derived cysteine proteases, but is only a relatively weak inhibitor of the P. falciparum cysteine proteases falcipain-1 and dipeptidyl aminopeptidase 1. Thus, P. falciparum requires expression of falstatin to limit proteolysis by certain host or parasite cysteine proteases during erythrocyte invasion. 
22.10 22.10 23.20 63.40 21.50 18.30 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.23 0.71 -4.60 4 9 2009-10-19 12:24:47 2009-10-19 13:24:47 2 2 8 1 6 11 3 162.40 42 31.47 CHANGED DppYpLsGsEpCDs.lKLGsIlNpTNpcTIshSLoVscshCIshEusuGsGYlWsLLGVHKpcPhINPEpFPpKhlpKsaFSpEISVTQPKthphsp.sspKNsspsspsuuQNpssos+P.KPc...pllGGss.lpSlIKuHKsGKYalVYSYYRPFsPTusANTKIlpLTVQ .........spp.tLpssE.CDp.lKLGsIlNpsNpcTIphshsVsplLCIsLEu.sGsGalWsLLGVHKccPhIsPEpFPpKhlpcSaFSp-ISVTpPht.hph..sp.sspcssspptpssspNpssp.+P.psc...pllGGsshlpShIKsHKsGcYaIVYSYYRPFsPTtssNT+IlpLsVp.................................................................... 0 1 2 5 +12463 PF12629 Pox_polyA_pol_C Poxvirus poly(A) polymerase C-terminal domain Bateman A agb Structure Domain This domain is found at the C-terminus of the pox virus PolyA polymerase protein [1]. 27.00 27.00 48.10 48.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.33 0.71 -4.59 13 66 2009-10-19 14:10:58 2009-10-19 15:10:58 2 2 44 9 0 62 0 197.10 65 42.22 CHANGED LLsMlKMFSQIDRLEDLscs.EKhplRhuTLLEYsR...hcauIhhsGc...psphshpssh....shspRllTVDs+pY..shsacKChlYLDEss....................LspcIhchs..uD-u.lDFEsV......oNSsaLIcsssh...................YTYFSN....TlLhpscsclH-ISs+uloAHIL...lYplLT+sshtp.sLuDllNSLlshE...KhPlaplIPRDKKsG+HGIIDIEKDIIsH .LLNMIKMFSQIDRLEDLoKDPEKFsARMAThLEYVR...YTHGIlFDGc...+NNMPMKClI........DcssRIVTVsTKcY...FSFKKCLVYLDENV....................LSSDILDLN..ADousDF.ESV......TNSVYLIHDNIM...................YTYFSN....TILLSDKGKVHEISARGLCAHIL...LYQMLTuG-Y+Q.sLSDLLNShMsR-...KIPIYShsERDKKsGRHGhINIEKDIIl.. 0 0 0 0 +12464 PF12630 Pox_polyA_pol_N Poxvirus poly(A) polymerase N-terminal domain Bateman A agb Structure Domain This domain is found at the N-terminus of the pox virus Poly(A) polymerase protein [1]. According to SCOP this domain contains a helix-hairpin-helix motif. 
27.00 27.00 38.20 85.80 24.10 23.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.44 0.72 -3.95 13 66 2009-10-19 14:14:05 2009-10-19 15:14:05 2 2 44 9 0 61 0 112.10 65 24.00 CHANGED MNp........p..hpllcpYLGR.PShsEYahLKpQh+sIp+IhtFNKDlFlu...LlKKNK++FFoDlc..sSsuEIKcRlhpYFoKQcpsp.plG+LhoIIELQolLVooaTclL........GVLTs ........ohsN....IslcIIEsYLGRlPSlNEYHMLKLQsRNIQKIslFNKDIFlS...LVKKNKKRFFSDlD..TSuSEIK-RILSYFSKQTQTY.sIGKLFTIIELQSVLVTTYTDILGVLTI............ 0 0 0 0 +12465 PF12631 GTPase_Cys_C Catalytic cysteine-containing C-terminus of GTPase, MnmE Coggill P pcc Pfam-B_102 (release 24.0) Family This short C-terminal region contains the only cysteine present in these proteins. It is proposed that MnmE is a tRNA-modifying enzyme and that Cys-451 functions as a catalytic residue in the modification reaction. 22.00 22.00 22.00 23.30 21.10 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.45 0.72 -3.57 534 4286 2009-10-19 15:56:53 2009-10-19 16:56:53 2 10 4170 8 1009 3196 2082 72.50 33 15.93 CHANGED ttsspsshlsptRHhpsLp.pAhptLp.ps...........hpslp.......tt.....hs...............h-l...hu.-LRtAhcsLucIT..G.c...hs.s.-.-lLspIFSpFC ..............................................................s...t-sshluppRHlptLc.pAtppLppu.............tptlp......ts......hs....................................................................h-L..lu.-L+hAhptLuEIT...............G.-....hs.sD.-LLspIFSpFC............... 0 338 634 843 +12466 PF12632 Vezatin Mysoin-binding motif of peroxisomes Coggill P pcc Wood V Family Vezatin is a peroxisome transmembrane receptor that is involved in membrane-membrane and cell-cell adhesions. In the movement of peroxisomes it binds to class V [2] and class VIIa [3] myosins to guide the organelle through the microtubules [2] and allow pathogens to internalise themselves into host cells [1]. Vezatin is crucial for spermatozoan production [3]. 
In mouse cells it interacts with the cadherin-catenin complex bridging it to the C-terminal FERM domain of myosin VIIA [4]. 22.30 22.30 23.80 23.90 21.00 20.70 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.72 0.70 -5.30 32 191 2009-10-20 13:22:29 2009-10-20 11:54:41 2 3 153 0 121 198 1 252.80 27 36.68 CHANGED sshhhlshLlphsphttsshh....h.......hhhlllhlhhslhththhc.....lphh+ppsltplpshlssspshsshhppslhhl..p..El-lhSpuhp.......................t.tpptpspph.tL+ctLppsls.hh.phpputpplh.hhs.........sssLpcYhslYsl...............s..sLt.........................h.p...................pptc-........................t.olptL+hhht.....+hphlRKhhLCpLLolpt......ts.stsshh........................................................capslhptlpsLspslsph .......................................s...hhlshLlth.sp..hhs..s.sh.hhth......................lllhlhllhhulhhatht+........LphhhpphhsplpshlssupsFsshsppultLI..Q..EsEllSRGap...................uss..shut.....pp.sp.+hltLR+slhpslpthhpshp...uphhhhhhhs...............ssslppYhslh.sl................pphsLs.....................u.pt...hs-...................ppscs.............t.SL.sL+.hLht.....p.chhR+hhL..LLuhss.........sG...sshh...............................................................................chsshhptl+sLp.hhp......................................................................................................... 
0 25 53 91 +12467 PF12633 Adenyl_cycl_N Adenylate cyclase NT domain Finn RD, Bateman A agb Ref [1] Domain \N 21.60 21.60 21.60 21.90 21.30 21.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.30 0.71 -4.79 18 1003 2012-10-02 22:47:23 2009-10-20 16:23:27 2 7 911 0 113 583 22 184.90 65 24.20 CHANGED Mt.clpph+p+lDtLsplRl-RALuuhssphpcVFpLlPLLLHhNHPtLPGYls.ssPpGIspFplo-hQppaLssh........................hphp.....t......hpsspssIhGlYuMGSTuSIuQospSDLDlWVCasspLosc-hpLLppKspLlppWAcpasVElNFaLhcpp+FRpppsus......hstEsCGSuQHhLLLDEFYRSAlRLAGK.LLWh ..........................................................................................M.hYlpTL+QRLDulNQhRl-RALAuMussFQpVasLLPsLLHapH..PLhPGYl-..G.....s.VPp..G.IshasPs-p.QppaLs-L........................t.t.h.s......t......spuchPIsGlYoMGSTSSlG.QSsS.SDLDIWV.CH.Q.S.WL.DuEE.RQLLQRKCSLLE.sWA.A.S.LG..VEV.SFFLIDENRFRHNESGS......LGGEDCGSTQHILLLDEFYRTAVRLAGKRILW.s................................. 0 16 40 81 +12468 PF12634 Inp1 Inheritance of peroxisomes protein 1 Coggill P pcc manual Family Inp1 is a family of peripheral membrane proteins of peroxisomes. Inp1p binds Pex25p, Pex30p, and Vps1p, all of which are involved in controlling peroxisome division. The levels of Inp1p vary with the cell cycle, and Inp1 acts as a factor that retains peroxisomes in cells and controls peroxisome division [1]. Inp1p promotes the retention of peroxisomes in mother cells and buds of budding yeast by attaching peroxisomes to as-yet-unidentified cortical structures [2]. 
20.70 20.70 21.80 20.80 19.20 18.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.87 0.71 -4.52 28 115 2009-10-20 15:46:51 2009-10-20 16:46:51 2 2 114 0 83 108 0 140.90 28 25.09 CHANGED stspcsoLFpasssKIl...........t......................h...hspspo.shpppsppllupGshcIaplpsu...........................pssaLssG...shVaPlL.P+hplhpl...ptpsspFlL.lhsPpp.YW+IElss.....p..--tpllcphcpllsplspYps ........................................................................................sLatasss+ll............php.....t.....u.pss.........hp.s.s..p.pshshpppspphlAhGslcIaplssu............................................ssFLssG...sllaPlL.PKsQsapl.....stpuspFllplhpPp..Ya+IElsst.......sp.E-pphlcphcpVlspllpac.............. 1 12 39 68 +12469 PF12635 DUF3780 Protein of unknown function (DUF3780) Bateman A agb Bateman A Family This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria. Proteins in this family are typically between 189 and 206 amino acids in length. There are two conserved sequence motifs: PEERWWL and GWR. This family is found in a very sporadic set of bacterial species, suggesting that it may have been horizontally transferred. One protein is annotated as plasmid borne. 
21.70 21.70 22.30 22.20 20.20 19.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.45 0.71 -4.78 12 34 2009-10-29 09:18:49 2009-10-29 09:18:49 2 2 34 0 15 37 6 187.40 38 93.73 CHANGED Msp................ptpshGFGhsss.ssHpFhV.IPsu+spsVhlhEpauhpuGpsup.................ss...t.RspLs+ptWptlucslcpcFNpRL+cpphpsuRW.KsGcN.V-.RLLGKELsVLsWAlE.sAss-plPsAlpNWpuL+PEERWWLashTsAtsGtspcst.pGWR+AlRhALs-sP................t...h.s+p.h.............spscppphshhcp ................tttslGFGhsssps.HHFhV.IPp.upss..sVhlhEpashpssppsp..............................................ps..h.+shls+.pWptItstlppcFNtRL+pcshpsu+W..KsGpN...sVc.RLLGKELsVLsWAlE..cs.sscplPlAl+NW.uL+PEERWWLasMTsAuTGts.pDpt.+GWRhALRaALs-sP.................sp....p................p..................................... 1 8 10 13 +12470 PF12636 DUF3781 Protein of unknown function (DUF3781) Bateman A agb Jackhmmer:Q17ZV8 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 82 and 98 amino acids in length. There are two conserved sequence motifs: GKNWY and ITA. 27.00 27.00 28.10 32.00 24.10 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.26 0.72 -4.02 19 84 2009-10-30 16:24:37 2009-10-30 16:24:37 2 6 80 0 15 75 3 72.10 47 72.53 CHANGED LLpNl-.+lHTT-LGh.RI++NLsLcss-V.l-...aCKpKIhs.ssAhIp++GKNWYsps-s..shlTlNAaSYTIITAH .....LLpNl-.+LHTT-LGhhRI++NLuL..ssp-V.lp...aCKpKIhs.ssupIpR+GKNWYlps-s..hhITlNAhSYTlITAH....... 0 10 14 14 +12471 PF12637 TSCPD TSCPD domain Bateman A agb Jackhmmer:Q17ZZ5 Domain This family of proteins is found in bacteria, archaea and viruses. The domain is found in isolation in many proteins where it has a conserved C-terminal motif TSCPD after which the domain is named. Most copies of the domain possess 4 conserved cysteines that may be part of an Iron-sulfur cluster. 
This domain is found at the C-terminus of some ribonucleoside-diphosphate reductase enzymes. 21.60 21.60 21.60 22.60 21.50 21.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.07 0.72 -4.04 149 1269 2009-10-30 17:23:21 2009-10-30 16:52:16 2 33 1088 0 416 1079 1406 91.80 27 13.21 CHANGED acsps.sC....shhlslsh-.t.s...........lhtslshsGGC.suphpulu+Ll.......pGhs...lc-....llcpLcGIpCsspss.....................SCPDtlucALcphh .................................s..sp...phYlshsth...t..c...G........hElFl.s.htptG....us.tuhhpuhuthlShsLp.....hGss......l-p....hlcphpslchtssGhh.t............t.h.SlhDhlh+tLt...h..................... 0 186 331 383 +12472 PF12638 Staygreen Staygreen protein Bateman A agb Jackhmmer:Q181L4 Domain This family of proteins have been implicated in chlorophyll degradation [1,2]. Intriguingly members of this family are also found in non-photosynthetic bacteria. 27.00 27.00 27.60 27.50 23.10 19.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.96 0.71 -4.67 21 155 2009-11-02 14:20:47 2009-11-02 14:20:47 2 3 98 0 61 150 2 143.00 41 65.43 CHANGED spFsssKLpVpFhsstspstPh.....hPRpYTLTHsDhTucLhLsIupshshs.pl....sch.RDEVlAEW.pc.psp..hsL+VasaVS.GuphhhshuAp.RahIFp+ELPLlLcAlhaGDpsLFppaPcL.sA.VaVaFcSs..hP.....pa............s+hEsWGslp-hu .........s.hFpssKLpV.Fhs..sspp..ppPh................hsRpYTLTHSDhTucLhLsIupshshs.pl.....sphtRDEVluEW.+csp.......sp........hsL+Vasalu.GsphhhchssthRahIFp+ELPlsLcAlhaGDpshFspaP-Ltpu.laVaFpSs..hs.ca...................N+lEsWGslp-h.s................... 0 18 48 54 +12473 PF12639 Colicin-DNase Colicin-DNAse; DNase/tRNase domain of colicin-like bacteriocin Coggill P pcc pdb_1bxi Domain Colicin-like bacteriocins are complex structures with an N-terminal beta-barrel translocation domain (Pfam:PF09000), a long double-alpha-helical receptor-binding domain (Pfam:PF11570) and this C-terminal RNAse/DNase domain with endonuclease activity. 
Their competitor bacteriocidal action is by a process that involves binding to a surface receptor, entering the cell, and, finally, killing it. The lethal action of colicin E3 is a specific cleavage in the ribosomal decoding A site. The crystal structure of colicin E3 reveals a Y-shaped molecule with the receptor binding domain forming a 100 Angstrom long stalk and the two globular heads of the translocation domain and this catalytic domain comprising the two arms [2]. 21.50 21.50 21.60 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.87 0.72 -3.81 57 525 2012-10-05 18:28:12 2009-11-02 18:40:55 2 27 407 56 43 461 2 108.50 29 25.68 CHANGED sshspsG................a.ths...sthss.ls.phtcpLp.....ucchspFcphpcthhpsl...............tpsspltppFstpphpth.psthsP...............ppaphHHppp..tG.......shpllsschHtp.....hHpG ...............................................................................................................................................................................................................c..pphsphRKphhcsV...............ucss-huup.....F...s....s.....c....s....lt..ph.+p...GpsP...........................................................hsYslHH+hslpsGGss.ch-NlhLlpsc.Hcc........................ 0 5 14 22 +12474 PF12640 UPF0489 UPF0489 domain Bateman A agb Jackhmmer:Q181H4 Domain This family is probably an enzyme which is related to the Arginase family. 
27.00 27.00 28.50 27.80 25.00 23.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.29 0.71 -4.04 34 190 2012-10-01 22:40:15 2009-11-03 13:56:57 2 4 158 0 102 176 2 174.40 25 49.67 CHANGED lall-sHccshhhWhctlppt.h...pshpllHlDpHsDhhhsh...................phshphshpt......t......phshctlsh.-saI..hsAlhsuhlsclhhlppshs................................................hh.t...h.....t....hpphptht.htshshpp.hhpt..sh...........................................................................p.ppsalLDIDLDaF ...................lalV-sHpcsl..........ahhcsltppcls..pshph..lHhDuHsDhhlPh......................................................shssssshsc...................cthhspLs.I..-NaI....hPulauGah.s.clhalcssau...............................................p..hthst...hhhspt.h.s........ttp.......hpp.h.hp.hps....h.hps......t................................................................................................................................................................................................................................s..psalLDlDLDaF.............................................................. 0 38 53 78 +12475 PF12641 Flavodoxin_3 Flavodoxin domain Bateman A agb Jackhmmer:Q180M7 Domain This family represents a flavodoxin domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.82 0.71 -4.81 44 396 2012-10-03 05:08:30 2009-11-03 16:27:47 2 6 311 0 78 1420 207 144.70 24 88.08 CHANGED hllYSShTGNT+plAcuItpslss..pshhshpcsps.....hsph..DllhlGFWsDKGssspchtcalps.L+sK+lhlFGThGhsssscahpphlppspphlsps.NpllGpFhCQGKhs.plpc+acph....tpstpptthsphlppa-pAhsHPDpsDlppucphsc ..........................llY.s.S.h.o..GNT+plAcsI....tctlss...........tph.h.....s.h..p..ps.ss.....................hpsa....D.h.l.hlG...h......W......s.........D..c..G......p......s......s...t..c.........h......p....c...a....l.......p.....p.....l......c.....s.....K....p....l.....h..lFu...T..h..G....s..t........s..p..p..a.h...p..p...h...h.p.p.h....t....p.h..hs...pt...sphh...s...th...hs..p......G...t.h.s..t........h.thh.t.......................t.h..s..cP.s..ch..h.....t......................................................................................................... 0 30 61 71 +12476 PF12642 TpcC Conjugative transposon protein TcpC Bateman A agb Jackhmmer:Q180I0 Family This family of proteins are annotated as conjugative transposon protein TcpC. The transfer clostridial plasmid (tcp) locus is part of some conjugative antibiotic resistance and virulence plasmids. TcpC was one of five genes whose products had low-level sequence identity to Tn916 proteins, having similarity to ORF13 homologues from Tn916, Tn5397, and CW459tet [1].\ This family of proteins is found in bacteria. Proteins in this family are typically between 302 and 351 amino acids in length. 
27.00 27.00 28.00 27.80 26.90 26.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.47 0.70 -4.81 47 781 2009-11-04 11:35:59 2009-11-04 11:35:59 2 2 512 6 62 457 7 223.70 29 72.09 CHANGED saspsFsppYho....tspcsh-pRtppLp.tYlspphp..ststhpt.......sppltssplhslcpp.........cpthhlphpVphphs..............ptcptpp....................pthsVPVth....pssshslsu.Pshss.hsppushpspttpscsshst.tt......pclpcFLpsFFchYssuspp-lshhhpsss......slsssh...th........tclsssphhpppsshpsslt............Vpah-pt.....optphsppasLpLp.cp....sspahlpc ..............................................................................FscsFscsYao..hppspcsh-pRhppLp.tYLsp-hpshs.cts+pt......suslpshplhslcpp..............cspaslpapVc.pls.......................pscptpslp............................ssapVslhh.........tssshlllp.Pshss..hPp.p..u..s.....hpsK.th.-scsss-stss.........pclspFLpsFFchYsoust..p-.LuYasssshh.....slstph....ha...........pclsss.h..hhcc.....s.sps.hlsls............VpahDpp...............oc.sTpsp.pasLsLp..Kp.....sssWhIh................... 0 30 44 53 +12477 PF12643 MazG-like MazG-like family Bateman A agb Jackhmmer:Q181R3 Domain This family of short proteins are distantly related to the MazG enzyme. This suggests that these proteins are enzymes that catalyse a related reaction. 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.18 0.72 -4.06 25 305 2012-10-01 21:36:44 2009-11-04 14:23:11 2 3 298 0 61 203 5 93.40 35 86.29 CHANGED Ih+NlKhIEtLKuELLs......pluclF+hLs+Gup...p-uIl-sluslIllsYlLucRLGhsapclDcslccKL+luIhEccc.lEK.ht-LScLtp+L......cc+c ..........................................tRNh+.hcs.Ks.hlu......-LhEL...Fphhosp.p......s...tp......t..E..cIt-ELADllIYsYhlADpLGhD....lDEhlccKLccs.t.h.chs...Ec.................................. 
0 25 45 57 +12478 PF12644 DUF3782 Protein of unknown function (DUF3782) Bateman A agb Bateman A Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 91 and 186 amino acids in length. 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.06 0.72 -4.18 54 159 2009-11-04 17:31:12 2009-11-04 17:31:12 2 7 87 0 50 145 10 79.80 20 52.38 CHANGED pchtchpc...........................................chtphpcclpph................................................................hsccphchlp........................chhstlstthshtsEpsa+pGhp-slphhtt ....................................................................................................................................................................th.c...............................phhthpccLpph...................................................................hocEs....hchlp..............................phhstlutths..sEtta+.Ghpthlp.......................................................................................... 0 17 23 28 +12479 PF12645 HTH_16 Helix-turn-helix domain Bateman A agb Jackhmmer:Q180H2 Domain This domain appears to be a helix-turn-helix domain suggesting that this might be a transcriptional regulatory protein. Some members of this family are annotated as conjugative transposon domains. 21.60 21.60 21.60 21.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.88 0.72 -4.04 52 618 2012-10-04 14:01:12 2009-11-05 10:29:19 2 3 295 0 35 325 44 61.90 36 77.88 CHANGED aplIht.AspGDspAlpplLpHYcuYIsphsh+.hhc.chGpshhtlD--l+pclct+Llps.llpFc ........a.lIhp.ApcGDspAlpplLp+YcuYIs+hshR.hhs.-hGphphhVDEph+pclcp+LIpt.ILpFc........................ 
0 17 28 28 +12480 PF12646 DUF3783 Domain of unknown function (DUF3783) Bateman A agb Jackhmmer:Q180F4 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. 27.00 27.00 30.80 30.00 25.30 22.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.59 0.72 -4.22 69 284 2009-11-05 10:33:20 2009-11-05 10:33:20 2 1 262 0 72 251 7 58.20 26 39.88 CHANGED pchllhssh.ssp..clcphlpth+ct....th..phslhAslT.sNhpWshppLh.cEltcE+chhp ...chllhssh.ssp...clpthlpsh+ct....tl...phslhAslT.sshsWshppLl.cElhcE+chhp... 0 35 57 68 +12481 PF12647 RNHCP RNHCP domain Bateman A agb Jackhmmer: Domain This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 143 amino acids in length. There is a conserved RNHCP sequence motif. 24.00 24.00 25.00 37.20 23.20 23.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.97 0.72 -10.73 0.72 -4.16 27 115 2009-11-05 10:59:04 2009-11-05 10:59:04 2 2 95 0 46 118 8 87.30 46 61.11 CHANGED spsssFpChtCGhtVsstusGos+RNHCPsCLpShHlD.phPGDRAus.CtGhMcPlulhsRpsG-WsllHRCppCGcLspN.....RluuDDN.hhLhp ..s.tpsFhChtCGh.V.s.usGotaRNHCPpCLhShHVD...PGDRuus.CtGhM-PlulhV+psG-WhllHRCppCGpLssN......RlAuDDN.hhLh................. 0 20 36 44 +12482 PF12648 TcpE TcpE family Bateman A agb Jackhmmer:Q180I5 Domain This family of proteins includes TcpE a conjugative transposon membrane protein.This family of proteins is found in bacteria. Proteins in this family are typically between 122 and 168 amino acids in length. 
22.20 22.20 22.90 22.50 21.90 21.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.39 0.72 -3.90 31 672 2009-11-05 14:27:45 2009-11-05 14:27:45 2 2 429 0 44 272 10 98.40 39 70.53 CHANGED sYsphaphphhlYpItc.hpLPh.s..lshpph....shFllhhlslhlhhtlhs.........htpshhhhhh.hlPhhlshhhsphch-GKphhtalhshlpahhch+hpppthhht .............sYsphaph.hVlYtIsD..hpLPh.s......lshsph....saFll..h.hhl.hlh.hpl.s.s............hhpGsh.L.pYh.slPsslTaahopKpFDGKKsasFL+uhls.Yhhc.Klshsst...p........ 0 23 33 38 +12484 PF12650 DUF3784 Domain of unknown function (DUF3784) Bateman A agb Jackhmmer:Q180M1 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 96 and 110 amino acids in length. 27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.35 0.72 -4.05 48 316 2012-10-01 21:33:42 2009-11-06 14:28:11 2 2 238 0 39 256 75 92.90 24 80.06 CHANGED hhlull....hhllulh..lhpsKushLloGaNohsccE+cchDpppls+hhGphhhhhul..lhllsul....hthhhsph........hslhhhllhllhhhhhlhhsschp ...................hlull..hhhluhh.......ltst+tshLluGaN....cE+chhDcp+Ls..+hhGhhhh.lhul..lhhltul....hthhhsph............hhhhhh.hlhllshllhhhhss...h.......................... 0 18 31 33 +12485 PF12651 RHH_3 Ribbon-helix-helix domain Bateman A agb Jackhmmer:Q17ZT4 Domain This short bacterial protein contains a ribbon-helix-helix domain that is likely to be DNA-binding. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.70 0.72 -4.23 28 354 2012-10-02 18:44:02 2009-11-06 14:36:52 2 4 316 0 62 379 17 42.50 33 41.58 CHANGED pRpphosolsp-Lhp+LcpLSccTpIPhS+LlDEAl-hLLc+Yc ..........hpphohplspElhp+LDslScsp.u.l.sKScllcEAlptaLpph........... 
0 25 39 51 +12486 PF12652 CotJB CotJB protein Bateman A agb Jackhmmer:Q181Y6 Domain CotJ is a sigma E-controlled operon involved in the spore coat of Bacillus subtilis [1]. This protein has been identified as a spore coat protein [2]. 29.00 29.00 29.00 31.20 28.80 28.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.67 0.72 -3.90 71 380 2009-11-09 12:44:13 2009-11-09 12:44:13 2 2 344 0 68 260 3 77.60 41 85.47 CHANGED cppLLppIptlsFull-lsLYLDTHPsDppAlp.apphspptpphhcpYpppaGPLs.thsss..st.............p.W.sWlpsPWP.....W- ...h.hpLLcplppl-FsllELsLYLDTHPcDppAlppaNphupppppLpppaEppYGPLppaGs.u...s.p.............ssW.pWscsPWPWp....... 0 36 54 59 +12487 PF12653 DUF3785 Protein of unknown function (DUF3785) Bateman A agb Jackhmmer:Q181X2 Domain This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. These proteins share two CXXC motifs suggesting these are zinc binding proteins. This protein is found in clostridia in an operon with three signalling proteins, suggesting this protein may be a DNA-binding transcription regulator downstream of an as yet unknown signalling pathway (Bateman A pers obs). 27.00 27.00 186.90 186.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.89 0.71 -4.32 8 29 2009-11-09 15:44:53 2009-11-09 15:44:53 2 1 29 0 4 16 0 136.60 56 100.00 CHANGED M-.YKFsYD-KEYlLsc-NCsshFND...EpcElcGlSl-cILchLspuEEVsFupEYYp-sCs.ChsGhEEKpKhFsFLEYHFYIYTK-sKYVISsIsKEY-spSFNKLhRAsKVDcSYIVSllVCtNCGsYsIpIEpCpV ..h..aKFsaD-KEYhLsE-ph.hhFN-...t.t-VcGhsI-Klh-ILNpuEtVsFuptYYpssC..ChtGlEEKKK.FPFLEaaFaIYoKsGcaVISNIpK-YcGLSaNKLhRspKVDcSYlVslsVCcNCGsasVplEphpV. 0 2 4 4 +12488 PF12654 DUF3786 Domain of unknown function (DUF3786) Assefa S, Coggill P, Bateman A agb Pfam-B_16102 (release 23.0) Domain This presumed domain is functionally uncharacterised. 
This family of proteins is found in bacteria. Proteins in this family are typically between 201 and 257 amino acids in length. Some proteins also contains an iron-sulfur cluster. 27.00 27.00 27.70 50.20 22.80 20.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.88 0.71 -5.01 53 182 2009-11-09 17:15:17 2009-11-09 17:15:17 2 2 71 0 79 186 21 177.60 23 80.83 CHANGED hDhpplupphGs.h.t.ptpt...lpl.hhGpsaplshsshthsstcsssh..........hpllllcYLhpus.sts.ss.cWlsa+-lssGt.ah..ssFppc.s.psLschassp.hcphppssctLGGp.hstu......DhuhhhpslP+lPlhllhWpu..D--FPupuslLFDpssspaL.ssEslhsluthlsphL ....................h.s.tplupphusth...p.pt...htlpahuppatlshssspl...hstpssth...................................hplllLpYLhpup..sh...s..ss.caloa+El.s.s.G.thah..ssFppp.shpsLtchFusp.hcth.ppsspp...LGGp.hshu......Dhuh..hhpshPclPltllhWpu..D-EFPususlLFDssssphL.ssEslhslushlsth.................................... 0 61 79 79 +12489 PF12655 DUF3787 Domain of unknown function (DUF3787) Bateman A agb Jackhmmer:Q185C1 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in Clostridia. Proteins in this family are approximately 60 amino acids in length. There is a conserved TAAW sequence motif that may be functionally important. 20.70 20.70 21.80 23.00 20.00 18.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.57 0.72 -4.24 14 76 2009-11-11 17:47:45 2009-11-11 17:47:45 2 1 75 0 24 52 0 51.40 47 87.27 CHANGED pcpKcphhshPlEpHsTAAW.ANIcchKshSpVsIPoEppVcNAK-WVDsNpK .......................pppKcphhthPIEpHcTAAW.ANIpphKPpSsVsIPSE.pVpNAKEWVDsNpK.... 0 15 21 23 +12490 PF12656 G-patch_2 DExH-box splicing factor binding site Wood V, Coggill P pcc Pfam-B_900 (release 24.0) Domain Yeast Spp2, a G-patch protein and spliceosome component, interacts with the ATP-dependent DExH-box splicing factor Prp2 [1]. 
As this interaction involves the G-patch sequence in Spp2 and is required for the recruitment of Prp2 to the spliceosome before the first catalytic step of splicing, it is proposed that Spp2 might be an accessory factor that confers spliceosome specificity on Prp2 [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.93 0.72 -4.13 74 304 2012-10-01 21:03:39 2009-11-12 14:11:33 2 12 256 0 228 1568 9 76.80 33 18.52 CHANGED tp.spppthcpclsstP-pssh-..-YcplPVEpFGtA..hLRGMGWc......tupshu+sptt.................ht.ppRss.tLGLGAcshtsp ............................................ht.....tt.p.chttt...s.-pssh-........-Y.c..s.....lP..V.......E......p..F.......GhA...hLRGMGWc......tGpshG+.s.t.tt..............sts.h...s.phRPt..tLGLGAc.h..s.................................... 0 78 129 190 +12491 PF12657 TFIIIC_delta Transcription factor IIIC subunit delta N-term Coggill P pcc Pfam-B_74169 Domain In humans there are six subunits of transcription factor IIIC, and this one is the 90 kDa subunit; whereas in fungi the complex resolves into nine different subunits and this is No. 9 in yeasts [1]. The whole subunit is involved in RNA polymerase III-mediated transcription. It is possible that this N-terminal domain interacts with TFIIIC subunit 8 [2]. 
27.00 27.00 27.10 27.40 26.90 26.40 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.30 0.71 -4.55 35 186 2009-11-13 16:10:32 2009-11-13 16:10:32 2 15 151 0 131 188 0 162.20 21 22.39 CHANGED pshPoshssluWSsDGclAlusuctVpl.L...........sPp.......................pt..ttspshsssshpaphsphcsshh.s.pphP.................thhspshsh.hs.t........t-.psu..........plhuluWSP.GLup...p.......RClLAVLToshtLolapsstt...pucWsclsslschLt.....................................t..t....tts.hh.........pc.plpuhsWs ........................................................................p...ssh.pslsWSpDs.pluVsssc.ltlL.............sPt..............................................................t.t..ts.t.....t........ththh...psp.h..s....st.s.......................................th..t.h.s................t.t...................thhthuWSP.G.hsss..t.........................+ClLAsLTssspLslats.t....p.pWh..pl.sslschhh........................................t....st.............p....t......thta................................................................ 0 30 65 100 +12492 PF12658 Ten1 Telomere capping, CST complex subunit Coggill P pcc Wood V Domain Stn1 and Ten1 are DNA-binding proteins with specificity for telomeric DNA substrates and both protect chromosome termini from unregulated resection and regulate telomere length. Stn1 complexes with Ten1 and Cdc13 to function as a telomere-specific replication protein A (RPA)-like complex [1]. These three interacting proteins associate with the telomeric overhang in budding yeast, whereas a single protein known as Pot1 (protection of telomeres-1) performs this function in fission yeast, and a two-subunit complex consisting of POT1 and TPP1 associates with telomeric ssDNA in humans. S.pombe has Stn1- and Ten1-like proteins that are essential for chromosome end protection. Stn1 orthologues exist in all species that have Pot1, whereas Ten1-like proteins can be found in all fungi. 
Fission yeast Stn1 and Ten1 localise at telomeres in a manner that correlates with the length of the ssDNA overhang, suggesting that they specifically associate with the telomeric ssDNA. Two separate protein complexes are required for chromosome end protection in fission yeast. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution [2]. Ten1 is one of the three components of the CST complex, which, in conjunction with the Shelterin complex helps protect telomeres from attack by DNA-repair mechanisms [4]. 21.60 21.60 21.70 21.60 21.50 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.58 0.71 -4.36 27 90 2012-10-03 20:18:03 2009-11-13 17:26:38 2 2 88 4 62 79 0 132.90 26 80.45 CHANGED Mo....sP.ssphh..............Fh.pphsshstsp+lRhLu.................sVppYshssG...pLhL-+..shst.......t.t.ss.....lsVDlshlLss.lpsc.................clplGsWlNllGYlctpt.............................................................hVcAlhlhsusulplscY..cclLp .............................Mo....sPhs.p.h..............hhpp.sthtsup+lRhLu.................CVssYshtsG...pLhLc+.......shsh..........................ptt.sp.....................spVDlpllLps..lpsp............................plplGsWlNllGYlpttt.........................................................tpsshlpAlhlhssushcltcY.pchl....................... 0 9 30 49 +12493 PF12659 Stn1_C Telomere capping C-terminal wHTH Coggill P pcc Wood V Domain This domain consists of tandem winged helix-turn-helix motifs. Stn1 and Ten1 are DNA-binding proteins with specificity for telomeric DNA substrates and both protect chromosome termini from unregulated resection and regulate telomere length. Stn1 complexes with Ten1 and Cdc13 to function as a telomere-specific replication protein A (RPA)-like complex [1]. 
These three interacting proteins associate with the telomeric overhang in budding yeast, whereas a single protein known as Pot1 (protection of telomeres-1) performs this function in fission yeast, and a two-subunit complex consisting of POT1 and TPP1 associates with telomeric ssDNA in humans. S.pombe has Stn1- and Ten1-like proteins that are essential for chromosome end protection. Stn1 orthologues exist in all species that have Pot1, whereas Ten1-like proteins can be found in all fungi. Fission yeast Stn1 and Ten1 localise at telomeres in a manner that correlates with the length of the ssDNA overhang, suggesting that they specifically associate with the telomeric ssDNA. Two separate protein complexes are required for chromosome end protection in fission yeast. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution [2]. 25.00 25.00 26.40 49.70 21.90 19.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.61 0.71 -4.31 10 23 2009-11-13 17:32:48 2009-11-13 17:32:48 2 1 23 2 15 23 0 124.20 41 26.69 CHANGED hKuhDslctcsap-lLspLsppGLIslcupop..lcLhsLKsla-Ysp+RIusLlKLQshTGslchs+Vpp..+LphPhhopphIVDlaKEsL++hptt.splLpsWWI-hcs+sth.......hlHFpYspup ............hKShDsLcpEsFcDlls+LlspGLIsLcs+os..hDLhPLKsLa-YspKRIslLhKLQChTGTlplo+Vpc..KLclPhlTspuIVDlFKEsLK+tpKphPplLKsWWIDLcscsth.......llHLEYsts.h.. 0 2 7 13 +12494 PF12660 zf-TFIIIC Putative zinc-finger of transcription factor IIIC complex Coggill P pcc Maraia R, Wood V Domain This zinc-finger domain is at the very C-terminus of a number of different TFIIIC subunit proteins. This domain might be involved in protein-DNA and/or protein-protein interactions [1]. 
22.10 22.10 22.50 22.70 22.00 21.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -11.02 0.72 -4.09 16 171 2009-11-13 18:18:58 2009-11-13 18:18:58 2 11 153 2 124 170 0 90.00 25 12.66 CHANGED +shpshps.u........EpCslC......cuslsasssppApCssGHhWhRCuLTFhulQsssh.+hCslCs.sthhstshttspp.....................hchLhpshssCh..aCuuchh ..........................hsp..................EpCshC..........pt...l..s.hps.hcp......uh.CssGHh.ahRCslThhslps.th..+hC.lss.phshp..t....ts....t..............................................................................hphL...p.t....C.aCss...h............................................ 0 30 57 92 +12496 PF12661 hEGF Human growth factor-like EGF Wouters M, Coggill P pcc Wouters M Domain hEGF, or human growth factor-like EGF, domains have six conserved residues disulfide-bonded into the characteristic 'ababcc' pattern. They are involved in growth and proliferation of cells, in proteins of the Notch/Delta pathway, neurogulin and selectins. hEGFs are also found in mosaic proteins with four-disulfide laminin EGFs such as aggrecan and perlecan. The core fold of the EGF domain consists of two small beta-hairpins packed against each other. Two major structural variants have been identified based on the structural context of the C-terminal Cys residue of disulfide 'c' in the C-terminal hairpin: hEGFs and cEGFs. In hEGFs the C-terminal thiol resides in the beta-turn, resulting in shorter loop-lengths between the Cys residues of disulfide 'c', typically C[8-9]XC. These shorter loop-lengths are also typical of the four-disulfide EGF domains, laminin ad integrin. Tandem hEGF domains have six linking residues between terminal cysteines of adjacent domains. hEGF domains may or may not bind calcium in the linker region. hEGF domains with the consensus motif CXD4X[F,Y]XCXC are hydroxylated exclusively in the Asp residue. 
18.00 13.60 18.00 13.60 17.90 13.50 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.16 0.73 -5.96 0.73 -3.51 521 6130 2012-10-03 09:47:55 2009-11-16 12:53:17 2 1671 227 11 3408 13199 236 13.00 50 2.13 CHANGED pCpCssGaoGspC ......pChCssGaoGtpC.. 0 1352 1597 2396 +12497 PF12662 cEGF Complement Clr-like EGF-like Wouters M, Coggill P pcc Wouters M Domain cEGF, or complement Clr-like EGF, domains have six conserved cysteine residues disulfide-bonded into the characteristic pattern 'ababcc'. They are found in blood coagulation proteins such as fibrillin, Clr and Cls, thrombomodulin, and the LDL receptor. The core fold of the EGF domain consists of two small beta-hairpins packed against each other. Two major structural variants have been identified based on the structural context of the C-terminal cysteine residue of disulfide 'c' in the C-terminal hairpin: hEGFs and cEGFs. In cEGFs the C-terminal thiol resides on the C-terminal beta-sheet, resulting in long loop-lengths between the cysteine residues of disulfide 'c', typically C[10+]XC. These longer loop-lengths may have arisen by selective cysteine loss from a four-disulfide EGF template such as laminin or integrin. Tandem cEGF domains have five linking residues between terminal cysteines of adjacent domains. cEGF domains may or may not bind calcium in the linker region. cEGF domains with the consensus motif CXN4X[F,Y]XCXC are hydroxylated exclusively on the asparagine residue. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.20 0.72 -4.29 733 2407 2012-10-03 09:47:55 2009-11-16 13:04:43 2 940 145 0 1458 7661 32 23.80 49 2.31 CHANGED SapC..s...C..sGYp...........h.........s.-u.ps.........CpDIDE ......SapC.....p.....C...sGap...........Ls.........s.DG.+s.........C.p.DIDE........ 
0 669 767 1040 +12498 PF12663 DUF3788 Protein of unknown function (DUF3788) Bateman A agb Jackhmmer:Q189D9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 137 and 149 amino acids in length. This family may be distantly related to RelE proteins. 25.00 25.00 27.40 27.10 23.30 21.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.33 0.71 -4.48 39 174 2009-11-19 14:09:27 2009-11-19 14:09:27 2 5 124 0 34 153 9 124.80 26 87.02 CHANGED sppphPo.cplpsalGpsh...apclpphlpppYt..hph-ashsutphGWslKa+..KtsKsLCslasccshFsshlhlGp+ctpch-thh.shostlpchacpspshss.G+Wlhl-lpspshlp.DlhcLltlKt ...............pphPs.cpltthlup.sh...appLpphlpppYs...phcashsutphGWshK..Y+.........KtuKsLCslasccshFsshlhlGccctt..ph-thh.phos.spclaccspshss..G+..WLhhclpspshhp.DlhcLlpl+p......... 0 18 30 32 +12499 PF12664 DUF3789 Protein of unknown function (DUF3789) Bateman A agb Jackhmmer:Q18DB3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two completely conserved residues (V and C) that may be functionally important. 22.00 22.00 23.10 22.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.34 0.72 -4.63 11 148 2009-11-19 14:19:49 2009-11-19 14:19:49 2 1 103 0 8 89 6 33.20 46 71.96 CHANGED pllpDhLLsohGsslGVslMCllpsGKtADccMc ......llKDhLLs.hGuhlGVslMClhpsuptADptMc...... 0 4 7 7 +12501 PF12666 PrgI PrgI family protein Bateman A agb Jackhmmer:Q187G3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 116 and 146 amino acids in length. This protein is found in an operon that is part of a Type IV secretion system. 
27.00 27.00 27.30 27.20 26.90 26.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.04 0.72 -3.84 74 644 2009-11-20 16:12:33 2009-11-20 16:12:33 2 2 377 0 81 480 60 93.20 27 64.99 CHANGED pVPKDls+.hcsKlhhsL.ThRQLlshuhushlGlshahhh.+thl........u.........sshushlhlhshlP....hhhhuhachcG.hshEchltthlchc.lps+htsapo ..........VsKDls+.hcpKlhhsL.TpRQllhhssushl.u.ls.lahhh...pthh.................s.......sshuhhh...hl.hsslP....hhhhuhach..cG..hs....hEchlthhl+hc..hpscpRsat................. 0 46 65 75 +12502 PF12667 NigD NigD-like protein Bateman A agb JCSG target Q5LAY5_BACFN Family This family of proteins is functionally uncharacterised. This family of proteins is found in Bacteroides species. Proteins in this family are typically between 234 and 260 amino acids in length. These proteins possess an N-terminal lipoprotein attachment site. The family includes NigD a protein found in the Nig operon that encodes a bacteriocin called nigrescin. It has been suggested that NigD may be the immunity protein for nigrescin (NigC) because it is directly downstream [1]. 
21.00 21.00 23.50 23.20 18.80 18.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.50 0.71 -4.51 55 331 2009-11-21 13:39:49 2009-11-21 13:39:49 2 1 107 19 39 282 1 190.80 20 78.62 CHANGED hsTspssss...t.........htsDc..GsplhPsssss.........hssssupRshss..a....p.htpt.......sshshslplhtlpsllopsst.......hssp...p.p.thusDPltlhsh........Wh...s..ttYLslhaphphs..........sptppHhhsLlhsp.............tt.tsssshlpLpLRHs.....s.u..Ds.psthtp..u...........hlSasL.spl.sppttt.....ptlcl+hpsht.suctph.p ...............................................................s....ss...t.....hhthDc.us..pl..hPsssph............hpsspspRsls...a....s.hppt..........sshshslpl.ptlpsllotsht..........t.hssc...p.c.thusDPlplhsh...............Wl.....s..ssYLslhaphphs..........pptp+..HhlsLltsp.................t..spsshlpLpL+Hs......stu.....Ds.pshhtp..u...............hlSasL.spl..sptttt.....pslplphpsht.suc.p..th............................. 0 11 33 39 +12503 PF12668 DUF3791 Protein of unknown function (DUF3791) Bateman A agb Jackhmmer:Q184Z5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 71 and 125 amino acids in length. 21.90 21.90 22.80 22.10 21.70 21.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.87 0.72 -4.36 45 302 2009-11-21 15:27:16 2009-11-21 15:27:16 2 4 144 0 37 238 2 61.40 24 67.45 CHANGED lpFslh.....sIEshAcchshssp-shphhp.chsllc.aIhppY-sLHTput-all-Dlhchlcpps ................tahhh.....hIcphAcchslsspcshphhp.chs..llp.alhppY-sLHTpupcall--lhphlppp........ 0 18 33 37 +12504 PF12669 P12 Virus attachment protein p12 family Bateman A agb Jackhmmer:Q18C01 Family This family of proteins are related to Virus attachment protein p12 from the African swine fever virus. The family appears to contain an N-terminal signal peptide followed by a short cysteine rich region. 
The cysteine rich region is extremely variable and it is possible that only the N-terminal region is homologous. 33.50 33.50 33.50 33.50 33.30 33.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.85 0.72 -3.77 134 610 2009-11-21 15:48:40 2009-11-21 15:48:40 2 2 570 0 65 310 5 50.80 31 79.85 CHANGED slllslllhshsh.hh...l.hphh.+p.....p+p..G...........s.sC...u..C.........tssCtstt.......................................................pppc ......sIlI.shl.Ihuhsh..as..l.h+hh.K+..sKp...G........pCusCthsps.C............tCssp......................................pp...................... 0 25 52 58 +12505 PF12670 DUF3792 Protein of unknown function (DUF3792) Bateman A agb Jackhmmer:Q183N7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. These proteins are integral membrane proteins. 27.00 27.00 27.90 27.70 26.30 26.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.57 0.71 -4.23 64 372 2009-11-21 16:02:30 2009-11-21 16:02:30 2 1 364 0 85 268 3 115.70 27 90.89 CHANGED sttphtsllpGllhuhhlohlhhllhulllhhss....lsEphhshhhhhlhhloshhGGhhuu++spp+GalhGhhsGllYhlllhlluhlhhss.hhs.hphhhphlhshsuGslGGhlG ........p..hspslhhGllhshllshlh.hlhu.LlLpaos....lsEsslshhlhllsllShhhuGhhuG++sppKGWlhGhhsGlhahlllhLlshlshsp.shs.sptllhhlhhhsuusLGGllG................... 
0 42 70 76 +12506 PF12671 Amidase_6 Putative amidase domain Bateman A agb Jackhmmer:Q18BM0 Domain \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.50 0.71 -4.18 83 491 2012-10-10 12:56:15 2009-11-21 16:27:14 2 11 364 0 128 483 20 158.20 30 49.09 CHANGED tYspttAlp....YAc+assst.................Nss.......YtsF.........uuD...................CTNFlSQsL....puGGh...................................W...................ttss..........Ws.......supshhpYltps.thstthuttshpth.....................GDllpachs.uss.............................sHsslVoshsspshs........llssHTss....phphshs......hhtstphpathh .................................................................YsRhpAVp....YAc+.ahsst....................................Nst.......YpsF................ssD...............................CTNFlSQsL.....cuGu.h.shsst...................................................W.............ppsthoh............u....Ws.......sA....cuhh.Yl...tss..tps..t...h.ht.tt..sht.p....t...................GDlItY-hp.scs.......th............................sHoslVsuh-spuhs...................LVssHoss......phph.as.....sh.p.ph.....h.......................................... 0 61 107 112 +12507 PF12672 DUF3793 Protein of unknown function (DUF3793) Bateman A agb Jackhmmer:Q18C06 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 211 amino acids in length. There are two conserved sequence motifs: PHE and LGYP. 
22.80 22.80 39.80 39.30 21.00 19.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.12 0.71 -4.69 77 325 2009-11-21 16:43:30 2009-11-21 16:43:30 2 2 302 0 75 276 6 175.40 30 88.86 CHANGED lshpC...APsLuGlKsusLhshs.................htstcplhphhp.hst.h...spslphhhLp..pspsphllhlY+pptLcphLpcppspphLpph..GY....pstslpphLppLppRh......ppst...............................FPHEIGlFLGYPlcDVtGFIppsGpshhhsGhWKVYss.pcuhphFppacps+pthhph.htpG.pslpplsss ........hhhpsAPsLsGlKsusLlshs.................hp.ptppl.t...phhp.apptht....tpslphhhLp..ps..ppphllhlY+cctLpphLpppcsppaLpph..GY......pstslpphLppLppRh..........ppp.p........................FPHEIGlFLGYPlcDVtGFIpppGpshhh.sGhWKVYss.pcApchFppYcps+phhhph.htpG.hsltplh..s.......... 0 43 67 71 +12508 PF12673 DUF3794 Domain of unknown function (DUF3794) Bateman A agb Jackhmmer:Q185X2 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF01476. 21.70 21.70 21.70 22.00 21.50 21.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.91 0.72 -3.83 173 709 2009-11-22 14:22:34 2009-11-22 14:22:34 2 10 223 0 199 603 18 88.90 18 45.01 CHANGED sclt.cIlpspsp.lp..lpch.cl....hps..................................+lhlcGhlphpllYl................upcs....pp.......lpshp.tplsF..sphl-l.tusptsh.psp..lpspl-pl..ssslhss.c ................................................................plt.pllp.spsp.lp....lpch..cl......hps.............................................................KlhlcGhlphpll..Yh........................up-s..........pt...............lpshp..tplsFsphl-l...tusppsh..tsp.....hphp.lcp.h..phph....t.............................. 0 122 177 188 +12509 PF12674 Zn_ribbon_2 Putative zinc ribbon domain Bateman A agb Jackhmmer:Q18AK6 Domain This domain appears to be a zinc binding DNA-binding domain. 
21.30 21.30 22.00 22.00 21.10 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.36 0.72 -3.64 52 343 2009-11-22 14:37:54 2009-11-22 14:37:54 2 16 275 0 77 278 17 80.40 35 52.03 CHANGED paCQSCGMPlspss...hGoptDGopsc-YCtYCYpcGpFst.phoh-EMI-hss.ahsctst.....hs.-pA+phhpphhPpLKRW+ .........aCQSCGMPhspss...hGoptDGopsc-YCtYCYcsGtFhp.phoh.-EMI-hsst..ahschst.....hst-pA+phhpphlPpLKRW+.................. 0 35 59 70 +12510 PF12675 DUF3795 Protein of unknown function (DUF3795) Bateman A agb Jackhmmer:Q186T2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 99 and 171 amino acids in length. This protein is likely to be zinc binding given the conserved cysteines. 22.60 22.60 23.90 23.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.92 0.72 -11.75 0.72 -3.80 72 322 2009-11-22 14:49:21 2009-11-22 14:49:21 2 10 173 0 78 292 7 73.50 32 51.97 CHANGED hhuhCGlsCstCstahtsp...............................................Cs.GCtstpt.........htsttCp..ltpC..stc+.slspCucCsc..a.P.Cphhpp ...............huhCGlsCs.tC.ta.ttt.................................................Cs.GCtstph..........tsppCp..l+pC..sp..p.K..sl..saCscCsc.a.P...Cchh.t................... 0 46 75 78 +12511 PF12676 DUF3796 Protein of unknown function (DUF3796) Bateman A agb Jackhmmer:Q185I0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 
23.00 23.00 23.00 23.70 22.60 22.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.93 0.71 -3.88 13 204 2009-11-22 16:47:27 2009-11-22 16:47:27 2 1 179 0 17 150 2 106.10 38 77.59 CHANGED ps+luYL.GFlGFlGF...LG.h.FhsppshFs.hhaFsFFsFFsa...uKl...lsDELFhpcV+tAsopAFhVullhssIlllhl...hlhcNlcl....Iclhl....sIshslsLsTFllsLhla-hppcc.hpD.......pp ...............s...hhhh.GFlGFhGF...LG...a..tt..hh...hFsFFuFFo.a......+pV.........ptDEhF.p.lsKu..sp.uFllsLhshhIlhhI.....hlhss.sL.......phpI....slhhusLIhsFuhshhha-+.........s.................................. 0 6 10 12 +12512 PF12677 DUF3797 Domain of unknown function (DUF3797) Bateman A agb Jackhmmer:Q18AB4 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 50 amino acids in length. There is a conserved CGN sequence motif. 21.20 21.20 21.20 23.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.52 0.72 -4.34 9 99 2009-11-23 09:31:14 2009-11-23 09:31:14 2 2 89 0 3 54 0 53.50 77 43.88 CHANGED M..pshcslpLhpK...YspCPpCGN-plGNGEGsL...l-DDpFKRTCKCGapIc ....MDLIIQTFPLDGKTLYYVQCPVCKNNRILNSGANVSRIISDDTFRKLCGCTCDVK. 0 1 2 2 +12513 PF12678 zf-rbx1 RING-H2 zinc finger Coggill P pcc Wood V Family There are 8 cysteine/ histidine residues which are proposed to be the conserved residues involved in zinc binding. The protein, of which this domain is the conserved region, participates in diverse functions relevant to chromosome metabolism and cell cycle control [1]. 
30.60 29.80 30.60 29.80 30.50 29.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.22 0.72 -3.87 110 903 2012-10-03 15:03:13 2009-11-23 17:28:56 2 21 332 18 628 6919 344 76.70 40 28.82 CHANGED +hplp.....c...apuVuhW.p.....Wshs....................-..........s..CuICRsph..ssC.....pCphss.................--...Csl..sh...Gt.CsHsFHhHCI..........pcWL..c..pps.......hCPlCp .........................................................................................htl+.c.WsuVuhW..s....WDl..........................................s.D..........s.....C.AICRscl...h-..hC...l...cC.pusptt............................................................t--....Csl...........sa...G...C.....s............H.s......FHh.cC.l..........ppWL....+....ppp..........sCPLsp............................................... 0 233 348 516 +12514 PF12679 ABC2_membrane_2 ABC-2 family transporter protein Bateman A agb Jackhmmer:Q189I9 Domain This family is related to the ABC-2 membrane transporter family [1]. 
26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.86 0.70 -5.54 35 2420 2012-10-03 10:13:34 2009-11-24 09:49:33 2 14 1493 0 891 9805 1242 277.90 14 89.44 CHANGED hslhcpEapchl+s+phhllsllhhll.....................................hhhshhsphssthhsshtsstpshssh.hh.........................................................................................................hhthllslh........uhlhusshlutEhcpGTlchLLupPloRtpllluKhluthsh....lllshlluhll................................shshhhhhusshshtshhhhhhhshhhhhhh.......hululhlS.slh+..osstAhssululhhhhtllhshhshh......l..h..........................................................h.hl.Ptshhphhhpthhtst................................................................................h..ht.hlhhllhhllhlsluh....hhFp+cDl .............................................................................................................................................................................................................................................................................h..lhtpEhh.p.h..h...p...p...h...h..h......h....h..h.h....h...l.h.hhh............................................................................................................hh.h.h..h....h.......t..h.........s............h...............t....................t......h...........t....h................h....h............................................................................................................................................................................................................................................................................................................................hhh.h.l..h..s..l.h............s.h..h...l..u.......h.....s....h.....l......u......t....E....h..c.p..G.T.lc....h..L....L.s.p.P.l.oR.s..pl.lh...u....K....h..luh..h..h.h...............hh..l.s.h..l.h..u..hhh...............................................................s
.h.h...h....h....h......h.......h....s.....s........s.......h.......s......h.....h......t....h......h....h.......h.......h......h....h...h....h...h...h....h..hhh..............h...h.u....l...u...h.....h..l.S...s..h....s..p......p....p....t..h....A..h..s...h....u....l..s.l.....h....h....h......h......h.....l...l..h..s..h....h...h....h.h..........h.......h.............................................................................................................................................................h...h...........h...s....h....h...p..h....h............t...h...h..t.s...............................................................................................................h...........h....h.h....h....h...h.h..h....h....l...h.h.h.lsh.......hhFt+t.............................................................................................................................................................................................................................................. 0 334 627 821 +12515 PF12680 SnoaL_2 SnoaL-like domain Bateman A, Finn RD agb Jackhmmer:Q11X43 Domain This family contains a large number of proteins that share the SnoaL fold. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.33 0.72 -3.61 623 6223 2012-10-03 02:27:24 2009-11-24 16:13:34 2 57 1993 66 2137 7931 2945 104.20 15 64.43 CHANGED l.pp...a.h.c....s.h.s......st...-...h....c....s.h....t........sh....h....u....sD.h.h...h....p.........ss.....sst...........hp.Gtp.....shtp........hhpthht.................hsshphp.lp.ph..........hss..G....c.........tlss..p.....hphph...........................sup.thph.................pshs..h..acl...c...c.G...+..Isch ...............................................ta.h.p....s.h..p........st...D....h.....c.....t.h....t........sh....h....s....sD..sh.....hp..............ps.....sst......................hp.Gp.p.....sltp.......hh.tphh.t........................th.s.s.h..p..h.p...h..p.ph............hss....G.....s.....................hs...hs...p......h.pht..........................................s.up.th.ph.................phhs...h..h.ph....c...s.G...+Ist......................................................................... 0 618 1329 1765 +12516 PF12681 Glyoxalase_2 Glyoxalase-like domain Bateman A agb Jackhmmer:Q187Q5 Domain This domain is related to the Glyoxalase domain Pfam:PF00903. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.39 0.72 -3.44 429 20226 2012-10-02 15:00:03 2009-11-24 17:23:30 2 76 3531 174 6614 23813 5121 112.40 17 76.73 CHANGED lhs..p..D..hpt..utpFYpph.l......Gh..p...htt...................pt...s....p..h......sh...h.ph......s................h..h.............................................t....h..tt.hh.....t......h..t...........................t...t.....ss..........ts........st........h..tl.....t..h.pl.......s.-....l..-s..h...hp...pltst.................G......sph..............h..p....s...s..t.......p...........hsh..............s........p...hhh..l..tDP...-Gshlplh .....................................................................................................................spD..hpp.u...h.pFY..p...ph...l.........G.h...phtt......................pt.s....s..h........sh..h..ph......s...........t.h..........................................................................................................................t...h...t...hh....t.......t..............................t...st....ss........ss..............st................hp.l............s.h.ts.............s..-...........l....-...s...h....hp.....c...l....tst..........................G.....sph..............................h..p....s...s..t.........p.............tsh...................................u...........p.......hh.h..h...tDP...-Gphlpl................................................................ 0 2073 4288 5609 +12517 PF12682 Flavodoxin_4 Flavodoxin Mistry J jm14 Manual Domain This is a family of flavodoxins. Flavodoxins are electron transfer proteins that carry a molecule of non-covalently bound FMN. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.05 0.71 -4.87 5 1133 2012-10-03 05:08:30 2009-11-25 13:20:28 2 15 627 3 273 1382 49 149.40 28 80.50 CHANGED KsLVAYFSso.....................GNTKhlAEhIAEtTGADLFEIcPscPYTctDLDas-ptSRSSVEMpDspuRPAluscl.shEDYDVlFlGFPlWWYsAPpIVsTFlEuYDFuGKpVIPFCTSGGSGlGsotKcLQ.pAsPcAolLEGptlsRGpl.......oRcpVs-Wlc+L ................................................................+hLlsYaSto.............................................G.sTctlAc.....hI.s......c......t.....h.......s.....u.......D......l.....h....c..I.p.....s...t...p...s...Y..s..p..............s.......a.p......s.t......h.......pp......u..p..t..............E.......h.....p..p..........s.t+..P..t....l.....p....s..p.....h.......s..h.sp....Y.......DslalGaPlW.at....p.hsh....sltoF...L....c...p....h.......-.......h.............s...G.K...pl..h....PF.s.T...p...u....G...o...u....hu..p.......s....h....pp.l...p....p.h.........t.....s.....s..s..p....h.h..p.Ghh..h.....t..s.t.....................t..t...lttWlt........................................................... 0 90 199 240 +12518 PF12683 DUF3798 Protein of unknown function (DUF3798) Bateman A agb Jackhmmer:Q183D5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 247 and 417 amino acids in length. Most of the proteins in this family have an N-terminal lipoprotein attachment site. These proteins have distant similarity to periplasmic ligand binding families such as Pfam:PF02608, which suggests that this family have a similar role. 
21.00 21.00 21.80 34.00 20.70 20.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.40 0.70 -5.31 16 148 2009-11-26 10:12:10 2009-11-26 10:12:10 2 1 90 2 26 138 5 260.70 50 68.55 CHANGED ssa+IGllTuTlSQuEDpaRuA-thhccYGs................hIhpsTaPDNFssEhETTISplluLA-DPchKAIllspuhsGshsAhpKI+EcRPDIlhluusspE.......DsshhupsuDlshssD.lspGhslsptApchGAKsFlHhSFPRHhu.hpslupR+shMccsCc-LGlcFl-sssPDPsoDsGsuusQQFlhEclPchlcKY.GK-TAaFuTNsuhpEPll+plhEtGuhasptssPSPhhuYPuALGl-lstsctGDastlhcplpcKlsctGhsGRhG .....................................saHIGllTsoVSQSEDshRGAEtllc..cYGsssp...........GGhIpplThPDNFMpE.pETTISplluLADDPpMKAIVVspulPGTstAF+cI+EK..RPDILlhsupsHE.......DPshlsssADlsls..sDtluRGYlIltsA+chGAcpFlHISFPRHhS.YEhlu+RRsIMcpsCc.DLGhcalt.oAPDPsSDVGVsGAQQFILEpVPpWlpKY.GK-sAFFsTNDApTEPLLKplst.h.GGhFlEA-hPSPhMGYPGALGl-ho.c-tGsastILc+VEcsVlttGGsGRhG........... 0 17 25 26 +12519 PF12684 DUF3799 PDDEXK-like domain of unknown function (DUF3799) Bateman A agb Jackhmmer:JCSG target 392282 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 265 and 420 amino acids in length. It appears that these proteins are distantly related to the PDDEXK superfamily and so these domains are likely to be nucleases. This family has a C-terminal cysteine cluster similar to that found in Pfam:PF01930. 
21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.58 0.70 -5.02 19 347 2012-10-11 20:44:46 2009-11-26 15:13:40 2 3 303 2 52 350 410 226.40 26 57.25 CHANGED cYhSsSpaKpF......hp....CEAtAhApLpGcWp......ppsssALLVGNYVHS.YFES.csc-pFhc.......pcschhoph.....pKGpL+u-FphA-pMIpsLcsDchFtphYp.G......-KElIlTG-lhGl.WKuKlDslNhpcshFl.DLKTsc.sl+.pchWsp..t......ssFlctasY.lQhAlYpEl..lcQ....phG.cphpPhIhAVoKpssPD+sllplsp....phhctuLcplc.pslp+lhpVhpGc.cPs+CG+C-YCRus ..................................................................................................................................................hShS.hcph......h.p......s.A.hhhhc..h....pu...hp.......ppcsps.L.lGshhHs....h.h.Espc......hpp.cFhh............tppp.chh.ppst..........ppshh.tc....-.h...c..t.h...-hMh...ps..l..ht..pshh..p.....h..l..hp...G...................Epp..l..h..h..p..sp.....s.G...l..h....h+s+.Dp.l.....s.......-.......h..p....h....h.....h.D.lK..TTt..D..lp..................pa.....tshh.satYclQhAhYp-h..hct......ph..G..p......t.h.p.h..l..h.lul.Sc.p..h............shplhhhsp......-th..t..pp.....hc.pslpplhcshpsc.p.P.............................................................................................................. 0 20 40 44 +12520 PF12685 SpoIIIAH SpoIIIAH-like protein Bateman A agb Jackhmmer:Q18B63 Family Stage III sporulation protein AH (SpoIIIAH) is a protein that is involved in forespore engulfment. It forms a channel with SpoIIIAH that is open on the forespore end and closed (or gated) on the mother cell end. This allows sigma-E-directed gene expression in the mother-cell compartment of the sporangium to trigger the activation of sigma-G forespore-specific gene expression by a pathway of intercellular signaling. This family of proteins is found in bacteria, archaea and eukaryotes and so must have a wider function that in sporulation. Proteins in this family are typically between 174 and 223 amino acids in length. 
25.80 25.80 25.90 25.90 25.50 25.70 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -10.92 0.71 -4.81 84 427 2009-11-26 16:59:55 2009-11-26 16:59:55 2 2 414 3 98 318 4 186.60 23 93.45 CHANGED +pthllssLslhls...luuY...................hsYths...tsttpspsttpptp..t.t.............................................................................................................................................ssspsspsssssspsspph.....................Fsph+lpREppRucph-pLppIlsssssop-pKpcAhpphhplsphtcpEttlEsllcuK.GapDulVhlsss.p..lsVlVpspp..LocscsspIt-lVpcp.sslssppltlp ......................................................pp.hllshLslhlslu..sY.....................Yhhs..p..t...sssssts.sp....t...tttt..p......t......................................................................................s...................................spppspppssssspsssph..........................Fsph+hphEppRucphppLppllsss.ssotppKscAtcphttlsphpppEttl.EsllKup.GapDulVphss......s..p.lpVsVpusc...hoppcsspIhplVpppss..t..pl.l................................... 0 48 79 87 +12521 PF12686 DUF3800 Protein of unknown function (DUF3800) Bateman A agb Jackhmmer:Q183E5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 215 and 302 amino acids in length. There is a DE motif at the N-terminus and a QXXD motif at the C-terminus that may be functionally important. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.38 0.70 -4.72 127 700 2009-11-26 17:12:18 2009-11-26 17:12:18 2 2 610 0 183 611 68 214.40 13 82.60 CHANGED hla.....lDESGshsh...............spshasluulhhppppht..phtppht.phppchhhtt................................................................l+tscltpp...ptt.....................................pplhshltpht...............hhhhs..hlhppthhppt...........htttt...............aphhhphllcclt........................................ppsp.....phtlhhD.....pppppptppltphh..................................htthtppthhpph..lpthhhhcScp...t.....thlQlADhlss....sltpthph...............................................................pptppphhphl...cptht ......................................................................................hahDESGphs..................tp.has..luu.l.hh..t.pp..hh....ph.p.ht...phppphhh.t.................................................................................................l+hsph.pp.....tht......h.................................................................................................................pthhphhtp..........................hhhhhhhh.ppp.httt..................................tt..................aphhhp.hhlppl...................................................................tptp........phh.l.h..hD........pppptpt.p.p.l..hthh.......................................h.ttthhtth...h..p....hhhhpScp........p.......hlQlADhlss......sltphhp................................................................th................................................................................................................................................................. 0 71 127 162 +12522 PF12687 DUF3801 Protein of unknown function (DUF3801) Bateman A agb Jackhmmer:Q187F8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 158 and 187 amino acids in length. This family includes the PcfB protein. 22.80 22.80 23.00 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.32 0.71 -4.75 54 614 2009-11-27 16:03:44 2009-11-27 16:03:44 2 1 330 0 47 447 58 155.40 25 85.15 CHANGED u+hLttultthLpchcKpp..................................GKpol+pLhcp.stslpslclscp..slKpFc+hAKKYGlcaulhKDpsssss..pa.VaF+ucDs-slstAFccastcplppp..................................................................................................................................................................................................................................................c+PSl+ppLpph+.tptttp.....................php+....pps+pKc+ ..............................................p.l..hhtthht..ph....pcttt...............................tGc....plKcLh+.c..up..LpsIplscs...slKchc+phcKaGVsFulhKD+psp......pY.lFFpu+Dhcsh....ptAFcc.hhsc.phc+pcc..............tphpc.hp..................................t................................................................................................................................................................................................................................................................................................ppp............................................................................................................ 0 30 38 42 +12523 PF12688 TPR_5 Tetratrico peptide repeat Coggill P pcc pdb_3kni Family BH0479 of Bacillus halodurans is a hypothetical protein which contains a tetratrico peptide repeat (TPR) structural motif. The TPR motif is often involved in mediating protein-protein interactions. This protein is likely to function as a dimer. The first 48 amino acids are not present in the clone construct. 
This Pfam entry includes tetratricopeptide-like repeats not detected by the Pfam:PF00515, Pfam:PF07719, Pfam:PF07720 and Pfam:PF07221 models. 21.40 13.60 21.40 13.60 21.30 13.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.62 0.71 -4.07 8 427 2012-10-11 20:01:02 2009-11-30 13:29:35 2 50 408 0 96 763 182 114.00 31 46.94 CHANGED upAhFERAGAhDSsG+pscAlPLYRcALAsGLsGspRRRAsIQLASSLRNLGcs-EuLALLpsphsthPuDELssAlsuFhALsLsutGRscEAluhlLtAlAsHLP.....RYQRShpsYAptL .................................................h.a.hA.haDshGh.E.pcAlshY..cp.....A...l.......s.......t......s....L...s.....u...c.....p...h...p...t.....A.h..l.....s......L..u...SohR.sl.......Gp.h-pAlslL...p.s.....s...l...t....c....a.....P.....s.......................s.s....s......h..c..s.F..h...u...h..sLa..s..l.uc..pc..E...Alphh..Lthl.....s.....pps...................ap+ult.Ytt.L.......................................................... 2 34 69 88 +12524 PF12689 Acid_PPase Acid Phosphatase Mistry J, Wood V jm14 Manual Family This family contains phosphatase enzymes and other proteins of the HAD superfamily. It includes MDP-1 which is a eukaryotic magnesium-dependent acid phosphatase [1-2]. 
27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.08 0.71 -4.62 43 326 2012-10-03 04:19:28 2009-11-30 15:33:38 2 10 244 6 217 336 18 152.80 32 75.86 CHANGED .lP+LlVFDLDYTLWPhasDs+..................lssPh+tpsss.........plh........DctGpplphYscspsILt.......tLcs.........+ulplAhASRTssP...clAcphLphLcls................th.hhchF....st.lEIas....G..oKspHFp+lpc......coGl..sapcMlFFDDEpRNhpshop.LGVshhhl..ss...GlohphhccGLppapcp ......................................hP+lhVFDLDaTLWshas-sc.........................................htsPh+..tss....................thh..............................Dp..tup..ph.phascl.tlLp..tLcp.........pul..luhASRT..sts...-hApphLch.hcl.......................................phF.....sh....hpI.as......u......sKhpH....Fpplpp........................poul...a..pp.M....lFFDDEtR....N.........hps...p..............lGVs.........h.........hhl..s...Ghshp.hppGl.tatt.t............................................................................. 0 68 123 176 +12525 PF12690 BsuPI Intracellular proteinase inhibitor Mistry J jm14 jcsg_3isy Domain This is a bacterial domain which has been named BsuPI in Bacillus subtilis.\ This domain is found in Swiss:P39804, where it has been suggested to regulate the major intracellular proteinase (ISP-1) activity in vivo [1]. The structure of proteins in this family adopt a beta barrel topology. 27.00 27.00 28.20 27.10 23.40 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.68 0.72 -4.27 39 149 2012-10-03 16:25:20 2009-12-03 15:47:41 2 11 122 1 58 124 3 82.90 27 39.88 CHANGED slphplslpNpsscslslpFsSGQcaDlhlh...........DppGcclacaScs+hFTQAlpphslpsG-shsap.tphs.ts........sGsY.plps ..................s.lphpholpNpsspslslpasSGQ+aDhhlh...........spcscclapaS..cs+hFsQshp...spslpsG-ohsap.tphstts.........stY.h................. 
0 23 39 49 +12526 PF12691 Minor_capsid_3 Minor capsid protein from bacteriophage Coggill P pcc manual Family This family is from one of three adjacent genes, all of which are involved in formation of the minor phage capsid. 20.40 20.40 22.40 23.90 19.20 18.50 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.63 0.71 -4.47 14 101 2009-12-03 17:53:01 2009-12-03 17:53:01 2 1 99 0 8 73 1 131.10 31 98.94 CHANGED M.....DhhphLtshlps..hthPhKschshl.spp-........ululh.lPuup..sppYh-GscplshpaplshKocsp.cAppsl.tIsstLspl.thc......l.ShssSapacshslhspPthsctsspGhalYhhshssclpl...ttp ......................M....DFhssLhptIcs....thsh.....ch+hshl.spp-........slslh.hPuuc..ppcYhDGsp-hShsFplshK.oK.....sptpupsslatIsshLsph....s......L.StssSapapsh-l.spPhlssts-pGhalYslshps+l.l.c.t..p.............................. 0 3 6 8 +12527 PF12692 Methyltransf_17 S-adenosyl-L-methionine methyltransferase Mistry J jm14 jcsg_3iht Domain This domain is found in bacterial proteins. The structure of the proteins in this family suggest that they function as a methyltransferase. 18.10 18.10 18.10 18.20 18.00 17.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.94 0.71 -4.55 18 51 2012-10-10 17:06:42 2009-12-07 14:19:28 2 1 41 2 19 80 17 158.40 46 96.81 CHANGED MSRLDSFIRRhoAQRDsLNaAsspsusls.GsVLELGLGNGRTYcHLREhhPsRRIhVFDRslsuHPsSsPP--shllG-lccTLst.ltthGssAuLsHADlGsGsc-KDsspAshlSPlIAslLAsGGlhVSupPLh.a.uLpplslP-ulssGRYFhYRR .................MSRLDpFIcRhoAQRDhLsasts.......pl.ssh.s...Gs.V.hElGLGNGRTYcHLREhhPsRcIhVFDR.s.ltuHP........uS..sPsp-c..h.lhG-.lc-TLss.h.thGssAuLsHADlGsG.c-+DsspushLuPLlsslLAsGGlhVSupPL..assLptlPlP-ulstGRYFhYR+................................................................... 1 2 11 12 +12528 PF12693 GspL_C GspL periplasmic domain Bateman A agb Bateman A Domain This domain is the periplasmic domain of the GspL/EpsL family proteins. These proteins are involved in type II secretion systems. 
24.10 24.10 24.10 24.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.83 0.71 -4.47 23 933 2012-10-02 17:03:51 2009-12-07 17:13:58 2 6 819 2 175 737 69 154.50 24 39.73 CHANGED pppht.W..RhlshhhulhLllhlsttshphaphtppupthptpuppla....pphFPsppplhs....ptQhpppLpph...tutssssshlshLutLtshls..sssslclpuLcacupctcl+l..plpusshsth-......phpppsupt.aplp......uphpppssplpuphslc..s..p ..................hthW....Rhshhshhlhlls....h....sstpslphap.lp...ppssthcsp...spcha....pphF..Pppp+..ls..s...h+pQhpppLpph.......pstsss......s.ph...lshL...stlsssLt......ph.s.s.lp..l...puhsacppctpLpL......phpu...tshssh-......phpptLspp..a...lp........sth.ppps.-.sVpGthsl+..p...................................................... 0 38 86 133 +12529 PF12694 MoCo_carrier Putative molybdenum carrier Mistry J jm14 jcsg_3imk Family The structure of proteins in this family contain central beta strands with flanking alpha helices. The structure is similar to that of a molybdenum cofactor carrier protein. 23.10 23.10 23.30 25.30 22.40 23.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.75 28 67 2012-10-01 21:16:48 2009-12-08 13:32:46 2 3 58 1 34 73 15 137.60 41 76.45 CHANGED IlSGGQTGVDRAALDsAlstGlspGGWCP+GRpAEDGslPs+Y..LpETssssYttRTchNVpDSDGTLIls.pGcLsGGTthThchAcctsKP.....hLll...phspsps..sptltpWltppslplLNVAGPRESpsPGIaptspshLppllp ............IlSGGQTGsDRAALDhAlt..hsl..shGGWCPpGRhAEDG.lss...cY.......LpEhss.ssYt........tRTchNVhDSDuTLIls......tu.t..LsGGo...thThphApchtKP.....hLhl..........phst...tps...sttltpWltppplplLNVAGPRpSpsPuIhphshphLpthh.t............ 0 17 28 32 +12530 PF12695 Abhydrolase_5 Alpha/beta hydrolase family Bateman A agb Jackhmmer:Q186B9_CLOD6 Domain This family contains a diverse range of alpha/beta hydrolase enzymes. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -11.17 0.71 -4.44 308 16512 2012-10-03 11:45:05 2009-12-10 17:24:36 2 168 4213 56 5692 81995 21732 192.90 14 59.02 CHANGED sllhh.....HG....s...........ss.....s...t..p.....s....h.......t....h.u...p.......tL.spp.G.hsllhh..................................ch..s...t.t...ss......s...........tsts......................phpphhpt....................................................................hh..........shp...pl...hlhGcSh.Gut.suh...hhAs...................................................................................p...p...t.....pl.p..ull....hh.u..s....................a...........................ss........hp........plt......p.hph..........................................................PlLllt...Gsp...Dphs.......s...p.ph..pp.........................................................hhpt..hs....sst.....p.h..........................................hhl.p.Gu.sHs .......................................................................................................................................................................................................llhh...H..G.......h...................ss...........s.......t....t...........t........h............................t.......h....u....p........................tl.....s....p........t.....G.......h.......s..........l...l.h.h.........................................................-h....t...s...h...ut.......s...................tst.........................................................sht..t.hlp..h.....................................................................................................................................tths.......s.p..p.l....s..l..h....G.......t..S.h..G.u.t....h..u.h.....t..h..us.................................................................................................................................................................................................
.....................................................................p.p....t......pl...p.....u.l...l........h..h.u..s.......................a..................................................................sp.......t...........t.ht........t....th......................................................................................................................................................................................................Ph.L..l..l...p........G..p.p.......D..p.hs............sh.....p...t...s....t.p................................................................................................h.h.p..t....ht...........t.h.......p..h.....................................................................h..h..h......s.stH........................................................................................................................................................................................................................................................ 0 1991 3619 4800 +12531 PF12696 TraG-D_C TraM recognition site of TraD and TraG Coggill P pcc Pfam-B_1146 (release 5.4) Family This family includes both TraG and TraD as well as VirD4 proteins. TraG is essential for DNA transfer in bacterial conjugation. These proteins are thought to mediate interactions between the DNA-processing (Dtr) and the mating pair formation (Mpf) systems [1]. This domain interacts with the relaxosome component TraM via the latter's tetramerisation domain. TraD is a hexameric ring ATPase that forms the cytoplasmic face of the conjugative pore [2]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.59 0.71 -4.27 232 1419 2012-10-05 12:31:09 2009-12-11 11:56:10 2 13 918 0 310 4110 322 119.20 22 19.22 CHANGED tlhhlLDEhuslspl.sphpphluhhtutGlphhhlhQshuQlcp...hYG..c...ptt..cslh.uNsss....pl.hhs.....ss..-.....pTschlSc.hlG....ppol..tp.tpptpt............t.....p..........p+sLlsssEltplsp .........................h.hhlDEhssl....h..h.....h......p.....t.....h....tp.....hls.........put.......uttltls.lhhQsh.uQlct......paG....c.....tps....pplh.sNs.sshl..hht....sp...ss..pTAchl..oc..th.G...ch..phtp..pohststps......................................psphphttp.llsssclhsh....................................................................................................................................................................... 0 90 215 280 +12532 PF12697 Abhydrolase_6 Alpha/beta hydrolase family Bateman A agb Jackhmmer:Q186D8_CLOD6 Domain This family contains alpha/beta hydrolase enzymes of diverse specificity. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.69 0.70 -4.48 785 53202 2012-10-03 11:45:05 2009-12-14 16:32:03 2 430 5271 368 19769 83722 24117 238.80 14 74.29 CHANGED l.l.l.l........H..G....h...........s....s..ss................t.t....ap..t............l.h...p...tL..s............p.....s...........a............p.........l...............hs...h.....-...h.........G.....t...G.....t......o.......s.....................................................t.....t........................................h.s....h....................t...p.h............sp................................s.l........................................t.t........h....l.....p.....p......h.sh............................p....p.....s....................................................h...l..l..GaS.h.G.u.......h.h...s.ht....h.s.......................tp.....t...s..p.............tl..ps..........l..l....hhs....ss.s...........................................................................................................................t..........t.hhp..............thh...............tth.t.........h.....tth............t......t.t.......hht..h....h...................ttpth............p..phh....................................pt........................tht.th..hp.hhtt........................................................................tttth...t....pt.h...sp........h........s.........h.P....s......hh..l.....h.....upp.Dp..............................h.h.....s..............p.th........................pph...t....p..t...h.....s..s.........s.........p...h...h......h..l.s.....t..u.u.Hh....h...h.h...p...p.....s...p.t...l...sph 
...........................................................................................................................................................................................................llhl.H...G.....h................s........s.st...............t......ap..t............................h.h.....t...tL..s.......................p.....s...................................a................................p...............l...........................l.s.....h...................D........h...................h....G.............h.....G...........t............S.......s................................................................................................................................................................................................t.........................................................h..s....h....................................................p...p...h...........................s.p...................................................................s...l................................................................................................................................................t..t.......l.....l.......c....p......hsh...........................................p..p...h........................................................................................................h...l....l...G.a...S..h.....G...G............h.l.....u...h.t.....h.u.........................................................................tp.......h..P.p..................................pl..p.s.......................l......l.......lhs......ss.s....................................................................................................................................................................................................................................................................................t.h.h.t.........thh.........................t..h......t..............h..............tth.......................t....t.......hh...t......h.
h...........................t.t...t..h.............t.t.h.h..............................................................................pt..........................................t.t.h.h.t.hh................................................................................................................................ht.tsh....h.........t..t..h.....tp............l............p...............h.P......s...............ll...l.......h..................G..p..p...D...........................................................................................................h.hs..................p.hh...............................................pt.h.......t........p...h......h......s.....p...........................s.........................p....h........h.........h...h............................t...s..u.Hh..........h...p.......t......t...h...h........................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 5860 12066 16584 +12533 PF12698 ABC2_membrane_3 ABC-2 family transporter protein Bateman A agb Jackhmmer:Q17ZU3_CLOD6 Family This family is related to the ABC-2 membrane transporter family Pfam:PF01061 [1]. 
31.60 31.60 31.60 31.60 31.50 31.50 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.58 0.70 -5.41 220 16168 2012-10-03 10:13:34 2009-12-15 10:08:38 2 60 3863 3 4148 18062 2565 336.20 12 63.90 CHANGED +t.h....l..h.tll..h..P...ll.h.h..h...l.hs.......hh..h....h....htht....p..............pshpl.................sl.lsps......................hspp....hh......................................................ptlp........s...p..s..p......hp....................thss....hp.psp..pt.l....ps.t.......p.hp..u.hh.h.......l.spshttth................................................................................................t...phphhh....ssps.h.....pts............................................ttltpt..lp..............................................................................p...l........hpt...................................h..pthshsh..p..h.hsh.................t.ptt.....p..sh....tt.......h.............hhs......hl...h..hhh....hhhs...sh....h..h.sh.......t....l....s....pE..+p.s.ph.h.c...hhh...so..l....shhp.......hhhu+.......h....lshhlh...th...h....h......h..l.h.h......h.......l.......h...hu..................................................lshs.....sh...........h.h...h........l.....lhh.llh.hh...shss..........luhhl....utl....h.p...st.......tp..s.......hh...hshhhh...........h.h.hsh....hh.sh...h..h.......h.s..t.........s...........sh......h..t.h....l.....hp.hl..P.hhs.hhhh..h..hp.l.h................hss.....hp............................................lh.....hslhhl.h...lh...sll....hhh...lshh 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................h.........h..hs..hh.h......h...h...................h.h.............................................................tth..h............................sl..hs.p........................................stt...hh......................................................................................................................................................................pt.ht.............t.................ht.........................h.s.....t.pht.......tt..h......................pp.t............p..h......h...hl...............................h..s.sht.tth.............................................................................................................................................................................................................................................................................h.t.hhh.....stt.........h.....................................................h.th......lp................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.....h........ht..
..............................................................t.........hph...p....h.h..............................................hs.t.......th.....t.............h............................hhs........hh.....h..hhh....hh.hs.....sh......h....h.sh..............l..............s.....pE....+....p..p....t...h...h..c...............hh.h................s.s..s...l.....s......hp............h.h...hu+..........h.......ls....hh..h..l.......sh.......lt.......h...........h...l...h.h.....h.......l..........h..hs.......................................................................lsht.......s.................h...h......h...........h.......hhh....hlh..hh....s.hhs...................huh...h...l.......us.h........h.p...s.................tt..s.............h..hh.......hhhhhh.................................h.hh.h........hu..sh.......h.s..............h.p..t...............h.s...................................th..........h...p..h................l............................t.........hh....P..hth..h.h.ph.....h...tph..h............................htt........p.........................................................................................................................hh.......thh.hl..h...hh..sh.h.hhh.h...h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 1433 2455 3318 +12534 PF12699 phiKZ_IP phiKZ-like phage internal head proteins Hardies SC, Coggill P pcc Hardies SC Family Phage internal head proteins (IP) are proteins that are encoded by a bacteriophage and assembled into the mature virion inside the capsid head. The most analogous characterised IP proteins are those of bacteriophage T4, which are known to be proteolytically processed during phage maturation, and then subsequently injected into the host cell during infection. The phiKZ_IP family consists of internal head proteins encoded by phiKZ-like phages. Each phage encodes three to six members of this family [1]. Members of the family reside in the head [2] and are cleaved during phage maturation to separate an N-terminal propeptide from a C-terminal domain. The C-terminal domain remains in the mature capsid. The N-terminal propeptide domain is either mostly or completely removed from the mature capsid. In one case, an unrelated polypeptide is embedded in the propeptide and also remains in the mature capsid. The phiKZ-like IP proteins are not discernibly homologous to the T4 IP proteins, and it is not known if the phiKZ-like IP proteins are injected into the host cell, or have some other function within the head. The alignment and HMM model exclude most of the propeptide region, but include the cleavage sites. The first 100 residues, including the cleavage sites, constitute the most conservative part of the seed alignment. 
22.30 22.30 22.30 22.60 22.20 22.20 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.87 0.70 -5.52 14 27 2010-01-13 14:45:13 2009-12-15 10:40:50 2 2 9 0 0 28 0 299.70 13 68.77 CHANGED tus.tho........ppstpshhssLEslsppl.thcthshu..hEsasss.pst.tthh.....slE..............slpctlppsscphhchlp+lhphlpphhsphpsulpplpc+hspLhc+hpsh..........tp.s.tpplslp.s..pclhhsGpFl..............shhhhsh.phsshhsptasp.hhshlpphschlsshphscp.hsphppthphhsc....................................htstthhhpustlsGNptlhhssspht.s..................hpa..lpsputsss-........hsl-s.sssp.lppplctlpphlptlpchppttpchp........-plcphh-shpp.sth..scps......hcshp-sstclhcsstshlpshssplsshl ..........................................t....ths.pththh.sshcphptph...pthshu.......hEshs.ss.p....th......sh-..............slppthpphhcchhchlpplhshlpphhpphtsuhpplpc+hspLhpchpph..............................tp.s.tpphplt.s..thlhhsupah.................hhs..phsp.htpshsp.hhphhpphtthhpsh.hppp.h.sthtphhphhtt.........................................hh.ts..lsGNthlhhs..tsthh..........................ttpt..st.p........hplch.sstp.ltphlptlpt......hh....phhtthpthpppht........ctlpp.hhpthtt.ttt..spt.......hphhtps.tchhps..thht.ht..ht............................................................................................................................ 0 0 0 0 +12535 PF12700 HlyD_2 HlyD family secretion protein Bateman A agb Jackhmmer:Q182V7_CLOD6 Family This family is related to Pfam:PF00529. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.97 0.70 -5.28 70 15970 2012-10-02 20:27:15 2009-12-15 12:46:01 2 62 3553 31 4798 25329 5154 298.90 18 77.75 CHANGED plppsslt..pshshsGhl.......l..+.pEphlpuptsGhl...phhlp-Gp+VppGshlhslsssphtpphhpt.tt.t.....hpphp....................................t.h..................h..thppp.apthht.ppphpthh.phpp.ptt...........................tthptphp...................................................lpushsGlls...hDuhEth......................t.hp.....................ppp..hpss....stlpsusslh+llss.pp........hplsh.l..s.....ccphpplp......p....sps..l...................plphts.spshpup..lp.ltpt......sp....t.hshlphsst.htchhspRhlclplhhpp.psGLpIPpoAlspcs..........................................................................................................stpGVYsl.ppu..hspF+plc.....lltp..sccahlV.................tpG.Ls.hDplll ....................................................................................................................................................................h............h.h.G..p.l...................t.....s...p.p...s...t...l...s.....s...p.....s....s........G........p....l.......tpl.........h........l.......p..............G..pp.......V.......c.p.G.p.h.L.s.....p...l...c...s.......s...p.......h....p....t....t.....h....p......p......s...p...u...t....h...t...t.......p.t...t...h..tp.h.p........................................................................................................................................................................................................................................................................................................................................hp..h.....t.....c.........p.....p...l.....h.......p.......p........t..........h.......h.......s.......p.......p.....p.......h.....c.......p.......s..p...s..p..h..p.t.spsph...........................................................
..............t..p...th.tp..............................................................................................................................................................................................................................................................................................lp.A.P.h.s...G.....h...ls....................................................................................................................................tht.....s.p....G............ph....l.......s......s........u.......p..........s.........l......h....p...l.....s...s...h....ss..................................h.hl.p...h...p...l......s...................Ep..p....l....s..p....l..p...........................h..................Gp.p...h........l.....................................................................p..h....t...s......h......s..........s.....p......p....h......p...up........lp.t..lsstss................................tst..........s.h..t..s...p...h.....p.........l............s.........s............s.......s........t..........t.............l.........h.......s..........G.....h.......h......s.......p.....s.........p.......l..................h.............t...........p......p.......s............s..........l.....h............l....P.t...p......A..l.....h.....t....p..s...................................................................................................................................................................s.t...t...h.....V...h...l.......h....p....ps..................ph....p.h......p.............V.p...................l..u..t............ss.t.....s...l........................hpG..Lp.G-pll.................................................................................................................................................................................. 0 1643 3132 4033 +12536 PF12701 LSM14 Scd6-like Sm domain Anantharman V pcc Anantharaman V Domain The Scd6-like Sm domain is found in Scd6p from S. 
cerevisiae, Rap55 from the newt Pleurodeles walt, and its orthologs from fungi, animals, plants and apicomplexans [1]. The domain is also found in Dcp3p and the human EDC3/FLJ21128 protein where it is fused to the the Rossmanoid YjeF-N domain [1,2]. In addition both EDC3 and Scd6p are found fused to the FDF domain [1,2]. 25.80 25.80 25.80 26.80 25.00 25.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.07 0.72 -4.18 156 564 2012-10-01 22:42:21 2009-12-15 16:50:39 2 12 284 5 358 507 3 94.00 40 21.39 CHANGED ..ssssaIGSpISLISKu-.IRYcGh.LasIss...ppST.......luLps.....................V+SaGTEsRtps.......lsspsplY-aIlFRGSDIKDLpVt-ssp........shssDPAIlpsph ..................hssalGSpISLIScsp.IR...........Yp.GhLhsIss...cpuT.......luLpp.............................................................VRSFG.TEsR.sst..........lsPpcpla-.YIlFR..GSDIKDLpVt-sst...........shspDPAIlt...h............................................ 0 104 170 266 +12537 PF12702 Lipocalin_3 DUF3803; Lipocalin-like Coggill P pcc JCSG_target_392987_3hty Pfam-B_17140 (release 24.0) Domain This is a family of proteins of 115 residues on average. The family has two highly conserved tryptophan residues. The fold is very similar to the lipocalin-like fold from several comparable structures. 20.20 19.40 20.20 19.40 20.10 19.10 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.13 0.72 -3.96 17 79 2012-10-03 08:47:39 2009-12-15 17:12:47 2 3 61 16 8 74 2 90.60 30 55.05 CHANGED sptsllGsWlpPlss.s.sthQGhpLctsGpAoSINMsTLhYcp....................Wc..ppGspLlLoGpShGNst..s..hp-ThpIcpLTscoLlL......cptshplpYs .....................pllGsWspPhshps.sthpGhplcpsGsAS.SI.NMsoLpYcp.........................Wc..hpGs..pL.lLpGpShGstt..s..hssohpIcplopcoLhL......pstt........................ 
1 6 7 8 +12538 PF12703 plasmid_Toxin Toxin of toxin-antitoxin type 1 system Gardner P, Coggill P pcc Gardner P [1]) Family This family is the toxin of a type 1 toxin-antitoxin system which is found in a relatively widespread range of bacterial species. The species distribution suggests frequent horizontal gene transfer. In a type 1 system, as characterised for the plasmid-encoded E coli hok/sok system, the toxin-encoding stable mRNA encodes a protein which rapidly leads to cell death unless the translation is suppressed by a short-lived small RNA. The plasmid-encoded module prevents the growth of plasmid-free offspring, thus ensuring the persistence of the plasmid in the population. Plasmid-free cells arising after cell-division will be killed because the stable mRNA toxin is present while the comparably unstable anti-toxin is rapidly degraded. Where the system is transcribed chromosomally, the mechanism is poorly understood [1]. 19.40 19.00 19.40 19.00 18.80 17.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.39 0.72 -4.16 6 28 2009-12-16 11:43:50 2009-12-16 11:43:50 2 1 28 0 8 18 0 70.20 59 92.47 CHANGED Mss.p.hpstpslHpsAh.LuuLcalDQcsARpLu.hAEAVANhFMVVFYQAETGRATptDFpEAMsAlRQuhp .......................MsTtHsIE..spQAlHQAAhpLAALEaIDQcsARplhPlAEAVANhFhllYYQAETGRAT.pDFpEAhsslRQsh.p.. 1 3 4 7 +12539 PF12704 MacB_PCD MacB-like periplasmic core domain Bateman A agb Jackhmmer:3ftj Family This family represents the periplasmic core domain found in a variety of ABC transporters. The structure of this family has been solved for the MacB protein [1]. Some structural similarity was found to the periplasmic domain of the AcrB multidrug efflux transporter. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.22 0.70 -11.73 0.70 -4.77 1392 21380 2009-12-16 11:56:13 2009-12-16 11:56:13 2 26 4469 3 5700 17502 5573 229.30 13 48.61 CHANGED Ah+slhpp......+hRoh...Lohl......G..lsl..G..luu..llsh.hu.ls.pu...hppp.htpp..h.....tp....hus..sh.......lh...lpssts..................................................t.tt.t..sl.s..h.ps.hpt....lp............ph.s.t.lpsl..ss....th..s....................hplph..............sspstss...............ph.G..................ss.s.s.hhph............h......s..h..pl.hpG...chhstt.-...............tpss...llu.p.sl.Acp.las.pt.t..........s.....lGcp..lpl...............................s.....s......s........................hpVl...Glh.....p.............sp....st....................t..s................ttlhlPhsshp..........................phhtt...................................tthsplhlp.........hpsss..s.hsp..................s.ppplpp....h .....................................................................................................................................ttlh..t.p.......ph+.....s.h......l...s...h...l..........u.l..sl...u......l.sh...l.l..s..h...h.u..l.h..pu...h..p..p..t...hppp....h..........tt....hss....sh.........lh......lp.ttt.................................................................................................t.th..s.....p..p..hpp..lp..................ph.s..s..l...p......s.......s...ss....hh...s.................................s.lph..........................sspp.tsh............................................................p.hhG..........................ls..s..s..h.hph....................hs.h....pl.....hp..G.....c..h......h.s...t.......t...........................................pptl.................l...l..s..p......p.......l....A.............cp...hth..p.................................................lGc.p...l.pl...............................................................................s..t.....s...............
........................hpl.....s......Glh....p......................st......t........................t.s......................................thlhh.s..h.sshp.........................................................phht..............................................tthsth.hlt........hp.ss...p...tt.h.......h.................................................................................................................................................... 0 2422 4192 5077 +12540 PF12705 PDDEXK_1 PD-(D/E)XK nuclease superfamily Bateman A agb Jackhmmer:Q18AP1_CLOD6 Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.99 0.70 -5.01 258 8235 2012-10-11 20:44:46 2009-12-16 13:18:01 2 43 4486 8 1945 8551 4890 244.70 14 25.38 CHANGED phSsSp.lcpatp.CPhpaah..ph.hh............plpttp...........th.pshs...............................hGslhHp......slcp..............hhp......................................................p............hptp.................................th...t.............................................ptt..thtpt.h..pt...................h.h..pp...htp..hht.........................................................................................tttt.......hp...hth.....Ehphp........hph.s..........................................sl...pl...pGplDRl........-tt....ssp.......................lpllDYKTupp..................sh....shp..........................................ch.....h.....h...ph..QLhhY.hhshtptt..........................s.pstsss....hhah.p.hppstht.................................................................................................................hp.ths....ppp..hpt..hp...p...pl....pphh.p.pl.t....p......httt...............................................h..t...psppsC.....p....a....CsapslCt 
................................................................................................................................................................................................................................................t...............................................................................tshp....................................hGs.hhH.p..........hh...c.p..............h..t.....................................................p.......h..t..................t.h..............................................................................t.t..h..t..t.t..h..pt.......................h.....h....pp..........ht.p..hh..t....................................................................................................................................................................t.t...........hp......h..th........E.ht.ht.......htht.......................................................sh..hl.....pG...h..l...Dtl...............................pt.....ssp.............................................hhllD..YK..osph...................st...s.t...................................................ph.....t.......h.....ph.Qhth.Y..t....hslpphh.................................shp.h.t....s..t.....hh.a.h.h...h.t..t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 700 1353 1685 +12541 PF12706 Lactamase_B_2 Beta-lactamase superfamily domain Bateman A agb Jackhmmer:Q189N7_CLOD6 Domain This family is part of the beta-lactamase superfamily and is related to Pfam:PF00753. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.84 0.71 -4.76 268 12584 2012-10-02 15:46:01 2009-12-16 14:03:32 2 97 4382 48 3955 21211 7413 209.10 16 60.92 CHANGED pt..lLhDss.s........................................tht.thhtt.t....h........tlctl.......lloH.........sHhDHh....hsh..hhpshhth.t..........................hlhss..................sstptlp.................................................................thsh..................p..........ht.hsht.........................................................................lps....h.sspHtsst.h...............................................ahlp........................................stplhau.uDssh........................................h........tth....................................pp...............hDlhlhssshh......................t....hHhsh........ppulphhp.phssc...p....hlhhHh ..............................................................................................................................................................................................................................................................................h..hLhDss..s..........................................................................h......................plc.t.l..........hl.oH.........hHh..DHh..........hs.h........h..h..t..hh..thhsp.................................h.laus........................................ts.s..t...p.h..lp.............................................................................................................................................t.th....................................................h....h...h..p......t.p.........................hh.p.h...t..s.hp................................................................................................lps..........h....ss..p....H.s.s...s.s..hs.....................................................................................ahlc................................
....................................s.sstp..l..h.as..GDoth.........tp............................................................h....pth..............ps..................sDl.hl...h....-s.s..hh.....t......................................htH.hss.........................pp.s...h..p.h...t...p...p....h...t.sp.......p.......llhhHh....................................................................................................................................................................................................................................................................................................................... 0 1384 2524 3317 +12542 PF12707 DUF3804 Protein of unknown function (DUF3804) Coggill P pcc jcsg_3hzp Family This family is approximately 130 residues. Dali search indicates this protein carries a NTF2-fold with a hydrophobic cavity as a structural homologue to 1JB2, 2R4I, 3FSD and 2UX0. In this hydrophobic cavity, Arg 118 provides the H-bonding force to hold a PEG molecule from crystallisation. The interface interaction suggests that the biomolecule of Swiss:Q46KI2 is a dimer. Two members of the family are annotated as putative EF-Tu domain 2 but there is no match to this family so this is likely to be a false assignment. There are two highly conserved tryptophan residues towards the C-terminal end of the family. 22.50 22.50 23.00 44.60 22.40 22.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.65 0.71 -4.56 5 18 2012-10-03 02:27:24 2009-12-16 16:10:09 2 1 11 1 8 19 129 107.30 53 96.55 CHANGED SDocpIEuLIpGFAsscc.uSFLlsNsTsDFLAIRPSGNPIoAKGLsGMasSuDLVlEsSELlKIHRLEhhuushuaAlFTLsEpFSYKGspNcDLSTYTsIFKKlDGsWKISWMQRSoGTTDLSTWN ........tl.shlpthss.p..toFhhsNsTsDFLhIRPSGNPIsAcGhttMh.osDlV.EtuElsKIH+hEh.ss.shuhshFTLtppFoYKGs.NcDLsohT.IFKKlcssWKlsWMQRSoGsoDhShWs... 
0 1 7 8 +12543 PF12708 Pectate_lyase_3 Pectate lyase superfamily protein Bateman A agb Jackhmmer:Q184L0_CLOD6 Family This family of proteins possesses a beta helical structure like Pectate lyase. This family is most closely related to glycosyl hydrolase family 28. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.85 0.70 -4.23 90 2481 2012-10-02 14:50:22 2009-12-16 16:15:15 2 119 1227 26 988 3936 234 234.90 16 41.55 CHANGED aRN.V+D........aGAcGD..Gso....DDTsAIppAIt............ts..ssusosssAlVYFPsGTYlloss.....................Ih.hhhT.........pllG..sstshssl+...usssas...Ghsllsu.s.hht.........................pssFh.ht.....l+N..hhlDhsshsh.st..............sulcWts.........uQuoolpNl..........hhp........hsss....ssspp.Glah-s.....................uu........sshh.sclhhsGGshG...........h..hhus...........Q................QaohcNhp..........hss.spsuIthhhshs..hshps...hslsssts.....................G..ls ..............................................hs.l.p.-.........a.GAt.GD..........Ghs..........DD.T......t...A.lppAl...............................t.s.ss..ss.ss.h...V..a.h...P...s......G..s....Y.....h...lsps......................................................................................lh.l....h..s....s.s.....................................................p.l.....h.G.......s....s...h..........s..................s...h...l...t.......................s..s...s..t...h.t........................t....h..t....h.h.t.s...p....t...............................................................................................................................................................................................................lps.........hhh....s..h.s..sh...............................................................s.hpht.......................................................................sps....s.t..l.ts...l..........h....................................h.......................................s....
..........t...........................s...h.hpt.........................................................................................h........s....................s.....................................................................p...........................................................h..htp.........................h...................s..........................................................thh................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 293 584 826 +12544 PF12709 Kinetocho_Slk19 Central kinetochore-associated Coggill P pcc manual Family This is a family of proteins integrally involved in the central kinetochore. Slk19 is a yeast member and it may play an important role in the timing of nuclear migration. It may also participate, directly or indirectly, in the maintenance of centromeric tensile strength during mitotic stagnation, for instance during activation of checkpoint controls, when cells need to preserve nuclear integrity until cell cycle progression can be resumed [1]. 23.00 23.00 23.80 23.80 22.90 22.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.16 0.72 -3.91 14 100 2009-12-17 14:51:34 2009-12-17 14:51:34 2 2 99 0 76 98 0 82.20 38 10.97 CHANGED shpcphpppcp-sscplphlA-DLYsQYSSKHEpKVphLKKuYEs+apc+lcclphcspslpcEl-pLcspLp.ERcEKppLlplL- ............tt....phppss+EVpcAVE+VARELHsLYKuKHEoK..VsAL...KKSYEsR...WEK+l+-lcpclcshpcEsEcL+pph...............t................ 
0 11 35 63 +12545 PF12710 HAD haloacid dehalogenase-like hydrolase Bateman A agb Jackhmmer:Q18AI1_CLOD6 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.83 0.71 -4.14 272 9618 2012-10-03 04:19:28 2009-12-17 17:10:43 2 102 3541 13 3687 38047 3600 244.60 16 47.47 CHANGED shFDhDuTL.........hsss..h.............................hhhhhtth..........................hhh............................................................................................................................................hsth...........thhch..hh..............................tthtshttp...hhp..tht........................................................................pshh.................phl...t...............ppt....Gtp..lhllous.........p.....hhcsh............ht....thshst..llusph.....................................................................................h.....ttth...ssp.htt..........tKhp.....tlpph..................................................hhhGD..uh.s..Dlshlp 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................hhhDhDuTL..........................hppc.......h..................................h...hsth.h..h..................................h.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.h..pt...............sht.p.....ht....................................................tt.h..t...s.h....t..t.t.......hhp......tht................................................................................................................................................................hhtshh...............................chl...........pp..................lp..pt........Ghc.........lhll....oGu.................hp........hspsl.......................................sp..............phs.l..s...t...............h.h..us.ph.......t..........................................................................................................................................................................................................................................................................................................................................h.h.t.sch......ssp.....................t.Ksp...........hlpch..................................
..........................th.thttshuhG.D..us.N..Dlshl...................................................................................................................................................................................................... 0 1113 2065 2970 +12546 PF12711 Kinesin-relat_1 Kinesin motor Coggill P pcc manual Domain This family is closely related to Kinesin-related, Pfam:PF06548. 26.90 26.90 30.40 30.40 26.80 25.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.79 0.72 -3.58 20 117 2012-10-05 12:31:09 2009-12-18 13:10:42 2 3 54 0 61 122 0 86.50 42 5.74 CHANGED RE-KIpRLEsLhsGsLss-salh-Es..psLpcEIclL+ppl-+sPEloRaAlENhRLpEpl+phppFh.-pGEREhllpElspL+spL .......REDcItRLE+Ltcpuhsu.hsppp-p...ppL+-EIphLR-QlE+pP+ls+YAhENppLREEs+RL+thpsVcpspEh.spplupLEctF..................... 0 11 24 38 +12547 PF12712 DUF3805 Domain of unknown function (DUF3805) Coggill P pcc JCSG_target_3hlz Domain This family represent the N-terminal domain of the structure. In two related Bacteroides species the gene lies immediately upstream from a putative ATP binding component of an ATP transporter and a putative histidinol phosphatase. The structure of this domain is strikingly similar to the N-terminal structure of 1tui, also of unknown function. The domain carries four conserved tryptophan residues. 
21.80 21.80 21.80 21.90 21.30 16.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.89 0.71 -4.43 6 50 2009-12-18 14:33:49 2009-12-18 13:56:41 2 2 50 2 7 40 1 153.60 68 57.55 CHANGED M...KKaISPGuWFShpYPuDWsEFEDuEsSFLFYNP-+WTGNFRISAYK.....ssussYGp-slcpEL+ENsSApLVKVGcW-CAYSpEhFQEEGsaYTSHlWVTGt-sluhECSFTVPKGEsl+tAEcIIASLclRK-GsKYPtEIIPlRl.EIhpIN ...hQG.KKFISPGuWFSMpYPuDWsEFE.DGEGSFLFYNP-p.WTGNFRISAaK......GsAsYGK-sl+QELKENsSAoLVKVGph-CAYSKEMFpEEGsYYTSHLWlTGh--lAFECSFTVsKGssVcEAE-lIAoLElRKEGpKYPAElIPlRLSEIYpIN.................................................................... 0 1 5 7 +12548 PF12713 DUF3806 Domain of unknown function (DUF3806) Coggill P pcc JCSG_target_3hlz Domain This family represent the C-terminal domain of the structure. In two related Bacteroides species the gene lies immediately upstream from a putative ATP binding component of an ATP transporter and a putative histidinol phosphatase. The structure of this domain is strikingly similar to the N-terminal structure of 1ma7 whose C-terminal domain is a phage integrase, Pfam:PF00589. 20.70 20.70 21.40 20.80 20.50 20.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.62 0.72 -4.52 25 109 2009-12-18 14:36:53 2009-12-18 14:36:53 2 2 100 2 30 98 23 85.00 28 39.15 CHANGED pDlsslQpllDpuhlssccpp......s.hpuhGlslGslLssEh.tGh-WhsltDstscs.uLphtsoc.thlh..Phshlhc+hcsGptsslschY .........................DlpplQpll-puthssccpc......t.htuhGlslGslLssEh..Gh-WhslhDsttcs.sLpht..ssp.hhl...Phpllhp+lcsGp.sslhp.a............. 0 10 26 30 +12549 PF12714 TILa TILa domain Bashton M, Bateman A pcc Pfam-B_897 (release 5.2) Domain This cysteine rich domain occurs along side the TIL Pfam:PF01826 domain and is likely to be a distantly related relative. 
24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.61 0.72 -4.21 42 957 2012-10-01 23:42:56 2009-12-18 15:44:45 2 141 60 0 540 797 0 55.90 33 10.67 CHANGED GCpDspGshhPsGcoWhoss.CoppCsC.ssGtIpCpsapCsssohCp.p..sGspsCt .............GCh.hpG.p.Y.a.s..G.c.p.ah...s.ss..CsppCpC.t.s.s.u.tl....pCps..t.p.Csssp.h.Cplp..sGhhsCh.............. 0 58 113 320 +12550 PF12715 Abhydrolase_7 Abhydrolase family Coggill P pcc jcsg_3g8y Family This is a family of probable bacterial abhydrolases. 20.70 20.70 20.70 20.70 20.60 20.20 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.29 0.70 -5.54 4 102 2012-10-03 11:45:05 2009-12-22 11:09:37 2 2 85 7 21 133 18 320.60 36 84.06 CHANGED ppapPp-Hsll+SsRsDGRaLSSYGlVHsMLcchcPphAFpsDMSs+EFspWQctVR+AMpEIMKFPp.pcp.PuPVClKT.pREGYRLEKWEaYPhPcsVSTFLVLlPDsl..ppPVPulLCIPGSGtoKEGLAGEPGlssKLs-.cYpsP.KloMAhNhVKpGYlAVAVDNsAAGEAuDLE+YstGpNYDYDllSRFLLEhGWSaLGYsSYLDMQVLsWMKopsaIRKDRIVlSGFSLGTEPMMVLGsLDssIYAFVYNDFLCQTQERAhVMThPDKpGpRsFPNSIRHLIPsFW+pFNFPDIVAuLAPRPlIhTEGGLDRDFpLlppAYthuGtP-NschaHYPKFADPspRKcl-pLPEGLDRcpYFchVNVDsPsHYFKsELVIPWL+Kl 
..................................................................................................................hhtsh...........................................................ppatpWRpp...s.Rchlp.p.hh........h.hP..s.ssh....s.....a...ssch........l..c.pp...c.Rs....u.Y.hsE.Klth.slo..sc..SR..VsuhLLsPcth..........cGPaP..A..llhL..H..G.p....s..h....s....K....EK....hl............t.....h.G.s....s..sc...lss.......sh.sct.........tph..........hG..........c.p.........LA.....K+G....YVVlulD....u.luWG-.....R.....G....s...h.....p.....h.....pp..t................p..sLAsshh.....p.....LG.pShuGhh....uY-....D.M.R.slDFhsoLPpVD+cRIGlhGFSMG.uaR.uW.LAALsDpl.tAss....hhuahsph.....thhhhsssps....htu.tsuhhhLhPGlhp.phDaPDlASlAAPRPhLhh....N......G.........u.....cD.........+.........L.....F..........h..V....c..pAYt.......................................................................shthhhttt............................................................................... 0 4 14 17 +12551 PF12716 Apq12 Nuclear pore assembly and biogenesis Wood V, Coggill P pcc Wood V Family This is a family of conserved fungal proteins involved in nuclear pore assembly [1]. Apq12 is an integral membrane protein of the nuclear envelope (NE) and endoplasmic reticulum. Its absence leads to a partial block in mRNA export and cold-sensitive defects in the growth and localisation of a subset of nucleoporins, particularly those asymmetrically localised to the cytoplasmic fibrils [2]. The defects in nuclear pore assembly appear to be due to defects in regulating membrane fluidity [3]. 25.00 25.00 26.60 25.90 22.80 21.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.53 0.72 -4.27 34 86 2009-12-22 13:18:28 2009-12-22 11:51:49 2 1 85 0 63 74 0 52.00 27 30.96 CHANGED hPslssllhlllllalsh+llchhhRhhhhhlhhll+lsaassllshuhhlYhp ....Pclssllhlllhlalsh+lLshhhRhhhhalhhll+lhaassllssshhlYhR... 
0 11 31 52 +12552 PF12717 Cnd1 non-SMC mitotic condensation complex subunit 1 Coggill P pcc Pfam-B_410 (release 24.0) Family The three non-SMC (structural maintenance of chromosomes) subunits of the mitotic condensation complex are Cnd1-3. The whole complex is essential for viability and the condensing of chromosomes in mitosis. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.06 0.71 -4.43 77 571 2012-10-11 20:01:02 2009-12-23 16:26:08 2 33 298 0 412 1562 22 172.30 27 14.18 CHANGED sshlRsNhllsluDLshRassll-.a....sstlh......ppLp.....Dp..s.......................shVR+sslhsLo+Llhp-hlKh+Gpl.hchhhsL...tDssppIpshAchhhp-ltp+....s.shl....hNhhs-hls....tLsp..ttt............................hsppphpplhpallshlst..........-+ppcsls-Klst+httsp...............................................stp.hpclhhhLshhp .................................................................................hlRsNhlluluDlslpa..s.sll....-.h.....sstlh......tpLp...........Dp..s..................................................................V..R+...s...slhsl.......op....L.....I...h....p....s..h.....l..K....l.K....G...p.l........s.c.hs.t.s..............L......Ds..spc...I.sshAc..hhFscLs..pK........sssl...........................aNt...h.s-.hls....pLss..................................................................................................................................thpccphppIhcaL..l..p.h..lp..............................-.c.pt.pl.s..pKlstch.ts..........................................................stp.hpDhhhhLs.h..................................................................................................................................................................... 0 152 231 339 +12553 PF12718 Tropomyosin_1 Tropomyosin like Coggill P pcc manual Family This family is a set of eukaryotic tropomyosins. 
Within the yeast Tmp1 and Tmp2, biochemical and sequence analyses indicate that Tpm2p spans four actin monomers along a filament, whereas Tpmlp spans five. Despite its shorter length, Tpm2p can compete with Tpm1p for binding to F-actin. Over-expression of Tpm2p in vivo alters the axial budding of haploids to a bipolar pattern, and this can be partially suppressed by co-over-expression of Tpm1p. This suggests distinct functions for the two tropomyosins, and indicates that the ratio between them is important for correct morphogenesis [1]. The family also contains higher eukaryote Tmp3 members. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.97 0.71 -4.34 41 292 2012-10-03 05:16:33 2009-12-23 16:36:48 2 14 225 0 176 1155 1 117.90 33 58.23 CHANGED KhpsLKlEs-sAt-+sEphcpchKphEpcshcpEpElpSLp+KsptLEs-l-clEppLp-s+sth--uppttopsE....uLsR+lQLLEEELEcu-cpL...+ETsEKLccs-hpA-chERphpsLEpcppp.EcKhE-hpcphccu+t ...................................KhptL+l-t-su.p+.s-phctchKphEpc.sh.........p....p...Ep..ElpuLp+K...phlEsEl-...Kl.c.p....pLp-scpthc.p....s...pp..t..t.sps-...........shp.++.p.lE....pph-psc....tpl.......pps.p.clppsphtutp.t..Rt..h..hp...p.....t...-t+..t.......t........................................................................ 0 56 93 142 +12554 PF12719 Cnd3 Nuclear condensing complex subunits, C-term domain Wood V, Coggill P pcc Pfam-B_484 (release 24.0) Family The Cnd1-3 proteins are the three non-SMC (structural maintenance of chromosomes) proteins that go to make up the mitotic condensation complex along with the two SMC protein families, XCAP-C and XCAP-E, (or in the case of fission yeast, Cut3 and Cut14). The five-member complex seems to be conserved from yeasts to vertebrates. This domain is the C-terminal, cysteine-rich domain of Cnd3. The complex shuttles between the nucleus, during mitosis, and the cytoplasm during the rest of the cycle. 
Thus this family is made up of the C-termini of XCAP-Gs, Ycg1 and Ycs5 members. 25.80 25.80 25.80 26.60 25.70 25.70 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.86 0.70 -5.43 57 330 2012-10-11 20:01:02 2009-12-23 16:51:05 2 9 263 0 236 335 0 307.10 23 30.42 CHANGED h+CLtlspthLpplp.tslppshpl...sllssLlh...Pulpsp-s.slRchulcCLGLhsLLsc..........plAp-sh.lhhpth.pp.........s.ssplphhAlpslhDllhhaGhphhsspsp........................................stshslhclhh+hLcss......psc......lpshusEGLsKLhLsshlss........................................................spllpsLlltYFsPpopssp.................tLpQsLuhFhPlYsaSphpp..Q..pphtpshhsslhplhph.tch.........ssts.lp...........ssplhphllchTDscphsshsptt...................ttssH.tlu....hplLptl.......t..pscc...p+hlhphLs+Lhls..ps.s...ppphpth ...............................................................+CLhlstthLpphp...ts.hp.shsh........sllpsLllPulpstcs.slRphulhCLGlssLlsc......................phApcp.hsl.hhp..hh..pp.........s..ptslph.sA.....Lpsl..hDllh.ha...G.....hp.hhssptt..........................................................................................................s.ttsllcl.hhch...Lcsp..........ssc....................lpshuscGluKLhLsshlss........................................................sclLp.pL.llh.aFsP..to...pp.s...........................p.L+QsLu.hFhPsas...........h..u.....ph......pp.Q.........pthp..cshlss.lps.lhss.ts....................ss.s.ls...........hsplsphllchTcspths..s.sptt.................................................t.ssH.slu....hclhppl............ht...spt..tt+hhhphLspL.ls...t........th...................................................................................................................................... 0 86 133 200 +12555 PF12720 DUF3807 Protein of unknown function (DUF3807) Wood V, Coggill P pcc Pfam-B_6113 Family This is a family of conserved fungal proteins of unknown function. 
25.00 25.00 30.50 59.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.46 0.71 -4.10 19 65 2010-01-04 17:41:02 2010-01-04 17:41:02 2 2 65 0 54 64 0 172.50 31 78.96 CHANGED DLpuFpAcHFssps.................sssshst................tppshsE-h............DDsLGYYPDGsKRTLTDEQIpIFRHSEIcuLhRc+chpc.-p....t.............hp.ttchps..t..s..tp.tt......................ttpppssptpc+ptpstpcpsspps...........h.ss.ttstssLcYs-csstttpt.......psssshuRRllS..........Y-D ...................................DL.sFaspHFsspsh.................................ptp.ttt................tpsshpE-.t...........DDuLGYYPDGVKRTLTDEQIpIFRHSEIc..uLhRp+chpcppp................................tpstspssspt.pts..stssptpttps............................sppppstppppKp.ppshpcppspts.........tpphsssptuh...ssL.-hsppt...............sp..tR+hls................................................................................................................................ 0 8 23 41 +12556 PF12721 RHIM RIP homotypic interaction motif Coggill P pcc Masci AM Family RIP proteins are receptor-interacting serine/threonine-protein kinases or cell death proteins [1]. This interacting domain is involved in virus recognition. The RHIM domain is necessary for the recruitment of RIP and RIP3 by the IFN-inducible protein DNA-dependent activator of IRFs (DAI), also known as DLM-1 or Z-DNA binding protein (ZBP1). Both the RIP kinases contribute to DAI-induced NF-kappaB activation. RIP3 undergoes auto phosphorylation on binding to DAI [2]. 
17.20 5.30 17.20 5.30 17.10 5.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.52 0.72 -3.55 28 243 2010-01-08 12:49:32 2010-01-08 12:49:32 2 18 70 0 89 231 2 48.60 23 10.48 CHANGED PsPpsss.sssssspss....................ssssshslsIp.sssulQIGspNhMslp .........................................................................................................s...................................tss...ltIp..su.pslQIGssNhMph....... 2 23 32 39 +12557 PF12722 Hid1 High-temperature-induced dauer-formation protein KOGs, Finn RD, Coggill PC pcc KOGs (KOG2226) Family Hid1 (high-temperature-induced dauer-formation protein 1) represents proteins of approximately 800 residues long and is conserved from fungi to humans. It contains up to seven potential transmembrane domains separated by regions of low complexity. Functionally it might be involved in vesicle secretion or be an inter-cellular signalling protein or be a novel insulin receptor [1]. 20.50 20.00 20.70 20.00 20.20 19.90 hmmbuild -o /dev/null HMM SEED 895 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.63 0.70 -6.87 28 348 2012-10-01 19:21:38 2010-01-08 16:35:52 2 7 227 0 260 575 7 599.10 26 91.93 CHANGED 
MGso-SKLs..F+pulhcLspp.............................................pth..ss...............--s...aWppFW....chspospDlFs.Lloss-IRplRDps.sNltoLlhtlsp+lhhhspp.ssh..............shs.pp..plLNClRlLTRlLPalaE....tppWcs..taFWusp.c.hh..t...............................tt.psspPLAtpLlcuLlDLLFhssFTlssst..........pshspspa.IWEuGVGsssshsp.......stch-uNRsElL+LLLTlhScshYhsss.......sspss+alsahsosss+p.lLsLhsSLLNsss+Yssss....hslPY.splhapDs+p..............................hLVphsL................QlLllhlsashssssph...................................................shscNhahpYLu+lHRcpDhpFllcGhs+lLppPlp...........................................................................................pohlPsss+.lpa..t.EhLhLhWchhphNK+Fhpalh-opcshDlllhhlYalhpa+sssu+hG....................................ll+hssalLLhLSu..-+sFsh+LNcsass........ppLP.........h+lsshsG.....TauDalIl.hppll.............sosptp.p.lhssLlpllhNlsPY.......................................hpsLShsuuspllpLlsshSoPtFLhu................................sssNacLLthLL-shNshlpapFcs................NtpLlYullRp+clacpLtshsh-.upp.hptpsp.............p.............................................t...tshtsspppsstps.ptss..pssps.t.sphs.p.ssshshssspsptsp..cspss.ssss....................t.s.t.pt.p.tsttp.sthp.thspppsspp.p.sssh..psptpatsossWhcSWtscL..sLpoIhpll....pslhspl.phshppuhss-u.hl.....chhpcsplpulls.......sPpPlpl+paphsshohtWacolhWGhIaspph..............................................................................................................t............................................................................................ssslWssTclKLFcl 
.................................................................................................................................................................h..tL.t............................................................................................................................s........................sp.........hatthh.....t...p.p-lht.hhs.tpl+.ht.............ptt...NhtsLhhhhs.plh.hh..pp........................................p....p.....hlNs.Rl..Ls+llPhhh.....E..........thpt..................hhWt..........................................................................................................................t..sLuppLl.slhcLLFh.sF.....Tl.......................................................t.tp....h.lWpsG..luhsts................s..ht.sR...hElL+LLlshh.upshY.s.t................................s.hl.hhss.............sp......hhshhsSllNssh....ths.ss...................tls..a..s.hh.h..t..s.pp..............................................................hl..sh................phL.......h.hl.a...........................................................................tNhhh.a.hsp.lp+.t...Dhp.....hl.....hpuh..plht.sh...............................................................................................thhst......p...h...h.phhh.hhaphhphNppFh.hh.hp...t.s.c....hll...hl..h..hh...ctp..t.p.s.....................................................................lhphs.all.hLSs....c.th...h.p....L.p..hp..........plP.....hc..ls..h..s.......................oat.Dhhl.h.hhp.ll.........................hss......t...............l..h..shhhthl...N..l.sh.............................................hptlu..ss..plhpLhtthSs..a..Lhs......................................................................................s..ph.....hl..l..Lc.hsshlth..phpt....................s..Llasll..+ppphhp..tlts..h..p.st.........t............................................................................................................................................................................
......................t............t.................t..t............................................................................................................t..............................t.........................................t.........th..s..a.............h.sW..ph..sLphlh.pll.....p.l...l.....t.....t.t....t.............t................h..h.........................sh..t...hp...............s..h.hhWh.thhWu.la.tt................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 101 146 222 +12558 PF12723 DUF3809 Protein of unknown function (DUF3809) Bateman A agb Jackhmmer:NP_295729.1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in Deinococci bacteria. Proteins in this family are typically between 117 and 157 amino acids in length. 25.00 25.00 42.00 73.70 23.40 17.00 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.94 0.71 -4.28 6 19 2010-01-08 17:04:36 2010-01-08 17:04:36 2 1 19 2 13 19 1 127.90 38 90.81 CHANGED oFsLphPus.ApuLs....Pthshuth..h+sLpt-uEtlcGELVspsPlLGElDLPFpSRLphpspGt....pLhPhsLsGE.s..WlEVuGpupssEsG.lsaphphRsHLsLPpuEsWGGtAFc+MlcAAhuRTLERVhppL ........................................shs.h...h-Lch....-upplcGpLhtpsPlLGElcLPFtSRL....-Gt....pLpPlPL...ss..hlEVpGpucss....u.....tu....lththclcl+LpLPpGcsWGtRAFt+hlcAhFtRsLERsLst.... 0 3 8 13 +12559 PF12724 Flavodoxin_5 Flavodoxin domain Bateman A agb Jackhmmer:Q186N5_CLOD6 Domain This is a family of flavodoxins. 
Flavodoxins are electron transfer proteins that carry a molecule of non-covalently bound FMN. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.72 0.71 -4.19 70 1747 2012-10-03 05:08:30 2010-01-11 11:17:35 2 13 1363 0 401 2543 510 137.00 25 68.21 CHANGED LIlYuSpcGpT+cIup...hlu...pplptt....sphsslps..h...t.th..slspaDp..VllGAul+hG+apstlhpFlppa...tstLsst.suhFsVsLs........uccspc.........h...hc.+h......l.p..sh..a..p.....PphhtlFu..GuL.pYs+Ysah-+hhlphI.hph...stup...s-ss .............................................lIlYu..o.p..p..G.pT+clAp...hl...A....p...pLpp.................sths.....clps.......s..............................p.......p....................s......h.p..sY.......Dp.....lllG.u.u.l.h.h..G...+.h..p...s..t..h..p.c.Fl.c.+.p.....t.s.p.L..s..s.....h...s.u..h..as.l..shs............upc.s.pc..............................h...hp.ch......l.p...p.....h.....p...........st..hthhs..Gtl...h..phthhc+hhhphh.ht...tt.....t................................................................................................ 0 140 253 344 +12560 PF12725 DUF3810 Protein of unknown function (DUF3810) Bateman A agb Jackhmmer:Q185R6_CLOD6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 333 and 377 amino acids in length. There is a conserved HEXXH sequence motif that is characteristic of metallopeptidases. This family may therefore belong to an as yet uncharacterised family of peptidase enzymes. 
26.10 26.10 26.30 61.80 25.40 26.00 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.94 0.70 -5.53 60 206 2012-10-03 04:41:15 2010-01-11 13:16:19 2 1 203 0 62 211 151 314.90 30 87.94 CHANGED hYSpslYPhluthlphlhuhhPFSlGD..lhhhl..hllhllhhlhhthhphhpphtph...............h.hphhthls....llYhhFhlhWGlNYaRhshtpph..........slphtp.Yos-cLhphsptllppsNphptplsp..........spshhhths..s...pclhcpsh.puYpplsppashhp..hp.hspsKsslhShhlShhGlsGhhsPFTsEAplNsplsshphPhThsHElAH.lGaupEsEANFIuYLsshpsss.ha+YSGahhuLhYslspltc.hs.ctapclh.pplssslhc.shppsppaWppa.cs.sl......spltphha-tYLKuNsQpsGhcSYuchVsLLlu .............hYupslYPhluthlshhsuhhP.FSlG-.lhhhl....hllhll.hah.hht.hh.phhpphpph......................lhphhthls....hlYhhF.....hlhWGlNYhRhshhpph..............................pl.phtt......ao......s-cltphspphlpphNphtsplsp............ssp..h..............ppltpcsh.puYpplsppashhp...sp..hspsKsh.lhS.lhShhGlsGhhsPFTsEuslNspl.shphPhThsHElAH.hGaupEsEANFluYlsCppusshth+YSGYhhsLtYslssltc.hst-tapclh.pplpst........lhc..shp.ppptaWppaps..hl.........spl.tshhactYLKuNp.psGhcoYuchVsLLls........................................................... 0 31 54 59 +12561 PF12726 SEN1_N SEN1 N terminal Mistry J, Wood V jm14 Pfam-B_2547 (release 24.0) Domain This domain is found at the N terminal of the helicase SEN1. SEN1 is a Pol II termination factor for noncoding RNA genes [2]. The N terminal of SEN1, unlike the C terminal, is not required for growth [1]. 
20.60 20.60 21.70 21.70 19.40 18.90 hmmbuild -o /dev/null HMM SEED 727 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -13.27 0.70 -6.48 34 152 2010-01-11 14:18:12 2010-01-11 14:18:12 2 9 140 0 119 164 0 637.60 21 33.42 CHANGED Wlpp+hsppLpsCspClpsatpuKsphhpphh..cphstpp.lppFhphlspW-hpplhtsLcphppphpss..s..p.s........h.hulaEsLsssshL+sssthcth...FsthFchl.sppphh...h.........pphl.PGhhhhLF-s.ss..ppppWApphhpphtpp...hsppphs.slhptlshtlhpl...........s..ts.hs.shhtp.......FWpshthll..phlsp-hlppthps............htlps.lh+lhhsHLtss....stsLthlLcsLphlLc+hsssF.Ws.thpsho.p.........sll-plFssstFsphLhcsppss.hpppt................hpshh...uWhhsFhpSLsssp.p.................psscpls.hLlpphpp.pp.sp.......t+tsshtsuhshLlpslhs....hpcpcps.......shpsslhsls-shshlssth........shh.sssth..h.tpstslsphshpllppslsh.DhhtLppppttLhps........sth.sss....phhs.......lWpplhpthh.psshs...LuptlLtuht..........slsul.hh........spp..............phsstpppaNphhpphtphhspll............p+lu-h....sPspLpplhs.cp..ssspulhSsl.hSss.plhpAAhsllppsh.s.ssuRhEulpclLppphssslpulshslpplpphcs....apssP+.hl+shhDllssLsDs.sGlL.....pspphht....pssttplt....paWchtWphLshIFppThtWusp..actp.hh-FsRDTL-huchLh-paplhsssls.t.sss...........pssps.............lhpsshpshpshlhWLRLsDc.LLsssVpL 
.......................................................................hhp+hpt.LppCs.Cltpaapu+tthhpphh...cphsppp.lpph.phlsph-hpRlhpsLppspthhpph.s.ptt.....................................thhhul.aEsLss.thLppsp.hpth...Fs.thFphlpsppphh..h.........................pphl..PuhhhhLF-t..st........ppppWAhp.hppht.p......hst.pp.h.......s.slhp.l..tlhth.........s...tt.hs.t...hltp............................hWpuhthll..phhsp-.lhpplps.............htlp...s..hhcl.hhsHLthp....s.sL...lL.....p...slphhLc+hsppF.Ws..shtshssp..........sll-.lhtsstapthLhp..psp...pt................................................................h.shh...uWh.sahpSLptsp..p.................psschlhh.Lhpp.hpp..t..t.......sphtChhtuhshLhpslhs....hhptpts...........phpsthhhhsph..h.thltp.h........................phh..ss.h.......t.tlst.shtlltpslth.-ht.htpph.hlhpt....................sh.............ph.t.lWptlhpth...pssht.....LupthLhuhh.............slhul.hh.spt....................tttppaNthhtphtphhspll............t+hu-h..........psppLpplhp.st..ps.tuhhuhl.hssp.phhpuAhplltth..s...tuRh-ulpthlpp.ht.sh.uhshshppl.phth.......atshs+.hl+hhhDllpsLs-s.sGhL.....pstph......ptphhtl........phWp..WphlshlappT.tWt.h..hcht.h.pFsRDshphuc.Lhsphtlhtsslt..t.t.t..................................s.tpp................lltsshtshpshhhWLRLpDp.Lhpshlt................................................................................................... 0 35 71 106 +12562 PF12727 PBP_like PBP superfamily domain Bateman A agb Jackhmmer:Q18A58_CLOD6 Family This family belongs to the periplasmic binding domain superfamily. It is often associated with a helix-turn-helix domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.88 0.71 -5.35 186 867 2012-10-03 15:33:52 2010-01-12 08:16:29 2 14 708 0 360 5148 918 188.80 29 45.74 CHANGED spthtpt..ss.hp..lt..hthsGShsGLtuLt.cGcsclAuhHL..h.......................cs.css..p..........aNhs.hl.......pch.h.ssh.s.hsl.....lphspRppGlhlttG...NPt......s...............................lpshpDL....sc....tslphlNRpp.GuGoRhLlDphL....pp.t....slss......................splpGYsppttoH.hsV..........................AssVssGp...ADsG.............l.G.l......pss.....At..ph.uLcFl.......Pl.sp...........EcYDLll.+phh.pc..stlptllphlp .............................................................................................................................t............t..ht..lt..ht.....a.hGShsu.Lh.s.Lt....p....G....c....s....c.l.....A...uh.H......L...h...........................................c.s..cs..s..p.........................hN..hs..hl....................c.ch....l.sst....s..hsl........................lph.h.......p....Rp..p..G.l....h....Vt.pG....NPp...................p........................................................................Ips....h...t...D.L.............sc................ssl.+...aV..N.Rpp....G..S..G..o.R....h.Ll.D.p.h...L........pt..t........slss..................................pp.l..s.G..Y.p.....p..p...t...h........o....H..hu....V......................................................Ast.Vu.sGp.......A.D..l.G..............................................l...G....l............c.s..s.......At.......ph.....u...L...-...F..l........................P..l.tc............................................EpY...D.l.ll...+.s...t.h....p..c...h.lptllphh.t....................................................................................................................................... 
0 130 258 307 +12563 PF12728 HTH_17 Helix-turn-helix domain Bateman A agb Jackhmmer:Q18A58_CLOD6 Domain This domain is a DNA-binding helix-turn-helix domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.41 0.72 -3.71 461 7911 2012-10-04 14:01:12 2010-01-12 09:58:31 2 67 2491 0 2062 7447 1041 50.90 22 40.39 CHANGED hlospEsA.chLs.l..uppo.lh.c.h...h.....c....pu..pl.sh.......t......t......hthpcp-lppalpppp ...................hospEsA.chLs..l.....SppT.lh.c.h.......h.........p.............pG.....pl.sh..............+.....hGpp....hhhpps-lppalpt..t.................................. 0 792 1556 1892 +12564 PF12729 4HB_MCP_1 4HB_MCP_2; Four helix bundle sensory module for signal transduction Coggill P pcc Ulrich L Family This family is a four helix bundle that operates as a ubiquitous sensory module in prokaryotic signal-transduction. The 4HB_MCP is always found between two predicted transmembrane helices indicating that it detects only extracellular signals. In many cases the domain is associated with a cytoplasmic HAMP domain suggesting that most proteins carrying the bundle might share the mechanism of transmembrane signalling which is well-characterised in E coli chemoreceptors. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.64 0.71 -4.93 65 4710 2012-10-02 01:04:29 2010-01-12 14:21:04 2 84 1259 0 1579 4943 147 176.50 14 30.92 CHANGED sh..plpsKLhhh..hhlhslhhhllG..hlGhhshpphspshpshYpcpLlslphlsplcsshtphcstlhcllhs.p-psc...ppplhppl.pphppchsphhppYcpshhssc.E..+c.hhspacpphppY.pptpppllsLsppsphc.....cAhphhpspst..shhpphhpslpcLhphstphAcpttppspsph ................................................................h.pltp+Lh..hu...Fu.llhll.hl.hlu...shul..h..p...l..sp...ls..ssh...p..pl...h....p...s...p...h...s..sh....phh...s...p....lp...s...s..h....t....p....h..p...hs......h...h...p.............h....l......h....s....p.....s....s...pp..............hp...p....h.t...pp...l....pp....t.....p.....p....p....hp...p....t...h........p....p.....h......p......p.......h........h..........h.....s......s...p.....-......cp...hh.p..p..h.p...pt...h....p.p.a...h...s.....h...h....p..p.....h...h...p.....h..h.....p....p.....s.p..h...p........................p.A..h...t..h.......h.h...s...p.h.t......shh...p...t.h...t..p.tl...p.p...l.h.p.h...pt...p.stt.............................................................................................. 0 404 885 1233 +12565 PF12730 ABC2_membrane_4 ABC-2 family transporter protein Bateman A agb Jackhmmer:Q18D57_CLOD6 Family This family is related to the ABC-2 membrane transporter family Pfam:PF01061 [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.05 0.70 -5.20 293 7890 2012-10-03 10:13:34 2010-01-13 08:24:29 2 18 2165 0 1696 13517 1344 250.10 12 82.58 CHANGED psEhhK.h...t...+...s.t..hh....hl.hhlhsll....hsh..hhh......h..hh.t...........................................................................................................................h...t....tt..h.......t....hh..........h..........h...............................................t....hhhh..........hhhs..l...hlsl.hs.uh.hh..s.tEh........psst........hp....hhhstshs+tplhhuK....hlshhl.h.hh....l.hh.ll.hhhhh.hl.h..shl.hs....h.........t.s.h.sh.s.................................hht..h.h...hhhhhh.s...l.hh.h.hh.......hhhl..h.h.....l......uh....hhc.....s....h.hhs...ls...l.sl.h....hh..h...hsh..hh........st..h.h.......................hhh.sh..s......................hthl..h.......h..th..........h.......t.s....h........h.t........s......h..s..h..........hh.s....lh.hh...hhhhllhhh 
.......................................................................................................................................................................................................................................................................................................................................-hh+.h.....h...p...p.h..hh....h.h...h..h..h..h..h..h.l.........h.s.h...hhh.........h.h.h..t........................................................................................................................................................................................................................................................................................................h...........tt...h........t.....hh.............................h..........................................................................................................................hhhh...........hhhh.......l......h.hsl.....hs.....s..h.....h...h........s...pEh.............pp.ss...........hp......hhhs..t.s.h.s...+..h.ph.hhuK.......hl...s..h...hh.h..sh......l...hh....l...l....h.h....h.h.s.....h.l..h....s.h.l..h.t...h...........t..s.h.shs.......................................................................h.ht...h...h...h...h..h..h...h.h..h....h...lh..h..hh..................hhhl......hh.........l..........uh......hhc..........s.............thhs.........lh........l.sl..h...........h.h..l......hsh....hh..........sh..h.....................................................hhh..sh.t...................h.hh.....................h....t............h..................................................................................h......h.............................hh......hh..hh...hhhhh....h..................................................................................................................... 
0 700 1290 1510 +12566 PF12731 Mating_N Mating-type protein beta 1 Coggill P pcc Pfam-B_4610 (release 8.0) Family This domain is found in some fungi and is the C-terminus of a homeodomain-containing transcription factor protein involved in mating. 21.30 21.30 21.50 22.00 21.00 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.76 0.72 -4.11 24 64 2010-01-13 13:49:11 2010-01-13 13:49:11 2 4 19 0 5 61 0 90.90 21 18.04 CHANGED hs.ts......DpplppsLsslcpcFhuuLcs-st.s.LssFhouaspFcshlpShpspLss-TlshlhsFussluslosshl-hpupppsht.cchss .........hs.........t.DtpltpsLpshcps.alsuLpssst.s.lpsFhsphpp.h..psthp..u.t..p.s..pL...sspThphl.hsFushltslosshlclcsppsplp.sch..t......... 0 5 5 5 +12567 PF12732 YtxH YtxH-like protein Bateman A agb Jackhmmer:Q18C91_CLOD6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 100 and 143 amino acids in length. The N-terminal region is the most conserved. Proteins is this family are functionally uncharacterised. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.56 0.72 -3.61 235 1575 2010-01-13 15:49:26 2010-01-13 15:49:26 2 9 1168 0 352 993 72 104.10 22 81.49 CHANGED hlhuhllGuslGussuLLhAPcsG+-sRcclpcpsp....chtcphpch.t.....................................lppphpp...........................................spcpspchhsc ........lhuhlhGussGAssuLLhA....P.cpG+-hRpclpcthc.............chpcps..pchp.pp.sppp...............................................................................................hpcphpp.........h.tt....................................hppthp.....ptttt......................................................................................................... 0 141 267 317 +12568 PF12733 Cadherin-like Cadherin-like beta sandwich domain Aravind L, Coggill P pcc Aravind L Domain This domain is found in several bacterial, metazoan and chlorophyte algal proteins. 
A profile-profile comparison recovered the cadherin domain and a comparison of the predicted structure of this domain with the crystal structure of the cadherin showed a congruent seven stranded secondary structure. The domain is widespread in bacteria and seen in the firmicutes, actinobacteria, certain proteobacteria, bacteroides and chlamydiae with an expansion in Clostridium. In contrast, it is limited in its distribution in eukaryotes suggesting that it was derived through lateral transfer from bacteria. In prokaryotes, this domain is widely fused to other domains such as FNIII (Fibronectin Type III), TIG, SLH (S-layer homology), discoidin, cell-wall-binding repeat domain and alpha-amylase-like glycohydrolases. These associations are suggestive of a carbohydrate-binding function for this cadherin-like domain. In animal proteins it is associated with an ATP-grasp domain. 22.10 22.10 22.20 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.67 0.72 -3.70 218 661 2012-10-03 16:25:20 2010-01-25 14:57:29 2 169 264 0 277 708 92 93.00 22 12.69 CHANGED LssLsl......................s.s......Fs..ss..sts.Ys.s..sVsp...sss.s..lslssss.p..............................lplsG...t.........................s...slsL..s............G.ss...lslpV......sup.......sG......s...........................s..pp.Ys.lsl....pRt .....................................................................ltlp................s.s......Fs...ss....tts.Ys.s..pVst...sss.s....lslsspst....................sss...sp.l..p.l..sG..hts............................s..tslsL...s............GtNp....lslpV.........sup.............cG......s.............................s...pp.Ys.lslpR................. 0 145 224 258 +12569 PF12734 CYSTM Cysteine-rich TM module stress tolerance Aravind L, Coggill P pcc [1] Family The members of this family are short cysteine-rich membrane proteins that most probably dimerise together to form a transmembrane sulfhydryl-lined pore. 
The CYSTM module is always present at the extreme C-terminus of the protein in which it is present. Furthermore, like the yeast prototypes, the majority of the proteins also possess a proline/glutamine-rich segment upstream of the CYSTM module that is likely to form a polar, disordered head in the cytoplasm. The presence of an atypical well-conserved acidic residue at the C-terminal end of the TM helix suggests that this might interact with a positively charged moiety in the lipid head group. Consistently across the eukaryotes, the different versions of the CYSTM module appear to have roles in stress-response or stress-tolerance, and, more specifically, in resistance to deleterious substances, implying that these might be general functions of the whole family. 21.90 21.90 22.00 22.10 21.80 21.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.59 0.72 -3.88 88 306 2010-02-01 16:08:54 2010-02-01 16:08:54 2 4 97 0 189 281 0 50.20 33 55.31 CHANGED s................P.ssYsp.......................t....tppp.sss....................hhpGCLAALCCCClh-hsh ....................................P..uYsp................................ts....sppp.tsu..........................hlpGClAAL...CCCCll-tCh...... 2 33 98 150 +12570 PF12735 Trs65 TRAPP trafficking subunit Trs65 Coggill P pcc manual Family This family is one of the subunits of the TRAPP Golgi trafficking complex [1]. TRAPP subunits are found in two different sized complexes, TRAPP I and TRAPP II. While both complexes contain the same seven subunits, Bet3p, Bet5p, Trs20p, Trs23p, Trs31p, Trs33p and Trs85p, with TRAPPC human equivalents, TRAPP II has the additional three subunits, Trs65p, Trs120p and Trs130p [3]. 
While it has been implicated in cell wall biogenesis and stress response, the role of Trs65 in TRAPP II is supported by the findings that the protein co-localises with Trs130p, and deletion of TRS65 in yeast leads to a conditional lethal phenotype if either one of the other TRAPP II-specific subunits is modified [4]. Furthermore, the trs65 mutant has reduced Ypt31/32p guanine nucleotide exchange, GEF, activity [3]. 29.30 29.30 29.40 30.00 28.90 28.50 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.72 0.70 -5.11 38 136 2010-02-01 16:49:17 2010-02-01 16:49:17 2 4 134 0 104 139 0 297.20 24 49.38 CHANGED cssshhs.......hphshtphslshpsG..plp.hsp...h.....hPlphpspDslohhYKLss..................ss.st.s..................lplplphphh.......tsspspIt.hpWpTtlDFuh.h......P..spslppsspss...o.sh.......ssstssts.h........................................................sshsshtttspstst.ulphoFtGs...sVplGcsFsWpl.llNpSs.........................pshcLslhs.s..................ph...p..t..spsts.t........................spslh.pshhthhpp.st.p..........pssllsLosDl+lGPL.Psssaps-lchlsltsGhh.sL-u....l+lhDhpos-sh..-htclsplls 
.............................................................................................................................................................................................................hsshhp.hph.hpph..pLthtsu..plcslsps..h....................thPlpshspDplohhY+Lss......................ss...........................s.tlpIslphp..l.l........s.ssp.splp.hpWpTplDFshsh..........................s..sp.slp.p.sppss...phsh...........sss.ssts.h.s....................................................................................................................................................................................s.ssss.t.htsss.s..hGlphoF.us....sVpsGc.FsWplhllNpSs...........................................................................ps+clslhs.s.................................ppph..tsp..ht.sps.ssst.t.tt......................hspslhs..-Nhlpthp...+psthp.............tstllsLSs.DhRlGPL.Psssats-LchlultsGhh.sl-u....l+llDlsosEth........-ltclsslls............ 0 30 59 90 +12571 PF12736 CABIT Cell-cycle sustaining, positive selection, Aravind L, Coggill P pcc Aravind L Domain The 'CABIT' domain (for 'cysteine-containing, all- in Themis') is found in a newly identified gene family that has three mammalian homologues (Themis, Icb1 and 9130404H23Rik) that encode proteins with two CABIT domains and a highly conserved proline-rich region. In contrast, Fam59A, Fam59B and related proteins from mammals to cnidarians, including the insect Serrano proteins, have a single copy of the CABIT domain, a proline-rich region and often a C-terminal SAM (sterile-motif) domain. Multiple-sequence alignment has predicted that the CABIT domain adopts an all-strand structure with at least 12 strands, ie a dyad of six-stranded beta-barrel units. The CABIT domain contains a nearly absolutely conserved cysteine residue which is likely to be central to its function. CABIT domain proteins function downstream of tyrosine kinase signalling and interact with GRB2. 
20.30 20.30 20.30 20.40 19.70 20.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.67 0.70 -5.37 28 405 2010-02-01 17:20:35 2010-02-01 17:20:35 2 12 102 0 234 418 0 231.50 21 49.09 CHANGED lPpll+.lp.puphptst............sphlhlpsstp..hpplh.....Apslpppp......hu.plpIPhsYpGpFchhsp..........FpoVp-l........uphhstplhspcshphp.....................s.t................phhlpp...G-plplh......................ttttctttthhp.......................slhshs.pssc....hlpLPhptcGpF.phhspp........paTlc-llc..phcLPhpVclssss.......sh.....t.hstl+Lhshhp-shllsssltpppps..............hclPhp...hplclhtscs..t.ststhhpthhp..p ......................................................................................sp.hhlpptt.........phhh.......hp..htp..tp..........ph.lP.s.Yt.G..hF+hhsp...............asos.-l........upths...c.lhsh..cshpht................................................phslpt...G-plplh.................................t.t.p.t..hhp............................................C.hh..shp..pppc.........tl.LPh.ptcG..pFs.phtstp..........................t.aolpplhp....hcLP.hsVpls.sts..........h...p.ht..h.t.hph..phh.cshlh..hpsh.p.t.h................hch..t...h..ph...pt.................p................................................................... 0 47 66 127 +12572 PF12737 Mating_C C-terminal domain of homeodomain 1 Coggill P pcc Pfam-B_4610 (release 8.0) Domain Mating in fungi is controlled by the loci that determine the mating type of an individual, and only individuals with differing mating types can mate. Basidiomycete fungi have evolved a unique mating system, termed tetrapolar or bifactorial incompatibility, in which mating type is determined by two unlinked loci; compatibility at both loci is required for mating to occur. The multi-allelic tetrapolar mating system is considered to be a novel innovation that could have only evolved once, and is thus unique to the mushroom fungi. 
This domain is C-terminal to the homeodomain transcription factor region. 22.50 22.10 23.80 23.80 22.40 21.30 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.39 0.70 -5.59 20 42 2010-02-01 17:28:21 2010-02-01 17:28:21 2 4 13 0 7 43 0 402.90 27 65.73 CHANGED sstlEsEFAuItupAcpLYp-KFspSpLAs+LDsAV+DhTPsLKtplpsc+p+++ppsp.pp..sp+ut+sYPoPEpSPss...uphhhSPsssslp.shspssspss++RcsSh...to-sccpsppssKRsRsssspsp...............p....uLPSPusSsh--Lsp.ssss.sosph.Pstsssss.................osKRKR+LSDu....thPAsKRPp.......RspslSDPhPh..tt.ss.spWhpthh.sssplhhhhslPsPVolhsPDss.........sP.............LDlplasFslh..hs.pssssssPsus...................ssspshsssslssssh.ssssssLD.shS.atsssasss......Lpsss.s..h..shP.s.ss......hsts..sh..........................hsshsssshshosLhspPssssssss....ulhssupso.ssssuLs...puphcAKp+ELcELcA+stALcAElA ..........shlEtpFAsltspAppLYspKFppSpLAs+LDsuV+DMTss.......l+tp.............hpp...c+tc..........ccpptp.pt........t.............p..........+A....t............csYPSPp..tSPsu...sphhhoPs.........s..ss.p.....ht.sts.h.......sp+RtpSh.....ssp.ptpsptssKR.Rp.shppp......................ssttsLPS...PssSt.-p.....s...ss.....ssh...sss.s.................shKRKRpLSDu....thPusKRsp......sRspssSDPhPh...tstpp..pa........sspl..hhslPsPVos.ssDss.........sP.............lDlplas.ashh.phs.psssh.sssut...................................s.pshplsuhsp.s.....s.tsssLD.shs.a.ssshs.s.............................lpssp.......shP.s.os......spts..sh...................................ssshshsslhspssssshsst........sl.s.upss.ssshsls........ppphpAKpccLctLpApstAlpAElu......................................... 0 6 7 7 +12573 PF12738 PTCB-BRCT twin BRCT domain Wood V, Coggill P pcc Pfam-B_181 (release 24.0) Family This is a BRCT domain that appears in duplicate in most member sequences. BRCT domains are peptide- and phosphopeptide-binding modules. BRCT domains are present in a number of proteins involved in DNA checkpoint controls and DNA repair [1,2]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.90 0.72 -4.11 49 1637 2012-10-02 11:51:29 2010-02-01 17:45:22 2 88 308 12 1119 4819 1157 64.10 25 10.14 CHANGED lhlslouassp-....RptlpphlpthGupastsL.s+.....psTHLls...tpspGpKYctAppas.ltlVshpW ....................................hlsh.os.h.tspp.......+pplt.phspthGu.......p.h..p..ps..l..sp.........csT.H.L.ls....................tp..s...p......u.....p..K....ac..t...A...p........c......h........s...l..........lVsspW.......................................... 1 356 574 889 +12574 PF12739 TRAPPC-Trs85 ER-Golgi trafficking TRAPP I complex 85 kDa subunit Coggill P pcc manual Family This family is one of the subunits of the TRAPP Golgi trafficking complex. TRAPP subunits are found in two different sized complexes, TRAPP I and TRAPP II, and this Trs85 is in the smaller complex. TRAPP I, but Not TRAPP II, functions in ER-Golgi transport [1]. Trs85p was reported to function in the cytosol-to-vacuole targeting pathway, suggesting a role for this subunit in autophagy as well as in secretion [2]. The overall architecture of TRAPP I shows the other components to be Bet3p (TRAPPC3), Bet5p (TRAPPC1), Trs20p (TRAPPC2) , Trs23p (TRAPPC4), Trs31p (TRAPPC5), Trs33p (TRAPPC6a and b) and Trs85p. 
25.20 25.20 25.30 25.40 25.10 25.10 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.55 0.70 -5.82 60 382 2010-02-01 17:51:56 2010-02-01 17:51:56 2 9 271 0 274 400 4 394.30 22 43.06 CHANGED sop......s.s...sshpshtphhpphpphs..............P.ahpss..t..hh.halLlaDs....stsshppspplhpph+p.....phGh.psplLpl...............................tsppsh.s.t.........................s.assh.cph.t.t.t........................................................................................................t.tshhlshs-h.psl+shlp-hlspsllPa.........................ME+clphhs-plss.RKGl..ss+hhphs...+..+a...as........susssssssssts.............................................................................t.hYshsSsEttlR+LADhuFhLpDa.chAhosYchl+p..DapsD+AWpah.....AuspEMsu....luhhhss.p.s..............................tpp.hp.hl-s........Ah.sYhp........................................................................hp.....huhR.shllssEl....L...................pshsth.sust.hhchs......sc......pht........................pAllhEpsuhsa................................................................hps.ts.hh..thR.....KtuhahlLAu...................ccatpssphppAhpshppAhtlYs..........................................ttsWsthp-a............ltt 
.....................................................................................................................tshpth.ph.tp.p.....................P.aht.s...ph..lc.halllHDt.....ttss.pp..........spthhcph+p.....phuh.pshlLpl............................t...sp.....pss.p.............................s.h.ph...pt......t...................................................................................................................................................................................................t.hssh.ls.pDh.sslc.shlp-hshptllPa.........................hE+plphhs-p.lss..........++ul...ssphh.hs.....+..+a...as...................ssptssts.ss.t......................................................................................................................................................................t.hYt.posE.hthR+luDhsFhlpca.clAhstYchh+p..Da..pDpAhhah.....Au..sh.EMsu....lohhhtsts.................................tp...t.hh-p........Ah.sYhs.........................................................................t......huhR.ssllhsEl..............l........................+spst..h..puss...hh+ht.......pp........tlt....................................................................suLlhEpsuhsa...............................................................................................................................................................................................hp..h...h...hR.....KhuhahlLAu...................ppatptsphppuhpshppAhtlap............................................................................................................................................................... 0 83 136 219 +12575 PF12740 Chlorophyllase2 Chlorophyllase enzyme Coggill P pcc manual Family This family consists of several chlorophyllase and chlorophyllase-2 (EC:3.1.1.14) enzymes. Chlorophyllase (Chlase) is the first enzyme involved in chlorophyll (Chl) degradation and catalyses the hydrolysis of an ester bond to yield chlorophyllide and phytol [1]. 
The family includes both plant and Amphioxus members. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.87 0.70 -5.50 21 87 2012-10-03 11:45:05 2010-02-01 17:54:43 2 7 51 0 43 2038 336 210.10 26 60.91 CHANGED sPPKsLllshPsptGs..YPVllFhHGah.lhNsh.YoplhpHluSHGaIVlAPQ.................hYs........lhs...ssspcElpssAplhsWLsp...GLpshLP.....ss...VcPshp+luluGHSRGGKsAFAlALGh.ss...................plpaSAllGlDPVs.....Ghspstps.P.lLTa.pPpSFchs.hPshVIGTGLGs.t+.s.hhPsCAPsGVsHp-FasEC..psPshHFVApDYGHhDMLDDs.......shhu.thshshCKsGt...s+pPMRRFlGGlhVAFLpshlpGc.tscLpt .......................................................................................t..............p.....p.....t..tt....a....PVllFh.p......G......hh......h........s....s.....h.....Y.....s.....p....l.....h.....p..H.......l..A...S.....a.G.a.l.V.l..usp..............................................................ht........................hts.........s.s...p.....p...l..p......h..h...t....l......h....s..W.....h.sp...........sL...p...s.......h.s...........................t....s...p..s......D........h......s..+.....l..ul..u.GHSpGGc...s.u.h.s.hs.htt....................................................................hp..s.l..h.hl-....P...s.s....................t............h....................h.sh....p.......h..t...s......h.Ps.hl..h..GoG..hu..............C....sstuhsat....pFapth..tssthth........h..tphG.HhDh..lcss.........................hC.ts..........hh....s..huh..............s.................................................................................................................................................................... 0 17 35 39 +12576 PF12741 SusD-like Susd and RagB outer membrane lipoprotein Coggill P pcc JCSG_target-390164 Family This is a family of SusD-like proteins, one member of which, BT1043 (Swiss:Q8A8X4), is an outer membrane lipoprotein involved in host glycan metabolism. 
The structures of this and SusD-homologues in the family are dominated by tetratricopeptide repeats that may facilitate association with outer membrane beta-barrel transporters required for glycan uptake. The structure of BT1043 complexed with N-acetyllactosamine reveals that recognition is mediated via hydrogen bonding interactions with the reducing end of beta-N-acetylglucosamine, suggesting a role in binding glycans liberated from the mucin polypeptide. Mammalian distal gut bacteria have an expanded capacity to utilize glycans. In the absence of dietary sources, some species rely on host-derived mucosal glycans. The ability of Bacteroides thetaiotaomicron, a prominent human gut symbiont, to forage host glycans contributes to both its ability to persist within an individual host and its ability to be transmitted naturally to new hosts at birth. 20.50 20.50 21.00 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 529 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.94 0.70 -5.92 5 684 2012-10-11 20:01:02 2010-02-01 18:05:50 2 5 145 12 136 1575 477 436.30 24 89.00 CHANGED MKhpNIKshhph...LSlusLlLGusuCTuNF-.DINoNPhtlTc-DhchDshslGuhhssLpuuVl....s.pssocsNsYQlppsLsuDsauGYau.psTsFsuosNhoNYshsssWss..hlachlaosVYsshpclcphS..-socsPshYALApIlKVAAMHRsTDhYGPIPYSKl....Gp.sohsIPYDSQE-VYsuFFKELDEulplLs-plssupsu.....hpuhDhV.YpG.....cVpKWlKFANSL+LRLAIRIVpVcPuLAKEhAEKAVs.ppuGVI-sNsDNAch...sssshoNPLthIussWs......DTRMuADlhSYMsGYQDPRtAsYF-c.........s.thsssYKGlRsGIsl.ppK-sapsYS.......+PslosocPlhWMNAAEVsFLRAEG.ALRGW.NMGGTAp-LYEpGI+LSF-QaGl.sSuAsoYlADsTssPAsYTDPssscssAsAlSuITV+W-EGAopEEK.LERIITQKWIAhFPpGQEAWSEYRRTGYPKLlPVlsN..NSGGsIsos...sGlRRLPYPpoEYsuNupsVpcAV.uhLGGPDNGAT+VWWDKK 
................................................................................................................................................hh.........................h......h...h................uC...t.....t..t...ap..phNp...s.........t.......s.............................t............t...........h.t..h....t...hh...................................h..Q.....h.......s.h...s.s....h...u...s..a..ht...........t.............s..t..p......s....s..a......s..............p....s.....a..t........h.a..p......t.......h......h..t....t...h..h...s.....t..h...p..l.......t.......................t......t....t......s.............h.....h......u...h.s..p......lh...cs...hhh...tp...h.sDhaG..s.lP..Y..s.ph............ut.....t..........t.....h....t....s....................Y.....D..s.pcp.....l..Y....p........t....hhp-.....L.sp...A.hsh...lp...t.......p.t..s.....t...............h.t...ph..D..h.l...a.t...G..............shppWh+aANSL+LRhAh....R...ls..........s....s.....s......t.....h....A.....p......p...........sp.....p..A...l.....p.............h......G...l.....h....p..s.s.s.........-........s...s.hh........................s....h....p.....N.....s.....h..........h....h..............t....as...............................-.....t....h....s....u...s...h.........s......h....h........t.....u......h....p............D........P.R...h.......h.h.....at.............................................................t.t....t....a.........G......h..................G......................................t....t.......t...........h...t.......t......h.S.............................t.........h....t......t.....s.....s...........h....h....h....h.psu..EshFLpAEu...ul..p....sa...sh.s...s...s.......A...p..s...h.Y...cp...Gl.........p........h.........S.......h.......p......p................h...........s...........h.........t..t....h.t.t...Y.h...t.............p.....t.....................t............................t.a........s..s.......................t.................................................s..t...h....s...............t
......a......pt........s....s......s...............p...p..p..L-pIlT.QK..al..A......a.P.....u....E.u...Ws-h...R...R...T...G...Y.......P......p....h........h........s....s...h...s............s....s....s........l.sst.......................p......R.h.as.......s..t..p.t..s....t..t.h.t..t.uh....t..h.....L........s.........G...............D......p.h..sTclWWsh.......................................................................................................................... 0 64 120 136 +12577 PF12742 Gryzun-like Gryzun, putative Golgi trafficking Coggill P pcc manual Domain Members of this family are involved in Golgi trafficking. 21.30 21.30 21.80 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.59 0.72 -4.16 7 7 2012-10-04 00:47:01 2010-02-02 09:42:44 2 2 5 0 6 116 1 58.60 26 9.76 CHANGED th.pcs-lh....htV-...ScsFhhpG.pplphplhsGpcpch.asFhPLhsGh.hLPplsI .............t....tscLl....lpV-p..sctFhlsGhs.phphpl..supphpl.apFlsLpsGhhhLPpIpl.......... 1 4 5 5 +12578 PF12743 ESR1_C Oestrogen-type nuclear receptor final C-terminal Coggill P pcc Willis S Domain This is the very C-terminal region of a subfamily of nuclear receptors that includes oestrogen receptors and other subfamily 3 group A members. The actual function of this region is not known, but the domain is absent from all the other types of nuclear receptors. Oestrogen receptors modulate AP-1-dependent transcription [1] through two distinct mechanisms: via protein-protein interactions on DNA; and via non-genomic actions. The mechanism used depends on the cellular localisation of the receptor. In addition to the more extensively studied cross-talk on DNA, additional non-genomic actions might be very important in target tissues in which membrane-associated ERs are found. These non-genomic actions probably contribute to the overall physiological responses mediated by ligand-bound ERs [2] and might possibly be mediated via this C-terminal domain. 
22.00 22.00 22.60 23.70 19.20 21.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.09 0.72 -4.42 17 117 2010-02-02 09:45:35 2010-02-02 09:45:35 2 7 69 0 25 113 0 42.60 53 8.76 CHANGED Pss+sus.hEEpspSQL.TouSTSuHSLQsYYls.pEtEsh.sTl ...PsuRuuushEEssQSpLuTsuSTSSHSLQsYYIs.tEsEuhPsTl..... 0 1 1 7 +12579 PF12744 ATG19_autophagy Autophagy protein Atg19, Atg8-binding Wood V, Coggill P pcc Wood V Domain Autophagy is generally known as a process involved in the degradation of bulk cytoplasmic components that are non-specifically sequestered into an autophagosome, where they are sequestered into double-membrane vesicles and delivered to the degradative organelle, the lysosome/vacuole, for breakdown and eventual recycling of the resulting macromolecules. In contrast to autophagy, however, the Cvt pathway is a highly selective process that involves the sequestration of at least two specific cargos that are resident vacuolar hydrolases, aminopeptidase I (Ape1) and alpha-mannosidase (Ams1). These proteins are sequestered within a double-membrane vesicle, termed a Cvt vesicle. The Cvt vesicle is fairly consistent in size, and is much smaller than the autophagosome, being 140-160 nm in diameter. The prApe1 is sequestered within either Cvt vesicles or autophagosomes, depending on the nutrient conditions, and delivered to the vacuole. Autophagy and the Cvt pathway are topologically and mechanistically similar and share most of the same machinery. The Ape1 complex is ultimately enwrapped within either Cvt vesicles or autophagosomes at the perivacuolar PAS. The receptor protein Atg19 binds to the Ape1 complex through the prApe1 propeptide to form the Cvt complex in the cytosol. In the absence of Atg19, prApe1 can form an Ape1 complex, but does not localise at the PAS. Atg19 is a peripheral membrane protein with differing binding sites for both Ape1 and Ams1. 
The Atg8-binding region in the yeast proteins is this very C-terminal residues [3]. 25.00 25.00 34.90 32.90 21.30 21.20 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.57 0.70 -5.11 10 35 2010-02-02 09:48:03 2010-02-02 09:48:03 2 2 24 6 16 35 0 228.10 40 50.46 CHANGED tsIplPED+PELlsFFo.....clcTscQLp-VaptY+sYE+LhQchDupch+h..h....T.huoscuhKphsIphE........ssPN-cLLplphup+DNSLaFpLaNpTNplluGNCpLcF.....psooQIp..h.IcMGPHEIGIKpaKEhha....FPpsho.hussTh-lVNQDGElIhlGKhusSs.IsL+sP.uphSstShQsu.....Q-Ps.sFcsDoLsp.D-SSIlSTshshphD.....Gss.p+shTWEEl ......................................................s.sIplPED+sELlsFFo.....clcTspQLp-VaptY+sYE+...L.QchDuc.............T..huuscuhKptpIp.E.........sPN-tsLplshsp+DNSLaFpLaNpTNpllsGNCpLcFp......spsooQI......IcMGPHEIGIKphKEhpa..............FP.thsh.us.Tlcl.N...Q.G-VIalGKh..u...s..S.s..IsL.+....sP.uphS....sp.ShQsu...........Q-..Ph.sFphD.sLsp...D-S.SIlSTohohphD.......Gss.p+sh.TWEEl............ 0 2 7 12 +12580 PF12745 HGTP_anticodon2 Anticodon binding domain of tRNAs Wood V, Coggill P pcc Pfam-B_20896 (release 24.0) Domain This is an HGTP_anticodon binding domain, found largely on Gcn2 proteins which bind tRNA to down regulate translation in certain stress situations. 
21.00 21.00 21.10 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.66 0.70 -5.25 30 206 2012-10-02 17:25:11 2010-02-02 10:11:00 2 13 179 0 150 351 11 226.00 26 14.78 CHANGED W+spRCDVLVuSFssslLcosGlcllppLWupsISADl.hcss.ShE-llsphppDGhsWIVllKQ.................................t.csp.+sLKVKslsppc..DsDlc.h-EllsaLcsEl.....pt.+p..pcptssspsp.hppsspptsshss..........................pppslhl.sstsRuK.KsN++spW.Ehc.tAptuoppllcshh.....suPIhAlDs.RDElL-hIphToLu.pt-pW.R+Vhts.ssss.RpYsspIastLt..+pts+up+...........hAhlYshRTGcsslhDLp ......................................p+CDVLV.sShss...ss.L.o.pulpllppLWstsIoA-lhh-..s...s.......S....E-lhptsp..ccs.hsallll+p....................................sut....lK.VKslppcc.....-h-..l..p...ts..-Llsalppcl...........cp.+......pctt..t...t..t.....t..s...s..s...........................t.....l.l.sstp.h..ut.p.p.+R..h....p....h..t.tplhpp.......p.h...lhsl-h.p-plh.phl..p.....p....hs.p.pta....pplht.......hs.h.+.p.ahttlhp.l.......p.tt...c....t.............hhlas.h+ss............................................................................................................................................... 1 41 80 119 +12581 PF12746 GNAT_acetyltran GNAT acetyltransferase Coggill P pcc JCSG_pdb_2jlm Family Many of the members are annotated s being Zwittermicin A resistance proteins, whereas others are listed as being GNAT acetyltransferases. The family has similarities to the GNAT acetyltransferase family. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.64 0.70 -5.57 2 486 2012-10-02 22:59:21 2010-02-02 13:52:51 2 2 325 2 48 433 12 231.20 31 95.29 CHANGED Mh.ph.....cphsphFtDaspsllhohLpGhMGshaVsD.p.PpsA.hhhG...hFsFhAGps+....c-LL+.h.s+.hllV.ps.pWpchlEshYtctIcpFhRYthK+ssE.FDhu+LQpLlssLPcua-h+RIDcNlhpsshlcchS+Dhsupa.sVEpalshGlGYsILaKGpVVsGASSYu.YssGIEIEVsTccsYRthGLAphsuAtLILsCL-+GlYPsWDAtshTShKLAEKLGY.hDKsYpsY...........p.p .........................................................................................................................................................................................hlh....hl...p.u...h.Gp..lah....ss.......p..s..ssh..h.....h.G.......aha..h..uG....p.....s...........p...............p....c.h...h.......p....t.........h.......t.....p.......t.....h....h.............l.............l.....s....ps.......p...p.W..pphl......c.p..h.....h.p.p......t.l...pp...............hs..Rht...a...p.p..psp........F.p...p.t...hpp...h....s.p.l..p.sYp..........lp.ID.p.ch.hpp.........t.....p..c.....a....o...p....-...h........hs.p.a..p.S.h.c.s.F.....l.p..........t.G.hG....as...I.l..h..s..s..p..l.l...u.s.ssS.hh.s..a.p.st..h..EI-.ls..T.c.sa.pscGLA.ptluuth...I....h....-CL....p.....p...s...lh...P...t...W...D..s..c..N..h..s..St+lAcpLGaphstsYpsa..............h.................... 2 21 36 42 +12582 PF12747 DdrB DdrB-like protein Bateman A agb Bateman A Family This family includes the Deinococcus DdrB protein which is a ssDNA binding protein. This family also includes some possibly distantly related cyanobacterial proteins. However, these are not strongly supported. The structure of DdrB is known. 
25.00 25.00 33.70 33.50 21.40 20.00 hmmbuild --amino -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.64 0.71 -4.41 6 17 2010-02-03 15:48:23 2010-02-03 15:48:23 2 1 11 5 15 19 0 133.50 37 80.75 CHANGED M............pl.ahss.hsKsThsl..tshLhDVpcthsR.s..................hosuE..sssYQaPL-p.hsFDWshIGAR.chossEGtplV.aRG+uacRRc.pssD....KhsAAIhFSpusKs.........sDG-hc.YlpLhhFRstp .....................................pl...ss.hscsohpl...s.Lh-Vpphhup.s..................hoss-.sssGaphPLspttsFDWshIGAR.hhossEGtphV.a+G+sa+RRcLpssD....cLPAAlKaSRGAKs.-.....chsDGchc..YVoLhhFRst........ 0 1 9 15 +12584 PF12749 Metallothio_Euk Eukaryotic metallothionein Wood V, Coggill P pcc Wood V Family This is a family of eukaryotic metallothioneins. 21.80 21.80 22.50 21.80 21.20 21.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.49 0.72 -11.35 0.72 -3.94 4 26 2012-10-05 18:33:37 2010-02-03 17:50:25 2 2 15 0 5 61 0 67.00 50 92.47 CHANGED Mtt.sp...TpsChCsss.....CpCG..csCsCp.....stCGCssCKs......................uC+CSusssssCKCT...SCpCpp....p ......Mss.spsh.TssChCs.ssps..t.hC+CG..cACpCt.....ssCuCs.sCKs....uC+Css.s.ps.ssC..cCoustsC.Ctp.t....... 0 4 4 4 +12585 PF12750 Maff2 Maff2 family Bateman A agb Jackhmmer:Q187G1_CLOD6 Family This family of short membrane proteins are related to the protein Maff2. Maff2 lies just outside the direct repeats of a tetracycline resistance transposable element. This protein may contain transmembrane helices. 27.00 27.00 29.30 29.30 26.70 26.20 hmmbuild --amino -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.41 0.72 -4.45 7 359 2010-02-04 13:16:13 2010-02-04 13:16:13 2 1 171 0 29 229 33 58.10 69 94.26 CHANGED MtFFsSAlssLpTLVlALGAGLuVWGVINLLEGYGsDNPGAKSQGlKQLMAGGGlhLIGhTLlPLLSuLF ......MtFFspAVsVLpTLVhAlGAGLGlWGVINLhEGYGNDNPGAKSQGhKQLMAGuGlhllGh.LlP.Lushh..... 
0 23 29 29 +12586 PF12751 Vac7 Vacuolar segregation subunit 7 Wood V, Coggill P pcc Pfam-B_10847 (release 24.0) Family Vac7 is localised at the vacuole membrane, a location which is consistent with its involvement in vacuole morphology and inheritance [1]. Vac7 has been shown to function as an upstream regulator of the Fab1 lipid kinase pathway [2]. The Fab1 lipid pathway is important for correct regulation of membrane trafficking events. 25.00 25.00 32.90 26.00 22.70 22.40 hmmbuild --amino -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.54 0.70 -5.61 7 172 2010-02-04 17:05:01 2010-02-04 17:05:01 2 3 110 0 128 174 0 228.40 26 39.02 CHANGED IVETETVSSIPQVuLGsGsGERGsuuRsD.uGolRhKsSsETIRP+KEKKKo.RKPsA.LssGssSSKADIFEAKVASAVDEADsSDS-ETFVYESNPPDsaPsRp.RYHSRTPSATSMASQsDQhuGRoRhuhRDs.HulTGKRSMKFTNs.sY..uol-GDh.spcsu+upuRss.Gp.sHTsRHHHIGR.HGR.us...aPSLFDs-SPFspSQ.p.+SsRHalusuhRQupp....R.sssNYRoh.susKKsG-.YsYDFD.uEGADDERTPLVGSs.RssRSRpG.RRPNSASLRQMEYMppRpRShFuRYGhChllhlLlllllGGAToFllulh+PLlDVpVhtIQNVLASEQEIMlDLpVpAlNPNLhsloIsDMDVNIFAKSRYVGoDph.WR 
......................................................................................................................................................................................................................................................................oKsDhF.tA+lAsA..Vs-sp.SDStE.....T....FlY-sss..p..............................................................................................................................................................................................................................................................................................................................................................................................................................................................h........hh..h.h.hshhhu..p.L.thtl..hpp.lsSpt.lhhsh.htAhNsshhslsltthphplFA+S.a.............................................................................................................................................................. 0 24 64 111 +12587 PF12752 SUZ SUZ domain Aravind L agb Aravind L Domain The SUZ domain is a conserved RNA-binding domain found in eukaryotes and enriched in positively charged amino acids. It was first characterized in the C.elegans protein Szy-20 where it has been shown to bind RNA and allow their localization to the centrosome. Warning- the domain has a compositionally biased character. 27.00 27.00 27.30 27.30 26.80 26.80 hmmbuild --amino -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -3.44 57 527 2010-02-10 14:08:56 2010-02-10 14:08:56 2 8 208 0 305 471 0 58.00 34 9.30 CHANGED ptsphpIh+R....................Pstsssps...............tssspsttsptscolEEREtcYpcARpRIFusss .................................phhIL+R.................................sttsssss.........................tp..pss.pss..ptsKSlEEREpEYpcARcRIFupt.s............. 
0 78 133 211 +12588 PF12753 Nro1 Nuclear pore complex subunit Nro1 Wood V, Coggill P pcc Pfam-B_4826 (release 24.0) Family In fission yeast, this protein is a positive regulator of the stability of Sre1N, the sterol regulatory element-binding protein which is an ER membrane-bound transcription factor that controls adaptation to low oxygen-growth [1]. In addition, the fission yeast Nro1 is a direct inhibitor of a protein that inhibits Sre1N degradation, Ofd1 (an oxoglutarate dioxygenase). The outcome of this reactivity is that Ofd1 acts as an oxygen sensor that regulates the binding of Nro1 to Ofd1 to control the stability of Sre1N [2]. Solution of the structure of Nro1 reveals it to be made up of a number of TPR coils [3]. TPR proteins are composed of three to 16 tandem peptide repeat motifs of 34 amino acids with degenerate sequence. The helical pairs adopt a helix-turn-helix anti-parallel arrangement with interacting helices. In general, TPR motifs are stacked together so that helix A from TPRn is packed between helix B from TPRn and helix A from TPRn+1. In Nro1, the 12 alpha helices forming the six TPR motifs are organised as follows from N terminus to C terminus - TPR1A, TPR1B, TPR2A, TPR2B, TPR3A, TPR3B, TPR4A, TPR4B, TPR5A, TPR5B, TPR6A, and TPR6B with the C-terminal helix (hC) running above the sixth TPR motif with an angle of approximately 45 degrees with TPR6A and TPR6B. The corresponding TPRs structural motifs are longer (50 residues) than are canonical ones (34 amino acids) and are organised into two subdomains - Nro1-N (residues 55-225) and Nro1-C (residues 226-393). The Nro1/Ett1 protein plays a role in nuclear import suggesting that it is residues 4-19 that are interacting with Ofd1 [3]. 
20.80 20.80 21.00 20.80 20.50 20.20 hmmbuild --amino -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.31 0.70 -5.52 18 53 2012-10-11 20:01:03 2010-02-10 15:57:53 2 2 46 5 37 55 0 357.50 36 97.70 CHANGED M.A..KRsLGLGKts+tKKpKh.............psspsscpsss....ssplpVELsEthDs-DEluQL+uLWcsYhcS-+DsEl.....llNGIlHECDRLLRpp...............pp-cth........pLsc.FHuIYALALuELshF+st-pp.............plppaF.DsALERs-lG.hpphscS.hLhlspuKIllp+IPLpYISpLss-Spsp....pl.p.L-pAhpsaphsppphp............sh-lLphlDDLLDIl-NFG+c.pp.pEs.DsD-......t-t.-..plcLscpHPLYtlpps.t.N.pWhR-phlthLsslpc...........................spLhRpls++LGQsYLpcAE.PosVFsoLpYD-..t.ps.cp.hph..tcpAQchAppLhccAlcYLccAcs........---P-TWVslAEAhIsLGNLa-s-ScEQEchYpcAEcILpKANpATpGKYcDlL-NLL ........................................................MA..KRsLGLGKts+tKKpKhpp...........ppppsstpsss......pspholEL..s--sDh-DEluQL.......cGLappYhpS-+DsEh.................lLNGIlHECDRLLR.p...............-ppp...........pLss.hFauIYAlALuELshF+st-pc.............plppaF.-sAlERl-hG.Lpphscs..LhlshuKIlhp+IsLpa.ISpLplcScsp.....phcl..p.L-puhctaphh.cpcsp..........................sh-lLp...hlsDLLDIl-NFG+c.pp.p-s..Dp-s......................---.c...plcLp.pHP..la.lpps.phs.pWhRpph.phlcshpp...............................plhtplspplGp.YLpcAE.PsplahsLpY-c..t.tt.pp......pshptppuQchA.chhppAlcYLcpAp.........c--P-TWVplAEAhIsLGNLh-.-StEQEchYppAEcILt+ANpuopGKap-lL-Nl.............................. 0 12 24 36 +12589 PF12754 Blt1 Cell-cycle control medial ring component Wood V, Coggill P pcc Wood V Family During size-dependent cell cycle transitions controlled by the ubiquitous cyclin-dependent kinase Cdk1, Blt1 has been shown to co-localise with Cdr2 in the medial interphase nodes, as well as with Mid1 which was previously shown to localise to similar interphase structures. 
Physical interactions between Blt1-Mid1, Blt1-Cdr2 and Cdr2-Mid1 were detected, indicating that medial cortical nodes are formed by the ordered, Cdr2-dependent assembly of multiple interacting proteins during interphase. Q5KIH8.1/10-346; Q5KIH8.1/10-90; 27.00 27.00 37.10 29.50 21.10 26.60 hmmbuild --amino -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.04 0.70 -4.83 29 75 2012-10-03 10:59:06 2010-02-10 16:13:33 2 2 70 2 62 73 0 210.10 38 91.22 CHANGED Fs.....................................KSFLuuLDS..........................RPlKLs....................................................uDaVhDPcshshpsP.................YTLPRLps.PHP......................................MPKKh...............................KpstsPGSSKSIolp....................LKSARNPsLcloLsNs.sluo.............................TSVp-LK-sVppRl.ssp...........ssplPlDKIK.....ILaK+KPV...sucTlu-lLus..-sthluGGcElEh.GVMIhGGA.pls.s...........................................tttttttp.pp.hss.....ss.u......so...................uppVltTEtFW-D...............................................................................................................................................LpuFlpp+lK.DttpApplpsl..F+tAWpus 
.....................Fs.KoFLusLDS..........................+PlKLs....................................................uDaV.DPcsasspsP.................ahLPRhss..+........................................................MsKth...............................ppshsPGo..s...+S...IoVp....................LKSs.RNP.sL.clsLssh.slso.............................TSl.-lKpsVpp.................psplPlDKlK.....lLap+KPV...suKolt-lL........us.......su.spplEF.uVMlhGGAssh..ss....................................................t...t.........................................s.tllts-tFWtD...............................................................................................................................................Lpsah..+l+.s...utph..h..F+tuW.t.t................................................... 0 10 28 48 +12590 PF12755 Vac14_Fab1_bd Vacuolar 14 Fab1-binding region Wood V, Coggill P pcc manual Domain Vac14 is a scaffold for the Fab1 kinase complex, a complex that allows for the dynamic interconversion of PI3P and PI(3,5)P2p (phosphoinositide phosphate (PIP) lipids, that are generated transiently on the cytoplasmic face of selected intracellular membranes). This interconversion is regulated by at least five proteins in yeast: the lipid kinase Fab1p, lipid phosphatase Fig4p, the Fab1p activator Vac7p, the Fab1p inhibitor Atg18p, and Vac14p, a protein required for the activity of both Fab1p and Fig4p. This domain appears to be the one responsible for binding to Fab1. The full length Vac14 in yeasts is likely to be a protein carrying a succession of HEAT repeats, most of which have now degenerated. This regulatory system is crucial for the proper functioning of the mammalian nervous system. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.18 0.72 -3.60 35 386 2012-10-11 20:01:03 2010-02-10 16:22:32 2 28 266 0 281 523 5 95.30 42 10.54 CHANGED NtRpGGLIGLAAsuIALGpc......sspalcpIl.PVLsCFsDpDsRVRYYACE......SLYNIuKVu+uplLh..aFN-lFDsLs+.....LsADsD.sV+sGAcLLDRLlKD .....................tRpGGLlGLAAsuluLupc..........sstYLccll.PVL.sCFsDpDsRV.RYYACE......uLYNI.s.KV.u...+.G..c....l.Ls....aFNclF.DuLsK.....................Ls.uDs-.s..V+suA.EL.LDRLlKD................................... 0 99 154 226 +12591 PF12756 zf-C2H2_2 C2H2 type zinc-finger (2 copies) Wood V, Coggill P pcc Pfam-B_88 (release 24.0) Family This family contains two copies of a C2H2-like zinc finger domain. 21.70 10.00 21.70 10.00 21.60 9.90 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.23 0.72 -11.09 0.72 -3.90 112 2196 2012-10-03 11:22:52 2010-02-10 17:43:06 2 245 346 2 1419 5276 41 77.00 18 19.96 CHANGED p..CLFCspp.....ss.hc.......pslpHM......hppHuhalP...-pcaLs..DhpGLlpYLtcKlsh......tphClhCppp.tp...ohpul+pHM..psK.sHs+lsh..csptp....h-hscFYDa.psshs ...........................................................................t...................................................................................................................................................................h.t.........................................hpChh......Cspp...ap....................ohp..s..lptHM....p.p..p...t...Hh.t.........................................t..................................................................... 2 388 637 1011 +12592 PF12757 DUF3812 Protein of unknown function (DUF3812) Wood V, Coggill P pcc Pfam-B_8029 (release 24.0) Family This is a family of fungal proteins whose function is not known. 
25.00 25.00 31.90 31.90 20.00 19.60 hmmbuild --amino -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.35 0.71 -4.15 39 146 2010-02-10 17:51:15 2010-02-10 17:51:15 2 1 115 0 106 150 0 123.70 26 14.39 CHANGED spssLhpsApcNscsplpsh-pcshtp..............shphppchpcpAlttAppp.....tptppppssplslG......GGhals.s-l-slApphlpPsLc-Isc+A.......-tpRsp-h-t+hcpcctccptppt+pcEcct+p ..................................pttlhttApcssptpLpsl-pcshtp....................th.p-apppAhthAppp......................tptpppspsplslG......GGhahs.p-lsslApp+lpPlLs-Is-+A.......cppRtc.......-tph+hcpcctccptppt+pc-cphc................... 1 17 53 90 +12593 PF12758 DUF3813 Protein of unknown function (DUF3813) Mistry J jm14 PfamB_1273 (release 24.0) Family This is an uncharacterised family of Bacillus proteins. 25.00 25.00 45.30 26.00 24.10 24.70 hmmbuild --amino -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -9.17 0.72 -3.83 4 128 2010-02-11 10:51:27 2010-02-11 10:51:27 2 1 128 0 17 59 0 62.90 68 92.96 CHANGED MtNpLaQpA+shVpphlSpuuus...EQQpsl.+AKNAlpSAYANSosAE+pQL+phQ-QLppls ...MGNLLFQQARDAVtsAVSCS.S.Gs.......EQQ-L....VYRAKNALpSAYANSSTAEKVQLREMQEQLQsIp... 0 1 9 11 +12594 PF12759 HTH_Tnp_IS1 InsA_C; InsA C-terminal domain Bateman A agb Bateman A Domain This short domain is found at the C-terminus of the InsA protein. This domain contains a helix-turn-helix domain. 21.70 21.70 21.70 22.00 21.60 21.60 hmmbuild --amino -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.20 0.72 -4.53 4 1328 2012-10-04 14:01:12 2010-02-11 16:39:48 2 7 456 0 75 676 9 42.40 62 42.85 CHANGED YoYcAppPGhKEpIl-MAhNGAGsRhTARsL+IGINTVlRTLKNSR ..........aTYsAsQP...Gs+p.KIIDMAM.NGlG........sRsTARl.ht.VGlNTll.RpLKNSt............ 
1 16 31 61 +12595 PF12760 Zn_Tnp_IS1595 Zn_ribbon_3; Transposase zinc-ribbon domain Bateman A agb Pfam-B_3 (Release 24.0) Domain This zinc binding domain is found in a range of transposase proteins such as ISSPO8, ISSOD11, ISRSSP2 etc. It is likely a zinc-binding beta ribbon domain that could bind the DNA. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild --amino -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.85 0.72 -4.15 73 764 2012-10-03 10:42:43 2010-02-11 17:07:24 2 7 279 0 235 786 170 47.70 34 15.88 CHANGED ssEcpstphLpphRWssG.....hs.CP+CGsp...chaplps.........tthapCpp..Cp+p ...............s-ppChphLpphRWPpG.......hs.C.P+Cusp...tthphpp...............pph..a.pCps......Ct+p.............. 0 51 128 153 +12596 PF12761 End3 E3; Actin cytoskeleton-regulatory complex protein END3 Wood V, Coggill P pcc Pfam-B_51079 (release 24.0) Family Endocytosis is accomplished through the sequential recruitment at endocytic sites of proteins that drive cargo sorting, membrane invagination and vesicle release [1]. End3p is part of the coat module protein complex Pan1, along with Pan1p, Sla1p, and Sla2p [2]. The proteins in this complex are regulated by phosphorylation events. End3p also regulates the cortical actin cytoskeleton [3,4]. The subunits of the Pan1 complex are homologous to mammalian intersectin. 
25.00 25.00 25.80 25.00 23.80 23.30 hmmbuild --amino -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.44 0.71 -4.66 21 129 2010-02-11 19:37:22 2010-02-11 19:37:22 2 5 114 0 90 124 0 172.20 38 48.58 CHANGED sKDtsLsFLHILNQRcc.GhRIPRsVPASLRAoFpKpp.sYsLs......pspsph.tssssssTss.pKtpFu-sYLs+l...Ghusp..........shp...ppGTDFSu.spsoDWEEVRL+RELA-LEshLscspptscsp.........spsspsp.uLlKcEhEQLLcYKccpLpphp...spussutsLpsl+cDlchlcpQVssLcpaLps+pppLpcLcp ............................sKDssLsFLHILNpRc-.GhRIPRslPASLRuoFppsplsYplcs.........tstpph.tsptsspTsou..pKtpFu-tYLs+lGhus.............pstGTDFSs...psp-WEcVRL++pLt-L-pclptspttsppp..........ttpstsp.sLlKc...Eh.QLLcYKccpLpchpp..ucsptutsLcplp-DlpslppQV-sLcsaL...tp+pp.LppLp.t.................................. 0 16 44 76 +12597 PF12762 DDE_Tnp_IS1595 Transposase_38; ISXO2-like transposase domain Bateman A agb Pfam-B_3 (Release 24.0) Domain This domain probably functions as an integrase that is found in a wide variety of transposases, including ISXO2. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.74 0.71 -4.38 78 1888 2012-10-03 01:22:09 2010-02-12 10:55:34 2 15 595 0 571 1721 293 125.10 25 51.91 CHANGED hLsG..........VE.........lD-sYl..GGcp....p......sp......cG+.......s........t..ssKssVlshl-ps...................t.sh+shstll.ts.hstpsltshl....pctlptsuplhTDphssYsslstt..asHpplsastp......hs.......tt.....sshshlcshhuplKRtlhGsaHtlu.scaLppYlsEhsaRaN .........................................................................tG......lp........hD-s.hh.....utpp............................tp.......................tG+.......s..............................................ttKh..lhs..hlcps.......................................pshh.hl......ss..hpptsl...h..l...............tpp.lp.s....s..uh......l.h..o.D..s..htu..Y.p.....t.L........p............t...t........a..t.......H.......h........t...l.spu...tp.............s.......................pt........phphlps.hhs.p.hKphl..t....t.aps.ht.tchh........aLtch.a+................................. 0 221 343 458 +12598 PF12763 EF-hand_4 efhand_3; Cytoskeletal-regulatory complex EF hand Wood V, Coggill P pcc Pfam-B_51079 (release 24.0) Family This is an efhand family from the N-terminal of actin cytoskeleton-regulatory complex END3 and similar proteins from fungi and closely related species. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.43 0.72 -4.29 13 2526 2012-10-02 16:17:27 2010-02-15 09:18:54 2 126 302 12 1472 3070 25 94.90 32 17.53 CHANGED MP...+LE-aEIKKYW-IFpGLpPtsNKLoGDpVuPVLKNS+LssDQLu+IW-LuDIDsDGpLDFEEFCIsMRLIFDlVNGshssVPscLPsWLVPuSKAHLIQANc ...................................hht.............p.ap.p.lF..p...s.h....s..s..h..s.....G..h..l.oG.s.p.A....+....s.h...h..h....p.....S....p....L..P..p..s.h...L...u..c.........IW....s.............L....u..Dhcp-G..tLstpEFs..l...AM.+.Ll...........h..h..p..t................................lPt.L.Ps..t.hh..............ttt............................... 0 408 686 1107 +12599 PF12764 Gly-rich_Ago1 Glycine-rich region of argonaut Coggill P pcc Pfam-B_7248 (release 24.0) Domain This domain is often found at the very N-terminal of argonaut-like proteins. 25.00 25.00 34.20 33.60 24.10 21.10 hmmbuild --amino -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.60 0.72 -3.39 12 50 2010-02-15 11:44:43 2010-02-15 11:44:43 2 2 30 0 16 50 0 101.70 50 14.75 CHANGED GGs.EYpsp...GRGsPP.Q..GGpsth.GGGpuGu.......Pss.....s.R.ssPELHQAT...pssYQA.hss.PhP........SEsusSstP.spsssh..tQQFQQLolpQtu.oSQAIQs.Pu ................GGP.EYQuR.....GRGG..Ps.Q..........GGtsta..GGGRGGu.............PSu...ssP.Rp..oVP.ELHQAT...pss.YQA.VsopPos........SEsusophP..scssss..tQQFpQLulcpt.u.sSQAI.QPhPs...... 0 1 8 13 +12600 PF12765 Cohesin_HEAT HEAT repeat associated with sister chromatid cohesion Wodd V, Coggill P pcc Pfam-B_443 (release 24.0) Family This HEAT repeat is found most frequently in sister chromatid cohesion proteins such as Nipped-B. HEAT repeats are found tandemly repeated in many proteins, and they appear to serve as flexible scaffolding on which other components can assemble. 
21.00 8.90 21.00 8.90 20.90 8.80 hmmbuild --amino -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.61 0.72 -3.95 52 424 2012-10-11 20:01:03 2010-02-15 11:48:56 2 36 262 0 276 589 36 37.80 32 2.43 CHANGED +slstllptDsslLsps....plppslpp+htDsussVR-Asl-Ll ....................+sl..hlthDsplLst................ph.phlpp+h.tDsu.ssVR-AAlpLl.... 1 90 146 220 +12601 PF12766 Pyridox_oxase_2 Pyridoxamine 5'-phosphate oxidase Wood V, Coggill P pcc Pfam-B_2486 (release 24.0) Family Pyridoxamine 5'-phosphate oxidase catalyses the oxidation of pyridoxamine-5-P (PMP) and pyridoxine-5-P (PNP) to pyridoxal-5-P (PLP), the terminal step in the de novo biosynthesis of PLP in Escherichia coli and part of the salvage pathway of this coenzyme in both E. coli and mammalian cells. This region is the flavoprotein FMN-binding domain. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.53 0.72 -3.69 60 319 2012-10-02 11:35:36 2010-02-15 12:53:17 2 6 304 4 186 1478 2504 100.10 30 42.69 CHANGED huPW+shlppulcp......p..ssphhQLATls.s.ssp.....P+sRTlVFRGFhtp..............................ssshLphsTDtRocKlpplt...............p..ssts..EhsaaascotcQaRlpGpshlls ..................................h...Wt..htpulpp......t..s.phhpLATls....sup..................P.csRTlVaRuahtp...................................................................................................................sshLphpTDtRSpKltclt.......................p.sshsEhs.a..a...h..........s...........c...........stp...QaRlpGpstll.s........................................ 0 55 111 157 +12602 PF12767 SAGA-Tad1 Transcriptional regulator of RNA polII, SAGA, subunit Wood V, Coggill P pcc Pfam-B_319 (release 24.0) Family The yeast SAGA complex is a multifunctional coactivator that regulates transcription by RNA polymerase II [1,2]. 
It is formed of five major modular subunits and shows a high degree of structural conservation to human TFTC and STAGA [3]. The complex can also be conceived of as consisting of two histone-fold-containing core subunits, and this family is one of these. As a family it is likely to carry binding regions for interactions with a number of the other components of the complex. 27.80 27.80 28.10 27.80 27.70 27.70 hmmbuild --amino -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.61 0.70 -4.71 65 356 2010-02-15 13:09:22 2010-02-15 13:09:22 2 11 232 0 254 344 0 212.60 22 55.99 CHANGED hs.....tstRl-ltpl+pplhptlG.cphpcYhptLstFlh..............s+lo+pEh.sp.sptll.................spcp.....l+LHNpLlhuILsNu....htps....ss...ss...............................tptp.hstsssh..ssptspp.pphccphht.s.csptchpth...............................................t.t.t.....ssh.....tth.chs.hpstpphpph......................ppp.............................h.tt...shsscohpLPDspsLptRh.hhshcpGL........sulstssspllshuL-saLKsllpuslshsp ..........................................................st.......t.-lt.hhppltptlG....cphppYhttlphalh.............................t+ls+pEasp.stthL.......................................stcp...........l+LHNphlhuIlsss....tp........Ps...ts.............................................ttt.s....s....s..h..tst.ts.p....hptphh........ptph.ht..............................................................h.............t....ph.....hptspp..t.....................................................................................................................................................................t....h.s...cs.htLP-..ttplptRh..hs..hcpGL..........ssss.pssphlshul-salKpllpsshshht................................................. 
0 71 137 208 +12603 PF12768 Rax2 Cortical protein marker for cell polarity Wood V, Coggill P pcc Pfam-B_2071 (release 24.0) Family Diploid yeast cells repeatedly polarize and bud from their poles, due probably to the presence of highly stable membrane markers, and Rax2 is one such marker. It is inherited immutably at the cell cortex for multiple generations, and has a half-life exceeding several generations. The persistent inheritance of cortical protein markers would provide a means of coupling a cell's history with the future development of a precise morphogenetic form [1]. Both Rax1 and Rax2 localise to the distal pole as well as to the division site and they interact both with each other and with Bud8p and Bud9p in the establishment and/or maintenance of the cortical markers for bipolar budding [2]. thus Rax2 is likely to control cell polarity during vegetative growth, and in fission yeast this is done by regulating the localisation of for3p [3]. 25.60 24.90 25.60 25.20 25.50 24.60 hmmbuild --amino -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.89 0.70 -5.19 34 157 2010-02-15 13:49:32 2010-02-15 13:49:32 2 7 131 0 124 169 15 251.50 25 23.26 CHANGED lhVGGsFppAGuLsCsulChashssspWspPuss......lpGsVsslpah..ss.spLlluG.sLTlss.sssslsoYshpspsapshtu.sp..t.ls.uslsuhshhtsDtsphhlsGp..upGssalhta...DGopWpphss.lhts..sTsIpulpllsL..s.ps+pp...sshhsssplLlloGplsl.sca..GpsSuALasGo..shhPalloo..................p...ssspsGslsplFhp..............................ssssaps.p....h....cphupG...hVVLIuhuhALGsshLlslhGlIhshhpp+ppthh.sspt.th-csp.hpplPP 
..............................................................lhVGGsF..p.p.A...G..u.Ls.CsulCha.shssspWsp.Gss.......lpG..s.V..ssltas...ss..spLlluG.shs.....lss....s..sst......lApashpsp.s..Wsshsu.s...ttls.Gs.l...suhshstpsss...p..hhhuGp......s....s....u...ss..alhta...cGs.pWpsh...st..hhts...to.lpslphhsl..p...psctt...tshhspsp.hLhlsGpl.l.ssa..G...ssuslasGs..shhPahlos..................p...ssspsGt.hstlF.p....................................pp..hpt...............phhshG...hVVLluhslALGhh.....hllslhGllhshh.+c.ppt...h..t....hhpc...hptlsP................................................ 0 45 77 109 +12604 PF12769 DUF3814 Domain of unknown function (DUF3814) Mistry J jm14 Pfam-B_10 (release 24.0) Family This is a domain of unknown function. It is often found in combination with Pfam:PF05222, Pfam:PF01262 and Pfam:PF02233 on alanine dehydrogenase and pyridine nucleotide transhydrogenase enzymes. 25.00 25.00 30.30 30.30 22.90 22.90 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.07 0.72 -3.82 159 2249 2010-02-16 09:34:05 2010-02-16 09:34:05 2 12 2099 0 638 1524 1931 86.90 57 21.39 CHANGED lttlslFlLAshlGa.VlhpVsssLHTPLMSsTNAISGIllVGAllth..u.............................hsplLuhlAlhlAolNlsGGFhVTcRMLpMF++ ......h..phslFsLAshVGYaVVWsVo.ALHTPL....MSV.TNAISGIIlVGALLtlGpu...........................tshsphLuhlAVllAoINIF..GGFhVTpRMLcMF+K......... 0 187 373 522 +12605 PF12770 CHAT CHAT domain Bateman A, Rawlings ND agb Pfam-B_4 (Release 24.0) Domain These proteins appear to be related to peptidases in peptidase clan CD that includes the caspases. This domain has been termed the CHAT domain for Caspase HetF Associated with Tprs. This family has been identified as a sister group to the separins [1]. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild --amino -o /dev/null --hand HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.14 0.70 -5.28 82 1893 2012-10-03 02:24:44 2010-02-16 10:52:24 2 520 555 0 959 2220 184 278.30 18 28.37 CHANGED ptsuppLaphLlt.P.lttt..l.........tshp...pLllssDu......sLptlPhtu..Lhssp.........................paLlEc...asls..hhsShp....Lpphp.tshpstps..........Lshussphs......t........................................................................................................................ssLPusttE.hpslsph.............hhts..............pshhsppu..Thpslppth...........pphpllHlATHu.Fp.........................................ss.tpShlhLt..................................................ssthLshp-ltp..l....sLst...........scLllLSACpTu..........hG.........ss-uhhG....lstshhhAGspusluSLW.VsD..puTttLMppFYppLtp.......shttucAL+pAQlthhpsphh..................................................pc.PaaW...........uuF..hhhGs .................................................................................................................................................................................................t......h.p.h.h.......s..h.t....l........................................thp..........plhh.h.sps............Lt.lPhth......l.stp.......................................................thlhp.p.......h...lt.....hhss.h..p..........hh.th..t.......tt.t..................................................hhh.u..ssp.................................................................................................................................................................................................................sLs.t.st..tE...h...ptltph.....................................ht.t.........................phh.ht.p.p....s.....ot..p.plhpth.....................pphp..ll..HhAsHGths...............................................................................................tt.s...p.u..tlhlt....................................................
..........................................tsthLs...h.p.-lhp....h......plpt.................spL..llLSA.Cpou...........................................................hst..........tsc..t..h...h.u..............Lspuhl.t.u...G..s.ps.Vlu...o..h..W..t..VsD...........pss.tt.hh.......pt..FYp.p.Ltp..................shshs.p.....A...L...p..pA.phthhpt...........................................................................................s..W.......ush.h.............................................................................................................................................. 1 381 710 898 +12606 PF12771 SusD-like_2 Starch-binding associating with outer membrane Coggill P pcc JCSG structure (Target 390167) Family SusD is a secreted starch-binding protein with an N-terminal lipid tail that allows it to associate with the outer membrane. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.59 0.70 -6.10 5 655 2012-10-11 20:01:03 2010-02-16 10:56:58 2 5 160 4 277 1675 449 427.60 19 89.34 CHANGED MKKpILlIVLu.....alClSCuNLEEMNINPspPTpTHPpLLLTslphssF+..pGToGlYAQKhllQsDGEsuDQYYKWsRGSFGuYsTLR........NVpKMsEEAERI..NsPs...................YhALuKFFRuYYFYcLTLsFGDIPYSQALKGETEp.YoPsYDuQEDVFcuILpEL+EAD-ILushsolIuG.......DIIYNGNsspWRKLINSFRLKVLlTLSN+S..GElshsSEFcuItTNSPLM-S.sDNGQLVaLDQQsNRYPpFNuuu.WSGhYMDuTFIQRM+ER+DPRLFIYSTQTsKuKs-GKsIsDFSuYEGGDPAAPYG-uh.KsucGDlS.lNDRa+pDPlsEPhhLhGYAEpQhILAEAAVRGW.IuGsAcsaYEcGV+uSFcFYEsaucDYutYLu.NAVApYLpEPLVD..................FopASSTEEKIERIIMQKYLsoFaQhsW-GFYDaLRTGYP-FRRPoGosIP..........+RWhYPQSEYssNosNVSoAIo+QFGuGNDcIspssWWtK 
......................................................................................................................................................................h........h.h...h....s......s....s...p...p...h....t..-....l...N....p..s....P..s.....t......s...p.......p.......s.....s.........s.....t.......h.......l.......h..s.......t.......h...........h........p.......h.......h..t...................s.......................t.............h....h...........h.....t...............h.............h......t...........................s..........t.......................t......p...h.......h......t.................t......t.........s....t.........h......s......h...........h....t....t..h.t.................................s.h.....p...h.....h...p....t....s.....p..p..h........t.t..........................................................................h...h...u...l...u...pl.h..p.......uah...hthhsDhaGD.l.P.Y...o..........-..........A.....h.....p.........u........t........t.........................h..p.....P.....paD..s.Q...c.s......l.Y.....t....s.....l.........hp.......p.Lc.p..A........s....h.....l........s.....s.....s......s.....s..............h.....s..u.....................Dh.l.a.....s.....G......s.........h....s..........p......W....h+huso..L+l.Rh.h....h......+...l.s.....phs.......s.t....h.s....h..t..t.h.........h...t..t.....l....h.......s.....s.....t...s...h........h...p.....s......s.-........s.......h...................h...........h..............s...t........t.....s.....s....................s........h........h........t...............t.........t.......................h...............s....s..........h.......h...h............s..........p..h...h..h......s..............h...h...........p........t............h............p..............D.P...R.l..................h........h.........h.........t..............................................................................t..........................................t......................t.......h.................t...............................
........................................s.........................................................................................................................................................................................s...h..................h........h..hshuEhtFlhAEu..h..h..+..u..h......h...s....s......s.......A...pph..Y...........p...p....ul.p.u....u...h....p........h..........s........h...................s....s....t..................................h.....h.......s......................................t...................................................................................................................t...t....t..l..p..p.Ih.h.Q.+alu.ha.....p..s..h.-sa.phRR.........o.....G............h......P..........t........h.......................s..................s...............t......hs...............................hRh...Y..P....t..p....p..h..N........th.....t................................................................................................................................................................................................................................................................ 0 119 251 277 +12607 PF12772 GHBP Growth hormone receptor binding Coggill P pcc Pfam-B_7 (release 24.0) Family Growth hormone receptor binding protein is produced either by proteolysis of the GHR (growth hormone receptor) at the cell surface thereby releasing its extracellular domain, the GHBP (growth hormone-binding protein), or, in rodents, by alternative processing of the GHR transcript. The sheddase proteolytic enzyme responsible for the cleavage is TACE (tumour necrosis factor-alpha-converting enzyme) [1,2]. Growth hormone (GH) binding to GH receptor (GHR) is the initial step that leads to the physiological functions of the hormone [3]. The biological effects of GHBP are determined by the serum levels of growth hormone (GH), which can vary. 
Low levels of GH can result in a dwarf phenotype and have been positively correlated with an increased life expectancy. High levels of GH can lead to gigantism or a clinical syndrome termed acromegaly and have been implicated in diabetic eye and kidney damage [4]. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.00 18 1202 2010-02-16 14:01:20 2010-02-16 14:01:20 2 6 789 0 38 1084 0 236.70 57 83.11 CHANGED KGKL-ElNoILuup.....csYKPphYpDDsWVEFIElD.lD..DssEK..spGSDTpRLL......Spc.p.pcut.sh.GhKDDDSGRsSCY-PDl.-sDh................hL.ttp.sLLshctcstpp..ssspcus.....................sl.tst.cSsp.slps..Qhos..pohsNhDFYAQVSDlTPAGuVVLSPGQps+st.pspsst.............................cpE.hpp.hQh.h..s.ssuYhsEusA+phss.sP...pscst..shcPphsppc.a.ss....sssutsstoshh....uPsup..hPVsDYTsVp.VcS.puLlLNsss.....P......sscp.h ..............................................thap-DsWVEFIELDID....D..sDEK..TEGSDTDRLL......SsD.H.pKSl.sILGAKDDDSGRTSCY-PDI.L.-TDFpsuDhsDu..op.ht.ppL.KtEsDLLCLDpKN.ps....shsts..t...s.......s.csKPpsLl.utsESsp..Q...spT...hSNP...sSLA...N...IDFYAQVSDITPAGuVVLSPGQK.KAG....huQss.h.............................p.E..hss.sQtNa...hssAYFCEuDAKKCIshsP...+hEsps.phcPSFsQEDhYITTESL.TToA.hstss-h....sPsuE..hPVPDYT.olHhVQSPpGLlLNAsu.................sp.h................................. 0 1 5 14 +12608 PF12773 DZR Double zinc ribbon Bateman A agb Pfam-B_12 (Release 24.0) Family This family consists of a pair of zinc ribbon domains. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.35 0.72 -4.14 109 1392 2012-10-03 10:42:43 2010-02-16 17:13:36 2 114 823 0 495 1601 122 52.60 32 16.02 CHANGED CspCtpt........ssss..upaCtpCGstLt..............................tt....CspCst....s.ssssspFCspCG ............................................CspCGt.t.......................sss.....ucFCspCG.stls............................................................................tt...hCs..pCup...........hs.ssspFCspCG..................................................... 0 174 347 430 +12609 PF12774 AAA_6 Hydrolytic ATP binding site of dynein motor region D1 Coggill P pcc Pfam-B_14 (release 24.0) Family the 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D1 unit of the motor and contains the hydrolytic ATP binding site [1]. 
21.30 21.30 21.30 21.40 21.10 20.80 hmmbuild --amino -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.79 0.70 -4.82 3 2434 2012-10-05 12:31:09 2010-02-17 10:48:36 2 274 341 14 1635 2341 119 209.40 54 6.46 CHANGED YSYEYLGNTPRLVITPLTDRCYITLTQSLHLlMSGAPAGPAGTGKTETTKDLGRALGIMVYVFNCSEQMDYKSCGNIYKGLAQTGAWGCFDEFNRISVEVLSVVAVQVKCVQDAIRDKKctFNFLGEEISLIPSVGIFITMNPGYAGRTELPENLKALFRPCAMVVPDFELICEIMLVAEGFLEARLLARKFITLYTLCKELLSKQDHYDWGLRAIKSVLVVAGSLKRGDP .......................................................YuaEYL.G.s.s.s.R..LVITP..LT............D...R........C.Yl.....T..LspA....L...p...h...p.h................GG..u..P......tGPA.G.TG...........KTET.sKDLu+uLGh.s.............lVFN........C...S.-........thD......apu.................hG+hFp.G..L.uQ...s.....G..........AWuCFDEFN.RIplc.VLSV.......lA...p...Q..l.h............s......I........p..........p........A..........l............p........p.............p.............t....................p..................p.............a............................F........................G..............p..............p...............l.....p..L...s.....s...s...s.u...lF....I.T.M....N.P.................G....Y.................A..G.......R........o...E....L.......P-.......NL................K..s............LF.R..slAMhV......PDh.t.lIs.ElhLhSpG.F..h....p..u.c...LupK...hhsh.a.pLs.p..EpLS..p..............Qt....HYDaGhRA.lK.oVLhsAGshKR....................................................................... 0 696 866 1339 +12610 PF12775 AAA_7 P-loop containing dynein motor region D3 Coggill P pcc Pfam-B_14 (release 24.0) Family the 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D3 and is an ATP binding site [1]. 
20.90 20.90 20.90 21.50 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.83 0.70 -5.32 3 2098 2012-10-05 12:31:09 2010-02-17 10:54:55 2 245 278 14 1513 2001 100 255.70 31 7.07 CHANGED FELDPElPLQAsLVHToETIRVRYFMDLLMERuRPVMLVGNAGTGKSVLVGDKLuSLssDuYLVpNVPFNYYTTSAMLQuVLEKPLEKKAGRNYGPPGTKKLVYFIDDMNMPEVDsYGTVQPHTLIRQHMDY+HWYDRpKLTLKEIHNCQYVSCMNPTAGSFTINSRLQRHFCVFALSFPGQDALSTIYNoILTQHLAhtSVSsALQKlSPsLVuAALALHQKIAsTFLPTAIKFHYVFNLRDLSNIFQGLLFSosElLKoPlDLlRLWLHE .............................................................................................................t.l.l......V....PTh-TsR.h.p.a..llp.hh.........l.p.........p......p..c...Pl.lllGss.Go.GK.o......s.hl.........p....s...h.L.....p............p...l...........s...............p.....................p.......p......h.............h..................s....h.......t....l..s..........F...Su....t....T..o........u......t...........h..Q.......c.........h....l....-...s......h...l.......-...K......+................p...t............t...............s...........a..G..P......s....t...........u...........K.....+..h...l.l.Fl.DDlNMP.............t....h-p.YG...........sQ.........s.l.p.llRQhh-.h..s..s..a...Y......D...p.....p...c....h...s.......h..hp.l.h..........c......l...p....h.l.u.u..M...............s....P..s.......u.....G........R.....p...s................l.ss...........R...h....h..R+Fs.lhsls...hP..s..p..p..s.lp..pIa...u..s....l..........h..........p.......t....a.......h.......p.........................t..........a...............s..................p.....................l..p....p..h...s..p.....t...l...l....p.u.s.........l..p.l..ap..p..s...t...p..p..hL..P.Tss..K..HY....lFNL...RDlo+....l...hp.......................G.l..h.h....s............p.....p..h.........h....p...s................p.....ll.+LWhHE............................................................................................... 
0 634 799 1238 +12611 PF12776 Myb_DNA-bind_3 Myb/SANT-like DNA-binding domain Bateman A agb Pfam-B_16 (Release 24.0) Domain This presumed domain appears to be related to other Myb/SANT like DNA binding domains. In particular Pfam:PF10545 seems most related. This family is greatly expanded in plants and appears in several proteins annotated as transposon proteins. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild --amino -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.32 0.72 -3.36 73 836 2012-10-04 14:01:12 2010-02-17 10:57:23 2 29 65 0 554 860 0 91.40 20 25.51 CHANGED pWs..sphp+hhl-lhh-phptGsp.......tssasppuWpp.lhppapp....psttpas+pQL+s+...hcpL+cpaphhpplhppss.............huWDsppptlsAs.cchWcph ................................Ws.st.pphhlchh.h.c.phptGpp.......ssshpppuapp.lhp....phpp.......phsh.p.h......s.p.pQlps+.......hcp.h+cpaph.hppLh..p..p.su.........huWDstp.....p.h.l.sAs..c..c.h.Wpp............................... 0 85 356 454 +12612 PF12777 MT Microtubule-binding stalk of dynein motor Coggill P pcc Pfam-B_14 (release 24.0) Domain the 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This family is the region between D4 and D5 and is the two predicted alpha-helical coiled coil segments that form the stalk supporting the ATP-sensitive microtubule binding component [1]. 
24.00 23.30 24.00 23.70 23.90 23.20 hmmbuild --amino -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.04 0.70 -5.70 3 2476 2010-02-17 11:31:39 2010-02-17 11:31:39 2 258 489 19 1602 2329 102 305.10 29 9.65 CHANGED ERLENGLhKLQSTAuQVDDLKAKLAuQEVELKQKNEDADKLIQVVGVETEKVSKEKAIAD-EEpKVAVINcEVocKQKDCEEDLAKAEPALLAAQEALNTLNKNNLTELKSFGSPPsAVlNVoAAVMVLhAPsGKIPKDRSWKAAKVsMuKVDuFLDSLINFDKENIHENCLKAIKPYLsDPEF-PEFI+oKShAAAGLCSWVINIVRFYEVYCDVEPKRQALpKANAELsAAQEKLAtIKAKIAELNANLAcLTApFEKATADKLKCQQEAEATuRTIoLANRLVGGLASENVRWAEAVpNFKpQE+TLCGDVLLITAFVSYlGaFTKKYRs-LhERhWlPYL .........................................................................................................................t..hpsG.LpKLtpsttp...Vt..thptpL..t.......p.....L.t....tp.t.....p.s.p.t.h...h.t...l.....t.......h...p...p..t..t...................s..p......p..p....c..t.........s..t.......t...p.....t............t......h...t............t.t....t...h...............t..t...h.......p.......p.......p.......s....p.......t.......-L.t.pA....Ps..L..p.t..A...tA............Lp...s...l..p..t.........t.sl..sp.l+uhtp.......PP..t.h.V...t.......hl..h..p.u..V..............h........l.l..............h...............t............................................................................................p..........................s............W.......t..s..u..+.....t....h.........h......s.....c....................p..............F...L....p.s.L...h...s......a....D...K......-....s...I..s...p...p....h..h...c..t.....l..p..t...........a......l..p...p..s..-....F.s....s....c............h...l...p..ps..S...t..Astu.Ls....pWVhA...h...t.Yt.c......V.h.+.....V.......t....P....K.+..p...p.......L....t...p...s...p.......tp....L.......pt....stp.p...L.p..ph.ptp.Lpp.l.pp...pltt.Lps...pa.c..p.t..hs...c..K....p..pl...pp...........p.............h.c........h.s........p............p..+......lpp..App...LlsuLuu..E+.........R.........W...................pp....s....s...p....ph.pt.ph..ppL.sGDsLl.......uuu..hluYhGsFstp.
aRpphhpt......hhh................................................................ 0 659 838 1298 +12613 PF12778 PXPV PXPV repeat (3 copies) Bateman A agb Pfam-B_15 (Release 24.0) Repeat This short repeat is found in multiple copies in a variety of Burkholderia proteins. The function of this region is unknown. 20.20 15.50 20.20 15.60 20.10 15.40 hmmbuild --amino -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.40 0.72 -4.25 7 141 2010-02-17 11:33:22 2010-02-17 11:33:22 2 4 87 0 42 110 3 22.60 66 23.66 CHANGED Ghs...APVhs..tPAPVhl.....APtPslV ...........GsP....APVYV..tPAPVYVAP.tPslV. 0 3 14 32 +12614 PF12779 YXWGXW YXWGXW repeat (2 copies) Bateman A agb Pfam-B_15 (Release 24.0) Repeat This short repeat contains the motif YXWXXGXW where X can be any amino acid. It is generally found in 2-5 copies in short secreted bacterial proteins. Its function is as yet unknown. 18.00 5.00 18.10 5.00 17.90 4.90 hmmbuild --amino -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.38 0.72 -3.97 88 527 2010-02-17 11:41:07 2010-02-17 11:41:07 2 11 179 0 276 526 23 23.20 37 20.84 CHANGED shhYlapsGhWah.....u.sa.sGsWhh ...........sYlWssGaWth.........taha..hsGhW...................... 3 66 118 207 +12615 PF12780 AAA_8 P-loop containing dynein motor region D4 Coggill P pcc Pfam-B_14 (release 24.0) Domain The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D4 ATP-binding region of the motor [1]. 
22.90 22.50 23.10 22.60 22.80 22.40 hmmbuild --amino -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.65 0.70 -5.42 3 2160 2012-10-05 12:31:09 2010-02-17 12:48:52 2 240 286 14 1532 2037 96 247.80 35 7.05 CHANGED YNEVNAVMNLVLFEDAMpHVCRINRILESPRGNALLVGVGGSGKQSLoRLAAYISSLDVFQITLRKGYGIPDLKlDLAolClKAGVKNlsTVFLMTDAQVADE+FLVLINDLLASGEIPDLFuDDEVENIISuVRNEVKShGLsDTRENCWKFFIDRlRRQLKVlLCFSPVGosLRVRSRKFPAVVNCTAIDWFHEWPQ-ALVSVShRFL-ETEGIcs-VKpSIScFMAYVHTSVNEoS+lYLoNERRYNYTTPKSFLEQIKLYQSLL ...................................................h..sLV.lFp-AlpH.................ls.RIsRllc.p.P.p..G.ps.L.LlG.V.GG.SG+pSL..oR.LA.u.....a.hs..s.h.p..lF..QIp.....l.........s...+..s..Y.s.hs-....a....+...-D..L+plh.tp.....u...............G...h.....c....s...p.t.h.....sFLhsD...spl.............h.c....E.u..FLE..l.Ns..l....L.so.....G..............E........lP.s............LF...s...t.DE....h.p.p.l........h............s..............t........h.....+....s.......t..s........p...................p....t..................s.....h...........s.....o.....t..........p......s.......l..a......p.aF.....l.p.+...........l+.p..NL.....H....l...V.Ls..h..S..P.....lG..c....s...h.R.s.R....h.+..........pFPu.........Ll..N..ss.sIDW..Fp..p..WPp..-ALh..p...V.................u......p.................p.................a.................L....................p....................p.................h..............p...................h...................h..................p...........................p.................h.....................c....................p..........t......l......s.....p...........h...hsh.....h..........H.t.sl.t...ph.s.t....p..........a.h......t..p......p..R.hsa.sTPpsaLchlp.hatth............................................................................................................................................................ 
0 634 798 1248 +12616 PF12781 AAA_9 ATP-binding dynein motor region D5 Coggill P pcc Pfam-B_14 (release 24.0) Domain The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D5 ATP-binding region of the motor, but has lost its P-loop [1]. 22.20 22.20 22.20 22.30 22.10 21.90 hmmbuild --amino -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.61 0.70 -5.50 3 2170 2012-10-05 12:31:09 2010-02-17 13:30:02 2 225 287 14 1548 2035 88 214.80 38 6.23 CHANGED MLTDDADVAsWNNEGLPSDRMSTENATILuNCERWPLMVDPQLQGIKWIKNKYG-DLRVIRIGQKGYLDlIEQAISuGDTVLIENlEESIDPVLDPLLGRNTIKKGRYIKIGDKEVEYNPKFRLILHTKLANPHYKPEMQAQsTLINFTVTRDGLEDQLLAAVVApERPDLEQLKSDLTKQQN-FKIlLKELEDSLLSRLSSASGNFLGDTALVENLETTK+TAAEIE .............................................................................................................pltpWp....p..GLPs.DphSh.-N..ul........I...l.......p...p.......u.......p............R.......a....P...L.h...ID.PQ...sQ.u..hc.W.I......K.......s................h.........................p.........s.....................p...................L......p........l...h.p.....h......s..p...p.............s...a...h..c.....p....LEpulphGpslLlEs...l..t.E...p..l....DP.sL......pPl.L..t.....+...p.....h....h.....+.....p.....G...................p.......h.........l................+..l....G...D..p..p..l-as.s.s.F+la..loT..K...L....s..N..P...c.Y.....P...E..l.p....s.....+...s.ollNFs.lT......................pGLE..............-QLLuhV..l.tp.E.+P....-L....E..c.p.+sp.....Llh..p.t.....s..................p.K.h...pL+pLE-plL.ptLs....su...............p..G...s...lL......-..D.p....pLl.psLppoKtput-l...................................................... 
0 635 802 1257 +12617 PF12782 Innate_immun Invertebrate innate immunity transcript family Coggill P pcc Pfam-B_9 (release 24.0) Family The immune response of the purple sea urchin appears to be more complex than previously believed in that it uses immune-related gene families homologous to vertebrate Toll-like and NOD/NALP-like receptor families as well as C-type lectins and a rudimentary complement system. In addition, the species also produces this unusual family of mRNAs, also known as 185/333, which is strongly upregulated in response to pathogen challenge [1]. 26.90 26.90 26.90 31.50 21.60 26.80 hmmbuild --amino -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.36 0.70 -5.20 9 746 2010-02-17 14:14:11 2010-02-17 14:14:11 2 4 2 0 6 908 0 162.80 58 101.52 CHANGED MEVKVTLIVAIVAALAISAHAQRDFNERRGpENGRERGQGtFGGRPGGMQMGGPRQDGGPMGGRRFDGPsSGAPQ.........................MDGRRQNGGPMGGRRFDGPcFGGSRPDGAGGRPFFGQGGRRGDGEEETDAAQQIGDGLGGsGQFDG.GRtHHGHRQG.PQDpsEEQPFGQRN.SSEEDGRPHPHHHt......+HHpHH...........................................+RNHTEGHQGHNETGDHPHRHHNKTtDtDQDRPMFEhRPFphN...PFGRKPFG-RPFsRRNGTEEGSPRRDGpp+PaGNRGRWGENESEEKE+PTTESlTTSSPsEVVclAhN- ..........................................................u.RQDGGPMGGtRFDGPt.GAPpM-GRRQsGGPMGGRRFDGPtFG................................................................................................................NpTEGHQGHNETGDp.............DQD+.h.-.RPFhhN....FGRKPFGs+.F................................................................................... 0 6 6 6 +12618 PF12783 Sec7_N Guanine nucleotide exchange factor in Golgi transport N-terminal Coggill P pcc Pfam-B_13 (release 24.0) Domain The full-length Sec7 functions proximally in the secretory pathway as a protein binding scaffold for the coat protein complexes COPII-COPI. The COPII-COPI-protein switch is necessary for maturation of the vesicular-tubular cluster, VTC, intermediate compartments for Golgi compartment biogenesis. 
This N-terminal domain however does not appear to be binding either of the COP or the ARF [2]. 25.40 25.40 25.70 25.40 25.30 25.20 hmmbuild --amino -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.06 0.71 -4.82 70 1211 2010-02-17 14:38:16 2010-02-17 14:38:16 2 22 295 0 824 1186 8 143.70 24 9.89 CHANGED hshhtpDAhhlFcsLCpLs...p..sppp..shh....tthpschhuLcLlcslLpspts.l..Ftp...............chlp.sl+phLs.sLl+sssS.sh.hslhphshcIhhhLl.pph+stLKhElElhlshlhh...lL.........-.pst....shp..pKhlsLchhpplsp-Pphlh-lYlNYDC-.s..ppNlh ...................................................................................................h..shh.lFcs.LspL......................s...........t...tt...........................................t.................hs+hhuLpLlps.hLps...tss..h..ht..................................phht.hl......+p.lsh....tLhp........hs...........s...s..........................slh......t.h.shplhh.hLl..pp..h............+s.hL+..h........ph.E.............lahp.lhh.ll..............................................pspp.......sht..........p+.h.sl.p.........hlpp.......lh.t....p.s.....p.hlhphahsYDsc.t..ttslh................................................... 0 286 456 677 +12619 PF12784 PDDEXK_2 PD-(D/E)XK nuclease family transposase Bateman A agb Pfam-B_5 (Release 24.0) Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily [1]. These proteins are transposase proteins. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild --amino -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.27 0.70 -5.19 73 2282 2012-10-11 20:44:46 2010-02-17 14:49:08 2 7 653 0 358 1886 49 177.50 20 76.61 CHANGED spsDauFK+lFGsc.t.NcclLlsFLNslLp..tpppIpclc.llss.......c..t-hts-Ktull....Dlhspss....sGppllIElQhppppsFhcRsLYYhu+shsp...Ql.............p.pG...pt.Yp............pLppllsIsIls.Fshhts.........p....ch+sshplh-ppspp..hhh.........cclphhalElsKFpppppcppsp........h-cWlhalcstsphppt.h.h.....psshhccshpthcphshscc-ht..sY-p+.chhh....Dppsslcpu ......................................................hhp.hh.t.........p.thh.h.t..hlpshLt................t...tlp...lp....h...s...............................p........t.s........+..shh..............Dlhsp.p........sGp..h.lEh.Qh.p........p......p.....t.............h..h........p.Rhh....aYhsphhsp...pl...............................p...pG.......t...Yp..........................pl..t..shhI..l...hs...h.hh.t..................t.............thhp...h...h......p.......p....p.t.........h..............p...t..hh..lph..h.................................hh..hh.......................................h..h...t.h.....s..................................h.............................................................................................................................................. 0 126 250 271 +12620 PF12785 VESA1_N Variant erythrocyte surface antigen-1 Coggill P pcc Pfam-B_22 (release 24.0) Family This family represents the N-terminal of the variant erythrocyte surface antigen 1, versions a and b, of Babesia. Babesia bovis is a tick-borne, intra-erythrocytic, protozoal parasite of cattle that shares many lifestyle parallels with the most virulent of the human malarial parasites, Plasmodium falciparum. Babesia uses antigenic variation to establish consistent infections of long duration. 
The two variants of VESA1, a and b, are expressed from different but closely related genes, and variation is achieved through the involvement of a segmental gene conversion mechanism and low-frequency epigenetic in situ switching of transcriptional activity from the VESA1 gene-pair to a possible other gene pair. 21.40 21.40 33.10 21.40 19.60 20.60 hmmbuild --amino -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.95 0.70 -5.52 42 227 2010-02-18 13:21:03 2010-02-18 13:21:03 2 2 1 0 141 227 0 260.00 32 38.40 CHANGED AAVTDLLQSVpLEYH.............GYQG-sK.......tssssKGAsccpVs..c+LNGLFSLVQGLGGTAVVRTYIDQLAQVLSALVGWS+I-KC......................tssspCpssststp........................................................HGppss....CcYLcDVctss.sCscCGCMKWsVsp...spscGppLGRtCTRCpsSGsstt......CpCusuust...Cou.spc.CKCAhsGKC....CKCC.....Cpspss.t........Cptpt............uChpppp..............-....sYhSAYsp...................h.ttttht..hpshWssLhp.s...............ssSp..+RHpCARILLGSVCLIWSGlTYMYWTGKYtpoSPRWNNHILDGSGLDDGTLSQWLQALGFP+-MLNNsGPtNRLDuVIWDGhpsKLaLGFscsushss...s.upDsssNThRsPsGMNYAGaIHTscRsuFsss.AsVFppsssss........ho-psppKpGALaKLYILSCAYFTGL....QKK....................puss+TPKTIREILYWLSALPYSpAY.clLcHuKc+L 
........................................................................................................................................................................................................................................................................................................................................................................................................C.tptsss.....sts...cpCpC...s.h..s.G..t.............................................................................................................................................h..hAp.hLGslsLIaSslo.h......s..........s..t....hWpt................................tahtshGas...hp......................................................................................................................................................................................................................................... 0 141 141 141 +12621 PF12786 GBV-C_env GB virus C genotype envelope Coggill P pcc Pfam-B_19 (release 24.0) Family This the envelope protein from the ssRNA GB virus genotype C. 
20.40 20.40 20.40 20.80 19.70 19.70 hmmbuild --amino -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.75 0.70 -6.01 6 568 2010-02-18 13:22:19 2010-02-18 13:22:19 2 10 6 0 0 453 0 191.40 59 45.18 CHANGED Rls.lPNLTCslcCDhcash.uhsl.....Dh.WshchlhcLPh+LW+GlsuhssLhllVlhhLlLEQRLVMsFLLLhssGpApss..............hapuCpCth.GuRsP.Pshpsh.RGNuTllC-CPFGpMhWhPsLCuGLsW+DGspc...GsspsLPhsCPcpVhGoloVhChWGSAaWhWRhG...shVcLacpLP.tSALCTFauhspucpsaPt.VssLospGsPCASCVVDpRPspCGsCVRDCWEpsGs...sFccCGlGsRlTccLpAVlVcGGsESploTPhGERP+YhuQHGsGsaauAVpthuhshTVoclGGYWHAltCPs.-assGsLPchIPGhPVNACls..scsu+shuuW.APGGaYAPlFT+CNWPpssGVcVCsGFAaDFPGc+sGFIHV+GuhQQlhuGshtssPpWLL ..................................................................................................................................................................uluWGDPIs....tGQsp..........aGusSVss....................................................................................................................................................................................................................................................................... 0 0 0 0 +12622 PF12787 EcsC EcsC protein family Bateman A agb Jackhmmer:Q186V8_CLOD6 Family Proteins in this family are related to EcsC from B. subtilis. This protein is found in an operon with EcsA and EcsB which are components of an ABC transport system [1]. The function of this protein is unknown. 
27.00 27.00 27.20 27.30 26.70 26.80 hmmbuild --amino -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.46 0.70 -4.98 47 901 2010-02-18 14:38:34 2010-02-18 14:38:34 2 5 716 0 203 677 35 219.90 21 83.19 CHANGED a.pptlp-lppWcpc...p+csthhp+hs+hs.phl....scllPptlpctlssslc....phsphlhsGupalh................................................shpslpphsLpphDplscchhpthpphAsspGAsTGsGGlhhhhuDhPllLulplKhLa-lAhhYGaDsc-hcERlFILplhQluhS.s.tt+ptlhpplpp..........acttpp....t..splp..s....W+ph.ppYRD.hshtKhh........QhlPslGhlhGAhsNhphlcclu-sAhhhY+hRhl ....................................................................................t.....................................................................................................................................................................................t...h........p....h....p..t..h...h....p...p...h..ht..p..ppthA..sspGus.....oG.......h.......G....................Gl..h...........s......h.s....s....-lP.......s....hl....s....lt....h....+....hlt........plAp.hGaD.l...s.c......cpph.hhLt.l...h.t.h...uht....s.p......tc..p..t.h.hth.h.tt...........................................htt.t.t.................t..t..hp...h.s.......................hcpltpp....h...t...s.l...s....h....p...phh.........................phlP.l.l.Gus.lGAssNhhhhpplschApttathRhl............................................................................... 0 69 145 168 +12623 PF12788 YmaF YmaF family Bateman A agb Jackhmmer:Q181M3 Family This family of proteins contain 6 HXH motifs and is named after the B. subtilis YmaF protein.\ It seems likely that these are involved in metal binding. The function of this protein is unknown. 
27.00 27.00 27.30 27.40 21.80 26.90 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.59 0.72 -3.84 10 130 2010-02-18 16:13:56 2010-02-18 16:13:56 2 1 90 0 33 113 0 90.10 43 77.47 CHANGED +hQoHsHEFtuSTchAE-s-.-cHNHRhAGVTGpAI.h.G.sSHVHKI.pspTDFF.DHFH-lClTTGPAIhl.usGKHIHLVpGpTTlNDGHpH-ahFTTLI .........................pHsHEa.uSTphsp.t..........p.c.HNHRhuGVoupsI.h....G..ssHVHpl.p.s.pTDF.h.sHaHhIs.shTGPAIsl.uss+HlHhlsGhT..ohsD..G..HpHcahhsT.l..................... 0 15 24 28 +12624 PF12789 PTR Phage tail repeat like Bateman A agb Jackhmmer:Q181M3 Repeat This family largely contains proteins from the eukaryote Trichomonas vaginalis. These proteins contain multiple HXH repeats. Some proteins in this family are annotated as having phage tail repeats. The function of this family is unknown. 27.00 7.00 27.00 7.00 26.90 6.90 hmmbuild --amino -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.66 0.72 -4.13 16 1947 2010-02-18 16:37:43 2010-02-18 16:37:43 2 34 108 0 1744 1922 24 47.00 41 23.61 CHANGED chScKsHTHoIusITsLQETLspKuDhsHTHTIANITNLQ..ETL..pp..DVGHTHTIusITN ....................................HtHsI...usIssLp-oLsc.Ku.........Dh..s...........H..T.H.s....................................................t.............................. 0 1732 1740 1742 +12625 PF12790 T6SS-SciN Type VI secretion lipoprotein Coggill P pcc Pfam-B_27 (release 24.0) Family One of the virulence mechanisms of E coli is the production of toxins which it produces from dedicated machineries called secretion systems. Seven secretion systems have been described, which assemble from 3 to upto more than 20 subunits. These secretion systems derive from or have co-evolved with bacterial organelles such as ABC transporters (type I), type IV pili (type 2), flagella (type 3), or conjugative machines (type IV). 
The type VI secretion system (T6SS) is present in most pathogens that have contact with animals, plants, or humans. SciN is a lipoprotein tethered to the outer membrane and expressed in the periplasm of E coli and is essential for T6S-dependent secretion of the Hcp-like SciD protein and for biofilm formation. 21.60 21.60 21.80 21.70 20.60 19.30 hmmbuild --amino -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.49 0.71 -4.57 149 1326 2010-02-18 16:39:03 2010-02-18 16:39:03 2 3 848 1 228 812 20 148.70 26 83.77 CHANGED slhLuGCu........................s..h.tpssplslslpAssslNsst...pGpssPlhl+lYpLcssssFpsu-a.sL..hps...sppsLus-ll.spc-hh.ltPGpptp.ls................hshspsspalGllAtapchs..pup.W+hlhslsttt.........................pht.....lplplsspsl .............................hlLsGCu.......................................................tsu....ppspplslslhupsslNsss...puc...ssPlhlplapLpssstFpsADa...sl..tss.........tpssLssshl.s.ppchh.LhPGp.t.pp.ls................hpls.p.p.spalGVl.Atapc.s...tsp.W+hlhpltsts.........................chtlhlplpppt.......................................... 1 31 88 154 +12626 PF12791 RsgI_N Anti-sigma factor N-terminus Borovok I, Coggill P pcc Borovok I Family The heat shock genes in B. subtilis can be classified into several groups according to their regulation [1], and the sigma gene, sigI, of Bacillus subtilis belongs to the group IV heat-shock response genes and has many orthologues in the bacterial phylum Firmicutes [1]. Regulation of sigma factor I is carried out by RsgI from the same operon, and this N-terminal cytoplasmic portion of RsgI ('upstream' of the single transmembrane helix) has been shown to interact directly with Sigma-I [2]. 
21.40 21.40 21.50 21.40 21.00 21.10 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.58 0.72 -4.30 52 279 2010-02-18 16:43:53 2010-02-18 16:43:53 2 6 205 0 71 235 0 56.00 24 13.15 CHANGED lclcccpsllhTscGcFlcl+ppss..hplGpcI.hpppshhphtpthh..hhhhshshh .......h-lcccpsllLTscGcFlp.lKp+sc...hhlGcEIphsppchht..pptth...hhhh.h...sh................ 0 35 63 67 +12627 PF12792 CSS-motif CSS motif domain associated with EAL Coggill P pcc Pfam-B_29 (release Domain This family with its characteristic highly conserved CSS sequence motif is found N-terminal to the EAL, Pfam:PF00563, domain in many cyclic diguanylate phosphodiesterases. 21.60 21.60 21.70 21.60 21.40 20.80 hmmbuild --amino -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.17 0.70 -4.99 138 2685 2010-02-19 09:08:36 2010-02-19 09:08:36 2 3 774 0 211 1380 30 205.10 21 40.43 CHANGED pphppphpshuppslpph-p......llsps.ppshsph...shssps..Csps.hhtLpphshtt.al+slsll.cssplhCSShh.G.h.shshs...s.h......h......shplhhhtssshhsspshl..hht.ts....hsssushsslssthh....hhthh..phsshhlt.......h.spthlttts....t.h.ttssh..........hhhphpSsc.....ashslh.......stsststhhphhhphhhhhl ..................................p..hppchsshuspshcph-p......lhtph.ptstpth...t.htsps...Cs.A.thpLpphutth.hlcslshl...cssphhCSSlh...s...t..shs...h....s.h.ssh..........hp.......ssplhl.hs.sssh..h.sp..hl..hhh..pss..shhssusa........stl.p.shhht...l.hsh.s.phssssls..................l.spshlptts......shhhp.sth...............htpspssc.....hslslh.......lhssstth.thaat.hhh..h.............................................................................. 0 15 45 130 +12628 PF12793 SgrR_N Sugar transport-related sRNA regulator N-term Coggill P pcc Pfam-B_33 (release 24.0) Family Small, non-coding RNA molecules play important regulatory roles in a variety of physiological processes in bacteria. 
SgrR_N is the N-terminus of a family of proteins which regulate the transcription of these sRNAs, in particular SgrS. SgrR_N contains a helix-turn-helix motif characteristic of winged-helix DNA-binding transcriptional regulators. SgrS is a small RNA required for recovery from glucose-phosphate stress in bacteria [1]. In examining the regulation of sgrR expression it was found that SgrR negatively auto-regulates its own transcription in the presence and absence of stress, and thus SgrR coordinates the response to glucose-phosphate stress by binding specifically to sgrS promoter DNA [2]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.38 0.71 -4.08 49 1737 2012-10-04 14:01:12 2010-02-19 13:52:54 2 2 824 0 137 981 7 113.50 39 20.26 CHANGED +lhppapRLapths.....spstpsTLsElA-hLaCScRHsRsLLppMpptGWLsWpspsGRG+RSpLthhhsspplptphAcchlcpGchcpAlplls..cpttltplLppphGtphppu ...................h.pQa.RLaQtss........GcsppsTlsELA-hLhCScRHhRoLLpphQ-tGWLpWpupsG....RGKRo.pLpFLh.os..sLppphsEchLEps+hppllpLlt...s......tuplpphL.sahGtpapps...................................................... 0 24 45 93 +12629 PF12794 MscS_TM Mechanosensitive ion channel inner membrane domain 1 Coggill P pcc Pfam-B_24 (release 24.0) Domain The small mechanosensitive channel, MscS, is a part of the turgor-driven solute efflux system that protects bacteria from lysis in the event of osmotic shock. The MscS protein alone is sufficient to form a functional mechanosensitive channel gated directly by tension in the lipid bilayer. The MscS proteins are heptamers of three transmembrane subunits with seven converging M3 domains, and this domain is one of the inner membrane domains. 
23.40 23.40 23.90 23.50 22.70 23.30 hmmbuild --amino -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.04 0.70 -5.83 72 1372 2010-02-19 14:00:30 2010-02-19 14:00:30 2 5 800 0 153 777 52 322.70 42 30.04 CHANGED hpshlhhlhhlLlhhhll....hh+pphpptLpphspclGplppDshhhT.pulhhollhuLPhslhhhhsGhhlt.hs.hsts..tslusulhthuhh.hhlathhhphhpspGlhhtHFphsppplpphp.+hhphhhhhllsllhhhshs.....pthssphhpsslGplshllshhhlshhhh...slhctshs.hts......t...phlppllhhhlhhsPlhlhlhshhGYaaTAhtLht+L.tSlhlhhhhhllapllhRhhhlppRRLAacRApt+Rpp..hh......pEt...t.ssc..........hp.l-.Esslcl-pIspQSL+Ll+hlLhlhhlsslhhlWu-llss.huaL-s..IsL.W ...........................h....LshhshLllsGh.h....aptpahptalp+hAutVGplp.pDp.hhT.+slhhsllhAhPlsllhhslGhhLh.ph...hslushlhshspths.h.hhVhhlCh.phhcsNGlhltHFGhPcppsu+hh.R.hlh.hhhllPLhhh.lhh.............-..s..+.h.sssL.GphshllhhhhlAllsh...slp+tuh..h.s..............phhph..lhhshlhhhPlshhlhoAsGYhhTu.tLhuRh.poV.hlhhlh.llYpslhRhh.lttRR..l.....AacRAhtRRtp..hl......cE.........usp..................................uh.ss-.EsplsL-tIstQoLRlsp.llhhlhhl.hhslWS-lhos.FuaL-s..IoLW.............................. 0 21 56 106 +12630 PF12795 MscS_porin Mechanosensitive ion channel porin domain Coggill P pcc Pfam-B_24 (release 24.0) Domain The small mechanosensitive channel, MscS, is a part of the turgor-driven solute efflux system that protects bacteria from lysis in the event of osmotic shock. The MscS protein alone is sufficient to form a functional mechanosensitive channel gated directly by tension in the lipid bilayer. The MscS proteins are heptamers of three transmembrane subunits with seven converging M3 domains, and this MscS_porin is towards the N-terminal of the molecules. The high concentration of negative charges at the extracellular entrance of the pore helps select the cations for efflux. 
31.40 30.50 31.40 32.60 31.30 30.40 hmmbuild --amino -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.73 0.70 -5.05 69 1277 2010-02-19 15:07:52 2010-02-19 15:07:52 2 3 738 0 127 672 19 235.70 40 21.69 CHANGED ppLcplpp..ppssspppthlpsLppsLshLschcctcpcspphpptlsshPpphcplpppLpshpss.......sstshhsshohspLEpplsptpspLtphpcpLpphpsplhplpsp.pphpsphspsppplppIpppLpsh.sss....sslppuppshLpsE.shlpsphp.hchphhussshQ-Lh..phpp-h.hptchppL-pplQtLpstlNppRhppuEpslccspp.tpp..p.sptsshlppp ....................ppL-phpt.....hpst.pt.l.psLpssLssL-chcpstEcspQhpphlsphPchhpphpA.pLssL.cs...........EsRpl.sshShcpLppclhQs.spL.s....tppphtp.psplhplpsp.ppl.pt.hsAppQLppIcp....RLsu....h....ssGp.........ssLp.uQphhhQu-pAhLpAplDp.chu........ussshQ-Lh..............phpp-h.sptpSppL-t.LQhLpptlNSpR.p.uE+shppsp..sEs..phphs.hlhpp................ 0 14 42 86 +12631 PF12796 Ank_2 Ankyrin repeats (3 copies) Bateman A agb Jackhmmer:Q183I8_CLOD6 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.45 0.72 -3.64 620 110723 2012-10-02 12:10:21 2010-02-22 09:13:01 2 6024 2889 200 69620 109701 6937 89.50 23 36.39 CHANGED Lh.hAsp...p..s..p.........h..p.h..........l.c....hL.....................l...p....ps.......s.....shs.....................ps...............................................sL.h......h.....A................s....................t..t...up.....h...........................................................................c..hlch.........L.lp...............................................................ts...s.s..h.............stp.......................tupTs............................Lhh.............A..........st..................psp...h...c..hl.........c.h.Ll.p.p...............s..s.sh.s....hps 
................................................................................................................................................................................................................................................................................................................................................................................................h.t.........h....................................................................h.....t.......t..................s...phs..........................................spT................................................................................................sL.p.........h..........A............................u............................................t..p....G...p.....h..................................................................................................................................................................................................................................c....h....l.c.h..........L...l.p...................................................................................................................................................................................................p.G.....A....s...l...........................stps...................................ppGt.T..s........................................................................................L.p.h................................A............................s.p................................................p.u..p.......h....c.......l.....l.................c..h...Ll..p.t..........s..s.p....................................................................................................................................................................................................................................... 
0 33547 42383 56433 +12632 PF12797 Fer4_2 4Fe-4S binding domain Bateman A agb Jackhmmer:Q184V9 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild --amino -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.65 0.73 -7.77 0.73 -3.72 423 1399 2012-10-03 08:56:42 2010-02-22 11:31:53 2 39 752 0 175 6075 978 26.70 70 12.06 CHANGED hthhhs....ts..p...C...hs...C..tt..C...ht..sC........Ps ...........Y.GFFID.......SS..RCTG...CKTCEL..AC........KDh+sLss............. 0 59 106 145 +12633 PF12798 Fer4_3 4Fe-4S binding domain Bateman A agb Jackhmmer:Q184V9 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild --amino -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.94 0.74 -7.15 0.74 -2.97 447 625 2012-10-03 08:56:42 2010-02-22 12:42:34 2 99 466 0 231 9841 2305 17.30 47 7.78 CHANGED C..ht..C.............stCh.psCP..t...ss ....................C..hp...C......................................suCVsAC..s.......... 0 69 134 183 +12634 PF12799 LRR_4 Leucine Rich repeats (2 copies) Bateman A agb Jackhmmer:Q187Q2 Family Leucine rich repeats are short sequence motifs present in a number of proteins with diverse functions and cellular locations. These repeats are usually involved in protein-protein interactions. Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.52 0.72 -4.27 569 21961 2012-10-02 21:32:02 2010-02-22 13:07:11 2 3572 1441 188 8913 43842 1514 44.60 29 11.83 CHANGED p..sLppLp......Ls.p...N......p....l...p...s.....l..s.....s...........l..p....s........L...s....p....LppL.....s...l...s.........p.....N.p..l....p.......s..........l......s...s...l...p....p .................................pLptLs...............Ls.s....N..........p....l......s.....s.......l..s........................s..............l..u.....p.........................L....s.......s....LppL...s...L...s......................s.........N.p..l.........s.........s..........l.....s......................................................... 0 2700 5198 7123 +12635 PF12800 Fer4_4 4Fe-4S binding domain Bateman A agb Jackhmmer:Q184V9 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 24.00 10.00 24.00 10.00 23.90 9.90 hmmbuild --amino -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.47 0.74 -7.83 0.74 -3.15 187 4289 2012-10-03 08:56:43 2010-02-22 13:24:58 2 168 1372 4 663 24462 7195 20.30 39 10.05 CHANGED t...sp......C.....h.....p......Cs...t.....Ch.ps..Cs.................hp ....................tpC..........l.......G..................C+.........s.........CphA..C...sh.................... 0 214 424 548 +12636 PF12801 Fer4_5 4Fe-4S binding domain Bateman A agb Jackhmmer:Q189G9 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 
22.00 0.00 22.10 4.40 21.90 -999999.99 hmmbuild --amino -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.76 0.72 -4.17 217 6537 2012-10-03 08:56:43 2010-02-22 14:38:41 2 113 2159 0 1700 6678 530 48.00 21 19.61 CHANGED hshhhl.lhhh.hllhhshl.......hGR.haCuWlCPhGslp-hhtp.........hhppph..thp .......................h...hhh.lhhh..slhlhshh........htR..saCualCPhG.slhslhs................................................. 0 631 1249 1513 +12637 PF12802 MarR_2 MarR family Bateman A agb Jackhmmer:Q17ZV2 Family The Mar proteins are involved in the multiple antibiotic resistance, a non-specific resistance system. The expression of the mar operon is controlled by a repressor, MarR. A large number of compounds induce transcription of the mar operon. This is thought to be due to the compound binding to MarR, and the resulting complex stops MarR binding to the DNA. With the MarR repression lost, transcription of the operon proceeds [1]. The structure of MarR is known [2] and shows MarR as a dimer with each subunit containing a winged-helix DNA binding motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.62 0.72 -4.30 266 11347 2012-10-04 14:01:12 2010-02-22 15:12:01 2 65 2832 42 3627 27482 2684 60.70 22 32.41 CHANGED h.Gl..s..hsphtlLhh.ltptss....................h..ohsclsc..thtlsc.sslsphlppLtp..pGll.........ppp..ts.t.DpRt ........................................................tl.s....tph.pl.L....hh...L...t...ppss......................................................h.....shs-Luc......ph.tl......sp.sslophlpcLpp...pGLl.........p+p......ss.p..DtR................................................................ 
0 1112 2404 3087 +12638 PF12803 G-7-MTase mRNA (guanine-7-)methyltransferase (G-7-MTase) Coggill P pcc Pfam-B_40 Domain The Sendai virus RNA-dependent RNA polymerase complex, which consists of L and P proteins, participates in the synthesis of viral mRNAs that possess a methylated cap structure. The N-terminal of the L protein acts as the RNA-dependent RNA polymerase part of the molecule, family Paramyx_RNA_pol, Pfam:PF00946. This domain is the C-terminal part of the L protein and it catalyses cap methylation through its mRNA (guanine-7-)methyltransferase (G-7-MTase) activity [1]. 25.00 25.00 31.00 29.90 21.10 21.00 hmmbuild --amino -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.86 0.70 -5.29 29 388 2012-10-10 17:06:42 2010-02-22 16:14:03 2 3 87 0 1 397 0 319.70 39 14.47 CHANGED phuaplYYhRlpGcpphh-YlhshLpRhstssLsslusslSHP+laRRhhssGllpPhpuP.Lso.sahthsh-hlhtuhcpaLs.lhsG..hchphllspsspshls-RhtslhAR+LslLssLassspphPpI+GLsshEKCtlLTcaLpptshts......hsshphslps.plssasssLhYlpRpslppIRhc...s.....s.hhltphhcshsh.ptshtttps..........tsshppssph..hp.....l.spspthpph.hsssttsshp................pHhhRhlGlsSTShYKAlplsphlpphh..pGspLaLuEGSGAhhohhchhlsss .........shuYQlYYLRlpGhpsIl.Yhsshh+pMsshllsslusTlSHPhIapRhhssGllp.stu.pLAspDahchuscllhoCscphlssLhsG..sca-lLhsss.-DsLs-+h.plluRhhClhssLauss+chPpIRGLospEKCulLT-aLhu-Ahps......tsppspsIhsPpIhsaPusLaYhpR+SLshIRtRp........csshls.hh.sp.sllc.p.spshhuphpt.....shs+pssuhl.ths....Lssss.phcthh..psps.psh.............t..lpahFRsIGhsSSSWYKAsclhol.chpts..pcGsuLYLuEGSGAhMolhEhhlstp.. 0 1 1 1 +12639 PF12804 NTP_transf_3 MobA-like NTP transferase domain Bateman A agb Jackhmmer:Q188E0 Domain This family includes the MobA protein (Molybdopterin-guanine dinucleotide biosynthesis protein A). The family also includes a wide range of other NTP transferase domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.11 0.71 -4.16 571 9345 2012-10-03 05:28:31 2010-02-22 16:22:27 2 87 4058 75 2565 24779 11854 170.00 18 55.35 CHANGED ullLAGG......p...u......p.....Rh...........G.......s...........s.........Ks..l........l.h.......s.G.....p...........s...llp+sl.ptl............psh.......h..s.............................plhls....s..st....p.............p..h.h.....t..................................h............t..th........s.....ht....h.....lt-........t......s..t.........GPh....uu...ltsul.....................................p....t....s........p.....s...sh.............ll..lhssD.hPh...lsssh.lppLhpth...............t......p....s...........s..s.......h..ss..st.t.p.........s..p..h.......................................................hp.P.l......l..h.....stp...h...h...........st.lp....p.h........t..s....s.........p..u......h.......+p...ll.p...............p.h.....t....ps 
................................................................sllL.A.u...G.......p......u.......s.....RM....................................t.........s.......s.................Ks..L........hpl........sG.....c......................................s...hl.p+..sl.....cs.h.................p.p.h.....t...s..s....................................clh..ll......s......sp......c.................p....h.t....t.......................................................h..h...............t..t......................s.......lp.....h...........l.h.s.................s..........p.....h.................G.s..h........t..u....l...h...s....u..h............................................................pt......h............t.s.pt........................h.l....l...h....s......u.D....h.......P..h.............l.....s.....s....p.....h.......lp..p.L...hp.th........................................t..........t..s........h....h..h.......h...............................................................................................................................................................................................................hhthhh...................................................................................................................................................................................................................................................................... 0 846 1696 2182 +12640 PF12805 FUSC-like FUSC-like inner membrane protein yccS Coggill P pcc Pfam-B_45 (release 24.0) Domain This family has similarities to the fusaric acid resistance protein family. The proteins are lodged in the inner membrane. 
20.60 20.60 20.60 20.90 20.50 20.50 hmmbuild --amino -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.66 0.70 -5.48 75 1796 2012-10-02 19:04:43 2010-02-22 17:07:20 2 5 1161 0 248 1140 37 272.10 36 38.74 CHANGED sLhhoLlsFhluohslplhhsa..shhFslslhhsoFshshLGAlGpRYuoIuFuoLllAlYThlshs.........ts.thatpP..llllsGAlWYslhollhphlaPppPlQcsLApsappLusYLcsKusLF...cPs.ppppspphphtLAtpNspllsslNps+psLhpRht.us+tp.ssp..+hLphYahAQDIHERsoSSHh.pYppLpcthp.pSDlLaRhp+llphQupuCpplApulhhpp.Yp......asschptuhpplpsuLph.tpp...psttpt.hhtsLptLhpNLps....l-ppL ......................................LIlshhsFhluShss...pLL.h.s.h..shhhslsL.oh..T.hs.hhlluslGt.+a.Aplu.huuL.l.lA...IaThhssu.........h.....h.apps....llllhGulWYslhshhhahlas...pPlp-sLupsYcpLAc.YLEs.Kuph.a.....s..pc.....s-....sptslhsLs.h.tp...t.pl.hst.lsQs+tp..lhhp....us+sppss+..RhL+hahhA.D.laE+hS....uSph.p.Ypplp.ch.hc..+S-lh.hh..hpphhph.ut.shppL....ucsIL.tp.Yp......+tsphc...hshp.ch.uuLE+htpp.....ssst.hhhpluhlhpsL+s.......h..................................................................................................................................... 0 38 107 183 +12641 PF12806 Acyl-CoA_dh_C Acetyl-CoA dehydrogenase C-terminal like Coggill P pcc Pfma-B_46 (release 24.0) Domain this domain would appear to be the very C-terminal region of many bacterial acetyl-CoA dehydrogenases. 
26.00 26.00 26.00 26.00 24.90 25.90 hmmbuild --amino -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.56 0.71 -4.28 470 2011 2010-02-22 18:15:35 2010-02-22 18:15:35 2 6 866 0 708 1823 1503 128.00 25 21.59 CHANGED thhsclpshhppttt.....t....th.t.shsptLtpu.hpplppssthlhtpst.ss..........s...pts..hAuussY..LchhGhlshuahah+hAhsA.tth......................tt...................................t....ts......Fa.puKltsA+FahpplLPcssuhhstlps..Gsss.lhshst ...........................h.hhtplpphsppttt.........st..th..t.thtp.Ltpu.hpplpp...hTthlht...psh..ss............s...ppssAuussYLphhGhlshuahat+.hAtsAtppl...............ss...............................t.....ts...ts.....FYpuKlssA+FahpplLPcssuhhstlpu..ussslhshs.............. 0 183 408 579 +12642 PF12807 eIF3_p135 Translation initiation factor eIF3 subunit 135 Wood V, Coggill P pcc Pfam-B_2213 Family Translation initiation factor eIF3 is a multi-subunit protein complex required for initiation of protein biosynthesis in eukaryotic cells. The complex promotes ribosome dissociation, the binding of the initiator methionyl-tRNA to the 40 S ribosomal subunit, and mRNA recruitment to the ribosome. The protein product from TIF31 genes in yeast is p135 which associates with the eIF3 but does not seem to be necessary for protein translation initiation [1]. 
21.30 21.30 21.40 21.50 21.20 20.80 hmmbuild --amino -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.30 0.71 -4.32 65 500 2010-02-22 18:22:24 2010-02-22 18:22:24 2 76 258 0 376 485 1 166.80 22 13.01 CHANGED tsppssPhDGpoLophLHpRGINhRYLGc.......lup......htp....................pLttlppLslpEhlsRuhKHlhpphlp.....................................sl.ss.................sthuusluHhLNCLLGst.ss............................pspsphcpph..................................................t.....spss.satplTs...............ppLhppIppplpp+acasLp...............schhs........p.lp+hpLLRslshKhGlQlhs+-YsFs .....................................h......PhDG.toLsch...hHp+GINhRY.LGclhph.......htp....................................................................................plpt.l......pplslpEhlsRuhKHlhpshlp....................................................................................................................................................................................................sh.....................hs..ss.luc.hLNslLus..ss......................................................p................................................................................................................h..hs........................ptl.h.p..lp..p.hh.hapapl................tp.hp.........t.hppht.llRths.+hGlplh.+.ca.h.................................................................................................................................................................................................................................... 0 144 232 330 +12643 PF12808 Mto2_bdg Mto1_bdg; Micro-tubular organiser Mto1 C-term Mto2-binding region Wood V, Coggill P, Eberhardt R pcc Pfam-B_28820 (release 24.0) Domain The C-terminal region of the micro-tubular organiser protein 1 (mto1) is the binding domain for attachment to Mto2p.The full-length Mto1 protein is required for microtubule nucleation from non-spindle pole body MTOCs in fission yeast [1]. 
The interaction of Mto2p with this region of Mto1 is critical for anchoring the cytokinetic actin ring to the medial region of the cell and for proper coordination of mitosis with cytokinesis [2]. 21.40 21.40 21.50 21.40 21.00 21.30 hmmbuild --amino -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.55 0.72 -3.86 9 126 2010-02-22 18:30:33 2010-02-22 18:30:33 2 5 97 0 91 132 0 50.50 39 5.17 CHANGED +WlpRLcELEppLKAER.EARhhD+oGA+cRLp-hptENccLpApLp+p+.pp ........+hl.RLcELpp+L+uER.E.....tRhhDpsuAcpRlpclcsENcpL+ucLph.p.t........ 0 13 35 67 +12644 PF12809 Metallothi_Euk2 Eukaryotic metallothionein Wood V, Coggill P pcc Wood V Family This is a family of eukaryotic metallothioneins. 80.20 80.00 80.20 80.00 78.60 78.40 hmmbuild --amino -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.27 0.72 -3.59 2 16 2012-10-05 18:33:37 2010-02-23 09:22:44 2 1 13 0 2 9 0 75.50 58 72.34 CHANGED sl+.pDCEstCCpcus.pCtSpCh.psStGphCpsph.hGp.pspsChcphpC.........ohs..pCpK.u+pphtKp ClRNQDCEoGCC+cAPspCESHCsEKGSEGSLCQspsFFGQYRsCPChcNLTChY.KstKW.oIsaGpCQKlu+ppLtKp... 0 2 2 2 +12645 PF12810 Gly_rich Glycine rich protein Bateman A agb Jackhmmer:Q183Y9 Family This family of proteins is greatly expanded in Trichomonas vaginalis. The proteins are composed of several glycine rich motifs interspersed through the sequence. Although many proteins have been annotated by similarity in the family these annotations given the biased composition of the sequences these are unlikely to be functionally relevant. 
24.50 24.50 24.60 24.80 24.20 24.40 hmmbuild --amino -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.36 0.70 -4.79 103 677 2010-02-23 09:42:22 2010-02-23 09:42:22 2 34 107 0 572 663 155 230.70 22 43.92 CHANGED sCTs.YplpLs.sGhY+hElaGAuGGs.t.............................hpp......ss....h.G......GtGGYssGhlplppppphYlhlGupG............p.s.....tp............h..h...GGasGGGpust..........................t...hu.uuGGG..uTcl+hh...........................ssLtsRl....lVAGGG.GG.u.ss.......st..........t.sp.............................................................................G......GsGG..Gltu....tssh....tt....sp.........ss.s...........uu.sp.puuG..u.............hp...p...........s..t.......sGshshus......ss.....sp.................u.......GGGGGaaGGhuu.............p.....t.s........uu............G.GGGSSal...hsp..su..hh................s..tsslps.......sshhapssthhpss...................tp................pp.................GsGhhpITlLs.h..t.hs..s..phh ................................................................................Y.hph..hG.AtGG...................................................t....uhGs.hh.uhh......l.t.p..t....ha.hhlGtpG.......................p...................................tsh..sG.s.s...s........................................u.uuGGG..uT.l.hh.....................psh..phl....lVA...u..GG..GG.s.s........t.............sp.................................................................................u.....GtGG..uhtutssh.......tt.................st........................susp....h.tsu..tu........................h..p...........t.t.....pGshuhuu.........ss....ss..................................uGGGuGahGGtuu.............................t..ts.....ss...........G..uGGSSal...ts...h....................................................................................................................................................................................................................................................... 
0 505 515 542 +12646 PF12811 BaxI_1 Bax inhibitor 1 like Coggill P pcc manual Family The Bax-inhibitor-1 region of the receptor molecules is conserved from bacteria to humans. 22.90 22.30 23.10 23.10 22.80 22.20 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.90 0.70 -5.24 8 694 2012-10-01 20:22:31 2010-02-23 10:16:08 2 3 645 0 171 575 640 247.90 32 92.43 CHANGED M...RcoSNPVFRSL.......PK.ppG.GYA........pFGou.Auuhus..QphstpsYtst.....ppssssRshTIDDVVsKTGITLuVLshsAVVSYFLVssNssLuhPhshlGulGGLslVLlATFGRKtDNPulVLsYAuhEGLFlGulSalhuNlhsuGusu..hIsQAlLGThGVFF..GMLVVYKTGAIRVTPKFTRMlluuLaGVlsLhLGNllLAhF.sGss....LpsGGsLAIhFSLlCIGlAAFSFLlDFDAADQMIRAGAPEKAAWGVALGLTVTLVWLYlEILRLLSYhpN- .................................................................................................................................................................................................................................................ttt.ss..p..p.s....MTlssllsK.T.u.l..h..L..t.l.l..h.s.s...u...s...h.....u....a.....h......h...h.....h......s......p.............s...h....s...h.........h.......h....l.....l...G.u.....l.s.u.hl....l.u.h.lss...F..t..p...ph.sPsh..s..l....h..YAhhEGlhLGulSh.ha.p.......t.......h.su........llhpAl..Lu..Thushh.sh.Lh.lY+.sthI+VTs+FpphlhuuhhGl.hlhhLs.s.hlLshF....s...ss...................hlts.....u...G...s.....l.G.Ilh....Sll..s.lslAAhsLl.lDFDh.h-pulct......tAPcp.......h.tWhsAhGLhVTLVWLYlEILRLLShhp..p........................................ 0 46 106 140 +12647 PF12812 PDZ_1 PDZ-like domain Wood V, Coggill P pcc Pfam-B_17100 (release 24.0) Domain PDZ domains are found in diverse signalling proteins in bacteria, yeasts, plants, insects and vertebrates. this is a family of PDZ-like domains from bacteria, plants and fungi. 
28.00 28.00 28.00 28.20 27.90 27.90 hmmbuild --amino -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.45 0.72 -4.09 34 382 2012-10-02 11:12:46 2010-02-23 10:45:31 2 15 206 0 258 391 21 76.80 30 13.96 CHANGED sITPsRaVpVuGAoFHcLSYQ.ARpaslsl+..GVaVucuuGShph......ss...tspuallpplsspsTPsLDsFlcVh+plPDp ..............lpss+hlphsGAsh+clsaptsRph..s..h..sht...shhs.u.c.u.s.GShth.......ph...lssshlIppVsspsTssL-sFlclh+pIPDp....... 0 56 147 227 +12648 PF12813 XPG_I_2 XPG domain containing Wood V, Coggill P pcc Pfam-B_10579 (release 24.0) Domain This family is largely of fungal proteins and is related to the XP-G protein family. 29.20 29.10 29.20 29.10 29.10 29.00 hmmbuild --amino -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.69 0.70 -4.88 26 211 2012-10-01 19:52:02 2010-02-23 11:52:00 2 8 149 0 162 230 5 227.40 18 35.47 CHANGED tssFhVsslhEsL+sp.p...................................aushs.llsGEADsaCAhhA.+phGs.sVLTsDSDLLlHDL.G.pGullFhsol-hss...................................................t.hplpAhphpssslsc+Lul....sl.hlAa.........El.ppcsctshspllcpu+t..spssspppspYhsFlcpY........ts...........ss.thptLDs+loElhhph....................tcptsplYLshLl-stuRpsAWt..pupphR.lAYulhshpt.......hspptssltEhhRp...upchsspplsl 
...............................................................................................h.s.hhh.shhpsLpph......................................ss.hhhs...u.EADt.hA....slA..pph.s............C.......sVLo..sDSDahla...cl......psualslsslp.hcs..............................................................................................hl.psphap.sphsp.p.hsh..............Ls..hh..Ah................-h.t..s..h..tp.h.....ph.h.th..stt........t......t...tph.thhp.h...................................................................................................................................................................................................................................................................................................................................................................................... 0 65 86 127 +12649 PF12814 Mcp5_PH Meiotic cell cortex C-terminal pleckstrin homology Wood V, Coggill P pcc Pfam-B_1220 (release 24.0) Domain The PH domain of these largely fungal proteins is necessary for the cortical localisation of the protein during meiosis, since the overall function of the protein is to anchor dynein at the cell cortex during the horsetail phase. During prophase I of fission yeast, horsetail nuclear movement occurs, and this starts when all the telomeres become bundled at the spindle pole body - SPB. Subsequent to this, the nucleus undergoes a dynamic oscillation, resulting in elongated nuclear morphology. Horsetail nuclear movement is thought to be predominantly due to the pulling of astral microtubules that link the SPB to cortical microtubule-attachment sites at the opposite end of the cell; the pulling force is believed to be provided by cytoplasmic dynein and dynactin. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.76 0.71 -4.20 26 320 2012-10-04 00:02:25 2010-02-23 11:54:51 2 30 183 0 235 344 1 121.30 30 9.25 CHANGED sllsAIsQTMlGEahaKYsR+p..p..h..............................tpsRHcRahWlpPYppsLhWSsppPtsussptspspp.lhIpuVtsVcDsNshPpGh.......ap+SIlIhTssRslKlTAsoppRHplWhsALpaLhpp .............................................t.hhpAIspshlG-ahaKYsR+t...t..............................ts+HcRaFW.ls..P..a..pps..LhW..Ssp.pPp.pss....t......hpsp...................p....ltIpu.Vps.V..pD...s...s......s..hP.uh................ap....c..Sll..ll.os.p....R..sl+hTssotp++psWhsuLpaLh.................................... 0 67 124 195 +12650 PF12815 CTD Spt5 C-terminal nonapeptide repeat binding Spt4 Wood V, Coggill P pcc Pfam-B_197031 (release 23.0) Domain The C-terminal domain of the transcription elongation factor protein Spt5 is necessary for binding to Spt4 to form the functional complex that regulates early transcription elongation by RNA polymerase II. The complex may be involved in pre-mRNA processing through its association with mRNA capping enzymes. This CTD domain carries a regular nonapeptide repeat that can be present in up to 18 copies, as in S. pombe [1]. The repeat has a characteristic TPA motif. 
25.40 24.80 25.60 25.10 25.00 24.40 hmmbuild --amino -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -12.09 0.71 -3.70 33 296 2012-10-02 12:52:19 2010-02-23 14:16:34 2 28 170 1 213 294 3 130.10 32 17.37 CHANGED uu+TPAasu..........sGu+TPAW...tsGupTssh.h................GuRTPsass....Gu+TPh..................htsGu+TPAauusss...........upTPuast......................................................ssshhuusoPush......sss.TPuu..tusTPu.....................s.uusoP ...............................................................upTPhass...........GuRTPta.....GSpTPh...s...............................GuRTPsat........GopTPh...............................tsGSRTPAauus.s...................spTPu.hstt....................................................................s.....s...ss.....................................................................ss......................................................................................... 0 76 113 178 +12653 PF12816 Vps8 Golgi CORVET complex core vacuolar protein 8 Wood V, Coggill P pcc Pfam-B_90 (release 24.0) Domain Vps8 is one of the Golgi complex components necessary for vacuolar sorting [1]. Eukaryotic cells contain a highly dynamic endo-membrane system, in which individual organelles keep their identity despite continuous vesicle generation and fusion. Vesicles that bud from a donor membrane are targeted and delivered to each individual organelle, where they release their cargo after fusion with the acceptor membrane. Vps8 is the core component of the endosomal tethering complex CORVET (class C core vacuole/endosome tethering). Vps8 co-operates with Vps21-GTP to mediate endosomal clustering in a reaction that is dependent on Vps3. Vps8 is the only CORVET subunit that is enriched on late endosomes, suggesting that it is a marker for the maturation of late endosomes. 
Late endosomes form intralumenal vesicles, and the resulting multivesicular bodies fuse with the vacuole to release their cargoes [2]. 19.90 19.90 20.90 23.50 18.90 17.20 hmmbuild --amino -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.19 0.71 -5.06 42 312 2010-02-23 17:00:45 2010-02-23 17:00:45 2 13 243 0 230 320 2 182.20 31 13.50 CHANGED pslFL-sLEsaIlpGplpslPPtlhKsLlpaassppphpplEclIspLDssoLDlDpsspLCccasLYDAlIYlaNpslsDYloPLh-Llphltp.tp..............hsts...........ssuhKlasYLuhlLoG+tYPoGcth.spp..cshpuKpplaphLFSupshshs.ssst............cssFPYLphlLcFDspsFLshLspuF ............................................................................slFL-sLEPaIlpsplp.s.l.sP..tlh+sLlsaa.t.spt.........h...h.........pplEphIh+hDhsoLDl.sQ..llpl.....CpcapLYD..AhIYlaNcu.hsD...YloPl...cLlp..llts.hptt................t.hstt........................................s.u.KlhsYlS..hsLs.GRsYPh.Gc.h..sp.p...............h..s.psKppl....apaLhptp.shphs.t................................c..aPal+hLLcFDsppFLssLs.sF......................................................................... 0 86 132 195 +12655 PF12818 Tegument_dsDNA dsDNA viral tegument protein Coggill P pcc Pfam-B_48 (release 24.0) Family This is a family of tegument proteins from double-stranded DNA herpesvirus and related viral species. 
20.70 20.70 20.70 20.70 20.50 20.10 hmmbuild --amino -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.51 0.70 -5.53 11 214 2010-02-24 16:15:47 2010-02-24 16:15:47 2 2 24 0 0 95 0 223.90 74 46.53 CHANGED lppuhhHlHSslpshup.QLpshlFpsoLhPtspspuushGhY.sTssu.sushstshpcplttsspspucsLspsGVPVluGFl+sl....pptuthstsslhpsShLsosspstLshsR.thGQhlVsLGsFpPssGsDssPahYp-SshshN+lhpsLcLFtphhsssClSuhtRshG.soslcHLhuLl.tsGhpLalSpLPp-lhstLtussssp.st..lcphVpsaFLNshss.lFLllspcs......spsps.psL-hLppAuclsGCshhlLGcTsspsGl ................................................IHGAYTHVHSSVQRGIR.GLGNLLFHSTLFPGGQTQGALTGLY.ATEPA.LGPRAHSRFRRIFAKGVQ.....QAEMLQGAGVPTLGGFLKTV......RTIATTP...G.NALAVCS.....IS....TTTSKE....CIS..LRRMI.P...QQ.T.VVC.LGR..FEPTD.GPDTYPNLYRDSSDNAVRILETLKLVQRLA..KG..P..IF..SGL..NRSHDPAPVVRHLQALAP.R.TGLELFVSKLPDEV+s+LsAcPuAsssu..VcusVu-HFLNVYCSlVFsVVu-oG....ulPuDhGcTPLElLQRAARLCuCQlsVLGRToEpPGI..... 0 0 0 0 +12656 PF12819 Malectin_like Carbohydrate-binding protein of the ER Coggill P pcc Pfam-B_41 (release 24.0) Domain Malectin is a membrane-anchored protein of the endoplasmic reticulum that recognises and binds Glc2-N-glycan. The domain is found on a number of plant receptor kinases. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild --amino -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.84 0.70 -5.36 155 1073 2012-10-01 23:47:32 2010-02-24 17:09:37 2 67 112 0 617 1235 5 284.20 22 44.21 CHANGED IDCGhsssps..a.-..s.slsasoDs.sahs..sGt...hth....t.t.t.s..h.p.htslR.FP...tsp..+.sCYsl.sh...ttt.+YLlRspFhYGNhp.........s..t.ph....ss.F-LhlGsshWssVph................hhtE.llhss.....t..splslCLlssu....ss..PFISuLElRsL..psshY.................h.ps.sts..LphhtRhsh.uss.................................lRYssDs..aDRhW........s..............ssthpphss.....s......hs..t.st.t..tsPptVhpTAhss..ssss................lphsa......sh.ssss..p............ahlhhHFuElpp...ps.........RpFslhlNst.........shps.hhhstsh..hpshhhph.tst.....hhh.sLttsssS....sl.sPllNAhElapl ............................................................................lsCG................sh.ahsDt..thht......ss.........s.pl.......................h.shR..Fs.............t....p..sY.sh.s....t....pahl.RhhF..h.......sshs............t.............s.a..l.hh.sh....s.hh...h........................hhhE.hh.ss...............s.lplsh.hsps........ss.sFlsslE..lhsh...ss.p.ha.............................ts..Lphh....hRhsh..Gspt................................lphssDs......hsRh..W.........sh...........................ssthtt.hss.............t.p........hp....ststh..hsP..t....V.hpT.......Ahsh...ssst..................hp.hsa........ph...ssss.h.p.............ahlhhaFsElpt...t.................tR.FslhlNsphh......p......sht.s....h..h.t.s...s.h.........ht..hhh.t.h...st..........h.lslt..ss.to........th..ssllNuhElhp...................................................... 0 88 348 486 +12657 PF12820 BRCT_assoc Serine-rich domain associated with BRCT Coggill P pcc Pfam-B_51 (release 24.0) Domain This domain is found on BRCA1 proteins. 
21.30 21.30 21.80 21.30 20.00 19.40 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.13 0.71 -4.48 83 1101 2010-02-25 13:43:55 2010-02-25 13:43:55 2 15 761 0 20 1126 0 159.50 55 27.54 CHANGED ADsLstRKEhscQKs..CS-sPR.coQD.lPWITLNSSIQKVNEWFSRSD-hLTS.DsopDttsESNuElAuslEl..Ps-lDGaSuSSEKlDLhAS.-Pcsu..llscSERlpSKPVES.NIEDKIFGKTYRR.KuSLP..NLoHl..TE.sLIlGAh.AsEPQIsQEpPh....TNKLKRKRRT ...........................A-PLpGR+chpKQKssCS-SPR..DoQ-.lsWITLNSSIQKVNEWFSRSD-l.LTS.Dco+DtssE.SNsEsu.sslEl..PspsDGhSuSSEKhDLhAS.-sp.............s.A..LhpcSERspSKPVES...NIEDKIFGKTYRR.KuShP..NLs..+s..TE...sl.llGus......shEPp.lspEp.Ph....TNKLKRKR+h................................. 0 1 2 5 +12658 PF12821 DUF3815 Protein of unknown function (DUF3815) Bateman A agb Jackhmmer:Q183G2 Family This family of membrane proteins is functionally uncharacterised. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.52 0.71 -4.21 94 3677 2012-10-02 11:53:07 2010-02-25 14:38:39 2 9 2742 0 637 2738 47 132.40 26 47.57 CHANGED lphlhualAsluFullhsl..P..+..+tlhhsulsGulGWhlYhlh.h.p...hs.hs..h.hh.ushhuulslulhuphhA+hhKsPsslFhlPu.llPLVP......GstsYpshhthl...................p............sp............httu...........hph....................................hlpslhluuAIAlGlhlsshl ...............................h.phlhuhl.u.sluFu..l..lh..N.s..sh...+t..L..hhsull.G.u..lu.a.h.h.h....hlh..h.p...........hs...hs....h....h....h.uoF..h.u.uh.llGh...l..........u....t.........h..h......u..+...h...h..+.........s........P..s....h..l..a..........hl.su..llPh.VP......Gh.huY.p.uh...hshl...............................................p............ts........................hspu...........h.sp............................................................hhpshh..lssAlu...lGlhlsph.h..................................................................................................... 
0 196 382 532 +12659 PF12822 DUF3816 Protein of unknown function (DUF3816) Bateman A, Iyer LM agb Jackhmmer:Q188H8 Family This family of proteins is functionally uncharacterised but are likely to be membrane transporters. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 177 and 208 amino acids in length. A subset of this family is associated with the TM1506 proteins. In this context, transport through the channel is predicted to be regulated by the TM1506 protein by either regulating redox potential or modification of substrates [1] 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild --amino -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.66 0.71 -4.37 172 3851 2012-10-03 02:46:00 2010-02-25 14:48:37 2 6 1995 2 522 3039 57 168.30 20 85.15 CHANGED +pl..............shhullsAluhlLh.........hh.hs....l.........p.hshsplssl..lu.....uhhhGPhsGhhluhl...psllphhh...h.................................................s.hh.hh...................hsshlsthhhslsss....hla..tppppptth.............................................................hhuhhluslhhslssslh......shhlh...h.ha...................h...............................................shhthlhhshlshsllcsllssllshhlh ..................................tlshlulhhAlshllt...........hhs.hs.....l...........p..lshsp..lssl...lu...........uhlhG......PhhGhlluhl....psllph.....l....l.....s..sh....................................................s.......................................hhshlsthl.hu.lssu.........hl.a.p..h.h...p..p..p.s....h....................................................................................................................hhu.l.llu..s..l.hh..o.ls.h....ol...l.............shhhh...........h.las................h.....................................................................slhthl..h..s.s.ll.s.hs.llctllsullhhhl.h.............................................................. 
0 216 361 451 +12660 PF12823 DUF3817 Domain of unknown function (DUF3817) Mistry J jm14 Pfam-B_123 (release 24.0) Domain This domain is of unknown function. It is sometimes found adjacent to Pfam:PF07690 and Pfam:PF03176 which are both transporter domains. 27.00 27.00 29.10 28.80 26.10 25.90 hmmbuild --amino -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.16 0.72 -3.55 149 830 2010-02-25 16:16:58 2010-02-25 16:16:58 2 6 649 0 275 668 179 92.30 31 73.20 CHANGED sslpt...aRllAhhEulohllLl.luM.slKY...h...........h............shs....h..slpllG........l.........HGhl.FllYllhshtlshch+Wsh.tchlh.shlAullPFuoFhhE.+hlp...+c ........s.slttaRlhAhlpGlshllLl.luM.slKY...h.....................................s..................shs.....t...sVpllG.......h.s..................HGhl.allYllss.h.luh.ch+Wsh.tph...............lh.shlAuslPFsoFhhEphhp+.p............ 0 92 210 255 +12661 PF12824 MRP-L20 Mitochondrial ribosomal protein subunit L20 Wood V, Coggill P pcc Pfam-B_1364 (release 24.0) Domain This family is the essential mitochondrial ribosomal protein subunit L20 of fungi. 30.10 30.10 30.10 30.20 30.00 30.00 hmmbuild --amino -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.02 0.71 -4.28 34 157 2012-10-04 14:01:12 2010-02-25 16:34:37 2 4 148 0 105 147 0 159.90 28 73.35 CHANGED hhps+spuohp+s+pphp..lpPs.o...................llaNPPuSuPoshp....TPttFL...PtsDsR+phh.ttt.t.s............................spphPslhptcsp.....+p..YpLos-sIpEI++LRtpDPhpao+tpLA+cFssSshFlshls..pss...p++cthcph.Lpslcs+W.us+RthAR-..DRp+R+Eh.W ................................................................................h....p.pss.tptt.thp..h.Pt..................................hhapPssStsos.h....sPh.hFL...PtsDsRRth..........tt.................................................tphPslhpsppt.................+p....apLssp-lpEh++LRtpDPhpaohppLA++Fss...SthFVthls........psst.....p+pp..cph....hpthpppWs.p+phARc...-RphR+phh...................... 
0 23 53 87 +12662 PF12825 DUF3818 Domain of unknown function in PX-proteins (DUF3818) Wood V, Coggill P pcc Pfam-B_972 (release 24.0) Domain This domain is found on proteins carrying a PX domain. Its function is unknown. 21.70 21.70 22.70 21.80 20.90 21.60 hmmbuild --amino -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.93 0.70 -5.77 27 228 2010-02-25 17:01:20 2010-02-25 17:01:20 2 11 134 0 189 245 0 268.10 26 34.73 CHANGED sLtclFpEl+ppsslp-LSP.hpsFsEWs+lplAuTlYphFlusD.NS.Ehasp.....s++lHpLhPYslhtpllRhoNPhulMpshlDLFL......................AQP...FG.u+SLLQphFshhLs--l+shcctlcpLcp.plts.....phhhp+lcpal..ss-.....s....hhstl+c-utp.....pshslllsILpssp......p...stlss.....splpclhpSYhsapshhpp..........................sptppstuhhasplKplhplhh+cRDKchMppLhp-.PthspLlKsllolFY-PLl+la+sAslp.ulpsFppFhsDLIphl-phppt..hhhssshpsVpthhsLls+HpssFacFlHclah+Dst.....lFpslhpWlcpllphL+psh .......................l.plhtplttptplppLs..hpphh-ahp...l...p...lAuhlaplFlsp-.su.thhtt..........h+clHplhPYthhpphl+hsNPhshhpuhlclhL......................ups....hG..upsLhQ......phhuhhlp-...s...h..pp..hpct.lctl.tt.plts............hhhpplctas..tscp..........pptl+tc.utt.....pshsll..hslLpsp.....t..s.ls..............tthtpl.ps...a.......................................tt.ha..hpphhphhhppRD+t.hhpl...hp........c...s...h.hphh+s.hh.s..hhYpPlh.c.laptusl..slpshttFhsDhIt......h.........lcth...........p................h.....................t........s................ps.........VpshhsLht+HppthapFlHplhhpsst.....latthhtWhpthlthh+p..s...................................... 1 59 117 167 +12663 PF12826 HHH_2 Helix-hairpin-helix motif Bateman A agb Jackhmmer:DISA_CLOD6 Motif The HhH domain of DisA, a bacterial checkpoint control protein, is a DNA-binding domain [2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.82 0.72 -4.09 72 5697 2012-10-03 02:11:09 2010-02-25 17:13:14 2 57 4306 6 1394 5279 3189 61.40 32 9.52 CHANGED clh..hh...LGIRaVGppsA+hLApcFt.ol-sLtpAohE-Lhplc-IGshlApSlhpFFps.psppll ........................+hlhu..LG.I+pVGtpsActL.A.p.....c.Fs..o.......l.......-....s............l..........t....p..........A............o..........h............E..............-..........L...t.....p.....l...s...slGthlA.pslhsaFtptt..t.................... 0 461 920 1195 +12664 PF12827 Peroxin-22 Peroxisomal biogenesis protein family Wood V, Coggill P pcc Pfam-B_15020 (release 24.0) Family Peroxin-22 is a integral peroxisomal membrane protein family. The N-terminus is in the matrix and the C-terminus is in the cytosol. The N-terminus carries a 25-amino acid peroxisome membrane-targeting signal. It interacts with the ubiquitin-conjugating peripheral peroxisomal membrane enzyme Pex4p anchoring it at the peroxisomal membrane. Both Pex proteins are involved at the same stage of peroxisome biogenesis. 21.70 21.70 21.70 63.60 21.40 20.80 hmmbuild --amino -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.56 0.71 -4.39 17 42 2010-02-25 17:17:23 2010-02-25 17:17:23 2 1 41 3 28 39 0 120.70 32 66.90 CHANGED ptsptp.pp+hsscSlslslSpolhssph..ls.hhhsscshlhllsPsls.t...............phphppp.tthtssaKllpCsshp...................GhapllKpL+schLL...................................lssD-ls.s.......lspD...............ls+Flppllsl ........p..tppspcp+hspKShsIl.lScSl.u....Plph..hss--lVhllPPsho.ssh..............t.thtps.pppts.paKlIpCcohp...................GhauslKpLptpphl...................................lsocDhssu.......lPcD...............ls+Fl+plls....... 0 5 15 25 +12665 PF12828 PXB PX-associated Wood V, Coggill P pcc Pfam-B_972 (release24,0) Domain This domain is associated with the PX domain. 
20.90 20.90 21.00 21.50 20.70 20.40 hmmbuild --amino -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.67 0.71 -4.56 28 179 2010-02-25 17:31:20 2010-02-25 17:31:20 2 11 129 0 143 195 1 138.40 37 14.82 CHANGED M......................tLossppHaLK+.LlchpLtpElpthspP.....ssLppaGhPFps.t.......s.ppppsp...............................hPlLpahhppFlh..oFPhl..........ptstppcFWps+lQsFlEphsptslSsS.D+p....ploKR+plspKhpshlhLhhsos ..................................s......p.LTspQpHYLK+pLlptQlp.tElppLss.P.......sALppaGhPFpss.......sph..ss.pt....t.p...........................................................................................................................lP.l.L.+alFhcaVh..sFPhL...................ppstpc-.....FWQsKlQs........FlEsFssppl...SsSt...DRt............c..oKR+pLutKhp+lltLhhsS.u........................................... 0 42 84 126 +12666 PF12829 Mhr1 Transcriptional regulation of mitochondrial recombination Wood V, Coggill P pcc Pfam-B_7788 (release 24.0) Family This family is involved in the transcriptional regulation of recombination in the mitochondria, 20.50 20.50 20.50 41.20 20.00 18.70 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.78 0.72 -4.16 24 112 2010-02-25 17:49:44 2010-02-25 17:49:44 2 1 111 0 86 104 0 90.70 37 29.53 CHANGED ausplalF+NlposQVlYSpsPshspppl.pQh.hsshcs+.......sshR+DhW+sMsVlsFs...shphulpsYpsLtchRhhR.pls.tcps.ph++ ........aG.plalacNlcosQVlYS.s.tlcpppl.pQhsh....upcsh......PssLR+DhWpPhsslpFs....ssphG....lpsYptLREh+hhR.pls.sc-h.ph................... 0 13 41 71 +12667 PF12830 Nipped-B_C Sister chromatid cohesion C-terminus Wood V, Coggill P pcc Pfam-B_443 (release 24.0) Domain This domain lies towards the C-terminus of nipped-B or sister chromatid cohesion proteins. 
26.40 21.50 27.30 21.60 25.70 21.00 hmmbuild --amino -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.88 0.71 -4.65 57 312 2010-02-26 10:34:20 2010-02-26 10:34:20 2 9 257 0 213 314 3 182.20 28 10.29 CHANGED hsuulsQ+ahpplLc.hshsppsplphhAlcllphll+pGLlpPhpslPslIALpss.spshlpphAhphhpplppKapuhlpsp...hspGlphuapapppl.tp....h...............................ssshhsplapllp.ss+..psRpcFlpslh+hhp.p..............................t.lphhhalspslAhlsaps.-Eslhllppl.....ptl.l ..........................huoulhQhaLcplLc.hhhpppsplphhAlp...........llthhlp.QGLlHPhp....slPhLIA.....LtTs.spsthpphA.p.hpplccKatuhlct.c...hht.G.lphuaphQpsl.ss.pssh.t..................................................................ssuhhutLY..sl.l+..ss+..ppRctF...lpslhpha-tp...................................s.splphhha.lu-NLAhhsYps.-EsLhllppl-hh................................................. 0 73 120 180 +12668 PF12831 FAD_oxidored FAD dependent oxidoreductase Mistry J jm14 PfamB_47 (release 24.0) Family This family of proteins contains FAD dependent oxidoreductases and related proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.48 0.70 -5.40 240 2577 2012-10-10 17:06:42 2010-02-26 14:20:32 2 89 1337 8 853 18895 7151 241.30 22 42.20 CHANGED DVlVhGGGsuGlsAAlsAARtGt....pVlLlE..ppshLGGhsouuhs.sshhu...........h..t........................hhsG.lhpEhhpcl..........tt...t.......................t..........................a-Pcshptlhcphl......tc..s.........slplhhpstl............sslt....t.....p...s.......................c..lpulth..................spt.u....thplpAchalDuTs-GDLsshAGssaph.GcEsps...hsps.....ssh........................hh.......phssh..............................................sptht........h.....hph......thssps.tpth..sh..................stsas.tphph..................thhpt.............................hsup..........hsh...................h-hh.........shscuph..psR..cplh......p.hhtslhpal.....t..................Ghtp....splt..................thu.thhlREoRRlhGpaslTppDl.........hs............t.........ppas......D..uluh.usas.lDhH....ss.pth....................tsh..thtsss..........Ys.I........PaRuLlP.ps....hcNLlluG+slSuoHhAhuohRl.ssshshGpAAGsAAA...l...u....lcpst..........s..s.ppl.......s.....hppLpppL 
.........................................................................................................................DllVlG.uGsuG..h.sAA.l....s..A..A..c..t....G..t..........cV..l.L.l..E..........c..........p..........s...........h..........l......G.......G......t.....h...s....t....t....h..h............................................................................................................h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................... 0 296 586 747 +12669 PF12832 MFS_1_like MFS_1 like family Wood V, Coggill P pcc Pfam-B_20770 (release 24.0) Domain In fungal members this domain is found at the C-terminus of putative transporter proteins. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.31 0.72 -4.19 39 546 2012-10-03 03:33:39 2010-02-26 14:36:54 2 10 360 0 338 2373 283 74.60 24 14.28 CHANGED .hhhLushYFhaFuslGslhPYhulahc.p.hGassppIGhlhsllhhoclluPslauhluD+huptthllhhushhsh ......................................h.lpsh.Y.Fh.a.a.u...sh....Gs..h..h....P..a..h....slah.....c.....p...hG.....hs.ss.p......l.....Ghlh....u.l.h..........h.....h.ch....lu....ss.l.aGhluD+.h.t.pt.thllhhuhhh..s........................... 1 135 186 282 +12670 PF12833 HTH_18 Helix-turn-helix domain Bateman A agb Jackhmmer:Q17ZT7 Domain \N 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild --amino -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.61 0.72 -3.88 512 57814 2012-10-04 14:01:12 2010-02-26 15:54:55 2 267 4045 19 12899 44029 2241 79.60 26 23.35 CHANGED lApthshSt+pLp+hFp.phh.G.hostpalpphRlppApphLh..p.............s...s...h..s.l.pc..lAhphGFsshupFschF+chhGhoPsph...+pp ..............................................lApphshStppL.p...+hF+.p.h......G..ho...htpalp.phRlpp....Ap.ph..L...p...................................s....s.......h...s..l..sc..lAhps..G.a.s.s...s.tFs....+tF+.+h..h..G.hoPspaRp.......................... 0 4087 7939 10539 +12671 PF12834 Phage_int_SAM_2 Integrase_l_N; Phage integrase, N-terminal Coggill P pcc Pfam-B_50 (release 24.0) Domain This is a family of DNA-binding prophage integrases. It is found largely in Proteobacteria. 
21.90 21.90 21.90 22.00 21.80 21.70 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.84 0.72 -3.60 22 357 2012-10-02 14:21:04 2010-02-26 16:41:55 2 6 239 0 81 331 30 87.30 33 31.33 CHANGED MscLsh-hKpLA.+.p.s.u.GSapThpsRh+hhpphuccL.ttLshplpssppLKs+HlEuhlpphhupsIottTLpNcMutlRhhhppAGRspls .................................t.Lttphh.tLA.+..p.s.u..GSa.KT..hpDRh+lhpRhsccL...ht.h.sl..p..lpps..pplKs+HIcsYlpphh.s.p.G......luh+TlpN-MuslRslhptsG+ppl.......... 0 14 32 61 +12672 PF12835 Integrase_1 Integrase Coggill P pcc Pfam-B_50 (release 24.0) Domain This is a family of DNA-binding prophage integrases found in Proteobacteria. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild --amino -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.13 0.71 -4.53 21 412 2012-10-02 14:09:14 2010-02-26 16:45:15 2 7 286 0 101 918 314 159.80 31 55.67 CHANGED GluuASRs.GTKhAIss.-capplLspsctpD.pGlAsslpLuRhhGLRocEAVQsspSL+TWppuLtp..G-s+lpVVFG...TKGGRPRcTpll..-p-plhpslppAlthupppsG+LIc..+PsL....+sAhcpa+shspc.uGLsG....pauPHSLRYAaAp-AhpaahppG.hSp+EAhAhsSMDLGHGDGRGcYltpVYs .......................................................................................................hpthhpt..sp....t......p.....c......t......l...A......s....u.....l..p....Lu.+hh...GLRspEu.......lph.......s.........t.......s.h.......t....................................h....p...........spp...p.l...t...l.h.h.G...........TKG..G.Rs....Rps.......lh..........pp..th...h.p.s........l......p...p.....A.......h...........t..........h....s....t........p...p....s.........s.p...L.Is.......c.s.l...................+ps..h..p......h.....a.c..p.t.h.pc.....h....G....l.s.t............t.h..s...s...HuLRauaAQ.c.th....phh..................tt......s..h......s..........................................ht.thtH................................................................................................. 
0 14 48 78 +12673 PF12836 HHH_3 Helix-hairpin-helix motif Bateman A agb Jackhmmer:Q182I3 Domain The HhH domain is a short DNA-binding domain [2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.84 0.72 -4.03 335 8248 2012-10-03 02:11:09 2010-02-28 11:38:54 2 52 4067 5 1906 6460 471 64.20 37 14.92 CHANGED VNtVGV.....-lNTASssLLp.p.l.u.Gl..s.sslAcsIVpaRcp.p..G.sFpsRcpLh.cV.tlGsKsaEQsAGFLR ..........................tstV...sl.NTA.....Ssp......Lp..p.l...s..G..l..u..tphApsIls.YR-c..p..G.sFps..h..c-Lp...cV..stlG.sK.shEphtshlp................... 0 618 1220 1609 +12674 PF12837 Fer4_6 4Fe-4S binding domain Bateman A agb Jackhmmer:Q180F8 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 25.00 24.00 25.00 24.00 24.90 23.90 hmmbuild --amino -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.38 0.73 -7.55 0.73 -3.87 236 737 2012-10-03 08:56:43 2010-02-28 11:44:55 2 86 681 0 169 11950 1256 25.10 45 8.00 CHANGED hsphc.tspC.h.......p...C..tp..C..hts..CP.....t....t.....sl ........hthD.cpKC.s.......p...C.....h-.....ChcV..CP....E..................... 0 70 132 154 +12675 PF12838 Fer4_7 4Fe-4S dicluster domain Bateman A agb Jackhmmer:Q182B4 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.21 0.72 -3.58 464 18155 2012-10-03 08:56:43 2010-03-01 13:25:32 2 690 4323 11 4999 30460 7729 57.40 30 15.10 CHANGED pC.......h.....s...C......s.....tC..h..ts..C.................................................P.......hp..s.l.p...........h...p................p...............t......................................t..................t...h....................................t...h...p....s...p...p..........C......h.............t................C.........................s.........t...........Chs.........s..CP..p..ss ............................................................pC.l....s...C.....s.....tC..s..ps..C.........................................................................................................................................P............ts...s.....l.p...........................h.p...........................t....................t.................................................................tt....................t...h........................................................p...l....s...........p....p...............C......h.........................t........................C......................................................................G...........t.............Chp............sCPsth.......................................................... 0 1801 3427 4346 +12677 PF12840 HTH_20 Helix-turn-helix domain Bateman A agb Jackhmmer:Q182F8 Domain This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.71 0.72 -4.17 108 9773 2012-10-04 14:01:12 2010-03-01 16:12:49 2 62 2904 43 3329 15478 1487 61.20 27 39.10 CHANGED sphh....ps.....Lus.ssRhpllptL.................ttpshossplucth.....s......hstss.lphHL.ctLpc.uGL.lpspcptc .........................................hhpA.......Luc.PsRh.p.llph..L.........................tpts.hs..s.s.....cl..spth......s...............lspss.lSpHL.phLpc.uGL.lpsc+pG................... 0 1109 2238 2834 +12678 PF12841 YvrJ YvrJ protein family Bateman A agb Jackhmmer:Q189G4 Family This family of short proteins are related to B. subtilis YvrJ protein. None of the members of this family have been functionally characterised. 27.00 27.00 28.10 27.80 24.50 25.70 hmmbuild --amino -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.72 0.72 -4.66 46 323 2010-03-01 17:10:41 2010-03-01 17:10:41 2 2 270 0 80 239 0 37.80 44 70.26 CHANGED phIuNlGFPIsVulYLLlRlEpKl-pLspuIpcLsptl .........hIuNlGFPIsVolYLLhRlEsKL-sLhpuIpcLspt........ 0 33 65 71 +12679 PF12842 DUF3819 Domain of unknown function (DUF3819) Mistry J jm14 Pfam-B_986 (release 24.0) Domain This is an uncharacterised domain that is found on the CCR4-Not complex component Not1. Not1 is a global regulator of transcription that affects genes positively and negatively and is thought to regulate transcription factor TFIID [1]. 
21.50 21.50 25.10 24.60 21.10 19.20 hmmbuild --amino -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.89 0.71 -4.51 61 378 2010-03-02 15:41:44 2010-03-02 15:41:44 2 10 282 0 261 393 2 143.20 37 6.88 CHANGED psp.L+pll.hAls+A......l+EllsPVV-RSVsIAshTTcpllpKDFAhEsDEs+l+pAAphMlpsLAuSLAhlTC+EP...L+ts....lpspl.+sl......lp..tssts............h........-phlphlssDNl-LusslIE+sAhE+Alp-I..-ctltsththR.+ppRp .......................p.L+phlp.Al-RA......lpE..ll.p.PVV-RSlpIAshTTcpllpKDFA......h-sDEs+hRpAAppMs+sLuuuhAhlTC+EP...Lphu....lsspl.+sh....ht.tthtts...........phh.......-pththlssDNl-LuCshIcKsAhEKAh.-l..-ctltsphphR+ptR...................................... 0 98 150 222 +12680 PF12843 DUF3820 Protein of unknown function (DUF3820) Mistry J jm14 Pfam-B_72 (release 24.0) Family This a bacterial family that is functionally uncharacterised. 21.10 21.10 21.20 21.40 21.00 21.00 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.25 0.72 -4.36 77 1116 2010-03-03 15:03:50 2010-03-03 15:03:50 2 5 1053 0 236 562 60 64.00 55 65.46 CHANGED hspctLlcLsptpMPFGKYpG+hLhDLPEtYLlWFtpcG.FPpGcLGphLtlhhElKlNGLEsLlcPL ............M-pEpLlclAssh.MPFGKY.......KGRhLlDLPE-YLLWFARK..s.FPtGcLGcL.hplsL.IKhpGLppLlpPL.......................... 0 53 126 190 +12683 PF12844 HTH_19 Helix-turn-helix domain Bateman A agb Jackhmmer:Q180H4 Domain Members of this family contains a DNA-binding helix-turn-helix domain. 
28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild --amino -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.93 0.72 -3.92 149 14559 2012-10-04 14:01:12 2010-03-03 16:22:39 2 171 3576 20 2358 33000 4379 62.70 22 31.97 CHANGED Gcpl+chRppp..sl.o...hpplup.......phth.....p.....pstlsphEps....pp.phs.....sptl.......hplsphhsls.hp....phhpthpp ..............................ppl+plR.p.p.p.......sl.o.......ppclAp...............pl.sh............s................pstlS.p..hEpG..............pp..pss............hppl....................hpls.p.hh.s.ls.hs.......hh.....p........................................................................... 0 826 1594 1964 +12684 PF12845 TBD TBD domain Bateman A, Masci AM agb [1] Family The Tbk1/Ikki binding domain (TBD) is a 40 amino acid domain able to bind kinases, has been found to be essential for poly(I:C)-induced IRF activation [1]. The domain is found in SINTBAD, TANK and NAP1 protein. This domain is predicted to form an a-helix with residues essential for kinase binding clustering on one side [1]. 21.40 21.40 21.70 22.40 20.70 21.20 hmmbuild --amino -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.66 0.72 -4.41 10 157 2010-03-03 16:48:16 2010-03-03 16:48:16 2 2 39 2 71 128 0 55.70 39 12.77 CHANGED .Ltc-RsNlppAahELKEEhuRlphLupsQs-hLpKLshssssspp...QsssPIpCs-+ ..........lhccpsNlppsahELKEEhp+lphLopsQs-hLpKLphssststt......................p.t.ssPlpCp-c..................... 0 3 6 21 +12685 PF12846 AAA_10 AAA-like domain Bateman A agb Jackhmmer:Q180M9 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null --hand HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.84 0.70 -4.98 149 6551 2012-10-05 12:31:09 2010-03-04 08:58:08 2 39 2755 0 1413 9633 1186 297.10 16 39.53 CHANGED ssshhlhGtsGuGKSshhp.thh..hthhttG............................splhll......Dht.scats.....hsph.......................hsuphlths...........hsss.........htlN.....Ph.....pht.........................................................hppptpthhshLtthhpths.stpt.................pshlsps..lptha...................ptths...............................................................................................................................................sls-lhch..lpp...t..........................................ptphpph.tp.........tlpthh..........sthutlFs....t.oshph..........ssplls..hclsslpts.............tphhshhhhhhhshhh..pphht.............................t...........ptppphlh...........lDEs.athhss.......s....thhphlpphhRps.R+hssshhhsoQsh.sDhhs...t.............pslhstsshthhhptss..pphtt......ltphh...sls.s 
...............................................................................................................................................................................................................................................phhlhGsoGu.G.K..o.h.....hhp..hlh.......tt..h..h.h.h..s......................................................................sph.hhh.......D.....t.....sp.h.tt.................hsph........................................................hsuphh.p..h.s...............................ssp...................hthN.............Ph..........................th.................................................................................................tpphph....h..h.....s..h..l...t...h..h.h.t...htstthpt...........................tshl....p..ps.......lpth.h...........................p............................................................................................................................................................................slpp..lh..ph.....lp..pt..................................................................p.thtph..tt........................................................tl.p.hh..................st.ht..h..las................................ttss.h.ph...................psph..hs.......h..-..l...s...p..ltpp..........................................thhs.h.s...h...h..h...l..h.ph.hh.....pphht.................................................................psp.thlh...........lDEs.ath.lps....................t.........htp.h...lt.ph...h..+ps.R....K.hssth.hhsTQ...s...h...sD.lh.tt..................pslhpss...s.hh.lh.h.pp.sp..tp.t.....h.t.h......t.......................................................................................................................................................................................... 0 450 933 1222 +12686 PF12847 Methyltransf_18 Methyltransferase domain Bateman A agb Jackhmmer:Q18BX6 Domain Protein in this family function as methyltransferases. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.66 0.71 -3.49 195 8520 2012-10-10 17:06:42 2010-03-04 15:04:54 2 133 3477 116 2843 74852 20479 109.50 19 36.69 CHANGED sst..pl.lDlGsGsG..phs...lthsp..........stp..lhul.........Dh......stphlphsc..........................pps.........................................tht....splp.....hhp....................uch........hsthst....................................hDh..............lhhss...............hhph......................lp..phh....ptLp.................................s.uGh........lllssh ................................................tpl..lDlG.s.G...s...G......p..h.s......l..th..sc.................................t.u.tc.......lhu..l.....................................Dh.........................u..s...t...h...l...p..h...s.c...........................................................................................................................pph........................................................................................................t......hths.......sp.lp................h.lp...........................................................u.-h..........t.h.s.h.h.tp..........................................................................................hD.h............................l..hh.st.....................................................hhsh....................h...........................lp.....t.hh...............ptLp................................................s...u.Gh...llh...hh.......................................................................................................................................................................................................................... 0 981 1879 2441 +12687 PF12848 ABC_tran_2 ABC transporter Mistry J jm14 Pfam-B_38684 Domain This domain is related to Pfam:PF00005. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.97 0.72 -4.14 246 15857 2012-10-05 12:31:09 2010-03-04 15:58:16 2 25 4624 0 3551 11380 1770 86.00 24 14.83 CHANGED l-l-puclp.tYpG.NYspahpp+ppchptptptacpQQcclp+hppaIcRh+.s...pu........s+up....pAp.SRhKtL-+h....-p...lcts.....p.c....tt..p..hpF ......................h-l-pGcl.p.Y.....p.....G...NYspahcp+ppph...pppt.......p.p....pcppcplpc........p.sa.......lc..+ht.t.........................................u+sp........pu..p...uR....h+tl-....+h........cp....hp.................................................................................. 0 1158 2264 2991 +12688 PF12849 PBP_like_2 PBP superfamily domain Bateman A agb Jackhmmer:Q180B2 Family This domain belongs to the periplasmic binding protein superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.99 0.70 -4.94 148 7526 2012-10-03 15:33:52 2010-03-05 19:02:32 2 29 4026 39 1757 5461 2312 251.70 21 78.69 CHANGED sssssstsh................................................lhsssooshtshhh....p.hhts..............Fppp.ssh..............................p...........lp................h...tusGoupulpthppG....................................sDlshss..t....thscpch..ts.......................hshpt..........hslshssllllss.ts........................ssst.t.....hsspsltpIht.t............................................................................sstslthhh..RucsSG.....h.sphtpt.............................................t.ht...................ttsh...................................................stsht.s................ps.sGhhsslshssth....t.........shshss.....tshhhsh................ts.hph..............................t.....hhs.........spt.....GsaPlhpsh...hhlhhssthh..........tstsps.......Fhsahh.Ssc 
................................................................................................................tst.........................t....................lpss.G.o.....osh.t.s..lhp..t..hspt.......................................atp.p...s.s.s................................................p.................ls................................h......pusG...S.us.Glp.p.h.h.sGs...................................................................sDhusus.....t.......................shpp....p..ch.....................................................thhp..h...l.........uhs.........u.....lslslN..hs.................................sslp..t.......Lo.t.pp.lt....p..I....a...t..Gc.....................l........................................ppWpp.........................................h.thss.ppIt.llp....R.ss.s...S.G..TpthF.pphlhp.t...........................................................................................................................................t.ht..........................................t...............ssshtth.........l............pp..suulua.lphu..h...........................................h......................t............................................tpthph.....................................................hts.....................h.tpsht..sssa.P..l..hp.h...ahhsp.p.p...................sttspp.....Flcahh.s..t...................................................................................................................................................................................................................... 0 550 1144 1508 +12689 PF12850 Metallophos_2 Calcineurin-like phosphoesterase superfamily domain Bateman A agb Jackhmmer:Q180F1 Domain Members of this family are part of the Calcineurin-like phosphoesterase superfamily. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.18 0.71 -4.39 139 12839 2012-10-02 19:15:56 2010-03-08 14:03:49 2 60 4473 32 3683 17996 3872 206.20 16 75.47 CHANGED M+lullSDoHs..................h.c............h...p....................pll.c..th.....p..p....s-hllHsGD...........................h.....s.............s..............p..l....hp.h.........pp.h...s........lhsVpGNs.............................................D........................tt......h....s.pph................................................................hhpl.s......................sh...................ph.hhsHG.........pt......ht......................................................................h..........ht....p..t.......lh....ttsp.pt.sscll.hhGHoHtsth........................p..p......hs..s...hlhlNPGSsshs+.............ht.........t...ohsllclpss .......................................................................................................................+lhhlSDl...Hh...............................................s.h..p......................t.......h...p................................................p.ll...p..th...............p....p...............sD..h.l.lhs.GD.............................................................l....hs..............................t.........................................................................pph...........hc.tl.................pph..........s.............lh.h.l....t..G..N..c.................................................................................................................D.............................................t..........h...t.......h.....................................................................................................................................................h.......h..p.h.t..........................................st.........................................p.l...h..l..s.H...u..............p........h.s.............
............................................................................................................................................................................................ht........................h.s.....p....t....................h.......t.hht....p......t.....s....s....c.....h..h.....l......h..GHs..H....t....s..th.......................................................................h............t.t............h..h...h........s....G...s..................................................t....................................................................................................................................................................................................................................... 0 1293 2430 3172 +12690 PF12851 Tet_JBP TET_DSBH; Oxygenase domain of the 2OGFeDO superfamily Bateman A, Zenonos ZA, Iyer LM, Aravind L pcc Manual Domain A double-stranded beta helix (DSBH) fold domain of the 2-oxoglutarate (2OG)-Fe(II)-dependent dioxygenase (2OGFeDO) superfamily found in various eukaryotes, bacteria and bacteriophages [1]. Members of this family catalyze nucleic acid modifications, such as thymidine hydroxylation during base J synthesis in kinetoplastids [2], and the conversion of 5 methyl-cytosine (5-mC) to 5-hydroxymethyl-cytosine (hmC) [3], or further oxidation to 5-formylcytosine (5fC) and 5-carboxylcytosine (5caC) [4]. Metazoan TET proteins contain a cysteine-rich region inserted into the core of the DSBH fold. Vertebrate TET proteins are oncogenes that are mutated in various myeloid cancers [5]. Fungal and algal versions of this family are linked to a predicted transposase and show lineage-specific expansions [1]. 
26.00 26.00 26.00 26.00 25.90 25.30 hmmbuild --amino -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.99 0.71 -4.97 25 327 2012-10-10 13:59:34 2010-03-09 14:18:29 2 10 97 0 200 301 87 374.70 27 38.59 CHANGED uhshhhsshhpsp............h.s.hht.t.p.......htphlpphsshlsslhctlsPphapttsphhsshh................hh.hhupsaoshols...............psptst.H+Dptsh.tshslhsshtts.........hcGGtltlsp........................................................................................................................................................................................................................................................................................................................................................................................................................................................huhslsspsGslllhsuphhhHussslps..................................................................stRholVaah+c ...........................................................................................................................................................................................................................................................................................hYashpt.sp.......p..sR+hch.hpp.tt...................................lcpphpplushluslYcpluPptaps.Q.h.phpphs..c..scl..............sh.p.GpPFSGlTss...............hcFss+sH+Dh+shts.G.hol.lsTLscpp..............p--phal..LPh.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.........................................................................................................................................................................................................................................................hGluls.spGolll.su+h.hHusTs.lc.p............................................................s...p..pspRlolVaY.+c........................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 64 106 140 +12691 PF12852 Cupin_6 Cupin Mistry J jm14 Pfam-B_918 (release 24.0) Domain This is a family of bacterial and eukaryotic proteins that belong to the Cupin superfamily. Some of the proteins in this family are annotated as being members of the AraC family of transcription factors, in which case this domain corresponds to the ligand binding domain. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild --amino -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.12 0.71 -4.68 212 1675 2012-10-10 13:59:34 2010-03-10 15:02:53 2 5 1105 0 494 1685 112 171.80 25 57.25 CHANGED DsLSc.lLstl+hpushhhpsphsusWul....ph....s...ss...............hphahlhcGpshlp..ss....t.....s....lpLss.GDllllPps...p....sahl....s...ssss..................................................................ts.....................s.s...............................stllsGphphcssts....p.LlssLPsllhl...t...............t..tshlsshlphlttEstpspsGpphllsRLh-lLhlpsLRtalp .........................................................................................................DsLoc.LLphhp...pusl..tps..h.su....sWtl......th......sst.................t..s........hpaH.h.......lhpGs.shLp..hss..................s......p.....hpLpsG-ll.llPps...s..sHpL.....s...ssss..................................................................................................p..............................................................sspllCGphphp.ps.st.....t.llsuLPchlhl........................ss.tshLpth.lthLtpE........upp.....s.t.sGsssll.spLssslhshslRtal..................................................................................................... 0 123 263 390 +12692 PF12853 NADH_u_ox_C C-terminal of NADH-ubiquinone oxidoreductase 21 kDa subunit Coggill P pcc manual Domain This family is the C-terminal domain of NADH-ubiquinone oxidoreductase 21 kDa subunits from fungi. 25.00 25.00 49.60 48.50 20.50 19.90 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.13 0.72 -4.28 10 102 2010-03-10 16:33:19 2010-03-10 16:33:19 2 2 99 0 80 97 0 77.00 57 40.60 CHANGED hhEhhcKlpcGcs.Y..GpSpLsshhQslAsRNSpYStLhlaVlPWFNhsNHs.HGVDhsKYYpptEtEhE................Acpst+tt.p ............M+EMVcKlKcGcPLY..GpSpLosahQGVAARNSRYStLFhpllPWFNhVNHsQHGVDTAKYYpQAEcELE...........................sEp......... 
0 20 44 68 +12693 PF12854 PPR_1 PPR repeat Wood V, Coggill P pcc Pfam-B_105542 (release 24.0) Repeat This family matches additional variants of the PPR repeat that were not captured by the model for Pfam:PF01535. The exact function is not known. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.49 0.72 -4.47 433 3378 2012-10-11 20:01:03 2010-03-11 14:22:50 2 1621 145 0 2261 22733 138 33.10 30 7.47 CHANGED pGhpPsslTYssLIsGhC+sGclccAhclhccMp ..........................shtPssloY.....ssL.ls.u.h..C..+...s...G....c.lccA.hc.lhcpM................... 0 277 1399 1896 +12694 PF12855 Ecl1 Life-span regulatory factor Wood V, Coggill P pcc Pfam-B_42039 (release 24.0) Family This family is involved in the chronological life-span of S. cerevisiae. Over-expression leads to an extended viability of wild-type strains, indicating a role in regulation. 26.40 26.40 26.50 26.40 26.10 26.30 hmmbuild --amino -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.91 0.72 -4.66 23 165 2010-03-11 15:50:29 2010-03-11 15:50:29 2 2 112 0 132 156 0 43.20 35 21.05 CHANGED Mu....AFssYChsC-+lh........................sssplYCS-pC+ppDpppshsttp ..............F.paChsC-+Qh.....................................ssstlYCSEsCRhtD.pps......t............ 0 28 66 113 +12695 PF12856 Apc9 Anaphase-promoting complex subunit 9 Coggill P pcc manual Family Apc9 is one of the subunits of the anaphase-promoting complex, or cyclosome [1], which is essential for regulating entry into anaphase and exit from mitosis. The APC is a ubiquitin-protein ligase complex. All APC subunits are members of the cullin family proteins, which bind to a ring-finger subunit via a conserved cullin domain [2]. The APC is made up of four parts, the third of which is a tetratricopeptide repeat arm (TPR) that contains Apc9 [3]. 
25.00 25.00 28.70 26.80 24.30 24.30 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.73 0.72 -3.92 15 42 2010-03-12 10:19:30 2010-03-12 10:19:30 2 2 40 0 25 35 0 117.30 28 33.36 CHANGED ssslcs+lhslppscKpt..............................h.tpstp.spYDYulF.scpss.............l+ESpIcuapsuE+hspsllFcps........s.psD.....s..............pssc.ctsp..........tptshl...slPGYTp ...........................................................h....hsh.hslppsph.t..............................slpc...tp.tppYDYSsF.sccss.............l+ES+Icua.pAE+ss+sLlFc+st..................................S.csDh................s..........ppssc.-psp............tssshll..SlPGhsp.................................. 0 4 14 24 +12696 PF12857 TOBE_3 TOBE-like domain Mistry J jm14 Pfam-B_1384 (release 24.0) Domain The TOBE domain [1] (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulfate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.57 0.72 -4.11 101 1134 2012-10-03 20:18:03 2010-03-12 10:22:48 2 2 1112 0 221 704 35 57.20 31 16.08 CHANGED sssp...sslsu.plpplp.shGshs+l..El...phh....ssp.........hlElclspcp......t.thht.GpplhlpP+ ...........ps..hsuslPs.pVlcssPhGthspl...l......psh.....hhs-..........lpl.hst-c........s.pt.G-plals................ 0 46 110 166 +12698 PF12859 Apc1 Anaphase-promoting complex subunit 1 Coggill P pcc manual Family Apc1 is the largest of the subunits of the anaphase-promoting complex or cyclosome. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1,2]. 
Infection of human fibroblasts with human cytomegalovirus (HCMV) leads to cell cycle dysregulation, which is associated with the inactivation of the anaphase-promoting complex [3]. 25.00 25.00 27.00 26.70 20.50 20.20 hmmbuild --amino -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.48 0.72 -3.81 57 240 2010-03-12 11:41:32 2010-03-12 11:41:32 2 3 221 0 182 249 0 101.60 27 6.01 CHANGED csLsVlh..psphplahh...s........GcsaslsLPFp.Vsssashsp..GLlLpRchs...................ssphtttt.............................................................................sshs+l.aoL...scPLsEhs.Vshps ................................slsllh..psphplahh...s.........GcsallsLPFp.Vsslass.th..GLLlpRpts........................................spphs.sss............................................................................................................p..........pshPph.aoL...hcPLsEhs.llhp....................................................................................................................................... 0 57 95 148 +12699 PF12860 PAS_7 PAS fold Mistry J jm14 Pfam-B_21375 (release 24.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.19 0.71 -4.08 164 2444 2012-10-04 01:10:46 2010-03-12 13:50:07 2 265 808 0 913 2460 395 113.00 23 15.73 CHANGED LcphsQGlslaDs-h+LlsaNp+atclaslPsphl.psGts.hpcllca...spcGphu....sc......h-th....lppcl....p.thpp...tpsphhE+pp.ssGpllclpusPh....Ps.GGaVsoaoDITp..p+cu-p .....................l-plspGlslaD.sctRLlhhNppa.t.cl.a..s.l..s..s.shl.........p.s.G..hs...hp.p.l.lc.ht....tppu.hhs.........tp........................hpph..........h.pph.h................t..t.hpp.....sps...p..p..h..p...h.p.h..sD.G..+............h..l..c....l..psp..s.h............s...s.....G......G......h....V.....h....s.ap.DlTphpptc............................................ 0 225 493 644 +12700 PF12861 zf-Apc11 Anaphase-promoting complex subunit 11 RING-H2 finger Coggill P pcc manual Family Apc11 is one of the subunits of the anaphase-promoting complex or cyclosome [1]. The APC subunits are cullin family proteins with ubiquitin ligase activity [2]. Polyubiquitination marks proteins for degradation by the 26S proteasome and is carried out by a cascade of enzymes that includes ubiquitin-activating enzymes (E1s), ubiquitin-conjugating enzymes (E2s), and ubiquitin ligases (E3s). Apc11 acts as an E3 enzyme and is responsible for recruiting E2s to the APC and for mediating the subsequent transfer of ubiquitin to APC substrates in vivo. In Saccharomyces cerevisiae this RING-H2 finger protein defines the minimal ubiquitin ligase activity of the APC, and the integrity of the RING-H2 finger is essential for budding yeast cell viability [3]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.35 0.72 -4.15 5 247 2012-10-03 15:03:13 2010-03-12 14:30:41 2 9 213 0 172 3252 290 81.50 49 56.19 CHANGED MKVKIpEW+uVATWpWDlPs.................DDVCGICRVuFDGTCPsCKaPGDsCPLVlGp.CsHsFHhHCIh+WLcspoSKGLCPMCRQoFphp- .........................................................MKVpIp.p...Wpu.....VAsW.pW.t..hsp........................................D-sCG..IC..R..hs.....F.....-.....G..s......C.....P......s..C..............K........h........P........G........D.......D.............C..........P....L......l..h.....G............p........C............s..Hs......FHhHCIh+W...l.p....p...p......p.....s....p....t........CPMCR.ppaph........................... 0 56 98 141 +12701 PF12862 Apc5 Anaphase-promoting complex subunit 5 Coggill P pcc JCSG Domain Apc5 is a subunit of the anaphase-promoting complex/cyclosome (APC/C) which is a multi-subunit ubiquitin ligase that mediates the proteolysis of cell cycle proteins in mitosis and G1. Apc5, although it does not harbour a classical RNA binding domain, Apc5 binds the poly(A) binding protein (PABP), which directly binds the internal ribosome entry site (IRES) of growth factor 2 mRNA. PABP was found to enhance IRES-mediated translation, whereas Apc5 over-expression counteracted this effect. In addition to its association with the APC/C complex, Apc5 binds much heavier complexes and co-sediments with the ribosomal fraction [1,2]. The N-terminus of Afi1 serves to stabilise the union between Apc4 and Apc5, both of which lie towards the bottom-front of the APC [3]. This region of the Apc5 member proteins carries a TPR-like motif. 
21.00 3.90 21.00 4.60 20.90 -999999.99 hmmbuild --amino -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.05 0.72 -4.15 26 461 2012-10-11 20:01:03 2010-03-12 16:08:58 2 64 292 0 278 1604 191 89.20 21 16.11 CHANGED aalpYLpulpsp-Y.sAl-sLHpYF...DY..hhppsscs............................................tYpaA...LLsLAhLHspF...GctccAlpAlpEAlplARENpDpssLsahLsWlhphhcpp .........................................................................................................................................................................h..t.ph.tuhp.Lhphh.....Dh.......t.tpt.........................................................................................hp.h.A......hl.s..L.A..t.l.ps.ph...Gph..ppA......hts....lpEAl...pl....Ap..c....sp....D.......s.Lthhh.hh.......t.............................................................................................. 0 95 155 224 +12702 PF12863 DUF3821 Domain of unknown function (DUF3821) Coggill P pcc manual Domain This is a domain largely confined to sequences from Methanomicrobiales found on putative lipases. The function is not known. 
20.80 20.80 21.00 30.30 20.50 19.40 hmmbuild --amino -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.47 0.70 -4.66 20 31 2010-03-15 09:40:29 2010-03-15 09:40:29 2 7 7 0 23 31 0 210.70 28 29.50 CHANGED sGusVFlGEpGLDlosshs......ssspIuWassuss................................ssssPsphhslss..spsFaVsPshFssps....GsWYthssp.t.........sssFpVc-PslulclhDtsssp-lo..spslspGshlsFpI..cTNLtuh...tpRsusssss....lsIplp........sPsGssassLhss.sGssps................Lssls.....Vss.....................ss.tss..suWsTush.ssust.Y.psGsYslhAcsslNsht- ..GssVFlGEpGLDlosshs.......ssspIuWassGsss...............................ssssPsphhslss..sssFa.VsPshFsscs.....GsWYthsss.t..............ssuFhVp-Pslulcla.Dhs...s..s...pDlo..spplspGshlsF+l..-oNL.ul...hpRsusssss....lsIpVp........sPsGsshosLhss.sussts...........lssls.....lss......hah................tt..suasossh.spssp.Y.psGsYpVhAcsslNsMtD.......................................... 0 11 23 23 +12703 PF12864 DUF3822 Protein of unknown function (DUF3822) Coggill P pcc JCSG_pdb_3hrg Family This is a family of uncharacterised bacterial proteins. However, structural-similarity searches indicate the family takes on an actin-like ATPase fold. 
20.40 20.40 21.60 21.00 19.90 19.30 hmmbuild --amino -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.50 0.70 -5.15 46 193 2010-03-15 14:40:07 2010-03-15 14:40:07 2 1 190 1 54 196 147 238.40 24 89.79 CHANGED Mt.t................phshpptp.hp....LSIplshcGhSFsl.....hss.spphhhhpphphp....sstpltpplcphhcpp.phLppsacpVpllhssshaThVPpsLF.-ccphssYLpa..s..hhps-hltach.ltp.shssVash.pslpsalhcpass...hcahHpsosllcthh..pts.pstppcplalplpcpph-lhlhcppcLlhhNsFp...hpo.......scDhlYYlLashcQLsls.-psplhlhGp..l..sc..csphaphlppYl+plp ................................................h................s..pppthp..LoIclutsuhSFsl.......hss.ppp..hhhpphphp....sshsl.tssLcphhppp.phLpp.s..acclpllhsss.hsllPhph.F.cc..cpt..pphhpa..shptpps-hlhhsh.Ltp.sssslashscslpphlp-pasp...hcahptsssllphhh...ppu.ttuspcclYsphcccph-lhsapps+LlhsNoFp...hps.......spDhlYYlLalacQLshst-cscLhLsGp..l....sc...c-plhppLcpalcpl............................. 1 22 47 54 +12705 PF12866 DUF3823 Protein of unknown function (DUF3823) Coggill P pcc JCSG_target_3hn5 Family This is a family of uncharacterised proteins from Bacteroidetes. It has characteristic DN and DR sequence-motifs. The function is not known. 
21.80 21.80 21.80 22.20 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.61 0.70 -4.84 28 92 2012-10-02 19:08:27 2010-03-15 16:51:00 2 1 54 3 14 85 2 225.90 25 92.24 CHANGED hllhhhhs...hsuCp...h.DNY-.tPcutlpG+l..hhsGEslthch....s..usplplappG..a.s...ph..sshslhlppDGoa.sshlFsGsY+llhhpGsh..............stDolp...lslp.Gs.sphDhcVpPYhhl..cssshshsGsplsAoFplppssss.....slccltlalspophVs............s..thsptt.shsshhs.sp.hohplslsps...............pshhasRlGspsssstph..YSpll+l .....h.hlhhhhhhsSCp...h.DN.YD.tPp..uslpGcl..hhsGEhlthch....s....usplplhphG..au....pp.....ss.shhlppDGoa.sshlFsGsYclhh.pGsh.............hstDTlp...lplp.Gs..sph-hcVpP...Yhpl..pssphph.s...u...splsApaplppssss..................slpcltlalspsphVs....................spphspts.p.hssths.ss.hshphs...lsss....h.s..........tpphasRlGspss.s....s...p....ph.....YStshcl................... 0 6 13 14 +12706 PF12867 DinB_2 DinB superfamily Bateman A agb Jackhmmer:P72629 Domain The DinB family are an uncharacterised family of potential enzymes. The structure of these proteins is composed of a four helix bundle [1]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild --amino -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.64 0.71 -3.79 220 3722 2012-10-02 14:44:17 2010-03-16 10:45:46 2 34 1437 16 1256 5855 770 137.90 14 54.56 CHANGED th..ppsh.p...phhph..lp....sl.....s.p......pphphp..s...................................s.sst................slt.hltHlsh.....spp..hhh....tp.ht................................st.................s.....................................h........tht.ht.h.......................................s...........shs.pl........h...phhp.phppphhphlps..h....st..tp....h...pp.h.....hh.....................................................t.h......s........htphlth.hhtHptpHhtpl ..........................................................................t.th.ph..hp....sl.....s..p.......pp..ht.hp...s..............................................................................t..stt......................................olt.hlsH.l.sh........hpc...hhh....hp.ht.....................................s.p...............t.s..........................................................h........t.htp.t.p.h.............................................shp..pl...........h...p.h...h....p.p.s....p.pph...h...p...hl....ps...h.........st....tp.......h.....tp.h.......................................................................................................................h.............s..............h.t.t.h.l.h.h..hhtHptpHhtph.................................................................................................................................................... 0 533 952 1146 +12707 PF12868 DUF3824 Domain of unknwon function (DUF3824) Coggill P pcc manual Domain This is a repeating domain found in fungal proteins. It is proline-rich, and the function is not known. 
22.70 7.90 22.90 7.90 22.00 7.80 hmmbuild --amino -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.29 0.71 -3.24 11 83 2010-03-16 14:05:24 2010-03-16 14:05:24 2 6 42 0 63 89 0 114.80 25 27.75 CHANGED +pRSRoRDLApAALAAsG.lGYAAHKYo.QRp-RKKt-+ER-+.+aDcD.hppSYt-sY.sPhPYssoP........sssspYYPpoNaFPPPPGosPs..ssssstP.sYNPADYPP.......P..PuAs.sP.psYsY....Pss..PusDsYAPRPRRADENV ......................+SR...sRp........huthul..AAAG....hG...hA..A..tp..hp..p++-+K.........cp-..+cR...ccpcpccc.hppuhpp.s..h..ss.ts..ssss...............st.t.h...st....ssh.as.PPs.t........................................................................................................................................................ 0 9 22 47 +12708 PF12869 tRNA_anti-like tRNA_anti-like Coggill P pcc JCSG_target_390051_3f1z Family This is a family of bacterial, archeael and viral proteins that is related to the tRNA_anti family Pfam:PF01336. The major characteristic of families like tRNA_anti is their OB-fold, and many of them bind DNA. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.93 0.71 -4.71 25 205 2012-10-03 20:18:03 2010-03-16 14:53:54 2 8 185 10 48 201 46 146.90 15 77.05 CHANGED Mt............pphlhhhlhl............hhluhlhs...............................sh.spsphsstptpssh...............shsu.ppLhspapcNpspAsppYh.....sKhIpVpGslssI.pc.tttsss..llhtpp.ts.sslhCphsssp.............ts..lppGpplplpGhspGht...........hsslhLcssh .....................................................................................................................................................................h................hh.hhh...............................................tttt..h..............................................hss.s....plhpsa.p.p.Nt.h.......sA.......s......p.pY.p......sKhlpl.s.G.pl....t..sl..pp..ttssph......hsh.h.s....s..t..t..s........h..sl....p.sthsssp....................t..t.......lppGpplslpGhhpsh.....................h............................................... 1 17 31 40 +12709 PF12870 Lumazine_bd Lumazine-binding domain Coggill P pcc JCSG_target_391926_3k7c Family This is a family of putative lipoproteins from bacteria. Many members of the family are defined as having a lumazine-binding domain. Lumazine is a fluorescent accessory protein having 6,7-dimethyl-8-(1'-D-ribityl) lumazine (DMRL) as its authentic chromophore; it modulates the emission of bacterial luciferase to shorter wavelengths with increasing luminous strength. The family is related to the NTF2-like transpeptidase family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.32 0.72 -4.02 26 650 2012-10-03 02:27:24 2010-03-16 16:51:30 2 9 571 4 86 427 52 96.70 23 43.48 CHANGED suC..ssssPpps....scpahcsltcGchcchhchhs.pspp..........th.thhcthspphppphpphtuhc.hcl.ppp.....sspApVplpssatsupppppshpllK.pc.spWplp ...................................................hs...............................t...p.tth.t....................................................................pl....cp.l...cVh-+s.........sDsAcl..+Vch...s......h........s......s.....s........s..s..p......p......p..p.......l.........L...+...c-...upWcl.h............................ 0 35 67 79 +12710 PF12871 PRP38_assoc Pre-mRNA-splicing factor 38-associated hydrophilic C-term Coggill P pcc manual Domain This domain is a hydrophilic region found at the C-terminus of plant and metazoan pre-mRNA-splicing factor 38 proteins. The function is not known. 22.40 22.40 22.50 22.50 22.30 22.30 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.35 0.72 -11.34 0.72 -3.46 72 139 2010-03-16 17:36:41 2010-03-16 17:36:41 2 24 85 0 82 113 0 102.60 25 19.68 CHANGED s+sssh......c...pc.h..p-tp..ps..p..-...cpt.ccs....s......p......cs....s..ts.hs..ttp............p.s.s..cppsRspp.........................ps..pc.p.ppp...pp...p+p.cs.......cs..pp..R..p+spt......p....pc+....-+....p...Rp.+..sRs...R.......sRsR.D..Rpcc ............................................................................h..shc...ts.hpp.ppt..p..-....-pt.pp................ps....s....s...sp..htp...........................p...tspsRpps...........................................................ph+p.ptp.s.......cp.......sRc..Rs..................+s......pcR.c+sp.......p.....+-R....-R....s.Rc...+.cRc...R...sR.sR.DRcc.t........................ 
0 19 37 59 +12711 PF12872 OST-HTH OST_LOTUS; OST-HTH/LOTUS domain Bateman A agb Aravind L Domain A predicted RNA-binding domain found in insect Oskar and vertebrate TDRD5/TDRD7 proteins that nucleate or organize structurally related ribonucleoprotein (RNP) complexes, the polar granule and nuage, is poorly understood [1][2]. The domain adopts the winged helix-turn- helix fold and bind RNA with a potential specificity for dsRNA [1].In eukaryotes this domain is often combined in the same polypeptide with protein-protein- or lipid- interaction domains that might play a role in anchoring these proteins to specific cytoskeletal structures. Thus, proteins with this domain might have a key role in the recognition and localization of dsRNA, including miRNAs, rasiRNAs and piRNAs hybridized to their targets. In other cases, this domain is fused to ubiquitin-binding, E3 ligase and ubiquitin-like domains indicating a previously under-appreciated role for ubiquitination in regulating the assembly and stability of nuage-like RNP complexes. Both bacteria and eukaryotes encode a conserved family of proteins that combines this predicted RNA-binding domain with a previously uncharacterized RNAse domain belonging to the superfamily that includes the 5'->3' nucleases, PIN and NYN domains [1]. 25.00 11.50 25.00 11.60 24.90 11.40 hmmbuild --amino -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.54 0.72 -4.14 234 1690 2012-10-04 14:01:12 2010-03-17 11:05:37 2 67 816 6 797 1608 60 71.30 17 19.68 CHANGED pt.ppltphlpphl..tpspcp....sG.hspluplssphppph..ssFcs+sY..........GappLscLl.p..s.hsch.hcl....ppttssphhl ......................h....thtp.lhpll...t..pttst........t.hh.lupltsphpcph..shs.psY..........GapplpcLl.p....u..hssl..hcl.....tpttt.....hh....................... 
0 219 388 594 +12712 PF12873 DUF3825 Domain of unknown function (DUF3825) Aravind L agb Aravind L Domain Potential uncharacterized enzymatic domain associated with bacterial Pfam:PF12872 domains. Has conserved residues suggestive of an enzymatic role probably related to RNA metabolism. 27.00 27.00 30.20 29.50 23.50 21.40 hmmbuild --amino -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.72 0.70 -5.22 20 92 2010-03-17 11:31:21 2010-03-17 11:31:21 2 5 85 0 15 78 3 224.80 31 63.68 CHANGED ppLuplAts.E.........sWthsps........pshsILcsYlphTFpRlhcpsp..............hhhssssspusFNTGLlopptcsIaAlFppsp.........................................tsttsWhhpuFs............tsptphhhpshsplP.shusYac.csp-llF..Dsc.tclpls...h-HI..hs-ph..-RhPsphppht....................................cptltshlpuAlchstp+spRNY+hAVPpaYp.......s+lphLLPlpL...spssps-hALslp+..scph....YcupTILTL.chAYpNARLls+P.-scW ....................................................................LtplA....E...Wt.tss...............ILhsYlhthFpplhcppp..............hhhstcps.hAsFNTGLhopth.psIauhFpppp...........................................pttahFpt....Fh.............pptt.hh....plP.phusY..s...t..p-..llF......Dsc...hshpls...h.Hl..hppp...cRlPphlpphs...........................................cphltphlpuslc.......hhtcphphs.+hslPtaYp.......p+l.lhlPLpL...........ppppsDhslVlc+....ppst....YtspTILs..c.sYpsARllshs.-spW................ 0 7 11 15 +12713 PF12874 zf-met Zinc-finger of C2H2 type Coggill P pcc manual Domain This is a zinc-finger domain with the CxxCx(12)Hx(6)H motif, found in multiple copies in a wide range of proteins from plants to metazoans. Some member proteins, particularly those from plants, are annotated as being RNA-binding. 
21.10 13.30 21.10 13.30 21.00 13.20 hmmbuild --amino -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.71 0.73 -7.09 0.73 -3.60 388 5348 2012-10-03 11:22:52 2010-03-17 11:37:56 2 907 368 3 3248 16868 203 24.20 29 5.85 CHANGED hh..CplC.phphsspsshpsHhpu++H ............h.CplC..phphsopsp.hptHhpuppH............ 0 719 1232 2244 +12714 PF12875 DUF3826 Protein of unknown function (DUF3826) Coggill P pcc JCSG_target_393061_3g6i Family This is a putative sugar-binding family. 25.00 25.00 170.90 28.90 20.00 19.80 hmmbuild --amino -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.20 0.71 -4.60 7 68 2010-03-17 11:40:38 2010-03-17 11:40:38 2 2 39 2 14 57 0 174.70 46 54.17 CHANGED tYlcsl.pRupKIVspLslsDssttppVsplIAN+YhcLsDIaptRDs+lKtlKcsh.LststpppAlcttphctDAsLh+sHhpa.ApLShhLs-cQI-tVKDGMTYGsl.lTYcuh.DhlPoLTEEEKtpIatWLsEAREaAMDAcsScKKHAhFsKYKGRINNYLuK+GYDLpKEpc-WhcRhcu ........t.stSl.p+utcIVspLtLsDsptuppVtslIsN+ahclpDhapt+st..ppl.Euh..ssstt.ptLctt.htcsAthhcsHhta.ssLshhLs-cQIEtlKDtMThGhVthThcua.-hlPsLpEEE+upIhtaLhEAREhAlDhcNspph+AhFsKYKG+INNYLsc+Ghsh+pttc-ahc+hKA..... 0 9 14 14 +12715 PF12876 Cellulase-like Sugar-binding cellulase-like Coggill P pcc JCSG_target_394744_3gyc Family This is a putative cellulase family. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.91 0.72 -3.54 18 341 2012-10-03 05:44:19 2010-03-17 13:25:12 2 18 274 2 102 613 180 89.30 29 19.74 CHANGED llspatp-sRIlhWDLaNE.Ps...s...............h.s.ttsst...hct.hthLppshthhRslcPspPlTuGh......sss..........shss......hpplp..hpsl.DlIoaHsY ...........lspatsDsRlLhWDLaNE.Ps..s.t..............................t....ptst........hp...t....s....tp.Llppsh.pasR.slcP.s.Q.P.l.T.u..G.s..................Wps..................phss...................ls.thp......h.p.ps..Dl..loaHsY............................................. 
0 39 77 90 +12716 PF12877 DUF3827 Domain of unknown function (DUF3827) Zenonos ZA, Mistry J zz2 jackhmmer:Q9HCM3 Family This family contains the Swiss:Q9HCM3 protein which has been found to be fused fused to BRAF gene in many cases of pilocytic astrocytomas. The fusion is due mainly to a tandem duplication of 2 Mb at 7q34 [1-2]. Although nothing is known about the function of Swiss:Q9HCM3 protein, the BRAF protein is a well characterised oncoprotein. It is a serine/threonine protein kinase which is implicated in MAP/ERK signalling, a critical pathway for the regulation of cell division, differentiation and secretion [1-3]. 19.40 19.40 20.60 20.00 19.10 18.60 hmmbuild --amino -o /dev/null HMM SEED 684 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.18 0.70 -6.42 5 150 2010-03-17 13:51:21 2010-03-17 13:51:21 2 4 46 0 75 128 0 484.60 41 45.39 CHANGED PhpPsP-upFQV+TVLQFVPuuVDlRhCsFoQphEKGLhpAa...tEVR+sopushNlTVpIlNIT...slSspsp.p+sPVsIlFAV+sspGFLNGT-VSuLLRQLSAVEFSFYLGYPsLQIAEPFcYPpLNlSpLLRuSWV+TVLLGVh-pplss+sFptohER+LAQLLuElhshu.ppRhRFRRAToVGNsoVQlVpsoRLsGPDNPlELlYFVEttNGcRLsAsuoAclLNolDlQRAAIlLGYRVpGllApPV-+QAcPss-opPNNLWIIlGVVlPVLVVslIIIILYWKLCRo-KLDFQPDTluNlQQRQKLQ.PSVKGFDFAKQHLGQHuKDDlhlIp-PsPL.ss.hccsoPuEsu-lPoPKSKus.pcuoopssR+RGclSPS-uDSpsS-suSsRuus-cusRsussPusssQQ+so+ccupptssPuSGhDEsLSSuSIFEHVDRlSRsuuDus+RlSuKIQLIAMQPMPAPPl.s...s.csStsD+As-NuKVNKEIQsALRHKSEIEHHRNKIRLRAKRKGHYDFPusEDlpuuhGD....spEp-RlYppAQ.QIcKILcPs-cp..SsasEP+KSsRu+RSP+sRR++QsLNGsu...T-tD+DRLIpoDRDGTYR+sPGVcNpAYlusss.plP-spu.S.ocp........spGHsPS.PusLsoQPSIEEuRQQMH.LL--AFALsSPuS 
..........................Pt.hhplpT.......VLpFVs.sssht.CpFsQhhEptL.hAh......thhh...ptp.s.hpl......................p.sVslhasVt.stpshLNGo.sS.pLLppLSs..huaYLsYPsLpIAEPhp.YPpLshSphh+s.WVh...T...V.....l.G.......Vspp.lt.p..ptphtRhhtQhLupl.......hht...t......hhh+tssshGs.sVQhVphpR..l.GscsPspLhYas...sGc.L.ustuucllsplD.QRhAlhLtahl..........h.tcPsscs.ssNLWlIsuV.l.hPlhVV..hlIllIlhhhLC....RpsK.-FpPDshhNl.QRtK.............VpGFDaAKQHLGQpstD-.hl.hp.ts.......l.................shP.csu...sp.+phh.......shphtppschp..S.cusSt.sup..Spcput.p.................s.tsht...pt+.pct.......tpt.s..............s..ss..pE...................ths.hs.....................u+s.s-s.cp..suplQLIuhpPhshPss.......p...tspsh.ssclN+.................AL+pKS-IEHaRNKl..RL+AKR+GaY-FPss-s.ps..tc....p...p+h.p...........chh...ssst...usahcs+p....R..phpsp.hRp+Q.lsuss...s-h-..h.hhhppspcGhhpps.ss...ps.hh.....tss....................s.st...........s.......s...s..hhsspPoI-csRppMH.LL--AFuLsuss.................................................................................................................................................... 1 8 17 34 +12718 PF12878 SICA_beta SICA extracellular beta domain Finn RD rdf Manual Domain The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. There can be between 1 and 10 copies of this cysteine-rich domain [1]. 
21.70 13.00 23.40 13.10 21.60 12.90 hmmbuild --amino -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.91 0.71 -4.46 262 470 2012-10-01 20:19:39 2010-03-17 14:32:34 2 34 2 0 456 482 0 170.50 28 62.21 CHANGED oLCsplpCAup+.a..........hpppptp....ssss.................s...sa.W..pp....hpsclppLhpphspstp....pps......sss.hCsphtsss...............tspspKpACpalsAGLc+l.h......................pt...ssus..................pssp.FcQTMGClhL+tYAKpl+c..pu...............hCsI.-p.......GIc+A....Fpps.p..shhpss......Cs.......sssssCh.Cph......csc..ac...s.CpIs ...........................shCsplpCsspp.a....hppp.ttt...tsss......................ssh.Wpp......hpsclppLhpplspstppss.....sss..hCsshssts..............tspspKpACphlsAGLcclh................ph.......ssss.....................ss..acQTMuChh.LptYAccl+cpu............tC....sI.-p.....GIccA....Fpps.sph.hpss........Cs.......sssssCh.Cph......cps...ac...sCpI....................................................... 1 0 0 0 +12719 PF12879 SICA_C SICA C-terminal inner membrane domain Finn RD rdf Manual Domain The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. The C-terminal domain is thought to remain in the erythrocyte, found juxtaposition to the single transmembrane domain. To date, all full length proteins contain a single copy of this domain [1]. 
21.90 21.90 32.80 21.90 18.30 21.40 hmmbuild --amino -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.66 0.71 -4.30 3 127 2010-03-17 14:33:42 2010-03-17 14:33:42 2 24 6 0 123 132 0 131.10 61 13.13 CHANGED KYFG.LhR+tRRY...+RuPspAssPSVQEQlLDHVEEAG......PHEYRLVKERKPsSsPsRTKRS.......GtVNRRTIIcIHFEVLDECQKGDTQLNQKDFLELLVcEFMGSEhM.EEEQVPKEEVLMESlshclVPhE-.VPSLSSGFMV ..............................................................KYFGPLGKGG.RF...RRSPs-IP.GsSVQEQVLDHVpp.su........sHEYpLVKERKPRS..u..PTRTKRS...........GtVNRRTIIEIHFEVLDECQK.....GDTQLNQK.DFLELLVpEFMGSEh..M...EEEQ..........VPK.....E-..VL.....M.....E.....ul.....P..........h.....E......VPhE.....c.....VPsLGSshh.......................... 0 12 12 17 +12721 PF12881 NUT_N NUT protein N terminus Zenonos ZA, Mistry J zz2 jackhmmer:Q86Y26 Family This family includes the NUT protein. The gene encoding for NUT protein (Nuclear Testis protein) is found fused to BRD3 or BRD4 genes, in some aggressive types of carcinoma, due to chromosomal translocations [1-2]. Proteins of the BRD family contain two bromodomains that bind transcriptionally active chromatin through associations with acetylated histones H3 and H4 [1-2]. Such proteins are crucial for the regulation of cell cycle progression. On the other hand, little is known about NUT protein. NUT is known to have a Nuclear Export Sequence (NES) as well as a Nuclear Localization Signal (NLS), both located towards the C-terminal end of the protein [1-2]. A fused NUT-GFP protein showed either cytoplasmic or nuclear localization, suggesting that it is subject to nuclear/cytoplasmic shuttling. Consistent with this possibility, treatment with leptomycin B an inhibitor of CRM1-dependent nuclear export resulted in re-distribution of NUT-GFP to the nucleus [3-4]. Inspection of NUT revealed a C-terminal sequence similar to known nuclear export sequences (NES) which are often regulated by phosphorylation [3-4]. 
18.10 18.10 18.10 18.90 17.30 17.30 hmmbuild --amino -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.29 0.70 -5.30 4 167 2010-03-17 15:11:40 2010-03-17 15:11:40 2 10 28 0 77 136 0 260.30 49 37.96 CHANGED sh.GPsMohpPGsuLSsFsALPFhPPsPsPscpP.hEPss.PllsushSPuNPLlLSAhPSs.LVTtpGGsu.SuAGsupVhVpl+T-sGPscsuQsQNllLTQssLshpsPGs.CGGltsP.PP.alTAusVpsllsupsVGsoQ..EG....GLPh..ssPPsAQLsPIVs.tpuhPsPpGspGEGGPsus.ppsS.sDhupts+uVYENFRpWQ+YKsLARRHLsQSPDsEALSCFLIPVLRSLARhKPTMTLEEGL.RAlQEWp+TSNFDRMIFYEMAEKFhEFEAtEEMQIQpsQlMpGsQsLsPsss.+LDP.Gs.APEVspQPVY ...............................t.shshpPusu.ss.ssLPh..PsssPscts.........s.h.ssshsPssPLhL.sshPps.LVstpsu...s...sus..Gssp.lhlph+o-stsspsspsQshlLTQssLshpAPGs...CG.Gs.sP.sP.hhlsAuss..shhsups......lG.soQ..s..-G..s.G....LP.....sPPPsAQLsPIl.s..puhPhPpGspu...EGu.sss.pt.u.sDpSspsKSVYENFRhWQ+aKsLARRHLPQSPDsEALSCFLIPVLRSLARhKPTMTLEEGL.hAhpEWp+pSNFDRMIFYEMAEKFhEFEAtE.......E.......MQhQ.......p........Q..hhpGspshsPsss....+h-P.GssAsclspQPs................................... 1 8 9 20 +12722 PF12882 NUT_C NUT protein C terminal Zenonos ZA, Mistry J zz2 jackhmmer:Q86Y26 Family This family includes the NUT protein. The gene encoding for NUT protein (Nuclear Testis protein) is found fused to BRD3 or BRD4 genes, in some aggressive types of carcinoma, due to chromosomal translocations [1-2]. Proteins of the BRD family contain two bromodomains that bind transcriptionally active chromatin through associations with acetylated histones H3 and H4 [1-2]. Such proteins are crucial for the regulation of cell cycle progression. On the other hand, little is known about NUT protein. NUT is known to have a Nuclear Export Sequence (NES) as well as a Nuclear Localization Signal (NLS) [1-2], both located C-terminal to this domain. A fused NUT-GFP protein showed either cytoplasmic or nuclear localization, suggesting that it is subject to nuclear/cytoplasmic shuttling. 
Consistent with this possibility, treatment with leptomycin B an inhibitor of CRM1-dependent nuclear export resulted in re-distribution of NUT-GFP to the nucleus [3-4]. Inspection of NUT revealed a C-terminal sequence similar to known nuclear export sequences (NES) which are often regulated by phosphorylation [3-4]. 18.10 18.10 52.90 20.10 17.60 17.90 hmmbuild --amino -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.77 0.71 -4.59 5 230 2010-03-17 15:28:47 2010-03-17 15:28:47 2 8 26 0 96 163 0 127.70 39 25.81 CHANGED PRL+ssRPQ+PPps+sPpEIPPEuVKEYlDIM-.L.G....hTGEScs.+EE-G.spssQEE--LPuDstLLSYpc+LCSQKlFVSpVEAlIpPQFlu-LLSP-sptD.LALppsLEQEEGLTLAQ...LlpK+pPsLcccusAEAsPs ...............................................th...+sp.p.PspsctPp.hPs.ssp-hhDIMp.h.......sG-s.s.pppct..p.tpp.E.E.-.th.sDssLLSYhccLCSQc.FlopV.EAlIcPpFlppL.LSP-sphD...hA....LppcL..EQE..EGL.olAQ..............lhpK.+hh.hcp.t.sct.sP......................................... 1 14 14 23 +12723 PF12883 DUF3828 Protein of unknown function (DUF3828) Coggill P pcc JCSG_target_392985_3kzt Family This is a family of bacterial proteins of unknown function. 24.40 24.40 24.50 24.50 24.00 24.30 hmmbuild --amino -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.51 0.71 -4.19 23 492 2010-03-17 15:29:03 2010-03-17 15:29:03 2 4 451 2 44 225 5 115.80 42 47.98 CHANGED P-s.sspsFYpaY..lpphsp......spsshtssss.....lccYVupsslpcLpt..ph...pp-hh-uDYFhpsQDass.-Wlsplplspuph.sssshVtVphG.p..spshplhlhlp+EsGtWKIhcVpsss ...............................s.pQsV+phYpsY..hosh..........ssshhs-Tsp...........ct..hsStcl.ptLsLsssL........sshsh..lDhD...PhscsQDauc.hh...l.c.s.lslsp..scs.spu.cAsV.chpl.......h.....ps-cc+ps.....slp..hltE.s.GRWhIs-lsss.c................................. 
0 6 14 28 +12724 PF12884 TORC_N Transducer of regulated CREB activity, N terminus Zenonos ZA, Mistry J zz2 jackhmmer:Q6UUV7 Family This family includes the N terminal region of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-depended transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) [2]. The proteins display a highly conserved predicted N-terminal coiled-coil domain and an invariant sequence matching a protein kinase A (PKA) phosphorylation consensus sequence (RKXS) [1]. The coiled-coil structure interacts with the bZIP domain of CREB [2]. This interaction may occur via ionic bonds because it is disrupted under high-salt conditions [3]. In addition to CREB-binding, the N-terminal region plays a role in the tetramer formation of TORCs [2], but the physiological function of the multimeric complex has not been clarified yet. 25.00 25.00 26.30 30.90 23.70 24.10 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.42 0.72 -3.44 13 158 2010-03-17 15:32:13 2010-03-17 15:32:13 2 5 63 0 75 136 0 64.30 56 11.44 CHANGED NPRKFSEKIALppQKQAEETAsFEcIM+-Vpuo+hspppspppp........................s..s.u.thhGGSLP .NPRKFSEKIALHsQKQAEETAAFE-lMpDlssTRhpttplppsp...........................spu..paYGGSLP......................................................... 0 15 25 48 +12725 PF12885 TORC_M Transducer of regulated CREB activity middle domain Zenonos ZA, Mistry J zz2 jackhmmer:Q6UUV7 Family This family includes the region between the N and C terminus of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-depended transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) [1]. 
Although the C- and N- terminal domains of these proteins have been well characterised [1-2], no functional role has been assigned to the central region, yet. 21.90 21.90 22.50 24.70 21.30 18.60 hmmbuild --amino -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.19 0.71 -4.45 10 208 2010-03-17 15:34:46 2010-03-17 15:34:46 2 7 51 0 92 168 0 147.20 52 26.60 CHANGED RTNSDSALHQSshsPsPQDsFsG.GuQslpspch...lLho................sPstE-s..ph-KcpsKQhW-tKK..ss.ouRP+SC-VPGINIFPSPDQphosSlhPuAHNTGGSLPDLTNIQFPPPLPTPLDP-Ds.sasohSuusSousLssshTHLGIouuup ..................RTNSDSALHpSsMsPssQ-sasG..usps....hh...p+h........hh.s...................................sP..s..h.EEsh..ps-+sh.cQ.W-sKK...os.uSRP+.SC....E....V...PGI....NIFPSs-Qp.sss.sh.hPushNTGGSLPDLTNlHFPsPLPTPLDPE.-s...sas...uLS..uusSTu.NLssshTH..LGIuus..t................................. 0 11 20 40 +12726 PF12886 TORC_C Transducer of regulated CREB activity, C terminus Zenonos ZA, Mistry J zz2 jackhmmer:Q6UUV7 Family This family includes the C terminal region of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-depended transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) [2]. The C terminus region is negatively charged, resembling the transcription activation domains. When this domain, from all three human TORC proteins, was expressed as fusion proteins with the DNA-binding domain of GAL4 (GAL4-BD), and tested for induction of a minimal promoter linked to GAL4-binding sites (UAS-GAL4), UAS-GAL4 was potently induced by GAL4-BD fusions containing the C-terminal portion of all three human TORCs [1]. 
21.50 21.50 21.50 22.10 20.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.59 0.72 -3.80 8 195 2010-03-17 15:36:07 2010-03-17 15:36:07 2 8 60 0 94 170 0 73.50 55 12.73 CHANGED PNIILT...........sDSsP..uLSKDluuALAGVsshsaDu-s....FsL-DEL+..l-PLuLDGLpMLS.DPDhhLsDPAlEDSFRuDRL .....................................PNIILT...........s-.SsP..uLSK.-lsuuLAGVs-hsh...s..s..-s....FsL-..D.EL+..l-PLoL.....DGLpMLs.DPs.h.lLsD.PulEDoFRtDRL.... 0 13 21 43 +12727 PF12887 SICA_alpha SICA extracellular alpha domain Finn RD rdf Manual Domain The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. This domain is typically found at the N-terminus, with 1 or 2 copies per protein. The domain is cysteine-rich domain and similar to PFAM:PF12878 [1]. 21.70 13.00 23.20 13.10 20.80 12.90 hmmbuild --amino -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.20 0.71 -4.54 71 105 2012-10-01 20:19:39 2010-03-17 16:07:03 2 21 2 0 103 131 1 180.80 26 21.68 CHANGED sssLhppWhpph........hpss...ut.s.spss.........ppIsscL.+cpLcctaccLp..shLp.p.sp.us........ElsshCus.............hsttt.s...ss..t..pp...ph.KplCKullcl+YFMuGlcsppp..........cp.tsthsthpshctYsRClVGtlALsclYG-HCclccllptlpsplc....pplpsp..tspss..........hspCp.s.lshp-lhhG+slLsspIcpW ................s.stlhptWhpph.tts.t.s.s............tplhpcl.+cpLcchaccLp..phLp..t.tpst.........ElsshCss...............hhttt.sst...t..pp....h.KplCKullcl+YahuGlcpptp............cp.tsthpshpshctYhR...ClVGtlA...Lscl..Y..G..-......H......Cclp.cllpplpsplc............pplptptspss..........hspCc..s.lshpcLhh.G+slLtspIcpW............................................................................................... 
0 0 0 0 +12728 PF12888 Lipid_bd Lipid-binding putative hydrolase Coggill P pcc JCSG_target_393040_3h3i Family This is a small family of lipid-binding proteins found in Bacteroidetes. 21.50 21.50 21.50 31.70 21.30 19.50 hmmbuild --amino -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.42 0.71 -4.25 5 26 2012-10-03 05:44:19 2010-03-17 17:49:49 2 1 23 3 6 26 1 114.50 33 67.72 CHANGED lGAGalphsTYNTAANssTEMWL-D..huNhWssKlKVsVDYsuRTFouTG....lusplsuDsKVslT.....DGKVLcGAATTPSGMPADSIVahlpFsDDssGhTYKVuGFRRTGFPADD ......................................hstthphpTYNTsssssscMWlc............hsshas..h+sKlssDhsupoFssss....hsst....h.psDs...p..lslT.........................-GKVlhsuuTT..uGsssDSIha.hpFosss..GhTYplsGaR+TGastD-.......... 0 3 6 6 +12729 PF12889 DUF3829 Protein of unknown function (DUF3829) Coggill P pcc JCSG_target_393163_3iee Family This is a small family of proteins from several bacterial species, whose function is not known. It may, however, be related to the GvpL_GvpF family of proteins, Pfam:PF06386. 
25.00 25.00 25.30 25.30 24.60 24.60 hmmbuild --amino -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.78 0.70 -5.21 47 668 2010-03-17 18:27:44 2010-03-17 18:27:44 2 4 523 3 45 381 4 246.60 42 80.92 CHANGED ssspthpKhNsYlp.hhNplt...tshhpshtpYhshhtchcpst.ptpphhh.sh.th.......cpshpthcps.......ht.psshtslDpss.....pshhsshpplh.shhschpsYappcsYpcDshs+ucpL...psplhtthppapsshpshpstlpchpccppttphpt........hcp.pscththtthphh....tpscpl...ht.h.ttpp.tthchtshp................ttlschpptlpshcphhtt.thpt.........thhpshspapssspphlpcl+s...................hh....tsshsphhpsYNphlsshNpht ...................................................tttshscKhssYIc.saNplp.......hsl.pulscYsch......hc..Dh+pGP.TGcEph..lhslhsl..........htpCpcthKps.......sALsPshpslDusA.....luYI-uAsALusTI.NEM-KYYsQ-NYKDDAFAKGKsL...HpohlKsl-sFcslucpYcsuIpchs-c+phspLKp........lE-.pE..GKohp....YYo.LslM.....loAKQI.....NslI.op-p...FDscAhh................KcluELEohls.....phKchspss......................uFlsS.AspYQhpsKKalRRlRDpV.......hpphpssss.uahs.........p-Sastsl+pYNEhVDsYNph............................................ 0 13 26 38 +12730 PF12890 DHOase Dihydro-orotase-like Coggill P pcc JCSG_target_393237 Family This is a small family of dihydro-orotase-like proteins from various bacteria. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild --amino -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.86 0.71 -4.51 4 46 2012-10-03 00:45:34 2010-03-17 18:34:02 2 1 46 0 6 1447 801 171.40 49 50.21 CHANGED K+lllshlshLu..LsthuhpAQsLhsT...........TWsAYGloFpAP........................AulsVE-DoEE...........ualhssssaYlTlQhL-uEGhK+.u-LsptLKshAsDDpVTsQouVpsFELPQFaGs.L+G...sCEs-+ClYuhLLsKsuusGFYlSIlYocEs ................................................................................................KLlsPGLlDlHVHLREP.GsptKETIpoGohAA.................................A+GGaTTlsAM..PNT..p.P....sPDphEphp.ltpt....................IpcpuplpVLPauuI.T..V.c.psGpEhsDht..........................sLhchGAF.A.F..TDDGVGV.QsAuMMhcA.MK+AAcL.c.h.A.l.V.A.HC.ED..s..o.L.h.s.p.Gs...V.HE.G...c.hu...c..c..a....G.......L.pGI......PSlC...EoV...H...I.......A......RDl.L....L....A...E......A.A.uC.....H..........YHVCHlSTKp.................................... 0 0 4 5 +12731 PF12891 Glyco_hydro_44 Glycoside hydrolase family 44 Coggill P, Bateman A pcc JCSG_target_393205 Domain This is a family of bacterial glycoside hydrolases formerly known as cellulase family J, and now known as Cel44A. It is one of the major enzymatic components of the cellulosome of Clostridium thermocellum strain F1 and of many other Firmicutes. 
25.10 24.70 25.10 24.80 24.90 24.50 hmmbuild --amino -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.85 0.70 -4.86 27 92 2012-10-03 05:44:19 2010-03-18 13:56:41 2 21 82 14 41 96 9 206.30 32 25.61 CHANGED NtsNuGsDWhaps............sphssPutssspahcpshttGs.hshlTlphhGYVutDs.s.s..hspstshPss+asp.......................lthspssshuhsPc.............hsss.lYhsEa......Vsalhs+..hGsu..sssGl+hYuLDNEPsLWss..............THsclHPpslohsElls+slphA+slKshDPsAplhGPs...taGasuYhshss...sss.......ss..scusap.al-aYLcph..+pspcssGhRLLDVLDlHaYPpup ........................................................................h.NsusDWh.ps................t.suthhpthhpp.sh.t.ss..shhTl.hhGaVutct.........ttshsss+att.......................hh.ttss.hs.sPs.............h.ss.hYhs-a.....lphlhpp.....hGtu....stsulpuYplDNEPs.LWsp..............TH.clHPsssThsElls+shshApAlKssDPsAplhGPu...taGatu..Yts.ts......t.t........sp.....p.sshp.alsaaLcph...+ptpptt.GhRLLDhhDlHaYPps................... 0 25 33 36 +12732 PF12892 FctA T surface-antigen of pili Coggill P pcc pdb_3gld Repeat The FCT and equivalent region genes of Streptococcus pyogenes and other related bacteria encode surface proteins that include fibronectin- and collagen-binding proteins and the serological markers known as T antigens. Some of these proteins give rise to pilus-like appendages [1]. The FctA family is found in many Firmicutes and related bacteria. In S. pyogenes, the pili have a role in bacterial adherence and colonisation of human tissues [2]. 
22.30 12.50 22.30 12.50 22.20 12.40 hmmbuild --amino -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.20 0.72 -3.18 46 1757 2012-10-02 19:08:27 2010-03-19 11:32:17 2 125 375 11 237 1642 91 84.80 22 26.02 CHANGED YpYslpEp.p..ssh.sG..lsYDs.p.phplsVp.Vs-ssps.thhhs.thhhsh...pt.s.p..pt..sh.tFsNo.a.......sss...s.....Lplp..KplsGps.......cscpF..sFslphp .................................................apYslpE......h......t....s......s.......su.....lsYDs...t..p.hpl.sVp..Vsc.......p......s..pG.....th.....h......s....h..sp........................tt.t..............tt...................tF.sNs.Y.........................sss....p............hphp.....K.l...pG...t...t.........tttp...a....pF.l...t............................................................... 1 65 125 164 +12733 PF12893 Lumazine_bd_2 Putative lumazine-binding Coggill P pcc JCSG_target_391417 Family This is a family of uncharacterised proteins. However, the family belongs to the NTF2-like superfamily of various enzymes, and some of the members of the family are putative dehydrogenases. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild --amino -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.59 0.71 -3.55 61 306 2012-10-03 02:27:24 2010-03-19 16:13:43 2 10 241 22 137 612 378 116.00 21 75.47 CHANGED s-hpulpsslpp.Yh-Ghhpu.DsstLcpsFHscA...tlhsht.pup...hhthshcpa.hshlpsp......tssusshptpIhsl-lsG.ssAhs+lcsphh..sh............papDaLsLl+.h-GpWpIlsKsap .............................................s.p.pslppslpp.Yh-u.h.....tpu..DsstlcpsFpssu.............th.hshp..psp.............lp..t...h...s.h.sp.a...hshlppt...........ts..tt....t...p..t.p........tpl......p..lcls..u..shAhsclphphh.....st..............................pa....s....Dh........h....s..Ll.K.h-GpWpIlsKha................................................. 
0 49 89 114 +12734 PF12894 Apc4_WD40 Anaphase-promoting complex subunit 4 WD40 domain Coggill P pcc manual Domain Apc4 contains an N-terminal propeller-shaped WD40 domain.The N-terminus of Afi1 serves to stabilise the union between Apc4 and Apc5, both of which lie towards the bottom-front of the APC, 21.10 13.40 21.10 13.40 21.00 13.30 hmmbuild --amino -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.08 0.72 -4.46 29 282 2012-10-05 17:30:43 2010-03-22 10:40:44 2 24 200 0 193 362 3 47.10 26 6.94 CHANGED ph.p.ls-Kslssp........lp...hhsasPpMDLlAluoccsplhlaRL.NhQ+........l.......as ...........................hspp........lp...hhsasPp.hD.LlAlus.........p.cGp...lhlaRl..shp+........lh....................... 0 65 98 152 +12735 PF12895 Apc3 Anaphase-promoting complex, cyclosome, subunit 3 Coggill P pcc manual Domain Apc3, otherwise known as Cdc27, is one of the subunits of the anaphase-promoting complex or cyclosome. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1, 2]. The protein members of this family contain TPR repeats just as those of Apc7 do, and it appears that these TPR units bind the C-termini of the APC co-activators CDH1 and CDC20 [3]. 
26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild --amino -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.20 0.72 -3.85 111 1548 2012-10-11 20:01:03 2010-03-22 11:07:53 2 603 877 14 798 4682 1088 82.70 18 13.86 CHANGED pptpacsAlFhu-+lhshss..........p...psh........ahh....Apshappspa....ppA.hpll..............tphtt.ps...htspY.....Lh.A..pshh.cLpc...ac-AlssLtps ...........................................................................t...tappAlh.hhc+h.hp.sst...................p.....pst....................................hhh........Apsa.app.spa.......pcA.hphhpp...................................hphps..pp.............ps.ta...................lh...u.......pshh..phpc....hc-Alphlp..h..................................... 0 291 477 677 +12736 PF12896 Apc4 Anaphase-promoting complex, cyclosome, subunit 4 Coggill P pcc manual Domain Apc4 is one of the larger of the subunits of the anaphase-promoting complex or cyclosome. This family represents the long domain downstream of the WD40 repeat/s that are present on the Apc4 subunits. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1,2]. Results in C.elegans show that the primary essential role of the spindle assembly checkpoint is not in the chromosome segregation process itself but rather in delaying anaphase onset until all chromosomes are properly attached to the spindle. the APC/C is likely to be required for all metaphase-to-anaphase transitions in a multicellular organism [3]. 
21.40 21.40 22.60 22.00 20.90 20.00 hmmbuild --amino -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.18 0.70 -11.14 0.70 -5.03 36 264 2010-03-22 11:31:27 2010-03-22 11:31:27 2 10 228 0 186 273 1 197.50 24 26.91 CHANGED pLphlthshshh.p..ttphlhplAppssplpsLlpYlppshpplppta..cshhphh....cchhsshtpt..pt................p.tsshs.sphhcllhoGhss..slc-aLh..spLuE+GhK+hp+ssssuapslpcllhppLlsAhERhhllLscLpGlu+ap.t...........ltl.sspplsch......lpsspslhhhsacllhplspEhctF......pt.FhpWlph.hlchhss-..sp ..........................................t...hh.hchphl.p..th.tlhtlApphsplpsLlpYlppshpphpptW......cshh..h......spplsphst...pt......................................................sspsslt.schhcLllhGpsss.tLpp.aLh.......spL.s-..........+GhK+hspulpsuapslpcLlhppL.s...u...hE.......phhhhLu........cLpGhupap.ph....p.........lGL.sss..tlpps...............................lpsssshhlhspcllhslspphppF.......ps.FhpWLhh..l.hhtpp....t....................................................... 0 59 98 150 +12737 PF12897 Aminotran_MocR Alanine-glyoxylate amino-transferase Coggill P pcc JCSG_target_390749_3ez1 Family These proteins catalyse the reversible transfer of an amino group from the amino acid substrate to an acceptor alpha-keto acid [1]. They require pyridoxal 5'-phosphate (PLP) as a cofactor to catalyse this reaction. Trans-amination reactions are of central importance in amino acid metabolism and in links to carbohydrate and fat metabolism. This class of aminotransferases acts as dimers in a head-to-tail configuration [2]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild --amino -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.33 0.70 -5.92 5 444 2012-10-02 18:26:03 2010-03-22 16:02:22 2 1 437 8 94 3944 814 413.70 53 97.54 CHANGED MTssAhsDALspAcAAY-sFKARGLKLNMQRGQPADADFDLSNGLLolLGEsDsRh.DGoDLRNYPGGVAGLPSARALFApYLDVKuENVlVWNNSSLELQGhVLTFALLHGlRGSsGPW..lu-..KPKhIVTlPGYDRHFLLLETLGFELLTVDMQsDGPDlDAVERLAAsDASVKGILFVPTYSNPGGETISsEKARRLAulpAAAPDFTIFADDAYRVHHLhuEu-+DsPVNFVuLuRDAGHPDRAFVFASTSKITFAGAGLGFVASSEDNIuWLuKYLGAQSIGPNKVEQARHVKFLp-YcsGLEGLMRDHAcLIAPKFcAVsEsLcsELG-.GGcYATWToPRGGYFISLDTlDPVA-RVVcLA-cAGVSLTPAGATYPuGpDPHNRNIRLAPTRPPL-EVcTAMcGVAsCV+LAoEEYRAGp+ .............................................................................................................................................................................ho.--Lssh+pplppcYscLpA+sLsLDlTRGKPSscQL...DL.SssLL.s.l....s.s.s.s....hss...-G.s.D.s.RNY.GGhcGlPElRplFuElLG......ls.....s-plIAusNS...S.L.slMa.D.llsau.hla.Gsssut+....PW.......spp....tpV..KFLCP.VPGYD..R.H...FA.I.oE.ph..G...I...EMIs..V...P..M.s...p.....c.....G....P....D....h....D....h....V.....c....c........L.......V.......A..........t..........D......P.........s.....l........K.........G...................h......W.s........VPhY.uN.PsGsoYS--T.VR+Ls.p.M.....p.s..A.A.P.D..F..R..I..a..W..D.N.A.Y..........A..l...H...p.....L...........t....D.t............p.........s.......h............h............s.......l.....L....t........h....u..p.......c...A.....G.N......P.........s.....R...s..a....h..F..A..S.T..........S.K.I.T.F..A.....G...u..G..Vu..F..l..A..u......S...p.........s.......N.........l....s...........W...........a...h...p...a...h....u.h.c...o.I..G...P..D.K...l.N.Q...L...R..H...s.+.F.F....t.....D..s..-.....G....V....+...t.H...M...c...+..H...t...pI...L....A.P....KFt...h....V.h-.lL...-...c....+....L....u.....s.t...s..l...A....o.W..T.c.P.p..G..G..Y..F..I......S....L...D........V...h.......-....G.........T......A..p....R....l...V....u.
...L....A..K...-...A...G...lu...L..T.s..A...G...A..o.a..P.....Y...t...+...D....P...c...D...+..N..IRlA.PS...a.Ps.l.-ELcsAh-ul.u.s.CV.hLAAsE+LL...pt............................................................................................................................................................................................ 0 32 71 88 +12738 PF12898 Stc1 Stc1 domain Bateman A agb Jackhmmer:O94276 Domain The domain contains 8 conserved cysteines that may bind to zinc. In S. pombe this protein acts as a protein linker which links the chromatin modifying CLRC complex to RNAi by tethering it to the RITS complex. The region is reported as a LIM domain here, but has a slightly different arrangement of its CxxC pairs from the Pfam LIM domain Pfam:PF00412, hence why it is not part of that family [1]. The tandem zinc-finger structure could mediate protein-protein interactions. 24.80 22.00 24.80 22.30 24.50 21.80 hmmbuild --amino -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.53 0.72 -3.92 21 74 2010-03-22 17:08:22 2010-03-22 17:08:22 2 3 71 0 62 73 5 83.60 29 26.08 CHANGED KC.tsC+Kh+spssFSp+QLcchp+shhsp..hpshsspshspCppCs..us.pssEL....................pCs.hCs+sKuL-tFSKsQR+c.-ss..+ChsClpt ....................cC.thstchpshstFSppQlcphppthhpp......thhstpshhtCppCs..up.pssEl..............................pCs.hCs.+s+sL-tFSKsQR+p.scss.........+ChsCsp........ 0 16 33 49 +12739 PF12899 Glyco_hydro_100 Alkaline and neutral invertase Coggill P pcc manual Domain This is a family of bacterial and plant alkaline and neutral invertases, EC:3.2.1.26, previously known as Invertase_neut Pfam:PF04853. 
23.60 23.60 23.60 23.60 23.50 23.40 hmmbuild --amino -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.66 0.70 -6.09 20 347 2012-10-03 02:33:51 2010-03-22 17:10:24 2 7 135 0 171 342 258 358.00 51 75.19 CHANGED pAWc.lL-colVaYpGpPlGTlAAhDs.ss...csLNYsQlFlRDFVPSuLsFLlpG..csEIVRNFLhhTLpLQupc+plDsap.GpGlMPASFKVh....ctt.....cEhLhADFG-pAIGRVsPVDSuLWWIILLRAYs+sTGDhohucsP-sQpGl+LILcLCLsssF-haPTLLVsDGusMIDRRMGlaGaPLEIQuLFYuALRsAppLLp.ss.....pss.pp..hlppls........pRLpsLshHlRpYYWLDhp+LNpIYRaKTEEYGc.s.u.hNcFNIhP-SIP.sWLh-.WLPpcGGYLlGNluPuRhDFRFFuLGNhlAIlouLAotpQupAIhcLlEp+W-DLlGcMPhKICaPAlEscEWRIlTGsDPKNpPWSYHNGGSWPsLLWhhsAAslK.............sGRsp.lAc......+Alpl.AccRLs+DcWPEYYDG+sGRhlGKQARpaQTWoIuGaLlA+tLl-.sP ..........................................................................................................................................................AhchhcpulV.apGpslGTlAA.D..sp...p.LNYsp.lF.lRDFlPSulsaLhpG..c.-IV+NFLhhoLpLQ..................u.........hc+......................hD.ap.upGlhPASFKl.........p.....tp-hL.sDFGppAIGRVuPVDSuhWWIILLRAYsK.oG.....Dhsltcp.-sQpGh+hILpLCLsc.uFDhaPoLLssDGssMIDRRMG..........laGaPlEIQ.uLFa..AL+suh.phLt...p.....t.ts..pp...hlptls..................pRLpALoaHhRpYaWlDhpplNpI.Y........Ra+TEEYSp.s.A.hNKF.NlhP-p.IP.sW.l.hD.ahPpc..G.G..YhIGNlpPA+MDFRaFsLGNhhuI.lSSLuTscQ.spuIhcLlEt+Wp-L.l..up......M..PlK.I.....C...YPAl..E..s..c..E..W...+...Il...T...G...sDPKNT..WSYHNGGSWPs..L..LW....h..ssAslK.........................hGRsp.....lAc.......+....A....ltl...uEpRL.cD..p....W..P..EY.Y..Ds+pG+a..lGK.QuRhaQTWoIAGaLlu+hhLcsP...................... 0 28 99 142 +12740 PF12900 Pyridox_ox_2 Pyridoxamine 5'-phosphate oxidase Mistry J jm14 jcsg_3fkh Family Pyridoxamine 5'-phosphate oxidase is a FMN flavoprotein that catalyses the oxidation of pyridoxamine-5-P (PMP) and pyridoxine-5-P (PNP) to pyridoxal-5-P (PLP). This entry contains several pyridoxamine 5'-phosphate oxidases, and related proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.85 0.71 -4.36 153 1956 2012-10-02 11:35:36 2010-03-23 10:43:58 2 14 1393 22 640 2144 210 137.20 22 77.98 CHANGED cppphhplLc.sshlu+luhss....s....st..PhllPssashs......ssp......lhh+.sus...uu..+hhpshp............t.....sshsstp....hDs...hshsps.........s.a.SVllpGp.uchl...p-spEctpshpthhpph.hPt....ap..thc.......tthtt..sthlclphsphou+t ............................pphhplLp..ps.p.h.s......+luhss....s........st........Phl..lPlsashp.................ssp....................lYh..H..sut.....su..+hhchlp...........................tt....ssh.shsp.....................h.cs........hh...spp.......................tht.a.p....SVl.spGp.sphl.................p..c..t.p....E.p..t..t...ulchl...hp..p....h...sPt.......hp...h..............tt.htt.....ssll.+lphpphou+.................................................................... 0 204 440 569 +12741 PF12901 SUZ-C SUZ-C motif Bateman A, Aravind L agb Aravind L Domain The SUZ-C domain is a conserved motif found in one or more copies in several RNA-binding proteins. It is always found at the C-terminus of the protein and appear to be required for localization of the protein to specific subcellular structures. It was first characterized in the C.elegans protein Szy-20 which localizes to the centrosome. It is widely distributed in eukaryotes. 21.10 21.10 21.30 21.10 20.30 21.00 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.64 0.72 -4.27 26 299 2010-03-24 10:52:53 2010-03-24 10:52:53 2 12 94 0 157 272 0 32.90 42 6.69 CHANGED phpthshssups.p.sVlRpP+GPD..uop.GFphpR ....................tphphssuss...sVlRpP+GPD..sop.GFptpp... 
1 27 41 91 +12742 PF12902 Ferritin-like Ferritin-like Coggill P pcc JCSG_target_390707_3hli Domain This is a family of bacterial ferritin-like substances that also includes a C-terminal domain of VioB, polyketide synthase enzymes, that make up one of the key components of the violacein biosynthesis pathway. Violacein is a purple-coloured, broad-spectrum antibacterial pigment. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.78 0.70 -4.75 38 153 2012-10-01 21:25:29 2010-03-24 11:50:23 2 10 120 2 80 218 6 215.90 25 39.49 CHANGED LppAl.plEhuTlP.YLsAhYSl.csssstp................................stphltslshEEMlHhslssNlLsAlGus.....Ptlst.p.....hPsYPsp.LP..tht...........shphsLtshohsslp.pFhpIEpP-............................................................oIGpFY.psl............hcul...ppLstpht...stsh..........Gc....tcpls.p.a..............stlhsVssh.soAhpAlshIh-QGEGsstss...........-..........sc.uHYh+Fpclhp ...................................LptAlplEhuTlP.YLsAhaSl..psststp...........................................shphlpslshEEMlHhslsuNlLsAl....GGs.....Pplst.th.........hssYPss.LP..th.t..............................shp..lsLtthohttlp...hFht.I...EpPp......................................................................................................t...th.o.IGt.hY.psl............hpsh.......tth.tth.........th....................Gt..........tpp.hh..t..h...........................................................s..h...lp...sh.psshtulp.IhpQGEGss.ss.........................................t.uHa.pF.pl..h.............................................................................................................. 1 40 50 67 +12743 PF12903 DUF3830 Protein of unknown function (DUF3830) Coggill P pcc JCSG_target_392654_3kop Family This is a family of bacterial and archaeal proteins, the structure for one of whose members has been characterised. 
PDB:3kop from Swiss:A0JVT3 probably adopts a new hexameric form compared to previous structures. The putative active is near the domain interface. 3kop is most closely related, structurally to PDB:1zx8, where the potential active site is located near residues E51 and Y53 (conserved in 1zx8). Beyond the two residues above, the other residues are not conserved. Also the shape of the active site differs from that of 1zx8. PDB:1zx8 belongs to family DUF369. Pfam:PF04126, which is part of the cyclophilin-like clan. 21.30 21.30 21.30 21.90 21.00 20.60 hmmbuild --amino -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.04 0.71 -4.82 13 130 2012-10-02 15:38:38 2010-03-24 14:34:14 2 1 126 6 52 135 16 142.90 36 93.05 CHANGED lTL-KRGVossA+LLD-cAPcTCsAVW-uL..PhuGplaHuKYARNEIYsLlPsFs......sscPstENsTVTPIPGDlsaFsFps..lss.uaGYcssstststss.lsDLAlFYGRNNLLlNGDsGWVPGNVFATIsEGL-....chApACpDlWhsGspGEoLoasRA .............plsttuhshhAchh--cAPpTstAhhchL...PhpsphhHs+auGptlas.Lss....as.......hts.GhENsTspP.sGDllaa.........................sG.ho-htIhhuhsshhhsuchG.lsGNhFhTI.s.-.Gh-....pltphspclhhcGAps.shp................................................................................ 0 12 33 42 +12744 PF12904 Collagen_bind_2 Putative collagen-binding domain of a collagenase Coggill P pcc JCSG_target_393067_3kzs Domain This domain is likely to be the collagen-binding domain of a family of bacterial collagenase enzymes. It is the C-terminal part of the PDB:3kzs structure determined from Swiss:Q8A905 (information derived from TOPSAN). 
21.70 21.70 22.30 21.80 21.40 21.30 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.15 0.72 -4.20 12 120 2012-10-02 17:35:21 2010-03-24 14:49:05 2 4 80 4 53 113 4 92.40 34 19.50 CHANGED lPDQSllstsNGcchc+hhATRGpD..YhhVY.sasG+shplchuKIuGcchcAhWasP+sGctphIGpFcN..sstsFpssut...pGsDWVLllcs .....................PDpSlls.s...t..s......sp+.hs+hlAoRupD..YhhVY.shs.GcshplsLscl.uG......sp..t.sAhWasP+sGchp...h.....l..G...p.h.....s.....s.......s.ht......sFps.Pus...spGNDW.lLllp... 0 28 43 51 +12745 PF12905 Glyco_hydro_101 Endo-alpha-N-acetylgalactosaminidase Coggill P pcc Jackhammer Domain Virulence of pathogenic organisms such as the Gram-positive Streptococcus pneumoniae is largely determined by the ability to degrade host glycoproteins and to metabolise the resultant carbohydrates. This family is the enzymatic region, EC:3.2.1.97, of the cell surface proteins that specifically cleave Gal-beta-1,3-GalNAc-alpha-Ser/Thr (T-antigen, galacto-N-biose), the core 1 type O-linked glycan common to mucin glycoproteins. This reaction is exemplified by the S. pneumoniae protein Swiss:B2DRU5, where Asp764 is the catalytic nucleophile-base and Glu796 the catalytic proton donor. 
25.00 25.00 25.30 25.30 19.50 19.30 hmmbuild --amino -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.58 0.70 -5.85 3 455 2012-10-03 05:44:19 2010-03-24 15:08:58 2 39 408 3 19 333 4 408.30 56 27.05 CHANGED DsKlDWQDGAIAYRKIMp-PlGuE-VsNpVGYRIsMNFGSQAQNPFL+oLDsVKKluLsTDGLGQuVLLKGYuuEGHDSGHPDY.GDIGpRhGGsKDhNTLIccGKEYNApFGVHINAoETYPEAKtFssDhl....DsssuhGWGWLDQuYsINpctDLpSG..pRFKRL.DhLtscsPDLDFIYVDVWssNQ......WsS+QLu-cINDhGWRLuTEaGGuhEpaSTWQHWAuDhsYGG.p.KGINScIhRFIKNHQ+DSWluNaPcVGGsAD+PLLGGaphtsFEGWQoR+Da-ua..I-shFssNLPTKFLQHYpITpWoNsEulc.Ksshpp.......hEssLKDsSsssslslpRK........DshRpRVhTLNGNsIh-Gt.GDcpYLlPWs.cQshKssoA-SEKLYHWNspGGTTTWTLP-GWpussTVYVYELTDLGRTcVKEV ...........................DpKVDWQDuAIAaRs..IMN......N...PpGhEcVt-lsshRIshNFuSQApNPFLhTLDslK+IsLtTDGLGQtlL...L.KGYsuEGHDSuH.sY.us.IG+RhGGhcD......hcs......LIEc.u+KYsAchGIHVNAoEoYPEuK...aF.NE....cIL..............sss..Y..ph...GWsWL..D..QuhpI.Dt.uhDLupG.....cLhch..c.h.K.....+....h....u...........s..s.LD...aIYVDVWGNutp....suWso+hLA...KEINpp.GWRhuhEWupuhEYDSsapHWAADhsYG...G..........h.TNKGh.NSsIsRFIRNHQKDsWlsD...hts.....s.PLLGGhsh..c-FEGWpG+....oDYNuY......lsNlFs+sl.TKahQHaslopWEs...G..s.ss.......ssp.....................hpsplVsssss...c.Vs...Voct.................Pphp.RTlTLN....Gpsl..........cG...............................................uuYLhPWs................sssp.-.KhYYaNspsGuoTWoLPssWAt...opVaLYcLTDQG+sc.t..ph................................................. 
0 9 12 17 +12746 PF12906 RINGv RING-variant domain Bateman A agb Bateman A Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.35 0.72 -3.92 26 1986 2012-10-03 15:03:13 2010-03-24 18:10:39 2 31 326 2 1289 1950 106 50.00 37 10.43 CHANGED CRIChpstspss........slhpPCpCpGolphVHpsCLppWlsp........psspp.....CclC ....................CRICh...pp.spppp............................Lh.p.PCp.Cp...........GolcalHp........sC....LpcWlpp......................psspp...............CElC............................. 0 402 693 1007 +12747 PF12907 zf-met2 Zinc-binding Coggill P pcc manual Family This is small family of metazoan zinc-binding proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild --amino -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.03 0.72 -3.99 13 251 2012-10-03 11:22:52 2010-03-24 18:12:56 2 6 164 0 171 237 2 36.30 39 40.41 CHANGED slhCclChpTFhsTsststLpEHA-sKHPK..sshpsCFPph .....hpCplC+...hphss.tp.....h+pHhEuKHPK..ss.................. 1 59 101 143 +12749 PF12909 DUF3832 Protein of unknown function (DUF3832) Coggill P pcc JCSG_target_391895_3k6q Domain This is a family of proteins from bacteria and archaea of unknwon function. The N-terminal part of the structure from Swiss:Q0AZ30 shows remote homology to the N-terminus of the bacterial toxin/antitoxin 'addiction module', and the C-terminus is distantly related to the TTHA1013/TTHA0281 superfamily. 
21.80 21.80 22.40 22.30 21.50 21.20 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.86 0.72 -3.92 12 47 2012-10-02 16:06:15 2010-03-26 16:28:33 2 2 38 4 18 50 1 91.30 29 62.79 CHANGED L.pshphpsphht-csGpl.olslspl-..lhssusTh-pAhpcLl-sLh-YupDYhschphahp.uPNRppHaPYlh.................plhhps.sp-clppll ........hh..pshphssplhpEsDGol.olsLspl-..lhususoh--AhscLlcsLl-YAcDahs-hphahs.uPNR+cHhPYlh.................plhlpc.sc-plppll........................... 1 11 14 17 +12750 PF12910 RelB_N Antitoxin of toxin-antitoxin stability system N-terminal Coggill P pcc JCSG_target_391895_3k6q Domain This domain appears to be the N-terminus of the RelB antitoxin of toxin-antitoxin stability system or prevent-host death system. Together RelE toxin and the RelB antitoxin form a non-toxic complex. Although toxin-antitoxin gene cassettes were first found in plasmids, it is clear that these loci are abundant in free-living prokaryotes, including many pathogenic bacteria, and these toxin-antitoxin loci provide a control mechanism that helps free-living prokaryotes cope with nutritional stress [1,2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.21 0.72 -4.60 97 364 2012-10-03 00:18:00 2010-03-26 17:37:23 2 5 297 4 132 2153 384 44.10 28 52.34 CHANGED M......pslsho-ARspLppllDpsspscpsl...IsR.p..sppsslllSh-php ...............M...tphNhp-A+spLopLl.-ps.t.p.Gc.p.sl...I..s..+.t....scssshllshp..................... 0 38 80 111 +12751 PF12911 OppC_N N-terminal TM domain of oligopeptide transport permease C Coggill P pcc Pfam-B_1473 (release 24.0) Domain Oligopeptide permeases (Opp) have been identified in numerous gram-negative and -positive bacteria. These transport systems belong to the superfamily of highly conserved ATP-binding cassette transporters. Typically, Opp importers comprise a complex of five proteins. 
The oligopeptide-binding protein OppA is responsible for the capture of peptides from the external medium. Two integral highly hydrophobic membrane spanning proteins, OppB and OppC, form a channel through the membrane used for peptide translocation. This N-terminal domain appears to be the first TM domain of the molecule [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.74 0.72 -4.59 398 11398 2010-03-26 17:47:28 2010-03-26 17:47:28 2 16 3658 0 2635 7363 2094 54.70 23 17.25 CHANGED shoh.hp..-sW..+Rh++NKhAlluLhlllllllhA.lhu.Phl..uP....a...................sp.s.hs.......ht...hsPu .......................hp.....phh.......cphh...+...s+...hA.h.h.ulh.l.l...llll.l.hu...l...h...u...Phl........us........a........s.........st.s....hs............h.......sP.................................................... 1 740 1563 2104 +12752 PF12912 N_NLPC_P60 N-term_NLPC_P60; NLPC_P60 stabilising domain, N term Coggill P pcc Pfam-B_845 (release 24.0) Domain This domain, at the N-terminus, appears to be the stabilising domain for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The next domain is an SH3b1, the third an SH3b2 and the last, the C-terminal region, the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN). 
21.90 21.90 22.00 22.00 21.30 21.50 hmmbuild --amino -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.57 0.71 -4.02 25 228 2010-03-29 10:40:00 2010-03-29 11:40:00 2 6 225 2 40 175 4 120.00 31 26.33 CHANGED hhhhlhsslLh..suCus+ss.s......................h.....tDlphhPQ-hpsYsps..........lhsttpptt.spcapp+aFuPWppspsphstp-lhhshpth...tpppu...YuENhp.hs.shhcshhpsAshcsa .................................................................hahshhllhh..suCupKshs...............................................pslShLPQ.spshshs..............................sccYp+haF.......uPWcss.hh.....s.....hps.....ps......lFWs.ashh...........hssps.......Yh.Ntp.hshuaacphIpNAslpca............ 0 14 33 38 +12753 PF12913 SH3_6 SH3 domain of the SH3b1 type Coggill P pcc JCSG_target_405735_3m1u Family This domain appears to be an SH3 domain of the SH3b1-type, and is just C-terminal to an N-terminal domain that is probably the stabilising domain for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The next domain is an SH3b2 and the last, the C-terminal region, is the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN). 20.90 20.90 21.00 22.50 20.70 20.20 hmmbuild --amino -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.41 0.72 -4.73 53 435 2012-10-02 18:48:24 2010-03-29 13:38:20 2 10 430 2 70 327 10 52.70 44 11.45 CHANGED PhFtsPp.sGpGaPF.DhhQpSslhsGoPVhlsHhSpDt....sWhaVpos.hshGWlcu .............hapsPp.tsEGhPF.DhhQpShLpsGTPVhlhHhS+Dp....cWtaVhos.sshGWVcS... 0 19 49 61 +12754 PF12914 SH3_7 SH3 domain of SH3b2 type Coggill P pcc JCSG_target_405735_3m1u Domain This domain appears to be an SH3 domain of the SH3b2-type, and is the second SH3 domain to be found, downstream of an N-terminal domain that is probably the stabilising domain, for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. 
The last, the C-terminal region, is the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN). 21.30 21.30 21.70 21.30 20.60 20.40 hmmbuild --amino -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.20 0.72 -4.19 41 382 2010-03-29 12:38:44 2010-03-29 13:38:44 2 5 381 2 47 261 5 49.80 32 10.79 CHANGED scphlppapsh..p.ahsllp-psslh...-ppGpahhpu+lGslhPlhspstst .........spchlp.atpL...p.hhs.lpsplsla....stsGsaaFpuRhGslhPhhcpcts........ 0 12 30 39 +12755 PF12915 DUF3833 Protein of unknown function (DUF3833) Mistry J jm14 Pfam-B_259 (release 24.0) Family This is a family of uncharacterised proteins found in Proteobacteria. 25.00 25.00 25.50 25.40 20.80 19.70 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.69 0.71 -5.01 105 426 2010-03-29 12:42:56 2010-03-29 13:42:56 2 1 405 0 121 374 1121 163.30 40 91.88 CHANGED hhs.hhLsuC..uu..slsD.Ytsp.sPphcLcpaFsGclpAaGhlpchsGcVhRRFsVclsusW....c....GsphsLcEcFhYsDGp....pppRlWpLstsus....GpapGoAsDVlGpAp........GptsGs.AlpWpYslcLPl.....-usshpVshDDWMYLhc-ssllN+ophpKFGlcVGclsLshcK .....................h.hhhLsuC....uu..pls-Y..psp.pPphDLtpaFsGphpAaGhlQDhoG+lsRRFpVslpGph....-....Gs..phsLs.EcFlasD..Gc....pppRlWplp....+t.u-......spYpGs....AsDllG...pAp........GptsGN.AlpWpYshpLsl.....-usshpVpFDDWMYhhD-pplhNcophpKFGlcVGclTLhFcK........ 0 24 63 93 +12756 PF12916 DUF3834 Protein of unknown function (DUF3834) Coggill P pcc JCSG_target_400673 Family This family is likely to be related to solute-binding lipo-proteins. 
20.80 20.80 20.80 21.10 20.60 20.30 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.34 0.71 -5.31 10 19 2012-10-03 15:33:52 2010-03-31 17:06:15 2 2 13 1 18 29 4 188.30 31 82.80 CHANGED M...........+llsAP..GPVSYPLIAu.hhcpcDl-I..hFuK.cGpuDV..VLDShVSLsKp...Gl+Ishsll+cLhsIhPclup.+IulWRKGSAADlLsRsllDlpsh+uElVYu-D.ppllcMLscGclsoAllu.ushupGcsF.E-Lhpphsl.lPGSCGAhlpsp..-cFIsAYpcGI-hh+pcPEpsA-YIucpLPhphspcFIcsll+pscas ...................................................llsAP..GPVSYPlIsu..hcpcDlcl...FsK...pspuDl..lLD...ShssLsKh...Gl+lshss...lpcLhhlhPplsp.......+IuVWR.+GoAADlLhRhllchpshc.......u...El.VYs-D.tplhcMhppGclpoAV.....l........s...u.hpc..G.th.E-hhs........hPGuCGs.s.ps.....p....pthlssYpcGI-hh+pcPpssA-hluppL..Phhhspchltslhcpsph........................................................................................... 1 4 7 14 +12757 PF12917 HD_2 HD_1; HD containing hydrolase-like enzyme Coggill P pcc JCSG_target_394139 Family This is a family of bacterial and archaeal hydrolases. 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.47 0.70 -4.55 3 447 2012-10-01 20:28:14 2010-04-01 14:05:03 2 2 442 3 51 577 15 210.00 60 97.87 CHANGED MGIHQYFQSLSDLENIYRCPGKFKYQEHSVAEHSYKVTSIAQFLGsVEEQAGNEVDWRALYEKALNHDYSELFIGDIKTPVKYATPELREMLSEVEESMT+NFIEREIPEcFQslYRahLKEGKDsTLEGKILAVSDKVDLLYESFGEIQKGNPENVFlEIYoEALATIYEFREMASs+YFLKEILPDMLAEKGIEKTELPQLTscIsScuL+.-- .........................................MGlHQYF.ppLSDhEplhRhPGcFKYhEHsVAAHSFKVTcIAQaLusV.E.....E.h...p...Gp.c.INW....K.uLYE.KALNHDauElF.hGDIKTP.VK.YAosEL+chhupVEEcMs.-sFIc.-...EIPtp..ap.-lY+p.R.L......p.....E....G....K.D..Do.L....E..G.....Q....I...L.SV..ADKID.LLYEoFGEIQKtNPE.plFhEIYc.S.L-TIhp.F.-.c.Ls..SVQ.FIppIlPEM.LsEs..F..hs+sc.LpchThsIlp......pcpt.................................................................................. 
0 16 27 42 +12758 PF12918 TcdB_N TcdB toxin N-terminal helical domain Bateman A agb Bateman A Domain This is a short helical bundle domain found associated with the catalytic domain of the TcdB toxin from C. difficile [1]. The function of this domain is unknown, but it may be involved in substrate recognition. 27.00 27.00 27.20 27.80 24.40 24.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -9.05 0.72 -3.90 20 211 2010-04-04 12:49:17 2010-04-04 13:49:17 2 17 131 14 9 218 0 60.70 39 3.43 CHANGED scYsLhh-slcpc..l.slo-ppp.cphpplpclpctIspYpsh..pcKNS..ppGpsLLppQuplLppl .s.p.sLh.-slhpt..l.sho-p.hsKhh.pIp-lpclhspYpth.....sKNS..RpGlcLLpcQu-LLctl...... 0 3 3 6 +12759 PF12919 TcdA_TcdB TcdA/TcdB catalytic glycosyltransferase domain Bateman A agb Bateman A Domain This domain represents the N-terminal glycosyltransferase from a set of toxins found in some bacteria. This domain in TcdB glycosylates the host RhoA protein. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.84 0.70 -6.02 14 376 2012-10-03 05:28:31 2010-04-04 13:55:39 2 24 176 14 22 503 17 327.40 31 26.98 CHANGED 
-KplHhlWIuGs.ss-sshsYIphahcs...sDYsahlWhDssAahssphspslhc.Ahstulpplccshs....sspcFhcch.clphch.................................................QcsFhNYshhps.pshs...D-hRhpYLppth.phsp-clppY.............l.cslschhupspscI+shc.h.phpctp.hphYppEhlhRWNhAAASD.lRlhhLKEhGGlYhDlDhhP.......uhsp-lhcsIpc.u...shh.-ssphccshs-ulh+htph..p.......hpshchspLsppspsplppllsphpphs..............clFpslsshhlp-h.s.....p.h.ppuhhssphlNshhlo+KsStssshlIpt.pptYp.Lpchhp...............................................................................................................................................................................................................................................................................................................................................................................................NshsspFhcolsshpshcp...hsh...p.hhhhuhhssYhpDuh...hPcAhSTLslSGPslhstshhchhp.ht..sc.hlhppth+..........................thsahpPpolhupsss.-cpSoWshsct+s ....................................................................................................................................................................................................................................................................lHhlWluGt...s-...pYhp.ahth.......pash.hahD.pAhhh.phpt.lhp.u.p.s.h.th.pt...................h..h..t...........................................................p.h.............D..hh.aL.p.h.th....p..tth.........................t....t...t......l..t.t........h.....................Y.......thh....R.h....N.hsuAoD.lRh.hLpphGGlYhDhD..hhP.........sht.pl.....h............................t......p.h..c.lhphh..........................t.p.s.l.tt...t.h.p.hhtphp................................................................................phF..ht....p........................hth.NthhhshtsS.h.phl..t.p.tY..l.t.ht.....................................................................................................................................................................................
...........................................................................................................................................................................................................p.......h.................................................................................................................................................t......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 3 4 13 +12760 PF12920 TcdA_TcdB_pore TcdA/TcdB pore forming domain Bateman A agb Bateman A Family This family represents the most conserved region within the C. difficile Toxin A and Toxin B pore forming region. 
27.00 27.00 33.70 34.40 26.00 25.80 hmmbuild -o /dev/null HMM SEED 663 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.88 0.70 -6.61 18 84 2010-04-04 13:21:11 2010-04-04 14:21:11 2 19 42 0 12 90 0 620.80 33 30.49 CHANGED hupchssAspcLppcppLsppWlslhsolccpspstYplpFlN..cssspopalpTp-ppFhcapcahpcph.sslspthhh..psGclhtc...tsls-spsVssLNAuFhIQoLIpa......p.suppts.ssLusALKlpsYlshsQhuhGslpDssclspLlppAhptphphhsTt.......hpsh........hupssspGlullhsGsslGhsIaELupAcsslp+shhuTpLuhsusuhssusuulshuhhuu............ussuulLGshuV.luGlulGlsuLspsauhluEcA+uVucYFsplspuhppsua..phpsppp...llhPhssuVlsplDhpsNplpasoptIaps....tpGpotsGphsaFF...........ssssh-cpp.ulsIppulGhsppphphsh.spupshlLPssPpp.hhpYEYshlPGspopcssGhclL...................................++lcpsp.p......F.acFYsFPu-a..sIspl+.cYhsTslclhLDpcsRpLlVPplsc-h.+s+LpYclpGsGGpYpLsLsst.splplss.......ss......psspWIIDsspl...cssoIp..pssplhlGslhhcl..shs.ppshlhhspp-Ip..........................pl-hsspphpl......llutctpphps...............sspslpp....+lppLuhsppLpsphsh......spsh.....ptptpshspsaaDssppchla...h.DssstpsptshhsslssshAahhststtslWtlD 
..............................h.hcplssuhhcLpppppLs.cpah.sFpslpc..ss..p...taplpFIs..cpstcshaVpTccphFscatpalscpl.uplppphh...ssGclhtp...hslspsp.......tVssL.NAAFhIQoLIpa............suspts.ssLusAhKVpsYspLhphuhsslpDus+lVpLlpsAlppshsllsTh........pul.........................s..slusllsGl...slGhsIhELucspssl.+t.htspluh.usshssussulh...suu............hshuushuhhhVPLAGlusGl.suLlps.hhl.t-cAppVscY......Fsclshu.ppGsa.....ph.-.cp.......lhhP.sslVIocIDh....sss..p...lhhsp..ppIa+h....soGpTsosslsaFF...........susshshpp..tlsI........hssltlpppphsh.......sp......clhlLPssPpp.hhsaEhsh.hPGh..pohcssGhclL...................................cRlc-ph.c......FhacaauF.u-h......hIspL+.cY.-TsIcIpLDspsRphIlPhlsp-..........h...+pKLSYphhG...uG..GpYtlsLsph.hslslph.........s......csshWllDscpl.....cssoI....pssplphGsll.sl..pl...shpcstlllsppcIp..........................cVDhhs.p.phpl......llus-hphhh................sspplpp....+lshluhssclptshsh......spp......spppt.hspth.-ssh.phla...hpssp...t..hs.hu..hpshpshsh..hthhsuhahhD.............................................................................................................................................................. 0 2 4 9 +12761 PF12921 ATP13 Mitochondrial ATPase expression Wood V, Coggill P pcc Wood V, manual Family ATP13 is necessary for the expression of subunit 9 of mitochondrial ATPase. The protein has a basic amino terminal signal sequence that is cleaved upon import into mitochondria [1]. 
21.00 21.00 21.00 25.30 20.90 20.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.38 0.71 -4.78 14 101 2010-04-06 09:10:04 2010-04-06 10:10:04 2 3 99 0 74 100 0 128.60 24 20.08 CHANGED spphhssllhuhua.GplphlcphlcplWsIsspsp.tt...........hstssshaPopclLhullsua.shspslpsAlpll-pF.cpYs.lcls....cphWccLhpWuthhhsh+tssttphhttshthhtph.pss ............sEphhsslhhuhu+tGplptl.cphl.c.plW.sIsssth.ppst...............ht.ssslaPos+lLhAlspuF.usN.scl.tAlpllDhluppYs..lsls....pphWpcLhpWoalhos.chs.t.ptp....t..........h....................... 0 11 33 62 +12762 PF12922 Cnd1_N non-SMC mitotic condensation complex subunit 1, N-term Wood V, Coggill P pcc Pfam-B_15091 (release 24.0) Domain The three non-SMC (structural maintenance of chromosomes) subunits of the mitotic condensation complex are Cnd1-3. The whole complex is essential for viability and the condensing of chromosomes in mitosis. This is the conserved N-terminus of the subunit 1. 21.50 21.50 23.10 22.60 21.20 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.85 0.71 -4.63 44 291 2010-04-06 09:13:56 2010-04-06 10:13:56 2 14 238 0 204 298 1 161.40 25 13.28 CHANGED LssphppplhpLlsSulsshupplps.lpss.................-shspa+ptLEhYuaLlhhllphlp.......................t...ts..tt.hssttppppsspssstpWchs.splpphLpslsplLc.lcLs+lFhTos-+DpFls.LFoRsha.hlhEspth..hKspsl+hhlF+lluhuVK+Hupuhssp ......................................................thhpphh-hlhpslushuptlts.lpss..............p..........tsshspat..phLchhs.aLLthhlpshE.........................t.stps..sp...hstt......sp..ppptps....spspsa......chp.sphpthLphls...plLp...L............cLp.+l.atsos..c-pFls.LhTcssY.tlLE.s..ph......hKspsl+.tlh+lLshul++asHhhus.................... 
0 68 113 170 +12763 PF12923 RRP7 Ribosomal RNA-processing protein 7 (RRP7) Mistry J, Wood V jm14 Pfam-B_1057 (release 24.0) Family RRP7 is an essential protein in yeast that is involved in pre-rRNA processing and ribosome assembly [1]. It is speculated to be required for correct assembly of rpS27 into the pre-ribosomal particle [1-2]. 27.00 27.00 31.00 29.20 26.60 26.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.93 0.71 -4.13 61 327 2010-04-06 11:06:40 2010-04-06 12:06:40 2 9 269 0 220 324 2 124.30 31 42.70 CHANGED GhpcahppYcpp.hss..ptLppplcpahpta-...pccppppcpt.........tp.s.sDE.DGassVs+s..t+t.............sshspsp...htppttttptppc+c+Kphps..FY+FQhR...Ep++pclt-Lh+KFE-DKc+lpthKtpR.+F+Pa ........................................................ttahp.paptp..h.s......ptLppplcpaMpta-pcptpppcct.........ttp..shsDE.DG...alpVs+p...s+p..............................sshspst...........tspt.pht....tptpp.++..c+Kthts..FYpFQhR...Ep++pclspLh+KFEED+p+lp..hh+tpR.+F+Ph...................... 0 71 119 181 +12764 PF12924 APP_Cu_bd Copper-binding of amyloid precursor, CuBD Coggill P pcc manual Domain This short domain, part of the extra-cellular N-terminus of the amyloid precursor protein, APP, can bind both copper and zinc, CuBD. The structure of Cu2+-bound CuBD reveals that the metal ligands are His147, His151, Tyr168 and two water molecules, which are arranged in a square pyramidal geometry. The structure of Cu+-bound CuBD is almost identical to the Cu2+-bound structure except for the loss of one of the water ligands. The geometry of the site is unfavourable for Cu+, thus providing a mechanism by which CuBD could readily transfer Cu ions to other proteins. 
26.40 26.40 28.10 28.10 24.80 26.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.24 0.72 -3.46 17 340 2010-04-07 17:32:00 2010-04-07 18:32:00 2 18 84 23 111 306 0 57.50 64 8.54 CHANGED pCpF.Hhcph...shCpsapcW+ssApcsCps......cshpL+SauMLLPCG.lDhFpGVEaVCCPs .............KC+FhHQERM...DhCEoHh......HWHTVAKEuCup......cuhsLHs.....YGMLL.......PCG.lD+FRGVEaVCCP...... 0 20 27 61 +12765 PF12925 APP_E2 E2 domain of amyloid precursor protein Coggill P pcc manual Domain The E2 domain is the largest of the conserved domains of the amyloid precursor protein. The structure of E2 consists of two coiled-coil sub-structures connected through a continuous helix, and bears an unexpected resemblance to the spectrin family of protein structures.E 2 can reversibly dimerise in solution, and the dimerisation occurs along the longest dimension of the molecule in an antiparallel orientation, which enables the N-terminal substructure of one monomer to pack against the C-terminal substructure of a second monomer. The high degree of conservation of residues at the putative dimer interface suggests that the E2 dimer observed in the crystal could be physiologically relevant. Heparin sulfate proteoglycans, the putative ligands for the precursor present in extracellular matrix, bind to E2 at a conserved and positively charged site near the dimer interface [1]. 
21.90 21.90 22.30 23.00 21.50 21.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.24 0.71 -4.73 11 385 2010-04-07 17:34:15 2010-04-07 18:34:15 2 22 91 16 118 340 1 175.40 59 27.13 CHANGED sssssPsss.sul.DsYFppsssp........sEH..........ppFpcA+pcLEp+HRc+hscVMK-WpEAEcphcsL.+uD.......+pplhp+FQpplpoLEpEusuERppLlETHttRVtAhLN-+RRtAlEsYhsuLpssPPcs++lLpAL++YlRAEpKDRpHol+HacHlpps-P....E+Auph+splhpHLclI--RhNQSLsLLh ....................................h...sTsts.T..DsV.DhYhEsPuD-........NEH..........A+Fp+AKEpLEt+HRcRMspV..........M+EWEEAE...pQAK.NL.P.KAD.......+pslIQHFQph...VcuLEpEuAsE+Q.QLVETHhARV.EAhLNDRRRhAL.ENYlsALQ.u..s..PPRP++Vh.phL++YV......RAEp.KDRpHTL+HapHVhhVDP....cK.AAQh+s..QVh..THL+VI-ERhNQSLoLLY............. 0 21 30 66 +12767 PF12926 MOZART2 Mitotic-spindle organizing gamma-tubulin ring associated Coggill P pcc [1] Family FAM128A and FAM128B proteins have been re-named MOZART2A and B. The name MOZART is derived from letters of 'mitotic-spindle organizing proteins associated with a ring of gamma-tubulin'. This family operates as part of the gamma-tubulin ring complex, gamma-TuRC, one of the complexes necessary for chromosome segregation. This complex is located at centrosomes and mediates the formation of bipolar spindles in mitosis; it consists of six subunits. However, unlike the other four known subunits, the MOZART proteins, both 1 and 2, do not carry the conserved 'Spc97-Spc98' GCP domain, so the TUBCGP nomenclature cannot be used for it. The exact function of MOZART2 is not clear [1]. 
23.30 23.30 23.90 30.60 23.20 22.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.09 0.72 -4.40 12 71 2010-04-08 11:58:30 2010-04-08 12:58:30 2 2 50 0 43 73 0 86.80 47 58.46 CHANGED Mu...............................s.h.ph.hpptpsLss-Es..ELaELsphAGlslDs-VF+..........................lllDLl+hNVuPhAlhQhLKShCuupths.t......sssspusuh ....................ss..............s.........p....ushQKhul...++KKVLosEEh..ELaELuQAAGsslDP-VFK..........................ILVDLLKLNVAPLAVFQhLKSMCAGQRlusp......st-ssuhs.h..................................... 0 13 17 27 +12768 PF12927 DUF3835 Domain of unknown function (DUF3835) Wood V, Coggill P pcc Pfam-B_14632 (release 24.0) Domain This is a C-terminal domain conserved in fungi. 21.00 7.90 23.20 8.20 20.60 7.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.90 0.72 -3.02 55 188 2010-04-13 08:34:32 2010-04-13 09:34:32 2 10 114 0 149 197 0 71.90 25 17.68 CHANGED lluDtllE+s..............ssstchspphhpcpls.pahch+pthh.t.t.....................................pssPtKhSRFKuu..Rhs ................................................................................................lhcp.......................thp.thh.ppls.....pahch+pphlppps.t........tttpt.....................sl-c................psss.c+...lSRFKuuRh.... 0 35 78 128 +12769 PF12928 tRNA_int_end_N2 tRNA-splicing endonuclease subunit sen54 N-term Wood V, Coggill P pcc Pfam-B_644 (release 24.0) Domain This is an N-terminal family of archaeal and metazoan sen54 proteins that forms one of the tRNA-splicing endonuclease subunits. 
21.10 21.10 21.10 21.40 21.00 20.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.29 0.72 -4.51 49 270 2012-10-01 20:02:48 2010-04-13 10:50:06 2 9 232 0 196 267 0 78.90 30 17.41 CHANGED MassL.shsRhpph+shshuhap..s....................................................ppptspVsps+GpaapohG..............hs...pts+ha..LhsEEALYLlERGoLplhhss ..................................................phL.s..Rhp..p....p.shs...huh.ah..P...................................................................cpthspl..h.ps.+GpaapohG..............................hs........ppG+ha...LhPEEALYLlEpGolplha..s..... 0 58 101 158 +12770 PF12929 Mid1 Stretch-activated Ca2+-permeable channel component Wood V, Coggill P pcc Pfam-B_1244 (release 24.0) Domain MID1 is a yeast Saccharomyces cerevisiae gene encoding a plasma membrane protein required for Ca2+ influx induced by the mating pheromone, alpha-factor. Mid1 protein plays a crucial role in supplying Ca2+ during the mating process. Mid1 is composed of 548-amino-acid residues with four hydrophobic regions named H1, H2, H3 and H4, and two cysteine-rich regions (C1 and C2) at the C-terminal. This family contains the H3, H4, C1 and C2 regions. suggesting that H1 is a signal sequence responsible for the alpha-factor-induced Mid1 delivery to the plasma membrane. The region from H1 to H3 is required for the localisation of Mid1 in the plasma and ER membranes. Trafficking of Mid1-GFP to the plasma membrane is dependent on the N-glycosylation of Mid1 and the transporter protein Sec12. This findings suggests that the trafficking of Mid1-GFP to the plasma membrane requires a Sec12-dependent pathway from the ER to the Golgi, and that Mid1 is recruited via a Sec6- and Sec7-independent pathway from the Golgi to the plasma membrane. 
28.60 28.60 29.10 29.00 27.80 28.50 hmmbuild --amino -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -13.04 0.70 -5.76 34 145 2010-04-13 13:17:33 2010-04-13 14:17:33 2 5 138 0 108 145 0 402.80 31 66.37 CHANGED hhlalSsNhCppPs.........tssss.spLplYhS....hspss.tstssuphph....hcsGYhpshhp.sss................................slYIuVpu...............sssos................upasYclusSp.sshhapa-s..csalhhVDoDssuALLsTsNLTssstssts.pphht...............s.Yolasa..shs.cs.ths..uLppSaCAlpssspl.......hsstshps.......................ShTsRus......u....+pQFYlsGLNsSosYhuaLs..............stshsssGGtVapshpFsT+s.................sssCuLlasLsFCspVAYuVPusssh...............stspLsphYDshAtulYpNFohuLpQlsCsssscupYS.h+sCsDCssuYKsWLCAVoIPRCsshsss.salp..........................................tspssoRNshIschIpP.ssYhElLPCh-lCaslVRsCPushGFsCPs....s.hlstSYshcs...sss.hTCNYlGssh ..............................................................................................................................................thlalSuNhCtpPt...................t.stssspLplYhS............hpps....s.ts....ts....sp.sh......Fp.pGahth.hs..sss................................slYlultu...............sss.sp...................................................................................hsu.asaclusS..sshhaphcs.....pshlhhlDoDssuALL.TsslTss.s...st.........pph....s...............s.ashash....shp.ss.thp..GlppShCulpststl........htstshps................................................................uhTpRG........................s.....+pQFalsGLNsSosYhuhLs............pt.....tss.shsssGGtVapshpFpT+s........................sssCtllasLsFCspVAYuVPussph...............ststLuphYDshApshYtNFopuLpplsCsssspshYShspsCsDCtpAYKpWLCuVoIPRCs-h.sss....salt.............pt...........................................hspssoRNs.hI.s..phIpP..usYhElLPC.-lCaslVpsCP.ushGFsCPp....sthhp.SYshtt.....sss..oCsY.Gss................................................................................. 
0 27 59 91 +12771 PF12930 DUF3836 Family of unknown function (DUF3836) Finn RD rdf Manual Family Family of uncharacterised proteins found in Bacteroidales species. Test. 25.00 25.00 26.80 25.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.66 23 146 2010-04-13 15:35:07 2010-04-13 16:35:07 2 1 61 1 16 108 1 125.20 32 86.98 CHANGED MKspslhKsllhsuhhhsoslsssAsu....p.ssaIYNpEppsGhlsucTlaKh-.G..phLppah+YsYpYD-psRlopKcAhKWNss+spWpshaplsYs.Yssp..-lsssYucWNp++psYshshppoVY.phDs ..................MKspshhKsll.h.sAhlhssshssssps....t.sshIaNp-.p.psshllupslY+h-..G....ptLspahKYsYpYD-ppRhspKcAhKWsuscppWts..hplsYs..Yssp...plshsYscWNsc+psashsschoVh.h.s.s............ 0 2 14 16 +12772 PF12931 Sec16_C Sec23-binding domain of Sec16 Wood V, Coggill P pcc Jackhammer:O14029 Domain Sec16 is a multi-domain vesicle coat protein. The C-terminal region is the part that binds to Sec23, a COPII vesicle coat protein. This association is part of the transport vesicle coat structure [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.99 0.70 -4.67 37 648 2010-04-13 16:54:12 2010-04-13 17:54:12 2 27 269 2 424 664 2 240.20 24 17.51 CHANGED plhshL.s....Gc+-+AlhhAlcpR.WuaAhllu......Ssls+sh....WppVlp-Fl+.c-h.p.suss.......h...p........LuslaplFuGNhcpul-E.Lsss..................s.t..hs.ts...............W+-sluhlLsN.....+ss.p....sp..pulhs...........lGcLLsphGphtAAHlCalhA...tssh........s.sts..h...h..s..ss...s...shh.hlG...........h.ps.cuhLho........ElYEauhol........ss...h.....s...hPHLhs....aKLhHAhsLA-hGhpscAppYCDsIuusLK.uts+pS........h.hph...pLh..ppLp....c.lusph....p....s...s....s...o...SWl....SKPsh-KV.Gpl.tpFspFluGD ......................................................................................................h.hpphLlhGph..c.pAlphshcpphaucAlllA...pth...spch...atp.shp....pahppp........t..................p................l.pslht.....lhusph...ps.h.ps...hs..................................................hts...........................................W+ppLAhl...lo.s.........tss..s........p.t...ps.l.ss...............LGc.pL...t...p...c.......G.........h...h...p...uAphCYl....h..A.....ts..sh...................t........h.....h...s...p..p......sphshlu.......................................ph.hs.....csl.hs.........................................E.hhEaspsl............ss.p.h....hlssh.s...hK.h...YAphL.A-hG.hhs..pAhpYhphl.t..h..............................................................................................................................................h............................................................................................................... 0 106 230 333 +12773 PF12932 Sec16 Vesicle coat trafficking protein Sec16 mid-region Wood V, Coggill P pcc Jackhammer:O14029 Domain Sec16 is a multi-domain vesicle coat protein. 
This central region is the functional part of the molecules and thus is vital for the family's role in mediating the movement of protein-cargo between the organelles of the secretory pathway [1]. 23.00 23.00 23.30 23.90 22.80 22.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.54 0.71 -4.14 58 342 2010-04-14 13:53:08 2010-04-14 14:53:08 2 8 218 2 231 328 0 110.60 28 6.82 CHANGED stPlhpaGhGGpllsshP...s.................st.sshhpsssuplclpslpsllsssp.................thppFPGPLhst.................KoKKKDlhpWLpspIsthppt...............................stpcpc-chLLWclLclhlc.psGs .............................ashhpFG.GGpllpshP..........................................s.stp.spsu..Vclcshcsllppst..t...................phpsF..PGPLhpt.................cs+KcDlhpahppphsphtps...............................phtcpcsphLLWplLhlLscpsG........... 0 52 104 174 +12774 PF12933 FTO_NTD FTO catalytic domain Bateman A agb Jackhmmer:Q9C0B1 Domain This domain is the catalytic AlkB-like domain from the FTO protein [1]. This domain catalyses a demethylase activity with a preference for 3-methylthymidine. 
27.00 10.00 27.20 15.80 26.00 9.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.79 0.70 -5.24 8 102 2012-10-10 13:59:34 2010-04-14 15:13:07 2 3 51 1 40 84 3 255.40 57 61.02 CHANGED Ds-.FtpshpssYsGFllDsPssLPscLHc-VppAFcoht+cGpFh+DlVpsGsK.lohT.VSRsLlG-pGhTY+Y.cLRLFAhPWss--p.......................chttshcshpRLN-hLspcopphLcchsssph.ssshpupCE..........................aNVTLINhM-P.ptpspstLK-EshFGMGK...hSVSWHpDSuLp-pSTVAVYppost..cs-...............cssWpVAL+.uhDttT.......PuLtlPLcstssYYMhcDFNuTHcHAVLsGsos.RFSSTHRVAhs ...................Ds-.Fap.Wph+YsKLllRcusolPE-LHccVQcAFLTL+KHGChF+DLVR...IpGK..DlhTPVSRlLIGsPGCTYKYLNTRLFTVPWPscGsphKYsp........................................s-ItsACpshlKLNDYLphEolpALct.s....ht-p...ppss.s..s.t....thh.ts.............................tp-.hshpsRsuYNlTLLNaMDP...tpMsYLK-EPYFGMGK...MAVSWHHDE..NLV-RSsVAVYsYSCE...s.-tpspp.....................................................phcGRDPshWHVGhKluWDIET.....................PGLAlPL+QGDsYaMLDDLNtTHQHCVLAG.pP.RFSSTHRVA-C....................................... 0 8 11 20 +12775 PF12934 FTO_CTD FTO C-terminal domain Bateman A agb Jackhmmer:Q9C0B1 Domain This domain is found at the C-terminus of the FTO protein which was shown to be associated with increased BMI and obesity risk in humans. The N-terminal domain of this protein is a DNA demethylase and this domain is found to associate with the N-terminal domain in the crystal structure [1]. This domain is alpha helical with three helices that form a bundle [1]. 
27.00 27.00 31.20 31.10 18.20 23.30 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.23 0.71 -4.69 8 94 2010-04-14 14:20:04 2010-04-14 15:20:04 2 3 55 1 41 75 2 134.40 57 31.48 CHANGED sTapYI+sRCppALpslsshpsps..........s.thpsh-ssslphhtElHpEVEFpWlR.FWlQGsRHAppHc.YWpptIAELspuWcpMEtshphlLsc.l+pusp.......os-ppscsasslLhsLpEpp-LRcEastRspusAatsLPsDQ+PsshP........taD-ss...PLPFDLcsVIspLcppp .......................GTL-YIhpRCplALQNlps-scss..........shSLKShEsuVlKQsEEIHN..EVEFEWLRQFWFQGpRYpKCTD.WWspPMspLEthW+KMEthophlLpp.l+pct...........................................s.cppschhsslL..LptRppLRpEWttRppsphhptlP.pptP.shP........happ.pp...PLPhsLp.hl.tlpt..h.......................... 0 7 10 20 +12776 PF12935 Sec16_N Vesicle coat trafficking protein Sec16 N-terminus Wood V, Coggill P pcc Jackhammer:O14029 Domain Sec16 is a multi-domain vesicle coat protein. The overall function of Sec16 is in mediating the movement of protein-cargo between the organelles of the secretory pathway. Over-expression of truncated mutants of only the N-terminus are lethal, and this portion does not appear to be essential for function so may act as a stabilising region [1]. 
20.30 20.30 23.60 23.60 17.00 16.60 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.08 0.70 -4.50 14 66 2010-04-14 15:49:59 2010-04-14 16:49:59 2 2 59 0 47 69 0 218.80 25 12.80 CHANGED sshWss...tussp-sstcFFsQlpoQTKPI.............ahPsE.sE....SRaEEGhPLlDpsst.........ssPs-pst.pssplDslFstDcs...............s-sutFFsphpps.s.p.tpsssplpRKsToQVLsShpsst..s...uPhS................sPsup..asphLs..ssstp..ptp..cs.S..--sLutp...E.....h.p...p...sE--..............LAtRWpAhL-sD..D.DLLl-D-h.t........t.tph.stss...stssttsls.....SPhsosp..s.spPhhts..sYTP..............H .........................................t.....t....s...ptt..F.sp.psQshPh.....................................hs.E.s-....uRaEEuhPLlssspt...........shppst.ttstl-shFptDts...............s-pusFFsp.t.t......p.....pp..s.sshp+KsT.QVLsohphpp.....p.ushs...............ttst.sp.....p.sp.l....ststp...hp....p..p..t-phttt..........................tp...................sE--..............LAp.+....W.p....AhLts..D..D..DlLlDD-h......................ttp..stss.....ss.s..h.s.....p.hsspp..s...tPh.t.......YsP......................................... 1 6 15 32 +12777 PF12936 Kri1_C KRI1-like family C-terminal Wood V, Bateman A pcc Pfam-B_8372 (release 7.7) Family The yeast member of this family (Kri1p) is found to be required for 40S ribosome biogenesis in the nucleolus [1]. This is the C-terminal domain of the family. 
25.00 25.00 37.90 37.70 20.90 19.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.13 0.72 -4.04 58 317 2010-04-14 16:07:41 2010-04-14 17:07:41 2 6 275 0 236 318 6 94.60 36 14.27 CHANGED hccph-chl-EhapL-h--hl.........ssthssR........FKYRcVsPps.FGLosc-ILhA.DDppLNpaluLKKhAsYRscctch....+-p+chp++t+hcch++csh .................p..cphEphl-c.hhpLDhE-ll........sc.hssR..............F+YRcVsPss.FGLosc-ILhA.DDcpLNpasuLKKhusYRscctch...........c-p+phpcKt..c.pph+cph......................................................................................... 0 84 133 196 +12778 PF12937 F-box-like F-box-like Wood V, Coggill P pcc Pfam-B_22368 (release 24.0) Domain This is an F-box-like family. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild --amino -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.40 0.72 -4.23 369 8154 2012-10-02 00:56:31 2010-04-14 17:42:23 2 360 366 19 5916 10186 51 47.30 25 8.94 CHANGED hspLPsElllpIh.................s....h.....L.s..sp..-.........Lhp.s.u.....tVC+pWppls..tcs.tL.W+chhhp .................................................tLPsE..l.l.h.p.Ih................................................................................................s.........a.......................L..s.....sp.......s..............................L.hp..s..u.................tVC...+p.Wp.pls....tss...l..Wpph...t................... 
0 2058 3220 4628 +12779 PF12938 M_domain M domain of GW182 Coggill P pcc Pfam-B_71410 Domain \N 21.90 21.90 21.90 23.90 21.50 21.40 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.84 0.70 -4.55 11 119 2010-04-14 16:45:07 2010-04-14 17:45:07 2 12 69 0 70 104 0 239.10 38 16.86 CHANGED sHsu.u.tsshussc..........asuGuu..suhsFspNs.....sshuusssssGsG..ssNssls...ulssh......caLs..ps..............ushss.usuGupssGuushsustss...t.s.....tpu.QPSspQlRhLVQQIQhAVpsGaLNsQILNQPLsPQTLhLLNQLLs.pIK.......................pLQtsQpsLpRttsu...........................pshplsltIsphKppIssLQNQIss......pQulalK....t.....t.....................t..hupuusshhtspsshssLpsph ..........................................................................................s.tts...t.sa..p...........uuhh...ttsssu..hsssshp......u..suhs.s.tth.thhst.thQs.........................................Ghhusuuuupupsh..p.p...ss..t...PsstslsusQPs.........hpupsPphh....S.ph.AQ.LQhAs+shtLNsuLLTpP.Is.PQphshLNQLhQ.Qls.......................+LQl..Q.QQhLQs.ptNsot.............................cppppQVuhsIsshpQQIQQhQpQlA........QALhhKQ............................p.shpssuu+SuhDsF....s.s+sphsuL.s...p.................................................. 0 16 24 51 +12780 PF12939 DUF3837 Domain of unknown function (DUF3837) Finn RD rdf JCSG_target_388609 Domain A small, compact all-alpha helical domain of unknown function. This domain is currently only found in Clostridiales species. 25.00 25.00 28.70 31.00 23.50 21.80 hmmbuild --amino -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.31 0.72 -4.02 6 18 2010-04-22 13:29:43 2010-04-22 14:29:43 2 1 18 0 3 18 2 102.20 34 95.09 CHANGED MlohIsKQAlslKsphp..AshsuNYEhYYAlGlhuKuhslshsctchs.hEL+-pL-pclct.p..PtDEp.phLhtlLpcac.sDDsaDtQMhELhpaGhpscp MlopIA+QulhIKsp.hp.puslTuNYEhYYAsGlluKhhGlshs-......chpshELp-pLppclcphp..PpDEp-+hLhplLpsYc..s--shDpQMhELlpaGhp-p......................................... 
0 1 3 3 +12781 PF12940 RAG1 Recombination-activation protein 1 (RAG1) Mistry J jm14 jackhmmer:Q6U1Q1 Family This famiy contains recombination activating protein 1, which is the catalytic component of the RAG complex. The RAG complex is a multi-protein complex that mediates DNA cleavage during V(D)J (variable-diversity-joining) recombination [2]. RAG1 mediates DNA-binding to the conseved recombination signal sequences (RSS) [3]. Many of the proteins in this family are fragments. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.50 0.70 -6.13 3 12026 2010-04-22 13:54:52 2010-04-22 14:54:52 2 13 8629 0 36 11047 0 335.50 71 76.21 CHANGED LLPGYHsFE........WpPPLKsVSsSTDVGIIDGLS.GhssSV-Easl.-sIu+RFRYDAALVSTLKDLEEDILEGhpcQGL--su.StlFTVVlKESCDGMGDVS.EKHGSGPhLPEKAhRFSFTIMsIoVK+cDGsslpVFEEsKPNSELCCKPLCLMlADESDHET.TAILGPllAEREAMKsScLhLEIGGloRsaRFIFRGTGYDEKLVR-VEGLEASGSsYICTLCDATRsEAuQNLVhHSITRoHoENL-RYEhWRoNPYNESsDELRDRVKGVSAKPFlETpPS...IDALHCDIGNAsEFhKIFQcEIGEVY+..NsNsTKEERK+WQotLDKpLRKKMNL+PlMRMNGNFARKLMTcETVEAVCELVpsEERREALRELlcLYlpM+PVWRuspPAKECPDLLCpYSa+SQRFAcLLST+FKYRY-GKITNYLHKTLAHVPEIIERE ...................................................LPGaH.FE........WpPsLKNVSsshpVGIIsGLS.Ghs.SlDDhPs.DTIs+RFRYDsALVsALKDhEE-IhEGh+tpsl-D.h....susFoVllKESCDGM.GDVS.EKHGuGPAVPEKAVRFSFTlMsIo.........l.....................h.......t.s..............p............-.............s.................l....p.............I.FpEsKPNSELsCKPLCLMhsDESDHETLTAILuPllAERcAMKpScLlLphGGl.RoF+FhFRGTGYDEKhVREhEGLEASGSsY...lCTL..........C.DuTRhEASQNhVhHSITRSHsENLERYElWRoNPapESsDELRDRVKGVSAKPFhET.Po...lDALHCDI....GNAsEFY+IFQ..EIGElYp.......pss...so+EER+pWpssLDKpLRKKhpLKPhMRMsGNaAR+LMotEsV-sVCELl.sEE.Rp.ALpcLMpLYlpMKPVWRuosPs+-CP-.lCpYSapSQpFA-lLuo.FpYRYtG+ls.................................. 
0 8 10 17 +12782 PF12941 HCV_NS5a_C DUF3838; HCV NS5a protein C-terminal region Mistry J, Bateman A jm14 jackhmmer:A1YSL9 Family This is a family of proteins found in the hepatitis C virus. This family contains the C-terminal region of the NS5A protein. CC The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525. 25.00 25.00 29.00 28.90 18.90 24.90 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.77 0.70 -4.80 17 15967 2010-04-22 15:12:44 2010-04-22 16:12:44 2 30 112 2 0 9643 0 185.50 77 38.13 CHANGED SHITAEsApRRLuRGSPPSLASSSASQLSAPSLKATCTspssHPDAELIEANLLWRQEhGuNITRVESEsKVllLDSF-PLsAEh.DDRElSVuAECaRPsRPKFPPALPIWARPDYNPPLlEsWKtPDYcPPsVpGCALPPts.PPVPPP.RRK+sVpLDESsVSpALApLAcKoFspsssssct.SssGhsssosssssssstsDssSDssSaSSMPPLEGEPGDPDLSSGSWSTVSpE-D....VVCC ...................SHITAEsAtRRLARGS..PPSlASSSASQLSAPSLKAT....C.....TT...+....H....D....S.....P....DA....DLI.EA....N....L....L.....W....R....Q....E.M....GGNITRVESENKVVILDSFDPLpAEE.DE.RElSVPAEILRKSR.+FssAhPlWARPDYNPPLLEoWKcPDY.PPVVHGCPLPPspuPPlPPP.R+KR.TVVLTESTVSoALAELATKoFGSSpoSu..............hs.sssssss.......................................................................t.S.-.s.t......................................................................................................................................... 0 0 0 0 +12783 PF12942 Archaeal_AmoA Archaeal ammonia monooxygenase subunit A (AmoA) Mistry J, Finn RD jm14 jackhmmer:A7U5N3 Family This is an archeael family that contains ammonia monooxygenase subunit A. Ammonia monooxygenase is an enzyme that oxidises ammonia to nitrite and nitrate, thus playing a significant role in the nitrogen cycle. Ammonia-oxidising archaea (AOA) are widespread in marine environments [1]. 
25.00 25.00 33.70 33.50 20.80 18.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.03 0.71 -4.50 8 13679 2010-04-22 16:07:26 2010-04-22 17:07:26 2 1 283 0 2 13456 21 177.80 85 90.34 CHANGED VAsNSTLhTINAGDYIFYTDW.....AWTSFVVFSISQShMLsVGAhYYhsFTGVPGTATYYAhIMTlYTWV...AKGAWFA.LGYPYcFllsPsWlPSAhLlcLuYWAT++NKHuhILlGGsLhGLSLPLF....NMVNLLhVpDPLEsAFKYPRPTLPPYMTPIEPQVGKFYNSPVALGAGAGAVLoVshAALG ..............VAVNSTLLTINAGDYIFYT.DW.....AWTSFVVFSISQohMLsVGA.....sYYL.hFTGVPGTATY.YAhIMTlYTWVAKGAWFA.LGYPYDFIVsPVWIP.SAMLLDLsYWA.T++NKHu.LILhGGsLVGhSLPLF....NMlNLlhVtDPLEsAFKYPRPTLPPYMTPIEPQVGKFYNSPVALGAGAGA..VLoVshAALG................. 0 1 1 2 +12784 PF12943 DUF3839 Protein of unknown function (DUF3839) Mistry J, Finn RD jm14 jackhmmer:A2DDZ9 Family This is a family of uncharacterised proteins that are found in Trichomonas. 20.00 20.00 22.40 21.80 18.60 18.00 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.70 0.70 -5.21 3 2523 2010-04-23 10:20:04 2010-04-23 11:20:04 2 6 1 0 2523 2523 0 188.50 85 78.54 CHANGED TTuPNSLVMNP.TSMLVEMKsFIPSSFTFETEIQKIKQELLTSsLDCoAKDETNEQYLYEMQDIIDHLPKLPEIQQQKLTIPEFDEItVKsTDSAEIKKFIRKVNsEFLGFHCNHKVMDKDCDMVYKNISDIYKSEEFKTYDNFVSLVAcCVWEIRDKDKRGKVWNEQIKPTsSELKKTIDALVVLAGQISMYNAKMNPQCSKCKAAIRKYNYSVKEIERMRNDYADLKKEAEKPAEDKMNML ....................TTuPNSLVMNP.TSMLVEMKsFIPSSYTFETcIQKIKQE.LLpusLDCoAKDEpNEpYLYEMQDlIDHLPKLPEIQQQKLTIPEFDEIEVKsTDSVEIKKFIRKVNYEFLGFHCNHKVMDKDCDMVYKNlSDIYKS.tEFKTYDNFVSLVAcCVWpIRDKDRRGKVWNEQI+PAhFEhK+sIDALVVLAGplS.YNAKMNPQ.C.SKCKAAhRKYNYSVKEIERMRNDYADLKKEAEKPAEsKMDML................. 1 2523 2523 2523 +12785 PF12944 DUF3840 Protein of unknown function (DUF3840) Mistry J, Finn RD jm14 jackhmmer:Q1H8S6 Family This is a family of uncharacterised proteins found in hepatitis A viruses. 
25.00 25.00 37.80 37.70 23.40 19.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.39 0.72 -4.18 2 2908 2010-04-26 15:16:45 2010-04-26 16:16:45 2 5 19 0 0 947 0 80.40 91 47.07 CHANGED LSFSCYLSVTEQSEFYFPRAPLNSNAMLSTESMMSRIAAGDLESSVDDPRSEED+RFESHIEsRKPYKELRLEVGKQRLKYAQEELSNEVLPPPRKhKGlFSQA ....................FYFPRAPLNSNAMLSTESMMSRIAAGDLESSVDDPRSEEDRRFESHIECRKPYKELRLEVGKQRLKYAQEEL.................... 0 0 0 0 +12786 PF12945 YcgR_2 Flagellar protein YcgR Mistry J, Auchincloss A jm14 jackhmmer:Q9KNC3 Family This domain is found N terminal to Pfam:PF07238. Proteins which contain YcgR domains are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias [3]. This family contains members not captured by Pfam:PF07317. 21.60 21.60 21.70 22.10 21.40 21.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.85 0.72 -4.10 119 585 2010-04-27 15:34:41 2010-04-27 16:34:41 2 3 451 5 179 481 9 88.40 21 37.67 CHANGED lphGpplp.l....ph.....tss....ttt.....hp.opl..luhpps.thlhlshP.....hhsstthhlh..pGptlpl+hhspss....lhsFpoplhp..hh..ppPh.hl..hlshP ...................phGpclp.l....pl......psss........tpp.......hhhp..oplluh.....cps..thlllshP.....hpsspthhhp..pGhtlplchhssps......lhtFpoplhp..hh..pcPhshlhlphP........... 0 70 123 158 +12787 PF12946 EGF_MSP1_1 MSP1 EGF domain 1 Bateman A agb Jackhmmer:P04933 Domain This EGF-like domain is found at the C-terminus of the malaria parasite MSP1 protein. MSP1 is the merozoite surface protein 1. This domain is part of the C-terminal fragment that is proteolytically processed from the the rest of the protein and is left attached to the surface of the invading parasite. 
27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.15 0.72 -4.17 15 1331 2012-10-03 09:47:55 2010-05-02 12:25:03 2 13 39 10 19 696 0 36.90 71 6.05 CHANGED sCls.sssPpNAGCFRassGpEEWRCLLGaKK..-usp..Cl .......pClc..ppsP-NuuCaRaLDtpEEh+..CLLsaKp..EGsK..CV............ 0 8 9 15 +12788 PF12947 EGF_3 EGF domain Bateman A agb Jackhmmer:P04933 Domain This family includes a variety of EGF-like domain homologues. This family includes the C-terminal domain of the malaria parasite MSP1 protein [1]. 28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.66 0.72 -3.99 174 4024 2012-10-03 09:47:55 2010-05-02 12:53:04 2 685 140 8 1803 4138 79 37.60 36 6.42 CHANGED Ctssss..sCcspApCsNosG.......s.......asCsCp...sGapGDGhs.C .............Ctps.s.s...sCc..s..s.A.pCp.pssu..........p.........................hsC.p.Cp......sGah..G..-.Gh.C........... 0 749 845 1435 +12789 PF12948 MSP7_C MSP7-like protein C-terminal domain Bateman A agb Jackhmmer:Q95VZ1 Domain MSP7 is a protein family the malaria parasite that has been found to be associated with processed fragments from the MSP1 protein in a complex involved in red blood cell invasion. 27.00 27.00 28.70 33.70 21.00 25.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.71 0.71 -3.89 21 213 2010-05-02 13:01:12 2010-05-02 14:01:12 2 1 10 0 31 191 0 128.40 38 35.71 CHANGED lKalDpLaD............................-lLss.pspKsplcsspa............+sKYNpF+ccY-...shNppEYcIlK+Llssahppsstss.ttsslh.......clFKKuLpDccapccF+NFlaGlYuFAK++NYLs......................sp+hps.pc.YpplFcNslsL .................lKhlDcla-............................-VLpp..spcsclssspa............psKYs-FKKca-.F.slNppEY-IIKpLIhsFhpcsspspptpsclh.......plFhKsLcDcca+cpFKNalYGlYuaAKp.+sYLp..................................tc+hcs.cc.Y+plh-pshsL.... 
0 8 11 27 +12790 PF12949 HeH SAP_2; HeH/LEM domain Mistry J, Sazer S, Wood V jm14 Manual Domain This is a HeH domain. HeH domains form helix-extended loop-helix (HeH) structures.\ This domain is closely related to Pfam:PF03020 and Pfam:PF02037. 28.90 28.90 29.10 28.90 28.60 28.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.09 0.72 -7.38 0.72 -4.40 47 275 2012-10-03 03:04:30 2010-05-04 14:05:07 2 13 241 1 131 254 0 34.40 38 7.90 CHANGED ssoLpVscLRplLsp+slpaPusAKKupLlpLhpc .....psLTVscL+slLsppsl.sasusAKKu-LltLhp....... 0 35 72 115 +12791 PF12950 TaqI_C TaqI-like C-terminal specificity domain Bateman A agb Bateman A Domain This domain is found at the C-terminus of the TaqI protein and is involved in DNA-binding and substrate recognition. 27.60 27.60 27.60 27.70 27.50 27.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.61 0.71 -4.35 40 635 2012-10-02 00:09:28 2010-05-04 17:03:46 2 18 482 22 163 675 44 141.90 19 16.42 CHANGED hslLpG.....cs.lp+Yplpas..................spYlsat.cthppsp...................................cchac...........p.KIll..Rplus.pls....usaDpcshhshsshhhlhhpstt..................lshchllulLNSclhp.aaapphh.p...tphh.+lphppLpplPl .................................................................................h..hhcG......p.s..Ip+a.t.h.p.h.s.....................................t.al...h.h.....pthpptp.................................................................................................................tphac.................ptKIlh...tphss...cst.........FshD.s...p.....s...hhh.....p.......ss........sah..lh..tt...............................hs.h.ca.L.lu.lLNSclhp.aah..cph.htth....tssh..h..chptp.lp.plPl...................... 
0 70 132 152 +12792 PF12951 Autotrns_rpt Autotransporter-associated beta strand repeat Bateman A agb TIGRfams:TIGR02601 Repeat This model represent a core 32-residue region of a class of bacterial protein repeat found in one to 30 copies per protein. Most proteins with a copy of this repeat have domains associated with membrane autotransporters (Pfam:PF03797). The repeats occur with a periodicity of 60 to 100 residues. A pattern of sequence conservation is that every second residue is well-conserved across most of the domain. These repeats as likely to have a beta-helical structure. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.49 0.72 -4.24 713 7020 2012-10-02 14:50:22 2010-05-12 12:53:43 2 164 932 0 1415 6703 697 31.50 41 9.95 CHANGED uLsKsGsGTLs.L..o...G..sNT..YoGuTsl.suGoLplu ..................LsKsG.s.G.o..Ls.L...o............G....sNo..........Y.......o..G.s...T.....sl.s.u.G.oLtl................ 0 364 771 1068 +12793 PF12952 DUF3841 Domain of unknown function (DUF3841) Bateman A agb Jackhmmer:Q189I4 Domain This presumed domain is around 190 amino acids in length. As yet no function has been given to any member of the family. 
27.00 27.00 27.40 27.30 25.70 26.80 hmmbuild --amino -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.31 0.71 -4.81 31 186 2010-05-13 13:05:00 2010-05-13 14:05:00 2 1 139 0 28 161 0 172.10 25 88.45 CHANGED plaTtQscpshcplccsGhhhs+cE...Ylchchtp.u.hahpsYcWhlccupctl.shPpsscaPlWh..............uhpsctsh.p.ssssllLpLclPp-pllhh-hpcWshlLNhtYlspc-pDcttacchl....cphGltpphphh.o...thYPpl+pclpcSWcR.............................lF.........sttshs.ptlpuslWcl++E.Wlh .................hlaThQsppshcplccpGhh.hsppc....alp............p.hhhuY..cWhV+ph.cp.+l...s.....pspaPIWsh...............ssppctsh...p...s.......pchVlLpLclPcchllhoshchWshhhspt.hhs...ppp..-ppthcchh....pp.th............hh.t........hsphh.phpcSac+................hF......................................t.tt.t.....ptlpus.Wp.++E.hl................................................................. 0 11 21 23 +12794 PF12953 DUF3842 Domain of unknown function (DUF3842) Bateman A agb Jackhmmer:Q189S9 Domain This short protein is found mainly in firmicute bacteria. It is functionally uncharacterised. 25.00 25.00 31.40 31.30 24.00 20.90 hmmbuild --amino -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.54 0.71 -3.99 37 142 2010-05-13 13:13:52 2010-05-13 14:13:52 2 1 128 0 53 126 4 128.70 46 92.09 CHANGED +IsVIDGQGGGIGppllcpl.+cphsc..l-IlALGTNuhATusMLKAGAscGATGENAllhsst+..ADlIlGPluIlhAsuhhGElTPtMApAlupStApKlLlPls.+ssh.ll...GspspPLscll-phlp.clp .pIsVIDGQGGGIG+pllcpL.+cphsc.......lcIlALGTNulATusMLKAGAscGAoGENAllhsspc..sDlIlGPluIlhssuhhGElTPtMApAlupSpApKlLlPl.s.+ssh.ll...Gsp.stPLschlcphlp.l.t... 0 36 50 51 +12795 PF12954 DUF3843 Protein of unknown function (DUF3843) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 
25.00 25.00 119.70 116.80 18.30 17.40 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.32 0.70 -5.43 12 103 2010-05-17 00:57:42 2010-05-17 01:57:42 2 2 61 0 10 91 1 236.70 27 97.99 CHANGED h+IYhKsWLplHshspshsTDpaYlshAN+lhslhcpo.Lh.shpt...pKplslhhAhYhEDsIushGhW+tFhptHppLYG+YLPFYshs-sYhsDEINhEDIpFlLWshhp..thht.tctsh.sPh-tslhchAphhYslh-cpaEpAP.s-phsphahs.sphhpt...........................................................................................hss..stpstpchscphcpFhpuopG+.LlYFssYc-LppFhlcsLpWpsccsphLPphppp+pFllaAss.KGlLlutslscYhsDccNPhYsucpAtppuachFs..hhCP.DLL+Yshp+slLPDAQhP......suKcll+cNWDFIARaaLtEY ............p.Whthp.h.t...sD.aahthAsplh.hh............l............thsl.hshahpDhlst.G.Wp.F...h.t.Ysp.LPFY..t....t............Yh.sElN.EDl.FllWth.p................P.s.s..thu..hathhpt.ap.AP.st..t..h...................................................................................................................................................................................................................................t...t.ht.hhthstst.hhahtt.tth..Fhhp..th..t....t..hst....tthhhhss..p.Gh..l...p.s.hhtp..Ns.Y...s.t.Attpuh..hhh....h.s..h...h.tpthls-h.h.........tpthhpp.hcFlsc............................................................. 0 2 8 10 +12796 PF12955 DUF3844 Domain of unknown function (DUF3844) Bateman, Wood V agb Jackhmmer:O74728 Domain This presumed domain is found in fungal species. It contains 8 largely conserved cysteine residues. This domain is found in proteins that are thought to be found in the endoplasmic reticulum. 
27.00 27.00 29.80 38.70 25.40 25.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.56 0.72 -3.92 32 106 2010-05-17 16:38:42 2010-05-17 17:38:42 2 1 104 0 83 109 0 102.90 41 23.96 CHANGED CasSpcuCspuTssCSGHGpChcp.tst..............cCauCpCpsoh.pp..t......Koh.pWuGssCpK+DlSs.FaLlshsollllhhlsuuIthLaulGp-cLPuVluA .CasSpcuCppuTssCS.GHGpChpp.tst..............tssCauCpCpsThspptt..t............+sspWuGssCpKcDlSs.FaLlssholsLlhhlshuIshLaulGpEcLPGVluA..... 0 25 47 72 +12797 PF12956 DUF3845 Domain of Unknown Function with PDB structure Ellrott K kellrott JCSG structure PDB:3GF6 Family Member PDB:3GF6 has statistically significant similarity to TNF-like jelly roll fold may indicate an immunomodulatory function[1] or a bioadhesion role[2] 25.00 25.00 479.30 479.10 17.60 15.20 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.56 0.70 -4.97 3 18 2010-05-17 17:26:20 2010-05-17 18:26:20 2 1 18 2 1 17 0 243.60 89 95.08 CHANGED hIMNKIIGhAVLLLCLoGCVRDNDAIYYPVGNVDIERGGPALEVGpculLVARSYNEEDYVLDTLAQYPGDPTLGKLTFMINLKNQSuDQEVA-FNGVGKSKLTMSLGYKDGNYPVESQVPVYTSuDVTASYAIKLRLKGELTLTGDEWMIDYVYAQLAGLFQPYPPASFPEVFMCKGGEQsFuTFDSFRRTWTFDITYDRS-LSFSQLYFNLFVNLAGQKRE-RVRLRIDKESYFEIYKpKEEM .hIMNKIIGLAVLLFCLSGCVRDNDAIYYPVGNVDVERGGPALEAG.KGDLIARSYNTEDYVLDTLAQYPGDPTLGKLTFMIsLKNQpADQEVsGFNGVG+SKLTMSLGYKDGNYPsESQVPVYTSSDVTASYAIKLRLKGELTLTGDEWMIDYlYAQLAGLFQPYPPTSFPEVFMCKGGEQsaATFDSFRRTWTFDITYDRSNLSFSQLYFNLFVNLAGQKRE-RVRLRIDKESYFEIYKEKEEM... 0 1 1 1 +12798 PF12957 DUF3846 Domain of unknown function (DUF3846) Ellrott K, Bateman A kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain is found associated with an Pfam:PF07275 like domain. This suggests that this family may also be involved in evading host restriction. 
24.60 24.60 24.80 24.60 24.30 24.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -10.11 0.72 -4.26 53 201 2010-05-17 20:05:53 2010-05-17 21:05:53 2 10 147 0 52 192 71 95.20 25 44.35 CHANGED psLllpPsptsts.....hplsss...........LcslQphV......GG.IEslhh..........sc.s....sslhlN-EG+hpuLP.hNpthst...............tth.hhDhlsGshllsG...sspss.hs...sLssctl ...................................sLhlpPtp.s.h.....hpltss...........LcshQphV......GG.IEs.lhh.................pc.s....ssllsN-EGKhpGLP..lNcthts...................t...hh-hlsGshhlsG.....ppptp.hh..sLs.t................................................ 0 30 42 50 +12799 PF12958 DUF3847 Protein of unknown function (DUF3847) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 40.00 39.00 40.30 40.10 39.70 38.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.91 0.72 -4.22 40 356 2010-05-17 20:46:12 2010-05-17 21:46:12 2 1 216 0 14 212 18 83.20 36 80.67 CHANGED cp.LEpLppEhE+uEp+Lccsppc.KtLcpQhKpLp...............RKcRTHRLCTRGuhLEShl.csccLTD--lh.LLchIFppp-sp-hL+ ...........................pph-cplcpt-cclKpLpNpp+pLcp....t....-....RKpRs+RLIp+GAlhESlhpE...sp-LTc-EhhpLlctl.t....................... 0 6 13 13 +12800 PF12959 DUF3848 Protein of unknown function (DUF3848) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain frequently seen with DUF3849. 
30.00 30.00 30.00 35.10 26.40 29.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.39 0.72 -4.05 3 58 2010-05-17 21:43:24 2010-05-17 22:43:24 2 2 36 0 3 54 8 98.70 65 39.48 CHANGED -LNTALYEKMoAEQ-KFRDWLKSQPPEEILNHAYEYTVREDIVMAMEELELTDAQAQALLDSPSPLADVYRYFEKVETDYMDsIRDCIEsRADDVCRAQcE .........-LNTALYEKMAAEQDKa..RDWLKSQPPEEILpHsYEYTlREDIVMAMEEL.E.LTDuQApALL-SPSPLADVYRYFEKL.ETGYMDs.IRDSIEsRADDVCRApEE.............. 0 2 3 3 +12801 PF12960 DUF3849 Protein of unknown function (DUF3849) Ellrott K kellrott JCSG-Joint Centrer for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain frequently seen with DUF3848. 25.00 25.00 26.10 28.60 21.40 20.10 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.64 0.71 -4.21 8 86 2010-05-17 21:44:30 2010-05-17 22:44:30 2 8 49 0 8 81 12 131.00 42 25.96 CHANGED PlYpHsAsYApE+sEL-tYRsSppsNhuCKEAIEsAIp-HYssNRLc.cuAVcpVlEpFGhERshaVLAsTlQpp-aDGRaSpsNK-WA+slshPss.ssh....sctshhhVssspPGLlDLFhcpsR+shtppQp ...............................................................PlY.aSAuYApEcGEl-pYRASppsNlpCKcu...IEtAIppcacs....pLs.csAs+sVlEpaGhERVpaVLANTlQpp.-aDG.RhSpcNK-WAKoIssssspsst....pths.ht.hh.ssspsGllDLFhcptR+phptpp.c......... 0 6 8 8 +12802 PF12961 DUF3850 Domain of Unknown Function with PDB structure (DUF3850) Ellrott K kellrott JCSG structure PDB:3IUW Family The search results from NCBI sequence alignment indicates a conserved domain belonging to ASCH superfamily [1]. Dali searching results show that the protein is a structurally similar to the PUA domain, suggesting it may be involved in RNA recognition. It has been reported that the deletion of PUA genes results in impaired growth (RluD) and competitive disadvantage (TruB) in Escherichia coli. Suggestions have been put forward that, apart from their usual catalytic role, certain PUS enzymes (e.g. 
TruB) may also act as chaperones for RNA folding. The interface interaction indicates that the biomolecule of protein NP_809782.1 should be a dimer. 30.00 29.00 32.20 37.80 29.20 26.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.37 0.72 -4.01 10 184 2012-10-02 17:37:24 2010-05-18 18:54:59 2 3 161 0 15 153 9 73.00 46 53.09 CHANGED HsLKItPEaFpAVlEGpKsFEIRKNDRNaQVGDlLlLpEassG..pYTG+tspuEITYITD....YuQ.p-sYVVLSh+ ......HcLKIhPcYFc..sVhpGpKpFElRKNDRsapVGDhLhL.......pE..a..p.p.G.............pYT.Gcp.......htscITa.l.T.D........asp...p-.....GYVlLulp...................................... 0 2 8 12 +12803 PF12962 DUF3851 Protein of unknown function (DUF3851) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 23.00 23.00 210.20 210.10 22.40 21.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.62 0.71 -4.13 4 33 2010-05-18 21:24:42 2010-05-18 22:24:42 2 1 27 0 1 22 4 124.70 69 99.32 CHANGED MKspILppcpMMFFDRAL-sQRotLLTsMADAVSECRTAADQAsELNEsGEsGLhRLsEIWsshhstcGhu..hllEGopsplLu-VVAQlYAYLotp.hhDPlGLAlYsELpaMMuSLMLGEWFE MNPNILNpNPLMFFDRAVNAQRSQLLTVMADAVSECRTAADQAAELNETGQlGLLRLAElWSsIRAKEGMG.GLlLEGTEAKILSDVVAQFYAYLSGCMFNDPVGMAIYAELHYMMSSLMLGEWFE 0 0 1 1 +12804 PF12963 DUF3852 Protein of unknown function (DUF3852) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain frequently seen with DUF3848. 
25.00 25.00 25.70 27.70 23.30 23.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.65 0.72 -4.02 10 46 2010-05-18 21:55:23 2010-05-18 22:55:23 2 1 31 0 11 44 3 106.80 63 97.58 CHANGED KpKKhhhl.hlsVLlLshhFsssAYAuss.GDVAGAIEGTWpsASsQIKTVVNpVVFPAIDLILAVFFFuKLGTAYFDYRKHGQFEWAAPAILFACLVFTLTAPhYIWQILGM ...................KhK+hhhhlsllLlLshhFsssAaAAs.s.GDVAGAIEuTWssASsQIKTVVNpVVFPAIDLILAVFFFuKLGhAYFDYRKHGQFEWuAPAILFACLVFTLTAPhYIWpILGh.. 0 9 11 11 +12805 PF12964 DUF3853 Protein of unknown function (DUF3853) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 22.10 22.10 22.10 22.50 21.90 21.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.19 0.72 -4.18 8 97 2012-10-04 14:01:12 2010-05-18 23:13:38 2 2 62 0 5 95 5 90.60 54 87.14 CHANGED sLpcLLpKPVWpMTGEELlFLh++u....sppEscos.psspss-++aVYGIpGIARLFGCSlPTANRIKKSGKID+AITQIGRKIIVDA-LALELAs+c .....................lppLLtKPlhQMTGEEhlFLtp+u......ppsc..sp.s.tssscs-++YVYGltGIA+LFGCSlPTANR....IKcS.GKID+AITQl.GRKIIVDA-LALELAG+K.............. 0 2 5 5 +12806 PF12965 DUF3854 Domain of unknown function (DUF3854) Ellrott K, Bateman A kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. This domain is likely to be related to the Toprim domain. 
21.60 21.60 22.10 21.80 21.20 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.67 0.71 -4.50 54 235 2012-10-01 21:47:57 2010-05-18 23:31:30 2 15 148 0 78 233 13 113.00 26 15.36 CHANGED FWpWVhpp.pIPlhITEGuKKAuuLLStGasAIuLPGIhsGYR..+cph....h.t.pL.lPpLthhApsGRclhlsFDp.DoKscThtsVppAlp+hGpLlpptGCpVpl..lpWs..t.tKGVDDhI........sspGtcsa ...............................................................................lhlsEG.hKs...th...h............p.........sh..slul......Gl.sh............t..................................l...hs.Lt...h.s.......s..+plhlsFDt..D...h.pp....ppVppAl.hchu....phL.pp.t.G.h..p..Vhl...........hs..Ws......s.sKGlDDhl........hst.....h................................. 1 16 59 75 +12807 PF12966 AtpR N-ATPase, AtpR subunit Dibrova DV, Galperin MY, Mulkidjanian AY, Finn RD rdf [1] Family Membrane protein with three predicted transmembrane segments, two of which contain conserved Arg residues. AtpR genes are found in the N-ATPase (archaeal-type F1-Fo-ATPase) operons and are predicted to interact with the conserved Glu/Asp residues in the c subunits, regulating the assembly and/or function of the membrane-embedded ring of 'c' (proteolipid) subunits (PFAM:PF00137). 26.00 26.00 26.00 26.00 25.70 25.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.04 0.72 -3.84 38 110 2012-10-01 19:47:47 2010-05-19 14:12:51 2 1 103 0 51 134 7 85.70 30 84.35 CHANGED hshhhulhsGhlLGhhaFuuLWhTl+phhsuppPshhhhhShLhRhulslushhh...l.....sssthttLLsshhGFlluRhlhlp......h...hpsp........t ...h..hhluhhhGhshGshaFsuLhhssR..........hhl..sup...sshhlhho.lhRlulslushhll.....upu..shttLlush.hGFhsARhlsl+hh...st............ 0 16 35 44 +12808 PF12967 DUF3855 Domain of Unknown Function with PDB structure (DUF3855) Ellrott K kellrott JCSG structure PDB:1O22 Family Family based on orphan protein (TM0875) from Thermotoga maritima that has been structurally determined as PDB:1022. 
The TM0875 gene of Thermotoga maritima encodes a hypothetical protein NP_228683 [1] of unknown function. Analysis of TM0875 genomic context reveals the presence of MMT1 (a predicted Co/Zn/Cd cation transporter) and an inactive homolog of metal-dependent proteases. 1O22 shows weak structural similarity with the phosphoribosylformylglycinamidine synthase 1t4a (Dali Z-scr=4.6), the yggU protein (PDB structure:1n91; with DALI Z-scr=3), and with the thioesterase superfamily member (PDB structure 2cy9 - found using FATCAT), even though they have very low sequence identity. 27.00 27.00 307.40 307.20 25.10 23.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.04 0.71 -4.68 2 5 2010-05-19 16:59:37 2010-05-19 17:59:37 2 1 5 1 1 5 0 157.80 81 100.00 CHANGED M+LMDSLEIhYh+KsK-htsLE+Kh+EIhpETGloL-sVNSE.hGRIFL+IsVLE-tEplPSFhlKALhPcpsAscLPLG-WssLp.VFVEEhsYL-sYs.MKIhS-tNhYTlYVPaSuVKpKNRsElVt-FMKYFFEoKGWsPGpYpF.VQElDslh MRLMDILEILYYKKGKEFGILEKKMKEIFNETGVSLEPVNSELIGRIFLKISVLEEGEEVPSFAIKALTPKENAVDLPLGDWTDLKNVFVEEIDYLDSYGsMKILSEKNWYKIYVPYSSVKKKNRNELVEEFMKYFFESKGWNPGEYTFSVQEIDNLF 0 1 1 1 +12809 PF12968 DUF3856 Domain of Unknown Function (DUF3856) Ellrott K kellrott JCSG structure PDB:2HR2 Domain TPR-like protein. The 2hr2 structure belongs to the SCOP all alpha class, TPR-like superfamily, CT2138-like family. A DALI search gives hits with the putative peptidyl-prolyl isomerase 2fbn (Z=16), the SGTA protein (Z=16), the PLCR protein 2qfc (Z=16), a putative FK506-binding protein (PDB:1qz2-A; DALI Z-score 15.3; RMSD 2.9; 16% sequence identity within 132 superimposed residues), and with the tetratricopeptide repeats of the protein phosphatase 5 (PDB:2bug; DALI Z-score 15.1; RMSD 2.5; 19% sequence identity within 117 superimposed residues). 
27.00 27.00 27.00 28.70 26.90 26.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.75 0.71 -4.37 10 11 2012-10-11 20:01:03 2010-05-20 00:48:56 2 1 11 6 10 16 0 144.00 56 82.41 CHANGED KPL+EVAtAYhALS-AE+pLp-Gta-EAAtSsR+AM-hSRTIPsEEAFDHsGFDAhCHAuLSuAhutLGRY-EuLpSA-cAL+YFNRRGELpQDEGKLWIAAVFSRAlAL-ulGRp-EAlptFRhAGEMIAERKGEhsGKEpLh ....................KPL+EVAtAYhALS-Ap+pLpsGta-EAAtssR+AM-hSRTlPsEEAFDHsGFDAhCHAuLusAhutLGca-EuLpSA-cAL+YFNRRGELpQDEGKLWIuAVaSRAlAL-ulGRt-EAlttF+hAsEMIsERKGEhsGKEph.h.... 0 1 1 7 +12810 PF12969 DUF3857 Domain of Unknown Function with PDB structure (DUF3857) Ellrott K kellrott JCSG structure PDB:3KD4 Family This family is based on the first domain of the PDB structure PDB:3KD4(residues 1-228). It is structurally similar to domains in other hydrolases, eg. M1 family aminopeptidase (3ebi, Z=10, rmsd 3.6A for 152 CA, seq id 12%), despite lack of any significant sequence similarity. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.74 0.71 -4.68 94 529 2010-05-20 21:30:11 2010-05-20 22:30:11 2 34 361 2 180 535 31 170.10 15 23.93 CHANGED plp.ssGshph.thphshplhsppGlcph.uphpl..sa...ssphpp..lplhpuplhpssGp..h.clsss..t.hhphpt.tshtt.hasstpphh.lshsslpsGs.hlchpaph...............psps..shhsstasshhthpht............PstchphplphPsshslphpth....ts.stsp.pppsst.pha...paphcpl.shh..Es ...........................................................................................tst.s..hhphtlplhspsulcph..uphpl..........a........stphpp..........lplhpsps.hp...s..sGp...h..p..plsss...p..htshpt.t.st..stshasst.....ct....hs.l....shPslcsGs...h..lchp.aph...............ptpp...shh.sha..h.t.h.h..h...hp..t............PhtptphplphPtp..hslphphh.......s.st.p...p......tsst..phh..phphpph........t............................................................. 
0 81 124 159 +12811 PF12970 DUF3858 Domain of Unknown Function with PDB structure (DUF3858) Ellrott K kellrott JCSG structure PDB:3KD4 Family This family is based on the third domain of the PDB structure 3KD4(residues 410-525). It is structurally similar to part of neuropilin-2 (Z=4.6, rmsd 3.6A for 83 CA, 7% seq id). This domain and the second domain appears to be part of peptide-n-glycanase (1x3w, 2g9f). 27.00 27.00 27.60 32.50 26.80 26.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.32 0.71 -10.45 0.71 -4.31 6 27 2010-05-20 21:56:00 2010-05-20 22:56:00 2 3 27 2 6 31 0 115.80 41 20.73 CHANGED YthhsLPpA+sGhAuhs..hshlNocRssNLLLPtLsDEsYTYhVcssssMcssTssppKcIsNsVGplslTV+ppucchcVsRoL+LpKQhITPAEYssa+pLMoEWtDssspoLLF ...............YhlhsLPstcsGlus.t..hsthNScRosNLLLPuLs-EsaTYsVssspGMc.sTsshcKcIsNslGplslolpspusphcVhRoLcLpKQhITPu..-YssaRpLhsEWtsscspoLla.............. 0 3 5 6 +12812 PF12971 NAGLU_N Alpha-N-acetylglucosaminidase (NAGLU) N-terminal domain Moxon SJ, Bateman A agb Pfam-B_6295 (release 7.7) Domain Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate [1]. Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations [2]. The structure shows that the enzyme is composed of three domains. This N-terminal domain has an alpha-beta fold [3]. 
25.00 25.00 27.20 25.40 23.40 23.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.93 0.72 -4.31 33 349 2010-05-23 13:20:51 2010-05-23 14:20:51 2 28 248 5 145 351 5 87.10 28 10.19 CHANGED ptsuspsllpRllstcupp.Fphclhss.....stDtFpl..ss.......ssuc...lhlpGsssl...ulusGLpaYLKahC......tscloW..Gsp..lcL.....PpsLPhl ............................ssspsllpRllstpspp...Fphp....lhps.t.........spDhFpl.ss..............pss+...lhlpGsssl...ulAsG........LpaYLK.as......tsploW....s.usp..lpl.......PtsLP................ 0 53 90 122 +12813 PF12972 NAGLU_C Alpha-N-acetylglucosaminidase (NAGLU) C-terminal domain Moxon SJ, Bateman A agb Pfam-B_6295 (release 7.7) Domain Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate [1]. Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations [2]. The structure shows that the enzyme is composed of three domains. This C-terminal domain has an all alpha helical fold [3]. 
25.00 25.00 27.20 26.60 21.70 20.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.04 0.70 -5.01 34 392 2010-05-23 13:23:40 2010-05-23 14:23:40 2 27 266 5 168 383 11 256.40 28 31.60 CHANGED pWlppYucpRY.....Gt.pstplppAWphLhpolYssst.ssttps....pslhstRPsls.........................................spsphhYsspclhpAhclLlpsus....phpssssacaDLlDlsRQsLu.ptupphhhphhpAYppcDhtphpttuschl.pLlpshDcLLuocspFhLGpWlps.A+uhu...........ssstE..................+chYEaNARs.lThW.....uspGslh.DYAs+pWuGLlpsaYh.RWphahstlppslpts..............pshsttpa..thh.phEhtW.sp....sschass.....pstG-slphupp...lhp+h .....................pWlppYsppRY......Gs...ts....tphtpAWplLhpolYssst.ttttps.......cslhstRPs.hs............................................tpsp.h.h.Ysssc.lhcAhclh.lpsss....phpss...ssacaDLlDlsRQsLu.phspthhpphh...puappp...............c......h..p.............t.h..ptt..upphl.cLl.shDplLuopppFhLGpWlpp.ARshu............s.stp-.........................cc..hYEhNARs.lThW........usp..G...s.lp..DYAs......+pWuGLl.......p.....cYYh.RW.phahstltpslpt.s..........................t.hsttp.a...thh.th.EptW..sp....sp.phass......psts.sslphupp.lh..................................... 0 58 102 140 +12814 PF12973 Cupin_7 ChrR Cupin-like domain Bateman A agb JCSG target 403193 Domain Members of this family are part of the cupin superfamily. This family includes the transcriptional activator ChrR. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.93 0.72 -4.14 88 1076 2012-10-10 13:59:34 2010-05-23 15:59:43 2 8 611 11 370 1097 835 89.10 27 55.05 CHANGED sWhsSPhsGVpRt.Lc.Rhu.s.E.u.+sTolVRasPGopFssHsHsGGEEIhVL-GsFsDEp.GcY..PuGoYlRNPsGSpHsPa.S.cpG..CsIhVKLt ................................................hsGlthh.Lp....t.s..t...p.......tssLl+hs.s.G...s...p...h..s...p...H...p...H..tG..sEp.h.h.V.L..cG....s....F.....p....D.c.....p......G.....c....a.........ssG..salh.ps.s.s.s...p.H.s.P.h...s..p..p..G...slhh............................................ 0 92 205 288 +12815 PF12974 Phosphonate-bd ABC transporter, phosphonate, periplasmic substrate-binding protein Bateman A agb JCSG target 416811 Family This is a family of periplasmic proteins which are part of the transport system for alkylphosphonate uptake. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.33 0.70 -4.99 178 3450 2012-10-03 15:33:52 2010-05-23 16:08:34 2 64 2087 10 931 7437 1283 236.80 20 68.71 CHANGED hullPt.p.sssphhpp.....apslhshL...scpl......G.h...p...lchhssssasshhpsht.sGplDluahs.sh..shlh..h..p..pp.s.s..h.pslsph............ht......pG...p...s.........s...................apul..llspp.-.u..s..l...............................p.......sl............................p..DL.......cGcpluhssssSsoGhlhPthhL..............pp...t.G.l...........s.....p...........p.......h........h.p................h.h..a...s....u..sH-ssh.h.uVh.pGpsDuus.................sts............t............sh..pphh...................pps.....h...............ppl+l...........l...........h...........p.o.s...h.Pst.......s.lss.p.ss....ls....sp....h....ppplppsllshs..p.s................................sps.pphlpsh....s...h.....s...uFhsss.spsY 
............................................................................................................................................................................hhs.p..s.tp.h.tp.....hp.s.hhph..l....p.cpl...........G..h......p....lp.h..h.h....s..s.s..a....s...s.h......h...p.u..ht...ts.p....l.D....l..u......h..h...s......s...h....s.h.s..........s....t......pp...s...s.......s...ps.l...spt............................ht........ts.....p.....s.............s..................................................a..p...S...h......l..l...sp.p....-....S....s...l......................................p............sl............................p...DL.........+G.+...p...h..u..h...s.s...s.s.Ssu...G..a..lh.Pt.h.h..L.........................................t.c...t..G...l........................s....sp.................................................p.........................h..p........................................................t.s...h..........s....u...u.....H......-.......s....s.l..h.s.lh.s.Gp........s........D.......s..us...........................................ss.s...........................................s....s.h..pp.ht...........................................................pp.t....h...................pcl.+.l...........l................................................h.................................p..o.....s.......h.....h.....P.s.........................s...h.s.s...p...ts.......l..s.............tp........h........tp..plpp..sh.hshst..s.......................................pt.tth.h.p.h.....t...h......t...th...p.......................................................................................................................................................................................................... 0 290 614 788 +12816 PF12975 DUF3859 Domain of unknown function (DUF3859) Bateman A agb JCSG target 416836 Domain This short domain is functionally uncharacterised. 
21.00 21.00 21.00 21.80 20.90 20.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.61 0.71 -4.46 33 110 2010-05-23 15:32:20 2010-05-23 16:32:20 2 7 98 2 33 109 13 130.10 29 51.56 CHANGED hsthpshlshhpaGIasphsscspphsp..................hpptTphVPAclslpFGhhh.slpcucspp...............lphplhHPtI...hsspGpshpsasuplaspss-hshahh-ph..acsls......GpWph.plthcscllA-KoFpV ............................h.p.hlshhpaGlasphspc.p.h.p..................hhp.TspVPAclshcFGhhh..slp+s.cGpp..................lphhhhHPsl.....spcGphhpsapsplhssssDhthYhhsp...a-sls........GcWRh.tlhhs.scllA-KoFpV... 0 5 14 24 +12817 PF12976 DUF3860 Domain of Unknown Function with PDB structure (DUF3860) Ellrott K kellrott JCSG structure PDB:2OD5 Family A protein family created to cover PDB:2OD5. 2OD5 is a hypothetical protein (JCVI_PEP_1096688149193) from an environmental metagenome (unidentified marine microbe). 27.00 27.00 32.50 138.50 21.90 19.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.98 0.72 -4.19 2 2 2010-05-25 22:37:37 2010-05-25 23:37:37 2 1 1 0 0 5 106 88.00 26 100.00 CHANGED McoppLRphI+paLsER..tNThEI.talsppMc.sosPpplsNlLptDcsIl+luTs++uGhh.sch.Is.Ws....lR....sta.-tcp. McoppLRphI+paLsER..tNThEI.talsppMc.sosPpplsNlLptDcsIl+luTs++uGhh.sch.Is.Ws....lR....sta.-tcp. 0 0 0 0 +12818 PF12977 DUF3861 Domain of Unknown Function with PDB structure (DUF3861) Ellrott K kellrott JCSG structure PDB:3CJL Family The 3cjl structure is likely a representative of a new fold with some resemblance to 3-helical bundle folds such as the serum albumin-like fold of SCOP. No significant hits reported by a Dali search. This protein is the first structural representative of a small (about 60 proteins) family of proteins that are found among proteo- and enterobacteria (REF http://www.topsan.org/Proteins/JCSG/3CJL). 
27.00 27.00 30.10 30.40 24.00 20.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.99 0.72 -3.88 13 132 2010-05-27 00:12:12 2010-05-27 01:12:12 2 1 130 2 37 96 3 95.10 40 92.80 CHANGED pppYRITlEpLsstpspstc...sLpFEhpsHDDlFsIVE+lKp+psh-..sppuspFuVGLKLhGEVMhpsRKHPLFt-FtPpFtsFMpsLKsts........p ............a+YRITlp.LpctcGcshsps.slpFEspNHDDIhpIlE+Lcs+csls..p-posuFuVGLKLFoElMhcsRc..HP..LFpshtstFppFMppLKpts.sp...... 0 6 20 28 +12819 PF12978 DUF3862 Domain of Unknown Function with PDB structure (DUF3862) Ellrott K kellrott JCSG structure PDB:3D4E Family PDB:3D4E shared structural similarity to beta-lactamase inhibitory proteins (BLIP) which already include 1XXM, 1S0W, 1JTG, 2G2U, 2G2W, 2B5R, and 3due. All of structures are involved in beta-lactamase inhibitor complex. (REF http://www.topsan.org/Proteins/JCSG/3d4e) 25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.76 0.71 -4.59 9 255 2012-10-01 23:09:26 2010-05-27 22:46:43 2 2 218 1 31 175 0 130.40 26 74.29 CHANGED ppp-lRtpF-KIKlushpspFcGGoSl--LcplFG.cPspppppsAGsVpLcsYTWshDsVolslp.LhpNSolsKoIoNFpFs.RD.ploLK-YsslpcGhoYc-VschLGEPDshSpAsSS-cpplQAlWlSGlKuc.spuusloLsFENstLosKoQssL .....................................................t........................p..usp.tplhthhG.p.tpp..........t..........W............s.t........s.....h.h..h.pst..s.....h.pt..s..h...t......+ptp..lshp.passl.........ppGh.oY..ccVpchlG.-P.-..s.h..spssh...s..s.ppphph.lahpshpus....sushsloF.pssplpsKsQ.sL............................. 0 10 20 27 +12820 PF12979 DUF3863 Domain of Unknown Function with PDB structure (DUF3863) Ellrott K kellrott JCSG structure PDB:3LM3 Domain Domain based on 1-364 domain of PDB:3LM3 which is encoded by the BDI_3119 gene from Parabacteroides distasonis atcc 8503. 
27.00 27.00 27.80 198.70 26.60 25.10 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.12 0.70 -5.85 3 9 2010-05-27 23:42:17 2010-05-28 00:42:17 2 2 9 1 4 10 0 350.70 77 74.19 CHANGED oslTL-GNRFVTLCIMIRTTPWEVSRDVKLHPRDEsSWHTLEGVRALREAFAKNNP-GRLTWGFTLNALEDpR-NY+QIR-YVVECQKKYGDEVSYFPGYFPAMYLPRERVNREMSEAIQLISKMVGNGYRPQSIMGGFLSADNLRYLAEKENIHVAHAVIWSQHNIDGGGADGSPSYPFYPSTEHFCKPAQGKSDFIDCVNLDGWTMDFICARRSGAMGHGIEGYNSRRGVGPIETYTGWGLDLGpREVMHTQAIHFDKGlELNGFGWVTNIWEAQMVHEFGQEFICKAMETWVTETKERWPDT+FVTFGEFGMLWRcpHKoN-DWNYRFVERGSGLGDSYNNLEIKWFMN ..EsLTl-GNRFVTLCIMIRTTPWEVSRDVKLHPRDEssWHTLEGVRALREAFAoNNPNGRLTWGFThNALEDGRcNYR-IRDYVVECQKKYGDEVTYFPG.YFPAMYLPRERVNREMSEAI-IISKMVGNGYRP.QSIMGGFLSADNLRYLAEKENIHVAHAVIWSQHNIDGGGADGSPSYPFYPSTEHFCKPAQGKSDFIDCVNLDGWTMDFICARRSGtoGHGI-GYNSRRGVGPIETY+GWGLDLGHREVMHTpAIHF.DKGlELNGFGWVsNIWEAQMV.HEFGKD.LICDAMchWVTGTKERWPDTHFVTFGEFG-LWRcQaKoN-DWNYRFVERGSGLGDSYNNLEIKWFMN. 0 1 4 4 +12821 PF12980 DUF3864 Domain of Unknown Function with PDB structure (DUF3864) Ellrott K kellrott JCSG structure PDB:3LM3 Domain Domain based on 366-449 domain of PDB:3LM3 which is encoded by the BDI_3119 gene from Parabacteroides distasonis atcc 8503. 27.00 27.00 106.30 105.40 25.20 24.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.62 0.72 -3.78 3 8 2010-05-27 23:42:58 2010-05-28 00:42:58 2 1 8 1 3 9 0 81.60 80 17.10 CHANGED RLALLRDWHpKNAPAYVIDFTRYDLKA+EPADPSPcKPAKDWSLINVINQKGLRPQDKPVLLscL-sEcQ-LIRKYYPELFK RLALLRDWHTKNSPAYVIDFTRYDL.AHEPADPSPpKPAKDWSLINKINQKGLRPQDKPVLIDKLEKEDQDLIRKYYPELFK. 0 1 3 3 +12822 PF12981 DUF3865 Domain of Unknown Function with PDB structure (DUF3865) Ellrott K kellrott JCSG structure PDB:3B5P Family Family based of PDB:3B5P encoded by ZP_00108531 from nitrogen-fixing cyanobacterium Nostoc punctiforme pcc 73102 is a CADD-like protein of unknown function. 
Superposition between protein structures encoded by CT610 from Chlamydia trachomatis (PDB code 1rwc), pyrroloquinolinquinone synthase C (PqqC, PDB code 1otv) and ZP_00108531 revealed that putative active sites in CT610 and ZP_00108531 are identical. ( REF: http://www.topsan.org/Proteins/JCSG/3B5P). 31.60 30.90 31.60 30.90 28.60 27.20 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.70 0.70 -4.57 3 20 2012-10-02 21:56:19 2010-05-28 01:00:09 2 2 12 2 4 21 2 140.20 31 89.32 CHANGED hK+lScpLsphLspDaluFSlNNNPll..SpISstSFuQhh.VhpQYSlFPKplluhh.hAthph.htsWsGVuEELLpNlNEEMGushGtIo.....HYTlLR+uLc-ulGlsVsNshPSVATppFlcoVcuLl-+ps.-hVLGusYAlEooAIPELhLl+ELV...tEuApcKcLsappohl.pFF-hHLD-lElEH+DcLtshluuYIpsEE.......QatEFt-GFpAsIDsM-sWWo-LspEth ......................................................................................................................................s.lstEl.tNh....EE.s.....................Hh.hhtpuh.p..h...t....hs...lpshh..P...uTpthh.pl.tlh.hps..phshGshYAhEstul.Eh.lhhcls...tchs..ct..ht........F.aphHl.DthE.tHpstL......tp.hsthlt.tp.............hhtGhhshlshhpsaWptL.....s... 0 2 2 4 +12823 PF12982 DUF3866 Protein of unknown function (DUF3866) Bateman A agb Jackhmmer:Q18B83 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 352 and 374 amino acids in length. 
25.00 25.00 112.90 28.00 22.90 22.40 hmmbuild --amino -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.92 0.70 -5.46 30 119 2010-05-28 08:24:37 2010-05-28 09:24:37 2 3 117 0 62 121 2 317.00 43 87.68 CHANGED tRAlsYspLsGpscsGDcVlLNsTAlthsLGTGGYcaVlus.sphs.s........ssGHIMKhRYTPhQhpVLusEEpcSPaHchhpssc.oLcGhPVlVu-LHShLsPlssul..+pts.....Ps...........hRluYlMTDGGALPltaSpsVtpL+cpGhlsuTlTsGpAFGGDhEAlNlaouLlAA+pllcADlllVu.GPGllGTGT+aGFSGlptGchlsAlssLGG+PlulsRlShADsRtRHpGlSHHolTslsclsLsssplslPhh.tt-.............ttshlpcQlpsh...sp+Hp....ll..hl.sssthtpsLpphslploTMGRuhp-DssaFlAsuAAuh .......................+AlsYsplsGpspsGDcVlLNsTAlthsLGTGGYshVl..us.sths....-ss...................ssGH.lhKhRYTPhQhtVLus-EppSPaHshhpp.sc...sLcGhPVlVu-LHShLssls.uul.+........p...t.s.....Ps...........h+lsYlMTDGGALPlhaS+sVttL+cpGhlsu.TlTsGpAFGGDhEuVslaouLluA+cVlcADlslVu.GPG.lGTGT+aGFSGlptuchlsAlstLGG+Pluh.RlShuDtRpRHpGlSHHolTshsclshsssslsl.Ph.h.ss-...................htthlppQlpt.......tp+....H.p..ll..hl...ss....sth....tp...h...L....c....p....h....s.lploTMGRuh-pDssaFlAAuAAu.h.................. 0 36 54 59 +12824 PF12983 DUF3867 Protein of unknown function (DUF3867) Bateman A agb Jackhmmer:Q18BR7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 190 amino acids in length. 
27.00 27.00 131.90 131.60 22.60 22.60 hmmbuild --amino -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.29 0.71 -4.31 10 47 2010-05-28 12:03:52 2010-05-28 13:03:52 2 1 46 0 9 31 0 187.50 55 98.74 CHANGED D+IIDFNELKNKVKDKDlDKFEsYIYSLYYclApGKLoMuDFS+cIhcYMEENNISQEKFhNIQKKhhER....YGhDsu-IEcQhKshGlDssshu...psssY...........................Esl+KohuFQEKYKs+IpsKosopYaIKN-KNDlclllEpEcVlLpSsKKIDLsDsELNEFLCSYKKllcDKKLcIslCEslKpY-Y .D+IIDFNELKNKA+DKDlDKFEpYlYsLYaSV.spGphoMu-FS+cIhcYMc-NNISQEKFlNIQKchhER....YGhD.....hp-lEcQhKshG..lDhsslG...psssY...........................Esl+Ks..lSFpEKYpuKlpsKslopYaIKN-KNDlclll-sEclhLpSsKKIDL.pDoELNEFLCSYKKhhpsKpLpIslCEslspYsY..... 0 5 9 9 +12825 PF12984 DUF3868 Domain of unknown function, B. Theta Gene description (DUF3868) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_1065 Family Based on Bacteroides thetaiotaomicron gene BT_1065, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 27.40 27.30 26.60 26.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.45 0.71 -4.57 11 235 2010-06-17 22:02:38 2010-06-17 23:02:38 2 7 74 0 30 212 2 109.50 21 24.44 CHANGED +phhhlhhhLhlhsh...sh.AQst...apGtlslssspLcQcG-olhlshslslpslplcSppulslsPhLhS....uspphpLPplllpG+pchKsYcRplAltstpccspath......llht ........................................h.....hhh.h.lhhhsh.......sstAQph.....tstlplssh.p.l.pp..pGc.p..lhlshslsl.s.s.l.plsoscslhLoPlLts.........uscpht..LPslhlsG+p.+.phh..p..Rphuh..ptp................th.h...................... 
0 7 25 30 +12826 PF12985 DUF3869 Domain of unknown function (DUF3869) Ellrott K, Bakolitsa C kellrott JCSG structure PDB:3KOG Family A family based on the N-terminal domain of 3KOG, which shows weak but consistent remote homology with adhesive families such as immunoglobulins and cadherins, suggesting it might form an attachment module. 20.40 20.40 22.30 28.10 20.20 18.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.42 0.72 -3.60 10 29 2010-06-17 22:50:21 2010-06-17 23:50:21 2 2 22 2 4 27 2 110.80 32 32.71 CHANGED shFTSC.EKEEFNVs.lcsssApATIusTVhDhsoG..sslT............oussTluuuu.....ssplAupotshsA..ostsYho.uospVplPALscGQaAslsVoIhLQchtuAscsss ..shFTSC.EKEE....h..sl.....s..spsssAphhIssoVhDssTG..pslo............oApsTlusuuh........hssplA.tpu.h...slss...ssssYhs.sstsVplstlpsGQhushsVslhLpp.tsst..s.......... 0 0 3 4 +12827 PF12986 DUF3870 Domain of unknown function (DUF3870) Ellrott K, Bakolitsa C kellrott JCSG structure PDB:3KOG Family A family based on the C-terminal domain of 3KOG which shows structural similarity to pore-forming proteins [1][2], suggesting it may have a lytic function. 22.00 22.00 23.00 28.10 21.10 17.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.17 0.72 -3.74 19 200 2010-06-17 22:53:45 2010-06-17 23:53:45 2 2 158 1 42 130 0 94.70 40 81.03 CHANGED lYIlG-A+os.sNsITcpYphFFIuFllcccsscIl..Dh-souTlpLTpsFl+plFlG+shhc.p-tlhp.ElcpRYaGSSQKAllVAY+cAhpKYpp ................YlsGcAKsPpsNsITchacoaaluhlls+cTGcIl..DA-CosoltLTppFV+pLFls+slpD..stLlt.ElcsRYFGSSQKALlsAlKDAp++Yp.... 1 18 33 35 +12828 PF12987 DUF3871 Domain of unknown function, B. Theta Gene description (DUF3871) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2984 Family Based on Bacteroides thetaiotaomicron gene BT_2984, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231). 
It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 34.00 33.90 20.90 19.80 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.83 0.70 -5.73 13 110 2010-06-17 23:00:13 2010-06-18 00:00:13 2 1 53 0 17 109 3 294.40 52 93.83 CHANGED opNhG.EaAEEAsII..psspspcpspFIEANTpElTLpHLKNDCIlPVFAKDNElTISHpsFIEsVa-AAsoFFuGEpIspP-IRVSHlIKGRIPEAIpKsAspLLEuDKThYYERhAFsI-IPTIYETVsGN+LsLoIGGVRAYNchNLY.SKKusE+FKlFIGF+spVCoNhCloTDGYpsslEVoNopELYpulLELFpsYNPAKH..lHLMQoLusThloE+QFsQlLGRMRLYQsLPpuhQKplP+hLlTDoQINsVAKAYlsD-NFGuh..Gs-lSMWchYNLLTGANKSSYIDSFLDRulNATElusGIssAL.+G.D-+..YpWFl ...........................p.hE-AsIl..ps.t...ppps.FIEANTpElTlpaLcp-CIlPVFuKDNEhTISH.sFI-sV.-AApsaFsGEplppP-IRVSHlIKGRlPpAlpK.sspLLEsDKTIYYERhAFshcIPTIhEslpGN+LsLoIGGVRAYNc.NLY.SKKus.E+FKlFIGFpspVCsNhCl.oTDGapsplcVhssp-LYpusL-LFppYNsAKc..l+LMpsLuso.hoEpQFsQllGRhRLYQsLP.u.pKp..lP+hLlTDoQINsVA+uYhsDcNFush..ssslSMWchYNLLTGANKS.SYIDoFL-RulNATElusGIspAL.pG.Ds.c..YpWFl....... 0 4 16 17 +12829 PF12988 DUF3872 Domain of unknown function, B. Theta Gene description (DUF3872) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2593 Family Based on Bacteroides thetaiotaomicron gene BT_2593, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231). It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 
27.00 27.00 29.40 32.30 26.30 23.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.61 0.71 -4.63 21 300 2010-06-17 23:03:44 2010-06-18 00:03:44 2 1 109 2 24 255 3 133.40 51 87.93 CHANGED hlsplhshss.lshuhlsloAC..sc-LDlQQsYPFoVETMPVsKcIspGETsEIRCpLpR-GcFssTpYTIRYFQPDGcGpL+h-cGTVhhPNDRYPLscEpFRLYYTStS.s-pQoIDlYlEDsa..GphpQloFsFNNcs ........l...phhsh.sh.lshsshsLsuC..-c-.LDlQQuYPFoVEoMPV.cclspGpTsEIRCpLKRpGcFssTtYTIRYFQ.DGcGpL......+h.DsGhsFhPN...........DRY.Lp...........c...........-.pFRLYYTStS.s-pQslcVaVEDNF..GphhpLsFsFNNc....... 0 7 20 24 +12830 PF12989 DUF3873 Domain of unknown function, B. Theta Gene description (DUF3873) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2286 Family Based on Bacteroides thetaiotaomicron gene BT_2286, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 27.60 27.30 26.50 26.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.39 0.72 -3.85 9 114 2010-06-17 23:08:19 2010-06-18 00:08:19 2 2 63 0 6 83 0 67.20 61 90.38 CHANGED tsphshNGVSlCpssGcEKYpKaphslt.t+tpphYQYDYRcs.ssELFSCVusTL-ECRcpRDcWLppKp .....ppMTlNGVSTCppuGpEKYE+FQ.GlG.RR+RThlQYDYRHs.DGELFSCVKPTLDECRptRDcWLstKp......... 0 3 5 6 +12831 PF12990 DUF3874 Domain of unknonw function from B. Theta Gene description (DUF3874) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4228 Family Based on Bacteroides thetaiotaomicron gene BT_4228, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 
27.00 27.00 27.00 27.20 25.90 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.35 0.72 -3.86 12 222 2010-06-17 23:12:49 2010-06-18 00:12:49 2 4 61 0 19 204 0 72.40 34 14.28 CHANGED Y+hss.EEVhtsCFR..hstt.sEcsh.LSAusIFphLpctpPAAMRGssshphuplLhuhGlcRpHTcaGNVYpV ........hsshEplFhphFR..sApp...tE-uchLoss-IhphLp+pssh.s.hps.s.plspFGRlLpph.Glpp+HTppGslYpV.......... 0 9 18 19 +12832 PF12991 DUF3875 Domain of unknown function, B. Theta Gene description (DUF3875) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4769 Family Based on Bacteroides thetaiotaomicron gene BT_4769, a conserved protein found in a conjugate transposon. As seem in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231). It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 27.50 27.20 25.40 26.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.69 0.72 -4.28 17 294 2010-06-17 23:17:18 2010-06-18 00:17:18 2 4 123 0 37 265 8 53.20 67 7.11 CHANGED RNlhKssTLESKFPLLAVEpsCIlSKDADlTVuFcV-LPELFTVTuuEYEAhHS ...........RNh.KhoTLEsKFPLLuV..E.p..GCIlSKDADITVAFcVELPELFTVTuuEYEAIHu... 0 13 32 37 +12833 PF12992 DUF3876 Domain of unknown function, B. Theta Gene description (DUF3876) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0092 Family Based on Bacteroides thetaiotaomicron gene BT_0092, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 
27.00 27.00 28.00 28.00 25.40 24.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.09 0.72 -4.05 19 215 2010-06-17 23:20:33 2010-06-18 00:20:33 2 1 93 0 21 156 1 93.10 42 80.70 CHANGED hpsspp..shchcplsGsWcSlst+PsVhla+-ucpYplolhthsthstphpPpTY.l.p.ccsGshFIsTGa.Rl.lsYcptpDlLohSstG...-YlRs ...........h..oCp..sspch-tlsGsWcSVp.G.+.PsV.hIY+-....Gcs....Y+VTlhp+SshpRph+PcTY.l.p..E-sGsLFhsTGa.RlsluYscusDlLohSPsG...DYlR..................... 0 6 14 19 +12834 PF12993 DUF3877 Domain of unknown function, E. rectale Gene description (DUF3877) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_0237 Family Based on Eubacterium rectale gene EUBREC_0237. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 27.00 27.00 129.70 129.40 26.10 25.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.23 0.71 -4.71 14 33 2010-06-18 21:49:19 2010-06-18 22:49:19 2 2 33 0 4 31 2 175.20 45 90.95 CHANGED acRLE+sLIDlIKEEQAKLGYRKEpIRLYYPLSSLNHFFpscsss-cMpctLpp............Fschhcp+LGclpVopKs-RFCFpIPEpGu-YVHEphptNEFI+-LIELlu+HGCTh--IhsLF+paS-plhhEcMsNGEFDhhIpFpp-s-DsYYYCFKDEGCHIIYHRFLPEDYsDFGF ..ap+LE+slIDlIKEEQhKLGYc+EsIRLYYPloSLN+Fhps..-s..s.tccMpptLpp............FscphpppLG-lclo.pKs-RFChpIPtcGupYVHEpsscsEFIK-Llplluc.HGCThE-lhclFc....pYS-.plhh....EchpsG.EFDhllpFp-ss.DsYYYCh+DEGCHlIYHRFLPEDYtDFsF... 0 3 4 4 +12835 PF12994 DUF3878 Domain of unknown function, E. rectale Gene description (DUF3878) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_0973 Family Based on Eubacterium rectale gene EUBREC_0973. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737). 
it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 27.00 27.00 63.70 63.50 20.50 19.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.05 0.70 -5.24 3 29 2010-06-18 22:00:08 2010-06-18 23:00:08 2 1 28 0 2 27 1 283.00 34 86.14 CHANGED pISRAFphLAElFEYQVFELCVuE-....pYlIPYMMNDAVECYLol+GsploGcYp+D-ElEsuupLttcEcRYGLIVHQGEENVFTLWFDELcEHpsCF+YHEIGHFWV-GQEQWRQLVYMIGTItEKYpYLGEEYCNEsEptIMSLIEFAPFRhWoPVsEDLEEa.YPAThEGIDCMEcLARcAuD+DYLKWIc+YR+aPo++hE+LLuR+LtDPKRQcLYEhICccVpsAS-sYPpRNYGEcINpRIQ+KRcpl-KKLLEcGFTGpYPpYpKKphoVsVTEEHPFTluhLEaEDFKFKlQ ................................sh.hLtplh-.p.FE.lh...h.s-p................phhls..YhMNDAVEsaLsFc..su+hoGpYpp..-h........E...sthtApl...ptp......-s..tYsLlV+Q......t-.sVh.T...laFcclp.chphYpYtcIGHFWVcGhE.hRpL.YhluhlpDKacYLGcchCs-pEtcLhpLh-FsPhp..h..YssVsEp.........c-.....asso..t..EulshMpclAtcssDcshl+hLchYc+pPs+hhp+hluthLpcs++tplhchlhcclppAopsYspRsaG...pp...hc...p+hp...c...h...tcphcp...cLhc...pGhpsh.................................................................... 0 1 2 2 +12836 PF12995 DUF3879 Domain of unknown function, E. rectale Gene description (DUF3879) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_1343 Family Based on Eubacterium rectale gene EUBREC_1343. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 
27.00 27.00 27.90 32.60 25.00 26.70 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.20 0.71 -4.70 3 33 2010-06-18 22:08:48 2010-06-18 23:08:48 2 1 13 0 7 33 6 165.70 48 87.36 CHANGED plNSSSVQcQLKAAGIDTNSKQYKAAlSEMM+sGNGAMYTNIQAIKNLMS+YDKDGDWINPsTGLAGLLVTDENcNS+KRIISIPESS+EEMFELTKKEFLpENGVpNGDTTKRo-VYNNLYRKM-KcDRLAAGYTLEcYERQYRQAFsDAAKsADPsWEAGKPIhAGALDcITRESAEoG.........RKSs- ................................sGIsTNSKpYKA.........s.........l..pp...........MMpstpthhapsh...tu.....IKNLMpQaDpsGDhlss.sGlsGhsVTscs.sSap+IlSlsEs.+pcMF-.sK+EFlpENGh.NGDTTKRpslapsh.hphsK-cRLuAGWTLEQYEtQYRpAhhtAsKsAsPsW+sGpshssuhLDslTRESsEus....................... 0 4 7 7 +12837 PF12996 DUF3880 DUF based on E. rectale Gene description (DUF3880) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_3218 Family Based on Eubacterium rectale gene EUBREC_3218. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), It appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 21.70 21.70 21.70 21.80 21.20 21.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.75 0.72 -4.07 48 247 2010-06-18 22:21:15 2010-06-18 23:21:15 2 10 156 0 76 245 24 76.70 27 17.81 CHANGED WhsDsPhhtlas.h.sl.hsshN.hIFhFDpsphppa+shGhpplaaLPLAssssRhs.hhtp..................t.ppapsDlSFVGs..hYpp ...............................WhsDsPhah..hs...h.....pl..hs..hs.hlFThDt.s.slpha+.s.h.GhppVa.aLP.LAsssph.apPhhtp.....................ppaps-lsFlGssa.................... 1 35 64 67 +12838 PF12997 DUF3881 Domain of unknown function, E. rectale Gene description (DUF3881) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_3695 Family Based on Eubacterium rectale gene EUBREC_3695. 
As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 27.00 27.00 106.10 105.20 21.80 20.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.72 0.70 -5.40 28 81 2010-06-18 22:31:02 2010-06-18 23:31:02 2 2 75 0 8 77 13 258.60 41 95.97 CHANGED LRAIGFSslpp+c-lcclLcplhcphsppphl..p.pcspcasEhp+-ausshGIsVpG-hDc.....sspFch-YYaPYapGssloopt-lslE++s-+EuYsGlCDDh+lGloLIFYLQNsh-Yhcp+....................thsphshphpulsLSGLuspGpILLPlpKscpppcp....pcppscs.RppLlsAA+sGDpsAIEoLTl-DIDhYStlSRRlhsEDlaSIVDThFMPaGlECDpYSllGEIl-lcpppNphTsEplY.hcL-CN-lpFclCINcpDLlGEPtVGRRFKGpIWLQG ............L+ulGFuplpp+p-lcclLcpl..c.p.hsp.pphl..p.pc....tpphsEhpp-h........usshGIslhG..ch-c.....pspFph-YYaPYhpusslooht-sslE++h-+EsYsGls-Dh+lGloLIFaLpNshEYhppc.....................htt..hpspulsLoGLuhpGpILLPlpKsppphcp....p.pptscsRppLlpAA+sGDpsAhEoLTl-DhDhYSplS+Rlh..pEDlaSIV-ohFMPhGlECDpYSllGEIhplcpppNthTtEplY.hplcsN-l.FcVsIscpDLhGEPtlGRRFKGplWhQG................ 0 6 8 8 +12839 PF12998 ING Inhibitor of growth proteins N-terminal histone-binding Wood V, Coggill P pcc Pfam-B_205 (release 24.0) Domain Histones undergo numerous post-translational modifications, including acetylation and methylation, at residues which are then probable docking sites for various chromatin remodelling complexes. Inhibitor of growth proteins (INGs) specifically bind to residues that have been thus modified. INGs carry a well-characterised C-terminal PHD-type zinc-finger domain, binding with lysine 4-tri-methylated histone H3 (H3K4me3), as well as this N-terminal domain that binds unmodified H3 tails. Although these two regions can bind histones independently, together they increase the apparent association of the ING for the H3 tail. 
23.70 23.70 23.80 23.70 23.60 23.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.25 0.72 -3.76 79 827 2010-06-28 12:29:44 2010-06-28 13:29:44 2 12 279 6 538 851 2 102.20 26 28.97 CHANGED shL-cal-sl-sLPtElp+hhs.l+-lDtp...hppthppl-pphccalppstt..............................ht.p....pcpphhppIpcphpcsppht-EKlplusphh-ll-++h++LDtchcph .......................................................hl-cal-s.....l-sLPh-lp+phs.h+-lDtp..........hpshhp.p.l...-phhpca..hppspp...................................................................................t.p...pcpphhppIppshtcstchuDEKlp.lupphh-l.l-++l++LDtclth................................................. 0 154 245 401 +12840 PF12999 PRKCSH-like Glucosidase II beta subunit-like Coggill P pcc Wood V Family The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing [1]. The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum [1]. 
26.30 26.30 28.10 27.00 26.20 26.20 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.49 0.71 -4.69 5 445 2010-06-28 12:42:30 2010-06-28 13:42:30 2 22 268 0 317 453 13 145.60 32 32.92 CHANGED lhlPlLuluLhlusA.lscLRGVuPDcLcLYpPD.ENGN..WKCLNcscIhLSFDQVNDDYCDCPDGSDEPGTuAC.pNG..KFYCsNcGaIPuYIPSF+VDDGVCDYclCCDGSDE..plG+CPN+CsElAcpacchpsE+Nspl+sGLKIKccllltup+KsDElpp+hcELccoLhAcppct ........................................hhhh............................hGl..t..thY.p........t.......a.pC..l.ss..s....th......lshsp..l..ND-aCDC.s.D.GSD.EPGTu.A...C....s...........ps....................tFaCp.....N...t.G..a..hs...h.....h.....lspsp..VNDGlCD...CCDGSDEh.............st....s...t..C......spCtpht.pt..pt..p..t.....p.hp...tu....ht.h.+t..hh....t.....t.h....t....t..............tp.tt.t...............tht........................................................................ 0 105 178 269 +12841 PF13000 Acatn Acetyl-coenzyme A transporter 1 Coggill P pcc Wood V Family The mouse Acatn is a 61 kDa hydrophobic protein with six to 10 transmembrane domains. It appears to promote 9-O-acetylation in gangliosides. 
102.60 99.30 109.50 100.10 102.50 99.20 hmmbuild -o /dev/null HMM SEED 544 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.79 0.70 -5.99 2 464 2012-10-03 03:33:39 2010-06-28 15:33:01 2 7 252 0 348 478 3 242.70 26 72.83 CHANGED phYhLIhLYLhQGlPhGLshGolPahLKs...psSaosLuhaShAsYPYSLKllWSPIVDohYs+phGRR+oWllPs.hl.u.sLhhhuasl..........Dsahs+GsuhlpSho....TW.F.LLVFlCATQDIAVDGWuLshLs.EpLSYASTAQTlGLNhGaFhSFTIhLshsSs-FANpahRsIP.scGhIoLuGYhKF.uhhhhlholhlhF..............................aD-us...pQphusIp.ha+shhtsLpLKshRpLhhlHhluKhuF.sNEshT.LKhhE.GhppE.LulhlLIshPhtlhhGhYss+hSs.+s............................Ls.WLhGahGRlsuhlLsohlV.......+pFP........hFh.lhhpahLsu.hsTlQFlulusFHo+luDPhlGGTYMTlLNTLSNhGGoWPphlhhpMhshhTV.pC.ThP...+lhsotsuphptC.phLsGTshhhRDGYYlTsllsIhlul.lhhuhlp.hlh+LpphPISSW+h........T ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p............................................................... 
0 113 181 282 +12842 PF13001 Ecm29 Proteasome stabiliser Wood V, Coggill P pcc Pfam-B_682 (release 24.0) Family The proteasome consists of two subunits, and the capacity of the proteasome to degrade protein depends crucially on the interaction between these two subunits. This interaction is affected by a wide range of factors including metabolites, such as ATP, and proteasome-associated proteins such as Ecm29. Ecm29 stabilises the interaction between the two subunits. 20.50 20.50 21.00 21.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 501 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.69 0.70 -6.01 45 308 2010-06-28 15:32:17 2010-06-28 16:32:17 2 15 231 0 217 302 1 426.60 25 27.72 CHANGED ls+V-LRl.ALA-sDccLcphLspaLsPlLLKLuSspsuVRpKVlcllpH.lNpRlp.u.tsIpLPVssLLcQacpss.....ssshV+pFsllYlppGl-RL.sssc+hpllPpllpGI...........S......ptshsptuhhFtllh.+lL..h+h..P.cssp-ppphcsp..htl.t-...p.sthLh.hhsph.hh.......t..................................ss.sssulssppsshhoh.t.....stsssh.s..pLpcsKhslLcFLsuuhh...............................s-tphhlshLlASuD............ss.ppluchu-phLK+hpss........hEsspllppLapLah...........................s.t.tptt.PspssLph+ILshLsKShtAssphspshplhptuLhos.h..ss+lcshshphh.................................spshpphssphlptlpshlhspshshh.......spsp.t......tpshplRshsYcslGhLsp+tsphh.c-......hsllpaLFcuL..cspss-ltsuIpcALuulhsuhsp..p.............................t.thphhh.lh.ttpp...........p.ttssRasAl+aANpsaPasDssuRalsllu 
......................lp+l.hRl.uhA-sD.ppLpphlppaLsPll....L....KLuS.s.pt.uVRp..............K.........V...............h...pllt...H.lspRlp.s.splpLPltsLl.Qappss................sshl.ppFsl.lal..phuhsRl.sspcphplhPtllpsh............................p......p.t.hhphhh.h..hhh...+h............p....s.p..p.t....pp.pph.....hl.s....h.phhLh.hh.hh...............................................................tsth......t...shhoh............st..ssh.h....p..pL.pphK....lsllpFl.tut.h...................................................s-.phhh.hlhAu.uD............sp.pp...l.ushu-..L..K+hts.....................hp..s.sllppLaplah..............................................s...t.th.Pspstlph+llshL.+.Sh.Asp....th..st.hpll.t..slh...us.........pt.+hcthshphh................................................h...ths..hlp..ltshlhsthh.hh......................tps.phhshuYpslGhL.upphsp.h...h.ppc..............hsllt.LFpuL.........ppp.tsphthuI.ppALsthhsshtt..t........................................t..hhh.h..lh.t.............................................................................p..htsR.sul+auspshs.pch.uRal.llu.............................................................................................................................................................................................................. 0 63 112 179 +12843 PF13002 LDB19 Arrestin_N terminal like Coggill P pcc /wood V Family This is a family of proteins related to the Arrestin_N terminal family. 
21.60 21.60 21.90 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.25 0.71 -4.80 5 121 2012-10-02 22:29:00 2010-06-28 16:40:07 2 3 103 0 96 133 0 181.80 37 30.53 CHANGED VpLsLlQKVHFHKPFhsshpuIQTCpNCpoKTTcl+SWDIQsNTs-LsVGoasaPFSaLIPGSlPAoooLGusuETplKYELpAsVsYhD............PctthSs.uKcplLpLsMPIsVTRSlhRGPDKNSLRVFPPTELTAsAVLPNVVYPKSoFPLEMKl-GlSpGD....RRWRMRKLoWRIEEpTRI+uHAC-pHKH-L ...................................hphhtplphpKP..............s..........hpsCtsCpsphs-LppWph....L.spst..s.LppG.p.HsaPFSaLlPGpLPuSss.......s.s.l..sp..lpYcLpAp.uph.....................ss.up...t..lphphPlslpR..ol.h.t.G.s.D..+..sSlRl....FP..P.T.slsAsssLPsVlaPtu.o.FPlph+LcGl..s....s..ts......pRWRlRKlsWRlEEpt+lhu.ACs+Httc........................................................................... 0 18 48 84 +12844 PF13003 MRL1 Ribosomal protein L1 Coggill P pcc manual Domain This family includes putative ribosomal L1 and L10 proteins and fragments. 21.90 21.90 23.10 22.40 21.40 20.80 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.91 0.71 -4.34 7 64 2012-10-01 21:21:48 2010-06-29 16:43:58 2 1 40 0 28 58 2 142.90 51 46.98 CHANGED M..................................................hs..pshc.st.htscppthsplph..............................................................slYh..............................................................................sslhhsp.hhsphppssVs ...................................hhtt.......................h...hs.RpaAAA.....pKs..sKKsKKss.KcKss-EKp.D-lEKhKsYsaMEuEPEDDVYLKRLYPRpIYEVEKAlcLLKKFQhLDFTsPKQ..............sVYLDLTL.DMsLt....K.K...KKVEPFs............................................................SslpLPYPFsSElNKVsVF. 
0 2 5 11 +12845 PF13004 BACON Bacteroidetes-Associated Carbohydrate-binding Often N-terminal Coggill P pcc Rigden D Domain The BACON (Bacteroidetes-Associated Carbohydrate-binding Often N-terminal) domain is an all-beta domain found in diverse architectures, principally in combination with carbohydrate-active enzymes and proteases. These architectures suggest a carbohydrate-binding function which is also supported by the nature of BACON's few conserved amino-acids. The phyletic distribution of BACON and other data tentatively suggest that it may frequently function to bind mucin. 20.00 13.30 20.00 13.30 19.90 13.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -10.03 0.72 -3.75 432 1404 2010-07-09 08:33:15 2010-07-09 09:33:15 2 119 153 0 197 1347 35 73.70 18 20.89 CHANGED sls..............ls...............................................ssss...W...l..........sls.tst.....................tts.lsls....lstN...ss.tp...t....Rp.upl...pl.....................t........tp........ts.......l.sl....p..Qtu .......................................................................hp...ss..................................spssss.....W........l...........sls.sst...........................tpsplsls..ss.t.N....ss.sp...t..Rs..upl...pl.............pss....shs........tp......l..sl..p..Qt....................................... 0 119 177 197 +12846 PF13005 zf-IS66 HTH_Tnp_IS66; zinc-finger binding domain of transposase IS66 Coggill P pcc IS-finder Domain This is a zinc-finger region of the N-terminus of the insertion element IS66 transposase. 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.54 0.72 -3.99 190 3219 2011-09-20 07:06:31 2010-07-12 16:03:04 2 18 1017 0 565 2843 332 45.40 39 10.71 CHANGED psptCss..CGs.plp..plGc-.l..sEpL-hlP.uphc.VhcahR.+auCpp..C.c ..................ppsCPs....CGG.pLp...hlG--.s...uEpL-lls..ush+.VI.cphR.KhACsp..C-............ 
0 111 286 407 +12847 PF13006 Nterm_IS4 Insertion element 4 transposase N-terminal Coggill P pcc IS_finder Family This family represents the N-terminal region of proteins carrying the transposase enzyme, DDE_Tnp_1 (that was Transposase_11), Pfam:PF01609, at the C-terminus. The full-length members are Insertion Element 4, IS4. Within the collection of E.coli strains, ECOR, the number of IS4 elements varies from zero to 14, with an average of 5 copies/strain [1]. 21.80 21.80 22.00 22.10 21.20 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.07 0.72 -3.97 36 1119 2010-07-12 15:15:56 2010-07-12 16:15:56 2 6 317 0 125 971 61 88.10 61 26.57 CHANGED sshsslusLschlPh-hl-pslptTGpushR+.RcLPuchsVahVluhuLapcps..hp-VhppLshsLss................hphsusSAlspARpRLGscPlctLFcp .............SL..RNPLTSLGDYLsPELISRCLAESGTVTLRK.RRLPLEMMVWCIVGMALERKEP..LHQIVNRLDIMLPG...............sRPFVAPSAVIQARQRLGuEAVRcVFs.c..................................... 0 19 55 80 +12848 PF13007 LZ_Tnp_IS66 Transposase C of IS166 homeodomain Coggill P pcc IS_finder Family This is a leucine-zipper-like or homeodomain-like region of transposase TnpC of insertion element IS66. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.91 0.72 -3.32 254 3061 2010-07-12 15:45:03 2010-07-12 16:45:03 2 20 942 0 489 2621 269 73.50 31 17.94 CHANGED ppLcpplthhcRphFGp+SE+h..s.......tQhpL...h...p..-h-sststht.........sphcptsttsss.............ttt+pc..ss.RpsL.P.scLP.Rtch .................cLptpltpLpRhhFGppSEKl....sc...............tQhch.........h..p....clptptsphp.....................tp.tc..ts......................t..+pp...ppR+PL.P.tpLP.R-p.p..................................... 
0 97 233 341 +12849 PF13008 zf-Paramyx-P Zinc-binding domain of Paramyxoviridae V protein Karlin D, Coggill P pcc manual Domain The Paramyxoviridae, which include such respiroviruses as para-influenzae and measles, produce phosphoproteins - protein P - that are integral to the polymerase transcription-replication complex. Protein P consists of two functionally distinct moieties, an N-terminal PNT, and a C-terminal PCT [1]. The P gene region transcribes proteins from all three ORFs, and the V protein consists of the PNT moiety and a more C-terminal 2-zinc-binding domain. This conserved region consists of the two-zinc-binding section sandwiched between beta sheets 6 and 7 of the overall V protein. It is the binding of this core domain of V protein with the DDB1 protein (part of the ubiquitin-ligase complex) of eukaryotes which represents the key element of the virus-host protein interaction [3]. In the Henipavirus family which includes Nipah and Hendra viruses, the V protein is able to block IFN (interferon) signalling by preventing IFN-induced STAT phosphorylation and nuclear translocation [2]. The P gene of morbillivirus is co-transcriptionally edited leading to a V protein being produced. 25.00 25.00 33.20 47.40 21.50 16.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.16 0.72 -4.77 26 152 2010-07-13 14:16:56 2010-07-13 15:16:56 2 5 76 3 0 147 0 46.90 50 16.38 CHANGED SlsWs..supsplpcWCNPtCuPlsstspptpCsCGpCPphCspCtpDs ......SlsWs..ssclhlpcWCNPhCS.lTsssp+tpCpCGpCPphCcpCcpD... 0 0 0 0 +12850 PF13009 Phage_Integr_2 Putative phage integrase Coggill P pcc Pfam-B_5288 (release 24.0) Family This family is found in association with IS elements. 
22.80 22.80 76.30 41.10 22.70 22.20 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.14 0.70 -5.49 22 56 2012-10-02 14:09:14 2010-07-21 11:59:58 1 14 53 0 20 60 8 318.40 30 34.49 CHANGED PphtpWtphupcalptpsp..uhsp+hpuLspFltpYlh..shPh...sPtsahp........sppph.shlpth...........ssspspphhshlppFlcalLpphho.pD...Dp.Gp.lh.stapNPlsph..shpsh.tt...sETs+ssLPhpaIpph+phLsss..........................papDatWspp..............ssWh.Vs.p..lscsDscClaRshth.p................htplWSPVpshsLashLpLPlRohQlRhLDSGEuDshhY....ppt.spWlhN.ssphA........ppshpcGhh.++hp-st................t.sGLalsTNKTup.............sp.ppGYsIPWt.....pp-lhaWLh+LRpWQpKY.NPIscsTsWp- .........................................................................................................................phttWtphht-alpsppt..shsp+hpuLstFhppYlhtpshsh...sPttahp............hstpph.phlcpt...........sssptpphhshlpsFlcallpchhoppD...-s.Gphlh.sta+NPls+h.....shps..p.....sEos+ssLPYpYIpchRphLsst..........................pFpDapaApp............ssDWhtVs.p..IDcs..........DPDC..VaRtpps.t................hhplWSPV+hhslashLpLPLRshQlRhLDSGEADshha........pt..spWhhN.psthA........t..ppshppGlh++hpcpt...................sGlalsTNKT...............st.tpGYhIPW........sp-lhYWL.KLRsWQpKY.NPIspPTsWs.............. 0 5 9 14 +12851 PF13010 pRN1_helical Primase helical domain Bateman A agb Jackhmmer:Q54324.1 Domain This alpha helical domain is found in a set of bacterial plasmid replication proteins [1]. The domain is found to the C-terminus of the primase/polymerase domain. Mutants of this domain are defective in template binding, dinucleotide formation and conformation change prior to DNA extension [1]. 
27.00 27.00 142.00 142.00 23.10 20.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.86 0.71 -4.54 9 9 2010-07-21 11:16:13 2010-07-21 12:16:13 1 3 5 4 2 12 0 132.40 48 16.02 CHANGED tG+cEcp-...tc-hEKL+cEhsKYs+a+GKTlEAIRpElCpclK+pl..........ppKhpthhphshpVlC-uKoYu-lGIDRSRGDW+VlphLhoHGVTDlDhlhQLLPpDSKVas.PKWD..KYFlHTLtKAWphVK.aLch ..t.scpEcp-..s-c-hEKL+cEhsKYs+a+GKTlEAIRpElCpclK+pl..........ppKhpthhphshpVlC-uKoYu-lGIDRSRGDW+VlphLhoHGVTDlDhlhQLLPpDSKVas.PKWD..KYFlHTLtKAWphVK.aLch. 0 1 1 2 +12852 PF13011 LZ_Tnp_IS481 leucine-zipper of insertion element IS481 Coggill P pcc IS_finder Family This is the upstream region of the conjoined ORF AB of insertion element 481. The significance of IS481 in the detection of Bordetella pertussis is discussed in [1]. The B portion of the ORF AB carries the transposase activity in family rve, PFAM:PF00665. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.80 0.72 -3.64 2 445 2012-10-04 14:01:12 2010-07-21 13:08:03 1 5 189 0 209 1294 169 81.90 41 27.81 CHANGED M.sHtpARLTlhGRsLLVpRVhppphshtpsupthGVShpsua+WlsRFRuEGLcGLhDRSSRP+psP+tsuPEp.cthhptRtQ ........................................M.sHpNAh.LT....htRh...chs..pp..ll..p.p.th.s...lscAAc...taGVS.htT.sp+WhsRaRspGt.s.Gh..s...D..R..S...S........R...P..p.p..o.P.p.p..h.s.s.th.tp.tIlplR..h................................................... 0 121 173 198 +12853 PF13012 MitMem_reg Maintenance of mitochondrial structure and function Wood V, Coggill P pcc [1] Family This is C-terminal to the Mov24 region of the yeast proteasomal subunit Rpn11 and seems likely to regulate the mitochondrial fission and tubulation processes, ie the outer mitochondrial membrane proteins. This function appears to be unrelated to the proteasome activity of the N-terminal region [1]. 
22.10 22.10 22.20 22.70 21.80 21.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.52 0.71 -4.00 148 1224 2010-07-21 12:18:05 2010-07-21 13:18:05 1 19 336 4 844 1138 8 118.00 23 35.91 CHANGED pEsEc...........l......ulctLl+............slpcpt........ho.sl....s.p+hptptpuLtsLpp+lhphstYLpsl......ttphshspplhpplpslhsLlP..sh.......................spph...........pcphphpssD..plhlhYluplspsph...slpsllssp ..................................................................Escp....h..........ulptLh+...................slpcpt.........hu..sl....s.p+lptptpuLtsLpp+ltplttYlpcl.................ttph..s.h....s....ppl...hpplpsl..hsL.hP..sh................................spph....................pc.thphpssD.....pLhlhYluplspsph.......slpphlpp........................................................... 0 315 475 695 +12854 PF13013 F-box-like_2 F-box-like domain Wood V, Coggill P pcc Wood V Family The F-box domain has a role in mediating protein-protein interactions in a variety of contexts, such as polyubiquitination, transcription elongation, centromere binding and translational repression. 21.60 21.60 21.80 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.53 0.72 -4.34 7 39 2012-10-02 00:56:31 2010-07-21 15:07:26 1 2 22 0 34 54 0 97.80 19 19.17 CHANGED sostos....huosps..tp.l..tlhDLP-ElLphIhphChctphh..hshs.shRpppch.......................................................hl..spsCptl+pl.............p.hpshhsh.s.hhhpshph...stp ...............s..............p..phtsl..plhD.LP.-LLphlhcpCspsphhtl.ppsp.shh..shtph.....................................................................hl....spppphhpt..............................................hh.......................................................................................... 0 24 26 31 +12855 PF13014 KH_3 KH domain Coggill P pcc Wood V Domain KH motifs bind RNA in vitro [1]. 
This RNA-binding domain is required for the efficient anchoring of ASH1-mRNA to the distal tip of the daughter cell. ASH1 is a specific repressor of transcription that localizes asymmetrically to the daughter cell nucleus. RNA localisation is a widespread mechanism for achieving localised protein synthesis [2]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -7.98 0.72 -4.24 992 970 2012-10-02 00:34:43 2010-07-21 15:11:14 1 161 261 0 679 13179 2478 42.40 26 10.20 CHANGED hhutlI..G+s.Gp.s...Ipplp.ppo.u.sp.Iplsp..............sss.spchlplpG ...........hGtlI...G+s..Gp.s.......Icplp..ppo.G.sp.Iplsp.............................sss.spchlplpG.................................... 0 314 459 633 +12856 PF13015 PRKCSH_1 Glucosidase II beta subunit-like protein Coggill P pcc Wood V Family The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing [1]. The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum [1]. The beta-subunit confers substrate specificity for di- and monoglucosylated glycans on the glucose-trimming activity of the alpha-subunit [2]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.89 0.71 -4.75 5 406 2012-10-02 14:19:22 2010-07-21 15:42:45 1 19 259 0 274 570 7 146.00 26 30.43 CHANGED Lp+clDEhcc-IcsI-p-lotlhEsLNpcaGhDDIaRAl-GppsscKlGGYsY+lsFhuSlaQ.....-DIpIGsFcctE...........Gs+LhY-cGuKCWNGP+RSAIVcVECGcsN-LlSVuEPEKCEYplpV+SPAuC...sss.hhKSLscE..EphAsF+I........s-hDEL ...................................................................................................................................tp..................t.......................t.......th...G....p.t.....at.....Lts..p...Ch.p..h.ptsp.Y.hYc..hC.........a...pp...spQp....................sttps.......p.........lGp...a...p....p..a.ph.................................................tph..h..s...h..h..ap..s..Gs.pC......W....N.....G.P........s.R.o.sp.......Vp.LtC....G...p.....p....s....c.....l....h..p.......V...s......E..Ps+..CpYthphpTPtsC...p................................................................................ 0 110 156 224 +12857 PF13016 Gliadin Cys-rich Gliadin N-terminal Coggill P pcc manual Family This is a cysteine-rich N-terminal region of gliadin and avenin plant proteins. The exact function is not known. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.13 0.72 -3.78 28 2134 2012-10-01 19:46:35 2010-07-21 17:26:44 1 3 145 0 43 2573 0 79.20 44 32.15 CHANGED QQQ..........hp..l.ppQLsPC+sFLhQQCsP........hhhsahpSphhQpSsCQVhQQQCCQQLtQIPcQsRCpAI+slVpAIIh ....................................................................................................................QQQ............hlQspl..pQQLsPC+sa.LhQ.Q.Css.......sh..sh..s...p.Sp.hhtpS.uC.p.lhQQQCCQQLsQIPpQsRCpAI+ullauII.......... 
1 0 3 21 +12858 PF13017 Maelstrom piRNA pathway germ-plasm component Zhang D, Coggill P pcc Zhang D Family Maelstrom is a germ-plasm component protein, that is shown to be functionally involved in the piRNA pathway. It is conserved throughout Eukaryota, though it appears to have been lost from all examined teleost fish species. The domain architecture shows that it is coupled with several DNA- and RNA- related domains such as HMG box, SR-25-like and HDAC_interact domains. Sequence analysis and fold recognition have found a distant similarity between Maelstrom domain and the DnaQ 3'-5' exonuclease family with the RNase H fold (Exonuc_X-T, Pfam:PF00929); notably, that the Maelstrom domains from basal eukaryotes contain the conserved 3'-5' exonuclease active site residues (Asp-Glu-Asp-His-Asp, DEDHD). However, the animal and some amoeba maelstrom contain another set of conserved residues (Glu-His-His-Cys-His-Cys, EHHCHC). This evolutionary link together with structural examinations leads to the hypothesis that Maelstrom domains may have a potential nuclease-transposase activity or RNA-binding ability that may be implicated in piRNA biogenesis. A protein function evolution mode, namely "active site switch", has been proposed [1], in which the amoeba Maelstrom domains are the possible evolutionary intermediates due to their harbouring of the specific characteristics of both 3'-5' exonuclease and Maelstrom domains. 
18.90 18.90 18.90 21.20 18.70 18.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.26 0.70 -4.85 27 133 2012-10-03 01:22:09 2010-07-21 17:42:00 1 4 76 0 89 141 6 189.70 29 46.12 CHANGED +......sschalPAEluhscaSLcp.G.lhs.apshIsPGpl.hG.hthcsppHop.sTHp.lPlsssshGcpshsplhpplhpalptp.........p........c.hs......................laopscplshVpsChcaLt............spsp...t..............lpVhslp.Lhhsl+ppshpht...p.h.shpls..sshhppsha-apsshuCpaHEc...spspaCshSh.VpRauahhschh...CtDluIphhss+H..........lP.ptcs .................................................p...spptalPsEluhscaSLpp.G..IhspaHphIs.......P.G.plshG.hthcspttop.soHp.lP.lss........t.huc.shsplhpplhp...alpss.................t.........phs.......................laspscphshVp.Clcahs............ptsthppp..................lplhslppLhhtlhpp....th.........pp.stphs...pshhshsha-asssh..pCcaHEc...Dh.haCuhuh.sp+hu..aslss.h...sp.hulplp..tH...lP.p..s..................................... 2 26 30 64 +12859 PF13018 ESPR Extended Signal Peptide of Type V secretion system Coggill P, Desvaux M pcc Desvaux M Domain This conserved domain is called ESPR for Extended Signal Peptide Region. It is present at the N-terminus of the signal peptides of proteins belonging to the Type V secretion systems, including the autotransporters (T5aSS), TpsA exoproteins of the two-partner system (T5bSS) and trimeric autotransporters (TAAs). So far, the ESPR is present only in Gram-negative bacterial proteins originating from the classes Beta- and Gamma-proteobacteria. ESPR severely impairs inner membrane translocation, suggesting that it adopts a particular conformation or it interacts with a cytoplasmic or inner membrane co-factor, prior to exportation. Deletion of ESPR causes mis-folding of the TAAs passenger domain in the periplasm , substantially impairing its translocation across the outer membrane [3]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.46 0.72 -6.70 0.72 -4.30 290 2495 2010-07-21 16:49:15 2010-07-21 17:49:15 1 530 854 0 281 2182 23 24.40 44 1.62 CHANGED MN.+.lY+llWNcspss......alsVSE....l..u+ .....MN.K.lY+llWNcspus......alVsSE....l...A+........ 0 52 130 207 +12860 PF13019 Telomere_Sde2 Ubiquitin-like; Telomere stability and silencing Wood V, Coggill P pcc Pfam-B_2457 (release 24.0) Domain Sde2 has been identified in fission yeast as an important factor in telomere formation and maintenance. This is a more N-terminal domain on these nuclear proteins, and is essential for telomeric silencing and genomic stability. 26.40 16.00 26.60 16.00 26.30 15.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.01 0.71 -4.55 20 270 2012-10-03 10:59:06 2010-07-21 17:50:03 1 9 236 0 202 1794 27 145.00 31 39.30 CHANGED lNVLlooFsGhsLPsTLulsLPsooolo-Lhcclss+lPtohsp.....hhLTTsSN+tl.sssssslSsLlussss............shLsLRLss.LpGGKGGFGSQLR.AAGGRMSS++KpN..t-sNuSsRNLDGRRLRTVsEAKALAEYLAlKPEM-..........+K-KEtRR..................cRWcslVctsE+ .........................................................................................................................................................................hhshpLthR...LpGG..KG...GFGShLR.AhGuph...cKosN.......p-uC...RDL.s.GRRLRsVNctKtltE.altppsEc-..............+c...p+.ct....c.+..................pphcth.t................................................................................. 0 73 113 166 +12861 PF13020 DUF3883 Domain of unknown function (DUF3883) Mistry J jm14 Jackhmmer:O25123 Family This is a domain is uncharacterised. It is found on restriction endonucleases. 
22.10 22.10 22.20 22.10 22.00 21.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.28 0.72 -4.25 140 729 2012-10-11 20:44:46 2010-07-22 11:22:32 1 17 639 0 217 541 130 96.80 24 14.27 CHANGED sEphshphhhpt.ppt.........s.............ahsccs.t.........ps...hGYDItuh...........p..........sGp......p+a..IEVKupt............tttssht.lopNEhthAp..pps-..pYhLhhVhss.........ssps....phhhl ......................................hEphshphhpph.ptp.................s........................hphscspt..........pG.tGYDlhuh.........s...............psp....................pha..IEVKupp..........ptpsshh.lotsEhthup......ph..p..p..cYhlhhVhsh...scts..ph...h.......................... 0 88 154 191 +12862 PF13021 DUF3885 Domain of unknown function (DUF3885) Mistry J, Aldam G gba Pfam-B_1173 (release 24.0) Family A putative Rac prophage DNA binding protein. This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved YDDRG sequence motif. There is a single completely conserved residue D that may be functionally important. 21.80 21.80 22.40 24.00 21.10 21.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.82 0.72 -4.25 21 280 2010-07-22 13:00:10 2010-07-22 14:00:10 1 2 244 0 30 151 2 37.40 48 20.14 CHANGED hHlYDDRGsDllusspcpLpsLYccascWILD..YDRpcI .aHlYDDRGh-lhssNsEshtchhcKYpDhI.-.aDpEcI....... 0 9 21 26 +12863 PF13022 HTH_Tnp_1_2 Helix-turn-helix of insertion element transposase Coggill P pcc pdb_2ao9 Domain This is a family of largely phage proteins which are likely to be a helix-turn-helix insertion elements. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.96 0.71 -4.39 4 30 2012-10-04 14:01:12 2010-07-22 14:02:37 1 1 29 9 7 63 4 133.90 38 93.68 CHANGED Mu+.hKpLEupLohcQRcAA.LLVtN...ELh.Ess.EKKTQ-ElApELGlsRhsLacWRTQspsFIsYhNhlADchLup+RspVYsQLM+hIpGsQ...PSVKAlDLFM+RaGLLTDKpVIE...Dsssssp+ost-IcpElscLsALl...K.c ...................Mu+.hccLctpLThpQhpAA.lLspN.....Elh.ps...s..cK+..TQ-ElApElGlsRoTLac.W.Rs.cNpsF.I.sa.p...s...clADshLup+RppV.....YspLh.phIhGsQ.......PSVKAhpLah.pRaGLLTD+.p.llc......sc.hssus+oN.t-IccplpcLpclht..c.......................................................... 0 3 4 4 +12864 PF13023 HD_3 HD domain Mistry J jm14 Jackhmmer:O25415 Domain HD domains are metal dependent phosphohydrolases. 21.60 21.60 21.60 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.12 0.71 -4.50 143 2536 2012-10-01 20:28:14 2010-07-22 14:13:12 1 13 2105 12 718 2045 280 160.20 26 71.76 CHANGED h-+LKplhRpshhtss.p.RtEssAEHoW+lAlhuhlluths..s....ttlDhs+slpMh...LlHDlsEl.sGDhhshs....t.hsptt....pptcEppAtc+lash....LPps......tu.........pchtsLWp..............EFEs.......spos-A+a.............A+slD+hpPlltshtspspshtth.plphsphhsp.sptltps.tspla ....................h.s+LKhhpR..Wsh.....h.t...p........p...s...EsluEHohplAh....hA....h...h....l....s....t...hh.....s...........pl.s..h...p...+l...h.hhs.............lhHDhsEsl.sGDlssss.....................s...h...tpp......h...c...t...hE..p.t.A...pc.lh.sh.....lPpp..............hp.............................cpht.s.L.hp..............................Eapt............tps...-.u.ph............................s+thDtlpshl...ps...h.p.t.t..ts...tth...ht.t...................hh.............................................................................. 
0 211 416 587 +12865 PF13024 DUF3884 Protein of unknown function (DUF3884) Mistry J, Aldam G gba Pfam-B_1352 (release 24.0) Family This family of proteins is functionally uncharacterised. However several proteins are annotated as Tagatose 1,6-diphosphate aldolase, but evidence to support this could not be found. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 106 amino acids in length. There are two completely conserved residues (Y and F) that may be functionally important. 21.70 21.70 22.20 22.00 20.20 17.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.88 0.72 -4.57 12 338 2010-07-22 13:17:09 2010-07-22 14:17:09 1 1 312 0 12 116 0 66.60 42 86.39 CHANGED lacpVYllsFpchsp..phss..cLppLGcWhssoGppWhCHS...cLshs-FcptFhp..pLsssphthhpht.sahPap ........lY.lpF.chs....hsu..ccLKpLGcWhsoTG+.WhCHS...chph-EFKphFLp..hlss-chch.s.sssah.hp....... 0 1 3 7 +12866 PF13025 DUF3886 Protein of unknown function (DUF3886) Mistry J, ALdam G gba Pfam-B_1536 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two completely conserved L residues that may be functionally important. 25.00 25.00 31.80 31.10 20.30 19.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.50 0.72 -4.01 17 157 2010-07-22 13:20:39 2010-07-22 14:20:39 1 1 157 0 25 101 0 70.30 53 82.87 CHANGED p......ptppcthoLKDtLss-lhtpLKttKcpLctcEpcRccccctc+tccp+c+EK..sKSFEELLsESpL.DW+c ...............QsccEslTLuDpLN-sLhppLKsKK+ELpscEE+KcAsEhcR+RpEpKEREK..NKSFEELLsESsL.sWKc.......... 0 6 15 17 +12867 PF13026 DUF3887 Protein of unknown function (DUF3887) Aldam G, Mistry J gba Pfam-B_1534 (release 24.0) Family This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 128 and 167 amino acids in length. The proteins in this family contain an N-terminal lipid attachment site. 24.50 24.10 24.60 24.20 23.10 23.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.40 0.72 -3.73 5 144 2010-07-22 13:28:10 2010-07-22 14:28:10 1 1 102 0 6 78 0 101.00 50 66.99 CHANGED KEsKEcVcQSAE-pEsM...+sY+-VHcK.YDpKMNKELNpulpLaEpAKEKGGKsIspssaKEDVQKITsSMLEDID...HlRTEIRVPKSKEQEHclYlGFLNEoEQA ...KpsKEKVcpusEps.-h...+cY+tVHEK.aD..KMscpls.....hEsuKEKsucpIspAs.hhE-lpKl.Ts.shlEDhD......c.lpptIpVsK...cEpc+plhluFhsEsEpA... 0 1 4 4 +12868 PF13027 DUF3888 Protein of unknown function (DUF3888) Mistry J, Aldam G gba Pfam-B_1080 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 111 and 149 amino acids in length. 22.20 21.80 22.20 21.80 21.80 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.76 0.72 -4.13 23 254 2010-07-22 13:30:48 2010-07-22 14:30:48 1 3 127 0 20 172 0 85.40 40 59.20 CHANGED hpDslhshLtP.IscslpcaYuc........h.a..ppscllclc+hts.spa...Fhlslclpsa.....Gs+ssshupDplThpls.....stscllcacHhc .....hEsALhshLashIppslEcQYt.........h.a..pCsclI...slK+lh.p.spal..Fplolphpsap....GspsPP.-KVTlThpsp......sclclpKlppc.... 0 4 15 16 +12869 PF13028 DUF3889 Protein of unknown function (DUF3889) Mistry J, Aldam G gba Pfam-B_1146 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two completely conserved residues (A and Y) that may be functionally important. 
25.00 25.00 46.20 45.40 23.20 17.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.09 0.72 -4.33 19 153 2010-07-22 13:45:40 2010-07-22 14:45:40 1 3 127 0 20 104 0 96.20 45 78.51 CHANGED llhshhhsh.hstsuhspsp...sshuKWt+lAhpcsKc+YPtAcllDYhalGRpptssp.oh-pF+lhL+cssK-FGVaVclpFcshTp+lhpIsl.Ep ..............lLhhsosushhsssulVcAp...PsYAKWGKLAVcKTKEpYPcA-IlDYLHlGR+s+TsphTVEKFKLWLREcGKEaGVFV-VpF-opT-KhIKloap+... 0 6 11 14 +12870 PF13029 DUF3890 Domain of unknown function (DUF3890) Mistry J, Aldam G gba Pfam-B_1148 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 70 amino acids in length. 22.80 22.80 24.40 26.40 21.00 19.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.24 0.72 -3.93 6 114 2010-07-22 13:48:49 2010-07-22 14:48:49 1 1 28 0 9 75 0 76.90 64 51.96 CHANGED Mspppsss-s.................lsclYpcIltLLslsccclSFpcFp.ahpLLEhIL.oRGIslpsLNhSphhLLlYYaI ..MSEQKsLQsQltuEEE.............LLVTKLHSEVLLLLGIDchALSRQNFLLHLSLLQAILVTRGIDASSLTYEQIFLLTFYHM. 0 6 6 6 +12871 PF13030 DUF3891 Protein of unknown function (DUF3891) Aldam G, Mistry J gba Pfam-B_1216 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 250 amino acids in length. 
21.40 21.40 21.40 21.90 21.20 19.60 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.36 0.70 -4.87 29 151 2010-07-22 13:55:02 2010-07-22 14:55:02 1 2 146 0 32 130 3 223.80 44 89.22 CHANGED MIlp....pptp.sahlIpQpsHuhluGplAtpWpps.hts.sphhtpslhAltpHDcGWhch-..tsPhls.....psutPhsFhchP...sph+lthappulspstpps.YuuLLsShHhstlhpppt...............spphppFlspphp+............QcplhpsLs..................hp........pt.hcppYtLLphsDpLSLhlChsc.....Puspctt-lshhtsu...........tpphpsp.tsssplplsPaPFppc.hslslph+tlspp ........MIhR....Ecs-.c.lLIcQHDHGaLAGEIA++h+-chF..........Es.c..........sahcETlsAIYEHDRGWIELD..+sPILND....ucshPYTFhDsP...........oslRhlFYolGLsEl.EssNPYuALLCS+HahSFshNc-.........................D-EhhuFaptEL-RQKRlLpsLo..................................p-........phshhD+HY+LL+FCD-LSLYVCMNc.........PGscKcpEIslFK-GFctoc...hssppcc.lpAcWlD-pTI+IoPFPFps-.FpshV+YKslsK........ 0 13 26 28 +12872 PF13031 DUF3892 Protein of unknown function (DUF3892) Aldam G, Mistry J gba Pfam-B_1252 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 87 and 104 amino acids in length. 22.30 22.30 22.30 23.60 22.10 20.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.94 0.72 -3.58 62 309 2010-07-22 13:57:37 2010-07-22 14:57:37 1 1 292 0 68 216 6 75.70 33 78.93 CHANGED plsslp+ss.tus....Isplt......hssspth...ohppslphlcs..G..............thssVtVh.......tspsG.pcYl+op..sDuspssNL.sL...P ..............................pIssl+Kss..cGs....lhthp......tssGpph...sh.pulp.hcs.G.....................plstVsVh.........p+pG.pcalRop..sDGtppsNLtsLP............. 0 27 50 58 +12873 PF13032 DUF3893 Domain of unknown function (DUF3893) Aldam G, Mistry J gba Pfam-B_1590 (release 24.0) Family This presumed domain is functionally uncharacterised. 
This domain family is found in bacteria, and is typically between 123 and 144 amino acids in length. There is a single completely conserved residue E that may be functionally important. 22.10 22.10 22.10 22.80 21.70 21.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.97 0.71 -4.06 15 111 2010-07-22 14:00:18 2010-07-22 15:00:18 1 2 106 0 18 96 1 127.70 47 17.45 CHANGED lIRINss.....-VPsahsh-..p-pshsps....GLat.........cppslYYSsGphshstss.....p........................hhpppllEIlslGs..c.pcEt-plAchl...Hhhp..pushhhpcslphPhPhHhsKh.lKcYlss..Dshthcp.--p.c.-h ............................................lRI..Nsu....FDVPpYGVIE.....sD.....-sLDss................uLYh..........DQKGMYYSTGEYS...hNsSt...............................................hhp+YILEIhPLGV.............K.sVERsYIAKMl...HYMC.CNS.SMLhcKN..lHMPYsMHMAKV.IKsYhTD.IDAREFKEFDDELDVDl............. 1 1 13 16 +12874 PF13033 DUF3894 Protein of unknown function (DUF3894) Mistry J, Aldam G gba Pfam-B_1594 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 66 and 79 amino acids in length. There are two conserved sequence motifs: FNIC and MALLNLT. 25.00 25.00 67.90 67.90 20.90 18.00 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.92 0.72 -3.96 3 82 2010-07-22 14:03:13 2010-07-22 15:03:13 1 1 82 0 3 25 0 52.10 94 74.74 CHANGED LATFNICSYLVAIVCMALLNLTFVlGAFQQKQYTSFVhALhM.FSFSIVAIlLl LATFNICSYLVAIVCMALLNLTFVIGAFQQKQYTSFVIALVMSFSFSIVAIVLY.... 0 0 1 1 +12875 PF13034 DUF3895 Protein of unknown function (DUF3895) Mistry J, Aldam G gba Pfam-B_1598 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 
There are two completely conserved residues (Y and L) that may be functionally important. 25.00 25.00 25.30 30.80 20.40 24.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.47 0.72 -4.50 11 104 2010-07-22 14:05:47 2010-07-22 15:05:47 1 2 87 0 5 53 0 76.10 69 54.50 CHANGED L.sphppchlp.alpsp.....ptIoAp-lCEhLIcpsussscpYoTsKsKIYs.VC.aL-aLspcGhlhhlcshshpDRlYtp ...........LSPLQKDILE..LlsS.....EEISALELCEpLIRuGKIsDERFTTNKPKAYGQVCLlLEGFV+EGKLIFVKsDEKRDRVYK.L............... 0 0 3 4 +12876 PF13035 DUF3896 Protein of unknown function (DUF3896) Mistry J, Aldam G gba Pfam-B_1603 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 122.80 122.60 20.70 20.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.15 0.72 -3.92 5 83 2010-07-22 14:08:07 2010-07-22 15:08:07 1 1 83 0 3 28 0 61.00 84 96.53 CHANGED MK+TYDYsATKKHLELKKQpLCKKLSNh+LSEKEREQIKhEIDNYEYILNLVEMNHYERGF MK.TYDYsATKKaLE.KKQQLCpKLssh+LScKEREQlKhEIDNYEYILNlVEMNHYERGF 0 0 1 1 +12877 PF13036 DUF3897 Protein of unknown function (DUF3897) Mistry J jm14 Jackhmmer:O25995 Family This is a bacterial family of uncharacterised proteins. Some of the proteins in this family are annotated as putative lipoproteins. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.84 0.71 -4.66 74 1082 2012-10-01 20:48:06 2010-07-22 15:45:39 1 3 1047 0 176 569 152 184.90 36 90.44 CHANGED hhhsul...hluGC.............us.s.sphh-..s...............sss.shosshssp........DhphsApcMspshlssPhltphhtpss.t..sl..lhlss..lpNcTsp.pIso..cphssplpptLhpo..G+hph.....ssssphs.shppphp.pppsuhhspsostthGp.luApahLpGplssh..lc...pstpp.hhYphshpLhslpoGhllWsscppl ..............................................................hshAh...hLuGC...........................s...........s.....................sslsp.ssslppp...........s.tst..spchs.shhhpPh.VuphhtssGsp......sVlhV-s....V.pN+Tst..pl......ss.....schT..cs.lp.suL.tss...GKFsL.......Vusppls.hs+pQ.Lshpsp.D.uL.sspS....p....AIuhu.............+tVGA.pYsLYushSus....Vp......................................t.p....hpMpLM.lpTGcIlWoscssl................................................ 0 49 94 138 +12878 PF13037 DUF3898 Domain of unknown function (DUF3898) Mistry J, Aldam G gba Pfam-B_1179 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. There are two conserved sequence motifs: DFG and FEKG. 25.00 25.00 40.70 40.50 17.50 15.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.75 0.72 -4.11 9 132 2010-07-22 15:16:23 2010-07-22 16:16:23 1 2 128 0 19 80 0 88.90 74 25.58 CHANGED ElQEpaosEQVhEAoAtIlEppPElELKhKLschplKuhLADFG-olHlAKVNGRYVlLIEuDslpFEKGsSPVEFLKP-sLp-Vlc+Ipp ..EIMEQFSTEEVMEATAQIVEHAPEVELKLKADHISVKALLADFGDpIHIAKVNDRYVLMIEADTLTFEKGFSPIEFLKPDELQDVIERIEN. 0 6 12 13 +12879 PF13038 DUF3899 Domain of unknown function (DUF3899) Mistry J, Aldam G gba Pfam-B_1174 (release 24.0) Family Putative Tryptophanyl-tRNA synthetase. 
21.70 21.70 21.80 22.20 21.60 21.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.11 0.72 -3.83 37 490 2010-07-22 15:18:45 2010-07-22 16:18:45 1 2 484 0 38 179 2 87.20 36 73.50 CHANGED hlNlhF.llullhlllGhhlhlhpsGh..FcshtauF++hppphtppp.tphhpp......p.....p.hhtshhhshhlss..hlhhlhslhltah .....alNIhF.hluLhhhIhuhhlhlhQpGh..FDss.pauF+Rlph.........phppp.c.pc.Ipcs.......spc...........ch.phhlupa..hhslllhs....ILhhlholhlohh................ 0 11 22 32 +12880 PF13039 DUF3900 Protein of unknown function (DUF3900) Aldam G, Mistry J gba Pfam-B_1279 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 360 amino acids in length. 21.00 21.00 26.90 25.80 18.10 16.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.41 0.70 -4.83 8 130 2010-07-22 15:34:20 2010-07-22 16:34:20 1 2 127 0 19 78 0 246.30 79 69.68 CHANGED DFTIpaLSFYVlpVEGcuEpssKpaKHaQTLDpspYEcSsLKDFLDGELtKIuKRKV-RHPKSEpsPTKIGRFlVEPGH-LDSNPNYNLFpRhRhAEThEsFp-tSEphV+oYLDTSAVRGGVFLVspA+L+KYaDDsFVFlhKCDFEpKVAsIoDpSTLI+cVEMAITTKNMKSIQYPYMPEEGMVEEuELKIHQuSHARYFEDFLKFVEYscSMPEIlKsQVhsMsp-HIt-Tap-pSEERppFEEslElW .DFEINYLSFYVVQVEGKGEuVDKRYKHFQTLDAEEYEDSSLKEFLsGELLKISKRKVERHAKTEQAPTKIGRFIVEpGHELDSNPHYNLFNRIRFAETKENFKDMSEPLVYTYLDTSAVRGGVFLIAQAKLRKYFDDPFVFVMKCDFEPKVASISDESTLIRNVEMAITTKNMKSIQYPYMPEEGMVEsGELKIHQASHARYFEDFLKFVEYERSMPEIMKT.QVMDMVYDQIEDVFEE..GT.EEREQFDQAMEVW 0 6 12 13 +12881 PF13040 DUF3901 Protein of unknown function (DUF3901) Misrty J, Aldam G gba Pfam-B_1316 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a single completely conserved residue L that may be functionally important. 
25.00 25.00 43.40 43.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.92 0.72 -4.67 32 262 2010-07-22 15:36:13 2010-07-22 16:36:13 1 1 134 0 36 125 0 41.30 48 84.45 CHANGED +K....p++.....hoFc-LVpENKcpLLsDpcth-cIEc+l-c+attp ..........R+.p+R......KSFcpLlpENKQpLLss+-shccIEERIEKRaph....... 0 8 23 26 +12882 PF13041 PPR_2 PPR repeat family Coggill P pcc Wood V Repeat This repeat has no known function. It is about 35 amino acids long and is found in up to 18 copies in some proteins.\ The family appears to be greatly expanded in plants and fungi. The repeat has been called PPR [1]. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.51 0.72 -3.99 436 33080 2012-10-11 20:01:03 2010-07-22 16:48:00 1 3913 396 0 23020 36618 476 49.20 26 28.42 CHANGED P.....s....l....l...o.YNsllsu.hs..c.pu.c.h.....cc.Ahclap-Mp..c.p...G.h...p.Pss.hTYshLlpu.hCc .......................shh.oasslIsu.hs.....c...p.....G...p..h...................cc...A.h.p..l.a....p....c..Mt.....p...p............G...l............p..P..sh..hTa.ss.llsu.hs............................... 0 2730 14486 19222 +12883 PF13042 DUF3902 Protein of unknown function (DUF3902) Mistry J, Aldam G gba Pfam-B_1357 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. There is a conserved LGI sequence motif. 
25.00 25.00 92.50 92.40 20.40 20.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.24 0.71 -4.95 10 107 2010-07-22 15:54:01 2010-07-22 16:54:01 1 1 79 0 3 76 0 158.30 69 95.33 CHANGED KuVLKsIlIShIFulsGhhhLLFsLh....hGsuDWlLsWlGVLMAYLSLahLIsLYs+sTYcKphsKlLlKosllSFshAVLGIhFGIlapLLssWSLolMhWYWLLlLlLaLhTIIoLVILVFVNppspsashlY+hLILLNlhLTLGPVLWPlhloIl.GNGM ......KSVLKSILISFVFSAVGMCWLLFlLF....+GDGDWLLSWlGVLMAYLSLYTLIDLYCKsTYDKKlsKhLIKTsVTSFSFuVLGISFsIIHELLTPWSLSLMVWYWLlMLlLFLTTIIoLV.LVFVNRKNHNFTssYRlLILLNlhLTLGPVLWPLLLoII.GNGM........ 0 0 1 1 +12884 PF13043 DUF3903 Domain of unknown function (DUF3903) Aldam G, Mistry J gba Pfam-B_1600 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. 25.00 25.00 51.40 88.80 22.50 21.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.85 0.72 -4.45 4 84 2010-07-22 16:14:07 2010-07-22 17:14:07 1 1 84 0 3 25 0 40.00 89 61.79 CHANGED hsATSphLAIpKVRsECKRRFGKoLLLQTEIpEElhhcQK VPATTQLLAIEKVRAECKRRFGKsLLLQTEIKEEIVFEQK 0 0 1 1 +12885 PF13044 DUF3904 Protein of unknown function (DUF3904) Mistry J, Aldam G gba Pfam-B_1386 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in viruses. Proteins in this family are typically between 437 and 448 amino acids in length. 
25.00 25.00 190.40 190.30 16.50 16.30 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.50 0.70 -6.06 2 198 2010-07-22 16:22:22 2010-07-22 17:22:22 1 1 2 0 0 189 0 398.50 96 99.09 CHANGED LVLFLhKEVLCEPChC-NPTCLGlTIPpsGaVRSAPGGVLLTETITEpPtLsEWTTSRs+LE-ohW.sG-sKsGKVSQTLFEAIQGTQMENCAVKAVhDToFVNLT+pDlVLG+lKVS.FGG-pDISKCGRKGLKVFICGGTsGYVTRGCPPEEC+G+KGRMMuLEPTsDCGVEKGhTT-RIKTGhlDlsSCCTQHGCTKGIRVEVPSPVLVSuKCpEloFRVVPFHSVPD+LGFARTSSFTL+ANhsNpHGWSKYsFNLRuFPGEEFIKCCGFTLGlGGAWFQAYLNG.VQGDGAASA-DVKEKLNGIIDQINKsNhLLEGEIEAVRRIAYMNQASSLQNQVEIGLIGEYLNISSWLETpTLTKTEEGLMKsGWCpSssHCWCPPcsshlPTIGYVDsIKEVTGTSWWMVMIHYIIVGLIVlVlVVhGLKLWGCl ..................PChC-NPTCLGlTIPpsGaVRSAPGGVLLTETITEpPtLsEWTTSRsRLEDSsWpGGEVKSGKVSQTLFEAIQGTQMENCAVKAVFDTSFVNLTRHDVVLGRVKVSPFGGEHDISKCGRKGLKVFICGGTTGYVTRGCPPEECRGRKGRMMSLEPTADCGVEKGFTTERIKTGKVDLDSCCTQHGCTKGIRVEVPSPVLVSAKCNEISFRVVPFHSVPDRLGFARTSSFTLRAsLANQHGWSKYNF........N...........LR..........A...........FPGEEFIKCCGFTLGIGGAWFQAYLNGEVQGDGAASAEDVKEKLNGIIDQINKVNLLLEGEIEAVRRIAYMNQASSLQNQVEIGLIGEYLNISSWLETKTLTKTEEGLMKDGWCRSSNHCWCPPDTVGIPTIGYVDNIKEVTGTSWWMVMIHYIIVGLIVVVLVVLGLKLWGCI.. 0 0 0 0 +12886 PF13045 DUF3905 Protein of unknown function (DUF3905) Mistry J, Aldam G gba Pfam-B_1447 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 19.70 19.70 21.80 31.80 17.60 16.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.95 0.72 -4.21 7 99 2010-07-23 08:51:41 2010-07-23 09:51:41 1 1 99 0 10 51 0 82.80 83 71.87 CHANGED csPhLDtThPHQIsh.PSFKGoGhphppPFVNpaGVVIGDSpYsS.NSPLppWSDEhDPulMAGDEWlHPTNDIGWpotENR-LL .......QSPILDETLPHQMNF.PSFKGTGKpMQQPFVNQYDVVIGDSKYNSENSPLpNWSDEVDPAIMAGDEWIHPTNDIGWISEENQELL............. 
0 4 7 8 +12887 PF13046 DUF3906 Protein of unknown function (DUF3906) Mistry J, Aldam G gba Pfam-B_1532 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved EKK sequence motif. 25.00 25.00 51.20 51.00 18.80 17.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -9.01 0.72 -4.36 5 108 2010-07-23 08:53:15 2010-07-23 09:53:15 1 1 108 0 10 48 0 64.00 72 94.54 CHANGED MaLYRFEAsl.cschlsIVIlApsEEpAF+LAElELEKaaLKlPslpElSLaEKK+Is+GAGYVl .MDLYRFEAVL.sNSIVPIV..VVApSEEQAFKhAEIELEKHFLPLPEVKEIuLFEKKKIRKuAAFVI... 0 1 4 5 +12888 PF13047 DUF3907 Protein of unknown function (DUF3907) Mistry J, Aldam G gba Pfam-B_1274 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 160 amino acids in length. There is a conserved AYTG sequence motif. 20.30 20.30 20.80 145.50 19.80 20.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.88 0.71 -4.59 16 138 2010-07-23 08:55:27 2010-07-23 09:55:27 1 1 138 0 20 93 0 147.50 63 90.38 CHANGED Qs-pstphLpcslpplpsaLNppTLspLhpEp.su-c..sYYcslLusLR+LLVFsE-Gh-uCtllLpppsFccsAAEKsLYclYHQCIpEFFpPKpDsWYEDSRuAYTG+suIcF+ppsPpulcpLhhSlEutFpphREEL-YYETDYpT ..QTEQVuhFLE-sIslIosYlNaHTLPSLLEEossGsE..pYYKGlLuohRRLLVFCEEGtDAChVLLsSQPFRKTAAEKsLYKIYHQVIAEFFSPKuDpWYENSRSAYTGKNSIVFQQTPPASlEpVM+SLEGKFQlMREELEYYETDYQT.. 2 5 11 13 +12889 PF13048 DUF3908 Protein of unknown function (DUF3908) Mistry J, Aldam G gba Pfam-B_1533 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 140 amino acids in length. 
There is a single completely conserved residue Y that may be functionally important. 22.90 22.80 22.90 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.84 0.71 -4.17 9 99 2012-10-04 14:01:12 2010-07-23 09:58:18 1 1 97 0 5 46 0 121.50 72 89.42 CHANGED shpphcchltpsphhpppsh...clhchh+clp.hL-..pshphhYPKNlFss..scclElhhhhpDs.hhhI.hppppc.thphchhph-slpcsplcpp.s...tshpLplhFssst.pIhhsSht..Ds..pp.ah..asc ...................NMKTIEEWIAESNARcEEDF..GpVVEEMKEVCl..GLD...NATLIYTKNVFCF..GKKVEVhFFFQDH..VVI.GQEK-E.YlEIEKLKYDsITpSNLKT..N..DKNTTLELKFANGQ.SINLDSLN..DNYGTKNWL..FAR............. 1 2 3 3 +12890 PF13049 DUF3910 Protein of unknown function (DUF3910) Mistry J, Aldam G gba Pfam-B_1539 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 21.50 21.50 22.00 72.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.13 0.72 -3.90 6 84 2010-07-23 09:00:14 2010-07-23 10:00:14 1 1 84 0 3 46 0 92.40 90 91.92 CHANGED MNlQAKVDWIGTPKPYIYK.DDVTYDAToIDFSLppDDNRYKLIVLKHEpssHYKhVQYGlKPGSQKPFPIDIPFcpsMLPLlEpILpDPYVQA MNVQAKVDWIGTPKPYIYK.DEVTYDATSIDFSLAGDDNRYKLIVLKSEcNTHYKhVQYGIKPGSQKPFPIDIPFEQNMLPIIEQILHDPYVQA. 0 0 1 1 +12891 PF13050 DUF3911 Protein of unknown function (DUF3911) Mistry J, Aldam G gba Pfam-B_1540 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 
20.20 20.20 22.30 40.20 18.00 17.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.47 0.72 -3.87 5 86 2010-07-23 09:01:18 2010-07-23 10:01:18 1 1 85 0 3 28 1 76.10 94 94.08 CHANGED MACVQIKGTRQEVVEMLQLFDLMDTKGFCKFDNYVEVEPNsccHNNFTASIDIQSNssSAQDp.LNDQFVSQMLTGVY MACVQIKGTRQEVVEMLQLFDLMDTKGFCKFDNYVEVEPNsKEHNNFIASIDIHSNTSSAQDT.LNDQFVSQMLTGVY 0 0 1 1 +12892 PF13051 DUF3912 Protein of unknown function (DUF3912) Mistry J, Aldam G gba Pfam-B_1615 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 21.80 21.80 21.80 148.90 20.90 21.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.29 0.72 -4.23 5 84 2010-07-23 09:02:45 2010-07-23 10:02:45 1 1 84 0 2 28 0 68.00 93 90.32 CHANGED NFDIVGQKAYIKDGPHRNRIGIVKKNEcQLESpFAIVIGEQsIDVELKDIVLVGVDVGQFHcWCEQNG NFDIVGQKAYIKDGPHRNRIGIVKKNETKLcSQFAIVIGEQsIDVELKDIVLVGVDVGQFHKWCEQNG 0 0 1 1 +12893 PF13052 DUF3913 Protein of unknown function (DUF3913) Mistry J, Aldam G gba Pfam-B_1619 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 28.80 106.80 18.30 17.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -8.76 0.72 -4.03 2 84 2010-07-23 09:04:03 2010-07-23 10:04:03 1 1 84 0 3 13 0 56.80 97 93.66 CHANGED KIhFYEKTAQ.DDLLGIWDNVPTIPRIGEKVEILKTlRTVTDIKYlKpGNNF+VEIl KIWFYEKTAQLDDLLGIWDNVPTIPRIGEKVEILKTVRTVTDIKYVKNGNNFRVEII 0 0 1 1 +12894 PF13053 DUF3914 Protein of unknown function (DUF3914) Mistry J, ALdam G gba Pfam-B_1562 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 110 amino acids in length. There are two conserved sequence motifs: KFDIR and DLW. 25.00 25.00 42.10 41.60 21.30 16.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.34 0.72 -3.83 5 85 2010-07-23 09:05:22 2010-07-23 10:05:22 1 1 80 0 3 51 0 88.00 89 78.88 CHANGED MQIGLNlHTluQsTKlTPlslE+NTpusssppuKEssND.sIKFDIRSSEKEhKQucHKFTELDLWKMLKDKGVPLWIILEMLpKFRKEKEp MQIGSNIHTLSQPTKITPSNLEHNTISSTKLESKK.lND.PIKFDIRSSEKEMKQPEHKFNELDLWKMLKDKGVPLWIILEMLQKsRKEKE.A 0 0 1 1 +12895 PF13054 DUF3915 Protein of unknown function (DUF3915) Mistry J, ALdam G gba Pfam-B_1549 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 25.00 25.00 146.30 145.90 18.90 18.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -11.07 0.71 -4.14 5 84 2010-07-23 09:06:43 2010-07-23 10:06:43 1 1 84 0 3 45 0 119.00 86 97.62 CHANGED MFGSFGCCDNF..RDCHH.....HEREp..HR...hcREKERc..hcspRsAVCNVLANISIGTEISLLolKGNGTFRNVIFEGFCNGVALFSALsh.ssDKD...sNKDDKNNpNpN..+FTGILRVCP-DIVAI MFGSFGCCDNF..RDCHH...........HEREp....cR...-HREKERE..V+PQQPAVCNVLASISVGTELSLLSl+GsGoFNNVIFEGFsNGVALFSALAR.NssD.KD......NNKDDKpN..QN..R..N..TFTGILRVCPTDIVAI 0 0 1 1 +12896 PF13055 DUF3917 Protein of unknown function (DUF3917) Mistry J, Aldam G gba Pfam-B_1608 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 
25.00 25.00 149.20 148.60 21.90 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.43 0.72 -3.73 4 84 2010-07-23 09:41:22 2010-07-23 10:41:22 1 1 84 0 3 28 0 71.00 95 82.96 CHANGED MTLKQNGLKRFVPGSILAGIALITYVsSIFlESlSl-hSTShhFIGITLFAGSlMVLMVAGIIlFIHMNSE MTLKQNGLKRFVPGSILAGIALITYVISIFIESVSVDMSTSLMFhGITLFAuSIMVLMVAGIILFIHMNSE 0 0 1 1 +12897 PF13056 DUF3918 Protein of unknown function (DUF3918) Mistry J, Aldam G gba Pfam-B_1567 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two completely conserved residues (G and R) that may be functionally important. 21.30 21.30 21.70 21.30 20.90 18.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -7.98 0.72 -4.64 11 120 2010-07-23 10:07:22 2010-07-23 11:07:22 1 1 120 0 13 31 1 42.20 66 98.98 CHANGED MN+shTSLlAlGsGsAAYphAp+sDhMNsRsMKKhR+Rlh+hh .MN.LRNSLIALGVGAAAYQYARKQDVFSKRNMKKARKMIKSYL 0 1 7 9 +12898 PF13057 DUF3919 Protein of unknown function (DUF3919) Mistry J, Aldam G gba Pfam-B_1479 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 251 and 262 amino acids in length. There is a conserved YLNG sequence motif. 
25.00 25.00 76.40 76.00 21.90 20.60 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.43 0.70 -4.89 13 118 2010-07-23 10:09:05 2010-07-23 11:09:05 1 1 111 0 10 94 0 220.90 57 86.08 CHANGED cDKpsVlpphspSIPsKlclhsc+WGchslsDcspLppIhshlcpIspspoptss....csss.sIsGslaYLNGcKcoFplushLplssphYuss.hsshIstL+shLlshhYoPssLssllssssclIlhcp.ppspclssssKphIhspIcph+ploDsc-lt+hlspppcs.hhHI+lYhcscpt........cscspsllsIDVY-s.YhVVQYhGDENGpshYhKGsLsslhhK ..NDKQsVLQRVNESLPsElKVRHEKWGEIVlTDEVRLHTIVSFFDRIpIpPs-s.......KsQEQVFTGEVTYLNGpKRTFAVGDLFQYGsshYGKNGtDPMISAhQTYLLSLYYTPERIuDFF.ASApDVlV....RQGDVhRshNLTHILDSIRYAKQITDYGEIQKLLQSQNEP.IAYITAYKTGK........+lKN-REDILTISVYPS.YFVVQYLGDNNGNVMYMKuSLAplFVK.............. 0 3 8 8 +12899 PF13058 DUF3920 Protein of unknown function (DUF3920) Mistry J, Aldam G gba Pfam-B_1595 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. 25.00 25.00 26.20 83.30 21.40 18.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.74 0.71 -4.32 3 86 2010-07-23 10:10:41 2010-07-23 11:10:41 1 1 86 0 5 41 0 125.50 85 87.16 CHANGED VLDSE.LPWDlQRl+sDlFSLIEKpchPVIFCDTCDANpVLhsLGEEEEEFLFPlGGFYHKEKQhIFVCMWEpYEQVLKTLLHEFRHuMQHEp-VLYVGpEsYEERhIEKD..ARtFAERKlDEYtcRs VLDSE.LPWDVK+LR-DLFSLIElC.KTPVIFCDTCDAN+VLLSLGEEEEEFLFPlGGFYHKEKQLIFVCMWEEYEQVLKTLLHEFRHAMQ.....HKp-lLYVGSEpYEERWIEKD..ARKFAERKLDEYKNRK.. 0 1 3 3 +12900 PF13059 DUF3922 Protein of unknown function (DUF3992) Mistry J, Aldam G gba Pfam-B_1628 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 87 and 98 amino acids in length. 
25.00 25.00 161.50 161.30 19.40 18.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.80 0.72 -3.79 9 83 2010-07-23 10:11:49 2010-07-23 11:11:49 1 1 82 0 3 40 0 78.90 94 86.70 CHANGED FGMDKSKKLG-YVNALQALCEQYNVETDKIAIIEATEEYYLFLVKQE-CYDVVKVETVDTNI-YYTKAYKISSFNHTAY FGMDKSKKLGDYVNALQALCEQYNVETDKIAIlEATEEYYLFLVKQE-CYDVVKVETVDTNIDYYTKAYKISSFNHTAY. 0 0 2 2 +12901 PF13060 DUF3921 Protein of unknown function (DUF3921) Mistry J, Aldam G gba Pfam-B_1624 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 29.40 120.80 21.80 19.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.93 0.72 -4.09 3 83 2010-07-23 10:12:38 2010-07-23 11:12:38 1 1 81 0 3 28 0 57.70 95 91.77 CHANGED MDGFQLSMIQKAIHRTYDELGKEIDLQGVlADEIQKAQEEYLSALSHETLIDKRYLKS MDSFQLSMIQKAIHRTYDELGKElDSQGAIVDEIQKAQEEYLSALSHETAIDKRYLKS 0 0 1 1 +12902 PF13061 DUF3923 Protein of unknown function (DUF3923) Mistry J, Aldam G gba Pfam-B_1586 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 26.60 26.20 22.10 20.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.00 0.72 -4.12 19 152 2010-07-23 10:14:53 2010-07-23 11:14:53 1 1 147 0 23 92 0 65.20 41 83.99 CHANGED h+hahlsslhhlllFlhhuhhIahRcVDGuGlhQThph+hlslhlhslhhlllllhplIWhllh++ ...ahsWWluNlhWlIlFshhAslIWlRcVDGAGVhQTsclK.IoLlVllIsallslhhQlIWLlIsh+........... 0 3 10 15 +12903 PF13062 DUF3924 Protein of unknown function (DUF3924) Mistry J, Aldam G gba Pfam-B_1601 (release 24.0) Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 60 amino acids in length. 20.90 20.90 22.30 125.80 19.60 19.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.94 0.72 -4.10 2 84 2012-10-02 18:44:02 2010-07-23 11:16:03 1 1 84 0 3 14 0 62.00 96 96.87 CHANGED MsTLTIELPp-hAEKLDLLKQsYpKKTGAoIS-S....TLISKEFlQtITPFDLQQal.tKE MNTLTIELPKETAEKLDLLKQAYEKKTGASISESTLVQTLISKEFIQAITPFDLQQFIsGKE 0 0 1 1 +12904 PF13063 DUF3925 Protein of unknown function (DUF3925) Mistry J, Aldam G gba Pfam-B_1644 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. 25.00 25.00 92.40 92.30 20.90 19.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.22 0.72 -3.94 2 82 2010-07-23 10:17:35 2010-07-23 11:17:35 1 1 81 0 4 27 0 65.50 95 94.02 CHANGED MKTAQ+EhISNREFYFVLYMMLLaVhGWVhDVNGLFLSpYFsLAG.I.LPlVGGlVGhFlMSIsKE MKTAQHETISNREFYFVLYMMLLYVhGWVIDVNGLFLSSYFNLAGEIMLPLVGGIVGLFVMSINK.Q 0 0 2 2 +12905 PF13064 DUF3927 Protein of unknown function (DUF3927) Mistry J, Aldam G gba Pfam-B_1668 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 50 amino acids in length. There is a conserved SVL sequence motif. There is a single completely conserved residue D that may be functionally important. 18.60 18.60 19.80 21.50 16.30 18.10 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.12 0.72 -4.48 10 463 2010-07-23 10:19:14 2010-07-23 11:19:14 1 1 177 0 1 147 3 44.00 64 83.66 CHANGED hchRLslAslLLFLVVhVDFTSRIMSVLADGsLVuulVllhWPllK ...........hKLpLhssslLLalsVMlDFTSRIMSVLADGsLVCGIVVLLWPllK.................. 
1 0 1 1 +12906 PF13065 DUF3928 Protein of unknown function (DUF3928) Mistry J, Aldam G gba Pfam-B_1675 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 25.00 25.00 194.40 194.30 20.30 19.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.17 0.72 -3.70 6 81 2010-07-23 10:20:22 2010-07-23 11:20:22 1 1 81 0 3 16 0 95.00 97 97.76 CHANGED MYTLKIVSDREAlYQFASYV+VVQGVEDVYVEVGEPLYEHPLMKFYVHIsIcETY-QpKALQEIARLVELGRFTYVHYRN-EIEcAFEAVKYESF MYTLKIVSDREALYQFASYVRVVQGVEDVYVEVGEPLYEHPLMKFYVHIKLEETYEQHKALQEIARLVELGRFTYVHYRNDEIEEAFEAVKYESF 0 0 1 1 +12907 PF13066 DUF3929 Protein of unknown function (DUF3929) Mistry J, Aldam G gba Pfam-B_1716 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. 25.00 25.00 151.10 150.80 23.80 18.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.23 0.72 -4.20 3 81 2010-07-23 10:22:33 2010-07-23 11:22:33 1 1 80 0 4 17 0 65.00 97 95.14 CHANGED MVYHLENGETIKDIKEFCYRDpGKMLERVAHRVMDN+EVTAIDKQGTIISIAC-DIVKVELDYIp MVYHLENGETIKDVKEFCYRDQGKVLERVAHRVMDNREVTAIDKQGTIISIACEDIVKVELDYIT 0 0 2 2 +12908 PF13067 DUF3930 Protein of unknown function (DUF3930) Mistry J, Aldam G gba Pfam-B_1721 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 51 and 67 amino acids in length. 
25.00 25.00 63.80 58.90 18.30 17.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.38 0.72 -4.11 4 87 2010-07-23 10:23:47 2010-07-23 11:23:47 1 2 80 0 3 25 0 52.00 82 95.06 CHANGED M.......p+ppcahaE......IhKhlFlFhpshslshsthhlVQhIhph .MEYQYE....V....G....QTKEEFMHEDQWADSLIKWLFIFLIIVGIPYTAYVVVQFILSF. 0 0 2 2 +12909 PF13068 DUF3932 Protein of unknown function (DUF3932) Mistry J, Aldam G gba Pfam-B_1731 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 173.90 173.80 20.30 18.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.78 0.72 -3.89 2 80 2010-07-23 10:52:45 2010-07-23 11:52:45 1 1 80 0 3 20 1 81.00 93 97.49 CHANGED MKEsFRLQTDFSSSFDRWVSSFVSsaPsQLcWoTLKELIHEYTooHTNpolPpYISSuhTYYAQRlSTANNoEIlIapN.T MKEsFRLQTDFSSSFDRWVSSFVSDHPAQLEWTTLKELIHEYTToHTNDoLPTYISSAlTYYAQRVSTsNNSEIVIFEN.T. 0 0 1 1 +12910 PF13069 DUF3933 Protein of unknown function (DUF3933) Mistry J, Aldam G gba Pfam-B_1720 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 25.10 98.90 22.90 16.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.53 0.72 -4.27 4 81 2010-07-23 10:53:53 2010-07-23 11:53:53 1 1 81 0 3 24 0 52.50 93 95.15 CHANGED MKQYVICQlIsGsKYLAAYAETKQEAIEKAELLGLRTGsRYlVITAEEA-GLp .MKQYVICQIINGEKYLAAYAETKQEAIEKAELLGLRTGNRYhVITAEEAEGLT 0 0 1 1 +12911 PF13070 DUF3934 Protein of unknown function (DUF3934) Mistry J, Aldam G gba Pfam-B_1719 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 
There are two conserved sequence motifs: GTG and SKG. 25.00 25.00 31.30 31.30 19.40 15.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.29 0.72 -3.73 5 100 2010-07-23 10:55:23 2010-07-23 11:55:23 1 1 99 0 10 25 0 41.50 85 88.81 CHANGED MSKoK..sKuGTGpGTGKKGWNRWQuSAK+.KKuAKPY..pSKGT ..MSKTKAKPKKGVGQGTGSKGWNRWQSSAK..K..KKAAKPY..KSKGT...... 0 2 6 6 +12912 PF13071 DUF3935 Protein of unknown function (DUF3935) Mistry J, Aldam G gba Pfam-B_1715 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two conserved sequence motifs: FVF and LGV. 25.00 25.00 37.30 78.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.19 0.72 -3.59 3 83 2010-07-23 11:43:06 2010-07-23 12:43:06 1 1 83 0 3 36 0 70.70 95 76.27 CHANGED MTRLKQIFGIhISFFVFWFSMLGVQM.FAEFLDI-SLKFluGKTEsARAFLFsYPaFIlFLl...ShYh.FlI .MTRLKQVFGIIISFFVFWFSMLGVQM.FAEFLDIESLKFVAGKTEAARAFYSPYPFLIVFLIT..LLSLYF.FVI.. 0 0 1 1 +12913 PF13072 DUF3936 Protein of unknown function (DUF3936) Mistry J, Aldam G gba Pfam-B_1705 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved GKAW sequence motif. There is a single completely conserved residue G that may be functionally important. 21.80 21.80 21.80 29.00 21.20 21.70 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.70 0.72 -4.50 7 122 2010-07-23 11:44:47 2010-07-23 12:44:47 1 1 122 0 13 56 0 37.40 68 82.30 CHANGED MKlahhscullLsGKAWEIRtKLKcYsppaphVp-Wls MKVYILPNRVTLVGKAWQIRHKLKQYGKEYTTVQEWIT...... 
0 2 8 9 +12914 PF13073 DUF3937 Protein of unknown function (DUF3937) Mistry J, Aldam G gba Pfam-B_1711 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 22.50 22.50 22.50 75.40 21.70 20.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.25 0.72 -4.09 7 81 2010-07-23 11:45:39 2010-07-23 12:45:39 1 1 73 0 3 31 0 72.30 81 90.49 CHANGED MFpNKKLIRhGLoLFlhLslIsFTIuYFQoYLcSAssIcWllschW+TILlDAPcGILVlLGAIALY-FTKcs .MFTNKKLIRFGLTLhVhLhlIsFTISYFQTYLESAAGIKWVIsEhW+TILLDsPEuILVILGAIALYDFTKET. 0 0 1 1 +12915 PF13074 DUF3938 Protein of unknown function (DUF3938) Mistry J, Aldam G gba Pfam-B_1607 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. 25.00 25.00 209.70 209.20 24.40 19.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.36 0.72 -4.17 6 83 2010-07-23 11:46:34 2010-07-23 12:46:34 1 1 83 0 3 21 0 102.00 96 80.04 CHANGED NlQLGEpIIFNGIEoLVSASILGGYIaFLFNPEENAQKTMLLTMIGIVGGCISYSMTNYTLPLQLSSAFFHGLWTWFIAFCLADVFNLLQDsEE-sGRpIES .NTQLGENIIFNGIETLVSASILGGYIaFLFNPEENAQKTMLLTMIGIVGGCISYSMTNYTLPLQLSSAFFHGLWTWFIAFCLADVFNLLQDNEEENGRQIES..... 0 0 1 1 +12916 PF13075 DUF3939 Protein of unknown function (DUF3939) Mistry J, Aldam G gba Pfam-B_1535 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. 
21.30 21.30 21.90 21.40 20.90 19.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.70 0.71 -4.45 6 100 2010-07-23 11:48:02 2010-07-23 12:48:02 1 2 96 0 9 42 0 135.70 75 85.22 CHANGED pcpt.ctlcVTlDEVR+AlpcatpshscGIshpsLlpsspcIDhctLtsaLGGhPcQhFYMS+ETaEIF...tEE+-lsh.lDhVQlAVDpYlp-ptchPlhpsspshpVshpKL..ptYLcEhPpa.sLYls-pphlVohcPcp ........F.+TGKEEREITKDELEQAMApFLEp.NANIVYTVLVNDDYTVNYDLLKPYLPAFPTNhFLITKETLEVFEHTEENLNLVKE.IDlVQKAVDQYVTEKEMFPIVEGS......ED......RLICGMKL..GPYLsRlLKR.DLYISEKHYLVSSKPDR..... 0 4 6 7 +12917 PF13076 DUF3940 Protein of unknown function (DUF3940) Mistry J, Aldam G gba Pfam-B_1673 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 50 amino acids in length. 21.70 21.70 21.90 22.80 21.10 21.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.48 0.72 -4.41 29 195 2010-07-23 11:49:13 2010-07-23 12:49:13 1 1 129 0 24 102 0 37.80 50 66.79 CHANGED pcKphLIppLIppGlaKtpc..RpLaEhohpELcc.Ycph ......+KcaLI-cLIssGlaKhcD..RQLYELSLpELE+EYcs.h......... 1 1 14 16 +12918 PF13077 DUF3909 Protein of unknown function (DUF3909) Mistry J, Aldam G gba Pfam-B_1537 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 
25.00 25.00 211.60 211.50 20.00 18.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.59 0.72 -4.18 11 84 2010-07-23 11:57:32 2010-07-23 12:57:32 1 1 84 0 3 30 0 107.80 93 97.97 CHANGED MDLQKFDtMIDAVQRATClpIN-KQKEAFKQKYDFEPpFEYGRDEKGHYVIRTSKKMLEEMEFYLALKYDRDGlDLYMcAEIDGlCHVSVSYSEDALHLQELFQFLEE MDLQKFDEMIDsVQRATClQINEKQKEAFKQKYDFEPcFEYGRDEKGHYVIRTSKKMLEEMEFYLALKYDRDGVDLYMQAEIDGIhHVSlSYSEDALHLQELFQFLEE 0 0 1 1 +12919 PF13078 DUF3942 Protein of unknown function (DUF3942) Mistry J, Aldam G gba Pfam-B_1722 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. 25.00 25.00 99.80 99.70 21.30 19.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.67 0.71 -4.03 5 85 2010-07-23 12:01:07 2010-07-23 13:01:07 1 1 81 0 4 34 0 124.20 88 93.83 CHANGED LDEFsp+VKEYL-sEK-EKIIK-GHRDVIFpYLYcLEscIGVV+NP-FsFFTSGcRSHIVlENlEFKTEVcsE+NIIEITKIVDpVA.TsLDTIIlQDGELFALGRNEKFTppILp-YLpEsFuEhL R.FEFTTKlKEYLDDEKDEKIIKDGHRDIIFpYLYsLESEIGlhKNPNFTFFASGRRSHIVLENIEFKTEVNVKSNIIEITKIVDNVV.IPLDTIVAKDRELFALGRNEKFSVQILEQYLFDTFGEKL... 0 0 2 2 +12920 PF13079 DUF3916 Protein of unknown function (DUF3916) Mistry J, Aldam G gba Pfam-B_1564 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. There is a single completely conserved residue S that may be functionally important. 
21.30 21.30 21.80 21.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.73 0.71 -4.55 14 109 2010-07-23 12:11:47 2010-07-23 13:11:47 1 1 107 0 11 80 1 146.20 59 82.08 CHANGED hhcphptph.shPsshasstY..Wph+lPVstualpup+sspplKphClQsLlspAppLhphKsssppphRVsshIslssLasSpIllF..cs..-cYFcsFhsRss.chppWlsLsspcsltpcWsLplssshpphGapElhpDp-...s..tcpElWaIGE ................MIKRIEEHTKsFPSTFYND..EY..W.M.LPVSQsFI-S+KTPRKVKRLCIQTLlspsNHLIph.K.PoDTHTYRVVsLISIpNLWcSQIIlF..KN..-DYFHNFFNRss.EFQKWIhLSNElDFWETWtISIssohphL+FQElIYDED...t...EKEIWFIGE............ 0 0 4 7 +12921 PF13080 DUF3926 Protein of unknown function (DUF3926) Mistry J, Aldam G gba Pfam-B_1663 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 46 and 63 amino acids in length. There is a single completely conserved residue P that may be functionally important. 25.00 25.00 26.10 82.60 19.10 18.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.23 0.72 -4.42 8 78 2010-07-23 12:15:49 2010-07-23 13:15:49 1 1 77 0 3 35 0 44.00 80 80.29 CHANGED MpILcELPsPIQQSAKpMLNILQEELSSYspEpsQspsNLKsII MHILEELPsPIQQSAKQhLNILQEELuuYPpEQsHHcsNLKNII 0 0 2 2 +12922 PF13081 DUF3941 Domain of unknown function (DUF3941) Mistry J, Aldam G gba Pfam-B_1728 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 30 amino acids in length. There is a conserved YSK sequence motif. 25.00 25.00 28.70 43.80 23.80 24.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -6.85 0.72 -3.95 5 93 2010-07-23 12:21:47 2010-07-23 13:21:47 1 1 93 0 8 35 0 24.00 82 47.55 CHANGED RccKNpt+..cKNtQpGK+AYSKKTD .RsQKNEQE..QKNIpQGKRAYSKKTD. 
0 1 4 5 +12923 PF13082 DUF3931 Protein of unknown function (DUF3931) Mistry J, Aldam G gba Pfam-B_1734 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 40.30 161.30 21.50 18.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.32 0.72 -4.11 4 82 2010-07-23 12:31:39 2010-07-23 13:31:39 1 1 82 0 3 10 0 66.00 98 78.64 CHANGED MDNNEKKCNVISIDGKKKKS-TYSYPKLVVEsKTYEFSSFVLCGETPDGRRLVLTHMISTDEFAGF MDNNEKKCNVISIDGKKKKSDTYSYPKLVVENKTYEFSSFVLCGETPDGRRLVLTHMISTDEFAGF 0 0 1 1 +12924 PF13083 KH_4 KH domain Bateman A agb Jackhmmer:O25768 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.30 0.72 -4.34 211 3577 2012-10-02 00:34:43 2010-07-23 16:26:45 1 7 2027 3 775 2186 412 73.20 25 47.89 CHANGED cch...l...cplhcsllpp...p...plplptp.....p.c.ppthlplpl.sspD.hGplIG+cG+slpAlphllsss.s.s+...p.s..p...c...h..plpl .....................................thlpthlcsllst..s.c.....sl.p.lphp......psp..pttplpl.pl...s.s.p.D....hG+..lIG+pG+slpAlc.t.llpsss..s.+....t..s..p....p.h.hl..................................... 0 331 604 701 +12925 PF13084 DUF3943 Domain of unknown function (DUF3943) Bateman A agb Jackhmmer:O25483 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. 
27.00 27.00 27.90 81.00 22.90 21.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.20 0.72 -4.48 33 236 2010-07-23 16:14:36 2010-07-23 17:14:36 1 2 221 0 52 187 8 110.00 50 30.54 CHANGED puhha................DsDsahhNhhuHPYtGuhYassARssGashapShhaohsuShh.WEhusEs..EhPSlpDllsTsluGhllGEhhac.huphl....tsutphhsppshu....hllsPhs ..................p.GPsWDcDpahhNhlhHPYFGulYYsAARpAGas.acShhYShshSThFWEYGlEAFsEVPSWQDlhlTPhhGullGEhhac.hpptI.....hssGGclhGS-hhGthslhhlsPl.G.............. 0 21 33 45 +12926 PF13085 Fer2_3 2Fe-2S iron-sulfur cluster binding domain Bateman A agb Jackhmmer:O25214 Domain The 2Fe-2S ferredoxin family have a general core structure consisting of beta(2)-alpha-beta(2) which abeta-grasp type fold. The domain is around one hundred amino acids with four conserved cysteine residues to which the 2Fe-2S cluster is ligated. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.45 0.72 -4.23 52 5265 2012-10-02 17:47:23 2010-07-24 12:26:47 1 25 3727 84 1318 3398 2510 108.10 39 42.50 CHANGED plclhRhss................................................................tt.psahpsaplshc..ptholL-sLptIcpp..........-ssluac..tuCppulCGoCuhhlNGcs......pLACpshlpshhpt...............................................lplpPL..spaPVl+DLlV........Dhsshhc ............................................................................................................................................................................hplaRasP....................................................................p.ss....pPphps.Y..pl...c...hp...pshhlLDuLh.hlKcp...........DssLoFR...pS.CR.E.GlCGSCuM.lNGps...........pLACpsh....lcshs.p..t...........................................................................................lp.lcPL...ssh.PVl+DLlV..DhopFh.t..................................................................................... 
0 387 813 1105 +12927 PF13086 AAA_11 AAA domain Bateman A agb Jackhmmer:O25195 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.79 0.70 -4.79 118 5668 2012-10-05 12:31:09 2010-07-24 13:20:17 1 234 1842 10 3094 6877 709 222.70 20 21.82 CHANGED pLNpsQtpAl...ppslsppt......................hsLlpGP.............P.............GT........GKT...polstlltth...................................................................................................................................................................................tttsp+lLlsusSNsAVDpllp+...Lhp.................hph...............pllRlGpsp.........psslpphs....hpt.h.........................................................................................................................................................................httphtphppthpth.pthtt.ptth............ppllppupllhsThsuu.upthhpph..................pF-hlIlDEAuQ.............usEsssLlPl..hhu.............c+hlL...................lGD.pQLPPslhSp 
.............................................................................................................................................................................................................................................................................................................................................................................................Q..uh...........h.h.t.............................h.l...G..P.............P.......................GT...............GKT...........h.hh..t..hh...h...................................................................................................................................................................................................................................................................................................................t..hl.hh..s.s..s......A.hs...hh.t...h.t...............................................................................................................h.h.R..hh..t.....................................h...........h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..h...t..t...h......p.....l......l........h..s..T....h.....ss..........p............h.t.t..h.............................................................
.th..c..hl.l...lD..E.....A....uQ................................h...h..t.......p......s..hh...s...l.......hhs...........................pphl...l...................................lG...........D..pQLsPhh................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1106 1755 2545 +12928 PF13087 AAA_12 AAA domain Bateman A agb Jackhmmer:O25195 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.41 0.71 -4.94 125 5685 2012-10-05 12:31:09 2010-07-25 09:30:25 1 268 2107 10 3041 5759 458 189.40 23 15.53 CHANGED hspSLF-RLhptt.................ttshhLshQYRMHspItp...FsSphFYp...Gc....Lpsus......shtppph..............................................................h.tshtP.lhFhcs................................sssppppppt...............sShhN.sEAphlhpllppLhpptstt..................pcI..GlIoPYpuQlphl+phlp.........p..............................pht.t......................................lclsTVDuFQG+E+-lIIhSsVRo...............s......pps.......................................................s....lG.FLsDhRRlNVAlTRAKptLlllGssp 
...........................................................................................................................................................ohh.hh............................................hhLphp.a.R.h.p...p.lhp...h...s.s.....p.hh.Y.p...........s.p.......L..sts..................phtp.t.........................................................................................................................................................................................s...hhh.hss........................................................ts......t..ttt...t...............................tuhhN...tE.u........p.h..l.h.p.....hl...p.t.hh....pt..thtt.................................ppl......u.l..l....o....P...Y..p..u..Q...h..p..h...l..p..ph..l..p..........p..................................................................................................................................t..h......t..t.................................................................................................lp..l.sT.V.-s...a..Q............GpE.p-.l.lll..S..h.....V..cs.........s..................tpt............................................................................................................................s........hG....F..l.......p....s.............c.....R........l....N..VAlo.RA+pt.lhllGs..p.......................................................... 0 1103 1793 2543 +12929 PF13088 BNR_2 BNR repeat-like domain Bateman A agb Jackhmmer:O25303 Domain This family of proteins contains BNR-like repeats suggesting these proteins may act as sialidases. 
28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.30 0.70 -5.11 78 2868 2012-10-02 00:45:24 2010-07-25 11:16:42 1 108 1391 46 640 2427 442 267.90 18 47.16 CHANGED Gp...LlAhWFu.........GscEG..ssDlsIhho......ppp.ss.....p.......W..ussthlssst......................................htshtNPllht...ts.supLhLF...atssssss...............tWh...........uhh......hpSsD.......sGt..o.Wotsp....cLssu..........hsGsl+s....................s....lhhp....sG.pllhP...s.pE.......tt....ashhlthosDsuts.........Wppsphh.................................ph.stlQPoll..ss.......u..plhhhhRs..tps............plhtotSpDsGpoW..ss.......sp..........hslPNss....Sulsuls.LtsG..phlLshN........................................................RssLsl.....th..........SpDt.GpsWp.thhsL-sss......................................................phSYPsllpssc......s..plal ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s...h..........................t....s..lh......h......h..................................................................................................................................................t..................hpSsD..................sG.p.....T.....Wotsp..................pls.t.....................hh.h..h...s..s.spu..............................................................................................l..hp.............sG..cllhs...................shtp..........................ttt........ps.h.h..h....hS...c..DpG.pT.........Wphu.pss.................................................
........................................ph..sps...ps...s....l....s..s........G.....slh.h..h.....h...Rs....ss.....................phhh.u.tSpD..s.......Gt.....TW....st............................................sp................................h.s..h...s..s......s...............u..sh..p.....h.h..p.....h.t...ss.........phhlh.hs..........s.pt....................................................................Rt...h..s..l..th...............s..t..Ds...uh.....sW.....hh.....l...p..s......................................................................huYsshh..ss................................................................................................................. 0 246 392 511 +12930 PF13089 PP_kinase_N Polyphosphate kinase N-terminal domain Bateman A agb Splitting PF02503 into domains Domain Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. 21.90 21.90 21.90 21.90 21.80 21.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.49 0.72 -3.96 174 3094 2010-07-25 13:24:08 2010-07-25 14:24:08 1 9 2787 6 696 2356 835 105.30 41 15.31 CHANGED hahNRELSWLpFNpRVLpEAtD.p.p.hPLLERl+FLuIhoSNLDEFFhVRVAuL+p...............plptuhsphs..sGho.............Ppcp.LptIpppspphhccptphapp.lhstLt.cpuIpllp ......alsRELSWLsFNpRVLppAtD.p.s.hP.....LLERh+FLuIaosNLDEFahVRVAuLKc...............plphu..h...pp....tu....ssu.h.s.............spc..LspIppcsp.chhpcp....tplas-.lhspLt.cp.sIhll.p......................................... 0 209 457 597 +12931 PF13090 PP_kinase_C Polyphosphate kinase C-terminal domain Bateman A agb Splitting PF02503 into domains Domain Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. This C-terminal domain has a structure similar to phospholipase D. 
23.90 23.90 23.90 24.70 23.80 23.80 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.90 0.70 -5.85 174 3665 2012-10-02 13:01:53 2010-07-25 14:32:09 1 10 2915 6 706 2979 1325 303.40 49 47.92 CHANGED slFssIcc.pDlLLHHPYcSFssVlc.FlcpAApDPsVlAIK.TLYRsu.scSsIlpALlcAAcsGKpVTVlVELKARFDE-sNIpWA+pLEcAGs+VlYGlsGLKTHuKlsLlVR+..Ets.t.....l++YsHlGTGNYNspTA+lYTDhuLhTscpplupDlsclFshL.oGh...uc.s......phc+LhluPhsl+ppllchIccEhppActGc.sutIhhKhNSLsD.plIctLYcASpAGVcI-LIVRGICsL+PGlsGlSENIcVpSIVGRFLEHoRlahFtN.s...........G..........csclYluSADhMsRNLc+RVElhhPlhDspl+pclhch.LphhLpDNspuhtlpsDGsYpphps..p..........pp.h.suQphhhpth..p ......................................................................slFcuIRc.p.D.lLLHHPYcSFs.s.Vlc.hLcQAAtDPpVlAIK.TlYR.su...p.D.....S.IlpuLlcA...Acs.GKpVTVlVEL.p..A..RFD..E...EuNIpW.A.+cLEcAG.sHVla...G..hs..G...l....K.sHuKlhL.lsR+..E......ss....p..............lhcYsHlGTGNap...tTA+lYTD.hulhTss.plsp-spplFs...l..p.s........p..............................p....hp.....pL...hhu...P.sh+ptlhchIcpE.htt..t.p......tG....h......u........tIhhKhNsLsD.....tllctLYtASpuGVplcLll.R......Gh.CsL.hP..s.l.Gl.......S-....N....IclhSIl..sRaLE.HsRlahFts.s.................s..........................p.....plaluSADhMtRNlpp.RlEshhPlhs.plptplhp..hph.htDshpuh.lpt.-hp.thh.......t...........p.t..h.puQ.hhhp..h........................................................................... 
0 212 466 607 +12932 PF13091 PLDc_2 PLD-like domain Bateman A agb Jackhmmer:O26029 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.17 0.71 -4.38 205 18675 2012-10-02 13:01:53 2010-07-25 14:56:24 1 85 4401 14 4237 15098 1180 139.20 19 41.80 CHANGED lhphlpp...up..cplpl.ssh..ah.............ppp.....l....hpsl.ht.ttp+G.lcl+llhsp..............................................p..h.thtthpthtph.hptshp..........h..........................................HsKhhll.D.......sp......................h.sh.l..GSsNhotpuh.........ptNh......Ehs......lhlp.sp........thspphpp..hF...pt...a ....................................................................................................................................................................h..phlpp..Ac.....cplhl.ts..ha.......................................s.spp....l........hpsL...hp....sspc.....G...V.......c..V..+..l..lhs.s..s...............................................................................................................h.....h.ts.hph.h.t.ph....h.p.s.Glc.lhhh..........................................................................tts..hhHpKhhll..D.......sp.........................................................................hsh.l...G.o..h.N....h.....s..s..c.uh...............thsh........................E.hs.............lh..lp...s............t.stthtt..h........................................................... 0 1240 2500 3500 +12933 PF13092 CENP-L Kinetochore complex Sim4 subunit Fta1 Coggill P pcc Wood V Family CENP-L is one of the components that assembles onto the CENP-A-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. 
CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. Fta1 is the equivalent component of the fission yeast Sim4 complex [1]. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. 21.70 21.70 21.90 22.10 21.50 21.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.85 0.71 -4.46 30 154 2010-07-26 14:40:08 2010-07-26 15:40:08 1 5 141 0 110 144 1 169.30 25 45.25 CHANGED pshTpLPLLLsRhPssLRpshhoFLupsFDspsSsLRLsophLssshEpalsslspssss.....................plh+-hpLTLuFs.sssss.......................uL+olslsIPtpslssFhptu.....................................tt.psp.s....ssFhsuLusYhcpHLAhcLs.................tttl+LoKlusuuFllusEGRlKlluss ..............................................................................................phspLPLhLsphspshp.phlhsalppsFD...shho..sL....p....lsspsLshhhthahsshsppths..........................................phplhas.sstss.................................slpslsltIs.cchpshhpps......................................................t.s.t.p..pp.pps.....s.FhpsLtsahcc.Hhtlc.Ls.................................................................ss+Ls+lusushsht..s....-G+lKlhs..t.................................................. 0 24 43 77 +12934 PF13093 FTA4 CENP-U; Kinetochore complex Fta4 of Sim4 subunit, or CENP-50 Coggill P pcc Wood V Family Fission yeast has three kinetochore protein complexes. 
Two complexes, Sim4 and Ndc80-MIND-Spc7 (NMS), are constitutive components, whereas the third complex, DASH, is transiently associated with kinetochores only in mitosis and is required for precise chromosome segregation. The Sim4 complex functions as a loading dock for the DASH complex. Sim4 consists of a number of different proteins including Ftas 1-7 and Dad1 [1]. 24.80 24.20 24.80 24.80 23.50 23.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.38 0.70 -4.90 31 102 2010-07-26 14:45:24 2010-07-26 15:45:24 1 3 88 0 82 102 0 200.70 30 81.33 CHANGED Msto.p.......olhplKpsFlcsQl...+ILSpsL.........pssccW+shst.ts.pp...............p.............Ls-.......+slpclLpKlNstL+pHsRtlaSsQAlpHVApQItpLYhpphtpsspp.sphpthlc.............................cssDLos...........ptsIppLPp......................................................................ph.t.....ps...ssppptp......................+YpcLppcLlpLsppppptpc+LsphppLpplL...........................EPacsspp........................slQsNLl.....o+supLspEl..p+MRhLls+Vuu+ .................................t......pl.phKpsFlpsQh...+lLSpsL.........tPsc..sW+shst..stpp.........................s........lsp.......+slpcsLt+lNthl.ppHsRtlassQAhp+VApQIppLYhpphpttst.p..p.t.hlp.............................ctsDLss...........tt.IppL.Pt.......................................................................ph.t............t.p.tptp......................+YtplhtcLhpLspp+pphpp+ltph+pLpphl...........................cPapsspp............................................slQ..NLl.....T+su.lttEl..p+MRhLlu+Vss+......................................... 0 14 40 68 +12935 PF13094 CENP-Q CENP-Q, a CENPA-CAD centromere complex subunit Coggill P pcc Wood V Family CENP-Q is one of the components that assembles onto the CENPA-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. 
CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENPA nucleosomes directly recruit a proximal CENPA-nucleosome-associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENPA NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENPA-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. Fta7 is the equivalent component of the fission yeast Sim4 complex [1]. 28.50 28.50 31.00 29.90 27.80 28.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.05 0.71 -4.21 38 159 2010-07-26 14:57:46 2010-07-26 15:57:46 1 3 124 0 96 153 0 156.00 22 39.12 CHANGED pLtp+Lsp.hshPsts...................pcs.hsh-plhpppptLEppLu....s.hcplphLppElccp-sthpp-tcpLpchccsscttcsphccppcp......+.lLpt.ppssspp..tsppp...............tp.ssph.................-p-ltsllpplp......pclcshpsstt.......lpslpctlpcspst.....Ls ................................plhpphtp.h.hPstp.........................................cs.hsh-pl.lptppsLEppls....ss.culthLpp....Elc.ctptphpppptplppLcpsspshppphccppcch..................+plhph....s...pp.ss.p..hsthsppp........................................sssh.................ppElhslhtplp......pchcshpsstp.......hpshhphlpcshttLp....................... 0 14 34 62 +12936 PF13095 FTA2 Kinetochore Sim4 complex subunit FTA2 Coggill P pcc Wood V Family Fission yeast has three kinetochore protein complexes. Two complexes, Sim4 and Ndc80-MIND-Spc7 (NMS), are constitutive components, whereas the third complex, DASH, is transiently associated with kinetochores only in mitosis and is required for precise chromosome segregation. The Sim4 complex functions as a loading dock for the DASH complex. 
Sim4 consists of a number of different proteins including Ftas 1-7 and Dad1 [1]. The equivalent higher eukaryotic protein is CENP-P. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. 20.70 20.70 21.40 21.40 20.50 19.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.49 0.70 -4.73 13 63 2010-07-26 15:12:01 2010-07-26 16:12:01 1 2 35 0 56 62 2 191.60 25 60.04 CHANGED Ms...........................h.p.L.hspPscpss...............stPpLchFht...Hs..............psIphlpalstss-..........uhVaphclpu+.pYALKl...h..asapss..hh.cltt+th.......hahs...PhssEsRAauRLspltcpGhh....AV+CHGWhhLo..cpphpph................sp.hphhspWAIVKD...alssph........s.pplsplhpchplh+cshlhspDlp.pNYRsuhlVDLGssh .................................th..................................tPpLt.F.h...pt..............tslphlphLststp..........uhVa+sclpup.....YALKl.............a....htts.....t...phttp.h.............h.ahs..................PFssEsRAauRLpchscpsh.....AV+CaGah..hLs....c...pphp.h....................................tpttphhshhAIVKD...hlssth..................p.psh.c.lhpchphh++htlhstDlpspsYh.suhllDhups..................................................................... 
0 3 27 48 +12937 PF13096 CENP-P ShortName; CENP-A-nucleosome distal (CAD) centromere subunit, CENP-P Coggill P pcc Wood V Family CENP-P is one of the components that assembles onto the CENP-A-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. Fta7 is the equivalent component of the fission yeast Sim4 complex [1]. 25.00 25.00 30.30 29.40 20.60 18.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.99 0.71 -4.96 5 51 2010-07-26 16:04:31 2010-07-26 17:04:31 1 1 35 0 27 49 0 127.20 51 60.61 CHANGED Q+aRLuGsCpuLsFQLEFQlLElQsc-slssslTDLsIlhEssEapDLScFVSRsEEc+sLLLFFRSLpoFuEWCEaR+pTFcHFKEKYP-lVsLPEGspuEpMtlRNPQpPGhELlIVWKIHID-EG.sVlPlLDLLsKlPpQALELDcKuslEsuussFRoLLtlhGIEuoIEsLI .....................+a+LSGsCp.lsFpLEFplLEh.psp-phSusloDLsIlhEssphuELScFloRsE-ctsLhhFFRuhp.asEWhcaRcpTF.HhKt.KYPphV.LscG.t.up.h.lpssp..sGFELhIVW+lplsE-G.pshPhLDLLsKhPppsLt.ppptsl-suP.tFRshlhhhGlEssl-pLI................ 0 6 7 11 +12938 PF13097 CENP-U CENP-A nucleosome associated complex (NAC) subunit Coggill P pcc Wood V Family CENP-U is one of the components that assembles onto the CENP-A-nucleosome associated complex (NAC). The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. 
CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [1]. FTA4 is the equivalent component of the fission yeast Sim4 complex. 19.50 19.50 19.50 19.50 19.10 18.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.18 0.71 -4.33 9 57 2010-07-26 16:13:52 2010-07-26 17:13:52 1 2 39 0 30 56 0 158.90 39 44.36 CHANGED p-s-spcpVtssEp.......pspp.pshssss....thsEcPupsVTsppsuspsupsSstctss.AspoppcTQK..p...t+Rp+scptsptocsu-shplW...C.cth+tS..DIpELDVlLutFE+hhLEY+QclEScsC+pAIscFasplKcpLhchlcElQhLKsLK+KNsKlluslEKKRQRL ........................................s-sppcltssc+......hsspppcsh.sss.......phsEcPupsVssppsus.ss..psssEcpsh.....s.pspp.....csQKp......p.t.s++p+s....c..opshs.scs..S-.......ssplW...C.cshKpS..DIpEL-lVLstFEKhhhEY+QplES+sC+pAIspFasshKEpllchlpEsQhLKNLKRKNAKllusIpKKRQRL.................... 
0 3 7 13 +12939 PF13098 Thioredoxin_2 Thioredoxin-like domain Bateman A agb Jackhmmer:O25140 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.89 0.71 -3.87 144 3289 2012-10-03 14:45:55 2010-07-26 17:16:55 1 21 1889 26 693 17334 6561 121.70 21 46.68 CHANGED s..p..s..p...s...Kh.hlhhF.ssss.Cs..aCcp..hc.pplhp..sp...ltth......h....................pt.p..hth.hhl................sh...................t.....psp................t.........hht.....................ttt...............h......tpp.......................p..........................................hu..pph.s..........lpu.TPsllh.hs.t.p........G........p.......p....hs.G.hh.s.s.p.phh.phL .......................................................ts....p..t+h.hlhVF.s.D..ss.C.s.YC.+c....h.c....p...p.h.t.s........st.......l.s.h.t...h...............................................p.....t....hhh....h..h.l.....................................ss...............................................................................t.......s.s.p........p.......tt..t............hps..............................................tttp..................................t.......................................p...................................................................................................Ls...p.ph...G.............................l...s.u...TPsl...lh...s................G............p.........h.......hs..G..h....s..s..pph.th............................................................................................................................................................ 0 189 415 577 +12940 PF13099 DUF3944 Domain of unknown function (DUF3944) Mistry J jm14 Jackhmmer:O26108 Domain This short domain is sometimes found N terminal to Pfam:PF03981. 
21.70 21.70 22.30 28.00 20.80 21.00 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.32 0.72 -4.46 19 557 2010-07-27 11:58:35 2010-07-27 12:58:35 1 3 517 0 12 169 3 34.80 69 14.48 CHANGED Y+.hDsDLEFLscCoscDLpsLVphLT+D.KDGphRh ..YL.pDsDLDFLQHCSEEQLAsFARLLTHN.EKGKsRL... 0 3 5 8 +12941 PF13100 OstA_2 OstA-like protein Mistry J jm14 Jackhmmer:O26089 Family This is a family of OstA-like proteins that are related to Pfam:PF03968. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.95 0.71 -4.90 30 372 2012-10-01 21:43:16 2010-07-27 14:41:27 1 9 305 0 104 461 481 129.20 26 25.21 CHANGED Ko+VhL.HuDpLphDp.t.pPDAQlLhG...sVsh+HDushMaCDSAhhaEpoNSlEAFGNV+MpQGDTLhlYG-YLaYDGsTQlAhlR.cNVRMcN+ssTLhTDSLNYDRlhslGYYF-GGolsDpcNsLTScaGEYSPuTKhAlFspsV+LhNPcFsLsSD ...........................................................l...us.h.....hsp......s......s.....s.....h..h..G.....pVth..p.a.pu......hh..hsD....p...shh............p........p...p.....p..........hp...Ah..Gs.....V.p.....h......p....s..D.s.h...p.....lhu-hh.Yss..pp.s.hh....tp.V....h..h.h.p..p........t......p..L..h..s..-..pL.Ysph...p..h...uh......a.p.s.s..pl..h.s..p..t..s.hlh..u....p...G.Y..tsc...h..ts.h..p.t................................................................................... 0 41 89 102 +12942 PF13101 DUF3945 Protein of unknown function (DUF3945) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This is a C-terminal repeated region. 
21.90 11.10 22.10 11.20 21.80 11.00 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.53 0.72 -4.67 53 731 2010-07-27 21:34:35 2010-07-27 22:34:35 1 9 130 0 73 590 11 54.20 26 21.58 CHANGED chhssalslDctTNclhsh.........ss....p.......lpIP.......sclps.lcLosppppsLppGcslhlc.hhspcs .......................................hlphD..Ttph..h.........ht....p...................ltlP.......pclpG.lcLosppppsLppGcslhlcshhscp.......... 0 26 64 72 +12943 PF13102 Phage_int_SAM_5 DUF3946; Phage integrase SAM-like domain Ellrott K, Bateman A kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This family appears related to the N-terminal domain of phage integrases. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.17 60 914 2012-10-02 14:21:04 2010-07-27 22:57:47 1 2 196 1 136 888 67 145.40 17 38.17 CHANGED lplRpKpL.ssGph.SLYLDhY...........suc.....RpaEaLplYlhsc.psttc.........................+cpNppslphAcsI+ucRhlElps.......pptsh.tspp.csctshl-ah...cphtcpppppsp.t........phpsshp.aLcpasttp.......lsFp-lDtcahpsFhcaLh .............................................................................................................p.h.tpGph..slhl..chh..............................hptp..pph..h.t..lh.l..hsp..hstpp..........................................................pp.t.pp.h.h.t...ht.p..t..l.p.....t...p.chh.p.h.p............................tp.th...hp..p..t....ptp.......ts..hh....sah....cphhpphp....t.....p....s.p.......hp............papss...hp....p.....lppFhppp.........lshp-lshpalpsFcpaLh.......................... 1 45 110 134 +12944 PF13103 TonB_2 TonB C terminal Mistry J jm14 Jackhmmer:O25752 Family This family contains TonB members that are not captured by Pfam:PF03544. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.84 0.72 -3.94 102 1325 2012-10-03 21:09:15 2010-07-28 10:51:51 1 14 909 1 457 2915 858 85.30 19 30.22 CHANGED hssYhspltptlpp.....p..W...s..st......ssshpshlplplss.sGp.l...shplhcsSGsptaDpuVtcAlp....pspshP..s....uth.....phthsF ........................................tahstltpplpp..........p.........a.....s.....ts............tstshp..s..plp......l..p........ls.s..sG.........p.l....h..s...s....pl...s.c.....o.S...G.....ssthD.....p.u.....shpulc........pssshP..s......tt...................................... 0 136 268 371 +12945 PF13104 DUF3956 Protein of unknown function (DUF3956) Aldam G, Mistry J gba Pfam-B_1228 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 22.20 22.20 23.90 93.00 20.90 19.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.12 0.72 -4.40 3 98 2010-07-28 10:22:01 2010-07-28 11:22:01 1 1 64 0 4 13 0 43.80 89 99.68 CHANGED M..shssFVNGQPhLVVSVAGIEIARLEISLQVALTLIALGIPICA ..M-SCVlFVNGQPhLVlSVAGIEIARLElSLQVALsLhsLGIPICs. 1 0 2 2 +12946 PF13105 DUF3959 Protein of unknown function (DUF3959) Aldam G, Mistry J gba Pfam-B_1424 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 260 amino acids in length. 
25.00 25.00 25.40 25.30 20.20 19.80 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.67 0.70 -5.04 5 82 2010-07-28 10:32:12 2010-07-28 11:32:12 1 1 77 0 5 56 0 229.50 81 95.47 CHANGED MLLSGLFPIAGIhKQIPLEQSLYIGGLLFFTSFGSYFAKKhYSRICSWIAYAPFITLLLlIWcQDITTuSlLANAKIAAC..IALlPsIaRFRTYGlTFGLlALWGALLWDlKEVQSLVILERMoSLMTScalYLLLLlGGLIlGGLLAshIHRKEKD-NKENINLapQKKKRK+LSFKI.LPRLPKL+MKLFKFGtK.oppK..c..+c+pYEEsh.....ch.pp.s.acpp..ol.GQTRMERRRN ..MLLSGLFPlAGlhKQIPLEQSlYIGGLLFFTSFGSYFAKKhYSRICSWIAYAPFITLLLVIWHQDISTSSIIANAKIAAC..lALIPCLFRFRTYGLThGLFuLWAALLWDhKEVQSLVILERMoSLMTSpahYIhLLlGGLIlGGLLAMhIHRKEKDsNKENINLFpQKKKRK+LSFKI.LPRLPKh+MKLFKFGGK.SKpKpPEKI+E+pYEEsss....TYEMpEQIc.YKE.s..slQGQTRMERRRN............. 0 0 3 3 +12947 PF13106 DUF3961 Domain of unknown function (DUF3961) Aldam G, Mistry J gba Pfam-B_1483 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 40 amino acids in length. 25.00 25.00 63.10 63.00 21.50 19.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.95 0.72 -4.39 11 103 2010-07-28 10:37:55 2010-07-28 11:37:55 1 1 84 0 4 33 0 39.10 76 52.23 CHANGED pslNcaFGI-tstSDpIWFYGhaululhlhhhsYllStll .QSVNKFFGLD.TKEDCVWFYGFYGVAVSILLFMVFTSNIF.. 0 0 2 2 +12948 PF13107 DUF3964 Protein of unknown function (DUF3964) Aldam G, Mistry J gba Pfam-B_1516 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two conserved sequence motifs: FYF and AFW. 
21.60 21.60 24.10 179.00 20.10 20.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.39 0.72 -3.81 4 117 2010-07-28 10:39:55 2010-07-28 11:39:55 1 1 117 0 4 44 0 107.70 71 96.81 CHANGED TRpEpIhpLsFFEDKPtLAEQIL+lE+cEplaLPsQFEIKQsssYphGEKpsllGRlcpFYFlul.to.Espa+hQAFssEhcs+tFFlsLssIpcp.lAFWhNplELl .TRQERILQLPFFENKRELAEQVLKhEREEHlYLPDQFEIKQVPPYSFGEKpuIIGRIHEFYFlSl.GS.EusWKYQLFKDEMKCREFFVpLPsIsDQQIAFWFNNIELL. 0 1 2 2 +12949 PF13108 DUF3969 Protein of unknown function (DUF3969) Aldam G, Mistry J gba Pfam-B_1576 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 25.00 25.00 40.40 40.30 20.10 19.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.15 0.72 -4.39 9 294 2010-07-28 10:43:29 2010-07-28 11:43:29 1 1 290 0 11 74 0 100.10 49 88.47 CHANGED htspppl-pahhhhhlGhhpuh+hshIols.hEthlFp.hhhcllpchslcccLl-IItpGhpLEDl........t.lhspcLpcsIcslpspslphLhphp.php..cptlphl. ........htp...LEKhhLhhhhGlhppLKLtllSlDpAc+hlFs..hhEhLtshulccsllDLIHpGsELEDh.............tshshoIE-hlslCLQhhcEh.pphcsVEhp.c.l.l............. 0 5 6 8 +12950 PF13109 AsmA_1 AsmA-like C-terminal region Coggill P pcc Jackhammer-O25308 (H pylori) Family This family is similar to the C-terminal of the AsmA protein of E. coli. 
22.50 22.50 22.70 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.24 0.70 -5.06 21 213 2012-10-03 05:41:17 2010-07-28 11:44:00 1 5 211 0 30 230 477 212.80 47 23.87 CHANGED splpssupacsGphsh.hppsp..lplc..ucshsD-FlNplhs.....p.phhptGpFsl..p...u.t.s.s.s.shapGclplpsThl+shphlpNlluFIcTIPSLlsF+sPsFsscGaplcpGpllFthpc-hLslcslpLpGsShDIhGpGhIsLcspplslsLpLpTlKshoshIsKIPlls....YIlLGK-tpISTslplpGsLDsPchpTplspDlLhuPFsllKRll .........................................................................................s.tlchcushtNAphslh.h.sssc...LpLp....s.p.N.h.s.DpaLNp.hLQ.....K.puVps....GlFsLp......h..t.G....s..s....chF.......cG..pl..cF..p..NTalKDL+sls.p.LISFI..s..T..........lPSLL...........hF.KsP.........sFN.p.KG.lsl+cG+l..lFst..KK....D..lLulpsIsLsGsSlDIhGhGohsL+hNslDhsLELKTLKohScsISKlP..IlN....YlIL.G...Ks...p.c..I.S.T.N.l.+lcGolDcPKh+T...........plloDsLpsPFNllKpIl............................... 0 10 25 29 +12951 PF13110 DUF3966 Protein of unknown function (DUF3966) Aldam G, Mistry J gba Pfam-B_1525 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 86 amino acids in length. 25.00 25.00 111.50 110.90 18.80 17.40 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.50 0.72 -4.16 2 85 2010-07-28 10:46:03 2010-07-28 11:46:03 1 1 84 0 4 20 0 49.80 97 63.60 CHANGED VIhYISRKFSQERpLEKSEITAEL-MLscEsaKKQKIKEDHEtsHHLNsp .VIVYISRKFSQERELEKSEITAELEMLADESYKKQKIKEDHEASHHLNAN 0 0 2 2 +12952 PF13111 DUF3962 Protein of unknown function (DUF3962) Aldam G, Mistry J gba Pfam-B_1505 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 233 and 796 amino acids in length. There is a conserved FSY sequence motif. 
25.00 25.00 25.30 32.70 24.30 23.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.56 0.70 -4.95 5 80 2010-07-28 10:48:59 2010-07-28 11:48:59 1 2 78 0 5 63 0 214.20 84 34.62 CHANGED QLLAFKslV-PLhpEsVaYlYFPcEWlsLLctHh+sacLssKLKtLNERLYhMFSDILFIQHNPYsLsEsoPWIVuKEPl+QEQLEYIs+uWY..ElIH-WK...PschlsPscLEWQsshISNLPlLHDNcTaaKWIPALITHlFCEcPLHLslsNcs-EElhFsPLRSQ+lsEAMSEPI+DccTQDYFSYVYRFEsITRGGE.NtPLLKVSIGIRRFYQp ................................................+LLTFENIVEPLLNEpVSFIYFPIEWLDIVEIHYKTFLLTSKLKRLNERLYDMFSDILFIQHNPYVLNENTPWIVSKEPIRpEQLDYIFQSWY....EIIHDWK...PNKLIEsPKYEWpYDLISNLsVLHDpEs....YSKWVPALISHIFCERPlpLE..NhNEE-IYFSPLRSQNICEAMSEP.....I...KDEKTQDYFuYVaRFEhITRGGE.NIPLLNVSIGIRRFYQE. 0 0 2 3 +12953 PF13112 DUF3965 Protein of unknown function (DUF3965) Aldam G, Mistry J gba Pfam-B_1524 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 380 amino acids in length. 20.10 20.10 20.50 21.20 18.90 19.50 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.01 0.70 -5.25 3 85 2010-07-28 10:52:17 2010-07-28 11:52:17 1 2 83 0 3 38 1 284.80 92 75.80 CHANGED QEIGWYKEAYTFMVs+sLssFVHTSlEYETWDlLTQAVAWNYLIIKYRIGcLEDuDlhIWERIKFNEECIEcCcsLLSHKEVLEFTFFYlCKQAKpLSKEcLNp-MMsLAIYCNTYVYDLYoYDLL+KYRKCTDFLSYYGPSpuVLACQRAVlAQISDRLNPLKTTHVDDYLYVMKEMMEHMoapFMcRY-HFIGKLLSYVPFFEMIQVPQHAYYCEELMYICKGIuYKEEILRNYlFIQLHDCLPSFIKlFLKNKRYATIHDILFYWCDcEQRMuLERKYNLSFIYEKYA .......QEIGWYEEANsFMlsQGLAEFVHTSLEYETWDLLTQAVALNYLIIKYRIGELTDtDVEIWDRVKFNEKCITDCKHLLSHKEVLEFTFFYMCKRAKSLSKEQLNSDMMSLAMYCNTFVYDLYTaDLLRKYRKCTDFLSYYGPSQAVLACQRAVLSQISDRLDPLKTTHVDDYLYVMK-MMEHMTIGlMDRYsHFIGKLLSYVPFFEMIQVPQHAYYCEELLYICKGIcYKEEILRNYIF...IQLHDCLPSFFKLFLK..NKRYATIHDILFYWCDDE..QRMSLEKKYNLSFIYEKYA............ 
0 0 1 1 +12954 PF13113 DUF3970 Protein of unknown function (DUF3970) Aldam G, Mistry J gba Pfam-B_1596 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved NPKY sequence motif. 21.10 21.10 21.10 21.50 20.60 19.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.85 0.72 -4.28 7 111 2010-07-28 10:55:07 2010-07-28 11:55:07 1 1 110 0 11 35 0 61.30 65 99.96 CHANGED Mhp..lRlpGpcE.EI.chlpshsc..taEloa...spcshttsNPKYchSKslhsYlclKh.....pK MIR..VRIEGTEE.EMLEFMcKMPDIPGFEKTH...hREPR.KGNNPKYDSSKNVLAYLSYKKIE..VANK. 0 2 6 7 +12955 PF13114 RecO_N_2 RecO N terminal Mistry J jm14 Jackhmmer:O25605 Family This entry contains members that are not captured by Pfam:PF11967. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.08 0.72 -4.26 24 198 2012-10-03 20:18:03 2010-07-28 14:46:01 1 3 198 0 22 850 192 71.10 66 34.33 CHANGED MQGaILpsp+V+DEDLIVplLTpsplhphY.....RFYGARH......SsIplGaKIDFElEpst.ph.ls+LRslhHLGatWh ..........MQGFIL+TQK..VK..DEDLIVaILSPctL..lKsY......RF.....YGh.RH.........................SoIhsGYKIDF..t..LEcss..sF..LPRLKDVLHLGFlWI........................................ 
0 6 19 22 +12956 PF13115 YtkA YtkA-like Coggill P pcc Jackhammer_O25884 Family \N 24.40 24.40 24.40 24.60 24.30 24.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.28 0.72 -3.63 90 656 2012-10-01 19:30:51 2010-07-28 15:05:13 1 14 358 0 179 609 32 85.10 22 36.73 CHANGED ptpsps......hplpls.ssp...ph....psGp.s.s..l...hs....t...ss...psGc..sVs.sA.s.l.phphhhst.t..uhtph.pshh...........p.spp...tsGhYphp.ssh.sh....sGpWplplp .......................................tshplplt.ss......h....psGcs.s..h...pl...........p...hs...pssc......sVs.cA.p..V...p.hphhps........Ghtph.pths.......................p.spp...tsGhYpsc.tsh.sh....sGsaplplp............... 0 58 121 145 +12957 PF13116 DUF3971 Protein of unknown function Coggill P pcc Jackhammer_O25308 (H.pylori) Family Some members of this family are related to the AsmA family proteins. 22.50 22.50 22.50 22.60 22.30 22.40 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.69 0.70 -5.03 104 1538 2012-10-03 05:41:17 2010-07-28 16:08:13 1 12 1521 0 320 1280 259 286.00 26 24.21 CHANGED htssspshc.YhPt..hhhupsLpsaLssAlp.......u.GpspsuplhapGslsp.aPat.....ppp..G.FpshsslcsuphpFpss..........WPslpslshslhFpNsslthpsspupltsss.hssspssIscl.sppsh...LpIcuchsu.pspsltchhppoPLhsslu..tsL.stlplsGplsupL.pLsIPLp.....st.......tspGphsl.pssslhls....shplpslsGplpFs.s.......sslsupslpAphhspPlslshss........ppptpshtl.....slslpGph....phptl.t......hst.....ltt...hlsGpssapsslslths...........ptshphpl 
....................................................................s.ssspshR.YhP......hhucs.LhcaLssAlp........u...Gpscs..uplh.....h.t...G...s....pt...F.....Pap..........tsp.....GpF..plh..s..sl..csupa.tapss...............................................................W.Pulpslsh-Lsa....s.ss.....L......hh.....p.....ss......pu..............p.....l......s.....s......l.....p.....s.o.......s......l.............s..ss.............I.P..Dh.sccp.......LhIcuc.....hpG...sups.l.tsh...h.s.p..o.P...L.t..s.hu..ssL.sth..p...l.s.Gs.ls.u..cL.cLsIPLp.....................st...s....sspG.plsL...psssL.lt...s...s......sLpslsGp.hpFs..s........sslpup..slp..AphhstP..lslc.hss...........pp.tsp.shps........tl.slsush.......ps.tth.s.........hls.t.......l.p...hlsGsssaphpltlt............................................................................................................... 0 70 171 243 +12958 PF13117 Cag12 Cag pathogenicity island protein Cag12 Mistry J jm14 Jackhmmer:P97245 Family This is a Proteobacterial family of Cag pathogenicity island proteins. 25.00 25.00 26.20 25.90 21.70 19.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.41 0.71 -4.17 13 185 2010-07-28 15:30:54 2010-07-28 16:30:54 1 2 135 0 10 136 1 109.40 36 56.11 CHANGED hhhshlLsuC.SSsPcPscl-tsp.tlslNsplhppp.hshVPKss.hlss.sWsYplhlpsh..c-chlssc.hsphFhlAHsuc+IlllGptsphtpYKpahppNGspusIp..lQP ......h.lLsllLoAC.Ss.sc.s.h.pcs+shpsl.Np.Ll-p..aSplPhss..plpshsahhplhl.sa...cDhllcsc.hslpFtLsHpo++IsllGcusphhpYKsYhptNGApusI-..lQ................... 0 2 5 9 +12959 PF13118 DUF3972 Protein of unknown function (DUF3972) Mistry J jm14 Jackhmmer:O25162 Family This is a Proteobacterial family of unknown function. Some of the proteins in this family are annotated as being kinesin-like proteins. 
22.70 22.70 22.70 23.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.49 0.71 -4.05 47 197 2010-07-28 16:11:16 2010-07-28 17:11:16 1 3 197 0 21 106 3 123.20 63 65.04 CHANGED EFs+LspLsc-hlhthhspGtLpsKp.EcGKlhI-AspGT.ulV.sstpshhuMspshshht.s..FVEKTIGTILNLHEKVLsAKDETlpAlKNENpFLKDALhSMQElY-ED+KTI-hLppEL+pAR ..................EFCKLVHLsE-VVcuMhssGsLshKE.E-GKIYIEAppGT.h.........SVVPuu..spshsuM.......s.........sShslsupS..FVEKTIGTILNLHEKVLDAKDETL-ALKNENKFLKDALYSMQELYDEDRKTIETLspELK+AR... 0 8 18 21 +12960 PF13119 DUF3973 Domain of unknown function (DUF3973) Aldam G, Mistry J gba Pfam-B_1636 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved YCI sequence motif. 25.00 25.00 53.00 52.30 24.50 23.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -8.27 0.72 -4.03 3 87 2010-07-29 08:33:42 2010-07-29 09:33:42 1 1 86 0 4 44 0 41.00 92 51.35 CHANGED MYYCIsCSEIHHEKusNDKVFKNGFYIDPFLGERYHLGMCK MFYCINCSDIHHEKHPNDKVFKNGFYIDPFLGDRYHLGMCK. 0 0 2 2 +12961 PF13120 DUF3974 Domain of unknown function (DUF3974) Aldam G, Mistry J gba Pfam-B_1643 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length. 25.00 25.00 38.30 38.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.86 0.71 -4.30 2 82 2010-07-29 08:38:48 2010-07-29 09:38:48 1 1 81 0 3 41 0 124.00 94 45.69 CHANGED MuFIphVLLLlGTLLLIuFTlVVLlVYFGRKhYhSWsKPYKRAp-Sl-KLSNKShPFLQEFTQHPLFYRWIRTEGKKEQpshNTLFCsusQRTREQVFSMLPK-+QKKVHsMAKoTKKlTNE..DI MSFIQTVLLLLGTLLLIAFTVVVLVVYFGRKLYFSWTKPYKRApDSL-KLSNKSlPFLQEFTQHPLFYRWIRTEGKKEQ+TLNTLFCASuQRTREQVFSMLPKEKQKKVHVMAKTTKKLTNEDIDV.... 
0 0 1 1 +12962 PF13121 DUF3976 Domain of unknown function (DUF3976) Aldam G, Mistry J gba Pfam-B_1743 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. 25.00 25.00 83.50 83.00 18.60 18.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.02 0.72 -4.11 3 82 2010-07-29 08:43:40 2010-07-29 09:43:40 1 1 82 0 3 20 0 40.00 94 67.13 CHANGED MYLFVRKDITKDNTLTKRGFYKLIGCLVVMFIGIIVMIVLl VFLFIRKDV.QGGTLTKRGFYKMIGCLVVMFIAIIVMIVLI 0 0 1 1 +12963 PF13122 DUF3977 Protein of unknown function (DUF3977) Aldam G, Mistry J gba Pfam-B_1744 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 61.30 61.20 20.00 17.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.67 0.72 -4.17 4 85 2010-07-29 08:51:07 2010-07-29 09:51:07 1 1 85 0 2 40 0 76.40 87 91.52 CHANGED MKYIEhGIGN+WhVRTETEpEDGTEFEQKGIlKPIYFESlYlRlWFRKTClIhDoKEGFKKh+KpRsEYKFIhGIVS .MKYIEIGhGNRWFVRTETENKDGTEFEERGIIKPIYFESLYVRlWFRKTChIFDTKEGFKKVKKRRIEYKFIlGIVS 0 0 1 1 +12964 PF13123 DUF3978 Protein of unknown function (DUF3978) Aldam G, Mistry J gba Pfam-B_1745 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. 
25.00 25.00 27.00 26.70 19.00 17.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.72 0.71 -4.80 3 84 2010-07-29 08:52:49 2010-07-29 09:52:49 1 1 83 0 4 30 0 143.90 88 98.04 CHANGED MEAYNMHNFINTNIESpPcETsFNLHICEoNEFDsNLTKSTTLSFIVTK+NIKIlTKKWINScpESMIGKSYIIPTKAFHYlLPIIsEoEEEMsIQVQSFGlsGELLLNERLLIcKNN+hNS.KIsuFFEALNENI+QALRTLQIp .......MEAYKMHDFINTNVESHQNETVF.NLcICE.TsEFDVSLTKSTTLSFIVSKKNIKIVTKKWINSNQESMIGKSYIIPTKAFHYFLPIISETEDELNIQVQSFGLHGELLLNERLLIDKNNKHNu..KIToFFETLDENVNKsLRGLQI.H.... 0 0 2 2 +12965 PF13124 DUF3963 Protein of unknown function (DUF3963) Aldam G, Mistry J gba Pfam-B_1512 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 85 amino acids in length. There is a conserved DIQKW sequence motif. 25.00 25.00 84.10 84.10 18.40 17.50 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.98 0.72 -4.16 4 53 2010-07-29 08:58:40 2010-07-29 09:58:40 1 1 51 0 2 29 0 40.00 86 64.97 CHANGED MlhINshFIERYFcDIQKWIRNIThCFALLVVsLVuLWIG MLSIYTAFIEKYFcDIQKWIRNITFCFALLVVVLVALWIG. 0 0 1 1 +12966 PF13125 DUF3958 Protein of unknown function (DUF3958) Aldam G, Mistry J gba Pfam-B_1404 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. There are two conserved sequence motifs: RLF and TWH. 
22.80 22.80 24.30 24.20 21.30 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.42 0.72 -4.11 9 95 2010-07-29 09:08:48 2010-07-29 10:08:48 1 1 45 0 3 73 0 92.20 64 82.10 CHANGED VFEEQDRNQpAIQpQEpAEtDFaEh+sRssRLFsRILETWHsDKElSpFFhNhpQEuQaIERKLTFELENQKETLlKE+RcLpDLENDLoYppQpLt+E ...............................lFEEQscNp.AlQtQEpAEAsFaEW+sRspRLFsRILpTWHGD+EhspFFhNhhQEspplERKlTFELENpKETLLKE+RcLS-hENDLSYpQQ.QLtRE....... 0 0 0 1 +12967 PF13126 DUF3975 Protein of unknown function (DUF3975) Aldam G, Mistry J gba Pfam-B_1736 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 25.00 25.00 43.10 43.00 18.90 18.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.04 0.72 -3.83 2 81 2010-07-29 09:50:29 2010-07-29 10:50:29 1 1 81 0 2 42 0 78.80 90 93.89 CHANGED MWKEKGpQllshIhlGIVlLLQhSFHhIE.LFHKslSILTFhPNMsLEllSIVWSIIASIhIlIIW.....uIthLhpplhhKcS MWKEKGKQlLAWITLGIVILLQISFHIIEWLFHKVlSILTFLPNMTLEhISIVWSIIASIAIVIIW.....SIAKLWNKLFKKDS 0 0 1 1 +12968 PF13127 DUF3955 Protein of unknown function (DUF3955) Aldam G, Mistry J gba Pfam-B_966 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 68 and 87 amino acids in length. There are two completely conserved residues (G and E) that may be functionally important. 20.60 20.60 22.50 22.20 19.40 19.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.88 0.72 -4.43 30 317 2010-07-29 10:35:54 2010-07-29 11:35:54 1 6 248 0 29 152 36 61.90 33 75.10 CHANGED hppahluhlhhl.lGlhChhhashhsShVsssGhLpEP.FaLIPlualhlhhu....llshlhshlp.ph .......hpphhluhhhhl.hullhhhIpshs.ohlsssGhLcEP.hF...ahlPlGalhllhu....hhshlhshlpp.h........ 
0 13 21 25 +12969 PF13128 DUF3954 Protein of unknown function (DUF3954) Aldam G, Mistry J gba Pfam-B_934 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 60 amino acids in length. 22.20 22.20 22.60 22.80 21.60 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.33 0.72 -4.54 12 144 2010-07-29 10:39:09 2010-07-29 11:39:09 1 1 89 0 4 82 0 48.70 54 84.47 CHANGED MK+.EIshtpNtlYlV.pcGclphl.pPPpoGFGEQslhWpsGKVs+scsppT ......MKt.EIDlppNtIYlV.KNGpVphl.pPPtoGFGEQshsWpsGKVsRs-sphT.......... 0 1 3 3 +12970 PF13129 DUF3953 Protein of unknown function (DUF3953) Aldam G, Mistry J gba Pfam-B_875 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 47 and 76 amino acids in length. 21.80 21.80 21.80 22.10 21.60 21.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.76 0.72 -4.33 36 454 2010-07-29 10:41:40 2010-07-29 11:41:40 1 1 294 0 22 219 1 41.90 39 68.35 CHANGED hhhhL...GhhhlhhGlp-h+...ccc....KshGhhshlsuhaslhVul.p ........hLL.GlhslpIGhpphK.....Kcc........KhhuIlshLAGshlllVulh..... 0 5 13 13 +12971 PF13130 DUF3952 Domain of unknown function (DUF3952) Aldam G, Mistry J gba Pfam-B_704 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. There is a conserved VMSAS sequence motif. 
25.00 25.00 52.60 52.10 18.20 17.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.35 0.72 -4.07 19 114 2010-07-29 10:44:54 2010-07-29 11:44:54 1 1 37 0 2 107 0 104.40 60 41.53 CHANGED usLLSGCuFGE......TKIEYERhVKALDEGDMKTVMSASDDGYAaVcEcsI....aSsaEcKEDGpHp+slYQTT-GlYNhK-KsLYGpToQplsoclcsccp+cp.psYKcEpl .....SLLSGCuFGE......TKIEYE.hVKALDEGDMKpVMSASDDGYAYVKpcsI.....STaEpKEDGcHppsIYQTTcGlYNsK-KsLYGpToQclsoslcsccp+cc..sY+pp................ 0 0 0 0 +12972 PF13131 DUF3951 Protein of unknown function (DUF3951) Aldam G, Mistry J gba Pfam-B_698 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 56 and 71 amino acids in length. There is a conserved YTP sequence motif. 25.00 25.00 39.60 39.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.69 0.72 -4.19 4 137 2010-07-29 10:49:09 2010-07-29 11:49:09 1 1 86 0 5 52 0 50.70 61 82.10 CHANGED MILhTIGhlLhTlFIFFIIGFlTFpMFVsKATPQIYYTPC-shTsQohpctpp ...hlLhTIG.l.LTlhIhhIIGF..ahhFlcKto.p...aYTPh-slTspohuca+.c 0 0 2 2 +12973 PF13132 DUF3950 Domain of unknown function (DUF3950) Aldam G, Mistry J gba Pfam-B_688 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 30 amino acids in length. There is a conserved NFS sequence motif. 25.00 25.00 36.80 36.30 23.60 23.00 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.39 0.72 -4.42 8 501 2010-07-29 10:51:02 2010-07-29 11:51:02 1 1 257 0 7 125 0 30.00 81 50.68 CHANGED LL-QIstuhtpEp....ouNFSAWVh-ACRcKLp .MIEQINIAL-pKG....SGNFSAWVIEACRRRL.. 
0 2 2 7 +12974 PF13133 DUF3949 Protein of unknown function (DUF3949) Aldam G, Mistry J gba Pfam-B_636 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 69 and 87 amino acids in length. 25.00 25.00 61.30 61.10 17.30 17.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.04 0.72 -3.75 5 147 2010-07-29 10:52:29 2010-07-29 11:52:29 1 1 88 0 5 77 0 60.90 70 78.40 CHANGED MIPIQYtYIcuLK...Ec++KpGlSQcELY-NMSFEEEQLHYHsQGNlFsIPuuhVAShIY+lKp .hlPIQY.YlphL+...EKpKKhulSQpELY-pMSatEpQlHaHhQuNsFsIPuuhVA.hIhKVK.. 0 0 3 3 +12975 PF13134 DUF3948 Protein of unknown function (DUF3948) Aldam G, Mistry J gba Pfam-B_550 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 25.00 25.00 51.80 51.60 20.80 20.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.47 0.72 -4.48 8 175 2010-07-29 10:54:03 2010-07-29 11:54:03 1 1 80 0 13 52 0 34.70 69 93.11 CHANGED MpN.cQVLQVTKhDFLGSASGAslLTAhIVFLusVL ...Mps..EQVLQVTKsDhlGShuGAVVLTuhIlFLusVL 0 0 4 4 +12976 PF13135 DUF3947 Protein of unknown function (DUF3947) Aldam G, Mistry J gba Pfam-B_493 (release 24.0) Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 
25.00 25.00 26.90 26.90 21.60 19.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.97 0.72 -3.78 13 175 2010-07-29 10:55:40 2010-07-29 11:55:40 1 1 80 0 4 91 0 77.00 55 98.40 CHANGED hssYa.sphplt.......suIThuuAQuTlpAVpQAhQ.....MQQQh..th..p.......uh..aYsshtY.......hYPs........sFoTIPaGssY.L .........h.pYFhstttht.......suIThuGAQuTlpAV.pQAlQ.....MQQQhQ.....Q.......G....YSph.h.......hYPs........oFhoIPYGusY.L.......................... 0 0 3 3 +12977 PF13136 DUF3984 Protein of unknown function (DUF3984) Aldam G, Mistry J gba Pfam-B_3236 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 393 and 442 amino acids in length. 21.30 21.30 40.40 30.50 21.10 20.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.36 0.70 -5.21 20 72 2010-07-29 12:40:09 2010-07-29 13:40:09 1 2 68 0 59 69 0 298.50 34 77.17 CHANGED RSRRSasSLpHlSLAPLTsRFPlDDDss..........s.tst....t.hstsspsspTSYLuShSVPsTPslL..ScSRssSpsRtp.....p+ppoopptphS-.....osLcupsstpshHHppp.............pcptpppsssst............p.ss.tppc.pDsEWLLRAGlALASSTREEKGQSWLVKR-SSTSLV..................uEss.s..ptht+ppc............tppptoRRuR......................SGhSTP...sAhSRRsSpSRsuSRtu.SRs-L..sMTuhch............sttsttths...ss..p-sp...thlPDFVDcclRsEMt.h................tp.cp..........s.............st....u-sDsE-.......-.hDEtEhQRLTRc.cGhGLGuWI.DRlVEWTLFuVE- 
.............RppRS.ssLpHlSLAPLTs+hPl-D-s..............................thtsh..poSYLpuhSsPsTPs..lL..Sp..Stssupsphp.......tpss.tt.hSp.......o.h.puhsss..p.hppppt......................pt.tppphspt..........................p....shsppc.tDs-WLLRsGhsLuSpsREpKGQSWLVpRpSS.TSLl.....................spsp.s....cthtcppc......................tspptuRRup.........................Ssh....uoP...hshSRhs....S+s.sS+....hu.SRspL......MTshph.........................tt................psh...hhsPDFV-.s.+.Eh.......................................................................t-p-spp...-..DEtclpRLsRc.pshG..lGuWl.-plltWoLFuVE-................................................................................... 0 12 27 46 +12978 PF13137 DUF3983 Protein of unknown function (DUF3983) Aldam G, Mistry J gba Pfam-B_2658 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 40 amino acids in length. There is a conserved AWRN sequence motif. 25.00 25.00 39.50 39.50 19.90 18.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.56 0.72 -4.27 15 83 2010-07-29 12:42:07 2010-07-29 13:42:07 1 1 55 0 3 48 0 34.70 68 79.89 CHANGED KK+Kl+KAIsRRuKsl...EK.cRVcpAWRNIFVQAGI .KK+KlRKAIARRsKsV...EK.aQVsKAWRNIFVQuGI. 0 0 2 2 +12979 PF13138 DUF3982 Protein of unknown function (DUF3982) Aldam G, Mistry J gba Pfam-B_2022 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 47 and 73 amino acids in length. There are two conserved sequence motifs: EKL and EIP. 
25.00 25.00 34.50 33.60 19.40 17.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.28 0.72 -4.09 20 72 2010-07-29 12:45:00 2010-07-29 13:45:00 1 1 40 0 0 43 0 54.10 58 87.08 CHANGED M.....hGI.IshtVshTEIlAPAsNVSIVVNESESPI+hEEKLSVATAPLIEIPTPsGAHPGAVV-hDTLIT .......hGI.IshtVshTEhhsPAspVoIV.pEot.PIph.EKLpVushPh.EIPsPsGtcPGtVV-h-pLIo 0 0 0 0 +12980 PF13139 DUF3981 Domain of unknown function (DUF3981) Aldam G, Mistry J gba Pfam-B_1754 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. 25.00 25.00 186.40 185.60 20.20 20.20 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.41 0.71 -4.09 2 80 2010-07-29 12:48:28 2010-07-29 13:48:28 1 1 79 0 3 48 0 114.00 96 19.90 CHANGED hIPWTRuuptLRsVDKcGscKVlKtKpusILhIPlLhWIGIAIYEYhWLIDDRVDSIlTHYSVslAlLIGlVLhSQsphG+LEspLKullMhILLsSYGYFGYLHDIVISQKKY LIPWTR.SGSKLRAVDKKGDEKVVKGKKSSILVIPVLFWIGIAIYEYFWLIDDRADSILTHYSVAVAILIGLVLFSQDQIGKLEGTLKGLLMFVLLASYGYFGYLHDIVISQpKY 0 0 2 2 +12981 PF13140 DUF3980 Domain of unknown function (DUF3980) Aldam G, Mistry J gba Pfam-B_1748 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. 25.00 25.00 77.20 77.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.06 0.72 -3.71 3 79 2010-07-29 12:54:34 2010-07-29 13:54:34 1 1 79 0 2 48 0 86.60 85 67.47 CHANGED MEQEQTSYLSIKILKIMSVIYLIVSILsAlSTGuFIpss..GFs.uISlSGoG.uAlGllhLGSIFQSVLVFCGIWVFILLVETVIKIYEK .VEccQTSYLSIKILKIMSVIYLIGSILMAFSTGPFIHNl..GFD.EISISGSELGLISIVMLGSIFQSVLVFCGIWVFILLVETVIKIYEK 0 0 1 1 +12982 PF13141 DUF3979 Protein of unknown function (DUF3979) Aldam G, Mistry J gba Pfam-B_1747 (release 24.0) Family This family of proteins is functionally uncharacterised. 
This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 22.50 22.50 24.50 216.30 20.40 19.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.38 0.71 -4.19 3 81 2010-07-29 12:55:49 2010-07-29 13:55:49 1 1 81 0 3 37 6 113.70 90 95.87 CHANGED hoLFQsAPhEDtKGGWKYIIQEQNGKYpIsN-IussHMSVELYFNEYDElRITLYKDGpPITTMQRIsIlKlELEEDEEGIQFVLERMPSRMIRLQLKPYLAlEMGLYWEVCED .MTLFQAAPKE-sRGGWKYIIQEpNDKYEIVDEMLKNQMSVELYFNEYDEVKITLYK-GhPIoTMQRIAISKVELDEEEEGIQFVLERMPSRMIRLQLKPYLALEMGPYWEVCDD 0 0 1 1 +12983 PF13142 DUF3960 Domain of unknown function (DUF3960) Aldam G, Mistry J gba Pfam-B_1431 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 72 and 89 amino acids in length. 21.50 21.50 22.10 22.60 20.40 20.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.98 0.72 -4.21 4 84 2010-07-29 12:59:48 2010-07-29 13:59:48 1 2 82 0 3 37 0 88.60 94 23.50 CHANGED MhAsp..PNWsLVhDsYhEPNNFADLFSLLVPs+PKGEuKERTILsWKEKEFYKEENLhPFILYGMNKuK-LPQFHKDEIPTLVRIVRL ......MKAVQtDPNWNLVTDTYIEPNNFAELFSLLVPCHPKGEGKERTILVWKEKEFYKEENLAAFIVYGMNKAKNLPQFHKDEIPTLVRILRL. 0 0 1 1 +12984 PF13143 DUF3986 Protein of unknown function (DUF3986) Aldam G, Mistry J gba Pfam-B_362 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 
20.30 20.30 22.30 22.30 19.90 19.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.11 0.72 -3.53 12 206 2010-07-29 13:04:43 2010-07-29 14:04:43 1 1 96 0 12 100 1 86.80 50 91.36 CHANGED hpaDsspHLHlGYY-cshDl...EulAYKphscslWslahsatthshhhpph.pth...hh-thGhpl..aolcspDLs.-tusthFEcWLhcNp ......cYDsShHhHlsYatsthD..l...EshAYKRhNEsVWDlYhsahthss..hcclEpt+.....h.-.hGhhV..aSlcspDl..s..E.upt.FEcWlh+Np....... 0 1 6 8 +12985 PF13144 SAF_2 SAF-like Coggill P pcc Jackhammer:O26012 Family The members of this family are similar to those in the SAF family, and include flagellar basal-body proteins and pilus-assembly proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.97 0.71 -4.82 119 1927 2012-10-01 20:51:14 2010-07-29 14:46:06 1 4 1667 0 431 1918 464 178.10 24 76.94 CHANGED lpptlpp....hlppp...hsth........ph.........plpshplc.....s.php..hss....s....ps.hp.....h.plss...spt.tspsslpl....p....s.p..........ssp...sa...p..lalssplph....h....sphl............VAs+sLs+GphlsssDlshpphslsplt.s.shh..s...-..p..llGtps+RslpsGpslptsplt.shlVp+GppVplhAps.suhplpspGpALpsGshG-pl+V+N.pSs+llsupVpusGpVcV ........................................................................h......t.h.tp........h............p.....h.....hp................hs....s...p........ht..ss....ss.t....h...up...h..s..h.....h....p....s................s....p....haltsplp.s............p...ts.hl..................................Vuspsls.+..Gphlss....s.slp..hcp......hcl.s.p...ls..p..shl...s....hs........p...ll..st..h......ut..+.s.lssspslphstlpp..shhVptG.pp..V.lls.ps.suhslsspGpALpsushuppl+V+.....sSs....p..l....lpuhVsu.sGpl............................................... 
1 114 243 333 +12986 PF13145 Rotamase_2 PPIC-type PPIASE domain Bateman A agb Jackhmmer:O25628 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.39 0.71 -3.48 206 2673 2012-10-02 13:30:10 2010-07-29 16:26:11 1 27 2057 2 569 6738 2215 123.40 20 28.69 CHANGED l....s-..p-lcphYc....p..pts..pa....t.ps.pt...p.h.hp.........hhh.hs...sp....st.s...p..tt.....tpt.....hpt..t.........sh...pp.h..ts..h.tp..t..p..shs......hps..hs...........htp.ts.....................p...hst..................................t.htptl..h...sh.....ps..G..p....h.s..s.s..l.......ps..st......u.h..hlh+lssh.p.s..sp.shs.h-..c..s..+.s .............................................................................................................................................................o-t-lpphY-..............p....pps....pa................t..t....pt...h.t.hp...................hlt..hp............sc.........sp..A.........c....ts..............hpp.........hpt.t...............................t.......ts.F...ss...l..uc...c.....p...uss...............sss..hs................hhp.ts.............................................s..lPt.....................................p.ltp..As...h.......p.h..........cp..G..p.....l.S..s..s....l........ps..ss...........u..a.hll+lsch.p..s...tp..sts.hpcs+.......................................... 0 194 373 478 +12987 PF13146 TRL TRL-like protein family Bateman A agb Jackhmmer:O87326 Domain This family includes the Swiss:O87326 TRL protein that is found in a locus that includes several tRNAs. The function of this protein is not known [1]. The proteins in this family usually have a lipoprotein attachment site at their N-terminus. 
25.00 25.00 27.90 26.10 24.80 24.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.31 0.72 -4.00 22 210 2010-07-29 16:26:30 2010-07-29 17:26:30 1 1 172 0 33 102 11 77.10 51 73.76 CHANGED sGhlaspsshPstss.......sspssKpGcACspo..lLslVuh.GDuSl-sAtcpusIsclspl.....Dapspshl..slasphCslVpGp .........................u.huLYTcVpsPlouT.......slsuoKoGKACApo.....VLGlVsT..GDASI-oAK....KuGcIShVoSV.....DYETTGsa...sh..YGKsCVVV+Gp. 0 9 26 27 +12988 PF13147 Amidohydro_4 Amidohydrolase Bateman A agb Jackhmmer:O25045 Domain This family of enzymes are a part of a large metal dependent hydrolase superfamily [1]. The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source [2]. This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit [3]. Dihydroorotases (EC:3.5.2.3) are also included [4-5]. 
25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.94 0.70 -4.37 111 10639 2012-10-03 00:45:34 2010-07-29 18:01:37 1 79 4086 62 2860 18795 7896 309.00 16 70.36 CHANGED Dsp...G.thlh..PGhlDhHs+.....................hstthsst.ht.hh.........tuhhshh............tsshpssthttthtp........................................hhh.sththtsttthhtthpphhthhtttshsh.th...............................thttsthtthhp...tstpts.hhhhhsttstt.........tt.httththhthh........................hthstshplhpthssthshththhttttsthh.t.............hthstttslssssht.......hhp.h.th................shthhhhssh..................tptshtsltphhpsG...hhhhluoDth.........................sssshhtthhhhhht...hshs........pslphsotssuphhsh........tphGtl..t.GtpAchl ........................................................................................................................................................................................................................................................................................................DspG.thlh....PGhlD.hHsH...............................................................h..t.t.s.s.h...h..h.p....s..h.................................h........t..h.suGhoshhp...............................hssspsthh.ttth.hp.h.................................................................................................................................................hhh.h.h..t..h..h..h..t...t.........h..h...t......th..p...p...h.t....t....h....h..t.......h..t...h...h...s.h....t..h..............................................................................hh.sst..h..h.t.hht..............tt..........t.ths................................hhhh..t.tp..t.......................................t.h.h..ttt..t.h.hh.................................................................sts..hp..h..h..h..h....h..t..t....t.......h....s....h...th.ph....htt..ph.st.......t.ht.................................hths..cttulssst.t
.....................................................hhtpth.th...........................................sshhhh..sPsht................................................pppshtsl.h.......p..h.lp.sG.................shs.s.lu..oDa.ssts....................................................................................shts.hl.hh..h.t.hhht....................htl.oh......t...............................phlph..h.o.ts.sA.c.hhsh..........................tptGpl.......tsGtpADls......................................................................................................................................................................................................................................... 0 927 1826 2398 +12989 PF13148 DUF3987 Protein of unknown function (DUF3987) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 27.00 27.00 27.00 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.88 0.70 -5.90 117 899 2010-07-29 17:03:48 2010-07-29 18:03:48 1 19 543 0 189 834 169 314.80 20 61.84 CHANGED ppspsshshhshusLsslusshtst.sclph....sshht..sssLahhslupuGpt.Kosstphhhtslpphcpphtpphppphppactpt....th..hphctpshcpphtt...t......................ttppPphP......t.pllss-sTspulhptLtpsss.shhlhssEusshhsuhs.h......phhpphshhpcsacGss.lshsRpspsp..ht.lpp..spLolhlssQPshl.phlhthps..hpspGhhuRhL...hshPsstttp...c............shsp....tthpsatpclpplhpt...............tttp.hhlphoscAppha.phasplcpch.t.suph.....hpshsuKhsttssRlAullphhc..............................................ttstpIss-shptAlp.lscahhpcutclhs.hhsssp 
..................................................................t....h..shhhhuhLsshuh..shts..........hplth.....sshht.sssLahhhlu.supt.Kos.h.t.h.h.h.t...........hh.hcpthhpphtptht.t.aptt.......hp.p.tth.pt........................................tpts...P.....hhhhlhs.s.sT.pulhp........h...tps.t...shhlhp.sEhss.lhsshs...........pt...sh....hpphas.......Gs.....s...lshsR...ps...pst.....hh..lp.p....splolhhhhQPshh.phhhptps.........sp.....GhhuRhL...hshP.s..s...tphh..p...................................p.....thhptht...p.phpclhph.h..........................................ttp.hs..lpho.sp.Appha.ph..hp.p..ltpch.t.....s.......hps.hhs.+h.st.hhRlAhlhthhp....................................................tph...Is.pshptAht.ls.p.ahh.p.chhhl.s......t............................................................................................ 0 73 131 165 +12990 PF13149 DUF3988 Protein of unknown function (DUF3988) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 
29.90 29.90 29.90 29.90 29.20 29.80 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.44 0.70 -4.89 252 1094 2010-07-29 20:05:34 2010-07-29 21:05:34 1 66 98 7 150 1009 2 288.40 14 64.34 CHANGED hhlsuCspsc......................t.t.stststs..........lplsss..................ststTRusss..............tts....-plG........lash..............................t.ssshsstthhsshhhhtss..............stshssss.thaa........s.spphsh.hA....Ya......P.........hpssssss........ht...ssssp......................t......................Dh....lhApsps.........sssssshs........................tFpHthopl.plplp.t..ssshsst.....hh....plpshtsp.................ushs.....h.....sGshsssss............................ssshshts...sssshsh...........hhll..P.......................tsst.........hplphshssp......stphhhsstttt..........................sGppasas.........lslsps...........hhhsssslssWss ..................................................................................................................................hhsuCspcp........................t.....tstpts...............l.phsss.....................ssstTRusss............thpss..........cplG...............lash.................................ts.sst.hhssh.hh.h.tss..........................................shshssss...hha.........s.ssph.sh..hA.............Ya..........P.........................asssssss............shs...hs..........hsspp...................................t......................Dh........hhupsps.....................tsssssls.....................................h.....pF..pHthopl.plplp.s...ssshsss.......................h....hplps..h..tsp...................................ushs...............hs.........sGshsssss..................................................s.sshshtt......sssstsht..................hllh.....P......................tsss...........................hpl.phsh..sst......................shthhhsttth...........................................sGppaths.lplstt......................t.ph..W............
.......................................................................................................................................... 0 23 126 150 +12991 PF13150 DUF3989 Protein of unknown function (DUF3989) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 27.00 27.00 27.00 27.30 26.40 26.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.60 0.72 -4.53 17 195 2010-07-29 20:28:02 2010-07-29 21:28:02 1 1 80 0 16 135 3 83.90 32 85.32 CHANGED +phlcthpcth-scLRthhstLsPctRlhllLshhshFusLulYhhssuIacl....G+p-tpphpIcHIcpl-L....pp-ohNhhp.hp ............h.hlpchpchhss+L+thhstLsPcpRhtllLsMhshFusLulYhhspulYcI....G+..p-G.pc..hphcHltplcL.........s-ohp.hp.................................... 0 4 14 16 +12992 PF13151 DUF3990 Protein of unknown function (DUF3990) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 27.00 27.00 27.10 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.86 0.71 -4.84 47 384 2010-07-29 21:37:02 2010-07-29 22:37:02 1 9 247 0 57 326 11 140.90 29 74.19 CHANGED hhLYHGSsphlc.pPclttu+.ppDFGpGFYsTp.h-QApcWApph...........tpsshlNhYpl-hsthp...LclhcFp.thspcWLsFlhpsRpstt....................csYDllhGshADD.phashlptahsstIoh-thhctL+htp..spQhshpop+Als.pL+ah ...........................hlYHG....S......s.....h.......l.....c......p.......P..ph.ths.......+...p..h..DF...G....p....GFYhTs.hcQApcWAtph...........................ttpshls..hYphs......phh.......hphhhF........p.....ths.cWlpFlhpsRpt..pt....................ppYDllhGs..hAsD...phh...phlp.ahp...s.......h...........l.......sh...c....p...hhpthp..htp......spQhshtoppshp.hLph.h................................................ 
0 28 42 49 +12993 PF13152 DUF3967 Protein of unknown function (DUF3967) Aldam G, Mistry J gba Pfam-B_1529 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 173 and 249 amino acids in length. 21.60 21.60 21.60 22.20 21.30 21.40 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.43 0.72 -4.31 15 228 2010-07-30 08:43:20 2010-07-30 09:43:20 1 2 107 0 8 152 0 35.30 50 17.38 CHANGED RtppRDppLMpsIRElQEoK+LIAAScpK..hpFW ...+.ppRDppLMpsIREIQETKR....h.......lA....Ao..KE...p...p..hhp.a................ 0 1 3 3 +12994 PF13153 DUF3985 Protein of unknown function (DUF3985) Aldam G, Mistry J gba Pfam-B_3329 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 25.00 25.00 82.80 82.60 20.40 19.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.38 0.72 -4.15 8 55 2010-07-30 08:48:10 2010-07-30 09:48:10 1 1 55 0 2 11 0 44.00 94 95.65 CHANGED MEILoIILIVLLIYVVFKVAYVALKILAILLIIFLIVEFGSKLL MEILTIILIVLLIYVVFKVAYVALKILAILLlIFLIVEhGSKLL 0 0 1 1 +12995 PF13154 DUF3991 Protein of unknown function (DUF3991) Coggill P pcc Jackhammer:O25192 Family This family of proteins is often associated with family Toprim, Pfam:PF01751. 
22.20 22.20 22.20 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.75 0.72 -3.79 113 763 2010-07-30 16:16:39 2010-07-30 17:16:39 1 15 512 0 102 654 29 82.50 24 16.66 CHANGED sYLpppRtlstpllpthhp.....pshlhp.........sph............t......Nhl........FsttD.............p.pGpspGhphRGs...................ttsa.+uhspG.......sshhashsh.........sps .............tYLpppRslsppllpthhp.....pshlhp.........sph................p..................sll........Fhth-.....................ppsphtuuphpGh....................ptsa.KthspG.....sshthuhsh.......s.......................... 0 36 72 92 +12996 PF13155 Toprim_2 Toprim-like Coggill P pcc Jackhammer:O25192 Family This is a family or Toprim-like proteins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.64 0.72 -3.67 328 1811 2012-10-01 21:47:57 2010-07-30 17:17:53 1 42 948 0 254 5018 2583 95.50 22 20.72 CHANGED hlhEuhhDslShhp....hthps...................slushGsshp.........pthphL..pph...tp............lllshDsD...........pAGppAsp+hh..phhtp.....................hphphhphstt.c.....D...hs.........-hl .......................................hlhEuhhDhlSahp......lthpp..t..........................lsl.susu..s..h....t..............................phhphL.....pth........pp......................................lhhs.hDsD.............cAGccsspclt.....pthttt...............................................psth.h.h.tt..K......DhN-hL.............................................. 0 97 201 234 +12997 PF13156 Mrr_cat_2 Restriction endonuclease Coggill P pcc manual Family Prokaryotic family found in type II restriction enzymes containing the hallmark (D/E)-(D/E)XK active site. 
Presence of catalytic residues implicates this region in the enzymatic cleavage of DNA [1,2] 22.00 22.00 22.10 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.61 0.71 -4.24 8 260 2012-10-11 20:44:47 2010-08-02 13:31:32 1 15 227 0 57 253 94 120.90 35 9.82 CHANGED csDPshpspYscVpTas-WAc..pGhsppDTGIDLVA+hRcsDuasAIQCKFYsssHpIpKsDIDSFhsASGK........c.FopRlIlsTTc.cWScNAEshLcsQplPlsRIsLscLEpSpIDWstats+ucllLp .............................-Phhtppa.c.pVahWtD......Ws..........p.....t..s.....pp..DsGIDL.VAp..pp..s..........s...s.p.......a..hAIQCKFY....pss..t..pltK.s-.IDSF..hs....s....SG.+...............sh..Fsp.R.lIlo....T...Ts.cWu....p...NA-cs.l....ps....Q.t...hslp+Isht-ltp.....S.IDWshh..t......t............................................................. 0 17 32 42 +12998 PF13157 DUF3992 Protein of unknown function (DUF3992) Aldam G, Mistry J gba Pfam-B_480 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 98 and 122 amino acids in length. There is a single completely conserved residue T that may be functionally important. 22.00 22.00 22.00 22.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.14 0.72 -3.79 10 220 2010-08-02 13:24:37 2010-08-02 14:24:37 1 1 84 0 11 114 0 90.20 46 77.59 CHANGED VCssWshs..suAsshllYssNlsQsIsGTGaVchDsGsu.....sITVshl...ssGTsVsThTVsPGoStSFThRcFsoIpIls...ssuGoppGcaClTh .............VCosWSh...ssAhspllYTNNIsQpVhGTGaVKYDVGsu.....PITV-hL...suGTVlDThTVpPGoShoFThRcFsolpIss......Tssss.QGEFCITs......... 0 0 5 5 +12999 PF13158 DUF3993 Protein of unknown function (DUF3993) Aldam G, Mistry J gba Pfam-B_782 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 160 amino acids in length. 
22.40 22.40 23.60 63.80 21.20 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.37 0.71 -4.31 5 94 2010-08-02 13:25:49 2010-08-02 14:25:49 1 1 94 0 5 39 0 119.70 79 72.48 CHANGED hIhLlthlsVAFLVsaulTpl.uK-csKlDRcEVFpTlQouaEsQFSLoEKcRoMs-MhulL-PYFT-uFtslFl-ENups-cpG..haGoDhGEYhIPs....FSFou-TKluhD-E+..lYVYEass ..................GIWLVLFVCVAFLVGYSVTTVLGKEEVKlDRKEVFTTIQKGYETQFSIRGKHLPMNKMIETLSPYFT-NFLQVFTDENSRSDKQS........GEYLLPAKEAPFSFNSETKMSYDEEHK.LYVYERs... 0 0 2 3 +13000 PF13159 DUF3994 Domain of unknown function (DUF3994) Aldam G, Mistry J gba Pfam-B_903 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 97 and 111 amino acids in length. 25.00 25.00 25.40 27.40 21.60 24.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.65 0.72 -3.79 21 98 2010-08-02 13:26:55 2010-08-02 14:26:55 1 1 44 0 2 81 0 109.20 40 32.91 CHANGED psKphs-cupchlccGschWppsacslcsch.tt.p...............sthptsohossc.p..p......................sspsslspDGpELlGsWGh.pss.tFphulsh+p..DsTFpsYss.uphs .......TDKEll-KupphlcEucphWt.sFccLcu-h.t..tt...............phssuosSspchpphptKsGl.ps..............NsppNl.KDGTELlGsWGhptus.GaphoLlLKu..DKTFEoYus.GpYP... 0 0 1 1 +13001 PF13160 DUF3995 Protein of unknown function (DUF3995) Aldam G, Mistry J gba Pfam-B_958 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 138 and 149 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important. 
22.70 22.70 24.00 23.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.83 0.71 -4.08 35 198 2010-08-02 13:28:22 2010-08-02 14:28:22 1 1 181 0 58 169 10 119.90 35 82.04 CHANGED hLssluhlH....lYWAhGGpWshps.shsspss.......tsshtPuhsushhVAssLhsu..Asllhhpsh.h.ht.hh.ttlh..phushsluslhhlRu.....lushtaluhh+chts..shFuchDshlYsPLCLhLuluhh ............................hLhhluhlH....lYWAhGGpWussu.slPscss..............chshpP.sshh...TL.hlAlhLshA..Ahllltpss.h..hththsshll.phuuhls...hllFhlRs..............lG-FpYhGh.FK....+h+s..opFA....RhDThlY.PLChhLuluh......................... 0 16 35 46 +13002 PF13161 DUF3996 Protein of unknown function (DUF3996) Aldam G, Mistry J gba Pfam-B_998 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 172 and 203 amino acids in length. 21.90 21.90 21.90 29.80 21.40 20.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.18 0.71 -4.58 16 146 2010-08-02 13:29:55 2010-08-02 14:29:55 1 1 31 0 17 100 1 156.00 34 82.20 CHANGED ths+pptsppphGhGhulssPIsNhhlphshhsl-IGaGsaNGlpss.........sFhshlhhul......DhlFhpphhcphs..lssulGhGh....GphhhSshpsst................psssphulGhRlPLhlpaslh.Ksl-IhhKssPuIt.shahsss........h.Gh+aphFuuhhl+h ...h.tKcppsppthGhGhulG.PlANhhlshsasshDlGaGuasGlpss.........sFhshlhhul......DhIFpp.lhcshs..lshulGhGh....GslhhSs.ppp.................o.ttclulshRlPLslpYshh.+slpIhhKhsPolt.shhhsss........h.Gh+aNFhushhl+h.............. 0 5 7 7 +13003 PF13162 DUF3997 Protein of unknown function (DUF3997) Aldam G, Mistry J gba Pfam-B_1597 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. 
25.00 25.00 27.20 25.40 24.70 24.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.45 0.71 -4.26 10 103 2010-08-02 13:32:12 2010-08-02 14:32:12 1 1 101 0 15 57 0 109.60 67 75.10 CHANGED pls-tYcLlNsussshtlhs.psslhpopa.h.l...sApVs..-IuaD-paIIAKpp..ph+sDshNscsslhscpsE...YWIIDlK......pschaGPasccpFpcKpcphpIscplphlpsahp ...............................................lNDEYELIRTSGNAFELFPoQDAVYATQY...I...PAKIT..DIAWDDKYIIAKQT..EEKSDPNNPDAAIANKKSE..HYWIIDVK........HNKRFGPYNEKQFpEQKDAFKIKVPFQslDuYI.K... 0 4 12 13 +13004 PF13163 DUF3999 Protein of unknown function (DUF3999) Aldam G, Mistry J gba Pfam-B_2128 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 440 and 470 amino acids in length. There is a single completely conserved residue D that may be functionally important. 29.00 29.00 29.30 29.00 28.40 28.60 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.35 0.70 -5.53 34 171 2010-08-02 13:36:19 2010-08-02 14:36:19 1 2 165 0 56 185 19 389.10 29 87.18 CHANGED stDaupthsLphsuu.usaYpLsLPtsVYttutpsD.LsDlRVFNusGcslPaALhtspst..s.ss..phpslphFsL.....sssuss.spssspltlptsssGsl....hthsssstssptpt..t.aLlDhoth......cs.slptLhLch.ssstps..clsVcASDDLpcWpsl.ucutlhcLspssppltpccIt.....Lss..hpuRYLRLhW..s..susthpssplpttsssshss.........shpaptsltsppsss.....spahaplstshPlsclclsLsQs..Nslssspl.u.......................Rssspt.......sWpslusuhL.aRLttsu..tc.tsssltls..spsscthRlpssp...uGhGsssPsltsuhpstpLsFlApGssPapLAhGssssts........sslshssLlsshpstp........ls.Aphst.....ssssussshhs..sssps...h++hsLWusLllG..VssLuhhAapL 
.................................saspphsLphsut.usaYplpLs.sV.htutpss.LtDlRVhsutGpslPaul.s.pst..ttts...p.htlphFsL......sstts.ttsss.hhlph.sssGsl.......tststsuttth......saLlDhSth........ct..slptLhlca......ssshps.tphsl-uSDDLpcWpsl.ucuplhcLshsspplppccls..........................Lsu....tpARYLRLhh.ts.tsuP.tLtsscltsts..sss..ss.............shpWptshtsptsss............schhaph.ss.s.hPl..sclclslsQs...Nsls.spl.tu..............................Rssspt.......sWp.slusshL..YRLtts.s....tphpsstltls....uphs+thRLhlcp.s..sGLus.t..sP.pl...s...l.uh...pstp..LsFl.....ApG.ssPasLuhGssssps........s.slslssLlsshs.pp........ls.Aplus......htsussshh..s..sssps....h++hhLWusLllu...VhlLuhhAhpL....................... 0 10 27 41 +13005 PF13164 DUF4002 Protein of unknown function (DUF4002) Aldam G, Mistry J gba Pfam-B_3350 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 112 and 125 amino acids in length. There are two completely conserved C residues that may be functionally important. 21.00 21.00 24.50 23.90 18.20 17.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.51 0.72 -4.00 26 48 2010-08-02 13:38:04 2010-08-02 14:38:04 1 2 18 0 22 50 0 74.50 36 63.43 CHANGED CC......p.tppltaphtpst.......Ctshsupts.....tsC.phsICsDGpsl.hGs.aCGpGsCNlFGCsCcGGCl...pGshhpsFhcpssth ............................CCh.ttpltaphttus.......Cthssupts....shsC.chsICsDGpsl.hGo.YCGpGuCNlFGCpCcGG..Cl...pGsh.psFhchst......................... 0 5 5 15 +13006 PF13165 DUF4001 Protein of unknown function (DUF4001) Aldam G, Mistry J gba Pfam-B_3337 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are at least two pairs of cysteine residues in this short family of proteins. 
21.70 21.70 23.10 23.00 20.80 17.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.58 0.72 -4.29 32 277 2010-08-02 14:58:05 2010-08-02 15:58:05 1 2 276 0 55 184 1 43.90 66 92.19 CHANGED KHIKTlsppsLpcohppGG.CGECQTSCQSACKTSCTVuNQsCEp ................KHIKTlNp.psLp..po..lp+GG.CGECQTSCQSACKTSCTVuNQsCEp... 0 36 51 53 +13007 PF13166 AAA_13 AAA domain Bateman A agb Jackhmmer:O25761 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. This family includes the PrrC protein that is thought to be the active component of the anticodon nuclease [1]. 45.20 45.20 45.30 45.40 45.10 45.10 hmmbuild -o /dev/null HMM SEED 712 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.59 0.70 -13.53 0.70 -6.07 43 601 2012-10-05 12:31:09 2010-08-02 18:32:18 1 5 517 0 126 556 33 537.10 15 85.43 CHANGED shusa.s..tp........ssp..lss.......hp+lNlIYGtNGoGKTTlSplhtshp.....s........cap..............ps.ph............phps.............s....p.............s..plh..VaNp-Flcc..Nh....s.....p...sp..lpul.asLGccsh-tpppIpphcpplpphppp..hpphp....pplpp..tpp.........phpphpsphp..cpshcph.pp...hppthp.pshpu..hpt.cpph........tpphlp..c.hps....hsts......shhs.cclpp.p....hptl.hssshpp.lshl.s....hs.h.s.hpt.l..-p....ssllpppllup...s.slscLlppls.s-WVc.............pGhc.ahp.......p..s...ppCsFCpp..lspphtppLppaFc-.sa...ppphpplpphhpph...psp...hpt..hhpp.lpphh.s.h..............t.hthp......hpplpp....ph.pt........lppt....l....pt...p....ppLppKhpcPsp.slp.l.csh.sshl......s............................plpshlsphNphIpcpNc....hspNhppcppphp.pplhtah.h.........t.chp..pslp..thp....cphp...shppslsshppplsptppclpshppc....lpcLcpplsshp.slcpINchLpsa...GhsshpLp.h....ps.c....c.st.....YcIhR.....p..s....s.p.ss...psLSEGE+ThIsFLYFhptlc...sss..sh..t.cspllVIDDPlSSLDsshlahlsuLI+shh......tp.sp.............h+QlFlLTHNlhFa+clp..h......hhp........p..........p..pts..p..p........spaa.h...............
........lc..+...sss....tSpl...p.............sh.p.p.....pshpopYphL.appl.......pct..............t.........pps.....ssh.slsNshR+lLEsY..Fph...sph.....p...........c.plh.phhps..........pcp.th...tpu.lhcal.NctSHst.s.s-l.hshst.shp.chhplhcpIFp .........................................................................................................................................................................h..............................h..shhaG.NhtGKoshsphh..................................................................................................................................................th...ha.tphht....h...............t...t................h........h.t...............t.h..t..t...h.......................................................................t.h...................................................................t..........................................t.......t.................h..................................................h.........................t...........................................t......................t.a..ht........................................s.ht.h...........t.t......t.C.hCtp.........................h..ht....h...tt...h...hp...p......t.t...........hp..t.....h....t...................h....................................................t.h...........h.t...........h..t.....h......t...........t..h....t...c............t..t..t.........................t....................................................................................................th....t.h.t.thtt..h.t.pp.......t.t.....h..t....t.....t...h....t........h..............................t............................ht.......t........h....t..t..t....t.h....t.t....p......t.htt.hp..t.t.....h..tth.p.t...ph.............p........h.....t.....t.....hst.lpth......t..th..p..lt..h.......ptp......ptt................ht.l...t............t..s.............t.p.........pplScGEcshluhhaFhtphp..............ptp.sh...........ptthlllDDPlSSLDppphhh...lh.sh...ltphh.................sp...............h
p....p....lhlhTHsh..Fh.p.lp..p..hp..........................t..tp...............phh.h..........................................................lp.p......ptp.......p.h..................htt........p.h.p...a.t.h..h...ht.l.hph..............t.........t.t.........h..h.N.hRplhE...h..hth....t.........t.........................h.....t.h.t..................................h..phh.p..S..H..................................................................................................................................................................................................... 0 35 78 107 +13008 PF13167 GTP-bdg_N GTP-binding GTPase N-terminal Coggill P pcc manual Family This is the N-terminal region of GTP-binding HflX-like proteins. The full-length members bind and interact with the 50S ribosome and are GTPases, hydrolysing GTP/GDP/ATP/ADP. This N-terminal region is necessary for stability of the whole protein. 21.90 21.90 21.90 22.30 21.80 21.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.10 0.72 -3.86 6 4196 2010-08-04 10:27:35 2010-08-04 11:27:35 1 5 3967 0 960 2981 1188 93.80 42 21.38 CHANGED DcpFNF-STMEELpuLSpTCQL-VhuQITQNR-pVDcKYYVGKGKl-EIKuFlEh+DI..DVVVsNDELTTAQSKoLN-sLsVKIIDRTQLILEIFA ..............................tpt.......ttsh.pELtsLuco.A.G..s.cl.l.t.s.l.sppRppsc...sphalGpGKl.cEltp.h...........l.......c........t........p...............s..............s....................s.lVlhs.c.c.LoPu.QpRN....LE......ch.h..............p.........s+V..IDRTtLILDIFA............. 0 319 633 820 +13009 PF13168 Poxvirus_B22R_C Poxvirus B22R protein C-terminal Mifsud W pcc Pfam-B_3510 (release 7.3) Family This is the highly conserved C-terminal region of poxvirus proteins from eg, Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. 
25.00 25.00 211.20 210.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.08 0.71 -5.04 19 94 2010-08-04 13:07:16 2010-08-04 14:07:16 1 2 34 0 0 88 0 196.20 58 10.66 CHANGED cspslhpsVSs.uLoplGuslusuG........hsuuPplAhAGhulpuIuGLIDhupsIYallSGpp...P.DPllcpFssYupalu.oscuGsRhChMPsS-lhlhlAYRp.....cs...p.uhEKhthaahDslsShlhYLpTStIshshplpVsCP.IGtLR.h-sDIsuYshLhhssc-sV+aYhhstlhshLSpaPsVphTCGp-.sLh ..K-EKIFEAVTh.oLSTIGSTLooAG........hhusP.LMIAGlGIohIoGlIDsuKDIYYLhSGpE..+PVDPVIKhFNTYAuLVSDosKhGVRKCLhPGpDTlIYlAY+N.....Do.SFKpssEthuLYFlDVIsScIhYLNTSNllL-YpLKVACP.IGsLRSlDlDITAYTlL.Y-TsDslK+Y+FlRhusLLSKHPVlRLTCGhssTL... 0 0 0 0 +13010 PF13169 Poxvirus_B22R_N Poxvirus B22R protein N-terminal Mifsud W pcc Pfam-B_3510 (release 7.3) Family This is the highly conserved N-terminal region of poxvirus proteins from eg, Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. 24.70 24.70 25.10 57.90 23.60 24.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.13 0.72 -3.80 18 90 2010-08-04 13:12:09 2010-08-04 14:12:09 1 3 32 0 0 83 0 90.90 44 5.09 CHANGED pcpChRKhulYHshspshpt.+-phDhpup..ush+YLslscptEppphhpsFNWopIppsl+cpFlppCsssss.......YhYNYolshslolsspssh ...ETChRKoALYHD.phscs..c-NpDssAS..lsYKYLpVVpcRERoRLluoFNWTsIuEuV+N-FI+hC-lsus.......YLYNYTIslShhIcupc..ch.. 0 0 0 0 +13011 PF13170 DUF4003 Protein of unknown function (DUF4003) Bateman A agb Jackhmmer:Q188C9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 327 and 345 amino acids in length. 
27.00 27.00 30.40 29.50 22.80 19.10 hmmbuild --amino -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.73 0.70 -5.16 20 197 2010-08-04 15:11:32 2010-08-04 16:11:32 1 1 189 0 22 159 1 290.40 39 89.43 CHANGED csYcplcps.thphssshlphhhAhhashpscphs..hschcclpphIKppouhFSshRuphphsluuhLslcts..psccthpphlplYspLppttFpcsp.ahhLuAhhlhpp..pppshcpplp+spplYcthKccH.FLTus-DhshssL.LAt..sspsl-plh-phEphYphLpc.hsht+uNsLQhLSplLsLtpspsppp.st+shplhptLcccclKlpp.a.hshlGlLullpssc.c.lcslpplh-pLpccctht...hp+chphhlAlsLhhschhsppp........hhcpsLttslphlltspps .............sYtpLKsp.caphpDsRhthhIAthaAuss+lhs..ht+FhEIspaIKpQlGhFShL.......+.uhpRaslAuhLslc.s..sh+cuhcphlclYcpLlpuGFpRoh.aTYLAAhlLLpp......pp-phsppIp+uhplYctMKK-HhFLTuopDhshAVL.LAs..psEsl-pLh-csEhhYpcLAp.tGF+KGNsLQFLSpILoLtps.c-phhlupsssIhp.LKpc.clKlKphH.YPuIGlLALlpDu-.K.lculpslI-cLptc+hFR....app-tslhlAIpLassppu-c.p........tpspGLts.lclLItAQQA...................... 0 9 17 19 +13012 PF13171 DUF4004 Protein of unknown function (DUF4004) Bateman A agb Jackhmmer:Q183T3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 210 amino acids in length. 27.00 27.00 27.00 49.70 21.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.30 0.71 -4.15 9 172 2010-08-04 15:48:56 2010-08-04 16:48:56 1 1 169 0 26 122 0 196.20 57 94.62 CHANGED EpLISKKELL-hssISYGQLYRWKRKNLIPEEWFIRKSTFTGQETFFPR-KILpRIspIpphK--lSLDELAchFSsp.hp-lplsppcllpcsllScsslchatp.hs.ptpsas.pcllslal...LEcLLpSG.lSl-EuptlstsLcpp.tp.ppKpspLhlhRKLGlshahllussscIhh-sssKVlp+lslsch....hEc ..p.-LISKKDLLELTGISYGQLYRWKRKNLIPE-WFlRKSTFTGQETFFPKEKILERI-KIQoMKEDLSLDELAsMFSPs.lp-lhLT+--lL+KGIsScsVLphFhEpps....css...pFpFs-lLhlYh...LEcL..LQSG-ISLEE.GKhlhpVLc-N.Ypuh.ccKss-Lll..lRKLGloTChLl.uss--llFEcGsKlVlR.slh+h.oE.t............................................................ 
0 8 16 20 +13013 PF13172 PepSY_TM_1 PepSY-associated TM helix Coggill P pcc Jackhammer:O25020 Family This family represents a conserved TM helix found in bacteria and archaea. 22.90 3.00 22.90 4.30 22.80 -999999.99 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.31 0.72 -4.10 183 2589 2012-10-01 23:59:14 2010-08-05 10:40:52 1 41 1548 0 728 5088 380 32.10 26 8.70 CHANGED htphhtphHhahuhhshshlllhulTGhhlsapp .............hhhphH.hahulhssshlllhulTGlhhha.............. 0 180 437 605 +13014 PF13173 AAA_14 AAA domain Bateman A agb Jackhmmer:O25880 Family This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.88 0.71 -4.24 476 3483 2012-10-05 12:31:09 2010-08-05 13:21:57 1 24 1307 0 833 5752 2489 125.60 23 29.71 CHANGED sptslllpGsRpsGKool...l.hph.hpphh........pphlal.s.h-c.....ph................ht....ht.....t...h.................phhhp.h....h...............tt.h..lhlDElQps......................s.s.h.phl+tlh-.......pt...t..h....clhlTGSsshhl...p.phs.........splsGRhh............hclhPlSFpEah ...........................................t...hlllpGsRpsGKosl...l....tph...hpphh..................................tph..h..h..l..s...h-s......ph......................................................tp...ht.....t..................h....phhhp.t.......t......................................ttth....lhl...DE....l....Q..ph..................................s..p..h...p.h..l.+thh-..........pt.......p.....h..........ch.h.loG..S...s..s..h...h..h......p.....ph.s.............p...h..s..G.R.hh..........................h.p.lhPhoatEh.............................................................................................................. 
0 327 580 714 +13015 PF13174 TPR_6 Tetratricopeptide repeat Bateman A agb Jackhmmer:O25749 Repeat \N 25.70 10.00 25.70 10.00 25.60 9.90 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.27 0.73 -7.87 0.73 -3.13 402 6355 2012-10-11 20:01:03 2010-08-05 13:32:42 1 1348 2675 10 2337 34545 12501 32.30 19 8.58 CHANGED pAh.hphu.hshh.p.t........t.pts..pAtp..hhppllpp.aPso ........................................................sh.hphu.hhhh.p.t......................................s.php.....pAhp....happllpp.aPp................................... 0 982 1643 2054 +13016 PF13175 AAA_15 AAA ATPase domain Bateman A agb Jackhmmer:O24997 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.52 0.70 -5.43 19 1024 2012-10-05 12:31:09 2010-08-05 14:14:22 1 20 787 0 211 6175 1983 226.90 14 45.90 CHANGED l+plpIpNa+shpsh.............................clshsc..........sls........lllGpNssGKoslLcul.........t.h.s.p.....tp...............................................t.tph....pp....hp...................h.tt.ptlplpthlppssschhus.....h...p..h....ppspsp..hhhh.p.lc.thchhh.sphpchchhpphst..................................................ph..h....t...p.......phph.ph.hhtth.p.....h..pt.h.tt.phhpchhssts..phpphhp.shhcthct................ttcthhpshspplpphhpcths................tshthp.spphts...........................st.hp.phplsps.lphhhp.........psp...p......lslpppupGhp.hhhhslhhsphptps.sp..........IllIDEPEsaLHsshQpphlchLpphsp..........sh..QlI.................lTTHSPall 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................h.....p...t......t...u.......G....hp...............h.h...h.............h......h........h..................h..t......t..t.....h..t...t.......................................lhhl-EPEsphHsphQh.....h.h.p.h.l....p.h.ht..................................th.....Qhl.................lo.THSshl............................................................................................................................................................................................................................................................................................................................................................................................. 0 67 124 176 +13017 PF13176 TPR_7 Tetratricopeptide repeat Bateman A agb Jackhmmer:O25374 Repeat \N 25.00 14.00 25.00 14.00 24.90 13.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.64 0.74 -8.06 0.74 -3.73 162 2738 2012-10-11 20:01:03 2010-08-05 14:29:41 1 891 1597 5 1042 30248 7638 35.70 24 5.94 CHANGED sl.ttLuph.app...tGchccAlphacp.......l.tht........ps.tstp ...............h.hsLGpl.app.......pG.ch-cAlchapp...................h.t.t..................ph............................................... 0 404 612 833 +13018 PF13177 DNA_pol3_delta2 DNA polymerase III, delta subunit Bateman A agb Jackhmmer:O25829 Domain DNA polymerase III, delta subunit (EC 2.7.7.7) is required for, along with delta' subunit, the assembly of the processivity factor beta(2) onto primed DNA in the DNA polymerase III holoenzyme-catalysed reaction [1]. The delta subunit is also known as HolA. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.08 0.71 -4.58 82 9575 2012-10-05 12:31:09 2010-08-05 15:02:19 1 36 4883 47 2332 8351 6088 159.20 34 34.48 CHANGED tpsphhphLpptlpps...+luHA..YL.FpG.pGsGphphAhhhupt.lhCpptt.t...............sCtpCpsCp.pltp.........................tsaPDlhhl...........ps.cst.....................................................................................................................................sI+l..-pIR.pLppphshpshc.up.hKlaIIcsA-phstsAuNuLLKhlEEPs.spslhlLlopshspl....LPTIhSRCpllphpshs ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pt.hhptLtp.u.l..p.p.s......+l.sH.AaLFs.Gs.cGsGKsshAphhA+.sl.Cpps.ss.......................................psC.spCp..s.Cp.....tlpp..............................Gp.asD...l...h.tl...........................cs..sup................................................................................................................................................................................................................................................................................................................................................h..s...l........-......c...l.........R.......c.......l..h....-..p....s.....p..h..s.....P....s......p.....u....p.....aKValI--sc...h...L.....o...p.sAh......NALLK.T.LEE.P.P.sp.s.....h.FlLs..T..s..c..s.p..+l...........sTIhSRCppaphp...s.........................
... 0 792 1517 1986 +13019 PF13178 DUF4005 Protein of unknown function (DUF4005) Coggill P pcc Pfam-B_2171 (release 24.0) Family This is a C-terminal region of plant IQ-containing putative calmodulin-binding proteins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.33 0.72 -11.05 0.72 -3.79 69 399 2010-08-05 15:10:13 2010-08-05 16:10:13 1 6 28 0 239 398 0 120.00 20 26.44 CHANGED soPp...............s...t.ttt.t.sss.p..tt.........................hPsYMAsTpSu+AK..lRSQSuP+.QRsps............tppssp............+RtShss.ss.s...............tttts.+Sss ..................................................................................ts....................t..t.......p.....p.ttppp.h..s...........................................sshPsYMAsTpSA+AK..s.R.....u..p..S.u...P+..pRsp.s..........................pppssp........................+RhShs..sssstt...........................tthtt............................................................. 0 31 151 199 +13020 PF13179 DUF4006 Family of unknown function (DUF4006) Coggill P pcc Jackhammer:O24959 Family This is a family of short, approx 65 residue-long, bacterial proteins of unknown function. 20.90 20.90 21.00 21.50 20.20 20.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.12 0.72 -4.42 21 189 2010-08-05 15:41:21 2010-08-05 16:41:21 1 1 189 0 21 73 0 65.00 57 90.90 CHANGED M.ps.pRslFuLNGloGhLlAsVLLLuILssLThhuItsQpssAspsYp....lpcssulKMh..uspNsc+hh ...MENsNRsVFuLsGVTGMLIATVLLLuILVsLTlWGlKsQQEVhQpPYo....LKDl....puVKMh..uSccQDHh.s... 
1 4 17 21 +13021 PF13180 PDZ_2 PDZ domain Bateman A agb Jackhmmer:O24877 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.68 0.72 -4.09 52 19233 2012-10-02 11:12:46 2010-08-05 17:07:01 1 162 4727 36 5564 18896 7404 86.90 22 21.84 CHANGED uslGlphptpps..................sshl.p........VtpssPutcss.lph..............GDhIhplssptl...psh.......spLh......phlh.ptp.GssVplplhR......sscppslplpl ............................................................................................Gh..h...t.............................................................tGs.h.Vsp.............Vh.s..s....u...P...At.......cA..G...lcs......................................GD..lI..l...p..l....s...Gps..l............psh...................p.clh................p.hl...t.....p......t....p....s..G..p...p...l.p.l..p...l..h..R.......s..G...p.p.hphpl........................................................ 0 1926 3646 4675 +13022 PF13181 TPR_8 Tetratricopeptide repeat Bateman A agb Jackhmmer:O25864 Repeat \N 25.00 10.00 25.00 10.00 24.90 9.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.62 0.74 -7.70 0.74 -3.77 158 8299 2012-10-11 20:01:03 2010-08-05 17:20:12 1 2662 2423 16 3530 83745 27261 32.40 18 6.01 CHANGED h.....ps.....hh.thGpl.ahp.hsph..cp...Ahphhpculplss..s. ........................................t...hh.tlup.h..Yhp..hs.ch........cp..............Ahphhpcshph.....t................... 0 1467 2293 2991 +13023 PF13182 DUF4007 Protein of unknown function (DUF4007) Bateman A agb Jackhmmer:O26600 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 284 and 326 amino acids in length. This domain is found associated with Pfam:PF01507 in some proteins, suggesting a functional link. 
27.00 27.00 27.90 27.70 25.30 24.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.58 0.70 -5.58 34 141 2010-08-06 09:59:40 2010-08-06 10:59:40 1 6 122 0 41 146 81 259.60 24 81.04 CHANGED pFuuHE...........TFslRhGWLpKuhctlp.ps....sssFh..ccsuhsphGVGpNMVculRYWhpAsplhc-s.tt.........phTphGchlh.......shDPYlE-.uoLWLlHatLsss....pstAssWYhhFN.hshsc..Fo+s-lhphlpchhsp......tpphsppolpcDlssllpsYs...............ptsphssE-.hssPhsELuLlphs..ttpt.taphshss+ssLsstllhYAllcah...s..pstpolulscLhtps....sSPG+lFpLscpslschLcplpph..stlplscTAGLcplhhpp.......phtshchLcpaY .............................FutHETF.hRtt....WLpKuhpth..tt..........phF....tppuhs.phGVG+NMVpu.l+aWhtAssllcpstt..........phTth.Gphlh..........................shD.ahEp.solWLlHah...Lsst............tptsssWahhFN.hp..t...p..p....Fscpplhphlpphhtp.........tpt...hupsolpc.DlsshlpsYs.................ttts.E-.hpssls-Ls..Llpth..........tpt.haphtpstp.slsstlhha..ullchh...t........tstpolshpplh.t......suPGclFpLscpslhchLpplpph.....shlp.hscouGlcpl.h.t...........thlt.................................. 0 18 36 39 +13024 PF13183 Fer4_8 4Fe-4S dicluster domain Bateman A agb Jackhmmer:O27906 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.92 0.72 -3.94 344 10837 2012-10-03 08:56:43 2010-08-06 11:04:20 1 158 3616 30 2967 16116 5623 77.90 24 15.69 CHANGED p.hp...pChpC.u..tCt........ss.....C...........................P...................................................hhht.................................ht....h....tttth..t...................................................................t..h.........................................................htC...stCtt.....Cs..phCP.t.....sl ....................................................................t.htpClpC.G...hCh.............ss........C....................P................................................shtt...........................................................................................................................h.s......p.h...ht..ss.pstsht......................................................................t....htt.............................................................pthhsC...ssCts............Cs.psCPhpl................................................................ 0 989 2028 2572 +13025 PF13184 KH_5 NusA-like KH domain Bateman A agb Jackhmmer:O27285 Domain \N 27.00 27.00 27.00 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.93 0.72 -4.17 54 4542 2012-10-02 00:34:43 2010-08-06 11:35:28 1 31 4481 7 1040 3009 2319 69.80 50 16.41 CHANGED sGpRsKlAVtops.....plDslGuslGhpGsRlpslspELp..............sEKIDllpascD.......ppaltsALsPAcVhsVpl .....................sGsRuKIAVpo..p-.....ppl...DPVGACVGh+GuRVpuVssELt..............G.E+IDIl......W.....s-.-......PApFlhNA.Lu...PAcVssl..l.............. 
0 356 687 885 +13026 PF13185 GAF_2 GAF domain Bateman A agb Jackhmmer:O27394 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -11.13 0.71 -3.71 408 8344 2012-10-02 14:34:25 2010-08-06 13:27:49 1 1430 3034 23 3170 13907 1709 140.50 14 25.15 CHANGED sts...h.pclhpt...h....l.p..tl.hp.......hh..sphuh.lh.......lh..cppp...................thh.hs......htsht..........tht..ht.t.....................................hpptcshhhs......................tt......t.h.htt..htshl..........sl..Plh.tps...p.lhGll.s....lts....tt...................tasp.p..chphlptlupthuhulp ............................................................................................................................................t......hht......h...h...p.....h..hp......hhs..hphuh..lh.............................hh........-tpt...................thhhhs........................h.t.uh................t...........h...t...........................................................................................hpppcshhls........................................................tt.h...........t.phspptsh..p..uhl................sl....Pl.........h..t........p.....s.......p.llGlL.s...........l.su.........tph...........................ttas.p..p....-..hphLptlut.hu.sh................................................. 0 1082 2191 2819 +13027 PF13186 SPASM DUF4008; Iron-sulfur cluster-binding domain Bateman A agb Jackhmmer:O26216 Domain This domain occurs as an additional C-terminal iron-sulfur cluster binding domain in many radical SAM domain, Pfam:PF04055 proteins. The domain occurs in a number of proteins that modify a protein to become an active enzyme, or a peptide to become a ribosomal natural product. The domain is named SPASM because it occurs in the maturases of Subilitosin, PQQ, Anaerobic Sulfatases, and Mycofactocin. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.70 0.72 -3.63 441 4320 2010-08-06 12:57:53 2010-08-06 13:57:53 1 60 2271 0 1134 3391 1345 67.60 24 17.48 CHANGED C.........h....hpphs....lp.........ssGsVhs....C....................s..hhtph........tlGN.l......p......p....s.lt-.......l....a.pu......pphp...ph.....+pthhpt...............................hC........p..p.............Cs .....................................stpt.hs..lp.......ss.Gcl.hs....C..................pth.....shsph........plGN.l....pp.........p....s.lpc..........................l............h...pS......tt.hp.ph.................ppt..phtp..............................t.C...p..p..C.......................................................................... 0 484 821 978 +13028 PF13187 Fer4_9 4Fe-4S dicluster domain Bateman A agb Jackhmmer:O27418 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.23 0.72 -3.70 188 5796 2012-10-03 08:56:43 2010-08-06 14:12:30 1 300 2244 20 1131 20878 5640 56.40 33 18.14 CHANGED pChs.C......t....t.....C..hps.C.........Ptthh.hhtthtthhhh......................................................C.ht..Cst.........................Ch.psCPp.ssl .............................................................ClG...C.....t.....h.....C..hsu..C............................P.h.s..lh.....ph...s...s...t.s...s.t...h.t...............................................................................................................................cC.sh.......Csp..........................................Cs....psCPs.sA....................................... 
0 435 821 1007 +13029 PF13188 PAS_8 PAS domain Bateman A agb Jackhmmer:O26558 Domain \N 21.00 11.00 21.00 11.00 20.90 10.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.55 0.73 -9.73 0.73 -3.93 488 8124 2012-10-04 01:10:46 2010-08-06 15:34:09 1 1381 2453 5 2746 38264 3152 67.80 15 10.70 CHANGED pphps....l...hc.....shst.ulh...h............h..c......t......................lhh......sNpshtchh........G.......ht...............hhsp.h....t...........thhsthptttt..thtp....hh.........ptht ........................................hps....l.........hc.......shsp.ull.....l...............................l..D......tts...................p.................................lhh.............sNp.sst.c...hh...............G.......hs................hhsp.h..t.........thh.........................tthtthttt...................................................................... 2 951 1890 2360 +13030 PF13189 Cytidylate_kin2 Cytidylate kinase-like family Bateman A agb Jackhmmer:O26138 Domain This family includes enzymes related to cytidylate kinase. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.85 0.71 -4.37 174 1110 2012-10-05 12:31:09 2010-08-06 16:01:58 1 17 600 2 300 3886 1461 168.30 26 77.44 CHANGED lITIuRpaGSGG+pluctLAccL.Glsh....YD+.-llphsAcc.tGlscphh.pph-Epss.........psh.h.thhht...................................shshs-plaphpp...c......lIpcl.A.p.ct.ssVIlGRCAcalL+-..ps.ss...lplFlt.Ash-tRl...............cR.lhcp....h.sls.c.c.cApchlccpD+pRppa.apaYT..spcW.....GcspsYDLslsoup ......................................lITIuRphGSGGppluctLAccL.Glth...........Y..D......c....c...l....l....p.........h.....A.....c....c.....p.....G.l......s......t......p....h.....h.....pph.sEpts..............tsh....h....hh..........................................................ph.hscpl.a..ph..pp....c......hI....p........c...l...A.....p...c.....t...s...s....V..l......l.G.R.s.A.s......a.l..Lcc........hs...ps........l..p....l..a.l..h..A.s...h-hRl........................cR.....lhcp.............t...s..hs...t....c...cA.....pc..hl....p..c..p..DcpRtpa...Yphas..sp.c.W.....sc.s.psYDLslsou................................................................................................................................................. 0 141 243 275 +13031 PF13190 PDGLE PDGLE domain Bateman A agb Jackhmmer:O27741 Domain This short presumed domain is usually found on its own. However, it is also found associated with Pfam:PF01891 suggesting it may have a role in cobalt uptake. The domain is named after a short motif found within many members of the family. 
27.00 27.00 27.30 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.17 0.72 -4.30 127 379 2010-08-06 15:35:55 2010-08-06 16:35:55 1 2 367 0 183 378 172 89.10 25 37.34 CHANGED llsulllushluhhAS............usPDGLE.shtth..........pstp-hsh.pps.....t.........u.hsDYul.s....uhsp.........................................................hGssluGllGshlslsluhuluhl ......................h.lhulllusslu.hhAS............ssPDGL..E.shpph.....................tstp.ch...sh.ttt.....t.ht...................u.hsDYul..s......Ghss...................................................lGshluGllGshlslslshslsh.h.... 0 65 130 163 +13032 PF13191 AAA_16 AAA ATPase domain Bateman A agb Jackhmmer:O27636 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.44 0.71 -4.30 217 4712 2012-10-05 12:31:09 2010-08-06 16:51:29 1 525 1445 1 2447 16457 2610 171.70 16 17.45 CHANGED plhGR-ppl.....ppLtshl..t..thtpups..............thllltG.sGsGKosl......lp........p..lhpthtpp...................thh.hhpspttph.......................shtsh.tphhpplhpp...hhst...........................................................................hh....................................sshsshsssptp......................h....phlp..........ph.l.............................tthspt........pp.......................................lllhlD-lp..............hh-.....pss.........hph.lpt..L.....hpt.........hpt.....................slhll 
........................................................hhGRptpl...ttl.t.phl..........t....tht.ts.ps..............................p.s...l.l...l.t...G....sGsGKosL...............lp..............................p...l...hpp..htpp.........................thh...h..hp..s.p.sspt.....................................................sh.ts..h.....tph.l........p..p...l...h..pt...............hht................................................................................................................................................................................................................................................................................................tt.hs..s.t.tht.......................................................................h................thht.........................ph...l................................................................................t.t.httt....................tt.............................................................................................................................................................................................................................................lll...h....lD....-....hp....................hhc..........tts.................hph..lt.....l...hp.............................h........................................................................................................................................................................................................................................................................................................................................................................................ 
0 925 1663 2175 +13034 PF13192 Thioredoxin_3 Thioredoxin domain Bateman A agb Jackhmmer:O26981 Domain \N 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.70 0.72 -4.16 112 2658 2012-10-03 14:45:55 2010-08-09 14:50:14 1 25 2065 41 788 3102 540 77.70 30 21.59 CHANGED hcI+l..l..GsG.C.spCp...pltphscpulpch.u.lsu....plp+....l.pDhpcI..hpaGVhsTPul.lls....sclhhsGp.lPstcc.lpplLp ............................h...cs.hh...Sho..C.psCP.......sVpsh.s...h.h.u...l...h...ss....s...lpt...........shl-........s....sh..h...p-l.....pp..h.....s.lMuVPul..hl....s........G...c.........h...s..p..G+....hsh.tc..ltp................................. 0 267 506 666 +13035 PF13193 AMP-binding_C DUF4009; AMP-binding enzyme C-terminal domain Mistry J, Eberhardt R jm14 Jackhmmer:O26318 Domain This is a small domain that is found C terminal to Pfam:PF00501. It has a central beta sheet core that is flanked by alpha helices. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.82 0.72 -3.03 574 45096 2012-10-03 01:00:17 2010-08-10 11:53:00 1 1760 5076 96 14992 41390 10507 76.60 27 9.63 CHANGED ElEssL.ssH......P....s.V..sEu...AV..lG...hs.Dp...........hpG.ptl.hAaV..lh.........................................t...pc....lpph.......l.....pp......p.lu..........shthP.p...lhhl...s...LP+TpoGK ..................................................ElEssL..hp.a............P......s..V.....t-u.......s.V................lu............hs..ct...................................hhG..ctl...hAal..lhpt...........................................................tshpt......pp........lppa..........l........pp............p..Ls......................sat..hP.........hta...l.......s.........LPhTssGK............................................................................... 
0 3967 8666 12279 +13036 PF13194 DUF4010 Domain of unknown function (DUF4010) Mistry J jm14 Jackhmmer:O27500 Domain This is a family of putative membrane proteins found in archaea and bacteria.\ It is sometimes found C terminal to Pfam:PF02308. 30.00 30.00 33.10 33.10 29.80 29.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.37 0.70 -5.03 138 426 2010-08-10 12:49:28 2010-08-10 13:49:28 1 3 381 0 187 409 49 207.30 28 50.08 CHANGED VlluuloasGYluhRhhGs+tGlhloGlhGGlsSSTAsThshucpu+p.pss......hsth..hsuulllAsssMhlRlllls.sllss.s....lhh.tLh..hPhhs..hslsshssuhhh..h+p.......tttttt.ssshs....pNP...hpLpsALtFuhlhsllhlhsphhpphhG.ssGlhhluhluGlsDlDAhslSlsph.hsss.plshssAshulhlAshuNslsKssluhhhGs .....VlluuluahuYlshRhhGscp..GlhloulhGGlsSSTAsshshucpu+p..pss..........sth.hsuuhllAss.sMhlRhl...lll.sllss..s........lhh....tlh...hshhs..hslssh.shuhhh....h+p...........ttttts...stphp.....pNP.hpLpsALhFuslhsllhll....sphspphhG.suGlhhluhluGlsDlcAhslolsph.tsss..plssssushulhlAhhuNslsKsshuhhhG..................... 0 54 131 166 +13037 PF13195 DUF4011 Protein of unknown function (DUF4011) Mistry J jm14 Jackhmmer:O26588 Family This family of proteins is found in archaea and bacteria. Many members are annotated as being putative DNA helicase-related proteins. 
22.20 22.20 22.70 22.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.10 0.71 -4.25 100 425 2010-08-10 13:28:15 2010-08-10 14:28:15 1 28 396 0 147 436 75 172.30 23 10.16 CHANGED cLLDLohR.NpLLNhp...tspp.slplhsss.....ssplt-h..L.sp......upshphhs......hsp.t.....................................ts.p...stthhptpshp..........chttph....htpth......................pcpplh....................................................stlstppL...pp+..LhpLappA+ohh-EsGsNsLYLAlGhLcWhc.spps....cp.hhAPLlLlPVpLpRp...sspp.....sapl..phps--.hthNhoLhphL ......................................................................................................................pLLDhsh.R.Npllsh........stp..sl.lhs.t..........s.l.ph..l.tp......tpthphhs....h.t...................................................................................................t.........t.p.....................p...t.......tth...................................................................tpppl.................................................................................s.h.st.ppL.....ppp...LtpLhcpu+shhcEsGsssLaLAlGhLcWhc...spss......scshhAPLlLlPV.pLp+p..ssts.....tapl...phc.p--..sthN.oLhphL........ 0 62 95 120 +13038 PF13196 DUF4012 Protein of unknown function (DUF4012) Mistry J jm14 Jackhmmer:O26332 Family This is a family of uncharacterised proteins found in archaea and bacteria. 
28.00 28.00 33.00 31.40 26.20 26.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.70 0.71 -4.04 44 176 2010-08-12 09:26:21 2010-08-12 10:26:21 1 3 119 0 73 178 61 143.00 31 24.84 CHANGED cGsRsYLlhsQssAEhRusGGlsGuhullps-sGplslsc.h..tsssch....htpssss.hss-pts..has.....t.shhhpDsshsPDFspuAphhpphWppps...Gt.slDGVlulDPVsLshlLpss.GPVsl..s.DGp..........slsusNsschlhsssY ....sGsRsYllhsQssuEhRuoGGllGuhuhlss-sGplslsc.h..tsssch.......hspsshs....hssc..ptp...lau..............hthsh.hpssshsPDFspsAphhpshWptps...sp.slDGVlulDPVsLptlLtss.GsVsl..s..DGp............slsusNssphhhsssY........................ 0 22 51 61 +13039 PF13197 DUF4013 Protein of unknown function (DUF4013) Mistry J jm14 Jackhmmer:O27260 Family This is a family of uncharacterised proteins that is found in archaea and bacteria. 26.90 26.90 27.10 27.30 26.10 26.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.40 0.71 -4.92 99 205 2010-08-12 09:39:34 2010-08-12 10:39:34 1 1 98 0 137 208 21 168.40 18 63.89 CHANGED lllssll....hlsllshhllhGYhlclh+ps....hst.pshPc...a.s..sasphhhcGlhh....hllsllahllshllhhlhhhhshh...........................s..hh.........slhshhhsh....lhhllsllhuhh.hsh....uhspautps.phssuFphsplhphlp...hhhhta......lhshll.hhllshlhshlsslhhhl ......................h.hlhhll....hlsllshhllhGYhhclh+ts....hs..sthPc...a.p...sasphhhcGlph....hllsllYhllshllhhlhshhhhh.................................s..hh................hl...hsh..hhhl......lsh.llsllhshh.hsh......uhsphutps.phtuuFphpplhphlp.t..hshhpa.......llshll.hhllshlhshlhhhh...h.............................. 0 37 99 126 +13040 PF13198 DUF4014 Protein of unknown function (DUF4014) Mistry J jm14 Pfam-B_4873 (release 24.0) Family This is a bacterial and viral family of uncharacterised proteins. 
23.00 23.00 23.70 23.50 22.20 20.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.41 0.72 -3.57 5 132 2010-08-12 10:49:27 2010-08-12 11:49:27 1 1 102 0 1 55 0 70.70 74 91.89 CHANGED Ms+ha+K+YPRKSRsTEFLFLILFIVLM..lPISPLIllWlIG+..........................IlEhVIELYsDVVWuSFNsLHNKINPY.......KEN .....MsphhcKpYPRKSRsTEFLFhILFIVLM..IPISPLlhVWhIGK............................IIEPVIELYsDVVWASFNTLHNKINPY.......KEN.................... 0 0 0 1 +13041 PF13199 Glyco_hydro_66 Glycosyl hydrolase family 66 Bateman A agb Pfam-B_3959 (Release 24.0) Domain This family is a set of glycosyl hydrolase enzymes including cycloisomaltooligosaccharide glucanotransferase (EC:2.4.1.-) and dextranase (EC:3.2.1.11) activities. 23.70 23.70 23.80 23.70 23.60 23.60 hmmbuild -o /dev/null --hand HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.87 0.70 -6.11 25 143 2012-10-03 05:44:19 2010-08-16 13:58:58 1 19 112 3 25 168 3 475.00 26 70.15 CHANGED sTDKApYpPG-.sVphslshpss.......ussplchpHLs.psltpp.phshtht.spshs.....WpsPssDF...pGYhVclhs.ppssstlspsshAlDVSSDWs+FPRYGalus..Fsss...........s.pphpppl-pLsc.aHINulQFYDWta+HcpPlstss..tp..psWsDhu......s.RplttpsVKshIstsHphGhtAMhYNhlYGuhpshtp............sGlp.-....Wtlaccsspt......p.-pasL.....sp.h.ss..lhlhs.PsNssWQsYlhsptscshcphsFDGaHlDQlG.sRusl.a.......shsGpsl..........p..................LsssausFlpshKptl.sstp.........LVhNsVssautpplus.ushDalYsElW............tspssYssL+phlcp.....scph.sps.....upssVlAAYMsa...........................................................stus.ssGt.................................................................FsTsuVLLsDAsIFAuGGsHlEL......G-........phLspEYFPspsLphs.ccLpcphhsYYDFlTAYENLLR.....DGtsp....sssshsssstph.Shs....................sptspVWshuK.cssstc..llHLlNhhGss..shsW+c........ssuspspPpphpslplpl........ptss.plppVahASPDh.tsGsspcLsFpp....sGs.V..pholPpLpYWsMlhl 
................................................................................ssKuhYtsup.pV.hphp.t..............hthphphhhht..phltp...ht.....ttph...........hp.Ptpph...pGYhspl.h..ttspth......tshulsVssshtpFPRYGalus..a.pp................tp.tttht.hpp.hplNsh.FYDhh.ctp............Phsts................................tpa.sh.......t.ppl.hpsl+phlpth+phGhhuhhY.shhh.us.ps.h.p.............tsst.p....hhlapsssp...............t..thsl.....st..h..s.....hh...hhs.Psstp.WQpYlhpp.ppshpphuFDGaphDplG...ppssh..h.......shs..up.h.............................lppsassFlpshKpth..sh..........lshNsVst.s.pplst.....up.DhhYsElW.............t.pstatsLpthltp....stth..............thshlhuAYMphtttt...............................................................................................................................ttt.hppsuhlLssAshhA.GG.Hhpl............Gp.........thLtptYaPsps.hphs.pphppt.hhpY.pFlsuYpNlLp.....Dsthp....t.s.phshsshth...uhs...................sptsplhshsK..ps......sshp......hlphlNh.shs....s.sW+s................us...pPth.pphtlph.........htt...spplahsSPD..t........tut..plthp......pst.l...hslPpLphWsMlh........................................................................................................................................ 0 12 19 24 +13042 PF13200 DUF4015 Putative glycosyl hydrolase domain Bateman A agb Pfam-B_597 (Release 24.0) Domain This domain is related to other known glycosyl hydrolases suggesting this domain is also involved in carbohydrate break down. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.04 0.70 -5.52 62 311 2012-10-03 05:44:19 2010-08-16 14:08:57 1 12 292 0 87 459 246 298.50 31 62.60 CHANGED GlYlouhsuus.pt.h-cllchlcpTcLNuhVIDlK....-DpGplsapp.psstspthssspsh....hDhpthlccL+ccsIYsIARlVsFK.DshLAct.....+P-huh.ppssGslWpstpu.....u.........alNPapc-VW-YNlslAccAAchGFDEIQFDYlRFPs.....pt.tcpLpastsshpp.......................................................................+ssAIssFlthA+-cL.pshsV.lSsDlFGhssts....cssuIGQshppluppVDhISPMlYPSHassG................aGlc.pPDtcPYclltpuhpptpptlsth........tshhRPWlQsFTssaltt..hhpYGspplcsQIcAltDs..GhspalLWNAuNpY ...................................................................................................ulYlouhshss..p.h.hcphlchhcpotlNuhVIDlK....sDpG.lsa.s.pst....h..s...p.t..h.....s..s....spsh..............hD...h.tt.l...lcph+cpsIYsIARl....V...s.F.......K......D.sh.Lu..pt................pP..-.hul.....pptsG..p.h....Wp..s..tps..........s.....................W.lsPapc.csWcYN.l....s.lA.c.E....A.A.c.h..GFDEIQFD.YlRFPs..........tt.sppl.pa.s...tsptp...p...................................................................................+ssAIssFLphA+.cc.L..t...h..s..l..lSsDl.F.Ghsshs....sstu...IG........Qphpt..l..u..p.....VDhISPMlYPSHas.uh...............huhp..PstcP...Yphlhpuhtcstphh..tt..................tshhRPWlQsFpsshhtt...h.Ys.pplctQlcAhp-t..shstahLWNusNpY..................................................................................... 0 43 66 74 +13043 PF13201 Xylanase Putative glycoside hydrolase xylanase Coggill P pcc JCSG_target_393205 Family This is a family of putative bacterial xylanases. Comparative structural data from TOPSAN indicates there to be a C-terminal carbohydrate binding domain similar to those of carbohydrate enzymes such as glucanase and xylanase. 
There is also structural similarity of the N-terminal domain, according to TOPSAN, to endo-1,4-beta-xylanase (from Streptomyces sviceus) and beta-xylosidase (from Magnetospirillum magnetotacticum MS-1). The N-terminal domain fold is an immunoglobulin-like beta-sandwich. 20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.04 0.70 -5.62 30 518 2012-10-03 05:44:19 2010-08-16 14:49:09 1 35 171 3 105 458 18 216.30 18 43.09 CHANGED oSCI+-EA.NAEADIp....u..s......psssshLhp...........ps.hssspIplhV..pp.ssDloplAPpFsLTsGATI........sPs...u......G......os.p......D......Fos...Ppp..............YTVTSEDGpWc+sYpVsh....hsspl....s.o..pacFEslpht.........p.t.cYplaY.-h.psuspp.....hp..WASGNsGFplouhu.p.ssp-YPTsQsss.G.hpGKClK.LsT+STGshGuh...l..tMP.IAAGNLFIGoF-ls.sAlsssLKAT+FGhPFp+..cPhpLsGYYKYKAGcpap....-.s.Gp..l....ss+KDpssIYAVhYEsscsst..........h.LDGs....Ns..hT..usp..lVulAcl..s..-s+E.........T..DpWTcFslPF.h.p.GKsIDts+LppGcYsLAIVFSSStcGupFcGAlGSTLaIDEVcLI 
.........................................................................................................................................................................................................................................................t....t..t..p..l.h.l.............ts...s...l.pt.h....s.h.t..hs.......s..Aol............t.Pt..s......s......s.........................sasp.....t..............apVsuts.....t.th...p...Yp...lph................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 40 93 105 +13044 PF13202 EF-hand_5 EF_hand_3; EF hand Bateman A agb Jackhmmer:O26668 Domain \N 25.30 11.50 25.30 11.50 25.20 11.40 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.82 0.73 -7.13 0.73 -4.38 149 5306 2012-10-02 16:17:27 2010-08-17 09:27:41 1 515 903 68 2797 37229 2719 23.80 30 8.51 CHANGED lp.s..hFp.thDhstDGpIshpEh...pp..lh ...........h....hFp.thDt.s.tD..Gp.lohcEh.........h............. 0 1041 1668 2149 +13045 PF13203 DUF2201_N Putative metallopeptidase domain COGs, Finn RD, Sammut SJ, Bateman A agb COGs (COG4900) Domain This domain, found in various hypothetical bacterial proteins, has no known function. However, it is related to Pfam:PF01435. 
23.20 23.20 23.30 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.95 0.70 -5.24 16 492 2012-10-03 04:41:15 2010-08-17 13:36:00 1 4 403 0 173 512 550 212.40 15 53.68 CHANGED phppRhstAlt+hlEstPhhuuLsLhhphp-..ucsh......................tsstTDGc.plaYsPsF....-sLslsppVGllAHEVhHlALcHstRttsht......hD.plaNhAuDuhINssLlpsGa.tLPtsslh.pp.............csshtsasAEclYptl.........................tt.ttttuttptt..sputppstpppscu............t.hthssGtps-hsstsputussEttstput..cWpppltcthptG+usGph.utLtchls-hspsthP.WcplLRshls+slpc.s...chSWpRPsRRalu...........pssa.Pu.hp ......................................................ht.........................hhs..h..h.th.........s.th.........................s.h....ts....c...uh....hlhhsPthh......th...s........p...p...h..t...t...l...l....hH-lhHhhhtH.h..hRt.tt....................................hst.hhshAsDhs...lN...p.h....l.......p....t.h...........h..Ptshh..........................................................th.sEt....h.h.t.l...............................................................................................................................................................................................................................................................................................................................................................................................................t.............................................................................................................................................. 0 70 126 158 +13046 PF13204 DUF4038 Hydrolase_6; Protein of unknown function (DUF4038) Coggill P pcc Jackhammer Family A family of putative cellulases. 
22.60 21.70 22.60 22.00 22.10 21.50 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.75 0.70 -5.08 28 345 2012-10-03 05:44:19 2010-08-17 13:38:46 1 8 222 4 120 308 23 264.50 24 58.13 CHANGED spRaLhppsGpPFFaLuDTuWthhp+lsp..--hchYLcpR+ppGFNVIQhssLsph-uh....sh.sphsh..tpp.th.....thN..........sYa-HhDthlchAspcGlhlulVslWssts..st.h..........sslhsh-pAcsYu+alspRYcch.NlIWllGGDs............csspthchWpulupslpptsst.......LhTaHPpGp...pssschFcsp.WLDFpMaQSGHpp..psp...............s.hphl..phshpppPsKPllDuEPsYEshshshps........passhDVR+tuYhulhuGu.sGhTYGspuIWQah ...............................................................................t..+hht..tsGpPahhhu.-TsWthh.....p.phsp.............p-...h....phY.L.psppcpG.FNslphslls......p...h.s...h......................sh.s.....h...sh....t..................................hshtthN.........................................sYFc+h-hhlptstphGlh..s.s.l..l.h..a..s.s.h....t...tWh.....................shhs..-p...sptYscalspRatsh.Nl.l..Wh.luGDh.............................sptshphap.thsp..slp.........ph...sstp..............LhohHspsp.........ts.sph.......h...........psp..shlshshhQo.uHtt...tt..................s.hth.l.....t....p.ptt...hKPVlssEs.tYEs.h...........hshts..............hss.-lR+t.sa.ulhuGu...s..GhsYGtpslhph.......................................................................... 
0 57 94 107 +13047 PF13205 Big_5 Bacterial Ig-like domain Bateman A agb Jackhmmer:O26670 Domain \N 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.86 0.72 -3.44 269 1557 2012-10-03 16:25:20 2010-08-17 15:32:03 1 286 641 0 798 1779 1039 104.70 19 16.89 CHANGED DsssP.sls..us.sPsss....us...sl..ss..ss..............slslsFsE.sl....pssshstshhh..............ssstsssls....s.s...................hsu.psl..slsPs..............ss.Lsss.TsYslsl.....ssu......lp.....Dhs...G..s..........h...............shs...a....sFoT ..............................................................................................P..tlh....sh.sPsss......us......sV.....ss..st...................sls...ls.Fsc..sl.........shssspts.l..hh.....................................ssssshslshp.........................................hsu..ppl..sls.Ps..............ss.Lpss.....o.....s...Yslsl.........ssu.....lp......Dhs..G.....s..................................shs...hsFoT.................................................... 0 256 541 758 +13048 PF13206 VSG_B Trypanosomal VSG domain Bateman A, Jackson A agb Jackhmmer Domain This family represents the B-type variant surface glycoproteins from trypanosomal parasites. This family is related to Pfam:PF00913. 
25.00 25.00 25.00 25.00 24.70 24.80 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.42 0.70 -5.32 132 958 2010-08-18 11:04:25 2010-08-18 12:04:25 1 4 12 0 708 958 0 291.60 15 75.39 CHANGED hhlhlhhhs................ssusssssp......................Ntt-FpsLCp...llp.....luptt.....hp.st.sp...psssthpplt....tlNhohusss....ahp.h.ptttttp.ttt..tt.t..................ttWppsttthpp.ttp..........................................................t................thtphshpthsstttpthp................pltppAtplhpphp..ptttpttsss......thppt....lpp...AlYGsssssssst.............hssssspsss............C...........stsssssu.polssslhCLCssss...........................................................ssssphCspt............sssssshtssss...sspssap.pltstC...sttsssplT....ssplpsslt.h..........sthppttsssssphhLGp..........tssssCsG.s.....ssuhC.VpYsshhsst.....shsslsWlppLppAsppLpptppsptptpp.........htp.plptl .....................................................................................hh.....................................................................................................................................................................................................................................................................................................................................h...h..h.tt.t...........t...tt.......hcpt...............htp.....sla.Gptssts..p...................................................tththsss....+sts......................C..............tsttsp...t....s..G...colssDhhCLCssss...................................................................................tpstpphCstt...................tsttt.s.h....t.s.s.t.t...................t.h.......pp.....s.....Wp..p.........lp.ptC....pttpp..t..h.p.................tplpphl.tph.........................tthhpt..tp.p.p.t.p.........t.....t.......h.h..LGt..................tptts.....CsGpp..................ssshC.V.Yttp...t.....................ttttlsWhpcl.ppuh.p..p.hpp.hpp..ttp.
.pt...................................................................................................... 0 622 708 708 +13049 PF13207 AAA_17 AAA domain Bateman A agb Jackhmmer:O26135 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.06 0.71 -3.29 412 3187 2012-10-05 12:31:09 2010-08-19 09:37:34 1 91 2065 29 1073 22418 10222 153.40 15 55.53 CHANGED hlhls.G.ssGuGKoTluptL.....uc.p...............h..........s........h......h...l..phss.....................ht..shh.tht..pt....................t..p.....ph.......lpph.......................hhhcs.........................................................h.cht..tss......phlhhs...................hsh...........sttt......hhp..c.............................................cshsp .........hlhlh.G.ssGoGKo....TlA....ppL....................up..p..............................................h..................................s..................................hs....................h.............l......chDs....l.h................................................ht...sht.....tht.....pt.........................................................................t..h.p.......................ph..............lpt................................................................hll-G..........................................................................................................ht.............................hhhh.............................h..................................................................................................ttthtt.h...................................................................................................................................................................................................................................................................................................................................................................................... 
0 356 664 893 +13050 PF13208 TerB-N DUF4016; TerB-N Anantharaman V, Aldam G, Mistry J gba Anantharaman V Family The TerB-N domain is found N terminus to TerB, and TerB-C containing proteins [1]. It has a predominantly alpha-helical structure and contains an absolutely conserved glutamate [1]. The presence of a conserved acidic residue suggests that it might chelate metal like TerB [1]. These proteins occur in an two-gene operon containing an AAA+ ATPase and SF-II DNA helicase suggesting a role in stress stress response or phage defense [1]. 25.00 25.00 38.00 38.10 24.80 23.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.56 0.70 -4.89 152 214 2012-10-09 17:40:07 2010-08-19 12:01:53 1 12 206 0 57 217 8 206.60 24 30.87 CHANGED Psc.hhphtshth..s.....................hh......u....................ht...sp.....-tsalptuh.h.As....sshsht.hshh.sYaPoYpslospp++uYhsWhupsRps.........ssssluYlFlahYtL.pplhl-ss......pcsht.tlhp.hpch............lshY...........hppah.pshl...hhhsl....s..........................hpph..........hhh.p...s...............................................pp.....hutc.shthhhsLuphhhc..pssl.........................schshhhst.sh.pltt....+ps ...............................tphhthtthhh.s.......hlu...............ht..sp...-tsFlp.uh.lAs.....sphsht.hshh.sYa.PoYsslospp++uYhpWhupsRps.........ps.sluYlFlahYtL.pplhl-ss......pcphstlhp.hp+h............lshY......hppahpshl...hhhsh.st..............hpch.........hhhp.s............................................pp..tutc.shthhhsLuphhhcppsl..........................schshshshsh.tlt..................................... 0 16 35 45 +13051 PF13209 DUF4017 Protein of unknown function (DUF4017) Aldam G, Mistry J gba Pfam-B_2009 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 
21.10 21.10 21.30 91.40 20.20 20.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.05 0.72 -4.18 15 75 2010-08-19 11:03:21 2010-08-19 12:03:21 1 1 75 0 4 26 0 56.90 82 94.07 CHANGED MKNIlPALllYIIVClIAhIlPAS-GYNpVGWKLFVGQAYAIPIFlITAIITFYINKKKS MKNIhPALllYhIVClIuhIhPASpGYNalGWKLFVGQAYAIPIFlITAIITFYINKKKS.. 1 1 3 3 +13052 PF13210 DUF4018 Domain of unknown function (DUF4018) Aldam G, Mistry J gba Pfam-B_2010 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 190 amino acids in length. 25.00 25.00 162.40 161.90 20.00 19.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.26 0.71 -4.92 6 74 2010-08-19 11:48:07 2010-08-19 12:48:07 1 1 73 0 3 66 0 186.50 77 56.30 CHANGED hpKKsTGFllLhllQIIuCShFLsFSllGoILLPLFFFIVHAhssGhPVQKSLGuIlWFVVSAIFYA.PFPPLWKLlLLsLHIhlTFWLTGuNRNQQLlRFlSIITIGlhShLll.lFPYIRLlhSYlhphVALGFGYAlpPLhSAApLKDT--hWpNKGpLtcspIc-ss.p...FDPhllNSITIIlhTsIAl ...................................MpK+TTGFVILLlhQIlhhSlFLPFSLFGTIhLPLFFFIVHVVGPGYPVQKSLGGIVWFsVSAIFYA.PFPPLWKLLLLllHIMITFWLTGANRNQQLLRFsSIITIGlMSILIVQVFPFIRLIFSFIsEVVALGhGYAlNPLlsAAELKDTEDVWANKGHLLKPpIEDs+t...DFDPTLINSITIIsCTAIAI....... 0 0 2 2 +13053 PF13211 DUF4019 Protein of unknown function (DUF4019) Aldam G, Mistry J gba Pfam-B_2012 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 130 and 183 amino acids in length. There is a single completely conserved residue E that may be functionally important. 
22.10 22.10 22.10 22.40 22.00 21.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.24 0.72 -3.77 34 143 2010-08-19 11:48:56 2010-08-19 12:48:56 1 4 124 0 45 144 12 100.90 29 57.03 CHANGED puAppalpLlDsGcas-sWcpAushh+stlspcpWsstlpssRts.LGsltsR..phhstpthsshsssPcGpYsslpapTpFsstts..shEplohhh-pDutW+lsGY ................sA.phhphlDus.....phsphWp.sAushh+stlsp.s.tasspltstRtp.LGslhsR..t.lsth.phs..sh.sshPsG..YsslpasTpFspssp...shEplohph-pDstW+lsGY............. 0 10 24 33 +13054 PF13212 DUF4020 Domain of unknown function (DUF4020) Aldam G, Mistry J gba Pfam-B_2014 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 176 and 195 amino acids in length. 21.60 21.60 22.60 21.60 21.20 20.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.00 0.71 -4.70 10 80 2010-08-19 11:50:41 2010-08-19 12:50:41 1 2 80 0 5 73 0 175.10 70 16.29 CHANGED VFIKCVLYWE..........................QKTDWLYPLLlpEsEEsKIKFMpaLCYYVKTLSs+EQQKFWsAWLusFLRERP..KMGsI....TsREYVMlLRhlLaMDEVhEKGLpll.ppF.sVpGcssptEhKQLahchLcKcEphKtahEhYAslFFhLLQsspEAshhEsEIIQIK-hLspapV-+clLshIcNEh ......................................VFIKCVLYWE..........................QKTDWLYPLLIpENEENKIK.FMQFLCYYVKTLSsKEQQKFWsAWLSlFLRERP..KMGtI....TAREYVMLLRIILaMDEIlE+GLsIls+uFSsVpGK.ssppEhKQLahEhLcK.pEShKhaKElYANVFFhLLQThQEAsLhEsEIIQIKELLltYcVEcpVlphI-NEI.................................... 0 2 3 3 +13055 PF13213 DUF4021 Protein of unknown function (DUF4021) Aldam G, Mistry J gba Pfam-B_2025 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved YGM sequence motif. 
25.00 25.00 29.30 29.30 19.20 18.30 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.47 0.72 -4.33 5 82 2010-08-19 11:51:21 2010-08-19 12:51:21 1 1 81 0 4 36 0 45.50 80 77.05 CHANGED K-NspcNsTsIpNNNTsNLslEEQAMNGLYGMPETsIEDADHAcT- .............KENVTENsTsIQNsNTANLsIEEQAMNGLYGMPETsIEDADHAts... 0 0 2 2 +13056 PF13214 DUF4022 Protein of unknown function (DUF4022) Aldam G, Mistry J gba Pfam-B_2027 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 85 amino acids in length. 25.00 25.00 79.30 79.20 21.10 19.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.78 0.72 -3.88 9 74 2010-08-19 11:52:15 2010-08-19 12:52:15 1 1 74 0 3 36 0 75.30 92 97.18 CHANGED MLLSHIMtMDaIMSIlTLALLLLAElLVAIILIGVSIEICSYGWKKSNGIKYSCLLLSLLLGTASILGLhAAPAYFFIQLTEK ...............MSIlTLALLLLAEILVAIILIGVSIEICSYGWKKSNGIKYSCLLLSLLLGTASILGLhAAPAYFFIQLTEK. 0 0 1 1 +13057 PF13215 DUF4023 Protein of unknown function (DUF4023) Aldam G, Mistry J gba Pfam-B_2030 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved KLP sequence motif. 25.00 25.00 34.40 34.10 19.90 19.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -7.66 0.72 -4.18 6 106 2010-08-19 11:53:06 2010-08-19 12:53:06 1 1 106 0 13 35 0 37.50 69 92.19 CHANGED MpsTp-FV-Kl+EsQuKsc+N+c+QGpGsPu+KLPNKQ ...MSNSN-FLDTLHEKQAKDEQNRKRQGNGNPAKKKPNKT.. 0 3 10 10 +13058 PF13216 DUF4024 Protein of unknown function (DUF4024) Aldam G, Mistry J gba Pfam-B_2031 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 40 amino acids in length. There is a conserved RDE sequence motif. 25.00 25.00 93.50 93.40 21.20 17.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.68 0.72 -4.15 6 71 2010-08-19 11:53:56 2010-08-19 12:53:56 1 1 71 0 2 7 0 35.00 97 94.59 CHANGED MVGLSVTKlHLFRDENVNFLFCIEFMQKNELLLTH MVGLSVTKlHLFRDENVNFLFCIGFMQKNELLLTH 0 0 1 1 +13059 PF13217 DUF4025 Protein of unknown function (DUF4025) Aldam G, Mistry J gba Pfam-B_2033 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved EGT sequence motif. 25.00 25.00 33.60 33.40 23.30 22.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.76 0.72 -4.12 17 126 2010-08-19 11:54:36 2010-08-19 12:54:36 1 1 126 0 18 64 0 51.10 49 81.61 CHANGED Kpppppt.p.lAscpasssshcsss.h.tuLAhTHEQVSDsYtEGTI-shlpcpsp ..........................KQpNKQslp.stpQs.YTScssssup......SVhcEQISDTlAEGTIDsKLs+tS........ 0 5 10 12 +13060 PF13218 DUF4026 Protein of unknown function (DUF4026) Aldam G, Mistry J gba Pfam-B_2037 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 450 amino acids in length. The family is found in association with Pfam:PF10077. 
21.30 21.30 21.30 29.50 19.50 20.70 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.87 0.70 -5.56 11 104 2010-08-19 11:56:37 2010-08-19 12:56:37 1 3 104 0 8 85 0 295.70 54 65.82 CHANGED EKcsSpMVAlPuoclTht.LEQRLEp.QThaT-G-IsYhs-t..suFFapC++s-c-L+FalpLsEsDs-.pI.pPYauTDslosELhAcAsAssQ-lhlEsLFps..cPLssYhQQLphlphLsPDLLLulD.SAAGKVhTREWlpFQLEs.DLhP-I-SLYVIHAVYDs-.......E-ssPThYWFHTHGLsRCGLoEsElIIPp.IuSYYGIPDLFpoFVNNuIpNGQIsFNEPIhIGQTpsGhEYLVAVPFEEGLcHVGpSTPlDsL+PLEEMpachpsss.pspFLGDhsDRDEhHQcPSVMLFRsspEpPhLESFFKGaEEQsAhMFhRT ............................................ERHFSDMIAVIPTRI...T..I.EQLKQRLEsIATKVD.-LKI..V....Y....SDE........TSLIVEL.....HMc...-plIPYELHIDEs.s.D..PE.EY...KhYNRQDoT.IVDRsFED.A.AaGTEIFTRT..LFVG....DVL-CFFQQLQFLWNLAP.DLLFVIDSSAAMKVISRsYIE.YHVEN.ELLPDIPDLYVIHSVYEDD.......K-uEPTQYWF.HTHGLLRAGVTEIELI.IP..N..R...ISSYYG..IuDLFQTFANNAVE.NGQVP.MNEPIVIAHSQQG.SIH.TVA.VPWEKGLSYIGHKTshDQLSSIE-EEVKLQPIsAQNTFLGGMDsRD.EYHQSPSVLLFKhsTSEEh.IESF.FKEHEEATGLMFYKT..................................................................... 0 2 5 5 +13061 PF13219 DUF4027 Protein of unknown function (DUF4027) Aldam G, Mistry J gba Pfam-B_2038 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved CLGGF sequence motif. 25.00 25.00 30.40 69.60 24.90 16.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.28 0.72 -7.64 0.72 -4.52 13 86 2010-08-19 11:58:14 2010-08-19 12:58:14 1 1 75 0 2 42 0 36.00 77 90.50 CHANGED MKuhQNLSYSQGVoLICLGGFsuSVsLAllIKhhpQ MKuhQNLSYSQGVoLICLGGFAASVTLAVlIKlhHQ 0 0 1 1 +13062 PF13220 DUF4028 Protein of unknown function (DUF4028) Aldam G, Mistry J gba Pfam-B_2040 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 67 and 93 amino acids in length. There are two conserved sequence motifs: IVKI and YVKKWF. 25.00 25.00 88.70 88.60 21.20 20.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.19 0.72 -4.33 12 74 2010-08-19 11:58:51 2010-08-19 12:58:51 1 1 74 0 2 39 0 64.00 92 87.78 CHANGED MIVKILKDSSNSFLCTVQNKNG-pYVKKWFpKpcNpEELGRPTFKEVEKDWKENRESFMYPNlKA MIVKILKDSSNSFLCTVQNKNGDQYVKKWFRKHENNEELGRPTFKEVE+DWKENRESFMYPNVKA 0 0 1 1 +13063 PF13221 DUF4029 Protein of unknown function (DUF4029) Aldam G, Mistry J gba Pfam-B_2041 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 119 amino acids in length. 25.00 25.00 170.80 170.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.16 0.72 -3.61 10 73 2010-08-19 11:59:44 2010-08-19 12:59:44 1 1 73 0 2 37 0 94.80 90 89.30 CHANGED MTKIELMALFLGYVFIFSNLNRIQEQSILEICIFSISIELFSIVSIVLLNELFphIHSFELhKFGNlVLQVICAYIVFVVLDKIlGQQTVFQDpRK MTKIELMALFLGYVFIFSNLNRIQEQSILEICIFSISIELFSIVSIVLLNELFpWIHSFELMKFGNlVLQVICAYIVFVVL-KIVGQQTVFQDNRK 0 0 1 1 +13064 PF13222 DUF4030 Protein of unknown function (DUF4030) Aldam G, Mistry J gba Pfam-B_2044 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 164 and 197 amino acids in length. 
21.60 21.60 21.60 45.10 21.50 20.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.71 0.71 -4.48 6 71 2010-08-19 12:00:24 2010-08-19 13:00:24 1 1 65 0 1 48 0 136.00 73 72.54 CHANGED sDpsRpVDLEIADT.s.oSclKp-INpQLKNQsI+PYTINlsQRsMcIVKpEpRWscVhuoIh--lFsKNGYKGFuIp.hNhEusQPhslsIpTpIssuDsGAKEFGcKIEKElsslLKTcclpKWI-sDSYTIEIYSpDpQK ..D-KpRhVDLEIADSEN.uNEIKKEINKRLQIQGIhSYKVNISQRN+EIVNAE+RWpLVFGQIFDDVFRKNGYEGFGIQQINYKKNQPVTIDIKTKIsDDEVGAREhGQKIEKEVEsVLKTEAVKKWIENDSYAIGIYDI-sR..... 0 0 1 1 +13065 PF13223 DUF4031 Protein of unknown function (DUF4031) Aldam G, Mistry J gba Pfam-B_2059 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 91 and 130 amino acids in length. There is a conserved HYD sequence motif. 25.00 25.00 25.10 25.10 22.90 24.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.73 0.72 -3.83 53 158 2010-08-19 12:01:13 2010-08-19 13:01:13 1 2 152 0 76 163 23 81.10 42 63.48 CHANGED VYlDsstaP....h.+GphWuHLluDo.....h-ELHAFAs.plGlscRsFpp......-HYDlstpp+scAl.shGAlsls..sR-....ls+RLpsuG ......VYlDsstaP.....h.+GphWuHLluDs.....h-ELHAF.As.plGlsR.RuFpp........................-HYDlPspchs.cAl.thGAltls..p+c....Ll+plht..s............... 0 22 48 68 +13066 PF13224 DUF4032 Domain of unknown function (DUF4032) Aldam G, Mistry J gba Pfam-B_2062 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 170 amino acids in length. The family is found in association with Pfam:PF06293. 
22.30 22.30 22.30 22.40 22.00 21.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.95 0.71 -4.54 44 312 2010-08-19 12:02:02 2010-08-19 13:02:02 1 7 304 0 87 228 135 157.30 46 36.80 CHANGED cpLWsELTspEsFsssEp.WRlppRIcRLN-LGFDVuElplpost..sGsplplpP+VVDAGHHpR+LhRLTGLDVpENQARRLLNDL-saRAs.......stpshs-phsAHcWLs-VFEPslculPt-L+uKLEPAplFHElLEHRWaLSEptG+DVshtEAlpSYlcsVL. ..............................pLWsELTst-pFsss-h.apl-pRlcRLNsLGFDVuELclpTss.........-GpclplpP+VVDAGHHpR+LLRLTGLDspEtQAR........RLLNDL-saRAp...................sshpsh-.plsAHcWlsElFEPsVptIP......E.....l..p....p...+....h....Es...AQhaHEVL-HRWYhSp+tt.+-VshsEAspuYlcslL........ 0 35 64 80 +13067 PF13225 DUF4033 Domain of unknown function (DUF4033) Aldam G, Mistry J gba Pfam-B_2072 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 80 amino acids in length. 21.40 21.40 23.30 31.30 20.50 18.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.13 0.72 -4.37 27 109 2010-08-19 12:02:47 2010-08-19 13:02:47 1 3 33 0 82 111 5 84.10 49 32.97 CHANGED WLhGPsc.lpph-..................tptsuVhlcKCRYLEpSsCsGhClNhCKlPTQsFFccchGlPLpMpPNF-DhSCphhFGppPsshp-D ....................................WLhGPsc.V.ps-.......................ssGhpppoGVhlcKCRY.LEpSsCsGhClNhCKlPTQ.sFFpcchGlPLpMpPNFEDhSCphhFG.tPPshppD.......... 0 29 63 75 +13068 PF13226 DUF4034 Domain of unknown function (DUF4034) Aldam G, Mistry J gba Pfam-B_2075 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 280 amino acids in length. There is a conserved PRW sequence motif. 
21.30 21.30 21.40 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.87 0.70 -5.31 8 236 2010-08-19 12:03:56 2010-08-19 13:03:56 1 9 217 0 29 143 1 257.00 50 39.77 CHANGED +ppph.clcphLp-pca-ELDchh-pthst.apu+puEpc...Yshshss.s..hhDhsollus...t.ptLAhL+AWppApPcShHAalspupYWp+pAhchRohuWApcVTcstWlsAttss-hslhAtLpAlsLcPR.hhAuhhhhssoshFGpPsWLusllpGpcstspslhts..thctph.pEspAhhuppGLpshsph...st.hPssLPsts-.cchpcsh.YWLpssLuIaPptFhshp-Yl.ahhPRW.GGSac-IccFlsSslCcpLSptE+spLchhlhWD .........................................................RcWpIsDIsuLLREcRY-ELDEpYsQAL..TcSFTS.R-AE+R......YFhAWspMc...FYDMcTLV-A....GP...pGLALI.KsWQ....+ARP+STHAWL...AEAQYWsHRAWLYR.SYGWA+-TT+AMW..lCAAACNEpMVlAsLpAIDp-PRQWMAAuLhp..TsSpsFGpPsWLsthLsGscssu.PLht-Lt-YaccoPQElsALMAaSGL.uascAlsPslshPulLPcpsD.D..uGpK...YWLtVsLsIFPTsFYlhsEYIPF+MPRW.tG.SH-EIp-hL-SssC-HLSstE+-+LclLIWWD......................................................................................... 0 3 14 21 +13069 PF13227 DUF4035 Protein of unknown function (DUF4035) Aldam G, Mistry J gba Pfam-B_2076 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 67 and 93 amino acids in length. 22.10 22.10 23.20 22.80 21.00 20.40 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.69 0.72 -4.59 18 233 2010-08-19 12:04:48 2010-08-19 13:04:48 1 2 184 0 14 135 5 52.90 61 63.23 CHANGED htFDRpsPlGDhRsDh+sAQIup..AshsuQGs+..sslsDhM.tWspc.....-cctpDDu ..hEFDRlSPLGDERGDIRNAQIV+..AVFGAQGhs..VuLcDAMLsWGED.....EDcsEsDP............................. 0 0 5 11 +13070 PF13228 DUF4037 Domain of unknown function (DUF4037) Aldam G, Mistry J gba Pfam-B_2110 (release 24.0) Family This presumed domain is functionally uncharacterised. 
This domain family is found in bacteria and eukaryotes, and is approximately 100 amino acids in length. There is a single completely conserved residue P that may be functionally important. 22.10 22.10 22.70 22.10 21.60 21.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.11 0.72 -3.78 37 317 2010-08-19 12:59:41 2010-08-19 13:59:41 1 17 297 0 68 282 5 100.10 31 26.79 CHANGED WLplPpppLups.TsGpVFtDsh.GphophRppL..paYP-DlhhthlAsphhphuQtGQaNhsRshpRsDthuutlshscFlcsshpLlaLLN+....pYhPYa...KWh .......................................pphlush.psGtlhaDt..Gchspl+ccl..tsYP--LtcphIucpLhhh...sp..........a....Nht....+sL+RpDhluhhtsls-hhcshhsllFhLN+....hatPhh...KW.............. 0 36 54 60 +13071 PF13229 Beta_helix Right handed beta helix region Bateman A agb Jackhmmer:O26996 Family This region contains a parallel beta helix region that shares some similarity with Pectate lyases. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.18 0.71 -12.28 0.71 -4.48 178 5365 2012-10-02 14:50:22 2010-08-19 14:58:07 1 525 1536 7 2617 9515 1881 170.30 13 29.99 CHANGED sGl....hlpssss.................hp.....lpssplpss......................tssGlhltssss....................hhlp.s......splps.........t..ul.h.t........................spsslpsstl......psss.........................uhhh...sssshlpssplpss........ts..............Gl...h..s................sss....spl....psNplpsss.........................tsGlhltsss....................sh..psNplp............sst........................tsGl.hhssss...............splpsNph 
................................................................................................................................................................t...................................lt..t..p.h..t......................................tt..tG.lh..ht..ssst..................................................hhlp..s.........................splts.............................t.tu.l..h.ht.ts.................................................................sss.tlpss.pl......................psst..............................................tGl..hh....t.....s......s....s.....s...s......h..l.p.s......N..plt...ss.....................sss......................................................................Gl..hlt.s..............................sss.....................stl............ps.N.p.lp.s..st.....................................................................tsG...l...h...ltsss..................................ph..h.ps.N.h.lh...................s.st...............................................................G.h.......................................h...................................................................................................................................................................................................................................... 0 1057 1738 2217 +13072 PF13230 GATase_4 GATase_II; Glutamine amidotransferases class-II Mistry J jm14 Jackhmmer:Q7LYB4 Family This family captures members that are not found in Pfam:PF00310. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.65 0.70 -5.71 97 1953 2012-10-03 21:14:07 2010-08-19 15:05:10 1 8 1663 4 535 6397 3754 248.90 35 89.16 CHANGED MCQLLGMNCAsPTDlTFSFoGFusRGGlT-cHuDGWGIAFFE..........DKACRLFlDpQuuusSPlAEhVKcYP.IKS+NsIAHIRKATQG+lhLENsHPFhREL.WGRHWIFAHNGDL.ps..asPpL....uGsYp..PVGsTDSEpAFChLhptLRctF.s.ts.pPsLsELFc.tlu-Lo+cIsc+GlFNFLhSNGQALFAHCS....T+......LaYlVR+a..PF.usAHLl.DtDlslDFuch..TTPEDRVAVIATpPLT.....csEsWTshpPGELlhFpsG...clstp.hplsss-tlhcchpss .............................................................................MCpLhu.h.s...s..s...s...P.s...c..l..s..F..S...h...p..u...h.h.......p......R.u...G.....t......T.s...s....H..tDGa...G.I.u...FY-..............................s+.u.s.R..h..F..+....D..s...p...P....u...a......s.S.s..l....A.....c...h...l........p......p.....a.....s......I.......K......S.......p....s....V.......l..A..H.IRp...A..s.p..G.....p...V...s..l.p.N.o.HP.F........s.....R.....Eh...hG......c........p.W.s.a..AHNGpL...ss......a...c...slp................ssha.p........s......l.......G.....p.......T......D..S..Et.....A...F...C..h.l...L..p.....p..L....pp..........c..........h.....s.....t.........s....................ss...........h..s...........t........l.......hc.......hlt...........p.......l.....s............p..........p........l............p...........p.................t.................G.........s.........F.....N.h..l.L..S..D....G.c.h..lhA..as.s..............sp...............................Laals...R.c.s.........P...F...s...u......p.L.....h....D..p..D....h..p....l...c....h....t......p................sss..p..Dh..VsllATpPLT.................t.s.E.s..Wpth.sGphhhhp..Gp...................thh.................................................................................................................................................... 
0 134 273 423 +13073 PF13231 PMT_2 Dolichyl-phosphate-mannose-protein mannosyltransferase Mistry J jm14 Jackhmmer:O26471 Family This family contains members that are not captured by Pfam:PF02366. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.14 0.71 -4.33 106 4872 2012-10-03 03:08:05 2010-08-19 17:21:47 1 112 2057 0 1921 6160 2870 159.90 15 29.25 CHANGED c+P.....PhhsW..lh....tlhstlh..G.s.sthulhlssslhsslshhhlatls+ph.hs.ppsulluslhhss.sshhsssuh...ths..Dshhlhhhhhshahhhpshp..csp.h..phh...lhsulhhGluhhoKattshllhs.hll..aLlhs........th.hhpp..thsal.sshlslllh.s.......lhW .........................................................shhhh..hh................th..h.h...t...l.....h.............G.............s..............h...............s........h.....+......l....h...s....h...l...h....s.h.h....s....s....h....h.l......ah....l............s.............c......c...........h.......h...........s.....p.............p..............s............u.........h.........h.............u.......u....l.l....h....sh....h.s..h.....h.....h.h.........hut.......hsp.....-...sh...h.........h..h...h..h...h.....h...u....h.....h....h.............h.....h...........p.....h......h..p....................pt.......p....h...........t.......h....h..............hh...h..u...l..h..h......u....l..u..h..h...s...+....h....h....s...h...h...h..l...h...s...hhl........hl.lhp.....................pp...hhtp........hhhh...h.......s.h.hh.h..h.h.hh.......hh............................................................................ 0 764 1422 1713 +13074 PF13232 Complex1_LYR_1 LYR-motif-like; Complex1_LYR-like Wood V, Coggill P pcc Manual Domain This is a family of proteins carrying the LYR motif of family Complex1_LYR, Pfam:PF05347, likely to be involved in Fe-S cluster biogenesis in mitochondria. 
20.80 18.60 20.80 18.80 20.70 18.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.07 0.72 -3.76 236 283 2012-10-01 20:54:40 2010-08-19 18:36:27 1 8 162 0 201 1849 13 65.40 24 34.50 CHANGED pplLpLYRpl....LRpu.pp......hsshshp........................h...ppp...lRspF+.........................cs+s......hpD......................p.......pIpph.lp.pupc......................pL...phlp .................................................tshpLYRpL..................LRpu..pp........................hs.s..hshR...................................pas...pc+........l+..ctF+..............................................cppp..........hpc.................................p.........clpph..hp..cuhp......................pLphh................................................................................................................................................. 1 61 104 165 +13075 PF13233 Complex1_LYR_2 Complex1_LYR-like Coggill P pcc manual Family This is a family of proteins carrying the LYR motif of family Complex1_LYR, Pfam:PF05347, likely to be involved in Fe-S cluster biogenesis in mitochondria. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.91 0.72 -3.55 66 376 2012-10-01 20:54:40 2010-08-19 18:48:37 1 4 227 0 259 553 1 103.20 18 73.89 CHANGED lhplYR..................plL.+pL.p....h................................................s.hcppl...........ppp...hpp.ppphs....................................................................httthpchpphhpalpsp+...tahph...hppY........p.Ght.hsp-cp..l...................chsuphVshp .....................................................hplYR....................plL..+th.t........h......................................................................shhcphl.......................+pp...Fpp.pptss.................................................................ttthptthpchpp..hhphlpppp.....ph.ph....htph..................s....hs...pp.....................t............................................................................................................................ 0 63 125 203 +13076 PF13234 rRNA_proc-arch rRNA-processing arch domain Wood V, Coggill P pcc Pfam-B_8473 (release 24) Domain Mtr4 is the essential RNA helicase, and is an exosome-activating cofactor. This arch domain is carried in Mtr4 and Ski2 (the cytosolic homologue of Mtr4). The arch domain is required for proper 5.8S rRNA processing, and appears to function independently of canonical helicase activity [1]. 
21.30 19.20 21.30 19.20 21.20 19.10 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.62 0.70 -5.23 86 683 2010-08-20 10:00:23 2010-08-20 11:00:23 1 18 387 9 414 671 13 215.90 22 22.40 CHANGED EaMLc+.SFaQFQsst.........................ulPtLEcclpclcpchsshpl......s-.........Esslp-YYcl+ppLpphpc-hRpll.......s+PpasLsFLQ.sGRLl+lpt...........ssp-aGWGl.....Vlsap.++p.st..pt....................................hsspppYlVDVLlps..s..cssstttps.................stshcPspts.-..cuch....pVVPlsL.sslpuISslRlhlP..cD.L+st-s..+pslh+sl.........pElp+R..F.P-G...lPlLDPlccMpIcDssFp+ll+KI-sLEs+LhssPLpsu..scLp..clYppapcKhclppcl+plKcclppu ......................................................E.Mlc+SFhQFQspp................................................................................slsthpc..plp.p.h.pp.phstlth...............c............................................pts...ltpY...aplppplp.p.hp.p.....p..h.p.phl.......hp.st.....hs....l..h.Lp...sG....Rlltlpt...............................................tt.p..c.hs.hGl.....llpht.ppt....................................................................................hhh.shh.........................................................................................hh.h.h....lttlsthhh...h...........p..h.....tt.............ht.....t..hhttl.........................tcl.ph........h..sts....shlcPhps..htl....p.s.thhp.............hh.php.hpphh.t.t.h.hp........s....ph..............t.h..htt+.tht.phpthc......................................................................................................................................................................... 0 147 240 346 +13078 PF13236 CLU Clustered mitochondria Fey P, Coggill P pcc [1] Domain The CLU domain (CLUstered mitochondria) is a eukaryotic domain found in proteins from fungi, protozoa, plants to humans. 
It is required for correct functioning of the mitochondria and mitochondrial transport [1,2] although the exact function of the domain is unknown [4]. In Dictyostelium the full-length protein is required for a very late step in fission of the outer mitochondrial membrane [2] suggesting that mitochondria are transported along microtubules, as in mammalian cells, rather than along actin filaments, as in budding yeast [1]. Disruption of the protein-impaired cytokinesis and caused mitochondria to cluster at the cell centre [1]. It is likely that CLU functions in a novel pathway that positions mitochondria within the cell based on their physiological state. Disruption of the CLU pathway may enhance oxidative damage, alter gene expression, cause mitochondria to cluster at microtubule plus ends, and lead eventually to mitochondrial failure [3]. 21.40 21.40 21.60 21.50 21.30 20.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.51 0.70 -4.92 42 412 2010-08-23 12:34:58 2010-08-23 13:34:58 1 64 248 0 310 412 5 207.90 35 16.22 CHANGED DWN-EaQuh+ElPppolp-RlhR-RhltKlhp-FspsAsc..sAhsllpGplhPlNPp-spp........sphalhNNIFaShu.s-shspapphGG........DcA..AptAuspDLpulphlsph.....D.lsslppLsTsllDYtGpRllAQohlPGlhp....................p...slhYGus-.....................supplhscppFtphl.cphuchh+lKpHpV...h.....t.....hplhsSh-sKGlhGsDuR...pYlLDLhRshPhDlsah 
.......................DWNEEhQss....+E....lP.pp.....slp..-R............lhR.-..R.......h.l.K..lhsDFssAAs+..GAhtllc.sp......lhs.lNPp-..pp......................hphalaNN.....IFFShu..hDs.h..spapphGG......................................Dp.A.....AhsAsspD.LpGl+shs.p.h........D.lpG....Las.sosllDYtGhRl.sAQ................SllP..GIhpt.........................................................ptpp.slhYGu.h..-.....................tsc...slh.s.p.......c.a.h.phh.pchuc.L+lptHtVh.................................spps..thcLhuos-sKGllG.s.DuR...pYlLDLhRhhP.Dhta...................................................................... 0 124 189 271 +13079 PF13237 Fer4_10 4Fe-4S dicluster domain Coggill P pcc Jackhammer:O26799 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 25.50 24.60 25.50 24.60 25.40 24.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.32 0.72 -4.16 189 7356 2012-10-03 08:56:43 2010-08-23 16:55:13 1 391 3234 1 2292 29622 8087 55.40 30 19.67 CHANGED thhhs.pt.C.....h.tCt.....tChp.....s.C......P...................................h.httthttthth.ss.p.......................Cht.........................C......s.......tChps.CP ......................hh...hs.cp..C........l...sCs.......hChp..s..C............Ps............................................................................................tshp.t.t.p.p.h.h....pl....ss...s.p.........................................................C.h.p................................................C.....................G...............pChps..CP............................................................... 
0 833 1631 2006 +13080 PF13238 AAA_18 AAA domain Bateman A agb Jackhmmer:O27656 Domain \N 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.82 0.71 -3.83 124 2477 2012-10-05 12:31:09 2010-08-24 11:57:14 1 35 1523 30 881 16173 6513 139.40 17 66.84 CHANGED IhlsGssGsGKoTluctL.....tct......h.......................thhltchhhptshhhthsph..............tpttthshptht.hhpthtpt.........................tp.h...ll-shhsthh..tphh.hh...........................lhLpss..chhhcRlpp..Rshpt.p.............cptpschhp ..................................IhltGssGsGKo....Tls...ppL...............pcph.............................................................................thhh.h..p.........h...h..p..t...s...h.h..t...t.h.ppt.....................................................t.p.t......t.h..s.t.s...t...h..........h.h.p.thtp...t.......................................................................................................tt.t..t...l..........l....l..-....s....h..h...s...h..h.........t.p.h.h.hh.........................................................................lhLps..s...c..hh.h...cR.hps.....Rs.tp.p.t.p.......................t.................................................................................................................................................................................................. 0 254 472 688 +13081 PF13239 2TM 2TM domain Bateman A, Galperin M agb Jackhmmer:O27758 Family This short region contains two transmembrane alpha helices that are found associated with a wide range of other domains. This domain may be involved in cell lysis or peptidoglycan turnover. 
24.80 22.90 24.80 22.90 24.50 22.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.96 0.72 -3.92 128 506 2010-08-24 13:02:44 2010-08-24 14:02:44 1 11 339 0 207 516 126 80.20 23 53.61 CHANGED phppApc+l...cch+tFat.HLhsYllVssh..LhhlNh.hs.s.sh.................Ws...........l..a....sh...lhWGlGLhhH..uh.p...s.a......hhhup..pWcc+clp.chhp+ .......................pApc+l...pph...psFhh.Hlhsalllssh....Lhhlsh....hs...sssh.h........................Wh.............l.....a......sh.hsWGluLlhH.ulp...s..a.........hhst....pWppp.php.p.h......................... 0 49 140 187 +13082 PF13240 zinc_ribbon_2 zinc-ribbon domain Coggill P pcc Jackhammer:O26621 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR. Pfam:PF12773. 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.62 0.72 -4.35 442 1062 2012-10-03 10:42:43 2010-08-24 14:40:20 1 99 704 0 242 1992 209 23.00 39 6.97 CHANGED hCspCGp.pl.s.c.s.spF......CspC..Gsp...l ........hCscCGp.pl.p.-.s.spF.........CspC..Gpt.......... 0 98 197 231 +13083 PF13241 NAD_binding_7 Hydrolase_like; Putative NAD(P)-binding Coggill P pcc Jackhammer:O27094 Domain This domain is found in fungi, plants, archaea and bacteria. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.27 0.72 -3.77 422 3188 2012-10-10 17:06:42 2010-08-24 15:53:40 1 33 2756 11 874 5098 3558 109.30 30 31.68 CHANGED lhl.clps..+cllllGGGpVAt..c+lpsLlp.ts..A.c.l..sV........l.u...Pp.............htc.......................................ph.lp..............h........t........p.........+p.................................................a...........c.sD..................................l.p.....s..........sh..........lVl...uAos-..splNcp..ltp....ts..+.....ph..LsN.ssD........ss...ppss...........hhhPu .................................lhhpLps+plLl.VGG..G..c.V..A...t....RKsphL..lc..sG...A.c......l..pV..........l.u..sp..............hp...................................................sh.lp...............h...........h........p.........c.t....................................................................a.........c.sp.....................................................L.c.s...............sh........Lll...A.AT...s...-....s..t.....l.....N.p.p....V.tp........sA.....ct......cpl.....h..sN.llD.....ss...ptssFhhPu................................................................................................................................... 
0 272 556 743 +13084 PF13242 Hydrolase_like HAD-hyrolase-like Coggill P pcc Jackhammer: Domain \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.81 0.72 -4.27 437 8522 2012-10-03 04:19:28 2010-08-24 17:11:02 1 48 4225 45 2343 23735 6396 74.00 26 28.24 CHANGED s.GKP...s.shhhpt......Ahtth..............................shp..............................pp..........shh.lGDp..sDlhuuppsG..h.ps...lLVho.Gh...pst............pthtt.........h..pssh.l..hss.l...s-h ......................................................................................h.htKP..p..sh.h..hpp........Ahpph.................................................s.h.c................................................hpp............shh.V.GDphhoDl.t.u.ut......p......s.......G.............h....p.o.........l.L....V.h..o....Gh.....tp.......................tth........................ssh..h...h.sl.................................................... 0 726 1394 1948 +13085 PF13243 Prenyltrans_1 Prenyltransferase-like Coggill P pcc Jackhammer:O27751 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.50 0.72 -4.04 26 810 2012-10-03 02:33:51 2010-08-25 13:25:01 1 65 544 6 312 4104 349 105.50 17 18.46 CHANGED lpshlphllspQpp-GuW....s..hs......hs......s........s.hssssthh.tshttt.....t.......ss...sstpul.c+us-alhc..pQps..-Guat...tp...stp...............sh....sp.ssh.hs.sh.hht.thht.........scpp.lc...+ulcalhpp....p.sc 
..........................................................................h...hhth.lh..ptQ.....p......-....G.u..W.............h...hp..............hs...............s......................................s...h..s..o...s..h...sh....u.Lt.th.............s.....................ss......ssp.s..t.l..p.....+..u..s.p....aLhs...........p.Q..p.t...........D.........G..u...a.u.........ts.........stt.................................................................h....th....h.....h.....h...........................................................tt......................................................................................................................... 0 121 211 269 +13086 PF13244 DUF4040 Domain of unknown function (DUF4040) Coggill P pcc Jackhmmer:O27316 Family \N 30.00 30.00 30.30 30.30 29.90 29.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.25 0.72 -3.99 434 1807 2010-08-25 15:48:25 2010-08-25 16:48:25 1 9 1353 0 544 1494 224 71.00 34 10.23 CHANGED hhlllluulss.lh..t.+s+lsAllhhGlhGhslulhFhhhuAPDlALTQhsVEs.loslLhllslppl..............sphtp ...........h.llhlssu.l..h..h.lh..t..+pRLsullhhGshGhslulhFlhhpAPDLALTQh.lVEs.loslLhlL..shp+LPp................... 
0 159 338 455 +13087 PF13245 AAA_19 Part of AAA domain Coggill P pcc Jackhammer:O26587 Domain \N 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.74 0.72 -4.22 209 2781 2012-10-05 12:31:09 2010-08-25 16:49:56 1 100 1622 0 906 20876 4566 80.60 23 8.66 CHANGED cAlt.uht......ssl...hllsGGPGTGKT.sslptlstlh........................t...sttlLll..uPoscAsc.pl.tpth...........................h..upolcplh .......................................t.....t.t......psh.......hll.pGsP.GTGKT.s....s.h....l.p..t.lAhLl.............................................................................tt.......s.p..p..l...Ll..l.....u...s...p.s.h.hc...hl.pphL.........................................t................t............................................................................ 0 336 668 820 +13088 PF13246 Hydrolase_like2 Putative hydrolase of sodium-potassium ATPase alpha subunit Coggill P pcc Jackhammer:O26582 Family This is a putative hydrolase of the sodium-potassium ATPase alpha subunit. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.28 0.72 -4.03 225 1746 2012-10-03 04:19:28 2010-08-25 17:23:33 1 71 1074 0 530 10781 261 97.10 34 14.14 CHANGED sLCNpup.....h..p...tt..............................................hhG-.soEsALlph....scph..hts..............................................hpt.....h+...pphp+ltp..lPFsSspKhh..ssl.......................t..............................................sst.......hh.hhKGAPEplLc+...Co.pl .............................................................................................................................................hLCNcup..h..sptpts.......c..............................................hsG-.soE.uA..L.lcs.....s.c.hh.....hts..........................................................................................................................shs..........h+.......pc..........c...+...l...sp.......lP.FsS...s.p.Kh.....ssl...........................t..cs..............................................ssss....t..hllhhK...GAP.EpI..L-R..Coph............................ 0 204 318 445 +13089 PF13247 Fer4_11 4Fe-4S dicluster domain Coggill P pcc Jackhmmer:O26500 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.29 0.72 -11.02 0.72 -4.11 103 12420 2012-10-03 08:56:43 2010-08-25 17:37:53 1 136 2648 60 1943 6770 905 93.70 38 31.84 CHANGED shhhYLPRlCcHCLNPsCluuCPuu....AlYKRpE...........DGlV.LlDpcpCR....GaRhClouCPY+psaaNapoG..+uEKCs.hCaPRlE....sG.s....ssCscoCsu+hRYhGsh ..........................................h...hhshtC.pH...C..p......c..s...sClssCPsu................Ah.h+cp....................s.GlV..h.ls..p-p.Cl.....GCc.hChhACPas........s.....h...............p.........h................s...............p....ss.....................ps.p.K.Cs..hC......h......t......R.ht...................G............................PsClc.sCs.s...p...AlhhG..h............................................ 0 603 1190 1602 +13090 PF13248 zf-ribbon_3 zinc-ribbon domain Coggill P pcc Jackhmmer:O26570 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR. Pfam:PF12773. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.29 0.72 -4.63 487 1966 2012-10-03 10:42:43 2010-08-26 11:58:27 1 152 1261 0 583 3186 312 25.60 36 8.81 CHANGED t.hhCspCGp.p.h...sss.......spFCspCGspl .........hhCPpCGp.p.h......pss...................spFCspCGppl..... 
0 221 418 509 +13091 PF13249 Prenyltrans_2 Prenyltransferase-like Coggill P pcc Jackhmmer:O26856 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -11.04 0.71 -3.67 105 3084 2012-10-03 02:33:51 2010-08-26 14:46:15 1 123 1058 183 1552 4407 373 121.60 19 29.92 CHANGED lchlhs..t.Qp..sDG...u..as..................h.................tp................tsssshTu.hAlh...uL....t.shspps.......s...............hcch................................hpal.p.p.ppp...sGu..ast..........sst............s..tss....hps.shh..sl.t...sh.......phh..spp......................phhpp....s.....lp.........alhs.hQ....s.......s....s.G.Gaph ...........................................................................thlht.h.Qp....t.s.....G....u...au..............................................s..................tp.........................spsssTs...ts..lt....sL.......s..hhuppp......s....................tpph............................................................hpaL..hs..hQ..........p....s.D....Gu..ahs..................p...........................................t..s.s..t...h.h.s...oh....h......ul..s..ul............thh.s.h.tt........................hpc........u.........hp..........al.hs..hQ......p............c.G.Gat.t.............................................................................................. 0 523 955 1309 +13092 PF13250 DUF4041 Domain of unknown function (DUF4041) Aldam G, Mistry J gba Pfam-B_2162 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and viruses, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF10544. 
21.00 21.00 21.30 22.50 20.40 20.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.39 0.72 -4.41 57 210 2010-08-26 13:56:30 2010-08-26 14:56:30 1 3 206 0 39 173 8 55.90 33 12.28 CHANGED KhhlRuFNuEsDsslsKVphsNlsphccRIp+uFcplNKlspt..slpIstpYLpLK ....KhhlRuFNuEs-shlsKVshpNlpshpp+ItKua-tlNKlhcss.slcIsppaLclK...... 0 15 26 32 +13093 PF13251 DUF4042 Domain of unknown function (DUF4042) Aldam G, Mistry J gba Pfam-B_2172 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 180 amino acids in length. 21.60 21.60 21.60 22.20 21.30 20.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.07 0.71 -4.82 19 156 2010-08-26 14:01:37 2010-08-26 15:01:37 1 11 113 0 95 151 1 167.50 31 16.22 CHANGED KVRluALthlpsls+sh-++shaGYWpslhP-s.......ttsttpsoLhshlLpDPss+sRssAlpsluthLpGS+.aLsQAsptc..ssptuFTsFSsoLAshlhplHcsLhhhLppEsssshLsQllKCLulLlpsTPYpRLphGllschlppl+.hlcc...pDsslpVuuLhshshLlu.s.t.hsEh .......................+lR.uAlhsh..shhcp.c.p+.sl..auYWsshlP-s.............................hs.pstss...........oLhT.hhLp.DP.ssKsRssAhpsLuslL-G..u..+paL.......sh........Ap-sp....................spptuFTsh..Ssplus.lhpLHcsLlhuL............tEss...st..........sLs.pllKsLu.....sLlpssPYpRL.....p.............sLlsplh...ppl+shlpc.........pDsslpsssl.hhthlhus...h................................................... 0 38 54 75 +13094 PF13252 DUF4043 Protein of unknown function (DUF4043) Aldam G, Mistry J gba Pfam-B_2174 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 369 and 424 amino acids in length. There is a single completely conserved residue G that may be functionally important. 
21.10 21.10 23.50 22.80 19.30 18.90 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.73 0.70 -5.46 33 188 2010-08-26 14:03:29 2010-08-26 15:03:29 1 2 154 0 29 134 71 315.30 39 84.63 CHANGED supsusI.chs-LpK.ssGDp.lsasLhspLsucs.shGspplEGptEsLphhspplpIsphR+sVcss..GpMspQRohaslRctA+stLssahschhDphhhlpLuGsp.s...s......s.ttpsphsthtsNslpAPossRhhhuusssstt.............slsu..sDhholchl-phsthuct......s.thhpsh..hs.sucsh..YVhhlsPtQhssL+sssshtp...W.phtpsshsuspu.psPlFp.GshGhasslll+cht.hshphssuss...............................usssslsRAlLLGuQAlshAaGp...........tuuttatasE.......EphDasschtlusstlhGhKKsRFss..st.....pDaGllslDT ........................................t.sssuPlsRhsDLsK.puGDc.lsFslhppLotcP.shGDpclEG..+GEsLpass.sL+INQuRHhVcuG....G+MopQRohasLtppARshLssahschtDpshllHLAGAR.shhss..phhlshstcscapchhhNsVhsPTpcR+hhuussoshp.............plcu..sDlFolsllDshshhl-phshs....slc.h.sst.....hh..sp-sh....YVlhloPtQhschhososs+p...W.phhstssstu+u.ppPLFc.GpsuMapslll+KhsthPIRFhpusp..hstsp.st........................stphsussslcRAhLLGAQAls.AaGp..................cuGtpFphsE.......cphDhsN+sElulshIpGlKKhRFspcsGph......pDaGVIslDT........................... 0 5 12 21 +13095 PF13253 DUF4044 Protein of unknown function (DUF4044) Aldam G, Mistry J gba Pfam-B_2177 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 56 amino acids in length. There is a single completely conserved residue M that may be functionally important. 20.30 20.30 21.00 21.00 19.60 19.20 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.48 0.72 -4.80 32 476 2010-08-26 14:04:45 2010-08-26 15:04:45 1 1 473 0 39 139 0 34.90 49 77.51 CHANGED tp++KKosFpKlThlhVhlMlllTluullhuAluu .....NGpRKKThFEKlTlhlVllMLlsolhGlhAoAluu.... 
1 5 18 26 +13096 PF13254 DUF4045 Domain of unknown function (DUF4045) Aldam G, Mistry J gba Pfam-B_2180 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is typically between 384 and 430 amino acids in length. 21.50 21.50 22.00 21.50 20.90 19.60 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.75 0.70 -5.19 12 108 2010-08-26 14:45:42 2010-08-26 15:45:42 1 8 72 0 88 107 0 241.30 29 23.98 CHANGED RSoSPTKGLGGFVQSAMMKRSDSVSKRWSAQ.........PsshsRs.sShhSsRsShtustt.ss..s...........sp...........................sRPuSSHSEATlV+psp.cs-h.so.ss.........s-shs+ssLs.+stSpSsssss.....sushs.shssosS+TMD.+RWSP..TKuoWLESALN+P-SP+pKtQ..spp..spWhK-.....RQuRuSVDLGRssShK-......htp.s.Gscspssohouhssh.spp-spsscttps.............s.ts...pt...css.ps...csp.ph.pssEtssppss....................tstthtsPs.hsssststssss.....lsu.....pDsl.s+sKPpoP.l.hDFRANLR+RElsp-posp-E.PEFKNVFGKLRKsEopNYVAPD.LK-NIL+GKAALNuTGGPKKop+VDEhKESILK ...........................................................................K..s.................t...s.....................................................................t..................................................................................s.sso+s.p.+RWSP..oK.u.oWL-sALp+.s.-.Pp..................t............t.....................................................................................................................................................................................................................................s........saRusL+.R............t.....t.t...p..s..EhpslhGpL++scsppahsPD.hKtNIhpGKssLs.osGP..s.hhD-h+-ul............................................ 0 16 41 71 +13097 PF13255 DUF4046 Protein of unknown function (DUF4046) Aldam G, Mistry J gba Pfam-B_2182 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 64 and 331 amino acids in length. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.31 0.70 -5.67 13 108 2010-08-26 14:47:10 2010-08-26 15:47:10 1 4 56 0 23 94 4 180.70 41 86.05 CHANGED IE-IYQEILDGKRpRFPsNTWp-DpcNELARRVT+YLIEslLKW-c--I+psWNopLIlKY+LpGlLpp+YsNSPY+MlNDlYPscFKEWEFpMTPLNFWTKEKALEsLKWTIEEKEpLSspcLLclYupKWLccpKLuuPLphaWsGSPYsMIN-LYPsRFKEWEFpMTPNpFWTKEKALEALKWTIEEKEpLss-QLhplYsl+WLpppsLposCplaWssSPYuMIN-LYPspFKEWEFKhTPssFWTKEKALEALKWTIEEKEKLo-EQLLpVYoh+WllKp+LhTPLhRYWpGSPYA .................................................pIYpplL-GKpp+FP.shW..tpc..s.p..c.h.t+R...s...hpYLlpphLchp.....pp...I....php..hlhpY+L.hhl....pt.s..thlp-haPp...............................................................................hapsSP...athlssLYP.t.+apc..h.................................................................................................................................................................................................................... 0 3 21 23 +13098 PF13256 DUF4047 Domain of unknown function (DUF4047) Aldam G, Mistry J gba Pfam-B_2183 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length. There are two conserved sequence motifs: TEA and FPKT. 22.00 22.00 30.30 30.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.75 0.71 -4.17 5 74 2010-08-26 14:53:30 2010-08-26 15:53:30 1 1 73 0 3 62 0 125.00 72 47.63 CHANGED VVTYTEAAFVsETKVQuoISTAIVFPKTIDTLscpAcQHEclIL+sYEsMKpElcs-.SlElLEQplssW+pQREKVssEREALQ+IYTEIEsYYsQlpEsl+sccS-SsKcVLpYVNAGF...ppVK- .hMTYTEAAFIHETKVt.A.TISTA.IFPKTVDpLhEQAcQHKclILHEYcpMKuKLpsp.SspEIEQAlslW+QGREKIsAEREuLQ+VYppIEpPYNQlQEELKs.NpoESsKQVhsYVNtGF+hVKE....................... 
0 0 1 1 +13099 PF13257 DUF4048 Domain of unknown function (DUF4048) Aldam G, Mistry J gba Pfam-B_2186 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is typically between 228 and 257 amino acids in length. 25.00 25.00 29.60 29.60 20.70 19.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.12 0.70 -4.74 14 71 2010-08-26 15:06:14 2010-08-26 16:06:14 1 2 69 0 56 72 0 205.90 30 36.84 CHANGED HTRTLSLLSPsssss.........ss.sssssssssRsoRpPRSATLP.SV-Rosssps.t......uspso--.hspWRRohPP....PopEALMRTG+QMASDLREGLWTFLEDIRQATVGEEGINATcSRs.......ss...+pssspusSRu..............tsGpsupSsoupSupupssssup.......psop...Sss.-sSFWSEFGIDossQp............tsppspsossssp.......pppppsp.....pss.LDl...D.DNWDsWDTPQP.pKoHTPSSSpS ..................................................HhRsLSLLSs....s..................tps...t....Ptssp.P.Sh-ct....................p.hs.WptshP.....spt-tlhpsG.+QhApDh+pGLWTFlEDIRQATVG-EuI....N..u..s..p..sRs.tt..............tpssspuss+s......................ttsssstspoupu.sp.p.s...stt.............sppsp.......stt.p..sF.Wp-hGhss......................sptstss..st..............................t.pt..................-....................-.ps..Wp.WD.oP............................................................................... 0 8 24 43 +13100 PF13258 DUF4049 Domain of unknown function (DUF4049) Aldam G, Mistry J gba Pfam-B_2191 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 310 and 324 amino acids in length. 
25.00 25.00 40.40 40.00 20.60 19.00 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.06 0.70 -5.46 3 501 2010-08-26 15:31:45 2010-08-26 16:31:45 1 1 233 0 2 271 0 263.30 60 58.68 CHANGED pNsNTAYFGDTDGRVGAVLYALLVSGHIGIRuEGWSLLCQLLKHEDMASsAYc+K........NlKsLaoLLNTRDMILNELHQHVFLKcDAITPCIFLGDHTGDRFSTIFGDKYILTLLNSMRNMEGNKDSRINKNVVVLAGNHEINFNGNYoARLANHKLSuGDTYDLIKTLDVCNYDSEpKVLTSHHGIIRDEE+KCYCLGALQVPFNQMKNPlDPEELANIFNKKHKpHMDD+LFHLIRSNol+STPVYsNYFsNTTDFRPKhEcIFtCGQTLKt......I+QKYGHaGsGVDcsQphDNslMGLNShKpA+scRschhhsSGLSCF .......psspssYFGDTDGpVGAVLYALhsoGHlGIhtcG.shLspLLphED.s.sshh+c........NsphlhslLNpRDhlLppLp.alhlpcDAlTPChFLGDpTGDRFSsIhGDpaIlsLLpphhs........INcNVhVLAGNHEhNhNGNYhtphsphK..stDTYshIKshsVC.YDsch+lhssHHGIhhD-ppKpYhlGslpVshspMpNshDP.ELAsIhNKKH+thhss+hF+h.Ru.ohts...aspYFssoTDaRPK.EslhtCuQhLt.......IpQhhuHpG.Gscpp.t.ssslhGLNuhctthstph......ShhsC..................... 0 2 2 2 +13101 PF13259 DUF4050 Protein of unknown function (DUF4050) Aldam G, Mistry J gba Pfam-B_2193 (release 24.0) Family This family of proteins is functionally uncharacterized. This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 173 amino acids in length. There are two conserved sequence motifs: IPL and FLVD. 
22.80 22.80 23.60 25.10 22.60 22.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.19 61 243 2010-08-26 16:15:28 2010-08-26 17:15:28 1 5 148 0 159 242 0 146.50 25 51.54 CHANGED hssushch-sssshspp.........................................tcpptt...pp.hpp...p......hhNpGLtlWppRRptWsGscppp................................................................................................................................................................pspthRp.......s.hsas.shYcpLlspspshs..pPIsLu-hl.........chLVsGWcp-.Ghas .......................................................................................................................................t.t...................................t......t.p.p.pp.p...tptspp..pt.......hhNpG........LhlWpppRptWsGsppp.p................................................................................................................................................................p..ppth+p.........s....hsas.sh..Y-pL.ls.........s..scs.hs..pPIsLu-Ml................chLVssWcp-Ghas............. 0 35 92 135 +13102 PF13260 DUF4051 Protein of unknown function (DUF4051) Aldam G, Mistry J gba Pfam-B_2194 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 90.10 90.00 24.70 23.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.93 0.72 -4.63 5 389 2010-08-27 08:24:20 2010-08-27 09:24:20 1 1 389 0 3 31 2 54.00 83 94.76 CHANGED MFIAWYWIVLIVLVVlGYFCHMKRYCKAFRQDRDALLEARNKLhRRssEEsStp MFIAWYWIVLIsLVVVGYFLHLKRYCRAFRQDRDALLEARNKYLNSTREETAEK.... 0 0 0 1 +13103 PF13261 DUF4052 Protein of unknown function (DUF4052) Aldam G, Mistry J gba Pfam-B_2197 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 220 amino acids in length. 25.00 25.00 61.60 61.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.49 0.70 -4.90 7 72 2010-08-27 08:59:02 2010-08-27 09:59:02 1 1 71 0 2 52 0 212.20 66 97.78 CHANGED LMKQLKLHINaHYKAILIFWhVALLIKGssoshDlKsI+luFLpDIhNNPSIAIhhFIVlSsFlIQ.DlFRLAVSFGVTRLQFFIGSlCYIlLQSAhFSFLQllhLQshhYpscshSLGupSlcQFFVQFLhYVTlAshFQssVIFppRFpWIGhulGuhFFlGLsSVhYutsGlKtLshpsotsLlsIP.FIhISIsLhllYhllSuIhIRKVSFE LMKQLKLHIpapYKAILIFWhVALLIKGshsAscLpGlKluaL.-IhNNsSIAIhhFIVsSVFlIQ.DlF.hsVSFGVTRlQaFIGuICaIlLQSAlFShLQllhLQshhYph.plshGppulpQFhlQFlFYsTlAChFQssllFppRFpWlGLhhushhhhuhsSshYutlGIKtLlFhsstsLl-IPaFIslSIsLhhlYIlhSulFIRKVSFE...... 0 0 1 1 +13104 PF13262 DUF4054 Protein of unknown function (DUF4054) Aldam G, Mistry J gba Pfam-B_2204 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 120 and 152 amino acids in length. 22.40 22.40 23.20 22.80 21.40 19.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.39 0.72 -3.79 59 275 2010-08-27 10:40:30 2010-08-27 11:40:30 1 1 248 0 36 227 20 106.50 27 80.09 CHANGED F+ttaPpFss...hP-stlphhls.A.phhlssp.........phuc.hhppuhtLhsAHhhsLsttstt........sus.ssu..hhoStssGplSlS.hssssssss.shh..hspTsYGppahpLhp ............................................F+ttaPpFss...............hscsplphhls.A.s.hlsps..........thsc.hhthhhtLhsAHhhsLtstsst......................................ssutssu...shoS..cosuplSlS.assss....s.....tss.ts..a.....aspT.YGtpahpLh........................... 0 6 22 28 +13105 PF13263 PHP_C PHP-associated Coggill P pcc Jackhmmer:O27523 Domain This is a subunit, probably the alpha, of bacterial and eukaryotic DNA polymerase III, associated with the PHP domain, Pfam:PF02811. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.61 0.72 -4.46 143 736 2012-10-03 00:45:34 2010-08-27 12:03:46 1 15 540 9 305 732 62 49.30 25 16.25 CHANGED spsNccAtphAcchslPhhuGSDAH.hspplGpuhThh.......stshp.....s...c-llpul+cGps ..................s.phApphs.lPhlsuSDAH..thp..plG..psas.h........................s....tthhthlht...................................... 0 98 213 275 +13106 PF13264 DUF4055 Domain of unknown function (DUF4055) Aldam G, Mistry J gba Pfam-B_2501 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 140 amino acids in length. 25.00 25.00 27.90 27.70 23.70 22.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.59 0.71 -4.08 30 180 2010-08-27 11:19:37 2010-08-27 12:19:37 1 2 157 0 27 167 145 136.70 29 31.58 CHANGED PLLsLAplNltHapsSADhcp.lahsupPphhlsGh..ssp..........tslpl.Gupush.LPp.sus.htalchousuls..+pshcchEspMhphGA+llppssss.cTuspupt-pssppSsLtshssslp-AlspuLcasApalG ....PLLsLAplNltHapspuDhpp.lahsupP.hhhpGl...spp........................pslsl.Gupssh.L....sp..s...us.ht.....alptsusult..+pshcchcspMhphGA+..l.lppssss..couspspt-pssppSsLtthssslp-AlspALphsAcah............................. 0 4 12 19 +13107 PF13265 DUF4056 Protein of unknown function (DUF4056) Aldam G, Mistry J gba Pfam-B_2502 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 355 and 380 amino acids in length. 
21.10 21.10 22.10 71.80 20.70 19.50 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.70 0.70 -5.40 27 134 2010-08-27 13:34:41 2010-08-27 14:34:41 1 1 128 0 22 109 0 263.80 52 72.18 CHANGED hsshs.ls.sPpGLRPCCAFGYsL+sclhGlPVPFYplsNVl-s-sLGpH+YNDuhhus...susLlG..lusEpNGLlYTp+GGFIDlAHVRDTADhThYLFopIhs+LGppaplsLssELusRpIpa.ps...ssPhssp-RasLoAaLAApLAFpLAtWHEIAQWYGapSVsGFsEtlSAFSPEDLYSNhLGA+LAhslILpupshShptaspuhsphL.puLppLtAhscspT+ptFcplDGhWWsSp+RlP-KaLVL+RcYclupsRhPshsstp ..................s..ssP..GLRPCCAFGYsL+splhGlPVPFaplsNVl-lDsLGtH+YNsus.uh...ssuLlG....LScEpNGllYTcRGGFIDhA............HVRDTADhThYLFpplhspLGp.thplsLssELtsRpIpa..ps..o..sslsscERhpluA.lAAalAFpLApWHEIAQWaGhpSVsGFsEtsSAFSPEDLYSNhLGA+LAhsllLs............s.stspppaspshsphLcptLpcLtAps.pshTptphppLDGhWWsSpRRlP-KaLlL+RcYcLu.shLPs.ss.s-....... 0 2 7 16 +13108 PF13266 DUF4057 Protein of unknown function (DUF4057) Aldam G, Mistry J gba Pfam-B_2503 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 279 and 322 amino acids in length. 
21.00 21.00 33.20 23.10 18.90 18.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.98 0.70 -4.95 13 114 2010-08-27 13:36:17 2010-08-27 14:36:17 1 7 22 0 65 120 0 222.50 44 87.59 CHANGED RuTPVRKPH..TSTADLLoWs..EsPPssus.....usus.sutRsHQPSDGISKVVFGGQVT-EEAESLsK..RKPCSuaKhKEMTGSGIFussucs-uuEsuuuss.ss.KTslRhYQQussuI.SQISFupEESVSPKKPTSlPEVAKQRELSGTLcoEuDsKhpKQlSsAKsKELSGHDIFAPPPEIpPRsh.ss..Rthth+tshshucsss+shppushhsssAGu.Sph.hsp-sVlKTAKKI.asQKFsELTGNsIFKGDs...sPuSAEK.sLSsAKL+EMSGSsIFADGKu...poRDYlG.GVRKPPGGESSIA ....................................................slRtsH..tsTusLL.Ws.................................ss.ssh.sh.........pPu.tslp.hhh.Gu.lopcEspsLsK......+K.CSs.KhKEhTGSGIFsttupstss-su...sss....+Ts.+.aQth.ssl.SpISFut-tslSPKKPoolsEVAKQRELSGThpo.-s..-sKhp.+.QhSpAKsKELSG.psIFuPP.-..s+s......................t.t....t.........s..ss.....hs.p-sshKT.uKKI..spK..ht..-L.oGN.........sIFK.tD....ssuoA.E.K...LSpAKL+EhoGssIFADG.Ks....sRDhhG.GhRKPPGG-SSIA.............................. 2 13 45 55 +13109 PF13267 DUF4058 Protein of unknown function (DUF4058) Aldam G, Mistry J gba Pfam-B_2520 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 244 and 264 amino acids in length. 
25.00 25.00 39.30 33.10 16.40 15.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.76 0.70 -5.25 21 75 2010-08-27 13:37:48 2010-08-27 14:37:48 1 3 27 0 25 78 0 216.80 37 96.42 CHANGED MtsPFPGMsPYLEpPslWP-VHppLIssluDtLsPQLpP+YcssI-cRlYhtsspps...lllu..l..PDVsVhcpp..........ssssssuss.ssspPlsVslshs-.l+psaLElR-Vss.ppVVTsIElLSPsNKRsGcGRt.sYp+KRpclLsStTHLlEIDLLRuGp.....shPh.hsshstucYpILlSRuscRPpA-lYshsLp-PlPshslPLpssDs-shlDLpsllpplYc+AuY-htIDYp...ppPsPP.Lssc-ssWlcphL ......M.sPFPGMsPYLEpsthWs-VHptLIsths-hLtspltPpYhstlp.hlh.tt..pts.......hhht..lPDlsVhppp.................ssssssss.s.stP.hslth.h...s..h+pthlElRplts.tplVTsIElLSPsNK+sG.spt.tYppKRpplhtS..sHLlEIDLLRtGp.................h...ss...ssYhlllSRupcRPt...s-las..hsLppslPshslPLp..Ds-shlsLptllpplYccutYch.tIDYp...p..s.Ps.Ls.p-.tsWlpth....................... 0 7 23 25 +13110 PF13268 DUF4059 Protein of unknown function (DUF4059) Aldam G, Mistry J gba Pfam-B_2521 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved DKT sequence motif. 25.00 25.00 43.60 43.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.41 0.72 -3.79 11 344 2010-08-27 15:00:50 2010-08-27 16:00:50 1 1 341 0 20 91 0 69.40 64 96.52 CHANGED MLlpIFuLYlpGLlLuslhllllullWlhaRAhp+hDKTu+ERQuaLYDhLMIuIhTIPILSFAhMuILLVl ..MLlplFSLYhcuLILToILVLIhLGIWIGLRAhSGVDKTA+sRQAHLYDMIMIGVLllPVLSFAVMSLlLVF. 0 1 3 11 +13111 PF13269 DUF4060 Protein of unknown function (DUF4060) Aldam G, Mistry J gba Pfam-B_2524 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There are two conserved sequence motifs: VEVV and SYVAT. 
20.10 20.10 22.70 25.00 20.00 19.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.38 0.72 -4.05 7 128 2010-08-27 15:01:51 2010-08-27 16:01:51 1 1 118 0 14 64 0 72.90 57 90.07 CHANGED hI.RucpsPlthhAscsALstHhptYG-.GRQth.ssYolthcus+lsVEVVsR+pSYVATsMsGsR+LppLPG .hI.+.uc.ps..thhsscsAlstHpcpaG-.uRQtasosYpVhhcss+VsVEVVsRppSYVATsMIGsRpLppLsu...... 1 0 3 9 +13112 PF13270 DUF4061 Domain of unknown function (DUF4061) Aldam G, Mistry J gba Pfam-B_2526 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 90 amino acids in length. There is a conserved AFG sequence motif. 21.90 21.90 21.90 22.90 21.80 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.92 0.72 -3.89 14 156 2010-08-27 15:03:06 2010-08-27 16:03:06 1 3 79 0 89 134 0 85.20 58 39.73 CHANGED VsDV+cME+uLLsLLsDFHSGKLpAFGpsCohEQMp+lRE.QEpLA+LHF-Lsspt-chs-c...........ttpppspuppNMc+LlppLEpLS.SIpK ..........VoDVpEMEpGLLsLLNDFHSGKLQAF.G............p-C..S..hEQMEHVR-MQEKLARLHF-L.sphE-....sE-...............p++stuDpNl-pLLssLEpLs.SIpp................................ 0 20 27 54 +13113 PF13271 DUF4062 Domain of unknown function (DUF4062) Aldam G, Mistry J gba Pfam-B_2536 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 80 amino acids in length. There is a conserved SST sequence motif. 
21.90 21.90 21.90 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.65 0.72 -3.96 28 368 2010-08-27 15:04:52 2010-08-27 16:04:52 1 88 282 0 170 346 35 85.70 28 9.79 CHANGED plFlSSTapDLp-ERpslhpsl.hchsahPhGMEh..F...sAu.DccphchhpchI.DcsDhYlLIlGsRYGShs....csu...hSaTctEY-aA ................................plFlSSTap..D.hptE......R...p........tlhc........s...l........hp..........h......s...........h.hs.h..th.Eh.....h.................ssp..sppp...hc.ls...h....cpl.cpsD......ha.....lh.llGsRYG.hs.......t.ps...................hSh.sp.Ea..A................................ 0 73 110 140 +13114 PF13272 DUF4063 Protein of unknown function (DUF4063) Aldam G, Mistry J gba Pfam-B_3026 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 95 and 123 amino acids in length. There is a conserved RRA sequence motif. 22.10 22.10 34.40 31.50 19.10 21.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.47 0.72 -4.24 16 129 2010-08-27 15:06:18 2010-08-27 16:06:18 1 1 114 0 24 92 1 88.90 52 81.40 CHANGED sRLssallsullLLhslullSPQQ...LPVllYKLSLlsLAAlsGYWLDRsLFPYARPuuYL....p......ctssh.hupu-aPls...pGYphlFsAAhLRRAlIVu.ushLuVuLG ..........PRLouWLlsollLhulIu.hsSPtQ...lPVVlYKLoLloLuAVLGYWLDRSLFP.a.A.RPsuah....h............................................................................... 
0 6 15 20 +13115 PF13273 DUF4064 Protein of unknown function (DUF4064) Coggill P pcc Jackhmmer:O26620 Family \N 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.74 0.72 -3.90 50 1109 2012-10-02 01:14:40 2010-08-27 16:08:03 1 4 408 0 62 435 1 101.60 24 57.04 CHANGED KRTsEhlLulIGs...Ilu....ll...huhhhlh...l..u.h.h......u.s...........tp....h..........................................hhhshhullhsIl...uI......luull..lp+....cs..p...luGllhlluullslls .....................RhsEhlLuh...Iuh...llt....ll...hslhshh...h.h.hl......us................sshtpphttphs..t..................sp.hhshhpsh.h.hslhlhh.l.l.shll....uh......luhl.t...hpt....pp.+..luGlLhlIuuIlsh..s................ 0 17 32 58 +13116 PF13274 DUF4065 Protein of unknown function (DUF4065) Bateman A agb Jackhmmer:B5ZAK8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and viruses. Proteins in this family are typically between 155 and 202 amino acids in length. 25.70 25.70 25.80 25.70 25.50 24.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.83 0.72 -3.35 264 905 2010-08-27 15:57:42 2010-08-27 16:57:42 1 8 726 0 195 777 25 106.70 22 56.07 CHANGED LpKLlY.a.upsht....lth..s....p............s.......lh.s..-ph.pAapaGPV.......h.....p...lYpph+.........tht.....................................................lspppt..c....ll-...pl....hppau....ph...su.hpLpch.oH.....................t.ts.W.........pps ...................LpKLlYaupshtlth.hs...c...................s........Lh.s...pph.pAWpaGPVhsp..........lapth+.........tht......................................................................p.pplspppt.....c.........lls...pl....hppau.......ph..............ss.hpLtch.oH.tp.ts.W...h............................................................................................ 
0 85 142 174 +13117 PF13275 S4_2 S4 domain Bateman A agb Jackhmmer:B5ZAQ9 Domain The S4 domain is a small domain consisting of 60-65 amino acid residues that was detected in the bacterial ribosomal protein S4. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.95 0.72 -4.38 202 2674 2012-10-01 23:15:27 2010-08-27 17:10:54 1 5 2633 1 435 1579 178 65.30 38 73.24 CHANGED lpl.ps.-aIpLsQlLKhssllsoGGpAKhhlt-s..tVtVNG-sEsRRG+KlhsGDhVph......s..spphplh ......................l.p.paIpLsphLKhtGlscSGGpAKhhls-s..pVhVNGp..lEoR..RG+KlhsG.DhVph...s.stphpl.............. 1 129 267 362 +13118 PF13276 HTH_21 HTH-like domain Bateman A agb Jackhmmer:B5ZBQ5 Domain This domain contains a predicted helix-turn-helix suggesting a DNA-binding function. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.02 0.72 -3.96 515 11498 2012-10-04 14:01:12 2010-08-27 17:18:47 1 58 2749 0 1982 9551 1161 59.50 26 23.43 CHANGED p....h.c.p.t..h.h.pt.............Ipplap...pp...pt...........pY...GhR+l.pt.tL.c.....c................p....h..t...h........ls+++VtRLM...+.p.hGL....pu......h.h...+..++t ...............................ttp......l.p.pt......ltplap....cp...pt...........sY...GhR.+lt......t..tL.p......c................p..........t...h.......ls++.+VtRLM...+.phuL....ps.....p..h..+.tc..................................... 0 514 1189 1570 +13119 PF13277 YmdB YmdB-like protein Bateman A agb Jackhmmer:B5ZC04 Domain This family of putative phosphoesterases contains the B. subtilis protein YmdB Swiss:O31775. 
27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.62 0.70 -5.36 140 1315 2012-10-02 19:15:56 2010-08-27 17:30:23 1 3 1256 8 347 881 1521 250.70 47 94.97 CHANGED LFlGDlVG+sGRpslpcpLPpL+cch.plDhllsNGENA.A.uGhGlTtchsccLhssGlDslThGNHsWDp+Elhsal-p.ps.+........llRPhNaPp....ssPGcGht......lhcs..sGp+ltVlNlhGR.lFM......pshs....sPFpsh-pllp...ph.......ttp..sshl......lVDhHAEATSEKhAhGaalDGRlS......sVlGTHTHV.TADppILPsGTAYlTDsGMsGsa.cSVIGhcp-tslc+FlothPp.R...FpsApG..ps..pLsGlhl-hD-pTGcAtpIphlp .................................................LFlGDlVGcsGRcslpphLPpL+pca.c.shsIlNGENA......A......s.GpG...lT.....ccIhcpll.csG.lDslThGNHsWDp..+.E.lh-.FI-c.t.p.+........llR.......PANaPc......ssP....Gp..Ght........hlps.....Ns....tcl..uVlNL.GR....sFM.........s.sl-..sPFc.ps-pllpph..........pcpsshI......hVDFHAEsTSEK...A.........hGaaLD.....GRsS......AVVGTHTH.VtTADpRIL.....P................pGTAYlTDlGMTGs.Y.DullGhc+-tllc+FlTslPp...+...hps.s.p.G...c..s..hLsGVll-lDc.p..oG+ApcIcpI................................................ 0 143 256 301 +13120 PF13278 DUF4066 Putative amidotransferase Bateman A agb Jackhmmer:B5ZBC5 Domain This domain contains similarities to other amidotransferase families such as Pfam:PF00117. Some members of the family lack the likely catalytic residues. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.82 0.71 -4.98 51 5657 2012-10-03 00:28:14 2010-08-27 17:42:27 1 16 1638 20 2096 9111 779 160.80 25 54.62 CHANGED Lsa-.GFs-lDhhhshslLspsp........t.t...aplplsusssp.Vp..........Sh.sGlsl.tsptsLpp...sspsD.sVL....lsuGhp.....pthssc.stlhspL.phc................stt...phluutCoGshlLAchGLLsshsAsTchpspshlppttspVh...sp.asssG.........slsTAG.Gsluuh.Luhallt ......................................................................................hhsshphhshsssh-.sh.thss..................................t....ha...p.h...p...l..h...u......s......s.....s......s...l.p.........................................s.s....s.....G.....l....p.....l......s..s....t.s..h..ss................hs..p.....s...D....h.....ll.................V..s.G.uhs....................t.t.h..ts...t....tl....l.....s.....h....l...p.p.tt......................................tpu.......thlsulC.o.G.u..a.l.L.At.A.G.L.L..c...G...+..c.A..T.....T.......H.......W............t..........h............h.......s.......t.......h...t..p...t......a........P....p....l....p....h...t...............s...t.....h...a...l....p...D.u............................................................s.lhTu.u...Gss...A..ulDhsLtll.................................................... 0 461 1105 1633 +13121 PF13279 4HBT_2 Thioesterase-like superfamily Bateman A agb Jackhmmer:Q7P2C3 Domain This family contains a wide variety of enzymes, principally thioesterases. These enzymes are part of the Hotdog fold superfamily [1]. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -11.44 0.71 -3.66 179 4845 2012-10-02 20:54:35 2010-08-31 15:04:51 1 19 2807 37 1526 6021 3343 126.80 18 76.66 CHANGED Vt.s-.hDh................................t...Hh....s..sspYl......................t.h....hpt.up..............phht...............hsh.....thhttt...............th..................uhhhsps....plpah..p........................plp.hs-p.h.........lp..............hpl..hs........hsppphthhhph..............hp..............tsp...h...............Apspthhhhh.shpp.................................................................................ssshP......pt............ltptl ..............................................................t-.hD............................hhs.Hl....s....NupYh......................pa....h-p.ARh...............................................................phh.p..................phsh..........hp.hptp....................................s.h.............................shl.lsch........phpYh..p..........................lt..hs-p..l..............plp.................spl.....hp....................hssp..php.hp..hp..l............hp.................................ssph.....s.........................................up..uph.h..hlhl..-hps...p................................................................................................hs.....................h.h.......................................................................................................................... 0 432 883 1254 +13122 PF13280 WYL WYL domain Bateman A agb Jackhmmer:Q7P457 Domain This presumed domain is around 170 amino acids in length.\ \ It is found to the C-terminus of a DNA-binding helix-turn-helix domain. This domain may be involved in binding to an as yet unknown ligand that allows a transcriptional regulation response to that molecule. 
There are a number of proteins that contain two tandem copies of this domain such as Swiss:Q47P13. This suggests that this domain may form a dimeric arrangement. 25.80 25.80 25.90 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.14 0.71 -4.39 746 6059 2010-09-01 09:50:02 2010-09-01 10:50:02 1 38 2680 0 1655 5249 711 156.10 17 51.55 CHANGED p.h...l...p.........t.......lt....pAltppc...tlph.....pY..ps..................................pptp.....p................R.p....l..........c.........PhtL.h..h..p..p..s..p..WYL..hu....h........s..p.............................p.......c...........p.........s................h..R...........tF+..lcRl....p..s..lp....hh..s.....p..................................ph..p.......hs...sh....c.......l......p........................p..hh.p......p........h..h.....t..hht................t...hp..l......pl............c..h.........s...sps...sph........lh....p....p.......hh.......t....s.............t.p..........................lt...............p.....ps..........ss.t.h........h.....hp.h...p.....h..t.s.....p.....t.....h.h.thlhua.G..s.p..lcVlp....Ptp..L+ppltpphpp.hh 
..............................................................................................t..httltpAltppp....hlph.....p.Yps.................................ttptp......p......................................R.p...lp.......PhtL.hh........t...s..s...p.....WY....L..hu..a........st............................................................p......c.............p...........p........................h..R.........sF+..l..sRI....p..s...lp....hh..s..p.............................................th.t..............t......s.........h..t.................................p..h.h.p......p...................t.....................ph..hl............h.h.........t....th......h..h........h....t.............h..................t......................................................h................t.......t..........pt.......h..........h.....hp.h........h.....t....................h..h...h...l...ht..hu...............t..htllt.....P.t....lht.hht.ht....................................................................................................... 0 652 1199 1458 +13123 PF13281 DUF4071 Domain of unknown function (DUF4071) Aldam G pcc Pfam-B_2008 (release 24.0) Family This domain is found at the N-terminus of many serine-threonine kinase-like proteins. 
24.10 23.40 24.60 24.10 24.00 23.30 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.23 0.70 -5.61 14 275 2010-09-01 12:57:33 2010-09-01 13:57:33 1 13 141 0 144 258 14 309.10 37 31.02 CHANGED LhYcLGVRESFphppNIlLas-ss.p...-sh.tl+..h........ushphlPYhlsspu.......thhps.....pttcthh..s.th..sh.shsLsstlhpLlpss.phppstah+Ephhs-lRpA...R-phsu..cpLpctLpclct+LDssplLos-IlhsLhLSYRDlQDYsuMlcLV-cLpsls.h..lssss.lpapYAFALNRRNpsGDREKALpllhph.......lppccshusDhhClhGRIYKDhFhcSshpcpso.....LcpAIcWY+KuFEsp.PstYuGINhsTLLhhsGpcFpps.ELpplu..hhLssLlG+KGsLsphpDYWDVATahElulLApDhtKulpAuEpha+L+sPsWYL+SThpNIpLlcch++phpt....s.c.phhpFWh-h .....................................................LhYHLGVRESFsMtpNll.Lhppts......th.uL+..h..........usYhhlsYhlssps...............phhsspst.h.....pthsphh..s.........t........hhsL.sRhhp..lLpsh.php..S..p..t..ah+.E.phhpD.lRpA....Rphapu..........p....pLtttLtclptRlD.s..c.lLos-llh..slLLS.YRDlQDYsuhlcLV-sLphlPshc.hsp....p.pl..pFpYuFALNR...R....N..sG.DR...t+ALplhl.h............lpp.p...tp.....sssD..hhCLsGRIYKDh...Fl.cS..sh..p..Dsps..............h-pAh.pWY+...KuF-hp..s..........sh...auGINhAs..L...Ll......huG.......p.....p...Fc..............so.E.............Lp.p.lG........hpL.ssLlG......+...KG.....s..........lpph.ppYW-VuhahtsslL....As...Dhhc.slpAuEpha.KL.p.sPhW..........Y.Lc.S.....hhpslhlhp+Ftt..t......s...phhpFWhc.................................................................................... 0 29 48 89 +13124 PF13282 DUF4070 Domain of unknown function (DUF4070) Aldam G pcc Pfam-B_2005 (release 24.0) Domain This is a bacterial domain often found at the C-terminus of Radical_SAM methylases. 
21.60 21.60 21.60 21.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.74 0.71 -4.66 55 231 2010-09-01 13:08:55 2010-09-01 14:08:55 1 4 185 0 116 262 53 141.90 28 27.44 CHANGED TsLWcRLc+EGRLh.......tpssssQ.sshhNFlPshPh-clsptYhcshhpLY-PcpYhcRshphhhphss...sptph..........shtsL+AlhplhW+.Glhp.cpRhpFW+hLhshlh+pPpsl.hhlslsshucHFhcap.phlpcplcppl ...................TsLacRLc+EGRLl.................pstsssp..s..sthNFlP.p.hPh-clhstYhcshtplYsP..ctahcRhhphhtphtss....t..h.t..................s.htsl....puh....hpl.....ha+......G.l....ht..ctRhtFW+hhhthlh..+...s...P..t..h.hthhlshshhscHahpat.p.shpt.t...h................................................... 0 33 79 103 +13125 PF13283 NfrA_C Bacteriophage N adsorption protein A C-term Aldam G pcc Pfam-B_2015 (release 24.0) Family The function of this domain is unknown but it is found at the C-terminus of bacteriophage N4 adsorption protein A, in association with an N-terminal region of TPR repeats. 25.00 25.00 34.10 45.50 24.90 23.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.88 0.71 -4.91 14 355 2010-09-01 13:23:58 2010-09-01 14:23:58 1 21 350 0 31 204 0 175.00 65 19.09 CHANGED sps.susTGhsTsQGulGsRaKPlpstNLllssp...RhhtlG.shutsDWLLRhuYS....ss.Gs..DL+VstPsWhohplYsEuuaalppsphh.sssEuchG+oaRlsuhss+LslhPassluusaDostssp...hAlGuGsGlshRaWFREscYsAPtSalDlolQYRhpLsss-.RucGlhhcAs .................G.ENGVMMPVKNPMSGTGLRWKPLRDQIFFLAVE................QQLPLN.GQNGASDTMLRASASFFNGGKYSD......EWHPNGS..GWFAQNLYLDAAQYIRQD.IQA.WTADYRVSWHQKVANGQ...TIEPYAHVQD.N.........G....YRDKGTQ.....................GAQLGGVGVRWNIWTGETHYDAWPHKVSLGVEYQHTFKA..IN..QRNG......ERNNA.... 0 2 9 18 +13126 PF13284 DUF4072 Domain of unknown function (DUF4072) Aldam G pcc Pfam-B_2021 (release 24.0) Family This short domain is normally found at the very N-terminus of Hyrdrolases Pfam:PF00702. 
21.90 21.90 21.90 23.70 21.60 21.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.11 0.72 -3.78 23 106 2010-09-01 13:49:34 2010-09-01 14:49:34 1 2 105 0 40 85 2 47.80 41 16.54 CHANGED LllQSss.LusschcsLssLupusplpp.lsspAhRltsA..ssspRs-lc LVlQSsuPLSsuHt+sLsALucusclst.lsspAhRltsA..ssupRsDlD...... 0 5 19 30 +13127 PF13285 DUF4073 Domain of unknown function (DUF4073) Aldam G pcc Pfam-B_2039 (release 24.0) Family This family is frequently found at the C-terminus of bacterial proteins carrying the family, Metallophos Pfam:PF00149. 25.00 25.00 25.30 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.92 0.71 -4.80 8 102 2010-09-01 13:49:58 2010-09-01 14:49:58 1 6 101 0 3 91 0 136.80 62 22.17 CHANGED LNLPDWAGKKKIsG.GDcKGFTVVNTGGIETGWMSAGPNGGEKTAPDGhSFKQGLQVKAYGNDVVVTAYDYKRDKsIKKLLISDuKIAQMAPDVTADDsKNVIVGATEYMEYoVEGTNEWpTYs.usPPKFDGDKlVYVRHKGEMNLEPGLTQLLRFSs ..................LNLPDWAGKKKItG.G.Dc...KGFTVVNTGGIETGWMSAGPNGGEK.TAPDGYSFKQGLQVKAYGs.DVhVTAYDYKRDK-IKKLLISsSKIAQMAPN.VTADDoKNIIVGATEYMEYSl-GTsEWhTYs.uNPPKFDGDKhVYVRHKGEMNLEPGLTQLLRFS.s.............. 0 1 2 2 +13128 PF13286 HD_assoc Phosphohydrolase-associated domain Aldam G pcc Pfam-B_2016 (release 24.0) Domain This domain is found on bacterial and archaeal metal-dependent phosphohydrolases. 
22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.30 0.72 -3.71 344 2449 2010-09-01 14:47:52 2010-09-01 15:47:52 1 4 2227 6 684 1967 1173 91.00 22 22.24 CHANGED lshssp.httthptLKpFlapplYcpsplpphpt+uppllpcLFph....a...hs......c...sp.hLPspaptth....ppssp.........tptRllsDYIAGMTDpaAlchapc .......................................................................................h..hs.thttththL+phhhphlhpp..p.h..th.p.h..p..s.ppllppLhch....h.....hs...........................................c...sp.hLs..t..p..h.t..thh...............................pttt...................t+hRllsDYIuGMTDpaAhc.app............................ 0 235 468 581 +13129 PF13287 Fn3_assoc Fn3 associated Coggill P pcc Jackhmmer:O26995 Repeat \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.10 0.72 -3.88 342 282 2012-10-03 16:25:20 2010-09-01 16:25:02 1 69 219 0 75 1442 337 63.70 33 7.78 CHANGED VsLo.o..............spsA.....sIYYTlDGosP....T.s........pSsh.......Y...........s.sPIhl......sp.........ss.s...............................l..KAhAhcpu....hssSslsohsa ....................pLp..o..t...stsu......pIaY.Tl.DG.S.sP.......s..p.............................pSht.........Y...........................s..ss.I.hl......sc......ss..s...................................................................................................l...KAlAhcpu.....hppSslsoh........................................................... 0 29 55 68 +13130 PF13288 DXPR_C DXP reductoisomerase C-terminal domain Bateman A agb Jackhmmer:Q7P3U3 Domain This is the C-terminal domain of the 1-deoxy-D-xylulose-5-phosphate reductoisomerase enzyme. This domain forms a left handed super-helix. 
27.00 27.00 27.20 27.30 26.70 26.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.49 0.71 -4.10 236 3405 2010-09-01 15:25:30 2010-09-01 16:25:30 1 7 3273 79 826 2642 1652 119.10 39 30.61 CHANGED LGsPDMRhPItYALsaPcR..h.........s..s......ss.ptL.Dhs..pl.up.LsFc.pPD....hc+FPsLpLAh-Ahct..GG..shs.slLNAANElAVpuFLpt+IsFh-Iscllcpslpp..........hps.t..................ssL-sllpsDphARphA ............LGsPDMRhPIuaulua.PcR...l..............s...s......s.s.psL.D..hs......cl....u........s.Ls..Fp.tPD....hc+FPsLcLAh-Ahct......Gs..shs.slLNAANElAVsAFLsp.cItFh-Isplltpslcp...............hsht........................sssl--lLp.hDt.ARch....................................... 0 289 560 710 +13131 PF13289 SIR2_2 SIR2-like domain Bateman A agb Jackhmmer:Q7P447 Family This family of proteins are related to the sirtuins. 24.50 24.50 24.50 24.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.94 0.71 -4.31 164 1624 2012-10-03 09:55:27 2010-09-01 16:29:03 1 38 1214 0 458 1490 86 152.60 17 27.46 CHANGED ptllTTNYDsllEpuh.....tptstthh...........................hhspshtt.....................pssphhlhKlH.Gsl............t....s......................psl.llopsc...........Y...tphh......................psh.htphlp...shh.t...sp.....sllFl..GaS..hsDs.slppll....pplhpphss..............pt....ahlh.pst.................tttpt......hhpphslphl 
.............................................h..llTTNYDphl.E...psh...........pptshth..........................................h.hpsht........................tpthhtlhKlH.Gsh................................................................................................psl...ll..o..p..s.c.................................Y....tchh........p..........................................................ps...ht..phlp.........shh..p.....sp......sl.lFl..Gao...h..s..D...plp..t..lh...pphhpphtp................h.........ahhh.tt........................................................................................................................................... 0 165 285 360 +13132 PF13290 CHB_HEX_C_1 Chitobiase/beta-hexosaminidase C-terminal domain Coggill P pcc manual Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.68 0.72 -4.19 268 225 2012-10-03 16:25:20 2010-09-01 16:33:34 1 57 161 0 67 1534 350 68.50 26 8.39 CHANGED sssG.php................st..plslps....sssss.....sIhYThD.GosPoh.........t...S......hY...............t...............s..l.l......t.ss.....ss....l+shuhsss.tps..u.tlh..o ....................t.............pstlplss............sss.ss.....pIaY.ThD.G.o.pPoh..........................p.......S...........hY..................p..t..................................P....lpl..................s..ts........................ss...............lcshshp.s.sp................................................................ 0 33 48 57 +13133 PF13291 ACT_4 ACT domain Bateman A agb Jackhmmer:Q7P344 Domain ACT domains bind to amino acids and regulate associated enzyme domains. These ACT domains are found at the C-terminus of the RelA protein. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.22 0.72 -3.64 182 5583 2012-10-02 00:29:19 2010-09-01 16:38:22 1 27 3945 4 1173 4363 2094 78.80 25 11.21 CHANGED sspp.....apsslplpu.hDcpGlLs-lspslupp.psslps.lshps.........pthsphplslpVpshppLpplhppl+plpsVhpVpR ..........s..tta.splclpu.hs.R.pGlLs-lspslusp..psNlhu.lsscsp..p....sphushplslp...l..p..s....h..p....p.L..spllp+l.+plss.VhpVpR........................ 0 347 739 993 +13134 PF13292 DXP_synthase_N 1-deoxy-D-xylulose-5-phosphate synthase Mistry J jm14 Jackhmmer:Q7P481 Family This family contains 1-deoxyxylulose-5-phosphate synthase (DXP synthase), an enzyme which catalyses the thiamine pyrophosphoate-dependent acyloin condensation reaction between carbon atoms 2 and 3 of pyruvate and glyceraldehyde 3-phosphate, to yield 1-deoxy-D- xylulose-5-phosphate, a precursor in the biosynthetic pathway to isoprenoids, thiamine (vitamin B1), and pyridoxol (vitamin B6). 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.84 0.70 -5.46 331 4453 2012-10-02 16:07:47 2010-09-01 17:03:14 1 13 3512 8 1043 7165 5251 238.80 43 43.01 CHANGED L-pIssPpD.L+pLshppLtpLApElRphllcsV.Sp.....oGGHLuuNLGVVELTlALHhVFsoPc.D+llWDVGHQuYsHKlLTGR+.-papolRphsGlSGFPcRsESt.aDsFssGHoSTSISAALGhAhA......pc.l.p....G...p.........s.....ppVlAVIGDGAhTuGMAaEALNpA..Gph.p.p..sllVlLNDN-MSIu...........NVGulu.paL...sclhs...sth...Ypph+ptscphl.p.....hh.......hhphhc+h....ccshKshh...h....s......slFE-hGhpYlGPlDGHDlppLlpsLcps+......s.....h....cG.PlllHVlT .........................................................................................................................ttlpsP.t-L+.tLsh......ppL..pLu...p...ElR.p.h...l.l.p...s.V...S.p.....o.G.G..H.....huusLGs.........V.........E..L.........T.........l......A.....L.........H.........h.........V.........a..........s..........o..........P.........h........D.p..llW..D.....V.....GH....Q....u.......Y....s.....H.....K...I......L....T.....G.....R......c........c....p.....h.....t.....T......l....R......p.h.....s....G.......l....s....G...F....s....p......R.......s......E...S.......p..a.D......s......h.....s.....s.......G.H.....o...S..T.....S....I.SA......u.l.GhAh.A......tc..hp....G..p....................p.....cps.l.uV..I..GD...GA............l.TuGM.AaE.A.h.Np.s....G..t.h...c.t...........s.hl..l.l.L.ND.N...-...M..S..Iu.....................s..V..G...u...l..s.........p..a..L........u..p.l.h.......s.......s...ph...........Y.p..p..h.+...p.th..c..phh..p.................tl.......h.hphhc+h..............cc.t.hKsh..hs........s..sslFE-L..GhpY..lGPlDGH.slttLlpsLcph+...............p.........h.....p.G..PhllHlhT....................................................................................................................... 
0 343 706 895 +13135 PF13293 DUF4074 Domain of unknown function (DUF4074) Aldam G pcc Pfam-B_2050 (release 24.0) Family This family is found at the C-terminal of Homeobox proteins in Metazoa. 22.30 22.30 22.60 43.30 22.00 19.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.13 0.72 -3.75 14 230 2010-09-01 16:25:41 2010-09-01 17:25:41 1 5 74 0 87 199 0 64.30 67 16.92 CHANGED PsoGPulFGLsHL....PHssossMDY.sGAssMusupH.H........GPCDP..HP.TYTDLosHH.sSQGRIQEAPKLT .............PsSGPulaGLsHL....sHssSushDY.sGAsshuuspH.H........GPCDP..HP.TYTDLouHH......sSQ...GRIQEAPKLT....... 0 3 10 35 +13136 PF13294 DUF4075 Domain of unknown function (DUF4075) Aldam G pcc Pfam-B_2028 (release 24.0) Family The members of this family are putative mature parasite-infected erythrocyte surface antigen protein from Bacillus spp. 25.00 25.00 65.40 27.80 17.80 17.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.81 0.72 -3.94 12 77 2010-09-01 16:37:33 2010-09-01 17:37:33 1 2 76 0 3 33 0 77.00 93 46.08 CHANGED KETKEIFSKKKlEstEKPETIEIQAVSPKVDELKAEEEPVVAEDGGMKEARELFMKDSNAEEKKTEAYIELKQDKEEKK ..KETKEIFSKKKVEPtEKPETIEIQAVSPKVDELKAEEEPVVAEDGGMKEARELFMKDSNsEEKKTEAYIELKQDKEEKK.... 0 0 1 1 +13137 PF13295 DUF4077 Domain of unknown function (DUF4077) Aldam G pcc Pfam-B_2043 (release 24.0) Family This is the N-terminal region of methyl-accepting chemotaxis proteins from Bacillus spp. The function is not known. 
25.00 25.00 373.30 372.70 21.40 18.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.39 0.71 -4.55 4 74 2010-09-01 16:52:32 2010-09-01 17:52:32 1 1 74 0 2 55 0 175.00 95 34.95 CHANGED MEWLKRTCFSNLEKESQKNHLLLFITICSFFLGIIAIGYYGYIFTERAIAFWhCGISVVVFGTLhTFIcSMEuhYKYIMTFMLLhMSFIMVQAFNESPAVFQMVYFTLAVSLIYLSERLllILGGVAVVlTFILCSYWPEQFFAYTAuSEAANFASLLAIVTIAMWGVTKIGSNL MEWLK+TCFSNLEKESQKNHLLLFITICSFFLGIIAIGYYGYIFTERAIAFWlCGISVVVFGTLhTFIcSMEohYKYIMTFMLLhMSFIMVQAFNESPAVFQMVYFTLAVSLIYLSERLllILGGVAVVITFILCSYWPEQFFAYTAuSEAANFASLLAIVTIAMWGVTKIGSNL 0 0 1 1 +13138 PF13296 T6SS_Vgr Putative type VI secretion system Rhs element Vgr Aldam G pcc Pfam-B_2052 (release 24.0) Family This is a family of putative type VI secretion system Rhs element Vgr proteins from Proteobacteria. 22.40 22.40 22.50 24.90 22.10 21.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.20 0.72 -4.19 159 943 2010-09-02 08:37:27 2010-09-02 09:37:27 1 14 320 0 249 1034 26 106.20 38 12.58 CHANGED S+phtG.ss..hNpLhhDDssGQ.pspLuo..sa.upSpLsLGaLsct..tsptctphRGpGFELRTDuaGAlRAupGLhloocspstAt....up.LDhspshppLppAtphhcsLups.A ............o+phtuss..hNpLhhDDssGp.+hpLuo.....sa..upSQLNLGaLsct..tsp.ptp.RGcGFELRTDuaGAlRAupGlhlSocspsp....Ap....uphL....Dhstuhp.....LpputphhcuLupsA.................................... 0 24 77 152 +13139 PF13297 Telomere_Sde2_2 DUF4076; Sde2_C; Telomere stability C-terminal Aldam G pcc Pfam-B_2053 (release 24.0) Domain This short C-terminal domain is found in higher eukaryotes further downstream from the Sde2 family, Pfam:PF13019. It is found in all Sde2-related proteins except those from fission yeast, fly, and mosquito. Its exact function in telomere formation and maintenance has not yet been established. 
21.80 21.80 21.90 23.80 21.20 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.83 0.72 -4.70 33 282 2010-09-02 08:39:50 2010-09-02 09:39:50 1 10 133 0 179 257 4 59.10 58 13.20 CHANGED lDLssasSscELE.sLGl-RLKptLtuhGLKCGGTLpERAsRLF.lKupsh-clD++lhAK .....lDLssFoShE.....ELt....sLGL-RLKsALhALGLKCGGTLpERApRLFSsKGpsh-plD.sLhAK....... 0 62 92 138 +13140 PF13298 LigD_N DNA polymerase Ligase (LigD) Mistry J jm14 Jackhmmer:Q05W82 Family This is the N terminal region of ATP dependant DNA ligase. 25.00 25.00 34.50 27.40 21.80 20.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.55 0.72 -3.93 134 675 2010-09-02 12:44:19 2010-09-02 13:44:19 1 11 606 8 296 710 139 106.30 50 16.03 CHANGED QcH.....cAp+..LHYDFRLEh-GV.LtSWAVPKGP...oh-PssKRLAlp.sEDHPlsYssFEGsIPpGpYGuGs...VhlWDpGsac.sh.............pstp............th..............pcG.clphpLcGc+Lc.G.casLlR ...........QcHcApR..LHYDFRLE....hDGV...LtSWAVPKGP....ShcPs..............sKR..LAVc.....sEDHPL-YusF....EGsIPp..GpYGuGs...VhlWDpGsap.s......................s.tp...........sh..................................cpG.clphpLcGc+LpG.pasLlR...................................................... 0 77 177 244 +13141 PF13299 CPSF100_C Cleavage and polyadenylation factor 2 C-terminal Aldam G pcc Pfam-B_2065 (release 24.0) Family This family lies at the C-terminus of many fungal and plant cleavage and polyadenylation specificity factor subunit 2 proteins. The exact function of the domain is not known, but is likely to function as a binding domain for the protein within the overall CPSF complex [1]. 
22.30 22.30 23.40 22.50 20.80 20.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.07 0.71 -4.05 58 300 2010-09-02 13:14:05 2010-09-02 14:14:05 1 16 262 0 216 303 3 149.10 29 18.79 CHANGED V+LscsLlppL..+Wppl......cshpVutlsGpLt.............................................................................ht..t.p..pttsptptpsthchlss.....p.s..ssthss.spslalGDlRLuDL+chLts.....pG..hpAEF+.u-GsLllsst...........VuV..RKsus...............GcltlEGshs.....................ssaatV+chlY-tL ..................................................VpLpcsLlppL..papps......cshplshlsG.Lt..................................................................................t.....ht...............t.t..sppp.....shlss................h.s..ssphss..ppslal..G-lRLuDhKphLhp.....pG....h.pAEFp..tGs..L.l.s.sst...........................................VuVR+sss...............Gc.ltlEGshs..............................psaYplRchlY-th.................... 0 72 116 178 +13142 PF13300 DUF4078 Domain of unknown function (DUF4078) Wood V, Coggill P pcc Pfam-B_3305 (release 24.0) Family This family is found from fungi to humans, but its exact function is not known. 25.00 25.00 26.80 26.80 22.30 18.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.01 0.72 -3.86 34 194 2010-09-02 13:35:35 2010-09-02 14:35:35 1 4 176 0 149 186 0 87.80 36 21.56 CHANGED ppMpplAc+RD+s.T.P..phHY-ust..ElRs+GsGFYuFS+DEcpRccQM-cLpptRpc.TpccRc.pRcct+tcRcchhc-Rhcclcpc+ .......................h.phpphtcccccshp.P......sshHY-s.t....EsRs+GsGaYuFS.p.DE-pRpcQMcpLcphRc.p.TpcpRp....cR......c....p....h......+ccR....cthlcpRhtclcp++............................. 0 49 73 117 +13143 PF13301 DUF4079 Protein of unknown function (DUF4079) Mistry J jm14 Jackhmmer:Q05X76 Family This is an uncharacterised family of proteins. 
25.40 25.40 26.10 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.20 0.71 -4.26 90 277 2010-09-02 14:02:26 2010-09-02 15:02:26 1 3 104 0 136 293 83 147.20 27 80.26 CHANGED ahth...........lHPllMhll.aslsshshhLGhphRppRstps.......................................................................tphtpLl...pschpt....hHaphu.hlLshhslhslGGhsssahpssp....lFhus.HhasGluhssLhhhSsulsPth..p....up.p.thRplHlsLNslslhLFhhQulTGhcllhcl ................................................hhHPhhhhhh.hshshh.shhhGhphRphR..tt...........................................................................................h..hh.......hhph+ptHhphushhls.hhslhhlGG.hssshhpssp......la.us.HhhsGhshssLhhhusulssth..psp...p.hhRt.....hHlhLNshhhhLFhhpuhoGhpll.ch........................................................... 0 30 94 126 +13144 PF13302 Acetyltransf_3 Acetyltransferase (GNAT) domain Bateman A agb Jackhmmer:Q7P3G3 Domain This domain catalyses N-acetyltransferase reactions. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.03 0.71 -4.02 406 20736 2012-10-02 22:59:21 2010-09-02 16:53:55 1 153 4229 65 4844 27191 2993 139.30 19 68.41 CHANGED RLhLRs.h.p....s...Dhsslh.phh.s..s.....sc.lh....ca.....hs.....shppspphl....t..............htt.http....s.hhh............aslt..........................t+s....s...........t.................p.....h.lGh.hul.................ht.............................tspsE.lG.ah.....ltps.hh........Gp.GausEAspshh......pas.....h.p....phs......h........................p...................pl.huhhts..pNhsStpl.hc+hGhp ......................................................................................................................................hhL+...h..p.....p.....D....h....t....t...h....h....phh...s.......s.................p...hh........pa...........h..h.................sh.....p.p....h..p..p...hl.....p...........hhtt..ppt..................hh......................a.s.lh..............................................................................................pcp...s...................t..............................................p..................h..lGh..lsl...........th....................................................tppps-...l.G....a.h........l.s...tp...at.....................Gp...G.h.u......s....-Ah.pth.l................cau......a..p......................phs.................l.................................................p..................+l..hh.ps..t.s........pNh....sS...t+....l..h.c.+.hGF........................................................ 0 1518 2985 4028 +13145 PF13303 PTS_EIIC_2 Phosphotransferase system, EIIC Bateman A agb Jackhmmer:Q7P503 Family The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. 
The sugar-specific permease of the PTS consists of three domains (IIA, IIB and IIC). The IIC domain catalyses the transfer of a phosphoryl group from IIB to the sugar substrate. 27.00 27.00 27.00 27.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.97 0.70 -5.58 67 1977 2012-10-01 19:13:17 2010-09-02 17:03:21 1 3 895 0 191 1006 7 317.10 29 92.29 CHANGED hcsLsGhAhullsuLlsusllpplsphlhh...........s...................hlhplsslspthhusulGsslAhthphsslhshusshAuhlGussh.h.....................tth...hhuhGs.lsshlsuhlAshlsphls....thsslclllhPllshhluuhlGhh.ltPhls.hhphlGshIsssoshpPllMullluhlhulllsoP.lSSsAlulAluLsG.....................luuGAAslGssushh..shshhoh+.NshGshlAhhlGosKlQhsNll+pPh.lhlPshlsuAlsGslushh........slp..ssssuu...GhGhsGLlGPlssh.................s.us..ss.hlhhllhhhllPhlluhlhthlh.........................+phthh+ss-.hpl ................................................+sLuulAsuhls.uLlssAlLttlhth..hhth..........p..........................hLhp.lush.h..t..uhsuhhlGsLlAhthshsPlhshllusushhus.......................................htGhGcllsshlsuhlushlhphlp....pt.sul.-....lIlhPllsss.l.ushluhl.lhPhlpshhptIGshIpshTshpPllMuIlluslhullhhoP.lSosA..lshhluLoG.....................luuuu.AslGssussh..shlhtsh.+lNs.hG......sslAlslss...shhp...hsslhpp.Pl.Ihlss.hlsuslsGllsshh........slp..ssstuu..............GhGhsGhlu.lshh.....................ss.hs..lllhllshhll.Phlsualsphlh.+hhtlhppt.h................................................................................................ 
0 71 123 178 +13146 PF13304 AAA_21 AAA domain Bateman A agb Jackhmmer:Q7P3F0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.96 0.70 -4.45 220 6048 2012-10-05 12:31:09 2010-09-02 17:17:35 1 63 2772 13 1685 97673 27953 227.70 14 53.08 CHANGED lsslhGsNuoGK..Ssllculthh.......................................ththh.hhhptpthptshphpht.hhtpshpaphthphpppth..................t.htthhh.ptt.t.th.th..t.ht.tt.ht..hhh.hthtt...hh.hthhh.hht.h..h........................thphhphhpphhhtpphhphhphhhptht.hththtth................................................................hhhh..ttth.h.h.t......SsG...ppph.h..slhshlhpshtt.....................sl...ll....lDEh.-ss.LHP....phh....ppll.....phh.............pptpp...............................tsQl.lhoTHs.shl.ls ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................h....htth..............SsG.........ppph.h..........hlhhhlh.p...t.t.........................................................................................sl.......ll.......lDEP.Ess.LHP.....................ph.........pplh....phl................pphtp...................................ts.Q.l..llsTHu.s.l.l.............................................................................................................................................................................................................................................................................................. 0 627 1112 1447 +13147 PF13305 WHG WHG domain Bateman A agb Jackhmmer:Q7P3F5 Domain This presumed domain is around 80 amino acids in length. It is found to the C-terminus of a DNA-binding helix-turn-helix domain. This domain may be involved in binding to an as yet unknown ligand that allows a transcriptional regulation response to that molecule. The domain is named WHG after three conserved residues near the C-terminus of the domain. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.76 0.72 -3.41 239 2407 2010-09-03 08:44:01 2010-09-03 09:44:01 1 6 1383 8 710 1956 289 84.90 20 41.89 CHANGED tshutuYlpFAhppPshaclhF...........ttp.h...................ps...sphpp.h....h...........p..............hhtph...lpp.h..h..tt........hthst......pc...sp.phshs...hW..uhlHG.husL...hhs......sth .............h.thuhuYlpFAhccPshachhF............ttshs.................................................tt......tphpp.s....h..............p.................................thtph...lpp.h..h..tt....................hth..ss.........................pp...st..thsht...hWuhlHGhss.L.hhss..h............................ 0 225 496 610 +13148 PF13306 LRR_5 Leucine rich repeats (6 copies) Bateman A agb Jackhmmer:Q7P2P7 Family This family includes a number of leucine rich repeats. This family contains a large number of BSPA-like surface antigens from Trichomonas vaginalis. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.90 0.71 -12.10 0.71 -4.44 205 12012 2012-10-02 21:32:02 2010-09-03 10:37:11 1 746 915 12 6389 30082 1356 113.30 19 48.31 CHANGED spIs..stsF.hs.s.sLp.s.ls.lP..s..s..lp..p....IuptuFtsC.s.Lp..sl.p..l..Ps..s..lppIustuF.ts.C.s.L.psl.sl.....s.s..s.lppIs.pt.sFptCs..s..Lp.sls..l..s..s..s..lp...pIupt.s..F.ts.s.s.L.p.sl.s.ls.s..slp...pI..ss.ps...Fps.C..sLp 
..........................................................................................................................................................................................................................................................h.p....p.....l......l..s........p.....s.........l..p...p...............I.....s.......p........t........s........F..........t.........s....s..........................s.....Lp...........p......l....p.........l......s..s........s..............l...p......p......I........s........p........t..........u.......F......t...s.....s.....................s......L.........p....p......l.......s.l.............s....s....s....l........p........p...........l..........s........p.......t..........s........F......t...t........s.s..........p................L........p..........p........l.....p.....l..........s.....p...........s..........l.p.............t..l...s.......t.................s............F.....t........t....l........t...........h.........h................................................................................................................................................................................................................... 0 5510 5860 6131 +13149 PF13307 Helicase_C_2 Helicase C-terminal domain Bateman A agb Jackhmmer:Q7P4K4 Domain This domain is the second of two tandem AAA domains found in a wide variety of helicase enzymes. 
25.10 25.10 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.04 0.71 -4.30 152 5901 2012-10-05 12:31:09 2010-09-03 10:41:35 1 61 3620 5 1731 4652 739 165.30 27 22.05 CHANGED lhplh.p.t.hs..ussllhFsSaphhpplhp.t.h...pp............p..h..................th.........t.lh..h..p................s...pt..s..tppll.................ppap....p......s....................................psu.l.Lhust....ph...EGlDhs...sct..hpsVll..stlPa..........ssspsshhpt+tpahc.p.....pt.....................................................tss........hp..p.hhhspAhtplpQuhGRlIRpps...DhGsllllDpRht...p.....pp.htpt.l.....phltst ..............................................................................................h...hhphtt..sthllLFsShphhpplhc.h.h...pp..............................................p.h..........................ph..............ll.hQ.......................................s...pt..sptpll.................pp.Fp..p........s.........................................................................................psu..lL..l.uss...u.....FhEG....lDlt...G....c...t......lpt....V....lIs+lP.F...........................sssssP.....lhps+h.chhc.p.....ps.....................................................................................................................tss...........Fp.p.h.tlsp.Ahh.p.lpQ..ulGRLIRs.ps...D...pG.s........l.l....lh.............Dp............Rlh...p........pp...Y.s...pth..psls..h............................................................. 0 576 1031 1435 +13150 PF13308 YARHG YARHG domain Bateman A agb Jackhmmer:Q7P768 Domain This presumed extracellular domain is about 70 amino acids in length. It is named YARHG after a conserved motif in the sequence. This domain is associated with peptidases and bacterial kinase proteins. Its molecular function is unknown. 
21.30 21.30 26.20 26.10 19.10 17.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.68 0.72 -4.44 105 463 2010-09-03 15:16:03 2010-09-03 16:16:03 1 33 361 0 77 396 21 81.60 28 22.53 CHANGED sphhh.sp...sssphlspsplpshs..sp-LplhRNpIYAR+GhpFps.tplpsYFs.spsWYpssh..........................pshLsshEppsl...p.hlpphEp ...................................t....h.t....s.phhs...pplpths....ppcLclhRNEIYAR+GahFps.t-hp.pYFs...pps..WYpssh.........................sshlochEppNlc.hlcphp....................... 0 38 61 67 +13151 PF13309 HTH_22 HTH domain Bateman A agb Jackhmmer:Q7P8J8 Domain This domain is a helix-turn-helix domain that is likely to act as a DNA-binding domain. 29.60 29.60 29.70 33.10 29.30 29.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.77 0.72 -4.21 187 1886 2012-10-04 14:01:12 2010-09-03 16:51:08 1 5 1465 0 294 997 44 63.90 37 28.64 CHANGED -hlpphl.ppslp.ph.stss....shhsppc+hpllptLpcpGlFtlKsusstVAptLslS+hTlYpYLc .............................-llsphl.cpslc.pl..sssh....sl.spscp+pI....VhpLa-+GlFplKcAlspVAcpLsIS+tTVYhYl+... 0 75 143 221 +13152 PF13310 Virulence_RhuM Virulence protein RhuM family PIRSF, Bateman A agb Jackhmmer:Q7P417 Family There are currently no experimental data for members of this group or their homologues. However, these proteins are implicated in virulence/pathogenicity because RhuM is encoded in the SPI-3 pathogenicity island in Salmonella typhimurium [1-2]. 
27.00 27.00 27.10 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.72 0.70 -5.38 118 1101 2010-09-03 16:18:37 2010-09-03 17:18:37 1 10 730 0 253 1002 83 187.10 38 65.40 CHANGED cusl+cathstp-Gp+.....tcplpaYNLDsIIuVGYRVpShRuTQFRpWATphL+EYllKGFlhDDERLKsss......hstDYF-ELLERIR-IRuSERRFYQKlpDIYA.sSlDYDssuptTppFFupVQNKLHaAlpGpTAAElIhpRADupKspMGLToW+su.....+.lpKsDVslAKNYLsccElcpLNRlVohaLDaAEhpAcR+hsMsMpDWtp+LDpFLphs-pclLpsuG+lSpctAcppAcpEa-cachpp......php.sDa- ............................ushpphhhstt-Gpp........hp...hpaYNLDsIIuVGYRVpS.+u..T.pFRpW.A.TphL+-YllKGashsc..c.RL+pss.......paacchLtclR..p...IR.SEt.hh.a..pl.h-lat.hu.D....Yp...p...s..Fat.hQ.Nh..hhauhtt.....Tus-ll..pss.....huh.thttt.........-h.hu.KNYLt..ph..ht.....hht.hp..h.tt...h.ht.h...hp.hh...t..hL.t...Gthp...h...s................................................................................................ 0 95 189 225 +13153 PF13311 DUF4080 Protein of unknown function (DUF4080) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 
22.90 22.90 23.10 23.40 22.50 22.70 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.24 0.71 -4.27 54 385 2010-09-03 18:13:29 2010-09-03 19:13:29 1 5 375 0 101 364 15 171.30 22 30.98 CHANGED uccaGhlYpstsPYEVLpTcaLSYc-ll+LKplE-hlEhYYNSupFspolcalhppa..soPFchYppLucaa-ccGahphs+uptphYclLh-Fh.........p......-pthc..t..hh+-lLpaDahhppp..t+shPpFh.tppppt.cpp.hpth.hpctpthph.....ht......pphh+hsHlEhFp........hs.................ppp.hlLF ........................ccaGhhYpshPPYElLpochlSap-lhpLKplEchl-tYYNSspFppolchl.h.p.ph.....tssFcFapphu.p.a.apppshhphshuhpchaclLhcFh..........................................p........pph.t......p.phhppllphDahhh.pp..hc.h..Ptah.tt.......p.....p.t...tpt...hhth....hpp.......................t...................................................................................................... 0 48 88 94 +13154 PF13312 DUF4081 Domain of unknown function (DUF4081) Aldam G pcc Pfam-B_2088 (release 24.0) Family This domain is often found N-terminal to the GNAT acetyltransferase domain, Pfam:PF00583 and FR47, Pfam:PF08445. 21.70 21.70 23.30 21.80 19.20 21.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.31 0.72 -4.11 49 307 2010-09-06 12:54:03 2010-09-06 13:54:03 1 4 306 0 85 197 16 108.80 45 38.43 CHANGED Gu-lWGhhtt....GtLsulCauGANLlPltu........sssu...lcAFA-+ApcpuRRCSSlVGPA-sVhsLWcpLcs....sWGsA.REVRssQPLhshsssP...tlssDPt........VR.lR.cEl-hll .............uplaGhh.t....GtLpuLCasGAN...LlPltu......................................sssslcAFA-+Atct.RRsSSlVGsAcsVLsLWcRLp.....sWGsA....REVRssQPLhAhss.pP....plssDst..........VRplp.c-h-sY.................................... 0 28 65 80 +13155 PF13313 DUF4082 Domain of unknown function (DUF4082) Mistry J, Aldam G pcc Pfam-B_2054 (release 24.0) Family This family appears to be a parallel beta-helix repeated region that sits between successive Cadherin domains, Pfam:PF00028. 
21.50 7.00 21.50 7.00 20.80 6.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.85 0.71 -4.76 61 169 2010-09-06 13:08:15 2010-09-06 14:08:15 1 38 53 0 108 198 22 144.50 38 27.55 CHANGED lasssssPssss.ssD..ssulELGs+FpusssGplTGlRFYKu...suNTGTHoGoLWousGslLAosTFTsE.ouS..GWQpssFuoPVslsAGTTYVsSYaussGpYusossaFsss.hss.GsLpA.....husu.....N..GVYsYuuushFPs..sSapuoNYWV ...................................lasssssPssss.ssD..ssslELGh+FpusssGploGlRFYKu...uss..TGoH.sGsLWou...s...GshLAo.sTFo.sE....ouS...G.WQpssFusPVslsAsoTY.Vs.SYpsssGtY.ussss.aFss........s.hss..usLpu.usu............NGVYtYuus....hFPs..soa.psoNYWV......................................... 0 31 55 84 +13156 PF13314 DUF4083 Domain of unknown function (DUF4083) Mistry J, Aldam G pcc Pfam-B_2061 (release 24.0) Family This is a family of very short, approximately 60 residue, proteins from Firmicutes, that are all putatively annotated as being MutT/Nudix. However, the characteristic Nudix motif of GX(5)EX(7)REUXEE is absent. 21.90 21.90 22.10 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.84 0.72 -4.14 10 78 2010-09-06 13:30:24 2010-09-06 14:30:24 1 2 65 0 3 58 1 57.20 60 92.23 CHANGED M......ltshIYhh..hhllllhussFshhIRhllpsSstKKQc..slEQKLD+IIpL...LEKp .........l.shIYTCLVIGLIVLFFlSFT..L..F.IRRVLQSSsA.KKppshsMNQKLDRIIEL...LEKD......... 0 0 1 1 +13157 PF13315 DUF4085 Protein of unknown function (DUF4085) Aldam G, Mistry J gba Pfam-B_2570 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 101 and 269 amino acids in length. 
24.10 24.10 27.30 37.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.20 0.70 -4.96 7 89 2010-09-06 14:00:34 2010-09-06 15:00:34 1 2 79 0 6 74 1 167.80 52 85.01 CHANGED KYFs+-aYccMQlpsalpa.Eo.cEWEph.........-ahppLKEEhc.h.p.-LL+aLPcolaPhlpsso.lsothssscLKchhhEWsp-aEtchpplppuYh-paspItc+LPusVtQLa.phSLHDuhIpslc.+scsoLpIhLDsSGsFSpFsKLplTFhslT+sphsENF.-GuWWLYaEItLT-cG.FEhtVLFDsPhc..EloIhA ......................................KYFs+-WYKcMQl.tFlpF..EolcEWpEh...........hpSL+--hE-+.K.DLLKFLPcSlashIps.T...lsS-.YPStcLKKhhhcWopDYEKRhspLppuYh-.as..SIcc+LspNVhQLa.chSLHDu.lpslc+cS....cDol.IsLDCSGoFsEFDKLplTFhGV.oKCShsENF.EuAWWLhHEIsLs--G.FELGVL..FDsPFc..EVhIsA.................. 0 3 6 6 +13158 PF13316 DUF4087 Protein of unknown function (DUF4087) Aldam G, Mistry J gba Pfam-B_3066 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 140 and 280 amino acids in length. There is a conserved RCGW sequence motif. 25.00 25.00 34.60 34.40 19.20 17.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.35 0.72 -3.83 11 83 2010-09-06 14:02:00 2010-09-06 15:02:00 1 1 79 0 19 63 0 99.90 52 52.17 CHANGED suspsEpRCGWh-NPoPANhoLpDc-GpWpIusQG.Ga..pscG...hPs..hssspaVpT.susaGYuCAChslssDscptplspIpps+phPLupCRpDhuL..pc. ...t..spAEpRCGWhpNPTPu....NhhLsDR-GpWpIGTpG.Gh...pscG......Ps..hu.s..sQaVcT....N...GsaG..Y.uCAClTssT.D.pppRlsplhKApthPLStCRpDKpLKEP.................. 0 3 10 14 +13159 PF13317 DUF4088 Protein of unknown function (DUF4088) Aldam G, Mistry J gba Pfam-B_3345 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 258 and 300 amino acids in length. 
24.10 23.70 214.20 214.10 24.00 22.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.52 0.70 -5.01 4 61 2010-09-06 14:03:01 2010-09-06 15:03:01 1 1 61 0 20 51 0 229.00 80 80.43 CHANGED RLR+-F-sFlpVSTGLDtpFlPPpFsDFLRARLLQpDGPLTERAV.RLLuuGEYGWA++VFDKQLPNALAuLMRDApRFGFGLAVQs-WoPpQRhcHAR-WAAQlLuEsGADAAaT-ALAuQluASApDlRsLEERM+TPAWRLAESLRQRAYDlMYALQTEssEshGRu+VGELRuhLsLALpYGShQh-EAsRVLEQlcRs+PcLFpEAPDDVFARLAAWLRRhFsps S.LRKDYDAFVRVSLKLDPQFsTPSFEDFLRAKLLDsMVPLTEHAVQRhLQGGQYAWAKRTLDKEFPDVVuILhRQAG-FGFGFASRSEWTPDELAKACRDWAAAlVupAQGDAuLVDPLAAQIKSAVpDIQTLEEpMQTPAWRLAESLRQRVYEAKLACEMSVGSsA.REKLGELRGLLRLGlAHGSFQKQEAQQIMEYLRLLKPEIFlEEPYDlFsRlAAWLRShFhsu.. 1 1 4 11 +13160 PF13318 DUF4089 Protein of unknown function (DUF4089) Aldam G, Mistry J gba Pfam-B_3700 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 21.60 21.60 22.40 22.30 20.80 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.21 0.72 -3.66 33 163 2010-09-06 14:03:51 2010-09-06 15:03:51 1 2 162 0 52 131 2 49.60 39 77.12 CHANGED YlpphutLLuLslcsphRssVlsphp+IsshAp.lhsFPL.s-chEsAsVF ...YlpphtplLuLsLD-spRsslhtpFp+IAshAp.lhsaPL.s-chEhAuVa....... 0 5 23 35 +13161 PF13319 DUF4090 Protein of unknown function (DUF4090) Aldam G, Mistry J gba Pfam-B_3702 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 
22.70 22.70 24.60 31.70 20.80 16.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.88 0.72 -4.04 15 61 2010-09-06 14:04:41 2010-09-06 15:04:41 1 1 60 0 23 58 101 82.80 63 88.35 CHANGED GsDAVDpAIusGlDLDGoPIPssMLsLYpcVMsLEupRpRSGVpKSMRNRIVRoGAKHhsp-sLNQhLl-AGW-GLK-KEIuFF ....GsDAlDtAIssGlDLDGSPIPsshL-LYpcVMsLEupRQRSGVppoMRsRIVRhGAKHhsQ-pLNQhLlcAGassLK-KEIAFF. 0 2 13 21 +13162 PF13320 DUF4091 Domain of unknown function (DUF4091) Aldam G, Mistry J gba Pfam-B_3704 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 70 amino acids in length. There is a single completely conserved residue G that may be functionally important. 22.50 22.50 23.10 22.60 22.20 22.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.46 0.72 -4.07 63 346 2010-09-06 14:05:22 2010-09-06 15:05:22 1 22 286 0 79 284 31 71.00 34 11.20 CHANGED hshsGhL+Wuas.ha..sp.......sPap..ss.phth..a.ssGDsallYPGps.........ttshsSlRhchhpcGlpDhchlchLcp ...........................t.phsGhL+WuhN.hW...sc........................-Pht.........Dsphch.......a..suGDshllY.Pusc...........tt.hs.SlRhchLtpGhpDhchl+hlc.p.... 0 29 54 74 +13163 PF13321 DUF4084 Domain of unknown function (DUF4084) Mistry J, Aldam G pcc Pfam-B_2026 (release 24.0) Family This family of Firmicute proteins is frequently associated with the EAL, GGDEF and PAS families, Pfam:PF00563, Pfam:PF00990, and Pfam:PF00989. The exact function is not known. 
22.00 22.00 23.40 23.20 20.80 20.50 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.97 0.70 -5.18 8 77 2010-09-06 15:42:46 2010-09-06 16:42:46 1 5 76 0 2 60 0 194.50 89 25.51 CHANGED MINQKKYVHFVhhYIIIFSLWIFLIPK-LNIKEIGILFLFCFAsLFSCYCLYKAIKKMKRGDKLFWVLlLCTCLCGLTMEITLFLHSLSIYDQVIFSYKALPFFIlQYILLFSGFAIKFIKHYSIRGLAQFSFDSIF.IlIMNIYFTLTFILDlSSFRMLTpDTWVLIGYFIAQSLVIYAVISLYRREQYSSSRISLIIGFTIILVYGYIHLFQLNAGhKTSSEVSYLIHTASILLIGLSSILYILDKPMQHETKTKYYRFDYVRFILPYFSIIITFSFIIhQPWDDKFMLIGLVLSLILLFLRQ ..............................................................................................................................................................................................MLTpDTWlL..IGYFIAQSLVIYAVISLYRREpYS..SSRIuLIIGFTIILVYG.YIHLFQLNtGhKsSuElSYLIHTASILLIGLSSILYILDKPhQ...HETKTKYYRFDYVRFILPYFSIIITFSFIIhQPWDD.KFMLIGLVLSLILLFLRQ............ 0 0 1 1 +13164 PF13322 DUF4092 Domain of unknown function (DUF4092) Mistry J, Aldam G pcc Pfam-B_2068 (release 24.0) Family This family is found in Proteobacteria. The function is not known. 22.20 22.20 23.40 69.80 21.00 20.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.00 0.71 -4.52 16 289 2010-09-06 16:06:56 2010-09-06 17:06:56 1 3 274 0 9 193 2 170.60 77 11.51 CHANGED lAtpIDppLptp.sthspth.......hsshsssuuplpsslp+Lhsh..t.....a+sVspFHVFHDsosFYGuoGsARupsslNIoNpAFPVlMsRNDpNYWlsFGpppAWD+....suhAYIT...........................-APS........hlpP-+Vop-TATFNLPFlSlGclGcGKVMVMGNu+YNSlLsCPssYSaNGul ..............................................................Q.AKEIDTAICAK.TsGCNEARWFS....LTTRNVNDGQIQGVINKLWGVDss.....YKSVoKFHVFHDSTNFYGSTGNARGQAVVNISNAAFPILMARNDKNYWLAFGEKRAWDK....NELAYIT...........................EAPS........lVcP..ENVTRDTATFNLPFISLGQVG-GKLMVIGNPHYNSILRCPNGYSWNGGV. 
0 2 4 7 +13165 PF13323 HPIH N-terminal domain with HPIH motif Mistry J, Aldam G pcc Pfam-B_2165 (release 24.0) Family This family is found in fungi on proteins carrying the PAS, Pfam:PF00989, domain. There is a well-conserved characteristic HPIH motif, but the function is not known. 20.30 20.30 22.40 30.70 18.80 18.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.02 0.71 -4.52 40 137 2010-09-07 08:27:00 2010-09-07 09:27:00 1 7 118 0 92 146 0 149.80 28 13.89 CHANGED p+plTssLptlu+puCpHPIHTIllsALLASooYlull-sslhc...sspshstps..phsshltGu+sLhhGpsouW....cWphh-stsstsps..........spHhALhTLsFPco...ssssshPthcslshssNhStphLsposshhoshsp......-sulsaslPasps ..............+plotslttlu+huspHPIHTIVlsALLASsoYlull-tslh-....ssss..ssps.........chssh.ltGupslhhupsouW.....pWpshssppststs..........spHhALhTlsFscs...ssssssPthpsl.hssshothhLspo.sshhoshsp.......-sulsaplsasp.................................... 1 13 44 76 +13166 PF13324 GCIP Grap2 and cyclin-D-interacting Mistry J, Aldam G pcc Pfam-B_2169 (release 24.0) Family GCIP, or Grap2 and cyclin-D-interacting protein, is found in eukaryotes, and in the protein Swiss:O95273, residues 149-190 constitute a helix-loop-helix domain, residues 190-240 an acidic region, and 240-261 a leucine zipper domain. GCIP interacts with full-length Grap2 protein and with the COOH-terminal unique and SH3 domains (designated QC domain) of Grap2. It is potentially involved in the regulation of cell differentiation and proliferation through Grap2 and cyclin D-mediated signalling pathways [1]. In mice, it is involved in G1/S-phase progression of hepatocytes, which in older animals is associated with the development of liver tumours. In vitro it acts as an inhibitory HLH protein, for example, blocking transcription of the HNF-4 promoter. 
In its function as a cyclin D1-binding protein it is able to reduce CDK4-mediated phosphorylation of the retinoblastoma protein and to inhibit E2F-mediated transcriptional activity [2]. GCIP has also been shown to have interact physically with Rad (Ras associated with diabetes), Rad being important in regulating cellular senescence [3]. 21.40 21.40 21.40 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.82 0.70 -5.18 12 246 2010-09-07 13:57:06 2010-09-07 14:57:06 1 4 169 1 171 242 1 213.70 19 65.93 CHANGED Wpclschu-pVS+pAThluhlW.pG.hPcs-shpcsh-saasuLpGhlLssHGpslGAGsTLppsl+sulKplVDuohpLhptsVS..h.tS.ppshcsslsplsGsVWEACsshc+lPpoNhsAIGpuhoplushlKDlLcEMcchh...............hhspupsstsssp.s-ss..s...spD..hS.E-hcVsp.shullppohsslKclIpsIsshh.......t-spshVspLEcLLclspclusplD-LGsSlY.PP.-hspl+tslp+ltuslcchhp.l ............................................................................................................................................................................................................................................................................................tt.p...t.......t..........sutlWt...sCpth....p.hs....p.p.sh.h.u.h.h.h.h.hp.p....hshlcDsh.cEhcph................................................................tpptp.....s.......t-p..p......t....t.s.......pD.................hp-.-..st.tlhp..sh..sllphsh...s...hlcpl....h........t....h.h.t.t..h......................t..p.......pps....s....p.L-cllshs.pcIssp..lD-lstolY...PPhs......lc.p..ttl..................................................................................... 0 50 78 117 +13167 PF13325 MCRS_N N-terminal region of micro-spherule protein Mistry J, Aldam G pcc Pfam-B_2099 (release 24.0) Family This domain is found in plants and higher eukaryotes, and is the N-terminal region of micro-spherule proteins which repress the transactivation activities of Nrf1 (p45 nuclear factor-erythroid 2 (p45 NF-E2)-related factor 1) [2]. 
In conjunction with DIPA the full-length protein acts as a transcription repressor [1]. The exact function of the region is not known. 22.60 22.60 22.70 22.70 22.50 22.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.22 0.71 -4.80 17 183 2010-09-07 13:59:08 2010-09-07 14:59:08 1 4 121 0 119 177 0 156.60 41 30.57 CHANGED RWKPhDDLALItulpQTNDLchVHpG.lKFSC+FTLpElppRWauLLY-PslS+lAssAh+sLHPEsltulQs+ALaSptEEpLLuolpSsp.....pPsL-pFQ-LLccpssVFatuRTAKoLpsHWhLhKQYhLLPDQsVpPhtpsp.pslSFS....DAE-plsDs-Lp-scDE....uLEpELtlsDR+pKR-IRhLENELsRWsVLVD ....................+WpPtDDLhLlsuV.psssLptlahG...V+.FSp+FTlpElpcRWauLLYDPsl.....SphAstuMpp.............LcP-h..hs.u.lp..p+slaS.tEEplLtp.lts..sp.....................tsph.ppFpcLLp..ppspsFa.uRTspsL...sHW...hh+pah..LL...DQshtsh...tt...........shs....c.pt.h.p.................................................................................................................. 0 38 58 94 +13168 PF13326 PSII_Pbs27 Photosystem II Pbs27 Mistry J jm14 Jackhmmer:Q05RN1 Family This family of proteins contains Pbs27, a highly conserved component of photosystem II. Pbs27 is comprised of four helices arranged in a right handed up-down-up-down fold, with a less ordered region located at the N-terminus [1]. 
25.00 25.00 26.40 34.10 21.40 20.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.67 0.71 -4.21 61 146 2010-09-07 15:30:42 2010-09-07 16:30:42 1 2 100 3 77 153 105 141.20 29 87.46 CHANGED hhpcll.......hulsL...sh......sl...h............lsus.............................t.ssss.slo...Gs.....YhcDThsVlpsLpssl..shs.....pDsss+pcstspA+chIs-alu+YRtpsplsu...................h..sSFsshp.oALNuLAGHYssaus+.PLPccl.+pRltpEhppAEpul ....................................................hhhhh.............hshhh....h.......h...h......h.ss....................................h.sssp.sho....Gc...........Yhp-ThsVlpsL+psl.shs.....p-ssshtcstspA+ptIs-alu+YRppsslsu.....................h..pSFsshp.oALNuLAGHYssaGs.p.PlPccl+pRlhpEhspAEtul................................. 0 22 53 70 +13169 PF13327 T3SS_LEE_assoc Type III secretion system subunit Mistry J, Aldam G pcc Pfam-B_2123 (release 24.0) Family This is a family of bacterial putative type III secretion apparatus proteins associated with the locus of enterocyte effacement (LEE). 25.00 25.00 47.40 47.00 22.80 22.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.80 0.71 -4.67 16 253 2010-09-08 13:46:21 2010-09-08 14:46:21 1 2 242 0 12 81 0 129.80 46 81.76 CHANGED hhpLHpLsWpPupaAHPhWLssl.Glpsps.apYGcustLDssLsphLtphRsa.ppsLPssLsspQpp.lthttRlsshslALGLlpLpCsDYLhLppYRpsLtshLu-ssIpQLhGh.hhpGpp......sspL..sPppLsshAhplGpulhpphtsssslh+AlulhLPP .............haphh..Ph.hhhs.WLstl.Ghp..s.hthtcp.thcp.LsthL.p.ht...phl.hs..sppth.ht.h.+l.hhshu.ulhtlpCsDYhhL.cYRQhLlphhu-s-IhQLhGa..hGtps.........tt.L..sPphhppsALplGsulL......sp.tpDsslhpAlhlLLPP.......... 0 2 6 9 +13170 PF13328 HD_4 HD domain Mistry J jm14 Jackhmmer:Q05RX6 Family HD domains are metal dependent phosphohydrolases. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.08 0.71 -4.50 118 7076 2012-10-01 20:28:14 2010-09-09 10:34:10 1 37 4412 2 1646 5095 2697 149.10 37 23.59 CHANGED AhthApphat................tppttsGpshlsHslslAt..ll...tph....t...hD...ps.......sh..hAulLacss..........c..............-..........h.............pp............lp..cp....F...GspVApLVpuss.chtplpph..................ttp.......ptph-...slR+.hl...luh.....pD...h....RVl..Ll+LA-RlpslRth.......................tlApEshslausL.AsRLGlh ....................................................Ah.hAtptHp................sQhRp..S..G..-......P.Yl.h.HPlt.VAt.....IL....uch......p........hD........ts.................ol.hAA.LLH..D.ll...............E.................D.......................T......sss.....-c..........................lc..pp......F......Gpp......V.A...p.LV-GVo.Klsclpht..............................................spp...............ptpuE.....Nh.RK.MllAM.s.cD.l......RVl....llKLADRlHN.....M..RTL.....pth.....-...........KptclAcETh..-IYAPL.ApRLGl.t........................................................................................................................... 0 513 1040 1361 +13171 PF13329 ATG2_CAD Autophagy-related protein 2 CAD motif Mistry J, Aldam G pcc Pfam-B_2170 (release 24.0) Family The Atg2 protein, an integral membrane protein, is required for a range of functions including the regulation of autophagy in conjunction with the Atg1-Atg13 complex. Atg2 binds Atg9. The precise function of this region, with its characteristic highly conserved CAD sequence motif, is not known. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.02 0.71 -4.65 37 161 2010-09-13 10:50:17 2010-09-13 11:50:17 1 7 152 0 115 168 1 145.80 25 7.67 CHANGED slslcDCslGLNPhphsuKsllllspuphsssh.tttpsphshpl+cuolhlIDDlps.t..........p..tp.ss.tssphps....hp.shGalslupIoosplslphtp.sppttpp............lDlcl+sDhlhLEsCADSTQTLIsllssLpPPs..Ps..ppKYR.T ...............................................................p.lthhDCslsLpPh.thsu+hllslsssphsssh..tp....sp...p.hphplccuslhlIDDsps......................tt.tstph.p.....hh.spGYVslsplushpls...lph.hpss..ppsppp............l-lclpsDllhLcTCADSspTLlsLlssLt.P...ss..t.Kap.................. 1 27 56 95 +13172 PF13330 Mucin2_WxxW Mucin-2 protein WxxW repeating region Mistry J, Aldam G pcc Pfam-B_2205 (release 24.0) Family This family is repeating region found on mucins 2 and 5. The function is not known, but the repeat can be present in up to 32 copies, as in Swiss:C3Y5K5, from Branchiostoma floridae. The region carries a highly conserved WxxW sequence motif and also has at least six well conserved cysteine residues. 22.40 1.00 22.40 1.80 18.40 -999999.99 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.88 0.72 -3.55 91 678 2010-09-13 12:29:30 2010-09-13 13:29:30 1 101 53 0 385 687 1 85.80 34 12.24 CHANGED tWTsWhstD.pP....s.ssG.DhEThss.lp..tt............hC....ptPs...s.IpCR..sh.........s.tpsGpt.lpCsh.ssGhh...ChNs.-Q..s.......ChDYEVRh.C.......C .........................tWTpWhshchP........u.ssG..DhEohps.l+....tstp..........lC.........ppPhslcCRsps................sthsh.pp.hG.ps....lp..C.sh..shGLh...ChNc-Q.sth.......ChsYclRhhC.................................................. 0 168 181 262 +13173 PF13331 DUF4093 Domain of unknown function (DUF4093) Coggill P pcc Jackhmmer:Q7P4Q3 Family This domain lies at the C-terminus of primase proteins carrying the TOPRIM, Pfam:PF01751, domain. 
The exact function of the domain is not known. 25.00 25.00 28.60 30.40 22.30 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.77 0.72 -3.78 141 1316 2010-09-13 13:16:25 2010-09-13 14:16:25 1 5 1303 0 153 724 4 86.40 42 46.33 CHANGED slGVEpAos-sIpcALpclts...ppp..p.ppp.......................lohpDLhphGLhusssu+............pRRctLGchLsI.GYsNuKQLl+RLshatIocc-hppAlp .....slGVEHAS.EsIccA.Ltplpp.....hc.p..t.pp........................IopsDLlchGLluGssu+............cRREhLGcpL+I.GYoNGKQLlKRLphFGlTps-lccAh...... 0 59 102 128 +13174 PF13332 Fil_haemagg_2 Haemagluttinin repeat Coggill P pcc Jackhmmer:Q7P6T3 Family \N 22.00 15.60 22.20 15.60 21.90 15.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.39 0.71 -4.48 155 4185 2012-10-02 14:50:22 2010-09-13 16:16:57 1 347 516 0 992 4738 66 124.60 16 23.22 CHANGED luhs..p..op....opsp...ppss.ss...su..Ssls.uG...ss...lslpAs.......p................uslsltGopl....pus...............s..ltLp.Asp.slsl.puup...ssppppsp.s.............p.SpususGluluhus.ss.......h.sh.su..ssutucu..p..ts..u..sus..shss.opl...s.uu.ss.ls.lpS....Gp..DoslpGAplsu..pplsssl.Gs.....s..LslpShQ..Do.......s..p..h....s..sp......s....tu..u ...............................................h...........tptp...tp.p.p..pt......u..oplp.us......ss.lslpAs......p..........................slsl.p.G..opl.....pu.s..............................................p.s.ls.....l.....t.A.....s.....p..sl.sl..tu.u..p....s..p...p..p..p....p....p...p..p.............................................p....p...t.............t......s.h.........t.............................................................................................................................................................................................................................................................tt.......................................................................... 
0 79 422 731 +13175 PF13333 rve_2 Integrase core domain Coggill P pcc Jackhmmer:Q7P766 Family \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.76 0.72 -3.89 38 2406 2012-10-03 01:22:09 2010-09-13 16:55:03 1 11 1136 0 213 9884 987 54.10 42 39.33 CHANGED EuFFGhLKsEhhasp..papohcchcpslp-YIpa.Y....NpcR....L....K....G....LoPlpYRspsL ...................EsFFGhLK.sEh....a....Y..ut......pa...p..o..h..c..p..L.c.p.s.I..h-.Y.I.c.Y.Y.....NpcR.................l....K...........G....LSP.lp.Y.Rpp.................................. 0 41 116 163 +13176 PF13334 DUF4094 Domain of unknown function (DUF4094) Mistry J, Aldam G pcc Pfam-B_2504 (release 24.0) Family This domain is found in plant proteins that often carry a galactosyltransferase domain, Pfam:PF01762, at their C-terminus. 24.50 23.80 24.50 24.30 24.40 23.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.32 0.72 -3.60 46 197 2010-09-13 16:05:43 2010-09-13 17:05:43 1 6 33 0 130 187 0 86.40 30 23.32 CHANGED St+h.sllLChsSFhhGllhosRh...W...s....t..............tppppchplhpp-ssspp.......tpppclhtcVscT+psIp...sLDKoIosLEMELAuARup .......t+h.shlLshhSFhhGhh.ho.sRh...W....s.t.............................pptppchpll....pp.s.sspcp................pppchht.pVscTp.csIt...sLDKslusLEMELAuA+u.......................... 0 16 77 104 +13177 PF13335 Mg_chelatase_2 Magnesium chelatase, subunit ChlI Coggill P pcc Jackhmmer:Q7P325 Family This is a family of putative bacterial magnesium chelatase subunit ChlI proteins. The domain lacks the P-loop region present at the N-terminal of Mg_chelatase, Pfam:PF01078. 
22.20 22.20 22.20 22.50 22.10 21.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.03 0.72 -3.59 197 3113 2012-10-05 12:31:09 2010-09-14 12:06:40 1 12 3054 0 770 2544 357 95.70 39 19.76 CHANGED puEoSssl+.pRVhtARptQhpRhs.................ph.......Nupls......uptlc.ch.stLsppstphLcpulp+hsLShRuhcRlL+VARTlADLpuppplsppHluEALsYR ...........................t.uEoSssl+.pRVhtA.RchQhpRht.......................ch..............................NApls......upplc.p.a..C..t.LsspstphLcp...A...hp..+hsLSsRuacRlLKVARTIADLcs.s...-.......p....IsppHlsEAluYR........ 0 270 528 664 +13178 PF13336 AcetylCoA_hyd_C Acetyl-CoA hydrolase/transferase C-terminal domain Coggill P pcc Jackhmmer:Q7P7G0 Domain This family contains several enzymes which take part in pathways involving acetyl-CoA. Acetyl-CoA hydrolase EC:3.1.2.1 (Swiss:P32316) catalyses the formation of acetate from acetyl-CoA, CoA transferase (CAT1) EC:2.8.3.- (Swiss:P38946) produces succinyl-CoA, and acetate-CoA transferase EC:2.8.3.8 (Swiss:Q59323) utilises acyl-CoA and acetate to form acetyl-CoA. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.67 0.71 -4.55 251 2410 2012-10-04 00:26:15 2010-09-14 13:36:31 1 17 1795 21 696 2411 257 149.60 42 31.00 CHANGED GopcLY-al.....-.....p.....NPplth.tsssasN.........s...PtlIu..ppsphluINoulclDLhGQVsu-sl.G.......s+..hhSGsGGQhDFl+GAthS..pG..G+oIlslsSTs..p........s...G....p....l.S+IVPhLs.GusVossRsclcalVTEaGlA.sL+G+ohpcRAcsLIs.IAHPcFR-pLhcpA .................................................pchYc.h.....-p.....ps.cl.hh.RPt-hoN.........s..Pcl.Ip..+.hsl..lulNsulEhDlaGplNSspl..G.......s+hhsG.lGGpsDF.sRsAhh.........SIhsssSsA..c..............s...G....p.......I.SsIVPh....ls........pVspocpclcllVTE.Gl.A..D.LRGhosppRActlIs..hAHPcaRstLpch.h............................................... 
0 268 472 621 +13179 PF13337 Lon_2 Putative ATP-dependent Lon protease Mistry J jm14 Jackhmmer:B8D5Z7 Family This is a family of proteins that are annotated as ATP-dependent Lon proteases. 19.90 19.90 20.10 20.80 19.30 19.70 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.54 0.70 -6.13 53 313 2010-09-14 13:27:09 2010-09-14 14:27:09 1 3 292 0 83 269 74 438.10 48 68.76 CHANGED sphF.sGhV.....VRKDLs+plKtuss.VPsYVLEaLLGpYCAosD-ppIcpGl.ppV+chLscpYV+scEuchl+S+l+EcGp.a+lID+lsV+Lsp+cDtY.AphsNLGl..c..clhIssphV+c.a-+LLsG.GlWsllslpYp...................hs-.................spcs.......sP..FhlpsL+PIQhsshD.hcphhpuRppFTp-EWlDlLlRSlGhEPss..hsc............RtKhhhLsRhlPhVEpNYNllELGPRGTGKSHlapElSPa.uhLlSGGpsTVApLFhN.ust...plGLVGhWDsVAFDEVuGl.pFc-.+Dsl.sIMKsYMASGSFuRGc..-..plsAsAShVFlGNls..psV-thl+su..H.LFsPhPpthp.Do...AFhDRlHsYlPGWElPKh..+s.-haTspaGhlsDY.LuEhh+phRc..ps..assslpcaFcl.GssLspRDppAV+KTlSGLlKLLaPs....GphocE-.lcchLchAlEsRRRVKEQL.K+luuhEFhcspFSY .............................................ptF.sGpVVRK.D.L..s+..p..lK.tGA.N.VPsYVLEYLLGhYCuo-D-phIppGlpsVKcILu-sYVRP-EAptl+Sp.l.R.EcGp.a+lIDKloV+LspKcDhY......AphsNLGl..p....ss.lssphV+c...-.+LLsG..GIWsllslpY.....................hc.c..........................................spps.......sP.F.lpsL+P.IQhs.s.hD.h-c.lhpu.RppFop-EW..lDlLlRSlGhE.Psp..h..sp............RsKhhlLsRhlPh.....V.....E...sN....YNlsELGPRGTGKSHl.Y+EhSPp.ulLlSGGQsTVAsLF....YNhus+...plGLVGhWDsVAFDEVAGI..p....FK.D....KDuV...pIMKDYMASGSFuRG+....-.....plpupASMVFVGN.IN..po......V-.s...llKTS...............+LhsPhPpthh..D.o...AFhDRhHsYlPGWElPKh..+P..EaFTspYGhloDY.LAEhhRc.hR+....p.s....a..uDsl-caFcL...Gss......LspRDshAV++TVSGLlKL..LaPs........GpaoKE-.lc.phLphAlEhRRRVKEQL.K+lGGhEFa-lpFSY......................................... 
0 38 63 75 +13180 PF13338 DUF4095 Domain of unknown function (DUF4095) Coggill P pcc Jackhmmer:C2D0I5 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.64 0.71 -3.98 231 2088 2012-10-04 14:01:12 2010-09-14 16:14:53 1 8 1014 0 614 1837 131 118.00 17 48.01 CHANGED Gl.......s.....htlp..chh.cpGplp+lp+GlYh...........h.......sst..................hcp.........hh........hh.stt......h..............s.pu.....l.lu.htoALhhau..l..s.s..p.hP.t.thpls...sspstp...t.php.......tlphhth.p.tphhp..h.Ghpphph....stsltlhshERTlsDhh ................................................tlpchh..ppG.t.l.+l...t.+..G...lYh.............h.ssh...................................hcp.........hh...........hh.ttt..h..............................s.pu......l.l..u..t.t.oA....h.th...aG....l.......s.s....p.hs.....t...th.......p.lh....ssp.stp..................s..tht..................slph.hhh..p.....tphh..........t.h.....t.....h............sl.lhs.t+ThhDh...................................................................... 0 204 424 536 +13181 PF13339 AATF-Che1 Apoptosis antagonizing transcription factor Mistry J, Aldam G pcc Pfam-B_2199 (release 24.0) Family The N-terminal and leucine-zipper region of the apoptosis antagonizing transcription factor-Che1. 
24.70 24.70 25.70 25.80 24.60 24.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.99 0.71 -4.00 65 310 2010-09-14 15:30:23 2010-09-14 16:30:23 1 4 266 0 215 308 2 134.70 26 27.10 CHANGED DstKGh..AVcpQpphaDplL-sRI+LQKulssuNpLP.ptpshp.h..................tspptppslpssppsshpLhsslhsL+psLh................................tttp.spstpppKR............t.shschhpphpp....hcpphpsaRsslLpKWupK .................................................................-spKGt..AV+pQhtlaDplL-hRI+LQKulsssNpLP..pspshs.h.........................ttspphppslcsspcsht..pLhps.Lh..sLpppLh.............................................................tttpttpttpppKR....................p.phshp.ch.pthsp....hppphpsaRspsLpKWppK.............................................................................................. 0 68 115 176 +13182 PF13340 DUF4096 Putative transposase of IS4/5 family (DUF4096) Coggill P pcc Jackhmmer:C2D5Z9 Family \N 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.52 0.72 -4.12 381 3268 2010-09-14 15:35:10 2010-09-14 16:35:10 1 25 878 0 1120 3161 331 69.30 31 41.37 CHANGED LoDppWshlpPlL...Ps..........tp....t.tt..........t..phspRpllsuIlalh+oGssWRsLP.pc..au..sap..TlappFpcWpcsGlapc.lh..p ........................loDtpWth.lpshl....Pt..........tp....t.t...........t..thctRtlls...ull.a....l.h....+o.Gst..WR.................t.LP..pc......au.....sap.....TlappFpc...WpctGsapplh............................. 0 323 715 905 +13183 PF13341 RAG2_PHD RAG2 PHD domain Bateman A agb Bateman A Domain This domain is found at the C-terminus of the RAG2 protein. The structure of this domain has been shown bound to histone H3 trimethylated at lysine 4 (H3K4me3) [1]. 
27.00 27.00 27.10 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.97 0.72 -4.00 6 1007 2012-10-03 17:27:21 2010-09-14 16:38:22 1 3 816 14 26 965 0 55.10 80 13.56 CHANGED GYWIKCChuCpVD.NTWEPaYSTELs+PAMIaCS+G.uGHWVHAQCM-LoEohLlpLSQuNsKYFC.-HstLs+t..TP .GYWIpCCsTCpVDlNTW.PFYSTELs+PAMIaCS+G..sGHWVHAQCM-L................................ 0 1 3 10 +13184 PF13342 Toprim_Crpt C-terminal repeat of topoisomerase Coggill P pcc Jackhmmer:C2D5X4 Domain \N 43.70 43.70 43.80 43.80 43.30 43.60 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.88 0.72 -4.34 157 1275 2012-10-03 10:42:43 2010-09-15 11:56:09 1 20 649 0 258 1109 139 62.40 33 11.70 CHANGED psCsFplh+p.lsu+plopsplccLLppG+Ts.llcGFpS.KsG+pFsAhL.hlpts...pp............lsF-.Fsp .......sCcFplh+p.lsu+plopsplccLL...ppu+Ts.ll+GF+S.K.sG+.sFcAhLhhpsc...tp............lsFpFt......... 0 74 171 216 +13185 PF13343 SBP_bac_6 Bacterial extracellular solute-binding protein Bateman A agb Jackhmmer:Q7P755 Family This family includes bacterial extracellular solute-binding proteins. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.57 0.70 -5.05 82 6234 2012-10-03 15:33:52 2010-09-15 13:11:13 1 13 2738 20 1426 11548 3747 237.10 19 69.13 CHANGED sphsDllhu......hspthhpphhppshhp..............ssthsphst......tshtDs.cGtah..hhshsshlhhhsppplssp.h....P..psWsDLh.sPpacs..plslssss........shhpslhlslhp......phG.hcsstchhcshttsh+ssp...hschhsphpssp..........lslhshahsphhtppp.........shpllhPc-.GuhlsPhhhhlpps......ptctspthlcahhusc.htphhupsth.........hPs...........hpsshtsph.pstthphlsW-alppp .....................................................................................................................................................................ADllhs....................hsss.h.t.th..t.p..p.u.l.h.t.s.hp.............................................st.t.h...s...p...l...s......................tthp..cs...p.....s..p..ah.............sh..s.......h.t....s.h....u....h..hh...N....p....c....t....l....p....p..........h.......P...........ps...a......p......D...L........h......c........s......c.........a.......+..s........c.....ls.hs.ssts..............tssshsh.l..tshhp.............................................................thG..tc...t...u..h...p...a...h.p....p....L...t......t....s..h......s.phs..........tsss.sh.p.t..h..t.pG-.................hs..l.s..h...s...a....h...h...s...h..h..t.t...ppps................sl.p.h...h....h......P.......p..........p....G.........s.....h.....h.....p.....s.......p......s.....s...ul..h+s...............upp......ct..A.cpa.l.-ahl...S.sc..sQ.p.h..h.spssh...........hPst....................t............................................th.................................................................................................................................................... 0 374 805 1139 +13186 PF13344 Hydrolase_6 Haloacid dehalogenase-like hydrolase Bateman A agb Jackhmmer:Q7P3Y5 Domain This family is part of the HAD superfamily. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.11 0.72 -4.12 179 4798 2012-10-03 04:19:28 2010-09-15 14:26:07 1 25 3106 27 1565 3385 1469 102.30 28 34.77 CHANGED hlhDhDGVLa....................p.......Gsp..s..lPGAs-slptLcpp.G........................hphhhlTNs..us+otpphtpcL..pp.l...G.ls.....h........s.t.c.....pllsSupsstphLpp..........t..............................spplhllGs..p.....s..htptlcphGhp ....................................................................................hlhD.lDG....sla.........................................................................c.........G.sp....s..lP.u.A.tchlc.t..Lppp.s........................hshlalTNs....ss+o.psls.p+L...pp.h...G.ls........s.................st..c......plhTSuhAss..c....alpc.............t...............................spp..l..allGp..t......u..ltptlpphGh............................................................................................................................................. 0 484 900 1295 +13187 PF13345 DUF4098 Domain of unknown function (DUF4098) Coggill P pcc Jackhmmer:C2D3F5 Domain This domain is a C-terminal repeat found in many bacterial species. 
24.40 7.80 24.40 7.80 24.30 7.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.39 0.72 -4.03 289 4531 2012-10-03 02:33:51 2010-09-15 14:57:46 1 38 1424 0 1491 5507 345 72.60 17 44.79 CHANGED shp..s.s..pl..plps.s..sG.s.lpl..p.s..l...pup....l.pl.p.ss..s..G.cl.plp..ss.pu.....shphpsssG.c.lp...l..p...s......phs...slph..p...s.s..sG.s.lp..ls .....................................................................................t.......hphps.s.sG..s..lpl.....p...p...h......psp........h..pl..p..os...s...G..sl...plp.....ss.ps.............slp.l.p..o...ss.G..c...lp.........l....p................t..................tht.........php.h...p...s.....sG.pl...t................................................................................................................................................ 0 607 1005 1297 +13188 PF13346 ABC2_membrane_5 ABC-2 family transporter protein Coggill P pcc Jackhmmer:C2CY30 Family This family is related to the ABC-2 membrane transporter family Pfam:PF01061 [1]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.78 0.70 -5.03 126 1444 2012-10-03 10:13:34 2010-09-15 15:17:57 1 3 724 0 151 2276 340 201.80 18 90.65 CHANGED uLllKDhh...hh+p....h..hhlh..hh.lh.hlhhhh.hh.....ptth...hh...hshh.shhhs..hhhh..h......ss......hp..hDc.p...sch...sthlhoLPlsRcplVhuKYlhsllhh....hluhll.sh.lh.......s.h.l...h..h...htts.....hsh..t.hhhh..h.hhshh..hs....hlh....hulhlPlhacaG.c+up.h.......lhhhhhh...s.......lh..h.....hhh...th...hp...h..........p..tl..........hht...hhshss...h.....hl.hh...hhhhl.h.lh...hhhlShhlSlpIap ..........................................................................Lhhpshh....hh+t.......h...hhl..h..hh..l.h..shhhhhh...................s...h......hh...........hs.h..h....ss...h..hh.....h.hh..h....h...........ps..........hp...h-c.c.........sch...thhlhoLP.ls..Rcpllh..u+..Yl.....h...sllhh....hh..u..h...l.l.s.h..lh...........................s..h..........h...l........lpss......................h..h........h.h.h.......s.....h.h..h.hh....h.h....hhh.....hu.l..h.....hP..h.....h.a....t....h..u.h...cps.p.h...........lhhhhhh..h.......................lh...h.............hhh....sl....ht...h...........................p..th...............................hp...hhsh.s.....h..........ll.hh...hh.hhl..s.ll...hhhlShhlSlpla............................................................................................................................ 0 67 117 132 +13189 PF13347 MFS_2 MFS/sugar transport protein Bateman A agb Jackhmmer:Q7P3E5 Family This family is part of the major facilitator superfamily of membrane transport proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.27 0.70 -6.08 92 6648 2012-10-03 03:33:39 2010-09-15 15:21:27 1 29 2127 0 1465 15146 4138 393.70 22 86.65 CHANGED p+ls.YuhG...s.hupshhhs....hhsh.alhhFaTDlh..Glss.......uhluslhhlsRlhDAltDPhhGhlsD...pop..o+aG.+a+PWllhuulshulshhlhF.........................................................................................s.P.shshss...........phha......................................h...hlsYhlh.slhYohh..slPahuhssslopcsc..-RsplsshRthhusl.us.hlssslhhsll..........shhussst.................................................................................................................................................................Gahhhshlhulluhl..hhhhshhss+E....................................tpppphshtphhcslhpNc.hhhlhhhhlh..hhhshsltsuhhhYahpYslsss...s...hhshhhh........hhhlssllusslh.sh.lsc+h...u++p.hhhhuhhlhhlshhlhhhhs...........sshhhhhshhslhshshshhh.hlhhshlsDsl-Yuph+..sGpRtpulhhuhhshhtKhuhAlusslsuhhL.shsGasuss.........s..posssltslphhhshlPslhhlluh.lhhhhYpLsc 
...............................................................................................................................t..hh.YuhG......s..hutshh.hs..h.hhh..al..hh...a...Y.......T...c..l.h............G..l..ss.....................................shsGh....l.h....l..ls.+l...hD.A..l.s....D....P.h.h....GhlsD...............psp.......o+..a.....G..........+...h..R.P..........a..............l..L..h..u....s.....l.....s....h.u..l...h..h.h...l..h.F.........................................................................................................................................................................................................................................................................................................h.s.....s.......t..h.s.h.ss..........................p.hla....................................................................................................................................................s..........hlsah.l.h.....sl..ha.o..h.h.....sl..P..a.t....u....h....h..s..s..l...T...p......s.....s.p......c..R..s..p..l...s...u....a...R....h..h....h......u.....s..l...us....h...l..s.s.h.h.h...h.s..lh......................shh..u..s..s...p..t........................................................................................................................................................................u.a..h...h...h...s..h....l..h...u..l..h..u..h...l....h...h..h..h....s...h...h..s..s..+E.......................................................................t...t....t.....p.....p.......h.......s..........h........t.......p........t.......h......p.......t........l........h..........p..........N..........c.........h.........h.....h.......l........h....h....h........h....l...h......h..h......h.........u.....h...s.....l....h...s....u....h...........h..........h.........Y......a......h....p......a....h.....l..s...ps...............t.........h....h....s....h.h..hh...............................................................h.h..h....h..
..s.....s....l....l.....u...s....h...h...h......sh....l...s.c...p.h...............u+...p.....p....h.......h.....h....h....u....h...h....l...h...s...h.....s.....h.....h..h..h..h.hs...............................tsh.h..h....h...h....h.....h...h....h....l....h......s.....h....s.....t..........u.....h....h.............s......l.........h...s.h...h.....u.D...........s....l.-.as.......c.....hp.............s.G.....h..........R.......h...p...G.hha...u..h.h..s.h...h.h...K....hu..u.lu.s.h..h......h.u...hh..L.....sh.....h..G....Y.ss...............................s....Q.ss..t..s..l.s..lh.h.h....h....hh....h....P.s.hhh..hlsh..hhh.hh.a.lp................................................................................................................................................................................................. 0 457 814 1153 +13190 PF13348 Y_phosphatase3C Tyrosine phosphatase family C-terminal region Coggill P pcc Jackhmmer:C2D3Y7 Domain \N 22.00 22.00 22.10 22.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.15 0.72 -3.86 223 1604 2010-09-15 14:47:59 2010-09-15 15:47:59 1 18 1059 3 505 1321 176 65.90 20 23.01 CHANGED ph..hpph.tt.tstspthhp....h.....hhss...p..tp.al.pssh.stlcppaGuhcsYLppslGlsspplppLRpphL .........................................tht.................ht....hts.....hhss....c...tc.alpssh.ptlcppY.G..uh-sYLpcslGloppplpp.L+phhL............ 
0 153 323 426 +13191 PF13349 DUF4097 Domain of unknown function (DUF4097) Coggill P pcc Jackhmmer:C2D3F5 Family \N 23.00 22.60 23.00 22.60 22.90 22.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -11.41 0.71 -4.70 53 1584 2012-10-03 02:33:51 2010-09-15 17:17:30 1 18 1137 0 191 1342 30 157.20 17 50.57 CHANGED KKh.lhhuhslh...ll.....Gsll.hhh.u..htssu......h.a...spt..pltp..................ppph...cslsplsl.psss.sslpIpp.ussppl..pl..ph........ppp..........hpl........ptss.spLpl.ppp.tpp.hhhh.......shsht......t..tpsplplplPcsh....phs....slplps.s..Gsls...lp.s..l.phc.sl..p.ls.s.su...slphp ..........................................................................hhhh...lh.....uhhh..hhh...s...h...s..................................h.p....................................phsht...pshc..plpl..s............hss...t............s..lplpp..us.s.p...pl..cl..ph.................psp.................lph.................p.s.s..psLpl..ssc....tt...hh....................................hhphts.................tpppl......sltlPcph.........................lc...................plslps....s.......sGslp........lps...l..shc..sh..s.lp.s.ps..G.pl....................................................................... 0 72 121 154 +13192 PF13350 Y_phosphatase3 Tyrosine phosphatase family Coggill P pcc Jackhmmer:C2D3Y7 Family This family is closely related to the Pfam:PF00102 and Pfam:PF00782 families. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.13 0.71 -3.99 403 1860 2012-10-02 20:12:17 2010-09-15 17:39:40 1 30 1165 3 645 2255 428 158.20 28 56.25 CHANGED NhRDlGG............a...st............sGp...pl+hGhlaRSusLspl..o.s....s.DhphLt..slGlppllDLRsstEhppp....P.........c.hhh....s......usp.h.hpl..slh..sss.s.s...........................tt..h.t...thht.t...............t...t...stphhtch..Ypphlps....t..tss....appl..hchlts....s.s...s...slLaHCoAGKDRTGlsuALlLthLGV.sc.-s.IhpDYlloN ......................................................................................................................................................NhRDlGG....................................h...st......................sGp..pl+.shlaRSu.p.L...s.p.l....o.p....t.Dh.t.hL....t...p.....hsl.ptlh...DhRo..s.tEhptt......P.................................s.....h..t........ss.p.h.hth..s..lh..stttt..................................................t..h.t...t.htt...............tt.s.phhhphY.p...p.h.lpst.........pps....apph..hp..h.lhs.................ss....s.....sllhHCo.............AGKDRT..Ghs.u.A.L.l.L.t.h.L.....G......V.sc..cs.Ih...pDYlhop....................................................................................... 1 218 425 550 +13193 PF13351 DUF4099 Protein of unknown function (DUF4099) Ellrott K pcc JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. The C-terminal repeat region of this family is DUF4098, Pfam:PF13345. 
22.00 22.00 22.50 25.60 21.90 21.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.61 0.72 -4.09 51 427 2010-09-15 16:54:58 2010-09-15 17:54:58 1 5 128 0 40 345 4 84.10 31 20.97 CHANGED ap.pclsWcpLc.phGlo+EpLEp.stp.L-plLpGt+os.llslphshsss.tl.ph-A+LulhpspsG.plsltlaslpp.pspL-p.hh .....hc.sclsWppLc.phGlo+-p.LEp.ssp.L-phLpGtKos.llslp.hshsst.sh.ph-ARLuhhpss-G.plslshHslRc.c.pL-t........................... 0 13 35 40 +13194 PF13352 DUF4100 Protein of unknown function (DUF4100) Mistry J jm14 Jackhmmer:Q05Y37 Family This is a family of uncharacterised proteins found in Physcomitrella. 21.40 21.40 21.70 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.37 0.70 -4.89 15 2840 2010-09-16 09:21:40 2010-09-16 10:21:40 1 6 1 0 2840 2906 0 132.50 52 29.62 CHANGED AYIAKSQHEALMEEKRRGNFDDTREGNSSKRQTRGDKAREAASQELPVKDTSsSLtEKTKETKDKGKSIAYKLLSDIEAATNLKGVLEE+ILNAKlEFtLKElLtIsKKEFHDVIIDSIK+KRQLMuEstMsHAIDARIY+DEEEV.D.sYKQsTNEKNuYNQRVRFED.SDKEhEshSHYTRKHWARsTTEVLVKVGDIEEPIVALVDHGSE ..............................................................................................................................................................................................................................................................................................................................................................................................................................................h.....t...hpssh.shKt..........hL-.Ep...l....Lss.KlEFhl+-hLt..IsK.+-.....Fa-lI....I..ssIK...p.Kp.................................l.h......c-.E.E.E....l...s.........h......t...p...........h.........p....p...................t...............................................................................................................hhhc............................................................................................ 
1 2840 2840 2840 +13195 PF13353 Fer4_12 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:C2CYL4 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.03 0.71 -4.06 147 15922 2012-10-03 08:56:43 2010-09-16 11:21:53 1 90 4368 5 3584 16094 3629 134.80 18 43.59 CHANGED lNGsG...s...R...s.....oLa..VSG..Cp..+..pCc..G..C...aN...pssW............shs....................tGp.a...s.....pchpcpIlppLp...s....shl......pGLol.GGEPhh....Nt..p..sllpLl.++l+pch...s...sKsIWhaTGYph-.cLt......p....p.........ppclLshlD...V.LVDGcFh.psht.D.....s..Lt.aRGSuNQ+lh ................................................................................h.h....h.p...s...Cs..h...........cCt.a......C............h.s.........p..s..h..............................................p..h..p........................................................................t.s..p.....h.........o.........p-h...h.......c..p....l...l...p..t.h.t.........p..........tth..............ts.l.s...l...o....G....G...E.....P.h..l.......th.......p...hl.h..c....l.h....p....t....h....+....p.ps...............h............p.........h....h......h....p...s....s.G...h....h....h.t..p.h...................................h.th....h....p........h...s........h........l....D...h..................................................h........................................................................... 0 1305 2427 3058 +13196 PF13354 Beta-lactamase2 Beta-lactamase enzyme family Coggill P pcc Jackhmmer:C2D5A8 Family This family is closely related to Beta-lactamase, Pfam:PF00144, the serine beta-lactamase-like superfamily, which contains the distantly related Pfam:PF00905 and PF00768 D-alanyl-D-alanine carboxypeptidase. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.14 0.71 -4.87 138 5244 2012-10-02 21:13:33 2010-09-16 14:09:41 1 27 2386 268 694 10619 1975 200.40 28 66.42 CHANGED lss..sp...hh...sh...sscph.hssASshK....l....s....l....h..........ptlp....p.....Gcls....Lsc.....plphp.p..pp.......h..s..s..Gu.....Ghhp.hh..s..s.s..p..holp-LhphMlshSDNsAoNhLlchl....G.hps...lsphhpp.hGhps.stlpph......h.........................................................shp...t.....t...p..N.....to..osp-hspllp....tlhp....s..............p..h.ls................hp.......h........l.p.p.....pth.....p.p.t.ls...ts.l.s..............ps...h...p.lu...pKoGs.......l.ss.....hppD...sGll.hh..ssps.allul ...................................................................................ssp..h...sh...ps.c.c+.ash.sSThKlh....l.s........................ttl...........t...........s.p...p....Lsp.....p.l.p..hp....p...s-........................l...s...p......ho.......s..l.hc.....+t....h.....ss...s...h...olt-..L..s..p.u..s..lphS.....D.N.....s.A..s.....N.h.....L....l.p.....pl.............G.Gs......p....t...................l..s....s...h.......h.......+.......p.......h...G...s.......p..s...s..p...lc+h.......cs............................................................................................................phs....ths....s...st...c...c...............oooPtu.hup.s.Lc....pl.ht.....s....................................p..h.Ls.tpp.......................t.lhp...h......................ht....s............s.ps......s...t..h.....lp....us.lP...........................................tsh...h....l.u..c.Ko.Gs..................t.hs.......s.ps.p......luhlhs..stts..hhls...................................................................................................................................................................... 
0 200 452 595 +13197 PF13355 DUF4101 Protein of unknown function (DUF4101) Mistry J jm14 Jackhmmer:Q05S35 Family This is a family of uncharacterised proteins, and is sometimes found in combination with Pfam:PF00226. 22.80 22.80 22.80 22.80 22.20 22.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.43 0.71 -3.69 63 153 2010-09-16 15:48:57 2010-09-16 16:48:57 1 4 101 0 70 172 95 115.10 26 17.52 CHANGED AppllppWLssKupshussas.hs.....tLppllssshlsp.hpppsp.........thpppstahpa..hphplpulph.sps...sppst...lcAplpEpsphhs.sGphpsp.ohsssh.pV+Y.Ls+p.ssp.W+Ips ............AppllppW.ssKupAhGssap.hs......tLpplLssshLpp....hppput....................thpppshaapat.hplplpslph.sss....sppAt.lcAplpEssplhs..sup.ppt.ohpssh...pl+YpLt+p....sst.W+Ip.................. 0 14 49 65 +13198 PF13356 DUF4102 Domain of unknown function (DUF4102) Bateman A agb Jackhmmer:B3CUZ6 Domain This presumed domain is found at the N-terminus of a wide variety of phage integrase proteins. 27.00 27.00 27.10 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.12 0.72 -4.10 255 5922 2010-09-17 08:49:24 2010-09-17 09:49:24 1 16 1625 5 949 4352 701 86.60 34 23.48 CHANGED LT.cptlc.........sh..psps........pp.......htht..D.......t.........GLh.lpVp.s.s...G...sK..sah.hcY................ths..G+..pp......p...h.sl..Gpa....Ps..................l......oLspARppspchpthl.s.p.GhD..P.ttp....+cppct .....................Loctplcsh....KPpc........K......hpls.....D.....G.........tGLh..Lh.Vp....s...s............G.......s+...hWp..h+Y.................+hs....GK........pc........p...l..ul..G.sY.......Pp.....................l......oL...u.-ARppt.p.c.h+phl.u.p..Gh.D...Ptpp+pttp.h............................................................................................. 
0 174 512 736 +13200 PF13358 DDE_3 DDE superfamily endonuclease Bateman A agb Jackhmmer:B3CQR6 Domain This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.93 0.71 -4.45 69 7080 2012-10-03 01:22:09 2010-09-18 09:34:17 1 125 1242 0 3290 6631 624 121.10 21 54.12 CHANGED pllahDE.sshphph.thttuas.pGpp...h.htstt+tp......phshluA.lshps...hhsh.hhh.......................................ps.shsuptaht.alcphltshhp.................thhllhDNss..hH+u........ptlpph..........lps...............................sshplhaL.....PsYSP-LNPIE.thWuhlKppltp..tthpshcslp 
.........................................................................................................................................................................................................................................................t...............h....hhus...hp.....tt......h......h.h......................................................................................................................................................................................tt...thst..t...h.....t...hl.p..p.h...h..........h..p..................................thhllh...D.Nss..............hHps....................................pt.lp.p.h................hpp..................................................................................................................................tp...h.p.....l.h.h.h.......P.s.a.S.P.-.L.N.P.IE..phWth.l.+.p.phhp.............h.................................................................................................................................................. 0 1274 2326 3074 +13201 PF13359 DDE_Tnp_4 DDE_4; DDE superfamily endonuclease Bateman A agb Jackhmmer:B3CSL4 Domain This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.86 0.71 -4.63 61 3886 2012-10-03 01:22:09 2010-09-18 10:01:42 1 51 549 0 1579 4471 221 120.50 28 53.91 CHANGED lDsocl.l..ppPp...s........pstsasthKppp........olphhl..sssssGhlhh.lstuasGpssDpplhppS.shhph.......................ss.chlluDpuFs..hpp.........h.hshpt...........tttpl.sspc..tphNpplu.phRhhVERshshlK.paphL...pptlthp.............hpphsclltssssLpN ...............................................................................................................................................................hph.h....h........s.s...p..hp.hh....h.h.....h.....s...........h......pDh..p..l..h.p.......u......h................................................................s..hh.hluD.u..GYt.....sh...........tl..................sPh+t..........................................................phps......L......sspc......cthNct..l.u....p......t.............RthlEp.hhup....lK...pa+hh.....ppthc.p.....................hpphsh.h.h..h.hsslhN............................................................................. 0 715 1056 1438 +13202 PF13360 PQQ_2 PQQ-like domain Bateman A agb Jackhmmer:B3CRY3 Domain This domain contains several repeats of the PQQ repeat. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.25 0.70 -4.91 112 7855 2012-10-05 17:30:43 2010-09-18 13:57:24 1 358 2271 34 2581 8458 4130 182.50 15 45.19 CHANGED spupl.tuls...t....sGpplWpt.....sh.t...........................................s.............huuGl..sh.sssplhls...s.......stuplhulcsssGphlWpppl..suthhus.....................P.hlssspl.....hlhs...........sDs..plhu.....ls.spsGch.......................hWp.hpts.......s.sshs..ltussssshs....sshl.....lsshs.....uGcl...........hulshpsGph..hWptsl.utsp.............tssphp....plsDlsusPlls....ss...plhssuhpG....p...h.....ssh.-htsGp....hh.W.sps.huu................hps.shsssshlahss.p...................p....uplhslctpsGphhWpp ..........................................................................................................................................................................................................t..................................hWt............th....................................................................................ts.h.......s.h........s..s...h...l...a...hs...........s....................tp.s..t.l.h..A..l...c..s.....t...s....G......c......h....h.....W..p.h...ph......ss.t.h..tss...........................................................P...s..h....s...s...s...tl......................hlss......................................tsu..........pl.hA...............l.....s....tts..G.ph..........................................................................................................hWp......................................................t........h.........s....hh......................tthh......................h.h.s.................................ss....l..........................................sh...t....t.....p...G..p........hh...t....................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 857 1659 2192 +13203 PF13361 UvrD_C UvrD-like helicase C-terminal domain Bateman A agb Bateman A Domain This domain is found at the C-terminus of a wide variety of helicase enzymes. This domain has a AAA-like structural fold. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null --hand HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.30 0.70 -4.99 35 17009 2012-10-05 12:31:09 2010-09-18 16:32:58 1 133 4760 29 3900 14321 6091 300.30 21 37.70 CHANGED lpLEpNYRSstsILpsANplI.............................ppsppchpK...tLhsspssu.pplphhpspscpc........EAchl...spcItp...............htpp....shp.hpDhAlLhRs.NtpucslEcsLhpts.IP......Y.clsGshpFa-RtEI+DlLuhL+.llsNspDc.............hulhRllssPt.+G....IGssolppltp..hspppplshhphh.............................................................................................................................................................................................hh.ttlstcstptlpsFhphl........................pphp.p..ht...hshpphlcpllcctsh.hphlppt.....cpucp+lpNl..pcL..hshhcca..............pp.t.................p.tsLhsFlpchsLpsptpcpp.......tpt....st.VpLMTlHuuKGLEFPhVFlsGh....pcu............................................hhP.............................ptpuh..tptcplp...EERRLhYVulTRAccpLalotspp 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................LppNaRSp..tllphs.s......lh..........................................................................................t..t......t....................t............h...h..................t....t...t.....t.................t..........h............h.....h..............h........s....t...t......................................c...s....p..h...l.......st...p..l.p................................................................................h.tt.........tht..hp-....h..AlL...h.....R....s.....p.....t.........p........s...t..................h...p....p....h..h....h....p.....t.....l..P...............h...hh...s..t.......p..h..h..p..p.......p.l.p.t.h.......h.s.h....L.p.....h.l..........s..............p...p................................s.h........t.h..l.t..........sh.....h.......t.......h..s.........t.p....h..t..l.......................................................................................................................................................................................................................................................................................................................................................................................t..........................h......t..h....h...t...h.h.....................................................................................t...th...t...............................h...h....t...t....h....h..........p.......t..............t....h........p...h.h.tt.......................tst.t.ph..t..s....l......tth...
......hp.h.h.p.ph.......................................................t....................................................................................t........t....l..........t...h...l...p.....p......h....t......h........t..tt....ptt................................tt......................st..V....p.......l...h.......Tl........Hsu...KG..LEa.s..h..V.....a.l....s..s.....h.......pc...s..........................................................................h..hP.....................................................................................................................................................................................................................t.t.sh............p.tpp..l..p..........E..E.....p......R....LhYVulTRAccp.Lhlsh...t........................................................................................................................................................................................................................................................................................................................................................................................................... 0 1318 2605 3363 +13204 PF13362 Toprim_3 Toprim domain Bateman A agb Jackhmmer:B3CQ99 Domain The toprim domain is found in a wide variety of enzymes involved in nucleic acid manipulation [1]. 
25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.30 0.72 -3.75 51 1268 2012-10-01 21:47:57 2010-09-18 17:02:44 1 39 840 0 288 2429 926 98.00 25 17.03 CHANGED hlhlsEGlEouLSl....htshsshsshAshus..sslp......slths..t.thccl..hlstD.sDss...........GppAstcl........sc+hps.tuhpstlltP........pu........DaND..slpptGtc ..........................................lhluEGhtTALol................thhss.h.ss....h....A...s.h.su......s.p.Lp.....................................sl.ths.......t..ss..p..pl.........lIs.u.D.sDhs.t...............................Gpp..su.pph.....................s.pt...h...t...t.........s.....h..t..sh..h..l.P.................shs..................DaND...hhpt.G............................................................. 0 65 161 226 +13205 PF13363 BetaGal_dom3 Beta-galactosidase, domain 3 Bateman A agb Jackhmmer:Q700S9 Domain This is the second domain of the five-domain beta-galactosidase enzyme that altogether catalyses the hydrolysis of beta(1-3) and beta(1-4) galactosyl bonds in oligosaccharides as well as the inverse reaction of enzymatic condensation and trans-glycosylation. This domain has an Ig-like fold [1]. 27.00 27.00 28.80 29.80 25.70 26.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.52 0.72 -4.59 56 193 2010-09-18 17:35:32 2010-09-18 18:35:32 1 10 99 6 137 210 0 80.10 32 8.04 CHANGED WsPsl.ssss.s.......sp.sllVpGsYLVRoAslp....G..ssLpLsGDhsso.TslEVaus.p.slpslpWNGcclpsspos.hGslpu ...................Wss.h...spss.s.............sp.sllVpGsYLVRoAslp......GssLpL....pGDhsso.....TslEVh..........us..p..slpslpaNGcplptp.psp..Gshh............ 0 39 74 112 +13206 PF13364 BetaGal_dom4_5 Beta-galactosidase jelly roll domain Bateman A agb Bateman A Domain This domain is found in beta galactosidase enzymes. It has a jelly roll fold [1]. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.92 0.72 -3.80 105 562 2012-10-03 19:46:52 2010-09-18 22:23:13 1 26 198 12 334 675 18 112.50 22 20.36 CHANGED Ys-p..uW..ssss..sos.........s...sss.........hsu...hpsGsthYRspF......p....h-..hu....hp.............stsusuat.........splalNGh.hGp.as......shusp........ssaslPp....slLsh.....p........stlhsslhspsuts ................................................................as-p..sW...........sshs..sst...............ttss.........hss.....p...sG.shaYRupF................s.u.ths......hshp.............hs.spuus.s.ht.........splalNGh.hGp.ah.......sshGsp.......................ssaslPp..ulLsh...p........sslhsslhs..t.............................. 0 96 180 272 +13207 PF13365 Trypsin_2 Trypsin-like peptidase domain Bateman A agb Jackhmmer:C0AF95 Domain This family includes trypsin like peptidase domains. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null --hand HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.06 0.71 -4.12 318 12156 2012-10-02 13:45:52 2010-09-19 13:18:49 1 253 4752 80 3773 11497 4680 143.10 28 31.28 CHANGED GoG..h....ll...ssss............................hllT.s.................tHVlp...................................................................ts.pplthhhhsspp.ht...................................................uplhths.....t.................................hDl..Allplpsstttthshtssstthtssthhhhhh.......................................................................................httstshhhttthtstttttstphhhh.........................sssht.s.....GsSGuPl.hs...ppGp...llGl 
..................................................................................................................GSG..h...ll.....sp.s.G...............................allT.N.................sHVls...................................................................................................................suspl.p....V..p.h.scucp..hs.....................................................................................................Acl...l...G.tDs....p..................................................sDl.All+lpssphhshhhhsss....................................................................................................................................sthshsssssshh...ssss.hhs.s.h.s.t.s...s..s..t.t.t......tp...h.....................................l....QTDA.u.IN..s...............GNSGGsL...lN....hpGc...llGI............................................................................................................................ 0 1232 2423 3164 +13208 PF13366 PDDEXK_3 PD-(D/E)XK nuclease superfamily Bateman A agb Jackhmmer:C0AD98 Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily 22.00 22.00 22.00 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.26 0.71 -4.04 106 398 2012-10-11 20:44:47 2010-09-19 14:41:19 1 8 204 0 173 467 252 110.60 36 80.21 CHANGED p-loppIluuAhcV+ppL.GsGhLEsVYcpuLthELpppGlshcpQhtlslhY+uhhlsp.apsDllV........psplIlELKu.VcplsshHpAQllsYL+hos..........hclGLLlNFss...h+.t.......h+Rl .............................t.plshtIlssuhcVappL.GsGhLEslYcpsLhhEL.pppGlthpp...Qh.ls.lhY+s...hh..ls..p.a+hDllV........psplIlElK.u......Vc...p....l....t......s....hHpuQllsYL+hos..........hclGLLl..NFss....p.thpR....................... 0 96 155 171 +13209 PF13367 PrsW-protease Protease prsW family Coggill P pcc Jackhmmer:B0SDU2 Family This is a family of putative peptidases, possibly belonging to the MEROPS M79 family. 
Swiss:B7GHM8, PrsW, appears to be a member of a widespread family of membrane proteins that includes at least one previously known protease. PrsW appears to be responsible for Site-1 cleavage of the RsiW anti-sigma factor, the cognate anti-sigma factor, and it senses antimicrobial peptides that damage the cell membrane and other agents that cause cell envelope stress, The three acidic residues, E75, E76 and E95 in Swiss:B7GHM8, appear to be crucial since their mutation to alanine renders the protein inactive. Based on predictions of the bioinformatics programme TMHMM it is likely that these residues are located on the extracytoplasmic face of PrsW placing them in a position to act as a sensor for cell envelope stress [1]. 21.50 21.50 21.60 22.00 21.10 21.10 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.73 0.71 -5.11 184 1339 2012-10-01 21:07:14 2010-09-20 14:09:30 1 22 1111 0 303 941 360 189.90 23 55.85 CHANGED pPhthllh.sFlhGshsuhh......shhlp.......thht..............................hh..hhts.hlhusllEEhsKhlslhhhh.......h.htppph..........sp...hD.Gl.lhusssuhGFAhhENlhYh....h.............................................shhts......hlhR.slhu.shuHsla...ouhhG..huluhst...htt...........t.....hhhhhuhl...huhhlHulaNhhhhh..........................hhhhshhhhl .................................................................................hhhllh.shhhGshhuhh.......shhlp......................................phlh..h.........................tth.s.tshtsslsusllEEssKhlsllhhl..................................h...tthch..........................................spl...hs..sl.lhG...sss....GhGFAhhEslsYhhp.h...........................tt.s.........................sslsh............................hlhR..shhu.lsuHhla.oul..sG...hulslsh................pptt.............t.hp....hthhhhhl...hAhslHhlass.hsh.ts..h...............hl...hhhh.h....hhhhh.................................................. 
0 113 225 277 +13210 PF13368 Toprim_C_rpt Topoisomerase C-terminal repeat Coggill P pcc Jackhmmer:B0SHF2 Domain This domain is repeated up to five times to form the C-terminal region of bacterial topoisomerase immediately downstream of the zinc-finger motif. 22.30 22.30 23.10 22.30 21.90 22.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -9.05 0.72 -3.49 42 3284 2010-09-21 08:44:24 2010-09-21 09:44:24 1 25 1144 0 1046 2994 3373 67.30 30 21.53 CHANGED cPtsG.c.sh.V.+-GRFGPYV.....TcG...csN.....s.oL.+s..sss-slT...hccA............hp.LLA.-+.cA....+usu...+...K....p..s.Ap.K ............cPtsG.c.l..l..psGRaG.PY.l.....................p.c..G...........pss....................................u.oL.+...s.......ps......s..tslT........L-cA..................................lc.Llu...t.....+t...........tu.t........................ts............................................................................. 0 338 755 931 +13211 PF13369 Transglut_core2 Transglutaminase-like superfamily Coggill P pcc Jackhmmer:B0SCJ3 Family \N 23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.91 0.71 -4.72 183 1446 2012-10-10 12:56:15 2010-09-21 10:38:47 1 21 1341 0 427 960 170 155.40 32 47.73 CHANGED pthhtpLcplstcscpp.....lstp........................................................p........plp.tLtchhapchG.....FpGspp....sYhssc..........Nualsp.......VLcpRp..GlPloLullaltlAc+.lslslhuVsh.PscFll+ht.........................hhl-Pas.G.c....hlstppl.pthLpthhs............hp....hpsp.....hLp...sssspplltRhLs 
......................................................................................................p..hhscL-pLsp.scpp.............lsp.......................................................................................................................................................hs.cp.............plc..tLhplFY..t-..h.G......................................Fpu..sps.......sYh.s-.................sha.Lcp....VL..c..p..Rp..G.slSL.......uslhLh......lAp+..LsL...PLhsVh.F.....Ps..p...h.lLRhp.....................s.-sp.........................hhIsPFs.G.c........pLspctL...chhLcsphus............sc...lh.p............Lp...tAsshpllt+hL.s............................................................. 0 135 239 341 +13212 PF13370 Fer4_13 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:B0S9J0 Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.09 0.72 -3.58 110 1093 2012-10-03 08:56:43 2010-09-21 10:44:49 1 12 947 18 263 2183 451 59.60 39 58.75 CHANGED V......D..ssCIsCssCtthAPclFphssctttshl.h......c..........p....sp..s..........pp-.....p.....ppshpAhtuCPssuIts .............VD..-pCIA.CG...tCsshA...P..-...l....FD..a.-.D.-...s...ht..hll.h..........-.................c...sp..s....................-c............h........-shh-A.hc.uCPTcuIc............................. 
0 91 192 235 +13213 PF13371 TPR_9 Tetratricopeptide repeat Coggill P pcc Jackhmmer:B0SCJ3 Repeat \N 21.00 8.30 21.00 8.30 20.90 8.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.09 0.72 -4.12 103 4976 2012-10-11 20:01:03 2010-09-21 13:12:18 1 1320 2289 0 1951 43963 9522 67.30 16 14.49 CHANGED LKthh.hppcchppALpsh-hlLhl.....tPc...ssh-hRDRGllhtpLsshpsAhpDLptal....ppsP.....pssssphlctplp ...................................................h.........ttp.h...p.A...h...t....h.h..p.....t..h...l..t..h.........................sP.p.................sst....h.....h.......h.......p........p..........u........h.....h....h.....h.......p.........h........s........p.......h.....p....t...A...h..p..s......l...p.t..hl............p.p.s......................pt..................................................................... 1 652 1138 1559 +13214 PF13372 Alginate_exp DUF4104; Alginate export Coggill P, Eberhardt R pcc Jackhmmer:B0SIJ8 Domain This domain forms an 18-stranded beta-barrel pore which is likely to act as an alginate export channel [1]. 
25.00 25.00 25.00 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.22 0.70 -5.85 23 682 2010-09-21 12:51:04 2010-09-21 13:51:04 1 7 487 4 258 716 125 365.20 17 76.18 CHANGED shlshGhslRtRhE.hssssthGstsstcDshllpchplaA-hchss....phphhlphpDsRsaspp.................sssssD..pNtlDlcpAals.......as.ssLs.stshc.lRlGRQchshstpRa....Iusp-h.NsppuFDulhhcac..s....spaRhpuhhspPsphhssph.s........DspscshpFsusphphps.hsssslpsYasthpcpsup..........................hhcssssc+hpshulRhsGpt.......sthDa-hEuhhQoGps....Gspshcua............AhuucsG..YTh.shshpPRlulphDhASGD........pcssDGslpoFssLFssut....aauhAshsuhuNLhpspsplolpP....ssplsh.suhthhWRtsssDulhspstss..............l.ssuGpuu.casGpthclR.............hc.ahhssphthtlcuuaFpsG-sl+susspssshsssp ...........................................................................................................................................................................................................t...phshphR.cht........p.t..........h............................t..t..s..t...t......h......h.........ph...thh.h..hphtt....................thphhhph..ss.h.h...st..t..............................tss-.......ps.thslpp..uahp............hp...tht..........s.....hp....hphGRQph......ths.s...p..+h..................l..u..s...hs........h...ss...t.p.s....a.D.uhththp...h.....................sphp.h........s.h...h...h.h.....p...s.............sth....................................cst..s.t..p.pth.h...s.....h...h....h......p......h.........p...............h......t...........h..t.......h.....p...s..ahh.thp.pptst..............................................ht...t..t...t..s.p.p.hpshGh.+htuph.................tths..a..phphhhQ..t..Gph...............sspshpuh................................uhts..p..h..G......a..ph...t...s.h...pP......ph..s.......h..th..sh...h.....SG.s..................p.t.ss.p........h.t.sF......p.sh....a.sssh...........................hhu...h..th...h.....t...........s.......N......l...sh..t...th.php
s.......ptph.shthth.hh.hhh...pttsshhtts...............................................st..p...hGt.hph...........................hp..a.ht..ph..hththshh..up...hht..t............h................................................................................................................................................. 0 94 183 221 +13215 PF13373 DUF2407_C DUF2407 C-terminal domain Wood V, Coggill PC, Bateman A agb Pfam-B_17915 (release 21.0) Family This is a family of proteins found in fungi. The function is not known. There is a characteristic GFDRL sequence motif. 25.00 25.00 25.50 26.50 22.50 24.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.98 0.71 -4.26 28 151 2010-09-22 10:47:12 2010-09-22 11:47:12 1 7 140 0 116 144 0 129.40 32 39.30 CHANGED hGFDRLLssGFoppElssLRpQFhulaus......ppss-shss.p..............slRpLE-pWh-ssus..................................tssts.s.ssphss.........hsssstps.-DLLhGhllGhFhGlhuhlaLh+--..Glas+Rp+hulhsGlhlNhsauhlRs ............................................pGFDRLL.ss.GFotpElstLRpQFhuhh.u......hppss-shssss...................clRpLE-pWh-ssus......................................................stss.s.tsthst................hssssttshcDhLhGhhhGFFaslhshhaLh+pc..ul..astRpphulhsGlhlNhhhuhh+..................................... 0 38 68 100 +13216 PF13374 TPR_10 Tetratricopeptide repeat Bateman A agb Jackhmmer:Q7P422 Repeat \N 22.00 10.00 22.00 10.00 21.90 9.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.76 0.72 -3.89 291 5651 2012-10-11 20:01:03 2010-09-22 18:02:07 1 1557 998 7 3285 40335 8366 38.20 20 7.91 CHANGED shsshssL.At..hhtpt.uchpcA.pplhcp.s.lphpcpl....hG.s.....cH....ss ....................................shpsL.ut......hh....tp....t..G.....c....hp.....cA..t...sl....hpp.s.lt.htpth............................................................... 
0 1339 2317 2944 +13217 PF13375 RnfC_N RnfC Barrel sandwich hybrid domain Bateman A agb Jackhmmer:Q7P7C9 Domain This domain is part of the barrel sandwich hybrid superfamily. It is found at the N-terminus of the RnfC Electron transport complex protein. It appears to be most related to the N-terminal NQRA domain (Pfam:PF05896). 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.17 0.72 -4.29 96 1912 2012-10-02 20:27:15 2010-09-23 09:15:51 1 35 1463 0 364 1936 307 94.90 37 16.91 CHANGED hsF+.GGlHP.s-s.KphS.pspPIpph...hspclllPLpQHIGAPucPlVphGDcVLpGQhIucu.s.G.FhSuslHASsSGsVpuI-.+sssp.uo..t..s..p..sllIEsDGc ........................................................th..GGlH..P..c...Kt.o.pttslp.ph.s....h.s..p.c..hs.l...PL..p....QH.....I....G......A........s.up..sVpsGD+Vh+GQh..lscu.....p......G..hh.....ss...PlHAssSGsVsu.Isspsss.p.ss.t..h......slhIpsDu............................. 0 145 261 317 +13218 PF13376 OmdA Bacteriocin-protection, YdeI or OmpD-Associated Coggill P pcc Jackhmmer:B0SHC2 Family This is a family of archaeal and bacterial proteins predicted to be periplasmic. YdeI is important for resistance to polymyxin B in broth and for bacterial survival in mice upon oral, but not intraperitoneal inoculation, suggesting a role for YdeI in the gastrointestinal tract of mice [1]. Production of the ydeI gene is regulated by the Rcs (regulator of capsule synthesis) phospho-relay system pathway independently of RcsA, and additionally transcription of the protein is regulated by the stationary-phase sigma factor, RpoS (sigma-S) [1]. YdeI confers protection against cationic AMPs (Antimicrobial peptides) or bacteriocins in conjunction with the general porin Omp, thus justifying its name of OmdA, for OmpD-Associated protein [2]. 
22.40 22.40 22.40 23.10 22.30 22.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.59 0.72 -4.32 168 1185 2012-10-03 20:18:03 2010-09-23 13:04:02 1 11 931 0 364 981 110 61.90 27 33.61 CHANGED spshplP.sDltsALpssspAtthFpslstst+pshlthlt..sAK+scTRt+Rlpchlphlscsc ...............t...h.lP.p-lpstlp..ppPth.tstFpsLosutp+palhalt..sAK...pscTRp+Rlpchlphl.pG............ 0 160 276 335 +13219 PF13377 Peripla_BP_3 Periplasmic binding protein-like domain Bateman A agb Jackhmmer:C0AC70 Domain Thi domain is found in a variety of transcriptional regulatory proteins. It is related to bacterial periplasmic binding proteins, although this domain is unlikely to be found in the periplasm. This domain likely acts to bind a small molecule ligand that the DNA-binding domain responds to. 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.04 0.71 -4.24 246 26367 2012-10-02 13:57:41 2010-09-23 13:14:04 1 51 3428 168 5469 22392 1655 160.60 23 47.18 CHANGED scaL.hppG.a++lu....hls..............sttt..hsptRhpu........atpth.pptsht.sphhhhhttttt.tt..t..............................................................s.....s....Alhs..ssDthAht.lh.pshpp.t............Glp..l.P.cDl.ullu.hs........t......hh..phssP.sL.....o.olph.shpphu.ppAschLhph...l.......tstpts.pp.....hh.hss.p.llhR.pSs 
.............................................................................................................................paL.lp.t.G.+.c..c.Iu....hls.............................sstph.tsspp.R.h.pG......................app...uh....p..c.t..s.ls...h...st...t...h..h....h..t...s...t......s...t.t.s.s.httht....................................hl......pth...............................................ts...........s......Alhs...ssDt.h..Ah..G..s.l.ps.h.p.c..t...................G.lp.........l....P....c.........D.........l...u..ll..G..aD.............sh.........hu....ph...h....p..P...sL........................T.Tl...p..p..s..h.p.p...h.G..ppAschLlpt.....l........................pt..t.p....h..s....pp.................hh....lss..p.LlhRpSs.................................................................... 0 1692 3369 4443 +13220 PF13378 MR_MLE_C Enolase C-terminal domain-like Coggill P pcc Jackhmmer:C2D1R2 Domain This domain appears at the C-terminus of many of the proteins that carry the MR_MLE, Pfam:PF01188 and MR_MLE_N Pfam:PF02746 domains. EC:4.2.1.40. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.44 0.72 -4.09 1015 7138 2012-10-02 01:07:48 2010-09-23 13:18:26 1 22 2468 500 1681 7613 3271 111.80 23 28.55 CHANGED sEp.h...hshtshpphlp.tsu.s.....t..h.lphcls+sGGlspsh+lush.Acshslt...lhsHs......ssluh.suslp.l..sssh.s.............s......hs..........h.h-................hh...h.............tph....ht..............s.h........s.Gh...l.tl..P.....t..P.....GLGlc ..................................sEhhtshhpht.phlp..ttu.l....Dh..lphchs+sGGlstsh+l..Ash.A.ct.a.s.ls............hssHu......ssluh...utshH.lss..ss..s............s...........hs.......h.pE............t.h.t......................ch........lp..................s.h..t..hc.s...Gh...l.ps...s..p....t..P...GLGlp........................................................................... 
0 405 960 1336 +13221 PF13379 NMT1_2 NMT1-like family Bateman A agb Jackhmmer:C0AC77 Family This family is closely related to the Pfam:PF09084 family. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.43 0.70 -5.00 82 4162 2012-10-03 15:33:52 2010-09-23 14:15:53 1 25 1726 8 1437 7965 1155 236.20 18 65.58 CHANGED ssPEpspl+lGFIsLTDsAPL.llAtcpGaap+a....................Gls.l..plt+puSWussRDpll....sGplDuAHh.LhshshhhphGh...su.thshhsLhsLshNGpuIslusphhsthh..............hssssl+thhtp.pt........tshphAhoFPsuTHshhlcYWLAusGlcP...............pDlphlllPP.PQMVushcsGsh-uaCV....GEPWstpAl.tcslG..assh....oot-l.......WtsH..PEKs......husptcal-ppPssspAlltAll-AspahD..sstN+pcsApl.l....u ................................................................................................................t......tlp..lG..h...........h.h...t....h..h..sh....h....h.....u.t...p.....p.....s.h.hpct......................................Glp..l......phh...p..h.s...u...s.s.s...l..h..p.....ulh...............uG.plD..h..uth..hss..h..h...h...s.h.s.t.Gt......................s.h..h..s..h...s......s.....h...s...t......s.......G.....p.....s...l...s...l..p.p.t......................sshp.slt-l...............cuhplu.h..s.h.....s..u..s.p.p.h..h..l..p..h..h.L....t.p..s..Glss............................pDl..p.l.h..h.h.sP...s.ph.......s.s..u.h...p...s..G..p...l..D..u...h.s..s...........h.-..P..a...s....s...ts......t......p..s..h..u......phlh............ss..t.p.l......................h.s.sh.........s..t..ps...........hhs.pp....p....a.h..c..p...p..P...p..s.s..ps...h...lp...u.hhc...Atpa.....hp...t...t.....t....ttt..................................................................................................................................................................... 
0 383 868 1164 +13222 PF13380 CoA_binding_2 CoA binding domain Coggill P pcc Jackhmmer:B0SEN3 Domain This domain has a Rossmann fold and is found in a number of proteins including succinyl CoA synthetases, malate and ATP-citrate ligases. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.45 0.71 -3.92 341 4531 2012-10-10 17:06:42 2010-09-23 16:03:17 1 30 2872 8 1431 4521 2487 121.00 29 26.68 CHANGED +s.........lAVlGhS..s.pss+...suht.lhphLt.t.......pG.hplh.....PVsPp........pplhG.t.......t.....sassLs...t.s........tslD.....................hVslapssptsssllcch.hsh........s....scslWhQ.G...........shspcstphAcp......s.Glp.ll..spChtltts .............................................................................+olAVlGAS..s..c..ss+.......s..u.ht....lh+.Lhp...........pG.....p...lh......P..V...sPph...................................ppl..h..G...............p................sYsols.........p..........PtslD......................lsslhpsu.c.ts.sllc-h..hpt.....G........s+.s.hh.l...u...................hhp.c.c.h.t.t.hAcc............t.G.hp...llh..s.pCltl...s............................................. 0 459 926 1215 +13224 PF13382 Adenine_deam_C Adenine deaminase C-terminal domain Bateman A agb Jackhmmer:C0A9T6 Family This family represents a C-terminal region of the adenine deaminase enzyme. 
27.00 27.00 27.50 27.40 26.40 26.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.68 0.71 -5.10 110 1480 2010-09-23 16:28:37 2010-09-23 17:28:37 1 12 1271 6 346 1189 171 167.40 36 29.18 CHANGED spl.lTcchhhpl......pl.p...sGhh..ss..ppD..l.hKlAVlERapt.s.GslulGhlcGaGl+.pGAlAoolAHDSHNllVlGssDcDMhhAlsplhchtGGhsls........psGc.llupLsLPlAGLMSs.pshcpVscplpplpp.shc.p.lG.s.s.h.p...sPFhoLSF.LuLsVIPcLKlTDpGLhDVppaph .........................s..h.Tp.h..ph....h..t....ss.h......s...p.p.D..l.shlAVlE.Raup.p....tsh.uhGllpGF....G..l.p..p.G.A.lAoTluHDSHNIlVlGp..ss--MthAsNplhphGGGhslV........p.s.Gp...Vhup..lsLPIAGLMSs.pshpp.lsc.plctLcp.Ahc.c..h..G...st....hs....cPFhphuF.LuLsVIPtLKlTspGLhDspphp................... 0 136 260 294 +13225 PF13383 Methyltransf_22 Methyltransferase domain Coggill P pcc manual Family This family appears to be a methyltransferase domain. 21.90 21.90 22.10 21.90 21.80 21.40 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.90 0.70 -4.84 9 252 2012-10-10 17:06:42 2010-09-23 17:52:57 1 11 100 0 209 288 84 207.40 20 68.36 CHANGED u.AsKcu.uFFDchhsshhpthpc+hh.Fpphspst.................ppcsttWY.-NlEPshTC...sp.pRlGs..tG.........-GsKWlCDPpRLhp..........pts-CLlYSlGSsscasFE-ulhc.lups.CEIHVFD...tshsps.ssp.pthaap.hGluuuh-.shss................hhsht-h..tLsH.htRTIDlhKIDCEtCEWusYtsWhssDs...cphLlElHu..............................Psppss...........spcFapthhcpsahhFpKEsNl.a...pptClEauaI+ 
...................................................................th.....................................................................................h........hhu......s.........................DGsp..h..lC.t.sp...t.h.........................................................pC.hlaSh..G.....h.s.sphsF..-tph.hp.hs...CclasaD......s.......t...p...t...t.......p....t.t......p.h..ha.....tph.........ulu....tt...s...s..p.....t...st.............................hsl.tslh.p..t.h...s....H.....t..p.h.l-lLKh.DlE......u....s.E...a....p....s..L.........p..s..h.l....p..p.th...................hpQlhhElH.h.............................................st..............................hh...p...t...htt.s..hhhat........................t................................................................ 0 84 110 190 +13226 PF13384 HTH_23 Homeodomain-like domain Bateman A agb Jackhmmer:C0A3L3 Domain \N 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.52 0.72 -4.18 345 2487 2012-10-04 14:01:12 2010-09-24 17:38:01 1 100 1542 3 717 11862 1498 45.90 20 22.10 CHANGED stpttpRhp..hlthht...p........uhshpplAphl.s.loppolt.ch..hppa..........p.pG....hpulh ......................................t......h.t...hhhhht......p...........Gho.hppIActl..G.lo.csT.Vt.+h...hpp.........................t................................. 0 199 482 617 +13227 PF13385 Laminin_G_3 Concanavalin A-like lectin/glucanases superfamily Bateman A agb Jackhmmer:C0A826 Domain This domain belongs to the Concanavalin A-like lectin/glucanases superfamily. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.36 0.71 -3.99 726 6160 2012-10-02 19:29:29 2010-09-27 13:17:45 1 933 1411 34 2404 8060 5104 168.50 14 16.23 CHANGED p.u......h.h..h....sus........ss...h...lp..........h...ss......ts.................h.....s....ss.......sh........................T..lssWl..p.hs........s.....h.s......st..tth.....hh......................................s....ss..s.................sht.l...t.....................h.......s....ss.s............ph.t..h...........th...tss....s................................t.t.........php.....s.ss...........sh.s..................s....s.pW.pHlsh.s..hs.............u..s...................s.......hp....lY.l.......s...Gp........hhs...s..........t...s..t..sss........................................................................hs..............s..ss....h...................h....lG.........................ss.t................s.....s............spha.s.......G.....t.....l.D-lplas......p.uLoss-lp 
..................................................................................................................................................................t..................t......h.........h....s........................h......s....sp.......sa..........................................T..lss.hl....p.hs......................s.....h.s.......st...tth....hh...........................................................t.sts.....................tht.l.t...................................................................h.........s...sss...............ph.th............thtss....t.................................................................................................thp.......s..ss..........sh.p...........................s.....sp.W.t.+..lsl.s..hs..............s..s......................................p........hp..lY..l.........s...Gp..........hhss........t...s..h..sss.........................................................................................................................ht........ts..ts..th...................h....lG.....................................................sst........................s.s.........tp.h..p.........G.....p.........l..spl.tl.as......p.sLsttpl.......................................................................................... 
0 1029 1475 2077 +13228 PF13386 DsbD_2 Cytochrome C biogenesis protein transmembrane region Coggill P, Eberhardt R pcc Jackhmmer:B0SI46 Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.80 0.71 -11.67 0.71 -4.57 161 1138 2012-10-02 18:22:22 2010-09-27 14:35:20 1 9 1040 0 354 3522 1325 201.20 26 81.07 CHANGED sshllGLh.Guh.HC...lGM..CGGlshuh.....sh.....st...........p.......p........p..t.....h...t.........h.lhYslGRlhoYsllGslhG.hlGs....sl..thh....st....h.....t..h....lph.hhu.lhhslhhlhhuhthh.h....h...h....hh..t........l....tp....ls.....ph.l...hphh....psh..tpph.........psh....hs.shhlGhlhG.hL...P.C..G...l..VYsslhhAh.so..G.ushpGulhMhsFGLGT...lPshlsh..uhhssh.l.........pt......hhp.......p.th.h.+..hs..ul..llll.h.Glh .................................................................................h.hhllG.ll.uu....h..HC........hu.M....CGG.l.s....h.uh............shs............................................t..............p............p.h........h...h............h.lhYslGRl.....hoYsl.l.Ghl.h.G.hlGp....hl....s......st..............h....lph.hht...hlhslh.hl.h..h.u..hhhh..............t....hh..h...............................l........tp........h..s.........th....l...h..p...hl......p.l.hpp..........................psh..h..sshhlG..h..l..h..G..hl......P..C....G........l....VY.s.h....l.h....h.......A.h.s.s........u........s........s...hp.G.uL.hM..h..sFGLG...T.....lPshlhh.....uh...h.u...ph..l..............sp.........hhp...........p..hh...h...+....lu..uh..lll.h.hGl.................................................... 0 126 259 317 +13229 PF13387 DUF4105 Domain of unknown function (DUF4105) Coggill P pcc Jackhmmer:B0SCN2 Family This is a family of uncharacterised bacterial proteins. There is a highly conserved histidine residue and a well-conserved NCT motif. 
22.20 22.20 22.20 26.90 22.00 22.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.22 0.71 -5.09 149 716 2010-09-27 13:47:27 2010-09-27 14:47:27 1 1 544 0 199 698 188 171.80 22 36.56 CHANGED ss......tttpahp...........thtssplphs.hhsu.hhspshshaGHohl+h..............hpsthh.......thshsaus.sh....sshshhh+uhhGpa.phhhshtshpchltpYsphct...RslaphpL.s.Ls.pptptlhtplhctt.shp.t.....htYpahpsNCoTplhchl.ctsh...sttlth......ph....hh.sh.............shhchltph ................................t.......phs.cplphl.hhhu.httpsh..shaGHThl+h................thtD.pp......thshsaushst.......sp..hhhcGhhGpa..hthss.hshpchltpY.sphct...Rslaphp.L.s.Lo.pctptLhtphhcht.phs.......hhY.ahssNCsoplhchl.ctsh...ss.l.hs....ph....hhss.............thhphl.p........................ 0 69 127 171 +13230 PF13388 DUF4106 Protein of unknown function (DUF4106) Bateman A agb Pfam-B_5 (Rel 25.0) Family This family of proteins are found in large numbers in the Trichomonas vaginalis proteome. The function of this protein is unknown. 
27.00 27.00 27.60 32.80 25.20 26.80 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.54 0.70 -5.51 13 401 2010-09-27 14:06:47 2010-09-27 15:06:47 1 5 1 0 401 401 0 270.60 73 66.03 CHANGED MQKAMKSAEYIKANNDWLDAQANAKAAQLIGSIRTKIQADEDSSNEALTNADFKNAFEALHSKVK.VNDFSSGKKLKSEGFDKEL+EVAQNMTKITDAATRQAVQSAYDAVRATVVESQEKELQQTKTDLVNAFL+TKSQVGHYAADGTYVPAGGTYIPAGGTYILASGTYIPPNPPREAPAPGLPKTFTSSHGHRHRHAPK....PTQQPTVQNPA...........QPTVQNPA.Q...............QQPsQQPssQ..............PAQQPssQpPA..........QQP..QTEQGHKRSREQGNQEFLKMLKE-YGYPDTlDFSDRYKEAIRKFKEGNTDPNLFSFMAQHQIGYNLKPGKYKLAKGYDLIAYHPNDMsEFTPRYLMSELNDNSTlFMKRVKNRDGTKEERhMssDDLsRELVKNGLGIYEMPA.......DEVQETPQEE.VQIQPDMEEIVQQQQLEEP ..........................................................................................................MTKITDAATRQAVQSAYDAVRATVVcSQEKELQQTKTDLVNAFL+TKSQVGHYAADGTYVPAG.......GTY........I...................................PPNPPREAPAPGLPKTFTS....SHGHRHRHAPK........PsQ.......Q..PT...Q....ss............pPs.Qpss.t...........................s....Q.ss.p............sA......Q...P..osQ..ssA..........Q.QP....QTEQGHKRSREpGNQ-FLKMLKEsYGYPDTlDFSDRYKEAIRKFKEGNTDPNLFSFMsQHQhGYNhKPGKYKLAKGYDLIAYHPNDMsEFTPRYLhSElND..NSTlFMKRVKNRDGTKEERhMssDDLsRELhKNGLGIYEMPs.......DEVQET.Q.....EE......VQIQPDMEEIVQQQQLE.............................. 0 401 401 401 +13231 PF13389 DUF4107 Protein of unknown function (DUF4107) Bateman A agb Pfam-B_5 (Rel 25.0) Family This family of putative proteins are found in Trichomonas vaginalis in large numbers.\ The function of this protein is unknown. 
27.00 27.00 37.00 35.00 18.30 18.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.88 0.71 -4.54 4 582 2010-09-27 14:17:08 2010-09-27 15:17:08 1 2 1 0 582 582 0 140.50 87 90.96 CHANGED MGTKALAFGAKALGAYDAVNKMSGGRVSKTLDANKGKIGGWVAKKLRLNKIGLINKASNLVATESENALGKDDEFAKHAKDFNDQMKGETMHLNRVDGTKENVSSPPAVhPYGPYGMYGNPYERPFDPLTGGSNWYHYGRRRKTVKLETDVKKATKKK ..................................MGTKALAFGAKALGAYDAVNKMSGGRVSKTLDANKGKIGGWVAKKLRLNKIGLINKASNLVATESENALGKDDEFAKHAKDFNDQMKGETMHLNRVDGTK....ENV..SS...P......P.....V..............VL.P..............Y......GP.....YG...M.....Y.G.NP.....YERPFDPLTGGSNWYHYGRRRKTVKLETDVKK.ATKKK......................... 0 582 582 582 +13232 PF13390 DUF4108 Protein of unknown function (DUF4108) Bateman A agb Pfam-B_8 (Rel 25.0) Family This family of putative proteins are found in Trichomonas vaginalis in large numbers.\ The function of this protein is unknown. 27.00 27.00 132.80 132.50 20.90 19.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.98 0.71 -4.44 38 686 2010-09-27 14:18:04 2010-09-27 15:18:04 1 1 1 0 686 686 0 131.20 85 97.24 CHANGED TKFISVNKYMotlKEELDPFsYLNVYFYHFEKS.oFcKVWNIEPVKFAIVTK..NGApFEDL..DIEGLLsVKENFDR+FSpLcEGKAYKLVIPYEPKKADDYEYYESKIVEVQGKLGKKILESK.....PVFAPKEEENIDIDPE ............................sKaISVNKYMotlKEELDPF....sYLNVYFYHFEKS.oFsKVWNI.EPVKFAIVTK..NGAKFEDL..DIEGLLsVKEsFDRRFSNLKEGKAYKLVIPYEPKKADDYEYYESKIVEVQGKLGKKILESK.....PVFAPKEEENIDIDPE...................... 
0 686 686 686 +13233 PF13391 HNH_2 HNH endonuclease Bateman A agb Pfam-B_14 (Rel 25.0) Domain \N 25.00 25.00 25.00 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.40 0.72 -4.19 198 1540 2012-10-05 18:28:12 2010-09-27 15:37:21 1 23 921 0 616 1405 179 64.70 29 19.80 CHANGED Csl..oGhph................................lcAuHIh.Phu.........................h...tsp.....................................................sspNGlhLsssh..HphFDpthlsls .....................................CsloGhp.......................................hlcAAHIhPhp.........................t.s....sss...............................................................sspNGLhLssph..HphFDpuhlulp.................... 0 118 339 479 +13234 PF13392 HNH_3 HNH endonuclease Bateman A agb Jackhmmer:Q7P8C3 Domain This is a zinc-binding loop of Fold group 7 [1] as found in endo-deoxy-ribonucleases and HNH nucleases. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.11 0.72 -4.56 226 1385 2012-10-05 18:28:12 2010-09-27 15:51:52 1 43 897 1 251 1201 508 45.20 27 24.40 CHANGED hhhHRllaphh....hG....thP..s.s.h.........hlcHhs.s.s+....pssph..sNLchsoppcNtt ....................................hhHRllhtth.........hs........shs..p.s.h..........plsHls...s.s+.......tsN...ch.....sNLchsopppN................... 0 83 160 209 +13235 PF13393 tRNA-synt_His Histidyl-tRNA synthetase Coggill P pcc Jackhmmer:B0SA16 Family This is a family of class II aminoacyl-tRNA synthetase-like and ATP phosphoribosyltransferase regulatory subunits. 
21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.92 0.70 -5.37 129 7540 2012-10-02 14:22:40 2010-09-27 16:44:42 1 40 5007 33 2112 6233 5624 291.80 27 66.39 CHANGED GlcDlLPtcApph-pl+cpLl-happaGY-hVhPPhlEah..-oLlsGs.....ut.s..h.c......p.saKl.sD.p.oGRhlGlRuDhTsQlARlsAp.p.l....s..pp......tstRLCYsGsVl..+spsst..h......t..ss..REslQlGAELaGps.ulpA.DhEllpLhlcsL....t....sG.l...pplplDLuHsslhcsl.h.p.sss..lststpptlhstLppKshsplp.plst...t.....hs............tsh.t.phLttL....pLtGshcsLp..cAtph.lss.h...s.t..htt.slspLcpltshhpsh.....t.splplDLu-lRGacYaoGlsFssYs.....sG..hu..p..slApGGRYDplstt.F.....G..+..........sc.......PAsGFSl-lctL .......................................................................................................GhpDhlPtp.hhh..ph....lppthpphh.p.paGappl.cs.PlhE.....h.clh..tp.th.................G-..s...s.c.............c..h..Y..p...........h...........................D......c.....s...............s...........c...............p..............l.......................s.LRs.-.h.Ts.s.h.s..R..hh.hp..p...h..........hs...................tsh+haY.hGshF.Rh.-.+.s..p...t..................G.....Rh..R..pFhQhG...h.E..........hh......G........s.....s..s...t....h......D.u.E..ll..t...hshchhp......t.....LG..l..................pp.h..p.l.clN..shs.h....h.c..........................h....p........t........t....h....p.....p........s....l....h......p...h...l.....p.............p.......h.....p.......h.....t.....t......l.......c.....c.h...h.p....c...........t.....lp..................................................................pp.....h....ch.Lps.h..........ph.p....s...s........t........p.........t.......l.t...........p.s.....h...t...h....l.....s...t...........................p..p.........u.....h....p.c.........h.ptltphLc......sh..............t.hth.p.l-.....s.l....l..R.G....L..D...Y....Y.o....t......slFE..h.......h..s.........................s....s.......hs....s..................slsuGG.....RYDsLlpth....tG...p..
....................sh.............PulGFuhGl-Rl............................................................................................... 0 694 1329 1754 +13236 PF13394 Fer4_14 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:B0SHY7 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -11.08 0.71 -4.08 151 9987 2012-10-03 08:56:43 2010-09-27 16:54:42 1 73 4097 8 2936 16021 4178 110.20 19 33.66 CHANGED lFl..uGCsh...........pCp................sC......sttthcF................ssGc.hstphhspll.............hpssth..ttlsloG.GEPhh.hspt.hh.Llcpl+ppt.......hcIa.l.Tshohp...........th..t..hlu......hh-.llscGc .................................................................h.......tsCs..l.........................pCp..........................................................aC.h...............s.t..t...t..h.sh........................................................................t.t.t..p..h...h...s...h..c..p...l..h..p..h.lt.............................h.tt..t.s.h.......p...p..l.s.lo..G....G.E....Ph..h..........p........t.......l...h....t...l.l.....c...t....l...p...p.ts..................hp.l..p....l..p..T.s.G.h.h...........................................hhttthh............................................................... 0 1066 2000 2532 +13237 PF13395 HNH_4 HNH endonuclease Bateman A agb Jackhmmer:Q7P7J1 Domain This HNH nuclease domain is found in CRISPR-related proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.68 0.72 -4.33 93 798 2012-10-05 18:28:12 2010-09-28 10:39:15 1 12 747 0 166 2662 2689 53.20 33 6.29 CHANGED shYoGctIslscLhs..tta-IDHIlPhS.hhhD........DS.hsN+VLstpphNpp...Kusc.sPh ........................shYoG.c...t..l...s..hp.pl.t..........h-IDHllPhS...h..h..hD...................Do..htNhVLsppptNpp....KusphP....................... 
0 59 115 147 +13238 PF13396 PLDc_N Phospholipase_D-nuclease N-terminal Coggill P pcc Jackhmmer:B0SFY4 Domain This family is often found at the very N-terminus of proteins from the phospholipase_D-nuclease family, PLDc, Pfam:PF00614. However, a large number of members are full-length within this family. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.33 0.72 -4.29 441 4348 2010-09-28 09:45:17 2010-09-28 10:45:17 1 24 2893 0 835 2871 257 46.90 27 12.42 CHANGED hhllslhs.llpll.tp......p....tss......ss+hsWlllllh...lP..llGsllY.llhGcp .........................h.hll.lhs.h.lpll.hp.............c.....+ss.........ssphuWllllhh....lP..llG..hllY.lhhGp......... 0 273 568 736 +13239 PF13397 DUF4109 Domain of unknown function (DUF4109) Coggill P pcc Jakhmmer:C7MCW6 Family This is a family of bacterial proteins with several highly conserved characteristic sequence motifs, such as: APR, WxC and ERR. The function is not known. 25.00 25.00 58.90 58.50 21.10 19.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.57 0.72 -3.65 79 618 2010-09-28 12:50:06 2010-09-28 13:50:06 1 1 430 0 167 347 96 101.60 49 89.31 CHANGED RuLRGoRlG......usShEo-+ss-hAPRpplpYhCss.GHphplsFAs...-AEl..PssW-C+.CGt.Aths..cus..sP-sp.tsKPsRTHWDMLhERRS.pELEplLpERLphLR ....RsLRG.plG......uhShEs-Rss-hAPRphlpYhCsN.GHchclsFAs...-A..El..PssW-C..CGh...Aths..cus....sPp..sc..sKP.....RTHWDMLhERRS.tELEtlLsERL-hLR. 0 60 130 159 +13240 PF13398 Peptidase_M50B Peptidase M50B-like Coggill P pcc Jackhmmer:C7M9M6 Family This is a family of bacterial and plant peptidases in the same family as MEROPS:M50B. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.44 0.71 -4.93 100 639 2012-10-03 04:41:15 2010-09-28 13:50:45 1 11 589 0 249 815 96 197.80 26 80.07 CHANGED ll..sh.llA.lhllh.s.....h....lht..........h........+hls....ohhHEhGHAlsAlLoGtclpuIpLpsD........pSGlsho..pG........tGhuhhlsshAGYhusulhGhshhh.hs..t.s..sps...sh...hL.h...lsl.s.l.Llsh.L.lhh.Rshhul...hhlls...huslhhslhhh...sss...ph....s.....hh...shh.....lulhlhl.uult.s....l...........h.-L........h......pt...t.st........pSDAstLAc.lTt............lP..uhhW..ssl.ahhl.ulssl.hhu ........................................hh..hhhsuhhlhhtt..........lhp....h........h........p.h.ls..............shhHEhGHslh....shLsGG.+sc.slhlhs..s.............................................pp..Ghslopu..............thh....u.h..........lo..shu...GYh..h..s.s.l......h.h....hh...h..lh...ss....th.....phs....sh.....hlh...hhl..h.l..hlhh...L.lhs...R.ph...sl.....lslhl......hsh..h..lh.hlhh.......tp........phhh.......hh.....shh.....hhlhlhLstlh.p...................l.........hcL......h...................pp.....ss........p.Du.......stLtc.lot.................lP..hhlashl.ahlhslhsl...h.............................................................................................................. 
0 91 170 223 +13241 PF13399 LytR_C LytR cell envelope-related transcriptional attenuator Coggill P pcc Jackhmmer:C7MGV9 Domain This family appears at the C-terminus of members of the LytR_cpsA_psr, Pfam:PF03816, family 22.20 22.20 22.20 22.30 22.10 21.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.95 0.72 -3.61 203 1654 2010-09-28 13:27:52 2010-09-28 14:27:52 1 14 748 0 494 1394 193 91.50 23 24.14 CHANGED sspls..V.pVhNuo.sh......sGLAuplustLps.p.GFs.s...sss..uNs....s......ss.....t.....spop.lh.a....usss.pss.A.t..s.lust..l....s...ss.p.lh.t.s....s...tts..s...s...lplll..Gs-aps .......................tphpV.pVhNuo.sh........sGlAupsustLp.s...t.G...Fss...sss..uNt..s...........ss.....t.....spop.lha.....ssus....pss...A..p...plupt..l........s....ss..s...lt.t..s...........s.......tts....s............s....lplll..Gpsat.................................................. 0 156 375 464 +13242 PF13400 Tad Putative Flp pilus-assembly TadE/G-like Coggill P pcc Jackhmmer:C7ME36 Domain This is an N-terminal domain on a family of putative Flp pilus-assembly proteins. The exact function is not known. The Flp-pilus biogenesis genes include the Tad genes, and some members of this family are putatively assigned as being TadG [1,2]. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.38 0.72 -3.93 284 1158 2012-10-01 21:13:59 2010-09-28 16:26:36 1 8 766 0 467 1471 102 51.90 26 14.21 CHANGED G...sss...lhs..lhhlsllh....hhuhsl.D..huphhhs+sclQsu...uDsAALA..u...........A...ts ......Gshs..lhu.lhhlsllh..hsuhul.Dhuphhhs.+sclQsA...ADsAALA.u......A.................................... 
0 144 280 372 +13243 PF13401 AAA_22 AAA domain Bateman A agb Jackhmmer:C0A2E2 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.90 0.71 -4.01 168 4268 2012-10-05 12:31:09 2010-09-29 11:23:30 1 160 2155 9 1502 14349 3197 134.70 17 27.70 CHANGED hp........ppshh......hlhGpsGsGKThshpphhpth.................................pspsl...hlpsssss.......ohppl.hpplhptlshthtt..........................sttphh.ptltptlppptt..................llllDEApp.L....s.....tphLchl..ptl........hs...........ppslsllLhGps.plpphl .....................................................tt...tthhhlsGpsGsGKTsh..h....pp...hhp.ph.................................................................phphh..........hlp..ssstt...........ssppl...hp.....t...lh....p..p.l.s...h.thss.........................................................................................sttphh.....ptl...t..p...h...lppppt...................................................hllllD...E.scp..L.........s.................tphL.ptL........ch.l...hs..........................pt.h.t.l..l..lh..G........................................................................................................................................................................ 0 443 929 1274 +13244 PF13402 M60-like Peptidase M60-like family Nakjang S, Hirt RP, Bateman A, Eberhardt R agb Nakjang S, Hirt RP, Domain This family of peptidases contains a zinc metallopeptidase motif (HEXXHX(8,28)E) and possesses mucinase activity [1]. 
27.00 27.00 27.50 27.40 26.30 26.90 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.71 0.70 -5.52 67 818 2012-10-03 04:41:15 2010-09-30 13:21:49 1 49 547 1 178 859 44 289.90 27 25.67 CHANGED sshp.sTGlYsssscplslpl.....tsspslplhlusp..scp...........tp.shhtppas....Lssupsplps.shGGllYlh.....ssstst.....slplplsu.uh.sPhahhGppop.p-Wpp.lpphs.uPhsElpscphllTs.sscsl+ph.....sDsptlhchaDchhptts.-lsGhscp..t..pt............+hVsclphs.huhha.uGYslhhsss....shppllsh.stht..ssWG.hHElGHstQps.sapas...u.ssEVTsNlaolhspcthh..sp.tpthp...........tsch.....ppshpalppspsphth................tshhhtLshahQLphhaG .............................................................................................................................................s.shp.sTGlaA.stpplslph.....tssss......s..slpl.tlu...sc.hs.cchtp........th.tRs.sh..s..s.c.p.as..............Lp.s.s....p.s.phps..s.............aGGLlYlh.............sspsst...........ssphoh...su...ssp.....sPa...a........t.........pp......tc.......Wpp...hhps...................s...u.................P.......h.......sELpo-shlhTs.s...p...cs...lps.......................ss...s......p........ph.......h...cp...h.D..p....h...hps.h...s.c........h.hGhsp......t.s.sppp.hp........................+ass-lplu...huhha.o.GY.slhsssh.........s.s.p.sl......s.......h...ps.......l............s............sWh...hHElGHNtp....ps..shphs...............u..sTEVssNlhuLahpc.phh........sphpp.................htsch...phu..p.alp.p.s.sspt.h.....................sshspLhhahQLp.hht.................................................................... 0 67 89 126 +13245 PF13403 Hint_2 Hint domain Bateman A agb Jackhmmer:A3PK18 Domain This domain is found in inteins. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.27 194 984 2012-10-03 10:25:13 2010-10-01 10:01:36 1 80 208 0 187 918 160 135.40 32 24.73 CHANGED sCFssGThItTspG..hsVEsLpsGDhVhTtD.....sG...hpPlpWlGp.pp..l........h.sstsph..t.Pl+Ip..sGALGss.....hPp+DLhVSPpHRlLlss...sphhhuptcVLlsAcpL..........l..s...t.ss..l.p...tt......s...s.p....t.VsYaHlhh-pHcll.hApGs.sESahsGs ....................CFhsGThIpTs.pG.....hsVEsLpsGDhVh..Th.-............su......h..p...s..l..pWlG.p.p.p..h........th.ssst.sh..hP.lRIt..t.s.A.luss..........hPtcDllloPpHplhlp..............th.hlsA+tL..........Vs....u..tu..I.h....pp......s....hs..shsYaHl.h-..pHpll.hA-.Gh.sESahss.s.................................................................................... 0 22 148 169 +13246 PF13404 HTH_AsnC-type AsnC-type helix-turn-helix domain Bateman A agb Jackhmmer:A3PS88 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.22 0.72 -7.78 0.72 -4.42 347 5527 2012-10-04 14:01:12 2010-10-01 11:47:45 1 33 2295 47 1669 9314 1799 42.00 35 25.97 CHANGED l.DclDpcILphLppc...uRhohpcluc..plGLSsssstcRlc+Lc .........lDclD+pILphLpp-....uR.h.s.h.s.cLAc..plG.....lSsssltpRlc+Lc.................. 0 419 1008 1383 +13247 PF13405 EF-hand_6 EF_hand_4; EF-hand domain Bateman A agb Jackhmmer:A3PLV3 Domain \N 25.00 14.90 25.00 14.90 24.90 14.80 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.69 0.74 -7.96 0.74 -3.58 793 5737 2012-10-02 16:17:27 2010-10-01 13:55:03 1 526 696 127 3421 36588 2275 32.40 24 8.28 CHANGED plp..phFphh.D.ps...psG..hls.h.p-l.pp....hlp..............t.hu .................hp.psFpha.D.p-........tsG......tls.h.pEl.pp....hhp................................................ 
0 1356 1922 2697 +13248 PF13406 SLT_2 Transglycosylase SLT domain Bateman A agb Jackhmmer:A3PHY1 Domain This family is related to the SLT domain Pfam:PF01464. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null --hand HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.63 0.70 -5.48 176 3080 2012-10-03 00:09:25 2010-10-01 15:54:35 1 19 1866 9 798 2548 2044 255.30 32 69.52 CHANGED sapshltsh.pppAtppGlspsslpps....hsss.php...........tpllcht.......ppQs....Ehs.p..s......htcYhsth.ls.spRlppGpphh.ppatshLsclEpcYGVssplllAlWGlETsaGp.hhG..s....hsllcuLATLAa.-..t...........................RRssaF....cpELhssLpllccsp.lss.................pp.hpGSaAGAhGpsQFhPooahpYAV...DhDGDG+tDlass..ssDAluSsANYL.ppp.GWpsG.pshuhplpls..............tshsh.ht...s...hpsh............spht.thGlp.hsst..........s.......thts..pL...tutpG.........ss..aLshpNF.hlIt+YNpSshYAhuVutLA- ................................................................................................s..t..h.tth.pt.stttGhsttthpth..h...sts..ph...................tllph.........................ppQs.........thp..........s.............hhpYhpph...ls...tpl.pp..Ghthh.ppatshLp.chtppYGVssphllulhGlEotaGp...hhG..p....hpl.l....suLuTLua.s.h.............................RRtpaF....ptcLhshLhhhp.pt.p.hs.s...................p..h.pG..S..aAG...AhG.sQF............hPooatpYAV...DhcGDG+h..Dlass..ssDAluSsANYL.....ppp....G..WppG.tshuh.sp.h........tshp......t......h....h...........................................................s....tst..u.lhh.tsttG............t...ahsh.NF..slhcaNp.....S.hYAhulh.Lu............................................................................................ 0 186 454 620 +13249 PF13407 Peripla_BP_4 Periplasmic binding protein domain Bateman A agb Jackhmmer:A3PPI0 Family This domain is found in a variety of bacterial periplasmic binding proteins. 
28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.69 0.70 -5.03 173 12924 2012-10-02 13:57:41 2010-10-01 18:32:26 1 44 2863 86 3019 24092 3112 255.70 20 75.86 CHANGED lu.....lsh..stsss.ahpt....htpuhpptspp.hshphhh............ts..upsss.spQhp.lpsh.lsps........sc..ull..lssscs..suls.sslcp.ApstGI.s.Vls........aDss...........ssstphh..........hluh-stph...Gphtu................................chlscthssp............uplsll....sGsss.ssstp...tt....hpGh.pps.....lpp..................hs.plplhs...............................s.s.hssspuppt.hpslLs...ss.........l........sul.hu..sssshsh...........u....shpulpp.tuh..tsp..h...........lsGhDssstth.phl.............psGphp..s...slhpssh...th..uttuhphsh.ph..hp......Gcp ...............................................................................................................................................sh.h..thsss..ahst......hppu...h....p.p....t...u....c....p...huh.phhh.....................supsc....sp..Q....h..p.t...l.csh..l..s.ps............sc.....u.l.l........l..s.....s...s....c.s.........s.u..h...s.....s.....s..l...c...c.....A...p.........p.....t....s.....I....s...V.l.s........................hDpt...............hsp.s.t.h.s.s......................hl...u...s...D...s...h...p..s....G..ph..tu.........................................................................................c.hl.s..c.phstp...............................................uplshl....................t.G..ps..s...s....s.s.sp............tR.............tpG.h...tps...............lpp....................................s...sh.c.lls......................................................p......ss.s...h.s..t...s..p.u....h...ph....h...p.sh.Lp.................sp.s..............pl.......................................................s.ul..hu..ts..Dshuh....................G.........Al.p.A...lc...s...tuh.......ssp.hh................ls.Gh.D...u...s...s...p...s...h...pt.l.........................p.s..G.
p..hs............s......o.lh.p.sst....th.u.t.h.uhp.hsh.ph...hp..s............................................................................................................................................................ 0 815 1763 2389 +13250 PF13408 Zn_ribbon_recom Recombinase zinc beta ribbon domain Bateman A agb Jackhmmer:A3PQ69 Domain This short bacterial protein contains a zinc ribbon domain that is likely to be DNA-binding. This domain is found in site specific recombinase proteins. This family appears most closely related to Pfam:PF04606. 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.33 0.72 -3.71 726 3758 2012-10-03 10:42:43 2010-10-01 18:35:27 1 37 1459 0 677 3269 314 62.10 23 12.28 CHANGED hL..s..G..l..lhCup.......CG....p...s.......h.....tt..........p.....p....pp........tpt.....................hh..Y....tC.s....st...h..pt.t...................p.....Css..p......plpt.ptl-...phl...hpt...lp ...............hsG..l..lhCup........CG......s.s.......h..tt..........p....p.....pp............ppp...................th..Y..tCs....pt.....t..pts........................p...Cstp......plpt.chl-...phlhptl........................................................ 0 325 545 625 +13251 PF13409 GST_N_2 Glutathione S-transferase, N-terminal domain Bateman A agb Jackhmmer:A3PFR8 Domain This family is closely related to Pfam:PF02798. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.50 0.72 -3.98 315 3673 2012-10-03 14:45:55 2010-10-04 11:15:34 1 29 1984 30 1074 16648 5196 85.80 29 29.54 CHANGED hs.Pas..p+shlh.Lpht.....ul...s.hp.....h..p.h................l............t.h..................p...th.......................................................h.p.hs......P..t..s..................................................p.....lPsLhh.............................t...sup.............sls..-S....tsIhc.aL.ppt ......................................................sPhu..p+shIh..hc.h...........GL.....-...h.....l..sh.................s.........hh..htps.....................ph..u.ss.F....................................................................................................s.c.hs......s..p..s...................................................p....VPsLhD...................................c...psp...............llss...ES.up.Ilphlsp.................................................................... 0 300 612 885 +13252 PF13410 GST_C_2 Glutathione S-transferase, C-terminal domain Bateman A agb Jackhmmer:A3PFR8 Domain This domain is closely related to Pfam:PF00043. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.27 0.72 -4.08 279 7419 2012-10-03 01:14:49 2010-10-04 11:42:28 1 51 2398 100 2549 11894 3037 85.10 19 31.76 CHANGED pth..hpp................................................h.htp.....ltpsls..tl........cp.p.hs.......................................pt.............hh.Gs.......phohuD..hsl.h...s....hl...t.h.h......phh.....hh...t.h.......................thh.......p....t.h..spltsah.cp ...............................................................................................................tt................................hhtp.....hhptLs....hL.........-p.t..Lu.............................................................................................ppt............als.Gc.....phohAD......htl..h...s.........hl.....h.p...h........phs........hh...t.h................................hh........t.......s.h....spl.tahp.t........................................................ 0 615 1327 1986 +13253 PF13411 MerR_1 MerR HTH family regulatory protein Coggill P pcc Jackhmmer:C2D3E8 Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.89 0.72 -4.20 214 18606 2012-10-04 14:01:12 2010-10-04 13:16:52 1 99 4180 5 3931 16622 3391 68.70 27 35.72 CHANGED hsls-luchhsl........stpslRaaccchh...ltssc....pspsh.+hYsppclpplphIppLhp.pGholptlpchlptt ..............................hpIu-luch.s..u..l..................osco...LRaY.....-.c..pGL.....lp.Ptc..........spsGh...RhY......s.p...p-lppL.phIpphpp..hGh.sl.p.p.lpphlt..h...................................... 
0 1233 2583 3343 +13254 PF13412 HTH_24 Winged helix-turn-helix DNA-binding Coggill P pcc Jackhmmer:B0SAM5 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -7.77 0.72 -4.57 608 12234 2012-10-04 14:01:12 2010-10-04 13:54:13 1 121 3550 54 3360 26657 3880 46.50 30 21.83 CHANGED hDphDhcIL.phLpc..su+.lotp-LAcplu..lSsssstpRl++LccpGlIp ........................hsph-hpIL..ph.Lpc........suc..l..otp...-L....Acplu......LSsssstcRl+cLccpGlIp................ 0 820 1884 2697 +13255 PF13413 HTH_25 Helix-turn-helix domain Bateman A agb Jackhmmer:Q87B38 Domain This domain is a helix-turn-helix domain that probably binds to DNA. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.74 0.72 -4.33 195 2987 2012-10-04 14:01:12 2010-10-04 14:51:59 1 16 2861 3 621 3650 1038 61.70 32 21.10 CHANGED hL+psRpppulolcclupph+lphphlpAlEpschs.tLP....ss....sas+Gal+sYAchLG.L..Ds...p...tl ........L+puRcphGlSlpplspchplphphlpulEpscas..tlP....us....sasRGalRpYAchlsl.-spp............................................ 
0 195 382 519 +13256 PF13414 TPR_11 TPR repeat Coggill P pcc Jackhmmer:B0SDR7 Repeat \N 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.52 0.72 -4.33 458 62040 2012-10-11 20:01:03 2010-10-04 16:34:37 1 8099 4563 105 30435 78055 23252 67.00 19 21.46 CHANGED p.p...u..t..t..h.pphGpt.hh.p....p.t..c..hppAlptappu.l...p...h......s.......s.s...s....sp..........ha.hs..h...uts.hh..ph....s......p..p..hp....p.A....hpt..h..p.p.u....l.p......l.s..s ...................................................h.hth.G.h..hh..p...................t...s.......c.......a...p.p.A..l....p...t....a...p.c......A..l..................p.............l................s...................................P..p.......p........sp...............................................sa..hs.........h.........uhs..h.h.......p.h........................s..............................p....hp..............p.A..........hpt..h..p.p.A..l.p.ht............................................... 0 11553 19108 25290 +13257 PF13415 Kelch_3 Galactose oxidase, central domain Coggill P pcc Jackhmmer:B0SIN7 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.56 0.72 -3.69 122 3739 2012-10-05 17:30:43 2010-10-04 16:49:49 1 745 564 0 2451 13422 417 52.60 26 10.03 CHANGED ss..tlhlhGG..hs.........t..tpth....ssh.h..hh....sh....sss........pap......ph........ss...h....ss.....sRtsassshh ..........................................splhlFGG......hs.......................t....tphh....................sDl..a....hh.......cl...........sst.......................................pWp.............pl..........................tus........h...........Ps...........sRssHsssh.h.............................. 
0 922 1445 1978 +13258 PF13416 SBP_bac_8 Bacterial extracellular solute-binding protein Bateman A agb Jackhmmer:A3PHX7 Family This family includes bacterial extracellular solute-binding proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.25 0.70 -4.67 141 17239 2012-10-03 15:33:52 2010-10-04 17:51:20 1 26 3652 65 4107 24014 7960 298.20 16 73.07 CHANGED phscpa.p....c.....p....s..s.......l.p..lp.htp..h..ss..s.-..h..spl...pst....ht..s...s..s..h..ss...h...sl...hhhs.sst..h.tphsp....ps...hlh........-l..s.............ph...t.phsp.........................................h..ss......hhpsh.......sh..cu.........hu..hP.hshs.s.sh..s................hhYspshh.........p.c...............s...........ss...poh.s.......sl......h.......c.....hp.......t..h................t......Gp.h..........................sh............h........p....s...s.....h.s.......hh...........hh.t.hhs........................sGt.t.h..sps.........stsh.......................sps.hphh.pp...ht....s..p..h.....h........hh................s.s..s...s...s..shs..ths......sG..c..sshhh......s....us....hs...hs...s.hp..................pt..s........t......s.hs.....h.........s......h.........s................p...........p.......u..........s.hh.....ss.p...s......hs..l....st....s...ss.pp...ph...tuhcF.l..pah.s.s.s-.spt....p....hhp....ttuh..h.P.sppss..t..t......sst....hp.......ps......st..h.s 
..........................................................................................................................hhppF.p....c......p.s...s..........l.c..V..p...hph.......h....st......s...p...............h..h.....t..pl.........pst...............h.t....s..........G...s.......s....s.s......h.....s..l............hh.s.s.....s.s.h.......h....s..p...hhp..................ps.....hlt.......sl.s.............ph...s..phpp......................................................l..ss..........hhp.sh................sh...sG.p..............................hau........l..P.....h.....h..h.s....s..........h....s..............................................l.h..Y..N..p..c..h..h...................c.p....................................t..........................ss....p..o...W...s........-l..................h......p...........t.............ch.........................p..........sp.h......................................sh..............h.........s.....s.s.....p..t...........hh............th..h..hht............................................tGh.s.h..sps................sstph..................................tps..h.p..h..l...pp.......ht....s...p...h........t..........h.h...............................s..s..s.....s......p....h.....ts.......hh..t.............s...G.....c........s.s...h.sh............s..........hs.......hs....hs.....t.hp........................................pp...s.....h...........s..hs.......h...........s........h.........P................................................p.........p........u.............s.h.h.......ts..s....s........hs.....l.........s..p.......s.....up....sp........c.......Ahc.F.l........sal..h..s.s.....-...stt.........p......hsp........thuh...h..s..s.p.p..s.s.h....t..................................hh............................................................................................................................................................................................. 
0 1273 2551 3313 +13259 PF13417 GST_N_3 Glutathione S-transferase, N-terminal domain Coggill P pcc Jackhmmer:B0SCP7 Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.38 0.72 -3.75 216 16191 2012-10-03 14:45:55 2010-10-05 09:52:41 1 91 2388 143 5495 19464 7212 77.70 24 34.58 CHANGED La..s..h.t.SPas..p+lph.sLphps....l....s.....hc..h.....h......h..s.......sh.....p...p.......s..t..h........hth..s....shs...plPsL......h.p.s.....up.......slt....-S.t.sIh.ca.L...-pths.........ss..s ...................................................h...h..ss......hs......t..+....l..pl....s...L..pt...p...u............l........s...................ac...h........h...................tl....s...........................ht..........p....p............ss......p..h...........................hph..N...........P.ts..........p.VP.sL..........h..c..s.......st.............................slh.......ES..t...sI.hcY.L.scpas....t..................................................... 0 1291 2807 4225 +13260 PF13418 Kelch_4 Galactose oxidase, central domain Coggill P pcc Jackhmmer:B0SIN9 Repeat \N 21.60 19.00 21.60 19.00 21.50 18.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.49 0.72 -4.19 84 4909 2012-10-05 17:30:43 2010-10-06 13:08:44 1 961 757 2 3302 16453 618 51.00 22 10.48 CHANGED sRhtts.ssh.h...ss..s..p.lhlhG.Gpsss.s........thhs-h.a..h..a....sh..ps..s..p..W........ppl.......s..sh..Ps ..................................RhsHo.ssh.l..........p......s..........p..lh.la....G..G....h...s..s..s..t.....................................ph..h...s...D...l...a.......h........a........ch.....ps........p........p.........W.........p.h....................t............................. 
0 1424 2079 2760 +13261 PF13419 HAD_2 Haloacid dehalogenase-like hydrolase Coggill P pcc Jackhmmer:B0SBQ3 Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.05 0.71 -4.05 169 33740 2012-10-03 04:19:28 2010-10-06 14:29:26 1 189 4905 123 8845 29816 9459 180.80 18 75.44 CHANGED llFDlssTL..h...s..hph........hhhpth.......p....p..........hh..hcph.......sh..s...........hp.tpp.....hcp.......h.......h.......thsh..t.....ph...................ltp.lh.....pph....s.hp..................st..ht....p.thpph.............th.....th..t...h.hss.ssp.hlppL.p...p...thpl.hlh.oss..s..t..tt.hp.t...hhpp..h....sh.....thac...tlh.su...p..p..ht.h..h...KPpsts.apthhpph.....sh.ss...pp....h....lhlsDsh.psl..ts.Ap.shGhps.l.hl ......................................................................................................................................................................llFDhDGT.L....h.......D.....st...................hh.h.ps.h..........p.p........................................h....h.pp.h...................sh.....p...................hs....pp............htp..................h..............h......................s.hs.h..t................ph.....................................................................hp..t...h...h......t.t.h.....s..hp......................................................htt..hp.....p....hh.tph....................................h.....pph......p......h..h..s...s.....s...h....c...hL.p...p...L...c..............tp.........s..h..p..l....s..l..s....oss...............s................p............ph...hp...t..............hl.p.p......h..............................slt........ph.F.......c............tl.h...su................p.....p........ht...t....t........................K.P.p.......P..c..h.......a....h......t......s....h.p.ph.....................sl...ss........pc.....................s..................lhl.s.Dsh.ssl..tu..Ap.su.Ghps.hh........................................
............................................... 0 2568 5341 7324 +13262 PF13420 Acetyltransf_4 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:B0SAL8 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.84 0.71 -4.27 21 5238 2012-10-02 22:59:21 2010-10-06 17:18:10 1 36 2675 35 970 25638 3580 151.00 22 79.81 CHANGED IRhlppcDht.tlhplh..pp...hhpt..th.s.....hth...sshphhc.chlpph.....p.....s..phhahlhc..sspllGhsplp..th...ch...pppps.lp.hhhh..tpspcpslspc...lhstlh.pah.cphslcplhssl.sss.......Nh....s........uhhFapphGFchhGh.+ssthhts.cahDhhW .............................................................................................................lR.hp..tDh....t...t........l.....h.....t.....lh.........sp....h....h.........pp.......th...s................h..p.........................s...h...p.......p....h.....p.......p.....h......h.....c.....p...h.............p...................tp.........p.h....s...h.....h....V........t..........c...................s..........s..........p.........l....l.....G.....a..u...s.hs......sh..................cth...................shca.....s.....s.....E....h.........u......l......a.....lp........ssh...p..G..+..G..l.Gpp.........L.l.p.t.ll..ph...A..........c..........p......t.....s.......h....c...p.l.h..s..s.l...hsp................Np........s...............................ul.p.L.a....c.p....h....G.....F...p.h.h.G.t.h.p..p.s.t....h..h......s...p.ahD...h................................................................ 
0 272 584 802 +13263 PF13421 Band_7_1 SPFH domain-Band 7 family Coggill P pcc Jackhmmer:C1ZGV5 Domain \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.13 0.70 -4.82 24 734 2012-10-01 22:02:33 2010-10-06 18:16:45 1 19 672 0 189 878 126 216.40 26 57.04 CHANGED VWRF.RasNEIKhGApLsVREGQsAVFVpEGQlAD.VFsPGpYpLpTpNlPlLoTLpuWcaGFpSPFKuEVYFVNTppFssLK..........WGTpNPlhlRDPEFG.PVRlRAFGoYuhRV.sDPupFlpEIVGTDGpFTs-EIstpLRslIVpc....FuchlupotIPlLDhAANhs-Luchltptlssch.scYGLslschhlENISLPp...EVEcALDKRoSMGl ...................................................................................................t.p..IppGotlhVtpuQhslhV...pp....Gpl.h.D...s........h.p.sGh..Ys........lp..o.s..........s............h.............P..............h.........l..............p..o..............l............p.p............a........p.......aG.h.p..o....Ph........c.pcVaFlNhpchhshK..........a.GTs..s.P..l.....ht.....Ds....pa......s...................ltlRA..aGsYoh+l...sD.........P.h...h.F....h..s..p......l...l........G....s...p......s...p...a.s...h....s-........l..........p....p.....pl.pu.........llst................lp...s...s....l....s....p.....p...s...h.....s....h..h...pl...s..u...p.t.t-l.uphl...pp...t.l.s..tp.a...pph..Glplsshtltul.oh.s-......-spchlpph................................................................................................................ 1 75 138 165 +13264 PF13422 DUF4110 Domain of unknown function (DUF4110) Coggill P pcc Pfam-B_8504 (release 24.0) Family This is a family that is found predominantly at the C-terminus of Kelch-containing proteins. However, the exact function of this region is not known. 
27.00 27.00 30.90 29.60 26.90 25.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.27 0.72 -4.15 56 186 2010-10-07 10:44:02 2010-10-07 11:44:02 1 38 161 0 138 184 0 96.20 32 14.76 CHANGED tpsssp-shPpP+.PFEoLR-FasRTuspWtphhhsphc....tt............ssK.................EL+KcuFcLuE-+aW-pR-tlptl...E-ppE-.uGlt-llt.tpct-ss.sss+p..........R .....................................t...shpDshPpPc...PhEoLR-FasRTstpWtphhhsphc.t...........hssK..................EL++cuF-LAE-+aa-tRctlptL...E-ppE-.uuhtEssshspptp........pt........................ 0 37 82 120 +13265 PF13423 UCH_1 Ubiquitin carboxyl-terminal hydrolase Coggill P pcc manual Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.12 0.70 -4.98 21 393 2012-10-10 12:56:15 2010-10-07 16:51:41 1 11 243 0 279 8427 287 334.80 20 34.06 CHANGED sGLEs.phs.ssYhsuLLQhhah.p.shhphlltps.................p.hcpsLhsELuhlasMLc.s...ss.uhssQsoNhLp.shuth......p............Auslst...............................ppslpp...sppFllcplphslhsh...............hhs.........................................................................................................hpsplsp.hhshph.ppt+s...hpsps.....pp...+.s...shhshslshss.......psp.......................................thshsshLcp..hpppp.pp...hhsppspph..ps.........................................................................................................................................................sp.shphpphP.lLsIshthts.p....h...s..hphsp........................................................hltlPhp..hslsph...............................htp-..tppt........shssphhYpLpulVCc..lss.shp.psH.hVShlRVs.......tp..................pc..................WalFND....ahl 
.............................................................................................................................................................................................................................................................................................................................................................GhEs..phs..ssY..hsu......l.....LQ..............h.h........aa....h..............h.......h...p....h...h....l...p..ch...........................................................s.....h....c.........s...L....h.....s.......E......L......u...........a..........l....a....p.....M...L...c...t......................up.....s.........s....s...p....s.....o.......N.....h.....l......p....s....h.........p....p.h.............p.........................sss.lth.................................................................................tphhpp....hp.p.F.l...lp.p.l.p............p....h.h..ph..................t.....................................................................................................................................................................................................................................................................................................................................................................t...............l......t.......p....h.......h...s..h..t..h......t...........p..ps..............p.stp..................pp......hp..s.......t.h...h.....h..s.....ls..hss..............psp..................................................................................................................................................................................................................................................................th..s..h..t....p..h...L....c..ps......h.p........p...p...psp...............shCp...p..C....p...ch.....p.s.........................................................................................................................................................
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................hp..p.p..p...l...p.....p.........h....P....s...l....L....s..l.s..h.ph........s.p...........phh..........hp...t............................................................................................................................................................................................................................................................................................th...ls......hp....hp..h...h.h......................................................................................................................................................................................................................................................................................h.tp........t.t.......................t..s..h..h...Y..pL..u.hV....sc....lts....p..hp.....t.sp.h.........V.u....h...l.+..ss...........p.p.................................................................pp........................................................................W.hhFND.....hhl.................................................................................................................................................................................................................................................................................................................................................................................... 
0 88 148 232 +13266 PF13424 TPR_12 Tetratricopeptide repeat Coggill P pcc Jackhmmer:B0SDL4 Repeat \N 32.60 32.60 32.60 32.60 32.50 32.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.70 0.72 -4.04 266 22703 2012-10-11 20:01:03 2010-10-08 09:43:34 1 4096 2284 44 13792 31171 6927 76.00 21 24.33 CHANGED p....p...h...s....p.s.h..t...pl.u.thht.phschppAhphhppulph..hp.t..h..u.p...p..p...ph.s.ts..h.tsluphhtt.hs..c..h..pp.Ahphhpculpht...pp .................................................h..ts.h..s........sl.u.t.sht..phG.c......a......c...cAhp..ha.p.cuLpl...tp........h.......G..p............p........p..s..p.......h....u.ps.........h..ssLuts.a.....tp..hG.......c...........h......cc..Ah.p..hapcuLph...t............................... 0 7608 10139 12329 +13267 PF13425 O-antigen_lig O-antigen ligase like membrane protein Coggill P pcc Jackhmmer:C1ZFT0 Family \N 33.00 33.00 33.00 33.00 32.90 32.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.27 0.71 -4.20 326 5848 2012-10-01 22:04:45 2010-10-08 16:03:28 1 44 2998 0 1467 5882 1859 156.00 16 35.08 CHANGED +.h.......h.......h..h.......hs.hhhh..hhslllotoRs.u....hlshhlsh.......hh..hhh.................ht..h.ph+.............hhlh.........hsh..ls..lhslh.......hhh......................hh.tt..l..............................................p...ps..s.o.hss..Rh.p..........h.h......p..thh...p.hh........tp....t........s...h....hG.....hG.......h.s..sht.........................th...............thhh.s.....csh.............a...hp...hhhp.....hGllGhlh...hhshh 
.................................................................h...hhhh..hhhh..hh.s.l.h.h..o.t.S..R.s..u......hluhh.lsh.......hl....hhh.................................................ht.tph+......................hhlh.........hsh..ls..lsslh.......hhh........................thht.h....................................................................................................................tt..ps.s.ohss....Rh.t..........h.a.............p.tuh..p.hh.............tp....p........P....h.......hG.......hG.........hsshh...............................................................hhhtsHNh..................a.lp...hhhp.....hGllGhllhlhh.h.................................... 0 494 986 1232 +13268 PF13426 PAS_9 PAS domain Bateman A agb Jackhmmer:A3PI49 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -10.17 0.72 -3.65 245 26423 2012-10-04 01:10:46 2010-10-08 17:13:29 1 3737 3198 105 10645 42862 3384 103.90 18 17.69 CHANGED ssu...lh..lhst....p................splhhsNpsh...tphh...Gh..s...........t.p.....cl..h..u..p.s................h.p...t..lh............................s....tt.....t...........p...t.p..h.h......pp...........ltp..th.p..............ptp....t....h.....p.h..c.hhh...h...p...p.s.......................G....p......h..hhtlphtsl...........hs.p..p..sphh........th.ls..hhp....D...loc 
....................................................ulh.hh-.......c..........Gpl.h.h..s.N...p..s..h.........p.c.hh....Ga..s..............t..c......E.l....l....G...p...s................................h..p..........l.h.............................................................................................................s.....sp......t....................t....t...t...h..h...........pp..................h.h..p....tl..p..........................ps.p.........t......h.........p.t.....-....h..th.....h............p......+..s..................................................G.......ph...........h...hh..p..h...s..h.s..s.l..........................................hs..p..p.......G.p..h.h...............th..ls...hh.p..Dlo.................................................................................................................................... 0 3685 7467 9280 +13269 PF13427 DUF4111 Domain of unknown function (DUF4111) Coggill P pcc Jackhmmer:Q2YCQ2 Domain Although the exact function of this domain is not known it frequently appears downstream of the family, Nucleotidyltransferase, Pfam:PF01909. It is also found in species associated with methicillin-resistant bacteria. 22.20 22.20 22.30 22.40 21.60 21.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.28 0.72 -4.20 35 1222 2010-10-11 10:57:40 2010-10-11 11:57:40 1 5 685 0 86 694 11 106.30 45 40.45 CHANGED Ap-lFsPVPpsDhhcA.lp-olspW......sDhpGDEpN.......l...lLsLuRlWaolpTGpIuuKDtAApWslpRLP.scapslLptAppuYLGptp-shst.psppltsFlpah+ppI ...................................A-clFsPVPcpDLhcA.Lp-oLshWp.......u.sDas...GDE.....RN..................V...VLTLuRIWYoss..T..G......+IusKDVAA-WAhc..R..LP.spa.psllhcA+p.uY.LGp.c.-Dchts...pssplpcFl+asKscI............................................ 
0 28 51 68 +13270 PF13428 TPR_14 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2Y691 Repeat \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -8.26 0.74 -8.82 0.74 -2.80 819 700 2012-10-11 20:01:03 2010-10-11 12:02:20 1 232 562 0 291 7605 2621 43.80 22 8.89 CHANGED h.p.h..h..h..t..h.u..p.h..h..h..p..t..u..c...h..p.p...Ahp.h.hpp..h...h..p....t..t..s...s..s...s..p...hh...hth.up..h ....................h..hhh.tl.Aps..h...h.p...t..G...p.hcc...Ahphhcps...Lp.....t.s..P...s.s.......p..uh...htLu..................... 0 121 209 262 +13271 PF13429 TPR_15 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2Y889 Repeat \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.81 0.70 -5.34 27 995 2012-10-11 20:01:04 2010-10-11 14:33:27 1 217 880 3 284 2894 1122 173.90 17 26.66 CHANGED +l...t.chts...p..ps.h.th..AshhasptchcpALplL.ssscpts.ss-s.....taWRshApLAhsLpc-DtuptAYcpLLssstA.pssDhsphlth...h-usPhcAtclu.huac+s+ssptLppAl.hhpptcsapcltsLLts..hp..t..tst....Ap...pSshhhssRAchhcppGpspsAh+.h+cAlshssusspl+usLLWhLlDtGppsp..h+.hLstacsph.....pDusLWtsaAuAhLhLscsspALpahpp.ptpptppD.LhhhuYADAh-huGpt-tAhplRRpsa+pLpc 
............................................................................................................................................tttt.................................................................................................................................................................................................................................................................................................................................................h............h.h........h....h.....p..h....p.......p....h..t....p.......p......h...hpp......h........p.....t.t..............................ht.............s..s.......h...a....h....t.....h...A...p...l.h.t.p.h.s..p..h..st.A...h..pshcp....A..Lc..l...p...P.....s.....s....s....p....h........t.......s.....h....s.....h....h....L...h.....p....p...t..p..h...tp.........h..t..h..l...p...t...hh...pt..............................ss..s..t....h....h..........t....h......u....h......s...........t..h...t..p...........t..s......th...hp.......................................................................................................hhht.......................................................................................................... 0 84 161 223 +13272 PF13430 DUF4112 Domain of unknown function (DUF4112) Coggill P pcc Jackhmmer:C1Z9H4 Family This family has several highly conserved GD sequence-motifs of unknown function. The family is found in bacteria, archaea and fungi. 
21.90 21.90 21.90 24.50 21.30 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.20 0.72 -4.12 134 487 2010-10-11 14:44:25 2010-10-11 15:44:25 1 7 424 0 240 486 28 104.40 32 53.33 CHANGED ptl.p+lc+hAhhhD.ps.h.....plsGh..thRhGhD..u...llGL.l..Ps.l...GDhlshhhuhhll.h.pA.pcl....GlPppllh+MhhNlslDhll........G.hlPl..lGDlhDhhaKuNpRNhtLLccaLp ........................t..hp+lcphAhhhDpsh.....plssh...th+h..Gh-..u...l...lGL.l..Ps.l...GDhlshhluhhl.l.h.pA.pph....GlPtplhh+MlhNlslD..hll........G.hlPl..lGDlh.......DhhaKuNpRNhtllcpal....... 1 66 144 206 +13273 PF13431 TPR_17 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2YD44 Repeat \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.62 0.74 -7.98 0.74 -3.31 374 768 2012-10-11 20:01:04 2010-10-11 16:05:33 1 146 199 0 464 10633 7068 36.70 39 7.58 CHANGED hhpculph..s..Ps.s...spshh..plu..h.h.h.t.........pp.u...p..ht...pA.tp .................h.KSlEs..D........Ps.s.......upSaa...pLu.p.s.h.S.......................st.G......p.s..st.............................. 0 367 402 433 +13274 PF13432 TPR_16 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2Y5H2 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.35 0.72 -3.19 121 9987 2012-10-11 20:01:04 2010-10-11 16:24:43 1 2573 2894 10 3726 30807 9308 65.00 20 12.94 CHANGED hthApthhptuc.hppAtphh.pphhptt...st....sspshhhhupshh.pt...sphspAh.thhpthh......ptsstss ..................................hu.thhptsc.hspAhphh.ppslptp.................Pp...............sspAhhhL..G.p..shh..pp.................G.chspAh.phhppsl.........phtPt...................................... 0 1425 2531 3216 +13275 PF13433 Peripla_BP_5 Periplasmic binding protein domain Coggill P pcc Jackhmmer:Q2Y8R1 Domain This domain is found in a variety of bacterial periplasmic binding proteins. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.90 0.70 -5.71 64 1016 2012-10-02 13:57:41 2010-10-13 14:55:12 1 18 666 4 355 8653 3387 355.80 48 85.60 CHANGED IKVGlLaShoGohAloEpolh-sshhAIcEINssG........Glh....G..+..plEsVlhDPuSDhshaucpAccLls.p-+VsslFGCaTSsSRKuVLPVhEctsuLLF..YPspYEG...EsScNlhYT.GAuPNQQslPhlcaLhsphG.........cRhaLlGSDYVYPRpsN+Il+s.h.Lppp..Gu..c..s..lu..EpYhPl..Gp...o..-..apsllscI+....ph....t....Ps....slhSTlsGDuslsFY+thtst.....G.....ls.sp.c.hPlhuhSluEpElpul...ssp....shsGHhuAhsYFpSlcoPpNcpFlppa.+ph..a........G.s.sp..lsssshEA.uYhtlphWspAVcpA....G.os..-s...ctV+pAl.hGp....phsA...PpGt.l.p.lcs..NpHhthsstIGclpsDGQF-..llacoptsltPcPasshhst ......................................................................................................................................................................................IKVG.lLHS..L.S..G.T..M.A..I.S.E..o..s.l....p.D.sthhs...I......-....-...I..N...s...p..G.................G...V.L..........G.....+......p...l..E..s...V..l..h..D......P..A..S......s.W..P.h.F.........A..E...K.A....+pL..l..s......p.D.+.V......us.VF......G.......C.W.T.......S....s.S......R........K....u..V..L..P..V..a...E...c....h......N......u........L...L....a...............Y....P......V......Q.........Y...........E.........G..............E............S........p..........N.......V......F.......Y....T......G...........A........u........P....N.....Q.....Q....u....l......P......A........l...-.......Y....L......h...p...c..p..G..................s+..+...a.a...L...l...G...o...D....Y....V...a...P....R.T.....o.....N...+.I.l...+.s...a...L...c...sc........Gs...............c..s.....l........t.....E....s.....Y.....s.P....h.......Gp........o..D..a....p....o....Il.u....c....IK..ph.........t..............s.......sV...l..S....T..l.N..G..D....S.N.V....s....F....Y....K.p....L....t....st........G.....l.p.....u....s.....c...h....P.....V...l....u....h......S.....V......u.....E....E..
..E...l...p..G..I.......s....s.c.............sL..s.G...a......l.......u......A.....W.......N....Y.....F.......p.......S.......l......-..............s........P.......s......N.......c.....c.......F...l...p...p....a.....+sh....h......................G...s.....cp......VT......s....D.....P.....h....E..A...s...Y...l..u...h....a...h....W......t......p.A...VE.K.A.....................G....os........Dl..........D....c....V.pt..A.h.....h..G............p..h...s.A...........P....p....Gh...l.....p.....h...c......t...........N.....H.H...l.....p..K...s..s.h...I...G....c.l..p.s..D.G......Q...Fp...lV....a.p..o..s.pslcs.pPassh................................................................................................................................................................................................................................................................................ 0 75 185 265 +13276 PF13434 K_oxygenase L_oxygenase; L-lysine 6-monooxygenase (NADPH-requiring) Coggill P pcc Jackhmmer:Q2Y7Z9 Family This is family of Rossmann fold oxidoreductases that catalyses the NADPH-dependent hydroxylation of lysine at the N6 position, EC:1.14.13.59. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.53 69 1152 2012-10-10 17:06:42 2010-10-13 16:42:34 1 13 829 4 434 10129 2446 319.20 29 71.40 CHANGED claDllGIGlGPhNLuLAsLhcphs.........slsshFl-ppspFsWHsGMhLssuphQssFLpDLVThssPTS.aSFLNYL+p+.s..RLapFh.tcsahssRpEascYhpWsAspls.s...lpFuppVpslphsp...p...t........h..hpVps.....ts...ppp...phhu+plllGsG.ssPhlPss.hpsh......splhHoo.cY.Lpphsp.........tss+plsVlGuGQSAAEIah-LLpc..hs..s...hplsWloRsssahPhDposhspE.h.FoP-Yl-aFasLsp.ppRppllpppcthsasGlstsLlppIYchLYp.p.clp..u.....p....p.hpLhsppplpshpps...s...tt...shpLshcptt.ps.pp...shcsDsllLATGYch ........................................................................................................................hDhlGlGhGPh......NLul..Ashhppht......................th.pshFh-..p..p.......s......p......F..s...W...H.....s..G.M....hls...s.s..p..h.Qs.s.F.....l.pD........L.............V......o..ht.....s.Ps......s...a...SF.....l....N....YL..tp.p...s.....R.l...h...p....F.h......h........c.....p.....h......h........ssR.....p.....E.a....s......-..Y.h....p.....W.....s..A..p...p..h.....s....s..................lp.....a...u..p.p......V...p....s....lc...hst.....................p...................p........th..................ap....V.p......s.......................t.....................p.....p..p...............p..h....h..u....R........p......l..l..l.....G..s.......G.......s.....p.....P....h........l....P......p..s.....h...p...t..h.....................t...p....l....h.....H...u...o.....p...a.......ht.p...t..t...............................thpsc+lsVlG.u.G.QSAAE...l...hh...sL...hpp.......hs..t....................hpl..s..h.l.s..R..p....s.......s....a....t....s.....h......-....p.....o...........h...s...p...E.......h......F....s.........P...-......as..c...........h......F....a......s....h..sp.....p.t..R....p.p................ll.t.p....p.....+.......h.......h.s...u......l..s....c.h......l.....t...p.....I...Yc..
...h.l...Yp..p....p....lh.t............................p.....p..hplh.s...s.p.p.l......pshp....tp..............s...........t.......th......p.L........t......h.p............p.....t.....................p......t.........t.........p................tp..h.csDsl.l..hATGYp..................................................................................................................................................... 0 99 242 354 +13277 PF13435 Cytochrome_C554 Cytochrome c554 and c-prime Coggill P pcc Jackhmmer:Q2YA34 Family This family is a tetra-haem cytochrome involved in the oxidation of ammonia. It is found in both phototrophic and denitrifying bacteria. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.34 0.71 -12.51 0.71 -4.09 93 858 2012-10-01 23:37:15 2010-10-13 17:17:09 1 116 351 4 437 1110 218 122.30 16 25.29 CHANGED sCh.sCHsp..tt.ppatp.....ot.Htp.hhpsh.t.....ttt..................h...t...........t.sp..spCs.uCHs.sh.thp.s....tsh.....sht......p.........h..........tsl.sChsCH...ssss.sa...spts.ss.sst............h.t..............................hsts.stsstpsCusCHt ......................................................................................t.....................................................................................ptt..ttCh.pCHs..ss.....htp....s......t.sh........st.s..............p.........tt......................................tsl.sCtsCH.......uss...u...pa........tph.s.t.t..t....................................................................................ht......ht.ptp.C.h.tCH.............................................. 
0 157 293 384 +13278 PF13436 Gly-zipper_OmpA Glycine-zipper containing OmpA-like membrane domain Coggill P pcc Jackhmmer:Q2YCQ8 Family \N 28.90 28.90 28.90 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.80 0.71 -4.44 34 349 2012-10-03 03:18:43 2010-10-14 11:39:32 1 7 288 0 122 1054 136 99.20 26 65.73 CHANGED uCAshPsGPs......hshPusscs..hppFp..tDcttCRpaAtpps...ss..psp.....pss......p.sussu.uslGsu.lGA....usGA.shGs.....upG.AulGA..usGhLsGuAuGusuuphtshtsQ.tpY-suYhQCMhu+ .......................................................................................................................tsp.........psp.........................ppss..su.ussGAA.lGA....ssGu..shGu.......upG....AshGA..uhGuls.Gu.......hs.....G.......u....s.......ts.....p...p.....t.t..........t..................................................... 0 29 74 102 +13279 PF13437 HlyD_3 HlyD family secretion protein Coggill P pcc Jackhmmer:Q2Y7Y5 Family This is a family of largely bacterial haemolysin translocator HlyD proteins. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.48 0.72 -3.69 147 4655 2012-10-02 20:27:15 2010-10-14 13:36:12 1 49 2429 0 1181 25718 5084 112.10 18 29.22 CHANGED tlpAPhsGhl.p..t..h..s...h..p....G.ph...lp.s.Gps....lh..p....lhs.........s...p.....h.t.lcsh.l.sspphstl............Gpplpl.pht....s.hst....p...l...pGplppl..u..s......ss....s.......p....psp.....s..ht.....h.....ph..plsss........................thtlpsG ...........................................lpuPhsGh..l..t...p........h..p..............s...p..............G...ph........l......s.....s.......G....p..s.................l.h....p........................l.hs....................h.....s..p................l.h..lp..sh..l.....s.s.....p.........c....l....s.t.lc.....................G.p..p.........l....p....l....phs............s.htth....s......l................pG.p..lpp..I...u..s................ss........s..........................p.......ppp................h..h............s.....ph..phpt.................................................................................................................. 0 360 721 990 +13280 PF13438 DUF4113 Domain of unknown function (DUF4113) Coggill P pcc Jackhmmer:Q2Y7D7 Domain Although the function is not known this domain occurs almost invariably at the very C-terminus of the IMS family DNA-polymerase repair proteins, IMS, Pfam:PF00817. 24.00 24.00 24.30 25.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.38 0.72 -4.37 197 1492 2010-10-14 15:22:47 2010-10-14 16:22:47 1 11 959 0 244 1129 1436 51.00 46 13.70 CHANGED sppLMpslDplNp+aG+uslhhAu..p.G...h.p........p..........sWpM+RchhSPpYTTcWs-LPhl ....................sppLMpslDplNt+pG+GslaFAu..p.G...h..p..........p..........pWpMKRphLSPpYTTRas-L.hl................ 
0 65 133 204 +13281 PF13439 Glyco_transf_4 Glycosyltransferase Family 4 Coggill P pcc Jackhmmer:Q2Y6X7 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.96 0.71 -4.55 52 6671 2012-10-03 16:42:30 2010-10-14 16:40:50 1 54 3442 22 2039 11319 3388 166.90 15 42.72 CHANGED lllsspphtp.huGsEhhsh-LApthtp.pGapVplh...usphsp.htpp........................hhhthshhhthtphlpph..chDll..+.p.........................hhhshHssh.....................h.......hp.h.hhhhthh....h.tsctllAlSptstcplhp.hs....lsp....pclpVl.Nul-tphFp ............................................................................................................................................h.................hu.G..s.t..p..h.hhpltp..t....L....t.p....p....G...a...p....l...h.lh.................s...t.......t..t................................................................th..........................................h.........t...........t.........h....h...........h......t.......h..........h............h........h.......h...........p.......h.......t.........p.....h.......h....c...ph..............ph.D.ll..........Hh.p..t...........h......s...h...h....h...s..h.hthh.................................hph..h.l..t..o....h.Hshh....................................................................................h...h.....t........h..h...t........h....h..h....thh.............hp...p.sc..t...l..l...s.....l.S.p...h...h..t..c.p..lhp...hs.............h...............pplp.l.l......s....Gl-.t......................................................................................................... 
0 677 1353 1746 +13282 PF13440 Polysacc_synt_3 Polysaccharide biosynthesis protein Coggill P pcc Jackhmmer:Q2YCG5 Family \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.76 0.70 -4.92 52 1715 2012-10-02 21:24:20 2010-10-21 10:27:40 1 14 1385 0 471 5839 872 248.50 15 56.48 CHANGED tssshhhtlhlu+hlshpshGh.athshshhs.lhhhl.s...th.Gh..pssh.ht....t..s.tstp.p..htsh.hthsh..hs..shlluhshshh..hlsh...hh..t.s..thh.hh.hluhhhhshhhpthhpthhhsttchth.sshhshlhshhh.hhshlhh.hhhth...slhshhhs..hshuuhlshlhhh.h....htt...hc.ht.hp........shth.h...c.h.h.p.puhhhshtsh...hshhs.hphsh.hl.lsh.h.us..stlGhaps.st.hlhth.hs.lhhtsltphhhPphu.p.t ......................................................................................................................h...thhhhhhls.+.h.h...s...stt.h...Gh...hs..h...h....hs...hh...h..h.h....l...h.....th...th......p......h...ht...............tt...tstp...p..........ht.p....h....h....p....h..s.h........hh........sh.l..h....h....l...s....h....h.....h....h......h.............h....l.sh................hh..........s....s.....t.....p.....h..........h.........................h...........h..........h.....h..........h......s......h...h...h....h...h....h...s.....h.....h....s....h...h...p.....s.......h....h......p...t......t....p........+.......h...ph.....hu.......h.......p......h.....h..p.....s....hh...t.....h..h..st...l.hh..hh.hsh..................sh.h.u.......h..l...lu..........h....h.........h....u...s...l....h......u.....h.........h....h...h..h..h................hht...........tp..h.p...hp..............................ph.ph..h.........c...h...h....p...p....s....h....h....h.......h..h....tsh.................h.t.h.h....t.....tp.....h.........s......t.....h..........l....l.........s.....hh....h...us.............ss......s.G.....h...as.....h....s....h....ph....h.s.h...hs....hhsts.l.sp.hhhsph...h..............................................................................................
................ 0 144 297 385 +13283 PF13441 Gly-zipper_YMGG YMGG-like Gly-zipper Coggill P pcc Jackhmmer:Q2YAQ0 Domain \N 27.60 27.60 27.60 27.60 27.50 27.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.47 0.72 -4.44 284 420 2012-10-03 03:18:43 2010-10-21 10:38:44 1 7 388 0 100 1126 106 43.70 46 28.65 CHANGED sss..stGAulG....A.usGAs....lGuhsG........tu.s..p....G...................A....h.lGAusG....ussGu.......shGs........t ................psstGuhlGAusGAlhGshsG............ss+....G........................A.AlGAGlGAlsGhh..................... 0 21 55 80 +13284 PF13442 Cytochrome_CBB3 Cytochrome C oxidase, cbb3-type, subunit III Coggill P pcc Jackhmmer:Q2Y8Z1 Domain \N 27.00 22.00 27.00 22.00 26.90 21.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.80 0.72 -3.82 488 9400 2012-10-03 10:02:11 2010-10-21 13:26:28 1 190 2200 132 3123 13490 4902 79.30 23 30.40 CHANGED sss.psGpplY.p.p....sCt.s.CH.u......sG......ss.....u............sslt.......sp...shs........s.............cplhp.......hsh..p.G.......t.....s.....u...MP....shs.t......p.ho-c-lptlssal ............................t..htpGppla..t.p........sCu..s.CH..u..............ss......ut.....G.............................................PsLt......................ss.......sht......................................................................pplhp.................hlh.......p...G........................t.............s..............s.......MP..............uas.t..........tL.s.-.pc..ltslssYl...................................................................... 0 889 1958 2631 +13285 PF13443 HTH_26 Cro/C1-type HTH DNA-binding domain Coggill P pcc Jackhmmer:Q2Y718 Domain This is a helix-turn-helix domain that probably binds to DNA. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.74 0.72 -3.77 196 4823 2012-10-04 14:01:12 2010-10-21 13:31:50 1 44 2292 4 901 17516 1639 60.70 23 55.90 CHANGED +.Lpph..htc+....ph....shpcL.....t.....c......ts......G.....lop..sslscLtp..s.p.....p.tlsh..ssLpplCphLsC.p.suD.ll....chh..s- ....................................lphh.htc..+........ph....ohpcL.....u........c.......ps.........G..........lop.......sslsp.....l.tp.........s..c................p...s.l.p......h...ssL.p..p.I......C.c.hL......s....s..p....s-.lh.....ph................................. 0 334 628 760 +13286 PF13444 Acetyltransf_5 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:Q2Y7M2 Domain This family contains proteins with N-acetyltransferase functions. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.61 0.72 -3.44 244 1571 2012-10-02 22:59:21 2010-10-21 13:46:37 1 11 1299 0 478 1970 489 102.60 30 27.02 CHANGED LRa.pVFtpEhsusspt......t.hD...hDpFD...shscHLllh....-.ppp...............lVGsYR.lhtsstst....th..ts.hYops.F.....c.h.stlts..hhs...phlElGRSsVpscYRsttshhhLWt ..........................LR.psFtpph....suphps.........t..hD..hDpaD....p.h.s.pHLllh.....Dpsp...................c...lVGsYR..lhhssphh....................th.....ss..hYops.F..c...h..st.hht.......hhs...phlElGRosVp..scYRss.t.sh.hLh........................................................................ 0 133 304 401 +13287 PF13445 zf-RING_UBOX zf-RING_LisH; RING-type zinc-finger Wood V, Coggill P pcc Pfam-B_49 (release 24.0) Domain This zinc-finger is a typical RING-type of plant ubiquitin ligases [1]. 
28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.44 0.72 -4.24 80 584 2012-10-03 15:03:13 2010-10-22 16:12:06 1 39 268 1 422 2209 29 41.40 40 9.31 CHANGED CPls+-....ss.tp.......Ph.h...Ls.CGHlls+pslp+lsp.......tt.................p.....hKCP ...............CPls+E.....osp.pNs............Ph.h....Ls.CGHsls.+c.sLp+Lhp..............ss......................p......h+CP............................ 0 166 244 345 +13288 PF13446 RPT A repeated domain in UCH-protein Wood V, Coggill P pcc Pfam-B_2127 (release 24.0) Domain This is a repeated domain found in de-ubiquitinating proteins. It's exact function is not known although it is likely to be involved in the binding of the Ubps in the complex with Rsp5 and Rup1. 24.10 11.40 24.20 11.40 23.80 11.30 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.82 0.72 -4.56 66 317 2010-10-22 16:03:35 2010-10-22 17:03:35 1 11 124 0 248 350 8 60.80 23 12.16 CHANGED hhshppAhphLpl.ccsssD-hllosaphKls........-sP.s..phchh++ALphIAcpRpSth..LhpFLps .......................shtpAhph..L....t..l...s......p......s....s....s.D-..hllssaptphp..........ssP..s...phphhpcALphIAptRpSth..Lhphl.............. 0 59 132 216 +13289 PF13447 Multi-haem_cyto Seven times multi-haem cytochrome CxxCH Coggill P pcc Jackhmmer:Q2YA36 Domain This domain carries up to seven CxxCH repeated sequence motifs, characteristic of multi-haem cytochromes. 
21.90 16.20 21.90 16.20 21.80 16.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.52 0.70 -5.33 11 1428 2012-10-01 23:37:15 2010-10-25 16:31:19 1 16 119 2 112 1635 47 181.40 54 67.74 CHANGED ppVGCIDCHu.....slst.pphpHppc.......LtMPstssCGsCHlppFAE+EuE+cs......W.........Pptp.........Ws.G+PSHulsacA.NV.EsuhaAuMspREVApGCsMCHsp.QN+CDuCHTRHpFSsAEARcPpACusCHsGlDHNEaEsYhhSKHGslapsptc.pWsapsPLK-Ah.pcGG.TAPTCssCHMEacG.-aoHN.lsRKlRWu.sP.sPtIA-sl..sp.WhEtRh-uWlsTCspCHSspFA+saL-thDpGphpGlshhpEAcplltsLYcDGLLsGQpTNRP ..............................ss....................................................................................................................................................................................................................................................................................tGCT...F.C..HT...........s.....p............p........+.........Cso.CH..p.RH...pF...s.s.t.......AR+...s...E.pCps.CH.h.G........KD........HRDWE..AYDISlHGs.......VYQlN..K........a.D...ashp....ctL.u.D.A.D...YVGPTCQYCH....M.....R.....GG.....H..............HN.VQR.huTV.................Y............TS.....M.....GM..S...........A...D..RG..................A.Pl...WpEKR..DpWsSVCDDCHSPRFAREpLQAMDEAsK.D.AG.LK.Y.pE.TFKVAEsLhhDGhh-PMPKD..Ls........................................... 0 45 90 107 +13290 PF13448 DUF4114 Domain of unknown function (DUF4114) Coggill P pcc Jackhmmer:C1ZAY0 Family This is a repeated domain that is found towards the C-terminal of many different types of bacterial proteins. There are highly conserved glutamate and aspartate residues suggesting that this domain might carry enzymic activity. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.23 0.72 -3.16 74 320 2010-10-25 15:42:29 2010-10-25 16:42:29 1 61 151 0 82 297 55 84.00 22 9.76 CHANGED phsuG...splua....hLhssuhssthht.............................h...ao.sshN.............ss....hp.....phtshttsst...h......lGaEDhh.....uD....p..DaNDllFtlphss ..................................................................................h.tG.tluhhl.hssuhttthht...................................................h.aohtshN............................ss............hp.............ph.tshtsss......hh..............luFEDhh.s..........uD.............t....DaNDllhtlph......... 0 26 59 81 +13291 PF13449 Phytase-like Esterase-like activity of phytase Coggill P pcc Jackhmmer:Q2Y944 Domain This is a repeated domain that carries several highly conserved Glu and Asp residues indicating the likelihood that the domain incorporates the enzymic activity of the PLC-like phospho-diesterase part of the proteins. 
21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.38 0.70 -5.17 51 1288 2012-10-05 17:30:43 2010-10-25 17:12:27 1 51 942 0 459 1415 255 314.60 19 66.08 CHANGED phlGp........hslssshthsu.........sthGGlSGlsass.psstaaslSDcts....sssRaYshplsh....tst..thsslphtshssLpcss..................Gpsa...........h.DsEulth..sssslaluSEGc.....tt.h...Phl..hchs.hs.Gph.hpch.sl.Psthh.t.t.................pGhcsNhuhEuLolss-...Gp......hLasAsEss......LhpDustsp....tt.......sRllpachts.G..ts....htpahY.h-sh...........stsGlo-llAlsspt..hLslERu...au.u...................hGtsh+laplsls.s..upsssshsslt....s....lsslpKpLlh-htp.......s.h...lDNlEGhshGshh.-GppoLlllSDNNF......ss.t.QpTphlshplp ...........................................................................................................................................h.h................s...........htu.h.Sulhh............t........t......s............h....h.sl..sDps...................................t....h.hhh.hph............................t.sth.ph.h..p.h....h...lp.css...................................................................Gt.sh.......................................DsEulsh................ss......u......s......halusEhs...................Phl....hchs.....ts...Gph....h...t.......h.....s....h...Pssh..................................................hthpp..stGaE...ulshs.s..-...Gp.........hLa.shh....pss....................l.h..p.cs.t....t...................hRllpa...sht...s.....t.............................htta..h..Y.h-t......................tshsl.u-hshlsspp...hlllE..Rs....t.s......................................uhhp+.l.ac....ls...ls..t.....t........................h...........t.s........h..tt....t................t..............h.h.hptp...l..h.s.h....................................s..h......h-phEGl....sl.............................................s......s........p.......p..........lhlhs.Dssa.......................t.s.hh.....................................
......................................................................................................... 0 109 269 375 +13292 PF13450 NAD_binding_8 NAD(P)-binding Rossmann-like domain Coggill P pcc Jackhmmer:D2BHP8 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.46 0.72 -3.95 465 10693 2012-10-10 17:06:42 2010-10-25 17:56:16 1 305 3705 149 4167 53665 24260 63.30 26 12.65 CHANGED IVGu....Glu..GhshA.th..h..s.....c.....p.....s..h....c..l..hlh-...p.c.sc.lG..Gps.h.sh...ph.......p..u........hhh.c.hG.s+.ha.ts.......t........tt...hh.c..hh...cp.l .................llGuG...hu..G.LssA...hh...L..s..........................c.............p..........G...h.......c.....V...hl...h.E............c....p....s.......p.....h.....G.......G.p.h....h....sh....th.........................t.................h.p..hs...............................................h....................................................................... 0 1314 2568 3528 +13293 PF13451 zf-trcl Probable zinc-binding domain Coggill P pcc Jackhmmer:D2BIP7 Domain This is a probable zinc-binding domain with two CxxC sequence motifs, found in various families of bacteria. 21.50 21.50 21.50 22.20 21.40 21.10 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.73 0.72 -4.33 91 388 2010-10-25 17:00:03 2010-10-25 18:00:03 1 9 215 0 117 275 17 49.10 57 50.87 CHANGED pDKsLsCKDCGpEFVFTsGEQEFY.tE...KG.F...p.NEPsRCssCRcARKpppss ...tDKsLsCKDCGpEFVFTsGEQ..EFYtE...KG.F...cNEPsRC.sCRcARKpppp.............. 0 65 108 113 +13294 PF13452 MaoC_dehydrat_N zf-MaoC; N-terminal half of MaoC dehydratase Coggill P pcc Jackhmmer:D2BH16 Domain It is clear from the structures of bacterial members of MaoC dehydratase, Pfam:PF01575, that the full-length functional dehydratase enzyme is made up of two structures that dimerise to form a whole. 
Divergence of the N- and C- monomers in higher eukaryotes has led to two distinct domains, this one and MaoC_dehydratas. However, in order to function as an enzyme both are required together. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.39 0.71 -4.37 39 2537 2012-10-02 20:54:35 2010-10-25 18:06:56 1 31 1010 26 1023 4405 1494 127.80 17 29.40 CHANGED shlGpph..ss.hp..hplppuplRtFAcAh..Gp......ss..Ph.YhDptsApts.ta.sl.APPTF.hh.sls.....h.....ss.......hh.....ptl...s..ls..h....tplLHG-.Qpapa...cp...s.lhsGDplshpspls-lh-.Kps...Gs.h.calshcopssspcGchVush ..............................................................................t....................h...hsttplh.ash...ul...ss..............tt...s....hhs.t.............................s.h.h.s....P...s.o....a..sh..l.hs....................h..........ts..............................hh..........tl.........s....hs....h..................hpl..l...H..s...s.....p.phph............t+...P.....l.h...s...G.-p...l.psp.sp.l.ss.lhs.+.t.....Gp...s..sh....ls.hc.sp.h.p..p.t.p.G.p.lh..h................................................................................... 0 248 602 865 +13295 PF13453 zf-TFIIB Transcription factor zinc-finger Coggill P pcc Jackhmmer:D2BJR6 Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.48 0.72 -4.76 126 674 2012-10-03 10:42:43 2010-10-26 09:24:28 1 7 520 0 270 585 125 41.10 44 38.76 CHANGED pCP..pC.p.s.s.hp.....hhcp..............pu.lplDhCs.pCcGlWL.DpGEL-+llpp ..............CP..hC.ps.s.lh.........hs-+..............pu.lcIDh..Cs.pC+GlWL.D+GEL-+llp......... 
0 84 174 240 +13296 PF13454 NAD_binding_9 FAD-NAD(P)-binding Coggill P pcc Jackhmmer:C2D2T2 Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.08 0.71 -4.45 180 1586 2012-10-10 17:06:42 2010-10-26 10:52:14 1 24 1273 0 358 3901 1444 156.50 25 30.28 CHANGED AIlGuGspGlsshtpLhpph.......t..ts....lplplh.-s..t.sh..G.G...tsaps..s..p..spthlhNssusph.ohhscps.............ssahcWhp.....spt......................ths.s.ps..assRtlaGcYLpthhpphhp.ph...st.th...plph.h.p...s.cls...s.lpt...ps......ss....h...h.l...hh....s.su..p..s...h..tsDtV.lLAsGps .............................................................AIlGuGssGlhshh..pLlpp......................tss......lsls...la.-p.....s...th....G.........G.........hsat.s....p.....p....st.h....L.h...N..h..s..u.t.p..h..s.h.h.sp..........................................................ssa..h.cW.hppppt..........................................................t..sh..p..s..p..p.......a..h.P.R.......h..l.......a..G.cY.h....c....p...h.........h..pth......hp....ph......p.hh.......p.l.t.....h.h....p......s..plh......s...l..p.......ss...................ss.........h.....hl.......th................s.ss.....t......s..............h...shDhl.lLAsGp.h...................................................................................................................... 0 84 201 290 +13297 PF13455 MUG113 Meiotically up-regulated gene 113 Wood V, Coggill P pcc Pfam-B_48720 (release 24.0) Family This is a family of fungal proteins found to be up-regulated in meiosis. 
22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.99 0.72 -3.53 212 101 2012-10-01 19:55:08 2010-10-26 13:41:55 1 2 86 0 70 797 308 100.00 30 25.08 CHANGED hKIGhTs...ss.pcR...lpphppts......shphpllthh.ht................................sstclEphlHpphps..tRlpt..................EaFc....lsh.....pplpp................s.......lcch ..................................LKIGRus....NV.p+R.lspWpcQC........spplpll+ha.hhs.p.t.............................................................phsscs++lERLlHlELss...............hthpt.....t.Ct.tC....s+.......pH.....pEaFc.lps.........pphtt................l...l............................................................................ 0 20 39 58 +13298 PF13456 RVT_3 Reverse transcriptase-like Coggill P pcc Jackhmmer:D2BGL4 Domain This domain is found in plants and appears to be part of a retrotransposon. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.62 0.72 -4.20 89 3907 2012-10-03 01:22:09 2010-10-26 15:56:17 1 208 854 5 1534 3767 350 81.20 25 12.10 CHANGED s.h..........AEhhAl..ltGLphAhphGhp.+lhlpuDuphllptlpsp...hpsps.phstllpclcplhpp.Fpphplp+lsRcsNpsAcsLAp..hutpt ......................................s.p.AEhtul...lhuL.p....h..A.h......p...h........s...h.......p....p........l..h.l...hs.D.SplVl...p..pl...p.tp................hpsp....s...p...ht........h..h..pp....l..c..p..hhp....p....F..p....t....h......p..l....p....al.............R....p....p....N..ph....ActLAp.u...t..................................... 
0 197 508 925 +13299 PF13457 SH3_8 SH3-like domain Coggill P pcc Jackhmmer:C2D335 Domain \N 21.00 5.00 21.00 5.00 20.90 4.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.66 0.72 -3.82 69 1627 2012-10-02 18:48:24 2010-10-26 17:18:41 1 78 162 3 111 1462 0 74.30 26 36.50 CHANGED ss.cslshpuplp..pspsculas.pshthsuspplss...ups....Ysspp...lplh.ccApTs+........u...o......ahph...........ph....supslGWlDpcAhs ...................................................pcslshhshVp....s..ssssu..las.t..Phtss.usp.p.......sst...........lss........Yps+s...lpl...+cApsst..................................s..s........WYpl..........ph.....sspsIGWlcscsh........................... 3 37 65 98 +13300 PF13458 Peripla_BP_6 Periplasmic binding protein Bateman A agb Jackhmmer:A3PL33 Family This family includes a diverse range of periplasmic binding proteins. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.94 0.70 -5.09 259 10778 2012-10-02 13:57:41 2010-10-27 11:15:08 1 73 2751 52 3781 13919 6511 334.70 19 81.74 CHANGED 
sl+lGllhsh.o...Gsh...u...ss..upshhsusphulcphN.t.s.G...............Glt....G..c.....p..lchl.ht.D.sts.ss.stssptucchlp..p...cp..Vts.lh.sshsossstul.ts..hh.pp...ps.h..........hl....s.......ss..........t.....hs........s....t....p.s......ss.t...ha..hh.s....h......ss.t.tp..st......shspa............h......h......p......p.h........G........sp......c.hh.hl.s..ss..hshu.psh.t.p...sh..pph.hp....tt.Gsp..ll.............sp....hhhs...h..ss....s-....h..ss....hltp.lps..u....tsD..................s....lhsss.susssss.hhcth..p.p....t.G.....ls........t..h.hh....u.hs..ht.psp..l..tsh.......us....ts...s.p.......G.....h.....hhs.ss....ah......s......h.....s..s....st..........sptahppa..pp........t....h.............s.....p..t............s.s.t.htt...s...uYhus....thh....h.pAl.............cp....A.........G........s...s..s.....sps..........l....h................ps.l.c.s..hs...h.....s..........s.......s.h...........u.t..hth.........h...s.s.....s.pp..s....h........p.s...hh...lsp.lp......ts..u...p ..............................................................................................................l+lGhhhs.h..o........G.sh.......A....th....G.p.t....h.pGh.p.h...A..l.cc.lNt..t..G.........................slt..........G...c.........p....l.pll...hh..D.....s................t......s.....c............s....p.p........u......s....s...s....s....p.c.....l...l..s...........................cp....lt...h..l.l.G....s....h..s.S.u..s...s.h.s.s..ss.....l.h...pc.......t.t...l.......................shls...........su..................................s......ss.............s......p.........ps....t..t...h.....ha....+h..s........s....................ss....s...tp....ut................s.h.up...a.......................................h.............h..........c........p..h........s.............sc..........+.l.u..ll..t....ss.....ss...a.....G...pu.h...t..p..................sh...pps..h.c........t...t...G.hp........ll............................t.p......t.th..s...........h....ss........pD..........a.....s.u...........h..l...s....p..
...l..p..s.......t.................s..s.D...................................................................s.........l.h.h...s.s.......h...t...s...p...s..s........hl.+....p.h....p.p.....h....G............hp...............s...........h..hh........uss......hs...s..s...p....h...ts.h..............................us........ts......s..p.........................G..........h...............hh.s..ts........hh........s...............p...s........st..............................................sp..t..a....h..p..t...a....ct.....................p....h.............................................................s....p...........................s...s..h....h..sh............h....uYsus.......hh.l....h.pA.l......................................pp...s..........................u........s....h....s............stt..........................l.h....................................................p.s...l.p.s.....hp....h.....p............s........s.h.............G.......h.t..h............t....t.p......s..t......h......................h.h..h.h.p.h.......t............................................................................................................................................................................. 0 935 2299 3103 +13301 PF13459 Fer4_15 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:B0SHL0 Domain \N 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.71 0.72 -3.41 122 1717 2012-10-03 08:56:43 2010-10-27 11:35:52 1 33 874 0 647 2723 504 59.90 29 46.40 CHANGED lhVD.c..spClGstt...Csths.....P.csFph-.s.c.u.hupsh...............h.................hssc.ppphp.cA.hcsCPsssIpl ........lhlD.p..spCh.Gsuh......Chths........P....-.l...Fp.hc.-.-..G..hstlh...........................................h.ss.s.tp.ppsp..cA..scsCPspAIp.............................................................. 
1 174 451 575 +13302 PF13460 NAD_binding_10 NADH(P)-binding Coggill P pcc Jackhmmer:C2D4U5 Domain \N 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.94 0.71 -4.28 74 14495 2012-10-10 17:06:42 2010-10-27 11:37:19 1 78 3768 30 5105 51820 16534 186.90 17 65.92 CHANGED IsllGAsGpsGptlspchhpcG....hpVpuhs...Rs.ss....ths..........thphlphDl..hsh...tthspslp...G..hDsllssh......ush...tt...........c........tp..s..sp....pllcshp.t...s.s..sp.+.ll...l..l.uusu...hhp.....stpth..............h............h.h....ttt..t.ts.....-.....chl....p..s.s..sl.sWThlpPut.hhps.......t...sp....phph.....hts...........ss.....s.t.....st.....shlshpDlApshl-tlps ......................................................................................................................lhlhGA..o...G....h...l..G...p......t....l...s......p.......p......L.........h......p........p......G..............t...p..V.....t......u.....h....s.......................R.....p.....s.p...............p...htt............................ts...h....p......h........h.........t........s........D..........l......p.s......................t.s....l.......t.......p.......s.......l.......p...........s...............h.......D.............s........l.......l...h..sh....................ss.h..tt........................................................-.................................hp.s....sp..........p..l.....h....p...u.......h....p....t...............s...s........lp.......+......h..l.............h...........l...Su...h.u................s...t..................p.t...s.........................................................................h..h........ps.c.......t....ts..........-............ph.l............................p....p....s.......sh.....s....h......s....l....l...R.....s..u......h....l..hss..................t.....tt.............hth......hht..............................................................h...........t.l...t.h...t...D...hAthhht.h..t......
............................................................................................................................................................................................. 1 1558 3201 4341 +13303 PF13461 C-term_anchor Cell-wall surface anchor repeat Coggill P pcc Jackhmmer:C2CZF8 Repeat \N 27.00 27.00 27.30 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.04 0.72 -3.50 178 2550 2012-10-02 15:23:12 2010-10-27 14:32:45 1 182 317 1 148 2314 6 59.40 35 18.55 CHANGED YlDp.sGpp.ls...ssps..loGp..lGcsY....so.....psc....sIsG.....Y..plsps.....sssts.GsFs.pss.psVsYV..Y ........................YhDp.sG..pp.ls....ssps....loGp........lG-sY....so..................psc.......sIsG.................Y...sLsps................ssNss..GsFs..sss..poVsYVY................ 0 51 84 132 +13304 PF13462 Thioredoxin_4 Thioredoxin Coggill P pcc Jackhmmer:B0SES9 Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.08 0.71 -4.23 52 3857 2012-10-03 14:45:55 2010-10-27 15:14:37 1 30 2252 39 1197 5536 2874 166.60 17 66.73 CHANGED ss....s..shh.lGstcAslsllEYsshsCPaCsp.app.p.s.h..p.lhpc.YlcsG..Klpalh+ph....hs.c.s....lhAuhh...ucs.stp...s...phF.h.hhpphhhpp....ppt....hsps.........pt........hs........tp............sssh.....t.cp.h.t.sh.pss.p.hpshl.t.t.pp.p.u.cps.slp.sTPohhl..sGch..h......s.shsh-chpthI-p 
.......................................................................................................................................ts........G......s.u.....l.sllt.ahDapCPa.C....t....p....hp............t..p....l.....t.....p....l..h....c...p......h..........s..s........c.l...p..h..h..h+..ph................sh......sps..............uht.A.u..ts..............u.ts...s..tpp...........s.........pha....h.hh..c...p...h...l....h..p.p............p.pt.....hspt................pp............hs...........tp..............................................................tshs......h.pp..hpp...sh......ps....t....p....h......p..p.t..l....p......p......s...........t.p...........h........u.......p..........p...........h.....s....l..........p......u..........TP.o..h..h..l.........sGph.....h.........t..t........s.....p.th.thl........................................................................................................................ 0 389 766 1022 +13305 PF13463 HTH_27 Winged helix DNA-binding domain Coggill P pcc Jackhmmer:D2BJI8 Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.92 0.72 -3.65 61 2080 2012-10-04 14:01:12 2010-10-27 15:25:49 1 20 1390 11 608 21196 2353 65.80 23 40.16 CHANGED lot.phhl.Lppls........ttpptph.sclsthhshc.psshs......tslccLhct..Ghl..t..tp..spt.....thhplTscG ..................................................phhl..L.h.tls...............tppthoh..sc....ls.p.t.h....s.lp..pssls......pslcpLhcp........Ghl.....ppp.ts.....p.D+Rp...........thlpLTspG........................ 0 154 358 487 +13306 PF13464 DUF4115 Domain of unknown function (DUF4115) Bateman A agb Jackhmmer:A3PK73 Domain This short domain is often found at the C-terminus of proteins containing a helix-turn-helix domain. The function of this domain is unknown. 
27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.34 0.72 -4.33 73 1846 2010-10-27 16:37:37 2010-10-27 17:37:37 1 11 1803 0 409 1357 405 75.40 29 23.46 CHANGED ltlhss.tsuWlpVpsADG...pllhpslh...psG-shsl....ssptPhplplGsuu.ulhhslsGpthss.ushup.spsspLohs ...................lshshs..uc.sWlp.Vp....D..u.s.G...+pLh....s..G.hh...+tG..p..s.hsl....supsPhc.l.plGssu.uV.plph.sG.cslcl...uthhp...sp.sschsh.t.................... 0 117 257 347 +13307 PF13465 zf-H2C2_2 Zinc-finger double domain Coggill P pcc Jackhmmer:D2BGQ7 Domain \N 22.50 20.00 22.50 20.00 22.40 19.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.11 0.73 -7.56 0.73 -3.41 163 227898 2012-10-03 11:22:52 2010-10-27 17:41:20 1 6697 1851 312 138033 194266 376 25.80 50 27.05 CHANGED pLppH...h...p.p.Hp.......sp.c.....s....a.......pCt..h..Cs..tpapp ......................LtpH.......p.............R..l.HT........................GE.K.....................P.....Y...........................cCp.......p..CG...KuFs................ 0 21562 34039 60664 +13308 PF13466 STAS_2 STAS domain Bateman A agb Jackhmmer:A3PG87 Domain The STAS (after Sulphate Transporter and AntiSigma factor antagonist) domain is found in the C-terminal region of Sulphate transporters and bacterial antisigma factor antagonists. It has been suggested that this domain may have a general NTP binding function [1]. 
28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.33 0.72 -3.95 89 3458 2012-10-02 18:52:36 2010-10-28 17:48:09 1 38 1847 0 1117 4550 627 79.20 22 51.70 CHANGED LpLsGplsh.psss.s.Lhptltt..hl........ts..s.....pl.plDhutlpphDsuulslLhphtptstt.ps.pplpl.ps.sss.t.lt.plhplhGls ......................................lsGclDh...ssss...s..lh.pthtp.....hh...............pt..s.....tl.sl..DLuplshlDouGlulLlph.hcp......s...p..p......pG....t.plpl..t.s..s.ss..p.lt..plhplhsl............................. 0 371 742 971 +13309 PF13467 RHH_4 Ribbon-helix-helix domain Bateman A agb Jackhmmer:A3PHV5 Domain This short bacterial protein contains a ribbon-helix-helix domain that is likely to be DNA-binding. 25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.08 0.72 -4.09 57 610 2012-10-02 18:44:02 2010-10-29 13:14:09 1 3 438 4 200 495 214 68.70 37 66.55 CHANGED t.hpR.......SlplsGHpTSlpLEstFWshLc-lAppcuholspLlucl.....Dtpp.......................cs.NLuSslRlhsLpalp ..............t..hpRSlplcGtsTSlpLEshFWphLc-IAspcshol.spLlucl.........Dtp+.......t..............ch..NhuShLRltsLpal.t....................... 0 50 109 147 +13310 PF13468 Glyoxalase_3 Glyoxalase-like domain Bateman A agb Jackhmmer:A3PMH5 Domain This domain is related to the Glyoxalase domain Pfam:PF00903. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.42 0.71 -4.36 115 1009 2012-10-02 15:00:03 2010-10-29 13:27:31 1 8 863 2 301 1880 849 182.90 23 72.75 CHANGED lDHlllssp..sLspussth.p..pLGhshs..........sGGpHs.th..........GTpNtLlhh..u..s.....s.YlEll.u...l.c....Ppts.......................ss....t....ts..p.h.athc......................php.....p......s..sultshsh........cs...sDl..sssttpht..........p.t.G...........p.h..phst............sp.....lpWchs..hhts..sthsh...........tshhPhh....IpWtss....c.s.............p.......ts.ss.shslppltl...sss..cs...stht.thh......stlh ...........................................................hDHllhhsp.....sL-pshph...........h.p..thGh...p..ht......................s..G..GtH.s..th...................GT.tN..p..L.h....h...h....s....p................s..Y..lEll.s....l..c...sspp..........................s.....t.ts..p.hhshp........................hhp...t......stGhtshsl..............cs.....sDl..pshppchp...........p.pult.....................st.h..ph-t...............tp......lcWphh...hhts..tt..t........................phthPFh...I.pWpps....ct..................tp.......a...ss.shslpslhht.sp..p....ppshphh....h.......................................................................................................... 
0 84 176 244 +13311 PF13469 Sulfotransfer_3 Sulfotransferase family Bateman A agb Jackhmmer:A3PS36 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.13 0.71 -12.53 0.71 -3.29 403 1788 2012-10-05 12:31:09 2010-10-29 13:28:25 1 182 817 11 719 2609 1547 224.00 14 47.11 CHANGED phlhl..hGhs.Ro.G.....................oohlp.tlh.........................................................................................................................................................................t.tstthhhttt...........thhthhttthh.tthttht.....thhhhh.h.ttthh.............................................h..h.thhhh...thh...............................hh..........csshph...................hh.....lhhh..Rc...Ph.th..hhs....h...htt...................hhtht.....h........................................................................htthhhthtt............................................hp..ph.tt..pthh.ttlht.h.t.....plspt .........................................................................................................................................lFl..hGhs.RS.G....................TThlp.tlL.............................................................s...t...............................................................................................tts.thhht.t.....................................thhtt..hhthhh......tthtt.ht........th.h..hh.htth.tt.h..................................................................................................................hh...h.thht..................................................................................t.hh....h...KsPtphh.h........................................................lhthaPsA+hlhhh.Rc.....Ph.ss......htS...t.h............htt...............................................................tht....th.....................................................................h.thhhth.........................................................cphhtcP.t.
h.pplht.h.......t.......................................................................................................................................................................................... 0 256 451 586 +13312 PF13470 PIN_3 PIN domain Bateman A agb Jackhmmer:A3PJ99 Domain Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases). 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.69 0.71 -3.43 201 1089 2012-10-03 20:43:45 2010-10-29 14:04:15 1 5 687 0 443 1286 243 110.50 21 70.24 CHANGED plV....lDTNVll.....sull.......................................sthllphh.....hpsthpsh...hotpllpEhtpsl.......hc...t....hthttt.....t.h..ht...............................................................thhpsh.hhsh.....t....D...scDp+hlssAls.........u....p...AshlVTtsh+ .................................lllDTNVll..........ssll..t.............................................sphlhphh.....t..psthphhhosphlpEhtpsl.......t+.h.t....h.thpthtt....hhhhhh................................................................................thspsh.....ht.......hp.......D....cDphhlssAls.........u......p......AchlVTtst.................................................... 0 161 315 393 +13313 PF13471 Transglut_core3 Transglutaminase-like superfamily Bateman A agb Jackhmemr:A3PJ09 Domain This family includes uncharacterised proteins that are related to the transglutaminase like domain Pfam:PF01841. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.75 0.71 -4.30 84 283 2012-10-10 12:56:15 2010-10-29 16:51:48 1 7 246 0 123 398 53 113.40 20 54.94 CHANGED hlhhlshphhhtthshtth...httts.tst.sssstpph......pplspslptsup....hhPhps....tCL.pulusthhLctp.GhssslhhGVppp..........s..htAHAWlp.sssth.....ls.st...tshppasslhs ....................................................................................h.......thhh.hhs.h.h....h.t...ths...tt..t.sptpph......pphspslptsup.............hsshcs....sCL.pulusthhLp.pp.s.h.s.sslhlGltpp.......................shtAHAWlc..sssth....ls..st....tthptas.l..t....................... 0 38 92 106 +13314 PF13472 Lipase_GDSL_2 GDSL-like Lipase/Acylhydrolase family Bateman A agb Jackhmmer:A0LCN0 Domain This family of presumed lipases and related enzymes are similar to Pfam:PF00657. 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.24 0.71 -4.14 323 11620 2012-10-02 11:02:24 2010-11-01 11:55:58 1 260 4043 70 3589 11502 2666 188.90 16 56.62 CHANGED lhlGDS..hs...tG......h...s..........s.s.....................t..t........t........sa...s..th..l...tp........................p.........t....t........s...h.p.h.h.shuh..sG...s.s...st.....p...........................h...........h.t.......p.htp............................................th...............ttps..c..ll.hl.t..hG.s.NDhtt...................................................................t.....ht....hpp...htpt.lpp...l...lpp.h...........p.s.ps......p........l.l.l..l....s.h...s..shh..tts...................................................ttthp....ph.s.....p...t.l.......p..........p..h...u...p..p.t......s..sh.hl...c.htss.....ht.....s........t.........t.......th......t..p...h.ht...................................sDs....l...HPss.tGhphh 
...........................................................................................................................................................hlGDShs.....t.G.....h...t...............................................................................................tt......t.......sa....s....th..l....tp............................................................t.............t...t..s...h.p...l...h...N...hu....h.......s.G...t...s...st....p.................................................h............h.t.......p.hpt...........................................................................................t.h.....................ttps...c........hl..ll.t....hG.s.NDhht.................................................................................................................................................................................................................s........hs...h.p.p....h.tps.lp....p...l......lpp.l..........................................p..sps..........p...........l.l.l..h......s..h....h....s.ht..tht........................................................................................................................................pphhp......ph..s............p....h...h............p.....................p....h......u......p...p.h.............s.....s....hl......s..hhth.............ht......s.............t..............................t........................t.......hht................................................................................................sDs......l....H.st.tGhth......................................................................................................................................................................... 0 1283 2468 3145 +13315 PF13473 Cupredoxin_1 Cupredoxin-like domain Bateman A agb Jackhmmer:B3CV67 Domain The cupredoxin-like fold consists of a beta-sandwich with 7 strands in 2 beta-sheets, which is arranged in a Greek-key beta-barrel. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.17 0.72 -4.14 62 2319 2012-10-02 17:41:00 2010-11-01 15:20:39 1 22 1660 14 491 3072 708 105.60 23 33.67 CHANGED thhhlhh..hh.hhsp.....sst..........s..ssptp.....hplplp.ss.........tapPsplplsuGp.hp.lp.lcNpsss..........s..tEac..c.....Lp.hc.cslssGpsspltlssLcsGcYpFhss...hp...p..+GpllV .............................................................hth........................t.....................t..sshpp......lpls.l..p...st................................tapPt..pls.lpsGp.sp.lh.h.p.N..p.ss.p..................................s...p-h..h.c................................ht..hc....c....s......l..s...s.G......t.p......p..s..l.p.h.s.s..ps.Gc.Y....phsCs.......h.....ph....t...+GplhV............................................................... 0 136 300 392 +13316 PF13474 SnoaL_3 SnoaL-like domain Coggill P pcc Jackhmmer:D2BK05 Domain This family contains a large number of proteins that share the SnoaL fold. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.52 0.71 -4.07 92 1249 2012-10-03 02:27:24 2010-11-01 16:36:19 1 78 757 19 493 3716 1050 118.60 16 47.55 CHANGED lpphhspa.tpu.hspsDh-thhshhs.....-csshlG..ssssch..hh....stpphpphhcthh....s.p.s...p..shphph..t..p.hpl....p...t.s.s....ssuhhst..hhph......h......p.h.....t.h...Rs.osl..h.c+pss..sW+lsHhHhS..h..shs ........................................................thhpta.hpA.hpp...tD...hcs.h.h.s.has.................sc....s.......s....h.......h....t.......s.s....s.......th.......hp.........Gh.p....t.h....p...p....h.h.p...t..h..h................s..t..h.....p......sh.p....h.p.h...p......p...hpl.............p......h..s..s....shu...h..st.t....t.h..ph.............p..h.....t...spth............tthh....+s....T...h..l.......h.p+..p......s.s......t.......Wc..lsphHhS......t.................................................................... 1 171 331 420 +13317 PF13475 DUF4116 Domain of unknown function (DUF4116) Coggill P pcc Jackhmmer:D2BI90 Domain \N 21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.51 0.72 -4.47 608 1708 2010-11-01 16:51:33 2010-11-01 16:51:33 1 50 79 0 1429 1843 1299 48.20 25 44.03 CHANGED D+-l...lhps.....lc..ps....s.h.............slpa...s...s...pp.....L....+s........D+......-l...lhp...A..lc.......p..ss....p..............s...l....pass...p................p.l.c ...................................................s+-llhpA..lp...ps....u.h.....................slpa....s...s....cp.......l.+s.........................D+......El.....lhp....Alc............p.ss....h....................s...l.....pahs.t.................................................................................. 
0 1384 1406 1429 +13318 PF13476 AAA_23 AAA domain Bateman A agb Jackhmmer:A0LBT8 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.82 0.71 -3.99 469 5945 2012-10-05 12:31:09 2010-11-01 17:09:13 1 57 3641 10 1564 18355 6884 299.00 13 41.98 CHANGED plplps..a.tsa...............p.s.....t...p....l....cF................sp......s.............l..s........lIhG.t.......N.........GuGKoTll.-Al....p..h.s.L.....h.u.....ph......tph...ppt.................................pt...t......tht........ptph.p...........ht.t..........................................ptp.t...tsh.....hclp.............................hpt...tpt...................................................................................................lp......ptp..ht..p...p....pt.pth...............p............tt............t..tp....hh..ph...............pp.h.p.....p..h...lpp................ht..tp...tt.....................................................th...p.....h.....h.h...h..hspp.p.........................................t...........................pp......p....................................................................ph.p.t..h.......p.pt...h.......pp..h..p.p..hpp..........h.......hpp.........................................................................h...........................pt..pp....hpp.......................................php...pl....ppplp...ph.......p..clp 
............................................lplps..h.tsh...............p.p......h.....p....l.....-F.......................ss....s.....................l.s...............l.lsG.t...........s...............Gu..GKo........oll...-.Al........p........h...s.......L......h...s...ps.........th..ppt.............................................tt.................ttph.......h.........................................................................................t.p.t....tt.......hpht...................................hp......ttt..................................................................................................................................................................................................................................................h......t.....hh............t....t..t.......................................................t.........................h...th...........................tt..t.....p.h.......htp......................hht...t...t...............................................................................h...t............h..h.ptp..........................................................................................................p...............................................................................................................................................................................................................p..t...h.......t..t......h......tt...............................ht...................................................................................................................................................................................................................................................................................tttt..........................................................................................................................................................................................................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 583 1015 1360 +13319 PF13477 Glyco_trans_4_2 Glycosyl transferase 4-like Coggill P pcc Jackhmmer:D2BIQ3 Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.05 0.71 -4.34 45 1823 2012-10-03 16:42:30 2010-11-01 17:19:16 1 17 1376 0 466 6381 2986 141.00 15 38.32 CHANGED Klhhlusss...p...h...a...s...h...+...a....spt...Ltpp....Gh-lclho....css....pphth..ttth.phhpl......p.s......hps....hph..l..p.h....h.pl..p+llcchp.PDllHsHhs..psh.ulhutlh....h.htt.h..shl..losaG.s...Dl...hpt..spps.........hh..ch......lh+.h.shp..pustlh.ssu .....................................................................................................................................h.........h........h........t...h................hpt.......hh.pt......sh.c..lhl.ls..............psth.......t.............p..hh............t..s...h....p..h..htl..............................s.ht.......................ppt..........hp.......l.....p...h.........................h...pl.....h+....l...l...+.....c....h......p.....D.l.la...s...a......s..h......p.s........s....l.....h....u...t....l.u...s.......p...h....s....t......h.......th.l........h...s..h..p....G...h.........sh........hpp...s.ht................hh.ph.............lh+....h.....ht.....sp.hh..................................................................... 
0 196 332 414 +13320 PF13478 XdhC_C XdhC Rossmann domain Bateman A agb Jackhmmer:A0L5F2 Domain This entry is the rossmann domain found in the Xanthine dehydrogenase accessory protein. 27.00 27.00 27.00 27.40 26.90 26.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -3.86 296 2511 2012-10-10 17:06:42 2010-11-01 17:23:08 1 15 1622 6 738 2055 1067 138.90 30 40.65 CHANGED LllhGAGclupsLsplAttl.G.acVsllDsR...................p.....p.....t.....a.......s...........ts.....sp........lh........sh...s........................t..p..th.......ttl.......p..hs..s.ps...h..l..l..h..THsa.p.hDh.tsLpthL.p.p.........s.st.YlGhlGS+p+ttphhppLt..p..Ghs......tpplsc.lpuPlG.Ls.IGucoPpEIAlSIlAEl .....................................lllhGAGclupslsphushL.s.acltlhDsR...................t.....p.......hF............P............s..st........lt......s.h.s..................................s.th.......tth.......p.lsspo..h..l..l..l.THshp..hDh..sLptsl.cp.........s.st.YlGhlGS+++ptphhccLp.tc...Ghs.........cpclsR.l+uPlG....ls.lGu....coPpEIAlSlhAEl.......................... 0 233 476 620 +13321 PF13479 AAA_24 AAA domain Bateman A agb Jackhmmer:A0L701 Domain This AAA domain is found in a wide variety of presumed phage proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.46 0.70 -4.75 49 566 2012-10-05 12:31:09 2010-11-01 17:32:31 1 4 488 0 83 510 154 223.00 18 78.95 CHANGED spsh+hllYGtsGhGKTohstsh....scsLhlDh-sG...htsh.....tscslplp..............sap-htc..............................h...htpp...ls..pYcslVlDolophpchhhthhttp...thscp.sts....hpsYGhhs.pthhphls...thhp..hstsllhsAatspcp.....phsthshh..phts....pstsplhshsDhV..s.h.....t..pt..........Rhh.hpsssthhuKs............................................pLss .................p...hphhlYGpsGsGKT....o....hs..pph.................spsl.....h.....l.D.h..-.tu..t....h.p.sht.........ssss.l.p..lp...............shpphtp.........................................hltth.t.pp......tp..pacslVIDols..ph...p..c.h...h..htthh........tt.cp.sp..........hpsauhhs...pph.hphlp.......plhp....hshpllhs.....u.+psppps.........tss.t..hspht.phps.......phts....tlhs.sDll......sphth......pttst..t....................thh.h.p....p..h...sc.p.................................................htp.hhh................................................................................ 0 35 62 70 +13322 PF13480 Acetyltransf_6 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:D2BI49 Domain This family contains proteins with N-acetyltransferase functions. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.60 0.71 -4.23 211 1584 2012-10-02 22:59:21 2010-11-02 10:42:47 1 24 914 0 667 4625 1145 142.00 14 36.55 CHANGED pphR...p....pl+ppt...++h.p....c.h....u...th.ph...ph......s...p....ss....p.s.......hp.t.......h....hphh.......h...p....p.....hptp...........sh...hs.p.s...hht.p...ahcplhp...phtt.......ts..t...h.p....lhhlp....h....s....s..c....h...lAsh..hshhpss..phh..h..hhsua..c.p.....h..s...chuPGhlLhhphlcpshp....p....G..hp.t...hDh....st........G.......s....p...........p....YKp ................................................................................................t....tph+pth+ch.t........c..t...u......h..ph....ph............t....p....s.s....p.p................hp...t.......h....hphh.......t....p....t......hptp...............sh.h............hst.p...ahpphhp...th..................ts...p....h..c................l...h.h.lp..............h.......s........s.....c..........h....l.A.s.h.......l.s....h.....h.....p......s............s.........p.......hh.........h.......h..h....s.....u....h......ctp.........h..p.....phus...s..t.lL....hh....ph...l....cts.hp.....p.....G..hp..h.....h-h....sh............s.......t................................................................................. 0 248 474 580 +13323 PF13481 AAA_25 AAA domain Bateman A agb Jackhmmer:A0L707 Domain This AAA domain is found in a wide variety of presumed DNA repair proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.11 0.71 -4.90 101 6612 2012-10-05 12:31:09 2010-11-02 11:53:00 1 60 4750 17 1429 15488 5931 168.50 30 36.74 CHANGED ttp..th...phhs.......hs.h.tt....hss.t..ss....hchllp....s.....hls...tus...hshlsGtsssGKohlshsluhslu..........................pGt.hht.......h.s...pt.spVlalssEss...tt.plp...cRlpt.ht..........tp.h.....................st.....sph.hhhp..................t...................t......h...tt..hppltphlpp..ts....ssllllDsl..sp...hh..ss.....sp..ss.s.ss....h....tthlptlppl.tp.ph.s..ss.llllcHssKs...ss ......................................................................h.t.................c...h.o.sh..tE.......hsR.VLG.....G............GlV...sGo......ll.Ll....u.....GsPGhG......KSTL.L.L.p.hs.s.p.lu................................p..............................................t..tp.lL.Y.....l....o.....G..EES........hp...Ql.p...........h.R..A..p...R..ls...............hst........................................................................spl...hl..h..s................................................................................................................................................cs.s...l.c.p..l.t..t..t..l..c.p....tp............Pc.ll.l.ID..SI.....Qs....h.h.....ts.................sh...su...s...ss.....................h......Rcsss....t.Lh...c.h...AK.......pp...s..........hs..lhlVG.HlTK-G.t................................................................................................................................ 
0 493 948 1211 +13324 PF13482 RNase_H_2 RNase_H superfamily Coggill P pcc Jackhmmer:D2BIZ8 Domain \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.09 0.71 -4.43 108 1043 2012-10-03 01:22:09 2010-11-02 12:04:53 1 24 932 19 331 2850 1595 173.30 19 32.29 CHANGED lFaDIETs.......Gl..tstt.sh.....l.aLlGlhhh.....ps..sph.t.....hhp..hhhc.p..s.t-phhh......p...........ltph...sh.....lssaNGpsFD.hshlc..phh...........p.hp.hs..............th......t......al.....DL.h....p...hh.....pp..hsh.........pu....uLKslEch.lGh.p+..c..ss..lsGtpulhh...a......p......p......ah...css-......ps.........h..LcpllpYNcpDshsLhpLhshls ..............................................................................................................hhDlEss................s....h.............t.sp............h..hlh.......G.....h..h....h.........st....pph...........................hht....h.h.h..p....s........s.....tcpthh..........................p.h.....................thh.p.pt......s.h..........................h.hs.a..shp........s........a.-.hshl..pphh......................tph.t..hst...............................................sh......t........hl....D..L...h....p.....hl............cc..phh....................pu.huLKs....l........pph....h..Gh.......ph.....c..ps...h.....s....Gt..p..u..h..ph......a.......................p..............p.............ah......p.p.ts.............tp.......................h..h.p....plh.pYNcpD.lhuhhtlhphh.h.................................................................................................. 0 128 259 310 +13325 PF13483 Lactamase_B_3 Beta-lactamase superfamily domain Coggill P pcc Jackhmmer:D2BIZ6 Domain This family is part of the beta-lactamase superfamily and is related to Pfam:PF00753. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -11.10 0.71 -4.53 85 2488 2012-10-02 15:46:01 2010-11-02 12:47:16 1 7 1789 3 832 7626 2715 181.30 21 68.69 CHANGED plpalGHusaLlc.s.s.....G.hpllsDPa..............p.......t...hGh..h.st........................hpsDlVhhSp.sh.D.Hsss.....ssl.s...............................tspllts......................s.........us..........hp.l..s..sl.tlpsl.sssp................cph...s.Gh...ph..........s.s...Nsha.la..p...s.....GlslsHLGchsp..lstpph..ttlu.clDVlhlPVsG.......s.hshstcpuhclscpLpPplllPhH .......................................................................................ploahGHushhl..cs..s.............s...pp..lllD.Pa........................................................t..........t.sh...t..t.....................................................spsD.h..ll..l.oH...sHs....D.H.h..ss..................p..h..l..tth............................................................................................................ttps.t.h..l..ss.............................................................................G.........sp............................hp...h.......s.....th...p...l.p....h.s...tuhH...................................................................ss.........p........................................s........s..s....G....h.....l.l.....ph.......t.................sh.p.l...a....H.....s.....G.....D.s...........s.............h.............................t.........h...........t.p...h.......t.....t........lD..l..hh.....l.......Plus.............................sh.sh.s..p.p.A....s..h.h.p..h..l.p..s..p.h.slPhH...................................................................................................................................................................................... 
0 285 572 722 +13326 PF13484 Fer4_16 4Fe-4S double cluster binding domain Coggill P pcc Jackhmmer:D2BJB3 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.49 0.72 -3.23 562 3179 2012-10-03 08:56:43 2010-11-02 13:29:30 1 73 2633 0 658 3760 1232 65.10 42 16.23 CHANGED hC.ss.Cst.ChcsCPs.....sA......ls....ts............p..h.c.sp.p....................shsh..................hht..h....c.s..hh...st...hc.sh...h.............hh......huCs...h.....Ct..tlCPaN .............C.Gs.Cst.Chct.CPT.........sA..........ls....ts............hp...l...D..u.p.+....................ClS.a.........................sh..ph....c..G...hh....s-.p...hR.sh..hu....................scl........YGCD...s.....CQhlCPaN................................................................... 0 209 436 555 +13327 PF13485 Peptidase_MA_2 Peptidase MA superfamily Coggill P pcc Jackhmmer:D2BH64 Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.89 0.71 -4.10 226 1955 2012-10-03 04:41:15 2010-11-02 13:30:43 1 88 1162 0 810 8921 2335 137.20 14 25.04 CHANGED sssu....ht.....h...stt..thlh..............h..........t...h.....t...s............tt..ts.hp.......tlltHEhsHhhhtp.h...........s..........s.....................s.......thP..hW......hsEGlApahs.......................tp..........................h........p.s........t.......ht.tthtt...tht..ps....ph....hs..................hp...p......l.......pt..sh...................pttpss......h...sY...t..puhhh...spalt.....p....p.h....G...p.p......pl.tp.............h.lp.....ph 
.............................................................ht..............................................................................t....t.hh.......tl...lsHEl.sH..h...h..ht..th.......................h.s.......................................t.....pts.....hW........hsEG.hAp..ahs......................................tp....................................h...pt.............ht..tth...tp..........hht.......pt......ph.....s...........................................ht...p..........h............ts..........................ttt.htt........................h....sY.........t...tuh.h.h.......h.thlt.......p.....t..h.....G..........t......hh.thh...h................................................................................................................................. 1 380 616 731 +13328 PF13486 Dehalogenase Reductive dehalogenase subunit Coggill P pcc Jackhmmer:D2BJ91 Domain This family is most frequently associated with a Fer4 iron-sulfur cluster towards the C-terminal region. 24.30 24.30 24.70 24.80 23.80 24.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.07 0.70 -4.97 89 620 2010-11-02 15:39:17 2010-11-02 15:39:17 1 12 161 0 63 596 74 241.20 24 54.61 CHANGED ush++PWWVKER-htcPTsElDWslhp+.hD....hppt.th.......................t.t.h.t.h.s..t............t.....htpttp...thtpthppphPGhshps.......................hALt....tuht....s...h.t....sh....t.s.............................shhs........s..sT..P-phG..lP.+WpGT.PEENhpMlRAAh+aaGus.pVGshEl.Dpps+.Klhastst.............................spth.sa.ED..l-..csYp.........sssphlIPs+s+.ahlsasshps....p-hh+ps.............ss...h...t.....ss.u.sh.huYsp..tshlp.sphppFl+uLG.Ypul....s.......s...s.........shsss..suhulhoGlGEhuRhs.hsl.oPcaGshhRh..hthlTDLPL 
..................................................................................................................................s.....sWalpph-..p...oh.lDWs.h....t..................................h..................................................thhtp...st.t.t.........................Al.....u.........................h................................h.t............t...tths..hs..hWp.GT.PEEN.thl+sAhphhGus..l.Ghh.l.spp....h...p...phhhtht...............................h.h..h.ps..h......sh...............sstphhhPpphp..hlshsh..s...-hhcps......................sh...ht..ss.sshhs.Y.phs.ht.hhltpFl+sLG.Ypu..h....s.......t...s............shh.s..sshu.hhuGlGEhuR.u..hl.sPca..Gs.hh+h...hthhTDLPL............. 0 25 57 57 +13329 PF13487 HD_5 HD domain Coggill P pcc Jackhmmer:D2BI63 Domain HD domains are metal dependent phosphohydrolases. 21.80 21.80 21.80 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.90 0.72 -4.00 81 3171 2012-10-01 20:28:14 2010-11-02 15:49:01 1 216 1158 7 1242 5797 561 64.10 32 13.77 CHANGED ss...Los.p-pt...h....l.p...p+shhshphLpp..l......P..........ht.......pls...cllstppEphDGoGaP.c.uLpu-pIsltuRlL ......................................tLospEhph....h..ppHshhG..hcl.L..pp..h.......................s.............................ht..................tlt.......c.l...s.h.p.H.HE+hDG.o.G.YP.p.G.....Lp....G-pIsl.uRI...................... 
0 538 906 1111 +13330 PF13488 Gly-zipper_Omp Glycine zipper Coggill P pcc Jackhmmer:C1ZB73 Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.40 0.72 -4.32 279 2160 2012-10-03 03:18:43 2010-11-02 17:37:02 1 18 1426 0 437 2410 297 47.50 42 26.37 CHANGED hG.ushGA.ssGAhlG....s...s...s....G......s..t.......s.tG.Ahl.GA.u....lGussGu....s....lG....p..t....h.cppp .........................GAslGA..hlGAsl..G......s....h....s......u.........s.p.......t.cG.AlI..GA.u....lGA....ssGu....h....lG....h.h....h.-.p..................... 0 116 258 352 +13331 PF13489 Methyltransf_23 Methyltransferase domain Bateman A agb Jackhmmer:A0LD74 Domain This family appears to be a methyltransferase domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.06 0.71 -4.52 129 14355 2012-10-10 17:06:42 2010-11-02 17:48:41 1 362 3944 67 5881 52099 19466 173.70 14 51.54 CHANGED htp.th......ph...ht.......phl.pp...hh.................ttst...................p.lLDhGsGsG..h..h..................................hp.hh....pp...............p..........G.......h.....p..ht...shD........................................................................................h.....................................................pp....................................paD..hlssh.cllEHlt....s...st................phlppltp...h.l..p..s.s.Gh..lhlps......h.................................................ph..htp..................h.t..h.....h.........ts......sHh....saa.......otpslptlh.cpt.G..ap..lhph 
..................................................................................................................................................................hhh...............................h...........................tst..................................p..l..L.D.lG.s......G..s....G....h....h.............................................................................................s.t..hh...........tp..................................................t.............s....................h.........p..lh.....GlD.h...u....thhphst.........................................................................................................................................................................................................................................................................sp..................................................................................................paD....h...l....h....s.....h.....p.....s........l.....p.....a....ls...........-.........t...........................................p..h...l...p...p...h...t...p................h...L.........c...P.......G...G..h.....l..h....h..ss..........sh....................................................................................................h........................................................................................................................................................th..............h.h...................t.............h........h.h.....................t...................................................................................................................................................................... 0 1716 3566 4946 +13332 PF13490 zf-HC2 Putative zinc-finger Coggill P pcc Jackhmmer:C1ZG19 Domain This is a putative zinc-finger found in some anti-sigma factor proteins. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.68 0.72 -3.98 461 2372 2010-11-02 17:57:24 2010-11-02 17:57:24 1 52 1229 16 847 2033 89 34.60 27 15.34 CHANGED C.p..c.hp..phlstalDsp....Lstt.p..p.tplcpHLtpCssCpp ...........C......h.....thltsal-Gp....Lstt.c..p.tplcpHLtsCspCp...... 0 368 627 768 +13333 PF13491 DUF4117 Domain of unknown function (DUF4117) Coggill P pcc Jackhmmer:C1ZFF5 Domain This family is frequently found on DNA-translocase FtsK proteins at the N-terminus. The function is not known but might well be enzymatic. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.19 0.71 -4.81 92 2646 2010-11-02 17:57:43 2010-11-02 17:57:43 1 4 2575 0 664 2313 1905 162.10 24 17.91 CHANGED p+hhcEhhhlhhhhhulalhluLlSa...s...ssDPu..............W............sp......sss..ss.ts.lpNhuGhhGAalADh.hh.h.lhGhuAahhshhh...h...hhs....aphhpp................p..............p..p............ph......hhch......huhhlhll.sss.ul.h....uh.phhth....t.......th.shs.sGGllGphlush....hhphl..GhsGuh......Ll..llslhhluh..slh....sthS.........................W...l...pl...h-............plG ...........................................t......hh.hl.hhhhhshhlhhu.lhoa.........s....tD.su...............a................................sp......ss..........hpNhsG.hhGAaluDhlh.h...hFGh.hAah.lP.lhl...h...hhs...hhhhpp...................................p......s.tp...........th........sh+.h...........l.G.h....l.h...l.l.l....ss.s....u.l.h................s.l......t.....hs............s.....................h...h.u...uGGllG.t.h.l.u.s....h...........ht..s....h....l....sh.h....G..us.......l.l....Lls..l.hhhul......hlh......s..shohhplht........................................................... 
0 226 437 557 +13334 PF13492 GAF_3 GAF domain Bateman A agb Jackhmmer:A0LBX7 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.34 0.71 -10.37 0.71 -4.04 153 4956 2012-10-02 14:34:25 2010-11-03 10:47:51 1 693 1936 7 1590 12597 1586 135.70 16 20.13 CHANGED sh.c..plhpps........hph.lt.phh.s.sct.....ss.lhh...h.c..ps..pt.th......phh...ssh.....t..ptth..tt.....................................................sl.s.......tsp.s.lhp.....tshppt..p..h..h....h.........s........h.......tt.......p......p............h....................h..s.s..........t............sh.........hh...lPlhs...tt.................pshGlls.l.t.....p.t.........s.tp.p.......h..s..t.pph.phlpt..hushluhulpp ......................................................................................................................................................................................................................................pplhpph...hph..lt..phh..s..hcs........ss...l..hh........h...c......pp.........tt..ph............phh....ssh........tt...p.......................................................................................................................ph.s...........hsp.s.lsp.......ts..h...cpp..........p....s.......h......hh.....................ts...................h..........tp...........p........s...........t........................................................h.s.h...................p.....................sh..............lh...lPLhs.......ss.......................phhGllh.l..t.....p..s..........................p..tp..t....................a...s...t..cph.plLpt....hAshlAhAlpp.................................................................. 0 627 1138 1442 +13335 PF13493 DUF4118 Domain of unknown function (DUF4118) Bateman A agb Jackhmmer:A0LBX7 Domain This domain is found in a wide variety of bacterial signalling proteins. It is likely to be a transmembrane domain involved in ligand sensing. 
28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.47 0.72 -3.84 102 1922 2010-11-03 11:55:06 2010-11-03 11:55:06 1 58 1608 1 460 1522 70 101.40 25 13.48 CHANGED th..l...cshlhhhlshh..lshhlts.tts.....h.t.sh.hh...h...ha..l..lslll.hu.lta.Ght.ulhuull..u.hsh.....hhhhh.h.t.h.............shhhht.p.........hhhhhhhll....lullsGth.ssthppp ........................s....slhhhhh..ssh....hs.hhh.t...hst......h....s..hh....h...la..L.LuVlllA..l.hh.G.h.h.s.ullAull..ss.ls.a.....shaFh.s.s.h..........................ohsltcsp............YllTFslhLh.lullsusLssth+................. 0 139 291 383 +13336 PF13494 DUF4119 Domain of unknown function, B. Theta Gene description (DUF4119) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0594 Family Based on Bacteroides thetaiotaomicron gene BT_0594, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2] 27.00 27.00 80.80 80.60 22.10 20.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.23 0.72 -4.00 3 17 2010-11-05 17:33:43 2010-11-05 17:33:43 1 1 16 0 1 13 0 95.60 61 72.61 CHANGED pcSKKsuKNNsuspph+olE+..-+Qs+-EIIScDELEKRsGITGDt+tYLTsaLRpFhEG-tHpsYsKKLpsLAcYIaDp+ILYIsKHGGYKLMElS .............puKKGSKNsKsKRNVQThcKAPs+pSKEEIISEEEL-NRIAIoGDIRLYhTMaL+IFIDGaF+HPKKKKLINLAQYIYDQKVLYIHKHGGYKLMELS. 
0 1 1 1 +13337 PF13495 Phage_int_SAM_4 Phage_integr_N2; Phage integrase, N-terminal SAM-like domain Bateman A agb Jackhmmer:A0LBL0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.63 0.72 -3.66 83 3438 2012-10-02 14:21:04 2010-11-05 17:53:17 1 21 1789 4 599 5743 1666 84.40 20 27.95 CHANGED llcphppth.+h...cthuhpTpcsYhhhlpp....alpah....p......+..........p..P.pphssp-l..........ctFLstLs..pcp.....susuTppps...lsALhFhacplLppshst ................................t.hhp....h..ph......tthS...pThpsYhptlc.p..........Fhp.ah.......t..........c................................p........p..p...l...s...s...p..-....l........................ctalsaLt........c+p.........................hShsThspt......hsuLphh.ap.hlhpp................................................ 0 213 418 518 +13338 PF13496 DUF4120 Domain of unknown function (DUF4120) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2585 Family Based on Bacteroides thetaiotaomicron gene BT_2585, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 27.00 27.00 27.30 38.90 24.70 26.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.42 0.72 -3.97 6 44 2010-11-05 21:26:43 2010-11-05 21:26:43 1 1 29 0 6 29 1 93.80 70 87.90 CHANGED KIhC.QEHY-pVVpYAKSIsDpTLQpClERLKQWEcNsssPCEIELYYDaAPYSFGFspRYPDGpsGIVGGLLYHGpPDcSFAVhl.pPFHGWoIHT ....KIhs.pEHa-pVtcYAESIGDToLQcCLERLKpWEcNPstPsEIpLYYDHAPYSFGFsp+YPDGRpGIVGGLLYHGhPDcSFAVTl.pPFHGWpIHT............ 0 1 6 6 +13339 PF13497 DUF4121 Domain of unknown function (DUF4121) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2588 Family Based on Bacteroides thetaiotaomicron gene BT_2588, a putative uncharacterised protein. 
As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 27.00 27.00 27.70 27.50 19.20 18.70 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.67 0.70 -5.38 7 50 2010-11-05 21:42:42 2010-11-05 21:42:42 1 1 30 0 9 43 1 254.20 40 94.59 CHANGED psps+YslEoLpphNs.aD+tatlsQcDVDhsNRhlplIEpsRSchh.PpsGDplhYloRpGDaaspAhI-thss+p..lpIC..P.lPFsacstsslthsspGGsap.lpscslK.suhppttF+sWGHsGtCuNGuVhFpApV.hWpYpEP-PLYGcaT..Tcsap+aalpKp.-sE..st.hYpu.shshhscp-hcphlt.hcGplFpG.hpsplVlWsaRh-ahhLs.pEWpphc..Ap.Rhh.ht.p.VKIlpDh-pHhshFY ................hpshYslEoLphLNhh.DptaslscpDV-KVNphlp+hEcsRschh.PpsGDslhYlo+tGDYaspAaI-phs..s+p..lpIC.hPpsPFsac...stpshthsspGGsashlsscpl+.suhpptpF+pWGHsGtspNGuVhFcAhVthWcYsE.P-PhYscaTT+pWp+aaIc+ps-.E..su.hYpu-shohhsc-ELcphls.hcGplFpG.hssp.hllWsaRh-hhcl.sstEWpthct.sphRhhahthp.VKIhpDccpHlsThY................................ 0 2 8 8 +13340 PF13498 DUF4122 Domain of unknown function (DUF4122) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2607 Family Based on Bacteroides thetaiotaomicron gene BT_2607, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 
27.00 27.00 52.90 41.70 24.40 24.30 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.46 0.70 -4.97 3 36 2010-11-06 00:25:54 2010-11-06 00:25:54 1 1 27 0 6 28 0 204.50 43 88.78 CHANGED EEIVYLSIRluCsuYLLYKVaGQK+RIccICDLLYuK.PPVK+ccsEsV.suEPGu-o-VMGSTRFVYLDENAGKTVAPYMSQPLETuuDFIGEEEDVsEEEVECKLPLEEMRMLKEEQEELDucSPEVEAVSPsVTPcDL-NlG-VLh+LN-AspDEsKShRAAhTLHSIRETDLFElFSSQVENKslIEELMGKYLDc-GNPLPLR+c+c.NPVs-sWRQ ...............................................tllYhslRhsChsYlLYpVht.+c+ltpl..CsLLYs....shpp.ctEps...spssss..t...s-VMGpTRaVYLDENAGKTsAPaMSQPLE..p-hIGEDEDIsp-DVECpLsLEcM+hLp-EQEEL...Du.ps...P-sEslo.ulT.cDlpNVGDVLhphstA.pDccKuhpAApTLa.uIR-TslF-lFsSplpNpphlEcLlcchlDc-GNshPL+ppp.p.spssspWR.p................. 0 1 6 6 +13341 PF13499 EF-hand_7 EF_hand_5; EF-hand domain pair Bateman A agb Jackhmmer:A0LDI7 Domain \N 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.30 0.72 -3.83 146 22951 2012-10-02 16:17:27 2010-11-08 14:23:57 1 974 2128 881 12350 22877 1126 70.20 26 23.94 CHANGED plpp....h...................F.pt..hDtsp..sGhlshp.-ltphhppht.......................................ppphpp.........h....hp..phDts..tcG.plshpEFhp.hh .................................................................................................................................................................................................t..pp.....s...............F..ph....aD.p.D.s......sG.hI...o......tp...EL.pp.hh.p.s.h.s.............................................................................................................................................ppph...c.p.............................hh.....h..hp........ph..Dt.D......sD..G..pl..s..ap.EFhph................................................................................................... 
0 4511 6719 9545 +13342 PF13500 AAA_26 AAA domain Bateman A agb Jackhmmer:A0L3M2 Domain This domain is found in a number of proteins involved in cofactor biosynthesis such as dethiobiotin synthase and cobyric acid synthase. This domain contains a P-loop motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.16 0.71 -4.71 55 5230 2012-10-05 12:31:09 2010-11-08 15:01:41 1 28 3140 43 1197 6492 1083 206.80 25 57.02 CHANGED +slhloGT-TslGKTllossLspuh.........p..ss.....YaKP.l.QoGht........p.ss.........Dsphl.pp...lhshsp.s.hh....hs....................................................psht.lptPh.oPph.uAph-.s...hsls....lppl................p.lsp.s..s........chlllEGuGulh.VP.lspp.ph.hDlhppL......shsllLVups...sL.G.o.......I.N+s..L....Lolcsl.+.....s.....+....sl.s..lhGllhNs......tss...........c................shptl.pph.st.........l......slL.uplPhhsplss.pp .......................................................................................thaloGT..cTsVGKTslotuL.hpu..hpp.......................pG.hp..ss......................s.aKP....l...ssGsp..................................p...ss............D..s..thl....pp.............h.s....s....h.....s....h.....s....hp.....th.....................................................................................................................................................s.P.ht....hs...p...s......h......us........pl....uu...p....t..s......h.s.l.............hppl................................hpp..l.s.p..p..s..........................-hllVEGu....GGh.......h.....s.......s.......h.......s........s........p.....t.........s.......h.........h.......-.hspph...................ph.P..l....l..l.Vsss....pl..G..s..................l...sc..s.....h.....L.o.h...p..s.l..p.....p.....c.........s.l..s..lhGll.hNc........h.tss..............c....................................th.t.hl...t...p...h...hs....................h..........Pll..G..tlPh........p...........................
.............................................. 0 335 731 1019 +13343 PF13501 SoxY Sulfur oxidation protein SoxY Bateman A agb Jackhmmer:A0LE08 Domain This domain is found in the sulfur oxidation protein SoxY. It is closely related to the Desulfoferrodoxin family Pfam:PF01880. Dissimilatory oxidation of thiosulfate is carried out by the ubiquitous sulfur-oxidizing (Sox) multi-enzyme system. In this system, SoxY plays a key role, functioning as the sulfur substrate-binding protein that offers its sulfur substrate, which is covalently bound to a conserved C-terminal cysteine, to another oxidizing Sox enzyme [1]. The structure of this domain shows an Ig-like fold [1]. 22.00 22.00 22.20 22.00 21.60 21.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.32 0.72 -4.01 101 444 2012-10-01 19:22:21 2010-11-08 17:43:11 1 3 280 16 196 483 305 109.60 32 55.82 CHANGED shhpshhGspsh......ssplplpu...PphAEsGuhVPl..slpss.h.......lcpltlhs-pNPsPhsusFph.Ps.supst..lusRl+lspsosVpAl.ucst.cGphah.usptVKloh.GGC ............................t..hpthhGstsh...t....sstlplsu...PplAEsGusVPlsl....ss..s.hst.........lcpltlls..-p.....N....PsP....hsA...sFph..Pt....sststluoRl+lup.oosVhAlucst...DGphah.usppVKVoh..GGC.... 0 44 122 160 +13344 PF13502 AsmA_2 AsmA-like C-terminal region Coggill P pcc Jackhmmer:B8J2T2 Family This family is similar to the C-terminal of the AsmA protein of E. coli. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.25 0.70 -4.99 168 2606 2012-10-03 05:41:17 2010-11-10 11:25:13 1 26 1712 0 739 3716 1170 219.90 17 21.31 CHANGED luplpsplph...p..s..s.........sl.plsslp.hs..hssupl..ss...sup...a..ps..p.........ss..s..p...........sphshplpstslsphh.......pt.hs.hss.......s..l.p.G..p..hs...hshplsh.p.sss.ht....t.h.soLsG...shphpltpG...........plt......p..........................hp.ph.......................shsshhsp...........sh.sa....cslpushplpsGhhps.cs.hplpus.suplshp.GphsLsp..pp..lshphslssphst.hs.hsh...................hh.hhht..htphs....sl........pa..p..lsGshssPp ......................................................................................................................................................................................................................................tphphphth....p....s.s..........tl..plppht.hs...h.h.t.u.pl....pu........sup....hts..s.............ss..t...p..............................sph.p.hp..l...p.s.hs..ls.t.hh.......................................ph.hs..hss....................................s..l...sG...p....hs...hs.h.s..l...ph..p.sss..ht.....s.....h...ssL..sG..........phphp.l.t.pG......................plp.........ph........................................................h.hp............................................................................thshpsh.hsp.......................sh...hF.....cslp.....u..s.....h...pl..p..s....Gl.hp...s.ss.h....h..l......p.....us...t.u.....p......l.s.....hp..G.pl...s......Lsp............pp......l..sh....ps...sls.sphs.t.......................................t...h..tt..h.........l.................ph..p..lsGshspP...................................................................................... 0 242 470 596 +13345 PF13503 DUF4123 Domain of unknown function (DUF4123) Bateman A agb Jackhmmer:B5FDW6 Domain This presumed domain is functionally uncharacterised. 
It is about 120 amino acids in length and contains several conserved motifs that may be functionally important. This domain is sometimes associated with the FHA domain. 22.20 22.20 22.20 22.20 21.60 21.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.57 0.71 -4.15 139 819 2010-11-10 11:40:33 2010-11-10 11:40:33 1 3 345 0 199 758 13 123.60 20 45.28 CHANGED phYhllDusph.s.p.....hhpthh..pt..t.t....t..hts..Latss.shppht.phuPa..Ll......pl..................s............s......h........tphhpp....t...tpsh..........u....h.hlt.......S.s..t.........shp..plhp...HL+p..hl.pl.ph.s.p.G.c.th..hhRaaDsc....lh....tsh......lssh..............tptst.......hh ...........................................................hahllDshhh....p.....h.tth.....th....t...t..hhs.Lattp..sh..ttlt...phuPaLlpl.................ts................s....hl........pphhtp............tt.tt.h.............u....h.hlt.......S.s..t...................................sh.s..pLt.p...HLpphlt..s.ph.s....p..G.c.ps..lhRaaDsclh.sh..hps.hs...tpht.h............................... 0 40 78 125 +13346 PF13504 LRR_7 Leucine rich repeat Bateman A agb Jackhmmer:B5ETY5 Repeat \N 22.00 7.00 22.00 8.30 21.90 -999999.99 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.69 0.76 -7.30 0.76 -2.74 1228 2943 2012-10-02 21:32:02 2010-11-10 13:37:07 1 941 590 28 1121 55766 2241 18.00 36 3.46 CHANGED spL....ppL...p...l..............pp.....s.....p.....l....p......p.....l...s ........................sL...cpL...s...l.......us...N......p.......L..s.....p...LP............................... 0 385 667 922 +13347 PF13505 OMP_b-brl Outer membrane protein beta-barrel domain Bateman A agb Jackhmmer:B5ESE4 Domain This domain is found in a wide range of outer membrane proteins. This domain assumes a membrane bound beta-barrel fold. 
27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.63 0.71 -4.33 190 7576 2012-10-03 17:14:37 2010-11-10 14:45:20 1 53 2032 12 1809 11637 730 195.60 15 81.14 CHANGED hshshhss.hu...ss..shu................ssp..........tsh.h...........l..s..s.u...h..shs......sh...s...................sss.......ss.s.t.......................s...s..sh..sltss.a...p...h....s....s......h......hu..l.ps.sh..s.............h......tss.....pttss.......................s.hp....hphht....................hsss......ht.h.p.....h.sh.....ss.............h..p....hYs......tsGhs.............................................hsp.p..hp...hts.................................ptstsp..huhsh..us..G......lpaph......s.....p.....ph....t.l.....ss..cY.p..h..t.ph.........................ss..............hcs.......ss...hpluluY+F .........................................................................................................................................................h...hhhhh..hs....ss...shu....................................sts...........................th...h.........................l...s.......s.s....h...uhs............ph...p.............................................st..........t..p.s............................................s....hsh..sl...ths....Y.................p.....h..........s.......s.........h.........hu....l....ps..s.h..s.....................................h........tpt........pptss....................................................p..p.....ph.tt...............................................................ht..hp.............st.h.s........h...sh.......sph............hp......h.Ys.......tsGhu......................................................................................................hs.p.h.p..hp......t..............................................................spsps.p...hu.h....s.h........G.s......G.............lp.a.p.h......................s..............p..........sh......s..l............p.h...p.
.Y...p....h.....h.ph.........................................................ts.................hp.......ts.........h..hh..G..hsapF........................................................................................................... 0 544 1089 1459 +13348 PF13506 Glyco_transf_21 Glycosyl transferase family 21 Coggill P pcc Jackhmmer:B8IZF6 Domain This is a family of ceramide beta-glucosyltransferases - EC:2.4.1.80. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.97 0.71 -4.98 54 703 2012-10-03 05:28:31 2010-11-10 14:55:10 1 8 542 0 308 9471 1198 168.40 23 39.16 CHANGED P...p..l..c....lhhs...ss....shuh.N..P...KlsNhhhshpt....Aca-hlllsDuslhlsschLpplssshpp...psGL.Vou.hsh.ssssp...u....hhutl.t.ssh...h.......s.sh.....thhhu....h...t.....uh..shshGtohhhR+psL-phGGhpulsphLAEDhthu.phlpst.Gh+ltlssts.ht.p.s.ss...tp.htt...hhsR.h..RWu+ ..........................................................................................................................lhls..st....hus...N.s......Kls.N.Lh....hhct...........sc.....a......-......h.ll.l.s.Du.D.h.h.l......p...s...c....h...L.p...c..........l.s.......s..s..h.............s................-.............p...........l.....G.....l......V.........o.........s......h.....s..........h........s....t.....s.s..p........s..................h.h.u...t..l...t...tha..h........................s...h........sshhhs.............h...t.........................tt...t.h...s..h...G....t....s......h..........s......h..R......cs......s......L......c..............p......h..............G......G...........h......t.........s......h........t...........p...........t........L..........A.........-...D.a.hh...u.......phl......p.......s........t......G.......h.......+......h..h..h..u..s....s..hh..p.sss............phtp......hhp+....h...pWs........................................................................................................ 
0 96 178 246 +13349 PF13507 GATase_5 CobB/CobQ-like glutamine amidotransferase domain Coggill P pcc Jackhmmer:B8IZF6 Domain This family captures members that are not found in Pfam:PF00310, Pfam:PF07685 and Pfam:PF13230. 20.80 20.80 20.80 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.69 0.70 -5.48 255 4672 2012-10-03 00:28:14 2010-11-10 14:55:38 1 18 4440 7 1177 3706 2727 235.40 37 29.43 CHANGED +P+VsIlpt.GsNschEhAtAFcpAGhcsh-.V..p.........h.s...-.........l.........h.........su.........ch.........s.Lcc..hphlshsGGFShGDsh.GuGcu..hApslhht.......plc-phppFh.s.R..DshsLGlCN.GhQhLspLu...Ll............Ps...............s...........cp.PphspNpSt+aEuRhspl..h....h.ss.p...S...Psl..hh.psh...s..shplP..V..uHGE..GRhhh.....spphhp..plhssstlAhpYVDs.......p..GpsTtp......YPhNPNGSstuIsGlsSsDGRlhuhMPHPERshcshpps.hs............t.t........t............ushhclFcNAhpah ...................................................+lAllt..GsNs-h-h..At.Ah....c.c.u.......G.......h.-.s...ht..Vh..........h...s.........l.........h.....ts.........ch...................s.L.ps..h...csllhsGGFSYGDhL...tuGt.h..hu.tphh........stltptlppah.p.c..ssh.sLGlCN.GhQhLs.chu...Ll................PG..................................shh+Npu........t+........F...u..+..hshl..h..............l........s...p......o........s...lhh....psh...p...thplP...VuHGE..G+ahs............ss.p...p.l....t..pLc.spsp....lsh+Ys-t....................s...t.......................hNPNGSspsIsGlss.psGp.VhuhMPHPERshcsl..s................................................hh................................................. 0 380 751 997 +13350 PF13508 Acetyltransf_7 Acetyltransferase (GNAT) domain Bateman A agb Jackhmmer:B5FCA0 Domain This domain catalyses N-acetyltransferase reactions. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.38 0.72 -3.82 163 13825 2012-10-02 22:59:21 2010-11-10 15:58:29 1 107 3917 33 2973 41376 6818 86.50 18 43.91 CHANGED ppp..p.hhsh.pp...........ssc...ll.u...hh.tl.................tt....t........p...ht..........hlt.......tl..slpsphRspGhu...pp...llpt......h.......h...pph.....t...............t.....p...t...h.........h...........l............h............s.....p.......s.......p............t..........h.paYpphGFph ..................................................................................................t....hh.h.h...p..........................ps.p.....l.l...u....hh..pl.........................................................h......t..................p.....hh........................................hlt........tl.....s...V...p.s.php........u..p..G..l..G......pp....Ll.pp..........h..............t...pp.h...........t...............................................................t...........p....s.......l............h...................l...............t................................s........p........p.........p.............................s.............h..sF.Y..p.+.h.GFp............................................................ 0 930 1796 2449 +13351 PF13509 S1_2 S1 domain Bateman A agb Jackhmmer:B5FDY3 Domain The S1 domain was originally identified as a repeat motif in the ribosomal S1 protein. It was later identified in a wide range of proteins. The S1 domain has an OB-fold structure. The S1 domain is involved in nucleic acid binding. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.56 0.72 -4.05 96 2158 2012-10-03 20:18:03 2010-11-10 16:15:05 1 7 1694 1 363 1496 70 60.30 29 26.77 CHANGED lGphssLplh+ps.chGha.L.......s...ssp....s-lLLPpp..tl..s..cs..hplGDplcVFlYhDu-c..+llATsp ....................lGphthLpVlc..........p.......s.c.......h.Gha.L.................p....tst.......pslhLspp........Eh................pp....hplG-plpsFl.Y.h.Dpps.+lh.ATp.............. 0 127 239 313 +13352 PF13510 Fer2_4 2Fe-2S iron-sulfur cluster binding domain Coggill P pcc Jackhmmer:B8J1F8 Domain The 2Fe-2S ferredoxin family have a general core structure consisting of beta(2)-alpha-beta(2) which a beta-grasp type fold. The domain is around one hundred amino acids with four conserved cysteine residues to which the 2Fe-2S cluster is ligated. This cluster appears within sarcosine oxidase proteins. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.66 0.72 -4.17 254 5655 2012-10-02 17:47:23 2010-11-10 16:31:22 1 204 3363 9 1816 5235 3983 79.40 31 11.18 CHANGED sp.lshphDGpp.hpuhtG-TlAuALlusG................l.p.ls.+.oh.t......htpP...Ruhhsshups.ssLVpl....s.........u....ps...s.h.pAshs.lt-GhplpoQss ...........................................lplplDGp..p..l..p..s.....t.......G....s...ol..lpAs.tp.sG..........................l..p..lP..p..hCa....................p..............stl..s.s......s.G.sCRhCl.V..-.l.....c.........................s....t....t...l...t...uCsssl..p..-..GMhlpopp...................................................... 0 630 1190 1513 +13353 PF13511 DUF4124 Domain of unknown function (DUF4124) Bateman A agb Jackhmmer:B5FFC8 Domain This presumed domain is found in a variety of bacterial proteins. It is found associated at the N-terminus associated with other domains such as the SLT domain and glutaredoxin domains in some proteins. 
The function of this domain is unknown, but it may have an Ig-like fold. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.29 0.72 -3.92 232 1444 2010-11-10 16:35:35 2010-11-10 16:35:35 1 18 588 0 495 1312 147 82.30 18 43.19 CHANGED hhhhhh......ss...ss..s...u..u....plY+WsDps.GpspaoDp....Ps.t....s.s.ps.p..pl..........................phts.sss......................hs.t.....s.....ss.tts.......tpt .......................h.hhhhh.....ss.....ss....s....u...u.........slYcasDss.Gsspao-p...........Pst.....s.s..pt..p..pl.............................................p...........................................................sttttttttttt............................................................................................................................. 0 129 310 423 +13354 PF13512 TPR_18 Tetratricopeptide repeat Coggill P pcc Jackhmmer:B8IZE5 Repeat \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.92 0.71 -4.16 19 80 2012-10-11 20:01:04 2010-11-10 16:50:44 1 7 80 0 30 1458 1509 130.10 30 50.38 CHANGED sc-slP-hssuplYspAQcsLpsGsYpsAIcpLEuLDsRYPFGsYupQsQLDLIYAYYKss-hshuhAoIDRFlRLNPsHPNlDYVhYMRGLsshshDcs.h.phh.....phs.t.st..................tAFc...DFppLlp+YPsSpYAsDAppRh ..............................................p.s.splYspupp.tLpc.t.p.ac...pA...hpph.c.t..L.cs+..aP.h.u...au...p...Qsp..L...pL...h.Y..........u....a........a..cs...sc...h.......tAhu......sh-RFl.....+L.pPs.H........s.s.....l.....s.....Y.sh....Y.h....+GLsp..h...t.......s................................tsc+...D..p.sh............................................Aht..phppllppaPsotYu.cup........................................................................................ 
0 12 19 26 +13355 PF13513 HEAT_EZ HEAT-like repeat Coggill P pcc Jackhmmer:B8J2H7 Repeat The HEAT repeat family is related to armadillo/beta-catenin-like repeats (see Pfam:PF00514). These EZ repeats are found in subunits of cyanobacterial phycocyanin lyase and other proteins and probably carry out a scaffolding role. 20.00 18.30 20.00 18.30 19.90 18.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.20 0.72 -3.35 261 1731 2012-10-11 20:01:04 2010-11-10 16:59:27 1 168 403 18 1145 5921 533 54.80 25 5.68 CHANGED .plR......ptAhh.uL...s..........htt.t.ph...ht...hhtpllshLh.....hh..t....sss...tl..Rptu..shuLupl .......................................................hR....cuAlh.ul......Gs...............lstsshpt.......hps......hls.pllshLl...........phL.pD.......sps.....tV..Rpsu...shsLup...................................... 0 376 651 956 +13356 PF13514 AAA_27 AAA domain Coggill P pcc Jackhmmer:B8J072 Domain This domain is found in a number of double-strand DNA break proteins. This domain contains a P-loop motif. 
29.20 29.20 29.30 29.30 29.00 29.10 hmmbuild -o /dev/null HMM SEED 1111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.09 0.70 -13.83 0.70 -6.88 10 1921 2012-10-05 12:31:09 2010-11-12 16:18:39 1 17 1239 0 319 1500 94 415.80 15 69.93 CHANGED laGsNEAGKSTlRsAlpsLLFGFPtRoshs.FlHs+psL+lGGsLs....tcsGupLcFpRl++sst...oLhsscGc.sls--sLsshLsGhs+phF-ulFulDccsLhpGG+pIl-ApsclGphLFuAuAGlGS..LssVc-pL-cEt-pLaKPp.GppstINsulpphK-lppcl+chpl+scsWccppcsLccuccclppl+ccpcpL-pc+pclERlppltPhlp-c+uhpppLutl.uEslphPscus-Rhtphcschpsspppl-phpc+lpplcschsuIplDc-hLucAssl-uLpppcsphcpupp-lpphpuclsstcc-tsuLttQlG.Pshspsslcuhcsuhss+cplspLsp.......c+psLcptlcsApcpLcEpccclcplcpphsulssh..spsLhtul.ss.thhshtt...thtstcpcltps++cttpulspLG..attsl.t.hthslP.htplpthp+ctpEhtsstppt+cchpcsptpLtpl....tlptpthstssslsoss-ltssRstR-tlWps.................hs..tshs.th.chlppADpLsDphhspsppsupltplRpptEptptRhtthptchtshcpphAthctsWttthtshu.hPh.s.tth.sWltphpthh.t.pth.ptptEhp.hhptttchpttLtt.Lth.u.........t-LsthLptucphlcphc+sutccspL-c+hppscpuhpcAcc+ppcApcpl-sWcccWpphlhphtLsuphosstsls.ul-hhpchppchpcs-chsp.Rlpuhcc-lucFcpclcsLscshssth.s..s...pth+tLpsRLppA+-cupthc+LsEclcphccclspsspslpptptclssLhc....hApssohE-hhssscRu-ptcchccplscLppplsphssulsltuLttchsth-ssplpucl-plspcl-c.hpsphscLsppluctcsplupl-GsosAApltcchppthuplp-tAEcalpluhApplLppsh-RYR-s+psPlLp+AuEhFspLThGcFopLpsDs.-cpP.hLsucRssGpplpltpLSpGTRDQLYLALRlAsLEhhhtpppshPFllDDlFlsFDDsRocAslcsLc-Luc+sQVIaFTHHcHLlshstps.asspspllcL 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................................t....................h........hStush-.lhh.hRhu.h.ht..............................h.P....h.lh.DD.hh..D..Rht.hhthh...p........s.....t.....t..............Q.llhho....t..........................th...................................................................................................... 
0 119 219 276 +13357 PF13515 FUSC_2 Fusaric acid resistance protein-like Coggill P pcc Jackhmmer:B8IZ01 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.84 0.71 -4.22 286 4566 2012-10-02 19:04:43 2010-11-12 16:27:32 1 34 2561 0 1051 5938 148 130.10 23 22.04 CHANGED l..uh...hl..s.....t.h......h...s.h..........t.......+sh..........Ws.sls....shhlh.........p..s..s..hssshp+shpRhlGTllGs.hluhh..lh....h........hhs...s..s....h..h....hh......h..l....lhhh.....hhhh........h...h..........................h....s..ts......Y.....sh.ts..hh..l..T........hhsl....h.hhsh...............sssht.................hsh....RlhssllGsh....lulls ..........................................hhh..h..hl.s.p.h......hsh............t............+ua..........Wh.h.lo..s..hhl.h......................p..s...shssotpRhhpRllGTllGllluhs..ll........h.................................h.hs.......s..p..........hh....hl.............h.l.....hhhs........shh.h..........h...h...................................h....p....ts......Y.......uh..us.hh..lT.....hhsl........h....h.h.s.h......................ttshp.............................................hhhsRl.l-sllGshlAhh.s................................................. 0 275 612 877 +13358 PF13516 LRR_6 Leucine Rich repeat Bateman A agb Jackhmmer:A5IFW6 Repeat \N 23.00 8.70 23.00 8.70 22.90 8.60 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -7.04 0.75 -7.40 0.75 -3.27 889 30484 2012-10-02 21:32:02 2010-11-12 16:29:19 1 2898 807 67 21001 56590 2857 24.90 27 9.82 CHANGED h.....ps..Lp.....pL.....s.....L....s........p..s.p.....l...ss...pu....h.......p....sl....u.............p ......................................pp..Lp........pL.........s...........L............u...............................p...N.p.........l.....sc......pG....s.......p..tls.............................................................. 
0 10309 14782 17695 +13359 PF13517 VCBS Repeat domain in Vibrio, Colwellia, Bradyrhizobium and Shewanella Coggill P pcc Jackhmmer:B8J4T8 Repeat This domain of about 100 residues is found in multiple (up to 35) copies in long proteins from several species of Vibrio, Colwellia, Bradyrhizobium, and Shewanella (hence the name VCBS) and in smaller copy numbers in proteins from several other bacteria. The large protein size and repeat copy numbers, species distribution, and suggested activities of several member proteins suggests a role for this domain in adhesion (TIGR). 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.62 0.72 -3.61 1255 7532 2012-10-05 17:30:43 2010-11-12 16:50:10 1 572 812 12 4031 8611 7152 66.10 25 19.29 CHANGED Dhss..DGthDll..s....s................s....s.....ss...................spl........h..h...t.p....s......s..G....s........hp............h...s.p......h........h..hss..s.................sts.h....s..........s....s....hu..DlssDGp.hDlls .................................................DhssDG.h.Dlh..s..s............................s...s.....ss..................................stl.........a.h...s..p......u................s..G.......s..................as...............................t.h.....s.t........h.........thss.s...............................................stsh....u................s.....s.....hu...Dhss..DGp.hDlh............................................. 0 2314 3279 3827 +13360 PF13518 HTH_28 Helix-turn-helix domain Bateman A agb Jackhmmer:A5IDV5 Domain This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.46 0.72 -3.92 193 5836 2012-10-04 14:01:12 2010-11-15 09:27:30 1 87 2346 0 976 15814 2048 50.90 20 29.02 CHANGED p+hpl...l.ph.hh..p....s..p...shpp.....supphsl...sp.psl.tpWhppap.p.tG.h..s..uLt.s......c..p...p....psp ......................................+hpl....l.ph.hh..p.......u..p.....ohpp................hupcasl...sp.ssl.tpWl+tac.p..tG.....p..uL............................................................ 0 285 617 813 +13361 PF13519 VWA_2 von Willebrand factor type A domain Bateman A agb Jackhmmer:A5II27 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.30 0.71 -4.15 168 10039 2012-10-10 16:07:06 2010-11-15 09:44:16 1 392 2980 5 4071 16696 2611 193.40 16 32.32 CHANGED slllllDhSsSM...........t..sp.....sh..........................................t.......s................Rlst.s.+tth.........tsl...lp..........p...h.........s.ss.....ph......u.l..lsau.........u.......s..............u.................h......h....h............h..................s........h....o...........s....-....t..ssl.....tsh.....l..ss......lssp.h..................................hs..s......s....u..........os....hs...t.ul.....................................tt..Ah....p.hh....................................p..p...t..s.............................tp...s.s.l.lllo..D.......G.....................ts.s..............sp..hh......p...............hh.p...........................................t..h...p..pp..s...hp.....l..hsl.........sl.G..........ss.......ps..s...............................................................t..s.................t..lpp.l..Ap..ts.....u......Gt.......hhph..psss..psls.............ph...hpp.h 
...................................................................................................................................................................................................................................................lhlllDs.S.u.SM...................t.sp.................................................................................................................................................................................................................p..........+l..p....t....s....+p.sh...........................pp..h.....lp..........................................................p.......h................s..ss..........pl...........u...l......l..s....Fs................................................................s.......p....................u...............................................................p.........h........h..........h......................................................................................s.........h.........o....................s......s....t....pt.h...........tpt............l...ss.......lp.s...................................................................................s......s....s...................Ts..l.s....t....u..l.......................................................................................................................................pt....A.h.........p..h..l........................................................................................................................................p.....p...p...t................................................................t..tp.......p...h....l....l....ll....T....D........G.............................................................ps.s...........................st..h......p...........................hhp.................................................................................................................................................................t...h.......p.......pp...s.........lp...........l..hs.l..............
sh..u.........s.s.....ts........................................................................................................................................................................................t............h.hpp..l....up.......s.......s..............Gt.........................h..h..h............................................................................................................................................................................................................................................................................................................ 0 1621 2750 3486 +13362 PF13520 AA_permease_2 Amino acid permease Bateman A agb Jackhmmer:A5ICF5 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.77 0.70 -5.94 61 23261 2012-10-03 01:44:59 2010-11-15 11:34:15 1 79 4110 20 6607 26829 2040 418.20 16 86.76 CHANGED luhhsshhlsh.....us.h.....hu...ss......h..h.s...s...........hss....sGh.s..hhlhhh.lsshhh...hshthsh.hEl..u..s....hspsGGhhsas......pts....h...........s....phhu.....hhsshhh..hhshhh..sh.ss...h.s....sh.hhph.h..hth...h....t.................ph.t.t................................................................ph..hh.hhlu....lsllhhhhhlshhuh+hu.splp..h.hshhplh.hslhhhllhulhhh....tt....s..hhs.........................tp....s.................hhss.....shs......t.....h..........h...s...........u.h....hh......s..hauhsGa-ss...................sshspE...hp......p..+..s.h.huh......hhuhhhshllhhl..............hsls..hhh.lls..sp.l........s.t..ss..........................h.shhhpth.s...s.p..hhthlls.lhlsl......shhu....slhshhhuss+hltphuc-..........shlP.......chhsph.............s.c..hss........Phtulhhthlls.lhhhl.hh.........................................hh......tsshs................hlhslsshshh...lhhhlh.hhuhl...hh+...hp...p.....ph.......t+.................hh.h............h....h.lshhs.....hlhh..lhh....l.......hhsh...........hs........t.s.........s.....
.stts...h.hhh.........hl.hhs.hh .............................................................................shhshhhlsh....us.h........h.G....sG.......h...h..h.hs.................................h..hts......sGs...s.....s.lh..h......h....l.....l.s..sl.hh...........h...h...h..u.hs...a.....ucl...u.............s...t.......hP.....p..s......G...G..h..a.s..a.s...................pts..................h.......................................G.....p..h.hu.....................ah..s...u..a.h.h........hl.s.h...hh....s.s...s..s............h...s............hh....hs..s.h..h.......st.h.....h......s.........................th....t................................................................................sh....hh....hlh...s........................h.s.l...l..h...l.h......s.....h.l...s...h...h....G....s...c...h...s....up...l......s...sl....h....s.......h...........ht........l.......h.....s..l..h...h.h.l..l..h...u...h...h.h.h.............ph........s....hhs........................................................................................................ht..s....................................hhst..........shs..........s.......h.............................h.....s............................................u..h....sh.........s....ha...u.....a.h........G...h....-....s..h.................................................s...s...h..u...p-..........sc................p......+....s...hPp...u.l........................................l.h..u.h..h.l..s..s..l..l.....Y..h..l..............................................s..s...hs......h..h..s....h....l...s......pp..l...................s.t..s.ss..................................................................h..s.h.hh.p.h.h...t.................h..s.....hh.s....h..lls....h.s.s.h.l...............................uh.hu.........................sh.hs...h...h...h..s.............s...s..+...l.h.h.u..h..u.c..-........................................................u.h.hP...............ph.h...u...+..l.....................................................
......s...c...pt.s..............................P.h...t.......u....l...l..h....t.......s.l.l...s..h.lhh...h.h..h...........................................................h............tth.s..........................................hl..h....s.....h....s..s....h....s..hh............l...s.a..h..h.....s.....h.h....u...h..l..........h..l.p.......hp........t.........ph.........tp................................h.h...............................................................h....h...h.s...h..hs.................hlhs.....h..h..h....h....................h.h..h.......................................................................................hhhhhhhh............................................................................................................................................... 0 1861 3597 5318 +13363 PF13521 AAA_28 AAA domain Bateman A agb Jackhmmer:A5IAR3 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -11.10 0.71 -4.28 115 1816 2012-10-05 12:31:09 2010-11-15 12:03:12 1 15 1500 1 435 2151 632 163.30 27 52.65 CHANGED +lllsGu.usGKTT....Ll....ptL......u........p.p.........G.hs...hs..sEhuRpllppph.....spst.....thh......h..................-hhths.pthh.ptph.cp.h....p..s.s..........p.s...sphlFhDpuhh-shs.Y..h.chh.tt............................s....hss..p.....lt.p........tsp....ptc.Y...ch.lhlhs.s.hs.......l.....pDst.R..p.p.sh.cc.ttph.tpthtp....thpp....huh.phltl...p.ush.ccR 
......................................lsIhGGtSoGKo.T....Ll....sp.L...................A..........p.h...................hs.ss..........ss...........E....h.....u........R.....c..hl.pph........stsp...............t.Lp........hp..................................................Dhht..hu.p........ttph..hs.h.......t.st.................h.u...spl.sFhDs...shl.s....s.p...u...a...s...cth..ts.............................................................c.....t.s..h..........lp.s..........hhp......ch+..a........Dl..Vlllp..s.ss............sa.........ssD.uh.R........s...t...s.......t....-.....cpph..pp....hlhc....hlpc........sh....phlpl...psshppR............................................................................................................................. 0 157 265 373 +13364 PF13522 GATase_6 Glutamine amidotransferase domain Bateman A agb Jackhmmer:A5IAK2 Domain This domain is a class-II glutamine amidotransferase domain found in a variety of enzymes, such as asparagine synthetase and glutamine--fructose-6-phosphate transaminase. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.94 0.71 -4.20 67 6720 2012-10-03 21:14:07 2010-11-15 13:25:09 1 18 4568 0 1845 12173 7462 149.00 32 26.58 CHANGED PDspGh..ah..ss......p.....................ssLuHs.RLull-hs.ttus.QPh...stssp....h...hlsaNG-lYNat....cl+ppLtt.tGhs..hps.poDoEVllt.hh....pphG........pcsl.p..........clsG.hFAhulaDp.pppplhls.RDch......G...hKPLaatht.........ssshhFASE ................................................................thttttth...............pG.....s.........................sGIG.HT...RWA....TH..GtP....s..p..t..N.A..HPH.............sssp.......................l.......slVH.NG.lIENat....................pL+....c....c....L.....t....s....p...Ghp..............F.pS...pTDTE....Vls+..Llt............................pphut................puhppsl..p........................................plcG.....ua.A.......h.....s.....l.....hs..t....p........s.......s..pllsA......p...........s.......sP..LllGhs.........pt...h.......................................................................................................... 0 629 1210 1572 +13365 PF13523 Acetyltransf_8 Acetyltransferase (GNAT) domain Bateman A agb Jackhmmer:A5IFT1 Domain This domain catalyses N-acetyltransferase reactions. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.70 0.71 -4.33 54 3083 2012-10-02 22:59:21 2010-11-15 13:26:02 1 23 2166 17 599 11897 940 153.50 19 66.16 CHANGED lRshc...ppD.hs.hlppWhsps+ls..ha.h.pp.st.sh.pth.cp.hhp.tlt...tssathshlu..........thsu...cPhu.........Yh.....Eh.....Y.spcc........h.....................ts.........p..s....tDpGhH..lLlu.ssc.h..hGtshspshhpulh...calF.t..-..tspRllsEPcscNpphhphhpphG.F.phhtph.c.h...P..cKcAt.Lh ...........................................................................................................................................pD.h...hh.....h.....p..h....h....p...p.....s...c..hh........h.....h.....t..t.....t.............t..................................t.t......h.....p......pl.............tp..p.p..h...t.....s.....h...l.s..............................h.h..c..s..........p..s..h.u.........................ah............ph...........h..h.s.p.ps.............h.....................................................ps..................p.s.....t.s...h...s...h..c.........h.h..h.....s...ssp..h.................t..G..c.Gl.u.pth.l......p...s.....lh..............ch.hh...p.....c.........t.........sp..c.lh.h-s..c..s...s....N....t.t....hh+....h.h.c..+.tG....F....ph..hup..h......h...s..t......h...................................... 
0 136 318 477 +13366 PF13524 Glyco_trans_1_2 Glycosyl transferases group 1 Coggill P pcc Jackhmmer:B8J2T1 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.83 0.72 -3.71 238 1210 2012-10-03 16:42:30 2010-11-15 13:26:32 1 65 678 0 497 17241 5704 98.20 19 21.71 CHANGED l.sLN............hspp..............................ss.ss.....h.....RhFEshAsGs.hllos.............pslcp..h..FpsGp..................-..llhhp...shp-hhctl.cthh..p.ss.p.t.tp.pl.uptutpcl.hs.cH....Thpp...R..spplhs ........................................................................................................ts...s.....h.......Rh.FEsh.A.sGs..h..llo.s.t...........tsh..pp.....h...a..p...s...s.p..........................................c....ll..h.hp............shp....-.......l....h....c....tl....p....h....h..h.....p...s....s....p....p....t.....p..pl....u.....p.pu.h...p.p...l...hp...c.H..........o...a...p...p....R..hpph..t.................................................... 0 190 360 424 +13367 PF13525 YfiO Outer membrane lipoprotein Coggill P pcc Jackhmmer:B8J028 Domain This outer membrane lipoprotein carries a TPR-like region towards its N-terminal. YfiO in E.coli is one of three outer membrane lipoproteins that form a multicomponent YaeT complex in the outer membrane of Gram-negative bacteria that is involved in the targeting and folding of beta-barrel outer membrane proteins. YfiO is the only essential lipoprotein component of the complex. It is required for the proper assembly and/or targeting of outer membrane proteins to the outer membrane. Through its interactions with NlpB it maintains the functional integrity of the YaeT complex. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.31 0.71 -4.85 30 2784 2012-10-11 20:01:04 2010-11-15 13:53:42 1 103 2215 4 696 2342 2289 181.20 26 61.86 CHANGED s-hsspplYppAppuLppGsYtsAlcpLEsL-sR..YPFGsYupQuQL-LIYAaYKssDhstAhAsh-RFlRLpPpHPslDYshYMRGL.......oshpt..........tcsh..........c...RDs......sth.....+pAFp-FppLlp+aPsSpYAsDAppRhhaL+spLAcaELplAcaYh+RpAalAAlNRuphllcsYPsT.ustc.ALslhhpuYcpLuhsp.ttcpp.phL .........................................................sspplY.spA...pp.t.l.p..s.G.s.a...ppAh.pthc.sl..p....sc.....a.Ph.u.s.aup.p.....u........p.l.......L......sa...A...a........Y..c...p........s...-.......hstA......hssh-+....F...l...c.ha..Ps..p........s...p......h.c..Ys...hY..hpGl............spht........................................spsh................................p..RD.....pts.....ptAhpshppll....ppYP...s..S..pY.s.s-App+.hh.hl.ps.....tLuth-...h..t..hu.ca.Yh.............p.p.............t...........t.....a.....h...........A.shtRhp...hhpp..a.ts.....h....c.ul.hh..sa..ht..t..........hh................................................... 
0 207 426 568 +13368 PF13526 DUF4125 Protein of unknown function (DUF4125) Coggill P pcc Jackhmmer:B8J0H5 Family \N 25.00 25.00 60.40 59.80 21.40 20.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.18 0.71 -4.77 22 120 2010-11-15 16:02:55 2010-11-15 16:02:55 1 8 113 0 25 111 3 186.40 37 80.57 CHANGED cppllcpIlchEWphFppspstuG+AsCQss.p.......sFclMRtSQahsWspphLpSYLpDLppActtGRNLlsEKYARMMphss.P....c..............s...........hpsh..ls...........cIsp...........h...........pht...........Wp+-htc+YPplspt..ts.hps...pD.......s...p.....sS....FETYLRuELtTYS.+TlpLYhphl..pchtppppNLscpshcphVphhGYcoL-cAEc ...............................t.cpllcpIlctEWs.FppspN.GGRAsCQssh........s.FplMRtSQahsaspplLpSYhpDLppActtGRNLlsEKYupMMp.os.P....c.a...........sts.hpshIp...........cIsp...........hplt...........Wtc-htc.......+...YPplupthRsltosEDs..tp...TShETYLRGELtTYSpcTlpLYtphl..tch..t..pp..phNLstphhtphsph.Gapsl--sEt...................... 0 14 19 22 +13369 PF13527 Acetyltransf_9 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:B8J1N3 Domain This domain catalyses N-acetyltransferase reactions. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.26 0.71 -4.28 105 3978 2012-10-02 22:59:21 2010-11-15 17:08:39 1 19 2411 47 796 9816 1092 126.00 21 48.77 CHANGED clRtls.ps-..hcphhpLhphsFphspsth............p.h.phhpthhc......p...s...p.shshh-.....ssc....lluphthhshplsl..G.phhth.uulssVuohPpaRp....+Gl....hppLhpptLpphcpp.utsluhLhP...hp.h...shY.c+aG.aphs ....................................................................................................t..t-....ht.th....t....p..l....h...p......t....u......F.tsstps.......................................h..t.t.h.p....p..t..h.t..............................s........h...s.....h..s...s..h.c...............psp.............l..l..u..p..l...t.............h...............s..........h.......................h............p...........h.....p.......G.....p.....p......h...........p..........h.....h........s..........l..u...s.....l....u...V..t..Pc..a..+t........pGl....up.pL...l...p...ps....l.c...p....h....p.......c....t.....G.......h......s........h..s......h...l......h...u..........s...s.......s..a..Y..t+aG.ap........................................... 
0 278 537 676 +13370 PF13528 Glyco_trans_1_3 Glycosyl transferase family 1 Coggill P pcc Jackhmmmer:B8J061 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.72 0.70 -5.24 39 1194 2012-10-03 16:42:30 2010-11-15 17:29:34 1 27 910 0 454 4822 1304 259.50 16 66.89 CHANGED MKILYGVpG.o...GsGHlsRuRslsctL..+..p..H..-VchlhSG.Rs.t.chh...p..c.F..s.........hpph.........pGlohss.ppu...+lsh.........hcThhps.........thscthtphhp.hl..-hpp..hDLlIoD.FEPl..........o.u..hAA.....+hpslPsluluHQhAh...p.asls.ht....hss...hh...t+hhlpt...auPuphplulpaa........cap.ps.l.hPPllcpplhstp..s.t..p..t..st.lLVYLsh-shc.....pls-hLpt.hs....phpFhlY.Ghs.....p....-tp.......puslp......a+shocpsFhpDLtpCsuVlssuGFpL.uEALpLGK.lLhhPlc.G..QhEQp.NAhhLcpLGhGhs..h.sL-sshltcaLpph.......s ............................................................................................................................................................................................................................................................hh.h.h.h.p.s.....G...G.H....hsRsh.s.ls.p.t..L....p..t............h.........p.l..h...h..h.....s...s....t............phh......t....t...h..........................hh..th...............sh.............tt.t........p.hp................................t.t.h..h..............................t.........h..t.....t..........h.p..hl....pt.t......D.l..l.l.sD...h.....h.................h...h....hhs..............p..h........s.....h.....s....h..l.....h....l..s..p..........h........................................................................................................h...................................................h.....................................................................................h...h....................................................................................................................................l...h....h........hs.......t.t......................th.h.t.h....h...t..
..h..........................h....h.....h..h....t..............................................tplp......................hh...h....p..........t.h....p....h.....h.....t....t....s....p....h.........hlst.uGhp.....h...Ehhhhsh..h.l.h.h.P..h...........t....-...Q.....p.u.....h.t.th..uh..h............................................................................................................................................................................................................. 0 155 304 384 +13371 PF13529 Peptidase_C39_2 Peptidase_C39 like family Coggill P pcc pdb_3erv Domain \N 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.15 0.71 -3.83 127 2151 2012-10-10 12:56:15 2010-11-15 17:30:24 1 145 1292 1 439 2081 233 142.70 20 40.19 CHANGED lsVPhhtQh......s..p................h.sG..CtsToluMlL.....sah....Gh...s...hstsplApp..h.t..pss................t..tsahss.h.....ts....sh.ushs.tsl.tp.hup.pas........pshshsspsh......splh.ptlps..GpPVlshss..ht..................phsh.....ssHhlllsGYcp......st.......lhlsDPh .............................................................................................................l.hhtQ........t....................................h.ps.CtssShsMlL..............pah.........Gh.........p............ls.t...t.p..l..sp.p..h.h..pts.................................................................hh.tp..............................t....ush.....ts...h...sp.hsp..pas....................................pshs..h..s.st.sh...................ps..l.t..ph.l.s.p.....G.p...PVlh.hh..s......t............................................t.h.st.....ssHhhl.lhG...Y-p................pt...........hhltDP............................................ 
0 190 306 388 +13372 PF13530 SCP2_2 Sterol carrier protein domain Coggill P pcc Jackhmmer:B8J1N3 Domain \N 25.00 25.00 25.20 25.30 24.80 24.70 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.20 0.70 -4.96 41 1071 2010-11-15 17:31:58 2010-11-15 17:31:58 1 4 843 39 229 841 26 209.70 22 53.40 CHANGED spsGslsRsptWWcph.h.......t.........tttsc..h...p...hslahstsspspGYlhYchps..........sshpVp-hhussstAtpuLWpFlpuhpshhpplp.h.pp.ss--.sl.h..hLsD.P.cs.sp...........p..l..p..sahhsRllDltpsLpuh.sass...........stslsLpVp.Dshhsh.....NsGpapLph.ssGsssspps.........s...ps.s-lplslssLuslhLGspssspLsthGcl.c.....tps.sAlppl-p.lhss.c.p.Pah.- .............................................................................................t..tGslhRsthhW...p.h.h.......t.........ttt.pp..h.....p.hslhhstst.cs...pG...Y..hh..Ypltp.............pphplpEh.hhhst.cA.ppuLap.Flsu..h.shlp...plp.h.ph.....sc....sL.h..hlp-..s.ch...pp............................p..h......p...shhMsR......IlD.VpthLp...th..sapt......................ptshslclp.Ds.hh.h.....NsGhaplsh...ts.......u......p.......sp.lsct.............................s........ts..sslplslpsLsslhhGhtp....sppLstht+l..p.......ssp..ptlpt.lcp..hhst.t...shh............................................................... 0 85 172 207 +13373 PF13531 SBP_bac_11 Bacterial extracellular solute-binding protein Coggill P pcc Jackhmmer:B8IY91 Family This family includes bacterial extracellular solute-binding proteins. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.52 0.70 -4.73 225 7719 2012-10-03 15:33:52 2010-11-16 11:58:06 1 25 3267 36 1791 14857 3154 235.70 22 78.01 CHANGED lpl..hs..uuuh..ptshp.c.l.sstF.p.p......c....su........h.......p..l..plp....hu..so.....ut..lhp.....pl...........ppG..t..............s.....Dlhh.s..........us...phhpp.L.hpp......Ghst..............s.ttsh..sh..s.p....lslhs.tps.s................s.....................h....s.......h.p........s......h.p.s..........Ltp....s.............................sl.......+.....lu..husP....p.....s..sshGt............huh.phh..p...............ph..........................G..........................lhpplp.sp.hl...h..ssss....................pp.shph..........lt.p.G..p.....s.D...h.ul.s....h....to...ts.......................h...........ss..t.lph........l..l.Pssh...t................................h..thshulh..p....pu....tp........p...sA.psF....hpaL.tS.sp.upsl.h.pchGap 
.........................................................................................................................................................................................................................................................l.s.sssh...pp...shp...p.l.s.pta.p.p.......c...ss...................lp..l....p.h.s......a.u....u.S......up......hs.p.......pl..............................tpG..t............A.................D.l.hh..s..................................As...t....p.t..h...s.t...l....t.cc........shlt......................................ss.sps.h......ht....s..s...........l.V......l...l...s..pcss................................s....................................................p......s.........l.p.........s..........h..sD.............Lhc....s.............................................sl.............c......ls......h.s...s...P......c............s....s..s.s.Gp...................hu.h..t.h.h.p...................p.h.........................................s.........................................................h..h........p..p..l.t...p.p....ls..........h.....s.s.ss......................................+s..shsh..................lp...p...G....p............u.D......l....h...l....s............a.........p.o....-u....................................h...............tss.....ph.c.l.................V.....h....s..p.....sh.......h.....................................................................sl.....p..h...s.....s....u....l.l...c..........ps.............tpp........p.tA...csa....lc.a.L.h.S.sp.u.p.p.l...h..tchta..................................................................................................................................................................................................................................................................................... 
0 472 1088 1474 +13374 PF13532 2OG-FeII_Oxy_2 2OG-Fe(II) oxygenase superfamily Bateman A agb Jackhmmer:A1B8L5 Domain \N 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.47 0.71 -4.48 71 3255 2012-10-10 13:59:34 2010-11-16 16:25:36 1 66 1836 23 1480 2897 465 187.80 22 55.86 CHANGED G.hhhl.sal...s...t...p........p...hhphhpplh....p.pss....h.pp..th...t...Gpt.h...sl..........t.h....h....h....tthuW.........hs..........ct..t..sYcY....sstp......shsspsasshPthlhplhpchst.tt...................hs.s...hsP...sssLlNhYs.s..........uu.phGhHpDc-.Eh.....shssPllSlSLGssshF.........................................................ph....t..........................stp..css.....................spplhLpsGDlllhuGs..u..Rhta.Hulspltp.s.............................................t..................................ht.ssRlNLThR ..........................................................................................................h......................................ht.h.......t.............h..p....h...............h...................................................................................h.th........................hs..........................t....tY..pa......ss..h.................................p...st...s.h.s..s..h.P.t.....h.t.p.lh..pphtt.............................................................s..h.p.....s.ss..LlNhY.p..s....................Gs..pluhH..........p.Dpc..ch............sh.p.ssIs.S.lS.L......Gss.shF................................................................................................................ph......t........................................................................thp...pss....................................................hhplhLp.p.....Gsll.lh.sGp..s....phta..Hul..hpt.s...........................................................................................................................................................htstRlsLTFR.......................
....................................................... 0 460 838 1193 +13375 PF13533 Biotin_lipoyl_2 Biotin-lipoyl like Coggill P pcc Jackhmmer:B8IZB1 Domain \N 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.24 0.72 -4.30 99 2242 2012-10-02 20:27:15 2010-11-16 16:33:40 1 39 1526 0 592 25272 6066 54.60 29 15.15 CHANGED ps..........lsl.s.spl..uG.tlspl......hVp.-sp.hV+cG-lLhpl-s.sphp........hthpp...............sps..p.......l..t ...............................p.....tl.s.ssl......uG..hl.spl....................................Vc..-..s.Q..hV+KGpl.L.h.p.l.Dp...schp.....................ttlpp............................sptt................................................................................... 0 178 334 468 +13376 PF13534 Fer4_17 4Fe-4S dicluster domain Coggill P pcc Jackhmmer:B8J3I4 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.76 0.72 -3.62 126 4064 2012-10-03 08:56:43 2010-11-16 16:34:27 1 53 2739 54 1193 12425 4526 71.60 29 15.95 CHANGED tChpC..s..h..Css....................Cs......h..............hhh...........hs.t.......t..........sp......p..hhpph.....th..............s....hh.p.p.......h..t.t........p......t.h...ppCstCGh..Cp..htCPt..sls .............................................pClhC.u..t........Css..........s.............CP.....o....................................ahh.....................ss.c..........c...................................ss.......s..l.h.t.t.h...phh.............................................ts..ph.p..t...........h..pp..t.................h........pl....pC...tsshs.........Cs..psCPpGls................................. 0 417 815 1025 +13377 PF13535 ATP-grasp_4 ATP-grasp domain Coggill P pcc Jackhmmer:B8J4T0 Domain This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity. 
26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.16 0.71 -4.55 109 4886 2012-10-10 13:17:03 2010-11-17 14:52:06 1 70 2718 6 1546 35532 13206 172.70 17 35.99 CHANGED th+.cKhth+phhp.ttGls.sst.......thhlpshsshpthhpth........h....PhllKPtpu.su.....S...hslhh...lp...sts-lpsh..hpphttph.................tthllEpals............G...s.asl-uhhh....cGch.hhhsstchhhs.s.ph.....h.hsth....tt.......tthpphspph..lpsh.uhp..pGshHhEhhhssc.G....hhhlElss.Rs.uGsth .......................................................................................h..psKhhhp.c....hhp...p....t..G..l.s...sst.......................................t.t..h...h......s......s......h........p.......p.....h.......t.....p.....h......h...p.p.hs.............................h...........P...l.l.lK.......P......t....s......u...su...................u...........tu.l...hh.................lp...............s.t.p..-....l...p...p.h.....hp..p.hh.tttt........................................................................thl..lE.c.a.ls..............................................G.......pp...h.......s....h........p....s......h..........s...........sG.........c..........h.........h........h........h........s......h.....t.......p..........h........h....h....t.............................h........................h.s................................................ttl.h..p.h.s....t....t.h.........h.p...t...h....s.h....................u...h...h...p...h...-..h..h.....h.........s........t.......p....s.....................h.h.h..Ehs..R....t............................................................................................................................ 0 543 1039 1334 +13378 PF13536 EmrE Multidrug resistance efflux transporter Coggill P pcc Jackhmmer:B8J0H9 Family This is a membrane protein family acting as a multidrug resistance efflux transporter. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.59 0.71 -3.88 52 1585 2012-10-02 19:55:49 2010-11-17 16:30:08 1 10 1267 0 447 11223 5450 114.20 18 37.49 CHANGED AulR...ah..hhhhhhhlllh...h...+t..p...lt..........p....h.hp..th+p...pshh......h....hlhhuhlGhslh...hshhshusph.....u......s.u.hls.s...h.ht..h..sslhsslluhhhh....p.....p...chst+............tllhs.hllhsGl..hllthpp.h.tu...hs ..................................................................h..hp.h.h...h...h..h.h..h.h..h..h..h........h.....pt......p.....h.t..............................t....l....hp......t..h.pp.......p..hh....................h......h..l..h..h..u..h..l.s.h.........ht.....hhhh..h.hA.hth....u...........s..u...hlu...sh.ht..l..ssl.h.ssLluhhhh.............+.........c............+.ls.h.t..................................................ph..l.u.s.hl.hh.h..Gl.hh.hth.......tt.................................. 0 132 261 374 +13379 PF13537 GATase_7 Glutamine amidotransferase domain Coggill P pcc Jackhmmer:Q2LTR9 Domain This domain is a class-II glutamine amidotransferase domain found in a variety of enzymes such as asparagine synthetase and glutamine-fructose-6-phosphate transaminase. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.39 568 6228 2012-10-03 21:14:07 2010-11-17 16:30:36 1 28 3641 10 1948 11436 7426 122.40 33 21.85 CHANGED HpRLul..........ssGpQPhh........t.ssp...hs..llaNGEIYNa....p-LRp.-Lt..s.t..G.....a...p..F..p.o.p.SDTEV.lhthhtt.................aGpssl.......p+LsGMFAFAla..D....p..c..pp....pLhlARDRhGlKPLYYsh........p.......ut....sl....hFuSElKALls .....................................................................tRLul....ss..tt..u.tQPhh....t...............t.st..p..........hsls.......aNGE.......lY.........Nh..........ppLR...p...cLt.....p..p......G.................h......t...F......p.....o......s......S.....D......o.EV....l.l.thhpc...............................................ts....t..csl....................cc..l.........p..G..h..FA.F.s..l.h.D...........p.c.pt......tl.h...h.uR.D.hGl.+.PLahup..................pp........ss.....sh......hhuSEhpAL.................................................................... 0 640 1243 1641 +13380 PF13538 UvrD_C_2 UvrD-like helicase C-terminal domain Coggill P pcc Jackhmmer:B8J482 Domain This domain is found at the C-terminus of a wide variety of helicase enzymes. This domain has a AAA-like structural fold. 
27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.48 0.72 -3.89 132 6202 2012-10-05 12:31:09 2010-11-17 16:43:43 1 104 3952 12 1430 12988 3540 110.90 23 15.99 CHANGED hths-thlhhhsshpp....................tlhsspltph...tpht.........pttttlpt............................................hsasholH+upGuEassVhlstsstt...ht..........htpch....LYTAlTRApptlhll ..........................................................................................................................h...uc.lh.hhNs.th...................................slhNG..slGhh..h.t.h......t...................................t.t..h.ht...................................................t......p...........................hphuaAhTl.HKuQG.SEFst..Vl.l...hh..ssth..........................................................hhp+pL.........lY...TAlTRA+cpLhl............................. 0 456 937 1217 +13381 PF13539 Peptidase_M15_4 D-alanyl-D-alanine carboxypeptidase Coggill P pcc Jackhmmer:B8J0M5 Family This family resembles VanY, Pfam:PF02557, which is part of the peptidase M15 family. 25.00 25.00 25.10 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.91 0.72 -3.53 215 1064 2012-10-02 01:02:30 2010-11-18 11:18:06 1 38 858 0 257 963 223 71.80 28 28.71 CHANGED su.sst..hSpHs..........aGhAl......D..ls........shh.hstp......................................................................thstspp..ls..p.hhtp.........hG....tWGGc.W...............h..h..Dh.HFph ........................................................tts....tp..pStHh..........h..GhAlDls.......t..s.hlt.h.ssp...............................t..............................hphst.h.pp..hs..p..hhpp......hG..h..p....WGGc.W..............................p.....sh...hDt.HFph.................................. 
0 109 185 224 +13382 PF13540 RCC1_2 Regulator of chromosome condensation (RCC1) repeat Coggill P pcc Jackhmmer:B8J0U2 Repeat \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.26 0.72 -4.24 320 3569 2012-10-05 17:30:43 2010-11-18 13:23:35 1 413 646 32 2155 12729 3878 29.90 34 6.54 CHANGED l..s..p....lu...s..G.t.tH.....shu.Lp.s....s.Gs.......lhsaG..s..N..s..tGQ .......................hht.lu...s.....G..t..tH....................ohu.Lp.s.......s.Gp..................lhsaG.....p...N....p...hGQ........... 0 959 1559 1984 +13383 PF13541 ChlI Subunit ChlI of Mg-chelatase Coggill P pcc Jackhmmer:Q2LRR9 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.31 0.71 -4.59 173 7224 2012-10-03 01:04:38 2010-11-18 16:46:55 1 17 4260 0 1657 8626 2674 111.30 31 23.71 CHANGED VEVcl..u...sG.L...Ps...aslVGLP-sAV.KES+...-RV+oAlpN......oGacaPsp+ITlNLAPADL+K-GutaDLPIAlG..IL......u..u.....p...t.p...h..s..t..h...........p..p...hh...h..lGELuL-GpLRslpGsLPhsl....tA.p.ctu.h.+p.lllPtpN ........................................................................................................................................................t................sGh...t.......h.......s.......s.....p.....c.....l...h.............lN......l..s...........s.u....s.......l...+.hs.....tsuhDLulAlAll.......ou.....h..p..p....h..s..hs....................p..p..hhhlGEluLsGclR.....sVsu.s.pplt....EA.t.....+....h..G....a.c.p...hllPptN........................ 
0 556 1110 1418 +13384 PF13542 HTH_Tnp_ISL3 Helix-turn-helix domain of transposase family ISL3 Coggill P pcc Jackhmmer:Q2LSW8 Domain \N 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.06 0.72 -4.77 134 3356 2012-10-04 14:01:12 2010-11-18 17:14:59 1 11 862 0 381 2084 96 51.40 41 17.15 CHANGED tpss.hlpctsphTpthcp...hlhphhtpp...ohpslActhsluhpTVpclhpchs ..................A-Ts.lV+KNpQIschlsQ...KIAQpLlE+..hSMT-IA+pLulSTSTVhRhLsca.p............... 0 89 236 288 +13385 PF13543 KSR1-SAM SAM like domain present in kinase suppressor RAS 1 Page R kellrott NMR Structure Family \N 27.00 27.00 28.00 27.60 26.20 25.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.65 0.71 -4.35 10 142 2012-10-02 20:42:54 2010-11-20 00:12:54 1 6 79 0 70 133 0 111.40 45 15.00 CHANGED hshlQcMIDISus+LcGLRTQCAs.Ss-LTQQEIRsLEuKLV+aFSc.LlsKp+lsEc.s...AstL.sY.....PcLpQWLcVVGLpscolpulhsplpTL-shLcMs-cEl+plLsc...spsp.EEEp+RLppAhpNLR .................lQphI-lShspLpG.LRTpCuh.....S...ssLTQpEIRsLEuKLV+YhucQL.sKh+.ls.p.....sstL.sa............PpLppWLclVslp.-slptl...........s.p...h..oL-sLLchs-tcl+phlpc.....husp..pEEstRLstALpsLR........................... 0 18 24 45 +13386 PF13544 N_methyl_2 Type IV pilin N-term methylation site GFxxxE Coggill P pcc Jackhmmer:Q2LVK2 Motif This short sequence motif appears a the N-terminus of type IV prokaryotic filamentous adhesins or pilins. The N-terminal residue, which is methylated, is hydrophobic (generally a phenylalanine or a methionine), and this leader peptide is hydrophilic. The fifth residue of the mature sequence is a glutamate which seems to be required for the methylation step. 
27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.49 0.73 -7.55 0.73 -4.25 777 9430 2012-10-03 10:38:27 2010-11-22 10:00:16 1 25 2209 25 2394 7502 2666 27.30 43 14.94 CHANGED M...............h.ppppGFTLlElllulslhull ....................h.................ppppGFTLlElhlVlsIlulL......... 0 899 1563 2033 +13387 PF13545 HTH_Crp_2 Crp-like helix-turn-helix domain Bateman A agb Jackhmmer:A1B3V6 Domain This family represents a crp-like helix-turn-helix domain that is likely to bind DNA. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.15 0.72 -4.19 94 8343 2012-10-04 14:01:12 2010-11-22 11:39:05 1 18 3028 101 2466 7943 935 72.00 22 30.98 CHANGED pR.lApaLlthtschst....................hpls.lopptlAphlGl.pR.ps.lopslppLpppGlIp......h.....p+.s.plplhDhptLpph .................................................RlsphL..l.t.h..s.pp.hst........................................t.......hpls...loppcl...Aph...lGs.o+.po.lsRhLpchpcc.G.lI.p.......h................pp...p.pl.tl.hchptLtt.h...................... 0 769 1663 2072 +13388 PF13546 DDE_5 DDE superfamily endonuclease Bateman A agb Jackhmmer:C0W963 Domain This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.69 0.70 -5.54 14 1337 2012-10-03 01:22:09 2010-11-22 13:29:01 1 10 331 0 429 1483 75 212.80 20 63.11 CHANGED sLpphhspassh....Fspt..cpppthhthltGhlushpR+ol.tluh..hhutpssp....uhpchlspsca-tsplpttltp.shsph....sssptllslDsTshsKp.....Gp+osuVtRpasGshG..Ktpss.sslalh.sspth.pshlstplalP.ptWhps......t.+pphsslsss.hapsK.plAtthlcphhtsu.h.hth..lssDutYu.tcttFlptLpptt.hthlsplpsspshahtsst.ts.......pGRPthhs.h.....hslpssplh....phhspsapphsh+ttsKG. ..................................................................................................h....................h.t...........t..t.h..hh....s.....hh......tp.+s.h..hut......h....ht..t.t.t.....shpph.lspsp...as.t.t.lht...tlht.h..h...h.............ttttt.h....hhlD-osh...Kp......G..h..po.s....sl.s.+...p..a..ssph.G......+.......h....t....p.s.....hs....l....h....hs.......st........t....p...h.l..shp..l.h.h.P...t..thhtt...........................cp.pts...th....p...p.....h..h...t....s..K......t.....lsh.t...lpp.h..ht......t..h..t.h...lluD..uhYu....pt..th...h...t...h.pp..h...s...h.....hhhtlp.psphh..h............................................................................................................t.............................................................................................................. 0 104 270 331 +13389 PF13547 GTA_TIM GTA TIM-barrel-like domain Bateman A agb Jackhmmer:A1B6L4 Domain This domain is found in the gene transfer agent protein. An unusual system of genetic exchange exists in the purple nonsulfur bacterium Rhodobacter capsulatus. DNA transmission is mediated by a small bacteriophage-like particle called the gene transfer agent (GTA) that transfers random 4.5-kb segments of the producing cell's genome to recipient cells, where allelic replacement occurs [1]. The genes involved in this process appear to be found widely in bacteria [2]. 
According to the SUPERFAMILY database this domain has a TIM barrel fold. 27.00 27.00 27.00 27.10 25.80 26.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.00 0.70 -5.43 66 218 2010-11-22 15:16:03 2010-11-22 15:16:03 1 5 195 0 83 214 30 265.80 48 25.22 CHANGED WuYRRFILHYAtLst...u.....AGG.V-uFlIGSEh+GLTplR............suss..uFPAVstLtsLAu-VRulL..........Gsss+loYAADWSEYaGapss-GsG-haFHLDPLWAcssIDFlGIDsYhPLoDWR-GpsHhDAt.......................thsulaDhsYLpuNlpGGEGYDWaYsos.........tsRsAQhRoPITDushuc.......PWlaRhKDl+sWWsNsH...a...-RsGGVctusPTuWlPpSKPIWFTEhGCsAVDKGsNQPNlFlDPKSSEStlPhaS..........sGtRDDhhQppaLcAhlsaWs......s......sspNPsSslYG....GtMlD.schalWsWDARPaPtFP ..........uYRRFILHhApLsh...t.......AGG..V.DAFlIGSEh+GLTplR............ssts..saPhVstLpsLAu-lRulL..........G.sss+loYAADWSEYauapstsGsG-hhFpLDPLWAcssIDhIGIDsYhPLoDWRDustp.ss........................thts.aDh.shLptsltuGEGa.DWYYsos................tsRtAphRsPITDGhhuc.......PWlaRhKDl+sWWsN.Ha......sRhsGlc.us...........sTuWlPpSKPlWFTEhGCPAVDKGsNQPNlF.DPKSSEsthPhaS..........sGtRsDhhQcpaLcAhhpaWp......s...pNPhSs.lYG....G.Ml-.pchalWsWDARPaPtFP............................................. 
0 18 58 68 +13390 PF13548 DUF4126 Domain of unknown function (DUF4126) Coggill P pcc Jackhmmer:Q2LV71 Domain \N 27.00 27.00 27.30 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -10.88 0.71 -4.84 52 380 2010-11-22 16:08:38 2010-11-22 16:08:38 1 3 355 0 157 366 68 174.00 31 86.32 CHANGED slhLuhGLuhAuGlplYlslLhlGl.hut.hGh..lsLPss...h...phLssshslsssulhhllEhhADKIPsVDslhDslpThlRhsAGuhL.sAu..sh....uc...hsP..sh..........p..hshAslhGGuhAussHus+uusRsslNsostsluNslsSssEDssslsh.hlAlhhPllhlll...ll.......lh...l...lhsh..hhlh+lh ......s.lhluhGLuhAuGlRl.a.lslLhhGl.hut..hGh.........l.p.LPss...h........p.hLssshVlsshulhslsEhhADKlPhlDohhDslpThlRhsAGAlluAu...sh.......up.....hsshh.............t..h.hAslsGGsh.Asss+hs+u.usRshlssostshuNh..lsShsEDshsluhhhlAhhhPllhhll.hl.......hh....l.......hhsh.hhh.p..h....................................................... 0 44 105 139 +13391 PF13549 ATP-grasp_5 ATP-grasp domain Coggill P pcc Jackhmmer:Q2LV27 Domain This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.14 0.70 -5.13 220 2119 2012-10-10 13:17:03 2010-11-22 16:09:47 1 27 1555 1 765 4111 1934 216.30 36 28.42 CHANGED pAlsc..u..Rsh.LsphEuptlLsAYGIsss.tohl...Ap.ospEAsthAppl...G.aP.VslKlhS.....Pc...Is.........H.........KoDlG...GVtLsLp.ssppVcpAhpplhppl..ppth.........P.s..........................Ap.l.pGlh..VQpM...sp......tupElllGsspDPhFGPllhFGh.GGshVElhpDt.........uhsLP.P..........Lsh.slAcc.h.l.pps+st.plLp.G..hRs..p.P...sD.........hsALtphLl+l....SpLls.....D.h.Pc........ItElDINPLls....ss....p........G..shAlDARltl ..................................................tt.tt...Ls..ptEsp.s.lLpuYG...lssh.sshl............As.ss.sEAlp..hAc.pl..........G..a.P.....V.s.lKlhS..........Pc.......Is.........H.......................KS.-...l.s.....GV.hL.......s..Lp...sspc.Vp...p.Ah.ps.l...h...s....p....s....phh.h........P.p.......................................Ap....l..c....G.lL....V.Q.pM.....ss.........sutE..Lhlu..l..p..c.D..P...s...F..G..P..l..l..h.h..G.....GGl.h.s-.h.h..c.Ds............s.h.t.L.s.P............Lsh....s...A.c...t....h...l....pp..l..+..st.......c.....l...l..t..u.....h+.sp...s....lD................l.s.u.LsplL.l.p.l....Sp.Lls.......-.t..P-...........I..p..c..lDINPLl..s..ss.......s.........t....hsAlDspl.......................................................................................................... 0 255 518 663 +13392 PF13550 Phage-tail_3 Putative phage tail protein Bateman A agb Jackhmmer:A1B6L4 Domain This putative domain is found in the large gene transfer agent protein. These produce defective phage like particles. This domain is similar to other phage-tail protein families. 
21.80 21.80 21.80 22.00 21.40 21.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.60 0.71 -4.47 106 1924 2012-10-01 22:58:23 2010-11-22 16:42:01 1 32 968 0 203 1978 371 159.10 38 15.89 CHANGED sp..pss+sslpsLhpsashsshpps.uplph.htc..s...................s.ps.s..hsl.stsslstssp..t.........t......hphs....Rssps....-hssplplpah-ss...ssYpsss...sp.utp......s...s....sts.....tthsshphshshs.....pspApplupphLtpstsspcs.hpasls.pt.ht......lp..PGDllp.ls...s.................s...p....st..thRlspl...c....p ....................................................................pppcAa-VLsDhsushR.shs..lWsG.p...s...Lsh..lpD..t...................s.-h...V...asa..spusVVhs-p.t...............FcYo...ho.uh.+....DR.asuVcVsa.....hDPs.....Nuapsus.......EhVcD.........s...p......ultR..............hGcNhtch-A.aG.CTS........RGQA+....RsGhWllcTphhEspT.VsFolGhcG.lp......hs..P.GDlIc.lsDst..............huG..h....thuGRlhul..s..................................... 1 37 114 158 +13393 PF13551 HTH_29 Winged helix-turn helix Coggill P pcc Jackhmmer:Q2LS51 Domain This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.55 0.71 -3.92 85 3523 2012-10-04 14:01:12 2010-11-22 16:50:17 1 49 1390 0 1218 5925 803 103.40 18 36.88 CHANGED RtpslhhhspGh......tssphuchhul.sppslh+hhcpap.ptG...hsu...Lhs...ptp..tspspp....lsstpcp....tllp.hsp.ppssp....G........shhohphltp.l...........hshplStpslpphlp+.tu ...........................................hlhh.h.tp.uh...........hpph..uchls..l...upp.ol.h+ah......p.p.ap..ptG.........hpu.................Lhs......pt+......ss.p.spp......hstp...hpp..........tl.h.p..hhp.pps.t.....s...................ph.o.h..p.hl..tp.h.....................s..h.t..l..u.....ps..l.tphhht............................................................................. 
0 289 753 1056 +13394 PF13552 DUF4127 Protein of unknown function (DUF4127) Bateman A agb Jackhmmer:C0WER5 Family This family of uncharacterised bacterial proteins are about 500 amino acids in length. 27.00 27.00 46.40 46.20 17.60 17.30 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.60 0.70 -6.10 50 186 2010-11-22 16:53:42 2010-11-22 16:53:42 1 4 127 0 59 185 2 426.60 27 92.87 CHANGED +llalPLDsRPsshpash.luphs.shpllsPPpchLushppsucs-tLtpWLppp.s..ss-shllSlDtLlYGGLlsSRppphshcpshpRLphL+pl+ppsPshpIYAFslIhRsst.s..sspp-PtYa...tpYGtplaphuh.......hhD+.t....hs.ppttph..tplpttlPtchlpDahs+RpcNhplNtthlcLsc.cGllcaLslspDDss.auhsshEpcpLtphhpphsl.tp+...............lhhaPGADElGhsLluRshsphtt.p....PplhlhYsssputphls.YEspslpcolpp+lpuuGuhhsss.ppADhlLhVsssstttt......tt...........pshpphsppIpphlsp.GcsVulADlAasNGuDppLlphL.tctshltcLtuYuGWNTuuNolGoulApuhlthth...............stpsphphLhtRhl-DahYQusVRsplp..cphtt.....shshhslscpptt.............hpphhpp.......hhpphhtpphshphhthph..............phPWsRhFElslpl ..................llalPlDsRPsshphsh.huphs.shpllsP....Ppphluth..........pps.............ussptlhpWl.pp.ht.pscshllShDhLlYGGLlsSRhpph........s.pph..........t+h.phlcpl+ptpsph.lYsFshlhRs...s..s....sst.-s...aa.tas...tlh.phuh.......l.c+.t.....t.ht.t-ttph...ttlpttlP..phlpcaht+RppNhtlNpthlp.hsp..pGhhshL..slspDDss..hu.ssh-...tptLtthhpp......ht....pc................hhhhsGADEluhhLlsRhhsch...t.h....PplhlhYs.stutthls.YEs.t...sltpolptpltusGuhhsp..tpuDh.lLhlpss.......................................................tshtthsptlpphltp.sh.lulsDlu...hsNG...u-ttLhthL..pts....h.......l.cltuYuGWNTsuNolGhslupuhlhhhh........................s.ttphphLh.chl-Dah...Ypu.lRpplt.pth.t......t.s.hth.t.pttt.t.............hpthhpp......................................hpphhtt....hh...h........h..ph.......................phPWpRhFEhphp......................................................................... 
0 21 38 55 +13395 PF13553 FIIND Function to find Weichenberger CX, D'Osualdo A kellrott Joint Center of Structural Genomics (JCSG) Family The function to find (FIIND) was initially discovered in two proteins, NLRP1 (aka NALP1, CARD7, NAC, DEFCAP) and CARD8 (aka TUCAN, Cardinal)\ [1]. NLRP1 is a member of the Nod-like receptor (NLR) protein superfamily and is involved in apoptosis and inflammation. To date, it is the only NLR protein known to have a FIIND domain. The FIIND\ domain is also present in the CARD8 protein where, like in NLRP1, it is followed by a C-terminal CARD domain. Both proteins are described\ to form an "inflammasome", a macro-molecular complex able to process caspase 1 and activate pro-IL1beta [2]. The FIIND domain is present\ in only a very small subset of the kingdom of life, comprising primates, rodents (mouse, rat), carnivores (dog) and a few more,\ such as horse. The function of this domain is yet to be determined. Publications describing the newly discovered NLRP1 protein failed to identify it as a separate domain; for example, it was taken as part of the adjacent leucine rich repeat domain (LRR) [3]. Upon discovery of CARD8 it was noted that the N-terminal region shared significant sequence identity with an undescribed region in NLRP1 [1]. Before getting its final name, FIIND [4], this domain was termed NALP1-associated domain (NAD) [5]. 
27.00 27.00 28.40 28.10 24.30 24.10 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.61 0.70 -4.95 7 140 2010-11-22 18:21:59 2010-11-22 18:21:59 1 31 32 0 69 160 0 224.30 41 24.92 CHANGED RV+hPsAGoYphPsTGLtFlVpcsVTl-IcFCu..WsQaLc.ch..pcsWhVuGPLFDIKAEP.GAVsulaLPHFluLpsGcVDs.ShFpVAHFK-cGMlLEpPsRVE.aasVLEsPSFSPhGlLLRhlsushh.lPlTSssLlYa+l.a.E-lsFHLYLlPsDsslpKAIDDc..EhKFpFVRIpKPPPlssL..hGuRYlVSGS..spLEIhPcELELsYRSPGE.QlFSEhYlGphtssI+Lplp-K+atsLVWcALlKPsDL ......................................................................VphPsAGpYphssTGLtFVVppsVTlcIpFss..Wspa.Ls.ch...ppsWhVuGPLFDIpA.Es..sAVsslaLPHFlsL.p.s..u.cVDs.ShFpVAHFp-cG.MlLEpPu+VcsaasVL-sPSFS.hGlLL.+.hhpushh.lPlsShsLlY.a+h.......p.--l..p.hHLYLlPsDsslpKAID-c.......E.+F.phl+lppsPPhp.s.L..hGs.cYhVSss.......t.lc.lhP.p.E.Lc..LsY+Sstc.QhFochahtphtp.IpLplppp.....pp.sllWcs.l+ss........................................................ 0 20 29 43 +13396 PF13554 DUF4128 Bacteriophage related domain of unknown function Wahab A, Serrano P, Geralt M, Wuthrich K kellrott Bordetella bronchiseptica RB50 PDB:2L25 Family The three-dimesnional structure of NP_888769.1 (PDB:2L25) reveals\ a tail terminator protein gpU fold, which suggests that the protein could have a bacteriophage origin. 27.00 27.00 27.00 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.35 0.71 -4.47 28 150 2010-11-22 18:52:35 2010-11-22 18:52:35 1 2 141 1 34 122 78 121.30 22 89.05 CHANGED tIppulct+lsphs...tt...sl.lAa.NlsFssssut....sYLpsthhPusTpshsLutct.phhpGlhQlsVlhPsGpGsstspplAspltstFtsspplppsshtlhlppsPphussIssssphplPVolpY ................Ipthl.s+ls.s.hs.....tht..shs.lsa.Ns...s...........Fssssst....halphphhsusotshslu..t..ss...h..hhp..GlhplslhsPsGsGssthppl.AcpLtphFp.....th...s.....h.t..........t..............h.hshph...................................... 
0 3 18 26 +13397 PF13555 AAA_29 P-loop containing region of AAA domain Coggill P pcc Jackhmmer:C2M2Q6 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.25 0.72 -4.44 68 804 2012-10-05 12:31:09 2010-11-23 10:58:31 1 6 755 0 192 5015 2079 55.80 31 5.90 CHANGED LpclpllNWGoFcs.cshs..ls.tG...s..sLloGssGSGKSTllDAlpslL..sPsp..t.h..tFNpA.A.sss ......................................apt...p.hs.....h...s...p...t..G.......h...hlIoGs.oGoGKSTllDAlp.hh..L....hsps.................t......................... 0 60 123 166 +13398 PF13556 HTH_30 PucR C-terminal helix-turn-helix domain Bateman A agb Jackhmmer:D3Q4F0 Domain This helix-turn-helix domain is often found at the C-terminus of PucR-like transcriptional regulators such as Swiss:O32138 and is likely to be DNA-binding. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.48 0.72 -4.38 456 5435 2012-10-04 14:01:12 2010-11-23 15:02:24 1 29 2226 4 1399 4584 150 58.40 30 14.07 CHANGED L.lcTLcsal.pss.tshspsAppLalHpNTlpYRLc+lpclhG.hc.lp...s.sps....th.pLtlAlpl ..........................LhcTLpsa..h..pps..sshspsAcpLalH+NTlpYRL..c+lpcls..G.hs.lp....s..hcs....th...lhlAl.......................... 
0 544 1037 1249 +13399 PF13557 Phenol_MetA_deg Putative MetA-pathway of phenol degradation Coggill P pcc Jackhmmer:Q2LTI1 Family \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.95 0.70 -4.57 154 1206 2012-10-03 17:14:37 2010-11-23 16:08:22 1 10 655 0 439 1221 272 237.20 14 76.57 CHANGED uspssssp.....Gppls........shchph...pssh.phhhhp.s.p....h.hss..thuhthh..l.s.hh...phph.phs......t..........hpss..t.....G.huDhhlushhhhatp.t..........shph.......shthtlthPsGsasts.ps......s...........shhshssphuhs...hs.s.thslssshthsh...tscsss....................phptt.sthphshshsh..t...h.st.....ph.ssGltua.hh..............t..s..........ts..sctpshslGsulsahhsts....hplshphtpph....sspsthtussh.hh+h ..............................................................................................................thth..........h...th.h...............tt.....phsh.phh....l....hh.......phph..ths........t...........................................http..ts................G.hGDltlushhhhhptst...................shth..............................shthtlphP.T..Gshsts.p...........................ut................shash.ssthuhs......ht.......s.....ths.lssph.thth..............ttcsss.....................phptu.st.h...phshshsa.......t.l..ss..........ph..ph...ulssh...hh.........s..................p..s.t...........ss....sctpsht..lss..uhtathstt..........hplphphhhsh........sspsh..t..h......................................................................................................... 0 137 262 362 +13400 PF13558 SbcCD_C Putative exonuclease SbcCD, C subunit Coggill P pcc Jackhmmer:C2M2Q6 Domain Possible exonuclease SbcCD, C subunit, on AAA proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.25 0.72 -4.07 152 3904 2012-10-05 12:31:09 2010-11-23 16:14:37 1 21 2832 14 954 3276 186 85.50 31 7.53 CHANGED pphhDhRsa.hs.aplphp....pts.........u.ppht...h...hpshsshSGGEpthhhhlsLtAAluthh..st...........tss......hchlhLDEAFuphDscphppshp.hhpp ........................................h...........t.hplph.......p..................s...tp......h..........h+pstsLSGGEs.................hhsuLsLu..LuLushh..pp............shs.................hchLFLDEuFusLDs-olcssh-sLc.t.......................... 0 313 591 808 +13401 PF13559 DUF4129 Domain of unknown function (DUF4129) Bateman A agb Jackhmmer:D3PZP2 Domain This presumed domain is found at the C-terminus of proteins that contain a transglutaminase core domain. The function of this domain is unknown. The domain has a conserved TXXE motif. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.43 0.72 -4.01 194 1189 2010-11-23 16:26:06 2010-11-23 16:26:06 1 12 838 0 462 1144 159 72.80 18 14.34 CHANGED ptapthhp.hhtpt.ulspp...s...upTspEh....spcs....sttt.........t..lpp..........lsphacps+Y.....Gscs.sstp.ph......pphtpthcpl ..........................hatthhc.hhtph.ulshp........sucTspEh...spcs..............................spths.........ssshpp................lsphappspY............upps..sstt..ph.........pphhphht............................... 0 148 321 418 +13402 PF13560 HTH_31 Helix-turn-helix domain Bateman A agb Jackhmmer:D3Q8V6 Domain This domain is a helix-turn-helix domain that probably binds to DNA. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.90 0.72 -3.77 149 10133 2012-10-04 14:01:12 2010-11-23 16:28:26 1 174 2365 13 3830 31197 3989 66.80 24 29.09 CHANGED pLGttL+p.hRp..ps..........................G.l.otp.plApths........h.Sts...plsclEp...Gcp...s.........shs....hlptls.ch.hs..s..s...........ss.....th.p..tL ........................................................................................hGttlRp.hR.p....p.t...........................G..l..opp...pl.A.p.t.sG................l..S....h.s.....hl.sp.lEp.......Gc.p.....s..............................oh.p..........hlt..t...ls....ps.Lt....l..s.......................hh........................................................................... 0 1127 2704 3367 +13403 PF13561 adh_short_C2 Enoyl-(Acyl carrier protein) reductase Coggill P pcc Jackhmmer:C2M4X7 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.50 0.70 -4.60 47 22490 2012-10-10 17:06:42 2010-11-23 16:43:09 1 80 4615 768 7643 72809 33208 228.60 25 81.38 CHANGED Gshs-pSIuWtlActht-pGApllloshs......................t...............................hh...stlp.plucph..s.....sc.....l.......l..shDss..st..-c...................lpplhpplpcph.......GplDhllHSIuhusp.t...htpshh-.s.shctahpuhslSuhSahslhpthh...hhscs......GSlluLoYlAupRs.hPsYs.sMusAKAuLEShsRsh.AhchGc.+slRVNsIStuPht.TpA.upuIss........h-p......hhpaupphuPLsp..s.su--lAsssshLhS-LsptlTu.pslalD..GGhs 
...........................................................................................................................................................................us.....pGIG.h..u.h.A.....p...t...h...s....p.....t....G.....A...p......l...s...h...s...h.s.t.......................p................................................................................t..........t........h...p.....p...h.....h...p....p...h...s...............tp..........s..........................h..ts..D..l..s....st...pp.......................................................l..p..p...h...h..p...p....h...h...p..p.h.............................G..p..l...D..h....l.....V......s...........s.......A.....G............h............s...............................................s...s....h........h.............c............h.......s..............t...........c........t.............a.........p.............p...............s........h.....s.......l..........s...........l.......h..............u............h.....h.......h.....h....s.....p....t....s.hs..............hh.....p...p...s.................................G..u........I......l........s.......l.........o............S..........h...........s...........u............h............p............s..........h...........s..........s...........h..........s.........s...............Y.......u......s.........u......K.......A.......A......l.......t.............u.........h............s...+........s....l....A...h....-....h...........u.........s.................p.........G.....I.....R...V..N......u.....l.......u.........P.......G.........s..........l..........p......T..........s........h........h.....p....s.....h....h..t..................................h.p.p....................h...h...p.....t....h......t.......p..........p.......s.........P...........l.............t........R......h...........u......p.......s.........c.....-..l.......A..s..s...s.h..F...L.s.S....-.t.u..u....a...l.T.G.p...s.l.hV..D..GGh....................................................................
................................................................................... 0 2101 4453 6315 +13404 PF13562 NTP_transf_4 Sugar nucleotidyl transferase Coggill P pcc Jackhmmer:C2M1K0 Domain This is a probable sugar nucleotidyl transferase family. 27.00 27.00 32.60 31.80 22.90 24.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.88 0.71 -4.58 35 114 2010-11-23 16:46:08 2010-11-23 16:46:08 1 3 114 0 61 123 80 160.00 28 39.69 CHANGED phlLF--st.tppLhPhTaTRPVu-lRlGIhTl+EKWpphL.s.......ssp.s.ohhsccYL........pp+aP........sph.p.t-...s..........lhlNuphlPsc.tlsphIps..L....c.Gp..ul.hps-.....cllAhc....................h..cp.....schs.............sh.................ttlp....htpshhhlcpsWDlFshNsptlppDach.lo..p.GRsS .....phlLFDsss.tppLhPhTaTRPVu-lRlGIhTl+EKWpphL.s.......s.ssohlTcsYL........ppKaPh.............sp..t..tp...s..........lhlNushlP.sp..tlhphlpp......L....p.sp...Alhts-........pllAhhhp...............tpp.ss.hs..............ph.................phlp....htpshhhlcpsWDlFshNsptlptDaph.lTpsR.............................. 0 29 48 58 +13405 PF13563 2_5_RNA_ligase2 2'-5' RNA ligase superfamily Bateman A agb Jackhmmer:D3Q0T5 Family This family contains proteins related to Pfam:PF02834. These proteins are likely to be enzymes, but they may not share the RNA ligase activity. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.15 0.71 -4.50 139 2076 2012-10-03 21:31:48 2010-11-24 11:46:27 1 31 1509 1 702 2552 184 152.70 17 68.99 CHANGED sssshttt..lppl+.................thhstt...htths......sHlTLhhshhssp...........................spltptlpphht....................................................phps...Fp.lpl..sshstF.........tst.slalps.........................pstppltpL+pplhps......h.th...h..tt.............................tsapPHlT.....lupthsstsh...................................ptlhptl.......................tth.shp....hpls..plsLhphpsss ...........................................................................h..............h.........htt........HlT.L..th.h..t.c.h.s.pt..........................spltp.t.l.pph.ht....................................................ph.t...s........hp....l.pl....sth.us.Ft...........tss..sla.htl..........................................tssspLtpLpp.....plppt......htth........sh....................................psatPHlT.....lAp.t..h.s.spph...............................tphht.l........................t......hp......h....lp..phtLh......t............................................................................... 0 219 478 611 +13406 PF13564 DoxX_2 DoxX-like family Bateman A agb Jackhmmer:D3Q9W4 Family This family of uncharacterised proteins are related to DoxX Pfam:PF07681. 
24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.47 0.72 -4.05 115 1334 2012-10-02 13:32:46 2010-11-24 11:51:27 1 7 742 0 518 1577 778 103.00 19 78.74 CHANGED llsslluhhhhh.uush.+l.h...ps......t..h.....h.....t.....shs.........p.h..G...a..P.tth.hhh.lGshclhGuluLl.....l.s..h.h....h..s...hls.hAAsGlshhhlGAhhsH..l..pts-s..t......t.....hshsl..shh.sl..shus .............................hthlluhhh.hh.suhh.+l....h...ts................p..h.......h.....p.....ths.................p.h..G.....h....P...th..h....h..h..h..l..G..s....h..ElhuuluLlh.s....h...............hlshhA.uhhl.s.hh......h.l.u....A.h.h.s.H.....h.....tstc.....h............t...........hhhsh..hhh.hh....h......................................... 0 202 354 442 +13407 PF13565 HTH_32 Homeodomain-like domain Coggill P pcc Jackhmmer:C2M820 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -10.26 0.72 -3.18 486 664 2012-10-04 14:01:12 2010-11-24 14:43:25 1 19 411 0 337 3654 468 74.30 20 27.49 CHANGED Tl...t+aht+...a....pp...tG....ts...........t....tp....sups.....hp......sph..ptlhphht.....pp.................hospplsph.ltp....t............hS.tsolt.R.hL ................................................................oh.+Whp+..a..............pp....tG....tul.s...................h..t.tc....sG+s......pp......hssch..ptlhph.hp.....pps.....................phoscplspp.Ltp......th.h........ShsTlh.+hL........................................ 
0 104 210 313 +13408 PF13566 DUF4130 Domain of unknown function (DUF4130 Coggill P pcc Jackhmmer:C2M6X4 Domain \N 27.00 27.00 27.70 28.50 24.80 24.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.07 0.71 -4.68 156 594 2010-11-24 15:37:12 2010-11-24 15:37:12 1 4 583 0 199 587 26 164.90 28 52.22 CHANGED cpsp+hshlachlhhsh.phsp..plhpthucssVhp.ltphs+pVp+EtH+hcuFlRFpcl....ps.........shahAhhEPcaslLshlusHFscRassppWhIhD.c+thusha...cs..pph.phh....s.hs........p.....th...........ph...........s.ppEctappLW+sYacolsI.tRhN.+....hppppMPp+YWKtLsE ...................................t...p+hshlachlhps..h.p..tsp...plhpphuDs..sVhp......lpphsKpVp+EtH+hc.u.F.lRFpch...ts....................shahAhhEPcaslLshhusHFtcRassppWhIhD.p+thuh..ha..-s....pp.lphh....p.hs........s....t.........t...ph...........................s.ppEctappLW+sYapsltl.tRhN.+....hhpppMPp+YW+.LsE.................. 0 59 132 167 +13409 PF13567 DUF4131 Domain of unknown function (DUF4131) Coggill P pcc Jackhmmer:C2M1S0 Domain This domain is frequently found to the N-terminus of the Competence domain, Pfam:PF03772. 
27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.92 0.71 -4.86 202 2454 2010-11-24 15:48:31 2010-11-24 15:48:31 1 9 2425 0 598 2213 149 162.40 14 22.34 CHANGED lhGl....lhs.........hh.hhshhhhhlhhhhhhhhhh...hhh..........................................t.thth.........hhhhhhhhhhsshhhthtthphtt......pphsphht................tpthh.....lpG.hl.....tstPphp........spth.....pahlphp............htsttpt.hss+lhlth.ppptt...............hphG..-tlpl...puplptPpsstNP..utFDYppYL.tppsI..huphhspshphh .......................................................................................................h....................h.....hhh...h.h.h..hhhhhhhh.hh.......................................................................................hh.....................hhhhhh.h.hh.h.hh.h.h..h...t..h..t..t..thtt.........pths.t.ht...........................h.........hps..hl................ss.h......p.hs........................................spth.........phhht.........................................tstpth.h.hcl.h..l.t..hptpt...................................hpsG...pphph...pu...c..lc.sputtN...........uuFD..hp..t..ah.ht.pslhthh.ht.....h.................................... 0 189 382 494 +13410 PF13568 OMP_b-brl_2 Outer membrane protein beta-barrel domain Coggill P pcc Jackhmmer:C2M356 Domain This domain is found in a wide range of outer membrane proteins. This domain assumes a membrane bound beta-barrel fold. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.24 0.71 -4.47 110 1717 2012-10-03 17:14:37 2010-11-24 16:11:55 1 14 223 0 538 2059 496 183.40 15 70.26 CHANGED tAQ......p..................hp..hGhpsG..h..s.h.....sshph..................p...t...p........ph...sh.............ps.uhs..sGh..hs.chth...s..p....t....h..u....l..p..stltasppshph..ptp.....................t...........p...............ttphphphshlpl.Plh..h...paph...sp..........hphtlhsGsthuhhls.ppt.........p...tt...............t...............................t.ph...s..t....tph...pp..hshulthG..huhp....h..h...........p....hh...l...phcapa.GLsshhp ....................................................................................t..........................hphuhpsG..h..s..h.......ssht...........................................p...................p...sh..................ph....uhp....hGh...hs..ch.th.........s..p.....p......h...u....lp.....sslh..as.pp.u.h.ph.ptp.......................................p.......................................................ttphph.p..hs.al.p.l...Plh...h...paph....sp...............hphhltuG.s.hh.uhhls.sph.....................p.......hp.............t.........................................................tph..s...p......tph...p.p....hsh..u..l..s..hG....hGhp....h..t..................p.......hh.......l..phpaph..Ghss.................................................................................................. 0 277 479 526 +13411 PF13569 DUF4132 Domain of unknown function (DUF4132) Coggill P pcc Jackhmmer:C2M366 Domain This domain might be involved in the biosynthesis of the molybdopterin cofactor in E.coli. 
27.00 27.00 37.40 37.40 25.00 24.50 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.03 0.71 -4.75 83 961 2010-11-24 17:18:36 2010-11-24 17:18:36 1 16 526 0 149 754 8 181.60 41 17.24 CHANGED G.KplKolPtsh+c-........tshpc....hpsh+K...plcphhstptpRLEpuhhss+pWssppa.pp.hhs.HPlhpplsccLlW...h......sss.......t.tshthh...cD..ss.........lsshcscth......p.Lsss.s.t....lpl..sHPlclst.tslssWpchht-hclhQPFcQltRc..lahlTss.Eps..tspspRau.Gthlps.t.ph.hu.hlpp+GWp ............................spRhKslP+.ptsDDp...pAs-AlschKtLKK...Dscplupp...plsRLEsAhpppRRWShtsFpthhVpHPlsRhlTpRLIW.GlY.....s.pp....................pLlssFRVA....p-..ss....................assAp.D-.h......s.LPss.s.......IGI..sHsLElos...pp......tutFuQlFADYElhPPF+QL...sR..p..o.ahLTt...........s..Eps..usp..LsRWtG+.psss.Gpl.hG..hpt+GW................................... 0 46 88 102 +13412 PF13570 PQQ_3 YWTD; PQQ-like domain Coggill P, Eberhardt R pcc Jackhmmer:C2M3Z1 Repeat \N 22.00 18.00 22.00 18.00 21.90 17.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -8.19 0.72 -3.55 1244 1943 2012-10-05 17:30:43 2010-11-24 18:25:17 1 236 866 9 946 6594 2060 41.50 27 13.21 CHANGED G.phh.....Wph...........ph...s..s...th.h...sss....................sh..s.....s...s....h.....lalsst...............suplh.u.l..-....sp..s ............................................Gphh..Wph.........................ps.......s......u.......ts.h.....usP........................sl...u.........s....G..............h.........Valsot............................sGplh.A.l..st.......................... 
0 381 735 860 +13413 PF13571 DUF4133 Domain of unknown function (DUF4133) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0094 Family Based on Bacteroides thetaiotaomicron gene BT_0094, a putative uncharacterized protein as seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or vs when in culture [1][2]. 25.20 25.20 25.30 26.00 25.00 25.10 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.08 0.72 -3.76 26 267 2010-11-24 18:36:44 2010-11-24 18:36:44 1 1 110 0 31 216 10 94.30 60 85.71 CHANGED INKGIG+sVEFKGLKuQYLFlFAGGLLAlFllaVlLYMsGVsQalCluFGssouolLVWtTF+LNpKYGEHGLMKhhAt+pHPRYllsR+plh+Lh ..INKGIGRsVEFKG.LKA.QYLFlFAGGLLAlFlLh.VILY.MsG.lsQalCIuFGssuuolLVWtTFpLNt+YGpHGLMKhuAt+pHPRYllNR.+plh+Lh....................... 0 11 29 31 +13414 PF13572 DUF4134 Domain of unknown function (DUF4134) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0095 Family Based on Bacteroides thetaiotaomicron gene BT_0095, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or vs when in culture [1][2]. 22.30 22.30 22.40 22.40 21.60 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.11 0.72 -3.88 27 454 2010-11-24 19:06:34 2010-11-24 19:06:34 1 3 132 0 47 312 15 94.60 48 88.66 CHANGED pKthlhshhhlhshss..thApssuuuGhst...Ass.lsoYhsssspLhYAIuAVlullGulpVY.KhssGDpDlsKshhuhhGAClFLlssupll.uFF ......................p..thhh.hhhhhhsssu...sshAQG....sG.AGIsc......ATphVoSYFDPuTKLhYAIGAVVGLIGGlKVYsKaooGD.sDsoKoAuSWFGACIFLIVAATlL+SFF....... 
0 16 42 47 +13415 PF13573 SprB PbH1; SprB repeat Coggill P, Eberhardt R pcc Jackhmmer:C2M2H5 Repeat This repeat occurs several times in SprB, a cell surface protein involved in gliding motility in the bacterium Flavobacterium johnsoniae [1] 26.00 20.00 26.00 20.00 25.80 19.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.70 0.72 -4.35 113 1000 2012-10-03 16:25:20 2010-11-25 11:10:41 1 88 62 0 782 1180 555 36.20 31 8.65 CHANGED stssssloCtG....sssGolsl.ss.oGG....o..ssYoYp.Wssuss ......................ssso.C.u...........sss.G.s..l...s.l...s...s...o...G.G............o.....us....Y.oYs.assst............... 0 535 718 782 +13416 PF13574 Reprolysin_2 Metallo-peptidase family M12B Reprolysin-like Coggill P pcc Jackhmmer:C2M7W3 Domain This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.58 0.71 -4.21 49 431 2012-10-03 04:41:15 2010-11-25 14:00:08 1 33 240 11 189 3218 220 181.20 21 27.86 CHANGED hspshsthssshsplNtlYE.............c.-hu..lph.....sL.l..s.............s.spll.hhssso.....csass...............................sssshhspt.pshhssh.lGpp........sYDlGalh..ssh..ssuts...GluhlG..................slCs..ss.......pps..............oG........hs...........sPhGssh.........thch...lAHEhGHpFGusHohs...............s.ssp..t....o.u..ss..........EsuuGsoIM.u.Yu.....uh...sssp..............sl.s.p 
...................................................................................................................................p....................................................................................................................................................................................................................ht..........tt.t.....................sh.s...h.....s.hlh..............tsh......ss.....u....sh..............Gl.Aalu..........................................ulCp..ps........pth....................................s.G............hs............pshssth.................uh.tT...huHEl..G...........HshGusHsss......................................................s..p.sp....s.....s..s..ts...................ps.tp.G....s.h.IM..s..ht..........sp....s.s..p.........F..S.s..o.ht.h......t.............................................................................. 0 79 103 149 +13417 PF13575 DUF4135 Domain of unknown function (DUF4135) Bateman A agb Jackhmmer:D3Q4V7 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 380 amino acids in length. The family is found in association with Pfam:PF05147. This domain may be involved in synthesis of a lantibiotic compound. 
27.00 27.00 27.50 37.00 26.20 25.70 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.23 0.70 -5.46 79 396 2010-11-26 09:48:42 2010-11-26 09:48:42 1 9 292 0 97 356 22 354.20 24 39.27 CHANGED YPVLsRhlspphpp.ahptht-lhp+lt.pD...................hppltpp....hhtsp.......sp..lsslph..u.h.uDsHssG+sVhhl..pFs....sG..h+lVYKPRs.Lpl-ttap..........phhpalspp....s..t....h..h.hh.chls.ct.....YGWtEalptpsCp................spp-lpcaYpRhGhlLulhalLsuoDhHaENlIA.......t.hPlllDlETl..h........p..s.........................p.h.....t...s....s...s..h.s.tp.t..p.......................ulhpsG...ll..s.t............h.h.....h.....s..s....t.p..u....................h..sls.hh..s.s......s...ps..hp...h.......hp........t......h..s......h.t...pts..........scsh....lttt.ttph.......................t...ss..tsYh.......ppllpGFppsY.phl..hpp.+p..c.l...hp...hlhp...FpssplRhlhRsTphYuplLp....tuhHPc.....h....hps.cpphhh.phh.......htpp.tttplltsEhpsLhpsDIPhFtsps ................................................YPhLh+hlspthtphhphhhcllppltpD...................hstlppp.............h.htp.........sp..lsslph.s..h....GDsHstG+oVhhlpFs.......su....+llYKP+slthcpthp...................................................plhphlspp....s..t.....hp..lhh.......chls....pss..............YuatEalpttssp............................................stcclpcaYh+hGhLlulhallsuoDlHaENlIu......pGphPllIDhETlhp..s.............t..h.......s....s.s..t...stt..t.t.t..................lhpSVhpoGlLPh.......h...h....t...p...pp..u..............hDlSuh.ut....p....ts.....pp...sh.p..h.h.l..hs.t......oDph....ht.ht.phph...tt.................hpsp.l...phtpYh..pplhpGFpphaphh..hp.p..+p..c.h.....hp...hl.tt...htshpsRhlhRsTphYuplLph.thcPs...........hh...pst..hcccthhpplh..............ttp.pppt.llttEhppLhptDIPhFhsp.s......................................................................................... 
0 31 58 86 +13418 PF13576 Pentapeptide_3 Pentapeptide repeats (9 copies) Bateman A agb Jackhmmer:D3PTZ3 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.28 0.72 -4.07 230 524 2012-10-03 04:02:01 2010-11-26 10:13:22 1 41 195 8 233 1940 556 48.40 27 19.80 CHANGED FspspF.pss...sFp....pspFpsss....p....Fsp.spF....s.pspFpsspFp..ts..ssFppsp .....................................................................................................FptspF.pss.s.sFp..........pupFpsss....s.......Fpp.upF.....ts.sssFssupFp....ts..ssFtts....................................................................... 0 69 184 226 +13419 PF13577 SnoaL_4 SnoaL-like domain Bateman A agb Jackhmmer:D3Q8I0 Domain This family contains a large number of proteins that share the SnoaL fold. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.66 0.71 -4.22 175 2151 2012-10-03 02:27:24 2010-11-26 11:07:17 1 22 700 23 793 3721 973 126.70 18 73.54 CHANGED ppLpscpsIppLhscYstthD..........p...........t..ch........c....th.s.s.lF..sp..D..uthchs...sh..............s..........h.hpG.........t..s..........slhshhps..................h.h..s........shh...........hs...........tH..hhs.s.hl.p.l....c...........G........D..p..Apupsh.hh.......shh.hh.s.s...................s.s.......s.ht..hh..hs.......u.tYp..cchh+p.............su..t......W+lpcpp 
...............................................................stttIpp.lhsc.Ysts.hD...............p.............p..ch........c....th.ss.lF...ss.....D..u.h.h.phss.........................t.............hhpG..................t.s.......................slhs.h.hpp.........................................h..h...s.....thh.........................hs...........................tH....hhs..s.h.h.l..p..l......s...................u............-....p..A..p.upsh...h..h...sh.h..hh.ss.................................t.t...........t..t..hh...ts..........utYp....-p...h.h+p.............ss.t......W+ltph................................................................................................ 0 154 493 664 +13420 PF13578 Methyltransf_24 Methyltransferase domain Bateman A agb Jackhmmer:D3Q0B1 Domain This family appears to be a methyltransferase domain. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.37 0.72 -3.21 204 1368 2012-10-10 17:06:42 2010-11-26 12:36:09 1 67 883 14 614 4985 3811 113.70 21 37.53 CHANGED lElGshpGtSshhlsps...h........pp..s..sh...........................................pl..h..........ulDh.......t........................................p.......htt..hhp......................................p......................tthss.......h..........sphh.........tsss....................tp........s.ht........pht.........................s..s...l.D...........llalDG...sH...s...h..........p...s...sht-..h...pth...h..s..t.l.p.s.s..u.....lllhcDh 
.........................................................................................................lElGs.hGh...Sshhhst.s...h....pp..ss...............................................pl..h..slDh......t.t.....................................................p......htt..hhp.............................p............................ts.h.s.s.....h...............lpl.h..........puss......................p........s.ls.....pht.......................................s..s.l..D.....................ll.a.l.DG......s.H...t.....h......................p.....sshts....h......phh........h....s.......p..l.p.s.G..u.....lllhcDh.................................................. 0 222 417 536 +13421 PF13579 Glyco_trans_4_4 Glycosyl transferase 4-like domain Bateman A agb Jackhmmer:D3Q529 Domain \N 27.60 27.60 27.60 27.60 27.50 27.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.16 0.71 -4.09 231 11449 2012-10-03 16:42:30 2010-11-26 14:14:27 1 99 3251 7 4057 16665 3336 167.30 15 40.28 CHANGED GGhtphs.tplup.sLs...p...t.G..h..c....Vp..l...l..s..............s.s....tss...........................t.............ht.......t..s.........s....s.t....l.h..........t.l......s......hs.........tt..................t............t.......t.............h.t.........h.h.......................h....t.l.................tp....h..................l..................t.....................ttpsD....ll.a....s.p..s...h..h......s...s....hhs....h...h.u..........t...t.......t..s.......hP...h........lh...sh...........................+...............................s........hh........ht............t............tsh.........tt...p......h..h......t.......h.h.....pc......hhhpp..As...t......llsso.ptht..pt..lt.p.h..u....h..sss.clhllssu 
...............................................................................................................................................................................Ghthh.h..hpl.sp...tLt.....t..........t...G.....h.....c...........Vp...l...h.s.............................t......t.t...................................................................t.............................s............s.....h..p........lh.......................p..h................s.............h...........................tt.........................t..................h.t.........t...........................h.t...........h.h..............................................................................t....t..l....................................................tp....h.......................................l....................p...............................................................................................................................................ptpsD.......ll..a......s...p......s.....h..h....................s..s........h.su.......h.....h.s........................t........t................................h...s.................................hP...h........V...h...sh................................................................................................H........................................................................................s.........hh...............hp........tt...................sh......................h.t...p................h...h...........t..................hh.........pc.....................t.h.h..pp.........s..c.....t........lls..sS..pt.h.......t........pp...hh..p..h......s.....h.......t....t.p.h.hl.................................................................................................................................................................................................... 
0 1455 2852 3553 +13422 PF13580 SIS_2 SIS domain Bateman A agb Jackhmmer:D3Q8F5 Domain SIS (Sugar ISomerase) domains are found in many phosphosugar isomerases and phosphosugar binding proteins. SIS domains are also found in proteins that regulate the expression of genes involved in synthesis of phosphosugars. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.44 0.71 -4.46 65 4732 2012-10-02 15:05:26 2010-11-26 15:23:47 1 23 2778 42 921 4439 1821 135.90 30 60.55 CHANGED pYhsph.ppllppl..pp........ptssIppAuchlspul..pssuhlalaGs.GHSthhApEhhhRsG....Gl.sshpslhh.slhLps...s..s.....t.tustlE+h..puhucp.lhpt..ht....l..pssDllllh.SsSGpN......sssl-hAhtA...+cpGhplIAlTo ..................................................................................t......................p.t.................ttI...p.pAuphlspuh..psG.G+llhsGN.G.s....S....u....ss.A.....chAsEhs..........sp...a.p.p....p..R....s.l.s.u...l....u....lss........s.s.............hloslus-hthc............p...l...a...u+....p....l...puh...................u...ptG.DVL.lu.I.SoSGsS.........ssllpAlctA...+pp..GhpsluLTG............................... 
0 327 600 773 +13423 PF13581 HATPase_c_2 Histidine kinase-like ATPase domain Bateman A agb Jackhmmer:D3Q5R6 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.69 0.71 -4.34 142 5237 2012-10-11 19:05:54 2010-11-26 17:53:18 1 183 1833 0 1838 7481 882 122.50 20 41.79 CHANGED hsuphp.pltt.lpphlpph.h..tt.tslstptht........clpLAlsEAhsNslpHuhtpts.t...............t.l.................plp....................hthpspt....lpl..p........lpD.pGts.hsst.th..s.....................................sstttuGhGlhllpplhDpl..ph.............sss...Gsplphph .......................................................................................................................................t..hth..s..Rthh.tth..h......tp......hs......h.....s........p...t..hp.................................s.l..p.l....s....l.....oE.hs........o....NAlpH.u.....h..t....t.....................................t.l..............................................plp.......................................h.t.h.p.s.sp........lpl...p.......................VpD...p.....Gss....hsht...ph...t...................................................................................sttppsGh.GLh..l.lp.p.l.s..-.....ph.....ph...............tst....Gpplhh.................................................................... 0 613 1434 1724 +13424 PF13582 Reprolysin_3 Metallo-peptidase family M12B Reprolysin-like Coggill P pcc manual Domain This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.01 0.71 -3.60 94 288 2012-10-03 04:41:15 2010-11-29 11:26:57 1 38 237 4 116 3927 262 129.40 19 22.22 CHANGED huthssslspsNtlac..p-hu....lphpLl....slhh......hsss.......s.s.....shss..sss..................tthls.pt...tsh.hss........t.htpsshDl.uplhss......................usuGlAhl..........ushsssspp..............t.uhss.ss..s.st.h.h................shhuHElGHphGusHo ...................................................................................................................................h...h.t.hshss.h..hp......p.s................lphpls.............th.h..............h.pht.................s...t........shhp..pst..............................................tp.hls..sh....ssh...pth......................t....p.p.ss....s...D.h.sh.h.hpthsh...................................ptssGl....Aal........................uuh.shspts.....................................t.shsh...sp......hst...sss......................................psht..HEl.GHs..hGhpH.............................. 0 54 83 105 +13425 PF13583 Reprolysin_4 Metallo-peptidase family M12B Reprolysin-like Coggill P pcc Jackhmmer:C2M7W0 Domain This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.65 0.70 -4.80 9 268 2012-10-03 04:41:15 2010-11-29 11:39:56 1 39 198 0 78 3727 359 164.50 15 27.17 CHANGED sh+shclulssDhoahpha....ushstlpphhhsslsthNclY.s..................pslGhplpLh....sspph.hTss.pss..as........s.s.spps..spthsshsshhGppshDhuhlhp.h....pss.sGLA.aluphs..........p.st.pu.ushuss.............................tp....pa..pl..hAHElGHhhGAsHshs............ussss.osts......-sssGs...oIMu.Y...sssspt..sh...FSssoIphI.th..hp.ushs ..........................................................................................................................................................................h.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s....h..h.................................................................................p.....sh......p..s....hs......HElG.....Hs..LGhsH.s..hs.......................su.s.s.s....spsh.....................pps..p...t.h............olMu..Y.........hp..t.s.......................................th..................................................................................... 0 24 45 68 +13426 PF13584 BatD Oxygen tolerance Coggill P pcc Jackhmmer:C2M5J5 Family This family of proteins carries up to three membrane spanning regions and is involved in tolerance to oxygen in in Bacteroides spp. 
27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 485 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.53 0.70 -5.90 64 896 2010-11-29 12:52:47 2010-11-29 12:52:47 1 20 707 0 273 880 568 397.70 18 74.48 CHANGED hlh.h..sh..hs.....hAp.....sphpAp.s..s+stlu.sEplplshph.s..p...-s...c..s...........Fps...ssh....ps.Fc.V..hGP..spSp....sp..ph..lNGcso....psh..oaoYhLhsp+pGshoIssApl.hsGpphpopslplpVss.ssppspts..................................tthsspslalpspluKsssYhpEslhlsYKlYhpsslp..hs...hc.PchpsFhsppl...sp..ptphp..ptphsG+sYpslhhcphllaPQpoGcLpIsshshsssl...th..............spt..............p.s.....hF.G.t..p.hp......php+plpusshsIpV+PLPp..h+PpsFsGuVGpFshpssh.....s..p..splcsG-slohplslsGpGNlKhhphP..clp..hP...ssh-hY-schpppsphsssG.hpGshshpasllPpptGpasIPslpFsYFDspsppYcTlsopshplsVtpGs.......ss...sss....ss.......sspppht..spshphlc..p..sshht.h......p.p..phFa..soh.haahlhllshlh.hhl.hh.lhh+c.pttp..ps..clsth..+t+cA..s+lA.p+t...LppApchh..spp.pt ................................................................................hh..........h..ht......ph.sp.h..sp...tl..h...s-.hplph...s....t....ps....p...t............hph...sth.......ts.h....l...hts...p.sp....ph........ph....h..N...G.php..........ph..phthhl.hs...pp.G.htIPshpl.....G..hto.psltlpVht.tttt.........................................................................ttt.h.lps.p...l..s.......p.p.p..h.Y..tpthhhphclhht...........p.........ht.....................t.....................t........hp.sph.....p.s..h..h.hpph............sp...ttphp.................p..h....sG...tpa...pshp...pp..ahlhP..pp.sG...p..hp.....lsshthpstl.......................................................................u......................h..hphps..tshslp.V.p.sh......Pt.....ps..t...a....su....t.ph.p.lst.ph..................s....s....pp...h.psG-sloh..plpl...p...u....p...Gs.ht....h.p.hP....plt......hs......pshp.hY..s.tps..php.....t...s.t...t...t......h.....t..up.hs.phsllPppsGphpl.P.s.l.ph.saasspspphcph..phsshsl...pVtts................t...........
......................................................................................................................h.h.h.h.hh..h.hh....h..hh.hh.hh...............................................................s........................................................................................................ 0 92 174 227 +13427 PF13585 CHU_C C-terminal domain of CHU protein family Coggill P pcc Jackhmmer:C2M7W3 Domain The function of this C-terminal domain is not known; there are several conserved tryptophan and asparagine residues. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.25 0.72 -4.10 160 1085 2012-10-03 16:25:20 2010-11-29 13:23:00 1 223 183 3 619 1204 1201 88.80 27 6.67 CHANGED lhls.N.shoPN.GDGh.NDhaplp.s....l.......ss.splpIaNRaGshVapsp..sYp....ssW.cGp.p.........ptLPsGTYaYllph........sst....ppt..hpGalhl .......................h..lPs.sFoP.N..GDG..h..N.D.haplp.s..............h..p..ph......ss..hplpIa.....sRaGph.....l.......apss....shs.....................ssW.DGphps..................p.lPsG..sY.aYhlph.......tsst........t.......hpG.h........................ 0 364 565 618 +13428 PF13586 DDE_Tnp_1_2 Transposase DDE domain Coggill P pcc Jackhmmer:C2M1K3 Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.48 0.72 -3.80 38 2848 2012-10-03 01:22:09 2010-11-29 14:04:42 1 17 787 0 763 3964 388 82.10 27 46.05 CHANGED lpsD+hYcsccsRphhcc+..GI..+lusssls+ssp............hp.tttstttR.slEtpFuhhK+t.hpLsphhs+Lspottsh..lslshhlhsLc ..........................................................hsD+uY.ps.p.t...R.phhtpp...uh.......phs.lstp..tpptp............................h...s..hph.h.+p....R.p.h.lEphFuplK.p.a....R+..l.......u.....p..Ra.-Khspsahuh..ltlAshhlhh................................. 0 202 519 633 +13429 PF13587 DJ-1_PfpI_N N-terminal domain of DJ-1_PfpI family Coggill P pcc Jackhmmer:C2M3D1 Domain This domain is found at the N-terminus of proteins from the DJ-1_PfpI family, Pfam:PF01965. The exact function is not known. 21.50 21.50 21.60 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.60 0.72 -4.42 59 1421 2010-11-29 14:19:25 2010-11-29 14:19:25 1 12 1169 10 438 1155 114 37.70 42 16.10 CHANGED t+lLhVLTSHDpLGsTGc....pTGFWlEEhAsPYYsFpDAG ....................h+lLhVl..TS..t.s..p...h....s...s...suc....tTGhWLpEhstPYhshpcuG......... 0 117 250 366 +13430 PF13588 HSDR_N_2 Type I restriction enzyme R protein N terminus (HSDR_N) Coggill P pcc Jackhmmer:C2M5G6 Domain This family consists of a number of N terminal regions found in type I restriction enzyme R (HSDR) proteins. Restriction and modification (R/M) systems are found in a wide variety of prokaryotes and are thought to protect the host bacterium from the uptake of foreign DNA [1]. Type I restriction and modification systems are encoded by three genes: hsdR, hsdM, and hsdS. The three polypeptides, HsdR, HsdM, and HsdS, often assemble to give an enzyme (R2M2S1) that modifies hemimethylated DNA and restricts unmethylated DNA [2]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.46 0.71 -4.27 81 1412 2012-10-11 20:44:47 2010-11-29 15:02:48 1 34 1136 1 328 1237 289 110.80 21 20.20 CHANGED PEEhVRQchlp.hLlpchsYPt.shltlEhtlp..h.s....s.pp.....cR...s.....DlVl...aspc..............tp...shlllEsKssplpl..sp..ps..h-QltsY.st.s..h...pupahlloNGhpphhhp..hs..h...ps...p.......p..aphl..scIP ................................................hl...hh.p.p.hs...Y...s........t..........p...l...h.s..Eh....phs...hs................p.pp..................c+.....s.....Dhll.hpps.......................p...PhhlIEs.......K..s..sphpl.......sp..............s...h.pQhhpY.hp.h.......h.....s........spash...loNGptahhhp........t...................................................................... 0 106 235 300 +13431 PF13589 HATPase_c_3 Histidine kinase-, DNA gyrase B-, and HSP90-like ATPase Coggill P pcc Jackhmmer:C2M6V6 Domain This family represents, additionally, the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.92 0.71 -4.51 55 9547 2012-10-11 19:05:54 2010-11-29 15:33:13 1 80 4481 84 3019 14695 4507 137.90 25 20.93 CHANGED ho...psAlsELlcNulD.......As....Aps..................lplhls.......pp.........................tsltltDsGtGMshp..-...hh.p...sh.....p.hup...ss.p..p...t.......p.......p.....pps............hGc.hGl.G.KhAuhphupplplho..+...pp...up.ps....s..hp.......l...shphhpppp..s...h.l...ths..pth...............p.hpp..pppGT ..................................................t.hl+E.L.....l....pNu..hD..................As......s.p..h..............................................................................l.cl.p.lc............ps.......................................th....................pp..l..pl.p..DNG..........h.Gh.s....c-........-...........lh.t.....hh..........p...hsp...........o.u....p....p.........p..h..........p.................-.........hpt.....................................................IGp...aG.....h.....G...........h....h....u.....u.....h.....h.....s......s.......s......+.....l.....s...l....p.o..+......ps......sp..pt.......s..ht...............h...........................................................tstttthttttt........................................................................................................................................ 0 999 1810 2464 +13432 PF13590 DUF4136 Domain of unknown function (DUF4136) Coggill P pcc Jackhmmer:C2M2E4 Domain This domain is found in bacterial lipoproteins. The function is not known. 
27.00 27.00 27.10 27.20 26.50 26.90 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.91 0.71 -4.15 166 841 2010-11-29 15:34:22 2010-11-29 15:34:22 1 5 488 0 314 779 367 162.60 18 79.98 CHANGED o..........h......p..lp...sDa....-pss......sF.s.....s...h+Tasahps..........tss....th...s.....sLpp..pRlppulpspLpt+.Gapt......u...p....s.s.Dhhlshthtscp.p........tss............h.s.s.......h............Gh......u...h.........G........au.....s..h..h............G.................s.s...hs..h............spp...h..pp....uo..Lhl-llD........sps..s.........c.........l.lWcGsupstlps...p.s...s....tppt...lsp...hVpplhspaPPp ...............................................ss......phs.th......sa.s.....sh+Tasahps..................st...th...s...sl...pt...p..p.lpsslsppLsp.+.Gap.......s.....sp....su.Dl.....hVphththppp......psh..........................hs.s.....h.............sh.....u...h.........................G................au....s..h..h.................s.................................hs.hsh..............stp....h....pp..ss....Lh..lcl..hD.......sps...s..............p.........h.lWpusupsthss..ps...s.......pppt...htp...hVpphhspaP........................................................................ 0 81 179 254 +13433 PF13591 MerR_2 MerR HTH family regulatory protein Coggill P pcc Jackhmmer:C2M362 Domain \N 27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.43 0.72 -4.38 56 920 2012-10-04 14:01:12 2010-11-29 15:57:16 1 2 905 0 189 833 159 83.90 38 81.93 CHANGED lslsEhCppsslptshlhELl-hGl.lcs.ptt.s...p.....phh.hsspplt.plc+hhRLppDLslshtGlslllcLL-cl-pLppElppL+p .....................hTlsEaCh+.s.G.loc-pLsElVslGllEP..cp..p....s..........sWh.Fcsc.....sshhVpRAhRL+c-LuLshsGIAlsLsLl--lscL+pEsRhLp............. 
0 60 124 167 +13434 PF13592 HTH_33 Winged helix-turn helix Coggill P pcc Jackhmmer:C2M2H6 Domain This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.39 0.72 -4.43 87 787 2012-10-04 14:01:12 2010-11-29 16:08:47 1 15 372 0 214 1136 242 57.50 29 30.89 CHANGED ssthWostpltphlpppaslpa.ohsuhs+lL++hGaSap+Pphh.shctDtctpptFtcph ...............................hppchshsY..ohsuhhpLL+.RtGa.p.hpP+sp.PtK..tD..s.pp..t......................... 0 28 108 155 +13435 PF13593 DUF4137 SBF-like CPA transporter family (DUF4137) Coggill P pcc Jackhmmer:C2M6P1 Domain These family members are membrane transporter proteins of the CPA and AT superfamily. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.84 0.70 -5.33 59 1645 2012-10-02 17:06:44 2010-11-29 16:51:02 1 8 1426 0 476 3347 1338 293.20 43 89.90 CHANGED chFhlhLlsslhLAthhPs..h........Gpssshhp.t.hssshu.lullFhlpGhpLupptlhsGhtpWRLHlhlhhhoFllhPllsh.ulshhht..s..h...ls................................st.....lhhGhlaLssLPoTVpSuluhTohAtGNlsuAlssAuhSsllGlFlTPlLl.tlh.h...........s.ss...............ssshs.........htpsltplsLplllPhllGQllp...sh..h.s...p.......................aspp..p.cp.h..........lphl....DpssILLlVYsuFSsAhspGla..pplsh..hsllslhslshhLhhlllshs....hhhuRhL.t.............................as+............................pDpIslhFCGS.pKSLA.GlPh..usllFs.sts......tlGhllLPLhlaHthQLhlsuhLApphs+ 
...................................................................................D.FhlsLlssVll...Aoh.h.Ps...p............G.s.h...s...s.hh...........................h...o...s.hA...I........u..LL.F....F.h...a...G....A......+..L..S......p.....c..tlls..G..h.t.H...W.R.L...H.L..hV..hs.sT.....Fll...FP..ll..G.l.........h.....h.....t.....h.....h.....h.....s.....s..........ls...............................................s.........LYhGhLaLChL..PuT..VQ.SuI.AFTShAtG.N.VA..A..............AlsuASsSsLLGlF.loPLLV....uLlh...............s.sp..........................Gus.s.s..ht.p.....ltp.Ih...lQ.L..........L...lPFl...lGpL......R.....sa......l..u.......s..............................a..l..pR....p....+p.h........lshs.....D.p.....s.....S.....I.....L.....L.V...V.Ys..A.....F.....S...-..u.h.....s.p..G.lW...pp.luh.............ssL.l.h...l.l..ll.....s.h.lL.L.s.l.l.l..s..ls.......hh.hu.+.h..L...G...............................................................F.s..+............sD..c.I.s.I.....lF..CGS..KK.S.L......As....G..lPM..As..l.....LFs..s............slG.h.hV.LPL..MlF.H.QlQ....LhlCuhLApRat............................................................................................................................................... 0 127 265 386 +13436 PF13594 Amidohydro_5 Amidohydrolase Coggill P pcc Jackhmmer:C2M675 Domain This family of enzymes are a part of a large metal dependent hydrolase superfamily [1]. The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source [2]. This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit [3]. Dihydroorotases (EC:3.5.2.3) are also included [4-5]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.77 0.72 -4.15 1248 3712 2012-10-03 00:45:34 2010-11-29 16:53:58 1 83 2104 48 1323 23661 7737 75.40 25 16.55 CHANGED lhlcss.+I........st..l...................t......tt.....t................l.D.....u.p.G..p.h.lhPGhID..sH.....sHh......................................hhtsh..tphttt......h.....................................sGs...............................T...T..............l .....................................................lhlpsG..+I....................hs.l......................stt..hsh...............ss.sp.........................ll..D..............s...s..G...p...h..l..hPGhID..hH.............sH.h................................................................h.ts.h..................h.................................................ttGl.....T.o...................................................................................................................................... 0 575 1004 1193 +13437 PF13595 DUF4138 Domain of unknown function (DUF4138) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4780 Family Based on Bacteroides thetaiotaomicron gene BT_4780, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 
25.50 25.50 25.60 25.80 23.60 25.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.40 0.70 -5.42 30 514 2010-11-29 22:10:22 2010-11-29 22:10:22 1 2 140 0 63 427 8 231.40 40 82.80 CHANGED plpVshsKTsHllFPpsI+YVDlGSspIlusKApss-NllRlKA...................sspsF.s-TNhoVITtDGphY..sFslhYsspPsthshph.....................phtssstpshhpphss.sssthpphhhslapps.+cl+plss+paG.....lphhLpuIYspsDhhah+hslcNpSNlsYDIDhlRFKlsDKKhsK+TssQphtlpPlhshNthp..lpsKpshRsVaslsKhTlPD-KlLplElhEKs.GGRphslpIcNsDllcAcsl .............................................................................s.lpVoasKTsHlIFPus.lRY.V.DlGSs.p.l.lAsKA-ssENllRlKA...................sscsF.sETNhSVI.TpDGsaY..sFNlpYss-PthLsh-h.p......................................................................................sphspsttplY...hp-lss.pS.....shh....h....ph.h..hpsI..a.pps........pRpl++lus+paG.....hphhLculYscsshlYh+hplcNpoNlsa-lDalpa.K.l.sDKKhsK+TAh...QE.hl.Plcs.hs.st..lsu+ps.R.sVasltKFTlP-DKhLhlEl.E+s.GGRp.ohhl-scDllpAc.h.................................. 0 18 54 63 +13438 PF13596 PAS_10 PAS domain Coggill P pcc Jackhmmer:C2M6E2 Domain \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.14 0.72 -3.77 87 1751 2012-10-04 01:10:46 2010-11-30 10:56:59 1 160 1165 5 368 2524 174 107.80 26 18.13 CHANGED lsslLsohsl.tlsFlDcs.plphass....s...t....p....plhp.hhssslGRslssh....p...sp.....pt..hstl....cp.ll...pplcsspp..s.p.hchhh....s....p..s....s+h.hhh.phhsh+spsuphtGll.shhDlp ..........................hshlLpshPh..-lTa.lDcssphp..a.ass......s.....t....p.....hl..ap...R...s...s.s..p..l..G.+.....s..l.p..p...s........H.........Ps.......tp...hcp...V.........cp..ll.......p.s...l..+.sGpp.......c..h...h..chhh........s.......t.s......Gp..h...h..h..h..p..Y..t...A..h.+..s.p.s...G..p.atGllEhh.Dlp....................................... 
0 125 250 306 +13439 PF13597 NRDD Anaerobic ribonucleoside-triphosphate reductase Coggill P pcc Jackhmmer:C2M7Y9 Family \N 25.00 25.00 25.80 25.40 24.70 24.80 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.94 0.70 -6.43 45 3413 2012-10-01 23:28:04 2010-11-30 14:10:29 1 23 2857 5 604 2408 158 537.40 35 83.84 CHANGED ptchclspsu.Nhs.huht.....tlsp.....hltphahpp...l.s.s.E.hs.cAH.ppG-laIHDL..sh..h...ssYCsuaoLpplLpcGhsslsG.....pS+PP+HhsoAhuphVNFlhshQspaAGAQAhssF-TYhAPFlRtDt.hsYp..............................-lcQtlQchlaNLNhPsR.hGhQoPFTNloh......sp.s.sh.....cs......lhG.sth...........................h.sttht.....s.hp.Ehchlp+Aah-lhhpGDupGpsFTFPIsThslppc...............ppssspt..lachsAKhuh..htsah.............sph.........-Pss......stuMCCRLpl-lp-htp...us..................................uLh...uus.tTGSIuVlTlNhsRLuhpucs.......ccc.t.h.tcL.......pc.h.p.l.s.......t........Rp.hlpchhp.....pGlaP.....a.hcph......Lhs..............L.cspFsTIGlsGhsEss.t.ph..stp..slp.......s.cuc.......phshclLcal+pphpchpccTGhhaNlEtTPAEusuh+hA+tD+c............c..s......lh..........psts...hh....sshats.ssphslshh-chh....hc.pc.lpshhoGGollHlaL.GE.p.h.sscuhtp.Ll+phh.p.hclsYholTssholCsspGalssc.ptpCPpCGpct.............sEhaoRlhGYh+.PVpu.......aNtGK+p.EatcRphap 
...........................................................................................................................hh..s.NsNcsu.p.h.h.....sp.tc..........hhus.......hssc.hhhpp.....hLP....pc..ls.pAHpcGDIHhHDL...Da...h....hhsCshlshcshLppGFphGsu........plcsPKSIpoAsu.hspIluplsup.aGGpohsphDphLAPas...ptsh..pchcph.t............................................................................pths.tpsp+-hhpAhQul.Ypl....NThho.usGQTPFsolsa........GlsT.sh..............................................................................+.h.Ip.culLpsp...h...tG...l...u.p...t.+....T.s.lFP.pll..F..slccGh.Nhp..........................s.s.s..NYD...lhpLAhcsusKRhaPsl...................................................-h...........sssh........hs.sMGCRohLts....p.tp.....sp..................................tl..........tpGRs....NLGVlolNLP...R.lA.lcupG................................-ts....t...F.hcl..........-+.h.t.l..s.......p.............................+t.h.h.pc.htc............stshP......h..hh..h......hhphhtt.p...........t.s...hh..+p..u..p..h..o..l..GaIGlhEsh.p...ha..s..sp.....hhp.............st.pu.+.......thslpIlcch+ptsppap.........cc..h..Gh..tFSlhuTPuEuLssRFs+.hDpc......................c..FG.....ll...........slTD+s....YYTNSFHh.s...V.pp.p.sssa-Klc....hE.ts..a..h.ssGGaIpYsEh.sp..p.t..N.cAlcs.lhcauhp..+lsYh.uhNsslDpChp....C....G.........a...........p..........u.........t......c........s..........h........p..........CPp.......CGscc...........ttssVh+RssGYLs....sssp...................astG+pp.ElppRV+H.h............................................................................................................. 0 214 398 520 +13440 PF13598 DUF4139 Domain of unknown function (DUF4139) Coggill P pcc Jackhmmer:C2M2S6 Family This family is usually found at the C-terminus of proteins. 
25.50 25.50 26.10 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.68 0.70 -5.12 85 674 2010-11-30 15:48:01 2010-11-30 15:48:01 1 18 418 0 391 713 87 256.30 16 54.27 CHANGED lcloYhlss.A...u.WpPhY-l+ls..s.tp....sp.lplshtAtVpQpTG-DWs.sVpLsLSTupPsps.s..ssPpL.ts.Whlph...hpP......s.h......hst...hssh...........s...ttht..t..tt.h...tts.................s.......h..tt.t..h.....tptsht....s.tlppsshu...ssapls..t.holsusupst.......plslsphshsuc.hphhssPph.s.spAaLhAphsss......s.shslL.sGpss.lahDusaVGpsplsh.hss.......Gpchc.lsFGsDctlplcRphh.c.c..p.ss..p.G.hl.......sp.ppp.h.s.hphplpVcNtp....spshp.lplpDplPlSpscclcVph..................p...p....s...s...................ts..............s.hc............spc......GhlpWclsLssGp.spplphsaplcaPc ...................................................................................................................................................lpY.....h..t.s.....W.s.Ychtht......t...t......................t...htl.h.u.l.ptou.sWp.sstl.lsosps............t.t................s.....h.....s....hth.......................................................................................................................................................................................................................................................................................................t..t.t..t.t............htaph..t....hs.l...sstt..............hh.l..p.ph.....s..p..h...h........h.....h..........h.P.th.....p..............t....s.....h...h..s....p..hts...................t..thsl...G..st.lh...........h.....s.....s.....t......a....l....up.s.lt..hss.......spphp..ls..hG.-.tl..p....lp......hp.....p..t......p..tp..........ts.hh...................st...t..p.........hthphtlpNtt.......tpshp.l.l........-th...Ph...s...t....p..ttl.lt................................................................................................................................................................................
.............................................................t.....t.h.athtl.stt....t..h.hth.hp............................................................................................................................................. 0 155 261 359 +13441 PF13599 Pentapeptide_4 Pentapeptide repeats (9 copies) Coggill P pcc Jackhmmer:C2M218 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.59 0.72 -4.17 86 6775 2012-10-03 04:02:01 2010-11-30 16:17:53 1 256 2008 57 1619 10345 2693 77.40 16 36.50 CHANGED sphpsspFpssp.h........pp.ssFpss.....plcsssFsssp.h..........hpss..........Fpssp.....lpsspFpssp.h...pss....pFpssslp.sspF..p..s......p.lpsss.F ...................................................................................................tspF.p.ss.p.h................pt.s.sFp..ss...............plp.s.ssFp.psp.h.......................ppss...................................F.p.s.ss...............Lp.s..ssFp.s..ss...l............pps...................sF.p.s..sslp..ssph.p..t................................................................................ 0 562 1041 1333 +13442 PF13600 DUF4140 N-terminal domain of unknown function (DUF4140) Coggill P pcc Jackhmmer:C2M2S6 Family This family is often found at the N-terminus of its member proteins, with DUF4139, Pfam:PF13598, at the C-terminus. 
27.00 27.00 27.00 27.00 26.90 26.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.27 0.72 -3.58 88 387 2010-11-30 16:18:01 2010-11-30 16:18:01 1 13 299 0 212 388 20 104.20 21 17.80 CHANGED sVTlassu...ApVsRp..us..l.s...l.ts..Gp..p..plhhpsL..Pssl.st..sSl..pl.s...stus.......ssl...tslp....hptp..h....hp.......ttsst.......plpplcpplcplppphstlpschs...sh...ptphphlp .........tVTla.s.sp...AplsRp.....sp.l.s...L..ts....Gp......s..c.............lhlp.sL..s..ssl..ct..sSl..+V.p...upus........ssl.....hslp.........hptt...h..................ht..........ttt.st............................plpplcpplcplcppht.......tlpsphsshptphthh.t.............................................................................. 0 74 133 193 +13443 PF13601 HTH_34 Winged helix DNA-binding domain Coggill P pcc Jackhmmer:C2M278 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.44 0.72 -3.95 35 545 2012-10-04 14:01:12 2010-11-30 16:59:13 1 7 427 1 221 2120 231 79.50 32 70.80 CHANGED lRLAlhShLhu...V-cA-Fshl+-phpuTsGNLSlplcpLcpAGYIplcKsFpG++PpThh+lTspGRpAFpcYlcuLcphl ..................................RLtlhuhLhs........scp.s.s.F...spL........+.c...hl..s....l.Tc.........GNLop+lptLccsGY.l.p..h...c..K.s....a..t....u.......+....+..............P.p.T.hhplTsp...G...+.......p...A....app....alpsLcph............................... 
0 86 153 191 +13444 PF13602 ADH_zinc_N_2 Zinc-binding dehydrogenase Coggill P pcc Jackhmmer:B3CU33 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.83 0.71 -3.24 420 4473 2012-10-10 17:06:42 2010-11-30 17:15:43 1 149 1864 6 1804 17425 4479 130.70 21 25.39 CHANGED LGA-pll..Dhp.ptth....................h.t..p.hD....lV...........lDs.lG....spsh..pts........h..hh.......s..Gp.h..lth...............................hhthst.t..tthphhhhh.....................t.......ttp.L.....pplspLlcsGp....l+.shls.ps.aP..Lsc..sscAachl.csG+sp.GKlVl ...............................................................................................................................................................hGAptsl.......Dap...ptth............................th.h.pthD.....h.l.....................hDs.hG.....spth....pts.....................h..phl.......tss..Gp..l......lsls.s.................................................t...hth......t..t..t...h..s..h.ph.thhh.............................ht..........................tpp.L...............pp.l.s..p..L.....l....p.p.G..p..............l.....+......s........h..l...s...p.......s...a..........s........l...pp..........st...cA....a.p....hl....c....s......G....+.s...p...G.KlVl............................................... 0 552 1141 1527 +13445 PF13603 tRNA-synt_1_2 Leucyl-tRNA synthetase, Domain 2 Coggill P pcc Jackhmmer:B3CR81 Domain This is a family of the conserved region of Leucine-tRNA ligase or Leucyl-tRNA synthetase, EC:6.1.1.4. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.99 115 4853 2012-10-02 18:00:56 2010-12-01 14:32:33 1 35 4641 29 1125 8298 4984 183.30 43 22.35 CHANGED +NWIG+ScGs-lpFplpst.p...p.......plp.VaTTRPD..TlaGVTalulAPEHPLspcl...scppsplpsFlccs+ppsshEpshtstE..KcGl.hTGhhAlpPls.Gcc......lPlWlANaVL.h-YGT.GAVMuVPAHDQRDa-FAcKYs..LPIp.VIp...sts.tp..p.p........ppAa..s-.cG...hLlNSu....pFsGls.sppAhptIschLc ................................................................................................RNWIG+ScGscls..Fs..l...p....sp....s......p...........................plp..VFTTRPD..TlaGsTah........slA..P..E.........H..s..L..s.p.pl...........................scp...p...s...t........l.......t...s.......a....l......c..c....s.p...p...p.....u.........s.....h.E....c...p......t......s..t..E.........K..p..G...V.....hTG.....h.....a.A....l...p.P....l...s....Gcc.............l.P....lWl.A...s.Y.VL..h..s........Y.GT........GAVMAVPAH.D.pRDa.EFA.p.K....a..s......L..s..I..h....VIp..............sss................h................................tptAa....Tp...cG......h.h..l...N...S....u............h.sGLs...hp..pAhptlhphL............................................................................................................ 0 373 737 963 +13446 PF13604 AAA_30 AAA domain Coggill P pcc Jackhmmer:B3CTP0 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. There is a Walker A and Walker B. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.30 0.71 -4.73 84 5200 2012-10-05 12:31:09 2010-12-01 15:00:29 1 89 3798 11 1137 10491 2582 192.00 29 21.89 CHANGED pLsspQtpAlcplhsssphhsll.GhAGsGKToslpu.sppshcs.............p.G.hpVhGhA.oupAAcs.Lpcs............hG.......lpupTlushltthsp..................................hssps.....llllDEAGMVuocphtcllptspc.sGA.+llLVGDspQLtulpA.GusFctltcphs....sscLsplhRQc......ssht+.pAsptltpGcssp...uLshh.tppGplptsss ..........................................................................................t..t..Q.t....t....A....h...t....t.....s.....l.......t.......p.......p..................l.....h..........ll..oGG.PG..TG...............KT....T....s.......l...p..t.......l.......l..t..h.h..tp.............................................................................p...s...h....p...l...h..L....s.....A......P....T......G....+...A....A.....p....+.....ls...Es....................................................................h..G........................h.p.A...p.....T...l..H.....+....l...L.t....h....p..s.s...s...p..t.........p...............................................................................hh.s.t.s............lll..l...DE.....uS........M...........l..............D...........h..............h.............h.............h............s...........p.............L.........l...........p........s...........l.........s.......s.......................s.........s..........+.........l.lll...GDp.c.....Q.....L......s.......S........V.............t........s.......G..........s..............l.............h...............t...........-..........l...............h..............p..............t............t..............................h.l..p..p..hh.Rpt.....................h......s..h........................................sh...............................................................................................................................................
........................... 0 339 718 942 +13447 PF13605 DUF4141 Domain of unknown function (DUF4141) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4772 Family Based on Bacteroides thetaiotaomicron gene BT_4772, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 30.00 30.00 30.20 30.20 29.90 29.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.78 0.72 -4.77 24 314 2010-12-01 19:13:44 2010-12-01 19:13:44 1 1 144 0 39 229 13 53.60 45 25.74 CHANGED +p+lhhlhhshh.hhsspA+AQWVVTDPuNLAQuIlNos+pIVpTSsTApNslpNF ...........hpplhhlhsssh..lhsspApAQWVVoDPuNLAQuIlNusKpIlpTSpTApNhlpsF.............. 0 10 28 35 +13448 PF13606 Ank_3 Ankyrin repeat Coggill P pcc Jackhmmer:B3CUD7 Repeat Ankyrins are multifunctional adaptors that link specific proteins to the membrane-associated, spectrin- actin cytoskeleton. This repeat-domain is a 'membrane-binding' domain of up to 24 repeated units, and it mediates most of the protein's binding activities. 20.00 17.20 20.00 17.20 19.90 17.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.28 0.74 -7.80 0.74 -3.13 248 446 2012-10-02 12:10:21 2010-12-02 11:06:47 1 179 202 0 271 70923 5071 30.10 27 4.47 CHANGED p..s..p..os....L..p..h.A..st....p...s....p.....h...p.h..l.chLl.p....p....s..s...s...lst .........................sp.Ts....L...H...h.A.......sp................p.........u.......p................h......-.l....l.chLl.p......p.......s..s....s.................................. 0 129 176 221 +13449 PF13607 Succ_CoA_lig Succinyl-CoA ligase like flavodoxin domain Bateman A agb Jackhmmer:A3JZV1 Domain This domain contains the catalytic domain from Succinyl-CoA ligase alpha subunit and other related enzymes. A conserved histidine is involved in phosphoryl transfer. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.21 0.71 -4.57 213 2340 2012-10-02 00:59:22 2010-12-02 13:50:07 1 30 1654 2 873 4427 2578 135.40 36 17.93 CHANGED sGs.lullSQSGu.luss.lhshutpp..s.lGhSphlShGN.ps.DlshsDhlc.ahsp...Ds.pTcsIhlYlEulpc..u.ccFhpsA+pss..tp.KPllllKuG+.....optGspAA..tSHT.uuluGsstla-Ash+psGllpVcshc-....lh-suph..l ...............GplAhlSQSuAluss..lLc.hApp+......s......l.........G...FShh.lol.Gs..ps..D....l...s..h..s..-..l..L-aL.uc....................Ds..cT..psIlLYlE.....s..l.p.....-.....u...R+....F.h.s...A.A.....R.pA.u....+....s..K...PllllK.uGR...........os.s.u.....t....c....h..s......sHs....u.u..h........A.....G....s....D.....s....s....a....-....A....s..h...p...cuGllRVpshcE....Lhssscsh............................................................................ 0 284 570 741 +13450 PF13608 Potyvirid-P3 Protein P3 of Potyviral polyprotein Coggill P pcc Jackhmmer:P04517 Family This is the P3 protein section of the Potyviridae polyproteins. The function is not known except that the protein is essential to viral survival. 
25.00 25.00 25.10 25.10 22.00 24.60 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.53 0.70 -5.36 65 976 2010-12-02 15:02:58 2010-12-02 15:02:58 1 24 129 0 0 1226 0 381.00 35 16.23 CHANGED shchLIKula+Pchhppllp--PYlllhullSPslLlshassuulEpAhphWlp+cpslutIhshLpsLAcKVShAcsLhpQhplIppsuspLhchlpss.pss.h.u.h....phuhph....Lphh..tpcscssppLhpsGFss.hp.pphhphhEK....pYhptLcppWp-LohhpKhphhhpsp+hptthpptLp.p..sts-hpst.hshSspthhspstpphpsshs.....pshppspphh....psht.pphhshslp.slpthhsDlhphlNlhlllSlLlslhpslpshlppt+ph+tp.ht.thcpccptpplttha.h.a....sphp.p....c.t...ss...ptpFh-a...lc.....phpPcLhphh..p.hh....tp..pp..VpaQu.KsssptpLE+IlAFhuLlhMlFDsERSDsVaKlLsKlKslhuohspc...............Vp..............a.Q.S..LD..-Its.hh--KphTIDF-lsssttss.sssh.-.sTFppWWspQLppNRslPHYRTpG ................h+hLl+ula+P+hhppll.p-PYlllhullSPulLhuhYpssthEhuhphWIpccpslAhlhslLptLAt+VohupsLhtQhplIppsutpLh-hhtss.p..h.u.h.....hu.hp.h....Lphh..hppppssppLhptGass.hp.pphhphhEK.....Yhp.LcptWp-LSahEKhuhhh.t.+hphthpc.lh.p..stsDltuh.hshSspshhtpshppl+sshp.....thtpchpshhpt.phttl.shhlt.ththhhsshhphlshlllhSlLlplsushpshlppp++hKt.htthctccp..tl.hhathh....tpht.s....ptPohpcFhpalp...thsPcLhphh..ps.h.....tp...psVhHQu.K.pssptpLEpllAFhALlhMhFDsERSDsVaKlLNKhKulhuohspp...............Vp...............a.Q.S..LD..-.l.s..h--+phhlDF-Lppst....p.....-..hpFtpWWspQlp.s..slPHYRopG....... 
0 0 0 0 +13451 PF13609 Porin_4 Gram-negative porin Bateman A agb Jackhmmer:A3K5H9 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.28 0.70 -4.91 115 4742 2012-10-03 17:14:37 2010-12-02 15:24:35 1 10 840 14 1423 6841 1393 323.20 20 89.64 CHANGED As.s....sls....u....s....uus.A.tA.ps.....................sVslhGthc..hslt...................hhs.......s.tt....s..............tsts.....t.tssht....ss....s..cl...........shps..........ptphs...sGhs....h..tuth..pL-...................ss....t........s....................................................ss..s......sh..s..............scpu.....hlu..l.su..saGplphGc.......p..ts................sh..pphhs....th....s..h.t....hs.s.hssh....t................h..shs....ss..s.s...........................................................s......................sttsssl.....hhhs..ss......hu..G.h.shussYshsp.ss..ss...................................................st......sphh......slss.s.Y.s...h.u....shssu.su.....as..................psp.sss.......................phhshu.sp..hsh...us.h.............slsus..........as.......phpssssht..s.......................................scts.sht.....luss...a.sh.....s.s..h.s.lsssY........sphc.s..........s....................sssscspthslussYsL..up..pssl..aup..huh..h.csc...s.s 
.....................................................................................................................................................................................................................................h..s.shs.....h..h....u..s.s...A...tA..po...................................................oVoLYGhl-..sulp.......................................ah.s................stts......s.................................................psht.......h...t.s.s.sh.......ss..........S.Rh..............Gl+G..........sE-.LG...sG............h..tAhF..pLE.....................ssh............s............................................................ss...s...shh....................sRpA......aVG...Lss..sa...G...p.ls......hGR................p..hs................................hh.....s.hhs......................s...ht......sh..s....hss.h......t..........................h.....h..hs.......ss.............................................................................................................................s.......................phsssl......tah.o....ss.........au.......G..h.phs.ut..Yu.hus.ss..st.....................................................................................................t..tspsa.............uh.uh.s.Ys......hu.......s.hslu...su.............Yt.............................p.pssss.........................................tp.ptht.h...u..us..Ysh...us.h..................pl..su.s....................................Yp...........ps.p.h.p.s.s.hs..s..............................................................................tchs.sht....l..us.p......Y...ph......s.s...h...s..l....t..u..u..Y......thscs...............ts............................................................................ssssp...h....pp.h........s...l..u....ss..Y.t..L..SK....+Tp..l..Yst..hsh...p....s................................................................................................................ 
0 153 515 976 +13452 PF13610 DDE_Tnp_IS240 DDE domain Bateman A agb Jackhmmer:A3K4H6 Domain This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.56 0.71 -4.21 25 3068 2012-10-03 01:22:09 2010-12-02 16:00:46 1 16 1006 0 405 6167 734 118.80 40 59.33 CHANGED upsW+lDETYl+l+GcWpYLYRAVDpcGp.slDahLop+RDttAA+tFh++slcptt...tpPcsllTDphsuYstAlcclt...t..ch.................tplpphpsK.ahNNhlEpcHphlKp+h..pshpGF+ShcsApphluGh-shpslp+sph ..................................h..pW+h.DE.T..Y..l...K..l..p........G.+..W..t..Y..L....Y..R..Al..Ds............c.Gp..Tl.DhhLp.p.p.R.ss.p.u.A.h.tFl.p.+....l....l.pph..........stP..p...h..l.....s..T.....D......p.......u.s..u..h......t...t......A...h..t.p.lh....p...th....h......................................................................t.h..p..Hp....p.....K....Y...hNN...lIE.pDHthlK.t.h..h..........ht.a...p.....S...h..p..o.A....s.h.l.p.G.h.E.s.h.hslh+tp.t............................................................................................. 0 90 222 337 +13453 PF13611 Peptidase_S76 Serine peptidase of plant viral polyprotein, P1 Rawlings N, Coggill P pcc Jackhmmer:Q65730 Domain This family is the P1 protein of the Potyviridae polyproteins that is a serine peptidase at the N-terminus. The catalytic triad in Swiss:Q65730, the ssRNA positive-strand Brome streak mosaic rymovirus, is His-311, Asp-322 and Ser-355. 
27.00 27.00 27.00 27.00 26.70 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.79 0.71 -4.00 6 57 2012-10-02 13:45:52 2010-12-02 16:11:56 1 12 13 0 0 162 0 123.60 34 5.04 CHANGED Esll-.clsssccpsIc............Kp..ALhKp+pp.+l..VAN...lsDLsppLopICsEpGIPIl.lDppKR+AI.Ph...........V+L+Hlht.pl...tpDDhat--RhFLEHhst.ppshRssc..+Is.p.VRPGWSGsVI ...........................................lh-.cl.hhc.hch+.....................p..A.h+ch+...p...ppl...lus....lsDLhppls-IChEcshPIphIsssK++.sl..sh...........V+L+Hsht...ch....hs...tpDDMh.s.-RtalpHhsh.ttshp.sc..KIppp.V+PGWSGsll.................... 0 0 0 0 +13454 PF13612 DDE_Tnp_1_3 Transposase DDE domain Coggill P pcc Jackhmmer:B3CSW4 Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contains three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction [3]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.75 0.71 -4.48 38 1053 2012-10-03 01:22:09 2010-12-02 17:12:34 1 4 328 0 267 2914 378 123.10 32 58.29 CHANGED hGcsTGIuFIDSTslsVCHNhRI.c.HKVFcGlApRGKoohGWFaGFKLHLllNcpGElluhplTsGNlDDRcPl..ptLscsLhGKLauDKGYISppLhcpLhpp.GlpLlTplR+NMKspL.hshhDKhhLRKRulIETlsDpLKNlsQIEHSRHRSl .............................................................h.hlDSh.l.l...Cp.hR...t....t.+h.....h...p....s....h.....A.p.h....G..h......s..s..h.....t.......a.......a...a................G...aKlHh..l..h...s..p.p.G...l.h....s...a..h.l........T..u.sscD..hpsh..........h...h...........p.............t.....h............h.............s............h..........l.h........u..D.........cGY...l.u.........p.p...L...h..ppL...h...t........p......u....h............p...l........h........T......................h.......+.........p.........N......M.........c..........t..........t..............h...................p..................c....c.........h....h....h.t.p....Rth...IEoh..s.Lhp.hph.p.hh......................................................................... 0 82 137 182 +13455 PF13613 HTH_Tnp_4 DDE_4_2; Helix-turn-helix of DDE superfamily endonuclease Coggill P pcc Jackhmmer:B3CR78 Domain This domain is the probable DNA-binding region of transposase enzymes, necessary for efficient DNA transposition. Most of the members derive from the IS superfamily IS5 and rather fewer from IS4. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.09 0.72 -4.43 117 1593 2012-10-04 14:01:12 2010-12-02 17:48:36 1 20 366 0 326 1064 32 50.00 44 29.09 CHANGED p+L.shp-plllsLhaLRpshohppLuhtFsl.upoTss+hlpphhshLttphst .........+L.shED.LhhTLpYlR-Y+TY.cpIAscF.G.I.p....ESslhRtspaVEssLspsu..h.......... 
0 161 288 317 +13456 PF13614 AAA_31 AAA domain Bateman A agb Jackhmmer:C4FZU1 Domain This family includes a wide variety of AAA domains including some that have lost essential nucleotide binding residues in the P-loop. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.60 0.71 -4.29 61 7392 2012-10-05 12:31:09 2010-12-03 11:32:26 1 25 3411 29 2053 16785 5368 138.30 24 29.84 CHANGED cllulhSshsphGposhAlsluphLuppt.t.........VLhlsh-thsu.tthh.ptttp.................sls-lLhhht....t.....plsshlh..ph....sshshlsshpssp-hptlstpchtpLlpplpp..tY-..hlllDlushhpthhs...lLphscplhlssppsshutpp .....................................hlhlsSs.p.sG..G.Koo.lusNL.A....hsl...Aptut+.........................Vll..l..D.u..D.....h.....p....p....s......s..h.......s......p.......h....h...s...h.ssp.........................................................Gl..s....p....h.l....ts.t......................................................p..h.p..p.h.h........th.....................sh.h......l......h......s.........s........s......................s.........s.........s......s.........s.........p.............h......l..........t...........s...........p.........p..........h.............t.............p............l.........l......p..........hpp...............pa-............hlllD....s.s.s.........h..h........s.s....s....s.......h....l...h.p.....h...s...ss.h.lhVsp.t......pt.................................................................................................................................................. 0 671 1329 1734 +13457 PF13615 Racemase_4 Putative alanine racemase Coggill P pcc PF09739_manual Domain This is a family of eukaryotic proteins which are putatively alanine racemase. 
22.40 22.40 23.00 25.70 21.80 20.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.26 0.72 -3.77 39 179 2012-10-03 05:58:16 2010-12-03 16:56:07 1 7 148 0 127 188 5 102.40 37 17.07 CHANGED lsaL.sp.sLt.GDsLAAEalLLaLlSp......VhsR................ss.shs.lGphoLNL...sshstps....................hsppLtphlppllPtsthlslolpsLNs...hphs.P++Dhpss+LhoGlLQLu .............h.taLop.sLh.GDpLAAEYLlLHLlSp......VasR................p-..shs.lG+hoLNl...oshspss.................................................................sasppLhpllppLlPtohhlshTl-shNp...hphh.P+......KDYpsN+....LhoGlLQLs.......... 0 58 76 106 +13458 PF13616 Rotamase_3 PPIC-type PPIASE domain Coggill P pcc Jackhmmer:B3CTU8 Domain Rotamases increase the rate of protein folding by catalysing the interconversion of cis-proline and trans-proline. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.55 0.71 -3.83 68 3691 2012-10-02 13:30:10 2010-12-03 17:11:49 1 35 2370 6 991 7167 3075 115.00 30 30.29 CHANGED hplhKllsppp...................hsD.SlpspHIhls.................spsttcA.+ppADSIhsslp.sGu.cFssLA+caStD.t..ttpGG......-lsWhs.....tu......phstpFtsslhss.psucl.t.slcoshGhHIlp...Vh-++ ......................................................................................................ttt..............................tp..phpspHILlp..s...................................................c......pp....A....cp.l....h....p....c......L..........c....s........G..........s......c.........Fu....pLA+p.h..S..p..D..s...u......s......t..p..p..G...G...................-L.G...a.hs............tu.................phs.s.t.F.c..c.A.shsL....c...............h.....G.....p....l......o....s.......P......V......+.....o....p.....a....G......aH..II+lp-h....................................... 
0 308 613 823 +13459 PF13617 Lipoprotein_19 YnbE-like lipoprotein Bateman A agb Jackhmmer:B8H670 Family This family includes lipoproteins similar to E. coli YnbE Swiss:P64448. Protein in this family are typically 60 amino acids in length and contain an N-terminal lipid attachment site, which has been included in the alignment to increase sensitivity. The specific function of these proteins is unknown. 22.60 22.60 25.70 25.10 20.60 19.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.87 0.72 -4.34 77 733 2010-12-04 11:44:36 2010-12-04 11:44:36 1 2 727 0 121 293 18 58.00 58 90.54 CHANGED hhhhhshhsshhlu.uCo.......Pplclp.sPc..cPIsINhNVKI-HEIclKVD+-l-sLlpspssL ..........hhlsuhhuohhLs..GCT.......PRIEVA.APc..EPITINMNVKIEHEIhIKsDKDVE-LLcoRSDL.............. 0 26 57 89 +13460 PF13618 Gluconate_2-dh3 Gluconate 2-dehydrogenase subunit 3 Bateman A agb Jackhmmer:B8GVK2 Family This family corresponds to subunit 3 of the Gluconate 2-dehydrogenase enzyme that catalyses the conversion of gluconate to 2-dehydro-D-gluconate [1] EC:1.1.99.3. 
23.40 23.40 23.40 23.40 22.60 23.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.74 0.71 -4.00 170 695 2010-12-04 12:21:38 2010-12-04 12:21:38 1 6 483 0 300 712 280 143.80 22 62.23 CHANGED sspch.phlssls-tll.....Pps.ch...PuAt.....t......................................ssls.tFl-phlss............................spp......................................p..phhptGlst....l-phspppa..sps.FssLssppp.stlLpthptsph.........................................t..........stF....F.phlpshslpGaas.s.h......atGsps.hht.a................hPG ................................................................tpchshlpuhscpll.....Pp-..c......sGAh.....p....................................ssVs..tFIDpplss..........................s....t................................p.phaptGlss....l-phupppa.....scs.FspLs.s.t.pp.-plLpshppsph.................................................t.th..sphFF.shlhp.shpGaas..sPh..............asGsps.hhu.a........hP......................................................... 0 78 182 257 +13461 PF13619 KTSC KTSC domain Bateman A agb Jackhmmer:B8GXL8 Domain This short domain is named after Lysine tRNA synthetase C-terminal domain. It is found at the C-terminus of some Lysyl tRNA synthetases as well as a single domain in bacterial proteins. The domain is about 60 amino acids in length and contains a reasonably conserved YXY motif in the centre of the sequence. The function of this domain is unknown but it could be an RNA binding domain. 21.40 21.40 21.40 21.50 21.00 21.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.82 0.72 -4.33 89 426 2010-12-04 12:39:23 2010-12-04 12:39:23 1 7 377 \N 139 373 132 59.70 28 54.38 CHANGED lsS..SslpuluY....DspsptLplpF.....psGs....h.YpYhsVPtplapshhs..As.......ShGpaasppI.+....sp.Ys ............S.otltulsY....DspsphLclpF.....psGs......h.YpYhsVPtplapshhp....us.......StGpaapphI+.....sp.a.................... 
0 31 81 109 +13462 PF13620 CarboxypepD_reg Carboxypeptidase regulatory-like domain Bateman A agb Jackhmmer:B8H1B8 Domain \N 28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.92 0.72 -3.92 622 6052 2012-10-02 19:08:27 2010-12-04 12:41:19 1 501 1121 14 3219 18992 4276 80.30 21 11.43 CHANGED s..lpGpV...p.D.s...s.G..t...sl......ssAsVp...l...pt............t.....s...........t......s...ts......s...h.T..s.ss.Gpapls..tl...s.s.....G..s..Y..p..lps.....st.G..ap..stp.h.............p..ltl.p.s...s..p.....s..t.s...l...s...l...s......L...p ...............................................lpGpV.........h..-.s.....s..G.....p..........s.l.........s.sAs..Vp.........l......t....................t.......s......................................h.......s.....ts................s....tT......s.....s.s..Gpa..p...ht......t..l..........ss.......G.....s....Y...p..l.p.s...........s.t...s...G......at....stp.h...................................ltlt......s..t...............h..t.h............................................................. 0 1542 2401 2868 +13463 PF13621 Cupin_8 Cupin-like domain Bateman A agb Jackhmmer:B8H2W4 Domain This cupin like domain shares similarity to the JmjC domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.79 0.70 -4.88 90 2241 2012-10-10 13:59:34 2010-12-04 13:30:07 1 98 579 36 1539 2844 306 243.80 17 46.84 CHANGED spph...pchlspspPlll+u..hs..p..c..WP...shpt..Wpp............hcYLtpt.hss.htVp.....................sthts.t.t.sth......................................................................h.tt....s..hp.hhs..h.p........ttphshpch..................................lsp.lpptt..........t...................................sh.Yl.................t.....sss...l..sp.ph.st..l.t.......................p.....c...........psls...h.hs...thh......s..t.............................tss.........................................................lWlG..................................stp..s...p...oshHaDh......h-....................NlhshlpGcKcFhLhPPpphs............pL........................Yh.s............................shp......................ts...s..........................................................................................................................................h.....Sh...............lD.h.p................................................psDh..p.........caP..phpps.........ps.hhspLpsGD............sLalPuhWaHcV.............................cu.......h...............ss....hslulNa.Waptt..st 
.................................................................................................................................................................................................................................................................................................................................................................h.........h....tpPhll.ps..h.h....t.....t......hs.........s.hp.t..Wp.............................phh.ph...h.s.p.h...l..........................................................h.t...t.....................................................................................................................................................................t...t..................thph.sh....ppa.....................................................................................................................lph....h....pp....tt.....................t.............................................................hhhh.....................t......ppth........p...th.......s.t.l.h...................................................................................................t..c...............ths..........h..t....th.h.........s...pt......t..................................................................ths.................................................................................................................................................................................................ha.h..G.....................................................sts.....s...h...osh..Hh..D............h.p...............................s..h..h........s....l......p.........Gp..K...phhLa.s.P...p.p....t...t.....t.l.........................................a.t.........................................................s..................................................................................................................................................................................................................................................
..............................................................................................................................................................h.s.......l.s..h.t..................................................p.sh....p.........pa.P.....th.tps.............ps..h.cs...hl..p.sG..-..................................hLal.....P..stW.aHt.V...........................................................................................................................psh..........................t......slulsh..a......h...................................................................................................................................................................................................................... 0 639 928 1286 +13464 PF13622 4HBT_3 Thioesterase-like superfamily Bateman A agb Jackhmmer:B8GZN3 Domain This family contains a wide variety of enzymes, principally thioesterases. These enzymes are part of the Hotdog fold superfamily [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.85 0.70 -4.64 200 2392 2012-10-02 20:54:35 2010-12-04 15:19:22 1 22 1107 8 1028 4235 1639 249.30 20 84.50 CHANGED hsssWt............................sthsaGG.hss...ullhpAhppt...............tss.....................hthhplsssalt.ssssu.shpl.....pscslR.........sGRshshlpspl..hp.......sut.....................s..sspupsshhpsp.t..............................................sshP.sP........spp........h......................................h-h+hh......................................hsssss..hphWhRhp.ssh...........................................t.........lhhlsDsh..sshshthhs.........................................hss.slsh.....olp...............h.......................t....phP.t..sp...........Wlhhcs.....psphsssGh..uttpsplaD.................pp.G..hlupupQsshl 
..............................................................................................................h..............................ttsaG..G..lhu............p.s....l....hs.Atcss..........................stt...........h.spSl.p.s.hF..lt..su..s.....s..s..P..l.hh.........pVcpl.R..............cG.+o..hsstp...Vps...h.Q.............pGc..........................................s..lhpspso.Ftt...tc.ps.................................................................t...sshP.................s.P-p....h...s..tt.....h.............h................................................................h-hR.hst...............................................sststts...h...thWhRsp..ssh.ss.........t.................................ph......t...........LshhuDhh.........sssh.h.hhtth........................................................................hhhs.....ol-h.....sha................................................h.ctP..hph..sc.........Wlhhpt.............pos.u.s.su..h..uhspuplas............................ps.G.pLlAsstQpuh................................................................................................................ 0 253 600 877 +13465 PF13623 SurA_N_2 SurA N-terminal domain Coggill P pcc Jackhmmer:B3CTU8 Domain This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.84 0.71 -4.21 6 236 2012-10-02 13:36:56 2010-12-06 09:20:05 1 9 235 0 66 1450 838 139.80 32 20.93 CHANGED LGKIRS..KGslLlhlIGLuLFAFlA.psh.RSCpus+sppRpQVGEVhGEKISlQDFQKhl-EYppsIK.hp.tp.s.....ppE..pQVKDtVWpphVsN+LlEt-AK+lGLTVT-pElQsVLptGsNPhLhQT........PFV.pQ..TGRFDssuLKpFl ......................................................................................LppIRs..K.us..l..L..l.l..l.I..G...lA..LF..AFlh....u.sh....hp.s.....t...s.........t....t...ps..p.p.....V.G.c.Vs..G-.p.Io....h....p-....a....p...p....h....V....-..p........h...s..t..h......+....hp..t.Gt.....ss......................hsp..p............s.......Q..l..+..cpV....WpphVpptll.pp-sc+LGlsV..octE.l.p.s.ll.....t........s..s.s..P...hl.hps..........Phh...ss.psGtFDtstLppal................................................ 0 27 54 61 +13466 PF13624 SurA_N_3 SurA N-terminal domain Coggill P pcc Jackhmmer:O66854 Domain This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.88 0.71 -4.24 9 2783 2012-10-02 13:36:56 2010-12-06 10:42:40 1 22 2338 1 800 3368 1934 155.70 23 29.90 CHANGED MhchIR+..p..s....c..hhtslhsllshhFhLh..uhtuh.pp.h.upp.ssVApVsGpsIphp-ap+.....chc....h......hpp.hp......sp..........tpch.....lppt.....lLcshIs+cLLhppAp+hsltVoDppVsctIpp.PtFQ..s.GhFstphYpphLtpsshostpaEp.l+cplhlp+ht ...............................................hpt........t...................hh.h..h..h...h.h..l..l.h.l.s.F..h...h.s............G...l..s.....uh.......h........h..............t.....s.......ss....s.......h....s...A....p....V.s...s...pc...I.op..s...c...acp......................t.hp............p......................p.p.p.hp.................p.p.h...........................t.t.pt....................l.+pp..................l.LspLIs.ct....L.L....t.p...h......A.........cc..h...s.l..s...l....S...........-......p....p...l......c.........p........t.........l.........h........p........h.........s........t.......F.......p......s.....s.......G.........p.F..s..p...p..p..a....p.t...h....L....p...p..................u..h..o.....s.....c......p.at......ptl....+pplhhpp..h.................................................................. 0 290 539 681 +13467 PF13625 Helicase_C_3 Helicase conserved C-terminal domain Coggill P pcc Jackhmmer:B0SDT2 Domain This domain family is found in a wide variety of helicases and helicase-related proteins. 
32.50 32.50 32.50 32.50 32.40 32.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.37 0.71 -4.38 93 1240 2010-12-06 10:42:54 2010-12-06 10:42:54 1 14 735 0 510 1049 240 129.40 30 19.02 CHANGED sllVQuDhTlll.s.ups.s.sp.hs.c.tlushA-l.E.Ss.ttspsYRlTshSlhpAhsAGhsA-pllshLppaS...+hsVPQsLhhhlsDsspRaGpL+l.....tt.....s..u...hhlcssD.slLs.clhtstph.pslhhcc.l..uPssl ...........................lllQu.D.t.T.l.lL-s.s.p..t.sc.tA.c.tLsshAEh.c..ps..pplHsYRlTshSLasAhusGhss-pllshLtca...S.......+h.s.V.Pp..u......Lh.h.......hIs-..ssp.caG+l+l.....tp..................sthhlcssD.slLppl......Lpsptltshhht.p.ls.t................................. 0 192 352 454 +13469 PF13627 LPAM_2 Prokaryotic lipoprotein-attachment site Coggill P pcc Jackhmmer:C6QHI9 Motif In prokaryotes, membrane lipoproteins are synthesized with a precursor signal peptide, which is cleaved by a specific lipoprotein signal peptidase (signal peptidase II). The peptidase recognizes a conserved sequence and cuts upstream of a cysteine residue to which a glyceride-fatty acid lipid is attached [1]. 27.00 27.00 27.00 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.18 0.72 -4.14 142 1185 2010-12-06 13:22:07 2010-12-06 13:22:07 1 2 1178 0 174 520 13 24.30 50 36.96 CHANGED hhlhhhhs..shsL.uGCGpKGsL.....Yh...P ........hlhllls....hhuL.oGCGhKGPLYhP.............. 0 30 73 125 +13470 PF13628 DUF4142 Domain of unknown function (DUF4142) Coggill P pcc Jackhmmer:C6QAI3 Domain This is a bacterial family of unknown function. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.45 0.71 -4.15 144 804 2012-10-01 21:25:29 2010-12-06 14:02:49 1 5 412 0 357 1141 171 133.60 25 68.88 CHANGED ss.Dpp.FlppAutushhElpuu+LAh.p+u.psspVKsFAppMlpDHspsspcLpplAp.....ptslsls......ttlstpppttl....spLpsh....sG.tsFDcs.Yhspt.lssHccsl...slhcp.h..sss......upsscLKsaApp.sL..PslppHLptAcpL .....................................................t.t-tpFlppsstushhEl.puuclAh.p+.u.pss.p...........V+sFAppMlpDH....sp....ssppl.t.p.l..Ap........................p.t..slp.ls.......................tths.sp...tpshl....................spL..p.sh........pG.tsFDps.Ylptt.lssHpcsl...shhpp..h..tts......ucsspL+shApp.sl..PslppHlptActl................................ 0 95 200 267 +13471 PF13629 T2SS-T3SS_pil_N Pilus formation protein N terminal region Coggill P pcc Jackhmmer:C6QEC4 Domain \N 21.50 21.50 21.50 21.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.18 0.72 -4.45 188 1023 2012-10-03 16:25:20 2010-12-06 15:01:29 1 19 613 0 374 1013 88 71.40 24 16.39 CHANGED sspslplshupupllph.spshpp..VhVusPplADspl.h.......ssp.....plhlhGKpsGsTslhla..spsup...lhshpltVs .......................s..ttlplshupup..hlpl..s.p...s..hpp.......lhlu..sPplA.Ds.pl.h................ssp......pl.hlhG+.phGsTslhlh.....spsup..htshsltV........................ 0 99 193 279 +13472 PF13630 SdpI SdpI/YhfL protein family Bateman A agb Jackhmmer:C7PMJ0 Family This family of proteins includes the SdpI and YhfL proteins from B. subtilis. The SdpI protein is a multipass integral membrane protein that protects toxin-producing cells from being killed. Killing is mediated by the exported toxic protein SdpC an extracellular protein that induces the synthesis of an immunity protein [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.65 0.72 -4.27 205 1094 2010-12-06 15:14:03 2010-12-06 15:14:03 1 8 814 0 214 904 57 75.20 21 46.31 CHANGED sphc.Nh.hhGlRTsho..........hps-csWcpsp+.......huG..hlhh....hsGll..hllhuhh................thhhhlhhhhhllshlls..hhhuh ...........................hc.Nh.hhGhR.osho..........hpsccsWctsp+..............huu....hlhh.......hsGll...hllhuhh...hh...............shhhhhhhhhhllh.hlhshh.............................................. 0 82 154 180 +13473 PF13631 Cytochrom_B_N_2 Cytochrome b(N-terminal)/b6/petB Coggill P pcc Jackhmmer:C1Z9G2 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.20 0.71 -4.46 68 102102 2012-10-03 10:28:09 2010-12-06 16:15:23 1 28 38793 95 787 97053 2443 142.10 66 51.21 CHANGED .hhhlshhlHhhRshahGua+.ts+.phsWhlGlhLLlLshhpuFhGYsLPhs.hShhuhpl.hs.slhpu......lPhlG...shlh.hlaG.........G.hssss...hlsR.hash.Hl.llPhlllulhhhHlhl..l..pppspts............s..h...tp...ps.......sh..........st............hsaaPhashKs..h...hhhh.h...h...lhllh..........sls...........hlsP.....hh.hhsP .......................................................ShFFIClYlHIG.R..GlY.YGSYh.a......pE.T..WNlGllLLh..hl..MuTAFhGYVLP.W.GQM.S.FWG.A..TV.IT.NLLSA....................lP..Y.lG..............ssL.Vp.WlWG....................G.F.S..V.D..s.A..........TLTR..FF.u..h.HF.lL..P..Fl.........IsuhshlHLlF..L...H.E.T.GSNN.....................................Ph......Gl...sS................ss...............DK...........................................IPFH....PYa.oh.KDl.....LGhh..l.h.....lhhLh...................Ls...........LFsPslLGDP...................................................................................................... 
0 232 504 660 +13474 PF13632 Glyco_trans_2_3 Glycosyl transferase family group 2 Coggill P pcc Jackhmmer:C6QI89 Domain Members of this family of prokaryotic proteins include putative glucosyltransferases, which are involved in bacterial capsule biosynthesis [1][2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.46 0.71 -4.59 101 2553 2012-10-03 05:28:31 2010-12-06 16:25:28 1 20 1785 0 422 13191 1466 214.50 30 54.65 CHANGED hllllDuDsh.l....sssplphh....st.h.ht..p..s......c.hs...hlQ...t...hh..s....h......s.h..p..sh..h.pphhshh.......h....sp.thhpthhhtt.shst.sss.......hs..Gssshh.chpu..l.p................cls..............has...st........slu.EDhchuh+L..h.ttG.....hph....has...............................st.s.......th.pct...ss..o.......................hts.hhp..........Q.ptR..WhhG......................................hh.........................................hh....h..........t.h.h.....tph..h...h...th...h.h................hth...h........hh.....hl.t......s..h...h.h......................hhhhh.....hh.h............h.....lhhhh..hh 
...........................................................................................................................................................................................h....hDt.-sh.l......s.........l..h......................h....ht...p.....................................hs...........hh...............h........hh.......s.......................p.......p.......ph.........h....s..h...h....h......t..........................h..............h.....h..s....p...t....h....s.......s.....p..s......p..h...G.u....V..h.s............................s.s....G...s......s.A...h.Y..RR.SA...l.h.........................................................................................pst...............................................ha+..GcsS.............c.a..G...E..D...+...H.LTI.LM......L..cA..G...........aRT...........-YV...................................................................Ps.A.........ls..uTl.....VPc...o.........................................................lts...aLR............................Q...p..L...R....W..ARoo...............................hhp....................................................................................................hh......hh...................h.h...h.........hs..l....h.....t.........sh........s.s......................................lhl.......s............lu......sl..u......u..l..s.p.....................................................hhhsh......hh...................hhshhhhhhhh...................................................................................................................................................................................... 0 153 264 364 +13475 PF13633 N_methyl_3 Prokaryotic N-terminal methylation site Coggill P pcc Jackhmmer:C6Q9T5 Motif This short motif directs methylation of the conserved phenylalanine residue. It is most often found at the N-terminus of pilins and other proteins involved in secretion, see Pfam:PF00114, Pfam:PF05946, Pfam:PF02501 and Pfam:PF07596. 
It is often described as TypeIV_pilin_GFxxxE. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.19 0.74 -7.04 0.74 -3.62 363 98 2012-10-03 10:38:27 2010-12-06 17:09:35 1 5 70 0 17 1401 206 22.00 46 13.90 CHANGED aoLlElllulsllu.lshsshhs .FoLIElhIuhsIlulhAhshh..... 0 3 7 14 +13476 PF13634 Nucleoporin_FG Nucleoporin FG repeat region Bateman A agb Jackhmmer:Q5A223 Family This family includes a number of FG repeats that are found in nucleoporin proteins. This family includes the yeast nucleoporins Nup116, Nup100, Nup49, Nup57 and Nup 145. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.48 0.71 -12.53 0.71 -3.85 444 1433 2010-12-07 09:26:20 2010-12-07 09:26:20 1 64 197 0 1107 1448 4 103.60 29 24.15 CHANGED s......sss..........s.........................sss....s.........u.....uhFG........s........ss.........s.........s...........s..................s.uh...F..Gs.........s..s..........................s.s...s.......ss........uulFG.......s.........s...s......t......................s..................ss.u........u..uLFGs......ss...........t...............t..s..s..............s..s........u..uL.....FG.....................s....s..s...t.................t..s......s.....s...s.uul..FGs....s.s....ss.......................................................................s.......sssuulFG 
...................................................s............tssulFG...............s........pss..............s....s...........................uuLFGs.........s.ss..............t.....ss.s........su.........GGLFG...s......s..s...s.......................ts.............ssu......G..GLF.Gs....ss........s.................t..s...s........s.u........G.GL...FG.........................s...s.s.........t........p.s........ss......uuGL...FGs....s..s...ssp................................................................s................................................................................................................................................. 0 342 639 994 +13477 PF13635 DUF4143 Domain of unknown function (DUF4143) Coggill P pcc Jackhmmer:B1L4V1 Domain This domain is almost always found C-terminal to an ATPase core family. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null --hand HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.61 0.72 -3.81 579 2818 2012-10-11 20:44:47 2010-12-07 09:58:11 1 13 1059 0 687 2443 408 200.50 18 50.27 CHANGED lpcD..lhp....h...ht...........hpsht..thpplhphlstphup.hshpplupthpssptsstp...............hhphhhthhhhhhh.shtspttpphtps.KlYhhDsGLhss.hhsh.........hh...............s..hGtlhEshVhpcLh...........pp.............t.laa....ac..................sp..st.....t..ElDh....ll.p.........ts.....ph......hslEVKtu 
....................................................................................................................................................................................................................................................................h...Dh.t...t.......thtphhphhhsphsp.hshpphsphht.tttph.t..........................................................................hhphh.tshlhhhh..hphptppthtpt.KhYhhDsGLhss.hhshp.................................s..hGtlhEshVhp-Lh.....pp.....................thp..laa..a+..............................sp.....ss..........t...ElDall..p..p.......ss.....phhslEVK........................... 0 283 483 591 +13478 PF13636 Nol1_Nop2_Fmu_2 pre-rRNA processing and ribosome biogenesis Coggill P pcc Jackhmmer:B1L5U4 Family This family represents an evolutionarily conserved sequence motif of a set of proteins that are involved in pre-rRNA processing and ribosome biogenesis in S. cerevisiae. 23.90 23.90 24.10 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.25 0.72 -3.94 150 1049 2012-10-10 17:06:42 2010-12-07 11:38:25 1 3 1010 7 195 1177 35 103.90 35 24.07 CHANGED lah.h.....th.t.s...h.pth.......+lhR.GlplGc..h....+Kp+acPoaslAh..s..............ltppph.pp..s...l-..L....sp..-....pht..pYlpG-sl......................pl...s..p............p...t.p..G.alllshcuh....slGaGKh..sss.plKNhaPKGL ...................h..hh...t.h.ss..l..spL....+lhRsGLcLGph......KK...pR...FEPSaALuh..u..........................Lp.s.s....p...s..pp..s...l-.l........sp..-......phhcYlpGEsl........................................pls.p............s..s.pG...ahl.lsh.c....Gh....................sLGFuKh....sG.s..hlKNhaPKGL.................. 
0 71 130 162 +13479 PF13637 Ank_4 Ankyrin repeats (many copies) Coggill P pcc Jackhmmer:C6QI42 Domain \N 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.74 0.72 -3.35 86 7425 2012-10-02 12:10:21 2010-12-07 11:59:11 1 2308 729 31 4885 95319 6198 53.50 28 8.01 CHANGED thss..lphAsh.......psp..hph...lc.hll.....p..p..s..hs........hsth.....s..tt....st..s...slph.As...tts.....p....h.phlphL.l ..........................................................................................os.LahAut.................pGp.......hc.l........lc...hLl..................p....p....G......us.........................................lstp.........s.......pp........................Gt........T............sLah..As..........tpG.............p...........h.phlphLl................................................ 0 2429 3032 3970 +13480 PF13638 PIN_4 PIN domain Bateman A agb Jackhmmer:Q5AFK9 Domain Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases). 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.84 0.71 -4.05 147 2263 2012-10-03 20:43:45 2010-12-07 14:09:18 1 39 1676 9 998 2073 447 140.90 26 23.81 CHANGED aVLDTNVLl..cc.pslhp..h.............................pcpp..llIPhsVl-ELDphKpppp.......................plsphs..c...............ps.h....p...hlppt........hptt..........................t..sp.l........ts.....pph.......................pp.pl.....t...........................p............p..sDsp..ILssshthppph..........................tppl..lLlocDhNLRlKAps.hGltup........s.ap .................................................................hVLDTNVLL.....p.c.....s..p..s..l.h.p.h...............................................................pcpc...V.llPhsVlcE....LDthK+ttp........................................................clupp.A.R...............p.s.h..+...hl-ph.....htpt.......................................s.tph..........hh.ppt.......................ptph....p............................................p.......tpsDs....p......ILsssht..lppch...................................................................................tppV..lLVoc.DhshRlKApu.hGl...spph........................................................................................................................................... 
0 359 614 836 +13481 PF13639 zf-RING_2 Ring finger domain Bateman A agb Jackhmmer:Q5A9Y7 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.26 0.72 -4.13 329 15070 2012-10-03 15:03:13 2010-12-07 14:28:22 1 495 529 19 10132 21355 1292 45.90 34 10.07 CHANGED tpCsIChpph..................t.ppsh.....tl......s..C.........s....H.h.FH....tp....C...lpp....a...........h..............p...p......p.......t.....................pC..Ph.CR ....................................pCsIChpph..........t........................tcpht..........hL...................s...C.......................s...H....h...FH....tp........C....lpp......W............................l............................p..p.......p.......p.....................................................sC..Pl.CR................................................ 0 3177 5561 7989 +13482 PF13640 2OG-FeII_Oxy_3 2OG-Fe(II) oxygenase superfamily Bateman A agb Jackhmmer:B8H030 Domain This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.84 0.72 -3.49 172 3739 2012-10-10 13:59:34 2010-12-07 14:54:39 1 75 1689 20 1686 4120 5084 99.90 24 31.62 CHANGED htlphh.psGshhs................hHhDs............................sp+tloh...lha.Ls.sp..................pGGplphhst.........................hssh.ttlt...............PphG.plllFpu..........ppshHtVtss........tst..pRhslssaht .............................................h..h.pY..tsG.p..tat.................................hHh.Dsh.....................................hsp+pl.oh...llY..Lstsp....................................cGGchhhhs..............................................................................tt...tplp.........................PttG..pl..llF.u................psh.HpVpsV..............tpupRhuhshWh..................................................... 0 587 994 1366 +13483 PF13641 Glyco_tranf_2_3 Glycosyltransferase like family 2 Coggill P pcc Jackhmmer:C6QF57 Domain Members of this family of prokaryotic proteins include putative glucosyltransferase, which are involved in bacterial capsule biosynthesis [1][2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.59 0.70 -4.56 37 8712 2012-10-03 05:28:31 2010-12-07 15:09:08 1 130 3503 4 2974 39049 8776 233.50 18 43.38 CHANGED Pp..lsllls.shppss.....hltc...slp..shl..s....t..a....sp..hclhlhs..ps......ss....pts.h....p...h.hp....thttths..sht..hp.hl.t...s...p....G.tsKstsl.tpshp..t...........h..p.........sc......hlslhDuDshl.ssss.ltthhshh.........tp.......phshlp.s.h..hs...p..s...tp......sh.......................hs.h.....s.......h.chstpph..thhhhp.....pt.....h..s..h.s.s.h..s....G.ss.hhhcpssl..pcht.............t......hs.............hhhs..-Dh..thshpltp...tGhpst..h..h.....s.tsh...............shph......ts......ps....hts.hh+pptRWhhu ....................................................................................................................................................................................lslhl.s..sa..s..Et...............sl.tp........slp.......ul..h......t.........s...a..............sp........hclh.l.l.s.....Ds............................s.s.............-..p..s..h.........................p..........h...h....p............................t...h..........t....t.......h.s......t.h....................h.p....h....l.t.t..........................s.............p........s.................s..K......u.....t..s...l....st..u.h.p.....t..........................................................................s..p................................s-............hl..h.lh..........D.u..D....sh....h...s.....s....s...h...l.t.p..h.l..s.hh.............tss............................pl..u.....h.....Vp...s....h.......th.............t.....s.......ts................sh.....................................................................h.s.p.h.........tp...............h...p...h.....t.....h....h...h..t........h..h...t....t...s.p........................p.t.........................h......s......s.....s....h....h........s.............G...s..........s....h.......h...
h....R..+.....s..s..l.......c.p..l.G.................................................................................u......ass.............ts.hs...ED.h......clu..h.+..l...tp........t......G..a......p.....hh......a.......h...........s...ps.h.........................................s.h.t................hs..............ts..........htt...hhtpp.RW............................................................................................................................................................................................................................ 0 902 1889 2484 +13484 PF13642 DUF4144 protein structure with unknown function Serrano P, Geralt M, Mohanty B, Horst R, Wuthrich K kellrott Shewanella amazonensis SB2B PDB:2L6O Family A family based on the three-dimensional structure of YP_926445.1 (PDB:2L6O) 27.00 27.00 28.80 28.40 26.40 26.40 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.43 0.72 -3.68 25 104 2010-12-07 17:52:53 2010-12-07 17:52:53 1 2 92 1 28 77 3 100.70 41 86.07 CHANGED WPulLKhsGcDELlYLsSpp-a.pEs..pphhhsssDhLIDSpGpsYtlpp................ssptssLhhpsphlslpclhpLlpsHshtpupsCssKltFsolpQul-hls ................................................WPslLKL-GDDELlYLsScsDhpsEs...shIhsssDplI..DSpG.sYsl.s................ssstssLhtpspplSl-EsocLIQpHEFsLApsClsKIpFpTlspAhpsL... 0 6 10 19 +13485 PF13643 DUF4145 Domain of unknown function (DUF4145) Bateman A agb Jackhmmer:C7Q403 Domain This domain is found in a variety of restriction endonuclease enzymes. The exact function of this domain is uncertain. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.09 0.72 -4.13 91 970 2012-10-01 22:14:54 2010-12-08 10:52:41 1 24 854 0 191 795 46 92.90 23 13.40 CHANGED pcAt.phh.t.s.s.....p.uuu.....shhRpsLEtlhcchs........t.....psp.....................s..........L.........tpcIsplhpps......stpl..tchh.....................cs...lRhlGNpusH...s.....p.........p.lsp.........p.D....stphhc.hlc .............................h..Acphht.s.s.s.p.sss..hhhRpslEthl+hlh.........p.....psp.....................s..........L....pshlpp...hshpsh.....lsspltp.th.....................chl+phGNpAsH....s.....t............chst.........c-....u.hhhp...h............................. 0 57 112 150 +13486 PF13644 DKNYY DKNYY family Bateman A agb Jackhmmer:C2M6R1 Family This family represents a group of proteins found enriched in fusobacteria. These proteins contain many repeats of a DKNXXYY motif. The repeats are spaced at about 35 amino acid residues intervals. These proteins are likely to be associated with the membrane. The specific function of these proteins is unknown. 
22.50 22.50 22.50 22.60 22.40 22.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -12.04 0.71 -3.98 31 664 2010-12-08 11:29:52 2010-12-08 11:29:52 1 11 134 0 233 689 10 122.90 17 66.51 CHANGED asKD+splYYhs.......c.....clcs......sDs........poFch.......ls.......c......p..a....spDKsslYh....tspp..l.........shs..spsh...chl..............spp.....hhtp...stsphh..............hhp..hpp.............t..p...............hpsls.tYht.DKpplYa........t....t..phchlc.......shDhp.........ohc.ls...sh................aspDcsslYa....ts...........p.............plpssDs..soFchl...............s.t..hah+DKsp.lYh..t.s .......................................................................................................................l.t..s..........oh............ht....................h...........hDp.thYh.....t....h............................................................................................................................................................................................................t......a..........................................p............th..t.t......sh...p.h.................................................hhpDtpplYa.....t..................................p...................................l..ths...tsht.l..................t......h....hDt........t.................................................................................................. 0 78 218 226 +13487 PF13645 YkuD_2 L,D-transpeptidase catalytic domain Bateman A agb Jackhmmer:C2M5S1 Domain This family is related to Pfam:PF03734. 
27.00 27.00 27.00 32.50 26.40 24.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.99 0.71 -5.04 56 379 2012-10-02 23:30:06 2010-12-08 12:37:36 1 3 278 0 115 362 40 172.30 39 70.48 CHANGED applph..p.spl.shpshppAlpuapplppp..h...ppslLTlIDaopPSsp+RhaVlDlppp+lLapohVuHG+NSG......p.ph...A......spFSNpssShpSSLGaYhTspTYpG+pGaSL+..LpGLEcGhNDNAppRuIVlHGAsYsspsaI+ppGRLGRShGCPAlP.plscplIcsI...KsGoslFlY ............................h.......tstl.shpshppAh.puhp.phppp.......ppslLslIDaSpPSsccRhaVhDlcpc+lLapshVuHG+......sSG......p.ph...A......spFSNpssShpSSLGhahTt.ps.Yt..G.cpGhS.L+..LpGLEpGhNDpAccRsIVlHGAsYss......s..hl........p.p.h......G.+LG.RShGCPAlstp.lsc...p...lIstl...KsG...s...llahY.............................................. 0 47 79 102 +13488 PF13646 HEAT_2 HEAT repeats Bateman A agb Jackhmmer:C2M4W3 Family This family includes multiple HEAT repeats. 27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.37 0.72 -3.70 323 11825 2012-10-11 20:01:04 2010-12-08 13:16:31 1 572 2216 18 5662 13686 1691 88.30 20 24.50 CHANGED h.p...hLhp.h...L..p......ss..ss...t....l.+.t.t.uh.psLu.............p.h..t..s..t............ps.............................hshLh.p.hl........p..s...s..s....s..t.....lRttAs..p..uL....u....p..h............s..........s.....s...p....s....h.....s.hLhp.......h..h.ps..s..s..s..t...h.hR..ts..s...hp...uLt 
...................................................................................................hhth.l..p.......st..s....t.......l..R....t..t..As.tsLu....................................p.h...t....s..t...............ps..............................................................................................lshLh...p..hL..................p...D.....p....s.....s..t..........VRttAs..t.....uL............u......p...l.................................u.............................s.........t..p...........s.......l...........s..hLh.p.............h...h...ps...p..t...............hp.................................................................................................. 0 1805 3521 4730 +13489 PF13647 Glyco_hydro_80 Glycosyl hydrolase family 80 of chitosanase A Naumoff D, Coggill P pcc CAZY:GH80 Domain This is a small family of bacterial chitosanases. 25.00 25.00 616.50 616.40 19.30 18.00 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -11.95 0.70 -5.16 3 15 2012-10-02 14:50:22 2010-12-08 13:22:11 1 1 13 0 0 15 0 297.50 91 89.83 CHANGED AAAAAGVIPVGDSRVYGNVFDKGRKLTVNQWQAVLSMDAYPENGTTNYQDPEPWRYCEVDYEAuEGISDYRGDTFGPVGVTTVGDFPDYFKNAYAPYVLGKTGATNTDMKNWGVQVTGIAAADMKADDTRLDPYPNLSRSNSKKRAALTKICQALQSDFDNRQAQYVMSHYAHIDSDKLLPVLDALKKIGFTSFuQYNLVGLAFQVQVNTGSIGSISAFSSVKSAGNCGSMSuETCFATYLTDQYIRWLKSSSLGDDsGNCWRASMALDIYKQDPTMGNVSVVTSIINSKYPNNSGKCPTSGVKWSKN ..AAAAGVIPVGDSRVYGsVFDKGRKLTVNQWQAVLSMDAYPENGTTNYQEVGPWRYCEVDYEAAQGISDYRGsTFGPVGVTTVGDFPDYFKKAFAPYVLGKSNATNADMLAWGVQVTGVTAGNFpADDTALDPYPS+SRSDKsKRAALTKICGALQSAFDTQQDKYVMSHYAHIDQDKLVPVLNALKGIGFTAFDRYNLVGLAFQVQVNTGSIGSISAFSSVKSAGNCGSLSAETCFATYLTDQYIRWLKSSSLGDDPDNCWRASMALDIYKKDPTMGSVSVVNQVINASYPGNSGKCPTSGIKWSKN. 
0 0 0 0 +13490 PF13648 Lipocalin_4 Lipocalin-like domain Bateman A agb Jackhmmer:C2M763 Domain \N 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.97 0.72 -3.41 116 502 2012-10-03 08:47:39 2010-12-08 16:03:31 1 7 182 0 164 533 83 115.20 14 70.75 CHANGED hhsu.Cus-..-c.....ps.............pss............................lh...G....s.Wpls....p..h.s........tts.t..............................t...ts.h.........sps.ptshhpap..s..s........s.hstt..php......ss.......t.th.t..t..............tpsssasls..ss......pl.slph..............pss..t..t...tp.......hp..hhp......l....s..sspLsh ................................hhuCssc..cs.....ps.............pss............................lh...G....s.Wpls...ph.s.........httt...........................................p..tth.........sps...ptsthpFp..s...s............s..hstt...php......ss.........h..t..t..................................ppstsaslp..ss......pl.phph....................t.p.s..t.................h..hhp......h..s..tpph...................................................... 0 68 136 164 +13491 PF13649 Methyltransf_25 Methyltransferase domain Coggill P pcc Jackhmmer:B1L4J9 Domain This family appears to be a methyltransferase domain. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.54 0.72 -3.65 78 1216 2012-10-10 17:06:42 2010-12-08 16:37:45 1 39 965 11 462 43237 12860 101.60 19 35.92 CHANGED lL-luCGsG....h..h.tt....hhpt........................h....t................hphhs...lDhspctl..........................ph..sp....................cph..........h........phc.h......ht.....sch......pp.....l........s..........h........ttt..pFDhlhs..................sh.hs.........h.hsptthhphhpchsp.hh..c..s.G ............................................................................hLDl.uCG.s.G....t...h....h...th....h.h.t....................................................h.....t..................tph..hs......lDl......s...p.......p.h..l......................................................ph....up....................................................................pph..................................pl.p...h......lh...............u.Dh......................pp.........L.............................................s............................h.....................tst.....sa.c..l....l.hh...............................................hs.t..hs..................h...h.....t..........p.....p......h......t....t..h.h.p.....ph.h.p.h.l.t.t............................................................................................................................... 0 133 259 371 +13492 PF13650 Asp_protease_2 Aspartyl protease Coggill P pcc Jackhmmer:C6QGC6 Domain This family consists of predicted aspartic proteases, typically from 180 to 230 amino acids in length, in MEROPS clan AA. This model describes the well-conserved 121-residue C-terminal region. The poorly conserved, variable length N-terminal region usually contains a predicted transmembrane helix. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -10.02 0.72 -3.48 300 2359 2012-10-02 15:32:34 2010-12-08 16:43:30 1 147 973 0 1248 4331 281 91.40 19 17.90 CHANGED l...sspl....s.G......p..s...h...phllDTG.A.o.t..s.sls....pp..hu....c.p.l.....u.l...ph..t..sh.t............ht.s..p..s...u..s.G......p..s.p.s..t.h...s.p.ls...p..........lp..l..G.s........h.p..hp.sl........p.......s.hl.hs.............s.....t.....h...s........s.......ul.......LGhsh .......................................stl....s..G......p...s..h...phllDTGA.s.s...s.hls....ps...hs....c.c.l..........u.l...ph..p..s..h......................hth.....p..s...s..s.G...........p..s..p...s....t.h...s.p.ls.....p.............................l..p..l...G..s.................hp...h.t...sh........p..........h..hlhs........................................t................p........slLGhs........................................................................... 0 480 794 1052 +13493 PF13651 EcoRI_methylase Adenine-specific methyltransferase EcoRI Bateman A agb Jackhmmer:C2M8X9 Family This methylase recognizes the double-stranded sequence GAATTC, causes specific methylation on A-3 on both strands, and protects the DNA from cleavage by the EcoRI endonuclease. 
22.60 22.60 24.20 24.00 21.00 20.80 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.13 0.70 -5.36 12 164 2010-12-08 17:00:59 2010-12-08 17:00:59 1 6 134 0 31 164 89 237.20 38 88.75 CHANGED psLpcA+pu..KsDEFYTphsDIEpElpaYhpa....FcsKsVLCNCDDPhcSNFhKYFAhNFspLGLKKLIsTsYusss.........................s.......phs..........ps......csp.pshhh..-h-.t..s..psschs...............schphphLpGsGDFRSpEslcLLc-uDIVVTNPPFSLFREaluhllcYcKpFlIIGN.NAITYKElFsLI+cNKlWLGhph..GchtFtVPDtYE.....ctTchhlDEsGp+apphuNspWaTNL-ht+RH.EsL.Lh..........++Y..tc....cpYPKYDNYDAIpVs+sp-IPhDYpGlMGVPITFlcKYNP-QFEIlG.htps..t...............................h.h..ss..cpc.h....hlNG..K...phYtRILI++K .......................................p..LptApt...KpDEFYTph.-IcpEhthY...ph....FtsKhlhhsCDD........s..........Ss........FhhaF..pFtthtlK+Llsspa................................................................................................tt..h..htts.....GDFcStEshp.lhp.p.uDIllTNPPFShFcEals.Lh.ch.pKpFllluN..NulT.YpchF.LlppN+hWhGhth..u...FhlPpth.........tt.t.h....ct........G......t.hhp.tshhWhTNl-..hRp..p.l.Lh..........pph........ptY.hYDsa.puIpVs.h.tIP.Da.G.MGVPlo.Fhp+asP.pQFcllt...t........................................................................t...................ps...p.......a.Rlhlp................................................................................................... 0 6 20 24 +13494 PF13652 DUF4146 Domain of unknown function (DUF4146) Coggill P pcc JCSG_target416819 Domain This is a family of short proteins which appear to be pre-cursors. All members are from Pseudomonas spp. The function is not known. 
21.80 21.80 22.40 180.80 21.20 20.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.21 0.71 -4.25 11 61 2010-12-08 17:32:18 2010-12-08 17:32:18 1 1 61 1 15 44 4 112.00 72 81.75 CHANGED ASLpEaELs+MLEKVA+ESSVGTPRAINEDILDQGYTVE.GppLlNHLSVRpuHAppMRuNPcsVRsQLGsSVCpNsGYRpLMuKGAVh+YcFTEYKTN+PVsoppFsuuDCs .AShp-FELSKMLEKVAKESSVGTPRAINEDILDQGYTVE.GNpLINHLSVRpuHAppMRuNPDsVRsQLGsSVCpNsGaRQLMu+GAVLpYpFTEYKTNpPVATppFpAusCp. 0 1 3 10 +13495 PF13653 GDPD_2 Glycerophosphoryl diester phosphodiesterase family Coggill P pcc JCSG_target396624 Domain This family also includes glycerophosphoryl diester phosphodiesterases as well as agrocinopine synthase, the similarity to GDPD has been noted [1]. This family appears to have weak but not significant matches to mammalian phospholipase C Pfam:PF00388, which suggests that this family may adopt a TIM barrel fold. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.92 0.72 -7.25 0.72 -3.66 73 257 2012-10-01 22:17:21 2010-12-08 18:04:34 1 7 110 10 44 1089 478 31.20 49 10.52 CHANGED +VhhW.os...........Dp...............s.........sh.........pphh.shGVDtlhTspss ....KVYaW.Tl...........DK.........cu.....oh........RcuL.cAGVDGlMTNaPs... 0 17 31 38 +13496 PF13654 AAA_32 AAA domain Coggill P pcc Jackhmmer:C6QHL0 Domain This family includes a wide variety of AAA domains including some that have lost essential nucleotide binding residues in the P-loop. 
25.80 25.80 25.80 26.00 25.70 25.70 hmmbuild -o /dev/null HMM SEED 509 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.64 0.70 -5.84 125 1541 2012-10-05 12:31:09 2010-12-09 13:32:27 1 8 1299 2 408 1140 169 341.70 25 52.08 CHANGED Tp-L.ssh.....pt.hlGQcRAhcAlcFGlul.cppGYNlalhGtsGoG+pohlpphLpctA...cp...ssPsDWsYVtNFcsPcpPtsLpLPAGpGpphccDhcpllcpLtpslPpsF-u-cYpp++ppltcpappcppphhpplpcpApcpGhsL.hpossG....hs....hs.Plp...........-.G.........cs....lsp--a......ppL...............sccc+cplc..pphppLppclppllpp.lpph-+chp-clccLc....cclst.slsphl....p....pL+....c+Ypc...h...sclhpaLcslpcDll-Nlp.Fh...............t.tp.......tt...tpt..p...................RYpVN.l.lV.DN...s.......................pppGAPVVhEssPoapNLhGcIEatsphG.slhTDF......ohI+sGuLH+ANGGYLllcAcclLppPauW-uLKRAL+opclpIEsh.....ths........so...h......oLcPEPIPLclKVlLlGs..lYaLLtpaDs-FpcLFKVpADF-schsRss-shppaAphlushsccpsLh.h-+sAVA+l..lE..au..uR.....spc..+..LSs+hspls-LlpEAsaaAptpssphlpspaVcpAlpt+phR ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sphtpLhGp.lc..s..............s..s.h........h...l..pPGhlHpANG.GhLllshpsLLspP....h.....h....W.tLKphlppcchc.h.uh.....s.s......pslslps.sh..P..LclKlILlG-.ccthtt...hpch-P-.ht.c.hht...lh..u-F-.cplp.h.s.s.E.s.h.p.p.a.spalsp.hsp.c.p.p.L..sh.st.s.AhstLlctusR..h......spc...p..LsLp.......l..tp..hl.t-.us.hs........t..............h...p..tl..s....................................................................... 
0 112 246 345 +13497 PF13655 RVT_N N-terminal domain of reverse transcriptase Coggill P pcc Jackhmmer:B3CUZ7 Domain This domain is found at the N-terminus of bacterial reverse transcriptases. 27.00 27.00 27.20 27.30 25.80 26.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.72 0.72 -3.92 152 753 2010-12-09 14:12:41 2010-12-09 14:12:41 1 5 321 0 163 711 23 79.10 44 18.14 CHANGED Wp.........sIsWpplccpVh+LQpRIhKAsppGchp+V+pLQ+lLh+..SasAKhLAVRRVTp.Np..............GK+TuGVDGhhhho....Ppp+hphhpp.....Lp ...................WpsIsWpcspppVh+LQpRIh+AsppGphtKV+pLQ+LLs+..SahA+hLAVRRVTp.Np..............GK+TuGV.DGhhh.o....spp+hphhppL................................ 0 42 115 138 +13498 PF13656 RNA_pol_L_2 RNA polymerase Rpb3/Rpb11 dimerisation domain Coggill P pcc Jackhmmer:B1L7Q7 Domain The two eukaryotic subunits Rpb3 and Rpb11 dimerise to from a platform onto which the other subunits of the RNA polymerase assemble (D/L in archaea). The prokaryotic equivalent of the Rpb3/Rpb11 platform is the alpha-alpha dimer. The dimerisation domain of the alpha subunit/Rpb3 is interrupted by an insert domain (Pfam:PF01000). Some of the alpha subunits also contain iron-sulphur binding domains (Pfam:PF00037). Rpb11 is found as a continuous domain. Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, Rpb11 subunits from eukaryotes, RpoD subunits from archaeal spp, and RpoL subunits from archaeal spp. Many of the members of this family carry only the N-terminal region of Rpb11. 
27.00 27.00 27.00 28.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.35 0.72 -4.54 53 756 2012-10-02 13:35:44 2010-12-09 14:24:54 1 6 450 90 495 763 147 75.70 37 58.93 CHANGED sphphplpsEDHTLuNsLpphLhcs.scVpFsuYsl.sHPh........pschpl+lpoc..ss.........sshcslcculpclhshscplpppa .............sspFplpcED.HTLGNhLRhhLh+..........s...P........pV.F.uGYpl.PHPh.............-s+hhlRlQTp..ssh........................................ssh-AlppulpcLhs.hshlpcpF................... 0 149 262 401 +13499 PF13657 Couple_hipA HipA N-terminal domain Bateman A agb TIGRfams Domain This domain is found to the N-terminus of HipA-like proteins. It is also found in isolation in some proteins. 22.50 22.50 22.50 22.60 22.40 22.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.44 0.72 -3.70 185 1769 2010-12-09 14:38:03 2010-12-09 14:38:03 1 10 1109 8 541 1574 195 102.00 24 26.72 CHANGED LpVhlp.s....p....hlGpLp..p.s...ss....s.....hpFpYsssahs....s..uh..sl..S.l....sh......Plp............sp.................s.a..........p......s..........p.......sl.s.................aFssLLP-uh....hpc.....hl.....sp.....p..h.....phs.....sts..shsl.....L.thlG.p.-.shGAlphh ................................hht..s......p.....hlGpLs...pp.......ss..s.....hpFpYss....palt....t......uh..sl...S...l....sL......Plp..............pt.................sh..................p...s.....p......slhs........a..FsshL...PDuh......hRc.....hl.........tp...c.h..............php.........stsshsh..........L.thl.G.p.c.s.hGAlph.h.................................. 0 136 313 436 +13501 PF13659 Methyltransf_26 Methyltransferase domain Bateman A agb Jackhmmer:Q9YEM1 Domain This family contains methyltransferase domains. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.74 0.71 -4.06 167 9512 2012-10-10 17:06:42 2010-12-10 15:35:54 1 120 4267 26 2887 64631 10679 155.40 18 26.58 CHANGED ss......plL-PusGsGt.h.h..thhh.pt................................................h........sphhul.El.ssh......................................................................ssph...sp............................th.....h.t............t...t....h.............................................................................................................p....hhtsc.h..t.ph.....................................................t.....h....t.s.....s.p.............h-hlltNs.Pa...................s.....t...............t.t.........t......t...........................................................................hhp...............th.l..tps.hc.hLpsu.Gh.lshls....s.......s ...................................................................................................................................................................................................................t..pllDh.us...G....o.G.s....h..h....lhhs..pp................................................................................................................h........tsplhul.E.l.....s.s.t........................................................................................................................................................................................................................................................................................................................................s.s...p.h....Ac........................................ps......hth................s....t..h.......................................................................................................................................................................................................................................................................
..................................................................................................................................................t....t.......h..p........l...h...p..s..D...h.....h..ph..................................................................................................................h............h.......t..s..........s...p...........................hD....l...l...l....s...N...P...P.Y...........................................h..t.t.........................t....t..t.........t...t......p................................................................................................................................................................................h.hp.................th..h...t...t.u..h.......p...h.L...p............s...Gh..hshh...p............................................................................................................................................................................................................................................................................ 0 966 1897 2456 +13502 PF13660 DUF4147 Domain of unknown function (DUF4147) Coggill P pcc Jackhmmer:B9Z5Y2 Domain This domain is frequently found at the N-terminus of proteins carrying the glycerate kinase-like domain MOFRL, Pfam:PF05161. 
23.00 23.00 24.90 24.70 22.30 21.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.53 0.70 -5.26 229 900 2010-12-10 20:10:12 2010-12-10 20:10:12 1 9 749 3 400 860 424 223.20 39 52.89 CHANGED phlpplFcuAlpAupPtpsltptLst..p.....................................s...tG+slVlGuGKAussMApAs.-phh........ss.........p......l..p.GlVlT+a.........Ga....s..t..........s......h...p......................p..l...cllE...Au.HPVPDpsulpAuccllchlp.....s.l.stcDlVlsLlSGGGSALLshPss........GloLtDKpslsctLLpSGAsIsEhNsVRKHLStlKGG+LAtss..tP..A..cVloLllSDVsGDc..s.sI.......A.......SGPTlsDs..o.ThtDA..hsllc+Ysl...plPs..s.ltphLpps ..............................t..hpplapsAlsAspPttsltttLs.p.......................................spu+slVlGsGKAussMAtAh.Ephh........ss..........p.l...p.GlVlT+.a.........Ga..s..s.......s.....s.p......................................cl..cllE...Au.HPlPDtsulpAupcllphlp.....s..L.s.t.cD....lVlsLlSGG.GSALLsh.Ps.t........GloL.........p-cpsls+tL.LtSGAsIsEhNsVRKHLSt.lKGG+LAtus..hP..A..pVl...oLllSD....VsGDsss.hI.......A.......SGP.T....VsDs....o.o.....h.....t-....A..htllc+Ysl...plPt..slhthLpps................................ 0 128 241 326 +13503 PF13661 2OG-FeII_Oxy_4 2OG-Fe(II) oxygenase superfamily Bateman A pcc Jackhmmer:B8H030 Domain This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.35 0.72 -4.34 39 165 2012-10-10 13:59:34 2010-12-13 14:17:58 1 7 132 0 81 1691 1412 68.40 19 21.49 CHANGED hsshhsst...............h.hhthtpsshhphHhDtpstts.......................huhllYLs...thsscasGuphhh.cpssptt ......................................................hht........................thhphstu.tp.hshHhD.s.sstts...........................h.hohllYL.s........t.ts-at.....GsphhF.c.ts...h................. 
0 36 64 72 +13504 PF13662 Toprim_4 Toprim domain Bateman A agb Bateman A Domain The toprim domain is found in a wide variety of enzymes involved in nucleic acid manipulation [1]. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.42 0.72 -3.97 195 9626 2012-10-01 21:47:57 2010-12-13 14:48:10 1 67 4702 9 2135 8279 5737 85.30 27 20.04 CHANGED stlhlVEGhhDllulcps...shpsshts.......lG...ssl...shpplt..phph................................pclllshDsDh.........sGppsutt....htc..........h..t.shplshls ............................................................t.plhVVEuhhDVhA......l....cps.....sh.........p..s...th.t.s.........................LG.....ssl.......sh..-t..lt.....chth.....................................................................................pclIlsh.-sst............................sGcssAhhhhc.........hhh....shth................................................................... 0 703 1405 1799 +13505 PF13663 DUF4148 Domain of unknown function (DUF4148) Coggill P pcc Jackhmmer:A9BSV2 Domain \N 22.10 22.10 22.20 22.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.95 0.72 -4.12 203 1134 2010-12-13 15:52:52 2010-12-13 15:52:52 1 3 104 0 396 1029 56 61.00 30 62.98 CHANGED ussl..ssshs..........uh..A.ps.............s...lTRAQV+AELhphcpsGh...Ps...ts.s......Y.Pssh........p...s.Aps.plss ................................................hs...h..sssss..........................uh..utps.........................slTRAQV+uELsphcpsGh....tss....ssps.......Y.Pssh........ts.Attp........................................ 
0 20 104 254 +13506 PF13664 DUF4149 Domain of unknown function (DUF4149) Coggill P pcc Jackhmmer:B9YZD8 Domain \N 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.27 0.72 -3.95 178 975 2012-10-01 21:57:53 2010-12-13 16:57:40 1 3 879 0 341 687 671 99.80 23 53.21 CHANGED LhhuhhhGuhhhhu......als................uPhlFps...L.....s+t.psGtltspl................Fshahh....hthsssslhhlhthhth.............h.t..t..hhthhhlhshhlhslhsthhltPhhsphptpttp ...................................hshhhGu.lh..hs......a.ls................u.hlFhs........L..........s+h.psGphtspl.................F.hhhh....lhhsssllhh.hhhhhsh.......................t..h...h.hlllshh.lhslhshahltPhhtphpptt..t............................ 0 84 194 285 +13507 PF13665 DUF4150 Domain of unknown function (DUF4150) Coggill P pcc Jackhmmer:A9BLL6 Domain \N 27.90 27.90 27.90 29.30 27.80 27.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.34 0.72 -4.16 83 382 2010-12-14 12:40:17 2010-12-14 12:40:17 1 24 235 0 135 304 15 106.30 32 34.40 CHANGED sGhsh.uhPDVChT..P..........lPlP..YPNhuhssssh.s..sstsVhhsGtPshshs.ohh.shosGDp.uGss..tGVhSGslhGtschh..stS.sVhhcGpsssRhsDhshpN....ssNssGt ..........................hsh.shPDVChTPs..s......lPlP..YPNhAhssssh.s..ssssVhhsG.ts.shshs.ohh..shosGDpuGst.........tGVhSuT.lpG..p..schh..stSssVhhpGpsssRhsDhshhN....ptNs.s.h.............................. 0 35 71 104 +13509 PF13667 ThiC-associated ThiC-associated domain Coggill P pcc Jackhmmer:B9Z987 Domain This domain is most frequently found at the N-terminus of the ThiC family of proteins, Pfam:PF01964. The function is not known. 
27.00 27.00 27.20 27.20 25.80 26.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.80 0.72 -4.42 217 1951 2010-12-14 13:42:45 2010-12-14 13:42:45 1 4 1913 6 496 1521 213 79.30 48 12.96 CHANGED tshPsS+KlY.lpGs..+sDIRVPMREIsLosT.............s.........pt.....NsPlhVYDTSGPYTDPsspIDlcpGLP.plRpsWItcRuDsEp ..................t.saPsS+KlY....lpGo....ps-..l....RVPMREIpLosT.h..t.st.........ptNtslsVYDTSGPYoDPph..t.....IDlppGLs.cLRpsWIptRuDsE............. 0 124 292 401 +13510 PF13668 Ferritin_2 Ferritin-like domain Coggill P pcc Jackhmmer:A9C0V0 Domain This family contains ferritins and other ferritin-like proteins such as members of the DPS family and bacterioferritins. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.60 0.71 -4.22 124 660 2012-10-01 21:25:29 2010-12-14 13:44:48 1 9 316 0 446 1025 61 140.50 24 45.49 CHANGED sDhclLNaALsL..EaLE..............spFYptu.htt.............................sushsssh.....hshhpplttcEhuHlchLpssl......Gsps.......l......sps......pa.....ca.............................hssas.s.......ttsaLtsAtshEssGVsAYhGAus.hls...sts....hLpsAuuIluVEAtHsuhlRshltp ....................................................DhplLphALsL..EalE..............t.saYptuhtths..................................tsshssts.....hphhpphsppEhuH.sp...h.Lpssl...........Gsps...............s..........sps..........p.a.....sa.................................................................shs.s........stsalt.h.ut........hl..Ep..sGsuuY...hG...sss...hls..........sps....h...hp..hsu...slls...sEApHtuhlRph..................................... 
0 159 312 398 +13511 PF13669 Glyoxalase_4 Glyoxalase/Bleomycin resistance protein/Dioxygenase superfamily Coggill P pcc Jackhmmer:A9C2R0 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.42 0.72 -4.04 77 2150 2012-10-02 15:00:03 2010-12-14 14:45:10 1 24 1580 20 829 7043 2394 114.10 24 65.97 CHANGED pplulsVt...Dl-pshpha...sphlG............h........p.....h..............h..........t.p.h........h..s.............p..............s..............p................s..............s........s..h.........s......h.h.th.sss.....hpl..ELl.p.....Phs.....ss.....s.hpt.......ptsu.lp..Hl.uhh..s...c...D...l-ssltthpp.p..Ghp..hh....t.s...tt..u...s.s.s.tc.hsa .................................................................pHluluVt...D..l-pAhphaps...hL...G.............h............p......h.......................h...............t..t..................t..s......................p............................p......................p.....................s.................................V....................c...s.........................s..........h...l...th...s.ss.................pl...E.L.l...p........Phs.........ss.......................phlpp...........................pusG.l..a....Hl..Aac......V....c......D....l..c...s...s...h...p...c...Lcp...p....G.hc..ll.....tp..t..sph..G...s.t.s.t.lha................................................ 0 282 568 711 +13512 PF13670 PepSY_2 Peptidase propeptide and YPEB domain Coggill P pcc Jackhmmer:C6QGP5 Domain This region is likely to have a protease inhibitory function (personal obs:C Yeats). The name is derived from Peptidase & Bacillus subtilis YPEB. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.78 0.72 -4.08 83 928 2012-10-01 23:09:26 2010-12-15 11:13:51 1 7 550 0 229 873 47 80.40 22 77.90 CHANGED hhhlshssshhu...usAhA.........sspsPtspWh..stsplpptlpp.pGa.plcclcht-sts...aElc.uhspsGc..+hElhlDPtotpllcpc ...................................................................hh...hhhhhhh.u...........ssAhA...................pstt..s...sph.............shsps.tp.h...l.pp....pGa..plpclch..-...c.us...........YElc.s..ts....p..sGp..ch-lhlDstohcllp.................. 0 52 120 174 +13513 PF13671 AAA_33 AAA domain Coggill P pcc Jackhmmer:A9BWH5 Domain This family of domains contain only a P-loop motif, that is characteristic of the AAA superfamily. Many of the proteins in this family are just short fragments so there is no Walker B motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.74 0.71 -4.31 146 4032 2012-10-05 12:31:09 2010-12-15 11:19:46 1 83 2093 32 1521 12436 3403 138.60 17 34.96 CHANGED lllhsGhsGSGKooh....spp.h....htth....s........hhh.lspD.s................................h+p..phhs..........tt.t.....p..................................t.......th...sht....hhhphspthL.ptG...ts.s.ll......D.sT.........slp.....t.ppR....p.thhpl..Ap..phus.............h.ph.lhhpss..phhtpR...............tpRptp.................p......ssttslpphhpph..c..P 
.......................................................lllhtGhsG.oGK.oTh................ucp...l....................ttt.hs.......................hhh...l.s.p...D.s.....................................................................................hpp....phts................tt.........t...................................................................p.......ht..tt.........s.h.p..............h...h..h.......p....h...s...p..p......t.......l........p...p...G..........ps...l...ll.................D...ss...................................................hp...........t..pt.p...................p...th.h.p..h......sp.......p....h..s..h.p.....................................h..h.h...l.h..h...p...s.....s.......c.h.h.h.p.R.................................h..t.t..R..stt......................................h.....h.................................................................................................................................................................................................. 0 467 898 1220 +13514 PF13672 PP2C_2 Protein phosphatase 2C Coggill P pcc Jackhmmer:A9C323 Family Protein phosphatase 2C is a Mn++ or Mg++ dependent protein serine/threonine phosphatase. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.58 0.70 -4.93 68 3942 2012-10-03 01:39:20 2010-12-15 15:10:55 1 30 2499 16 1091 5760 566 219.60 18 65.13 CHANGED tGpoHh.ppu..tssQDsht......htth.sss...hl...l..A..VADGAGS......u..p..hSchG....uplAsp.ss.....h.ptlpphh.................sptphs...............t.pthhp...plh..pphht.......................................psttp....pt...phpchuoTLlhsl....hs.......s....s.h.......hh..hhplGDGsl.shht.........ps..uph.ph..lsp......s.c...s......GEa.s...Np...............TtFlsss....sshp.ph.......................................phhphp...h.pth...sslhlhTDGlp..........s.sl.....tstp.........sahs.hhpth...hpt...h..spst...........hppp...LtphLp ............................................................................................................................................................................................................................ht...............t..NpDthh.............t....ttt.............l......h......s.....lADGh....GG.........p..p........sGc.h.A............S...ph...u..lp..ph.........h.p.h.h..p.p.ph...............................pttt.t....................................................h.pth.hp....tht....pp.ht..............................................................................................................................pt..tt.p...........pt.....ph.p.s..h..u....T....Tlsshl........hp.....................s.........s.p..................hh..hsplGDSRs.....hhh..................cs......s..p..l.....pp......l..op.....................D..+...........s..................................h.th..s.......sp...................................tt.t.l..s.tp.........p.s.t....ph..............................................................................................c.h.t.php.....h.p.ts....-.h..l.l.L.s.SDGlp...................s.hl........sspp..............h..................................................tht.thttt..t...................
.................................................................................................................................... 0 376 772 985 +13515 PF13673 Acetyltransf_10 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:A9C0U2 Domain This family contains proteins with N-acetyltransferase functions such as Elp3-related proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.18 0.71 -10.45 0.71 -3.97 46 6639 2012-10-02 22:59:21 2010-12-15 17:48:46 1 97 3051 5 1600 44588 6348 116.40 17 61.61 CHANGED Dhstltplhpc.shps.p.s.h.hh.s....tth.......h..s.l.....cphhpp..s......hhhlh.hss.....pl.....s..ua.....s.t......h...p....s...ssc.......lsh.....L..hstsphptpGhupsLls.tltp.th..pp...uhp..hLpsp.......ushhu...psh.ap+.hGa .......................................................................................................................................h......................................................h........................p...h.............p.p..h..l....t..p.....................h.h...h....l...s.....p....p...s...s..................................p.l...............l.....G.a..................h..s....................................l.........p...........t.........ptp.............................................lst...............l...aV..p..P.p.h..p..t..p..G....l....G....p....t......Ll........p....t....h.hp....t.h......tp........tht.......h.l...p.lp..............ss.......tA.........hs.F....Y.p.c.hGF............................................ 0 480 972 1299 +13517 PF13675 PilJ Type IV pili methyl-accepting chemotaxis transducer N-term Coggill P pcc Jackhmmer:Q47EX4 Domain This domain is found on many type IV pili methyl-accepting chemotaxis transducer proteins where there is also a HAMP, signature towards the C-terminus. 
23.00 23.00 23.10 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.61 0.71 -3.75 504 2034 2010-12-16 09:58:11 2010-12-16 09:58:11 1 65 1235 6 420 1339 56 110.10 25 20.36 CHANGED uh..hh...lt..ust..ssA.cAINhAG.SLRMQSY....RL...h......h.h.....st.u.....p.t..hh...p..thppsht..p.sh..tp......Lpptsst..................s.s.l.psph........tp........Lpp.pW.....pc.lp....s..tl......pp...spp..................tt...shpsh.ssp....ls .................hhht.sst.tsA.cAINhAGSLRMQSY....RL.......s....................h.h.................st..u..........p...p...t...hh.....p..thpps..ht..p.sh...sp........Lpptsst..........................psl...psph..............tt........Lps..W....pchp....stl..........pp....spt....................httslsphlt................................................................................................................................... 1 105 246 341 +13518 PF13676 TIR_2 TIR domain Coggill P pcc Jackhmmer:Q47BW0 Domain This is a family of bacterial Toll-like receptors. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.24 0.72 -3.87 214 2056 2012-10-02 18:56:14 2010-12-17 13:44:38 1 344 944 8 1033 5257 266 109.00 18 21.07 CHANGED lFlSYs..t...p.D..pt...hAchl...ttt...L......p....p..t....Gh...p...Va...h..s......h.ch.h...s...Gssh..hptlpps.l...p.p..uphslsll.Sssah....p.S............s..asp..p.E....hptA......tp......ts......+...llPlh.......lc...s.s........ph...ss....h.ls....plts.....hc ..................................................lFlSas.....p........p..D.........pp..........h.s.....p.....t.....l.....t.pt....L.................p...................p........t........Gh.......p..................sa......h..D..............p.sl..t............s.......G..p.s.h........tpp.......l.p...pu...l......p.p...uc.....h..hlh....l.l..S..sshh....p.s.....................p...aspp..E.....httu..................hp.............ts..........c.....llPlh........hp...s..h..........th................................................................................ 0 444 754 932 +13519 PF13677 MotB_plug Membrane MotB of proton-channel complex MotA/MotB Coggill P pcc Jackhmmer:P45443 Domain This is the MotB member of the E.coli MotA/MotB proton-channel complex that forms the stator of the bacterial membrane flagellar motor. Key residues act as a plug to prevent premature proton flow. The plug is in the periplasm just C-terminal to the MotB TM, consisting of an amphipathic alpha helix flanked by Pro-52 and Pro-65, eg in Swiss:D3V2T1. In addition to the Pro residues, Ile-58, Tyr-61, and Phe 62 are also essential for plug function [1][2]. 
24.00 24.00 24.00 24.00 23.60 23.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.89 0.72 -4.65 1028 2913 2010-12-17 16:12:36 2010-12-17 16:12:36 1 5 2048 0 745 2094 570 56.70 30 19.45 CHANGED h..hh+.+.p......t.tt.sppssuWhluYADFhThhMAFFllhahlSssstpc...........hptluphFpsshs ..................h..h++p......t.pp.stttssWhlsYADhhThhhAFF...llhahh..St.ss.p.c...............hptluphhpssh.s....................................... 0 250 484 613 +13520 PF13678 Peptidase_M85 NFkB-p65-degrading zinc protease Rawlings N, Coggill P pcc Jackhmmer:B7UNX4 Family This family of bacterial metallo-peptidases is thought to compromise the inflammatory response by degrading p65 thereby down-regulating the NF-kappaB signalling pathway [1]. NF-kappa-B is a pleiotropic transcription factor which is present in almost all cell types and is involved in many biological processes such as inflammation, immunity, differentiation, cell growth, tumorigenesis and apoptosis. NF-kappa-B is a homo- or heterodimeric complex formed by the Rel-like domain-containing proteins RELA/p65, RELB, NFKB1/p105, NFKB1/p50, REL and NFKB2/p52; and the heterodimeric p65-p50 complex appears to be most abundant one. 
21.80 21.80 22.50 62.20 21.30 19.20 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.66 0.70 -5.00 8 95 2011-01-11 13:03:09 2011-01-11 13:03:09 1 1 85 0 1 39 0 246.90 84 71.65 CHANGED sppppDsYA-YVlspGpRt..pLSsspLsslh.sVp+AVppShupLlDcHTAhAIEsTlh-ALhpSpTFRcAVuFul+pc+psLGh........IpYRN.YEls-pops+hpclpplohsEIhpSsAspsPIls.h.sE.AsE--s.pcP..aVsISlAPshsStcYPhWQpuLIHEIIHHlTGAuDP..t-s..RlGPTEILApRlApELsWsI..Pp....FpuYssP-RlpAlppRsFpuLhcslhRHcpctptllpRLssIucthcASP- ............PNRAENAYADYVLDIGKRI..PLSAADLSNVYESVIRAVHDSRSRLIDQHTVDMIGNTVLDALSRSQTFRDAVSYGIHNEcVHIGC........IKYRNEYELNEESuVKIDDIQSLTCNELYEYD.VGQEPIhP.I.CE.AG.ENDN.EEP..YVSFSVAPDTDSYEMPSWQEGLIHEIIHHVTGSSDPS.GDSNIELGPTEILARRVAQELG.WSV..PD....FKGYAEPEREAHLRLRNLNALRQAAMRHE-NERAFFERLGTISDRYEASPD..................... 0 0 0 1 +13521 PF13679 Methyltransf_32 Methyltrans_27; Methyltransferase domain Coggill P pcc Jackhmmer:A9BXC4 Domain This family appears to be a methyltransferase domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.98 0.71 -4.50 63 1223 2012-10-10 17:06:42 2011-01-12 13:55:34 1 19 861 0 577 1525 252 151.10 24 34.50 CHANGED +Kh+QVp+....hhphlcs...llpph............tpssshsllDhGuGKuYLuFlLh..hhpt.......ssclhGl-s+sclscpupplAp+Ls..a.sphpFhphsltpsh.......................................................................................ssphcllsuLHACsTATDsAlphulp..ppu+hllhVPCCptclspp 
..........................................................................................cK.cplpphhphlpt...hht.......................................p...tshpllDaGuGKGYLuhhLth......................t..hshp.lhulE...hc..p......p....l...s.p.t.u..pp..h....u....p.c.l.t.............h.......t...p..h..p...hhp.s..s...hts.h..t.......................................................................................................................................................................spthshslu.LHACss.ho.spslch..s.....l...........p.......t.........p.....s..ph..............ls.....hsPC.Cap.lp..t.............................................................................................. 0 197 312 453 +13522 PF13680 DUF4152 Protein of unknown function (DUF4152) Bateman A agb [1] Domain This family of proteins is functionally uncharacterised. This family of proteins is found in archaea. Proteins in this family are approximately 230 amino acids in length. The structure of PF2046 from pyrococcus furiosus has been solved. It shows an RNaseH like fold that conserves critical catalytic residues [1]. This suggests that these proteins may cleave nucleic acid. 27.00 27.00 332.60 332.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.53 0.70 -5.34 5 13 2012-10-03 01:22:09 2011-01-12 14:06:31 1 1 13 0 11 17 0 223.80 73 99.79 CHANGED MRIVSADTGGAVLDEsYEPIGLIATAAVLVEKPYKTAKhSlVKYADPFNYDLSGRQAI+DElhLAIELAKKVKPDVIHLDSTLGGIElRKLDEPTIDALsISDRGKEVWKELSKDLQPLAKKFWEETGIEILAIGKSSVPVRIAEIYAGIYSAKWAIEYA+K..EG+lIIGLPRYMcVEI+-GKIhG+SLDPREGGLYGEIEsEs.EGIcWElYPNPlARRFMVFElW+E MRIVAADTGGAlLDEsYpPIGLIATsAVLVEKPY+TAshSlVKYADPFNYDLSGRQAIRDEshLAlELA++VKPDVIHLDSTLGGIEVRKLDEPTIDALsISDRGKEVWKELSKDLQPLAKKFWEETGIEIlAIGKSSVPVRIAEIYAGIYSAKWAIEYA+c..cG+llVGLPRYMcVEI+sG+IhGcSLDPREGGLYGEIEs-s.-GItWElYPNPlsRRFMVhEVat.... 
0 1 1 6 +13523 PF13681 PilX Type IV pilus assembly protein PilX C-term Coggill P pcc Jackhmmer:A9BNC4 Domain This family is likely to be the C-terminal region of type IV pilus assembly PilX or PilW proteins. 25.60 25.60 25.80 25.60 24.10 25.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.52 0.72 -3.52 20 47 2011-01-12 14:33:16 2011-01-12 14:33:16 1 2 40 0 20 47 9 97.10 25 40.70 CHANGED sssth..Psp.tc...suhhhushsp......s..pssppGlCt....tsst........pshWsssc...ts....sshp.s..ps..........s.ssspaGpaT.Gsh......ss.usshlttpts....spssRYhIEhlshts .........................s....hP....p....ushhushts......shssphppGLCtp...tsst..........pshWssss...hp........ssst.....s.........pstsstYGpFT.Gst......ss.usshL....s......sptsRYhIEhls.p.p.... 0 1 13 18 +13524 PF13682 CZB MCPsignal_assoc; Chemoreceptor zinc-binding domain Coggill P pcc Jackhmmer:Q47EQ8 Domain The chemoreceptor zinc-binding domain (CZB) is found in bacterial signal transduction proteins - most frequently receptors involved in chemotaxis and motility, but also in c-di-GMP signalling and nitrate/nitrite-sensing. Originally discovered in the cytoplasmic chemoreceptor TlpD from Helicobacter pylori, it is often found C-terminal to the MCPsignal domain in cytoplasmic chemoreceptor proteins. The CZB domain contains a core sequence motif, Hxx[WFYL]x21-28Cx[LFMVI]Gx[WFLVI]x18-27HxxxH. The highly-conserved H-C-H-H residues of this motif are believed to coordinate zinc; mutating the latter two histidines of the motif to alanines abolishes Zn binding. This domain binds zinc with high affinity, with a Kd in the femtomolar range. Although the function of the CZB domain is not yet known, scientists have speculated that it may function as either an unknown signal input domain, based on its frequent association with signalling output domains, or as a domain that helps to stabilise protein tertiary or quaternary structure. 
20.00 20.00 20.20 20.20 19.60 19.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.55 0.72 -3.80 345 1282 2011-01-13 13:31:52 2011-01-13 13:31:52 1 83 938 0 334 896 70 71.70 26 17.09 CHANGED c.h...-Hhhahppl.hph.lh........ttp..p.pht.scppC.phG+Wa.puts......ppthsph....ss........a.pplcpsHpplHptupphhpt .........................h.sHhhWhhph.ach.lh............sp...phpp.tsaspC..phG+Wh.pshu....tpphsph......st.............a.ctl.-.ssHpclHpsupphh..h.............. 0 135 281 309 +13525 PF13683 rve_3 Integrase core domain Coggill P pcc Jackhmmer:A9BZ80 Domain \N 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.14 0.72 -4.45 121 4386 2012-10-03 01:22:09 2011-01-13 14:25:37 1 37 1527 0 913 15372 2196 62.80 30 37.29 CHANGED tulphphhpPGcPhpNualEpFNsph+cEhLstph......h.slscscthlppWhp.cYNppRPHouLGhhTP ......................................tlt.ph.hpsGp..Php......Nu.hhEpast.....ph+.............s.E...h...lstph...................atsh..tc..scp.t.l.tpah........p.hY.Npp...R..PH..p..u..L.shhsP............................... 0 204 552 722 +13526 PF13684 Dak1_2 Dihydroxyacetone kinase family Coggill P pcc Jackhmmer:C9LMG3 Domain This is the kinase domain of the dihydroxyacetone kinase family. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.68 0.70 -5.49 228 1830 2012-10-02 12:41:15 2011-01-13 14:44:36 1 7 1719 0 319 1574 117 297.40 38 56.58 CHANGED caGYCTEallphppt.............ch..cph+shLpsl.GD.SllVV..sD-c.............llKVHVHTpcPGpllphuhpa.GpLhclKI-NM+tQ+pphh.................t........................tttp.pp.......ch...ulluVus...GcGl.sclF.cslGsshllpGGQT....MNPSTp...DllpAIcps.sAcplhlLPNNpNIlhAApQAspls...cp....p.lhVlPT+olsQGluAl.lsassst.sh--Nhp.sMp-uhsp........V+oGplThA.VRDTpls...GhcI+cGDhlGl.........h-scIhss...sp-..hhpsshpllcph..ls--u.EllTlhhG.p-sscc.pucp........ltptlccp......a..s.-lEl-lapGsQPlY.alloV.E ....................................................................aGYCTEhhVphpps..........pta-h-cFRs.h.Lspl.....GD.SLLVV.....sD--..............llKVHVHT..-cPGp...V..hphG.pa...GpLhKlKl-NMRp.QHcthl................p.p...........................................................................tptttt.pt.ppt..ph.........ulluVss.......GcGl..s-lF.cuhGsshl.IpGGQT...............MNPSTE........Dll.cA...lcp..s.....sA...c...p.lllL..PN..N.pNIhMA...A...pp...A...A...p..ls........-t.........shVl.o+..o.......lsQ...Gho.....Al.h.....s.....a.....ssst.....sl-.-N..tt.pMspulss.................VtoGplThA.VRD.Tp.l-.....GlcI+csDhlGh...............l-scIlss......ssD......hhpsspphlpcM.........ls-..D....o...E.......I..l..T...lh.hG...cD.uspp...s..p.p........ltph.lpcp.........a......DlElElapGsQPl.Y.YlhSVE.............................................................................................. 
0 127 230 286 +13527 PF13685 Fe-ADH_2 Iron-containing alcohol dehydrogenase Coggill P pcc Jackhmmer:A0Z2K3 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.73 0.70 -5.07 57 650 2012-10-02 14:41:14 2011-01-13 16:02:49 1 3 514 4 210 15098 4240 260.30 33 66.02 CHANGED hlupuslsclsphlpch...t.h..p......clhllsDps...ohph...hucpltpsLpptsl...ps.t...hh.........p....s....s......hpsltc...lhpth....t....p....c..s-.....hll.ulGuGpl.DlsKahua...hshPalsVsTAsShDGauSssASlhhp..GhKhoh...AthPhullsDhsllppAPtchltuGhGDlluKhT...A.l...t........DWp..hu.c........th......sc..........l.........sptshp.hl.psh..pps...h.t.....s....p.....c......phl.....ptLhpu....L......s.hu...sluh......S..+PA.....SGuEHhlSHhh......-hh.......h......s.......h.h......HG.pVG .......................................................................hGpsslp.c.h..u.p.l...lp.ch.....t..h...p.........psl...l..l...s..s..ps.......T.h.tl............s.......G...c....p..l..p..s...s...L..c....s....s....s...h.....ps.h........................ss.p........s..................s..........................h.s.sl.t...p....lt.c.p.h........p....s......h......c.sc...........hllulG.u...Gp.l.D.l..s..K..................h..................A....................u....................h.......................c....................h..................s....................h...................s...................a..................l......s..................Vs.T..............A......A......S....h....D.G...a...sS...s....s...........A...s..lp.t......t.......G...h...K..t.oh..t....s........t.sP...u...l.....lsD.lc.l.....l.....s.s.A......P.p.c.h...h.s.u.GhG..D....ll.......u....K..l..s....A..l....t......................DWh....lAc.....................ch.....sE...........s.l..............s.p.h..s...h...p....h...l...p...s...s...l........cs..s...lp............c..s.c..............c........psh.......csL..h.-u.L................lhS...GlAMth............hssS.RPA.............S...G.....
.u.EH.h.h...SH...hh....-hh...........t.....s.s....hhHGtpVG................................................................................................................................................................................... 0 75 145 176 +13528 PF13686 DrsE_2 DsrE/DsrF/DrsH-like family Coggill P pcc Jackhmer:A5EWY2 Family DsrE is a small soluble protein involved in intracellular sulfur reduction [1]. The family also includes YrkE proteins. 25.00 25.00 25.10 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.97 0.71 -4.42 32 864 2012-10-01 20:53:36 2011-01-13 16:37:43 1 15 675 10 260 763 82 149.10 38 44.23 CHANGED pp++hhllhossh.c..hAhAuaIlAsuAAAhGh-VThFFTFWGLslL++sc.phpl........cpshltphhhhhh..t......hsh.shsGhusthhpthM..............................................Kc+sssoLp-LlchAh-pGV+hlACpMoMDlhGhccEELlDGl.phuGsAsaLpcAt-uslsLF .........................................................t..p+ssIlhhuG..phD..KAhAuhIlANGAs.AhGp-VTlFFTFWGL.NsL+Kt..p.p.sp.l....c.Kp.hh.t+hFshMhstts.p.....................h.lS+MNhhG.hG.stMh+.tlM...........................+++sls.o.L.pLlc.pAh...-...p...G.lKllACpMoM.DlhGlp.cE.E.L..hD..tV..-huGVusYl.sc.AppushsLF.............................. 0 105 187 233 +13529 PF13687 DUF4153 Domain of unknown function (DUF4153) Coggill P pcc Jackhmmer:C9LQV3 Family Members of this family are annotated as putative inner membrane proteins. 
25.00 25.00 25.40 25.30 24.40 24.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.90 0.70 -4.82 46 732 2012-10-01 23:40:40 2011-01-13 16:41:44 1 2 721 0 91 498 28 233.20 38 42.73 CHANGED lhhGls.lh....lh.ulshLF.hsl......st.pha....h.plh......hhshhhhusshaLuhls....p..p.....p......c...htps..........pt..h.....ph.llpalllPLhhlYshlLah.Yhh+IllshplP.....pGhlu.......hl.lhhh....hh.shlll..hhhhh....p.ppp......+.a.....hpha....h+hhs...hlllPll.l.lhhhuIhhRlspYGlT.sR....halllhslah.hshhlhhhh......p+.pt........p.h.........ls.hsh.sllh.llsshsP.hsshslotpSQhsRlcph .............................................................................................................lhsGLhWLV....LLLWupLF.......+...LV.........GI.pFF.....s.TLFht........schFhalol..G..Ll..o..ALAV..lL...........u.R....ppp.....h...lcuh.....................p+.L.......ho.LIAsG..LLPLVoL.LsLhFI...h.sLPh.s.G.L.u.u.lu.....c+loA...........As..LLhs.....Lu..h.l.Ll.lhAIV...........p-sp............K.s.......lshh....lRsLh......LlVAPla....l..lAuWALWlRl....uQYGWTsDR....Lhllllhlh..Lsahluhlh............R+.up.........ssh.h.......tthh.lss.L.sLVl.LlLLsSPlLDshRISVNSphARapu.u............................................. 
0 35 63 76 +13530 PF13688 Reprolysin_5 Peptidase_M84; Metallo-peptidase family M12 Coggill P pcc Rawlings N Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.66 0.71 -4.38 48 463 2012-10-03 04:41:15 2011-01-13 16:50:21 1 31 230 47 275 3504 182 203.10 22 28.71 CHANGED ss+plsLl.lssDssasssa..u...s.................spptllshls....sAsslappsh.................s..Is....lsLp...slslsspsssss..................t.sssssshLsph........................sshc...uppps.shuahhhhsssss...u...hGlAal........Gphs............ssss.....sssss...ts...........................hhhtsssp...hplhAHEhGHsaGAsH....Dsssp............tsts.Cshst...............ssss.GpaIMsss.ss........sshsp..FS ....................................................................................................................t..pps.lh.lssD.p....p..ah.pth..ss...c...............................tshphlhp..hls.......................ps.tslh.psh...........................s..ls....htlp..............plp.l.h..s..p..s..s..sp..t..........................................ttss.h.s.sp.phL.p..F..................................................s.p.hp.........t.t.....s.....p..s.....s.l.....u...ah.h........o.h.....p.....s...h...ss............G..............lGLAal.................Gssp...................s.sst.......uuhss.....pt.................................................hs..hh.h..o...tp.s....t.hlhA......HE..lG...........H..saGu.H......Dsss.................................p..Cs..........................ts.ts..upalM.shss.........s.t.FS.............................................................. 0 114 164 230 +13531 PF13689 DUF4154 Domain of unknown function (DUF4154) Coggill P pcc Jackhmmer:Q47CB5 Family This family of proteins is found in bacteria. Proteins in this family are typically between 172 and 207 amino acids in length. Many members are annotated as valyl-tRNA synthetase but this could not be confirmed. 
24.90 24.90 25.10 25.80 24.60 24.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.64 0.71 -4.59 103 788 2011-01-14 11:37:18 2011-01-14 11:37:18 1 6 728 0 143 407 62 141.30 44 76.62 CHANGED pct..........pl.c.......Ashlhshhpasc.WP.....ss......s...ss..lp...lClhu.sst..hs.........s.s.Lp...p.h.tspph..ss...c.lpl...........ppl........s..ssst......h..ss........Ccllalupt.p........ttphtplhp.tlpspslLolu-tss.sptGu....hlsLhh.pss+lpFclNlsssp+uGlplsu.plLpLA+p ........................p.....VphhVsGIlSYT+..WPuh......uuP..s+...LCIhu...suc..au.........s.s.Lp...ct.Aspsh..sh...hPlhl...........+s.........p..ptsh......h...us........CsuhYFGsc.o........P.shQh....-Lsc.pas.u+u...LLlIAEp.Ns....EChlGS.........AFCLll.......pNscV+FsVNLDuLoRSGV+VsP.cVLhLARp..... 0 35 89 118 +13532 PF13690 CheX Chemotaxis phosphatase CheX Coggill P pcc Jackhmmer:A5EYB3 Domain CheX is very closely related to the CheC chemotaxis phosphatase, but it dimerises in a different way, via a continuous beta sheet between the subunits. CheC and CheX both dephosphorylate CheY, although CheC requires binding of CheD to achieve the activity of CheX. The ability of bacteria to modulate their swimming behaviour in the presence of external chemicals (nutrients and repellents) is one of the most rudimentary behavioural responses known, but the the individual components are very sensitively tuned [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.07 0.72 -4.13 95 880 2012-10-01 19:50:22 2011-01-14 15:54:09 1 12 643 7 333 939 49 97.90 23 51.70 CHANGED lsu.hluh...sGsh..pGhlhlshspphshpl......s...p.t...l...G....-c....t...ps....pc..pltDslGElsNhIsGss+......sch........uh.....p....hpl.slPpllpGps.htl..phs.............ss.sp.tlslsap ...................sslIGh...sGsh..cGphhlshscphAhcl......s....p.t....h.......G.....pc.........pp.....hs-.s.pssluElsNhlsGssp....shLt.......uh.......s.hcl.usPtlltGps.hpl..p...............t............................................ 0 161 258 292 +13533 PF13691 Lactamase_B_4 tRNase Z endonuclease Wood V, Coggill P pcc manual Domain This is family of tRNase Z enzymes, that are closely related structurally to the Lactamase_B family members. tRNase Z is the endonuclease that is involved in tRNA 3'-end maturation through removal of the 3'-trailer sequences from tRNA precursors. The fission yeast Schizosaccharomyces pombe contains two candidate tRNase Zs encoded by two essential genes. The first, Swiss:Q10155, is targeted to the nucleus and has an SV40 nuclear localisation signal at its N-terminus, consisting of four consecutive arginine and lysine residues between residues 208 and 211 (KKRK) that is critical for the NLS function. The second, Swiss:P87168, is targeted to the mitochondria, with an N-terminal mitochondrial targeting signal within the first 38 residues [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.88 0.72 -4.50 41 279 2012-10-02 15:46:01 2011-01-14 17:05:56 1 11 238 0 197 491 119 61.80 37 7.17 CHANGED pslossTsDTst.sslhLph-.p.cRYlFGpluEGoQRshsE..p+l+l..uKlpslFLTGph.s...............................................WsshGG ...................hshsotDos...sslhla.h-..p...pR...YlF.NsuEGsQRhhpE..+.+l+l..u+......lcsIFLTths.................................................WsshGG.......................................................... 0 71 115 167 +13534 PF13692 Glyco_trans_1_4 Glycosyl transferases group 1 Coggill P pcc Jackhmmer:D2BSD5 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -11.02 0.71 -4.01 266 5146 2012-10-03 16:42:30 2011-01-17 13:51:46 1 174 2454 8 1838 35567 12883 141.30 16 28.30 CHANGED p..hh....hl..Gsh...s..ph....shpsh...l.lp.hh..........l..t.p.......t.......h..s....p....h..p...hhl..h..G.s..........h....s.....s............p.....l..pph........t......sl.p....h..........h..uh...h...........s..c..h.sph...hsp...s.....c..l...s..l......s...P..h.......t.....hss...........s...h.....s.....h.K.lh-hhs.sGhPl..l......s.....osh....s.....hps.h.................h..t.......t..hs...h..h.h..s..s....s......s..pshspslt.ph.hp.....s 
..................................................................................................h...hhshh...s.th.....t..psh.phh..lp......h.h...............................tl.....t..p..............................p................................h....s................p.......h......p.......hhl....h..Gt..............................s..s...p.......................p......lpph......................hs........sl..p....h.....................................h..sh..l...............s.....-....h..sp.h....l.s.p.....u...........c...l...s...l.........s.....s...h...................p.........ss..............................s..h....s......h.K.lhE.hhu.sG.h..Pl...l..........s.......osh......s.........hp.s..h.........................................h..tt..........s.ts...h....hh.......s..s..........s................s....pph.hptl.phh.......................................................................... 0 731 1269 1619 +13535 PF13693 HTH_35 Winged helix-turn-helix DNA-binding Coggill P pcc Jackhmmer:D2BY08 Domain \N 25.00 25.00 25.10 25.10 24.00 24.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.44 0.72 -4.19 22 1007 2012-10-04 14:01:12 2011-01-17 14:12:01 1 2 663 2 160 491 4 75.70 59 84.66 CHANGED DWHsADIIAAL+K+GToLAAlSRpAGLSSSTLANALsRPWPKGEalIAcsLulcPuEIWPSRYaD.psGpll-RchRhR .............DWHPADIIAuLR.K+G.TShAA.SRcsGLS.S.S.T.LANA..L..o.RP..W..PK..G..EhI..IAcALGscPh.IWPSRYaD.pspphlpRp.h.................... 0 10 62 108 +13536 PF13694 Hph Sec63/Sec62 complex-interacting family Wood V, Coggill P pcc [1] Family This is a family of closely related Hph proteins that are integral endoplasmic reticulum (ER) membrane proteins required for yeast survival under environmental stress conditions. They interact with several subunits of the Sec63/Sec62 complex that mediates post-translational translocation of proteins into the ER. 
Cells with mutant Hph1 and Hph2 proteins revealed phenotypes resembling those of mutants defective for vacuolar proton ATPase (V-ATPase) activity. The yeast V-ATPase is a multisubunit complex whose function, structure, and assembly have been well characterized. Cells with impaired V-ATPase activity fail to acidify the vacuole, cannot grow at alkaline pH, and are sensitive to high concentrations of extracellular calcium [1]. 19.90 19.90 20.60 24.00 19.10 19.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.13 0.71 -4.26 7 38 2011-01-17 14:23:41 2011-01-17 14:23:41 1 1 23 0 18 34 0 171.50 38 32.47 CHANGED pp.pp.hh.DhsSssTGtFSDpMFpss.c....ppp.phsps.ph.pssphpspschsssttttpshstpph.spsh...spsthpphspt..pup.cspp...........t+Shupoh.shD+KRLVsQFLc.SlpsSss.pssppsst.............shpo.Sphshssuhp.p.......................hpsLhYpDLcpsstppppso ................................................................................................pLchpslPshsSspTGhFSDhhFpss.D...............hstu.Sh.cs.plSPK.....psaNsstsh.hushsptphtShsl.......apupcpth.uFpplQupppsp+............+SsusS..F..sh-+KRLVsQFLp.ShtsS.s.sohpppss.t.usshssIh.s.shps.Sphs.sppS............................pSLhYHDL-uSshpcsSs............ 1 3 8 13 +13537 PF13695 zf-3CxxC Zinc-binding domain Coggill P pcc Jackhmmer:A9VEF7 Domain This is a family with several pairs of CxxC motifs possibly representing a multiple zinc-binding region. Only one pair of cysteines is associated with a highly conserved histidine residue. 
22.00 22.00 22.60 22.80 21.20 21.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.27 0.72 -11.41 0.72 -3.77 45 436 2011-01-17 15:42:00 2011-01-17 15:42:00 1 13 129 0 268 393 0 92.60 29 37.05 CHANGED phhGcFpC..ppC.spsWsSsplhlsh+hh....t.....th................hph+hacQcC+pC..sphppPhh........s.ps.......hs-Rlshchpchsshphpt.............h.t.p.t....tPHppcLCEuC+tG ............thGcF+C..spC..p+pWpSupVhsl.h.................tt.................sphh..hatQpCp.cC....pp..pPph........................p..lpclt.p.p.shpppt...h..t.h................h................tsHcpchCptCpt.................................................. 0 64 89 155 +13538 PF13696 zf-CCHC_2 Zinc knuckle Coggill P pcc Jackhmmer:A9VEF7 Domain This is a zinc-binding domain of the form CxxCxxxGHxxxxC from a variety of different species. 27.00 12.00 27.00 12.00 26.90 11.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.86 0.72 -4.45 5 190 2012-10-03 11:39:54 2011-01-17 17:21:59 1 28 117 0 149 536 7 31.30 45 8.63 CHANGED P+KsPPPcYLCHLCFpKG.HYIsDCPQ..AsPKuE ...............phPPssYlC+lChpcG.HaIpDCPp......sp............. 0 81 103 129 +13540 PF13698 DUF4156 Domain of unknown function (DUF4156) Coggill P pcc Jackhmmer:D2BXE0 Family The function of this family is unknown but members are annotated as putative lipoprotein outer membrane proteins. 25.00 25.00 25.10 25.40 24.80 24.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.00 0.72 -4.10 49 839 2011-01-18 10:31:11 2011-01-18 10:31:11 1 1 682 0 94 295 28 93.80 51 79.50 CHANGED ssphostuppVph.sssps..sppCphLGpVoGopu.sahothhtss..sshhpGAhN-L+NcAAt.hGuNslhhhs.....................spthssssshhGpsYcC ........................S.NpLouAGpsVRl..s-ppP..GuECQLlGosTGpQS.NWhSGpaGpE..uuSMRGAANDLRNpAAA..M..GGNVlYGloSP....opsh.............................LSSFsPTsSphhGQVYKC............................................................ 
0 14 37 66 +13541 PF13699 DUF4157 Domain of unknown function (DUF4157) Bateman A agb Jackhmmer:C7PSA5 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 80 amino acids in length. This domain contains an HEXXH motif that is characteristic of many families of metallopeptidases. However, no peptidase activity has been shown for this domain. 22.00 22.00 22.10 22.30 21.80 21.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.67 0.72 -3.84 185 470 2012-10-03 04:41:15 2011-01-18 10:44:17 1 49 214 0 257 522 62 74.20 33 11.64 CHANGED sLsssl+sthEsthG..tD.hosVRlHs...s......tusptspslsApAaTp.GscIhF....s.Gp.tststt..........upclLAHELsHVlQQptu.t ............................................Lstsl+sthEsthG..tD..husV..RlHss.....stusptspslsApA.aTh...GscIhF....s.Gp...ts.t..........spclLAHELsHVlQQppu......... 1 100 177 239 +13542 PF13700 DUF4158 Domain of unknown function (DUF4158) Coggill P pcc Jackhmmer:A8LT43 Domain The exact function of this domain is not clear, but it frequently occurs as an N-terminal region of transposase 3 or IS3 family of insertion elements. 25.00 25.00 25.20 25.00 24.60 24.90 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.63 0.71 -4.76 108 1440 2011-01-18 15:32:58 2011-01-18 15:32:58 1 7 737 0 250 1165 105 151.80 26 19.69 CHANGED lLosppcppl..hslPs..s..cpclh+aaoLscpDlplIp..p.+RtspNRLGaAlQLshlRa.Ghhls....s....spplPtsllpalApQLtls...s..s.s..hppYu.p.RcpTRt-HhtcltphhGa+sFst..st..h.p.pLhpaLhphAhpsscshh.LhcthlshL+pp+llLPuhoslERllucuh ..............................Lo.tpptth..hthst..sct-L.hpaaohs-t.Dlph.Ip...p..+R.tstsRLGhAlQLshhRh.Ghhls....s....hpthsssllpal.u..p..QL.tls....s...t..p....htp.Ys..p..RppT+tcHhtplpphhsac.ast.........st..hp.plh.p.hLh.p.hAht.sscs.h..LhsthlthLhpp+lllP.uhsslpRhhups............................. 
0 50 136 189 +13543 PF13701 DDE_Tnp_1_4 Transposase DDE domain group 1 Coggill P pcc A8LT71:jackhmmer Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 25.00 25.00 25.10 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.28 0.70 -6.31 10 2054 2012-10-03 01:22:09 2011-01-18 17:04:24 1 6 411 0 238 1429 381 279.80 36 91.76 CHANGED FsslsG+pVhucF-GGslSSDuGllLl+plDcpltlspRhAsClcDtR+sSYlcHSlc-LluQRIYQIAsGYEDsNDuNpLR+DPhFKlALs+lPhsssst..LASpPThSRLENslspp-lp+huR.............uFVDtFL-SYt+sPc.IVLDMDsoDDtsHGpQEhAFFNuYYpssCYhPLalFEupoG+LLsApLRPGcpcoGctslshLpRllcpIRctWP-T+IllRGDuuFupPElMshCEspsssD.lFGhuGNssLhc+tsslls-sRcpctpptcpshh...ts..t................RhacpsaYsA+SWscsRRVVhKsEhtucGsNhRFllTsL......scssPpcLYcchYCsR.GpsENRIKEhKhDLtSDRTSscsFluNQLRLFlusAAYVLhpulRppsLspT..LAKApsuTIRLpLlKLAARVslotRRIllcLPouCPhpsplthshppLphtp.hs 
..........................................................................................................................................................h..................................................................................................................................................................................................................................h.....lDhDso.........u.ppp..................a...h....t....sa..Ph..h..hh.................tt............hR.up......s..tt..t..h.....t..ht.h...h...t....h..........................................hRh...D.ut.....t.h.......h..............................................................................................................................................................................................................................................................................................T.sh..................t....s.s.pp.....h.....p.....h......YptR..G.p.hEN.h.IKEh...p.s.hh.....s-.......+h.s.......op..............shhtN.plRhhhsshAYsLhh...hh..p..h...h..................s.......t........p........h....t.......t.h....p....htp...hR...h...hhl+lss+hs.ps.Rp.hlphsph.h...shtt.h..h..........th..................................................................................................................................................................................................... 
0 49 161 214 +13544 PF13702 Lysozyme_like Lysozyme-like Coggill P pcc Jackhmmer:B0G1S2 Domain \N 25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.01 0.71 -4.75 63 1147 2012-10-03 00:09:25 2011-01-18 18:31:40 1 9 690 0 68 686 23 158.50 43 52.92 CHANGED lspcVlpacshVcchApchu..ls-.YlsllLAlhhpESGG...cssDlMQSSEShshs.............sNsIpDs-tSIcpGVphaspslcpApppus....-.lcsslQuYNaGsG.alsaltppG.spYoh-LAppFScphs..........upphth....sh..tt.hh..sYGshhYsppVh.pYh ....................................................tpVhsapPhVcchscE.....hslsp..hssllLAlIhsEStG...........ps.tD..V........M..Q....SSEShuhs..................................sNoIps.scpS..I+QGlphhuphLtpApc.tul.....Dl.oslQuYNaG.su.Yls..alAppG....pcaThpLAcpaS+-hs............sGpphsY.pPlul...hsGGahY.NhGNhaYsphVp............................................................. 0 16 35 47 +13545 PF13703 PepSY_TM_2 PepSY-associated TM helix Coggill P pcc Jackhmmer:D2BX46 Domain This family represents a conserved TM helix found in bacteria and archaea. 30.00 30.00 30.20 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.24 0.72 -3.81 95 2542 2012-10-01 23:59:14 2011-01-18 18:43:04 1 35 1345 0 728 2488 143 87.60 25 18.15 CHANGED ahhplHhsLh.lsth.....hGthllulsuhhhllslloGllla..............+hh+pha...shR.tc..stcp.hhD.hHshhGlhslPFhlhlshTGlhhhht ....................................hhpLHtsLh..lsts.............Gphllslsuhhh.l.lhll.o.Glhla...h............................p.h...h...p.t.h.h...........thc....h......pp.....st.+p...hh-..hHshhGlhshhhllhhshTGlhh...s.............. 
0 155 412 583 +13546 PF13704 Glyco_tranf_2_4 Glycosyl transferase family 2 Coggill P pcc Jackhmmer:A8LQ72 Family Members of this family of prokaryotic proteins include putative glucosyltransferases, 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.51 0.72 -3.67 164 934 2012-10-03 05:28:31 2011-01-18 18:44:27 1 33 490 0 305 3098 709 103.40 19 24.61 CHANGED +NEshh...L.palt...a.a.p.p.l.G....h..s.c..hll..h...sN.sssDso....sphL....tp........h....sc....ls......lhp...ss..................t.sa................pp..tt.ht....h......s..........hhsh......h..hp...p..h......s.........p..s.....cW..hlhlDsDEFlshsstt...tslpsLh ............................hNpt...h.l.ta.lt.......a..a...p..t..l...s.....h....c...c.....hhl..h....ss..s...s...s.D.s..o.......scl.L....cp.......h......ss.....lp.........lhp...p..p...........................................p..th......................tt...tp..ht......h......c............hhpt.......h..hp......p...h.....t..........................t..s.....-W..llhlDsDEhlhsst.....tl....h............................................ 0 101 194 240 +13547 PF13705 TRC8_N TRC8 N-terminal domain Bateman A agb Jackhmmer:Q8WU17 Domain This region is found at the N-terminus of the TRC8 protein Swiss:Q8WU17. TRC8 is an E3 ubiquitin-protein ligase also known as RNF139. This region contains 12 transmembrane domains. This region has been suggested to contain a sterol sensing domain [1]. It has been found that TRC8 protein levels are sterol responsive and that it binds and stimulates ubiquitylation of the endoplasmic reticulum anchor protein INSIG [2]. 
27.00 27.00 27.40 27.30 24.20 24.40 hmmbuild -o /dev/null HMM SEED 508 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.74 0.70 -5.93 13 190 2011-01-19 10:54:33 2011-01-19 10:54:33 1 7 80 0 116 183 0 433.60 38 72.60 CHANGED lhull-VsLRVPulhllDhlhphsh.tuhs.........pp.........................................................................................hphphlthslphlGhls...........usslLhLsp++LlplYhahhuhlLhhsuahlsh.h..l..c.....-thlhls.hph.c.suhh............huhllhtl....lhssca..hhhhhshhhh.Pllhplh.lPh.s............Lhhlsshshhhoshtshhhlhpth.hshphsh.hh.....hthhplaGltuLlpshWp+LpVPslLpVFWlschshQhhs.....hhspp-ssh.....s.pphhhllhshlsssCsoshslLGhosllShlA+hluphhphaLtuh-s.cccp...hGhspuVhhhILALQTGLouLpsccRhlhLuhsLhlllTAlLp.lHpIs-PlLhuLuAS+spShpRHhRsLslChFLllhPlhlsahLhpaashshWLLhVsu.slplsl+VlsoLhhYsLFMlDuaRpp.WEKLDDhlYYV+usupslEFlhulhlhu.GAaphlFtu.....hohIpAshhhlHuYFNIWhcApsGW ....................................h..uhlpVsLRVPslhllDhlap.....h..u......................................................................................................h.hhhlth.hp.h.hGhhl...........SsllLh..Lsp..pp.......L.hphYhah.hshLLhhsuh.lsh........h..............................shal-.hu.h.phsshh................hu.lllph..............hhtht....l.lhShahh..llhclh......lPl.s...............llhhsphshhhssh.llhhhhp.hhh...shphsh.hh.....hphhcl..YGL.sLh.shWpplhhPslhhV..FWLshhshQh.hs........hhsspspsh..........shpphh.lhhs.........lhpsCsosholLGhshslS.lAhhlhhhhhhaltuhct..sc..............................hGhstuVhhhILAlQTGL.tLps.cRhhhLSh.hhlllsulLp.hhphsDPllhuLuAS+sp.Sh.h+HhRsl.lshhLhlhPshhuYhlhpaathshWLhhlhu.sl.ssLpVlsoLhlYsLFMl-tap.p...hEphDDhlYYVpuThpllEFlhulslhu.Gs.phlFtp.....hohhtuhhhhlHuYaNl...aLpAp.GW............... 0 31 40 78 +13548 PF13706 PepSY_TM_3 PepSY-associated TM helix Coggill P pcc Jackhmmer:D2BX46 Domain This family represents a conserved TM helix found in bacteria and archaea. 
29.10 29.10 29.10 29.10 29.00 29.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.77 0.72 -4.54 99 885 2012-10-01 23:59:14 2011-01-19 14:09:45 1 19 462 0 319 1056 191 36.90 30 8.04 CHANGED pphhhhlHpWhGlhhuhllhlhFhoGslhha.....ps..p..lsp ......p.hhhlHpWhGLlhuhlLhlhhloGslhha.....cp-ls.......... 0 65 174 255 +13549 PF13707 RloB RloB-like protein Bateman A agb Jackhmmer:C7PC62 Domain This family includes the RloB protein that is found within a bacterial restriction modification operon. This family includes the AbiLii protein that is found as part of a plasmid encoded phage abortive infection mechanism [1]. Deletion within abiLii abolished the phage resistance. The family includes some proteins annotated as CRISPR Csm2 proteins. 27.00 27.00 27.00 27.80 26.90 26.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.54 0.71 -4.63 120 422 2011-01-19 14:48:00 2011-01-19 14:48:00 1 2 358 0 96 390 16 178.60 20 87.08 CHANGED hlIlsEG.p...TE.pYFctl..................t.hthpphplplhstt...............................ssstpllcpstchtp........................................................................tpphDplasVhDt.D.........t.ptpphp.cshpt........scpp............thplhhSNs.sFElWlLLHapths............thsppthtt...ph..pth.........ssYpKsp.................hthhhpphts...AlppAcpltppt.tppt....................ss.sTslh..pLlctlt 
...................................hhIhsEG..p...TE..tYFptl..................p.hhhsp..hpl..phhstt........................................psstpllctshphhp....................................................................................................tph.cpl..ahVhDt.D..................ptpphp...p...shph...........scpp....................phphhhSN...sFEhWhLLHFpthst.............................t..ppphhtph.....hpthh.....................tpYpKsp...............thhp.hhpphtp..........AlppApphhtp...pp.t.....................ss.hopl....pllc.l.h............................................................ 0 48 80 92 +13550 PF13708 Methyltransf_27 Methyltransferase domain Coggill P pcc Jackhmmer:Q3IV21 Domain This family contains methyltransferase domains. 25.00 25.00 25.10 25.60 24.20 24.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -10.97 0.71 -4.77 34 854 2012-10-10 17:06:42 2011-01-19 15:25:22 1 11 411 0 44 544 11 171.00 53 64.67 CHANGED thp+slDpphWccLhpcoGhhshMsupt+cpWpcplp..ts...........shPshotcNIhuTFppLhts+pchFp+GllslF+pL..........ShcaKTN..pshtFG+KlIl..ssllp.....hp.ph...uhs.......hshu...pps.pLsDLt+hhtllcG...KPhs-..pRpshs.tthtpthptss..t.............................................t.pshEsphFpl+hFpp.GosHlpF..p+s-Ll-+lNpllAcaYP.ssL ........................................................................s.ITRslDRcIW+cLMpcoG.MholMsupsRDpWh+sLE...D....................shPEISEsNILSTFcQLHpNKs-VFERGVINVF+uL...............................SWs.....YK...TN......sPC......+F...G.....p.KI.....Il..NNLVc......Ws.+W.....Ghp.......L.sG....ptD.pLsDLERMLaLh....sG...KPlPD...NRpsIs...lc.Ls-alpssp....s..........................................................p..ppaEDEh...FpIRYFpK.GouHITF..++.-..Ll-+lNDIIA+aaPshL................... 0 7 15 25 +13551 PF13709 DUF4159 Domain of unknown function (DUF4159) Coggill P pcc Jackhmmer:C6VZA7 Family Members of this family are hypothetical proteins. 
TM prediction shows them to have two transmembrane regions, with a cytosolic region of about 25 amino acids between the two, and an N-terminus outside the membrane. 25.00 25.00 26.80 26.30 24.00 22.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.58 0.70 -5.21 124 420 2011-01-19 15:53:05 2011-01-19 15:53:05 1 9 358 0 171 402 276 209.10 30 33.09 CHANGED hplAhl...ho...............G-hpsD...........suLpsLsphl...........sppTslcss..t..hsVclss.s-Lh..haPhlYhsspssh.h........ospt..hspLcpYlpsGGhlhhDsp............t.tsshstsh........pc.hphlhs...ssLp.lPs-Hslh+sF...Yhl..........hsutshhh-ttst...................h.hthhsDG....hhsllhsss.DhusuWshspps.pshhshsss..............................AhRhGlNllhYsL ...............................................plAhl..hs..................GchssD..........psuLpsLsphL...........tp+Tulcsu....p...hsV-hsp...D-Lt...haPhlYhshssss..h..........SspthsplcsYh.pp.GGhllhDsp........c.......t.tushstsh..............pchhphlhs....hssLcslPs-HsLh+sF...alh........-.hP....uphputshWsEstst........................sh.hth.sDG....hssllloss.Dhus.AWuhctpssshhsssss.....p+.........................hAhRhGVNIVhYsL.......................................................................................................................... 0 72 126 147 +13552 PF13710 ACT_5 ACT domain Coggill P pcc Jackhmmer:D2BYU0 Domain ACT domains bind to amino acids and regulate associated enzyme domains. These ACT domains are found at the C-terminus of the RelA protein. 27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.88 0.72 -4.16 79 4946 2012-10-02 00:29:19 2011-01-19 17:24:13 1 10 3420 7 1010 2625 1922 62.70 40 43.40 CHANGED ppstsLpRllpllR+RGFplsshshpt...tssst....hplplsVpu.pRslch...LspQLsKLhDVhpl .........NcsGsLsRVsGLFu+RGaNI-S.lsVus..oc-ssl.................SRls..l..s..s..s..u.p.-.p.s.l.EQ......lh+QLpKLlDVl+V................. 
0 289 629 835 +13553 PF13711 DUF4160 Domain of unknown function (DUF4160) Coggill P pcc Jackhmmer:C6W4D5 Family \N 23.00 23.00 23.60 24.30 22.70 22.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.29 0.72 -4.12 168 574 2011-01-19 17:56:36 2011-01-19 17:56:36 1 4 384 0 179 516 65 63.30 28 72.08 CHANGED Glhlhhah...ppHpPPHlHsphuc....tpAhhslps......hph.hpG..phsp+.pl+hlhpalphapccLh..ppWp ...................Ghhlhhah...p.p.H.pPPHlHsp.hus......hcuhhtlps.........hph...hcG...h..ss+..ph+hlhtaht...+pcpLh..ttWp.............. 0 63 119 156 +13554 PF13712 Glyco_tranf_2_5 Glycosyltransferase like family Coggill P pcc Jackhmmer:C6VYA4 Family Members of this family of prokaryotic proteins include putative glucosyltransferases, which are involved in bacterial capsule biosynthesis. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.48 0.70 -5.10 121 312 2012-10-03 05:28:31 2011-01-20 09:28:06 1 21 211 4 52 1116 294 202.30 38 68.24 CHANGED ILhVsClNs-cl...acQ.Cp+.pIcsL..hVPP..GYlVQlhPIRsA.cSMsSAYNcAlSaPAKYK....VYIHQDsallN...cshhhsLlsLFp-s..E+LGlIGluGAQalPsNGlWWE..GKslV..GK.VIpYpp..psYphhp.hpphh.Y....t.spsFhsVpAIDGLlMATQYDIP........WREDLFpGFHFYDVSQSLEFp+.A.GY...hs...sQ...t...s...h...W...CIHYsu.Dp.h-.s.shhc.ph+p.FVE.+Y ...................................................................h.hlhs.pppth...htp..h.....l.ph....hs.s..sh.....hlph.hs.lR.s.........A..pSMsSuY.Np...Alsp..A...+aK......VYlHQDsallN......pshh...hsLlplF.p.c.p.....-cL.G.....h..I............G..........h.....s.....Gu.p..h...l.P...s..s.....G....l.....Wh.-........up.s...hs........GK.........Vlt..Y....t.p.....h..........a...h..t.....p..phh............tsp...sahsV.psI..D...G.LlMA.TQ.YDls................W.REDLF...pG..F.H.....F.Y..D..VSQShEFp+.A.GY...pl......sp....t.s....h..W.C.I.H......a......st.....-t.......t......s...s........a..c..ht+h.FlccY........................................ 
0 10 37 46 +13555 PF13713 BRX_N Transcription factor BRX N-terminal domain Coggill P pcc Jackhmmer:Q17TI5 Domain The BREVIS RADIX (BRX) domain was characterised as being a transcription factor in plants regulating the extent of cell proliferation and elongation in the growth zone of the root [1,2]. BRX is rate limiting for auxin-responsive gene-expression by mediating cross-talk with the brassino-steroid pathway. BRX has a ubiquitous, although quantitatively variable role in modulating the growth rate in both the root and the shoot [3]. This family features a short region, also alpha-helical, N-terminal to the repeated alpha-helices of family BRX, Pfam:PF08381 [1]. BRX is expressed in the vasculature and is rate-limiting for transcriptional auxin action [4]. 24.50 24.50 24.50 26.70 24.40 23.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.72 0.72 -4.50 53 188 2011-01-20 14:44:40 2011-01-20 14:44:40 1 21 25 0 128 193 0 38.00 41 4.56 CHANGED s-Euu.+scuAK.-sIKSLTuQLK-MAc+l...s.uuhc....pscsss .........-EuuKs+AAK.ElIKSLTuQLK-MAc+l...s.Guhc...p.t...t.......... 0 16 77 103 +13556 PF13714 PEP_mutase Phosphoenolpyruvate phosphomutase Bateman A agb Jackhmmer:A1B6C5 Domain This domain includes the enzyme Phosphoenolpyruvate phosphomutase (EC:5.4.2.9). This protein Swiss:O86937 has been characterised as catalysing the formation of a carbon-phosphorus bond by converting phosphoenolpyruvate (PEP) to phosphonopyruvate (P-Pyr) [1]. This enzyme has a TIM barrel fold. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.66 0.70 -5.15 124 2953 2012-10-10 15:06:27 2011-01-20 17:46:56 1 17 1973 94 941 4275 3390 240.70 35 79.80 CHANGED F+p.LH...pts.p....sl........................................lLsNsWDusSAclltps........G...........apA.luToShulAtuh.GhsDu.........ptlshsph...l.stlppIsps.s.s................lPl..osDhEsGY.....upp....spplscs...lcclhpsGssGlslEDp.........t.......tttlh.sh...........pptst+lpAs+puststs...hhlsARsDsalht............tsstls.......-slcRupAYtc.AGADslFlsGh.....pc..sp.l.tplspth.s.......hP..lNlhs.h..st.t....hshscLtplG.VpRlShGsthhcsA....hssh.tpsspplh ....................................................................................................hRt.Lt...pp....p....sl.lsusasuh..sAhlspps.........G.....................apA..la.h.SG.uu.l.A.....A.....S.....h......G.....l......P..Dl.........G.lsohs-l...l.ps...s..ccI..scs..s...s................................lP.l..lV.Dh.....D..sG.a...Gs...........s.h.N.luRT...V+p.h.h.c.A....G.sA.u.lpIEDQh......................K+CG.....+...ssKtll..st..........................-..E.hl.s.+I.+AAh-A+.......scss............alIhARTDAhh.................................scGl-s..AIcR.Ap...AY.......s....-.AGAD.hlF.s-uh............psh..pp..h...cphscsl.p..........sP...lh..ssh..p.....ut.o...............hosc-LtphG.lshllashus.hRAh.pAhppshpt..h...................................................................................................................................... 0 228 531 766 +13557 PF13715 DUF4480 Cna_B_2; Domain of unknown function (DUF4480) Coggill P, Eberhardt R pcc Jackhmmer:A6L3W4 Repeat This domain family is found in bacteria, archaea and eukaryotes, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF07715 and Pfam:PF00593. There is a single completely conserved residue G that may be functionally important. 
32.20 32.20 32.20 32.20 32.10 32.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.88 0.72 -3.93 100 11370 2012-10-02 19:08:27 2011-01-21 13:39:15 1 110 309 0 3174 13036 3876 84.30 31 8.98 CHANGED plpGtVh...sp...p.s...ps...ls....usslhhp...ss.p......ps......sh.Tst......sGpFplp....sp.....t.ss...pLhh..sh..hGaps...hp.h.lp....tp.t...p........l.s.lh...L.pp.ssp...p..Lc.E..Vll ..............................lpGpVh....Dp....p...s.......p.P...l.....GAo..Vhlc......Go.s..................pG........sl...T..Dh...................cG..pFslp....ls.......s...s.s......t..Lh..h...Sa..l..G..Yps.....pp...l.p...l.s.......t...........s.....................l..s.lt...L...pp...c.s.p.....t...Lc.E.VVV.............................................. 0 1390 2842 3163 +13558 PF13716 CRAL_TRIO_2 Divergent CRAL/TRIO domain Bateman A agb Jackhmmer:P32525 Domain This family includes divergent members of the CRAL-TRIO domain family. This family includes ECM25 that contains a divergent CRAL-TRIO domain identified by Gallego and colleagues [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -11.05 0.71 -4.30 101 1520 2012-10-02 01:12:42 2011-01-21 18:21:51 1 65 240 9 821 2870 21 140.20 22 13.59 CHANGED hth.h.tup.sppGpsllhhs.uphh..............................................shcpllhYlhpph.pp.....sps..asllh.pssh.................................sp.shshlpph..............................................hphlstt..........................htpp.lptlal.l+ss.....hhh+phht.............................shhphh.stth.ttc...............................................................lhh.................lsslspL.....hphlc....hspL..pls...........tshpa-pp .........................................................................................................hh......tG..sp..u.ps.lls.as..sp.h......................sttp...............shcpl..h..h.....Yl..h...p....h...h.c.................h.s.tps.........ahll.h..pp.th.................................s..p........h...s....h...l+ph.....................................................................................h..p.hlspp...............................hhpN..L+........s.lhl..l+Ps...............hah+ph..h...............................hhhp.hh...ss..ch..thK.....................................................................................l.h..h.................................l...s.....o...l...p...c..L.....tphls.....hpQl..cls......tshpast....................................................................................................................................................................................................................................................... 0 213 322 549 +13559 PF13717 zinc_ribbon_4 zinc-ribbon domain Coggill P pcc Jackhmmer:Q5HCD9 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR, Pfam:PF12773. 
27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.89 0.72 -4.26 74 110 2012-10-03 10:42:43 2011-01-24 12:58:18 1 21 79 0 72 614 889 35.80 31 8.64 CHANGED MplpCspCpspapls-pp.ls.spGt.pl+CspCpphah ......MplpCspCpspapls-cp.ls.sput..plcCspCtpha..... 0 40 57 69 +13560 PF13718 GNAT_acetyltr_2 GNAT acetyltransferase 2 Coggill P, Eberhardt R re3 Jackhmmer:P76526 Family This domain has N-acetyltransferase activity [1,2]. It has a GCN5-related N-acetyltransferase (GNAT) fold [2]. 24.60 24.60 24.60 26.20 24.50 23.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.27 0.71 -4.99 88 1293 2012-10-02 22:59:21 2011-01-24 13:23:31 1 20 1187 2 418 1028 31 180.60 42 24.00 CHANGED chhuLhVuuHY+sSPNDLphlhDAPupplalLh..........................................s.ss............cllssl.lshEGpls.pc...hs.ppshpttpRspGcLlPhslupphtcppFupLsGhRlVRIAscPshpp.........hGhGochlph..lppa..hp..............................................................p..plDalGsSFGhTspLh+FWp+.sGFtsValppstsphoGE+oslhl+s.....Ls ...............................latLLsuAHY+TSP.DLpthhDAPup+hhhht..........................................sts.................clhGsl.......hl.s.cEG.s...LS....pp...ls.pslh.u.G....hR.............RP+GsLlspoLutphsss.tAA.sLpGtRlsRIAVHPshQc.........pGhGpplltthhphh..........................................................................................................................................................................p..plDYLuVSFGh..Ts-Lh+FWp+.sGFl.V+husp+.-suSGpYoshhLhPl.s........................... 0 131 221 337 +13561 PF13719 zinc_ribbon_5 zinc-ribbon domain Coggill P pcc Jackhmmer:Q5FCF8 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR, Pfam:PF12773. 
27.60 27.60 27.60 27.70 27.50 27.50 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.94 0.72 -4.37 79 534 2012-10-03 10:42:43 2011-01-24 14:21:25 1 21 487 0 215 626 893 36.50 35 10.00 CHANGED MplpCspCpspapls-pp.ls.sput.pl+CspCppsahh ........hpCPpCpTpaclsssp.ls.hpst..tVRCupCpplFp...... 0 55 128 174 +13562 PF13720 Acetyltransf_11 Udp N-acetylglucosamine O-acyltransferase; Domain 2 Coggill P pcc Jackhmmer:C5B7S0 Domain This is domain 2, or the C-terminal domain, of Udp N-acetylglucosamine O-acyltransferase. This enzyme is a zinc-dependent enzyme that catalyses the deacetylation of UDP-3-O-((R)-3-hydroxymyristoyl)-N-acetylglucosamine to form UDP-3-O-(R-hydroxymyristoyl)glucosamine and acetate. 24.50 24.50 24.70 24.60 24.40 24.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.68 0.72 -3.76 349 2605 2011-01-24 16:19:52 2011-01-24 16:19:52 1 15 2381 24 620 1760 1696 83.10 37 31.62 CHANGED DVPPashssG.s..Ap..htGlNhlGL.+R+GFop..-..plp.tl+pAY+lla+.sG.h.....slp-Al.ppl.pp....hsp.......sscl.ppll...cFl...t..sop.......RGlh+ ...DVPPYslAp....G....N...cA..p...hGlNl.GL.+..R.RG...Foc......-....plpsl+pAY+hlY+....sG..h......slc-.sh..t...cl.tc....spp...........pspV..pthh....-Fl.....p.....pSp.....RGllR.................................................... 0 196 396 518 +13563 PF13721 SecD-TM1 SecD export protein N-terminal TM region Coggill P pcc Jackhmmer:D2BRP0 Family This domain appears to be the fist transmembrane region of the SecD export protein. SecD is directly involved in protein secretion and important for the release of proteins that have been translocated across the cytoplasmic membrane. 
25.00 25.00 25.00 27.50 24.80 24.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.13 0.72 -3.75 100 2015 2011-01-24 16:50:55 2011-01-24 16:50:55 1 5 1305 0 300 1070 399 98.20 35 20.67 CHANGED lN+YPLWKYLl..llhllhlGhlYALPNLYGEDPAVQIou.p.p.u.s..p..s..s..s..t..s...pV.pssLcpssIsh..c..ult.l.-s....s..s..lLl.Rhsss-sQLpA+-h....lppsL.....G.c.....sYlVAL ....................W..h..h..l.l.h.sl.llulLYA..l.PNl..aG--PAlQIousp.p.G.s..s...h..s..-..t...s......pVpctLpppsIsh..K....Sls..h..cs...........s..slLl.RF..c..so..DpQ.lpA+-sLppsL.....s.c.....pYlVAL........... 0 49 131 219 +13564 PF13722 DUF4161 C-terminal domain on CstA (DUF4161) Coggill P pcc Jackhmmer:C5BCA0 Domain This domain is found at the C=terminal of most known CstA domain-containing proteins. The function is not known. 23.00 23.00 23.20 23.20 22.00 22.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.76 0.71 -3.83 141 3040 2011-01-24 17:03:47 2011-01-24 17:03:47 1 5 2329 0 528 2010 121 127.00 41 20.10 CHANGED Ghs...huhhapFAlhatAlFlLTolDuuTRluRahlQ-hhu.......h......htppphh.ssthluoslslshhGhllhtG.............shsslWPLFGhuNQlLAulALhlsosh.Lh+ht+t..p.....asa.lshlPhsahhlsThsA ...............hh....huFWYHFAILFEALFILTulDAGTRuuRFMlQDlLG........sh.............ht..c.s.csl....suslluTshsVs.hWGalLapGshD...........PhGGlsoLWPLFGluNQhLAulALhlsoVlLhKhp+p....p................ahW..VsllP.ssalllsThsA..................... 
0 173 322 436 +13565 PF13723 Ketoacyl-synt_2 Beta-ketoacyl synthase, N-terminal domain Coggill P pcc Jackhmmer:D2C0A5 Domain \N 25.00 25.00 25.90 25.10 24.80 23.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.44 0.70 -5.08 53 581 2012-10-02 12:25:54 2011-01-25 13:01:22 1 2 555 0 160 516 37 202.70 26 79.93 CHANGED Wp.p.Wup.ss.tth..ss......s.sstPtlstlPsMpRRRhSpho+lulcsuhplh......pt..pss..hlVFuSRHGElpRohsLLpslhsppslSPTuFuhSVHNsuuGhaoIhsppshssTSlAAGp-ohppullEAhuhLp.p.u.sppVLlVshDpPlPphYpsa.s........ppts.hsaAluLlLssGss.hphs..................hpss..ssss..pps.......shs.puL..phlctllssp.....s.ph.shsu.ppppWpWp ................................................................................hsth.s.lP.sh.p.pRRhop.hs+l.ul.p.s.u...h.t.hh.........................pp........t..p.s........s.......llas.Sp.aG-lp+shpllp.s..L..h..s..p..p.....s..l..S..PTsFu.SVHNussG.hoIhtpshsssouluA.upsoappuLhEAhshLpp..u...tppV.Lll.shDp....h......Pph..Yt.............p.s..hsaAlulll.p.s.usp....hp.hp.......................................ht.t.........tt.................tsh....hht.........................h.h................................................................................................. 0 34 79 119 +13566 PF13724 DNA_binding_2 DNA-binding domain Coggill P, Eberhardt R re3 Pfam-B_65234 (release 24.0) Domain This domain, often found on ovate proteins, binds to single-stranded and double-stranded DNA. Binding to DNA is not sequence-specific [1]. 25.00 25.00 25.70 44.70 20.50 19.40 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.76 0.72 -3.82 15 45 2011-01-25 13:21:28 2011-01-25 13:21:28 1 1 14 0 35 46 0 56.70 41 16.04 CHANGED MG.paRFRLSDMMPNuWFYKL+DMpKsRs+sstssstt...................ssouphppsSss ...MG..pa+FRLSDMhPNAWFYKL+DMp......+sRtpsssss.pt...................ssosppppsS...................................................................... 
0 4 19 27 +13567 PF13725 tRNA_bind_2 Possible tRNA binding domain Coggill P, Eberhardt R re3 Jackhmmer:P76526 Family This domain, found at the C-terminus of tRNA(Met) cytidine acetyltransferase, may be involved in tRNA-binding [1]. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.23 0.72 -3.91 114 1005 2011-01-25 13:25:22 2011-01-25 13:25:22 1 12 956 2 333 715 7 106.60 31 13.55 CHANGED Fc+Rh.....h.tLhu.hphpp..hs..........................h...ts..............................phsthlo.......................................shDh+RL....csYu.pshh.sacsl...lsh...lscLhhtthht..............hplssh....ppslLluhsLQp+sh-plsp-LsLsup...pt..l ................................................RRch..tLhp.as.hcs.ls.................................hts.....................................h.-ssLo.......................................................spDhp..c..L...suaA.hutt.shhss.....lss...LhRLl.ps.............................tl..........thshLhu+l.pptS.uplsppLp.LsGc..+..h........................... 1 106 180 269 +13568 PF13726 Na_H_antiport_2 Na+-H+ antiporter family Coggill P pcc Jackhmmer:C5B988 Family This family includes integral membrane proteins, some of which are NA+-H+ antiporters [1]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.95 0.72 -4.07 42 1096 2012-10-02 15:12:49 2011-01-25 13:28:17 1 4 1056 0 144 1175 19 88.20 47 20.18 CHANGED NAVllAVhlMLlLSLhR..lpVVlALhluAlVGGLlGGLulspT............lssFs.sGLGGGAplALSYAlLGAFAlAIo+SGLsclL.....Aptllphlu ........NsVlluVl.lMl.lLs.L..hR..lNVV.luLhluALVGGLlu....G.h....u....lsco........................lssFt.s.........GlssG.A.p.l.ALSYAlLGu.F.A.s.AI.S+SGlschLsppllphl.p............................ 
0 34 76 118 +13569 PF13727 CoA_binding_3 CoA-binding domain Coggill P pcc Jackhmmer:C5BFR0 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.93 0.71 -4.44 56 4439 2012-10-10 17:06:42 2011-01-25 14:11:05 1 19 2789 2 1096 3621 674 172.50 16 32.80 CHANGED thhslYps.hph+shhpths.clhtuWh.ls..h.h.h.h.hs..lha..t..h..p......s..h...hSR..lalshW.h.hsuh...sh.llhtRhll...tthl...pp.ht+....p...s....thp....htsl....ssss....uppht...ptl....p..pp.ts.u.h..h....h.l.GlaDDc.......s.ssp..s..h.s........uh..PhlGslspllchsRpsclcplalALPlssEpcIhcllpchtspsVsIRlhP ....................................................................................................h....hYp......h...h.th.h.p.p..hh..p....lh.t.shh..hs...h..l..h..h......hh....lsh........h.....h.....p............p.h.......hs..R.......hh..h..h...h.a........h....lhsh......hh...lh...h..hRh...hh.......+h.h.h.....................pp....hhp....................p............p................pts............hh..ls........usss....u...p...t.l..h....pt.l...........p...p.p.s.p....h...s...h..c......ll..uh....h..D-c...................t..s.tt..sp.....h...t...........................sh...s..l...h.G...s...h..p....p.l...............p....h..s.c.p........t..........p.....l...c.p........lhl...A....l.P.....s..t..t.ph.pc.llp.h.pp..h.s.sp.hhhhP................................................................... 0 367 716 919 +13570 PF13728 TraF F plasmid transfer operon protein Coggill P pcc Jackhmmer:Q9WTC0 Family TraF protein undergoes proteolytic processing associated with export. The 19 amino acids at the amino terminus of the polypeptides appear to constitute a typical membrane leader peptide - not included in this family, while the remainder of the molecule is predicted to be primarily hydrophilic in character [1]. 
F plasmid TraF and TraH are required for F pilus assembly and F plasmid transfer, and they are both localised to the outer membrane in the presence of the complete F transfer region, especially TraV, the putative anchor [2]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.16 0.70 -4.85 53 761 2012-10-03 14:45:55 2011-01-25 16:40:05 1 4 447 0 91 751 65 167.80 27 71.99 CHANGED GWhWYs-s....p.pc.t........p..tsss...........ss....s....t.ts.................s....t............pph..ph..h+pthpchhspAlhpPo...ENltpahplQchhhc+uspFupsappslhppPpLDYslc.pP...h..sssutpshhp.tcp.pppp.pslppLu.ppaGLhaFYcu.s..ss.hspthusllpsFucpa.GhsllsVShDGshhs..thPpsch...DsGptp.plsl...p..hhPALhLVsPpo........tphtPluaGhhSp--LhcRlh ..............................................................................................................................................................................................................................................................tt.tpp.hltphs..pca..u..lh..F..Fh..pu.p..Cs.hCc......phsPll....pphu...p.p...Y....G...h.....s.Vhsl...o...l..D..G..tsss...............thP.phhs..............spu......hp....p.h..s.l.........h..hhPshhLlssps..........hph..hPls.Ghhs.spl.tp.................................................... 
0 24 44 71 +13571 PF13729 TraF_2 F plasmid transfer operon, TraF, protein Coggill P pcc Jackhmmer:D2BTB8 Domain \N 25.00 25.00 26.00 25.00 23.30 24.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.40 0.70 -5.59 29 545 2012-10-03 17:14:37 2011-01-25 17:05:16 1 3 385 0 74 341 4 257.80 36 66.69 CHANGED AshhNPALluttp.p.Dc....hu.lllP..ulGspls..D.DplhDc.hDslpDshDthpsshss.p................tssscLsssLpsLcsspAhussGsuhu.....lulPsphlshuhhsKuYssuhstusl.spsDlshLpsht..................sss.ssLsSputshuusls-lGlAlA+ph.s..hss.hslolGlTPKhQ+lpsYNYssolpsaD..ssD.acs......scapsscouFNlDhGhshphscsaplGlsupNLlupc..l-Tpphp.............................uhp.TYplcPhhTs.GsAaps..chhTlusDlD ................................................................................................................ushhNPA...L.lAhhc.p..Ds......hu.llLP..ulG..hphs.....D..csl.sp.lD.clpD.....p.h-..acpss.s.t..............................tssspLscpLpt....hpspp.hpupsGsulA......sulP.spsl......uh......sh.......hsKuYupshVsupl...sss.shphlcph.................................s..ppthsS.sss....upus.hlo-hGlulAKph.s........huu.pplSlGl.............T.............PKlQ.+lhhY......sYss..o..l....p..sYD....tsD..acs.............sch..s-suFNhDhGss..h...ls....-..paplGlsupNLluR-..I-TKslt.............................shppTYpl+PpsTs..Gsuaps..DhhTsusDhD................................................ 
0 11 30 58 +13572 PF13730 HTH_36 Helix-turn-helix domain Coggill P pcc Jackhmmer:C2JR21 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.65 0.72 -3.84 198 1833 2012-10-04 14:01:12 2011-01-25 17:46:11 1 15 1152 0 237 1777 177 53.80 25 20.10 CHANGED pp.Lshp..sphlhhhlhshs..........ttt..sas..o..............pppl.......uphh.......shu.cp.....olpctlppLpctGa.....l ...........................................p.t.tthlhht.Lsshu...............sppsh..saP....o..............hppl.....Acph...........thu.cp..........TlppslppLcctGhl......... 0 81 149 191 +13573 PF13731 WxL WxL domain surface cell wall-binding Coggill P pcc Jackhmmer:C2JPG8 Family The WxL motif appears in two or three copies in these bacterial proteins [1] and confers a cell surface localisation function. It seems likely that this region is the cell wall-binding domain of gram-positive bacteria, and may interact with the peptidoglycan [2]. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.73 0.70 -4.52 83 1929 2011-01-26 11:14:11 2011-01-26 11:14:11 1 25 266 0 120 1070 0 205.50 24 49.88 CHANGED ssAtss....so......sso.sspVpFpss..............sVsPsss.s..s.....s...ss.lsP...DP.....sts........uo..sus......Lslsas.s.s.hsFGppp.Ios...ss...pshhup.........................p.h...t...t.st............ptss......hlpVsDhRG..Tpp.....GWpLospts.p...h..psus.....tp.........pLsGu.p..................lshsssphts.ss.ss...ss...................sPsshss.s......hsls......Gs..........susslhsAssupGt.GsWhhpass...........................p......slsLslPupshK......AssYTuslTWsLssuP 
............................................................h...............hto....l.h..s................P.ss.s.............shps...s.......sts..........ss..sGsLslsh.s..s.s...hsFGp.p....Ios...ps...psahsp.....................................................s....p.h....t.sss..............ptss...alplsD.....t.R.G..Tps................GWp.Losp.s..p...Fpsss...........tp.........pLs...Gu..p..................lpht..ss..ss..ss..s...sp...ss..........................sPss.pt....s.........hsls....s..ss......................ss.sslh..s...A......s......p......s.........pGt....Goa..h.hphsp.....................................p......slpL.p...V...Pusssp......utpYpsslTWsLsssP............................................................. 0 60 97 102 +13574 PF13732 DUF4162 Domain of unknown function (DUF4162) Coggill P pcc Jackhmmer:C2JJQ9 Family This domain is found at the C-terminus of bacterial ABC transporter proteins. The function is not known. 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.69 0.72 -3.47 129 1360 2011-01-26 13:50:07 2011-01-26 13:50:07 1 3 1176 0 282 884 167 84.40 27 28.07 CHANGED Gslp-lKcpaGc...pp..lhlc....sc...tshp.t.....Lppl.......sGltph......p.p....p.p...ss...h..p..lp.lp.s.p..p..su.pp.lh.ptlspp..u.h.lppFp.ptPSLs-IFlcpV ....................................................Gslp-l+pp..aGp...pc...lhlp........oc....tshpc.........Lp.sl....................sl.pps.........p.h......p...p....pG.....h..p...lp..lc...s..-..s.......su...pc.lhphlspp.uh.lppFp.ptPoLp-IFhpt...... 0 107 202 246 +13575 PF13733 Glyco_transf_7N N-terminal region of glycosyl transferase group 7 Coggill P pcc manual Domain This is the N-terminal half of a family of galactosyltransferases from a wide range of Metazoa with three related galactosyltransferases activities, all three of which are possessed by one sequence in some cases. 
EC:2.4.1.90, N-acetyllactosamine synthase; EC:2.4.1.38, Beta-N-acetylglucosaminyl-glycopeptide beta-1,4- galactosyltransferase; and EC:2.4.1.22 Lactose synthase. Note that N-acetyllactosamine synthase is a component of Lactose synthase along with alpha-lactalbumin, in the absence of alpha-lactalbumin EC:2.4.1.90 is the catalysed reaction. 21.30 21.30 23.20 22.20 20.80 21.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.83 0.71 -4.80 4 775 2012-10-03 05:28:31 2011-01-26 16:12:21 1 14 122 59 468 672 185 120.50 40 36.04 CHANGED CPphsPlLVGshpV.Fp.VPSLs-IVcpss+l.PGGRa+PsuC.sRs.....RsAIIlPaRuRccHLRlLLY+LH.FLpRQQLsYGIaVIcQtGNGsFNRAKLLNVGhhEALp...aDChhLHDVDLLPEND+NLYsCs.p ................................................................................................h.........t........l...GG.pa.p.P....pCh..stp...........+lAl.llPa.R......s............R....p..c.HLhhhl.aLH.PhL......pR....Q....p....l....casIYVlpQs.s.s..t...h.FNRAtLhNV...Ga..h...EA.h+..c........s.....a..D..C...hlFHDVDLlP.s.D+.NhYtC........................ 1 160 192 326 +13576 PF13734 Inhibitor_I69 Spi protease inhibitor Coggill P pcc manual Family This family includes the inhibitor Spi and the pro-peptides of streptopain (SpeB). SpeB is produced as a 43 kDa pre-pro-protein, which is secreted via the recently described Sec secretory pathway Exportal. There is tight coupling between this inhibitor and its associated protease: the gene for the inhibitor Spi is located directly downstream from the gene for the streptococcal cysteine protease SpeB, and the sequence of the inhibitor is very similar to that of the SpeB propeptide. This is an example of an inhibitor molecule that is a structural homologue of the cognate propeptide, and is genetically linked to the protease gene [1]. 
21.90 21.90 22.10 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.30 0.72 -3.88 3 136 2011-01-26 16:26:11 2011-01-26 16:26:11 1 5 74 14 8 126 3 109.20 27 17.66 CHANGED M.........................................E.pF.RopsEAhtlApoFhupssp.................oKspLRlppLS..hPsDT......LaIlAL.s.GGFlLVSGDTRh.slLuho.csNLDhspssV.shlsVFtcQl....................................NFu- .................................................t..........pAhphA.p.ah...tps.t..t......................pp.hp...h..psp.thu.........ss..ss..............shYlaNh...sss.GFVIVSGDcRss.pILGYSppGsh..D..h.s....p..t...N..ltsh..hpta.p........hth......................................... 0 2 6 8 +13577 PF13735 tRNA_NucTran2_2 tRNA nucleotidyltransferase domain 2 putative Coggill P pcc Jackhmmer:C2JK63 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.52 0.71 -4.39 46 1530 2012-10-01 20:28:14 2011-01-26 17:21:14 1 13 1500 6 255 1460 430 149.20 25 36.36 CHANGED hphpspppuWuhLhhtls.h...ppspsFL+sWKhSNchI+pVppllphlphhp.......ppphsth....pl.....YphGpp...hhhhs..ppl....tph...hs.hs............hsh........pplppha..psLPI+.......s++-LslsGtDLlpthshpPGPhlGclLpplEptllpGclsN-cculhpas .................................................................................................................hhh...t..........pp...spthL+.phKhS.N.p.h.h+p...l...ppllp..hhphh............................ppphp.ph.........l...................Yc.h.s.hc...........hhhps....pl...........tps....ht.ts...........................................ssh.....................ptlpchh.....pp..L.....s.....l.+.....................s.p.+..-....l.....s.....lsGscLlpt.h.uh.p....s.GPh.lG-lLpplEtullp.Gpl...p.Npcctlhpa....................... 0 109 180 216 +13579 PF13737 DDE_Tnp_1_5 Transposase DDE domain Coggill P pcc Jackhmmer:Q6D6U8 Domain Transposase proteins are necessary for efficient DNA transposition. 
This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 27.00 27.00 27.00 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.34 0.71 -3.91 55 810 2012-10-03 01:22:09 2011-01-26 17:53:35 1 3 314 0 228 802 109 97.70 47 42.18 CHANGED hpRGSLThWlDpch..tW.hs..s.p.G+RGRsptaSDsAIpss.LhlKslFsLPLRtspGFlpSLhcLssLshssPDaSolsR...Rt+slsVslsh+ssstu...lHLllD.STGlKhhGE ...............................hpRGulThWlDtcs..tW.tt...spp.spRGRsppaSDhAIpss.LhlKplFphsLRtspGFlsSlh.pL.hsl.sltsPDYos...lSR...RtKplslsh..............p..s.scu......hHLhlDuTGLKhhGE.................................. 0 16 73 111 +13580 PF13738 Pyr_redox_3 Pyridine nucleotide-disulphide oxidoreductase Coggill P pcc Jackhmmer:C2JNY9 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.64 0.71 -4.26 38 7189 2012-10-10 17:06:42 2011-01-27 11:41:36 1 104 2015 19 2924 28500 10134 196.30 22 41.32 CHANGED hVlGAGssGhuhs.stL....hcp..sh......s.s......llllD....pttp...sGup...at...p..h..h..........spl..hp.....P.uh.h........shs...h........shtth.shsst.s...th....h...tph....sous-.l..s.p...........Yhpplhcph....hl..s.......lp..htspls.tl...p....ts................s..st...ap..lps..ps..t..................plp...sctll.usG...hhtp..P..ph..s..hsu................s....t....ps.l...ths....pl..hsh..t...........c.....h..ts.....pphsVlGuG...+oAhssshtLhct.....u...p......c...lshlt.ppsshh 
................................................................................................................................hllGu.G......uG.luhu...hp.L........ppt....Gh.......p................................hhllE.....................p..t....t...p.........h.G..us..........Wt.........p.....h.......................................................................................h.....................s...s.........................................................th....t.h.....s.h.t........................................t............pt.h......................ss...t....s....c....l.......h....p........................................................................................................Y..l...p....p.....h.......s...c..c.a............sl...p...............lp...h..s...s...c.....V....p....s.s......p.......hs................................................s..sp.......ap......lps.....pss.....................................................php.......uch.llh.AoG.....h.h.sp.....P...p..h...P.s....lsG.........................................t.....t........ph..h........H..s.s..............pa......pps....h..........................s.......h....p.u..............++V.s.VlG.uG..........s.SA.......h..p.hs.....p..lspt..............u......t..............p..lshh.Rp...................................................................................................................................................................................................................................... 0 721 1687 2444 +13581 PF13739 DUF4163 Domain of unknown function (DUF4163) Coggill P pcc Jackhmmer:A7HM60 Domain The structure of this domain is and alpha-beta-two layer sandwich, identified from a Fervidobacterium nodosum Rt17-B1 like protein. The function is not known except that it is found in association with Heat-shock cognate 70kd protein 44kd ATPase, Pfam:PF11738. 
27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.40 0.72 -3.23 48 615 2011-01-27 14:33:03 2011-01-27 14:33:03 1 13 479 2 148 539 7 102.50 19 37.12 CHANGED slphpphp...hp..s..shhp..h.plphPhlps.hp.spphppplNphl..cppstph..............hpc.........h....c....ct.upc.....th.......cpshs........hs..Ythps.....saplphs....ssshLSlhh.shYpYo.GGAHGhTs ............................................................................s.........p..p...hhp..h.plphP.hp.....s...t..spp......hp......p...p....l..Nphh...pp.p.spph..........................hpp.........h....p....pptpc...........ht.............pps............hs...aphps.....saclphs....psslLSlhh.shYpYs.GGAHG..................... 0 60 110 125 +13582 PF13740 ACT_6 ACT domain Coggill P pcc Jackhmmer:Q6D7R4 Domain ACT domains bind to amino acids and regulate associated enzyme domains. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.02 0.72 -4.26 82 3060 2012-10-02 00:29:19 2011-01-27 17:00:36 1 22 2494 6 650 2293 345 75.40 27 39.29 CHANGED phLlIoshGpD+PGlssplsplluptsssIlDsp.AhltsphoLhhLlpss.........ts..sh..splppsL.hhupchsl....tlhhp ...........hlITllGtD+sGIlsslophlupt.s.....sNI..hDh..p........s...h....l....s......s....t....F..o..h..h.hlls.ss............ts...sh....s..tlc..ssLtth.utplsl.l.h................................................ 0 198 402 547 +13583 PF13741 MRP-S25 Mitochondrial ribosomal protein S25 Coggill P pcc PfamB-B_2836 (release 25.0) Family This is the family of fungal 37S mitochondrial ribosomal S25 proteins. 
25.00 25.00 25.10 25.90 24.50 24.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.54 0.70 -4.89 31 140 2012-10-03 14:45:55 2011-01-28 09:39:44 1 4 131 0 108 137 0 205.30 38 81.68 CHANGED M..KlphpAspVhppsos.lpu....GhlpppPsWaslVushPPsp..phsRp.hhpp..sppphtph...tp..................hhcsp.pth..+pppssplapss+lpa.EDpLRchFa+pHPWELuRP+lllEss....Gc-..tchDWS+.hpQhsKsLDGESVVQRsLaLlps.....pshohhcAYDhARhEFY+LRhpEEl-ppVAtEEAchaGAsFusopl-hGhphEpchl-sWcphApppoplhpuc ................................thphpAhp.Vhpps.t.hps.....Gh.h.....pptP....sWhsllsslPPsp..hhsRp...pp..stphhtp.......................................hps.p..t.....ppppspclapP.c.lpY...EDpLRp.Fa+-HPWELuRP+lllEss.......GpD..pphDWS...+.lp..Q.G+.LDGE.............SVVQRpLaLhps.................tshohtpAYDhARtEFYpLRhpE-lEp+VAtEEAchhGAhFG.s..tlphGhphEpp.h-pW+thAtpcsphhpt.t......................... 0 28 59 92 +13584 PF13742 tRNA_anti_2 OB-fold nucleic acid binding domain Coggill P pcc Jackhmmer:Q5FCF9 Domain This family contains OB-fold domains that bind to nucleic acids. 27.00 27.00 27.00 27.00 26.60 26.80 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.21 0.72 -4.14 80 4104 2012-10-03 20:18:03 2011-01-31 09:50:30 1 5 4053 0 822 3044 1031 97.40 34 22.09 CHANGED hhols-Lsshl+pslcs.s...sphaVpuElSshpt...ps.GH..hYhsLh-............sp..Aplpushapsphptlp.........hp...............................psGhcVllpuplsaat.hG.....hslhlpclcs ............hoVopLsphl+thl-p..c..h..spValpGElSNhpp...sS..GH..hYFoLKD............cp.......Aplp..sshF+..ss..sp+ls...........................Fps..........................ccG.pVlVpuclolYEspG...........sYQlhlcphp............. 
0 260 536 693 +13585 PF13743 Thioredoxin_5 Thioredoxin Coggill P pcc Jackhmmer:Q5HAE0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -10.83 0.71 -4.71 36 777 2012-10-03 14:45:55 2011-01-31 10:34:12 1 1 761 6 119 791 73 177.00 31 71.20 CHANGED YlFlDPlsscCaphEstlpKLth-.....Ysphhpl+alhhspLpslsttttp......................hshssssh..hpp.hsssY.uuLAhKAApLQG++tGtpFLpcLQctlhlp+pslop.-llhphApps.GLDl-.FpcDhcSshApcuapsD.+lupEMsVppsPThVhFN..ps.c-pGlplpGhhsYcla.p .....................................................................................................................................YhFhDPhsssCaclcshlh+Lph-.....as..p...h..l...p...l..R....a..Ihs...s.....s.l..p..s.lsps.tp....................................ths.p.h.....p.pss..s...s..h..as..s..u.L....A..hK...AA.c.L...........Q.........G..+.c......cu.c.c...F.L.c.tl...Qptl.hlppps.hss.......ph...lhch....h......pss......G.....lDl..-....hF+c...Dh...p...o...st.h.p.c.uhp....p.Dh....+lA.pE.MpIpptPolVhFs...ps.c-pGlhlpGhhshchh........................... 0 33 73 99 +13586 PF13744 HTH_37 Helix-turn-helix domain Coggill P pcc Jackhmmer:Q5HBM9 Domain Members of this family contains a DNA-binding helix-turn-helix domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.09 0.72 -4.17 55 1158 2012-10-04 14:01:12 2011-01-31 10:40:40 1 9 818 4 307 2718 211 79.70 24 74.93 CHANGED lapDhuhscucphp.....hKupLhht....ItchlcpppLoQpcAAphhslspPclSplhcu+lsphSl-pLhshLstLGtcl-Islp ...................................................pttt...t..............h+s.p.lh..ht..........lp.phh.cp..pt....h...o..QspsA.phhGloQPpl.Sc.l.p...t...+...h....s....p...h...s...l...ssLhphl.s.t.h..Gt.clcl.h........................ 
0 67 178 251 +13587 PF13745 HxxPF_rpt HxxPF-repeated domain Coggill P pcc Jackhmmer:Q6D9B1 Domain This family is found in non-ribosomal peptide synthetase proteins, and can occur up to twelve times. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.81 0.72 -3.67 1348 6665 2011-01-31 11:02:57 2011-01-31 11:02:57 1 1095 1165 1 2334 7456 120 92.30 29 5.06 CHANGED sHQDlPFEpLV....-t.L..p.s.p....R.shu...+.s...PLFQVhhsh..p......s..s.....s.......t....s...t.........h.p.l.....sG.......L..p.lp...sh...s...h..s....t..s.....su+FDL....slpl.tE....ps......s.......s....lpsshpYso-LF-psTlpch ...............................tHQDlPFEpLV.......-t.L..p.....s..p..............R...sh...u...........+..s...PL.FQlhhsh.p..............s.t......s...............t.....s.t.............................h.p.l........su..............l...p..lp.....s.h........s.....h...s.......s..s......................su+F.....DL....slpltE......ps..........s...........................s..lpsshcYso-.LFctsTlpph............................ 0 609 1300 1915 +13588 PF13746 Fer4_18 4Fe-4S dicluster domain Coggill P pcc Jackhmmer:Q5FDF3 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.46 0.72 -3.55 40 1675 2012-10-03 08:56:43 2011-01-31 11:20:17 1 15 1374 0 499 3018 1748 74.80 36 17.46 CHANGED lChahCPasR....................hQ.usMhDp...colslsY....................c.ht.hp........................scs..........................ct....ttpp....hcphp...........................pst....shGcClD..CstClpVCPsGIDIR .................................................................................hChahCPasR.............................hQ..ushhDp.......so..hh.ls..Y.................................................................................c..hc.tt........................tpt...........................p........hh+t...hcpht...............................................c.tt.....shG-ClsCstCVplCPsGIDIR...... 0 164 331 422 +13589 PF13747 DUF4164 Domain of unknown function (DUF4164) Coggill P pcc Jackhmmer:Q5HAR4 Family This is a family of short, approx 100 residue-long, bacterial proteins of unknown function. There is several conserved LE/LD sequence pairs. 27.00 27.00 27.00 27.10 26.80 26.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.84 0.72 -3.85 32 182 2011-01-31 11:46:12 2011-01-31 11:46:12 1 3 165 0 69 156 20 88.10 45 84.00 CHANGED ssssplptAhpRLcsAlspLEsAl-pRh-tccst......s-hcscl.ptlssDRuRLApELDputsRsp+LccsN+ElucRLssAhEoIRuVLsc ................s...stLcpALcRLcpAlssLEpAV-hRl-.p-pch........uEhEpEl.Q+hsADRSRLApELDpu-uR.u.cRLEtsNREVS+RLs....oAMETIRuVLD................ 0 12 39 48 +13590 PF13748 ABC_membrane_3 ABC transporter transmembrane region Coggill P pcc Jackhmmer:Q6D8M5 Family This family represents a unit of six transmembrane helices. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.37 0.70 -5.33 20 130 2012-10-02 13:23:42 2011-01-31 14:29:18 1 3 117 0 24 160 15 207.20 47 78.77 CHANGED L+pIs+patt+LhlTasLVlAENsLhlhYPLhuGFAIsullsGsstpAlhYuslVllhWllGuuRRtlDTRsFuRIYscLAVsVllsQRppstssSolsARVsLSREFVDFFEpHLPhLhTSllSlsGuslMLLslEFhlGlusLslLhhhhhllspas++NppLat+LNNcL.....E+-VsllspsptpsLp+HYchLu+LRItlSDREAhuYhhIGlstulLFshslhhhohpssssAG ......................................LKtlsppa+K+LhhTF.LVshENl.LhLhYPlhuGaAIN....A....l.......l.s.G.p.s.......hpA....l.h....Y.A.ll..V...llh....WllGAARRhs....D....TRTFs....RI....Ys....cl.........AVsV....l......l.p......QR......p.......p..........p..h..spS........sls........AR......V.....u......L....S....R.E.F...V.s.FFEcHLPhhhTSllSlhGAslMLLllEFWlGluul...s...ILshhhhl...LPpFsth.......s-pLah+LNNpL............E+-schlppu..st..ppLhRHYshluRLRlhlSsREAhuYLslGhuhulLFuhshshhohpshsoAG.......................................................... 0 5 10 18 +13591 PF13749 HATPase_c_4 ATP-dependent DNA helicase recG C-terminal Coggill P pcc Jackhmmer:B0G2L3 Domain This domain may well interact selectively and non-covalently with ATP, adenosine 5'-triphosphate, a universally important coenzyme and enzyme regulator. 25.60 25.60 25.70 25.60 25.40 25.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.56 0.72 -4.19 156 1792 2011-01-31 15:44:31 2011-01-31 15:44:31 1 38 1018 4 435 1610 115 86.20 24 18.82 CHANGED GsplplplacDRlEIpsPGslhsslshcp...hhst.s......spsRN.hlAslhpch.............s.......h............h.......E......ptGoGlp.+lhpthcp....tthstPpa.p.s...ss..sp..hp..Vsl ..........................................lplpha....sD.RlEltsP..Gsl...sloh.....-p.....hhst.............stsRNshluplhpph.............s.......h.............h.......E......phGsGlp.+.lhpthpp....hth...tPph.p..p..pt..st..h..hh......................... 
0 146 311 382 +13592 PF13750 Big_3_3 Bacterial Ig-like domain (group 3) Coggill P pcc manual Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 27.00 15.40 27.10 15.40 26.90 15.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.96 0.71 -4.80 8 166 2012-10-03 16:25:20 2011-01-31 16:04:29 1 38 89 0 42 589 311 132.50 27 12.60 CHANGED ssaphsFshssLP-GpYollt.sApDpasN...ssspoht.sltlDsTsPolsl....ttslu-GuslpGLEsLcIoLsDshssu..sLoSlsLsGGPssDpVpLoWsstGcshYtLpYPRlFPSL.csGEoYTLTVsApDstGNssspossFpYhPsNLlplcsL ...................................................hthpas.h..p.tls.-G.pY.s.l.s.s..tApDthsN.....sss.p.s.ht.....p...l.......s.h.D.s..T.s..P...sl....sl........tss...s..s..ss..s.....l..s.l....-...s...l....h...I.s..l..s..D.shs.s...pls..ph...t..L..hGG.ss...s......-..t..l....pl.sh........u.ps.h...a.h...pYs.h...h..F...P..s.........h..p.....s.p..Y..plss.s.DttuNhhp....ts.tFpY...P............................................................. 0 6 17 34 +13593 PF13751 DDE_Tnp_1_6 Transposase DDE domain Coggill P pcc Jackhmmer:B0G2Q7 Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.66 0.71 -4.12 60 2925 2012-10-03 01:22:09 2011-01-31 17:05:27 1 17 1173 0 565 4140 496 101.80 25 32.07 CHANGED hCPtGpph.phptp.thtt..t.p..s.hs...thht.aps..p..sCpsCPh+ppC..sps........pp..sRpl...ph.p.p....h.pp.h.t.pcsccp...h...poc.ttpphhppRs.slEsshupl+pphuhc+h.ph.RGhp+sphphhhsshuhNlc+lh ..............................................................p..t...............................t......................t...Ct..t..C....h........C................................t.....hh......hp.........pt..h...t.tp.hp..pt..........h......ho..t..t....s.p.p.h..ht....p.Rp.slEtsF.uphKph.h.Ghc.+.h..ph....RGh.pp..............sphphhlshhuhNl+Kh...................................... 0 215 392 444 +13594 PF13752 DUF4165 Domain of unknown function (DUF4165) Coggill P pcc manual Family \N 25.00 25.00 25.00 95.80 24.60 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.54 0.71 -4.36 10 99 2012-10-03 16:25:20 2011-01-31 17:26:11 1 4 41 0 8 59 2 123.90 43 11.92 CHANGED ssApAplhpYSFTDTsusp+olpPuo.salNPs..oslolsL.uGLDRhl+loVh+ouush.hhoosTo+lhsAschlossGs-YYGKclsLPAL.uEGsaoL+sElLsssGssVuopsYslsIDTTuP .s.sApAplhEYoFpsssGsp+olsPss.sYlNPs..uslolsL.uGLDRhl+loVh+usGo..lhSTsTo+lhsAschhussGp-YYGKclsLPAh.uEGsaolcs-ILs.sussVsTssYshtlDsTPP....... 0 0 0 3 +13595 PF13753 SWM_repeat Putative flagellar system-associated repeat Coggill P pcc Jackhmmer:Q6D875 Repeat This family appears to be a repeated unit that can occur up to 29 times in these outer membrane proteins. It is putatively associated with a novel flagellar system. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.64 0.70 -5.38 14 4913 2012-10-03 16:25:20 2011-01-31 17:42:37 1 401 519 0 1126 8114 1151 233.00 21 45.74 CHANGED TPss..pp..ssssh..plsls...lpDssGNss..s.s..o.....ts.hs.lDTc.......PpVsls....Iocscl.sutpssssh...oFotssos.hsssshh.hsss.....s.Gsh.....GuL....o......ls.osG.p.Wossh...TP.........ps..slpsu.-soI.pVs..hVpDstGNu..............s..su..sossh.oIDTt.......PpVoVo.............Ioss....cltuGpssTsTFTFsEsV..o.GFstsDs.sho.....sGThG.LssVG...oDGhsWosshTPps..spss-s....slpVs..As.VpDAtGNAs...ou.....S.s....sa.olDT......ps.Pplolsh...uDshls....usEsus.Thshotsls.shsssDs.h.....sos.pG..s...hss.T.....sVss...su..p.aosshssts..stssssT .................................................................................................................s.................................st.................................h....h...............s..............ht.......................s...............................s.............h......t................s....h..p..................h........t..s..s....t.....t.l....l.h............s.G.t....................h.s.h............s.........hs...ssG.....s...Woh.sh........ss....................................s........sL.ssG...s.h...s..l....sss........ss..D.hs....G.N.s..........................................................s...ss.........o.h..s.h..sl.D...st...................s..s......l...s...l...s..s..................................................................................................hs...ss.............s.l.h..s..s...s...p......h.......s..........s....h.s....s..t.s......s..s.h......t.s....s...p....h....l..p.lsh...........sG...t....h...h.....ts.s...st............s..s.G....sW.s.h..s.h....s.s..s.........t....h.s.su.............sh..s..ls.....ss....spD.huGNss.....os......s..................sh...s..l...D..s.............th...s.....s.....h.....t.....h.....s.....................t........t.....................................
.......................................................................................................................................................ss....................................................................................................................................................... 0 251 639 891 +13596 PF13754 Big_3_4 Bacterial Ig-like domain (group 3) Coggill P pcc Jackhmmer:B0G3A4 Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 30.80 30.80 30.80 30.80 30.70 30.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.88 0.72 -3.64 416 7056 2012-10-03 16:25:20 2011-01-31 17:55:29 1 431 622 0 1020 10654 674 55.10 32 17.22 CHANGED Gp.s.h...s..s.ss....s.....ss.....GsWo...h.....s.....s.....s.....ss...s.......-G....s.a..s..lo..l..suoDsAGNs.u.ss..s.s.s.....l..s.l..Dos..sP .........................................................s..s.s.s.....s........ss...GsWo....a......T.....s.............s........ss..hs............-G.....s.a...s..lo..l..sAoD.s..AGNs.u.ss.......s.s.h.s...l...s..l......DTps.............. 0 174 349 751 +13597 PF13755 Sensor_TM1 Sensor N-terminal transmembrane domain Coggill P, Eberhardt R re3 Jackhmmer:A3JX63 Family This domain is found at the N-terminus of the sensor component of the two-component regulatory system. It includes a transmembrane region and part of the periplasmic region, which is likely to be involved in stimulus sensing [1]. 21.90 21.90 22.40 22.60 20.80 21.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.40 0.72 -4.47 43 271 2011-02-02 10:44:03 2011-02-02 10:44:03 1 3 271 0 92 230 114 76.40 43 13.15 CHANGED RtRRuhhslptSsLTR+IlshNLlALslLVuGlLYLNphR-uLltpRspuLlopuclIAsshtAp...ussssshsosDs .........h..t+hhhphhhSSLTRRIlhlNLhALslLVuGILYLNQFRpGLI-A+lpSLhsQucIIAuAluAS...usssss.hhhDs............... 
0 27 58 69 +13598 PF13756 Stimulus_sens_1 Stimulus-sensing domain Coggill P, Eberhardt R re3 Jackhmmer:A3JX63 Family This domain is found in the periplasmic region of the sensor component of the two-component regulatory system. The periplasmic region is likely to be involved in stimulus sensing [1]. 29.70 29.70 30.20 29.70 29.30 29.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.46 0.71 -3.68 35 203 2011-02-02 10:46:24 2011-02-02 10:46:24 1 3 203 0 75 162 67 111.60 43 18.97 CHANGED lsPEpVuPlLRRLhsPT.poRARlYDt-GpLlh....DSRsLh.....spuplhph-LP.Psc.sppsshhcphhphhpphh.ts...............cLPlapE.hssssGptasEVtsALsGp.hssslRhspcG .......INPE+VuPlLRcL...ISPT.pTRARIYDppuslLL....DSRsLY...........upGtVlRaDL........P...Plc..s.c..........p.ss.lh.......ERhhshlpphh.su...................sLPlYpE.tssusGtsY...EVhpAL.s.Gs..tsshRhsp+G........................................... 0 23 45 55 +13599 PF13757 VIT_2 Vault protein inter-alpha-trypsin domain Coggill P pcc Jackhmmer:A8MTC4 Domain Inter-alpha-trypsin inhibitors (ITIs) consist of one light chain and a variable set of heavy chains. ITIs play a role in extracellular matrix (ECM) stabilisation and tumour metastasis as well as in plasma protease inhibition [1]. The vault protein inter-alpha-trypsin (VIT) domain described here is found to the N-terminus of a von Willebrand factor type A domain (Pfam:PF00092) in ITI heavy chains (ITIHs) and their precursors. 23.00 23.00 23.10 23.00 22.90 22.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.48 0.72 -4.52 10 116 2012-10-10 13:59:34 2011-02-02 13:41:04 1 10 54 0 56 544 59 76.80 45 8.33 CHANGED PGLlNhpotss...............LPLpuSclsuClpGhuLuhTAoLTYtNspstsl-Gs.FlaPLs-sssVlGF-AhluuRhlssplppcs .................................................PGLhN.tohss...............................LPLosSsVsuClsGhsLulTApLTYtN..p.s...p.P...h.-Gl..FVYPLsE.spsVsGFEAh.lus.RhVohQlps+.u.............. 
0 11 18 34 +13600 PF13758 Prefoldin_3 Prefoldin subunit Coggill P pcc Jackhmmer:P43573 Domain This family includes prefoldin subunits that are not detected by Pfam:PF02996. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.10 0.72 -4.22 23 73 2012-10-02 17:27:01 2011-02-02 13:51:44 1 5 73 0 60 162 0 97.40 41 15.65 CHANGED pcSlh+W+ph.u...EY-uLK-ElssL.............scsuop--llcluR-FsGoLVsEcElctILGc...pp...slpRo+pQVl-lloRRIDYVppNlsTlEKRlcsAEs+Ls ..........................................pcSLhHWphW-AEY-uLKEElpsL.....................sssp--...llc...Iu....R..-...F..s....GsLVsc+ElctllGc...pp....thpRotpQllshlsRRlDYVppNlsoLEKplcsAEs+L............ 0 10 27 47 +13601 PF13759 2OG-FeII_Oxy_5 Putative 2OG-Fe(II) oxygenase Coggill P, Eberhardt R re3 Jackhmmr:A3JXF3 Family This family has structural similarity to the 2OG-Fe(II) oxygenase superfamily. 27.00 27.00 27.10 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.37 0.72 -3.73 92 313 2012-10-10 13:59:34 2011-02-02 14:23:49 1 52 193 4 101 350 4039 98.90 26 31.52 CHANGED ssWsslhp.pGshpssH...hH...ssu...hlSGshYlpss.p..ss............u......shph.ssc........hshh..hs.s.s.h....t.....t...t....tshhh....lpPc.s.GplllFPSaLhHpV.hs.tuc..tpRlSluFN ............................t.hWhsh.hp.pG.shpssHhH....ssu...hlS.....GshYlp.hPp....ss...........................u......shth.ssc..........tthh....hs...t.s.........t......t..t....h..tshhh....hpPp.sGplllFP.SaLhHt...V...s.......t......up.......p.....pRloluFN.................................... 0 39 72 89 +13603 PF13761 DUF4166 Domain of unknown function (DUF4166) Coggill P, Eberhardt R re3 Jackhmmer:A3JYW7 Family This domain is often found at the C-terminus of proteins containing Pfam:PF03435. 
23.00 23.00 39.70 39.60 22.80 22.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.14 0.71 -4.36 56 283 2011-02-02 14:29:42 2011-02-02 14:29:42 1 4 268 0 77 250 15 170.60 31 72.13 CHANGED LsstlpchHuh....tss...s..hhpGpsclt..tusp...hhs.....+h.lstl.....ht......hP.....psupplPhplphps.....sss...up.....pWpRpFst.........ptFcSph.h...st..s...sshlhEthG.....s...hthpls....lps....p.s.GuLchpspp....hph....hG....lPLPth.Ltspupsp...Ethc..-......spapFcVclphPhlGh.lhpYpGphp .............LtPtlpcpasl.......psshshpGphcph..huus.....hhs.....+h.lhtl.....ht.......hP.....cpGpclPhslpsps.....pts..sp.....pWsRpFhh.t.h.+hFsush.h......st..p...........pstll-ahG.....t...lthpLs...lps..............c.p.GulphpSpc...hh......hG...hlPLPpa...Lh..spupsh...EphD..-......ppF+hcVpVpsPll.Gs..LhpYcGpF... 0 18 47 62 +13604 PF13762 MNE1 Mitochondrial splicing apparatus component Wood V, Coggill P pcc Jackhmmer:P24720 Family MNE1 is a novel component of the mitochondrial splicing apparatus responsible for the processing of a COX1 group I intron in yeast [1]. Yeast cells lacking MNE1 are deficient in intron splicing in the gene encoding the Cox1 subunit of cytochrome oxidase but do contain wild-type levels of the bc1 complex. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.68 0.71 -4.12 10 32 2011-02-02 14:48:45 2011-02-02 14:48:45 1 1 31 0 20 30 1 146.20 25 23.00 CHANGED s.hpsShhGslhsNLpshpcaIpsHhsYhpppshspshpslFlNslLsHlslapNaouhlphLcsl........phlpssshs...sahcssoF+lIhpSl.SNSsSuKlsuhtLasaLKp..pcl.........plTscsYpsLlpusL+G....ta+-sl.FYlYcYLpsa ....................h.s.phusl.hNLpshpcalpph.p.hpt.pshppsh+shFIsslLsHls.stpsashhlsllcpl.........hL.ssslh...s.hcssoa+hlh+uh.SpssSsKhshhtLasaLpp...sh.........phospsahphlpsshph......p-hh.Fahaphlhs.................................. 
2 2 9 19 +13605 PF13763 DUF4167 Domain of unknown function (DUF4167) Eberhardt R re3 Jackhmmer:A3JZ71 Family \N 25.00 25.00 35.50 35.50 20.60 19.70 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.78 0.72 -3.96 45 295 2011-02-02 15:58:01 2011-02-02 15:58:01 1 1 292 0 94 262 1061 82.10 52 35.05 CHANGED RuR......s.pss..........p....spt.ppss..Ns..hsR..sa...-S....sGPDs.KlRG..oApplhEKYppLARDAtuuGDRVhAENYhQHAEHYhRllsst.ptptp ...............................sRs.pss...........s....Np.s..+pss...Ns.....hsR..sa....-S....NGPDl.KlRG..sAQpIhEKYtpLARDApuSGDRVhAENYhQHAEHYhRllsuAptp........................ 0 26 59 71 +13606 PF13764 E3_UbLigase_R4 E3 ubiquitin-protein ligase UBR4 Coggill P pcc Jackhmmer:B3KMT2 Family This is a family of E£ ubiquitin ligase enzymes. 23.00 23.00 41.20 31.10 20.40 19.80 hmmbuild -o /dev/null HMM SEED 802 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.72 0.70 -13.40 0.70 -6.66 16 189 2011-02-02 16:43:37 2011-02-02 16:43:37 1 17 115 0 133 204 7 616.10 39 20.09 CHANGED 
MspNPYSSs-.ulGPLMRDVKNKIC+ph-LluLLEDD.GMELLVsspIISLDLsVpcVYEpVW......................pts.......tp....................ssPMslsYRhpGL.G-ATEphI-pL...pssp-EppDsEtpaphAulh.p-CGGL-shLshltplp.c......hpusp-hlshlL+LLhaCsKl+pNRptLLp..lGALshLLcshppAassss.......htluEplL......................hIhEollpEAs.pssluts.s..hpsss..............................s..pchlthFL-pls.....ss..hh+sNtp...hctlsRllPhLoaGcspsMcsLlcaF....cPhL.p....F-phDp...............c+.s......s-p.............p..ht..lEsFs+lu-ulcssssGc+LK-hIlppGIsptAhpYlccphsss.....tpp...s.SsEWtphLp+PSLPhlLphLpGLupGH.sTQpt.lc.cshI.slLHtLEpVou.-pcIGoLAENLLEsLuc.ppt.....lsc.....plpplRctT+tEp+RhAhtpREchLpsLGMch....sp...tG..pl..l.su.ssllcshE-lc...E.E-GLsChVCREGYsh+PsclLGlYoFoKRsslss...........sp.spt.pts.....................hsYTTVSHFNllHapCHpsAhR....Lpps....+cEWEuAuL+NupThCNsLhPlhGPpVspusaspslspahsslpslup.tsssphph..hs.aDItLLLsRFApptSFSsDs+GGG+ESNh+hlPahlphshaLLsp.......osstp..R..c....p+s..ltualo.s....sspphhps................shph.....-ss..ahhVhSLhs.pSh-pWpppRhsaLpRhltpuah...pah.pstsssch......................s.pspcp....aslh+PhLlaauLl-pl.phFKhth....................o.ssst-sW.hshpE+LppN.pthlsts+clLphh.-EhhsspDhpEhhDlsG 
....................................M.tNPYsS.c..hGP.LMRDlKNKICpph-hluLl-DD..G.MELLVssp.Ilu.LDL.....slt.p..VYcplW.......................................................................................................................................h.p..tp..............................s.sMplhYRhpGL.G-ATE.hlc..pL.........ps.p.p.......pcp...c.E..athsssh.tpssGLphhlphlttlp.p.........................hpp.s..p..lth...l.....lcLh.hssKl.chspp.Lhp....hsslshhLtsh.phsh.stt...........st.sEplL.....................................lhE.lhtcus......tpsht...t.....h.t......................................................................s..p.l.hhlpphs...........ss..hhcps..ph..hptlhRllshLsaGp.ttMthLhphF....pshh.p....apphDt........................pp.p......cp.................p..hh..l-.Fshlstul.........p..p.ss...GtpLKshll.phGlh.ptuhp.YhtpphPsh....................................hph....s.us.hpphlp..pPuL.hlLphLpG..........Luh..t.H.s............TQ........hh...l......s..p..............p...l.s.lHtLEtVu.......u..p.......ptlGs..lAENLL.-sLpp.p.t.............................hspplpth....RctT+tEp+chAhthRpc.LttLGMph....sctG...pl...ssp.sshlpphcc.l.......-..EsGLs.ChlCREGYphpPschLGlYsFo.KRssltt......................hp..p...ptp.............................................................sYoTVoaFNllHhpCHhtAhR.....htps+cE.........W-uAsLpNspTtCNslhPlhG.P............p...Vstusassslsca.s.lpp.h..s........t..tpt.....php....h..aDltLLhhRFAht...tSFpsDstGGG.pSNh+hlPahl.hs.hal.lsp.....................ststtpp......tpt...l.s.alp.t..s..pt..ps................................................shp...-ss..ahhh.uLhh..s..cpWpt..+hthLp+hlhhu.h...pthts.....t.........................................tt.p...a.sh.h+s.Llahull-.l.phhhph........................................................................s.ssp.ttW...ht-hlt.ss..hhthscphlphh.--hhsspshtEhhDlhG........................................................................................................................................................................................
...................................... 0 62 81 111 +13607 PF13765 PRY SPRY-associated domain Coggill P pcc Jackhmmer:B0V264 Family SPRY and PRY domains occur on PYRIN proteins. Their function is not known. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.27 0.72 -4.43 191 3648 2011-02-02 16:53:56 2011-02-02 16:53:56 1 109 99 11 2020 2534 0 48.80 38 9.97 CHANGED loLDPsTAaspLhLS-Dt+.pVph....sppt...p.shP..-sPcR.Fsph..spVLupcuF ......................lTLDPsTAaspLhL.S..-Dp+.pVph.................sppp...............p...shP....-sPc..R.Fsth..spVLuppuF................... 0 51 592 1270 +13608 PF13766 ECH_C 2-enoyl-CoA Hydratase C-terminal region Coggill P pcc Jackhmmer:B9A058 Domain This is the C-terminal region of enoyl-CoA hydratase. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.55 0.71 -3.95 180 1675 2012-10-02 13:07:06 2011-02-02 17:12:08 1 13 1322 3 662 1647 343 119.80 30 31.65 CHANGED sL..ttptstI-chFu.....t....s..olpcIlssLcs.........c....s.s....p.....autpshcsl.ppt..SPhShplshc.lcc....u+..p.........h.o.lt-shphEhpluhpshp....p.........s........DFhEGVRAhlIDKD..+s...P.c.....Wp..sslp-..Vs.ss..hVsphFs ........................................tphshIsctFu........t....s..olp...cIlps...Lcp................................s...s.s....p.....aApp....shc.s....l.tpt..SPhSl.plohc...t.........lpc.....ucp.....................hoLt-shphEhplutphhp.....p.........s........D..FhE.....GVRAhL.....lDKD..ps........P..p.......Wps..sslp-..Vs.sp..VpthF...................... 
0 192 370 537 +13609 PF13767 DUF4168 Domain of unknown function (DUF4168) Eberhardt R re3 Jackhmmer:A3JZQ0 Family \N 25.80 25.80 26.10 25.80 25.20 25.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.50 0.72 -3.35 72 184 2011-02-03 11:39:14 2011-02-03 11:39:14 1 3 138 0 75 191 5 81.90 23 54.79 CHANGED hocpplppaApAhlplpslcpchhpc.lps..................sps...................p.pphppl...tpcAppph..............l.........p......tl.cssGLolppaNpIsptspsDs..pLpp+l .........................................ospplppaApAhhplptlppphhpc.lpp........................sps....................................t.sphppl.......tpcuppph..............s.........phl.pssGLo...lpcaNpIsptsQsDs..pLpp+l...... 0 26 61 70 +13610 PF13768 VWA_3 von Willebrand factor type A domain Coggill P pcc Jackhmmer:Q3UR50 Domain \N 27.00 5.20 27.00 5.70 26.90 -999999.99 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.94 0.71 -4.53 14 1260 2012-10-10 16:07:06 2011-02-03 14:36:20 1 77 395 0 741 6397 466 152.20 22 18.63 CHANGED tclVlllDsSss.hpsp.....hhppulshhlcs..Lsspc.plsllshGs..ps..hh.ss....hhshssttlpthhthlps...hps..hGusslhsuLc...tsh....p...t........ttsthhcpllllo.cGs...t..ss....tcs..p.lp.p.t...tc..hphhshuh...ss..phs.s..shLptLAphupGthph 
..........................................................................................t.cllhll.DtSuS.M.........p.......u.................hphs...+.......p..s.......l...h...h.....h.....l....cs.............L.......s...................p.......s.......p......F........N.........l..l..s..F......us..........p.h...p...t.h.....h.....s.p................................h..t...h......s....p....p...s....h..p..p...A..h.p...h.lpp................lps....hG.....u...T....p......l....h.....p.....s.L.p............tsh.........p......h...............................ttt...s.p...t....l.h.l..l.T..DGp.....sp......sp.......................p.p..l..h.....ph...l....p...p...tt..................tp.........h+..l..a..o...hul..................Gp............sss...t..........thlp...tlAphst.Ghh..................................................................................................................................... 0 248 375 539 +13611 PF13769 Virulence_fact Virulence factor Eberhardt R re3 Jackhmmer:A3K275 Family This domain is found in conserved virulence factors [1]. It is often found in association with Pfam:PF02985 and Pfam:PF08712. 25.00 25.00 25.60 32.20 24.30 23.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.60 0.72 -4.12 53 419 2011-09-19 00:13:31 2011-02-03 14:42:30 1 9 419 0 60 256 220 81.70 45 25.41 CHANGED lha+sIPhQVhlspGcc....ps.+htLP-RFppAls+As....ttssDshlt-h+hs-..sst.G-hp.-lAcssspcl-AsYsp-RLcpLl ....hF+sIPhQlKLTsucp....Eh.RhtLPppFh-shspA.....tpssDNlVh.RKWl-..usRYGshE.ElhcsVlEEllAsYsEppLshLV....... 
0 15 36 46 +13612 PF13770 DUF4169 Domain of unknown function (DUF4169) Eberhardt R re3 Jackhmmer:A3K2D0 Family \N 25.00 25.00 32.60 32.50 18.60 17.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.72 0.72 -4.03 82 226 2011-02-03 15:43:01 2011-02-03 15:43:01 1 1 226 0 73 181 5 54.80 44 82.85 CHANGED u-llNLpphRKp+sRsp+ctpA-pNRspFGRTKAE+phscscsc+spcpLDu+.+h ..u-lVNLRphRKp+uRsp+cppA-pNRlpFGRTKsEKshscspsp+Ap+hLDtpRh.. 0 19 42 52 +13613 PF13771 zf-HC5HC2H PHD-like zinc-binding domain Coggill P pcc Jackhmmer:Q5W0A5 Domain The members of this family are annotated as containing PHD domain, but the zinc-binding region here is not typical of PHD domains. The conformation here is a well-conserved cysteine-histidine rich region spanning 90 residues, where the Cys and His are arranged as HxxC(31)CxxC(6)CxxCxxxxCxxxxHxxC (21)CxxH. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.92 0.72 -3.84 144 1247 2012-10-03 17:27:21 2011-02-03 15:57:56 1 127 218 0 775 2477 15 83.60 31 6.32 CHANGED HhhCuLassplh.ps.stt........slpsl.pphsppthphpCth..Cc........pp.....G.AslpCsttsCppsaHhsCAtpss.hhhp......hp.................t...........................phpsaCppHs ....................................HhtC.hl...aS.stlh.pptst...................t.lt.sl..cp..tlp...c..upph.....cCs.h...Cp.........................ch........G..Aol.u.........Cs....t.p..sCp...psaHhs........CAhp.s...t...sh.hp.........tp....................................................hphhC.pHp......................................................................... 0 191 283 499 +13614 PF13772 AIG2_2 AIG2-like family Coggill P pcc Jackhmmer:B3KMN7 Family This family is found in bacteria and metazoa. 
27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.77 0.72 -3.87 26 714 2012-10-02 16:39:48 2011-02-03 16:07:47 1 11 571 11 289 659 306 83.90 29 41.81 CHANGED cVaGsLaclshpshpsLDppEuVp...pGhYh.lplpV.....ps...pss.pp.lhsRsYhlss...p...ss....s....................PSppYLplllcGAhpsGlPpcYlctL ...................................................................pVaGlLacl.s........p..........-hpsLDphE.G.s.........hthYp.+.hp.lp.V...................ps....tsG.p...l..Ahs.Ylhs.s....h.......ps.....uh...........................P.SspY...Lshl...hc.G...Acps....GhPpcYlp.L........................... 0 97 180 239 +13615 PF13773 DUF4170 Domain of unknown function (DUF4170) Eberhardt R re3 Jackhmmer:A3K3X5 Family \N 25.00 25.00 26.40 25.60 21.10 24.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.24 0.72 -4.07 34 216 2011-02-03 16:14:24 2011-02-03 16:14:24 1 3 213 0 76 162 34 68.60 62 81.17 CHANGED pQLLHLVhGGELp..c..hss.spF+DLscl-lVGlFPsYssAhsAWKucAQpTVDNAcMRYFIlHlHRLLDPs .....pQLLHLVFGGELp..c..Lss.spF+DLpslDIVGIaPDYtSApsAWKuKAQpTVDNAHMRYFIVHLHRLLDPp...... 0 20 45 55 +13616 PF13774 Longin Regulated-SNARE-like domain Coggill P pcc Jackhmmer:C9J9A4 Domain Longin is one of the approximately 26 components required for transporting proteins from the ER to the plasma membrane, via the Golgi apparatus. It is necessary for the steps of the transfer from the ER to the Golgi complex [1]. Longins are the only R-SNAREs that are common to all eukaryotes, and they are characterised by a conserved N-terminal domain with a profilin-like fold called a longin domain [2]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.97 0.72 -4.43 180 1355 2011-02-03 16:49:18 2011-02-03 16:49:18 1 14 344 17 898 1268 11 84.60 23 37.83 CHANGED hu+.pllp+ls.......ss...s...+tohp.pss.ahhHalh...p......su.................lsalsls-c.sa.s++lAFsaLp-lpc-Fhp............pas..........t.phtsss...sauh.......hpFssh ....................................uctlhp+ls.......ps.....................s+hohp..pss...ahh..H...ahs....p.................su........................lsalsls.-c..sa..scclAFsaLpcltc-Fhp...............pas..........p..phtss.....shsh....pFs.............................................. 0 294 502 724 +13617 PF13775 DUF4171 Domain of unknown function (DUF4171) Coggill P pcc Jackhmmer:B0QXZ9 Domain This short family is frequently found at the N-terminus of Homeobox proteins. 27.00 27.00 36.40 36.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.73 0.71 -3.96 4 63 2011-02-03 17:52:59 2011-02-03 17:52:59 1 2 34 0 25 53 0 128.00 76 23.90 CHANGED DTAMDLLKAITSPLAsGS........KPSKhhup.ssuSSspScS+KE............HH+KhGsuSo......DssSH+SKK.h.hasss...EsLTLREPDGLKMKLILSPKEKu..........pSSssppuhth.uppAosKKsSKKpuR-Ept DTAMDLLKAITSPLAAGS........KPSKKTGEKS.Su.S....S....S......HSESKKE.............HHRKKlSGSSGEL...sLEDGuSHKSKKMKPLYVNT...ETLTLREPDGLKMKLILSPKEKG..........SSSVDEEuFQYPSQQATVKKSSKKSARDEQG................................................................................ 0 2 4 10 +13618 PF13776 DUF4172 Domain of unknown function (DUF4172) Eberhardt R re3 Jackhmmer:A3K4N3 Family The family is often found in association with Pfam:PF02661. 
25.00 25.00 27.10 25.80 23.90 23.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.45 0.72 -3.88 93 374 2011-02-04 09:14:32 2011-02-04 09:14:32 1 4 341 0 117 349 80 79.70 41 22.14 CHANGED aIWQpscWPpFpWDsppltshLppsphpQGtLlGph..pslu.s.pppstL-sLsp-llcoStIEGEpLstpSVRSSlAR+LGl .........aIWQpscWP........pFpWDtstltshLppsphppGhLlGch......psls.stppptsL.-sLhpsllpS..StIEGEtLNhtSVRSSlAR+LGl........ 0 29 64 91 +13619 PF13777 DUF4173 Domain of unknown function (DUF4173) Eberhardt R re3 Jackhmmer:A3K0W3 Family This domain of unknown function contains multiple predicted transmembrane domains. 24.10 24.10 24.40 24.40 23.60 23.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.45 0.71 -5.01 54 229 2012-10-01 23:40:40 2011-02-04 09:19:05 1 2 227 0 76 246 7 185.70 26 36.61 CHANGED LsllslLahlasslQlshLa..GG...ssl...ss.GhoYA-YA+pGFapLlhsslLshlhlll....s...pth....hp.....c......sp.....l.l.+sLlhlhsshslllluSAhhRhpLYlssYGLThhRlhshhahhhlul....slllllhplh...tthsstalhptsh.hssssslh.shuhhN.-shIAchNls+..t.....pssplDhpYLs.s..Lu..scAhPultch. ....................................hhlshlahhFshlQhshLa..uu......tth...ss.uh.oY..u.pYA+pGFapLlhlslLshsllhs....s.....phh....sp.....c......p+.....h.l.+hlhsll.sshoh.lllhSAhhRh.slYlptYGhThhRlhshhahhhlsl.hhllllhplh....+th.....tthh........hpt.sh.hs.shsshh.hhshh........s.-thlAphNlpp.....................thtthDh..Ylt.t..Lu...sAhshl....h.................................. 0 32 57 66 +13620 PF13778 DUF4174 Domain of unknown function (DUF4174) Eberhardt R re3 Jackhmmer:A3JZK5 Family This domain of unknown function is found in a putative tumour suppressor gene [1] and in a ligand for the the urokinase-type plasminogen activator receptor, which plays a role in cellular migration and adhesion [2,3]. 
27.00 27.00 32.10 33.70 25.30 26.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.38 0.71 -4.00 85 508 2012-10-03 14:45:55 2011-02-04 15:36:30 1 7 241 0 222 467 183 122.10 31 45.45 CHANGED ssLspatWppRslllFAssssDsphppQhphLpp..pt...ssLs-RDllllss.sssssps...........................sLRpphc....sps...FthlLlGKDGthKlR....tstPh.sscclhcsIDpMPhRppEh .........................LspFth+pRlLlloAPsssshhap.QhshLpp..st...CslstR+lsllpl.hssssppss.....................p.hss.th..h.pLRphhpls.....pt...FshlLlsKDGpsKpc....astPl.shptlashIDshPhRppEh..................... 0 17 53 116 +13621 PF13779 DUF4175 Domain of unknown function (DUF4175) Eberhardt R re3 Jackhmmer:A3K2P3 Family \N 26.40 26.40 26.60 26.70 26.00 26.30 hmmbuild -o /dev/null HMM SEED 820 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.60 0.70 -13.46 0.70 -6.84 55 303 2011-02-08 14:41:21 2011-02-08 14:41:21 1 2 262 0 113 278 125 681.10 30 90.41 CHANGED ERlh.shWPlhollhlhLuhshhGLaphlssthhhss.lsl....hsluhl.suLhhslh+aRhPsRsEALsRLDt.s.......LstRPlsALtDs.AhGssDssutALWpsHQpRMAsphuph+ushPc.+luc+DPaALRhlAlLhlls.A..hha..u...u..h...hRluslsshhsu.sus..s..u.s.usph-uWlpPPsYTGcPslaL..............s...sts...............ss...........slsVPpGSplolRhhG..........t.su...slsls..........ps.sos..p.s.............ss.ssss...........................s...........sp.......pcFs..lsp.sGslslp..usu.....uc....sWplsllPDpsPpIshss.sPct.sspGshplsapAcDDYGVssucApIs.hsh....s.sssc..p.Ghssc......P.........hhcs.plsLsLPhsusRsshstthlcDLocHPWAsh.VplTLsApDuAGQpGpSpshphhLPuRpFhcPLAtAlIEQRRsLhhstssss.cVsplLcAlohpPEc.hhtstssYLtLRshhpRL-......tu......ho--shc-lsstLW-lAltlEDGsLusAcc+LRcAQ-+LpEAlcpGASD.EEIscLMpELRcAhp-Yh+pLAcptppssp..pt..tps....p....ssp...phoQpDLpcMhDRIpELhcpGchspApphLcpLQpMMENhQssQ.....sQ...t..........ts...p.u.ttp...QuM-pLu-hlRcQQpLpDcsFRphQ-.p...p.....s.......QpGp........p.pspp.sp.............s.pt.Gp...s.Qt.................sp...pttp....sp....stp..........psppsp.........tt......p....p..ttps..............Lu
ccQpsLp.ccLpct.p.ppLsstGsp.uttup-uLscAscAMcsApcALtcGchspAlDpQucAh-ALRcGhcsLuEtM.....tpp..........ptpt....pGt....tspt....t.Gp.....ts..s..pspDPLGRstssp.G..ssss.psh.lssE.shpRAR-lL-ElRRR.G-ssRPplEhDYLcRLLc ............................................................................................ERhh.hhh.hhhllslhhuhuhhGlhthhP...hh....phhhlsh.hshshl.hsl...hsh.p.h.R....Pppt-h...tRl..-..t.ss.....tLsppPlss.p...Dp.A...ss...tcsh....u......A.LWptHppRhttpltplpss.....h..P.c..c.hstpDPauL.Rh.hshlhhls.A..hhh...u.......s.t....tRhupshs.h.ssts......h......ssp..l-.uWlsPPtYTGpsPlaL.....................s...sts...................ts...........slslPtGShl..s..lRhh.G..........tss...thshs......ts..st...t..............t....stt...............................................................s...hp.....hph..p..lpp.stsltlp..u.s...st........pWphsshPDpsPplthst...tP...c...t.....t...h..p.G.p.hpLs..aphpDDYGlsp.upuplh.............s..............t....p......P................hts.phsLsLPhtsscps....h.........pDLopcPaAG..VtlsLsspDsAGppGp.Stsh.hhLPtR.F.pPlAtAlhEpR+.Lshstppt..pshphLpAlh.htP-t.hh.s.shaLtLtshhppLp...............hu...hscp..thppssshhWplAltlE.....p.....G..s.....lu..tApcp..LRtAQptLppAl...p..........p..........s.....A......u...p........pEItcLht...-LRpAhpcahpt.hApp.tpssp.t.....t........tst..hppp-LppMhcplpphhcpGptstApphLp....plpphhpNhp...hsp............sp..p........................ttpsphp...pthp...pLs-hh+cQQphhscoa.pt.pp.php...t............p.st........p..ttt.t....................................tt............................................t.......t....t.....t.............tt.....th.....t....p....t.pp...............................htppQptlp...ppLp.p....pt..h.t.thp..............stps.....hscAtpuMtsAtpALt.p..sp..hstA.stQupAl-uLRcGtpphhptM.....tpt..................................ttt.....t..........stt....t.s......................tttttpDPLGR......t...sp.s...t..sp.psh.lssc..shpRARcIL-ElRR+.upt.pPp.EhcYLpRLLc........................................................................ 
0 35 71 88 +13622 PF13780 DUF4176 Domain of unknown function (DUF4176) Eberhardt R re3 Jackhmmer:D3E5E1 Family \N 25.00 25.00 25.10 25.30 23.20 24.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.60 0.72 -4.26 56 651 2011-02-08 14:46:40 2011-02-08 14:46:40 1 3 430 0 56 331 1 72.40 40 70.15 CHANGED lLPlGSVVhLKs..usp+...lMIluRt.hh....psppp...haDYsushYPpGhhs..-pshhFNc-DIpcVlFpGYpD--Ehpat ..............slGSllhLcp....sp.pp...............lMIlsRt.lh....pspph.............haDYsushYPhGhlp..-plhaFNc-sIcpVlFcGYpDp-EhpF.............. 0 17 34 45 +13623 PF13781 DoxX_3 DoxX-like family Eberhardt R re3 Jackhmmer:D3E5H6 Family This family of uncharacterised proteins are related to DoxX Pfam:PF07681. 25.00 25.00 25.40 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.10 0.72 -3.66 76 297 2012-10-02 13:32:46 2011-02-08 15:41:34 1 3 292 0 87 289 15 100.70 27 34.60 CHANGED uhlWlhsGllshhlhs.st.phplLsph.Ghssthushhlhhsu.l.h.-lshGlhhlh.thpp.+.hshhhpl.h.ll.luhhlhsshhhPphahcPasPlsKNlsllsLth ...........................shlWlapGllsth.....lhstst.phplLsth.shss..thu...hh..hlhhsuh.h-lhhGlhhLh...hhpt....+.thhhhplh.ll.luholssu.hhtPthahcPFsPlstNlslhsls........................... 0 21 41 65 +13624 PF13782 SpoVAB Stage V sporulation protein AB Eberhardt R re3 Jackhmmer:D3E7E2 Family This family of proteins is required for sporulation [1]. 25.00 25.00 27.10 26.10 22.30 21.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.34 0.72 -4.08 43 225 2011-02-08 15:45:58 2011-02-08 15:45:58 1 1 225 0 34 157 7 103.60 50 80.58 CHANGED lssLGllPRhuplT+TtcplhhYEsslllGullGshhsla.chpl.h.....ut.hllslhGLhuGIFlGslAsALsElLNVhPIhuRRlplpptlshllhulAhGKslGSLhaahh ....lslLGllPRLsplT+ohcpl.haEhullhGslhGshhslh.phsh.h.....................up.ahLlllGlFsGhFlGMLAAALTEVLNVLPILAKRlGlc.spIllLLhAlVLGKllGSLFaWl........... 
0 10 24 26 +13625 PF13783 DUF4177 Domain of unknown function (DUF4177) Eberhardt R re3 Jackhmmer:D3E8A6 Family \N 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.10 0.72 -4.34 50 606 2011-02-08 15:50:16 2011-02-08 15:50:16 1 2 570 0 167 416 98 52.20 41 82.95 CHANGED paEYcsltl.ht.tts.......tphcplLspaGp-GWELVpllss.......t.t..t..shhshhKRp ................................pWEYtT.lPlhs..+uo.................cp.......ILspaGpDGWELV.pllsu...................st.-phluYhKR............................. 0 58 133 159 +13626 PF13784 Fic_N Fic/DOC family N-terminal Eberhardt R re3 Jackhmmer:A3K4Q6 Family This domain is found at the N-terminus of the Fic/DOC family, Pfam:PF02661. 25.00 25.00 25.00 25.50 24.90 24.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.70 0.72 -4.12 115 562 2011-02-10 11:07:32 2011-02-10 11:07:32 1 9 506 2 173 510 92 83.30 34 23.18 CHANGED llphlspAspALucLcuhsphlP.Nsslll..sshshpEAhhSSpIEGT.oTh--lhph-tsts.s........tss-scEVhNYhpALptGhcp ..................................hptlhpAptuLucLcuhuphlP.N.s..lLl....sslshpEAhtSScIEsh.hoThD-Lhphptptst...............ssss+EVhsYtsALptGhc............. 
0 67 135 157 +13627 PF13785 DUF4178 Domain of unknown function (DUF4178) Eberhardt R re3 Jackhmmer:D3E9P9 Family \N 25.00 25.00 25.00 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.82 0.71 -4.19 54 460 2011-02-10 11:12:14 2011-02-10 11:12:14 1 3 375 0 119 366 29 142.20 22 59.83 CHANGED LtsGDhlph.....s.ut..sahV..pGphp.....h..p.pushpWtEahLp..s.sspttWLslE........--splphslhpphsshs...............sssplph....-GppaphsEpGsAp...hpupussshtsu....sphpah-Y..tusss.phLuhEtass........ph-hspGphlsst- .............................................Lt.Gshsph..........s.sh..papl..hGphp....................h.......p...ps..t..tWh..EahLp..s..spshtaLplE........-cs.ph...thplhsthssht........................................psssphph.........csp.p.Yph.p-p.....hpup.....sssp.spt.s..h..tsu.......pphphh-a....pusst..thLshEh.ss...................ph.h.pGc.l....t.................................. 0 36 62 96 +13628 PF13786 DUF4179 Domain of unknown function (DUF4179) Eberhardt R re3 Jackhmmer:D3EAG2 Family \N 25.20 25.20 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.04 0.72 -3.76 49 573 2011-02-10 11:15:32 2011-02-10 11:15:32 1 7 242 1 126 525 5 99.40 20 25.09 CHANGED p+h.cp...+p....htp.tth....sh..s...sslh....lh.....l...hs....o......ssaAt.h........ts....l.hph..h.p..p....pGhpp..sh.pptaupt..lsp....oho.spGlplTlscllhDcsplhlhYplc.spc ....................h...pp....+t....hhhpts........su..s....uslh.......lh........h....hs.........s...........sshA.ssls................hhss.....lhph...h.s..tc.............pG.lpp.............sh.pt..th.s..pt...l..sp................ohp..spGlslTls-lhhDs.splsltYplcpt............. 0 57 114 117 +13629 PF13787 HXXEE Protein of unknown function with HXXEE motif Eberhardt R re3 Jackhmmer:D3EB69 Family This domain contains an HXXEE motif, another conserved histidine and a YXPG motif. Its function is unknown. 
25.00 25.00 26.40 26.40 24.50 24.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.71 0.71 -3.72 142 380 2011-02-10 11:30:58 2011-02-10 11:30:58 1 2 312 0 94 360 151 114.00 21 63.39 CHANGED halh.llahlH.phEEhhh....h.tahspph.th.....................hhosptFhlslhhhhlhhllhshlsth...........hhhhhhlhh..hhphl.hHlh.s.hhh+p..YsPGlloulllhlPhulahh ....................................hlh.hhahlH.phEEhhh........h..a.hpp...th.th......................hhsspshhlslhhtallhhlhshhshh.t.........hhhhhhhta...hhphl..hHlh.....s....hhh+p..........YsPGlhTullh.lPhulhh.............. 0 33 66 78 +13630 PF13788 DUF4180 Domain of unknown function (DUF4180) Eberhardt R re3 Jackhmmer:D3EBG4 Family \N 25.00 25.00 36.20 36.10 23.70 22.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.34 0.71 -4.43 45 213 2011-02-10 11:34:24 2011-02-10 11:34:24 1 3 206 0 58 178 5 109.50 43 79.20 CHANGED hc.psshplshlsucssllsstp-ALD.Lluss.aptssstlhlstspls--FFsL+TtlAG-ILQKFlNYcl+lAllGDhSta..sSpuL+DFlaESN+GpplaFlssh-pAlp+L ............h...sshplshlpsspslIsssQsALD.hhuss.aEtsscplllpcshloEDFF-L+TpLAG-ILQKFlNY+lKlAIV.GDFShY..sS+uL+DFIaEsN+GpclaalsocppAlc+L.......... 0 25 48 56 +13631 PF13789 DUF4181 Domain of unknown function (DUF4181) Eberhardt R re3 Jackhmmer:D3EC26 Family \N 25.40 25.40 25.40 25.40 24.90 25.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.20 0.72 -10.65 0.72 -4.13 31 197 2011-02-10 11:38:06 2011-02-10 11:38:06 1 1 123 0 28 151 0 94.60 34 86.50 CHANGED lhhhhh-phl+KKL....sltKpthh...caVNphHchlEhhl..hllhllsh..hhhhhh.c........h.hthhhhhahslhhshRuaMEWKY.s+-oKc...Yllolhthh...hlllhuhhhhhh .................hhhp.hlR++L....sh.K..ptha...caVNphHhhhphhl..hhhalls...hhhh...p........h.h.thlhhhahhhhhshcu.aMEWKY.D+ESKEYllSlhshh...hllhhullhhh....... 
0 3 24 25 +13632 PF13790 DUF4182 Domain of unknown function (DUF4182) Eberhardt R re3 Jackhmmer:D3EDS0 Family This protein of unknown function contains a number of highly conserved cysteine residues, which may form disulphide bonds. 26.00 26.00 35.60 35.40 25.90 25.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.01 0.72 -4.16 13 277 2011-02-10 11:50:17 2011-02-10 11:50:17 1 1 126 0 21 82 0 38.50 73 89.38 CHANGED MGTIVCQsCssTIsaFE-EKVTsLYGpCsp.C-Ccppcc ..MGTIVCQsCEGTIuHF.E.D.EKsTVLYGKCGo.CcCc+cEc........ 0 3 12 14 +13633 PF13791 Sigma_reg_C Sigma factor regulator C-terminal Eberhardt R re3 Jackhmmer:D3EDV4 Family This family is the C-terminal domain of a sigma factor regulator, this may represent a sensory domain [1]. 25.00 25.00 25.80 27.10 24.80 24.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.76 0.71 -4.18 37 457 2011-02-10 11:53:04 2011-02-10 11:53:04 1 6 316 0 49 336 1 146.30 29 45.71 CHANGED plpclspsplsEsulSFD+sYohcE..lpphhspt..........sWhhlcTts-ppppt.t........................ttpshGat....spt.ph....tpp.tp...cpFl....stLchhscppphsphhs............hppphchlpp.pslclhGlllTGpsc-ltpLtspshl+uuslGss ...................................Lpch..pspVsEVAlSFDKsYohpE..lpphlPsshp........lWhh..lsotpc.pcpps.t....................................sttshGFt.........ph.p....p..........t.....p..p..pp...ppFh........ctLc.phspp.pct....p..t.p.............................htpphc....hcp..pslclaGlllTGpocphppLpspsal+uAslGsh................................. 0 8 27 38 +13634 PF13792 Sulfate_tra_GLY Sulfate transporter N-terminal domain with GLY motif Coggill P pcc Jackhmmer:C9JKR6 Domain This domain is found usually at the N-terminus of sulfate-transporter proteins. It carries a highly conserved GLY sequence motif, but the function of the domain is not known. 
25.90 25.90 26.80 26.80 25.80 25.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.49 0.72 -4.31 377 6864 2012-10-03 01:44:59 2011-02-10 11:55:28 1 50 2950 0 2486 5940 1281 81.00 30 13.90 CHANGED lP...hl..p.....W......l.p....Ys....p.hl.tsDllAG....lTluhhhlPQulAYA.h.lAG..lP..P.hGLYuuhlssllYulaGoS+plslGPsA.s.h.o.l.l.l.us.s.l ........................h.t........hp.......p.ph...ttDll...AG.................lsVullh.lP..............u........lAa......A..l..luG.........ls...........PhhG...L...Yuuhls.sl....lhu.lhGuSp..thls...........G.Psuuh.ulllssh.h................................... 0 663 1334 2000 +13635 PF13793 Pribosyltran_N N-terminal domain of ribose phosphate pyrophosphokinase Coggill P pcc Jackhmmer:C9JDH0 Domain This family is frequently found N-terminal to the Pribosyltran, Pfam:PF00156. 25.00 25.00 25.10 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.32 0.71 -4.35 429 6720 2011-02-10 13:49:44 2011-02-10 13:49:44 1 18 4765 46 1992 4453 2459 117.60 47 35.76 CHANGED lplFoGsus....pLAccIuctL.........................Gl...Lu..p.......splp+FuDG..........EltVpl..........pE....oVR..GpDVFllQoT.s..........s..............P.lN-sLMELLlhlDAh+RASA.p+ITAVlPYaGYARQDR...................K.sps........RsP.........IoAKL.......VAsllpsAG ..............................................................................h+lFuusus....pL.Ap.cl.App...L....................................................ul..t..LGc......sslp+FSDG..........ElpVpI...............pE.....oV.R....G.p...D.V........FllQST.s....s..............................................P.sNDsL.MELLIMlDAh+R....A.SA..pp..I....o......sV..l..P....YaGYARQ.DR...............................+..s+u........RtP.........IoAK.L.......VAshLpsAG................................................................................................... 
0 654 1213 1657 +13636 PF13794 MiaE_2 tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE)-like Coggill P pcc Jackhmmer:A0JYP2 Family \N 21.50 21.50 21.50 21.80 21.40 21.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.07 0.71 -4.80 3 288 2012-10-01 21:25:29 2011-02-10 15:50:20 1 1 287 4 74 200 13 176.30 42 79.12 CHANGED sRYccuVVDLLGALAYGELSAFERLAEDARhAPTLsDRAsLA+MAuAEF+HYEhLcDRLAuhGl-sE-AMcPFVAAaDsFHupTcPuDWLESLVKAYVGDolAADFYREVAcaLDsuTRELVLsVLDDTGHouFAcE+VRAAlAuDPRLuuRLALWGRRLLGEALTQAQRVVAERDALuoLIlGG ................................................................t.......h.cLhuhl...AYuElu.AF.RLsc-.u.chAPsLssR...ht...lAsMAuAEh.t.H.act.LRstLsc.R.Gs-s..htAM.pP.asuu.l-sa+t...T......s..P...psW...hEu........LVK.sYVuDuLAuDFYpclAssL...s.sp.s+sl.Vt.u....s....L....s.-..TGpupFss....ucVRuAls...A.c.sp.+uRLuLWuRRLlGEAlopAQhlh.Ac+ppLsshlh..s........................................ 1 24 59 71 +13637 PF13795 HupE_UreJ_2 HupE / UreJ protein Eberhardt R re3 Jackhmmer:A3K636 Family These proteins contain many conserved histidines that may be involved in nickel binding. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.78 0.71 -4.73 99 405 2012-10-02 18:22:22 2011-02-11 10:42:20 1 4 310 0 155 468 1125 151.10 36 51.33 CHANGED alhlGhcHll..pGhDHlLFlluLlh........................hhtph+cllhhVTuFTluHSlTLslusLshls.....ls...ut...llE..slIulSIlhtAl-Nl...............hhshh.thphhhshhFGLlHGhGFAshLp.phslspss..hlhsLLuFNlGVElGQlhllshlLhlhhhhpcth ....................................alhlGhcHll..pGhDHlLFLlu..llh.............................hhtph+c.llhhVTh.F.TluHSlTLhl.ushshl.s............ls...sh...llEslIulSIlahA.l-Nl....................thht.ht.tphhsshhFG..LlHGhGFAshLt..-.h..s...ls..p...s..........s..l.lhsLluFNlGVElGQlhhlshlLhlhhhhpp.h............................................. 
0 58 109 136 +13638 PF13796 Sensor Putative sensor Eberhardt R re3 Jackhmmer:D3EFN7 Family This family is often found at the N-terminus of proteins containing Pfam:PF07730 and Pfam:PF02518. The N-termini of proteins containing these two domains often function in stimulus sensing. 26.00 26.00 26.60 26.50 25.90 25.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.60 0.71 -4.67 87 465 2011-02-11 10:52:36 2011-02-11 10:52:36 1 7 224 0 205 504 14 172.20 21 46.00 CHANGED allhuhshulsshshlhss.lslus.slslhhl..G.lslLssslhss+shuthERtt.spthh..sh.plstP........st......suhhstltshlp.-suoWRslhahll.phsluhlshhlsssh..hshulshl..shPLhhhhhsss.hs.hh.............hhlsohspuls..hsh...l.Gllhl.hlsha..ls.sslsp....hpuhhs+uLL .....................................allhshshulhh.hs.hhlsh.lshuh..slsh.hhl.G..lPlL.s.h.slh.ssRs.huphERtp.scthh.......uh.pl.st.s..........tt............................suhhtp..lt.thLp.DssoWRslhahhl...phsluhlshhlslsh.hshs.lh..hl......hh....Plhhhhhs........h......................................h.lsshstshh.hhh....h.Gllhh.hhshhls.tshsthpshhspthL................................................. 0 68 162 199 +13639 PF13797 Post_transc_reg Post-transcriptional regulator Eberhardt R re3 Jackhmmer:D3EKT1 Family This family includes post-transcriptional regulators [1]. 25.00 25.00 30.60 30.50 21.80 21.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.57 0.72 -4.39 26 293 2011-02-11 10:56:17 2011-02-11 10:56:17 1 1 293 0 38 158 1 86.50 35 87.77 CHANGED phcta+cp.lpPsLpsKh-EF+hLGYcplot-clWsaLhp+KWK+ppt...hpLaEllsDIhslphs-YMsahTlcuhcssshhhsctspc ..........h..p.hppp.LpsslcpKsEEF+hhGYcplsp-DlWpaLpscKWK+tss...lpLaEhlsDlhplpssEaMsYlslpAhpus...hsph-.................... 0 15 28 30 +13640 PF13798 PCYCGC Protein of unknown function with PCYCGC motif Eberhardt R re3 Jackhmmer:D3ELL0 Family This domain contains a PCYCGC motif and four other conserved cysteines. Its function is unknown. 
25.00 25.00 30.10 26.80 22.70 22.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.17 0.71 -4.54 13 157 2011-02-11 11:09:16 2011-02-11 11:09:16 1 2 138 0 31 109 15 151.10 54 89.50 CHANGED hh.hllshullLuGCussssp-pspscp...........pppssGDIpEpTuSs-pLPoFLcs.ps-slpplYptuupppELL-aIPCYCGCGESAGH+sNhsCFlpEp+cDGslVWDDHGT+CGVCLEIAspShphhpcGKShK-IRphIDEpYKEGYAKPTPTPMP ............................lhu..hlsllSllLuGCGususs-..p.....psucp.pppt.......pto+oppuD.IQEcT..KulDsLPoFL-c...h-spM+cIYslAGpssELL-aIPCYCGC..G.E.SsGHKNN+NCFI+EIKKNGc.V.V.WDoHATsCssCLEIAVESuuM+QcG.K.S.shEIRsaIDsKYKEG.YuKPTPTPMP........... 0 9 22 25 +13641 PF13799 DUF4183 Domain of unknown function (DUF4183) Eberhardt R re3 Jackhmmer:D3EMB4 Family This domain of unknown function contains a highly conserved ING motif. 25.00 25.00 25.10 36.50 24.70 22.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.89 0.72 -4.17 22 220 2011-02-11 11:15:06 2011-02-11 11:15:06 1 6 107 0 31 144 6 77.60 40 52.48 CHANGED sssh+ahaTss-clthhustsIsust...................suYhNLaINGVLQ.tshY.sls...sutLsLpssss..htGsPIllph .................h.suc+hh.TsusGhuthushsIsussh...................hoYhNLaINGlLQssshh.sVo.......TGslTls........ssss..lssGoPIhlEF.. 0 11 21 21 +13642 PF13800 Sigma_reg_N Sigma factor regulator N-terminal Coggill P, Eberhardt R re3 Jackhmmer:O07581 Family This domain is found near the N-terminus of a sigma factor regulator. The N-terminus is responsible for interaction with the sigma factor [1]. 25.00 25.00 25.00 25.70 24.70 24.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.22 0.72 -3.96 22 353 2011-02-11 14:42:16 2011-02-11 14:42:16 1 4 247 0 37 251 0 91.50 25 27.43 CHANGED .Kphl++uKhKthlp.h.llslhl.hlllhhhhhhhshl.ah.sp....spch.hpshphhhplopPNshlssp.hp.sphs.hFutshph.sl.KplGscsl. 
..........h.ct.h++uK+Kphlphh..llslsh.sllllshhh..h.shhha..sp.....phpcl.p-hhshthplst.PNsphsup..ht.sssp.hFusphph.shhKslsshsl.................. 0 6 20 28 +13643 PF13801 Metal_resist Heavy-metal resistance Eberhardt R re3 Jackhmmer:A3K6W9 Family This is a metal-binding protein which is involved in resistance to heavy-metal ions [1,2]. The protein forms a four-helix hooked hairpin, consisting of two long alpha helices each flanked by a shorter alpha helix. It binds a metal ion in a type-2 like centre [3]. It contains two copies of an LTXXQ motif. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.29 0.71 -4.14 97 966 2012-10-02 12:34:46 2011-02-14 13:53:14 1 3 826 26 248 1113 79 123.10 29 76.98 CHANGED sSLslNlhllGslsusshph.ssttstt................thht.hhtsLsscppctlppthcstt.t...phpshppph.cpttpplhphltApshDssslpsslsptpptptphppthppthlshhsshss...cpRtt ...............................................................................................th.......hhhul..uhuhss.ss.uhspsup.............................shhp..ps..s..s..sL.....T..sEQpst...h...pclhs-ah..s........ps...s...u.L.....p.....p.....p.......l..hscR........hE.h....s....A....LL.s.As.ssDsu..c.lsAlsc-hcslpppLcchp.lcpslshspshhPct...st..................................... 1 81 156 203 +13644 PF13802 Gal_mutarotas_2 Galactose mutarotase-like Coggill P pcc Jackhmmer:Q8FC64 Domain This family is found N-terminal to glycosyl-hydrolase domains, and appears to be similar to the galactose mutarotase superfamily. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.35 0.72 -4.12 218 2671 2012-10-02 23:57:29 2011-02-14 13:55:17 1 55 1668 60 985 2314 64 65.70 31 7.77 CHANGED sEphYGlGE.+.s.....s.sls....++.....G................pphch...aNt....Ds..hsh.p..ps.......csh.....YpsIPFhlshp.......ss..p.......u.aGlFasNstcshaD ....................EplYGLGE.+..h.....s..sls....+p........G.............................pshch.....aNp...Ds....hs..p...ss....................pth..........YtslPFhh.................ss....p..............u..a....Glahssstps.h............................... 0 358 606 839 +13645 PF13803 DUF4184 Domain of unknown function (DUF4184) Eberhardt R re3 Jackhmmer:D3EE91 Family This domain of unknown function contains several highly conserved histidines. 25.00 25.00 25.20 74.50 24.60 24.80 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.81 0.70 -5.06 38 172 2012-10-01 21:01:47 2011-02-14 14:10:33 1 1 170 0 22 182 3 235.70 41 94.91 CHANGED MPFThSHPAhllPlp+hsh....LshsALllGSMsPDh.YF.....hthp.......tsshuHshhGlhhhsLPlslllhhlaphll+psLhphlPh.hp.phhs.hp.....hh.hcph.l....lhlhShllGshTHllWDuFTHpsGa.hVpthshLpppl..thhs..tlPla+lLQauSollGllhlhhhhh+hhpppspp.t..h......ptKhhhhhhlh...lhuhlhhhhhhhhhs..h.....hhhhsphllshlsuhhhulllsshlh ..MPFT.F.uHPAAVLP.hsK...+p...s.pt...lsloALlLGSMAPDFpYF...........lpF+P..........aGslGHsWhGhlahNLPLshLLAhla+hllKcPhlsaLP+sas.shasa.shs...p..tas..htoh..+pa..h.......VF..shSALhGhlTHVlWDuFTH..psGa.FVhplshLpppl.......h.sIPlYKhhQHGSTslGlllLlahla....+atpc..psp.chhhth.......pcKhpaWhslh...lluhll.hhlashl.s.a.h.....hatlGthIVshlsu.hsuhhlsslla......................................................... 0 7 16 21 +13646 PF13804 HERV-K_env_2 Retro-transcribing viruses envelope glycoprotein Coggill P pcc Jackhmmer:P61569 Domain This family comes from human endogenous retrovirus K envelope glycoproteins. 
19.40 19.40 23.30 22.20 18.20 17.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.16 0.71 -4.74 16 352 2011-02-14 16:01:45 2011-02-14 16:01:45 1 5 6 0 22 230 0 167.60 89 44.44 CHANGED TPVTWMDNPIEVYVNDSVWVPGPTDDRCPAKPEEEGMMINISIGYRYPPICLGRAPGCLMPAVQNWLVEVPTVSPISRFTYHMVSGMSLRPRVNYLQDFSYQRSLKFRPKGKPCPKEIPKESKNTEVLVWEECVANSAVILQNNEFGTIIDWAPRGQFYHNCSGQTQSCP ..........................................TPVTWMDNPIEVYVNDSVWVPGPTD.DRCPAKPEEEGMMINIS..IGYRYPPICLGR......APG......CLMPAVQNWLVEVPTVSPISRFTYHMVSGMSLR..PRVNYLQDFS.YQRSLKFRPKGKP.CPKEIPKESKNTEVLVWEECVAN..S..AVILQNNEF.GTIIDWAPRGQFYHNCSGQTQSCP............................................................................. 0 17 17 17 +13647 PF13805 Pil1 Eisosome component PIL1 Coggill P pcc Jackhmmer:O74960 Family In the budding yeast, S. cerevisiae, Pil1 and another cytoplasmic protein, Lsp1, together form large immobile assemblies at the plasma membrane that mark sites for endocytosis, called eisosomes. Endocytosis functions to recycle plasma membrane components, to regulate cell-surface expression of signalling receptors and to internalise nutrients in all eukaryotic cells. 
27.00 27.00 27.10 27.70 26.30 26.30 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.76 0.70 -5.25 7 285 2012-10-03 12:17:00 2011-02-14 17:38:51 1 8 137 3 215 271 1 249.20 51 65.78 CHANGED MHRTYSLRsSRsPTASQ..LpsPPPPsSoTKotpaFGpsulu.aohR+ssAGuhGP-LuRKLuQLVKhEKNVhRShElsuRER+-sA+QLShWGE-.................sDDDVSDVTDKLGVLIYElGELEDQaID+YDQYRlTLKSIRNIEuSVQPSRDRKpKITDpIA+LKYK-PpSP+IsVLEQELVRAEAESLVAEAQLSNITREKlKAAasYQFDAh+E+uEKhALIAGYGKtLLELLDDoPVTPGEoRPAYDGY-AS+QIIhDAEsALspWsLDsAuVps ............................................................h.stst......................ttsss.s...S.osp.s........hh..sp..t..uhu....pshR........ht.s.t..G...shs.P..-Lu....+KLspLlKhEKslhcu....hEhsu+ERhpsApQLShWG.ps..........................s.D.-.D.VSDlTDKLGVLlaElGELEDpas-+.aDpYRlolKSIRslEuSVQP.SRDRKp..KI....o....DcI....A....p.L.......K.....Y...K..........-.....P.......p....S....s....K.......l.......sV.......LEQE....L..VRAEAEoL.....VAE.......AQLSNITRp.......KlKsAasapFDAlhE+uEKhAlIAsaGKtLL-L..l..DDoP....VsP......GEoRsAYDG..a-s.o+pIl.DAEsuLpsWp.s.s.h....................................................... 
0 56 115 183 +13648 PF13806 Rieske_2 Rieske-like [2Fe-2S] domain Coggill P pcc Jackhmmer:Q8FCX0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.21 0.72 -4.31 77 1917 2012-10-02 12:49:59 2011-02-14 17:40:20 1 23 1666 11 493 2846 625 103.40 36 52.55 CHANGED sWpslCslcDlhPsoG..VsALl.s..spQVAlF+s......t.s.s.....plaAlsNhDPhupAsVLSRGllGs.h.tGchsVASPLaKQ+FsLpoGpClE...................sss..hsltsaslRlp.sGpV.Vp ....................................................................WhslC.pl--.lh....P.t....sG.....hs.s..l..l.......s......s.......c..p.....l...AlF+s......pss...........pla.AlsNhsPa...c..u.....ul...L.....S.....c......G.l...........l............u................s.....c........t..............t..............p........................h.......V.....ssPL+KQ+Fc...L.....p.....s......G..t..s.h.-........................................s.-.p...hsVpsYss+.Vc..DGtV.l.h........................................... 0 114 271 403 +13649 PF13807 GNVR G-rich domain on putative tyrosine kinase Coggill P pcc Jackhmmer:Q8FKJ4 Family This domain is found between two families, Wzz, Pfam:PF02706 and CbiA Pfam:PF01656. There is a highly conserved GNVR sequence motif which characterises this domain. The function is not known. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.62 0.72 -4.23 72 3261 2011-02-15 09:40:19 2011-02-15 09:40:19 1 7 1874 0 746 2626 476 78.50 26 13.58 CHANGED pTQQclL...RLpRDVcVspplYspLh.sptQp.hpls.p..AuslGNVRIlDs.A.sstsc..PVKPKKsLlllluhllGhhluluhlllRph ..............................................h....plpRphpstp.plYhtLL.p....+..pp....hpls...p....st.ss....u...s...l.....+..l..l..Ds...A..hs..s....t...Plp...P..++.tL.ll.l.l.u.hlLGlhlu.lshsll+..h......... 
0 230 467 612 +13650 PF13808 DDE_Tnp_1_assoc DDE_Tnp_1-associated Coggill P pcc Jackhmmer:Q2EEQ8 Domain This domain is frequently found N-terminal to the transposase, IS family DDE_Tnp_1, Pfam:PF01609 and its relatives. 30.30 30.30 30.30 30.60 30.20 30.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.74 0.72 -4.32 95 1944 2011-02-15 10:37:59 2011-02-15 10:37:59 1 5 565 0 251 1556 231 80.30 40 30.01 CHANGED LhpthsplsDsRp.sp.shcasLsslLhlslsAll.uGucuap-Itpaups+...phLpphh..sh..pG.......lPSccThpRVhptlDspthppsatpWhp ...................hphhp.l.D.Rp.th..phcHpLscILhLslsAVI.uGA-uWc-IE-FGcs+h..-aL+pah..sh..cpG.......lPscDTlsRVlu.pl..sstthppthhpah.................................. 0 73 179 221 +13651 PF13809 Tubulin_2 Tubulin like Eberhardt R re3 Jackhmmer:D3EKE8 Family Many of the residues conserved in Tubulin, Pfam:PF00091, are also highly conserved in this family. 25.50 25.50 25.80 26.10 25.30 25.40 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.16 0.70 -5.50 23 225 2012-10-03 12:11:42 2011-02-15 14:23:36 1 7 203 0 66 232 45 326.50 31 31.02 CHANGED llIGLGGTGt-llt+lR+hIhppatt..................sthshlsFLhlDT-ps...........hstp.....thshcphcchhthhsspsssshhpphp.........sastlpp.......WhPsch.p...........shpslcsGAGplRslGRLAF......hspappIpptlpsthc...............h........hthcstssulslalVuSLuGGTGSGhhlDluYplRphh......ttpsshtlsuhllhPs...hasshsss......sphpANuYAALtELsaasshsscapsphstttspph.spps....................PFDhsYLluspNspsth..hshcplhphlApsIa.LphosthustppshccNhhpphtpssststs..................ahsFGhusIthPh-pItshlth+lspphlpha 
...............................................................................................................................llIGLGGhGuplhtpl.hcp...l.c...........................-p.cp.lt..hhshDTshs..............................................slsphcc.hh..p..hh..sp.sssuph..pchh....................spP..pIsc..........Wh.Ph-.ph............spslspG..A..G..QlRsluRLuL..................s+l...ss..hhpplcchhs..................................ssucts.phul..p...VhlVoSLAGGTGSGMFLplAhhLRphl......htpcshh..ltuhhlhP-...lhspspsss.......hcplpANG...YAuL+E.Lcthphs...sstt..hs...tp.tthsh..php...hpss.....................................Pas..hsaLhsh....cNh.c..G...ptl...tshsphhc.h....ApsIh.hthhsshus.tph..stp......c..s...hpphtpsss.p..sp...................asshGhuplhhPh-clhphsAh+huhphlp.......................................................................................................... 0 19 47 55 +13652 PF13810 DUF4185 Domain of unknown function (DUF4185) Eberhardt R re3 Jackhmmer:D0L2Z5 Family \N 25.00 25.00 25.00 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.31 0.70 -5.53 52 408 2011-02-15 14:28:20 2011-02-15 14:28:20 1 10 213 0 128 358 13 297.80 29 69.59 CHANGED TGs..........spTsp+aslsGTDLGIhacsssG....phhhsFGDTFu.........ssssGs.....sWRSNllhhSs.spslsc..Gl.........thsusst..ss.sp.Acpll.s.s.p.......sh-h......ThIPTuuIsls....s..spYlphMol.+sWus.s.....W.TNaoslshSsDsGpoWspsststpsss...........sG.sp.....h..suhsp.......scG..aVYhauT.ssuR..susshLuRV...tsplhsh......s.....uYEaWssss.......W..upss...usPllsus..sGElSlpap...t.hG+WlhhYhs.........s..s.tslshRsAssPpGsWo.stslssshp..................hs.p..LYGuYIaPhS.p..s..ttLaaslSpWs.........sYNVhhh+s 
.................................................................................ps.tcaulsuTDLG.h.h-.sss...........pllhhFGDoFu..........sts.Gt.....tWpssshhpsp..spslss..Gl....................phsusss.........thuppll.s.s.............s....th......ohlPousl.sls..........t.......ppYlphhoh.+s.hss......................hotlshSpD.s.GpsWpshstshpsss.......................st.st....t....h.sshhp....................ssG...alYhhu....o....s........s........R.......s........s.........sshLsRs..s..p.plh..Dh.................s.....papaWsuss.t...................W.....s...pss.....hoPl..h..s......ss.................lG.Eh.Slph.............spalhhYhs.........s...st.....ss...lhhRs...A..s...pPpu..sWu.stslssshp................................hs.t...hYusalpPhS.....h....s...........p.LhhhlS.Ws............sYpVh.hp.................................................................................................................... 0 40 89 115 +13653 PF13811 DUF4186 Domain of unknown function (DUF4186) Eberhardt R re3 Jackhmmer:D0L5G5 Family \N 25.00 25.00 40.50 40.40 17.50 17.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.54 0.71 -4.37 43 607 2011-02-15 14:30:59 2011-02-15 14:30:59 1 3 591 0 60 245 5 107.10 72 88.23 CHANGED ph-plFpRLu+SsFRuRF...+Lstc-psYlpcKGhssIppHApDFlscRLAPAhPsNDGKQTPMR....GHPVFlAQHATATCCRGCLpKWHtIstG+sLotpEQpYlVsVlhpWlpp .....sh.-sLFARLuRSpFRSRF...RLGhKERQYCh-KGAsVI-pHAADFlA+RLAPAhPANDGKQTPMR....G..HPVFIAQHATATCCRGCLAKWHsIPQGhsLSEpQQcYIVuVIa+WL..Vl............ 0 14 31 43 +13654 PF13812 PPR_3 Pentatricopeptide repeat domain Coggill P pcc Jackhmmer:Q9USP3 Repeat This family matches additional variants of the PPR repeat that were not captured by the model for Pfam:PF01535. The exact function is not known. 
27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.43 0.74 -7.99 0.74 -3.22 132 5769 2012-10-11 20:01:04 2011-02-15 14:46:39 1 1960 345 0 4254 55882 730 33.00 18 6.79 CHANGED htsassllpshs..c.tup...hpt....s...hplhpp.Mp.p.p...u......lpP ......................tasslls.s.hu......+..sGp............hcp....................A..........hplhpp..Mt.t.t....t........................................ 0 932 2789 3676 +13655 PF13813 MBOAT_2 Membrane bound O-acyl transferase family Coggill P pcc Jackhmmer:O74380 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.18 0.72 -3.91 101 505 2012-10-01 20:09:06 2011-02-16 11:45:15 1 13 129 0 397 1144 83 88.40 25 20.91 CHANGED aP.s.lFGs...h....hpA.....h...o.....lpp.....FWGc.hW..HQhhRt....hhps....h....u....phl....h+t.lht........p..........ht+hhplh...hsFhlSGllHhhsshhhst............pt.....shh....FFhh ...............................PhF.sp.....htu.....p...S.....LpcFWG+..t............W..Hphhpp..................hhps........s....u....hh.......spt.hht...............pt.......................htphstlhssFhlSGl.hHths...hhhsht...........sh.........hh.FFh.h.......................................... 0 152 254 343 +13656 PF13814 Replic_Relax Replication-relaxation Eberhardt R re3 Jackhmmer:D0L6G0 Family This family includes proteins which are essential for plasmid replication [1] and plasmid DNA relaxation [2]. 
25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.29 0.71 -4.45 47 432 2012-10-04 14:01:12 2011-02-16 13:12:31 1 3 244 0 81 370 8 165.50 19 62.86 CHANGED Lthl..tcHphLoscQltplhhsstt.........tspppLpcL.pphphl.cphp.................ts.uptshhYhLsttG......tchl............st.ts.........t......tt............pttt...hsss......t.pls.HplslschastLh.tts.+pt...s..hp..ltcWh.....sE.psttt..h.............t..p..h.lpPDuhhthptss.......tthtaalEhDpuTEshtp......ltpKlppYtc.hhptt............tt..........s.hPl....l .................................................................l..hthhshpplhthh...t.h...........sspphLpc.L...p..t......hl.pphhh..........................ttt...hhahL.sppG...hphh..................tt........................................t..hhss...t.plt...HpLhhs-hh..lphh..p.s.....t......................tap..............hEtphthp..h...........................tttt...t..h.lhPDshhhhppp....................hh....lElDpspps.hph.........htcKl.ppYtc.hhp........................................................................................... 0 34 66 73 +13657 PF13815 Dzip-like_N Iguana/Dzip1-like DAZ-interacting protein N-terminal Wood V, Coggill P pcc Wood V Family The DAZ gene-product - Deleted in Azoospermia - and a closely related sequence are required early in germ-cell development in order to maintain germ-cell populations. This family is the N-terminal region that is the only part of the protein in some fungi and lower metazoa. 
26.20 26.20 26.40 26.40 26.10 26.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.20 0.71 -4.29 16 230 2011-02-16 13:19:29 2011-02-16 13:19:29 1 6 105 0 130 203 1 116.60 34 17.34 CHANGED FpFpsRp-plDWR+luulDl-+l...s+-hDlsTLQcsl.sIoFsclpsEcssp.....s-shhLKLa+LuQLsIEYLlasQ-hLssp...spL--+lpps.pttcptcpphs+ppc-hphl.KcEs+ ........FpFp..R..p.cslDWR+luulDl.D+l...spchDltsLQ-plt...s.lTF.CsL-sEcssp..........lDPsl.lKL.hRLAQLsIEYLLH..sQ-......hLssp.........lppLcccLptu.tptpphcppltcpsp-l+tl.+cE.+........................................... 0 52 68 93 +13658 PF13816 Dehydratase_hem Haem-containing dehydratase Eberhardt R re3 Jackhmmer:Q76K71 Family This family includes aldoxime dehydratase, EC:4.99.1.5. This is a haem-containing enzyme, which catalyses the dehydration of aldoximes to their corresponding nitrile [1]. It also includes phenylacetaldoxime dehydratase, EC:4.99.1.7. This haem-containing enzyme catalyses the dehydration of Z-phenylacetaldoxime to phenylacetonitrile [2]. The enzyme forms an elliptic beta barrel, composed of eight beta-strands, flanked by alpha-helices [3]. 
25.00 25.00 25.90 25.40 24.80 24.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.94 0.70 -5.38 33 119 2012-10-02 00:20:33 2011-02-16 13:30:49 1 4 97 20 74 142 8 286.30 33 82.63 CHANGED hAhhGlQhpus.st.........stAhssltphlpt..hssssuPpth.-hsthsDssGhpshlhluYWp-sssacpWhpssshssaWsuhs...tpshGhapElhsssscRaETlaSsp-hh.Gluplhs.shos.hpcHuYWGuhRDRhPhopsDthts.........sspsth.sssstttGRlhls..sh.-NlshIRSGQDWussss-E+chYh-pl-PsLppGMcaLp-putcsGChosRahpshDs....c.ss............h.hccohsluaacsLssLE+Wu+oH.THlsIassFhchs.pshs....thcLRLaHEVuVLcsspthFEYlsCHssTGh ....................................................huhhGlQhpssst..................ssAhpsltphhtt....hstssuPsta.-hsph.sDspGhtshlhluYWps....ssappWhpssshssaWpu.s...t.pps.....hGaapElhssps-RaETlaut.ch.h.Glutlhs.shos.hpcpuYWGuhRDRh.Phops.D.hhts.....................ssp.th..s.sssttsuRlhlt..sh.cNlshIRSGQDWussps.cERphYh-plcPsLppGMsaLcDpGtpsGChusRahpslD...........c.tp...............h.hccohsluaapsLspLE+Wucs.Hs.THlpIFssFhchs.pths.....hpLRLaHEVsVhcstpthaEYlsC+stTGh................ 0 10 29 59 +13659 PF13817 DDE_Tnp_IS66_C IS66 C-terminal element Coggill P pcc Jackhmmer:P39351 Family \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -7.80 0.72 -3.94 114 3268 2011-02-16 13:47:29 2011-02-16 13:47:29 1 17 854 0 386 2560 227 38.70 51 11.84 CHANGED SLIpTAKLNGl-PpAaLp-VLsR.I.ss..a.st.sRlc-LLPWs .......SLIuTC+LNsl-P.EuYLRa...VLsh.l..s-.....W...Ps....N......R....Vs-LLPWp............ 0 81 201 298 +13662 PF13820 Nucleic_acid_bd Putative nucleic acid-binding region Coggill P pcc Jackhmmer:Q14686 Family This is a family of putative nucleic acid-binding proteins. Several members are annotated as being nuclear receptor coactivator 6 proteins but this could not be confirmed. 
21.60 21.60 23.20 21.60 20.60 21.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.64 0.71 -4.61 6 169 2011-02-17 13:43:59 2011-02-17 13:43:59 1 25 69 0 126 158 0 143.50 40 12.42 CHANGED hSNlaVsspGplc..hhptpLDpL+ppLspLhuscpSplhh+chcha+SlhVcFoIPREssssLRphA-puD.cLhhhGIpS....lQI-u-ssI.slsppushaD..st+p..ssslc.lGsSsRscttps......tShstL.u.sl.ucshsspstsh...ussph ................SslaVsh+Gslc...hp.pLDplhps.lssL.h.p.h...c...........p.Sp............l.Khpthc.apSlhVpFsIPREssthLRhhAppss.pLh.hGlhS......lQI..-uEs...sI.plt.utsh.pshR.........sssht.sGsSs.Rhctths...........tuhstL.th.s..ussh..thtssuS..h................................................. 0 82 87 106 +13663 PF13821 DUF4187 Domain of unknown function (DUF4187) Coggill P pcc Jackhmmer:C6Y4A5 Domain This family is found at the very C-terminus of proteins that carry a G-patch domain, Pfam:PF01585. The domain is short and cysteine-rich. 27.00 27.00 27.10 29.60 26.70 26.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.22 0.72 -4.37 53 314 2011-02-17 15:57:08 2011-02-17 15:57:08 1 13 227 0 230 315 0 67.60 35 14.86 CHANGED --E...............hc-.ptLsspEcLppLlpYLRpcapYCaaCuhpYcspc-l...psCPGss.................................E-DH ...........................pE...sc.-tLssp-+LphllpYLRcpapYCaaCshcY-stc-L...cpCPGss....ptpH..................................................................... 0 65 115 181 +13664 PF13822 ACC_epsilon Acyl-CoA carboxylase epsilon subunit Eberhardt R re3 Jackhmmer:D0L8S8 Family This family includes the epsilon subunits of propionyl-CoA carboxylase, EC:6.4.1.3, and acetyl-CoA carboxylase, EC:6.4.1.2. These enzymes are involved in the biosynthesis of long-chain fatty acids. The epsilon subunit is necessary for an efficient interaction between the alpha and beta subunits of these enzymes [1]. 
25.00 25.00 25.20 25.70 24.50 24.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.16 0.72 -3.62 83 404 2011-02-17 16:15:50 2011-02-17 16:15:50 1 1 355 0 109 286 9 60.60 33 71.49 CHANGED pshl+Vl+GsPosEElAALsAVLsuh..uusssssssss..........stWuc.sc......htt..hsuPsuW .......shlpVl+GsPTsEELAALlAVluuh.........uusussstssst...............stWsp.hc......htcshh.u.tua.......................... 0 30 81 105 +13665 PF13823 ADH_N_assoc Alcohol dehydrogenase GroES-associated Eberhardt R re3 Jackhmmer:D3EFL6 Domain This short domain is frequently found at the N-terminus of the alcohol dehydrogenase GroES-like domain, Pfam: PF08240. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.24 0.72 -6.44 0.72 -4.28 120 1545 2011-02-17 16:20:05 2011-02-17 16:20:05 1 5 1130 2 440 1190 19 22.70 51 5.97 CHANGED MKAlsapGt+cVcV-pVPDPcIp .MKAlsa+GsccVcV-s.VPDPtIp... 0 117 274 373 +13666 PF13824 zf-Mss51 Zinc-finger of mitochondrial splicing suppressor 51 Coggill P pcc Jackhmmer:Q9UTB4 Domain Mss51 regulates the expression of cytochrome oxidase, so this domain is probably DNA-binding. 27.00 27.00 28.00 28.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.04 0.72 -4.11 25 148 2012-10-03 05:12:49 2011-02-17 17:23:16 1 5 144 0 111 149 0 63.70 44 12.10 CHANGED hCPhss......+............................clpapCPcCGlPsaCScEHWccD.EtHtc..hCpsLRp.lNE.s-HDL .................hCPhss...+p..................................tclpapCPcCGlPsaCScEHWtcDhEtHhp.lC-tLRp.lNc.D-HDL...... 0 26 56 93 +13667 PF13825 Paramyxo_PNT Paramyxovirus structural protein V/P N-terminus Coggill P pcc Karlin D Family This family consists of several Paramyxoviridae structural protein P and V sequences [1]. From a structural point of view, P is the best-characterised protein of the replicative complex. 
P is organised into two moieties that are functionally and structurally distinct: a C-terminal moiety (PCT) and an N-terminal moiety (PNT). PCT is the most conserved in sequence and contains all regions required for virus transcription, whereas PNT, which is poorly conserved, provides several additional functions required for replication [2]. P protein plays a crucial role in the enzyme by positioning L onto the N/RNA template through an interaction with the C-terminal domain of N. Without P, L is not functional. The N, P, and L proteins of SeV and measles and mumps viruses are functionally equivalent. However, sequence identity between proteins from these viruses is limited, and the viruses have been placed in different genera (Respirovirus, Morbilivirus, and Rubulavirus, respectively). SeV P protein (568 aa) is a modular protein with distinct functional domains. The N-terminal part of P (PNT) is a chaperone for N and prevents it from binding to non-viral RNA in the infected cell [3]. 21.70 21.70 22.20 21.70 21.10 20.70 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.67 0.70 -5.21 8 536 2011-02-17 17:37:57 2011-02-17 17:37:57 1 6 59 0 1 461 0 228.70 56 64.32 CHANGED EQAYHVsKGLECIKALRtsPPDh.pIcEs.uhhscsssssspsssTscpEEtDoQslscSCpPAhGSspsutshtcspGsGE..sNssssEhttsPc-.tpsusulpCYaVYDHSGEcVKGIEDADSLlV.uGssusssFpGG-suS--SDsDSGEsDoEGsAsoshGSust.pssRAoDVEplpu-ElptLLRoQppsssth+sGKTLpVPssP-sppussSspPIKKGT-cRSsSaGTthsuu.TuGATQsAhKSsuuSStPuASAGNVhpssoNAchhQcsp.ESGTphSP+opNptES-sEYDDELF .....................................................................EQAhHVppGLECl+AL+tpP.s..tlpEs.uhhsphpssstpptsssh.ppctsspslscsp.sAhGSs..sutsh.ps.GstE..usss..pht.ssts..tsusGlppYaVYD.HSGEtVKGIpDAD...SlhV.uGhsusps..hpsG-sp...-sSs.D.GEs.oEG.A.oshG.u.h.hs.RAuDVphh.ttElptLLRhpppss.h.+.GKTLphP..sP-st..pussppssIKK.G..TctR...ASaGht.Isuh.h............................................................................................ 
1 1 1 1 +13668 PF13826 DUF4188 Domain of unknown function (DUF4188) Eberhardt R re3 Jackhmmer:D0L3R6 Family \N 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.77 0.71 -4.00 48 173 2012-10-02 00:20:33 2011-02-18 08:00:56 1 3 167 0 98 217 26 116.20 33 56.30 CHANGED lVVFllGMRlN+hhul.....ppWhslhtAMstMlcE.Ltps.....-.hGhLut.cs.ah.h......csshllQY.WRuhEsLcsaA+..sp.HhtAWcpFsppst.s...s.ssVGIaHETYhl.tGpaEsIYsNM.P.shGL ............................................lVVFhlGhRhN+.huh......tah.lhtuhstMlc-.Ltpst.........chGhLuhp.hh.s............psh.hhlpY.WRuh-sLppaA+..st.Hh..pAWch...aspthps......sspVGIaHETYhl.tGpaEsI.YsNh.P.shGL............... 0 21 61 84 +13669 PF13827 DUF4189 Domain of unknown function (DUF4189) Eberhardt R re3 Jackhmmer:D0L9M9 Family This domain of unknown function contains six well-conserved cysteine residues. 25.00 25.00 25.20 25.50 24.90 24.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.15 0.72 -10.90 0.72 -3.75 66 395 2011-02-18 08:10:12 2011-02-18 08:10:12 1 7 227 0 89 316 4 96.50 27 61.71 CHANGED paGAlAh..ssssGs....hGhuhshs.ocppAcptAlppCpt..........t.tsCclhshapNt.CuA..lAhstpstt........thususshp....pApppAlppCtptss...........pCplhh.tsCo ................aGAlAh..sss.us.....hGt.uhs..ts..optpApttAl..p..pCtt.............psC+llsphps........CsA..lA..hststh.................tsususshp....sAcpsAlspstt..............tstl.s.hsCs.......................... 0 14 46 68 +13670 PF13828 DUF4190 Domain of unknown function (DUF4190) Eberhardt R re3 Jackhmmer:D0LA47 Domain This integral membrane domain is functionally uncharacterised. One of the membrane helices contains two GXXG motifs that are usually associated with dimerisation. 
35.00 35.00 35.10 35.00 34.90 34.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.00 0.72 -4.42 107 528 2011-02-18 08:18:59 2011-02-18 08:18:59 1 16 318 0 182 498 26 63.40 31 30.28 CHANGED hAIuSLVh.ulhuhhh..........lsullullhGalAhspI+css..ppGcGhAlAGlllGhlslsl...hllh .........hAluuLVh.ulluhhh..........h..lhullullhGhlAhspI++ss.....ppGcGhAlAGlllGhlslllhll.h.............. 0 59 137 166 +13671 PF13829 DUF4191 Domain of unknown function (DUF4191) Eberhardt R re3 Jackhmmer:D0LAZ2 Family \N 25.00 25.00 133.10 129.00 23.80 24.60 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.32 0.70 -5.23 55 434 2011-02-18 08:42:45 2011-02-18 08:42:45 1 1 433 0 112 285 112 229.20 39 91.39 CHANGED s++s.cpp.....pphpQlhpuaphpR+pDspl.l.hhluuhluslslhlllGllhs..th...hhhlllGlh....lGlLsAhhlFuRRsp+usYsplEGQPGAAussL.p.slRt....sWplss.sVAss.....+p..D.....hVHRllG+PGllLVuEGsssRl+sLlspE+K+lsRlls..ssPl..aslhlGsu..EGQVPLpKLp+plhKL..P+s...lsps-lsslspRLpALs.....ssthsl...PKGPhPppA+.t+sshR ........s..csppttphpQlhpsFphpR+pD.pLl.hhlGu.Flssl.sl.hhllG.llhs.....th....ahhlllGlh....lGsLsAhhlFsRRAp+usYs+hEGQsGAAuhALs.slcp.......tWpsos.uVAhT......+p.D........sVHRslGhsGllLlGEGs.ssRl+sLLupE+KRhtRlss..slPl..acIllGs.G.......-G....QVPLcKLc+pltKLP+.s....................lossplsslssRLcALs....htsssslPKGPhPp..s.h.Kh.spsh........... 
0 34 82 104 +13672 PF13830 DUF4192 Domain of unknown function (DUF4192) Eberhardt R re3 Jackhmmer:D0LBR8 Family \N 25.40 25.40 26.00 26.10 25.10 25.20 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.13 0.70 -4.78 56 428 2011-02-18 08:58:27 2011-02-18 08:58:27 1 2 346 0 131 346 78 282.00 26 82.83 CHANGED Pu-llAAlPtlLGFhPpc.SlVllsl..................sph.GsshRhDLs.......hst.sptlst.hh...........s...t.....tpscsslsllhss..t.t.s............thpthhptLtpthsst.slsl..hsshhh.plssGphWphh...............s......ssGt..shssssossssA.slhs.Gphs.hssRs-ltAtlt.....s.s.h.tstthstshpttsht.t.ph.......t............th..t..h..chhttsspphspu.............phhsss.....tphutluhsLs-spl.RDthhshsst...csussA.tpLWttluRphsus...h...RusslsLhuhsAaspG-ushAshALstAl.....ps-Ps.ashApLLppALppGlpPc .......................................t-llsslPhlLGFhPpc.SlVhlsh.......................ts...ssth..Gh..VhRhD.Ls........t.ssthsthAt...h.................h.........sssstslslhhsp.....p.tts............................ht.hhts..Lt....psltth..shs..l..hsuh...tl.ssGthWpsh.......................tsss........pts......hc.sssshhsA.ulh...p.G.ht.l...hssRssl.s.hls..................st...t.....tp.sss.htthsttt..th.t......................................tsh..phhhtshshh..tsu..................................pplss........tthsplu....s.ulpcspl.RDhlht.hh.t....................tpAs...t....s..pl......Wttlschh.ss..h.................tstsLsLhuhsuahpG-GshAulslptuh.....thtss.a.....phsthLpphlpsGl.Pt............................................................................................................... 0 42 92 123 +13673 PF13831 PHD_2 PHD-finger Coggill P pcc Jackhmmer:P55198 Family PHD folds into an interleaved type of Zn-finger chelating 2 Zn ions in a similar manner to that of the RING and FYVE domains [2]. Several PHD fingers have been identified as binding modules of methylated histone H3 [3]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.41 0.72 -4.63 28 1358 2012-10-03 17:27:21 2011-02-21 11:26:41 1 92 289 2 819 1377 16 35.50 47 3.52 CHANGED ss.llsCs..pCslpVHtsCYGlsp.sst.....WhCs+Ct ...t.N.llaCD...tCslsVHQ..pC....Y.G...lsh...l...P.p...G..p.........WlC+pC..... 0 223 368 595 +13674 PF13832 zf-HC5HC2H_2 PHD-zinc-finger like domain Coggill P pcc Jackhmmer:P55198 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.48 0.72 -11.31 0.72 -4.06 34 1557 2012-10-03 17:27:21 2011-02-21 11:45:05 1 99 292 0 942 2258 15 113.10 38 10.90 CHANGED scCsLCshcGGAl.KpTs..-.sp..WsHVhCAlh.lPElpFs...sst...phpPl-.lppls.p..R..hchpChhCcpp.........tGACIQCspsp...CtssFHsoCAptAGlhMchc........sh...................h.hhshCt+H ........................................................s.pChLC.......P.......p.......c.......s.........G.......A.......h.........K.......p........T....s...........s.....s.....+............W......s..HVsCAla.....lP....E.V...p.Fu..............ss.t.......................p.hE......P...ls......l.............p.p.IP......s..........R.........a.p..L.......pC.hlC+pp........................sGA.Cl.Q.........Csptp.........C..h.........suFHVTC.A.p.p.u...G.lh..hch.p.....s.t...................th.hhsaC.hH....................................................................... 
0 261 429 688 +13675 PF13833 EF-hand_8 EF_hand_6; EF-hand domain pair Coggill P pcc Jackhmmer:B4DPI1 Domain \N 27.00 13.80 27.00 13.80 26.90 13.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.45 0.72 -4.35 49 8724 2012-10-02 16:17:27 2011-02-21 13:12:04 1 472 1753 448 3941 29164 1599 50.10 27 16.73 CHANGED ppGhIoh-sLpcsh.thh..shp.....hscp-.lpshhpthDhDsDGtlshpEFhshhhch ........................................................pG.Istp-Ltphh....pth.........G.p..................................ho..p...p...-...l.....p....p....h....l....p....p........h....D..t.....D....s......s...G..p...l....s..a..p..EFhthh...t.......................... 0 1329 2024 2921 +13676 PF13834 DUF4193 Domain of unknown function (DUF4193) Eberhardt R re3 Jackhmmer:D0LBT3 Family This domain of unknown function contains four conserved cysteines and a conserved histidine, including a CXXXXH motif. 25.00 25.00 25.70 27.20 23.30 17.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.46 0.72 -3.83 55 502 2011-02-21 13:14:06 2011-02-21 13:14:06 1 1 430 0 156 293 158 97.70 59 97.30 CHANGED MATDYDAPR+s-.D-lspDSLEEL.KucRs-p.touslD.DEs-sAEuFELPGADLSsEELoVcVlP+QsDEFTCuSCFLV+HRSQLAc.Ecs...GphlCpDCA ...........MATDYDAPR+o-...D-lsE.DSLEEL.Kup..Rs-p.tSusVDsDEsEsAE.sFELPGADLS...sE.ELoVRVlP+QuDEFTCoSCFLV+HRSpLAp...Ecs...GphlCpDCA.... 
0 49 105 144 +13677 PF13835 DUF4194 Domain of unknown function (DUF4194) Eberhardt R re3 Jackhmmer:D0LDY0 Family \N 25.00 25.00 25.00 25.00 24.60 23.60 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.73 0.71 -4.81 69 425 2011-02-21 13:18:36 2011-02-21 13:18:36 1 1 392 0 144 392 34 162.10 19 70.16 CHANGED llpLLcsshlpsppp....plWptllcp...pstlcshLsslhLcLllDcstGhAahcph..p.tp.ts.........pLlpR.p..sLoh.pollLlhLRpphtctp...t.ts.spchllsp--lh-tlps...ahs.tssscsphtcclcsslp+lt.chuhlcphcs......-sp.......aclpsllcshlss-hls .....................................tLhpt.hlptpp.......phaphlhcp...pstlcshlsslslcLll....Dcp..tshhalpst...........t.t..................hhpp..p..pLshhpollLlhLRphhp-t.....sh.s..stp..shl..sh-Elhphlps...ahs...pscsthtpcl.cp.sl.pchp.phshlpthcs.........t.csp......htlpPhlthhhssp.l.t................................. 0 48 96 127 +13678 PF13836 DUF4195 Domain of unknown function (DUF4195) Coggill P pcc Jackhmmer:Q8ND82 Domain This family is found at the N-terminus of metazoan proteins that carry PHD-like zinc-finger domains. The function is not known. 27.00 27.00 29.50 31.50 24.50 19.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.20 0.71 -4.58 4 132 2011-02-21 13:42:38 2011-02-21 13:42:38 1 4 32 0 55 149 0 164.30 43 25.84 CHANGED IFVGEISSSKPAISNILNRssPSSpS+GlKNGshs.GIoshFKPTSQ+hpNPsSNPVsA.P.sFHPtS+SS-SSVhVQshSKPsaspNSspVsSssSS.LLFD.TQDo.Lsp.QshPslshsGhspoSal.K+PSTScVNSVNPKKPKsSEslSthssSoslsS.pSPSVsSSQshLSpGsNoSSs ...............IFVG.hSsSK..PslS..NIL.NRssP.uS.S+thKpsthpp..shsthhpPsSpchhsPoSps..Vss.P.....s.psESRSocSsl.h.l...pshSK..P..sahpsSsQVss.ssSSpLh.sh.sp..p.o..ls..pss.s...s...lshsGhscosh.lSKR.sSTS-l.N..SlNPK+PKhS-ulsthsuSush.sS..spS.ohsopQsh.upsssoS.......................................... 
0 4 5 9 +13679 PF13837 Myb_DNA-bind_4 Myb/SANT-like DNA-binding domain Coggill P pcc Jackhmmer:Q6P1R3 Domain This presumed domain appears to be related to other Myb/SANT-like DNA binding domains. In particular Pfam:PF10545 seems most related. This family is greatly expanded in plants and appears in several proteins annotated as transposon proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.19 0.72 -3.85 185 1844 2012-10-04 14:01:12 2011-02-21 14:15:41 1 81 168 2 1133 1879 0 91.10 22 25.60 CHANGED pspWscpEsptLl.plh...sp....hc....hpt............pt+st.lWcplupp....h.......t...p.....p.....G.....h...p....R.osp...QC+pKacsLp+pY+pt..+p......t.t....t..tp.s.....p....s....h...a...............................Fcpl-pl ........................................................t..pWs.t.p.EsttLl.phh.......pp....hcttht...................................sst+s.p..hac..tlupt....h.......................t...c.....p........................G..........h.....t.....R.osp...Q.C+pKa.cs.Lp+pY+..+h..+p............t.t........tts............p.....s....h...a................................apphct.............................................................. 0 211 533 829 +13680 PF13838 Clathrin_H_link Clathrin-H-link Coggill P pcc Jackhmmer:Q8NHS4 Domain This short domain is found on clathrins, and often appears on proteins directly downstream from the Clathrin-link domain Pfam:PF09268. 27.00 27.00 27.40 27.20 26.50 25.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.10 0.72 -4.08 51 782 2011-02-21 14:58:18 2011-02-21 14:58:18 1 37 456 9 307 484 4 59.40 63 6.54 CHANGED sLsGA-sLahppFppLhspGpYpcAA+lAAsoPpGhLRTspTIp+Fp..phPstP.Gp.s.PlLQYFuhL .......................sLsGAE-L..FsRKFNsLFAQGsYuEAAKVAAsAPKGl.LRTspTIp+Fp.....slPsp.s.GQsS.PLLQYFGhL.......................... 
0 99 161 238 +13681 PF13839 PC-Esterase GDSL/SGNH-like Acyl-Esterase family found in Pmr5 and Cas1p Anantharaman V pcc Manual Family The PC-Esterase family [1] is comprised of Cas1p, the Homo sapiens C7orf58, Arabidopsis thaliana PMR5 and a group of plant freezing resistance/coldacclimatization proteins typified by Arabidopsis thaliana ESKIMO1 [2][3], animal FAM55D proteins, and animal FAM113 proteins. The PC-Esterase family has features that are both similar and different from the canonical GDSL/SGNH superfamily [1]. The members of this family are predicted to have Acyl esterase activity and predicted to modify cell-surface biopolymers such as glycans and glycoproteins [1][3]. The Cas1p protein has a Cas1_AcylT domain, in addition, with the opposing acyltransferase activity [1]. The C7orf58 family has a ATP-Grasp domain fused to the PC-Esterase and is the first identified secreted tubulin-tyrosine ligase like enzyme in eukaryotes [1]. The plant family with PMR5, ESK1, TBL3 etc have a\ N-terminal C rich potential sugar binding domain followed by the PC-Esterase domain [1]. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -12.05 0.70 -4.58 109 1574 2012-10-02 11:02:24 2011-02-21 15:50:55 1 22 161 0 1001 1544 2 250.40 20 56.82 CHANGED LPRFsupphLchhR.............sKplhFlGDSlsRspapSLlChLppshs.............................sppphhptps........t.hhaphpcassolpahhsPaLVp..p..............hlcl-plp.pts.ptht............ssDlllhNo.spWWhpp.thh..hs.h.............tpp.hpchshhtuactulpohtpalptslsstp............................................................opVhFpoh..oPsHhcsstWtt........u.tp.C.tp.............ts.thpsts.phhchltcshp.........th.psslphlsl.TthsphRp.......DuHsuhYtt.......................................pDChHWCLPGl.DoWN-lLhshlht .................................................................................................................................sphsstphLph.hp..............sKplhFlGDSlsRs...apSl..lC..h..Lppsh........................................................ttt.......tt...........................t.hhhth...t..p.a..s...ho....l.p.a.h...hssaLsp........................................................................hhhcpht....ths..p.tht...............shD.ll.l.h.so.....stWahp..th.................................................................t.....phs.h.ht.u.aphslps..h....hphl...t.t..ph....tt...............................................................sp..l.hh..psh...sPsH....h.p.....st.......h.t..........st...C............................................t....th..t.....h.h..ph.h..p.hht.....................hth.hphl.sl...T....hhsthR........DuH.s..a................................................tDC.HWCls..G..h.DsasplLhthh..h....................................................................................................................................................................................................................... 
0 251 600 806 +13682 PF13840 ACT_7 ACT domain Coggill P pcc Jackhmmer:Q8NAP1 Domain The ACT domain is a structural motif of 70-90 amino acids that functions in the control of metabolism, solute transport and signal transduction. They are thus found in a variety of different proteins in a variety of different arrangements [1]. In mammalian phenylalanine hydroxylase the domain forms no contacts but promotes an allosteric effect despite the apparent lack of ligand binding [2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.73 0.72 -4.45 192 6068 2012-10-02 00:29:19 2011-02-21 15:51:45 1 38 4053 48 1523 4779 2087 65.50 33 16.45 CHANGED phppshttlplhGs.......hshthsGlhAplsssL.......uptsIslhhlS...oaposalLVppcchppAlpsLcp .....................ppsluhlSlVGs......GM+s....hsG.luA+hFpAL..........ucs.sI......sI.h...hIo..oSEhsIShl.l..sp.cphppAl+sLp........................ 0 466 918 1239 +13683 PF13841 Defensin_beta_2 Beta defensin Coggill P pcc Jackhmmer:Q4QY38 Family The beta defensins are antimicrobial peptides implicated in the resistance of epithelial surfaces to microbial colonisation [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.01 0.72 -3.59 114 633 2012-10-01 20:50:19 2011-02-21 16:25:20 1 2 61 0 227 564 0 30.50 36 38.63 CHANGED cChpt..tGpCRp.pCppsEhhhth....C..tstphC.C ...pChpt..pGpCRp.pCppsEhhhth....C..tssphC.C...... 0 33 34 39 +13684 PF13842 Tnp_zf-ribbon_2 DDE_Tnp_1-like zinc-ribbon Coggill P pcc Jackhmmer:Q96DM1 Domain This zinc-ribbon domain is frequently found at the C-terminal of proteins derived from transposable elements. 
17.60 17.60 17.60 17.60 17.50 17.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.38 0.72 -3.54 30 124 2012-10-03 10:42:43 2011-02-21 17:07:58 1 6 55 0 68 155 29 35.10 32 6.86 CHANGED ppCthCtp......pt.tp........opatCppC...s.....hsLChp........C ........................ppCphCtp......pthtp........opahCppC..s.......ssLChp...........C................ 0 25 31 62 +13685 PF13843 DDE_Tnp_1_7 Transposase IS4 Coggill P pcc Jackhmmer:Q96DM1 Domain \N 21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.02 0.70 -5.40 48 1008 2012-10-03 01:22:09 2011-02-21 17:19:41 1 50 126 0 857 1024 6 208.80 14 54.68 CHANGED sPhphFphhhsc..cllppllppTNtthpph.................t.thpssshpElcsFlGlhllhGhh+..hsp....l..p-aWsss...hhu....hshhpssMohpRFptlhpsL+hs-...........s.t...s........tt...Dphtcl+.Llcthspph..pphYsPupplslDEsh.l....ha+.u+hshcphhs.sK.hphGh.chahhs-spouYhhph.....lhp....sp.ssth..t.................p.s......sspllhcLhp.sht.s.ps...+plhhDs..aasSls.LhppLhpp..shhusGslcps+t....thP.....psl..p.........pp...........h...pp...Gphhhths.......ss...l.hhhpapc..scplh.hloo...hps........psh...lptppps...t.........pp.....h...t..pPthlptYspphsu..V-...ptcphht...p.apsst...ps.ppWhppl.hhallshulhNAa 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 377 459 779 +13686 PF13844 Glyco_transf_41 Glycosyl transferase family 41 Eberhardt R re3 Jackhmmer:O15294 Family This family of glycosyltransferases includes O-linked beta-N-acetylglucosamine (O-GlcNAc) transferase, an enzyme which catalyses the addition of O-GlcNAc to serine and threonine residues [1,2]. In addition to its function as an O-GlcNAc transferase, human OGT, Swiss:O15294, also appears to proteolytically cleave the epigenetic cell-cycle regulator HCF-1 [3]. 
25.00 25.00 25.20 25.00 24.60 24.80 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.61 0.70 -6.52 5 1950 2012-10-03 16:42:30 2011-02-22 08:01:16 1 478 581 32 949 2116 1664 192.00 16 40.67 CHANGED LRIGYVSSDFGNHPTSHLMQSIPGMHNR-+VEVFCYALSPDDGTNFRsKlMsESEHFVDLSQIsCNGKAADRIHsDGIHILlNMNGYTKGARNEIFALRPAPIQVMWLGYPGTSGAsFMDYIITDuVTSPlELA-pYSEKLAYMPHTFFIGDHAQMFsHLoERVlVctKssu.c.hDsc..tVlNus-.LcPlL-p.psKchV+-spscuGsclDhs+ocVsLPVl.hsT.sEPlcpMIsoGQIsssl.NGVsVQNGLuT.oQsNsKAATGEEVPpoIlVTTRuQYGLPDDAIVYCNFNQLYKIDPsTLsMWscILK+VPNSVLWLLRFPAsGEsNIppastchGlsssRIVFSNVAAKEEHVRRGQLADVCLDTPLCNGHTTGMDVLWTGTPMVTMPtETLASRVAoSQLsuLGVPELIAKsRpEYpDIAlKLGTDtEYL+pIRAKVWpARsoSTLFNsKQYCp-LEcLahKMW ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................... 0 267 562 790 +13687 PF13845 Septum_form Septum formation Eberhardt R re3 Jackhmmer:D0LB95 Family This domain is found in a protein which is predicted to play a role in septum formation during cell division [1]. 25.00 25.00 25.20 25.40 24.40 24.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.95 0.71 -11.74 0.71 -4.80 89 373 2011-02-22 08:02:50 2011-02-22 08:02:50 1 6 246 0 129 352 62 180.70 24 53.38 CHANGED psGsChsh.......................................................................................................................shsupstshs.......................................sVsCspsHssElh.ushslsss............P....utsshsstspctCs.pthpsYsu...st.h....ssslphhahhPoppSWpsG.sR.plsCh ..............................................................s............................................................................................................................................hpststt.s.ts...h.h.h.....sGsClshssss................hstsVsCspsHshEls.usls.....Lssphst..........P.upsp.sshhpstCs.ptsp.sY.lu..........s.th......ssslphhahs.ots.SWs.s.G.sRtVsC................... 0 33 98 123 +13688 PF13846 DUF4196 Domain of unknown function (DUF4196) Wood V, Coggill P pcc Pfam-B_104093 (release 24.0) Domain This is a short region of ccdc82_homologues that is conserved from Schizo. pombe up to humans. The function is not known. 
25.00 25.00 25.70 25.70 19.90 18.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.68 0.71 -3.78 4 51 2011-02-22 13:26:22 2011-02-22 13:26:22 1 5 30 0 23 52 0 98.80 59 24.33 CHANGED KcLhNosNSSshEEphscoKHp..DLsDpEKt.GQp-schNK+TGQIlE.ED.E.....-E.lp.s++p+lSS..lh.DSDppcpSD.h.+psu.....hK+.R+Vlps..SS.-.EppsPEc..pshhR+ ..............................cpLhNousuSshEEEhNc.ccp..DL.Dp.EKHh.SQE-sDLNKpTGQIIE..-DlE.....EEpIKpGKRKRlSS..VMhDSD-SD..DSDILVRKVu.....lKRPRRVVEDE.sSSlEME..pcsP..EK..o.AAR............ 0 3 3 7 +13689 PF13847 Methyltransf_31 Methyltransferase domain Coggill P pcc manual Domain This family appears to be have methyltransferase activity. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.08 0.71 -4.62 66 11527 2012-10-10 17:06:42 2011-02-22 13:32:39 1 237 4093 66 3510 72499 23508 157.30 17 50.53 CHANGED hppshplLD.....lGCGsGhhshhlt......hs...puc....lhGlDhopctlphA+p....ptpphshp..slpFht...u-lpp.ls...ht........ppaDl........llsp..................ssl.......thhss.tpslp..............phhclL+ssGhlhhtshs.....................t.......tthppp.p.................................ttpchhph......hppuG 
................................................................................h...stplLD..lG.C......G..s...G....h.h.s..h..t..lA.............................p.........ssp................................lh..G....l....D.......h.........o......t........p......h......l......p.....h...A....pp............................p..h....p......t................s.....h.p............s...........l....p...a..h..p........................us...h....p..p......l..s........ht....................ppa..D.l..........................................l...l.s..p...............................................................................................................tsh...........................th.h...t....s..........p....p....h...h..p....................................................p.h....h....p....h....L...p...s...s...G..h..l...h..h....t........................................................................................................................................................htthhhhhh.hh.h...h.h............................................................................................................................................................................................................................................................ 
0 1293 2284 3027 +13690 PF13848 Thioredoxin_6 Thioredoxin-like domain Coggill P pcc Jackhmmer:Q96DN0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.22 0.71 -4.48 54 1636 2012-10-03 14:45:55 2011-02-22 13:44:55 1 33 412 18 956 2074 42 177.40 17 37.01 CHANGED Fps..tpptt..hctFpcsApphp.c..htFuhs....ppc....lhpp....hthpt..ssllla..+......t.......cp..pphphstp......h.shsslppalpppphsh.ltchs..c.s.htplhppsh..hlllh...hsp.t.sps.h....p.phppt....l..pplA.pph....p..s...p...lh..Fs.......hscsp...th.sc...hlp.hh.shst..s.chPhlshlc.ts.p...t...hhh....h....ps.p.hss....pplppFlps ...................................................................................tt..hp.ahpsApthp........c......h....Fs...hst........stp................lhpp.............hth....pt...s....s......ll.la......+..............................hcp....t..h.apsp................h..st.pp....l.p....p.a.l.....p.p........p....p..........h..........s......l......l..s......c.....h.s.....t....c.....s.....h.t...p..l....h..p.......s.....s....l................h....l....hhh.......hs.....p....s..sps.h...................p..pht.st..............h....ppsA..cpa...............................+..s..........p...........l......Fs........hhctc.......ph.tc....hh....p........h....sl..pt........t...p...h..Pslhl.hs..tp..p...t....hhh...........h......pp..p...hs.....pplppFhp.................................................................................... 0 328 470 733 +13692 PF13850 ERGIC_N Endoplasmic Reticulum-Golgi Intermediate Compartment (ERGIC) Coggill P pcc Jackhmmer:B8ZZK7 Domain This family is the N-terminal of ERGIC proteins [1], ER-Golgi intermediate compartment clusters, otherwise known as Ervs, and is associated with family COPIIcoated_ERV, Pfam:PF07970. 
27.00 27.00 27.30 27.50 26.50 26.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.07 0.72 -4.01 79 852 2011-02-22 16:17:28 2011-02-22 16:17:28 1 15 295 0 572 795 24 93.90 31 24.37 CHANGED pL+plDA.as..K.s....s-Dh...ph+.ThuGullTlluhllhlhLhhuEhttY..hsst..hpscLhVDp...s............cuc+LcIslsloFPplPCp...hLolDshDsoG-pphc.l.....p.c ................................l+phDA.as..K.s.....--h....p.p..TtuG..u.h..............lolls.hllhhhLhhsEhttY..h.s.st..hp..clhVDp...s...................pupcl.cINlDlohs.thsCp............hls.lDlh.DhsGcppht...h................................ 0 213 335 474 +13693 PF13851 GAS Growth-arrest specific micro-tubule binding Coggill P pcc Jackhmmer:O95995 Domain This family is the highly conserved central region of a number of metazoan proteins referred to as growth-arrest proteins. In mouse, Gas8 is predominantly a testicular protein, whose expression is developmentally regulated during puberty and spermatogenesis. In humans, it is absent in infertile males who lack the ability to generate gametes. The localisation of Gas8 in the motility apparatus of post-meiotic gametocytes and mature spermatozoa, together with the detection of Gas8 also in cilia at the apical surfaces of epithelial cells lining the pulmonary bronchi and Fallopian tubes suggests that the Gas8 protein may have a role in the functioning of motile cellular appendages [1]. Gas8 is a microtubule-binding protein localised to regions of dynein regulation in mammalian cells. 
30.00 30.00 30.00 30.60 29.80 29.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.31 0.71 -5.12 31 204 2011-02-22 16:46:04 2011-02-22 16:46:04 1 7 130 0 133 196 5 187.80 36 41.80 CHANGED spLhcpH-pAFp-hKsYYN-ITpsNLplIpoLK--ltph++p-pcsc+hht-lpp-N+cLsEPLppspp-lpcLc+pLppYp+DKtsLppsKs+lpphccclpsLchEp-lLpp+applppER-pLhp+Fcpslp-lpQKsth+NhlLEpKlpslpcpLEp+-spLpcllpsssl-ss.tlptlspclccllpsKNptIcsL .............................pLhppHEcAF.......s-hK...........sYYNDIThNNLsLIpoLK-phpch++..p-p+.-+.ht-lttc.N++Ls-PLpcApc-hp-Lp+pLt..p.Yc.+........DKp..tLtssKs+lp..hh.ccclcsLpWEpElLppRapc................lppE...R--Lhp+F....p....ssl.-lQQKsuhKNllLE+KLpsLpptlEp+-sQLsElLssusl-Ps.slp.l.s.......p+lcclLppKNptIccL........................... 0 57 72 108 +13694 PF13852 DUF4197 Protein of unknown function (DUF4197) Bateman A agb Jackhmmer:C7PG16 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 228 and 249 amino acids in length. 24.90 24.90 24.90 38.90 24.50 24.30 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.04 0.71 -4.78 102 258 2011-02-22 16:53:23 2011-02-22 16:53:23 1 3 244 0 119 277 75 201.50 38 83.83 CHANGED ssusLossphssG.LKcALphGsppAVspLupp...sGFhsNstl+I.LPppLpcssphL+pl..Ghu...phsDchhhshNRAAEsAsscApslhhsAl+pMolsDA+sILp.Gu.-sAATpYhcppTpspLtspFtPllppuhsclGssphasslh.........................s..ph................sslsh....spp....hss.sLssYVTpcAL-GLFthlApEEppIRpsP.supsosLL++VFG ......................sh.ssLops-hsuG.LK-ALspGsptAVspLups...sGFhsNstV+I.LP..sp..Lp.cssphl+ph.....GhG.........s.sDpl.tuhNRAAEsAVspAp.slhlcA.l+pMols.DA+sILp.Gu.csAATpYLcppopppLtscFhPlVcpuhsc.....VG.....hsptYsshs......................s.ph...................sslsh...........lss..hsssLpsYVTpcAL-GLFphlAccEppIRpsP.supsTsLL+KVFG..................................... 
0 47 98 113 +13695 PF13853 7tm_4 Olfactory receptor Coggill P pcc Jackhmmer:Q9H342 Family The members of this family are transmembrane olfactory receptors. 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.99 0.71 -4.39 118 25558 2012-10-03 04:04:29 2011-02-23 09:06:22 1 29 180 0 14828 27096 3 143.20 33 48.90 CHANGED sssthhsuus..hsuGhLp.ul.l.HT..us.TF...sLshC...p..s.s...c...p.Fhs-hPt.lLK.LSCS-o.al.pEl..slllhus.hlshssFlhIllSYlpIhpAVLRIPSppGR+.KAFSTChsHLsVVSLFhuTshFsYL+P.s......Shu..SsshDlll....oVLYoVVPP ...........................................................................................................hChh.L.s..s.s.u..a..h.h..G..h..l..t..u.l...h..p.s.......h.h...hh......pL.s..FCt........sN...h.I.....s..H.Fa.C..-..hs........sl.l..+....L.u.C..u..D..T....t.l...scl...........hh.....h...h...h....u.s.....h..h.hhh..........s..h.h.h.I..l..h.S.Y.s...h.I....l.t....s.l.......L..+...l...t......S...s.....p.......G..R.p....KA.F....S.TCu.S...HL.s.VV.s.lF....YGss.l...h..h.Y.lpP..p.......u.s.p..............s.........p....h....cplh......ulhYollsP....................................................................................................... 0 416 451 1888 +13696 PF13854 Kelch_5 Kelch motif Coggill P pcc Jackhmmer:A8MU55 Repeat The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase [1] for which a structure has been solved [2]. The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure [3] as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415. 
27.00 20.00 27.00 20.00 26.90 19.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -8.12 0.72 -3.94 78 1509 2012-10-05 17:30:43 2011-02-23 09:06:54 1 435 312 0 1094 6146 187 42.50 25 6.40 CHANGED hP.ss..ptp.tss.s.....h.s.....s.............tp......lal...aGGhs...........psps..h..s.cla...lL...slss....h ..................................................P..s..Rhs.Hss.s.......................s.h.......s................sp......................l.al........aG..G..hs.............................psth..........h......s.D...la.....hh..sh...................................... 0 445 651 900 +13697 PF13855 LRR_8 Leucine rich repeat Coggill P pcc Jackhmer:JCS-Target417241 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.36 0.72 -4.21 93 70869 2012-10-02 21:32:02 2011-02-23 12:42:34 1 6305 1468 377 38112 83275 1324 59.50 28 22.98 CHANGED spLppLpLs..p.N.plp.tl..sp.ssFp.sl...spLp.hLsLs.tNpl..p..sl..ss.p.s...hpsLtpLptLpLs.s.N.p.l .....................................tLptLp.Ls.......p..N...pl....p...pl...........ss...t..s..Fp....s...L..........sp...L..p...t...L.....s.....L.....s...p......N.....p...l....p........sl......ss...t..s...............h..p...s...L.....p.....p...L..p..t..L..pLs..t.Npl...................................... 0 11072 17914 27457 +13698 PF13856 Gifsy-2 ATP-binding sugar transporter from pro-phage Coggill P pcc pdb_2pp6 Domain Members of this short family are putative ATP-binding sugar transporter-like protein. 
22.90 22.90 22.90 23.00 22.70 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.98 0.72 -3.85 23 319 2012-10-01 22:58:23 2011-02-23 17:08:29 1 1 266 1 22 351 15 88.50 45 95.50 CHANGED hsssFDphhupsDssIh..phMGpphpIsG.....tshhuVhD-s.s.ht..............slpGsuhoLslaoushht.P++sDpl..shsGcpahVsRhph.s.GphhlhL .......................s.Fpthh.tcMDslTl...+c..MG.+.pAsINs.......sshs.VlPsEphAEh.s..............ALSGs...ulSLVV..F....Sus.YR...P+RGDpV..Vac......G..pp....aTVTRa-paN..GKPhIal................. 0 5 9 17 +13699 PF13857 Ank_5 Ankyrin repeats (many copies) Coggill P pcc Jackhmmer:P42771 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.76 0.72 -3.57 81 5157 2012-10-02 12:10:21 2011-02-24 13:31:59 1 1642 768 6 3250 58643 3660 54.20 29 7.93 CHANGED Llpps...shtt.sttt....ssLthAsp.tsphphlphLl....t.thshphpstps.tsslchA ............................................t...ss.hst.p..s...p.p.Gp...............TsLHh...A.u.....t..p.....G....p....h....c......l....lchLL............pt..G.A.c..h....s.h..psp.pG..pTsLphA............................. 0 1425 1883 2549 +13700 PF13858 DUF4199 Protein of unknown function (DUF4199) Bateman A agb Jackhmmer:C7PPF0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 167 and 182 amino acids in length. 
27.00 27.00 27.10 28.80 26.60 26.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.20 0.71 -4.32 97 280 2011-02-24 17:30:07 2011-02-24 17:30:07 1 1 202 0 108 296 198 157.40 18 90.11 CHANGED paGhhhGlhhhl..hhllhhhhh....s.ths.t.hh......shh..hllslshh...hhul+pa+pphhsG.hhoatpuhthGhhhshluullhslhphlahphl..sPsahpphhptt..t...t....ht.s.t.hp.t..htth...ht..t..t..............hthhh.shhtslhhGhllollhuh.lh+..pp ........................auhhhGhhhhl.hhlltahhh.h.....shphshhshl.....shh...hllslhhh...........hhtl+paRp..p..h.hsG..hloatcuhhhuhhhhhhuullsslhpala..hphl..-s.sahtphhpth.hp............t....htts.thhp..tth.pth.....th..p.hps.......................hphsh..hhhtslhhGhllulhhuh.hh++....................................... 0 50 93 107 +13701 PF13859 BNR_3 BNR repeat-like domain Coggill P pcc Jackhmmer:Q99519 Domain This family of proteins contains BNR-like repeats suggesting these proteins may act as sialidases. 27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.31 0.70 -5.36 201 1305 2012-10-02 00:45:24 2011-02-25 12:47:17 1 12 39 1 486 2085 72 273.70 35 44.51 CHANGED 
sPuLlcV.suDVFAVAEAQhp.pp.........................ps...s....s............t...........................st...h....u..htp...........tt....tpt.p....p................p.....p.p.t.hh.cs.........s.......................................p...pp......t....+..............Vs..ls.RPTsllc...Gs..DIYML.............s.....Gp...Y.S.............pp...s...................us.....s.......................sh.s...lh.....................Ls............c........us.....s....s..sp......p.......Wp........................-spsls.......psh.............t..p..........................p......h............LlGGG....GSGVKMc....DG.TLVFP............VE............GT..........KK.p..sstt..........sVSL...IIY.S..pc.....sp...sWpLSKGMSs...........sGCSDPSVVEW...c-...t.KLMMMTACDDG....RRRVYESGDKG-SWTE.......ALGTLS.........RV.WGN..............p............hpushlsut............ss.-....pRs.....VMLVTLPVa......ttst......spc........KGcLHLWL......T..DNTHIVDIGPV.s..pD..D..D.sAASSL.LYKSup........s.sspcEELIALYE ..............................................................................................................................................................................PuLlps..sus.lh.AhA-uphttt.......................tt...t..................................h.hs.t..................s.t.t...p............pt.......h...tt..t...........................................................................................................................................................t.t.t.p...............s.t..l.....p..PT..s.h..sp.....ss.........pla...h...L..............s....Gp....a.s..........................ppt.................ttt..............................................................ph.slh...............ls...........c..........sp....s..tt....p...........Wp......ps..ts.l......t................p...................................................p.......t........h.lu..uG.G.SG...l.h.hp.......D.G....TL....V..FPlp............ut.............ppt...tt..................h..Sh....lha...o.p.s.......s..p...s...W..p...l.o....p....s.ho.s....
...................sG....C..t...sP.sl.sE..W.......c.....t....c.....L.h....M.h.........s..s....C.c...s..G...........p....p+.V..Y..E.S.s.DhGpoWT..E.....Al..G..T.Lo..+V..Wu..s...pp............t.tuthlpu.............ss.-.t+pV.ML.hT..sh.......tpt......tpt.....cstL.aLWl......T.DN.....s+.hhslGP.....l.....ts...-...s.s.s....A...S..o.LLYpsst................hh....................................................................................................................................................... 0 14 25 486 +13702 PF13860 FlgD_ig FlgD Ig-like domain Bateman A agb Bateman A Domain This domains has an immunoglobulin like beta sandwich fold. It is found in the FlgD protein the flagellar hook capping protein. THe structure for this domain shows that it is inserted within a TUDOR like beta barrel domain [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.80 0.72 -4.24 102 1909 2012-10-03 16:25:20 2011-02-25 14:24:35 1 97 1529 5 465 1365 1800 80.40 26 23.53 CHANGED usth..........hsss............tsshp..Lsssusss..plpltDs.sGplV..pshsl...sspsu..GthsasWD.GpsssGp.....................tlssGp.Yslpl..........pu.psss ..........................sttsss.........sss.....tshslp..L.tp.su..s....p..l....slp.IpDp.sGplV........+T.l.s.l............G.s.h..........su....G.s..t......s.....Fs.....WD....Gp.s.ssGs...................................sh..s.s..Gs..Yslplsusst.t................................. 0 162 299 386 +13703 PF13861 FLgD_tudor FlgD Tudor-like domain Bateman A agb Bateman A Domain This domain has a tudor domain-like beta barrel fold. It is found in the FlgD protein the flagellar hook capping protein. The structure for this domain shows that it contains a nested Ig-like domain within it [1]. However in some firmicute proteins this inserted domain is absent such as Q67K21. 
26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null --hand HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.00 0.72 -3.97 107 1425 2011-02-25 14:57:41 2011-02-25 14:57:41 1 6 1242 5 295 834 103 141.10 27 59.41 CHANGED sspshpuuuhlG+pVhssus.th..........tsss..................tsshp..lsssusss....plpltDu.sGplV..pshsl...sspsu..G.hsasWD.....GpsssGp....................tlssGp.Yslpl..........pu.psss.....tthshsshstupVsuVphs..u.sss.hLsl.ssst.....lshsslpplt .............................ssQuLQAosLIG+sVhlsGsslhhssssptsussthsstssphttssspsssslssp...........................................................sluphsuGspshsWsGppssGsssssGpYshshsusssGsphsspshthuhVpuVhhs....u.sss.hLsL.Gshut....ssLscVppl.h............. 0 63 154 224 +13704 PF13862 BCIP p21-C-terminal region-binding protein Coggill P pcc Jackhmmer:Q9P287 Family This family of p21-binding proteins is important as a modulator of p21 activity. The domain binds the C-terminal region of p21 in a ternary complex with CDK2, which results in inhibition of the kinase activity of CDK2. 
20.70 20.70 21.50 25.50 20.40 19.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.30 0.71 -4.80 87 360 2011-02-28 11:44:46 2011-02-28 11:44:46 1 3 298 0 238 367 11 198.30 28 65.46 CHANGED -h..lsVDF-hasss.s.sDaHulKp..LLpQLhs...s...sp..hsl.up.........LuDlIlsQ......s...s...lGoslKs............................Ds..c....-.......s.-sau.hlollNlppp.................p...s..p.....slcpLtcalhp+sp................sps.hh....ppLpplLs.............s.s..sp....p......lGLllsERhlNhPspllPPhaphLh-ElptAtp........cp...............c.aphs.aaLllo+sY...........................cp.................tp....ttp.ptt..ttp...pp.................................................sE.......hhah+sEDEhht .............................-.VslDF-has.p.s.sDacGlKp..LLpQLa....p.....s..hsh.up........................LsDLIlpQ.......sp...lGoslK....................................Ds..c-...............s-s...au.hlolLNL......ppp.......................+...s..p.....slpplpcalhppsp.......................pps.hh....ppLpplLs...............................ss....sp....plGLlloERhlNhPsplssPhap.Lh.cElptAhp........sp....................csaphs.aaLllo+sa......................................pc.....................tt.....ttp.p.pt..ppppp............................................pc.....hhahpsE-Ehh................................................................................ 0 82 134 194 +13705 PF13863 DUF4200 Domain of unknown function (DUF4200) Coggill P pcc Jackhmmer:A6NFT4 Family This family is found in eukaryotes. It is a coiled-coil domain of unknwon function. 
28.90 28.90 28.90 28.90 28.70 28.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.52 0.71 -4.18 69 384 2011-02-28 13:40:14 2011-02-28 13:40:14 1 8 119 0 261 380 4 125.20 24 29.05 CHANGED phlcp+REhhphpptlcpp+.cch...pcppcthpp+ccplpcpppcLpcphh.caspFlp-scsK+pcA.+chppEpptcpphpp-lpclppplspLpppppclcpp.lpphp.YpcaLcp.........Vlp.......tss.......p.......app ......................................hlpc+R-hhhhphsLptK+.pch........p+hppthpp+ccpLppp-ppLccchh..pF-cF...lc....-N-p+phcAh+p....ApcEpct+tp+ptElcclptplptlc.schp+hcpp.lpch.........phYpcaLpp....ls.p..................................... 0 133 157 201 +13706 PF13864 Enkurin Calmodulin-binding Coggill P pcc Jackhmmer:Q8TC29 Family This is a family of apparent calmodulin-binding proteins found at high levels in the testis and vomeronasal organ and at lower levels in certain other tissues [1]. Enkurin is a scaffold protein that binds PI3 kinase to sperm transient receptor potential (canonical) (TRPC) channels. The mammalian transient receptor potential (canonical) channels are the primary candidates for the Ca(2+) entry pathway activated by the hormones, growth factors, and neurotransmitters that exert their effect through activation of PLC [2]. Calmodulin binds to the C-terminus of all TRPC channels, and dissociation of calmodulin from TRPC4 results in profound activation of the channel [3]. 
23.00 23.00 23.00 23.70 22.70 22.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.26 0.72 -3.79 54 314 2011-02-28 14:56:46 2011-02-28 14:56:46 1 5 133 0 205 315 3 96.40 26 32.90 CHANGED G...clPpYLpcpKc.chtcp..pc......phcp.h.tc.p......ts......suhphlsEcERhphLpsL+pphcplppchp.plshh..hD.Thst+pRKpclEccLpplEcsIchhs+spV ....................................................GplPpYLh.cp.+c.phpct..pc................th.pp....tc.t............t.........................suhphls-pERtphL...psL+pphppltp-hp.plshh..hD..Tltt+p+KtclEccLpplEcsIphhp+.p.................. 0 92 109 161 +13707 PF13865 FoP_duplication C-terminal duplication domain of Friend of PRMT1 Coggill P pcc Jackhmmer:Q5T7Y9 Family Fop, or Friend of Prmt1, proteins are conserved from fungi and plants to vertebrates. There is little that is actually conserved except for this C-terminal LDXXLDAYM region where X is any amino acid). The Fop proteins themselves are nuclear proteins localised to regions with low levels of DAPI, with a punctate/speckle-like distribution. Fop is a chromatin-associated protein and it colocalises with facultative heterochromatin. It is is critical for oestrogen-dependent gene activation [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.19 0.72 -3.54 26 408 2011-02-28 15:48:16 2011-02-28 15:48:16 1 13 203 0 273 388 0 70.80 26 28.25 CHANGED ss.............tssptGphp..tsptsttG+sRu+ssRpspsttt...tsthopE-LDtELDpYM.......ussKscLDt-L-sYhpttt.p ..............................................................................................ttstt......tGtht.....tupsstsupupu+....utpt...s.p.uttpt.....c.thosE-LDs-LDsYh...........st...................t..................... 0 89 131 201 +13708 PF13866 zf-SAP30 SAP30 zinc-finger Coggill P pcc Jackhmmer:Q9HAJ7 Domain SAP30 is a subunit of the histone deacetylase complex, and this domain is a zinc-finger. 
Solution of the structure shows a novel fold comprising two beta-strands and two alpha-helices with the zinc organising centre showing remote resemblance to the treble clef motif. In silico analysis of the structure revealed a highly conserved surface dominated by basic residues. NMR-based analysis of potential ligands for the SAP30 zn-finger motif indicated a strong preference for nucleic acid substrates. The zinc-finger of SAP3 probably functions as a double-stranded DNA-binding motif, thereby expanding the known functions of both SAP30 and the mammalian Sin3 co-repressor complex [1]. 25.00 25.00 29.30 28.60 19.40 17.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.86 0.72 -4.33 5 123 2011-02-28 17:15:49 2011-02-28 17:15:49 1 2 78 2 78 124 0 71.50 65 39.79 CHANGED PuAAGPtsGQlCCL..RE-GE..RCGRPAGNASFSKRIQKSISQKKVKI-LDKosRHLYICDFHKNLIQSVRN+RKRKsSD.D ...........................h.sQ.CCL......l-DGE..RCsRsAGNASaSKRIQKoloQK.KLKLslD+SsR.HLYICDaHKNhIQSVRNK.RKRKsSDDt.................. 0 18 23 45 +13709 PF13867 SAP30_Sin3_bdg Sin3 binding region of histone deacetylase complex subunit SAP30 Coggill P pcc Jackhmmer:Q9HAJ7 Family This C-terminal domain of the SAP30 proteins appears to be the binding region for Sin3. 22.20 22.20 22.90 22.20 21.50 20.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.56 0.72 -4.09 36 333 2011-03-01 09:59:32 2011-03-01 09:59:32 1 8 234 1 224 313 0 51.20 37 25.43 CHANGED LpsssL......+RYp+.taplstt...sssK.pQLspsVt+HFpots.lsEp-sIstFlhtl+ .....................LphssL......pRYp+..capltsps...shsK.sQLspsVp+HF.ps.h..s..lsE.p-slshFlYtVK................ 0 62 112 168 +13710 PF13868 Trichoplein Tumour suppressor, Mitostatin Coggill P pcc Jackhmmer:Q8NEH6 Family Trichoplein or mitostatin, was first defined as a meiosis-specific nuclear structural protein. It has since been linked with mitochondrial movement. 
It is associated with the mitochondrial outer membrane, and over-expression leads to reduction in mitochondrial motility whereas lack of it enhances mitochondrial movement. The activity appears to be mediated through binding the mitochondria to the actin intermediate filaments (IFs) [1]. 27.00 27.00 27.50 27.50 26.70 26.70 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -12.75 0.70 -6.05 68 564 2011-03-01 11:50:10 2011-03-01 11:50:10 1 11 126 0 380 548 18 306.30 19 64.55 CHANGED -pp-El+pLppplptAtssp.RssQltE+cthctcpp.cc-pphsphhct-+p+slpppcccEppctpcptctpptlppQl.cE+cpp+tpptcphtpEcphhpphhcplp-E-ptct.tc+pc+ppphtc-lpchpcpptphKctpcpppccE-p+.lhcahppppc+ccphppcppct.ccc.+-chhp+Ltpp.pctpccpp-h-pLptchhtEEtccct....Rp+-pp-tcc+tchpp-lppscppQhptKppphptptpc-cpphpchlp.phtcpcch-phptpc++tpp.catcplpptlpp+cpp+pppcppphp-tpphp...cppppcpphl-ptR.p+hLpchtsp..shhsK ..................................pppcl+phpphlhtspsht.RssQlt-+p.thptp...pt.pc-p.thtph.hctcpppthpcppppcppcppcph....phtptltpQl.pcp.cpp.+t.tt....tcp.pppEtp.hpp.hhcph.p.t....E-.ppct..tp+t.ppppphpptlpch..pppt..ph+pp...ptpppppt-p+.lhph.h..tpc..pp....c...cp.ph....p....t.cppch....ppp..ppth.h.p....t.l..tp.t..p.c..p.pp.p.t.ch-plht..chhtcphpcc..........cpct...p...p...ctp.p+.chhpc.lhpsctpQht.Kt...pt...h...ttpt...tc...ptt.....hpphhp....t.pp.tpc.cp.ptpcpp...tht.phtptlppp....lppppppt.tppptphpch...p.t.t....p..ttt..pthl..pp.h.................................................... 0 166 202 285 +13711 PF13869 NUDIX_2 Nucleotide hydrolase Coggill P pcc Jackhmmer:O43809 Domain Nudix hydrolases are found in all classes of organism and hydrolyse a wide range of organic pyrophosphates, including nucleoside di- and triphosphates, di-nucleoside and diphospho-inositol polyphosphates, nucleotide sugars and RNA caps, with varying degrees of substrate specificity. 
27.00 27.00 27.40 27.00 26.80 26.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.97 25 336 2012-10-02 00:00:35 2011-03-01 14:52:41 1 6 247 29 220 316 2 175.50 52 74.87 CHANGED pslplYPL.oNYsFusK....-shhc.KD...tolscRhpRh+ssYpppGh.RpoVcullLVHcasaPHlLLLQ.l...sss....h.....aKLPGG+L+sGEs-.l-GLKRKLscpLu..............ss......c..sh...................pssWclG-slGpWWRPsFEs.hYPYlPsHIT+PKEppKLalVpLPc+stFtVPKNh+LlAlPLF-LasN.tpYGshISulPplLSRF ............................................p.slplYPL.oNYTFGoK....Es..E.cD.........sSVsuRhpRhc-cacc..hGM..RRoVEGVLlVHEHshPHlLLLQ.l...uss......F....................FKLPGG.cLpsGE-...............E.lEGLKR.hLs-pLu...............t.....s....uh...............................tp-W.ls.DslupWWRPNFE.s.....YPYlPsHl..T..+..PK......EpKKL..a...lVpLsc+.........thhu.V......P.K.NhKLlAsPLFELYDNsttYGP.lSulPplLSRa......................... 0 77 118 173 +13712 PF13870 DUF4201 Domain of unknown function (DUF4201) Coggill P pcc Jackhmmer:Q2M329 Family This is a family of coiled-coil proteins from eukaryotes. The function is not known. 27.00 27.00 27.00 27.30 26.90 26.60 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.97 0.71 -4.83 56 290 2011-03-01 17:10:52 2011-03-01 17:10:52 1 9 106 0 186 284 10 165.80 24 35.01 CHANGED hppp.csplschRLphlphpcplschppplcphEplu-s...LphhDF.plph-stsLscKIEERNp-Lt+L+pphspslphlsHh+cKhp.......hlppphpph+ppLpctpcphpclRcplhpschc+s+lcppppc..L+ppuulhphPsLhtD.Y-pphpplpphccslcpL+c..phcplptplpt ..................tppcp.hschRLc.hplcppht+hptplcpp-plu.-s...LphlDFpQL.cI-Npphtc+IEER.N.....pELhcL+.tp........sspsl............phlsph+..............cKhp................hhtpc.pphcpcltptpc.ltchcccltpsctp+pphcppppc.L.cpptuh...h..th......Pplh.-.a.pphtphtthcpplpthcc..chp.hphph..t............................ 
0 83 104 145 +13713 PF13871 Helicase_C_4 Helicase_C-like Coggill P pcc Jackhmmer:Q9Y2G9 Domain Strawberry notch proteins carry DExD/H-box groups and Helicase_C domains. These proteins promote the expression of diverse targets, potentially through interactions with transcriptional activator or repressor complexes [1]. 27.00 27.00 27.20 27.30 26.70 26.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.82 0.70 -4.98 47 482 2012-10-05 12:31:09 2011-03-02 11:29:27 1 25 252 0 257 484 56 240.40 40 21.03 CHANGED ssLDpllpchG..sc.tVAElTGRptRl...Vpp.p..sG....p.hthcpRsss...........sNlsEppsFMsGcKtlhlhS-AGuT.....GhShHADhpstN.Q..RRRVHhhLEhsWpADpAIQthGRTHRoNQsssPhac.lsTcltGE+RFhSolA+RL-oLGALT+GpRpsusp.h...................h.up.Nl-osYu+p.ALcphap...tlhtsp....h.............................h................tFhpch.puLp.l.l....................ps.psG.......shp.-.....c......s..sIspFLNRlLuLslchQNsLFphFpphlpshlcpA+tsGp..aDhGlps ..................................................sLDpllpphG..s-.tVAEh.....T.GRptRl....Vpp.s....cG....................p.lhh-sRstt.......................hNhtEpptFMsG.c..K..........tlhIh.S.-Auuo.....GhShpA......Dtps....pN...Q....R+RVH................hhLEhsWpADpAIQth...GRTHRoNQss..s.P..ah..hlho-l.tGE......+.RFsShlA....+RL-o..LGALT..+GpRcsstp...h...................h.sp..Nh-spYu+p..ALcthhp...tlhttp....h................................................................................pFh.p.ch.tuLhtltl..............................t.pss..........hhp..-.....c.........sIspFLNRlLuhtlchQNt.LFphFtphlsthlptA+ttGp..aDhGl....................................................................... 
0 88 144 201 +13714 PF13872 AAA_34 P-loop containing NTP hydrolase pore-1 Coggill P pcc Jackhmmer:Q9Y2G9 Domain \N 27.00 27.00 27.10 28.30 24.30 23.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.09 0.70 -5.65 39 492 2012-10-05 12:31:09 2011-03-02 11:38:16 1 21 227 0 281 484 52 270.10 45 24.25 CHANGED utsHP.ssLVESuuMASVuPPtso....Yp...pLPstlls...sGhLSssQLEoVlYAupAHsthLsGta..tt.............t..th..RpGFaLGDGTGsGKGRQlAGIIhDNalpGR+RAlW.lS+SssLlEDAcRDWpslGut..thplhsLu+............................a+.u......cs......s......h......pc......GlLFsTYuTLRus.....t.t....ttctSRLcQllcWhG.p-.............FDG.VIlFDEsHthtN......A......u......uut.........tsSpQGhAGLcLQptLPcARVlYsSATGAoclcNLAYhsRLGLWG.ts.t..PFsstpsFlsAl-pGGlAAMEllARDLKAhGLYlARuLSFcGVEa-llEppLTp-QlplYDsa .......................................................h...HP.s.lVEouu...h.u..........SVsPPc.t....Yp.....tlst.....thhp..pGhL.SshQLEslhYAspt.Hpt.hL.ssu...........................................R...tGFhlGDGsGlGKGRplA....GlIh-Nal..pG.R.+..+A.............lW.hSh.....SscLh....D.ApRDhp......slG...up....pl.lpsLs+..............................................................hc.u.......c..........hs......h......pc.....G..l...lFsTYu..sLhup.......t.....................tphpoRlpQll....pWhG...pc................................FDG..V...I.lFDE.....sHphpN.....h...s.stt....................tsoppGhAsLcLQ..............ptL.PpAR.V.VYsSATG.A.o..cs+Nh.u.Yh.sR.LGlW................G..ts.t.s.Ft.p............htcFlpAlE.......c..t.G.....VuAMEllAhDhKhpGh..YlA.RpLSFp.GV.pacl.Eh...Ls.......p.hphYst.t........................................................ 0 102 162 224 +13715 PF13873 Myb_DNA-bind_5 Myb/SANT-like DNA-binding domain Coggill P pcc Jackhmmer:Q8NCY6 Domain This presumed domain appears to be related to other Myb/SANT like DNA binding domains. This family is greatly expanded in arthropods and higher eukaryotes. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.35 0.72 -4.08 40 544 2012-10-04 14:01:12 2011-03-02 13:50:17 1 24 83 0 347 698 0 75.80 23 22.09 CHANGED Rp.tpFotpEhplLlchlpcppplLhs+tsssss....hptK.scsWcpIspchNu..hus.s.pRohppl+++WpchKppsKc.chtp ..............................Rt.pao.pEpphLlph.lc.pp..hpll...s+pss.t.s.s.....htt.K.p...csWcpIspchNu......hss..s..p..R....shppl++pWpchKtps+cch..t................................ 0 96 119 238 +13716 PF13874 Nup54 Nucleoporin complex subunit 54 Coggill P pcc Jackhmmer:Q7Z3B4 Domain This is the human Nup54 subunit of the nucleoporin complex, equivalent to Nup57 of yeast. Nup54, Nup58 and Nup62 all have similar affinities for importin-beta. It seems likely that they are the only FG-repeat nucleoporins of the central channel, and as such they would form a zone of equal affinity spanning the central channel. The diffusion of importin-beta import complexes through the central channel may be a stochastic process as the affinities are similar, whereas movement from cytoplasmic fibrils to the central channel and from the central channel to the nuclear basket would be facilitated by the subtle differences in affinity between them [1,2]. 
30.10 30.10 30.90 31.20 30.00 30.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.78 0.71 -4.52 49 335 2011-03-02 14:23:06 2011-03-02 14:23:06 1 13 275 1 231 340 1 138.80 31 29.89 CHANGED p..Ps..shs.cpWcpAh......pcpPsP.p....hlPV.hhGFp-LtpRhphQcpplsphpthLpplpsp.lspL.p.p+.chpsss+ltct+p+ptpLp+RlLclhsplp..lL+p+Ghs.........Lss-E-clppclpplppclssP.sth......u+lsEL.......hupl........p ................Ps.ssDsthWcpAh......hcsPsP.p..........hlPV.h.hGFp-LtpRhchQcphspptps+Lc.....t....lspp.ls.........p.......L.p+c.phssss+ltph+c+phpLs+RhLpl.....ht+hp..lh+ppGhu............................lps-EEpLpppLpplppplp..sP...sthp..........uRhpELhuph.................................................... 0 77 125 187 +13717 PF13875 DUF4202 Domain of unknown function (DUF4202) Bateman A agb Jackhmmer:A1B4I0 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 187 and 205 amino acids in length. There are two conserved sequence motifs: LED and KMS. The function of these proteins is unknown, although many are incorrectly annotated as glutamyl tRNA synthetases. 
21.60 21.60 25.70 25.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.19 0.71 -4.64 51 144 2011-03-05 16:24:37 2011-03-05 16:24:37 1 6 142 0 91 151 13 182.80 41 82.94 CHANGED appAlstIDsApupDPspp...h......p....spshPtELhYup+MochL.............p+hsPs.ASE..............sLpLAsRuQHlcRWclPRusYPhs+sGYhpWRssLtphHAphssplh.....hpuGYsppphpRVutllcKcsl.....K......p.............Ds-sQsLEDVACLVFL-chhtsFsppH...D.--KllcIlcKTWtKMS-cG+phALp..lsLs-phtsLlt ..........................ppAlthIDtApspDPpt...h..........s..spshPhELhYAp+MocaL.............tph.pP.s....A.....S-..............hLp.LAsRuQHlcRWclPRssYPhsRs.GYhpWRssLtptpAspssclh.....hpsGYspp-hpRVusLlpK.csL......K..p..................Ds-sQsLEDVACLVFL-c.Fp..pFtpc+.................D--KhlsIlcKTWtKMSpcG+phALp..lp..hs-phttLl.t.................................... 0 23 57 76 +13718 PF13876 Phage_gp49_66 Phage protein (N4 Gp49/phage Sf6 gene 66) family Bateman A agb Jackhmmer:A1B8I7 Family This family of phage proteins is functionally uncharacterised. The family includes bacteriophage Sf6 gene 66 Swiss:Q716B1 as well as phage N4 GP49 protein Swiss:A0MZD7. Proteins in this family are typically between 87 and 154 amino acids in length. There is a conserved NGF sequence motif. 27.00 27.00 29.70 29.50 25.10 24.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.62 0.72 -3.91 52 176 2011-03-05 16:37:05 2011-03-05 16:37:05 1 4 160 0 15 139 2 88.40 41 69.80 CHANGED +lT.ppl-shIsppp......Yhsttst...................................plTlCslsLcNGFpVs.GcSAC.lsPpNFDsElGcclAcc....sAhsKlW.LEGY..hL+ppLt ......................+lT.ppI-ulItppp......Yhstts.sh..................................hlThCllsLcNGFsVs..GcSAC.ssPcNFDsEIGc+lA+p....sAhsKIW.LEGYhLppph.t....... 
0 4 8 12 +13719 PF13877 RPAP3_C Potential Monad-binding region of RPAP3 Coggill P pcc Jackhmmer:Q8IW40 Domain This domain is found at the C-terminus of RNA-polymerase II-associated proteins. These proteins bind to Monad and are involved in regulating apoptosis.\ \ They contain TPR-repeats towards the N_terminus. 23.00 23.00 23.50 23.90 22.90 22.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.81 0.72 -4.01 50 325 2011-03-07 13:33:25 2011-03-07 13:33:25 1 54 161 0 200 314 1 93.70 25 18.61 CHANGED .ss.Ppsuh-Ftpsa+plt..t......sss........tphpaLppl....ssssLsplFps.slss-hLtpllpslp.....ph...hh.pc....p.s.....hshphLptLscspRFchhl.hhluss-Kp ...........................PpsuhcFtpsa+plt..s........sss..........tphphLppl.......sPsplsplFps.sLsschlspllp.sLp.....................sh...hh.p-.....................t.s................hlhphLppLupspRF....shhl.hhhSps-K........................... 0 70 97 145 +13720 PF13878 zf-C2H2_3 zinc-finger of acetyl-transferase ESCO Coggill P pcc Jackhmmer:Q5FWF5 Domain \N 21.70 21.70 21.70 22.30 21.20 21.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.14 0.72 -4.39 43 338 2011-03-07 14:47:26 2011-03-07 14:47:26 1 11 245 0 223 333 2 40.40 39 8.27 CHANGED QhhLDhGQpp.h.s.t.p.pCspCGMhYssuss-DpphHp+aHpph .............QhhlDh..G.Qcp...hs.t...h.pCspCGMhYssu.ssEDct.HppaHpp.... 1 57 100 161 +13721 PF13879 KIAA1430 KIAA1430 homologue Coggill P pcc Jackhmmer:B2RV13 Family This is a family of KIAA1430 homologues. The function is not known. 
23.00 23.00 23.30 23.00 22.80 22.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.42 0.72 -3.57 46 280 2011-03-07 14:58:15 2011-03-07 14:58:15 1 16 112 0 222 284 2 98.40 23 30.14 CHANGED tH.pp+lpph+...s........slcsps.......Ppp.ht.......phh...hphpph.phpp.........p+h......pcIp+....-NphLlp+ls..cIt.cppsph...stp.p...........ttp................csls...tp...pRpcE.ht+...IscENpplh+Rlpp ...............................chp+l.php...s........tlDsp.........Ptt.ht......thh...hp..h+ph..phpp...............-+hppI-+....-NphLLp+lsplt..cptsph....sph..pp..........h..................................................pshs....t.pRp+c..t+...IptEN...tlhcRlp.h........................................ 0 119 142 181 +13722 PF13880 Acetyltransf_13 ESCO1/2 acetyl-transferase Coggill P pcc Jackhmmer:Q5FWF5 Domain \N 21.60 21.60 21.60 21.80 21.40 21.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.08 0.72 -4.23 40 326 2012-10-02 22:59:21 2011-03-07 14:59:05 1 6 233 0 213 325 10 69.20 42 14.58 CHANGED shsAhCGlsRIWVpsppRR+GIAocLlDslRssFl.aGhplspsplAFSpPTssGpthAppYsss....ss..FLVY ......sAlCGISRIWVhshtRR+uIAo+LlDsl.Rs..s.Fl.aGthl.........s.........+p.....plAFSpPTssGphhAppYsss....sp..ahlY...................... 
1 57 94 154 +13723 PF13881 Rad60-SLD_2 Ubiquitin-2 like Rad60 SUMO-like Coggill P pcc Jackhmmer:O95164 Domain \N 23.00 12.00 23.00 12.00 22.90 11.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.50 0.72 -4.18 24 319 2012-10-03 10:59:06 2011-03-07 16:02:45 1 4 202 3 206 1807 27 106.70 29 62.84 CHANGED -.lElKFRLs..DGoDIGPhpass.uoTVusLK-pllu.pW............P+-..Kcp.sP+oss-lKLIsuGK.ILE.NN+TLups+..hPhG-lss...slhTMHVVlp.shs-ppsc+p.....psc..s.+pspCsCs ....................................................lpl+hhLh..sGp..ht...hh.ass..ssolsslKcplhs..pW.............................................Pp.-.....hct....ts..p..osst..l+L.I.tG+..hLc...sst......TL....uph+....hshscs.....................slMH.lVs+....s...sc..psptp...........tpp.....t................................................ 0 54 105 159 +13724 PF13882 Bravo_FIGEY Bravo-like intracellular region Coggill P pcc Jackhmmer:C9J6A4 Domain This is the very C-terminal intracellular region of neural adhesion molecule L1 proteins that are also known as Bravo or NrCAM. It lies upstream of the IG and Fn3 domains and has the highly conserved motif FIGEY. The function is not known. 23.80 23.80 25.40 24.40 23.60 21.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.22 0.72 -3.07 19 521 2011-03-07 16:52:03 2011-03-07 16:52:03 1 69 100 0 165 423 0 86.90 55 7.65 CHANGED KRs+GGKYs..V+-KEss.pscs-sp....p-..ssFs.........EYpp.ps..sp.sh.p....tupts..stphpsp...sosDShs-YG.-u..ss.spFNEDGSFIGQYusc+cp ............+Rs+GGKYs....VKEKEDs.psDsEhps..hcD...soFs.............EYS......-.....ED.....cK.PLtt.....................uSpss...st.slKtp...............cSDDS.LVDYG.-G..sp.GQFNEDGSFI...GQYouKK-K................ 
0 29 43 97 +13725 PF13883 Pyrid_oxidase_2 Pyridoxamine 5'-phosphate oxidase Coggill P pcc Jackhmmer:Q8IUH2 Domain \N 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.12 0.71 -4.49 37 676 2012-10-02 11:35:36 2011-03-07 16:55:50 1 10 508 4 389 722 172 156.70 25 61.64 CHANGED Ps.tc-sAphARhLVppssausluTl..So.......ps.....hpGhPasslhShuDsss...s...susGsPaahLosls.ospslpps.sp..sSloho.sp..............pshspp.........hDPpsP........s.Cs...RlplsGplppl....s......sp-hph..A.ccshhsRHPchppW.ps......HsaaahKLplpsIallshFGG...sphl...ss--Yap ..............................................ARpLlpp..sp.hu.sLuTl......sp............t.............................hpGhP.au.s.hl.shu....................s....sssGpPhhhlS..sl.u.....ps+NL.tss...s+.....sSLhls.tss..................................................................tsss.................................................t.ss...RloLhGphp..l....s.............ssphtt.....s..cpp.ah.ppH.P..c..upha.hsh........tcFtah+l.p.lpp.lh.hluG.FGt...hthl...sspp...t.................................... 0 100 209 301 +13726 PF13884 Peptidase_S74 Chaperone of endosialidase Coggill P pcc Rawlings N Domain This is the very C-terminal, chaperone, domain of the bacteriophage protein endosialidase. It releases itself, via the serine-lysine dyad at the N-terminus, from the remainder of the end-tail-spike. Cleavage occurs after the threonine which is the final residue of the End-tail-spike family, Pfam:PF12219. The endosialidase protein forms homotrimeric molecules in bacteriophages [1]. The catalytic dyad allows this portion of the molecule to be cleaved from the more N-terminal region such that the latter can fold and presumably bind to DNA. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.88 0.72 -3.84 244 1011 2011-03-09 16:45:01 2011-03-09 16:45:01 1 59 650 2 275 946 2602 57.10 29 7.10 CHANGED SDpRhKpsI.psl.ps..s...................Lsplpp..lpshsa.....pa....h..................................ttttsp.pch..GhIAQ-lpplhPc .......................SDpRhKpsI..psl..ss.u...................Lcplpp....lph..h..pY....pap.........................................................tpstp..pch..GlIAQ-lpplhP................................. 0 109 164 224 +13727 PF13885 Keratin_B2_2 Keratin, high sulfur B2 protein Coggill P pcc Jackhmmer:Q6L8G4 Family \N 23.00 10.00 23.00 10.00 22.80 9.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.60 0.72 -10.95 0.72 -4.24 95 2411 2012-10-03 03:07:01 2011-03-09 16:51:25 1 24 43 0 981 3770 23 46.10 43 80.47 CHANGED CCpPs.CCpsoC..CpsoCC.pP..oC...CpsoCC..p.soCCpP.sC....Cp..ss..CC.pPs ..................................CCp.ss..C.....C..p..P..o..C.......C.p.....s..o.C....C.pP.....s.C..........C.p.s....s..C.C.....p...s.....s...C....C.......c.P....sC..........Ch.......ss....CC.ps............................................. 0 183 188 227 +13728 PF13886 DUF4203 Domain of unknown function (DUF4203) Coggill P pcc Jackhmmer:Q9NS93 Family This is the N-terminal region of 7tm proteins. The function is not known. 
29.20 29.20 30.10 29.50 29.10 29.10 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.66 0.70 -4.87 33 330 2011-03-09 17:08:44 2011-03-09 17:08:44 1 7 171 0 235 299 5 186.80 25 38.80 CHANGED sllsslhs.lhGllhshhGaRha+shh.Fl.....sGahhuuhlshllh....hphps.........hssththtsshls.GlssGlllhhl.hshh.hshGlhhhGllhGhhluhhhhhshhsph.hhhp.s.hshh..h..h..hhshullsul..lsLthp+.hhsIluoulhGushlhhulDhFhps.tlp.hh.htlhtphhsps..h.thh.shPhsh.........salhLusasllhlhGlllQh+ ..................................................t.lhsshhh.lhGlhhshhGaRhh+shh..Fh.....sGhhhuuhhh.hl...Lh.....hp.ps................htpph...htsshhsulhh...Gl.hhshh..hshh.hshulhhsGllhGhhlu.hhh.h..h.h..s.th....p.s.s.hsh.h...............sh..hhshull..hsl..lsl.th.+...hhsll.uoulhGushlshulDhahps...l...h.h.lh................tc.h..p.....h.th........s...Phsh..............hlhluhhshlslhGlhhQh+...................................... 0 82 113 176 +13729 PF13887 MRF_C1 Myelin gene regulatory factor -C-terminal domain 1 Coggill P pcc Jackhmmer:Q9Y2G1 Domain This domain is found just downstream of Peptidase_S74, Pfam:PF13884. The function is not known. 24.10 24.10 24.10 26.90 24.00 23.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.62 0.72 -4.93 31 153 2011-03-10 11:00:22 2011-03-10 11:00:22 1 9 79 0 92 129 0 35.90 68 3.83 CHANGED LlVNK.....-RIFMENVGAVKELCKlTssLETRI-c.L.....ERh .........................LhVNK.................-RIFMENVGAVKELCKLTsNLEoRI-E.LE+W............. 0 24 30 59 +13730 PF13888 MRF_C2 Myelin gene regulatory factor C-terminal domain 2 Coggill P pcc Jackhmmer:Q9Y2G1 Domain This domain is found further downstream of Peptidase_S74, Pfam:PF13884, and MRF_C1, Pfam:PF13887. The function is not known. 
22.10 22.10 22.10 23.10 21.50 21.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.18 0.71 -4.38 5 132 2011-03-10 11:11:45 2011-03-10 11:11:45 1 9 52 0 64 109 0 132.00 41 16.50 CHANGED TIoSIQIlEIQQlID+RYCSsuLpCGPGNYsY+IPVNKaTPTNVKFSLEINTTEPLIVFQCcaTL.GNhCF+uph.....sputsQSpcVstpMTQGYQHIWSLPVAPFpDSTYHFRVAAPDLADCSTDP.as.GpFFTDYFFYFYR+C .................................................oloSIQlhE.p..IsppYC.ut.s..tCt.......s..G...N..aoYpIPls.ptTP.hp.lploLphNo.o..p..Pl...Vh...Cphs....tp.C.cu...............................uh.p.p..h..c.sp....p..p..TQGh.p....H.hWslslhsFp-.sYHFRVAhs..s....As.CS.o....-s.hh....hTDYaFaFYRhC....... 0 12 16 33 +13731 PF13889 Chromosome_seg DUF4204; Chromosome segregation during meiosis Wood V, Coggill P pcc Jackhmmer:Q32MH5 Family The proteins come from eukaryotes, plants and animals, and are necessary for chromosome segregation during meiosis. 21.60 21.60 23.30 22.10 21.30 19.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -9.00 0.72 -4.09 23 266 2011-03-10 12:01:03 2011-03-10 12:01:03 1 4 170 0 181 269 0 62.40 45 8.09 CHANGED ssLRYAlHLRFhsP.................p..............................cpRaYLYssIRVVFspRps..DuDEu.............cL+s.hcaPss.......P..........+Yhsh ...............................................lLRYLlHLRFpss..................p...............................................pGRhYLasDIRllFupRs...-sDpu.............cL+s.schPps.......P.+aSPh................... 0 38 76 124 +13732 PF13890 Rab3-GTPase_cat Rab3 GTPase-activating protein catalytic subunit Coggill P pcc Jackhmmer:Q15042 Family This family is the probable catalytic subunit of the GTPase activating protein that has specificity for Rab3 subfamily (RAB3A, RAB3B, RAB3C and RAB3D). It is likely to convert active Rab3-GTP to the inactive form Rab3-GDP. Rab3 proteins are involved in regulated exocytosis of neurotransmitters and hormones. 
The Rab3 GTPase-activating complex is a heterodimer composed of RAB3GAP and RAB3-GAP150. This complex interacts with DMXL2 [1,2,3]. 21.80 21.80 21.80 25.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.48 0.71 -4.87 27 198 2011-03-10 13:10:59 2011-03-10 13:10:59 1 5 129 0 135 196 4 158.80 43 18.68 CHANGED sssppststthcG..thp.....hs.shhLLps.s-slatPhTQcssshTEDhlc-ctphLtphGsstp..upthptp........hQsssLlSDMpAF.................KAANPu.....shhEDFVRWa.S......P+Dal......ppt..sssp..........ps.s...............hsscGpLSpR......Mpt...pG............NhWtclWcsApshPApc.Q+.LFDstcEuEKlLHaLEs ......................................................ts......thpscGphtt....hs..phpLLcs.uEPLYlPlTQ-ss.Ph....TEDhlcEpsEllt+LGsssp..Gsthpsp........hpossLLSDMpuF..................KAANPG.....shLEDFVRWa.S......P+Dal-cp.....shsp..............pss......................................hsh+GpLStR......Mph.....u............NhW....hcsW-sAcPlPAp+.Q+pLFD-s+EAEKVLHaLt......................... 0 51 75 110 +13733 PF13891 zf-C3Hc3H Potential DNA-binding domain Coggill P pcc Jackhmmer:Q9H9L4 Domain This domain is likely to be the DNA-binding domain of chromatin re-modelling proteins and helicases. 27.00 20.00 28.30 20.20 26.20 19.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.03 0.72 -3.91 48 422 2011-03-10 13:43:28 2011-03-10 13:43:28 1 8 116 0 277 413 0 65.20 32 16.65 CHANGED p..C.....st....ttCpppslshscaChp..HILpDtp...QhLa+tCsh.........ts.ppCs..pPl...psttps......hCshHhp ..................t.C.....ssttCsppsLshscaChp....H..ILpDps...QshF+pCshh........tssppCspPl...csp..ppss.....hCstHh....................... 0 87 123 197 +13734 PF13892 DBINO DNA-binding domain Coggill P pcc Jackhmmer:P53115 Domain DBINO is a DNA-binding domain found on global transcription activator SNF2L1 proteins and chromatin re-modelling proteins. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -10.97 0.71 -4.14 27 402 2011-03-10 17:40:01 2011-03-10 17:40:01 1 13 236 0 268 411 1 108.90 33 8.46 CHANGED t+phspphcplWtsls+KDlsK......spRhhsssppsphtNh+KhApLst+Es......R.+ht.........h+opKs.KDhthRA+RlhREMhsFWK+.E+-ER-hRK+sE....KEAhEptK+-EE.REA+RQpRKLNFLIoQTELYS.HFhu+Klcs...sc..h- ........................................t...............................................................................p....................t..........t......c.....p...p-h..hhK+.-+..pch+.+...t.tp....Kp.t..hEtt+ptEE....t....c...E....sc........R..Q...p.+..+LsFLlpQTELau.HFlsp+hp.....tt....................... 0 78 133 201 +13735 PF13893 RRM_5 RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) Coggill P pcc JCSG:Target_421317_RF20609A Domain The RRM motif is probably diagnostic of an RNA binding protein. RRMs are found in a variety of RNA binding proteins, including various hnRNP proteins, proteins implicated in regulation of alternative splicing, and protein components of snRNPs. The motif also appears in a few single stranded DNA binding proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.54 0.72 -4.12 106 5485 2012-10-02 20:46:34 2011-03-18 17:52:00 1 201 405 27 3107 36633 716 56.70 21 11.51 CHANGED l.hphhppa.G.p.l..pp.lth.hppp.........ptuhlp...a...s.shpsAppshp..th..s.ssh...h...ts..p..t.lplp...au .........................................phhsp.a..G..p...l.........tp..l...h..l...hppp...........................sp.A.h..lp..................a........p..shps....A.pp.A.h.p............tL.......s..uph..............h.........tu....p.......lplp.h................................... 0 924 1420 2227 +13736 PF13894 zf-C2H2_4 C2H2-type zinc finger Bateman A agb Jackhmmer Domain This family contains a number of divergent C2H2 type zinc fingers. 
23.00 9.00 23.00 9.00 22.90 8.90 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.08 0.74 -7.53 0.74 -3.23 594 26727 2012-10-03 11:22:52 2011-03-21 11:17:14 1 4325 623 12 17330 257549 774 24.50 26 7.18 CHANGED a......tC.......sh......C......s.............p.....p..........a.......ps.....t.....p...p.lp.pHhp..p.p...c ..........................................atC..............sh...........C..............s............................p.......p.....................F..............ss...........p.............p.....p.Lp.pHhp...p.p.................................. 0 4311 6366 12692 +13737 PF13895 Ig_2 Immunoglobulin domain Bateman A agb Jackhmmer Domain This domain contains immunoglobulin-like domains. 23.80 10.00 23.80 10.00 23.70 9.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.50 0.72 -3.85 70 24350 2012-10-03 02:52:13 2011-03-21 11:49:24 1 2345 524 273 12457 102175 132 89.10 15 18.46 CHANGED ps..........hlp...sss.......ss.......l..pp..Gpsl..sL.pCp....ss......s......tss..s.p....hp..hh..+.s..............................s......phl.s.......p.s.....................ps..........................a....h.........pp.............s............s...h.s..suGp.YpCps......p..st.s...........s...pt...Sssl.pl.pV 
.........................................................................................................................................................h..........................h...........G..p..s..l......sL....p.Cp.....sp.................u............................p..s.......s..p...........h.p.....W...h.......+...s..................................................................................................................................................s.............p..l.s....................p.s..............................................................................pp.......................................................hh...l................ts.................................s......................................s.......h...p......c...s....G...p...Y....p.C.ps............ps...t....h......................t..........p.t.............................................................................................................................................................. 0 2991 4093 7204 +13738 PF13896 Glyco_transf_49 Glycosyl-transferase for dystroglycan Coggill P pcc Jackhmmer:O43505 Domain This glycosyl-transferase brings about the glycosylation of the alpha-dystroglycan subunit. Dystroglycan is an integral member of the skeletal muscular dystrophin glycoprotein complex, which links dystrophin to proteins in the extracellular matrix [1,2]. 
27.00 27.00 28.10 27.10 26.70 26.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.83 0.70 -5.33 33 771 2012-10-03 05:28:31 2011-03-21 13:31:38 1 17 123 0 493 733 5 188.60 23 52.85 CHANGED slThsTHushcaL.csl.sls..c+Wp.GPlSlAlasPGt-hptslssIphlhpssssp....l+chsohHlhFptpc.hs...phs.....h.ps.....hph.shp.....C...sphh.sht....ph...h......t.......acs...ssphsYPlNlhRNlARpuAp.TcalhssDI-lhPSsGhlpph..hphls.p....t.p...........s.p.sppVaVlshFElpss...........s..slPpsKtELhphhpsupAh.FHpclC.pCHpsPshpcWhpts.....cs.s....thsssap...h...sh...p.p....tWEPhYIus...p...s-PhaDERhshcGp.s+hoQsathClhsYp...........FplLssAFLVH+.GIKps...tspts.pt...ch.tp...pt...pphltpchhpEhchhYs .........................................................................................................................................................................................................................................................................................................hP.N.hRNhAh...........s.hhhh.Dh.-h.h.........shh..h.....t....................................................shhl.hFp.h............................................................................hs...s....p.t....L....h.t.........................t..............h........h.........sp...t.....W.t..........................................h................aEs.hl.............t.......................s...h.h...h..sh.......................h........hth...h..t..hp.....................h.lhs.sa.h+...sh............................................................................................................................................ 0 224 269 401 +13739 PF13897 GOLD_2 Golgi-dynamics membrane-trafficking Coggill P pcc Jackhmmer:Q6PL24 Domain Sec14-like Golgi-trafficking domain The GOLD domain is always found combined with lipid- or membrane-association domains [1]. 
27.00 27.00 27.50 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.77 0.71 -3.94 11 231 2012-10-03 07:10:23 2011-03-22 13:37:50 1 7 93 0 147 218 0 111.20 47 29.43 CHANGED VGRGEllTVRVPTHscGopLFWEFATDcYDIGFGVYFEWTsssSs.....sloV+VSESoD-Ec---spp..................tsu-lEpGs.htt...s+.....PplsEIlPVYRRDsHcEVhAGSHpYPGcGVYLLKFDNSYSLWRSKTLYYRVYYT ...............Vt+G-slTlRVPTp.pG.p.hlhWEFATDpYDIGFGlaF...-Ws.ssss.....tloVplS-Ss----p....t-t.................................................tp.ppup......pp.t......t+.............sphsEllPlY...RRDsHp-V.sGSHpYPGcGlYLLKFDNSYSLhRuKolYY+VYY............................................ 0 62 73 105 +13740 PF13898 DUF4205 Domain of unknown function (DUF4205) Coggill P pcc Jackhmmer:A8MYZ0 Domain The proteins in this family are uncharacterised but often named FAM188B. 21.00 21.00 21.00 21.00 20.10 20.90 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.93 0.70 -5.71 11 398 2011-03-22 14:02:10 2011-03-22 14:02:10 1 11 139 0 269 395 9 225.10 25 57.41 CHANGED +plLFGoshpsFsp-W+ppsFsFs-s..tltYGLhQt+GGPCGVLAuVQAhlLKtLL..Fspss............th..tshhpssssp+ppsLstALADILWpAGc.ppcAsVsLsssc.phsss.scYptDslpEpLpLashpph--hptFlppals.Fc.pushGsILhlYSsILSRol-plppDhDsss.s....pLlGu.HG.a.CTQElVNLlLTGRAsSNVFNGs.......cpspssh.....s.L+Glts..RS-IGaLoL.....aEH..apsspVGShLKTP+hPIWVlCuE.uHYSVLFsssp-LlsDa+h....E+pF-LYYYDGLssQpctI+..LTVDspupthpss.p......t.p.....clhPPLEhsIRTKWtpAslsWNG 
........................................................................h.at............ul.p..p............sG....s....p.ullssl..Quhllp.hl....h..t..............................................................p.phL.....h...sh....t....ll.....................................ht..............t........t...............h..h...................h.................t.................................................................................................................................p............t.................h..........t..h...................h......p.....h...t......p..tph..h...............h................t..h.....at..........t....Gsl.hhhhShlho+uhp..........l.p.t-hpssp....t.............Llts..hs..h..ss...tllNLhLsGpAsstlass.......................................p...th........hhGl.t..pstl.GhLth...............hpt....p...t....p..VGphhKsP.phPlWlhssp.sHhslhFs...p.....tl...t.......................................................................................................................................................................... 1 88 117 187 +13741 PF13899 Thioredoxin_7 Thioredoxin-like Coggill P pcc Jackhmmer:O95994 Domain Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.77 0.72 -3.96 51 3155 2012-10-03 14:45:55 2011-03-22 14:27:14 1 52 1979 19 1031 6223 2371 86.30 24 17.25 CHANGED shhs..sh-pAl.tpApppsKslhl.hh.p..sshC.pCpsh.ccs.hhss.pplpp.lt.c.calhlphsh............sspct.ph.........thtt.......ph.......sPtlhhlss ......................t......hppul..tp..u..p..s......p..s..KP.V.hl..Dh..t..A-W...CssC+..th..c...ch..s..hsc...s......pVpp.....ht..p..s.h..lhlps-h........................sssss..p.............................htp.................hG.................hPtllhhs........................................................... 
0 347 605 827 +13742 PF13900 GVQW Putative binding domain Coggill P pcc Jackhmmer:Q6ZWE1 Domain This short domain is often found nested inside other longer domains. The function is not known, but the domain carries a highly conserved GVQW motif. The members are rich in proline and cysteine. This may be a binding domain. 28.10 28.10 28.10 28.10 27.90 28.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.37 0.72 -3.98 35 1257 2012-10-03 02:24:44 2011-03-23 13:28:18 1 170 16 0 72 778 423 41.40 60 18.50 CHANGED VuQAGVQW+sLGSLQPsPPtFKpFSCLSLPSSWDYR+sPPpPAN.Fs..lF .............ssQAGVQ...W.p.c..LuS....LQP...P.P..sFK.pFS.CLSLs...SS...WD.Y.R.....p..P.sp...s....h................... 0 55 60 61 +13743 PF13901 DUF4206 Domain of unknown function (DUF4206) Coggill P pcc Jackhmmer:Q9H714 Domain This is a family of cysteine-rich proteins. Many members also carry a pleckstrin-homology domain, Pfam:PF00169 27.70 27.70 29.40 28.90 27.50 27.60 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.74 0.71 -4.77 28 498 2011-03-23 16:23:42 2011-03-23 16:23:42 1 18 131 0 302 495 2 187.10 36 26.38 CHANGED RhCsYsGcYaCsuCHp.s.stssIPARllcsWDFppasVSptuhphLpphhspPLlslpplNPpLYs+scpLsclpplRppLthlcpYlhsCRhus.p.tptt.........hp..t..s+pa.Lhp.s.schYSltDLlplpsG.shtth...LpplhphsppHlt.sC.LCps+GFlC...EhC......pss...clIaPFpp...t.psp+CspCtulaH+pCap...p..ss........CP+CtRhpp+c 
.......................................RhCpY.GpYaCs.s.CHt.s.spslIPARll+.pWDFpch..........V..Sph..uhchLp..tlhppP.lhslp....p....lNst.LYp+...s...ctLsplpp.lR.ppL.hhh...+sa....l.......h.....o...CR......ut....p.....h.hp.........hp....ts.pa.L.h...-....s....c.......haSlpDLh.pl..p.pG..t....Lssh................LpplhphstpHlh.pCp........lCpt.+.G.F.lC...EhC.....................pss.......cllFPFp.....ps...p...Ct..pCtus...aH.p.pCht.....p...tt.........CP+C.Rhp...p........................................................................................................ 0 93 124 205 +13744 PF13902 R3H-assoc R3H-associated N-terminal domain Coggill P pcc Jackhmmer:Q96D70 Domain This family is found at the N-terminus of R3H, Pfam:PF01424, domain-containing proteins. The function is not known. 25.00 25.00 32.60 28.10 24.60 24.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.75 0.71 -4.19 32 193 2011-03-23 16:27:11 2011-03-23 16:27:11 1 7 156 0 132 182 0 121.20 23 35.98 CHANGED ++phhtcculcpp-slh..u+cGsR+ppRaENc+lLpshhsp..stspp....sshshps.lP............hthsthW.ssahshst-pppphh...pthcsppcpstsptt.......................s.t.ap+ls+cLRttLK+ ................................++t.hhppul+pp-slh..spcGpRpppRaENc..+hLhshhsp..ss.sps..ht.tssh.shts.sP..............hphsthW.ssah.s.tst-cpp..phh......pthpp.ptpppt.tp.tt..t...........................................ts.tthpclspchRthl++.................................................................. 0 35 65 100 +13745 PF13903 Claudin_2 PMP-22/EMP/MP20/Claudin tight junction Coggill P pcc Jackhmmer:A6NFC5 Family Members of this family are claudins, that form tight junctions between cells. 
31.20 31.20 31.20 31.20 31.10 31.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.43 0.71 -4.57 37 835 2012-10-03 00:20:40 2011-03-24 16:16:45 1 11 100 0 558 2183 0 182.30 16 73.69 CHANGED hlAlsLllluhsTspW..lp...ss..spp..............pt..............................t.....h.t.t...hh....pp...Gh.ahhshh..................................................h.hhshshhhluh....shs.hhuslht.h.hussp...ps.t................thhhh..s.uh..h.hh.....suus.hhhhslshasthhp......h.......hpp...th..pp.....tsh....hhthGaSFhLsssushlsllsshLhLh ................................................................................................h.huhhhhhhuhs..o..s..h.W..h......hp.....th............................t................................................................................................................h............tp......Gl..a.p...h....Chh.......t.........................................................................................................................................................t..hhh.tht...hh.s...tsh.h.....hl.ul.......sl.h..lhu..h...l..hs....l..h...u..h..hp.......ps.t....................................hhhhs...s..Gh..h.hl................huG.l..hhll...ulh.ha.h.t.t..hp.......h......................h...t........tph.....................phpauWSa.h..lAhsuh.hhthlu.uhlhh.h............................................................................... 0 115 170 350 +13746 PF13904 DUF4207 Domain of unknown function (DUF4207) Coggill P pcc Jackhmmer:Q96HJ3 Family This family is found in eukaryotes; it has several conserved tryptophan residues. The function is not known. 
25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.39 0.70 -5.01 19 192 2011-03-24 17:00:58 2011-03-24 17:00:58 1 4 70 0 84 188 0 234.00 25 57.05 CHANGED s.Ss.usSspSp..hSsp.....sspsh.tspspspsps....pps..spShsshhcsh.p.............................pocsps.ssls.hphs..spsSs.....tsp.......sah+.plcth+lc+pst....pAY-sWhuuKptp.p.+.ptc.h....ttcpEcc+pc..sthRp+LAcE+YcpWh+pKspQpppp+p.p.pt......tpp........sup.....pustu...........................ss+phs....t-ps+c+lpcW.hcKhcppppcRpcpcptppp+ppccpcR+phuEtAapcWhpsVsp+PKPVPhNQGhcu..LRGTlS......slYlNPh.W .........................................................................................................................................................................................................h...tts..u..S........t.p....sppp.pstp....p..............h..tsh..............................pspsts.s.h..hp.t............o.......pp.......s..c.p.cp.ch.cps...........tsa-sWhstKpt...pcppp.......t.h.....tpppE+cccct...tttpRppl.A.pppa..ppWhppKpcppppp+t.p.pt......pppts...psthp.....................................p+ph..t-c..sppchppW.hcKhp....ppp..pc....+...pc....pc..ptpt...p+ptp.pcR+phuptAapcWhppscp+s+Pss.hs.Ghs....LpGhhS.............hahNP..W.............................. 0 17 22 47 +13747 PF13905 Thioredoxin_8 Thioredoxin-like Coggill P pcc Jackhmmer:Q5VZ03 Domain Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.07 0.72 -3.87 83 2061 2012-10-03 14:45:55 2011-03-25 14:24:33 1 60 711 33 871 11398 5021 94.50 22 26.84 CHANGED s..+hlllhFhus......hst...t..s......pp.....hhst.Lp.p...h...hpphp.......................tp....lpllhlsh............D...p.s..t.p..ph...pphhpph..............s.hh.shs.hss..t.t......pp..pl..........tpth.s..lps.................l.Pplllls.psGp.....l ..................................Khlll.Fh.us......WCs......s...C..........pt.....hhPp...Lp.c....h...hp.chp..............................tps..hpl..l..h..l..Sh......................................D........c..s....p...c......p..h..........p.p..h.h.pph.........................s...a.h...p......h.s...hss....s.t.........pp....pl...................hpta....s....l.p.u..............................l.P.s....h.llls.tpGp........................................................... 0 370 608 774 +13748 PF13906 AA_permease_C C-terminus of AA_permease Coggill P pcc Jackhmmer:Q7Z475 Domain This is the C-terminus of AA-permease enzymes that is not captured by the models Pfam:PF00324 and Pfam:PF13520. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.63 0.72 -4.10 56 923 2012-10-03 01:44:59 2011-03-25 14:26:22 1 16 191 0 595 2651 76 50.00 44 8.37 CHANGED FpsPhVPhlPsluIhlNlYLhhpLsshTWlRaslWhsl...GhllYhh....YGhpHS .................FtVPhl...Ph.lPslSIh....lNlYL.MhpL.....st..tT..W...lR..Ful.....W.h...hl...Ghl.l.YFs....YGlppS.............. 0 136 243 422 +13749 PF13907 DUF4208 Domain of unknown function (DUF4208) Coggill P pcc Jackhmmer:Q86WR6 Domain This domain is found at the C-terminus of chromodomain-helicase-DNA-binding proteins. The exact function of the domain is undetermined. 
23.00 23.00 23.10 23.10 22.50 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.20 0.72 -3.79 31 331 2011-03-25 14:30:37 2011-03-25 14:30:37 1 14 214 0 224 312 0 101.20 32 7.30 CHANGED sc...s..t.tp.ptc-.h-spshsp..CKchMcPl++sL++Lc+s.sc....uLs+c.-hlphl+ppLhsIGcc....Icpplpph...pps..p.cchcc+LWhaso.pFhs..hc...upKLhphYc+h .....................................hst....t.ptcph-.p.pshsh.CK-hh+PlKcsLKpLcps...pc..........sL.spc.-plpch+ppLlpIG-+....IsppLpph..........sss...phcpa..+cpL....WhFVS..pFot..hc........uc+LpclY++.h................... 0 52 94 163 +13750 PF13908 Shisa Wnt and FGF inhibitory regulator Coggill P pcc Jackhmmer:Q8N114 Family Shisa is a transcription factor-type molecule that physically interacts with immature forms of the Wnt receptor Frizzled and the FGF receptor within the endoplasmic reticulum to inhibit their post-translational maturation and trafficking to the cell surface. 27.40 27.40 27.40 27.40 27.20 27.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.97 0.71 -12.11 0.71 -4.10 11 427 2011-03-25 14:59:58 2011-03-25 14:59:58 1 13 55 0 239 379 0 153.30 22 54.82 CHANGED tpGEhCh..hsshsphcPsa.CPp......p.paCCGoCuhpYCCss.h+psphsps.sCss.-p..sh.t....s.......chtpshc.sPs...pshtssllVulhhhVlhl.lshllshsC.pCsL.Kh.....................pR.ppPshopss.usshpppshsQs.st.........sph.P...usph..QsYps.sspsu.......h.ts.h.sh.hs..Ph.h.ssssssY 
......................................................s...p.Ch.hahsh.utap..FpCs.............hhhC...CGsC.hRa.CCt....h....p..hp.Qt.tC.s.....................................ss..............h.lhsulh..hh.lh.h..l....hshlss.h.hs.....p.s.h.h.K.................................................pp.....p.......h..t..p...sh....sp.........h.t......t..s........t........................t..s.........p...tth.s....sst.s...............h......................................................................................................................................................................... 0 51 77 135 +13751 PF13909 zf-H2C2_5 C2H2-type zinc-finger domain Coggill P pcc Jackhmmer:Q6IQ32 Domain \N 25.00 13.60 25.00 13.60 24.90 13.50 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.10 0.73 -7.42 0.73 -3.42 73 3546 2012-10-03 11:22:52 2011-03-25 15:19:35 1 546 228 2 1708 10506 142 24.70 29 7.43 CHANGED apCst..Cs.ats..p.tp.p.lp.pH.hphh.Hs .........a+Cph..Cs.apo...s.pp.s.Lp.pH.hppt.H........ 0 489 585 1020 +13752 PF13910 DUF4209 Domain of unknown function (DUF4209) Coggill P pcc Jackhmmer:Q5T6L9 Domain This short domain is found in bacteria and eukaryotes, though not in yeasts or Archaea. It carries a highly conserved RNxxxHG sequence motif. 22.30 22.30 22.40 22.30 21.90 21.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.34 0.72 -3.91 17 134 2011-03-25 16:11:02 2011-03-25 16:11:02 1 2 110 0 71 124 3 91.10 30 15.63 CHANGED hl...ph.lcptlhsh.hh.shptpts...h..hLp........-lLtscplpphlspsl.............hhlL+hhlssPp..GLNLRNllaHGFls.sphp.phhshllhLhhsls ................................hph..t.h.Rtlhphhhhhsp..ps...........hhLc........-LLs..s....p.pltplhGpsl.............h.lL+lhhssPp..GLNLRNhlhHGah..ssp-lss...phsshhlhLhhtl.s............... 
0 32 42 50 +13753 PF13911 AhpC-TSA_2 AhpC/TSA antioxidant enzyme Coggill P pcc Jackhmmer:Q8TBF2 Domain This family contains proteins related to alkyl hydro-peroxide reductase (AhpC) and thiol specific antioxidant (TSA). 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.75 0.71 -4.16 82 573 2012-10-03 14:45:55 2011-03-25 16:14:57 1 12 263 0 372 774 179 123.50 20 46.43 CHANGED tLtpt..t..spLpt.....tG..lp..lhs....lu..hush..s...spcFt.ph.....sta..shp.la...........sDs.stphaptLsht.......................................................................................p.hh.h.th..................h.t.......h..t........th.htthtth...........th..tsh.......................................................................suthtQpGGsalhssss......pllapHc-cs ...................................................................................................s.hpt....ts..lp..lls..lu..hush......ts...hcpFt.ph........sta..s..h-..la.............sDs..p.+..phYptLGht...................................................................................................p.s...th.sh..h...sst............htps........h.p..............th..hpthpph...........s.h..ssh..................................................................................pGshhQtGGhhlh........stss.....plhataht............................................................ 0 127 219 297 +13754 PF13912 zf-C2H2_6 C2H2-type zinc finger Coggill P pcc Jackhmmer:C9JL96 Domain \N 25.00 5.00 25.00 7.50 24.90 -999999.99 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.09 0.72 -7.35 0.72 -3.91 81 8615 2012-10-03 11:22:52 2011-03-25 16:26:12 1 2303 333 9 4881 167936 200 24.80 27 6.24 CHANGED hapCshCpppF..sshpslhsHK.ppa.ppt ...........apCs.t..C.s...cs.F......ssh.p.s...L..h.tHh..p.h....t.................. 
0 845 1687 3070 +13755 PF13913 zf-C2HC_2 zinc-finger of a C2HC-type Coggill P pcc Jackhmmer:Q5TFG8 Domain This family contains a number of divergent C2H2 type zinc fingers. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.00 0.72 -4.21 111 1078 2012-10-03 11:22:52 2011-03-25 17:14:21 1 55 127 0 709 1684 18 24.70 37 10.97 CHANGED phh.C.thCGRpF.sscplt+H....tphCp ......hh.C..hCGRpF.ss.s.s.l.ppH........shCp.......... 0 305 367 517 +13756 PF13914 Phostensin Phostensin PP1-binding and SH3-binding region Coggill P pcc Jackhmmer:Q4KMQ1 Domain Phostensin has been identified as a PP1 regulatory protein binding PP1 at the KISF motif. The domain also appears to carry an incomplete incomplete SH3-binding domain PxRxP further upstream. It is likely that Phostensin targets PP1 to the F-actin cytoskeleton [1]. Phostensin binds to actin and decreases the elongation and depolymerisation rates of actin filament pointed ends [2]. 25.00 25.00 25.10 25.10 24.60 24.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.63 0.71 -4.04 5 99 2011-03-28 10:56:36 2011-03-28 11:56:36 1 5 42 0 53 90 0 122.60 49 22.28 CHANGED APQtAK.Psssh...Pup.pLHPA+PGcsutL.pRGuNTFTVVPKRKPGoL...thSQANsposspsAE---AsuL.......Gs......TLKKRYPTVHEIEVIGGYLALpKSCLoKAGSSR..KKMKISFNDKuLpTTFEYPSESSLlQ .....................................sp.sh...sh.....sh..hass+sG.ssth....pR...uGpTFTVsP+Rpss.............Asst...s.sp.........Ap.t..-sAs.................ss........shKKRYPTscEI.VlGGYLpLp+SCL.sKu..uspR..KphKISFsEsuLpTTapYPSESSllp... 1 6 12 23 +13757 PF13915 DUF4210 Domain of unknown function (DUF4210) Coggill P pcc Jackhmmer:Q32MH5 Domain This short domain is found in fungi, plants and animals, and the proteins appear to be necessary for chromosome segregation during meiosis. 
21.20 21.20 24.10 22.70 20.10 19.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.25 0.72 -3.68 21 277 2011-03-28 12:49:58 2011-03-28 13:49:58 1 5 173 0 185 275 0 63.10 49 7.99 CHANGED LlGSFEESlLpGRhs..P.op.l-GFhAplGloGut......asPtplplPFsshaap.......hu..ssss.....sPYl ..LlGsFE..ESlLpGRhs..P.st.l-GFsAp....lGsoGs...............aCPsHlTLPhsssFYs........sS..-suP.........oPYh.................... 0 41 78 126 +13758 PF13916 Phostensin_N PP1-regulatory protein, Phostensin N-terminal Coggill P pcc Jackhmmer:Q4KMQ1 Domain Phostensin has been identified as a PP1 regulatory protein binding protein. This domain is N-terminal to the PP1- and SH3-binding regions though may carry an additional SH3-binding motif. It is likely that Phostensin targets PP1 to the F-actin cytoskeleton [1]. Phostensin binds to actin and decreases the elongation and depolymerisation rates of actin filament pointed ends [2]. 23.00 23.00 24.40 23.50 22.20 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.81 0.72 -4.00 4 58 2011-03-28 12:54:51 2011-03-28 13:54:51 1 4 30 0 30 50 0 83.50 56 14.78 CHANGED s-ps.hotMPAWKRtILERRRAKLuh.sGt..suP.......GsA-.....PsEptVLhEulGPl+QN.Fhp.EppRpptpptttpcLLtch+tsP ..................AER-RLSQMPAWKRGlLERRRAKLGL....uP.....GE..P.uPh.s...................s-uGss-...........PDESuVLLEAIGPVHQNRFIRQERpppp....pp...p......Qp...p.....p-LLt.chpss........................ 0 2 8 13 +13759 PF13917 zf-CCHC_3 Zinc knuckle Coggill P pcc Jackhmmer:Q8TBK6 Domain The zinc knuckle is a zinc binding motif composed of the the following CX2CX4HX4C where X can be any amino acid. The motifs are mostly from retroviral gag proteins (nucleocapsid). Prototype structure is from HIV. Also contains members involved in eukaryotic gene regulation, such as C. elegans GLH-1. Structure is an 18-residue zinc finger. 
27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.34 0.72 -4.22 25 384 2012-10-03 11:39:54 2011-03-29 14:02:14 1 40 191 0 234 1569 12 40.60 31 17.55 CHANGED ssusspCQKCLphGHW...TYECK..........sc+....sYs.......sRPSRTQpLcps.l ........s...pstCpKCh....phG.........Hh..........TaEC+....................tpp................................................................................... 0 90 133 193 +13760 PF13918 PLDc_3 PLD-like domain Coggill P pcc Jackhmmer:Q8N7P1 Domain \N 27.00 27.00 27.50 27.00 26.80 26.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.12 0.71 -4.84 5 475 2012-10-02 13:01:53 2011-03-29 14:42:38 1 7 150 0 155 456 3 169.10 38 45.75 CHANGED SluTIKNLGLYSTN.K+LAhDLMNRYNTFSSMls-PKlPFT...RhCCuM.lTPTATsFHLNHoGGGlFFSDSPERFLGFYRTLDEDLVLHRI-uAKNSIDLSLLShVPVlR+uosVcYWPcIhDALLRAAI-RuVRVRlIIopWKNADPLSVAAARSLc-FG..VGslDlSVRhFulPGR ........................................................SloplKpLGlh..hN.spLAhDLtphFpsaasht..s...p..s..p..l.P.o....h.....phsssh..spsss.hph..p.l..s.p............s.s..u.s......lF.....hS.s..SP.pthh..sh.tRThDhDslL.phIcsA+p.I.lulhshlP....lh....c...h.s....s.s......h...p.YW.PtI.suLhcA.A.lpR.uV+VRlLlo.pWc..psDPhth.s.hl+SLp...sh.s........ss...s..ss..lpV..+hFslP............................. 0 40 55 103 +13761 PF13919 ASXH Asx-hm; Asx homology domain Iyer LM, Aravind L, Godzik A, Coggill P pcc Manual Domain A conserved alpha helical domain with a characteristic LXXLL motif [1] [2]. The LXXLL motif is detected in diverse transcription factors, coactivators and corepressors and is implicated in mediating interactions between them [3]. The ASXH domain is found in animals, fungi and plants [4] and is predicted to play a role in mediating contact between transcription factors and chromatin-associated complexes. 
In Drosophila Asx and Human ASXL1, the ASXH domain is predicted to mediate interactions with the Calypso and BAP1 deubiquitinases (DUBs) which further belong to the UCHL5/UCH37 clade of DUBs [4]. 25.80 25.80 25.80 27.20 25.30 25.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.73 0.71 -4.38 85 311 2012-10-09 14:14:56 2011-03-29 16:45:12 1 10 155 0 174 299 0 133.90 41 12.83 CHANGED ptps..++hssu.....Qh+ps+.st.cl..DlEoPcSILlsTNL.RuLlN.pcTF.ssLPtphQppLltLLPc.........VDRpsu......................................ssshl+.LssS.sLN.NEFFscAsppW+-RLu-GEFTPEhQ......................h+l+pEtE+E+.KlDsWKE+aFEsaWGpK ...........................................ht.....cp.ttt..phKps+...st...cl...DlE...oPsSIL.l......sT..NL.RA...Ll.N.pcTF.ssLPschQppLLhLLP-.............VDRpsu.........................................s-uhl+..L...ssS..sLN...NEFFspAspsW+-RLu.-..GEF...TPEhQ...............................l+l+..pEhE+EK.......Kh-.WKE+aFEsaaGp..................................... 0 37 66 112 +13762 PF13920 zf-C3HC4_3 Zinc finger, C3HC4 type (RING finger) Coggill P pcc Jackhmmer:Q9BY78 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.78 0.72 -4.29 585 5985 2012-10-03 15:03:13 2011-03-29 16:58:25 1 423 488 23 3675 15063 738 49.20 31 9.50 CHANGED ppph....Chl.......Ch.s......p.......p..t...s........s.lhh..P....C..s..HhshCtpCuppl......................ppCPl..CRp.slp..ph .............................p..ttChl.Ch...c.......................p....................t....s........................s..l.h.h.....P........C..u...Hh..hC...ppCupph..................................................ppCPl..CRp.lp..h........................... 0 1260 1981 2858 +13763 PF13921 Myb_DNA-bind_6 Myb-like DNA-binding domain Coggill P pcc Jackhmmer:Q15361 Domain This family contains the DNA binding domains from Myb proteins, as well as the SANT domain family [1]. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.04 0.72 -3.86 39 4769 2012-10-04 14:01:12 2011-03-30 11:08:35 1 107 621 51 3074 13999 171 61.90 27 16.83 CHANGED WTp-E-chL....hphhpp........h....u..t........s...WppI.....upt..l.s...........Ros..tpltpRap..p...L..........p.........s.p.......hppss..asc.p-pppL ............................................................WTp.-ED.phL..........hphl...pp...................................a............G...p........................p.....Wsp.I.............upt....l..s.................................Ros..ppC.c.p....R.Wp....p......hL......................p.............s.p.....................hp.p..t.......as..t-....................................................................................... 0 1464 2097 2635 +13764 PF13922 PHD_3 PHD domain of transcriptional enhancer, Asx Coggill P pcc Jackhmmer:Q76L83 Domain This is the DNA-binding domain on the additional sex combs-like 1 proteins. The Asx protein acts as an enhancer of trithorax and polycomb in displaying bidirectional homoeotic phenotypes in Drosophila, suggesting that it is required for maintenance of both activation and silencing of Hox genes. Asx is required for normal adult haematopoiesis and its function depends on its cellular context. 25.00 25.00 36.40 34.50 21.60 21.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.03 0.72 -3.73 13 166 2011-03-30 10:10:42 2011-03-30 11:10:42 1 4 63 0 89 168 0 65.00 59 4.50 CHANGED ppt.....tpsps....pstp..h..Qp....ospNT.........................s...pussspCuCuLpAMVlCQpCGAFCHDDCIGsSKLCVSClI ................................................t..s......uupsts..lslQhFs-pss.........................sEshs.KCsCRLK.AMIMC+GC..GAFCHDDCIGPSKLCVuCLV... 
0 12 18 41 +13765 PF13923 zf-C3HC4_2 Zinc finger, C3HC4 type (RING finger) Coggill P pcc Jackhmmer:Q5VTB9 Domain \N 27.00 20.00 27.00 20.00 26.90 19.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.23 0.72 -3.92 345 3897 2012-10-03 15:03:13 2011-03-30 11:46:46 1 230 817 11 2262 19172 876 41.60 35 7.80 CHANGED CslC.hp.ph.ps.....s.h..hh.s..C..u..H..h.h.C.p.pC...ht..p..h..hpp.t..........p..p....C..Ph..C ...................CslC...hc..hh...pc.................P.h....hs..s....C......G........H....h.F.C.p..pC........lt...c....h..hps..p..............................p...........C..Ph..C.................................. 0 782 1217 1747 +13766 PF13924 Lipocalin_5 Lipocalin-like domain Bateman A agb Jackhmmer:C7PBJ6 Domain This family includes domains distantly related to lipocalins. However, they do contain the important GXW motif in the first strand. The protein in this family include aln5 Swiss:B6SEG2 which is involved in biosynthesis of alnumycin [1]. The family also includes the ZFK protein from Trypanosoma brucei which is a protein kinase. This domain is at the C-terminus of that protein [2]. The domain is also found as the C-terminal domain in StiJ a protein involved in producing stigmatellin. This domain has been assumed to catalyse a final cyclisation reaction [3]. 
21.10 21.10 21.60 21.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.80 0.71 -4.49 68 249 2012-10-03 08:47:39 2011-03-30 13:10:08 1 8 197 0 95 228 57 132.90 25 64.70 CHANGED hG.sWcLlu...hphpss-Gs...................................hhhPhGp.sPpGhlhYsscGhh.osplhptsR......................h.sss.hpuosp-hsp..shpshluYsGpY.....plcss.........plhacl-suhhPsWl..GspQpRphplc.s.....cp...Lhlt...tstshhhsst.tt.....shLsWcRh ..................................................hGsWpLlS...hph...ss.s...................................hhhPhGp...ss...hGhlhYstpG.hhuspl.hsss...Rst....................hsstshhtss..sp.-h....st..shpshluYsGcY.........plccs..........................thlsHpl-suh.hPNWl..Gs.pQpR...h.aphc.t.........-p...LpLt....phssh.hsst.hh.....shLhWp+................................................ 0 26 56 76 +13767 PF13925 Katanin_con80 con80 domain of Katanin Coggill P pcc Jackhmmer:Q9P0V8 Domain The con80 domain of katanin is the C-terminal region of the protein that binds to the N-terminal domain of katanin-p60, the catalytic ATPase. The complex associates with a specific subregion of the mitotic spindle leading to increased microtubule disassembly and targeting of p60 to the spindle poles [1]. The assembly and function of the mitotic spindle requires the activity of a number of microtubule-binding proteins. Katanin, a heterodimeric microtubule-severing ATPase, is found localized at mitotic spindle poles. A proposed model is that katanin is targeted to spindle poles through a combination of direct microtubule binding by the p60 subunit and through interactions between the WD40 domain and an unknown protein [2]. 
27.00 27.00 27.20 27.40 26.70 26.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.66 0.71 -4.51 33 279 2011-04-01 14:39:33 2011-04-01 15:39:33 1 12 127 0 176 262 0 145.60 30 26.00 CHANGED pcsHsshhpsLpsRhppLpslcphW.ppssl+sulsshtph...pDhulLsDlLsllp..t+ppt.....hsL-hCstlLPhlppLLp.S+aEpalpsuLchLptllppFtshIpsshp..ssstsulDlstEpRh...p+sptshppLpplpphlpths.p...psGplGppupcLphhls ...................................psH-shhpsLpuRhtpL.....psshphW.pppslps...slshhhph.........pDhuVl.sDlLslls.......ptph......hsL-hCsslL....Phlp.pLLp.S+a.EpaltsuhshLphllppFhshl.....pst...ht....s.spluVDlptE-.Rh...............ppsphshppLppl.ph..tht.....h.G.hup..hpclp.hh..................... 0 52 80 127 +13768 PF13926 DUF4211 Domain of unknown function (DUF4211) Coggill P pcc Jackhmmer:Q8N4S0 Family \N 27.00 27.00 28.00 28.00 22.30 21.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.58 0.71 -4.81 38 274 2011-04-01 15:59:20 2011-04-01 16:59:20 1 7 160 0 173 252 0 146.50 23 14.76 CHANGED c-Flhp-p................t.s...sstsphshphshpsppsh+-pFcshlchhVpstLcssFhpsh.....t.....chahsuhc+l-sp.lpsppcslltsosWsssapcuLcshPplplhth.........psttCpACpcss+ss.shcl+hsGcsY...........spcT......Lpshppsss .....................................................................................t.....pt..................t.tss....tt.tpsphshttp.h.hspp..sh+-pFcshlchLlppALDPs.hhpsl.c.ppp........chaLssh++lDsh.lpspp...cplltps.thspp...appuLcsaPpl.h........................t.hsCpuCsps.st...shcl+hsGcsY...........NpcT......Lp..ph...s.................................................. 0 40 64 114 +13769 PF13927 Ig_3 Immunoglobulin domain Coggill P pcc Jackhmmer:O60384 Domain This family contains immunoglobulin-like domains. 
27.00 15.30 27.00 15.30 26.90 15.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.84 0.72 -3.46 182 4712 2012-10-03 02:52:13 2011-04-04 12:51:01 1 772 224 45 2131 57861 94 77.20 17 13.25 CHANGED pPplthp.sts.t...............lsLp...Cpstss..........ssphpWh............................tstttttsspsslsls.........hp.....ppsss..asC....hs...ps .............................................................................................................P.l.h..t.....hts.................lsLp..........C...pupss..............................ssphtWh.......................................................................................t..s...p...t....t....t...s...t......s....p...u....s...ls.ls...................................ssp........................pcsGs.....YpC..hu.p............................................................................................................... 0 498 693 1156 +13770 PF13928 Flocculin_t3 Flocculin type 3 repeat Bateman A agb Jackhmmer:Q5AF39 Repeat This repeat is found in the Flocculation protein FLO9 Swiss:P39712 close to its C-terminus. 21.90 21.90 22.20 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.62 0.72 -3.80 149 538 2011-04-06 11:49:32 2011-04-06 12:49:32 1 38 52 0 311 544 0 43.50 44 15.50 CHANGED sTTllTlTSCs.c........spCspos...........l......oTu.so....sls.........ssp..TsYTT..YCPLo ....................pTTllTlTSCp.s........shCopos................V.......oTu..ss....TVs.........sss..ThYTT..aCPlo.................. 0 57 149 285 +13771 PF13929 mRNA_stabil mRNA stabilisation Eberhardt R re3 Jackhmmer:Q03153 Family This domain is an mRNA stabilisation factor [1]. 
25.00 25.00 57.90 27.50 20.70 20.70 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.86 0.70 -5.46 7 26 2011-04-06 12:28:04 2011-04-06 13:28:04 1 2 24 0 15 23 0 267.70 34 48.31 CHANGED hplhpsLsppD....actuppllsssp..p.p.l+sltsslppL.pph....clsscpWhphh-pphPhlh.pps...aWssRhpaapLL.hss.pph................slp...phh.-YLhhKpuhGtplsccDLlsFLplh.pplpsspp....Y.pLVppNphllcALpLac..psc....llhDspllshLLpoMss.ppp....pL+uhYEhlcals..ppat.pths.ssltpllpsLschpsWscLhpFWctthss.s.tpDpRPWs.Fl+slspsGDpplh+cllccGpLLWIpRspVs..hos-LpstLpcLFppus ...................s..hh.hLpppD....Fctuscll.pss..st.sphc..l+olpcshssLpp.s..tpclcsspWpphFDpp.shlshspst......YWslRhcahhLLNcscPphY....................os+...halpDYLlLK+ShGpcLh+-DLluhLchllps.css+p....YhsLVcpNchllcALpLact.psc...tpllhDppVlohLLpoMlsp-cs....+LcuLYEsI-alh...poas.ccLTsshIlsILpsLAch+-WsKLhphW-shsss.s.sp.......DpRPWscFIpllspoGDsplhpKllssGHLLWI+RhsVs..l..oscLpsslctLhcpss...... 0 3 8 14 +13772 PF13930 Endonuclea_NS_2 DNA/RNA non-specific endonuclease Coggill P, Punta M mp13 Jackhmmer:D0LUR7 Domain \N 27.00 27.00 27.10 27.00 26.90 26.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.79 0.71 -4.12 50 1121 2012-10-05 18:28:12 2011-04-06 14:28:53 1 34 776 3 170 1036 12 132.40 26 35.41 CHANGED PNspYps....sGh....hY.pTDphGRlspspu..pLp..L.h.t..p......scR.NsapQsss..Gp..........s...th.t....Dc.GGHLIustFsGssc.h.cNLVshspp.lNc......GpahphEpcWAs..ulpps..+pV.plcIpPlYpGs.S..RPssFplp.YpIss 
...............................................................................t.................hh..phDphsRhttsps.....ht......h........p.................ttR...t.s.h......t...pss......uhh.t......................t.....tp.......tt.h..................h....s....RuH..LI......u........hp..h...............u...............G........p..............ss.....p..c.....NLsstoph.lNp...................sshhh....aE....sp....lpp...........shcps.....ppV...pYcls.PlYp.Gs..p...Lhspthplp.h..................................... 0 44 92 135 +13773 PF13931 Microtub_bind Kinesin-associated microtubule-binding Eberhardt R re3 Jackhmmer:P46863 Family This domain binds to micotubules [1,2]. 25.00 25.00 25.60 25.60 24.90 24.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.08 0.71 -4.04 11 156 2011-04-08 14:37:49 2011-04-08 15:37:49 1 3 133 0 105 156 0 99.50 31 9.52 CHANGED -LKh.hPTGTTPpR+casYP+sLstTpP+-pIlcRa+pEQshsp.hshsssIsEhscE-ss....pslpulpt.clpsoEslhsspsh.-hs-ssppttth..hQ.ppstsKttpsps.ps.lcp.psp.sppppu..........ls+ShhP.+tss .....Lpp.hPTG.sTPp++pYtYPssLsRTcPH-pLlp.ph+...ppp..................................................................................................................................................................................................................................................... 0 23 39 72 +13774 PF13932 GIDA_assoc_3 GidA associated domain 3 Punta M mp13 Jackhmmer:D0LLL5 Domain The GidA associated domain 3 is a motif that has been identified at the C-terminus of protein GidA. It consists of 4 helices, the last three being rather short and forming small bundle at the top end of the first longer one. It is here named helical domain 3 because in GidA it is preceded by two other C-terminal helical domain (based on crystal structures [1,2]). GidA is an tRNA modification enzyme found in bacteria and mitochondrial. 
Based on mutational analysis this domain has been suggested to be implicated in binding of the D-stem of tRNA [2] and to be responsible for the interaction with protein MnmE [1]. Structures of GidA in complex with either tRNA or MnmE are missing. Reported to bind to Pfam family MnmE, Pfam:PF12631. 27.00 27.00 30.60 29.20 26.20 24.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.16 0.72 -3.92 60 4262 2011-04-08 14:57:33 2011-04-08 15:57:33 1 15 4143 17 1037 3177 2191 71.90 46 11.44 CHANGED lKYpGYIcRQpppl.c+hc+hEshplPpclDYsplpuLSpEu+cKLscl+PpTlGQAuRIsGVoPADIolLll ...............lKYpGYIp+Qpppl.-+hc.+hEspplPtslD..Y..s..p..l..s..G..L..SsEsppKLpch+PtoIGQASRISGVoPAsISlLll................... 0 354 654 866 +13775 PF13933 HRXXH Putative peptidase family Bateman A agb Jackhmmer:Q5A0Y5 Domain This family of putative peptidases are closely related to the M35 family Pfam:PF02102. In this family the metal binding HEXXH motif is replaced with HRXXH. The exact function of these proteins is unknown. Members of this family are found to be fungal allergens. 
21.40 21.40 21.40 21.40 21.30 21.10 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.70 0.70 -4.96 28 85 2012-10-03 04:41:15 2011-04-08 16:09:36 1 3 72 0 60 112 0 225.10 40 80.13 CHANGED hhlllhsusshAussss.............tspsssssssshsWsuuh.....................sssFP.IHpSCNuTppcQLpsGLpEshpLAsHA+-HlLRaGspSpha+KYFGs.........usou-slGha-pll...........................suDKushLFRCDDPDGNCs..ss.WAGHWRGpNATsETVICDLSYp..oR+.LsplCutGYTVusussNsaWAsDLLHRlaHlPslG.pGhl-Ha....A-sY--VLcLAcs.NuohAlRsS-oLpYFAlDVYAaDlAlPGhGCsGc .................................................h.hhh..............................t....ts..shshpt.sh.....................htsaP.IHpSC.Ns.T.ppp..LptuLp-shplAt+A+-Hl.......LcaG.scSth.a++YFGs.........usshpsl..G.ha-pls...........................susK....u.shLFRCDDs...Ds..sC......t.pss...aAGH.WRGpNAo.sETV..IC-lSYp..oR+sLsslCs.....tGaTVu.s.utss.paa..usDLL....HRhhHlP....slu..cuhVcHa....A-..sYp-llcLAps.Nsohus+socsLpYaAhDVYuaDlulPG.GCsGc............................................................... 0 13 30 53 +13776 PF13934 ELYS Nuclear pore complex assembly Wood V, Coggill P pcc Jackhmmer:O94384 Family ELYS (embryonic large molecule derived from yolk sac) is conserved from fungi such Aspergillus nidulans and Schizosaccharomyces pombe to human [1]. It is important for the assembly of the nuclear pore complex [2]. 
27.00 27.00 27.30 27.00 26.00 26.80 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.63 0.70 -4.62 41 211 2011-04-11 16:17:10 2011-04-11 17:17:10 1 6 177 0 151 221 1 232.90 25 21.47 CHANGED spLalDcLLpt....h....ul..............................ps.spthYPPco...L+s.Lhchllssss.....st..p..pKpullhYlLhDh.....cpt..............................phsppFupphtlscphhhhlcuhWhLD+...h-...acpAlchL..scP....ulh.....ssassc....IlpsLhp...............p.tsss...lAhtahpsssPslsos.............pslphhhsh.LspsslsEAFtatRphss...cpt...................p.hhEpllpaslp.t....................p.s.pp.h..hc.LlsLPhsppEEphlpc.h.......Lc...psspp....h.......sc-sllh .......................................................................hLhlDtllpt....l.....sl.......................................ps.stthYPPpo...L+s.Lhphhh.....s.sh.......................sp....t.....tKpul...hhYlLhDh.......pts...............................p.hppFspshsls.phhhhhpGhWhLD+................t-.........................appAlchL.....scP.....uhh.........sphtsc....IlpsLhp........................p.tctp...hALtYhps..spPslsos...................................pslphhhsh...Ltp....s...slsEAahatRptss....ph.......................hcpLlphhhptpt.....................c.ttt.hpcLlpLPasspE-chlpc.hLp....sssth........spphlhh............................................ 0 41 75 118 +13777 PF13935 Ead_Ea22 Ead/Ea22-like protein Bateman A agb Jackhmmer:B2PIW3 Family This family contains phage proteins and bacterial proteins that are likely to represent integrated phage proteins. This family includes the Lambda phage Ea22 early protein as well as the Bacteriophage P22 Ead protein. 
27.00 27.00 27.10 27.50 26.10 26.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -11.18 0.71 -3.85 41 775 2011-04-12 10:52:06 2011-04-12 11:52:06 1 2 384 0 12 500 1 137.90 35 66.99 CHANGED SpIsp..QALRE......sAcpAttu..h.......h.hh.t.t.t...h............................t...........................pspalst...h...s....PpslLALLDEhEp.......tpphlcthc.........s.cls.shsphpp.....cLcstcph...........................................................................................t.t..hp..hst.t.......RlA-L.E ............................................................ScIsY..QALRE......sAEp...Ahpupah.......h.hh.sppt.h...............................thhcp.hpth........................ssspFlshs...s....PssVLALLDEpE+................................sQphIcch-pc..............Np-lALs...lsKLRs.......-LEsscpph............................................................................................p.t....t................................................................................................................................................................ 0 1 1 5 +13778 PF13936 HTH_38 Helix-turn-helix domain Bateman A agb Jackhmmer:B2PKW2 Domain This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.94 0.72 -4.40 170 4579 2012-10-04 14:01:12 2011-04-12 12:55:48 1 55 1701 4 685 4174 318 42.50 36 15.56 CHANGED phppLohpERtp....Ip.th...h.p...pG.hShp...p.IAcpLs+ssSTIuREl+Rs ......................ppLThpp.Rhp...........Ip..th............h.p.......pG.hS.R...p.IActLG+u.sTIpREl+Rs........ 0 188 368 496 +13779 PF13937 DUF4212 Domain of unknown function (DUF4212) Punta M mp13 Jackhammer:D0LH74 Family This family includes several putative integral membrane proteins. 
30.00 30.00 33.60 32.20 26.90 26.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.63 0.72 -3.98 71 560 2011-04-12 12:23:16 2011-04-12 13:23:16 1 4 541 0 211 473 1169 80.10 43 74.54 CHANGED pptptYW+pNl+LlhhLLsIWFlVSFG..hsll..hscsLsplp..l..hGasLGFWhApQGuIhlFllLIahYuhtMs+LD+caslcE ........c.ttpsYW+pNlp.llhsLLslWFl.VSFG...suIL...h........s........cs.......L......sp.lp...h......uGa.sLGFWFAQQGSIhsFlsLIFlYshpMsp.LD+caslpc................. 0 53 136 183 +13780 PF13938 DUF4213 Domain of unknown function (DUF4213) Eberhardt R re3 COG2014 Family This domain of unknown function has an enolase N-terminal domain-like fold. Its genomic context suggests that it may have a role in anaerobic vitamin B12 biosynthesis. This domain is often found at the N-terminus of proteins containing DUF364, Pfam:PF04016. 23.50 23.50 23.80 27.90 22.60 23.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.83 0.72 -3.85 59 215 2011-04-12 12:28:01 2011-04-12 13:28:01 1 4 184 6 124 217 9 86.60 23 32.70 CHANGED lh-tlhpt....hp.shplcclslGlt..aTsVphspt.thGlA..hT.hpsh............h..h.hsup..ltGpsspclhphhh..uhs.slcpulGlAslNAlhp ....................hctlhpt....hp..shplpchshGht..aohVpss..s..t...uhGlu..hThhppt.................th....h.hssp..lt.Gpslc..clsphht..Shs..lctolGlAslNAl..... 0 51 88 108 +13781 PF13939 TisB_toxin Toxin TisB, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:A5A627 Family TisB (toxicity-induced by SOS B) is an SOS-induced toxic peptide. It is a hydrophobic membrane-spanning protein which inhibits cell growth [1]. Its expression is inhibited by the antisense RNA IstR-1, which acts as an antitoxin [2]. 
27.00 27.00 55.50 55.40 14.00 13.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.61 0.72 -7.09 0.72 -4.19 2 56 2011-04-12 12:59:00 2011-04-12 13:59:00 1 1 56 0 3 12 0 28.00 89 94.00 CHANGED MushDIhILILKLhVAsLQLLDAVLK.h MSLVDIsILILKLIVAALQLLDAVLKYL. 0 1 1 3 +13782 PF13940 Ldr_toxin Toxin Ldr, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:E8XFD8 Family This family includes the Ldr (long direct repeat) toxins. In Escherichia coli there are four Ldr toxins, LdrA, LdrB, LdrC and LdrD. These toxins inhibit cell growth, decrease cell viability and cause nucleoid condensation. LdrD expression is inhibited by the antisense RNA RdlD, which functions as an antitoxin [1]. 27.00 27.00 39.60 39.30 22.30 21.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.68 0.72 -4.18 5 958 2011-04-12 13:02:29 2011-04-12 14:02:29 1 1 369 0 11 214 1 34.90 74 70.82 CHANGED MTLsQLGlAFWHDLAAPlIAGIIAulIVNWLRcRK ..MTLApLuMsFWHDLAAPllAGIlsuhIVsWhppRK.... 0 3 3 5 +13783 PF13941 MutL MutL protein Bateman A agb Jackhmmer:B2PG71 Family This small family includes, GlmL/MutL from Clostridium tetanomorphum and Clostridium cochlearium. GlmL is located between the genes for the two subunits, epsilon (GlmE) and sigma (GlmS), of the coenzyme-B12-dependent glutamate mutase (methylaspartate mutase), the first enzyme in a pathway of glutamate fermentation. Members shows significant sequence similarity to the hydantoinase branch of the hydantoinase/oxoprolinase family [2]. 
27.00 27.00 27.00 29.30 26.90 25.60 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.20 0.70 -6.16 38 371 2012-10-02 23:34:14 2011-04-12 14:12:58 1 3 341 0 69 264 97 432.00 39 94.36 CHANGED slLssDlGSThT+sshh-...ssphcllupupuPTTV..t.tDVshGlppAlpclpcph....................shts..hshsphhuoSSAAGGL+MslhGLV.chTscAA+cAALuAGAplhpshuh.................clscpclcclpplpPDlILLuGGsDGGspcsllpNAchlu.ph........shshP......lIYAGNpsspcclpcILt..tsptslhls-NVhPcl-plNlpPsRcsIpclFhc+Ih.cA.Ghsc.lpchs......stslhPTPuAVhpsschluc.......thGsllslDlGGATTDlaSls....cst.t..thhhp...-PhsKRTVEGDLGhthustsllcthu...ttlhchhs.....tth.ptlpphhtpsshlPp.sc-Ethh-ptLAptAlphAlcRHsGphpplhsshtphh..ht.G+DLs........plchllGoGGsLs+usp..spplLpthhps.tt....lhPspp.....sclhlDppYlhuuhGlLup.htP-sAhplhccslth ...................................................................p.hlsl-IGSThTKsshFp...ss..thp...........hlupuhsPTTl..t...tclthGhpsAlspltpts...................s........shsphhssSSAuGGL+hsshGLs.shTscAA+cAAhuAGAplttshuh...................................pls.cpDlpp.l.p.cspP-IlLl..sGGsD.s...G-cchslpNA.+hlA.pu...............pL.s.ss...............llYAGNpclpccVpcIhs.....t.tpslshl-NVhPclDhlsltss.RpsIp-lF.c+Is.pu.Ghcs.l.h-hs......s..cplhPTPtAVhpsschlup.......uhs-hlllDlGGATTDVaSss...........tusst.sshllc....PEPhsKRTVEGDLGhhVsAhslsctssc......h....ht.................t.hcchhtp.chlPt..spcEt.hhsphLAt.sVthAspRHAGshpplhTssGps........lthG+DLo........sV+hVlGoGGhLo+sst...htclL+hhph..ssssp.....lL.ssc...................sphhlDpcYlhuulGslAp.s.PptAh+hhtphh..h....................................................................................................... 1 39 56 65 +13784 PF13942 Lipoprotein_20 YfhG lipoprotein Bateman A agb Jackhmmer:B2PEB8 Family This family includes the YfhG protein from E. coli Swiss:P0AD44. Members of this family have an N-terminal lipoprotein attachment site. The members of this family are functionally uncharacterized. 
27.00 27.00 56.70 30.30 25.20 21.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.06 0.71 -4.74 26 543 2011-04-12 15:31:32 2011-04-12 16:31:32 1 2 534 0 41 227 3 177.70 71 74.12 CHANGED lPcpplsDYRhssCcslWslpstsu.sNsLYWLRshDCA-RLssspARspA+pls...sssWpsuF+puILLssAchTssERRphlspLssaShphPsslRPLhQLWR-pQsLplsLuEERtRYp+LQpsoDucLDpLRppps+LptpLpsToRKLENLTDIERQLSoRKpst....sphs-sscss .....l.PhYQLADYLoTcCuDIWuLpGKuTETNPLYWLRAMDCADRLMPAQSRpQARpas...DsoWQNsFKQGILLADAKITPhERRQlVuRl-ALSspIPAQVRPLYQLWRDGQALQLQLAEERQRYSKLQQoSDSELDTLRQQppsLQpQLELTTRKLENLTDIERQLSTRKPAGNasPDTPHps........... 0 1 9 24 +13785 PF13943 WPP WPP domain Woodcraft BJ agb Woodcraft BJ Domain \N 20.10 20.10 22.40 24.20 18.60 17.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.24 0.72 -3.80 25 109 2011-04-14 10:02:01 2011-04-14 11:02:01 1 14 29 0 65 111 2 95.70 43 27.43 CHANGED Sl+lWPPoQpTRchlVcRhscsLoo..sSIlS++..YGsLsc-EApcsA+pIE-tAFusAsp.....thssps-sDGhpslQlYuKEsSKhMLEslKptspspssspss ..............ShplWPPoppTRphlVcRhspsLos..sSlho++..YGslsppEAccsA+tIE-tAFusAsp......thpt..psssDGhpslQlYuKEsS+hhLEhlKptststtt...ss................................ 
2 10 39 50 +13786 PF13944 Lipocalin_6 Lipocalin-like domain Coggill P pcc Jackhmmer:JCSG_Target393211_GS13544C Domain \N 27.00 27.00 27.40 27.10 26.80 26.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.86 0.71 -4.00 21 111 2012-10-03 08:47:39 2011-04-14 14:22:19 1 6 70 1 13 99 0 127.70 22 39.62 CHANGED clAGsYpGshslolss...sssshsos....ps...pplpIspss-sTlplplsshs.hth...G..lslGchsVsssslcpsssuhhp.hs.ssshp........scspss...............shss.olsGTl.psGshslshshc....s.Gu...hshslpssas .............plsGsYpGshs.lslss....ssssht.s....hspplplsp.sscss...lplpLpsFsh........G...hplG-lsls.slsVppssss....thh..hs.tpshp.......hpspss...............shss.olsG..Tl..psuphshsls...lp.....sss.......hshslpssa..................................................................................... 0 7 10 13 +13787 PF13945 NST1 Salt tolerance down-regulator Wood V, Coggill P pcc Jackhmmer:Q09863 Family NST1 is a family of proteins that seem to be involved, directly or indirectly, in the salt sensitivity of some cellular functions in yeast. It does this without affecting sodium accumulation. It negatively affects salt-tolerance through an interaction with the splicing factor Msl1p. This interaction stresses the importance of efficient RNA processing under salt stress conditions [1]. 
20.00 20.00 20.60 21.70 19.70 19.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.31 0.71 -4.22 7 142 2011-04-14 14:53:59 2011-04-14 15:53:59 1 5 122 0 112 155 0 154.10 34 13.95 CHANGED hDDpsp..hpspupp.ppphs...ts.hps.ssoKpKKKKKK+p+.upusShpssspps...s.STsps....ts..sLs...........psuh+.hpccc.IWsTSopEEREpI+EFWLpLuE-ERRSLVKVEKEAVL+KMKEQQKHSCSCoVCGRKRsAIEEELEVLYDAYYEELEQYANh.Qsshpsusslssssp ......................................................................spt........................................tttptppt.c...........t.....tt......t......t.....s..t.t............................t............................s..s.........................ps.t...hsppp..IW.s..oSop.E.....ERE......pI+cFWLsLuE-ER+sLVKlEK-uVL+KM.KE......Q....Q..+...............H...........o...........CSCoVCGRK.Rs.....AIEEELEsLYDuYY-.ELE.paApt.................t................. 0 35 64 99 +13788 PF13946 DUF4214 Domain of unknown function (DUF4214) Coggill P pcc Pfam-B_781 (release 24.0) Domain This domain is found on a variety of different proteins including transferases, and allergen V5/Tpx-1 related proteins. 23.00 23.00 23.00 23.00 22.70 22.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.25 0.72 -4.29 15 574 2011-04-15 12:34:27 2011-04-15 13:34:27 1 75 127 0 119 497 104 67.10 22 14.25 CHANGED VApoFluSsEFpp+YGs.slsscsaVssLYpNlLGRcuDtuGhsYWsupLssGsETRtElLLGFu-SsENcuLFoE .......................................................pppal...p..plYpsll..sR...s..s..DtpGhpa.W.......h....s.......pL..p.....sG.t......o.......ht...phhh...th...h.pStEh.t....t.................................... 0 33 86 99 +13789 PF13947 GUB_WAK_bind Wall-associated receptor kinase galacturonan-binding Coggill P pcc Jachmmer:Q8H7G7 Domain This cysteine-rich GUB_WAK_bind domain is the extracellular part of this serine/threonine kinase that binds to the cell-wall pectins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.98 0.72 -3.48 243 1199 2011-04-15 14:01:02 2011-04-15 15:01:02 1 68 22 0 725 1166 0 107.00 22 20.37 CHANGED hssC.s....s..p..C..Gs..l..s.IPYPFGI........G..s...s.....Chh......tuFp..lsC....ss...o.......s.st..lh.............tsshcVhs.IS.lt....p..........uplp....lhs.....sl....stpChs.sss.s...ph.......t.....sh..shst........s...........ahhSs.spNp.hssl.GCss ....................h..sC..s..p...p...C..G..s........l..s..IsYPF....G.l................s....s....s...........Csh............sFp...lsC....sss.........ts.st.lhh...............................ssshpV.hs....Is.....ht.....s....................sp...lp.lhs..........sh.....tt.shs..ssst............................hshsh.............s...................hh.h....s.t.p...N......h.h.hh.uCt.................................................................. 0 34 326 531 +13790 PF13948 DUF4215 Domain of unknown function (DUF4215) Punta M mp13 Jackhammer:D0LZ65 Family The function of this family is unknown. 27.00 21.50 27.00 21.50 26.90 21.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.30 0.72 -3.74 207 1195 2011-04-15 14:50:47 2011-04-15 15:50:47 1 70 35 0 1101 1185 279 46.50 39 15.77 CHANGED ChpC...p..p..G..ap.h..s......pptC........hshC....GDslls.......sp.....EpCD.Ds.......Ntht..h.D.....GC....hp......Cp .......................................C...........G..a..h...............tt...C..............hshCGDGlls...........st......EpCD..DG............Nphs..t.D......GC......p.....Cp............................ 0 1023 1028 1080 +13791 PF13949 ALIX_LYPXL_bnd ALIX V-shaped domain binding to HIV Coggill P pcc [1] Domain The binding of the LYPxL motif of late HIV p6Gag and EIAV p9Gag to this domain is necessary for viral budding.This domain is generally central between an N-terminal Bro1 domain, Pfam:PF03097 and a C-terminal proline-rich domain. 
The retroviruses thus used this domain to hijack the ESCRT system of the cell. 27.90 27.90 28.00 28.80 27.80 27.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.74 0.70 -5.45 97 602 2011-04-15 15:01:10 2011-04-15 16:01:10 1 17 285 9 420 611 2 282.60 22 30.37 CHANGED lc.s.s.......slPt..slh..c.csppl...p.pp.su.........lpplppthpplp..plpppspphlscsp.chLctEppp-pphRppa.sp..pWs+......sSsph...ss..plppclpchcphlpp.AspoDsplppphcp..hppsl..plLststpp.................................l.pshl.Ps.s.p...t...........................ts....p.s.........pp....tltpL+p.hlpclpplcpcRpp..hhp.cL+.p...th.p..sD.hhsplL..t...hsp......ptt...........s.p.h..c.tlFppcLc+.acshpppl.ppshppQppllpplpps..ppph......h...p.t.+.......ps..ptps.......ppRpphl....pplp.sua.ptap-lhsslpcGpcFYssLtphlp.phpppspsastsR+tEt ....................................................................................................tlP..slh...pchttlp...pp..ss..............hppltp.hppltphhppspphLpc...........st...chLptEptp-pphRtpa.sp...pWpp.........................sSsph.....sp...plpp..c...hpcacphlpp.Asp.....uDsplppphpp..pp...ssl.....plLsts.pp..................................................l.ps..tl.Ps.s.p....t.................................ts...ps.......pt............ltpL+p.hlpclppl...cpcRpp..l.ppL+.p...th.p.pD..hsp.hL.h...h.tp..tst.....................s.p.....c.tlFppcLc+.aps.hpp.tl.ppshppQppllpplpps..tpph..........h...p..+.............ps..ptpt.......ppRppsl.............................ppLt.sua.ptap-lhsslp..cGpc.FYscLtphls.phpppspshs.sRctE.................................... 0 137 221 346 +13792 PF13950 Epimerase_Csub UDP-glucose 4-epimerase C-term subunit Coggill P pcc Jackhmmer:Q3EAY3 Domain This domain is the very C-terminal subunit of UDP-glucose 4-epimerase. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.17 0.72 -4.03 308 5379 2011-04-15 15:23:58 2011-04-15 16:23:58 1 13 3888 48 1359 4059 1160 61.00 38 18.14 CHANGED AFE+AsG+plPYclssRRsGDl...ApsaADss+Apc-LGWcAp+.sL--MCpDsW+WQspNPsGY .....................shc+soG+slP.h.c..l.s.s..R..RsGDh...ushhAss..sKAcccLGWc.sph.sl.-c.......hhpc..u..Wp.W....p..p...ppPpGa............ 0 444 849 1142 +13794 PF13952 DUF4216 Domain of unknown function (DUF4216) Coggill P pcc Jackhmmer:Q9LI66 Domain This DUF is sometimes found at the C-terminal end of proteins carrying a Transposase_21 domain, Pfam:PF02992. 27.00 27.00 27.50 27.60 26.60 26.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.46 0.72 -4.50 55 725 2011-04-15 16:13:17 2011-04-15 17:13:17 1 77 16 0 338 760 0 76.00 44 8.30 CHANGED lclpa....c.sttph......p.lsLF+CcWhcs.p..spsl+hD.chG.hspVshsphhh.....cc.-PFlLAsQssQVaYltcPp........ppsWplVh .....................................a..-.sYssh..+VPLF+CpWVcl....p...ssGVhlD.-tG.hTsVDLsKlGY.........pDEPFVLAspVsQVFYVcD.s.............h....h.............. 0 0 50 68 +13795 PF13953 PapC_C PapC C-terminal domain Bateman A agb Bateman A Domain The PapC C-terminal domain is a structural domain found at the C-terminus of the E. coli PapC protein. Pili are assembled using the chaperone usher system. In E.coli this is composed of the chaperone PapD and the usher PapC. This domain represents the C-terminal domain from PapC and its homologues. This domain has a beta-sandwich structure similar to the plug domain of PapC [2]. 
27.00 27.00 27.00 27.10 26.80 26.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.26 0.72 -4.33 301 5978 2011-04-18 16:06:12 2011-04-18 17:06:12 1 11 820 10 369 3683 29 67.20 28 8.66 CHANGED hslp.tsGp.slPFGAp...V.............p.......s....p.........p.s..p........p...s.....G.lVGpsGplY..lsGls..s.pupL.pVpW..Gss...ppCplsas.....lstppp ..........lp.tsGp.slPFGAp......V.............p........s..p...........p.s..p............s..s.....G.lVu-sGplY..LsG.ls..t..supl.pVpW..Gpp....stpCplsap..hs....p............ 0 32 99 245 +13796 PF13954 PapC_N PapC N-terminal domain Bateman A agb Bateman A Domain The PapC N-terminal domain is a structural domain found at the N-terminus of the E. coli PapC protein. Pili are assembled using the chaperone usher system. In E.coli this is composed of the chaperone PapD and the usher PapC. This domain represents the N-terminal domain from PapC and its homologues. This domain is involved in substrate binding [2]. 27.00 27.00 27.60 27.40 26.90 26.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.90 0.71 -4.63 34 5788 2011-04-18 16:06:36 2011-04-18 17:06:36 1 9 775 12 327 3635 16 143.60 29 18.18 CHANGED hpFssshLpss.....tss.DlShFpps.tthhPGpYhVDlhlNsphhs..ptplpFpstsst........h.sCLop-hLpphGlphcths...tht.ptsp.........tChs..hstlPtsshpaDhup.tpLslslPQshLt.pscshhsPppWDpGIsAhhlsYshsus .........................FssphLpss......tpsh...D...L.S...p.Fpps...sth.PGpYp.VDlhlNsphhs..ppsl.pF.h.ssssp......................thhsClo...p.Lp.p.hGl..p...s.ss.h.s....t...ht...ttss..............................tCls....hph...l.ss.ushp..aD.h.up..pLslol.PQshlp.p.p.s.c.s..alsPppWDcGIs.AhhlsYshsu............................... 0 24 83 222 +13797 PF13955 Fst_toxin Toxin Fst, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:Q9RLG7 Domain Fst (faecalis plasmid stabilization toxin), also known as RNA I, is a toxic peptide. 
Its N-terminus forms a transmembrane alpha helix, its C terminus is disordered and is likely to be cytosolic. Its translation is inhibited by the antisense RNA, RNA II, which acts as an antitoxin [1,2]. 36.00 36.00 36.20 36.30 31.90 35.70 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.50 0.72 -6.41 0.72 -4.17 13 45 2011-04-19 12:12:39 2011-04-19 13:12:39 1 1 41 1 5 25 0 21.00 62 62.92 CHANGED FspIIAPllVGlllpLlc+WL .hpTIIuPIVVGVVLcllD+WL 0 0 1 2 +13798 PF13956 Ibs_toxin Toxin Ibs, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:C1P607 Family The Ibs (induction brings stasis) proteins are a family of toxic peptides. Their expression is inhibited by the Sib antisense RNAs, which act as antitoxins [1]. 27.00 27.00 33.30 33.00 22.50 22.40 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.06 0.72 -6.46 0.72 -4.07 4 33 2011-04-19 12:16:39 2011-04-19 13:16:39 1 1 15 0 5 21 0 18.80 77 64.41 CHANGED MMKLVIILVVLLlISFsAY MMKlVIILlVLLLISFsAY 0 5 5 5 +13799 PF13957 YafO_toxin Toxin YafO, type II toxin-antitoxin system Eberhardt R re3 Jackhmmer:Q47157 Family YafO is a toxin which inhibits protein synthesis. It acts as a ribosome-dependent mRNA interferase. It forms part of a type II toxin-antitoxin system, where the YafN protein acts as an antitoxin [1,2]. This domain forms complexes with yafN antitoxins containing Pfam:PF02604. 25.00 25.00 28.40 28.20 22.70 20.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.48 0.72 -4.24 20 200 2011-04-19 12:19:10 2011-04-19 13:19:10 1 1 179 0 23 81 1 104.50 53 80.37 CHANGED athacpsGs.hs...shhG+Dshaccs...sphh.sclpHlHlt..............t.pssW......h.phhQhpRTSD.saLVYs.thhssppahlluIl.cPsAHcps.......cppsh.hspltchAEpFppph ...........FISYKRDGV.LP...DIFGRDALYDDSF.TWPLIKFERVAHIHLA..................NsNNPFP..........PQLRQFSRTNDtuHLVYC..Q.GAF...DEQAWLLIAIL.KPEPHKLA........RDNNQ.MHKIGKMAEAFRMRF.............. 
0 7 13 17 +13800 PF13958 ToxN_toxin Toxin ToxN, type III toxin-antitoxin system Eberhardt R re3 Jackhmmer:B8X8Z0 Family ToxN acts as a toxin, it is part of a type III toxin-antitoxin system. It acts as a ribosome independent endoribonuclease. It interacts with, and is inhibited by, the RNA antitoxin, ToxI [1,2]. Three ToxN monomers bind to three ToxI monomers to create a trimeric ToxN-ToxI complex [2]. 25.00 25.00 25.40 25.40 24.60 24.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.03 0.71 -4.57 31 151 2011-04-19 12:21:31 2011-04-19 13:21:31 1 2 115 10 9 140 2 147.30 27 88.43 CHANGED hclYpIsscYlsYL+ch.....Ds.+Vh.st...ppppRsalGlllplsshcYasPLSSPKpK+cp....................hcppl......shhKIpp.......phLGllplNNMIPV.-sp..lphlDlp......p.p-p+....Y+pLL.cphpalpp..spccItc+.AcplYpphhppt.............tlpptsssFplLEctsppY ......................phYplspcYlphLpph.....Dp......+V..s......tpp.+salGlll.p.l.s....shpYhsP.loSsKp.K+cp.....................................hp..sph......shhKl.p...........phluslplspMIPVscst...hptlshp..........p.pD..+....Yp..pLltpphpalpp......ppppI.hpp.upplYpphhppt.............hhpphsssFthLEchh.t............... 0 3 5 6 +13801 PF13959 DUF4217 Domain of unknown function (DUF4217) Coggill P pcc Jackhmmer:Q1KS87 Domain This short domain is found at the C-terminus of many helicase proteins. 27.00 27.00 27.30 27.50 26.60 26.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.03 0.72 -4.43 193 1332 2011-04-19 13:16:38 2011-04-19 14:16:38 1 18 316 0 937 1323 22 64.00 30 9.61 CHANGED slppp.........l........cp.hlhpc.pltphAp+.........AFhSalRuYtpH..pt+.......pIF.plppL..clspl...AcuaGLhpsPphp..t .......................lptplcp..hlhp.shtlpp..tAp+.........AFhS..........al+uYspH..ph+...............plF.pl..p..pL..cLscl.............A.pSaGLhpsP+h.................. 
0 329 530 778 +13802 PF13960 DUF4218 Domain of unknown function (DUF4218) Coggill P pcc Jackhmmer:Q9M233 Domain \N 27.00 27.00 27.20 27.30 26.80 26.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.50 0.71 -4.70 38 847 2011-04-19 13:36:45 2011-04-19 14:36:45 1 90 37 0 424 907 0 109.80 46 12.19 CHANGED tsIscLshFFptlspKVl-sppLcpLpspll.sL..CpLEhhFPPSFFDlMhHLhlHLVcEs+lsGPlahR.MaPaERaMthLKuYVRNRA+PEGSIsEuYhsEEslEF....CscYhp-ss.sluh....sss+pcsph ................................................t.sl.cls.F.hptlsp.KllcsppLptLpspllpsL..s.p.h..EhhFPPoFFslMhHLh....sH...LVcp.hplhGPhah+pMaP.aE.RY.Mu.lLKpYVRNRA+.PEuSIscu.YsTEEslE....F....Cs-alp-hp.sIGlPtsR+-spl............................................ 0 21 99 119 +13803 PF13961 DUF4219 Domain of unknown function (DUF4219) Coggill P pcc Jackhmmer:Q9C740 Domain This domain is very short and is found at the N-terminal of many Gag-pol polyprotein and related proteins. There is a highly conserved YxxWxxxM sequence motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.21 0.72 -4.46 162 657 2012-10-02 13:37:57 2011-04-19 14:55:30 1 109 61 0 341 681 0 27.50 37 3.68 CHANGED hsu.sNYshWsh+McshLpupc..lW.csV- ....hsG.sNYstWsh+MpsaLpupc.....lW.csVp....... 0 83 169 206 +13804 PF13962 PGG Domain of unknown function Coggill P pcc Jackhmmer:B9DG91 Domain The PGG domain is named for the highly conserved sequence motif found at the startt of the domain. The function is not known. 
23.00 23.00 23.10 23.80 22.80 22.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.62 0.71 -4.46 259 1615 2011-04-19 16:12:26 2011-04-19 17:12:26 1 118 27 0 955 1566 0 108.80 26 29.59 CHANGED pchhpctpphlhllAsLlATVTasAuhssPGGa..ps...................sGpslL...sph.tapsFhh.sNshAFssSlsslll..Ll....th................hpth..hhh..hhhhhhhlulh.ulhsAasuGshhs ....................p...hpctppslhllAsLlATVoa.sAuhssPGGh.htss.................................p.....p.sGpsllt....p........p...ta..tsF....hh.ssshAh....hsSlsslll..ll..t.th.........................pth...h..hhhhhhhhhhhshh.uhhsAahsu.hh............................................................................. 0 48 486 751 +13805 PF13963 Transpos_assoc Transposase-associated domain Coggill P pcc Jackhmmer:B3H612 Domain \N 27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.73 0.72 -3.99 127 878 2011-04-20 16:20:45 2011-04-20 17:20:45 1 73 23 0 549 900 0 68.40 41 9.37 CHANGED RsWM..........thhop-ahcGlppFlphAhsphtppp.....pl.hCPCpcCpNthh...psp.pplppHLh.ppGF..hpsYph...WhpHGE ................................pWMY..........pRhop-ahcGVp.FlphApsptppt......hl.hCPCp.cC+Nph.....pss..cpl+tHLh.ppG.Fh.sYss...Ws.pHGE........... 0 1 262 285 +13806 PF13964 Kelch_6 Kelch motif Coggill P pcc Jackhmmer:Q9CAE9 Repeat \N 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.23 0.73 -8.85 0.73 -3.57 179 1811 2012-10-05 17:30:43 2011-04-20 17:50:05 1 393 387 0 1125 11207 344 52.00 22 10.77 CHANGED hpt..ht.sh..ss..ss...s..t..l.alh..GGt.sp................sphhh.as.....p..o..p..p..ap.p...l..s.....s..h.....t....s...p ............h....tt.us...ss...hs.......s...c.....l.Ylh...GGh.ss................................thhsslht.ac....sp....o...p.....p.....Wp.p.....l...s......s...h............................. 
0 462 599 827 +13807 PF13965 SID-1_RNA_chan dsRNA-gated channel SID-1 Eberhardt R re3 Jackhmmer:Q9GZC8 Family This is a family of proteins that are transmembrane dsRNA-gated channels. They passively transport dsRNA into cells and do not act as ATP-dependent pumps [1]. They are required for systemic RNA interference [2,3]. 28.70 28.70 29.10 29.10 27.90 28.60 hmmbuild -o /dev/null HMM SEED 570 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.15 0.70 -13.15 0.70 -6.34 8 320 2011-04-21 14:52:42 2011-04-21 15:52:42 1 5 81 0 179 283 11 382.50 32 77.38 CHANGED SEPhaaahp.......shsp.sssoVhlclsS-Dsl.ChsVSlQsusCPVaDhspslp.sapGhapTho++uuIhlpKp-as..ttFhVFsVlps-DssCopps......pp.ps....sNcsK....oIoFplsh.hpshp...YtsholshlhhlhohhL..lahs.lshplhpspppht..............ptslluhpPs.sop.p...........p.hsh....tpcp.sspssshcEpc....hsspph..pllhs+tsLpVuDLu+csp..phlKp+ShsYhapllslulFYsLPVlQ.LVlTaQ+hlspT..GcpDhCYYNFhCA+PL....hhlSsFNplhSNlGYllhGlLFlllshpRchpa..........sppaGlPpHaGLaYAMGlALIMEGlhSAsYHlCPNpsNFQFDTSFMYVIssLsMlKlYQsRHPDlsuoAYssFulLuV.slLsulIGlhs+ss.hlallFslIallsshhlSh...............phYahGshKl............................csRhhl.Ls..........................h.slsNauhhshGL....ph+...stDFusalLhlFhsNslLYlsaYhlMKl........lspE+IshpAllhhhLAllsWssAuaFFhpcsosWTcoPApSRphNp.CllLcFYDsHDlWHhhSAlAlaFTFhhLhslDDDLhsshRssIsVF 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.....h...................s.s.......hhhh.G..h.hh....p.....................................sh.........hhh.hh..uh.us.hp.CP.s..shp.a........hh.h..hh..a..R.............p...............h.....h.......h.................h......................hh.h.h....h...h..h..hh....h.hs...........h.h....t.........................................................................................h.h...................................................h..hhsh.hhh.......h............t.....th....hhh.hh..h....phhhYhhhYhh.Kh.........................h.t.c....p..h........sh....h....hhhh..h......hh......u......hh..a........t.....th..............ssA.SR.hNt.C.h.th.h....D.+Dh.hHh.suhshhh..h.h.......................................................................................................................................................................................... 0 78 96 140 +13808 PF13966 zf-RVT zinc-binding in reverse transcriptase Coggill P pcc Jackhmmer:P93281 Domain This domain would appear to be a zinc-binding region of a putative reverse transcriptase. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.14 0.72 -3.67 117 1664 2011-04-21 16:16:41 2011-04-21 17:16:41 1 319 46 0 590 1605 0 79.10 24 12.48 CHANGED pFSs+ssYph.l.pssss.ts...ap.pl......Whspsss.KhsFFsWhAshs+lhTtDpLp+...+.s.h..t..hs..spCh.LC.p.p.ppEohsHLhhpCshsp.tlW .........................................ha...................................t..............t.ph..................Wp.t..p..s.P..K.lp.hF..h..W..h..h.hps.+..l........TtspLtc...+..s...h...t...hs......sh..C....h...C..t.t.tt.....EshpHlhh.pCshsttlW........................................... 0 51 342 453 +13809 PF13967 RSN1_TM Late exocytosis, associated with Golgi transport Coggill P pcc Jackhmmer:Q39074 Family This family represents the first three transmembrane regions of 11-TM proteins involved in vesicle transport. In S. cerevisiae these proteins are members of the yeast facilitator superfamily and are integral membrane proteins localised to the cell periphery, in particular to the bud-neck region. The distribution is consistent with a role in late exocytosis which is in agreement with the proteins' ability to substitute for the function of Sro7p, required for the sorting of the protein Enap1 into Golgi-derived vesicles destined for the cell surface [1]. 
27.00 27.00 27.10 27.20 26.90 26.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.20 0.71 -4.67 177 1182 2011-04-26 11:55:49 2011-04-26 12:55:49 1 36 266 0 844 1140 1 153.80 22 17.75 CHANGED slhsslshshsluhhhhhhFslL..Rhp.....hpclYt.PR....phht...........pppss....tuh........auWlsshhchs-ppllppsGLDAhlFLRah+hs.hplhhshsllshslLhPlN.......ss......ssttt.............................th.shhohuNl......psps...........thasallhsala....sh.hshahlhcE ................................................................................hhsshhhshhhhhhhhhhF........hL..R.p....................plYt.s+..........hhh..............................tptt.......puh...................................h.uWlhsh..hc.....h..s..-..ppllppsGlDAhhaLRah+h...........h.ltlhhshslls.....hslLlPlN................hs.............usthp.............................................................psh..spho.huNl.....psss.............................hhWsHslhualhhh.hshahlhp..................................................... 0 239 465 693 +13810 PF13968 DUF4220 Domain of unknown function (DUF4220) Coggill P pcc Jackhmmer:Q9FHJ0 Family This family is found in plants and is often associated with DUF294, Pfam:PF04578. 
27.00 27.00 27.40 27.30 25.10 26.90 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.24 0.70 -5.35 29 828 2011-04-26 13:23:38 2011-04-26 14:23:38 1 24 16 0 517 735 0 247.10 18 46.39 CHANGED LWlAYhhAD.lAsasLGhLohst.............sst.....pppLhsFWAPFLLLHLGG.DTITAa..ShEDNpLWhRHlLsLssQshhu...hYVhh+.ph...s..u..s.......lhsshlLhFlsGllKYuERs.hA...LptAuh.................pphtsphhp....tscssss.h....s.h......hhcph.t...h.p.tth.sph...hhh.t....................................pphsptphlhtAashhp.hhp.thas.s.hhh....s.........hpcp.ppst..th...hpph.......pt.........h.ppua+llElELuhhYDhLYTKsslla.............o.hhGhh....h....Rh........lolhsss.sAhhL......Fth............t.....spt........th.p..as....psDlhlTYlLLuGAllLElhullh.hlhSsWshshLpp...p......hh..hhs...............hhtthhphlph.................h...pp.R..WSsphuQYNLlptC.hcccs ...........................................................................................................................................................................................................................................hWs..h............h.s......t........................h.........h...hh.h........hh.hh.....h..............................................h....h.hh.h.shh.+..hh.+..h........h..st........................................................t..................................................................................................................................................................................................................................................h....t...................................................h...h.................................................................hh..plh...p.plsh.hh-hha.oth.hh..........................................h..h..h....h.................h...........h.hh...........h..........................................................................................t..h...l..s...hhhh.......hhh.-h....h....h....h.s...h.................................
..............................................................................................................................t............................................................................................................. 0 6 185 357 +13811 PF13969 Pab87_oct Pab87 octamerisation domain Punta M mp13 pdb:2qmi Domain This domain was first characterised as the C-terminal domain of Pab87 serine protease from Pyrococcus abyssi [1]. The domain is reported to play a crucial role in Pab87 octamerisation and active site compartmentalisation. Its up-and-down 8-stranded beta-barrel 3D structure is reminiscent of the one found in lipocalins. 27.00 27.00 41.60 40.40 24.80 23.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.23 0.72 -3.90 6 15 2011-04-26 15:55:18 2011-04-26 16:55:18 1 2 14 8 11 15 0 95.70 34 22.07 CHANGED Pc.ELPFhhREclL++LEGhYcGY+GTlcapVKscGDhLhL+shGth.pho.hsLhPE-lc........EDal+haTh..GtKh.VEFp......IcsscVcllaERY+LlK .....Pc.ELPhlhhEchLc+LEGpYcuY+GTlchsVcscGDhLhl+utuchtpto.hsLhP--lc........-DascahTht.sGt+hsVEFp......hcsscVcLlhERa+LlK. 0 4 6 7 +13812 PF13970 DUF4221 Domain of unknown function (DUF4221) Coggill P pcc JCSG_target-390208:A6KZ57 Family This family of bacterial proteins contains highly conserved asparagine and cysteine residues.\ The function is not known. 
27.00 27.00 27.00 27.90 26.90 23.80 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.12 25 101 2011-04-27 09:58:51 2011-04-27 10:58:51 1 1 42 1 34 143 0 317.50 18 85.45 CHANGED lhhlhll..hSCusp......pcpp.t.................pshsLh..ps...cslsaslD-psh..........hshslt.h..p...........psscchLh.Fhs..ptp...plphhDLcst.cllcpI.hccE.GPNGIspsh..uhh..s.Dshalh.ss..th.plhhhsppGphhpphsh...pssp..........pht.tphst..hh.....ssh.th.p-shlahs.h.......httc.hpphhs.hAhlDlpspplchhsl.sh.s.hhpp..ht.......phss.....s....h..s.....pssh...ph..ssspllhShssssplah.......s.s..p......tcphphh.pss..uphh.....spht....phpss....pps....hsp.hhc...ths.ppspYuslhaDch+chYaRaup......tsps.tpt.h..p.pphslhlaDc-asllGEsp ............................h.hhlhll..huCspp......pppt...................................................tphpL...tp...cslthsls-phh..........h.htl..h..p...........psuccaLh.h.s..php..tplp..hhDlpst.clhcpl.hcc-..GPsul.s.thh..uhh.hshDuhalh.st...........plhhhs.ppGclhpphsh...pptt...........ph....thss...........psh..hh.tsp.t.h.ahs..................h..t.p.h.ppphh..hshhshps.....pph.ph.hsh...h.s.hhpp...........phts.....s....h...th.....phsh...sh..ptspllhua.spsplal.....hs...p......tpph.chh.shp..Sphh......sph......ph.ss..tp....hpthhp.....hhppspYtslhYDph+plaaRhsh.sh......tstp.h...........p.+ths.lhlhDp-aplluEp.......................................... 0 26 32 34 +13813 PF13971 Mei4 MEI4-Rec24; Meiosis-specific protein Mei4 Wood V, Coggill P, Eberhardt R pcc Pfam-B_78600 (release 24.0) Family This family of meiosis specific proteins is required for correct meiotic chromosome segregation and recombination [1]. It is required for meiotic DNA double-strand break (DSB) formation [4]. 
21.70 21.70 23.20 23.50 20.70 19.70 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.51 7 42 2011-04-27 10:11:47 2011-04-27 11:11:47 1 1 32 0 25 34 1 302.40 31 94.28 CHANGED Mssp..p.........hhpT........+lAlAlAII+p+PsshsuRpaoEhLtptlppp-psWKpcsctLctclhpl+QcL......L.sp.sss.h..t.t.h.tcl.sQpshpt.sstshhcphtssss.p.ps........................p.sphsh-s.hst+hpFLpplhcL+shp......pph..phplh...........psuhpplLssLhphh+ss+....Lh.sphhppsltslsphhsphshpphhstpppp+lp-hhppLlphlLpppplsphpsQchlsppLlsL.upss..h..lIphLLoplpshscsL.phh.pp...sh.p.tt.......phDssphpNhahhahllEpllppp..t..h.....p....pph.tph-cplh.hu-tFPLFuhhL......................WRlGslLsusc ......................................hh..........+lAlAhAII+o+PsshssRpasp.Lt.phlpt...p-.ph+pphcsLchcshph+pcl..........L.s+h.tst.h.......t.t........t.h.spp.hps.sshs.hc.-ssss.s.p.p................................chspsshppsLos+hpFLQ+lLpL+shp......csst.p.hpls.hp.p..ppl..s-SV.pLLDuLlsFa......+pPK....hshsphhpcAlpsLsphlsshshspthhKps.K+LEEhpcoLlphILpsppINphpstp.hsp.Lhhl.......h..llphlL..lpth.p.L.................................hp....hh.hhptl..t..........................p......ptash.th.h......................hhlt.h................................................. 0 6 10 14 +13814 PF13972 TetR Bacterial transcriptional repressor Coggill P pcc JCSG:Target_399142 Family This family of bacterial transcriptional repressors is characterised by the short approximately 50 amino acid stretch of residues constituting the helix-turn-helix DNA binding motif, around the YRFhY motif. The target proteins that are repressed are involved in the transcriptional control of multi-drug efflux pumps, pathways for the biosynthesis of antibiotics, response to osmotic stress and toxic chemicals, control of catabolic pathways, differentiation processes, and pathogenicity. 
The regulatory network in which TetR itself is involved is in being released in the presence of tetracycline, binding to the target operator, and repressing tetA transcription [1]. 25.00 25.00 25.60 25.40 24.70 24.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.64 0.71 -4.76 102 573 2011-04-27 11:46:47 2011-04-27 12:46:47 1 2 496 2 150 441 87 146.10 26 66.69 CHANGED ppYppplpphhpssps....tt..........sl....-shhhYlcslFphhWpYRFhap-LsslLs+stpLppcapp.htpphhpphttlhpthtptGllp.hsspc.lp..sLspslhllsoaWlsapp..stpsptt....hsptsltpGl..hpllsLhtPalsspuppthp ..................................pYpppl.phLp.sps..pp..........sl--hhtYLpslhshhWcYRFlapD.LscLLsRst.cLppcapp.htpch.hpph.hslhpthhst..ullp..hs............psp.lp..sLspshhllsohWlsapp..shp..pst....lscptlppGl....hphlslltPalpspuRpth............................. 0 36 71 113 +13815 PF13973 DUF4222 Domain of unknown function (DUF4222) Bateman A agb Jackhmmer:B2PP87 Domain This short protein is likely to be of phage origin. For example it is found in the Swiss:B6DZ51 Enterobacteria phage YYZ-2008. It is largely found in enteric bacteria. The molecular function of this protein is unknown. 21.40 21.40 21.90 25.00 20.90 20.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.36 0.72 -4.56 30 595 2011-04-27 13:50:12 2011-04-27 14:50:12 1 1 284 0 13 250 0 52.30 37 64.10 CHANGED shhphschY+D.ppGhhVplhussps..RVhahR.-GYpasCshPltpFpp-FphVp .........hhphsphY+D.paGssVpIpplpcp..+VhYhR.-GY.a.sChhPlhpFpc-Fp.lp.... 0 0 2 6 +13816 PF13974 YebO YebO-like protein Bateman A agb Jackhmmer:B2PQU2 Domain This short protein is uncharacterized. It seems likely to be of phage origin as it is found in Swiss:Q9MCU2 and Swiss:Q9MCS4. The protein is also found in a variety of enteric bacteria. 
27.00 27.00 30.70 30.60 25.60 25.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.82 0.72 -4.20 21 513 2011-04-27 13:58:43 2011-04-27 14:58:43 1 1 509 0 34 129 0 79.70 73 86.41 CHANGED lslsllsllluLllWFFlNRASVRANEQIcLLcpll-QQKpQss.....LL.................p+Lspssp...tpst..s......custpssp.-atphIsER ...lVSlVVLLlGLILWFFlNRASSRsNEQIELLEALLDQQKRQNA.....LL.................RRLCEANE.....PEKscccss.......-S..pcusEDEDIIRLVAER.................. 0 1 5 19 +13817 PF13975 gag-asp_proteas gag-polyprotein putative aspartyl protease Coggill P pcc Jackhmmer:B8H4J3 Domain This family of putative aspartyl proteases is found pre-dominantly in retroviral proteins. 32.00 32.00 32.00 32.00 31.90 31.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.83 0.72 -4.15 40 832 2012-10-02 15:32:34 2011-04-27 16:57:49 1 76 77 0 189 1318 75 69.60 42 6.43 CHANGED tshtptsphplpsplsu.hphp.sllDoGAots..hlspphAp+L.....uhshpths........hplp...hAsupstpspshhpshh .......................h..hp-sGssTIoVhIGG..p.....hlc.pALLDLGASVN..LlPhslaKpL.....GLstLKsT................sloLp....LADpSl+hscGll-s..l.............................. 0 53 73 124 +13818 PF13976 gag_pre-integrs GAG-pre-integrase domain Coggill P pcc Jackhmmer:Q8S892 Domain This domain is found associated with retroviral insertion elements and lies just upstream of the integrase region on the polyproteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.52 0.72 -4.33 1103 1961 2011-04-27 16:15:09 2011-04-27 17:15:09 1 257 126 0 637 1866 6 69.40 23 6.66 CHANGED sLYhl..sh..........p....p.h...s.s...........s...........s...s...........s.........s...........sp........p...s...........ss...shlWHpRLG.Hhuhp...tlpcLs..p..........th.....l...sth...s...h........p.....tp.......hCcsChhuKp ............................................................a.h................................t..................................h...........s...........h.................sp.........t....s..........................ss...shlWHpRLG.Hhuhp...tlpcLhp............pth......l......tsh...s...h............p.........tt.......hCpsC.huKp................................ 0 184 260 364 +13819 PF13977 TetR_C_6 Bacterial transcriptional repressor Coggill P pcc Jackhmmer:P17446 Domain This family of bacterial transcriptional repressors is characterised by the short approximately 50 amino acid stretch of residues constituting the helix-turn-helix DNA binding motif, around the YRFhY motif. The target proteins that are repressed are involved in the transcriptional control of multi-drug efflux pumps, pathways for the biosynthesis of antibiotics, response to osmotic stress and toxic chemicals, control of catabolic pathways, differentiation processes, and pathogenicity [1]. Another target protein is BetI, an osmoprotectant which controls the choline-glycine betaine pathway in E.coli [2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.28 0.71 -4.06 258 2956 2012-10-03 00:15:22 2011-04-27 17:17:06 1 10 1376 20 826 2207 181 109.00 18 54.34 CHANGED P....ppRLpullcsshs..sthst..t...thps.WlsFaupu.hpss...phtclpphh.tpchcspLtthlp.....phhs.........stcscthAptlsAllDGL...alc.ssls.........tshs..hpput...plspphlsthls .....................................................pthht...h...h...s....p.hs.......t..........shph...hh....phhut..u..h.+.pP....tl.t.pl....hp.tt....pchps.sL.s.phh.c.........pths............t...ps......p.....thu.ps....Ls....Ahl.-.Gl...hlc.hshs.......pshs....tpphh...phhpphlt....t..................................... 0 232 548 713 +13820 PF13978 DUF4223 Protein of unknown function (DUF4223) Bateman A agb Jackhmmer:B2PGT1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. These proteins are likely to be lipoproteins (attachment site currently included in alignment). 27.00 27.00 43.80 43.80 25.90 21.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.85 0.72 -4.24 6 435 2011-04-27 16:27:16 2011-04-27 17:27:16 1 1 432 0 13 47 2 55.40 89 99.28 CHANGED MpKhlKlAlluuVLo...oLTACTGHlpN+cKNCSYDYLLHPAISISKIIGGCGP....sApQ ........MNKFIKVALVGAVLA...TLTACTGHIENRDKNCSYDYLLHPAISISKIIGGCGP....TAQ.............. 0 1 3 8 +13821 PF13979 SopA_C SopA-like catalytic domain Bateman A agb Jackhmmer:B2PGH8 Domain This domain is found in the E. coli Type III secretion effector proteins SopA and NleL [1,2]. These proteins have been shown to act as E3 ubiquitin ligase enzymes. This domain contains the active site cysteine residue. 
25.00 25.00 25.70 32.90 19.10 24.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -10.90 0.71 -4.65 9 234 2011-04-28 08:31:54 2011-04-28 09:31:54 1 9 189 13 6 132 0 163.80 49 24.43 CHANGED .hcscLcshFhpAhppspsplKhl--tpp.pcLtpla.ph...psh.ptapLpscHappIlshYsLsshocpcKAEILFsLusVFs+YSSSslFGsEhDSPphLRtYApALhpKAacLDPplh.opppFs-WpsRLlGpssuF....TC..TulLtspMhcHAcppFsslLpplhPsAWR .............................................................t.p+GcLpptFluAhspK+SsVKhlsDss..o+LsolF............ss.hhsptpLs....stHYppILoua.....cLsDAo..pKQAEhLFCLSssFA+YSSSslFGTEaDSP.sLRtYApALhpKAacLsPulFsS......s...ppFs-apDRhp.Ghps.sF....TC..TSVlsDsM....pRH..A+c.hFPpVLSphhPlAWt................... 0 1 1 4 +13822 PF13980 UPF0370 Uncharacterised protein family (UPF0370) Bateman A agb Jackhmmer:B2PE90 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved DWP sequence motif. 27.00 27.00 27.90 63.70 24.00 23.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.21 0.72 -4.15 13 533 2011-04-28 08:38:11 2011-04-28 09:38:11 1 1 532 0 39 98 0 65.30 86 99.09 CHANGED MpWL.ADYWWl.ILLlLlGlIlNuIK-LpRlDtK+FLcNKP-LPPHRDNNcpWDDED.DWPp..cKK+ .........MDWL.AKYWWILVlVFLVGVLLNVIKDLKRVDHKKFLANKPELPPHRDFNDKWDD.D.D.DWPK..KD..QPKK. 0 1 8 23 +13823 PF13981 SopA SopA-like central domain Bateman A agb Jackhmmer:B2PGH8 Domain This domain is found in the E. coli Type III secretion effector proteins SopA and NleL [1,2]. These proteins have been shown to act as E3 ubiquitin ligase enzymes. 
27.00 27.00 28.20 29.30 26.90 26.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.64 0.71 -4.14 14 461 2011-04-28 08:38:40 2011-04-28 09:38:40 1 13 337 13 6 257 0 135.00 39 23.85 CHANGED splKschsccLlpsLspsplslsp..sh.s..LhsIhucssYlpsspIp.palpplp-sahpshsp..............hshhcppspllpshlshFs......+pP-hMlst..NusFIQhlhtshtpt..ssshpppAtcLYppYLphsplp.hhpps- .............................................h...KshhshcLVhQL..phLshssh.phlEs..h+chhSHsPYIpsslI+.SaI+ssc-shF-shhppa...........Rhscssassp.h.tFlspFs......hNptLhspp..NshFIQlIspshcus..sDth+ppAhtLYcpalppscVSPhhhp................. 0 1 2 4 +13824 PF13982 YbfN YbfN-like lipoprotein Bateman A agb Jackhmmer:B2PG20 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. Members of this family are lipoproteins. 27.00 27.00 35.70 35.60 22.20 19.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.92 0.72 -3.80 7 463 2011-04-28 08:44:07 2011-04-28 09:44:07 1 1 455 0 19 107 1 88.00 88 81.76 CHANGED QssAP.EDu+.LKpAYSACINsA-GSP-KltuCQuVLsVLKp-KtHQtFAppEoVRVLDYQpCIpAspTGNsQshsApCsKlWQEIRuNN .............QSTAPQEDSR.LKEAYSACINTAQGSPEKIEACQSVLNVLKKEKQHQQFA-QESVRVLDYQQClpATQTGNDQAVKADCDKVWQEIRSNN................ 0 1 3 9 +13825 PF13983 YsaB YsaB-like lipoprotein Bateman A agb Jackhmmer:B2PHF9 Family This family of proteins is functionally uncharacterised. These proteins are related to E.coli YsaB Swiss:Q0TBP2. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. These proteins are lipoproteins. 
27.00 27.00 27.00 32.00 22.20 22.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.67 0.72 -3.82 9 447 2011-04-28 08:46:45 2011-04-28 09:46:45 1 1 445 0 15 112 0 76.10 88 78.93 CHANGED PsQKAQ+sKVSPpRoLsMEpLCK-QAA+RYNTssQKIDVTGFEQFQGSYEMRGpThRKEuFVCSFDA-GQFLHLSMR .....PVQKAQRVKVDPLRSLNMEALCKDQAAKRYNTGEQKIDVTAFEQFQGSYEMRGYTFRKEQFVCSFDADGHFLHLSMR 0 1 1 8 +13826 PF13984 MsyB MsyB protein Bateman A agb Jackhmmer:B2PRZ7 Family The MsyB protein has been found to be able to restore protein export defects caused by a temperature-sensitive secY or secA mutation [1]. However, its exact molecular function is still unknown, but it may play a role in protein export. Proteins in this family are approximately 120 amino acids in length. This family of proteins is found in bacteria. 27.00 27.00 68.40 68.30 25.00 18.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.48 0.71 -4.08 7 458 2011-04-28 08:52:19 2011-04-28 09:52:19 1 1 455 0 27 99 2 121.30 91 97.87 CHANGED MYsTL-EAIDAAREEFLAsp.t.--D-ssV.sQFNLQKYVMQDGDIMWQAEFFtDEs-pGEClslhSGtAAQAIFDsDYDElElRpEW.sENTLHEWD-GEFQL-PPLDoEEGcAAAsEWD-c .MYATLEEAIDAAREEFLADNPG.I.DAE-.ANV.QQFNuQKYVLQDGDIMWQVEFFADEGE.EGECLPMLSGEAAQSVFDGDYDEIEIRQEWQEENTLHEWDEGEFQLEPPLDTEEGRsAADEWDER. 0 1 3 15 +13827 PF13985 YbgS YbgS-like protein Bateman A agb Jackhmmer:B2PG89 Family This family of proteins is functionally uncharacterised. The family includes the YbgS protein from E. coli Swiss:P0AAV6. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. Some members of this family are annotated as homeobox protein, but this annotation cannot be verified. 
27.00 27.00 28.90 28.60 24.50 22.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.71 0.71 -4.22 9 467 2011-04-28 08:58:10 2011-04-28 09:58:10 1 1 463 0 22 92 0 123.50 85 98.29 CHANGED MsKLAoLFLTAshoLASGAALAADosuposNGpANuAADAGtVAPsAKpNlAPNNVDNopINTsss.........Go.hpsss....ohspcsMopDElHKNoMCKDG+CPDhNKKVpsttu.ss-ssTKTDGTTQ ....MTKLATLFLTATLSLASGAALAA.DSG..AQ...oNNGQANAAADAGQVAPDAREN..VAPNNVDNNGVNTGSG....................GTM.LHsDG.......SSMNNDGMTKDEEHKNTMCKDGRCPDINKKVQTGDGINNDVDTKTDGTTQ................................................... 0 1 2 12 +13828 PF13986 DUF4224 Domain of unknown function (DUF4224) Bateman A agb Jackhmmer:B2PT37 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 50 amino acids in length. The protein is likely to be of phage origin and is found as protein Gp02 Swiss:C8CLF5 in the Xylella phage Xfas53 [1]. 21.30 21.30 21.30 22.20 21.20 19.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.11 0.72 -4.41 38 290 2011-04-28 09:01:39 2011-04-28 10:01:39 1 2 222 0 30 167 2 46.30 37 53.99 CHANGED thLTccElt-LTGh+.......ptupQtchLcpp.GI.ahhpscG+PlVsRpalp ....hLTcsEltpLTG.hc................hto+QpchLpcp.GlsFhhst.s.G...cPlVsRphh.................... 0 4 16 28 +13829 PF13987 YedD YedD-like protein Bateman A agb Jackhmmer:B2PRC7 Family This family of proteins related to the YedD protein is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. These proteins are lipoproteins. 
27.00 27.00 33.90 63.30 21.60 18.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.41 0.72 -4.49 10 460 2011-04-28 09:05:55 2011-04-28 10:05:55 1 1 457 0 23 121 2 110.60 86 80.58 CHANGED VVKTPAPAtLsGYWQopGPQSuLVSP-AIASLlVTp-GDTLDCRQWQRVIAhPGKLTptsD-hhNVTpKh-VYsLEh-GssLEYDGMTLpRVDRPTsECtphLsKt....PLssP ..VVKTPAPDWLsGYWQTKGPQRALVSPEAIGSLIVTKEGDTLDCRQWQRVIAVPGKLTLMSDDLTNVTVKRELYEVERDGNTIEYDGMTMERVDRPTAECAAALDKAPLPT.P........ 0 1 2 13 +13830 PF13988 DUF4225 Protein of unknown function (DUF4225) Bateman A agb Jackhmmer:B2PH01 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 182 and 282 amino acids in length. 24.10 24.10 24.40 24.60 23.60 24.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.99 0.71 -4.79 24 185 2011-04-28 09:36:09 2011-04-28 10:36:09 1 5 122 0 37 140 1 160.80 32 59.07 CHANGED Ippl+pEppsLccQsphlpppphp.hh.hph............cppphhthhltulGlVuGGsQlhuGhulh...hso......tsshsthhGusLlscGsNslhEsshtlh...tcp..........shpG.l+cuYcpsAp.hlGhscpsushsYssVDluhShYGhhphh........................lp......P..ssaRLa+.YlssDah+thpp.....MopstL ...............................p.lp.E.tphppp.phhpttphchalpsch............EcpplIsYVhtulslVs...uGhQlVuGsGhl...hss......tssluhhsGshLlhcGANslhEuhthLh.....hsc..........psoG.lccsYtssAc.hhGhsp.psu.huYpslDluhShYGhhphh................................h+......P..sshRLa+.YlssDahpphpphsp............................ 0 1 11 18 +13831 PF13989 YejG YejG-like protein Bateman A agb Jackhmmer:B2PI82 Family The YejG protein family is a group of functionally uncharacterised proteins related to Swiss:P0AD21. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 
27.00 27.00 58.50 58.40 20.30 19.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.27 0.72 -3.98 16 534 2011-04-28 09:51:31 2011-04-28 10:51:31 1 1 532 0 39 115 1 106.00 82 93.71 CHANGED lQLSVVHRLPQSYRWloGasGsKVEPlP..sshss-ssLlGLKLLSHDGssAWplMcpLppoLs-IQlssullEWEGEPCLFl++pDESAshCRLKNhGVAIAEshsu .......LQLSIVHRLPQNYRWSAGFAGSKVEPIPQ.NG...s.s............s.DNSLVALKLLSPDGDsAWSVMaKLSQALSDIEVPCSVLECEGEPCLFVNRQDEFAATCRLKNFGVAIAEPFSN.. 0 1 9 24 +13832 PF13990 YjcZ YjcZ-like protein Bateman A agb Jackhmmer:B2PJH4 Family This family of proteins is functionally uncharacterised. The family includes the YjcZ protein from E. coli Swiss:P39267. This family of proteins is found in enteric bacteria. Proteins in this family are approximately 300 amino acids in length. There are two conserved sequence motifs: FGD and MPR. 27.00 27.00 28.10 27.60 22.70 22.30 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.60 0.70 -5.26 10 540 2011-04-28 09:57:24 2011-04-28 10:57:24 1 1 319 0 13 209 1 258.80 62 90.92 CHANGED hVDLApG.-ss+sththtppp.pFRpRLppplhupophRpashsushupsLthsLpLlEcLsu.LsPGHLALTplsc+LspLppssustAphossltpQLtsLs-paspRssahEcsLsptsLhspAtcHsEQIFoRWRAGcYsuWSLsGRCYlALEELRWGAFGDACR.LussshsshLlDsLRs....cAsptLApslpAuPsTRHaYHpWLs.ssstsuss-asDhLuWLGDWCssD+HPVsWSVTQpWQsVuLGMPRLCSAcRLu-AMVEEIF ...............lVDhApG.Dst..p...Atpp..pFhcRLhptLhucsthpp.s.su.LstslphpLphlpcLso.LsssphAlTpls.........p+lu.L.p..stR.tph........s-h.pQlhsLh-.hpt+hpahEctLpphs.l.pAt.p.EQIFohWpAGt.YsuaS.sGRCalsLEELRWGAFGDshR..Gps.ttVs.LLs.LRh....KAhpplApp.suusTsRh..hhpWhu.pup......sss-atDhlsWLGcWsot-+pPVhWSsTQthpplsltMPRLCSApRLutuMV-EIF.................. 0 1 2 6 +13833 PF13991 BssS BssS protein family Bateman A agb Jackhmmer:B2PS04 Family The BssS protein family is a group of proteins that are involved in regulation of biofilm formation [1]. Proteins in this family are approximately 80 amino acids in length. 
27.00 27.00 40.70 40.40 26.80 22.00 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.32 0.72 -4.25 16 542 2011-04-28 09:57:45 2011-04-28 10:57:45 1 1 527 0 44 132 1 71.10 83 86.86 CHANGED HPlVGWDISTVDuYDAhMlRLHYLSotpQs.EsApVscThWLTTDlA+QLIsILpAGIsKIEou-htssshp+ .HPLVGWDISTVDSYDALMLRLHYQTPN+sE.pE.GTEVGQTLWLTTDVARQFISILEAGIAKIESGDa.sNEYR.R...... 0 2 10 27 +13834 PF13992 YecR YecR-like lipoprotein Bateman A agb Jackhmmer:B2PR21 Family The YecR-like family of lipoproteins includes the YecR protein from E. coli Swiss:P76308. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 110 amino acids in length. 27.00 27.00 27.20 43.20 26.70 24.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.72 0.72 -3.81 19 390 2011-04-28 10:03:24 2011-04-28 11:03:24 1 1 385 0 20 105 2 73.10 73 71.33 CHANGED ssuSpssGlVcLuYp.....uhhpss.psDthtutthAspcCppWGYssAcsFGpss....psCshhsGph.....ChspplTlpYQC ..S.EssssoGIVRLsY-....QAaLQ+A..+TDcYVS+GlA-RACQQ.GYocAlPFGQPV....GsCSLaAG.SL.....CLNTcFTLSYQC... 0 2 2 12 +13835 PF13993 YccJ YccJ-like protein Bateman A agb Jackhmmer:B2PKP7 Family The YccJ-like family of proteins includes the E. coli YccJ protein [1] Swiss:P0AB14 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 75.10 75.00 25.20 24.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.24 0.72 -3.90 6 468 2011-04-28 10:07:47 2011-04-28 11:07:47 1 1 467 0 26 65 2 68.90 92 92.19 CHANGED KsHHVuEWAsVRcTSsEIAEAIFElAphDEpLAEcIWE.EGSDEVL.hAFuKTscDpLaWG-cTlERKNV ...KAHHVGEWASLRNTSPEIAEAIFEVAGYDEKMAEKIWE.EGSDEVLVKAFAKTDKDSLFWGEQTIERKNV.... 
0 1 3 15 +13836 PF13994 PgaD PgaD-like protein Bateman A agb Jackhmmer:B2PKR8 Family This family includes the PgaD protein from E. coli Swiss:P69432. The homopolymer poly-beta-1,6-N-acetyl-D-glucosamine (beta-1,6-GlcNAc; PGA) serves as an adhesin for the maintenance of biofilm structural stability in eubacteria. The pgaABCD operon is required for its synthesis and export. It has been shown that PgaD is essential for this process [2]. 27.00 27.00 33.80 33.70 25.60 24.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.70 0.71 -4.69 34 382 2011-04-28 10:23:42 2011-04-28 11:23:42 1 4 353 0 38 146 4 123.60 45 83.02 CHANGED LIIspp+hhs..p+hhshhlThhhWshalaLhhsh...hlhhhlsht.hht.h..htshtsthtslthahhlhlhsuslLlhWApYNphRapscp++ph.....ls.c-lAppasls..pptlpphpps+lhs.la.aD-cGclhp .......LIIspppp.s..phhlDhlsTslhWshFhhhlhhhh..llht.............ahapSputopLpaYhLLAlANAVVLIlWAhYN+L...RFQcpp++Ashp.....hospEY.....AcSLAlP...-ELhQQLQKS++hoVHasspGpIp.h.......... 0 8 20 28 +13837 PF13995 YebF YebF-like protein Bateman A agb Jackhmmer:B2PQW6 Family The YebF-like protein family appears to be a group of colicin immunity proteins. As well as YebF the family includes cmi, the colicin M immunity protein [1]. This domain family is found in bacteria, and is approximately 80 amino acids in length. The alignment contains two conserved cysteine residues that form a disulphide bond in the solved structure [3]. 27.00 27.00 28.60 28.90 25.90 25.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.81 0.72 -3.97 23 531 2011-04-28 10:25:30 2011-04-28 11:25:30 1 2 485 4 22 151 1 88.40 73 73.86 CHANGED RssKhsuCssLspsQlAApVKcDFLQNRIsRWssD+.KtLGpscP.VsWlssp-I...sGc-ssapVPLTVRGs+sD+pYpVhlDCpsGTIoYs ....p.SVpFPpCEGLDAAGIAASVKRDYQQNRlsRWADDQ...KlVGQADP.VAWVslQDI...pGK.DDKWoVPLTVRGKSADIHYQVsVDCKAGhAEYp..... 
0 1 6 14 +13838 PF13996 YobH YobH-like protein Bateman A agb Jackhmmer:B2PQU4 Family The YobH-like protein family includes the YobH protein from E. coli Swiss:Q2MB16 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There are two conserved sequence motifs: GYG and GLGL. 27.00 27.00 31.60 31.40 20.10 17.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.22 0.72 -4.05 15 454 2011-04-28 10:29:34 2011-04-28 11:29:34 1 1 451 0 30 97 1 69.00 76 88.65 CHANGED +hlIRslllLAllWlulLhSGYGVLlGSpcNsAGLGLQCpYLTARGhsTAQYlHoDSGlIGlo-CPLLRK ..RhIIRslhLlALVWIGLLLSGYGVLIGSKcNAAGLGLQCpYLTARGTSTsQYLHTcSGhlGI.oDCPLLRK.. 0 1 6 18 +13839 PF13997 YqjK YqjK-like protein Bateman A agb Jackhmmer:B2PL82 Family The YqjK-like protein family includes the E. coli YqjK protein Swiss:Q47710 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a single completely conserved residue R that may be functionally important. 27.00 27.00 28.00 27.50 25.50 25.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.27 0.72 -3.80 39 560 2011-04-28 10:34:02 2011-04-28 11:34:02 1 2 555 0 59 168 6 72.30 73 73.50 CHANGED LLppIpQQRh-LusptppWhchTsshDRuWpplh....phRpahh.luuulhAlhulR+.Pp+lhRau+R.uhusWushR ....LLSQIQQQRLDLSAS..R..R-WLEsTGAYDRtWNhLL....SLRSWAL.VGSSVMAIWTIRH.PNMLVRWARR.GFGlWSAWR....... 0 6 25 41 +13840 PF13998 MgrB MgrB protein Bateman A agb Jackhmmer:B2PQU3 Family The MgrB protein is a short lipoprotein. The mgrB gene has a mg2+ responsive promoter [1]. Deletion of mgrB results in a potent increase in PhoP-regulated transcription [3]. The PhoQ/PhoP signaling system responds to low magnesium and the presence of certain cationic antimicrobial peptides. 
Over-expression of mgrB decreased transcription at both high and low concentrations of magnesium. Localization and bacterial two-hybrid studies suggest that MgrB resides in the inner-membrane and interacts directly with PhoQ. This domain family is found in bacteria, and is approximately 40 amino acids in length. There are two conserved sequence motifs: CDQ and GIC. 27.00 27.00 33.40 49.50 19.30 18.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.88 0.72 -7.32 0.72 -4.17 14 404 2011-04-28 10:41:38 2011-04-28 11:41:38 1 1 401 0 18 57 0 29.00 90 61.90 CHANGED LYLlAlsshCDQG.tpFh.GICsITcalPa LWAQVFNMMCDQD.VQFFSGICAINQFIPW 0 1 2 10 +13841 PF13999 MarB MarB protein Bateman A agb Jackhmmer:B2PLW6 Family The MarB protein is found in the multiple antibiotic resistance (mar) locus in Escherichia coli. The MarB protein is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved GSDKSD sequence motif. 27.00 27.00 28.10 28.00 21.50 20.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.06 0.72 -4.39 10 439 2011-04-28 10:46:09 2011-04-28 11:46:09 1 1 435 0 13 87 1 65.30 69 90.91 CHANGED uuAslsLLlLsSGQuhAEQTppPsspsscDslllPsupcQSPFDLNHMuAGSDKSDELGVPYYNp+ ..SAIAuALILFSAQGVAEQTsQPVVTSCusVVVVPsSQEQPPFDLNHMGTGSDKSDALGVPYYNQp... 0 1 1 8 +13842 PF14000 Packaging_FI DNA packaging protein FI Bateman A agb Jackhmmer:B2PPD2 Family This family includes the lambda phage DNA-packaging protein FI. Proteins in this family are typically between 124 and 140 amino acids in length. There is a conserved EEE sequence motif. 
27.00 27.00 29.10 28.90 22.30 21.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.39 0.71 -3.93 13 521 2011-04-28 10:50:25 2011-04-28 11:50:25 1 3 289 0 3 215 0 120.80 49 94.02 CHANGED TKEc.lpRLcELAshLGREsDhSGSuA-lAQRVAEhEEEls.............suss-ssssccss.supp-psssspsp.....tsps-hVpVcsLtoLHhsAlcsssscsV.phV.sGpslhVsussAsshlssGLA .......TK-E.ltRLcpLut.LsR-ssloGottElA.RVAEhEEELs.............suupDs.susE...huccs...pssosps-pl.......psousLssVhsLssLHssulcsppsEPV.thVhsGpshhVsuulAspMs-+GhA................................... 0 0 0 1 +13843 PF14001 YdfZ YdfZ protein Bateman A agb Jackhmmer:B2PLX9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved YDRNRN sequence motif. The E. coli protein has been shown to bind selenium [1]. 27.00 27.00 27.40 27.40 21.90 20.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.06 0.72 -3.70 11 477 2011-04-28 10:55:07 2011-04-28 11:55:07 1 2 463 0 21 77 1 63.40 78 94.39 CHANGED +sYDRNRNAIosGsRVMluuoGpsGVIKAIHu-GhospQlRRuKsVplcGs-t+asPl-LlRLG .....pTYDRNRNAITTGSRVMVSGTGHTGhIhuI-oEGLTAEQIRR...GKTVlVEGCEEKhAPlDLIRLG........... 0 2 6 12 +13844 PF14002 YniB YniB-like protein Bateman A agb Jackhmmer:B2PNH1 Family The YniB-like protein family includes the E. coli YniB protein Swiss:P76208 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 180 amino acids in length. This family of proteins are integral membrane proteins. 
27.00 27.00 30.40 30.00 25.10 25.10 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.86 0.71 -4.79 20 546 2011-04-28 10:57:37 2011-04-28 11:57:37 1 1 541 0 42 162 4 162.50 80 91.65 CHANGED lhGWllFIPALlSTlISllphhat+u-ctpGINAVh.DFh+lMl-MlRFNTPFLNhFWpNSPlPsasth..uuuNlhFalIYlLIFVGLALpASGuRMuRQl+aIREuIEDQLILEpAKGs-GhTRppLEp+IslPR.HTIFlQhFsLYlLPlllullGYhhlKLLGh ..IlGWVIFIPALlSTLISLLKFM.s+pEpQEG......INAVMLDFTHVMIDMMpsNTPFLNlFWYNSPTPNFs.....GGlNlMFWlIFILIFVGLALQDSGARMSRQARFLREGVEDQLILEKAKGpEGLTREQIESRIVVPH.HTIFLQFFoLYILPVIsIssGYhFFSLLGF................................ 0 2 10 25 +13845 PF14003 YlbE YlbE-like protein Bateman A agb Jackhmmer:O34958 Family The YlbE-like protein family includes the B. subtilis protein YlbE Swiss:O34958 which is functionally uncharacterised. This family of cytosolic proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There is a conserved WYR sequence motif. 27.00 27.00 43.10 42.90 22.90 18.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.19 0.72 -3.89 14 143 2011-04-28 13:02:23 2011-04-28 14:02:23 1 1 143 0 20 63 0 64.70 64 81.46 CHANGED p.-L+pFIRppPhWYRpLoRpPpplsshEh-AhpaYcKTlPc+V-+hssslpMApMMhpMhpAM+ ..D.EDLsRYIREQPaWYRKLoRNPEEhEAFELAAMpHaKKTIPDKVEKFQNQLulASlMI-MFQhMK... 0 3 12 14 +13846 PF14004 DUF4227 Protein of unknown function (DUF4227) Bateman A agb Jackhmmer:C0H451 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 
27.00 27.00 33.30 33.10 26.50 26.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.28 0.72 -4.14 18 158 2011-04-28 13:05:32 2011-04-28 14:05:32 1 1 157 0 29 79 0 70.90 58 90.84 CHANGED +plhcslKVFlLFTGsTlLFYYullWlscEYpsYHRYDEPcGsAVKVhphpssp.ph..sa.h-RLhhFYhsGE .........KhsaDuIKVFLLFTuCTILFYaAILWlN-EYEsYHRY-KPKtcsVchV..ous..c....EPsKD....ua.lsRhhFFYcNGE. 0 7 19 21 +13847 PF14005 YpjP YpjP-like protein Bateman A agb Jackhmmer:P54172 Family The YpjP-like protein family includes the B. subtilis YpjP protein Swiss:P54172 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 200 amino acids in length. 27.00 27.00 56.80 56.00 25.50 18.40 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.89 0.71 -4.40 15 144 2011-04-28 13:07:43 2011-04-28 14:07:43 1 1 143 0 23 80 0 136.90 68 68.73 CHANGED ssc.spppahct.lhppA.cQSh.hKFGsKIuPhIE-Ea+chILPKlEcsIs-hlsphs---.LppLslS-sPuuGpGEKIFHlYsp+TG-DllRFHVRRD+PPpcGYaFNFHYHot-DsFpsHHELGsIYWcKNTPPpW ........s.+LTsDTFIsY.AMQEAEKQSM.pKFGoKIGPVIEDEFKDVILPKIEEAIAELAs-VPE-.S.LQSLA.ISpKP.AGGNsEKIFHVYDTKTGsDLLRFHVRRDHPPQDGYYFNFHYHcaDDGYouHHELGsIYWNpNsPPKW... 0 6 15 17 +13848 PF14006 YqzL YqzL-like protein Bateman A agb Jackhmmer:C0H452 Family The YqzL-like protein family includes the B. subtilis YqzL protein Swiss:C0H452 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 27.00 27.00 27.00 27.80 26.30 26.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.54 0.72 -3.76 20 174 2011-04-28 13:10:03 2011-04-28 14:10:03 1 1 174 0 37 81 0 44.00 51 92.99 CHANGED DFoWKlFupTGsIDoYLLaKE....lEccsc.p.sppppE...pchs...shp ...DFTWKhFSpTGSI-TYLLhKE....hE+-sp-ch-pcE-E..hsclD.sh.s............. 
0 18 29 31 +13849 PF14007 YtpI YtpI-like protein Bateman A agb Jackhmmer:O34922 Family The YtpI-like protein family includes the B. subtilis YtpI protein Swiss:O34922 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 101 amino acids in length. 27.00 27.00 36.20 35.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.66 0.72 -4.06 24 163 2011-04-28 13:12:27 2011-04-28 14:12:27 1 1 162 0 30 111 0 80.50 53 87.64 CHANGED hlhlIlhShshYlYaKl+t.hRopcshc+phasuKusluLGlFlhhFulNQhhl...hpoTlshlluhlFlllGhushatGa+sa+HYhPhht ...........................................hhlhhoh.hhhaaph+..h+st...shEK........tahSuKSuMALGoFVLFFGINQaFL...phSTsclI.VGllFlLhGuuSlasGaRpYKHahPLA.l...... 0 9 21 24 +13850 PF14008 Metallophos_C Iron/zinc purple acid phosphatase-like protein C Coggill P pcc Jackhmmer:Q9LMX2 Domain This domain is found at the C-terminus of Purple acid phosphatase proteins. 23.00 23.00 23.00 23.00 22.70 22.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.93 0.72 -3.89 130 888 2011-04-28 16:05:58 2011-04-28 17:05:58 1 20 198 20 604 904 23 64.20 29 12.89 CHANGED pAPlaIlsGsuGs.....th...ss.....hs..p....s....p...ssao.s.h....cp...s.caGauplphhNpTcLpacalc.s..p...s.........G..p..V..hDph .....................................uPlalshGsu...Gs...........hh...sp...................hh.p...........s....p...PsaS.sa.........Rp....s.saGaupLplh.NpT....+hhapahc.s.p...D.........G..p..l..hDp................. 0 189 377 518 +13851 PF14009 DUF4228 Domain of unknown function (DUF4228) Coggill P pcc Jackhmmer:Q8GY70 Family This domain is found in plants. The function is not known. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.22 0.71 -4.14 56 732 2011-04-28 16:07:15 2011-04-28 17:07:15 1 4 27 0 458 690 0 161.80 16 86.36 CHANGED MGNslsssh.......stssssss+llh..sGplcch..pt....sls..AuElhtphPsHh................sss....tp...s....s.....pclt.........sLssDccLphG..clYallPhpp.htst.hstps....hs..plt....ht..sspstpp........htttpsp.h...............t........ppssss.sp.h.......hh...hhs.cppl...pchh.............p..pt.........sspt..hsp.h......tptt............hp...ppp....p.tspspsW+PpL-oIsE ......................................................MGst.t......................t.tss+lht.........s...........G.plpch......th.............s..hp..suclh....t.t........P.s.p.h.l.............................stsps..ht...h..........s....................tph................hl.t..scppLp.G..plYall.Phpp.h...p.....hpttp.............hs..tht.........ht......sttthtt.....................ttt.tp.h..............................................................tttst...ht.......................t.h....tphh........................................t.....................t...................................................................................h................................................................................................................... 0 45 266 375 +13852 PF14010 PEPcase_2 Phosphoenolpyruvate carboxylase Coggill P pcc manual Family This family of phosphoenolpyruvate carboxylases is based on seqeunces not picked up by the model for PEPcase, PF00311. Most of the family members are from Archaea. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.85 0.70 -6.19 27 105 2012-10-10 15:06:27 2011-04-28 17:21:18 1 1 102 8 53 798 438 495.80 36 99.07 CHANGED IPRsMuTQHPDNs.hP.a.s.psshlsu-DElpEAahsao.....thGs-EhMWDaEGK-sDpaVl+KLlopY.caFpcphLGcDlaLThRlPNPplEpsEtKlLsEshtsIspsaDhuchh.....t.sssPIFEVILPMTsssccl.plhchYcchlt.pt...h.s....htlcEhlG-h.PcpIcVIPLlEDtsuhlpscpIlpcYh....ctt..c.pahRVFLARSDPAhNYGhluAsLhsKhALpclhclpc-hulplaPIlGsGSsPFRGphsPtslpps.lpEYsGlhTaTlQSAF+YDashccVhculcplpp.tphppst.lsp-c...hhplhpphoppYppplcplAshlNplAphlPpRRpRKLHlGL.FGYuRsh.ssl..............................sLPRAIsFsuuLYSlGlPPELlGhusL...sc...cch......-hlpcsa.tltcDLphAscahN.-........ssthhhstctlhtlp-th-hhtp.............pcHp.hsppllpshppt.....................hcshllchuplRtFLG .......................................................................................................................................IP+sMuTQHPDNsthP.a....s..psshIsup-ElpEAahuao.....tLGs-EhMWDaEGKcsDtaVlc+LhopahcaFppp.LG+DhFLThRlPNsphEcsp..pplhspshtslsps..hDhAchh......t.sttPlFEVILPMoposcplhp.l..t.chapchls..................hthc-..a.h.s.ch..sc.pIclIPLhEDhsuhLpsccIlccYh.........cph.......c..cYhR.......VFLARSDsAhsYGhluulLusKhALsclhchscchulpIaPIlGsGSsPFRG..s..L..o.P..cs..l.-ch....l....pE...Ys....G......lp.ThTlQ.....S....A...F......+...Y...D......a.....s.......h.....-.....c.....V.......p..p....A.....lpp....l.......ps......hph.....sps.....p.lscc-.....thhhclhp.phu.ppYppplcplssslspluphlPcRRcR+hHlGl.hGYSR.s.l.ssl..............................sLP..RAIsFTu......uhY.S.lG.l.P.PE.l.l....Ghu...pl..p..sc....cch......-hlhchYss..h..+cDlphAu+ahsh-............hsthhhspcshtcl...cEDlchhpp..hth........ppcH.sthsppllphhct.h....................hpphIhchAplR+hLG.....................................................................................................................................................................................................................
...................................................... 0 17 31 43 +13853 PF14011 ESX-1_EspG EspG family Bateman A agb Jackhmmer:A5TZ16 Family This family of proteins contains the the EspG1, EspG2 and EspG3 proteins from M. tuberculosis. These proteins are involved in the ESAT-6 secretion system 1 (ESX-1) of Mycobacterium tuberculosis which is important for virulence and intercellular spread [2]. Proteins in this family are typically between 254 and 295 amino acids in length. 27.00 27.00 29.20 27.90 25.80 25.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.35 0.70 -5.12 66 505 2011-04-29 10:18:57 2011-04-29 11:18:57 1 2 116 0 134 371 0 245.10 22 90.72 CHANGED Lossp......h.hLh-.thuhsp.hP................hsLsls..s.hss..tscRsshpppshs......pLp..ttGlhsst..plcsplsshlpsLscPshpl-sthhh.................sup....hRsh..............sutpss............psVlA..spss.......stlslpsht........ssuLssslsssLss...ss.Pup.hpslolPssphtpstp............t.tsst.htshlpphGl...sssssps.ltphhspspstsu.husptp.sst.......pspsssslshhD..Tsp.GRhlspsppu.ssp...W.solsPus.spplsptlppL ..........................hossshhhltp.hhshsp.hP................hsLslp....shhss........pspctthtcpshs..........pLs.thGllssp..t.....hsstltshlclLspP-hpltsthht................................ssp........hRsl.................lupcus............pt..VlA..hRsu.......shlslpsss.............................spsLsshlsssL....ss.............ss..PAs.hpslolstpphtcssp...........................s...ss.lpphul................ssssht...s.ltphhssspststhlusppc...sss..........ptpssssl.ulhD..oss.GRll.sts..p..puhsuc...a.hshsPuo.sttlttulpp......................................................................... 0 39 97 124 +13854 PF14012 DUF4229 Protein of unknown function (DUF4229) Bateman A agb Jackhmmer:A5TZR1 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 95 and 122 amino acids in length. 27.00 27.00 27.60 31.20 26.60 26.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.33 0.72 -4.08 62 283 2011-04-29 10:23:44 2011-04-29 11:23:44 1 1 267 0 100 247 24 68.30 33 61.31 CHANGED hlhYshsRLsLhsslssl...lh.hluhhhh.....shhluslhAllluhsLShhlhpphRpcsstsluth....sppR .....hlhYshuRLhLhlslsul...lh.hl..uhhhsh......hsllluslhALllAhPLShhlFpslRccsotslAthscpR.......... 0 29 76 95 +13855 PF14013 MT0933_antitox MT0933-like antitoxin protein Bateman A agb Jackhmmer:A5U0U8 Family This family of proteins contains the MT0933 protein Swiss:O05901 which has been identified as an antitoxin to /protein MT0934 [1]. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 90 amino acids in length. 27.00 27.00 27.20 28.90 26.90 26.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.50 0.72 -3.86 58 385 2011-04-29 10:29:15 2011-04-29 11:29:15 1 4 326 0 102 249 0 55.50 39 74.87 CHANGED lhD.............K..uKchlspp....t...........-plc....pGlDKAu-hlD..........cKTGGKYucplDpup-tAcctls ..................................hhD...........KAK-hlspp.....t...........Dpl-....pulDKAGchVD..........c+TsGKauDpIDpup-us+ctls....... 0 36 78 98 +13856 PF14014 DUF4230 Protein of unknown function (DUF4230) Bateman A agb Jackhmmer:C7QH51 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 203 and 228 amino acids in length. 
27.00 27.00 28.30 27.00 25.50 25.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.01 0.71 -4.36 170 626 2011-04-30 09:44:43 2011-04-30 10:44:43 1 1 536 0 162 520 25 157.40 18 73.09 CHANGED lpplcslucLtshphphpplhshpsppphh.................h.hspp+hlhlhsu..plpsul.DLsclp.p..lph........pscp..................lplp.LPpsclh.ssp...lD...tp..hpha.sppps....hhst..........p.p.-hsphhppu.cpphtcpsh..pss.lhppAc...ppupphlptlhps....hth.......pphplph ....................................................................pplcplucLsshphthpcllshccsthhhh.....................hshsc+phhllhpuplph..sl.DL.pchs.t.lpl........sscp........................lplp.lPp.sclh..psp..lD....tp.....hcha..cppsu......hhs...............hs.p.-tsphhpcA...ccplpcpsh....pps..lhppAccsApsh...lpslhps....hsh.......pphph.................................. 0 70 128 153 +13857 PF14015 DUF4231 Protein of unknown function (DUF4231) Bateman A agb Jackhmmer:C7PX93 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 148 and 288 amino acids in length. 24.70 24.70 24.70 24.70 24.30 24.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.35 0.71 -4.08 76 408 2011-04-30 10:14:04 2011-04-30 11:14:04 1 3 361 0 94 240 33 109.30 30 56.78 CHANGED .shhphhcpputpspptah.......hhphhpllh.hhuuh.....lsh....................h.......................tt.......shthhphsss..llus...hhshhsulhshh........phpcp....WhphRsssEtl+ppph....tah......................hpss.at..........ssppstphh ................................ss...lsaa.++usppR+cY+........lL+lspIlsuhLhAl.....IPh..................................................................sscphpllsl......sLSu...lshlspulhsla........sh+-s....WhsapcTAphLc+E+a..lYt.............sp.sp.Ys......sppc..................................................................... 
0 23 60 84 +13858 PF14016 DUF4232 Protein of unknown function (DUF4232) Bateman A agb Jackhmmer:C7PXZ8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 177 and 242 amino acids in length. Many members of this family are lipoproteins. 27.00 27.00 27.40 27.30 26.00 26.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.51 0.71 -4.49 69 475 2011-04-30 10:33:41 2011-04-30 11:33:41 1 4 262 0 148 421 0 132.80 20 63.79 CHANGED CssssLshss.tssps..usGtp....hhhlslsNsuspsCsLpGa.Pu.Vshh.sssGs..............sssuscpss...........ssssVsLsPGpsAhAslthss..ssssss..........tssssss....ltl.tsP.ss........sssl.......tlshsss.........................................ssssplpVsshpss ...................Ctsstlphsh...s.ss...pu.......uhGtp..hhhLslp.N......suupsCtLsuh...Ps.lphh...sssGs.............hss..s..sttpss.............sspslsL.sPGpsstsslpats..ssssss..............shtstt...lhlh.s..st.........tsh.........l.h..h........................................................t................................................... 0 35 120 146 +13859 PF14017 DUF4233 Protein of unknown function (DUF4233) Bateman A agb Jackhmmer:C7Q8C9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 122 and 147 amino acids in length. Proteins in this family are integral membrane proteins. 
27.00 27.00 38.20 38.10 23.30 21.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.67 0.72 -4.12 61 349 2011-04-30 10:53:39 2011-04-30 11:53:39 1 1 349 0 92 219 71 105.30 33 79.73 CHANGED Rs.lhAusLlhEulVlhLAhhVshplss.....h......sssshshssslslshllhsul.p+.sWulhlshsLQls.llsushlhPuhhhlGllFuhlWhhslhhtpclccchtct ....tVhAusLhhEsIVlhLAlPVhhtVuu..........h.........sshuhshssshsVlhllhush.tR.s.....WulhlshsLQlh.hlhushlhPsh.....hhlGllFuAlWhhlhaLt+clcpp.t......... 0 28 70 88 +13860 PF14018 DUF4234 Domain of unknown function (DUF4234) Bateman A agb Jackhmmer:C7Q4E0 Family This presumed integral membrane protein domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 70 amino acids in length. 23.70 23.70 24.00 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.76 0.72 -4.24 77 247 2011-04-30 12:47:06 2011-04-30 13:47:06 1 8 203 0 77 217 14 81.90 22 50.02 CHANGED Rslshhll.Loll.......ThGIYslaWhhphsc-......................lsth..ssct..shshshhllls......................ll............................TsGlashaWha+hupcl.pthptp ............................hslhhhll.Loll.............ThGIYslaWhhphsc-.............................lsth...stct..thshhhhllhh.................................hl.............................................Thu..lh.lhWhaphuptl.t.....tt.................................................................................................. 0 32 51 61 +13861 PF14019 DUF4235 Protein of unknown function (DUF4235) Bateman A agb Jackhmmer:C7PYA5 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 88 and 119 amino acids in length. 
27.00 27.00 29.10 27.00 25.70 26.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.60 0.72 -4.13 66 301 2011-05-01 09:52:26 2011-05-01 10:52:26 1 1 285 0 89 219 10 78.20 30 77.74 CHANGED aKhluhusuhsuGhlAs+lhptlWcps..sG.ccsP...csp...............................D.ctuhtcslsaAslpGulhull+shssRuuApshp+hssp ..........aKshuhAlusluGhlupKlhptlW+hl...oG..ccsP....pss...............................D...-huhscsLsaAslSGsh..hAssphhscRtss+tat+hs..s........... 0 32 67 82 +13862 PF14020 DUF4236 Protein of unknown function (DUF4236) Bateman A agb Jackhmmer:C7PYP4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 69 and 402 amino acids in length. 27.00 27.00 28.70 28.40 26.90 26.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.71 0.72 -3.85 88 345 2011-05-01 10:59:28 2011-05-01 11:59:28 1 20 326 0 69 260 50 54.00 41 22.01 CHANGED a+FRKol+luPG.l+lNlSKsGl.ShS..lGs+.Ghplsh..stpG.htsosulPGTG..loYpp ........h+FRKSlplsPG.l+lNlSpuGs.ShS....lGs+..Gs.pl.oh...ss+G.thsslulPGTG..LSY+........... 0 24 47 65 +13863 PF14021 DUF4237 Protein of unknown function (DUF4237) Bateman A agb Jackhmmer:C7PYW1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 123 and 781 amino acids in length. 
27.00 27.00 27.80 28.30 25.90 25.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.20 0.72 -3.71 68 319 2011-05-01 11:03:08 2011-05-01 12:03:08 1 23 246 0 96 292 2 94.90 31 19.49 CHANGED pssLssGp.hlDRaG............s.........stGsFluPt.....GssaspRuLP...Pssh..........spsY+hYc.......Vh+.....shs.l...................................htGslA..PWF.sQPG..GGsQ......a...................hh.....stslppLlppG.......hLccls .......................p...l..Gs.hlDRaG............sstGpalush.....sssa..tpRuLs.sssh.............tpsY+hYp.......Vhc......sh...l..........................................................htG.lA..PaFuQPG..GGhQ.h.................ph................hslppLlcpG....hLcp.................................. 0 27 56 81 +13864 PF14022 DUF4238 Protein of unknown function (DUF4238) Bateman A agb Jackhmmer:C7Q0A2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 274 and 374 amino acids in length. 
27.00 27.00 27.30 27.20 26.80 26.10 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.69 0.70 -5.12 94 367 2011-05-01 11:09:05 2011-05-01 12:09:05 1 5 301 0 117 350 7 261.60 15 77.69 CHANGED +pHaVP..phaL+......pF............t................sppsplh.............................shph..................pppphhttshp.....shstcpphYshps.s.t.........................................................plEcth..spl.Es....phuphlpplhstpt..............lsspppp....tlhtFlshQhhRosthppphhphhpthhp.h........................................................t..t.........t.......tthttpthhhthhpphhp...........................htthhhphphtl....hhspssttFlTSDpPlsh...............................tt.sht.............pthplh......hP..loPchhlhh..........................................................................ttptttthhhhsp.pplp...plNph.hpp....up.......................chlaupspsththh .........................................................................pHalPph.hL+ta..................t...................tppthlh..............................................................hhth...................tttthh..hs..hp...........phshtpthYp..t.......................................................................................lEchh..uhh.Es....phsphlp.tlhptht..............................................lstpptp.........tlh.palh........h.hhRs.sh....ttphhphhpt.hp.h..........................................................................................................t...tph....t.....................h..t..h..hh.p.th.p.............................................h...h.htphphtl.........stsppthlsSDs.......Plhh.............................t.s.....................phhplh....hPloPplhlhh............................................................................tt.p..hh....h...h..st.p....p....lp...........hNt...h.hpp...u.......................p.lh.tpt.....hh.................................................................................................................
...................................................................................................................... 0 45 75 102 +13865 PF14023 DUF4239 Protein of unknown function (DUF4239) Bateman A agb Jackhmmer:Catenulispora acidiphila Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 254 and 270 amino acids in length. 21.40 21.40 21.50 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.36 0.70 -4.97 9 249 2011-05-01 11:18:32 2011-05-01 12:18:32 1 4 178 0 136 268 20 198.00 18 72.03 CHANGED lhsslullaushhuhslshLhppasclpcslssEAslLthlscslhsh.tsc.....cppspthltsYsctl..........lscshshh..pcs..phshh...shs.lsslhshlpthcssspsp.pshtpphl....spltcLpclRtpRlphtthuLsstaalllhhhoshshlsa........hshhsspthhthhuhhlhs.hsushhlFh.h......cLscPFpGsaplsps.shsphl ..................................................................................................h...hluslaullluhshsssh....p....phspsppsltpE....As.s.ltplhpt..s.t.s.h..s.ts...........psplcttlpsYsp.t...l..............lpp-Wsth................pps..ph.s.p.............ssphl..s..p.lhp....p.ltshps.p..sstp...tthhpthl.............splspl....hpsRppRlt.t..s.p.s.sls.s..shWhhllhhusl..h..h.lsh....................hh..hhht.pp..h...hp...h.hh.h...slhu..shluhhlhllh......ph.-pPatG.htls.t.......hh............... 0 41 82 111 +13866 PF14024 DUF4240 Protein of unknown function (DUF4240) Bateman A agb Jackhmmer:C7Q711 Domain This presumed domain is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 169 and 263 amino acids in length. This domain is often associated with the WGR domain Pfam:PF05406. 
27.00 27.00 30.40 30.50 25.70 25.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.58 0.71 -4.29 59 244 2011-05-01 11:23:53 2011-05-01 12:23:53 1 6 205 0 77 229 6 128.60 31 61.94 CHANGED Mscpc....FWpLlspsp........sss-tp.....thtchL....ps..........................tLsphssc-lhsFpphhpp...hhtc.uaphslW..uAAallpG............s.sSDDuFthFRsWLIupG+chacpsltsP..DsLuph....ptt............................................thEch.thlutcsacct ..................scpc....FWpLlcpuc...........tst-.-.......phtchL....pp...........................pLsphssc-llsFcphhpp...hhpc.u....Ys......sLW..uAAall.hG..............G..sSDDuFsa..FRsWLlupG+-sa-sslps.P..DsLsphht.h.pptt........................................sphE-l.halutcsYcp.t......................... 0 29 53 69 +13867 PF14025 DUF4241 Protein of unknown function (DUF4241) Bateman A agb Jackhmmer:C7Q8Q9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 205 and 315 amino acids in length. There is a conserved GDG sequence motif at the C-terminus. 27.00 27.00 28.40 28.70 25.60 25.50 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.15 0.71 -4.51 46 192 2011-05-01 11:27:02 2011-05-01 12:27:02 1 5 160 0 42 186 6 172.60 31 66.89 CHANGED plplsoGcllssDPL.............h.h..tstpP..ahpplssGpa.lplslsct.p..t............RhAss+l..................pascpc.....sspachAhhsspsl.ppl...c-u-.a......FGasVDAGhushsDhpstcthpcatpch.pcp.....hshY-DhFsphhpp...shtt.spapppss.shhsashssostslshFpSGaGDGh..YP....sYaGaDcsGplssllh ............lplsoGcllssDPL...............h...pspts....ahpplss.GpYslphtlspt.p..t............RhAss+l..................phscpc.....sstac..hAhhsspc...l.ppl...p-sc.a...FGasVDuGhushsDhpshpthpch.pch.pc.......hs.YsDhhs.phhpc...p..t.spappp..tG.thhs.h.hss...oshslshFpSGaGDGh..YP....sYaGhDpsGplssllh.............................. 
0 20 32 35 +13868 PF14026 DUF4242 Protein of unknown function (DUF4242) Bateman A agb Jackhmmer:C7PZ36 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 90 and 170 amino acids in length. There is a single completely conserved residue C that may be functionally important. 27.00 27.00 27.40 27.10 26.40 26.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.47 0.72 -3.89 75 359 2011-05-01 11:30:12 2011-05-01 12:30:12 1 5 329 0 110 241 112 76.80 43 51.89 CHANGED pallERc.....lPss..los-pltshpppssshhsch....ssVpWlcSaVs.tps...+.saClYpAPst-ul+ctsccu.GhPscpIscV ...cYLVpas.....IPcu....ITh-phhAp.++KsssthcEl....P-VcFhRoYVsEDhu....K.shCLYsAPDEEAVRRA++ts.stPlDsIp..h.. 0 40 81 98 +13869 PF14027 DUF4243 Protein of unknown function (DUF4243) Bateman A agb Jackhmmer:C7QAE2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 348 and 477 amino acids in length. 
27.00 27.00 29.30 29.10 20.00 21.20 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.12 0.70 -4.99 118 340 2011-05-01 11:33:20 2011-05-01 12:33:20 1 10 167 0 246 351 4 303.70 23 73.39 CHANGED tha............N...HhsHtlhulatLGAsssp....lppha...........-pptphhp.h............stphhs......psa....cphLG...cpchhtsalsFFpcElp...........pcG...hcpVlpcYha............ttpt................................h.pllstlhuGhhHPlI+LGaulEhs.pst......................llAEu..................LA.sAsch..s.h.thh..ss....ttts................................................................s.shh...pllpc.lpsstphps....ss.....................ht.thlh..cthcphh.hs...............................u.hh..........pphpcpht-hhpss....shhhsust................t..phDF.....hhlHslTuuhhlphll.hh......sspp+hpllc...hhhthslshYsupupPph.....hpplhphhs.t................................sWpplhpp.hsttps...Ds...HhsKhlRuhtp ..................................hpN...HhsHtlhuhathGAsspp....lppha...........-tttph.csh...................s.p.hst.....psa..........pphLG.................ctchhtsalsFFpcclt................ppG....hptVlpcala...............ttpt..................................phlstlhuGhhHPlI+LGaulEhp..p.t..........................lhuEu..................LA.sAsp....s.htthh..ss......t.t..........................................................................stshh...pllpp.ltt.s.tchps.....sh.....................ht.shlh......pt.cphh.ht......................................u.hhh...............pphpcpht-hh.pss.......................shhhssst................t..thDF.....hhlHhlTuuhhlthl..l..shh.......s.p.+hpllchhhh..hslshYhupspPph.t.ptl.h.s..h.st..t............................................sWptlhpp...sh.ts........Ds........HhhKhhRshh........................................................................... 0 71 141 213 +13870 PF14028 SpaB_C SpaB C-terminal domain Bateman A agb Jackhmmer:C7QBX6 Domain This presumed domain is found at the C-terminus of the SpaB protein Swiss:P39774. 
SpaB is involved in the synthesis of the lantibiotic subtilin.\ \ \ \ This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 317 and 1029 amino acids in length. The family is often found in association with Pfam:PF04737, Pfam:PF04738. This domain is found in isolation in some proteins. This domain is also found in EpiB involved in epidermin biosynthesis. 27.00 27.00 27.60 29.10 24.90 24.60 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.08 0.70 -4.85 142 404 2011-05-01 11:47:07 2011-05-01 12:47:07 1 9 270 0 119 365 3 254.40 19 35.28 CHANGED W.hth+l..hsts.pt...-phlsptlsshlsph.....tts..hpt..aFFlRYtc.....st.....HlRLRl...pss..p.......thhtthhs.....tltphht.....hhtt.............................thhsphphss..YpPEht.....RYGG.ssshshAE...plFttDSthsls..hlt.............tthshph...................................................phlsuhshhp...............hhs.h.....................p..pt..........................................................hphhtchspptttt.ph.hps.tt....thhpltp.....hthht...s...t.th..........hhppttpthsthtphh.....................................................p.t.p..htsllssh.....lHhphNR..hGls.tppE...thlhph ........................W.hhh+l..atst.tt...-phlhctlhshlcph........tps..lsp..aFFlRYhc..................s...sHlRLRl.....phss.t...........htthht..............tlpphhp....thhpp.........................................shhsshphss..Yc.Eht.....RYGG..sshshuE....phFphDSthslp..hlp...............shphph......................hlsuhshhh.........hhp..hsh.......................s.ppt.................hphhpp.hhspth..tp......t.hpthht................thhplhp........ht.ht...t..th..h............hhpptpphhsphpphh.....................................................ttt.tpt.hpp....llsSh.....lHhptNR..hGls..hppEthhh..................................................................... 
0 51 92 110 +13871 PF14029 DUF4244 Protein of unknown function (DUF4244) Bateman A agb Jackhmmer:Catenulispora acidiphila Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 66 and 95 amino acids in length. There is a conserved EYA sequence motif. 27.00 27.00 27.20 28.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.61 0.72 -4.87 53 401 2011-05-01 11:52:36 2011-05-01 12:52:36 1 1 400 0 101 287 10 54.10 44 67.72 CHANGED tttpthtsttss-pGMuTAEYAluslAAsAFAulLhhllpSssVpshLpullppALs .........hh....hhhhh.s-pGMuTAEYAlGslAAsAFAulLhtVlTussVhotLpsllspALp.... 0 30 74 94 +13872 PF14030 DUF4245 Protein of unknown function (DUF4245) Bateman A agb Jackhmmer:C7QKK2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 188 and 235 amino acids in length. 27.00 27.00 56.40 56.30 26.20 24.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.52 59 339 2011-05-01 11:55:11 2011-05-01 12:55:11 1 1 335 0 84 236 22 175.20 31 88.33 CHANGED Rhhp....ss+DMllSLsllllsshll.hhh........stsss.ss.lssVDssstlptsuc..ss.saPlttPp...lPcGWpssSu..chsshsu............ssshplGalTss......spYlplsQossssss.......hlsthssst.ps....sGspsluGp.sWphaput.........sscsshVtc.h....sssp......lllo.....GsAstc-hcslAsAlt .........................t.ss+DMllSLuslllsshll.hhh....................stssc..pssl.pV.D.hpssltsstp..sh.uaPlttPp.......lPc...s.WpssSu..chsshss............uss.plGalosp......stYlulsQSsts.tct.......hVuu.lstuspp.........sGshsV.u...Gh...pWshasus-........stppshVsc.l...........Gss.pllls.....GsushcphpshAusl.............. 
0 26 62 80 +13873 PF14031 D-ser_dehydrat Putative serine dehydratase domain Bateman A agb Jackhmmer:A5U2B6 Domain This domain is found at the C-terminus of yeast D-serine dehydratase [1]. Structures have been solved for two bacterial members of this family. The yeast protein has been shown to be a zinc dependant enzyme. 22.00 22.00 22.90 22.90 21.90 21.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.02 0.72 -3.79 98 1102 2011-05-01 13:05:54 2011-05-01 14:05:54 1 7 847 7 478 1062 313 98.80 27 25.73 CHANGED AlhlhupVlStsp.......spsllcsGt+uluhDs..........u.....h..................th...sh..shs..................t..s..............................shphsth.u...-EHuhL........................p..h..........sss......ss.....................h........plG-hltlhssHsCsThshachhhlVc ..............ALpVhupVlSpsps......spsllDsGp+sluhDt.............u..h......st...............sh.uh.lhs..................ts..............................sh..plssh.s...-EHuhl................................p.h........sss..........st.....................l.......plG-hlplhPsHsCsTsshacthhll........................ 0 134 278 399 +13874 PF14032 PknH_C PknH-like extracellular domain Bateman A agb Jackhmmer:A5U8Q1 Domain This domain is functionally uncharacterised. It is found as the periplasmic domain of the bacterial protein kinase PknH [1]. The domain is also found in isolation in numerous proteins, for example the lipoproteins lpqQ, lprH, lppH and lpqA from M. tuberculosis. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 268 amino acids in length. There are two completely conserved C residues that are likely to form a disulphide bond. A second pair of cysteines are less well conserved probably form a second disulphide bond. It seems likely that this domain functions to bind some as yet unknown ligand. 
27.00 27.00 27.20 28.50 25.50 26.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.25 0.71 -4.73 68 678 2011-05-01 13:51:53 2011-05-01 14:51:53 1 4 94 0 115 462 0 188.20 19 66.25 CHANGED sslsssslsslL.....Lsss-l....ssl.hGssthtsstsssshs.............sspsssspChushsssps.sYt........shs..sh+stsh.........tssssssth.lpQulssassspsApphh..sshssphpsCsstshsht...........tts.sttaplusssssss...hlshshsttsst.......hsCt..+shsspsNlll-lpsC...........sssssst...usplssthhs+l ........................................................h.sh.sstlsslL.....Lstucl....ssl.hG.ss.th.hs.th.stsht................sssssss.pCtsshshsts.sa.....shs...uhputsh..........t.s..ss.s...tsph..lsQuVssa.sssssAcpha..pshssphppC.sGpshshh.......................tsststthsluss.sssss....slshshtttss..................hpCt.....+shtlcsNVll-lssC...........ttspssss...usslssthhspl.................................................... 0 27 69 99 +13875 PF14033 DUF4246 Protein of unknown function (DUF4246) Bateman A agb Jackhmmer:Q59QG7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and fungi. Proteins in this family are typically between 392 and 644 amino acids in length. 
27.00 27.00 31.80 27.70 23.20 25.30 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.81 0.70 -5.89 50 374 2011-05-02 10:30:38 2011-05-02 11:30:38 1 6 93 0 324 394 5 341.20 30 77.80 CHANGED tsshts.hhTlREhsMlplMsplTDKPsWcpKlaDc.sIs.s+W+p...Eslspsp.................................................tlocphh-aslpELcaK....Aphac.cs..Gh.l.s..hss...........sVsKS..Dosls...ssLpppL+sul.phLcss....tsp.D.aHPuosppVlDLVHPSLFPLVYGRT+.llssth..lsl-..ssltt..GpGpllPh...sspppt...................h......ttht.hhaSp+FQWLPspVch.....................ssssss+IsSYINNLHPt+a+sLYpsIEpllspsIPhWNpsLs.h......................tsct.hRIph.ts..........................................p.pp.p.tps--.pch.cchp............pW.ppppphl.......................................P-P.ttFp......................................................sp.pttlsL.pccFp..ppGLQVIVKlAsIELTPEc.PpYsGG.sWHVEGph...............NEHIsATAlYYYDs-NITpS.pLuFRpts..-sp.......p.t...a-pss...........................p.palpplaGhcst...........ssshQplGuV.psppGRLlsFPNsl.................QH+VsPFcLtD+oKPGHR+hLsLaLVDPphR.IlSTANVPPQpp-WWs-tst 
.........................................................................................................h...........................................................................................................................................................................................................................................h.....p...............h.........................................................................hh.h..t................................................................................................................hs.thphLPs.hth.......................t.thph.S.YlNNL.p..P.......h.Y...hltphhtt.ls.hp.hh................................................hh...........................................................................................................................................................................................................................................................................................h........................................................................hpl........t..tptlpl......IV+hssIpLT......Pp.p.P.p..a............ts......t.........WHhEG.h......................NEpIsuoslahhs..pN.lsts..pltFR..h.................................t........................................htpl.asht.t.............t.h.Q.lGsl..h....p....G........RhlsaPNhh.................QH+.psFpLhD.opP..GHp+hLshalVsP.....t..hc...l.hSTspVsPQp.pWh.p..h......................................................... 0 160 218 286 +13876 PF14034 Spore_YtrH Sporulation protein YtrH Bateman A, Eberhardt R re3 Jackhmmer:C0H3P8 Family This family of proteins is involved in sporulation. It may contribute to the formation and stability of the thick peptidoglycan layer between the two membranes of the spore, known as the cortex [1]. In Bacillus subtilis its expression is regulated by sigma-E [2]. 
27.00 27.00 77.80 76.70 19.50 19.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.19 0.72 -4.09 29 206 2011-05-04 11:44:28 2011-05-04 12:44:28 1 1 206 0 47 114 0 102.90 53 87.53 CHANGED hsshlhsFFlAhGVllGGullGGlGAhLsscPPLpshhpLAspLKIWAllAAlGGTF-slpslEpGlhpGphcslhKQllhIluAhhGApsGhhlIpalstsp .hs.hlIlSYFIAFGVlLGGSLIGGhGAaLhG+PsLT........h........hsphApsL+IWALVAAIGGTFDoFYuhERuhFtG-h+DIlKQlLLIhhAhGGhQTGhlII+WLTQE.... 0 20 36 39 +13877 PF14035 YlzJ YlzJ-like protein Bateman A, Eberhardt R re3 Jackhmmer:C0H413 Family The YlzJ-like protein family includes the B. subtilis YlzJ protein Swiss:C0H413, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 72 amino acids in length. There are two completely conserved residues (L and G) that may be functionally important. 27.00 27.00 36.10 36.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -9.15 0.72 -3.92 39 213 2011-05-04 11:51:42 2011-05-04 12:51:42 1 1 213 \N 58 134 0 64.20 40 93.42 CHANGED LYTlhP.Ehla...ttpcsthpspp...plphs.Gl.llVcthps..sp.....hcllRllSTsPhcYLpschsPGshlp .............LYThMPpplVa...sss..sps.pspc....Vsls.GVpLhVpp.cp..sp.....YpIVRlLS.TsPhcYLc.tapPGppIp.... 0 27 44 49 +13878 PF14036 YlaH YlaH-like protein Bateman A, Eberhardt R re3 Jackhmmer:O07632 Family The YlaH-like protein family includes the B. subtilis YlaH protein Swiss:O07632, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a conserved LGFA sequence motif. 
27.00 27.00 61.50 61.20 22.70 22.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.67 0.72 -3.90 23 162 2011-05-04 11:58:59 2011-05-04 12:58:59 1 1 162 0 30 97 0 77.20 63 74.91 CHANGED scssthGhallYlsIllLullVapLGFA+...KLPlLKslllYllLhlGshlLTFhulh..LPlsEuLlVAAllLhIY+lRL ...D.ENPElGMWLLYGsIllLSAlVYNLGFAR...KLslLKNlVIYl.LAlGCTVLTF.FAVF...LPVGEGLVVAAlVLuIYRlRL. 0 9 21 24 +13879 PF14037 YoqO YoqO-like protein Bateman A, Eberhardt R re3 Jackhmmer:O31923 Family The YoqO-like protein family includes the B. subtilis YoqO protein Swiss:O31923, which is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 120 amino acids in length. There are two completely conserved residues (I and Y) that may be functionally important. 27.00 27.00 39.90 39.80 22.90 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.60 0.71 -3.98 8 164 2011-05-04 12:01:38 2011-05-04 13:01:38 1 1 97 0 11 108 1 108.70 49 88.71 CHANGED p+pIGaYGllhshhlSlllptFhps-hlsh.IlssssFlFlllYsWD-hKsYS+KslllhuIpFlIlluslsFlLhcGpchh-slslFpGWlhlA+llYllhlLslsssIh++Isc+L ..RcKIGaaGhllsh.L.lI.s.FhpsEWls..IlslhshlFs.hYpWD-hKtYS+Kphhlh.hphVlh..hlsFlLlcGpc.h-thshFQtWh..AKhLYllhllhlhhh..lshplsphl....... 0 2 5 6 +13880 PF14038 YqzE YqzE-like protein Bateman A, Eberhardt R re3 Jackhmmer:O32020 Family The YqzE-like protein family includes the B. subtilis YqzE protein Swiss:O32020, which is functionally uncharacterised. It is a part of the ComG operon, which is regulated by the competence transcription factor ComK [1]. This family of proteins is found in bacteria. Proteins in this family are typically between 49 and 66 amino acids in length. 
27.00 27.00 29.40 50.40 23.00 22.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.76 0.72 -4.22 19 154 2011-05-04 12:12:15 2011-05-04 13:12:15 1 1 154 0 26 79 0 49.50 58 82.40 CHANGED psNDYVKahTpphVpYhDpPK-ERKcc....+ptRKppK.Ph.t.RWFGllPhuhpLha ...ssDhV+ahTpphVpYMDsPKE-RKp+....KEpR+s.EKEPF.hs+WFGlhPLShsLaa........... 0 6 16 18 +13881 PF14039 YusW YusW-like protein Bateman A, Eberhardt R re3 Jackhmmer:O32189 Family The YusW-like protein family includes the B. subtilis YusW protein Swiss:O32189, which is functionally uncharacterised. This family of proteins is found in bacteria, and is approximately 90 amino acids in length. 27.00 27.00 29.40 29.10 26.90 26.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.10 0.72 -3.71 19 189 2011-05-04 12:17:32 2011-05-04 13:17:32 1 1 183 0 23 103 0 91.80 45 49.90 CHANGED hppF-L-l-ap-s..pph-hpYE..ccpsph-Aclc...cthss.phpG-EAhpclcsll.spLslsssssppcllspVlssFsL-p.shpch-l..........ElpFpD .asEF-L-s-Yp-s..p-YEssYc.ltuspphEAclE.....DcpADlcLpG-EAhsKlpsLL.pcLphcpsTs-...p-Vl-pVlssFpLDc.cYp+F-L..........ElsFoD. 0 9 18 19 +13882 PF14040 DNase_NucA_NucB Deoxyribonuclease NucA/NucB Bateman A, Eberhardt R re3 Jackhmmer:P12667 Family Members of this family act as deoxyribonucleases [1]. 
27.00 27.00 27.20 27.00 25.40 26.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.77 0.71 -3.93 29 243 2011-05-04 12:23:46 2011-05-04 13:23:46 1 15 181 0 66 192 1 103.50 50 43.24 CHANGED lpasss+YP...ETupHIp-.........AltsG..................cucls.TIDRs..uAcppRcpSL+..........shPs+.........pGhDRDEaPhAhscEGGsG....Asl+hIsPoDNRGAGS.lu......................p.QLusas......DGs+lhhhl ............................................................................tFPppRYP...ETupHIpD.....AIp.p.G..................HS.clC..TIDRs.........GAscRRc.SLt..........shPoK.................pGYDRDEWPMAMC..cEGGpG...........Aplc.....YI...oP..uDNRGAGSWVG.......................ppLspaP.......DGT+VhF.l......................................................... 0 12 39 49 +13883 PF14041 Lipoprotein_21 LppP/LprE lipoprotein Bateman A, Eberhardt R re3 Jackhmmer:A5U518 Family The family includes putative lipoproteins LppP and LprE from species of Mycobacterium. LppP is required for optimal growth of M. tuberculosis [1]. 25.00 25.00 25.50 25.40 24.80 24.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.79 0.72 -4.05 49 283 2011-05-04 12:39:40 2011-05-04 13:39:40 1 6 143 0 72 185 0 87.80 29 41.83 CHANGED sssYsss..........upLShlhlp.hssssssss.........ppshhFcpGcalGososcsh......shlssh.tssssslslpYph.t.s-ssssso.GtssVpF+W........ssspl ............................................s.ssasts.......spLShVhlp.sss.uss.sss.........ppslhFHpGpalGouoscsh.......shlssh..tsosDoVslpYp............sp.....t.....sssu.h....hssVpF+W.ssst.h..................................... 0 13 45 68 +13884 PF14042 DUF4247 Domain of unknown function (DUF4247) Bateman A, Eberhardt R re3 Jackhmmer:A5U5U9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 143 and 271 amino acids in length. 
27.00 27.00 41.80 41.50 26.20 25.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.99 0.71 -4.17 29 214 2011-05-04 13:10:41 2011-05-04 14:10:41 1 1 211 0 35 139 0 171.20 35 83.97 CHANGED huslhl........shuss............t.sspsaluspYsptus........s..thhhssstosspVAsplsst..ppPssps......s-............................ssshaLRYs....DclVtl.............................................tscstss.................................hIcV-shc.............................................suYpphs.........hhhhus....thssssP....sGusstuu..........GssGuu ..........hhlssAs.....hhluust............ts.sp.Its+YshESss.........ps...hshhssspSsspVAccLlst..pcPpptu..p-................................psphaLhYs....Dc.Ihsl.............................................p.Dtppsss...............................lIclpNhc.............................................usYpshs.........................................h.t..............thTPss.....pGu.hcpu..........GssGu.h............................................................ 0 11 21 28 +13885 PF14043 WVELL WVELL protein Bateman A, Eberhardt R re3 Jackhmmer:O31578 Family This family includes the B. subtilis YfjH protein Swiss:O31578, which is functionally uncharacterised. This is not a homologue of E. coli YfjH, a synonym for IscX, which belongs to Pfam:PF04384. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length and contain a highly conserved WVELL motif. 27.00 27.00 103.30 103.20 20.40 18.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.51 0.72 -4.35 16 169 2011-05-04 13:47:27 2011-05-04 14:47:27 1 1 169 0 20 74 0 74.40 61 86.64 CHANGED p-hhE+LTspLLEKNspLSYspARTWVELLWpDFEoThAKAG+tYpGp-hTEclVppWI-pYGupLHpapsppsK .N-haEpLTpELL-KNc+LSYuQARsWVELLWpDFpoTYAKuG+.YQG-EMTEplVRpWIpsHGu+LHchcosNPK. 
0 6 12 15 +13886 PF14044 NETI NETI protein Bateman A, Eberhardt R re3 Jackhmmer:O34700 Family This family includes the B. subtilis YebG protein Swiss:O34700, which is functionally uncharacterised. This is not a homologue of E. coli YebG, which belongs to Pfam:PF07130. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 66 amino acids in length and contain a conserved NETI motif. 27.00 27.00 27.00 43.80 22.60 19.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.76 0.72 -4.46 23 336 2011-05-04 13:56:26 2011-05-04 14:56:26 1 1 336 0 28 97 0 51.50 63 91.60 CHANGED KFcVpENETIu-CLsRMcpEGYhPlRRhEKPlFcEsKcs...plpsh+QcIlF-GKh .KFcVpEsETIsDCLsRMK.pGYMPV+RhEKPlFpEpK-G...sVEss+QcIlFhGKh. 0 7 15 23 +13887 PF14045 YIEGIA YIEGIA protein Bateman A, Eberhardt R re3 Jackhmmer:P50742 Family This family includes the B. subtilis YphB protein Swiss:P50742, which is functionally uncharacterised. Its expression is regulated by the sporulation transcription factor sigma-F, however it is not essential for sporulation or germination [1]. This is not a homologue of E. coli YphB, which belongs to Pfam:PF01263.\ This family of proteins is found in bacteria. Proteins in this family are typically between 276 and 300 amino acids in length and contain a conserved YIEGIA motif. 
27.00 27.00 87.60 87.40 18.70 18.10 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.58 0.70 -5.78 25 191 2011-05-04 14:04:51 2011-05-04 15:04:51 1 1 173 0 39 117 0 277.00 56 96.04 CHANGED hhsllhGllhGslsRlhhL+sDYRQYPTYPHGhlIHluLGhIAAuLGAlAlPALlcc-aTAlTFLuLAApQFR-VRsMERpTLspL-shELVsRGssYIEGIApsFEuRNYlslhTuhlTohshh.hh.........slhhGslsGllshlls+plhpGpplsDIA-lchuclpF.-GstLaV-cIh.IMN.lGLppp+EhlLccGhGhllpPKshs..utsTlsNlGQRQAIlH-lushLGlh+DsspPshsPlu++Dl-sGclulhllP.cpDh-thlcllcplPlLEoAh+hPpctc ......sllhGllhGhhsRlhMLRTDhRQYPTh.HG+lIHIuhGlIAAALGAIAlPulLcK-FoAITFLTLAAoQFRDVRNMERNTLppLDuYELVPRGsTYIEGIAllFESRNYLuhlTSFsTThAYlhFt.........ShlAGlIhulIuhaIu+pLMSGcpL+DlsDIEal.l+F...-GuGLYlDNIY.IMN.IGLPs+QEcIhKaGMGFIL+PKshD..AhlTIuNLGQRQAILHDVSssLGlYRDSGTPuLVPLAKRDL-..DGRVGIFlLPQ-pDsEKAItVItNVPsLESAl+MsoE..t...... 0 21 30 34 +13888 PF14046 NR_Repeat Nuclear receptor repeat Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family This is a repeat domain involved in dimerisation of nuclear receptors proteins and in transcriptional regulation in general. It contains a Leu-Xaa-Xaa-Leu-Leu motif which has been characterized for the orphan nuclear receptor Dax-1, which represses the constitutively expressed protein Ad4BP/SF-1. The LXXLL motif plays in important role in binding of Dax-1 to Ad4BP/SF-1 [1]. The domain is subject to structure\ determination by the Joint Center of Structural Genomics. 27.00 20.00 28.30 25.80 23.40 19.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.23 0.72 -4.23 14 140 2011-05-04 15:59:12 2011-05-04 16:59:12 1 6 28 0 57 131 0 47.60 54 39.46 CHANGED FCGEDHPpQGSILYsh.sSAKQTpAA...PEsp.GushWssSCGup+sl ....FCGEDHPpQGSILYshLTSAKQTpuAs.....pAPEAR.GushWssSsGup.s............... 
0 4 4 9 +13889 PF14047 DCR Dppa2/4 conserved region Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family This domain has been characterized in the finding of a developmental pluripotency associated gene (Dppa) in the lower vertebrate Xenopus laevis [1]. Previous to this discovery, Dppa genes were known only in higher vertebrates. The domain is subject to structure determination by the Joint Center of Structural Genomics. 27.00 27.00 29.20 27.10 25.60 20.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.46 0.72 -3.69 8 100 2011-05-04 16:17:40 2011-05-04 17:17:40 1 4 27 0 57 74 1 66.80 62 23.52 CHANGED GhhWCVVHGp..sssscuWltLph.HuGpsaVPsc..G+sIsLFLLPushhPPstlcDNhLCscCV++Nc .......GsRWCVVHG+sLPADocG.WV+LQF.HAGQAWVP-p..tRhhuLFLLPA........CsFPsPtLEDNMLCPcCl+RNK.... 0 4 4 6 +13890 PF14048 MBD_C C-terminal domain of methyl-CpG binding protein 2 and 3 Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family CpG-methylation is a frequently occurring epigenetic modification of vertebrate genomes resulting in transcriptional repression. This domain was found at the C-terminus of the methyl-CpG-binding domain (MBD) containing proteins MBD2 [1] and MBD3 [2], the latter was shown to not bind directly to methyl-CpG DNA but rather interact with components of the NuRD/Mi2 complex [3], an abundant deacetylase complex. The domain is subject to structure determination by the Joint Center of Structural Genomics. 
27.00 27.00 29.70 28.40 20.90 19.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.12 0.72 -3.40 26 237 2011-05-04 16:22:55 2011-05-04 17:22:55 1 4 92 1 125 236 0 93.50 48 36.94 CHANGED cs.ssE.llpsh-LscsLpslu.PshsscsLlpulAouL+hsus.....PlsGQssutptl-psPus...hhsssQPLCpthl.....VT-..-DI+cQEc+VppARcRLp-AL .....................................................t..stE.llcoh-LPKuLps....VG.PGss--.TL.LpulAoALHoost.....PlTGQhou...AlEKNPuV........aLNssQPLC+....uFh.....VT-..EDIR..+QE-+VppsR+RLpEAL.... 0 28 36 66 +13891 PF14049 Dppa2_A Dppa2/4 conserved region in higher vertebrates Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family Developmental pluripotency associated genes (Dppa) in lower vertebrates have remained undetected until the discovery of a Dppa homologue in Xenopus laevis [1], reporting a new domain termed Dppa2/4 conserved region (DCR). In higher vertebrate Dppa proteins the DCR domain is located next to the here-reported domain. The domain is subject to structure determination by the Joint Center of Structural Genomics. 27.00 5.00 27.70 14.90 26.00 4.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.94 0.72 -4.14 2 81 2011-05-04 16:24:09 2011-05-04 17:24:09 1 3 20 0 43 57 0 69.70 47 25.78 CHANGED ppQshPEhS.Es+LQpCSRKtKhVsK+A+L.+ShchpERtEEoNTVEVlTSA.tuMLAuWuRIAARAsQsKulNSpSIPsSVEsFL .............hst...Es+.......tK......................p........EcsNsVcVhTSA.EAhLASWARIAARAspPcAVsS.......................... 0 2 2 2 +13892 PF14050 Nudc_N N-terminal conserved domain of Nudc. Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family The N-terminus of nuclear distribution gene C homolog (NUDC) proteins contains a highly conserved region consisting of a predicted three helix bundle. In the human homolog this segment has been targeted for structure determination by the Joint Center for Structural Genomics. 
NUDC forms a complex with other NUD proteins and is involved in several cellular division activities. Recently it was shown that NUDC regulates platelet-activating factor (PAF) acetylhydrolase with PAF being a pro-inflammatory secondary lipidic messenger [1]. 27.00 27.00 28.00 27.30 26.40 25.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.06 0.72 -4.02 60 308 2011-05-04 16:54:57 2011-05-04 17:54:57 1 5 149 0 172 299 1 55.90 41 17.99 CHANGED p+aDshLhslAppp.sulpshL-shFuFLpRKTDFap.............................G.hp.spsccllhcsFc+ ......paDshLLslhQpp.uslpp....hLsshFuFLtRKTDFap..........................................G.hs.GhAEcllhpsFp........................................................ 0 66 83 132 +13893 PF14051 Requiem_N N-terminal domain of DPF2/REQ. Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family This putative domain has been detected on the human DPF2 protein and was subsequently targeted for structure determination by the Joint Center for Structural Genomics (JCSG). Possibly, the C-terminus extends by 30 amino acids and forms a separate domain. DPF2 interacts with estrogen related receptor alpha (Err-alpha), an orphan receptor which acts as a regulator in energy metabolism [1]. It was also identified as an adaptor molecule that links nuclear factor kappa-light-chain-enhancer of activated B cells (NF-kappa-B) dimer RelB/p52 and switch/sucrose-nonfermentable (SWI/SNF) chromatin remodeling factor [2]. 22.00 22.00 22.10 22.10 21.20 21.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.34 0.72 -4.05 13 277 2011-05-04 16:55:41 2011-05-04 17:55:41 1 6 85 0 123 214 0 71.30 70 19.10 CHANGED lsDshY+EslEsussaNoRLshER..RlRhPFLDsQTGVAQscs.tlahppcpRhPGpstGQlYTYPu+RWRK++Rp .......L.G-paY+-AlEpC+sYNuRLCAER..SlRLPFLDSQTGVAQsNC.YIWMEK+HRGP..G..l.....AsGQlYTYPARpWRKKRR..... 
0 24 34 74 +13894 PF14052 Caps_assemb_Wzi Capsule assembly protein Wzi Eberhardt R re3 Jackhmmer:A7UZC7 Family Many bacteria are covered in a layer of surface-associated polysaccharide called the capsule. These capsules can be divided into four groups depending upon the organisation of genes responsible for capsule assembly, the assembly pathway and regulation [1]. This family plays a role in group 1 capsule biosynthesis. It is likely to be involved in the later stages of capsule assembly. It is likely to consist of a beta-barrel structure [2]. 27.00 27.00 27.20 27.20 26.80 26.30 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.44 0.70 -5.63 92 390 2011-05-05 12:32:33 2011-05-05 13:32:33 1 2 294 0 143 386 206 444.60 21 90.03 CHANGED lphhsphGshtus.hsphPhhhpusptslspspsss..................shlpttltpthp.........ttttphp.......................................................................................huhslcs.hshst....ss........................................shplppuYlshphtshtlssGppcph...Gh.......sGuhhhosNARPlPtlplspsp.hsh.s.hhphhhshphphthGhh...ps........s...........chstps..........hhautphhh+.t..p........pLElGlshsstaGGp..............Gp.ss.....uhpsah..cshhutt.t.....st..........s.GNpluuhDh+hph..hhphshtlYtphhhEDpuuhh.h........................tshlhGlchph......hspsphh..lhhEahpTp.pss...................ssaYspshY.sG.apahGpslGssh...........................ssc..spshplGhptp.......hssphshphphsasct.ssh.t........................h.....phtthhcl.....h....hthhst....h....phsushuhcps..phh.ssshshtlslph 
................................................................................................................................................................................................................thhsphGshths.hsphPl.hpthtpsLppscss.......................hstlpttlptthss..................thtths...................t................................................h......................................ht............ashtlps..shpts................................................................t.tchpl.p...puYsshp.....hts.h.hlshGphppahGPGh..........sGullh..osNARPhstlplp.psp..hsh.s.hhph.lsshphphshu..ph........ssp.....................phsscs...................hhhut+h.sh..pPh..p........pLElGhsp.sh..paGGc...................Gc.sp.....uh.psah..cshh....upsss.t........t.s....................p.GNpluu...aDh+hph........h.hsh.....slulYtphhtED.......pu..uh.h.t......t......................pshLhGlc.hph.......sppsh....lhhEahsTpspss......tt.................shhYs.H....th.YtsG.atppGhslGssh........................................................sscupshtluhphp.......hssp.p...hpsphpaschsss.t......................sh...pppp.thlpl....tat........hhtth.plpsshhh..sps..p...sss.sh.hth................................................................................................. 0 48 102 127 +13895 PF14053 DUF4248 Domain of unknown function (DUF4248) Eberhardt R re3 Jackhmmer:A7V2Q3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 86 amino acids in length. 27.00 27.00 28.60 28.50 26.90 23.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.01 0.72 -4.39 64 339 2011-05-05 12:42:11 2011-05-05 13:42:11 1 1 64 0 17 200 0 68.30 36 86.02 CHANGED ahlRs.Ys+sELAhhYhP.....sl.ss.puAhc+LpcWIcts.tLhppLttsGYpppp+haTPtQVplIlcaLGEP .......a.l+s.Ys+sELA.thYhP.....sl.sspsAhc+Lp+WIcts.sLhppLtssGYpsps.+t.aTPtQVplIschLGEP.... 
0 8 15 17 +13896 PF14054 DUF4249 Domain of unknown function (DUF4249) Eberhardt R re3 Jackhmmer:A7V6V4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 279 and 365 amino acids in length. There are two completely conserved residues (C and G) that may be functionally important. 27.00 27.00 27.20 28.20 25.80 25.70 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.95 0.70 -5.06 80 352 2011-05-05 13:08:51 2011-05-05 14:08:51 1 2 162 0 154 373 218 287.50 17 90.65 CHANGED hhhllhhhhh..suC...pc.l-l.shsss.s+LVlsuhls......sssssttlpLoposshhssp..........lssApVpl.......sssptpshhhtpsss........................shYh....sss..........hspsGcsYpLplph..stpphsApsplstss.sl.pslphpptshhsspp.pp.....................lplpapDss.sppNYYhhphp.pph.hh.........................................................................................tlhsDphh.........sGpphshthhhp..............................phpsssplplplhulocshYpYhpsl.ptpsss.....ss.hhstPsslhuNlhsu...................lGhFussphs.phphpl ............................hhhhhhhhh.suC...pc.l.s.h.p....hsss...spLVlpuhls................ssssthlpLopo.....tsh..hsst..........................lssApVpl............................hssptpphhhtpsst.................................stYh...........sssh.................thpsGcsYpLpl.....ph..st....pph.sA.p.s.p.lstss...tl..pslp..hp...thhs.tt.t......................hplshpDss...spp...sYY...hhp.hp.pth.h...............................................................................................t.h.........................lhsDp.hh..............sspp.hp.hth..t..............................thphtt.pl..tlchhulocphYpYhpsl.thpsss.........................st.h..stPspl...uNlpss......................lGhhsssshs.ph.h..h........................................................................ 
0 77 140 154 +13897 PF14055 NVEALA NVEALA protein Eberhardt R re3 Jackhmmer:A7V7L0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 75 and 92 amino acids in length. There is a conserved NVEALA sequence motif. 27.00 27.00 27.10 27.20 26.80 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.46 0.72 -4.27 65 219 2011-05-05 13:17:48 2011-05-05 14:17:48 1 1 44 0 36 174 0 71.20 26 84.01 CHANGED K.....p..Klhhhhhhusl.sshsGashhps...pptc.....thS.DLuLsNVEALA.sGEss..............s...............................stsshsC ..............phhhhhhh.uhl....sshsuhshhps....pptc.....ph.S..-LsLsNVEALA...suEss..................................ss................h..C....................h............................................... 0 6 19 36 +13898 PF14056 DUF4250 Domain of unknown function (DUF4250) Eberhardt R re3 Jackhmmer:A7VAD8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There are two completely conserved residues (N and R) that may be functionally important. 27.00 27.00 30.10 37.20 25.40 21.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.60 0.72 -4.24 92 470 2011-05-05 13:27:41 2011-05-05 14:27:41 1 1 362 0 66 290 8 55.00 43 87.76 CHANGED lshD.....Ph.hLlShlNhKLRDpas.SL-pLCpshslsccpLhpKLsslGYcYctppNQF ..............shDPh.hLhSllNhKLRD.-.as.oLDcLsssa-lDccpLh...sKLsshGacYssppNQF.... 0 19 37 53 +13899 PF14057 GGGtGRT GGGtGRT protein Eberhardt R re3 Jackhmmer:A7VAK6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 330 amino acids in length and contain many highly conserved residues including a GGGtGRT motif. 
27.00 27.00 46.00 46.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.07 0.70 -5.62 11 294 2011-05-05 13:44:32 2011-05-05 14:44:32 1 2 279 0 48 216 16 313.70 78 97.84 CHANGED pFEuh-RRhs+IptsLpphGlsSLE-ApplCps+Glcs.pIV+slQPIAFENAsWAYTLGsAlAlK+GspsAu-AAttIGEGLQAFslPGSVADpRpVGLGHGNLuAMLLpEETcCFAFLAGHESFAAAEGAIGIApsANKVRKpPLRVILNGLGKDAAhIISRINGFTYVcTpaDYhTGELclVcE+saSsGs.RAtV+CYGADDVcEGVAIMp+EsVDVSITGNSTNPTRFQHPVAGTYKKtslEpGKcYFSVASGGGTGRTLHPDNhAAGPASYGhTDTMGRMHuDAQFAGSSSVPAHV-MMGLIGMGNNPMVGATVAlAVAVppA ......................h.FESa-RRIcpIpssLsphGIpo....IEEApplCcstGlDsYphlcphQPICFENAsWAYslGuAIAIKK....GC....c....sAA-AApAIGcGLQAFCIPGSVADpRKVGLGHGNLuAMLLcEETcCFAFLAGHESFAAAEGAItIAcpANKVR..KcPLRVILNGLGKDAApIISRINGFTYVpTpaDYhTGE.....LclVpcpuYSs...G.......RAKVpCYGADDVREGVAIMa+EGVDVSITGNSTNPTRFQHPVAGTYKKEplEpGKcYFSVASGGGTGRTLHPDNMAAGPASYGMTDTMGRMHSDAQFAGSSSVPAHVEMMGhIGMGNNPMVGsTVAlAVuVpEA... 0 26 43 45 +13900 PF14058 PcfK PcfK-like protein Eberhardt R re3 Jackhmmer:A7V2F8 Family The PcfK-like protein family includes the Enterococcus faecalis PcfK protein Swiss:Q82YK9 which is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 137 and 257 amino acids in length. There are two completely conserved residues (D and L) that may be functionally important. 
27.00 27.00 33.20 32.90 24.40 23.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.85 0.71 -4.04 18 325 2011-05-05 14:36:51 2011-05-05 15:36:51 1 1 168 0 19 230 7 125.50 39 76.36 CHANGED K.uo-pFKcsIpsYLppRApsDpLFAtsht+ssKNl--ClsYIlscVp.....+...oGss..........uhsD-ElauhAl+YYc..EcsIcssK.slpCpV......sVN+h...lphotccKtcs+ppAhcphppEchpKhpp+spts+.....ttscs...psQ.SLF...Dh ......................so-pFpcsIppYLspRAppDtLFA.shh+ssKsI--ClTYIlsplp......c.......uGCs..........GhsDsElFuhAlHYY-..E-cI-lGK.sls.CpV...............sVNHh...lcLTpEEK....scARppAlcphppEphtKhppcpt..t..........t....p.oLF..................... 0 8 17 19 +13901 PF14059 DUF4251 Domain of unknown function (DUF4251) Eberhardt R re3 Jackhmmer:A7V8X1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 164 and 196 amino acids in length. 27.00 27.00 30.40 29.80 26.70 26.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.59 0.71 -4.36 50 228 2011-05-05 15:01:04 2011-05-05 16:01:04 1 1 117 2 32 178 4 144.00 26 81.10 CHANGED s+ppp+pp..................ptpplpphlsscpaplcsspshP...................ppGpsh.Ls.ss..hlplpsDolhspLsa..aGpsas.sPh...GpGulsapushpsaphp.pcKKGshplshsspsp..tps.hphslslassu.sAslslsss.s+psl.....oasGplt ................................t.....ttt..................thppstptlcscpahl-sspsh....................hpGpshhlo.ss..hlplps-pshsplsh..FspsYs.sP....GhGulshsushss..hphp.h-KKGs.hp...l.shslpsh...t.s...hphplslhssu.sAolslsss.sppsl.oasGpl.h.......... 0 12 26 32 +13902 PF14060 DUF4252 Domain of unknown function (DUF4252) Eberhardt R re3 Jackhmmer:A7VAG1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 182 amino acids in length. 
27.00 27.00 27.90 28.80 24.30 26.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.90 0.71 -4.67 82 269 2011-05-05 15:12:56 2011-05-05 16:12:56 1 2 129 0 88 266 42 153.50 18 87.94 CHANGED Kph......hllhh.lhlsshsutu................t.t.phas....capstcshsslslspphhphhsph...........p.cspphtchlpplcslplhs.ts............sp..spsphppphpphhp.....s.sacpLhplp...-.ssppsphhh+tsps....tlpElllhsss.................cp........phsllpltGs.hs.pclsplhpp ........................................h..lhhhh.lhlhsh.hstu................t.p.phFs....ca.pcpcslssVtlsppMh.phhsph.............ps.ph.tplhpplcslplhshpc..............sp........hppphppphpplhp.......s.sappLhphp....-.psppsplhh+tpts....tlpElllhsss.................cs.phsllplpGc.hs.p-ltplht......... 0 34 74 88 +13903 PF14061 Mtf2_C Polycomb-like MTF2 factor 2 Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Domain Mammalian Polycomb-like gene MTF2/PCL2 forms a complex with Polycomb repressive complex-2 (PRC2) and collaborates with PRC1 to achieve repression of Hox gene expression [1]. The human MTF2 gene is expressed in three splicing variants, each of them contains the short C-terminal domain defined here. The domain is subject to structure determination by the Joint Center of Structural Genomics. 27.00 27.00 27.00 29.90 26.90 26.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.60 0.72 -4.07 11 173 2011-05-05 16:17:15 2011-05-05 17:17:15 1 3 65 0 92 157 0 48.60 61 8.25 CHANGED ssshscL+sSlssYFGuA.GRlssGE+apVLARRVTs-GKVQYLVEW-Gsss ......s.pLsHLKsSIosYFGAA..GRlAsGEKYpVLARR.VTs-GK...VQYLVEWEGsT..... 0 12 19 43 +13904 PF14062 DUF4253 Domain of unknown function (DUF4253) Bateman A agb Jackhmmer:C7PV89 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. 
27.00 27.00 33.60 33.30 26.50 26.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.32 0.71 -4.43 58 204 2011-05-05 16:42:08 2011-05-05 17:42:08 1 17 177 0 57 196 2 108.70 32 25.08 CHANGED shhlshlPspcsa-lhuhls.h.GuhNpsssssphsAlh+tWp-+aGuhlsulshD.pl-hhlsp....PPtstc-AhplAtEpatFCsDhlcQ...s...............ht..olpsLA.ctLhpsphWaFWW.....D ..................................t.hlshlPsppsa-lhuhls..h.GuhNtsssss-hhAlh+hW.-+aGAh.ssls.a.D.pl-h.lsp....Ps.s.-cAhplAhEpYsFC.PDhl-Q...u................................ht..olppLA.c.sLh.psptWaFWWD...... 0 21 46 55 +13905 PF14063 DUF4254 Protein of unknown function (DUF4254) Bateman A agb Jackhmmer:C7P9Y0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 195 and 207 amino acids in length. 27.00 27.00 27.10 27.10 26.80 26.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.64 0.71 -4.59 56 245 2011-05-05 16:45:14 2011-05-05 17:45:14 1 4 231 0 90 244 323 136.60 40 65.79 CHANGED ltpllhppshhcshpWHhEDlhRcsslsstthhphKRpIDt.NQcRsDhVEhlDsahLpthpsltshs..sA..........plNoEoPuhslDRLSILuLKlYHMp-pspRpD..As.tpHhtpCppKLslLhEQ+sDLusulDpLLsDltsGcKhhKV .................................................h.tthlah+shl-ssQWHhEDllRcPplss.tshthKRcIDp.NQ-RoDhVEhIDsahhpthpslpshs.....sA..........plNTESPAhulDRLSILuLKIYHMp-pspRsD...ss...s-HhtpCppKLslLhEQ+tDLupAl-pLLsDltsGcKhhKV...... 0 34 71 87 +13906 PF14064 HmuY HmuY protein Bateman A agb Jackhmmer:C7PU21 Family HmuY is a novel heme-binding protein [1] that recruits heme from host carriers and delivers it to its cognate outer-membrane transporter, the TonB-dependent receptor HmuR.\ This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 278 amino acids in length. 
27.00 27.00 30.60 27.10 25.20 26.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.27 0.71 -4.24 83 274 2011-05-05 16:52:52 2011-05-05 17:52:52 1 4 165 4 92 262 99 176.90 20 72.01 CHANGED sstssahYhshcssph.........................................................h.ttpttpsssWDlAFpph...slpsNuGs............su.Gpstshh.hs...........................ssasp...........ssshsssssatt..D............................................................................................t.......hshstshssssh................................................................................suWasashs...........................................................................sshhsspsp.lall+s...s-.GpasKlplhsaYssss..................uahoFcYt .........................................s...spWhYhshpssph........................................................................................t...tpttpsssWDl..AFpph.........pl+sNuGs..........................ts.utstsh....ss.............................sshst.................ssptsssssass...D.........................................................................................................................h.hst....t....hsht.t.shsstth...................................................................................................................................................ssWhshshs...........................................................................sshhssstp..lall+s..........s-..GpYsKlplhsa..hssss..tp..............uhhohpa......................................................................................... 0 37 67 87 +13907 PF14065 DUF4255 Protein of unknown function (DUF4255) Bateman A agb Jackhmmer:C7PSC6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 190 and 320 amino acids in length. 
27.00 27.00 30.40 29.90 23.80 23.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.97 0.71 -4.83 82 228 2011-05-05 16:56:58 2011-05-05 17:56:58 1 5 188 0 126 237 37 182.90 19 72.26 CHANGED lstlspsLpsh.Lppthtt.............ssspVsh.ssPsp..........ssttsssplslaLaplpcssth+sts...stpsstt.......hhps.slhlsLaaLloAa..........ssshtpshpl...Lupslphhppp.shl...stpshssth.................................................tthp..plplph.s.hsh-plsclWusL.ssp.apPSlsYplshlhlpssthtsts....sVsphs ..................................lttlspsLpph.Lppth.t.............ssspVsh..ssPsp..........sstt.sssplslaLaslpcssth+spsh.......psspst........hppP.PhhlsLpYLloAa.............spsstpptpl........LupslphLtpp.shl...sspslssth.................................................ttht......tl.p.lt.h.s..hshcplsplWsuL.ssp..h+sSlsYplsslhhtssthts.s..Vtp..s............. 0 38 92 115 +13908 PF14066 DUF4256 Protein of unknown function (DUF4256) Bateman A agb Jackhmmer:C7PN24 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 190 amino acids in length. 27.00 27.00 203.50 203.40 22.20 19.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.02 0.71 -4.77 44 212 2011-05-05 17:00:08 2011-05-05 18:00:08 1 1 211 0 53 180 13 172.10 67 92.09 CHANGED LLpsLKsRFEcNhpRHpulcWspVps+LcAss-KLWSLp-MEcTGGEPDVVuaDpposcYlFaDCSsESPKGRRSlCYD+cAL-uRKcaK..PcssAl-hAspMGIELLTEEQYRpLQpLGpFDhKTSSWlcTPssIRcLGGAlFsD+RYspVFlYHNGA-SYYAuRGFRGhL...+V .LLclLcsRFEKNMsRHc..GL-WucVpsKLps.s.sEKLWSLsEMEtTGGEPDVVuYDc.cp-EYhFaDCStESPKGRRSLCYDtEALESRKcHK..PcNsAIDhAssMGIELLTEEQYRpLQpLG-FDhKTSSWlpTPs-IRcLGGALFCDhRaG+VFVYHNGA-SYYAARGFRGsLRV.... 0 30 45 49 +13909 PF14067 LssY_C LssY C-terminus Eberhardt R re3 Jackhmmer:Q7UW88 Family This domain is found at the C-terminus of Legionella LssY proteins, which may be a part of the type I secretion system [1]. 
This domain is functionally uncharacterised. This domain is found in bacteria, and is typically between 182 and 195 amino acids in length. It is often found in association with Pfam:PF09335 and PF01569. There are two completely conserved residues (P and W) that may be functionally important. 25.00 25.00 25.10 27.10 21.70 24.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.14 0.71 -5.32 48 217 2011-05-06 08:39:08 2011-05-06 09:39:08 1 5 202 0 75 233 29 174.90 23 33.97 CHANGED hs.pthhhcthPspopshDGh..uDPlNlsll.GspsplpsshtpsGWptscshohpo.hphshuslhcpshstAPVSsLahhGRtQDhAaQp.ssssstp...RaHlRhWpssht..................stssp........shWlGusoaDpGlth.ohhospl.o.HcI-s-lDsERDtlhpslp.tss.httsthhtshsss.....thssss-sahTDGcl ..........p...hp.thphhPspppshsGp..tpPlNlths.Gs.s.tlppthpttGWppssphohps...hlthhhhppshsphPV.shhhpu+spslsht+.sss.ss.p...RphlRhWtssht..................htssp........slWlGuhsh-phh...shhtt.l..h.atht.shD..thshlhttl......................................................................... 0 25 43 59 +13910 PF14068 YuiB Putative membrane protein Bateman A pcc Jackhmmer:O32109 Family This family of bacterial proteins is functionally uncharacterised. Proteins in this family are approximately 100 amino acids in length. There is a conserved FGIGF sequence motif, and many members are putative membrane proteins. 27.00 27.00 43.70 30.40 22.10 21.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.46 0.72 -3.70 21 172 2011-05-09 08:23:49 2011-05-09 09:23:49 1 2 165 0 33 112 0 95.60 60 96.48 CHANGED s.llIShlLFFVLFFGIGFlLNMLLRtTWlMAllYPIVllhIlsp.thhpYhpsPupuFsulhcclhuLthsDllILsuGhsGAllSGhsI+hLR+pGYQMF ..........llIuMhLFFlLFFGIGFLLNMlLRsTWlM.sllYPIVClhIIs+sshhcYFocPpEoFuShGspVupLutADlhILSoGLlGAhlAGllIKpLRKsGYQMF.......... 
0 11 24 27 +13911 PF14069 SpoVIF Stage VI sporulation protein F Bateman A, Coggill P pcc Jackhmmer:O31625 Family The sporulation-specific SpoVIF (YjcC) protein of Bacillus subtilis is essential for the development of heat-resistant spores. Its expression is governed by SigK [1,2]. 27.00 27.00 28.30 28.20 23.00 22.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.42 0.72 -4.24 18 195 2011-05-09 08:36:16 2011-05-09 09:36:16 1 1 170 0 38 106 0 78.90 49 91.38 CHANGED FcslcKKTu..Vs.p-lhKLAsSlpsANhcDEpsVRplI+pVuplAN+PVoKEpEDcIVpAIhsssh.stDhsoLsKMh...KK .....FsNIEKcsp..VNc-DIFKLAuSVQNANL+DEsslRQLI+pVAhhAs+cVPKEpEDpIVcAIlsssh.PsDFuoLuKMhp..K...... 0 10 26 28 +13912 PF14070 YjfB_motility Putative motility protein Bateman A, Coggill P pcc Jackhmmer:O34438 Family This family of proteins is regulated in B. subtilis by SigD, and is likely to be involved in motility or flagellin production, Proteins in this family are approximately 60 amino acids in length, and contain two highly conserved asparagine residues. 27.00 27.00 27.10 27.00 26.50 26.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.67 0.72 -4.18 64 334 2011-05-09 08:54:15 2011-05-09 09:54:15 1 1 320 0 92 221 9 58.30 30 94.26 CHANGED Iuulus.....uhupsphtpp.luhuVhKpuhDspppsutpllph.......h..........t.ussPslGp.slD..................lps .................huMupupLhps.VshoVLKKuMDsscshh.spLlcs.......h...........ttuspsshGs.slDlhA..................... 0 34 66 78 +13913 PF14071 YlbD_coat Putative coat protein Bateman A, Coggill P pcc Jackhmmer:O34880 Family This is a family of putative bacterial coat proteins. Proteins in this family are approximately 140 amino acids in length. 
27.00 27.00 65.00 64.80 26.70 20.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.69 0.71 -4.06 18 144 2011-05-09 09:44:54 2011-05-09 10:44:54 1 1 143 0 20 87 0 127.50 56 91.84 CHANGED ccFKpFV++HPKlIpEVRpGpKTWQ-lYE-WhLhGEDDchWssY+...............tpssspcpc.........pppps-hhsplhshlK+hDssplQpalsphSpuIuulQsllsQFpssspppt.......tptpp.PFSFRKD .pQFKEFVp+HPKhl+EVRuG+KTWQQFYEEWYLLGEEDPIWssYR..................tp.tEscccp-s.+sE.................EEKouDlMGQMLSahKKLDV-QMQcHLANVoSAIGSVQQVlQQFQGs+oQQc......psoSEsNPFhFpKD............................ 0 3 12 14 +13914 PF14072 DndB DNA-sulfur modification-associated Bateman A, Coggill P pcc Jackhmmer:O34448 Family This is family of bacterial proteins likely to be necessary for binding to DNA and recognising the modification sites. Members are found in bacteria, archaea and on viral plasmids, and are typically between 354 and 474 amino acids in length. There is a conserved DGQHR sequence motif. 29.00 29.00 29.00 30.30 28.50 28.90 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.10 0.70 -5.31 64 426 2011-05-09 10:06:35 2011-05-09 11:06:35 1 2 328 0 110 399 165 326.50 18 79.94 CHANGED aPAl+GhQus+..............paYhshhPh+hl...s+lhhhsp.p..phssphR...........................uQR............sLNcsRlscIscYl.lp.........................N.p..sYl.huul.Tusls...s.shthp...h...............ttts..t.plGhLpls..hDu.c....hhlsDGQHRpsAIc.......pALc......csP...p.......Lup-..............sIuVlha....h-tuLc+sQQhFuDlNptuh+sssSls............hhYDcR-.s..hutls+plh.....ppsshap...shs-hccssluppup+LFT.lsslhpusptLh....................tptttsph..cpttphsppFWptl.spphs-...Wp.lhppphss......tphRpchlpupulsLpAlGhhupt..........lhppt.........spphcptlptLpp....lDWs+ss.......W.....ps...+sh..psp.................lsps..psslpLssstlcptlulsLs 
...........................................................................................shph.p.s.t.t......ha.hhh.hp.l....clh.hsp.....p......p.s.p.t...............................hQR..........................slsps....+l.p.c..Itc.Yl....p................................................................................stp.......shl...hs.sl.shshs....t..thphp.....h..............................................tssshGhLpls........cs..p.................hhllDGQHRhtAlp.................................puht.................................p........p..........................lsp..........................................plsVh..ha.................hs.shcc.p.pp....h..FhslNppthhsssslh..................................h.hsp.cc............shlsppls.................pts.ht...............shhc...h.t.....p.....s...s........tpp..ptlh........o..hssl.hph..hp...thh.............................................................sttp.tphhp....hhppaaptl..tphhst......hp.......................................p....p.h.h.h....p.s....h.hh.slu.hh.t.........................h.......................................................................................................................t....................................................................................................................................................................................... 0 27 75 97 +13915 PF14073 Cep57_CLD Centrosome localisation domain of Cep57 Coggill P pcc manual Domain The CLD or centrosome localisation domain of Cep57 is found at the N-terminus, and lies approximately between residues 58 and 239. This region lies within the first alpha-helical coiled-coil segment of Cep57, and localises to the centrosome internally to gamma-tubulin, suggesting that it is either on both centrioles or on a centromatrix component. This N-terminal region can also multimerise with the N-terminus of other Cep57 molecules. The C-terminal part, Family Cep57_MT_bd, Pfam:PF06657, is the microtubule-binding region of Cep57. 
27.00 27.00 27.40 27.20 26.70 26.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.26 0.71 -4.44 8 162 2011-05-09 10:19:16 2011-05-09 11:19:16 1 3 48 0 73 143 0 161.50 53 39.03 CHANGED AllSALKsLQEKIRRLELERsQAccslppLSREusca+csL....................cc-ppp+shtpp-hopp.........pp-lspQLsuAEuRCoLLEKQL-YM++MVcs.s-+E+sslhEpQspLpREppppQhclpupLcKL-lLEpEapRLTsTQusAEcKIppLEcKLpEEEpQRKLhQDKAucLQTuLEsN+lll ...........................Alh.ALKsLQ-KI+RLELERhQAE-sl..ptLS+Es....hp.YK..KsL....................-...p...php.ERp.u+pE..Kp...............pp-ls.QL.uAps+CsLLEKQLEYh+pMlhp.sEtE+shlLEp.Q.spLpRE+...p........p.DQh+.l..pupLEKLDlLEpEh.+LTshQthAEcKhpcLEpKL+EEEppRKhhQcKA.upLQTGLEhs+llh.............. 0 13 18 34 +13916 PF14074 DUF4257 Protein of unknown function (DUF4257) Bateman A pcc Jackhmmer:O34881 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 27.00 27.00 30.20 30.80 26.30 25.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.67 0.72 -4.33 11 133 2011-05-09 10:32:23 2011-05-09 11:32:23 1 1 114 0 10 61 0 80.20 64 70.66 CHANGED plllAslIGGlhGlluH.lpp+G+lphP..........................................Rp.....s+phaalGFltDhhlGhhAulLhV.lhs..cspohhpllhlSIluGlGGEuhLhS ..QWLTAlLlGGITGFVSHLINN..QGKLLLP............................................RR.....LKTFFHhGFLTDIFTGSLAALLGL.VLF..DVTsIKEIIKVSIVTAISGQTFLLH. 0 1 4 5 +13917 PF14075 UBN_AB Ubinuclein conserved middle domain Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family Ubinuclein 1 and 2 (UBN1, UBN2) are members of a histone chaperone complex involved in the formation of a certain type of facultative heterochromatin, called senescence-associated heterochromatin foci (SAHF) [1] [2]. 
The domain described here is conserved in many eukaryotes such as human, rat, drosophila, and zebra-fish and has been targeted for protein structure determination by the Joint Center for Structural Genomics. 25.00 25.00 25.00 25.00 24.20 24.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.48 0.70 -4.66 12 163 2011-05-09 11:56:05 2011-05-09 12:56:05 1 5 79 0 96 141 0 196.10 46 21.34 CHANGED LPpsLsscllpplsslK-hu+.h.chsG+ppFF-sclssLLLclt.phppss..pstRstVapHLEtpLpssK.slh++hKpl+lccpcs+hppsLpKL+cAlscsMPctlssYEhEppphsptpsu..................cpsuEc.P.....ph+hPRKKFpWs-plRpLLhclhpl+hpSatl.t.pRppShE-alpu...aLcpcVhsLWPpGWMphcpLpKEl ................................................LP-.GLPs.Lc++lc-Lp...........AA+hh...-tEG+pKF.Fop-hNslLLDIElQhp..Els....stlRSuVYuHLtuFlPCsK-oLlK....Rh.KK....L...+L...................p.....Q....c.s..RL+EPLpKLK.AlupsMPEQlt+Ypc-CpA+spsKsA.........hpt-.p-.cpp.....................s-.--D-E+ss.......+RlhGPR.....KKFpWsDpl.RpLLCplVclKLtsY-LEt.s+u.......pShED....alKs...........Fh-........s....EVKPLWPK.GW..MQuR.hLaKES................................ 0 28 35 57 +13918 PF14076 DUF4258 Domain of unknown function (DUF4258) Bateman A agb Jackhmmer:Chitinophaga pinensis Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 95 and 124 amino acids in length. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.25 0.72 -3.97 112 331 2011-05-09 12:23:19 2011-05-09 13:23:19 1 5 278 0 128 317 58 71.80 19 55.93 CHANGED hhop.H.Ahpchp..pRp.Ishp-lhpsl..tp.Gclhcphs....sst.tssplhlttstppt............slcllhshsps.....hhhllTlahs ...................hopH.Ahp+hp...cRs..Ishpplhpsl....pp..Gclhct.........pst.tss.phhhthstttp..................lpllhshpps......hhllTsa...................................... 
0 56 95 118 +13919 PF14077 WD40_alt Alternative WD40 repeat motif Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family WD repeats are short subdomains of about 40 amino acids and fold into 4 antiparallel beta hairpins. This domain here has been detected on the C-terminus of WD repeat-containing protein 18 during target selection by the Joint Center for Structural Genomics. 25.00 25.00 38.30 64.70 24.30 21.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.27 0.72 -4.33 4 42 2011-05-09 12:27:19 2011-05-09 13:27:19 1 5 31 0 22 35 0 47.50 63 11.27 CHANGED huush-KslhGst-pL+lRVuELEEEVRsLRKIN+DLFDFSTRIIT+P ..hsooh-KsVLGsQ-pL+lRVoELE-EV+sLRKINRDLFDFSTRlIT+P... 0 1 3 7 +13920 PF14078 DUF4259 Domain of unknown function (DUF4259) Bateman A agb Jackhmmer:C7PLU1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 118 and 145 amino acids in length. 22.70 22.70 24.10 23.40 22.30 21.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.73 0.71 -3.64 61 249 2011-05-09 12:29:06 2011-05-09 13:29:06 1 6 214 0 67 189 4 121.80 20 66.43 CHANGED GuWuhusF-sDsuhDhls-Ltct.............thhpthhcsshhs..tspchl-s--stsuluuAtllshh..........hsssh.ssthtt.............................ttpssppltphAhphlcplhst...SEhh-LWp...ps..tth-pWhpslp .....GuWuhthF-sDsuhDhlsplp-h.............ttpthlpth...tpthhhs...tttsth-hc-s...t...s...sluhAtlhhth..........t.sssh.sss..h.t.............................htph.ppltphshphlcph.t........tt.hphW.p.........h.t....................................................................... 0 18 51 67 +13921 PF14079 DUF4260 Domain of unknown function (DUF4260) Bateman A agb Jackhmmer:C7PFK4 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 114 and 126 amino acids in length. There is a conserved GLK sequence motif. 27.00 27.00 32.20 32.00 26.40 24.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.56 0.71 -4.26 24 197 2011-05-09 12:52:38 2011-05-09 13:52:38 1 1 193 0 65 166 5 108.60 44 92.15 CHANGED sctllRlEGhslhssulhhYuhhs..huWhlahlLlLsPDLSMlGYLhGsRlGAhsYNhsHoahhPllLlslGlhhssshs........htlulIWlAHIGhDRhLGYGLKYssuFccTHLGRl ......................+tll+hEshslh.lhslhhYs..h..hp..aS.W...hlFhlhlLsPDLSML.....uY.hhss+lGAh......lYNlhHoYllslllsllGlhhp..shl........lhlu.LI..WhAHIGhDRhhGYGLKYp.ssFcpTHlt+l.......... 0 20 46 55 +13922 PF14080 DUF4261 Domain of unknown function (DUF4261) Bateman A agb Jackhmmer:C7PC06 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 80 amino acids in length. 27.00 27.00 27.30 27.30 26.60 26.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.50 0.72 -4.17 52 353 2011-05-09 12:56:15 2011-05-09 13:56:15 1 16 297 0 40 258 2 77.10 31 21.46 CHANGED hThGhpsFsh.-lphh...pshcsp-lhpaLhslutYllppsss.lp-G-TI.Ghss......ppphphptppu.....lts..sp.s.LcIp .........YThGhcsFsc.ElElps...tsh-sp-lh..hlhslshYlLppDss.LcDG-Tl.thp-.........spphphpcspu.....lts..ppts.LcI............................ 0 21 29 34 +13923 PF14081 DUF4262 Domain of unknown function (DUF4262) Bateman A agb Jackhmmer:Q50763 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 147 and 227 amino acids in length. Swiss:Q50763 is incorrectly annotated as the KatG protein. 
27.00 27.00 30.00 29.60 26.90 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.75 0.71 -4.01 54 189 2011-05-09 13:06:11 2011-05-09 14:06:11 1 2 171 0 54 153 10 117.50 29 65.65 CHANGED pltcpGhslhtVhscppsss...........aua.olGh.pphshPEl.llhGL.shphspshls...plsptht.tsc...h......Gtthpph.......tsh.shhh.lt...ht..hhh.u..hat.............h.slQhlasDtpGhh.......PWc.shs ......................lhpaGWslhtV.s-cps...............auYTlGL.sphshPEL.llhGL.sschupplLNthucc.hh.sGch...hssGhphsth.........ts....lthhpVspschc...hhhA.hhhat...........sph..sLQllWsDpcGpa.......PWpssh......................................... 0 17 40 52 +13924 PF14082 DUF4263 Domain of unknown function (DUF4263) Eberhardt R re3 Jackhmmer:Q7UE31 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 244 and 403 amino acids in length. 27.00 27.00 27.70 30.50 26.80 26.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.04 0.71 -4.29 61 136 2012-10-11 20:44:47 2011-05-09 14:08:03 1 3 127 0 43 141 11 162.50 19 47.31 CHANGED Lpss...t............sEppaQphlcps.shlh...shtasthh...........hh.schp...hssphhsDFlhhstssshh.......llEIKpPs..pslhsp..........ssh...t.up-lspAlsQlhcaht.hlppptsphptptptt...........................shps+slllhGcp.........phhsppppcshchhppph...t.slcIlT...aD-Ll ........................h.....t...sEpphQpahcps..shlh......uhtast.h...........hhh...schs.......hssshhsDalhhstssshh.......llElKpPs...spl.hpp...........ssh..........t.ut-lspAlsQlhsaht.hlppphsthptp.pht..............................shpscshllhGpp.........p.hsppp.pcsh.chhppph....t.plpllTaDpLl.............................. 0 15 31 39 +13925 PF14083 PGDYG PGDYG protein Eberhardt R re3 Jackhmmer:Q7UEH4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 150 amino acids in length. There is a conserved PGDYG motif. 27.00 27.00 27.40 30.30 26.90 26.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.59 0.72 -3.65 6 78 2011-05-09 13:16:29 2011-05-09 14:16:29 1 1 76 0 24 63 18 101.70 77 69.33 CHANGED GP...............NRYtPGDAllTGuTGDRWVVSR-RFDs+Y.shsPs.sHGcsGAYRN+P.lsVLA+cMscsFsIARSs.GGDVL+GssGDWlMQYA........PGDYGllppARFupVYR .................GPNRYssGDALlTGSTGDRWVVSR-RFDAKYlPsssuhAHGpPGAYRNRP.AVVLA+RMDtPFoIARSA.uGDsLRGsAGDWVMQYA........PGDYGVVQApRFAQVYR. 0 3 7 15 +13926 PF14084 DUF4264 Protein of unknown function (DUF4264) Bateman A pcc Jackhmmer:P54395 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 28.70 28.60 19.80 18.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.31 0.72 -4.59 25 183 2011-05-09 13:37:37 2011-05-09 14:37:37 1 2 183 0 47 86 0 50.80 62 89.49 CHANGED KlElluohchppscDhYKlVDFLN+TLK-cslhFGLohspc.....cschlhoIYcT .KIElLuThclchosDLYKIVDsLNRTLK-pcLMFGLALDEc....c+cpAVFTIYRT. 0 22 36 40 +13927 PF14085 DUF4265 Domain of unknown function (DUF4265) Bateman A agb Jackhmmer:Chitinophaga pinensis Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 139 and 168 amino acids in length. 
27.00 27.00 28.00 34.10 25.40 25.30 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.13 0.71 -4.35 42 117 2011-05-09 13:38:35 2011-05-09 14:38:35 1 2 102 0 45 106 14 114.70 24 73.85 CHANGED sEslauc.hls..ssp.aclpssPaashulAhGDllpsppss..tthhhpch.ttuGssTlRlhh.pss.........hpplhsplpthGssh-shpt..shlulslPspsshpslpphLsp.hpcpshlpa .........Etlaup.hhs..ssp..atlpslPaas.slAhsDllphcpcc..thhhhpcllpsSGNsTlRllhhspt........hpplhsp.lp.p.hGsphEt.st..shlulslPspsshpslctlLsp.hc-pshhta............ 0 18 22 33 +13928 PF14086 DUF4266 Domain of unknown function (DUF4266) Bateman A agb Jackhmmer:C7PCX1 Domain This presumed lipoprotein domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 50 amino acids in length. 27.00 27.00 39.50 39.50 25.10 25.00 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.47 0.72 -3.45 47 129 2011-05-09 13:43:10 2011-05-09 14:43:10 1 1 108 \N 61 131 37 49.90 44 58.12 CHANGED VpPaE+shLAcs-Msh.spcshcsshspHlY.u+Euu.uGGtGhuGGGCGCN .VcPaE+spLAcs-MsL.spcshctthspHlY.s+Euu.uGupGhuGGGCGCN. 0 21 45 55 +13929 PF14087 DUF4267 Domain of unknown function (DUF4267) Bateman A agb Jackhmmer:C7PBL0 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 126 and 142 amino acids in length. 
23.00 23.00 23.50 25.10 22.60 22.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.30 0.71 -4.48 58 194 2011-05-09 13:46:48 2011-05-09 14:46:48 1 5 140 0 133 197 3 111.20 22 73.37 CHANGED hushhlshGlthllp.PppAhshFGl.......Pts........................pptssuh....hhlhGsRDlshGlslhshhhh...............ust+ulGhhl...................lusuhlsluDuhlshp...........tGssttsh...Hhs.....husllsshGhllh ..............................uhhhlhhGlth.lhs.PttuhttFGl.....Ptt......................................................................pssspuh....htlhGsRDlshGlhlhshhht...............uph.cslGhhl...................lssuhlsluDshlshp..............tus.ttts.hh..Hhs.....suslhhshuhhh.h............................ 0 31 77 114 +13930 PF14088 DUF4268 Domain of unknown function (DUF4268) Bateman A agb Jackhmmer:C7PAS6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 151 and 387 amino acids in length. 27.00 27.00 27.30 28.50 24.90 26.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.68 0.71 -4.52 63 189 2011-05-09 13:50:04 2011-05-09 14:50:04 1 8 179 0 68 194 80 136.50 22 45.35 CHANGED pl+p.cFWpphhcphpt...............tshps.scpWlshso.Gl.puhshphhh........spc..cspVpl.lscss..tt.scthF-pLhppKstIE...sphGt.cLpWpc..hs-..+ctsRIh.hhtpsss...lhscspWschhpahscphpph-phatthlpt ....l+pcFWpphhcthttp.......h....t.hpstscpWlsh.ss....Gl.pshphphhh..........spc....pspVpl.Iscss.....ppthF-pLhp.hKstlE...pphut...sl.Wcc....hss.......+csuRIh.hh.t.psls...ht.scssWsphhcahhcphhphcphatth...h............... 0 25 53 62 +13931 PF14089 KbaA KinB-signalling pathway activation in sporulation Bateman A, Coggill P pcc Jackhmmer:P16449 Family This family of small proteins is found in the membrane and is necessary for kinase KinB signalling during sporulation. There is a conserved GFF sequence motif. 
The initiation of sporulation in Bacillus subtilis is dependent on the phosphorylation of the Spo0A transcription factor mediated by the phospho-relay and by two major kinases, KinA and KinB. 27.00 27.00 102.00 101.70 21.10 20.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.26 0.71 -4.73 22 164 2011-05-09 13:52:14 2011-05-09 14:52:14 1 1 164 0 30 125 0 176.80 61 88.75 CHANGED GulsssIlGhllcaspa..hhhsh-hspllusllahlGlGhh.aSlISQMGFFAYLTlHRFGLuIFRShpLWNtVQllLIhFllFDLlYhRahsFuppspuhhtYlhlslllLlhullVAalKsKpT.NKsAFlPALFFMlVlTslEWhPALpls-pp...aLahhlhPLLsCNAaQLLhLH+lp ................................GGloTsIlGFlLcW-c.YsphFtsF.-stEIluV..FWlhGVGFIFSVISQMGFFAYLTlHRFGLGhFRSuS.LWNhVQLFhIAFVLFDhVYLR.lhh...AsucsSlus.ILlAshLhhFGsIVAYlKoKpT.NKKAFVPALFFMVVVTlLEWVPALRINDsD...WLYLMlIPLLlCNAYQLLlLHRL.l...... 0 10 21 24 +13932 PF14090 HTH_39 Helix-turn-helix domain Bateman A agb Jackhmmer:C7PBH6 Domain This helix-turn-helix domain is often found in phage proteins and is likely to be DNA-binding. 21.30 21.30 21.30 21.50 21.20 21.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.17 0.72 -4.50 51 254 2012-10-04 14:01:12 2011-05-09 14:58:04 1 4 222 0 46 207 80 65.50 28 53.69 CHANGED sQptclLstLpptss.lTshEAhpphslhc..huuRIp-....LR.ppGa..pIpTph....sppGp.c.+lupY...sLhss ...............ppppllttLppstt..lTsh-uh.pphushc..lu..AcIhc....LR.ppGa....sIhTpp....sphstsptpVstYhL.............................. 0 8 24 37 +13933 PF14091 DUF4269 Domain of unknown function (DUF4269) Bateman A agb Jackhmmer:C7PAK4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 176 and 187 amino acids in length. There is a conserved KTE sequence motif. 
27.00 27.00 28.80 27.30 25.00 24.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.69 0.71 -4.27 29 134 2011-05-09 14:02:34 2011-05-09 15:02:34 1 3 131 0 33 105 1 145.80 57 81.68 CHANGED acslpchplhctLtsa..sPlLsGTlPlsIDlssSDLDIlCpsp......D.hptFpptlpshauphps.FphcptpI.psh.sllssFphpsathElFuQshPsppQtuYRHMhlEp+LLphttssh+pclhpLK.cpGlKTEPAFuchLul.s....G.DPYtsLLph ........................Y-VLscLsIMEcLAlY..sPVLsGTIPIcIDs.pSDLDIlhEVp......N...aDsFEQch+SLYGoa.cG.FpIK+.KcI.+ssESIp...........VNFcaEGF-FELFAQP+PV+sQNAYRHMlVEHhLLhp+.P+lREEIl+LK.EpGLKTEPAFAQlLsI.s.....G.DPYEtLlh.L.. 0 16 28 29 +13934 PF14092 DUF4270 Domain of unknown function (DUF4270) Bateman A agb Jackhmmer:C7PAW4 Family This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 444 and 534 amino acids in length. 27.00 27.00 61.00 60.80 21.00 20.00 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.91 0.70 -5.86 72 233 2011-05-09 14:05:54 2011-05-09 15:05:54 1 2 191 0 63 235 345 458.80 20 93.05 CHANGED 
lGtsllss.s.shssphsshshsspohht.....s.......uVhops......shs...hlGphsDs....aGph.pusahsQlshs....ss..sa.ss.sp.........................................lDSlhlhl.Ysu.................................................................................aG.Dohss...h+lslaclsp...l.................sptaYoshss....ps.........thlush...............sassss...................................shssplplpLsps.............aspplh...ppsp............h.hpsspsFh...shh+Glalpsst.......usGshhhls........ssplplaYphpspsstst................s.....hshss.........hphsphpss.ss...lsplht..pp............sYLKussGhhsplsl.......................slppltp.........p............tp.slNsApLshhh.ss......st..................hshshPpplhLhptcpt..............pshhppsth.sst..ssahuhhp.....................ttpsspYsFs.hs.t.lpshltt.....................pt................ts.chshhlhlsltspss...............tt...........hht......stsshLhGspss............................ppl+LplhYoph ................................lGtslhss.s..phss.phss.hs.ssspohht...s.............ulho..ps..............sss......hlGph.sDs....aGph.pusahsQh.pss....ssh.sF.sttst..........................................lDSlplhlhYsu....................................................................................aG...Dohss...h+lslacLsc..sl.....................tpsptaYoshs..s.hps..........phlust................................sassss.t............................t...................shssplclpLscp.............aGpplh........pttp............t.hpssppFh...phh+Glalpsst.......usGslhhls.........ssplplaY+hptp..sssspst..............hs........hshss.................hphNphpss.ss...lpplhstss................psYL+ussGhhsplsl.......................slsplhp...p........................ppsslNsApLplhh.ss........shp.................hshshPpplhlhptcpt......................pshhppsphssst....ssahuhhp......................spsspYsFs.hst.lpshlps................................pt..................ts.chs.hhlhlslphpts..............................t.ht...................sttshLhGssts............................ppl+lplhYop.....
.......................................... 0 23 55 63 +13935 PF14093 DUF4271 Domain of unknown function (DUF4271) Bateman A agb Jackhmmer:C7PG02 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 221 and 326 amino acids in length. 24.20 24.20 24.30 27.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.82 0.70 -4.65 53 191 2011-05-09 14:10:02 2011-05-09 15:10:02 1 1 190 0 53 188 97 205.80 23 75.26 CHANGED alshlllhshhl....lshs+thhhpphp..sFhhhhhppphhhtpsspph.....haphhhhl.sslhhulhh.....ahhhppht.thhhsh............hhhhshh.hshhhhahlhKhllhphluhlFh.cchhptahhphhphhshh.ul.lLhPlsllhsYhs........hshthhhhhhlhlhllshllhlhpthplhhpphht.hahILYlCALEIhPhllL.aphlh ........................lshlLlssFhl..hhhshs+phh.pphp....sFhhht.tpsphhtspssp-h.....tapl.hLhltsslhhulhh.....ahhhpthtsthhtps............hhhlshh.hshhhhahlhKhllhphluhlFhscpt.sp.ahtshhtlhhhhGh..hLaPhsllllYhs........hslphhh.hhhlhlh..lh.spllhhh.+thplahpphhs.hhhILYhCuLEIhPhllL.aphl..... 0 22 47 53 +13936 PF14094 DUF4272 Domain of unknown function (DUF4272) Bateman A agb Jackhmmer:C7PHI5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 221 and 399 amino acids in length. 
27.00 27.00 29.30 28.80 20.50 20.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.53 0.70 -5.33 39 169 2011-05-09 14:13:04 2011-05-09 15:13:04 1 2 163 0 51 163 7 198.20 29 67.51 CHANGED R+t+ohphLppp.Glshh.ppLPsl..sps-sphRstcElspRAlAlhhlshhAps............tpstphhh.phl.cpasl.hphLTspE+pal...sspss...........cpthhphsW+YEuhasLhWALGhl-..pLshPsplC...Dsshshphhtph.ts.hsphhpp..splRshsEILDttDLhYRh...cWAsVcARl..pstsssu.....slstsVVhER+hALsWLls.............ht...sps.........WDcl .............R+tpshphLppp.Gl.h...tpLP.l..stppsphRstc-lspRslsLhhlh.tAttl.................tpspp.hh..phl.ppasl.hphLTspEpthlt........ssps.c................tpthhphsW...+Y.EuhhsLhWuLGllc..pLshPschC...........Dsthhh.p.hhtph..t.....p.hpphhpp..sphRshpElL-ttDhhaRh...cWAshcAch.....pspsssu...........sls.ulVhER+huLsWLls.............h.stt.........WDpl...................................... 0 21 36 43 +13938 PF14096 DUF4274 Domain of unknown function (DUF4274) Bateman A agb Jackhmmer:C7PQU5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 80 amino acids in length. 27.00 27.00 31.30 34.00 23.40 24.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.52 0.72 -4.18 39 134 2011-05-09 14:22:07 2011-05-09 15:22:07 1 2 114 0 32 115 6 78.00 25 44.42 CHANGED hsoscplahhstpaN......aD.s...shplhphllppspCDhuTALhlaWhh.......sstha.hp...........ptptsphhp-t.hphlpplhc+hhs ........soscpLahlsssaN......WD.s....uhcl.ptIlcpspCDhuTALhhFahs.......suhha.hp.t.........tt.sssh.p-h.hphlppltc+hh.t.................... 0 10 16 22 +13939 PF14097 SpoVAE Stage V sporulation protein AE1 Bateman A, Coggill P pcc Jackhmmer:P40870 Family Members of this family are all described as putative stage V sporulation protein AE, although this could not be confirmed. Proteins in this family are approximately 190 amino acids in length. 
27.00 27.00 28.00 199.60 21.80 19.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.16 0.71 -4.88 23 159 2011-05-09 14:32:09 2011-05-09 15:32:09 1 1 159 0 34 84 0 178.80 67 93.72 CHANGED +VILVTDGDphA++slEhsA+plGGRCIStSuGNPohLoGpcLVcLIhpsspDPVlVMFDDsGhhGcGsGEpAhcaVssHssI-VLGslAVASsTcts-hs+VDlSIDRpGcls-huVDKpGhs-hc..t+lpGDTV.sLcpL.slPlIVGIGDIGKMstpDchc+GuPITp+AlchILE....RS ...lhVTDGDEYAKRTIEllsK-hGGRCIStSpuNPT+LTGcclVELIhQTPYDPVFVMFDDSGalGEGuGEcALKYVATHcpI-VLGlLAVASNTHphEWsRVDVSVDRsGsLTEYGVDKaGlP-sE..lGRIsGDTlYCLDcL.sVPVIVGlGDIGKMsGsD-a-+GSPIT+KAIpLILERS...... 0 16 26 29 +13940 PF14098 SSPI Small, acid-soluble spore protein I Bateman A, Coggill P pcc Jackhmmer:P94537 Family This family of proteins is putatively assigned as a small, acid-soluble spore protein 1. Proteins in this family are approximately 70 amino acids in length. There is a conserved LPGLGV sequence motif. 27.00 27.00 36.30 49.20 19.60 17.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.08 0.72 -4.05 20 184 2011-05-09 14:51:40 2011-05-09 15:51:40 1 1 184 0 35 94 0 64.70 59 90.65 CHANGED hNLRpAllsNlpssop-pLccTIsDAIpsGEEKhLPGLGVLFEshWcpussppKpphlpsLcpuL .hNLRsAVlANVoGNoQ-QLp-TIVDAIQSGEEKMLPGLGVLFEVIWcsAsEsEKcEMLcTLEpGL.. 0 9 22 26 +13941 PF14099 Polysacc_lyase Polysaccharide lyase Eberhardt R re3 Jackhmmer:Q7UP23 Family This family includes heparin lyase I, EC:4.2.2.7.\ \ Heparin lyase I depolymerises heparin by cleaving the glycosidic linkage next to an iduronic acid moiety [1,2]. The structure of heparin lyase I consists of a beta-jelly roll domain with a long, deep substrate-binding groove and an unusual thumb domain containing many basic residues extending from the main body of the enzyme [2]. This family also includes glucuronan lyase, EC:4.2.2.14 [3]. The structure glucuronan lyase is a beta-jelly roll [4]. 
29.90 29.90 29.90 30.30 29.80 29.80 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.68 0.70 -4.69 54 240 2011-05-09 14:57:12 2011-05-09 15:57:12 1 21 167 6 118 254 933 233.50 15 63.76 CHANGED tshshpushpssshsphpp..ttsstps..........................sssshpGp.................hAl+hphp......hsss.st.......................ut.RuEl.....ptsshphusphaYsauh...hlspsas................ss.p..hlsQa+tps....s...................................................tpPshtlhl.......pssphthphtsss...........................ts.tttphhshsslpt..GpWpchslcs+Wuss........psGhhclWh..........sG..phlhppps.ssthss.................pt......ahKhGlYpsshppsss............laaDplth ................................................................................................................t............................................................t....s...................shp...........tss.......................................sp.RsEl...........tt.th..thGpshhYsauh...hlss.s.at................ss..ptthlsQa+sts...........................................................................stPs.htlth.......pssphhhphpsss........................................................tstphshhhshs..s...hst.....upWhchhlphcasst...................psGhhclah..........sG....ptl.hp...tps...tshhss.................spsh..hhKhGlY+ss.ps........................h......................................... 
0 41 77 100 +13942 PF14100 PmoA Methane oxygenase PmoA Eberhardt R re3 Jackhmmer:Q7UPP4 Family This family is a putative methane oxygenase [1] 27.00 27.00 39.80 38.00 21.90 21.00 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.82 0.70 -5.19 65 174 2011-05-09 15:14:35 2011-05-09 16:14:35 1 6 100 0 95 187 116 281.70 25 73.69 CHANGED sspslsl...thsG.p......lspYhas........ht......ss....+PalHPlpoh.uGsslTch...........pPtDHsHHpGlhhuhscVs....s....hsFWsspshh................tp.Gphtapshpthps....ssstupl...spclsW.hst......sGph.lLpEpRslshpss....st..............shhl-asssLs.....................sssp.slshs...ss............tY.GGhthRsspsh............................ssuplhsutG............tpGps..........shGpp....us.........WlshsGph.......ssp...............tsslshhscPsN.......thP......s.WasR...shuhhusssssth.........ptshsls...sGcslph+YRlllt-Gsh.sssc.lsshhppa ........................s..thsl.thsG.p.lhpYhhts.........hpss.....+PalHPlpTh.uGss.lTch............pPsDH.....hHHhGlhhuhscVs........GhsFWsspshh...................pp.Gphpppshpthts....ststsph...spplsW..hst......sGp....lLpEpRslshpsh....sst..................sahlDhshsLs.....................sssp.slphs.ss............sY.GGhhhRsscph............................ssuplhsutG...................tpucp...........shGpp....us.........WlshsGph.....................sst...............ts.olshhspPsN.......ths......s.WalR....sts.hhusssuhsh.........ptphsls...sGcslph+aclllt-Gth..stsphsthhpt.................................... 0 48 79 95 +13943 PF14101 DUF4275 Domain of unknown function (DUF4275) Eberhardt R re3 Jackhmmer:Q7UUJ6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. 
27.00 27.00 31.60 31.50 20.20 18.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.03 0.71 -4.11 13 119 2011-05-09 15:22:53 2011-05-09 16:22:53 1 1 112 0 16 94 1 132.80 57 95.41 CHANGED ME....lhchLcp..+slKlpEhspWGshhRKpWEcpFAsHLohcEKcpIhlhsscshs..GYLWHlFSY-p+...sCLcGcEAcpAFcpcpKssCYlFaQasD-slllEcAspLpAsDL.s.......Eh......DlYVVDc-FsWTaVhTHEscahGPYFs ......................ME.hl-hL++..KshKVREhpp.WGsYFRKRWEDpFAN.HlScEEKE-IaLYsDchsC..GYLWHIFSYE+K...KCLEGcEAEpAF+sEsK+-CYIFYQHsD-VLl.lcDAShLph-Dllp.......Essp...ha+uDlYIVDK-FTWTFVKTHEccW.CGPYFs......... 0 6 10 12 +13944 PF14102 Caps_synth_CapC Capsule biosynthesis CapC Eberhardt R re3 Jackhmmer:Q7UXU8 Family This family of proteins play a role in capsule biosynthesis. They are essential for gamma-polyglutamic acid (PGA) production [1]. 27.00 27.00 35.60 29.30 24.20 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.53 0.71 -3.99 31 208 2011-05-10 07:29:10 2011-05-10 08:29:10 1 2 193 0 50 120 38 120.40 43 66.61 CHANGED slGlllSLlasEpsGlssGGllVPGYlALhh.spPhplhlslhhSllTahll.phlu+ahl.......lYGRR+Fshhlllull........lphlhphh..h......................h..phpslGhIlPGLIAsphp+QGlhhTls ...............lGllLSLlFsE+hGlsPuGLVVPGYL.ALhh.spPlhllsllllSlLTYhIV.pslS+ahI.......LYGRRKFAAhllsGhl........L+hlhchl..h...............l.ssFths..-hpuIGlIlPGLIANoI.p+QGlshTl.h........... 0 18 37 47 +13945 PF14103 DUF4276 Domain of unknown function (DUF4276) Eberhardt R re3 Jackhmmer:Q7UYP7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 190 and 224 amino acids in length. There is a single completely conserved residue E that may be functionally important. 
27.00 27.00 27.40 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.29 0.71 -4.45 56 288 2011-05-10 07:40:08 2011-05-10 08:40:08 1 2 220 0 123 324 17 189.50 16 90.84 CHANGED llEtsotpphLcslL.+hh........thphphhshthc.slppphscph..+sapphh..........stllllhDpDs.ssstphppp......p.htp.t..tpt..............s+lsspEhEuWaLuD.hpAl..pphhsph.sphsh.......tt.pKhpsP-sl.susppt................tspYp......Ksptuptlustl......shspsp.S.SFpp.....hlpulpph ....................................................................................................hhEs.o.ppthlpt.ll..hh..........................th.h.hh.h.ht.pht.tt....hs..t.th...............pthpp.h.t.....................shlhshhD.....h.s...hss....st...ttph................htt...........t...h.............s...t.......phhshlt....hcEhEuWhhuD.h.p.u.l..tphhsp......tphth...................t.htpht.sP-plpsu..sppt......................thhstYp.............Ksh..tu...tlstpl.....s.phhppp....sspFptalptlt................................................................................................... 0 44 88 113 +13946 PF14104 DUF4277 Domain of unknown function (DUF4277) Eberhardt R re3 Jackhmmer:A8ZKM0 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 110 amino acids in length. There is a conserved NGLGF sequence motif. 25.40 25.40 25.40 25.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.27 0.71 -3.89 23 342 2011-05-10 07:59:00 2011-05-10 08:59:00 1 2 80 0 129 362 22 108.60 39 26.46 CHANGED Mp.........p.pl...+sLDHLGlVAGlhDElslschIDchLspcppc+.lSpGpslKAMILNGLGFsu+sLYLFPpFFpsKslE+LlGpGlpA-aLNDDtLGRsLDcLYchGsoplFtplAlp .........................................................thpl.pslDHLGlVAullDclGlsclIsph.lsh-..s.p.c.p.losGpsVKAlllNGLGFss..psLYlaspFFpshshE+LlGps..lpPcaLNDDtlGRshDcLYchu.lsplF..lsh.p............. 
0 43 106 116 +13947 PF14105 DUF4278 Domain of unknown function (DUF4278) Eberhardt R re3 Jackhmmer:A8ZNS8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 58 and 136 amino acids in length. There is a single completely conserved residue R that may be functionally important. 25.30 25.30 25.40 25.30 24.70 25.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.58 0.72 -4.02 53 145 2011-05-10 08:06:48 2011-05-10 09:06:48 1 2 71 0 65 166 197 56.30 30 62.60 CHANGED M....cL.....sYRGlsY-.hsssslp...............sspsp............hsupYRGhsa.......hpphsh.psp...hsLp.YRGVsY ......................M..pLsYRGlsYs..s.sssp...............htpsp............lphpYRGhsY......phpphppht.s...t.hp.Lp.YRGlsY.................................................. 0 5 47 58 +13948 PF14106 DUF4279 Domain of unknown function (DUF4279) Eberhardt R re3 Jackhmmer:B0C1A4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 134 and 145 amino acids in length. 25.00 25.00 25.60 25.40 24.70 23.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.23 0.71 -4.22 48 184 2011-05-10 08:13:26 2011-05-10 09:13:26 1 1 166 0 42 156 3 116.00 23 83.66 CHANGED lhu-shs......s.-plTphlulpPopsht+Gshhpss...p......hhtphssWtlsoptttpts...lp..cplcpLlppLp....sttstlppltpphsh....p......thhshh.shssspsss....................lsschlphlsslGsplsl .............................lhG-.hss.-tlTphLslpPTcshtKGchhtppp...........hhpppssWpls...othppshc.....lp..splphllcpLp......scpptlpclppcasl...p.........hlhhlhhp.hpssp.sPs....................lspchlphhuslsu-lch......................................................... 
0 13 31 35 +13949 PF14107 DUF4280 Domain of unknown function (DUF4280) Eberhardt R re3 Jackhmmer:B0C1L7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 129 and 456 amino acids in length. There is a single completely conserved residue C that may be functionally important. 25.00 25.00 25.30 25.50 24.50 24.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.90 0.72 -3.74 69 265 2011-05-10 08:22:45 2011-05-10 09:22:45 1 18 148 0 84 252 15 112.10 27 38.96 CHANGED VssGAhlpC.shGsss...utLpVhs....ps.plhspu.....p.hAohtDhhsh.....hNlhsFG.........hCpsh..sssss..................ChPs......ss.Wt...st.sslhl...........suts.sLpssSphhCsa.G..Gh..Iplhss......GQt .................................lstGAhlpC.shGsss...shLtlhs.....tp.ts.hhss.....................tshushtDphsh.....hNI..sFG...............................hCps...ssssht...............................................ChPs...hs.Wh..sst.ssshl...........ssts.sLspsSphhCsh..G...Gh...IphhssGQ................... 0 24 63 73 +13950 PF14108 DUF4281 Domain of unknown function (DUF4281) Eberhardt R re3 Jackhmmer:B0CBE6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 147 and 232 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important. 
27.00 27.00 32.80 32.40 23.50 22.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.70 0.71 -4.04 70 170 2011-05-10 08:58:51 2011-05-10 09:58:51 1 2 128 0 92 165 946 126.70 30 64.30 CHANGED stlFslushhslPhWhLhl........hhP...php.......hTp.plhpohh..shlhLu....hlYhhlhhsuhss...ssssh...........................hssL.s..ultplF...............usptsshsuWl.HaLshDLFVGpWlhh-upcpu.l......hhh.sLlLshhhGP.lG.LLsa ......................t..lFshushhslPhWhLMl........hhP...php.......................hTc..clhpShh..shlsLu....hlYshLlhhuhsstshthh..........................hssLs.ulsphF...............usptssssuWl.HhLshDLFlGRWlah-utc..ps......l...........hshhsLhLshhhGP.lGLLsa........................ 0 45 80 89 +13951 PF14109 GldH_lipo GldH lipoprotein Bateman A agb Jackhmmer:C7PII4 Family Members of this protein family are predicted lipoproteins, exclusive to the Bacteroidetes phylum. Proteins in this family are typically between 155 and 167 amino acids in length. Members include GldH, a protein linked to a type of rapid surface gliding motility found in certain Bacteroidetes, such as Flavobacterium johnsoniae and Cytophaga hutchinsonii [1]. Gliding motility appears closely linked to chitin utilization in the model species Flavobacterium johnsoniae. Not all Bacteroidetes with members of this protein family may have gliding motility. 27.00 27.00 27.30 27.60 26.00 22.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.38 0.71 -3.93 60 194 2011-05-10 08:59:56 2011-05-10 09:59:56 1 2 190 0 54 193 141 132.50 28 81.38 CHANGED psplac.papsls..s..uWp+scslpFpl.shtDss.s.YslhlslRssssYPapNLaLllphphs..p.uph.....hsDTlphpluc.ss....GphhG.pGhus.lhppph.h....pshpFs.csGpYplplppsMRcs.......sLpGIs-VGl+l ......t.ptlYc.pYpsls..s...uWp+s-s..lpF.sl..shp..D.sh.ssYplhlslRssssYPY..pNLaLhlp.hphs....ssph......hsDTlphpLs-..ps....GphhG.pGhus.lhppph.h....psh.ph..csGsYplplppsM+cp........LpGIsDlGl+l.... 
0 23 48 54 +13952 PF14110 DUF4282 Domain of unknown function (DUF4282) Eberhardt R re3 Jackhmmer:B0CFB0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 93 and 155 amino acids in length. There is a single completely conserved residue E that may be functionally important. 27.00 27.00 28.20 27.80 26.90 26.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.99 0.72 -3.79 54 193 2011-05-10 09:26:58 2011-05-10 10:26:58 1 3 179 0 82 176 29 83.40 25 68.64 CHANGED h.hsFccalTspllphlYhlulllhslhslssl.....huuh..............................s.hhuhhtllh.uhls...hlhtllhsRlhhEhhlshh+ls-slppltcpt ...........hpF-palTPpllphlYhlsllhlslhuls..sh........hsuh.......................................hhhhl...h.uhlh....hllshlhsRlhhEhllslF+ls-sLpcItcp.s..... 0 26 54 70 +13953 PF14111 DUF4283 Domain of unknown function (DUF4283) Coggill P pcc Jackhmmer:A0MDU5 Domain This domain family is found in plants, and is approximately 100 amino acids in length. Considering the very diverse range of other domains it is associated with it is possible that this domain is a binding/guiding region. There are two highly conserved tryptophan residues. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.61 0.71 -5.13 121 920 2011-05-10 09:46:33 2011-05-10 10:46:33 1 139 22 0 264 872 0 122.60 17 16.68 CHANGED lplsccth.......hltp....sLl....G+a.......hs.p.......lps.ltphhtptWtLpu...plpltpl..s...p...s......hhlhcFcptt-hc+VLppGshthps.hhlhLc+W.s...spssshp.pthpph.lWVRlhsLPlphasp.phhcplGsthGt.hlplD.pstphpphp..as.....Rlh.Vc ....................................................................h.......................................................h....h.t....a.t.....t...ththh.h...t...t......s......hhhhpFpt.t-h.pplht......t...us..h.h.h..p.s.....hhl..hlp..pW.s.........t.s.....t.....t.ph.h..hWlcl..h..slP.hthhst.phhpt.lu.pthGt.hhtlD.t.s..t....h....hp....hh+lhl........................................ 0 25 157 230 +13954 PF14112 DUF4284 Domain of unknown function (DUF4284) Bateman A agb Jackhmmer:C7PJ13 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 124 and 142 amino acids in length. 27.00 27.00 29.30 28.00 25.40 26.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.93 0.71 -3.64 22 158 2011-05-10 12:25:39 2011-05-10 13:25:39 1 2 101 0 24 145 0 117.30 28 81.12 CHANGED hVolWlG.sFpopp-hppYh-.........cY....-.E-ucplsSpFtp-hsltah..DcDhlEtshhsps.t............slppLLpshSYspphlpph.phh...ph..pphNslIhlYshc..Ystphcpsp............lpalGshpYc .................VSlWlG.sh.p.oppplcpYh-l...................pY......c.--G-pl.supFhpDFsls.hh..D-D..hlEhphhpps.ps............slptLLpshS.Y.cp.pl.lsphhphh...th..pphNulIhlYsap..Ysppsppspt............hpalGshtY............................. 0 6 13 19 +13955 PF14113 DUF4285 Domain of unknown function (DUF4285) Bateman A agb Jackhmmer:C7PJQ8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. 
Proteins in this family are typically between 157 and 206 amino acids in length. 27.00 27.00 27.70 27.60 25.90 25.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.93 0.71 -4.10 48 220 2011-05-10 12:39:33 2011-05-10 13:39:33 1 3 185 0 60 159 4 122.90 37 69.00 CHANGED ssapNsCAl.RhShuLppsG....hshpshs.............hthhh..ss........+hhhh..RspchtsaLp..p...s.......................ssshtsplpu...............+pGIIhF...hthWspu........uG...HlsLW...NGsphs......sps..................................................shtpspplhFW ...........................................................h..apNsCsIRMSYsLNto.G....hslspto................htpluGsDs........KhYha..RVs-hhcaLp+p.......hs...........+P-hIs....................sssppuchhG...............KKGIIlh.pspGWosA........pG...HlTLW...NGohso..DpCah................................................................................ 0 19 28 44 +13956 PF14114 DUF4286 Domain of unknown function (DUF4286) Bateman A agb Jackhmmer:C7PK46 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 100 and 112 amino acids in length. 22.80 22.80 22.80 22.90 22.60 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.11 0.72 -3.72 46 171 2011-05-10 12:48:32 2011-05-10 13:48:32 1 2 166 0 61 159 102 97.20 30 86.20 CHANGED IYNlThpl.-csltccWlpWh.pcpHIP-llsost...Fppuplh+llscp-ts.........GpoYolQYpscspssLppYhpcpuscLcp-shppFusKhluFc....Th.LEhl ........IYNsThpl.-cslccpaltWh.pcpaIP-lhpsGt...hpps+..ls+lLs..cc-pt.........GpsYSlQapscspssLpcahpc....puscLpp-hhchFt..c.KhluFt....Tl.hEll................................. 0 31 56 61 +13957 PF14115 YuzL YuzL-like protein Eberhardt R re3 Jackhmmer:C0H3R0 Family The YuzL-like protein family includes the B. subtilis YuzL protein Swiss:C0H3R0 which is functionally uncharacterised. 
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 27.00 27.00 33.40 33.30 23.00 17.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -7.92 0.72 -3.68 7 211 2011-05-10 13:53:45 2011-05-10 14:53:45 1 1 121 0 23 67 0 42.10 53 90.24 CHANGED sKhKKsPSKsGlSAssV+Gp.GsT.pcsGuh+psStppphKKc ..KlKKsPS+uGlSAssVcGp.GsT.s+ts.u.st+.sSsNpphK+.. 0 1 11 14 +13958 PF14116 YyzF YyzF-like protein Eberhardt R re3 Jackhmmer:C0H3T9 Family The YyzF-like protein family includes the B. subtilis YyzF protein Swiss:C0H3T9 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 30.60 30.20 21.20 17.50 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.37 0.72 -4.01 25 164 2011-05-10 13:59:09 2011-05-10 14:59:09 1 1 164 0 35 95 1 48.20 47 83.06 CHANGED sCcEHlElAlDpaVD-hEpsPslpclpcsp......sppCcaC.cptApYlVs .sChEHlElAlDhhVDEpEluPsIppl-soc....p.ppsC.-aC.pspAsYlVs.. 0 11 23 28 +13959 PF14117 DUF4287 Domain of unknown function (DUF4287) Bateman A agb Jackhmmer:C7PKG7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 70 and 180 amino acids in length. 27.00 27.00 27.90 27.00 24.20 23.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.90 0.72 -4.28 59 198 2011-05-10 14:04:37 2011-05-10 15:04:37 1 2 162 0 95 200 86 60.60 38 62.24 CHANGED uhpoYlss.Icc+TG+slscWhpll...cpp.........sssKat-hVsWLKsEHGLGHGHAsAlVthhctp ...........u.toYhss.IEcphG+sls-Whpllcpp.........shs+ahElVuWLKsEH.GLG..HGHAsAlVshhht.t............ 
0 33 71 88 +13960 PF14118 YfzA YfzA-like protein Eberhardt R re3 Jackhmmer:C0H3X6 Family The YfzA-like protein family includes the B. subtilis YfzA protein Swiss:C0H3X6 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 27.00 27.00 32.60 32.50 19.60 18.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.23 0.72 -3.88 4 111 2011-05-10 14:05:57 2011-05-10 15:05:57 1 1 78 0 6 58 0 91.00 59 97.31 CHANGED MssK...tpPl..hKRsWhpolssFllsQLlFIIhEhTuWhP..sF+-.Gshhs+lVsSpFFTcWFu.YcsPaFNllTlFhuIhhll.slhGAhKDlhspt...N ..........hh....K...tpPl..hhRsWh+hLGsFhlhQLlFIlsE.lTuWsP..NF+..GpFhsR....llN.S..pFFTEWFoPYKhPpFNVhTAFaAIhLl..uLluAhKDhpoRKp......... 0 2 5 5 +13961 PF14119 DUF4288 Domain of unknown function (DUF4288) Bateman A agb Jackhmmer:C7PM87 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 27.00 27.00 27.60 27.00 24.70 24.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.91 0.72 -3.86 22 131 2011-05-10 14:09:39 2011-05-10 15:09:39 1 2 128 0 20 109 2 90.90 45 74.05 CHANGED aYlsphlhchh..........t.psstpppshhcEphlLlcAcop....-cAac+upc.hut.ppcpsapN..p.sptlpachhsls-lh.l.h-pl-c.G....sElau ...............hYuVKLLFEol..........hspch-cs+coLFEES.IILVKAsSh....EEAHtLuEp.lAh.puEc.TYcN...hh..sEQlTWoFRKl...LcVFEL..spssFEo.G....pELYu.. 0 13 19 19 +13962 PF14120 YhzD YhzD-like protein Eberhardt R re3 Jackhmmer:C0H3Y1 Family The YhzD-like protein family includes the B. subtilis YhzD protein Swiss:C0H3Y1 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved GKL sequence motif. 
27.00 27.00 31.30 73.80 23.10 17.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.89 0.72 -4.07 15 141 2011-05-10 14:12:53 2011-05-10 15:12:53 1 1 141 0 23 80 0 61.00 67 97.73 CHANGED Mp.sYhLTVF-psGEpLL-EpFEAusD-EAKchGcppLcE+shpc+THRhssu.GKLlLFHR MG.lYVLTVFEKDGoKhLDESFEAATEcEAKsKGEuILpEKGLaEKTHRCTSoAGKLVLFpR. 0 6 15 18 +13963 PF14121 DUF4289 Domain of unknown function (DUF4289) Bateman A agb Jackhmmer:C7PM99 Family This family of membrane bet-barrel proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 655 and 722 amino acids in length. Swiss:Q2S343 is identified by Gene3D as a membrane bound beta-barrel. 27.00 27.00 39.20 27.70 21.30 20.70 hmmbuild -o /dev/null HMM SEED 614 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -12.85 0.70 -6.10 62 213 2012-10-03 17:14:37 2011-05-10 15:18:18 1 3 201 0 58 222 301 617.50 27 91.63 CHANGED .slpsapl.s.hht-pphs..sssThpptapp.shhpt.phchhshuNlGpshpshhat.c...p.htshhhhcshchhhhps...............cclpaasshoPhTpLhYpssspp...tpphcuhauhNhsc+.......hshuhpachlhupGhYps........ptsuphN.hphhsuYhuc..RYphpstasspch.pspENGGl.ss-shhptspph.....................ppp......aps.splsspL..pps...pNp.cspchalsHcYplshpp.hsth......................................................................................................................shhHohpa-spphhapps.......t.h..psht..................h.ssshpDpTphhplpNphuls......asph.hh..utlpsahstchhp.Y.....thssh.......................hh.pphpppplslGGpl.t+p.uphhchpupu...c..htl.stshuphplpuphshsh...t-sh.pltushhh.pspsPsFhh.phapSpa..a.WpN......shsp.ppppltup..lshp+htsplpsshsp.........lcNYsYF.......................sspstspQpussIsl.LphplppcFp.hGhaph-NplhaQpsos..psl.........LslPplsshsslYhphpl...hp.KsLhlphGhsh+YFTp..YhAssYsPslupFhlQs.........phclGsaPllcsahNh+l+psRlFlthpHlNsuh.ss........sYFhsPpYPhpshhl.+FGlsWNFFs 
...............................................................tlh.apl.p.hhsspphh...ssDThtpta.pp.shhpt.php..hs.huNlGsPh.shha.p+pp.......t.hhhh.psh...shhhhps...............schpahso...oPhTpLtYppuusp...ppphcuhauhNhs+c.......lshGhsachlhucGh...Yps..........ptouphN.hphauoYhuc..+Yp..hpshhsspph.cssENGGI.ss-phlspspph.................ppp...........ap.s..splsshL.sps..............tNp.cspphalspcYsLuhp+ph...s.t....tsth.........................................................................................................uhhHohph-pppppapst.....tpspsaatpshh.....................ssshpDpTphh..s.lpNphuls.........sFsph..sp....usLpuahsachhp.Y..ph.sh..............................h.pphsppplhlGGpl.sKptGphhchpspu..E...hsl....spshGphplcuphshsh.....t-ss.pltApu.h...psptPs....Fhh.ppapSpa.....ahW.cN......shspphpspltup.....ls..hp+h...tspLpsshps.........lcNYsYF..............................sppstspQpu.usIpl.lphplppcF...+.hGhap..h-Nplh..aQpoos..psl.....................LslPplsshsslYhphpl..h..KsLplphGs-s+YF.Tc..YhAs..sYsPslsp...FhlQs............phcl.Gs.Y...PllssasNh+l++.sRh.FlhhpHlNsuhhss.........sYFhsPcYPhssth.l.+hGlsWNFas.............................................. 0 25 52 58 +13964 PF14122 YokU YokU-like protein Eberhardt R re3 Jackhmmer:C0H434 Family The YokU-like protein family includes the B. subtilis YokU protein Swiss:C0H434 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two conserved CXXC sequence motifs. 27.00 27.00 28.10 28.00 26.60 26.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.05 0.72 -4.26 10 136 2011-05-10 14:25:58 2011-05-10 15:25:58 1 1 128 0 14 62 0 83.70 68 95.75 CHANGED sCcWCsppcAssspsoVYWELPDGT+AIEIs-TPuIsCSuCGMsYQ--sllcEIEDQLlLIDoKKLPcslTYcpLMsp-RlLKRNYF ........................MWCDSTEAKESLNTVYWELPDGTKAIEIQ-TPCISCSSCGMDYQuDpTVKEIEDQLFLIYTKDLPKQLTYEELMuRPRLLKRNYF................... 
0 2 8 10 +13965 PF14123 DUF4290 Domain of unknown function (DUF4290) Bateman A agb Jackhmmer:C7PMG8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 200 and 221 amino acids in length. There are two conserved sequence motifs: EYGR and KLWD. 27.00 27.00 50.20 50.10 26.70 26.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.08 0.71 -5.08 48 201 2011-05-10 14:29:21 2011-05-10 15:29:21 1 1 197 0 55 186 178 174.90 47 82.69 CHANGED L-YNTpRp+LllPEYGRplQpMV-aslslcD+-ERs+sAcsIIslM.GphpPcL.RDssDapHKLWDpLhIMSsFcLDlDsPashsscEpLtp.+P-.lsYPpsph+aRaYGpsIpphI-pAhphEcG-c+-uLlhsIANpMK+salsWNK-oV-DchIhpcLt-LScG+lpLstss.cL ......h-YNTp+c+LhlPEYGRpIQpMVDaslolpD+cERp+sApoIIslM.Gsh.PHL.RD.VsDFpH.KLWDHLtIMSsFcLDlDhPY-l.spc.-s.L.ts.+P-.lsYPpsph+YRHYG+slcphIc+Ahchp-G-c+csLlthIANaMKKsalsWN.K-o.V-DcpIhcDLt-LSsGclpLstpt..h....................... 0 24 49 55 +13966 PF14124 DUF4291 Domain of unknown function (DUF4291) Bateman A agb Jackhmmer:C7PN25 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 190 and 214 amino acids in length. There are two conserved sequence motifs: VYQAY and RMTW. 
27.00 27.00 49.30 38.30 21.00 19.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.15 0.71 -4.66 57 206 2011-05-10 14:31:31 2011-05-10 15:31:31 1 4 187 0 91 186 3 173.00 44 83.25 CHANGED +pIRA..ta.sscTIsVYQAYsspIAcsAlpst+Fs.sP.FphsRMTWIKPSFLWMMYRuGWupK.tsQERlLAlcIpRcGF.-hhLppusLSph..............................tsthh.sptpWppplppus......................VRlQWDPERslphpsL....saRSIQlGLsschlc.cYsc-WIluIpDlTshs+clcphlpsu.....phcpAttLLPt..EcsY ...................pIRA..pa.sppTITVYQAYsssIAssAlcsG+Fs.us.Fph..sRMTW.IKPSFLWMMYRuGWutK.tsQE+lLAlcIpRpuF.-hhLppAsLSph.................................................ps.plasp.tstWcpth...ppus......................VRVQWDPERsl.php..L....taRSlQlGlssphlp.pas--.WI..VuIpDlTshs+ch+pllpsG.....chppAtthlP.Ec.Y.................. 0 35 61 79 +13967 PF14125 DUF4292 Domain of unknown function (DUF4292) Bateman A agb Jackhmmer Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 243 and 287 amino acids in length. 
27.00 27.00 27.20 27.30 26.40 25.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.09 0.70 -4.91 73 228 2011-05-10 14:33:12 2011-05-10 15:33:12 1 1 225 0 80 221 184 198.30 21 75.77 CHANGED ssphcslsuchplslp..tpppp.sssssl+hc+-ctIhlSs..shlG.hpVu+hhlTP-plhhhD+lpppYhpu..saspLp....chlshs.lsFp...pLpslLlGp..hhhshppphp......hphs......sp.th.lp.pp..........sthphthhhsspshplpphpltptss...ppplphpYtsapph.....stthhPtplplth.p......ttppsplslpasc...hsh.spshphsaslPspYcpl .................................................................................................................t...hpslsu+hplslp..sppppholsssl+hc+cchlhlol..shlt.hEluRh.lTPDplhhhD+hs+.pYhcu.......saspLp....plhshs.lsFp...pLQsLlhsp.......hhhstppphp.......................hp.t........tt.h.lp.pp.....................tthth.hhhsstphhlt..p.plt..t.st...t.tlphpYtsap.h.....stt.hPtphplth.p.......tpphplplphpp...hph...spsh...p...h.hphsptap................................... 0 33 61 76 +13968 PF14126 DUF4293 Domain of unknown function (DUF4293) Bateman A agb Jackhmmer:C7PP09 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 136 and 154 amino acids in length. 27.00 27.00 27.10 29.00 25.30 24.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.91 0.71 -4.28 71 211 2011-05-10 14:35:41 2011-05-10 15:35:41 1 2 209 0 68 197 86 143.70 32 97.01 CHANGED IQRIQTlYLLlssl.hssshh.hhPlhphssssthhh..t.h..h............................hhhslhhlsullulhuIFhaKpRtLQhpLsh...lshlLhl...hhhulhshhhhshtsph........hshphthuhhlPllullhhhLAt+uIp+DEcLV+usDRL ............................IQRIQTlYLLlssl.lhhshh..hhPlhphhss..shhh.....hthhsh.h....t.....................slhslhhlssllulhoIFhaKpRh........lQhplsh....hshlLhl....shhshhs.hhhhslpsph..........hshphuhuhhLPllullLhhLAh+uIt+DEtLV+usDRL...... 
0 25 53 65 +13969 PF14127 DUF4294 Domain of unknown function (DUF4294) Bateman A agb Jackhmmer:C7PPB4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 192 and 226 amino acids in length. 27.00 27.00 83.40 82.90 23.60 23.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.85 0.71 -4.63 44 181 2011-05-10 14:42:30 2011-05-10 15:42:30 1 1 178 0 44 172 171 154.90 44 73.32 CHANGED GDoIs....hlpLspVhlhsph+.Fcsc....c-cppYhhLhRcV+KVhPhAKhsscplh-htchLpolss++s+c+ahKhlcKhlcccaosclKKLThoQGplLIKLlpRQTupToY-LlKshhuGa+AhaYpshAthFshSLKccYcP..stEDhLlEcllhps .......................tDoI...hlpL.spValas.hc.F+..sc....c-+pcYh+LlcsVKKVhPhAK.ssctllEh.caLpTlPsc+t+c+ahKpVEKtlccpYoschKKLohoQGKlLIKLIcRpospooY-LlKuahGsa+AsaYQshAtlFGsSLKccYDP..ps-DtLhEcllhh.s........ 0 15 38 44 +13970 PF14128 DUF4295 Domain of unknown function (DUF4295) Bateman A agb Jackhmmer:C7PPC2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are two completely conserved residues (K and Y) that may be functionally important. 27.00 27.00 27.20 32.00 26.00 26.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.29 0.72 -4.34 27 189 2011-05-10 14:44:08 2011-05-10 15:44:08 1 1 188 0 52 149 49 47.70 61 93.33 CHANGED AKKs.....VATLpput..uKphoKlIKhVKSsKTGAYsFcEchlss-pVp-al ..............AKKs.....VATL+pGp...G+saTKVIKMVKSPKTGAYsFcEpMVsNEpVpDFh....... 0 24 47 52 +13971 PF14129 DUF4296 Domain of unknown function (DUF4296) Bateman A agb Jackhmmer:C7PRC6 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. 
27.00 27.00 28.00 29.60 26.30 26.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.82 0.72 -3.83 55 193 2011-05-10 14:45:37 2011-05-10 15:45:37 1 1 192 0 54 193 131 86.70 28 35.94 CHANGED PcslIscccMpslLaDhaLscuhtt.........tsppptstpthshpphlacKaslssspFspShsYYspp.scphpcIYcclpcRLppcppshsp ....................PpslIspscMcslLYDhHlupuhtt.........stshspshpphthh.ptla+KaslopspF-sShhaYscp.s-hhpcIYccVpcRLcscppth..t........ 0 23 48 54 +13972 PF14130 DUF4297 Domain of unknown function (DUF4297) Bateman A agb Jackhmmer:C7PUH4 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is typically between 207 and 221 amino acids in length. 27.00 27.00 27.10 27.10 26.20 26.20 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.55 0.70 -4.39 34 146 2011-05-10 14:49:40 2011-05-10 15:49:40 1 6 139 0 34 124 9 198.00 20 46.26 CHANGED .pEpuGstutptFcaQhpaAlhpllchhpsps.shtlhhEh+-Dlslhps.t.t.sph..-ahQVKTpcss...pWohssLsch...............................pppp.............Shlu+Lhpp..pppas.stssplthVostsh...shslcshphshppt.......splpsphtpplp...pp...lpsphshsp..h.hstlhhl..hssls.Lcshppplhuplsphl..chhsp.stp.spslhctlhcplcc+us .....................................p-puGstuhptFpaQhphAlhphhphh..p......p..ps..satlhhE...........hc...........-.Dlsltps......sph......sFhQVKspp.......pt....saoh..ps...lspp.......................................pptpt..............Shlu+Lhpp......ptpht...ptsp......c..lthlospsh........shs.cp..phphtsh.........tplppp.tpplp...pp.....................ltsphs.t...h....phlhhh....hssls..lpsh.tp..hhuphsp.h..phh.p...p.spslhp.lhpphp.+u........................................................................................................................................................... 
0 13 24 29 +13973 PF14131 DUF4298 Domain of unknown function (DUF4298) Bateman A agb Jackhmmer:C2M2S9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 94 and 105 amino acids in length. There are two completely conserved residues (Y and D) that may be functionally important. 27.00 27.00 27.20 29.30 26.80 26.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.04 0.72 -4.11 25 275 2011-05-10 14:52:29 2011-05-10 15:52:29 1 1 267 0 31 183 1 85.70 29 84.60 CHANGED +IpchpctaschpchlscLpcsl-papcshpchppLpsYY.oppWhcDhps.pps...phss.chptuVLSEDulashhu-phpLAhphLclus ..............+IpcMpphhpchsphLspLpcslpphpct..pphtpLcsaY.op-ahcshct.pps...........phss.phstuVLSEDulashhs-pppLAhphLcl....... 0 10 18 27 +13974 PF14132 DUF4299 Domain of unknown function (DUF4299) Bateman A agb Jackhmmer:C2M3B8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 275 and 313 amino acids in length. There are two conserved sequence motifs: RGF and DAY. There are two completely conserved residues (P and D) that may be functionally important. 
27.00 27.00 28.80 28.70 25.70 25.20 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.90 0.70 -5.28 20 348 2011-05-10 14:56:01 2011-05-10 15:56:01 1 1 303 0 17 200 0 267.10 50 97.31 CHANGED SloFaIKNK+p.hh.uhpclhoscclLsLsct..Lppauh-tsppphshpchhtt.ltp.t....sllhGspspSuRGFELuYsccppsY.sVRlhTPSopsDWplALpalpsLupph.sscIhsE.pGcpaoscsIppFDYcsDIhhGlcsl.tplpscpttsh....haGlpRPlshscchl-cIhsupss..lcpFuchlcch..QaLDAY.A+QpFacspsssc..IhGhYsLopslcTILPac..P...V-hpshphlpsc-luhWplsllshsu-csc..ptYphluplcYccFlcpLPc-KYcalDAsYIhVcsLo+-Elpcl ...........................................t.hTFaIsNKpS.lL.GpQclLsAKSILuLlDG...LESHSYDsshLRQsLN.RLp............hIcCulsGpSphhF+VSYsDup.KuY.pVclPDhhT+sDWpIlhsFLcALhuhh.Go-I...........EGL-sF..DFEAaFpuuIpsaLuD.sA+h.shCp......GIhsPlaFS+EpLcuFLcuDu...LApFEspVRsl..QpoDA..YFA+spFYpDu-.GK..VaGlYaLAQGV+TVLP+E..P....aVPss..YlEQLs-+EVp...W-IcLVpIoGDusK.PEsYEuIARLDYscFLEsLP.saY+pLDA.sQIcVQPIhsp-hcsL....... 0 4 6 11 +13975 PF14133 DUF4300 Domain of unknown function (DUF4300) Bateman A agb Jackhmmer:C2M4P8 Family This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 281 and 303 amino acids in length. There are two conserved sequence motifs: NCR and PYQ. 
27.00 27.00 54.40 54.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.64 0.70 -5.11 36 333 2011-05-10 16:08:49 2011-05-10 17:08:49 1 1 320 0 24 172 0 241.20 60 83.36 CHANGED hsYSNLssppSpcEVpshL......sAslsppslcpFhphVs-YNs.sl.....ppstlpssFssh...tps-YDh.t..IpchWpc+pss.Fh.GsNCRIouFsLhKshIpsssstp.......ssph....LFhDt-uIcss..............plhspp-cpcFpsLFopl.Tc.....sTpDlc.hHtpphpctaKphtlpF....tss+hpllSVhlHsp...-sshLFlGHsGVLlsscsG.aLFlEKluFppPYQAlKFss+p-lpc.YLhs+Ycs..htspsp.A+PFIM-NDchl ...................................u.SYoNLNspsSsEEVKSLL......SAHLDssSVDuFFNLVNDYNs.lV.....GSTGLoGDFToF...T+TEYD.VEK..IScLWspKKGD.FV.GTNCRINSYsLLKNSlTIPKlEK.........sDpL..LFlDNDAIDKG..............KlFDupDKE-FDILFSR.VPTE.....ATT.D.VK.VHA-KMEsaFSQ..FpF.....NEKARMLSVVLHD...NLDG-aLFVGHVGVLVPsDDG.aLFVEKLTFEEPYQAIKFASKEDCYK.YLuTKYsD..YTGEGL.AKPFIMDNDKWV............. 0 8 14 19 +13976 PF14134 DUF4301 Domain of unknown function (DUF4301) Bateman A agb Jackhmmer:C2M610 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 505 and 516 amino acids in length. 
27.00 27.00 84.20 83.60 21.10 18.40 hmmbuild -o /dev/null HMM SEED 513 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.69 0.70 -5.95 43 177 2011-05-10 16:11:31 2011-05-10 17:11:31 1 2 172 0 47 182 62 495.50 53 98.03 CHANGED hoppDlpQlcp.......+Glo.cplppQlchFccGhPhlpL.puAolscGIhtlsscEpcphlsha-p..tppstcllKFVPASGAAoRMFKsLapFL.............su.hscppssthppFFsslccFsFYccL.ppslpspt.slssLhsstchtt.llctLLstcGLNYGshPKGLL.FHpYs.-ts.pTP.....hEEHLsEuAhYAsss.upsclHFTVSp-HhctFcpplschtsphEpchsspacloFShQ+sSTDTlAVsh-.NpPFRsc-GpLLFRPGGHGALI-NLN-lDADllFIKNIDNVV....s-ph+pcTlpYKKlLAGlLlplQc+sFpYLchL-ps.phscpplpElhpFlpccLshph....s.phpphsppphhpaLpp+LNRPlRVCGMVKNpGEPGGGPFWltstDGslSLQIlESuQIDhssscptphhppuTHFNPVDLVCul+sY+GcpFcLhcaVDppoGFIopKS+sG+pLKALELPGLWNGAMAcWNTlFVEVPltTFNPVKTVNDLL+spHQ ...........h.oppDhc.ltp.......+GIopcplpcQLppFppGFPaLcLpuAAol.p.c.GIhshspcEpcpalstW-s.hppss+plVKFVPASGAASRMFKsLFpFL.............sAth..spPsscFccpFFssIccFAFYccLstsChc.spspslssL.htpspY.KslVssLLpspGLNYGsLPKGLLhFH+Ys.-.ss.RTP.....hEEHLsEuAhYAssp.GcsplHFTVSpEHppLFcphlscphstapc+auVcaploFSpQKPSTDTIAssh-.NpPFRstsGpLLFRPGGHGALIENLNDlDADllFIKNIDNVV....PD+LKs-TlpYKKllAGlLlsLQcpsFpYLchL-su.cho+ppltEllpFlppcLss+p....s.-hcpLp-spLshYL+pKLNRPhRVCGMVKNsGEPGGGPFhshNsDGolSLQILESSQIDhssscppchFcpuTHFNPVDLVCul+DYKGc+FcLscYVDcsTGFIS.KSKsG+-LKALELPGLWNGAMScWNTlFVEVPLsTFNPVKTVNDLLRppHQ.. 0 19 41 47 +13977 PF14135 DUF4302 Domain of unknown function (DUF4302) Bateman A agb Jackhmmer:C2M6D7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 344 and 443 amino acids in length. There are two completely conserved residues (R and L) that may be functionally important. 
27.00 27.00 40.70 33.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.64 0.70 -4.99 51 186 2011-05-10 16:14:32 2011-05-10 17:14:32 1 2 93 0 30 178 0 240.70 24 56.39 CHANGED -p-sl.F-c..osupRlspslpphpclL...uutsG.WhhpYYssps..........................aGG.ashhhKFsssp.Vshtu-hs.........sssppssShYplppspu.shLoFDTYN.h.....lHhhupPsssts........pGhpGDa..EFll.....hpsss-..plhlc.......G++stNphhhs....htpstshppahsphppsppthp....hstaphhh...............sspphshthpspppthhhs..hsssp......hphsahhTspG.l.phhpPl....plsGhp.hpphsaspssp ............p-sl.F-c..osupRhppslpchpchL..psuspG.WhhpYasp.ss..........................hGG.ashhhKFsssp.Vshtu-hs..........ssssthsSpYplp.pspu.shLoFsTYN..h.....lHhhusPpttts............tGhtuDa..EFll.....hssss-..plhlc.......G+Kptsphhhs....htpshshcph...hpp.ht.s.hppshp......hhhhphhh.................sspths.hhh.ps.p..t.pthhhs.......hssss.............hshsahhTp...pG....l.p.........hh.pPl.......plsGhp...hppasaspt..p................ 0 20 27 30 +13978 PF14136 DUF4303 Domain of unknown function (DUF4303) Bateman A agb Jackhmmer:C2M6J2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 169 and 192 amino acids in length. 
21.60 21.60 21.80 21.60 21.30 20.90 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.89 0.71 -4.58 38 181 2011-05-10 16:15:50 2011-05-10 17:15:50 1 3 156 0 39 155 8 149.60 23 79.57 CHANGED ltpAs+psapplhp..cpss-shYuauLhosspuhs..lssuAsoc-sLptttp........p..pst..h.........................+Wsss-Wsa...t.sttphFsplsphltphscphp.p.t...........apphhptlhpshlsuLppLcpcGlFust...tph.hlhlslsss-sspt.....ppsctLNss .....................................................lhpuhpptapplhp..cp.ss-shYua...u...l...h.....oDsssts...lsssANocchlph.pts.........cs...shh...........................+auss.EWsh.......hssht...hFscl...schlpp.h.pphhpps.................aphhhp..plhpthlpsLhcLcp-Gl.Fust....sc..hlhlslsDsssp.hh....ppsphLNs.h.............................................................................. 0 6 20 27 +13979 PF14137 DUF4304 Domain of unknown function (DUF4304) Bateman A agb Jackhmmer:C2M6R4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 223 amino acids in length. 22.50 22.50 22.70 22.50 21.70 21.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.55 0.71 -3.98 48 164 2011-05-10 16:19:06 2011-05-10 17:19:06 1 1 144 0 38 153 4 111.30 17 58.01 CHANGED lsshLKptGFpK.pshsaa+.pppphhtllshQ+sph.........thpFhlNlGlhshthsp................................ph.pthshthpt...Rluslhspt..............s.haplcspp.shp.thhppltp.......tlpphllsah ...................tthL+.hGFpp..pt.s..ah+.pppshhhhlshQ+Sph.................sspFhls..lGlhshtltp..............................................................tt..pp.s..hhhts......+lstlh.pp................hapl..stt....sht.hhtpl.t........lpt.hh.......................................................... 
0 13 27 31 +13980 PF14138 COX16 Cytochrome c oxidase assembly protein COX16 Bateman A agb Jackhmmer:Q5ACH7 Family This family represents homologues of COX16 [1] which has been shown to be involved in assembly of cytochrome oxidase [2]. Protein in this family are typically between 106 and 134 amino acids in length. 27.00 27.00 27.70 27.70 25.60 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.58 0.72 -3.81 52 253 2011-05-10 16:50:56 2011-05-10 17:50:56 1 11 223 0 174 242 0 79.60 35 58.69 CHANGED lhaGLPFlhhlVuGSasL..pphoplRY-hpcpKspph.sppEthsht..............pp++cl..slc-E.Yh+l...psh.sh-.........sWE.hRl.R.htE ................lhaGlPhlhllVuGSFsL..ppho.tlRY-ttcpKscp..spc-ththt...............................tpp+cl.....slc-E.Yh............+l....pst..sh-.............................sWEphRl.R.ht-................. 0 47 90 140 +13981 PF14139 YpzG YpzG-like protein Eberhardt R re3 Jackhmmer:C0H444 Family The YpzG-like protein family includes the B. subtilis YpzG protein Swiss:C0H444 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a conserved QVNG sequence motif. 27.00 27.00 32.20 38.50 17.40 16.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.56 0.72 -4.03 8 104 2011-05-11 07:19:52 2011-05-11 08:19:52 1 1 104 0 17 34 0 49.80 68 98.29 CHANGED MGp..p+cpaDp...p.YSsPFspPWsNPKHA+uQVNGcTQQoQsLIILcspsRK+p .......MS...YRDRLDs.....RSELFNHTWTRPKHAKAQVNGQTQQTQSLIILANECKKRQ. 0 3 11 12 +13982 PF14140 YpzI YpzI-like protein Eberhardt R re3 Jackhmmer:C0H446 Family The YpzI-like protein family includes the B. subtilis YpzI protein Swiss:C0H446 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 
27.00 27.00 32.40 58.00 26.20 16.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -8.08 0.72 -4.46 8 121 2011-05-11 07:34:23 2011-05-11 08:34:23 1 1 121 0 13 41 0 41.90 80 91.57 CHANGED MGKDRQEKKLKtS+RVESDRDQSLpYsGATpL-oPEcARKRN MGKDRQERKLRESRRVESDRDQSLQYPGATuLDTPEQARKQN.. 0 1 8 10 +13983 PF14141 YqzM YqzM-like protein Eberhardt R re3 Jackhmmer:C0H453 Family The YqzM-like protein family includes the B. subtilis YqzM protein Swiss:C0H453 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 27.00 27.00 33.70 33.60 26.70 25.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.09 0.72 -4.17 11 134 2011-05-11 07:51:06 2011-05-11 08:51:06 1 1 134 0 22 49 0 42.90 79 95.72 CHANGED NcFE.KDVQsKRNDAlDSuVGFlVSFGFFsslFlIAslIcFlGp ...NDFE.QNVQSKRNDAIDSGVGFIVSFGFFATLFIIATVIKFIGS... 0 6 15 18 +13984 PF14142 YrzO YrzO-like protein Eberhardt R re3 Jackhmmer:C0H458 Family The YrzO-like protein family includes the B. subtilis YrzO protein Swiss:C0H458 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 27.00 27.00 100.90 100.80 23.70 23.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.23 0.72 -4.32 2 83 2011-05-11 07:57:22 2011-05-11 08:57:22 1 1 82 0 4 18 0 46.00 90 99.79 CHANGED MhEuLLFFhusGlsCELAAINRNGRKpIKQQAEhIQlLKE.h.KsI MLESLLFFFAVGVACELAAINRNGRKKIKQQAEhIQLLKELKER....... 0 1 2 2 +13985 PF14143 YrhC YrhC-like protein Eberhardt R re3 Jackhmmer:O05395 Family The YrhC-like protein family includes the B. subtilis YrhC protein Swiss:O05395 which is functionally uncharacterised. YrhC is on the same operon as the MccA and MccB genes, which are involved in the conversion of methionine to cysteine. 
Expression of this operon is repressed in the presence of sulphate or cysteine [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 35.80 35.70 25.40 21.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.36 0.72 -4.03 17 134 2011-05-11 08:20:14 2011-05-11 09:20:14 1 1 132 0 19 88 0 71.80 56 89.73 CHANGED pcLcsKhtDYKpFuhlLLAlSsFLYlGslIP.tuhpsspp.hhhsulslhLshuhhhh+RuhhhpcpLcEt- ...KELppKIEDYoRFGQlLLAVSThLMlGLLIPsGuKEThQhFlMMGoIVIFLuLSFFFFpRVKlhRccLEEsE...... 0 2 11 13 +13986 PF14144 DOG1 Seed dormancy control Coggill P pcc Jackhmmer:Q9SN45 Family This family of plant proteins appears to be a highly specific controller seed dormancy. 27.00 27.00 28.30 27.60 26.40 26.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.77 0.72 -3.94 81 488 2011-05-11 10:20:54 2011-05-11 11:20:54 1 7 52 0 217 469 0 77.90 41 23.87 CHANGED +plsEL....Rs.ALpup.h...........sDs-L+hlV-sshsHY.pclachKusA...A+sDVF...al.lSGhWpoPsER.sFLWlGGFRPS-Ll+lLhsp .............................+phsELRsAlpu+h...........sDs-LRh.lV-sshsHY.sclF.chKusA...A+sDVF...al.loGhWpoPuER.sFlWlGGFRPS-ll+llhs....... 0 28 122 175 +13987 PF14145 YrhK YrhK-like protein Eberhardt R re3 Jackhmmer:O05401 Family The YrhK-like protein family includes the B. subtilis YrhK protein Swiss:O05401 which is functionally uncharacterised. Its expression is under the control of the motility sigma factor sigma-D [1]. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. 
25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.07 0.72 -4.30 57 199 2011-05-11 10:48:16 2011-05-11 11:48:16 1 4 174 0 77 175 13 60.00 30 54.02 CHANGED hppY-hhth......................lsDhhsulhFllGSlhFhhs.s...ht............hsusWlFllGSlhhhl+Psl+ll+cl+ ..................p.ppYchlth............hsDhlhulhFllGSlhFhhc.t...ht........................phGsahFllGSlhhhl+Phl+llpph.h............ 1 33 54 71 +13988 PF14146 DUF4305 Domain of unknown function (DUF4305) Eberhardt R re3 Jackhmmer:O05524 Family This family includes the B. subtilis YdiK protein Swiss:O05524, which is functionally uncharacterised. This is not a homologue of E. coli YdiK, which belongs to Pfam:PF01594. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 27.80 27.80 26.90 26.70 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.61 0.72 -4.21 20 171 2011-05-11 11:57:29 2011-05-11 12:57:29 1 1 170 0 24 78 0 37.10 43 60.72 CHANGED llFsYhAlpsss-ss..WshaThllhlhAshDFshul+hl .hlFTahAlssVsDsh..WshaTILhhlMAshDFslulRLI. 0 6 15 18 +13989 PF14147 Spore_YhaL Sporulation protein YhaL Eberhardt R re3 Jackhmmer:O07520 Family This family of proteins is involved in sporulation. In B. subtilis its expression is regulated by the early mother-cell-specific transcription factor sigma-E [1]. 27.00 27.00 52.80 52.70 25.70 21.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.60 0.72 -4.52 10 131 2011-05-11 12:12:26 2011-05-11 13:12:26 1 1 131 0 20 58 0 49.50 70 76.14 CHANGED hPWWVYhlIlGIlFSuYMsl+ouKEE+EhDQcaIE+EGclYMcRlEcERE+R ....hPWWVYLVIlGIlhSGYMVLYTSKKEQ-MDNEFIEKEGEVYMKRLcEEREKR.. 0 3 12 14 +13990 PF14148 YhdB YhdB-like protein Bateman A re3 Jackhmmer:O07530 Family The YhdB-like protein family includes the B. 
subtilis YhdB protein Swiss:O07530, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 57 and 82 amino acids in length. There are two conserved sequence motifs: LMVRT and FLHAY. 27.00 27.00 74.10 74.00 22.50 21.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.76 0.72 -4.23 6 132 2011-05-11 12:24:37 2011-05-11 13:24:37 1 1 131 0 16 43 0 69.50 71 93.88 CHANGED Ms.hsDYDKALYYTaRSpWDsLLILMVRTKDDLLSKRIE+FLHAYpFp+Das.V-KpL.sLLpYIDHA..hoshpppEt ...p.h.DYD+ALYYTapspWDpLLlLMVQTsDQLFSKRIEHFLHAYQYSKELPEVDKQLQLLFQYIDHASQKSHlEElE.Q....... 0 1 8 10 +13991 PF14149 YhfH YhfH-like protein Eberhardt R re3 Jackhmmer:O07606 Family The YhfH-like protein family includes the B. subtilis YhfH protein Swiss:O07606, which is functionally uncharacterised. Its expression is repressed by the Spx paralogue MgsR, which regulates genes involved in stress response [1]. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 53 amino acids in length. 27.00 27.00 27.00 30.00 26.90 25.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -7.99 0.72 -4.16 17 140 2011-05-11 12:36:23 2011-05-11 13:36:23 1 1 127 0 25 77 0 36.90 66 75.72 CHANGED lpphsEFFRNLPsKpCscCGcc.I-EQpEsYtspC-cC ............-pshEFFRNLPoKsCAcCGKE.IDEQHEuYpNcCDDC 0 8 19 22 +13992 PF14150 YesK YesK-like protein Eberhardt R re3 Jackhmmer:O31514 Family The YesK-like protein family includes the B. subtilis YesK protein Swiss:O31514, which is functionally uncharacterised. Its expression is regulated by the sporulation-specific sigma factor sigma-E [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 
25.00 25.00 26.60 26.20 24.40 24.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.69 0.72 -3.88 10 127 2011-05-11 12:48:56 2011-05-11 13:48:56 1 1 108 0 12 74 0 76.40 57 81.94 CHANGED FallGhlohlllFslShll+++aPsKp.h-hlLuhlLIllslhslhlSlFllGGWEGhGlGlluhhlllGolIGhIActhl+ ........FYIIGGlTIlLVFAIS..YLLKKRFPDKQ..FDIIFALuLILLCLAoFPVTMhlIGGWEGMGYGFIGFFVLLGTLIGMIAHQLlK........ 0 1 8 8 +13993 PF14151 YfhD YfhD-like protein Eberhardt R re3 Jackhmmer:O31572 Family The YfhD-like protein family includes the B. subtilis YfhD protein Swiss:O31572, which is functionally uncharacterised. Its expression is regulated by the sporulation-specific sigma factor sigma-F [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a single completely conserved residue E that may be functionally important. 27.00 27.00 27.00 38.20 26.50 26.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.11 0.72 -3.86 16 145 2011-05-11 12:59:37 2011-05-11 13:59:37 1 1 141 0 27 79 0 54.30 55 99.03 CHANGED MsRsptpKs+cK.NctphsQsPct....uDuhDVEFSpElADp-DhEApsRupAADtRt........pppcp ........................................scpph.p.sKs......oDGlDVEFS+ELADHsDLEApARAsAADsRQK...p....sp........ 0 4 17 19 +13994 PF14152 YfhE YfhE-like protein Bateman A re3 Jackhmmer:O31573 Family The YfhE-like protein family includes the B. subtilis YfhE protein Swiss:O31573, which is functionally uncharacterised. Its expression may be regulated by the sigma factor sigma-B, which regulates the expression of stress-response proteins [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved QEV sequence motif. 
27.00 27.00 36.90 36.50 19.80 18.50 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.75 0.72 -4.05 13 126 2011-05-11 13:10:06 2011-05-11 14:10:06 1 1 124 0 15 42 0 36.60 66 92.22 CHANGED -.....K++pccs+soLopsQEVpYup-FKtAD+A..u..pt++ ............DKKKRDKsKNsLSSTQEVLYQREF+KADRA..AGYRuKS......... 0 2 8 10 +13995 PF14153 Spore_coat_CotO Spore coat protein CotO Eberhardt R re3 Jackhmmer:O31622 Family Bacillus spores are protected by a protein shell consisting of over 50 different polypeptides, known as the coat. This family of proteins has an important morphogenetic role in coat assembly, it is involved in the assembly of at least 5 different coat proteins including CotB, CotG, CotS, CotSA and CotW. It is likely to act at a late stage of coat assembly [1]. 35.00 35.00 43.30 40.20 34.00 33.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.20 0.71 -4.85 16 142 2011-05-11 14:03:18 2011-05-11 15:03:18 1 2 126 0 22 123 0 159.60 42 98.69 CHANGED s+cph.....scpcPLLYIsQPchpc.spspMQcsaht+pcppp..................ttppptppp...................p........p..t....t...p..p..t..tppttppp..........................................................h.hcKsFpcMsl-EKIsFLsphPcplPslpCplhocpcoYcGllhshcss...plhltss......tpt..spsplsh-cIhSlphlGF ...............................................ps..Ks.p..sssKPLLYIsQssh-h.ussphpcIllsphcscs....................pcEppscscph......t...pth.E.ppppt..............pcp.tp..p..tp.p.p..ppppc.s.......................................................psh.hpKsF+-Ms.-EKIcFLhshPHalP+l+CcIcTsshoYhGsIluhRNG...hVsIhss......ssh..c-hcLuI--IpSIsMhGF........ 0 1 11 14 +13996 PF14154 DUF4306 Domain of unknown function (DUF4306) Eberhardt R re3 Jackhmmer:O31651 Family This family includes the B. subtilis YjdJ protein Swiss:O05524, which is functionally uncharacterised. This is not a homologue of E. coli YjdJ, which belongs to Pfam:PF00583. 
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 152 amino acids in length. 27.00 27.00 44.90 44.30 21.40 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.90 0.72 -4.01 8 123 2011-05-11 14:12:19 2011-05-11 15:12:19 1 2 99 0 11 90 0 87.50 47 78.91 CHANGED hlQaGhuhhlFlFSAlsoWYpGSpLlsssa-WKaoshFophh.G..slss.cpISQLDaFlYAAKapPshsslMllShlYlLsLlhhhlhp .....hlQhu.uhhlFlhSAL.ho.WYQGSsLl-sP.EWKYoAKFTshhp.G..TVoshcDIYQIDFFlYAAKFYPsshIVMllSlLYhLlLIlahlh......... 0 1 5 7 +13997 PF14155 DUF4307 Domain of unknown function (DUF4307) Bateman A agb Jackhmmer:C7PZ49 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 132 and 153 amino acids in length. There is a single completely conserved residue C that may be functionally important. 27.00 27.00 38.70 52.60 25.50 23.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.34 0.71 -4.28 57 347 2011-05-11 14:48:55 2011-05-11 15:48:55 1 1 343 0 86 210 61 112.00 31 78.09 CHANGED pscpthhslsslhslsuhshhuh..hsapphusssl..suphluacllsDs.psplphpVs+s.s..upsuhChVcAhspctuEVGRc-lhl....ss......ssppphchssslcTsp.ussuclh.GC ......tcthhhlll.hVLsllhhshhsh...huhp.h.us..ssl..puplhGaphl...sD.s.pssVTlpVpRsDP..ShsusChVpAput-tuEVGR+-lhl...Ps..........ussps.plpssV+Tpp.uVsu-lhuC... 0 26 65 82 +13998 PF14156 AbbA_antirepres Antirepressor AbbA Eberhardt R re3 Jackhmmer:O31697 Family This family inactivates the repressor AbrB, which represses genes switched on during the transition from the exponential to the stationary phase of growth. It binds to AbrB and prevents it from binding to DNA [1]. 
27.00 27.00 54.50 54.40 23.20 21.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.05 0.72 -4.32 6 115 2011-05-11 14:58:50 2011-05-11 15:58:50 1 1 115 0 12 35 0 61.00 70 96.46 CHANGED Mp+cls...LTpEEppLLLDILFpQpYAhElLusELsDIEsGhKpsDhppY++lsRLasRL+sE .Mp+EhV...LTcEEESLLLDILFQQNYASEILAVELTDIENGLKpTDVhQYKKITRLFYRLKNK. 0 1 4 6 +13999 PF14157 YmzC YmzC-like protein Eberhardt R re3 Jackhmmer:O31797 Family The YmzC-like protein family includes the B. subtilis YmzC protein Swiss:O31797, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 91 amino acids in length. There is a conserved ELR sequence motif. 27.00 27.00 35.10 34.60 25.60 20.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -8.97 0.72 -4.15 7 102 2011-05-11 15:18:03 2011-05-11 16:18:03 1 1 94 6 6 68 0 58.60 57 69.31 CHANGED hNpEpllEh.ssas.....os.psppssMsQltcNpFAlh.......-csplKIa+as.cTNclpLlKEass-E ......spEpsVEh.ssas.....Ss..hsTssMTQlu-NTFAhp......sEsspIKIFKFNPDTNEIKLIKEFhusE... 0 1 3 3 +14000 PF14158 YndJ YndJ-like protein Eberhardt R re3 Jackhmmer:O31813 Family The YndJ-like protein family includes the B. subtilis YndJ protein Swiss:O31813, which is functionally uncharacterised. This family is found in bacteria and archaea, and is typically between 222 and 269 amino acids in length. There are two completely conserved G residues that may be functionally important. 
27.00 27.00 42.30 41.70 23.40 23.30 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.69 0.70 -5.33 14 139 2011-05-11 15:24:24 2011-05-11 16:24:24 1 2 133 0 21 118 0 248.50 55 50.67 CHANGED shssspthlhLusLhhlPhshphh.pts......h..shhh+....h.PluAlsAshuhshs.....tshhAssWhshsshhAlhussRl.......hpRshc....hpElulsuullYlssGuhWhhstsuslslhtFu..IlhLTAsHFHauuFshPlhsGLLGRt.t.......tct.hlaphhshhIhluPhh.lAlGIshSchh-hhushlhssAlhuhuhhshtps......hcspsuthLltluuhsLhholshuhhYuhGphhupshl.sIspMlhhHGssNAhuV ...........slsslEAIlLLSlLLFlPhohsllDKcs....RsGu.llFYK.VShLYPIAAIsAhLAFVTs.......thhFAllWFlYTGl.....lAL...FGlsRL.......................LERGh+P.................LEEsAIDSAFIYLFLGGFWFFASVA+lo.IMpFSsDIlLLTAAHFHYSAFLLPLSAGLlG.RK+c............KpS..KlYcsIhalIhISPMT.VAIGITYSRl.....FEFFAVhLYLsAIYuYuh.YVW+s+......FsuloAKILLllSSoTLMlTIhFSLIYSYGNh+pVhTI.TIAQMVWIHGVVNGlGV....... 0 7 14 18 +14001 PF14159 CAAD DUF4308; CAAD domains of cyanobacterial aminoacyl-tRNA synthetase Bateman A agb Jackhmmer:B1XK71 Domain This domain is present in aminoacyl-tRNA synthetases (aaRSs), enzymes that couple tRNAs to their cognate amino acids [1]. aaRSs from cyanobacteria containing the CAAD (for cyanobacterial aminoacyl-tRNA synthetases appended domain) protein domains are localised in the thylakoid membrane. The domain bears two putative transmembrane helices and is present in glutamyl-, isoleucyl-, leucyl-, and valyl-tRNA synthetases, the latter of which has probably recruited the domain more than once during evolution. 
27.00 27.00 27.70 27.70 25.70 26.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.78 0.72 -4.34 96 289 2011-05-11 16:33:04 2011-05-11 17:33:04 1 7 100 0 141 276 113 86.30 31 38.78 CHANGED lspl.pthsph..sphphshhshuhhhh.llslhlssullsAIsplPLlsslh....ELlGluYosWFsaRhLlhppsRp-Lhpclp..................shcpp.....lhG ........................h..pl.phhsph..sphphhhhssuh....llulhlssullsAIsplPLlPsl.h....ELVGlG..YouWFsaRaLlhppsRc-Lhsclp...............sl+pplhG........................ 0 22 85 123 +14002 PF14160 FAM110_C Centrosome-associated C terminus Coggill P pcc Jackhmmer:Q8TC76 Family This is the C-terminus of a family of proteins that colocalise with the centrosome/microtubule organisation centre in interphase and at the spindle poles in mitosis. 21.00 21.00 22.10 23.30 20.00 17.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.67 0.71 -3.13 16 181 2011-05-11 17:01:40 2011-05-11 18:01:40 1 2 70 0 114 169 0 111.10 43 28.84 CHANGED p+.psuLpRSKS..DlSc.RaStutu-lE+FFsaCGLDsp.l-tLGh.-shtpss....S..DhsSl.phcSsossuS-.s......tpopcSs.-.sh.............pE-tts-RlP.GlSlIERNARVIKWLYup+pA+ .................................s.psuLpRSKS..DLSs.Rauc.....shu-lERFFsaCGLDPEtl-sLG.h...EpF.upus...................S..DhsSl....shpSsossoS-.s..............tpSpc.Ss..s.sh........................................t--...pst-RV.....P....u.lSllERNARlIKWLYuh+pA+.......... 1 14 22 61 +14003 PF14161 FAM110_N Centrosome-associated N terminus Coggill P pcc Jackhmmer:Q8TC76 Family This is the N-terminus of a family of proteins that colocalise with the centrosome/microtubule organisation centre in interphase and at the spindle poles in mitosis. 
22.20 22.20 22.20 22.20 21.60 21.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.39 0.72 -3.79 5 153 2011-05-11 17:02:04 2011-05-11 18:02:04 1 2 49 0 93 142 0 103.50 38 31.34 CHANGED s.uK....PluPAGPh.ouAVPLRILNKGP-YF.R.RQA..EssP+RhSAVERLEADKAKYVKSQEVINAKQEPVK..PPVLt.KPhsSPu.....PKRuu....uoP....ohKApss.puKo-SG.u......+RsNLcL .....................................hs...ssshP.hRlhsKsP...s..Yh..R...R.s...-..s...s..s..+..+.....hSAVERLEADKAKYVKSppVlss+QE.P.Vp.....sslht..pP..hsss......spt.sh.........tP.....sh+.......................................................................................... 0 8 16 47 +14004 PF14162 YozD YozD-like protein Eberhardt R re3 Jackhmmer:O31863 Family The YozD-like protein family includes the B. subtilis YozD protein Swiss:O31863, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 92.00 91.90 23.60 17.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.75 0.72 -3.89 6 127 2011-05-12 07:20:28 2011-05-12 08:20:28 1 1 127 0 15 38 0 56.90 81 96.98 CHANGED MKEIEVVIDTEEIAEFFYpELlRRGYVPoE-ElEElADITF-YLlEKChIDEEh--- .MKEIEVVIDTEEIAEFFYEQLIERGYVPKREEIEDLADITFEYLLEKCMIDEVFDEE.. 0 1 7 9 +14005 PF14163 SieB Superinfection exclusion protein B Eberhardt R re3 Jackhmmer:O31930 Family This family includes superinfection exclusion proteins. These proteins prevent the growth of superinfecting phage which are insensitive to repression. It aborts lytic development of superinfecting phage [1-3]. 
26.00 26.00 26.10 26.10 25.90 25.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.42 0.71 -4.64 34 252 2011-05-12 07:43:39 2011-05-12 08:43:39 1 1 226 0 34 153 4 143.90 25 82.81 CHANGED hhhhLslhsuhLL....hhPpshlphlslsphhsp...at.alGlshllusAallspllshh....hp.hhph......hpp++phctlccp.....lptLsspE+slLppahhps..pphlpLshssssVpsLhppsIlphhust........ssttphplshphpsahpcpltphspt .................hahllIhhh..hll....lhPsshhphlsltp..hh....t.ahahlllhslSallstslsph.......hcthh.t........pp+ptpcphtph.......hpsL..o.tE..pAlL.t.hltss...p.l....ph.pssPhshpLhc+Gllp+hsss........sspstahIs-papp.hhhthtsc...t....................................... 0 8 16 23 +14006 PF14164 YqzH YqzH-like protein Eberhardt R re3 Jackhmmer:O32014 Family The YqzH-like protein family includes the B. subtilis YqzH protein Swiss:O32014, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 27.80 40.00 23.90 20.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -9.00 0.72 -4.06 9 119 2011-05-12 07:53:05 2011-05-12 08:53:05 1 1 119 0 12 63 0 60.80 59 94.71 CHANGED MsEKhIcKhltpshcQYu.s.pshPlosp-hcpLhcpIppthspcs-hDlYEhlcDlVY-YlTu .MNEKLIEKMIIKSFQQY....QCsPlSpEDQEMLlKHIQslhHSNscIDlYEtlEDIVYDYVTG. 0 2 4 6 +14007 PF14165 YtzH YtzH-like protein Eberhardt R re3 Jackhmmer:O32066 Family The YtzH-like protein family includes the B. subtilis YtzH protein Swiss:O32066, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There is a conserved DIL sequence motif. 
27.00 27.00 38.30 71.50 20.90 18.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.80 0.72 -3.75 14 132 2011-05-12 07:58:12 2011-05-12 08:58:12 1 1 132 0 21 51 0 86.70 69 94.45 CHANGED LsppHQLsLL+DILssHpsDCsGTVuEsEQlERLlpSLhsNssl.ssslKslLpcI....YsYuQsGh.utslssHIstppppLspWlssls ..INQQHQLEVLKDILlNHQSDCCGTVSECEQLERLIQSLLANDsI.SSDsKsMLNDV....YSYSQSGKSSSNLDNHISNNQEQLTQWIuGMD. 0 4 13 15 +14008 PF14166 YueH YueH-like protein Eberhardt R re3 Jackhmmer:O32093 Family The YueH-like protein family includes the B. subtilis YueH protein Swiss:O32093, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 38.30 38.00 26.70 23.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.72 0.72 -4.22 17 252 2011-05-12 08:07:03 2011-05-12 09:07:03 1 1 252 0 16 76 0 75.00 57 93.12 CHANGED KI...chhptps.hhsVYlaEscccp.hllAIPslpWShplshp.-ppplt-cLhhpLhphh-EppAppLAsplspWlpc ......hl....NKRhlDEGKTIDVYLFEulNsQ.IIIAIPDWFWSYQMAMTL....DEETCFEAILMQLF..VFKEEEEAESIASQLTDWIET... 0 2 5 12 +14009 PF14167 YfkD YfkD-like protein Eberhardt R re3 Jackhmmer:O34579 Family The YfkD-like protein family includes the B. subtilis YfkD protein Swiss:O34579, which is functionally uncharacterised. Its expression is regulated by the sigma factor sigma-B, which regulates the expression of stress-response proteins, and by the forespore-specific sigma factor sigma-G [1,2]. This family of proteins is found in bacteria. Proteins in this family are typically between 254 and 265 amino acids in length. 
27.00 27.00 29.90 29.60 22.40 21.40 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.59 0.70 -5.29 10 138 2011-05-12 08:22:59 2011-05-12 09:22:59 1 1 137 0 20 94 0 229.10 72 89.47 CHANGED PsSVlsISKENTYPNPTQDLPhLQPS-LAcpLL-oo-VKIENP-LIRhLNESSIsuoPLAIGYRAoIYLGpWPLsYESsETssNWEYQKVNTNhlDNRGGpuspplpYpQEpQK+V+GGLTAcIPNu--VKKMMLlKAtEKTsLPLAFcTVIGtGTKK-psYsVsPK+lGYLauYsPAVNEKGKVTYGEVYLsLKGsK++LsVKNVTpQGIGAWIPVQD+lSFuFhsSspP+ .....PSSVLNISKDNTaPN-AQDLPRLQPSKFAQELLKTANIKIENPDLIRMFNETTISNAPLAVGYRAKIYLGQWALpYESlDTSlNWEYKQVNRNVYDNRGGD+LYPLRYKQEoQKTVEGsLTAchKcAsDVKKMMLLKAhEKVQLPLSFKTTIGYGTG+ERVYNISPsQLGYLYAYTPAVNEKGKVTFGEVYLVLKGNQK+LVVKNlTSQGIGAuIPIpDHLaFKFlSSS+s.p............................. 0 3 12 14 +14010 PF14168 YjzC YjzC-like protein Eberhardt R re3 Jackhmmer:O34585 Family The YjzC-like protein family includes the B. subtilis YjzC protein Swiss:O34585, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 25.30 25.20 24.90 24.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.60 0.72 -4.04 20 212 2011-05-12 09:55:35 2011-05-12 10:55:35 1 1 206 0 28 102 0 56.70 51 92.41 CHANGED M......G............p.....pp.F+PGp+APNsGhYl.ElGt.pGut..VpsP+pVclctGD+FP-ToN+sR..+Wpp.+ ..............MGppppF+sGpKA.PNs.GlY.....V.ElG-.sGuh....Vp.sPphlpLstG-+FP-To..N+sR..tWph...... 0 8 18 20 +14011 PF14169 YdjO Cold-inducible protein YdjO Eberhardt R re3 Jackhmmer:O34759 Family This family includes the B. subtilis YdjO protein Swiss:O34759, which is functionally uncharacterised. This is not a homologue of E. coli YdjO, Swiss:P76210. B. subtilis YdjO is cold-inducible [1]. Its expression is induced by the extracytoplasmic function sigma factor sigma-W [2]. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 
27.00 27.00 28.10 30.90 24.00 26.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.35 0.72 -4.52 15 173 2011-05-12 10:24:33 2011-05-12 11:24:33 1 2 128 0 30 85 0 59.30 52 59.05 CHANGED MYapK+spEslPcEp...TsVWpCouEDCpGWMRcNFoh--pPhCPLCpSsMhsupRhLspl ........h.RKpshtplP.Ep.....TsVWECpuEDChGWMR......KNF.........o.........a......EEc..........PpCPLC+SsMpsGpRhLPpL.. 0 8 19 20 +14013 PF14171 SpoIISA_toxin Toxin SpoIISA, type II toxin-antitoxin system Eberhardt R re3 Jackhmmer:O34853 Family SpoIISA is a toxin which causes lysis of vegetatively growing cells. It forms part of a type II toxin-antitoxin system, where the SpoIISB protein, Pfam:PF14185, acts as an antitoxin. It is a transmembrane protein, with a cytoplasmic domain accounting for approximately two-thirds of the protein. The structure of the cytoplasmic domain resembles that of the GAF domains, Pfam: PF01590. SpoIISB binds to the cytoplasmic domain of SpoIISA with high affinity [1]. 27.00 27.00 32.30 32.20 19.30 19.10 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.41 0.70 -5.11 7 118 2011-05-12 10:56:17 2011-05-12 11:56:17 1 1 113 2 13 77 0 239.80 59 97.97 CHANGED lLFFQhhV..........WlllhuLhlY.VaAsWRaEtplcE+hhsIRKTWYhLaVhGuslaWTa-PpSlFTcWpcYLIlAVhFsllDAFIFLouYlpKluusE..hpTDTcpllEcNs-hL+hhhs+LKsaphLLKs-sIHlY....YGoh-AYhpGlccllttaA-KhslpAulh.assptsKDcLhcp..................hcpptslpscLsRp-VYYpppsKlVlIP.FolpsppaVlKLoS-sllTEFDhLLhsSLssIYDLlhs ...............NIRIG.......l..........FlLAIVFlVL.VF..FYW+NEELYEEKKQRI...RKTWYGLFIlSVTVYFMIKGID.LTLWKNLLMFTAMVIFVDIAFILTPNISEIWGAK....FSDIGKTVQSIKRSLIASKARGEIYTTIIQNVNPuVFGT..MEWHTEEEYTKSLNsFLDSYGEKIGAKIVVFEAAKELNTN..F.Rsh+sp.............................FSh.IlPlEaIEQLNEQ+AV..QVENVGIIP.AKIV.SDVFIVI.D...GKKNNLQDRDFENVYNLTIHHSYFS.K.... 0 2 8 9 +14014 PF14172 DUF4309 Domain of unknown function (DUF4309) Eberhardt R re3 Jackhmmer:O34960 Family This family includes the B. 
subtilis YjgB protein Swiss:O34960, which is functionally uncharacterised. This is not a homologue of E. coli YjgB, Swiss: P27250. Expression of B. subtilis YjgB is regulated by the alternative transcription factor sigma-B [1]. This family is found in bacteria, and is approximately 140 amino acids in length. 25.00 25.00 25.10 25.00 24.60 24.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.63 0.71 -4.18 15 166 2011-05-12 12:03:59 2011-05-12 13:03:59 1 4 156 0 29 141 1 131.10 44 57.89 CHANGED hlpslhchAhcGclss..ssFslspushpDVp+phGcP-psshsusuhshYtsatu.hsluFGasKsspIhElRSFssslc......ulTlpclcptlGpPspshTs....sc-pIh.....sY+sGppYcLcFVassssst......lDHlSlh .......h.lcsla-hAccGK..VP.N..ssFuspTusI--lcKsWGKs-+s-tuusu..hY.Aoass..+slsFG.aNKGuQlF-lRSacscLK.............uITLp-lcKsLGcPsplpss............uc-cIY.....VY+..lss....paEL+FlIscoTup..................lcHlSVh............. 0 10 22 22 +14015 PF14173 ComGG ComG operon protein 7 Eberhardt R re3 Jackhmmer:P25959 Family This family is required for DNA-binding during transformation of competent bacterial cells [1]. 25.00 25.00 25.30 25.00 24.90 23.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.89 0.72 -3.78 18 125 2011-05-12 12:37:24 2011-05-12 13:37:24 1 1 125 0 20 93 0 94.30 38 78.36 CHANGED hahsEpphhpppcphhch-pLlphulh-lppclt...pspptpssphpa.cGplsa.phscps.tshhplslpspsssGhphsspFhhshtspcIhc.WpE ............hhs-KpFhpEhEQpalh-ELLppulssl.K+-Lp.......pcEK.csshhFpYc+GcsSh.pashEs...-hIhVsLQCphK.ppshYpVsFpYc+KcpKIh-WlE.......... 0 4 12 14 +14016 PF14174 YycC YycC-like protein Eberhardt R re3 Jackhmmer:P37481 Family The YycC-like protein family includes the B. subtilis YycC protein Swiss:P37481, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a conserved HIL sequence motif. 
27.00 27.00 28.70 64.30 22.30 21.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.66 0.72 -4.29 13 148 2011-05-12 12:50:22 2011-05-12 13:50:22 1 1 148 0 26 62 0 49.30 75 97.51 CHANGED MRPLQISsETAl+LucpLsVPLEplMHMPpHILlQKlhEL.................ptppppcccs .MRPLQISPDTAV+LSKALGVPLEQLMHMPQHILlQKLlEL.............EKpNK-.......-E......... 0 6 15 18 +14017 PF14175 YaaC YaaC-like Protein Eberhardt R re3 Jackhmmer:P37526 Family The YaaC-like protein family includes the B. subtilis YaaC protein Swiss:P37526, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 320 and 333 amino acids in length. 27.00 27.00 32.80 30.40 20.30 24.10 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.04 0.70 -5.47 27 147 2011-05-12 13:03:11 2011-05-12 14:03:11 1 2 139 0 32 142 3 284.50 38 91.65 CHANGED hpplthapohphsppaLtppapp..htpspppuhpss.shlaalcpupsaYctuptu.slpl+PlLLaYGhhpLlKAhLLstssshs.....pposlluHGloo+cp+cpsaphhpc......cV+lpcs...G...laschscplh.......thpthpsEc........ashtcLLtpIPElppha........ptpphh.lclthhppp.......hhhspphhsphp...hstpcFhphhpppppthh.........ppppcpthhhp.s........................hp.hhstslhhphpstpaal.h...tpsthhplPplhhHYLlLYsLShlsRYcs-hWt-LLpup.spchshIppFLshstcphPhhltphl ............................WppLshFhSsppsQcaLtcsYpc..shppuccpSacNshsFlYaLcHucsaYchhphu.PhsIpPhLLFYGhsQLhKACLLThDPsYP.....psToVLAHGVTTRKRKKQsYpFh-D......EVKlQ+N...G...LFsHhucpLF...................phcplpsE+........asMhcLhtpIPELpslF......hhppctthhhclpp.ppp.......lshs.pllcph+...MoppRFscYlpp.hpth...............tcppppplhFpss.pt.......................hpshhssslhach.sspYalPh....+s..t.hLPElllHYLLLYNLSMIuRYET-WWh-LLt.uasSc.-YshIhpFLslotpKhPhhlsphl........................ 0 10 22 26 +14018 PF14176 YxiJ YxiJ-like protein Eberhardt R re3 Jackhmmer:P42320 Family The YxiJ-like protein family includes the B. 
subtilis YxiJ protein Swiss:P42320, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 27.00 27.00 31.00 36.50 23.70 23.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.43 0.72 -3.93 8 95 2011-05-12 13:10:07 2011-05-12 14:10:07 1 1 90 0 7 61 1 103.30 58 87.22 CHANGED lpELc.+hp..LcpPFPscslp+lp.sh........-ssshssDFhpaaslluGSLSYVLss..K+IPcpQh+hLcKSFFEhYPQYc.LcscItpY.cLacclpsaEcTRcLLlt .......FpELQ.KM+SPLaKPFPspshpKlp+DhpshF....TEDDCIsADhNaYWMHTAGTLSYVLNNNEQcIsFpQIKWL+KSFFEWFPQYpFLETEIhcYPILYRDFhsYEKTRKLLLY..... 0 2 6 6 +14019 PF14177 YkyB YkyB-like protein Eberhardt R re3 Jackhmmer:P42430 Family The YkyB-like protein family includes the B. subtilis YkyB protein Swiss:P42430, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. There are two conserved sequence motifs: NRHAKTA and HLG. 27.00 27.00 28.50 27.50 19.50 21.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.83 0.71 -4.04 9 140 2011-05-12 13:17:39 2011-05-12 14:17:39 1 1 137 0 20 74 0 135.60 65 88.32 CHANGED Tl-NLupAlFTVNRHAKTAhNP+aLYhLKK+AlpKLlcEGKA+KlGLHFSpNP+aSQQpSDVLVplGs......YaFHlPPsKcDFcpLPHLGsLspSaRNP+s+MsLspAKpLLppYsGhKpc..pss+...ppppYpKPsa++LG- ........s.IspLAQuIFlVNRHAKAATNPKYLYhLKKpALE+LIs.EcKAlKEGLH...FSR.NPRFS..QQQSDVLI+LGD......YFFHIPPTKEDF+hLPHLG+LE..SS..YRNPK..TTLSLTlAKKsLQ-YlG.cshc.ppp+..scss.............hh............ 0 1 11 14 +14020 PF14178 YppF YppF-like protein Eberhardt R re3 Jackhmmer:P50834 Family The YppF-like protein family includes the B. subtilis YppF protein Swiss:P50834, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved LLDF sequence motif. 
24.70 24.70 24.70 43.70 23.80 24.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.73 0.72 -4.44 9 125 2011-05-12 13:26:50 2011-05-12 14:26:50 1 1 123 0 15 45 0 59.80 62 92.84 CHANGED Msls-L+ppFhphKpYpPtshNELLDFAR+hYlcGcIsIs-YRsLl+ELEtsGAspPspp MVLGDLKQAFSQKKGYtTENsNELLDFARHhYLEGKICISDYRTlIKELEINGATKPTT.h... 0 1 6 9 +14021 PF14179 YppG YppG-like protein Eberhardt R re3 Jackhmmer:P50835 Family The YppG-like protein family includes the B. subtilis YppG protein Swiss:P50835, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 115 and 181 amino acids in length. There are two completely conserved residues (F and G) that may be functionally important. 27.00 27.00 28.90 38.40 22.20 21.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.93 0.71 -3.47 18 134 2011-05-12 13:35:43 2011-05-12 14:35:43 1 1 133 0 20 101 0 128.70 55 86.65 CHANGED psaptsh.sahP.t...........................tp.pPYhp.s.pt...............................tPs.s.p.Ptttph..h.s..h....pPY.Pp.ps.h..p.P....sphpShhuQFKsp-Gs..aDhNKMhsTsGQhhsssN.......QVuullKGlsuhFK ..........................................p.tsYh..p.......................................................Q.pQPYhspt........t...................................hP..s.sPYsNQQuMFYPP.K............QPY.PTtsKQK....QQQP.....SQFSSFVSQFKsSDGN..YDVNKMMNTAGQMMNAMNQVTGIVKQVGGFF.u....... 0 3 13 15 +14023 PF14181 YqfQ YqfQ-like protein Eberhardt R re3 Jackhmmer:P54474 Family The YqfQ-like protein family includes the B. subtilis YqfQ protein Swiss:P54474, also known as VrrA, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 146 and 237 amino acids in length. There are two conserved sequence motifs: QYGP and PKLY. 
27.00 27.00 27.20 29.10 23.30 26.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.35 0.71 -4.20 17 138 2011-05-12 13:47:31 2011-05-12 14:47:31 1 1 131 0 22 119 0 165.80 41 84.95 CHANGED tGhLu+Lhstu..............thtu........................................................uuusts.suussuu...htshhs..slosMLsNlQKsLulAQpVsPM...VQQYGPLlRNLPuMh+l...a+pLssucs....tsppsspcssspppppp..t.psppppcpccph.pspppctppcppsst.p.......................SpPKLYl ..........................................tGhLu+hh.tt...........s.pthtu...................................................................htssts+GuAsss.....AuuSuGhGuhhuNLhS..N..sosMlNNlpKV.pVsQoVuPh...VpQYGPlhRNLPSllKI...hpthpSscp......spEspsEchoEpsEstossss........ppKc++KKhl.csshc+ph.c.psspphs......................oKPKLYl..................................... 0 4 14 16 +14024 PF14182 YgaB YgaB-like protein Eberhardt R re3 Jackhmmer:P71080 Family The YgaB-like protein family includes the B. subtilis YgaB protein Swiss:P71080, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 27.00 27.00 37.50 37.20 24.10 23.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.79 0.72 -3.74 11 129 2011-05-12 13:57:44 2011-05-12 14:57:44 1 1 129 0 14 60 0 78.00 63 90.65 CHANGED pF-+LVuEQhcTMDcLLpLQSELERCQpIE+pLhchpccscLpsIppEIppp+ccL+pIQchFpKQTEpVIpSYppp.....Eh ............DFDKLVGEQLETMDELLKLQuHLEKYQQIEhsE+-pCDKKELHFIRQEIYRTElALKhLHEKFEcQTNsVIQSFcTEK.h... 0 1 6 8 +14025 PF14183 YwpF YwpF-like protein Eberhardt R re3 Jackhmmer:P94588 Family The YwpF-like protein family includes the B. subtilis YwpF protein Swiss:P94588, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 146 and 167 amino acids in length. There is a conserved IIN sequence motif. 
27.00 27.00 52.20 129.70 23.70 22.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.57 0.71 -4.45 21 269 2011-05-12 14:41:30 2011-05-12 15:41:30 1 1 269 0 30 86 0 132.30 70 88.58 CHANGED KTFKLlsLpll...pcpt.....php-hsLhDGLIINKEcupspWLlEsllsppatshFcphhppppplplpVsIT+tsN-PAthhssV+sIscl...scplSVLh-Gpllpp+.pshuEplLcsLlcEGlsG-pLlcp.FKpph ..KTFKAVRFQIV...NEHG.....RIIEYELEDGVIINKE-SGTGWLLEIVISNEHYETFKEYQDNEQLLDIRVVITRPANDPALF-oTVKSIKNF...KTTMSIVFECHIY.TLR.QQYAESLLEQLIDDGLSGEELKKoFNRMM... 0 7 17 26 +14026 PF14184 YrvL Regulatory protein YrvL Eberhardt R re3 Jackhmmer:Q7WY71 Family YrvL prevents expression and activity of the YrvI sigma factor. It may function as an anti-sigma factor [1,2] 27.00 27.00 27.40 27.20 22.70 21.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.77 0.71 -4.36 16 154 2011-05-12 15:00:41 2011-05-12 16:00:41 1 1 131 0 16 108 1 118.50 39 91.84 CHANGED htphhhlhlhhslhhhllhuhhhhshsulFplhGhpYpShsullLFhllhhlL...uhhl-hht+sllpslhph.hhsphhhhlLhhhl-hhhsahslahsDpahcSlplsshsclllulhhallshhhsp..............cpp ............h..hhhlh.hhhlhlhllFuh.hFFlhhGlFplhGlpYpShsuLLLFhLlhhhL....shhh....hh+hllhshtpt..hsphlslhL..hull-lhhsWhsIHhAD-aI-SVplSshsElslsLhhalLsKhhsc.....c.....ct......... 0 5 9 11 +14027 PF14185 SpoIISB_antitox Antitoxin SpoIISB, type II toxin-antitoxin system Eberhardt R re3 Jackhmmer:O34800 Family Members of this family act as antitoxins. They bind to the SpoIISA toxin, Pfam:PF14171. They are disordered proteins which adopt structure only when bound to SpoIISA [1]. 27.00 27.00 48.60 48.40 24.20 23.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.84 0.72 -4.22 5 24 2011-05-12 15:08:58 2011-05-12 16:08:58 1 1 24 2 5 17 0 55.60 70 98.31 CHANGED MEpAFQscpps.++tpPFKhlKK+SpTSlAcYcVSPHTcRIFKcNERLIDEYKpKKA ..........MEpAFQNspp..+tAKPFKhhKKRSpTSlASYQVSPHTARIFKENERLIDcYKpKKA. 
0 1 2 3 +14028 PF14186 Aida_C2 Cytoskeletal adhesion Coggill P pcc Zhang D, Aravind L Domain This is the C-terminal domain of the axin-interacting protein family, and is a distinct version of the C2 domain. This domain is critical for interactions with cytoskeletal in the context of cellular adhesion points [2]. 25.00 25.00 40.30 27.90 23.70 22.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.66 0.71 -4.95 11 105 2012-10-10 12:23:49 2011-05-12 16:14:55 1 12 62 3 71 90 0 136.50 53 24.73 CHANGED uohhPshsppsGsstLslhI-+lu.lKDAspahsPahoVoVhDssGpslpssQ-TPlu..pp+sssalhFs.sslplQsslcchsc.GuAlhhEF+HYKsKK+KhSs+CWuFMEhD-l+sG....ssslElYtKPTDh+R.KKlpLlotKshaL+lphp .......................................uoLlPtlsppPGhThLol+IEKIG.LKDAuph..ID..PahTVS.VKDhsGhDLsssQDTPVA....s++E-sYlaFs.sslclQ+pl....EKLsK.GAA.IFFEFKHYKPK..KRhTSTK...........CFAFMEM..DEIKsG............PlVIELYKKPTDFKR..KKLpLLTcKPLYLHLp.o.............................................. 0 20 28 43 +14029 PF14187 DUF4310 Domain of unknown function (DUF4310) Eberhardt R re3 Jackhmmer:B0P5U5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 231 amino acids in length. 27.00 27.00 29.30 29.30 19.60 18.80 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.47 0.70 -4.83 9 318 2011-05-13 07:18:32 2011-05-13 08:18:32 1 2 315 0 37 122 2 207.80 80 95.54 CHANGED ppp+sFWaADWoFPlhVulhuAulFAGTHMYhsYGlGAFNElAlVAMLcAGlssGsYGAAAAFGASFLFARILEGsLVGILDlGGSl.TGlGlGlPAlLLuuGhstslsNFsLSLlTGuVlGLlIGhlIIhlRKhTlsQusSTaGADlMMGAGNsoGRFLGPLIILSAhsASIPlGIGShlGAAlFYhacKPIAGGAILGAMlhGulFPl ..cpsKuFWYADWSFPIFVGLLSuGVFAGTHMYYLYGlGAFNEVAFVAMLKAGhDTGsYGAVAAFGASFLFARIIEGSLVGILDIGGAIQTGVGLGVPALLLGAGhlaPVuNFhASLlTGLVlGLAIGYIIILARKFTINQSsSTYGADVMMGAGNsSGRFLGPLIILSAhoASIPIGlGSLlGALLFYIWpKPITGGAILGAMILGuIFPl......... 
0 4 9 23 +14030 PF14188 DUF4311 Domain of unknown function (DUF4311) Eberhardt R re3 Jackhmmer:B0P5U6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 260 amino acids in length. 27.00 27.00 98.70 98.40 23.00 22.70 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.23 0.70 -4.83 9 329 2011-05-13 07:36:43 2011-05-13 08:36:43 1 1 314 0 38 136 0 202.20 82 82.56 CHANGED ARMFHAPssQuMGAFRTLGELNACpGDPhSHFSFGLGFhFNAWASsVGAGAhTQDVpHRIIPNWAAAsLLhKNKsVpETL+DPKKMuIuGAlIGslVVshLNsTAuuIPESLQslAscVLVPAANhLINsVMPIlFWLAAlDAGK+TGhWGTlhGGLupLIMGNAVPGlVLGILIGKGVD-SGWN+lTKsMhsslllLFllSGFFRGFDhpLl ......ARMFHAPTTQGMGAFRTLGELNSCEGDPASHFSFGLGFFFNAWASSVAAGuFTQDVDHRIIPNWuAAALhlKNRNVu-TLHDPKKMAIAsAlIGMlVVsFLNhTASuVPEALQVTAVKVLVPAANlLVNhVMPVIFWLAAIDAGK+SGFWATlFGGhAQLIMGNAVPGLVLGILIGKGVEESGWN+VTKlMMsAIVLLFVLSGFFRGFDhKMI....................... 0 4 9 24 +14031 PF14189 DUF4312 Domain of unknown function (DUF4312) Eberhardt R re3 Jackhmmer:B0P5U7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 99 and 118 amino acids in length. 27.00 27.00 60.20 59.90 18.70 16.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.53 0.72 -4.15 20 309 2011-05-13 07:57:18 2011-05-13 08:57:18 1 1 308 0 38 108 1 84.80 62 80.17 CHANGED phspoVpVsGKGcoKpcAFAsALuplQ+pVl+ssspllLRIEPh-VpllcAcEpspsEKFLFFFhPRcRppYpVcL-loVsVThI ....phTTTVpVpGKG-sKs+AFAsALNcVQssVh+E.oshlLLRIEPpDVcIlQA+EsVppEtFLFFFL.RcR+oYuVEL-VTVsVTAI.. 0 4 9 23 +14032 PF14190 DUF4313 Domain of unknown function (DUF4313) Eberhardt R re3 Jackhmmer:B0P650 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 136 and 171 amino acids in length. 
27.00 27.00 28.00 34.50 22.30 19.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.20 0.72 -4.32 20 120 2011-05-13 08:28:26 2011-05-13 09:28:26 1 1 84 0 7 94 5 103.80 32 61.26 CHANGED lpLpVssYhssss....LhItLhpp-...psphEsausLTVNLsshh.....spAFIsssss.c-hhpFlpcppLucsoGhptpSGaspYsthtFshc+LpcLDP-GhpcYtcphs ......hhlspYhsNst....LhlsLhs.c....tp.E.assLTVNLsshss.....sscuFlssNps..s-hhcFlpcNcLucso.....Ghpt+SGaspYshhhFslscLtchsPcshpchttt.s..... 0 2 7 7 +14033 PF14191 YodL YodL-like Eberhardt R re3 Jackhmmer:B0P785 Family The YodL-like protein family includes the B. subtilis YodL protein Swiss:O30472, which is functionally uncharacterised. This domain family is found in bacteria, and is approximately 100 amino acids in length. There are two completely conserved residues (Y and D) that may be functionally important. 24.00 24.00 25.10 24.50 23.10 22.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.38 0.72 -3.53 50 285 2011-05-13 08:42:40 2011-05-13 09:42:40 1 22 147 0 33 244 34 101.40 33 14.67 CHANGED tcsasIYQL+t..tscstsh+Fhsh-pLpppGh......pschpsYchVYsutlp........spsLEslap+FNhc+.P.tDacGHSLSlSDllhLppsGcsosaYVD.shGFpplstFh .......................t.csauIYQlpt...spstsh+F.sh-tLpttsh......plctpsYclVYsusLs.........spsL.Ecl...appFNl..c+..P.tD........acG+SLSVSDlVslcp..sG.c..sosaY.l.D.ohGFpplstF.............. 0 20 28 29 +14034 PF14192 DUF4314 Domain of unknown function (DUF4314) Eberhardt R re3 Jackhmmer:B0P797 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 56 and 93 amino acids in length. 
24.00 24.00 24.30 24.40 23.90 23.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.42 0.72 -3.74 61 138 2011-05-13 08:50:07 2011-05-13 09:50:07 1 4 85 0 31 127 2 97.20 22 44.82 CHANGED stGoRshLht...................hs.....cs.t.h.tusp..........................................usl.tVD....................Dhupl................p....................sh........hDs.GpuL............sll.Gc-hhphl ........................................ssRshhhh...................hs..........ss.t.ltpt.sh....................................................usl.sV-....t..hhssht....t..sttpl................p............................hh..h..st.hDs.Gpuh............tl..Gp.hhph......................... 0 22 28 31 +14035 PF14193 DUF4315 Domain of unknown function (DUF4315) Eberhardt R re3 Jackhmmer:B0P8U3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 27.00 27.00 27.20 27.20 26.70 26.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.83 0.72 -4.06 19 219 2011-05-13 09:28:17 2011-05-13 10:28:17 1 1 101 0 16 153 27 84.00 38 94.37 CHANGED sKlcKlptEI-Ks+pKIsEhQu.......+L+ELEppKTEhENLEIVplVRuhsho.p-LsshLpth.........uGpssPtst.......ppc-ttcEc .....sKl-+Icp-IcKs+...cKIsEhQp.......+L+pLEspKsEtENl-IVphVRuh+hoPppLsAhLpsh...................shtssPtsps.t....ppEcp....t.................................. 0 11 16 16 +14036 PF14194 Cys_rich_VLP Cysteine-rich VLP Eberhardt R re3 Jackhmmer:B0P8U5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. It contains 6 conserved cysteines and a conserved VLP sequence motif. 
27.00 27.00 29.30 29.30 22.90 21.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.02 0.72 -4.23 20 192 2011-05-13 09:48:24 2011-05-13 10:48:24 1 3 84 0 20 140 9 54.90 55 41.26 CHANGED TtpQ+++lptLl+phCsNY.Ds..GNCLhLD-G-spsCsQsIS.aSlhC+YFRpAVLPtD .......tQp+psppLl+tpCCNYDs..GNClhLDDG.-sCsCsQoIS.aSlh..C+WFRtAVLPhD.. 0 11 20 20 +14037 PF14195 DUF4316 Domain of unknown function (DUF4316) Eberhardt R re3 Jackhmmer:B0P8U6 Family This domain is functionally uncharacterised. This domain is found in bacteria, and is typically between 56 and 95 amino acids in length. 25.00 25.00 25.20 25.20 24.40 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.70 0.72 -3.90 15 205 2011-05-13 10:04:25 2011-05-13 11:04:25 1 13 99 0 14 169 30 83.90 37 10.96 CHANGED +pNPLKsAElohE...QNYNMlDGllNN....sPsh.....................................s--KPSlhD+LKpspp+pcu+c....................ccEp ......-NsLKsAEhshE...pNYNMIDGlINN.....tsPohs-lEtt.pt..................................................................tt.pttp.ccKsSlhcpL+stpcpppp.+p.......s..ppp.....p............................................................................ 0 9 14 14 +14038 PF14196 ATC_hydrolase L-2-amino-thiazoline-4-carboxylic acid hydrolase Eberhardt R re3 Jackhmmer:B0P9R4 Family This family of enzymes catalyses the conversion of L-2-amino-delta2-thiazoline-4-carboxylic acid (L-ATC) to N-carbamoyl-L-cysteine [1]. It cleaves the carbon-sulphur bond in the ring structure of L-ATC to produce N-carbamoyl-L-cysteine [2]. 
25.50 25.50 25.60 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.26 0.71 -3.81 62 369 2011-05-13 10:27:15 2011-05-13 11:27:15 1 8 272 0 100 357 53 152.10 18 73.90 CHANGED lt+cth.............................hpAthhthlhcpht.cchup..cpuppll.pcAlpphut....ttGps.httph.t.....ssshp.sFts.hhhshtpssshc...chhc..........tscschthchpcCshscha+chGh..pphthhhCch....Dtshspuas....slc.hp+spTlscGsspCcFpa ............................................................................tththht.......................A.hhh...hhctht..cphs....ppspthh...tculp.t.huh.........ptucp..hstt..t...........pshp..patp...ht...htht..t..s..ts.hc.....chhp..........ssc..p..p..hthchpcCshschh+chGh....tcht.hhCph....Dt.shhpsht.........slp.h.p.R....s....pT.lu.p.G.sspC-apa................................... 0 56 76 86 +14039 PF14197 Cep57_CLD_2 Centrosome localisation domain of PPC89 Wood V, Coggill P pcc Jackhmmer:Q10218 Family The N-terminal region of the fission yeast spindle pole body protein PPC89 has low similarity to the human Cep57 protein. The CLD or centrosome localisation domain of Cep57 and PPC89 is found at the N-terminus. This region localises to the centrosome internally to gamma-tubulin, suggesting that it is either on both centrioles or on a centromatrix component. This N-terminal region can also multimerise with the N-terminus of other Cep57 molecules. The C-terminal part, Family Cep57_MT_bd, Pfam:PF06657, is the microtubule-binding region of Cep57 and PPC89. 27.00 27.00 27.60 27.60 26.80 26.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.10 0.72 -4.03 24 78 2011-05-13 10:39:40 2011-05-13 11:39:40 1 5 64 0 67 80 0 72.30 30 8.30 CHANGED p+L-upp.sL..ps+L.DhhpR+sphp-....tthKpLsp...ERDtshppLus.Ah.csp-L+sE.-sLcpE...........Lcpph.tp ....cLEsphtsL..Qs+LDphsR+sssp-....tp.cpLsp...ERDpshppLs..AhtcspcL+pEh-sLppE...........Lcphp...................... 
0 13 34 55 +14040 PF14198 TnpV Transposon-encoded protein TnpV Eberhardt R re3 Jackhmmer:B0PBU6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 114 and 125 amino acids in length. 27.00 27.00 27.10 28.20 26.10 26.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.24 0.72 -4.17 56 477 2011-05-13 13:11:50 2011-05-13 14:11:50 1 2 167 0 34 365 68 101.40 42 88.22 CHANGED sYphhGD.Y...........alPsLp..lsc-pc....sl............G+YGch++pYL+Ea+sshYspLllsGcLhp+Ls-l-cpAp-ph-hllpQhtcppGlTEpLKspspMcWVppMNsl+spAEEIVhpEll ....................................pYh..GD.Y...........hlPsLplsppcp....sl..............GtaGph+ppYL+ca.+.shYspLlhoGc.LhsaLs-lscpAp-Rh-tllcpMtptpGl...T.EpLKspstMcWVtphNsI+spAEEIVhpElI........... 0 17 30 33 +14041 PF14199 DUF4317 Domain of unknown function (DUF4317) Eberhardt R re3 Jackhmmer:B0PBX6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 225 and 451 amino acids in length. There is a single completely conserved residue P that may be functionally important. 
27.00 27.00 36.80 27.90 21.50 26.40 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.04 0.70 -5.67 45 203 2011-05-13 13:17:54 2011-05-13 14:17:54 1 2 155 0 33 197 9 334.90 34 95.75 CHANGED M...N......KK-lsEI.+KpF+.-s..ssIo+lsGCYVst-.+-lhpphp-sFlsLp-EEh.KYLpIFKKsLSGolGcNLl-lcFsh..................cp-tpuupQphLhpL+pSpLcD-..thl-cFYc+lIcsYsY.ssNalIllh+ssYDlPsKssDs.pph.DsS-EVY..caILCoICPVpLsKsuLsYptp-NphcsRh...pDWlVpsPptGFLFPAFsDRusDlaslLYYo+csc-.c.cFlEplLGsshshTAt-QK-sFpsIlc-slG--sch..-slpslaEpLschIE-pc.t.....psEP.htLspp-lcclLppSGVs--ph-ph-psac-shss.......................csshhAsNll...soKplcl.........................................................................................................................................cT.DlslpVsPc+schVcophID.G++CLlItl..s-psplNGls .....MNKK-lhEl.++..paphsp...ssho+lsGCYVct-.pphhtphppsFhpLsp-Eh.K..YlclhKKsLoGslspNLhchpFsh..................pppttsup.....QphLhpL+pSpLc--....tLh-pFYcpll.-sY..pa.s..........tsahIllhHstYDlPsKssDs.tph.-.tS-EVY..callCulCPVpls+.uLta..tppphtsch....shhlphPt.GFlFPuFpDRssDlpplhYYoppspp.p.phh-phLsst....oAttpKtsFptllpcshupc.ph..chhhpl.-plsph..l-ppp.......p.-s..hLs.pplpplLtpuGl.p..htph-ptatp.h.t.......................p...hApslh...ss+phph................................................................................................................................ps.sl.lplpPpc.h..lcp.hlc.GpchllI.l..s-psplsGh........................ 
0 20 32 33 +14042 PF14200 RicinB_lectin_2 Ricin-type beta-trefoil lectin domain-like Coggill P pcc Jackhmmer:Q8X123 Domain \N 35.10 35.10 35.10 35.10 35.00 35.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.72 0.72 -3.66 49 1501 2012-10-02 19:42:32 2011-05-13 14:20:14 1 251 412 51 644 2662 47 100.70 22 25.03 CHANGED ssQpWphp....h....s..........ss.....t.......aplh...ss..s....o....uphLssss.ss.tpustltth.s.s.......sssppWplps.ss..s..G..t.apl.hsh...ss.s....h.sLc.lts.s.s.ss......susslhhaph...ts.ss...sQpWphp ..........................................t.............................su........h...........a.p.lh...st..t.....o.....GpsL...sl..ss..ss....s..s...sGs.p.l...tta.ssss......sssQpWpl.....ss....su..............s....G.....h...apl.tst........so.s.......................hsLD...lts..u..u..ss........sGs.s..lh.as......ss...ss.....sQpWph...................................... 0 285 489 608 +14043 PF14201 DUF4318 Domain of unknown function (DUF4318) Eberhardt R re3 Jackhmmer:B0PDG9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There is a single completely conserved residue F that may be functionally important. 27.00 27.00 27.30 27.50 25.30 24.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.37 0.72 -4.15 13 131 2011-05-13 13:28:57 2011-05-13 14:28:57 1 1 104 0 4 69 0 70.70 52 79.81 CHANGED +KuFhI-L-DuhpYPoscsICpAlEpYstcsppslcFlu+scPl.hhl-sshYElcl...phuRGsYa.lpC+El ..........pKSFFIELDDuLTYPSuEs........IsoAIEpYssEsNEpL+FESKsKPIhFYL-ss.hYcsEl...+MARG.GYY.ISCpEV........... 0 1 3 3 +14044 PF14202 TnpW Transposon-encoded protein TnpW Eberhardt R re3 Jackhmmer:Q9L782 Family This family of proteins is found in bacteria. Proteins in this family are typically between 54 and 75 amino acids in length. 
There is a single completely conserved residue G that may be functionally important. 25.00 25.00 26.90 26.30 24.00 23.60 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.44 0.72 -4.36 31 347 2011-05-13 13:45:03 2011-05-13 14:45:03 1 2 153 0 30 225 25 36.90 36 58.15 CHANGED sshp++IGpTTYhVplpFscsupEThpDKlpRllcs- .....hhpp+IGposahVplaFscsus-ThpDKlt+llctE......... 0 16 28 28 +14045 PF14203 DUF4319 Domain of unknown function (DUF4319) Eberhardt R re3 Jackhmmer:B0PDI8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There are two completely conserved residues (E and K) that may be functionally important. 27.00 27.00 27.30 35.90 22.70 25.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.85 0.72 -4.43 27 181 2011-05-13 14:14:01 2011-05-13 15:14:01 1 1 81 1 7 129 22 64.30 37 90.68 CHANGED pFTlEEpNLhslYps.uoRpphIcslpth.hsah...Ds-.....MtpLsppslsKLpshoDsEaspLplhss .......pFThEEhNLhslYss.......uoRpslI-slpth.hs..h....-s-......hRcLsspslsKLpuMTDu-FucLplhs.............. 0 4 7 7 +14046 PF14204 Ribosomal_L18_c Ribosomal L18 C-terminal region Coggill P pcc Jackhmmer:Q9FL18 Domain This domain is the C-terminal end of ribosomal L18/L5 proteins. 23.00 23.00 23.10 24.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.23 0.72 -3.43 90 635 2011-05-13 14:31:27 2011-05-13 15:31:27 1 8 464 10 360 612 6 87.50 41 31.06 CHANGED AEshRcaIaGtHVA-YMcpLpE-D-EpY+pQFSpYIK..pGlsADslEchYpcAHtAIR......tDPsh.ccpp...........pcph.hpp....K+apppKLThcpR+pRVtpK ..........AEshRcaIhGtHVA-YMctLtE-...D..E......EtY+cQFSpYIc...pulss-sl.E-hYccAHsAIR......ts.Psh.ctp.......................pc.p.h...pp..........K+apt.Klohtp++s+ltt......................................................... 
0 121 192 283 +14047 PF14205 Cys_rich_KTR Cysteine-rich KTR Eberhardt R re3 Jackhmmer:B0PDJ5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 60 amino acids in length. There are 4 conserved cysteines and a conserved KTR sequence motif. 24.50 24.50 25.00 24.90 24.00 23.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.82 0.72 -4.51 23 165 2011-05-13 14:39:52 2011-05-13 15:39:52 1 3 119 0 10 88 7 52.00 56 84.28 CHANGED pscWlLCPlCGNKTRsKIRcDTlLcNFPLYCPKCKQEsLIsVcpLpIoVIKEP...DA .......t.WlhCPlCGsKTRlKIRcDT.L+NFPLaC..PKC+pEsLIclc.phcloVIpEPDA......... 0 7 9 9 +14048 PF14206 Cys_rich_CPCC Cysteine-rich CPCC Eberhardt R re3 Jackhmmer:B0PDW3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 68 and 104 amino acids in length. There are six conserved cysteines and a conserved CPCC sequence motif. 24.50 24.50 24.90 25.00 24.30 23.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.02 0.72 -4.42 25 243 2011-05-13 14:50:38 2011-05-13 15:50:38 1 7 210 0 43 194 2 68.40 37 67.44 CHANGED patCPCCGYhTlspcsst...a-ICslCFWEDDslphpss-ht.uGuN.clSLp-ApcNFtcaGAC-pchhph.VRpPts.--h ................hpCPCCGhhThp..s..........a-ICslCaWEsDsh............t.tps........s.................h........t..uGsN.phoLpcA+pNahthts...................h.................................. 0 15 33 40 +14049 PF14207 DpnD-PcfM DpnD/PcfM-like protein Eberhardt R re3 Jackhmmer:B0PE77 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 57 and 153 amino acids in length. There are two completely conserved residues (E and A) that may be functionally important. 
25.00 25.00 26.80 26.10 24.00 23.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.12 0.72 -4.21 18 267 2011-05-13 15:01:36 2011-05-13 16:01:36 1 2 239 0 12 86 7 48.60 53 41.50 CHANGED K....YcVcIsETLp+hVpVcAcoc--AhphspchYpsp-..IVLss-DFps.s- .....KKYsVEIsETLSRlVSlEAE.s.D.EAccLVc-pYpspE..IVLDADDFpsh-................ 0 7 10 11 +14050 PF14208 DUF4320 Domain of unknown function (DUF4320) Eberhardt R re3 Jackhmmer:B0PFN6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 120 and 131 amino acids in length. There are two completely conserved residues (G and Y) that may be functionally important. 25.00 25.00 25.10 25.20 24.30 23.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.13 0.71 -4.14 20 145 2011-05-16 07:46:53 2011-05-16 08:46:53 1 1 99 0 15 97 0 116.10 30 89.67 CHANGED llLsshllIALuVclhPlalsKppLDsFAsELVRpAEluGplu.......sETsp+ttsLcE+TGlsPp..lpWS............psG+IQLNp-lsVTlThchclGlFu....sFu...SFPlTlpApAoG+SEVYaK ...hlhhulllIsLhspsshhhhph.chcsaAsphlc.AEpsGGhs.......scssshltslp.c+.httpsh....hsWc.-..........ppG+lpaNpslshplpuchc.hhlFt....shs...shclslpAptsGhuplYa+................ 0 11 15 15 +14051 PF14209 DUF4321 Domain of unknown function (DUF4321) Eberhardt R re3 Jackhmmer:B0PGC7 Family This family of proteins is functionally uncharacterised. It is found in bacteria, and is approximately 50 amino acids in length. 27.00 27.00 27.50 27.50 25.90 25.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.15 0.72 -4.27 43 156 2011-05-16 08:00:46 2011-05-16 09:00:46 1 1 155 0 48 130 27 48.70 31 56.63 CHANGED hlsaLshshs..lGhsss......slDLtllplohG.lslclNlhoIlGlllAlhlY ............lsaLshshs..lGhsssh.....sLDLtllploFG.lslclslhoIlGlllAlhlY..... 
0 29 43 46 +14052 PF14210 DUF4322 Domain of unknown function (DUF4322) Eberhardt R re3 Jackhmmer:D2PCT9 Family This presumed domain is functionally uncharacterised. This domain family is found in archaea, and is approximately 60 amino acids in length. There is a conserved QTV sequence motif. 27.00 27.00 28.20 33.90 20.30 22.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.13 0.72 -4.50 7 162 2011-05-16 09:56:38 2011-05-16 10:56:38 1 1 13 0 20 159 0 58.40 72 22.23 CHANGED hTPsh.pp.shpQIsaKLLShlsFpGRKuEEVpKsLVSAuLhpDSVENKuptaslSPQTVRNYsEE ...lh..LPHQNNlQQIGYKLLSMLNFpG+KuEEVA+TLISACLWNDSVEsKSRAYsVSPQTVRNYVEc............ 0 18 18 20 +14055 PF14213 DUF4325 Domain of unknown function (DUF4325) Eberhardt R re3 Jackhmmer:C4FKE5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 99 and 341 amino acids in length. 27.00 27.00 27.10 27.40 26.80 26.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.24 0.72 -4.33 60 205 2011-05-17 10:57:51 2011-05-17 11:57:51 1 5 197 0 61 187 21 71.20 22 43.03 CHANGED ppupclhpp.lpphlptst...VhlDFpul.phhspuFhsphhuplhtphs......tthcpplphhshscsspthlp+.lhsp ................pupplhpp..l.....t....phhpttp....lh.lDFcGl.stlusuFhcEsFushhp.c..hs.........t.hcpplphh..shspphpthlth.h................................ 0 19 43 52 +14056 PF14214 Helitron_like_N Helitron helicase-like domain at N-terminus Coggill P pcc Jackhmmer:Q9S9S3 Family This family is found in Helitrons, recently recognised eukaryotic transposons that are predicted to amplify by a rolling-circle mechanism. In many instances a protein-coding gene is disrupted by their insertion. 
28.70 28.70 28.80 28.80 28.40 28.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.11 0.71 -4.56 42 666 2011-05-17 12:28:46 2011-05-17 13:28:46 1 43 74 0 525 652 26 155.40 26 14.84 CHANGED Ya..sa+l..phRpsphshhhp...st+LhpQa..hVDtaspl-ps..RLpalp..p..pQpplRschhpulpDA............hppups.cs...sphGc+ll.LPuSasG.u.RahhppYpDuh.....Als+taGhPclFlTFTs.Ns+WsEItct...............lt........sppspD.....................RPDllsRVF+hKlcpLhc-lhpp...phFGp.shs.hhaslEFQKRG.LPHsHlLla ...........................................................................................................h...h.....hp.....h.pt....phthht..........pp.tph.....pt.................h.tt..l.pt...................................ht.tttt..ph......p.t...h...pphh..hstshhG.....s...........phhhpph..-sh.....Ahspp....hGt..P.slFl.Thos.s....s....p.Wt-.l...hph...................lt...........tp.psp..D...................................cPshssRh..Fph+......hpthhp.lhtt...................thh.Gp...l.t.hhhhhEaQtRG.sHhHhll.......................................... 0 194 351 473 +14057 PF14215 bHLH-MYC_N bHLH-MYC and R2R3-MYB transcription factors N-terminal Coggill P pcc Jackhmmer:A8MSG2 Family This is the N-terminal region of a family of MYB and MYC transcription factors. The DNA-binding HLH domain is further downstream, Pfam:PF00010. Members of the MYB and MYC family regulate the biosynthesis of phenylpropanoids in several plant species (DOI:10.1007/s11295-009-0232-y). 
22.20 22.20 22.20 23.20 21.70 22.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.04 0.71 -4.04 95 661 2011-05-17 12:29:08 2011-05-17 13:29:08 1 12 127 0 262 681 0 164.70 31 30.79 CHANGED Lp....ppLpsllp.......stpWsYAlFWp.s..........................................lLsWuDGaap.us.p.t.t......................t.t.t+pp.hpch.p.hs.........................................hth...-lsssEhaahssh.asa.....GpG...........lsG+shssupphWlssspp........h..ssphhs.........R.uh.Ap.tuh...p..............Tllsl..Ps..s.GVlELGSochlhEshshlppl+shF .....................................................................................pptLthhsp.......s.p...WoYulFWphsspp...........................................................hLh.WsDGaas.Gshcscpt...........t...................ththpRsctLc-Lht.h..s...............................t.sssshss--ls-sEha..alhshsasF.........GpG.....................LPG+shusspplWlssspp................................s....ssphht...........R.uhhAp..tuh......................TlVslPh..hsG..VlELGoTchl.E-.shlppl+shF................................. 0 48 172 220 +14058 PF14216 DUF4326 Domain of unknown function (DUF4326) Eberhardt R re3 Jackhmmer:A8ZQW0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 100 and 162 amino acids in length. There are two completely conserved residues (P and C) that may be functionally important. 
25.00 25.00 27.30 25.30 24.00 24.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.28 0.72 -3.41 43 174 2011-05-17 12:59:55 2011-05-17 13:59:55 1 3 156 0 61 180 43 92.70 29 68.43 CHANGED splspp.+ttphss...........t.h.YlGR..........sop.....W.GNPFshsp.................sssR........ppslcca+pal.................tppshhhptlt.cL.......+G.+.sLuCaCs......P.........CHuDVLt-l .......................................p..ht.ths...........h..al.GR..............soh.....a.GNPFthtt..............................................ttsR.........ppslcta+palh..................................tssphhhptlt..pL.......c.G...+..pLuCa.Ct..................P.ts.............CHuDVlhc................ 0 19 47 56 +14059 PF14217 DUF4327 Domain of unknown function (DUF4327) Eberhardt R re3 Jackhmmer:B0BYV4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 75.80 75.60 21.30 18.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.14 0.72 -4.16 39 113 2011-05-17 13:09:37 2011-05-17 14:09:37 1 1 41 0 44 101 0 67.60 45 88.51 CHANGED paol-hIp-EARpLVcpGllpRpQPIasLspalPuREWshlEpELEcp-FLLRDpIsDLlu.sEsWc..-D ...YolchIp-Es+pLVcpGllsRpQPIYsLCpaIPuREWstlEpELEcp-FLLRDpIuDLlu.sEsW-pD... 0 5 32 44 +14060 PF14218 COP23 Circadian oscillating protein COP23 Eberhardt R re3 Jackhmmer:B0C7I3 Family This family includes the circadian oscillating protein COP23 from Cyanothece sp. (strain PCC 8801), Swiss:Q54702. The levels of this peripheral membrane protein display a circadian oscillation [1]. 
27.00 27.00 30.90 30.40 17.60 16.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -11.05 0.71 -4.60 63 131 2011-05-17 13:43:54 2011-05-17 14:43:54 1 6 38 0 42 156 4 146.20 22 65.35 CHANGED pFhCt.........................................hssp............sTlstpsp..uph...slltWs....s.h.suuaoPppRCppVosRhpphtss....h.................th.hlps..Ghh.NspsVlCsssptss.sC.....pslLhTL..pssss.....sppsLpplhs.......t..p.................................ttstp......hlshsshLsss .................................................pFhCt........................................tsp.......sTlshpsp....uph...PllhWs....sth.susaoPppRCppVSsRhpphhps....h...............................thphlps..Gph.Nsp.sVICsssptss...sC...pslLhTL..pssss.....Pppslpplhshht....t....s....................................psstt.......lphp.hlt..t..................................... 0 1 33 42 +14061 PF14219 DUF4328 Domain of unknown function (DUF4328) Eberhardt R re3 Jackhmmer:B0C5R9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 218 and 342 amino acids in length. 
24.80 24.80 25.30 25.00 23.40 24.10 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.49 0.71 -4.96 51 224 2011-05-17 14:05:06 2011-05-17 15:05:06 1 7 194 0 73 216 1 164.40 23 55.87 CHANGED lltphpssh.............ht.sthstu-thssh.....suhsthlhhlsssllhlhWlhRs+tsApsht............ts.chsssh....slsuahlPlsNLhhPhphhtElapuotths.............................t.sthlhlW...W.....hhWl.................lutslshhshshs.....htsssptls..............tsshhslhsslhsls..sulhslhllcplsp..hp ..............................h..............h...hhttushhssh.....su.lsshhshlsshllhlhWLhttRssAhhhts...........hs.Rhust..hhuGhhlPhlNLhhshhhlhElhtspsphs.............................phpthlssW....W.....hsWl.................lussl.s.h..hs.hshp......hss..sspshs...................ssshh...hlluhlhuss..usshshhlhcthpp..................................... 0 29 57 68 +14062 PF14220 DUF4329 Domain of unknown function (DUF4329) Eberhardt R re3 Jackhmmer:B0C699 Family This domain is functionally uncharacterised. It is found in bacteria and eukaryotes, and is approximately 130 amino acids in length. It is often found in association with Pfam:PF05593 and Pfam:PF03527. There is a single completely conserved residue D and a highly conserved HTH motif which may be functionally important. 27.00 27.00 30.70 30.30 26.70 25.90 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.62 0.71 -4.17 20 415 2011-05-17 14:18:05 2011-05-17 15:18:05 1 16 244 0 18 432 7 126.10 48 15.32 CHANGED tpshApshLpplpscSIspspEYsGhIsp..sssGchhuo..psppG..........ppsushsths.s..shphVAuYHTHGuasps...........YssEl.SstDlpus........tpptlsGYluTPuGRlahlc.psppsp.phsshusl .........................I.DDhuh.ALshhNucSIsEsKEYuGLICK......ppG.cYF.so...sP.su.........s-pcus..hNhtCPpGoE+VusYHTHGh.Scs.............sYss..FSuKDhp.sh............uhspp.hu.Y...LGTPsssahthss+u+t.s.........sp............ 
0 8 12 15 +14063 PF14221 DUF4330 Domain of unknown function (DUF4330) Eberhardt R re3 Jackhmmer:B0C6A2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 165 and 177 amino acids in length. There is a single completely conserved residue G that may be functionally important. 27.00 27.00 30.40 28.90 23.30 21.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.97 0.71 -4.40 43 119 2011-05-17 14:26:20 2011-05-17 15:26:20 1 2 109 0 50 119 32 159.90 25 88.80 CHANGED M..t...llDspG+LFG+lSllDlsAslllLhslsGlhhhPtt..osss...Aph...ssspslpV..sl.lhsltsssPpshhtp.hpt.......spcssllIRNQPh......GplpltsVp.ls+plssspPDGsVhshsD...Pp.t..hphDhhhTLcGpupho.ssGs...VlGspclKIGsslELEGpsYphsGoVh...slcl ..................................M.llDpcG+LFGplsllDlhshllllhs.lsGlhhhstp.ssss.....Ap....ssspslcl..shhlhslpstsspthhtp.hpt.......sspsshhh+spshG....plpslp.h..spsh.hsspsDGp.Vh.thsc...P.....hthDhhlTlcupupho..ssGs.....llG.spcl+lGps.lpl-stsaphsusVhslp........................ 0 17 36 47 +14064 PF14222 MOR2-PAG1_N Cell morphogenesis N-terminal Coggill P pcc Jackhmmer:Q9LFV8 Family This family is the conserved N-terminal region of proteins that are involved in cell morphogenesis. 
20.30 20.30 20.50 21.50 19.30 20.00 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.96 0.70 -5.83 35 407 2011-05-17 14:28:14 2011-05-17 15:28:14 1 28 236 0 259 372 3 456.00 35 21.39 CHANGED u-..R+uhlSl...YllCRVLIEIhpQss.hp.slsp-htc+LEsllFsQL..KssD.s.....p.ls.sSsl+huNWslaApLLGhh..SchpF..sSVocRFls-Lcc........hp.p......pps......cs-sphthLIhGMRaL+l+saP.Esa-cSs-Fhpulu+hFspup.pc..lKpAaspllpplLLPlAussss-....lspP...pWs-slstlhschsphhs..K.s+HWss..u.FPLtssLLClSsp-hFhspWh....thlpsL.s+lKD+..spR..sluLpulsRLlWsYL..apss-ossssh++L-plhchhl..Psu+.........+..shlssD..shh-PLlpllphIuac+.Dash+pIIaPLlssshhtss.....lcplpPE+hllGIRuaLsIhsshppspt......................................t.h.tsppc......hls......s...ssth.spslppaa.chschlsc.IhllhDsshGupshh....s.....cphst.p..ssht...................h.hthpsD.ph...o..sp....+pth...-L.h+sslpAlPRCL..us..pIPa..ssLIslLspsssHlpssIupouupuL+uluppp..tsppVhhGFA+FlFs.FD-+asoh.s.th.Ls..cl-ssL+L.YlELLplWl ......................................-R+shs..aIhshVLlEllpQhs..hp.sh.c.....th......hpcl.slsFpph..Kht-..s.........s.sush+.h.hslaApllGhhup.t+.......F.uVpc+Fhs-Lcchp.p...........p.s......s.tphh.LlhGM+ah+l+hhP..EtaEtShpFhp.phuphFhcs...pc.tlKpAhstlhsplLlPlAuss.psE.............lshP......phtphlp.lh.pshph.s..+...p................+H.....................hs.........h..aP.LhssL.....LClSppphF..hs.pWh...................thlppLh.........K.................D......ph..p..tluLpulhRLlWsYh..hhps-o.s.....sT..p..+L.plhphlh.....Ppsp...................+..shlspD...h.ls.hlpllphIu.c+.D..ash+pllhsLlsssh.hp....................................h.plpPE+hsIGlRuaLsIhsslppt-t.ss...p...............................................................sssshpspph..pphls...........p.ptsps.uhp.Yasphpchlsp.IhhhhDpphGp.hhh.....s............ph..h..s..p...............................................c.cD..hh...o.....sp.....Ksph..-L.F+osltAlPRhl.........sc.........shsh.................ppLl-lLs+..h.olHh-pplpt.u.poLpslhhph..atpsVhhGFspFlhc.hsDha..sh...................
..l-ssl+hhlpLlp.Wh.................................................... 1 76 133 204 +14065 PF14223 UBN2 gag-polypeptide of LTR copia-type Coggill P pcc Jackhmmer:A5B9L3 Family This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.43 0.71 -10.24 0.71 -4.31 123 1538 2012-10-02 13:37:57 2011-05-17 15:37:42 1 195 75 0 589 2426 5 103.70 21 14.09 CHANGED coAKchW-sLcpha.Gss..+s+tuclppLcpca-plcM+-sEolc-ahs+lpslssclpslGp.phs-pclVcKlLpoLP.p+appllsul-pt..hDhpp.hol--lhupLpstEp+h.ppppsp ...................................................................stthWphlpt.atss..p..p......s...ph..lppph..t.thp........h.p.ss....c.s.l...p...-aht..+hp...sl.ss...p.l.....p.............s.l.....G.............p....s....h......s.........-p.........cl.lp..+..........lLp.....u.....L....s..pc..ap..hhss...lppp...sht.....ohp-lhspLh.shE.ph......t................................................... 0 132 305 345 +14066 PF14224 DUF4331 Domain of unknown function (DUF4331) Eberhardt R re3 Jackhmmer:B0C6N1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 223 and 526 amino acids in length. There is a conserved FPY sequence motif. 
27.00 27.00 32.60 32.40 26.80 26.80 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.61 0.70 -5.29 47 155 2011-05-17 14:40:04 2011-05-17 15:40:04 1 2 121 0 70 177 49 349.20 25 90.04 CHANGED SoHc-uP....htttsphDhTDhYhFpS.............tcsstlsllhNh.Php.......sssshhthsssuhYcl+l...........DscG......-..uht......D.lsapFpFss...............................................................................h..sshGptsh.........shtthtst.l.t...h...s......shpsFsGpRpDsFhhs....l.thapl.........................th...t.s.sshtsh.NVpolulclPtstls.t..........t.slGsWsossh.........................h.sGsasQlsRlGpPhlNcl..h...hsh...t.-.KDpaNsspPspD...upahshhtsshhstlhth...h..........................hsRsDllsshlsshst..........................h..s-hLclssuhPss........................................uuaPNGRphs.......DDVlDlsLphlhG.h.....................................t.hshs..cssshsss..shhssFPYLusP ...................................................SsHh-uP........hst...tsphD.oDhYsFpu.............tpsstsshlhNh..Pht.......sss.sh...hshs.sshYcl+I............DssG...D.uht.................D.lsapapFps...s...............................................h.p.ht...........................h.hs.sthGttshs..............shtth.tst.lht....hss..........shpsFsG.tpDsFhhs...htthhclht..............................h..sh...t.s.sshtsh.NVtolslclPpshlstt.................tsslusWsosph........................................sGsahQlsRlGtPhlNpl..h.hs.........t.s.KDtaNsstPtpD...spahphhhps...hshhht...hh...........................................................scschlshhhtsh.t..............................s-hLplssuhsss............................................suaP.NGRp.s.......DDVlDltLphlhG...................................s.....t.shh..sssshsst...h....ssFPYLs.P............................ 
0 19 44 65 +14067 PF14225 MOR2-PAG1_C Cell morphogenesis C-terminal Coggill P pcc Jackhmmer:Q9LFV8 Family This family is the conserved C-terminal region of proteins that are involved in cell morphogenesis. 25.00 25.00 25.50 25.50 24.90 24.60 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.63 0.70 -4.64 38 388 2011-05-17 14:50:41 2011-05-17 15:50:41 1 28 236 0 230 364 0 247.40 32 10.74 CHANGED cLltaPQLFWsssACLsolaEpEFlEuLphLs+hLsKlDLcsssshphLhsshPsp...........................Wp...GsF-GLQsLlhKGL+SSsoh-hTLclLs+Lshlsssplluss.po.RLlh.sllAsLPphLpph-pss..............pphhpsAptLuplA...cspshssLucllsuaucs+a.+opcDFlsphlshlpstaFPchpspslsaLlGLLpNshsWh+lpohplLpsllstlDhc....ps....hGs....D....LIuPLLRLLpT-hs.pALcVL.-plhshSGu.hhsc ...................t...hhsplFWsssuhLcSs...a...EhEal.uLcLLs+lLs+.....ls.Lcc....s....pspppLtphpsph.......................................................h....ssFsG.LQpLllKGh.....pSss..oh-hTlplLspLssl..spp.sll......ssu.......p.......s..t.....h.h.sllshLPphlpph-sss...................................................phsp..psA.pplAp.ls.............cstphs...sLAchhshYupppa.+sspsalshlsphL+.-sahschshphlsa.LhpLL.pp..uhs...hp.plLpllhslLsh....hDhp.............ps.............hss-.......l.lpsl.h+hl.posahtcALplL.chllohSuo.h..h................................................................................. 0 68 115 177 +14068 PF14226 DIOX_N non-haem dioxygenase in morphine synthesis N-terminal Coggill P pcc Jackhmmer:Q94JV6 Family This is the highly conserved N-terminal region of proteins with 2-oxoglutarate/Fe(II)-dependent dioxygenase activity. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.52 0.71 -3.55 783 5634 2011-05-17 14:57:58 2011-05-17 15:57:58 1 32 1392 66 2627 5670 1247 113.90 22 34.09 CHANGED lPlIDlu.............s.....ss......c......p...ph.....h.p.p...ltpAspchGF.F....l...sNH...G...ls..p.......p.llpchhp.huccF.F.s.LPh-...c.Ktc.h.....tp.....s....s......th...pG...Ys.sh...htpph..ps.................phDa.cEt.h.phtt.p..........s.........p............................h.sh...WPsp ....................................lPllDls........................ts...tp.......p..tph....hpp...ltcAsp..p..hG..F.F.............l..............s..N..H......G...l....s..t........................p...l...l...c.ch.hp.......hs..ccF.F...s.L...P...h-...p..Ktch.........tp.....s.....s.................th..pG...Ys..s..h...h.t.p.p.h.ps................................th-a.c-h.h..t.h.th................................................................................................................................................ 0 523 1614 2211 +14069 PF14227 UBN2_2 gag-polypeptide of LTR copia-type Coggill P pcc Jackhmmer:Q1PE19 Family This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.25 0.71 -4.55 97 1112 2012-10-02 13:37:57 2011-05-17 16:31:27 1 127 81 0 605 2057 3 105.80 19 15.57 CHANGED ppoutplWppLcphahspshss+lhl...hp+lhsh...+Mp-sps.lp...palscFppllscLpsl.slpls-E...spshhlLsSLP.ss..acphhss...lh.st..splohcpltstlhs...c-hphppppppsps .........................................................pstphWptLpt.h...att....p.s.....h...s...s...c...hhl.....hcphhsh........+ht.-.......s.......ps..lh.......palpch.p.ph...sp.cL....p....s....h....p........h....t....l...s.-c...........h.s.hhlls...sLP..ss........ac.shhts....l..hp...t......p.p.h...shpt.lhstl..........c-.thttt.....st............................................. 0 306 380 445 +14070 PF14228 MOR2-PAG1_mid Cell morphogenesis central region Coggill P pcc Jackhmmer:Q9LFV8 Family This family is the conserved central region of proteins that are involved in cell morphogenesis. 27.00 10.00 27.10 10.00 26.60 9.90 hmmbuild -o /dev/null HMM SEED 1120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.43 0.70 -14.01 0.70 -6.99 3 1087 2011-05-17 15:34:39 2011-05-17 16:34:39 1 28 226 0 651 1024 4 217.20 9 27.26 CHANGED 
tEuSEFRASEMDAVGLIFLSSlDVQIR+TALELLRCVRAL+NDIRDYStpEphDspLKs-sEPIFIIDVlEENGEDIVQSCYWDoGRPYDLRREhDslPlDlTLQSIL.ESuDKuRWARCLSELV+YAAELCPSSVQ-AR.....LEVIpRLApITPsELGGKApQSQDTDoKLDQWLlYAMFACSCPPDSRE-uuLRAARDLaHLVFPSLKSGSEuatLAAThALGHSHLEVCEIMFGELTSFlEEVSSETEuKPKWK......SQK..t.RREDLRVHVANIYRhVAENIWPGMLoRKPlLRLHFLKFIEETsRQILsuPSENFQEIQPLRYALASVLRSLAPEFVDAKSERFDLRsRKRLFDLLLSWCDDoGSTWGQDGsSDYRREVERYKASQHsRSKDSlDKLoFDKEluEQVEAIQWASMNAMASLLYGPCFDDNARKMSGRVISWINSLFhEPAPRAPFGYSPADPRTPS.YSKa..TG..EGsRGuuGRDKQRGSHLRVLLAKoALKNLLQTNLDLFPACIDQCYSSDuuIADGYFSVLAEVYMRQEIPKCEIQRLLSLILYKVVDQoRQIRDDALQMLETLSlREWAEDGuEGuG+YRAAVVGNLPDSYQQFQYKLSuKLAKDHPELSEtLCEEIMQRQLDAVDIIAQHQVLTCMAPWIENLNFV+LKESGWSERLLKSLYYVTWRHGDQFPDEIEKLWSTVASKsRNIlPVLNFLITKGIEDCDSNASAEISGAFATYFSVAKRVSLYLARICPQQTIDHLVCELAQRMLEDssEPVR.ussKsDTSuNsVLEFSQGPssS.QlAolVDSQPHMSPLLVRGSLDGsIRNVSGNLSWRTAAVTGRSVSGPLSPMPPElsIlNVTTGRSGQLlPA...LMNMSGPLMGVRSSTGSLRSRHVSRDSGDYaLDTPNSuDDILHuG.sGsHGlNApELQSALQGHpQHhLSRADIALILLAEIAYENDEDFREHLPLLFHVTFVSMDSSEDIVLEHCQcLLVNLLYSLAGRHLELYEV.EsSDuENKQQVVSLIKYVQSKRGSMMWENEDPTLVRTELPSAALLSALVQSMVDAIFFQGDLRETWGuEALKWAMECTSRHLACRSHQIYRALRPSVTSDTCVSLLRCLHRCLGNPVPAVLG 
........................................................................................................................................................s.tts...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tttt...................................................................................................................................................................................................................................................................................... 0 151 278 466 +14071 PF14229 DUF4332 Domain of unknown function (DUF4332) Eberhardt R, Bateman A re3 Jackhmmer:B0C7H1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 134 and 356 amino acids in length. This domain contains helix-hairpin-helix motifs. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.12 0.71 -4.15 49 244 2012-10-03 02:11:09 2011-05-18 08:15:57 1 15 232 0 88 272 276 105.00 25 30.05 CHANGED .GlsptptppL.ppsGIpospsLLptucs.tuRhtLApphtlstpplh+htshADLs.Rl.uluhpausLL.pAGlsoVspLApps.sppLpppltclptppphs+ph.....Psls.VppWIppA+pl ..........................................................h..t.sl.p..th.t.t....tp..h...tt..ht.t.l.th..t.s.phh.ph.GluppastlLpcAGlsolp-L.sp.ps.sppLpppltclNcph.clttph.....PolpplppWIppAp.......................... 0 31 58 79 +14072 PF14230 DUF4333 Domain of unknown function (DUF4333) Eberhardt R re3 Jackhmmer:B0C9G5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 140 and 255 amino acids in length. There are two completely conserved C residues that may be functionally important. 27.00 27.00 27.80 27.30 24.10 25.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.65 0.72 -4.02 37 334 2011-05-18 08:13:58 2011-05-18 09:13:58 1 5 143 0 89 229 0 79.00 32 49.87 CHANGED sssssusuhhhss........slDtsplppslpphLs....sshGhpsss.VsCP..ss.cscsGsohpCs.hs..lsGpshpVsVTlpss.DG .......................h..h.hhhuh.hss....hs.p......slDpspltstlpphLs....sshGs...+sss.VsCP...ss.pscpGAshpCp.ls..ls.Gp.shpVsVTlsss.-G........ 0 18 65 80 +14073 PF14231 GXWXG GXWXG protein Eberhardt R re3 Jackhmmer:B0CA20 Family This domain is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. There is a conserved GXWXG motif. This domain is frequently found at the N-terminus of Pfam:PF14232. 
25.00 25.00 26.60 25.10 23.00 21.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.91 0.72 -4.32 50 169 2011-05-18 13:18:05 2011-05-18 14:18:05 1 2 133 0 86 180 1 58.80 39 34.55 CHANGED hslFDpLssVss-.hl.GpW+Gutl.TGHPh-GhLpshsWaGKpFhss-sVcPLlhhss...cG .......phassLssVps-thl.GpW+GsthsTGHshsGhLpthsWaGKpFpsspcVcPLlhhsssG....................... 0 9 53 72 +14074 PF14232 DUF4334 Domain of unknown function (DUF4334) Eberhardt R re3 Jackhmmer:B0CA20 Family This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. This domain is frequently found at the C-terminus of Pfam:PF14231. 25.00 25.00 26.80 25.50 21.10 17.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.87 0.72 -4.33 50 165 2011-05-18 13:43:53 2011-05-18 14:43:53 1 2 132 0 85 176 1 60.60 45 35.32 CHANGED sppupApLRhhpaRGtsoAoMlYDphPIhDhFR+VD-....sslhGlM-hKs.........tspsaFFhLcR ...h.ttupApLphhcFRG.....ploAoMlYDtpPlhDaF++lD-....solhGlMshKs.........psp.a.aFhLcR............. 0 9 52 70 +14075 PF14233 DUF4335 Domain of unknown function (DUF4335) Eberhardt R re3 Jackhmmer:B0CBF9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 204 and 480 amino acids in length. There are two completely conserved residues (G and D) that may be functionally important. 
27.00 27.00 52.70 52.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.20 0.71 -4.83 55 113 2011-05-18 14:47:08 2011-05-18 15:47:08 1 1 70 0 44 125 119 193.20 27 57.39 CHANGED +pYs.PsCoLplpGhss.h....stspscshlslLsts-sp.h....sp...lcGs+-aLcsLhpsVssYspphLS...........................uhs.p.......phst-ssh.............................................lplps........psh.tH...cLhLpu........ttsssss.................hplpLsssQLaDLlpALDphtsDspsLsshshshp.......h++ht...h..sstshhpphsssslG............................ssulslsuhhh...hhlPhPp ........hpYs.PsCoL.l.uhss.h....st..sthpshhthLp.s-sp.h....st...lpGs+phLEsLhpsVssYspphLo...................................uh.........tptph...........................................................................lplps.........sh.tH......cLhlts.................tspsss.....................htlpLsssQLFDLlpAlDpahsDspsLsshshphp........++ht....tsstshhpphsssslG.........................shulshsuhhh...hhhs.P................................................... 
0 7 31 42 +14076 PF14234 DUF4336 Domain of unknown function (DUF4336) Coggill P pcc Jackhmmer:Q8RWI4 Family \N 27.00 27.00 27.20 27.10 26.10 26.80 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.86 0.70 -5.23 47 413 2011-05-18 16:09:10 2011-05-18 17:09:10 1 4 339 0 204 422 226 203.30 28 65.88 CHANGED ssplWoFEQlQGlhY...VsVPIRMTVlKLps.GGLhVauPlAPTtEhlphlcc..L..t.cG....sVcaIlLPTsSG........lEHKlalGPhARtFPpApVWVsPsQWSFPlsLPLs..WLGhP....ps........+s..L.s........h....sss..........ssPas.DEh-athLsPlsLGlG..s.....FtEluhaH+tSpTLLVTDullulstpPPt.lhshDP............hPLLF.........HAR-p..us...csltDosps..R++GWpRhsLFuhYhpPs..sL.................pss....................ph.phh..t................h.a..tWp.sh..sFptlps.....+...LhVAPlLpsLlasRs.ptlhsWl-cl............up.W..s.....hc.........pllPuHasAPlts.sspchppAFsa ...........................................lshssRMTVl+Lss......G.......u.....LhlaSPls.TpclhptlppL.........G.sVcallsPsh..h..........H+lalssapctaPpApla....ss...P.......s...................t......ph.....h....p........hhsh..........t..t....h................t....t.pt............ss...as.s-h.-..hhhp......t..t.........hpElsFaH+sSpTLllTDhl..sh....p........................................................................................................................................................................................................................................................................................................................................................................................................... 0 66 121 171 +14077 PF14235 DUF4337 Domain of unknown function (DUF4337) Eberhardt R re3 Jackhmmer:B3DVT1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 201 amino acids in length. There is a single completely conserved residue Q that may be functionally important. 
27.00 27.00 30.20 28.80 24.00 26.20 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.82 0.71 -4.39 47 143 2011-05-19 08:31:57 2011-05-19 09:31:57 1 1 133 0 74 139 31 162.60 30 84.08 CHANGED hspplAlhsulLAsluAlsshtuss.......hpscuhhppscAuspWuaYQAKohKpslhEhssphhthph............psthppclpcYppcppchcpc......ucpLppcAcph-ppp-pthcpacpashAsshlQIuIsLAulslLT+.+phLhhhuhuhussGlshhshuh ......................spplAlhsAlLAshuA.lsshtGss.......hps-A.slppscAuspWuaYQAKohKpsls-husphsthph................psthppclpcYppptpch+pc......................ucplppp..Acct-cpp-tthcpaHcashAsshlQIuIsLAulolLTc.....pphLhhh....uhslussGlshsshuh....................... 0 19 47 62 +14078 PF14236 DUF4338 Domain of unknown function (DUF4338) Eberhardt R re3 Jackhmmer:B3E1A9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 206 and 475 amino acids in length. 27.00 27.00 27.30 28.60 24.80 24.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.73 0.70 -5.75 26 167 2011-05-19 08:39:33 2011-05-19 09:39:33 1 8 64 0 33 167 423 198.60 25 54.41 CHANGED php+cslhpLstsppp..phttpptthst.........hssspsshschhs.pLphVcstsp...pLFphhhtp.a.LsaspshGc+LRYLVhsptp....t...llGhLuhuussapltsRD.caIGWsppsR+ppLhplshsu+hL..I.Pasplhsht.hlLuhsspclpsDapcpYstpshllEs........hVsssphhG..osYptss..................Wh.lGtTpGpG+hcht..................................................hhh+ts.h+-lalhPLscshcchLps 
............................................................t..................................................thht..lp.Vpp.pt........p......thap..hhtp...H.Lshp.hhGppl+Ylshsttp...............hluhl..uauusshphtsRD.paIGWs.pt+pppLhhls.ssRhL..IhPh.hpl.shtShhLuhshccl.pDatphYshp.hllEo............................alsspp..hh.G....osYptsp.......................Wh.lG...TpGhuc.t...................................................................................................................t................................................... 0 15 25 29 +14079 PF14237 DUF4339 Domain of unknown function (DUF4339) Eberhardt R re3 Jackhmmer:B3DVT0 Family This domain is found in bacteria, archaea and eukaryotes, and is approximately 50 amino acids in length. There are two completely conserved residues (G and W) that may be functionally important. 26.00 26.00 26.00 26.00 25.80 25.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.06 0.72 -4.38 59 701 2011-05-19 10:27:33 2011-05-19 11:27:33 1 48 515 0 277 694 67 44.80 29 6.35 CHANGED paahsp.NupptGPashppLpphltsGplss-oLVW+pGMss.Wpsh ...............Wah.sp....s..s......p...p......tGPhshpplpphhtpGplst.sThlWppGhss.Wpsh...... 0 109 177 238 +14080 PF14238 DUF4340 Domain of unknown function (DUF4340) Eberhardt R re3 Jackhmmer:B3DYU8 Family This domain is found in bacteria, and is typically between 183 and 196 amino acids in length. 
25.00 25.00 25.00 25.00 24.30 24.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.31 0.71 -4.60 71 627 2011-05-19 11:01:47 2011-05-19 12:01:47 1 8 364 0 213 647 246 159.80 14 62.45 CHANGED sWp.ls.s...s.hs.......hssDpsplsshlssLpplphpchls.....ssss.h.scaGLsss........thplsltsss.......s.p..tplh.lGs..sss.s.....sp.....hYs+h..stpsplah...lss...shhshh..s.tshss....a.hspp.lh...s.h....ptsp...lpplplpt.....tspt.........hph.........spps...s.....tWph.ss....s....tt......pss.ssps..sphls.slspLpsps..hhstps......sphtthsh.ss....Pth .........................................W..h..t......t.......h.hs.t.t.t.lp.phhptltthph.p.hp.........s.spp.htpa..GLsps............................thplplp.tts.......................t.p....hplh..lGp..s.ss.s......................sp...........h.Ys......ph......t...s...........p..s.....p....lah.....lst......shhp.h...s..ts.t..p....h.hppp.lh......p..h......t.tp....l.pplplp............tt.................hph.........................tttt..t.......th.h.tt..............................ht....h..tthht...h..h.h.t....h........................t....................................................................................... 0 128 169 184 +14081 PF14239 RRXRR RRXRR protein Eberhardt R re3 Jackhmmer:A8ZLD7 Family This domain is found in bacteria, eukaryotes and viruses, and is approximately 180 amino acids in length. It contains a conserved RRXRR motif. It is often found in association with Pfam:PF01844. 
25.00 25.00 25.50 25.60 24.60 24.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.09 0.71 -4.86 49 361 2011-05-19 12:12:55 2011-05-19 13:12:55 1 6 102 0 87 389 36 159.60 40 43.18 CHANGED VaVLspst+PLMPs+s.u+ARcLL+pGKAtVh+..hhPFTIpLptp..tss.tsQP.lpltlDPGu+hoGlul....hssp..cslhsuclplhhtpl+ctl..................tspRphRRuRR.sR+h.....RYRpsRFsN..R....c+..s...tW..Ls........PSlpp+VcsplphVpcLp+lhPlosIshElV+FDhQthps..P-IsGhpYQpG ...................VhVlspptpPLhPsps.u+ARhLL+pG+AtVh+....thPFTIhLpp......p....sss.........sQP.lpltlDPGu+hoGlAl..............hppp..............ps..............latuElphRtp.pl+ctL.................ppRRthRRsRR.pR+h..............................RYRpsRF.NR................p..+...p...GWLs...............PSlpp+lpshhshVp+LpchhP..lss....ls.....ElV+FDhQthpN....P-l..sGhpYQpG.............................. 0 33 62 83 +14082 PF14240 YHYH YHYH protein Eberhardt R re3 Jackhmmer:A8ZMB4 Family This domain family is found in bacteria, eukaryotes and viruses, and is typically between 141 and 198 amino acids in length. There is a conserved YHYH sequence motif. 
25.00 25.00 26.00 25.40 23.00 24.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.42 0.71 -4.58 57 242 2011-05-19 12:29:31 2011-05-19 13:29:31 1 18 133 0 117 242 1052 161.80 25 17.45 CHANGED shsaplPhs..P.thu...........sps..sshsh....sshGlAlNGVsh.ssss...................t.t..s...s...hhs........................slDpssuHspssG.sYHYHuh.Ps...shhp.t...tt...ssc.............usllGaAhDGFPIY..u...hs.sss..p............................................suDLDcCNG+hsss.......GtY+YalTssa...Pal..pCa+Gs .................................................................................p...h.lshp..P...t.............tps..pths...........sslGlslNGlsh.sshs.............................t...s.....t......................................shD.ts..sGHs....p..s.u....sYHY..H.........t......ss...shhp....tt......ssp.................................usllGaAhDGFPIY..Gs.hs.sss.p..........................................................................................t..s...s.......D..LDpCsG+hsss........................utY+....Y....asT....tsh....shh.tshhG............................................................ 0 65 96 111 +14083 PF14241 DUF4341 Domain of unknown function (DUF4341) Eberhardt R re3 Jackhmmer:A8ZP08 Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF04143. There are a number of conserved glycine residues that may be functionally important. 
25.00 25.00 25.30 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.99 0.72 -4.27 233 845 2011-05-19 13:03:03 2011-05-19 14:03:03 1 3 766 0 330 765 339 64.00 36 39.15 CHANGED sPhhuLhGGhLIGluAslLllhsGRIAGIS.......GIluu.....l........lss...................pssts.W.RlsFlsGLlhushlht .........h.uhhGGhLIGluuslLllhs......GRlAGIS.............GIluu.l.......lss.............................tts.hs..W..c....lhFllGllhushlh............... 0 101 209 276 +14084 PF14242 DUF4342 Domain of unknown function (DUF4342) Eberhardt R re3 Jackhmmer:B0C276 Family This family of proteins is found in bacteria. Proteins in this family are typically between 97 and 206 amino acids in length. There is a single completely conserved residue P that may be functionally important. 25.00 25.00 25.80 25.30 24.60 23.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.78 0.72 -4.21 24 177 2011-05-19 13:27:51 2011-05-19 14:27:51 1 2 141 0 62 144 7 82.10 31 52.91 CHANGED Epppc.shsE..chpsoss-llpplKcllcKGNVpRIhlK+--.+sll-IPVsuGlhhGsI.u.sllhPhlshlGs...luAlsschTlEIp+ .....................pph.-phphssscllctlK-llcKGNVsRIhl+K--.+sll-I..Plsssl.hGs....l....u.slhhPh.lhhl.us...huAllschplcl.+............. 0 36 55 60 +14085 PF14243 DUF4343 Domain of unknown function (DUF4343) Eberhardt R re3 Jackhmmer:B0C6A8 Family This domain family is found in bacteria, eukaryotes and viruses, and is typically between 127 and 142 amino acids in length. 
28.50 28.50 28.90 28.50 28.40 28.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.71 0.71 -4.30 33 270 2011-05-19 14:59:22 2011-05-19 15:59:22 1 1 245 0 46 151 7 125.30 54 49.75 CHANGED ppshFlKPsstsK.............tFsGpVhptsp.............-ls..........ths......s.......hspc.....p.lhlS-shp.htsEaRhallsucllssuh..Y.................+ss....hchcs.....-...scshppshphh.t.....hshspuaVlDluhsp.stp..htllEhNsh.uuGhY.uscstphlp ...........................................N.WGKFIKPKAGSK...............VFTGR...VVNsT+.............DLI..........GI.......G.......LPFD.....YPIWISEVVE.FIA......EWRCFVLDGRVLDVRP..Y.................................TGD....YHAQF........D.ASV.IDEA..ISCW.K........DAPIAYGLDIGVTR..DGR.....TLVVEVNDGYALGNY..GLSPLK...ohs........ 0 20 36 43 +14086 PF14244 UBN2_3 gag-polypeptide of LTR copia-type Coggill P pcc Jackhmmer:Q9SKR1 Family This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.86 0.71 -4.66 45 858 2012-10-02 13:37:57 2011-05-19 16:31:48 1 147 45 0 183 1298 0 123.20 19 15.91 CHANGED uDsssh.Iss.h+L..sGsNYttWSpslphhlsuKsKhGalsGsl..stP.tpsD......acpWpppNuhlhuWlhNohssplhpshlhhssApplWcslt..ppappss...ssplhp....l+p..pltphp.Qs.stslppYasc.hhp.LW...-hpphp......hpsstsp ..........................................................................hL..st.p.N.Y..Wptth.hhl..tu.p.s.....h.h.....s.a...l..........s.......G....p..h..................P..................p........s....p................s..tht..pWptp.......sshl......h.uhl....h.sohs.p.l..h..pp...h....h....h..h.p.....o.up..ch...Wptlp..ph.a..sptp....tp.hhp.........lpt.....tl.thp..pt.t.pl.tah.th.....h..............................tt....................................................................... 
0 20 109 161 +14087 PF14245 Pilin_PilA Type IV pilin PilA Eberhardt R re3 Jackhmmer:Q59589, Jackhmmer:B0C6E0 Family This family consists of proteins which form type IV pili. In M. xanthus these pili are required for social motility [1,2]. 24.50 24.50 24.60 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.36 0.72 -3.52 48 287 2011-05-20 08:28:23 2011-05-20 09:28:23 1 8 84 0 98 303 30 100.80 20 62.61 CHANGED pssKA+QSEA+shluulN+AQQuYhhEp..spFs........ss..lssLslsh........t..poppY........sYshsssss....................................sstt.......sssstssslpsYsuulhhsss...........ssssuslCcsssss .............................t+u+poEA+ss.L.pulhpAQpuaahEp..spau.....................ss....hscluhss.................p....psspY..........sYplssuss.........................................h.......h....t..................................................s............................................................................ 0 40 72 96 +14088 PF14246 TetR_C_7 AefR-like transcriptional repressor, C-terminal region Eberhardt R re3 Jackhmmer:B0C7Z3 Family This family comprises the C-terminal domain of transcriptional regulators of the TetR family. It includes the AefR transcriptional regulator from P. syringae [1]. It is found in association with Pfam:PF00440. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.53 0.72 -3.75 164 1183 2012-10-03 00:15:22 2011-05-20 10:40:53 1 4 683 4 387 1012 42 54.30 26 25.21 CHANGED GpLp.lsDsphAApQFhuLhcu.th.ah..plhs....sp....ss....tt-h....ppllcsAVchFLutY ............GtLp.hsDsthAAppahuLlpu.ph.hh..tlhu......hs.....ts.s..spch...cphscpAVchFLttY................. 0 84 213 288 +14089 PF14247 DUF4344 Domain of unknown function (DUF4344) Eberhardt R re3 Jackhmmer:B0C924 Family This family of proteins is found in bacteria and archaea. 
Proteins in this family are typically between 247 and 291 amino acids in length. There is a conserved EED sequence motif. 25.00 25.00 29.50 28.10 23.60 23.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.59 0.70 -4.69 33 149 2011-05-20 10:11:40 2011-05-20 11:11:40 1 5 108 0 52 149 25 172.00 24 70.55 CHANGED Y..sspssptpphpchlppsph......hEshsshhssh.hhlPpslslth.ts...u-ssshYDP-spsl..plsYchlspshp...hFtp........ts..pssptht..cs....A.ls.s.hh.aolhHEhGHAhIshhplPllG+EEDAsDplAullLlph..s-..s..Gs.......hsluu..AshFthpucc..cs.pht-hs..........ahD-HSlDhQRaYshlCllYGSDPcpas.sLlc..cut.Ls..p-RA-hCttEYpplspsWppLLp .............................................h.ttth.......p....ht..pt..h.hstsl.l.h..s...up.sshassptttl...hsYphh....t...hh.t..............t......tt.........s.hs.shhhhhhHEhGHhhlt..plPhhG..pEEDssDphAsh..hhlp.......p..ptt..........hhhs.s.st..ah.htt.tt..t..th..p..s.........hhstHuhchpRhashhChhYGussptht.tlhp......pht.h...pcRtthC..patthtpsW.phl.t....................... 0 8 18 36 +14090 PF14248 DUF4345 Domain of unknown function (DUF4345) Eberhardt R re3 Jackhmmer:B0CDG7 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 125 and 141 amino acids in length. There is a single completely conserved residue E that may be functionally important. 25.00 25.00 25.00 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.37 0.71 -4.67 33 186 2011-05-20 10:55:15 2011-05-20 11:55:15 1 1 172 0 55 174 304 121.30 22 89.27 CHANGED cthhphhLslsulsslshGlshslsssshhsstsssssss.........hcuphRahuGhalulGlhhlhuhhphphtphshhhlshhhhsuGlGRllShhhsG.hPssshlsuhlhELllsslhhhhhtth ........h....phhlhlhuhhhlshGlhhhhsstthh.s.sh.s.ssthss.........shssphR.h.h.u.Glh..hGlGlhhh..h..ssh..p..h.phhshslhhlhhhhhssulGRLlol.h.h..cG.sP.tshhhshhshE...llhs..slhhhh....h........................... 
0 16 36 46 +14091 PF14249 Tocopherol_cycl Tocopherol cyclase Eberhardt R re3 Jackhmmer:B0C447 Family This family contains tocopherol cyclases. These enzymes are involved in the synthesis of tocopherols and tocotrienols (vitamin E) [1]. 25.00 25.00 25.40 25.20 24.40 24.90 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.49 0.70 -5.73 22 180 2011-05-20 12:58:36 2011-05-20 13:58:36 1 3 160 0 72 189 73 263.30 27 72.89 CHANGED sRRFFEGWYaRVTLP.EhtpoFAFMYSIEDP.hG............GpsaSGGuAQILGssDpYlCRhFP-VcpFWAStt....tpLuluH.ttps.ph.sphLsPptFpcplppGYQsTsshpQGhItDsu......osphsRWpYphpPlYGWGs.sp.QpSTAGWLSahPIFEPGWQILMAHGLATGWIEWsGcpY-FpsAPAYSEKNWGGu.FPpKWFWlpCNsFpsp.sDLALTAuGGhRpVLhh...hEsVAlIGlHY.pG+..FYEFlPWs......uploWplsPWGpWphpAcNpp.apVclpusTp.ps......GTsLRAPT.pp...GLtahCRDThpGclpLpLhp..........ppsclIlcupSshuGLElGG ......................................................................................................................................................................................................................................................................................................h...................................uhhGhhu.hl.PhhEstWplh.hhtGh.us.Ghl.....phs.Gcpa-F..s..su...u..YsEKNWGtu.FPpcWhWl.Q.sNsFp....st......ssl....ulssuGuh.htl.hhh.......hcs.su..ll.u.l.ah..pGp..hYcF.sshs..................utl.....ph.p.l...p...s.h.u..p.Wplpu..p......s....pp....a.tlplpu.ps.p..p.......G.p...L...p..A..P..s..tp....sht...tsc-ohtGplplplhc.................psphlhpspo..phuulEhGG................................................................................................................... 0 30 57 67 +14092 PF14250 AbrB-like AbrB-like transcriptional regulator Eberhardt R re3 Jackhmmer:A8ZQN0 Family This family of DNA-binding proteins is likely to act as a transcriptional regulator [1]. This family does not include E.coli AbrB, Swiss:P75747, which belongs to Pfam:PF05145. 
25.00 25.00 25.30 27.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.40 0.72 -4.07 31 183 2011-05-20 13:30:59 2011-05-20 14:30:59 1 1 74 0 80 187 216 70.60 55 54.51 CHANGED sFhsALL-Ac....GlsLssps....supsptGRpsoYRloVQsNGNLLIGuAYT+pMsLpPGDEFEIpLG+..KH..I+Lh .sFYcALL-A+....GlsLsssu......tupuptGRpsoY+soVpuNGNLLIGpAYTcphsLcPGDEFEIcLG+..Kp..I+Lh....... 0 12 53 71 +14093 PF14251 DUF4346 Domain of unknown function (DUF4346) Eberhardt R re3 Jackhmmer:B0BZ08 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 127 and 502 amino acids in length. There are two conserved sequence motifs: LDP and DHA. Many members of this family have been annotated as dihydropteroate synthases, however no experimental evidence can be found for this and Swiss:Q57571 has been shown not to possess dihydropteroate synthase activity [1]. 25.00 25.00 25.10 25.20 24.20 24.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.45 0.71 -4.37 36 151 2011-05-20 14:28:43 2011-05-20 15:28:43 1 4 138 0 83 154 139 92.90 40 30.70 CHANGED pslDcpLSpRaIsLDPuGYFlIhlD+csuhIsAcHasNsIs-+GLAsDPETGEslsC+G.psp.R.sssslapGRTAKELulplhEp.ppssPlopLDHAhYLGREhp+AEhsLlsGpcYlQD ...........................................t........................................................................h.h.........p.pspthhpG+oAKclh.plhEp..t.s.....lopLDHAsYLGRELt+AElALhpG.p.pY..lQD....... 0 20 55 73 +14094 PF14252 DUF4347 Domain of unknown function (DUF4347) Eberhardt R re3 Jackhmmer:B0CAL0 Family This domain family is found in bacteria and eukaryotes, and is approximately 160 amino acids in length. There are two completely conserved residues (C and G) that may be functionally important. 
24.60 24.60 24.70 24.80 24.40 24.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.91 0.71 -4.75 113 300 2011-05-20 14:50:08 2011-05-20 15:50:08 1 150 158 0 134 354 204 155.90 31 6.72 CHANGED pllhlDusVs-hppLlsulh..........s........ss........c.....lllLDsspDG....lpQIsphLpsp.....s.slsulHllSH.G.ssGsLpLGss.pLstssLsp..hssp...Ltphup..sL.stsu-lLLYGCsVAu..G.....stGppFlppLuplT.GAcVAASsshTGssthG.Gc.WsLEhp.sGs.lpssh......shsttshssYsulL ....................................................llhlDusVpchpsLlsulh............................s........ss........p.....lllL-sspDG....lpQIsphLpsp...........s.slsulHllSH.G..ssGplpL.Gs.........s...hLsts.sLts..hssp......Ltp..h..sp..........sL...stsu-.....lLLYGCslAs...u...................ttGppFlppLup...lT.GAsVAASss.hTGssthG.Gs.WpLEhp.hGp..lpst........hh.....tta.................................................. 0 34 87 114 +14095 PF14253 AbiH Bacteriophage abortive infection AbiH Eberhardt R re3 Jackhmmer:B0CCK0 Family This family of proteins confers resistance to bacteriophage [1]. 
24.90 24.90 24.90 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.12 0.70 -4.79 44 409 2011-05-23 08:00:20 2011-05-23 09:00:20 1 3 336 0 51 289 12 245.00 19 82.16 CHANGED LaIIGNGFDls.HGLpTpYpcFt.pYl.....................................ppp................................................cpp...lh...ct...lt......................chh...p..ttp.hWs-hEpsLu.cls.....hc.........................pp...............h.tth..p....chh.-..shpc........hhphp..p....pt.h.p.hh..phphpp...........hhp.chh.p......hphhp.....p................hhtpph...hp..ph...h..pppshF.loFNYTsTLEplYsls..pl.alHGt.pt................pplhhsHG.......st...pt...s.hh.........s.hs-c...................pchpptp..........h.h........tps...hh.phhh+shc....spthh.p..pppthhptl.s..sl.........pplhlhGaSluclDhsYhpcIhppl....sssph.hhhaas ................................................................................hIlGNGFDlt..a......GL....p........TpYp.-Fh..pah.............................................................................................................................................................tp.......................................................................pt.hh...p....l.......................................p.h.........t.p..tWsDhE.plu.phs....pp.....................................tp..............................................................h...h..p..p.hh..s..p.hpc................hhtht...p.....p...h..p.......t.ph.p.........................................hhp.phh.p............h.hhp.....p.............................................................h.htth......p..p...........pp.s.h.lsFNYT..p.s.......lp.p.h.ht............th...pt...................t..hhhHu.....p....p....s.hh...........s.hscp.............................................p...p........................pp.....p.hhpthh...........h.p.....p....t.l..t..s...........................p.lh.lhGhSl...ut...s...Dh.happlhpp.......ss...hh....................................................
... 0 18 37 44 +14096 PF14254 DUF4348 Domain of unknown function (DUF4348) Coggill P pcc Jackhmmer:Q64RZ7 Family \N 21.10 21.10 21.10 21.10 20.90 21.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.78 0.70 -5.29 14 97 2011-05-23 09:07:11 2011-05-23 10:07:11 1 1 93 2 12 80 0 259.60 43 95.51 CHANGED lhl.ulhh.LhlhsuCssp.psctcP.h.us..optlDSspscssDo.s...phlsEpPh.PtsADEhFDDFhFNFAuscKLQ+pRlpFPLPhY.pGcp.spcIcKcpWKhD.hFp+QsYYTLlFDpccpMchsKDTslsSVhVEhIaLcp+pVKpYaF-RhcGpWhLpuIshpsh..ccstNtsFlpFYp+FusDShFQtppl+pPLtFsssDPD.D-FshlpsTlsspQW.uFpPt.L.PpsplhNIhYGQK.s-.ospKIlsl+GluNGhps.LhF+++tspWcLhK .......................................hshhl.lhlhsuCusp..psshDP.h.so..op.tl.DSh.t.pps-o.p....shlsEpPh.PtpADE.FDDFhaNFAusctLQ+pRlpFPLPhY....succ.....pspIcccpWKhD.hFp+QsYYTLlFDp-cpM-hstDTsL..sSV.VEhIalKp+hVKpYaF-RlcGtWhLpuIshcsh..cpst.NtsFlcFap+FssDShFQtpplppPLt..Fls.sDPD.D-FuhlpTTlshsQW.uFpPt.L.Pt-tl.NI...YGQ+....s-.SspKIlsl+GIuNGhps.LhF+.++sGpWcLhK................................................ 0 5 10 12 +14097 PF14255 Cys_rich_CPXG Cysteine-rich CPXCG Eberhardt R re3 Jackhmmer:B0C8L0 Family This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There are 5 conserved cysteines which occur in a CPXCG motif and a DCXXCCXP motif. 25.00 25.00 25.10 25.50 24.70 24.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.99 0.72 -4.14 50 377 2011-05-23 09:38:52 2011-05-23 10:38:52 1 1 360 0 122 303 213 51.30 39 80.52 CHANGED lsCPaCGcphclhlDsSsG.sQpYhEDCpVCCpPIphpl.pl.D.-s.phplplts- .......hpCPaCGctlpl.hlDsSsG.sQpYhEDC.lCC+PIplsl.pl.D...ct..c.plplh.-................ 0 34 63 97 +14098 PF14256 YwiC YwiC-like protein Eberhardt R re3 Jackhmmer:B0C8C3 Family The YwiC-like protein family includes the B. subtilis YwiC protein Swiss:P46909, which is functionally uncharacterised. 
This domain family is found in bacteria, and is approximately 130 amino acids in length. There is a single completely conserved residue G that may be functionally important. 25.00 25.00 26.00 25.90 24.30 23.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.97 0.71 -4.06 64 369 2011-05-23 10:22:13 2011-05-23 11:22:13 1 1 357 0 79 307 2 127.30 34 50.68 CHANGED lPsQHGAWuMlllPhlhGh.huu................shhp.lh............L..hluWhh..hYlhpaPh..hhhlKp.+p........ptp....ahph........h....hlY......u.....sluhhhslhslh.hps.pllhash.sh.lPLhhlshaastp+cERuLlN-lsullshulhuhs..ua.hh .....lPpQHGAWsMlllPFlhGhhlus.....................P..shhH..l.............L..hluWhh..hYLssYPh..hhhlKp.+p..........pcc....ahph........sllY......h.....sluhlhulhsLl.hp..pllhash..shlPLhhVshYas+pKpERuLlNDluullshslhGhsuhh............ 0 19 48 66 +14099 PF14257 DUF4349 Domain of unknown function (DUF4349) Eberhardt R re3 Jackhmmer:B0C5Q6 Family This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 282 and 353 amino acids in length. There is a single completely conserved residue D that may be functionally important. 
24.40 24.40 25.50 25.10 24.10 24.30 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.40 0.70 -5.16 99 422 2011-05-23 10:50:48 2011-05-23 11:50:48 1 4 385 0 188 435 178 256.80 18 81.31 CHANGED l..su.Cuusss............................s.............ss.......s.............tsssus........s..ss...sts.sutts.................................sss...s..ss....sss........ss....ss.......s........s...................p.p..lIp..su..sls..lps..ps...hspshsplpshspptu.Ga.lssps......p..t......tts.ssptpuslslRVPssph-shl....spl.p......s.l.....G.pl..pspshsucDVTpphlDlcuRlcshcspppRLhpLh.p.+As...slpDllplEppLuplps-lEulpuphchLpspluhSTlslslpp......t.ss....sp...s.....shh..sthtsuhpsuh....sshh....s....hhthlh....hhl....s.sllshhshl.h..lhs.h.ls.hhhh+ ...........................................................................................................................................................................................t................t.......t.t..t...s................................................sss.s..ts.......sss.......ss....ss...........s........p....................................pp...llp..su..slp..lps..ps...lspshsplpshspph..s.Gh.ltsps...............t..p..................tsstptpuplslRlPs.s..phcshlsplp.......ph.....G..pl..p.scshpupDVTp.......phh.Dlpu.RlpshptpppRlhplhp...cAp....slp-.llplcpcLu...plpscl-phpuphp.lppplshuTlslshpp......................s......ttt..........sh....tthh....p.u.ht.tuhpshh....thhthhh....hhh....s.h.hlshhshh.h..hhhhhh.hhh............................. 0 92 150 173 +14100 PF14258 DUF4350 Domain of unknown function (DUF4350) Eberhardt R re3 Jackhmmer:B0C323 Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 70 amino acids in length. 
26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.32 0.72 -3.77 163 383 2011-05-23 12:43:26 2011-05-23 13:43:26 1 4 379 0 148 374 7 71.10 21 18.23 CHANGED sssssGs.tuhtp....hLpp....p...Ghpl......pthpp..s.hs...th..s..s................s....ss.....ol....lllsss..ht..hst...tphp.pl...hpalp..pG.s..plllss ..........sspspGs.tAhtp....lLps....p....GhpV......phhps...s..hp...ph..s..s.................p..ss.....oL....lllsss......t..hsp......tphp.tL...hphsc..tG.scllls..................... 0 44 100 136 +14101 PF14259 RRM_6 RNA recognition motif (a.k.a. RRM, RBD, or RNP domain) Coggill P pcc JCSG:Target_421663_WS20613B Domain \N 27.00 18.00 27.00 18.00 26.90 17.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.07 0.72 -3.99 76 8811 2012-10-02 20:46:34 2011-05-23 13:46:14 1 359 393 72 5363 52886 951 69.90 20 16.00 CHANGED lhlpsl.s....s.sst.pclhphhst....h.s.p.....l....p.slphh....t..p..............pst..Ahlph..s...stp.sAppshpp...h...p...hh..l..ps+hlc ..........................................lhlpsL..P..h......s..s..o....p....p....-.....l........t.....p....h....F.p.p..........................h..u..........................l..............p..sl...p..lh.....................h......s......t...t.....................................pu...h..........A.a.......V..........p....F.........t.................s..tc....p....Ap...p...A.l.pt......h......p....h......h...tsp.l................................................................. 0 1680 2599 3942 +14102 PF14260 zf-C4pol C4-type zinc-finger of DNA polymerase delta Wood V, Coggill P pcc Jackhmmmer:P30316 Domain In fission yeast this zinc-finger domain appears is the region of Pol3 that binds directly to the B-subunit, Cdc1 [1]. Pol delta is a hetero-tetrameric enzyme comprising four evolutionarily well-conserved proteins: the catalytic subunit Pol3 and three smaller subunits Cdc1, Cdc27 and Cdm1 [2]. 
24.00 24.00 24.20 24.10 23.20 23.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.34 0.72 -3.97 123 623 2011-05-23 13:01:37 2011-05-23 14:01:37 1 15 346 0 405 635 5 74.40 35 5.48 CHANGED Cl....s.Cc.s.lppt................slCppC.t..spp....sslh.phhschpphEp+...hscLhshCppCp..............ushtpplhC.sSpDCPl.FYpRt .................................................................................Cls.C+s.lpptt......................slCspC..p....spp.....splh.phlsclppLEp+ascLhotCQpCp.................Gohc.p-VhC.sS+DCPl.FYhRh...................... 0 141 227 345 +14103 PF14261 DUF4351 Domain of unknown function (DUF4351) Eberhardt R re3 Jackhmmer:B0C7B5 Family This domain is found in bacteria, and is approximately 60 amino acids in length. 25.00 25.00 25.00 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.76 0.72 -4.10 123 513 2011-05-23 13:20:27 2011-05-23 14:20:27 1 8 115 0 203 604 76 61.60 34 25.98 CHANGED p-Gc....pc....................tt....t.sLlLR.LsRRhGplssp......ppIpsLSlpQLEsLu........EALLDFsslsDLpsWL .............................................pGhpc............up....tpLllR.LpRRhG.plssphp....ppIppL.s.l.ppLEsLu........EuLLDFssl...pDL.tWL............. 0 62 162 200 +14104 PF14262 DUF4353 Domain of unknown function (DUF4353) Eberhardt R re3 Jackhmmer:B0P678 Family This family is found in bacteria and archaea, and is typically between 262 and 279 amino acids in length. 
25.00 25.00 25.60 25.00 24.60 24.90 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.05 0.70 -5.18 51 568 2011-05-23 14:50:37 2011-05-23 15:50:37 1 12 299 0 137 543 7 204.70 25 61.88 CHANGED Glslsus....s.V.TIosuGTYhlSGohs..sGQIlVsA.sc...scpVpLlLcGssIosossusIhVps.AcclhlsLu-GTpNoloD....uupas........tsspssAAIaS+sDLTl.sGsG.sLsVsushssGIp..S+DsLhIsu.GThsl....sA.scculpGKDuVpI.....ssGs...lsl.sAssDGl+Sc.....N--D.....sscGhlhIsGGslTIsAG.sDGIpAssslhIsGG.s....lslss.......................uscGlcutt.l...slsGGslslssu...DD..Glpust.........................plplsGGsh..slsuuc...DGlc .............................................................................................................................................................................................................................................h.s...h.h.....t.......s...l....lt.u...t...tp...ul...sp.....s.s......lhlt.s..G.....s..h..pl.....p.u....s.....tcul.....pup....s....tlpI............psGs.....hsl....su.......s....s.......D...ulcus...............ttp.p...........................tt.G.h.lhIsG.Gsl....sl.........s....u...u....s......D.....G.lcA.........s.........ss....l.hIsuG.s..........ls..lps.............................................................u.cGlcu..t.slslsGG..sls.l.us...sD....ulpus.......................................................st............................................................................................................... 0 82 123 133 +14105 PF14263 DUF4354 Domain of unknown function (DUF4354) Coggill P pcc JCSG:Target416839_SP17692A Family Several members of this family are annotated as being ATP/GTP-binding site motif A (P-loop) proteins, but this could not be confirmed. The one PDB:3NRF structure solved for this family exhibits an immunoglobin-like beta-sandwich fold. 
Crystal packing suggests that a tetramer is a significant oligomerisation state, and a disulfide bridge is formed between Cys 125 at the C-terminal end of the monomer, and Cys 69. 25.00 25.00 25.80 29.00 24.90 24.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.57 0.71 -4.43 10 100 2012-10-03 03:07:29 2011-05-23 16:13:37 1 1 97 3 13 46 1 121.80 51 96.22 CHANGED MKp..thhluulALsuh....shsAsAsssDslhVaATppopGolSlGc+shYTKsFcVsVsNhucpsIDLsph..Ch+AauscG+cF+lDTVDEcLspGoLKsGpsVKGhAVFAS-scSVYpAshVKlSss ...........hKp..hhhhshluhsuh....sShApsAussslMlhATscopuus..SsGDKsFaoQTFDluVANsuuoDI...sLcKl...CFlAluscGKoFssDTIDpKLToGlLKsG-SVKGFAsFAusDcSlYcsplVKhS-.s....................... 0 3 8 11 +14106 PF14264 Glucos_trans_II Glucosyl transferase GtrII Eberhardt R re3 Jackhmmer:B0P6U7 Family This family includes glucosyl transferase II from the Shigella phage SfII, Swiss:O21944, which mediates seroconversion of S. flexneri when the phage is integrated into the host chromosome [1]. 
25.00 25.00 27.00 27.00 24.00 21.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.26 0.70 -5.43 56 371 2011-05-23 15:20:29 2011-05-23 16:20:29 1 1 254 0 42 303 10 302.20 21 64.26 CHANGED hhshlhsshhh.hD-....stsh.h....uhtsW..........hp...GR.hhphlhphh....hpsh....hsh.s.hstl...Luhlhluhuuh.ll...sp.hh...shp..........p...phh.sslhshh.h.hssPhhl.pph.uapasuhthululh.husluhhhh..........p.............pp...p..h.thlh.uslhlh.huhshYQuhhs...lalslhlhhhlhph.............l.csp.....tshpthhh.tlh....p.lhh..hlhuh..slY..hlhhplh..hhhh.psphssh..t..spht...h...tshhps.lhpshpph..hphht......t.sh.hhhh..hlhlhllhhlhh..hhhhhtpph.....p...t.h.thhlh.l.lhhhhhP..hhh.h.....lhlh...hs.s.......shhps.........hhshshs..h.hh..hhh.llh .......................................h.hhhtsh.h.lDD....t+tht..........uhtsa..........sp...uR.h.phlphhh....spth...hsh.shh.s.l.....Luhhhlulssllh...hh.hh.....stp..........p....shh..shlsshl.hhhs.PhFl.psh.uFpasu.hhululh.huhlshhhh..........p..................ps...p..h.hhlh.uhlhlh.hhhshYQushs...lalslllhh..hhhpl.............l.ptp.......htthhh..hhh....t.hhhhlhuh..lhY..hlhhph.........ss..ptsth..ts.tpht....h...........tch.ps.lhpshpph......hp.hht.........psh...hl..hll..h....h.llhllhh....hh.thhp.p......t.....hh..thhhh.l...lh.lhlss..lh.h...h......lhlh.....hsp........hhts.........h.tashsh.sh..hhhh..h................................... 0 14 20 26 +14107 PF14265 DUF4355 Domain of unknown function (DUF4355) Eberhardt R re3 Jackhmmer:B0P709 Family This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 180 and 214 amino acids in length. 
24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.81 0.71 -4.17 85 633 2011-05-24 07:17:12 2011-05-24 08:17:12 1 2 519 0 57 462 6 126.50 21 66.38 CHANGED ppp.pt...cphscp-.....lschls..............cphpchcpc.....pp.....................Eu...........c+...l.uchss.c-.......+.tc.....h-hcph.......pc.......cl...pchctphpppchp.....spspphLs-pul..sss........hhs..hl.........l..ss..DsEp....sppslcsh...pph....hs...pslpptlc.cp....l+us ........................................p......pphsppc......lsphlp...............cphpphcpc..tpct...................hp..Es...........ccl..sc.hst.cc........c.tp.......hchcph.......cp........-l.......pph....cspht....ppchp.....sps.pphls-tsl...sss........lls..hl.........l..ss..ssEp....scpslcsh...pph.......hpphlppthp.tthpt................... 0 17 36 47 +14108 PF14266 DUF4356 Domain of unknown function (DUF4356) Eberhardt R re3 Jackhmmer:B0PD80 Family This family of proteins is found in bacteria. Proteins in this family are approximately 540 amino acids in length. 
25.00 25.00 25.30 26.40 24.80 24.30 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.67 0.70 -6.09 10 232 2011-05-24 08:09:00 2011-05-24 09:09:00 1 3 178 0 31 192 2 426.50 34 89.65 CHANGED sps...hsasplAsRlLGsPp....Dcs-YaNpLacLpps.pshphhpppLcKpIsscc.h...QclpcIHs......lspcp...oVs+hlAaLsucpll....s+cssslh+Rpl+pAhhpVl..E.ha...+cp.sLs+s....c..s.....hlVcLls...Wlpsalsphhcshchc...cphPKVVWYG-sscSplYFLaaLhhlGCDVLlFHP..s.pscDsFpcsD.E.p.h.hl.hphs.sossLEPFPsEc..sRpuTV.AY+uo+ElEplLas.DSul.YKPWQF+casPhoVTLKTTYDELFllsKE+AhlRPsFcsscs.oVsIPNlFAKIsGVoc.DppEYWs+l+sLt.s.p-Tphlps.FP.FTcpp..puNap.aapcsls...csGcIcs-cLhpSslWpYpcLs-GlQcuIAcsIpchCcpPhlK...tE..p.spD.ltlalFsQlsplssslLcLIQsFDYoQslPKlVlYpo-psspLoRsDAssLlFLNclGlDIllYNPsGapsIEpYI-cstFDsHaL-EhlFsLpa+EsSt......ppll+KLF .............................................................p...htashlhsRllGl.p......D.s-YhscLachsp....s.h....t..t....h....hpt.pslsc..sIsscp.h....pc.l..t.l+p..............tt.pt........sss..ch.lupLssppLl....hpss..cLt+h.l.+psFhslL..cla.....ccp....p.ppl.....K..s.....Fhlchlp..h.ppalsphhpshshc..........cphP+IlaYG-..hp.csclYFLhaLhhlGCDVLYhpP........c.sc-sapp....l......Dpctp.h..hh..h...phs...tphslpsFP..ccc......p.RhuTl.AYpAo+EI-plLaptsShlYKPWQFcsass.shTLKTTYDElhllhcEcAhlRPsFhspsp..plhIPslFAKIsGVpp.spc-Yapclcslt.s..pso...h...hl......pshP..ascpt....psshp...pYp.ph..ls....................ptGplc.-hlhpSpha..appLspslQptIhctI.chhcp.hhh.............................p..p.pp-..htlhlhtpl.pls.plLc.lppFDYspplPKlllapspps.plo+pDuhlLhFLNplGhDlhhasPsGhssIE.aIp...tthaD.HhL-chsas.phpt.s...........hhptlF................................. 0 12 24 26 +14109 PF14267 DUF4357 Domain of unknown function (DUF4357) Eberhardt R re3 Jackhmmer:B0PED6 Family This domain family is found in bacteria and archaea, and is approximately 60 amino acids in length. There are two completely conserved residues (G and W) that may be functionally important. 
25.00 25.00 26.10 25.20 24.10 24.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.51 0.72 -4.37 54 287 2011-05-24 08:19:50 2011-05-24 09:19:50 1 13 270 0 82 286 23 52.60 34 16.61 CHANGED RppLlssGll.tt.ps...sp.hlFscDhhFs.SPSsAAullhGpssNGhhpWK.stsGpTLc ......................phhtpthl......t..........tt.hhhscDhhFs.SPSsAA.shVlG.p.o.sNGhspWK..stpGpoL......... 0 37 65 72 +14110 PF14268 YoaP YoaP-like Eberhardt R re3 Jackhmmer:B0PGJ1 Family The YoaP-like domain is found at the C-terminus of the B. subtilis YoaP protein Swiss:O34983. It is found in bacteria and archaea, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00583. There is a single completely conserved residue A that may be functionally important. 25.00 25.00 30.30 29.10 22.20 21.30 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.11 0.72 -4.73 37 211 2011-05-24 08:34:33 2011-05-24 09:34:33 1 4 169 0 37 184 5 43.80 38 17.89 CHANGED PlphI+lcohEcApssPssFssaulFYsGcFlTscl..l.scppFcK ........hphIcl-ohcpApssPssaTsaulFYsGcFlTscl..h.spp+hc+.. 
0 22 32 34 +14111 PF14269 Arylsulfotran_2 Arylsulfotransferase (ASST) Coggill P pcc JCSG:Target416597_Pfam-B_1234 (release 25.0) Family \N 27.00 27.00 27.00 27.30 26.60 26.80 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.97 0.70 -5.27 23 447 2012-10-05 17:30:43 2011-05-24 10:41:10 1 8 258 0 240 647 295 268.60 28 56.51 CHANGED +sQhLpspsVlsaWsGshht..hGaGaGtlplLssoYppIapVTlsss....aho..-.psh.SalDhHEupl.oscGTllV..ouhNlTptDLpslGG..p-sahhDuhhaEIDIpTNcllFcWSAl-Hlsplslp..........up..tslsssssspcpPa-haHlNSV...........spas-.sYLlShRahsSlahlp.ps.......GsVhWplpG.pG.GDFph..sss..FsaQHDsRlhppo.....c-shslSlaNN.......sNosh.......ststsTTGllhslDhp.s+psohh+plh........sspcsltSsoQGsaQlLss........uHlllsaGshs+.lcEaDssGplVhp...spFG .......................................................................................................................................h.......................................................................................th...h..c...hH-hp.....h...h...ssu.s.h.Lh......ss.h.p.......h...t.........D....l..s......s...h....GG.........s....p....p...G......h.........l.ssl.....h...............p..................El.......s...h...c....G....-lla-.Wpuh-Hl.s....p.c.h..................................................h.ts...h..........................s.....t.hcahHINul................sh.s.p.-......G....p...h.L.l.ShRpssulhhls...t..po.................G.cl........lWch.....t.......G....................................................................t..........s....h...u................Q....H.sschh............................ss..s..sIhlFDN..................us..h.........tssss.s..spu...h.....l..l..c.l.D..........p.........pths..h....h.cph.h..............................sss...h...hSs...GusQ.h.LsN...........................GNshl.......s......ush....ut...lhEhss-G..c..slhch...h............................................................................................................................................ 
0 43 114 194 +14112 PF14270 DUF4358 Domain of unknown function (DUF4358) Eberhardt R re3 Jackhmmer:B0PFB1 Family This domain family is found in bacteria, and is approximately 110 amino acids in length. 25.00 25.00 25.30 25.30 21.90 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.17 0.72 -4.16 55 201 2011-05-24 09:45:12 2011-05-24 10:45:12 1 3 148 0 17 159 5 106.10 19 57.92 CHANGED pplppsss.....h.p..phpchssptlcph..a.slssschcshhhhtust..sspssElhll+sp-sp.scsVcsslpp+lpsppps.acsYhs..-..phphlcsuhlpp..cGsYlhhllup ................................t....tsh....h....phpphs...sptlpph..a.uls.sthpshhhhhsh...shpss-lhlh+sp-tcth-slcpslppphcspppp.apsYhs...c..phphl.csutlps..cGsalhhhh............. 0 14 16 16 +14113 PF14271 DUF4359 Domain of unknown function (DUF4359) Bateman A agb Jackhmmer:B1XKA9 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. There are two completely conserved residues (P and S) that may be functionally important. 27.00 27.00 30.00 29.80 26.20 26.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.16 0.72 -3.67 39 182 2011-05-24 11:53:38 2011-05-24 12:53:38 1 2 174 0 36 127 19 107.00 39 71.17 CHANGED usshshTNPupp-YppaAuppLsph..hpp-lCt...pphshhLt........hhpsCspLlsst..pstltpllsptTpRpNahlFSlYpT-lshps.......hstaphp..TlGlhspFhshpsp .h.lhLA.oNPo+s-Yp-aAucphhpp..ls+clscs..csppu.hLss.......lsussc+Lscph...scPplshlI-phT+RssYlhFSsYpTEaclss...............pY+al..slGhuphFlsl-h.s... 0 8 23 32 +14114 PF14272 Gly_rich_SFCGS Glycine-rich SFCGS Eberhardt R re3 Jackhmmer:B0P5U8 Family This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. There are a number of highly conserved motifs including an SFCGSGGAGA motif. 
27.00 27.00 144.60 144.40 26.40 25.20 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.43 0.71 -4.21 9 314 2011-05-24 12:25:02 2011-05-24 13:25:02 1 1 314 0 37 103 2 114.80 81 95.98 CHANGED lpVVIGDRLGKGppVAKGVEpAGGpAlVIPGMGADMKLGDVMppEsADlGISFCGSGGAGAlTApTKYGY.s+aGMRSV-EGVTAIp-GppVLGFGFMDpEELG+RlsEAahKKa ...ITVVIGDRLGKGQKVAtGVEpAGGRAVVVPGVAADMKLGDVM+uEsAsFGISFCGSGGAGAITAQsKaGYKAKYGMRSl-EGVTAINEGssVLGFGFMDKEELGcRLVpAapKKa.. 0 4 9 23 +14115 PF14273 DUF4360 Domain of unknown function (DUF4360) Bateman A agb Jackhmmer:B1XRI1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 200 and 228 amino acids in length. There is a conserved GCP sequence motif near the N-terminus. 27.00 27.00 33.10 32.00 26.40 25.10 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.37 0.71 -4.63 14 198 2011-05-24 12:27:51 2011-05-24 13:27:51 1 4 87 0 146 193 13 174.30 29 79.62 CHANGED sspplpIhssshuGoGCPpGosus..slSsDpT....hhohuFDpF.s.lGs...Ghs.ss-ppKNCpLclsLp.aPuGFQaull-usY+GaApL-tGlTGohhooYaFSpssspsss...........oppohpGs...hpG.sYshp-plsssuhlaSsCG...ssu.Ls.INsplsL...Tu.ssusssGphopDsssls...hsQplpltW+.......sCs ...........s.stplpItssshsGoGCPpGosss..slosDpo.....shTlta...s.pahAp.hGs...sss.ss-pRKNCQLslslp.hPsGa.pau..lhs.s-Y+GaA..pL..ptGsoG.s.pusYYFsG.tsppss...........sppshs..GP....hss.saphpDpssh....s.s.....hhWSPCG......sps.LN..lNsplpl.....su...ss..tp.s...tuh.hs.Dshsss...hpphhphtWppC................................................................................................................ 
0 54 101 127 +14116 PF14274 DUF4361 Domain of unknown function (DUF4361) Coggill P pcc JCSG:Target_416718_SP15308B Family \N 25.00 25.00 27.20 26.40 24.90 23.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.08 0.71 -4.58 20 68 2011-05-24 13:04:20 2011-05-24 14:04:20 1 1 29 8 8 59 0 161.40 39 48.06 CHANGED YpKALL+lhPFNDYSGsYSuTsh..pla..h.sGs......spshstss+puaVVD-pTIFFYAGhlsE-h..pDRcpYKlhhpF.........s..pc.........t......slplpss.ssssslpFcl..........hs..psoYplspphDss+PYLc++alolp.lsYcasDhTos....Puh.lpY+VcGohoMpRpI.NTpIPD ..YpKALLRlhPFNDYSGsYouosh..pla..hpss......ssshspss+pu.aVVD-pTlFFYAGhhsE-h..pD.RcpYKIhhpF.........s.s-.................s......slslpss.sssschpFcl...........hs..sPoYplspphDsspPYLcH+alsIp.hsYpasDhTos.....sh.lpYcVcGolohpRpl.NTpIPD.......................................... 0 7 8 8 +14117 PF14275 DUF4362 Domain of unknown function (DUF4362) Eberhardt R re3 Jackhmmer:B0PHJ6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 93 and 146 amino acids in length. There is a conserved IRIV sequence motif. 25.00 25.00 25.40 27.70 22.90 24.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.32 0.72 -3.95 8 109 2011-05-24 13:06:54 2011-05-24 14:06:54 1 2 92 0 9 83 0 89.60 44 60.27 CHANGED KKNDVVVK.GstISNLDKFEpFVlNV-QGcVDKIRIVpYTcEGDPIFQTLEaSGpDIlYV.DNRpschhAGcpKGLaKDSCKSIVK.EQREsposYRLI ....................psD.ll.p.tst.l.Nlc+h-pFlhNl-pscs...DcIRIVpYTpEGDPIFQsL....Easu....p....cIhYshDsRcD.p.FsG.cpKsl..h..KDSCKpIVK.cp+EstssY+Lh............................... 0 5 8 8 +14118 PF14276 DUF4363 Domain of unknown function (DUF4363) Eberhardt R re3 Jackhmmer:B0PHJ0 Family This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 
26.40 26.40 26.50 27.50 25.30 26.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.21 0.71 -4.41 34 185 2011-05-24 13:21:49 2011-05-24 14:21:49 1 1 150 0 49 144 2 115.50 22 93.11 CHANGED hlslhlhll.llhhshastphltpsspclppplsplpptlcp.pcapcAtpphpch.ppWpcppphhslhlsHpElDsIshpls+LppalppcspspuhuplptlKhhlp+lhctEphslpNI .............................h...hhlhhl.llhhshh..phltp...t..s...cplp.pp...lsplppplpp.ccWppAtpphpclpppWpchppthshhlcHp-lDp.lshslt+lppalpscscstuLuplphl+hhlppl.p.p..plpNI............... 0 26 44 47 +14119 PF14277 DUF4364 Domain of unknown function (DUF4364) Eberhardt R re3 Jackhmmer:B0PBH9 Family This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 180 amino acids in length. 25.00 25.00 25.00 26.20 24.80 24.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.74 0.71 -4.63 61 224 2012-10-04 14:01:12 2011-05-24 14:37:29 1 1 223 0 42 169 12 161.20 32 91.48 CHANGED hKLllLYhLc+lchPLoNsQlochlL-psassYFpLQpslsELhcushlphc..pcspshYplTcpGccsLphFpscIspslcccIcpalp.p.p.hph+c.-ss.lpuDYh..p.sssspYh.VchplhEs..sssLl-LpLsVPsccpActlCspWcpp..sp-lYshllshLh ..............KLllLYlLc+lchPloNsQlochlL-.ppahNYFpLQQhLsELhcushlphp..tpspp..........hYplTccGccsLphFts+Is..tshhcclcpalppp.t..pl+p.Esplhu-Yh..t.pssspah.VphplhEs..spsLl-LplsVsoccpActICspW+pp..up-lYshllptLh......................... 0 27 38 40 +14120 PF14278 TetR_C_8 Transcriptional regulator C-terminal region Coggill P pcc JCSG:Target403231_MJ9673J Pfam-B_17743 (release 25.0) Domain This domain is a tetracycline repressor, domain 2, or C-terminus. 
27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.33 0.72 -3.60 44 1050 2011-05-24 13:50:59 2011-05-24 14:50:59 1 4 678 0 160 662 5 77.60 21 39.73 CHANGED thh.hpplhphltcppchhphlhstp.tsssa..hpplpphhpphhhphhppthhptst........hh.hpahsuGhlullppWLp ............hhpplhpalt-Npcah.+sllpsp..tsspa....pp+lpchh..p.pp.h.h.p.h..h..s.htp.ptsh................hh.hsahsuuhlulIphWl.............. 0 55 104 138 +14121 PF14279 HNH_5 HNH endonuclease Bateman A agb Jackhmmer:B1XMH2 Domain This domain is related to other HNH domain families such as Pfam:PF01844. Suggesting that these proteins have a nucleic acid cleaving function. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.55 0.72 -3.97 15 136 2012-10-05 18:28:12 2011-05-24 16:05:27 1 1 131 0 21 135 168 68.00 50 23.01 CHANGED CIlC+c-hscpshs..-EHVIPsoIGG.....ph+hps.lCcpCNscLGpslDsplscphh......phhpthhcIcpc+Gpss ............CIICR.....KDTKE.LS.......EQ.aVIPEILCG.....aYF.T..NS...I...CD..oCpEpho....TNIDRPLIRHKLu...........hKIEpMKtp..hp.s.................... 0 5 11 15 +14122 PF14280 DUF4365 Domain of unknown function (DUF4365) Bateman A agb Jackhmmer:B1XQ96 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 182 and 530 amino acids in length. There is a single completely conserved residue D that may be functionally important. 
21.90 21.90 21.90 22.60 21.80 21.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.46 68 263 2011-05-24 15:26:30 2011-05-24 16:26:30 1 15 243 0 94 248 14 142.10 19 38.93 CHANGED pptuhshlpth........hsph.salhc...psptDhGlDu........lEl....hs...ssp....soG..thls...VQlKuspshh.................psstshphthp........ppchsYWhp.tslPVllV..lhssssp........pha......Whplppthh.........sppptplplspss.hhsspshsplhphsttt ...........................p.u.shhpth........hspt..shhhp....ptptDh.G..lDsh.....lch.............hs......sup.............ssu.....hhls...VQlKsss..shh.........................ssssshsh..hp........spclsahhp..pslPl..lLV...lhssssp................psY...............Whplpspshp....................sppphplplshpp..hsstshpplhp.h..sh....................................... 0 24 59 79 +14123 PF14281 PDDEXK_4 PD-(D/E)XK nuclease superfamily Bateman A agb Jackhmmer:B1XMR1 Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily. 
21.80 21.80 21.80 21.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.33 0.71 -4.42 92 355 2012-10-11 20:44:47 2011-05-24 16:36:41 1 7 324 0 99 367 61 175.10 18 46.51 CHANGED hNlFplhph..........EhtpSshluaLLsPp...ts.........HshsshFlctal.chhttptt..p...................................hpshpVp+E...............ttp..+lD..lhl......pssph....hllIENKlh.....up-pp....................s....QLpcYhp.....hlppch..................................tttphhhlaL............psssppshss..............................t.tahtlsa.......pplhpaLcphhpp.......tttpthpthlppYhp ............................................................................shaphht.............E..pothlshhls.p......tt...........Ht....t..t..a..lphhl.chhttp....t.......................................hpphplppE...............................tttp...+lDlll..........pssph...................hllIENKlh.......up-pp........................s..........QLpcYhp.........hlppph..................................t.pphhh.laL..p..t..............ppssp.shtp...........................................tahhhsa.......tplhph.lpphhp.....................h..................................................................................................... 2 34 64 86 +14124 PF14282 FlxA FlxA-like protein Eberhardt R re3 Jackhmmer:B0PBA7 Family This family includes FlxA from E. coli, Swiss:P77609. The expression of FlxA is regulated by the FliA sigma factor, a transcription factor specific for class 3 flagellar operons. However FlxA is not required for flagellar function or formation [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -11.15 0.72 -4.09 18 424 2011-05-25 07:16:36 2011-05-25 08:16:36 1 2 376 0 79 227 8 98.60 31 75.65 CHANGED SulS..uossSoo.tsusss....supIppLppQIpsLpcpLpcLsss...psh.os-cK..p...pQpphIQsQIptLQAQIuQlQpQpupcsppp....pppsh.p.....spssss-GsNp.Pos.ssp ....................ssh......potsSsp..tStsss........supIsclspQIppLoppl.p.clss........suh.os-pK...p...cQtpLlQpQIphLpsQl.....u...QL.QpQp.A-Ktpcp........ppt..........................h....................... 0 12 40 57 +14125 PF14283 DUF4366 Domain of unknown function (DUF4366) Eberhardt R re3 Jackhmmer:B0P8U4 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 227 and 387 amino acids in length. 25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.74 0.70 -4.92 37 374 2011-05-25 08:16:00 2011-05-25 09:16:00 1 13 188 0 39 339 46 197.90 28 65.93 CHANGED Rhh.u.A.LsAu.l.lls.GFossAaAtGs-ssst........hssss.t......s....c.......pcstslTP-GNhsLVDDh....sst..ssKQFITlsTKuGNhFYllIDRs.sc.s.c......s.VHFLNtVDEADLhALhE-tpst..s.............................thtps.........pspst.p.........p................................cP.Ec.................c.s..shus.....lhllLllu.....l.hGG.GAh.aYF.Khh+sKpppp.s...ssDh.--.hD.hs-..........-p.pp-Dtss.s-- ......................................................................................................................t....................................................t.s.h.o.spGs..h..t..hhDch.....................t....ss+QFIThpTKsGphFYllID+spp.sc......s.VahLspVsEsDLhsh..hEctptt.t................................t...phpcsst......t.psc..p.tp........tp......................c...Ec............pu..shGs...hl.ll.llls.h.suu..GAh.YY.FKlhKs.Kpc.pp.t.....cpD.h.-p..h-...s-.....p......-p.-pp-t.t.t..pp............................................ 
0 25 36 39 +14126 PF14284 PcfJ PcfJ-like protein Eberhardt R re3 Jackhmmer:B0P892 Family The PcfJ-like protein family includes the E. faecalis PcfJ protein Swiss:Q5G3N2, which is functionally uncharacterised. It is found in bacteria and viruses, and is typically between 159 and 170 amino acids in length. There is a conserved HCV sequence motif. 25.00 25.00 25.00 25.00 24.70 24.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.01 0.71 -4.76 32 550 2011-05-25 10:31:24 2011-05-25 11:31:24 1 2 378 0 44 395 28 152.40 26 36.54 CHANGED hWpDYlchhcchtp.Dlp..ssthlpPpsL+ttHDchhtchpthpcccctpchpc+hhc.pthh.c.lKt.+a.....pFoDsplhlpVhcSlp-hhpEGpsLHHCVs...s.ptYh...t+tcohIh.hRhcsc.hE...TlEl..p.csh..VlQsRGhpNp.....sschp-cIhphlpp.tphIppRh ...................................................................................a.Dhlphh..h...t...hp..p.....hhP...s..h..tHD.h.t..php..p...........p.p.p.tp...........p.....ch..p....................hp....h.................hp.s.th.....hhs.cohpEhhpEGpthp.HCVu............u..p.Yh....tptco.hIhShR.h..p.s..p..h..c........TlEl.........s....ps......h....plsQh+GhpNc.....p..p.chtccllsllpp..t.h..................................... 0 18 35 42 +14127 PF14285 DUF4367 Domain of unknown function (DUF4367) Eberhardt R re3 Jackhmmer:B0PHJ7 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 229 and 435 amino acids in length. 
25.00 25.00 25.10 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.12 0.71 -4.93 52 440 2011-05-25 12:10:07 2011-05-25 13:10:07 1 11 247 0 86 388 10 174.10 15 56.36 CHANGED hhp.+htshhhsshlh...hhss.........sh..ssp..Ahpttlh..p.hl.hphhs...phsphphpspptp............................c.....phthp..hp.s.......YlP...cGaphp..........p....hp...hss...t....hp.....hh..haps...sss.phhpa.pppth..p...s....tsh.sh...ssE....ss....ph..cplpl..s.G.......hp.uhh.hc..p.ps.........pph.......lhWpppshha...pl..t.......us.ls.......p-EllKlscsl .......................................................................................................................................h..........hh.h...................h...........t.....hh..t...h......t...............h.hp.stt................................................................................tpht..t.htht..hhtss.hlP...c.G.hphp.....................................s..............hps...s.........hp...........hh..haps....p.pt..thhhh.t.tth...s....p..........tss..sh.ssp.....ts.......ph..cphpl...s.G........pc.uhl...hp....pps.............pts............lha..pppshhh...pl..t........usls...........c--hlclscol.................... 0 50 74 77 +14128 PF14286 DHHW DHHW protein Eberhardt R re3 Jackhmmer:B0P7B6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 366 and 404 amino acids in length. There is a conserved DHHW motif. 
25.00 25.00 25.40 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.30 0.70 -5.14 8 368 2011-05-25 12:28:32 2011-05-25 13:28:32 1 3 220 0 48 338 15 202.40 15 68.82 CHANGED EKRcLAcFPsFShpuhhDGSYFcDlupWYuDTaPhRDtFluhsAshcpLYGlch.sssclhussstptpDts.-sspsssspsh.s..sssspsso..........................sSsspsuospsss.sps.ps..sDsss.pshh-t.........shhpsphtsslalhsspuapLYsFsp-sucpYAuhlNshspcLs.sls.VYDMllPTuhslhLP-s.hpc.h.sousQcpAIsYhYuhhscsVKpVslY-pLtsHs-EYIYFRTDHHWTALGAYYAYpsFscstGlsshsLScacKc-hssFlGSaYutTpps.uLcpNPDTlpAYlPhsTNshchhss-us..pachlh.s.s..casuuspYSsFlGGDsslscIpNPslpDGSsllVlKESYGNAFlPFLV-HYppVYVl ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.h.t..........t.h...t........lph............t.....h.....t.......t.....t.c......l...Ya+TDHHWs..G.....A.......ahuap..........hh..........p....t......h.......t............p.............t........p.........p....h..p.........................t......t..h........hGs........h.tt...........h......t..-...h.....h.h........t.............t.......h.........................p................................t..........h....h......p...........t.........ts...Y.t...h.ah.ts.s..s.h..l...ps.....t..........s...........t.....p..hllh+DSausshhshhh..atph............................................................................................................ 0 32 45 46 +14129 PF14287 DUF4368 Domain of unknown function (DUF4368) Eberhardt R re3 Jackhmmer:B0PC15 Family This domain family is found in bacteria, and is approximately 70 amino acids in length. 
The family is found in association with Pfam:PF00239 and Pfam:PF07508. There is a single completely conserved residue G that may be functionally important. 25.00 25.00 25.00 25.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.09 0.72 -4.16 35 694 2011-05-25 13:00:02 2011-05-25 14:00:02 1 13 232 0 54 600 82 68.50 34 13.31 CHANGED pplsphcppptcs-+FlsllcKYsshpELTsshlsEhl-KIlVHcs-+..psu.pRpQcI-IYasalGplchPp ......................t.plsp.pppssssc+.Flpllc+YsshpELTsshlNEhI-KIlVHEt.....pc.....css..p+......pQcIEIYasFlGphp.............. 0 36 50 52 +14130 PF14288 FKS1_dom1 1,3-beta-glucan synthase subunit FKS1, domain-1 Coggill P pcc Jackhmmer: Family The FKS1_dom1 domain is likely to be the 'Class I' region just N-terminal to the first set of transmembrane helices that is involved in 1,3-beta-glucan synthesis itself [1]. This family is found on proteins with family Glucan_synthase, Pfam:PF02364. 27.00 27.00 33.00 28.20 24.40 26.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.62 0.71 -4.18 84 502 2011-05-25 13:11:19 2011-05-25 14:11:19 1 19 190 0 335 501 5 109.40 43 6.37 CHANGED pcplhplALYLLIWGEAsNlRFhPECLCaIF+...hAh-.......h.t.hps.......ht............tts-tsFLsplITPlYphl+spsh.......c............htpct..c.........HsphhsYDDlNphFW..tscsht+lths.p .........................cclhpluLYLLlWGEAsplRFhPECLCaIF+.............hA.-....................hht..ts......h.p.............ss-tsaLspVITPlYphl+spshc.................................hctcpc......HsphhsYDDlNphFW.scsht+lsh...p................................. 0 90 208 296 +14131 PF14289 DUF4369 Domain of unknown function (DUF4369) Eberhardt R re3 Jackhmmer:A7UYJ0 Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00578. 
25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.45 0.72 -4.02 232 931 2011-05-25 14:05:07 2011-05-25 15:05:07 1 13 192 0 223 886 222 108.30 20 31.00 CHANGED hsl...su.s.....s.pp.....t.........s....aplpGplssh.......ss.....splYL..hth....ps...st...h....s.....lDSstl.p.s..G.pFsFp..s.p..h..s.p..s....phhhl...............hh.p....s............tpp.........hhs.......hhl-.sG.p.lp.lps...........st..t...p...........hp.l..sGo.tt....scphp ....................................h..hsuC...spt......t................p....aplpGplpsh.....cs..........splYLtth......ps...st....h......s................lD...ostlp..s..G..pFsFp...s.s..h...s.p...s....phhhl................ps..................tpp...................hhs.........hhl-.sG.p..lplph.............stt.p...........hpl..pGo...scth............................................... 0 117 201 223 +14132 PF14290 DUF4370 Domain of unknown function (DUF4370) Coggill P pcc Jackhmmer: Family \N 25.00 25.00 169.20 169.00 20.40 19.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.50 0.70 -4.89 7 36 2011-05-25 14:53:12 2011-05-25 15:53:12 1 1 20 0 17 31 0 219.80 54 96.89 CHANGED M.....c+.hhtlRslhRuAshtSup.uuh..tpttl..cpth.tpRohhsLsusu................ttlsu-hp..sshuhuhGspRhFSEDVoHhPsIpDPcl.sAFKDLMAASWsELPsullp-AKpAlSKNTDDKAGQEsLcNVFRAAEAsEEFGGlLhoL+MElDD.lGlSGENVKPLPs.htsAl+TsapRYssYL-SFGP-EsYLRKKVEhELGoKMIHLKMRCSGLGuEWGKVTlLGTSGLuGSYVEQRA ................................................h............hR..shtu.t...h...t..h....h....tshssLsps.....................thsu..t..us.uhShs.RRhFSos.pHLPsIpDP-lcsAFKDLhAsSWsELPDSlVp-AKKAlSKsTDDcAGpEALcNVFRAAEAsEEFuGl.LVoLRMtLDDLsGLoGENVtPLPsalccAl+ouYpRYhsYL-SFGP-EsYLRKKVEsELGoKMIHLKMRCSGlGuEWGKlollGTSGluGSYVE.RA.... 
0 2 10 14 +14133 PF14291 DUF4371 Domain of unknown function (DUF4371) Coggill P pcc Jackhmmer:Q9C842 Family \N 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.41 0.70 -4.82 19 987 2011-05-25 15:07:50 2011-05-25 16:07:50 1 37 67 0 777 904 0 167.60 21 32.17 CHANGED -uFVspGapsa.p..c+pR..hcpHlGp.l.sSsHp.ApcKh-shhppppsIspshpppocptKtpYhsRLshSIcssRaLL+QGLsFRGHDESc-ShN+GNFlEllsalAcp.c-lp+llhpsuspsshhss.pIQ+plhpshApcspp.Ih--lGsshFulLsDESpDsspKEQhAlsLRYVDKpGp........VhERFlGlVHVp-TTSssLKsAI-uLLscasLSLpplRGQGYDGASNM+GchNG ................................................................................................................................................................................................................................................................................................................................................h....s......................................................................................h......th.h.t.hh...t..................h......h...h......................................................h...p......................p.......h.pp.l..phh.u...pt.l..h.p...lh...pp..l...p..s.....p....h...F...u.l.h.hDcstD.h.u.p.p.c.Qhsl...hlR.a.l....p.t.p.t............ltE.cF..l...shht.h...p...p...............t...............s....u..tl.hphl......p...h..l.t..p...h.sls...h......pp..hh..upsa..D...suush.s............................................ 0 218 578 708 +14134 PF14292 SusE SusE outer membrane protein Eberhardt R re3 Jackhmmer:A7V649 Family This family includes the SusE outer membrane protein from Bacteroides thetaiotaomicron, Swiss:Q45769. This protein has a role in starch utilisation, but is not essential for growth on starch [1]. 
25.00 25.00 25.00 26.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.71 0.71 -4.29 45 304 2011-05-26 07:22:16 2011-05-26 08:22:16 1 4 133 0 53 283 22 120.30 19 29.15 CHANGED lhhh.hhhhsuCpcDp..phtsh...psps....s.pLthsssssslsLst...us....s....A..lphsWssAshtss..s.slpYslphsttss.sFuss.lshs.psss...tpphohTspcLNs.l.h.s.p.h.Gl.tsspsssl..phclpu ...................hhshhhhsuCpc........Dt..p.ssh...p.ss.....shsLstss.s.s.spl.sLst...tss..s...u..lphsWop.s.....s......hsss.............s........s.....l..s...Y.........plp......hsh.....ts.t......sFsss..hths..pstt........ptphshottpLNp.h.h.s.......p.h....sh....tss......ptspl...hpl......................................................... 0 23 47 53 +14135 PF14293 YWFCY YWFCY protein Eberhardt R re3 Jackhmmer:A7V7D7 Family This family is found in bacteria, and is approximately 60 amino acids in length. There is a conserved YWFCY motif. It is often found in association with Pfam:PF02534. 25.00 25.00 28.40 27.40 24.00 23.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.72 0.72 -4.13 13 298 2011-05-26 09:50:59 2011-05-26 10:50:59 1 4 115 0 29 252 8 59.80 61 10.31 CHANGED QpEDDLRALAKIMDFhRAlSIlllllplYWaCYpuh+tWGlslsVlD+ILhNFpRTuGLFu ......QQEDDLRALAKlMDFhRAlSIlhlllNlYWFCYpuhctWGlslGVlD+ILhNFpRTuGLFp... 0 13 26 29 +14136 PF14294 DUF4372 Domain of unknown function (DUF4372) Eberhardt R re3 Jackhmmer:A7V8G9 Family This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF01609. There is a single completely conserved residue G that may be functionally important. 
25.00 25.00 26.40 25.00 23.90 23.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.26 0.72 -4.10 46 284 2011-05-26 12:17:13 2011-05-26 13:17:13 1 3 143 0 79 290 118 72.30 32 23.51 CHANGED ppupslFuQllchls+ppFccllc+.apG-ptl+pFosasQhlsMhauQLotp-SLRDIpssLpuppsKLYHLGhpp ...........psphlFuQllphlspppF.pphVc+..apuc+asKpFosasphlsMhauQLoppcSLR-lpssLpupp.t+haHLGht.t............ 0 33 60 73 +14137 PF14295 PAN_4 PAN domain Coggill P pcc Jackhmmer:Q2FN86 Domain \N 27.00 7.00 27.00 7.00 26.90 6.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.18 0.72 -4.25 64 1029 2012-10-02 11:41:37 2011-05-26 14:20:17 1 128 214 14 794 1625 53 47.50 22 13.28 CHANGED sDh.GsDh.psh..h.......s.s.......s..psCtttCp.psspCp..uaoasp...............s........s....upCaLK ........................................ph.G.s..h..tt...h......ts.s........................s.spsCpphC........p....s........s.....s........s....Cp..hao.ass.........................t...........t......tp..CaLK................................... 0 389 539 755 +14138 PF14296 O-ag_pol_Wzy O-antigen polysaccharide polymerase Wzy Eberhardt R re3 Jackhmmer:A7V9M7 Family This family includes O-antigen polysaccharide polymerases [1]. These enzymes link O-units via a glycosidic linkage to form a long O-antigen [2]. These enzymes vary in specificity and sequence [2]. 
25.00 25.00 25.30 25.10 24.90 24.20 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.83 0.70 -5.90 29 375 2011-05-26 13:28:48 2011-05-26 14:28:48 1 3 336 0 31 222 10 415.20 39 91.97 CHANGED ls.h.hha.llshhh......sshphlh...hslh..hlhlh.shlas.h.........pphphh.hhh...hFhhshFlFlhup.h.lshh.....h.s.h..........hshp.ahp.phhh..ps.....hhlhhluL...lhhalGhll...hppp..phph...pptptpp....pph...hpthphl..shhlha.lshl..th....lht.hpplhhhtssuYhuhYts..htpp......h.s......t.hhhhhu..shhhhu...hhlaL..sohss++c..hhh.hhlalhhtll..slltGpRs.hlhslLhlhhYahhpph............hlshhphhhlhlh.sshlhhh.........hshhshlRsp.............p..htt.shh.sslhcFhhsQ..........GlShtlluh.........shphp...splstpt..sYsh..hhph.h..t.l.tt.lhuh.hsh..tspshc.sh.ssshucplsY..hl.ssstYLtGhGhGoSal..AEhYhDaGhlG...lhlhshl...lGhl.l.....thhpp..hh..pps....hhh.hhh.shhhlsslhahPRushhs.l..hhhhhhhhhllhlh .........................................................................................................hl...h.h.hhhhh..........sp...ll....slh....hl.l.ls..sllhs.h..........scl+hhhl.aa......lFhlolFlFL.lo.R..Ps.lsYF.........t.s..s...........ulc.s...Yps......shp....aA.....alllhlSl...LGlshGulL....hs++...+.lKh..tshusshc.....csa.....lKpL+hl....S.LslFl.Lsa...s..ah.......hlc.ap......cLla.p..lp.......so..Y.hAhYss...acSp................L.P.....a.FshhLS......saslhu....hshYL....uo+P..+K.hp......u.ptl.......L.ls..alssssl....pLslG.oRss.FILoILhsFlYY.ahRcp.........+........tKaIuhKcplsIalu.usILhl.u.........MGl..l.sYlR-ss.................p..loas.uhh..-lllDFlYcQ...............G.sSau...V...Lup.........uhhas....spLPhcc...NaTaGsll-a.Fspus.L.us.IFus..pu.h....hsssSl-lulcuNSaAHsLSY...ll.lsc.cY.L.pGa..GlG...SS.....Y...I..hE....lYs.DaG..hlG....VFL.lSF.L...LGlL.l.....uhLps.....sh.....+..s+....sIl...hsl...uL.llL..ssLF.F..hP..RuSFop.sahsLhshpFhslllllh........................................ 
0 12 22 27 +14139 PF14297 DUF4373 Domain of unknown function (DUF4373) Eberhardt R re3 Jackhmmer:A7V2U9 Family This domain is found in bacteria, eukaryotes and viruses, and is approximately 90 amino acids in length. 25.00 25.00 25.70 25.50 23.40 24.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.77 0.72 -3.69 44 354 2011-05-26 15:13:09 2011-05-26 16:13:09 1 4 157 0 42 290 6 91.60 23 32.64 CHANGED YFsh-sshhpDpplphlht+.h.G.hcGhuhhhhllpplhp.pssY..hsshpt...h.......ph.hApch..s..hst..........splcpllp......-a...sLF...s...hpp....p..t..hpS..t..lp .........YFshssshhpDtclctlttc.a.G.hpGhulhhhlLsp.l.Yc.psta..hh.hcp....h........hhlupph...s..hst...................ctlppllp......ch...sLFs...hpp......p..t.lloSttl............................................ 0 14 33 41 +14140 PF14298 DUF4374 Domain of unknown function (DUF4374) Eberhardt R re3 Jackhmmer:A7VA08 Family This family of proteins is found in bacteria. Proteins in this family are typically between 406 and 466 amino acids in length. 
25.00 25.00 27.00 25.50 24.90 23.80 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.65 0.70 -5.90 8 125 2011-05-26 15:24:25 2011-05-26 16:24:25 1 2 81 0 32 115 0 283.90 23 80.97 CHANGED oSCoDs-s..P.....usus..ssssKusYVIAuosssSsussshLLTAESLDEGolSolsNGL.ssDGuT.WVFYcspYLYuLsYNQGsAGTTpSYILsuNsclcpRstoYslpRFTTYGIYscaIlTsSTGDGspEhsDpNGYlPpuFLlSYLDVscpThTTNs.ppppshLuENFLGNGEaVTLAGIlEsNsKlYouAIPMGLSpYGsp..s-sGt..hlLssspDLVKTEsGGSGSGuYcKGEL.WTQYPNcsaVAIasssoFsscKlI.cTDKISYACGRhRSQYYQTIWuADNGDlYVFSPSYAKTM........oDt+QQT.TLPAGVVRIcAGuE-FDssYYsNLEppouG..+uFlRCWHIosDYFLLLMYD+.....s....lTt.TuhsAsELAlFKAEspKLTYV.oGLP..Ss.lISGFGNTPYsENGhAYhAV.TTT-G.pPAlYKIDPsoAoATKGloVE .................................................................................................................t..........................................................................................................................................................................................................................................................................................s.asspsalAlas...........shp.p.ll.pss.+hu..hhsuh..h..h.hp.tlh.hs-sGD.lYlF.S.suhsth...........ts..t.o.phPuuhhRIptut......p-FD.sYahshpt.sss.....t.....hhphhalsts.h.Fll.hh.sp...s..........hst...ps.....hsspp.h..u.l..h..cs....t.stp.hh.l.pGlP...st......s...s...h.u..t...t...s..h...s.-..p..G...h...sYhsl..ssp...pG......s.h......lYplDstsupAs+Ghpl.......................................................................... 0 10 31 32 +14141 PF14299 PP2 Phloem protein 2 Coggill P pcc Jackhmmer:Q9C8U9 Family Phloem protein 2 (PP2) is one of the most abundant and enigmatic proteins in the phloem sap. PP2 is translocated in the assimilate stream where its lectin activity or RNA-binding properties can exert effects over long distances [1]. 
25.00 25.00 26.70 25.50 24.00 24.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.04 0.71 -4.83 88 511 2011-05-26 16:48:35 2011-05-26 17:48:35 1 14 50 0 261 487 0 150.10 27 61.38 CHANGED Gt+CaMLSARs.LsIsWu-s...sp.YWpWhsh..............spSRFtEVAcLhsV.sWLEIpG+lssphLSPsTpYusYhlh+l.............s-p..saG......h-..h..Pl...............c.h.....slsh.....................................................s..ss.............................................p.p..p.....................p.p.php...........h..p..............................................cRs......DGWhEl.-lGEFh....s.s..p.....s.....pss....ElchSlhEsc...ssph..KsGLllcGIEIRPK ..........................................................t.shhlsu+.s.Ls.I..sh....hs...s....p..YWpahsh..............spu..ca........clA.L.pl..hW....l.-lpGplphph..h...ssspYsshhhhph.....................tcp...shG......hc..h....sh................p...hplsh...............................................................................s...st.................................................................p.p.........................t..tthph..p..............................................ptt......-.sWhEh.chG.-Fh....sp....p.........t.........pss.....clp.hsh..h-h.c...ssph..KsGLhlcGl.tIpP......................................... 0 40 141 190 +14142 PF14300 DUF4375 Domain of unknown function (DUF4375) Eberhardt R re3 Jackhmmer:A7VAK3 Family This family of proteins is found in bacteria. Proteins in this family are typically between 156 and 204 amino acids in length. There is a single completely conserved residue G that may be functionally important. 
25.00 25.00 25.20 25.30 24.90 24.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.57 0.71 -4.13 41 346 2011-05-27 08:18:22 2011-05-27 09:18:22 1 4 298 2 55 291 11 119.00 25 63.69 CHANGED -.shppLss-Q+sLlAhphLcpEVpsGGFsQhhtNuhGtalh.pshhcuh+paGhcchscllpcupplYtpp.t.sl-c-pp..tcph.shh.cp.....a..st.....F--hDcpah..-hEE..phsphlspYlcpHh-pF .......................ht.Lsscp+pLhAhphhcu-VtsGGFsQhltN.uhG..th.l..h.pshs....cuL+phGspchspllc+At.s..l...a..pppttsl-pp.tp...tp......ch.shh..cp.........h..tp.....h-ph.Dctaa..-ht-...phhthl.stalptp.t.F........................................... 0 24 34 43 +14143 PF14301 DUF4376 Domain of unknown function (DUF4376) Eberhardt R re3 Jackhmmer:A7UZK5 Family This domain family is found in bacteria and viruses, and is approximately 110 amino acids in length. 25.00 25.00 25.20 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.34 0.72 -3.94 46 601 2011-05-27 09:01:56 2011-05-27 10:01:56 1 7 328 0 48 539 8 107.00 30 60.69 CHANGED L-ps+stKhsEIsshRsptpss..u..hshp..u.tchtsspps..p........hp...hssslt.....h.upps.shs.tsh.....hWpssssts..lshs.......hh..shttslhp..p...sp..phapcppph+pplpuhpshcslpuh ...................Lcts+pt+hp..EIpshRspppst.s..hsac.....s..tph.s..ssss...p............hp...l.sslh........h..Apss..sh...sth..............hW.sD.....uDNpp....V.p.loh....-hht....sh.spuhss..+...spcIap+pcphKpclps..hsshpplps................. 0 8 29 41 +14144 PF14302 DUF4377 Domain of unknown function (DUF4377) Eberhardt R re3 Jackhmmer:A7V378 Family This domain family is found in bacteria and archaea, and is approximately 80 amino acids in length. 
25.00 25.00 25.40 25.20 23.30 17.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.49 0.72 -4.08 59 285 2011-05-27 09:46:05 2011-05-27 10:46:05 1 11 240 0 66 282 16 80.30 31 37.50 CHANGED lhVsspp..ssC..s........Gh.....hshpChtV+pp...spp..........sWp.haYssIcGF..s..a-sGacYtL+V+c..hpltss.....PADuush.pYpLlcllpKp .......................................Vuspp...ssC...s........Gh.....sshpCLpVRcs...tts..........sWp.haa..us..IEGF..s..acsGhcYhLcVpc..hphtNP.....PADu.uu.h.tasL.pllpp......................... 0 15 38 53 +14145 PF14303 NAM-associated No apical meristem-associated C-terminal domain Coggill P pcc Jackhmmer:Q9SKG8 Family This domain is found in a number of different types of plant proteins including NAM-like proteins. 27.00 27.00 27.10 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.40 0.71 -3.90 48 509 2011-05-27 10:45:57 2011-05-27 11:45:57 1 15 25 0 377 488 0 144.70 18 46.12 CHANGED +sFshhHCWplL.+spsKWpsch.p........................t.pp.ppt..........psssps....sssss...pts..sss....sp....ssstp........................................cRP.GpKpuKpthp..........................th.c..................................c.tp.........................cc+ht...p..p.......hppt....p......cc+h..........p.................hccc+h-hcphp....pEccIMhsDhosl.ss.pppa...hcthpccI 
..................................................................................................Fph.asWplL.+ppsKWpsh.tp..........................pp.pt..........................tstspp...sstss......sts..sss......sp.......ss..ttt.......................................................................pRPhGpKtAK.pphppt........................h....th.............................................................t...........................ppcht..p......thtp.....p............ppph........p.................hppcp..hphp.h...........t..plh.hDhssh.s................................................................................................................................... 0 143 254 311 +14146 PF14304 CSTF_C Transcription termination and cleavage factor C-terminal Coggill P pcc Jackhmmer:Q8VYM7 Domain The C-terminal section of CSTF proteins is a discreet structure is crucial for mRNA 3'-end processing. This domain interacts with Pcf11 and possibly PC4, thus linking CstF2 to transcription, transcriptional termination, and cell growth. 23.00 23.00 23.00 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.16 0.72 -4.68 4 329 2011-05-27 10:51:39 2011-05-27 11:51:39 1 13 212 1 210 290 1 43.80 49 10.15 CHANGED spp.tupAtLl.QVhpLopspIshLPPtcRppIh.LRpQlp+ut.sp .............cpsALlhQVLQLos-QIuhLPPEQRpsIlhL+pQltps...th..... 0 56 96 153 +14147 PF14305 ATPgrasp_TupA Glycos_tran_Wfd; TupA-like ATPgrasp Eberhardt R, Iyer LM, Abhiman S, Burroughs AM, Aravind L re3 Manual Family A member of the ATP-grasp fold predicted to be involved in the biosynthesis of cell surface polysaccharides such as the O-antigen in proteobacteria, the capsule in firmicutes and the polyglutamate chain of teichuronopeptide [1]. 
25.00 25.00 25.80 25.10 24.80 24.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.53 0.70 -5.06 14 355 2012-10-10 13:17:03 2011-05-27 14:24:01 1 11 269 0 99 342 80 230.60 28 73.88 CHANGED pKlQahKlh...scs.hhshhsDKhpl+calpcphGpp.hlIPhluVhsph...psIcapplP.ppaVlKssHsSG..shhlspscsphshhpsp.....pch.pphLpp....shhhhsREa.Ypplp.+lIlEchhtcpss........cDYKhaCFpGpsphl.lplcR..psppptshashsaphlsh...stpa......stpphpKPsshc-hlplu....cpLSpc..hPaVRlDhYpsssplYFGElTFh...suuGht.chhPc-aDchlGchhp ......................................pKl.ahh..h......ppshhsphsDKhtVRc....altpph.s....p....p....hllPhl.s.hhssh.....p-ls..a..spLP....ppFVlKssHsoG.......sshls...pDK.sph..s.hp.pht.....................pph.pphLpp.................sahh..hs+Eh.p.Y+slps.+.Il.sEchlt....sp.s.s.....................tD.Y.....KhasF...s...G.....c....s.....p..hl..l...sh.s.R.........tsp.p.p....h..s...ha.D....h.s.W.phhsh......p.phs.........stp..h.s..+..Pp.ph....cchlplA....cpLups......hs.a.V.RVDhY.t.s....ss........c.lYFGElTFt...ssuG..h....ph....hspp....hDhhhGphh........................................................................................... 0 37 55 77 +14148 PF14306 PUA_2 PUA-like domain Bateman A agb Bateman A Domain This PUA like domain is found at the N-terminus of ATP-sulfurylase enzymes. 
27.00 27.00 27.10 27.10 26.90 26.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.81 0.71 -4.83 89 1267 2012-10-02 17:37:24 2011-05-27 16:48:36 1 16 994 50 512 1155 428 155.00 30 31.10 CHANGED ltPHG..G.....pLh........shllpsp.ptpphhpcutp...L.Pp..lplospph...........sDLphlutGsFSPLpGFMscpDYpuVl-ph+L.s.............s........Ghh...WolPIsLsVsp-ptpp..lptGscluLppt...Gph.lAllp.lp-hYph...-KppEAppVatTs-..t............HPGV.phlh...ppschhluGslpllpp .....................................lhphhs.tt...phpt.hhtc..App.....h...p.lplsphpl....................................s-LpllupGhaSPLpGFMscc-Ypp...Vl.cph..+L..s.........................s...........................Gsl.......aolPIsL....s....ls....cc....ptpp...................lc....s....G...s....c.lsLhtp........Gph..lAllp.l.p-hap....cKpc...cstplauTss..t.............HPuVp......hlh...ptGchhlGGslpllp..................................... 0 175 312 427 +14149 PF14307 Glyco_tran_WbsX Glycosyltransferase WbsX Eberhardt R re3 Jackhmmer:A7V083 Family Members of this family are found in within O-antigen biosynthesis clusters in Gram negative bacteria, where they are predicted to function as glycosyltransferases [1,2]. 
26.10 26.10 27.00 26.40 24.70 26.00 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.38 0.70 -5.28 84 380 2011-05-31 07:37:58 2011-05-31 08:37:58 1 33 282 0 99 359 61 323.30 31 59.13 CHANGED +lIAaYLPQFHPl.P.E.NDcWWG+GFTEWTNVs+A+PhFcGHYQP+..lPs.-.LGaYDLR.s-shctQAcLA+caGIpGFCaYHYWF.sG......K+lLE+Ph.cp.lL.p.ssc.DhPFCLsWANEsW.o+.pWcG...t....spcl....LlpQpYssccD..ppHh....phll..hF+DpRYI+l-G.KPlahIY+P.......ppl.PchpphlphWcph.Ap.csGlsslahlsht.......................tth.tt......p......hphGaDussph....t..........stt......h.tth..................s....htt.......thtph...hppt............h....th....sph.h..........c.Ys..phhpphls.p..t.t.....p....h..phaPslhPsWDNosR..........+sp.p.uhl.hhsuoPctFcpaLppsl.p.p.sp.p.p.......pcchlFINAWNEWAEGsaLEPDh+aGhuYLcAl ......................................................................................................................................................................................hlAaYLPQaHsh...E....ND....tWWGcGFTEWsNlt.pAp..PhFpGHhQP+..hPh.p.hGaY..DLp.sc..shptQsclA+phGltGFsaaaYWF..sG...........+pl....LE.pPh...pp.hL.....p.st..c.D.hPFClsWANcsW.s+pWsu.............t.....ppcl...............LhtQpYs.st.pD.....htpah....phlh.thFpDpRY.l+l-.....G..+.Pl.hhlYcP..............ttl...schpphlphWc.ph.sp.c.tG.h.sslahhtht.................................sth.t.........p..........tphu....a....Dushph...tshh.h..ht................................h.t.......ph..hp.h.hppt...................h...........hph..h..........cYp..chhp...t.hhpp...p...t..........s.....hphaP.s.lhs..sW.DNo..sR..............ptt..p.uh....l.....h..hs.uoP.cta..ppalppt.hp.h.st..p.p........ppch..lFlsAWNEWuEGsaLEP.Dh.+.aGhuaLcsh................................................................. 0 40 76 89 +14150 PF14308 DnaJ-X X-domain of DnaJ-containing Coggill P pcc Jackhmmer:Q93ZH5 Domain IN certain plant and yeast proteins, the DnaJ-1 proteins have a three-domain structure. 
The x-domain lies between the N-terminal DnaJ and the C-terminal Z domains. The exact function is not known. 27.00 27.00 27.80 27.50 26.70 26.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.16 0.71 -5.05 53 402 2011-05-31 12:29:53 2011-05-31 13:29:53 1 12 193 0 305 409 4 194.00 27 43.07 CHANGED ptpccRlpcLuppLhc+lp.as........c..ssp........cp.........Fpp+.hppEsc.sL+hESFGl-lLHsIGtlYppcAspaLtppp......pahG.lut...........haspl+.sKGcsh...Kssa.......sslsoAlcAppsh-p.hp+.......hpppt...................s.c..h....spcc.....hschpcp...htu.............K......hLsshWphs+aEIpssL+cVCpplLpDcsV..spcpRhpRAcALhhlGchFppsccs.s..c...-pppt.psFEc.lhs ........................................................................h..ppcR.ppLuppLhc+lp.as......................c..ssp..............cp............Fppp.hc.Es........c.pL.phpSFGh-.....lLcsIGhsYhppAsphL.ppp..........ta.hG.lst...........hhp.th+...s.Kuphh....+sph..........ssh.ssAlsh.phhcc.ht+.......h.pppt...............................................................................................t.p.......spcc............htchtcp.hts..........................................................................p......hLsuhWphshh-IpssL+cVCpplL.....p.D..p.sl.......s.p..cphhtRAcALhhlGplappstps..p...cttt.................................................. 
0 122 199 273 +14151 PF14309 DUF4378 Domain of unknown function (DUF4378) Coggill P pcc Jackhmmer:Q9FIS0 Family \N 25.70 25.70 26.00 26.20 25.10 25.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.02 0.71 -4.18 103 449 2011-05-31 13:05:52 2011-05-31 14:05:52 1 9 28 0 299 412 0 156.50 19 19.00 CHANGED p-...hpYlp-lL.tsuGl.h.pst.t............h.....p..h..psssp..PlsPslFcpLEppt.........s..................................ttsptt+.scR+LLFDhlNEsL.......s.......chht........hshpshhp.....th......shphp.........h...su..pp.Llc-lhpplpphht.......ps....h.pp...p.....t........ll..scD.hspp.....tt....hh.....shpp......-sppluh-lEchlhc-LlcE .............................................................hpYlpclL..sush.htp................h...........t.h..h.ssp..slssplFpplEpphs...................................t.tttpp.c++LLFDhlNEsL.............s..chht.........hshtshhp.......htht.h............h.....ss...pp...Llcclhp.plpphht...............ppt.....h.pp.....p.....t.........ll...tcD.htpt.........tt..hh.....shpt......-hpplsh-lEchlhc-Ll-E.................. 0 50 182 249 +14152 PF14310 Fn3-like Fibronectin type III-like domain Eberhardt R re3 Jackhmmer:A7VAA0 Family This domain has a fibronectin type III-like structure [1]. It is often found in association with Pfam:PF00933 and Pfam:PF01915. Its function is unknown. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.23 0.72 -4.04 1123 4829 2011-05-31 13:39:53 2011-05-31 14:39:53 1 85 1882 11 1761 4491 425 70.70 32 9.16 CHANGED EVVQ...LYl..p......s...s..s......u.........s......l..s..cP.s..+pL+GFpKl.p.Lps.GEoppVsh.s.ls.t....c.s....luhac.t.......p...t....p..........ahl-...s......Gp..aplhl..Gs..SS ...................EVVQ...lYl.p........s..s.s.........u...............................s......h....s..cP....s....+pL+GFcKlp..Lp........P....G........E.oppVsh.s.ls.h....c.s......L.uhast........p...t....p.................asl-...s..........Gp...aplhlGssS........................................... 0 582 1136 1550 +14153 PF14311 DUF4379 Domain of unknown function (DUF4379) Eberhardt R re3 Jackhmmer:Q7UL48 Family This domain is found in bacteria, eukaryotes and viruses, and is approximately 60 amino acids in length. It contains a CXXCXH motif and a CPXC motif. 24.50 24.50 24.70 24.50 24.40 24.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.21 0.72 -4.15 113 571 2011-06-01 07:26:03 2011-06-01 08:26:03 1 19 105 0 218 533 412 55.50 29 36.58 CHANGED scLspEas.....csp.....hpPspVshuSph+shW+Ct..pC.uH.cWcAplssR.o.....spssGCPtC ...............................clhpEWs....tcNt........hsPpp..lsh.s.S..p...pcs..a..W.cCs.........ps.uH..pWpuplpsRs........tpspsCPhC..................... 
0 153 193 213 +14154 PF14312 FG-GAP_2 FG-GAP repeat Eberhardt R re3 Jackhmmer:Q7UNP1 Family \N 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.48 0.72 -3.95 180 1055 2012-10-05 17:30:43 2011-06-01 10:35:36 1 62 110 0 582 1179 1780 50.20 34 23.05 CHANGED asppsKLsA.oDGuusDhF.G..hSVulsus..s.hlVGAh...t-Ds...ts......s.GuAY.lF .................tthlhu..s....s.....s...s...s..sD...t....F.G....h.S...Vul..su-..........s..llVGA.................t--s................sus...............s.u..GusYlF......................... 0 432 550 571 +14155 PF14313 Soyouz_module N-terminal region of Paramyxovirinae phosphoprotein (P) Coggill P pcc Karlin D Domain The soyouz module moiety is the N-terminal region of the phosphoprotein (P) from the subfamily Paramyxovirinae of the family Paramyxoviridae viruses. The main genera in this subfamily include the Rubulaviruses, avulaviruses, respiroviruses, henipaviruses, and morbilliviruses, all of which are enveloped viruses with a non-segmented, negative, single-stranded RNA genome encapsidated by the nucleoprotein (N) within a helical nucleocapsid. 21.30 21.30 21.30 21.30 21.00 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.66 0.72 -4.41 16 410 2011-06-01 10:13:34 2011-06-01 11:13:34 1 9 40 3 0 415 0 56.90 54 14.64 CHANGED MshhsDsEIscLl-pusslI-pIppupspssc..ThG+SsIstGsTcsLssAWEccsssp ...MATFTDAEI--LhETSGTVIDSIITAQGKssE..TVGRSAIPQGKTKALSsAWEKHGs............. 0 0 0 0 +14156 PF14314 Methyltrans_Mon Virus-capping methyltransferase Karlin D, Coggill P pcc Pfam-B_840 Family This is the methyltransferase region of the Mononegavirales single-stranded RNA viral RNA polymerase enzymes. This region is involved in the mRNA-capping of the virion particles. 
20.20 20.20 20.30 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 675 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -13.08 0.70 -6.68 10 144 2012-10-10 17:06:42 2011-06-01 13:38:09 1 2 69 0 0 167 31 660.80 50 31.64 CHANGED l+.p-G-W-sLossEKSYHVGRslGFLYGDLshp+Ssps-DSSLFPLSIptKlcGRGFLRGLlDGllRuSulQllHRRolspLK+PtsAlaGulhYLI-KlossssFhsLlRcGPIR-ElpoIPHKIPoSYPToppDhGtllRsYLKpph+plccupYposhss.lWlFSDltSpc..alGshulSoplLchLhcssLSK+s+spLRcLusLppplRoscs..h..s-hclcplhpchL....hCspElRHAsKF..shsKpssspthh.......WG.pEhhGslpplPV.Yoossss....chhphs.PRlQNPLISGLRlsQlATGAHYKlRoILsshcIpacDsLssGDGSGGhTuslLRhNspSRuIFNSLL-lsGpsh+GopPsPPSAL.slGs-.psRCVNt-osWEcPSDLScppTWcYFhcLKpphuhpIDLIVhDMEVpD.phsppIEppl+calhpLL-csGsLIaKTYGThlsspsps.slshLGshFcoVpLlQTphSSShTSEVYllh++h+phl.DssalDasoL..pchhpplasFps.+pEFcRA+plpppchlhGlPsplIPDPhV-LpTlLpIuGVsSGluHplsp-lppupush.phAlllsslIsa.p.shslo+hhs.p..phpPPSDucll+hhsslsGlshWLSltppDlshapphppllscsh.sI+atptht+.uphhhpW .................................................................................................IcL+sGDaEoLoscEKShHIGoAQGLLYSILVAhHDSGYNDuoIFPVNIYuKVSPRsYLRGLARGlLIGSSICFLTRMTNINIsRPLELISGVISYILLRLDNHPSLYlMLREPsLRuEIFSIPQKIPAAYPTTM+EGNRSlLCYLQ+VLR....YE....R-sl...os...SP-sDhLWIFSDFRSsKMTYLoLlTaQSHLLLQ+l.-+sLSKpMRspL...RQhuSLMRQVLGGHGE.sh....-SD--IppLL+DuLp+.........T+WV.DQEVRHAA+o..Mpuc.h.S..P..s.c.+.hSRK......hGsSEWlCSAQQlAVSTSu.NPAPhS-hDlRsLS....+RhQNPLISGLRVVQWATGAHYKLKPILc-LslaP.sLs.LVlGDGSGGISRAVLsMFPDu+LVFNSLLE...VsDLMASG.THPLPPSAIhoGG-DIhSRVIDa-SIWEKPSDLRN.sTW+YFpSVQcplNMoaDLIICDA.......E..V.......TDIu.Sl.N+.......IsLLhS.......DFuh..SI.-.GP.lhLlFKTYGThLlNs.-Y+.Alp.HLSRAFPoVsGalTQhTSSFSSElYL+FSKpGKFF.RDuEaLTuSTL...REMSLV.LFNCSSsKSEhpRARSLN.YQDLlRGFPcEIISNPYNEMIITLIDu-VESFLVHKhVDDLEL...p+GsLSKhuIIlsIhIlFSNcVFNloKsLs.-P.hFaPPSDPKlLRaFsIpsuThhaLSsshGDlssFs+LH-LYNpPl..oaYFc+Qsl+.GphalsW............................................ 
0 0 0 0 +14157 PF14315 DUF4380 Domain of unknown function (DUF4380) Eberhardt R re3 Jackhmmer:Q7UYS3 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 288 and 372 amino acids in length. There are two completely conserved residues (G and E) that may be functionally important. 26.50 26.50 26.90 27.00 26.40 26.40 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.73 0.70 -5.22 21 128 2011-06-01 12:55:25 2011-06-01 13:55:25 1 2 114 0 49 125 11 260.80 25 81.04 CHANGED tua.pslpLuNsslclslTsslGsRIlta.uhpG.upNlL............s.htsthttsstschhhhuGpRlWhuPE.........t..+p.....a.PD....ssshphp......ppsltlpssssshoGlphphplplsssp.splplppshp......Ntstt.slphshWshThlss.uGhshhPlsstsph.........pl.hhshophtps.phphpcchlslpssstst......tKlGhsspsGWhAh.htpsplFlcpashhs.sA..pY.P-hGhshEh..Yss.......pshlEhEshuPhtpLpPGpshpapEpWpLhc ...................................................h.pslplssshlclhlssshGsRllpa..uhps..ttNhh...................tt.hh.ss..tsphhhhGGc.+lWh.....uPp...........................................cp......a.PD....s...tshphph......pssl..tlppssp......t.h..sslphphpl........p....h.pss.............p...spl..plptplp...................Nhssh.....sl.....p.....huhWslohls..........s..s.......uh..hlPhs..stss....................pl..hhshs.phts..Rhhhscphlhlctssp.ps.........hKlGhss..p..tG..W...hAh....hs..sspla..lcpath.....su..tY.P-h.........G.phEs..Yhs......................shhlEhEshuPhhpLpPGpphpa.E.WpLh................................................................................. 0 27 42 45 +14158 PF14316 DUF4381 Domain of unknown function (DUF4381) Eberhardt R re3 Jackhmmer:Q7ULH4 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 158 and 180 amino acids in length. 
27.00 27.00 27.10 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.19 0.71 -4.09 111 458 2012-10-09 19:59:19 2011-06-01 14:55:13 1 4 391 0 131 402 66 145.10 27 87.80 CHANGED sLtpLpDltlPsslu.aWP.A.GWWllhsl...llhhlh..hhhhh....hh.++ppp.p....t..h++t..ALptLsplpt..............sssphhsplspLLK+sAlshh...P+p..plA.sLsGpsWhpaLcspts..........hssthtp..tLhp....thYpstst...............hpp..lhptsppWlcpp ...............LtpLpslhlP.pssu..WaP.A.GWWlllsh...llhslh....hhhhh...........hhp+tpp....p.............t....h++p..ALptLsph.............................sssphhpplstLL+csAlsha....P+p...thA.sLs.GcsWhsFLDspss..............t..hsshtp.....hhhpshYpspsh............ps......hspLhptsppWlcp............................................... 0 28 57 95 +14159 PF14317 YcxB YcxB-like protein Eberhardt R re3 Jackhmmer:Q7UN84 Family The YcxB-like protein family includes the B. subtilis YcxB protein Swiss:Q08793, which is a functionally uncharacterised transmembrane protein. This family of proteins is found in bacteria, and is approximately 60 amino acids in length. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.92 0.72 -4.72 258 600 2011-06-01 13:57:44 2011-06-01 14:57:44 1 4 461 0 124 522 35 62.50 16 34.41 CHANGED l..sccslth.....ps....spspsph...pWsplpp.lhcscphhhl....hh.......spt.phhhl...P+.c.shs........ttph.....pphhphlp ........................hpppulhh......ps....tpspsph...tWspltc.lhcscphhhl....hh.........spt.pshll...PK.c..shs....ttph....pphhth...................... 0 41 86 104 +14160 PF14318 Mononeg_mRNAcap Mononegavirales mRNA-capping region V Karlin D, Coggill P pcc [1] Domain This V domain of L RNA-polymerase carries a new motif, GxxTx(n)HR, that is essential for mRNA cap formation. Nonsegmented negative-sense (NNS) RNA viruses, Mononegavirales, cap their mRNA by an unconventional mechanism. 
Specifically, 5'-monophosphate mRNA is transferred to GDP derived from GTP through a reaction that involves a covalent intermediate between the large polymerase protein L and mRNA. The V region is essential for this process [1]. 21.70 21.70 22.00 41.60 18.00 21.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.57 0.70 -5.34 63 729 2011-06-01 14:32:24 2011-06-01 15:32:24 1 9 223 0 2 766 0 237.80 32 11.27 CHANGED sCSsslActLRphSW.....GRplhGlTsPcPlEhhss..hh.ttp.Ct...................Cp..t..sppp.....hohhh...hss...lspstp.spu.tsPYlGSpTpE+p...hhphhphcstsshlKpAl+ltsshpWhhssssp.hpphhplhpstssl...shsphphl.sh.poushtHRhssupsppthhsus.sthsoahphosDshphhsct....pshslhaQp...llshuhhpshhth.pssshpstshHhHhpsssCl+clpps ...........CSsplAchLRppSW....tGRpIlGlosPcslEhhcs.tllpsotsCsh........................Cp..s..Gspp.....hohhh...lPuthplsssspts.u.+lPYlGSpTpE+p...shphhphtsho.plKsAl+ltsshtW..saucs-.sphpthhplupu+ssl.....oh-.l+hl.Ph.pouNlhHRLcDutsphpassushsplusalplSsDshshhtp-..tsp-tNllaQp.hh.LhGLulhcohh.hppspshsp.hslHLHhchssCl+.h............. 0 2 2 2 +14161 PF14319 Zn_Tnp_IS91 Transposase zinc-binding domain Eberhardt R re3 Jackhmmer:Q7UXQ4 Family This domain is likely to be a zinc-binding domain. It is found at the N-terminus of transposases belonging to the IS91 family. 25.00 25.00 25.00 25.30 24.80 24.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.33 0.72 -10.87 0.72 -4.56 72 995 2011-06-01 15:28:09 2011-06-01 16:28:09 1 9 449 0 150 849 144 105.20 32 30.65 CHANGED t-Ihcp...ahssa...p...p.pa....sttls.spt+shpuIhpCRT.sshGsphhpCps..Csppc.hhhpSC+sR.aCPpCQspssppWlppppsclLP.ssYFHl.VFTlPtpLpslshp.spchlYs ........................h..lhpt...hhs.h........p.t.......tttltsh.hcshschhtCGo.thhGh.pphpC..ss...Cs+p+.hlshS.C+sR.tCPpCGs+tsspWlthhhscl..s.ssahHl.VFTlPpplhslhhpsp..h..h........... 
0 56 94 121 +14162 PF14320 Paramyxo_PCT Phosphoprotein P region PCT disordered Karlin D, Coggill P pcc Jackhmmer:Q4VCP9 Domain The N-terminal half of the phosphoprotein P of the Paramyxovirinae viruses. The very first 60 residues have been built as the family Soyouz-module, Pfam:PF14313. The remaining part of the region, here, is disordered, and is liable to induced folding under the right physiological conditions. The region undergoes an unstructured-to-structured transition upon binding to Measles virus tail, C, unstructured region. 27.00 27.00 317.60 317.20 20.10 19.80 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.87 0.70 -5.53 3 49 2011-06-01 16:08:56 2011-06-01 17:08:56 1 3 3 0 0 53 0 296.20 78 57.10 CHANGED ERRNLEDLSSTSPTDGTIGKRVSNTRDWAEGSDDIQLDPVVTDVVYHDHGGECTGYGFTSSPERGWSDHoSGANNGNVCLVSDAKVLSYAPEIAVSKEDRETDLVHLEDKLSoTGLNPTAVPFTPKNLSsPAKDSPVIAEHYYGLGVREQNVcPQTsRNVNLDSIKLYTSDDEEADQLEFEDEFAGSSSEVIVGISPEDEEPSSsGpKP.ESVG+sIEGQSlRDsLQl.KsNKssDsPGAGPKDSAVKEcs....PQKRLPMLAEEFECSGSEDPIIQELLKENSaINuQQGKDAQPPYapGIEuSpSPDKTEITuDA EGRNVEDLSSVTSSDGTIGQRVSNTRAWAEDPDDIQLDPMVTDVVYHDHGGECTGHGPSSSPERGWSYHMSGTHDGNVRAVPDTKVLPNAPKTTVPEEVGEIDLIGLEDKFASAGLNPAAVPFVPKNQSTPTEEPPVIPEYYYGSGRRtDLSKSPPRGNVNLDSIKIYTSDDEDENQLEYEDEFAKSSSEVVIDTTPEDNDSI..NQE..EsVGDPSD.QGLEHPFPLGKFPEKEETPsVRRKDSLMQDSCcRtGVPKRLPMLSEEFECSGS-DPIIQELEREGSHPsGSL.+LREPPQpSGN.SRNQPDRQLKTGDA.. 0 0 0 0 +14163 PF14321 DUF4382 Domain of unknown function (DUF4382) Eberhardt R re3 Jackhmmer:D2PI33 Family This family is found in bacteria and archaea, and is typically between 142 and 161 amino acids in length. 
25.00 25.00 25.20 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.90 0.71 -4.10 62 266 2011-06-02 07:47:30 2011-06-02 08:47:30 1 4 221 0 121 260 33 151.60 24 46.45 CHANGED ssGs....lslslTDA...P..ss..sh.....ppVhlslsplplp...t....sss................sps.......shh...shs.......spp...lsl...........................thhsus.tp.....Ls.........ptslP.uG.p.YsplRLhl.s................s.........ss.......................ls.h...s...G....sphs.LpsPSut...ps.sl.Kl....s......s..hslp..uss.sshslDFDsppSl.......s.tG.ss..pYhLKP .......................suslsltloDA...P..ss..sh..............spVhlslsplplps....sss......................................................sss........shh..sls............spp...lDLh..........................thtsus.tph.....lu....ptslP.sG.p.YpplcLhls................s..sp..............................lsh..s..G.....sphsLpsPSst.................ps..tl.cl.....p................shslp.sss.sshllDF-hp+ul.......s.pG..ss....pahL+P........................... 0 39 73 106 +14164 PF14322 SusD-like_3 Starch-binding associating with outer membrane Coggill P pcc JCSG:Target390309 Jackhmmer:Q8A1B4 Family SusD is a secreted starch-binding protein with an N-terminal lipid tail that allows it to associate with the outer membrane. 
27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.65 0.71 -4.24 44 6539 2012-10-11 20:01:04 2011-06-02 11:11:39 1 8 205 26 1561 6470 663 202.60 17 36.62 CHANGED caLDhp.Ppup....h..sps......h.chps.....hls.....sh.h...ssh.s......hht...thh....thsp.hh..ss...hsss.h...s.t...tsh...h.t......ht..h.........pphsh....s.....s......sss..s......shWsshYp.sI...hssNhlLcp.........s...pp........p.hs....t.....lh.GEAhhhRAataahLssha.............u............t..l..Ph.s............p....sss...........s.............p...............t..s....h........s..Rs..Tlp-lYppIhsDLppAhsl.Lspst...............ph..+hsh..puA.hAhhARhYL ......................................................................................................................................tt.................phpt........hht...............sh.h........tth...t...........................t....h..................t.......hh..........sp.............hhs.s............t.....t.......t.s.h..t.t................h.......................tp..h..ss.......s.........s.............sth....p...........................sh..W......p......t.......h...Y......p......s.......I..........ppsNh...h...lppls.........tth.s...pp..........................p.tp...p........hh..uEAh.......hlRAahY.a.pLlpha.........................G...............s...l..P.l..h...................s......psp....................s...................s....................h....p......h..........s.....R.s....o..h.p.-....l..h...s.h....I...h.p...D.L...p.....p.......A..h......s......h......L..s..tptst.................................hs...R...hsp...suA.hulhu+hhL................................................................ 0 739 1439 1561 +14165 PF14323 GxGYxYP GxGYxY sequence motif in domain of unknown function Coggill P pcc JCSG:Target393069 Family This family carries a characteristic sequence motif, GxGYxYP, but is of unknown function. Associated families are sugar-processing domains. 
27.00 27.00 64.20 29.60 23.30 22.90 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.69 0.70 -5.78 18 103 2011-06-02 10:20:36 2011-06-02 11:20:36 1 8 61 1 42 97 11 377.40 22 66.51 CHANGED hlsoLQGlVNp.opspIY.l..psp.s.....s.pphhcphppp..hG.lsasc..lss..sasLLs+a+p..tlcGhVlYDsp...pssSlNlAsolAGlcsulslspsLhsplpstGlpt..l.t.DlRs..hs.......tp.......ahYsp.YhsphsHphlltLs..Pc...ph.stLRDYAl...h....op.uhlFhDssss...-..........psLhcphhusMss.u.uhshGWa......s.D.Et.sVphuSpaGluhluuDashNLoVhSuhsp.osshp....ptsshst.....hpNKhYVoFlhSDGDNlQas....ashpphassPs..RGplPlGWolSPuhh-luPslLsaYYcoA...osN..DsFIuGPSGsGY..hYPsph.............sp.s.p.LssahphspsYMp+ssh+llsIhD....ssshs.stsl.hsp.........astp.ssl.ulah.sasppss.......Gplhh..NstPslupcshhh.tlps..ph....pslpspIspuhsshss.sPtFhhl.lcsWsp.sh.sslpslhspLs....pN....lcVVsPDpFhpLh+c ......................................................................................................................................................hsolpGllsp..pp........pla.h....t............p.t.hhtth.pp.....ht.lt.hp....hps....tllphapp....hpGhllasst......ohs.Asol.Aulpsulsh...s.t..h.th.lp.thsh.....h...Dh+s..hs...................ht.......ahhpp.h.sthspphl.h.hs..sp......h..h.lp-ash....hsp..hh.a.h.pss.......p...........hslhpplh..st..h....t....s..u....hhGW.............s.c....Eh..lphsSp.h.G..h.h.lsuDa....Nhohhosh.....p...h................p..t..........pshhYlsahhSDGDNhQa.......ht..p..hasss...RGph......shsW.olu.P.slh..hsPslhphahpsu........s.N...D.hlsusSGsGY..hYPsth.............sp.s.t.httahphhppahpphs.phh..lh.s.......t.t.......t.th....hpt...hh....th.uhhh.tht...........h.h.h......sth....sh.t.p................................tth.t.ltth.t..tt....a..l.h.sWt.......pth..hht.ht..........h..h....h............................................................................................................................. 
0 34 40 41 +14166 PF14324 PINIT PINIT domain Bateman A agb Jackhmmer: Domain The PINIT domain is a protein domain that is found in PIAS proteins [2]. The PINIT domain is about 180 amino acids in length. 27.00 27.00 28.00 30.60 24.60 26.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.77 0.71 -4.11 38 474 2011-06-02 13:21:12 2011-06-02 14:21:12 1 9 214 1 256 469 0 149.60 39 24.75 CHANGED shtsslpFKcsPFYclh.chlpsos.h.ts.....................sp..p+ppsp..hpFpLsspphpplpst..........p...shplhLhst...........sh........hssss.spphpFP..slpl+lNsp.lphshpu.psKsGs.....sRPsslTs..al+hs...sh.Nplplsass.....ssc.............pYhlhlhLVc ..................................+P-lphppLPFYcllspLl.+P....osLssss............................................sp...+h.p-sp..hhFsLTP.p.Q.lppIpssh-.........t.h-hslQV.LR............................hCh........s-TSs.sQEDpFP..sslplKVNsc.ss.lP........sh.hPssKsGsE................KRPsRPlNITs.........hl+LS..ssssN..pIsloWus.....hs+...............sYshulYLV+............. 0 53 96 175 +14167 PF14325 DUF4383 Domain of unknown function (DUF4383) Eberhardt R re3 Jackhmmer:C1CUZ6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 137 and 164 amino acids in length. 
27.00 27.00 27.10 27.20 26.90 26.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.72 0.71 -4.11 66 192 2011-06-02 14:11:15 2011-06-02 15:11:15 1 1 132 0 100 202 10 124.70 29 79.76 CHANGED sphsAhshGslalllGllGFl..PGlss..st......................hsstputuhLhGlFtlshLHNllHLlhGlsGLh..hups.ssuARhahhshGhlYhsLhlhGlhhs.......tp.oh..........sshlPl...............NsADsWLHlslulshlslGhhh ...............................phsAhllGslaLllGllGFl...Puhss...........................hsttstuhLhGlFs..lshLHNllHLlhGlsGls..huts..tssARsahhssGslYhllhlaGlhss.......ts..sh................sshlsl.................NsADsaLHlshulshlslGhh.h....... 0 32 69 93 +14168 PF14326 DUF4384 Domain of unknown function (DUF4384) Eberhardt R re3 Jackhmmer:C1CXC5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 80 amino acids in length. 27.00 27.00 27.00 27.00 26.80 26.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.51 0.72 -4.15 69 228 2011-06-02 15:09:03 2011-06-02 16:09:03 1 22 137 0 127 233 36 83.10 23 20.24 CHANGED ssYphG.-plphtlps.....scsuYlhlhsl..sssG.plshlhPNthpts.........NhlpuupshshP........sstas.....hplssPhGp...pplhslsoppsl .............................aphG.-plpltlps.......spsu.Y.lhlhsl....ss.sG..pl.shlhPNthpts.........shlpuspsh.plP..........sstap........hpls.sPh..Gp....-plhslsopp...................... 0 39 84 118 +14169 PF14327 CSTF2_hinge Hinge domain of cleavage stimulation factor subunit 2 Coggill P pcc Jackhmmer:Q8VYM7 Family The hinge domain of cleavage stimulation factor subunit 2 proteins, CSTF2, is necessary for binding to the subunit CstF-77 within the polyadenylation complex and subsequent nuclear localisation. This suggests that nuclear import of a pre-formed CSTF complex is an essential step in polyadenylation. 
Accurate and efficient polyadenylation is essential for transcriptional termination, nuclear export, translation, and stability of eukaryotic mRNAs. CSTF2 is an important regulatory subunit of the polyadenylation complex. 27.00 27.00 27.40 27.20 26.90 26.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.37 0.72 -3.90 52 428 2011-06-02 16:18:23 2011-06-02 17:18:23 1 16 266 3 292 397 2 82.30 36 19.13 CHANGED ..lPsshs...sshpss-sIopsLuslsPsQLh-llsphKp.LsppsP...spAcplLtpsPQLuh.AlhQAhllMshl-spllppslpp ..............................................h.sshs.s..ss.sus-.sIopsluoLPP.pQhh-lhpQMKh...hspssP....pcA+phLhpNPQLAY.ALhQA.llMplVD.sphhhphlp.t........... 0 85 153 229 +14170 PF14328 DUF4385 Domain of unknown function (DUF4385) Eberhardt R re3 Jackhmmer:C1CYW3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 149 and 163 amino acids in length. 27.00 27.00 32.30 32.20 26.30 26.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.87 0.71 -4.27 28 238 2011-06-03 07:02:05 2011-06-03 08:02:05 1 1 235 0 57 138 307 138.60 63 93.28 CHANGED FDYshDatpIDa..RppPEhYRVGRGEQGVLLVEPYKSEILPaWRFKTP-lAcpSS-KIYphFhsYhcpsDFlGMDMARKFLQMGaTRARRY.ANaKGG+KY..p..............sptcl.cpptD...scKAcuApIF+ppWcps+scppYhchKccapc...cY .............FsYp.DFssIDF..RppPELYpVGRGEQGVLLVEPYKSEILPaWRaKstssAhcSAEpIYQLFEsYRpQDDFVGMDMARKFIQMGYTRARRY.AN...YKGGKKY.s...........................--GpLsscssD.......PlKAAAAslFKuhWD.+lRpDEDYL++K+pHQt+a............. 0 18 30 46 +14171 PF14329 DUF4386 Domain of unknown function (DUF4386) Eberhardt R re3 Jackhmmer:C1D3W7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 214 and 245 amino acids in length. 
24.80 24.80 24.80 24.90 24.50 24.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.58 0.70 -4.86 69 193 2011-06-03 07:11:31 2011-06-03 08:11:31 1 2 136 0 112 204 60 194.10 19 88.70 CHANGED +huGhlaLlhhlh.....ulh.uhhhhtshl.........sssshssltsptshhphuhhhthlhslhslslAlhLahll.+..thspslAhsssshRll.tsslhslullshhsslhlh.ssssh..s.t.............th.shshhhhshashs..a.luh.lhh.....GlphllluhLhhRSthlP+hluhhhlluuhhhlhsshhphhhss.....thhshhhhlshhlsEluhslWLll+Gh ..................................................hsGhhhlhhhlh......uhh.u.h.hh.shl.........ssthht.thhspt..s.hh.hhuh.h.h..hlhslh.lshulhLa.ll.+...ths.......shuhhhs.hhtll.tus.lhshul.lthhshh.lh.pt.....................th.thshh.hht.htshs......hhlu....lhh.....Glthllluhlhh+ot....hl.........P+hluhlshluuhhhlhss..hhthhhst..........ht..hhhlshhlhElshslWLlhhG........................... 0 62 87 107 +14172 PF14330 DUF4387 Domain of unknown function (DUF4387) Eberhardt R re3 Jackhmmer:B5YEU8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are approximately 110 amino acids in length. There is a conserved RSKN sequence motif. 27.00 27.00 37.00 36.90 19.90 19.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.03 0.72 -3.67 38 189 2011-06-03 08:26:29 2011-06-03 09:26:29 1 5 181 0 59 130 55 98.20 54 62.14 CHANGED Lt-lAcsIRSKNAGPa.lThDIhFcspcsY-plKposslspchlucLYtlss-cl.phh.a-sApAlKholsR.....shs.uGuhG-pDlaGuQQauPLLslpl ....ssLApVIRSKNAGPYELslDIlFKo+EsY-RVKsSutLTsElIAcLYclcP-hIhcIVaFDPusAlKIshPR.....slh.SGslGDsDVYGAQQHAPLLshp.h..... 0 19 37 50 +14173 PF14331 ImcF-related_N ImcF-related N-terminal domain Eberhardt R re3 Jackhmmer:Q7UL74 Family This domain is found in bacterial ImcF (intracellular multiplication and human macrophage-killing) proteins. It is found to the N-terminus of the ImcF-related domain, Pfam:PF06761. 
25.00 25.00 26.10 25.20 23.90 24.60 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.54 0.70 -5.49 110 1314 2011-06-03 08:45:58 2011-06-03 09:45:58 1 12 877 0 250 1135 43 257.20 27 22.64 CHANGED sssptDpstWpuFLsLL++pR.sRpPlNGlllslu.lscLlsss.stp...ptpApslRtRLpELpppLuhchPVYlllTKsDLlsGFs-aFssLspp.pRp.QlWGhT.h....shs...p..sss..sst..........hpp-astL.hpRLsspl.s+LppE..p...Dhp....cRstlasFPpQhuuLpshLtpalpplFtsspapps....shLRGlYFTSusQpG..s..s.h..s..p..ht..sh........s.p.......ph.......s....hs....ts.......s..s...s......s.....t...t............ss...s+.oaFlccLhpcVlFsEusLsu.ssh+hpt.....t.R...hh.t.ht.st.h...ssh.ss ........................t...t.ptt.WtthLshL++pR....s+pP.l.NGlllslsls-L.lssstpt..t...pphupsLRpRLpElpppLthphPVYlllTKhDLLsGFspaFpsLs..pp.pRs...plhGhT.hshp..t..ppsssh..................tthspp..ap.pL.hp+lsttlss.thttp..h....ssp.......pRstlasFPppht.u.LppsLtphlptlht.sst...apts....shLRGlYFT.........SuhQpG....ts.....s...h..sp...sh...............sp.......pa.............ths..tt..............s.....t....t.................................ss..sps..aFh+pLhspllhsEs.s...Lss..sh.p.hth....pt.R.hh.h.hh.hhh.....hh.................................................................................................. 0 47 109 179 +14174 PF14332 DUF4388 Domain of unknown function (DUF4388) Eberhardt R re3 Jackhmmer:B5YCM3 Family This domain family is found in bacteria, and is typically between 102 and 135 amino acids in length. 
25.00 25.00 25.10 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.31 0.72 -4.18 165 493 2011-06-03 11:45:10 2011-06-03 12:45:10 1 42 129 0 296 484 46 131.50 20 34.04 CHANGED pG.sLp...sh.sLs-llQhlptsp+oGsLpl.....pt.st............................lah..ccGpllp..Apt.s.............................................................thpupcAlhpl.l....thpp....GpFp.................ht.tthstttslph.sh..ppllh-uh+ph...DEhpth ....................tG.sLpph.sls-llphlptsp+oGh.Lpl..pttstp..up...............................lahppG..pl..ltApts............................................................................phps.pcslhpl..h.......shpp..Gp.Fp.............ht.ss.ttttplph...sh....ptllh-uh+hh..-ch..h................................................................................... 0 125 211 281 +14175 PF14333 DUF4389 Domain of unknown function (DUF4389) Eberhardt R re3 Jackhmmer:Q0EYR1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 104 and 223 amino acids in length. There is a single completely conserved residue R that may be functionally important. 25.00 25.00 25.20 25.10 24.00 24.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.65 0.72 -4.27 51 266 2011-06-06 07:50:20 2011-06-06 08:50:20 1 2 225 0 91 250 268 80.10 31 48.89 CHANGED hpppshahRllaMllhslhhpluphllhllsllQalhhLls.GpsNppLtsFussLupahhchhpahoaso-c+PaPF.....scaP .................h.....hh+hlhhl.a.....hlVhhhhphshsslsllthhhhLho.GchspsLhsFssul.pahh+l..u..tYs..s..h..t..oDchP.PFs............. 0 38 59 83 +14176 PF14334 DUF4390 Domain of unknown function (DUF4390) Eberhardt R re3 Jackhmmer:Q0F0K5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. 
Proteins in this family are typically between 192 and 203 amino acids in length. 27.00 27.00 29.00 28.30 24.00 23.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.70 0.71 -5.06 70 309 2011-06-06 07:59:00 2011-06-06 08:59:00 1 1 306 0 114 259 83 164.20 31 82.02 CHANGED utAsshplpp.hplp.tsc....sslh.LsuslphpLsttlc-ALpcGlsLhFsh-hpltRsRhaWh...Dcplsp.....tshph+LuYpsLT+paclstss............hspsasoLs-ALpslt.clpshtlhstu.........plpssp.sYplplRhcLDhspLP+PhQlssh.spssWslsos.hhp..aph ..........................tApslplpcsphp..sss....ushp..lsuc..hch-Ls..spLc-Alp+GlPLhFshchpLs+sRh.....aaa...-cplsp.....sshsh+LSapPLT.ccYRVosuu............hphsasoLcsALpslt.cls..sW+V.hsps.........plpsup..s.apuplRhpLDsupLP+.PFQlsAl.sspsWpLsSs.Wpph....h................... 0 35 76 95 +14177 PF14335 DUF4391 Domain of unknown function (DUF4391) Eberhardt R re3 Jackhmmer:Q0F339 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 220 and 257 amino acids in length. 
29.90 29.90 30.10 30.20 28.70 29.80 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.06 0.70 -4.95 57 203 2011-06-06 08:05:32 2011-06-06 09:05:32 1 2 194 0 61 189 15 216.40 22 92.12 CHANGED lslPpsst....ls+p........lPKpthh..cpsshssppKchhs-sl..-pIhahhplsspoh.sls....sspphpEItlhplpL+s..t....phspclh...cllp+tlPh.llh.lhp.....h.sschthshsaK...Rhsps-tschsl.pphhtosW............thhpslshs.h..thshtslYpshlpplhthps..............t.ht...........shtpphpthcphpclpcclspLcschp+E+Qhs++s-lNtcl+clcpcl ...............hslPpsst....ls+p........lPKphhh.....pps..shosp.Kphhhppl..-pIphhhtLpsssh.sls....sspph.El.llplpLps....p...............phs.pclh...clIppt...l...hsllh.hlp.......................h.ssphphshuh+...Rhttscts+hsl.tphatosW........................................................th.sp..lphp.h..sh......sltslYpslhpplhhhssph...................t.....................slstphtphpplppLppclscLcpchp.+-+phs++sElptcl+chcppl............................ 0 25 44 55 +14178 PF14336 DUF4392 Domain of unknown function (DUF4392) Eberhardt R re3 Jackhmmer:Q0EY62 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 282 and 585 amino acids in length. There are two completely conserved G residues that may be functionally important. 
27.00 27.00 27.80 37.10 25.30 24.90 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.69 0.70 -5.61 49 194 2011-06-06 08:24:43 2011-06-06 09:24:43 1 6 147 0 85 191 48 279.00 31 62.43 CHANGED pl-pll..ssDhstcG...ltthh......ssu.hhpAup.......hL..................ppsppllIsTGFsl.....ssssETDGPsGAhsLupALpt.lGtcshlls-p.stpslpshhpshsh..ps..h...............................................tt.hpphp.shlluIERPGpusDG.pYasM+Ghslsth..ssshD....tlhhpupp...slsoluIGDGG.NElGMGsl.pptl.phlshusp..........hhusspsDpLllAsVSNWGuauLsAtLshhts.................................hl.shcpctp..hLctllptG.ulDGlotps.sho..VDGlsh.phptpllctLpph ............................................lpphh..shD.Gt.+G...ltplh..........t.st...lh.c.AuhtL.........................spsppVlls....TGFPs...........ttsstETDGPsGAlu......luthLpt.lGtcsslls......Dpp..shshhpthhpsts...psh.th.hs...........................................ttps..pp..p.hctlluIERsGpAsDG.pYaNhRuhsI.pph..ssslDpLFhtApp.......GlsohGlGDGG.NElGMGpl..cptlpphl.pGs...............IAsslpuDhhllAu..............VSNWGuYALsssLhhLts................................phl.s.hppc.ct..hLphllptG.shsGhott..shp..VDGlsh.phatphlptLh..h............................ 0 28 37 58 +14179 PF14337 DUF4393 Domain of unknown function (DUF4393) Eberhardt R re3 Jackhmmer:Q0F0Z8 Family This family of proteins is found in bacteria, archaea and viruses. Proteins in this family are typically between 254 and 285 amino acids in length. 
25.00 25.00 25.20 25.80 24.90 23.80 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.25 0.71 -4.85 48 194 2011-06-06 08:50:57 2011-06-06 09:50:57 1 3 178 0 46 155 1 182.20 21 69.96 CHANGED c+lpplsp...cs..ltpss..splssssl-supat.ss-pLpchaupLlAsuhspspsppsHPuFscIlcpLossEAplLphlt................tsthshhshthhh..............pstshphhhsshhhhspp............s.pphss.lsNLpRLGLlphs...........h.h.ppthhpthcstphhpphttpht................................................thlplTshGppFhpssh ............................th......pt....h.ps....plhhssl-sspa....hspcpLpphaApLl.usuh-pp.pss.sH.uFlcIlcpLoss-AplLphlt.............................................ppp.hPhsphthhh................pststth.hhpsh.hhspt...................pphsh.lsNLhRLGLlphs.............hspps.hphh.t.s..thhpph.pth......................................................t.lploshGptFhpsC...................................................................................... 0 11 32 40 +14180 PF14338 Mrr_N Mrr N-terminal domain Eberhardt R re3 Jackhmmer:B5YBC5 Family This domain is found at the N-terminus of the Mrr restriction endonuclease catalytic domain, Pfam:PF04471 [1,2]. Fold recognition analysis predicts that it is a diverged member of the winged helix variant of helix turn helix proteins. It may play a role in DNA sequence recognition [2]. 24.00 24.00 24.20 24.20 23.80 23.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.91 0.72 -3.90 132 557 2011-06-07 07:50:11 2011-06-07 08:50:11 1 9 518 0 145 476 56 89.00 25 28.56 CHANGED sapp............hh....hPlLchl..p-.Gt...ptphp-lhctlscphpLo-c...ctpphhs...oupph.hapsRluWApoaL.......p+AGLl.cs...sp.RG..haplTcp.......GpplLtpss ..................hpphhhPlLchL...ts.sp...ststc-lhc.tl.sct..h..s..ls-c.....pts.ph.hs....SGpp..hhpsRlsWApshL.......ppAGll..pp...sp...RG..haplTpt.......Gpphltt.......................... 
0 60 101 124 +14181 PF14339 DUF4394 Domain of unknown function (DUF4394) Eberhardt R re3 Jackhmmer:C1D3Q3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 262 and 476 amino acids in length. 22.50 22.50 22.70 22.80 22.40 22.20 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.58 0.70 -5.08 47 116 2011-06-07 10:49:46 2011-06-07 11:49:46 1 11 90 0 47 134 22 221.50 32 63.35 CHANGED sphLh.hsssssspstpshploG.h.ssppLlGIDaR.........PssGpLYul....usp.upl..YoIsssoGt....uohl..........uthssslsus...shulDFNPsADRLRllos...sGpNLRlssD.......sGs......hshsD.........ssLshsssshp.u..............ssslsAuAYTNshsssp.s......TsLasIDsstst..LlhQs.........sPNsGsLsslGsL...Gl..ch.ssssGFDIs....sstt......ssssuassh....u..t.s..pLYpl-.........LtTGpuo.......thus..........lss..t...ltslAl ..................s..hl..h.sspssthhpshtloG.h.s.sppllGIDaR.........PssGpLYul.......sss...uplYolsssoGs.........AThl..............uthssslsus...shulDFNPss...D.....RlRllus...sGpNLRlss-.........sGs.........hs.sD...........GsLshs.ss.t.u.............t..ssslsAuAYTNshsssp.s.......TsLasIDsshs..tLhhQs.........sPNsGsLsslGsL.....Gl.....sh...ssssGFDIt..........s.ts..s.sttAatsh....s..s..s..pLYpls.........L..t.....T.GtAo.....thup.................lss.......ltslAl................................. 0 22 37 43 +14182 PF14340 DUF4395 Domain of unknown function (DUF4395) Eberhardt R re3 Jackhmmer:C1CVW5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 142 and 168 amino acids in length. There are two completely conserved C residues that may be functionally important. 
24.00 24.00 24.50 24.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.66 0.71 -4.08 65 233 2011-06-07 11:45:19 2011-06-07 12:45:19 1 8 221 0 115 246 199 131.10 30 78.15 CHANGED lD.pusRhsAslsslllllsLlsu........shh........lLsl.slsFslsshhGs+huPau....hlh..tplltPpL...ssschEsssPhRFAQhlGhlFussu.hlu.ahh.....GhssluhlssuhslsAAhLsAuhGhCLGC.lYhhlp+h ....................................................................ls.pusRhsAhlsslllhlsllss...................thh........llsh.slsFsls.shhG.s+hsPau.....hlh..t...plltsclt..sssc..h..EsssPhRFAQhlGhlFsssuhlu..ahh......................Gh.shl..uhlssuhslsAAhLsushGaCLGC.lYhhlt+.h........ 0 40 87 105 +14183 PF14341 PilX_N PilX N-terminal Eberhardt R re3 Jackhmmer:C1CZZ9 Family This domain is found at the N-terminus of the PilX prepilin-like proteins which are involved in type 4 fimbrial biogenesis [1]. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.35 0.72 -4.30 69 454 2011-06-07 12:24:02 2011-06-07 13:24:02 1 4 373 0 154 436 62 50.80 32 22.54 CHANGED pGhu.Lll...uL......llLll.lollulushps...shhpp+huuNppppppA....a.uAEuulp .....pGhs.Lll...sL......llLll.lolluluuhps...sshpp+huuN.pcpphA.....hpsAEuuLp.............. 0 45 102 134 +14184 PF14342 DUF4396 Domain of unknown function (DUF4396) Eberhardt R re3 Jackhmmer:C1D417 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 167 and 310 amino acids in length. 
27.00 27.00 32.00 31.90 22.50 21.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.77 0.71 -4.07 54 240 2011-06-07 12:30:05 2011-06-07 13:30:05 1 4 225 0 106 237 1104 138.70 29 53.90 CHANGED shWppustuTlHCLsGCulG-lhuhhlsps................huhshhsth.sluhshuhhhGhhlphhslh..hpshshtpAl+tAlts-hlSlsshElu.ssshhhl...............uhshshssshFWhs.htluhhsGFlsshPhNhWhlp+GtKc .............................WppsshuTlHClsGCslG-lhuhhhhph................huhshhsph.slshshAhhhGhhlphhslh..hpsluhtpAl+pAhts-hlSlsshplu..sshhhl...............uhshshssshFWhs.htluhhsGFlsshPhNhahlc+GhKc............ 0 27 58 87 +14185 PF14343 PrcB_C PrcB C-terminal Eberhardt R re3 Jackhmmer:C1CVC9 Family This domain is found at the C-terminus of Treponema denticola PrcB, Swiss:B8YNY4. PrcB interacts with the PrtP protease (dentilisin) and is required for the stability of the protease complex [1]. 24.00 24.00 24.30 24.00 23.90 23.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.72 0.72 -4.26 96 234 2011-06-07 13:08:52 2011-06-07 14:08:52 1 11 210 0 92 220 20 60.10 26 27.35 CHANGED phall.lshGp+sTuGYulpl..pph.t.s...pssl...hlpsplhsPstsphssps.....h........TaPa...hllclpt ...................hlh.hshGppsTuGYulpl....ppl...ths......pssl.hlp...schhsPp..ssphssps.....l........TaPahllph..t........... 0 50 74 87 +14186 PF14344 DUF4397 Domain of unknown function (DUF4397) Eberhardt R re3 Jackhmmer:C1D258 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 120 amino acids in length. 
26.00 26.00 26.30 26.00 25.80 25.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.58 0.71 -3.79 91 801 2011-06-07 13:44:06 2011-06-07 14:44:06 1 13 358 0 263 753 167 108.00 22 64.45 CHANGED sutlRVhH.....uuPs.......uPsVDVal..ssph.........hlsslsapshos.YlslssGs.aplpl..hssGss.s.sss.lhsssl.sltssppYTlsAssshs.....s..hph........hshsDsh...p....t.hssspu.plplhHsuPs..A.PsVDlhl ............................................s.lRhhH.....hsss.......sPsVDl.h.l...sGpt............lhpsls.ap....p....h....o......s..Ylsls.sGs.aslpl....sss..uss.............s..l..h......s.ssl.sltssptYTlhAl.spss...........s....hph...........................hhh.c..................................................................................................... 0 85 185 241 +14187 PF14345 GDYXXLXY GDYXXLXY protein Eberhardt R re3 Jackhmmer:C1CYU2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 171 and 199 amino acids in length. It contains a conserved GDYXXLXY motif. 
27.00 27.00 28.70 30.30 25.70 18.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.91 0.71 -4.34 106 394 2011-06-07 14:04:33 2011-06-07 15:04:33 1 4 391 0 107 331 16 153.40 27 55.03 CHANGED lplshlshhlhpppthltpGcslhLclsPVDPRsLhpGDYhsLsY..slup.....................................tthtps...spsalhL..p..-spul...........sphhphpps...........s.tssplhlps.php...................................t.pl.....phu..h.-saahsEGpupphcp.sph.................uph+Vssp.GpulLhsLh ..............tlshlshtlhpppthLppGpslhLpltPVDPRsLhpGDYhsLsY..slss.s......................................t.p.tts........ppsalhlc...Dspsl...................................sshsphspst..........shsssphhlps.cht............................shpl..phu........h-pYahsEGpucphpp.u+h.................................................uth+Vssp.GpslLhsLh............................... 0 30 60 86 +14188 PF14346 DUF4398 Domain of unknown function (DUF4398) Eberhardt R re3 Jackhmmer:Q0F2G1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 127 and 269 amino acids in length. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.85 0.72 -3.97 51 449 2011-06-09 10:03:32 2011-06-09 11:03:32 1 13 287 0 153 393 38 104.80 24 52.41 CHANGED hsshhluG..C...Au........sshs.spphptucpAlppApss..sAsp..hs.s......cLptAppcLtpA.ctshsppchp........pAcphAppApsc...AclApspupstpsppthpchpps..lppLcp ..................................................hhhhuG.C..us.............tshs.spphstAppulppAcps...susp.....A.sh........phcpAp-pLspA...ctu....h.p.c...t.pYp........cA.+plAppAptc...AclApp+u.sspspptlpphppt..hppl................................ 0 42 68 114 +14189 PF14347 DUF4399 Domain of unknown function (DUF4399) Eberhardt R re3 Jackhmmer:Q0EYV0 Family This family of proteins is functionally uncharacterised. 
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 135 and 1079 amino acids in length. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.07 0.72 -3.96 72 404 2011-06-09 12:43:37 2011-06-09 13:43:37 1 27 222 0 152 401 417 86.80 32 24.73 CHANGED VpFGLcG.hslsPAGp.tpss.....TGHHHLLlDsst.........shspslPts............tptlHF.....GsG...QTEs...slp.LsPGcHTLpL.hGDttHhsa........ssslhScpI .........................................hGlps.hslsssup..hpss.....TGHHHLLlDsps.........sh.spslshs.............stlHa.....GpGpT-splp.L........sPGcHTLpL.h.uDt.Hhsh........t....o........................ 0 22 100 127 +14190 PF14348 DUF4400 Domain of unknown function (DUF4400) Eberhardt R re3 Jackhmmer:Q0F326 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 209 and 249 amino acids in length. There is a single completely conserved residue P that may be functionally important. 27.00 27.00 30.40 29.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.20 0.71 -4.93 58 389 2011-06-09 13:00:12 2011-06-09 14:00:12 1 2 295 0 93 325 19 189.70 27 84.40 CHANGED huhhahhs-puhp+upphhppphshLusphs.cullhp..........psu..h......hh.......haphhFVc.oGhhshhpp...................s.tt.ttshphhsshhhphhpsalhushhsshhhhlRLs.lLhhslPlFlhsslsullDGLspR-lR+auuGhESualYH+A++hlh.shshs.hhlYLuhPh.ul.Phllhl...PsAlllGlslslssusFK .................................................................................hhh.s-ts...hpp.pt.hphphshLusp.hspslhhp...................pss.....................haphhhVc...oGhhphhpp..........................ttttp.t.s.pshp..h.sshhhshhpsal.AhhhshhhhhlRls.lLhhhlPlhlhhhhsulhDGLspRclR+auuGhESualYHpApthlh.shhhls.hhlYLshPh..ul.Phllhl.....PsAhLlGlulshssusFK........ 
0 13 43 74 +14191 PF14349 SprA_N Motility related/secretion protein Eberhardt R re3 Jackhmmer:B2KAY0 Family This domain is found repeated three times in the N-terminal half of the gliding motility-related SprA proteins. The role of this domain in motility is uncertain [1]. It is also found in proteins required for secretion [2]. 27.00 27.00 54.90 27.30 22.80 24.20 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.77 0.70 -5.23 88 445 2011-06-09 13:24:56 2011-06-09 14:24:56 1 3 146 0 160 475 719 425.90 19 52.43 CHANGED tahshts.hh.h.tt..............hhs-P.............Yph.ahtp.......pl......................hp+Yh..................h...p.ttpshsssEc....pcsshsphpp..ah...pa+lsh...pssp......hhhG.ps..........hhlc.ps.splshss.......pahphc.P.........................olphhphahosFshshhlphushshVtuchp...sYsp...sh.s.s..sp.h..pht..............................pph-lusVslppNssh.........l..h..ssh.ptps.ss....sstlphscpsLohhlsp...........psucu+sV.....hpsps..lshcpYcchchF..............................tlGpsapsNY.p.pls.......s.....tpshshst...plW..hsNp...........s...t..sshc..h...................................ttphslhu.sslG..csh..tlpN.s..........................hsthsp.....sssslssshu-hsssshsuph...thhs.ppssp..ctppshpphsh......sth...s.phshphshh.utpp.........p..s.pasPhhsslpLpstLs 
..........................................................................................hs....hhhh.tthst...........hhhss-s.............att.ahtp...................pl.........................hp+Yh.....................................ht.p..tp..shssscp.....psphschth.hspachsh....s.tp............h.hGst..........hpl.s.pGssplshGs..........ph...c.P...................................slp.tpcahssa......shsphlp..hs..uhlsuphphphsYps...sssa.shpsp..hhhshp...........................................................p.phspphplGslshp.spph............h....h..ssh.ttpstsolaGhssphpts.cptLohhlsp...........tsspspsl..phshpsp.h.lpstpYcp.pha................................hhupsah-sapp...sh...............t.....posl.slsp...plW..lsNp........s........ss.hp................................................tppRslluh.slu.....psh......h.t............................h...........ss....ssl.ssshu.shts.hp.hstph...thhs..p.p...tsp..ptppshpshsh...................t.phshh.st+t...t..papassths.h.Lp....................................................................................................... 0 73 145 160 +14192 PF14350 Beta_protein Beta protein Eberhardt R re3 Jackhmmer:B2KEQ4 Family This family includes the beta protein from Bacteriophage T4, Swiss:P13057. Beta protein prevents the gop protein, Swiss:P13058, from killing the bacterial host cell [1]. 
27.00 27.00 27.30 27.20 25.70 21.10 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.24 0.70 -5.26 43 134 2011-06-09 13:40:16 2011-06-09 14:40:16 1 1 129 0 34 119 2 313.80 20 94.46 CHANGED YhPlL..+h+puEhpALpp.L..ssps+sp.lhPllplsthststpp.................pptspcltpshspt..shhsssp.......................pphhpsssthhs..hhhphhpptstp............llPVlshspss.....ph.psltpth......ttttlslRlptsphts.....phtptlspllstls.htss.phtlllDhtshp.sts.sthtthhsth......lpplsph..tapslslsuoSaPps.......hsshst.....ppstl.RtEhplapplts.......sttshaGDYusspspht......sstsspsss+lcY....ot.csp.ahlhR.stthpt.tht.......phh.sluppll...spst..a...ussaSWG.DphIpps.Aps..........ss.......GssopWhplssspHlshhlcp.l ......................YhPhL..+h+.puEhpALp.....pL...s.phtst.hhPlhpl....hs.p.................................pphhppltp.hstt...hh..sh........................tthhpsspt.ht......hhp.hpthsh.................llPVlthspss.....phhptltphh........tttlslRlp.t.th.s..........t...lspllsphs..h..s..ph.lllDhtslp.stst...th.tphhtth......lsplsp....hpplh.lsusuaPss.......hsshst.....t.tpl.RhEhplapplhp........hpshauDYushpsph.......s.thhp..ssplcY.....oh.csp.ahhhR..tthpptsht............phh.phsppll...sssp..ap..s.sp...asWG.DphItps.ups.........tss..............GssspWhplshspHlshhlcp................................ 0 7 23 31 +14193 PF14351 DUF4401 Domain of unknown function (DUF4401) Eberhardt R re3 Jackhmmer:B2KEA1 Family This family of proteins is found in bacteria. Proteins in this family are typically between 357 and 735 amino acids in length. The family is found in association with Pfam:PF09925. There is a single completely conserved residue K that may be functionally important. 
25.00 25.00 25.20 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.54 0.70 -5.44 44 202 2011-06-09 14:25:50 2011-06-09 15:25:50 1 7 199 0 48 196 9 311.50 20 56.27 CHANGED sWalplh................huhuuWlAulh..hLsFlhhshhhh.c......hshllhGllhhuhAhhlh....+tp.............thFhcQhulAhsluGphhlsaGlhphh.......thhshhlhhhhlhslhhhlh.s...hlhphL.shhhshshhhhhtthhh..........h.lshhhlshhhshl......hhth.........ttstththhpslshuhhlshlshhhhhsht.hhhh.................................th..hhhhhhhhlshshshhhlhhphph.l..ssshthslhhslslluhh.h..hsulshulllllluhhtspphlhululhsllhhluhYYYsLploLLtKShhLhusGllLLshthllh+hh ..................................................hhlphh................huhuualAulh..hLhahhhhh.hh.p......sshlllullhlshAhhlh................+t..............shhhpphulshhluGp..lhhhaslhp.h...................hhhhlh....hhhlhslhhhlh.s.....hhhphL.shhhshshh.hhhshhhh.................h.lh.hhhhshhshl.......hh.hh............tthhtttlp.sltauhhlshlshhhhthh.hhhhh..............................hh....th.thhhhhhhhhshhhhhhhhhhh...h.h.hh....ptth...th.....hhhh.slhlhs..hhhh..hsulshslhllLluhhtspphlhululhshhhhluha.YYpLthoLLhKShhLhssGllLLshthllt+..h................................ 0 11 24 39 +14194 PF14352 DUF4402 Domain of unknown function (DUF4402) Eberhardt R re3 Jackhmmer:B2KBI6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 155 and 182 amino acids in length. 
25.20 25.20 26.00 25.60 25.10 25.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.87 0.71 -3.68 65 129 2011-06-10 07:19:46 2011-06-10 08:19:46 1 2 54 0 66 133 13 131.50 20 75.21 CHANGED lslsps..ss........LsFG........hhsssssGoVsl..sssG....shshsGssshhsssss.....sApFslsG.psspshslols.........sshslss........sssshslsshs.................sshstssthsssG..............stshslGGoL.......sls.usps..sGsYo.Go..asVoVsY ..............................................................plsps..ts........LsFG........hhss.ssssslsl.....sssu.....hs..sssh..hsssss..............sApFslsG.ps...spslslols...............sshslss..........ussshslsshs.................................sthsssshhsssG...................stslplGGoL..........sls..usps......sGsY.o.Go..hslsVsY...................... 0 21 42 59 +14195 PF14353 CpXC CpXC protein Eberhardt R re3 Jackhmmer:B2KC69 Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea, and is typically between 122 and 134 amino acids in length. It contains four conserved cysteines forming two CpXC motifs. 24.00 24.00 24.00 24.80 23.90 21.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.84 0.71 -4.18 43 128 2011-06-10 07:51:50 2011-06-10 08:51:50 1 3 105 0 30 113 9 122.00 21 47.72 CHANGED plsCPpCtpphchclhstlssspcsch+cpll.sGplhphpCPpCGpphhl..shshlYpD.p.c.chhl...hhhPc......tphppthphhtshph.........t.tttthphRllhshsplhEKlhIh-sGl-....D+hlElhK .......................lsCPpCtpthphphh.shlssspcPchppplh.ssphhthpCPpCGpph.t.l..shshlYhD.p..c.chhl...hhsPp...........p.ttpphphhtthp...................htthphRhshshtplhEKlhlhcsuhscchhEhhK...................................................... 0 18 30 30 +14196 PF14354 Lar_restr_allev Restriction alleviation protein Lar Eberhardt R re3 Jackhmmer:B2KDC9 Family This family includes the restriction alleviation protein Lar encoded by the Rac prophage of Escherichia coli, Swiss:P33229. 
This protein modulates the activity of the Escherichia coli restriction and modification system [1]. 28.60 28.60 28.60 28.70 28.50 28.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.92 0.72 -3.67 63 206 2011-06-10 08:38:13 2011-06-10 09:38:13 1 6 176 0 44 148 11 60.10 35 47.56 CHANGED tcLcP......CPFC.Gssslthps.......ppt.t.............................hhlt..........Ctp......C....Gu.................tsshtpscpp....AlctWNpRs ...............cLKs......CPFC.Gssplhlcs.......s.shs.....................................hhsc...Cst.......C....tu....................csshsssttt........AhcpWN+R......... 0 10 24 35 +14197 PF14355 Abi_C Abortive infection C-terminus Eberhardt R re3 Jackhmmer:B2KEQ6 Family This domain is found at the C-terminus of the Lactococcus lactis abortive infection protein Abi-859, Swiss:Q48620. This protein confers bacteriophage resistance [1]. 24.00 24.00 24.40 24.50 22.10 19.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.67 0.72 -4.04 92 373 2011-06-10 09:57:38 2011-06-10 10:57:38 1 2 341 0 82 267 10 78.90 23 29.98 CHANGED lspLh..phspptLshtsspttp..........t.hppll...sultsll........pulsslRN.ctu.suHG.....ts....pthtlsspcAcLslssusolst.allcs ....................................................h..pLh..ptshphlthp.ppp.t..........p.l+pll....puhssll....sulsplRN.ctu..suHG.....pst.....pthtl.s.pcc.AcLslssusolstalhc.... 0 18 51 68 +14198 PF14356 DUF4403 Domain of unknown function (DUF4403) Eberhardt R re3 Jackhmmer:Q8KCF8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 455 and 518 amino acids in length. There is a single completely conserved residue W that may be functionally important. 
27.00 27.00 46.10 46.00 24.40 22.90 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.13 0.70 -6.18 41 97 2011-06-10 10:29:55 2011-06-10 11:29:55 1 1 88 0 59 107 9 419.20 20 89.72 CHANGED stshSslslPlpls..lsslpphlNpplstphhspssh.ts....................ptplphplsRtGslslsus....ssplhhohPlpsphphthuhtshh.th.......................shstssphssslshsspsslsssWclssphp.....shshhppsslslu.sh+lslpshlcshlcpthpclpstl-pplppshsL+pplpphWpphpcPl.lsp......s.ssWLclpPpclhss...phsspslphslGlpupspsssus.pPt.....hPLPshphhs..shssphclslssplsas-lsphlspph...ts+sFs...sssh.plplcslplhu.ssc+Lhlsl...pspus.........hcGslalsG+PsaDssspplplpDl-aslcocu.h.Lhpsushlhpstlpctlppph...shslpsplspsppplpptlup..psspslclsuplpslphsclhhsscslplhlpAsGpsslplp ...............................................s..t.ShlslPlpls..lsslpphhsptlstphhtcss..ss....................ttplphpl..h.R.Gslslsss....ssplhhshPLphphphthuhtshh...........................hstssphpsslphsspspls.sWplpsphp.....shshhpssslplu.Ghclslss.lcshlcpthsphtstlspplpcshsL+ppspphWp.phpcPl.lsp........shWLplpPpclhss...phss.sslphslGlpspschssus.pPt.....hslPs.hhths....shssthplslssplsas-lscllppph...tu+sFs...........ssshplplcplslhu..sss+Lllslplpup............hpuslalhGpPhhDstspplplpDlchslcocs.h.Lhpsushlhpstlpptltpph...shslpstlspspppltptlsp..pstpGlplsuplpslplsclthsssslplhspApGplplpl........ 0 21 40 52 +14199 PF14357 DUF4404 Domain of unknown function (DUF4404) Eberhardt R re3 Jackhmmer:Q8KFC4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two completely conserved residues (P and G) that may be functionally important. 
27.00 27.00 27.00 27.90 26.80 26.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.83 0.72 -3.57 40 107 2011-06-10 10:45:52 2011-06-10 11:45:52 1 1 105 0 38 91 6 83.50 38 92.69 CHANGED cLpppLppL+ppLp...psss.lD-sppstLpsLtc-IpphLpp...tsssttpcpsls-plsp.Alp+FEscHPpLutslcplhsoLusMGl ...........pLpcpLssLRcpL-...psss.ls.Epp+tpLcpLhppIEtplp........tstt..p-soLsDslNh.AlERFEspHPsluuTLRsIlpoLusMGI. 0 9 17 30 +14200 PF14358 DUF4405 Domain of unknown function (DUF4405) Eberhardt R re3 Jackhmmer:Q0F3T2 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. There are two conserved histidines that may be functionally important. This family is N-terminally truncated compared to other members of the clan. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.14 0.72 -3.84 112 320 2012-10-03 10:28:09 2011-06-10 13:45:22 1 10 262 0 150 342 33 63.50 23 31.21 CHANGED lphhlshhlhlshlhlhloGlll.hts.shh........hhuhstthhc.plHhhsuhhhhlhhslHl.hlpW ...........phhlshhlhlshlhhhlSGlll.hhs.shh......................hhshs.phhp.pl.HhhsuhhhhlhhslHl.shpW..... 0 53 92 121 +14201 PF14359 DUF4406 Domain of unknown function (DUF4406) Eberhardt R re3 Jackhmmer:B2KDC7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 98 and 145 amino acids in length. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.10 0.72 -3.91 41 334 2012-10-02 19:28:18 2011-06-10 13:54:17 1 3 282 0 34 299 91 89.40 37 77.64 CHANGED lYIAGPMo..Ghs........-hN+ssFppsAccL+tpG...ahVlNPAph......Pssh.......s......acpYM+lslshLh.sCD..sIhhLsGWppScGAphEhtlAcpLGhplhht ..........................................................hYluGsMo....Gh...........saN+..AFp.p.stptL......+....p..cG....asV..l..NPAhh.............s-uh.......s...........scYM.c.hshshLp..ssD..sI.Y.h..L..p..GWppStGAptElslAc+LGhsVhh.t........... 0 12 23 30 +14202 PF14360 PAP2_C PAP2 superfamily C-terminal Coggill P pcc Jackhmmer:Q56Y01 Domain This family is closely related to the C-terminal a region of PAP2. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.79 0.72 -3.71 54 511 2012-10-02 00:53:37 2011-06-10 14:41:37 1 8 174 0 334 550 26 72.40 38 19.26 CHANGED tCGDllaSGHThhhslshhhhhcY....ssphh........h+hlhhllshhshhhIlhu+tHYTlDVllu..halsshla.hhYHt ....................hCGDhhFSGHTlhlslhhhhlpcY.............ss+ph.........................h+hls.Wllshh...u.hhhIlsu+..cHYolDVllAaYlostlF.hhYH................ 0 123 175 264 +14203 PF14361 RsbRD_N RsbT co-antagonist protein rsbRD N-terminal domain Eberhardt R re3 Jackhmmer:Q8KAB8 Family This domain is found at the N-terminus of a number of anti-sigma-factor antagonist proteins including B. subtilis RsbRD, Swiss:P54504. These proteins are negative regulators of the general stress transcription factor sigma(B) [1]. It is found in association with Pfam:PF01740. 
23.00 23.00 24.10 24.10 22.50 22.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.06 0.72 -3.58 74 220 2011-06-10 14:14:43 2011-06-10 15:14:43 1 6 201 0 98 227 5 103.70 21 39.90 CHANGED slt..clLpc...ccpplLppWhpph.tphstcssth.ptc.hpp.ssslhpsltpulpsh.......hDhhssph......pplpphLssls+hRAhQGFoPscsssalasLKpslhchlppp .........t.h.phLppccptllppWhcth.tp....hs....hp..sst.h..c.pc.hcppspslhpslspulpps..........hchtsssh.........splcphLsp...lu....+sRAhpGFoPscssshlhuLKpslhphltt............................ 0 39 65 88 +14204 PF14362 DUF4407 Domain of unknown function (DUF4407) Eberhardt R re3 Jackhmmer:Q8KC28 Family This family of proteins is found in bacteria. Proteins in this family are typically between 366 and 597 amino acids in length. There is a single completely conserved residue R that may be functionally important. 28.30 28.30 28.90 29.00 27.60 28.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.66 0.70 -5.46 54 204 2011-06-10 14:37:50 2011-06-10 15:37:50 1 6 166 0 93 182 38 296.40 23 61.13 CHANGED s.slLcp.sss-psKasulGAslhhTAlhAsluuuaAlhhshsss.............hhulhhG.llWGlhIhsLDRaIVsohp+pst...............hhpphh.uhPRlllAlllulVIScPLpL+IF-pEIsppltptppppttphhsplss...........................thssphsthpsplsslppphsptpsplsthhpthpsEhpGs............ssoshhGhG.shhcp+ppphcstt.......splpslpsp...ssthsthc......................pphsphptsppthhsppp.t....sstsGlhsRhpALscLs.....tts..hhhsphhlhLLF.lulEhhPllsKLhsstus...YDhtlppp-pt 
.............................................t..........t-cspasshGshlhhsuhhA.slsuuhA.l.tts....hpss.........................hhul.hu.llhGLhlhsl-Rhlso....u.t+tps.....................................h....h..t..sss..RhhlAlhlu..hVluc.ltLhlFp..spIs...pclpcp.t.pptpst.stlps..................................................ssplpphpsth...ssLcsplpptcsclspth..shhpsEhpss.................ptoGhsGtG.P.scptpphh.sstp...........................................tcLsshhsp...hspptstlc...............................................pptptltpsppt..hhst..............sstpGhhsRh.AhscLs......ts..s..hhhsphhhhhhh...hhlthhPllh+Lhpstss...aDhthttpt..p........................................................ 0 31 59 77 +14205 PF14363 AAA_assoc Domain associated at C-terminal with AAA Coggill P pcc Jackhmmer:Q9M0V8 Family This domain is found in association with the AAA family, Pfam:PF00004. 22.10 22.10 22.10 22.50 21.50 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.34 0.72 -3.94 102 409 2011-06-10 15:51:54 2011-06-10 16:51:54 1 12 26 0 238 417 0 98.40 24 20.91 CHANGED hlP...t.p....lcp.....h.....l....t...shhtph.........hs....th.hss.hh.slsIpEhs.....u...........hp....pN............c.............hapAscsYLusph..ssp.....s....c+L+sspsc...pspt...................hslolcc.s-clsDsFc.GlclhWphsspp ..........................hP..plpth.....l....h.....phhtph.........ht....h...hs.shh.plslpEhs.....u...........hp.ps............c.................hapAspsYLu.s.p.s..ssp.......s....c+L+..sstsc.p.sps...................lslshcc..s-clsDsF.....p.....Gspl.hWphh...p............................... 0 25 137 200 +14206 PF14364 DUF4408 Domain of unknown function (DUF4408) Coggill P pcc manual Family This domain is found at the N-terminus of member of the DUF761 family Pfam:PF05553. Many members are plant proteins. 
21.50 21.50 21.50 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.57 0.72 -4.39 37 166 2011-06-10 15:57:58 2011-06-10 16:57:58 1 2 20 0 93 164 0 36.80 33 13.20 CHANGED hPplaushtua.............lsPshL.FlllNll...IhsIsupS+h .....................................hhsshtsa.......................hoPshL.FlllNlI...IlsIssoS+h.. 1 11 54 74 +14207 PF14365 DUF4409 Domain of unknown function (DUF4409) Coggill P pcc Jackhmmer:O64569 Family \N 23.90 23.90 23.90 24.10 23.50 23.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.73 0.71 -3.92 35 420 2011-06-10 16:12:37 2011-06-10 17:12:37 1 21 28 0 255 418 0 104.60 32 27.44 CHANGED oIpSPD.GDlIDCV.hp+QPAFDHPhLK....s..hp....hcPs.p.pPcuhh.tc........p....p...........ppps.....spshsQlW+psG.c.CP-GTlPIRRTpc-...DlLRA..sSlp+aG+Kpppsh..sts.p..t..ss..hspsGHcHAluYs .....................lpo.D.GDllDCVshpcQPAFDH..P.hLK..........s...hp......hcPs.....hPpsht.tp........................................ttt......tp.hhphWp.pss...p.CPcGTlPIRRs.pcc.......DllRs....p....S....ht....phsh..+..t................................................................................................. 0 51 132 159 +14208 PF14366 DUF4410 Domain of unknown function (DUF4410) Eberhardt R re3 Jackhmmer:Q8KCX4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 238 amino acids in length. 
26.00 26.00 26.00 26.10 21.60 25.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.53 0.71 -4.43 38 82 2011-06-13 11:52:46 2011-06-13 12:52:46 1 1 73 0 41 82 7 130.50 18 64.47 CHANGED lhVtsF...thpstth....t...........................ttstpppppstphscphusplspc.Lppp..ulhutpsssss....tsslllcGshtphctGspttRhhlG.hGA...Gpoplpsslplhs..................soG+hsuth.ssssssuuu ..................................................................................t...........................ptttp.ppphtt.hcphtpplhppLppp...slhsttsssss.....tshhlpsplpshchGssttRshlG.hGA...Gpsplsuslplh-..................sss+..sthpsssshuhuu................... 0 8 17 33 +14209 PF14367 DUF4411 Domain of unknown function (DUF4411) Eberhardt R re3 Jackhmmer:Q8KAB2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 153 and 170 amino acids in length. There is a single completely conserved residue D that may be functionally important. 23.50 23.50 23.70 23.50 23.40 23.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.13 0.71 -4.53 58 237 2012-10-03 20:43:45 2011-06-13 13:14:28 1 1 225 0 61 213 14 150.70 26 96.66 CHANGED Yl.LDoNlhI..putppaYs.h-lsPuFWchLpphhp..sGplhohcpVt-Elpps.....sD-Ls.pWscppts...hhpss-...tshpphtplhpa....psspaptss....ttah.......shADsaLlAtAhs.pst.....slVTpEt......tsstpp............phKI.PslC..pthsVphhshaphl.+chshpF ............................................................Yl.hDosshl...........puhpphYt...shhsuhWphltphhp..ssplh..shctVtcEltpt.............sDchp..cWhctpts..........hhhs...t-........tph.pthtclhph....sppthppt...................ptADsalIAhAhs..pst...........slVTpEpt..........sss.pp............th+I.PslC..pth.s..V...hlshhphl.cp.shp............................................ 
0 23 39 52 +14210 PF14368 LTP_2 Probable lipid transfer Coggill P pcc Jackhmmer:O49645 Family The members of this family are probably involved in lipid transfer. The family has several highly conserved cysteines, paired in various ways. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.36 0.72 -11.38 0.72 -3.85 72 1249 2012-10-01 19:46:35 2011-06-13 13:37:29 1 18 64 3 722 1872 6 97.10 20 61.47 CHANGED hlA..h..hhsssts...s....sss....................sssssC.......sss...hh...ss............hs...h.........hs.....s...s...ssPsssCCssl+s..h.....h.t.sp..ssClCthhss.s..............ts...h.sls.hspuh.tLs.ptCsls..sss.....tC .................................................................hhh......................ts..........................ss.s.s.sC........ssp.....Lh...ss.................hs...h.........ls............s.s.....ssPossCCsslcs..h....................t......ss...........tCLCthlpsss...........................hs..h.sls.h.s..pAh.t.l.P.stCslsss......C.................................. 1 104 420 582 +14211 PF14369 zf-RING_3 zinc-finger Coggill P pcc Jackhmmer:A0MEB9 Domain \N 29.60 29.60 29.60 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.18 0.72 -3.95 96 381 2011-06-13 12:47:51 2011-06-13 13:47:51 1 8 114 0 221 364 0 33.60 39 9.85 CHANGED spYWCapCsphVpl..t..tss........s...CPpC.s.u.GFlE...El ...paaCapCsp.Vph..t....ssh.......h....CPpCp.u.GFlEEl.... 0 46 116 169 +14212 PF14370 Topo_C_assoc C-terminal topoisomerase domain Coggill P pcc Jackhmmer:B3H4K2 Family This domain is found at the C-terminal of topoisomerase and other similar enzymes. 
25.00 25.00 27.80 27.80 23.30 23.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.40 0.72 -4.32 47 743 2011-06-13 13:44:24 2011-06-13 14:44:24 1 10 600 16 275 707 30 65.80 64 12.18 CHANGED lcc+lcphplphpsK--NKpVALGTSKlNYlDPRITVAWCK+a-VPIEKl.FoKTLRc....KFsWAhcs....cpcap....F ....................L.c-QLhKLElQtTD+.....-ENKslALGTSKLN...YLDPRISVAWCKKa-VPIEKI.aN.......KT.R-..................KF.WAht......................... 0 90 146 221 +14213 PF14371 DUF4412 Domain of unknown function (DUF4412) Eberhardt R re3 Jackhmmer:Q8KCD3 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is typically between 75 and 104 amino acids in length. 28.70 28.70 28.90 28.80 28.30 28.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.94 0.72 -3.47 91 199 2011-06-13 13:44:51 2011-06-13 14:44:51 1 7 134 0 99 199 11 89.20 21 35.07 CHANGED hpchG.scslsGhpCp+hcl................tssptptph...Whop-...............................................tGhPlchptpsssssh................htslchpsh.ssuhF..plPs.GYp ..........pchG.scslsGhsCpcaph.....................tssptpsph...WhopD...........................................................................................Ghslphptpssss.s...t.......................hhplphts..ssuhF..plPs.GYp.................... 0 51 80 93 +14214 PF14372 DUF4413 Domain of unknown function (DUF4413) Coggill P pcc Jackhmmer: Family This domain is part of an RNase-H fold section of longer proteins some of which are transposable elements possibly of the Pong type, since some members are putative Tam3 transposases. 
27.00 27.00 27.00 27.00 26.60 26.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.38 0.72 -4.03 79 377 2011-06-13 14:32:42 2011-06-13 15:32:42 1 27 37 0 208 358 0 97.30 25 17.07 CHANGED lSusphPTuNhaapplhclct.......hL.pc..h.p.sp...-.s.h.lp.s.hspsMppKFcKYW.......c......phshh....LslAslLDPRhKh....p.h...lcathpph...aGp...c.sp...ph...l.pcVcsslpcLappYp .......................................................ss..sTus.hhh.phhplph..........................hL...pp...h...t..sp.......s..h.lp.s..hsppMpp.......KFcKYW..........................p............phshh....lslAslLDPRaKh....c.h...lcathpph......hut....c...st...ph...lpplpptlpplappY.................................. 0 9 80 136 +14215 PF14373 Imm_superinfect Superinfection immunity protein Eberhardt R re3 Jackhmmer:Q8KCM5 Family This family includes the E. coli bacteriophage T4 superinfection immunity (imm) protein, Swiss:P08986. When E. coli is sequentially infected with two T-even type bacteriophage the DNA of the superinfecting phage is excluded from the host, into the periplasmic space. The immunity protein plays a role in this process [1]. 27.00 27.00 27.20 27.20 26.80 23.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.03 0.72 -4.32 60 264 2011-06-13 14:42:33 2011-06-13 15:42:33 1 5 229 0 62 205 39 42.80 40 38.53 CHANGED lYFlPsllAhhRp..+ppthsIhllNlhLGWThlGWlsALlWAlp ..........lYFLPsllAhtRc..+psphsIhllN....lhhGWThIGWllsLhWuh......... 0 17 30 48 +14216 PF14374 Ribos_L4_asso_C 60S ribosomal protein L4 C-terminal domain Coggill P pcc Jackhmmer:Q9C6F1 Domain This family is found at the very C-terminal of 60 ribosomal L4 proteins. 
27.00 27.00 27.20 28.20 23.70 23.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.60 0.72 -4.10 64 472 2011-06-13 14:55:20 2011-06-13 15:55:20 1 6 336 8 266 449 5 78.60 42 21.13 CHANGED psYpLPpshhsNsDLsRlINS-EIQullRsPptpspp+...ltKKNPL+Nhpshl+LNPYAtsh++pplhsppppttt+ttt ......psYsLPpshMsNsDLoRllpSsEIQpslRss.......+ppsp++......shK.KNPLKNhplhL+.LNPYApsh++ptlhtpttphttp...h............................. 0 101 154 217 +14217 PF14375 Cys_rich_CWC Cysteine-rich CWC Eberhardt R re3 Jackhmmer:Q8KFY4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 74 and 102 amino acids in length. It contains eight conserved cysteines, including a conserved CWC sequence motif. 24.00 24.00 25.10 25.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.19 0.72 -4.04 76 321 2011-06-13 15:01:50 2011-06-13 16:01:50 1 4 317 0 117 296 51 50.50 33 57.01 CHANGED CPhCGsshpCu...........ttstssCWCsshshs.shh......tth.t.......................psClC.sCLpph .........CPtCGpshpCs............tss.tsCWChs..hshtsshh......tth.tth.....................psCLC.pCLpp.h.. 0 25 54 87 +14218 PF14376 Haem_bd Haem-binding domain Eberhardt R re3 Jackhmmer:Q8KBS9 Family This domain contains a potential haem-binding motif, CXXCH [1]. This family is found in association with Pfam:PF00034 and Pfam:PF03150. 
27.00 27.00 27.40 37.50 26.80 25.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.79 0.71 -4.48 89 710 2011-06-14 08:17:18 2011-06-14 09:17:18 1 4 650 0 123 370 30 137.10 48 34.47 CHANGED llshlslQhhshpps...........ssshtp.hpsspplptlhcpuChDCHSspTpaPaYuplsPsuahlppclpcG+cphNhopatsh...........spptppscLsclhphlcpscMP.tpY..hh..hH.pAcLopp-+ptllsWlpptp .................................h.lsYLGLuGYVaaaDppRucp...ucspu...SsluENsclluhl+EKGCDYCHTP.S.A.ELPh.YhhlPsAKQLMDYDIchGhKu..F....NLE..sVcuAL........hsspPVspS-L..sKIEhVhphpTMPPoRY..sA..L..HWuu+lSD-ERs-ILsWIucQR...... 0 33 82 109 +14219 PF14377 DUF4414 Domain of unknown function (DUF4414) Coggill P pcc Jackhmmer: Family This family is frequently found on DNA binding proteins of the URE-B1 type and on ligases. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.90 0.72 -4.13 25 383 2011-06-14 09:48:55 2011-06-14 10:48:55 1 45 242 0 276 400 2 103.50 29 3.48 CHANGED IDPsFLtALPE-lRcEVltppls............t..p...sssspsss-lss...........EFLsALPs-IppElLpQEptpppRppppttsp..............ss-MDsAShlATlPPsLRcpVLh-psE ......................lDPpaLsALP-DlRpEV...ltpp..ht.ptsp.................t..p.......sss.....ss...t..ss-lsP.........................................................EFLsALPs-lppElLtQpptppp.ppp...ttttt............................................................hD.sshh.sh...tlRtthh......................................................... 
0 75 144 227 +14220 PF14378 PAP2_3 PAP2 superfamily Coggill P pcc Jackhmmer:Q9LVZ7 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.27 0.71 -4.78 12 684 2012-10-02 00:53:37 2011-06-14 15:22:55 1 10 432 0 300 1471 163 172.80 19 49.31 CHANGED shslhsh-cslthshptsLpph.lsp+PhL.slhush..sohhhththhhlhashhsc.s+hRshhshhhhhshhullshshh.shPs....phLsst.sF.s.hhshsssuha..th.t.h..............hhPShHsuhAhhsulshh+.hthhthhh.lssshsllhslsllustsHYhlDhssGssluhlhhtL .................................................................................................................................h.........................h.......h................h...hhshh..Y........h...h..h.....h..h.h.h..h..l.h...h.....h..h...h...t....c....t..p..............h....p....t....h.....h..t..sh.h...hs..s....h..h..uh..l...h...a.h.....ha.P.s.s...PP..............................th.h.............................t.......s......h..............................s....h....h....t..........s......h..s.s.h.......s....h....h...........................t.........h......tth.......................ss.h...s..A...hPSlH....s.u...h....u....l....h..h......u...l.......s.......l..........h.....p.....h.....t...........t.............t.............h.........h..............h...h....h.........l....h.h....h.a.sh.h.h.hh..s..sls..s..upHYhlD.s.lu.Ghhhshhh...hh...................... 0 97 194 260 +14221 PF14379 Myb_CC_LHEQLE MYB-CC type transfactor, LHEQLE motif Coggill P pcc Jackhmmer:Q9LG13 Family This family is found towards the C-terminus of Myb-CC type transcription factors, and carries a highly conserved LHEQLE sequence motif. 
27.00 27.00 29.00 29.00 26.90 26.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.69 0.72 -4.38 46 474 2011-06-14 14:51:50 2011-06-14 15:51:50 1 10 53 0 259 459 0 49.60 52 15.20 CHANGED uhpls.EALc....hQhEVQ++LHEQLE..lQ+pLQ.lR.I......EupG+aLppllEctpcshu ...............hpls.EAL+........hQMEVQ++LHEQ..LE........VQRpLQ.LR.I.......EAQGKYLQpllEctpch..s...... 0 43 156 213 +14222 PF14380 WAK_assoc Wall-associated receptor kinase C-terminal Coggill P pcc Jackhmmer:Q5XVH3 Domain This WAK_assoc domain is cysteine-rich and lies C-terminal to the binding domain, GUB_WAK_bind, Pfam:PF13947. 27.00 20.00 27.00 20.20 26.90 19.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.86 0.72 -3.48 53 366 2011-06-14 15:07:24 2011-06-14 16:07:24 1 27 20 0 205 376 0 92.30 23 22.47 CHANGED hsCssps.....s..st.....sh.hh...h........s...s.................s....s.........hsttCppsltlPVhssth..tt.tt.......ssss...htpsLcpGFpLpWp......tsssCppCts........SuG.pCGasps.........s.ptFsChCsDsh ...........................................................................................................t.....h.ttC..p.....h.lPV...tt..h.t..t...........s.ss....htpsLppGFpLpap.....sssCppCps........S..GG.tCGasps...........t..tta.tCh.Csst................... 0 22 113 156 +14223 PF14381 EDR1 Ethylene-responsive protein kinase Le-CTR1 Coggill P pcc Jackhmmer: Family EDR1 regulates disease resistance and ethylene-induced senescence, and is also involved in stress response signalling and cell death regulation [1]. 
27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.55 0.71 -4.75 46 364 2012-10-10 12:56:15 2011-06-14 16:27:23 1 45 119 0 219 341 1 179.40 30 21.50 CHANGED sssu-slSpRaWspssLsY...t-KlsDG..FYslhGh........t.....pts+hPSLpsLcuhs.s.s..ssshEllLVDRptDspLpcLcphAhsls......pss.....ss.ss.....ptllp+LApLVsspMGGssts.sps.hhs....p.Wpp..op...L+s...ppusls..lGpLslGLsRHRALLFKVLADsl.....slPCRLVKGs..su..........c-suhslV+hs.s....sREalVDLhusPGsLh.P ........................................................................................hu..hh..s..ls.....p.l.sG..FY.lhs...........................................phPsh.pL...pt......tt.th-slllst...D.tL.t.l.phs.t.hh..........................ts..........ss.....tthlppLAplVscpMG...........Gss.....s..tp..hhh........pap....hSp......ph+p.........psssls..lGplp....hGl.sRHR.ALLFKlLADpl.......s..l.PC+LV+Gphhsu..............pstshsllt..ht.s..............sp...E.....alVDLhttPGpLh.................................................... 0 41 119 175 +14224 PF14382 ECR1_N ECR11_N; Exosome complex exonuclease RRP4 N-terminal region Coggill P pcc Jackhmmer: Domain ECR1_N is an N-terminal region of the exosome complex exonuclease RRP proteins. It is a G-rich domain which structurally is a rudimentary single hybrid fold with a permuted topology. 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.74 0.72 -4.66 79 613 2011-06-14 16:29:30 2011-06-14 17:29:30 1 16 360 11 439 594 35 39.40 35 14.59 CHANGED lllPGp.hLus.s.s.pa.h.s.GpGTYh......cs.splhASlsGhlplss ....llsPG-.hls......s.s.s.p.....a.h...t.GcGTYh.......................ps..s......plhuSluGhVph....... 0 131 226 350 +14225 PF14383 VARLMGL DUF761-associated sequence motif Coggill P pcc Jackhmmer:Q1PFF9 Domain This family is found frequently at the N-terminus of family DUF3741, Pfam:PF12552. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.56 0.72 -4.78 10 212 2011-06-14 17:36:40 2011-06-14 18:36:40 1 5 19 0 148 189 0 29.80 43 3.81 CHANGED suosssscuhuh+uPulVARLMGLDSlPssppsp .........t...............+sPulVARLMGLD.uLPs............ 0 20 93 122 +14226 PF14384 DUF4415 Domain of unknown function (DUF4415) Eberhardt R re3 Jackhmmer:Q8KF70 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 82 and 104 amino acids in length. 26.10 26.10 26.10 26.30 25.90 26.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.08 0.72 -3.81 196 801 2011-06-15 08:14:51 2011-06-15 09:14:51 1 2 572 0 237 649 74 66.50 32 69.83 CHANGED Dhsphsct...tpht..............................................Kp.p..lolRlDsDV.....lcaF+u..p.GcG......aQo+lNtsL......Rchh .................................................-.s..sct.h.tpAp.ht.....................................hhpPh.....sKp.p..lolRlDsDV.....l-aF..Ku.....s..GcG......aQoRlNthLRchh.............. 0 53 137 182 +14227 PF14385 DUF4416 Domain of unknown function (DUF4416) Eberhardt R re3 Jackhmmer:C9RKS1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 176 and 187 amino acids in length. There is a conserved DPG sequence motif. 
27.00 27.00 43.60 96.40 19.40 19.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.83 0.71 -4.53 29 85 2011-06-15 09:02:56 2011-06-15 10:02:56 1 2 82 0 53 88 19 163.40 33 87.51 CHANGED tst.spLlh.slhss...pphhpp..lhttLpc+FGsl-hhSs.hsFc.aTsYYpcEMG.sslh..++hluFccLls.-pLs-lKlhTNplEppaut.cup....RplNlDPGYlstpplVLAosKsasHRlYLscGIaA-lTLhYppGc.apshsWTYPDYpspphtpaLsplRch..Y .........................h....spLlh.ulh.hs...cthhpp..lhstLpppFGslchhSs.hsFp.aTsYYpc.EMG.ps..Lh..++hluFccLlp.ppLsclKhtosplEcpaup..puc....RplNlDPGYlstpplVLAosKsasHRIYLscGIYAElTLhYpcGc.apshsWTYPDY+spchtpahpplRchY...... 0 29 46 48 +14228 PF14386 DUF4417 Domain of unknown function (DUF4417) Eberhardt R re3 Jackhmmer:C9RLL4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 220 and 340 amino acids in length. There is a single completely conserved residue G that may be functionally important. 27.00 27.00 31.00 30.60 26.90 26.60 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.13 0.71 -4.98 22 148 2011-06-15 09:37:46 2011-06-15 10:37:46 1 2 121 0 11 123 1 194.50 25 76.35 CHANGED pll.p................h.hsuphclPhlt.sphhsts................hlsFshtpppcp.....................pstslHFalcDhpFcclassPc.......phlppLpp..apulloPDaSlYhDhPhshQlaNhYRs+hlutahQppGlpVIPslsWuspcoa.......casFcGl.sctuslAloohG.....phpspcpcphFhpGhpchlp+l.pPtplllYG....th..pphass.....hpllphpsatpp ...........................................................p.......h..psthplPhlp.sphhs.t................hhsashhtpp.p...............................psshlHFahcDhpFcp...laspsc.......phlpclpp.......asullsPDaSlahD......hPhshplaNhY+sRh.lutahQ.ppGlpVIPslsWuspcoa.......casFcGl..ctuhh..AluohG.....hhpsppphphahpGlpchlc+l.pP.phlll.YG.....th..c.hatp..t..hphhph.sh...p........................................... 
0 6 9 11 +14229 PF14387 DUF4418 Domain of unknown function (DUF4418) Eberhardt R re3 Jackhmmer:C9RQ28 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 132 and 150 amino acids in length. 27.00 27.00 43.30 43.30 24.30 24.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.07 0.71 -4.38 32 145 2011-06-15 10:24:38 2011-06-15 11:24:38 1 1 133 0 26 122 2 125.30 29 87.92 CHANGED hl.lllGlLlslsPhh.hhslCss..h.tst..................hMtCaasupAhhslGsllhllullhhhh..ppthchuLslsshshulls...hLlPshl...............IGlC.ttstMtC+sh.ohPslhllullhllhuhhs...hahpcc ..........h..tlllGlllsluPph.hAssCts..h.pst.....................................hMtCaaoupushslGllIhllullhlhl..sttl+huLsluslslulhs...hllPssl...............IGlC.ttstMpC+sh.ThPhlhllullhllhusls...lahpp.p....... 0 17 19 22 +14230 PF14388 DUF4419 Domain of unknown function (DUF4419) Eberhardt R re3 Jackhmmer:C9RQB5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 348 and 454 amino acids in length. 
25.00 25.00 26.90 26.70 21.30 21.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.12 0.70 -5.20 59 188 2011-06-15 10:35:06 2011-06-15 11:35:06 1 7 78 0 151 195 7 271.50 28 70.40 CHANGED ssuFlpu.shpAYupHhsLhlpPD-lWhsIlsphuhalsspu..EpLRphFVsa.cGKcpLhVp....stssh.p..............sasplh.chsppIpcpltss.h....cchhh..s.sFSTTTtsD+lsuslshMush+pYFcYp.......hhhhCGlPs..........VTLLGph-DWppLtp+lc+Lt-.....as.p..............hppWt.phLpPllccFlpohcsps....s.......hpFWspIschpus...uuGssh..l......oGWIosFhhasp..pGphhttttt............................................................ts.h.hlshpplPsGhsplPlplp...s.stt...hptphhuGhhGhph.....ppp............................psslpPthsWhlh ...................suhlpuhhpAYspHptLhlcPD-lWhsIhpQhshal....Ntpu..E..plRphFVsa..pG.KcpLhVp........ssh.p..............satthh.phsppI.pppltss.l.......tshlh...P.sFSTTTtsDphsuslhhMu.sh.ppYFpat.........hhhhCGl..Pp..........lTL.GphpDWtplhp+lccLt-.......as.t...................hptWh.phLpPllcpFlpuhcsss........s............hpFWpplhchpst...........huGssh.....l.....sGWl.ssFhhaspcGph.t..................................................................................h.hls......hpplPsuhspsslhhp...p..t......h.h.hhuGhhGhph.....pp............................t.slpP..sWhh............................................. 0 78 120 139 +14231 PF14389 Lzipper-MIP1 Leucine-zipper of ternary complex factor MIP1 Coggill P pcc Jackhmmer:A8MS81 Family This leucine-zipper is towards the N-terminus of MIP1 proteins. These proteins, here largely from plants, are subunits of the TORC2 (rictor-mTOR) protein complex controlling cell growth and proliferation [2]. 
The leucine-zipper is likely to be the region that interacts with plant MADS-box factors [1], 26.00 26.00 26.00 26.40 25.80 25.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.98 0.72 -3.88 39 340 2011-06-15 10:44:33 2011-06-15 11:44:33 1 11 25 0 185 326 0 83.90 31 13.80 CHANGED pcpps.p..pp+t....sLEp-VtpLpcpLpcEpslRtsLEpAlsp.......ssus.hsp.hst.plPppspELlpEIAhLEt-VspLEppl.sLttplhppp .............................t......ppt+.sLpp-.VppLpcpLp.pEpsl+tuLEpA.hst.....................s.us...hsp...ss..hlPspsp-LltElAlLEtEVs+LEppllsLppplhpp................................... 0 35 109 149 +14232 PF14390 DUF4420 Domain of unknown function (DUF4420) Eberhardt R re3 Jackhmmer:C9RS82 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 310 and 334 amino acids in length. 27.00 27.00 33.00 32.70 19.40 18.60 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.77 0.70 -5.58 41 142 2012-10-11 20:44:47 2011-06-15 11:48:59 1 1 139 0 58 155 70 293.10 20 92.56 CHANGED pplpphWpplptp........t.thstphlptstt.hslhluhshpsphthhl..............hshthhsphsphpuhc.lp.......t.tttphhlslphpsss.hpclFstlssDllppl...t.pstpphhpshhtplpcWpcLht..tttpslopcp.hGLhGELhhLcp..h.sthsspts..lpuWsGPptsppDFphss..sslEVKooh..psspplpIou.cQ.L-ssstt.....LhLshhtlppsss....Gh.olscllsclpphl...t..tshptapp+Lh.phGYh.tpt....htphpatlpphch.apVc-.sFPRlspss.....lP.pulsslpYpl-Lss 
...........................................................................................................................t............................hh......hthhhuhs...s.p.thhh.....................tph.sphpshp.lth................tttthhltlhh.tptt....tclFttlspsllppl.......tstpphhpshhpplppWpplht..tttptLspcp.hGLhGELhhLcp...l....tthsssss...lpuWpGP..ptsp....pDFphss..tslEVKooh..p..pspplpIuu.cQ..Ls.sstss........LaLhshplppsss....Gh..oLs....s...llpplpptl...ttspshptFpptLh.phGahstpt....htptpa....tl.ps.h....ph...apVs.c.sF.P+lspss......lP..pulsslpYplsLs.t................ 0 17 45 52 +14233 PF14391 DUF4421 Domain of unknown function (DUF4421) Eberhardt R re3 Jackhmmer:C9RSA4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 336 and 370 amino acids in length. 27.00 27.00 27.40 27.10 24.50 24.50 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.69 0.70 -5.22 31 119 2011-06-15 10:56:27 2011-06-15 11:56:27 1 1 103 0 25 121 5 295.50 33 84.70 CHANGED sphDTs...........YIcspcas.hshhlpssssh.Ehaplsssp................tpslshsPsssh+lGshhuaRala........LGaohshptlt.tppspspppphslsL....Yos.+lulDlaY++ssu.Yphpphphts..t..s....shsF.sshpsphhGhNlaYIFNp+.+FSYsAAaSQSshQ++SAGShlhGhuastaplphshstlst.hppp..h................ththpphcahshslusG..YuYNWVFu+.salhss..SlssuluYp.sh.php.pp.tt.sshphpshshchluRhGlsaNss+aauGhShlhcsasYpcpsao....ssshF...GslslYsGa.....pF 
...........................................................................................................t.phDTs...........YIpPp+YN.aslMlppsssa..EhYplsspp.................QplsFuPsssh+..lGhYFGWR..WIF........LGaolDlsplh..pppcs+tp-hsLSL.....YSu.clGlDlaYR+TGssY+l+phphhs...phssp.....stsF..sGlpsph+GhNlYYIFNp+.+FSYPAAaSQST.QR+SAG...ShlsGhuhopHpLshDaspLsthlppp..ht........................shthpclKYsshulssG..YuYNWVFA+.Na.Lhsh..SLssuluYKts.h...ph...pppp....sphhhp.shNh...DhlsRhGlVYNss+aasGhShlh+sYsYc+s.sFS....hsNhF.......GslplYsGaNF.......................... 0 15 24 25 +14234 PF14392 zf-CCHC_4 Zinc knuckle Coggill P pcc Jackhmmer:Q9SHW6 Domain The zinc knuckle is a zinc binding motif composed of the the following CX2CX4HX4C where X can be any amino acid. This particular family is found in plant proteins. 27.00 6.00 27.00 6.60 26.90 -999999.99 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.58 0.72 -4.29 79 629 2012-10-03 11:39:54 2011-06-15 13:56:05 1 98 80 0 325 3156 20 38.20 30 8.98 CHANGED lcls+PLpptltlp....hs...ss...p.p..th...hplcYE+lspa.ChhCGhlGHspp..pCst ...............................................................................h.h.YEhh........h..Ch.pCsthGHstp..pC........ 0 67 167 281 +14235 PF14393 DUF4422 Domain of unknown function (DUF4422) Eberhardt R re3 Jackhmmer:C9RPD5 Family This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 255 and 371 amino acids in length. 
25.00 25.00 29.50 29.50 22.30 22.30 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.66 0.70 -4.76 90 376 2011-06-15 13:10:07 2011-06-15 14:10:07 1 7 289 0 76 365 474 224.80 37 67.37 CHANGED +IhlusHKphp.hP..pc.shYhPlpVGss.t...............p...........sahtDsoGD.NISp+NPtYCELTulYWAWKN...h.csDY.lGLsHYRRaF....shppp......................tthptllsppplpphl..p.pa..DlllP...c+Rp..ahlc..........olhsHYtcs.Ht.tccL-hscclIpcpaP...-Yhsua-plhpppp.uah..aNMFIM++clFcpYCpWLFsIL.ElEccl..Dh....os.YsshptRlaGalSEpLhsVWlp...p....pp.lc...htE .....................................pIhlusHKpht...hP...ps.shYhPlpVGtshp....................sh..............shhtDso.....G.D.NISp...+NPhYCELTulYWAWKN....h..csDY.lGLsHYRRaFshppt..........................................tthphll.spp...p.l.pphL...p..ph..DlllP.......c++p.......ahhc..........ohhs...HYtps...Hp...tcsL-hscplIpcpaP...-Yhsua-p..hh.pppp..uah..aNMFlM++chFscYspWLFsIL.clEccl..Dh................os.Y.....sshptRlaGalSEhLhsVWlt....p....pp.h....t........................... 0 23 47 56 +14236 PF14394 DUF4423 Domain of unknown function (DUF4423) Eberhardt R re3 Jackhmmer:C9RKJ5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 170 amino acids in length. 30.00 30.00 30.80 31.50 29.80 28.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.62 0.71 -4.72 52 72 2011-06-15 15:22:47 2011-06-15 16:22:47 1 2 5 0 70 73 44 168.50 24 60.39 CHANGED hhphsptpph+hlsscpacaapsWhpsllRp.Lsshhsss...ssttlActhhstlostclpcuLphLhchullc+sts.ssYtpTppsls.sst-shshul+shp+phhchAhcul-phshpcRshSulThulsccsaccltcplcchR.+clh...sluspp.pc.sccVYpLNlQLFPL ............................h...hhpttph+hlsscpacaappWhpsslRp.Lhshhsss.....ssttlAcphhstlostplccuLphLhchuhlc+sts..spat.oppsls.sstcshshul+shppphhchAtcul-phshpcRshSulThulsccsaccltctlpcFR.+clh...plssps.pp.scpVYpLNlQLFPl.. 
0 70 70 70 +14237 PF14395 COOH-NH2_lig Phage phiEco32-like COOH.NH2 ligase-type 2 Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A family of COOH-NH2 ligases/GCS superfamily found in the neighborhood of YheC/D-like ATP-grasp and the CotE family of proteins in the firmicutes. Contextual analysis suggests that it might be involved in cell wall modification and spore coat biosynthesis [1]. 25.00 25.00 29.50 28.70 20.00 17.90 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.63 0.70 -5.33 8 44 2012-10-02 17:21:26 2011-07-19 19:39:50 1 2 41 0 16 48 0 234.20 36 50.04 CHANGED osloLGADPEFMLppspGc.MlsAS.cFaspsGslGCDsp+lttc.....hPlAELRPuPspsPcpLltplcplLppAsphlsctslcWlAGuhPasGYslGGHIHFuGl.lohpLlRsLDsYLuLPLhhlEDPss.ttpRRp+YGaLGDaRhKsaGtFEYRTssSWLVSPtsA+AsLpLA+llAcpaccL.ppshhsschpcAFYpG-+chLtsplstlhp-lpuhssYsshtttI-.lashl-cGpoWDEcsDlRptW+IPhh. .............lhlGhDsEFhLhp.psp.hl.AScah.s+tG.sGsDshphptch....hPlsElRPtPsssPttLhhplpphht.Att..hlsp....psl.WhAGuhPhtsaslG.GHIH..F..S...G.l...........sL.....sh....pLlRsL....DsYLALPLsLlEDsps..thRR.s.pYGhLGDhRtpsaG...GFEYRTLs.S.WllSPtls+uslslAhll..ApphtpLpt...psl.p..phpcAaYpGs.+thL+thh.tlhtcl..thstYtpa...........pt.lt.hhphlcptt.WsEptDlR.hWpl...s............................................ 
0 10 14 14 +14238 PF14396 CFTR_R Cystic fibrosis TM conductance regulator (CFTR), regulator domain Finn RD rdf Manual Domain \N 22.50 22.50 24.10 23.90 21.10 21.10 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.44 0.70 -4.97 9 137 2011-07-19 22:39:50 2011-07-19 23:39:50 1 9 75 46 32 131 0 200.10 67 15.53 CHANGED DFSSpLhGh-uFDpFSAERRsSILTETLRRhSV.DtD..uusshsEsc+QSF+Q.......s.......G...-.....asEKRKsS.ILNPlsSsRKFSllQpu......QhsuhE-ss..cE.sER+hSLVP-sEQGEssLPRSNhh.ssGs.shpu+.RRQSVLsLMTp.S.pQGpphatptsuShR.KhSlsPQssL..SElDIYoRRLSpDSsh-ISEEINEEDLKECFhDDh-shs...ssTTWNTYL .......DFSSKLMGhDSFDQFSAERRNSILTETLRRFSl...EGDAslSWsETKKQSFKQ...............T..............G.E.....FGEKRKNS.ILNPlNShRKFSlVQKTP.......LQMNGIE.Ess..-EPhERRLSLVPD.SEQGEuILPRuNll.soGP.ThQu.+.RRQSVLNLMT+.Sl.sQGQsl+++ssuSsR.KhSlAPQusL..oE.lDIYSRRLSQ-ouLEISEEINEEDLKECFhDDhEsIP...sVTTWNTYL......... 0 1 3 11 +14242 PF14397 ATPgrasp_ST Sugar-transfer associated ATP-grasp Iyer LM, Abhiman S, Burroughs AM, Aravind L rdf Manual Family A member of the ATP-grasp fold predicted to be involved in the biosynthesis of cell surface polysaccharides [1]. 
31.40 31.40 31.40 32.50 31.30 31.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.79 0.70 -5.32 11 400 2012-10-10 13:17:03 2011-07-21 22:10:43 1 5 364 0 135 342 56 268.40 35 78.98 CHANGED phhhppphtphs.....NspphhtlhccKshasph..hp-h..................hspchhshp....h.p.p-hptahpsp....pplhlKslsG.u.GpGlthhptss......................htpctpslhphhpssc.....pallpEtIpQHpthspLss.sSlNTlRllThhcs..spVclhhsllRhu..tuutssDNhus.GGhhsslDtsus..lpp.A.sh-.ptpta...phHPsotthhssapI..Phaspslphshcsupp.lPplthlGWDlAITspG.PllIEuNss.........sshs......h............ulhs.thsclhshphp ........................................................................................hth.....pRNhsalh+Y..NtR.phYslVDDKlhTK.h...A.p.t.Gh...........................h.hslp....tptplcplpph.....l.tsh.....ssFVIKP.A.p.GuG.GcG.IlVl.s.scptshahp.......ss..hs.pc..lpcc.....l.oshLsGLaSLuGp.......Dsul.lEphlp.cshFpuhSa..-GVPDlRlIl..........h...........pt...........hslhAMhRLu.....psuss.KA.....N.L...H.....Q...G...A...lG...VG.l....D.l.uTG..tlps.....s.........ph...s....p....l.................scH...PDTspplsslpl..PpW-pllpLAusshhE.hsGL.....GYlGsDhVLD.c-.c..GPhlLELNAR.......PGLsIQ.lANst............Glh.............pphtt............................ 0 43 84 118 +14243 PF14398 ATPgrasp_YheCD YheC/D like ATP-grasp Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A member of the ATP-grasp fold predicted to be involved in the modification/biosynthesis of spore-wall and capsular proteins [1]. 
100.00 100.00 105.30 105.10 93.00 92.90 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.47 0.70 -5.59 14 572 2012-10-10 13:17:03 2011-07-22 00:11:06 1 7 188 0 184 583 0 256.60 26 65.71 CHANGED pphppt.th.has.shhsKWcVactLpcppplp.aL.PpTchhpshsslcphlspapplalKPhpGstG+GIhplpptpt...phhschpssptp..p.passhtthhshltpthsppcalhQQulsLhphpG+shDhRshhpKNppGpWploulssRlAupsplsoplssGGpstthpchhs.ct..p.....ppshtppLccsuhpluptl-cthssp....luELGlDlGlDspG+lWllEsNoKPu+pshpp....p..tpppohpp.lpYutaLs ........................................p.......th.hhs.......s..h..h.sK.WplaphLtpstplp.sa.L.PpTphh.pp.h..c...p...lp.phLppa...p.tlalKPhpGohG+GIhplptpp.......s............sahh..phppppp.....t..........hh.php.s.hp.p....L.hphl..p.....p.....h.h...p..t..p.....p.....YllQpG.IsLhph..sGcPhDhRlhspK.s.t.p.Gp.WploulsuRlus.ps.slsT.sl.s..sG.Gp.sth.h.p.phht..p......pc......ttplhpclcch...ulplupt...l-..cphst.....luElGlDlGlD.ppu.plWlhEs..No.K.Pu.+pshp.ph......t.t...p......p.shph.hpYuhaL............................. 0 94 141 147 +14244 PF14399 Transpep_BrtH BtrH; NlpC/p60-like transpeptidase Iyer LM, Abhiman S, Burroughs AM, Aravind L, Eberhardt R la_psag Manual Family Members of this family are often found in the gene neighbourhood, or fused to, non-ribosomal peptide synthetases. They are predicted to function as transpeptidases in peptide metabolite biosynthesis [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.78 0.70 -5.14 49 388 2012-10-10 12:56:15 2011-07-24 22:26:28 1 12 298 0 113 426 19 290.20 18 77.47 CHANGED HCtosshssLL.phtGhtl.....sEshhFGLGuGLsahaht..hhphsh...hlssRst......phhpphtptLGhchp....hpphsssppuhctlcphlspGpsVhlt.sDhaaLsYhss...phHFsuHhlllhGhD..pcphhlsDs.spsshhpss............hssLtpARtupu.........shss+sphathpt....ssshss..ltpslh....pulppsspphlsss...................hGhpGlpphupcl.cW.....tspcphpthhtthhphhcch...............GTGGuhFRtlYupFLpcsuphhst.t.htphuphhpchuptWspl.ushhtphststp...tshpphuchlpplsctEcphhctLt ................................................................................C.sssht.hh.p.hGhth.....s-.hh.hhhstuhsh....hh.......pht....h..hshph........thhpphhph.lGh.php.....hpphs.s.......ppu.....h....p.tlcphLs....pG.hPVhl....t.hDhh..aL..s.......Yh..ss...................phph...............s.............s...HhlllhGhD.....ppthhl..t.....Ds........t...s...h.h...phs............hppltcAhtup.s..............hs..s.tshh.hthph...........st..p......hhptht.....pslpp.shp..phhsss.............................hGhtuhpphsppltp......h.......s.............tpphph.h...hh.......h.hthhtph.......................hR.hh.up.aLpchtt...h........h..p....hup....hhpphuphapth.thhhhc.......t.t.....pthtphuphltplsphEpphhp.h........................................................................................................................................................................................................ 0 44 85 99 +14245 PF14400 Transglut_i_TM Inactive transglutaminase fused to 7 transmembrane helices Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A family of inactive transglutaminases fused to seven transmembrane helices. The transglutaminase domain is predicted to be extracellularly located. Members of this family are associated in gene neighborhoods with a pepsin-like peptidase and an ATP-grasp of the RimK-family. 
The ATP-grasp is predicted to modify the 7TM protein or a cofactor that interacts with it [1]. 100.00 100.00 106.50 106.10 91.80 91.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.88 0.71 -4.73 93 271 2012-10-10 12:56:15 2011-07-24 23:33:47 1 2 269 0 71 210 16 166.30 42 33.13 CHANGED YRthshGlPhsPGEppplWslEA+lcFsAsGc.Pl+........VohslPphpsuasllsEsssS.sGYGl...........oh..hcsc.s.s..R..+Ap.W.ShRpAsGpQsLYY+splh.s.csps....chs...ths...sP.....................s...hssh.s..h.suP.ppsAA.psLlspspp+SADsho..........FspELI.Kpl.N..sss.sQNspLLLs.s..t....s.+ ....aRt.shGlPhTPGETcplWslEA+l-FsApuc.slK........VShusP.pppuasllsEosuS.sGYGl...........Shhps-..u..sRRspW.SlRpA.sGsQTlYY+sphhscspu..cts..phs..ss.....................t.hss.s.h.-uP.EcsAApALlspspp+SADshT.....FspELIKplNssp.sQNspLLLsp......scsp... 0 20 34 56 +14246 PF14401 RLAN RimK-like ATPgrasp N-terminal domain Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family An uncharacterized alpha+beta fold domain that is mostly fused to a RimK-like ATP-grasp and is found in bacteria and euryarchaea. Members of this family are almost always associated in gene neighborhoods with a GNAT-like acetyltransferase fused to a papain-like petidase. Additionally M20-like peptidases, GCS2, 4Fe-4S Ferredoxins, a distinct metal-sulfur cluster protein and ribosomal proteins are found in the gene neighborhoods. Contextual analysis suggests a role for these in peptide biosynthesis [1]. 
25.00 25.00 31.30 30.40 24.00 19.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -4.71 87 317 2011-07-25 16:00:10 2011-07-25 17:00:10 1 6 294 0 83 259 20 151.70 37 31.83 CHANGED osp-YLpps....htps...........+...spllNLC+sYpYhopGYYsSLLAEARGH+Vl.PoVpsls-.......LppppLhshshtplp..............................................phltptl.t..p............hssphhplp............laFGps..ptp..slcclARplF-tF.sPlLclphpcppp......................Wplpslps...hslscLscpcpshFtp..uL- ....................osppYLpps....hhst...........+..pspVINLCRsYpYhSpGYYsSLL.AEARGH+VI.PoVpsIs-.......lsptphhphslt.c.lc....................................................chh.p.lpp.............ss-shslp............lYFGps..stcsLccLARpLFEtF.sPlLclphc+pps.......................WpIpsIps.hshpcLp-scp-hFhpuL.................................. 0 37 61 73 +14247 PF14402 7TM_transglut 7 transmembrane helices usually fused to an inactive transglutaminase Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A family of seven transmembrane helices fused to an inactive transglutaminase domain. The transglutaminase domain is predicted to be extracellularly located. Members of this family are associated in gene neighborhoods with a pepsin-like peptidase and an ATP-grasp of the RimK-family. The ATP-grasp is predicted to modify the 7TM protein or a cofactor that interacts with it [1]. 
25.00 25.00 42.00 41.50 22.50 21.90 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.98 0.70 -5.50 34 285 2011-07-25 16:28:49 2011-07-25 17:28:49 1 4 280 0 75 224 26 309.70 52 62.39 CHANGED sphlhpLLspAsIPARhVpuLpLE.Du+RcQplpsalpVass.cc....WhhFsPpoGppGhP-shLlWppustsLL-lpGGpsupVsFShhppphsshphup..pptp.ssp..hhsF..SlasLPlcpQslFKslLLlPlGsLlVVhlRsllGlcTSGTFMPVLIALAFlpTpLlsGllhFlllVulGLllRuYLs+LNLLLVsRluuVllhVIhlhuhlSlluaKlGlspGLolohFPMlILuWTIERMSIlWEEcGs+-VlhQGsGSLhVAlluYLsMssshlpHLsFsFPtL.LllLAllLLlGpYTGYRLsELhRF+shhcs ................................................................s.hhppLLuhAtlPs+hVtsIpLE.DsR...R+Qolp.hlcVasG..pp....WllF.....NPpTGpQ.uhssN.hL.lWppussuLL-VsGGpNSQVpFShlspchss.pssp.....scspsss..hLsaSlauLPLE-QuhFKslhLIPIGALlVVhLRlllGLKTSGTFMPVLIAlAFlQTpLssGIluFlLIVuhGLllRSYLS+LNLLLVuRlSAVIIsVIllIulholluaKlGLscGLoIT...FFPMIILoWTIERMSILWEEEGu+EVhhQGuGSLhsAlLsYLuMosshlpHLsFNF.ulpLllLAhlLLhGpYTGYRLoEL+RFKsLsc-............................. 0 22 37 60 +14248 PF14403 CP_ATPgrasp_2 Circularly permuted ATP-grasp type 2 Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Circularly permuted ATP-grasp prototyped by Roseiflexus RoseRS_2616 that is associated in gene neighborhoods with a GCS2-like COOH-NH2 ligase, alpha/beta hydrolase fold peptidase, GAT-II -like amidohydrolase, and M20 peptidase. Members of this family are predicted to be involved in the biosynthesis of small peptides [1]. 
191.00 191.00 195.90 195.60 187.90 187.50 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.30 0.70 -5.83 6 58 2012-10-10 13:17:03 2011-07-26 05:57:10 1 2 52 0 30 57 2 428.80 25 95.00 CHANGED LspAlAD.YculL..s.slthtuW.psLstchRuspLhaGsRPlsslh.............RPpllocsQY-lLp+sscplspAlpclspthLsss....ulRphLtLoPhEE+LlshDPGYhtsp.AtuRhDoFLolDGs.LpFVEYNA-SPuuh.AYtDhLAchFlshPshpEFpK+YsltPLPuRphhLcTLLssaRpuGusstcPplA.IVDWpu..sPstoEFEhFpcaFt.......-aGl.sVIsoPp-Lsa.RDGpLls...........GshPlslVa+RllTsEhLsHas...LsHPLVpAYtsGAlslVNSFRAcLlHKKulFALLoDEphcs.hsA--RuAl+sHVPWTRlVpPu.TTapstsIDLlsFAhANRE+LVLKPND-YuGKGlsIGWEsos-tWcpALppAhco.PalVQERVpIshpPaP..thscGcl.ht-h.VDscPaLFG..scVpGChsRLS...TsslLNlos.uGuTlPsFll-- ........................h.....................tsa..th.tthctsthhht...sc.s.lstsa.............pPhhhsppshcphpphspplhpllp+lhpchlpss....phRchhshsst.ccLlhhs.GYsthl.shuRhDlFhs..t.ss...hpFsEhNsDuuuuh.scsctlsphhhph.shpcFtcc..aplp.shs..h..hp.hlcphhshYcp.hts.psppPplA.IlDaht...hsshsEFc..hFtchap.......cpGhsshIsssccLpa..csspLhs...........ss.h.lDhlaRRhVTs-llpphs...thcsllpAhtssslshlsSFpupllHsKhlFslLpD-p..spt..h..Lss-EpshIccHlPaTphlps............h-lhphshss+-chllKPsDtYuucGVhlGh-hst-cWcptlpc....shp....p....sallQEahph.s.ppth........th.p.-uc.s.....h.sphhhssu.salas...........tphsGshoRlG...sssllsssh.tshsssshhlp................................................ 0 18 26 29 +14249 PF14404 Strep_pep Ribosomally synthesized peptide in Streptomyces species Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A ribosomally synthesized peptide related to microviridin and marinostatin, usually in the gene neighborhood of one or more RimK-like ATP-grasp. The gene-context suggests that it is further modified by the ATP-grasp. The peptide is predicted to function in a defensive or developmental role, or as an antibiotic [1]. 
25.00 25.00 103.10 103.00 21.40 20.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.89 0.72 -3.75 7 17 2011-07-26 15:33:21 2011-07-26 16:33:21 1 1 17 0 3 19 0 62.90 72 98.35 CHANGED MpPFALNYARPAsthpsssPYsYDuuhQLNVLhDGRsAApD+ALLtclGTTTSTAGSKTHFDD MQPFALNYARPAsph-sssPYsYDSGLQLNVLhDGRlAAsD+ALLRElGTTTSTAGSKTHFDD 0 1 3 3 +14251 PF14406 Bacteroid_pep Ribosomally synthesized peptide in Bacteroidetes Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp, and an ABC ATPase fused to\ a papain-like domain. It is often present in multiple tandem gene copies. The gene contexts suggest that it is modified by the ATP-grasp as in the biosynthesis of microviridin and marinostatin. They might function in defense or development or as peptide antibiotics [1]. 25.00 25.00 34.10 39.50 20.00 19.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.51 0.72 -3.81 17 17 2011-07-26 16:09:42 2011-07-26 17:09:42 1 1 5 0 0 17 0 50.80 32 80.28 CHANGED MKKLsthp.S..hhpNpKLsstptpultGG.hpsthpphs.....hsssc.s+DsDsh MKKLsthp.S..hhpNpKLsstptpultGG.hpsthpphs.....hsssc.s+DsDhh.. 0 0 0 0 +14252 PF14407 Frankia_peptide Ribosomally synthesized peptide prototyped by Frankia Franean1_4349. Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide linked to cyclases in chloroflexi. It may have a link to cyclic nucleotide signaling [1]. 48.00 48.00 48.70 48.50 41.00 36.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.77 0.72 -4.21 10 16 2011-07-26 16:57:07 2011-07-26 17:57:07 1 1 13 0 10 14 0 60.90 37 80.85 CHANGED plE+lIGRAVoDssFRppLlsDucp......Ascs..YDLTsEELcAL-clcsssLpuhAtsLDtcLs+ .thEclIGRAVsDssFRppLlssscp......Ascp..YcLTsEELcAL-phcssshpshAtplD.plp+......... 
0 6 10 10 +14253 PF14408 Actino_peptide Ribosomally synthesized peptide in actinomycetes Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp and an aspartyl-O-methylase. Gene contexts suggest that it is further modified by the ATP-grasp and the methylase. It might function in defense or development, or as a peptide antibiotic [1]. 25.00 25.00 28.00 30.10 24.80 18.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -9.06 0.72 -4.04 16 32 2011-07-26 17:11:13 2011-07-26 18:11:13 1 1 26 0 15 34 0 61.30 36 65.68 CHANGED sPhssss.........lDPsTQhulhsDphG.pllE.hGKH.GTussstTsosTss..DGps...ptss-sDospD .................Phssss.....lDPsTQhuhhhDptG.pls-.hG+H.GTuosspTsTsTus..DGps...spssDp-sspD........... 0 2 12 13 +14254 PF14409 Herpeto_peptide Ribosomally synthesized peptide in Herpetosiphon Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp, and an ABC ATPase fused to a papain-like domain. It is often present in multiple tandem gene copies. Gene contexts suggest that it is modified by the ATP=grasp. It might function in defense or development, or as a peptide antibiotic [1]. 50.00 50.00 50.60 50.00 28.70 27.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.98 0.72 -4.21 6 8 2011-07-26 17:54:10 2011-07-26 18:54:10 1 1 1 0 8 8 0 57.80 39 73.45 CHANGED MEFcsh...KTEElPlI.....FGLTYLEEEAAEIsDVVGCLMPIDG..YosTGCDDSDts.....IP Mchcsh...+hp-h.lI.....FGLTYLEEEAAEIsDVVGChhslDG.uYosTuCDDuDt.......P.............. 
0 8 8 8 +14257 PF14410 GH-E HNH/ENDO VII superfamily nuclease with conserved GHE residues Zhang D, Iyer LM, Aravind L la_psag Manual Family A predicted nuclease of the HNH/EndoVII superfamily of the treble clef fold which is closely related to the NucA-like family. The name is derived from the conserved G, H and E residues. It is found in several bacterial polymorphic toxin systems [1]. Some GH-E members preserve the conserved cysteines of the treble-clef suggesting that they might represent potential evolutionary intermediates from a classical HNH domain to the derived NucA-like form [1]. 25.00 25.00 25.60 25.60 24.20 24.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.33 0.72 -3.83 51 101 2012-10-05 18:28:12 2011-07-27 20:41:42 1 24 88 0 22 106 0 70.40 29 13.00 CHANGED sssGphhs...ssstpsl...............sphDMGHp...uhcatchhtph.pht..hopcEhp-ahp.sPcNYRlEp...sosNRS+ttE ........................t..pG.hhs....tstp.l......................t.hDhGHp....uhcapchhtth.p..tt..hopcphh-ahp.sPcsaRlEp...sSsNRSHtsE.. 0 8 12 17 +14258 PF14411 LHH A nuclease of the HNH/ENDO VII superfamily with conserved LHH Zhang D, Iyer LM, Aravind L la_psag Manual Family LHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif, LHH. It is found in bacterial polymorphic toxin systems [1] and functions as a toxin module. Like WHH and AHH, LHH nuclease contain 4 conserved histidines of which, the first one is predicted to bind metal-ion and other three ones are involved in activation of water molecule for hydrolysis. 
23.30 23.30 23.30 29.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.01 0.72 -3.77 25 102 2012-10-05 18:28:12 2011-08-10 22:52:37 1 25 88 0 22 112 2 81.20 35 12.15 CHANGED TNhchhp...pGpuPhs..psG..cslpLHHlsQppsGPlsElopspHc...tspphLHshtpsups.......cspasp.+psYWKtRupphht ..........TNl-hMp...pGpAPls..pDG..pslpLHHltQcpsGslsElopstH...p...........tt....pphLHthhpss.psh.......+spFsp.RppYWKhRApp..p... 0 3 9 12 +14259 PF14412 AHH A nuclease family of the HNH/ENDO VII superfamily with conserved AHH Zhang D, Iyer LM, Aravind L la_psag Manual Family AHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif, AHH. It is found in bacterial polymorphic toxin systems [1] and functions as a toxin module. Like WHH and LHH, the AHH nuclease contains 4 conserved histidines of which, the first one is predicted to bind a metal-ion and the other three ones are involved in activation of a water molecule for hydrolysis. 18.20 18.20 18.30 18.20 18.10 18.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.76 0.72 -4.07 49 340 2012-10-05 18:28:12 2011-08-11 01:08:44 1 41 221 0 113 345 45 106.40 19 19.54 CHANGED spth..pttshtts...tshpuHHIlspps..........hhsttcplhcchGlc...........lssstNhlhlst........................hHp..GsHspp.....YpptVtcclppspp...............sppthhcpl.ppltccltsst .......................tt...................shpsHHllspp................hh.th.p..pllpchG.hc...........lssspNtlhLP...........................................hHp....GtH..spt.........Ypphl.tppLpphpp..................ttsppthh...ptl.ppltpph............................................................ 0 29 46 89 +14260 PF14413 Thg1C Thg1 C terminal domain Anantharaman V la_psag Manual Domain Thg1 polymerases contain an additional region of conservation C-terminal to the core palm domain that comprise of 5 helices and two strands [1]. 
This region has several well-conserved charged residues including a basic residue found towards the end of the first helix of this unit might contribute to the Thg1-specific active site [1]. This C-terminal module of Thg1 is predicted to form a helical bundle that functions equivalently to the fingers of the other nucleic acid polymerases, probably in interacting with the template HtRNA [1]. 26.30 26.30 26.70 26.60 26.00 26.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.85 0.71 -4.42 152 419 2011-08-11 14:39:21 2011-08-11 15:39:21 1 10 336 8 274 424 17 123.10 34 44.55 CHANGED stpplpDYhsWRQ.sDsHlNNLYNssFWsLl.csGhospcApppLpGohuu-KpElLFpchGINYNs.P.tha++Gohlh+.................................................................................tt..pt.ttt+p+....pthhshat...........Dhh...ppFhcph..h.. ........................................ppl+DYhuWRQ.sDs.............HINN.....LYNTsFWtLl...puGhospcApppLpGThuu-KN....ElLFpcaGI.NYN.s..Ptha+KGoslh+........................................................................................................p.......p..pt......pp.tht+p+....tt.hsha..........chht..ptFWpp.....p................................................................................................................ 0 90 164 224 +14261 PF14414 WHH A nuclease of the HNH/ENDO VII superfamily with conserved WHH Zhang D, Iyer LM, Aravind L; la_psag Manual Family WHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif WHH. It is found in bacterial polymorphic toxin systems [1] and functions as a toxin module. WHH is the shortest version of HNH nuclease families. Like AHH and LHH, the WHH nuclease contains 4 conserved histidines of which the first one is predicted to bind a metal-ion and other three ones are involved in activation of water molecule for hydrolysis [1]. 
25.10 25.10 25.20 25.20 24.50 24.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.40 0.72 -4.74 35 277 2012-10-05 18:28:12 2011-08-11 20:50:36 1 34 251 0 50 331 2 44.80 34 6.85 CHANGED tttppspuaTWHHppcs......ssMQLVspslHst......htHsGGhuhhpst .....s..htpspGaTWHHh.s-s........ssMpLVspphHps............htHs...GGhuthpt.t... 0 11 27 38 +14262 PF14415 DUF4424 Domain of unknown function (DUF4424) Bateman A agb Jackhmmer:Q7P768 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 310 and 361 amino acids in length. 27.00 27.00 29.60 27.00 23.90 25.40 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -12.02 0.70 -4.88 97 165 2011-08-18 09:52:12 2011-08-18 10:52:12 1 4 135 0 43 147 14 218.20 25 71.31 CHANGED pclpVcYhFpNsosp-lph.luFPh......P.............................h.hs.....s........sspshlpsF+lhVsG+.lpsph...........p.hs.................t.-lostLtppG..........h...t..h..........t...............tthh-tst..t.tashphhY.hWppsFsAGcsV.lcHsYpPs......lusul.......................t...tthChp.t.htthtth.............t..ht.pplsYVLoTuusWp.tsIt-FpLplc+sssspllshCa...ssl+Klssp........................................papappcNFsPpcD ..................clpVcYhFpN.osp-lp..luFPh......P.............................h..s..............sptshlpsFchhlsG+.ltsphp.hs....................................-lostlhttG.........................h...t.............t............thhctst....thshp.ha.hWppsF.supsl.lcHsYpPs......sutul.........................t...t..C.t.t..t.h.th................hh.pplsYlLsTussWt.tsItcFpLplctts.pt..llshCh...tsl++lssp.....................................phphppcsFhPpcD........................................................ 
0 15 27 31 +14263 PF14416 PMR5N PMR5 N terminal Domain Anantharaman V la_psag Manual Domain The plant family with PMR5, ESK1, TBL3 etc have a N-terminal C rich predicted sugar binding domain followed by the PC-Esterase (acyl esterase) domain [1]. 29.20 29.20 29.20 30.30 28.00 28.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.22 0.72 -3.92 63 964 2011-08-19 17:52:04 2011-08-19 18:52:04 1 11 31 0 619 937 0 55.20 41 12.91 CHANGED sppCDlhpGcWVhDsst..P......LYpspsCs..alppthsCtcsGRPDpsYhp.WRWpPcsCs ...............s.tpCDla..p.GcWVh....Dss......t.....P........LYs.s.p.sCs..alppphsChpsGRPD.p.s.Yh+.WRWpPpsCs....... 0 64 359 499 +14264 PF14417 MEDS MEDS: MEthanogen/methylotroph, DcmR Sensory domain Anantharaman V la_psag Manual Domain MEDS is prototyped by DcmR and is likely to function with the PocR domain in certain organisms in sensing hydrocarbon derivatives [1][2] The MEDS domain occurs fused to Histidine Kinase and as standalone version [2]. Sequence analysis shows that it is a catalytically inactive version of the P-loop NTPase domain of the RecA superfamily [3]. 28.70 28.70 29.50 28.70 28.00 27.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.09 0.71 -5.24 30 260 2012-10-05 12:31:09 2011-08-24 23:25:56 1 51 146 0 138 283 15 172.90 20 39.54 CHANGED hctthRpSGl-llG.-lPaGTHhCQFYpTcE-Lh-lllPYhKuGLEsNEhChWlsop..P.plE-AKpuL+cslP-l...-hYL-+GQl-Ilsh..hh...hp-ushDspRllsshlcK.hscAlt.....pGYcGLRlsGsshWhtKp..-asshssYEpclDuslssp...phhuLCsYsl-chsus-llDllssHpFsLlK+ct+Wpp .......................................................h......................sH.shhYp.sp.p-hhshhssFlppGLts.sE.....shhhss...stpht.lp.ptL....................hh.ssup.l.plh...s...h...............spushss..schlsthtph...hspuht......tGhpslRlhG-.hs.Ws..tcs....sphs.thh.paE.shlNphh.sst...shh.hL.CsYDt.......pphssp...s....lt-shtsHshhh.........t........................ 
0 61 101 117 +14265 PF14418 OHA OST-HTH Associated domain Anantharaman V la_psag Manual Domain OHA occurs with OST-HTH [1]. 25.00 25.00 25.40 26.60 24.40 24.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.28 0.72 -4.48 28 75 2011-08-27 06:30:04 2011-08-27 07:30:04 1 8 27 0 69 81 0 74.20 23 9.32 CHANGED ...tps.hhFs..GGRYuhActL+cps..th+shoLGpls+lVQlAlp.ppllh.Ypps.sLhP...hpssppsssshhstssppp ..............s.......hs..sGRYshAcpL+cps..hh+shsLGclhclVpluls.c+hhh.Ypss.tlhP...hptstthspshhs.....s................ 0 29 45 59 +14266 PF14419 SPOUT_MTase_2 SPOUT_MTase_11; AF2226-like SPOUT RNA Methylase fused to THUMP Anantharaman V la_psag Manual Family SPOUT superfamily RNA methylase fused to RNA binding THUMP domain [1]. 28.40 28.40 28.60 28.40 27.80 27.60 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.93 0.71 -5.08 28 37 2012-10-01 22:53:19 2011-09-01 23:30:29 1 1 37 0 31 42 0 175.20 55 49.03 CHANGED KlslVQMPYhGDhcus+phGE+IGRAAQuFEVKELIIAP+cplsAaELhpFl+GV+cGQcSRYpIQ++uYshcVc+VPVhVhDLYQllRDK+R.pscllIlTDPKGcplocVK-+Lucsh.....+tu+EVllFlGSREGIPpGlFRFADaVlDLAPahTFATEauIPusLlALholYEEtt ........................KlslVQMPYhGDhcusRphGE+IGRAAQuFEVKELIIAP+cphsAaELhpFl+GV+cGQESRYpIQ+cuYshcVcKVPVhVaDLYQllRDK+R.ppcllIlTDPKGcplscVK-+LAcsh.....+hu+EVllhlGS.REGIPp.....Gl..FRFADaVlDLAPa...hTFATEauIPusLlALholYEEth..................... 0 11 15 21 +14267 PF14420 Clr5 Clr5 domain Bateman A, Wood V agb Wood V Domain This domain is found at the N-terminus of the Clr5 protein which has been shown to be involved in silencing in fission yeast. This domain has been found to often be associated with proteins that contain ankyrin repeats and large regions of disordered sequence [1]. 
22.30 22.30 22.30 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.67 0.72 -4.07 93 327 2011-09-06 09:24:57 2011-09-06 10:24:57 1 30 74 0 281 320 0 54.30 29 10.23 CHANGED tspcW-th+shIpcLYh......pcphsLc-VhchM.....cpcasFpA....o...................c+tYcp+hc.cW..GhpK ............spsW.-sh+shItcLYh......pcphsL...c-VhchM.....cppasFps....o...................................................................p+tappphc.cW..shtK................................................................ 0 54 142 232 +14268 PF14421 LmjF365940-deam CDD_CDA_1; A distinct subfamily of CDD/CDA-like deaminases Iyer LM, Zhang D, Aravind L la_psag Manual Family A distinct branch of the CDD/CDA-like deaminases prototyped by Leishmania LmjF36.5940. Members of this family are widely distributed across several microbial eukaryotes such as kinetoplastids, chlorophyte algae, stramenopiles and the alveolate Perkinsus. Domain architectures suggest that these proteins might possess mRNA editing or DNA mutagenizing activity [1]. 
45.70 45.70 134.30 59.00 32.40 23.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.63 0.71 -4.92 12 36 2012-10-02 00:10:39 2011-09-07 19:24:38 1 2 20 0 17 35 0 133.90 25 37.19 CHANGED shhslhh-p.psahsphsshh.p....ppscs.Wh+Ks.p+PVlssLh..Vchc....sthst.sup.p................................hhhulNhElShPoGShCSEpNAlGp...............lAslul.........................................................................................................PTps....................................hRt.................................h.s............tst+ps.........................NPLaPCGsCpphL+Klsc.....hssshplhhF-ss.pscplhhhsls ...s.....hhp..t.....h............t..ah+ps..+.Vls.lh..h..................................................h..uhNhElShPoGShCuEpsshup.hs.........................................................................................................................................................................................................................................................................t...........................pP.h.shs.s.phL.+ltp......ss.hhlhhats..p..t.lh............ 0 10 14 17 +14270 PF14423 Imm5 Immunity protein Imm5 Iyer LM, Zhang D, Aravind L. la_psag Manual Family A predicted Immunity protein, with an all-alpha fold, present in bacterial polymorphic toxin systems as an immediate neighbor of the toxin [1] . 
25.00 25.00 35.50 35.50 23.50 23.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.17 0.71 -4.67 12 38 2011-09-07 22:45:15 2011-09-07 23:45:15 1 3 23 0 12 41 0 171.10 29 58.91 CHANGED ppIEK....Lpc.IspsuhGHLsLshRlcLM+pIsssph...........lpKlhhpCspKssuhascpFss-s.l.plLtchp..paLYpscGsh-pl.s.s-RhcsYlpps.-ps.-hssaslluL..GaAltsDAuolls.tDYsu.ED....DssaD.EuWssDFlsulAhSGu..PFs..-sGssE+R+cYWhWYlphslphsp .......................................................lp+....hht.lttss.GcLsLshRhplhpthspsp...............lptlthhCsc+shshWscpFsssp.l.thLp...phL.pscus.......hcphts.ht+.p.asp.h.ppshshsthshhhl..uaAAssshs..osl.p..p...a..ss..cD.....Ds-hDPEua-ssalsu.AhuGGhsa....s-psssEtRRtaWhWYLs.sls...h.... 1 3 10 11 +14271 PF14424 Toxin-deaminase DEAM-TOXIN1; The BURPS668_1122 family of deaminases Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Burkholderia BURPS668_1122 [1]. Members of this family are found as toxins in polymorphic toxin systems in a wide range of bacteria and in the eukaryote Perkinsus. Members of this family typically possess a DxE catalytic motif in Helix-2 of the core fold instead of the more common C[H]xE motif. The Perkinsus versions are predicted to be inactive [1]. 
29.60 29.60 31.20 31.20 27.30 28.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -11.09 0.71 -4.18 14 120 2012-10-02 00:10:39 2011-09-08 05:48:19 1 24 101 0 32 108 0 131.10 30 15.23 CHANGED ptphscl+tshs............hchtNhulAchchph......stphhuhSt.spstt......t.............h..ps.....ssplhps....................tsssphh.....sRhsDoEhKlLcpltppltssshss.................GplslFoppssCpSCts.llppFptcaPpIphssh....s .............................................................t....tph+.phst........thpp.uNhAlAch.pltuh......pphh.A.Sthsphpt......t.........................hs.ps.........ps.hFcs..t..................tpssh.h..................Rsh..DoEhKlLpsIAppLss..sppss..................Gp..IcLaTEhssCsSCs.........s.VItpFpp+Y.PNIplplh...t................. 0 5 25 26 +14272 PF14425 Imm3 Immunity protein Imm3 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted Immunity protein, with a mostly all-alpha fold, present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1] . 25.00 25.00 25.40 26.40 24.80 24.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.48 0.71 -4.13 26 52 2011-09-08 18:59:05 2011-09-08 19:59:05 1 2 39 0 8 34 0 116.30 55 95.29 CHANGED McDWEYNELF-AIpEsYcEhL---R.Ga+YAIA+luDE.F...D...NL...GKIEDVIVDTAIGEIslsHc.+VFlGhIcGITKRLShFN.pEAp.....sELThEEI+DLopRINpVlEGLcNVclDYpPSs ..................MKDWEYNELF-AIpEsYcElL-E-R..GY+YAIAKLuDE.F..D...NL...GKI..EDVIVDTAIGEIulsHc.KVFlGhI-GIT+RLSpFNsQEAt.....sELTlEEIKDLSpRINKVIEGLKNVclDYsPSs.............. 0 4 6 6 +14273 PF14426 Imm2 Immunity protein Imm2 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted Immunity protein, with a mostly all-alpha fold, present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 
35.70 35.70 39.80 39.60 35.20 34.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.72 0.72 -3.97 5 9 2011-09-08 21:17:19 2011-09-08 22:17:19 1 1 9 0 1 7 0 59.30 46 73.66 CHANGED MLEVlsLILSAGRuPD+V-haHpctI+cLLpEIsLssLLcDlPuDEAuELRcDLRlLKLl MhsVIhhILSGGhaPDh.-.hh+ctIhchlhsIsLssLLpslPuEEAE.hRaDLRlLKhl. 0 0 0 1 +14274 PF14427 Pput2613-deam Pput2613-DEAM; Pput_2613-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Pseudomonas Pput_2613 [1]. Members of this family are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 25.00 25.00 28.40 54.70 22.70 17.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.47 0.71 -4.42 4 6 2012-10-02 00:10:39 2011-09-08 22:47:24 1 5 5 0 2 7 1 116.50 34 4.32 CHANGED pF.uSu+Gs.HpAhsslpDssGplpspuhhh.SGNMTpsEttLGFPcsSLATHTEuRhs+pls.pp....GDhhlI-GpYsPCspCKGtMphtupcoGAclpYpWspsst..schWpsGph+cp+ ....F.uSu+Gs.HsAshslaDssGslpspuslh..SGsMTcsE+pLGFPcsoLATHTEuRhl+cls.ss....GDhhlI-GpYPPCs....uC+GtMpttupcoGupIpYpW.psts..scpWpttp.pp.p........ 0 1 1 1 +14275 PF14428 SCP1201-deam SCP1201-DEAM; SCP1.201-like deaminase Iyer LM, Zhang D, Aravind, L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Streptomyces SCP1.201 [1]. Members of this family are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 
25.00 25.00 36.00 35.40 22.00 21.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.00 0.71 -4.46 25 44 2012-10-02 00:10:39 2011-09-08 23:43:07 1 10 31 0 24 53 0 131.40 25 15.12 CHANGED spphsPshsssthtth..............sthTpGplFctsGp....lsshh..Su..ccp.tpthhshhtspu.........s.hshssHVEsKhAhhMpc.......suhccuslsIN..ssPCst............CcphlPtlLPcGSoLpVahst......sstlcGtu.t ...............................sP.hhsshh.th..h..........ts.hTpGplaptss.t...hsshh...Su....pcp.hptlhphhtshs.........s.hshssHVEsKhAhhMpc.........suhcpuslhIN..ssPCss.........t....CcphlshlLscGupLpVhhst......thhhtuh..h................ 0 11 21 23 +14276 PF14429 DOCK-C2 C2 domain in Dock180 and Zizimin proteins Zhang D, Aravind L la_psag Mannual Family The Dock180/Dock1 and Zizimin proteins are atypical GTP/GDP exchange factors for the small GTPases Rac and Cdc42 and are implicated cell-migration and phagocytosis. Across all Dock180 proteins, two regions are conserved: C-terminus termed CZH2 or DHR2 (or the Dedicator of cytokinesis) whereas CZH1/DHR1 contain a new family of the C2 domain [1][2]. 
25.30 25.30 25.70 27.50 25.00 25.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.24 0.71 -4.65 45 1118 2012-10-10 12:23:49 2011-09-09 02:18:47 1 34 216 2 631 942 4 187.60 30 10.24 CHANGED h.ssapNpLalthpphshs.....Kpsp.............t+Nlhlslplpsscsp.......pshpsIastsss.....phpsphtoslha+scpspat-plKlpLPhplspptHLhFohhcsssp......psppt....p.huaualPLhc..sGph.............lpssphsL.lh..............tp..ttYhphs.t....................hhcss+shhplpsplsSohhspsssl ............................sshRNcLYlh.pphcas............+ttp............................spsRNlpVplphhsu-sp......................phhpsI..astuus.............htsph.ho.s.....V.haHs.p.s...........Pc.ah-plK..lpl....P.....hph........hpp.......p.HLhFoFhHhSsp.......................pppcpt............Ep.hGau..alPLhp...c.Gp................................lpsup..apLsV................ttp.sssY.L.sl.ssst.tht........................................................................shphscu.sKssFplpohlsSThhTQsspl.................................................................... 0 183 251 418 +14277 PF14430 Imm1 Immunity protein Imm1 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted immunity protein, with an alpha+beta fold and a conserved C-terminal tryptophan residue. The protein is present in a wide range of bacteria in polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 
25.70 25.70 25.80 26.20 25.00 25.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.51 0.71 -3.98 26 58 2011-09-09 15:11:35 2011-09-09 16:11:35 1 1 41 0 26 66 2 125.30 16 90.44 CHANGED lpshhstt.......pssshhlso.s-l....-tllcchsthshs.....hsplhhs.s-...st....hshlssGl...s.sshuh.hhhhs....ss.s..sthhstssss.......tt..hhashssssp-..a..Ps....sppls....l-ssppAstpFhpsG.p+PsslsWppt .........................................................h................tt.h.hts.tpl....pthlpth.tt.t.............hhplhhs.ss....s.........shlssul...p..sphuh..lhhhs.........ss.s....tthhssssss.....stt......hp...hsssss-.....a..Ps....sstlsl-psppAlp-Fhp..ou..p+PsslpWp..h... 0 7 19 25 +14279 PF14431 YwqJ-deaminase YwqJ-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Bacillus YwqJ [1]. Members of this family are present in a wide phyletic range of bacteria and a few basidiomycetes. Bacterial versions are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 25.90 25.90 25.90 28.00 25.10 25.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.86 0.71 -4.27 14 91 2012-10-02 00:10:39 2011-09-09 18:36:12 1 16 76 0 42 98 2 125.80 27 22.31 CHANGED hshhtspshstpchs.sshuuslchp......sGchhpuhstttp.............sslHPhlpshlsph...............pthpp.a..........tsGppAEltulsptLhp..........................tstchhshthpth.....hshtpGthh.sCssCshllpphsh ...................................ttp...........th..sssAuulshc..Gp.....hhputuspss............sssLHPhlpchLDshss.........................sppcp.a...............sGpCAEshhluctLtsh-tt...............t.tttshp.tu+lpstplRps.....scshcG.....s..htsPCcsCsslhsphs......... 
0 8 31 38 +14280 PF14432 DYW_deaminase DYW_Deaminase; DYW family of nucleic acid deaminases Iyer LM, Zhang D, Aravind, L la_psag Manual Family A family of nucleic acid deaminases prototyped by the plant PPR DYW proteins that are implicated in chloroplast and mitochondrial RNA transcript maturation by numerous C to U editing events [1]. The name derives from the DYW motif present at the C-terminus of the classical plant PPR DYW deaminases. Members of this family are present in bacteria, plants, Naegleria, and fungi [2]. Plants and Naegleria show lineage-specific expansions of this family. The classical DYW family contain an additional C-terminal metal-binding cluster composed of 2 histidines and a CxC motif and are often fused to PPR repeats. Ascomycete versions, which are independent lateral transfers, contain a large insert within the domain and are often fused to ankyrin repeats. Bacterial versions are predicted to function as toxins in polymorphic toxin systems [2]. 25.60 25.60 25.70 25.60 25.50 25.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.90 0.71 -4.10 17 1779 2012-10-02 00:10:39 2011-09-09 19:11:19 1 778 129 0 1096 1827 0 121.80 40 20.51 CHANGED sssshsps.....hphhuG.ts+...........phhpRh.ps.shh.spsp.hhH.slcspcpp...................hhhpHuEK.tlshuhhp.................hhhps.....................................................................................................................................+lCsDCashhphluphhsp-IhVpDs.sph+hFc 
.................................................................................................................ht.hh.............HtFhssDp.sHPptppIh.......t.Lpclht.c.h..c.p.t..G..Y..h...P-op.h.V.La..Dl.-.-....E.pK..................................h.L.t...hHSEKLAlAFG...Llsossu.....................sslRlhKNL....................................................................................................................................RlCsDCHsuhKhIS+lh.sREIllRDs.sRFHHFc......................................... 0 111 754 916 +14281 PF14433 SUKH-3 SUKH-3 immunity protein Zhang D, Iyer LM, Aravind L la_psag Manual Family This family belongs to the SUKH superfamily and functions as immunity proteins in bacterial toxin systems [1]. 24.60 24.60 24.60 25.50 24.50 24.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.46 0.71 -4.41 35 175 2012-10-01 20:46:44 2011-09-09 19:50:34 1 3 133 0 43 157 2 134.00 22 81.79 CHANGED spplhphLppuGWh.......tsRplshshhhpthtpt.....ua.lhssstp..FLpcFGsLplt.......................tppssstshphsPhhsh.....tpppphpphtph.....lssplhPlGpht...ssthhLhls....-sGplashtp...........thhhlGsshppulpsllpupt.t ................................................tl.thLptuGWh.......psc..p..h..s...hsh......hhcthtpt..........tatl..hs..ushc..hhpcaGuLpht..t....................tpphsstshthcPhpsh.............h.tpphtphtpt.....ls.pplhPlGptt.....psphhlhls....ppGclYuh.cs.............h.hhhGsshppulppLltut..t....................... 0 11 30 39 +14282 PF14434 Imm6 Immunity protein Imm6 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted immunity protein, with an alpha+beta fold (mostly alpha helices). The protein is present in polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 
25.00 25.00 30.70 30.60 21.50 20.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.36 0.71 -4.41 29 64 2011-09-09 19:11:10 2011-09-09 20:11:10 1 1 38 0 18 54 0 119.00 28 74.73 CHANGED hspstahLsluptlhshl.cppp.p...........ph..ccuL-tCW...pal-s.cphsuDpLYshL-s.pD.pslhhahp..pc.ccpp.shWssIhsAluhsuhhA.YphEscca.lPpslEslD.-ppl-hahpphpchh .................h..hstahLtlu-tlhshlppppht...........phh.ccuL-tCa...pal-s.cphsuDpLYshL-s.tD...pslhhahp..p-.ccpt.shWssIhpuluhsuh.A.Yph-sc+a.lPpslEslD.pphlchahpshpch.................. 0 5 7 10 +14283 PF14435 SUKH-4 SUKH-4 immunity protein Zhang D, Iyer LM, Aravind L la_psag Manual Family This family belongs to the SUKH superfamily and functions as immunity proteins in bacterial toxin systems [1]. 25.50 25.50 25.50 25.50 25.20 24.40 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.03 0.71 -4.56 32 180 2012-10-01 20:46:44 2011-09-09 20:14:07 1 4 115 0 50 183 1 163.60 18 61.38 CHANGED stppltphasttt...............lhphpptsh.....sthhp.pspcFLppsGlPpss......hhhhtss................hstlsph.h..........................stthtpahhlGpss....ts.lslct.soGpVhhlssss.......phhhlNoslspFspslhhhtphhtthtt.tt...................................tpp.pshstphppplptlDstAht.........stsaWstlhppl .......................................................................t.........s.t................lhph.ttth.....t..h...stphL....sGlP.p.......hh.hts.................h..htph...........................stphtpahhlGpsh.........ss.lslp....ssuplhh.ls.ps..............p.hhlNoslspFspslhhhtchhpthhths.....................................tpt.tphhtp..hp...p.....plttlDsssht..........tsaWs.hhpp............................................. 0 11 35 48 +14284 PF14436 EndoU_bacteria Bacterial EndoU nuclease Zhang D, Iyer LM, Aravind L la_psag Manual Family This is a bacterial verion of EndoU nuclease. It is found at C-terminal region of polymorphic toxin proteins. 
25.50 25.50 25.70 25.70 25.40 25.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.70 0.71 -4.30 57 308 2011-09-09 20:44:37 2011-09-09 21:44:37 1 33 244 0 65 277 3 120.20 21 21.59 CHANGED sspshpHlhpG-hspt........thsGhHttsth..................sp.l.p.ht.........ss.pGlhpsphph.t.stt.............tstoohFPpp..WospclhstlppAhpspptpt.................ssphtutsss..............Glplc.shhpsp....GplhosaPhh ..................................................................s.....tHh.tsp..t..........h.GhHt.s.....................ht.lhttht..........pGlhphphph.t.stt...................tstpohFPcs..WocpcIhptlppAhpstthht.................sphhtstsps..............Gl+lc.shhcst....GplhohaP..................... 0 13 31 47 +14285 PF14437 MafB19-deam MafB19-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Neisseria MafB19 [1]. Members of this family are present in a wide phyletic range of bacteria and are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 28.90 28.90 29.00 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.99 0.71 -4.56 20 53 2012-10-02 00:10:39 2011-09-09 23:23:34 1 17 53 0 8 55 3 145.10 37 12.83 CHANGED hsshsphtshp.tlG.shstt...............tsshsslApscIsGppFhssNpsu+s...lus....spsshhssplthp.t.........spsshsssHAElsslQQAa-t..Ghsh.GpshshhVs.+-lCshCp....uslsshAcchGLcpLslaspt..oG..pshhasss ...........................................h...................................psp.pVlAcusIsG..cpFhDsNQoA+....lus..sspPTLhutplphchp.....t..tPN.sshusuHAEIulIQQAYsA..GhTt.GtsMshhVsGK-VCuaCp....GslsuhAcpoGLcpLslpsss..oG..pohYap.................. 0 3 3 4 +14286 PF14438 SM-ATX Ataxin 2 SM domain Anantharaman V, Eberhardt R la_psag Manual Domain This SM domain is found in Ataxin-2 [1]. 
28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.49 0.72 -4.05 52 367 2012-10-01 22:42:21 2011-09-11 08:17:31 1 11 227 28 216 462 1 75.30 29 8.85 CHANGED cRhhahhssllGppVpVplpsGsh.YcGlhpohssp...........phsllL.chs+hlsss.t..........ppstspphhcslllpspDllp ...........RhlahlssllGppscVp.l+.sGsh.YcGlF+ohos............chslsL.chs+chsspss.............ssspppsls-ohlhpssDls...................................... 0 66 108 163 +14287 PF14439 Bd3614-deam Bd3614-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Bdellovibrio Bd3614 [1]. They are typified by a distinct N-terminal globular domain. The Bdellovibrio version occurs in a predicted operon with a 23S rRNA G2445-modifying methylase suggesting that it might be involved in RNA editing [1]. 38.50 38.50 39.20 130.90 37.70 37.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.65 0.71 -4.31 4 4 2012-10-02 00:10:39 2011-09-11 22:36:34 1 1 4 0 4 4 0 124.00 36 37.69 CHANGED pshacRDRcVsAhLlssp.GtlhssAsNoNucNtsLHAEhNLLhPhhhtp.......................t+sl.sGspLhVTLQCC+MCAAhlsthu-t.ut...hcVsYhpEDsGuLARcTtLcp+GhEp+hst .pshacRDRcVsAhLlssp.GtlhssAsNoNucNtsLHAEhNLLhPhhhtp.......................t+sl.sGspLhVTLQCC+MCAAhlsthu-t.ut...hcVsYhpEDsGuLARcTtLcp+GhEp+hs.. 0 2 3 4 +14288 PF14440 XOO_2897-deam Xanthomonas XOO_2897-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Xanthomonas XOO_2897 [1]. Members of this family are present in a wide phyletic range of bacteria and are predicted to function as toxins in bacterial polymorphic toxin systems [1]. The Xanthomonas XOO_2897 lack an immunity protein and is predicted to be deployed against its eukaryotic host [1]. 
32.60 32.60 33.40 36.00 31.40 31.40 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.97 0.71 -4.39 16 52 2012-10-02 00:10:39 2011-09-11 23:02:42 1 9 41 0 20 57 0 113.50 36 17.59 CHANGED h.s.sshshsG.......sptsshhtpss................pphlhtp........................................tssHuEcpllpplpp....tsltPsplhclYoEhpPCst.t....Ctphlcs.h.ssscloaohs..aGt-ttst...hppuhspLhp ...................................................................s......s.hG.......s..sshhhps...............t.tp.hhtpu.t...................................ssssHsEhphhppLcs.......hsVsPppVlELaTELEsCchPGsh....Cuchl+cpa.PpsRlopsss..YGsDpsu...RppGhppLl.t.......... 0 5 14 19 +14289 PF14441 OTT_1508_deam OTT_1508-like deaminase Iyer LM, Zhang D, Aravind L, Eberhardt R la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Orientia OTT_1508 [1]. Members of this family are present in a wide phyletic range of bacteria,including several intracellular parasites and eukaryotes such as fungi, Leishmania, Selaginella, and some apicomplexa. In bacteria, these deaminases are predicted to function as toxins in bacterial polymorphic toxin systems [1]. Versions in intracellular bacteria lack immunity proteins and are likely to be deployed against their eukaryotic hosts. Eukaryotic versions are predicted to function as nucleic acid (either DNA or RNA) deaminases. Among eukaryotes, some fungi show lineage-specific expansions of this family. Many fungal versions are fused to a distinct N-terminal globular domain. Various fungal versions are fused to domains involved in chromatin function. Apicomplexan versions are fused to tRNA guanine transglycosylase domain [1]. 
25.00 25.00 25.10 25.40 24.80 24.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.54 0.71 -4.39 23 207 2012-10-02 00:10:39 2011-09-11 23:54:14 1 6 86 0 171 219 0 165.50 15 29.16 CHANGED htttthphtshph.sht.....................phts.thhtthptpt...tt.......................pshhhthppp..............................................t.psplHAEhhllcplppp...................st..YlG....sSKhsCssCphhlpths.............ttpsshhts....cspsa.tW.phss...............tpspthpphh ..........................................................................................................................hh.t..................................................................................................h........t.......................................................phpstlHAElpllpahtppt.............................hhss.pYIG.sSK.uChhCphahpths...............phhhhts.....HsplY..pW.phPs......................ht.h.......................................... 0 35 93 140 +14290 PF14442 Bd3614_N Bd3614-N; Bd3614-like deaminase N-terminal Iyer LM, Zhang D, Aravind L la_psag Manual Family This is a globular domain that occurs N-terminal to the Bd3614-like deaminases, which are predicted to be involved in RNA editing [1]. 
25.00 25.00 65.00 63.60 22.10 21.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.75 0.71 -4.20 4 5 2011-09-12 09:35:36 2011-09-12 10:35:36 1 2 5 0 5 5 0 126.80 35 37.92 CHANGED puSscccDVAFLlAsssu-...u-AcssVaaApussssss.cSAlV+LlLG..........AppuusRspusuW..hRtRIhTT.tulSshDRAhVKVsApRsTth......slcscDcuAsA......st.sssDDscs........LphhDsTshAptAlpRGh.s ...puSscccDVAFLlAsssu-...u-AcssVaaApussssss.cSAlV+LlLG..........ApEuuGRspucuW..hRpRIaTT.+ALSshDRAlVKVsApRATsl......clcsDDDuAsA......st.sssDDDDDDDAs......LEhcDlT-aARtAlcRGAp.s 0 3 4 5 +14291 PF14443 DBC1 DBC1 Anantharaman V la_psag Manual Domain DBC1 and it homologs from diverse eukaryotes are a catalytically inactive version of the Nudix hydrolase (MutT) domain [1]. DBC1 is predicted to bind NAD metabolites and regulate the activity of SIRT1 or related deacetylases by sensing the soluble products or substrates of the NAD-dependent deacetylation reaction [1]. 25.00 25.00 26.30 26.40 22.70 21.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.61 0.71 -4.62 13 186 2012-10-02 00:00:35 2011-09-12 21:55:47 1 8 89 0 108 182 0 122.50 51 12.57 CHANGED .sHPs+llKFLVGp+sK.EsMAIGGpWSPSLDGsDPpsDPpVLI+TAIRssKAhTGIDLSsCTpWYRFsEl+YhRstppt.....sss+lETVVlFLPDVWshlPTcp-WcsLptshpppLscphptspppss ............pHPs+hlKFLV.G..hKu+..E..........sMAIGGpWSPSLDG..sDPppD.PsVLI+TAIRssKALTGIDL..SsCT.............pWa...RFAEl..........+YhR.stppt.............................sPs+lETVVlFhP..DlWphhPohpEW-slppthppphs.cphpt.....s..................... 
0 32 49 73 +14292 PF14444 S1-like S1-like Anantharaman V la_psag Manual Domain S1-like RNA binding domain found in DBC1 [1] 25.20 25.20 25.70 25.30 25.00 24.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.61 0.72 -4.24 11 185 2012-10-03 20:18:03 2011-09-12 22:52:37 1 6 68 0 90 173 0 56.30 57 7.05 CHANGED QRVF..TGh..VTKlp-sFGFlD--VFFQhSs.VKGthPpVGDRVLV-AuYNssMPFKWNAsR ......QRVF..TGlVT+L..H.DsFGhVDE-VFFQl.........Ss.VK...G+.hP.p.VG-+VLVcAsYNPs...sh.WNA.+.............. 0 18 26 48 +14293 PF14445 Prok-RING_2 Prokaryotic RING finger family 2 Burroughs AM, Iyer LM, Aravind L la_psag Manual Family RING finger family found sporadically in bacteria and archaea, and associated with other components of the ubiquitin-based signaling and degradation system, including ubiquitin and the E1 and E2 proteins. The bacterial versions contain transmembrane helices [1]. 22.10 22.10 23.00 91.00 21.20 21.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.31 0.72 -4.02 2 3 2012-10-03 15:03:13 2011-09-16 18:03:25 1 1 3 0 2 3 0 57.00 53 61.96 CHANGED DPcSFupasCDLCpoutslutLRQCVlCGRWuCsuCWpDEYYhC+SCuGlhplh.Lc DPcSFSRYoCDLCNooaPlu-LRQCVLCGRWACuSCWpDEYYsCKSCuGIIsLHLLc.. 0 1 2 2 +14294 PF14446 Prok-RING_1 Prokaryotic RING finger family 1 Burroughs AM, Iyer LM, Aravind L la_psag Manual Family RING finger family found sporadically in bacteria and archaea, and associated in gene neighborhoods with other components of the ubiquitin-based signaling and degradation system, including ubiquitin, the E1 and E2 proteins and the JAB-like metallopeptidase. The bacterial versions contain transmembrane helices [1]. 
29.10 29.10 29.10 29.30 29.00 28.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.56 0.72 -4.23 14 24 2012-10-03 15:03:13 2011-09-16 19:23:01 1 3 21 0 11 26 7 52.80 34 12.45 CHANGED .phpsstCssCGcpht.p-DlVVCPcCGAPYHRpCapchGpChphs.pssshshp. ......phpsspCshCGcphh.p-DlVlCspCGAPYHRpCapchGpChh.s.pttshph.h...... 0 7 8 11 +14295 PF14447 Prok-RING_4 Prokaryotic RING finger family 4 Burroughs AM, Iyer LM, Aravind L la_psag Manual Family RING finger family domain found sporadically in bacteria. The finger is fused to an N-terminal alpha-helical domain, ROT/Trove-like repeats and a C-terminal TerD domain [1]. The architecture suggests a possible role in an RNA-processing complex [1]. 27.00 27.00 27.00 27.60 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.18 0.72 -4.42 10 46 2012-10-03 15:03:13 2011-09-16 19:42:08 1 2 46 0 15 52 1 54.70 37 7.14 CHANGED hhhQpP-QPClhCutsuos+slsPCuHlVCcsCFDGscYSuCPlCts+l-sucPF ...................h.p..hshlhshhsspp.sshLsCGHl.I.PcsTFPh..-RYN...GCPFCGsPF-sus........... 0 10 12 14 +14296 PF14448 Nuc_N NUC_N; Nuclease N terminal Zhang D, Iyer LM, Aravind L, Eberhardt R la_psag Zhang D, Iyer LM, Aravind L Family This is a conserved short region that is found in many bacterial polymorphic toxin proteins [1]. It is often located before C-terminal nuclease domains [1]. 25.00 25.00 27.70 26.20 18.80 17.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.92 0.72 -4.42 2 47 2011-09-16 20:53:49 2011-09-16 21:53:49 1 7 29 0 1 42 0 54.40 79 19.80 CHANGED hDpsoshh+G..ElL.DGSVhR.uGTNaStphpEAHDuSKASIQSRISNLESGGVKGTtc ....hDoVoshhKG.VEIhPDGSVsR.oGTNYSGKFQEAHDASKASIQSRISNLESGGVKGTG.............. 0 0 0 1 +14297 PF14449 PT-TG Pre-toxin TG Zhang D, Iyer LM, Aravind L, Eberhardt R la_psag Zhang D, Iyer LM, Aravind L Family PT-TG is a conserved region found in many bacterial toxin proteins. 
It could function as a linker that links N-terminal secretion-related domain and C-terminal toxin domain. It contains a TG motif [1]. 23.50 23.50 23.50 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.43 0.72 -4.20 37 309 2011-09-16 21:38:00 2011-09-16 22:38:00 1 46 140 0 52 270 1 81.40 27 17.49 CHANGED pppthshhh-hss..lushp-shcshpGpD..hTG-cl.shh-Rshuul.hh.uh.hs..huKlsphstth.thh....+uhcsstcsp ..............t.hchsh-hss.ElsGhhDhtRshsGhD.PsTGE+L.osh-Rl.h.Aushslhuh.hP.....hG+suphsp.sshths........+shtts....t............................................................ 0 11 25 33 +14298 PF14450 FtsA Cell division protein FtsA Bateman A, Mian N, Griffiths-Jones SR, Anantharaman V la_psag Pfam-B_1177 (release 5.4) Domain FtsA is essential for bacterial cell division, and co-localises to the septal ring with FtsZ. It has been suggested that the interaction of FtsA-FtsZ has arisen through coevolution in different bacterial strains [1]. The FtsA protein contains two structurally related actin-like ATPase domains which are also structurally related to the ATPase domains of HSP70 (see PF00012). FtsA has a SHS2 domain PF02491 inserted in to the RnaseH fold PF02491 [2]. 
31.80 31.80 31.80 31.80 31.70 31.70 hmmbuild -o /dev/null --hand HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.43 0.71 -4.27 219 7122 2012-10-02 23:34:14 2011-09-17 14:26:47 1 10 3572 5 1490 4911 2477 177.60 22 77.84 CHANGED lshlDlGuupsshslhct.....sshtt..........hpllslG.............ustlo..cclupul......ppslppAEcl...chphu..................ss..hh.........sthp.tppphp.lsh................t.spplsppclscllpsthp..........Ellchlspph........hpssh.ht...p.hssshVlTGus.utlpsl.tchspchh...............lcls..t....................hltsP........aA.ouhul ....................................sslDIGsupssshlhct..........t.t.tt...................hsllshG................ssh....ls........ccl......u......pul.............pp.s.h......p......p.A......Eph...ch.p.h.u.......................ss..hsslsupp.phtsspshsshsppthpppthphh.....................................................thh.ch.phhcp.hshtshch.stuhllTGusuhlpsl.hcsspcsh........................l+ls..p.............................hl.hp.P...husuhu................................................................................................................................................................................................................................................................................................ 0 491 953 1235 +14299 PF14451 Ub-Mut7C Mut7-C ubiquitin Iyer LM, Burroughs AM, Aravind L, Eberhardt R la_psag Manual Family This member of the ubiquitin superfamily is found at the N-terminus of Mut7-C like RNAses, suggestive of an RNA-binding role [2]. 
23.00 23.00 23.00 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.45 0.72 -4.46 22 253 2012-10-03 10:59:06 2011-09-19 19:24:29 1 2 245 0 99 309 28 79.60 36 34.19 CHANGED MssssFRFatELNsFLs.spRp+thupssscsATlKHhIEALGVPHTEVthlhVNGcssshs+hlp-GDRlsVYPphcshc ..............................lphRhasELscFls..h.t.t.R..t......t.s.ht.p.....sht.sssTlKcllEuLG..V..PHTEVsllL....VN....Gcsss...a....s....a....h....ht....sGD+luVaPhhcs..................... 0 31 68 89 +14301 PF14452 Multi_ubiq Bac_multiUb; Multiubiquitin Iyer LM, Burroughs AM, Aravind L, Eberhardt R la_psag Manual Family A ubiquitin superfamily domain that is often present in multiple tandem copies in the same polypeptide. Members of this family are associated in gene neighborhoods, or on occasions fused to, bacterial homologs of components of ubiquitin-dependent modification system such as the E1, E2 and JAB metallopeptidase enzymes and a distinct metal-binding domain [1]. The E2/UBC fold domain appears to be inactive. The JAB domain in these operons is usually fused to the E1 domain [1]. 24.50 24.50 25.20 24.70 23.70 23.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.88 0.72 -4.00 24 160 2012-10-03 10:59:06 2011-09-19 22:07:54 1 7 84 0 69 158 3 72.20 21 64.51 CHANGED pshphplsscphshscshloGpplhtlAthssss......phshhphh.ssppcslt.s-sV-LtppGhc+Flshpsc .......hphtlsscphphscstloGpplhplAthssss......thsh.hphh....s.sptpplt.s-s.l-Lp....ppGhE+Flshp............. 0 22 43 59 +14302 PF14453 ThiS-like ThiS-like ubiquitin Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the ubiquitin superfamily that is often fused to the ThiF-like (E1)- ubiquitin activating enzyme and is present in gene neighborhoods with components of the thiamine biosynthesis pathway [1]. 
26.00 26.00 28.30 27.10 25.60 24.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.46 0.72 -4.12 8 166 2012-10-03 10:59:06 2011-09-19 22:35:31 1 2 166 0 20 104 3 56.80 56 21.39 CHANGED M+IhlNEcplsVc-ssoLatl+sphKs-ADVlIlNGFPlp-DhtLp-sDclhhI+RG ..................................MRlKFNGKELDTchpTSL-FFcslSKN..END..VW.IINGFATKENItLpENDELFCIE+N. 0 7 19 20 +14303 PF14454 Prok_Ub Prokaryotic Ubiquitin Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A Ubiquitin-superfamily protein that is present across several bacterial lineages, and found in gene neighborhoods with components of the ubiquitin modification system such as the E1, E2 and JAB proteins, and a novel alpha-helical protein, which is predicted to be enzymatic [1]. 25.00 25.00 25.20 27.80 22.00 22.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.88 0.72 -4.32 7 94 2012-10-03 10:59:06 2011-09-20 00:17:57 1 1 75 0 32 85 3 66.90 41 79.77 CHANGED lphppLpRsFpa....NGhsLsDPsPpho.cpV+DFYSspYPELhsAsl-GPpscsshtpYoF++usGs ....hplpsLpRsFpa.....ss..l.p..LsD.P.ss.shSs-pVh-aYosp.YPELTTAsVcGPpl..cs-ptlYpF+pslGs..... 0 7 22 29 +14304 PF14455 Metal_CEHH Predicted metal binding domain Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A predicted metal-binding domain that is found in gene-neighborhood associations with genes encoding components of the bacterial homologs of the ubiquitin modification pathway including the E1, E2, JAB metallopeptidase and ubiquitin proteins. The domain is characterised by a conserved motif with a CxxxxxEYHxxxxH signature. 
25.00 25.00 95.30 30.90 22.90 22.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.97 0.71 -4.87 5 8 2011-09-20 03:51:40 2011-09-20 04:51:40 1 2 7 0 5 11 0 143.90 36 90.99 CHANGED VDPAVSRAKFDR-IGpFcspAsAYRtQGCFLIEAsFPTAFFIFAsPKV+PplIGAAVEIDFoNYDLRPPSVVFVDPFTRpPlARKDLhLsMLRRPpLPGTPP-MISVLhQQpALSLsDFLQANSsEcTPFLCMAGVREYHDNPAHSGDsWLLHRGSGEGCLAFILDKIIKYGTGPVE ..............Vc.tlSRApF-RpluphpspssuaRhpGhhLlpApaPsh.hIFssspltP....htlclshsNaDhpPPSVshlDs.Tt.............ct.tLstsPPthhsshht..........pApssptpPFlCMtGsREYHspsuHoGD.W.haRGSG-hsLuhIL-pIh+hhpssl........... 0 1 3 4 +14305 PF14456 alpha-hel2 Alpha-helical domain 2 Iyer LM, Burroughs AM, Aravind L la_psag Manual Family An alpha-helical domain found in gene neighborhoods encoding genes containing bacterial homologs of components of the ubiquitin modification pathway such as the E1, E2, Ub and JAB peptidase proteins. 25.00 25.00 34.60 28.10 24.70 24.10 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.62 0.70 -5.66 11 36 2011-09-20 04:45:34 2011-09-20 05:45:34 1 2 31 0 19 34 1 298.60 19 85.51 CHANGED lsslsLuhhctGshptu-l..s.ssss-hhtpslptWhcsphsshthhs.............t.sssss.hhppsssss.ps.hhhshshssp.lahl....csplpsLcuspPsLupslhsllpcuuups.htlhoPshhhshhuhhaWcucs...Dpcpshphhctps-..sEtthpthh.s.sh.................hhphhsthspphhph.pptphphhtppcshssplssAlscltshlpctp.c.p.s.hshssst.tp...........sltsushltWc-s-hspcllDDahEhttpuusuppt.shl.ls..ssspulpphhcshcthhphhtul-cLLshlup 
............................................hhhhhhptsslp.sDl..s.ssstchhppuhptWhpcpsushphlphth.........ht.stsththhpsssc.ss.ps.lahshshsp...s.latl....tsplcsLcsspPsLutTslsllpcAuths.h.lhTP.shhhshhuhha.WcG-s...D--sshphhpcp.t-s..s.tphpthhsu.sh.......t.........hhphh.chhtpshph.t.pp...hphh..tpppshhtpl.....ss.....tlt.....plts.........h.lptsphp.....thhtsshpsp...........shh.hshlthpp.s........-hstcllDDah-hhspus.spsh.shl.ls..ssspultp.hpchptthphhptlDpLlshlu........................... 0 4 10 16 +14306 PF14457 Prok-E2_A Prokaryotic E2 family A Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in several bacteria. The active site residues are very similar to the eukaryotic E2 proteins [1,2]. Members of this family are usually fused to E1 and JAB domains C-terminal to the E2 domain. The protein is usually in the gene neighborhood of a gene encoding a distinct metallobetalactamase family protein [1]. 25.00 25.00 27.40 26.70 22.90 22.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.68 0.71 -4.78 12 50 2012-10-02 15:28:41 2011-09-20 18:18:15 1 5 48 0 22 55 8 146.50 22 21.40 CHANGED ttchhtshhtptsstsht.htspttptshthphhVths.psshsstssssI+phEsl..hhhssshshphP.shhLRpDFPhp........sH.p.sh.sps......ss.Clh-ssh.....s-hhhphsh.....pullpplhhWLpcuApGsLhpsspshEPhhhsstss.hlhs.shthtttss ...................................h................ht..th.t..p.ht..t.h.phhl.hsspsthts.ssssl+phEpl.hlhhsss..hshssP.s.shRpDFPsp.......hPHlpss..st.......hplClacssh.....s-h.hshuh.....puhls+lhtWLpcAAtspLpp.ppshEshhh.s..t.lh..........s.......... 0 9 16 20 +14309 PF14460 Prok-E2_D Prokaryotic E2 family D Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in several bacteria. Members of this family lack the conserved histidine of the classical E2-fold. However, they have an absolutely conserved histidine carboxyl-terminal to the conserved cysteine [1,2]. 
Members of this family are usually present in a conserved gene neighborhood with genes encoding members of the Ub modification pathway such as the E1, Ub and JAB proteins. These neighborhoods also contain a gene encoding a rapidly diverging alpha-helical protein [1]. 25.00 25.00 28.90 28.50 20.40 20.10 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.22 0.71 -4.80 17 151 2012-10-02 15:28:41 2011-09-20 19:31:56 1 1 122 0 43 142 4 171.90 23 72.96 CHANGED shpshhphlctlspp.....hshssalss.ssLs......hussthhWWs...........PsspRtlaFcst........thtppstslPhPuLVFhsstp..........shhVaAl+ss.cPs.sTpLapsPaaNVappGplChGs..splP.ctssssphpsWcttFFsSt..............FTHPNstspphhht......shshapchh-sph.psFPpssLlshc .............................................t..............t..........t.hh.s.shLh.........huspphlWas...........PstpRplaFtp..........hthtstpsshPsllatss.tp..........shpVaAl+s...s.cP.stsT.LapsPahNV.....h.s.p.GplChGs..sp..l..P...chp.s...hpt.lptWcphF.a.sSt..............Fo.H.ssspp............hhphhtphhct.t..psFstphLh...t......................................... 0 11 31 39 +14311 PF14459 Prok-E2_C Prokaryotic E2 family C Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A divergent member of the E2/UBC superfamily of proteins found in bacteria. Members of the family contain a conserved cysteine in place of the histidine of the classical E2/UBC proteins [1,2]. Members of this family are usually fused to an E1 domain at their C-terminus. The protein is usually in the gene neighborhood of a gene encoding a JAB peptidase and another encoding a predicted metal binding domain [1]. 
25.00 25.00 83.70 82.40 18.40 17.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.59 0.71 -4.07 4 5 2012-10-02 15:28:41 \N 1 1 5 0 3 5 0 129.40 59 25.17 CHANGED AFDDQAASCAEGQATLDLAVRLLARLYPVLAILPLGSAAShQAQALERLAKSINPKlGIRRSGKS.AolClVAGsTRPsLRCPTFFhGSDGWAAKLSRTDPVGSGSSLLPYGAGAASCFGAANVFRTIFAAQ .....AFDDQAASCAEGQATLDLAVRLLARLYPVLAILPLGSAAShQAQALERLAKSINPKIGIRRSGKS.ATlCVVAGsTRPsLR.CPTFFlGSDGWAAKLSRTDPVGSGSSLLPYGAGAASCFGAANVFRTIFAAQ.. 0 0 2 3 +14312 PF14461 Prok-E2_B Prokaryotic E2 family B Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in several bacteria. The active site residues are similar to the eukaryotic E2 proteins but lack the conserved asparagine [1,2]. Members of this family are usually fused to an E1 domain at the C-terminus. The protein is usually in the gene neighborhood of a gene encoding a member of the pol-beta nucleotidyltransferase superfamily [1]. Many of the operons in this family are in ICE-like mobile elements and plasmids [1]. 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.60 0.71 -4.40 15 108 2012-10-02 15:28:41 2011-09-20 21:03:45 1 3 103 0 27 835 18 129.40 22 23.12 CHANGED t.....h.pasps.sphhttsphsppssptshhtphhht.............pssshslpLVhscp.FshlPstlhls.-.pph...l.sHhts.s...GtLClh...ptsh-papssusst-ll.cphpplLp.pshsuss......ptchhuEasuYWptppssh........hh ..........................................................................thhhh.h..h...................................putshsltlsl.ss..a..t.h.P.th...h.lp...p.s..cphh.......hl.PHlph..s.....upLChhsp.psshs.sps....cush.t-s....h.cphpthLc..sulsusp............cs-htsEFtuYWptppp....h..................................... 
0 7 18 21 +14313 PF14462 Prok-E2_E Prokaryotic E2 family E Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in diverse bacteria. Analysis of the active site residues suggest that members of this family are inactive as they lack the characteristic catalytic residues of the E2 enzymes [1,2]. They are usually fused to or in the neighborhood of a multi/poly ubiquitin domain protein. Other proteins of the ubiquitin modification pathway such as the E1 and JAB proteins are also found in its gene neighborhood along with a distinct predicted metal-binding protein. 25.00 25.00 25.70 25.40 21.80 21.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.50 0.71 -4.53 5 36 2012-10-02 15:28:41 2011-09-20 22:59:24 1 4 35 0 15 40 1 123.80 31 52.33 CHANGED LPEsDccYL-o+GaTaEsVsDGu++Glll+pFpLPpG+FssspVDlLllLPsGYPDssPDMFYlpPsL+LVsGs+hPpAo-sscpFpG+sWQRWSRH...NssWRsGlDGlaTMLKRVEcALcsAs ...................L.ptDppaLcshGhpa....Eslt-uutphll.lcsasLP.......pG..Y.....stsp.........s-lhlhlPs.uY.Psstl.DMFYspPsL.......phs..sGtt.......lP.s.....sp.sscshp...G+sWQRWSRH...pssWcP...shDslhTalthl-psLttth......... 0 4 8 11 +14314 PF14463 E1-N E1 N-terminal domain Iyer LM, Burroughs AM, Aravind L la_psag Manual Family An uncharacterized alpha/beta domain fused to E1 proteins. This protein is usually present in gene neighborhoods with genes encoding a JAB protein and a predicted metal-binding protein. In related E1 proteins, the E1-N domain is replaced by an E2/UBC superfamily domain [1]. 
25.00 25.00 46.30 45.70 24.60 21.70 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.87 0.71 -4.62 2 3 2011-09-21 18:21:18 2011-09-21 19:21:18 1 1 3 0 2 5 0 148.00 34 32.41 CHANGED M.pssQpNAhMLAulLGssEs-AuERLsRsVLlTAsPGhtsuhhst.sthlh.RTVtVs.pp.sp-spLELVIG-sssRTsh.RlaAslsutGAssslcPVu+huG..PHsLhAAsAACssuAssl+hllDsssLPtsthPhRLDasQLGVP M.pssQpNAhMLASlLGlsEsE.AuERLsRoVLlTAPPGsssA.thAp-VtALLuRTVsVVtpSsss-PsLELVIGDVsPRTS.usRLYAuluSsGATlulcPVuRsuGP.PHuLLAAlAACsVSAAVl+tVlDss-LPtVchPLRLDFDQLGVP. 0 1 1 1 +14315 PF14464 Prok-JAB Prokaryotic homologs of the JAB domain Iyer LM, Burroughs AM, Aravind L la_psag Manual Family These are metalloenzymes that function as the ubiquitin isopeptidase/ deubiquitinase in the ubiquitin-based signaling and protein turnover pathways in eukaryotes [1]. Prokaryotic JAB domains are predicted to have a similar role in their cognates of the ubiquitin modification pathway [2,3]. The domain is widely found in bacteria, archaea and phages where they are present in several gene contexts in addition to those that correspond to the prokaryotic cognates of the eukaryotic Ub pathway. Other contexts in which JAB domains are present include gene neighbor associations with ubiquitin fold domains in cysteine and siderophore biosynthesis, and phage tail morphogenesis, where they are shown or predicted to process the associated ubiquitin [2,4]. A distinct family, the RadC-like JAB domains are widespread in bacteria and are predicted to function as nucleases [5]. In halophilic archaea the JAB domain shows strong gene-neighborhood associations with a nucleotidyltransferase suggesting a role in nucleotide metabolism [5]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.66 0.72 -4.46 111 2146 2012-10-10 14:49:21 2011-09-22 01:37:47 1 14 1332 8 470 3074 346 105.60 29 49.36 CHANGED stphhptlhtpupps.....hPpEssGlllGpt.....................tpphhhhthh.......................................ssppphphht.................ttpppshphluhaHSHPssss...hPSppDtphstt..................shhllssh............th.........shphh ...............................................................................................................................hp...h.ptlhtp.upts.....hP.tEsCGhlhtp............................ptp.hh.h.s.hs...............................................................hcPpt.hhcht.................................hts-hps-lVulhHSHP..s.....uh.....s.....h.S...csD+ch.phpss............hsaallsps.....................ttpthtsh..................................... 0 149 304 409 +14316 PF14465 NFRKB_winged NFRKB Winged Helix-like Godzik A adam de novo domain prediction, confirmed by X-ray structure determination Domain This domain covers regions 370-495 of human nuclear factor related to kappaB binding (NFRKB) protein. 22.20 22.20 22.20 28.60 21.30 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.45 0.72 -4.17 10 81 2011-09-23 07:25:46 2011-09-23 08:25:46 1 5 64 2 52 80 0 100.70 48 8.17 CHANGED FFSLLRDllsSTspH..R..Lo..h..LcppLpsWp.ps...PsusLN-Wao.pss.D.WopLLpSAlpFLoG..-sss.hPs-FVPYlEaKsphs.YQWIGAuRDSD.ucLssLCphW .......................FFSLLh-llhhcupso..Ls..h..LE-+VhcWQ.uS.....PASsLNsWaS.hsP.s.WucLVhsALpaLAG.........-sps....lP.S.....uFsPaV.EaK-csQpW+hlGt..upDs-.K-LuALhQlW........... 0 12 15 32 +14317 PF14466 DUF4425 Domain of unknown function (DUF4425) Godzik A adam BVU_3708 from Bacteroides vulgatus, JCSG target GS13500 Domain A small family of bacterial proteins, found in several Bacteroides species. 
Structure determination (NMR and Xray) shows an immunoglobulin beta barrel fold.\ Multiple homologs have been found in human gut metagenomics data sets. 27.00 27.00 28.60 111.90 24.60 23.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.59 0.71 -3.98 7 32 2011-09-24 22:47:10 2011-09-24 23:47:10 1 1 30 5 3 26 0 119.80 39 79.95 CHANGED tslslhhullhVSCus-Dspsss..psula+IslctSGc.cshthulhlssss.....sKlhsE.sGp.lupShss...chhss+hohpTstpsh.hTstGslhSppc..ustpLphhV..YhDGKEV.p. .pslhlshuLlhluCsc--ppsss...+uGhY+IslpQSGshcSFcsSVslsuss....ss+Lhs-.sGc.lusuhSls..EhtosKsohsTstsuh.hTsuGulhStp-..uscpLplslhsYpDGKEVp+p. 0 1 3 3 +14318 PF14467 DUF4426 Domain of unknown function (DUF4426) Godzik A adam Pseudomonas aeruginosa PA0388, JCSG target SP19004A Domain Members of this entry are found mostly in g-proteobacteria, especially in Vibrio. Strangely enough, there seems to be one eukaryotic homolog in Nematostella vectensis (NEMVEDRAFT_v1g226006), where the PA0388-like domain is fused with a domain homogous to the Methionine biosynthesis protein MetW (see below). In several Pseudomonas species, but also in Vibrio vulnificus and Azotobacter vinelandii PA0388 homologs are genomic neighbors of Nucleoside 5-triphosphatase RdgB (dHAPTP, dITP, XTP-specific) (EC 3.6.1.15) and Methionine biosynthesis protein MetW. On the other hand, in most Vibrio species it appears as a part of a conserved operon involved in possible response to stress. 
25.00 25.00 79.60 79.40 19.90 17.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.33 0.71 -4.31 62 249 2011-09-24 22:49:33 2011-09-24 23:49:33 1 2 249 4 64 191 80 120.60 46 83.76 CHANGED phcphG-h-VHYsuFsSTFLsPclApsYslpRSchpullNIoV.lcps...ttsp.suh..sAploGpspNLlGptppLsF+ElcEGs.AIYYlAphsasscEphpFpIslpsssp..stslcFpQchas- ....QhcsltDh-VHYsAFNSTFLTPcVApuYpLpRusYpullNISVLDpu..ph...up.sAs..pAploGpA+N.LlGphppLsF+cV+..EGs.AIYYLAEhPhss-EhlsFsIcVcsGsc.hstpLpFsQKFYs-... 0 13 28 48 +14319 PF14468 DUF4427 Protein of unknown function (DUF4427) Godzik A adam PSYMP_19184 [Pseudomonas syringae pv. morsprunorum str. M302280PT] Family This domain is often found at the C-terminal of proteins with Pfam:PF10899 domain, for instance in STY1911 protein from a multiple drug resistant Salmonella enterica serovar Typhi CT18. 22.70 22.70 23.20 24.70 21.50 22.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.51 0.71 -4.43 3 112 2011-09-25 08:35:22 2011-09-25 09:35:22 1 2 109 0 2 45 0 125.80 91 33.30 CHANGED LSuusVKsYsccIN-YIspLYSsKDahsDsYuhEFGNAWVWIHDNQs-VTRALLQcGhVSVN+EGRYLLKlsLtuScWPLR+KEshAKaVA+WL+pRFsLEuGYFSVhGu-DYDcIPaYNssLcENHPFYNs .......LSASKVKNYADSINDYVSELYSKKDFLNDsYAMEFGNAWVWIHDNQSQVVRALLQAGMIKVNKEGRYLLDVNLASVDWPLRRKEAFASHVAGWLKHRFDIEAGRYSVpGKDcYDAIPSYETPLK-QHPFYNH..... 0 1 1 2 +14320 PF14469 AKAP28 28 kDa A-kinase anchor Jaroszewski L, Godzik A adam Q86UN6 A-kinase anchor protein 14 isoform a [Homo sapiens] Family 28 kDa AKAP (AKAP28) is highly enriched in human airway axonemes. The mRNA for AKAP28 is up-regulated as primary airway cells differentiate and is specifically expressed in tissues containing cilia and/or flagella [1]. 
Homologs of AKAP28 are present in all animals and in some, including mice the AKAP28-like domain are preceded by another uncharacterized domain 21.60 21.60 22.10 22.00 20.20 21.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.51 0.71 -4.54 15 76 2011-09-26 09:21:28 2011-09-26 10:21:28 1 2 64 0 52 77 0 111.90 30 49.52 CHANGED pWhTtGEFshpsulppI--FlspWpls...csWlass-.lp+--h.+uphYhhcV+aShPTtRpPlPpAoApVhFsIpVSKhcPtchPV.VoYhFEup+Lla.....Rssps.pFREpWL+cIlcoKhthh- ..............................hhoht-FphppuhppIccal.p.pWphp..........csW.latscalpccsh.puhhYhapV+aShPTsppPhPpsoAslaFhl.p.loKhcP.ph.Pl...lh......YhhEspphl+.....Rst.s...pFp-pWlcslhcsKh.hh..................... 0 22 24 36 +14321 PF14470 bPH_3 Bacterial PH domain Bateman A agb Jackhmmer:C8LJ44 Domain Proteins in this family are distantly related to PH domains. 23.00 16.00 23.00 16.40 22.90 15.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.14 0.72 -3.72 68 691 2012-10-04 00:02:25 2011-10-26 16:08:13 1 14 480 0 99 818 44 95.00 24 64.96 NEW Lp..ssEplhhhs...hu...hh.p......................tpsshllsTs+Rllhhsp..phhtt.hp.h.pshsapcIsslphc..psl............hh.sc.lphts...sspplpl...p.l..s+sss.cphhphlpp ..............................................tE.h...h....hs....hhch..................h.sthpGhhlsTNc.R..Llhhth....sh.tp...hh...h....c..ph..s..ascIpsl.chc...................hs...pp..Ihhph...sttp..l.h...cpI..ppssV.phhlchlp............................................ 1 34 63 82 +14322 PF14471 DUF4428 Domain of unknown function (DUF4428) Bateman A agb Jackhmmer:C9XSR9 Domain This putative zinc finger domain is found in uncharacterised bacterial proteins. 
27.20 27.20 27.60 27.50 27.10 27.10 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.82 0.72 -4.22 24 71 2011-10-26 15:21:41 2011-10-26 16:21:41 1 8 57 0 10 70 4 49.50 28 20.98 NEW pCslCspclGh..h...pt...hclpDG.h.lCcsChpKl....ssh.....h.pshph.p.hTlpcI+ch ..CslCspclGh..ht.....KlpDG.a.lCcsChpKl....psh.....h.tshp.h.pphTlpplpp............ 0 6 9 9 +14323 PF14472 DUF4429 Domain of unknown function (DUF4429) Bateman A agb Jackhmmer:Q9L067 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and viruses, and is approximately 90 amino acids in length. This domain is often found in two tandem copies. 25.00 0.00 25.10 5.60 23.80 -999999.99 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.17 0.72 -3.69 66 192 2011-10-26 15:23:52 2011-10-26 16:23:52 1 7 87 0 61 208 3 93.40 26 61.81 NEW hsFDGpp.lplphs...htstth+pu...tG.spslslsslsuVpac.s.....sth....................................................tsGaL..Rhhhts....................ussstst.................ssppDPtsl...hhs.tcptthshhhsstVps ...............hsFDGcs.lplpap...hts.phptt..hG.-pplPlssluuVphc.P....sth....................................................tsGhL.Rhhh+s................................ussshtths...............ssspDPhsl...hhh.tcp.t.st.hsstl..t.................... 0 19 48 59 +14324 PF14473 RD3 RD3 protein Bateman A agb Jackhmmer:RD3_HUMAN Family RD3 is a human protein that is found preferentially expressed in the retina [1]. Mutations in RD3 causes Leber Congenital Amaurosis type 12 [2]. 
27.00 27.00 31.20 30.80 23.20 18.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.61 0.71 -4.68 17 73 2011-10-26 15:48:09 2011-10-26 16:48:09 1 1 43 0 55 70 0 130.30 43 66.10 NEW hsWh.......p.hsp.s.ppsupRssp-.........lVscsLhhELphpl+ctE+.ppE+EpEtR+tc....otsDYSWLhoss...+tphplsssE+hpLEsLCu+lpPspsu.lIsRFRcllspp-spspElsplF+sVLp-hLpphccptp ..............sWh.......+.hspss..+.opRssuE.........hVh-TLMhELshpl+EsER.pcERcsEhR+hc.....TGVDYSWLsSsP...+ssaslossE+LQLE-lCuKlpPspCGslI.RFRpllAEp-P-spEVsplF+uVLp-sLcphcpEp...... 0 7 15 33 +14325 PF14474 RTC4 RTC4-like domain Bateman A agb Jackhmmer:Q59X99 Domain This presumed domain is found in the RTC4 protein from yeasts. In Saccharomyces cerevisiae, Cdc13 binds telomeric DNA to recruit telomerase and to "cap" chromosome ends. RTC4 was identified in a screen to identify novel proteins and pathways that cap telomeres, or that respond to uncapped telomeres [1]. This domain is also found in proteins that contain a DNA-binding myb domain. 21.80 21.80 24.40 23.40 20.90 20.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.65 0.71 -4.18 38 169 2011-10-27 11:39:00 2011-10-27 12:39:00 1 4 127 0 125 175 1 120.40 28 26.24 NEW ILsspp....sShY.............+shhcsphps.u+cpshphs...............t.thpphssGYYG.+Gtph.......hsptlhs+.auscLc+hAspsp........llpthGsssFsQtVLVPElhlhLltEDM......sls.......s.cc....................ARpIhc-Ss-hGshls-p ...........................................h..t...sShYhshhcpthpp..uppth.hphs.................thpph.ssGY..YG.+Gtph.............hsptlhsc..ats..t.lcphsstst........llp..hh....G.sssasptVLVPElhhhLlhEDM.........sls.........s.cp............................Acpllc-SschGplls.............. 0 28 59 104 +14326 PF14475 Mso1_Sec1_bdg Sec1-binding region of Mso1 Wood V, Coggill P pcc manual Domain Mso1p is a component of the secretory vesicle docking complex whose function is closely associated with that of Sec1p. 
It is a small hydrophilic protein that is enriched in the microsomal membrane fraction [1], and this binding domain is towards the N-terminus of Mso1. The yeast Sec1p protein functions in the docking of secretory transport vesicles to the plasma membrane [2]. Mso1p and Sec1p interact at sites of exocytosis and the Mso1p-Sec1p interaction site depends on a functional Rab GTPase Sec4p and its GEF Sec2p [3]. The C-terminal region of Mso1 (not built) assists in targetting Sec1 to the sites of polarised membrane transport [4]. 29.10 29.10 31.30 30.40 28.60 27.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.07 0.72 -4.55 22 111 2011-10-27 17:28:36 2011-10-27 18:28:36 1 3 109 0 80 103 0 43.90 40 17.97 NEW Sulpplp..l...p....sEpDGDTtsDTlVH+sLlpYYppp..tpsaPsWL ...................phtph.....h.....p.........sEs.D.GDoEDsThlp+sLhsYYscK..GpsFPsWL 0 13 39 67 +14327 PF14476 Chloroplast_duf Petal formation-expressed Coggill P pcc Pfam-B_480 (release 25.0) Family The members of this plant family from Arabidopsis thaliana appear to be proteins found in the chloroplast, expressed in the pollen tube during the petal differentiation and expansion stage. The function is not known. 
25.00 25.00 44.50 25.00 21.90 21.30 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.03 0.70 -5.63 14 101 2011-10-28 14:10:21 2011-10-28 15:10:21 1 4 16 0 67 104 1 250.10 52 75.27 NEW KLaAlhEAVADRlEMHcNIGcQRDNWN+LLLsSlNslTLTAAsMAGLA...stuusshhALKlSSTlLasAATGhhslMNKIQPSQLAEEQRNAsRLF+QLcppIcssLulsss........spsDVp-AME+VLALD+AYPLPLLGuMLEKFPpoVEPAsWWPppcppptcppt................tttpuNGWSpELE-EMRcllpVLKtKDpp-Yl+LGclALKlNKlLAIoGPlLTGlAAluSAFlGss.....ssWAuhluVssGALAosVNTlEHGGQVGMVFEMYRssAGFFphMEEoIESslpEp-Vc+RENGElFEhKVALpLGRSLS-L+ ......................pLhulhEtssDRhEMHc.IGcQRsNWNpLLLpShN.lTLsAusMuulA....s.sussllALKhSuslLh.uAsshhhhhNKIQPSQLsEEQRNAsRLa+pLptpl.pthluhsss........optDVppsh-+VLALDpAYPLPLLs.sML-KFPpphEPAhWWPppc.ptt.pptt....................t.psNGWs.-LE.EMRclltVlKtKD.p-Y.+lGplsLplNKhLAluGPhLsGhAAhuouFlGss.......tsWushlulhsGuhAusVNshEHGGQlGMVFEMYRssuGFaphhpEsIEuslpEt-lt+RENGElFEhKVALpLGRShStL+....................................... 0 3 46 59 +14328 PF14477 Mso1_C Membrane-polarising domain of Mso1 Coggill P pcc PF14475 Domain Mso1p is a component of the secretory vesicle docking complex whose function is closely associated with that of Sec1p. It is a small hydrophilic protein that is enriched in the microsomal membrane fraction [1]. The yeast Sec1p protein functions in the docking of secretory transport vesicles to the plasma membrane [2]. Mso1p and Sec1p interact at sites of exocytosis and the Mso1p-Sec1p interaction site depends on a functional Rab GTPase Sec4p and its GEF Sec2p [3]. This C-terminal region of Mso1 assists in targetting Sec1 to the sites of polarised membrane transport, the SNARES and Sec4 [4]. 
23.20 23.20 23.20 23.20 22.50 18.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.78 0.72 -3.64 7 18 2011-10-28 14:29:35 2011-10-28 15:29:35 1 2 18 0 12 17 0 60.30 31 34.86 NEW hSup......uSsslp.....+suSclpDsaNSp+DsS.hp..Gosptuh...PAps.sssustpGS.LRu ........................................................tusshp........RSSSRLQDhYNKSRQQS..hPGsGYso................................ 0 2 7 12 +14329 PF14478 DUF4430 Domain of unknown function (DUF4430) Coggill P pcc JCSG-Target:417407-SP17946A Domain Although this family has overlaps with SLBB, the majority of its sequences are unique. Several family members, eg UniProtKB:A0RGA8, that do not overlap have an LPXTG-cell wall anchor at their C-terminus, a SSF_Family 10_polysaccharide_lyase or Glycosyltransferase structure associated with them in the middle region, as shown by InterPro, as well as this domain at the N-terminus. 30.00 30.00 30.20 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.20 0.72 -3.92 123 701 2012-10-03 10:59:06 2011-10-28 15:57:49 1 52 462 4 173 592 41 69.30 29 19.69 NEW usolh......csLc.....p....pt...plpt..p...pt.........thG....alsuI...sG.....h.p.p.........c..tsss.ph.....WhapV.N.Gphssh.....Gusphpl.c.sGDp.lpat ..............................................................polhslLp.......c........st....clct..p.......s............G.....alsuI....sG....................ltp...............c..ssp.sh.......WhapV..N.Gphssp......GAsph.....pl.c.sGDplpa........... 0 62 111 129 +14330 PF14479 HeLo Prion-inhibition and propagation Greenwald J, Coggill P pcc Pfam-B_407 (release 25.0) Domain This N-terminal region, HeLo, has a prion-inhibitory effect in cis on its own prion-forming domain (PFD) and in trans on HET-s prion propagation [1]. The domain is found exclusively in the fungal kingdom. 
Its structure, as it occurs in the HET-s/HET-S proteins, consists of two bundles of alpha-helices that pack into a single globular domain [1]. The domain boundary determined from its structure and from protease-resistance experiments overlaps with the C-terminal prion-forming domain of HET-s (PF11558 [2]. The HeLo domains of HET-s and HET-S are very similar and their few differences (and not the prion-forming domains) determine the compatibility-phenotype of the fungi in which the proteins are expressed. The mechanism of the HeLo domain-function in heterokaryon-incompatibility is still under investigation, however the HeLo domain is found in similar protein architectures as other cell death and apoptosis-inducing domains. The only other HeLo protein to which a function has been associated is LopB from L. maculans [3]. Although its specific role in L. maculans is unknown, LopB- mutants have impaired ability to form lesions on oilseed rape. The HeLo domain is not related to the HET domain (PF06985) which is another domain involved in heterokaryon incompatibility. 
23.20 23.00 23.30 23.20 23.10 22.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -9.98 0.70 -11.20 0.70 -4.58 65 227 2011-10-28 17:51:32 2011-10-28 18:51:32 1 30 76 5 198 232 0 189.70 16 32.69 NEW GlulGsh...ul..............uulFssslc.....saphlpsu.........+sau.cDhphhp.l+L-lpchRLhpWGcul.....GLhpss.tpp...........................hssthh...tptlpplLspIpplhp-s..pphpp+athptsssstt................................................................................ppphpphspp.tppp....pshhp+sp......WslhDKcp........FppLlpclpshs-s.LpsLh..Pstptpp............phtptchpth..pptpsLphLpcs ........................................................................................h.sh.ul........htlapsslp...................shphlpss.............pphs.p-hphht.hcLclpphRLhtWucss.....Gl.psttttp......................................................tp..h...tptlhplLtpltphhpph..pphp.p+ath.tst.ttt...............................................................................................................................htthpthhtp.tppp.........hphhp+hp......Ws....lhD+pp........hptllpphpthsspLpplh.....s.ttp.p.......................................................................................................................................... 0 27 88 173 +14331 PF14480 DNA_pol3_a_NI DNA polymerase III polC-type N-terminus I Eberhardt R re3 Pfam-B_853 (release 23.0) Family This is the first N-terminal domain, NI domain, of the DNA polymerase III polC subunit A that is found only in Firmicutes. DNA polymerase polC-type III enzyme functions as the 'replicase' in low G + C Gram-positive bacteria [1]. Purine asymmetry is a characteristic of organisms with a heterodimeric DNA polymerase III alpha-subunit constituted by polC which probably plays a direct role in the maintenance of strand-biased gene distribution; since, among prokaryotic genomes, the distribution of genes on the leading and lagging strands of the replication fork is known to be biased [2]. 
It has been predicted that the N-terminus of polC folds into two globular domains, NI and NII. A predicted patch of elecrostatic potential at the surface of this domain suggests a possible involvement in nucleic acid binding [3]. This domain is associated with DNA_pol3_alpha Pfam:PF07733 and DNA_pol3_a_NI Pfam:PF11490. 22.00 22.00 22.60 22.00 21.90 21.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.31 0.72 -4.34 58 1166 2011-10-31 08:26:54 2011-10-31 08:26:54 1 22 1163 0 129 779 4 76.00 29 5.33 NEW cpFphLhpplp..hssch...thhpsu.plp+lslpcps+pWcFplphcplLPhchapphpppLpps.Fpphs.plphplps ............ppFplLhsQlp...hssph.psthlpsu.pIc+lsVppts+hW-FHlshsplLPh-hahthpptLppp.Fpchu.psshplp.............. 0 42 81 105 +14332 PF14481 Fimbrial_PilY2 Type 4 fimbrial biogenesis protein PilY2 Jaroszewski L, Godzik A lukasz Pseudomonas aeruginosa PAO1 gene PA4555, JCSG target SP18988A Family Members of this family were experimentally shown to be involved in fimbrial biogenesis, but its exact role appears to be unknown. 25.00 25.00 118.80 118.60 22.00 20.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.41 0.71 -4.67 9 19 2011-10-31 17:44:58 2011-10-31 17:44:58 1 1 18 2 2 11 0 112.70 67 99.54 NEW MKsL.hL......LALAsPshsaAp-spTFEsuGVV.-VplEpsLVsIDtphYRLPNussps....GhPslFQV+PGSVVSaSGoVSpPhspIssIYIhKQhS...hu....cEp...tuE.ps+ .MKVLPML......LALAVPGLCWAE-PQTFEGAGVVFEVQVEKNLVDIDHRLYRLPNSTVRN....GMPSLFQVKPGSVVSYSGTVSQPWSTITDIYIHKQMSEQELAEMIEKEQPRQDGEEQPR.. 0 1 1 2 +14333 PF14482 Cut8_N Cut8 proteasome-binding domain Eberhardt R re3 Wood V Family In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome [1]. Cut8 comprises three functional domains. 
An N-terminal lysine-rich segment (this entry) which binds to the proteasome when ubiquitinated, a central dimerisation domain (Pfam:PF14483) and a C-terminal six-helix bundle (Pfam:PF08559), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding [2]. Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 [1]. Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome [1]. In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 [1]. Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum [1]. 22.00 22.00 23.70 23.70 21.40 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.84 0.72 -3.97 19 98 2011-11-01 13:47:55 2011-11-01 13:47:55 1 3 96 0 68 85 0 55.20 33 17.09 NEW phpspps.tpR...KRpus--.......pp..hsP....ssp.t........h.spth.........................ts++Kh+.tsplpGpP ...........ppss.ssR...KRKA--D........t.sschohSP.......osSPu........hssRsls.........................sp++hR.psplsGRP. 0 10 27 51 +14334 PF14483 Cut8_M Cut8 dimerisation domain Eberhardt R re3 Wood V Family In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome [1]. Cut8 comprises three functional domains. An N-terminal lysine-rich segment (Pfam:PF14482) which binds to the proteasome when ubiquitinated, a central dimerisation domain (this entry) and a C-terminal six-helix bundle (Pfam:PF08559), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. 
The six-helix bundle is necessary for liposome and cholesterol binding [2]. Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 [1]. Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome [1]. In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 [1]. Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum [1]. 22.00 22.00 22.30 22.30 21.60 21.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.54 0.72 -4.15 19 126 2011-11-01 13:49:00 2011-11-01 13:49:00 1 5 124 3 91 116 0 37.70 39 11.94 NEW LPLsRLLEsLDpspLpslLpslsppHP-lupplhptsP ..LsLsRLLEoLDpspLpslLpslsc+HP-lspEVhppuP............. 0 14 41 74 +14335 PF14484 FISNA Fish-specific NACHT associated domain Eberhardt R re3 Jackhmmer:A5PF24 Family This domain is frequently found associated with the NACHT domain (Pfam:PF05729) in fish and other vertebrates [1]. 22.90 22.90 23.40 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.26 0.72 -4.00 25 1153 2011-11-03 14:27:53 2011-11-03 14:27:53 1 57 42 0 826 627 0 71.30 49 11.79 NEW paKssl++KapslhEGsuppussshLNcIYT-LYIsEGcSspVNpEHEVh.QhEppu+ppssp-TPIpCpDIFp .............phKssLK+KapplFEGIuptGssTLLNcIYT-LYITEGtotpVNpEHEVR.QIEssS...Rp....p..............sp.-oslcs..cDlFc.................. 
0 2 385 784 +14336 PF14485 DUF4431 Domain of unknown function (DUF4431) Coggill P pcc Jackhmmer:E7X0E8 Domain \N 24.00 24.00 24.50 24.00 22.90 21.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.20 0.72 -4.35 23 127 2011-11-04 10:52:06 2011-11-04 10:52:06 1 4 112 0 11 66 0 49.00 47 13.24 NEW plQLsl.sssphshhcp.....hlGKcVpVs.GclhhupouaHaTPlLLs.lspls ..........h.VQLlL.sPEcashapp.....alGK+ITlp.G+VMlAcShaHhTPVLLs.lpc..p.... 0 4 7 9 +14337 PF14486 DUF4432 Domain of unknown function (DUF4432) Coggill P pcc JCSG_target390294_A6THE6 Family \N 32.00 32.00 32.30 32.40 31.50 31.30 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.59 0.70 -5.87 63 706 2012-10-02 23:57:29 2011-11-08 15:18:22 1 2 605 3 123 495 53 292.50 33 85.99 NEW scGschlplps...uGLphsllPsRGMDlh.....csphp...Ghs.luWpSPs.s.hlsPshhs...psGhGWLcuFs.shlspCGLptsGsPs..s.-s..........ut..............h..........sLHGRlustPAcplsh.ph..p...-..p.s.htlplpGplcEsphFGtsLpLppplpsphGps...plplpDplsNpust.spsh.hLYHhNhGhPlL-csuchh..................hPs.pplsst....s...spAtp...shssapphtsPps...s.a...sEpVahhc..tuDtsG.pshshlhNtph...shG...lslpFspppLPhhs.WKshss..tsYVhGLEPu.Tshshs.+sht+cpGplhtLtPGEs+saplphpl ................................s.pGlculclpN..upG.plhllPhhG....Ih.....cA..pac...Gps..Lshpshh..p....pPt.ht.......ph.lcoas.sahh+sG....L.....ts....GsPu....s.-D......................ha..........sLHGchsssshcc.shl.ch..c...s.....tlplsGchc.shs.F.Gcca.hspsslshchuSs...hFcIp.pVTNhu.sh.shPLQhhhHhNauas.......ssAphp.............................tPs..tphhsh....N....pch.hp.........G.s.s...hpols.tPph...h...............sEhVahhc...h.sc.......ps.psthth..hsscu........ps...hss+FsospLshhTpWhhhsup......ppshshuL...Pu.Tsp.P.-.G..hhst....pt.pGp.LhpLpPtpo+sFslohu................................................................................................................. 
0 40 61 87 +14338 PF14487 DUF4433 Domain of unknown function (DUF4433) Bateman A agb Jackhmmer:A0YHY7 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 201 and 230 amino acids in length. There is a single completely conserved residue E that may be functionally important. This family is distantly similar to Pfam:PF01885 suggesting these may be ADP-ribosylases. 22.50 22.50 23.30 24.20 21.90 20.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.41 0.71 -4.59 53 218 2011-11-08 17:28:40 2011-11-08 17:28:40 1 5 201 0 55 181 25 184.10 25 79.79 NEW aHIsHlcNLsSIlpsGhLhucsplhppthshssIu.spIpp+Rhtp.slsstsutplt-YVPF...YFsPRSsMLYtI.ppss.....htttp...psIlhLtssl.ptlst.....t.phsFo..susAusphschhs.............sL...ppLt.pl-Wshlpu........p.Wp.........pp+ct+QAEhLlc.pphPhphlppIsVhspshtspVpphlttts....................hp.sVplpss....WYa ...................................................................aHhTHhcNLssIl..psGhLhucstl....hshsplu.s.lhphRtt......sh.ssshhpDaVPF...Yh.s.spS.sMLasl.pttps...........pt........ttslVhLsssl.chlst...........thsasao..sssAss.sh..sphh.s..............pl......spLt..lcas.hlpt...................cpapps........t...p..s.sp.pcppAEhLlh.pplPhphlpt..lsshspphhpplpphlt.hs.............h...h.hpst..hYa............................. 
0 20 35 44 +14339 PF14488 DUF4434 Domain of unknown function (DUF4434) Coggill P pcc JCSG_Target_393000_GS13553A Family \N 30.00 30.00 30.00 30.40 29.80 29.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.19 0.71 -4.52 39 420 2012-10-03 05:44:19 2011-11-09 11:13:28 1 8 375 0 32 218 17 167.10 53 52.21 NEW loGTFlp.hp.s........c...........pshssspWcpchpth+plGhcsll.....lp...............hsuhps.tshhP.oph......t..h......s...shlphhLstAc+hGMclahGlhhss....paWc..p...pshph.......p..hthspt........lhcE.lhp..haup.at..uFt..GWYlshElspt...s............hss.......ssth....ptls.ph.hcplus.......shPsh..lSsahsst ...................MKGIIWQPQNRDS............QVTDTQWQGLMSQLRLQGFDTLV.....LQ...............WTRYGD.....AFTQP.EQR..................sLLFKRAAAAQQAGL....KLIVGLNADP....EFFM.HQ...KQSSAA..............LESYLNRL.............LAAD...LQQA.RLWSA.sPG..lTPD..GWYISAEIDDL....N..............WRS............EAAR....QPLL.TW.LNNAQRLIS.DVSAKPVY..ISSFFAG.N...................... 0 10 23 26 +14340 PF14489 QueF QueF-like protein Bateman A agb PF01227 Family This protein is involved in the biosynthesis of queuosine. In some proteins this domain appears to be fused to Pfam:PF06508. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.67 0.72 -4.03 30 2821 2012-10-01 20:59:24 2011-11-09 15:46:40 1 7 2788 25 550 1754 742 79.10 46 38.24 NEW PDFAolhlcYhPspphlEhKSLKLYLhSFRscshFHEpssNpIhcDllpthcPcalcVhucFsPRGGlshslhsppsctss ...................PDauolhIpYhss.p...l-pcuLhhYLhSFRpHs.-FHEpClppIhs.DLlchh.pPchLpVau+.aT.RGGlsIsPaps.s...sh...................... 0 155 323 442 +14341 PF14490 HHH_4 Helix-hairpin-helix containing domain Bateman A agb Jackhmmer:A0YV56 Family This presumed domain contains at least one helix-hairpin-helix motif. This domain is often found in RecD helicases. 
27.00 27.00 27.00 27.00 26.30 26.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.02 0.72 -4.22 189 1795 2012-10-03 02:11:09 2011-11-09 16:09:58 1 16 1618 4 349 1402 107 94.40 34 12.36 NEW shpcpps..hcclhhhLppaGlss.p..hAh+Iapp..Yu..s.....pul..cllccNPYpLsp-l......pGlGFppADpIAp....p.h......GlstcsspRlpAulhasLpp.s.t.ppGHsa .................................tppshcplhhhLpsaGlus..pluhpIapt..Yt..p.........co.l..pllcpNPYpLlcDI......cGIG...FppADplAc....p.l...................GIs.s..s...sscRl+Aulha.sLpp..sh.ppGcTY..................... 0 137 248 310 +14342 PF14491 DUF4435 Protein of unknown function (DUF4435) Bateman A agb Jackhmmer:A0YK45 Domain This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 285 and 362 amino acids in length. This domain is sometimes associated with AAA domains. 27.00 27.00 27.10 27.20 26.80 26.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.79 0.70 -4.54 54 311 2011-11-09 16:32:35 2011-11-09 16:32:35 1 5 276 0 51 265 2 230.50 19 66.66 NEW c.+lhsaVEuh..DD.h.aacslls..ph....ps.c...h...c.h.ap......................stGKcsVL.phhphh......ppt..sspshlh.hVDsDa..........Dhl.......h.tt.........ps.s...hla...pT.sYu.....IENhhs.hs.pulcclhsth.sls-..............hh.c..apphhppapctl..hslhl...a.hhhh......cpt.....phhshpps..l.ph..............ps.hphpph.hph.......phpplphp...hphpplpphh.p...th..............................c...ps...h.pplppclppl..s...ps..h.....p..hhh..pG+.....alh......hl.h...hlp.plhpph...pp..cppp 
.....................chhhaVEGh...cDh.aapsl...ls..ph....ps.c.....h.....p.h.h.......................stGKppVlts.thl.......psp......hspshlu...hlDsD.a.................Dhl......httt..............psp....lahT.sas..........lENhhh...hs......pslcclhs...ph.....phss.....p......th.......hc...h.p.t.hh..pp.hp....phl.hslhh.h.hhhh...ppt.............thpph..l.ph.............pphphpph...p......p.tphphp...thhpplpphh.p...th...............................p...t....h.pthppphpth.....t...ts..h..........h....hh..pGH......lhp..l.h..hhh...htp..........pp................................................................................................... 0 15 34 48 +14343 PF14492 EFG_II Elongation Factor G, domain II Bateman A agb Jackhmmer:A0YMC3 Domain This domain is found in Elongation Factor G. It shares a similar structure with domain V (Pfam:PF00679). 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.15 0.72 -4.13 129 11565 2012-10-02 20:07:24 2011-11-09 16:50:22 1 54 5577 56 3004 8396 3438 73.30 37 10.91 NEW hPcPVlshAlcP...cs...csDp-KlupuLp+lhcEDPohplppcpETspslluGMGELHL-lhl-Rl+ccatl-lplup ...........................PpPVlphu.lEPKs......psDp-Kh............sp...........ALp.+LscEDPoh+lps.....cp.E...o...u...p...pl.....luG.....h.......GELHL-lll.-.Rhc+Ea..p.l-spht.t............................. 0 1071 1868 2539 +14344 PF14493 HTH_40 Helix-turn-helix domain Bateman A agb Jackhmmer:A0YXF4 Domain This presumed domain is found at the C-terminus of a large number of helicase proteins. 
27.00 27.00 27.00 27.30 26.90 26.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.76 0.72 -3.77 166 1114 2012-10-04 14:01:12 2011-11-09 17:27:19 1 26 983 0 236 851 57 91.20 26 16.58 NEW sotphThpLa....p.p..G.holc-IAp....pRs......LphuTI.sHLschhpp..Gt.....l....s....l....p....ph..l..sp.....c.....c.hppItpsh.......pp..hs...........................s...p.......pL+sl+E..tLspp.hoYtpI+lshs .................othhTaphh....p.p..G.holc-IAp....pRp......LphsTIpsHllchhhp..Gh.....h....s....h....p....ph......l.st.............-.....c..ptlh...p..hh.......pp...hp....................................s...p.........cL+.lK-..thst....p.loYhpl+lhl............................................................... 0 89 170 209 +14345 PF14494 DUF4436 Domain of unknown function (DUF4436) Coggill P pcc Pfam-B_6430 (release 25.0) Family This is a family of membrane and transmembrane proteins from mycobacterial and related species. The function is not known. 23.00 23.00 23.00 23.00 22.40 22.90 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.69 0.70 -5.09 15 176 2011-11-09 18:28:06 2011-11-09 18:28:06 1 3 96 0 47 101 5 244.90 40 77.13 NEW ushlhspo..ssshsps-suh..u-posVplslEclcossscLsVsVhVtPssuLlDschpsLssDluVRlpusss.u-lpascGphPushsss.lshsGchpsWPFDpYcoss......lss......-lhsGsGss+tshPupVchsGs.LsGWslshspsscss.........sssshplsLcRStuslsFslsIshVLIuLPslALhVAIphspsRRpFpPPhsTWaAAMLFAVVPLRNhLPGoPPhGuWIDhsVVLWVLlALVsuMllYIlsWWRph ...........................................................s..hhhpo...s...psshup..sDsTsVhl+hEpLpTltshLssplh.V.PssphlDpp.hp.sLpsDhoVp..lh.s.s.p.schpassGpLPu...hsss.lp.h.p..Gs....Pus..WPFDpYposs......lps...............plhhGu...u.............+t..h.s......pVp...hssp..L....G..Wslshs..t.Vu-us.................tssshhlsL+.....Ruhusls...........F..slsIshVL.Is.Lsslu...L.F.V.Al..QhhpGR.RpF...Q...PP...h..sTWYAAMLFAVlPLRNhLPGuP.P.....h.GuWIDhsVVlWVllALssuMVlYIlsWahch....................... 
0 27 37 43 +14346 PF14495 Cytochrom_C550 Cytochrome c-550 domain Bateman A agb Jackhmmer:Q55013 Domain This domain is a heme binding cytochrome known as cytochrome c550, or cytochrome c549, or PsbV [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.73 0.71 -4.71 49 117 2012-10-03 10:02:11 2011-11-10 14:07:10 1 1 98 25 35 180 43 133.90 48 82.64 NEW hcLDc.........pscTVsL.......sssGcTlsloscQlppGK+LFsssCupCHsGGl...TKTNPNVuLs.-sLuhAoPsRDNltuLVDYhKsPToYDGpcsIuElHPuhcSuDlaPcMRsLo--DLhslAGaILlpPKlhuppWG....GGK........lYa .......................hpLscpohTVslsspGpTlsloscQlppG++LFsssCupCHsGGl...TKTNPNVuLs.EsLuh.A..T.Ps..RDNltuLVDYhKsPToYDGp-sIuElHPSlcSuDlaPcMRsLT--DLhslAGaILlpPKlsuppWGGGKh.................................................... 0 10 26 33 +14347 PF14496 NEL C-terminal novel E3 ligase, LRR-interacting Coggill P pcc Jackhmmer:E7K2H2_PDB:3ckd Domain This NEL or novel E3 ligase domain is found at the C-terminus of bacterial virulence factors. Its sequence is different from those of the eukaryotic HECT and RING-finger E3 ligases, and it subverts the host ubiquitination process. At the N-terminus of the family-members there is a series of LRR repeats, and the NEL domain interacts with the most N-terminal repeat. The key residue for the ligation step is the cysteine, eg found at position 386 in UniProtKB:E7K2H2. The LRR section sequesters this active site until invasion has occurred [1]. 
23.00 23.00 23.10 23.40 22.30 22.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.43 0.70 -5.06 37 676 2011-11-10 15:41:18 2011-11-10 15:41:18 1 44 233 6 57 677 1 198.20 45 29.38 NEW WLtssstps.p..phttWpshppEpsussFsphLscLppossaptts.....shpppVhphLpthspcspLRpphFshAtssp......toCpDplsltasshchshhl....tsscssph-ss.....LlphuRphaRL-tL-plApc+ltph.t...............sD-lEVhLAapspLtcsLsLss..spcMhahslSslopsDltpApspV.........ppppspthhpalup.pshWcshLcchpssca ...................................................Wh.s.pcps.....sp.W+uF..-pEppA..ssFStFLDRLu-T.ss+pss...........sF+cQVuAW....LtpLus...........s....spL..RppsFsl..At-AT.......tSCEDR.VsLsasphcpshLV....H.pApcGhaDsch..stLlshGREhFRLE.LEsIAR-..KV+pL..h............................................h...lDEI...EVaLAaQsh....Lt-pLpLoo.hsp-MRFas...VS.GVTssDLcsAEthV.........+stEpp-FpcWhuh.WuPWHsVLcRpts-ca...................................................... 0 4 12 27 +14348 PF14497 GST_C_3 Glutathione S-transferase, C-terminal domain Bateman A agb Jackhmmer:Q7WQ90 Domain This domain is closely related to Pfam:PF00043. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.62 0.72 -3.52 199 1536 2012-10-03 01:14:49 2011-11-11 11:56:35 1 53 641 37 886 5329 1288 117.80 14 30.20 NEW tpthphsh........hs.....Whs..................................................................tptsppcl.ptlphh.....sptL...stps...............alhGsp..h..................ohsDl....s...las....hl........tshp..at.....h..................tsh.......................p......lhp........ahsplpp .....................................................................................................................t.........................................................................................................t..t....t...h...t...p...ts...p...p...c...l....p....h.h.pth..........pptL.........spps..................alhGsp.....................ohADl......s........l.au..........hL...................tsht......hs.....hs...........................pphs.......................sltpahpplp.............................................. 0 254 427 678 +14349 PF14498 Glyco_hyd_65N_2 Glycosyl hydrolase family 65, N-terminal domain Bateman A agb Jackhmmer:B5CKV7 Domain This domain represents a domain found to the N-terminus of the glycosyl hydrolase 65 family catalytic domain. 
27.00 27.00 27.30 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.62 0.70 -4.68 86 1312 2012-10-02 23:57:29 2011-11-11 13:22:47 1 57 676 9 328 1063 38 230.20 29 29.24 NEW LaaspP...............Asp..................Wp......cuLPlGNGplGAhlaGsss.....pEplplN-colWoG..uspssps.s..............................s.thlttlRphlhpscht.........pupplhtphhtstst.................t..YpshGsLhls............ts...pssss.............sYpRpLDLssulssssaph..su.............spa..........pR-hFsSh...PD.........sVlV......h+l.puspsttl.shslplsss...................psssssspssplphpGph.....................tssulpaputl+l.hssuGsl...tsps................sslplp....sAsplslllsssTsapsp ..........................................................LhYppP...................Asp.................................Wp...-ALPlGNGpLGAhlaGshs.....pEplQhNEco.lWsG......ustsps.sst..............................th.hL.p.lRp....hl....p.schp...........................tAp.p.Lhppphhu..p...........................up.Y.shGclhlc........................ths...tspho.......................sYpRpL-lspAlus..ssaph..su.......................spa...........pREhFsSh.....sc...................slll......h+h.ous.t........t.p.......l..sh..s..lplsps.......................................ppthhshsssplhh.p.Gps......................................................tpsslp..at....sh..h..th...htss.Gpl......ps.s....................spltlp...sAs.psslhlsutTsat..................................................................................................................... 0 123 253 301 +14350 PF14499 DUF4437 Domain of unknown function (DUF4437) Eberhardt R re3 Jackhmmer:B2J7U5 Family This family of proteins is found in bacteria. Proteins in this family are typically between 152 and 283 amino acids in length. 
24.00 24.00 24.00 24.00 23.90 23.70 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.45 0.70 -5.47 17 81 2012-10-10 13:59:34 2011-11-14 11:54:57 1 2 76 2 32 109 79 171.00 22 81.08 NEW -llhus-lcWshLNPhRG-tuPuAupLWGDRsssssoGhLV+FpcGFpSPPHIHNloYRGlVIpGtlHNDD.cAtphWhPsGSFWTQPAG....EsHlTAAcuspslhYlEIssGPYLV+PsppsFDsGERPlNlcpsNlVWLsspclsWlpss.......usphsaLWssssstthpGhhl+LPsuFpGplpopussh+AVllpGplsap..p.spspsLtsGSYhsustc..Hpl..ps-pssslYlRosGcaplp ..................................................usthusLaGD.tppushshhl+hssGapssPHhHshs.p.h.hVIpG...t...h......h...s.....s...c....s.t.......hssGuaah.PuG....t.Hhshspst.p.h....h.htpsPh...............................................................................................................................................................h............................................. 0 9 18 26 +14351 PF14500 MMS19_N Dos2-interacting transcription regulator of RNA-Pol-II Wood V, Coggill P pcc PF12460 Domain This domain, along with the C-terminal part, Pfam:PF12460 [1], is an essential component of a silencing complex in fission yeast that contains Dos2, Rik1, Mms19 and Cdc20 (the catalytic subunit of DNA polymerase-epsilon). This complex regulates RNA polymerase II (RNA Pol II) activity in heterochromatin and is required for DNA replication and heterochromatin assembly [2]. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.65 0.70 -5.13 55 354 2011-11-14 16:18:51 2011-11-14 16:18:51 1 16 268 0 250 358 4 225.50 28 24.04 NEW VpsLGphLTsp-sthRs+ulp.hLosVLppLs.ssh..LscpplplLhsFYps.......Rl..cDptslhss.LpGltuLs.phpp.......hstst...stpllculhpchps..puhhpssRhtsapllpslhppatctL......pshus-Flh.GhlphhsGEKDPRNLllsFplhp.hlhppa.s.............l.spasE-LF-lhhCYFPIsFpPPsNDP.h.sITp-DLptsLcsslsuoshFAphshPhLlEKLsSos.s..sKhDoLpsLttChpsa...sss.slppahhslWsuLKhElhps ................................ht..lts...p..hRt+.uhp.hLstll.th...ph.............Lpppp.....l.thLhtFat.s.......+l..p.Dtth.l..s..hpul.......ttL....h.tt................................hs.s.......s.plh.p.sl..h.p......phps....ps........h.t...t.p...Rhtsapllp.th..h.pp..hp..t.tl......................pths.s..p..ahh..shlp.hhsGE+DPRNLhlhFplhp....hlhppas........................................h..sthsE-lF-sh.sYFPIsFpPss...s.Ds..h...sIotc-Lt.sLptsls.usshF.A......hshPhLl-Kls...Ss.....s......sK..h.....DsLpsL...t..Ch...t....Y...s.....p..pl.ttah.plWsul+hElh..s.......................................................... 0 84 137 208 +14352 PF14501 HATPase_c_5 GHKL domain Bateman A agb Jackhmmer:B5CQB8 Domain This family represents the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.36 0.72 -4.30 80 2619 2012-10-11 19:05:54 2011-11-14 17:49:38 1 17 1244 0 315 12868 2588 102.70 24 25.87 NEW l.pshDlsslhuNlLDNAIEA....spc.........h..p..p...c..+h.Iplpht.t.pp..sh..lhIplc.N.s.....hss.....................ph......sp.....hh......oo..K.p.c.tphH.GhGlpSlcphlcKY.sG.slp..hp.hcss..hFphplhls ..........................................h.p.hDls.pll.u.llDNAI.E..u......u.tc.......................h..p.............pth....l...p...l..s..h.h.....p....p....p.......sp.........h......h.........h.......h.....l.p...Ns........hpp.............................sh..........sc...........lh..........sh....p....s......c.....s..p.....s+....G..l..G..L..p.sl...cc...l.l.c.....p...a...ss.....l..p....h..p....h...p...s..s.....h.F.p...plhl........................................... 0 158 232 280 +14353 PF14502 HTH_41 Helix-turn-helix domain Eberhardt R re3 Jackhmmer:D2A9I0 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -7.96 0.72 -4.34 19 338 2012-10-04 14:01:12 2011-11-15 11:51:03 1 2 335 0 28 586 65 47.90 75 16.35 NEW psG-RlpTIsEhucchsluhGslQsALKhLcspsAlpL-pRG+.GTal ...........KCGNRLKTIDELATECRSSVGLTQAALKTLESSGA.IRIER.RGRN.G.SYL........... 0 8 14 23 +14354 PF14503 YhfZ_C YhfZ C-terminal domain Eberhardt R re3 Jackhmmer:D2A9I0 Family This domain is often found in association with the helix-turn-helix domain HTH_41 (Pfam:PF14502). It includes YhfZ proteins from Escherichia coli and Shigella flexneri. 
27.80 27.80 27.90 49.80 26.90 27.70 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.42 0.70 -5.02 20 369 2011-11-15 11:54:42 2011-11-15 11:54:42 1 2 339 2 28 144 1 210.60 73 75.69 NEW chphLLphAslsslVssMPLPYS++YEGLATGLpppFcp..slPh...hAaMRGustRlcsLpsGhYDaAllS+LAAcpalcp...cslclshphGs.poYVupHhlla+csppppI....hRlGlDssShDQplLTchhhc.scclEhVElsYsphlptlhpGpIDAslWNh-..chchps.hslphhslpp...pp..hhpcsocAVlllcp-spslppllpphVstcpllphQpcVlpschhPsY ...DNKALLoHVDINNVVCAMPLPYTRLYEGLASGLKAQFD...GIPFYYAHMRGADIRVECLLNG..VYDMAVVSRLAAESYLTQ...KGLCLALELGP.HTYVGEHQLIC.RKG...ESANV.....KRVGLDNRSADQKIMTDVFFG....s.S.DVERVDLSYHESLQRIVKGDVDAVIWNVV.AENELT.....M..LGLEA..TPLTD...DP.RFLQATEAVVLTRsDDYPMQQLLRAVVDKHALLAHQQRVVSGEQEPSY......... 0 8 14 23 +14355 PF14504 CAP_assoc_N CAP-associated N-terminal Coggill P pcc JCSG:target_417453-SP18049A Domain The function of this domain is unknown, but it is found towards the N-terminus of bacterial proteins carrying the CAP domain, Pfam:PF00188. All members that do not otherwise carry an additional Cu_amine_oxidN1, Pfam:PF07833, domain are likely to be extracellular as they start with a signal-peptide. Most other non-bacterial proteins with the CAP domain are allergenic [1]. 24.30 24.30 24.40 25.00 23.90 23.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.76 0.71 -4.46 63 757 2011-11-15 12:29:15 2011-11-15 12:29:15 1 5 529 0 63 374 0 139.40 32 41.03 NEW IGpstpplppphG.pPpRhpsspYGacWalYpppstp.Yl.luh.pc..s+VsulYssuspls..hsshclGpstpplhcphshpPphslptssppYphchscc-h.hcslhph.sshY.splahDpa.ssplsul+hlsccshhchpsYt ...................lGpshcslpppaG.pPc...Rlhssta.GachYsYppcspp.YhhVuh.tc..c+VsulYsssptls..luPlKlspppuclh.p.+hulpPEhshphspppYchEh.-c-h.hpsll+h.sclYAplaaDpp..ssplhuVphlscphhsclcPY.................................. 
0 21 43 57 +14356 PF14505 DUF4438 Domain of unknown function (DUF4438) Eberhardt R re3 Jackhmmer:D2C4U6 Family \N 27.00 27.00 319.50 319.20 21.60 17.70 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.62 0.70 -5.61 31 62 2011-11-15 14:52:22 2011-11-15 14:52:22 1 1 58 35 28 65 22 259.40 51 88.24 NEW sYcVst-GpshlLPusGGITYNVplGDsshGhsGDHlEPGVSh....+s...sscp......NsuLphhuClGNpAcVlSG-AKGtpGhVTG+HGGl.-HVlVpFscEshEKLslsDpIhI+AhGQGL+LhDaP-lplhNlDPcLLc+hsIcp..ccGtLcVPVsshlPAhlMGSGlGussshpGDYDIhTsDpcssccaGlccLRFGDlVAlhDpDNcaG.RtYpcGAloIGVVVHSDChpuGHGPGlTslMTutsutIcPhlDspANIAshL ....YcVst-GpshllPusGGIoYNlplGDsshGhsGDHlEPGVSh.......+s.......sscp.NsuL.hhuCIGNpApVlSG-AKGtpGhVTG+HGGl.-HVlVpFsc-shEKlslsD+IhI+AhGQGL+LhDaP-lplhNlDPpLL-+hsIpE..ccGtLcVPVsshVPAalMGSGlGuusshpGDYDIMTsDtcssccaGlccLRFGDlVAltDpDNpaG.+tY++GAloIGlVVHSDChpAGHGPGVTslMTutsutIcPhlDspANIAshL.. 0 8 16 25 +14357 PF14506 CppA_N CppA N-terminal Eberhardt R re3 Jackhmmer:E0TM64 Family This is the N-terminal domain of the CppA protein found in species of Streptococcus. CppA is a putative C3-glycoprotein degrading proteinase, involved in pathogenicity [1,2]. It is often found associated with Pfam:PF14507. 25.00 25.00 25.00 25.20 24.90 24.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.50 0.71 -4.45 12 394 2012-10-02 15:00:03 2011-11-16 09:00:13 1 2 368 4 25 213 1 117.00 61 50.78 NEW hhsPVL+VNNRclN.sFYppoLGhKhL.EEsAhh.hus.t.ptt-+hllEESPShRTRtV-GsKKLscllIKsssPpEIEtLLApGsp.hcpLFKGppGYAFEslSPEsDhhLlHAEDDlppLp.lt- .........p.IlPsLKsNNRcLNpsFYhcTLGMKsLLEEuAFlS..LGD.Q.oGhEKLVLE.EuPSMRTR......+VEGhKKLu+llVKVpNP.hEIEulLu+s...c.u...l.....c+LYK....Gp....NGYAFElh.SPEsDLlLlHAE..DDhssLhclt.c....................................................... 0 2 7 15 +14358 PF14507 CppA_C CppA C-terminal Eberhardt R re3 Jackhmmer:E0TM64 Family This is the C-terminal domain of the CppA protein found in species of Streptococcus. 
CppA is a putative C3-glycoprotein degrading proteinase, involved in pathogenicity [1,2]. It is often found associated with Pfam:PF14506. 25.00 25.00 28.40 25.60 22.10 24.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.43 0.72 -3.83 13 384 2011-11-16 09:02:29 2011-11-16 09:02:29 1 4 382 4 26 177 0 99.50 49 40.97 NEW LSpFpl-slsLNVPs..c.ucuFYcs...splshsl-FppupGsDLslssslTWDLEhLEapVs.-aDlsuL+sphEs..pps..YlDK+cKlLVhoDsSpIElWFp .............................................LSpFEI.ShELplPs..c.hcuFL-s............................scl.............s........soLcFl.AQGpDLsVDNslTWDLoMLKFhVs.-hDlAuLRp+FEu..p-a..FIPKS-KFhLscDpsNlELWFE.. 0 2 8 16 +14359 PF14508 GH97_N Glycosyl-hydrolase 97 N-terminal Coggill P pcc PF10566-seed Domain This N-terminal domain of glycosyl-hydrolase-97 [1]contributes part of the active site pocket. It is also important for contact with the catalytic and C-terminal domains of the whole [2,3]. 25.00 25.00 29.50 29.50 22.50 21.80 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.95 0.70 -5.41 243 798 2011-11-16 12:22:34 2011-11-16 12:22:34 1 17 279 12 179 796 147 258.40 27 38.27 NEW lsSPDGplplsl.......shs.s............p.sYslpa.....puctllpsStLGlphptss........................hspshp.lhssppssh.ccsap.sh.GcppplcscaNElslshp.ptts..............t.htlpFRlasDGlAFRY.p..hstp.tsht.......hhl.sEtTpFsh..ssstpuah.....hs.t.t................hs.........saEt.Yppsslsph............pt.............................hsphPlhhcst..s.s..halslpEAsLhs.YsuhtLp.ss......ts.shpuths.ssts....................t.tsh.h..........psshsoPWRslhlucssssllp.osllhsLs.-P 
............lpSPDGplplsh......slsts............pssYplsa........cscsllpsStLGlphpsss.............................h.spshp..lpssp.ps.s..D-sap.sh.Gcpppl+scYNElslshppsts..............tth.lpFRlasDGluFRYp......hPpp..tsht.......hhlpcEtTpFsh..s.sst..p..uahhs.s..s...................ap.................saEt.Yppst.l.sch.........pt.............................hsphPlh.hcss...c..G...halslpEAuLh...s..Ys..uhpLp.ss......pstshpuths.ssts............................s.tsh.h......psshsoPWRslhluccstsllp..oslhhsLscP............ 1 71 152 171 +14360 PF14509 GH97_C Glycosyl-hydrolase 97 C-terminal, oligomerisation Coggill P pcc PF10566-seed Domain Glycosyl-hydrolase-97 is made up of three tightly linked and highly conserved globular domains. The C-terminal domain is found to be necessary for oligomerisation of the whole molecule in order to create the active-site pocket and the Ca++-binding site. 25.00 25.00 40.20 38.70 20.50 24.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.13 0.72 -3.92 195 791 2011-11-16 12:46:07 2011-11-16 12:46:07 1 17 276 12 177 794 106 99.50 33 14.68 NEW ssWD-TchLpuc.sG-YlslAR+s.......ss.sWalGuhospsuRs..lplsLs.FLs.pG..tp.YpAplYpDG.sAsh......sspsa...plpp...ppV.sspspL.plphAsGGGhAlplh.. ....ssWD-o+hLcuc.sG-......YlslAR+p...........us.sWalGulss..p..psRs..lplsLs...FLs..tG..............pYpA....plYpDutsAph.......sspsh...........phcp............tpV..ssp.spL..plphusuGGhAlplh............................. 0 71 153 169 +14361 PF14510 ABC_trans_N ABC-transporter extracellular N-terminal Coggill P pcc Pfam-B_101 (release 25.0) Domain This domain is found at the N-terminus of ABC-transporter proteins from fungi, plants to higher eukaryotes. It would appear to be an extracellular domain. 
25.40 25.40 25.60 25.50 25.30 25.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.63 0.72 -3.89 317 1186 2011-11-16 17:21:30 2011-11-16 17:21:30 1 43 200 0 805 1238 0 101.70 20 7.47 NEW Lspphsp......................................................t..............D.....s..pL..s.........spsc............s.F-.scpal+plhchhspsuhp.h....phGV.sFcsLsVpG.u..suschtsTl.sNhhhs .........................................................................................................................................................................................D.....tpL..s...........spsc.......................................p.ac.s.cpal.c..p..h...t...c.t...h.c..p..s......Ghph.....plGV.tacsLsVpG.u..sssp.h.sTl.hNhhh........... 0 147 445 691 +14362 PF14511 RE_EcoO109I Type II restriction endonuclease EcoO109I Coggill P, Eberhardt R pcc pdb_1wtd-Jackhmmer:Q9RPJ3 Family This is a family of Type II restriction endonucleases. 22.50 22.50 22.90 23.20 20.40 20.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.10 0.71 -4.91 22 53 2012-10-11 20:44:47 2011-11-17 13:56:36 1 2 52 4 15 47 3 194.80 28 76.72 NEW pl.ppscpalcppIsphHp+phppL.ppLcLhclLc.KNPaLF+.spslLous-llcullcAa.....LuSs-EThFGs.alEsLAhalsptshts.p......SshpGIDlEF.....ppDup+hhlslKSGPNhhNusplpphpscF+pstphlRos.t.shphsslsGssYGccpp.sps....................cYhchsGQcFWphlo.G-cshYhclI-slGctAcEps ..................................................t....thpphht..I...p.+thp.h.pphp..pl....NPaLac.hpsl.sup-hsculhcAh.....luouc-TsFGs.hhppLhhhlspssh.................................SshsuhDlEh.....ppDGphhllplKSGsNshNpspl....pplp....pcF+phtsphR.....shs.psl.sGlsYGcssp.sst.p.h...........................t.cYhhlsGpcFWpalo.G-cshYtclhcsls.hApEhs............. 
0 5 11 14 +14363 PF14512 TM1586_NiRdase Putative TM nitroreductase Coggill P pcc Jackhmmer:Q9X1S2_pdb:1vkw Family Compared with the more traditional NADH oxidase/flavin reductase family, this family is a duplication, consisting of two similar domains arranged as the subunits of the dimeric NADH oxidase/flavin reductase with one conserved active site. 27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.35 0.70 -5.14 5 84 2012-10-02 14:48:17 2011-11-17 16:31:20 1 1 81 1 19 82 2 203.90 23 84.37 NEW MNIFEAIENRHSVRDFLERKMPERVKDDIENLLVKFITKKLDWKINLSSFPSYIYAKAEKHFDELVEYGFQGEQIVLFLTAQGFGTCWMARSPHPDVPYIIVFGYPRTRNFTRKRRPITSFLENDLEELPPEIVKIVEMTILAPSALNRQPWKIKYTGGELCISSERPVDLGIALSHAYLTAREIFKREPVIQKREEDTYCLILNP .......................................................................phh-hh..R+SVRpa......pctl........s......pp.......l......h.....p......pl...........p..........s..........l...................h....p....h............l.......h...c.p.......h.....t....h..p....h....p....h................s...............p..........h......sp...Y.....l...........s.hh...ucc.pt.........s...h...h.p.............p.hGYhuEplVLhhpslGLuTCW...l.........u........h........s.....................p.....t...........c.............l.........s.......h..l..I..s..hGa...s........p.s..p....sh......s..c.....+..........p..+.sh.......pp..h.hp...t.............h....p-..hs..p.......h..h..p.hlcushLAPoAhNpQ.a.......h..h.........h.p.s.s..pl...........h.........h..............................................................................................hthttt............................................ 0 13 19 19 +14364 PF14513 DAG_kinase_N Diacylglycerol kinase N-terminus Eberhardt R re3 Jackhmmer:P23743 Family This domain is found at the N-terminus of diacylglycerol kinases. 
23.00 23.00 23.00 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.00 0.71 -3.97 9 283 2011-11-18 15:00:42 2011-11-18 15:00:42 1 30 72 2 119 251 0 118.70 37 20.79 NEW tpccWspLSPpEFsQLQKYsEYSTKKlKDVLcEFptsGshspYsPc-.......sIsaEGFchFMKsYLEs-.lPccLspHLFhSFps+.pp..................................ss.sscSKst.hpush+.................p.ssVlpLKDlVCYLSLLEsGR ................................................ph..loPt-FtQLQcY....-..Y.S.oK.KlpDVLptF......t..s..G...h....t....pY...tt...................sIsa-GFchFh+saLEs-.lPpchst+LFhSFppp....p............................................................................................................................s.t.....t...tp..................t..........................................h..l.LpDlsCYhSLLEtGp.................................................................... 0 22 32 67 +14365 PF14514 TetR_C_9 Transcriptional regulator, TetR, C-terminal Coggill P pcc pdb_2qtq Domain This family comprises proteins that belong to the TetR family of transcriptional regulators. This family features the C-terminal region of these sequences, which does not include the N-terminal helix-turn-helix. 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.63 0.71 -4.33 4 174 2012-10-03 00:15:22 2011-11-21 15:07:58 1 2 158 5 45 121 5 127.00 28 58.91 NEW DMSPpAKLRRHlutsIcTYacYPYLpRLLhtLhRDusEtpARcIAcpYlpPLtcAYpRhIptGVtsGsFRPlDPQLFYFsVhGAsDphFSuRhVL+asaG.DplsEpLRcpYpEpsl-hIMuGlLA..tc .......................................................s.....tthphhlptl.hctYh....ch....P.h.RLl...p.s....hpp...st..tt....t...p.ppl.....lcp.....hlt..P.lscslp.sll.EcGlctGlhR.p.lDP.hhalolhuLshah.a...sp.sLttla.Gh-..hup.phhcphlctshpLlhtGh......th........................... 
0 12 24 34 +14366 PF14515 HOASN Haem-oxygenase-associated N-terminal helices Coggill P pcc Jackhmmer:Q9HY91, pdb_3bjdA Domain This domain represents a pair of alpha helices, which are found at the N-terminus of some Haem-oxygenase globular domain. 25.00 25.00 123.70 121.40 21.00 19.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.01 0.72 -3.61 2 32 2011-11-21 16:17:14 2011-11-21 16:17:14 1 2 16 3 2 30 0 86.30 68 26.25 NEW hopRshS..th.....stutllA-ALtsuAohcQIp.AhpAlhAlTtKuLtGDtpAYAtYQtLLh.LplusDs.Tt.TRRWhAstlYhVE-RF .hspRshS.Pth.....ptutllA-ALtstAo.cQIs.AhptlhAlsttGLtGDtpAYAtYQtLLh.LplusDstTt.TRRWhAptlYhVE-RF 0 2 2 2 +14367 PF14516 AAA_35 AAA-like domain Bateman A agb Jackhmmer:A0YLR5 Family This family of proteins are part of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.97 0.70 -5.80 99 465 2012-10-05 12:31:09 2011-11-22 16:51:27 1 74 91 0 172 633 10 318.70 25 46.18 NEW hPpGsls.LsSsFYlERs.....PlEppsYppIhpP.Gu...LIRIKAP+pMGKTSLlsRlLspAppp....uYpoV.LshppADpsl.hssL-+....FLRWFCsslo+pLpLt.....s..pL.....--YWD..-thG..SKhsC.otYFccYLLppl........s.pPLVLuLDEVDclFpaPclApDFhsLLRuWHE.cu+....ppplWpKLRLllVHS..TElYls.LslNQSPF.NVGLsIcLscFohpQVp-LApRasLs.hstsp.....lppLMshVGGHPYLl+LALYalsp..............p...plT........LcpLLppAsTps..G.IYscHL+cphhpL.pppP....-LspAhppVlp.us.pslpL..-shtuaKLcShGLVphp..GsplhspCp.LYRpYFpc 
..............................................................................h...s.l..ss...Yl.R..........h-p.hhptl.....hp..Gp...h.hhlhssRQMGKoS.L....h.hch...h.....pphp.p.p..............s.hts...s..h....ls.hp.t...h..s...s....t............hs.s...hpp.............ah.p..hh...s..tpl...sp..p...l..p..L................................t......pl.......p..ph...W.p....c..p..hs.........sh...ph.....sp........ahp..p..h..l..Lt...ph...............s..ps...l....V.lhlD....E....l..D...p...l...h....ph......s.....h..h.......p.-F...h...shlRthap....ppt.......p.ps.a.p+.Lphsl....ht....s.......Tp.s......l......h.....hc........h.......s......p......oP......F.....Nl..G.h..s.lc....Ls..sF....sh.pp....l....p.s....L....h....p....t....h....t....h.p.....hspsp......lpplh.t.hs....uG..p.P.aLspth.hhhl.sp..........................................................p....phs................lpp.l.l.pp....t.h.h...ps.......t.......s.....p...HL....cp.h......h..ppL.....p.p...............cLht....hhp......pllp.........t.....t.....p..............h..t.h..........p.....s.......t.t..h..pL....h.GLlhhp...ts.tlh.ts..lYpthFt........................................................................................................ 0 31 126 171 +14368 PF14517 Tachylectin Tachylectin Eberhardt R re3 Jackhmmer:Q27084 Family This family of lectins binds N-acetylglucosamine and N-acetylgalactosamine and may be involved in innate immunity [1-3]. It has a five-bladed beta-propeller structure with five carbohydrate-binding sites, one per beta sheet [2]. 
25.50 25.50 25.50 25.70 25.30 25.20 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.88 0.70 -5.03 8 90 2011-11-23 07:54:36 2011-11-23 07:54:36 1 9 23 1 14 107 1 124.10 40 56.07 NEW auVspDhclptGssPcstsDsa...hsRAsplG+..hsshchlhhuPsGcLYuVcuu...pLYpG.pshsosuspWh.upu++IGcGuWspF+......FlhFDPsGlLYAVotsGpL..YRussPss-spsWhttpAshIGspGWssF-sLFFcPpGhLYuVs.scs..cLhKph.PPsussDcWLu.uoollupuuW.cs.s+FluFossGsLauV.cssGtl..Y..RstsPppssssYhccAphlGs.uaspa+h .....................................................................................................lYulp.ss....hYpt.....pps..ssph....h.tss.....thIGpu.WttFp......hlhh.ssG.LY..GVss.s..pF..Y+RsP..PTHuSDNWL..G..SAchIGoGGW+s...FphLh.................................................................................................................s............................ 0 6 9 14 +14369 PF14518 Haem_oxygenas_2 Iron-containing redox enzyme Coggill P pcc Pfam-B_412 (release 26.0) Family The CADD, Chlamydia protein associating with death domains, crystal structure reveals a dimer of seven-helical bundles. Each bundle contains a di-iron centre adjacent to an internal cavity that forms an active site similar to that of methane mono-oxygenase hydrolase [1]. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.40 0.72 -3.87 140 559 2012-10-02 21:56:19 2011-11-23 09:10:08 1 8 409 3 220 662 254 104.50 24 27.19 NEW Lhp.....shhD.Eh.G.sGp.scp.h................HspLa.......tchhpshGlsss.h.........shhct..hs.s.....t.....sl.u.....hsNhh..hhuh.p.+.p.hh.sthlGths.s.hE....hsss..h....hpphsp....u.lc.R.h.G...h.stts...hpaas.Hlp .....................hhpshh-.E.h.G.....sGp..pp.s.................HspLa.......tcllpshGls.s.h..............................thhpt.hs..s..........t........sh..t..........hs.shhh.hhsh.p.c.phh.sthlGhhh.uhE....hhss.t...hp.phsp....sLc.c...h.G.....h.s..t...htaashHlp............................... 
0 52 127 189 +14370 PF14519 Macro_2 Macro-like domain Coggill P pcc Jachmmer_A6ZME4, pdb_1njr Domain This domain is an ADP-ribose binding module. It is found in a number of yeast proteins. 27.00 27.00 27.00 27.00 26.60 26.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.93 0.70 -5.29 3 54 2012-10-02 00:07:53 2011-11-23 11:38:38 1 1 53 3 34 65 2 231.40 30 93.76 NEW MTts.s++p.hN...phRIVLCDTNEVVssLW+KhlPKuLlpssKhVClHHGsLESLhcSMRKscspH..........sGcKYAIVSPGNSFGYLGGGFDLALaNYFGGKPFETWFRscLGNRYHTVGSATVIDLpRChLsch.EsRDGIRYIIHVPTVVAPS+PIFpcupPLKTGYEPVFNAMWNSLMHuP+DlDGLIIPGLCTGYAGVPP-ISCKSMAFALoLYMLsD+ISKEL+NlLIMYYLGYPFEPFFLESCpEECQtLGIDIEpLcSFNVE+DsI-tLIP++lLs..L ................................................................................................................hp...h................................spsh...uIVSPuNSaGahGGGFDhAlh.......p.........h.F..G.....s.....+....s..h...E......s...hhR..ppl..s......sc........Y.......t....s...l...GS.sTl..lcLt.......cth.t.p.............tptp.......sl+YllcsPThlsP...pts......has.p...tshps.shp.l.....Fs.s.hW...N....sl......h.........p.s....s.p.c.I-sLllPGLsTGauGVss.lus+pMsFAlpLah...h...t..........................................................................th.................................................................. 
0 6 17 29 +14371 PF14520 HHH_5 Helix-hairpin-helix domain Bateman A agb Jackhmmer:C6UUJ1 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.79 0.72 -3.80 554 12686 2012-10-03 02:11:09 2011-11-23 13:36:33 1 143 4845 77 3277 11596 4688 61.00 23 15.86 NEW p..th..............h..p...t....L..h.......s...l.........sG........lG......s.p......hu.tt...l.............hpt..........ht...............................................o...l..p....p.l....t.p..............s......s.....h..c.p.........................................Lt....p....l....................G...............................l....u..p.c...pAppl...l.tph+.c .............................................................................h....ppl..h......s......l.....sG........lG..sc......hs.tt...L...........................hsp........t..ht...............................................o....h..p....c.l....t.p......................h......s.....h..c..c...................................................Lh....p.....l...................................tG..................................l.....u..cc...sAppl...l..thc........................................................................................................................................... 0 1019 2066 2740 +14372 PF14521 Aspzincin_M35 Lysine-specific metallo-endopeptidase Coggill P pcc Pfam-B_2237 (release 26.0), pdb_1g12 Domain This is the catalytic region of aspzincins, a group of lysine-specific metallo-endopeptidases in the MEROPS:M35 family. They exhibit the following active-site architecture. The active site is composed of two helices and a loop region and includes the HExxH and GTxDxxYG motifs. In UniProt:P81054, His117, His121 and Asp130 coordinate to the catalytic zinc ligands. An electrostatically negative region composed of Asp154 and Glu157 attracts a positively charged Lys side chain of a substrate in a specific manner [4]. 
30.00 30.00 30.10 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.18 0.71 -3.69 34 226 2012-10-03 04:41:15 2011-11-23 14:29:17 1 8 116 9 119 217 3 142.80 27 48.57 NEW u.Lpptsss..pp....apsWFGshssp...........+hsslhsphsshstsh.s..s..hthsCs......sst..s...sshthshPsph.tp..IhlsssFhs.hss.....sGhDSpsuTLlHEhSHFss.........shGTsDh..............sYs..pssupsLupssPspAlpNADshEhas- ..........................................................s..........t.p.....appWFGshssp...........+hspspst.hhphcpshts.tt..h...hhsss.........sp...s.......sthAhshssph.hp.....lal.ss.t.....Fhp.sss............oGp...-.S......+....suTLlHEhSHh.s...................ss..t..opDh..............sYu..ppssp.p....LApspP.spAlpNADsaphah.................. 0 87 97 113 +14373 PF14522 Cytochrome_C7 Cytochrome c7 Eberhardt R re3 Jackhmmer:Q74BP5 Family This family includes cytochromes c7 and c7-type. In cytochromes c7 all three haems are bis-His co-ordinated. In c7-type the last haem is His-Met co-ordinated [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.26 0.72 -11.57 0.72 -4.34 198 1003 2012-10-01 23:37:15 2011-11-23 15:49:51 1 52 372 25 549 1372 400 72.40 23 23.15 NEW shFsH.......ptHhp........th.....s..........Cs..sCH.sp................hh....sspt.....hshsph.......t..s............ptCss.CH..............sup.....pu..........................sssCs.pCHt .................................FsHphHht............th..s..................Cp..sCHss.............................ht.....ttph....................hphssh...t..s..........ttChs.CH..................................stp............s......................hssssCs.pCH................................................ 0 172 394 505 +14374 PF14523 Syntaxin_2 Syntaxin-like protein Bateman A agb Jackhmmer:E7Q9M8.1 Domain This domain includes syntaxin-like domains including from the Vam3p protein [1]. 
25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.27 0.72 -3.99 91 588 2012-10-03 05:55:03 2011-11-24 17:46:35 1 10 286 1 374 673 2 97.20 24 35.43 NEW luspl...hplsss....lsplp+hhpplG.Tt.....+Ds.clRcpl.c...........phhppssphhcphsptlpplsph.......................t.....pppphtppKLsp-FppslppapphQcp.htp+ppsts ....................................ustl.plsps........ssplpch.lspLG..Tt..........pDoscLRcpl.p...........phpppss.pls+cssptl+phsph..............................t.pppp+hppp+Lsp-FpssLppFQtsQ+pss-+p+t..h............................ 0 100 169 281 +14375 PF14524 Wzt_C Wzt C-terminal domain Bateman A agb Jackhmmer:A8A1Q5.1 Domain This domain is found at the C-terminus of the Wzt protein [1]. The crystal structure of C-Wzt(O9a) reveals a beta sandwich with an immunoglobulin-like topology that contains the O-antigenic polysaccharide binding pocket. This domain is often associated with the ABC-transporter domain. 22.00 22.00 22.00 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.65 0.71 -4.49 236 837 2011-11-25 13:44:43 2011-11-25 13:44:43 1 9 707 2 260 731 162 141.70 17 33.12 NEW thts.sttphGsspApI..pssplhs.tpGp.ssh...lpsG-plplplp..hph...ppslps.sl..hGhhl.+..sppG..h.lhG.sNohhpp....ttls..hht..........upphplpaphph...Lss..G.pYhlssul...................t..ptp...stp.hchhpcs.hhF...pVh.s..................spphhGlht.lss ............................................th........tpttspltpsplhs...tpsp..hp.h...lps.G-p.lplclp..hps....pps.lpp..sh..hGhtl..+.......s.ppG.........thlhG..sNo.hhps.......hplshh...........ssphphphphth....Lts..G.pYhlslul..................pppsth.h.chh.pp..s..hhh..pVhs..........p..h.Ghh.h..................................................... 0 95 178 225 +14376 PF14525 AraC_binding_2 AraC-binding-like domain Bateman A agb Jackhmmer:D9V2D0 Domain This domain is related to the AraC ligand binding domain Pfam:PF02311. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.69 230 1819 2012-10-10 13:59:34 2011-11-25 14:10:04 1 6 746 0 600 1693 67 174.60 16 52.75 NEW hcchluphhs...s......t..........clpst..s...ts..shp.....uphpthplGs..lslstlp......hs........sps......pl.....cs.sp............psh..hhlplslpGpuplp.ps.sp.pstsssup..sslhsssp.s.hp...h.p.hss.........ss.c.....plhlpls.............................c.......ph..ls.pshp.t...........sht.hs...sp.................hshs.sshu......thhtp.....hlptlhs.phsths.....s......hhpps.h..hspph...p.s.Llh.shLh ...........................................................phhsph.hs.s......h..........p..hps...s....tp...thp.....uphpt.ht.hss......lplspls........hs...............sth............tl.....ct..ss................sth..hhltl....l..sGps.t.h.p...p.s.....sp...ps.t.hs.sGc.....hslhcs..sp.s.hp...h.p..hpt................ss...c.....plsl..t.....lP.............................csh.Lp.phhs.t...........sh.h.hs...p.........................l...shs...sshs.......phhtp......h..lpplhp..phst.hs.............tt.t..t..htpth.....p.Llh..h..h...................................................................................................... 0 118 310 451 +14377 PF14526 Cass2 Integron-associated effector binding protein Punta M mp13 CATH:3gk6A00 Domain This family contains Cass2 from Vibrio cholerae, an integron-associated protein that has been shown [1] to bind cationic drug compounds with submicromolar affinity. Cass2 has been proposed to be representative of a larger family of independent effector-binding proteins associated with lateral gene transfer within Vibrio and other closely-related species. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -11.05 0.71 -4.20 24 758 2012-10-02 11:08:51 2011-11-25 15:11:58 1 8 542 1 127 2424 50 145.20 18 79.49 NEW clpchsuhslluhth..phppspt.....hptchsphapphh..p..pt....hsph.tpp...pchaulhts.t......psthshhsshssps......hsps.......hphhphPsupYhshps.p...uphs.p.lt.chatph.htth.ppp.psat........psss.s........p..hEhY........hpss................p....lElhIPV ...............................................................................................l.ph..thtlhuhtt.....hp.ptptt............tt.h.s..shapplh.....p.....ps...............hsp..l...t.t..........ssh.....a..u..l..a..p.......sh.t........hpsc.....h.s..h.........l.s.h..s..sps..........h...p.s....................hphh.p..l......s.s.c.Yhs.h.ss.p..........sphs..p..ls..p.....h..a...pth....hthh...p..p...ssht............hsss..s................s...hEhY............hpss........................s-lhlPl....................................... 0 51 88 110 +14378 PF14527 LAGLIDADG_WhiA WhiA LAGLIDADG-like domain Bateman A agb COG1481 Domain This domain is found within the sporulation regulator WhiA. It is a LAGLIDADG superfamily like domain [1-2]. 23.20 5.00 23.20 5.00 23.10 4.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -10.04 0.72 -4.12 23 1981 2012-10-03 01:41:40 2011-11-28 15:47:47 1 15 1950 3 344 1568 111 93.00 42 29.70 NEW tpualRGsFLuuGSlscP.pss.YHLElssssp-hspplpcllp..ca.slsuKlhcR+spallYLKcuEpIschLpllGAppuhhcaEslRlh+-hR .................t.+uYLRGAFLA.sGSlssP....-po..YpLEIho...h.p..-aA.psLspl.hp.......pa.tL.......s.....A.......K.s.......l.......E.......R.....+.......p.......u..........hls..Y...L....+..-...u........E.c.Is-FLslIGAhpu.hl.c.F.EclRIhR-hR............................................................ 
0 139 249 304 +14379 PF14528 LAGLIDADG_3 LAGLIDADG-like domain Bateman A agb Jackhmmer:P21505 Domain This domain is part of the LAGLIDADG superfamily [1]. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.61 0.72 -3.74 233 970 2012-10-03 01:41:40 2011-11-28 17:32:55 1 236 486 19 348 1219 203 81.20 21 10.86 NEW t.sFLpGl....asuDG..sl.....p..t..pp................tlplss...s.sp....p........llc........plpp.l.L.h.p.hGIhu..plh.......t............c.......................p...................p....................psh...a..pLhI....su..cs..ht.pFhcpIG ....................h.taLpGhasuDG...sl.....p..t..ppt..................plphs.s......s.sp.....p........lhc........slpp.l.L.h.p.h.Gltu...plh...................p...........................p.....................p.............................................................psh...a.plhl....su..cs..h..tahp.l...................................................................................... 2 97 192 286 +14380 PF14529 Exo_endo_phos_2 Endonuclease-reverse transcriptase Coggill P pcc CATH:1wduB00 Domain This domain represents the endonuclease region of retrotransposons from a range of bacteria, archaea and eukaryotes.\ \ These are enzymes largely from class EC:2.7.7.49. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -11.03 0.71 -4.49 204 2004 2012-10-02 01:25:08 2011-11-29 14:09:45 1 110 233 3 1603 5329 1737 121.10 19 18.41 NEW lhlhusYhsPst.............hpphhpplpphhpphs.......hllsG.DFNu..........tp.hWuu................ss..........................pcGptL....hphhpptslthh............sp.s..pt.sTahstps..............t....Shl.....Dlshs...ssshhtt..................h...hh.....SDHphlh ...................................................hlhslYt..Pstt................................pth.h.pt.L.pt.h.hpph.........thllsG.DF.Ns...........................................................h.p.....s.....t.....hss...................tp.........................tp...............................................ppu..p...tl..........hp.h..h.p...p...t...s.L.t.h............................................................pt..t.....pt..hTahsspt...................tstl.....Dhhhs.sp.shhtt........................thh.t......hh.....................SDHp.l............................................................................. 0 649 1047 1503 +14381 PF14530 DUF4439 Domain of unknown function (DUF4439) Eberhardt R re3 Jackhmmer:Q7TXQ6 Domain This domain has a ferritin-like fold. 25.00 25.00 25.30 25.40 24.80 24.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.59 0.71 -3.67 37 325 2012-10-01 21:25:29 2011-11-29 14:31:28 1 1 321 2 85 267 7 124.60 31 52.61 NEW ALpsALAAEHAAlYuYGlluu+l.ssstpstApsuhstHRARRDslhthlpstGssPssstAuYtLPh......sVsssusAspLAuplEpcsAssatsll.tsssushRshAspALp-uAlRus+Wpusss......AFPGhsp ..............h.tsls.hEauslauh..ul..s..t....u....h.......ss......t.hp............sttshttH+spR-pltptlpst....usTsss..s..s..s..G...Y..tl.sh............s..ssssAuutpLhstl.Es.csssuWtsVs..tAssu..ssRshAlsuhspsAhhhs+httshs.........saPG............................. 
0 28 64 81 +14382 PF14531 Kinase-like Kinase-like Eberhardt R re3 Jackhmmer:B6KSS4 Family This family includes the pseudokinases ROP2 and ROP8 from Toxoplasma gondii (Swiss:Q06AK3 and Swiss:O15693). These proteins have a typical bilobed protein kinase fold, but lack catalytic actvity [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.70 0.70 -5.45 8 140 2012-10-02 22:05:25 2011-11-30 11:55:51 1 3 7 6 79 15114 247 263.20 26 50.30 NEW shpVsSpLGptsRsLl+sshlshschulhapssDpETsEshsl+Vhhhss.-sopp-l-ph+cpsLAlsL.hhul+NPppApsahRhlhPaDLVplssKshhhptpscppshhVhNhFhLhPsspssL.........phlschltppssp..ctsLsptARLhLTlQhIRLsAsLQscGlVHuchpssshhLcpcGGlaLssF..ssLsRsGs+ssh.upss....puauPPEhpup+tt..at..sssphTauhDAWsLGlsIahIWCtcLPhshsssthu.-a.............hFspCp.shP-sV+hLltphLphspcsR ...........................................................................................................................................................................s...........h..l.hhht.lt..t..hhh.spc.tp.p...htlh......h.....h............t....p.........s....st....tt...hpp.........hp...c.t...htht....h.........p...s....s.........A....p...h.....hR...hlh.....P.....D....h.l....tl....t........ps.........p..ht......t..ps.....h...h..l.......s..h.h..h..L..h..P..t.....h.....p..s...s...h.............................................tt.h..h..p....l.ht...h...ss.........p.p.t..h...s.h..h...s..+...h...h...L...T....h.....Q....h.l...+.L.........l...A...p...L......p....s........p....G....l....V.H.u..........c..lpP..p......s.h..h......l........t......p.......c........G...........t..........l...h......L...u....D......F............u.p.......l...h....+....s....G.......s....p..h..s........upss................................s....a......s.....P.......P..E.......h...h..s...p.......................................................p........p....s.........t......h......T......a..........u.....h......D.....A....WtL.Gl...s..l...a.h.l......W....C...
.t.c....h....P.....h......s......h....s......s......s......t......h......s.........p.h......................................F..s.....t....C.....t.....s........h.....P........-..........V......c.....t......Ll....pphL..phs.ppR.................................................................................................... 0 57 58 79 +14383 PF14532 Sigma54_activ_2 Sigma-54 interaction domain Eberhardt R re3 Jackhmmer:D1EI59 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.45 0.71 -4.12 21 695 2012-10-05 12:31:09 2011-11-30 16:25:51 1 32 602 10 154 18102 4682 141.20 25 30.88 NEW LGsSsslp-lpcpLEtsu..ppssPlLLsGEsGSshEhlA+al+psssP....Wlp.schppls...c...hP........h-...lLpp....A....sGGhLals-lsphuKshQpulhhlLs+.........u-.+hsl...Rllssuupshsp.htsssh-scLhphLSshslplPsL ....................................................................................lGpSthh.p.p.hppp.l...p...p.h.A........p...s......s......h........s......V...hl.h.GEsGo.G+pp.l....A....+.....h.....l...H.....p...h........u.s.p................tpt...s...hh...h..h..phs.......s........s...........................................tp.........h.lpp.........................u...........pG.GT..Lh.lsc..l..ctL..s..c.p...t...Qp..pL.s.phLpp.................................tc...cp...sh...........R.l..l...us.s..s...t.sht...p.......h..t.t.sph.t..t-La.h.hhs..s.pl.hs.L.................................................................................................... 0 50 105 137 +14384 PF14533 USP7_C2 Ubiquitin-specific protease C-terminal Coggill P pcc Pfam-B_1954 (release 25.0) Family This C-terminal domain on many long ubiquitin-specific proteases has no known function. 
29.40 29.40 29.40 29.40 29.30 29.30 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.44 0.70 -4.90 69 415 2011-12-01 14:48:45 2011-12-01 14:48:45 1 20 259 1 265 395 4 201.30 27 19.14 NEW ptlaYElLs.hslsEL.Es++s...lKlhWlssshpc-.......p.hplhlsKsuTVpDllsclpc+hph.....s-ststclR..lh...Eh......tt.Khhchhs.p-psltsl...schh.t.............................hhhE...clPp-Ehpht.p..tt.....plltVhHFp+-ss......p.aGlPFhFhlppGEsFs-sKcRlpc+h..slss.KpFp.KhKFAl.lphsph..........pYlpD..cc..ll.shhhptc..........p..LG.LDH.scss+p ............................................hlaYphLs.hsls-h.-sp+s.............hKlhah........p.sshpc.............phplhlsKpusltDllpclppp.spl...........scpt...stclR..lh............-l.....hsp..Klhplht..tcph..lpsl....sct..p..............................................hhhE....clPp-c.hshs....t-..............................hllsVh...H..Fp+-st..........psaGhPFhhhl+p..uE..phtcs+cRlpp+h....tltp.cpFp.K..hKFAl.l..hsp...................pYlp-..st...l..h..p.p................p.hLG.L-H.sps............................................................ 
0 84 139 212 +14385 PF14534 DUF4440 Domain of unknown function (DUF4440) Eberhardt R re3 Jackhmmer:Q11v67 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.37 0.72 -3.83 216 1968 2012-10-03 02:27:24 2011-12-01 15:55:36 1 41 1054 19 759 4175 1032 109.10 15 69.00 NEW lhphcp.p...hh.p.u.h.....s.p..sD.hs...th.tpl...h..s.......s.-.h..hhh.....sss........Gth..h..s.........+pp..hlpth.........tp.......sh...h.t..h..t.ph.....ph.ps.......hp.l..p......hh...ss...sA.hlt....sp.hp.h.......pt...pt..s.....s...t.h.t.......tphth.splW.p+.p....ss..pWpl ...........................................................................h.tth...p.u.h.....s...p.....s-..hp....sl..tsh.....h..s...............s..-..h.....hh.l.........sss................Gth..h...s...........ppp....hh.pth...............ps.........tt.....h..p...t...t.sh........ph..ps..........hp..l...p........hh.....sc......sA....l..lp....hp..hp..h............ph.......ts...s..................s.....t.h..t............hp.st.h.stlap+.p.....ss....tWth................................................................................ 0 280 490 656 +14386 PF14535 AMP-binding_C_2 AMP-binding enzyme C-terminal domain Eberhardt R re3 Jackhmmer:A6L0Y5 Domain This is a small domain that is found C terminal to Pfam:PF00501. It has a central beta sheet core that is flanked by alpha helices. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.06 0.72 -3.94 244 1758 2012-10-03 01:00:17 2011-12-02 13:31:06 1 7 1084 15 662 1569 228 94.80 32 21.53 NEW GVNlFPoQIEplLhph.stlu.scYplllsR.p..s..s..hDplplpVEhsp..t.h.s.c.ph.............tp......hpp......lpcclpcpl+sh.lGl...s...s...cVclVpPtolsR.....S.E.G...K..A+RVlDpR .......GVNVFPoQIEcllhp..h..t.t.l.s....s..c..Y..plh.l...s+.c......s...p......hD.p..lplp.VEhpp..th..s...p.................tp........hpp......lpcpltccl+sh.lGl.s...s...c.....Vpl.lpss....olsR.....o.p.G.....K..ApRVhDhR............ 0 235 477 589 +14387 PF14536 DUF4441 Domain of unknown function (DUF4441) Coggill P pcc Pfam-B_1275 (release 25.0) Family This family is largely made up of uncharacterised proteins from the Ciliophora. The function is not known. 22.40 22.40 22.50 22.40 22.10 22.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.73 0.71 -4.12 66 94 2011-12-02 16:40:58 2011-12-02 16:40:58 1 2 2 0 94 142 0 118.40 24 42.38 NEW pNh.hKNI.l+uF...tpalh...............pppcp.....................................hlhphhpph.........tpp.p.......hpphp..Kphppahcppshs.N.p.....lppllpsppaspl.FpaaLpptsptWL.ppS+lpstppahhhIphlhpshpst..phlpplpha.K ................................................................................................pNhhKNI.lpuF....hpalh...............pppc.p...............................................hlhphhpph.........t...p.......hpphp..Kphpphhp.ppshs.Ntp......lppLlppppapph.FpaaLptpsptWL.ppSKlpspppahhhIphlhpshpst..phhs.lphhp............... 
0 94 94 94 +14388 PF14537 Cytochrom_c3_2 Cytochrome c3 Eberhardt R re3 Jackhmmer:Q8EDL6 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.81 0.72 -11.87 0.72 -3.64 85 1009 2012-10-01 23:37:15 2011-12-07 10:41:39 1 39 603 40 274 1023 144 93.50 36 38.08 NEW hHtptu.hsCt.sCH.......s...sst..ttst....s.hpspp................ClsCHs...sh.p.....ph......sp.ptt......................s.Hs.......sH..........spls.CssCHps.Hp...........ts.........hC.s.s.CHs ..................................th....hslsCs.sCH.................u.....pss....spp...cp.......ulcssh......................................Ch..S..CHh..sp.p....h.......pc..s.............................................hhPH.c.........sH...s.........scls.Cs.uCHu.h..Hs....................................c.tt.....phCs.s.CHs....................................... 0 87 186 233 +14389 PF14538 Raptor_N Raptor N-terminal CASPase like domain Bateman A agb Jackhmmer:F5H7J5 Domain This domain is found at the N-terminus of the Raptor protein. It has been identified to have a CASPase like structure [1]. It conserves the characteristic cys/his dyad of the caspases suggesting it may have a peptidase activity. 
27.00 27.00 27.00 34.30 26.80 26.00 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.82 0.71 -4.37 48 343 2012-10-03 02:24:44 2011-12-08 19:24:50 1 25 255 0 246 365 5 148.00 52 11.29 NEW +hKTsslsLllCLNIGlDPPD....lhKssPsA+hE....CW.....lDPps....hs..........t..KulcsI.upsLppQYcph..p.+.....s+a+.tlDPol--l++hCtshR+sA+s...-RlLFHYNGH..GVP+.PTssG.....EIWlFN+saTQYIPlslh-LpsWlssPslaVaDCSsAGhllpsFp ......................................................hKTsosALslCLNlGVDPPD....llKssPsA.+lE....sW.........lDPhs....hss.............p..KAlEpI.GpsLQpQYEph...p.R...............sRYKptLDPoV--lK+hCtoLRRsAKc....ERVLFHYNGH..GVP+.PTssG.....EIWVFNK.......sY..........T.......QYIPlSlYDLQoWluuPoIaVaDCSsAG.IlpsFp....................................................... 0 98 147 214 +14390 PF14539 DUF4442 Domain of unknown function (DUF4442) Eberhardt R re3 Jackhmmer:Q9I2R0 Domain This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 139 and 165 amino acids in length. There is a conserved PYF sequence motif. There is a single completely conserved residue N that may be functionally important. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.48 0.71 -4.17 66 935 2012-10-02 20:54:35 2011-12-09 10:57:25 1 2 624 4 278 1704 374 137.40 21 84.79 NEW GptlFStshsh+APYFuT.lpPplpcLcPs..hsplpl.c++tVpNHIGTlHAIAhCNhAEhAhGhhsEAolPs...stRWIPKGMsVpYlAK.AposlpAsAphs.s..........sa............pp.....s..s.-...lsVsVplh...D.psG.hp.V.spupIsh.WV .....................................................h......hhht.hhP.h.h...hs.sshcl..hplsss.....pscl.pl..hp..h.ts.c.N.a...l...s..o.....h..auGulh.shs-.s....s.hG..h...h....h..h...t....plsp.............chh...h..h...s...K..u..hplc..a...l.+.........u..c...u..s...l.p.Ap.spls.p..........tph............tp.....p...tt...h.h.......l...p....lh....-..pp.G...ph...l..spsphph.h............................................................................. 0 97 182 244 +14391 PF14540 NTF-like Nucleotidyltransferase-like Coggill P pcc CATH:3c18A01 Domain Structural comparisons with PDB:1kny indicate that this N-terminal domain resembles a nucleotidyltransferase fold. 27.00 27.00 38.40 37.70 22.10 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.33 0.71 -10.24 0.71 -4.50 23 78 2011-12-09 13:30:00 2011-12-09 13:30:00 1 1 78 3 27 80 0 118.80 40 40.16 NEW MEslLRPIYQE+ASpssTLGllhlE++..pppsslTDsFDslLLVIscc.sEpshalKHYphssc+suL+lVs-cpLpcWllhGoNR+llDWlhpG+llFDRNEYlppL+pcLppFP.ppRc .............M-slLRPIYQE+AScssTLullhIE+c..ppp..uulTDsFDslLLVIVcp.s-pshalKHYphcpc+AuLahVo-ppLpEWlLlGosR+lIDWllpG+llFDRNEYlppL+pcLppFPhtpRc..... 0 7 18 21 +14392 PF14541 TAXi_C Xylanase inhibitor C-terminal Coggill P pcc CATH:1t6eX02 Domain The N- and C-termini of the members of this family are jointly necessary for creating the catalytic pocket necessary for cleaving xylasnase. Phytopathogens produce xylanase that destroys plant cells, so its destruction through proteolysis is vital for plant-survival. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.92 0.71 -4.69 112 1552 2012-10-02 15:32:34 2011-12-09 14:54:42 1 19 111 13 878 2503 14 147.00 24 35.70 NEW tYalsl.puIpls.....sppl.s.lss.shh.........u...tGGshlsosssaThLtsslYpsltpAFsp.thst........h.p.ssssssFchCassss.h........tt..hshslP.sl.sLhhpu.....us.....phpl.us.shhlp.s..s...s.s.........sh.CLuFlss.................sstssslIGuhQhpsphl.FDltssplGFss ...................................................................Yhlsl.pu..IpVu.................sphl..s....l..ss...shh...............s..........suG..sllDS.GTs.hThLs....sss.Ypslppsh.tp...t..hst......................ss.s..h...s.s..h....c..h.C...ashss.........................ttstlP.s.....l.slpFps...........Gu.......shpl.s.s.p..sh..hh.t..s..s......s.s...........................hh..C.L...u.hsss...................................ttshsllG...sh...pQpshh.lhaD..ltpp.plGFt..................................... 0 106 490 698 +14393 PF14542 Acetyltransf_CG GCN5-related N-acetyl-transferase Coggill P pcc CATH:1xmtA00 Domain This family of GCN5-related N-acetyl-transferases bind both CoA and acetyl-CoA. They are characterised by highly conserved glycine, a cysteine residue in the acetyl-CoA binding site near the acetyl group, their small size compared with other GNATs and a lack of of an obvious substrate-binding site. It is proposed that they transfer an acetyl group from acetyl-CoA to one or more unidentified aliphatic amines via an acetyl (cysteine) enzyme intermediate. The substrate might be another macromolecule. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.46 0.72 -4.09 355 2309 2012-10-02 22:59:21 2011-12-09 16:30:00 1 16 1990 7 555 1626 130 78.60 29 78.76 NEW Ratlth.s............t...uhh...sY..................pt...............sst.........hh.slsHThVssshcGpGlAspLlctulctsRppGh.+llPhCsalt.s.ah..c++P.-.a.p-l ..................................pa.lts-......Gpth..u.l...s.Y.............pp....................sss......hhhlsH.T.hVscshcGpGlupp.Llct.sl-psRc.p.s.....h.Kl...l..PhCsasp.phh..c+ps..-..a.p-l................. 0 176 360 478 +14394 PF14543 TAXi_N Xylanase inhibitor N-terminal Coggill P pcc CATH:1t6eX01 Domain The N- and C-termini of the members of this family are jointly necessary for creating the catalytic pocket necessary for cleaving xylanase.\ Phytopathogens produce xylanase that destroys plant cells, so its destruction through proteolysis is vital for plant-survival. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.35 0.71 -4.29 111 1620 2012-10-02 15:32:34 2011-12-09 16:34:32 1 20 98 13 922 3319 9 167.20 27 38.32 NEW YshplphGs............P...........h.hslDhu.ushhW.hpC....................................t...Ssoa....psl.C...........sSs.Ct.ht......................sssts..ssCthhs..s.....stsssG..pl.spDsl.hssst.t..................sshsshhFuC......ussth..............uh.t...........................sssGlhGLu.pst...hSLsuQlutt.....hsp+FuhCLss..............ssssGhlhFGs 
..........................................YhhplslGT..P.sp.........hhlhlD.TG.S-lsW...l...p..Cpss.................................................................tshtt..shas.PspS.ooh..........ptlsC.................ss..s..h..Cptht.....................................tsss.s.s.s....s.C...s.....Y.p...h..p......Yus.......sosotG.......hl..spDslslsss...........................ssh.s.s.hsFG.C........upssp.............Ghht....................................sssG.l...lGLG....pus...................hS.......l.......s......o......Q......l...tt................hsstFSaC.Lss.................sssuhlhhG............................................................................................. 0 113 501 731 +14395 PF14544 DUF4443 Domain of unknown function (DUF4443) Coggill P pcc CATH:2p8tA02 Domain This is a family of archaeal proteins. The domain is a putative gyrase domain. 18.50 18.50 18.60 19.00 18.40 18.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.59 0.72 -4.16 8 29 2011-12-09 16:41:01 2011-12-09 16:41:01 1 1 29 1 20 33 3 105.00 30 52.94 NEW FS.EsttV.u.VEGaPAYAIVVKNPPpFKSIELRDEAIRFFAKGAMILlVKNGElVFPEDtRPL+EThPELAE+L...l+h..--...GDhlVVTWAENPuDAhKSAhHVALsLKp-EI .........................................hulhhtt.sp.h.c.u..l...-LRDEAIRasAcGAhIlhhKsGcllFP..E..DtcsL....c-hhs..cls..ccl...tph....c-...GDhlllThu-s.spAhpuhh............................... 0 4 5 13 +14396 PF14545 DBB BCAP_N; Dof, BCAP, and BANK (DBB) motif, Coggill P pcc PROSITE, Pfam-B_2980 (release 26.0) Domain The DBB domain is named from the Drosophila (Downstream of FGFR - Dof, also known as Heartbroken or Stumps) protein, the BANKS and BCAP, both signalling in B-cell pathway, proteins. This domain defines a minimal region required for mediating Dof dimerisation. Since this domain can interact both with itself and with a region in the C-terminal part of the molecule, it may mediate either intermolecular or intramolecular interactions [1]. 
Mutants lacking this domain disrupt FGFR signal transduction and fibroblast growth-factor signalling [2]. 22.20 22.20 22.90 22.20 21.80 21.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.82 0.71 -4.67 13 113 2011-12-12 13:27:38 2011-12-12 13:27:38 1 1 63 0 61 137 0 136.40 37 17.33 NEW FsVtP+Kl+sGps.+VhllLsp...sLpccsslpVphcpssps.....lssscpcNPYTlphssP-thhplSthVslplcpsshsLGs+slKCcScLcElpplLps...sssPlEFMCQulsIsPsspEpLDplLhpoFp+.NlPss.apLhu .........sV.Pc+lpCGpptplalIl+s.....cLpcpso.sElEFpspsp.hh...phpsphcNcYTlshpAP-h...suGsVslplYssslslupssIpYYoshcElpplLpp...sssPlEFhCQAhtlsshspEsLDplLTpshKc.NlPssthpLht...... 0 12 16 33 +14398 PF14547 Hydrophob_seed Hydrophobic seed protein Eberhardt R re3 Jackhmmer:Q9S7Z9 Domain This domain has a four-helix bundle structure. It contains four disulfide bonds, of which three function to keep the C- and N-terminal parts of the molecule in place [1]. 23.40 23.40 23.40 23.50 23.20 23.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.24 0.72 -10.80 0.72 -4.08 24 532 2012-10-01 19:46:35 2011-12-12 14:20:06 1 8 71 0 253 530 0 81.10 49 49.60 NEW sCPhssl.cLssCssVL.slhplhlGs..tsptCCsLltGLssl-AAsCLCssl+hplLs.lsl.lsl..slplllshCG.+s..PsGFpCs ................................pCPh..Ds.L..KLusCuslL..G.............L.l.p.lt.lG..s..........s...s......spsCCsLlpG.LsDL..-.AAl.CLCT.AlKAsl.L.G.......Isls.l.Pl..sLs.LLLNhCG.Kps....PsGFpCs.................... 0 18 110 191 +14400 PF14549 P22_Cro DNA-binding transcriptional regulator Cro Coggill P pcc CATH:1rzsA00 Domain Bacteriophage P22 Cro protein represses genes normally expressed in early phage development and is necessary for the late stage of lytic growth. It does this by binding to the OL and OR operator-regions normally used by the repressor protein for lysogenic maintenance. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.62 0.72 -4.25 33 735 2012-10-04 14:01:12 2011-12-12 15:55:30 1 3 468 8 62 374 11 58.40 33 75.46 NEW pKsDllpaFGuts+lApALGlopsAVSpW....G-hlPEhRAaplEclTsGpLKss...sslhpcss .............h...cslsaa.G.o.ps.KlApAhG..ls..suV.spW.....sch..lPc.......t.......R.......Ah.plppsou.Gtlphp...........p........................... 0 11 27 42 +14401 PF14550 Peptidase_U35_2 Putative phage protease XkdF Coggill P pcc Pfam-B_5816 (release 26.0) Family This domain is largely found on phage proteins. In a number of cases the domain is associated with a SAM-dependent methyltransferase. 24.00 24.00 24.60 29.60 23.90 21.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.75 0.71 -4.50 35 118 2012-10-01 19:43:34 2011-12-20 15:14:30 1 3 106 0 21 108 166 123.60 35 38.81 NEW hppplclh+........psp....--cplVaGlVhpPs......hhDucGDh....hss-EIEKAAapFhcp.....hpplDtpHch...psusuplVESaIsss.DhplsG...p..slp+GoWlhss+s...sD..p-lW-p....l+cG..choGaSluG...sAcph-h ...............tppVclh.ppsp....--p+lVaGlVhEPc......s.DuHGDa....hoA--IEKAAasFhpp.....hpplDhpHsh...psusupVVESalsPs..Dhplss.....p.plpKGoWlhss+ssD......-lW-p....lKcG..clTGaShuG...sAch..h........... 0 10 15 18 +14402 PF14551 MCM_N MCM N-terminal domain Bateman A agb Jackhmmer:O27798 Family This family contains the N-terminal region of MCM proteins. This region is composed of three structural domains. Firstly a four helical bundle, secondly a zinc binding motif and thirdly an OB-like fold [1]. 
24.90 24.90 25.00 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.38 0.71 -3.62 340 2275 2011-12-20 15:49:09 2011-12-20 15:49:09 1 42 484 10 1550 2224 62 132.50 20 16.40 NEW hpcpF.ppFlpp...............ht..................................................................hYhpp................lpphhpp....p...p.........................................psL.lchp.cLtp..a..s..................................p....................pL.sp.t.lhppPtchl.s..hhcp.ulpchh.t.............................................................................h.spht...pc......tp......h.plphhsh............sp.............thslRsL.c..us..clspLlslpG .................................................................................h..ptFppFLpp........apt................................................................phhY.hpp..........................lp.phhph......pp...........................................psL.Vsh.p.c.L.tp..a..s.......................................p.......................pL.sp.t.lhppPtchl.s..hhpp....Al...pchhhp.............................................................................hsstht..tp........pp......h.plp..hhsh.................sp...............hhslRsL..p..ss..plspLlslpG........................................................................................................................ 0 540 876 1300 +14403 PF14552 Tautomerase_2 Tautomerase enzyme Coggill P pcc CATH:3c6vA00, Pfam-B_819 (release 26.0) Domain \N 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.50 0.72 -3.96 157 646 2012-10-01 20:38:22 2011-12-20 16:17:50 1 4 541 17 193 1070 117 79.40 31 61.58 NEW D+Fpllppa..csschhhs..sp.h..ls....sRocshllIpIssttsRohEpKptLYctlscpLpppsGlpspDlhlslsEsst.-sWSFG ...............................Dpaphhppa..p..s..pphhasst.h..Ls...hpRo.-s..h.lhlpIssttsRohcpKcpLYptlsppL.pppsGlpspDlhIolhEss.t.-sWSFG....... 
0 42 99 150 +14404 PF14553 YqbF YqbF, hypothetical protein domain Coggill P pcc CATH:2hjqA01 Domain This N-terminal domain is found in Bacillus and related spp. The function is not known. 27.00 27.00 33.20 33.00 26.40 17.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.93 0.72 -4.29 9 66 2011-12-20 16:42:37 2011-12-20 16:42:37 1 1 59 1 3 36 0 40.30 59 71.20 NEW ltGpTYsshGp..hFhhs.EppVscchapYLpsNcaFplpc-.sp ..IpGpoahAas+..+FLhupEEcVSEKlYNYLRRNEFFEVRKEE..a... 0 1 2 2 +14405 PF14554 VEGF_C VEGF heparin-binding domain Bateman A agb Jackhmmer:B4YYD6 Domain This short domain is found at the C-terminus of VEGF. It has been shown to have heparin binding activity. 27.00 27.00 51.00 51.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.92 0.72 -4.16 5 153 2011-12-20 16:57:28 2011-12-20 16:57:28 1 2 59 5 32 137 0 54.20 78 25.52 NEW cpcENpCEPC....SERRKRLFVQDPtTCKCSCKaTDucCKSRQLELNERTCRCDKPRR ................t...ps.CtPC....SER..RKHLFVQDPQTCKCSCKsTDSRCKuRQLELNERTCRCDKPRR...... 0 2 5 13 +14406 PF14555 UBA_4 UBA-like domain Bateman A agb Jackhmmer:A2AT02.1 Domain \N 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -7.92 0.72 -4.42 201 1716 2012-10-01 23:03:33 2011-12-20 17:19:24 1 62 362 9 1068 1687 15 41.70 26 10.41 NEW cphlspFh.slT.G..s....sp.....ppAppaLctssWsLctAlstaappspss ...................thlppFh.slT.G...s.....sp......stAtphLptssW..sL....p....tAlstaapt.............. 0 328 522 821 +14407 PF14556 AF2331-like AF2331-like Coggill P pcc CATH:2fdoA00 Domain AF2331-like is a 11-kDa orphan protein of unknown function from Archaeoglobus fulgidus. The structure consists of an alpha + beta fold formed by an unusual homodimer, where the two core beta-sheets are interdigitated, containing strands alternating from both subunits. 
AF2331 contains multiple negatively charged surface clusters and is located on the same operon as the basic protein AF2330. It is suggested that AF2331 and AF2330 may form a charge-stabilized complex in vivo, though the role of the negatively charged surface clusters is not clear. 150.00 150.00 155.20 155.10 29.70 17.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.18 0.72 -3.64 3 3 2011-12-20 17:20:05 2011-12-20 17:20:05 1 1 3 2 3 3 0 93.30 46 100.00 NEW MPTYVFsKESFLKFLEKNLtEDsVVVVSSDVTDlDccpuESa.LGcK-aahVcFAlsADVFKEsDlDEFDEhhKYsVVFVESDEL.SEAG+KA.hR MPTYVFsKESFLKFLEKNLtEDsVVVVSSDVTDlDccpuESa.LGcK-aahVcFAlsADVFKEsDlDEFDEhhKYsVVFVESDEL.SEAG+KAhR... 0 1 3 3 +14408 PF14557 AphA_like Putative AphA-like transcriptional regulator Coggill P pcc CATH:2rkhA02 Domain Members of this family are putative transcriptional regulators that appear to be related to the Pfam:PF03551 family. This family includes AphA-like members. 27.00 27.00 27.10 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.72 5 13 2012-10-04 14:01:12 2011-12-20 17:41:38 1 1 12 0 5 54 17 175.10 40 93.05 NEW asDNoLTPKEAVRLsALG.hIARuPhRYuDLAuAVRHFloRIsGPSLDLhGoSlELLRaEGLlEPlsGcGMEDNApLuIT-uGRpELpcLlTAsLRAu.SD.LuKLVIuLKLRFLDLLsscsRpcQIDsLLchsEoELARLsDL.RAAhuu-Gs..Lht-WLDp-IsQlEpRLuWLcuLt .................................................atDNoLsP+EA.VRLssLG.hlAcuPt.R.Yu-LAu.u.V..RH.FhoRIhGPS.LD.LhG...oSlEhLR.aEGLl..E...shs...G..p..G.M.E...Ds.Ah..L.uIT-uGRpEhpsLh.sAslRss.oD.Lu+LVluLKlR....FLDLLss-pppsQl-tL..l-hsETELARLhDL.RA..A.s...s.s..s.G.s........hht-WLDp-I..sQsEpRLuWLcsL.h.................................................... 0 3 4 4 +14409 PF14558 TRP_N ML-like domain Bateman A agb PF06011 Domain This domain is distantly similar to Pfam:PF02221 and conserves its pattern of conserved cysteines. This suggests that this domain may be involved in lipid binding. 
27.00 27.00 27.10 27.00 26.90 25.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.92 0.71 -4.16 62 432 2012-10-01 19:31:57 2011-12-21 11:00:42 1 8 131 0 325 430 0 145.30 28 17.73 NEW chlposulssCh-s.Sphsss...............hFclpasssspolsaslsus......................................oslssp.VssplplhAYGhplh.scshc.Cs..hs..ht........tlCPlssGphsspus.hh..sp..phsspIP...uI.AYslPDLDApl.+lhlhsss........sspplAClpusloNGKTsp .................................t.ltosuhssC.....hss.Sthsss...............hFslsass..s..s..p.o..lphslsus.....................................................................................os.hssp..lshplplhAYGhphh.sp.s.hc..Cs.....hs.....hp............slCPls.sG.p......hshpss.hl.....sp.....sh.sspIP...................uI.AYslPDl-Aps+lhl.tssp.............sspplAClpuploNG+Ts..................................... 0 61 158 265 +14410 PF14559 TPR_19 Tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q87RI8 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.06 0.72 -3.63 300 8943 2012-10-11 20:01:04 2011-12-21 11:26:45 1 1648 2991 20 2662 21075 6378 65.60 19 13.54 NEW hltpu.....chspA....hplhpp...s.h....pt.....psp....sspsth..t.....L..Acshlpt.sphcp...ApplLsplstppps....sphp..sl...hApl .................................................tps....chspA.........hphhcp.......s.h........pt...................pPp..........ssphtl.......t....................L.....Apsh.hpt.G...c.......hpc...........Ap.p...hL...pp..hhtpp.s............................h................................... 0 874 1638 2192 +14411 PF14560 Ubiquitin_2 Ubiquitin-like domain Bateman A agb Jackhmmer:1t0y Domain This entry contains ubiquitin-like domains [1-2]. 
23.10 10.00 23.10 10.00 23.00 9.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.81 0.72 -3.73 85 602 2012-10-03 10:59:06 2011-12-21 11:57:51 1 38 292 5 409 5698 192 79.70 20 19.48 NEW sVplplTpstsp..htsE...tRashshTlpplKpKLphhsGsssssMpLpL..h.spssphlssl..........s-..DsthLGtYs.lcDGhclHVlDpsPss ..................................................c...h+.ls.shTl.....sp........lKp+Lphl.sGl..ss.sshcL..h....h....t..p..s...s..t...h...tph.......................cp....sp.phLst..as.lps....s.....plhl.......t............................................... 0 159 216 322 +14412 PF14561 TPR_20 Tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q87RI8 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.76 0.72 -3.87 216 1831 2012-10-11 20:01:04 2011-12-22 16:10:40 1 21 1796 5 436 1379 539 89.00 34 30.03 NEW pussss-lppLctpls...ssPsDh..pAphpLAhthhtsGch--AlcpLlpll++Dpsh........s-ssARppLlclFpslGss...DPhssphRR+LsolLa ..............................tssss-ltpLppplA....t.sPp.Dh..phthpLAh.th.htsG+s.E-Al-hLhshl++.D.h..st.................................s-...sp.sR+phh-lhsslGss.............Ds.lssphRRpLhulLa.............................................. 0 116 257 352 +14413 PF14562 Endonuc_BglI Restriction endonuclease BglI Eberhardt R re3 Jackhmmer:O68557 Domain This restriction endonuclease binds DNA as a dimer. BglI recognises and cleaves the interrupted DNA sequence GCCNNNNNGGC and cleaves between the fourth and fifth unspecified base pair to produce 3' overhanging ends [1]. 
25.00 25.00 25.10 25.30 24.80 20.90 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.80 0.70 -5.56 3 4 2012-10-11 20:44:47 2011-12-23 09:30:11 1 1 4 1 2 6 2 287.50 47 97.46 NEW RpclapSYppsRsYLlsNh-pLIclEpYsLsllsNlI+-NtEEI+ADYNE.AsaLaPFWpNYPPE-RGRtP+GDQIPWLEVGEKsVGSKLsRLVsp+hE.sVR-lGLPTGuDlRallos.pI.plTNshTDSsalFlDIKSVGPRDsDs-lVlSPNQVSGsG-.W-sFpsGIpNNphTI..pGsRu.NasFhPoLPPLYILSDGpIVPVVplaIKPVYuMlSLp.psDGGQPLp+IclASVPNGLLLFsNPsYuaspAacsLFpPGKDEhTKDsppRRlRVcLclLuRIusWRshcIDp ........................Rpcla.SYpps+pYL.sN.-...clEhYsLsllsplIp-NtEEI+ADYNE.AsaLaPFWhNYPP.-RG+hP+GDQIPWlEVGEKsVGSKLsRLVsp+.-.sVR-lGLPTGsD.Rallos.pI.plTNshTDShhhFlDIKSVGPRDsD.-lVlSPNQVSGsG-.WsthpsGIpNNp.TI..pGsRu..sp.FhPolPPLYILSDGpIsPVVplaIKPlYuMhSLp.psDsGQsLh+IclASVPNGLhLFsNPsYuaspAachLFpPGKD-hTKs.hp+RlRVcLclLs+Ius.RshpIDh.. 0 0 0 1 +14414 PF14563 DUF4444 Domain of unknown function (DUF4444) Eberhardt R re3 Jackhmmer:Q1GJN9 Family This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved LIPL sequence motif. There are two completely conserved G residues that may be functionally important. 25.00 25.00 42.90 42.20 24.00 23.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -8.06 0.72 -4.57 20 30 2011-12-23 10:42:36 2011-12-23 10:42:36 1 1 30 1 4 32 8 42.20 56 19.81 NEW lGE-lThsG.....tTGTFlGVDEcFGMLLRsss.sTHLIPLTolLEs .......lGEplThsG.....tTGTFLGVDEcFGMLLR-ss.sTHLIPLToLLEp. 0 0 3 3 +14415 PF14564 Membrane_bind Membrane binding Eberhardt R re3 pdb_1yhp Domain This family includes the C-terminal domain of Dictyostelium discoideum Calcium-dependent cell adhesion molecule 1 (Swiss:P54657), which has an immunoglobulin-like fold. It tethers the protein to the cell membrane [1]. 
25.00 25.00 28.70 75.60 22.40 21.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.42 0.72 -3.88 26 35 2012-01-03 14:41:10 2012-01-03 14:41:10 1 3 21 2 25 36 0 112.10 26 45.68 NEW slsl+lh....sssssstpYphslpsapls...ssshposs-...Yshlslhshs..sspllsplsl+sppt.....GthlssGSlYF+Yssssuplshsc.s...psaPp..sLclppsspspFshsLhs ..........lsl+hh...sssssssppYphslpshpls...ssslhSsss....Yshlslh.hs..ssplVsplslR-pph.....GthlssGSlYF+Ysssssplshsc.s...-saPp..slclpcsspssFslsLh.... 0 11 19 23 +14416 PF14565 IL22 Interleukin 22 IL-10-related T-cell-derived-inducible factor Coggill P pcc Jackhmmer:Q9GZX6 Domain Interleukin-22 is distantly related to interleukin (IL)-10, and is produced by activated T cells. IL-22 is a ligand for CRF2-4, a member of the class II cytokine receptor family. 30.00 30.00 30.00 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.10 5 53 2012-10-02 01:28:15 2012-01-04 14:53:14 1 1 36 14 29 136 0 136.60 54 77.25 NEW CRLDKSNFQQPYITNRTFMLAKEASLADNNTDVRLIGEKLFHGVSMSERCYLMKQVLNFTLEEVLFPQSDRFQPYMQEVVPFLARLSNRLSTCHIEGDDLHIQRNVQKLKDTVKKLGESGEIKAIGELDLLFMSLRNAC ...................................C+LcpSsFQpPYIsNRTFhLAcEASLADNsTDVRLIGcc.LF+G..V...s.h.s-RCYLMKQVLNFTLEEVL.......h...P.......p..S..D+...F.....p..PY....Mp....-V...V....s....F....L....u....+.....L.Ss+LSp..CHIp.G...D-...pH......I...Q+...N...VppLK-T.VKKL.G.E.s.G-.IKAIGELDLLFhsL+sAC... 0 1 2 6 +14417 PF14566 PTPlike_phytase Inositol hexakisphosphate Coggill P pcc Pfam-B_194 (release 26.0) Domain Inositol hexakisphosphate, often called phytate, is found in abundance in seeds and acting as an inorganic phosphate reservoir. Phytases are phosphatases that hydrolyze phytate to less-phosphorylated myo-inositol derivatives and inorganic phosphate. 
The active-site sequence (HCXXGXGR) of the phytase identified from the gut micro-organism Selenomonas ruminantium forms a loop (P loop) at the base of a substrate binding pocket that is characteristic of protein tyrosine phosphatases (PTPs). The depth of this pocket is an important determinant of the substrate specificity of PTPs. In humans this enzyme is thought to aid bone mineralization and salvage the inositol moiety prior to apoptosis [3]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.84 0.71 -4.16 103 540 2012-10-02 20:12:17 2012-01-04 15:56:02 1 4 197 33 220 698 17 143.50 25 39.58 NEW lsLREEshs...............asNst....hshct...hps.tpp..l.t..hpuhsstplpphEpth+pclhtps........pphssphhh.................................t...................................................................hc.p.thspshslpYhRlPlTDcpsPpspslDthlphlcsh.sp..................sshlhFpCptGpGRTTohMlhhsllp ........................................................................................................lsLREEshs.................ahs.Gts....hoh+phps.hps...h.........h.shstppl-...phEt........tl.+pclhtps........pphtsthhhhpp.pph..........................ph.tl..............................................................hhc.p...hs..p..th..sh.p.Y..hRlPloD.p.p....t.P.p.p.pshDthlphlcph..sp.....................................................sshhhFpCpsGpGRTohhhshhshh........................... 0 90 144 182 +14418 PF14567 SUKH_5 SMI1-KNR4 cell-wall Coggill P pcc Pfam-B_7167 (release 26.0) Domain Members of this family are related to the SMI1/KNR4-like or SUKH superfamily of proteins. 
27.00 27.00 27.00 27.10 26.80 26.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.37 0.71 -4.70 27 120 2012-10-01 20:46:44 2012-01-04 16:35:07 1 2 109 1 35 225 9 129.90 51 92.78 NEW Mc-lI-pLpphspssslsl-LPsp-pls-lEcplhlslPt-aKcaLhpsSDllhGslEPlolsDs.oHoY..LsEsss.Ahs.hGlP+-hlPI.CpssssaYCls.....p-Gp........VhhWs..cG...s-EpWtshapWsccVWl ......................Mc-lIEpL+E.hsE.sVPV..PL.EL.P.-.-.-pLVElEEpLhIslPhpaKEFLLps..SD.V.........lY.....Gs.l.............EPVT..lo......D..Pp.SHTY...........LPEV...su..pA.W-...lG..l.PR.-lIPl.Cp.-..G..csYYCl-........pDGp.............VhlWs...-s-l..o--sW-SlWpWscDVWL...................................................................... 0 3 8 24 +14419 PF14568 SUKH_6 SMI1-KNR4 cell-wall Coggill P pcc Pfam-B_725 (releawse 26.0) Domain Members of this family are related to the SMI1/KNR4-like or SUKH superfamily of proteins. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.35 0.71 -10.69 0.71 -3.63 86 516 2012-10-01 20:46:44 2012-01-04 16:50:21 1 4 281 2 92 1001 10 112.20 18 72.41 NEW ccpIpcsEppLul.....phPpsYKpFLcpYGu.ut.......hsGh-lhu.....h.................tsshhhtshh.t.ptthpp.htl.p.h.........llhpsssGphashc.........ps.scs.lh...........h.stptp.hhussFtEaL ......................................pIpphEppLsh.....phPps.Y+pFLcp.hsu.st..................h.s.u..p.lhs.....h......................................t.sh.hhpp.h...............htp.....ph..pph...............llh..p..s..s..s..s...s..h.hshc.........ptsps..lh.............................t.....p.....h.....h....hsssht-al....................................................................................................... 0 24 55 61 +14420 PF14569 zf-UDP Zinc-binding RING-finger Coggill P pcc Jackhmmer:A9T9M4 Domain This RING/U-box type zinc-binding domain is frequently found in the catalytic subunit (irx3) of cellulose synthase. 
The enzymic class is EC:2.4.1.12, whereby the synthase removes the glucose from UDP-glucose and adds it to the growing cellulose, thereby releasing UDP. The domain-structure is treble-clef like (PDB:1weo). 27.00 27.00 42.80 42.10 24.80 24.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -9.87 0.72 -4.23 6 380 2012-10-03 15:03:13 2012-01-05 15:54:05 1 6 85 1 128 405 0 75.90 53 8.56 NEW PKPLpNlNuQlCQICGDDVGVTl-GElFVACsECuFPVCRPCYEYERKDGsQuCPQCKTRYRRHKGSPRVcGD--EDDsD ...............suplCpICGDpVGls.ss.G-.sFVACsECuFPVCRPCYEYER+EGsQsCPQC+..TRYK.RhK.........Gs...Ptl.GD-.-.-.-.t.................... 0 18 88 109 +14421 PF14570 zf-RING_4 RING/Ubox like zinc-binding domain Coggill P pcc Jackhmmer:A5BM39 Domain \N 27.00 27.00 27.00 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.35 0.72 -4.38 58 462 2012-10-03 15:03:13 2012-01-06 18:22:44 1 17 256 2 313 530 12 47.80 51 5.58 NEW CPlCspch..DhsDpshhPC.pCuapIChaCapclhp.....s..t.....sGpCPuCRcsYc .............CPLChE.h...DlsDt.sFhPC.sCGYQ...ICpFCapcIcp......s..t.........sGhCPuCR+sYp........... 0 101 187 261 +14422 PF14571 Di19_C Stress-induced protein Di19, C-terminal Coggill P pcc Di19_old Family C-terminal domain of Di19, a protein that increases the sensitivity of plants to environmental stress, such as salinity, drought, osmotic stress and cold. the protein is also induced by an increased supply of stress-related hormones such as abscisic acid ABA and ethylene [1]. There is a zinc-finger at the N-terminus, zf-Di19, Pfam:PF05605. 
26.00 26.00 26.40 31.30 25.40 25.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.53 0.72 -3.72 49 170 2012-01-09 11:51:23 2012-01-09 11:51:23 1 4 26 0 88 170 0 98.50 30 47.24 NEW slSlLp+...-L..R.-upLQuLL.GGu......sss.s..s...uo.ssusDPLLSSFlhsh.ss.scs.p...c..ssc..s...sssst....pc.s.Stpcphs..pp..sh..p.t.s..pss.LStc-pE....EpspRucFVQsLLlSTl .......................hShLt+-Lc..-upLQsLLGGu.......tsts...s...ss...s.sssDPLLSSFlhshss.scs.p...pssp..s...sssst....ss.sshtpphs..pp.sh.p.....pss..Lopc-p-....c+spRupFVQtLlhSTl.......................... 0 14 55 73 +14423 PF14572 Pribosyl_synth Phosphoribosyl synthetase-associated domain Coggill P pcc pdb_2c4k; Jackhmmer:Q14558 Domain This family includes several examples of enzymes from class EC:2.7.6.1, phosphoribosyl-pyrophosphate transferase. 27.00 27.00 27.20 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.17 0.71 -4.54 11 6166 2012-10-10 14:25:38 2012-01-09 15:06:51 1 16 4467 28 1767 4367 2278 127.90 40 39.16 NEW DYRNAVIVAKsPuuA++ATSYAERLRLGlAVIHGEsK-u.EsDh.sDGRpSPPsh.cs.............sslsssht..lPhhhsKEKPPlTVVGDVGGRIAIIVDDlIDDlpSFVAAAElLK-RGAYKIYVhATHGLLSuDAPcLlE-SsIDEVVVTNTlPH-lQKhpCpKIKTVDISllluEAIRRIHNGESMuYLFR ...............................................................................................................ptsssp..............................................................................................................................................................................................s.......M....p....l.I.G.D..V..c.G+..ss..lll...........DDhIDT...uGTlsp.AAc..s......L.c.-.p.G.Ap.c.VaAhs.TH.....ulh.S...G......s.......A...h.....-....p....l....p....s....S....s....l....c....E..lVVT.........D..T..I...s........l....s..c...t.........p..p....h....s....+.l.........c.hl.o...lu...s..l..lAEAIc.Rla.pp..cSlSsLF.p........................................................ 
0 573 1057 1460 +14424 PF14573 PP-binding_2 Acyl-carrier Coggill P pcc CATH:3ce7A00 Domain \N 32.00 32.00 32.00 32.00 31.90 31.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.40 0.72 -3.95 5 15 2012-10-02 01:16:24 2012-01-09 15:11:50 1 2 10 1 12 80 1 91.30 44 35.82 NEW SPVVDTDINAVTNYIVGMCQKFLQKGEKVTPSSKLEELRTREDRLWDCLDTVEFVLDVEEIFDVTVPDEVADNFQTLQEIADFVVSERAKAGKFMK ................................................lpphllGhhpKaLpcspclTssoKLE...EhRT.+-sRhWDsLDTVEFVlDVEEhFDVTIPDEsADNhcTlQEIADaVVupRt................ 0 7 8 11 +14425 PF14574 DUF4445 Domain of unknown function (DUF4445) Bateman A agb Jackhmmer:C9L8Q5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 525 and 664 amino acids in length. The family is found in association with Pfam:PF00111. 27.00 27.00 29.30 40.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.02 0.70 -6.24 215 536 2012-01-09 17:49:09 2012-01-09 17:49:09 1 11 362 2 223 535 496 400.20 34 68.91 NEW haGlAlDlGTTTlsutLlDLp...o.GchlussuthNsQhpaGsDVlSRIsaAt..p............sGhppLpptll.pslNpllsplhpp.......ss............................lshpcIhplslsGNosMpHLhLGlssptl........utsP........ahPsh..........sps..h.lpAp-l..Gl......phssputlalhPsluuaVGuDllAull.uss..ht.....p....p..cc..hs...LhlDlGTNGEllL...G.sp.cpllusSsAAGPAFEGusIspGM+AssGAI-cVpl....csss.............ph.psIG......................s........................ttP....................pGICGSGll-hlAplhcsGllcpsG+hsps...tt.............t.h.h.t.t..ttttalls...tt.......t.......t.h.......scslhloQpDI.cplthAKuAlhAGlphLlcc.sGlshs-l-clhlAGuFGsalshcsAhslGllPc.h.hp+lphlGNuuhtGAphsLls............................pptpcchpplscph..p..alEL...usp...ssF.pctFlpuhhh 
............................h.aGlAlDlGTTTlsstLlDLp.........s.GcllspsuthNsQhpaGsDVlSRIsauh..p............sGhppL....ppslh.pslNpllpplhtpss............................lstppIhphslsGNosMtHLhLG.lssppLutuP........ahss..h..............pps..htlpAp-l...ul.............plps.tutlalhPsluuaVGuDhsAull.ust..h.t..................p....p..cc...hs..LhlDlGTNGEllL......u.sp....spllusSsuAGPAFEGusIssGhRAssGAI-cVplssps.................phpsIG...............s........................tts..............................................tGICGSGll-hlAphhcsGllcpsGchtps....t...............h...h...ttttphhls.t........t..th.spslhloQpDI.+plQhAKuAlhAGhphLlcp.hGlphpcl-clhlAGuFGsalshcsAhslGllPc.h.hp+lphlGNuuhsGAphsLL.s............................pp..ttpch..pclsppl..p..hlEL...usp...ssF.pctFlpuh................. 0 111 190 207 +14426 PF14575 EphA2_TM Ephrin type-A receptor 2 transmembrane domain Coggill P pcc CATH:2k9yA00 Domain Epha2_TM represents the left-handed dimer transmembrane domain of of EphA2 receptor. This domain oligomerises and is important for the active signalling process. 27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.87 0.72 -3.58 64 1068 2012-01-09 18:22:38 2012-01-09 18:22:38 1 47 82 50 462 762 0 77.70 37 8.58 NEW hllsu.ss.su..llhLllllh.l....h.l.hht..RRpp.hp..........+s...pp.p..s-c..h......th.pssp.....h..h...su.....l....................Ks...............YlDPaTYEDPspAV+EFA+EIDs ......................................................h.lhssss.su..lhhlls.l.l.s.h.....hl....lht....R+.....pp...hp...................................+s..pp..p.....s-c.....................ph.tpsp.........h..h.PG..h.........................................................K.s...........................Y.....lD....P....aT.YEDPNpAV+EFAKEIDs............ 
0 46 86 226 +14427 PF14576 SEO_N Sieve element occlusion N-terminus Eberhardt R re3 Ruping B Family Sieve element occlusion (SEO) proteins, or forisomes, are phloem proteins which accumulate during sieve element differentiation [1]. This domain represents the N-terminus of SEO proteins. 27.00 27.00 63.20 32.00 22.90 22.40 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.82 0.70 -5.13 34 105 2012-01-10 14:54:50 2012-01-10 14:54:50 1 6 13 0 58 98 0 247.50 32 38.72 NEW t.sDsplhcplhtTHssDs.cchDVcsLhsllpsIlp+us.hs............h..tt......t..p..cp...s.hshhcs.hhslc+ISCch.sKssutp.sAH.............pTThsILphLssYoWDAKsllsLAAFALpYG-FWhLsph.hsos.LAKSlAhLKpl.s..hp.ts...+sR.ss...lssLlcshlpVhcsIhEaccLssp.Y.....spDVPuLstAhpcIPlsVYWsItolVACsupIssl.........hspp..csaELS.shspKLssIhs+L+ppLshCcppI--...hEsYppLhclFpps+p ................................................sDs.hhcplhhTHs.ss..pchDscsLhpllpsllppss..h.....................t..p......hs.hc.s..htlcpIusphhsps..utt.puH.............tTThsll..sh.LssYoWDAKsVlsLAAFAlpYGcFWhLsph..s..ss.LAKSlAhLcpl.P..hp......+s+hps...lssLl+shhpVscsIhEaccLsst..Y......spDlPsLssAhpcIPlsVYWsItolVACsspIssl................h.t......................p..pt..........a-LS.shspKls.IhpcL+ppLphChppItc...h-tat.hhphhpp.p.................................... 0 3 46 55 +14428 PF14577 SEO_C Sieve element occlusion C-terminus Eberhardt R re3 Ruping B Family Sieve element occlusion (SEO) proteins, or forisomes, are phloem proteins which accumulate during sieve element differentiation [1]. This domain represents the C-terminus of SEO proteins. 
26.00 26.00 31.60 35.60 25.20 22.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.61 0.70 -4.99 35 115 2012-01-10 14:55:34 2012-01-10 14:55:34 1 6 13 0 69 112 0 213.00 34 34.22 NEW pscEEsLhpc.sWhh-llsc.lDPhlhpWlp-s..+YIhlYGGsDh-WIpcFTpsscslu.......psAclslEhhaVGKt............................pD.shlhaFWsclEShh.hoKh.ph.............cpscsDslhQE.lpplLSacts-pGWAlLSKG...ss....lhlpG+Gsshlpols-a-t.WKcpV.p.cGFshAFc-Yacplp...s..sccCs+l.l.Psss..GcIP-plsCP-..CuRsME.palsY+CCHc ........................s..ccctLhpc.pWhhpllhc..........lcstl.phlc-s.+aIhlYGGsDhcWIpcFTpssctlt.......psAcl.lEhhaVGKtp..........................h...pD.shlhhFWh+lEShhhSKh..ph.h...........ppspsDshhpE.lppLLsac.t.sptGWAll.o+G...ss....lhlpG+Gsshhpols-ast.WK-pl.p.cGFshAhp-ahpphp.....s...sc.Cs+h.h.ss.ts...Gplscp.lhCs-..CtRsME.pal.YpCCp................ 0 3 56 66 +14429 PF14578 GTP_EFTU_D4 Elongation factor Tu domain 4 Coggill P pcc CATH:1xe1A00 Domain Elongation factor Tu consists of several structural domains, and this is usually the fourth. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.66 0.72 -4.34 11 354 2012-10-05 12:31:09 2012-01-10 16:40:20 1 15 284 4 219 555 91 86.20 37 11.07 NEW hpssGKhpV.psaslht+-.slVG.cVlpGlIhPGYKl...KG+c.VGhIhpIp+p+KpV-FAlsGD+VAl.lEG.hh...psc-GDlLEV .........hh...Ps+l+IL..PpalFppp....DPlVlG.VcV.sGhlK.G.sPls.....pccG..h.c...l...G.h..lp.oIcp.sc.K.sV-.A+cGpcVulpIcs....................................... 0 72 122 170 +14430 PF14579 HHH_6 Helix-hairpin-helix motif Bateman A agb Jackhmmer:C9L7X9 Domain The HHH domain is a short DNA-binding domain [1]. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.89 0.72 -3.94 364 7029 2012-10-03 02:11:09 2012-01-11 15:39:14 1 48 4452 9 1533 5939 2559 91.90 30 7.87 NEW GlpllsPDlNpSp.h.c.asl............................................pst.........s......IRhGLsslKGlGpssscpIlp.tRp...tus...Fp.sltDh..hpRs..............tls+cslEsLhpAGAh.D.sh.....u..h...p..Rt..tL ...................................................GlplhsPDlNp.St.h.c.Fpl...............................................................................................................pst.........ts.........IhhGls...Al+GlGps.sscpIlc..sRp.....p.....G................Fp.sl.Dh..tpRs..........................ttls.+.+s.l.EsLhpuGAh.D.sl....s......p.Rt.......................... 0 538 1027 1307 +14431 PF14580 LRR_9 Leucine-rich repeat Coggill P pcc CATH:1a9nC00 Repeat \N 30.50 30.50 30.50 30.50 30.40 30.40 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.95 0.71 -4.71 6 1482 2012-10-02 21:32:02 2012-01-11 16:43:36 1 116 303 11 910 1732 20 148.50 27 32.21 NEW MVKLTAELIEQAAQYTNAVRDRELDLRGYKIPVIENLGATLDQFDAIDFSDNEIRKLDGFPLLRRLKTLLVNNNRICRIGEGLDQALPCLTELILTNNSLVELGDLDPLASLKSLTYLSILRNPVTNKKHYRLYVIYKVPQVRVLDFQKVKLKERQEAEKMFKGKRGAQLAKDIA 
.....................................................h....................................................................h.p.t.lt......h......p....h...c....h...L...s...ls..pN.t.l..p....p...l....p..........s...l...s.....................p............L........p...p........Lcp.L.......Ls..sNpI....s..p..l...........p..s..l.t.....t.....t..l..s...s..Lp.........p...Ls..L..s.sN..p.I...p..s..l.s.............s......l..p.......L..p..p.l...p..p.LppL..s...L......h.....s.....N...P..........l............s............p...........h............p.........p.Y....R.....hllhh.lP.p...Lc.hLD.hppl..ptp........E.+.t.A...............................t.tt.............................................. 0 313 425 643 +14432 PF14581 SseB_C SseB protein C-terminal domain Bateman A agb SseB Domain This family consists of several SseB proteins which appear to be found exclusively in Enterobacteria. SseB is known to enhance serine-sensitivity in Escherichia coli [1] and is part of the Salmonella pathogenicity island 2 (SPI-2) translocon [2].\ This presumed domain is found at the C-terminus of SseB proteins. 19.90 19.90 20.00 20.30 19.60 19.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.07 0.72 -4.20 72 744 2012-01-11 16:51:14 2012-01-11 16:51:14 1 7 695 0 82 441 11 108.70 51 42.24 NEW p..shp....sGsplpls...cP..pc...Psphhsulsp.hhppptsVppAalthhpp.......tsppsshhlsl-hss....chppl.hpshup.hssshhss.hslchshls.s..tsluchhhpcspPFYpR ................EGGESLlLS...EV....AE......PPuQMIDSLTT.LFK.TlKPVKRAFlCuIKE......pp-AQPNLLIGIEADG.....DIEEI.IpAsGs.VATDTLPGDEPIDICQV+.c.GE....cGISHFlT-HlsPFYER............................ 0 15 39 61 +14433 PF14582 Metallophos_3 Metallophosphoesterase, calcineurin superfamily Coggill P pcc C0ATH:1uf3A0 Domain Members of this family are part of the Calcineurin-like phosphoesterase superfamily. 
28.20 28.20 28.20 28.20 27.80 28.00 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.62 0.70 -5.23 6 31 2012-10-02 19:15:56 2012-01-11 17:04:52 1 2 31 9 11 163 10 169.80 37 57.98 NEW M....thhscKlLAlSsh+Gch-hl-+LlclltEp.ssDAllllG-lhpspA+ucEYt............................pFFRtLuphslPsahlPGspDAPlchaLRtAaNhElVhPpl+sVHcoFAhhtG.allAGhGGElT-cucs-...thtL+YPuWEAEYpLKhL+-L+DhpKlhLFaTsP.......hcKGhccuGSpsVAcLlKTasPclVls.....uGsstcH..E.LGsoLVVsPGuLuEG-YAllDlcp+cl-hGsl ......................................................................................................................................................................................................................................................................................................................................................................................................h.L....p.....-.............h.......c..c.........ph.......I.....hl.F.Ht..PP.s.............htp..sh..u..csGS+sVtcLIpp..a.pPlls.Ls.....GHltcsp.t.h-plGs.Tl..lVNPGu.L.tp.G..-..aS..l..lshp...t............................ 0 5 9 11 +14434 PF14583 Pectate_lyase22 Oligogalacturonate lyase Coggill P pcc CATH:3c5mA00 Domain This is a family of oligogalacturonate lyases, referred to more generally as pectate lyase family 22. These proteins fold into 7-bladed beta-propellers. 
30.00 30.00 30.00 30.40 29.90 29.20 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.16 0.70 -6.01 4 173 2012-10-05 17:30:43 2012-01-11 17:11:27 1 3 156 4 47 139 10 353.20 48 90.58 NEW MAKGKhlsLsFcTa.DSsTsscVsRLTPsDVlCHRNYFYQKCFTpDGpKLLFuGsFDGshNYYLLDLsTQpAsQLTEGpGDNTFGGFLSP-DcuLFYVKNt+NLMRVDLsTLEEssIYpVP--WVGYGTWVANSDCTKlVGIEIpKcDWpPLTDWKKFtEFYaTNPpCRLI+lDLcTGEupVILQENpWLGHPIYRPhDDsTVAFCHEGPHDLVDARMWhINEDGoNhRKVKpHA.GESCTHEFWlPDGSALAYVSYhKGpopRaIhpssPsTLENcplhpMPsCSHLMSNaDGoLMVGDGssAPVDVpDsuGYKIENDPFLYVhNhKstpha+lA+HsoSWcVh-GDRQVTHPHPSFTPDDKtVLFTSDs-GcPALYhAclP-pl ......................................................................MAKG..l.Lpacsh.DspTGspVsRLTPs-lhCHR.NYFYQKCF....spD..Gs+LLF.uut...F..D..G.hNY.YLLDLtoppAsQLTE......Gt.....G...DNT..F.G......GFLS.s....-.D..culaY.VK..........st...+......sL.hcV-LsT..L.........c.....Ep.slYpVs..........-cWVGY.GTWVANS...D...CT+.lVG.I...E..I....t...+p.....................DW...pPL....s....DWp.hFp-...FaappPpCRLh+lDL....c.....T..GE...s...pVIhp.-spWLG.HPIYRPaDDsTlAFCHEGPHDLVDARM..WhlN.cDGoNh..RKV..+....pH.s.t.G...ESCTHEFWlPDG....SAlsY.V.......SY.h..K.G...ppsRh...IhphsP.............p..T......h...cs..c..t..l..hpM...P.s...CS...HLMSNaDGoLhVGD..G...u..ssPVDV....p......D.ss.u...YpI...-.N...DP.aLY.l.h.sh.p.s.........tp....t...+..lu+.....Hs..o......SWpV...hcGcRQV.T........H.....PHPSFTPDs+tVLFoSDh..cGp.PAlYhsplPt............................................................ 0 7 25 36 +14435 PF14584 DUF4446 Protein of unknown function (DUF4446) Bateman A agb Jackhmmer:C9L935 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 165 and 176 amino acids in length. 
24.00 24.00 24.40 24.30 23.90 23.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.70 0.71 -4.38 123 292 2012-01-11 17:19:42 2012-01-11 17:19:42 1 2 289 0 74 243 26 149.80 32 86.90 NEW .shl.....llulh.llslll.hl...hhlhhhh+...hp+lp++Ycthh....cGpsup.sL....Echlhphhcclcclppptcph.ccphcplppphp....pshpKlGlV+YcAFp....-hGucLSFulAlLDspssGllloula.uR-sshsYsKslppGpSp.htLSpEEppALcpAh .............................s.hlllslh....llhllh..hl..hllhhhh+hp+Lc++YcthM....cGpssp.sL......Echl.hphhcclcclppptc.ph.ccphpplcpphp....pshpKlGlVRYsAFc....-hGucLSFulAlLDspssGllloula.uR.-p.ohsYuKsIppGpSp.hsLSpEEppuLcpAh.............. 0 44 65 69 +14436 PF14585 CagY_I CagY type 1 repeat Bateman A agb Bateman A Repeat This repeat is found at the N-terminus of the CagY proteins - part of the CAG pathogenicity island - and involved in delivery of the protein CagA into host cells ([1]). 25.80 25.80 100.40 25.80 17.60 25.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.27 0.72 -4.04 7 210 2012-01-12 14:30:43 2012-01-12 14:30:43 1 33 32 0 3 213 0 62.60 78 9.99 NEW ETSKKsQQcSPQDLSNEEATEANHFED..KESKESSDpHLDNsTET.KTphD-s...KopETpsphspp .........ETSKKTQQHSPQDLSNEEATEANHFEDSSKESKESSDHHLDNPTE.....T.....KTNFDE.KSEEhpsp.ss............. 0 3 3 3 +14437 PF14586 MHC_I_2 Class I Histocompatibility antigen, NKG2D ligand, domains 1 and 2 Coggill P pcc CATH:1jfmA00 Domain Members of this family are known as retinoic-acid-inducible proteins. They are ligands for the activating immunoreceptor NKG2D, which is widely expressed on natural killer cells, T cells, and macrophages. 
27.00 27.00 27.40 27.20 26.80 26.80 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.17 0.71 -4.69 3 89 2012-10-03 22:02:01 2012-01-17 15:53:59 1 3 17 5 26 189 0 164.60 45 74.91 NEW DAHSLRCNLTIKAPTPAD..WhEVKChVDEILILHLSNINKTMTSGDPGETANATEVGECLTQPLNDLCQKLRDKVSNTKVDTHKTNGYPHLQVTMIYPQSQGQTPSATWEFNISDSYFFTFYTENMSWRSANDESGVIMNKWKDDGDLVQQLKYFIPpCRQKIDEFLKQSKEK ........................................sHSLshNFT.I.Ko.ho+PGp.WCEuQs.hsc.phFLpa.s.ss.N.hsh.P.L.G..l.GKK.VNATpsWs-LTQsLs-lGc-LRhhLh-lKsp.h...K.TS.Gs.s......T....L.QVpMlsQ+cstphsuASWpFs.IssphshlF...D.s...hNMoWplIN.-AscIhEpWKcD+sLtc.ah.+.h...h.s.DCsphLcEFLt.....t......................................... 0 1 1 7 +14438 PF14587 Glyco_hydr_30_2 O-Glycosyl hydrolase family 30 Coggill P pcc CATH:3clwA02 Domain \N 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.38 0.70 -5.53 4 205 2012-10-03 05:44:19 2012-01-18 13:33:11 1 21 143 6 97 244 11 287.10 27 49.98 NEW cppLslshposaQpIDsFGASDAWRsQalGKNWP.EK+ppIADLLFSpEhDppGNPKGIGLShWRFNIGuGShEpGcsuGlsspWRRsECFLot-GsYDWsKQuGQpWFh+AARERGVpphLsFS.SAPhaMT+NGpuhoo-cs.phNlppsKhcsaAcFLs-sspph.pc.GhslNYlSPlNEPQW-W.usuuQEGo.sTN--hpchVphLD+cLpcRplsTp.IslsEsGsIpYLacs.sNtpsRDN.IcshFspsuphSlhpLssVtpsVouHSYWSsaPhspLVspR+pLspclup...ush+aWtoEYC.hEp..Ns-.spGsGs.RDLGMpsALYVARlIHpDLTlANASuWQWWTAlSthsYKDGLIalDcsp.tsG.Sh.........KpDG 
...............................................................................................................ht.....lp.tt.hQ.phcsaGsS.sW...hp..h.h....G...hst....t....pp....p.pls-hLFo..p.p........................p...G...l...GLolhRaNlGuG............us.t..p.....t........p............s...........s..t....l..t.........s...............h...h......p....s...p.........s.a.......h.......s...sc.........s...s.........a.........s......W...s..t...s..t..u.QRh..h.l.p.t.A.+.p..+..G.ls..p....hh...uFu.NSP...PhahTpNGt.....s...s.s.....s......s....t....s.....s.....s.............NL..cs....-p.apsFApYLusVsc....th..pp..........t.......G.l.phshlsPhNEP.s...h..s...........W.....s..........s.....s........p..........QEGs..ths.s.p.p..sphlp.h.Lspp.lpppsl..s.sp..l.h.h..s-...................................................................................................................................................................................................................................................shttst..................................................................................................... 0 34 71 91 +14439 PF14588 YjgF_endoribonc YjgF/chorismate_mutase-like, putative endoribonuclease Coggill P pcc CATH:2otmA00 Domain YjgF_Endoribonuc is a putative endoribonuclease. The structure is of beta-alpha-beta-alpha-beta(2) domains common both to bacterial chorismate mutase and to members of the YjgF family. These proteins form trimers with a three-fold symmetry with three closely-packed beta-sheets. The YjgF family is a large, widely distributed family of proteins of unknown biochemical function that are highly conserved among eubacteria, archaea and eukaryotes [1]. 
35.00 35.00 37.20 36.70 34.20 34.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.80 0.71 -4.28 2 1260 2012-10-01 19:40:00 2012-01-18 13:58:18 1 6 1017 15 475 1093 1483 146.50 44 92.19 NEW EoRLhAhGL.LPcssAAlGsYEPauhhus.lhTShQhPa.ttpLha.G.LGsshosp-GhAAsRLssLNulAQLtpAsGpLupl.plhRL-GhlsspQsh.-hPhsLDuAScLL.-lhGEtGRHuRhhhsp.VMPLsu.shlhhFAEl ....................................................t+LtpLGlpL.P..t.s.....ss.P..s.A.sYVPsl...p.o....G....s.h.l.a.sSGQlP...h....h..s..G...p.....l..h.hsG..K.....l...G.......s.........-.............l....o..s...Ep..upp..A...ARh...sAlNhLAs.lc...u...........t.........l....G......s.......L..-+.lpRlV..KlsGFVsS..sssF.....s.pQ...s......tVhNGAS-LlscVFG.-.s..GcHARSAVGVusLPhsusVEl.Eh................... 0 150 300 384 +14440 PF14589 NrfD_2 Polysulfide reductase Coggill P pcc CATH:2vpzC00, Pfam-B_200168 Domain Bacterial polysulfide reductase is an integral membrane protein complex responsible for quinone-coupled reduction of polysulfide, a process important in extreme environments such as deep-sea vents and hot springs. Polysulfides are a class of compounds composed of chains of sulfur atoms, which in their simplest form are present as an anion with general formula Sn(2-). In nature, polysulfides are found in particularly high concentrations in extreme volcanic or geothermically active environments. Here, the reduction and oxidation of polysulfides are vital processes for many bacteria and are essential steps in the global sulfur cycle. In particular, the reduction of polysulfide to hydrogen sulfide in these environments is usually linked to energy-generating respiratory processes, supporting growth of many microorganisms, particularly hyperthermophiles. 
27.00 27.00 28.90 28.60 26.60 26.10 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.74 0.70 -4.72 3 4 2012-10-01 19:35:38 2012-01-18 15:02:00 1 1 4 8 2 10 1 254.00 66 99.22 NEW TEFYGLPNApEFWHWTNLLHFlLVGLAGGsAFLTALLHLKGcAEARRYTLaALuLIALDLFlLWAESPARFRFTHVWLFLSFHPTSPIWWGAWGLALSFLouGLLYLGKGPuRsLAWuLLlFSLVALAYPGhALAVNLNRPLWNuLLAGLFPLTALVLALGVAVLLKSuWALaP.LRVLAGASLhLAhLYPlTLs...sEAR..GHLWEEGGhhYGLFLLLG....LGAFhpERLAPWAGhLAAAG...LRALLVtsGQWQGL..G ......TEFYGLPNAtE.FWHWTNhLHFlLVGLAGGsAhLsALLHL+G..psE..ARRYTLh..A..LuLIALDLFlLWAESPARFRFTHVWLFLSFHPsSPIWWGAWGLA.LuFLouGLLYL..G.KG...P......pRhLAW......uLLlFSLVALuYPGhALAVNLNRPLW...Nu..L.hA.GLFPLTALVLALGlAsLL+SsWA...LaP.LRl.LA....GASLhLAhL.YPhTLs...sEAR..tHLhEE..GGhhYGLFLLL.G....LGsFhpERhAPWAGhLAAAG...LRALLVhsGQWQGLG......................................................................................................... 0 0 1 2 +14441 PF14590 DUF4447 Domain of unknown function (DUF4447) Eberhardt R re3 Jackhmmer:Q8EAP9 Family This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. 27.00 27.00 314.00 313.80 24.80 24.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.13 0.71 -4.66 5 28 2012-01-18 16:17:44 2012-01-18 16:17:44 1 1 28 4 13 19 1 166.00 82 99.49 NEW MSKNlGLNAIEMQYLRpSLGLTsAQVAplTKsSE-DVlAWEAGEppAPtLAQKKLLEIDEIIEMQVLNToDGIEELFKKEPKRRLAFVVYPTQAlYTQYNPEFLSSLPLTELYNTAAWRIKKECKLVLEVDVoLVPLDVEuYKAYREcNGLuESRESRAKWAATQL MSKNIGLNAIEMSYLRQSLSLSsAQVGpLTsHSEADVLAWEuGEpsAPELAQKKLL-IDDIIEMQVLNTCDGIEELFKKEPKR+LAFVVYPTQAlYTQYNPEFLSSLPLTELYNTAAWRIKKECKLVLEVDVSLVsLDVEAYKAaREpsGhSESRESRAKWAATQL 0 1 2 8 +14442 PF14591 AF0941-like NTP_transf_5; AF0941-like Coggill P pcc CATH:1yozA00 Domain Members of this family are of unknown function. 
22.60 22.60 22.60 23.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.74 0.71 -4.08 6 11 2012-01-18 17:29:05 2012-01-18 17:29:05 1 1 11 2 9 11 0 107.20 28 84.46 NEW hpsTal-KltELh+.....t.hl.-stcclEclscpLh........scEsItEh..F+pDhE.IlchapsGchocEEAhppLp.Lc.hA.spLppah.clt-lLcchEtchpchltch.c.........ls..l.Y.hcp....lcpstcE ....ohl-KLtELlp.....pplIsDlp-cL-EIhcpls........scEpIcEh..F+sDhc-llc-hpuG-I--EEApcllc.l....................................................................................... 0 1 6 9 +14443 PF14592 Chondroitinas_B Chondroitinase B Eberhardt R re3 Jackhmmer:Q46079 Family This family includes chondroitinases. These enzymes cleave the glycosaminoglycan dermatan sulfate [1]. 24.00 24.00 24.20 24.00 23.90 23.70 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.61 0.70 -5.66 6 114 2012-10-02 14:50:22 2012-01-19 10:54:16 1 13 78 4 66 132 31 320.70 28 50.89 NEW VSo.sELtcAlspsKsGspIlLKsGlaKDVQIKFpGcGTK-pPIsL+AETsGKVFIEGsSpLEluGcYLlVcGLaFKNGaoPspsVIuF+su.cs....luNas+VTNCVIp-FspssR-psspWVp..thhG+HNclspChlsGKsNhGPTlRls......lcGsp.ultNYHpIVpNHFGPRPpKGGspGETIQlGsSaoSMoPu+ThlsNNLF-cCNGEVEIISSKoNFNla+NNlFacSEGSlVhRHGNYsslDGNaFIGssssEsYGGIRlINTGHWlsNNYFYtlKGppFRSslAlMNGIPKSPLNRYNQVTDVVVAYNTalDss.SPaQFGVGpNlsppDVLPcSEIRSARPhRhplsNNllaNcc.scphPllcp...Dcs...sFKsNhlssp.ss.......Lhsp.ctshTclutNhps.ps.....sshK.DlEsh.pGFDF- 
......................................tplppAl..p.sspsGDpIlLtsGsa....s...s...h.p...lhh.p.s.pGTpspPIslpAps..sG.pVhl.s....G.....pu.......p.....lpls...........G..p............alhlpGLhF.p.suh.....ss.t..tth.hthp.s..........husps+lT..pssh....t..a....s.................tp.....t.......t...........al..............hhGppspl-ps.h.s...K..pshGsh.lhl...................tstt...s.tp..hc..p.I....cpNaF...t........p..........h........u....u........N.uu....Eslpl.Gh..S..t.u..h.s...u.shlppNhFcpss..GE.sEllS.KSstNhhptNshhpspGslshRHGp.shlpsNhalGst.......................hGGlRlhspsphlhsNYh.sh..........p..G........h........t........h..h..ushs..........hps....................s.t....s.........s.ta...s.p.s.l..Nshlsst....h.hs...................................................................................................................................h........................................................................................................................................................................... 0 19 53 64 +14444 PF14593 PH_3 PH domain Eberhardt R re3 Jackhmmer:O15530 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.21 0.72 -4.14 31 202 2012-10-04 00:02:25 2012-01-19 14:29:39 1 8 127 7 132 503 8 102.20 47 18.57 NEW shWcpFlp...sclIlKpGhlpKR..+GLF..uR+RhLLLT-u.....P+LhYlDPspMhhK.GEIPWo...ppLpsEsKNhKpFalHTP.....sRsYYLpD.......scupAhcWscsIpclppphhp ........................................WcpFl-....splILKhG.VcKR.....KGLF.......uR+RpLLLT...-G................P.....+L.......hY.V.DP.s...s......h..l.l..K......G..E..I.PWS.........p.-.L..+....s...........E......s.......KN.....F.......K......p........FaVHTP...........sRTY.a.L...D............sp.u.p..A.hc...Ws+tIp-lhc....p........................................ 0 58 76 109 +14445 PF14594 Sipho_Gp37 Siphovirus ReqiPepy6 Gp37-like protein Bateman A agb Jackhmmer:C9LCM0 Family This family includes numerous phage proteins from Siphoviruses. 
The function of this protein is uncertain, but it is related to Pfam:PF06605. In Rhodococcus phage ReqiPepy6 this protein is called Gp37 [1]. 26.10 26.10 26.30 29.60 25.90 26.00 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.90 0.70 -5.20 43 160 2012-01-19 16:55:40 2012-01-19 16:55:40 1 2 128 0 25 151 20 326.20 24 86.66 NEW shphlG.......................l-sap..SLpasc+a.tssGsFE...lclshsppp.................hphL.ppsshlhh...pstp...p...........ssllp..phphsp-p...tpsplsVpGhs.hpshlscRlshss..........h.......hhsussEslhcphlspsshssspssRtIs...sLt..lussts...tupplshp.spa.csLh-tlppl......spssslGaclh.........hc.p..............p..pch.lF-lhpGpDh.......sssllF.....StcasNlpstpatpos.sapNssh.VuGpGEGp.sRthsp............l....us.....s..p...GhcRcEhalDA+-ls.................................phhspLpp+GppcLs.Ettthtshcsplpssst......hhYtpDasL.GDhVolpscth..GlphsspIoEl.........p-ha-..psGh.plpssFGs ....................................p.hu..l-.apSlphpcpa.tssusFE...lhhshptt..................hchl.ptsshlhht..ss..............sullp..thplpps-...ttphlslpGhs.hpuhlscRIlhs...........h.......phoGpsEslhcphlppphh.s...tRpls...tLh..lsssts...ps.tsl.p.hp..spa.pslh-tlppl......spssshGaclh.........hs.p....................................p...tth.hhchhpu.pDh.......ssslhF.....St-ac.Nlhstpapcst.shpsssl.luGcGEGp.sRphsp.............................l................ss.....h..s.........GlcRhEhalDA+-lp.................................thhspLpp+Gpc+Ls..Ettph.hshpsplp.ps.......hhYttDacL.GDhVshtsp.h....Ghphsshlspl.........ppphc..psGh..plpspaGp........................ 
0 10 23 25 +14446 PF14595 Thioredoxin_9 Thioredoxin Eberhardt R re3 Jackhmmer:Q9I4A4 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.63 0.71 -4.58 80 573 2012-10-03 14:45:55 2012-01-20 08:05:38 1 3 547 1 117 2556 1082 127.40 35 70.61 NEW Ghshs-alp.........phsp..........spc.chpclhcphplsp-tpp...hhpthspsh.phLllsEsWCGDuhhslPllp+lu.chs..slcl+llhRDcsh-lh....-paLT..ssu+uIPhhlhhD..........ps.hp.l.shWGPRPptlpc.hh..........sph+ ...............................................................shspalsthpt..........Nppph.h..plYps....ap...lsp....-..p....hhch..h..p.t....s..h..+.l.L.V..l..o...E...sWCGDAhhsl.....Pll....c....+I.u...E....s....t..........Nl...-...l...+l..h....t..R...D......-..s.p..c..L.h..............DQYLT....sspu.....R.....u......IPl.F.lFls...........cp...hcp..ss.W....GP....R.ssclpc.hl.pph.t................................................. 0 40 85 105 +14447 PF14596 STAT6_C STAT6 C-terminal Eberhardt R re3 Jackhmmer:P42226 Family This family represents the C-terminus of mammalian STAT6 (Signal transducer and activator of transcription 6), it contains an LXXLL motif which binds to NCOA1 (Nuclear receptor coactivator 1) [1]. 25.00 25.00 26.20 25.30 24.00 21.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.35 0.71 -4.37 2 54 2012-01-20 10:05:42 2012-01-20 10:05:42 1 5 26 1 17 45 0 168.10 78 23.30 NEW PLPTPE.phPTMVPSYDLGMAsDoSM..QLusDMs...YPPpSHSI.saQsLs.EESlsVLsAFQE.PHLQMPPshuQMSLPFDQPHPQGLL.CQsQEHAVSSP-PLLCsDVsMsEDSCLoQPVsuFPQuTWlG..........EDhFPPLLPPTEQDLTKLLLEGQGEuGGGSLGAQPLLQPS.YGQsGISMSHhDLRsNPSW ....PLPTPE.QMPTMVPSYDLGMAPDS..SMsMQLuPDMVs..QVYPP..HSHSIss.YQuLs.EESVsVLsAFQE.PH.....LQ...MPP.....sLuQhoLPFDQPHPQGLLPCQPQEHAVSSP-PLLCSDVTMsEDSCLoQPVsuFPQGTWlG..........EDhFPPLLPPTEQDLTKLLLEGQGEoGGGSLGsQPLLQPSpYGQSGIS.MSHhDLRsNPSW...................... 
0 1 1 2 +14448 PF14597 Lactamase_B_5 Metallo-beta-lactamase superfamily Coggill P pcc CATH:2p97A00 Domain This is a small family of putative metal-dependent hydrolases. 28.40 28.40 28.80 28.70 28.30 28.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.30 0.71 -4.92 4 14 2012-10-02 15:46:01 2012-01-20 10:58:09 1 2 14 2 5 21 24 190.30 39 92.85 NEW MKSLHRPDLYSWSsFNPARNIDFNGFAWIRPEGNILIDPVALSNHDWpHLESLGGVVWIVLTNSDHVRSAKEIADQTYAKIAGPVAEKEpFPIhCDRWLSDGDELVPGLKVlELpGSKTPGELALLLEETTLITGDLVRAa+AGuLplLPDEKLhN+pcsVASVRRLAuLEKVEAVLVGDGWSVFRDGRDRLcELVATL .........................................................................RPDLasWShFs.t+NlDFNuhhhhRP-GNlLIDPlsLSscDhp+LpulGGlsaIVLT..NsDHlRuAcchAcphtA+lhuP.......su-.......c....cph.....PlssD+hLsDG-..pllsGlpVlpLp..GpKTPGElALL.L.--..s.sL.IoGD.Llhuh.uGsLshLPDcKhhshpcshtSl+RLAth..ch-slLsGDGWslh...pcupttLtpLh.................... 0 0 2 5 +14449 PF14598 PAS_11 PAS domain Eberhardt R re3 Jackhmmer:P70365 Domain This family includes the PAS-B domain of NCOA1 (Nuclear receptor coactivator 1), which binds to an LXXLL motif in the C-terminal region of STAT6 (Signal transducer and activator of transcription 6) [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.09 0.72 -4.11 22 2159 2012-10-04 01:10:46 2012-01-20 11:08:31 1 50 589 39 703 2822 16 106.50 29 13.98 NEW p-pFoo+hshsGKIlplDpsslphsh.tahp.c-.lhspshaphhH.pDhp...hpp+h+phhp..........pGp.s..tSsh..YRlphpss..salhlpTpuplhp..sppssp.phlhssppllsEpps .......................................h..pFhoRHsh..s..sphtalDp.+...s.............s..shl..GYhP..p-..L.l.G..p.s...hh.c..a.aHscDhp....lp..c.sac..p..lhp....................................pGp..s.....po..s.........Y..Rhhs+sG..salh.lc...............Tphpshh......Nsh.op..c.......c.a.llspppll.....t............................................... 
0 147 210 422 +14450 PF14599 zinc_ribbon_6 Zinc-ribbon Coggill P pcc CATH:2k2dA00 Domain This is a typical zinc-ribbon finger, with each pair of zinc-ligands coming from more-or-less either side of two knuckles. It is found in eukaryotes. 27.00 27.00 28.00 28.00 26.30 26.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.45 0.72 -4.18 75 401 2012-01-23 11:24:43 2012-01-23 11:24:43 1 18 217 1 260 390 11 59.80 44 12.44 NEW lsDMsphachLDpEItspP...MPppYpsphshIlCNDCss.pSpVpaHhlGhKCsp..C...sSYNTpp .............h.hDMsthactLDtpluspP........MPp-Ypsp..pshILCNDCs..........s.cS.p.V.p....FHhLGh.KCtt..CtSYNTpp............... 0 67 148 216 +14451 PF14600 CBM_5_12_2 Cellulose-binding domain Coggill P pcc CATH:1aiwA00 Domain This C-terminal domain belongs to the CAZy family of carbohydrate-binding domains that are associated with glycosyl-hydrolases. It is suggested to bind cellulose. 27.00 27.00 27.30 28.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.12 0.72 -4.34 5 106 2012-10-03 03:17:01 2012-01-23 14:37:49 1 26 74 1 33 110 1 59.30 49 6.53 NEW TssCANsNVYPNWVSKDWAGGQPTHNEAGQSIVYKGNLYTANWYToSVPGSDSSWTLVGSCN .......................sss.lN...sYPsWsppDatu.ssoHAssGDpMla..puslYpANWWTsShPGSDuSWohlh...s.... 0 2 22 27 +14452 PF14601 TFX_C DNA_binding protein, TFX, C-term Coggill P pcc CATH:1nr3A00 Domain This is the C-terminal region of TFX-like DNA-binding proteins. 27.00 27.00 30.10 29.60 22.70 19.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.49 0.72 -4.19 28 73 2012-01-23 14:39:50 2012-01-23 14:39:50 1 3 69 1 53 84 0 82.70 30 56.08 NEW ARETLpFhpsLsAPlplslcpGTDla-lPchlaspuDcsGlKVpasoh-Lhphlp-sAs-tlcsRhl+cchhlhlspcG-lplp ..A+pTLthacplpAPlplplctGTDla-lPchlaccuDctGlKVpYsohplhphlp-pAsphlcsRhlccshhlhlspsGclpl......................... 
0 9 29 42 +14453 PF14602 Hexapep_2 Hexapeptide repeat of succinyl-transferase Coggill P pcc CATH:2rijA03 Repeat \N 27.00 11.00 27.00 11.00 26.90 10.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.50 0.72 -4.39 86 8656 2012-10-02 11:29:45 2012-01-23 15:54:45 1 152 4074 116 1925 24566 14688 33.80 33 14.21 NEW sltIGcsChlGAN.us..l..G..lslGDssllsAGlhlos .......................................lhItcss.h..I....G......u......p..uh......l..............G..............l......p.....lG-......ss.ll.u......sGshls....................................... 0 584 1174 1585 +14454 PF14603 hSH3 Helically-extended SH3 domain Coggill P pcc CATH:1ri9A00 Domain This domain is the 70 C-terminal residues of ADAP - Adhesion and de-granulation promoting adapter protein. It shows homology to SH3 domains; however, conserved residues of the fold are absent. It thus represents an altered SH3 domain fold. An N-terminal, amphipathic, helix makes extensive contacts to residues of the regular SH3 domain fold thereby creating a composite surface with unusual surface properties. The domain can no longer bind conventional proline-rich peptides [1]. There are key phosphorylation sites within the two hSH3 domains and it would appear that binding at these sites does not materially affect the folding of these regions although the equilibrium towards the unfolded state may be slightly altered [2]. The binding partners of the hSH3 domains are still unknown [2]. 25.70 25.70 25.90 25.80 25.20 25.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.95 0.72 -3.97 3 112 2012-10-02 18:48:24 2012-01-24 09:38:00 1 4 35 1 63 125 0 81.50 52 13.02 NEW FRKKFKYDGEIRVLYSTKVTTSITSKKWGTRDLQVKPGESLEVIQTTDDTKVLCRNEEGKYGYVLRSYLADNDGEIYDDIADGCIYDND .....FRKKFKa..-GEIpVlhpshlsss.hso++hGs+DLsI+sGEtLEVIp..hT-..c..sclLCRNpcGKYGYV.RotLh..-...s-lYDDl.................................................... 
0 4 9 20 +14455 PF14604 SH3_9 Variant SH3 domain Coggill P pcc Jackhmmer, JCSG:target_422527 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.26 0.72 -4.38 37 9839 2012-10-02 18:48:24 2012-01-24 16:52:14 1 624 319 119 5343 17510 50 51.00 31 8.53 NEW Alasap.sps..p.s..E..LplptG-hlhl..h..pp.....t..p...ssW.hh.u..p....h..sG..ppGhhPssYlp ......................Alasap..u....ps........s...c.......E......Lsh.p.t...G.-..l.ltl...l...pp...................s...-.....cGW..ap.G....p..................h.....sG........pp..G..h.FPusYVp............ 0 1250 1872 3407 +14456 PF14605 Nup35_RRM_2 Nup53/35/40-type RNA recognition motif Coggill P pcc Jackhmmer, JCSG:target_422743 Domain \N 27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.33 0.72 -4.19 21 98 2012-10-02 20:46:34 2012-01-24 17:02:17 1 12 56 0 66 283 2 55.80 26 9.42 NEW spWlsVoGat..hcpt....thVhc.....aFp.s.hGpIlc...pphs................tsshhYlpYtsphssptAL ..................phlsVsG..as..scpt.....phlLp.....HFp.s.hGcIsc........hpls.........................tppshhhlpatschsA-pAl..................................... 
0 23 29 47 +14457 PF14606 Lipase_GDSL_3 GDSL-like Lipase/Acylhydrolase family Coggill P pcc Jackhmmer, JCSG:target_416889 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.03 0.71 -4.54 20 244 2012-10-02 11:02:24 2012-01-24 17:22:51 1 7 172 2 67 573 133 161.20 31 41.20 NEW cK.PlVhYGTSIsQGAsASRPGMuaTsILuR+hstsllNLGFSGNu+hEsEls-llu-l.DA.ulallDslPN..........h....ospplpcRhtshV+tLRstHPcTPIlllEchh.hsps.hhcpptpcchpppspslpcsacp.LppcGsKslYalsucsh.lGpDuEuolDGsHPoDLGhhRaActaptll+ .................................................................................p.hlhYGoSIo.QGus....A..o+....P......u.....h.sasslhu......R.....p.....h.......s............h.....c.......l.......l.......NLG.....FSG...s..u....h....L........-.......................h..A...c.h.....l..s....-.....h...c.....A...clhs.....l....-.hhsN...................h....ssc...thppphpsFlcplRptH..P....sT.Pll...llps.hh..h.tt.....h..pt........................t.t......t....tt.p..t..hhtphhtt....h...t......t....t..sl..h....h..l...t...t...t...p......h.....s.......s............t....h......h.....s.....D....h.h.H.sshGh.hhup.h............................................................................ 0 27 58 67 +14458 PF14607 GxDLY N-terminus of Esterase_SGNH_hydro-type Coggill P pcc Jackhmmer, JCSG_target_416889 Domain This domain lies upstream of SGNH hydrolase, but its function is not known. There is a highly conserved GxDLY sequence-motif. 
27.00 27.00 51.80 44.00 25.90 24.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.84 0.71 -4.73 44 142 2012-01-24 17:32:33 2012-01-24 17:32:33 1 6 85 0 28 136 91 147.60 34 36.76 NEW sphpahsstph...sltG+uh.ssp..shYpRLPsptpst...lptsVhsLuppSAGlslpFpTsSspIps+aplsss.hshspMsssussGlDLYsts..sGpWpasus.up.sht....s.stshlhpshss......pt+EahLYLPLYsslpsLEIGlspsuplp ...........t.phpahsspph....slhG+uhtsp...shYcRlPsshpsh....sptslhpLu+sSAGlAlpF+oNSspIpl+apltss..hphsHMsssuhpGlDLYshp..sGpWpasss.up.sst....s..spshlhpshps......pt+EahLYLPLYsslpoLcIGVsssApl.p................. 0 16 27 28 +14459 PF14608 zf-CCCH_2 Zinc finger C-x8-C-x5-C-x3-H type Wood V, Coggill P pcc Pfam-B_880 (release 26.0) Domain This is a zinc-finger of the type C-x8-C-x5-C-x3-H. 27.00 8.00 27.00 8.00 26.90 7.90 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.79 0.73 -6.70 0.73 -3.78 44 1117 2012-10-01 21:35:20 2012-01-26 16:52:10 1 44 235 0 664 1211 11 18.40 40 10.59 NEW .C+.hhss.Cp.s.cChatHP ..........C+.ah..ss..Cpp.s..cChFtHP.. 0 185 320 526 +14460 PF14609 GCP5-Mod21 gamma-Tubulin ring complex non-core subunit mod21 Wood V, Coggill P pcc Pfam-B_276835 (release 26.0) Family GCP5-Mod21 is a non-core subunit of the larger gamma-tubulin ring complex that effects microtubule nucleation from both centrosomal and non-centrosomal sites. This subunit, unlike GCP2 and and GCP3 and others, is not thought to be essential for viability in the fission yeast, and may not be expressed in very high concentrations. Fission yeast can form a large gamma-Tubulin complex C similar to that found in higher eukaryotes and this complex is important for maintaining normal levels of microtubule nucleation in vivo [1]. 
27.00 27.00 28.80 27.90 26.60 26.70 hmmbuild -o /dev/null HMM SEED 653 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -13.14 0.70 -6.41 4 5 2012-10-02 13:20:28 2012-01-31 09:18:48 1 2 3 0 4 28 0 519.00 25 90.96 NEW VhsplEuls-K.phpshps.ANshcs+h--.lDR..VpsustsS.LHhpILHhLLEhSssPocss..KpPs.KclNsslEpEA.hspDsp.hpp.spGD+WD..ls-WSp......VTcsEEoEp.ss-NEchs-susaPcIPs..+hcNapp.chhhlFccpuQplhD+h.EKc..........lhRIuEQshhtE.oIhhLhG..............plLchlP....h.csLph.Ippo.LhchpsaG+sh-lhN.QVAsp+pCphahsDss...........DpEKpEsFls.hhlhQphTPphpuhlK....cLL.L.TpVRKht..Ihp...LptsspH.pKF......FR.AalchuppVhDhLpohhFIahpRhpN...........SLhhuthhpphYtEuh.Is.lhhpllsls+.TssasIuo.hLsILhpp..s.hpthcSlL.-aS..FLhKcCh..........ASQt.sFhhlV.sas.phsNhpp-pEshIcpslshpss..lsshc.uLsL.a-l.............ppaosRF.sL.pt.-EsacpLspKhFuKhlhtc.salsT.+salhp.ppcFTc.-plu.F.GVats..p-lssppVhtE.EK.....hlLKp+pKpLhsFhhsRstsL.su-hsshHK-.LtslM-......NsYKTplup.ID.pRhV....Dsp.shhlupluDllhc.up.lhscshpa+SsLs-tIhpshs ....Vh-pIEulsDKhphpshps.ANshcs+h--.lDR..VpsustsS.LHhpILHhLLEhSssPo......css.........KpPs.KclNsslEpEA.hs.pD.s.p.hp..p..spGD+..WD...ls-W...Sp...........VTc..s..EEoEp..s...s-...NEchs-su.sa..P.c.IPs..+hcNa.pp.ch.h......hlFc..c..puQplhD+h..EKc..........lhR.....Iu.....EQshhtE.oIhhLhG..............pl..LchlP....h.csLph.Ippo.LhchpsaG+sh-lhN.QVAsp+p.....C.....phahsDss...........DpEKpEs.Fls.hhlhQphTPphpu..hlK....cLL.L.TpVRKht..Ihp...LptsspH.pKF......FR.AalchuppVhDhLpohhFIahpRhpN...........SLhhuthhpphYtEuh.Is.lhhpllsls+.TssasIuo.hLsILhpp..s.hpthcSlL.-aS..FLhKcCh..........ASQt.sFhhlV.sas.phsNhpp-pEshIcpslshpss..lsshc.uLsL.a-l.............ppaosRF.sL.pt.-EsacpLspKhFuKhlhtc.salsT.+salhp.ppcFTc.-plu.F.GVats..p-lssppVhtE.EK.....hlLKp+pKpLhsFhhsRstsL.su-hsshHK-.LtslM-......NsYKTplup.ID.pRhV....Dsp.shhlupluDllhc.up.lhscshpa+SsLs-tIhpshs........................................................................ 
0 2 2 4 +14461 PF14610 DUF4448 GPI-anchored_2; Protein of unknown function (DUF4448) Wood V, Coggill P pcc Pfam-B_5686 (release 26.0) Family This is a family of predicted membrane glycoproteins from fungi. However there appears, visually, to be some similarity with the family of GPI-anchored fungal proteins, Pfam:PF10342. 27.00 27.00 27.10 27.30 26.90 26.80 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.44 0.71 -4.81 53 151 2012-10-04 12:14:07 2012-02-01 17:11:59 1 2 119 0 114 146 0 188.40 21 49.05 NEW ssstsusss...hhtCsspph..........ttPFCtPccssclhsspTYa.lTWsspaF............tp..VplplsYssssstpt..........................................................sha...sS-h..lsNspGaaslplcppWL........psp.h.........sslolp..thsstssh..s.h..........pGPpVtltsp.s....ph.s...t...pt..........................hlhlulPsslsh..hhlhhhhhhhh.p+cpRclu ............................t.ssh.tCpspph............tP.FCtPpcsspltsspTYa......lTW.spaF..........ssss..Vpltlsahppstspt........................................................tha...soch....lssst.GhhslplpppWl...........psp...........sslol.p.h.thss.ssh..ssh...........................pGsplhhssp.sh...thts...t..st...................................hlhlulPlslss..hhlhhhhhhhh.p+.cpRch................................ 0 30 66 99 +14462 PF14611 SLS Mitochondrial inner-membrane-bound regulator Wood V, Coggill P pcc Pfam-B_1679 (release 26.00 Family SLS is a fungal domain found bound to the mitochondrial inner-membrane [1]. It reacts physically with fungal Kar2p to promote translocation across the endoplasmic-reticulum membrane. This action appeared to be mediated via the promotion of the Sec63p-mediated activation of Kar2p's ATPase activity. This indicates that the Sls1p protein is a GrpE-like protein in the endoplasmic reticulum. In S.cerevisiae the SLS1 gene (ScSLS1) is not essential but is also involved in ERAD and folding [2,3]. 
33.70 33.70 33.80 34.20 33.60 33.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.24 0.70 -11.20 0.70 -4.91 34 140 2012-02-02 14:41:00 2012-02-02 14:41:00 1 3 113 0 84 142 0 207.30 21 25.97 NEW sKptLsc+Ilc-sWpLslpscl........Gplplplp......sp.hslLhhspphhlppl.....hpppss+Iclppphs.hlcIouscpssphlpptlpchhsclcppplsls.h..h..t.pt...........hpsthLphlpphtpshhchssssp.hhhhhht................tsppps-pscRhLhhAl.s.pspsppshhss.hs..tst.h.h.sh..........pslsWhsRp.cpWhRWt ...sKttluttIlcchWpLplpcp..lp............Gch.....lplp.................st.htLLhsp....sp.h.Lcsl..........................st.tt+lsspppps.tlpIpu.scushphlhpplsclhpslpopplslp.h.....t.pt...............p.....hs.pthL.splsphosshhcpsss..sst...lp..lpaht..................tsccss-hshRhLhhAh.s.tsp.ss.p.phhsphhs.............pstpuphhshss........................cshsWh-+..+pWhRah...................... 0 13 41 71 +14463 PF14612 Ino80_Iec3 IEC3 subunit of the Ino80 complex, chromatin re-modelling Wood V, Coggill P pcc Pfam-B_3771 (release 26.0) Family This is a family of fungal chromatin re-modelling proteins found in one of the chromatin-central complexes, Ino80. The function was identified in Schizosaccharomyces pombe but there is no orthologue in S. cerevisiae. 
27.00 27.00 39.30 36.80 20.50 19.80 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.57 0.70 -4.30 22 99 2012-02-03 15:55:25 2012-02-03 15:55:25 1 3 72 0 83 95 0 161.80 30 59.05 NEW Y+SaKKKYtKh+IpF-.tM+-S-uLh+EEh+hp-huKRlpEQNDQLL-lLLEhNsSh+lPschRac...Luh..Pss..s.hhss..t...s...s.thhpt.Lp.u+sph.sGphp.pthp.lttshhpspsh.sPsh.phssLlp.VPHos.sstppp.....sDh-h.......sp.....shGFLoPEc-sEYhhshDu+lus.s...t.tphsp......pPshu.............p.-R-hslRNPsSVYNWLR+ppPplFLQDsE .....................................................................................+Sa++KatK.+lhF-.tM+csptLh+p-.+h.shs+Rlt.pND...............pLL-hLL-hNpo.pls.phRhslsh..ssp.............................................................ts.h.phtpL.p.hPH.t.............p.p...................t..........s.sahss-c..pYhhthD.ths.................................t...t........................................ppph.l+NPsSVhNWLR+ptPp.hFLpDt-........................... 0 15 41 68 +14464 PF14613 DUF4449 Protein of unknown function (DUF4449) Wood V, Coggill P pcc Pfam-B_1378 (release 26.0) Family This is a fungal DUF of unknown function. 27.00 27.00 27.10 27.10 26.80 26.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.83 0.71 -4.32 28 110 2012-02-03 16:32:29 2012-02-03 16:32:29 1 2 90 0 89 117 0 152.00 33 17.97 NEW sVclcsLcIKlKKSpHKlLF.slFKPlLhpl..lRPulpKslEppIR-shp+sDuhhacl+pEAcRut-tucp..DPpp.ssNIYs+YhsAhpcchtpt+.cKAptt...ss....DpKlphAhTpc-SlFscIpLPGG..ISoKATEY+-........LAc+G-+.WESPlFoIGsAucSsslPp .....VclcsLclKl+cSpHKlLF.slFKPlhhpl..lRsslpKslEctI+sshpphD.t.hhapl+pEAc+A...tctucp..sPpp....t.shascYhsAhppphhpt+.pcupth...ht....................-pclphshopccSlF.plpLPGG.....lSsKATEYc-........LAt+.....G-+...WcSPlFuIGpAtpSpslP......................................................................... 
0 35 55 76 +14465 PF14614 DUF4450 Domain of unknown function (DUF4450) Coggill P pcc JCSG:Target_393004-GS13576A Family This is a family of bacterial proteins of unknown function. 27.00 27.00 30.00 27.50 26.80 25.90 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.43 0.70 -5.19 8 86 2012-02-06 10:27:07 2012-02-06 10:27:07 1 3 47 4 14 68 0 154.60 27 20.87 NEW MPshuGohRhGlssGscSpWlc-.hpKhcupa.hssp.hhhhps.......plplslhuLuDocGFIlElcucclP.-slsLhWuFGGssut....hscDsDIsu-st.......t.ChcNlaolcpssFTlhY.G.cuh......pL+s.......lsGlhPsto-IRLuDuctpsoPLpLapStKKossPVluu+hslsspp.........shYFshYp.psu+A-Ysha.hLspLFpKpcp ......................uGshphtl.pus.p.o.hhLcs.tpphcupahst+h.Y.l.c..h.ttGplpltshAhsDscGhIhchpsp....shs.c.ushLhWtaGsspst..phspsuDhss..p.......................................................................................................................................................................p....................................................... 0 6 14 14 +14466 PF14615 Rsa3 Ribosome-assembly protein 3 Wood V, Coggill P pcc Pfam-B_11864 (release 26.0) Family This is a family of 60S ribosome-assembly proteins, from fungi. 27.00 27.00 27.90 28.30 25.70 26.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -8.10 0.72 -4.67 29 100 2012-02-06 14:52:56 2012-02-06 14:52:56 1 3 96 0 77 91 0 47.10 37 24.20 NEW phpshYLphhsppFu-DL-pLR.pusDF.sspoLshLsps.LcpGsshFs ..phpshYLphhsppFu-DL-clR.pssDF.ss...coLslLscu.LppGsshFs.. 0 28 52 71 +14467 PF14616 DUF4451 Domain of unknown function (DUF4451) Wood V, Coggill P pcc Pfam-B_5126 (release 26.0) Family This is family of fungal proteins up-regulated during meiosis. 
27.00 27.00 27.60 28.90 26.80 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.22 0.71 -4.37 35 127 2012-02-06 15:39:06 2012-02-06 15:39:06 1 3 102 0 103 133 0 115.60 36 20.33 NEW spsht.sDhYpPcalRspssp+.........pGhCshCt.........phhahshKsSuYh.aHhshpHGIsu.sGhhhssPpthtphp..p..............................cthp...............thpuhCt..pCpc....hlslps........t+...p.hhsaa+Htpc.pH ..........p..phpsDhYoP+alRspusc+...................EGhCshCc.........sscWLsLKNSuah.YchuasHGIou.sGps...att.Ppph+chp..........................................tpsp...................hh-GlCs..sCpc....Wlslss........sp...pthhsaaRHshcsH.................. 0 37 66 92 +14468 PF14617 CMS1 U3-containing 90S pre-ribosomal complex subunit Wood V, Coggill P pcc Pfam-B_3046 (release 26.0) Family This is a family of fungal and plant CMS1-like proteins. The family has similarity to the DEAD-box helicases. 35.00 35.00 36.50 35.30 34.90 34.50 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.56 0.70 -5.31 5 248 2012-10-05 12:31:09 2012-02-06 17:00:37 1 7 211 0 174 236 2 221.00 27 77.70 NEW TToscRKRpup....pcpuGKKcK.pt.........+RpK+ccRKcu-Lhpht.DVcuG....LNpAhA+MsPcLLADYlAsplKRFtoDLSSVELED+YIsASAIpDTTSFTKPRTLDNLP-F.LEcFSchssKLspusKoNGSPHTLlLTuAALRAADLuRulRKaQ...TKsscVAKLFAKHIKLcEpIoaLKsSRlGIAVGTPsRItDLl.-sESLoVDpLK+IVlDASalDpKsRGILDh+ETpcslhc.lLGpKplp-Rac..-cKlcVlFY 
........................................................................................................................................ttpt.......................tp......................tptp.t..........p......t...th..........p...t.................t..p...........psp....h..-hh.tph.h.p..............t......h.......p.h.osl...ELp-h.............ls...........p.ss..h....h.......sssph..p........p.t...h.cs..l..ssa..l.c..t...h....s.......t..................p........l...........pt...p.....p...p...toPh......hlllssuulRus-lh.Rulpsap......stssplhKLFAKHhKlcE.........plph.L...............c.....p.....p.......p....sp....lulGTPsR............lpcLl...c.....p........s..uL..p..l..s..pLchlVlDhsahDpKtpslhDh.-h..p..lhc.hLt........................h.............................................................. 0 52 92 139 +14469 PF14618 DUF4452 Domain of unknown function (DUF4452) Wood V, Coggill P pcc Pfam-B_6056 (release 26.0) Family This fungal family has no known function. However, it is rich in paired, as CXXC, cysteines and histidines, but these do not fall in the conformation that might suggest zinc-binding. 27.00 27.00 49.30 44.50 25.00 23.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.15 0.71 -4.43 14 83 2012-02-06 17:08:27 2012-02-06 17:08:27 1 2 81 0 67 81 2 156.30 44 82.24 NEW sHH....GGRsRRus+hSuupssp+QFR.GV+SM+-L.sEusulsuFRtRFEAuRSFDLEDDhEFCP.sLLTEsDLsSIpS.tuS-RSSLuSsSPpuSPtQp..ps.....ssuhSLsuuussh.sPsh.......pp..sthKlHQPuAsRsRNAIPIVNPsTGhshoSPPsSlsP.t.M........htRRW ....................................pt...ssRsRRss+huspps.p+QFR.....GV+SM+-L..sEusuloAFRsRFEAGRSFDL-DDhEFCP.sLLTEcDL.................pSIpS...tuS-RSSLuSsSP-oSPhQp.hps.....ssuhSLssu..o.ss.h.hsssh......pt....sth+lpQP.oAsRs.RN.A..IPIVNPsTG....hploSP.Ps....ShpP.................................. 
0 9 32 54 +14470 PF14619 SnAC Snf2-ATP coupling, chromatin remodelling complex Wood V, Coggill P pcc Pfam-B_4045 (release 26.0) Domain This domain appears to play a crucial role in chromatin remodelling for yeast SWI/SNF. It binds histones. It is required for mobilising nucleosomes and lies within the catalytic subunit of the yeast SWI/SNF. It is found to be universally conserved [1]. 27.00 27.00 29.30 29.30 25.80 25.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.65 0.72 -3.81 109 470 2012-02-07 11:49:24 2012-02-07 11:49:24 1 26 242 0 281 441 0 79.00 40 5.41 NEW pE....Rppp..ct.................................t.hs.RLhp-sELP-hhhp-tsth..............hpt-pptt.......GRGsRc.RK......p.VpYs.DuLT....E...-Q......WL .............................................................................................................h.-Rc+c-t......t.........................scphP.RLMpE-ELPshhhcDcscl..............hppE--Ept......GRGuRp.RK......c.VcYs.DuLT...EcQWL............... 0 80 142 228 +14471 PF14620 YPEB YpeB sporulation Coggill P pcc Pfam-B_309 (release 26.0) Family YPEB is a protein that is necessary for the functioning of SleB during spore-cortex hydrolysis. 
27.00 27.00 27.10 27.20 26.90 26.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.90 0.70 -5.44 62 303 2012-10-01 23:09:26 2012-02-07 14:26:29 1 9 272 0 84 258 0 334.50 35 74.40 NEW hll.ssuhWGapppp.......c+sshphthpNpYQRAFa-LstpV-plcspLuKslsssSppptt...hhs-lWRhuutApssluQLPlsths...hscTpcFLuplGDFuYplut+shssp.sLocc-apsLppLappusplpppLpclpppl..hpsplc.Wh-.............lctth......ss..phppss........................sshlsstFpsl-cpl.ppYP.sl.a-GPh...S-ph.pcpsP+tlsG..plopc-ApphAppalshp.p.tphpsspssptsphssYshplts.tpptt...hhh-lo+pGG+llahlss..Rslu.........psp.lshscAhppAppFLpptGa..psMpss.stph.-........NhushsF..s.hppssVhlYPDhlKV+VALDsGcllGa-ApsYLhsHpp.Rsls ...................................................................h.llusuhWGYppap.......E+sslhhts-NpYQRAFa-LshpV-.LcsplupolshsSppphs..ssLs-lWRhoupApssluQLPls.hs...hscTpcFLuplGDFoYphuh+shptc.sLscpEacsLppLappusplpppLpclpphl..hpssL+WhD...........................lchshus...ptp.sc.........................ssllsu.hcsl-+sl.psYs..shta.GPh.......Ssph.pcpsshthpG..+tISc--AtcIAcpFlshp.......t.....s.....p......plp.l.p....p.....us.psuph.shYolplpstsppst.....hhh-lotKGGaslahh..ss......Rpl.p.............cp+.lSls-Ats+uhpFLcc...pta..psMphhpoppaD..............Nlulaoa..V..s.s..pssVhlYP-tIphKlALDsGpIlGFsApcYLhsHpc.RslP............................................................. 0 35 63 71 +14472 PF14621 RFX5_DNA_bdg RFX5 DNA-binding domain Coggill P pcc Pfam-B_20855 (release 26.0) Domain RFX5 and RFXAP reveals molecular details associated with MHCII gene expression. 
27.00 27.00 119.70 119.70 19.50 18.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.45 0.70 -4.73 7 47 2012-02-07 16:00:39 2012-02-07 16:00:39 1 2 30 0 16 42 0 211.40 75 35.26 NEW PGPGpAPPGGLTQPpGTEsREVGIG.GD.GPHDKGVKRTAEVPVSEASGQDPPAKAsKQ-hEDTuSDAKRKRGRPRKKSGGSGERNSTP-KSAAAh-SuQSSRLPhEsWuSutEus..uuuGsERPGssGEAEKGsVLsQGQtDGAVSKGGRGPuSRHAKEAEDKIPLVssKVSVIKGSRSQKEALpLVKuEs-susQGsKDLKGHsLQoSLs+E+KDPKAs ....PGPGRAPPGGLTQPRGTE.NREVGIG.GDPGPHDKGVKRTAEVPVSEASGQDPPAKAAKQDlEDT.uSDAKRKRGRPRKKSGGSGERNSTPpKSA..AAh-SAQSuRLPWETWGSGGEuN..SAGGuERPGPhGEAEKGsVLAQGQpDGsVSKG.GRGPSSpHsKEAEDKIPLVsSKVSVIKGSRSQKEAh..LsKGEs-TAsQGNKDLKtHVLQuSLopE+KDPKAT. 0 1 1 2 +14473 PF14622 Ribonucleas_3_3 Ribonuclease-III-like Wood V, Coggill P pcc Pfam-B_6419 (release 26.0) Family Members of this family are involved in rDNA transcription and rRNA processing. They probably also cleave a stem-loop structure at the 3' end of U2 snRNA to ensure formation of the correct U2 3' end; they are involved in polyadenylation-independent transcription termination. Some members may be mitochondrial ribosomal protein subunit L15, others may be 60S ribosomal protein L3. 
35.00 35.00 35.10 35.00 34.90 34.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.63 0.71 -4.18 70 4780 2012-10-03 08:45:47 2012-02-07 16:33:03 1 23 4550 5 1167 4144 2037 128.80 40 47.56 NEW cppLLppALTHpSa....s....p.....t....p..p....p....p....NERLEFLGDAV...Lp..L....s.....s.oc..........a.l.a.cp............h.s.h..s..E.G.t.........h.s+hR.A.s...lVs.cp........oL.AphA.+clp..........LucalhL.....t+GEtt.psupsc..cs..ILuDshEAllGAIYLDtGhpsApcFl...tch...llssl ..............................................pLLppALTH.p.S..h...........s...............................t....t....t.................p.......p.........NER....LEFLGDuV...Ls.l.....l............l.u.c........................h.L...a.cc............................s.t...s...E...G.c.................................L.o+hR..A..s....l.Vp.pp..................oL...Ap..lA.+.chs...................................................Luch.lhL........................GpG...E.tp.....oG.G.p....c.+......sS............I.Lu..DshEAllGA.laL....D...p...G......h..p..s..scphlhphh....h..................................................................................... 
0 402 757 987 +14474 PF14623 Vint Hint-domain Coggill P pcc Burglin T Domain This short domain is a conserved region of intein-containing proteins from lower eukaryotes 27.00 27.00 27.40 27.90 26.50 25.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.34 0.71 -4.75 14 51 2012-10-03 10:25:13 2012-02-08 11:39:37 1 9 47 0 42 56 43 164.60 36 22.29 NEW sCFsGso.VpLuuscs.............l.........plcplRtG.pVhsPp....Gs..ccVtsVLpTsVpp..tplCcl...........G.sLhITPWHPl+h..tuc..WtFPsslu.....pscs......lYSlLL-ss.....HAlhV..sGhhsVTLGHGlpt.......pDltuHtaFGs.ppVs+sLttLsthtpG..hlhstuh..pRsst.TGhVpG ..................sCFsGps.Vplussps......................lplcpLRtGhpVhTPp....Gs..R+VtsVLpTsVpp...tslCpl...........................G..sLhlTPWHPlph..ssc...WhFPsshupt.s.h...sss........lYSVLLcssss...........spAHAlhV.....tsh.hsVTLGHGlps.........spDlRAHtFFGsYptVhcsL.tpLst.h...sG..llhstGh...Rs..oGhh................................................................ 0 16 26 36 +14475 PF14624 Vwaint VWA / Hh protein intein-like Buerglin T, Coggill P pcc Buerglin T Domain VWA-Hint proteins carry this conserved domain of around 300 residues, now named the Vwaint domain. Such proteins do not seem to have a signal peptide for secretion. Generally, this domain lies between the N-terminal VWA domain and the more C-terminal 'Vint'-type Hint domain. The exact function of this domain is not known. 23.50 23.50 23.60 23.50 22.20 23.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.82 0.72 -3.87 28 182 2012-02-08 15:26:40 2012-02-08 15:26:40 1 17 64 0 112 174 28 78.50 32 12.02 NEW DptspuLhtDLpt...............QlphAhsspphYc+WG+pYLhSlhsAHshQhsssFKDsus..a.G..oshFppsp.............tphsshaDtLssPps ................D.hstuLhtELp............................chppth.....tsp.phYEppGRuYhLSuloSHuhQRs..su..Rs.Ds..........................os.h.hh.....................hphsshhc.l..sp.............................................. 
0 19 51 92 +14476 PF14625 Lustrin_cystein Lustrin, cysteine-rich repeated domain Coggill P pcc Jackhmmer:O44341 Domain This repeated domain is found in proteins from lower eukaryotes in lustrin, perlucin, pearl nacre, and other similar protein-types. Each repeat lies between Kunitz-BPTI repeats, in certain species, which are also cysteine-rich. The cysteines may form the disulfide bonds observed for other members of this superfamily. 27.00 15.00 27.00 15.00 26.90 14.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.27 0.72 -3.88 56 1210 2012-02-08 16:11:41 2012-02-08 16:11:41 1 138 35 0 833 1225 0 45.60 27 22.43 NEW ssCshG.pPhl.s.ssp.htC....sssss...CPssaaC.ahGss.ps.olCC.s ...............Cst.G..pshh.......t.ssp.shtC.....sspss......C.P.s.s..ah.C...p.....hu.ss.s...pp...sl....CCs................... 0 301 402 824 +14477 PF14626 RNase_Zc3h12a_2 Zc3h12a-like Ribonuclease NYN domain Coggill P pcc Jackhmmer:O18125 Family This family is found to be a divergent form of the NYN-domain- containing RNAse family. 21.00 21.00 23.80 22.00 20.80 19.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.66 0.71 -4.25 5 17 2012-10-03 20:43:45 2012-02-14 17:06:22 1 2 7 0 15 17 0 116.20 32 19.74 NEW Y+ssLNLPVKALsDIIhhFLIRGHKTsVYLPKYY-Dalo-sGlSKVDDlVAFc................+Ll-LsaI..Kh..Ip.s.ca+WFNEVuchAD+sGAVFVSSsE.YRpRchclcYsKsSERIITPC.FLNA-DRLM ...............L.V+sLh-IllpFllcGHKTslaLPcaYpshhs...hpKVDDl.sFphLhsLchI+F...lpp...csccthhppVhtcA-+ssGlhVSssE.hhtp........................................................ 0 5 6 15 +14478 PF14627 DUF4453 Domain of unknown function (DUF4453) Coggill P pcc Jackhmmer:C9CWS7 Family This short domain is found only on a small subgroup of proteins from Gram-negative Proteobacteria that also carry a YARHG domain, Pfam:PF13308. They carry three conserved tryptophan and three conserved cysteine residues. 
26.10 26.10 26.10 98.00 25.30 22.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.46 0.72 -3.92 11 12 2012-02-14 17:36:03 2012-02-14 17:36:03 1 1 11 0 4 12 2 107.00 41 57.09 NEW uC+VDTupppL..DlpDlthR.+pLhcLPlpD-hESAClGWhGsPlsLpAG+uh.soshIGpIpsGDslpauHhs..hGGWoYVola.ss.DWtlhouGWhc..hsths-pCspFAG .uC+VsTppppL..slpDhthR.+pLhsLPlpD-hESAClGWhGsPlsLhAG+uh.soshIGpIpsGDsltauHhs..hGuWoYVosa.ss.DWtlhouGWhc..hsths-pCppFAG 0 0 4 4 +14479 PF14628 DUF4454 Domain of unknown function (DUF4454) Coggill P pcc Jackhmmer:A8RR49 Family This C-terminal domain is found only on a small subgroup of proteins from Gram-positive Clostridiales that also carry a YARHG domain, Pfam:PF13308. 25.00 25.00 97.50 97.40 21.70 20.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.49 0.70 -4.71 5 9 2012-02-14 17:48:16 2012-02-14 17:48:16 1 2 8 0 0 9 0 198.90 40 43.06 NEW caDcs+uctt.KpGY-uLPPAPYLsLLsc+sEhGVpLYSDluHAsD+GlYYsAcGTISVPloITsEQYculLs-GuElclVlNELTGEo+hL++ss.so-YG-s...hLlY-tGsEP.pssGE-TGtYh.loY-PsSGsYoLWssSsDTVFKTVYcGsVYVLKGA.........sEEaYsYFshP-+upsE..o..u.RVMpFs-.sshGssGY.sGNpLsaDuKGYlKAIYaLGD ...phDcsphctt.phth-sL..APYLphLscasEhtlpLhuDhspAhDhGhYYsspGoISVPholTtEQhps.httGuplclshsELTGEpthLphs..sschsth...hhha..Gp-s.ts.G.-sG....loh-.pSGpYpLWpsSsDTlhKTVYcGslYlLKGA.........spphYshhshsscupsE..s..u.hs.phst.ts.G.psh.hGNpLhasu+GYhpAlYaLGD 0 0 0 0 +14480 PF14629 ORC4_C Origin recognition complex (ORC) subunit 4 C-terminus Eberhardt R re3 Jackhmmer:O43929 Family This entry represents the C-terminus of origin recognition complex subunit 4 [1,2]. 
25.30 25.30 26.10 27.40 24.50 25.20 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.26 0.71 -4.93 87 326 2012-02-17 08:55:20 2012-02-17 08:55:20 1 17 258 0 210 324 1 202.70 22 39.47 NEW hclscphLt.lssp...............................................tpphhptWNptlpp....Lhpsp.phpphLpphatts.+sh.....pphhs.h...hh..sl..splsssps............hlsssphh..........thsph.hsc..................phph.lpuLSsL-LsLLlAhs+Ls.thh-..............shNFsh.sas.EYpphhpptphp............................................psas+slsh.........puaE+LlphsLlhs............................t.ssststt-h.phhplplshppl.pslppt ........................................................h.plhpphLp.Lssc...............................................sptahppWNpplpp....Lhpsp...phpphLpphaphs.psh.....pshhthl................hh.......sl....spls..s.pps...................hlsssshh........pssphhp.D....................schph.lpuLSsLELsLlIAhp+Ls..cha-.............tshNFph.sYs.Eapchhpptptp............................................phat+slsh.........cAaE+LhphtLlhs............................ttsssphth-h.phh+l.ls.pplhpslt..h.................................................................. 0 68 114 172 +14481 PF14630 ORC5_C Origin recognition complex (ORC) subunit 5 C-terminus Eberhardt R re3 Jackhmmer:O43913 Family This entry represents the C-terminus of origin recognition complex subunit 5 [1]. 
25.00 25.00 30.80 27.80 23.80 23.00 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.78 0.70 -4.94 68 327 2012-02-17 09:01:59 2012-02-17 09:01:59 1 12 261 0 243 309 2 262.10 25 53.50 NEW lhFPsYo+cEhlpILtps..........tss.h.....................p.s.plaspFlshlhcshhsss.pclsplppls..cphWspaspPltsG.ph.....s..................tp-hs+Lapphpshhpp...thpslh..spphsss.....p.t............................t.t...............ttht........s.ss..s.p.....hcLPhhuKaLLlAAYLASasPs+hDtphFu.Ktp..u.....+p++.........p.pp.t...........................................................p......pp....tc...h...........................ssp..h...L..uPpsFsLERLLAIapuIh.....................s.p.t....ss..................................sss-......lhspluTLssL+LLsps..u.u.....s.................ssLD.u..sKa+sN..........Vuh..........-hltplA+uluh-lscYLh ...............................................................................................lhFP.Yshtph.pIL.ts................s.............................................p.s.phaspalshlhsshhtss.ps.l.pLppls..thhaspa...sp....Plhpsph.....t......................................p-h...p.+Lhpp.hps.hhpp...thp....plh...p...t.h.sstp.t.................................................t........t..........................tphp.......shts.ttp.............hpLPhhuKalLlAAYLASaNss+hDtphFs.+tps......................+p++p..ttt......................................................t..tp...tp..h....................................spp..h...l...sPcsFsL-RLLAIa.uIh...................sp...ss........................................................sss...lhtpluoLhpLpLlshsus.....t................................................s.h-.s.s+a+ss..........lsh..........-hlttlu+..s..lshpltpal.......................................................................................... 
0 72 122 203 +14482 PF14631 FancD2 Fanconi anaemia protein FancD2 nuclease Coggill P pcc Jackhmmer:Q9BXW9 Family The Fanconi anaemia protein FancD2 is a nuclease necessary for the repair of DNA interstrand-crosslinks. 27.00 27.00 27.30 27.10 26.80 26.60 hmmbuild -o /dev/null HMM SEED 1426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.09 0.69 -14.36 0.69 -7.47 4 248 2012-02-21 15:25:38 2012-02-21 15:25:38 1 8 108 1 170 249 6 667.60 19 82.83 NEW MlSKR+hScs-scEs.TEDuSKTchpshStcoKKS+luccsp......EN-SVFVcLLKtSGlhLKsGEsQNplAVDQlhFQKKLhQsLRKHPuYPplIpEFlSGLESYIEDp-pFRNCLLsCp.hpsEpu.ohusSYscSLIKLLLGI-ILQPAlIphLFEKlPEFhFEstspDGlNhPRLIVNQLKWLDRlVDGKDLosKlMQhlSVAPVslQHDIITSLPEILtDSQHu-VuKELusLLhQNTpLTVPILDsLSSLcLDsshLuKVRQhVMspLSSV+LEDLPVllKFlLHSVoAsDulEVIu-LRcpL-LppCVLPupLQASQsKLKSKuhA.SSSssQpoSuQsClhLlFDVIKSAIRapKTISEAWlKAIENhsSsu-HKVlDLlhLhIIaSTNo.po+KtsE+VLRsKIRpGCIQEQLLQssFpsHhhVlKDhhPSILuLAQoLLHS.DpsllsFGShhYK.AFphFDoYCQQEVVGALVTHlCSGsEuEVDsALDVLhELVlLpPStMhh.AsFVKGILDYh-NloPQQIRKLFalLSTLAFSQ.pppuuHIQDDMHlVIRKQLSSTV.KYKhIGIIGAVTMsG.MAtcRscssu..pcpusLSpEpssQVooLLpLV+SCoEpSPpAuALYYDEhANLIQcpK..LDPpsL-WlG+olhpDFQDsFVVDhssss-GsF.FPVKuLYsL-E.-TQsGIAINLLPLhhppp.uKsusphoutpSppRhVSPLCLuPaFRLLRLCstcQHsGsLEEIDuLLsCPLaLTDLEssEKL-ShStpERpFhCSLlFhTlNWFREVVNAFCQQssPEMKGKVLTRLpsIsELQslLpKhLAsTPsYVPP.AsFDsEohDhhP.SsoslsAKttpKtKsG.+KpKuDuSKsSSuDphptEcsS-s-.ssochucl.sKctstKEs.KohlpLpsY+sFFRELDlEVFSlLHCGLlTKhILDTEMHTEAoEVVQLGPAELLFLLEDhspKLEphLTss.A+RlPFLKsKGs+slGFSHLpQRSspEIspCVspLLsPhCNHLENhHNaFQsLhsENpGVVDtsslslQE.plMuSCYQpLLQlhHsLFAWSGFSp.EppsLL+SALpVLusRLKpsE.ps.PLEELlSpSFpYLQNF+pSlPSFQCALhLhpLLMsl.EK.usssspp+EKlASLAKQFLCpsWs..uG-KEKuspaN-pLHsLLsIYLEHTDslLKAIEEIuGVGVPELlNusKDAsSSTaPTLoRpTFlVFFRVMMAELEKoVKpI.sGpsuDSQplppEKLLhWNhAVRDFpILINLlKVFDS+PVLpVCLKYGRLFVEAFLK.sMPLLDaSF+KHREDV.SLLcThQLsTR.LHHhCGHSKI+QDTtLTpHVPLLKKSLE.hVhRVKAMLsLNpCpEAFWLGsLKNRDLQGEEIlSQs..S...pEusAE.-SE-shpSpAucsc 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t............................................................................................................................................................................................................................... 0 80 98 142 +14483 PF14632 SPT6_acidic Acidic N-terminal SPT6 Wood V, Coggill P pcc pfam-B_9510 (release 26.0) Family The N-terminus of SPT6 is highly acidic. The full SPT6 protein is a transcription regulator, but the exact function of this acidic region is not certain. 
23.30 23.30 23.40 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.86 0.72 -3.82 58 252 2012-02-21 15:38:02 2012-02-21 15:38:02 1 29 221 0 193 259 0 90.40 37 6.20 NEW pDSSEE-.......----DEEEt+c......l+EGFIVD....-DE.......----p..pptp......c++++K++++cc+ctE--...tLDEDDL-LltENsGhttpp.........sKaKRLKRupc-- .................DuSEE-.......-p-DDE-ct.cp......................hcGFIsDp...-DE........E---t....pctt...........cpccc++++.++c.......+..........p.p.t--..........pLD-DDh-LItENhGsphcct........pKaKRLK+hpc-....................... 0 57 100 160 +14484 PF14633 SH2_2 SH2 domain Wood V, Coggill P pcc pfam-B_9510 (release 26.0) Domain \N 27.00 27.00 31.50 27.10 23.90 24.90 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.36 0.70 -5.28 4 312 2012-10-01 22:44:06 2012-02-21 16:35:46 1 42 265 11 230 336 4 211.70 35 14.81 NEW ..+soYaDh-AEtcDscpEcc.pptpQRsphl+RVIsHP.F+slNh+QAEchhcoM-pGDlVIRPSSKG-sHLsVTWKVu-GlYQHlDVpE.tKENsFoLGpsLhls......opcapDLDEIlscYlQshAphhc-hhsHchF+c...Gs+KchEchL.chp+tpPsh.sYahshs+-hPGhFlLsapspups+lph..VplTPsGFhhpuplaPoVsuLhphFKshY ......................................................h......aD.ptE..t.t.D.p..c...p...cc.t.p.c.p.p.t.R......h.h.p.RVItHP.F+.shshppAEchLts...t...s..t..G-slIRPSSKGt.sHLslTWKVs....Du......laQ..H..lD..VtE..s..K.....-.....N....t.....a....olG+pLhl.s.......pppapDL.DEllscaVpsMuphlc-lhs.H.c+.a.pc...........Gs.+p..c....h..-..c..............hLpp.ppts.Psp.sYhh..sh..sp..c..hPG..h..FhLs.....ahs......ssp......s.+h..ph...VplhP..cGac..h....p.tp.........asslptLhp..hFKph...................................................................... 
0 79 131 193 +14485 PF14634 zf-RING_5 zinc-RING finger domain Coggill P pcc Jackhmmer:Q495C1 Domain \N 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.55 0.72 -4.23 118 801 2012-10-03 15:03:13 2012-02-21 16:43:03 1 78 214 0 571 6385 343 47.50 29 12.65 NEW pCs..hChpth...............ppt....hhlssC.uHl.hCp..pChpptt.................ttpCPh..Cpp ....................................Cs..lChpph........................t.ppt............shlhs.C..uHs.hCp..pClpphh...........................ttphpCPh..Cc.................. 0 166 222 455 +14486 PF14635 HHH_7 Helix-hairpin-helix motif\ \ Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.25 0.72 -3.91 4 299 2012-10-03 02:11:09 2012-02-21 17:03:10 1 47 257 2 231 2357 68 104.10 41 6.91 NEW -DlhsLphHPhQchlspEpLhpAL.stFlshVN.VGVDVN+AlspsYptullpYIsGLGPRKusalLKhLppsNsRL-sRoQLlThshMu.KVFhNCAGFlhIs .................................c-IlsLph.H.P.hQp.h.ls...p-cLhptLcpthV..shVNt........VGVDlN..cAls.......c.sap.sslLpaVsGLGPRKAstLl.......K.......hl.p..p.s...s..stlp.......sRppL....ls.........h......s......hhGs..+VFhNCAuFLpIp................................................... 0 87 136 196 +14487 PF14636 FNIP_N Folliculin-interacting protein N-terminus Eberhardt R re3 Jackhmmer:Q9P278 Family This is the N-terminus of folliculin-interacting proteins [1,2]. 
25.00 25.00 25.70 29.30 24.80 23.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.96 0.71 -3.68 40 205 2012-02-22 08:14:46 2012-02-22 08:14:46 1 9 140 0 136 194 0 126.80 33 11.18 NEW plRlllhQ-sptcspp..lLFDSpsspphspsssss.st..s.......................tsphspttt...hpssssp..............shttp.p....hpspsppsthpa.t+....ssDlshL.u-hlFGSss.MuY+G.oohKlHhls...us.sphhhopsh .........................................plRlIlhQDsppcscp..lLFDSpshp+.spphs.spphs.sss....................................................sphsp........ssssp...............................puhpsosss......pscpptsc.aphsRs...usDsshL.uEMhFGSlA.MSYKG.SohKlHhlp...oP.splhhopl....................................................................... 0 27 49 88 +14488 PF14637 FNIP_M Folliculin-interacting protein middle domain Eberhardt R re3 Jackhmmer:Q9P278 Family This is the middle domain of folliculin-interacting proteins [1,2]. 25.00 25.00 27.10 26.50 23.70 21.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.54 0.70 -4.78 17 166 2012-02-22 08:15:25 2012-02-22 08:15:25 1 9 87 0 101 150 0 226.90 48 22.26 NEW ptsstssppsssht++t+lulullhpl...........s-sptpph......cpFhhpHhsllEochs+L+stlcp..............ushptptalphlhpAhpch.pphlssLaou........PRlppPVWLshho................sppppplspcFhppLspLlpphsp+popaFlSsLlTAVLTaHLuWVsTVtsssts..........p.hh.hp.ppplsplspsaPY.NsLWAQLuDLYGulG..............sPs+Lu+TlVsGspp...llp+lL.lLSYFIRCSElpc ...................................................................................sh.st-osusssuhlR+KKIAIulIFSL....................scp--tpp.........pFp-FFFSHFP....LFESHMN+LKSAIEpAMh.......................p+hutsu.php..hhh.sRlh-ALsEF.RsTIpNLYoh.........PRIsEPVWLTMMSu.........................................o.EKspLCpRFhKE.FshLhEphsKNQ...FlsALlTAVLT.HLAWVPTVMPsspP........PI+hF.EK+oSQSVshLAKTHPY.NPLW.AQL.G..DLY.G..AIG..............SPVRLuRTVV.VG+cp-..hVQRlLYhLTYFIRCSELQc........................ 
0 30 38 64 +14489 PF14638 FNIP_C Folliculin-interacting protein C-terminus Eberhardt R re3 Jackhmmer:Q9P278 Family This is the C-terminus of folliculin-interacting proteins [1,2]. This region is responsible for binding to folliculin [1]. 25.00 25.00 29.90 28.70 20.10 21.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.25 0.71 -4.95 16 160 2012-02-22 08:16:55 2012-02-22 08:16:55 1 7 88 0 99 144 0 183.60 49 17.82 NEW clPhPpsp.hp.s.s...............shuRSLhuGhs-sYsPDFVLpG..hsssc.hcpp.....................LtsD........LthusppSsl--s..luEuVsIlADsDsWsVplhSSpppshs.......uh.VusSplVs...........uMLpShhsLachshssp...FClhaLED+LpElahKScsLuEhL..............pssst.lshcclspsLGl-tuDlPLLluVAusHoP.Vs ........................................................lPhPt..Sp.lpsptst.s.t............NFG.RSLLuGYCsoYsPDhVLpG.husD-+.h+Qs...........................LhuD.....LsHuVpHPVLDEP..IAEAVCIIADhDKWoVQVASSQR+ss-..K.............LGp-V...L...VSSlVS...........sLLcShLQLYK.h.sLsss...FClMHLEDRLQElYhKSKMLoEYL............................................+GphR.VHVKELusVLGIESsDLPLLsAlASTHSPaVA........................ 
0 26 34 62 +14490 PF14639 YqgF Holliday-junction resolvase-like of SPT6 Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain The YqgF domain of SPT6 proteins is homologous to the E.coli RuvC [1] but its putative catalytic site lacks the carboxylate side chains critical for coordinating magnesium ions that mediate phosphodiester bond-cleavage [2] 35.00 35.00 35.00 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.64 0.71 -4.65 4 495 2012-02-22 14:01:54 2012-02-22 14:01:54 1 42 441 2 270 514 14 143.20 28 12.53 NEW pGKh.+VLuluhupGR..scslhCshVNscGEssD.L+L........shc.c-+ppptp.hEsLcpFl.spKPcVluVuG.NhcAphlhcclpcslpch-..spph.slsl.hV-sElAlLY.NSc+utsEF.shPPll+.sVuLARhlQsPLlEaApls ..............................................h.....................sh.h.h.shlctsGcll-phpl.....................................h.s...t.psp.p.p...c............p...pshpp...L....t....ph....l.p...p....+.csclIul..s...G.t.u.t...cop.c..l....hcclt....c.......h...l......p.........c...........h..............p....................t..............t.....................p...................lt.......l..hlVs-psAplYpsSchAtpEFP..c.h.s.s.hlRtAVSlARpLQDPLsEhs...t......................................... 
0 95 154 231 +14491 PF14640 TMEM223 Transmembrane protein 223 Coggill P pcc Jackhmmer:A0PJW6 Family \N 22.00 22.00 22.30 22.20 21.90 21.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.93 0.71 -4.32 14 99 2012-02-22 14:08:19 2012-02-22 14:08:19 1 4 75 0 62 100 0 152.60 31 73.24 NEW +DVlLFca-ps+FF+hLslFuhsQhhFWsYLuaFu..ho.....cs.ss.p.c.ppchshhptl...............s.LupspaR.Glshhs....lhlGshlLhsuhhFoLRSVptllLp+GGppVolhTYuPFG..ppRthsVPLcplSshtsRpps..pu.lPlKVKG+tFaalLD.pcGcFpNspLFDhTVGlpRp .......................................................................pDlhLaca..cps.+FathlshFshsQhhFWs.huhhu.hs.........ps.s..........tc...ht.h.................s.htsshaR.Glshhs....hhhGhhlL.hssh..h..FshRSVp.lhLppGGp.....p........VslsT..auPFG...h.tpph.s..........V.......PLppVSsh.spRtps...s.lPlKlK...G+p....haalLD..+tGcF.NspLFD.TsGhhR............................ 0 23 30 44 +14492 PF14641 HTH_44 HtH; Helix-turn-helix DNA-binding domain of SPT6 Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain This helix-turn-helix represents the first of two DNA-binding domains on the SPT6 proteins. 35.00 35.00 35.20 35.30 34.40 34.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.59 0.71 -4.12 4 227 2012-02-22 16:49:36 2012-02-22 16:49:36 1 29 207 2 176 238 0 131.10 33 8.87 NEW LpshsshsuEDDELEEpstWIhcphhss.shshptsh........hosFp.......ptI+pAlpFhpppphEVPFIhhYR+-Yl..........pP.Ls.NDLWclaphDpKappL+s+KpslpRLapchph..........DcPLs .............................................h.hp.-..-..ELc-EApWIhphhhsc.p.s...hs.h..ppsh....................ppsFp.....................ptItcsLcFhp..spphEVPFIhhaRK-Yl...........................................................................c..............Ls.hsDLW+laphD.KappLhp++psLp+ha-p.hpt...............h........................ 
0 58 92 143 +14493 PF14642 FAM47 FAM47 family Coggill P pcc Jackhmmer:Q5HY64 Family The function of this Chordate family of proteins is not known. 27.00 27.00 27.70 27.40 26.80 26.80 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.90 0.70 -5.09 2 239 2012-02-22 17:41:02 2012-02-22 17:41:02 1 10 33 0 65 246 0 127.20 33 63.46 NEW MGDpRPQDRPpS.GMDSpPWYCDKPPSKYFAKRKHRRLRFPPVDTQNWVFVTEGMDDFRYuCQSPEDTLVCRRDEFLLPKISLRGPQAD.KSRKKKLLKKAALFScLSPsQPARKAFVEEVEAQLMTKHPLAMYPNLGcDMPPDLLLQVLK.LDPERKLEDAhu.CEupEKTT-.PTEsGKYPCGE.sPRPPET.VSpL.Pp.PKTPVSShRPEPPcTtVSpLRPpPPKTpVSSLH.EPPETtsSHLRs-PPcTtVSp ........................................................................................................................................................................................................................................................................................................................t....hsp..........t...sps...hS.p..Lp.E.P..Pc...T.t.s.SpLp..EPsco......s.Splp.-P..c...t.h..s........................................................................................... 0 28 30 38 +14494 PF14643 DUF4455 Domain of unknown function (DUF4455) Eberhardt R re3 Jackhmmer:Q9P1Z9 Family This domain family is found in bacteria and eukaryotes, and is approximately 480 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important. 
28.40 28.40 29.00 28.80 28.30 28.30 hmmbuild -o /dev/null HMM SEED 474 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.31 0.70 -5.86 16 121 2012-02-23 13:51:07 2012-02-23 13:51:07 1 10 67 0 82 82 0 353.80 27 33.39 NEW ht-s++c+appslsuhpc-hsplup-hEstltcsutthhpplucsDpplsplhspl-s-ssLhchohppLhpla-pVsp.ch.h+pphIcpL-psLpphEppRt-pLcssLc+hhptLpchualhps-VpRhlscEAMtlNpslLtNRRAhAcLhhpLhpsclcpEtspctcWppthccW+sL+ppphlppFp-hhtStchtsPsphpphhEshtcpQtslpppRhchlpslsslhP................Pshopsp....lpcWhsslpslscph....sphptpthtcl+tph-csppcshsplpph+ppLhphts..hsp-ctpsLVstchh.hssphpppscchLEthcphhEphutphcppspsLhpahpclspha-tHppthtpp-t-lppclcptRppasppppppEspLDthlcplpptusEpsLctplcpshshLcplcppYcsF+pphsplsppYPstlhpphpsYppsLsphhtlcchhcpshtsph .................................................................pppta.thh.th.ppht.ls.php..hhp.tt.h.t.l.p.stthp.hh.thttp..h.thphpt...l.plhppltt.....ppp.IcphcttLtphE.pRhppl...pthLpchsthlpchuahh.s-l.+llppcuh.hNtslLuNc+uhupLhhpLhpsplpp-h.ptphc.WpthhpsW+tlp+pthlppFpphhts.tphppP.th...............p........hpthhppQ..lt.p.pRhph.Lpplp.shhP......................................Pshspsp....lpcWhpplpslppph....sthphphh.pl+h.h-phhppsht.hpph+......ppLhphth....h.spcchpphlp..h..h........hhtthttphct.lchhtp.h-t..ttp.p....tplhpahtthsthh-.pt...h...p.ph.pphtphptt.p...t..ctthp..htphp.tsp.tttLt..ht.s.t.Lt.hp..h.............t.................................h................................................................................................. 0 37 50 60 +14495 PF14644 DUF4456 Domain of unknown function (DUF4456) Eberhardt R re3 Jackhmmer:Q9P1Z9 Family This domain family is found in bacteria and eukaryotes, and is approximately 210 amino acids in length. There is a single completely conserved residue E that may be functionally important. 
25.00 25.00 25.10 26.80 23.20 23.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -10.93 0.70 -5.10 19 84 2012-02-23 13:52:56 2012-02-23 13:52:56 1 8 59 0 55 72 0 181.90 26 14.14 NEW hhshs-.aYcpct.p+tlsRPppl.psh-phs-slppplpph.pppupcappsslpchRsQlpphpphhpplsthlhpshhppahpphppuhpslppc.Fpph.pphppp+pp+tppLRPsLucPsphpELcsLpppEpcRppch.phlpphpptlh-sttp.uptFlppLsphspphlhhhD.phlhh-Dltssshssspp+pthhhpp+h .....................hths-taacpc..pc.hptPpth.psh-pss-tltptl.ph.pppsppYtspslhchRtQhpphpchl............spl...........s.llhpshhppphpphppshpplptp.Fpp..pphppt+ppptppL+ssLu+PtphpphcsLpppEppRppch.phlpt.pptl.cphpp.uphFlspLsshscphhh.LD.plloh-Dltssphts.ppphphhhppc....................................... 0 30 38 46 +14496 PF14645 Chibby Chibby family Eberhardt R re3 Jackhmmer:A6NI87 Family This family includes the eukaryotic chibby proteins. These proteins inhibit the wingless/Wnt pathway by binding to beta-catenin and inhibiting beta-catenin-mediated transcriptional activation. Chibby is Japanese for small, and is named after the RNAi phenotype seen in Drosophila [1]. 23.00 23.00 23.70 23.30 22.00 21.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.34 0.71 -4.20 14 172 2012-02-24 08:43:10 2012-02-24 08:43:10 1 5 85 0 109 173 0 115.10 33 58.82 NEW MPLFup+...FSPK+sPhR+suShSsh.pslDppTcphEhuhsaGssphcLu.spplhF..csGpWh.........s.us.sssssspcst+L+KcsppLpEENNhLKLKh-lLLDMLoETTA-sHLhEKEl- ..................................................shht.p....Fus+phP.R+.tS.ssh..psh-+soc.p.s-LtL-.Y.ssPphpLu.spphhF..psGpWl..............................tp.u...ptssstc...-spp.L+ccsptLcEENNhL+lch-lLlDMLsppss.c.p................................................................................... 0 25 35 60 +14497 PF14646 MYCBPAP MYCBP-associated protein family Eberhardt R re3 Jackhmmer:Q8TBZ2 Family This family of eukaryotic proteins includes the mammalian MYCBP-associated proteins. 
These proteins may be synaptic processes [1] and may have a role in spermatogenesis [2]. 24.50 24.50 24.70 24.70 24.30 23.90 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.34 0.70 -5.76 34 181 2012-02-24 13:32:21 2012-02-24 13:32:21 1 5 93 0 112 180 2 330.40 21 48.95 NEW ss.phDpRL+pWpchLppR++hQp+lp+chGKpss-lLhNpssTl.....Rsppph.....+cllD...hsp.Psp..ppcs.....hp.hu....thhpp..p.phspp......lstLt.ThP+sEctt..slE.hlGLPpss.ppElhu........p......tptphpspWp+..SphLppRlcctppslccllp..ah..PDl-..sLpV....lGps..h.......hpss..................pspphph.pps.........................ppt...spp-ph..pp...sth....pp..ps.c.....s..p...sh....ps......ulph....supsh......hhh.s....tssp.p.....................tphshthplhFcC.......cPhp.+s.h+tlh.lcNlGspslphpWpphshhtpht...........s..hhhspspcFhF-ppshhlhPGEs+phplhFpPppsulhpppWc..Lp.h.pPslhu................ppt.lhl..pL....pGhChssspatp+hpchpptlhsKpppphsppL..h..pch.....LssllpPspsh......CPY.-RhlsEcElFsstN....PG.a+....s.R.a-DLEsL+pLapplK 
............................................................................................................................................................................................................c.hLp.Wpp.ht.ppt.pttlt.phht+.sp..phlhp.spph.....Rphtp.hphh-...h....h...........t..s.....hhp...................h.th...sh.sphp.......l.p.hhths..h..p..Eh.h.........................a.p..S.hL..+hpp..t.htpl..........s..hs..tLpV....hGps.................................t.tt....t...........................t......t..tt............t...................t................................t....sh.h....ts..h..................tt.t......................t.s..h.plhFps.........hp.ph.pplh.l.N.GshslhapWpph......t.....................................................p...hh.s.p.ppFhF.sp.p.thhlhPGchpphthhFps.psG.lhpphWphts..pP.lhs................tt...h.lpL....huhs.h......h..tthp.hpp........h.tptt..hhppl..h..pph.....l.sl..s...s.......s.Ph.pt.hpcp-hFt..N.....t.h......h.p.t.h..l.tha.............................................................. 
0 45 53 80 +14498 PF14647 FAM91_N FAM91 N-terminus Eberhardt R re3 Jackhmmer:Q658Y4 Family \N 25.00 25.00 27.10 26.00 22.40 23.40 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.75 0.70 -5.37 14 179 2012-02-24 15:46:41 2012-02-24 15:46:41 1 12 117 0 114 165 0 259.20 44 36.29 NEW ptIRpNhsWppLPssl+ptLGsSp+-Y-KtVlcYS.l+sQLRa+sNlV++lt+cEcpYY-cLlcYSpppLhLYPYHLuDlll+tLRlTPFsYYhsllsclLpsE+SYDuLPNFTAADCLRLLGIGRNQYI-LhspsRSs.......+phFt.++ss+chLPppPl.hth.-PWWhVpsGhlhEsDl+h....Lo.tE+shlDpLIDpGsp.....................h.AGpLchslVpuLYpKGLlYL-VPlss-DhIsVPPL-G.FVMNRV.GDYFEsLLYKIFVolDEpToluELAplLplDlppVKsAlSlaCRLGFA+KKs.sslsp.......spl..HsSWts ...........................h.tlcpphsWppLPtplpp.Ltsopc-Yc+pll.Yu.l+ppL.cacssh....sp+lhpcEpcYYEcLlcYspppL........hLYPYHLu.....Dh.....hs+shRlTPFpYYhslht.....-lhpsE+SYDoLPNFTAADsL.RllGIGRNpYI-lhNpsRSp.......+.h.h....h....h.........+p.hs+-hLPhpP.lchsl.EsWWhlphs.lsp--l+h....ho.tEtshl........Dcllcpss..p....................................................................................................ss.h-hpllpuLYp+GhlYhDVPlps-s.......plt.VPsL.EG.FVMNRs......p..s......D.hEsLLY+lFVohDEp.ssVuE..............LAphL.......plDLp..lpsAlShhCRLGaApKc............................................................. 
0 41 62 89 +14499 PF14648 FAM91_C FAM91 C-terminus Eberhardt R re3 Jackhmmer:Q658Y4 Family \N 25.00 25.00 33.10 26.90 23.90 23.50 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.63 0.70 -5.65 9 246 2012-02-24 15:50:15 2012-02-24 15:50:15 1 12 117 0 173 235 0 274.80 26 51.76 NEW s.sKRIAFLFDSTLTAFLMMGNLS..PsLKsHAVTMFEVGKLSDESLDSFLtEL-+V-p.suEGEAQRYF-HAlTLpsTlLFLRpscclh.p.s.Pc..shshulDLLRsESL.uLDssTpSRVLsKNYpLLVSMAPLStElRslSs.ssPsHhGPsIPEVsSsWFKLaLYphhGpGPPSLLLsKGoRL+plPslFpca-+LLlTsW.......................GHDsulVssSNsLlhLNDALoHSAVhlQuaG.hhsp....u-..............olplPFPFpcs-hth...Fohsphph.H.ulppLcpplsLpa.pCGYlThLs......tspph.tst.......st.ttsthpsshs....ht.t.poFshstc...pth........ss.uppp.pptsssps-Wh.L-lsFGlPLFcu-LNpclCc+IsuppLhpc-slpplhpusRcLuhplhpFlppaQshspssc.pst...su....lspss..........ttuslPhPspsLlFcsGcLs.W- ....................................................................t............................................................................................................................................................................hlp.tth......t..ph..htY..hh..........................h.......................h.h..........t.s...............shhh.hG......h..hP..h.............t..ph.....h.......................t.p...hh..t..h..hNp.h..tslhlp.h.......................................................................................................h.s........................................................................................................................................................................................................................................................................................h...th.hGhs...l.t..hs..hh..h...th...................................................................................................................................................................................... 
0 57 98 136 +14500 PF14649 Spatacsin_C Spatacsin C-terminus Eberhardt R re3 Jackhmmer:Q96JI7 Family This family includes the C-terminus of spatacsin. 25.00 25.00 25.70 27.80 24.80 21.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.89 0.70 -5.54 28 154 2012-02-27 09:55:58 2012-02-27 09:55:58 1 4 102 0 95 154 0 263.60 33 14.19 NEW ohlGppLl..................ch.spu......................................................s.........pct.p...........hsshVELLIp..AHpCFstsCsh-GIspVLpts+phs.shLspspcasLlVRLloGlucYpEhpYlFDlLlcscQFEhLLp+.p..h..........Dp.......t........sG.......L+hAlLsaL++hpPp-p.-taphlsL+FsMa+ElAphhcpcAc.ptl.chlts..............p.s...............p............ppsphpppL.p.uhcpahcAA-sYhp-sshphAppCsppApLluLQlclh...............................ph...pllsLs................csphcchlspphsF.pALIlAcAYsh..pssWupsLapQhlhtGsh.pYLp-Fhphhslssslhp-ls+pYptc .........................................................lG.tLhch.h.s..........................................................s.......ph.............hps.sELLIh..AHpCFshsCph...-GIh..plLptsphhs.sths...spcauLllRLLoGlucap-hpalF-lLhcpc..FEhLhp+.t............-...........ts....l+hAlLsal++h.pPtDp.-tashl...........sLpFsMh+Elu.p.hEstAp.tl.chl.s........................p.s............................ps.phpp.LhpuhphhhcAAEsasp-sshppAp+stphspLlsLQl+hh.........................................................sh.hllsLs................cpphhchlhthspF.pA.IVAcAYs.h....s.s-Wu...plLapphlh.ssh.pYLpEahp.h.Lpsshhp-ls+hap............................................................................................. 
0 35 50 71 +14501 PF14650 FAM75 FAM75 family Eberhardt R re3 Jackhmmer:Q63HN1 Family \N 24.00 24.00 24.80 24.40 22.40 23.60 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.21 0.70 -5.07 23 316 2012-02-27 11:20:10 2012-02-27 11:20:10 1 7 25 0 203 266 0 228.50 24 29.28 NEW Qphsas+shpD.pLppKssQLFWGLPSLHSESLsusshlssssSshp...hlFNchSss.Ph..psphos....................hLopspPLshsphQsQs..hh.shPQ.Qs...PlsphpsQApLpsslPlls....PSs.sQl+sCGVsa.ssQscspsLhPoEIppLEaslLpKQ.EpthsLPolVc+SQcsFs.ssPshspcs.....ts.psphssSIhPssFslosEL+++LEpHLpK+lIQcp..WGLPpRIpcSlplhpPQsEhstsspucuppG.....sSp.ShhpucuspsspK.st...stp........hphcpchs+.shspsltph.KDhsts...psossscl.tsssEpphps...........t..pscstts..hu.s+ccLEssLcsHLs+KhspIsEG.lPsoV+pSh .............................................................tp...phFhGlPshpSESl.s.h.........h......t..................................................................................................................................................................h..t.p....t................t...p...th....+t.p.....hsshhpp.....t................p...................................hshhst...p.h...h...c.h.pp..h-.+lpc.hh....p.p...s.st+lp.S.ph.h.........t....t.................................................................................................................................................................................................................................................................................................. 0 20 20 30 +14502 PF14651 Lipocalin_7 Lipocalin / cytosolic fatty-acid binding protein family Coggill P pcc Jackhmmer:P51161 Domain Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The family also encompasses the enzyme prostaglandin D synthase (EC:5.3.99.2). 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.78 0.71 -4.42 3 209 2012-10-03 08:47:39 2012-02-27 14:48:16 1 2 77 40 108 690 0 119.60 38 94.68 NEW MAFTGKYEhESEKNYDEFMKRLGLPSDVIEKARNFKIITEVQQDGQNFTWSQpYSGGHoMTNKFTIGKEC-IQTMGGKKFKATVQMEGGKVVVNFPNYHQTSEIVGDKLVEVSTIGGVTYERVSKRLA ................MsFsG.p.aph.p..op.ENa-tFhKs..l..G...lP..p...-.h..I..p..+...u..+..shK..slo.Elp.Q.sGpcFsh.ot.....h.s..s.u.+..s..h.s.N.p.FT..lGc...Es..E..hp.T..hs..Gc....KhKss.....V.ph.EG......s....K.......L.....V.....s........s...h.............s......h.......p.......p........s......p......E....l.....s.....G....s.p..lsp...s...Th....u...sh....sh.RhSK+.......................................................... 0 29 39 68 +14503 PF14652 DUF4457 Domain of unknown function (DUF4457) Eberhardt R re3 Jackhmmer:O60303 Family This family of proteins is found in eukaryotes. It is found repeated several times in the vertebrate KIAA0556 proteins. 
24.00 24.00 25.60 24.30 23.60 23.60 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.96 0.70 -5.47 14 419 2012-02-28 08:56:53 2012-02-28 08:56:53 1 6 92 0 292 386 5 183.50 19 50.48 NEW plplhpoWGDtaYlGLTGlEl.....l.spptp.lslshpplpApP+DlN-lssapsD.RTLDKLlDGhNlTT-DpH..MWLlP....as..tpcphlsIchsptpsluulRlWNYNK..Sh-.....DoaRGsKhlpl.lDsph.lss...........ssallRKAPGs.spFDFuQsI.h.phppp...........pshp.hsphpt.h...............h.........pp-YpsshhPsGalh+h.LloTWGD.aYlGLNGlElaDtpGppI...plp.psls..AhP.SVslL...shpsDsRTs-+LlsGlNs.TasspHMWLu.........Phhsu......................psNplYlhF-pPlslShIKlWNYSKTPpRGV+EhtlalDDlLlYpGhLcpssp ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s................h..h.h.hpsaGs.....hultthphhs.......................................................................................................................................................................................................................................t....................................................................... 0 118 148 223 +14504 PF14653 IGFL Insulin growth factor-like family Eberhardt R re3 Jackhmmer:Q6UXB1 Family This family includes the insulin growth factor-like proteins. These proteins are potential ligands for the IGFLR1 cell membrane receptor [1]. 
25.00 25.00 26.50 31.60 24.00 24.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.01 0.72 -10.83 0.72 -3.97 9 151 2012-02-28 09:38:21 2012-02-28 09:38:21 1 5 23 0 106 129 0 68.80 44 82.70 NEW hs.u.ssschhhCp.hPRCGc+hYNPhcpCCscssllsLs+T+h............CG.sCTaa...PChchCC.tphs.spp+aVVKLKshGhpu.p..sPloppC .................u.hhPchh...hC..hPRC.Gc+FYNPhcapCscsphl..s+T+p............h..................................................h....................... 0 74 74 74 +14505 PF14654 Epiglycanin_C Mucin_C; Mucin, catalytic, TM and cytoplasmic tail region Coggill P pcc Pfam-B_ 91014 (release 26.0) Domain This family represents the non-tandem repeat domain including cleavage site, the transmembrane helix domain, and the cytoplasmic tail of epiglycanin and related mucins [1]. 22.40 22.40 22.50 36.30 21.30 21.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.52 0.72 -4.03 5 31 2012-02-28 11:02:14 2012-02-28 11:02:14 1 10 13 0 14 32 0 100.60 66 14.78 NEW SAoTPVsEsKPSGSLKPWEIFLITLVSVVVAVGLFAGLFFCV.RN.SLSLRNsFsTAVYaPHG.N..LGs..........................GPGGNHGssHusuWSPNWFWRRPsSSlAMEMpGthsRP .........pAoTsVSEsKPuGSLhPWEIFLITLVSVVsAVGLFAGLFFCV.RN.SLSLRNsFsTAVY+PHGhN..LGs..........................GPGGNHGsPHRPpWSPNWFWRRPVSSIAMEMoG+tsG.P. 0 6 6 6 +14506 PF14655 RAB3GAP2_N Rab3 GTPase-activating protein regulatory subunit N-terminus Eberhardt R re3 Jackhmmer:Q9H2M9 Family This family includes the N-terminus of the Rab3 GTPase-activating protein non-catalytic subunit. Rab3 GTPase-activating protein is a GTPase activating protein with specificity for Rab3 subfamily [1]. 
25.00 25.00 25.20 25.10 24.60 24.00 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.42 0.70 -5.47 21 180 2012-02-28 14:10:59 2012-02-28 14:10:59 1 8 138 0 125 175 0 330.20 31 32.33 NEW WL.ssssl..ulussu-hlslAtsp+hslLsstW...pps.stsshthshpusl-sts....ppITulphlPhsstt..........-hsslulGhoSGalhFYoppGsLLhpQhhp.pcsllpl+l+ssppht..t.........cELsllYP.sslshIsGhsLhshLpsshppls+sthstpt........sts.ssLsapKWsh..scts...slsDsulsGshhsshF-.h.............h.ppos.phsphlssGscPhluaahspE.Gtspslluslhp.uVuuploohl.........huhh...uts.pppppssppp................ps.sshssRhsLhDttRcGpslsluPs.spLAAlTDsLGRVlLlDstpslllRlWKGYRDApCuFlpshpcpsp............sppscpchu..............................LFLlIaAPRRGlLEl.Wshps.GsRVsshss.uKss+Ll .........................................................................................l..t..h...hs.t.phhshu.ttp.hhh...h...............h.h.h.t.hp.............p.louh.hlsh.t.t.................-.hhslslGhssGhlhhaot.tG..hlhtphh.............p..p.lhtl+hps..h....................ppl.llas.sslshlpG.slht.Lptthppltpst...htt..............s.lsapKath..pp.s....h.Dts....h....G...h...s.a-.h...........s.h.pto....s.thsphlssGtsPhhuhahs.E..s..tsp...sh.lucls.h.uluuph..sshl...............................tuhh..u.ht.tppppps.sppt.t.....................pss.sshs.s+h..sL..DscRcups..lsluP......s.s......p.LAAl.T.DshGRVhLlDstpslslRh...WK...GYRDAphuWlphhpphtc..............................t.t..pchu................................................................................................................................................................................................aLlIYAPRRGllEl.Wshpp.GsRlsuhsl..u+tspLl.............................................. 0 51 69 103 +14507 PF14656 RAB3GAP2_C Rab3 GTPase-activating protein regulatory subunit C-terminus Eberhardt R re3 Jackhmmer:Q9H2M9 Family This family includes the N-terminus of the Rab3 GTPase-activating protein non-catalytic subunit. 
Rab3 GTPase-activating protein is a GTPase activating protein with specificity for Rab3 subfamily [1]. 25.00 25.00 27.30 26.60 23.70 23.60 hmmbuild -o /dev/null HMM SEED 595 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -13.12 0.70 -6.38 9 135 2012-02-28 14:11:58 2012-02-28 14:11:58 1 6 90 0 89 126 0 456.10 34 43.64 NEW cDLLoLllsaWLpKshchhpps...-shscltphlphLschtush-p..sashpslSPWWQplRchllpScph.uuLLsAlVs+sVAsphh....cstp-tp..hspsshtss....pEpWEplShDtptWsLLhtpLEDlhlLtslLsps....hssppshssph..-.hsc...sSLKslLpuG+.GhVsEhlAKWlhpstLcP.cll.tlssscppp-s.........................s.....p.....htp...t.pt.....cls.h-.............sl.-hLplLpp+FPhSL-ssVLluphuWEYhVpWsKs.pp...hcaLptulcpLctltss..tlpHGICtMlWNshLhh.hpAs.shLhpKVG+hPKD+hCppDlsMSs..stlspFLchsl-hLpph.suslscDchph........caE-uh..sEG......shPlp.LALpQ+psphsLlphHppLsoVLahlspFpl+ssKPLo.LFDuh.GppAFFtDlsp..hhs.sssD.slhp.RppFLp+VVoushch......hcps.pplh....ccltahsphscLAppWplsps.l+++.VsELYuaGhDt.Ac-lLhsls-cEhLup.LL.lAGpRLs..Lhsp..poppsh.h...lAslsspLhsaLcshp................sphcs.........splslssls+Llp+lhcpLsc 
..................................................................................................h..hhplhl.hWhph....t.h..........htphtthl.hlspht..s...p......ht.pt.....l....S.WWpph.Rt.hhhpSpp.htu...Ll...sAhls...+tsuhph...............................pph.tc.tp............................p.......ppWEtlS...h-.....p.WtllltpLEDhhlLpslLtp........................hs.......p..hsp...hSl+pllpuG+.G....hls-.luKWlhppshsP.phL..p.h..ppp.-..p.........................................................p..................t.t..t......................phlt.hhhppFPhS..Lp.slLhuphsWEahspWsKc.pp.................hphh.tulphL.pt..l.ss...tlppGl...........sh........hhWssalhthhpus.shLhpKV..G+.PKD.....+hCppDlGhu-..tthspFLt.ChphLphh...hpusht..-chph............hphEchh.....s-..G.......................s.slspLA...l...pQp.....h.hsLlphH..LsslLahhhpF..pl+..sKPLs.LFDuh..uppAhFp-..l..ssh...hh..s..sphD.sh.p.Rp.pFLh+llsu...shph....................tth..................tt..a...thshpLAp.hp.lst-.l+c+.VsELYphGhDt.ucphlhplp-p-hLuspL.L.hl.sGpRLs...Lhpp..psppshth...luplsspLhsaLcshp..................htp.............thsl..htpLltphh.hLs................................... 0 31 37 66 +14508 PF14657 Integrase_AP2 AP2-like DNA-binding integrase domain Bateman A agb Jackhmmer:C9L423 Domain This family includes AP2-like domains found in a variety of phage integrase proteins. Presumably these domains are DNA-binding. 22.10 22.10 22.10 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.27 0.72 -4.18 212 1784 2012-10-03 08:51:45 2012-02-28 14:47:41 1 10 978 0 198 1338 40 45.30 27 12.51 NEW pWhhph...thpshsGK++pppK.pGF+TK+EApphtpc....hhtp.....hp.....psht .............ahhph...thcs.sGKc+ppp+.p....GFc.TK+EAptthtc....h.tp....hp.p...t............. 
0 77 133 168 +14509 PF14658 EF-hand_9 efhand_7; EF-hand domain Coggill P pcc Jackhmmer:Q8N6L0 Domain \N 37.00 37.00 37.00 37.00 36.90 36.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.14 0.72 -4.06 8 82 2012-10-02 16:17:27 2012-02-28 15:26:37 1 13 55 0 37 112 0 65.20 37 17.67 NEW -shFcsCDsp+sGcVslS+llsYL+tsTup..sPp-s..cLpsLsp.LDPsGc..csslsLDTFpsVMRcWI .................sFphhDsp+TGhlsltclhshLcusst...sPp-u..cLQsLhsplDPpup...uplshDsFhslMpphh.......... 0 9 10 18 +14510 PF14659 Phage_int_SAM_3 Phage integrase, N-terminal SAM-like domain Bateman A agb Jackhmmer:C9L423 Domain This domain is found in a variety of phage integrase proteins. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.77 0.72 -3.70 221 5045 2012-10-02 14:21:04 2012-02-28 17:25:28 1 28 2378 11 835 4073 332 57.40 19 15.31 NEW oFpchh.cha..hcp.hc..tpl+.tsThtspcphlcp+IlPhF..GphclscIs..stplpc.ahNchh ..........................Thpchh.cpW.....hc..p..hp.....p.......p.......l.....c...........s........T.hpphpp.hlc..pal.h..P..h..h....G.ph..p..l.scIs..stplpp.hhpph.h................... 
0 320 580 723 +14511 PF14660 DUF4458 Domain of unknown function (DUF4458) Godzik A adam Jackhammer:Q8ABA0 Domain this domain is found in tandem repeats on the N-terminus of secreted LRR proteins from human associated Bacteroidetes domain boundaries are based on the JCSG solved 3D structure of JCSG target SP16667A (BT_0210) 22.40 22.40 24.10 29.30 20.40 20.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.50 0.72 -4.06 25 61 2012-02-29 08:06:47 2012-02-29 08:06:47 1 11 24 3 2 57 0 115.00 32 26.05 NEW GhVpFKLhKsh.......pt..pts.soRA............s..sc..h.h..hu-hpKlc.lslpp..stTphspTl.....t.Lpsh.s................ps...u-.....................hGhco.....-pL.pLhAGsYplsuYphY......c+pc...pp...lhs.upss.pspsFsVhsssls ...............GhVpFKLhKsh........t......soRA............h..sch.h..hS-hpplc.lslpp..shTphspTl....t.lcshhs................Es..s-.....................auhpo.....-pl.pLhAGoYplhuYhhY.....Dchs.....ps...lht..upss.pspsFsVhsssl... 0 2 2 2 +14512 PF14661 HAUS6_N HAUS augmin-like complex subunit 6 N-terminus Eberhardt R re3 Jackhmmer:Q7Z4H7 Family This family includes the N-terminus of HAUS augmin-like complex subunit 6. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2]. 
26.30 26.30 26.60 30.90 23.90 26.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.66 0.70 -5.02 27 214 2012-02-29 10:59:24 2012-02-29 10:59:24 1 4 158 0 141 215 1 211.80 21 32.99 NEW hlapsLphLuhch.sst........shpthhshshF.KsNhpAh.hlhaaLFphhDssch+.c+Ft.hWP.hDp..tpcssFRssshchLpc..Lpcpt.tLsphspl+sshlspssG.+FhchLhphusaVlpchl+p.....h..pssssshths.spphtsss..t.hhshhtthcsphhphhpcpsth.t.apchsphlppphcslsucptt.ppth.......thpsppcpttcth.p...tt.......h.t.hpphpphWsph .......................hhh.l.hLthc...............s.p...t.s...ht+...phpshphshaaLhph....h..D.st....tp..p...........thp.....haP.h-t..hpshphRtthhphLpc..lpcps..sLs.........p.sp..l+tohlhpssG.+FhclLhphuphVLpchlpp........h...........p...........s.ssh............p........hs.ttphtspp..t.hhshhhht..+.pphhp.hhpcpsth....t.apchs...phlpt....phcplttcp.thhppp..................p..pp.tp....................................................................................................................................................... 0 34 66 108 +14513 PF14662 CCDC155 Coiled-coil region of CCDC155 Coggill P pcc Jackhmmer:Q8N6L0 Domain This is a small family of eukaryotic proteins of unknown function.ThiS is the central coiled-coil region. 30.90 30.90 31.50 32.00 30.40 29.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.38 0.71 -4.70 15 48 2012-02-29 12:47:12 2012-02-29 12:47:12 1 8 33 0 25 66 0 163.10 59 29.74 NEW PATADLLSSLEDLELSNRRLAGENAKLQRSVETAEEGSARLGEEIpALRKQLRSTQQALQhAKAlDEELEDLKTLAKSLEEQNRSLLAQARQTEKEQQHLVAEMETLQEENGKLLAERDGVKRRSEELATEKDuLKRQLaECE+LICQRDAlLSERTRHAESLAcTLEEYRoTTQELRLEIS+LEEQLSQTpE .....ATADLLSoLEDLELSN+RLsGENAKLQRSlETAEEuSARLGEEIhuLR+QL+..ST....Q....QALQhAKAlDEELEDLKTLA+SLEEQNRuLhAQARpsEKEQQHLVAEhETLQEENGKLLAERDGVK+RSpELAs..EK-sLK+QLhEsEcLICQR-slLSERT++sEuLspTlEEYRssTQELRhEIS+LEEQLSQo.p......... 
0 2 4 9 +14514 PF14663 RasGEF_N_2 Rapamycin-insensitive companion of mTOR RasGEF_N domain Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the more conserved central section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin. 27.00 27.00 27.90 27.40 26.40 26.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.35 0.71 -4.14 35 251 2012-10-02 12:00:54 2012-02-29 13:10:47 1 25 215 0 179 269 0 112.50 36 8.19 NEW phssa.ppWulcLLl...sQLhDsss....pVsthA.lplLcEsCppp..ssLEhllph.+.PsLspLG-hGt.Llh.+FLSpspG.a.phLsp.hsalppEh-pWh.pptNpcYVtllEsslppuhssptcs ..............s....hppWuIcLLV....sQLaD.sh....pVsptA.lclL-EACpp+..s.Lchl..lp......h........+.P........s.L.........s....H.................L..G..-..h.....G.t...........LLl.....RF.LShstG.a.p..Y.........Lsp.h....salspEh-cWh..pthNppYVsllEtpLscuhsp.......................... 0 57 94 147 +14515 PF14664 RICTOR_N Rapamycin-insensitive companion of mTOR, N-term Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the N-terminal conserved section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin. 
27.00 27.00 35.40 31.00 26.60 26.10 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.00 0.70 -5.87 30 301 2012-02-29 14:25:56 2012-02-29 14:25:56 1 28 234 0 209 292 2 316.30 29 26.27 NEW lp+uNsLVpLh+cpPpl+h-lshuhhs.clps...hLLscs.+-VsAAuYRhsRYhlsDtpslptlppLphDhlllhSLs+-p+s.plEREQALKhlRsh..l-l.tulpch..opulVRsllulA-c....p-DRL+shsLpTLsElhlhsPpLlhpsGGlpsLtcsLh-.usap..huEolhsslLaLLDsPpTRpalpssh-LcslhusFTD..............pscpt...t+lpsSshuIushL+oWsGlhhLupssh.psL+SLlssLplPpsp.lRcsll-Lla-lLRI+sPsWossFhsup+hsshup..h...hplp.....spspp.h...s.....t.tt.slhspahALlLhshlcuGL...lpuLlpllpssp..........ttLspKusl......LLuElL+LAsplLP ..................................................tphNphspLhpp.hs..+.c...hsh..h.c..l.h........hlLscs..ppVhAAuhRhhRYhlts....p...lphlhplp..h-hhlhhslsh......p...........p.....p...............s..phERp.............QAL+hlRt..h..lsl.............s........sp..............h...............spulspsllulupp............................pcD...c..hhphslthlsElhltsPpllhhsGGh.pslhcs.lh-...sphs.....hs-s..lhsshLalLspPpoRpalcss..h-.L-h..lhusao-h...........................hpppp.........+hpssthslhshh+oWsGll...........hst..ss...ps...lp.....u.....L.ls..............sL...ph......P.p...p......hRchll.-lla-lhp........l.......s.....s.....htpphh.u.h.h.ss.sp...h....php.........tt..t.h....t.............................tp.slhppahuLlL.hhlcsGL...............lpsLhplhpsp.s..........................lph+uol......Lls-lLphupphLP............................................. 0 76 116 174 +14516 PF14665 RICTOR_phospho Rapamycin-insensitive companion of mTOR, phosphorylation-site Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient- and growth-factor signalling. 
This short region is the phoshorylation site. Rictor does interact with 14-3-3 in a Thr1135-dependent manner. Rictor can be inhibited by short-term rapamycin treatment showing that Thr1135 is an mTORC1-regulated site. 27.00 27.00 38.40 37.20 25.90 18.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.63 0.71 -3.98 4 57 2012-02-29 14:26:48 2012-02-29 14:26:48 1 8 36 0 29 52 0 105.60 69 6.89 NEW PFshhuSS+LV+NRhLNSLoLPsKKtRSoSDPKG....uKL.os-ohsuhRRsRTlTEPSl.s.spu-sFsPl.p....P+p.TlsLETSFsGhKsl--ssSTsSIGEN-lKhs+s.u ....................sFPFFASSKLVKNRILNSLTLPNKKH..RSSSDPKG....GKL.SSEsKoSNRRIRTLTEP.Sl...DFNHSDDFTPlS........ssQKTLpLETSFVGNKHlEDTGSTPSIGENDLKFsKs.hG............................................................................ 0 1 4 12 +14517 PF14666 RICTOR_M Rapamycin-insensitive companion of mTOR, middle domain Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the more conserved central section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin. 
27.00 27.00 30.00 27.20 26.60 25.20 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.40 0.70 -5.34 51 266 2012-02-29 14:49:26 2012-02-29 14:49:26 1 27 232 0 188 271 0 212.50 34 15.33 NEW hDDtpF+phlt-.opVLssK-apcWNWsllhpllcGPLhNsKRl-Esh..+ssKF....l+RLluFYR...Ph+hRFosl.hssc....s.......p+YlplGCtLhcsLLso.sEGh+hLt.cs..+llpQlucsLspl..........hsGhsstp.slFSpp+LpsTlstGYFthl.GsLSsstpGl..plLE+aphFshlhplhph.........p.s+..ssllpl.llssL......DYsh.supsRhlLsKALssusc.plRlaATchLthhlps ...............................................p-tthpphlh-.opV..Lpp.p.phhcWsWslIhpllcuP....h.hN.+.phc-t....phs+Fh+RLlsFY+......P.phpaupl.hstp.....s............ppaspsGCthhchLLpo..Eu.thhh...s.........cllpplsphLsps..................hoG.hps.p.lhspppltsTLsttYFhhl.GsL.SspspGl..phL-+hp........h........Fphhhplhph............................p..sc...scLlpL.llusL......DYoh..-u.h.s.RllLoKsLTsuoc.shRlaATchL+hhl..................... 0 62 101 155 +14518 PF14667 Polysacc_synt_C Polysaccharide biosynthesis C-terminal domain Bateman A agb Jackhmmer:C9L817 Family This family represents the C-terminal integral membrane region of polysaccharide biosynthesis proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.98 0.71 -4.40 347 5970 2012-10-02 21:24:20 2012-02-29 15:39:12 1 27 2511 0 1351 15273 3365 133.80 17 27.14 NEW hLtlhuhshlhh.ulh.t.h.h.s....s.l.Lp.uhs+tphshhsh...hluhllpll.lshhL....l.........hG...h.........hGAuluTsluhh.l.s.shlt.h.hhlp+..h...h.p.hp....h.....h.t.p.hh...hth....h.hsuhlMslsl....hh.hthhh.....................t......sh...l...th....llsls...lGs...hl...Yhhhlhhh....ph .......................................................h.hhshs.hlhh..u...lh.t...h...h.s....s...h..Lp.uh....s...c.p.+h.s...hh.s.h.....hh....u.s.l...l.p.ll..l...s....hhL........l............................h..a..G......h...........................hG.Aulu..T.h.lu....h....h....l....s....h...h....l.t.....h.....h...h....l..........p..........+..........h..............h.....t.....hp..........h......h.....h............h..............h....h..h...................h...h...h....s...h........h...h..h...h...h..h.............hh...h.......h.h...............................................................h.........h.....h........h.h.hhh....h.uh......hh....ahhhhhh...h..................................................................................................................... 0 455 917 1153 +14519 PF14668 RICTOR_V Rapamycin-insensitive companion of mTOR, domain 5 Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. These long eukaryotic proteins carry several well-conserved domains, and this is No.5. 
27.00 27.00 27.00 27.10 26.80 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -8.91 0.72 -4.05 32 251 2012-02-29 15:53:02 2012-02-29 15:53:02 1 30 216 0 185 260 1 73.00 41 5.33 NEW lhcLKuuLWAlGal.GoophGlshLp...ptslltpllplAppsplhSlRGTsFasLuLIupTppGsclLpchGWpS ............hclKusLWAlGsl.GS.sphGhshLp..........pp..slltpIlclAcpspVhSlRGTsFaVLGLIu+TppGt-lLpchGW-..... 0 65 101 153 +14520 PF14669 Asp_Glu_race_2 Putative aspartate racemase Coggill P pcc Pfam-B_34791 (release 26.0) Family This is a small family of vertebrate putative aspartate racemases. The family lies on TOPAZ 1 proteins. 27.00 27.00 28.00 27.50 26.80 26.50 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.47 0.70 -5.04 3 28 2012-10-03 04:39:14 2012-02-29 18:32:59 1 2 25 0 16 34 0 222.80 63 15.17 NEW EAGhVhssEas-hl.hhLcphpssppElssslssKS...RupAosP+puaLs-LApsVVEVELCKcpEDWs+LGslFhSVCpGsccsu-LpRFCuCVAhALLcEsKD.K.uVPFstFAETVCQ-sppDElsKTaLGRIGVSLMapYHRTppWoKGRKVL-VLScLKlEFToLKGLFGsEcGASRCQLVTlAAElFIpSGSIEGALNsLR.ENEWFlSSSoWPCEpADVpsR+RVLstL ...................................................................EAGMlLDsEHFNYIVKLLYQlQASKQEIoAVLEhKS...RLphRQFKK..NWps...DLcSALsclEHCKEKGDWTKLGsLYlNlKMuCEKF.ADhQpFCACIAEsLTK-h.K-ERPslPFCEFAETVsKDPQNSc....lDK.s.lLGR.IGISAMYFYHKLLQWSKGRKVLDKLYELKIHFTSLKGLhGPEKLAsRCQIVNlAAEIFLKSGSLDGAIWVLR..ESEWIIsTPLWPCDRhDVLNRHNLLCTI.................................................. 0 2 3 5 +14521 PF14670 FXa_inhibition Coagulation Factor Xa inhibitory site Coggill P pcc Jackhmmer CATH:3kl6_B_0 Domain This short domain on coagulation enzyme factor Xa is found to be the target for a potent inhibitor of coagulation, TAK-442 [1]. 
33.40 33.40 33.40 33.40 33.30 33.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.59 0.72 -3.83 204 5651 2012-10-03 09:47:55 2012-03-01 14:05:28 1 1176 167 166 3003 5435 0 36.70 41 5.65 NEW Cphs..N..G..G...C.s....c..h..C.h..s.s.......s.s...........s..h.p....C.sCs..p..G.a.p.L.s........s.D..sp...o.C ..............Ct.s..N...G...G....Cp...+.h.Ch.ss.......s.u.................u..a.p...CsC...s..G.a.p.L.t......s.D..t+o.C.............. 0 870 1089 1827 +14522 PF14671 DSPn Dual specificity protein phosphatase, N-terminal half Coggill P pcc Jackhmeer:CATH:1ohe_A_01 Domain The active core of the dual specificity protein phosphatase is made up of two globular domains both with the DSP-like fold. This family represents the N-terminal half of the core. These domains are arranged in tandem, and are associated via an extensive interface to form a single globular whole. The conserved PTP signature motif (Cys-[X]5-Arg) that defines the catalytic centre of all PTP-family members is located within the C-terminal domain, family DSPc, Pfam:PF00782. Although the centre of the catalytic site is formed from DSPc, two loops from the N-terminal domain, DSPn, also contribute to the catalytic site, facilitating peptide substrate specificity [1]. 28.20 28.20 28.60 28.30 27.60 28.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.67 0.71 -4.19 42 431 2012-03-01 14:48:06 2012-03-01 14:48:06 1 5 262 3 265 410 10 128.20 43 23.61 NEW lEhIpsRLYasshpp.....pP+ss...sssaaFslDc..-LsYpsF....atDFGPLNLuhlY+astpLschLps...shppKplVaYou.hcs....c+RsNAAhLlusYhllhhshoPccAhpsl.pphp....s..sahsFRDAohusssaplolhDClpGlp+A .....................................tphhhh.h....................pspss........shhaFslDc..pLhYpsF....atDFGPLNluhlYRassclpchLps............hshtcKtlVaYou.hDt......................+cRANAAhLlusYhVlhhshoPcpA.apsl.hpss....s..sahPFRDAuaGsssaplTlhDslpGlpKA........................ 
0 99 145 213 +14523 PF14672 LCE Late cornified envelope Eberhardt R re3 Jackhmmer:Q5TA76 Family This is a family of late cornified envelope proteins that are expressed in skin [1]. 24.50 24.50 27.00 24.90 24.40 24.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.13 0.72 -10.95 0.72 -3.50 10 255 2012-03-02 13:07:00 2012-03-02 13:07:00 1 2 22 0 94 230 0 71.30 56 80.19 NEW sPPKCP....sPKCP...PKsss.Ch....PPsSSC.CussS..GGCsGs.............SuuGGCCLSHHR.RRSHRC.RhQSSsCC-pGu.....GQQSGGSGCCHuSGGCs .........................sPKCP.....PKCP...P.pss......s.C.........PssS..SC..CusSS..GGC.Cu..s..................Su.GGGCCLSHHR..+Rp.+Rp..R.psSssCsp.................uG.u.S.uCstuuuGp........... 0 20 20 20 +14524 PF14673 DUF4459 Domain of unknown function (DUF4459) Coggill P pcc Pfam-B_10980 (release 26.0) Domain This family appears only on sequences from Salmonella spp. These sequences also all carry a YARHG domain, Pfam:PF13308. 27.00 27.00 45.80 45.70 23.20 18.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.00 0.71 -4.53 2 68 2012-03-02 13:31:19 2012-03-02 13:31:19 1 3 66 0 0 26 0 148.70 97 33.39 NEW MKKCFLFIFhCLFIFSANAELKFRPEhENKKIYFQGKVTDYTLNDFhFFGDSREPFYGSENDDYTATADEWLtFYAELPDVRKWQRVVPDDFShM.GAPWCDIQFFEQENDHSVITGSEHhRCIDFLVTPKRKGLIPMGTKGTLhDYGSYLAFAPQIc+ ..MKKCFLFIFVCLFIFSANAELKFRPEFENKKIYFQGKVTDYTLNDFTFFGDSREPFYGSENDDYTATADEWLGFYAELPDVRKWQRVVPDDFSTMYGAPWCDIQFFEQENDHSVITGSEHMRCIDFLVTPKRKGLIPMGTKGTLMDYGSYLAFAPQIKR..... 0 0 0 0 +14525 PF14674 FANCI_S1-cap FANCI solenoid 1 cap Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 1 cap (S1-cap) domain of the Fanconi anemia group I protein [1]. 
25.00 25.00 31.60 31.60 22.50 18.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.43 0.72 -3.91 5 57 2012-03-02 15:30:47 2012-03-02 15:30:47 1 5 31 6 20 47 0 51.90 63 4.80 NEW MDQKILSLAAEcosD+LQEaLQsL+Es-LoslLTsQAVKGK-sGALLRAIFKG ..MDQKILSLAs-KTsD+LQEFLQTL+-DDLTsLLpNQAVKGKssGALLRAIFKG. 0 1 3 6 +14526 PF14675 FANCI_S1 FANCI solenoid 1 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 1 (S1) domain of the Fanconi anemia group I protein [1]. 25.00 25.00 25.40 26.90 23.20 22.20 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.36 0.70 -5.00 16 138 2012-03-02 15:31:51 2012-03-02 15:31:51 1 23 93 8 82 125 0 194.60 35 17.27 NEW pstp+Rhplspsslptlcss-Lshchs.DlIuRLhhDlssassppLlcls-hCl-ul........................RtG-spshsWK-LLPplLss......Lusp.tplshsshtloGsEY+cpllssLssh+WsspIlsslssMFRDlsLSpEEhphVlsKlsstlpc.ls..ElPPLsaQLhplC.psuupll..L.uLp+YFpcpahc+h.sptsopoo....Dl-sI.......................shSscELR-sEtTlLaHls.ssph .......................................................................t...+Rhtlhp.hlph.hp..ssclp..c.st-llshLhh-htphPs...Lsplsp.hl.ssl...............................+.p.ushhssp.....h-LlPhhLos...........Lssp.pplsh.....s...t....st..lsG........pEhKcpllsslCo.scWst....ph.llplssMF+Dls..Lo.s-Elp.hVlpKllphhpc.lsLQElPPLVYQLLlLuoK.G.s+.+tl..LpGlltaFsphppp....ptp...ppp.s...sp.h.....-l...l.......................s.hst..tELRclEGTlllHlsaAlc................................. 0 29 38 60 +14527 PF14676 FANCI_S2 FANCI solenoid 2 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 2 (S2) domain of the Fanconi anemia group I protein [1]. 
28.50 28.50 28.90 29.60 27.90 28.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.60 0.71 -4.33 25 145 2012-03-02 15:32:28 2012-03-02 15:32:28 1 27 102 8 94 147 1 152.00 36 12.64 NEW tllpulVphuFhLL-..........uttssptpsht.hpt.....................LGlplLhphF+hHphsRscIl-plhsRllsppsp.sspal...................cLLuhlspstshhllEpsuplp.....-hh-al.salssssuptllsAlhPLlKlS.tsl+DtlILVLRKAMas+-hssRhsAVsGalp .....................................hVspGLVpLuFhLh-...................................uhssp+h.s.stshps..shtp...........................................LGtslLlchF+hHchhR.......p..cIl-plhsRllopsss....s.p.a.l....................................-LLupllhtsPhhl.psss+lp.....-hhDal.saLPhps..sptLlpAl.PLlKlS..hshRDuLILVLRKAMFup..ph...s..sRpsAVsGFl.h.................................................... 0 34 44 72 +14528 PF14677 FANCI_S3 FANCI solenoid 3 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 3 (S3) domain of the Fanconi anemia group I protein [1]. 25.00 25.00 25.80 25.90 24.80 21.80 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.21 0.70 -4.96 13 99 2012-03-02 15:33:04 2012-03-02 15:33:04 1 14 71 8 57 106 0 205.40 38 17.27 NEW lLohphlScLLshLFcDssp.u+pEsLulLRSss-ah.RYuVsVslQKlQQLc-p..sDss-upssc+hFcpLCDIT+VLhhRhsshsssl.c-sG.....polSlLslEshhclhsslpppYss+hspFLpslDssssstpc...........slTppsthhI+pFQ+slhs.hs.us-DsFsuKpslhLlslLohLucpLsPuSsphs.QhhsWslplC+cpsl-DsuhsKGLl.sLLF ............................................lhshthlspLLphLacDsst.uppEslslLRSsp-Fh.+YslslslpKlQplcpp.....sshsptppsc+hFp..pLs-IT+VLhW.R..Y..TslPosV.E-sG++.KtcolSLLCLEG..Lpclhssl.pphYps+lppFLpul.....Dsssppspc.t.......hslsppsth.I+pF...Q..Rsl....h......s.LS...upc--F.sSK-sl.lLlslLosLo+hL-P..uSsphs.p....hhoWs.+lC+-ssh....-Dst..hsKuLh.sLLF............................. 
0 15 19 37 +14529 PF14678 FANCI_S4 FANCI solenoid 4 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 4 (S4) domain of the Fanconi anemia group I protein [1]. 25.00 25.00 66.30 32.60 24.40 23.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.40 0.70 -5.28 19 132 2012-03-02 15:33:39 2012-03-02 15:33:39 1 22 97 8 84 138 0 235.70 35 20.15 NEW sshLpclupsltspLGslc..p-sps.....-pps..pauhlshcTAu.olhtlllup.lp+lL--V-Whls+hps.httsphs........psspsthtsshcshE+ulhtQLspllpslpcLspsslPsGush-slhKtLt+hYshLosLsKaYlphssspts...thsscFEcL.l+h.............sGopLsspsYshIoY......lpttpp-p.spppp.....p........tttspsthAKlLRET+.IPslIauIEpaEKalIpLSKKoKl..NLhpahKhSTsRDF+IpuspLcsAl .........................................................h..hhpclupclpt.lGslc....pD.pl.........-pss...pashlshcTus.olhhhllup.s-csL--VDWlls+lKs.hstpphs........ptspsphph....tpshE+ulhhQLspllphhpcLlpsulPs.G...o.ssDsLLKtLs+hYshLosLsKaalpsppspts....hstph-+L.V+l.............oGpp..LTs.hYsFIoY.......................lpp...p...p.....p...pp...t..t....ttpt........................tt.ststhA...+lLR....ETK.IPsLIFsIEpYEKaLIpLSK......KoKl...............NLhpahKhSTuRDF+Ipss.Lp.sl.................. 0 33 41 61 +14530 PF14679 FANCI_HD1 FANCI helical domain 1 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the helical domain 1 (HD1) of the Fanconi anemia group I protein [1]. 25.00 25.00 25.20 29.20 24.40 24.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.65 0.72 -4.25 19 101 2012-03-02 15:34:20 2012-03-02 15:34:20 1 17 66 8 57 95 0 85.00 44 7.08 NEW pLGREll+alKsst....tpsLsPFslulLLSVuplpRFc-plh-hL+oslh+sh+D.phppsu+WLpchl.ppssltshll-slpso ..........-LGREllKtLKssp..t.ssssLsPFSlALLLSVsRIp.RFpEQVhDlLKsullKuaKDhQlhpuSKaLpsLlPpc..s..sVushILEVV+NS............. 
1 17 23 38 +14531 PF14680 FANCI_HD2 FANCI helical domain 2 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the helical domain 2 (HD2) of the Fanconi anemia group I protein [1]. 25.00 25.00 35.80 30.60 23.90 22.70 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.36 0.70 -5.25 16 147 2012-03-02 15:46:03 2012-03-02 15:46:03 1 24 99 8 94 147 1 212.30 33 18.07 NEW su.....Sp.sSp.tsshoQlplss+ophsss...sN.-sLsLEIlG.LRRCLsQQA-VRlhLY-GhY-slcpNupLusplLphLhsph+paa-s........-.DhlsPl+l-sClsupG-p...lhLpEPLu+LlpslspCLthhpps.sstss.s..............El..splLESlopRMl+s-..LEDFpL..........................Dcss-hs.pssssuhcpshhshhhhulhEsLIpaphht..sshscsph...cclLuLFcsYpKLs-hLKc ..................hs..spphSt.t..shSQ.splsspuphsss......tN..-shsLEIhs.L.RRCLsQQA-VRhhLY-GhhcllppN....s.pLusplhphLhs.p.h..+.p.a.acs..........c.DhlsPlKl.-tClh..........spusp.............l.LpEPLspLlpslppCLthhppt..shptt..........t.p...............h.pcl...........pphL..-ohspRMl+s-....LEDhpL.......................................Dcst...-hs.psos.huhcsphh.s.hlhulhElLl-aph.h.....sphscp..ph....cpll.sLF.papcLp-hhp............................................... 1 32 42 72 +14532 PF14681 UPRTase Uracil phosphoribosyltransferase Bateman A agb Jackhmmer:B0C7X6 Domain This family includes the enzyme uracil phosphoribosyltransferase (EC:2.4.2.9). This enzyme catalyzes the first step of UMP biosynthesis. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.12 0.70 -5.12 113 5012 2012-10-10 14:25:38 2012-03-05 13:22:09 1 28 4240 67 1505 3471 1360 199.90 43 83.73 NEW ls.s+PhlpthhohlRspsT.ssspFcthhcclsplLshEAhs.p..Lsh.p..pt..plp.....TPl......utth....s.s....h.h....h.st......plshVsILRAG.................................uMhpulpsl.hPs.s.plt+lhl.R.-c..pT..................hp..P.........aYp+LPppl........sppp..................llllDP...hlATGuoshtAlchL.hct...sst.ppIhhlsll......u....usp....Gl......pplttpaP..c.lc.lhsuulDppL..s.-puaIlPGLGDhGDRhFGT .........................................................................................................................sHPLlpH.KLollR..-.p........c..T..uop..c..FRc..LssElupLh.............s.....YE...............so+..D..L........h..c.......cs...pIc.............TPh..........ut.sp...sp.....p.l............t..u+..........KlslVPIL...R.A...G..l................................G.Mh-.....Gl.L.p.l...l.......P...u.......A...+.......l.......G.......h.......l..G..l..aR....D.E....-T.............................................Lc..P.s.............Y.a...t..KL...P.p.cl......scRt...................................slllDP...MLAT..G............G.Sslt.........AlchL.Kc+......Gs.....ppI+hlsLl......A....APE......Gl......cslpc...u.HP.....D....Vc....l....asAul..DccL..N..-+......G.................Y.IlP.G.....LGDAG......DRlFGT............................................... 0 468 899 1262 +14533 PF14682 SPOB_ab Sporulation initiation phospho-transferase B, C-terminal Coggill P pcc CATH:1ixm_B_02 Domain Sporulation initiation phospho-transferase B or SpoOB is part of a phospho-relay that initiates sporulation in Bacillus subtilis. Spo0B is a two-domain protein consisting of an N-terminal alpha-helical hairpin domain and a C-terminal alpha/beta domain, represented by this family. 
Two subunits of Spo0B dimerise by a parallel association of helical hairpins to form a novel four-helix bundle from which the active histidine - involved in the auto-phosphorylation - protrudes. In the phospho-relay, the signal-receptor histidine kinases are dephosphorylated by a common response regulator, Spo0F. Spo0B then takes phosphorylated Spo0F as substrate hereby mediating the transfer of a phosphoryl group to Spo0A, the ultimate transcription factor. 27.00 27.00 29.60 28.60 25.00 24.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.57 0.71 -4.29 12 140 2012-03-05 14:41:45 2012-03-05 14:41:45 1 1 137 10 21 74 0 113.40 51 61.89 NEW hPphAthlLTaNWcs+.hpLEaEVLG-l+sLpth-ppLhshspplFshhcpuls.tsENHLslolphp..-......ptlplaFDFpGhls.shptlpp.hpp.tptp.thphhphcls-cEssl ...MPLFSEWILTYNWKQQPsLLEYEVLGc..L..+N.LS+hDEpVCTWosQFFSMLQHSLDVY..VENYVCITIEsDu........-NARFFFDFRGKLT..slEELQs.WLu.spNN..cahsISYoVRDEElSl.......................... 0 5 13 15 +14534 PF14683 CBM-like Polysaccharide lyase family 4, domain III Coggill P pcc CATH:1nkg_A_03] Domain CBM-like is domain III of rhamnogalacturonan lyase (RG-lyase). The full-length protein specifically recognises and cleaves alpha-1,4 glycosidic bonds between l-rhamnose and d-galacturonic acids in the backbone of rhamnogalacturonan-I, a major component of the plant cell wall polysaccharide, pectin. This domain possesses a jelly roll beta-sandwich fold structurally homologous to carbohydrate binding modules (CBMs), and it carries two sulfate ions and a hexa-coordinated calcium ion. 
27.00 27.00 28.00 27.80 26.00 25.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.08 0.71 -4.61 56 304 2012-03-05 16:42:11 2012-03-05 16:42:11 1 23 130 5 211 322 1 176.50 27 28.84 NEW sslWcIG..DtossuFhsu-...............phhR.H...sc.schtshtsLsYTVGsS.tssDashAphpchs.......sshpIpFpLsssp.hs..tshTLRlulstA.........suucspVplNs...hsus......ssu..tshthDspshp+GsapGhhclYshslPuutLhp..Gs...NTlplssss..........uo......s.h..uhhaDslcL .....................slWcIGhsD+oustFhhs-......................................pphRta....p..c...schhP.t.t.slsYTlGpS..tpDa.aup..hsths...........................ssapIpFsLssspht...tshsLclul..AsA..................ss..u..ch..pVp......lNs...sss...............hss.......ht.pcsshsc+Ghp.Gh...ht.....hap..hslsushLhpGs...Nsl.hls.sp..............us..........t....shhaDhlcL.................................... 0 52 129 181 +14535 PF14684 Tricorn_C1 Tricorn protease C1 domain Eberhardt R re3 CATH:1k32_A_03 Domain This domain is the C1 core domain of tricorn protease. This is a mixed alpha-beta domain [1]. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.23 0.72 -4.10 129 801 2012-03-06 14:27:51 2012-03-06 14:27:51 1 45 538 24 226 719 107 69.70 30 8.07 NEW ttEhpphFc-sWRhh+-p..FYss.....c.h+Gl.D..WpultccYp.hlsplsspt-hscllsEMlGELssSHshhpss ....................tEhtthF-csW+hhccp...F....ac......s..hpGl..D..WpultspYp.hlsphp...sp..p-.ht-lLsEhluELssuHstst........................... 0 99 185 216 +14536 PF14685 Tricorn_PDZ Tricorn protease PDZ domain Eberhardt R re3 CATH:1k32_A_04 Domain This domain is the PDZ domain of tricorn protease [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.81 0.72 -4.19 47 349 2012-10-02 11:12:46 2012-03-06 14:32:34 1 16 297 24 103 348 96 86.50 37 7.96 NEW GhLGADhph.c..s..sp..acls+IlsG-shsscs+SPLstPGlplptGDhllulsGpslssstsPttLLsspuu..ptVpLolpss.su..ssRclsV .................................GhLGA-lsts..s..ss..h+ls+lhsG-spsspttSPLstsGlslppGDlIlAlDGptlsss.......ss.htpLLpG..p.A.G..+.VpLolppsss....ttRplsl............... 0 39 75 94 +14537 PF14686 fn3_3 Polysaccharide lyase family 4, domain II Coggill P pcc CATH:1nkg_A_02 Domain FnIII-like is domain II of rhamnogalacturonan lyase (RG-lyase). The full-length protein specifically recognises and cleaves alpha-1,4 glycosidic bonds between l-rhamnose and d-galacturonic acids in the backbone of rhamnogalacturonan-I, a major component of the plant cell wall polysaccharide, pectin. This domain displays an immunoglobulin-like or more specifically Fibronectin-III type fold and shows highest structural similarity to the C-terminal beta-sandwich subdomain of the pro-hormone/propeptide processing enzyme carboxypeptidase gp180 from duck. It serves to assist in producing the deep pocket, with domain III, into which the substrate fits [1]. 27.00 27.00 27.00 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.28 0.72 -3.87 51 306 2012-10-02 19:08:27 2012-03-06 16:19:16 1 22 139 5 207 340 67 90.20 27 15.07 NEW pRGsVoGph....Ghs......st.s.hhVGhs.............spshQYWspAt.ssGpFslssl+sGs.YsLhsat.........u-hpst...ssslsVo.uusst.slush .....................................tRGsVsGplh....th.................h.t.shVGht..tt........t....spshQaWsp.....sc....ssG.....p.FsIssl+sGs..YsLhsas.........su...hG-ath....psslsls.supsh..p....h.............................. 
0 50 125 175 +14538 PF14687 DUF4460 Domain of unknown function (DUF4460) Eberhardt R re3 Jackhmmer:Q8N3R3 Family This domain family is found in eukaryotes, and is typically between 103 and 119 amino acids in length. There is a conserved HPD sequence motif. There are two completely conserved residues (N and F) that may be functionally important. 25.30 25.30 26.10 28.30 25.10 25.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.87 0.71 -4.32 31 156 2012-03-07 14:34:13 2012-03-07 14:34:13 1 4 113 0 111 150 0 103.70 33 20.81 NEW hhppRphtssclpsAL+PFYhtVHPDhFsp.aPp.p+psNEcSLKhLsuaL-sLpp.t.......scstpLpFYlpcssssp..t........pa+hVplpl....s...ppD...........scphlpplLcsCsL ........hh...+thtsschtsALRPFYhtVHPDhFup.aPp...p+plNEsS.....LKtLssaL-sLpp.t.............shpstpLsFYl+pss....pst.................ta+hlphpL.....p........spc..............phl..llppCpL.............................................. 0 44 61 90 +14539 PF14688 DUF4461 Domain of unknown function (DUF4461) Eberhardt R re3 Jackhmmer:Q8N3R3 Family This domain family is found in eukaryotes, and is approximately 310 amino acids in length. 
24.00 24.00 25.40 24.80 23.10 23.10 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.79 0.70 -5.09 24 123 2012-03-07 14:36:20 2012-03-07 14:36:20 1 6 90 0 86 118 0 275.60 30 57.36 NEW cpsLssWLccsttpApp+tcsstsl+cElp+L+ppLspphsLpclpasCGWsltH..hpus..LpoLp+L..sp...ppsp.....hpsL+...s+sllFu.shoGlShpGc..VMLsosDVpcsWhcllcpls.pacth..lptlshhEptlSplLtsIpls+hchh..PthpsptYtspLpplhsultca..hsttth.p.hPps.Lpshplslpo-u..ushhl....usoGphllPuosssshLlsFlspphcpApc+hpcacp.pthEcpLhspChcphpLppLsKD-ulTs-pMIsssp+LLpt...........t............hpGlpLplopYYS..VhoDGsl...CIPW-Wc ....................................................................sLpsWLcpsttpAhp.+hcpshsl+pElp+LcptLsppLtLpDlpappuWslsH..hpup....LpoLpRL..uppp.p....hhpth+...GpsllFs..stoGhoh.Gc....VMLsohDV..ppWhchhcpls..pa.sh......ptl.hhEcplStlLtuIplshhc.h..Phhph.ptYhshLpthhspl.c......hh...h.........Ppo.Lps..h.....ph.llpo-t......hs..l..........pphGpF.lPs.sCssspL..Fl..pp.hpp.Apcphp+hpp.................phhEppLhptshcchpLppL.hK.-.sul....osspMlsCsc+Llp..........t.h...............shLpGhpLplo+aYS.....VhpDGsl...CIPWsap........................... 0 31 42 66 +14540 PF14689 SPOB_a Sensor_kinase_SpoOB-type, alpha-helical domain Coggill P pcc CATH:1ixm_B_01 Domain Sporulation initiation phospho-transferase B or SpoOB is part of a phospho-relay that initiates sporulation in Bacillus subtilis. Spo0B is a two-domain protein consisting of an N-terminal alpha-helical hairpin domain and a C-terminal alpha/beta domain. Two subunits of Spo0B dimerise by a parallel association of helical hairpins to form a novel four-helix bundle from which the active histidine - involved in the auto-phosphorylation - protrudes. In the phospho-relay, the signal-receptor histidine kinases are dephosphorylated by a common response regulator, Spo0F. Spo0B then takes phosphorylated Spo0F as substrate thereby mediating the transfer of a phosphoryl group to Spo0A, the ultimate transcription factor. 
The exact function of this alpha-helical domain is not known; it does not always occur just as the N-terminal domain of SPOB_ab, Pfam:PF14682. SCOP describes this domain as a histidine kinase-like fold lacking the kinase ATP-binding site. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.96 0.72 -4.41 56 2392 2012-03-07 16:11:40 2012-03-07 16:11:40 1 29 1272 10 403 1561 22 60.90 32 12.52 NEW htt.th...ppllcsL.+ppRHDahN+LQlIpGhlpLs+h-+spchIcplspchppp.ucl.spl .....................ph...tpLstlppYs-uL.RspsHEahN+LpslhGLLplppY...-...cl....h....phlpptspt.Qp.........h.t.................. 0 139 263 333 +14541 PF14690 zf-ISL3 zinc-finger of transposase IS204/IS1001/IS1096/IS1165 Coggill P pcc IS-finder, manual Domain \N 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.23 0.72 -3.93 135 4025 2012-03-07 16:36:10 2012-03-07 16:36:10 1 22 1094 0 544 2757 127 47.00 38 15.25 NEW pshtCPpCG........pps...hhtpsp.pppph+.clshtsptshltlc.ppRapCp...p..C ...........p.sssCPcCG..........sph....t+hchp...+so+Is.hLcs.sG.hPohlhL+...KRRF+Ch...p..C.............. 0 144 375 439 +14542 PF14691 Fer4_20 Dihydroprymidine dehydrogenase domain II, 4Fe-4S cluster Coggill P pcc CATH:1gte_A_02 Domain Domain II of the enzyme dihydroprymidine dehydrogenase binds FAD. Dihydroprymidine dehydrogenase catalyses the first and rate-limiting step of pyrimidine degradation by converting pyrimidines to the corresponding 5,6- dihydro compounds [1]. This domain carries two Fe4-S4 clusters. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.44 0.72 -11.01 0.72 -4.44 327 6849 2012-10-03 08:56:43 2012-03-07 16:55:41 1 286 3369 26 1619 4929 2276 110.70 35 17.58 NEW Rtps.F.pEVshGaotc.pAhtEApRCLp.....C....t.ss...Ch.p.....uCPlslsIPpFIctltpGs......hptAhchIppsNsL.PulCGRVCPQEp.CEus.....Clh........t.....p..s.p....PVsIGpLERaluDhthpp....sh ...................................................h...t.a..Eh.....s.h.s..t....ps...t..pus.R..C..h..p....C..........s..ss.......Cp..p............uC....P....l....pst....I...Pp...a...lcLl..t.....cuc..............h.ppAhch....lp.p.........TNsh.P.t.lsG.RVCPps..CEuu.....Csh.........s....s.p..s............sVsItslE+alsDpuhp.s.h................................................ 0 581 1077 1386 +14543 PF14692 DUF4462 Domain of unknown function (DUF4462) Eberhardt R re3 Jackhmmer:A8MTB1 Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. 25.00 25.00 35.40 25.90 24.20 24.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.61 0.72 -7.06 0.72 -4.69 24 153 2012-03-08 11:34:24 2012-03-08 11:34:24 1 11 7 0 148 149 0 26.80 66 21.88 NEW uAKSR.sWNhLPRAusl.......GIGCQEQVQLE ....sAKSRhsWNhLPRAGsl.......GIGCQEQVQLE.... 0 148 148 148 +14544 PF14693 Ribosomal_TL5_C ShortName; Ribosomal protein TL5, C-terminal domain Punta M mp13 CATH:1feu_A_02 Domain This family contains the C-terminal domain of ribosomal protein TL5. The N-terminal domain, which binds to 5S rRNA, is contained in family Ribosomal_L25p, Pfam:PF01386. Full length (N- and C-terminal domain) homologues of TL5 are also known as CTC proteins. TL5 or CTC are not found in Eukarya or Archaea. In some Bacteria, including E. coli, this ribosomal subunit occurs as a single domain protein (named Ribosomal subunit L25), where the only domain is homologous to TL5 N-terminal domain (hence included in family Pfam:PF01386). 
The function of the C-terminal domain of TLC is at present unknown. 27.00 27.00 29.80 28.70 26.80 26.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.84 0.72 -3.78 195 2672 2012-03-08 13:20:59 2012-03-08 13:20:59 1 2 2591 91 749 1875 2003 88.30 28 43.06 NEW EclplcVPlphhG..-...usGhp.t.G.Gllpp.hpplplpsh.PsslP-plpVDlssL.c.l.GsslpluDl..p..l..P...p..G..lpl.s.......s.....-...s-t......sllolssspstc ................plplcVPlphhG..c...usGlK.p.G.GllppshpplcVpsh.spslPEtl-lDlosL.c.l.G.cslpluDl..p..l..P...p..G..lpl.h...............s..c...s-.......slsslhtsp......................... 0 274 534 658 +14545 PF14694 LINES_N Lines N-terminus Eberhardt R re3 Jackhmmer:Q8NG48 Family This family represents the N-terminus of protein lines [1]. In Drosophila this protein is involved in embryonic segmentation and may function as a transcriptional regulator [2-3]. 25.00 25.00 27.70 27.70 24.00 24.00 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.26 0.70 -5.17 9 107 2012-03-08 13:57:56 2012-03-08 13:57:56 1 4 78 0 67 108 0 284.60 32 45.46 NEW hslohsDsESa.DooplKs.slKtLts+WssLlcth.spllpp.........ssspstsslloFLcLWEslISV..KANLSlh-T+PFaupLsphlhLLssslsshla+phLsLFN........EsLCYGSTLALQ-...hLs--ssuLActll+sV+chRlL-pLPhpptsu.................................phuuupus............sshD+sLLQKMuLLVLKSlAlpl+EhR.ssSSDSSlcSpc.D..t-hthIpRSIR-VLppL-salKspLpFHP-sphuchLl+lFtDQDD.hlEAMVCoLDltsGloa.cssAsssLs.........th....LNPshoFltFL+hluaDocVLLDhLlSsETCFLLYLLRaLKalRcNWshFlpsCt 
............................................................................................................................hlshh.pE.h..Dost.Ks...hlpph.p.asshhcsh.splh.s.p.............tsth.p.lhsFL.pLhE.llus....thpLpht....pp+.hahp.sth..ll.sh.s.l.shlh+phlhhhp........EsLsh..G..S..s...s..L.s.......hls.-hhtLAptllptV.shthLcplshpt..s.....................................hGsspst...st..t..t.DpslLpthsLlllKSltlphp....sso.S..ssp..........hc..s............lpp.hpclLthL.c.hlp.php.p.......p.schl....plF....-QDDphlEuhhs.Lslh.....t...lsh..ttps.h...pLs.......................th.........hNPhhhFl.hLc.huasss.lL.LDhLlSsE...TC..FL.YhlRaLKhlpts.Ws.Fhp.C................ 0 21 27 46 +14546 PF14695 LINES_C Lines C-terminus Eberhardt R re3 Jackhmmer:Q8NG48 Family This family represents the C-terminus of protein lines [1]. In Drosophila this protein is involved in embryonic segmentation and may function as a transcriptional regulator [2-3]. 27.00 27.00 37.70 36.80 21.40 19.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.76 0.72 -4.71 20 93 2012-03-08 14:10:50 2012-03-08 14:10:50 1 4 76 0 64 97 0 38.70 47 5.50 NEW cshssLlcL+huIpRLhpKsLFPYNssPLLR..LLcpsEpL .....cshpsLhcLphsIsRLppKsLFPYNsssLL+..LLcphEsl..... 0 17 24 42 +14547 PF14696 Glyoxalase_5 Hydroxyphenylpyruvate dioxygenase, HPPD, N-terminal Coggill P pcc CATH:1cjx_A_01 Domain This domain is one of two barrel-shaped regions that together form the active enzyme, 4-hydroxyphenylpyruvic acid dioxygenase, EC:1.13.11.27. As can be deduced from the disposition of the various Glyoxalase families, _2, _3 and _4 in Pfam, Pfam:PF00903, Pfam:PF12681, Pfam:PF13468, Pfam:PF13669, these two regions are similar to be indicative of a gene-duplication event. At the individual sequence level slight differences in conformation have given rise to slightly different functions. 
In the case of UniProt:P80064, 4-hydroxyphenylpyruvic acid dioxygenase catalyses the formation of homogentisate from 4-hydroxyphenylpyruvate, and the pyruvate part of the HPPD substrate (4-hydroxyphenylpyruvate), derived from L-tyrosine, and the O2 molecule occupy the three free coordination sites of the catalytic iron atom in the C-terminal domain. In plants and photosynthetic bacteria, the tyrosine degradation pathway is crucial because homogentisate, a tyrosine degradation product, is a precursor for the biosynthesis of photosynthetic pigments, such as quinones or tocopherols [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.70 0.71 -4.24 17 1091 2012-10-02 15:00:03 2012-03-08 15:28:49 1 6 816 6 336 996 193 140.50 36 33.00 NEW FENPhGl-GFEFVEFuuP-..s..ttLcslFctMGFTtVA+HRSKDVsLYRQGsINFIlNtEPcS.AthFup-HGPuACuMAFRV+D.AppAYccAl-hGApPlpht.sGsMELplPAIKGIGGuhlYLlDRas-us..........SIYDlDFE ................................................hthsGhEFlE.F.s..s..ss...s...pp..Lt..t.la.p.p.h.GFstl......A......+......H......Ro...K..s..l..h..La..RQG....s..Ishll.......N......u................-..P...c..S....h..AppF....s...p..pHGPulCuhAhRVcD.AppAap+AlphGA....t....s....h....p....s....t....s....u....s.t....E....Ls......l....P.A.I.p.G.l...GsSllYhVDR..h..tt..t..........slY-.DF.......................................................... 0 72 164 254 +14548 PF14697 Fer4_21 4Fe-4S dicluster domain Coggill P pcc manual Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
35.00 35.00 35.00 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.04 0.72 -4.11 63 3437 2012-10-03 08:56:43 2012-03-08 16:17:41 1 136 2521 25 833 2684 375 57.90 41 22.32 NEW AhIcp-tCIsCs+CahAC.DsuapAI.......t..h...s..sc..p.....t..h.......s......l.l...p.-.-.CsGCsLCsslCPl...-CIsM ...........................hIsp-pCI......G...Cs.p..Chp.A.CP.......l.cAI..........................h.....s...s.c...p........h...p.................................s......l.h....s..-..pC.s.G..CpLCsssCP..s..cCIp............. 0 245 478 670 +14549 PF14698 ASL_C2 Argininosuccinate lyase C-terminal Eberhardt R re3 CATH:1k7w_A_03 Domain This domain is found at the C-terminus of argininosuccinate lyase [1-2]. 24.00 24.00 24.40 24.10 23.50 23.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.29 0.72 -3.90 617 4020 2012-03-09 08:50:06 2012-03-09 08:50:06 1 16 3666 53 1083 3103 1579 70.50 40 15.17 NEW assATDLADYLV+.+.GlPFR-AHclsGphVt......hu.pcs..hs.Lp-LoLc-hpp....h.....s......shl.......s.p....D...lap.sLsl-suVsp ...........ausAT-LADYLVpK.GlPFR-AHclVGcsVh..............huhppG....hs...Lp....-...LsLp-hpp........h.........s......shl...............s..p....D........lap.lLshcsslp..................................................... 0 329 677 908 +14550 PF14699 hGDE_N N-terminal domain from the human glycogen debranching enzyme Godzik A adam Jcakhammer:GDE_HUMAN Family this domain is found on the very N-terminal of eukaryotic variant\ of the glycogen debranching enzyme (GDE), where it is immediately followe by the aldolase-like domain. 
The eukaryotic GDE performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the, respectively N- and C- terminal halfs of eukaryotic GDE enzyme hGDE_N domain is involved in the glucosyltransferase activity, probably\ as a substrate binding module (by analogy to other glucosyltransferases) 22.10 22.10 22.70 26.40 21.30 20.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.97 0.72 -4.14 68 237 2012-03-09 12:36:43 2012-03-09 12:36:43 1 12 206 0 168 232 0 94.70 30 6.56 NEW pLRFt..G.u.S.lsRcGslasNhPtsup..pFcRsp..apphclp.............s.......s...........hspshplclslppsGuFsaYhsY.......................csp........cosphYhsVsPtLplsu .........................LcFt..Gs.S.lu+psslaoNh.....P.......pGp..pFpRsc..F+phphp................s............s.................hspchhhcl.s.lppuGuFpaYhsa..........................sspt......tcosshYlsVsPtLpls.s............ 0 45 81 132 +14551 PF14700 RPOL_N DNA-directed RNA polymerase N-terminal Eberhardt R re3 CATH:1msw_D_01 Domain This is the N-terminal domain of DNA-directed RNA polymerase. This domain has a role in interaction with regions of upstream promoter DNA and the nascent RNA chain, leading to the processivity of the enzyme [1]. In order to make mRNA transcripts the RNA polymerase undergoes a transition from the initiation phase (which only makes short fragments of RNA) to an elongation phase. This domain undergoes a structural change in the transition from initiation to elongation phase. The structural change results in abolition of the promoter binding site, creation of a channel accommodating the heteroduplex in the active site and formation of an exit tunnel which the RNA transcript passes through after peeling off the heteroduplex [2]. 
25.00 25.00 27.60 25.90 24.40 23.80 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.02 0.70 -5.17 126 554 2012-03-09 13:37:42 2012-03-09 13:37:42 1 14 381 20 280 574 278 279.60 19 28.13 NEW p+QhpLEpcuh..ctAhc+a+cthc....chpcp.uh..shsh........lpslhhpWapsLtstlpc-hc...............h..........................................ptppptpstassaLp....................hlss-chAsITlppllshhs.sts..httsh...................plsphshslGculEpEhchpphhcp........t.p.........ppppttpp.................................+php............phlcppphpt...........htthhttt................................................Wspss...ps+lGuhLlphLhc.s........uhlpsst.......tssts..........t..PAFhHthph.........sp+h.Ghlchsstlhch.....lscpstt..h......s.ah......P....MllsPcPWs..uhspGGYh.hht...o.lhRs+ss.........tppphhhpshpp.splp.......pVacuLssLGp ...................................................................................................................................................tQhthE..psh..ttuhtchpp..p.......ph.ph..t........h......hpthhhpWht.lhttlppphp........h..................................................................................tttttp..hhsalp....................hlsscp.h...u.hlshhphhthhh.stt...................................................phhphhhtlGptlpp-hph.pphhpt........................t.p.h........t...ppt...t............................................pthh.....phhptt.t.tt....................................................................................................W.s.t...th...phclGshLlphLlp.s.........shlphpt.........tt..s...............................................t..sshhp.hph.t.............spph..shlchps.lhph......l....pppsh...............s.hh.......PMlsPPpPWs..u.hppGG..Yl..ht.........stlhRspt...........ppp.hhtthtp...sp.hp.......tlhcuLshLup.................................................. 
0 86 164 238 +14552 PF14701 hDGE_amylase glucanotransferase domain of human glycogen debranching enzyme Godzik A adam Jackhammer:GDE_HUMAN Domain this is a glucanotransferase catalytic domain of the eukaryotic variant of the glycogen debranching enzyme (GDE).\ \ The eukaryotic GDEs performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33),\ performed by the, respectively N- and C- terminal halfs of eukaryotic GDE enzymes. hDGE_amylase domain is a catalytic domain responsible for the glucanotransferase function. It belongs to the alpha-amylase clan and is predicted to have a structure of a 8 stranded alpha/beta barrel (TIM barreal) where strands are interuppted by long loops and additional mini-domains. In most other amylases, the catalytic domain is followed by a beta- barrel substrate binding domain, but presence of such domain cannot be verified in the human (and other eukaryotic) GDE enzymes 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.38 0.70 -5.57 65 321 2012-10-03 05:44:19 2012-03-11 07:14:19 1 22 258 0 228 1087 85 363.10 35 27.98 NEW c.LPLsulsl.oVlSKahGs.h..s.-W-c+lpsluc+GYNMlHFTPLQpRGtSNSPYSIaDQLpaDsshFss.......s..p........pDVpphlc...chcc-..aulLSlTDVVaNHTAsNSpWLpEHPEuGYNhpTuPHLcsAhELDssLlcaScpL.....pph.G..LPsplco.p-DLtplhssl+pcVlspl+LWEaYslDVccsscphhctap...........ssp.........s.shs...p...s............................s.hpp.pt.....ph...............lpc..t............hhp....RaupclDsp....h....sh....sl.hsph.....tps....ssc......h...cpstp..phpclL-clNlshYcEaDsDlppIlpplhsRI+YhRl--cGPKLGs.loccsPLlEsYFT.+..h.p.pt.p.............ptpphsLANNGWlWs.usPLhDFAu.sp...S+uYLRREVIVWGDCVKLRYGspP-DsPaLWc+MscYochhA+hFsGFRIDNCHSTPLHVAEalLDtAR+VpPsL 
...............................................................t..lPLsslsl.ohluKhhG...h....s.cWc....p....cL.phhpc...pGY...NMl...HF...T...P.l......Q.........p...h......G......t......S......p.........S..sYSlhDQLphssphh.s............................sp...........p-l.tphlp...ch...cpc.....aslLsloDlVhNHT.AsN...SpWLp...-...HPEs..uYNhhsu....PaLp....sAh.LDptLhphuppl....................pph...GlP.s....l.cs..p.cl.........tlhphltpplhsplcLWE..aa.l-Vppthp.thhp.hp...ptp.............p.....p...........................................................s..p..hph............lpp.................hpRhsppls.p.h....sh....thh..h........ttt.......st......ppstphhpphlctlN..hhpphp.t.chp...............thlpplhsplpY.RlsspGP+lG..lopc.p.P...LhptY...FTh..tp..........................pttphhhApNGWlhs.sssLhshAt.st...SpsYLRRElIsWGDsVKLRYG.ppP-DsPaLWpaMpcYschhAphFpGhRlDNCHSTPlHVuEahLDtARclpPsL................................................................................................ 0 81 124 184 +14553 PF14702 hGDE_central central domain of human glycogen debranching enzyme Godzik A adam Jackhammer:GDE_HUMAN Domain this is a central domain of the eukaryotic variant of the glycogen debranching enzyme (GDE). The eukaryotic GDE performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the, respectively N- and C- terminal halfs of eukaryotic GDE enzyme The hGDE_central domain follows the glucanotransferas domain and precedes the glucosidase (GDE_N) domain. 
It is very likely that the current definition contains two or more domains, by analogy with baterial GDEs, this domain should be involved in substrate binding either for the N-terminal glucanotransferase and/or the the C-terminal glucosidase (or both) 19.80 19.80 20.70 20.40 18.80 17.30 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.78 0.70 -5.21 58 283 2012-03-11 08:10:09 2012-03-11 08:10:09 1 19 228 0 207 282 0 236.10 34 16.88 NEW GIuslKphLNcLHpphupcua-.....EsalHp.-scaIslpRhsPcTp+GhhLlA+TAFssssss......tslsP.lpLsG.opschlhuhpL......pttssp..........c............hhpDcphLpGlPsplh-lps.............th..pt.tppstlhh..psFsPGSIhlFcTplssss....t.Lcp.l..................................hps..ustcAhspLsLhDLNhlLYRC-sEE+D..ss.Gps...GsYsIPs.aGpLVYsGLQGWhSlL.cpIhtpNDLGHPLCspLRpG..pWAlDYllsRLp .................GIhssKphLNplHpphutcGas............psalcp..-sphl.sVpRhpPpo+puhhllA+TA.Ftsspss......ttlss.hpl..sG.phtchlhphpl.........t.p.ssp..........t........................hhpDpphlpGlPsh.hhplpt.h..........hth..ps.tp.sp.h.hpphsPGSlhlFcsplss...ts......s.Lpp.l....................................hpp..shtph.hsp...LsLh-.LNhlL...YRC-sEEp-.s..ttsGsYsIPs...aGtLsYsGLQ..........GhhSl.L.pcIh....p.NDLG..HPlCs.NLRsG..pWhlDYlssRL................................... 1 73 112 168 +14554 PF14703 DUF4463 Domain of unknown function (DUF4463) Godzik A adam Jackhammer:Q9P1W3 Domain This is a cytosolic (predicted) domain present in integral membrane proteins, such as TM63C_HUMAN TRANSMEMBRANE PROTEIN 63C. This domain usually preceeds a DUF221 (PF02714)domain and follows a RSN1_TM (PF13967) Fold recognition programs consistenly and with high significance predict this domain to be distantly homologous to RNA binding proteins from the RRM clan. 
22.80 22.80 22.80 22.80 22.70 22.60 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.76 0.72 -3.46 198 880 2012-03-11 09:12:07 2012-03-11 09:12:07 1 21 225 0 638 834 0 88.20 22 10.30 NEW lsp.sspcLpcLlccRcphhppLEtshsch...h+ps.p....th.ptps..ptph..........................................t....hh..shhs..cKVDuI-ahppclpcLs .............................................................................................lshsspcLtcLscc.RcchhppLEth.sch...h+psp.....th.ptcs..ptph................................................................................................................................t..........hh..shhu...c+VDuI-ahppclpcL..................................................................................................................... 0 150 321 507 +14555 PF14704 DERM Dermatopontin Eberhardt R re3 Jackhmmer:Q07507 Family Members of this family mediate cell adhesion via cell surface integrin binding [1]. They also induce haemagglutination and aggregation of amebocytes [2-3]. 23.00 23.00 23.40 23.00 22.90 18.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.01 0.71 -11.30 0.71 -4.25 29 166 2012-03-12 10:48:15 2012-03-12 10:48:15 1 26 61 0 112 165 2 137.80 28 24.10 NEW NsacpshsapCssspulsslpShasN.tppDRRWsFsCsshus.t.ss.sssphsu...hNp..acpshsasC.....ssNhalpuhpStassstc......DRhWpatCpch..sss...ph..psCh.ps..sahNsacsslsas...........................s...ssspsIsGltS................hasss..pcDRcW+hpsCpl...sC ..................cpshsapCsp.spslstlpShasp.ptpDR...Wsa..t..Ctssst......tp.....ssp.shhp......lNp.....hs......tthshsC.....sss.thlsGhpSha..ssh..............DRcWpFhCsch..sst..................sshsCh.to..sasN..p.aspphshh...........................s.s.shhlpGhto.....................................hasss..pcDRpWpahh..Cph................................ 
0 78 82 94 +14556 PF14705 Costars Costars Eberhardt R re3 Jackhmmer:Q9P1F3 Domain This domain is found both alone and at the C-terminus of actin-binding Rho-activating protein (ABRA). It binds to actin, and in muscle regulates the actin cytoskeleton and cell motility [1-2]. It has a winged helix-like fold consisting of three alpha-helices and four antiparallel beta strands. Unlike typical winged helix proteins it does not bind to DNA, but contains a hydrophobic groove which may be responsible for interaction with other proteins [3]. 24.00 24.00 24.80 24.20 23.70 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.44 0.72 -3.99 49 232 2012-03-12 11:46:56 2012-03-12 11:46:56 1 6 126 2 163 217 1 77.80 45 36.18 NEW hpVpcEltpLsphIcchGp........p..sss.GphsVpFGhLF..-phssl.-........t..LVGsLhsA+K+plVcF-GEhLhQGtcDcVsIsLL ..................................plp+EltpLsp.IcphGp.......c.stD..GphpVpFG.LF..-+hspI...-........t......lVGhLhtA+K+clVsF-GE.hLhQGtcDcVsIsLL.................. 0 52 71 117 +14557 PF14706 Tnp_DNA_bind Transposase DNA-binding Eberhardt R re3 CATH:1mus_A_01 Domain This domain occurs at the C-terminus of transposases including E. coli tnpA (Swiss:Q46731). TnpA encodes a transposase and an inhibitor protein, the inhibitor only differs from the transposase by the absence of the N-terminal 55 amino acids, which includes most of this domain [1]. This domain consists of alpha helices and turns, and functions as a DNA-binding domain [2]. 24.00 24.00 25.50 24.50 23.20 22.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.68 0.72 -4.25 54 483 2012-03-12 14:11:11 2012-03-12 14:11:11 1 8 172 5 115 435 60 55.90 43 15.95 NEW sWAppEhtpssLGDtRhscRLlplsppL..uppPstSlPpusp.shApscAAYRFhsNppl .....tWApcphupAcLGDsRRs+RLlsLussL.......Ap+sGhSIspusp.shApscuAYRhlcNspV.............. 
0 23 63 79 +14558 PF14707 Sulfatase_C C-terminal region of aryl-sulfatase Coggill P pcc CATH:1auk_A_02 Domain \N 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.93 0.71 -3.86 86 772 2012-10-03 20:55:17 2012-03-12 17:14:39 1 9 252 8 368 739 322 119.10 24 23.28 NEW Ss+c.....hlFaYsust.............LhAVRhtp.......aKAHahT.shtspss..........................s.s.st.tlptH-P.PL.LFcLspDPuEphPL................ssps....spatpllpp.......lppshcpHppol.........s.s.sQhshhs.....hhhPhhpstp..h...s.ss.C .................................+c..l.Fa.Y.s.sst.............LhAlRhtp.........aKsa...Fho.th..pspst....................................hhtp..hsppcs..PL...lFcLppDPtEph.s.l.................ssts..............s.a.ph.ltp.......hpthhtpa.psh...................h......................................................................................................... 0 105 153 227 +14560 PF14709 DND1_DSRM double strand RNA binding domain from DEAD END PROTEIN 1 Godzik A adam Jackhammer:Q8IYX4 Domain A C-terminal domain in human dead end protein 1 (DND1_HUMAN) homologous to double strand RNA binding domains (PF00035, PF00333) 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.55 0.72 -3.69 24 349 2012-10-02 17:51:51 2012-03-13 07:27:26 1 29 109 0 202 525 123 73.20 32 9.36 NEW psAsphLcplCpKNpWGpPhYplp.ppsGPcthhh.FhYKVhlPuhss...s....h...sschsssh--AKphAAphsLppLs ......tssphLp-lCpKppWssPpahLh..pssGPcppph.Fla+Vhlsuhsh.......................hh..sschpsthcpAKphAAphsL.tL....................... 0 40 70 122 +14561 PF14710 Nitr_red_alph_N Respiratory nitrate reductase alpha N-terminal Eberhardt R re3 CATH:1q16_A_01 Domain This is the N-terminal tail of the respiratory nitrate reductase alpha chain. The nitrate reductase complex is a dimer of heterotrimers each consisting of an alpha, beta and gamma chain. 
The N-terminal tail of the alpha chain interacts with the beta chain and contributes to the stability of the heterotrimer [1]. 24.00 24.00 24.30 25.30 23.80 23.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.01 0.72 -3.71 73 1502 2012-03-13 09:33:31 2012-03-13 09:33:31 1 5 1099 12 150 902 20 37.10 58 3.07 NEW HFLDRLpaFp+t......p.-sFusGHGtsssEsRsWEcuYRpRW ......+hLDRhRYFKp+.......s.ETFADGHGQlhpssRDWEDuYRpRW. 0 32 70 112 +14562 PF14711 Nitr_red_bet_C Respiratory nitrate reductase beta C-terminal Eberhardt R re3 CATH:1q16_B_03 Domain This domain occurs near the C-terminus of the respiratory nitrate reductase beta chain. The nitrate reductase complex is a dimer of heterotrimers each consisting of an alpha, beta and gamma chain. This domain plays a role in the interactions between subunits and shielding of the Fe-S clusters [1] 27.00 27.00 28.70 28.60 22.20 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.87 0.72 -4.46 141 1906 2012-03-13 09:34:22 2012-03-13 09:34:22 1 7 1444 12 235 921 18 80.90 52 16.13 NEW LSPlhssspussh.....s......slhPslcsLRIPlcYLANLhTAGDstPVttuLcRhhAMRuYMRupslstp.D...tslhcpVGLotp .................................LSPIpShs-AGsh...sss..............ulLPsl...-oLRIPVpYLANLLTAGDTtPVhpAL+RMhAMRpYMRupsVcs....hsD....spsl-cVGLot.t............... 0 59 130 189 +14563 PF14712 Snapin_Pallidin Snapin/Pallidin Eberhardt R re3 Jackhmmer:O95295 Family This family of proteins includes Snapin, this protein is associated with the SNARE complex, which mediates synaptic vesicle docking and fusion [1]. It also includes the yeast snapin-like protein SNN1, which is a part of a complex involved in endosomal cargo sorting [2]. The family also includes pallidin, a component of a complex involved in biogenesis of lysosome-related organelles [3]. 
25.50 25.50 25.70 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.17 0.72 -3.68 53 266 2012-03-13 14:00:15 2012-03-13 14:00:15 1 10 143 0 176 242 0 88.10 27 51.76 NEW tptLupG..lhplhpPslcphcsplppltpsQppLtpplcpLspcLcchpc.pp.s.s..hshsp.YhpKLhsl++clhslpphlpplpcRht+lpp ...................h.p.lupG..lhshhtPslpphcpplpplpps........QstLhppl-p.s....sc...Lpchpp.tphs.h..hshsp.YhpKLhsl++chhhlpphhpplpcR.htcLp........................... 0 61 86 132 +14564 PF14713 DUF4464 Domain of unknown function (DUF4464) Eberhardt R re3 Jackhmmer:Q6V702 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 224 and 241 amino acids in length. There is a conserved YID sequence motif. 27.00 27.00 53.90 44.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.45 0.70 -4.95 44 173 2012-03-13 14:47:19 2012-03-13 14:47:19 1 4 102 0 115 188 5 188.30 38 93.70 NEW hhs.slhpFsTYEDYLDShlo.pDhhYLpspclsRpLlc..LG.aRssupllpR-EFttp+pthctthps...................plsuh...Gcpl..pst.PhLpALApREchshstcLoTIIFlc.css+GpEISGYIDasppL+s..........psacshFpG+++........LhPpsoDLSaaNWcoppshhNsSsNapVls-st.pGLlF+pKtD+KlIsVssps..s.u-NspRsh.lposhYspVVlYDHhsR++s ......................lhpFtsY--YLcS.lo..DhhYLtspphh+pllc..LG.h+sptphhpc--F.tt+ttht.th.s................................htt........up.h..ps..shLhtLAtREc.shptpl...............ooIlFlp.pspp.G.ElSGYIDasppL+p............psacshFpt+++........LhPp.sDLSaasWcsphsh.NsosNa...........pVlscs....GLlFppKtD+KhlsVsspt....ucNspRp..l..o..Yh.slhaDHhhR+p.......................................... 0 45 55 85 +14565 PF14714 KH_dom-like KH-domain-like of EngA bacterial GTPase enzymes, C-terminal Coggill P pcc CATH:1mky_A_03 Domain The KH-like domain at the C-terminus of the EngA subfamily of essential bacterial GTPases has a unique domain structure position. 
The two adjacent GTPase domains (GD1 and GD2), two domains of family MMR_HSR1, Pfam:PF01926, pack at either side of the C-terminal domain. This C-terminal domain resembles a KH domain but is missing the distinctive RNA recognition elements. Conserved motifs of the nucleotide binding site of GD1 are integral parts of the GD1-KH domain interface, suggesting the interactions between these two domains are directly influenced by the GTP/GDP cycling of the protein. In contrast, the GD2-KH domain interface is distal to the GDP binding site of GD2. 25.30 22.20 26.70 23.70 25.20 22.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.45 0.72 -4.02 596 4526 2012-03-13 15:50:54 2012-03-13 15:50:54 1 12 4425 2 1039 3025 1279 80.60 40 17.34 NEW RlsTupLNchlpcslttp..s.PP.s...p...G.+c....lKlhYsTQ......ss.spPPsFllF...sNp..schlptoYpRYLcNplRcsF.sh.pGoPl+lhhR .............................RlsTuhLNcllppA..lttps.PP.s..p...G..+R...lKlhYAoQ.......su.spPPsFVla...sN.p.schlp.SYcRYLpNplRcs.F.sh.pGTPI+l.h+................ 0 365 699 892 +14566 PF14715 FixP_N N-terminal domain of cytochrome oxidase-cbb3, FixP Coggill P pcc Pfam-B_28684 (release 26.0) Domain This is the N-terminal domain of FixP, the cytochrome oxidase type-cbb3. the exact function is not known. 24.90 24.90 26.00 25.30 23.80 23.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.82 0.72 -4.57 213 1066 2012-03-13 17:05:29 2012-03-13 17:05:29 1 15 961 0 306 849 102 52.10 41 16.65 NEW tssspsoGHsWD...G....IcEhsNPLP+WWhahFhhTIlaulsYhlh.YPuhshhpuh ...........stpspssGHpaD...G.....IcE.hs.....NPLP+WWhhhFhsTIlaAlhYhlh.Y.Puhu.hpu.h......... 
0 83 189 248 +14567 PF14716 HHH_8 Helix-hairpin-helix domain Coggill P pcc CATH:1bpe_A_01 Domain \N 30.00 30.00 30.10 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.15 0.72 -3.79 351 1491 2012-10-03 02:11:09 2012-03-13 17:38:04 1 43 1116 210 618 1303 285 67.70 29 13.24 NEW Npcl..s.phL.pclAphhEhpt....ts..sa.+spAY++AApslcs.h.sp.slss...h...pt...tl.....pp.lsGlG...cslApcI.pEhlp ....................ppl.h...chL.cplAphhEhpt....ts..sa+lpAYR+AApslcs.h.sc..slsph....p-.........h........pp.lsGI.G......cphAphI.pEhl........... 0 202 354 498 +14568 PF14717 DUF4465 Domain of unknown function (DUF4465) Godzik A adam JCSG structure SP13250B Family A large family of uncharacterized proteins mostly from human gut bacteroides, but also some environmental and water bacteria (Planctomycetes) as well as metagenomic samples Most proteins from this family are secreted or located on the outer surface and may participate in cell-cell interactions or cell-nutrient interactions This function is supported by a solved structure of a Bacteroides ovatus homolog, which adapts a galactose binding (jelly-roll) beta barrel structure 25.00 25.00 63.10 62.60 22.00 21.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.50 0.71 -3.86 22 101 2012-03-13 22:58:36 2012-03-13 22:58:36 1 7 65 1 16 68 17 185.20 31 39.28 NEW ssY......spaG.......t.h......uGauhostT-ss.....ossass...p.huuhsGtGps....sssh.hh....s.sas.uhst...shlth.........sshphcGhalTNooYshhshhpGssh.........ut.......tshp..c.s.DaFplolhGa..c.....ss.....ssl-hhLADaR..ssp...p.lVssWpahDLosLusssplpFphpSSDs..Gp.aG.hNTPuYFslDsl 
......................h....sphG...t.....s.uuashSthsp.t......sssass...p.hus.h.....sts.Gps.......Gs.sa.sll...hGYssuaspt.......st.chh......................ssstplpGlalsNToYsYsshppGspa.............................sth..........sshpc.s.saFplslhshD.......ssG..hhpshchhLADYR.ssp.......s..lssWpahDLsul..s..s.VpslcFshcuSDs..us.YG.hNTPAYhClDcl.... 0 13 16 16 +14569 PF14718 SLT_L Soluble lytic murein transglycosylase L domain Eberhardt R re3 CATH:1qsa_A_02 Domain Soluble lytic murein transglycosylase (SLT) consists of three domains, an N-terminal U domain, an L domain (linker domain) and a C-terminal domain (C). The L domain may be involved in the interaction of the enzyme with peptidoglycan [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.33 0.72 -4.18 96 1072 2012-03-14 10:47:09 2012-03-14 10:47:09 1 2 1057 3 180 738 113 70.40 40 10.99 NEW .pttlttpssltRlpELhthsc.........stA+pEWthLlsphs....p.p.p........pt..tLAtaAtcppWachuVpAoIpuKhWDpl ............................ss...tlspsPphARlcELhhhsh........cssARuEWspLlcstS....c.s.E........pttLA+YAasppWaDLoVpATIsuKhWDpL.......... 
0 31 85 137 +14570 PF14719 PID_2 Phosphotyrosine interaction domain (PTB/PID) Coggill P pcc Jackhmmer:Q6ZT52 Domain \N 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.44 0.71 -4.64 10 195 2012-10-04 00:02:25 2012-03-14 13:07:17 1 2 88 0 123 471 1 164.10 34 47.36 NEW TYsVlYLGNVLTlhAK.GEGCl-KPLupIW+sYspp.+.sslpMKLsVosSGLKAsTcc.........+GLTEYWuHRITaCsAPscaPRVFCWlYRHEG++hKsELRCHAVLCpKpppAppluppLpppLpsALpEaKREKlp.....tQpA+Lshu.slhs..sPs.lP+RKLL..sGs..pNaRP.PVERSKSAPKLuSI-E- ...........................................apVhYLG..ps..sT.....ht.sp..G.cG.CT-csV.sc....l....W.....p+..p......p...u......+.....tss.....ph...c...Lplss...pGl..+h.p..p...h..-tt...............pps....h.c..t..Y..h..ltRIoYC.sA.D..tp.h.P+lFAWl..............YR.H........p.s....cc..............h....s..h........LcCHAVls.p+ttcActlAhhLhpshtpA..h..p.-.a.K..+.p+.........+ppt....................................................................................................................................... 0 38 48 82 +14571 PF14720 NiFe_hyd_SSU_C NiFe/NiFeSe hydrogenase small subunit C-terminal Eberhardt R re3 CATH:1wui_S_02 Family This domain is found at the C-terminus of hydrogenase small subunits including periplasmic [NiFeSe] hydrogenase small subunit, uptake hydrogenase small subunit and periplasmic [NiFe] hydrogenase small subunit. This C-terminal domain binds two of the three iron-sulfur clusters in this enzyme [1-3]. 27.00 27.00 31.20 30.90 23.30 22.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.47 0.72 -3.95 163 2101 2012-03-15 09:29:21 2012-03-15 09:29:21 1 5 1192 65 410 1124 51 85.00 50 22.89 NEW aupplH-s.C.RpsaF-tGtFscpaG-.ttt.GhCLaclGCKGPhTassCsphtWNt.ss..uaslpuGpPCIGCoEPsF.Dphssahpt ...........YGppIH-p.C.RRsHFDAGcFscpasD-utRpGaCLY+lGCKGPpTYssCSohcaNs..Gs..uaPlpuGHsClGCsEsGFhDctu.a...hh.................................. 
0 144 296 359 +14572 PF14721 AIF_C Apoptosis-inducing factor, mitochondrion-associated, C-term Coggill P pcc Jackhmmer:JCSG-Target_422903 Domain This C-terminal domain appears to be a dimerisation domain of the mitochondrial apoptosis-inducing factor 1. protein. The domain also appears at the C-terminus of FAD-dependent pyridine nucleotide-disulfide oxidoreductases. Apoptosis inducing factor (AIF) is a bifunctional mitochondrial flavoprotein critical for energy metabolism and induction of caspase-independent apoptosis. On reduction with NADH, AIF undergoes dimerisation and forms tight, long-lived FADH2-NAD charge-transfer complexes proposed to be functionally important. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.78 0.71 -3.91 23 200 2012-03-15 13:49:27 2012-03-15 13:49:27 1 9 126 9 116 198 7 98.30 44 20.11 NEW AGENMTGAtKPYhHQSMFWSDLGP-lGYEAIGlVDSSLPTVGVFAKuospD.......sP.cusscsossslpscspspussssstsssps.sss.t.......tt....-DaGKGVlFYLRDcpVVGllLWNlFNRhslAR+llp-s ....................................................................AGENM.sG.A...t.......K..P..YhHQSMFW.SDL.GP-lGYE..AIGllDSS.L.sTVuVaAcsotpp.........p....tt..t...............................................tcpasKGVlFYL..+.sc..hVVGllLWN...lFs.+.hslARplltp.s...................... 0 49 64 92 +14573 PF14722 KRAP_IP3R_bind SSFA2_N; Ki-ras-induced actin-interacting protein-IP3R-interacting domain Eberhardt R re3 Jackhmmer:P28290 Family This family includes the N-terminus of the actin-interacting protein sperm-specific antigen 2, or KRAP (Ki-ras-induced actin-interacting protein) [1]. This region is found to be the residues that interact with inositol 1,4,5-trisphosphate receptor (IP3R). KRAP was first localised as a membrane-bound form with extracellular regions suggesting it might be involved in the regulation of filamentous actin and signals from the outside of the cells [1]. 
It has now been shown to be critical for the proper subcellular localisation and function of IP3R. Inositol 1,4,5-trisphosphate receptor functions as the Ca2+ release channel on specialised endoplasmic reticulum membranes, so the subcellular localisation of IP3R is crucial for its proper function [2]. 27.00 27.00 28.10 27.20 21.70 25.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.88 0.71 -4.66 8 175 2012-03-15 14:06:43 2012-03-15 14:06:43 1 4 68 0 90 165 0 137.00 44 18.07 NEW .hhs+GpShNSou.SusousTsSSlSElL-hapEDsEElLasLGFGpDEPclso+IPuRFFsssSsA+GIsh+lFLcuQlpRIchEsPshuLsSRFRQlElLssVANAFSSLYSaVStpPsQKlussch.......E.hshssPlp+pS....p...cspusht+ltsp...s ..........................................hhphGpShsSst.st.pouussS.SlsElL-hhccDPE-ILhsLGFG.....p.....-.E.sDlsS+IPuRFhsssStA+GIDhclFLpAQh.pRh-hEsPs..LhuRF+QlphLshsssAF.sLhs.VSths.pt................hs....shphss......................s.............................. 0 12 20 45 +14574 PF14723 SSFA2_C Sperm-specific antigen 2 C-terminus Eberhardt R re3 Jackhmmer:P28290 Family This family includes the C-terminus of the actin-interacting protein sperm-specific antigen 2 [1]. 25.00 25.00 25.70 25.70 22.00 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.14 0.71 -4.54 5 106 2012-03-15 14:07:22 2012-03-15 14:07:22 1 3 38 0 47 107 0 157.40 49 15.79 NEW HhsospTHSVPhsSGh.....uuSsHhoPAuCsaSs+HsPaPapspusssPssshst.......tplEMQLRRVLHDIRsThQNLuQ.s.h+G.Dhshs..hsT.p.SVpPLYEsThpELQshRRsLNlFRTQMMDLELAhhRQQoLVYpHMSEEERcEA-QLQTLRcAVRQELQELEhQLEDRLLuIcEQl .....................................................................Hhtp..T+SVs..SGh.....ssosassPhtCshsp+H.tsasa.p.pssss..Pss...............lEhQLp+sL+slpso..........................oV.h.Lhp.sThpEhpsMRpsLshFRpQhh-LEhshhcQQshVY+HMoEEERhEs-QLQoLRpuVR.Elp-LEhQLp-RlhtlcEpl........... 
0 3 7 16 +14575 PF14724 mit_SMPDase Mitochondrial-associated sphingomyelin phosphodiesterase Coggill P pcc Jackhmmer:Q9NXE4 Family The GO annotation for this family indicates that it is a single-pass membrane protein, and it appears to be found in mitochondrial membranes. Sphingolipids play important roles in regulating cellular responses, and although mitochondria contain sphingolipids, direct regulation of their levels in mitochondria or mitochondria-associated membranes is mostly unclear. Sphingomyelin phosphodiesterases catalyse the hydrolysis of sphingomyelin to ceramide and phosphocholine, and these metabolites are involved in signalling pathways. 23.70 23.70 24.20 23.70 22.10 23.60 hmmbuild -o /dev/null HMM SEED 765 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -13.23 0.70 -6.74 4 286 2012-03-15 16:48:53 2012-03-15 16:48:53 1 4 90 0 124 270 0 315.60 27 82.32 NEW QPSFLLAoLKADslsKPFhQpCQDLV+VIEDFPAKELHsIFPWLVESlFGSLDGsIVGWNLRsLQuRhNPsEYslsh-FLDPSGPMMKLVYKLQAE-YKYDFPVSaLPGPVKASIQEpVLP-CPLYHNKlQFPsSGGluLNLALNPFEYYMFaFAhSLITQKNhPluhHlSouDSAYFILVDpYLKaFLPTEGSVPPP.SossGGolPSPsPRoPulPFsSYGhHHTSLLKRHISHQsSVNADPAuQEIWRSETLLQVFVEMWLHHYSLEMYQKMQSPphKLElLHYRLSlSShhh.sPhps.u.tuLHuYQEsFpPTEEHVLVVRLLVKHLHAFSNSLKPEQlSPSAHSHTsSPLEEhKRVVVPRFVQQKLYlFLQHCFGHWPLDASFRAVLEMWLSYlQPWRYssEKssPso.-.psRsVsEKWusFVQENLLMYTKLFlGFLNRsLRTDLVsPKNALMVFRVAKVFAQPNLuEMI.KGEQLFLEPEhVIPHRQHRLFhoPshGGSFLSuW.PslTDsSFKVKSHVYSLEGQDCQYpQMFGsEsRsLVL+LAQlIsQA+QTAKSISDpSAEosAspSFhSWFGhuSsDhNGSYsGsDLDEhGtDol+KTDEaLEKAL-YLCQIFRLNtuQLsQhhhslGouQD-sGKKQLPDCIpuEcGLILTsLGRYQlINGLRRF-IEYQGDPELQPIRSYENAhLVRhLaRLSSslNcRFAspMsALCSRcDFLGphCRaHLTsPthsp+t+hSPltpcps.ucstuPRlSLR 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 44 55 91 +14576 PF14725 DUF4466 Domain of unknown function (DUF4466) Coggill P pcc Jackhmmer-JCSG:target_419245-SP18803A Family \N 25.00 25.00 394.90 394.70 21.50 19.70 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.00 0.70 -5.14 6 11 2012-03-16 11:55:18 2012-03-16 11:55:18 1 1 11 2 3 13 0 315.10 48 94.39 NEW ACcDp.....-scSsL+NDhIK+Tl..uPslsGpcIEFAYAMu..sppG+lssApscASIAGAsGThF-hpSaYT............uoGpDVPVQsscDusTsGssoTsshhccss..................AATLRYaYllPc-A+GKsVSFoFSApSSsGpplSYphssY+ISKMDMK+slslpNtsACYlSltD....MpsYTKt-VssN.sLAsKIDhlYlYp.plsshDasHAhVoPuosscYls.ushlPuGhsNs.TthcKphsV+DtQLpshph.uVYIDDlDFcplDhusAsDYslsL+p-sGAaVcTADGKYtAYVYlNplssSuKphTVSlKRYsL .....uCp-......-scssL+NDhIK+Th..GPslVGppIEFAYAMu..spcG+lssApsEASIAGAsGThh-ppSaYT............uoG.DVslQsussolTsGssoTsshhccss..................AATLRYYYllPEEA+GKsVSFoFSApSSsGppVSYphssYcISKMDMK+slslpsuuuCYlSIAD....MpsYocs-lssN...usKIDhVYlYp.plsshsFsHAlVoPuusspYls.uVpLPuGlsps.TtlcKthsV+DtQLtthph.uVYIDDlDFpplDhssAssYAlNL+p-sGAaVcTADGKYtAYVYlNslssouKphTlSlKRYsL 0 3 3 3 +14577 PF14726 RTTN_N Rotatin, an armadillo repeat protein, centriole functioning Coggill P pcc Pfma-B_645 (release 26.0) Family Rotatin and its homologues such as Ana3 in Drosophila are found to be essential for centriole function [1]. A deficiency of rotatin in mice leads to randomised heart tube looping, defects in embryonic turning [2], and abnormal expression of HNF3beta, lefty, and nodal. Thus it is required for left-right and axial patterning. Ana3 - the Drosophila homologue - is present in centrioles and basal bodies, is required for the structural integrity of both centrioles and basal bodies and for centriole cohesion. Rotatin also localises to centrioles and basal bodies and appears to be essential for cilia function [3]. This family represents the N-terminal domain. 
25.00 25.00 26.30 26.30 24.40 24.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.23 0.72 -3.42 25 85 2012-03-16 13:05:11 2012-03-16 13:05:11 1 3 68 0 57 75 0 97.60 43 5.77 NEW tEIRhRALcsIpsKlp+sLlphp-ls.ppp.LL+pLlc..WFsassss.tp-cVLsLlpc.LhcpshuspllpclGs.cF...Lscl+....phlssph.ptplppllcsL .......EIR.RAL+sIlsKl-HsLlshsDLl.pc+hLhlpLLE..WFNFsslP.hp-EVLsLLpc.LlKaPsAsphLh-lGAl-F...LocLR....ssl-Ppl.pscIDslLDsL........ 0 17 21 35 +14578 PF14727 PHTB1_N PTHB1 N-terminus Eberhardt R re3 Jackhmmer:Q3SYG4 Family This family includes the N-terminus of PTHB1 protein. This protein forms a part of the BBSome complex, which is required for ciliogenesis [1]. 25.00 25.00 27.50 27.00 23.40 21.20 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.26 0.70 -5.52 25 181 2012-03-16 15:04:53 2012-03-16 15:04:53 1 9 114 0 104 189 5 348.90 39 50.44 NEW MSLFKsR-WWoopssp.sEcaDp.usLsVuNlDNssstp.......DpIlVG..SasGhLRlYpP.........ppssacsp.DLLLEppLppPILQlphG+F.........luus.pslpLAVLHP++LslYsl.....pshsGp..sppust.......hpLpLhYEHpL......p+sAhNhshGsFGGscs...........+DhlCVQShDGtLpFFEQ-shuF.s+hLss.aLLPGPlsYss+sDSFVTsooshpl-sY+.........YpsLAsuspsppppt...p...........................su++lsssWohslGEpsl-Ip.................lsphsp....spssIlVLGE+oLFslccsG.pl+a.K+L.-asPsshhsYss..hscsshp........................lllsocospLhlYpDssLtWuAph...spsPVAlpsusht.............sl.pGhlVsLspsGpLpssYLGT-P...........shhhsPshp..s+-hsY-chpcEhpcLpclI+pussspch.......psccclplpsplsspl- 
.........................MSLFps+-WWss..hs..tEpa....c....t...u..sLhlushsspts.t..p......................DpIlVG..Sh.Gh.LRIapP..........ptpshpsp.clLLEsplp.tPlLQltsG+F..........lssp.p....pLAV.Lps+plslYsl..............tth.Gt....hthusp.............................hphplhaEHpL......p+sAhshshGsFGssps...........+chlClQShDGhLhhaEQ-shs...............F.sphLPs..hLLPGPlsYss+oDoFlTsoSshplpsY+..Yps.Luhussttt..................................ss+plss-WohslGEpslDlp.................lsphsp................stsslhlLGERshasLc-.s.G.pl+ah++L.-hsPsChh..s...Yss...hppsshp........................hlluscsshLhlYpDssLtWuspl...sthPV.Alplushp.....................................sl.cGhIVoLussGpLpsuYLGT-P...........sha.p.ss.lp..s+..pl.sYpphphEhpcLpchI+chptstsh.......pttcpltlph.l........................................................... 0 45 55 82 +14579 PF14728 PHTB1_C PTHB1 C-terminus Eberhardt R re3 Jackhmmer:Q3SYG4 Family This family includes the C-terminus of PTHB1 protein. This protein forms a part of the BBSome complex, which is required for ciliogenesis [1]. 
25.00 25.00 33.70 25.20 24.80 24.70 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.16 0.70 -5.67 6 158 2012-03-16 15:10:26 2012-03-16 15:10:26 1 7 107 0 92 177 5 312.30 34 42.03 NEW solKlsl+s+hphppspLolssptP..LslopDshhFcslus.tho+shshslYl..psshhPssLpsplVsSYsosp.....GlPRllQppspLPL+LhhcPsQPsKsAsHKLTlssNpsPVs......LhsLFPEFhp..EcusssAlGFQhlu....Gp+..VTlLAuKoSpRYRlQSDpFEsLsLlscchlhRh............ccpas+pshhDtF..plohuGshPlpphh-hIDsHF-lRhshccLcspLpptusQaRsIQRRLls+FK-KoPsPLpsL-hLL-uTYsplhtluDplcEhccsLh+utscLsuuspLllLlltLhhsL.ssctlplLEushsPllhDhpE.uWEEhsDAuloaLL+TsLuKSSK-Quhs..........hpsshc.PpDsS........+L+KHlshlsDRls .............................................h.hp.t...hptspltl.st.P..l.hspsp.h..sh..p.hss..t.spp...hthshah....ptth....Psphcsphss.oasp.sp...........G.....hP+llQpphpLPLpLls..hssp.PsKsA...sa...KlTlc.TNp.s.sls................LhslF..........s.t.Fst............pc....s...ps...ssh......Ghphls.................Gsp......lTlLASKsSp..R.YRIQS.-phEsLaLlspELlhRl...........................................pph..hpc.t...hs..h....thsh.s.....ssh...P.........lpcaachlDpHac.lRhphcchpchLscpAhQFRu.lQ+RLLs+a+-+sPssL..p..tL-sLL-sTY..cplhths-thpp.ptphhpthstLpssspLh..h..hllth...p.h.p.pththlpshh.s...p.............p........uWEE.stsulshLlphs..pp.pttt..................s...t...sht........ph++phshhh-+h..................................................... 
0 35 45 71 +14580 PF14729 DUF4467 Domain of unknown function with cystatin-like fold (DUF4467) Godzik A adam JCSG target SP18127A; Pfam-B_491 (release 26.0) Family Large family of predicted lipoproteins from Gram-positive bacteria Experimentally determined structure shows a cystatitin-like fold, allowing us to classify this family in the NFT2 clan, despite lack of any detectable sequence similarity between members of this family and other families in this clan 21.90 21.90 23.90 30.30 19.90 19.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.19 0.72 -3.66 24 517 2012-10-03 02:27:24 2012-03-18 08:21:19 1 1 217 2 13 123 0 94.00 44 78.01 NEW YsccIDclhKhppcppcch.tp.s.scsppc.a-+ccuNhYVY-cGKhIllu.Yp.hK.sscpl.hYhhYchps..cKhph.ccch.....s..s+pYhccH.cPDY+EpN ..YpK-IDcshKlQspppcph.uKhs.schhschc+cDuNhaVYccGKlIllu.Yp.hp.sc-ch.aYaAY-hpD..cKsph.pp-h.....D..sc+YhppH.cADYc-EN.. 0 3 3 11 +14581 PF14730 DUF4468 Domain of unknown function (DUF4468) with TBP-like fold Godzik A adam Jackhammer: JCSG target SP13279C Domain A large family of (predicted) secreted proteins with unknown functions from human gut and oral cavity.\ Typically forms a N-terminal domain with FMN binding domain at the C-terminus. Experimentaly determined 3D structure of this domain shows a variant of a TATA box binding - like fold, but no detectable sequence similarity to other proteins with this fold 22.60 22.60 24.50 24.10 20.90 20.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.89 0.72 -4.29 50 160 2012-03-18 18:57:27 2012-03-18 18:57:27 1 3 125 0 29 156 6 92.60 26 32.94 NEW aopplpssuh..op.splYsphhpWhsphhps...h.sSplshssccpGhIsupu..pthllFs.....s.shLS.....ls+sphpYpltlcscDs+hclohoclp.YpYp .........Fscphpls.uh..Sp.spIYcphhpWhspphpp..........t.sS+lshsscpcGsIsupG..cchl.lFs.....s.sslu.....LD+splpYplplsCc-s+splphscIp.YpY............ 
0 13 24 29 +14582 PF14731 Staphopain_pro Staphopain proregion Eberhardt R re3 CATH:1x9y_A_0 Domain This domain is the proregion of the cysteine protease staphopain. Like many papain type peptidases, staphopain is synthesised as an inactive precursor and cleavage of the proregion is required for activation. This proregion has a half-barrel or barrel-sandwich hybrid fold. The proregion blocks the active site cleft of the mature enzyme on one side of the nucleophilic cysteine [1] 27.00 27.00 47.30 46.60 23.30 22.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.13 0.71 -4.49 6 356 2012-03-19 09:54:49 2012-03-19 09:54:49 1 2 195 4 4 96 0 167.80 64 43.35 NEW KpscVNVcsKcVPpcV+cLApcQYLSaVsuLDKtSNpcsuuYTLGEsFKIYKFNpcSDGNYYYPVLNK-GcllYlVTISPKssss.KtSppsusYSINVSPFlSKsLNQYKsQ..pITILTspKGYahhsEDsKl+LVLKTPhsssKppKpsscpsss+.hpphKQTuolTK ...KplplNVcscpVPpcV+sLAQppahuYspuLDK..N..t.cpupYpLGEsFKIYKFNtcpDssYYaPVl..p-GpIsYhlTlSPKsp...ppSpps.pYolplSsFluKsLsQhKDp..pIT...lLT.spKGaY.hppstKs+LVhtTPh.pshK.Kcotphsoup.hppLKppsosTh......... 0 2 2 4 +14583 PF14732 UAE_UbL Ubiquitin/SUMO-activating enzyme ubiquitin-like domain Eberhardt R re3 CATH:1y8q_D_03 Domain This is the C-terminal domain of ubiquitin-activating enzyme and SUMO-activating enzyme 2. It is structurally similar to ubiquitin. This domain is involved in E1-SUMO-thioester transfer to the SUMO E2 conjugating protein [1]. 27.40 27.40 27.50 28.00 27.30 27.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.67 0.72 -3.88 74 271 2012-03-19 13:18:53 2012-03-19 13:18:53 1 9 228 6 191 271 5 89.30 29 14.34 NEW pltlsspchTlpcLl-clL+ppLuhspP-lhl...ssslla-s--..........t.hssshpKpLu-l............GlpssohLslpDhpQc......hplplhlpcpcphcp ................lplssc+sTlpsLh-cll.....KpcLuhst.......P-lplp....spuslLhss--..........t.h-sN.pKpLu-h............Glp..sGo..hLpscDh.p-......hsl.l.lhcpcp...t................................................. 
0 58 97 153 +14584 PF14733 ACDC AP2-coincident C-terminal Woodcroft B, Eberhardt R re3 Woodcroft B Family This family is found at the C-terminus of apicomplexan proteins containing the AP2 domain (Pfam:PF00847). 25.00 25.00 25.10 25.30 24.00 22.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.84 0.72 -3.86 28 106 2012-03-22 12:53:11 2012-03-22 12:53:11 1 4 11 0 100 116 0 94.70 22 6.97 NEW sssptLpltKpAlphlLpDLppsClspl...tthts.............................hpphlptHlphlpsu.tshpplhsYlplFssplppshLPSshshptQthllpuL .............................................................................................t.tphlpltKtAlhhlLpDLpppshsph.....hh.s...................................................................hp.hpphlctHhphlpsu.pshppltsYlplFspsIppppLPSphshptphhllpuL..... 0 37 47 88 +14585 PF14734 DUF4469 Domain of unknown function (DUF4469) with IG-like fold Godzik A adam Jackhammer:JCSG target GS13689A Domain A C-terminal domain in a large family of (predicted) secreted proteins with uknown functions from human gut bacteroides 22.60 22.60 22.60 23.10 22.30 21.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.24 0.72 -4.15 29 126 2012-03-23 06:08:24 2012-03-23 06:08:24 1 2 44 0 33 111 1 97.60 31 43.92 NEW IspVsDssTGptssslTsGtshplpGsplKls.Gs-......susGlhhss.........ppG........s.htlssshlstNpPSpLhhhlPstLssGp.YpLplsTpauu.usphLKssRospa ......................tlpD.sTttts..GslTsGtshhlpGpplKls.Gs-......s.ssGlhhss.........ppu........s.stlshs.lshNsPSclhhhlPssLscGp.YpLplsTQaus.ssphLKsPRoh................. 0 15 30 33 +14586 PF14735 HAUS4 HAUS augmin-like complex subunit 4 Eberhardt R re3 Jackhmmer:Q9H6D7 Family This family includes HAUS augmin-like complex subunit 4. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2]. 
25.00 25.00 25.90 25.20 23.80 22.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.44 0.70 -5.06 12 109 2012-03-23 08:46:42 2012-03-23 08:46:42 1 3 76 0 61 109 0 209.10 34 62.65 NEW stphsspsshsss.stlLGlsttpLhphhssp.sh.....thptpL.pElEppL+cKC.sLlsaapPsspssucshptsKss+Lsphlctcp..cphpptctphpcshhhhc+phppYhpsLhpsLplLppllp-a+LcpQs-hDchpppaLpsKC-sMhhKl+s.phplLpDTYTtEolsAh+pIRchLpsAhcptcp-hppupptLpsYEslG.cF-slscEYsclhpcl-s+pWsLpclpps ......................................................................................h.p.....tss.tt..s.hLGlp.t.Lhph....p....sh......hptpL..ElEtpL+pKC.....sLhshas.ss.......pssup..th.ptspsh+LsEhlhtth..pphpctcs.....t.pEph.hhh-+p.upY.pVL.ppLslLpp...Ll...p-p+LcpQ......schDchptpaLph+CpsM.hKL.Rh.Ehc......lLp-.TYT.sEpltsh.+h.IR...cpLptuhc.tppphpcupph.LpsYc.sls.c.........F-p..........ls+pYpplhpthEshp.Wslpphph.............. 0 16 30 44 +14587 PF14736 N_Asn_amidohyd N_Asn_aminohyd; Protein N-terminal asparagine amidohydrolase Eberhardt R re3 Jackhmmer:Q96AB6 Family This family of enzymes catalyse the deamindation of N-terminal asparagines in peptides and proteins to aspartic acid [1-2]. 
24.00 24.00 24.00 24.10 23.90 23.80 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.78 0.70 -5.53 19 160 2012-03-23 10:23:26 2012-03-23 10:23:26 1 4 114 0 93 169 0 232.10 37 83.22 NEW thtSpsspsVsstslLYVpQREaAsT...sPpDpsVsllGSDDATTChlVVlRHTGSGsssLAHhDGos.Tcsulshhlstlpshs.s...tpGRLElHLVGGFpD.................scphScpLshpILpuFc+pp--IHLpTsClsEhNshlc.sGlphPllYGIuVNlKTGclFPAoF..ss+GPDctLRpARhh.......ssuphlslYDsppphl+IGPhpasPhhs..sshWLppsDchILppLSTSPtsEPPHFVppl+uslpFlh-HPps.sslFPcspP+ha+Rsc.sGpWc+l ...........................................t...p.sss.thLYVtQREhAss......sP.t.s....tpl.sl......lGoD-ATTChlVVlR...csusG......s...ssLsHhD..........uss.scttls.hhpplpshs........puRlEl..HLlGGFsD.......................s.cthSppL..s...hpllptFccpp.....c.....lc....Lho.hCVs..-hNsh..............c....st.......phPllaGluVsl+Tucla..AoF......s+GP-c.LRtARhh...........sst.hlsl.YDspht.l+IuP..hsapPh.t....ssha...LppsDp.lLpphSTSP.sEPPHFltphRtslhal...cp..s..thF.s.ppshhac+sp.sGh..W........................................... 0 32 46 69 +14588 PF14737 DUF4470 Domain of unknown function (DUF4470) Coggill P pcc Jackhmmer:Q8N9W5 Family This family is conserved from fungi to Metazoa and includes plants. The function is not known, but several members have zinc-finger domain, zf-MYND, Pfam:PF01753, at their very C-terminus. Others are also associated with DUF1279, Pfam:PF06916. 
25.00 25.00 27.80 26.10 23.60 23.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.40 0.72 -4.27 100 314 2012-03-23 14:25:58 2012-03-23 14:25:58 1 21 155 0 250 334 1 98.50 23 12.55 NEW haGsos...Ahsl...hpp....................tt..s.ppslslLhh..GsG.DhRpllpTlsshs.pp.p............p...lplhlsDts....tllARNlllLpl.....lhs.s......ppssphhhc.laasshls .....................................................................hG.osAhslhp.........................................s.ppslslLlh.............G..sG.DhRplltTlspt..tp.tt...........................plphhlh-hs.pllA.RslllLpl......hhc.s...................pptsphhhclahshhh................................... 0 118 170 216 +14589 PF14738 PaaSYMP Solute carrier (proton/amino acid symporter), TRAMD3 or PAT1 Coggill P pcc Jackhmmer:Q7Z4T9 Family PAT1 (proton amino acid transporter 1), also known as TRAMD3 of AAT-1, is the molecular correlate of the intestinal imino acid carrier. It is a proton-amino acid co-transporter having a stoichiometry of 1:1. Due to its mechanism, PAT1 activity increases at acidic pH, which correlates well with the acidic micro-climate close to the brush-border in the intestine. Glycine, proline, and alanine are the preferred substrates of the transporter. The maximum velocity is similar for the three substrates. All substrates are transported with low affinity, showing Km values in the range of 2-10 mM. The transporter does not discriminate between L- and D-isoforms of these amino acids; in addition, beta-alanine is transported with similar affinity as alpha-alanine. Similar to the IMINO transporter, the amino acid analog MeAIB is recognized by PAT1. The transporter is strongly expressed in the small intestine, colon, kidney, and brain. 
21.60 21.60 26.40 21.60 19.10 18.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.97 0.71 -4.49 32 172 2012-03-23 16:23:11 2012-03-23 16:23:11 1 10 109 0 105 175 2 146.70 38 20.68 NEW usQT.YRES-AQTsPYoP..-.aslp.s.ss...sP..ElLsLssLpasc.G.L.PuGhtEVEhIERARc+RsaEssLP.s..........hs....Dts..ph...p......pR...+phh-t.hEhcEWthREp-IpchQchRL-llpchlpcREcppcphsppRl-phtpphppc+pttlp+lctctl..cthR+ .............t.sQT.YR-u-sQTsPYpP..Ehhspp.ss...hs..............ElloLusLpa...........uc..G.......L...PsG.tEVEhIERARcKRAaEssLPs..........hsDts....ph...p.......cR+chhpthEhcEWt.REp-Ipc..lQchRL-llpchLccREcpppphstpRl.ptthpphpcp+ctpltplphphhpshR+.......................... 1 44 53 77 +14590 PF14739 DUF4472 Domain of unknown function (DUF4472) Coggill P pcc Pfam-B_085261 Family This family is specific to the Chordates. Some members also carry Kinesin-motor domains at their N-terminus, Kinesin, Pfam:PF00225. 22.70 22.70 23.40 28.70 22.50 22.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.25 0.72 -3.59 12 44 2012-03-23 16:37:54 2012-03-23 16:37:54 1 2 35 0 29 51 3 107.70 38 21.45 NEW SEEp+LpISKELVDLQIcsp+l+EQaEAEhFELKNc..................lLpLEsRlLELELct-.....phststsshtcphphspp.p+chttphh.h+pph.s.spsh.p.pscpccLu.pL. .SEEp+LQISKELVDLQIpTp+LpEQaEAEhFELKsc..................lLpLEsRVLELELcs-.....pss..spss.tctht....spc....+pchtsphh.hcpph.s.scsh.s..tcppcLu.tL.h............................... 0 10 13 18 +14591 PF14740 DUF4471 Domain of unknown function (DUF4471) Coggill P pcc Jackhmmer:Q8N9W5 Domain This family is conserved from fungi to Metazoa and includes plants. The function is not known, but several members have zinc-finger domain, zf-MYND, Pfam:PF01753, at their very C-terminus. Others are also associated with DUF1279, Pfam:PF06916. This domain is more C-terminal in many members to DUF4470, Pfam:PF14737. 
25.00 25.00 26.40 26.20 22.50 22.30 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.83 0.70 -5.18 18 137 2012-03-23 16:41:55 2012-03-23 16:41:55 1 9 98 0 86 142 4 253.10 29 55.42 NEW hlslptLKaKERDtLEthFpaWpst...ppsaclschW-...pRlRphLGsRYDpRsGlaDWDhsMpL+-.ptup.IssQEYRpWRcsGlAFsa.......sE.t.-aspPNKThssuhl...psGpsahpRGYlGDIpTGPFhuFG...lcss-.....E+h.h+ohcG.....pscapuTDloE+Nlhplh........aELp.....sp...........ssap.hsts...........-....................sps.p..th..psptsh....sp.p.....hhsstpVpl+Fl.slph.lchhpp.+p+apphFDllFlups.hspaLpss....hhp.sh..+ss.AllllETt+alssh+K-phppatscl+clh+pushcsstshs ...................................lslptLKa+ERDtL-thhphWt........t........ps...........ashsphWD...pRlRphhupRYDtRpsh...hDWDhp..MpL+c..pt........uphIp.pcaphWRpsGlAFph..........................h-t.tYp.hPN+Thsshhh............pp.............GpphhtRGYhGDIhsuPahuFG....lcspc.....pph.hph.ps.....p..hsut-lspcNlh.phh........hpltst..................ttht.hs.t..........p........................................................................................................tp.t..t......tt.t.........t.......h......thplpal.sh.s..hpp.l................p.+.ppa......pthFphhahuss.hsphlpsp..........htt.hh.......tst..uhlhhEht.pahhshpp-phtta.ppltchsptsGht......s....................................................... 0 38 48 71 +14592 PF14741 GH114_assoc N-terminal glycosyl-hydrolase-114-associated domain Naumoff D, Coggill P pcc [1] Domain This short domain is also a very small family found at the N-terminus of GH114, glycosyl-hydrolases. 
22.00 20.50 22.90 38.90 18.10 18.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.67 0.71 -4.42 9 17 2012-03-23 18:28:37 2012-03-23 18:28:37 1 5 7 0 16 17 0 124.80 26 28.58 NEW shssp-lssopDshspYlchus.........GuphphpFshPu.husssstsLslsssht.s.ttSutcWph-hash...ssssWsplGD.utAsShsWosssLsls.ssst.cFV.ussplplphs...psssspushlDh .......hsstshssotDs.sp.lchus.........uuphphhFshPu.sus....sphhololsspht.ssttsuscWph-hasa....sussWsplGD.otssohsWosh.sLsls.ssP.....s.sFl.ssstlphphp...psusspu.hlD........... 0 13 14 15 +14593 PF14742 GDE_N_bis N-terminal domain of (some) glycogen debranching enzymes Godzik a adam Jackhammer:YP_001865398 Domain This domain is found on the N-terminal of some glycogen debranching enzymes and is usually followed by the GDE_C (PF06202) and in this sense it is analogous (but probably not homologous) to the GDE_N (PF12439). Its exact function is unknown 22.10 22.10 22.80 41.80 21.10 22.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.12 0.71 -4.93 158 381 2012-03-25 04:19:21 2012-03-25 05:19:21 1 3 312 0 197 416 23 191.80 27 27.41 NEW sL+cussF.hls-ppGDI.......s..ssspGLahpDTRaLSchpLplsGppshh...L.uusspps.ttshh..tlss..........l.t.s...st..hscsslplpRpRhl.t...su..hhEclslpNasspssphplplphsADFsDlFEVR..Gtp.ct..c+Gphh...sp..h.ps.s...p....................lthpYpG.D.....shpR.poplph.....t.........s.sssp...lss............sp....hsaplpLsPppphpltlpl .......................L+pussFhlsDtpGDl.......tsssspGLahpDTRhLSphpLplsG.....ptP.h...L.uusspps.t.tuhh.pLss..........l...t.s...st..hscsslplcRpRhlt................ss..hhEclslpNasspssplplslphsADFsDlFEVR..Gsp.ct....c+Gpht...sp..spss...t.........................l.php.YpG.D.....shpRssplph.......p...........stPsp..lss..................................sp....ssaplplssptphslhlp.......................................................... 
0 54 119 159 +14594 PF14743 DNA_ligase_OB_2 DNA ligase OB-like domain Eberhardt R re3 CATH:1fvi_A_02 Domain This domain has an OB-like fold, but does not appear to be related to Pfam:PF03120. It is found at the C-terminus of the ATP dependent DNA ligase domain Pfam:PF01068 [1-3]. 25.00 25.00 25.00 25.10 24.90 24.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.01 0.72 -4.30 136 673 2012-03-26 11:48:33 2012-03-26 12:48:33 1 11 649 7 120 539 455 66.90 42 21.06 NEW .G+GKapGhhGALhl..............ch.ss.....G..hcFclG.....oGFoDpp...RpsPP...........lGohlTY+YpGhT.psG.....hPRFssFlRlR ...............GcG+apGthGAlls..............ch..ts.....G......hpFpIG..SGasDp-...RcsPP.......................hIGollT..Y+YpGlT.p.pG.........hPRFssFlRlR.................. 0 40 76 108 +14595 PF14744 WASH-7_mid WASH complex subunit 7 Coggill P pcc Jackhmmer:Q2M389 Family This family is the central, conserved region of proteins that form subunit 7 of the WASH complex [1]. In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes [2]. 
25.00 25.00 32.10 31.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.96 0.70 -5.60 29 153 2012-03-26 12:33:06 2012-03-26 13:33:06 1 10 116 0 104 160 4 310.00 48 31.51 NEW FLYWpR.slh.Phahpplacsshpsp..+l.ahhsAhpDshshltt.........spHhps..stslhcs.....apct.lhpplcccllcPLCpclEs-LRLpsHu.cL...phs.spsPhps....s....hp..Dlsp...hl.plsPl+hhsphlsl+pclE+YLspTFYNLsslAhHDWKTYtEMRsLApp+YGLphh-s+LPsQTL-QG.LDVLcIMRNIcsFVu+YsYNLNpQlFlEc..sS..s...uKHLsTIsI+HIANSIRTHGsGIMNTTVNasYQFLpKKFhlFSQFLaD-aIKSRLlK-hRaa+cp.+cp...hs..tpYPa-RA-cFt+pI+KLGl.......sss...GpoYLDpFRhLITpIGNAlGYVRMlRSGGlchsucuhpFlP.slpsh.ssap ................................................aLaapR.shhPhahpplappshpst..pl...YhhsAhpDshs.hhp..........upHhps....p.Llps.....appp.lhphlpcpllc.LCp-IEpDLRL.psHo.HL....pls.sp...sPh.cs.................u..h+..Dlsh....ah..plpPl+.hhscalcl+.shVp+YL-psFYNLoTlAlHD..WtTYpE.MRs.LApp+YGLths-sHLPsQoL-.........QG.LDVLpIMRNIHlFVupYhYNLNsQlFlE+..sS...........s........sKHLsTIsIRHIANSIRTHGTGIMNTTVNF.sYQFLppKFhlFSQFhaD..-+IKSRLlK-hRaa+Eh.K..cp....................ts....ppYPa-RA-+Ft+sIRKLGl.........................os-...GpoaLDpFRpL......IopIG.NAhGYlRMlRSGGL+ssusuhpFlP.chcs............................ 0 37 50 80 +14596 PF14745 WASH-7_N WASH complex subunit 7, N-terminal Coggill P pcc Jackhmmer:Q2M389 Family This family is the conserved N-terminal region of proteins that form subunit 7 of the WASH complex [1]. In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes [2]. 
25.00 25.00 32.80 27.90 24.30 23.70 hmmbuild -o /dev/null HMM SEED 567 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.86 0.70 -6.33 14 161 2012-03-26 12:44:04 2012-03-26 13:44:04 1 8 111 0 105 164 3 456.10 29 49.95 NEW GpFhp-asppLpplccsl..s.so..lsps.....hshth-PlsLphhsh......EphslhcL.l.......co-.pKlhNKllsslAuLCsph+pLpccAc.pah.sLlhaGEt.hs..........-st.s........tcs.hphuphlshhpcL.talpRshsllpslhpQluAlhs......h.hshspVHh.sVa-sls-LLshLlolDElhptposlsspWsLY++hl+olppssupa..s.ls.c.cLptL-+hLtcl-spLLsGsIFpphlpphhD...t.hsVscNsths.pEhst.l+plhsplEu+...s-.pp.h...pppcphltlsuLhVlhapLatp.....hD+KLhKplh-lt++hstlslssNllWhPssFLhp+hssh.h..Kthscts.p.shp..+pphLpphstsht+pspphthQlutWhlcMposhsps.....h.hctLpspspLllpGlhhAtplSpllcsllNLHsuLstPhoKosVhslC+llEhLKsIptTFappphhlschlspllQalshhhhphLpssK++lst....DppYocc+LDlLSuLpLupcsLpGssTp-RlhllpLuLushhph..csl+s.......-chcplp.lhp+Lctlu-lppplptts...DsS .....................................................................................................................................................ssltl.hhs...................cp.s.l.pl..l........................ppc..splhsKllsshusLstEhptLp.pApp........phh.s..Lhh.aG-t....hp......................................................-st....p.......tcs.hphu+hlshLpcl.talpRshp...Vlh...sllpQLuula.s.....t................t..h..........t.hp...t..........l+hp.shac..plucLLthllslDEllppp.slpstaphY+.+h.lpplppssspa.........s.ht..pccl.c.hcphlhpl.....c..tpllss.lhp.tslpp.a-....................h............lpp...sphhspchtt.lcphhspl-sph...spspp.h................pp+pphlslsuLhslh.hplahs..............................h-p+hh........Kplhchp.p+.hPhltlh..uplhahPstFlhpph..Ps...h...+.h..h.s.ppt.t....sht................pp...t...hhppt.stp.h....p.....phpphhh.lssWh.hc..Mpohhstp................th.tp..l....pptsplhl..p.GhhhA.plpphltshh......sL.....ahs....hp...tPhopssVpsls+hlphLKslppha..hp+th.hl..spshshlhQplph.h.p.ltsh....+pplht........ppp.sp.pp......h.-.....hL...uulhls.phl.p..us...o..hp+hhhlpLslshh.hp.....p......hhpt...........
..........pch..h..hhtplphlsp.htt.h....s................................................................................... 0 40 49 81 +14597 PF14746 WASH-7_C WASH complex subunit 7, C-terminal Coggill P pcc Jackhmmer:Q2M389 Family This family is the conserved C-terminal region of proteins that form subunit 7 of the WASH complex [1]. In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes [2]. The C-terminus is predicted to include a transmembrane region. 25.00 25.00 50.50 25.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.87 0.71 -4.66 22 137 2012-03-26 13:08:12 2012-03-26 14:08:12 1 9 112 \N 97 142 2 166.40 44 15.89 NEW ThpAucpLDsllpsLtc.sao-Go-YF+hLlssFu.phRs...s+NtHL+NFYlIlPsLTlNaV-ahlpsK-KlhKKsKs....susF.TDDGFAhGlAYILKLLcQhppFDSLHWFpolcp+app-ppplp..............pp.ttp.psp...........tD.....-+h.pshp....LTh++lpshpcEasLLhhohoSARIFFc ....................................................ThpAu+pLDsVlushsc.s.uE.Go-YFKhLV-.....VFuschRs.............s+NhHL+NFYlIVPPLTlNaVEaplssKEKLtK....KNKh....uusF..TDDGFAhGlAYILKLLDQappFDSLHWFpSV+p+YtpEhcslt................cp.pspssp.....................pD.....-chhpThp....LTt++Lcsh.p.EapLLhhoLoSARIFFc................................................ 0 36 48 74 +14598 PF14747 DUF4473 Domain of unknown function (DUF4473) Coggill P pcc Pfam-B_8489 (release 26.0) Family This short family is largely confined to Caenorhabditis proteins. The function is not known. There are two well-conserved aspartate residues. 
25.90 25.90 27.20 26.70 25.10 22.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.60 0.72 -3.76 42 147 2012-03-26 13:48:54 2012-03-26 14:48:54 1 3 5 0 145 125 0 80.40 28 75.08 NEW ushs.s.....s--h+uELhuAGlSppussGlhplspcatsph.hh...pssccsscphhpchps-scsalco.StpD.QpsYpsalc.c+ ................s....t-ch+AELhuAGlSpsuscGlhplupcapsphsts...psspcuucphhsphps-scsalKo.oppD.QstYpsalc.Khp................. 0 21 42 145 +14599 PF14748 P5CR_dimer Pyrroline-5-carboxylate reductase dimerisation Eberhardt R re3 CATH:2ahr_A_02 Domain Pyrroline-5-carboxylate reductase consists of two domains, an N-terminal catalytic domain (Pfam:PF03807) and a C-terminal dimerisation domain. This is the dimerisation domain [1]. 25.00 25.00 25.60 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.17 0.72 -4.04 862 5462 2012-03-26 14:57:42 2012-03-26 15:57:42 1 17 4352 40 1427 4011 1723 105.90 36 39.32 NEW hsEp.hDulTAlSGSGPAYlahhlEAhscuulp..hGLscchAtpLutQTlhGuAph...lh.............................p.....................pPu......pL+cp.VoSP...GGTThsulpsL..Ecs..u.lcsslhcAlpA..AspRupEL ...............................................sEphhcslTuloGSuP............AYl..a.hhlEAh.s-..A.uVp..hG.ls............+.ppAhclssQsl..hG.uAphlh...........po.sp.......................cPu.........pL+-p.VsS.P...GGTThtulpsL..Epp...u.hcsslhcAlpu..uhp+upch...................... 0 448 860 1182 +14600 PF14749 Acyl-CoA_ox_N Acyl-coenzyme A oxidase N-terminal Eberhardt R re3 CATH:2ddh_A_01 Domain Acyl-coenzyme A oxidase consists of three domains. An N-terminal alpha-helical domain, a beta sheet domain (Pfam:PF02770) and a C-terminal catalytic domain (Pfam:PF01756). This entry represents the N-terminal alpha-helical domain [1]. 
25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.53 0.71 -3.83 150 617 2012-10-02 12:47:07 2012-03-27 09:42:00 1 15 250 8 432 619 22 114.30 24 17.60 NEW sscclsthltGuccp...hcc++....clpphlt...p-P...apc..pshhahoRpEpacpulcKutthhphhpp.hth............s.p-.................hhhh....tshhspstPhs..LHhsM..FlP....slpsQuosEQpccWLshApphcIl ...................................................................scphsthltGutpp...hchpc....cl..phlt...p-P...hpp..tsh..h..ho+p-phctulcKstphhphhp.p.hth...........s.p-...............................................hhhh....hshhst..shshs....lHhuM..Fls....slpsQGTsEQhpcWl.hu.phpIh..................... 0 160 230 355 +14601 PF14750 INTS2 Integrator complex subunit 2 Eberhardt R re3 Jackhmmer:Q9H0H0 Family This family of proteins are subunits of the integrator complex involved in snRNA transcription and processing [1]. 25.00 25.00 36.40 27.30 21.40 21.40 hmmbuild -o /dev/null HMM SEED 1049 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.73 0.70 -13.76 0.70 -7.04 13 186 2012-03-27 12:41:57 2012-03-27 13:41:57 1 7 101 0 136 185 1 667.70 34 87.28 NEW 
lspLuphscpEIRPlLPCLVRMSLhSPLDpopchs-sRKplLplLsGlElVNSI.VuLLSlDFHtLEsDlKKEQQlRpKlGhtpp-SlhhpuLps.ulsLEFERS-ssR+lRlVLSELLtlhuQlp-..........pss-..hh+sS.....-LFDs-lYLEEluDllCIAlAELPuLLsIp-lsEsLL+lcNGsplIChlVANhPDsFcEVCpuLIsNG-p.DE-sssG+hRhssLptLspMNPoQALthRupsVEhC+MPuLAltLoL-ps......p.........uDLVAFlSGLLLGsDpplRoWFAhFIRsuQKR+s-....ALphLRccLLcplpslhsp......................uh..putLs-ppVVpuuuLLRLYCALRGIAGlKFs--EsssLlQLlTS+PPPosAGlRFVoLGLCMLlACPSLI.....uspEhEppslcWlpWLl+EEAYFEssSG.........soASFGEMLLLhAIHFHSNQLSuIs-LVCSTLGMKlslRPsoloRhKplFTQEIFTEQ...VVsuHAV+VPVTssLsAslsGFLPlHCIaQLLKSRAFuKH+VPIKsWIY+QICsSVoPLHPlLPuLlEV..YVNSlls..Pss+s..p..........ppsNcPlSEpEIppVFps.....................s..pp..ph.pp................................pssLTsQLLlLYYLLLYEDsRLsNhpshlsts+p...hKsYSscFhScLPIKYLlppAp+cQpcYuuLFSPLLRLLATHaPHLslVDDWLc-Etlssp...ps.....thpss..tssloppslscAFsplpspPsp.sh+llcpLhphsspsLhPaAphllpahphlLscslPRhlpc.hhpplWhRLNoVhPRpLWVhTlNA..Lhspt......p..sLTp-slslDPLpVLRCDcRVFRCsPlhsIlLRlLpuhLAASRopLupHlp-p....Phsp.....hupts.s-s-REEL+hALlAAQESAAVQILLEsCLcTc-D+s................................pssphhtLREl+ullCSaLHQhFIADPsLAKLVHFQGYPpELLPloVpGIPSMHICLDFIPELLuQs...cl-KQlFAIsLsSHLulQYuLPKSLslu+LslNsLsTLLuVLsospRhpLFpslLPuLVRhscAFPPLs-DslslLhQlGRlstSQuuL 
..............................................................................................................................................................................................................................................................................................................................................................................................................................hh..............................................................h.h........h...h.h............................................................h...t.lh..t....................t......p..............hlh.l.ph....s...h..h.h.....p.ht.s.h.hh.lthph..................hl.hhpshlh........................................h..........................................................................................................................................hh.hhssh.....hp............hht.........p.............th..h.hshlhs.....h..h..........................pt........hhphh.pt................................shtphhlhhthhhhstphttl..hhpthlt..hc......................t.h..ht.hh.h..pthh.p.....ls..s..ph.sss..t...hs....s.t........................hshh.....sl.tL.....ptp.a...pht..h..ht.hh...ph...t....Php...h..llp...hht..h.................................................................................................................................................................................................................hhhhhahh....ht..h...t...........................................................a.....th.....hshp.hh...hpt......th...lh..hh...hh...hs...........h.....................................................................................p.................................hh.................h............h.......ha..h..h...............................................hs.......P....h..t....h..p....hh.hhl.hl.........p..................h..t.........................................................p.lt.shh..p.s.hhphLl-hh................................................................
.thpt.hs..lHphals............lh+llhaQ...................ths.t.l...st.lPuhh.h...l.ph.......p....hhsl..hhs.l...pa..l.pshths.p..hh................phh..hh..t..h............................h.h....hh.sh..h...a..h.....................h................................................................................................. 0 65 79 112 +14602 PF14751 DUF4474 Domain of unknown function (DUF4474) Godzik A adam Jackhmmer:JCSG target SP18061A Domain Domain found on N-termina of few families of uncharacterized Clostridia proteins. Typically followed by a proline-rich domain or other kinds of repeats 21.10 21.10 25.70 24.10 17.30 16.20 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.65 0.70 -5.19 12 51 2012-04-03 19:57:09 2012-04-03 20:57:09 1 9 45 0 8 47 1 222.40 33 41.00 NEW LNphhc.sGauY-hppDlFYSthcAWQRchGYs+LYDEAAshhsMlhDCEPlaFsYsGKpWLIEhWKGQYGlsTGuElGVYpss+..ls.s.ha+sTFYcslpDc-hlslShsLh+ssKslaph.pshHWWLTGFpLGpFSpPppLhh-hoITh.DppMppAFlcuLhch.G.Yp.pcEhhlpsNoVplpascP+osQPhs+sphs-shlQhpN+h.CpLYphh..T+sasps..hD+lthlpthhP-la .........lst.ht.hGas.Y-..ppDlFaSp..h-sWQ....RchGYscLYDcAusshuMlhDs-PlhFsYssKcWhIphWKGQY.s......l..sTGuElGlYpssc.s..l.......s...hhpssaYpsh......pD.s-hh..hohsLcK.s..G.+.......s...l.a...p...............p.......pstHWWLTGFp.....hG.c.F..Spsp-LshshsIsht.c.t...MhpAFlcuLhph.G.Yp.pp-h.h.tppV.hhhs.spp..t..hhps.h.cthlQhhNp..CthYp.h..Tt.h.ph...-+l.hl...hP.ha....................................................... 0 4 8 8 +14603 PF14752 RBP_receptor Retinol binding protein receptor Eberhardt R re3 Jackhmmer:Q9BX79 Family Proteins in this family function as retinol binding protein receptors [1]. 
25.00 25.00 26.70 25.20 20.90 22.50 hmmbuild -o /dev/null HMM SEED 617 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.21 0.70 -13.14 0.70 -6.20 10 202 2012-04-10 09:35:04 2012-04-10 10:35:04 1 7 52 0 108 188 1 404.80 30 72.30 NEW sssC..csoVs.sLaptChAslSllllLlLuhLsRRcphpsc.....sh+GRhGLlsPlDFLustssRhshuAsFGAlhsslshL.LhoEshhPht....sPoh......s+uhh....hlluhl.hushYYPlhAChost.a.hlutlLGhhhShsahsVpVaQplpCP..........pusplh+YhoLls.lPhLLCLuFLslpashllV+Sl+s.+tG...susp.....slp.sSa.ccYl+s...LLp+p.Lpp.t...tsc.phhSWhpsh.p.pIYsP-PsF+FPh+hlhoslLohluLYphALl.lsullsTlcclRsslsssls.aLLsuhsllhSp......sppclltpVKcaLaulEssalsollLusLlolshLh+sLVsaRppLKtLaRGsph.Lsspa+oPpPoppulsshMpaSuaQsAFlhhGhLIQpllFFLsslslsahlVlPllHGcsLhLL+uLs.hh.shalslllsllLQslhAphhFLps......+ptshsLsNRRuhashoYFLFhhNVLlGlhsulWRlLlSuLhsshhluRlDhSlLppshEohDPGYpoYhGhL+lEsupSHPVhluFCpLLLpupp..pcs..t....psol+.s..cpuhQhlpp.ccspupGuss.uuRuRsRWhLhYTLLpNPsLlshRKst ..........................................................................h.......................................................th..Phshht.........h..hhhhhhh.sthh.h....s.t...sh..................................h....hhhh.hh.h...s.h..hhPhhhChs.....hu.hhG..hsh.hh.h.hh.h....C..........................t....h....h......h..h..hhsh....hhl.h.a.h.............h....t.......t.....................................................h.t.....h........................................................................F.hs.phh.s.....hh....hh.hh...h...............................................................................................s.....hh.shh.s..h.h.h.hp.hh...ha.Rtph.thhtGt...h.....t.t....shhshhtasuaphA..hhhh..Ghhl.phh....lhh.hhhhh................l...h.hPhh.t....p..h.....hh.p....l...h....hh........hhhhhhh...lQ.hhuthhFlp........tt..tlsNR...+sha.h...sahlF.hNVllGhhssh..RllhoslhshhhluphDholh..thtsh.D.Ga.sahshLhh-h.popPshhsFCtlLlpstt.....t.................................h................................h.+W.lh.tLhpN..l..................................................... 
0 55 60 74 +14604 PF14753 DUF4475 Domain of unknown function (DUF4475) Eberhardt R re3 Jackhmmer:A4D161 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 99 and 305 amino acids in length. 25.00 25.00 25.20 25.00 20.90 24.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.80 0.71 -4.80 11 195 2012-04-10 10:52:20 2012-04-10 11:52:20 1 2 81 0 110 177 3 126.00 34 61.63 NEW Yp+IVG-.DDGG+LFo.cEYEcYKKpVhPhRlKNRlasSWpsssGhDCKlIGPEThCFCsHR.aKpHcTDhpp.lspcRPhtlPC+sstCpC+saaYlPhpGSpslRCp.CKHhss-Hss..hsa+Cs+ss....pCsG.FcSsaoCuCGpss.cHpTllE....T+-ERhupGKPVup.D.......VPYtuMG.GlTGFSSLs-Gh.R....lDsSGhGsss .................................................................................................................stohCFCsHh.h+pHph.........p.....tlsCp.ttCtC..a.alP....................s..h+C+.C+H.hppHss.....shh.Cp.t.ss....tCss.FpSsah.C.s.Csp.h.tH.T.hhE....Tcp..h.t.t.....................................................t........................... 0 42 49 67 +14605 PF14754 IFR3_antag PPRSV-IRF3_ant; Papain-like auto-proteinase Coggill P pcc Pfam-B_8065 (release 26.0) Domain The replicase polyproteins of the Nidoviruses such as, porcine arterivirus PRRSV, equine arterivirus EAV, human coronavirus 229E, and severe acute respiratory syndrome coronavirus (SARS-CoV), are predicted to be cleaved into 14 non-structural proteins (nsps) by the nsp4 main proteinase Pfam:PF05579 and three accessory proteinases residing in nsp1-alpha, nsp1-beta and nsp2. This family is the two nsp1 proteins that together act in a papain-like way to separate off the rest of the various functional domains of the polyprotein. Once inside the host cell, this nsp1 interferes with the regulation of interferon, thereby enabling the virus to replicate. 
25.00 25.00 573.10 572.10 19.50 18.40 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.71 0.70 -5.38 2 79 2012-04-10 11:02:07 2012-04-10 12:02:07 1 4 2 0 0 81 0 249.00 97 12.89 NEW MATFSATGFGGSFVRDWSLDLPsACEHGAGLCCEVDGSsLCAECFRGCEGVEQCPGLFMGLLKLASPsPVGHKFLIGWYRAAKVTGRYNFLELLQHPAFAQLRVVDARLAIEEASVFISTDHASAKRFPGARFALTsVYAusWlsSPAANSLlVTlDQEQDGFCWLKLLPPDRREAGLRLYYNHYREQRTGWLSKTGLRLWLGDLGLGlNAsSGGLKFHIMRuSPQRAWHITTRSCKLKSYYVCDISEA MATFSATGFGGSFVRDWSLDLPAACEHGAGLCCEVDGSTLCAECFRGCEGVEQCPGLFMGLLKLASPVPVGHKFLIGWYRAAKVTGRYNFLELLQHPAFAQLRVVDARLAIEEASVFhSTDHASAKRFPGARFALTPVYAssWVsSPAANSLIVTlDQEQDGFCWLKLLPPDRREAGLRLYYNHYREQRTGWLSKTGLRLWLGDLGLGINASSGsLKFHIMRSSPQRAWHITTRSCKLKSYYVCDISEA 0 0 0 0 +14606 PF14755 ER-remodelling Intracellular membrane remodeller Coggill P pcc Pfam-B_2813 (release 26.0) Domain This domain represents subunit nsp3 of the RNA-arteriviruses, such as porcine arterivirus PRRSV and equine arterivirus EAV, and is a tetraspanning transmembrane protein that contains a cluster of four highly conserved cysteine residues. These are predicted to reside in the first luminal domain of the protein. Arterivirus nsp3 proteins are uniformly predicted to contain four transmembrane helices, with the N and C termini of the protein residing in the cytoplasm. NSP3 are localised to the ER and appear to be essential for formation of double-membrane vesicles that originate from the ER during the life-cycle of the virus. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.03 0.71 -4.13 5 141 2012-04-10 13:33:59 2012-04-10 14:33:59 1 8 7 0 2 101 1 140.80 66 10.05 NEW GPPPAPVSASVLDHILEAATFGNVRVVATEEQQRPVPAPRsRPSAoSS.GDVKDPAsVPPVPKPRTKLAKPSPTQAPTPAPRTRhQuA.....SsQEPPsGsusAPASAPKWRVAKTVYSSAERlRTELVQRARSlGDVLVQALPLKTPAVQRY ..................................GPPPAPVSASVLDHILEAATFGNVRVVsTEEQQRPVPAPRsR+SsoPP.GD.V.KDsAsVP.PVPKPRTKLAKPSPsQAPTPAPRTpPQsA.....Pp.EPssuTAAAPuSAP+WRVAKTVYSSAERhRTELlHRApSlGDoLVQALPLKAPAVQRY............................................ 0 0 1 1 +14607 PF14756 Pdase_C33_assoc Peptidase_C33-associated domain Coggill P pcc Pfam-B_535 (release 26.0) Domain The nsps or non-structural protein subunits of the arteriviral polyproteins such as porcine arterivirus PRRSV and equine arterivirus EAV are auto-cleaved into functional units. the function of this particular domain is not known. 25.00 25.00 27.10 25.60 21.90 18.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.16 0.71 -4.48 4 550 2012-04-10 15:06:29 2012-04-10 16:06:29 1 10 6 0 0 558 0 108.70 91 8.18 NEW sSPDAVEVSGFDPACLDRLAtVMHLPSSsIPAALAEhSGDssRPsSPsTTVWTVSQFaARHpGG-HPDQVCLGKIISLCQVIE-CCCSQNKTNRsTPEEVAsKIDQYLpGAsSLEECLA+LE+ARPPSshDTSFDWsVVLPGVEAAs ...SPDAVEloGFDPACLDRLAcVMHLPSSsIPAALAEhSsDss.RssSPssTsWTVSQFaARHtGGsH.DQVpLGKIISLCQVIE-CCCpQNKTNRsTPEEVAAKID.YLRGATsLEECLAKLERVSPPSAADTSFDWNVVLPGVEAAN. 0 0 0 0 +14608 PF14757 NSP2-B_epitope Immunogenic region of nsp2 protein of arterivirus polyprotein Coggill P pcc Pfam-B_58 (release 26.0) Domain This domain is in a non-essential part of the nsp2 (non-structural protein) subunit section of the arterivirus polyprotein. This domain carries seven small sequence-regions that are predicted to be potential B-cell epitopes. 
25.00 25.00 42.60 27.50 20.90 24.90 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.77 0.70 -4.92 22 1615 2012-04-10 16:54:06 2012-04-10 17:54:06 1 14 11 0 0 1542 0 119.20 49 22.05 NEW VKsYPRWTPPPPPPRVQPR+TKsVKSLPEsKPVPAPRRKVRSDCGSPlLMGDNVPsuhEDLsVGGPLshPTPSEPhTPhSEPsLsPu.Q+ls+PsTPLStsAPVPAPRRTVSRPhTPLSEPIFVSAPRHKFQQVEcAN.AusTLTpQDEPLDLSASSQTEYEAsPLAP.QNhGlLEsGGQEAEEVLSEISDlLNDhNPAPVSSSSSLSSV+ITRPKYSAQAIIDSGGPCSGHLQ+EKEACLSIMREACDAoKLuDPATQEWLSRMWDRVDML ................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +14609 PF14758 NSP2_assoc Non-essential region of nsp2 of arterivirus polyprotein Coggill P pcc Pfam-B_6704 (release 26.0) Domain This non-essential region of the nsp2 subunit of the arterivirus polyprotein of such as porcine arterivirus PRRSV and equine arterivirus EAV may offer immunogneic surfaces to B-cells. It is associated with Peptidase_C33, Pfam:PF05412. 
25.00 25.00 29.10 145.00 21.80 19.00 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.45 0.71 -4.19 18 61 2012-04-10 17:03:01 2012-04-10 18:03:01 1 4 4 0 0 61 0 167.70 66 15.20 NEW KtFEtsAsEEVQEuGHKAVHSALLAEGPNsEQVQVVAGEQLcLGGCGLAlGsAp.................SssDSMKENMhNShEDEPLDLSpPAPAuTTTLV+EQTPDNPGSDAGALPVTVRcFVPTGPhLRHVEHCGTESGDSSSPLDLSsAQTLDQPLNLSLAAWPVKATASDPGWVHGRREPVFVKPRcAFSDGDSsLQF ................................EEVQESGaKsVHSA.hAcGPNcEQVQVVsGEQLKLGGCsLsVGNA+tss.sSuu.h...................shtsEPLDLSpPAsAATTT..tEpTP-NPGsDAGALPVTsRcFVssGshL+HVEHCGTESGDuSSPLDLSDAQs.DQPLsLSLssWPV+sTASDPGWVhGtpEsVFlKPRtshSDG-SshQh 0 0 0 0 +14610 PF14759 Reductase_C Reductase C-terminal Eberhardt R re3 CATH:2gqw_A_03 Domain This domain occurs at the C-terminus of various reductase enzymes, including putidaredoxin reductase, ferredoxin reductase, 3-phenylpropionate/cinnamic acid dioxygenase ferredoxin--NAD(+) reductase component, benzene 1,2-dioxygenase system ferredoxin--NAD(+) reductase subunit, rhodocoxin reductase, biphenyl dioxygenase system ferredoxin--NAD(+) reductase component, rubredoxin-NAD(+) reductase and toluene 1,2-dioxygenase system ferredoxin--NAD(+) reductase component. In putidaredoxin reductase this domain is involved in dimerisation [1]. In the FAD-containing NADH-ferredoxin reductase (BphA4) it is responsible for interaction with the Rieske-type [2Fe-2S] ferredoxin (BphA3) [2]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.77 0.72 -3.69 153 1813 2012-04-11 08:06:00 2012-04-11 09:06:00 1 26 1065 20 581 1646 395 82.00 28 19.73 NEW WFWSDQa-h+LQhAGl......ssutDp...sVlRGs.ss.t...t..sFol.a.....ah.+supLlAlculNp....sp-ahhu++Llsput.ssssstLA...DsussLKsll ..........aFWSDQYs.hplQhsGh.......s..p..u.h..Dc......h.l.lRGs..ss...p..........p.....phhs.a........ah..psu.....pllussulNp.......s.c.p.h....t.hs.+.chltsut..sh..s..s...t...lh.c.t..L................................. 0 153 355 470 +14611 PF14760 Rnk_N Rnk N-terminus Eberhardt R re3 CATH:2pn0_A_01 Domain This domain occurs at the N-terminus of Rnk, an RNA polymerase-interacting protein of the GreA/GreB family (Pfam:PF01272). It has a coiled coil structure [1]. 23.00 23.00 23.00 23.10 22.90 22.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.40 0.72 -7.89 0.72 -3.88 110 932 2012-04-11 13:03:29 2012-04-11 14:03:29 1 4 909 6 162 424 20 41.70 54 29.57 NEW pPsIhlophDhcRL-pLl-.....shstps.sstptLpsEL-RAclV ....+PoIIIs-LDsERl-tLLE......psAhushPlA-ALsAELDRAph.... 0 32 73 117 +14612 PF14761 HPS3_N Hermansky-Pudlak syndrome 3 Coggill P pcc Jackhmmer:Q969F9 Domain This domain is at the N-terminus of these vertebrate proteins. This region carries the clathrin-binding motif LLDFE at residues 172-176 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 [1]. 
25.00 25.00 26.30 25.30 23.80 24.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.64 0.70 -4.94 10 92 2012-04-11 15:52:16 2012-04-11 16:52:16 1 4 66 0 50 89 0 187.50 42 18.61 NEW RlhssHsFsSQcVssscp.EPsthCsu...GtDtLFlu..suuCpVEVasls.pppspshssFuTlupVlplsYScsGDYlVTIEcKspso.............alRsYlNWcsp..pscsstVslRhsGhphpssps-ss.pcQhEllElPL.scsPhCIuCCsloGsLLVGssspLllF.pLKspsl....scchphlDF-cpLI.hhsuasPscluhCssYIAlho-LEVhllKLsp ................plhshHsFtuQpls.sp...EPt.hCsu...G.-tLFl...suuCc..VEsaslt...pEhsp.+ssFuTl.GcVl.plsYo-u.GDYLlslEcKspso............................FlRsYsN.WRpt...................psppshVsl..Rhh.....G....hpsshspsh...cpphpllEhPL.scsPhsluCCsspGsLLVusps+llLapLphphh.........spchuhlDFE.pplhhhh.shsPhcluhCssalAlho-hElllhKLp.s................ 0 11 15 30 +14613 PF14762 HPS3_Mid Hermansky-Pudlak syndrome 3, middle region Coggill P pcc Jackhmmer:Q969F9 Domain This domain is downstream of the N-terminus of these vertebrate proteins. This region carries a number of tyrosine sorting motifs and one of two di-leucine sorting boxes at residues 542-548 well as a peroxisomal matrix targetting motif at residues 614-623 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 [1]. 
25.00 25.00 34.40 34.00 17.70 24.20 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.10 0.70 -5.86 9 100 2012-04-11 16:43:41 2012-04-11 17:43:41 1 6 68 0 54 97 0 358.40 41 36.33 NEW ptpPhElLG.csp.ssls...lslc.oTslss-hppthpV.............ppLLaRRFsPchpshhhs-ph..+LHSLQLhPhYpps...................thtss.ppsspppcLhulhCFFShPppGYLYslspu......ssLlS...sYtYspcsppsVLsspFLHsITpsuLpsaTlRsSssss.cpsshlDshhcsCPshohcVChltlp.FIGLpslsphcsallLLops-spph.ph.......................ttsuWsLYhl.sssohhQLYp-hl-huppYcsspspohhHLLuEAHLLlRsAL..hchs.tcsscKpEL..............hpAapESCuhLGDhaSR.-.spchcLALPYY+MSGLphs-llpR..h.ht........shppuh.sRGhIaaLpHuL......hcchsEpLScphAspVlphFthu-PcpL ................................................................................t..Ehlup.sppsuhs...lhlE.sTuhts-tht.hpl.............ppLLa+RFs.PDhsph.s..s.--h......+LHSLQ..LhPIYQpu...........................................thpsctps.o.p..c+chlul..FCFFShPcsGYLY.lsps.......VcLhS...sYpYs-+spQAVLospFLHVITS.........ssLQsaTVRCSAssA+cc...DsY..hDTT...hKuCPPVSh-VCsLRlQlFIGL+slCph+sHllLLTKAss.Esh.cRpps.hphhph......................................t.tssp.t.sssspsuWsLYll.sshsshpLYc-hl-YupoY.+os..po..po..hhHLLuEAHLLlRuAL......h-ssphc...s....s.c+t...EL............................................................hcAF+-SCu+LGDpa....SR.........hs..ppp..cL..AlPYYKMSuLshs-V....lsR.hthshp...........st.pph....tcGLlaYlp+sL.........hcphspp...L...scphu..s..cllpha.ht-Ppp.............................. 0 16 19 34 +14614 PF14763 HPS3_C Hermansky-Pudlak syndrome 3, C-terminal Coggill P pcc Jackhmmer:Q969F9 Domain This domain is downstream of the mid domain family, Pfam:PF14762, of these vertebrate proteins. This region carries a number of tyrosine sorting motifs and the second of two di-leucine sorting boxes at residues 711-717 well as the ER membrane-retention signal KKPL at residues 1000-1003 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 [1]. 
25.00 25.00 27.90 27.50 22.30 20.70 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.17 0.70 -5.46 4 71 2012-04-12 10:25:22 2012-04-12 11:25:22 1 4 47 0 35 70 0 308.80 58 33.88 NEW MKNlsPLpAlpYLRKL-s..hsSVLVTLTKAsMALKMGDLDMa+NEMpSHSEMhLlCGFlhEPRLLhpp+cGpllPTEhAlaLK-sQPGLLVASlluLpcNsKIslEEAD.FFKsLCsK..DEDsVPQLLVDFWEAhLVAC.P-sVLpELhFKLTSQYVWRlSK+phP-ThPL+TsEDLINoCSHaGLI.PWVshlMSs-ShhsKshsEDl.KLQSllsGPShDltshLPaLEsLu-ssNsGLolH.lLChTRLtpYEcsIDpLLc+hPEAVl.YApHELKE-spslWWpKLLPELCpRl+pstschplalSSLKETLSVVAsEL-LRDFLNlLPEDGTAAFFLPYLLaCS+KK.Ls .............MKNlsPLsAhpYLcKL-ssGhsSlLlTLTKAAhALKMGDLcha+sEMcpHuEMpLVhGFILEPRLLlQQ....+KG....QllPTELAhaLK-TQPGLLVASlLGLQKNsKIulEEADuFFKV....L.CuK..DEDslPQ......LLVDFWEApLVAslP-lVLQELhFKLsSQYlWRLSc.........+....ps....P.DT....hPLRTuEDLINuCSHYGLlhPWVplLhSs-ShsDKsasEDL.K.LQSLlCGPShDlASIlPFLEPLS.ED.ThA..G.LSlH.lLCpTRLpEYEpsIDpLL-RCPEAVIsYANHELKE-.scsLWWK..KLLPELCpRl.+...s.....G...GE+.p...LaL...SuLKE..TLSllAsEL-L+DFlNlLPEDGTAAFFLPYLLaCSpKK.l................. 0 5 7 18 +14615 PF14764 SPG48 AP-5 complex subunit, vesicle trafficking Coggill P pcc Jackhmmer:O43299 Family This family would appear to be the second of the two larger subunits of the fifth Adaptor-Protein complex, AP-5. Adaptor protein (AP) complexes facilitate the trafficking of cargo from one membrane compartment of the cell to another by recruiting other proteins to particular types of vesicles. AP-5 is involved in trafficking proteins from endosomes towards other membranous compartments [2]. There are genetic links between AP-5 and hereditary spastic paraplegia, a group of human genetic disorders characterised by progressive spasticity in the lower limbs [1]. 
25.00 25.00 26.50 25.30 21.30 22.00 hmmbuild -o /dev/null HMM SEED 460 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.59 0.70 -6.00 14 140 2012-04-12 10:55:25 2012-04-12 11:55:25 1 4 79 0 89 136 0 309.70 30 54.76 NEW -LQcACLlEuVtlLchlC+p....DsS..hlhRshPpl+pLasRls......ussu.......pu....psLLsIhQFFLsHGEs.sshDu-ushcphFspl.ucpFpcPhLAa-hlpFhhpNp.plLssp.sslhpp.FPsLLK...........hLAWsu.sLhscFVtlLPsLlsssTAlElLHslLDLPCLoAAL-hphRuus......su..tpslhs.p........................scssush-uhpsPhtcsLFpalLRsEuu..ssh.-R...Lss.LHplLtshussP...RVhQCupslPsLLclaFsslhcpAsssLlspLl.lLLERsstLa.lpsapt-V++VLSptlLtLsph+PsLlV-Lp+-lL-FlG.ospsh.pu+p-hasallWAlGEYhSsuhD+RCos-hhspaFEsLEs.lLaElopop.............ssushspsssRllssLMTsLsKLAoRsp-LlP.RV...uLhLoKhpo...psh.t.s.s-ps..sttlhpRAs-LlsLLKhPuVAthVLs..Posss ............................................hhpsl.hh..hsp.....ssp..hlh+shshhptlhtRh.......ss.s........s....hhlLslhpFaLsa.u-h..shhDs-s.ht.hhtt..h.s..a.s.hhs.thhthh..p....h........h...hP.lh+...........hlA.ps..h.t.h..lhP..hhsstohh.hh..lhDLPhlshsh...........................................................................................................s.ts.......hpsha..lL+scss...s.htp.........s.h.thht.h.t.s...............RhhtshphsP.lLphaFsssh.p.s...st................sLhs.tLh.hlhtR.s.ha....aphp.......lpphh.p.hh.hhphpPthlh...tp.l.t....t........tt.hh.plsWhlGEa.us..........t.h.thaEsLEh.hhac..................................................................phhh.lhsshsKlAsh..-h.s.+s....l..........................................................s..................................... 0 26 52 67 +14616 PF14765 PS-DH Polyketide synthase dehydratase Coggill P pcc Pfam-B_852 (release 26.0) Domain This is the dehydratase domain of polyketide synthases [1]. Structural analysis shows these DH domains are double hotdogs in which the active site contains a histidine from the N-terminal hotdog and an aspartate from the C-terminal hotdog. 
Studies have uncovered that a substrate tunnel formed between the DH domains may be essential for loading substrates and unloading products [2]. 27.00 27.00 27.10 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.91 0.70 -5.26 177 5569 2012-10-02 20:54:35 2012-04-12 14:52:00 1 1171 1046 21 2255 5792 232 275.10 19 12.09 NEW HPLL.G.tplths..sssphhapspLs.hps.PaLsDHtl.tGpsllPGsualEhAhpAuppht.....tt........shtlc..-lslppsll.ls.pss....shplplslpss.stst..............phplhSpsssst...............WshHspGplt.......................sh..h..tpt..s.s...ls...stthYpph...t.phG.ltY.GPsFps.lc.plhp.............s..p.shApltls....pshtttt.........ahL.HPulLDuslp.sh..................tt.ttshLPh.ulsplplh........t.....t.hhscsch.....tsts......htsclplhD.ssGpslsplcuLph+tlsstshts ...............................................................................................H.ll....t....s.....t.tth...h..hp.s.p.l.s..hp...p..h..s..a...L.........s.D.H.t...........l.....t...........G...........p............s...........l..hPu...sual-...hAl.p....Au..p..pht..............................................................shplc....-lsh.t.p.sLh...ls.......t............s..........s........s...............hplp..l....s.....lpss...tttt............................p.h..plhopss..sst...........................................hshHupGtlt...htttt................................h......s.................h....t......t.........h...............................t............t....................s......................................ls........................ssp...h...Y..pth............t...phG...h....p.Y..Gs....s..F...pu..lp..phhtt.........................ss..p..shAclt..ls....................psht.t.t.tt....................................ahl...HP...u..........lL......D.ushp..sh....hhhh...............................ttsts..tshlP......h....uh.pplplh..................................ssth.hsh.sph...............................stts....................h.t.s.s...l.p.l..
........h.......D....t........s......G.......p........s...l...h...plpulthp.ht.....t........................................................................................................................ 0 518 1299 1894 +14617 PF14766 RPA_interact_N Replication protein A interacting N-terminal Eberhardt R re3 Jackhmmer:Q86UA6 Family This family of proteins represents the N-terminal domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. The N-terminal domain is responsible for interaction with importin beta [1-2]. 24.00 24.00 25.30 24.50 23.60 22.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.87 0.72 -4.49 25 123 2012-04-12 14:36:35 2012-04-12 15:36:35 1 4 80 0 63 129 0 41.10 45 19.07 NEW ss++s.hKt.psPsWK-plRccChcRlRcpRscLLp+hRtss .............+RshaKh...soP....sWKEsaRpcCl-RhRssRs+LLs+aRps.... 0 20 32 47 +14618 PF14767 RPA_interact_M Replication protein A interacting middle Eberhardt R re3 Jackhmmer:Q86UA6 Family This family of proteins represents the middle domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. This domain is responsible for interaction with RPA [1-2]. 25.00 25.00 25.80 26.00 24.10 23.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.86 0.72 -3.23 27 109 2012-04-12 14:37:26 2012-04-12 15:37:26 1 3 65 0 48 112 0 74.40 36 36.46 NEW p.hlp-lhp-EhppLppsspsh..................hh..psh.phh......pEh--.Lt.h-.....-lppEhhpp.EhphhtphEpthphE-phLsthlp ............h..lVQEVMEEEWpsLpssps.............................hscshst....pEh.-.LusLE.....EIpQELlpp..Etsl...lp.EaE.cshph--phLs.hl........ 
0 8 18 31 +14619 PF14768 RPA_interact_C Replication protein A interacting C-terminal Eberhardt R re3 Jackhmmer:Q86UA6 Family This family of proteins represents the C-terminal domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. The C-terminal domain is a putative zinc finger [1-2]. 25.00 25.00 25.70 25.70 24.50 23.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.40 0.72 -3.58 50 138 2012-04-12 14:38:09 2012-04-12 15:38:09 1 4 109 0 85 130 0 80.90 31 35.91 NEW lCPVCppspLpt...spph...lhC.......sC.G.lplspp.............thshc.pLpshLppslspHtpp..C..spsPpFslp..ssss.t....sLhhpCpsCcahpll ......lCPVCpptsLph......sssh......lhC........pC.G.Lplssp................................ttplohp.pLctpLppslscHttp..C..spsPpFslp...........sssp.p....sLhhpCtsCchhsll................... 0 27 41 66 +14620 PF14769 CLAMP Flagellar C1a complex subunit C1a-32 Coggill P pcc Jackhmmer:Q6P047, Pfam-B_2704 (release26.0) Family This family represents one small subunit, C1a-32, of the C1a projection (the seventh projection of flagellar) [1]. Numerous studies have indicated that each of the seven projections associated with the central pair of microtubules in flagellar plays a distinct role in regulating eukaryotic ciliary/flagellar motility. The C1a projection is a complex of proteins including PF6, C1a-86, C1a-34, C1a-32, C1a-18, and calmodulin. C1a projection is involved in modulating flagellar beat frequency and this is mediated via the C1a-34, C1a-32, and C1a-18 sub-complex by modulating the activity of both the inner and outer dynein arms [2]. 
22.70 22.70 22.70 22.80 22.20 20.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.32 0.72 -3.86 50 207 2012-04-12 14:49:21 2012-04-12 15:49:21 1 5 87 0 139 195 0 94.30 27 33.37 NEW hpslhFspp.psaohppsSshhs..lhpplhp..sht...pthshtcshpha+pll.hppulp.+Pshs...htlFohppl+tlh..-ahhpo...aaRHY+LYpasFosphphshpt ......................................................pshhasht.psFoh.phothhs..lhpplhp.sh.........t..t.thslp-shphhppll.hpausp....cs.hs..............htlFshcpltslh..DYhhpo......aa+HaKLYcalFssppchpl..s......................... 0 59 70 89 +14621 PF14770 TMEM18 Transmembrane protein 18 Coggill P pcc Jackhmmer:Q96B42 Family The function of this family is not known, however it is predicted to be a three-pass membrane protein. 26.90 26.90 28.90 28.50 26.00 24.80 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.61 0.71 -4.58 43 161 2012-04-12 16:07:47 2012-04-12 17:07:47 1 4 128 0 104 149 2 117.70 37 69.08 NEW cshhsFhpulDWp.EPWlluLlsFH.llhllsslhoR+phsh..QhslFhlhlshVahuEplNchuupp...W..+pFupp..pYFDspGlFlSlVaSsPLLl.shllllshlhphsplhlclKptpL+c+t+p ...................s..h.shhhslDWp.EPWLhuLhsFH.llhllhslhoppphsh..Qlh.lFlhhlhhVYhuEhlNchuAtN.W.....+.FSpp..pYF...DspG.hFISlVaSsPLLlsshll............llphlhphsp...lMsclKptpl+c+t+.h............... 0 37 58 81 +14622 PF14771 DUF4476 Domain of unknown function (DUF4476) Coggill P pcc Jackhmmer:Q86XN7 Family \N 22.50 22.50 22.70 22.80 22.10 22.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.08 0.72 -3.95 42 221 2012-04-12 16:46:32 2012-04-12 17:46:32 1 12 125 0 88 194 12 93.60 26 26.52 NEW tspshsshphpplhphlpph.sF-s-+lphlphhtts....p....hosspssp.llphasFsss.+lchLchlhspIhDh...p.stptlhssFs....Fs.ss+p+spcll .........................h...hhs..phcphhphl+hh.sas-sphchlcshhpp.....hs...hosspssp.llshaoFsc-.+lpslclltspIlDt...p.Nhp.l.chhp....hs.SpKc+h+chl....................... 
0 51 59 71 +14623 PF14772 NYD-SP28 Sperm tail Coggill P pcc Jackhmmer:Q96MC2 Family NYD-SP28 is expressed in a development-dependent manner, localised in spermatogenic cell cytoplams and human spermatozoa tail. It is post-translationally modified during sperm capacitation and ultimately contributes to the success of fertilisation [1]. 22.10 22.10 22.10 22.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.16 0.72 -3.94 28 241 2012-04-13 08:02:08 2012-04-13 09:02:08 1 6 113 0 152 239 2 101.00 27 17.72 NEW +VusDtREtpRRhcEppt+pphpp+LcpEstpshpchppIst+WsplhphphPp-LpcplppQ+ptCpcllppK-plIs-hpp-LcppD-cYlpsl+cQucDl- .......................t.thcE.pR.Rtc.pp..h.pp.hhcK....Lp.pEtcp.optphscI....spcWcphhcp..tpspELpcplpt.ppphpcllcpKcplIp..........p.L.pp........-Lcpt--pYspsl+pphcslp....................... 0 60 76 113 +14624 PF14773 VIGSSK Helicase-associated putative binding domain, C-terminal Coggill P pcc Jackhmmer:A4D997, Pfam-B_8865 (release 26.0) Family The function of this short, serine-rich C-terminal region is not known. However, as it is frequently found at the very C-terminus of P-loop containing nucleoside triphosphate hydrolases, it might possibly be a binding domain. 19.30 19.30 20.10 19.50 19.00 17.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -8.99 0.72 -4.06 20 116 2012-04-13 08:13:48 2012-04-13 09:13:48 1 6 103 0 91 108 0 58.80 41 6.30 NEW SQLAAhlp..........tpsptttppppsstsKpDPIQAIL.AuAGVEYTHENSEVIGoSKlEEpLSRRAE ...................................h.................ptths.t.+pDslpuIL...usuGVpYTHpNsEVIGSSKlEppLSRpAt...... 0 15 36 60 +14625 PF14774 FAM177 FAM177 family Eberhardt R re3 Jackhmmer:Q8N128 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 134 and 205 amino acids in length. 
25.00 25.00 25.10 25.40 24.20 24.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.45 0.71 -4.20 23 150 2012-04-13 08:18:13 2012-04-13 09:18:13 1 4 89 0 91 148 2 107.10 36 57.19 NEW tssshppsE.......pcctps..hpsP+RllaFuDG.sMEEhSo-EE..-cp-..ttppsh.hss.lDsscLshGPalhapsh+lusssluuCDalGtplAohhGITss.KYQYtl-EY.Rhpcccpccpp-schs ...............................sp.......-.....t..t...hphP+RlI+FssG.sMEEYSo-E-...pt.p....pps..hss..lDs..scL...sWGPalhahhh+huosolusCDaLGE+lAshhGIosP.KYQYsl-EYhRhppccpccppcpp....................... 0 27 40 66 +14626 PF14775 NYD-SP28_assoc Sperm tail C-terminal domain Coggill P pcc Jackhmmer:Q96MC2 Domain NYD-SP28 is expressed in a development-dependent manner, localised in spermatogenic cell cytoplams and human spermatozoa tail. It is post-translationally modified during sperm capacitation and ultimately contributes to the success of fertilisation [1]. This short region is found at the very C-terminus of family members of family NYD-SP28, Pfam:PF14772. 27.00 27.00 27.60 27.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.85 0.72 -4.27 38 177 2012-04-13 09:00:41 2012-04-13 10:00:41 1 6 106 0 122 167 5 57.70 34 9.39 NEW caWpphupllspcphclWcsLpcuLp+YhclLpcRtpllp-s...ppLcpQNsEL+sLLpQYl ...........taWpths.pshstpp.clWcsLtpuLp+Yp..p..lLppRspLlpEs...psLcpQNpELcpLLpQYl.... 0 56 73 100 +14627 PF14776 UNC-79 Cation-channel complex subunit UNC-79 Coggill P pcc Jackhmmer:Q9P2D8 Family This family is a component of a cation-channel complex. 
27.00 27.00 32.60 31.50 26.70 22.00 hmmbuild -o /dev/null HMM SEED 525 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.82 0.70 -6.12 7 116 2012-04-13 09:36:58 2012-04-13 10:36:58 1 3 72 0 67 108 0 431.60 53 21.57 NEW .p.DssRhuhYPNL-YpsLYsALs.LlDVsPLIQhG.psFGpAlLQCLuCLLPFL-+DlIDsLPYLsASoluVLPspLHp-IlNhLCaYILPFTI..TRpp-.ppEshssQSVouVIMhVhQYosNPuHHCQLLECLMsLKpsVhKDlLsVIAYGTusuRuSAAKLLFYYWPsFsPNlaDRKslhsKhs.shsPFsCQR-tCPNAGNAEAsKVCYDHsISIsausDoPPPLYLCIECANcIHREHss.hFhDILHPMQplShlCENKNCRSp-KpAlSICFSoECASaNGNHPIRYCpQCHsNRHNsRRGuD.....HlsHpuLssshphDuEhQsahVEulVSLL+EAc.hs.psp+-spp.pt.....................tsssssssDshohEERQLLGRYGlWLLVGhCTPs-sTPsElLGRLLuMLFHWFHsTAY.aD..sQs..ESolEKLKs-aVCsWLp-ls+sHacVFISCLLPHPsEYuRVGGHW-T.LsS+ToHLKEGLpRLlCLlPYEVIop-lW-hVMP+WhEAIsNDVPE+ELsELKI .....................................cstphs.aPsLpYtsLY.sls.LlDlhPhlphu...shupulh.sh....tslh.FL.pp...-.lppLPhhh..Sslu.shPs......LHpsIlphLsh.hLPhsI...op..+pp..s..stss..SsSShl.MhshQ.YosNPsaHCQ.LLECLMphKppVhKDlLhVIAYGsups+ssAsphLFaYWPshpPshh.pchhh..hphT....sasPhpCQ+.cC.NA.hN.t.AsK.hChD.olSlshu...DpPPPLY..LC.ECupcItt.............-Hsph..hh.....DlLhP.t.p..lShlCp.pKNCpSp.....s+pAlshCFSst.........Csuhp........GN+PlRYCppCHoN+HsschGus.................................a.sp.s..sshphssE...sphVEAVl.......SLL+EAc.hstppphEhscpcpht..p.....................hsssshs.s.....cspstc-p+lLupaGIWhLVuLCT.Ps-sTPsEsLuRLluMlFpWFHsTAYhhD.............DpV..GShlEKL.....KspaVsc...............WLKslCcl+acVhl.C.LLP+P.EaARVGGaW-p..sSpsopLKEGLsRllCLlPYsVIo....ppl............W-plMPcWhEAIps-VP-ppLpEh+.t............................. 0 23 27 48 +14628 PF14777 BBIP10 Cilia BBSome complex subunit 10 Coggill P pcc Jackhmmer:A8MTZ0, Pfam-B_35417 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. 
The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. BBIP10 localises to the primary cilium, and is present exclusively in ciliated organisms. It is required for cytoplasmic microtubule polymerisation and acetylation, two functions not shared with any other BBSome subunits. BBIP10 physically interacts with HDAC6. BBSome-bound BBIP10 may therefore function to couple acetylation of axonemal microtubules and ciliary membrane growth [2]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. 26.00 26.00 26.70 26.20 22.80 21.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.27 0.72 -4.60 4 68 2012-04-13 10:27:55 2012-04-13 11:27:55 1 3 62 0 48 71 0 65.60 41 58.35 NEW MuEs..KsshRE....VLPKQG.L.hEDssshVLCKPKLlPLKSVTLEKLEKMQpEAQ-sVRpQE.ApKpp .............................t.......ll.Pcp..G.La.hE.-.h.hshVLCKPKLlPLKSlTLEKLEKM..p+cAp...cpl+ppc.spp..t................ 0 17 22 39 +14629 PF14778 ODR4-like Olfactory receptor 4-like Coggill P pcc Jackhmmer:Q5SWX8 Family In C.elegans, odr-4 and odr-8 are required for localising a subset of odorant GPCRs to the cilia of olfactory neurons [1]. 
Olfactory receptors (ORs) are synthesised in endoplasmic reticulum of the olfactory neurons, trafficked to the cell surface membrane and transported to the tip of the olfactory cilium, where they bind with odorants. Various accessory proteins are required for proper targetting of different ORs to the cell membrane. ODR-4 was the first accessory protein to be described. 24.50 24.50 24.60 29.60 22.90 24.00 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.36 0.70 -5.76 38 177 2012-04-13 12:22:49 2012-04-13 13:22:49 1 5 117 0 110 194 1 313.30 28 78.69 NEW GLllGp.tssp+..saVlplspTPpc-ssss.................................t..shpslDp-Wls-HAppVoRMLPGGhsVlGlal..ls.scsshcpss.hpthpplls.......................tsphhhhhspchspphhlalshss..p.phsC+shshts..ssuoh+PsDaKht..pt.spWhplpsshsl-hhlPlt..tspsst.shc......cplppslphhscplpsuhsl..lsGchhptp..l...........................................................psh.plplllP..............tpsspppstplpsssuslphpGslps+ualps.+solu-.AhpslKpDIl+SLpoRl-lhhDsLhpspsssss....t................................hppLPRRVhhsl.st..............sl.hsDYLFtsEsspcshtphp-lLshphsspslstshE ............................................GLllGp..sspc..shllhhstTP.p-pttt............................................t.p.tslDpcWhs-HApQ.VoRMLPGGh.VlGlal..hs...s.thh.pp..p...phhpplhh.......................tpphhshh..p..pphs-plhlals.ss..+...+hhC+shsh.s...spush+PsDaKap......th.spWhplcCshphshhlPls......tsss..shc......cphppsl.pth..s+.pl.psuhhl..lsGp...lhtpctsL....................................................................................s..tttpt..plpllh.h.......................tssppp.stplp.ssGslphpGslps+Aalps.+splp-.AhpslKcDllpol..tsRh-lhh-slh.sp..tpt....t.................................h.hP+Rlhhsh.ut..............slhhsDYhFtsEsspchhpphh-hLshphp.pth............................ 
0 38 54 83 +14630 PF14779 BBS1 Ciliary BBSome complex subunit 1 Coggill P pcc Jackhmmer:Q32MM9 Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. BBS1 predominantly localizes to the basal body and or transitional zone of ciliated cells. It has been found in a heptameric complex with BBS2, BBS5, BBS7, BBS8, and BBS9, termed the BBSome. Mutations in BBS1 can lead to retinal inadequacy [4]. 
27.00 27.00 38.60 33.40 24.30 22.90 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.66 0.70 -5.41 14 145 2012-04-13 12:46:37 2012-04-13 13:46:37 1 7 104 0 91 136 1 216.40 41 43.55 NEW KWLcA..phDssusLaThSoClsLuDlpuDG-h+LlluDlG..s.tstph...KLKVa+GsplhoEpsLsDlPoulsoFhh-ppEPRhPsIAVAsGsslhlYKNh+PYaKFTlPuh-lssLEp-lW+pst.tsclss.sLcphL-s.L+s.hutt.pLos+S.caLpLc.c-htuFlppapsssltRposITsMsolKKsou-psulSCLVluTEsG-lalLDspAF....slLhp.......hslsSl........so..Gpas...l-aRlsVusRsGslh..hLRR ........WLpA...hh...-shAslpshSuClsLu..DlpGDG-h+LlluDhu.........t..p.....+LKVh+GstlhpEpsL.slPsuhssFh.h-p.p-.P...+.h.....P..slAlAuGsslalY+NL+PaaKFolPtl..ssh.Et-lWppht.ps.plss...shtp...hLcs.l+p...hst...L.ShpShchL.tl.c.........p-.....h.ttFl..pp...a...+s..pslt+pssI...Tshssl++s.s-csusSCLVlGTEstclhlLDspuF.slltp..................hpls..usPs....hltss..Gpap.l-aRlssusRsGplYhl+............................ 0 37 46 72 +14631 PF14780 DUF4477 Domain of unknown function (DUF4477) Coggill P pcc Jackhmmer:Q6NW34, Pfam-B_4074 (release 26.0) Family \N 24.90 24.90 25.10 25.70 24.60 24.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.85 0.71 -4.92 18 140 2012-04-13 15:23:06 2012-04-13 16:23:06 1 4 103 0 86 128 0 160.00 28 39.79 NEW WNchcLppPshsoh......psppshhlcslhtslschlppLpu....pph-pEuAlLsRllY+h+NpF+pp+uapulpplppsLpRLhphsLspslpslpshLPs......sspstssslPo+sslEalLVRlLGhsKLhhRlh-sCppAhphhspplp.saFhphsslhhuhluRla.lL.s+slhppsssLYscLhs .........................................................................hh..hpp.....ptLptEssll.ptllYpp+Nphtppp.ahtLppVcpsL++LpphsLpsslpsl........hplh..ss............................pst.th...lPo......ps......s.....h..-......h.lhh+lLGss+LlhRlh-sCpcshhhhsppLthp.Fh.hsllhhulluRlh.lL.hptlLtchh.lYp.L..t....................................... 
0 21 37 62 +14632 PF14781 BBS2_N Ciliary BBSome complex subunit 2, N-terminal Coggill P pcc Pfam-B_5448 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia [5]. 
26.30 26.30 27.80 32.80 24.70 23.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.62 0.71 -4.54 26 130 2012-04-13 16:13:47 2012-04-13 17:13:47 1 10 98 0 86 119 1 128.20 45 19.35 NEW l.slG+aDGppP..sLssAT.suGKVhlHsPappttts............ppsslshLNlNpploulsuGtLp....ssspDhLllGoposllAYDVcpNsDlFYK-lsDGlNulll.G...plus..ppPLsllGGNCulpGFDtcGsEhFWTVT.GD ...........................slG+aDGh+P..sLssAT.puGKVhlHsPHppsth.st..................s.cuclshLNINpslosLsAGhLps......p.shDsLllGTpTsLLAYDVhsNsDlFY+E................ls.DGsNul.llG................pl..Gs.....hss........PLsllGGNCulpGFDt-Gs-lFWTVTGD...................... 0 36 44 70 +14633 PF14782 BBS2_C Ciliary BBSome complex subunit 2, C-terminal Coggill P pcc Pfam-B_5884 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. 
BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia [5]. 25.00 25.00 25.60 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.15 0.70 -5.91 25 156 2012-04-13 16:18:48 2012-04-13 17:18:48 1 11 100 0 98 140 0 345.70 40 60.16 NEW oGEVlaKDshsu..ulAulltuDYRhs.GpspLIssSlDG-VRGY.........sstpppssthpsssppctlccL.p+KQsLhhELcshEcshctt..............tssspsshIPssTplpsslpss.....ppspl-LsluTsN...-olI+usllFA..EGlFc.GEoaV....pssssosslclsLhss..+Dssl-l+lKshVGhp.sSspa+VFElohpLP+FsMYthspss.s................pPsuhVsFpls-..RhpRlshWlspsF..hlsptlp.......tssshclphhsLR...ssps.Lhlchssss.........plpIpoDch-lAGDllQuLssFl......slc.-lpspu-FPtphccLcplLp+Vs-hpulRt+Lou-hADpushlKslllRAEDARllsDhcsM++hYsELhslN+-LlspaphRssN+scLlssLKclNphIQ+Au+LRlGpsKspllusCRsAIKsNNlpuLhcII+ ..................................sGEVlaK-shsu..slAullpuDYRhs.Gp.pllssos-Gc.lRGY.............stt.psshhtts.ppphlccL.p+KQsLh...............hELpshEpp.t................tttsphshlsssTplpsshtsp.....ptsph-Ltlssss.......sslI+ulllFu...EulFt..GEohl................ss.pp.ss..plplslhss..KDssl-lclKsh....VGht.sus................papVFEloppLP+FsMYth.t.....................P.uhVphtls-..R.p+l.shWlppsF..llsptht.......pttshplpahslR..sst...lhlphp.ss......................plpl.sDshchsG-llQuhstah.......tlp.chps.spFP..hcchpphh.cVcphppl+t+Lou-hA-pushl+shllpAEDARlhtDhpsM+phYhpLhslNp-Llstapl..........RpsNappLhssLKtlNphIppAu+LR.lGpspspllstCRsAI+ssNhpsLhplhp.................................................... 
0 45 54 82 +14634 PF14783 BBS2_Mid Ciliary BBSome complex subunit 2, middle region Coggill P pcc Pfam-B_5884 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia [5]. 27.00 27.00 27.00 27.20 26.90 25.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.34 0.72 -4.29 24 129 2012-04-13 16:52:41 2012-04-13 17:52:41 1 11 96 0 83 119 1 109.30 52 16.03 NEW sVsulslsDhssDGp..sELlVGS-Da-IRlFcs-.-lltEhsEsstlssLsslps.sp.....FuYuLtNGTVGVYcp...ppRhWRlKSKpp.ssultsaDls..u-G..-LIsGWusG+l-sR .......................pVpSLsLsDasuDGc.....pEL.....LVGSEDF-IRVFKcD.ElluEhoET-tlTuLs.shhs...u+...............FGYA.LuNGTVGVY...-+.......ssRhWRIK..............SKsc.shulpuFDls..uDGVsELITGWSNGKlDsR....................... 
0 33 41 67 +14635 PF14784 ECIST_Cterm C-terminal domain of the ECSIT protein Godzik A adam Jackhammer:Q9BQ95:268-396 Domain This family represents the C-terminal domain of the evolutionarily conserved signaling intermediate in Toll pathway protein, an adapter protein of the Toll-like and IL-1 receptor signaling pathway, which is involved in the activation of NF-kappa-B via MAP3K1. This domain is missing in isoform 2. Fold recognition suggests that this domain may be distantly homologous to the pleckstrin homology domain 25.00 25.00 27.00 27.30 21.00 20.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.62 0.71 -4.18 23 98 2012-04-15 05:32:46 2012-04-15 06:32:46 1 4 81 0 63 105 0 119.00 40 31.59 NEW SspQpcLLpcpshscPlaVEGPaplWL+cpslsYalL+u-..s.....pshs..p..pp--.hD.....Dssslhhs.h.hhchphpp......p.t.stthoVHEQ-DGTIaA.hCsTGsuo+sSLLuWIRhLpc..sNPsLsplPVlF+Lp .......SP-QpthLucHsss+PlaVEGPFslWLRs+sl.YalLRAD......hP.p.....-cc.p-.hc.....-..slhhP.h.h.chchsR......psh.shphsl.c-..--GslFA.hChsG.spspsoLhpWIptLQc..sNPsLuplPVlFRL..................... 0 21 26 46 +14636 PF14785 MalF_P2 Maltose transport system permease protein MalF P2 domain Eberhardt R re3 CATH:2r6g_F_03 Family This is the second periplasmic domain (P2 domain) of the maltose transport system permease protein MalF [1-2]. 
22.00 22.00 22.00 22.90 21.60 21.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.99 0.71 -4.37 57 692 2012-04-16 10:16:57 2012-04-16 11:16:57 1 2 684 8 63 308 12 159.70 61 31.20 NEW LoaERAQsVLhsRpaQu..GcsasFsLY..ssssp.apLtLpss-ttph.........hlStsFsl..stsss............pplsLs.sss..spGEtAsl+sIspsR..pALsslshhLPsGs.cLpMSuLRpFuuspPLYo............lpcDGpsLpNspoGpha+PNh-hGFYQslstpGpa.tu-plSPGFTVslG ..........................................................................................................LTFERAQpVLhDRSaQA..GKTYNFGLY...PuGDE.WQLALoD..G..E..TGKp..........YLSDAFpF..........GGE..............QKLpLKETsA...PpGERANLRlITQNR..pALSsITAlLPD....Gs...KVhMS..SLRQFSGTpPLYT............Ls.sDG.TLTNNQSGVKYRPNNpIGFYQSIsADGsW.G-EKLSPGYTVThG........ 0 11 23 46 +14637 PF14786 Death_2 DEATH_2; Tube Death domain Coggill P pcc CATH:1d2z_B_00, Pfam-B_14779 (release 26.0) Domain This Tube-Death domain has an insertion between helices 2 and 3, and a C-terminal tail compared with the Death domain of Pelle proteins in Drosophila. The two N-terminal Death domains of the serine/threonine kinase Pelle and the adaptor protein Tube interact to form a six-helix bundle fold arranged in an open-ended linear array with plastic interfaces mediating their interactions. This interaction leads to the nuclear translocation of the transcription factor Dorsal and activation of zygotic patterning genes during Drosophila embryogenesis, and is assisted by the significant and indispensable contacts in the heterodimer contributed by the insertion and C-terminal tail described above [1]. 
25.00 25.00 27.50 32.70 22.80 24.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.84 0.71 -4.46 4 45 2012-10-01 21:41:45 2012-04-16 14:44:21 1 2 33 2 20 49 0 117.20 54 27.18 NEW YoRsTElR+Vp-.Dl.cLApIL--s..WRpLh.lIP++l.DVptsuGuhh..a..h.....hKYsupp.pplDctApRl..spupuphhl-EWtTSGKLsERPTlGhLLpLLV+uphapAADaVAlchLpEspPARPssGPAAhIsl- ...........ElR+l...th..pLsplL..p...st.W+plMshlPpt.......................hKYoupcl..I-psApRh.PcQStSQlMIDEWKTSGKLNERPTVGVLLQLLVpAELaSAADFVALchLNEspPsRPsDGPuA.ISL-....... 0 5 7 17 +14638 PF14787 zf-CCHC_5 GAG-polyprotein viral zinc-finger Coggill P pcc CATH:1cl4_A_00 Domain \N 27.00 27.00 27.00 27.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.84 0.72 -4.46 12 108 2012-10-03 11:39:54 2012-04-16 15:14:35 1 20 40 2 36 128 1 33.90 51 5.38 NEW ssslCPRCtKGhHWAs-C+S+hDhpGpPLss.cpps ....sPsLCPRCKKGpHWAs-C+SKhDhpGpPLss.ptp........... 0 16 19 23 +14639 PF14788 EF-hand_10 EF hand Coggill P pcc CATH:1djx_B_01 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.58 0.72 -4.30 12 227 2012-10-02 16:17:27 2012-04-16 16:04:36 1 18 55 20 99 189 0 50.60 47 6.86 NEW .KMSh+ElKphL+.lNlElcDpYAcpLFpcCD+SpoupLEupEIEcFY+hLT ..............KMSF+ElpslL+hlNl-hc-pYAhp.LF.....pcs..D+..S.p.o......spLEscElcpFY+hLT........ 0 12 24 51 +14640 PF14789 THDPS_M Tetrahydrodipicolinate N-succinyltransferase middle Eberhardt R re3 CATH:2rij_A_02 Domain This is the middle domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase [1]. 
25.00 25.00 26.00 25.20 20.20 24.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.85 0.72 -4.01 68 856 2012-04-17 07:42:46 2012-04-17 08:42:46 1 5 812 32 164 575 309 40.80 44 11.94 NEW LsNVAW.TstGPhpl-tlcpsclphRh.pGph.lpVhuVDKFPR ....LsNVAW.TspGPhclstlcpschc...h.....Rh.pG.........t.........h...lsV.sVDKFPR........ 0 42 107 149 +14641 PF14790 THDPS_N Tetrahydrodipicolinate N-succinyltransferase N-terminal Eberhardt R re3 CATH:2rij_A_01 Domain This is the N-terminal domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase [1]. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.92 0.71 -4.63 21 296 2012-04-17 07:43:41 2012-04-17 08:43:41 1 2 295 22 35 170 92 135.90 44 36.33 NEW -cFKshVc-lpup.pGYK-PlAFGIARVDhGQhsucKlLQAoYPllNWc.ENaGSAAlFlpAlpcsGlplDFosSEhVhslspcFlppALphFsPalsEAhu-..uHKNlQVlpsLppthccst......pscF+lVFlFEDssPcSVEulYLKLYALSLuKAsLRSlNLsGAFG .................................................................................pcFh.hlpphppp.stY+cPhuFGIARlDhu.h.ppKlLpAoasllNap.pNhGShAlhhpuh.pp......t...h..c.ptSEhV..lp.p.l..ALtha.pPalpE...p...uHpNIp....llh..lhct.hc-.s.................................ashVhLaE..DccP.SVEusYLKLhLLSp+KVsLRSlNLsGhFG..................... 0 10 27 34 +14642 PF14791 DNA_pol_B_thumb DNA polymerase beta thumb Eberhardt R re3 CATH:2van_A_03 Family The catalytic region of DNA polymerase beta is split into three domains. An N-terminal fingers domain, a central palm domain and a C-terminal thumb domain. This entry represents the thumb domain [1]. 
25.00 25.00 25.20 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.85 0.72 -4.28 209 1268 2012-04-17 13:11:18 2012-04-17 14:11:18 1 39 923 222 528 1126 238 66.10 35 12.07 NEW ALhYFTGSctaNhplRphApcc.Gh+LsEaGlac.................sp..........tp..................h....l..ssp.....................oEc-laptLGLsalsPchR-s ..................................uLhaFTGS+paNhphRph.App+.uh+lsEaGlhp....................h..ss..........sp...........................................hl.php..............................................................oEcclactlGlsalsPp.Rc.................................... 0 177 300 418 +14643 PF14792 DNA_pol_B_palm DNA polymerase beta palm Eberhardt R re3 CATH:1bpd_A_03 Domain The catalytic region of DNA polymerase beta is split into three domains. An N-terminal fingers domain, a central palm domain and a C-terminal thumb domain. This entry represents the palm domain [1]. 25.00 25.00 25.00 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.76 0.71 -4.01 47 629 2012-10-02 22:47:23 2012-04-17 14:17:36 1 31 308 209 368 663 146 118.30 26 21.74 NEW RIPRcElptltshVp.ptspplsPs........hpshlsGSYRRGtsoSGDlDlLIT+scspst..................hp.........shLspllppLpp.......psFLscsL...........ut........us............sKahGlCpLss........................s...............thaRRIDlhllPt ..........................+hPRpEspth.tphlp.c.t.s.p.tlsss..........hh.sslsGSYRR.............G+.t.s.u.G.DlDlLlT+sctss........................................p..............tllppllppLcp........ps.alp.p.s..L........................................s.........t.....................................ta.hG.t.h.....................................................................................................hRRlDhhh........................................................................................................ 
0 113 187 273 +14644 PF14793 DUF4478 Domain of unknown function (DUF4478) Eberhardt R re3 CATH:3bq9_A_01 Domain This domain is found in bacteria, and is approximately 110 amino acids in length. It is found in association with Pfam:PF03641 and Pfam:PF11892. 25.00 25.00 25.20 71.90 19.80 24.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.33 0.71 -4.48 51 869 2012-04-17 14:13:23 2012-04-17 15:13:23 1 3 864 10 131 454 72 111.60 69 24.98 NEW hpsplsPtGoh-lLSQhEVs+L.ppsususLYpLFRsCuLAVLNoGupoDsucplh-pYp-F-IpllpcERGlKLELhNsPtpAFVDGchIcGIpEHLFuVLRDIlYlssclpp ......Is+lSPh.GSMDhLSQLEVDhL.K+.TA.SSDLYQLFRNCSLAVLNSGShTDNSKELLsRaEsFDINVLRRERGVKLELlNPPE-AFVDG+IIRuLQspLFAVLRDILFVtuQIcs.... 0 22 52 94 +14645 PF14794 DUF4479 Domain of unknown function (DUF4479) Eberhardt R re3 CATH:3bu2_A_02 Family This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF01588. 25.00 25.00 27.20 27.10 24.20 20.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.12 0.72 -4.44 90 1013 2012-04-17 15:01:32 2012-04-17 16:01:32 1 4 989 4 85 461 0 72.60 40 36.05 NEW tsssstctshE++GsVs+Ihs.tcsspslGaNlFssSshl.plpu.sGpVpLoc-plspLNptLpcsGFsppLpsDh ..........tpsstuchsh-RKGsVsRlhp.c-sGpsVuaNIFclSs...hl.pIpp..pGpltLTDE.VsplNptlpcsGFsccLssD....... 0 18 42 65 +14646 PF14795 Leucyl-specific Leucine-tRNA synthetase-specific domain Coggill P pcc CATH:1h3n_A_03 Domain This short region is found only in leucyl-tRNA synthetases. 
It is flexibly linked to the enzyme-core by beta-ribbons structures [1] 25.00 25.00 38.20 83.10 18.90 16.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.66 0.72 -4.11 9 12 2012-04-17 15:26:50 2012-04-17 16:26:50 1 2 12 10 6 14 0 55.90 70 6.37 NEW .WTDaGPVEVEGspVR.LsEssRlRLEl.EutLSLE-V+KMGAELRsHEDGTlHhWKP WTDFGPVEVEGstVR.LPEPTRIRLElsputLSLE-V+KMGAELRPHEDGTLHLWKP 0 1 4 6 +14647 PF14796 AP3B1_C Clathrin-adaptor complex-3 beta-1 subunit C-terminal Coggill P pcc Pfam-B_195384 (release 26.0) Family This domain lies at the C-terminus of the clathrin-adaptor protein complex-3 beta-1 subunit. The AP-3 complex is associated with the Golgi region of the cell as well as with more peripheral structures. The AP-3 complex may be directly involved in trafficking to lysosomes or alternatively it may be involved in another pathway, but that mis-sorting in that pathway may indirectly lead to defects in pigment granules [2]. 27.00 27.00 27.40 27.50 25.70 26.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.77 0.71 -4.50 25 196 2012-04-17 16:02:19 2012-04-17 17:02:19 1 7 100 0 118 173 1 141.10 38 13.89 NEW s+ss.s.LLDL.DD.......sPsssP.....lloPSLsusLpshohosssssss............lsuPual...sh+spELLp.+loGcGLulpYRFoRpPplauspMVSlplpFoNpospclpsI+lup+.pLsuGMplpEFspIsp.LtPptShosslGIDFsDSTQ ................................................................s..tpph.LLDL.-Dat........s.ss.P......hlosSLhuDLpGLslosos.s.s.............................lhoP..shh...ssKppELLH.+h.s.GcGLulcYpFsR....p....Pph.......hs..spMVo.....lplphsNso-.p.pIcsI+lGpp.cL..ssGh.plpp...Fs.I-s.LtP.tt.o..hT.sshGIDFsDSTQ................................................ 
0 25 42 75 +14648 PF14797 SEEEED Serine-rich region of AP3B1, clathrin-adaptor complex Coggill P pcc Pfam-B_195384 (release 26.0) Family This short low-complexity, highly serine-rich region lies on clathrin-adaptor complex 3 beta-1 subunit proteins, between family Adaptin_N, Pfam:PF01602 and a C-terminal domain, AP3B1_C,Pfam:PF14796. 23.00 23.00 25.70 24.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.39 0.71 -4.00 16 46 2012-04-17 16:15:25 2012-04-17 17:15:25 1 3 27 0 20 41 0 130.00 69 12.64 NEW sssKKFYSESEEEEDSSDSSSDSESESGSESGEQ...sEEGDSoEDSSEDSSSEQDSESGSESEsEsKRsAKRNSKoKGKSDSEDtEKENEKSKTSDSSssESSSlE-SSS-SESEStSESESESR+VTpEKE ...NsuKKFYSESE.....E....E..EDSSDSSSD..........S................E............SE..SGSE.SGEp...sEEGDSSEDS...SEDSSSEp.-SESGpESthEsKRsAKRNSKsKGKSDSEDGEK.E.NEKSK...TSDSSsuESSSlE-SSSDSESESESESESE...SRKVTKEKE..... 0 1 1 4 +14649 PF14798 Ca_hom_mod Calcium homeostasis modulator Eberhardt R re3 Jackhmmer:Q86XJ0 Family This family of proteins control cytosolic calcium concentration. They are transmembrane proteins which may be pore-forming ion channels [1]. 25.00 25.00 30.70 27.20 22.10 21.50 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.78 0.70 -5.42 30 292 2012-04-18 15:03:15 2012-04-18 16:03:15 1 2 49 0 174 237 0 231.90 33 77.89 NEW M-pa+hlhpahpspcsolhsullulLTluupclFShhsFpCPCssshNhhYGlshLhlPAlsLhllGhhlNspo......W+lssG.............+.hpptppssh.hhlhssIhtpAhlAPlsWlsVuLLsGpaY.CAhSs.ssssth....t.hss...psscstchLu+lP.Ctc.hss.....pcplhphL+spSQlLGWhLlsllslluhl.spClppChS.loaLQhcaWppYhcpEcc....lF-ppsppHAcphAccNl+pFF-shpsp. 
......................-tFphlhpah...spptslh.ulhulhsluutplaSshsFpCP.C.sshN.hhYGhshLhsPslsLhllGhhlNsps......WchhsG.......................ppt.p..ssth..hhhhsplhtp..AhlAPlsWlsVsL.LsGp.....h.a..CAhSt..ssspth.......hsp....hp....sspstchLs+hP.Ctc..h..s.......p.ctl...hthL+spSQhlGWhllhlsslhshl.spslppChS...l.oaLQtpaWppYhppEcc....lF-psspcHActhAcpslcpFFtthp..p............................................. 0 21 35 73 +14650 PF14799 FAM195 FAM195 family Eberhardt R re3 Jackhmmer:C9JLW8 Family \N 25.00 25.00 26.30 25.60 23.40 23.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.48 0.72 -4.15 32 165 2012-04-19 08:00:36 2012-04-19 09:00:36 1 2 77 0 90 155 0 98.80 36 68.81 NEW .pssssP+sVFpp.ss+ph.....tts.pptpp......pEshostHEE.l+aIp-uWppV.pp.tts.....................................sss......tstpusshYhpcsPsssLpsFpPhDLEpWWucRhhsNIsp ...........................................................h.pssss+hVap....Nu+Rt......ss.s.s.pss.....pEsaTssHEENVRFlhE.........AWppVppphpst................................ss......tpssts.pYh.EcsPsPpLpsFpPhDL--aWupphhssIt........................................ 
0 16 22 55 +14651 PF14800 DUF4481 Domain of unknown function (DUF4481) Eberhardt R re3 Jackhmmer:Q5VZI3 Family \N 25.00 25.00 26.70 37.70 24.80 24.80 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.93 0.70 -5.51 5 86 2012-04-19 09:33:35 2012-04-19 10:33:35 1 3 66 0 51 93 0 236.00 39 63.81 NEW WRNGQsVhuVPTSShhoP.SFDLSlCRsLLEocGFQIPAu-FEsPLElALDcPSVRRYLlFNSplF+FIMAPIlYlVlWCAlYSTLHLY...SlucYWlLCLsVSLVSIhLTssIlLIlcYSNKEINMNTDVRLluVNERLlRH+LLLGVADWVcsCpGsLQLFhVYWDlu+CL+uLTEoL-EM+FupDpuQphL+KRMSHLlLVsEVsoh-PsA..u....sEG.S-..EEpPLLsusEE.suE.polSQR-DoKLTcNaSLVP-shLsspshApQLLlsYGAlYVRLLVSs+L.sssppPpssu+NHCsuu.slChCQYIc .............................................h..NGpV.lss.L..hs.s....hhsPhtFc.ths.EpL.s.Glp....lss-pYhshhEohl.-..hhRahlYN.p.hRlhh..s..hlh..a..lVlhu.laS...s.......Qhh....uLushhs.uhLhlshA.Al.LshhlhLhhp+tpcKhNhNh-hpLAtsNttLhRH+lLLGlsDpschspphlpLhFlYFDhppCVpaLs-alp......chcpst.-..sh....hcp+Ls..................................................................................................................................................................................... 0 11 15 31 +14652 PF14801 GCD14_N tRNA methyltransferase complex GCD14 subunit N-term Coggill P pcc CATH:1i9g_A_01 Domain This is the N-terminal domain of GCD14, itself a subunit of the tRNA methyltransferase complex that is required for 1-methyladenosine modification and maturation of initiator methionyl-tRNA [1]. The exact function of the N-terminus is not known but it is necessary for maintaining the overall folding and for full enzymatic activity. 21.90 21.90 21.90 22.50 20.70 21.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.70 0.72 -4.39 11 435 2012-04-19 10:01:56 2012-04-19 11:01:56 1 2 432 1 112 324 79 53.60 53 14.52 NEW .RRGsLpAGEKVQFTDRKGKKITDQLVsGGVTQTEHGlILHDDVIG+oEGsVlT .....t..pGPFpsG-RVQLTDsKGR+aTlsLpsGupFHTH+GultHD-lIGts-GoVV.... 
0 33 82 104 +14653 PF14802 TMEM192 TMEM192 family Eberhardt R re3 Jackhmmer:Q8IY95 Family The function of this family of transmembrane proteins is unknown. In vertebrates, proteins in this family are located in the lysosomal membrane and late endosome [1-2]. In Arabidopsis, a member of this family has been found to weakly interact with FRIGIDA, a determinant of flowering time [3]. 25.00 25.00 25.30 25.00 24.80 23.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.48 0.70 -5.51 19 137 2012-04-19 10:17:44 2012-04-19 11:17:44 1 2 88 0 80 139 0 206.00 32 76.71 NEW D.ss.hhsssLtSt..cspFcslsTVhhssl.llls...lslslsuhlhshhhsspptcC........csahlllYh+sshWllshlh-phs+p+HpplRhpGYhcFYRpTpph++lPLhlhShsNssLLhltslhpphh...............tpsh.t.t.....Loshhaltlls.lEhllhlsshlhYhs+Vh+FN+t+s.PDlhc--ph...h..s.phssphGh.pcusshEEllEKQADlIcYL+cHNspLu++lhpLs ...........................................................h.t..h.s...p.pFp.l.Th.hhslhhhlp...........shlshhshhhs.h.....tp.pC................ps.hlhlhh+shLWllphlh-palph.pHp+lR.pGYhphacpT+pl+plPLhlhShGNsslLllhsh.tph................t.sh..........L...hhlslhs.lEhlsshhshhhYhs+..lp+.FNpt+spPDllcppt..........s.phhsphGh...+pssshc-llEKQuDhIpYL+cHsthLsc+lhth.......................................................................................... 0 16 28 57 +14654 PF14803 Nudix_N_2 Nudix N-terminal Eberhardt R re3 CATH:3cng_A_01 Domain Ths domain occurs at the N-terminus of several Nudix (Nucleoside Diphosphate linked to X) hydrolases. 25.60 25.60 25.70 25.80 25.50 25.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.89 0.72 -4.26 89 442 2012-04-19 11:54:43 2012-04-19 12:54:43 1 3 377 4 170 392 347 33.50 47 17.74 NEW +aCspCGss.l.ph+lP.tGDs+.RhVCssCstIHYp ......+FCstCGps.l..tt+IP..tGD..sR.RhVC..s..s.CusIHYp... 
0 43 94 138 +14655 PF14804 Jag_N Jag N-terminus Eberhardt R re3 CATH:3gku_C_01 Domain This domain is found at the N-terminus of proteins containing Pfam:PF13083 and Pfam:PF01424, including the jag proteins. 25.00 25.00 25.40 25.10 24.80 24.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.10 0.72 -3.95 270 1458 2012-04-19 14:54:28 2012-04-19 15:54:28 1 7 1341 3 281 989 174 51.90 36 17.81 NEW chpG+Tl-EAlppAhpcL.sls.c-cl.-l..-Vlpcs.spGhhG..lGpKsAhlclphc ......hpGpTlEEAlppuLppL.sl....s.+pcl...cl..cVlpc..s.+KGFLG..hG.+KsAhlclp..................... 0 137 226 247 +14656 PF14805 THDPS_N_2 Tetrahydrodipicolinate N-succinyltransferase N-terminal Eberhardt R re3 CATH:3gos_A_01 Domain This is the N-terminal domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase [1]. 25.00 25.00 25.20 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.31 0.72 -4.11 262 1625 2012-04-20 07:44:11 2012-04-20 08:44:11 1 6 1605 15 423 1040 1377 68.40 52 24.78 NEW spLpphIEsAaEs.Rspls..t....sssscl+cAVppslshLDsGplRVAE+.h...s......Gp............WhVNpWlKKAVLLuFRlp .....................pLpslIEsAFEc..RApIo..ss.....sscstsR-AVppsIshLDsGtLRVAEKh....D..............Gp................WsspQWLKKAVLLSFRlp......... 0 118 254 339 +14657 PF14806 Coatomer_b_Cpla Coatomer beta subunit appendage platform Coggill P pcc PF07781 Domain This family is found at the C-terminus of the coatamer beta subunit proteins (Beta-coat proteins). It is a platform domain on the appendage that carries a highly conserved tryptophan. 
25.00 25.00 29.80 29.20 22.20 21.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.62 0.71 -4.44 30 371 2012-10-01 20:40:43 2012-04-20 16:04:06 1 9 303 0 267 372 4 123.70 54 14.51 NEW uDcssVlLNDIHIDIMDYIpPAp..CsDspFRsMWsEFEWENK.VsVpT.sls.sL+sYLcHlhcsTNMpCLT...P-uuLsG.-CsFLuANLYA+SlFGEDALANlSIEK....sscGpIsGalRIRSKTQGlALSLGD+Is ...................................s-pssVlLNDIHlDIMDYIpPAs..Co..-spFRp.MWsEFEWEN..K.VsVso.......sh...p..........sL+-aLpclhcuTNMpCLT.................P......c...t.........u........L........sG....c.............CtFluANLYA+Sl.FGEDALANlSIEK.............ts-u.l.sGal.RIRS.KoQGlALSLGD+l............................... 0 99 154 223 +14658 PF14807 AP4E_app_platf Adaptin AP4 complex epsilon appendage platform Coggill P pcc Pfam-B_21377 (release 26.0) Domain This domain is found at the C terminal of clathrin-adaptor epsilon subunit, and at the C-terminus of the appendage on the platform domain. 27.00 27.00 29.90 28.50 26.50 25.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.34 0.72 -3.80 12 75 2012-10-01 20:40:43 2012-04-23 09:50:09 1 2 55 0 53 74 0 102.90 41 9.81 NEW RPLploT--FGthWhSh.up-hcQslp..psstsslsshLpthpppL+LHsVplIGs....EsIhAspLLssss......CLlHs+lsus.slslhl+osspsLs-slltpCppshp ....RPLpIoT--FGchWlSh.us-sKQslp..spot..ssLs.ssLp.sLpp+LpLHlV-lI.Gs....EGllACpL.Lsohs......CLLHsRl.....pus..sluLWhRSssssLsDhLLhpCQ+sh................. 0 22 26 36 +14659 PF14808 TMEM164 TMEM164 family Eberhardt R re3 Jackhmmer:Q5U3C3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 214 and 330 amino acids in length. There are two conserved sequence motifs: LNPCH and DPF. 
25.00 25.00 25.30 26.70 20.40 21.40 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.59 0.70 -5.18 10 165 2012-04-23 10:24:12 2012-04-23 11:24:12 1 5 104 0 108 146 1 193.00 37 75.81 NEW hhDWsYGGVDsSls.GNGGPECAsFLospQRllEollhhsLuhhtlhhulc+lt.................spsssttppcssu+plLLlhLslsFGlElGFKFAo+TVIYLLNPCHlsTslQIYLLAss.PS+psTslFRLpMahLNGAlLAlLFPllNTRlLPFEhEIYYIQHlLla.VVPlYLL+hGGsYssEPlsDapWulLuhGLhhhYHFslLQlLullTpVNLNNMLCPAlSDPFpG.aYRIaAssHQsLLshlpuKLhshlh ..........................................................................................................h........................hhs..p+hhE.hhh..h.hh.hhhsh..h......................................tp..c.hllh.....hhsh.hhuhphuaKhsp..t....ph..l....ahLpPCHlho..hhp.l.hlLhh.........p...hshhlF..pl..hthl.GshlAhl.FP.h...ssRhls.hEhthYalQHhhlh.llPlYLlh..sG.sYs...........Esht..shtWshl..u.........hulhhhYHFshLp.lul....h..............TtlNLNpMLCP....A..hpDPF.......G..YRlhA.hHQslhh.hhsKhhhh..s................................... 0 50 56 84 +14660 PF14809 TGT_C1 C1 domain of tRNA-guanine transglycosylase dimerisation Coggill P pcc CATH:1iq8_A_02 Domain This short region of the tRNA-guanine transglycosylase enzyme acts as the dimerisation domain of the whole protein [1]. 25.30 25.30 25.30 110.90 25.20 23.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.34 0.72 -4.16 5 13 2012-04-23 10:52:57 2012-04-23 11:52:57 1 1 13 8 11 15 0 70.00 73 12.05 NEW PITKKSAFFKISEESL+WPIVRRAKERAERVsuKFPEpVcHPIFGEIPKYLSLTYPFAQSEuEEDFoIEK PITKuSAFFKVSEEuL+WPIVpRAKERAERVppKFPEslpHPIFGEIPKYLSLoYPFAQSEGEEDFTIEK. 0 1 1 6 +14661 PF14810 TGT_C2 Patch-forming domain C2 of tRNA-guanine transglycosylase Coggill P pcc CATH:1iq8_A_03 Domain Domain C2 of tRNA-guanine transglycosylase is formed by a four-stranded anti-parallel beta-sheet lined with two alpha helices. 
It has conserved basic residues on the surface of the beta-sheets as does the C-terminal domain PUA, Pfam:PF01472. The catalytic domain, TGT has conserved basic residues on the outer surface of the N-terminal three-stranded beta sheet, which closes the barrel, and it is postulated that these basic residues from the three domains form a continuous, positively charged patch to which the tRNA binds [1]. 25.30 25.30 25.50 26.50 25.20 24.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.27 0.72 -4.13 62 193 2012-04-23 13:13:56 2012-04-23 14:13:56 1 5 144 8 131 194 27 71.10 29 20.27 NEW lpplRsIADYQFGtGAGcsLFs-p...hplphS.pTG+lRplhsssc.plATl+usDGhhoLultGAcRLpcshshPp .....................tl+slA-YQFGtGsu.c.tLhsct....hplphS..pTs+l.Rplh.....t.....s.........sc..pls.olRupDGhlsLoltGAchLpphh..P.......... 0 31 81 109 +14662 PF14811 TPD Protein of unknown function TPD sequence-motif Coggill P pcc Jackhmmer:Q9Y2V0 Family This is a family of eukaryotic proteins of unknown function. A few members have an associated zinc-finger domain. All members carry a highly conserved TPD sequence-motif. 27.00 27.00 29.60 30.20 21.50 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.81 0.71 -4.65 33 149 2012-04-23 14:17:28 2012-04-23 15:17:28 1 2 114 0 101 150 64 132.30 41 47.79 NEW ls-pcLtt-ltpsl..hsDp.huPls-ph+.......cshGpEaEhhLcchLcptslsFhsEcc.LRtpGasK........TPDl+LplPlslcGp.....lVsWIESKA.FGD...pcsH..pphhccQh.uYhNRFGP.GhVIYWaGal-.....pl..sp..t.........lll.........hD.pFP ....................................D.hLutpl.pCl..hsD.stauPlsDph+.......+shGpEaEhhLcchLhptsl...sFhsEcp.LRtcGYDK.........TPDhhLp...lPl......ul..cGp.........................llpWIESKAsFGD.......cpoH....psa.lccQahSYhNRFGP.GhVIYWaGalp-l..st..p...tlhlhs.t.................... 
0 43 56 78 +14663 PF14812 PBP1_TM Transmembrane domain of transglycosylase PBP1 at N-terminal Coggill P pcc CATH:3fwl_A_01, Pfam-B_367 (release 26.0) Family This is the N-terminal, transmembrane, domain of the transglycosylases ()penicillin-binding proteins), the multi-domain membrane proteins essential for cell wall synthesis that are targeted by penicillin antibiotics. The TM domain is a single helix, several of whose residues lie in close proximity to hydrophobic residues in the TGT domain. The TM helix seems to be necessary for stabilizing the protein-membrane interaction, and the resulting orientation limits the interaction between PBPb1 and lipid II in the membrane in a 2D lateral diffusion fashion [1]. 27.00 27.00 27.00 27.00 26.60 26.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.37 0.72 -3.67 26 562 2012-04-23 14:23:22 2012-04-23 15:23:22 1 8 557 2 46 248 3 78.00 74 9.53 NEW MS.sDDREPIGRKGKts..pss+ppss.+pRRRcDD.....................--..DDD..p..E.--csMsRKsKs......p.s+pKR.RW.LhLLlKLhlV.FsVl ............MA.GNDREPIGRKGKPo..RPVKQK..VSRRRhcDDD........................D...D..D.YDD....E..DEEPM.PRKGKGK........GR.KPRGKR..GW.LWL.LLK..LhIV.FAVL................ 0 3 11 28 +14664 PF14813 NADH_B2 NADH dehydrogenase 1 beta subcomplex subunit 2 Eberhardt R re3 Jackhmmer:O95178 Family This family represents an accessory subunit of the mitochondrial membrane respiratory chain NADH dehydrogenase (Complex I), that is believed not to be involved in catalysis [1-2]. 25.00 25.00 25.90 27.00 24.50 24.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.59 0.72 -4.31 12 105 2012-04-23 14:29:09 2012-04-23 15:29:09 1 2 86 0 61 100 1 69.70 46 65.04 NEW u.stusHl.shYRphPp.ls+tphhtuElluGhMWaWlLWHhWH-s-tlhGHFsYPDsSpWTDEELGIPPDDpE .................u.tusHl.spYRphPp..lT+p...phhtuEhh.SGhMWFWILW+hWHDs-t.VlG.HFPYPDP..SpWTDEELGIPsDDt.............. 
0 16 21 42 +14665 PF14814 UB2H Bifunctional transglycosylase second domain Coggill P pcc CATH:3fwl_A_02 Domain UB2H is the second domain of the transglycosylases, or penicillin-binding proteins PBP1bs)), the multi-domain membrane proteins essential for cell wall synthesis that are targeted by penicillin antibiotics. The exact function of the UB2H domain is uncertain, but it may act as the binding component of PBP1b with different binding partners, or it may participate in the regulation between DNA repair and/or synthesis and cell wall formation during the bacterial cell cycle [1]. 27.00 27.00 27.00 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.38 0.72 -4.26 119 1054 2012-04-23 14:40:43 2012-04-23 15:40:43 1 9 1036 2 158 684 84 84.40 46 10.57 NEW LhsGtslotpplhpELchLsYRps.s.p...sp...psGpa......shpu....splplhpRsFsFsDG.t.Essp+lplsFss.splsplpshps.sp.shuhhRL .......LcPshslS+sEhlplL-uhpYRpV.o.p...hs...+PGEF......oVpu....so.IEhlRRPF-FPDu.c.Eup..hRsRLoFss.s+LusIhsh-s.sR.phGaFRL................................ 
0 32 68 118 +14666 PF14815 NUDIX_4 NUDIX domain Coggill P pcc CATH:3fsp_A_03 Domain \N 35.00 35.00 35.00 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.66 0.71 -4.65 286 3169 2012-10-02 00:00:35 2012-04-23 16:31:00 1 22 3080 6 661 3685 1180 115.00 24 31.12 NEW shl.lhp.pssp..lL.Lc+RPspG.LhuGLaphPph.-....h..t..................tp.th.................t..t.h...tp...h.shp...sp..h.h.p.....hs.....shcHsFoH.acLcl..p..shh....spls.....pt....t...........s.sshhWhshpp.h.p.p..huLPsshcKllp ........................................hllhp.pp.sp...lLLpc.RPsp...G........L..h.uGLapF...Pt.h.c........t...................p........tppl.........................................................p..p.ht.....pp....h..s.h.s.......ss.........h......p......phs.......slpHs........F....S........H..h+l..cl....pshh..........spls.........st..tst...........................ssshhWhshpp..h.s..s....hulssshc+lh................................................................... 
0 191 386 534 +14667 PF14816 FAM178 Family of unknown function, FAM178 Coggill P pcc Jackhmmer:Q8IXR5 Family \N 25.00 25.00 25.10 30.60 23.40 22.20 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.42 0.70 -5.75 7 112 2012-04-23 16:07:15 2012-04-23 17:07:15 1 2 42 0 51 106 0 318.10 44 44.42 NEW tuusphspspusoYsNoL-aLLQEKcp.th.pEhE+.Lhp-h.php..s.D...t-Es......sLs.EHRtllcRFSVohpuIPshHPGEsVF.......l.ppasLs..LcpSplpP+StlEpLhLpSssspQLohlppGLLoshYhpss.CPlslL+WLFQhhoh.P-..sSsthhuhLa-lolcuh.p.oDpssp...hWsPSLp-VstsFashGA+..uLaPLsshQ.sh..csLhuchphopscpQ..ssssphu.DhS......LtplhKFLsLCshspPsAYTDppLLhLItLLC+huL-spL+LLPpsDhQpLLl.LLcNIp-WssKlppLCpsLSplSsHHHNLLtLVQlhP-hoSRuRpLRppLSLVlIA+hLsppcph ...................................................................................................................t..pYhNsL-hLlpEhccp.....t..h..pEh...pc.L.p-h.ph...p.........p.s...t-p.............sL........EHR.hlc.+FSVo..l.psIPshHPGEplF.......l.pp.asLs..Lc..sSthhsp..SslE...pLhL.pSs.spQl.hhppGhLoshY.h.as...CPlPlLp....WLFphhoh.s-..sSsphhuhLh-l.olcs....p...s.Dpshh.....WhPSLp-lstlFhshGh..p..uL.aPLtshQ.sh..csL.....h.u-.sph...o.ttpp.....stspp.ht..hS............lhplh.KFLu.LCs....plpPp..uYpDp-lhhLI.hLh+h.uL-tpL.+.lP.hsDhQpLLl.L.hcNI..+-..WssKl.cLChulsplSsH.HNLLhLVQhhPshToRuR...QLRp.pLSLVlIu+hLsppc........................................ 0 7 11 19 +14668 PF14817 HAUS5 HAUS augmin-like complex subunit 5 Eberhardt R re3 Jackhmmer:O94927 Family This family includes HAUS augmin-like complex subunit 5. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2]. 
25.00 25.00 25.30 25.30 24.50 24.50 hmmbuild -o /dev/null HMM SEED 632 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.22 0.70 -5.98 8 103 2012-04-24 09:49:48 2012-04-24 10:49:48 1 6 59 0 65 103 0 479.50 25 90.83 NEW p.ucELtRWAsEEMslPsuu..........tPs-sth++LClGpGAcIWtYllpHV+SpRoV+sIRGNLLWYuttsss...................................clp.........R.........+hELppplpcLRAElpcLDpplpthEpEssup-tuhppuh.......pphp-sp+RpLLLcAastpsc+pp+sLp-shp+LpsQhcpLQ-hpR+Acs-...lsFGsss...SAu...................suLEPtVLRDVRcACshRspFL....QsLLpspsptsSshssp........cDphusSaQpWh.....SuVEslLss.....HPPsplLuALptLsucpcsplcp.LsosD........shp.Ds.....Ehp+........sphsD...................pScsppsLPuhppLlQEuWppVspllsppuplpp..cpQsLsp+LpuhlcEsccc.hlsS.sppssh..............hhpL..cpsslhsslc.uL+spsppLpptsuc+pctl.......RpLQsppQpIh-aRpLs-c+QEQIRhLIK.GNSuuKocLsRsstEltphlpcKLlPshpsVsspSpcLpcslpcEs+HFsplsLu..................................sLh+ppssGhphlPssh.SIHRL.pst..h.......suuhtsLscoLulPh.h+APEtLLspAsoh+p-LlaLpcphuLpupuLhs..l..+suL.Pus..sTQtLLphtppp-Kcph-sLsPpL++LhppscpsLEts.plQullsDWWEQPuQhALsp.pppGLolpQWppR 
..............................................................................................................................................tWh.pEhth...............s....t.h....pplChG.p.hh.lWtalhp+Vhppcslphh+tNl..hat..t.t.................................................tht......c..........+..htt.tltpLcs.ltp.ppp.lp.h..pht..tp-.thpp.h.......tphtp.pp+thhLpu.ttthptttp.htp..pplp..hpphpch.t+.tt..t......h.............t...............................hhEs.h.tslRthCp.h.t.h....ptl..s..t.ts.................th.htthh......p.sp.hhts.....hsP..lLtulthhs.c.tt.lpp.hpths........hht-h.....ch.p........................h.s.........................usp....hss..pLlpcth....ttlt.hhsptt.h.p...ptp.hppcL.thhct......hptt..h.t....t.......p...t.tsh...............hhth..chsshhuplp.sLhsps..ppLpphstppp.h........ppLpt+.pcI.cacthhpchpp.hphLl+.uN.sutshhppt.hts.thht...tpllP..t.l...spp.hphlpcEhtth.t...s..................................sl.ptpstt.....Pshh.SIp..pl..psh............................................s.shh.l.ptLths....pusE..h.tshsht..p.hh.hhpp.....t..l.p..h.....tst.....t...stp.hlph.tpp-p...cphh.tl+ph...hppshp..h..lpshltcWa-QPuthsls...h..tG.sh.tW...................................... 0 21 36 49 +14669 PF14818 DUF4482 Domain of unknown function (DUF4482) Eberhardt R re3 Jackhmmer:Q6ZU52 Family This family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF11365. 
25.00 25.00 25.40 29.70 24.80 24.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.85 0.71 -3.53 13 155 2012-04-24 10:34:25 2012-04-24 11:34:25 1 3 38 0 83 137 1 130.10 34 10.45 NEW MDL+cQlcpoE+NWp+EKhELL-+FDsER+EWEsQhK.hQ+KIEcLp+EVchRRp.......uphhs.p+-pshpspshpusss.Ppsup....s-hpuhspcDshtpcpKcppslhupp....................s.hscp.phsccscsshhss.sh-.cK ...........................MDLppQlp.poE+NWs+EKlELL-RhDpERppWEpQhKE...Lpp+lc......Q............LpKtsssh......................sl+.t.hptp.p..c..st...h.pDt-shs.t-h.hsphKcscpsou..p....................s.hhcshpLsspst.s..t...h-.p...................................................................................................... 0 6 11 31 +14670 PF14819 QueF_N Nitrile reductase, 7-cyano-7-deazaguanine-reductase N-term Coggill P pcc CATH;3bp1_A_01 Family The QueF monomer is made up of two ferredoxin-like domains aligned together with their beta-sheets that have additional embellishments. This subunit is composed of a three-stranded beta-sheet and two alpha-helices. QueF reduces a nitrile bond to a primary amine. The two monomer units together create suitable substrate-binding pockets [1]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.42 0.72 -3.98 126 1208 2012-10-01 20:59:24 2012-04-24 16:18:52 1 5 1189 25 217 794 335 110.50 60 39.70 NEW Y.spYDPsLL.PlPRshsR-pLGlss..sLPF.p.GtDlWsuYELSWLNs+GKPpVAluchplPssSsNLIESKSFKLYLNSFNQT+Fso.hcpVppplpcDLSssAsusVsVpl ......................................YtcpYDsSLLQsVPRoLNR-sL....G.....Lpu....psLPF.p.GsDIWThY...EL.SWLN.u+GlPQVAlGclplshsSsNLIESKSFKLYLNSFNQTRFss.....h-p.....VcpTLp+DLSsCAtupVoVpL.............. 0 41 94 159 +14671 PF14820 SPRR2 Small proline-rich 2 Eberhardt R re3 Jackhmmer:Q96RM1 Family This family of small proteins is rich in proline, cysteine and glutamate. They contain a tandemly repeated nonamer, PKCPEPCPP [1]. 
They are components of the cornified envelope of keratinocytes [2]. 25.00 25.00 38.50 26.20 21.80 23.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.64 0.72 -4.10 4 108 2012-04-25 10:24:47 2012-04-25 11:24:47 1 3 16 0 43 102 0 59.60 78 98.41 NEW SYQQQQCKQPCQPPPVCPsPKCPEPCPPPKCPEPCPPPhC.PEsCPP..CQcKCPPV...PPCQpKCPP ........SYQQQQCKQPCQPP.PVCPsPKCPEPCPPPKCPEPCPP.PKC....PEP.....CP....P.....ppC.....QQKhPPVp..PPCQ.KhPP.. 0 6 6 6 +14672 PF14821 Thr_synth_N Threonine synthase N terminus Eberhardt R re3 CATH:1kl7_A_01 Domain This domain is found at the N-terminus of many threonine synthase enzymes [1]. 25.00 25.00 25.00 26.60 24.40 24.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.61 0.72 -3.96 302 3034 2012-04-25 12:31:21 2012-04-25 13:31:21 1 15 2909 4 759 2286 1521 77.50 33 16.58 NEW cYlSTRG.p....s...sslsFp-slLp.GL.AsDGGLalPc..phPp.lsts.plpphpsL....oYt-LAhclhphFl....s-....Isps-L+sllscuY ..................a.STRs..p........s.......pploFupAllp.GL.Ap.D.GGLahPp..s.lPp.hshs...l.c...ph...tsh..........sa.-hAhclLssFl....s-.....lsp..-pLcphlppAa................................. 0 214 441 611 +14673 PF14822 Vasohibin Vasohibin Eberhardt R re3 Jackhmmer:Q86V25 Family This family of proteins function as angiogenesis inhibitors in animals [1-2]. 
24.00 24.00 42.30 25.10 19.60 19.00 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.45 0.70 -5.29 10 165 2012-04-25 13:13:52 2012-04-25 14:13:52 1 5 72 0 95 174 0 192.10 48 64.09 NEW GGVPFalNRuGhPls-tTW............ERMWpHVAKhHP.DGcchsp+IRsAsc...LPKlPl................PoVPsF.....psot..oVs-tLcAlQpYl+cLQYNHTGTQFFEIKKsRPLoGLMElAKEMs+ESLPIKCLEAVILGIYLTNuhssLERFPISFKTpFSGsaFRHIVLGla.huGRaGALGlSRRcDLMYKPLpaRTLS-LlhDacsuYccpaHsLcKVKIGtsVsH-PHSsEpI-WKHsVLslp+.hu+EDlRKELE+auRDMR ...........................................s............h....t.hW............ppMa.ashp.ps.pstp.httl+sss....Lsc.sh.................PplPsa.....p.sh..slsphLpslQpYhppLp...YNaTG.sQFFplcK.RPLsuLM-hAKEMh+EuLPIKCLEAVILul......Y............LTsu.ssl-RFPISFKo.F.uG.....sh..........F+HlVLGlh..h...s...G+...Y..GuLGh.SRRt-LM.KPhsa+oLS-Llh-act.uYpphhHslpKVKl.G..VsH-PHShp.IpWKp.lLshp+.hhtt-hc+pLE+asR-hR................................................................ 0 28 39 58 +14674 PF14823 Sirohm_synth_C Sirohaem biosynthesis protein C-terminal Eberhardt R re3 CATH:1kyq_A_03 Domain This domain is the C-terminus of a multifunctional enzyme which catalyses the biosynthesis of sirohaem. Both of the catalytic activities of this enzyme (precorrin-2 dehydrogenase EC:1.3.1.76) and sirohydrochlorin ferrochelatase (EC:4.99.1.4) are located in the N-terminal domain of this enzyme, Pfam:PF13241 [1]. 24.00 24.00 24.00 24.00 23.30 23.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.26 0.72 -4.67 15 193 2012-04-25 14:50:59 2012-04-25 15:50:59 1 7 134 3 147 186 0 66.60 29 18.74 NEW PcNhGsAl-sVGpLRp+LRc...lAPss..c-GsKRMcWMoplCEtaoL--Lsphs-pDh.-sLLsaahssplsohp ..........................thttAl.plG.LRttlRt.........hsss.........ppts.+..R.McW.................hsplC-hWsLcclspl..s-pDh..csLLphY.psp....p................ 
0 37 78 123 +14675 PF14824 Sirohm_synth_M Sirohaem biosynthesis protein central Eberhardt R re3 CATH:1kyq_A_02 Domain This is the central domain of a multifunctional enzyme which catalyses the biosynthesis of sirohaem. Both of the catalytic activities of this enzyme (precorrin-2 dehydrogenase EC:1.3.1.76) and sirohydrochlorin ferrochelatase (EC:4.99.1.4) are located in the N-terminal domain of this enzyme, Pfam:PF13241 [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.97 0.72 -7.03 0.72 -4.96 20 356 2012-04-25 15:03:26 2012-04-25 16:03:26 1 10 344 3 153 298 5 29.90 43 11.74 NEW cs.usLQIMISTNGsuPRLuuLl+ccIcptl .........G.LpIhVSTsGtSP+LAphIRccItsph....... 0 50 98 134 +14676 PF14825 DUF4483 Domain of unknown function (DUF4483) Eberhardt R re3 Jackhmmer:Q6ZQR2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 326 amino acids in length. There is a single completely conserved residue N that may be functionally important. 
24.00 24.00 24.70 24.40 21.90 22.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.35 0.71 -4.65 27 116 2012-04-26 08:48:36 2012-04-26 09:48:36 1 10 64 0 94 109 0 165.70 23 57.40 NEW shpsohtpNsLL.hKsclG+sppssasLPstc...asYGhhstt.pstGstEsh.pW.........ptptss.sppssp.........DFhphN+tAlppG.lsTA+.......-.htaRpp+s...l+hKs.t.....stptt.........ptssphssshs...aGhso+sso............PltpllptpYtppahpppttpptthppppp.........ttp.....lttT+AShh......++ht..spspp......aKMs+F.cpVss+lpo ............................................psclG+s+.psh..sLPs.s...asYGh..tt..stGs.-.........sh.pW.............................p.ht.p.st..th..........sahthN+tulptG.hhoup.................-.h.aRp.ps...l+hp.tp......tt.t...........................p.s.thssshs...aGh.s+sso..................shhpllptpYtp.ahppttt.t.ttht.pptpt.........................hhps+us.h..........................ppht...p.pt......a+h.+F.pph..th............................................. 0 65 74 81 +14677 PF14826 FACT-Spt16_Nlob FACT complex subunit SPT16 N-terminal lobe domain Coggill P pcc CATH:3biq_A_01 Domain The FACT or facilitator of chromatin transcription complex binds to and alters the properties of nucleosomes. This family represents the N-terminal lobe of the NTD, or N-terminal domain, and acts as a protein-protein interaction domain presumably with partners outside of the FACT complex [1]. Knockout of the whole NTD domain, 1-450 residues in UniProt:P32558, in yeast serves to tender the cells sensitive to DNA replication stress but is not lethal. The C-terminal half of NTD is structurally similar to aminopeptidases, and the most highly conserved surface residues line a cleft equivalent to the aminopeptidase substrate-binding site, family peptidase_M24, Pfam:PF00557 [1]. 
27.00 27.00 27.50 35.30 23.20 26.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.01 0.71 -4.71 75 332 2012-04-26 15:17:54 2012-04-26 16:17:54 1 12 266 8 243 336 0 156.80 32 16.08 NEW IDtssFtcRLptLaspWcpsppt.....hausssulllssGt.ss-..-stYpKosALphWLLGY.EFPcTlhlFs..pc..plhhlsSpKKAcaLcslp..p...........s..ssh.slclhsR..stpD...tpspp.Fpcll-tlc...p....sG.pp.lGslsK-..shpGpFh-pWpphhp....ppsh.cpVDlosul ................................lDtpsFhcRlpplaspWcpspp..........aus....ssul.llhsGt..sc-..-..YtKos........ALp..hWLhGY..EhscTlhlFs.....pc..plhhlsSpKKs..............caL....cplp..p.............sut.slpl..l..lRs.p-......pscp.Fccllctl+...p........sGpp.lGshsKD.....p.pGpFhcpWpchhp....ppsh..cplDloss............................................. 0 81 133 202 +14678 PF14827 Cache_3 Sensory domain of two-component sensor kinase Coggill P pcc CATH:3by8_A_00, Pfam-B_120 (release 26.0) Domain Cache_3 is the periplasmic sensor domains of sensor histidine kinase of E. coli DcuS. This domain forms one of the components of the two-component signalling system that allows bacteria to adapt to changing environments. The ability of bacteria to monitor and adapt to their environment is crucial to their survival, and two-component signal transduction systems mediate most of these adaptive responses. One component is a histidine kinase sensor - this domain - most commonly part of a homodimeric transmembrane sensor protein, and the second component is a cytoplasmic response regulator. The two components interact in tandem through a phospho-transfer cascade [1]. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.42 0.71 -4.19 258 2633 2012-10-01 23:40:40 2012-04-26 17:59:26 1 59 1299 16 414 1784 44 115.40 31 21.38 NEW hslAcslApsPtlh..pu..lp..s.ssss...............................Qshscplppp.ssssFlVlhDhpGlRhoHP.sspp...IGcphh.....sD....ptAL.....pG.csasu.p.....spGoL..G.SlRuhsPlh...s..ss....G.cl.lGhVuVGhhhssl .......................................shAchlAsssplh..pu..lp....p..ps.p...............tl........pshsp.plpc.p...ss..hsalV.lsDt.pul.Rh.sHP....ss..p+....IGpshp...s..sD........tAL....pG....c...p...hs.u..h.............................scG..ol.....G...ul.Rshs..PIh.........s...ps..............G...cl.IGVVulGh.lsp........................................ 0 115 250 340 +14679 PF14828 Amnionless Amnionless Eberhardt R re3 Jackhmmer:Q9BXJ7 Family The amnionless protein forms a complex with cubilin. This complex is necessary for vitamin B12 uptake [1]. 26.90 26.90 27.00 26.90 26.00 26.80 hmmbuild -o /dev/null HMM SEED 437 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.78 0.70 -5.99 16 114 2012-04-27 08:29:45 2012-04-27 09:29:45 1 8 76 0 77 100 0 313.20 24 83.41 NEW hKhWsssssFssAuNWss.sphPCupDhl.FPuphsssl.l.psh.olsshlLPpsGtllLApsu..hhhuusss.sssCts...ttpAhF+sPcpppWaDPss..Wpstsspsuh.........hss-hERVPCpp..........-cVlF..up.ushplsLp.ssphl+ltplphuGpoho+sp.LppaLuochGphhFH.supslpVp.h+....CspsptCsC...............spschh-hlCusl......pCst..spChsPlRP.GpCC.lCGAllplsps...sshDhcphps+lpcthhppshhp.plphtVuhVsppp.........hssplQllllD..pGs.otpuschhtp.hhtchpsphtshtt.shplptuGpPhssssuhs.............lsshlLhsLlLVullusllLh+h...........p.shls+lspWh+c.chhth.t....thspsFh.spFDsss...........ttsssltpls..............uhcutsspspc.psFsNPhF-p 
................................................................+.a...shphpsstsW.p.tthPp.ts.l.Fst......h.l.l.....tsh..tltthhlP.hpGthllsptu...h.h.......s.s..tt....t...........t..h.h.....st....p....WhsPt.Wtstst...........................h..c.EplPCp...........-pVhh...sp.......sshtl..tlt...t.lpltpl.hh..s..pp..hstt..htth.hsp.p.....Gp...Fh...s...ttl.......lt..t.........phC.C...............s...t..........lCt.h.........pC.....stC.tslhP..GpCC.lC..Gu.hl.hpht....t.shpphpttl.phh.......t...tlththphh.p......................lp..hllhc.......tt..st.tht.ht.....tp.tt.h............hph..stt........h..................h..hh...hh.hhhhh.......hh.h........................................................................................................................................................................................................ 0 29 34 59 +14680 PF14829 GPAT_N Glycerol-3-phosphate acyltransferase N-terminal Coggill P pcc CATH:1iuq_A_01 Domain GPAT_N is the N-terminal domain of glycerol-3-phosphate acyltransferases, and it forms a four-helix bundle [1]. Glycerol-3-phosphate (1)-acyltransferase(G3PAT) catalyses the incorporation of an acyl group from either acyl-acyl carrier proteins or acyl-CoAs into the sn-1 position of glycerol 3-phosphate to yield 1-acylglycerol-3-phosphate. G3PATs can either be selective, preferentially using the unsaturated fatty acid, oleate (C18:1), as the acyl donor, or non-selective, using either oleate or the saturated fatty acid, palmitate (C16:0), at comparable rates. The differential substrate-specificity for saturated versus unsaturated fatty acids seen within this enzyme family has been implicated in the sensitivity of plants to chilling temperatures [2]. The exact function of this domain is not known. it lies upstream of family Acyltransferase, Pfam:PF01553. 
25.00 25.00 26.00 48.40 21.00 19.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.34 0.72 -4.29 19 48 2012-04-27 10:01:24 2012-04-27 11:01:24 1 2 38 2 14 50 0 73.80 53 17.95 NEW +S+oFLcscsEQ-LlSsI++ElEuG+LPsslAsGMEELYpNYKsAVlpSGsPpAcEIlLSNM.sshhDRlhLDVc-PF ........SRsFLcsRsEp-LLusI+KEsEuG+LPssVAsGMEELYtNYKsAVlpSGsPpAcEIlLSNM.sshhDRlhLDVc-PF...................... 0 3 9 12 +14681 PF14830 Haemocyan_bet_s Haemocyanin beta-sandwich Eberhardt R re3 CATH:1js8_B_02 Family This antiparallel beta sandwich domain occurs in mollusc haemocyanins. Each mollusc haemocyanin contains several globular oxygen binding functional units. Each unit consists of an alpha-helical copper binding domain (Pfam:PF00264) and an antiparallel beta sandwich domain [1-2]. 24.00 24.00 24.60 24.00 23.10 22.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.09 0.72 -4.32 27 179 2012-04-27 10:19:59 2012-04-27 11:19:59 1 15 36 7 0 211 0 99.90 40 24.82 NEW R+p+DRVFAGFLLcGltsSAsVca.lCps..s....scC.pc..AGhFhlLGG-tEMPWuFDRLaKYDITcsLcphslc......hc-sFplclplsusNGotLsucll.sPollapP .........ppc-RlFAuFLLpGItsSAsVph.lCts....s......spC..phAGpFhlLGGp.pEMPWsFDRlaKaDITcsLcclslp......hcssF......plclclhulsGotL.s.ssllPpPolla.P........................................... 0 0 0 0 +14682 PF14831 DUF4484 Domain of unknown function (DUF4484) Coggill P pcc KOGs (KOG4704), PF09804 Domain This domain is found, in a few members, a the the C-terminus of family Avl9, Pfam:PF09794. The function is not known. 
25.60 25.60 27.00 26.20 18.40 25.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.43 0.71 -4.17 13 120 2012-04-27 10:43:25 2012-04-27 11:43:25 1 6 101 0 85 117 0 151.50 27 30.65 NEW lEPlSWscLAYouFIWWASAGEpc.....cppE..ph-pDopLLAss-sssoP...................................sp.s...........pt.sElulVAYFHRLTsplFssLuDlIscpDsc.....ss.-s-......s........................-s.spD.................tD-s.uPL...................psp.slclTspDhs-MGLDsWSsuDh.FVcElVtpaWGR+AhV-uscIcsCGluIs ...............................................lEPlSWsthhasuFhWWASAGpph........t..p.tpDttlh.t............t.....................................................................................tpsph..tlsllsYF+RLToplhshltDlltppppp....p....t..................................................t.......................................................................ptt.lplsspshtpMsLDsaSttDt.FVc-ls.haa.tRp..h.lpshtlthsh.............. 0 13 43 71 +14683 PF14832 Tautomerase_3 Putative oxalocrotonate tautomerase enzyme Coggill P pcc CATH:3c6v_A_00 Domain 4-oxalocrotonate tautomerase enzyme is involved in the anthranilate synthase pathway.1 25.70 25.70 25.70 25.70 25.60 25.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.69 0.71 -4.16 26 117 2012-10-01 20:38:22 2012-04-27 13:13:24 1 3 81 22 87 147 121 130.80 28 84.02 NEW MPLWpIaHPsssFo.s.p.-KpsLApsITchY........ssh.GLPsFYVsVhFhclsss...shalGGcspss.................FlRIslsH.IARp.........hssc..-p..ppchhp.tlstsLcPal...tD+.GhcWEaHl-EsspcLW+IsGlhPPsssStsE+cWsp-N+ ............MPhaplaHs..ssho.s.ppKpplApsIT.p.hY........ssh..sl.PtFYVsVhFh..clsss....shalGGctpsp..................hlpl.hlpH..lu..Rp..........hpst..-t..ppphhp.tlsthl........t.shh........tpc...uhchp........hhls-sshphhh.sGhh.P..tptt...Wht.s................................................................................. 
0 13 44 69 +14684 PF14833 NAD_binding_11 NAD-binding of NADP-dependent 3-hydroxyisobutyrate dehydrogenase Coggill P pcc CATH:2cvz_A_02 Domain 3-Hydroxyisobutyrate is a central metabolite in the valine catabolic pathway, and is reversibly oxidized to methylmalonate semi-aldehyde by a specific dehydrogenase belonging to the 3-hydroxyacid dehydrogenase family. The reaction is NADP-dependent and this region of the enzyme binds NAD. The NAD-binding domain of 6-phosphogluconate dehydrogenase adopts a Rossmann fold [1]. 30.00 30.00 30.30 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.37 0.71 -4.05 132 7339 2012-10-10 17:06:42 2012-04-27 14:05:12 1 47 2857 51 2191 5638 4290 121.10 28 40.35 NEW GuGpssKhsNshlsAsslhAsuEAlshut+.sGlDspthhcll.su.usupS...ts....h....p......sh......hsphll....s....psF.ssG.FslsLhhKDlslAhsh..ucphsssh.Plsuh.sppla.p.s.u.t.s.p.hs..s.s.t.Daoul.l.+h.l ..................................GsGpssK..lsNphllusphtuhuEA.........hsLA.p+..sGl.c..pt...lh.cs.l........ss.....u...s...u.....s.S......hh.........h.....c..........sp............hst.h....hh...........s..........ps.a....s..P......u....F...t...l.....c...lhhK...D...........L.....slAlcs............Ac....p...hs...h..s.l..P..l.s.ut...s....p.pha.p.p.h...t...s...t...Gh...upt...DhSulhph........................................... 0 558 1233 1760 +14685 PF14834 GST_C_4 Glutathione S-transferase, C-terminal domain Coggill P pcc CATRH:3bby_A_02 Domain GST conjugates reduced glutathione to a variety of targets including S-crystallin from squid, the eukaryotic elongation factor 1-gamma, the HSP26 family of stress-related proteins and auxin-regulated proteins in plants. Stringent starvation proteins in E. coli are also included in the alignment but are not known to have GST activity. The glutathione molecule binds in a cleft between N and C-terminal domains. The catalytically important residues are proposed to reside in the N-terminal domain [1]. 
30.00 30.00 30.10 30.00 29.80 29.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.50 0.71 -4.16 7 561 2012-10-03 01:14:49 2012-04-27 14:19:54 1 4 556 1 52 223 10 116.50 75 54.87 NEW DlQKRARARQlQAWLRSDLMPIRtERSTDVVFAGsKhuPLSpsGptSAcKLhAsApsLLuHGp.NLFGEWCIADTDLALMlNRLlLpGDcVPEtLsDYAoFQWQRASVQRalALSAK ...DLcpRARARQIQAWLRSDLMPIREERP....TDV..VFAGAKK.APL....os....cG.KASA...EK...L.FA...h...AE...+.L...L...u.h.GpsN...LFGEWCIADTDLALMINRLVLHGD-VPEpLsDYATFQWQRASVQRFIALSuK................. 0 5 15 38 +14686 PF14835 zf-RING_6 zf-RING of BARD1-type protein Coggill P pcc CATH:1jm7_B_00 Domain The RING domain of the breast and ovarian cancer tumour-suppressor BRCA1 interacts with multiple cognate proteins, including the RING protein BARD1. Proper function of the BRCA1 RING domain is critical, as evidenced by the many cancer-predisposing mutations found within this domain. A dimer is formed between the RING domains of BRCA1 and BARD1. The BRCA1-BARD1 structure provides a model for its ubiquitin ligase activity, illustrates how the BRCA1 RING domain can be involved in associations with multiple protein partners and provides a framework for understanding cancer-causing mutations at the molecular level [1]. The corresponding BRCA1-RING domain is on family zf-C3HC4_2, Pfam:PF13923. 35.00 35.00 35.00 35.30 34.80 34.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.37 0.72 -4.19 5 67 2012-10-03 15:03:13 2012-04-27 14:43:25 1 22 45 1 38 63 2 62.10 57 9.95 NEW LDRLEKLLRCSRCTNILREPVCLGGCEHIFCSNCVSDCIGSGCPVCYTPAWIQDLKINRQLDSMI ....LtclEpLLRCS+CssI.LREPVCLGGCEHlFCSsCl..uD....ClG..o..s..CPVCaTPAWlQDlKINRQLDuMI............ 0 5 8 18 +14687 PF14836 Ubiquitin_3 Ubiquitin-like domain Eberhardt R re3 Jackhmmer:Q6K1E7 Domain This ubiquitin-like domain is found in several ubiquitin carboxyl-terminal hydrolases [1] and in gametogenetin-binding protein. 
24.00 24.00 24.50 25.10 23.70 23.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.98 0.72 -4.32 6 283 2012-10-03 10:59:06 2012-04-27 15:38:54 1 13 52 9 133 242 1 86.40 46 9.70 NEW uhKLClPGhssLpSPlpKtFRSoDTVGFlEuELKKLLul..Q+EoRLWKhsus-GhELLspP-lTlpEAGlh-..............GQpLLLEEhsEMGNWPP ......................-.hpLC.sss..hsssloppFS+sDTIs.sIE+EhRclFsIssEcE.T..RL..WsKh.hsN.oa..E.LscsDsTlpDAu...Lhp................................................G.Q.lL....VIE.+NcDGTWP......... 0 23 31 57 +14688 PF14837 INTS5_N Integrator complex subunit 5 N-terminus Eberhardt R re3 Jackhmmer:Q6P9B9 Family This family of proteins represents the N-terminus of subunit 5 of the integrator complex involved in snRNA transcription and processing [1]. 27.00 27.00 27.20 27.50 25.00 26.20 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.42 0.70 -4.92 17 78 2012-04-30 08:33:14 2012-04-30 09:33:14 1 5 69 0 57 82 0 214.30 46 23.26 NEW tpslls-LptFlpsss...tp..shp......pLs+sALpLLcslPuuRsAVh-ahshlFccuVppahsshE...............t.ptstssss.-sslpElpssLpp.hlpssPpAWuslIssWulcLlGclus+hstRts.h.t...sls-hlphWhu.CpAh+pLlslhspClspllspss-sClssLLsouspHoPpFDWVVAHluosFPtsllo+lLssGlccFsst ....s.QpL.pElKsFlsulsshhupp.ssc......-hs+suLhLL+sLPsARsAVL-ahpsVF-EuVptalssl-.t..t................sscsssssssl--llpEVpplLpc.FlcsNP+AWAPlISsWS..l..........-LhGpLSSpYosR+t.hPpus.ulNElLQLWMu.CpATRoLM-lhspCLusLlsussDACV-ALLDTSVpHSPHFDWVVAHIGSsFPsTIIoRlLuCGLKDFss.p. 0 14 18 40 +14689 PF14838 INTS5_C Integrator complex subunit 5 C-terminus Eberhardt R re3 Jackhmmer:Q6P9B9 Family This family of proteins represents the C-terminus of subunit 5 of the integrator complex involved in snRNA transcription and processing [1]. 
27.00 27.00 28.70 27.30 23.70 23.20 hmmbuild -o /dev/null HMM SEED 696 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -13.19 0.70 -6.24 6 104 2012-04-30 08:33:54 2012-04-30 09:33:54 1 5 74 0 78 100 0 471.50 28 62.76 NEW KlsSVVGILGHLAupHucsIRKtLLcMFptSLhst........ssshsus.pl++...ATVPFLLQLAuhSsTLLuAlSs-llchL+Pssl..Lsphtscapua.+p-h-shhsLsVHLl.psssGGA.+llphLlDTuossSslhstsssus.psl+EsCccLLQhLLh+LccLV+s+.............s..spsIPFL-ulpscluplssphLplccpRp.htpQLLsLLu.assPSlss-uhsaLLspApssEcLALhhpLhTp.ss.............shuGllshslcpsLupI......aspsls.p-htQLhpNLuphlp..aEcssphss....sh.uphlupAluuNLpshosLL.hps-sslucAhspLLuhhslPp.....sLSss.lLpLoRAsVpaFFhCLpppupstKs.........shpssspLLsRLsuh...SssupthsLppLlEsALa+usscLFGu..cpspcspppuhl........LL-pN++hsTolshstph.SVFHAGVIG+GhK.hhsspphs.-hVspNsppLlslIhpCCSssusss..............sp.ssIshEAsphVulhLVE.VsPDVhhstLsWPsEEauKsTlERDl+IRRpFcctPlLapLLclVAssRPALCYCSVLLRuLhATLlupWcoopc......ossS.hhLpsoshLVslMu.GQLLPPsLusl+-lhPpLsPFEVplLLhs.VWsYMR-NsP.PthFshsucpGhaaRDhoh-u..ssupYsssl+.VLp+NIcpL 
......................................................................................................................................................................................................................................................................................................................................................................hh....u..ps..t.s.h.plht.......................................tp..h.p..................h...........hthh.Nl.hh.t...p.......................ht.th......l...h............hh........th.p.hspllt...hs........t....hL.ls...lphFF..hslp.ps..t...................tlL.c.sth...p..shhhsLp.llcsul...tt.thhGt....t......s.........................Ll..N.+......hstt..SVh.....HuGlI..G+......Gh+..............t...s.p.....p.t.hlphl.tCCts...t..t............................s..c..uhphlulhLV.E.l...ssD........s...h......stLsaP...sE-a...s+.s..ThERDltIt+tF.ppPlLatlLtllAtt.PA....LsYsSslLRuLhAshltpWcu...p..p...p......sst..ps....hh.soppLltlh.....u.upLlPPsLsphp.lh..htshElthlLhp.lWsah+...-psP..PthFh....stpu.hhhRshs.......tt......s..ahssl+.lhppplpp........................................... 0 26 33 57 +14690 PF14839 DOR DOR family Eberhardt R re3 Jackhmmer:Q96A56 Family This family of proteins regulate autophagy and gene transcription [1]. 
25.00 25.00 25.90 29.60 20.70 24.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.04 0.70 -4.59 12 132 2012-04-30 10:33:17 2012-04-30 11:33:17 1 3 62 0 75 135 0 175.90 37 79.55 NEW ps-c--DtWllV-h.s....................................................................shsttssstpsP.....stsuhPss..........sh...................MEESWFVTPPPCFTAtG...ssstlEoSPhEsLLIEHPSMSVYsspssp.tsst..................cs...stsss.tsptppp...h.cps.spsAth.ss.puthLEpspphRtsQpspp+tE+ptLs+pslpRQNhsR-ppsRph+pp..ushlHQPs.Rp ...............................................p-.-.DtWhll-h.s................................................................................................................................h...s.sttss..........s.tshsss................ph........................M-ESWFlTPPsCFTAtG...ssh+lpoSPhEsLLIEHPSMSVYsstsohst.st..................................tsth....spt...c.p.psp.....hptp.t.......tthp.phsAsh.su.psshLcpspph....R....Qhh+p+sE+ptLs+pslpRQNhsR-pps+ps+pp..u.hhaQPs.p................................................................................... 0 11 18 37 +14691 PF14840 DNA_pol3_delt_C Processivity clamp loader gamma complex DNA pol III C-term Coggill P pcc CATH:1jr3_D_02 Family This domain lies at the C-terminus of the delta subunit of the DNA polymerase III clamp loader gamma complex. Within the complex the several C-terminal domains, of gamma, delta and delta' form a helical scaffold, on which the rest of he subunits are hung. The gamma complex, an AAA+ ATPase, is the bacterial homologue of the eukaryotic replication factor C that loads the sliding clamp (beta, homologous to PCNA) onto DNA. 
25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.72 0.71 -4.01 3 1110 2012-04-30 11:55:48 2012-04-30 12:55:48 1 2 1105 10 199 712 93 123.50 45 36.20 NEW PFHWVDALLuGKSKRALHILQQLRLEGCEPVILLRTLQRELLLLVsLKRQSAHTPLRoLFDKHRVWQNRRsLLo-ALsRLStTQLRQAVTLLTRsELTLKQDYGQsVWAELEoLSLLLCHKALAD .............PF+WlDALLhGK..upRAh+ILpQL+hEG.sE...PVIL.LRTLQREL..hlL.ls...L......+....+.........p.....p..u..........p.....s..........P.........L.c..s.L..F.D.+.a..R..VW.Q.NR.R.s.hhspA..LpRLo..s...pLp...pAlp...lLs+hE...lplK....p.sa.upslWs.pLcsLSLhhCt......................................... 0 36 94 149 +14692 PF14841 FliG_M FliG middle domain Eberhardt R re3 CATH:1lkv_X_01 Family This is the middle domain of the flagellar rotor protein FliG [1-2]. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.33 0.72 -3.97 335 2392 2012-10-02 13:19:07 2012-04-30 15:19:29 1 7 2070 8 564 1518 436 79.20 35 23.40 NEW -spplsshlpsEHPQslAllLuaL.csspAAplLutLPp.....c.hps-lhhRlAphcsl....sPcslcclccsLccclsuhsspptsp ......-PpplAslIpsEHPQhIAlILuaL.csspAAclLuhhs-.....c.h+t-VhhRIAshssl....pPssl.pElsclLpppLsu.stpp..p................................... 0 185 361 455 +14693 PF14842 FliG_N FliG N-terminal domain Eberhardt R re3 Jackhmmer:O66891 Family This is the N-terminal domain of the flagellar rotor protein FliG [1]. 
25.00 25.00 25.80 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.03 0.72 -3.94 137 2340 2012-10-02 13:19:07 2012-04-30 16:08:09 1 5 2032 4 556 1742 439 106.20 30 31.40 NEW ttppLoGtpKAAILLluLGc-tuuplh+c.LspcElpplotphuplpplsppphcsVlcEFhp..hhhupshlshGuhcas+plLpcuLGs-+Appllpclt.tshpspsh- ...........h..ppLoGh-KuAILLh.olGE-tAAcV.h.+H.LspcE.....lpplottMApl....pplsp...c...p...l...p...s...V...Lp.E....Fhp.....h.p...p...p..ss...ls......hs..ut..-Yh..+...plLh..+ALG..p-+Assll-clh.tstp.....p..................................... 0 183 356 449 +14694 PF14843 GF_recep_IV Growth factor receptor domain IV Eberhardt R re3 CATH:1m6b_A_04 Domain This is the fourth extracellular domain of receptor tyrosine protein kinases. Interaction between this domain and the furin-like domain (Pfam:PF00757) regulates the binding of ligands to the receptor L domains (Pfam:PF01030) [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.16 0.71 -12.29 0.71 -4.54 30 1237 2012-10-02 14:20:19 2012-05-01 10:27:03 1 65 119 40 458 1185 9 117.10 34 13.30 NEW lCsshCossGCWGPGPsQCLSC+sasRs..........spCVpsCshhpG.sREats.sppChpCHsEC...ttt...tTCsGPGsDpCscCsHap.......D..GsaCVpcCPpGl..utpt..laKYuDtsshCc.CH.NC.opGCsGPs.psCh ...............................Ccs.Cs....s.u..CaGsssspC.l.sC...p....p.....a..p...h..t..........................ppCl..spCs..h...........s...tt...h......tt....pppChtCHPcC.............poC.sGs....Gu....D.pC...........h..........p.C..t....p....hp...............................-....s.pCVsp.CPps.......................................pY..s..s........ps.hCp.Cp..sC.....t..s..Ctt.......s................................................ 
0 183 209 322 +14695 PF14844 PH_BEACH PH domain associated with Beige/BEACH Eberhardt R re3 CATH:1t77_A_01 Domain This PH domain is found in proteins containing the Beige/BEACH domain (Pfam:PF02138), it immediately precedes the Beige/BEACH domain [1]. 27.50 27.50 27.70 27.60 27.40 27.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.17 0.72 -4.20 47 917 2012-10-04 00:02:25 2012-05-01 14:40:39 1 50 255 6 593 862 7 103.80 25 4.48 NEW pllhohpsphlsPhsshpGplplossclhFhss..........................ttppthhs.stshphcW.hsplcpla.hRRYhL+coALElFhsDposhhhsF..sppsppclhpplh .....................................................................................................s.hltsh..h..h.GhL................ls..pp..p..hY.h.ss........................................................................t.t.t......hst.s..pthstp.Whhscl+pla.pRRaLLp.ssALEl.Fh....s..............s......t.....p.....s...h....h.l..sF..spps+pclhptl.h.................. 0 218 303 448 +14696 PF14845 Glycohydro_20b2 beta-acetyl hexosaminidase like Coggill P pcc PF02838 Domain \N 27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -11.26 0.71 -3.48 183 633 2012-10-01 20:56:08 2012-05-01 14:42:21 1 14 263 35 389 763 42 125.00 26 22.32 NEW lW..PtPpt..hph......uspshtlsstshpht.tss...ttt......................................................plLppAhp.Rah...................phlh...............................................s.t.tshh.........................................................................hpt.............................................................................tl...ppl.................p...........lpl...ps.t.t.........l....phssDES............Y.sLpl.................phs.......spIpApohaGAh+G.LEThoQL 
...........................................................................lWPhPpp..hph......usthhh.l.s...thphp.tsss..........................................................sppllppAhp.Rhh..............phlh..............................................................t..t.hhshh.........................................................h..tstt.............................................................................................tl....ppl.................p..lpl........ss.s..ts...........h....phssDES............YsLsl.......................sps..t...tsplpAposaGAl+G.LEThSQL.......................................... 0 139 213 310 +14697 PF14846 DUF4485 Domain of unknown function (DUF4485) Eberhardt R re3 Jackhmmer:Q8N8E3 Family This family is found in eukaryotes, and is approximately 90 amino acids in length. 24.00 24.00 26.40 24.50 23.90 22.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.81 0.72 -4.27 27 127 2012-05-01 14:06:49 2012-05-01 15:06:49 1 7 65 0 79 139 0 82.30 36 12.52 NEW pLDppFphhlthlcshltpLs.ct.-+hhsstWlpKLpps.....psht.+cpRNtYhthLlttlp.psplp.sPFsc.PPsGsL.slsphh .......................................................LDtEFcphlhphcshlhpLspcp.-+ppsshWlcKLspss...tsshht++sRNhYuclLLchLp.cGhL-.uPFscpP.sG.sLtsLspa............ 0 37 42 59 +14698 PF14847 Ras_bdg_2 Ras-binding domain of Byr2 Coggill P pcc CATH:1k8r_B_00, Pfam-B_3317 (release 26.0) Domain This domain is the binding/interacting region of several protein kinases, such as the Schizosaccharomyces pombe Byr2. Byr2 is a Ser/Thr-specific protein kinase acting as mediator of signals for sexual differentiation in S. pombe by initiating a MAPK module, which is a highly conserved element in eukaryotes. Byr2 is activated by interacting with Ras, which then translocates the molecule to the plasma membrane. 
Ras proteins are key elements in intracellular signaling and are involved in a variety of vital processes such as DNA transcription, growth control, and differentiation. They function like molecular switches cycling between GTP-bound 'on' and GDP-bound 'off' states [1]. 27.00 27.00 28.90 27.40 26.60 25.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.31 0.72 -4.11 35 148 2012-10-03 10:59:06 2012-05-01 16:35:46 1 5 140 2 96 150 0 108.10 33 12.03 NEW pslphIhpsGpo+tVNlssChsupplhc+sLKKhulpp............p.psashalhss......................t.ssssh+hLsDsELlsICpu..ssRsE+pRLIL...sppsc..Pstctlpputplhh .......llplIhssGsTKsVNlpsCpsu--lhcpsL+Khshpp................pp.psYshaVLsu......................pssssss+hLsDsELhpICcu..scRsERsRLIL.Rplpts-...Pstcplpputtlh.h................ 0 24 53 83 +14699 PF14848 HU-DNA_bdg DNA-binding domain Coggill P pcc JCSG_Target_393235 / GS13689A, Pfam-B_2593 (release 26.0) Domain \N 30.00 30.00 30.00 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.02 0.71 -4.40 21 175 2012-10-02 15:10:05 2012-05-01 17:10:52 1 2 71 0 39 179 1 122.40 23 58.18 NEW hLKhhLh-NhlT.-spsDahu.pltsssolslc-Il-chhKc.sosl+pETltpslplhpchls-hlhsGhuVNTGlhpssssl+GVaps.sspass....p+poltlshssuspL.RpslucspVphhs.t ........................phhLhcNh.hs....-stsch.hu.ps..pt.ssslslc-lsccltpc.sosl.ppt..slhsllshhpcpht-hlhsGhsV...p...h...G...lhphslpl..p..G.s.h.s......sssa..ss....php..pl...tVsassupcL.+ctltphth.....h....................... 0 16 36 39 +14700 PF14849 YidC_periplas YidC periplasmic domain Eberhardt R re3 Jackhmmer:P25714 Domain This is the periplasmic domain of YidC, a bacterial membrane protein which is required for the insertion and assembly of inner membrane proteins [1,2]. 
24.00 24.00 24.10 24.00 23.90 23.40 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.91 0.70 -4.50 388 2338 2012-10-02 23:57:29 2012-05-02 08:46:08 1 6 2296 4 565 1722 2667 274.10 26 49.21 NEW plslcTchlp.hsI.s....spGGclschpL....t..pY..tsh...........ssps........s...hpL....lsssst........ahuphGhh..s.........s.t.ss......ps...t.apsss...s.....ht.hp..........sphs..ls.Lshsss.s.G.l.....php+sashc.s...s........Yhlslphplp.N.pustslss..p.as..plpps............tt.........t....h.h.....sa.pGssh.............p............h..pchsa.....s-h.p.........................hp....tt.st......Wluhhp+YFsoAh.......l....P.p.........ss...shhsp.hh........................hhth..sh...................h..s..shslssG.sstsh..ssplasGPK...........chchL....................p.......t.........................l................s..........pL-hslD......aG.Wh........halu+PlFhl .......................................................................................................................lslcT-slc.lsI.sspG.GclpphhL....h..pY...pph........................sssp.....................s.......hpL...lpsssth......................Y.ApsG.Lssts........s.ss........spsh.asspt.......s.sh...l..ptp.................................................sphp....Vs.hoasss..s....G..h........phpKsashccs.....s.....................YhlsVshplp.......N..s....u.......spslph.....ssas...plpps.......s.t...................tss....hsh....oa.pGssh.ssp...........pphpKhpa....cclpc..........................................tp.phssp..sG..........Wl.Ahhp+YFsoAa.......I....P.p.......................ss..p.ssh.hsp.phs........s.....................hhth..uh...........................p...s.....shsltsG...psssh..supLasGPc............pctl....................t......s...........l....ss..........pL-.h.s.VD....Y..G..Wh........hFIupPlFhl..................................................................................... 
0 177 356 464 +14701 PF14850 Pro_dh-DNA_bdg DNA-binding domain of Proline dehydrogenase Coggill P pcc CATH:1k87_A_02 Domain This domain lies at the N-terminus of bifunctional proline-dehydrogenases and is found to bind DNA. 27.00 27.00 30.40 29.10 22.00 20.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.16 0.71 -3.89 114 1463 2012-05-02 13:07:34 2012-05-02 14:07:34 1 7 1420 13 300 1184 316 113.30 63 9.56 NEW uhl-uhLpEYuLSocEGlALMCLAEALLRlPDstTtDtLIcDKlusucWppHlG.pS.sShhVNAuTWGLhlTG+llsss.....c..pt...shsusLppllpRhGEPlIRpAlppAM+lMGcQF ...hVpuLLpEFSLSSQEGVALMCLAEALLRIPDpATR...DALIRDKISsG..sWpSHlG...pS...sSLFV.NAATWGLlhTGKLVusp........s.....-ssLSpuL..sRlIuKuGEPlIRKGVchAMRlMGcQF............. 0 58 149 224 +14702 PF14851 FAM176 FAM176 family Eberhardt R re3 Jackhmmer:Q9H8M9 Family Members of the FAM176 family regulate autophagy and apoptosis [1-2]. 25.00 25.00 25.00 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.01 0.71 -4.53 13 190 2012-05-03 10:05:55 2012-05-03 11:05:55 1 8 53 0 110 149 0 132.10 37 48.52 NEW -MsLLSNSlAAYAaI+sNPEphALYFVhGVChGLlLTLChLVlpISC...Rscs+t.psPc++ph+-ss..ss--ss-p--s-p--suDlcssc..............psph-tTL.ssNVFTSAEELERAQRLEERERIIREIWhNGQPDlh..GTtolGRha ..............h.ll.SN.LAuauaIp.spPEpsALYFVsGVChGLlLTLChL.VlRlSC....ppch+.....pts...p.cchh.tppp...psp.-ss....-..s.p.-t-.p.-.sss-.sh.t................pt.pts...shsVa.oShEthEhApRlEcREpIlpEIWhsut.Dh...hstshs......................................... 0 5 14 58 +14703 PF14852 Fis1_TPR_N Fis1 N-terminal tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q9Y3D6 Domain The mitochondrial fission protein Fis1 consists of two tetratricopeptide repeats. 
This domain is the N-terminal tetratricopeptide repeat [1-2] 25.00 25.00 25.00 25.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.27 0.72 -4.65 63 299 2012-10-11 20:01:04 2012-05-03 11:57:25 1 4 237 10 204 283 0 34.80 37 21.62 NEW tpopFpYAWuLl+SpttpDh.pcGltlLpplh+sss ....popFpYAWuLl+SptpsD..pcGltlLp-lh+ps....... 0 51 100 164 +14704 PF14853 Fis1_TPR_C Fis1 C-terminal tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q9Y3D6 Domain The mitochondrial fission protein Fis1 consists of two tetratricopeptide repeats. This domain is the C-terminal tetratricopeptide repeat [1-2] 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.39 0.72 -4.08 60 363 2012-10-11 20:01:04 2012-05-03 11:58:45 1 12 284 10 239 400 24 52.10 39 30.22 NEW R-tLYYLAlGpa+lt-YscA++alctLLchEPsNpQAtsLcphI-c+lp+.-GL ............R-hLYYLAlGp.Y+LscYscAc+Ys-tLLch.EPsN...pQAtsLcphI-cclpK-Gl........... 0 73 128 198 +14705 PF14854 LURAP Leucine rich adaptor protein Eberhardt R re3 Jackhmmer:Q96LR2 Family This family of proteins activate the canonical NF-kappa-B pathway, promote proinflammatory cytokine production and promote the antigen presenting and priming functions of dendritic cells [1]. 25.00 25.00 25.10 25.00 24.90 24.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.66 0.71 -4.40 7 138 2012-05-03 13:55:31 2012-05-03 14:55:31 1 3 62 0 88 131 0 103.30 40 36.75 NEW psstsussppssspushctL-sKlhhL+.EMAaLRAlDVKlhQQLlslNEGIEulKWlhEE+uslTSRsSSLouS.YSLstup.soShRGSasSLpDss..DcLDuISlGSYLDTLA.D-lsE.s ...............tss...........ts.s.hpsL-splthLRpE.MssLRulDl+LLpQLhslNESIEul+WhlEE+usloSpsSSLouS.h.SLhtup..tpp.cGShs.......................................p........................... 
0 12 18 43 +14706 PF14855 PapJ Pilus-assembly fibrillin subunit, chaperone Coggill P pcc Pfam-B_9717 (release 26.0) Domain PapJ is part of the Pap pilus assembly complex that plays an auxiliary role by ensuring the proper integration of PapA into the fimbrial shaft. PapA is the major shaft protein of the pilus. 25.00 25.00 50.30 50.10 19.60 19.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.10 0.71 -4.82 3 54 2012-05-03 16:50:17 2012-05-03 17:50:17 1 1 37 0 1 26 0 178.40 63 99.69 NEW M..NRTTsGLYLAALLlSuSMsoVLQA-ELllRDDFFVADEsRHQWVNE+NGRTGoLNVKGALVSSPCIL-TPEVsLPLpcDNG+YV....LNLKLS+CGDGtS-lPE+cussphNlsVKQSlVLKcGcsslLLS-+KsuGpsRcll+cGDNQLlYhlN+cQYEKIAcsQpcsTt+.chSDu+os..sL+LsIhYE MVV.NKTTAVLYLIALSLSGFIHTFLRAEERGIYDDVFTADE.+HYRINE+GGRTGoLsVSGALLSSPCTLsoNEVsLSLR.-N+....shu....LhL+LuGCGDGGAl.Pu+cuss...MsVsuSlVhusGpsusLLP-+KhuGscHhVl+DGDs.Llhhss+cQp-hLAuhppcsTtc.tcYSD..u+os..hLRLsIcYE............ 0 0 0 0 +14707 PF14856 Hce2 Ecp2; Pathogen effector; putative necrosis-inducing factor Stergiopoulos I, Coggill P pcc [1] Family The domain corresponds to the mature part of the Ecp2 effector protein from the tomato pathogen Cladopsorium fulvum. Effectors are low molecular weight proteins that are secreted by bacteria, oomycetes and fungi to manipulate their hosts and adapt to their environment. Ecp2 is a 165 amino acid secreted protein that was originally identified as a virulence factor in C. fulvum, since disruption reduces virulence of the fungus on tomato plants. We have recently determined that Ecp2 is a member of a novel, widely distributed and highly diversified within the fungal kingdom multigene superfamily, which we have designated Hce2, for Homologs of C. fulvum Ecp2 effector. 
Although Ecp2 is present in most organisms as a small secreted protein, the mature part of this protein can be found fused to other protein domains, including the fungal Glycoside Hydrolase family 18, Glyco_hydro_18 Pfam:PF00704 and other, unknown, protein domains. The intrinsic function of Ecp2 remains unknown but it is postulated by [3] that it is a necrosis-inducing factor in plants that serves pathogenicity on the host. 25.00 25.00 25.60 25.10 24.80 23.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.54 0.72 -4.08 39 124 2012-05-04 15:03:41 2012-05-04 16:03:41 1 6 44 0 97 117 0 102.60 27 20.68 NEW spCss.So.ap.spTss.sSP..hssDCtplhpslt..ps.....G....p.W..s..lt.....s.tsp.........+plsphGoCsFGlps......h.p..ush...th.t...lGspDlhDllps..uIs..pau......ps......G+..VGApGphpC ................Css..oo..ah..spTos..uSP...hssDCttlhpslt..ss...........G....p.Wpht...........s..ssp...............pplsphG.....oCsFGlps................tp...ssh.....th.tlGspDlhcllpp..ulp..pas................ts.......G+..VGupGphpC.................... 0 17 47 73 +14708 PF14857 TMEM151 TMEM151 family Eberhardt R re3 Jackhmmer:Q8N4L1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 338 and 558 amino acids in length. 
27.00 27.00 28.90 33.20 26.50 26.20 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.62 0.70 -5.88 9 144 2012-05-08 07:39:01 2012-05-08 08:39:01 1 4 59 0 100 128 0 347.60 50 78.89 NEW QRPlKQSLotSLCRESHWKCLLLSLLMYuChGsluWCpLspVT+Luhss.......u.....apGpS..hlYH......DSPCSsGYlYIPLAFLsMLYlVYLVECWHCas+sphthKs-lsoVhERlpRhQQAsPCIWWKAISYHYVRRTRQVTRYRNGDAYTTTQVYHERVNTHsAcuEFDYupCGVKDVSKpLhGL-sassTRLRFTKCFSFAsscuEsuYLTQRARFFs-NEGLDDYMEAREGMHLKNVDF+EahlAFsDPs+.PWYspphsFWhAuhhhLSWPLRVlsEYRTAaVHYHVEKLFG.......h-hs........u.oPss....sshtttlsRVsTlDoTELEWHIRoNQQ.LVPSYSEAlLMshuptsstsstos..........................psYut...........hhpsC-RCpR..........osSSSSl..................hSRsuht.s..s....ssRLu........husS+FSLGRlaGSRpo..sLaRSRS .............................................................................................QRPlp.ShstSLCRESH.WKCLLLoLLhauC.huslsWCplspVs+Lshss...............u..ht.Gps..hh.Y.......sSP.CSsGYlYIPLAFlhhLYllYLs..ECWHC.psRppht.h+s.DspoVhphlpRhQQAsPCIWWKAlSYHYVRRTRQVTRY..RNGDAYTTT..QVYHERVNTHsAcuEFDY.....uppG.V+DVS.....K.p......LlGLptt..ssTRLRFTKCFS....FussEuEsuYLsQRARFFs-N.......EGLDDYMEAREGM+LK.s.V.DF+E..hls.as.DPtp..PW.YspthsFWhsuhhhLSWPLRVlscYpTAasHYpVEKLFG.....................................................s.ssst.......s.t..l....sRVsTlD..T.E...LEWH....Ip..sNpQ.lVPSY..SEAhlMshst........................................................h..h..p..ssppC.....p+..........s.opsuh.....................tt.t..................................................................................................................... 0 23 34 67 +14709 PF14858 DUF4486 Domain of unknown function (DUF4486) Eberhardt R re3 Jackhmmer:Q96N23 Family This domain family is found in eukaryotes, and is typically between 542 and 565 amino acids in length. 
25.00 25.00 25.50 25.50 21.80 21.80 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.84 0.70 -6.25 5 92 2012-05-08 08:35:29 2012-05-08 09:35:29 1 4 58 0 60 81 1 326.70 35 26.02 NEW pcaEh+RRGAsTLFNIWsKYcPRLPssYYNEKLLKVGDSLsQIK.................EYKLALhQCYGRYLQQFso.NsDEspsD..lspFKssFFPcGFcDcTAtLTFHALpG+NlCsYQLVC-SDsNLQNcESVppCL+ILSSLRLIMQAALPQEsLCWIIFNGTlHIYoICR+LMsIGQSSKVLEYLLWASMCMESSVPLLSlRYLTWRATLYsAVCQCYYDC+AGIHGEuFARRALuKINEL+QLEpMSSScuopEop+hFREATIKMAVMIFKRuVFESRRKPKulFRPKlRlNLKEsQsLPWPRTsTERLLsEhFDGTASQFLAVLEALSDSNRRlLQTGP.VoD.EsEI+DVVSELFhAGhE.L...LIhuNl+ss.............upLDFPpoSLLEpllsc+NsISV-AAVKFlKLAFoYEEWulF-SLustLlpFLQpQ-DPpuKKAEK-LcLLtAlEPLlss+Rs+.Ghhlsp-s-K-uQospsaLKHhss+psphhssta....oEDlFpLAsTLHsCVCsssQsI..pPDKDIVlDllhFLWQKCKlGlQRlsIs+h-ssKYspKhu .............................................................................................hl.hu-.h....t...........................................................................h....h..hpsh.....s.hth.htts..h.p...sh..hh....p....hLt.....hRhhhphsl.s.t.-.....phhWllaNGolhlYplCcpLh..hh..G......oscslthl.hh.s..hshE........l.L.s.+YL.WRhpLhshlstsh.c.....t.h...pu.hhAp+ul.+lp-L.pLE.hs......t.tt.hh.s................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.......................................................... 0 23 31 41 +14710 PF14859 Colicin_M Colicin M Eberhardt R re3 Jackhmmer:P05820 Family Colicin M is a toxin produced by, and active against, Escherichia coli. 
It catalyses the hydrolysis of lipid I and lipid II peptidoglycan intermediates, therefore inhibiting peptidoglycan biosynthesis and leading to lysis of the bacterial cells [1]. 27.00 27.00 32.20 38.40 24.40 24.30 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.77 0.70 -5.51 2 47 2012-05-08 11:57:22 2012-05-08 12:57:22 1 2 30 16 4 47 0 223.20 56 87.11 NEW phpl....PuT.l.s.t.Gs.p.ou.pVPusGsllsphV......Yuhhp.ssMh.ptLpthcs.h.taGhss.....hI.sslthhpsADh.LlhpPtlSshDAap..hpcsu..Q.shp.hs.pphSGsshTPhhAltHaLhGNGsptSVpIupIGlp.oP.KIsplhsIIpouhlGphsVshphoahTGp.sVIsthYLGsITLphpGplThsssGohoasGVV+uasD+YDhNASoHRsshsEuhT....clGthhsuK-YtI.lsGEl.I..ShtR .....................................................................s.....................................................MslpsLptlcDalcpHGhss.....hI.sslthhpsADh.LlhpPtlSshDAap..htcsu..p.shpphsh+phSGNVhTPIsALAHYLaGNGA-RSVNIuNIGLKISPhKIsQIpDIIcS..Gs.V..GTFPV...Soc..Fo...+AT.....Gc...hsVI...o...uuYLGNITL+TcGTLTISAsGoWTYsGVVRSYDDKYDFNASTHRGlIGESLT....RLGAMFS.GKEYQIllPGEIcIp.SGKR................ 0 0 0 2 +14711 PF14860 DrrA_P4M DrrA phosphatidylinositol 4-phosphate binding domain Eberhardt R re3 Jackhmmer:Q29ST3 Domain This domain binds to phosphatidylinositol 4-phosphate. It is found in Legionella pneumophila DrrA, a protein involved in the redirection of endoplasmic reticulum-derived vesicles to the Legionella-containing vacuoles [1,2]. 27.00 27.00 49.40 49.30 25.80 18.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.55 0.71 -4.36 3 23 2012-05-09 07:41:44 2012-05-09 08:41:44 1 1 8 9 3 20 0 112.70 42 28.65 NEW cNLcEsl..KhupccKhtuRpHcaTA...s+psspshKE+YpQlKGDuLK+sILs-LKDcLAEIcsh-sLK-hltEFKuSsEYpILAKGQGLTTKlLGLKTSSp+uVEcIFcEAcEcIpSscp ............pshc.sl..c...stKhtuR.Hhass...s+hhs.phpEpYptlKGDhLK+sILp-LKssLtcIssh-pLc-hhtEFKsSsEYpILucGQGLhT+shsLKTSSh+ulpchhcEscccIpsp..... 
0 3 3 3 +14712 PF14861 Antimicrobial21 Plant antimicrobial peptide Eberhardt R re3 Jackhmmer:P86698 Domain This family includes plant antimicrobial peptides [1-2]. They adopt an alpha-helical hairpin fold stabilised by two disulphide bonds [2]. 27.00 27.00 36.50 30.10 24.90 21.20 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.86 0.72 -4.47 22 86 2012-05-09 09:58:20 2012-05-09 10:58:20 1 7 8 0 22 72 0 31.30 45 52.48 NEW spspccCc+pC.pHHp..D.hc+QpClpcCcp+c ..scupccCR+pC.pHH+..D.W++QpChp-C+p++. 0 0 8 15 +14713 PF14862 Defensin_big Big defensin Eberhardt R re3 Jackhmmer:Q0H293 Family Big defensins are antimicrobial peptides. They consist of a hydrophobic N-terminal half, which is active against Gram-positive bacteria, and a cationic C-terminal half, which is active against Gram-negative bacteria. The C-terminal half adopts a beta-defensin-like structure [1,2]. 25.00 25.00 25.00 25.00 24.60 24.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.97 0.72 -3.91 4 41 2012-10-01 20:50:19 2012-05-09 13:58:57 1 1 10 2 2 47 0 78.80 58 67.17 NEW AIPllYhGAsVuPsVWsWLVshhGAAAVsAAulp.......puSsDsHSCAsNRGWCRSpC..FpHEYlDsapSuVCGpYcCCRs .................Ll..PlA.YAGhTVSsPVFAALVusYGsYAVhRYsIR....................pusp..DSHSCANNRGWCRsoC..FSHEYpDWaN.ssVCGSYcCCRP............. 
0 1 1 2 +14714 PF14863 Alkyl_sulf_dimr Alkyl sulfatase dimerisation Eberhardt R re3 Jackhmmer:Q9I5I9 Domain This domain is found in alkyl sulfatases such as the Pseudomonas aeruginosa SDS hydrolase (Swiss:Q9I5I9), where it acts as a dimerisation domain [1] 26.00 26.00 26.00 26.80 25.90 24.80 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.37 0.71 -3.95 196 988 2012-10-11 20:01:04 2012-05-10 11:46:15 1 8 790 10 240 798 507 139.00 47 22.62 NEW hTssEIA.-plp.LPssLsp.paasRuYYGolsHNs+AlYptYhG.WaDGNPAsLpsLsPt-pAc+YVchh..GGs-pllppAcp...saspG-..........YRWuAplls+lVa.............A-PsNp.p.........ARpLhAc........sh-QLGYpuEsusWRNhYLoGAhELRsGs ........................hThsEIu-hl+..LPsuLsp.sWtsRGYYGSlSHNs+AVYphYLG.aaDGN.P.AsLpPhsss-huc+YVphh..GGu...spVlphApc...uhcpG-..........YRW....uAcllcpllh.............AsP.ssp.s.........A+sLpAs........shEQLGYQAESu.sWRsaYLoGApELRpG.l........................................ 0 55 123 185 +14715 PF14864 Alkyl_sulf_C Alkyl sulfatase C-terminal Eberhardt R re3 Jackhmmer:Q9I5I9 Domain This domain is found at the C-terminus of alkyl sulfatases. Together with the N-terminal catalytic domain, this domain forms a hydrophobic chute and may recruit hydrophobic substrates [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.44 0.71 -4.14 140 895 2012-10-02 14:08:01 2012-05-10 11:49:45 1 6 760 10 211 992 282 122.00 34 19.64 NEW sssos-hltshssc.hhDhlAl+lsus+At.......st.clslshhhs.....Dh.s.....c...........patlplpNuVLst....hpstp.......ssA-sTlol.s+ssLhslh.hstsshsphhtsspl...pl.pGDtstlppLhuhLDph-.....stFsIVsP .....................................................s.suSsDslpuMos-hlFDahu..VRL..s.usKAs..........Gp..slslNashs.....-...s.....-................shpLpLpN.uVLsa........pcshp.......spADsoltl.sRpsLpsll..h....G...psphsphlp.u...tcs..+l...tG.ssstLpclluhLDsFD.....hhFNIVTP............................... 
0 53 111 163 +14716 PF14865 Macin Macin Eberhardt R re3 Jackhmmer:B3RFR8 Domain The macins are antimicrobial proteins [1-3]. They form a disulphide-stabilised alpha-beta motif [3]. 27.00 27.00 28.00 34.10 25.00 18.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.69 0.72 -3.87 6 15 2012-05-10 12:11:02 2012-05-10 13:11:02 1 1 8 2 2 17 0 57.30 45 63.35 NEW sCa-sWSRCoshoSthTGlLW+oCs-pCK.cLG+psGpCh.sPS.sCP..sch.pshQCpCa sCa-sWSRCothoushTGILWcoCssRCK.ChG+ssGsChpsPS.sCsh.pc..csaQCpCh.... 0 2 2 2 +14717 PF14866 Toxin_38 Potassium channel toxin Eberhardt R re3 Jackhmmer:Q9NJC6 Family This family includes scorpion potassium channel toxins [1-2]. 27.00 27.00 27.00 27.00 26.90 26.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.51 0.72 -3.88 13 40 2012-10-01 23:31:40 2012-05-11 08:49:57 1 1 17 0 0 57 0 56.80 41 66.59 NEW splKstLpplhcKlht....l.upSpaGCPs...I-paC-DHCps.c+t.GpC-shcCpChp..u .................s.pl+phlpsVlHKl........uKopauCPs...hpsaC-cHCps.c+ccGhCHG...h...K..CKCsh..s... 0 0 0 0 +14718 PF14867 Lantibiotic_a Lantibiotic alpha Eberhardt R re3 Jackhmmer:P86475 Family Lantibiotics are two-component lanthionine-containing peptide antibiotics active on Gram-positive bacteria [1-2]. 27.00 27.00 37.50 36.90 25.20 24.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.73 0.72 -4.70 14 26 2012-05-11 09:53:12 2012-05-11 10:53:12 1 1 26 1 4 22 0 31.70 45 48.22 NEW ssssssshslShsLGNcGthCThThEC.ssCp .h.sshsphhhlSpsLGNcGtlCThThEC.ssCp. 0 2 2 4 +14719 PF14868 DUF4487 Domain of unknown function (DUF4487) Eberhardt R re3 Jackhmmer:Q9NSG2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 209 and 938 amino acids in length. There is a conserved WCF sequence motif. There is a single completely conserved residue W that may be functionally important. 
25.00 25.00 31.00 25.90 21.80 23.80 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.86 0.70 -5.80 12 134 2012-05-14 11:54:13 2012-05-14 12:54:13 1 2 71 0 67 136 0 387.30 29 64.66 NEW hslhssoWKhlIp....pt+sslcsplphp...-IlssLhpslh.Sh+sshp.........hhpsshp-...shshp.hp+shh.s+FahsslV+hspta.s.h.tsspplhpLhl.l.hSpFh.uLpttplscstpE.luphh.hshssLlspLLsts.....hhpphhtphLs.......hssc..p.....hsp.shhhh.s.hD+lso.s.plhslassss...tphpcl.lhp..slFtshhpsSsElp.sspLt..............................Gs.upupt..lh.olYptlhhpLpsah..ss.hsss.tascLEthLLpslLpsphlst.lsh-lWCFlu.Rausu-LssphlshlspLlKsh........P........usshphhs..LshL..LpR....hhh.hssphpsphlpphpsp...psps..........hhlhcthsL..puLssp....l+p...pstcplhsshhu.hhppa.p..........sosohGtlt..stsLSusLtsppsussslDt+plsslltllsphhshl..cpltsc..h.pphhshhLslluhh.hphhpsphlppllptL.psLhhppsssp......l+hs...hhpFluuLGch.hsps.psthsstl.....pLaphLLp-cpWhlhphAlpAFshFApcTspppl ........................................................................................hhhp..........h.....h.....h....h......h....................tt......t.t.h.hhhh.h+ahh..hhph.t.a...s.h.t.h.tp.htLhl.h.tS...tF..SL.ttphsKutpE.hsshh.hshssLlstL.Lsht.....hhp.lh.sh.L-........h.sE..p....hs...hh.h...hDphspts...h.slhshss...pptspl.Lhp..hlF.sh.phSsELp.sspLt..............................t..sputt..h..shaptlh.tLpshh..h...h.ss.ha.tl-hhLLpshht..p..s..l..-hWsFhh.+..............hup.phs....hh..lh..lht.h........s...............t......h.......ht.h..ltp....h....h.................................hht..........................h..h....t.h........t........th..................................h......................................................................................................................................................................................................................................... 
0 18 30 46 +14720 PF14869 DUF4488 Domain of unknown function (DUF4488) Coggill P pcc Pfam_7936 (release 26.0) Family In most members this family covers almost the whole sequence, but a few member-sequences also carry a TonB_C domain, PF03544. 25.10 25.10 25.10 36.00 24.70 23.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.68 0.71 -4.59 11 63 2012-05-15 08:09:31 2012-05-15 09:09:31 1 4 49 0 9 41 0 130.40 55 67.57 NEW psLpGVWQhCh.hspss-lsucLcsuss.LKlLS-DspFhNlhh...hs.suAIIhupGpYch.SDs.......sYsEplEK.slaLshlsGp-N.lphEhhc.....DsLhhl+ahlsschs....spWhpEhW+RV..PshhP.s...hsc .....u.sLcGIWQLCaYVS-sP-lPGtLKPSNo.FKVLSDDGphlNhTh...hPsus.AIITGYGTYcQhoDs.......oYpEsIEK.NIHLPhLcspDNlLcFEht-.....ssll+LKYFlcsDhNGNElNsWaaETWKRVpMPshaPcDlVR...... 0 2 8 9 +14721 PF14870 PSII_BNR PSII_YCF48; Photosynthesis system II assembly factor YCF48 Coggill P pcc Pfam-B_547 (release 26.0) Domain YCF48 is one of several assembly factors of the photosynthesis system II. The photosynthesis system II occurs in Cyanobacteria that are Gram-negative bacteria performing oxygenic photosynthesis. One of the three membranes surrounding these bacteria is the inner thylakoid membrane (TM) system that is localised within the cell and houses the large pigment-protein complexes of the photosynthetic electron transfer chain, i.e. Photosystem (PS) II, PSI, the cytochrome b6f complex, and the ATP synthase. YCF48 is necessary for efficient assembly and repair of the PSII [1]. YCF48 is found predominantly in the thykaloid membrane [2]. It is a BNR repeat protein. 
27.00 26.00 27.00 26.00 26.90 25.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -12.09 0.70 -5.43 23 762 2012-10-02 00:45:24 2012-05-15 10:59:18 1 27 377 1 329 819 370 140.60 23 53.77 NEW hsshsssPWchlpLso-s..slLDluFs..sspHGaLVGoptolLETsDGGcoWctRs...LDls--.saRhpSVSFsGsEGWIlGcPulLLHTsDGGcoWoRlsLSsKLPGsPhhIsALGss.sAEMsTss...GAIYcTpDuGpsWcAtVpEsl...................GslRslpRus-GcYlAVSSRGNFYuTWpPGQshWpPHsRsSSRRlQsMGFs.c...GpLWhlsRGGplpFos....sss.-sWsc..........shhPlhosGaGlLDLAa+ss.s-lWsuGGuGoLlsSpDGGcoWp+Dpss-slPoNhY+IhFhss.....spGFlLGpcGlLLRYs .................................................s.....................h..pt......lhs.lhFh..stp..pGas.......V....G...p.........t.......u.....h...l.....LpTs.DGGpoWp..ttt................s....h....t...p................................h......s..l...t....h......s.p..p.....s..a..h.....s.......G.........p.........u....h........lh+..op.D.sGtsWpt........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 128 201 276 +14722 PF14871 GHL6 Hypothetical glycosyl hydrolase 6 Coggill P pcc Naumoff D [3] Family GHL6 is a family of hypothetical glycoside hydrolases. 
35.00 35.00 35.10 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.66 0.71 -4.13 13 111 2012-10-03 05:44:19 2012-05-15 14:59:04 1 3 70 0 59 240 54 134.20 26 19.46 NEW Dscphscth+ch+usolslFupstaG.hsaYsop.l....ts..c..HPtL..p.t....DlLtEtlcAs+ccGl+Vslhhshs.hccplhppHP-WthhstsGt.........stthstst.a.p...lChNSsYh..-.ahttpl.cEslc...th..slDGlFhDh ............................................................tphhphlcpuplsulsl.upsttG.huYYPoc..l...............tp..h.......pPtL.....p..p.....................Dllu-hlcAs+ctGl+lhs..h.h........sh.....s.....h....-...p......p..............h.h.........c.pHP-Wh...hh....stsGp...................tphhpss...aht..........hCh.N.u.s.Yh..-..alhppl.cEllp...pY...ssDGlFhDh........................................................... 0 36 52 54 +14723 PF14872 GHL5 Hypothetical glycoside hydrolase 5 Coggill P pcc Naumoff D [3] Family GHL5 is a family of hypothetical glycoside hydrolases. 23.70 23.70 26.50 24.60 22.80 22.20 hmmbuild -o /dev/null HMM SEED 811 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.45 0.70 -6.77 10 27 2012-10-03 05:44:19 2012-05-15 15:19:15 1 1 22 0 6 24 9 739.50 52 96.71 NEW 
ppLL-WAssI-p.S-sThFEKAQplAsRLGAHYRs.DGLTEIGFWTPELuu-lIQs.+sIaLEVFTPl-sIDhpts-QslpF+RDplpLc+QGEYtWGVVuGL+AGTR-QhGSFYWLRYlD.p.sclpsIRDsLAYSLPYGVFAPAELYDhcsLQccRADLsYFcppuup.............p..s.....sss..........lsRVPAPpNILQLHVsTASPsGTLuGLTclYQRIucKLusupPLTPAEpNYlGYDAVQLLPIEPTlEYR.sE.pss.cHcFFslcs.........c-.sphss.s..l..pstcl.....clsL+KP-TQNWGYDVsIlGSuATNPAlLpTLRPDElVDFIATLHNFsTGPIQlIYDLVYGHADNQul-LLNspFLKGPNMYGQDlNHQsPsVRAILLEMQRRKlNTGADGIRIDGGQDF+FFNPLoGcVEYDDsYLLAMuDlVQ-IGstpRhLFTIFEDGRPWPsEGWEEISTYRDLIEL+P-SFQWGPLIFAHNTPsLpGFWD+KWRRVCEVMhpG-+WITGCGNHDTVRRGNQlssst.sINWpLGsTLPEVLpNAYDNPAlsLWVYGFuPGLPMDFINusMRAPWGFFRNTDDRYGVKVVAEEh.GFLDWQloPElYppsp.lFs+LKpLGFo-L-tLRQFh+ALppAht-oDYDL-cVAphCQpslGssst.p-.............Lpclshs-tssFLssLDVuKLKpFAhAFMEDsH-hCNVp+apDpl-ssQsuFNLALRcFR+u+PWL+cNLs..s.s.DRFN+Io-cppTlFYGlRssPh-t-......sptpspplAMVAHMGGEPhTVsLG-WLpLDLscWplAIASPGLc....l....c...D....L+u..FEL+DSQulLLE .............................t.pLhsWAtslpp.SstshFptApplAp+LGAHaps.DGLTplGFWTPcLsup.hhpp.tpIaLEVaTPhptIDhpusp.QslhF+R-hlpLtppGEahWuVluGh+sGoR-phGSFYWLRYhD.t.sphphItDPLuYSLPYGVFAPAElYDlcphQtpRsDhsYhcppss......................................p.........t...............ls+lssPtNILQlHlsTAospGTLsGLTclYpRlucKlhpt.sLoPsEpNYlGYDAlQLLPlEPTlEYR.hc..p..tptFFshts.........c-...............tpl.....plpLpKPcTQNWGYDlsIh....GSuATNPulLtThRPDElVDhIATLHNFPsGPIplIYDlVYGHA...DNQu.hpLLNtpahK....GP..N..MYGQDLNHQ.PhVRAILLEMQRRKh.NoGsDGIRlD..GuQDF+aFNPloscVE.DDsYLhtMuDlVQ-IusscRhhFsIFEDGRPWPp-GWE-hSTYR-LI-hc....P-uaQWGPLIFAHNTPsLpuFWp+KWcRVCEVhhpG-pWITGCuNHDTlRRGsQls.p.....t.pINhpLGsTLsEVLpNAYDNPAstLhsYGFsPGLPMDFlNAhh+APWGFhRNTD-RYGVKVsuEEs.GFLDWQlsPEhYptst.hFspLKtLGFpcLt.LppFhcuLtpAhhpsDYsLptlAchCptshs.pst.pt.............hpphthsp.sthlppLsVsKLKpFAcAFMEDsHEhCsVSpat-pl-stpssaNLtLRpaR+s+PWL+cNLt..ss.D+.Fs.pIps.sspTlFYGlRssP.ptc......s...stpVshVsHM.tG.-PhpVslGDWLpLDl..s..cWplAIAoPGlp....l....-...s....Lps..FEL+DuQullL.............. 
0 1 5 6 +14724 PF14873 BNR_assoc_N N-terminal domain of BNR-repeat neuraminidase Coggill P pcc Pfam-B_13890 (release 26.0) Domain This domain is usually found at the N-terminus of the BNR-repeat neuraminidase protein family. 25.00 25.00 25.60 25.80 22.30 23.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.24 0.71 -4.41 37 147 2012-05-15 14:59:52 2012-05-15 15:59:52 1 11 89 0 21 137 9 139.20 24 23.46 NEW tus-s....lhlcpsplPlLlc+psNslhhlRlpsppspt...LsclsLshstsssLsDIpul+LYhuGo.cuhpcps+thhtPlshh..........................spplsLp.....us.pLh.sGsNaFWlSlphK.sssoLts+lssphsplphsspph ......................tp.....lhlcpsplP.lL.lc+psNslhtl+lpsppsps...LsclsLshstssslsDIpulcLYhuGs..cuhpsps....+hhhss.lsh...........................spplsLp.....sp.pLh.sGsNaFWlolph+.ssssLts+lssslsplphsspt.h.............. 0 6 16 21 +14725 PF14874 PapD-like Flagellar-associated PapD-like Coggill P pcc Pfam-B_1987 (release 26.0) Domain This domain is a putative PapD periplasmic pilus chaperone protein family. 27.00 9.40 27.00 9.40 26.90 9.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.40 0.72 -4.07 5 621 2012-05-15 15:06:52 2012-05-15 16:06:52 1 29 105 0 426 554 6 86.70 17 7.00 NEW sPpLEVpPspVcFGpVlPGpRYltTVcLTNsSTVPCRYRVRlssss+shLpVpYs+QFVAPGLTscscVELsGoQPhGsMcupLsVsHEGGsl-VsVchcTs ..............h...hplps.s.plcFG..slh.hsp........s.h.s.p.s..lpLpNsu..hl..s..s..pa.c...l........p......h...s..p............t....t.....h.....h...ph........................s.......lss.t..p........h........l.h......................................................................................hhh................................................................... 
0 201 249 315 +14726 PF14875 PIP49_N N-term cysteine-rich ER, FAM69 Coggill P pcc manual Domain The FAM69 family of cysteine-rich type II transmembrane proteins localise to the endoplasmic reticulum (ER) in cultured cells, probably via N-terminal di-arginine motifs. These proteins carry at least 14 luminal cysteines which are conserved in all FAM69s. There are currently few indications of the involvement of FAM69 members in human diseases [1]. It would appear that FAM69 proteins are predicted to be have a protein kinase structure and function. Analysis of three-dimensional structure models and conservation of the classic catalytic motifs of protein kinases in four of human FAM69 proteins suggests they might have retained catalytic phosphotransferase activity. An EF-hand Ca2+-binding domain, inserted within the structure of the kinase domain, suggests they function as Ca2+-dependent kinases (unpublished). 21.00 21.00 21.00 21.00 20.70 19.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.23 0.71 -4.57 15 182 2012-05-16 11:38:45 2012-05-16 12:38:45 1 5 71 0 108 161 0 138.70 34 35.20 NEW uRLshh+hKYLlhsWlullluSWVlYhpYs..oYoELCRGpsCchhI...C-+Y+pGlloGSsCpsLCspcolh.ht+C......lSsssspQl.....aouhWp-p.llIKCthpcsh+tchs.p.hs+p-hshacpPT+GTShpEF+EMV+salKsKlG-QssLssLlspllsl .........................................Rhshh+h.hhhhshlslhl.ushl.hhhtYs...saoEhCpsc.s.+hhl..............C-cY+pGhlsGshCpsLCspcslh.at+C......Lss.pssppl.....atuh.Wps.p...sllKsthcpshc.s.htsp....scc-hs.h..h-tP....ohpcF+EMlhshhKspLG.phs...sthls.ll..h.................................................... 1 21 29 61 +14727 PF14876 RSF RSF1P; Respiratory growth transcriptional regulator Wood V, Coggill P pcc Pfam-B_36578 (release 26.0) Family This is a family of transcriptional regulators that determine the transition from fermentative activity to growth on glycerol [1]. 
27.00 27.00 35.70 34.80 24.10 24.10 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.26 0.70 -5.66 5 29 2012-05-17 09:46:49 2012-05-17 10:46:49 1 1 16 0 8 23 0 327.70 73 82.81 NEW MKDLNPEMGKFATTKGPPQDNRGMVDIATLPNFPANRSGTPREEMYLAPNKMETPRTLNMNMVPDYLQKENFSPDFSSATVSAKSSPVNVTHDESLPLGTIESNSTKDSKYAVQRQQQQVVDFIENNMQLLSSETLNFRSDIMKTLELPIPKRRDIKGNHLSKLLFAKSPLTINTYCQFYDRRTKRICNQEMIWKDKNSREKHGSRKYQRHLSKVHDVQLTPNNFTEFFDHNSPLFQECYDYQSRLMRDLLVEPDAKFKEKKKKKKGDVNGNHPETGSSLINHQVQQQNVRELQSKIAMNDLIEILIDLNIPFSVLDYQPMRNWLIKYSIISTDTLPDEVYFKTDPGVNELEHNSSNLNNSNSGTPHNHNQNQH .............................................MKDLNPEMGKFATTKGPPQDNRGMVDIATLPNFPANRSGTPREEMYLAPNKMET.RhLNMNMVPDYLQKENFSPDFSSATVSAKSSPVNVTHDESLPLGTIESNSTKDSKYAV...QRQQQQVVDFIENNMQLLSSETLNFRSDIMKTLELPIPK..RRDIK...G.NHLSKLLFAKSPLTINTYCQFYDRRT....KR.ICNQEMIWK..DKNSREKHGSRKYQRHLSKVHDVQLTPNNFTEFFDHNSPLFQECYDYQSRLMRDLLVEPDAKFKEKKKKKKGDVNGNHPETGSSLINHQVQQQNVRELQScIAMNDLIEILIDLNIPFSVLDYQPhRNWLIKYSIISTDTLPDEVYFKTDPGVNELEHNSSNLNN..SN..SGTPHNHNQNQH................................ 0 1 4 6 +14728 PF14877 mIF3 Mitochondrial translation initiation factor Wood V, Coggill P pcc Pfam-B_22619 (release 26.0) Family This is a family of mitochondrial initiation factors IF3. 31.10 31.10 32.80 62.20 30.30 29.60 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.06 0.71 -4.91 14 37 2012-05-17 10:23:41 2012-05-17 11:23:41 1 1 36 0 26 37 0 177.60 35 49.66 NEW +KslphcasoGS-+AppAhpsllsclathsppspl+hlsspssclEppslhchspslDLscpGlthVssp...pspt...plPLVKhVcsppAlKpYSDcLAppKEcELlphG..hs.+phtp+.cs-+ccsshKhl+lSWpISssDLspQKspEItspL...cKGp+lhl.alscKsshsss..stsc-p .......+.hhhpasoGo-+A+pAhpsllsclaphsp.ptplchlsssssplcpsslpphspslDLscpGLplVshc..................pssptp.plPLVKllcs+.hlKpYSDhLAppKEpELhphG...sh+phtpphps-+Kc.sshKpIplSWpIsssDLppQKspEIhphL....cKGp.+lhl.alssKpshsss......................... 
0 3 14 25 +14729 PF14878 DLD Death-like domain of SPT6 Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain This DLD domain maintains the characteristic overall topology of death domains, as it consists of a six-helix bundle with three stacked antiparallel helices and an additional helix inserted between the final two helices of the bundle. Although it is unlikely that the Spt6 DLD functions in an apoptotic process in yeast, its prominent location and the observation that it displays the most highly conserved region of the Spt6 surface suggest that it mediates important intermolecular interactions [1,2]. 35.00 35.00 38.40 38.40 31.60 32.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.53 0.71 -3.98 4 271 2012-05-17 11:48:29 2012-05-17 12:48:29 1 35 233 2 208 285 1 113.10 46 7.70 NEW h-lLDuoRlHPEsYEhARKMAsDALEYDEs..AEctsstGslpElLE.......pst+Lc-LsL-uaAEELERpsatcKt.TL.cIRhELpstYc-LRssa+s.ss-EIFpMLTtEoPET ....-hLDsTRlHPEsY...-hARKMAsD...AL.E..hD........E-........t.p..-...p..s...s...sG....A.l.cc.l.l.c..........................s.s.-.+Lc-Ls..L-taA-pL...E+p....thtcK.+tT...L.sIRsELpssYc-LRpsap...ss-..ElFsMLTGET.-......................................... 0 70 117 174 +14730 PF14879 DUF4489 Domain of unknown function (DUF4489) Coggill P pcc Pfam-B_28643 (release 26.0) Family \N 27.00 27.00 28.20 76.20 26.50 18.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.86 0.71 -4.51 17 32 2012-05-17 11:57:03 2012-05-17 12:57:03 1 2 17 0 15 29 0 140.70 37 79.89 NEW spsIlKCG.ssGusslPssos....hussasluolTlDTsshpsPCl+LEFsSNIsssssh....solNFQlaKpCcsQhsPlPVGPsao.Fut..........hluhhtopoF.SFhVCDC.DhCss-.CCTYSVssTssu.hsssushoIsNAsLuAIss-s ...spslLKCG.ssGu..sslPhsos....husshslAololDTsshcsPslpL-FuSNIsssssh.....thsLNFQlFKpCcsQhhPhPVGPsas.Fsp..........................hlsshsopoF.SFhVCDC.DsCssc.CCTYSVssossu.hsssusssIsNAsLuAlhss... 
0 6 15 15 +14731 PF14880 COX14 Cytochrome oxidase c assembly Coggill P pcc Pfam-B_122767 (release 26.0) Family COX14 plays an essential role in cytochrome oxidase assembly. The COX14 product is a low-molecular weight membrane protein of mitochondria, but it is not a subunit of cytochrome oxidase [2]. Orthology-prediction methods have identified the vertebrate C12orf62 orthologues to be orthologues of the yeast COX14 [1]. 25.00 25.00 25.20 25.00 24.30 24.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.73 0.72 -4.34 45 167 2012-05-17 14:06:12 2012-05-17 15:06:12 1 2 159 0 124 156 0 57.10 23 37.66 NEW thhsttc+lhDtsHRssVhoLlGhTshuushhsas.....haphhpa.+pp+t......h.tpccpt.t ......h....hucphsDhsH+ssVhollu..hTlhuGhlsshs.....hhphhphp+pp+t......h.ppph................................... 0 27 55 96 +14732 PF14881 Tubulin_3 Tubulin domain Coggill P pcc Jackhmmer:Q9P6K5 Domain This family includes the tubulin alpha, beta and gamma chains, as well as the bacterial FtsZ family of proteins. Misato from Drosophila and Dml1p from fungi are descendants of an ancestral tubulin-like protein, and exhibit regions with similarity to members of a GTPase family that includes eukaryotic tubulin and prokaryotic FtsZ. Dml1p and Misato have been co-opted into a role in mtDNA inheritance in yeast, and into a cell division-related mechanism in flies, respectively. Dml1p might additionally function in the partitioning of the mitochondrial organelle itself, or in the segregation of chromosomes, thereby explaining its essential requirement. This domain subject to extensive post-translational modifications. 
30.00 30.00 32.00 30.20 29.40 29.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.21 0.71 -4.81 18 276 2012-10-03 12:11:42 2012-05-18 09:44:05 1 11 230 0 183 277 1 185.30 29 35.18 NEW sPpLTo-oVRYWSDFNRVFYHPRSIVQLN-Y.EL..NSp..lM...P...F...EcWssGE-LFssLDKEHDLLDRDlRPFAEECDQlpGlQlFTGsDDAWGGFAA+Yl-RLRDEYGKpuh.hWsa.ul....psshptptph.h.+.phtt.h.NpARSlppl.us...QuShalPl........s........p........l.........s.........c....s......pS..t.....WasSALlusAlESsTLPoRL ................................................psl+hWSDa.+lhaHP+Sls.lp.........pa..ph.........ssp.............hh.................t.........F.........-s...au..hGpslap...p..s...hp...c...-hh.D.......R...........lRhalEECD...tLQ...GhQllsDhcsuauGhuu..phl.-pLpDEY.......spps....h.....h.....a.s..h...sh................t.ss...t....t......p..t.p....t..........pp....h.h.chl..NsAhuhsplsp.....puo.....lhsPlu.............p.................s.............ss.................pp.a.asSAlhAsAl.-oholPhRl............................................................. 0 53 95 145 +14733 PF14882 GHL12 Hypothetical glycosyl hydrolase 12 Coggill P pcc Naumov D [3] Family GHL12 is a family of hypothetical glycoside hydrolases. 27.00 15.20 27.70 16.40 26.00 15.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.80 0.72 -3.87 38 137 2012-10-03 05:44:19 2012-05-18 09:59:44 1 19 28 0 72 140 139 48.70 34 25.22 NEW EYpc+YKcDP+LPSsPsphY..pc.-Wp...uWhsFLGsp..pchYsTht-AppAshpL ............Yhp+Y+cDP+LPusPpp..hY..ps.-Wt...uWhsFLGs...t......t...hY.ohttAptus.t....................... 0 16 28 48 +14734 PF14883 GHL13 Hypothetical glycosyl hydrolase family 13 Coggill P pcc Naumoff D, [3] Family GHL13 is a family of hypothetical glycoside hydrolases. 
27.00 27.00 47.50 45.90 23.10 22.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.90 0.70 -5.26 12 474 2012-10-03 05:44:19 2012-05-18 11:51:12 1 3 410 0 61 295 6 291.70 52 48.30 NEW HIDLDYVYDsD.tQps+NLshLlpRlhchtlooVYLQAFuDPDGDGhADAlYFPNRaLPhRADLFsRVAWQL+TRAuV.p.VYAWMPVLuaDL....P.s.sp..ptphlssp....ph.t.....t...cs.......hh..RLSPass+spphlppIYEDLAht.AtF-GILFHDDAhLs.DhE..................hs....th.p...st....u..............s........t.................h..........KopsLlcFThpLsstl+....th....c...Ppl+TARNlaAtsllpPpuEsWFAQsLsshhpuYDhTAlMAMPaMEs..sp.....p.....scpWLtpLlstVcspssuhcKolFELQuhsW+..s......pps.lspspLhsa.hphLQtpGlhsaGYY ........................HlDLDYVYD.s.pQ.-+NlDhLIpRlpDMploTVYLQAFA...DP.......D......GDG...h....l.....c....p.VaFPNRhLPM+ADlFuRVAWQL+TRuGV.p.lYAWMPV..Lu..a..-L.....P.s..ls...c.h..chlsst.......pt.c......scp........Yh...RLSPFcscsRtplt.lYEDLAta.AsFDGILFHDDAlLS.DaEDASssAlpA.......YpphGhssslucIR...ps....sp.....pht............pWsR.................................a..........Ko+sLsDFThELsspV+sh....R...uPplKTARNIFAh.PllpP.-.SEAWFAQNhsDFLc.sYDaTAlMAMPY....hEs....ls........-..u-pWLhp.Lsspl+shPtuh...cKolhELQApsWp..p......pps..I.soppLApWMphLphs.GspsaGYY.................................................................. 0 14 31 46 +14735 PF14884 EFF-AFF Type I membrane glycoproteins cell-cell fusogen Podbilewicz B, Coggill P pcc Podbilewicz B, [1], Pfam-B_25631 (release 26.0) Family EFF-AFF was first identified when EFF1 mutants were found to block cell fusion in all epidermal and vulval epithelia in the worm [1]. However, fusion between the anchor cell and the utse syncytium that establishes a continuous uterine-vulval tube proceeds normally in eff-1 mutants and thus Aff1 was established as necessary for this and the fusion of heterologous cells in C. elegans [2]. 
The transmembrane forms of FF proteins, like most viral fusogens, possess an N-terminal signal sequence followed by a long extracellular portion, a predicted transmembrane domain, and a short intracellular tail. A striking conservation in the position and number of all 16 cysteines in the extracellular portion of FF proteins from different nematode species suggests that these proteins are folded in a similar 3D structure that is essential for their fusogenic activity [3]. C. elegans AFF-1 and EFF-1 proteins are essential for developmental cell-to-cell fusion and can merge insect cells. Thus FFs comprise an ancient family of cellular fusogens that can promote fusion when expressed on a viral particle [4]. 25.00 25.00 58.70 28.00 18.70 18.20 hmmbuild -o /dev/null HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -13.15 0.70 -6.45 4 33 2012-05-18 13:38:09 2012-05-18 14:38:09 1 2 10 \N 29 26 0 393.20 31 92.34 NEW MhlhphlLhhhh.hhl.h..............s.Shh...hCscoh.hpu....psctsusplspThphQhpIGLppThCFhLp..sspc.S...sl..uh.sp.pslLHsLpYEplEpcYPlcppYpFulPcl-osClC-CsuhuDhCsscstphscpt.ssTspp..shCapTYHPsQsstGC....sutpuclCCcl.hpPhps+.YVAh+lpQPhssssapas...hasppuhchap.hscpphps.sstspph..hschtplplth.s.ups.pQLcpGMYahs.psps.hh-ht...INclsE.shcKLGWhR.ptss.casVRsuclplpsAHhlpscNC+sQhphsphsupahhs.pts.s.pcaa.GptVEch.sWlRoV+l.-..souRplpVhpppusslsVtLp.hpossshshhactScLs-FouolplDt+SNRFhNlThhsspGolhGpl..Yp.sss+pssspatFosalG.pspspNsshRIuLPuhIN.GsphlCLpP.pcPs...pElC+hlsFpppALppshl.poWppu..cutCspsNp.sLtsFlu.LNPspWhpsls.......uhhEhhthslclshhlshhhlhhhlpp+slssl 
....................h......................................hp..aCsc.s.hpup.....pts.pth.phhph.hphsLppshCh.h.......................ps.lHslph.phEppaPlptpY.FulP.lpssChCcC.t.tp.ss.p....................Chppahst.ts.sC.....s..sphCCtl......sh.ahAh+ltpP.s.hhhhat....at..s.t.h..ht.p.hp........tt.t...h.t.tphplth.s.uts.pph.pGMYa..ppsts...clt...lNEls-sshc+LGWhR..c.ss.ca.VtsuhlhhpshH+shlcNCKtQhahs.hsup..h.......................tch..tt.lp..psWlpsscl.-..to.RphhlsHtcGTslplulp....sp..s...p................shh+stScltsFsGoIhlDpcSNRhhNlThatupGpltGpl..hh.sst.hp.s.ashoh.hu.....phtsps.hlsLPu.ls.tsphlCl.ssp.ss...tplC+hl.a.pps.lc.sh....tpWpth...u.Cspspp.shtshht.h.P..Wh.shp.......shh-hhhhshclslhhhh........hh.h............... 0 14 16 29 +14736 PF14885 GHL15 Hypothetical glycosyl hydrolase family 15 Coggill P pcc Naumoff D, [3] Family GHL15 is a family of hypothetical glycoside hydrolases. 27.00 27.00 27.00 51.50 26.80 26.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.52 0.72 -3.77 16 26 2012-10-03 05:44:19 2012-05-18 17:04:23 1 2 26 0 10 26 0 76.50 32 17.93 NEW hssussa.ttpsh.s.psssaau+chsGppl..pWpGast+aphhshs.....ssaRttWVstlsctlp..ss......saDGVhhDNsshs ..........pus.a.htpph.p.pptsWhA+cssGctl..EWp.s.YstHaQhtVWs.....ssYRttWlcpVsptht..so......saDGVhADNDlh.. 0 3 6 9 +14737 PF14886 FAM183 FAM183A and FAM183B related Coggill P pcc Jackhmmer:Q6ZVS7 Family The function of this family of metazoan sequences is not known. 
27.00 27.00 27.10 27.10 26.80 26.40 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.63 0.72 -3.52 36 120 2012-05-22 14:57:57 2012-05-22 15:57:57 1 5 77 0 86 127 0 103.70 30 53.97 NEW plh+EhlcKEp+p...+lhsp.FslsPhpph..slTpKP.sph.......................s.tc.ppstphhphhp.....ctpppPpcKash.PhTpuQEhGWhspshhp..........tpcpphpas+pps-lTpas ...................................................................tlhpEhhhKE.+p...+lhsp.aplsPhp.........phh..slTcKPhuhp...................................................ssh-t.ssscaLphl+.....+sspsPpcKYsh.PpTEuQElGWhspsllp.t................ppcpphpa.+hps-lThah............. 0 40 49 66 +14738 PF14887 HMG_box_5 HMG (high mobility group) box 5 Eberhardt R re3 CATH:1l8y_A_00 Family Nucleolar transcription factor/upstream binding factor contains six HMG box domains. This is the fifth HMG box domain in these proteins. This domain has lost DNA-binding ability [1]. 25.00 25.00 25.00 25.00 24.50 24.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.05 0.72 -4.20 4 102 2012-10-02 14:16:02 2012-05-23 15:39:34 1 15 38 3 41 95 0 82.70 69 13.76 NEW uKLPEoPKTAEEIWQQSVIGDYLARFKsDRsKA.KAMEuTWpNMEKKEKIMWIKKAAEDQKRYE......REL.EMRossAus.suuKKhKF ................uKLPEoPKpAEEIWQQSVIGDYLARFK.NDRsK.A.LK.AMEhTWsNMEKKEKLMWIKKAAEDQKRYE......RELSEMRuPPAus.sSuKKhKF.................... 0 2 7 23 +14739 PF14888 PBP-Tp47_c Penicillin-binding protein Tp47 domain C Coggill P pcc CATH:1o75_A_04 Domain Domain C is the largest domain in this unusual penicillin-binding protein PBP), Tp47. This domain is mainly characterised by an immunoglobulin fold with two opposing beta-sheets that form the typical barrel-like structure. In contrast to the classical immunoglobulin fold, however, this has an additional beta-strand inserted after strand 3. Also, the strands are connected by rather large loops. Helices are inserted between strands 2 and 3 and between strands 4 and 5. 
Domain C interacts with domain B via a surface that has a slightly concave, goblet-like shape. Tp47 is unusual in that it displays β-lactamase activity, and thus it does not fit the classical structural and mechanistic paradigms for PBPs, and thus Tp47 appears to represent a new class of PBP [1]. 25.00 25.00 47.30 45.40 24.70 17.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.07 0.71 -4.63 14 25 2012-05-24 12:59:46 2012-05-24 13:59:46 1 11 25 2 2 24 2 159.30 51 20.57 NEW ssTNGLKTAsKsuDGoFoFSAR.ssGTpSGlKDpsLKsA...sslsssVKpAo..GSYGEFLRVDLsGs.YGsLGAsMQAVKWTYYGsDSTYTpslAoYGTKFAADNWMHKuMGIQLGLTDSlRCpLPtGTDGTGYWolTVYALGYsDhTapFpATcpNIVKspsssss ..NTNGLKTAhKppDG.aoFSAR.psGotSGl+DtslcTA...oshpspl+suu..GSaGEFlRVD...Ls.G-.YGDLGuNhQuV+WsYYGDDuTYTsshASYGTKFAADNWMHKuhGIQLGLTcShRCpLPcGsDGTGYWpLTlhALGYpDsshcFpssttNlsp.t....pt..... 0 1 2 2 +14740 PF14889 PBP-Tp47_a Penicillin-binding protein Tp47 domain a Coggill P pcc CATH:1o75_A_03 Domain This is the first domain in this unusual penicillin-binding protein PBP), Tp47 is mainly composed of beta-strands and is sequentially non-contiguous. The first three domains in Tp47 interact with each other through intimate domain-domain interfaces. Domain A contacts domain B through its N-terminal segment. Domain A also interacts tightly with domain C, Tp47 is unusual in that it displays β-lactamase activity, and thus it does not fit the classical structural and mechanistic paradigms for PBPs, and thus Tp47 appears to represent a new class of PBP [1]. 
25.00 25.00 55.60 49.50 19.40 18.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.13 0.71 -4.93 13 26 2012-05-24 14:39:44 2012-05-24 15:39:44 1 12 25 2 2 25 2 163.80 45 21.76 NEW ptpsEYsYVYAGLTWuEYWAuEGVYAAGDTSSSsptDSHsEhDKGAFDsVTRATsNHGLHRGSaQ..CtAsIhscsGs..pYplSaWss.........ssphlLTDGos.loas.....RGs...ITpsDGootphscYcVsGlKYVPVKVKouDasAFcucYsVVENGupLsGGauENsLpSYp ........u.pcEapYsYAuLoWu-YWAuEtV.uAs.ssuuss-tDp+sEhDtGuFDsVoRATsNHGLHRGSFQssshlhuc..cuh.......saslptWpt.............tsp.slTcssp.sohs.......................Ruo.hhsDupphclspYcVpGhKYVPVtVtspDhsshKpKaplVEsut.L.GGauEtpLpsYp............ 0 1 2 2 +14741 PF14890 Intein_splicing Intein splicing domain Eberhardt R re3 Jackhmmer:D7E146 Family Inteins are segments of protein which excise themselves from a precursor protein and mediate the rejoining of the remainder of the precursor (the extein). Most inteins consist of a splicing domain which is split into two segments by a homing endonuclease domain. This domain represents the splicing domain [1]. 
26.50 26.50 26.60 26.50 26.40 26.40 hmmbuild -o /dev/null --hand HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.82 0.70 -5.54 5 448 2012-10-03 10:25:13 2012-05-25 15:14:27 1 159 235 3 197 474 68 305.80 19 35.05 NEW DSLVpLsDsGh.VsIK-LVGcs-FsVlAlNpcThKLEoApVo+sFsTGsKPVFcLKTRLG+oI+ATANHKFLTI-GWKRLDc....LsssppIuls.........................................................................................................................................................................................................................................................................................................................................................................................E.htplushsQs..l.thspocIYWDcI.lSIpPsGVEpVYDLTVPsLHNFVANNIIVH ............................................................................................................................................................t........................................................................................h.t..h...h....p....s..p....lhc.....lpht.Gtpl.psTssH.hh..s...h.p...........s...............h.....p.........l....tp...........Lp.G-h.lshs....................................................................................................................................................................................................................................................h....h.................................t....t.......h....h.......huhhhu....-...G....h......................t............................................h..............p.....................h.t........................................................................................................................................................h..........................................t...t.....h...Pt.lh.t.........h.............................................................................................................................................................................
.....................................................................................................................................................................................................................................................................................................................................t.....................................................................l......h.h..p.....s.c..h....h....a......c....pl...h...pl.p..............s....p.....VaDh.p.l...t..Hsal.uss..h....................................................................................................................................................................................................................................... 0 47 107 153 +14742 PF14891 Peptidase_M91 Effector protein Eberhardt R re3 Jackhmmer:D3QNI8 Family This family of proteins contains an HEXXH motif, typical of zinc metallopeptidases. The family includes the E. coli effector protein NleD, which cleaves and inactivates c-Jun N-terminal kinase (JNK) [1]. 
25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.19 0.71 -4.20 13 118 2012-10-03 04:41:15 2012-05-28 13:09:29 1 14 96 0 40 155 31 175.40 23 37.51 NEW ssstlphcGo-.cFtc+lcAuLD+luSosTGcphLpsl.....pohsp....s+pcclsIsEtsscpsssspsshstp..............................GsssplshNPs.p..ht.s.tts.hp.s...t.ssslLhHELhHsachLsGop.....tsp...s.....spssssst-EtpAVGLstasa-..tps...............hoENulR-EhGhP+RspY ........................................................................................................................pss.p..phhpphpssLphltss..sGpthLptl.........pthtt..........tpcplslp...t.s.p.ss.s..ttshsht.ht.......................................stGsssplphsss.......ht....s..sssshp.....t.t....t..hssLhHELlHAhchhsGsh.....................tsch...st.................pptsshsppEhcsVGLst....aspp.................................................hoENtlRpEhGhshRppY...................................................................................... 0 19 23 33 +14743 PF14892 DUF4490 Domain of unknown function (DUF4490) Eberhardt R re3 Jackhmmer:Q5BN46 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 101 and 220 amino acids in length. In mice, a member of this family whose expression is induced by p53 may play a role in DNA damage response [1]. 27.00 27.00 30.40 29.70 26.30 22.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.31 0.72 -4.39 11 94 2012-05-28 14:49:04 2012-05-28 15:49:04 1 1 70 0 62 110 0 94.10 31 61.20 NEW hpppssctptp.hps+sss...+TS-hY+lscsLPpR..FsNPspF+GYus.ps.ssshYRTSNppYGphsPTsHEhPpsFaPpspKFSpclstuGMaRssuLNThl-KSh ........................................................................pptphh...tp..l.tR..hpsP..F+GY...ts.pc..ssshY+ToNpsYGth.sP.......osaphPppaaspsppFS............pplstsGha+sssLNshh-cp.h............... 
0 31 36 49 +14744 PF14893 PNMA PNMA Eberhardt R re3 Jackhmmer:Q86V59 Family The PNMA family includes paraneoplastic antigens Ma 1, 2 and 3, found in the serum of patients with paraneoplastic neurological disorders [1,2]. The family also includes modulator of apoptosis 1, which has a role in death receptor-dependent apoptosis [3]. 25.00 25.00 25.00 25.00 24.70 24.90 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.42 13 255 2012-10-02 13:37:57 2012-05-29 14:18:52 1 13 30 0 128 295 0 253.30 30 58.05 NEW M....ALsLLcDWCRGMslNs+RuLLIlGIP--CuEsEhpEuLpAuLtPLGcYRVLG+hFRRE-NuKAALVEluptlNhoLlP+cIPG+GGsWcVVF+P.ssDsEF..Ls+LscF.LcsEGpsh..p-luRsLGssstsssst.s....p...Wscslupsl..shQPhh.EshtYpcL+lFSG....p-pPusGEEoFEsWL-Hss-hlph...................Wp.VSEpE+RR+LlESLtGPALcllpsLhtpNsshost-CLtALtplFGsp-sptsspl+aLsssQcssEpLpA....aVlRLEsLLQKAVcKuAlp.sssssQsRLcQVLutAphspsLps.......+L+hhphcppP.PuFLpLL+ .............................................................h.hLp-WC+..ths.p+shhlhGIP.tht........-hptslp.sh...s.aplhthhhhpp.t...pssllphstthshshlPpch..GpG..GsWcVlhps.s.-s-h..hpplp.F.Ltt-G.s.h...tshst..hlt.ts...s....t.........hsc.hs.h...h.ts.pshh...hhh...pphp...lFSG....pt.st..t.cEsFc...sWh...pssp..h..l.........................Wp...h..s-.E+h++lhcsLtGsAhpl.hphlttpNsths.stphLpuht.sFGs.-s.....hshph+hhps...Qt.sEphps....alhRLEshLppshppssht..ptss......psRlp..phh.tu.hst...........phc....h...h....t......ttt..Pshhthh........................................................... 0 32 35 40 +14745 PF14894 Lsm_C Lsm C-terminal Eberhardt R re3 Jachkhmmer:Q8ZVU2 Domain This domain is found at the C-terminus of archaeal Lsm (like-Sm) proteins [1]. 
27.00 27.00 63.20 62.30 20.90 17.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.02 0.72 -4.46 14 43 2012-05-30 08:12:54 2012-05-30 09:12:54 1 3 42 28 23 37 0 62.80 46 43.66 NEW h+EFA-hlp+...lhPshVKlh-EsslVhV.-+l+VoEpGV.EGoGPhApRlhclacEYlcp+Kc ..REFA-hlpKphtlhPu.VKlhEEssVVhlh-+l+VScsGV.EGSGPhApRlhcLYp-Ylpc+K+.. 0 6 11 16 +14746 PF14895 PPPI_inhib Protein phosphatase 1 inhibitor Eberhardt R re3 Jackhmmer:Q96LQ0 Family This family of proteins interacts with and inhibits the phosphatase activity of protein phosphatase 1 (PP1) complexes [1]. 25.00 25.00 27.10 26.00 22.60 19.50 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.13 0.70 -5.71 17 94 2012-05-31 15:19:19 2012-05-31 16:19:19 1 3 58 0 59 99 0 304.60 35 76.71 NEW asDLpc..AcERYIpTNG..............aKFLRTlsQEEElIFRptFsRscsoaDt........DTVllsDlRDLVLFhMPcEFLo..hKFlpFMHpPsVaRLLHuLlIYFEYaLRhVEFlLIRRDELuGphuQlQSEQTN-MKRlaShYLSQYRhLVARNYshIlpGEGDhscaYHh....KcllNI..SuTI+D+hFHEQFLAVuTQIVWIsMHRRAYhlIEMEMNRLFRSEHFlhsR.EYL.........cFTssERSLLYGRssKlhNYRsQhSPLlQELpplscEDhPILWIGERKYRGoDhRIsplELEYlVPGsQL+hIDVuHGILGHPKpLYN..TlL............sLDWPuVRauNaS.paDPYallRQPpLcIPpIs-hphRKh .......................................................sh.p..hcE+hh.spu..............h+Fhch.s.tp-hlhcpth.st.tpsp.s.................sslslpDl+.lsLhLh.pp.....hp..hpF.hpFM+p.slcphLhALlhYhpaaLchsph......Ehpsp...h.lt..ppp-hchhhSh...LpthphhlAppYshllhG...u.hschaHM....Kp..pI..SsT.+DhtFaEpFhshss...lsWIsh+Rpta.p.I..............E.Ehs..............RLFRochFshs.Rtch.....................ch..T.sphph.h..hu........+pst.lh.p.......hhs.pSPllppLh.s.cEch....lsc+KY+tssh+lsthp.c.hlsshspl.h.sV..GILGcP+pLas..TlL............sL-..t....shp...hc...lhcpsthpI.ph.th.h+p...................................................................... 
0 16 18 37 +14747 PF14896 Arabino_trans_C EmbC C-terminal domain Bateman A agb Pfam-B_4670 (release 7.5) Domain Arabinosyltransferase is involved in arabinogalactan (AG) biosynthesis pathway in mycobacteria. AG is a component of the macromolecular assembly of the mycolyl-AG-peptidoglycan complex of the cell wall. This enzyme has important clinical applications as it is believed to be the target of the antimycobacterial drug Ethambutol [1]. This domain represents the C-terminal extracellular domain that is likely to bind to carbohydrate [2]. 25.00 25.00 28.20 27.20 18.40 24.40 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.30 0.70 -5.87 7 454 2012-06-06 15:10:05 2012-06-06 16:10:05 1 7 175 1 94 412 0 346.80 41 34.95 NEW EluShstGhVtpYPsYosGpANltALsu....ssCuhADDVLVEsDsNAGMLpPlPut.aGs.GPLGGhsslGFsPsGVs-clpu-PVhppPGpsso-s..sps.ls.u.pssT.uGshussGlNGS+shLPaGLDPsRTPVhGSYsps.p.sAphoSAWYpLP....spst..s+PLVVVoAAGtIhuhp..tsh.hGQolcLQWusscPsGsh.PhuplpPhDl.GspP.AWRNLRaPLo.hPs-AsssRlVApD.sLosppWlAhTPPRVP.LpTLQphlGSpsPVhhDhtsuhsFPCQRPFscphGVsElPcaRIlPDhhtttssSshWpsutsGGPhhhhphLLRsoslsTYL+cDWaRDWGSlc+ah.lVP.DttP.Atl-.GohThsGWhpsGPlRhh 
..................................................................................lsShst.uhltQYPsaSsutuNlpAl....sG....ssCulA-DVLVEsDsNsGhLpP..l....s..u...p........h.....us.....................s...L..........u.u......s.......s.....s..hGFsPsGls.....p.clp........u.-........s...............hh.....t.......ps......u..............ts.s.s..............s..s.........s....ts.............s........sts....u...s............T.........s....G.t.t...ussGlN....G.Sp.stLPasLDPu+sPVhGSa.sp....s..........p...........AplsSuWYpLP.....tp.st......stP...L.lV.VoA.AGplts.hc........t...h......Gp...pl......hlpauh......s...t.....s...s..G....s...h...p....s.h.G.p....h......hD...l..G.s..........tP...uWRNLR...hPhuthPscAsslRlVApD.sLsscpWlAhT.PPRlPpLcoLpphlGSssPVL.lDWsVuhsFPCQRPhsc.t...GVsE..lPc..aRIhPDt.sttssoshatsttsG.GshGhs-hLhcspslsoYLpsDWhRDWGulpcap.hss....stt.....P..Ap..lphGohT+oGhWpP.GslRh.h.................................................... 0 15 64 85 +14748 PF14897 EpsG EpsG family Bateman A agb Jackhmmer:B0MX13 Family This family of proteins are related to the EpsG protein from B. subtilis Swiss:P71056. These proteins are likely glycosyl transferases belonging to the membrane protein GT-C clan. 
27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.72 0.70 -5.51 270 873 2012-10-03 03:08:05 2012-06-06 16:27:04 1 2 639 0 156 806 120 316.40 15 89.49 NEW hhhhhhhhlhlhhhsuh+h.ss........s..Dah...sYh.phapp.ht................................hthE......Gahh..lshlhph.lsts......hthhhhlhuhlhhh..hhhhhlp+hsp..........hhhshhha..hhhhhh.h.....hRQslAhu.l.hhh...u.ltalh.....c+c.....hhtall....hlllAshF.H.tSu...llhlsh....hhlhp.hchp..........................hhhh.....................................................................................................hhllshh...........lhhhhhhshlhphhs.h...................phstYhptsthstsh.........hhthhhhhhhhh..........hhhhhhhhtppht.....................phhhhhhhhsh.hhhhlhhhh............hhhsRlsh......aahhhhh......hlhshlhp.............hhptp...ppp..................h..hhhhhllhhhhhhhhthh.h................hhPYps ..............................................................................h...hhhhhhhhhhhsuhRh...th........us..Dhh..sYh.pha.pt.htt.................................hth.E....ua.hh..l....hl.h.ph..h..s.ts.......h.hhhhlh...sh.ls...hh....hhh.......hhhtc.h.spt................hhlslhla....hhhhhh.hh.ph.stl.Rpsl.Ahu..l.hhh.......u.lh..hlh.............pcc......hhphhl....hlllA.sh.F.H.hSu.....llhl.h..........hhlhp...hphp.................hhhh.....................................................................................................hhh.h..hhh.........lhhh...hhh.s.lh..shhs..h..............h.t..............phshY.hptt.tttth........................hhthhh.hh.hhhh........................h..hhhh.hhhttpht........................thhhphhhhsh.hhhhhhh.h..........hhhsR...hsh..hhhhh.h..hhlshhlhp..........hhtpp....tpt..............................h.......hhhhhllhhhhhhhh.hh..................................................................................... 
0 50 103 133 +14749 PF14898 DUF4491 Domain of unknown function (DUF4491) Bateman A agb Jackhmmer:B0MXX8 Domain This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 107 amino acids in length. There is a conserved EYY sequence motif. 27.00 27.00 42.80 42.60 21.70 20.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.11 0.72 -3.84 40 168 2012-06-07 11:57:47 2012-06-07 12:57:47 1 2 159 0 29 132 11 92.20 53 89.23 NEW hsapGllIGlsoFlIIGlFHPlVIKuEYYFGp+hW.lFLlhGIsslssSLhlpsllhSulLGVlGhSsLWSItELaEQc+RVcKGWFPcNPKRK ....phsGllIGlsTFLIIGlFHPlVlKuEYYaGo..+.sWhlFLllGIsslluSLhlcslhlSulLGVhuhSshWoItElFEQccRVcKGWFP+NPKR+...... 0 18 27 29 +14750 PF14899 DUF4492 Domain of unknown function (DUF4492) Bateman A agb Jackhmmer:B0MUW2 Family This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. The function of these proteins is unknown. 27.00 27.00 35.40 35.20 21.80 19.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.02 0.72 -4.47 47 245 2012-06-07 12:06:30 2012-06-07 13:06:30 1 1 244 0 25 127 0 64.10 55 83.98 NEW hh+lachYhDGFRsMT.LGKTLWhlIlIKLFIMFslLKlFFFPshLppp..tscp.p+.usaVtppLhp .....pIFsFYhEGF+shT.LGKTLWtIIhIKLFlMFhILKLFhFss.hsoh..psDpEKusFVhcpLh.... 0 11 22 25 +14751 PF14900 DUF4493 Domain of unknown function (DUF4493) Bateman A agb Jackhmmer:B0MWC6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 264 and 710 amino acids in length. Many of these proteins have a lipid attachment site suggesting they are lipoproteins. 
27.00 27.00 27.30 30.50 26.60 26.90 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.71 0.70 -4.74 66 146 2012-06-08 12:54:42 2012-06-08 13:54:42 1 5 36 0 25 139 5 229.40 19 45.16 NEW pspGhLp.....Lslsss..............ssstT..+ussstt.t.......ssssatlpIhsss....sthhpptphsphs......ttlpLssGs.YslpAp.hu..-ssss..uhct...PaY.tGpp.shslpp.spsssss..lsCpluNstlslsa....sps.htshF.ss.....aplpVssu.........ssslsa.......tscs...cssYatssp.........pl..phslpuspp.s.s...spstph...................pslpsssph.plshshs..................psGss......slslslsps.spshshslplssph ......................................tpGhLp.....Lslssp..............ssstT.+usspt.................phpsaplpIhsss.........sslhp.sshschs......ppltLssGs.Ys.lpAh.hG.......-ssssuh-p.....PYY.tGpp..shslpp.spss.sls..lpCpluNstVolpa.........scs.htphF.ss.........aslsVssu..........ssslsa......ttsps...cssYapssp...........pl.phslpuspp.s.s....spptsh...................pslpstppa.plshshp...........................psuth......slslsl-ts.hpphshshtls........................................... 0 5 20 25 +14752 PF14901 Jiv90 Cleavage inducing molecular chaperone Coggill P, Hetherington K pcc Pfam-B_1192 (release 26.0) Domain Jiv90 is a fragment of the DnaJ protein in eukaryotes and in J-domain protein interacting with viral protein (Jiv) located in the N terminal region of the pestivirus viral polypeptide. The viral protein interacts stably with non structural (NS) protein NS2, causing a conformational change in NS2-NS3 and stimulates NS2-NS3 cleavage in trans. Cleavage of NS2-NS3 increases cytopathogenicity and consequently aids viral replication. Jiv therefore acts as a regulating cofactor for NS2 auto-protease. The efficient release of NS3 from the viral polypeptide by Jiv is considered crucial to the pestivirus cytopathogenicity [1]. In eukaryotes, it usually lies 40 residues downstream of DnaJ family Pfam:PF00226. However, the function in eukaryotes is still unknown. 
27.00 27.00 27.90 27.90 26.20 16.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.33 0.72 -4.00 20 161 2012-06-08 14:47:20 2012-06-08 15:47:20 1 17 91 0 88 157 0 93.00 51 11.91 NEW puupsItCspCsshHhthtTc+.stupARaCpcCphhHsA+-GDlWsEsshhGhha.........+hYsCh-upVYDIT.............EWAsC..Qthth.......psNTHpVpa+lshs .....EshNThhCo+Cts+Hh.RhphcR.p.tsARaCs-CsphHsAc-GDhWAEoS.hh.GL+h.........pYFAhMDGKVYDIT.............EWAsC..Q+sul.......sPsTH+VsYHIshu.......... 0 20 36 62 +14753 PF14902 DUF4494 Domain of unknown function (DUF4494) Bateman A agb Jackhmmer:B0MZU7 Family This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 172 amino acids in length. There are two conserved sequence motifs: VDA and EAE. There is a single completely conserved residue E that may be functionally important. 27.00 27.00 77.50 77.40 23.30 22.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.69 0.71 -4.57 28 161 2012-06-11 12:23:02 2012-06-11 13:23:02 1 1 137 0 32 137 51 139.90 55 85.59 NEW WFEsKl+Y-KshE.sG..........hpKpVoEsYlVDAlSFTEAEspIhEEMusal..o.GEFclssI+pAsYuElFFs-h-ssD+....................WaKuKlpFITlDEKotKEK+ossshLVQAsolppAlcpl--sMu..sThlDYsIsulsETpIMDVF.ap .WFECKlRYEKshE.sG.............hpKKVTEsYLVDALSFTEAEuRIIEEMosaI..S..GEFsVosIK+AsY..uElF...S-t-suDR....................WFKsKLhFIT...lDEKS..G....tEKKTssphLVQAssl+-AlK+l-EsMc..uTMuDYpIuulsETsIMDVaPY...... 0 12 28 32 +14754 PF14903 WG_beta_rep WG containing repeat Bateman A agb Jackhmmer:B0MSK5 Repeat This repeat contains an N-terminal WG repeat motif. The extent of the repeat is poorly defined. This repeat may form a beta solenoid structure (Bateman A pers. obs.). 
25.00 11.50 25.10 11.50 24.80 11.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.34 0.73 -7.86 0.73 -3.53 486 4416 2012-06-11 14:25:50 2012-06-11 15:25:50 1 86 508 0 1201 4277 336 36.40 27 39.14 NEW aGh.....lsp..pG......ch.l..l......s.s.p...Ycp..............h.....t.s...a...p..s....u........................h.shV..............p..p..s................s.....................................c.........a ...................aGa..lcp.pG.......ch..l..l......s.s.p........Y-p....................................s.........t.s....F....p..p......G..........................................h.AhV..............p.....t...............t......................................................................................... 0 586 987 1110 +14755 PF14904 FAM86 Family of unknown function Coggill P, Hetherington K pcc Jackhmmer:A6NEL3 Family Function of this protein family is not known. 23.50 22.10 23.50 22.10 22.90 21.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.28 0.72 -4.26 3 140 2012-06-11 15:45:07 2012-06-11 16:45:07 1 5 47 0 46 98 0 79.80 52 31.85 NEW MAPEE+AGAspLLQGFERRFLAARALRSFPWQSLEEKLRDSSGSELLLDILQKTVKHPVCVKHPPSVKYSRCFLSELIKKHEAVHTEPLDcLYEALAEVL .................................ttt.lLpsFpR+FLAsptLtoFPWp...S...LEtKL+...-SSs........S..E...LLp...DI.L........p.KT...V......tHPlCl+aPPSl+YtppFLoELI+KpEusth-slDpLYcsLsph.................. 0 19 21 28 +14756 PF14905 OMP_b-brl_3 Outer membrane protein beta-barrel family Bateman A agb Jackhmmer:B0MXY4 Family This family includes proteins annotated as TonB dependent receptors. But it is also likely to contain other membrane beta barrel proteins of other functions. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -12.97 0.70 -5.81 316 2158 2012-10-03 17:14:37 2012-06-12 10:57:21 1 25 394 0 605 5700 1826 472.30 13 60.34 NEW s..tp...h.p...p......p...s.p...s..p...sp.s.p..s..h..sh..phuh..s.Yp..l......ss.....pp.sluh....phshp...hst..tpt...ps...................sspsp..........................tth..............ths..pppppp.ptppt..shshsltYptphs.p.sp..p..lsh..sh..s..a..sht..ps..ss.......ppp..hpp....p..h...............t....ppph....t..sps...pp..p.phhssplsaspsl.s..c......t..plphG.hchshspsp.ssh...h....t.hp..............s.t...th....................................................................s..........s..ssp.......hphpcphh..usYssas...t.p.h......s..+....h.sh..psGlR....hE..h....sp..h......................ph...pp...........t..h.t.p.......p.....p............psa.hs..hh..Pohs..lsa...phspt...p..lplshspchhppPshtpLss.hhp.ht.s.s.hshppGNPtLcPp..hspshpls...a....s.h.....c..pt........h...phs....hsh..pa..s...pst...h.........th...s..h.h...........pt..........s.p..ss..........................................hh..hhp.tN..h.spp.pph..shslshsh...ph................sph...hphs...........h...shshhht.p.p....s.................s....st..hs.................................................................................hs.....h..........sp..h...s....hth....ph..s..s..s..............................................hp..l..s.....p..s.hphphsht..ap...sps..hps....................p.........................................h...t....h.psh.hh.hshulp+shhc.c+.hslslpssDl....Fssp...p.pt..tpht....hss...hhp....p.....ph...pspt.hhlslsY..pF..spt+ 
.....................................................................................................................................................................................................................................................................................................................................................t......................................h.....phthp.a..p..h......s.p.......p..tl.th.......th..t.h.t..htt....pt....t..........................................t..t........................................................tttt..t.......pt...thp.hsh..h..p...t.....p.h........s.......tt.....p....l..s..h.........sh..p......h...th...pp...pp...............pp........htp.........h......................................................t..........pps.................pt..p..t....p...h......t.h.p....hs....a....p......hs......h...s........p..............h....pl...ph..G...hp.h.p...h..p...p..pp...s.s.h........h.....ht...............tt..................................................................................................................................................................................................................s.......t.......t...s.s.p...........h..p..h...p......p...p..h....h....shah..p..hp.....h...p...h...........s...c..........h..s..h....ps..G....l+....h-..........s.p...h.....................................................................ph....p...................................t.t.........p......p...............................psh...h.....p......hh....P..sh...p..l..p..a....................phspp....p....lp..hs.a..sp......p..........ptPs....h........t.........p.........L.........s.............s.......h...........h.................p.......h.............t.........s..........s.......h..........p..............h............p.........t.............G..............N............P......p......L..cP.......p...hspshpls.........a.....p..a................p..pt..........h........hs......hsh...thp........pst...........................h.
...t...h.h...........................t..ss..............................................................................................hh....hh..p.....t.....N......h..s......p.s...pp.....h....s..hp..hsh..sh..ph...............tph.....h...p.h.p...........h...sh...s...h...t...h..p..p...p....s...................p.......t..................................................................................................................................................................p..........p..t.....s....h..th.........ph.....s.sp...............................................................................................hp...h....s........t...s......h....p.h.p.....h..s...ht.....hp........stt...h.s.....................................................................................................h..h...........ps......h.h...h........shs..lp.h..p.......h......h.p....p........p....h.....p....l.....p..l..p..sp..s..l........hp......pt.......h..h.......p..h.......s.t...............p.............................tp.....hhlshpYpht.................................................................................................................................................................................................................................................................... 0 234 513 586 +14757 PF14906 DUF4495 Domain of unknown function (DUF4495) Coggill P pcc Jackhmmer:Q8IV33 Family This domain family is found in eukaryotes, and is typically between 322 and 336 amino acids in length. There are two conserved sequence motifs: QMW and DLW. Proteins in this family vary in length from 793 to 1184 amino acids. 
25.00 25.00 28.60 36.10 21.80 17.90 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.16 0.70 -5.48 10 82 2012-06-12 11:20:08 2012-06-12 12:20:08 1 3 65 0 58 84 0 292.90 43 32.31 NEW scFVpllsKsAsplLE+L+plupEslc+AsLssL.....pusLuuAuhV+NpLhpYsphhp.......ssscpshlphSYcpYpEhsEuLsEpllDhHsRlluhhILQDADShaW-s.+sFaEGERCSFsIQMWaaYhpuh+pDLWshlPPKhAQ+lhAuMLp-SLolLosRYoQupPShsRosQlhlDIsslLLCosphLhSlCsoupchlss........p.sspSKIh+.IHs+CspLhssLllcuoPLssLYKlF++Gl-.shs....hhpspttpPs.Wlhhh...hPsLhp...........hchoslssshAlplpLKlLLuQPpssWsLLL+lLLM+cshls+lLL+pohps ..........................s.pFVphssKsAstlLp+LpchupEs.s.+AsLpsL.....pshLusAshVhpphtpYpphhp.........pspKpshhhlshppYpEhhpsLt..plhDhpsRlhuh.ILQDA-ShHW-Dh+sFaEuERCSaolQMWaaahhuh+pDLWshlPP+hAQcIhuphLpcoLulLssRYspspPShtRosQl.hhDlsslLlCstphL....hulCpSs..pthls...........p.tspspIh........+.IHs+CppLhssLllhsuPLs...LY+sFpcGhc.shp.......hpshht.PhhWl.hh...hPphht............................ss..ssphshp.hpL+lLLupPtssWsLLLcsLLh+DshLhpIll+p.h.t............................. 0 15 19 38 +14758 PF14907 NTP_transf_5 Uncharacterised nucleotidyltransferase Bateman A agb Jackhmmer:B0MX79 Domain This family is likely to be an uncharacterised group of nucleotidyltransferases. 
24.60 24.60 24.60 24.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.51 0.70 -5.04 171 654 2012-10-02 22:47:23 2012-06-12 14:18:15 1 9 531 0 265 712 105 226.80 16 64.96 NEW ssh-WpplhphAptpplhshlhpslp.phs...........................hss...t....tlht.ph...ppt..hptsttpshthttchtclhphh.ppp.sIpslhLKGhsl..u.p.hYs..sst.h.....RthuDlDlLl........pc...hp..c...........................s..pp..lL..tph.Ga.p..........h....p..t.....p.......................sp.p.....................................cth.a.h.p...........................ps..hh...lE..lH.ap....lh............t.t.h.........ph.h.....p.....p.h..htp...................ht..h...ph..ssps...l.........hshs..spchhla.hhhH.h.h.c...H..h.h.......t..........tth..sL.RplhDl.thh.lpp.........h.pp.h.........-..apt....lh.pphpchshtc.hhhhshtlspphhs .................................................................................................p.......hh.....tth.shh...h........................................h..................h.....tt..h.tthtplh.phh.ppt.slthlhlKGhsh...s..t.has...p.t.h.............R..hsDlDlllt.....pc..htp...........................s.tp.hL.tph.Ga.p......h..pt....t......................stp...............................................p..hhht.............................ps...hh...l-..lH.ap.......lhtt.............h.........t..h.....s.....p.h..htp...................tp..h..pl..sstt...h.........hh.hs.spphhl..h.h.hhH.h...h..p...c...h.h.t............t..tl.p.lhDlth.h.hpp.........h...p..h....................................s..hpt....lh.thhpphth...h..hh.h........................................................................................... 0 93 196 230 +14759 PF14908 DUF4496 Domain of unknown function (DUF4496) Coggill P pcc Jackhmmer:Q6ZN84 Family This domain family is found in eukaryotes, and is typically between 134 and 154 amino acids in length. Proteins in this family vary in length between 264 and 772 amino acid residues. 
28.50 28.50 28.50 31.20 28.40 28.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.37 0.71 -4.29 29 151 2012-06-13 13:08:07 2012-06-13 14:08:07 1 19 72 0 104 143 8 135.60 24 18.56 NEW lpplWpslspalpcplttp+uVplsshGsFohph.p.th.s........................tcPsFllsccFhppaslpttp........t...........sptssspl.sasplu..ttsshs+-.hlcsslccllptluctlp..stpsl.plsh.ulGpLph.+spplphpF .............ptlWtslupalpcpLthp+GVpIsshGsFohpppphphsstph...................hhhp+PlFlhsccasptasLpps+h..........s...............................schshhtl.Nastlu..hts.s..hs+c..slcsslcchlthluctlp..................ttpsl.phsh.ulGhLhh.+spphphpF................................................ 0 55 65 85 +14760 PF14909 SPATA6 Spermatogenesis-assoc protein 6 Coggill P pcc Jackhmmer:Q9NWH7 Family This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family has similarity to the motor domain of kinesin related proteins and with the Caenorhabditis elegans neural calcium sensor protein (NCSâ€2). 27.00 27.00 27.20 27.10 26.50 26.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.66 0.71 -4.13 17 142 2012-06-13 13:52:55 2012-06-13 14:52:55 1 3 68 0 70 133 0 124.80 47 36.03 NEW tl-Lpl+AVT..CPGVaLss+sclYLslplhGpahcTpshPshFPlLhp-+hpFEKsFhsssssupls-hLcschlhlELlQhssss...GplLApapsssRDFLaPtsphhssh.sGssR-lLMcpo......sFPG.I.uPKlEFSTcosI .................h.ltLplculo..CPGVhL.s+p-laLulhlhsQYhcTpshPssFPlhhpppMhFEK...lF.pAlDPusVsphLE..hhhhELlQls.ss....u-pLAhY--NTRDFhFPtPp.hsua...sssRpVLM+ph......uFsG.I.APKlEFSTposI.......................................................... 
0 16 20 34 +14761 PF14910 MMS22L_N S-phase genomic integrity recombination mediator, N-terminal Coggill P pcc Jackhmmer:Q6ZRQ5 Family MMS22L (Methyl methanesulfonate-sensitivity protein 22-like) is found in yeast, plants and vertebrates, and is integrally concerned with DNA forking and repair mechanisms during replication. MMS22L complexes with TONSL and this complex accumulates at regions of ssDNA associated with distressed replication forks or at processed DNA breaks. Its depletion results in high levels of endogenous DNA double-strand breaks caused by an inability to complete DNA synthesis after replication fork collapse [1]. Thus the complex mediates recovery from replication stress and homologous recombination in vertebrates, yeasts and plants [2,3]. This family is the more N-terminal region of the proteins. 27.00 27.00 27.60 27.10 23.60 21.00 hmmbuild -o /dev/null HMM SEED 704 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.34 0.70 -13.29 0.70 -6.29 4 99 2012-06-13 15:05:30 2012-06-13 16:05:30 1 8 63 0 55 96 0 462.00 36 57.13 NEW PPCFoCsa-.sppss.phSupuYLusGuLKRllL+LDPtPssF-tDsl-lFuFtWVTETALVESC...phLFsLhRQQlhpLEsLlQ..SpDFGpAAoLHscA-plRpQClhFLHYlKVFIaRhLcs.ps.sc.tshHPacchEAQLPShLV-EL+uLhLhIG+lssLPussluAF.s.Q+QsKlFPPSWHLLHLaLDhHWLVLEILHlLuc+h.tQVVYuppFls.sG-sLTNlSLFEspsEpLhsDLIsLuhp+YsKV+PoEsLpopHa.CpCsKELWlLLI+LLtaRsKh.tsc..sFWshlNKhLpolhcpsostcp.suhuhspsKDPhuFohWlhsHLApLhpasRpG..ss-cpKQhEsNWpFltpLLKp.lssQsuh.EEQlRhaLpCCLoL.sphWpPNlSVlThLWEYYSKNLNSsFoVsWLsLcGLssIs+osLuhLphs+sCCSc.....pphssLY+ousSahIFLpILA+hlK...cpuGspPW+QlKGRIYSKFHp+RM.ELoEsGLppFhpLFLlL.AtsAElEDlAS+lhDLLthLs.suhs.supRALlW+GphAhLLlYspKsLDlushAEKLustFpptA+EFh..Ks.-.sp+.sLWslluhYl-GVQEVFETSssLshSEE+LLN-GFuhLLPACRpuELspVLsFLQsVlARLRpVHppsuQs.p..sss..s..s.sAKE+..A.VAuALWpHFFPaL+SQRho 
............................................FpC...t..tt.t.....h...tualtpG..h.ph.....t.c....thp.t..ph.th.hVtphhhs.ss...p.Lhthht...tp.....p.hp........s.hsshp...th...phRp.psh.Fhphl....h..................................h.hctl..tl...hsp..th..s......hh........h..................................................................hh.hu...att........-..h..t.h.C.C.+EhWlhl..hh........tp....sFWthhpphhpthhpt....pt.............shs.htF.hWlhhplu.hhpa...sppG......pp....hpht.ssh..htplLKp.hss......ps......Ep....phRhhl.hhhsL.hthhps.sh....shls.LW-Yap+pL.N.ssFslsh..hptls.h.poshshlc.hpphhs.......t..pL.....sSahhalhlLuhhhp....ttstst..hpp..lhGRlaSKFptt+h.tLsE.Gl.phh.LFLhl..ht..-hp-lss...+h...hp.hL..h....l....t.........pt..lh.h.+GphuhlLha.p+thshsshsthh.t.thtthtp-.................hthhs.ahtsl.pplhphu....ph..upphLl..s.hhshhl.tst.sp.p.hhphlp.lhtplp......................................................................................................... 0 17 19 36 +14762 PF14911 MMS22L_C S-phase genomic integrity recombination mediator, C-terminal Coggill P pcc Jackhmmer:Q6ZRQ5 Family MMS22L (Methyl methanesulfonate-sensitivity protein 22-like) is found in yeast, plants and vertebrates, and is integrally concerned with DNA forking and repair mechanisms during replication. MMS22L complexes with TONSL and this complex accumulates at regions of ssDNA associated with distressed replication forks or at processed DNA breaks. Its depletion results in high levels of endogenous DNA double-strand breaks caused by an inability to complete DNA synthesis after replication fork collapse [1]. Thus the complex mediates recovery from replication stress and homologous recombination in vertebrates, yeasts and plants [2,3]. This family is the more C-terminal region of the proteins. 
28.40 28.40 48.30 29.50 27.70 25.90 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.89 0.70 -5.40 15 86 2012-06-13 16:19:41 2012-06-13 17:19:41 1 7 62 0 52 90 0 316.60 36 35.01 NEW EplscLTphVhpLsEh+plh.cus.....hhpss+-PLstFFcAlG++hpp.cssuc..h+hphspKhcsYlscF-pWlsss...tp.ssthhpRhYshLulllhpCuslhYs+SKusChh+hhhs+hLLP.opLQsspsspspllpsl+KhaPllLQGlu...phsapsDsYLscpLcsllp+asP+Fhhsossthss+........h..hhpsss..spcLspalLppltspFlplpp.stsss+suhlLsllppLlcsh........pspsplhshlchltsulL-plhhVs-...s+thshslhphlVpssphppusts+pphssslpuhscKaLuhsTh.YFphLtcLAchsPclVtsLlspl+pplppsEhKRGsGcDsulRcsLpRLpssLp ......................................plhtLTphlhpLsEhctlh.cst........ssppsLh.FhcAlGhpattlQshu-...+oshspKsLpYlGclhKalpP.L..........tph.stuLplsYthhGhlVKphu.lhhT.SKuQpLLapllDsLLLP.s.Lppppt..s.hhpulpcsLPlaLQGhs...p.s.s.ssYLpphLtpllppYhs+FlsuSs..s.sht...............l....htsssss.hs....tLp+hllp...hlpcsalphcu.ptssP+LuslLsFl.pLhcc...............ps.tht.lchlLPulLcClhhV..s.......psps++hus-.lphhVpssp...htspttstsphsulhRpFlpcahhhashplaplLcslAhLs.plVhtLlsplppsL+poEhKhGlGcshu.Rpshp+L.shL.s................ 0 16 18 33 +14763 PF14912 THEG Testicular haploid expressed repeat Coggill P pcc Jackhmmer:Q9P2T0 Repeat This repeat is the only conserved part of the THEG proteins from vertebrate spermatids. Both human and mouse THEG are specifically expressed in the nucleus of haploid male germ cells and are involved in the regulation of nuclear functions [1,2]. Although the differential gene expression of THEG in spermatid-Sertoli cell co-culture supports the relevance of germ cell-Sertoli cell interaction for gene regulation during spermatogenesis, THEG was not found to be essential for spermatogenesis in mice [3]. 
27.00 8.00 27.00 8.30 24.90 7.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.12 0.72 -3.42 37 329 2012-06-13 16:25:20 2012-06-13 17:25:20 1 8 58 0 222 470 0 51.20 24 50.06 NEW W.......slspuALcupsopRlppLAp...P+hhtshh.hphs..................tls.tAhtht......sosRlhcLupPpp ......................................l..tshph.sopRltpLAp...PKhhtshh..tps........................l.................................ch..hs........................................ 0 93 104 146 +14764 PF14913 DPCD DPCD protein family Coggill P, Hetherington K kh6 Jackhmmer:Q9BVM2 Family This protein is a found in eukaryotes and a mutation in this protein is thought to cause Primary Ciliary Dyskinesia (PCD) [1]. This protein is 203 amino acids in length, 23 kDa in size and its function remains unknown. The gene that encodes this protein is a candidate gene for PCD and is expressed during ciliogenesis. PCD affects the airways and reproductive organs, and probing Northern blots show DPCD expression in humans is highest in the testes. Additionally, there is no indication of major splice variants [1]. 27.00 27.00 43.90 43.60 20.80 20.10 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.13 0.71 -5.19 18 117 2012-06-14 09:39:13 2012-06-14 10:39:13 1 3 97 0 68 121 1 179.00 46 89.83 NEW sWLshL+uAcKTullQDG+RKVHYpFsDGpEMAEEYDhcTspLlhR+WRpKusLGupGpWplElGEP.sssst..................psphlKEsuosPlFsR+sTKsuFpWRIRNLPYPh-sYoVTV-tcpRslllRToNKKYYKKhslPDLDRspLsl-pssLoasHtNNTLIIoYKKPcplLphEcplLpEL+KlKss..p-GDl .................Whp.lpsupKoullps.G+RKlHahFsDGpEMsEEYDhcTspLLlRKWRhKs.s.L.Gu.upWplEVG-sss.ttss.h....................ssphlcESsusPlhhR.+DTKpuFpWRIRNLPYPc-VYuVsV-pcc....R....sIlVRToNKKYaK+hsIPDL-Rh.plsLcpstLSasHtpsTLIIoYpKP.tllt.hEppl.pElpplcstp....t............ 
0 26 31 49 +14765 PF14914 LRRC37AB_C LRRC37A/B like protein 1 C-terminal domain Coggill P, Hetherington K kh6 Jackhmmer:A6NN04 Family This family represents the C-terminal domain of the putative Leucine Rich Repeat Containing protein 37A or protein 37B (LRRC37A/B) found in eukaryotes. The Leucine Rich Repeats (LRR) lies in the central region. The gene that encodes this protein is found in the chromosomal position 17q11.2, and its microdeletion results in the disease, neurofibromatosis type-1 (NF1) [1]. The function of the protein, LRRC37B is unknown, however experimental data shows expression in the aorta, heart, skeletal muscle, liver and brain during gestation [2]. 27.00 27.00 37.10 35.00 24.80 24.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.91 0.71 -4.78 5 149 2012-06-14 09:49:06 2012-06-14 10:49:06 1 7 25 0 80 93 0 137.50 61 14.63 NEW YPuLsSPG-QFEuQLNQQLRSLIPNNDVRRLISHVIRTLKMDCS-o+VQLoCAKLISRTGLLMKLLSEQQEsKlSKAEWDTDQWKTENYINESTEAQSEQKE.csSELsKEVPGYGYNNKLILAISVTVVVTlLIIIFCLIEICSHRRApcEDE .....................................................P.h.SsGDQFEhQLsQQLpSLIPNNsVRRLISHVIRTLKMDCS-spVQls..CAKL....ISRTGLLMKL..LSEQQEsK.sSKs-WD.T-QWKoENYINESTEsQoE.QKE..c.sp.Eh...pKE.VPGaGYs.pK.LILAl.VTs..llhlLIIlFCLIp..l............................ 0 13 14 15 +14766 PF14915 CCDC144C CCDC144C protein coiled-coil region Coggill P, Hetherington K kh6 Jackhmmer:Q96IX9 Family This family includes the human protein CCDC144C and the ankyrin repeat domain-containing protein 26-like 1 found in eukaryotes. Its function remains unknown, however, it is known to contain a coiled-coil domain which corresponds to this region. The ankyrin repeat which features in this protein is a common amino acid motif. 
27.00 27.00 27.00 28.80 26.90 26.90 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.93 0.70 -5.50 17 241 2012-06-14 12:50:39 2012-06-14 13:50:39 1 41 38 0 86 211 0 241.80 46 28.85 NEW NphLQ-EIAhLRLEIDTIKspsQEKEpKYhEDIcIlKEKN-sLQ+slKLNEEsLTpTlhpYssQLNsLpAENTMLsS+L-pEKpsKERLEs-lESa+uRLAuAlpDp-pSQsoKRDLELAFQRs+DEah+LQ-KMsh-lSsL+DpNEhLSQQLScsEuKhNoLEhELH+sRDuLREKoLhLE..plQR-LsQsQsQtKEhEphhQsEpsKlsKahu.KQESlEERLuQLQSENhLLRQQL-DApsKs-sKEKsVhslQcphpshlppLQA-sEKpsLhLcE+NKELhsEssaLKERhhpYEpEKsERE ...............NphLp-EIAhLRLElDTlKppspcKEpKYhcDlchlKEKN-sLpKslKLN..EEslT.......cTh.pYstpLpsLp.s....ENshLsScLppcKps+pRLEsEhcSapsRLsuAlp-tppp.ssc+shclshppst-..hplptphs.chuth..pschLoppLScsctKhpsLc.ch+.sp-sL+EKoLhlE..psQp-LpQsQpphKEhcphapstpsphpchht.KQpsl-ERlsQlppcNhLLpQQL-DAppKssspE+hlhsIQtph........h-scKp.hhLcE+NKcLhschsaLKEphhpYEpEKsEp.......... 0 26 28 36 +14767 PF14916 CCDC92 Coiled-coil domain of unknown function Coggill P pcc Jackhmmer:Q96LY2 Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The function is not known and the proteins carry no other domains. 28.30 28.30 28.80 29.20 28.00 28.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.03 0.72 -4.38 15 147 2012-06-14 14:55:10 2012-06-14 15:55:10 1 2 59 0 90 137 0 59.70 42 20.86 NEW hpp+lpslp+slpFLQppHtpsLcuLHpEIc+Lpccs+-LpacLhhppssps.ps.ss...o.sp...ph ......ppplpshpKsL.FLQp-HusTLcsLHt....EIc+Lpp+spDLpacLhhppsppp..tps...t.t..t................... 0 25 31 52 +14768 PF14917 CCDC74_C Coiled coil protein 74, C terminal Coggill P pcc Pfam-B_23141 (release 26.0) Family This is a C-terminal conserved domain of coiled-coil proteins from vertebrates. The function is not known. Expression levels in humans are elevated in breast cancer []. 
25.00 25.00 25.40 53.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.53 0.71 -4.08 6 31 2012-06-14 18:18:57 2012-06-14 19:18:57 1 2 22 0 19 42 0 124.20 59 34.53 NEW tPhMsL...P.tLRKPTTLQQCEVlIRQLWNANLLQAQELpHLKSLLEGuQRP+AssEE.........AGhuuP.+DQ-.....uppLPKVosKulSKKCLlLS.ssV.AE+uILPALKQoLKsNhAERQ+RLQAVQpRRlHRo ...................PsMhL...P..LRKPTTLpQCEVlIRpLWNsNLLQsQELQHLKSLLEGoQR.PpAsPEE...........A..S.P.+DQE.....AhphPKVoo.KulSKKCLlLSP.PV.AERAILPALKQThKNNFAERQKRLQAhQ+RRLHRS.................. 0 5 6 9 +14769 PF14918 MTBP_N MDM2-binding Coggill P pcc Jackhmmer:Q96DY7 Family MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle [1]. MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner [2]. MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells [2]. MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) [3]. It is unclear which regions of MTBP interact with which binding-partner. See PF14919, PF14920. 
25.00 25.00 31.40 31.40 19.00 18.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.66 0.70 -4.99 14 53 2012-06-15 13:15:41 2012-06-15 14:15:41 1 4 37 0 28 41 0 239.20 56 30.94 NEW MDRYLLLVhWtEtKh.usAutEhE.ts-houhcsopppPclpAsNlYHLLKRSIosSlpP-DSTFPACSVGGhPGS+KWFFAlQAIhGFYQFCSSD..WpEIaFssEKDcIEDVLQTNlEECLuAlECFEEEDSNSRESLSLA-LYEEuAEsLHQLSDKLPAPGRAMlDlILLsSDKDPPKLKDCLPslGALKHLKEWaSAKITIAGsHCEhs......sQKIAEYLSAsVVohE-l+NsID..S+ELWRGcIQIhERKFG.ElSFPEFCLKGVTscNaSs.NLNo ........................................................MDRYlLhl.a.............t...t.s...t......s.hs.AsslYchLKcShssSlps-sSTFP..ACSVuGhPGo+KWFFAlQAIhGFYQFCSSD..WpEIphsspK-c.EDsLQTslEECLuAlpsFEE-DsNSRESLSLs-LYEEuAEsLHQLSDKLPAP.GRAMlDlILLsS-cDsPKLKDCLPslGALKHL+EWaSAKITIAuscschs......hQKIA-YLSAslVu.--LpNsID..u+ELWRGKIQIhERKFu.ElsFPEFCLKuloscpass.plp...................... 0 3 5 11 +14770 PF14919 MTBP_mid MDM2-binding Coggill P pcc Jackhmmer:Q96DY7 Family MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle [1]. MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner [2]. MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells [2]. MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) [3]. It is unclear which regions of MTBP interact with which binding-partner. See PF14918, PF14920. 
25.00 25.00 39.20 39.00 20.00 19.20 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.09 0.70 -5.76 6 58 2012-06-15 13:23:16 2012-06-15 14:23:16 1 5 40 0 30 45 0 303.50 55 40.32 NEW pcVFHYYGsALEaVQMVpLSDLPuhalSDhEFEL....uls++usKt.StLLL-QlsSLpGKVGALFsLsCslSslshPssuQLSS+KWREYhA+KPKsIsVPDVEVKGEpusYahLlQGsGsut...C+ATLlHSAoQINGuAALshlpuhl+.pscsupsuhshsshlpSLP+FsGEQllpRE+pLAplQsLALKEhLKR+ctsppssulsssELKuLLsLTREpaLchaDosLPcus..phtphpsshhlscsosssSspSstMcsNshEWPERpVLQNLENhEKhKQKhRsuhL.stSSEQLLG+KDG.R-ShTLLDAKELLKaFTs-GLPlG-LQPLplpRG..-sAF ................s.cVFHYYGPALEFVQMlpLSDLPShahS...DhEFEL.........sLo.ppss+tpShLLL-QlSSLpGKVGALFsLsColSslhlPsss.Q..LSS+KW+EYlA+KPKoI.sVPDVEVKGEpuuYYLLlQGsustt...CKATLlHSAsQINGuh..ALshlpG+h+.ps..ppuchu..hshc..lhSLPpFoGEQllpRE+pLA..pl..QsLALKEhLKR++hupQPps...lSssELKoLLhLTREpFLcha..-uhlPcss.....hp.hsph.ps..shlss..hs.ssp..sssSshhEsNs..LEWPERpVLQNLEshEKsKQKhRsu.......L.s+SSEQLLGHK-G.R..-ShTLLDAKELLKaFTsDGLPlGD.LQPL.lQ+G-psF......................... 0 3 5 13 +14771 PF14920 MTBP_C MDM2-binding Coggill P pcc Jackhmmer:Q96DY7 Family MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle [1]. MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner [2]. MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells [2]. MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) [3]. It is unclear which regions of MTBP interact with which binding-partner. See PF14918, PF14919. 
28.40 28.40 30.40 32.10 23.80 23.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.59 0.70 -4.89 7 57 2012-06-15 13:45:35 2012-06-15 14:45:35 1 5 42 0 33 45 0 234.50 59 31.44 NEW P-LoPcKL+tLPFEKAutC+YHGIEYCLDsRKALERDsGFuELQSRLIRYETQoTCsR-ssPlPh.......sLSPLPSPAVLSEPtSVPDGEuLQsElRs-sscLKRRS+DhssLhPtKRlsKScSSDSLlStsSssoupa.hshsoRp.puERshSs.............t.tlstpsuuuppssppocoopstKESRSQKHsRhLKEVVucTLpKHGIspcHtCFsuCSQRLF-ISKFYLKDLKTSRGLh-EMKKsAsNNVh.QVI .............................P-LSPtKLpsLPFEKAusCHYHGlEYCLDsRKALERDsGFuELQSRLIRYETQTTCT..+EshPlPh.......lLSPLPSPAV....hSEPGSVPDGEsLQsEh.......+...sEsuRLKRRS+Dlssl..a.P...pK.RLsKSESSDSLlSQsSGsos....pptthsssR+.psER.Shuss.....s.ts.....pss+hspps.uuup+ssppscs......s+phKESR...SQKHTRhLKE.....VVscTL+KHuIsE...sHcCFsACSQRLFEISKFYLKDLKTSRGLaEEMKKsAssNsh.QVI............. 0 8 10 17 +14772 PF14921 APCDDC Adenomatosis polyposis coli down-regulated 1 Coggill P pcc Jackhmmer:Q8NCL9 Domain The domain is duplicated in most members of this family. APCDD is directly regulated by the beta-catenin/Tcf complex, and its elevated expression promotes proliferation of colonic epithelial cells in vitro and in vivo [1]. APCDD1 has an N-terminal signal-peptide and a C-terminal transmembrane region. The domain is rich in cysteines, there being up to 12 such residues, a structural motif important for interaction between Wnt ligands and their receptors. APCDD1 is expressed in a broad repertoire of cell types, indicating that it may regulate a diverse range of biological processes controlled by Wnt signalling [2]. 
27.00 27.00 31.60 30.40 25.90 26.40 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.74 0.70 -5.02 11 205 2012-06-15 15:05:53 2012-06-15 16:05:53 1 3 56 0 131 192 0 199.50 39 81.35 NEW pCpp.hpclppts+lT.s.shPPcLcGpWlSppCEVRPGPEFLTRsYpFa..sNppF+AhQaYYsD.uCppPoaoLlI+Gpl+LRpuSWlspGATEA-aaLc+VsIl.HSppshp+lspclNpoCss.....h.s.spsWhPhh.YpLh....stpsppc............................ChsAhGFuhpELpLlRlppphhhps.........phspELaLGDIHTshspRtpYRPTuYQ.PLpsshcp.spsCPsCullh+uoEppPPlLPs ....................................s.tLpGpWVSptCE...VRPu........s.....FLTRpapFa.....s.N.p.oacua.aaYuDstCpp.PTaTlhs+G+hph..t.psShhlpGGTEhsaclp+spVssh.spsssphLsh.h.sp.oCuu.........tssWt.Gst.-lh...psp...t................................Ch.t.ulphs..hpEhpLh+hEpc...hhtp..................LalGph.TD.so..p..R...p..hRPTS...YQ.PL.ps............................s............................................. 0 27 35 74 +14773 PF14922 FWWh Protein of unknown function Coggill P pcc Jackhmmer:Q5TG08 Family This is a family of eukaryotic proteins. Most members carry a highly distinctive, conserved sequence motif of FWWh, where h represents a hydrophobic residue. The function of the family is not known. 
25.00 25.00 28.80 28.80 23.40 21.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.90 0.71 -4.74 18 109 2012-06-15 15:39:53 2012-06-15 16:39:53 1 3 49 0 68 108 1 146.90 32 28.93 NEW l-htshsshcsschssLP...tplchpplhsplhcutph.tth.......................tt.ap.....................phh...Sct.tulhhDoFWahahctap.c...................p.phps+LFsRlApsYVplhhslt.sch+Dtahpha.phlApslahshhpsFPpuhphasps..F+tpLhphhhhhhoGlps..p.hphscWs ...........................................................................................................................hphstaptpc.stLP...ptlphpphh..llcupp...ph........................c.hc.....................pah...S.tshAlhhDoFWWhFhccapPs..................................................................................................c.QspLFcRIupsYstLhhph..s..sph.....c-...s.hhchhsshLupAlYssFppsFPpSh....Fssc..FKpslssph.hWho.GhhP..p.t.appW......................... 0 35 37 46 +14774 PF14923 CCDC142 Coiled-coil protein 142 Coggill P pcc Jackhmmer:Q17RM4 Family The function of this coiled-coil domain-containing family is not known. It is found in eukaryotes. 
25.00 25.00 31.60 31.00 23.70 22.20 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.52 0.70 -6.17 6 80 2012-06-15 16:15:37 2012-06-15 17:15:37 1 2 63 0 52 76 0 357.80 33 57.08 NEW hLWsthGtuLsphh.............sslslas.shshslhphLppshspssLPppsppsLpsls+sLpppushpsWDpuFCtsLGSuspspsl.........th..sshuotTspLltpLF.PLlslLp......p..............sp.RspLhLphP.....LsRhlsTLpooplWlho+sppaLuuWuhspFLLllQ+DL.sLLc.......sscsLshLsps.......tshshslhspLstElp+hhscLptloccslplFot-C+KhoTphFp.sMPpG+aWR+chps-lPspPSpYAthsVppVLtPVlcGlptL.spAphsALopAloAhh-AWL-HILpccI+FSlpGAlQLppDFusVR-hlpuEp.uLStEh+QpLLSLclF+plDGAlhhLLpQPhsKptlscps....+pstsspsQshpplsoSS.LsSLcuh..psshpssl.ssp...s.................us..-.YhluNQQtWL ......................................................................................................................hWs.hu..L-..................lshh..pp.pthtp.L..p..s..splPp.s.ppLtsls.ctLhtpsh.htWDQh.Fp.ALsSu..tsps....................................s....so.Tsp..lh.p....lFssLlsh........................psp.slh..p.Ph....lthh.polposhLWhhtcupphhssWs.spFhhllppDl....hLp..........phpsLp.htpp...............lsltlcppLshElpp.hsplphhscEslp..lhup.C+p.uhtsFphhhPpupaWRhpltsp..stsS.Ysshslcpll.PVlpuhp......phshLs.hLphhhtAWLDHIhp+..tI+FSlpGAlQLhpDFssVRphlpp.phsLo.-l+ppLh.hplhpph-GshhhLLppP.st.plppp........pts.ptthp..p..thsst....s............................................................................................................. 0 15 18 35 +14775 PF14924 DUF4497 Protein of unknown function (DUF4497) Coggill P pcc Jackhmmer:Q9P2G4 Family This domain family is found in eukaryotes, and is typically between 107 and 123 amino acids in length. There are two completely conserved G residues that may be functionally important. 
23.00 23.00 23.20 23.90 22.10 22.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.21 0.71 -3.75 29 85 2012-06-15 16:57:50 2012-06-15 17:57:50 1 8 45 0 74 80 0 109.70 25 15.98 NEW ppsph.pFppGKSCLFshsspslppphpshslph.shhch.s.h.hsss..ph.............lGsstlslsph......htpl..hpp........hp.p..................stu...cshcspasLhs.psp.ps.GplslhlRloshG..psl..lT .........h..t..tFspGKSCLFphpsssLpptlhphPLth.hlhplsssh.hsss..pl...................lGssslslsst......hppl....htt........ht.p............................................sss.psh+spasLh........s...tsup.ps.GslslhhRLosLGppl....... 0 26 33 60 +14776 PF14925 HPHLAWLY Domain of unknown function Coggill P pcc Jackhmmer:Q9P2G4 Family Members of this family carry two distinct, highly conserved sequence motifs, CPPPLYYTHL and HPHLAWLY. The family is found in eukaryotes, and the function is not known. This family lies at the C-terminus of members. 25.00 25.00 38.90 35.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 640 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.98 0.70 -6.53 5 43 2012-06-15 16:58:46 2012-06-15 17:58:46 1 4 29 0 26 37 0 485.50 48 70.18 NEW CSNuSSsRSVSPPNQEVTELDlETNIFCPPPLYYTHLTQEKsPPsQGKITIEPQINAPEELDGsFhEEcRVNPPTHTssLKHTsSAT+ESPPMLINPPHVQDlGASNQTTDHsQTEQNRINTIRQLPLLNALLVELSLLYNQPVASPTHIHPHLAWLYRTEDKKSPESSAKSTCKSESKKDKLShGGNEKSVSLQYKKNQsENLKKGKYFEKsSGAPPKRVPRGKLLYGLTNTLKLRLKQTNPDMLVVHEKREQYRKMQAQMLGTKLRIPSSKVKlLSFAEQaQKPHQLPKDKCLESDASFAENSDTSKQISGVlDDPSTopETKLKCATE.KTVDCuENRoNNGLLEEIVSPANSIVsE+FTsAsILEGKh..EMKVQSPsVFQQVAVVDRhlVDKEIDDKQVKTTDsDILTsD..ISEK+PSKNSCSESISELKYSDDFTSPCsSEDFsTSEDTSRILQAHDSSPGTENPKHSQaTSKSSETtLSIRKNSSEKSSILSPPFSAGSPVaSaKRFHISKTQDKSLEEASSISTSDLSSSHWTEEKENQIDQNSMHNSKVIKRDQ..DISlK.KTRTGCKSSEKSQSPRTSQVSSYLPSNLSELELNVLDSSTSDHFEEssDDlGSLNISKQCKDICELVINKLPGYT 
........................Cssu.s.pslSs.spEVTELDhETNhhCPPPLYYTpLopEK.ss..spsphTh.sQhN.sE-h-sh..EpphlssPh..ps.ctspssspEpPshL.s.P.phps.utsspss...QsEQstlssIRQLPLLNALLlELSLLhsQPhsoP..splHPHLAWLYRs..E...D..pcu.P-sSsKuTspoEo.psKhshttpcK..sls.Qh+Ksph.p.s.pcs+a.EKpuus..p+Vs+t+LLYGLTNTL+LRLKpTNPsMLlVHEKRE.YRKhQsQhl.Gs.KhRlPSSKsKlhS..A.cQp.Qhs..QLPcDc.l.-ucushsEso-TStQlSssh-csSsocE.sch.p.h.shc.cplc.scs+hssh.lct..hsshtslhs...Ec.h.sshhtt.ph....ch+lpSPsl.tp.shlDp.hls..ct.hs.cplKsht.-..hus..hu-.p+.s.upsSs.EslSELpYSDDh.....s..S..P...CYSEDFsosEsou+....hpAhDSSst.....sE..sspps..phs..sKSS-splSh+csoS-.pSSlLoP..PF...SAGSPVpSh++.+l.Kt.pcpSLEEsSs.lS....sSDh........SS.pWTppKE....sph........Dpsuhpp....Sclh+psp....D.ss..psp.sshKS.EKSQS.pTSQVSSYLPSNlSEL-LsslDs..SsuschpE.pDplGSLsIocQCKDICELVINKLPGYT................................................................... 0 1 2 6 +14777 PF14926 DUF4498 Domain of unknown function (DUF4498) Eberhardt R re3 Jackhmmer:Q9BRQ4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 308 amino acids in length. 
27.00 27.00 28.20 28.20 26.80 26.70 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.66 0.70 -5.23 16 119 2012-06-18 07:58:32 2012-06-18 08:58:32 1 3 86 0 74 116 1 184.50 36 75.31 NEW sFs.LstpsFstl..pD+-lpchL.hKWulpsplphpsFpacpp...ap.shppschltsFFpDpsVtpsLpl.p.pssthst.lutp......ssclcsp.lssohhShsFFD+Lhs..su...IVR.psGc.Is+ChD-hh-sh.lSDELRchLL.EDSEpYclFS-s-RpEhLFcLFcpLlLG.GslCQaEDplpPYl-soKplYK-LVulpKsspT.cpIpls.StVa+Vsuh.s.pss..sh...ps....ssHtQshsYlllDPt+RclpllYH ................................................sh.t.phphptFtasp......................ap..hptpphhhsFFpcssVh.pLthh......ts..h....s.t.........stplph..lsso.hohshFppLhs....tsll+.tsGp.lhpChcp.h.tsh.hsDpLRphLL..........p..-S-papl.aop.-RpEFLFplFp+LslG.GslCQaEDslsPYL-ssKhlYKcL......Vu.......lp+ssps......p.......plt...............l..o.lhcV.sh........s.t.t..h....tt..........t.p.tshsahhlss.p+.h.hh.p........................................ 0 28 36 55 +14778 PF14927 Neurensin Neurensin Eberhardt R re3 Jackhmmer:Q8IZ57 Family The neurensin family includes the neuronal membrane proteins neurensin-1 and neurensin-2 [1]. Neurensin-1 plays a role in neurite extension [2]. 25.00 25.00 25.50 25.80 24.90 23.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.80 14 169 2012-06-18 09:29:10 2012-06-18 10:29:10 1 2 52 0 110 147 0 131.00 32 57.78 NEW haGVRuYLHpFY-..........-Cs.......suh.cppc....sh...hphssp+hsulhWKV.......ulssGhLlLlhGlssLhlGYh......lP.+hEhh.....s...............p.uph.hlDspAspaNpsLDsh+LsGssLhClGGlhlAhsLl..lsshtps.tcpE.hhpts.ppt. .................................................................................................................hch...................uhlSuhlFLlhGlhllsluYh......VP.clcs......s....................p.t-h...hl-p...pu...s...phsutLDpChlAGhsLhslGGhlLushLh..hSha..ttp...............h.................... 
0 15 24 50 +14779 PF14928 S_tail_recep_bd Short tail fibre protein receptor-binding domain Eberhardt R re3 CATH:1ocy_A_02 Domain This domain is a receptor binding domain found on bacteriophage short tail fibre proteins. It contains a zinc-binding site and a potential lipopolysaccharide-binding site [1]. 27.00 27.00 31.40 42.40 22.50 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.50 0.71 -3.90 9 26 2012-06-18 10:30:31 2012-06-18 11:30:31 1 5 24 2 0 30 0 119.20 54 23.35 NEW ssstcIh..ss+GsDuKsKPtLGsGsuGhslGpVQtQQlphHKHAuGaGE...psssusFGsTspssalGoppt.DWDNtpYFTN-GaEl-us.pRsshsTLNocsLIGsEsRPWsMSl.aIIKV .................ssGsHIh..ss+GpDuhGKsRLGsGCsGhhVGpVQsQQhpYHKHAGGaGE...pcspu...sFGsTstssYlGTRKthDWDNtSYFTNDGaE....lsss..RsuhsTLNpEGLIGsETRPWNhSLNYIIKV. 0 0 0 0 +14780 PF14929 TAF1_subA TAF RNA Polymerase I subunit A Coggill P, Hetherington K kh6 Jackhmmer: Q15573 Family TATA box binding protein associated factor RNA Polymerase I subunit A is found in eukaryotes and is encoded by the gene TAF1A in humans. Its function is to aid transcription of DNA into RNA by binding to the promoter at the -10 TATA box site. It is a component of the transcription factor SL1/TIF-IB complex, involved in PIC assembly (preinitiation complex) during RNA polymerase I-dependent transcription. The rate of PIC formation depends on the rate of association of this protein. This protein also stabilises nucleolar transcription factor 1/UBTF on rDNA. 
27.00 27.00 28.30 27.60 26.90 26.70 hmmbuild -o /dev/null HMM SEED 547 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.85 0.70 -6.06 13 157 2012-06-18 12:06:50 2012-06-18 13:06:50 1 5 65 0 85 134 0 250.30 21 84.78 NEW chpsst-tpshpssstppchhhphhhshpt.shh...........pcs+hhplLpchlpp+pWspAushhpshlcsht+s.shpt..sc.hh.tlts-llhphsps.......huschpshuthhslhhpp..............h.....p.phhlhLppuLahlppsthttsshphoh......h.ppp.t.shpslhphahGLltYcpWhpsl.cph+hccpsh................phtts.hsppst....pps..........hspphspsshtsssspsslhphsts..................lDphlps.spthpFhpspcthcpsho.hs..E+hssss..t.h..............................................shaphl.+aL..+....uss.s........................hhlhutclp-t...hpp.s.sstssh.hchtshhhp.hs...ps.sshlhSslcchhphsPspphhl-+tphh+cp....hpphh-h...+hp.h.sp..hW..hs.sh.p.htphtp.tEcpppuhplhF.hLDauss+pN.pAWphhtphlpplhh.....saltppWcsRpsWW.saHFSp.h....h.p.......tts...chhspKAssAuhhhG.thtYh........p...hp.h.hLtcphpp.....p+lhpsh ..............................................................................................................t...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tt........................................ 
0 12 24 48 +14781 PF14930 Qn_am_d_aII Quinohemoprotein amine dehydrogenase, alpha subunit domain II Eberhardt R re3 CATH:1pby_A_02 Domain This is the second domain of the alpha subunit of quinohemoprotein amine dehydrogenase [1,2] 27.00 27.00 57.60 56.70 26.00 19.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.41 0.72 -4.10 20 36 2012-06-18 12:07:57 2012-06-18 13:07:57 1 4 31 4 13 43 1 108.10 38 20.58 NEW tsssLsGpWsluG+hPG+G-apGsMolsuuut.DpYsVshsh+.aADGsshsupGsAllYTGYEWRAslslG...........ssshRQVhAhs..ssphpGRhF.tspDchGuchhAs+ss ..p.ssLsGpWshoG+hPu+G-hpGsMolssuss.DsYpVplchc.aADGsshsupGsAllYsGYEWRuslclG..........ssshRQVhAhp..suphpGRhF-sscDEhGhchpAs+t........ 0 2 9 10 +14782 PF14931 IFT20 Intraflagellar transport complex B, subunit 20 Coggill P pcc Jackhmmer:Q8IY31 Family IFT20 is subunit 20 of the intraflagellar transport complex B [1]. The intraflagellar transport complex assembles and maintains eukaryotic cilia and flagella. IFT20 is localised to the Golgi complex and is anchored there by the Golgi polypeptide, GMAP210, whereas all other subunits except IFT172 localise to cilia and the peri-basal body or centrosomal region at the base of cilia [1,2,3]. IFT20 accompanies Golgi-derived vesicles to the point of exocytosis near the basal bodies where the other IFT polypeptides are present, and where the intact IFT particle is assembled in association with the inner surface of the cell membrane. Passage of the IFT complex then follows, through the flagellar pore recognition site at the transition region, into the ciliary compartment. There also appears to be a role of intraflagellar transport (IFT) polypeptides in the formation of the immune synapse in non ciliated cells. The flagellum, in addition to being a sensory and motile organelle, is also a secretory organelle [5]. 
A number of IFT components are expressed in haematopoietic cells, which have no cilia, indicating an unexpected role of IFT proteins in immune synapse-assembly and intracellular membrane trafficking in T lymphocytes; this suggests that the immune synapse could represent the functional homologue of the primary cilium in these cells [6,7]. 27.00 27.00 27.90 28.70 26.50 26.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.40 0.71 -4.21 26 146 2012-06-18 12:46:22 2012-06-18 13:46:22 1 2 119 0 100 147 2 114.40 42 85.21 NEW tGlahD-hs+lRVl-P-htspoppL+-EspcFlc+lspFpcllpphhphlcphA+cVEpEKl+AIGsRNhlcoh.scpRcscpQplQshItEKpsELERLpsEacuLp+lEpEQpphIpph ........GlaFD..-ls+lRVLDP..............-ssppT.cLK-ECccFlcKls...pFp+lVsshlcll-plAKc...sEsEKhKAIGsRNhLcSh.ucpRcsppQplQshItEKphpLERh+sEY-uLpKlEtEQpEhIpph............... 0 40 50 77 +14783 PF14932 HAUS-augmin3 HAUS augmin-like complex subunit 3 Coggill P pcc Jackhmmer:Q8IY31 Family This domain is subunit three of the augmin complex found from Drosophila to humans [1]. The HAUS-augmin complex is made up of eight subunits.\ The augmin complex interacts with gamma-TuRC, and attenuation of this interaction severely impairs spindle MT generation. Furthermore, we provide evidence that human augmin plays critical and non-redundant roles in the kinetochore-MT attachment and also central spindle formation during anaphase in human cells.The HAUS complex is required for mitotic spindle assembly and for maintenance of centrosome integrity [2]. 
27.00 27.00 29.70 29.30 24.90 24.80 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.64 0.70 -5.20 25 132 2012-06-18 13:04:39 2012-06-18 14:04:39 1 2 88 0 88 120 0 228.30 27 43.67 NEW Wlh.ps.-phcpFhcWlspsls-sNlLo-p-Lppa-pLp.ppGc.lLcup-L-.sLpplpspsssh....ph......s..-......p-lctLcppltsl.tchpp.htpLh.schp.phcph.hspph...spLpspp...tcsstthptspph..ltsphpc+sppLpplp.pcssphsp-hpc.tp....ps.sslFlpQhslcpYhhps-phhphLshYh++pFpht.hchs..psssps....lp.......t....c.pc..th.ppp...ptELppLpptht.hsphpaIctcscs.p ..................................WhF.ps..-scsFLcWhCssls.ppNlLotpElpta.ppLp....cpG+.l.....L....-utsL-tsLcs......hps.s.ph.......ph...............p..-........................ppl-tLcpplpsl.tchpphphphh.pchp..hsph..tupp.....hplpscp......tpsstphppstth.......lps..sphspplptlhsplsphh......t...............h............pp......t.s...................pp...ssl...aluphsLcpYltp--p.ottLs.ahpKpFhpG.tchs..Essspppa......hD.hpt..hp.........cppc...hpcc....phEhtRL...p.ha.........h.huppphIphpsp........................................... 0 22 36 61 +14784 PF14933 CEP19 CEP19-like protein Coggill P, Hetherington K kh6 Jackhmmer:Q96LK0 Family This family includes the centrosomal protein of 19 kDa found in eukaryotes. In humans, it is encoded for by the gene CEP19 which is also known as C3orf34. These proteins localize in the centrosomes. Centrosomes are dynamic organelles that assemble around the centrioles. They organise the microtubule cytoskeleton and mitotic spindle apparatus and are required for cell division and cell migration. C3orf34 localizes near the centrosome in early interphase, to spindle poles during mitosis, and to distinct foci oriented towards the midbody at telophase [1]. 
27.00 27.00 27.00 28.50 25.00 26.50 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.02 0.71 -3.94 20 94 2012-06-18 14:37:52 2012-06-18 15:37:52 1 4 74 0 64 86 0 149.40 38 67.25 NEW ++sGlRhpPPslllhYpsp....ssKhRpRh....lPl.Rshppposssths-cLtpp.......s++psaLpplsphQlc.........+hhphL.........................psphpuhshspshpthtpctsls................pcDLNKlDDppLpctKstM-ctFc+Npl+PGDssFlYDhcl-Fspsc..psSuWD ....................................................................++sGl+hpPPslllhYppp....ps+.RpRh....hPl.pshpp.Sss...sphAEpL+ps.......s+H+sYL..pplshtQlc.........+lhphL...............................................................................................pshhpGpolspshpphppctsl-..................s-cDLNKLDDcELt+pKshMDEhFc+Nph+.sDPsFVYDlEl-Fspsp...pssuWD................................ 0 32 38 48 +14785 PF14934 DUF4499 Domain of unknown function (DUF4499) Coggill P, Hetherington K kh6 Jackhmmer:Q8TBM7 Family This family contains a protein found in eukaryotes. Transmembrane protein C10orf57 is encoded for by the gene chromosome 10 open reading frame 57 (C10orf57) located in chromosomal position 10q22.3. The exact function of this protein is still unknown, however it is thought to be an integral membrane protein. The protein sequence is 123 amino acids in length and has a mass of approximately 14.2 kDa. The family also includes some longer proteins that possess an N-terminal dehydrogenase domain, Pfam:PF01073. 21.90 21.90 22.60 24.90 21.80 20.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.06 0.72 -4.04 28 133 2012-06-18 14:45:52 2012-06-18 15:45:52 1 4 93 0 76 130 1 88.90 31 42.66 NEW luhshahhslFtsstlPht.lGPluhh...hchhlhpphpllphsahhAhllHlsEAlYAhhLs+ptsl.csssphtWFlQThlhGasSLplLlc .................................................h..hshhhhslFhst.lP...lGPlu.h...sphl.l.p.phpllp.hsahlAhllHl.sEulYAhhLC+cp.sl.sspsphhWFlQTFlhGhsSLslLl.t........ 
0 30 43 54 +14786 PF14935 TMEM138 Transmembrane protein 138 Coggill P, Hetherington L kh6 Jackhmmer:Q9NPI0 Family This family of proteins is found in eukaryotes and members are approximately 160 amino acids in length. There are two conserved sequence motifs: YYY and DPR. This transmembrane protein belongs to a family found in eukaryotes and is involved in the biogenesis and degradation of ciliated cells [1]. Mutations in this protein cause the disease Joubert syndrome(JBTS) where the cilia becomes non-motile. Ciliopathy can be severe since cilia provide the cell with large amounts of information through signals. Ciliopathy can affect cell behaviour as the appropriate signals between the cell and its environment are not made, which can affect cell survival. 27.00 27.00 31.60 31.60 23.70 23.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.53 0.71 -3.78 16 91 2012-06-18 14:53:30 2012-06-18 15:53:30 1 1 76 0 59 79 0 117.90 48 73.53 NEW stllLFllQDssIlhshlllhLshauTaVaQsGhsplLlc+F+hhlllsslYFhLSluhHhWlls.hRh..tssspa.Ws.pGLhALaVlQRlsSVhYYYhYKRTALphuDPRaYc-pl.Wlpcph .................lQLVLFIIQDlsllhslIllhLhhFsT...aVFQuGLlsLLh++F+ssllloslYhsLSIulHsWlhs.lRW.....pssspalWT....cG.LpsLFVhQR...l....uAVlYaYhYKRTAlpluDPRFY.pDSh.WLRcpa...... 0 14 19 40 +14787 PF14936 p53-inducible11 Tumour protein p53-inducible protein 11 Coggill P, Hetherington K kh6 Jackhmmer:O14683 Family TP53 is a tumour suppressor gene, when switched on it suppresses tumour development by inducing stable growth arrest or cell apoptosis [1]. The tumour protein TP53 inducible protein 11 encoded for by the gene TP53I11, has a protein sequence of 189 amino acids in length and 21 kDa in mass. The role of this protein is thought to negatively regulate cell proliferation in response to stress, and therefore suppress tumour formation [1]. 
27.00 27.00 37.90 37.60 22.10 22.00 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.27 0.71 -4.33 6 94 2012-06-18 15:01:03 2012-06-18 16:01:03 1 2 50 0 42 69 0 143.60 62 78.27 NEW MKKHSQTDLVSRLKTRKILGVGGEDDDGEVHRSKISQlLGNEIKFAVREPlGLRlW.hl...SAslFTulAlMALsFPsQLY-sVF-pt.s......oo+lSlRLYGGALLSlSLIhWNuLYTuEKVIIpWTLLoEACYFulQhLVTolT.LlEhGhhu.us.llLLluRlLFlhlolsYYYhLGR+PKK ..............MKKHSQTDLVSRLKTRKILGVGG.E.DDDGEVHRSKISQVLG..NEI....K.....FsV.REPLGLRVWQFl...SAVlFou.lAlMALsFPD..Q..LY-sVF-tups......sSc.ssl.RLYGGALLuluLlhWssLhssE.+s.IphsLLspAsaaulQhlV.................................................................................................. 0 8 11 24 +14788 PF14937 DUF4500 Domain of unknown function (DUF4500) Coggill P, Hetherington K kh6 Jackhmmer:Q96KF7 Family This family is found in eukaryotes. The function of this protein remains unknown. The gene which encodes for this protein is named chromosome 6 open reading frame 162 (C6orf162) and is found between the chromosomal positions 6q15-q16.1. It is thought that this protein may be an important part of membrane function. 27.00 27.00 28.20 28.10 23.50 22.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.05 0.72 -4.15 16 85 2012-06-18 15:12:04 2012-06-18 16:12:04 1 2 73 0 57 80 0 83.80 51 80.74 NEW ppsps.t...htusGlRSl+TToLFRAlN.ELalKPNKslMuhGLlAlohCsGYluYM+sph-spp...hYsAlcuDGpc.hh..+KpS+W ..........h.....pcpshtssGLRusRTToLFRAVNPELFlKPNKsVMAFGLlsloLCVuYIuYhHAppENcp..pLYEAlcS-GcphhR..RKoSKW..................................... 0 13 18 35 +14789 PF14938 SNAP Soluble NSF attachment protein, SNAP Eberhardt R re3 CATH:1qqe_A_00 Domain The soluble NSF attachment protein (SNAP) proteins are involved in vesicular transport between the endoplasmic reticulum and Golgi apparatus [1]. They act as adaptors between SNARE (integral membrane SNAP receptor) proteins and NSF (N-ethylmaleimide-sensitive factor) [2]. 
They are structurally similar to TPR repeats [2]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.82 0.70 -5.26 62 864 2012-10-11 20:01:04 2012-06-18 16:23:28 1 81 388 5 519 1551 81 229.00 25 66.89 NEW scupplhtc.A-KKlpsstuhh.uhF..Gupp.KaE-Au-LappAANtaKlsKpaccAGpsah+uAcsphc.hsspc-AAssas-AucsaKKs........ssp..cAlpsLppAlph.asctG+appAAcappclAElYEp-........tD...........hcpAlcs..YcpAu-aaps-p.usutANpChlKlA.pl.uAphp..pY.cAl-laEclAcpSlsNsLhKaSlKcYaLpAuLCpLsts.DsVusppuLp+YpchDPoFssoREt+hLtsLlpAh-ptDs-tFosslh-aDpho+LDpWKTolLL+lKpslp .....................................................................t....th.tp.Ac+hhp........th.h...................chcpAsphatpA.As.ha....+...h......t..+p........hppAs.psa...hcs.A.p...hp.....p....hp...s....................c...A...A.......p.s.h..h...p.Au......p..s.h.+ch.................s..p...................cA.l.p.h..l...pp.Al....ph...Y...t.....c....h...G...p...h.p.h....A...Ap.th...........t.plAc...........lhE.s....p......................c...................hccAlp.h..Ypp.Au..-....h....a...p..t...-p.......pp....t.......t...A.s........c...hhh....+...sA............p...h...hs.p.h...p.................pY.pcA.h.ph.a.c...p.......h......tt.......h...p.p...h...h....p.....h..s..h.+..t......hhhtt.hl.s...hh.......h..................D.........h.....ts........p.shp.ph...t.....tF..t.oc-ht.h.httl.htuhc........pts......................pthtphh...................................................................................... 0 197 301 426 +14790 PF14939 DCAF15_WD40 ShortName; DDB1-and CUL4-substrate receptor 15, WD repeat Coggill P pcc Jackhmmer:Q66K64 Repeat DCAFs, Ddb1- and Cul4-associated factors, are substrate receptors for the Cul4-Ddb1 Ubiquitin Ligase. There are 18 different factors, the majority of which are WD40-repeat-proteins [1]. 
23.90 21.80 26.50 23.10 23.70 21.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.46 0.70 -4.98 11 84 2012-06-18 15:44:06 2012-06-18 16:44:06 1 3 69 0 57 80 0 186.10 44 29.51 NEW stppptphFp+.lPs+lplsLKslls.s..LhsGHlFLGhTKCGQaLLSYoh.h-h-ss.h.....sshY+YcLaWWpFpP...........+ppLpKltpVpLFs-c..tlsstLplslspW..sDpppllVaG..hp...........susc-spcsYlTlssVPsls.Cp-C+plssu.............ssh.hp.....CLcHshTlHopYpllsPaPsFpPplsLppsshlllNoushlhsLplplshsc ...................................u.tphp.phFc+.lPsRlplsLKsllsps...LhtGHIFhGFopCGpYlLSYssssssss...........shYhYpLYaWtFps...........+p+L+hltpVRLFpDc............tl.splhlo...lspW..sDtphllVaG...............husEspcDhYlohVsVPs.s.ChtCpchspstsh......................ss..up.......CLpHuahlHTKYpll.PaPsFpPshpLppsphlLlNTuh.lhshtlslcs.st...................................... 0 18 22 39 +14791 PF14940 TMEM219 Transmembrane 219 Coggill P, Hetherington K kh6 Jackhmmer:Q9NWD8 Family This protein belongs to a family found in eukaryotes. Proteins in this family are typically between 240 and 315 amino acids in length. The domains in this family vary in length from 202 to 249 amino acids. Its exact function remains unknown, however, it is thought to have a role as a transmembrane protein. More specifically, it is possible that this transmembrane protein may have a role as an insulin-like growth factor binding protein 3-receptor (IGFBP-3R). This receptor binds to the ligand, insulin growth factor 3, which is a p53-induced, apoptosis factor important for cancer prevention [1]. 
27.00 27.00 28.10 30.70 24.60 24.10 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.37 0.70 -5.25 9 99 2012-06-19 15:03:32 2012-06-19 16:03:32 1 2 50 0 54 95 0 210.30 46 75.39 NEW L+salspRPPhVlFhlslhshAluhlsLuhah..phptlpsPDhspDWNphLhphuphcFCs.........tpstshphhhs-oss....................hpspsshslolshhLsls.hhs.sGhspNhsh.lpusltGpplGLpGt.utEplNlTFpLsssh....s...sth..sphtTChshoAssplhPsoh.PPpC.spphssssh.p.hhhsh.pp...........hshpsssh+lhps.sPcLTVhl ....L+lhlup+PPLVshhlsl.hhuluhLsLGhFh..+hptl+SP-hspDWNoFLhpFspLcLCs........................ps.s.t..hN-TsT.....................................hc..ppGPhslolhhTLsls.....sG.sR.NhT+.l.uTlhGpQlGLpGppApEplpIThplsssh..................shsTChshoAssslhPso..P.pCss-shuNATL...h...hs.................s.ttthhphh.t.p.hLo.hl............................. 0 11 15 25 +14792 PF14941 OAF Transcriptional regulator, Out at first Coggill P, Hetherington K kh6 Jackhmmer:Q86UD1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 198 and 332 amino acids in length. The domains in this family vary in length from 239 to 242 amino acids. The gene, OAF (out at first), which encodes this protein, has a promoter which may help mediate regulation of neighbouring genes [1]. An alternative name for this protein is HCV NS5A-transactivated protein 13 target protein 2, which stands for Hepatitis C virus nonstructural 5A-transactivated protein 13 target protein 2. NS5A inhibits double-stranded-RNA-activated protein kinase (PKR) activity, which is thought to allow Hepatitis C Virus replication to continue in the presence of an alpha interferon (IFN)induced antiviral response [2]. 
27.00 27.00 56.30 34.30 22.70 22.20 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.66 0.70 -5.22 7 85 2012-06-19 15:11:22 2012-06-19 16:11:22 1 3 68 0 59 91 0 210.50 50 81.54 NEW u-L+VpVRLsDGQVTEEsLpADSscDhIoLEF+psDGTLITalADFKp-VKIFRALILGELERGQSQaQALCFlTRLp+NEIIPSEuMA+LRQKNP+slRpAEEhRGhEphoMslAVNho+uhQLSsHI+NlCuEA+-AlYTRctDV+aWL-+.....Gh-uShFEhhPpsuphssLppCppspDhWpPClCoYsLsLEWYPChLKYC+uRDs..+s...........osYKCGI+SCpKuYpFsaYVPQKQLCLWDE ................................pLhl.Vp..sGplhpEslpus.stD.IoLEhp+sDGTLlo.hhDF+p-VpIh+ALlLGE.E+GQS..QaQshCFlT+hp+s-hIsS-AMAKLRQKNP+slRpAEEs+GhEphpMsshVshopuh.lS.HlpslCAEAh-AhYsRptDl+hWhEp...............us.tus..hEhhPp.s...........p...................RCtp...sushhtPClCphthsluWYPChLKYC+u+s....ts.................osY+CGI+oCpKsapFsaYV.Q+Q.CLWDE..................................................... 0 16 21 40 +14793 PF14942 Muted Organelle biogenesis, Muted-like protein Coggill P, Hetherington P kh6 Jackhmmer:Q8TDH9 Family The protein is a coiled-coil protein and belongs to a family found in eukaryotes. It undergoes alternative splicing forming two isoforms. The larger isoform is 187 amino acids long in protein sequence length and 21 kDa in mass. The smaller isoform is 110 amino acids long in protein sequence length and 12 kDa in mass. This protein associates with other proteins in order to form biogenesis of lysosome-related organelles complex-1 BLOC1 complex. BLOC-1 is required for the normal biogenesis of specialized organelles of the endosomal-lysosomal system [1]. 
26.00 25.70 26.00 25.70 24.70 24.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.90 0.71 -4.32 11 90 2012-06-19 15:23:15 2012-06-19 16:23:15 1 3 76 0 64 85 0 139.90 38 76.07 NEW llKDlGEIaSRLLDHRPVlQGEI+YFl+EFEEKRuhRElchLEslpphlsEhsEchLP+CppshpspLsplhppLpsAssslp+Lpp+EpcpcK..uspLpsucctRptcWEcFhp-Qpp+ptcVDcEaccthc+lpEQYs-hc+cL ............lh+DlG-IaSRLLDHRPllpGEh+aFlKEFEEKRGhREh+sLcslpphlpEosEphLP+Cpcs....h....p......cpLsp..lhp+L.....psAscslp+Lpp+Epcpcc....sspL.tsccp+ptpW-cFhcc.ppcptclDpEacct.ccLp-pYschphcL................. 0 21 25 44 +14794 PF14943 MRP-S26 Mitochondrial ribosome subunit S26 Eberhardt R re3 Jackhmmer:Q9BYN8 Family This family of proteins corresponds to mitochondrial ribosomal subunit S26 in eukaryotes [1] 27.00 27.00 30.40 29.90 26.10 21.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.00 0.71 -4.70 24 113 2012-06-19 15:32:32 2012-06-19 16:32:32 1 6 90 0 71 118 0 151.90 34 68.71 NEW KPRalPsAKSKhaRVspts.hs.-EhhELpchappY+sthpulRphhp...cEshppphpscssplh.p..cppcE....pEapphhthN-phNtcltchREtRlpcEpEcpcphhhcphhtcppcppphhcptEppVhp.p....EpuKoFITtENLDptIEcALssPssYNaAlDhpGslh ..................................KsRa.PsAKSKhhR.l.h.s.hsstEhh.lhchap..pY+phhpul....Rt.ht...pElhppt.pspss.thh.p.pttpp......pEacplhshNctcNtchtphR.tRlppEtccpcph.hpphttctpctpthhppt-pcVhphp....EcuKsFITtENL-ttIEpALs.s.P.hsYNaAlDhpGph........ 0 23 29 52 +14795 PF14944 TCRP1 Tongue Cancer Chemotherapy Resistant Protein 1 Coggill P, Hetherington K kh6 Jackhmmer:A1KXE4 Family This family of proteins are found in eukaryotes. Tongue Cancer Chemotherapy Resistant-associated Protein 1 (TCRP1) is resistant to the chemotherapy drug, cisplatin, which induces apoptosis in tumour cells. There is suggestion that TCRP1 can be targeted to reverse chemotherapy resistance. 
The precise mechanism of TCRP1 inducing resistance against chemotherapy is still not clear, but it is thought that TCRP1 alters cell signalling pathways affecting apoptosis or DNA repair capacity. Proteins in this family are typically between 194 and 235 amino acids in length [1]. 27.00 27.00 109.60 30.10 22.60 22.10 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.01 0.71 -11.41 0.71 -4.37 3 109 2012-06-19 15:33:00 2012-06-19 16:33:00 1 2 39 0 56 76 0 203.30 71 99.36 NEW MNPVYSPuSoGVPYuNsKGMGYPAGFPsGYAAAAPAYoPNMYAGuNPAFs..........................P..........................................GYTPGTPYKVSCSPooGTVPPYSSSPNPYQTAVYPlRSAYPQQNPYA.......QQGAYYTQPLYAAPPHVIHHTTVVQPNGMPAAMYPAPIPsPRsNGVAMGMVAGTTMAMSAGTLLTSPpPTPVuPHPVSVPTYRPPGTPTYSYVPPQW .....................................MNPVYSPspsGsPYuNsKshuYs..GaPhuYsAAAPAYsPshYPsssPoat............................................s................................................................tYT.sGTPYKVsso.osuAsPPYSsSPNPYQTAhYPlRSAYPQQN.YA........QGsYYTQPlYAA.PHVIHHTTVVQPNuhPu.slYPAPl..s..sPRsNGV.sMGMVAGTTMAMSA...GTLLTs.p.TslusHPVohPTYRA.GTPsYSYVPPpW............................. 0 2 7 23 +14796 PF14945 LLC1 Normal lung function maintenance, Low in Lung Cancer 1 protein Coggill P, Hetherington K kh6 Jackhmmer:Q9H1P6 Family This protein is part of a family found in eukaryotes. It is 137 amino acids long in protein sequence length and mass is approximately 15.7 kDa. The protein is present in the normal lung epithelium, but absent or downregulated in most primary non-small lung cancers. The gene is known as Low in Lung Cancer 1 (LLC1). This protein is thought to have a role in the maintenance of normal lung function and its absence may lead to lung tumourigenesis [1]. 
26.10 26.10 26.40 27.30 26.00 26.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.69 0.71 -3.79 8 54 2012-06-19 16:02:06 2012-06-19 17:02:06 1 2 41 0 42 55 0 111.30 41 84.16 NEW hVapDEIaKDHl++EptApKpWspcWGaLp..sphcclt-cpcchpss+s+lshtpcphhppLPPhcs.ht......tsuPPVPpTTuGhIGWRSupPphNLEhYt+aspstpsKGull+cLpWPpEG .............................hVspDEI.WKh+l+sEpcAppsWsppWGFLs....sshcE.Llcpc.cc....s.sKPKlcLPp+......hplcPloPl-KYIKl.........hPSP.PVPpTTQGFIGWRSulPths.cphc+ptchtpCKGuas+cLpWPcpG............................................................ 0 19 19 24 +14797 PF14946 DUF4501 Domain of unknown function (DUF4501) Coggill P, Hetherington K kh6 Jackhmmer:Q96HA4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 167 and 308 amino acids in length. The exact function of this protein remains unknown, but it is thought to be a single-pass membrane protein. This family contains many highly conserved cysteine residues. 26.50 26.50 53.90 26.50 21.50 21.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.12 0.71 -4.79 9 65 2012-06-20 08:49:22 2012-06-20 09:49:22 1 2 30 0 24 54 0 129.60 50 72.75 NEW SotStuQpPECCsDss-lNuoCsGouLCGPGCYR+WstDGSuSCV+CtNGT.....shaNsSECRshuGRGhphPhN+SoGsPG..phGGPpVAASLFLGThFISoGLILSVAuFFYLKRSSKLPcVFYRRN+APlLQPGEsAuMIPsPQSSVRKPRYVRRERs.-psssPushSosEARlSNV .............................................sp.sECCs-hhshNsoCsssshCuPGCat+htt-us.sClpC.sts..............................s.st.h.su.Gh.hshNpSossss....p.Gu.PpV.AASLhLGThFIS.hLILSVAuFFYLKRo..sKLPcl.h.YpRsKAssLQPuEsAuMIPs..PpS.SlRKPRYlR+-p....................................... 0 2 3 9 +14798 PF14947 HTH_45 Winged helix-turn-helix Eberhardt R re3 CATH:1r7j_A_00 Domain This winged helix-turn-helix domain contains an extended C-terminal alpha helix which is responsible for dimerisation of this domain [1]. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.45 0.72 -4.16 27 290 2012-10-04 14:01:12 2012-06-20 10:24:33 1 6 113 3 147 420 77 75.80 24 67.77 NEW +Ro+h-IIh-ILcsh..pusspKT+IhYpANLsachhpcYlshLhcpGl...I....pp.ssspYplT-KGpclLcphcchhchhp ......................................pl.hhcILph..h.......p.......sstp....h...T..cl.....h..hps.s.....L.sapp....hp+Y.lphLhcpGL...l.......................pp..psp.....p....Y.p..l.TcKG.p.c.hL.c.p.h.cph.p...t....................... 0 48 87 120 +14799 PF14948 RESP18 RESP18 domain Eberhardt R re3 Jackhmmer:Q5W5W9 Family This domain is found in the glucocorticoid-responsive protein regulated endocrine-specific protein 18 (RESP18) and in the N-terminal extracellular region of receptor-type tyrosine-protein phosphatases containing the protein-tyrosine phosphatase receptor IA-2 domain (Pfam:PF11548) [1,2]. 25.00 25.00 25.40 27.50 21.50 24.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.31 0.72 -3.98 6 113 2012-06-20 10:28:28 2012-06-20 11:28:28 1 3 36 0 39 121 0 96.30 40 14.55 NEW GQuQVGVGQhWPL.GhsTPVFQ+LQsVLQQIsPpGLFWKDDhTQcVMoQKMt+IS+LHPp-P.....CsRsspAusPT+TstshuKQEEKLpLLhP...tpSPhVKVNR-pC .............GQsQsussQhhshhpVosPVLQ+LQs.VLpQLh...sQG...LoW+D.DlTQaVloQEME+lPR..L.+..P...-P.....ps+DtpulsPp+s....s.ttp......P............................................................................. 0 3 5 11 +14800 PF14949 ARF7EP_C ARF7 effector protein C-terminus Eberhardt R re3 Jackhmmer:Q8N8R7 Family This family represents the C-terminus of the ARF7 effector protein (ARF7EP). ARF7EP interacts with ADP-ribosylation factor-like protein 14 and unconventional myosin-Ie and through this interaction controls movement of MHC-II-containing vesicles along the actin cytoskeleton in dendritic cells [1]. It contains a conserved CXCXXXXCXXCXXXCXXCXXXXCXXXCXC motif in it's C-terminal half. 
27.00 27.00 33.40 33.40 19.50 20.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.15 0.72 -10.99 0.72 -3.61 25 126 2012-06-20 11:48:40 2012-06-20 12:48:40 1 4 78 0 92 118 0 103.30 45 54.39 NEW csp+pLcpLthpsssp.hl.....ssFsPpsopRcKR+h...scp.p......shsccsplYDcpGhLhpsu...t...DLCDCL-h-CsGCaaPCscCuSsKCGscCRsNRKWhY-plEh-u ....................................................................p.t+tL+sLtFpNPG.ps........-.Fs....Pcs...t.p....RcKRth..hpphs..................hpsh..tps+hYDppGhLltss......h......DLCDCL-c-ChGCFYPCPpCsSsKCGsECRCsRKWlY-pIchEu....... 0 20 26 51 +14801 PF14950 DUF4502 Domain of unknown function (DUF4502) Eberhardt R re3 Jackhmmer:Q14159 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 181 and 876 amino acids in length. 27.00 27.00 37.90 37.30 26.10 23.90 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.31 0.70 -5.29 4 71 2012-06-20 15:14:56 2012-06-20 16:14:56 1 5 36 0 34 68 0 196.00 42 38.14 NEW KRKRshchEpPSFPsEps.phRRuGh+TstsusSLScAWLRCGEGF.coush.SL.TuEKKohTEKHLELss+PKpE.TTSK..uTStLssIsWSSStSDhSDEDKThs..........phQR.........................D-LQhIDWElDSD+t-ss-sD.EhE--c.sl-ISDCsSsA...SLTs--p.sE.PcssssEILEYSSDSEc--DsEpsLhIDSESsHKYcssFtSDuR.lh.p.hs.cscSsEsILpTPQK.Ts.....KhPKTPEsSuK+.KKLLRGGLAERLNtLQNRcRSAISLWRHQClSYQpT.uGcKSGVLTVKILELHEECuMQVAhCEQLst..hsusstuhA..sGAsLKVLFTKETAspLpG+PQDlV+IaPPWQKL ...........................................................................................................................................................................................................................................................................................................................................................................................................................hplshC.c.h..........ssts...........ts.lhVLFo+ETsthL.s.tPtDhlhIaPPWp............. 
0 5 7 14 +14802 PF14951 DUF4503 Domain of unknown function (DUF4503) Eberhardt R re3 Jackhmmer:Q14159 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 313 and 876 amino acids in length. 27.00 27.00 122.80 61.50 21.30 20.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.32 0.70 -6.32 4 70 2012-06-20 15:23:30 2012-06-20 16:23:30 1 4 36 0 29 74 0 325.50 56 58.67 NEW QDAhGMFuEVHLpushhK.......u+QLEGKSCSLsGMKVLQKsTRGRTsGLFSLIDoLWPPllPLKsPGpuQsspplcTaLPPPuFCYILoAHPslGQIDhI-tD.IsKLYQPPVsRsLR-IlQhN-hSTRCSFYApVIYQ+PQLpSLL..tQ+EIWLhVTDlTLQhp-EpssuLPKTLsVhlAPSCVLusEVlEALsstssauLLF+DAlR-pGRIVClERTVLLlQKPLLussSuspSC-LsuPVpLD-LDusT.VNSICSVQGTVVGVDESTAFSWPVCDhCGNsRLEQpPEDRGsFSCGDCSplVoSPll+RHLpVFLDCsSRPpCTV+VKL.QpSIS.LLRhAAuEDGSYEVcSVLGKEVG.LNCFVQSlTop.uSC.VsLEEIELLSAs ..............QDuhGhFuEVpLph..sh.p.......s+phEG+oCpLsGhKVLQ+sTRGRssGlFSLIDoLWPPshPLKs......PGps.Qs.s.cEh+sp...LP...PPuhCYlLoApss.upl-lh-...............t-sIspLYpPPss+sL+-ILQhssh.u.sRCSFaApVIYQR...PQLp.ull.lp.QREIWLlVTDsTLQhp-EpcspLP..KTL.VhVus.CVLssEVlEALssuus+slhFKDALR-p.GR.IlC.sERTVLLL..QKPl..LsssSu..........ApssE...LssPVh...........LDpLDSsT.VNSICSVQG..sVVGVDEsTAFSWPVCshCGNu+LEppPc..c.pGsFpCupCuplVsSPlh+hpLpVFLsC.SpPps.pVKVKLhQpo.ISSLLt.uAt.E...D...GsYEVcsVLGpcVG.L.saVpuhotp.s.h.hsLEElpL........................................... 0 4 5 13 +14803 PF14952 zf-tcix Putative treble-clef, zinc-finger, Zn-binding Coggill P pcc Jackhmmer:Q9NWW7 Domain This domain resembles the zinc-binding domain of prokaryotic topoisomerases, family DNA_ligase_ZBD Pfam:PF03119. The function of the eukaryotic proteins it is carried on is not known. 
31.20 31.20 31.90 39.20 27.60 31.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.54 0.72 -4.62 12 74 2012-06-20 15:35:29 2012-06-20 16:35:29 1 2 59 0 45 72 0 43.10 65 7.99 NEW pplu+hTlRGl+KCPcCGshN..GoRuh.CKNcsCstlhpthsstp .SDLGKATLRGIRKCP+CGTYN..GTRG.LSCKNKsCGslFRhuucK.p... 0 8 11 26 +14804 PF14953 DUF4504 Domain of unknown function (DUF4504) Eberhardt R re3 Jackhmmer:Q96LT6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 253 and 329 amino acids in length. There are two conserved sequence motifs: LLGYP and SFS. 25.00 25.00 25.70 25.10 24.30 24.20 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.71 0.70 -5.10 29 96 2012-06-21 08:33:36 2012-06-21 09:33:36 1 6 77 0 61 96 0 236.90 26 75.63 NEW stpshhttt+p+hstssshcLss-lLAlspGL+PslLhDhs...usssspLQpaLppL.....Qssuhl......hp.........sL+lh.Ip-.sh...........hllpscthsp+lcpsLhupst.ll.VshppppPslso..p.sslts.ltsllsphp...................................................shppst.ssshssplpsopasLsTl.GhLLGYPVsYh....Fspsput-..pCLohpsL+la..........pshl..sapsspst...t.......p.pLhSFSVPpsL.sthps.h-sWtcphhs+hppp.....ssassLplpspssphsulsL ..........................................................h.......thphp...s.phpLtt-llAVspG.L+PulLhDhs...ss..hsplQphL.ppL.......pshshl...............hp.......sL+l.h.ls-.s............hllssctht.p.a.l...cpshh...up.t...hlpVs.pptpPplhs...p..ps.ltshltp.lhpthp.................................................................................t.pps..hhs..sthpssshsL..sTlhGlLLGYPVsYh...............Fp.spspc.......sCLuhssLcla..........pshh...sh..t..p......................................LhSFSlPtsl..thpp......p.W.pph.tphptp........p.hts..lphp.p.hp..s............................................................................. 
1 20 33 46 +14805 PF14954 LIX1 Limb expression 1 Eberhardt R re3 Jackhmmer:Q8IVB5 Family This entry represents the limb expression 1 (LIX1) family [1]. 27.00 27.00 31.40 31.20 25.90 18.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.57 0.70 -5.40 8 135 2012-06-21 10:00:33 2012-06-21 11:00:33 1 3 70 0 80 111 0 228.70 68 83.86 NEW c-ulsulspshAcaspGaucVNVVEALQEFWQMKpARGAch+sGALVlYESlPSsuPPYVCYVTLPGGSCFGSFQsCPTKAEARRSAAKIALMNSVFNEHPSRRITD-FIEKAVsEApASF..............pG.sss-s-sPsTGIGAFRFMLEuNKG+TMLEFQELMTVFQLLHWNGSLKAMRERpCSRQEVlAHYSpRuLDD-MRSQMALDWlsREpE...o.PGlLSpELAhAE+ELEpARLAGRELRFPKEKKDIL.LAtuQl ....................p.........s.tp.s.sh.t..c.....lNVVphLQEFW.phKQsR.....G.......As..............h...pstuLVVYE.lPSsuPPYVCYVTLPGGSCFGsFQhCsTKAEARRsAAKlALMNSVFNEhPSRRITcEFIp+SVpEAlASh.....................sG..sh--ADsP.sTuIGAa+aMLESNpGKoMLEFQELMTVFQLLHWNGSLKAhRERpCSRQEVluaYSpRuLD-cMRspMALDWltREpp...s...PGhlupELt.spREL-cARhAGpELRFaKEKK-IL.LAhsQl..................... 0 15 21 43 +14806 PF14955 MRP-S24 Mitochondrial ribosome subunit S24 Eberhardt R re3 Jackhmmer:Q96EL2 Family This family of proteins corresponds to mitochondrial ribosomal subunit S24 in eukaryotes [1-2]. 25.00 25.00 27.80 27.00 22.60 21.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.58 0.71 -4.33 19 118 2012-06-21 10:09:16 2012-06-21 11:09:16 1 4 92 0 73 109 0 125.70 48 78.56 NEW tKspuGRa+lo.+ts+PLTYEMAp.PHhIuHRKoWNSWpTuNLcsttp.suEs.........slEDhFIR+FlpGTaHshls...SEllIKRppNhIhIAull..hpplsspKhYFLlGYTEElLShaL+CPVKLElQoVssKpsVlaKYI ......................s.+spuuRh+ls.+ts+PlTYE.ApsPHaIuHRKuW.ShpTuNLcGp.......t+.su-p.........slEDhFlR+FhhGT.a.uhls...sEllIKRptNhlcIsull...hpplss+KhYFLlGYoEpLLSaahKCPV+LELQTVssK..VlaKYl................ 0 29 35 54 +14807 PF14956 DUF4505 Domain of unknown function (DUF4505) Eberhardt R re3 Jackhmmer:Q6P1X6 Family This family of proteins is found in bacteria and eukaryotes. 
Proteins in this family are typically between 166 and 225 amino acids in length. 27.00 27.00 43.60 33.10 19.30 22.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.32 0.71 -4.65 12 106 2012-06-21 10:49:48 2012-06-21 11:49:48 1 2 92 \N 71 107 2 161.30 42 81.20 NEW slpYsQGQSPpP+lREYFYYIDHpGMLFLDDu+hKNFTSCFKEK+FLcFFFpRLRhNc.....ouRYcp-FPalS.CGRERNalRCDDhPlVFTHllpcsssppp.....LsYsHuG-hLoV.F-Pp+lhMhPtoGRVYHPAPE+sGGlGLlRSpLAIELSppFsFssG-s..pP.PTHFpWsGppaELspcWhcs ...........................t............t.phREYFYalDHpG....LF........LDDu+hKNFhoCFK-h.pFL.FFFpRLR.Np............................osRY.pt.......pFPalS.CG+ERNFlR.....C-D..pPlVFT.cl..lttspt.t.t..................Lsastu.uptLslP.FpPppLhh.spsGRlYH....PA.....P....p.....p......s......G.....u...l.....GLV+StLAhE.LStpFpats.....st........t...Psph.WpspphtLp......s........................................................... 0 30 37 54 +14808 PF14957 BORG_CEP Cdc42 effector Eberhardt R re3 Jackhmmer:O14613 Family The Cdc42 effector (CEP) or binder of Rho GTPases (BORG) proteins are involved in the organisation of the actin cytoskeleton [1]. They may function as negative regulators of Rho GTPase signaling [2]. 
25.00 25.00 25.20 25.20 24.90 24.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.70 0.71 -3.20 13 233 2012-06-21 12:15:36 2012-06-21 13:15:36 1 5 43 0 137 195 0 116.20 30 44.46 NEW hlKNAlSLPhLsspputphsstph................sKS.SSSPsKpsp.ttt...........p.hNuuuutus...hc.phsEppFGpLTDhs.............................sstspssuhc+A-SlhSFHlDLGPShLuDVLulMDKpth-pc .................................................................................................................................................................hlKNAhSLP..lst...th..hs..p................................................t...sPs+.sp..ht..t............................hp.s.ut.st..........p.s...stpt...sphps.............................................................................................................................................s.spps..hh.cpu-SlLS...h+..l....DLGPSlLs-VLslMDpt............................................................... 0 7 23 57 +14809 PF14958 DUF4506 Domain of unknown function (DUF4506) Eberhardt R re3 Jackhmmer:Q9H8K7 Family This domain family is found in eukaryotes, and is approximately 140 amino acids in length. 27.00 27.00 27.60 27.50 26.40 24.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.71 0.71 -4.60 14 90 2012-06-21 12:23:39 2012-06-21 13:23:39 1 1 68 0 65 83 0 134.90 33 35.56 NEW p-tslLhs..ss-sp.sPC.LplphsPc...cIuulsllsSA.slEla.lG..pEYptTshGcslp...pppsc.tlphYRhclcl-.SshsshplKLLo....upchCVasu+lhls.pssslsop.....hsptIDlp+VQplLpphGo ..............p.sllhp...pspsp.pP..ChLhlpssPp...pcIsulsllSsARshEVY..l...G......p....EYCGTsR.Gc...sVs......................tssppcplhlY+p.LcL-...os..spuCclK..LLSh........uc+.psValu+lhVphpssssss..s.t..slsstlDLp+VQshhpshGo................................ 
0 14 21 35 +14810 PF14959 GSAP-16 gamma-Secretase-activating protein C-term Coggill P pcc Jackhmmer:A4D1B5 Family GSAP, or gamma-secretase-activating protein, also known as PION, regulates gamma-secretase activity. The holo-protein is a large, approx 850 residue protein that is rapidly cleaved to an active 16 kDa C-terminal fragment that is the stable, predominant form. GSAP is expressed in inclusion bodies and is important in brain function. It dramatically and selectively increases neurotoxic beta-Amyloid production in the brain through a mechanism involving its interactions with both gamma-secretase and its substrate, the amyloid precursor protein C-terminal fragment (APP-CTF). Accumulation of neurotoxic beta-Amyloid is a major hallmark of Alzheimer's disease. Formation of beta-Amyloid is catalysed by gamma-secretase, a protease with numerous substrates that catalyses the intra-membrane cleavage of integral membrane proteins such as Notch receptors and APP (beta-amyloid precursor protein) [1]. The secondary structure of GSAP is largely alpha-helical, lacking well-defined tertiary structure. GSAP represents a type of gamma-secretase regulator that directs enzyme specificity by interacting with a specific substrate [2]. 27.00 27.00 30.40 42.60 26.40 20.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.29 0.71 -4.34 14 77 2012-06-21 13:25:58 2012-06-21 14:25:58 1 3 66 0 53 85 0 111.40 39 13.83 NEW L-hsptLhpLlp+hhthshp.....hpph+shsh.lsphssspc...htlFplhpRhhpAspsLshPhP.GFpohashLGhRsLshcsFLQYl-psVhpLotsslptlhpDl.-so.ccs..pphKa ........................L-h.ptLhpLls+hhthsh+.......hcph+uh.L.....hsphuust.c...aslFclhpRhhpAs.polshPLPsGFpohaThLGh+CLshcshLpYl-suVh.Lopsslpplhp.Dl.-so.pp.s...chKh......... 
0 17 22 37 +14811 PF14960 ATP_synth_reg ATP synthase regulation Eberhardt R re3 Jackhmmer:Q96IX5 Family Members of this family are subunits of mitochondrial ATP synthase (F-ATPase) [1-2] and vacuolar ATPase (V-ATPase) [3]. In F-ATPase, this subunit regulates mitochondrial ATP synthase population [4]. 25.00 25.00 25.90 25.20 24.30 18.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.45 0.72 -4.77 11 92 2012-06-21 13:36:47 2012-06-21 14:36:47 1 3 58 0 53 100 0 46.90 48 42.08 NEW MAu....tt.thpGhp+hFNupThsGRANsAKATYAuluLlhlaa+h+.pK ...........MAG.....tt.phpGhpKaFNShThsGRtNsshATYAuluLlllaa.+l+.+K..... 0 15 18 32 +14812 PF14961 BROMI Broad-minded protein Eberhardt R re3 Jackhmmer:Q96NH3 Family Broad-minded protein (BROMI) interacts with cell cycle-related kinase (CCRK), together these proteins regulate ciliary membrane and axonemal growth [1]. 25.00 25.00 25.30 25.00 16.30 22.50 hmmbuild -o /dev/null HMM SEED 1296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.44 0.70 -14.11 0.70 -7.43 2 151 2012-06-21 13:57:55 2012-06-21 14:57:55 1 4 62 0 96 138 1 532.70 29 89.28 NEW 
LQshLRpLhpSVK-+IoGAPSlECAEEILLHLEETDcNFHNYEFVKYLRpalpsoLGuhIE.EhEpaTpspsps.tSG.DTlVptVTKpTpESppYKpMMpoLKphMMhVVEShINKFEEDpMpppEhp+KIQ+ppS.S.hsDNCSDSDSSFNQSYtFh.ptpLQlIh-pLDPGpP+EVRaEALQoLCpAPPSDVLsCEsWTsLpcpLosuLoDPDP.hSD+lLpFaApTFo.SPLphTK-IYsSl..sKhLchaFL.+c.phPohoTuIDhspPshhpLLKphRLhN-aQKEssoFWIRHPEKYMEEIlENTLSLLul+p-Q..Sp.sSpK.L-PIahhuLlDhKAsWFKKWMHuYYSRTsVLRLLE+KYKSLlssAlQQCl.Yh-.C-Ahp.-EhLth.+phtppp....tphhYoupELpalYFlHSLClLGRLlhYTpGR+hFPIKlKpR+D.VoLTDLlVlhhplhY.pPp.PphspsAhhDshSPsshVhEVLRhLCDppECAVECLYp.sVIEsLLtPlhsLhpGphst.ss.EoALhHhADhLARIAos-cGLoLLLYscNhsSuEtcS.ouAHlIsQFopKLLsc-lpl.suSpM....pGAFI.VCRQhYsTCEGLQVLhPYSLHEsIApAW+pTS.hSERlPTPV.sussh.u.SQE.QsshAWEE.LLDsLLNFAATPKGLLhLQpTGAINECVTaMFsRasKKLQlSRpcKFGYGVhVTQVAoTAsGhlALQSSGFlpTllsELWusLECGR-DVRlsHP+STPhDPIDRSC.KSFLuLVNLLs.P.AVaELlGppsLPNKpEYsLREhsTsVlDlhDRLIIlNS-AKI+SLFNYEQSHhFGLRLLSVlCCsLsohLLLEuQYplo-lLLpuQc-NlhEsspuct-FIIDGLSVERNHlLVRIshlGGP.ERhLPPRsLpKGsDPYPWPMhSoYPLPphYl.-VsK.hchKQ-s-lGthLhp.K.o-+pspWh-sCRRQFCKhMtsKsshloG.sLh-LLEhhVLHLSpSss-CaFPssE.pssDssVKscSLSSVpQLGlcholRYGKFLpLL+-suEpDLsLlLKHCpcFLpQQps.lpSpL............ssYsGHDWFsSolFhlMhGDht+oLphL.+FSRLLsSAFLW.PRLH.ShaLsh-hhpSuIHPlY.CosHYlEMLLKsEVPLVFSAF+MSGFsPSQIClQWluQCFWNYLDW.EICpYlsTCVhhGPDYQVYhCluhhRHLQQDILQHTQTQDLQVFLKEEslpGFRVSsYhEYMEhLEpsYRshVLpDMRsIhspSo 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 33 35 68 +14813 PF14962 AIF-MLS Mitochondria Localisation Sequence Coggill P, Hetherington K kh6 Jackhmmer:Q8TDB4 Family This family contains a protein found in eukaryotes. Proteins in this family are typically between 240 and 613 amino acids in length. The family is found in association with Pfam:PF07992. This protein family is an N-terminal domain for the mitochondrial localisation sequence for an apoptosis-inducing factor [1]. The protein is also known as Corneal endothelium-specific protein 1 or as Ovary-specific acidic protein. It is thought to be important for membrane function and is expressed in the ovary and corneal endothelium. 27.00 27.00 32.20 28.80 25.10 24.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.24 0.71 -4.63 7 112 2012-06-21 14:32:17 2012-06-21 15:32:17 1 4 39 1 56 87 0 138.80 31 31.37 NEW MahpRAV.+pLA..hRAssssAPLt...KDA.........olRthSSsthPGsSGoNMlYallVGsoloAGGhYsYKTVoucps+asE+hsplcp+TKuElpP..hpucpEslupsEcAssE.......As..olsEus.....sl-sEEsPsA.shssscEus.ssssps..EAs.sEssslsuEstPcVsD...AAs..Eos..ssEss.EVp ..............................uh.ppLs..hRsss.....sht...+st...................hRphuSsussGu.sG.sNhlYhLlVGlossG.uGhYAYKTlppDppRYsERlssl....pp+s+.tp.h..p.......................................................................................................................................................................t............................. 0 2 7 21 +14814 PF14963 CAML Calcium signal-modulating cyclophilin ligand Eberhardt R re3 Jackhmmer:P49069 Family Calcium signal-modulating cyclophilin ligand was originally identified in a screen for cyclophilin B-interacting proteins. It is likely to be involved in calcium signalling [1]. It has also been shown to interact with many other signalling molecules including proto-oncogene tyrosine-protein kinase LCK, tumor necrosis factor receptor superfamily member 13B and EGFR [2-4]. 
25.00 25.00 25.60 25.50 24.40 24.80 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.69 0.70 -5.03 5 67 2012-06-21 15:17:21 2012-06-21 16:17:21 1 5 46 0 39 60 0 205.50 50 78.87 NEW SASQRRAElRRRKLLMNSEERlNRIMGhH+PssGsp-Espp-oc..h-.DKosPLoLsSlSKRo.llpGDuVp..ouss-.psuSuu-t+ssplG-KLD.a.Kssphpu-DsutlRcRNRGDlsS-ussRss+cGL-pYLSRFDEAMKLRsQLssEKPSQDcGsssEE....FDSFRIFRLVGsALLAlsVR.hFVCKYLSIFA......PFLTLQLAYMGLSKYFPKGEKKVKTT..VLTAALLLSGIPAEVINRSMDTYSKMGDVFTDLCVYFFTFIFCHElLLaaG ..........................................oAuQRRAElRRRKLL.NSEpRhNRIhGap+s.ssssp-.Es.phpth......-.-+.pshsh.sSsSKR...sshGssss........suss-.p.ss...hhs.t.h.tp.p..........cs.htlR.RpRst..ssp..shtpss+.GLppYLSRF--AMKLRtQLhsEKPsQ-sGsssEE....hDsFRlFRLVGssLLAlhVR.hFVCKYL...S.IFA......PFLTLp..LAaMGL.KYFPKsEKKhpTT.........VLTAALLLSG.IPA..EVIsRSMDTYp...+Mu-.VFsDL.CVYFFTFIhsHElh.hhG........................................................................... 0 6 10 23 +14815 PF14964 DUF4507 Domain of unknown function (DUF4507) Eberhardt R re3 Jackhmmer:Q96N11 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 346 and 434 amino acids in length. 
27.00 27.00 28.30 28.20 25.10 23.10 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.19 0.70 -5.71 18 101 2012-06-21 15:24:45 2012-06-21 16:24:45 1 3 82 0 69 102 0 311.90 35 86.20 NEW M..scl+psL++hcFPpsApEALt+l-.......pLhssR..s.pttchsh-llsEFlFtEhc.ccspttp.................phsslQELQLlplLs-a...Fsps.us-AsRsslFhuLFuspts.t...........................RhplLu+LVShAVussssslLsuAGhWMQQ..lGssSs.SlcLApsllsDYhsLs......ssos-pLKpLshluP+FsusFlTAVs-LY.s.....tpsp.tsPPssLL-lIs-Wlo-NPp.LClss.p...thsLPs.........Gulshs.hTPlsGLlRW.CVLAPLspspppp..........................lYSKLHLulLpuL....................phssssspttslsupcLspllcsLpphhpp......sssssspthplul-RhAQAlQVAhuosClhsNpppLhshLppLPtp ......................................h......p.shh..ph.tsAhEsLhpl-...........p..hpp.......hpphshpllpEFlF...p...ccss..p.................phsslQELQLlclhssY...Fpcp..scDusRphlF.uLFuspts.t..........................p....RhplLu+LVShAV...............u.ssphPlLpsAusWhQp...........s..ss...hslcLApslVcDYssls............................ssohppL+pl.psuP+FsspFlTuVssLY.............sst..h.PP.sLL-hIspWlhEsPp.lhhs.hp....shsL....Ph...........Ghlth....TPLsGLlRW.sVhAPLs.ppp.......................................hYSpLHLulLpsL...............................hp.ps.hsphthl.hpchsslV.cplpphssp.......hss.psspthphul-RlAQAlQlAhuosslhsspppLhslhppLP........................................................................................................... 0 23 29 50 +14816 PF14965 BRI3BP Negative regulator of p53/TP53 Coggill P, Hetherington K kh6 Jackhmmer:Q8WY22 Family This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 213 and 245 amino acids in length. It is found in various tissues, including the brain, liver and kidneys. It was first discovered as a functional unknown gene, murine brain I3 (BRI3). 
This protein is also known as HCCRBP-1 and it plays a role in tumourigenesis, as it binds to an oncogene, HCCR-1, and acts as a negative regulator of p53/TP53 tumour suppressor. BRI3BP induces tumourigenesis by activating protein kinase C (PKC) activity but decreasing the pro-apoptotic PKC-alpha and PKC-delta isoform levels. BRI3BP is over-expressed in many tumours [1]. 27.00 27.00 66.20 65.00 25.20 21.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.07 0.71 -4.90 26 99 2012-06-21 16:19:23 2012-06-21 17:19:23 1 2 38 0 51 85 0 172.40 45 78.46 NEW hpphupohppTLsshlGtEsh+slpchhSplh.thspulsVhh.sL.tIhspLLssLGLDuspLTQ..hhSPupVpo....hLLhsuusLlAYWhLSLLLGhshuLL....GRhhWhl+lsLFhhuhVhll+ph.sssp+AlL.Lshlsshhhhot.sGshhpt..........spLEtKlcpL-pQlc.LphRhpR ........................pphupuhhtolsshhG.-sh+hltchhoplh.thspulsshhtsLhtlhscLLcsLGLDu..spLoQ.....hhSPupVpo.....hLLhsuusLlAYWhLSLhLGhshulL....GRhhWll+llLFhhuhVhllpp.h.s.sp+AlL.Lshllhhhhhot.sG.hhpt................spLEtKlctLEpQlc.LphR.pR.................... 0 2 5 18 +14817 PF14966 DNA_repr_REX1B DNA repair REX1-B Eberhardt R re3 Jackhmmer:Q96EN9 Family This family of proteins includes Chlamydomonas reinhardtii REX1-B (Required for Excision 1-B) which is involved in a light-independent DNA repair pathway [1]. 24.00 24.00 24.00 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.13 0.72 -3.61 20 102 2012-06-22 07:42:38 2012-06-22 08:42:38 1 2 75 0 64 107 0 91.40 35 42.95 NEW stsLL+pFhplQpcRApsYscaccGFpsalpsusp...ssYppLCsclTp-FsshScp........llplEstLp..chsRs-lApllcslQppEKpKLpLTAplQlLK ...............h..tLlpphhtlQpcRsphapphccGappYlpousp...stYpphspc.lTptFsssS+p..........................VltlEutLtt..hspssLAphl+slQptE+p+LphsAhlQlh................... 0 26 39 49 +14818 PF14967 FAM70 FAM70 protein Eberhardt R re3 Jackhmmer:Q8WV15 Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 241 and 349 amino acids in length. The function of this family is unknown. 27.00 27.00 33.10 27.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.22 0.70 -5.23 7 144 2012-06-22 08:12:48 2012-06-22 09:12:48 1 3 39 0 58 143 0 275.00 57 97.69 NEW h.s...s.sssl.h.sshuuFs+RK+sSlahssoLLlVSlhILTlGLAATTRTENVTVGGYYPGlILGFGSFLGIIGhNLlEN+RQMLVAuIVFISFGVVAAFCCAIVDGVFAARHI-.RPLhuGRCpaaSSssuahhD...h......................EVTCpo.s.stCpLKV+SNTCYCCDLYNCtsp.E.ssuYYEalsVpuCQDVlHLY+LLWuuslLNllGLFLGIITAAVLGuFKDM..sshup.shu.ss.Ppl.YssstQlhuYsuFh.ossslPshouY..sLQ.suhFPu.........ossSsLu...DsQssusS....hhsspsPPpYuPsYa.PsEKPPPYoP ...................................htsFsRRK+sSlahssoLLlVSlLILTlGLAATTRTpNVTVGGYYP.GlILGFGSFLGIIG.sLlEN+RQML....VAuIVFISFGVlAAFCCAIVDGVFAARHI-h+PLhs.s...RCpah.spssth..h-.h.t.........................pV.sC.phs.s.Cp.+l+uNTCaCCDLYsCGsp.EhssuY.....YEalsVpSCQDl.lHLY+LLWuuTlLNllGLFLGIlTAAVLGuFKDM........s....u..........s.s....ss.Ppl.Y.st..Q..lhuYssah.osspLPsh.SuY..shQ............s.usFPu....ossos...lS....-sQ.sso.ssS.ahh..sssAPPpYuPsYa.PhEKPPPYsP....................................................................................... 0 3 7 22 +14819 PF14968 CCDC84 Coiled coil protein 84 Eberhardt R re3 Jackhmmer:Q86UT8 Family The function of this coiled-coil domain-containing family is not known. It is found in eukaryotes. 
25.00 25.00 25.50 25.50 24.50 24.60 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.54 0.70 -4.93 13 109 2012-06-22 08:26:03 2012-06-22 09:26:03 1 3 64 0 60 112 0 256.10 32 89.59 NEW aCplCRpNHsp.G++H+Y.ssH+puLsshLs+FpsKls-lRthL+sPsl.c.t..pscs+hWChFCct-lt-psS....ohssusAlpHLASs-HlKsl++Fhh+aGushcph-pFpIocs-hu+acppsppshsphpsps-shhsptsssl+cl.cs.........p.pp.phh.pshpss..p..ppP..sst...pssshshhhhssp.t..httotts.....................................................ssshshs.ss...shshtts..G..sL....sslus..shsut.GNVHoGAsPPWLpss-cs.....tssp..phsPSspuhhpppcptKh+KLNPcR..V...GAsascc...p+................ssssWLPsFGRVWQSGsRpcSR+EFc+E+pphcc ..............................................C.lC+.sa.p.GctHhY..pHpppLpthLp+hh.plpssRhhl+ssps.p..............tpcpphW.ChhCst-lpcp.S.....ohhhushlpHLA..Ss-Hh+sspcFhhcptuphp.h-..pFhlo..p-ht+acpph...phhpph..tppp-t.h.tt.hs...pphcp.........p.p.p.h.psh.ps.......s.......p...........................................................................................................................................................................t..s.........h...h....s.lu....t.ssh.GNlHoGA.PPWh.tp-t..........t.........p..s...t.hhppp....p.ptK.+KL.sspR..V...GAsascp...pp................ssssWLPsFGRVWpsGpRhpSR+pFctctt....p.................................................... 0 14 28 41 +14820 PF14969 DUF4508 Domain of unknown function (DUF4508) Eberhardt R re3 Jackhmmer:Q9NWQ9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 253 amino acids in length. 
27.00 27.00 28.30 27.80 25.70 24.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.49 0.72 -3.99 12 83 2012-06-22 08:49:40 2012-06-22 09:49:40 1 4 70 0 53 78 0 98.90 44 54.48 NEW stpEhRhllpWFtpWSthQRpcFLpsLlpK........Asss.p......lsuLlsuLpsLsl..pD+PPSlFpCpl+LaspWFpsWs-p-+sphLppLcchDscFst+ahpclu ....spEh+ClLpWFssWSssQR-cFLpDLVuK........................AVPG.K..............lpsLLcuLppLuV...sD+PPsIF-CQL+LacQWFpsWuEpERNcFlcpLEtt-scFssKaaptlu.............. 0 11 15 33 +14821 PF14970 DUF4509 Domain of unknown function (DUF4509) Eberhardt R re3 Jackhmmer:Q86SX3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 212 and 449 amino acids in length. There is a conserved WLL sequence motif. 27.00 27.00 27.90 27.50 25.90 24.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.33 0.71 -4.63 8 80 2012-06-22 09:10:21 2012-06-22 10:10:21 1 4 45 0 38 72 0 151.30 42 43.60 NEW DR..PEAshsLWplLa................phL.psLs-hph........hsssts-sp.hpl...VKpALtspGY.PphphhQLPpDsupGSRELLLALuWLLA+ssllEph..Ltpp+Vphucthslsp..........................sEs.sS.u.P.ustht..scts...s-l+clpWLhG+LRapaRsLhopppEQstLlsKIHhhTp.ss+Sc....QsLuH..LSVsEschL+DPEshpp ...................................................hWplLh................t.L...l...t.........htp.h.psp..hph....VK.sLt..GY.PphthhpLPpDus....pGSRELLLALuWLLucssl.-ph....LtptpV...l.ucph.s.lsp................................s-shsS.s.s.....ustht....scss......sDlRtlpWLhG+LRapWRpLhspQQEpCtLLsKI.H.hTt.usp.p....psLsH..LSVsEschl+cP-shpp...................................... 0 11 15 24 +14822 PF14971 DUF4510 Domain of unknown function (DUF4510) Eberhardt R re3 Jackhmmer:Q86SX3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 242 and 452 amino acids in length. There are two conserved sequence motifs: LEA and WMD. 
27.00 27.00 91.90 91.90 20.40 19.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.07 0.71 -4.07 4 44 2012-06-22 09:22:30 2012-06-22 10:22:30 1 3 20 0 11 48 0 161.10 69 41.03 NEW TCusEsPsssuQPsFLPhlsE.sGuscL-hVsppLQALpEELppssEsRRAAWEA+lGGsupGsEWSAuR+AspEAVppE..L.uALp........tsWEcuusPu....QPp.sP+RLV+u-sGAuss.tsLpAApVItsLRu+EACLctsL+pLQpQCRQELARLAuAhPGLIWI.PPt .....................TCuPEsPAAASQPTFLPhlPE.pGsGELELVsRELQALpEELp.......EAsEpRRAAWEAKAGGCGpGPEWSAu.RRASREAVE+E..L.uALQ........psWEpDuGPA....QPH.GPHRLVRREDGAAGs.psLRA.AEVIRTLRSQEACLEAVL+pLQGQCRQELARLsGALPGLIWIPPPG. 0 1 2 2 +14823 PF14972 Mito_morph_reg Mitochondrial morphogenesis regulator Eberhardt R re3 Jackhmmer:P17152 Family This family of proteins regulate mitochondrial morphogenesis via a mechanism which is independent of mitofusins and dynamin-related protein 1 [1]. 27.00 27.00 52.90 31.30 22.60 22.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.88 0.71 -4.84 10 101 2012-06-22 10:12:39 2012-06-22 11:12:39 1 3 80 0 70 99 0 156.30 54 87.21 NEW Sso....stlIREVYDuENAHEpFEhEL-+ALEAcsshIVIEPo+LGDETuRWIsVGNCLHKTAVlSGlAuLhouLlWh.-Rh...llusPhuulSlhCsuLYslSWpaDPCCcYQV.EpDspcLs+LP.Ls..ssSSPVVLVR+DsppRKhLHpsloluuAuaCAW+lYcsa.K .................h.sssaIl+ElYsuENAp-pFEhELEpALEAphcaIVIEPoRlGDETARWIsVGNCLHKTAVLuGsusLho...sL.................hh......s....chp........aIulPuGsLSlsCssLYulSWQFDPCCKYQV..Eh.Dsh+Lu+LP.LpsLos.SoPVVLVR+DDh+RKhLHsoIALuAhsaCsh+lYchY.t..................... 0 23 28 50 +14824 PF14973 TINF2_N TERF1-interacting nuclear factor 2 N-terminus Eberhardt R re3 Jackhmmer:Q9BSI4 Family This is the N-terminus of TERF1-interacting nuclear factor 2. It is required for the formation of the shelterin complex. The shelterin complex is involved in the protection and maintenance of telomeres [1-3]. 
27.00 27.00 27.50 28.70 26.50 24.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.74 0.71 -4.06 12 102 2012-06-22 11:39:58 2012-06-22 12:39:58 1 18 37 0 46 84 0 124.00 38 29.97 NEW WQVlppRslcHaG+ltEFVshlspsVP-Llsh+p+tpLhhGLRA+hlLEhhcppcshs..hptlp.Hlsph....ps.psptpDhch..............cpscssFhcLVpsLlcDspt+cpahpphh.tEYGssasssLcpLhhEFLp+L-plL...PlPshpp ..................WpVlctRpVcHas+l.EFlp.lptssP..sLlpa+c+t+LhhGL+A+lllEhhhttps..hs..Lpslp.Hhsc...........tsptptpDhch...............cuppsFhp.VcpL....csP.chtphhQ...h.t-YGpsF.s.uhcpLhhEaLspLEphLPssphpp.............. 0 3 7 24 +14825 PF14974 DUF4511 Domain of unknown function (DUF4511) Eberhardt R re3 Jackhmmer:Q99622 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 116 and 127 amino acids in length. 27.00 27.00 28.90 30.40 26.50 19.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.33 0.72 -4.29 16 82 2012-06-22 12:03:31 2012-06-22 13:03:31 1 5 71 0 61 72 0 99.00 47 71.78 NEW shosEpAKslLs-lLpulspP-Nut+lp-A+csuG...N-MlKhMQhVaPlssQIQh-VIKsYGFs..sstEGllpFspll+phE+-DsElApLpsplRohaLPP.....lslss .......hosEpAKslLs-lIpAhssPENuh+hsEA+-sAs...N-MhKMhQaVhPlssQIQ.EVIKsYGFs..sstE.GllpFupLl+phEppDsEIApLpupl+ulaLPPhsls.s.................. 0 18 24 42 +14826 PF14975 DUF4512 Domain of unknown function (DUF4512) Eberhardt R re3 Jackhmmer:Q8TCD1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 74 and 104 amino acids in length. There are two completely conserved residues (C and P) that may be functionally important. 
27.00 27.00 29.90 32.10 22.10 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.66 0.72 -3.16 36 88 2012-06-22 12:35:03 2012-06-22 13:35:03 1 2 72 0 54 79 0 80.60 34 93.55 NEW VClPChlIPlLLala++FlpPllh+hhsP.....W..KcA.tp....ts..ph.hpscsssCsh..........................................pspspst............tt.st.tssssspsKK ..VCIPCIVIPlLLWlY+KFlpPhlY.hluP....hWs.Kss.pp..psts..ph.hpspssss...............................................................................tt............................... 0 13 17 37 +14827 PF14976 FAM72 FAM72 protein Eberhardt R re3 Jackhmmer:Q86X60 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 264 amino acids in length. The function of this family is unknown. 27.00 27.00 34.70 29.40 25.70 23.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.92 0.71 -4.37 7 81 2012-06-22 13:09:17 2012-06-22 14:09:17 1 2 54 0 52 72 0 127.70 55 75.13 NEW MSss.thsFpD+sVohLsC+FCcpVLspRGMKAlLLADT-h-LaSTDIPPspslDFlGpCY.Tp.CKCKLKDIACLKCGNlVGYHVlsPCpsCLhSCNNGHFWMFaSpuVhshsRlDsoGsNhLLWGsL.P-h-Ess-Eph.....phstEEhlR .....................Ftp+pV.hLsCpaCcplLssRGMKAVLLAD.Tcl-LaSTD........IPPsss....VDFh......G.........p........CY.h...........T..............chCKCKL+DIACLKCGNlVGYHVllPCssCLLS......C...N....NGHFWMFHSpAVhslNRLDu.oGh..shLLWGsL.P-h--sps-p......p.stEphlR.......................... 0 26 28 34 +14828 PF14977 FAM194 FAM194 protein Eberhardt R re3 Jackhmmer:Q7L0X2 Family This family is found in eukaryotes, and is approximately 210 amino acids in length. There is a conserved YPSG sequence motif. The function of this family is unknown. 
27.00 27.00 29.80 31.30 26.80 25.10 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.27 0.70 -5.34 13 112 2012-06-22 13:34:15 2012-06-22 14:34:15 1 4 50 0 70 110 0 189.40 30 29.71 NEW ssppshhtchYcpGshF.hhhhPDGoupl..aYPSGNlAllhlss.ctsp..hhsllhEDsspss.....lLALFssoG+ussYasNGs...lhLsls.tGGhhsDppGs+h+pWsW.........sspscss....shpslplplNchlslRlhsQDKlslsFtshspps........plslGo+hhhlpP..ctlst.+p......p..c-hhhsspttK...hp+Lls+hcsplshssophh-plt.PutL ..........................s..ptp...phhpps.ha.hh.F.DGos.l....aYPSGNlAlhhlss...phst..hhshlhpDtspss........lLAlhsspGpussYassss...hhlhls.tGGphtDp.pGphl+tWsW.........sspsp.s....shpsl.hplNc.ltl+lhsQDpI.loFh.uhs.pps........plslusp....pP..cthsh.+h..........hst..c...h.sph.K....hpphhtclpthhphsss..hpphh.ss...................................................................................... 0 22 27 36 +14829 PF14978 MRP-63 Mitochondrial ribosome protein 63 Eberhardt R re3 Jackhmmer:Q9BQC6 Family This family of proteins is present in the intact 55S subunit of the mitochondrial ribosome. It is not known if it belongs to the 28S or to the 39S subunit [1]. 27.00 27.00 27.80 27.70 26.90 25.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.11 0.72 -3.55 11 74 2012-06-22 14:28:01 2012-06-22 15:28:01 1 2 68 0 49 75 0 88.20 41 79.01 NEW PG..+QWIGK+RRsRsVohptKcshl+RhEhEtcspaaLs+PYLThEQEtGHApph+t...thhcthctppppKastH+hltDpL.sHLNlscpW .........PG+.ahGK+Rhs+.Vohptppshh+cLEhEtcNpaaLs+PYhTtEQEtGHAtch..+p....................ts.hcthcttphpKa.sHhhlt-pL.sHLplscpW.............................. 0 14 17 33 +14830 PF14979 TMEM52 Transmembrane 52 Eberhardt R re3 Jackhmmer:Q4KMG9 Family This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 160 and 236 amino acids in length. There is a conserved LLCG sequence motif. The function of this family is unknown. 
25.00 25.00 25.00 29.40 24.80 24.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.10 0.71 -4.32 5 88 2012-06-22 15:00:50 2012-06-22 16:00:50 1 1 32 0 42 84 0 134.20 47 70.79 NEW --sCcsoDpCs.sAcWlsLWYVWLILVslsLLLLCGlTAuCVRFCCL+Kps.sQsapsuAhQPCEVTVIAhDsDSTlHSTVTSYSSVQYPhuhRlhhsFu-hDssuMsPPsYSLYAsEsPPuYEEAlKMhKoRpEVAtsSQKsscLsuloEpEs .....................................t.C...sp...C...t.s.cWspL.WYlWLlllhshLLLLCGlTusChRhCCLp+p...Qstps.u..Ph-lTVIuhDpDS.olpSTlTShpSV.hPhuhR.l.hshtphss...u..hP.......u.-h.PuYEEAl+M..sR.psshsupKss.L.ssst.c.................................................................. 0 2 3 11 +14831 PF14980 TIP39 TIP39 peptide Bateman A agb Jackhmmer:Q96A98 Family \N 27.00 27.00 34.30 37.00 21.80 19.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.50 0.72 -4.33 3 29 2012-06-22 15:25:55 2012-06-22 16:25:55 1 2 23 0 16 22 0 49.80 68 39.08 NEW u-WuSPuuspsKRNLVVADDAAFREKSKLLTAMERQKWLNSYMQKLLVVNS ........tsWus.usshsRRSLALADDAAFRERARLLAALERR+WLNSYMpKLLVLDu. 0 1 2 5 +14832 PF14981 FAM165 FAM165 family Bateman A agb Jackhmmer:Q5T5W8 Family This family of proteins known as FAM165 are found in eukaryotes. Members of this family are as yet uncharacterised. Proteins in this family are typically short membrane proteins between 55 and 70 amino acids in length. 25.00 25.00 58.00 57.60 24.50 23.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.57 0.72 -4.46 4 34 2012-06-22 15:39:24 2012-06-22 16:39:24 1 1 30 0 19 29 0 50.50 72 86.55 NEW pAL-NVPLLhYILAhKTLlLCLAFAGVKIYQuKK.EtKLK+pctEK.+R.AE ..KVLEHVPLLLYILAAKTLILCLAFAGVKlYQRKRLEAK....p+l.EAE+.++puE.......... 0 2 2 6 +14833 PF14982 UPF0731 UPF0731 family Bateman A agb Jackhmmer:Q4G0N7 Family The UPF0731 family of uncharacterised proteins is found in mammals. 
27.00 27.00 35.90 35.90 22.80 21.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.68 0.72 -4.07 2 39 2012-06-22 15:48:12 2012-06-22 16:48:12 1 1 24 0 23 39 0 75.90 66 83.69 NEW shp.ssQsRRFPlEuGDS.....PGLuSuspsp......up-.sPsR.LRRCPGsHCLTlhcVPIsVYhAMttsP.....ch+sp .PFcFGTQPRRFPVEGGDSSlt.EPGLSSSAuss......uKEhSPsRQLRRCPGSHCLTITDVPIsVYATMRKPPApSSKEM+P......... 0 2 2 3 +14834 PF14983 DUF4513 Domain of unknown function (DUF4513) Bateman A agb Jackhmmer:Q6ZNM6 Family This family of uncharacterised proteins is found in chordates. 27.00 27.00 53.30 31.80 25.30 25.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.73 0.71 -4.33 3 32 2012-06-22 15:50:45 2012-06-22 16:50:45 1 2 28 0 22 28 0 117.90 62 82.57 NEW SGKDTsPlLPKLNNNsS-ENoYKPu+......Ks--IHLPRFSLKQGMIPRRYVMPWKENMKFRsVNLK+AEACGIHAGPLEDSLFLNHSERLCHGEDRKVVLKKGPPEIKIADMPLHSPLSRYQSTVISHGFRRRLV ........................shP.hPKls.sststEs..+..p......+hp-lHLPRFSLKQGMIP+RYVMPWKENMcFRNVNLKpAEssGIa.sGPLEDSLFLsHSERLCHGEDRKsVLpKu.PP.EIK.IADM..PLHSPLSRYQSTVISHGFRRRLl....................... 0 4 4 7 +14835 PF14984 CD24 CD24 protein Bateman A agb Jackhmmer:P25063 Family \N 27.00 27.00 35.40 34.90 21.80 21.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.51 0.72 -4.16 5 28 2012-06-22 15:58:32 2012-06-22 16:58:32 1 1 16 0 14 53 0 51.80 61 65.88 NEW SNQTTVuTsSs.SSQoTSs...APNPoNATT+uuGGoLQSTASLFVlSLSLLHLY ................SNQT.oVs.ssSs..oSQsTSs...uPNPoNATTKAuGGALQS.TASLhVVS.LSLLHLY. 0 1 1 1 +14836 PF14985 TM140 TM140 protein family Bateman A agb Jackhmmer:Q9NV12 Family This family of uncharacterised membrane proteins are called transmembrane protein 140. They are found in mammals. 
27.00 27.00 68.80 68.60 22.30 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.29 0.71 -4.60 3 37 2012-06-22 16:07:53 2012-06-22 17:07:53 1 2 29 0 20 35 0 151.50 64 96.16 NEW RPRRt-QLLFMuIMVLVAsVISLMFYALLWKAGNLTDLPNLRIGFYNFCLWcEDTGSLcCHQFPELEALGVPRVGLALARLGVYGALVLTLFVPLPLLLAQCNSDEGEWRLAVGFLAsSSVLLAGGLGLFLoYVWKWlRLSLLGPGFLALGlAQALLILLLMAMVVFPPRAE.KuEs+LESC ............hhtppLLFhuIhlLsssVIsLhhYALLW+AGNLsDLPNLRIGFYNFCLWNEssusLQCHQFPELEALGVPpVGLALARLGVYGALVLTLFss.PLLLA.CstscttWpLAVsFLAhuShLLAuGLuLFLoYsWKWlRLSL.GPGFLALusAQALLlLLLhAhshFP.Rupcs.Sph-sC............. 0 1 1 3 +14837 PF14986 DUF4514 Domain of unknown function (DUF4514) Bateman A agb Jackhmmer:Q5T292 Family This family of uncharacterised proteins are found in mammals. 27.00 27.00 51.40 50.50 20.20 20.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.03 0.72 -4.16 4 27 2012-06-22 16:13:00 2012-06-22 17:13:00 1 1 17 0 11 30 0 58.40 77 55.09 NEW DlGGAQVLATGKSAGsEIDhKYAlIGTALGsAISAGFLALKICMIR+HLFDsDSSDL+STs .DVGGAQVLATGKssGsEIDaKYALIGTAlGlAISAGFLALKICMIR+HLFDsDSSDL+ST........ 0 1 1 1 +14838 PF14987 NADHdh_A3 NADH dehydrogenase 1 alpha subcomplex subunit 3 Eberhardt R re3 Jackhmmer:O95167 Family This family of proteins are accessory subunits of the mitochondrial membrane respiratory chain NADH dehydrogenase (Complex I). This subunit is not believed to be catalytic [1-2]. 27.00 27.00 29.60 29.60 26.20 26.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.96 0.72 -4.07 6 72 2012-06-25 07:58:29 2012-06-25 08:58:29 1 2 38 0 27 63 0 76.80 64 83.37 NEW MAuRluAFLKsAWsKEPVLVVSFsIGGLAlIlPhlSPaTKYosMINpATPYNYPVPVRDcGNMPDVPSHPpDPQGPSL-WLKsL ..............u.luAFLKNAW..sKEPVLVsSFsluuLA.lILP.lSPYTKYusMINp...ATPYNYP................VPVRDD....GNM..PDV.PSHPpDPpGPSLEWLKpL............................ 
0 1 3 7 +14839 PF14988 DUF4515 Domain of unknown function (DUF4515) Eberhardt R re3 Jackhmmer:Q6ZUS5 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 198 and 469 amino acids in length. There are two completely conserved L residues that may be functionally important. 24.30 24.30 25.40 25.30 24.20 24.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.23 0.70 -4.66 11 151 2012-06-25 08:27:04 2012-06-25 09:27:04 1 1 48 0 91 151 0 173.10 28 47.21 NEW Es+hFLEYLpc+sEcppcph-pLWpsYlQpppEI-c+RpELsScaspppupLpppLhQpcKh.pusL+pcLQAlcsluplKEpQ-pcIpsLccEhcchpu-puhpc+Es+hQFLcEKutLE+Qlp-hchhphGccts+ELppKspAhchtAKphhp-aspulp+ENppL++cLhQLhpEhpcLcsp+p+LEpp+QplpcpQWYLEuL .......................................hhpaLpppspcppp.hppL.pph.ppptph.p.pc+p.ch.pp...YstphstLc.tphhpppp...s.lppcLpslcphpth.............K.p.-p-lpsLccphtphptcpppphpchctpFhpEKttLEc-s.Epcl..h...Luc+Ac+E.............A.hhtl.phspslhcENhpLpctLh.hhc......csptLptppppL.pcp+ptLhppp.h.p.................................................................... 0 23 28 48 +14840 PF14989 CCDC32 Coiled-coil domain containing 32 Coggill P, Hetherington K kh6 Jackhmmer:Q9BV29 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 188 amino acids in length. The gene that encodes this protein is C15orf57 but its protein product is called Protein CCDC32 (Coiled-coil domain containing 32). The exact function of this protein is still unknown. 
27.00 27.00 27.90 34.60 19.70 25.40 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.81 0.71 -4.37 14 92 2012-06-25 08:39:40 2012-06-25 09:39:40 1 2 70 0 54 88 0 137.80 41 74.06 NEW D.Wsphsss..psppt......pssppFcDsFpsshstt.tptp..................p....tPLsDS-sYLAsLER+Lp+l+uts......chLcoLuptKc-Chc+LLps.....shsspha.ph.-hDpsshpp.L+RaLhP.c.AlsstEl.aLl.h-tLppp ......................D.WsEhssslspstpp.....sssssuFpDSFhss.stsptppps.t.................s.tsa....tPLpDSEsYLASLE+.KL++IKGhs.......p.-VTSK-ML+oLuQAK+ECWDRF.Lp-.........phsuEhF.-s...l-sDcs.......slpp........h+..RaLtP-+sAlospElpaLl.s-t..p.p............ 0 13 15 35 +14841 PF14990 DUF4516 Domain of unknown function (DUF4516) Eberhardt R re3 Jackhmmer:Q69YU5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 56 and 69 amino acids in length. 27.00 27.00 29.10 29.10 22.60 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.29 0.72 -4.41 14 71 2012-06-25 08:52:15 2012-06-25 09:52:15 1 2 61 0 43 70 0 47.90 43 66.80 NEW MPuG.........suhspYhhhhssulhSMhAGAplVHphYKPDLolP.l.scssp ...............MPuG.............VshssYlphhusulluMhAGApVVHpaY+PDLolPpl.sc.t.......... 0 8 15 25 +14842 PF14991 MLANA Protein melan-A Bateman A agb Jackhmmer:Q16655 Family \N 27.00 27.00 81.80 81.70 23.80 23.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.57 0.71 -4.08 5 35 2012-06-25 09:02:05 2012-06-25 10:02:05 1 1 30 12 22 30 0 108.70 59 98.78 NEW PRcDhHa..G.aF+G+GRoYsTAEEAlGIGILIVVLulLLllGCWYaKRRSGYKpLhsKoI+lGo.psl.....+sRCspEusc+p-SKlShQEassh.pPVVPNAPPAYEKIAAEQSPPPYSP .PRE-sHa..GaP+KGHuHSYlTAEEAAGIGILsVILGlLLLIGCWYCRRRSGYRsLhDKol+sGTQs.sl.TtRCspEuhsHpDSKlshQEpss..cPVVPNAPPAYEKlSuppSPPPYSP. 
0 1 1 5 +14843 PF14992 TMCO5 TMCO5 family Eberhardt R re3 Jackhmmer:A8MYB1 Family The TMCO5 family includes human transmembrane and coiled-coil domain-containing proteins 5A and 5B. 28.10 28.10 28.30 28.60 27.50 28.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.89 0.70 -5.27 9 134 2012-06-25 09:09:25 2012-06-25 10:09:25 1 2 26 0 62 136 0 210.10 29 85.47 NEW NlpSLN.DLE+DhQRlDEANQ.LLpKIpEKE-sIQsLE+EIs.oht.uc-c..E-.Nchs.tE+EpALp-LE.ETA+LE+cNcpLs+slsELQ+Klo++pps.sssEptshcptlpE.K..s+LQp.ppSCAsQEKELsKl.p...DYppVspLCEDQAhhIK.KYQEhL.+chEc.EKEshlLE+ElsKs.sp....sSph..cssSh.hEshtpNhEcshlpKpp...p...........tFWh+hFRh.LhFhVlhFIRLLuYlhFH.lpaINPDLLVc.sLPhlLSRsoLhcLRchlFPFLTLEsE-lLP .......................................................................................................................sLp.ch-tc.QplsctNp.hL.plp.pEtthppL.....p-lh...t.hpcp..-c...s.h...p.pps.h...h...cp.shLEhps..ph......-lppp..sct..p...h........-t.t......pt..p..c..splp...tphh..p.EpplhKl.p..................-ht...s..tphp..tD...ts..h+....chpE....hL.cphEp.ph-hhhLppc.lphh........t.......s.........p..t..h.ptt...............hh.hphhph.hhh..hhhhhtlhshhhha.h.al....h.p.hLPhhhu+thhhcLRphh.P.LsLpsEthLP..................................... 1 5 5 9 +14844 PF14993 Neuropeptide_S Neuropeptide S precursor protein Bateman A agb Jackhmmer:P0C0P6 Family \N 27.00 27.00 89.30 88.90 26.90 15.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.13 0.72 -4.30 4 24 2012-06-25 09:09:30 2012-06-25 10:09:30 1 1 20 0 12 31 0 64.00 73 77.25 NEW YPVssS...KVsGKsDYFLILLNSCPuRl-tS-cLAhLKPILEKsFhKRSFRNGVGoGhKKTSFRRAK ....YPVssS...KVSGKSDYFLILLNSCPoRlDRS-tLuhLKP.ILEKhFhKRSFRNGVGTGMKKTSFRRAK 0 1 1 3 +14845 PF14994 TSGA13 Testis-specific gene 13 protein Bateman A agb Jackhmmer:Q96PP4 Family This family of uncharacterised proteins are found in chordates. In humans this gene is found to be expressed specifically in the testes. 
27.00 27.00 29.40 44.50 20.50 24.80 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.78 0.70 -5.24 5 31 2012-06-25 09:19:14 2012-06-25 10:19:14 1 1 24 0 18 28 0 260.70 55 92.14 NEW Gs.....KcpoKhQ..sGtSKsuKoSul+FEKpll......sDu-EIhDAVGQSKFVLc................................NLRHYTV.........HPNLAQ.....................YYEPLKPTALQKFLAQN+KI.........................................................pSFMLKVTEYDQDKTLLIMTNNPPPCsIDHQGK-usPKYFSsELL.............LK..EosaQH....KPT-NhaLPpMPQK.............KKL+stLK............PlFPlpLl-DPsSK+EQWFRFSTDcDFKSEGRYSKVYALR+QKKMYPQLsFAPVscRph..........+ssVSKKSuS-sPT........SQVIWEPLTLSSLLEEKPTRTAPGESsFRsGRAQQWIIKsATVI ......................................................................................................................++poKhQ..pstu+h.csssh+hEKthh.........ssscEI.DsVGpSKFVLc................................................................................................................NL+HYTV.........HPNLAQ...............................................YYcPLKsTALQKFLApN+Kh.........................................................pSFMLKVT-YDQDKTLLIMTNNPPPssIspQsK-ssPKYFScELL.............lK....ppp.aQH....KPo-shhLPhMsQK.............KKLRstLK............PlFPlhh.-DPsSK+EQWFRFSTDNDFKoEGKYSKlYALR..pQKKMYPQL...sFAPVpcRph..........+ccsS..hKStSphPh........Sph...hhEPLTLuSLLEchPTRo..sPGcusFRpGRA.Q.WhlcpAsl............................................................ 0 4 4 4 +14846 PF14995 TMEM107 Transmembrane protein Coggill P, Hetherington K kh6 Jackhmmer:Q6UX40 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 138 and 164 amino acids in length. There are two completely conserved residues (H and E) that may be functionally important and four transmembrane helices. The domains in this family vary in length from 124 to 126 amino acids. The precise function of the protein family is still unknown. 
26.00 26.00 26.20 26.10 24.90 24.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.52 0.71 -4.00 37 76 2012-06-25 09:47:02 2012-06-25 10:47:02 1 4 60 0 51 85 0 114.90 39 64.62 NEW L.lPsRFlslhsHhlhl..lslhauc-pslpush..........hshspt....p........Ysptcp......plsssLslohhhhslEhhGhhoGlShFssspslhphssHsuAuVhL.hFhhppWcssp..hWalFshh.SslPshhEhhhhh ........LVPuRFLoLlAHLVll..ITlaWS+-sNlp.A.sLP.........hpao.p.....p.......................Ysppct.......p.LlsuLuloluhhslElsGhhoGlSMFssopuLl....slssHsuAuVhLshalhcpWcsst..aWalFshh.ShhPshhEhhhh........................................... 0 24 30 40 +14847 PF14996 RMP Retinal Maintenance Coggill P, Hetherington K kh6 Jackhmmer:Q96NL8 Family RMP is encoded for by a gene, C8orf37. Mutations in the gene cause two types of retinal dystrophies: cone-rod dystrophy type 16 (CORD16) and retinitis pigmentosa type 64 (RP64). CORD16 affects the cone receptors which detect red, green or blue wavelengths of light and RP64 affects the cone receptors first and then the rod receptors. Both of these affect the photo-receptors in the eye leading to colour blindness or blindness respectively [1]. 27.00 27.00 27.70 27.70 23.10 23.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.37 0.71 -4.28 14 77 2012-06-25 10:04:19 2012-06-25 11:04:19 1 2 67 0 52 66 1 137.00 40 62.04 NEW LDshls-..........lspcss.ssh.s.phssppsssssshss..s...........p++Cssl.hLuGsphshGlsoshpp......+sCspLRCssCDFcVlpasshcWcpssDYLFFRNNhPchpKLpsKLhpc.GspAYuCQCsWpos..schsslps..spl+WVCuuH ...........................................................sshlp...........h.pp.t..t.......s.p..ts.sstss........................................tppCssl.alGGoshspGluoshsp........RuCDpLRChsCDFtVlpasDhhWDc........S...sD.YLFF.R.NNhP-hpKL+s.......K...............LhccpGsRAYACQCSWpol.....p-lscLpst...ppL+WVCutH............... 
0 20 24 33 +14848 PF14997 CECR6_TMEM121 CECR6/TMEM121 family Eberhardt R re3 Jackhmmer:Q9BTD3 Family This family includes Cat eye syndrome critical region protein 6, a protein which has been identified in a screen for candidate genes for the developmental disorder Cat Eye Syndrome (CES) [1]. It also includes the TMEM121 transmembrane proteins. The function of this family is unknown. 25.00 25.00 25.10 25.10 24.20 24.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.45 0.71 -4.70 17 120 2012-06-25 10:07:15 2012-06-25 11:07:15 1 2 66 0 90 116 0 190.60 31 45.76 NEW saAhl....sWhlY.shsLh.Klshlapshhs-t....h.sh-h.hu.psLclsLulo.shLFlLLltsc+.phspu........p++hhlpshhhtlslDLLDslshlphLhcspt....hsLslhl-shllhhshlsl...hLPsluLhElshsph........t.hspchl.aslLthlhVNlPhlhIRs..hLa.......atpspssSlFhhKNlhhlhhtspphhpt ....................................................................................................................................................................................taAhl....hWhlY.hhsLphKlhhlapshh.tc........................htsh-s..h...u.p..pslplhLuls.l.P.hLalLlsuhs.chphsps........pc+cchpsphhhVsLDLLDhl.s.h......t.Lhcspp............hsLP.lahcslhhhhsalhL...slPslu....LsElshps...........................t.hsp+hh...aslLulhhVNlshlhlRs..hll.......app..p..pssoIFlhKNlhhluh+shphlp......... 0 25 39 65 +14849 PF14998 Ripply Transcription Regulator Coggill P, Hetherington K kh6 Jackhmmer:Q5TAB7 Family The precise function of this family is not clear, but it is thought to play a role in somitogenesis, development and transcriptional repression. Ripply is also known by an alternative name, Bowline. Bowline, is an associate protein of the transcriptional co-repressor XGrg-4 [1]. This family contains two conserved sequence motifs: WRPW and FPVQATI. The WRPW motif is thought to be required for binding to tle/groucho proteins [2]. Ripply3 is also known as Down Syndrome Critical Region Protein 6 homolog [3]. 
This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 154 amino acids in length. 27.00 27.00 40.80 39.90 20.50 20.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -3.87 16 104 2012-06-25 10:24:52 2012-06-25 11:24:52 1 1 41 0 57 95 0 82.20 50 63.51 NEW sshWRPWl..osp-stppspt..............stsstpspsspts.sFpHPVRLaWPKS+saDYLYppGEpLLpNFPVQATIshY-.-SDS---EE .......................................................s.hWRPWl..sst.c...ph..t..............stu..sthstsstt..tFpHPVR.LaW.PKS+saDYLYppGEtLLpNFPVQATIsFY-.D.SDSE-Ep................ 0 5 12 21 +14850 PF14999 Shadoo Shadow of prion protein, neuroprotective Coggill P, Hetherington K kh6 Jackhmmer:Q5BIV9 Family This protein family is a Prion-like protein and its function is neuroprotective and similar to PrP(C)-like. Shadoo is mainly expressed in the brain, and highly expressed in the hippocampus, the area of the brain which co-ordinates memory as well as spatial memory and navigation. This protein may also alter the biological actions of normal and abnormal Prion Protein (PrP) which lead to lethal neurodegenerative diseases [1]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 150 amino acids in length, of which the first 90 are alanine rich. 27.00 27.00 49.40 49.00 26.30 25.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.93 0.71 -4.29 5 54 2012-06-25 10:39:11 2012-06-25 11:39:11 1 1 32 0 11 48 0 121.80 75 86.49 NEW CDSGAAKGGRGGARGSARGGlRGGARGAuRVRVRPAPRYu..GSSLRVAAAGAAAGAAAGAAAGLAAGSGWRRAsGPGEpGLEDsEDGAPGGNGTGRGVYSYWAWTSGAGPTssh+LCLLLGGALGALGLLpP .......CDuuAAKGGRGGARGSARGG.....RGAuRVRVRPAPRYu..GSSlRVAAuuA..A.AGAAAGA...AAGLAAGSuWRRAAGPuElGLEDs.EDGAPGuNGTGRGVYSYWAWTSGA...GPTsph...+LC.LLGGALGALcLLRP....................... 
0 1 1 3 +14851 PF15000 TUSC2 Tumour suppressor candidate 2 Eberhardt R re3 Jackhmmer:O75896 Family This family of proteins are candidate tumour suppressors [1-2]. 25.00 25.00 26.90 26.30 23.90 22.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.68 0.72 -4.01 13 77 2012-06-25 10:40:10 2012-06-25 11:40:10 1 1 56 0 45 65 0 102.30 56 97.96 NEW MGuSsSKt.cuhh.h.ssss.sssss......tssc.p..p.schcshRsusP...FVaoRRGShYaDEDGDLAHEFYEE.lls+sGp++uph+Rlp+N.LpPQGpl+hshPplHVDFPlllC...Es .......................................MGsSuSKs.+GhWPFsusuussssts..........tssc.p..shscs+s..RsssP...FVFTRRuShaaDEDGDLAHEFYEE.TlV...TKNGpK+AKL+RlpKN.LlPQGIVKLDhPR..IHVDFPVlLhE.V........ 0 11 15 26 +14852 PF15001 AP-5_subunit_s1 AP-5 complex subunit sigma-1 Eberhardt R re3 Jackhmmer:Q9NUS5 Family This family of proteins are subunits of the adaptor protein complex AP-5 [1]. 25.00 25.00 26.00 25.90 20.80 22.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.12 0.71 -4.35 10 72 2012-06-25 10:41:30 2012-06-25 11:41:30 1 1 55 0 47 67 0 166.60 42 86.40 NEW MVhshlIHolp........sps+VLaSphYus-.....tp.pp.shcpcR.hpKEpl....tslA+pVcSpsshp+ps......su+sss-phhp.s-psluhtEts......sGsFplhsG-sFsscphVLWhuVsuluFsLVC-sHENlhLA-sTLRpls+hLhcpl..+sLss....uuclLh.....KuD+lcAlLc+FLPpGQLLFLNcphsptLEKElp ..............................MVpuFLIHTlpss.....tss.sRVLYSphFGs-p.....spp.c.hssEccRlhcKEpl....hsVARQ...VcShspLpppA......uGcssh-htst.u-EslsLpEAs......pGsFpLtsu-PFpps+.sVlWLuVhuLuFsLVh-sHENLLLAEsTLRhLs+hLL-pL...+lLss....usplLh........+uDcltulLpcaLPpGQLLFlNppasptLccch.......... 0 15 18 29 +14853 PF15002 ERK-JNK_inhib ERK and JNK pathways, inhibitor Coggill P, Hetherington K kh6 Jackhmmer:Q9H6E4 Family This coiled-coiled domain, CCDC134, is a secretory protein that inhibits Mitogen activated protein kinase (MAPK) pathways such as Raf-1/MEK/ERK and JNK/SAPK but not p38. 
CCDC134 is widely expressed in normal adult tissues, tumour tissues and cell lines, which shows its importance in cell signal transduction pathways, transcription regulation and therefore cell survival [1]. Additionally, CCDC134 is known to bind to a transcription adaptor, hADA2a, which forms part of the general control nonderepressible 5 (GCN5) histone acetyltransferase complex. Acetylation usually 'switches genes on' for transcription. Moreover, knocking out CCDC134 suppressed hADA2a-induced cell apoptosis activity and G1/S cell cycle arrest suggesting its importance in cell survival [2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 188 and 257 amino acids in length. This family is a coiled-coil domain containing protein 134 (CCDC134) whereby the coiled-coiled domain is a ubiquitous motif involved in oligomerisation. 27.00 27.00 29.10 28.10 19.70 20.30 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.25 0.70 -4.95 8 99 2012-06-25 10:48:46 2012-06-25 11:48:46 1 7 72 0 62 88 0 174.20 39 69.01 NEW pscsppspsppssssppstKlacclFspKRcEHhpAlcplhplpph-KRhcLlchllcclh+llpcu+ptLEcusasu-.SsFP.c-p.sLpDALupllENTAFFG-LlL+hP-hoccllcpss-WpsLhpWulsaoppos.lL--socchLcLlsQEls.hscRcssYhNPY.......pct.+cphcpppcsphKKKp+Kc......hpKtPpL.....+pEL ......................................cha++hFchKR+-ph.A.lcsLhpls-hpppYKll-lhLcslh+VLc-S+thL.sushh..Pc.ssFP.pDc..plK-AhSpllENTAFFGDllLRFP+IlHphac+s.s.sWs.Ll+Wul.sFCspo...u..la...........s.p..s.p.p.l....LpLhuQELs.lsE+sssa.NPa..........ptc..p.cthptpcppcccccp+Kc.........pKtPpl.....p..................................... 0 22 28 48 +14854 PF15003 HAUS2 HAUS augmin-like complex subunit 2 Coggill P, Hetherington K kh6 Jackhmmer:Q9NVX0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 291 amino acids in length. 
HAUS augmin-like complex subunit 2 is alternatively called centrosomal protein of 27 kDa (CEP27). It localized in the microtubule organising centre, the centrosome. These microtubules are part of the cytoskeleton and give the cell its shape, provides it with a platform for motility and are crucial for mitosis [1]. This protein is part of the HAUS augmin-like complex. This interacts with the gamma-tubulin ring complex (gamma-TuRC) which is required for spindle generation. HAUS2 may also increase the tension between spindle and kinetochore allowing for chromosome segregation during mitosis [2]. This protein is involved in mitotic spindle assembly, maintenance of centrosome integrity and completion of cytokinesis. 27.00 27.00 27.00 27.10 22.30 26.00 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.68 0.70 -5.33 7 100 2012-06-25 10:53:06 2012-06-25 11:53:06 1 3 62 0 56 102 0 194.40 34 89.31 NEW sssWsstp.h.phuGhh.uh.lASslshp.h.s.Spcss.shs............hphLpploslQtcIhphplElphhK.-KssAcLsHso.ht+KhcsLt..phsshLcsVlppK-cIhtRL.pPhsh-ClPlEAcYp+phsc...............LLhhAso.hutLpsslpshp...phc-s.phhuchLt.hslhlspspchhEsl.thREp.tplpphhsth.s.p..lp..........................................................................+clshPP.shpsp ........................................................p...phuuh..sh.lAushh.t...s..spcp..shs............hphLpploslQtcIhphplElphl+h-KcsADlsHs.hltpKhcsLp..phsspLcsVlppKcplhpRL.pPh...st-sLPlEA.Yp+.hsc...............LL.hAss..h.ttLpsplpshp...ph..pts...p.h...sphLt.hshhlspspch.hEsh.thRcp.tph....s...................................................................................................tt............................ 0 11 22 32 +14855 PF15004 MYEOV2 Myeloma-overexpressed-like Eberhardt R re3 Jackhmmer:Q8WXC6 Family This family of proteins is found in eukaryotes. It includes human myeloma-overexpressed gene 2 protein. Proteins in this family are typically between 45 and 74 amino acids in length. 
There are two conserved sequence motifs: MKP and DEMF. The function of this family is unknown. 27.00 27.00 27.40 28.80 23.30 22.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.06 0.72 -3.42 8 78 2012-06-25 11:40:17 2012-06-25 12:40:17 1 3 63 0 48 69 0 57.70 71 54.98 NEW MKPulssDEMFPEGAGsYMDL-E.......uGGuoGhhhD.L..sANEKsVHuDFaNDF-DLFDDD............sh. ....................MKP.uV...DEMFPEGAGPYVDLDE..........AGG.STGLL.MD.L...AANEKAVHADFFN................DFEDLFDDDD..................... 0 7 9 27 +14856 PF15005 IZUMO Izumo sperm-egg fusion Coggill P, Hetherington K kh6 Jackhmmer:Q6UXV1 Family Izumo is a molecule with a single immunoglobulin (Ig) domain. It is thought that Izumo bind to putative Izumo receptors on the oocyte. Izumo is not detectable on the surface of fresh sperm but becomes exposed only after an exocytotic process, the acrosome reaction, has occurred. Studies have shown that knock-out mice (Izumo-/- males) were sterile despite normal mating behaviour and ejaculation, indicating the importance of the protein in fertilization [1]. There are cysteine residues thought to form a disulphide bridge. Izumo is a typical type I membrane glycoprotein with one immunoglobulin-like domain and a putative N-glycoside link motif (Asn 204) [2]. There is a conserved GCL sequence motif. Izumo expression has been found to be testis-specific [1,2]. This family of proteins is found in eukaryotes and are typically between 193 and 305 amino acids in length. 
27.00 27.00 42.40 41.80 22.60 22.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.18 0.71 -4.19 8 129 2012-10-03 02:52:13 2012-06-25 13:02:24 1 2 29 0 53 139 0 145.10 28 60.57 NEW aGu+GCLpCDPphlEslspLcusLlPpch.VsshpthhptlhppMpchhF+shthsshluhlslppL-clsoalKschp+LtssohKsshllp.ELlslRcplh+cLcchL+saphc.sCsccCtlhctslLDChsCp+hos+Ch+ucaCtc-c.p+sph+ ....................................uhGCl.Ccsph.-thp.hcpphhspph..s.tp..htshhphlhpthcshhhpsat.s.uhhuhlspppL-pluphlhpphppLhpsshpsshhlp.ELhslhcpthcplppsltpaphc......hC.ccC.slht..hl.C.sCpc.hhtCh.uh.Ctppt....thp........................... 0 4 4 8 +14857 PF15006 DUF4517 Domain of unknown function (DUF4517) Coggill P, Hetherington K kh6 Jackhmmer:Q9GZN8 Family The function of this protein remains unknown. This family of proteins is found in eukaryotes and are typically between 160 and 182 amino acids in length. 27.00 27.00 30.50 30.00 23.70 24.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.19 0.71 -4.38 16 94 2012-06-25 12:06:34 2012-06-25 13:06:34 1 2 70 0 54 82 0 136.30 48 83.71 NEW p+HVHFDpp.......hHDssl.............h.hp.pscu.shhV+lGFLplpHRYcIchslPs.s.hhs.....ts.sshs....slPshps+ll.....shs.....ssssscp.hcshlEahAaKEtlL+EcltLsuppssspplcllltARVLu+t+GTPML+sGIHClGVEh-.p-S.EtSDWpGFc ..............................................................................ppHVHFDpp........h+DS.sV..............hspppuDs.sh.lV+lGFLpIhHRYcIsFoLPs.s.hhs.....pshpphs.....sPslHl+lh.....sls......hsEt.....hphEa.AaKEtlL+EchhLsspsss...........spplplhlpARVhs+p+GTPMLhsGl+ClGs.Eh-..-S.EtSDWtGFD........... 0 18 22 36 +14858 PF15007 CEP44 Centrosomal spindle body, CEP44 Coggill P, Hetherington K kh6 Jackhmmer:Q9C0F1 Family CEP44 is a coiled coil domain found localised in the centrosome and spindle poles. 
27.00 27.00 28.00 27.00 24.40 24.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.65 0.71 -4.44 13 98 2012-06-25 12:15:38 2012-06-25 13:15:38 1 3 67 0 57 87 0 127.10 46 34.24 NEW Dl+ssLc+LcptLRslpYP.....tclDhstLcpGDPuAhLPllcYsLhsYSppluchLlcpGaE......L..hu.KoDLRFlEslaKlLR-pFsY+PhLotpQFhp.pGFAE+KlhllsDIls...hlhc+H+Els+tp+tpspphpp ..............DLKpsLRpLEpsLRhLsYP..............p-VDhsGLhKGDsuA.LPIISYuhTuYSshVsElLh-.s.slE......L..hu.KNDLRFl-sVYKLLRDpFsYKPlLTKpQFlp.sGFAEhKIpIlCDIls...tVhKKHKELsphpKh.spttpc..................... 0 22 30 38 +14859 PF15008 DUF4518 Domain of unknown function (DUF4518) Coggill P, Hetherington K kh6 Jackhmmer:Q5JPI3 Family The precise function of this protein family is unknown but it is thought to be involved in apoptosis regulation. 27.00 27.00 37.10 31.80 26.40 26.20 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.75 0.70 -5.35 14 96 2012-06-25 12:29:39 2012-06-25 13:29:39 1 4 68 0 56 90 0 239.00 41 84.87 NEW LS-pc+pGhR-LLtphss.ssLhpLscoVTppllc..l-sp-EAlchIlsaopsspcLL++++IpR-lLF+YLtp+tsssssshsKssLlp+llpaWppphstp......spps.ppsptstpsth..t..............tppp.slchLAccFscWFFshLNu.............-shu.pcFWsDssL+lphhss-tss-ph..puAptV.ptLlultpp.thhFNPNLspsGlpGph-saGhVlVhssGTlHpsppClGlFEpsFGLlRDP....hssNNWKhKpsclpL+upt ..................................................St.EhpGh+pLLt.hcs...splhuLscTlTsp.llp..spspp-AlcAILsYSpsscELL+R+KVpRElIFKYLuspulhlsPso-KpsLIp+shpaWppp..p.p...................hppsspss...ppp.....pp.p...pp..............................................................ptpphs.hppLuccFspWFFtlLNu.t...............spaGPpHFWpDspL+h..h..h..psu....-p.ss..ch....pGAp.Vuh+LluLspp-hlhhsPNLsspGl+st.ssHGLVhVtVsGTlHc.....s.s.......s.......C.LGIFEQlFGLlRsP....hhpNsWKIKhlplplhup.s............................................... 
0 14 18 32 +14860 PF15009 TMEM173 Transmembrane protein 173 Eberhardt R re3 Jackhmmer:Q86WV6 Family Transmembrane protein 173, also known as stimulator of interferon genes protein (STING), is a transmembrane adaptor protein which is involved in innate immune signalling processes. It induces expression of type I interferons (IFN-alpha and IFN-beta) via the NF-kappa-B and IRF3, pathways in response to non-self cytosolic RNA and dsDNA [1-4]. 27.00 27.00 62.70 35.70 19.30 19.10 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.89 0.70 -5.62 16 77 2012-06-25 12:34:37 2012-06-25 13:34:37 1 2 63 7 51 89 0 271.20 37 83.39 NEW chlpplsh......h..hh.thChhs.p.hppha.h.csphWpll+psashshpsshhhhuhllluhhhassssshssls.shhhh......hp................lsppsLoahlhl+psp......h...tls...sL....shAsGhAhSYaaGYL+LlLP........uLpcRhcpapcppN...hphss+RLaILlPp-shVsssl..p..ss..hlchtcsLpsphlsRAGlctRsY.KpuVY+l...tcctsspshhhshEhATPLlThach.pp..usshhh+E.+pE.hhhFh+pLc-lLp.....shPEo+spscLIhYssh.-sp.Gs.hsluclllp+lcpppc .........................................................................hhhh.h.ls.llpt..h..C...ht.p..hpphp.h.psp..hhphhptshshs.tth....hhhl..lshhhh..h.phssh..s..h.hhhh.......................hhsphLshhlhlpt..t..............h...ph.tls..tsh....shApGhAaSYYhGYL+LlLP........GLptRIptY.pppps........hshss+RLaILlPhcshVsssL...p.sDs....slchhcpLsppphsRAGl+sRsY.pNolYcl....hcssppshhCllEhATPL.TLFsMspp..upuuhu+EpRhEQsplFhRpLc-ILt.....ssPEspspscLIlYp...-st.usphslup.lLpHlpppp................ 0 11 14 32 +14861 PF15010 FAM131 Putative cell signalling Coggill P, Hetherington K kh6 Jackhmmer:Q96AQ9 Family The precise function of this protein family is unknown, however studies have shown it undergoes Protein N-myristoylation; a type of lipid modification in eukaryotic and viral proteins. Protein N-myristoylation is usually an irreversible co-translational protein modification which is useful in cell signal transduction pathways [1]. 
This indicates that FAM131 may have some sort of role in cell signalling due to its ability to be myristoylated. This family of proteins is found in eukaryotes and are typically between 257 and 361 amino acids in length. 27.00 27.00 32.10 31.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.95 0.70 -4.66 5 167 2012-06-25 12:34:48 2012-06-25 13:34:48 1 2 38 0 87 138 0 229.00 44 79.80 NEW VEDTluMLPKSRRALoIQEIAALARS...ShpGISQVVKDHVTKPTAMAQGRVAHLIEWKGWuKPsD.....oPpAl.cSDFsSYSDLSEGEQEARFAAGVAEQFAIAEAKLRAWSSVDGEDso--SY-EDhusNa-ssoQpLhpst.....t.lhpsphsShPpphsSps..s.sSpss.E..hSs-oLsAS.sohs..........sp...hs........stpGuuup.uuphL...ts...tsGElpLA+uPsppcpsAh+tht....sscpDSssYssshoEouLSPtE-......Dptsh....ss..pEh.huachsR+VSDVoSSGVpShDE ......................................hLP+.+.Rs..shhtIuALA+S...SL.s...GIS...pshKDHVTKPTAMAQGRVAHhIEWpGWuKsss..................u.tsh.cp-hsuYSD.LS-GE+EAR.F.......AA..GVhEQFAIuEAsLhAWSShDGE-.s.s.sShp-s.hs..tt.p.......................p..hhps.h...shs.hp.su.t...s.sSpss.....sspolhuS.sshp..........................................t..s........h...........t....h.p...................................................................................................................................................................................................................................... 0 4 13 36 +14862 PF15011 CK2S Casein Kinase 2 substrate Coggill P, Hetherington K kh6 Jackhmmer:Q9NX04 Family It is suggested that CK2S (C10orf109) is important in the regulation of cancer cell proliferation. Studies have indicated that CK2S is the downstream target of a protein kinase, casein kinase 2 (CK2), which is upregulated in cancer cells. CK2S has been found to be upregulated in cancer cells. The precise mechanism of CK2 targetting CK2S is not well characterised. It is found to be localised in the nucleus and cytoplasm [1]. This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 160 and 221 amino acids in length. There is a single completely conserved residue P that may be functionally important. 27.00 27.00 27.10 27.50 25.50 25.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.87 0.71 -4.46 18 90 2012-06-25 13:10:52 2012-06-25 14:10:52 1 3 72 0 58 89 0 155.10 31 72.95 NEW ppL+ptFpslcpppctWpuslscstsLluSLusLsEQLpAhpclphtss..LcsFPsLppRLptK.htAl-tlLscLsEcLspLpcVpcsluptlpsshplYcpph..ssLslssshpRuulsPSlADhLEaLQDl-RaYRppalp++.lLpsLshcsLsshpuhs+pW ..............ptl+ppF.hlccppshWpsshpcs.shlsuLusLsEQLpAhpslc....htsss.LpsF....P.sLp-RLhtKQht.......Ah-hlLspL.tEpLspL.pVpcslsptlcpshplhcpps..sslslc.slhptuulsPSlADhLEWLpDl-RaY+p.p.........Y..lp+c.lLssl.phs..sLsshpuh.pth.................... 0 15 25 41 +14863 PF15012 DUF4519 Domain of unknown function (DUF4519) Eberhardt R re3 Jackhmmer:Q9NRQ5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 59 amino acids in length. There are two conserved sequence motifs: KET and VLP. There is a single completely conserved residue P that may be functionally important. 27.00 27.00 54.70 54.50 21.60 21.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.76 0.72 -4.05 12 70 2012-06-25 13:22:02 2012-06-25 14:22:02 1 2 63 0 47 63 0 55.60 62 84.22 NEW MRQL+.GKsKETpKpK+ERKp-.hEhpp+lhTVVLPslushhlhIlVaVYLKTRPp .MRQLKGp.spKETsK-KKERKpshpEu+QQIsTVVLPTLAVVlllIVVFVYlsTRP..... 0 10 12 28 +14864 PF15013 CCSMST1 CCSMST1 family Eberhardt R re3 Jackhmmer:Q4G0I0 Family This family of proteins was discovered in a screen of Bos taurus placental ESTs. The B. taurus member of this family was named cattle cerebrum and skeletal muscle-specific transcript 1 [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 97 and 157 amino acids in length. 
There is a single completely conserved residue D that may be functionally important. The function of this family is unknown. 26.10 26.10 29.20 27.50 26.00 26.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.67 0.72 -4.06 15 66 2012-06-25 14:36:52 2012-06-25 15:36:52 1 2 57 0 39 66 0 74.40 37 49.36 NEW scpPl+aooSpAu..+Wcsppohusspsc...PWapsalluhSlsshllYFCllREEsDlDptLc........psLh-c.l.uLEcpphp ...scPI+FSuSpAs........pWpspcohutsppc...PWapshslSsSLsshlla.ChLREEoDlDphLc........psLh-p.l.t.pp....t......... 0 13 15 26 +14865 PF15014 CLN5 Ceroid-lipofuscinosis neuronal protein 5 Eberhardt R re3 Jackhmmer:O75503 Family \N 27.00 27.00 33.60 53.90 18.50 21.50 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.13 0.70 -5.06 4 59 2012-06-25 15:14:48 2012-06-25 16:14:48 1 2 42 0 40 54 0 283.40 61 85.46 NEW ppWPVPYKRF-FRPcsDPYCQA+YTFCPTGss...IPVMK--DlIpVaRLQAPVWEFKYGDLLGHhKIMHDAlGF+SoLTGKNYTMEWYELFQLGNCTFPHLRPsMsAPFWCNQGAACFYEGIDDsHWKpNGTLVhlupIS.GsMFNcMA+WVKpDNETGIYYETWTVpASP-tsSssWFDSYDCSpFVLRTYpKLh-LGApFs.KspTNYT+IhLYSGEPhYLGN-TSIFG.oGNKoLAhsIRcFYYsF+PHpSh+EhllSLLcIh-+Vllc+pFYhFYNhEYWaLPMKFPYlKlsYEEIPLP ............................WPVPY+.RFsaRPcsDP.YCQ.A..+YT.F..CPTGSs...IPVM+s-DlIEVaRLQAPVWEFKYGDLLGHlKIMHDAIGF+STLTGKNYTMEWYELFQLGNCTFPHL....R..P-...hsAPF....WCNQGAACFFEGIDD.hHWK....EN....GTLlhVATIS.GshFNpMA+WVKpDNETGIYYETWsVpASPp..+sAcsWF-SYDCScFVLRTapKLAEhGA-F.K....pI-T..NYT+..I.FLYSGEPsYLGNETSlFGPsGNKTLuhAI++FY..aPF+...P.HhSsKEFLloLLpIFDsV..Il++pFYLFYNFEYWFLPMKaPFlKITYEElPLP....................... 0 9 14 22 +14866 PF15015 NYD-SP12_N Spermatogenesis-associated, N-terminal Coggill P pcc Jackhmmer:Q9BXB7 Family NYD-SP12, also known as SPATA16, is a germ-cell specific participant in the Golgi apparatus, and its expression is confined to spermatogenic epithelium, not being found in interstitial cells [1]. 
Computer analysis of the protein-sequence showed that NYD-SP12 contains a cluster of phosphorylation sites for protein kinase C as well as for cyclic nucleotide-dependent protein kinases [2,3]. It is postulated that since the mutation of some Golgi apparatus’ proteins are responsible for male infertility that NYD-SP12 might play a role in modification and sorting of acrosomal enzymes [3]. OMIM:102530. 27.00 27.00 27.30 36.70 24.70 24.40 hmmbuild -o /dev/null HMM SEED 569 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.15 0.70 -12.83 0.70 -6.28 3 107 2012-06-25 15:20:51 2012-06-25 16:20:51 1 1 36 0 24 105 0 196.80 46 99.32 NEW MDSGsSRSLEsoVsRlY+DpLLPKINTSKKMSTLssuP...sILEsopEIKKNsG-tQVEsosERlKhTKoIKEKQSNDLEKAAhKRKAEuEEK.sGKKEAKIhELDNQLl.TsPLPHIPLKNIMDVEMKLVYlDEp-VuYEFApPsMspGpQsTsQsAphsDPsSs+shSsLPQIDKWLQVALKDASSCYRQKKYAVAAGQFRTALELCSKGAALGKPF-AaAEDIASIASFIETKLVTCYLRMRKPDLALNHAHRSIVLNPAYFRNHLRQAAVFRCLERYSEAARSAMIADYMFWLsGGoEcSISKLIKLYWQAMIEEAITRAESFSVMYTPFATKIKsDNIEKVKDAFTKTHPuYs-aIYTDsQGLHlLPQTsDWSSFPPQQYLLTLGFKNK-DGKFLEKlSSRKLPIFTEHKTPFSPLTREDTVRQMETlGKRILPILDFIRSTQLNGsFsACSGVMEKLHYASLLSRLQRVKEQSQVINQAMAELATIPYLQDISQQEAEL...LQSLMADAMDTLEGRRSDKERVWNpIQKVGpIEDFLYQLEDSFLKTKKLRTARRQKTKMKRLQTVQQ ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 2 3 7 +14867 PF15016 DUF4520 Domain of unknown function (DUF4520) Coggill P pcc Jackhmmer:Q96MH7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 197 and 638 amino acids in length.This is the C-terminal domain of the member proteins. 27.00 27.00 27.00 27.70 26.80 19.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.89 0.72 -4.41 13 57 2012-06-25 15:24:44 2012-06-25 16:24:44 1 3 53 0 36 59 0 90.00 38 13.52 NEW ushsssVlp-sslsGsGpFhAYoDG+VRshFsDRshLsLsh..pt...................spshscllhPDGppshlplsps...sshccY..VssAlpas+as ........s.hhslllp-ShIsulGRFhAYoDs+V+ulFhDthhLoLsashss................................shuhC+LshPDGppplIplp+P...tsac.RY..VssslpasRh......... 0 8 14 21 +14868 PF15017 AF1Q Drug resistance and apoptosis regulator Coggill P, Hetherington K kh6 Jackhmmer:Q13015 Family AF1q is an oncogenic factor involved in leukaemia development, thyroid tumourigenesis, and breast cancer metastasis. AF1q plays a critical role in the regulation of apoptosis and drug resistance. Initially identified as a mixed-lineage leukaemia fusion partner (MLL11) in infant acute myelomonocytic leukemia carrying t(1;11)(q21;q23) translocation. It is located in chromosome 1 band 21 [1]. AF1Q may be a novel mediator of metastasis promotion in human breast cancer through regulation of the MMP pathway and RhoC expression [2].This family of proteins is found in eukaryotes. Proteins in this family are typically between 25 and 482 amino acids in length. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.30 0.72 -4.08 10 164 2012-06-25 15:33:04 2012-06-25 16:33:04 1 8 61 0 89 141 0 74.70 26 19.11 NEW +sspps..pssssppP-stEFSSFhFWRsPLPsI-c-LhEhL.scthssss.........stp-pc..................t-ccp--s-sDssGWITP.SNIKQIQp- ..............................t............t.p.ps.p.asSFhaWRsPLPsIDh.s..lE.hL.....h.sp....ps...........................ttcpp..................................................................................................................................... 0 8 16 38 +14869 PF15018 InaF-motif TRP-interacting helix Coggill P, Hetherington K kh6 Jackhmmer:C9JVW0 Motif This highly conserved motif is thought to be a transmembrane helix that binds to transient receptor potential (TRP) calcium channel. It is known that proline-rich proteins inactivate tannins found in food compounds, and it is putatively thought that PRR24 does too. This is important since tannins often inhibit the uptake of iron [1]. InaF is a protein required for TRP calcium channel function in Drosophila [2,3]. TRP-related channels have been suggested to mediate store-operated calcium entry, important for Ca2+ homeostasis in a wide variety of cell types [3]. The amino acid sequence of PRR-24 contains two completely conserved Y residues that may be functionally important. This domain family is found in eukaryotes, and is approximately 40 amino acids in length. 20.00 20.00 21.50 21.00 18.00 18.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.64 0.72 -4.50 26 111 2012-06-25 15:53:43 2012-06-25 16:53:43 1 3 57 0 78 107 0 37.50 40 22.29 NEW spKhlRlhTVluYlhuVSLuAlhLolYYhFlW..ssshts ...pKhhRllTVh.sYlhuVShsAlhLulYYlFlW..-ssh..... 
1 24 30 61 +14870 PF15019 C9orf72-like FTDALS; C9orf72-like protein family Coggill P, Hetherington K kh6 Jackhmmer:Q96LT7 Family The precise function of this family is unknown but members have been found to be localised in the cytoplasm of brain tissue. Defects in the gene, C9orf72, are the cause of frontotemporal dementia and/or amyotrophic lateral sclerosis (FTDALS) which is an autosomal dominant neurodegenerative disorder. The disorder is caused by a large expansion of a GGGGCC hexa-nucleotide within the first C9orf72 intron located between the first and the second non-coding exons. The expansion leads to the loss of transcription of one of the two transcripts encoding isoform 1 and to the formation of nuclear RNA foci [1]. This domain family is found in eukaryotes, and is typically between 230 and 250 amino acids in length. There is a single completely conserved residue F that may be functionally important. 27.00 27.00 39.70 32.90 20.50 20.10 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.23 0.70 -5.12 18 93 2012-06-25 15:54:28 2012-06-25 16:54:28 1 2 61 0 64 77 0 237.90 45 51.15 NEW p.ls+hsLus.hppppsthsshl.sphhplsppslhlhuhlF.h.spsspp...shauloll..lsppclpthhphhpslppphpplstph+thhpp.....................ppslpchss.ltphhp............hlsthhpsslhs..........hpItsoshs...............FhupsloSHLpTQhsolI.usshcpspphhshLuhFhhstphphSphphpss......hpssLaLQslpcpssss....hphllp.ppPhThIcLcpcpVhposshcppp ................................pFLAppTLssEIh..csupsssl-sKFaVLsE+ullllS..hIFsu.shsGc+...........sTYuLSlI..LPpoc.LuaYLPLHplCs-R......Lscll+K.sRlhhpK..............................pupslIsh...LTuEllslMc............LLuSh+oauVscc.........I-Is-TlLND-cIus.........psapsFLhpAISSHLQTsGCSVVVG..SssEc..VNKllcTLsLFLTPsER+CSRlsps-up.............pYpsGLFlQGLLK-uoGShshPhcplhhuPaPTTaIDlDhs.TV+QhPPpHEHh..................... 
0 28 33 46 +14871 PF15020 CATSPERD Cation channel sperm-associated protein subunit delta Eberhardt R re3 Jackhmmer:Q86XM0 Family The CATSPER (cation channel of sperm) complex is a tetrameric complex consisting of CATSPER1, CATSPER2, CATSPER3 and CATSPER4, it functions as an alkalinisation-activated calcium channel. This complex requires several auxiliary subunits, including CATSPERD. CATSPERD is essential for the cation channel function and may play a role in channel assembly or transport [1]. 27.00 27.00 32.60 28.20 18.50 23.70 hmmbuild -o /dev/null HMM SEED 733 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -13.31 0.70 -6.52 7 107 2012-06-26 07:21:54 2012-06-26 08:21:54 1 4 32 0 57 111 0 504.10 29 86.63 NEW .l.ss.hhhssstpclhtpsschp.shhhshplhLThs.hpss.lPhhlPhSh.sGsP.lsShch.suShlLLVsstcsa.hs.phpoWop.pu..c.lScsptc.................................................................sssaots.hhlshos.hFAhlpssph.ps.lhhSssushphpphpYstps..lusLhuhhphpohopsh.lllhspshtpapYpDasLshoh......cshLphhht.uthshLllWsc+slhhuhpshplstsVpshpGptsL.SSlscuslhhpslsss.s-lsVhhcpNslaYuplsI..oshlKhtt.s.aopcsslhFsssGplplLhslcD....uhsappshs.l.th..ssphphshC.h.hhpsphhsphYplDhsppLplhA.hl...shuLhslV.s.sP+.Lshpssl.aE.u....G.sp+pLsIphpQppcatpsDs.FpsphK+sslsslplc.ushphoCsshK.hsh.IuVGCD.pK+IslQs.phSuC.........LpcsaSYlIEK-hhc.....ppsSccLcVpYpapchGCPLhl.asp.a+PVVELac-sta.Ell-ApallhElpGh.sYoashThppusChspsQsWsohhc.sht..h..sWs.ENYhsCas.shspP.t..s.PYpILsupstN+llas.p+sGhYlFhlpIlDP.YSaCpLpThFul.saGhhPhs.h.hssshlhllhhhhholhl..................hshhhhpRhh 
...........................................................................................................................h.........h.....h.............h..................................................................................h.........h....................h.h.hs.....t.h.h..t.....th.h..h.........t.hh.h.......p...hh......ptt...a...ph.h...s...........pt.h..hh..u.hs.hlhh.p.phh...pth.....h.h..t.......h..tt.hht.hh...s...phh..hhh..s.h.ha....phth..p.hhhh......s.p...shhhhptsG.h.hlhsh.s....hh...pph..h.th....s..tht.C.h..hhpsph...hahlDhtptLphhs.hl...shu.h.......l......l.s.sPc.Lt.ptph.aE.uh...G.sphpl............plhhhQp.pa...tthps.Fp.p.ppsshshlplc.sphth.Cs..p...sh.luVGCs.pKhIhlps..phptC...........hpc...sasalI-+.phhc.....pp.....pcs...lt...V.Y.hpphGCPlhl.asp.a.pPllpLacpsta.c.ltspallhElpGh.sYoas.Thtpu.hChppsQsWpohhp..p.t.........h..hWs.pNYhpCas.s.sts.h..s..YpIlstpst.Nplhas..t.sGhYlFhlpllDP.YSaCpLpshhul.shG.hs...h...h....hh..hh..hhh.hhh................................................................... 0 10 10 23 +14872 PF15021 DUF4521 Protein of unknown function (DUF4521) Bateman A agb Jackhmmer:Q8IYI0 Family This family of vertebrate proteins is functionally uncharacterised. The family includes the Chromosome 20 protein C20orf196. 27.00 27.00 67.40 67.30 23.20 22.20 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -4.73 5 40 2012-06-26 08:56:18 2012-06-26 09:56:18 1 1 28 0 21 29 0 170.30 61 98.08 NEW MAoQEATPGSpSEESSsLDLPSVCDIRDY.lLQRPSQEosSEAFSSlEupShPsSSDVDPDoSNLNTEQssSWTSENFWLDPSVKGQsETKEEDDGL.....RKSLDRFYEsFuHPQPGSuNPLSsSVCQCLSQKIoEL+GQESQKYALRSFQMARVIFNRDGCSILQRHSRDAHFYPlcEGSoSL-DEKPTPGLSKDIIHFLLQQNVMKDp ..Msup-ATsuS.hSEES.S.sLDLPSsCDIRDY..VLQ+PSQEAsSEAFSSlEhaShPsSSDVDPDoSsLNsEQss.SWsSENFWLDPusKGQsE.ppEDDGL.....RKSLD+FYEhFGpPpPuStssLSASVCpCLSQKIopLcsQESQKYALRSFQMApVIhsRDGCollppHs+-s+FYP.t-Gssul-ccc.sPGLSc-llpFLLpQshhKc....... 
0 2 2 4 +14873 PF15022 DUF4522 Protein of unknown function (DUF4522) Bateman A agb Jackhmmer:Q96KX1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. In human this protein is known as C4orf36. 27.00 27.00 34.70 33.20 19.40 18.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.67 0.71 -4.38 3 26 2012-06-26 11:36:54 2012-06-26 12:36:54 1 2 18 0 11 23 0 97.20 68 81.83 NEW MAYGLPRKNTVKTILRGSCYKVQEPWDLALLTKTWYTNLANIKLPFLEEIoFGSPVpLpKspTpK-spLPSAESIKLEREYEsKRLsKLKCQENVuKEIQhSLRERPVGLRRPLPPK MAYGlPRKNTVKTILRGSCYNVQEPWDLALLsKTWYoNLANI+LPFLtEIsFGuslpLpKspThK-uLLPSAESIKLEREYEhKRLscLKsQENsucEIQh.LRcR.sGLRRPL.sK.............. 0 1 1 1 +14874 PF15023 DUF4523 Protein of unknown function (DUF4523) Bateman A agb Jackhmmer:Q7Z4U5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. 27.00 27.00 29.10 36.10 26.70 24.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.13 0.71 -4.73 4 34 2012-06-26 11:58:47 2012-06-26 12:58:47 1 1 22 0 14 28 0 139.70 58 77.70 NEW NLHKLLPN+LMElLaSa+SEEDK+KCENsEFSGLERILtRHQhPKEINLTPKPSpMPLW+RKh.NNhspGWKKC+LWsKsTKEPPMSTIVVRWLKKNMQPoEDLcSVhpRLSsFGPIpSVTlCGRQSAVVVF+DhsSACpAVoAFQSRsPGoMFpCuWQQRFMSK- ......................................NLHcLLPN+LhEhLaSh+SEEDKcKCE.NPEhSGLERILARHQLPKEINLTPKPs+MPsWKRKIINNlo-GWKKCHLhc+NhKEPPMSTIVVRhL.pKNh..pcsL..+sl.p+LpthsshtpsT.sG+ppshV................................................... 0 2 2 3 +14875 PF15024 Glyco_transf_18 Glycosyltransferase family 18 Eberhardt RY re3 Jackhmmer:Q09328 Family Enzymes belonging to glycosyltransferase family 18 (alpha-1,6-mannosylglycoprotein 6-beta-N-acetylglucosaminyltransferase) contribute to the creation of branches in complex-type N-glycans. This domain is responsible for the catalytic activity of the enzyme [1]. 
26.00 26.00 26.60 26.20 25.20 24.30 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.84 0.70 -6.17 11 229 2012-06-26 12:02:26 2012-06-26 13:02:26 1 8 75 0 150 195 2 376.20 35 73.28 NEW CYA.-aGVD.GS.CSFllYLSEVEsaCP.h.tRt+ps.....s.......pp..sslRcslt.L...h...pcsphpah+pRIpRhWspWlpAucp.Lpc.ppshppR++h+lLValGhLusEsuh+huppuhpGGPLGELVQWSDLluoLplLGHpLclSsspspL+ulls.hhstspssssssscpphDLIaTDIhGLs.h+pphs.hhh.pa+C+lRlLDSFGTEs-FNhpsYspppsh......ppp..WGuasLphpQahTMFPHosDNoFLGFVV-pcs...........ps.++pspul.....VYGKctYMWc..spcchlcllp+ahclHATVss.tp.....pslPoh...VpNHGlLsupElppLL+csKlFlGLGFPYEGPAPLEAlApGsVFLNsKFcPP+SRhNpcFFc-KPTlRclTSQHPYsEtaIGcPHVhTVDIsNpc-lEtAlKcIlphK.lcPalPaEFTspGMLpRVsshlpKQsFCsp.s...................pWPPlsAL+lhtuspup.SCcpsCpspsLlCEPoaFPhINsppthp+...phsCsuscspss..lAPuh.....sCslQussLLFSCAussP...phpRlCPC .............................................................................................................................................................................hhh.Rh.th.........Wh..u....ht............................thplhhh.t.h..t......h...s.pGGPLGEhlQWsDl.ssL.hlGHtl.hs.p..ph.........................hhh.Dh.G...hpt.....h.......pC.hRllDoFGTcstash..h............tt...aG.hshp..Qa..ThaPHo.s.D..N.oFhGFl.pp..................p.ht..ullYGK.t..h.a..p.......sp......p......h.....lthl....pp.h..h.plH...uTV.....t..........hPsh....lpNHGhlst.-h..LLpcs.+lhlGhGFPhE.GPAPlEAlu.Gs..hFlps...chp...s.csp.s.thh..................tK..Ps.R.c..h.SQpP.Yhp.hlG..PaVhslshps.tthctslpthht..p..h.................p....PahP....h.....EaostGhLpRl.thhlpp.p.....h...............................................aP.Ph.ps.lph.hhu.....up.uC.psC..ppthhC-PshF.hlNppp.h.p...hth...Cps.......p.............................p....h.............hP.........t...........ttChh.p...tp..l..aSCsutt.......phpRlCPC................................ 
0 54 67 108 +14876 PF15025 DUF4524 Domain of unknown function (DUF4524) Coggill P pcc Jackhmmer:Q96MH7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 197 and 638 amino acids in length.This is the N-terminal domain of the member proteins. The human gene is from C5orf34. 19.50 19.50 19.70 19.50 19.20 18.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.81 0.71 -4.73 7 59 2012-06-26 12:02:50 2012-06-26 13:02:50 1 3 49 0 40 58 1 132.60 45 25.25 NEW phllhsD-pVpspasDGspLpLSPCGStFlhEptss.stHPLptscpl+QRT+FslSpa+pplhpAL-FRNpasspPaLsppllpsE....Rptphhscloclc.WPssssss....hpstpsGpVploSlDGhApLhLsppQcEFTVcFlC+lup ......................................Mlla-D-SVpVpYhDG...opLpLSPC....GoEFlhc+sss.u............sHPl.ptscRlRQRTcFslSsa+ppl.pAL-FRNp.us......p.P.a.LspplIss.-...............+Kp.....p.lhhc.....hoEsc.WPs.ssss...........thhh...hpsGpVcIsSlDG+AhLsL.spsQcEFTVcFLCKlSp................ 1 8 13 23 +14877 PF15026 FAM74 FAM74 protein Bateman A agb Jackhmmer:Q5TZK3 Family This family of uncharacterised proteins are found in humans and are known as FAM74 proteins. Members of this family contain several short protein repeats. 27.00 27.00 31.40 48.50 25.10 22.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.11 0.71 -4.50 2 11 2012-06-26 12:07:12 2012-06-26 13:07:12 1 2 3 0 8 9 0 84.70 64 100.32 NEW MWRELRGCPGGDVETAQRLSQRRRGKSSEAVPEKTWRAQRMSQRRRGESSEAVPEKTWKELRNSETVPEKTWKQLRtCLQEDVpRVQRLShhhHhts.hhlhhth.hp.pGs+..ssTaL...hhht ........cLRsCPGtDhETAQRLSpRRRGcSSEAVPEKTWRsQRhSQpR..cSSEsVsEKTW+p...SEsVsEKoW+pL+tC.pEDVpRVQRLS.hhHhts..hlhhth.hp.pGs+..ssTaL...hh.... 0 8 8 8 +14878 PF15027 DUF4525 Domain of unknown function (DUF4525) Eberhardt RY re3 Jackhmmer:Q09328 Family This domain is found in eukaryotes. It is often found at the N-terminus of glycosyltransferase family 18 enzymes (Pfam:PF15024). 
It is also found in coiled-coil domain-containing protein 126. 27.00 27.00 63.10 62.30 23.90 22.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.68 0.71 -4.64 7 96 2012-06-26 12:10:43 2012-06-26 13:10:43 1 3 41 0 56 75 0 130.60 64 31.53 NEW shshshK..SQKLuhhLlsFGhIWGhMLLHaThQp.sp+pSSspLRpQILDLSKRYlKALAEEN+slMDGs.uuoMsuY.DLK+TlAVLLDsILQRLsKLEsKVD.llsNGousNhTNuTusshsslsssc+sssuuslp ...............hhhhshK..SQKLuhhLlsFGhIWGhMLLHaTlQQ.spppSSuhLREQILDLSKRYlKALAEEN+Nl.VDG....s.uusMsuY..DLK+TlAVLLDsILQRlsKLEu...KVD.lVlNGousNoTNuTo.slss.lss.c+lNsus.I........................ 0 5 8 21 +14879 PF15028 PTCRA Pre-T-cell antigen receptor Bateman A agb Jackhmmer:Q6ISU1 Domain The pre-T-cell antigen receptor (pre-TCR), expressed by immature thymocytes, has a pivotal role in early T-cell development, including TCR beta-selection, survival and proliferation of CD4(-)CD8(-) double-negative thymocytes, and subsequent alpha/beta T-cell lineage differentiation [1]. This protein contains an immunoglobulin domain [1]. 27.00 27.00 31.40 67.40 26.90 26.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.07 0.71 -4.65 3 29 2012-10-03 02:52:13 2012-06-26 13:13:27 1 1 19 3 13 51 0 146.70 75 63.73 NEW GVGGTPFPSLAPPITLLVDGKQQTLVVCLVLDVAPPGLDSsIWFSAGNGSALDAFTYGPSPAsDGTWTSLAQLSLPSEELAAWEPLVCHTGPGAGGhSRSTQPLQLSG.EASTARTCPQEPLR...............GTpGQsLRLuVLRLLLFKLLLLDVLLTCSRLC .........GVGuTPFPSLAPPITLLVDGKQQhLVVCLVLDVAPPGh-SPIWFSAGNGSuLDAFTYGPSPAsDGTWTuLAQLSLPSEELAuWEPLVCHTGPGAtG+SRSTQPLQLSG..EASoARTC.hEPLR.................Gh.utsLhLGsLRLLLFKLLLhDlLLTCS+L........ 0 1 1 1 +14880 PF15029 DUF4526 Protein of unknown function (DUF4526) Bateman A agb Jackhmmer:Q8WUU8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals and includes the human integral membrane protein TMEM174 protein. 
27.00 27.00 175.40 175.30 21.50 21.00 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.83 0.70 -4.78 5 34 2012-06-26 12:24:07 2012-06-26 13:24:07 1 1 32 0 23 31 0 233.20 66 96.34 NEW -DFulNVFSVoPYPu.SpSDupVSDGDKAGATLLFSGIFLGLVGITFTVMGWIKscGpoHFEWTQLLGPILLSVGVTFlLISVCKF+ML.SCKPCKpoEEss...LDhEQoSuGQSFVFTGINQPITFHGATVVQYIPPPYuopDssGGso....PllSNsNulaSuau..sssusGPPQYYsIYPMDNPAFVpD-ssPupluts-....lcsSpPDAuhp-E-tLGsscosEhSPPuYEEIFPTsR .EDFPlNVFSVTPYTP.STADIQVSDDDKAGATLLFSGIFLGLVGITFTVMGWIKYQGVSHFEWTQLLGPILLSVGVTFILIAVCKFKML.SCQhCKESEERl....Do.EQTsGGQSFVFTGINQPITFHGATVVQYIPPPYuSpEPhGhsosYLQsslsPCGLlsSGGA..sushPSPPQYYTIYP.DNuAFVsDE.shPuas.suus.....cRsssDAcpLE.EspL.t--.ssssFSPPPYEEIYuls.p.. 0 1 2 7 +14881 PF15030 DUF4527 Protein of unknown function (DUF4527) Bateman A agb Jackhmmer:Q6ZRC1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. 27.00 27.00 33.40 33.40 26.90 23.80 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.79 0.70 -5.09 5 28 2012-06-26 12:29:08 2012-06-26 13:29:08 1 2 22 0 14 26 0 239.40 52 67.78 NEW QLRDsEA-soEEDLRLRVQQL+HQVLTLQCQLRDQsuAHRtLQAuh-EATsLQDcLQuKL-ELQKKQHEANLAVoPLKAKLASLVQKCRERN+LITHLLQELHRHGluNHLLSEhAQuMVsDVALAEYAATFLAPGlPETSHH..LDVESEcTAsc+A.QKYLLNPEtDSV..LQssLpSESWPlPEAEWPAQTApLDS.hKLPLPSGsTPsPGTC.AuVAVEPuLPAQsL+EcGGsSCPl.LpADsLPPPsELLSPARILAFHQELRQSICSNSQVNKSPLE ................................................pDsEApssEE-.RLcsppLHHpVLTLQCQLRDQuuAH...QAuh-E.As+LpccLpscL-ELQKKQHEApLAVTPLKAKl.ASLVpKCpERN+LITHLLQEL+RHGhsNhLLSElAQsMlsDVALAEYuATFLuPGlPETSHH..LDVcSEhTAshRA.QphLLNPchDSV..lQpshpSEShPlPcsEWPApsApL-S.lK.LPLs.ssT.DPGsC.AusssEsGLsAQpLQEcGGhsCPs..LpssslsssSELLSPARILAFHpELRQSICSNSQVpKSPLE................................................... 
0 2 2 2 +14882 PF15031 DUF4528 Domain of unknown function (DUF4528) Eberhardt RY re3 Jackhmmer:A6NNL5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 95 and 154 amino acids in length. This family includes Human C15orf61. 27.00 27.00 38.50 38.50 19.70 19.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.83 0.71 -4.38 13 65 2012-06-26 12:31:18 2012-06-26 13:31:18 1 1 59 0 42 60 0 119.40 64 85.25 NEW KPtASEVLTuaL+QRscPsWTSYFV+YpcVtNDQaGhSHFNWsV.sGsNYHILRTGCaPaIKYHCT+RPhQDLohEDpFFphlKllNLGlPsLhYGlAAhhLI+HpEhVcsuc..GsVsIYFLhcED+G ...............+PpASEVLTpaLhQRphP.WTSahV..YusVpNDQFGhSHFNWsV..pGuNYHlLRTGCFPFIKYHCSKtPhQDLuhpD+FFpsLKVlNLGIPsLhYGLuuhhhh+hTEhV+TSh..GPVTlYFL.KEDcG...... 0 10 14 29 +14883 PF15032 DUF4529 Protein of unknown function (DUF4529) Bateman A agb Jackhmmer:Q6P387 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. The proteins contain a conserved VLPPLK sequence motif. 
27.00 27.00 43.80 43.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.48 0.70 -5.87 5 37 2012-06-26 12:32:13 2012-06-26 13:32:13 1 1 27 0 20 36 0 334.60 49 98.13 NEW MDL...CpKsET-LENuENsEI-SoEETELTYTCPDERSEKNHVCCLLslSDITLEQDcKApEFlIuTGWEEAVcGWGRTSPTACIWPRKKsKKARVGEuAs..SsCLLCssLSpGSLEuRP.opuuK......AEsGPEKDpGSsSQTpusPQGPosASRE.INKICFPTYhpGEKKSLQIKEFIWChEDWAoPETlRGKssRsPSpGscphLSISDuLTSRALLVLPPLKuSssNuLDVLGKKSKNhFLQPEEKVLuVEKDECVAhsYGLKoVDGKGEKpssELA+Hs+VsDhhPFPPsVApTsLL.A-sEpCCLHWSLLPEKNLlCPPsPoNl+YLATlQLLQKQGsQNYKA+hKA+EPRPPhpTpK+lLTEAKQENRPQMLETKVFPRPLLPSLTVSRVVIPVSTHRlL ....MDl...CQKsET-LE.tcssEIppsEETp..ohTCPDt+SE+sHVhCLLslSDlTLEpDp+A..pEFhIGTGWEEA....VpGWG+sSPsACIW.P.RKpsKKu+sGEus...SsCLlChsLsp.....ho...h..-s+P.hps...............sspcspuossphpusstu.tsuSRt..hsplshPs.hpuEKKSLQlKEFIWC.ccWshPpo.+s.Ks.ts...sstsspp.t.ohsssLoS+ALLVLPPLKuS..NsLDlluKKo+s.hhpsEEKshsVcK-tshuts.GhKos-GKGEKR..E.....LApH.hVsDh.s.PsssApTsLL.scsEpCCL+WSLLsEKshhsPPs...s...ssl+YLAsLQLLQKpGhQsaKs+h+A+-PRsPh.opK+hh.cAKQEsRPphLEoKVFs+PLLPSLTVsRVlIPh.sHRhL........................................................................................................................... 0 1 2 5 +14884 PF15033 Kinocilin Kinocilin protein Bateman A agb Jackhmmer:A6PVL3 Family This family of kinocilin proteins is found in vertebrate. In mouse it has been shown that this protein is expressed primarily in the kinocilium of sensory cells in the inner ear [1]. 
27.00 27.00 34.50 34.30 22.90 19.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.60 0.71 -4.60 6 31 2012-06-26 12:53:32 2012-06-26 13:53:32 1 2 22 0 16 38 0 105.50 75 99.54 NEW MDIPISoRDFRCLQLACVALGLVAGSIIIGVSVSKAAAAVGGIFlGAAGLGLLlhA.YPhLpu+FphshlhPsI.........GsLRIHPpsGPDHGEGRSSsNuNKEGARSuLSTVoRTLEKLKPGGRGTEEG ...MDIPIS.oRDFRCLQLACVALGLVAGSIIIGlSVSKAAAAhGGlFIGAAuLGlLlhA.YPFLKARFNLDHILPsl..........GsLRIHPpsGPDH.GEGRSSsNGNKEGARS....SLSTVoRTLEKLKPGs......RGsEEs.................... 0 1 2 4 +14885 PF15034 KRTAP7 KRTAP type 7 family Bateman A agb Jackhmmer:Q8IUC3 Family This family of keratin associated proteins are found in vertebrate. 27.00 27.00 123.30 123.20 21.10 17.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.17 0.72 -3.34 5 24 2012-06-26 13:00:44 2012-06-26 14:00:44 1 1 19 0 12 27 0 85.90 84 100.00 NEW MTRFFCCGSYFPGYPCYGTNFHGTFRATPLNCVVPLGSPLNaGCGCNGYSSLGYuFGG.SNFsNhGCCYGGSFYRPWGSGSGFGYSTY MTRFFCCGSYFPGYPsYGTNFHRTFRATPLNCVVPLGSPLNaGaGCNGYSSLGYuFGG.SNFsNLGCsYGGSFYRPWGSGSGFGYSTY... 
0 1 1 2 +14886 PF15035 Rootletin Ciliary rootlet component, centrosome cohesion Coggill P pcc Jackhmmer:Q86T23 Family \N 25.10 25.10 25.40 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.00 0.71 -4.30 29 212 2012-06-26 13:02:07 2012-06-26 14:02:07 1 5 88 0 120 189 1 174.70 35 10.71 NEW ++Rl-AspEpQccpAsLhutLQpK.....ltpY..Rp+hu-lEppl....................ssp..........+.sccphshslcc................................hls+LEEEcpRs-p.......LR.ph-ptphpNppLps-lp+Lpppapt...plppKEchappcEcshspYlssEpp+.hhsLWp-lppVRRQhuEh+spTERDLpp.+sEas+ss+sl ...............................................++Lps.pEuQpcQApLVp+LQuK.....lhQY..+..p..hCt-LEppl............................................................................ttp...t....p..hpstpc...sL-p.......................................................................................................hLhRLEEEpQ..RspsLspVN..s.LRtph-puphsNcuLpcDlpKLTs-asphpcELtp+E.spap..pEpct.....ap.sYhp....sEHsR.LLtLWRpVVshRRph..tEh+ssT-.RDLtch+sEhsRhutp............. 1 24 33 72 +14887 PF15036 IL34 Interleukin 34 Bateman A agb Jackhmmer:Q6ZMJ4 Domain \N 27.00 27.00 161.00 160.70 18.60 18.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.31 0.71 -4.70 5 39 2012-10-02 01:28:15 2012-06-26 14:07:47 1 1 27 13 17 42 0 160.20 69 72.21 NEW GLElWslAus-ECulTGaLRDKLQY+NRLQYMKHYFPINYRVuVPYEGVLRsANITRL.....p+ApVSpRELRYLWVaVSLsATEpVpEVLLEGHPS.WKYLpEVcTLLccVcpuLu....pDVEls.P+VEulLoL...LopAsG.Sl.KLVRPKALLDNCaRVMchLassCC+pSSl .........LEhWPLTQs-ECslTGaLRDKLQYRNRLQYM........KHYFPINY+ISVPYEGVhRlANlTRL.....QRAcVSERELRYLWVLVSLSATESVQ-VLLEGHPS.WKYLpEVpTLLLsVQpuLh.....DVEVu.P+VEuVLSL...Ls.APG.sL.KLVRPKALLDNCFRVMELLYCSCCKQSS....... 
0 1 2 4 +14888 PF15037 IL17_R_N Interleukin-17 receptor extracellular region Eberhardt RY re3 Jackhmmer:Q6ZVW7 Family This domain is found at the N-terminus (extracellular region) of interleukin-17 receptor C and Interleukin-17 receptor E. This is the presumed ligand-binding domain [1]. Human putative interleukin-17 receptor E-like consists only of this domain. 27.00 27.00 28.40 28.00 24.50 23.80 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.55 0.70 -5.48 7 139 2012-06-26 13:28:15 2012-06-26 14:28:15 1 5 37 0 62 140 0 299.40 33 61.25 NEW sluC.h....pT+C.Ls.Rhssphssputh...c...t.....cs.h..................pGlpsuhhphhV.tShpua.hthCthachsssAphplhup...Sptut+hsshpsslup+shhop+TQPs.scsh.......SQ+ptsP.................cauasl.s-scslpVTls...susEhslRLCapWslpCp-hupPhss.K.loGs+sVsLsYphLLPCLCIEu..Lp.DoVRpchCPFps.PcAahp-hWcuh+asshSt...hVhtLshhCPLKlcAoLC.Rps.to.CcsLssAhupEu...-thhlhptVDhHPpLChKFShtsuoalcCPapss..ohssWsssMshpspQhhLchSShstssFSuuhshPt.u.st.h.ss..hSpspsps.Pssh-LllshL+sGsCl.VWRsDV.FuhhtL.Cs .............................................................................................................................pt...p.s.hph.s.........thChhhch.ssut.hh..up.....s.tshhhsshpsslupcshhhphTpPphtcth........s.ppthP.......................a.hsl.s-ucslpls...ls...su.-.phtLsh.W...psps.spPhhh.+.lousptlsLsap.LlPCLCIp.s.h...h.p.....DslR..pphCP..Fpp.....scAa........ps.............hWcsschp.ho.........shthps.Csl.s.csuLC.pts...ts...CpsLsssht..ps...pthh.h.hlchHPpLCh.phs..p..p.p..lppsh.ts....shs..shpssh.h.......p.tohh.ht.Ssshs....................o...s.s..h.hp..lh...t.t.C..h................................................. 0 3 6 22 +14889 PF15038 Jiraiya Jiraiya Eberhardt RY re3 Jackhmmer:A6NGB7 Family Jiraiya inhibits bone morphogenetic protein (BMP) signaling during embryogenesis [1]. The human member of this family is TMEM221. 
27.00 27.00 33.30 28.20 22.70 22.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.09 0.71 -4.85 11 78 2012-06-26 14:47:06 2012-06-26 15:47:06 1 3 63 0 58 82 0 135.70 31 46.85 NEW LsuLuLLsllSllhAlLSLIhLL+lpssstpstt.hhtstt.th.ss.....-thhhlY-lohALsALuLoLNLsCLLVCAlQhhFAs+llRus.sssuRs.....spaLtcSpssRpsAluuFFluIsVaLoulhLYohlpFcspPuIsoShllGsGllF.......CsuuMlHslalWp+tcspuh+phshs ......................hhhhsl.uhlhuhluh.hlhphps..ht.....................t.h.hlh.lshsLsuLsLsLslsslllsslphhhsschh+s......tRs..................shaL.cs.ph.RhsAluhF.hhulsVaLsul.lYs.hl...Fc.psuhssu.llGsGhlh.......h.sssh.sHslhhht+tpp.uhpt...s................. 0 14 20 42 +14890 PF15039 DUF4530 Domain of unknown function (DUF4530) Eberhardt RY re3 Jackhmmer:A6NGS2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically around 140 amino acids in length. The human member of this family is C19orf69. 27.00 27.00 77.20 130.50 23.20 21.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.59 0.72 -4.16 4 23 2012-06-26 14:53:34 2012-06-26 15:53:34 1 1 18 0 9 24 0 111.20 69 85.98 NEW MELW+QL+QAGLVPPGLGPPP+ALRtVPPVtpsGQTLhSsGADTGGAREpLLWIWEELGNLRRVDVQLLGQLCSLGLEMGsLREELVTlLEEEEEppppEEcp......tPEcKQE ..MELW+QL+QAGLlPPGLGPPPpALRtlPPs-pPGQTLhouGADsGGARESLLWIWEELGNLRRVDVQLLGQLCSLGLEMGALREELVTILEEEEEsscEE.EEs.....ppsppKQ-............. 0 1 1 1 +14891 PF15040 Humanin Humanin family Bateman A agb Jackhmmer:P0CJ71 Family This family of proteins is found exclusively in humans. Humanin is a short anti-apoptotic peptide that interacts with Bax [4]. 27.00 27.00 43.70 43.50 24.20 17.70 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.62 0.72 -6.71 0.72 -4.70 9 21 2012-06-26 14:56:28 2012-06-26 15:56:28 1 1 4 2 11 26 0 24.00 75 97.68 NEW MAsRGFSCLLLSTSEIDLsVKRRh MAsRGFSCLLLssSEID.LssKRRh. 
0 11 11 11 +14892 PF15041 DUF4531 Domain of unknown function (DUF4531) Bateman A agb Jackhmmer:A6NCJ1 Family This family of uncharacterised proteins is found in mammals. This family includes the human protein C19orf71. 27.00 27.00 85.80 85.80 18.10 17.50 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.23 0.71 -4.52 3 25 2012-06-26 14:58:44 2012-06-26 15:58:44 1 1 20 0 15 25 0 164.20 65 88.51 NEW PcsLYSDDYLSLEGPRWsPAIRQAVRWKYTPMGRDAAGQLWYTGLTNSEoREAWYsLPRALDSPYREAYsRWHGCaQ+RERoMPSAYTQHLRETAWaDPlIPAQYpsPSTRWGSTLWKDRPIRGKEYVVNRNRYGVEsPWRuSDYVPhLSAPQRPRaTTQsYRQWGLEPYCPSTsQRPPPuaTP ....PsPLYSDDYLSLEGPRWsPAI+QATRWKYTPMGRDAAGQLWYTGLTNS-spEAWYsLPRA.sSPaREAYsRWHGCap+RE+ohPSAYTQ+LRETAWaDPllPA.QYhsPSTRWGShLWKDRPIRGKEaVlNRpRaGlEs.WpuSDYVP.LSsPQRP.hTsQsYRpWsLEPYCPSTsQts.P..TP..................................... 0 1 1 3 +14893 PF15042 LELP1 Late cornified envelope-like proline-rich protein 1 Bateman A agb Jackhmmer:Q5T871 Family This family of uncharacterised proteins is found in mammals. 27.00 27.00 60.80 30.30 26.70 25.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -11.23 0.72 -11.72 0.72 -3.87 3 23 2012-06-26 15:02:28 2012-06-26 16:02:28 1 2 18 0 11 14 0 85.90 71 91.06 NEW MSSDDKsKSuDPKNEPKNCDP+CEQKCETKCQPSCLKKLLQRCSEKCsh-KCPsPPKCsPCPPCPP.........SsPssPhCPP.CsPPCPuP...CPPSCPPKPCVKPCPPKCPS....PCPPPE .MSSDDKsKSs-PK....sEPKNCDP+CEQKCEoKCQPSCLKKLLQRCSEKCPREKCPsPPKCPPCP.s..ss.........................Cs..Cs.....CP..CP........................ 0 1 1 1 +14894 PF15043 CNRIP1 CB1 cannabinoid receptor-interacting protein 1 Eberhardt RY re3 Jackhmmer:Q96F85 Family This family of proteins interacts with cannabinoid receptor 1 (CNR1) and attenuates CNR1-mediated tonic inhibition of voltage-gated calcium channels [1]. 
27.00 27.00 28.40 30.10 25.70 17.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.93 0.71 -4.96 11 73 2012-06-26 15:23:00 2012-06-26 16:23:00 1 1 58 0 49 64 0 150.20 52 97.72 NEW Mus....hstlh+lolSL+hpPssuPVFaKVD.GpRFuQs.RTIKLLTsSpY+l-VshKPush..pshulGGsslsLEp.po+..DtppsVYou.asT-GlssoKSG-RQPlplslpFsshGthcss.......hQsKaYshpcp-H....CpWGsshpsIEaEC+ssEsRolhhl.........pKEsF+ ................Mu-lPsll+loluL+hpPN-GPVFaKVD.GpRFGQ...........s.RTIKLLTGSpYKlEVplKPsTlp..sp..sluIGGlhlPLE.+o+...DspplVYoGhYDTEGVssTKSG-RQPlQlsh.FschGsFETs.......WQVKaYNYcKR-H.......CQWGssFssIEYECKPNETRoLMWlNKEoF.h............. 0 10 14 33 +14895 PF15044 CLU_N Mitochondrial function, CLU-N-term Coggill P pcc Jackhmmer:Q96NS8 Family CLU_N is the N-terminal domain of the Clueless protein, also known as TIF31-like in other organisms. The function of this domain is not known. It family is found in association with Pfam:PF13236. 22.60 22.60 22.60 22.60 22.20 22.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.42 0.72 -3.82 51 327 2012-06-26 15:24:58 2012-06-26 16:24:58 1 45 226 0 243 326 1 76.20 35 5.87 NEW lss..p-plp-l+phLhpt..sposhhTsapL.p..h..c..G.tp......l.scas-lspltsl.......ps....ss....p..Lpll..c-..sYsp.+pAchHlpRlR-ll ................................................ssp-pVp-l+QhLh-p.ssTsahTsFsLp.h..c..Gpp........L.ccasElpslpsl...................pt........ss....pLplV...........E-PY.Tt.+-ARhHVp+lR-Ll......... 0 60 122 197 +14896 PF15045 Clathrin_bdg Clathrin-binding box of Aftiphilin, vesicle trafficking Coggill P pcc Jackhmmer:Q96F83 Family Aftiphilin forms a stable complex with p200 and gamma-synergin. This family contains a clathrin box, with two identified clathrin-binding motifs. This family of proteins is found in eukaryotes. 
22.50 22.50 22.50 25.00 22.10 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.52 0.72 -4.47 36 120 2012-06-26 15:31:09 2012-06-26 16:31:09 1 2 65 0 69 134 0 73.40 46 10.68 NEW EhhcVWspLQDlcsuauLRYQWuuSHSNKpLLsSLGIDoRNILFoGsKp.......pslhVPhaAAGLGM..LEPTKtslc.P ...t..hclWppLQDIcsApuL+YQWuuS+ss+pLLsSLGIDoRNIhhoGsKt.......pslhhPhaAAuLGh..LEPsKt.lp..s...................... 0 12 17 38 +14897 PF15046 DUF4532 Protein of unknown function (DUF4532) Bateman A agb Jackhmmer:A6NCN8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 52.20 37.60 22.60 19.00 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.77 0.70 -5.41 5 36 2012-06-26 16:18:15 2012-06-26 17:18:15 1 1 29 0 24 26 0 228.90 52 89.88 NEW MVHApEoLPTsQTWAQREFLLPuEopEaPGFTpQAYHQLALKhPPCT-hKuKVRQRLhpPWKDAAQHTWGFHTWLDVGRLPATFPTRPDRPYDSNVWRWLTDScAHRpPPAEPPIPPPSWMGQNSFLTFIsCTPIFVDhNRKKQVIlRTVKELKEVEKLKLRSEARAPPLDAHGNILPPKNFKKYRHISAGGRaEPQGLQLMPNPLPNNFARSWPCPNPLPHYQEKlLKLALLPSAPLSQDLVRsYQTLIEsRVALPLHHLS+ApPGKTssRKhKRRPG .................MhpApE...s.pTWsQR.EFhLPscohchPGFT.QuYHpLALK.PPsT-hKScVRpRLhpPWKsu..s...p..H.TWGFHTWLDVGRLPATFPoRPD+PYDSNVWRWLTcop....AH+pP..........P...........uc...P..I..PPPSWMGpNSFLoFIpstPlFl.DhpRKpQVIhRThKEL+E..lEKLKLRSEuRAPPLDupGNIlPPtsFKK...ahss.................................................................................................................................................................. 0 5 6 9 +14898 PF15047 DUF4533 Protein of unknown function (DUF4533) Bateman A agb Jackhmmer:A2RU48 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. This family includes two human proteins: C12orf60 and C12orf69. 
27.00 27.00 52.20 52.10 25.70 25.50 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.35 0.70 -4.93 4 54 2012-06-26 16:22:02 2012-06-26 17:22:02 1 1 28 0 35 55 0 208.40 41 92.27 NEW c+pcLlpht+phF.pMpD.hphTNpLhElhNuphsspItpIpMKEDusVK-sh-.Ilphh+ElQstlpthpspMpcc........h.sohhcK.pslKEhpppthtlh+pVhosllspshsoush..lKhLpSSLohlhhs.lhsL............tp.uuol.tuhSsus.tlGhs.IL+tl.sAlcpsplpssl+Sht-pLtphhcs.c.hhchlpcshsTlc .c+pcl.phtpphh.plpD.hssTNpLs-lhNtphssplh.lphKc-uoIK-sh-.l...lpshpchQptlpphc-slpcc............h.solhcKhpslKEhcppthslhppVhosllspAhsuush..lchltSslophhh..lhslh..............tp.u....so.htShus.us.thshc..hl+tl.sAlcppphpssI+ShtccL.phhpA.c.hhcslpcshpTlc....... 0 2 2 8 +14899 PF15048 OSTbeta Organic solute transporter subunit beta protein Bateman A agb Jackhmmer:Q86UW2 Family \N 27.00 27.00 39.00 37.80 26.70 22.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.64 0.71 -4.05 9 37 2012-06-26 16:25:08 2012-06-26 17:25:08 1 3 31 0 24 34 0 122.30 45 82.98 NEW -HSpssptAsssshVPQEL.......................LEEMLWaFRsEDAoPWNYSILsLusVVslISFlLLuRSIpANRNpKhp....ss-KppPEs.pLs-u.h+-sssLshLRET..LLSEKP...sLAQ.............sEhElcp+Dssh.shLsDP .........................pt.pss.tsssussVPQEL.......................LEEMLWFFRsEDAoPWNaSILALusVVlhlShhLLuRuIpANRpcKhp........s.-KpsPEshphsEu.....t.h......+.-csuLss....LcET....LLSEKP...sLsp.............sphELK-+Ds...shhs-.......... 0 1 1 6 +14900 PF15049 DUF4534 Protein of unknown function (DUF4534) Bateman A agb Jackhmmer:Q8N7C4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. Proteins in this family are typically between 170 and 190 amino acids in length. The protein includes the human integral membrane TMEM217 protein. 
27.00 27.00 73.40 73.10 19.20 19.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.93 0.71 -4.61 10 43 2012-06-26 16:29:12 2012-06-26 17:29:12 1 1 23 0 28 31 0 158.00 42 85.33 NEW u+MhSlLsGlFSllsTp.aLIFEhpalsphshp-phshYpsTpshltsallsaphsIshsLShlTIllSCFLLYslasphYhGLLhYslWIlhYEhlNhsl.lLTNs..cpphKElphL+Whh.lSRhlLHhFhL.FVlpHAYhlYKspppssllua+RRhSs .A+hGolLuGlFoIhsTh.aLIFE.palsphssp.-p.hshhpsspshlssallsaphsIslhLShlTIllSCFLLYSVasplYpGLllYslWIlhYEhhNhsl.lLTNs..cpshcEl+hh+WhGhlSRhhhHhFhl.FVlpaAahlYKspppssllua+RR.Sh............ 0 1 2 2 +14901 PF15050 SCIMP SCIMP protein Bateman A agb Jackhmmer:Q6UWF3 Family This family contains the SCIMP proteins which are a a transmembrane adaptor protein involved in major histocompatibility complex class II signaling [1]. 27.00 27.00 29.60 28.50 26.90 26.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.74 0.71 -4.26 4 30 2012-06-26 16:34:01 2012-06-26 17:34:01 1 3 24 0 16 30 0 121.40 61 67.48 NEW MsWWRsNFWlILAVAIIsVSsGLGLILYCVCRh.LRQG+KWElAKPLcpcpRDEEKMYENVhNpssVQLPPLPPRuh..PEcouPQEsPSQPsAsYS.VNKV+NKKsVuIPSYlEPEsDYDDVEIPAshEpp+ .....MsWWRsNFWIILAVA.IIlVSluLGLILYCVCRhpLRQGKKWEIAKPLKpcQtDEEKMYE...NVlNpSPVQLPPLPPRsh.S.Ecs...SPQEsPS.tP.PATYSLVNKl+NKKsVSlPSYlEPEsDYDDVEIPANhEpt............................ 0 1 1 2 +14902 PF15051 FAM198 FAM198 protein Eberhardt RY re3 Jackhmmer:Q9UFP1 Family This family of proteins is found in eukaryotes. The function of this family is unknown. Murine FAM198B is downregulated by FGFR signalling [1]. 
25.00 25.00 27.50 26.70 24.10 23.30 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.23 0.70 -5.74 8 108 2012-06-27 07:44:34 2012-06-27 08:44:34 1 3 42 0 61 111 0 277.00 52 64.22 NEW usIRlYutpuPPWFSc-DIpsMRhLADupVsuhs+.........lPu+uts.hllLc.......pussssuptCssp.CGlIKRPhDhoEVFAFHLDRVLGLNRoLPAVuR+Fco.......hssGpspPVlLWDPSlp...ssss-psShpLsWspYQphLKp+ChtsGpl.....PpsphsCosIHHpEWu+LALFDFLLQIa-RLDRsCCGF+PcspDsCVppGLHt+CcNpcclpLsHIlpR+pDP+HLVFIDNpGhFc+sEDNLNFRLLEGIcEFPEoAVSVLpS.u+LRp+LLQSLFlDQhYWESQGGRpGl-+LIDVIE+RAKlLLpYIptHsl+llsMN .....................................................................................................................utpsPsWhoccDlpth+LLAputVssht..........ls..u+utsh..lshp..............ts....s.s..hs.hCu.u.CGLlKpPhDhsEVhAFHLDRlLGLNRoLPuVuR+hc..............hpDGpspPlIh..Wss...slp...sss...spsp...sSltLsWhpYQpLL+p+Ch..sGps........sp.sCstIHHpEWu+hALFDFLLQlasRLDp.CCGFcP...c.pDsCVpptL+.KCcs.s.pltLsHIl.RppDPpHLVaIDNtGhhp+sEDpLNF+LLEGIcpFP-SAVpVLpS.tpLpphLLpSLhhDpVaWESQGGtpGlcpllcslEpRuplLlpaIptHshplh................. 0 8 13 29 +14903 PF15052 TMEM169 TMEM169 protein family Coggill P, Hetherington K kh6 Jackhmmer:Q96HH4 Family This domain is thought to be structured transmembrane helices and includes the intermediary cytoplasmic domain. It is found in eukaryotes, and is approximately 130 amino acids in length. 27.00 27.00 98.30 97.90 25.10 25.10 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.81 0.71 -4.25 9 66 2012-06-27 09:45:27 2012-06-27 10:45:27 1 1 59 0 48 53 0 133.10 58 41.06 NEW pt+psCphGlspG.HllLWSllClPlVFllSFlhSFYYGTlTWYNlFLhYNEERTFhHKIolCPhLIlhYPllIhssoluLGlYuAlsQlSWtaspWhpslpDhEKGFhGWlCuKLsL-DCSPYslVELhDs-s ....p..ppsCpsGscpGPHllLWoLlCLPlVFllSFlsSFYYGTlTWYNlFLVYNEERTFaHKIohCPhLILFYPlLIhshuhuLGLYuAVsQLSWuauuWWpuspDhEKGFhGWLCSKLGLEDCSPYolVELL-SDs... 
0 9 14 28 +14904 PF15053 Njmu-R1 Mjmu-R1-like protein family Coggill P, Hetherington K kh6 Jackhmmer:Q9HAS0 Family This protein family is thought to have a role in spermatogenesis. This family of proteins is found in eukaryotes. In humans, it is found in chromosome 17 open reading frame 75 (C17orf75). Proteins in this family are typically between 217 and 399 amino acids in length. 27.00 27.00 50.10 33.60 20.50 19.70 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.26 0.70 -5.70 4 77 2012-06-27 09:53:00 2012-06-27 10:53:00 1 2 48 0 46 60 0 278.80 51 84.02 NEW asLYuYRuuRhoQptuDo-DGpsuGsss-oPSG-DFSLSLVDTNLPuEsEsELRSFIAKRLSKGAVFEGhGNVASVELpIPtY+VGCYYCLFQQEK.LPEsAshESEpNssEYVVCFLGGSEKGL-..LFRLELDKYhQuLKsshssEp+sLEsclpsYLsSWFEssVCPIQRVVhLFQEKLAFLLHAALSYTPVEVKpuDE+Tc+DINRFLulASLQGLlpEGTMTSLChAMTEEQH+SlIlDCSGsQPQhHNAGSNRFCEDWMpAFlNGAEuGNPFLFRQlLENFKLKAIQDhNNLKRFIRQAEMNHYALFKCYMFLKNCGSGDILLKIVKVEHtEMPEA+sVVsVLEEFM+Eu .......................................t.......t.t...stss........tth......t.ss.....stpDF.S...LoLhDosLPuEsEPELRoaIuKRLSK.GAlatGhGNVAoVELpIPt.t.VGCYhCLhpp..E+..sE.t.....ssh-u..p......t.....ss-YVlCF....LGGS..EKGLc.......hF+.L...ELDKYlQGL+ssh....s.sEp.p..sL-scl..+.sYLspW.aE-sVt.IpRVV.LhQcplsFL.LHAALSaT.VEVp..poD..-+TKpD....lpRFlpsASLQGL...p...ps...........T.h....s.SLCh..AhoE-p+psl..llDC.S..ss.Pphh..N.A.s..........S..N+FC-DWhpsa.Ls..us-tuNPFLhRQlLENFKL.KAIQDhNsLKRFIRQAEhsHYALF+CahFLpsCGsGDlLLp.hs+sEH..p..-hPEApsllsVLEEFhpE............................... 0 16 19 28 +14905 PF15054 DUF4535 Domain of unknown function (DUF4535) Coggill P, Hetherington K kh6 Jackhmmer:E0CX11 Family This family includes the uncharacterised protein C7orf73 that is found in eukaryotes. Members are generally less than 100 residues in length. Although the precise function of the domain is still unknown, members have a predicted N-terminal signal peptide sequence which suggests they are short secreted peptides. 
28.30 28.30 28.30 29.00 25.40 27.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.32 0.72 -4.55 29 115 2012-06-27 09:58:10 2012-06-27 10:58:10 1 2 61 0 77 131 0 45.20 40 60.68 NEW +ushoFhhGsssGlYlAQNYpVPNl...pKLhpshh.hAKclE-pY+...KP .........shhsFhsGsssGlYlAQNYpVPNl...pKLhpph..htKcl--ph+K........... 0 16 35 54 +14906 PF15055 DUF4536 Domain of unknown function (DUF4536) Coggill P, Hetherington K kh6 Jackhmmer:Q96GE9 Domain This domain family is thought to be a transmembrane helix. It is found in eukaryotes, and is approximately 50 amino acids in length. In humans, it is located in the chromosomal position, C9orf123. 27.00 27.00 27.50 27.30 25.50 25.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.28 0.72 -3.89 11 53 2012-06-27 10:01:04 2012-06-27 11:01:04 1 1 48 0 35 58 0 46.50 40 47.33 NEW -ChuCRllSGsGLIGuGsYVahpA++R.p.stphuhholsphssulG .sChuCRllSGhGLlGuGuYVahtA++..h.K.shs.u.tolsphshulu........... 0 5 11 20 +14907 PF15056 NRN1 Neuritin protein family Coggill P, Hetherington K kh6 Jackhmmer:Q9NPD7 Domain The domain family Neuritin1 (NRN1) is a GPI-anchored protein expressed in post-mitotic-differentiating neurons in the developing nervous system [1]. NRN1 is a glutamate and neurotrophin receptor target encoding a neuronal protein that functions extracellularly to modulate neurite outgrowth (OMIM:607409).\ \ This family of proteins is found in eukaryotes. Proteins in this family are typically between and 158 amino acids in length. 
27.00 27.00 29.80 29.20 25.40 22.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.22 0.72 -4.03 10 92 2012-06-27 10:04:05 2012-06-27 11:04:05 1 2 40 0 51 83 0 84.20 50 38.75 NEW C-ulaKGFSDCLLpLGDsMuNYsQcl--cpslpoICoaWDDFHsCAsTALuDCQEsAu-lWEpLRpES+plsFQGSLFELCuuuouAssu .......CDslaKGFu-CLlcLGDuMus.sp..t.......p.sppplcTlCp...W-DFHuCssoALssC..EtAsslWEpL+pES+phshpGsLa-LCuusst...ss........... 0 2 9 25 +14908 PF15057 DUF4537 Domain of unknown function (DUF4537) Hetherington K kh6 Jackhmmer:Q9NQ32 Domain The function of this domain family is unknown. It is found in eukaryotes, and is typically between 119 and 141 amino acids in length. In humans, it is found in the chromosomal position C11orf16. 27.00 27.00 27.00 27.10 26.80 26.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.61 0.71 -4.34 24 111 2012-10-02 16:56:36 2012-06-27 11:18:52 1 15 44 0 74 124 2 127.80 31 19.59 NEW sppVlARps...pDGaYYhGsVppph...ssstalV-a...........spptptltppclIths.ssh.ppsLpsGDtVLAh......hsstp.p..........YsPGhVh.............sspscplsVpFasGp.....psp..lPpppshhl.sp.shacpssthl ...............sppVlARp-...p-Ga.Y.a.G...plKpss....spsphLVcF.....................tttpppsVstppll.hu.suh..s.sLpsGDhVlAh......hpspthp..............YsPuhVlhh.ptp..........ssp-c.hTVhhhNs+........pth..lPtsslhhl.s...shathusph................................... 0 38 40 47 +14909 PF15058 Speriolin_N Speriolin N terminus Eberhardt RY re3 Jackhmmer:Q76KD6 Family This family represents the N-terminus of the sperm centrosome protein speriolin [1-2]. 
27.00 27.00 37.40 31.70 22.40 22.10 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.29 0.71 -4.41 7 60 2012-06-27 10:18:56 2012-06-27 11:18:56 1 4 26 0 35 66 0 166.50 43 42.05 NEW Moh.ssaEGhpHphcRLV................pENp-LKK.VRLl+ENpcLKpA................LuEusts.stRtspsVhhP.uPAsApEss...psGhhshuPhAsh.soPp......Sst.tslhs.hTuoLssLLsu.ushSpps..ss.h............................os.h........ssuPGt.hAoSLulPppu.Lossu.........................shsuplAVShuSPLLoSTss..thsQphLtsP....................lushsLsEsPR ............G.pc.hpRLh................pENt-LKK.VRLl+ENp.L+ph................Lu-usttsss+.....t....slhhP..uPAhs..........Ess...tsGl.shu.hssh.sosp.................TusLpsLLsu.uP.hSpps.....................................................sssPu...hAoShtlP.pu.LoPsp.................hsspsssph.SPLLSs...........Ltss....................hsshsL.E.PR................................................................................................................... 0 3 3 7 +14910 PF15059 Speriolin_C Speriolin C-terminus Eberhardt RY re3 Jackhmmer:Q76KD6 Family This family represents the C-terminus of the sperm centrosome protein speriolin [1-2]. 27.00 27.00 32.70 31.80 23.50 18.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.78 0.71 -4.18 4 66 2012-06-27 10:19:36 2012-06-27 11:19:36 1 4 32 0 39 68 0 137.60 60 35.82 NEW lVGEIAFQLDRRILuhlFPt.hsRLYGFTVS.NIPEKIhQsSlps.saplDEchspsLTpRYVolhs+LppLGYNtcVHPshoE.LlNtYGILRpRP-LtAS.spS..sshLp+lLl-sV.PphLsDuLLLLSCLspLS+DDuKPhFhW .........................lVGEIAFQLDRRILu.lFPt.hsRLYGFTVS.NIPEKIhQsSlps.DtplDEchhppLTQRYlolhsRLpp.LGYstcVHPu.hoE.LlNsYGIL+pRP..-LtAs.hto...sshLp+lll-sV.PphLsDuLLLLsCLspLu+-DuKPhFhW................................. 
0 5 7 14 +14911 PF15060 PPDFL Differentiation and proliferation regulator Coggill P, Hetherington K kh6 Jackhmmer:Q8WWR9 Family Pancreatic progenitor cell differentiation and proliferation factor-like protein (PPDFL) is alternatively named Exocrine differentiation and proliferation factor-like protein. PPDFL regulates exocrine cell fate. This protein is highly expressed in exocrine progenitor cells which eventually differentiate to form exocrine pancreatic cells [1]. 27.00 27.00 29.20 29.00 24.60 23.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.51 0.72 -4.06 7 87 2012-06-27 10:23:41 2012-06-27 11:23:41 1 2 40 0 40 71 0 95.70 50 94.60 NEW MAAIPSSGSLVATHDYYRRRLGSTSSNSSCGSuEYsGEsIPHpPGLPKuDsGHWWASFFF.GKSThPhMusVlESsEpp.tohpsSsu.lsCsLApcsh......ppQ.uupsuKossu ....................MAAIPSuGSLlATp-YYRR.R.lGSoSS.sSS...CuS.sE.asGEsIPHp....PGLPKsDsG+WW.uSFFF.uKps.Phhssl.Epspp......tssts.h.s..h...s.........p............s........................ 0 2 5 17 +14912 PF15061 DUF4538 Domain of unknown function (DUF4538) Coggill P, Hetherington K kh6 Jackhmmer:Q8N5G0 Family This protein family is thought to be a transmembrane helix. Its function remains unknown. This family of proteins is found in eukaryotes. Proteins in this family are typically between 58 and 87 amino acids in length. 27.00 27.00 33.60 33.60 25.60 24.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.80 0.72 -4.66 13 61 2012-06-27 10:24:17 2012-06-27 11:24:17 1 2 50 0 36 71 0 56.10 51 54.00 NEW hhRGh+hslhlGGlVuhlGhAhYPIhlcPhhpsEEYK..clQplNRsGIcQE-lQPssh+ .................huRshRouLIFGGFlullGAAhYPIaFRPLh+hEEYK..+pQtlNRAGIhQE-lQPsGlK......... 
0 9 12 22 +14913 PF15062 ARL6IP6 Haemopoietic lineage transmembrane helix Coggill P, Hetherington K kh6 Jackhmmer:Q8N6S5 Family ADP-ribosylation factor-like protein 6-interacting protein 6 (ARP6) is a transmembrane helix present in the J2E erythro-leukaemic cell line, but not its myeloid variants. In tissues, ARL-6 mRNA was most abundant in brain and kidney. While ARL-6 protein was predominantly cytosolic, it is known to bind to SEC61-beta subunit of a protein conducting channel SEC61p [1]. 27.00 27.00 27.70 29.50 22.30 26.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.01 0.72 -4.08 11 55 2012-06-27 10:28:35 2012-06-27 11:28:35 1 2 49 0 35 58 0 75.40 48 36.26 NEW Qplps.VQuphshsh.WS.......+L.PLLsGLllsuFuYslVYLDSs.PGlhPPoPFSPps+pRhp..pupShHLuYhsAlhsG ...........................plpppstschshhsFWoh.......LllSLhuGh.CCSFSWTlTYaDSaEPGhaPPTPLSPu+hK+hs.....spSaHhGY.hAlLsG........ 0 6 9 19 +14914 PF15063 TC1 Thyroid cancer protein 1 Hetherington K kh6 Jackhmmer:Q9NR00 Family Thyroid cancer protein 1 (TC1) is thought to decrease in apoptosis and increase cell proliferation. It is found to be expressed in thyroid papillary carcinoma [1]. This suggests its importance in thyroid cancer. The molecular mechanism of TC1, involves up-regulating cell signalling through ERK-1/2 signalling pathway and it positively regulates transition between the G1 and S phase in the cell cycle [2]. It is thought to positively regulate Wnt/beta-catenin signalling pathway by interacting with its repressor [3]. In humans, it is located in the chromosomal position, C8orf4. This family of proteins is found in eukaryotes and contains a conserved NIF sequence motif. 
27.00 27.00 28.80 56.20 23.20 17.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.71 0.72 -4.44 9 80 2012-06-27 10:42:52 2012-06-27 11:42:52 1 1 39 0 49 70 0 77.50 54 59.15 NEW tot.hh.SsSlRVuPSspG.+FD..TAsRK+AsANIFcsVsp-sLQ+LFc+oGDpKAEERA+lIashspDsEEhA+ALhAL+ .............s.....h.ssSlhsuPSsh..hpt.s........stuRKpAsANIFpss-..plQpLFpp....SGDppAEERA+IIaphstDhc.hAcALhtL+............ 0 2 5 18 +14915 PF15064 CATSPERG Cation channel sperm-associated protein subunit gamma Coggill P pcc Jackhmmer:Q6ZRH7 Family This family represents the gamma subunit of the CATSPER, or cation channel sperm-associated protein complex. The complex appears only to be expressed in the flagellum of sperm. The complex is activated at alkaline intracellular pH, and being restricted to the flagellum is the mediating calcium channel. 27.00 27.00 73.00 28.20 25.30 24.80 hmmbuild -o /dev/null HMM SEED 825 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.36 0.70 -6.81 4 89 2012-06-27 14:03:08 2012-06-27 15:03:08 1 3 30 0 42 80 0 504.50 38 85.95 NEW 
Mhp.s....s...........shhlhhhVLLlsh+shutpshpcCoWhVslscap+lucphs.pp.....PlssV.p.hth..ssS.lpsoc.Y.uFPYaL+IphsCssKsSc-luRhhtLpGlpPhV+lhhp....ssssahphphE............................plph.h-sAsh+Ss.s....sC.s-hhCp.uWhsPhPhcsGoll.pV-lhssGlGsaI.ppR.hlphsGFhp.pstssssthultp-..l.t.thapshpuhPl....shuPVhILGGhsspchlLaosopFpcasllphpIsSphsuShhCshhuhssTIassluh.uoLhIppsphVh.asGsaosLh-+spuS....Rhh.scCI++Lpss..susGp..lLAlsst.pcGhlalGslpDu.hph..hsc.................taSsCphlsu.......uph.ssptTlLLLVths.hptotpaallpa..ssupcshElLYpIP-hIPpt+ph-aLhhLsocoass.......shhhscs.FhsslsshlalWGNhlL.p.............................................................................................................................ppss.h.loh.p.CPaphhph.plPp.Q+.oc.cpa.hhPshl..p.shHssNoLAlYpGLVaYhhah+s..-hs.h-shp.sshp.atp.ht....DpDYaFahhSNp...tslalsMsuYp+lashpushpl.Pp+hFLDpGspasaslhh.hasso.+sppp...thp..pplsLSlslu-PsslsVosppEhLlppsolLhpIsltDp+hsh-QshsGc+lptsSh.hpslsssGhphthTahhsHhp..t.h.hsVhIssPsGhRhth.lohoLpasht.spahhDpl.hstE.ssFhaRD.........D.shssSt.uhpGSash.VVsssshlpohpshh-pEI.Rhpushcp 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 9 10 15 +14916 PF15065 NCU-G1 Lysosomal transcription factor, NCU-G1 Coggill P pcc Jackhmmer:Q8WWB7 Family NCU-G1 is a set of highly conserved nuclear proteins rich in proline with a molecular weight of approximately 44 kDa. Especially high levels are detected in human prostate, liver and kidney. NCU-G1 is a dual-function family capable of functioning as a transcription factor as well as a nuclear receptor co-activator by stimulating the transcriptional activity of peroxisome proliferator-activated receptor-alpha (PPAR-alpha) [1]. 
27.00 27.00 30.60 28.30 23.90 23.20 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.10 0.70 -5.76 20 84 2012-06-27 14:54:08 2012-06-27 15:54:08 1 2 63 0 54 97 0 305.00 35 82.60 NEW ssLlHlcA...sGsscTlHYlWss.hGsPolLlsh...Tsssso...lplsWschLssp.....hsuulphssps..pYotullhs+lhEFsDsNDouhhs..sssp.h.sYsLppFsWsphshs....sstslssshsut.hpp.......sGolshplsAFsspsRssphP+LLHouNSoQl-lslsslss+uspS...RFAlE............llsVssstsssssp...............p+SIDDEYTPuIFclsplhs.....hstsssssGalQW+PVuYspspRshssu..s.spps.......phpslps.....sshssoslshAaaGsp....h.hpuhNlSFGhssDG.FYpsosYhoWThllGhGpPPs-phSshVlhlhulGLGlPhlhllhGGlalsl++t+tpp .....................sLlHlRA...sGsssTlHYlWss...hG..sPullllt.....Tssspo...lslsWsphlusp..................sssulhl.sps.l...paStAllFs+lhEasssssoshhs.........sp......hssYsLtpFsWsphs.s.....hs.tslssshp..Gpshpps.............sGSlshclpuFspssRssp..P+LLH.Tus.osQl-lsLsshss+....u..spS...hFuLE............lh.slupsssssphp...............pcSIDDEasPulFplspl.......................huo.ssGahQW+PVuasp.tp.tshpsu....hsspts.......shpss.s.........hshspsslhpAaaGsp.....................thpshNloFGsssss.hYhsppYloWohllGhG.PPsDthSsLVlslhuluLGsPhlhllhGGlhlhlt++p...p................. 0 19 23 36 +14917 PF15066 CAGE1 Cancer-associated gene protein 1 family Coggill P pcc Jackhmmer:Q8TC20 Family CAGE-1 is a family of proteins overexpressed in tumour tissues compared with surrounding tissues. CAGE-1 gene showed testis-specific expression among normal tissues and displayed wide expression in a variety of cancer cell lines and cancer tissues [1]. CAGE-1 is predominantly expressed during post-meiotic stages. It localises to the acrosomal matrix and acrosomal granule showing it to be a component of the acrosome of mammalian spermatids and spermatozoa [2]. 
27.00 27.00 68.40 68.40 21.50 20.90 hmmbuild -o /dev/null HMM SEED 527 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.66 0.70 -6.06 4 63 2012-06-27 15:13:07 2012-06-27 16:13:07 1 3 26 0 23 63 0 325.00 48 63.21 NEW hSEu-oMNhsuhSQDloaScSPhhMEToSTTSDLPQsEhKNscRENESc.TLsEDIYGTlDshLsDhsItN.ucNlLTQPVDT.SlSShRQFEPICKFHhhEAFNDE.hshpsLptuhsYTEKPEhQSpVYNsAKDsshKpDSFKEENsVET..SsSsscDQLupEhVRQ.ssRSPPLlHsSGET.KFsEsShsKSsuhEuALpPSQPQSFLhhENsppsscps..ptNsFp.LDLRA.YptEEhsVSSKtlQshG-IPEhsVsapKEVshEGV-SPtIsSPWSPAGIsWpGuAs.-suhMPDhEQShES.QPlEEDMAL.ElLtKLcHTN+KQpspIQDLQsSNhYLE++VcELQhpsTKQQVFVDIINKLKsplEELIEDKY+lhLEKsDTsKTLQNLpElLsHTQKHLQEu+N-KEoLQLplKKIKuNYV+LQERYMTEhQQKs+oVSQCLEM-+TLScKEEEVcRLQQLKGELEKAToSALDLLKREKcTpEQEFLSLQEEFQK+-KENLEERQKLKSRLEKLLAQVp ...................................................................................................................................................tpPt..s.hht..tcs..p.t...Eps.h....sh..pp-.hs.t.h.p..spS....h.p..tc.....h.E.sh.ts...tssht..ps.sh....................................hsspthp...-hschsss.tc..-st.-shpp..thhSshssssl.hpsts..-ss.h.DhE.shEuLpPl.p.ED.hALNEsLp+LppsN+cQphpIQ-LptpNhhLEp+lcELQhphs+QpVhlDlIsKLKtplEcLI--KYplhLEKN-hp+pLpslpEh.spop+pLpES+p-KchLpLphKKhKspYhpLQERYhsEhppKscs.ophlEhp+sLSpK-EElpRLQph+t-hE...+sT.SALshLpcEKc.pEpEhLuhp.EFQ+h-+tpLpERpcLK.plpcLlsQl......... 0 2 3 5 +14918 PF15067 FAM124 FAM124 family Coggill P, Hetherington K kh6 Jackhmmer:Q9H5Z6 Family The exact function of this protein family remains unknown. This family of proteins is found in eukaryotes. Proteins in this family are approximately 480 amino acids in length. There is a conserved LFL sequence motif. 
23.50 23.50 23.60 41.90 21.60 23.00 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.51 0.70 -4.93 13 111 2012-06-27 15:31:04 2012-06-27 16:31:04 1 4 44 0 66 93 0 222.70 50 52.79 NEW lolHlls-sG-uphLQpAlDpLLsaIcPDlpLFpVS.ERtssh+tpcstptp..........tsphPuLuVlLFLpEs.hG..EEplhplpctLppPPW..paHHTppspG+..h............PahsssQDFaoLusthPlWAlRpVHYGpEIlRhTlYCsa-NYsDhl+hYElIL++csst+KsDFChFslYos.sh-lQlSLKpL.PhGhsPpPp-SulLpFRV+-lGQLVPLLPsPCoPIScsRWQTpDaDGNKILLQ .....................holHllAssGcuphLQpslDplLshIpP-lpLFpVS.ERtush+hpct.ps..............st.PuluVlLFLp..Ep.hG..EE.....plhplpcsLQ+PPW..paHHTppspG.R..hh..................PYhsssQ-FaoLss..thPlWulR.VHaGpEIlRhTlYCpa-NYtDslRhYEhILpRps...otpKssFChFslaSsh..shslQhSLKpL.Phu.sssPp-SSVLpF+Vc-IGpLVPLLPNPC..sPISpsR.WQTpDaDGNKILLQ............ 0 12 16 34 +14919 PF15068 FAM101 FAM101 family Hetherington K kh6 Jackhmmer:Q8N5W9 Family This protein family includes the actin regulators, Refilin A and B, however the exact function of this protein family remains unknown. Refilin is thought to stabilise peri-nuclear actin filament bundles, important in fibroblasts. Refilin is important as changes in localisation and shape in the nucleus plays a role in cellular and developmental processes [1]. 
27.00 27.00 32.50 29.40 20.30 18.90 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.45 0.70 -4.88 12 102 2012-06-27 15:39:28 2012-06-27 16:39:28 1 2 41 0 55 78 0 169.70 52 92.01 NEW MVG+LpLQsh..s-sLctpsR.-GlLDSPDSGLPspPSPS..aasLusGhh........ssh......p.ptspput..s.PhhL.ss.uspspPRhpPl.FGEuIElcPhPs+EIRhsSpVKYDSE+HFhDcVhhhPls.slouhSpTllulPNCTWRsYKoplphEPRp+shRFpSTTIlaPKHs+ohYpTTLsYss...tpstRhFhSSVpLEssE .....................................................................................ss...........................................s.....h..ss...ssthpPRh..hPl.FGEulEhsP..PspEl.R..hsSpVKYDSE+.HFhDcV.hhPls.sVsuhSpTllssPNC...TWRsY+uplphEPRp+shRFpSTTIlaPK+s+ssYpTTLsYss...t+s.hRhFhSSVpLEhsp........................... 0 4 7 22 +14920 PF15069 FAM163 FAM163 family Coggill P, Hetherington K kh6 Jackhmmer:Q96GL9 Family This protein family is alternatively named Neuroblastoma-derived secretory proteins. Highly expressed in neuroblastoma compared to other tissues, suggesting that it may be used as a marker for metastasis in bone marrow [1]. 27.10 27.10 27.20 27.10 26.90 27.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.33 0.71 -3.61 13 90 2012-06-27 15:45:19 2012-06-27 16:45:19 1 2 40 0 58 70 0 148.20 51 97.46 NEW MTAGTVVI....................TGGILATVILLCIIAVLCYCRLQYa...CC++p.......-s-p-.....ccp.shshpsh.s...................su.shhs....s.sppptpsp.hCsoC..o.at.s.Falpss............GGcRlsatshp.......................h...h.thhp.spuIST-V ............MTAGTVVI....................TGGILATVILLCIIAVLCYCR...LQYY........CCKKs.......coE--......EEE.Dhs...scs+hPshpusps.sh........sstsuLhPhtspshuppsstupshCsoC..S.YpsP.Falpps-............stNGGERlsatshp.c-ht.Ps...................hthus.Quhsssh.uuhREuFopsRuISTDV....................................... 
0 5 10 22 +14921 PF15070 GOLGA2L5 Putative golgin subfamily A member 2-like protein 5 Hetherington K kh6 Jackhmmer:A8MS94 Family The function of the GOLGA2L5 protein family remains unknown. This family of proteins is thought to be found in the Golgi apparatus of eukaryotes. Proteins in this family are typically between and 840 amino acids in length. 27.00 27.00 27.20 27.20 26.20 26.70 hmmbuild -o /dev/null HMM SEED 617 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.33 0.70 -13.17 0.70 -6.03 8 690 2012-06-27 15:46:46 2012-06-27 16:46:46 1 6 83 0 210 470 0 199.30 19 59.74 NEW QLpESl+QLQsERDQYAEsLKEEuAlWQQRhQQMSEQl+pL+EEKE+uhuQVQELEoslAEL+NQhsVPP...sPpPPA...GPSEsE.ppLQuEsEQLQKELEsLutQlQAQV+DNEsLSRLNQEQEpRLLELERsAEpWuEQAE-R+QILESMQSDRsTISRALSQNRELKEQLAELQNGFVKLTNENMElTSALQSEQHVKKELAKKLGQLQEpLGELKETVELKSQEAQuLQpQRDQYLuHLQQYsAAY.......QQLuuEKEhLHKQhLLQTQLMDRLQHEEVQGKsuAEhApQELQEoQERLEAssQcNQQLQAQLSLhAsPG........EGDGLDSE-c-E.........EsspPpluIPE-LESREAMVAFFNSAlApAEEEpARLRpQLKEQKhRCpcLuHLlAssppp.ccc.AsuP..........tsuGD.SVPsEoHpALQVAMEKLQuRFTELMQEKsDLKERVEELEHRCIQLSGETDTIGEYIALYQsQRAVLKpRH+EKEEYISRLAQDKEEMKlKLLELQ-LVLRLVsERNEWpuKaLAAAQNPus.sosussA......QELGAA-sQGsLcEVSLAp.ss.........ltPsQGEA..Gsss......PpENPTAQQIMQLL+EIQNPQERPG..LGpNPCIPFFYRADENDEVKIMVl 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 122 129 171 +14922 PF15071 TMEM220 Transmembrane family 220, helix Coggill P, Hetherington K kh6 Jackhmmer:Q6QAJ8 Family Transmembrane 220 (TMEM220) is a domain of unknown function. It is thought to be a transmembrane helix. The length of this protein is typically between 150 and 160 amino acids. In humans, it is found in the chromosomal position 17p13.1. 
27.00 27.00 29.30 28.40 24.30 24.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.53 0.72 -3.58 29 81 2012-06-27 16:25:50 2012-06-27 17:25:50 1 1 72 0 51 95 19 97.70 30 68.70 NEW hFshsAhlQlNDP...........DshlWlslY.....hlsull...shhhhhthhsphlhhhhss............shhhhhhhhtsslhttht.t.th............phE.uREhhGLhlsshhhhlhhhhs .........hFshhAhlQhNDP...........Ds.lWhslY.....hlsAll....hl..hhs.tlssp.hlathlushah........hhshhhshhlsshlhtcsppshh.......................ppEcuREhhGLlIlsshhhlpt...p........... 0 17 29 34 +14923 PF15072 DUF4539 Domain of unknown function (DUF4539) Coggill P pcc Jackhmmer:Q8N3J3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 230 and 625 amino acids in length. 27.00 27.00 27.70 42.00 26.70 26.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.70 0.72 -4.29 32 116 2012-06-27 16:39:42 2012-06-27 17:39:42 1 2 98 0 75 119 2 86.40 37 17.40 NEW stKVsplssll+slst..........sst.Ds.hlhlpD..PTGphpuslH.+clh.p..p..asstlssGusLlL+cVs.Vas...........ss......tptta.LslThpNllplass-s ..p.sKVsphsshl+Slst..........ssh.Ds.tllhKD..PT.G.phpuolH.+pllp..p...htspLpsGuVLlL+pVu.VFs..................PS......hpspYLNlT.pNLlplass-.s......... 0 27 38 56 +14924 PF15073 DUF4540 Domain of unknown function (DUF4540) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A4D263 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 302 amino acids in length. In humans, it is found in the chromosomal position, C7orf72. 
27.00 27.00 28.30 28.10 21.20 17.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.81 0.71 -3.60 6 31 2012-06-28 09:34:24 2012-06-28 10:34:24 1 3 28 0 21 43 0 119.80 47 33.03 NEW PsKPlsFVSsSoRSKYIPLYTG+VQSTsADDlDNPhGDIsSlApPRsSchhYTNoSRoAsIPGYTGKsHasAT+PsNschPSpoPSsD.SEhHRlhh+cMtV.....DhFtHQuPLS+MVTTVpPYNPFNKKDKET .............................s+PlsFVSsSoRSpaIPhY..oG+V.pSssu.DDlDNPhGDhtulupsRpS+.h....Y.Tsoo+usNIPGYTGKV..HFsATHPAN.SsIPsT.sPSsD....SEh+RslhcEMtV.......DhFRHQuPLS+hVTTV+PYNPFNKK-KET............. 0 11 11 11 +14925 PF15074 DUF4541 Domain of unknown function (DUF4541) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A4QMS7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 100 and 163 amino acids in length. There is a conserved KLHRDDR sequence motif. There is a single completely conserved residue Y that may be functionally important. In humans, the gene is found in the chromosomal location, C5orf49. 25.00 25.00 26.40 25.10 24.30 22.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.01 0.72 -3.44 12 46 2012-06-28 09:38:35 2012-06-28 10:38:35 1 1 39 0 33 52 0 87.70 51 61.98 NEW YFssspppsphoh....YDplFph.ppsa...ssKLHRDDRp+hpslGLclppEEpp+sVPlLsSS.YG+.h..cpPl-..s+.casRls+Vps-FYp+ssI .......Ya.p.tpsGllSL....YDslF++.c.sY...sQKLHRDDREHAKolGL+VNEEEppRsVsVLoSSVYG+Rl..ppPlEPhsR.casRssHVpsDFYRKNsI........ 0 16 19 22 +14926 PF15075 DUF4542 Domain of unknown function (DUF4542) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A8MV24 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 123 and 173 amino acids in length. There is a conserved IPPYN sequence motif. The gene that encodes this protein in humans, is found in the chromosomal position, C17orf98. 
27.00 27.00 28.20 33.80 26.60 23.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.63 0.71 -4.22 9 48 2012-06-28 09:40:34 2012-06-28 10:40:34 1 2 36 0 36 51 0 121.10 49 77.44 NEW LE+uFVLDuluVuolupshp+tpPKhhoAIPPYNAQpD+HApsYFcS.sVpslL++TtQ....spsGsohsGhllD+ahhhG........tGttYlspRNhsGAG..........HSpptVsGHs.ahsshpshp.GapGhFGaRRNTPpLRppPSsF ..hEKuFlLDuVAVuohucs.as+tpPKlhSAIPPYNAQpDhHAcsYF......pS+sV.slLRKTsQ....c+GGTutcGhllDhhalhG........tGpcYLs+RNhA....GuG..........HShppVsGHs.a.usl+sh..GaNGpFGYRRNTPsLRppsSsF................. 0 16 18 22 +14927 PF15076 DUF4543 Domain of unknown function (DUF4543) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0P5P2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 90 amino acids in length. The human member of this family is C17orf67. 27.00 27.00 75.00 75.00 22.30 19.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.80 0.72 -3.95 4 35 2012-06-28 09:57:32 2012-06-28 10:57:32 1 1 31 0 22 30 0 72.90 78 80.11 NEW aT.DASPILsEKpAKQlLRo+RpDR.pKsGaPDEPMREaMLaLQtLEQRuEEQFLEHWLNPHChPHCsRNlVHPV .ht...-uSPI.LpEKQAKQLLRSRRQDRP.SKPGFPDEPMREYMHHLLsLEHRAEEQFLEHWLNPHCKPHCDRNhVHPV.. 0 1 2 8 +14928 PF15077 DUF4544 Domain of unknown function (DUF4544) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q3KP22 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 118 and 256 amino acids in length. The human member of this family is C11orf85. 
27.00 27.00 28.60 28.50 22.20 17.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.24 0.70 -4.85 6 50 2012-06-28 10:05:16 2012-06-28 11:05:16 1 2 29 0 21 34 0 161.40 51 91.32 NEW MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRsVLGNLDsLQPFATEHFIVFPYKSKWERVSHLKFKHGEsVLVPYPFVFTLYVEMKhFHEsLSsGKPhsDSPLGLVLAERKAAtAsM+..KRKpsEVPSSPSRPGLDRs.........uKc........KP.p+cs+R.hphhsshstscVtsQ..h.csu.tGphlP.pppos.h.L+sPpphtssu.hGF ...MSLKPFTYPFPETRFLHAGssVYKFKIRY...GpuIRGEElEsKEVIhpELEDSlRsV.LtNhDsLQPFs..T-HFllFPYKu+Wppsu+L+FKHtphhLhPYPFVhTLYlEhKhFp.EsLssG+..hps.s.ths.scp.....cs.ts.h.hc...KR+hh-....sostpst.....pR...........tpc........p..pcps.pp.h.......hsh.shscl.pt.....tps..G.h....pps.........c.....t..u.hG.............................................. 0 3 3 5 +14929 PF15078 DUF4545 Domain of unknown function (DUF4545) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JVX7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 417 amino acids in length. The human member of this family is C1orf141. 
27.00 27.00 30.80 30.70 21.30 21.00 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.61 0.70 -5.57 4 51 2012-06-28 10:14:33 2012-06-28 11:14:33 1 2 20 0 18 46 0 233.80 39 100.21 NEW MsE+IL-KhDhL-EpA+hLhAhRtKpsshppplKKKs.lhPLhFDaplcFt-sIssShSKT.upIpKD+upslKKsKR.VSFK.pPcPp+SDFEcSsLRPPhLsTsIphpE.K.hE.tEE.LKSRSh.Sh+YLKDpsETE.AKPh...h.....................................sQHcpps++ohcSTsaSusSSppsptp+.Ess.h.TKEsEhhRNDQhpch.sV+Qp.LLPLshED.LKsPchKhIDluPscTVpopMcpNcsNPIIFa-TcYVQMLhLTK.RhsPauhc......ap+pNlVLE+NCEhLKolhpDps.TsSKsppThsssQpKcl.slShElup+sls-Kh+KKpc+.s.cpIS.spLYNlSQTFSSLoKKFVGahDKsVIQEKSsKss+FE+.FSpsKPss..KFos.PlKYsSKP.+NILclHKlNNlTPLDsLLs .....................................sphQsph+Kps.hhPLTFDFplpFEcs.s.s.htKt.spIpcs+.sh..shp.psch.ssh+.....p.......s.pspFpp.slt...h.s.hp..pE..c.............................................................................................................h............pEsp......hp....htpp.hhPLshED.LhpsphKhl..t...p...ph.......................................................................................................................................................................................................................................................................................... 0 2 2 2 +14930 PF15079 DUF4546 Domain of unknown function (DUF4546) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5T0J7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 88 and 212 amino acids in length. The human member of this family is C1orf49. 
27.00 27.00 76.30 35.60 21.70 26.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.23 0.71 -5.17 5 48 2012-06-28 10:17:19 2012-06-28 11:17:19 1 2 20 0 16 49 0 178.30 66 92.20 NEW MSAKRAE.KKTsL.......SKNYKAVCLELKPEPTKTYDYKGsKQEGhFTKsGsT+ELKNELREVREELKEKMEEIKQIKDlMDKDFDKLaEFVEIMKEMQKDMDEKMDVLINIQKNsKLPLRRuPKEQQELRLlGKTD+-PQLRl+KMD...GuDGsPLALHKKlVAPQpT.+sPLDPLHpCsoCCEKCLLCALKNNpNRG+.saHAWusFSPL ..................MSAKRAELKKTpL........SKNYKAVCLELKPEPTK...TaDYKuVKQEG.FTKsGsTp-LKNELREVREELKEKMEEIKQ..IKDlMDKDFDKL+EFVEIMKEMQKDMDEKMDlLINhQKN.KLPLRRuPKEQ.QELRLhGKTcp-PQLR.KKMD...GssGAPhuLHKKsMAsQKs.pssLDsLHpCtoCC.EKCLLCALKNNhNpGt................................... 0 1 1 1 +14931 PF15080 DUF4547 Domain of unknown function (DUF4547) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q147U7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 144 and 206 amino acids in length. The human member of this family is C3orf43. 27.00 27.00 101.90 101.70 20.10 17.50 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.38 0.71 -4.89 3 28 2012-06-28 10:20:18 2012-06-28 11:20:18 1 1 22 0 16 37 0 189.00 76 93.09 NEW DHKLQALETQFKELDFIKDNLTQKFEHHSKTLASQAAQDElWTAVLALpFTSMELNILYSYVIEVLICLHTRVLEKLPDLVRSLPTLASVLRRKVKNKRIRsVWESILEEaGLQEGDITALCTFFVAHGNKAEHYTAKVRQMYIRDVoFMITNMVKNQALQDGLLRAVQVIEKGKAsRTPEcpKSPLKELIPSVKu .D+KLQALEsQFKELDFTKDNLhQKFEpHSKoLASQAAQDEhWTAVhALcFTSMELNILYSYVIEVLICLHTRVLEKLPDLVRGLPTLASVLRRKVKNKRIRVVWESVLEEpGLQEGDlTALCTFFIAHGNKAEHYsAKVRQMYIRDVTFhITNMVKNQALQDuLLRAVQVIEKGKAVRsPEcQKSSLKELIPSVKN....... 0 1 1 3 +14932 PF15081 DUF4548 Domain of unknown function (DUF4548) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95561 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 178 amino acids in length. The human member of this family is C1orf105. 
27.00 27.00 65.70 65.70 22.80 21.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.06 0.71 -4.71 4 26 2012-06-28 10:27:17 2012-06-28 11:27:17 1 1 16 0 9 25 0 132.00 63 89.87 NEW PWLSEASLVNKPLlLSlP+RYP+tSushLsSsKKsMsLPlLhQsP.DshSKARRNQs-sMLlRNpQLCSTC...QEhKMVQPRTMpIPDc.KsSFENsMSaR.MSLHpPKhQssscs.+sDIPTESI+YRLPILGPRTAVFHGLLT-AYcTL+EpQ+SSLPRKEPhGKTsR ...PWhsEAShlNKPLlLSlP+RYP.tSushLsSs++shshPhhhQsP.DshSKARRNQpssML.hRNpQLCSTC...pEhKMVQPRshpIPD.D.KsSFENhMSaR.MSLHpPKhQssscsa+cDIPTESIH.YRLPILGPRTAVFHGLLo-uYKTLpEpQpSSLPRKEPhuKThR.......... 0 1 1 2 +14933 PF15082 DUF4549 Domain of unknown function (DUF4549) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5T699 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 143 and 1871 amino acids in length. The human member of this family is C6orf183. 27.00 27.00 35.90 40.40 21.30 18.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.81 0.71 -4.55 4 24 2012-06-28 10:34:07 2012-06-28 11:34:07 1 2 20 0 19 27 0 128.00 53 23.05 NEW DplYKlSSTERV..LEKELtspLsELKsplE-pGlL.GTsNRsaSSlthPKDssaFR+EREhhLKKsLQVAEuKPLVIQADlhpRELESCLpREYTstsLPLLLhQaYT-RItQLuQSKYLHhLRWKRFCp+SphhEpLYPLYp .......................DplY+lsSoERlp.LE+cLthpLsELKsEIEEpthh.sst..p+s..aSSVphPKDltaFR+EREhALK+sLQVAEuKPLVlQADVMQRELESCL+REYTsENLPLLLhQaYs-RIpQLsQsKYLHMLRWKRFCpHSphhEQLYPlYp......... 0 8 10 12 +14934 PF15083 Colipase-like Colipase-like Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NCL2 Family This is a family of colipase-like proteins. 
27.00 27.00 31.50 28.80 25.70 25.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.80 0.72 -4.05 9 62 2012-06-28 10:57:55 2012-06-28 11:57:55 1 3 22 0 35 60 0 71.90 41 90.69 NEW polh.pClsapKsNuhpCs+HSECpSsCClpspppsttFC.sP+o.IhhpClPhRKsstsh...CpptpEChSpsCl..pE....+Cs+RstlL ...................hpK..s.hCpcHsECpSsCClhss.ssppFC.ss+o.IhhpClPW+Ksshth...Cp.t.pC.p.p.h.............................. 0 3 3 7 +14935 PF15084 DUF4550 Domain of unknown function (DUF4550) Bateman A agb Jackhmmer:Q9ULG3 Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 100 amino acids in length. This domain contains an N-terminal HXE motif. 27.00 27.00 52.70 51.30 23.40 22.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.13 0.72 -3.89 8 33 2012-06-28 11:36:29 2012-06-28 12:36:29 1 3 30 0 20 49 0 98.00 47 16.75 NEW spsaaHIEY.hhPcD.cspKlDlVlassVAKl.......Fh-sph.Kssp.h+.ssDpsW.lsWspsaslsVsp-hlpchh.+tlsl+laDo+-KVSs+ARaDRsKsht .....h.ppaaHIEYaLLPDDt.EP+KVDlVlF.shl.AKV.......FL-Sus.KTV+PWc.EGD+sW.VSWpQTFsIsVTKELL+Kls....h....HKITL+lWDoKDKlScKsRY.R.Ksh.s.... 0 9 10 11 +14936 PF15085 NPFF Neuropeptide FF Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O15130 Family \N 27.00 27.00 34.50 32.50 23.70 22.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.78 0.72 -3.76 9 43 2012-06-28 11:36:44 2012-06-28 12:36:44 1 1 29 0 15 40 0 90.30 49 79.89 NEW stuhpp.ppshsc-D.tP.ssp-thscphhphsup.......................lLRuLL.u.QR.sRs..SsLaQPQRFGRsopGsh.sEpph.psRsWEussuQhWShAsPQRFG.KK ...........................................................................................-sph.so..........LLRsLLQAMpRPGRS..uFLFQPQRFGRN.opGSWusEcL...SPRAhEul..........supFWSLAAPQRFGKK. 
0 1 2 5 +14937 PF15086 UPF0542 Uncharacterised protein family UPF0542 Bateman A agb Jackhmmer:Q7Z3B0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved LSWKL sequence motif. This family includes human protein C5orf43. 27.00 27.00 47.90 47.80 24.20 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.57 0.72 -4.26 3 46 2012-06-28 11:42:20 2012-06-28 12:42:20 1 2 45 0 33 41 0 73.30 71 82.43 NEW M..FDl.KAWL-pV....VcWAA-DPGuFlTTVLLuLTPFFLlSAVLSWKLAKtIEAE-KRcKp+pK+ptNlAcsRRpKKD .............MhDl.KAWAEYV....VEWAAKDPYGFLTTVILALTPLFLASAVLSWKLAKMIEAREKEQKKKQKRQENIAKAKRhKKD........ 0 8 9 18 +14938 PF15087 DUF4551 Protein of unknown function (DUF4551) Bateman A agb Jackhmmer:Q8IXR9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. This family includes human protein C12orf56. 27.00 27.00 36.80 62.90 17.10 23.60 hmmbuild -o /dev/null HMM SEED 617 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.20 0.70 -12.95 0.70 -6.15 7 68 2012-06-28 11:45:46 2012-06-28 12:45:46 1 2 35 0 29 57 0 424.10 47 94.34 NEW MA.....tsuhsstpNu+LDuFL+RshssplY-plRuaEPClVVS-p.c+sFhaVlLSD-plYLTEpPPRslphsVsh+cIhuI-LIsDhP-FLSGpDREpsQHIRIhY..................ussp.shPtptthutsppssc.sst.h.h.sphS.ohspu..sLtuspcposthtsspst.stsL.c.p.tt........................hPp.s..sspss.spcpstshPsssssshtsps.ssstss.spts.p.ptssptht.hsSllup.lppspshc............cc-uELHLYhlSsTSplaL+LpSoWNsYIIRuT..Lh.DPlYhpcpss....pusp.........p.EcphplFsQLpuELL.csssLEplahLLQEL+sAApRNhsLK+LFWKosDLF.FLVppLp-..L.tscstpshpt..ppADcLhlsh.lsQTLuhMFRET-l-ssRLshLsAcpGshh.pLLlhLlscPph.h.....p.p..tp...............cs-lQtLht-YhDsAsuLLFEllLlsQpushsssss...............phhsluWlhphLpspP..lhsFluh.scpsVhsLSsot.t.LSPuQAlLLYQphhlLhuCLQaSppLupaIRssa+EEFRYFl+hPslEcKLPspYPIopPsh+LlpplLphl. 
.......................................................................................................lsDhPpFLps.ppphsQHIplha...................................st..p....h.hth..t..ppphp.....p..s......h...p....p.tts...h.......................................................................................................................................................................t.................................................................................hchY.lp.tS.h...hps.Wpsh...h...................................ppEcphphFsQL+uELh....cs.olc+lh.Ll.EL+sAAp+shhL++LFWKosDLFhFLVspLcE.hLstsp.stpuhpstsp..csDcLh...hsh.llQTLshMFRETEhEsuRLNhLsA++usLh.pLLllLlscPpl.p..tss..sh.p.sss..........st.....hsh-scLQpLlhEYhssAouLLaElLLlhpQushs.s..us...............pF.uluWhhphLQspP..lhsFlut.VcplV.p................sLSss..p.LoPsQAVLLYQphhlLhuCLpaSppLupalRssa+EEF..RYFl+hsslpc+LP.pYPIopPThpLhcElLpll............................ 0 9 10 15 +14939 PF15088 NADH_dh_m_C1 NADH dehydrogenase [ubiquinone] 1 subunit C1, mitochondrial Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O43677 Family \N 27.00 27.00 39.60 39.40 22.20 21.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.49 0.72 -4.46 7 40 2012-06-28 11:47:20 2012-06-28 12:47:20 1 1 32 0 17 39 0 47.10 70 65.75 NEW hFhspcPspupPNWL+VGLsLGTolFlWhhLhKQHN-DVhEYK+RNGLE .......KFYVREPssuKPsWLKVGLTLGToVFLWlYLIKQHNEDVLEYKRRNGLE.. 0 1 1 2 +14940 PF15089 DUF4552 Domain of unknown function (DUF4552) Bateman A agb Jackhmmer:Q86WS4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. Proteins in this family are typically between 425 and 649 amino acids in length. 
27.00 27.00 150.10 149.90 22.70 21.80 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.66 0.70 -5.58 5 29 2012-06-28 11:47:55 2012-06-28 12:47:55 1 1 23 0 14 25 0 374.50 62 67.71 NEW SsGN+sFLT+os.uVVMGEDCGShDERRQSDFITEKpSlQHIWGENRKEhSNFLEDVNQsTssLLSENCDSFIScNMINLLNIDQQ+IKKTFDKCDYDoMuDsssVlSSDKNHsTDRCIRSIFTDPELsFSNSTFNKoSYPEKCQPNK.CQKEYsNNERNsLSTSFEKDaYPASS-+KGKFENDYQEKTPQ+co..QKYPVNHMuNIPLEELHSKQSWDFGlGE....ILMcEGGhsSLKu+sTSTKKs.YLDSSQSSQSTSYSPRPTD.SCFSSSSEMPSEDEDQI.QQIEDSNRpSIKsKETTNNFYLEsM...sKLPsD+IIKNNAKhHKQNENFHQFShKNNTDQFPQSQCNSAHILQNKTosNCILQlARCDAWVQTESEsVMEEKLDlAIQCDIISKCKCRS.......-VSsLCNVERCoENlKADTTGGQEILKNN ...S.GNRNhLTcpP.sVlMsEDCtSMsEhRQSDaITEKpSlQHIWGcNtKElSNFLEDVNQssPslLSENCDSFVSQNMINlLNIDQQ+IKKTFsKCsYDSMGDhCsVTuSDKNcsTDRCIRsIFTVPELT.FSNSThNKTSYPEKCQPNKphQ+EYNsNERNDLSTSFEpDhYPuSSE+K.GKhENDYQEKsPQKsI..QKYPsNoMssIP.EELHtKQSWDFGlsE....ILMcEGGhsSLKup.PTSscKI...LDSuQSSpSoSYSPRPTD.SCFSSSS-h.SEDEDQl....QphE-SN+hsI+scEptN.....NhalEpM...sclss-cIlKsNsKhHKQNENFaQaShKsNoDpFspSQCNSAHlLQNKTssNClLQsuRCDAhVQTEoEslhcEKLDAAIQCDlIScCpC+s.......-lS.LsshcpCotNlpuDTTGGQEIhpNN.......... 0 1 1 2 +14941 PF15090 DUF4553 Domain of unknown function (DUF4553) Bateman A agb Jackhmmer:Q8N655 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes the human protein C10orf12. 
27.00 27.00 45.20 45.10 24.00 22.70 hmmbuild -o /dev/null HMM SEED 463 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.77 0.70 -5.49 7 61 2012-06-28 11:50:07 2012-06-28 12:50:07 1 1 32 0 32 58 0 389.50 41 45.00 NEW shDslhsspPthhhWssptpNppLltEhNspa.plppuWlQ..lpt-ptssshs+s+scphp-.haKspp.hcpChhtLEV...SPVpMLF.KpapLsclCpWFhpTTETpSLslV+KhNsR.Ph-l.ss+t.h.phpsSs.....hpsphh+KHhKKFAhuoPu+sshKhphLathlppss.pscup.....oLu..tcsphcclpp-Rhup.tppl.o.uoschhpKhpNlRhhhpsQh.......hpss.G.tsusEsppsppols.pslhsP....shpstspphsspschss+sph..........pto.psppshpKtps.schpssphpupo.+cs+lhh+Khsplcp.ps.phsshhhpPtuh-psu..s+ps..cEs+hh..+ppss+pss.p+ppcEppsh+sspPs..ottlst..cpptLscusspspps..sscthuphppRtRP.hKosE.sstpR+++.psptsp.tuhhs+hpst .....................................slDslhsppPphh.WsspppNppLltphNspa.plppuWlQ..lp+-ttsssht+s+s.cp.p-..haKspc..cpChspLEs...SPVpMLF.......ppacLsplCpWFhpTTETpSLsIVKKhNsR.Ph-l.ss+tsh.thpsSs......hpAphl+KHhKKFshuoPu+sshKhp.hLath.hpps..p.lcsp..........s.u..pcsphcclp....c-R.p+.s.t.l.s.uoschhpKhpNlRhhhpsQ........hpps.u...u.spsp.uppoVs.pslhsP....shpss...spthsshsc.ppthph..........ptspp.hpc.h.Ksph..p.chptsphpot...s.....pcs+h.hhKhsp.cphss...phsshhhpPtuhDpsu..s+phphcEsKhhh..+ppsu+pss.p+pp+E.pst+ssps...s-thst.hpppth.scssshspps..tsch.uphpp.....+tRP.hK...osE.ss.pR+++.psp.sp.tshhp+....................................................................................................................... 0 2 5 16 +14942 PF15091 DUF4554 Domain of unknown function (DUF4554) Bateman A agb Jackhmmer:Q8N6T0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in some vertebrates. This family includes human protein C11orf80. 
27.00 27.00 52.30 43.80 18.70 23.80 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.37 0.70 -5.75 4 51 2012-06-28 11:52:02 2012-06-28 12:52:02 1 2 24 0 17 40 0 291.10 58 83.54 NEW MTDsLVIK+FL+KIlhVHsKl+FpFSVKVNGlLSt-IFus....ENEPsLsLuNGISLhlshpHYVS+PpFshsE.pCSRIHPVLGHPl.LhIPsDhAsMGLLGELhLTPAAALCPsPKsaSNQLNRISS.lp...IFLYGPhGLPLlhss.EQPpTohF+DhuhhlDWKKapLsMlPNLDLsLDpshlLPDVsYpVE............ssEcsQSQs.cuQtQTLLLFLFVDFpSsFPVQQsElWulaTLLTsHLssILsES+SsVQsSIQssVDpsLEpHaQtAKs+Q+LQASLSVAVsSIMSIlTGSTsSSFRKhCLQoLpAsDTQEFpTKL++sFp-ITpHpFLs+CSC-hcQclT.ccp.oAQsTcDthcs.suLEhhh-ouGQsENKRLK...cuS.phutccopThPsuc-usssEsssppssPTstutpsp.upuphtssGsththsupssth.-sLWLQEVSNLSEWL ................MTDCLVIKpFL+KllhVHPKlRFpFSVKVNGlLSpElFGs....EpEPsLsLsNGIuLlss.pHYh.psthsshE.hCSRIHPVLGHPVhLhIP-DhsshsLLGELhLTPAAALCPs.Kl.uNQL.s+ISo.lt...IFLYGP.GLPLh.sp.tp..hshhps.s.hlsWKKapLphlPsh-.pLpps..hhPDhsYplE............s.p.ts..psps.t..tpsLLLFlFhDFpssF.sQ.hEhhts.sLLpsHLssILhcs+shVQsslphslDpsLpQHpQtsKs.Q+.............................pshps.pspEhthpLtp....hh....h..h..t.t....................................................................................................................................................... 0 1 2 3 +14943 PF15092 UPF0728 Uncharacterised protein family UPF0728 Bateman A agb Jackhmmer:Q8N6V4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. There is a conserved GPY sequence motif. 27.00 27.00 76.20 32.00 19.90 18.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.92 0.72 -4.11 10 35 2012-06-28 11:54:29 2012-06-28 12:54:29 1 2 30 0 25 42 0 85.10 57 77.93 NEW MPppuhVslRYGPYsusGl.V-HRTtRL-GLQAVLtpDGHpVlLEch-DaNsVELlVNGEhVFpCsIp-L-......FGGDG+LDP.......LCcEAhpAV .......MPcpAlVhlRYGPYSAs.GLsVEH+TaRLpGLQAVLspDGHcVILEKIEDWNlVELhVNtElVF+CNIpDLE......F.GGDG+LDP.......LCccAhhAV................ 
0 7 10 13 +14944 PF15093 DUF4555 Domain of unknown function (DUF4555) Bateman A agb Jackhmmer:Q8N865 Family This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa.This family includes the human protein C7orf31. 27.00 27.00 75.20 64.90 25.50 23.70 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.96 0.70 -5.51 4 43 2012-06-28 11:56:23 2012-06-28 12:56:23 1 1 32 0 29 47 0 246.30 53 49.21 NEW MtsTtshP.....hh+Es.ulshLSsTahSs-LaoPLpospR.TssEspappaRtplt+ss...+sP....WGpc+.cYGGhtPl.LPspaRPKsEPPphVtKuH+HYGSGhp.aP................cthPlpQ.YphTp.phSclRhNDpLLPpP.puslhsh.lctPaPtEHPYtSHIs+hulFPs.sSPc-..ttltstpp......PhPspsPTpP.p.hl..Ko+GsPaRaEl.Dhspcs.+++ALsW.GpssYp.hptssptsR..hYPtPPKohtPN.s.ps.hs.ths.+pssshRslcKSthhToYppc ...........Mtshpshs....hhp+EhpGsDlLSsTahSNclaTPLppshRsT..sSpcRYpELR-oLpps....RLP....WGu-R.EYGGlhPloLPE-H.RPKsEPPplMuKGHpHYGFGG-sW..P............................................................................................................RclPIEQaYaLTQsKKSclhsNDSLlPKPP.sS.sltc..IshPaPlEHPYpTHIsRsAMFPoFsS.PcDhhTGlcARsp.....QPFPPTlPT.....KsaDsslL.KT+GNPYRa.EllDhPhDS.KKKALsWPGQslYashP+sspKs+.lF...YPKPPKohAPNoohps.hDsh.sh+pANIpRNLERSpWlToYs+.......................................... 0 11 11 15 +14945 PF15094 DUF4556 Domain of unknown function (DUF4556) Bateman A agb Jackhmmer:Q8N9H9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes human protein C1orf127. 
27.00 27.00 36.50 36.30 18.80 17.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.45 0.70 -4.77 3 33 2012-06-28 11:58:33 2012-06-28 12:58:33 1 2 21 0 11 33 0 196.40 68 35.49 NEW +CPMhsARLGcESV+CcPpFIQVSRPlPhhsDusQTPWLLSLRGELVASLEDASLMGLpVDlGATsVTVQSPRQELLQR.E.........PLhhVSGuhhh.hpQAlPLVSpQPtSEVuVHIPKQRLGLVKRGShlEEoLS.RFLcVpQo-oFpVs...Es+DFVlVSIPouslLQsQsCQcuc-oPGTQAFYRVDLSLDFAEMAuPVhWTVEsFFQC .....KCPMlpSRLGQESVHCGPhFIQVSR.PLPLhpDspQTPWLLSLRGELVASLEDA.SLMGLYVDlNATTVTVQSPRQsLLQRhE.............VSGt.....psAhP.s...VShQPESEVLVHIPKQRLGLVKRGSalEETLSLRFLRVHQSNhFhVT...EN+DFVVVSIPAAuVLQVQpCQEssGoPGTQAFYRVDLSLEFAEMAuPVLWTVESFFQC...... 0 1 1 2 +14946 PF15095 IL33 Interleukin 33 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95760 Family \N 27.00 27.00 78.10 27.40 21.50 22.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.77 0.70 -4.94 4 41 2012-06-28 12:02:12 2012-06-28 13:02:12 1 2 23 1 13 44 0 204.20 59 97.66 NEW MKYSTsKISPAKhpsoAuKALV.ssKLRKSQQKscEVCphYaMpLRSGLhIcKcsCYFRKEsTKRaSL+o..tpptpppthslsuppcphpt.......sFshth.hltsastuhthsSIph...lTEasASLSTYNDQSITFVLEDGSYEIYVEDLtKsQEKDKVLLRYY-SQpPSspoGDGVDG+pLMVNLSPTKDKDFhLHANsKEHSVELQKCEspLP-QAFFVLHcpSSpCVSFECKssPGVFIGVKDNpLALIKhtDpsps....NIhFKLS ..........MKYSssKhSsAKhpsoAu+ALs...Klt+SQQK.scElCphYaMpLRSGLhIcKcuCYFR+EhTKR.S.+o..tt....tthshsupppp..................h.thttth...t..Sl......lpE...ASLSTYNDQSloF.shEstuY.I.VEDhtKspcKDKVLLpYY-SQpsSsEo...GDGVDG+hLMVsLSPp..KDFhLHANsKEHSVELpKCEpsLPDQAFFlLHpp.....sSpCVSFECKssPGsFIGVKDNHLALIcs...-.opp.....NIhFKLS....... 
0 1 1 1 +14947 PF15096 G6B G6B family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95866 Family \N 27.00 27.00 161.30 160.90 22.00 18.00 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.58 0.70 -4.83 5 39 2012-06-28 12:10:19 2012-06-28 13:10:19 1 1 24 0 16 49 0 203.90 70 89.65 NEW NPGASLDGRPGDRVNLSClGVSHPIRWVWAPSFPACKGLSKGRRPILWASSSGTPTVPPLQPFuGRLRSLDoGIRRLELLLSAGDSGTFFCKGRHE-ESRTVLHVLGDRAYCKA.........PGPTHGSsYP+lL.IPLLGsGLVLGLGsLGhsaWR+RRlPPpP.cPhPRFA...PllsT......EuQRPl+EQDuKhPGcLDQEPuLhYADLDH.sLpR.RRhSsssPuDA.STVYAVVV ......ssGASL-G.RPGDRVNLSClGVSHPIRWsWAPSFPACKGLSKGRRPILWASSSGTPTVPPLQPFsGRLRSLD.sGIRRLELLLSAGDSGTFFCK...GRHE-ESRTVLHVLGD+s.C+s.........PG.PT..HGSsYPQlL.IP...LLGAGLlLGLGALGlVWWh+R+.PPt........Ph.P+hA.....Phsps..........................EsQ+..ss+..c.p-s+hsGc.D...p.EsuLhhu.............s.h.stc..sTl.uhss................................ 0 2 2 3 +14948 PF15097 Ig_J_chain Immunoglobulin J chain Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P01591 Family \N 27.00 27.00 75.90 57.10 17.70 17.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.95 0.71 -4.57 4 36 2012-06-28 12:14:02 2012-06-28 13:14:02 1 2 26 0 21 57 0 126.70 71 78.70 NEW -DEpTVLVDNKCpCsRlTSRIIPSs-sPsEDIVERNIRIIVPLNsRENISDPTSPLRTpFVY+Lo-LCKKCDPVElELssQlhpAoQSNlCsEDs...ETCYTYDRNKCYTThVPhsY+GpT+MVpAALTPDSCYPD ..EDEchVLVDNKCpCsRlTSRII.Ss-DPsEDIVERNIRIIVPLNNRENISDPTSPlRTcFVYHLS-LCKKCDPsEVEL-NQlVTATQSNlCDEDs...ETCYTYDRNKCYTssVPLsYsGcT+MVpTALTP-SCYPD. 0 1 1 6 +14949 PF15098 TMEM89 TMEM89 protein family Bateman A kh6 Jackhmmer:A2RUT3 Family The function of this family of transmembrane proteins, TMEM89, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are approximately 159 amino acids in length. 
27.00 27.00 162.40 44.70 20.00 18.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.87 0.71 -4.14 5 25 2012-06-28 12:19:38 2012-06-28 13:19:38 1 2 22 0 14 28 0 126.50 69 84.98 NEW WSRPLWYQVGLDLQP..........WGCQPNSLEGC+uSLGCPGYWMGLGuNRIYPVAGVTITTTMMLllSRslhQRRRSQAoKuEHPQVTssPCssWKRR.uPISDRTLLLGVLHMLDALLLHIEGHLQ+LAoQcQIQIKGTPs ...WSRPLWYQVGLDLQPWGCQPsSlEGCcuuLuCPGYWhGLGus.RIYPVAGVhITTTMMLlhuRhlhpRRRSQATKu..EHPQVTTpPCGPWKRR.sPISDRTLLhGVLHMLDALLlHIEGHLQ+LATQpphQIKGTss.. 0 1 1 2 +14950 PF15099 PIRT Phosphoinositide-interacting protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:P0C851 Family The function of this family, PIRT, is not known, however it is predicted to be a multi-pass membrane protein. This family of proteins is thought to have a role in positively regulating TRPV1 channel activity via phosphatidylinositol 4,5-bisphosphate (PIP2). This family of proteins is found in eukaryotes. Proteins in this family are located in the cell membrane [1]. Proteins in this family are approximately 140 amino acids in length. 27.00 27.00 39.00 38.00 26.80 26.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.85 0.71 -4.92 10 80 2012-06-28 12:20:04 2012-06-28 13:20:04 1 1 40 0 50 69 0 125.90 42 88.69 NEW hssPsu.s-.shhEKssps........ssslslPLVsEsQLTAATGGAELSCYR...CTlPFGVVlLIAGIVVTAVAYoF...NSHGSlIShhGLVLLSuGLlLLAsSAlCWKsRhc+K+c+RRESQTALVsNpRslFs ..........................t..hPKs..pssphp.psps........ssshsls.h..SEspLTsATGGuEhShYR...CIhPFGsllLlhGlVlTuVAYoa..............Nops.....S...l...lphhGlslLShGLhLLssuhLCWpsp.++Kpt+RpcSphhhshpph.Lh.t........ 0 3 6 17 +14951 PF15100 TMEM187 TMEM187 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q14656 Family The function of this family, TMEM187, is not known, however it is predicted to be a multi-pass membrane protein. Members of this family are as yet uncharacterised. 
This protein family is also alternatively named ITBA1. This family of proteins are found in eukaryotes. Proteins in this family are typically between 239 and 267 amino acids in length. 27.00 27.00 36.50 34.50 21.20 24.50 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.62 0.70 -4.99 10 38 2012-06-28 12:20:47 2012-06-28 13:20:47 1 2 35 0 21 36 1 226.10 56 86.68 NEW ALhHVslssCLClulshTGlFDuVhV-lGY-HYAEpPVss...LPuaLAMPFNSLVNluYlLLGlYWLpppsssspsst.............cAcYlKDVFAhMAlsYGPVQWLRlsTQpRssAVLDQWhTLPIFAWlVsWC.aL-+..GW+sphhLulEslSlsSYuLuLLHspGFElA.LGhHIssAVhpulp....sQtRaGs.ssStpYLsLAVLSCuGFVVLKLhDHpLA+a..cLFQpLTGHFWSKVCDlLQFHauFpFLTs .........................................................................................................AhhHVAluusLCsssVaTGlFDuV.V.pVGYEHYAEsPVsu...LPAFLAMPFNSLlNlAYsLLG.laW.Lp+ssss.stss................+YlKDVFAuMAL.....lYGPVQWLRls....TQh+hsAVLDQWhTLPIFAWsVAWChaL-+.....GWcPWhhL....ulEslSLsSYuLALLHPpGFEVA.LGsHlsu...AVupAL+......................spp+aGs.ssSspYLsL....GlLSCL.GFVVLKLhDHpLApW..pLFQpLTGHFWSKVCDVLQFHFAFlFLTp................ 0 7 9 11 +14952 PF15101 DUF4557 Domain of unknown function (DUF4557) Bateman A agb Jackhmmer:Q8NHR7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved TVF sequence motif. 
27.00 27.00 27.20 27.10 25.20 25.10 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.26 0.70 -4.49 5 40 2012-06-28 12:20:57 2012-06-28 13:20:57 1 1 29 0 25 36 0 188.00 62 92.08 NEW FpuQ+AWFSuSVSpDlpphWVcEGGsISD.s+sADFLFSsDASHsDTtRIYpS-DYlcDpATVFHupaLtAssNscSpsSVs..LGHYVL.........PP-s............lQcElRuKIGSF.IWEQDEpFl....lp+c-chsPsc...pslpcpupsos-Hspcpp+Ss-aatTRTslscKspsash+s................YPVNNM..VoGYlSIDAhcKa.GE..L+DFlPGsuGahVh+sscs ......FQGQRGWFCsSVSp-LRQFWVAEGGoISD.s+AADFLFSCDASHPDTLRIYQSLDYIEDNATVFHAYYLSAVANA..EI..KsSVA..LGHFlL.........PPAC............LQK.EIRRKIGSF.IWEQDp+Fl....lcKHDEVsssE..lcshpEsS.cluT-HcKELScSsE+HhhRTPVlEKQMYFPLQs................YPVNNM..VT.GYISIDAMKKFLGE..LHDFIPGSSGYLAYHVQsE............ 0 4 5 6 +14953 PF15102 TMEM154 TMEM154 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6P9G4 Family The function of this family of transmembrane proteins has not, as yet, been determined. However, it is thought to be a therapeutic target for ovine lentivirus infection [1]. This family of proteins is found in eukaryotes and members are typically between 138 and 320 amino acids in length. 30.30 30.30 30.30 30.30 30.00 29.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -11.17 0.71 -4.49 7 50 2012-06-28 12:21:22 2012-06-28 13:21:22 1 6 36 0 27 44 0 129.00 39 50.53 NEW sE-sppSuc.slpstsh.csos...................soshuoVT.sE.....slssshs.sph...pssssQhEh...............lLMlllP.llLsLLlL.llhllhhh+R+RsKpc..p.spppt...p-lssEslh.P............IFEEDTPSVMEIEMEELDKWMNSM.....N+NADhEsL ................................................................................ph....................sshsulT.sp.....slssshssop..h....st-ps..QhEF..........................lLMVlIPh.lLLsLLlLsV.lhlshhh.+RKRs.Kp-soSpGSpssLQ.o.E.lusEsl+sP............IFEEDTPSVMEIEMEELDKWMsSh.....N+Nssh-t...................................... 
0 6 9 14 +14954 PF15103 G0-G1_switch_2 G0/G1 switch protein 2 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P27469 Family This family of proteins regulate apoptosis by binding to Bcl-2 and preventing the formation of the anti-apoptotic BAX-BCL2 heterodimers [1]. 27.00 27.00 66.30 66.20 21.80 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.47 0.72 -3.79 8 46 2012-06-28 12:22:18 2012-06-28 13:22:18 1 2 34 0 23 43 0 103.30 54 90.29 NEW METlpELIPhAKEMhuQKPSRKhVKLYlLGSVLAFhGsVlGLVETVCSPFTuupcL.DpEtA..lAEhcsAhERptlppp...........slhEcsKpppss.psRulSpRpHA.S METlpELIPhAKEMhuQKPstKMVKLYVLGSVLAhFGVVlGLVETVCSPFoutupL.DpEtt...lsEhcsA..htRpthppp......................hlhEpuKppsss.tpRulSpRpHAS.............. 0 1 3 9 +14955 PF15104 DUF4558 Domain of unknown function (DUF4558) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VU69 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 78 and 121 amino acids in length. One member is annotated as being a flagellar associated protein. 27.00 27.00 27.40 27.00 26.20 26.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.86 0.72 -4.06 11 43 2012-06-28 12:22:34 2012-06-28 13:22:34 1 2 40 0 28 44 0 85.30 39 79.28 NEW pchsptcchspc.cpssphs.WpEphppp....ssht.sspph...pcphcpElplAN+plhtlRpAtL+cLaEcEtppaEQELstpGhAhYccRt ..................pchhthcKhlp+hcssppcshWp.sLspR......cssh.Ahl+h...pcshspELsLAsKpLLhVRQAtL+pLFE+EappYpQELsph.GKAFYhER.... 0 10 15 16 +14956 PF15105 TMEM61 TMEM61 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N0U2 Family The function of this family of transmembrane proteins has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 150 and 211 amino acids in length. 
27.00 27.00 27.80 41.90 25.60 23.80 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.28 0.71 -4.78 3 25 2012-06-28 12:22:48 2012-06-28 13:22:48 1 2 21 0 12 21 0 172.70 63 95.11 NEW ASTLRYCMTVSGTVVLVAGTLCFAWWSEGDAusQPGQLAPPTEaPlPEuPsPLLRSVSFlCCGAGGLLLLlGLLWSIKsST+GPPRtDPYHLSRDLYYLTVESSEKESYRsPKVsAIPTYEEAVsCPLAEGPPTPPAQPsEEuLEs+AScDALLGoQss.PPPSYESIlLAtsAVSGpTs..PSPGpSCsGLlQsARGGs .ASTLRYCMTVSGTVlLVAGTLCFAWWSEGDA......usQPGQhA...P....P.TtaPlPEuPusLLRSVSFhCCGAGGLLLLhGLLWSlKASopGPPRWD.YHLSRDLYYLTVEoSEKESCRoPKlssIPTYEEAVsh..PlA..EGP.PTPPAhPhEEsLcsSA.....stDALL...........uTQsshPPPSYESI.hAhsulSuETs...usstoCsG.sphstGG....... 0 1 1 4 +14957 PF15106 TMEM156 TMEM156 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N614 Family The function of this family of transmembrane proteins, TMEM 156, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins are found in eukaryotes. Proteins in this family are approximately 310 amino acids in length. In humans, the gene encoding this protein is located in the chromosomal position, 4p14. 27.00 27.00 61.40 61.10 18.90 18.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.47 0.70 -5.12 6 34 2012-06-28 12:23:08 2012-06-28 13:23:08 1 1 28 0 19 29 0 213.80 56 78.32 NEW EVCLQsNFTaSLsSlNFSFVTFLQPl+ETQTI.MGIFLNHSNFQNFTcICQsITuEhKhCSsCLsCESKGshDFISQEQTS...KVLlMRGShEVKusDFHSPCQHFNFTsAPhsDcLEEYNhTCpLKTHssRSsIhEE-Ps+EpSlN+TC..RhMEp.NNChpISL+LEMDVKNssCSMKITWYlLVLLVFIhhlIllI+KILEuHRRVQKWQSHKYKPTSsLLRGuDSEK ..EVCLpsNhTYSLsSlNaSFVTFLQPlRETQsI.htIFLNHSNFQNFTRICQsITuEhKhCSpCLlCESKGshDFISQEQTS.................KVLIh+GShEVKAsDFpSPCpHFNFTVAPhVDcLEEYNsTCpLKsHTt+SslhE--Ps+cpSlN+TC..RhMEh.NsCh+ISLHLEMDlKNhoCSMKITWYlLVLLVFlFLlILhI+KILEuHRRlQKWQ..S..H+.+sTS.sLLRGpDSEK.................. 
0 1 1 3 +14958 PF15107 FAM216B FAM216B protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N7L0 Family The function of this family of proteins, FAM216B, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins are found in eukaryotes. Proteins in this family are approximately 150 amino acids in length. In humans, the gene encoding this protein is located in the position, C13orf30. 27.00 27.00 35.30 35.70 25.80 24.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.51 0.71 -4.20 6 56 2012-06-28 12:23:32 2012-06-28 13:23:32 1 2 26 0 25 61 0 106.40 50 55.52 NEW Mspchp+.QcLhNlP+IPpI+VPpShsDTSLLK..DLTQGQpRYhYSIMRIYNSRPQWcALQsRYlHSLQHQ.QhLGYITQ+EAhusAhVLRcST+RASApsuP....Rol.pRssuho ..........hsppht+.QcLWpsPQ..pI+lPpShh..DsShhK..DLTpGQpRYhhSIh+IYNup..hphhpppYhHsLQHp.Q+...G.........hlTp+cuhhs.......o+huptphhP....Rp..c+psuh....................... 0 2 2 3 +14959 PF15108 TMEM37 Voltage-dependent calcium channel gamma-like subunit protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8WXS4 Family This family of transmembrane proteins, TMEM37, has a role in stabilising the calcium channel in an inactivated (closed) state. It is a subunit of the L-type calcium channels. This family of proteins are found in eukaryotes. Proteins in this family are approximately 210 amino acids in length. 
27.00 27.00 123.00 122.80 25.00 24.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.36 0.71 -4.59 6 38 2012-06-28 12:23:58 2012-06-28 13:23:58 1 1 31 0 23 39 0 179.50 68 93.16 NEW QAQRLLupRRPp+uFFESFIRuLIILCsuLAVVLSSISICDGHWLLAcD+LFGLWaFCTsSNp...........ousHClRDLSpApVPGLAsGMlLARShuoLAVVsAIFGLELLhVSQVCEDhcSR+KWAhGSsLLLlSFlLSuGGLLSFlILL+splTLhGFTLMFWCEFTASFLFFLNuISGL......HINSlTpP ..........QAQR.LuQR+PpRSFFESFIRoLIIlCsALAVVLSSlSICDGHWLLA.ED+LFGLWHFCTsoNp...........osspChRDLupApVPGLAVGMuLsRSlGALAVVAAIFGLELLMVSQVCEDtHSRRKWuhGSlLLLlSFlLSSGGLLoFVILL+NQVTLlGFTLMFWCEFTASFLFFLNAISGLHINSITpP....... 0 1 3 7 +14960 PF15109 TMEM125 TMEM125 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q96AQ2 Family The function of this family of transmembrane proteins, TMEM125, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 55 and 232 amino acids in length. 27.00 27.00 30.80 63.40 22.50 20.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.54 0.71 -3.96 6 45 2012-06-28 12:24:17 2012-06-28 13:24:17 1 1 33 0 25 36 0 109.40 73 53.85 NEW slLEEQVELWWFp-P++SLLCYssuVsLILuCGhGGVGLLSTToShSGE...WRLusGTsLCLLALsVLLKQLLSSAVQDMNClRsR+RI-hLKSGGhuDsLllLloGLsLLlCG .slLsEQVELWWSQpPRRShLCFsVAVuLVsGCGAGGVuLLSoTSSRSGE...WRLAsGTsLCLLALLVLlK.QLhSSAVQDMNCIRpt+HVuLLRSGGGADsLVVLlSGLVLLVsG 0 1 4 11 +14961 PF15110 TMEM141 TMEM141 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q96I45 Family The function of this family of transmembrane proteins, TMEM141, has not, as yet, been determined. Members of this family remain uncharacterised. TMEM141 protein family is found in eukaryotes. Proteins in this family are typically between 103 and 124 amino acids in length. 
There are two completely conserved residues (C and W) that may be functionally important. 27.00 27.00 27.00 34.80 20.00 26.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.16 0.72 -3.70 12 53 2012-06-28 12:24:45 2012-06-28 13:24:45 1 1 44 1 29 54 0 88.90 47 74.94 NEW ssI..p+lc-t.tsKHPGhspYhs......C.o+AhhpGluTFsLGhuusahlQ+hlp++lPYPhpaNlLVSslsuoVsSYpVTphcTppCpshWhhhEstp ............l..p+lcDt.tsKHPGhtpYhs......C.S+AhhcGlhTFlh.GouusFhlQhhlp+KhPYPhQWslLVusls..uSluSYtVTpsEop+CsslWlaLEsG.p.......... 0 7 10 17 +14962 PF15111 TMEM101 TMEM101 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q96IK0 Family The function of this family of transmembrane proteins, TMEM101, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 127 and 257 amino acids in length. 27.00 27.00 37.30 30.80 21.40 21.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.54 0.70 -5.08 7 47 2012-06-28 12:25:10 2012-06-28 13:25:10 1 1 42 0 29 47 0 235.60 71 95.47 NEW ssR+phLphhsplGsFllTRaPFWpsFshLMhaAERA-s++hP.....DltlP...al.YlDLusAVlCASFMSFslKRRWFAlusAlQLs.ISshhuhhutpshYu-WLKVRhaSRslAlIGGaLhluSGsGEhYRp.KPRoRSLQpTGplFLGlYLIC.AYsL.aS+EDRhAaLpHIPGGp.hl.lhhVlaslLuLuaLSGa.hphhuplLAllLshshLllDGslsYWa+o+.+VEFWsQh+LlucNluIFGAllIL..AsDu .......u..R+hhLpLlhQlGuhLLTRhPFWsCFS.LMLaAERA-u+RKP.....DIPVP...YL.YF.DMGAAVLCASFMSFGVKRR.W.FALGAALQLA.lSTYAAYlGGaVHYGDWLKVRMYSRTlAIIGGFLVLAS..GAGELYR+.KPRSRSLQSTGQVFLGIYLICVAYSLQHSKEDRLAYLNHLPGGELMlQLaFVLYGVLALAFLSGYYVsLAAQILAVLLPsVhLLIDGNluYWHsTR.RVEFWNQMKLlGEsVGIFGAAVIL..ATDG......................... 0 7 9 13 +14963 PF15112 DUF4559 Domain of unknown function (DUF4559) Bateman A agb Jackhmmer:Q8TB03 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 
This family includes human protein CXorf38. 27.00 27.00 65.20 27.60 25.40 23.40 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -11.92 0.70 -5.26 8 62 2012-06-28 12:25:15 2012-06-28 13:25:15 1 7 37 0 32 49 0 241.90 51 64.70 NEW RhNstsYKNWLKsGpuLLlLRsuLQsFlt+EscsaHpuLpsKlss..ssppCp....Css+uRp.pstCplCc.W+cEIlpaHsscsucIHWsNCcPstW...ss-hWEVAKAFMPRGp.sD+sGPEcsDsSALLNFlstCcHFph...chcpVpcVIcVRNclMHSsDLKhSspshpcahsKIpphlp...hsslPslptstccIpplpss-apltssptspcDGsclpT-sh.shpclL-lEpctLcD+lpcLhsphEpsps.s.Echhpslpslh-FLpsNpDLppsLpscl.......sKLpplps+lpKh-tpls-l+sphsQL ..................RLNsspYKNWlKAGpCLLlLRssLpsFlspEshsFHptLlsthss......spssCpt..tCsP+u+phpspCpl...CtpW+pEILpHHhscsus...lpW.uNCcPshW...ss-...WEVA......KAaMPR..Gh..sc+pu..s-pCDAsALLshlstCc+Fhs...DtcpVpc.VIcsRNc.lMHSu-hKVSstWhp-at.+IpsFLs..pFpplP-lssshp+IEplLssDaslahsttDphD.....Ghch-htshls.pp....lp..-lEhphL+E+LpEhhhphccpphhs..Ec..pplpsltpFLcpNcDLppsLpt-h.......pKLpsh..p.phtp...............t................................................... 0 6 9 16 +14964 PF15113 TMEM117 TMEM117 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H0C3 Family The function of this family of transmembrane proteins has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 181 and 504 amino acids in length. 
27.00 27.00 32.60 32.60 19.00 19.00 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.54 0.70 -5.88 4 63 2012-06-28 12:25:37 2012-06-28 13:25:37 1 2 39 0 38 51 0 317.70 70 79.75 NEW t+DFRYYFQHPauRLhVAYLVlFhNFLlFAEDPVSHSpTEAplIVVGNsFSFlssKYP.GhGWplLKVlhWLLAllhGLlsGKFlhH+hLFGpLLRLKMFREDpGSWMhMFhoTllFLFIFSpIYNhhLLhAGs..tsahIoDhMGIRNpSFMKhAAlGTWMGDFVTAWMVTDMMLQDp.YPsWA+usRtFW+p.GpsRIILFWoVLloLToVVVhVIoTDaISWDhLNRGFLPosElSRAFLASFILVhDLLIVMQDW-FPHFMGDLDlpLPGhpTsHh+h+lPhppplaK-EapIHITGKWFNYGIIhLVlILDLNMWKNQIFY+Pa-YGQYlGPssKIaTVc-.-oLts..hNRTpLTa-WRuNphsPcTNcoYlEcDhahHSRYlGhoLsVKsLAFlPSLhAFVhFGhhIWhaGR ..........................s..cFRYYFQHPWSRhllAYLVhFhNFLIFAEDPlSHSQTEAphhVVGNCFS..FlhsKYP.uhGWthLKVlhWl....LAIlhGLlAGKFlFHp...pLFGph....lRLKMFpED.cGSWMsMFhSsllhLFhFSplYNhhLlhtGs.htsahlsphMGIcNpsFMK..hAAlGTWhGDFlTAW.M.VTDMMLQDp.......YPDWG+uAR.....tFWK+..Gp.RIhLFWoVLhoLTSVVVLVIoTDWISWD+L.......NRG.....FL...PSDEVSRAFLASFILVFDLLIVMQ.DWEFPHFMGDlDl.NLPGLpTsHhpF+lPhhp+IFKEEY+IHITGKWFNYGIIFLVLILDLNMWK.NQIFYKPaEYGQYlGPGp.KIYTVcDsEoL+D..hNRTpLSWEWRSNpTNPpTN+TYsEuDMFLHSRahGuSLDVKCLAFlPSLlAFVhFGFFIWFFGR................. 0 8 12 19 +14965 PF15114 UPF0640 Uncharacterised protein family UPF0640 Bateman A agb Jackhmmer:Q8WVI0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 70 and 80 amino acids in length. There are two conserved sequence motifs: PGK and YRFLP. 27.00 27.00 66.50 66.40 26.60 25.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.37 0.72 -4.50 10 56 2012-06-28 12:25:59 2012-06-28 13:25:59 1 1 50 0 38 56 0 68.10 56 89.06 NEW Mhh.+..Spsl+plLcpWPGK+phGlYRFLPlFFsLGAuhEasMINhRlGcpsFYcsY+R+QAE+hhEp+hcp .................h..ptpl+RlLpphPGKpRFGlYRFLPhFFVLGushEWhMIplR.V.Gc.EoFYcsY+R+puEc.hpc+lcp...... 
0 7 11 22 +14966 PF15115 HDNR Domain of unknown function with conserved HDNR motif Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VZQ5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 219 amino acids in length. There is a conserved HDNR sequence motif. The function is not known. 27.00 27.00 35.40 35.30 20.10 19.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.34 0.71 -4.34 6 73 2012-06-28 12:27:26 2012-06-28 13:27:26 1 2 36 0 47 64 0 150.10 36 84.54 NEW Ms+uhh..ssssctcGpWFsphh........cuHhsoph+psas..apc...pscspsPshFtpRpKpssspph........FShHDNRHoFpspGs...YFssGLGK++hs.sp.....p+t+.SpNhhpWss..hsuspDs..hSoYpsuaht.......cpss.ssthRphPRhhscp.puuphtssppsspa..hsppPcs.........chslstpsp .......................shsGpW.asphh.........puhh..puphKp.psschpp...pscsp.PslathRpKpsssppF.........FShHDNRaSapssGh...Y.L.spGlG.RKpl........p+.t.p+sF.hWAss...l..p.p.t...Ss.Qhsah.......................................................................s................................................. 0 11 12 17 +14967 PF15116 CD52 CAMPATH-1 antigen Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P31358 Family \N 27.00 27.00 40.70 40.10 19.80 17.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.08 0.72 -4.15 4 22 2012-06-28 12:27:31 2012-06-28 13:27:31 1 1 17 0 8 20 0 44.90 48 65.52 NEW LGpsTTut.sop...........+SussAhusLuGGShLFFlANTLIpLFYLS ................GpssTot.........................pSussA.SsluGGsFLFFlANslIpLFhhS 0 1 1 1 +14968 PF15117 UPF0697 Uncharacterised protein family UPF0697 Eberhardt RY, Coggill P, Hetherington K, Bateman A agb Jackhmmer:Q96E16 Family This family of uncharacterised proteins is found in vertebrates. Proteins in this family are typically around 100 amino acids in length. 
27.00 27.00 61.40 61.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.29 0.72 -4.23 4 50 2012-06-28 12:28:25 2012-06-28 13:28:25 1 1 39 0 30 41 0 93.30 77 93.22 NEW uDsGoIDYSVHEAWNEATNVYLlVILVShuLhMYAR+NKRKIMRIFTlPPTAEossEsNFYDohpKIRLRQQLEMYSIARKa-.Q..Q...tQsDSVQLSlE ....uDDGSIDYoVHEAWNEATNVYLlVILVSFGLFMYAKRNKR+IMRIFSVPPTtETLS...EPNFYDTlSKIRLRQQLEMYSISRKYDhQ.QPQ...sQuDSVQLSlE.......... 0 2 4 12 +14969 PF15118 DUF4560 Domain of unknown function (DUF4560) Bateman A agb Jackhmmer:Q96HG1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 66 and 78 amino acids in length. There are two conserved sequence motifs: FCK and RTL. 27.00 27.00 74.90 74.70 22.60 22.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.99 0.72 -4.17 5 46 2012-06-28 12:30:52 2012-06-28 13:30:52 1 1 23 0 30 64 0 62.80 76 75.50 NEW AsALSGLAVRLuRoAAsRuSYGVFCKGLTRTLLsFFDLAWRLRMNFPYFYIlASVILNVRLQV+I ........AAALSGLAVRLSRSAAsRuSYGsFCKGLTRTLLsFFDLAWRLRhNFPYFYIlASVMLNVRLQVhI.. 0 1 1 4 +14970 PF15119 APOC4 Apolipoprotein C4 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P55056 Family \N 27.00 27.00 58.70 58.60 24.60 19.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.27 0.72 -4.02 6 27 2012-06-28 12:31:28 2012-06-28 13:31:28 1 1 21 0 14 24 0 92.60 62 78.88 NEW CQ.t.sEto.SPsPtPcpS+.WSLVPu+VKEhVpPLVTRTRE+WQWFW..GPuAFQGFhQTYYDDHL+DLGsRT+AWLpSSKDsLLNKAHSLCPRLlCGD+D .....................................h.pto.SPsPt.c.Sp.WSLVpu+hKEhlEslVsRTR-pWQWFW..uPusFpGFhQTYY-DHL+DLGPRT+AWLhpSKDSLLNKTHSLCPRLlCGD+D.. 0 1 1 2 +14971 PF15120 DUF4561 Domain of unknown function (DUF4561) Bateman A agb Jackhmmer:Q96E40 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 
27.00 27.00 34.30 33.30 24.10 21.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -10.90 0.71 -4.79 6 65 2012-06-28 12:32:32 2012-06-28 13:32:32 1 3 39 0 34 54 0 160.90 60 76.91 NEW MsEl+cpL+hlpQ+YKLFpQQQFTFlsALERsREsAaD+hcPVuoIsQVQpYh-HaCsNuTDRRILhLFLDICsDLsshpp+lEsLp......SssssosphL-pC+sLls.SNDlSslRA+YPHDVVNHLSCDEARNaYGGVVSLIPllLDhlpEhht..psc+Lt.tpp.ssstppp ................MNEVKEsLRslEQ+YKLFQQQQFTFIAALEHCRENA..HD...KIRP.ISSIuQVQsYMEH....aC.....NNSTDR.....RILlMFLDICoELs+LCQ+FEuLH................SGTPVTNsLLEKCKoLVSpSNDLSoLRA...KYPHDVVNHLSCD........EARNHYGGVVSLIPllLDlhKEWIA..+oEKLPp...........th...t......... 0 8 10 18 +14972 PF15121 TMEM71 TMEM71 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6P5X7 Family The function of this family, TMEM71, is not known, however it is predicted to be a transmembrane protein. This family of proteins is found in eukaryotes and located in the cell membrane. Proteins in this family vary between 41 and 291 amino acids in length. 27.00 27.00 27.00 39.90 26.90 26.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.98 0.71 -4.47 10 47 2012-06-28 12:32:51 2012-06-28 13:32:51 1 1 32 0 21 39 0 131.30 62 55.16 NEW MY..phStlhSTPVussopp-tththp.SPsslhsSasCD.LD......GDSSFECsSlDPLTGShasCRRSPRLLTNGYYlWTEDSFLCDcDGNITLoPSQTSVhYKENLVRIFR+K+Rh+RSLuSLhS..hpAScSWL+GSIFscV................sSsPSEDsWLEGsRp ............MYRlopLMSTPlASpst....pt..tphosp.plhsS.FsCD.LD......GDpSFECsSlDPLTGSaasCRRSPRLLTNGYYlWTEDSFLCDcDGNITLsPSQTSVhYKENLVRIFR++++hp+shuSLFs..lssScSWL+uoIFscl................cS.s.sEDsWL-Gh+p...... 0 1 3 6 +14973 PF15122 TMEM206 TMEM206 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H813 Family The function of this family of transmembrane proteins, TMEM206, has not, as yet, been determined. Members of this family are remain uncharacterised. This family of proteins is found in eukaryotes. 
Proteins in this family are approximately 350 amino acids in length. 27.00 27.00 149.50 149.10 19.20 16.40 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.82 0.70 -5.51 3 51 2012-06-28 12:34:56 2012-06-28 13:34:56 1 2 39 0 31 44 0 287.80 75 84.57 NEW PIRFSKACLKNVFSVLLILIYLLLMAVAVFLVYQTITDFRDKLKHPVMSVSYKEV-cYDAPGIALYPGKA+LLSCcHHaYDsIPPLsuPGQPG-RsCsTQ-IsYpcPYoN+TMK+ALIVQGPpDVR+RELVFLQF+LNETcEDFSAIDYLLFSSFc-FLcSsDKAuFMQDCESuYSSWKFSGGFRTWVKMSLVKTKEEDGpESVEFRQETSVVNYID+RPssE+osQLFFVVFEWKDPFIQcVQDIITANPWNTIALLCGVFLALFKAADFAKLSVKWMIKIRKRHLK+RuREhNHIS ..........SIRFSKACLKNVFSVLLIFIYLLLMAVAVFLVYQTITDFREKLKHPVMSVSYKEV.DRYDAPGIALYPGQAQLLSCKHH.Y-VIPPL..suPGQPGD....hsCTTQRINYTDPFSNQThKoALIVQGPpEVKKRELVFLQFRLNposEDFSAIDYLLFSSFQ..EFLp...S.........P-+sGFMQuCESAYSSWKFSGGFRTWVKMSL.VKTKEEDGREAVEFRQETSVVNYIDQRPAAE+SsQLFFVVFEWKDPFIQKVQDIITANPWNTIALLCGAFLALFKAAEFAKLSVKWMIKIRKRaLK+RuQAsNHIS......... 0 2 4 12 +14974 PF15123 DUF4562 Domain of unknown function (DUF4562) Bateman A agb Jackhmmer:Q96LM5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved HRYQNPW sequence motif. This family includes the human protein C4orf45. 27.00 27.00 41.50 41.10 18.10 17.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.46 0.71 -4.21 9 43 2012-06-28 12:35:21 2012-06-28 13:35:21 1 1 30 0 24 42 0 109.70 47 59.28 NEW lFTGPDhl+Dahs+lhpassYIG.tp.u.EtTuDLpYLWRPAsspshPhthKpchlGEIGWGIs.ashhs+p+Lpo....GhpIKhGEhppAs.D+hTHRYQNPW.Pt...P.hl-tpsthuR ....lFTGPDYl+DahPKlppaTsYlGEpp.uLEpTuDLcYLWRPAsspShPschKacYVGEIGWGlPpasalN+oRLpo....GFpIKhsEhppAu.DphoHRYQNPWQPpPplhDhpst.S............................. 0 10 11 13 +14975 PF15124 DUF4563 Domain of unknown function (DUF4563) Bateman A agb Jackhmmer:Q96PS1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 
This family includes the human protein C3orf24. 27.00 27.00 120.30 27.30 19.00 18.70 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.23 0.71 -4.70 3 35 2012-06-28 12:37:45 2012-06-28 13:37:45 1 2 29 0 24 35 0 167.80 73 99.93 NEW MAsYQLWSPWSPLDENLQWLRHTTPThuSKHPFRuSPsFPaTPuDVEVQtCFHEVslVpDpPhlcAG+SPcLPsHshEP+..ThsNhpupIRKPQPIRLlGVDSVFGRVITuQPPKWTGTFRVS-KSAFSKIIS+EpQWPpGLKEPQIEMTlsMCKQMLRSILLLYAIYKKCTFALQHSK ......MAGYQLWSPWTPLDESFQWLRHTTP...TPSS.KHPF..RASPCFPHTPSDLEVQLCFQEVTLVLDSPhLEsGhSP..KLPC........HTSELR..ThsspKGLVRKPQPVRLSGVDSVFGRVITAQPPKWTGTFRVSDKSAFCKIISREHQWPTGLKEPQIQMTVTMCKQMLRSILLLYATYKKCTFALQHSK................... 0 1 2 10 +14976 PF15125 TMEM238 TMEM238 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:C9JI98 Family The function of this family of transmembrane proteins, TMEM238; has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 61 and 153 amino acids in length. 27.00 27.00 29.80 29.30 26.10 25.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.25 0.72 -4.07 9 40 2012-06-28 12:38:05 2012-06-28 13:38:05 1 1 24 0 26 43 0 67.20 55 48.09 NEW GRCphhhh.lAVlhDslGlslLLlGlFAsL.....sFaDhLlYoGALllhLSLlhWlhWYoGNIE..ls.cEL ....GRC+hhLh.LAVshDlsGhssLLsGVFApL.....sFhDhLlYoGALllFLSLLhWIhWYTGNIE..lohpEL..... 0 2 6 13 +14978 PF15127 DUF4565 Protein of unknown function (DUF4565) Bateman A agb Jackhmmer:Q9BSF0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C2orf88. 
27.00 27.00 35.00 35.00 22.80 18.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.07 0.72 -3.65 5 31 2012-06-28 12:40:36 2012-06-28 13:40:36 1 1 29 0 18 24 0 91.00 58 99.30 NEW MGCMKSKcphPhssThcu-K.....s+cuEEAa..tchh..........lsssEEs+cPsuPp..huPVLLEYAcRLSEEIVs+AVQQWAElDp+YuDIPYIESDuP MGCMKSKQTFPFPTshEuEK.....pH-SEEsF.MPEE+hL.+....st.lslpEElKcP..suss....sVlLEYAHRLSQEILsDALQQWAssNlKYtDIPYIESEuP. 0 1 2 4 +14979 PF15128 T_cell_tran_alt T-cell leukemia translocation-altered Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P57738 Family This family of proteins is required for osteoclastogenesis [1]. 27.00 27.00 43.00 32.50 20.20 24.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.26 0.72 -4.54 4 48 2012-06-28 12:40:48 2012-06-28 13:40:48 1 2 35 0 24 42 0 84.70 61 85.78 NEW opllsuh.uFhSEFlc-W.usDMRVoIFKlLLuWLVlSLlAIphAW+sYGNTVNshYYRQGhuGQNGGTPDsss+hsuWEpuus-sLKTHpE .......splLsuLsuhuSEFlcEWEApDMRVoLFKLLLhWLVLSLLuIQLAWthYGsTVTGLYaRs................GhG.G..QNGuTPDsusHFsuWE.susEshKTHRE..................... 0 2 4 9 +14980 PF15129 FAM150 FAM150 family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6UX46 Family This family of proteins known as FAM150 is found in eukaryotes. Members of this family are as yet uncharacterised. Proteins in this family are approximately 143 amino acids in length. The function of this family has not, as yet, been determined, however it is predicted to be a secretory protein family. 
27.00 27.00 50.70 30.50 18.50 23.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.81 0.71 -4.24 6 65 2012-06-28 12:42:58 2012-06-28 13:42:58 1 1 30 0 34 58 0 92.10 66 77.03 NEW -cpoLLcLIh-llp-hpppcpspspp....l..hupcspsss+ccsttlsshssEphlEIhPRDLpMKDKFlcHLT.GPLYFSPKCpKcFHRLYHNTRDCTIPAYYKRCARLLTRLAsSPhChE .............................................................................................................................t.ppps..EIhPRDLphKDKFlKHLT..GPLaF.SPKCSKHFHRLYHNTRDCTlPAYYKRCARLLTRLAVSPhChp........ 0 2 7 16 +14981 PF15130 DUF4566 Domain of unknown function (DUF4566) Bateman A agb Jackhmmer:Q9GZU0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein C6orf62. 27.00 27.00 177.20 61.80 18.70 18.30 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.68 0.70 -5.20 2 54 2012-06-28 12:43:17 2012-06-28 13:43:17 1 3 40 0 33 38 0 209.50 86 92.76 NEW M.........uDP.oRKppslsRLRtpLR+K+ESLADpFDFKMYluFVFKDKKKpsALFEVs-VIPVMTNNYc-sIh+GV+-puYSLESS.ELLpKDVVQLHAP+YpsMR+DlIGCsQ.hDFhLWPRpDI-KIVChLFSRWKts.DtsaRPVQscFEFcHhDYEKQhLHlLuR+DpTGlllNNPoQShFLFlDRppLpTPpNKAshFKLsSlCLal.Q-QLhHWssGoI-chLc.YMP..p ................MGDPNSRKKQALNRLRAQLRKKKESLADQFDFKMYIAFVFKEKKKKSALFEVSEVIPVMTNNYEENILKGVRDSSYSLESSlELLQKDVVQLHAPRYQSMRRDVIG.CT...QEMDFILWPRNDIEKIVCLLFSRWKtS.DEPFRPVQAKFEFHHGDYEKQFLHVLSRKDKTGIVVNNPNQSVFLFIDRQHLQTPKNKATIFKLCSICLYLPQEQLTHWAVGTIEDHLRPYMP...E................. 0 3 5 15 +14982 PF15131 DUF4567 Domain of unknown function (DUF4567) Bateman A agb Jackhmmer:Q9BTX9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in some mammals. 
27.00 27.00 37.50 115.50 19.90 19.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.42 0.72 -3.84 4 11 2012-06-28 12:52:58 2012-06-28 13:52:58 1 1 11 0 6 2 0 75.90 76 84.43 NEW pRLDEs.AsLRLQHaLQLtEGLAVPLPPLVVpuPAAHaVAGGuLuDFTLDIALGARRItLAhVRQVAQDGPVAFLA .RRLDEsPAsLRLQHHLQLREGLAVPLPPLVlQSPAAHHVAGGShuDFTLDIALGARRVRLALVRQVsQDGPVAFLA 0 1 1 1 +14983 PF15132 DUF4568 Domain of unknown function (DUF4568) Bateman A agb Jackhmmer:Q9H693 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 51.10 51.00 21.10 19.40 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.71 0.70 -5.13 4 25 2012-06-28 12:54:36 2012-06-28 13:54:36 1 1 20 0 13 23 0 132.50 44 81.24 NEW GsGh.psWha.......GRs+h+sLschChu+hss.VspTVHpEsICCECpsKFGGaLPVPRA-AsLPYWVPLSLRPpKQIQKMVRhYIPKooKAC.CPCHpFGGRLPMPRDQAVMPYWVPQVLRSpKKVVKRQQshcslPEsslDlRShYspWRICG-G+hLLKWQQLQALHQsc..PlAsGpPtSh.AsLLPlshSLLTLLQAlLRVllAIRpLFWs ...................................................................................slCCECQs+FGGRLPVsRsEAALPYWVPLSLRPRKQh.+h..hph.................................................................................................................................................................................... 0 1 1 2 +14984 PF15133 DUF4569 Domain of unknown function (DUF4569) Bateman A agb Jackhmmer:Q9HAI6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein CXorf21. 
27.00 27.00 38.20 36.40 19.90 19.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.96 0.70 -5.03 8 57 2012-06-28 12:56:17 2012-06-28 13:56:17 1 1 36 0 37 43 0 279.10 45 97.54 NEW MLuEGaLopLsYpsphcts....hpopsppsspppthcpp.ssh...shushDcsphcslhsuscsstch.uuVHspts.ps.p....tphlpsspsPshpussSsulcIPccs.cps.thaLVPS.SC..cSICpNYsDLHIuGDpVhslsssuGsh..ssssshpcGPlLhSsDIP.uhpsphps.sp.h....h.pshSShW+ssps+E+.ShL.ppc..PlSNSlLNsYLEpKlhELYKQYlhEsss+suSs........splLuSELlMTNVDQISlQlSREpNlEToKA+DhlLssL....LplsSph.pSEISTPsLpISs.us ................MLuEuaL.slhYhpphahs....hsshsppsstcct.cpp.ht....ohSSs-csp.cshhhpspossch.oolpsptsppshp..pphsslpss..ssshpuss.sAlpIs+csp+-p...aLVPs.SC..cSIC+NYsDL+IAGspVhshsssossF...scsshp.GPLLpSs-IPLshEsulssp.s.h..hsh.pphSShW+hsSI+EK.S.L.hpp..PlSNulLNcYLEpKVsELYKQYlh-slh+suSs........TplLASELlMosVDQISLQlSpEpNlEsoKA+DhVlspL....LphsS....oElSTPsLpISp.o.......... 0 1 4 11 +14985 PF15134 DUF4570 Domain of unknown function (DUF4570) Bateman A agb Jackhmmer:Q9HBI5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 29.00 28.70 24.00 23.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.43 0.72 -4.21 9 41 2012-06-28 12:59:01 2012-06-28 13:59:01 1 1 37 0 28 40 0 107.30 54 85.97 NEW MsS.hsp........El+Luc+HEEILupRthLLppMEs+hpspppc+ppphpsspsAppRNtpLLpDl-ssEcpLpsR...hscPslluLETpYWASVEc......lPtWEpaLLuRu.hPhu .............MoShasQ........El+LSKRHEEIlSQRLMLLQQMEs.+huDQpsE...KA.SQhQusEsAa+RNhoLLpDIEAAEKSL..QoRhaslPpP.ElVoLETpYWASVEEa.....lPKWEQFLLGRAsYPh.u..................... 0 5 7 11 +14986 PF15135 UPF0515 Uncharacterised protein UPF0515 Bateman A agb Jackhmmer:Q9NUL5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There are two conserved sequence motifs: PLT and HSC. 
27.00 27.00 39.80 39.80 25.90 25.40 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.11 0.70 -5.33 4 53 2012-06-28 13:38:07 2012-06-28 14:38:07 1 2 34 0 27 52 0 227.20 68 88.44 NEW REKFHG+lu.csAssLMRRasssHptVut.lshhscscssl-hpsphcLpssP.uh.V..thht--.ptpt..t......................D+DIpslA.p+hshLPLTpcNl+MFscAptshIPu.s+QFACcuCDhhWWRRVPQRKcVSRC++C+K+aDPVP.ss+MWGluEFpCspC++pF+Ga.uphsspSPCYGCppsl....YPh+ILPPR..Rs....ss.+opNpHSChAE.CYpRhEPaVPGspCsHP+SRptNphPKVlaPS.hHISoGSTluTCLSQGSL.E.-lspLIL-Dl ..........REKFHGKV.osc+AssLMR+FuSDHTGVGRSIVYsVKQcDGQ-LSNsLDAQDPP...........EDhKQ................................DpDIQAVA...TSLLPLTcsNLRMFQRA.Q-DLIPAVDRQFACSSC.DHVWWR..RVPQRKE.VSRCRKCRKRY-PVP.sDKMWGlAEFHCPKCRHNFR....Ga.AQMGosSPCYGCGaPV....aPTRILP.PRhDR-....scRRSsHTHSCS.AtDCYNRREPHVPGTSCAHPKSR+QNHLPKVLHPSssHISSGSTVATCLSQGuLlE.DLDsLILEDL............................ 0 4 7 11 +14987 PF15136 UPF0449 Uncharacterised protein family UPF0449 Bateman A agb Jackhmmer:Q9UFG5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved LPTRP sequence motif. 27.00 27.00 27.40 28.20 24.40 26.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.27 0.72 -3.78 9 48 2012-06-28 13:41:22 2012-06-28 14:41:22 1 2 39 0 29 50 0 89.30 47 80.26 NEW uKKRsVLPTRPsPPoVEQILEDVcsA.usDPVFpsL....sss.s.pstss-uptEph.............Y.QSRpYlshNpRLppAtssLcp+p-pL+pAGEcLEp-lspV .......KKRVlLPTRPsPPTVEQILEDV+uAsscDPVFThL..........cssss....t...p...tcss..-uttEph..........................................YQQS+sYlshNpRLppAt..s.Lpp+p-tLptsGppLcppl.p................. 0 5 8 16 +14988 PF15137 DUF4571 Domain of unknown function (DUF4571) Bateman A agb Jackhmmer:Q9NYP8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrate. This family includes human protein C21orf62. 
27.00 27.00 122.70 122.30 25.50 24.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.34 0.70 -4.91 6 34 2012-06-28 13:43:34 2012-06-28 14:43:34 1 1 27 0 20 29 0 213.10 65 94.93 NEW MAPPSuHsLLLhuALGlFALssFTcGQ+NSTLIFTKENTIRNCSCSADIRDCDYSLANLMCSCKTVLPLAlEpTSYsG+LTIWFTDTSALGhLLNFTLVpDLKLSLCGTNTLPTEYLAICGLKRLRlsTEAKHPSsEQSLLIHsGGEucsREKsh.La+GWQTChYISFLDMALFNR-SuLKSYSIENluSlAssFPsFSYFcTFPl.oNKSYVVTFIY ........Ms.P.tasLLLhusLGlFALssFT+GQKNSTLIFTKE.NTIRNCSCSADI..RDCDYSLANLMCoCKTVLPhAl....E+TSYs..G+LTIWFTDTSsLGpLLNFTLVQDLKLSLCuTNTLPTEYLAICGL+RLRIssEA.KpsaPEQSLLIHSuu-o-sc-cshhLpKGWQsChYISFLDhALFNR-SuLKSYSIEN..VoSIANsFPsFSYF+oFPhsSNKSYVVTFIY..... 0 1 2 6 +14989 PF15138 Syncollin Syncollin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VAF6 Family This family has a role in zymogen granule exocytosis [1-2]. 27.00 27.00 27.90 27.60 26.40 26.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.59 0.71 -4.17 6 27 2012-06-28 13:46:15 2012-06-28 14:46:15 1 1 23 0 17 28 2 110.00 61 83.22 NEW ACPsPA-LKs-sGo+hCARhaE+SssYYDpsCGGu.LsVcPGsDhPYhPSsWsNpISSLVVupRCpLTVWSppGKpGssRKFSAGoh.+LcEY++GlFGsWscSIuuhYCpC ........ACPssA.DLKcsDGTRsCA+LY-KSDPYY-NCCsGApLSlEPGsDLPYLPSsWsNssSSLVVusRCELTVWSppGKuGKT+KFSAGoYPRLEEYR+GIFGsWuNuISulYC+C.. 0 2 5 9 +14990 PF15139 DUF4572 Domain of unknown function (DUF4572) Bateman A agb Jackhmmer:Q5VTT2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 220 amino acids in length. 
27.00 27.00 36.50 35.80 25.50 24.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.43 0.71 -4.48 10 47 2012-06-28 13:47:15 2012-06-28 14:47:15 1 3 40 0 31 48 0 169.10 43 81.23 NEW ERKGSLsL+Ss+hcYSsssLlpsWHpsREAcPKDYDlcshsstKpNLHpSTY+RlG..oDcsshslSpT+-thuQ.Vhlpc-apchh....++uhlchpo...hspshlE+Ds-tssouhcslhhc......psschcp.cYcTThppDYpsPYPYp....PssssP...sshslspRKh+spFsDL-up+RhGhspWpD-s.....tlht.pts+pclYcsp ........ERKGSLhLRSp+hpYSpssLs.sWHpsREA.PKsYDlcs.s...ts+pLppSTYpRlG..TDcsshhhSET+-phuQ.lhLpp-atchc....p+sLLs.-T...hspullERs.suhPtoGFGulhsR......HPP-hpKhph.TThppDYssPYsY......s.s.P...tsaSlsaRKCpSQFsDlss..+RhGhpoWpD-s.....tlhs.p.h+tpla...s........ 0 10 11 19 +14991 PF15140 DUF4573 Domain of unknown function (DUF4573) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6P6B1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically approximately 360 amino acids in length. 28.20 28.20 28.40 28.70 27.80 28.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.32 0.71 -4.66 10 82 2012-06-28 13:48:36 2012-06-28 14:48:36 1 15 39 0 62 80 12 137.60 21 37.03 NEW AsDPstsTccTpPLcGlpEs-PPQPuGKDDs.sspppK+DlcAlTEspPLKGsAEsEPltstschpPLRssuEpDustAVcshEsPQsAuEMKPLpTAEpI.PLEuApEhpPpEAsGKscQsQlsEslPKEssSPEI..LEGSQ.lEsucppQLpEsLGcsEQsQsLEsVPKENto ....................................hps..s..t.ts.ps.tt.ps.t.s.tthcsspssopspPlpssspscPhts.sphpPlpsssphcshpslpss.csspsssp...hcPhpsspplpP....lpsspphpP.pshstsc.sp.hp.h..................................................................................................... 0 34 39 43 +14992 PF15141 DUF4574 Domain of unknown function (DUF4574) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6UW78 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 86 amino acids in length. 
27.00 27.00 29.40 28.70 26.00 25.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.81 0.72 -4.25 11 54 2012-06-28 13:51:19 2012-06-28 14:51:19 1 2 48 0 30 53 0 82.10 39 95.11 NEW MsulRplLhusullGhG.GlGYulaslloPGEERKpEMLKshPEusPhRh-EscKpstLlhpsLcEAApTsENlAR....hhGu.t.......ppp ...............MtuhRhhh.slsllGhG.GlGhuLhsLloPGEcpKp-hLKphPppsPtpp-Est+pptLhhtsLQEAAsTpENlsh......h.s......stt......... 0 4 7 13 +14993 PF15142 INCA1 INCA1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VD86 Family This family of proteins inhibits cyclin-dependent kinase activity [1-2]. 27.00 27.00 37.60 36.90 20.40 19.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.32 0.71 -4.48 6 29 2012-06-28 13:54:22 2012-06-28 14:54:22 1 2 21 0 12 36 0 149.10 70 64.37 NEW Phs+CSRVVSRSsPPuLPSQSLtLMPQ+YGDlFWENLSQRPoPTWhEEQYsPPLLRATGCSQPGLYPPEGLPPPEhLCRRKRRRPpLuGM.QQGsGuIPARVRAVTYHLEDLRRRQRIINELKKAQWGSSGAAsEPLsLsE-GCthPSTocY...DlEEERAsYPQEEs+hLTsGRsQLLW Phh+CSRVVSRSsPspLPSQshR.MPQpYGDlFWcNLSQRsSssWhEEQaIPPhL..RATsCSp.uLa.P.EtLPPPEhLhRRK+RRPpLttM.QQG.GulPARVRAVTYHLEDLRRRQpIINELKKAQWGSSGAAsEPlslsE-GCthsSTscY...DLEEERAsYPQEEsphLTPGRsQLLW.... 0 1 1 2 +14994 PF15143 DUF4575 Domain of unknown function (DUF4575) Bateman A pcc Jackhmmer:Q6ZSN1 Family This family of uncharacterised proteins is found in eukaryotes. 27.00 27.00 148.00 38.40 20.30 18.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.94 0.71 -4.42 4 8 2012-06-28 13:55:04 2012-06-28 14:55:04 1 1 1 0 2 14 0 73.10 36 95.59 NEW spGGpcuhLSlPuShGVPPssAMGVLRARGspGAGSQS.PRsGs..P..........pctlhLS.PpLhGsAS..CDGuPKuQGGKRuWhSV.sSR...........................GVPPPsAhGVLtARGGRG.AGSpSLPRGsshP.hch .............................................................ttctlhlS.shL.......s.Gs.+spGsptsh.ph.sStGlPPPsAhGVLtARGtcG.s.spp.............. 
0 2 2 2 +14995 PF15144 DUF4576 Domain of unknown function (DUF4576) Bateman A pcc Jackhmmer:Q6UWT4 Family This family of uncharacterised proteins is found in eukaryotes. 27.00 27.00 69.40 67.80 22.00 21.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.90 0.72 -4.36 5 24 2012-06-28 13:56:29 2012-06-28 14:56:29 1 1 19 0 13 22 0 83.00 70 99.75 NEW MAVSVLRLTlVLGLLlLILTCQADDKPp-...KPD-KPDDSGKNPEP-FPKFLNLLGSEIIENAVEFILRSMTRSTGFMEaD...DKQGEHSoK MAVSVLRLTlVLGLLlLILTCpADDpP.-.....+PD...cKPDDSuKsPcP-FPKFLsLLGTEIIENAVEFILRSMoRoouFhEht...sppsp+........... 0 1 1 1 +14996 PF15145 DUF4577 Domain of unknown function (DUF4577) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N8F7 Family The function of this family of proteins, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically 128 amino acids in length. 27.00 27.00 125.50 70.50 23.40 22.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.44 0.71 -4.23 3 33 2012-06-28 14:01:31 2012-06-28 15:01:31 1 2 26 0 19 25 0 118.50 71 98.86 NEW MTHsSQDAGSRGhPE-RKLYVVDSINDLNKLSLCPAGSQHLFPLQEKIPDsGTsPGNGuRGLFFMGLIlVLIVSLALVSFVIFLIVQTGNKMDDVSRRLTAEGKDIDDLKKINSMIVKRLNQLDAEQN ............MsHSSQDsGSpGl..pEDtKLYVVDSINDLN.KLNLCPAGSQHL..FPLE-K..lPshG...TN.SG...NG..S....+SLFFVGLlIVLIVSLALV.FVIFLIlQTGNKMDDVSRRLsAEGKDIDDLKKINsMIVKRLN...QLDuEQN.... 0 1 1 2 +14997 PF15146 FANCAA Fanconi anemia-associated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VG06 Family This family of proteins plays a role in the Fanconi anemia-associated DNA damage response [1]. 
27.00 27.00 27.30 47.50 20.90 26.70 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.57 0.70 -5.58 6 50 2012-06-28 14:04:41 2012-06-28 15:04:41 1 2 34 0 23 52 0 384.00 58 57.07 NEW supAGQ+IK-LLSuIGsVSERVS.LKKAVDQ+N+ALssLNpsMNVSsALLSuppGs+P........IuCTlosoWSpL.hpDsLhATClLENSSuFSL-pGWTLCIQVLsSspAL-h-SusSAhTYThPVDpLsPGs+REVTLPLusucsGsLDLPVTlSCsLaYSLREllGsuLsss-shcs...tp.s..lLP-Q-GlCLPLscpTVDMLQCLRFsuhssspstus..s.huPspDPVcTFL+ospssssp.uGsu.S.p.....spYhPPSsASI+VSuELL+uAL+.soaSulsLssATLpWLLAENAAssllpupsloSlpGhAPDGs-V+Lhl+EVuloDLsPAGPIQAlEIQVESSSLAshCRhHHAllpRlQsLVhEQAApGSusPDLRhQYLpQhpsNHEoLL+ElQoLRD+..LCsED-hS.psuTup+LLplY+pLRsPSLlLL ..............s.tpsGp+IK-LLSGIGslSERVShLKKAVDQRN+ALssLNEsMNVSCAL.....L...SS.tpGs+P........ISCTsoTsWSRL.hpDsLhATClLENuSsaSL-pGWTLCIQVLsSSpAL-hDussSAhTYTlPVDpLuPGs+REVTLPLGPuEsGsLDLPV.TVSCsLaYSLREVlGuAh.ssS-u.-ss.h.-csPsshLP-Q-GlCLPLScpTVDMLQCLRFPuLA.....ss.pspuP..u.huPspDPVsTFLcos+t...P..uups..uGPu.SLR..........AcaLPP..SVASI+VSAELLRAA..Lc.D........ucSG..l..........s.....LCCATL.pWLLAENAAlDll.......+A.....p.....uLSSlQGlAPDGs-V+LlV+...E.....VAhTD..LC....P..AGPIQAVEIQVESSSLAshCRsHHAVltRhQs.MVsEQ.AAQGSSsPDLRlQYL.RQIauNHEsLLREVQoLRDR..LCTED-uS.SsATAp+LLQVY+QLRpPSLlLL............. 0 2 4 10 +14998 PF15147 DUF4578 Domain of unknown function (DUF4578) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96A22 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 44 and 137 amino acids in length. 
27.00 27.00 27.50 72.50 25.20 24.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.80 0.71 -3.76 4 27 2012-06-28 14:06:32 2012-06-28 15:06:32 1 2 21 0 13 26 0 116.80 67 94.04 NEW MGNRLCCGGSWSCPSTFQ+KKKhGSpsRhTLphQp......p.ht.NsoKsH-Tpu+TYEQVLpQPuSQcRS.puLpSEESsLHYADIQVhppspPRShpEVKHLpLENATEYATLRFPQATPRYDSKNGTLV .....MGNRlCCGGSWSCPSTFQ+KKKTGSQsR.pTL+..Q............QQlpQNusKG+-TpGHTYEpVLpQ.sSQcRS..GLhpE-SsLHYADIQVCS+spsR...EVKHlpLENATEYATLRFPQATPRYDSKNGTLV...... 0 1 1 2 +14999 PF15148 Apolipo_F Apolipoprotein F Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q13790 Family \N 27.00 27.00 27.30 31.90 21.40 26.80 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.38 0.71 -5.01 8 40 2012-06-28 14:12:08 2012-06-28 15:12:08 1 3 28 0 20 37 0 180.80 47 61.13 NEW PloCQsLLspu..shsphAPLPcFLs.LALclsLEphGC.sEsa.LQLQLhchGGh-uTETLI+c......S++oscchulsslpuhLptLutpsssh+RscRSl.ssEsCcpEpc.sl+clAQL.......LPulshhhpLtTAlh.ATQpCoDcuhE+hc-suhcLshs..hp.AshshsspshlIupulpslh+ssVphlhpYaQ .....................................PhoCQ.Lh.pu....sphAPLPcaLssLALcssLEcsGC.s-shsLQLQLh+.GGlsATpsLIpaLpt..LppuppspcpsSlcALsSALQLLAp...Ep.uspRspRSl.ssccC-sEcEQsVHsllpL.......LPuVGTaYNLGTALYYAoQNCssKA+ERGpDGAIDLGYDLLMsMsGhoGGPhGlsIoAuLKPAl+uGVppLIpYY.......................................................... 0 1 3 4 +15000 PF15149 CATSPERB Cation channel sperm-associated protein subunit beta protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H7T0 Family The function of this family of transmembrane proteins, CATSPERB, has not, as yet, been determined. However, it is thought to play a role in sperm hyperactivation by associating with CATSPER1 [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 220 and 1107 amino acids in length. 
27.00 27.00 182.50 52.80 25.30 21.70 hmmbuild -o /dev/null HMM SEED 541 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.77 0.70 -6.31 5 43 2012-06-28 14:14:55 2012-06-28 15:14:55 1 3 29 0 29 39 1 462.60 52 59.02 NEW +FcsIHhGKVIHSSKTGpAYIRcVhpHcT.PKGFhoSVIAElIEPFGlEss+ESsCLsSsLpIsauGNlaYpLoLpSQsspupFcuTDlEKTVlIPGYSSFLITcIlDspTAlAlATMPpolssNLsFLpuSWFLYNF..GptsGRoWpItoKPCNYWl.QQDphDuhSLNlVKYIDlGNolsFphKVIPssKuhpThEIP....LloVlVGNPsLLEVKApGaFDss-NYhLsIaluuKshppGSTSLAlIlWEuSocCaVoTllPTlKSSCSYLRoMHHIPu+aIP.EDWISGVHKDSQGFNMIKTLPINYRPPSsMGIuIPLTDNFYHADPS+PIPRNtFHpSKcTGKYKQCANVooREcCNCTccQKFSHAVAFSDC+EKVaRFKFPVTQYPVuLEIaNER-+IolEoPYLVTlTEVNMRcNWcLKHoVPENVKKMKsYLEPlL+oPVYNPLGLNLSIKGSELFHFRVSVV.PGVTFC-LpEEFQIYVDEsPLPFPGHsLIAVATAVVLGGLIFIAFlFQL+NIHPl+shp+.I+sNssphSoool ................................pFpslHhGKhIp.tpoGpAhIcKlhpHsh.spGFhSSVlsEhhcPFslEphp-SsCLsSSL.I.s..psGsh.Y+LoLp.p..sh..pu.FpsoDIEKTVVlPGYSSFLITpIlDspNALAlATMPppsssNhTF.csoWFLYNF..Gp+sGRpWpIhs+PCNYWh.Qpc..-o.SLNllKYIDLGsohshphKlI..p.s+.uhphhclP....LLpVhVGNPsLLE.VcscuhFD-oDSYlhpIsssSKhLppGSTSlAhlhWpASscChVTThVPTLKSSCSYL+oMHHlPu.......phIP.EDWlS..Gl..H+DSpGFNhIKTLPlNYRPPSNMGIAIPLTDNFYHADPSKPhPRNhF.hSKco.GKaK.QCANhooRcECNCTpcQKhSaAVAFSDC+EKVPRFKFPloQYPluLcIhs.Ec.splPlc.....sPYLVTlTEVN.RpNWcLK.H.slP-sl++hKpalEshltssVYNP.GLNLSIpGSELFHFRVoVl.sGVTFCsLlEEFQ.IYVDEsPLPFPGHsLIAluTAVVLGGLIFhAFh.FQlpsIHP.hpshpphhhpp........h................ 0 6 7 12 +15001 PF15150 PMAIP1 Phorbol-12-myristate-13-acetate-induced Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q13794 Family This family carries a BH3 domain between residues 23 and 40. 27.00 27.00 27.50 33.80 25.10 24.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.66 0.72 -4.20 3 17 2012-06-28 14:17:15 2012-06-28 15:17:15 1 2 13 4 9 25 0 50.50 67 80.49 NEW MPGRKSRKSsQ..PuPTRV..PsDLEVECAIQLRRIGDKLNFRQKLLNLISKLFRLGT ......MPG+KARKsAQ...suPsRs...s-LE....VECAhQLRRhGDKLNFRQKLLNLIuKLFp.GT.... 
0 1 2 2 +15002 PF15151 RGCC Response gene to complement 32 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H4X1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 44 and 130 amino acids in length. There is a conserved KLGDT sequence motif. 27.00 27.00 30.50 72.90 25.20 25.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.66 0.71 -3.68 7 49 2012-06-28 14:17:28 2012-06-28 15:17:28 1 2 36 0 28 52 0 124.20 66 97.60 NEW M+Ssssp.tstshhh..................-.ts-Lu-lLpEF-sVlc-F.tSPhpppp.tY-cHLcphKRRoutSlSD.SGlsDSE.Su-o.htsShshS-EcLNososs..........TssKAKLGDTKELE-FIADLD+sLtp..M .............................tstsshh..................ssstDLoDsLCEFDAVltDF.uSPhccRHF+Y-EHLE+MKRRSSASVSDu.SGFSDSE.SADSLYRNSFSFSDEKLNSPTsSoPuh..oPssoPpKAKLGDTKELEDFIADLD+TLAS............. 0 1 5 13 +15003 PF15152 Kisspeptin Kisspeptin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q15726 Family \N 27.00 27.00 29.80 29.20 20.50 19.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.84 0.72 -3.29 10 51 2012-06-28 14:20:42 2012-06-28 15:20:42 1 2 34 0 12 56 0 74.40 49 58.57 NEW ssh.puLRss-p+Pss..Ap.ss+hou.s..scS.usGtphsuhshspSRlIPuPpGtlLVpREKDlSuYNWNSFGLRYG ....sWppu.pCsE+KPss..At.psRtsu.ssPsES.SuG.ppsuhs..uspSR.IPAPpGAlLVQREKDLSsYNWNSFGLRYG 0 1 2 5 +15004 PF15153 CYTL1 Cytokine-like protein 1 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9NRR1 Family The function of this family of proteins, CYTL1, has not, as yet, been determined. However it is thought to be a secretory protein expressed in CD34+ haemopoietic cells [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 134 and 145 amino acids in length. There are two conserved sequence motifs: PPTCYSR and DDC. 
27.00 27.00 75.40 27.90 21.10 20.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.56 0.71 -4.46 9 49 2012-06-28 14:21:46 2012-06-28 15:21:46 1 2 38 0 26 39 0 123.40 55 96.34 NEW phsLhhLhsllu...hhhhup..ssPPTCYSRhLsLS+EIhsthpcLpsspsscsClchLPclalDlHNsClhoKLRDFlh......sspCtchs+lshLKc+lppLYsIhsphC+RDLVFhoDDCpALEsshsss.......phhs-.pp ....................h....hLLhLls...hs.hup..s...sPPTCYSRhLuLS+EIspsFppLQsoEPs-sCVchLP+LYLDIHNYCVLsKLRDFVA......SPpCh+hspVcsLK-KlRpLYTIMNSaCRRDLVFLoDDCsALEhPIsss.Ts.sD............. 0 1 3 9 +15006 PF15155 MRFAP1 MORF4 family-associated protein1 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9Y605 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 127 amino acids in length. 27.00 27.00 58.40 35.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -3.80 2 52 2012-06-28 14:25:25 2012-06-28 15:25:25 1 4 22 0 17 44 0 117.90 79 68.99 NEW MRPlDhsEhtEPcE....EPtp.....L.Ps.pthREslAuLpRE+uRAahRsRpKLhEhpshL.tIKopVEApEcuAhs.h.pPtsts-tRsA+hstcA-cKAtEhA+MuchlVELhpRIttsEss .....................MRPLDIsElsEPEEVEVLEPEEDFEQFLLPVINEMREDIASLoREHGRAYLRNRSKLWEMDNMLIQIKTQVEASEESALNHlQsPustA-tRsuchCEKAEEKAKEIAKMAEMLVELVRRIE+SESS.................. 0 2 2 3 +15007 PF15156 CLN6 Ceroid-lipofuscinosis neuronal protein 6 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9NWW5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 190 and 310 amino acids in length. 
27.00 27.00 127.10 46.60 24.00 23.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.82 0.70 -5.30 4 65 2012-06-28 14:27:27 2012-06-28 15:27:27 1 3 39 0 31 62 0 249.00 76 86.16 NEW RtGStcscs.sspsu.FHhDLWhhFTLQNWlLDFGRPIsMIlhPL-WFPLNKPSsGDYFHMAYNIITPFLLLKLhERSP+TLPRSslYlSIITFVMGASIHLVGDSlNHRLlhSGYQhHLSVRENPIIKsLpPtTLIDSFELLYYYDEaLGHsMWYIPFFLILFlYFTGCFTplKAEp+MPsSAWlLLuPSulYYWYLVTEGQIFIlFIFTFFAMlAhVhHQKR+GhlLDSNGLFLhYSFulTLhLVulWVAaLWNDpVLRKKYPGVIYlPEPWAFYTLHlpsp ...............ps--.sspsu.FHLDLWFYFTLQNWVLDFGRPIAMllhPLEWFPLNKPSVGDYFHMAYNVITPFLLLKLIERSP+TLP..RShhYlSIITFlMGASIHLVGDSVNHRLlFSGYQpHLSVRENPIIKNLKPETLIDSFELLYYYDEY....LGHsMWYIPF...FLILFhYFSGCFTssK..uE.u.pMPssALLLluPSuLY..YW...........YLVTEGQIFILFIFTFFAMLALVLHQKRKtLaLDSNGLFLF.SFuLTLLLVALWVAWLWNDPVLRKKYPGVIYVPEPWAFYTLHVSS.p... 0 2 5 13 +15008 PF15157 IQ-like IQ-like Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q1A5X6 Family This family of proteins includes Human IQ domain-containing protein J (IQCJ). 27.00 27.00 27.90 36.20 26.30 22.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.32 0.72 -3.79 3 41 2012-06-28 14:29:59 2012-06-28 15:29:59 1 4 29 0 21 50 0 89.20 78 58.91 NEW MRLEELKRLQNPLEQVNDGKYSFENHQLAMDAENNIEKYPLNLQPLESKVKIIQRAWRcYLpRQ-....sLGKRSPSPPSl...SSEKLSSSlSMNTFSDSSTP .....EELKRLQNPLEQV......NDGKY.hENHQLA..MDsENNI.EKY.LNLQPLESKVKIIQRAWREYLQRQ-.....PLtKRSP.SPPSl...SS-KL.SSSVSMNTFSDSSTP 0 1 3 6 +15009 PF15158 DUF4579 Domain of unknown function (DUF4579) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q2WGJ8 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 192 and 239 amino acids in length. The human member of this family is C8orfK29. 
25.00 25.00 25.90 25.60 18.00 17.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.14 0.71 -4.77 4 30 2012-06-28 14:36:54 2012-06-28 15:36:54 1 1 24 0 22 27 0 156.90 47 77.13 NEW Gh.psERpluppLppNpFaPFtp..pPssFhLEYhhsoLaKull.FIlhhlhlshthlscVp+Q-..........TWsF.sYGlulGLWLhISShPpRRLVLNHsRGsYHFSIQGRTVCQGPhHLVYVRLALsSDA.tthFapLlLsGaplEshlLspho-Rh-ph-hLGRhIAR+lNlNYFDshs.......sShRpVVRHWs ......................p.ccpltp+LppN.FaPFhp....pPssFlLEYhhcsLaKGhLhFllsllh.lsht......lppspcQ-..........TWsF.saGlsVGLWL..l..lS.S.LP+RRLVLNHs+GhYHFSIpGRTVCQGPhHLVYVRLALsSD..u.GphaapLVLsGa+lEshsLspho-+h-phEhLGRpIARKLNlNYFDhhs.......sShRHllRHWs.......... 0 6 6 10 +15010 PF15159 PIG-Y Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q3MUY2 Family This family of proteins represents subunit Y of the GPI-N-acetylglucosaminyltransferase (GPI-GnT) complex. It may regulate activity of the complex by binding the catalytic subunit, PIG-A [1]. 25.00 25.00 25.10 25.20 24.60 24.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.48 0.72 -3.76 10 56 2012-06-28 14:46:25 2012-06-28 15:46:25 1 2 51 0 36 65 1 68.90 33 66.62 NEW hhGsLhllhGllhFluhhauAVlscllPsussshlSulp.DhYYsLLlPlTLPVhlssVYapWLShKlFKHA .....................huhhhllhshl.hsshhYus...s..ls...c.............hPps.sssh...hshh....aYsLLlPlTlPVhlhhshasWluhKhF+Hs.... 0 15 22 29 +15011 PF15160 SASRP1 Spermatogenesis-associated serine-rich protein 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q496A3 Family Spermatogenesis-associated serine-rich protein 1 is a serine-rich protein differentially expressed during spermatogenesis [1]. 
27.00 27.00 31.60 36.20 25.90 26.40 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.72 0.70 -5.14 3 40 2012-06-28 14:52:34 2012-06-28 15:52:34 1 3 28 0 18 50 0 193.70 59 77.00 NEW Ap+SDFLESKGCFANTTSS..GRSVSsSSSVETGLSVo-uPGLPRVhsYlDTAADLDpKoSSSHS........DHSSEsSLPEVQKDKYPEEFSLLKLQTKDGQRPEWTFYPRFSSNIHTYHVGKQCFFNGVFLGNRRSLSERTVDKCLGKKKYDIDPRNGIPKLTPGDNPYMaPEQSK-FaKAGSTLPPVNFSIVPYEKKFDTFIPLEPLPQIPNLPFWVKEKANsLKNEI+EVEELDNWQPAVPFLHuLLso .....................................p..h.tSpts.Apthss..sppsS..SsstpG.plpts.u.s+s.s..s.sstLs.K.S.opS..................s+S.chSLPEl.KccaPcEFSLLp.QTpDGpRPEWTFYPRFSSNIHTYHVGKQCFFNGVFhGN++SluERTVDKshG+KK..YDIDPRNGIPK.LTPGDNPYMaPEQSKsFaKAGSTLPPVNFS.lsPYEKKaDTFIPLEPLPplPsLPFW.KEKANpLKNEIpEVEELDsWQsuhPhhp.hh.................. 0 5 5 9 +15012 PF15161 Neuropep_like Neuropeptide-like Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5BLP8 Family This family contains putative neuropeptides [1]. 27.00 27.00 29.90 58.60 25.90 19.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.05 0.72 -4.49 3 42 2012-06-28 14:58:50 2012-06-28 15:58:50 1 2 30 0 23 43 0 60.10 73 47.24 NEW A-p-AGSAIPAESRPCVDCHAFEFMQRALQDLKKTAYNLDoRTEoLLLQAEKRALCDChPAs.L+ ........t..uGoslPApSRPCVDCHAFE.FMQRALQDL+KTAaSLDuRTETLLLpAE+RA...LCsChPA.....t.................... 0 1 3 10 +15013 PF15162 DUF4580 Domain of unknown function (DUF4580) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VVC0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 63 and 185 amino acids in length. 
27.00 27.00 41.10 62.70 26.50 26.00 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.87 0.71 -4.88 6 41 2012-06-28 15:08:27 2012-06-28 16:08:27 1 1 33 0 25 38 0 157.00 62 91.64 NEW sssp.ssslIIsuSLpsSElsphLps..psHKlRhScoltcsollFPLSGVAFLLlss.ph.h.......lsclppFlsh+pNuallLsuslas.pphthh..l..pRFLG.psLplLPVHssupslphMtTIAKhTsKPhhssIppRhpph+uhhlsp..usVWchLpplsLsp ........EphcWoTTlIISSSLcuaElATALEN..RSHKVRYSDSVEsGSIIFSLSGVAFLLMDscEChhSs...........EEhFLsKIEKFINIHpNSFLVLsAALHGPEEWcLMFRI.QQRFLG.sNLRILPVHNTsNAlsLMCTIAKoTSKPalDsICYRMIssKAYIIEQ..SPVW+TLQKIpLs................................. 0 4 6 9 +15014 PF15163 Meiosis_expr Meiosis-expressed Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JSS6 Family This family of proteins is essential for spermiogenesis [1]. 27.00 27.00 50.90 50.80 20.40 17.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.78 0.72 -3.62 10 56 2012-06-28 15:09:45 2012-06-28 16:09:45 1 3 50 0 36 56 0 75.20 66 74.37 NEW hoRAKcWScElEssYRFQpAGYRDElEYpplcpss.sERWPsp..GFVKKLQR.+DGsFhYaNKpREC-DK-lpKVKlYs ......hSRAK+WSEEIENLYRFQQAGYRDElEY+QVKQVuhVDRWPET..GYVKKLQR...RDNTFYYYNKpREC-DKEVHKVKlYt............ 0 10 13 20 +15015 PF15164 WBS28 Williams-Beuren syndrome chromosomal region 28 protein homologue Bateman A pcc Jackhmmer:Q6UE05 Family WBS28 is an integral membrane family. These proteins have been identified as being linked to Williams-Beuren syndrome, OMIM:194050. This family of proteins is found in eukaryotes, and are typically 266 amino acids in length. 
27.00 27.00 27.10 62.60 19.60 26.80 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.07 0.70 -5.31 5 30 2012-06-28 15:40:57 2012-06-28 16:40:57 1 1 22 0 16 30 0 239.20 61 99.54 NEW MEAlPsVRSSLhGILLlVlKLSVLLVQNRlHLYNFLLLKIsLFNHWLSGLAQEApGSss.Qs.HPPusIAACPLGRlLRAGLALlEVPsWLlLRGPRLsWAGhLGCARALGLAPKaLuAWEQLGLSAATWTDLFLSCLHuLMLAALLLLLLTWRLCQKAHCCuLGRLLSKALLtN+VVhcLLALLKRLYWWVEopTALTSWHLAYLITWTTCLASHLLQAAFEHTAQLAQAQEsEPQKu.SGsSSEoPLPEPsuPEAGPVLPEPGTPGE ....................MEAhP.VRSSLhGILLpVh+LSVLLlQNRsHLYNFLLLKIsLFNHWVSGLAQEApGSts.Qs.h.P.sh.suCPLGpALRAGLuLlpVPhWLlLpuPRLsWAuhLsssRslGLAh.hLuAWE.LGLSs.AsWpDLhLSCLHuLMLVALLLlLlTWRLCQKAHp.huLGhLh.SpAL..NplVhchLA.LRRLYWWVEohsALTSWHLAYLlTWTTCLASHLLQAAFEHTAQLAQ..AQEsEspcs.SGs...tssLsts.ssEuGslLsE.tTPtE........ 0 1 1 1 +15016 PF15165 REC114-like Meiotic recombination protein REC114-like Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z4M0 Family REC114-like members are necessary for meiotic DNA double-strand break formation. It functions in conjunction with Mei4. This family of proteins is found in eukaryotes. Proteins in this family are typically between 43 and 259 amino acids in length. 
27.00 27.00 49.40 27.60 26.80 26.10 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.66 0.70 -4.63 7 59 2012-06-28 15:59:42 2012-06-28 16:59:42 1 3 38 0 29 53 0 177.40 46 87.50 NEW WPLpRYGRFh..sptcssssups.uttssPsWKVF-SNEESGpLlLTIVlSGHFFISQGQTLLEGFSLIsSpsWLK.IVRRhDCLLFsTshK....scSRhFRVQFuGs.ScEpALE+CCuCVQKL.upYVTVQssDshsppLp.u.Pu..psscSQsccp..phs.p.us..thppppsshshtsuhss..sphSh..........ppLAQolLssc.c.LPhsYcpSuWsAEELGPFLRLCLMDQNFPAFVE-VEKELKKL ........................................................................+lhtptcpt..h.hsll.uGahhl.pGpt.lLEG.FSLlsuppWLK..IsR+hDCLLFssphK....sc.SRhFRVQFuGp.S+EpALEcCsSCVQ+L.upYloVQhsDs..ppht.....Pu..tsstpp.pst...h..p.t..............tts..t..tphSh..........tpluQ.t..hLspt.t.LP.sYcp.....usassp-LtshLRLCLhDpsFPAFVEcVEcELKKl.............. 0 5 9 14 +15018 PF15167 DUF4581 Domain of unknown function (DUF4581) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N3F0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically 131 amino acids in length. 27.00 27.00 39.80 38.80 20.70 16.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.71 0.71 -4.24 3 37 2012-06-28 16:04:52 2012-06-28 17:04:52 1 3 31 0 20 34 0 108.80 82 93.65 NEW cpLADsAEKWCossPFDLIAsE-s.ERRhDFYA-PGlSFYVLCP-s..GssDpFHVWSESEDCLPFLQLAQDYISSCGKKTLHEILEKVFKSFRPLLGLPDVDDDTFEEYNADVEEEEPEADHQQMGVSQQ ..............ppLsDsA-KWCSssPF-LIhsE-s.ERRMDFYADPGVSFYVLCP-s..GCGDsFHVWSESEDCLPFLQLAQDYISSCGKKT.LHEVLEKVFKSFRPLLGLPDADDDAFEEYuADVEEEEPEADH.QMGVSQQ......... 0 1 3 9 +15019 PF15168 TRIQK Triple QxxK/R motif-containing protein family Eberhardt RY, Coggill P, Hetherington K rdf Jackhmmer:Q629K1 Family TRIQK member-proteins share a characteristic triple repeat of the sequence QXXK/R, as well as a hydrophobic C-terminal region. 
Xenopus and mouse triqk genes are broadly expressed throughout embryogenesis, and mtriqk is also generally expressed in mouse adult tissues. TRIQK proteins are localized to the endoplasmic reticulum membrane. This family is found in eukaryotes and members are typically between and 86 amino acids in length. 27.00 27.00 48.80 48.80 21.00 20.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.66 0.72 -4.06 6 38 2012-06-28 17:39:22 2012-06-28 18:39:22 1 1 32 0 21 31 0 75.90 70 94.04 NEW MGR........KDASos+hPVDQYRKQIGK.QDYKKTKPlLRAT+LKAEAKKoAIGIKElhLh...lsAILsLLhAaYAFFaLplSsshsl- .........MGR........KDAuThKLPVDQYRKQIGK.QDYKKTKPILRATKLKAEAKKTAIGI.KEVuLV...LAAILsLLLAFYAFFYLpLos-lD.s................ 0 4 6 9 +15020 PF15169 DUF4564 Domain of unknown function (DUF4564) Bateman A agb Jackhmmer:Q9BQA9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C17orf62. 27.00 27.00 32.10 31.30 20.90 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.00 0.71 -4.56 8 54 2012-06-29 07:58:34 2012-06-29 08:58:34 1 2 47 0 41 65 0 171.20 52 93.47 NEW M.YMplEc+TushLHLcRuPuIRSWSLhVGIuSlGLAAAYYSoDohLW....KlFYlsGClFVALQNlE-WEEAlFsKpKsclpLcohsLYchlLTh.+tGpEp..VVl-LcclRDlsVQEE+lRYhGcGYlll...LRassGhSaPLTQoushGsRSDVEAlAshls+FLphcpltup...phspopssDsDpspDpu ...................................M.YMpVEp+TuohLHL.KRuPGIRSWSLLVGIhSlGLAAAYYSuDSlhW....KLFYVsGCLFVAlQNLEDWEEAlFsKssGcVhLKTFSLY++lLTL.+sGH-p..VVl.Lp-lpDVsVEEE+VRYFGKGYhVV...LRhuT..GF.SaPLTQSAshGp..RSDVEAlAcLIssFLcLcplps.t....p.spopsu-sst....st................................. 0 10 14 24 +15021 PF15170 CaM-KIIN Calcium/calmodulin-dependent protein kinase II inhibitor Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z7J9 Family CaM-KIIN is the inhibitor of Calcium/calmodulin-dependent protein kinase II (CaMKII). 
CaMKII plays a central part in long-term potentiation, which underlies some forms of learning and memory. CaM-KIIN is a natural, specific inhibitor of CaMKII [1]. This family is found in eukaryotes. 27.00 27.00 30.90 30.50 22.50 21.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.52 0.72 -3.99 5 59 2012-06-29 10:35:08 2012-06-29 11:35:08 1 1 33 5 31 45 0 72.70 70 98.10 NEW MSElLPYSE-KMusYGsDu-VGQlSFSCRLQDTsuFFAGuQuKRPPKLGQIGRAKRVVIEDDRIDDVLKGMoDKuPPGV ...........MSElLPYu--KhutaGs-s-suphSFSCRLQDTNsFFuusQuKRPPKLGQIGRuKRVVIEDDRID.D.VLKsMs-KsPsGV....................... 0 2 6 14 +15022 PF15171 Spexin Neuropeptide secretory protein family, NPQ, spexin Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9BT56 Family Spexin, alternatively named NPQ, is a peptide hormone and is derived from a pro-hormone. This family of proteins has a role in inducing stomach wall contraction and is expressed in the submucosal layer of the mouse oesophagus and stomach. Spexin, like most peptide hormones, is a ligand for G-protein coupled receptors [1]. Spexin is also thought to have a role in controlling arterial blood pressure as well as salt and water balance [2]. 27.00 27.00 40.40 39.90 20.80 18.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -9.99 0.72 -3.99 3 34 2012-06-29 10:43:10 2012-06-29 11:43:10 1 3 27 0 20 24 0 79.00 66 73.58 NEW APQRLFERRNWTPQAMLYLKGAQGRRFLSDQSRRKDLuDRPPLERRSPNop.LTLPEAAALLLASLpKuQEsE-ENhD+ocaLEDsLhNW .APQthhERRNWTPQAMLYLKGAQGRRFISDQSRRKDLuDRs....PERRSPNsphLolsEAAAlLLASLQKspEstEcNhDp.sphL.DpLhsh.................... 
0 1 2 5 +15023 PF15172 Prolactin_RP Prolactin-releasing peptide Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:P81277 Family \N 27.00 27.00 32.60 32.20 23.90 18.30 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.46 0.72 -4.74 8 46 2012-06-29 10:46:53 2012-06-29 11:46:53 1 1 34 0 17 37 0 46.20 57 48.59 NEW SRuap......Hsh-hRoP-IDPhWYsGRGlRPlGRFG+Rputhtcuupsph+ .........Rsap......HShEhRoPDIsPuWYsGRGIRPVGRFGRR+ushtcsspst.......... 0 1 3 6 +15024 PF15173 FAM180 FAM180 family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6P0A1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 182 amino acids in length. There are two conserved sequence motifs: ELAS and DFE. The function of this family is unknown. 27.00 27.00 34.90 34.60 22.40 18.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.68 0.71 -4.39 7 69 2012-06-29 10:48:52 2012-06-29 11:48:52 1 1 38 0 40 57 0 133.30 48 77.10 NEW LaPuAhRsKRu.uuhlNPs.....hQpolE-VpLLaElLLAGlphsst.tthplpDtELASLR+spcLcsICpcllP+pLs-I+RLoupLusphG..sL+hEDFERTlLThVYTA.plspu.pspQR-hWupohlpLapAlKtDL ..........LaPuApRsKRuuu.PlNPs........LQpShE-VELLaEhLLAtL-lsss.hplpIcDEELASLR+Apch+hlCpclIPKsls-I+RLsupLush.u..sL+p-DFERTlLThsYsAYRhshu.pucQ+-hWApuhspLaQAl+aDL.................................. 0 2 3 11 +15025 PF15174 PRNT Prion-related protein testis-specific Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86SH4 Family PRNT is a family of prion-related proteins expressed in the testis [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 52 and 94 amino acids in length. 
27.00 27.00 37.50 87.20 17.80 16.80 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.66 0.72 -4.31 3 15 2012-06-29 10:52:38 2012-06-29 11:52:38 1 1 15 0 1 13 0 49.00 70 73.43 NEW NTsLhHSAWPLShLHQTVSTLKAVAVTHSLWHLQIPVDCQACNRKSKKIYC NhPILLSHYPLP...QQTETWKAAsAllSLWaLQSPGDGQACDRESVKIYC 0 1 1 1 +15026 PF15175 SPATA24 Spermatogenesis-associated protein 24 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86W54 Family This family of proteins bind to DNA and to TBP (TATA box binding protein), TATA-binding protein (TBP)-related protein 2 (TRF2) and several polycomb factors. It is likely to function as a transcription regulator [1-2]. 27.00 27.00 52.40 29.70 22.10 26.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.15 0.71 -4.52 6 45 2012-06-29 10:53:44 2012-06-29 11:53:44 1 1 33 0 24 40 0 148.00 65 69.24 NEW uKEEFpAlcKcL.-EKstHAKTKsLLAKEpEKLQFALGEV-VLSKQLE+EKhAFEKAhusVKsKAhQESuc+DQLloKCsEhp...pcll....+QEDlLNuKE.cI+-LpphlupQKpsh..............H+sphS-hc..........IQppQ-tYh..sp.h.scs+hpputphsGp .....................SKEEFQAVEKKLV.EEKAAHAKTKlLLAKEEEKLQFALGEVEVLSKQLEKEKLAFEKALSSVKS+VLQESSKKDQLITKCNEIE...SHII....KQEDILNGKENEIKELQQVISQQKQIF....................psphSshR..........IpK.Qtphh..ApslstKpKhssuh.....s.............................. 0 7 8 10 +15027 PF15176 LRR19-TM Leucine-rich repeat family 19 TM domain Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IVY1 Domain LRR19-TM is the single-span transmembrane region of LRRC19, a leucine-rich repeat protein family. LRRC19 functions as a transmembrane receptor inducing pro-inflammatory cytokines. This suggests its role in innate immunity [1]. This family of proteins is found in eukaryotes. 
27.00 27.00 45.60 44.30 26.80 26.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.58 0.72 -4.33 9 67 2012-06-29 10:54:56 2012-06-29 11:54:56 1 8 31 0 41 50 0 103.90 48 43.54 NEW souSssuPusuoG...uRuWPlLVGVVluAlllSLLIALAAKCpLC++ahsSYpH+PLsEs.....................GpuspPsVs.....csE...............DDDGFIEDNYIQPutst.Es ..........s.outsshstsups....u+uWshLVGVVlsslshSLLIhlAhKC.lhhpahhSYpH+.LpEp..........................GhsspPpss.....psc...............DDDGFIEDpYIpstphp........ 0 2 6 12 +15028 PF15177 IL28A Interleukin-28A Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IZJ0 Family The protein family, Interleukin-28A, plays an important role in modulating the immune system. This protein family is induced by viral infection and interacts with a class II receptor [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 195 amino acids in length. 27.00 27.00 30.10 30.10 25.00 19.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.85 0.71 -4.73 10 96 2012-06-29 10:56:35 2012-06-29 11:56:35 1 3 32 6 51 94 0 145.40 55 79.55 NEW ssuKGCHluQFKSLSPQELpAFKKAKDAlEESL.LKsWsCSSRLFPRshDL+QLQVhERPVALEAELALTLKVLEshADo..uLuslLDQPLHTL+HIHSpLQA..ClpsQPTAGP.RP+GRL+HWLHRLQEAsKKEStGCLEASVTFNLFRLLTRDLKCVASGD ........pt+uCclupFKSLSPp.ELpAFK+A+DAhE.-SLh.KshpCpS+lFPRshDL+pLQ......V.hERPlALEAELsLTLKVL.pshscs..uLsclL-QP....L+TL+HIpSpLpA..ClpsQPTAuP....+P.pGR.L++WLHRL.....pEA..cKESsGCL.EASVhFNLFRLLTRDL+CVAsGD....... 0 3 3 14 +15029 PF15178 TOM_sub5 Mitochondrial import receptor subunit TOM5 homolog Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N4H5 Family This is a family of transmembrane proteins thought to form part of the pre-protein translocase complex of the outer mitochondrial membrane (TOM complex) [1]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 50 amino acids in length. 
27.00 27.00 30.20 30.10 25.50 25.50 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.55 0.72 -4.27 4 39 2012-06-29 10:58:52 2012-06-29 11:58:52 1 1 25 0 14 50 0 46.20 79 78.66 NEW MF+lEGLuPKhDPEEMK+KMRpDVISSVRNFLIYVALLRlTPYILKKLDSI ..........MFRIEGLuPKLDPEEMKRKMREDVISSIRNFLIYVALLRVTPallp............. 0 3 4 6 +15030 PF15179 Myc_target_1 Myc target protein 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N699 Family This family of proteins is regulated by the c-Myc oncoprotein. It regulates the expression of several other c-Myc target genes [1]. 27.00 27.00 27.70 27.20 25.10 24.80 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.33 0.71 -4.72 7 50 2012-06-29 10:59:41 2012-06-29 11:59:41 1 1 38 0 29 42 0 176.40 62 89.94 NEW MApNsTshh.phhcsF.apsllLAFslSMllGLllGulIahllThhS.RRRASApIoptsspppppp...pS.s.hts+hGaYR.souh-R+Ss.SL..AuLohpRQsSlE.s...sshsRKsSFcuSTF+Phhpss.hs..s-psuQh...........sshssssssssshs.sss..pRssFahussuLRshhsoQTPPPAYDSlI+AF.E..o ..................MApNsTslh.sWscsF.WEDLIhSFTVSMAIG.LVlGGhIWALhsCLS.RRR.A.SA..sISQWSsSRRoR.......SSasHuLNRTGFYR.HSGCERRSNLSL...ASLTFQRQASL.EQA...NSFPRKSSFRASTFHPFLQCPPLP..VET-SQL...........hTLPuSs...sossl..sosHSLuRPDaaWSsNSLRhuhST.sPPPAYESIIKAFPD...S..................... 0 1 5 13 +15031 PF15180 NPBW Neuropeptides B and W Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N729 Family The function of this family, NPBW, which includes Neuropeptides B and W, is thought to be involved in activating G-protein coupled receptors, GPR7 and GPR8. It is thought to play a regulatory role in the organisation of neuroendocrine signals accessing the anterior pituitary gland. It is predicted that this effect will stimulate the increase in water-drinking and food-intake. This suggests it plays a role in the hypothalamic response to stress. This family of proteins is found in eukaryotes [1,2]. 
27.00 27.00 31.90 31.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.68 0.71 -4.47 8 57 2012-06-29 11:40:11 2012-06-29 12:40:11 1 3 30 0 29 48 0 111.70 41 85.80 NEW sphshlslAlsLLlus.PutAWYKpsAGPuYYSVGRASGLLSGlRRSPYsRRS-scuuAts.ut.............suspsp.pssLRShslCVpDlsPNLpSCEhLsDGsGsa............QCKA-VFLSLcShDChsA ........t...shhslhLhLLlhs.PuhAWYKpsAuPpYYoVGRAuGLLSGlRRSPYh+Rupspsssts.u...............tsshp.pssl+ohs.........lhlpshs.pL.pschh.sstGhh............ptps-shLoLcuhDt.t........................................ 0 2 4 13 +15032 PF15181 SMRP1 Spermatid-specific manchette-related protein 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NCR6 Family This family of proteins, SMRP1, is thought to have a role in spermatogenesis and may be involved in differentiation or function of ciliated cells [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically approximately 260 amino acids in length. 27.00 27.00 85.50 85.50 25.50 25.20 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.92 0.70 -4.99 5 38 2012-06-29 11:41:30 2012-06-29 12:41:30 1 2 28 0 21 48 0 226.30 62 93.33 NEW MFLFSRKTKTPISTYSDSYRAPTSIKE.VYKDPPLWAWEANKFVTPGLTpTMcRHVDPEAL.QKMsKCAsQDYTYKuSISGHPYLPEKYWLSP-EtDKCsPsYLss.......D+YNTWRTuPYSsh.WNKYTTYLPRLPKEsGMEThVRGMPLEYPPKPERLNAYEREVVVNMLNSLSRNpsLPQIsPRCGCVDPLPGRLPFQGYESsCSGRHYCLR..GMDYhsoGsPsT-RRLRPLCscpPThpolLpsssRsulsC.........YsSPslIlPhSEP ..MFLFSRKTKTPISTYoDSYRAPTSIKE.VYKDPPLhAWEANKFlT....PGLTpTM.cRH.VDPEAL.QKMsKCAsQDYoY+uSIsGHPYLPEKYWLSp-E.....t......DK.......CsPsYLsu.......DRYNTWRhuPYNsosWNKYTThLPRLPKEAGM.ETsVRGMPL-hPPKPERLNAY.......EREVhVNMLNSLSRNQ.LPpIsPcCGCscsLPGRLPFpGY-SsCSGRHYCLR..GMDYhssGsPss-R.+Lpshs.pp.T.pss.......tRsshpC.........Ys.Pslhhsh.p............................................... 
0 2 2 6 +15033 PF15182 OTOS Otospiralin Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NHW6 Family This family of proteins, Otospiralin, has a role in maintaining the neurosensory epithelium of the inner ear [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 90 amino acids in length. 27.00 27.00 60.30 60.10 22.10 20.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.42 0.72 -4.31 4 41 2012-06-29 11:45:03 2012-06-29 12:45:03 1 1 35 0 27 29 0 66.30 70 77.66 NEW A+Pl.tEtsPYtEsPAhPYWPaSTSDFWNYVpaFpolGAYsQlpDhARTFFAHaPLGsTLGacVs.p-E ......A+Pl.EEuDPYsEsPAMPYWPFSTSDFWNYVpYFQoLGAYsQIpDMARTFFAHFPLGoTLGaHVPYpE-.... 0 1 3 11 +15035 PF15183 MRAP Melanocortin-2 receptor accessory protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8TCY5 Family This family is thought to be involved in cell trafficking. It is required for MC2R expression in certain cell types, suggesting that it is involved in the processing, trafficking or function of MC2R. MRAP may be involved in the intracellular trafficking pathways in adipocyte cells [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 47 and 205 amino acids in length. 27.00 27.00 54.50 50.30 25.90 25.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.20 0.72 -4.38 6 76 2012-06-29 12:32:18 2012-06-29 13:32:18 1 2 40 0 41 79 0 84.20 54 53.03 NEW MANpTNuSs.aaSYEYYLDYlDLlPVDE+KLKAsKYSIVIAFWVSLAsFVhhLFLILLYMSWSGSP.Qs..Rsusppa.hCsWs+uhsLPLCl ...........pusss..Ss.h..apYEYY.-h..h.PVs.ctLKAHKYSIVIuFWVuLAsFVlFhFhlLhhhohoGuP.Q....csut++aphssaspshsh.Lp................. 0 2 5 14 +15036 PF15184 TOMM6 Mitochondrial import receptor subunit TOM6 homolog Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96B49 Family TOMM6 forms part of the pre-protein translocase complex of the outer mitochondrial membrane (TOM complex) [1]. 
This family of proteins is found in eukaryotes. Proteins in this family are typically between 43 and 74 amino acids in length. 27.00 27.00 36.20 49.70 24.20 18.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.22 0.72 -4.64 3 36 2012-06-29 12:33:59 2012-06-29 13:33:59 1 1 28 0 16 40 0 70.80 80 98.87 NEW MSuSsVKs.uAGSuG........VuDWlRusCRFATDRNDFRRNLLVNLGLFAAGVWVARNLSDFDLMSPQPlT ..............MASSGVsVoAAGSANEsPEIPDNVGDWLRGVYRFATDRNDFRRNLILNLGLFAAGVWLARNLSDIDLMAPQPGV........ 0 1 1 2 +15037 PF15185 BMF Bcl-2-modifying factor, apoptosis Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96LC9 Family BMF is thought to play a role in inducing apoptosis. It is thought to bind to Bcl-2 proteins [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 75 and 190 amino acids in length. There are two conserved sequence motifs: GNA and DQF. 27.00 27.00 27.30 27.00 17.80 17.20 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.50 0.70 -5.05 4 51 2012-06-29 12:35:41 2012-06-29 13:35:41 1 2 32 1 21 64 0 159.20 56 91.31 NEW -hPpCsEpshpphct..........................ccDsapu-sup.............sspsushhAuulhspspphDs..tchph.PlophhGsshRshptp-+ATQoLu.usuup.............ulhhsCGlptpP+tLFaGNAGaRlHh.PAsFthu.shhEpspptQ....................p.thpsEVQIuRKLppIuDQFpp.HlQ....pHppN.sthhh.lhhFhc.Lh.p.t...pssG.p .............................................................................CVE........L.........................EDDVFQPEDGE............PusQPGuhLSADLFAQS..LDCPLSRLQLFPLTHCCGPGLRPsuQEDKATQTLSPASPSQ.............GVMLPCGVTEEPQRLFYGNA.GYRL.l.PAuFsss.shtEpP.EuQ......................pHRsEVQIARKLQCIADQFHRLHhQ.......pHQQNpspsWWQlhLFLpNLALNtctNRpssG........................... 
0 1 3 7 +15038 PF15186 TEX13 Testis-expressed sequence 13 protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BXU2 Family The function of this family of proteins has not, as yet, been determined. However, members are thought to be encoded for by spermatogonially-expressed, germ-cell-specific genes [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 177 and 384 amino acids in length. There are two conserved sequence motifs: FIN and LAL. 27.00 27.00 82.70 79.20 21.80 20.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.74 0.71 -4.64 12 77 2012-06-29 12:37:16 2012-06-29 13:37:16 1 2 22 0 44 57 0 147.00 52 44.30 NEW cs-DsoSGF+HucVlhFINEchu+pu+GPEFYL-NlSLSWEEVEDKL+sIL-DopVPpplKcACsWuoLALGVRFAtRQsQLQu+RVpWLpDhupLH+SAAhuLAS-LpcLpcQpEhEppEAAhQLp.spspLtEsp+ERDlhRh+lhpsEL .....s-DPuSGFRHucVltFINEchspps+GPEFYlpNhShSWcEVEDKLRuILpDopVPppsKcACsWuuLALGVRhApRQtpLQs+RVphLp-hsc.H+oAu.ALAS-LpcLppppEh-ppEAAhQLphspssLtcspcERDhLph+Lhph.................. 0 2 2 4 +15039 PF15187 Augurin Oesophageal cancer-related gene 4 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H1Z8 Family Augurin is alternatively named oesophageal cancer-related gene 4 protein. The function of this family of transmembrane proteins, is to induce the senescence of oligodendrocyte and neural precursor cells, characterised by G1 arrest, RB1 dephosphorylation and accelerated CCND1 and CCND3 proteasomal degradation [1]. Augurin has been found to stimulate the release of ACTH via the release of hypothalamic CRF [2]. This family of proteins is found in eukaryotes. Proteins in this family are typically 145 amino acids in length. 
27.00 27.00 81.90 81.80 24.00 23.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.58 0.71 -4.22 9 52 2012-06-29 12:39:44 2012-06-29 13:39:44 1 1 39 0 31 47 0 112.90 68 82.25 NEW spLc+lLpKR-sst...sPu+ssVAVstuKAKEFLusL+RsKRslWDRSRPDVQQWIQQFMYMGFDEs+LEsDLSYWMDpuRuuDQG....RQHHYDENAsIGPRsP.....poaRHGAsVNYD.Y ....NKL+hhLQKREAs....sPoKspVAV..sEsKAKEFLuSL+RpKRQLWDRoRP-VQQWYQQFLYMGFDEAKFEDDloYWhN+sRsGc-YY.sYaQ+HYDEDuAIGPRsP.....toFRHGAuVNY.DDY 0 1 6 13 +15040 PF15188 CCDC-167 Coiled-coil domain-containing protein 167 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9P0B6 Family The function of this family of coiled-coil domains, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 103 amino acids in length. 27.00 27.00 28.20 27.60 26.50 25.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.76 0.72 -4.04 11 60 2012-06-29 12:41:00 2012-06-29 13:41:00 1 2 48 0 40 60 0 83.00 45 80.04 NEW hSVs+EIDphEEclspC+ppl-plEp+L+cpcLocEpRpslE.......cEhstlpppLps.EccLptL++......ENpKshhlusAlhhlphLlYs ............uVs.EIDGLEEKLupCR+cLEsVss+L+ptELSsEsRcuLE.....................cE+ssLhs+hpshE+ELphLRp......ENRKshhLusulhllhsLlY...... 0 8 11 21 +15041 PF15189 DUF4582 Domain of unknown function (DUF4582) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A2RUB1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 126 and 788 amino acids in length. In humans, it is encoded for on the chromosomal position, C17orf104. 
27.00 27.00 34.80 28.40 19.60 24.50 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -10.69 0.71 -5.03 10 105 2012-06-29 12:45:26 2012-06-29 13:45:26 1 4 56 0 70 98 0 140.20 36 32.71 NEW pRousus-LHhpLEEChEQaRpLEKERKKTEA-LARpshGK+VS.SoNNhPlPRLsssPSRVDRLIVD.hREHAR..VlTLLuKMEpLRusslssslapALcpaLEAI+hlQspRpsEhhNhlpp.R...tth..h+ap-D+DlhsLAuAlpplspAsR+ARTAhWCuL.hTLshssssp .........p.G.hpplph+h.psh.Q.pth-.cE.c.pKpp.uLupNh.tctlp.SpsphslsRh.sus....sSRV.s.Rhhlsphpp.up..lhs.Lt+....cp...LtSs.sh.hphopshs+pl.slh.s.shhpp-hh...................................................................s.......................... 0 16 21 38 +15042 PF15190 DUF4583 Domain of unknown function (DUF4583) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N6I4 Family This family of proteins, also known as UPF0694, is found in eukaryotes. Proteins in this family are around 135 amino acids in length. In humans, it is found on the chromosomal position, C14orf109. 27.00 27.00 33.10 32.20 21.90 20.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.86 0.71 -4.50 3 53 2012-06-29 12:59:36 2012-06-29 13:59:36 1 3 42 0 32 56 0 123.10 77 85.96 NEW MNFRQRMGWIGVGLYLLASsAAsYYVFEIS-TYNRLALEHIQp...........suppsPSuTTWppTLKTRLLuLPFWhWslIFLlPYLQVFLFLYSCTRADPKTVGYCIlPICLAVlCNRHQuFsKAS...NQISRLQLI .MNFRQRMGWIGVGLYLLASAAAFYYVFEINETYNRLALEHIQQ...........HPEEPhEGTTWTHSLKARLLSLPFWLWTlIFLlPYLQMFLFLYSCTRADPK.TVGYCIIPICLAVICNRHQAFVKAS...NQISRLQLI............................ 
0 7 9 16 +15043 PF15191 Synaptonemal_3 Synaptonemal complex central element protein 3 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A1L190 Family \N 27.00 27.00 67.30 67.20 23.10 19.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.30 0.72 -4.25 2 24 2012-06-29 13:00:14 2012-06-29 14:00:14 1 1 22 0 14 28 0 84.90 78 98.03 NEW Mssu.s-.p.h-sh.p...pLNpcLEKhhEpMEclSVphohMsYDMVVhRTsPsLAESh+pLEstF.pCK.........pEhcpp.t-..p.o.p ...MADuDPsERNYDNMLKMLSDLNKDLEKLLEEMEKISVQATWMAYDMVVMRTNPTLAESMRRLEDAFLNCK.........EEMEKNWQELLpETKp........ 0 1 3 3 +15044 PF15192 TMEM213 TMEM213 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A2RRL7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 154 amino acids in length. The function of this family is unknown. 27.00 27.00 99.80 99.70 21.40 20.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.07 0.72 -3.97 3 30 2012-06-29 13:01:11 2012-06-29 14:01:11 1 1 27 0 20 29 0 78.70 73 69.80 NEW uAusSs.SssouuTsH+PcsGTLSs..Cs-VDFCPQAARCC+TGVDEYGWIAAAVGWSLWFLTLILLCVDKLMKLTPDEPKDLQA .............t..tAou.SssSolTsHHPDs.GTLEp..C.NVDFCPQAA+CC+sGVDEYGWIAAAVGWSLWFLTLILLCVDKLMKLTPDEPKDLpA. 0 1 1 4 +15045 PF15193 FAM24 FAM24 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NFZ4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 101 amino acids in length. There are two conserved sequence motifs: FDLRT and CLY. The function of this family is unknown. 
27.00 27.00 37.60 37.60 17.10 15.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.64 0.72 -3.79 6 38 2012-06-29 13:01:55 2012-06-29 14:01:55 1 2 16 0 19 39 0 68.70 51 71.62 NEW LY.KIuKALKsA+.....-sEssh....DPsKssp-phIp.............AKsIssEoCtsLQCC-sCuhatsssSLPPChCshNEGL ...LYhKlu+ALKAAK.....-s-ssA....NPsKshh.sKs..p.............AcohssESCPuLQCC-sC+MYAsaDuLPPChCDlNEGL. 0 2 2 2 +15046 PF15194 TMEM191C TMEM191C family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NGB0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 302 amino acids in length. There are two conserved sequence motifs: QDC and RLF. The function of this family is unknown. 27.00 27.00 36.00 36.00 23.50 22.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.45 0.71 -4.23 3 23 2012-06-29 13:02:45 2012-06-29 14:02:45 1 2 17 0 11 31 0 105.90 59 74.86 NEW MEAAAALDAoRSGpEPCDSQLRRVQDCoGSLMEEVARADCEKRLFGGAGAGuIRLWALGALQTLLLLPLGFLuLPLLYLVLlcPsAlusGLtSLoS-AsLRRLRYTLSPLLELRARGLLPA .....................................................MEAAAtLDAhpuGpEPhDup.RtVQ.sstSLMEEVARADp.EhRLFGG.ssA.............hul.RhhsLuALQsLLhLPLhFLsLsLLahsLhcPsAlpthL.tpLsS-sshRRLRYTLSPLLELRApGLLPs............. 0 1 1 1 +15047 PF15195 TMEM210 TMEM210 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NLX4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 149 amino acids in length. The function of this family is unknown. 
27.00 27.00 27.00 60.70 26.90 18.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.62 0.71 -3.96 3 17 2012-06-29 13:03:35 2012-06-29 14:03:35 1 1 15 0 8 14 0 105.80 72 79.88 NEW TYCECSLGLSREALIALLVVLAGVSASCFCALVIVAIGVhRAKGETCPRHs-NRLVGsYGVQEDRMDLHTVaVESHLMDPDLEVSMMPPLE-QGLhsMThPl-P.sP....PPPPPLsPp TYCECSLGLSREALIALLVVLAGlSASCFCALVIVAlGVlRAKGETC.Ptth-sRLVtpFGVQEDpMDLHsVaVESpLMDsDLEVShMPPLE-puLhsIsM-sssEEP....PPPPP..P............ 0 1 1 1 +15048 PF15196 Harakiri Activator of apoptosis harakiri Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O00198 Family \N 27.00 27.00 126.40 126.10 19.90 19.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.41 0.72 -3.74 2 15 2012-06-29 13:04:25 2012-06-29 14:04:25 1 1 14 2 9 12 0 91.20 88 100.00 NEW MCPCPhHRGRGPPAVCuCusuR.GLR.uAAQlTAhRLpALGDELHpRsM.RRRARsRc.PhPuhLPsh...WPWLCAAAQVAALAAWLLGRRsh MCPCPLHRGRGPPAVCACSAGRLGLRSS.AAQLTAARLKALGDELHQRTMWRRRARSRRAPAPGA.......LPTY...WPWLCAAAQVAALAAWLLGRRNL 0 1 1 1 +15049 PF15197 Leukemia_assc_2 Leukemia-associated protein 2 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O43262 Family \N 27.00 27.00 53.80 89.80 19.50 18.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.77 0.72 -4.30 6 9 2012-06-29 13:05:10 2012-06-29 14:05:10 1 1 7 0 1 6 0 53.30 76 83.92 NEW MKMSFERCTARNKMFVNSAFTK.VDNYCTFL.KKhhhKshF.lphhpKtKK.DLNF MKMSFERCTARNKMFVNSAFTK.VDNYCTFL.cKhhFKshF.lphhpKtKK.DLNF.. 
0 1 1 1 +15050 PF15198 Dexa_ind Dexamethasone-induced Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95424 Family \N 27.00 27.00 138.80 138.60 22.60 16.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.43 0.72 -4.07 2 27 2012-06-29 13:05:54 2012-06-29 14:05:54 1 1 27 0 19 18 0 90.90 90 99.92 NEW Mssu.VhhpLDultsLls......LP.MaYlGLFFVNVLILYYAFLMEYIVLNVGlVFLPEDMDQALVDLGVLSDPuSh.YDsDoELDVF-GYLE ..M.GARVAAHLDuLGPLVsaVPPPLLPSMFYVGLFFVNVLILYYAFLMEYIVLNVGLVFLPEDMDQALVDLGVLSDPGSGLYDADSELDVFDGYLE 0 1 3 7 +15051 PF15199 DAOA D-amino acid oxidase activator Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P59103 Family \N 27.00 27.00 53.50 52.80 18.70 16.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.85 0.72 -3.35 2 15 2012-06-29 13:06:51 2012-06-29 14:06:51 1 1 6 0 2 10 0 67.10 71 58.39 NEW MAQRHLQRSLCPWVSYLPQPYAE......H.uKVhhstNhc...................Cpapp.pp..ssHh..ssTc.t ........MAQRHLQRSLCPWVSYLPQPYAELEEVSSHVGKVFMARNYE.....ASKDRRQPLERMWTCNYNQQKDQSCNHKEITSTKAE. 0 2 2 2 +15052 PF15200 KRTDAP Keratinocyte differentiation-associated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P60985 Family \N 27.00 27.00 76.00 68.30 25.50 18.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.76 0.72 -4.21 3 29 2012-06-29 13:07:40 2012-06-29 14:07:40 1 1 23 0 16 20 0 76.80 71 78.34 NEW AALGoPp...EDTThuNYPoGTEGLNuEFLNFcKLQSAFKSD-FLNWHVLTDMFK+ALPFINWDFFPKVKGLRSAsPDSQ AuLGusE....EETTIsNYAutPEAFNspFLNlDKLRSAFKs-EFLNWHALFESIK+KLPFLNWDAFPKLKGLRSATPDAQ. 0 1 1 2 +15053 PF15201 Rod_cone_degen Progressive rod-cone degeneration Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q00LT1 Family This family of proteins is involved in vision [1]. 
27.00 27.00 29.70 29.70 18.40 17.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.61 0.72 -4.12 5 14 2012-06-29 13:13:08 2012-06-29 14:13:08 1 1 12 0 8 16 0 49.10 76 96.08 NEW MCTTLFLLSTLAMLWRRRFANRVQPEPScVDGAVVGSuSETDLQSSGRE+..uPVK MCTTLFLLSTLAMLWRRRFANRVQPEPSsVDG..AVhGSS.-sDhQSSGRcc..pPlK... 0 1 1 1 +15054 PF15202 Adipogenin Adipogenin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VDE8 Family This family of proteins is involved in the stimulation of adipocyte differentiation and development [1]. 27.00 27.00 36.10 81.80 26.30 22.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.98 0.72 -4.11 2 20 2012-06-29 13:14:00 2012-06-29 14:14:00 1 1 14 0 8 23 0 73.80 79 66.89 NEW MKYPLhPLVNDLThSFLVFWhCLPVuLLL.LhIlWL+FLLSQDScEsDSslChsWEPWSKGPuE.shcGThpGQEcc+..W .MKYPLhPLVNDLTFSFLVFWhCLPVGLLLFLLIIWLRFLLSQDSEENDSsVChDWEPWSKGPAEFCWcuTLHGQEcERPC.h.... 0 2 2 2 +15055 PF15203 TMEM95 TMEM95 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q3KNT9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 102 and 231 amino acids in length. There is a conserved LGG sequence motif. The function of this family is unknown. 27.00 27.00 86.10 33.20 21.10 19.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.91 0.71 -4.81 3 20 2012-06-29 13:14:47 2012-06-29 14:14:47 1 2 16 0 8 32 0 134.10 68 76.95 NEW CVFCRLPAHDLSGRLA+LCSQsEA+pKECGASssFsAFALDEVSMN+VTEKTHRVLRVMEIKcSlSSLPLYWpWLRKTKLPQYTREALCAPAC........RGSTTLYNCSTCcGTEVSCWP+KRCFP.......GSQDLW-A+ILLLsIFGtsLLLGuLSLLVEp+ ..CVFCRLPAHDLSGRLARLCSQMEAp.K.ECGASPDFSAFALDEVSMNKVTEKTHRVLRVM.EIKculSSLPsYWpWL+KTKLPcYTREALC..sPAC........RGSThLYNCSTCcGhEVSCWP+KRCFP.......GSpD.h.h+lh.hss.Gss...Gsl............................. 
0 1 1 1 +15056 PF15204 KKLCAg1 Kita-kyushu lung cancer antigen 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5H943 Family This is a family of cancer antigens [1]. 27.00 27.00 37.10 87.60 18.90 17.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.14 0.72 -3.95 2 17 2012-06-29 13:15:39 2012-06-29 14:15:39 1 1 13 0 5 20 0 87.10 72 78.31 NEW QpssGEMSSNSTuLALVRP.SSoG.hpSNTDpN....LuVhsLShDIL.N.P+oIshQKRhLVNLphhp.KLsELEHhLl.KGhpGA.sphKS ....QRNTGEMSSNSTALALVRP.SSTGLINSNTDNN....LSV.cLSRDIL.NNFPHSIAMQKRILVNLphVE.KLsELEHhLVSKGhRGASsHRKS... 0 1 1 1 +15057 PF15205 PLAC9 Placenta-specific protein 9 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JTB6 Family This family of proteins was identified as being enriched in placenta [1]. 27.00 27.00 28.30 27.70 22.00 20.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.45 0.72 -3.70 4 31 2012-06-29 13:16:29 2012-06-29 14:16:29 1 1 23 0 14 30 0 68.10 66 72.19 NEW AEP.sPutGDsutSsuCDRaMAVpcRLDVhEETVEKTVEHLEAEVKGLLG.LEELAWNLPPGPFSPhPDLLGDs ...........AEPhsPspGDsApSTsCDRHMAVQpRLDVhEEhVEKTV-HLtsEVKGLLGLLEELAWNLPPGPFSPsPDLLG-s......... 0 1 1 3 +15058 PF15206 FAM209 FAM209 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JX69 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 170 amino acids in length. The function of this family is unknown. 
27.00 27.00 123.80 123.60 21.10 20.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.79 0.71 -4.39 5 40 2012-06-29 13:17:50 2012-06-29 14:17:50 1 1 24 0 22 48 0 149.80 61 88.16 NEW FMFSSLREKsKEPQGKVPCGGHFRIRQNLPEHTQGWLGSKWLWLhFVVVLYVILKFRGDSEKNKEQoPPGLRGCoaRSPlRKpQNASPNKDYAFNTLTQLEMDLVKFVSKVRNLKVuMAT..uSNLRLQsLEuPADPaNNVTIYEIWGEEDS .FMFSSLREKspE....PQGKVPC....G.GHFRIRQNLPEHAQGWLGSKWLWLlFVVVLaVILKF....ptDu.EK..NKEQo.PsGLRGssFRSPLKKsQNASPsKDhsFNTLspLEh-LVKFVSKVRNLKsAMAT..uSNL+LppsEhPADPap.lTIYEIWGEEsS... 0 2 2 3 +15059 PF15207 TMEM240 TMEM240 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5SV17 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 54 and 175 amino acids in length. The function of this family is unknown. 27.00 27.00 94.00 93.90 26.60 21.00 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.32 0.71 -4.70 2 25 2012-06-29 13:18:38 2012-06-29 14:18:38 1 1 21 0 18 34 0 141.90 78 98.56 NEW Mph.ssThhhMlhGAslVhAIsCl.DMNALLDRFHNaILP+hRG.-RVCHCsCGRHHVcYVIPY-GstSLssuSts..susSVoKQEhDLhLGLLhGFCISWlLLWLDGAhHCAlRhWRuSRaYss...SWpWlsphCNLR-LRRRhQhRp....-suusNhVHl+QKLYHNGHPSPR+L .................lhGtshh.tIsClMDMNALLDRF.HNYILPHLRGEDRVCHCN.CG...RHHlHYVIPYDGDQSVVDuSENYFVTDNVTKQEIDLMLGLLLGFCISWFLVWMDGVLHCAVRAWRAuRRYDs.....SWoWLPKhCsLRELt+RsH...a...EEssGNMVHlKQKLYHNGHPSPRHL.... 0 1 4 9 +15060 PF15208 Rab15_effector Rab15 effector Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6BDI9 Family This family of proteins has a role in receptor recycling from the endocytic recycling compartment [1]. 
27.00 27.00 78.90 78.60 17.90 16.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.66 0.70 -4.91 2 26 2012-06-29 13:19:39 2012-06-29 14:19:39 1 1 25 0 16 26 0 232.30 61 97.83 NEW MGQKsSQQls.pDSpEV.uhCEVVStAIsHAAQKlKEYLGFE.PLSpLC.AusoLsElFLlHFVTFCQ-+GsDEWLTTTKMTKHQAhLFGADWIWTFWGs-KQIRLQlAVQTL+MuSLP.s-PKsC.....ESRuEE.ShK+uRFDKLpEFCNLlGEDCLGLFIIFGVPGcPKsIRGVVL-SV+pthhpuQLsGRKAVtQFlLETcDClSI+ELLGNCLSK+DGLp-hG+VYIpIL .......MGQKsSQQls.+DSpEl.slCEVVSpAlVHAAQKLKEYLGFEsP.S+LsPAuNTLNEIFLIHFITFCQEKGVDEWLTTTKMTKHQAhLFGADWIWTFW.GuDKQI+LQLAVQTLQM.....uShPP.sEucss.chsssES+....uE.EsSh+KoRF-KLEEFCsLIGEDCLGLFIIFGVPGKPKDIRGVVLDSVKsphs+upLPGtKAVtQFVL-TE-CVsI+ELLtNCLSKKDGL+EVGKVYIsIL........................ 0 1 2 5 +15061 PF15209 IL31 Interleukin 31 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6EBC2 Family \N 27.00 27.00 27.40 73.30 19.90 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.63 0.71 -4.57 4 23 2012-06-29 13:20:31 2012-06-29 14:20:31 1 1 20 0 10 18 0 131.70 46 85.76 NEW opohshtt.hsppDlp+hl-.Lpp.S+sLhcDYpccE.oGlssspshpLPChS.Dpps.sNI..SuIhAYLcpl+sLScpoV.hsclIcpLsclph.ss.psNISVP..TD.oa-CKsFILTlLpQFSsCMspl.tp.Noss .........SHshPhthL.P.s.DlpKIlcELQsLSKhLLcDh.ccE...cGV.sSp.shpLPChosDuQsPsNIpSuAIhsYL+sI+p...Ls....s+oV.I-cIIEpLDKLh...FQcsPETNISVP..TD..oaEsKpFILTILQQFScCM-hshtuhsss.......... 0 1 1 1 +15062 PF15210 SFTA2 Surfactant-associated protein 2 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6UW10 Family \N 27.00 27.00 54.80 53.90 25.80 20.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.85 0.72 -4.39 4 17 2012-06-29 13:21:16 2012-06-29 14:21:16 1 1 15 0 6 19 0 57.40 70 70.04 NEW sGPGMTLQLKLKEoFLssSSYsSSFL-hLpKlCLLLHLPSGTNVTLHpAGS.HHVTCRs ......TGPGMTLQLKLKESFLsNSSY-SSFLELLEKLCLLLHLPSGTsVTLHHAtSpHHVsCps... 
0 1 1 1 +15063 PF15211 CXCL17 VEGF co-regulated chemokine 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6UXB2 Family \N 27.00 27.00 32.80 32.30 22.70 19.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.31 0.72 -3.59 6 23 2012-06-29 13:22:01 2012-06-29 14:22:01 1 1 20 0 12 17 0 89.40 67 76.13 NEW SPNsGVARGHRDp+QAspRWLpEGGQECECKDWFLRAP+RKhhsV.GhP+KQCPCDHFKGphKKTR+Q+HH+.............Ksp+PSRsCQQF...L+pCp ..................SsNPGVARGHRDp+QAS+RWLQEGGQECECKDWFLRAP+RKlMT..VsGLPKKQCPCDHFKGsVKKTRHQ+HHR.............KPNKHSRACQQF...LKpCQ. 0 1 1 1 +15064 PF15212 SPATA19 Spermatogenesis-associated protein 19, mitochondrial Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q7Z5L4 Family \N 27.00 27.00 74.00 28.90 18.50 17.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.62 0.71 -4.10 3 31 2012-06-29 13:24:21 2012-06-29 14:24:21 1 2 22 0 17 29 0 101.30 63 85.84 NEW SSDIEVsESEAVSVVQHWLKKTEEEASRuIK.............EKMSTNsPPTHGQDIHVTRDVVKHHLSKSDLLANQSQEVLEERTRIQFIRWSHTRIFQVPSEsp-DlMRDRIEQVRRSISHLoD-SuQDhShRsSsSEC ...............SSDl-VlEoEAVSVlpHWLKK..............TEEEASQuIK.............EKMShssPPTHGpDlHVTRDVVKH+LSKoshhus.SQEVLEERTRIQFIRWSHTRIFQVPSEhhp-hhp-RIEQVRRShspl.s......t.pss..pC.... 0 2 2 4 +15065 PF15213 CDRT4 CMT1A duplicated region transcript 4 protein Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8N9R6 Family \N 27.00 27.00 82.10 81.90 20.40 19.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.85 0.71 -4.08 6 29 2012-06-29 13:25:57 2012-06-29 14:25:57 1 1 21 0 14 25 0 134.80 61 89.10 NEW cLTENIGLPlsLLEKHsPWPAYVTYhSPhVKRLIEKSKARDLEChpAlEcsp+su+QSKPSSlhQLKRRKSSKsSGphsLKDThSETMLSsWuuhSsssVuPohlPEPtpLHsDSREsPTuNYNKIIFuR+PhMRhLP ..............t.LTENhGLPhpLLEKHDPWPAYVTYTS.sVKRLIEKSKsRELEChpAlEEsp.hsSRQsKPSSlIQLKRRKSSKSSGcssa+DsLSEosLShWGsYSl.AhuPThlPEPT+lHoDSR-sPTpNYNKIIFuRKPhMRMLP.. 
0 1 1 1 +15066 PF15214 PXT1 Peroxisomal testis-specific protein 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8NFP0 Family This family of proteins is testis-specific [1]. 27.00 27.00 28.40 48.10 20.30 18.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.64 0.72 -3.71 2 16 2012-06-29 13:26:47 2012-06-29 14:26:47 1 1 14 0 6 16 0 50.90 72 56.96 NEW MQLRHIGDslsHRhlpEcLtQ-stDsLs.FVhhhFhRsQVLL+FFWNNHLL .MQLRHIGDSIDHRMVpEDLQ.QDGRDALs+FVhFFFRRVQVLLHFFWNNHLL... 0 1 1 1 +15067 PF15215 FDC-SP Follicular dendritic cell secreted peptide Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8NFU4 Family \N 27.00 27.00 75.20 75.00 24.20 24.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.42 0.72 -3.67 2 10 2012-06-29 13:27:30 2012-06-29 14:27:30 1 1 9 0 1 10 0 65.10 68 77.32 NEW hPVspDQEREKRSh..SDELspth.h.Pas.PFtshPPh...t.PWahh.a..Ph.lP..sPTT.hP FPVSQDQEREKRSISD..SDELuSGFhVFPYPYPFRPaPPIPaPRaPWFpRsF..PIPIPESsPTTPLP 0 1 1 1 +15068 PF15216 TSLP Thymic stromal lymphopoietin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q969D9 Family \N 27.00 27.00 27.90 30.90 20.20 16.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.85 0.71 -4.40 5 25 2012-06-29 13:28:18 2012-06-29 14:28:18 1 1 18 0 10 23 0 101.00 53 84.64 NEW YNFTNCNFE+IpcIYpslIF+DLpsYlNGhKSscFNphVsC-sRPuCLTKIEpaTFNPlsGCsSLAKKtFAp+TKAALssaCPGYSETQIN.uTQAM+KR...cVssNKCLcQVSQLLcLWRpFsR.....p .....YsFosCsFpKIpttY.psI.psLhpYMsGsKSopFNpsl.Cps+spCLscIpphTFsPs.tCsSLucch...FAh+TKAsLslaCPGYSETQIN.uTQsM+KRhcccVssNKCLEQVSQL.GLWRcF.R..s.... 0 1 1 1 +15069 PF15217 TSC21 TSC21 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q96LM6 Family This family of proteins is testis-specific [1]. 
27.00 27.00 176.40 176.20 17.90 17.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.28 0.71 -4.60 5 23 2012-06-29 13:28:57 2012-06-29 14:28:57 1 1 21 0 15 23 0 175.40 72 99.63 NEW MAGVVYPpQAPVDLDIYQSSYMVDYKPYGKHKYSRVTPQEQAKLDAQLRDKEFYRPlPsPNPKLEDGYPAFRRPHMTA+DLGlPGFFPPQ-HVTTsEDEsRFTSTC+olYPASHsLYLAQGDPNRl+QSADFPCLLEPERQPAsEVGKGYFLLPGCsCsYHppVKVPILNRWGPLMPFYQ .........MAGVhYPtQsPVDLDIYQSSYMVDYpPYGKHKYSRVTPQEQAKLDAQLR-KEFYRPlPsPNPKLpDGYPAFKRPHMTAKDLGlPGFFPsQ..-+sATtEDEs+FTSTC+hsYPASHsLaLAQGDPNplpQSADFPCLLEPE+QPAuEhGKGYLLLPGCsCsHHp.hVKVPILNRWGPLMPFYQ. 0 1 1 2 +15070 PF15218 SPATA25 Spermatogenesis-associated protein 25 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9BR10 Family This family of proteins may be involved in spermatogenesis [1]. 27.00 27.00 127.00 126.90 17.50 16.60 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.59 0.70 -4.92 3 28 2012-06-29 13:29:44 2012-06-29 14:29:44 1 2 25 0 16 25 0 204.40 78 100.09 NEW MSYFsSPQTHPGLLPSGQGGAASPGSSLGLYSPAEPVlVASGGQGPLSQKAEQVTPVAQAWGPALAV.EARGCPGGVSWEPPRRKEYNRYCHKhPsARQLESLGWEDuCSRSRAPaLGGPSRPpPLLLCGLSPGALPhPSEAGGKEAuSQPDICILTLAMMIAGIPTVPVPGLREEDLIRAAQAFMMAHPEPEGAVEGAQWpQ..A+oHhASGPMALVRSRRGQPPGSCL .MSYFhoPQTH.G.LPSGQGGAASPG.SLGLhSPsEPVVVASGGhGPLSQKAEQVsPuAQAWGPALAhPpARGCPGGsSWETL..RKEYu.RYCHKFPasRQ.ESLGW-DGsSRSRAP.cL.....GG.....PSRPtPLLLCGLSPGVLPhPSEAsGKEAuSQPDICILTLAMMIAGIPTVPVPGLREEDLIRAAQAFMMAHPEPEGAVEGspWEQ..A+A..HTASGpMPLVRS+RGQPPGSCL.. 
0 1 1 2 +15071 PF15219 TEX12 Testis-expressed 12 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9BXU0 Family \N 27.00 27.00 31.30 30.80 23.10 20.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.19 0.72 -4.03 3 27 2012-06-29 13:30:40 2012-06-29 14:30:40 1 1 24 0 16 28 0 96.60 76 82.11 NEW Ms-SP..QlSSLGK.....SDSShLESSGLFYK-EuLEKDLSDMSKEINLMLSTYAKVLSERAAVDASYIDEIDGLFKEANsIENFLlQKREhLRQRFTVIoNTLH+ ........................PDSP..QLSSLGK.....SDSSFSEsS.GLFYKDEuLEKDLNDhSKEINLMLSTYAKlLSERAAVDASYIDEIDtLFKEANsIENFLIQKREhLRQRFTVIANTLHR... 0 2 2 2 +15072 PF15220 HILPDA Hypoxia-inducible lipid droplet-associated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9Y5L2 Family This family of proteins stimulate intracellular lipid accumulation, function as autocrine growth factors and enhance cell growth [1-2]. 27.00 27.00 28.90 43.00 19.40 18.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.94 0.72 -3.99 3 15 2012-06-29 13:35:03 2012-06-29 14:35:03 1 1 15 0 6 18 0 62.40 76 95.12 NEW MKaVLNLYLLGVVLTLLSIFVRVMESLGGLLESPSPGSSWTTRGQLANTEPTKGLPDHPSRGV MKHlLNLYLLGVVLTLLSIFVRVMESLEGLLESPSPGoSWTTRuQLANTEPsKGLPDHPSRuM........................ 0 1 1 1 +15073 PF15221 LEP503 Lens epithelial cell protein LEP503 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9Y5L5 Family This protein may be involved in lens epithelial cell differentiation [1-2]. 27.00 27.00 27.80 44.40 21.80 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.01 0.72 -4.27 3 18 2012-06-29 13:35:43 2012-06-29 14:35:43 1 1 16 0 10 19 0 61.10 81 92.36 NEW MQPpTQPLAQALPFSLRcALpDTGLRVPVIKMGTGWEGhQRTLKEVAYILLCCWCIKELLD MQPRTQPLAQsLPFSLtGALRDTGLRVPVIK..M.GTGWEGhQRTLKEVAYILLCCWCIKELLD 0 1 2 2 +15074 PF15222 KAR Kidney androgen-regulated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P61109 Family The function of this family is unknown. 
27.00 27.00 171.80 171.60 22.00 19.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.30 0.72 -3.94 2 4 2012-06-29 13:37:42 2012-06-29 14:37:42 1 1 3 0 2 3 0 102.80 75 85.09 NEW FP..-lsSINcELQsSIhDlLNSs.D.QLuSYcsopuP.ED.T.p-.sTD...hMphT.utshQ.SEhSsssETVSSuFLEEhTEso-.TVchPLA.ssshSsTS FPLSELVSINKELQNSIIDLLNSVFD.QLGSYRGTKAPLEDYTDDDLSTDSEQIMDFTPAANKQNSEFSTDVETVSSGFLEEFTENTDITVKIPLA.GNPVSPTS 0 1 1 1 +15075 PF15223 DUF4584 Domain of unknown function (DUF4584) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A6NHQ4 Family This family of proteins is found in eukaryotes. Proteins in this family are approximately 835 amino acids in length. The family is found in association with Pfam:PF02437. 27.00 27.00 29.50 29.50 21.00 26.20 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.55 0.70 -5.20 8 68 2012-06-29 13:50:24 2012-06-29 14:50:24 1 3 37 0 39 59 0 321.00 50 52.29 NEW pPPlhhpssF+hpssssssspps....tssssGhDsppp.Kspss-ph.s.pthssuusssssPsSl..s.s.hppp............t.pssspRTD..........ss......oPsPptspsF.Ppp+h.......Pupsspsssus.ss.ss.pp............s.ssp.t+c.+hs.s.phsoslc+hp.-.........PpssucpsAcoPssh..s.s.PFpL+Nl+lKlE-....-EaEhthpssclpCcscsuctp..s...hKptD......p......stpcssu....s.s.......sLcsPss--Gtp+sss+..Ksh+s.l.tp+...t...ssA+sstKssRps+ssu+ossspto.t.............tht.h......sRRK+...AssssuPsKpsFSLMANFPsPPoLllGpDGDLsPAYSLNop+sspPPP.uHPlW+WQlGGsslP.PPu........pKhRKh 
..............................................................s.ssh.hpssF.Yp.tsttt.pt........p.tt.....pp.ttpp...h.s..thsshss.h.p.suhssshschppc....t.....c.p..sssppTD..........................ss.t.ss.SPusppspsF.s.pRh.......htpspKC.psh.ss.sspsp...................................................................p.t+.s+h..s.ch......sosl.p.ps-s.................................tstssttsts..........t.PF.LHNlKIKlE-su..-EYE.ph.....spplKCcssssctph.s.st.cpp-hhh..thc............sppcssuh.s......hs.....ps.......sLsoPps--GEhK.sA+VpKNaRoLVLGK+..h.ps..sPsKsshKssRSPRPsuKo.............popEu.oLc.............shssh......sRRK+..sAuNs..sSssKpsFshMANFPCPPSLllGcDGDLhPAYSLNop+DSpPP..AHPlW+WQlGGsAlP.P.Pu........pKFRKa........ 0 4 7 14 +15076 PF15224 SCRG1 Scrapie-responsive protein 1 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:O75711 Family This protein family has an important function in acting against the prion protein, Scrapie [1,2].This family of proteins is found in eukaryotes. Proteins in this family are approximately 98 amino acids in length. 27.00 27.00 126.90 126.70 20.80 19.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.92 0.72 -4.16 2 33 2012-06-29 13:53:27 2012-06-29 14:53:27 1 1 27 0 19 25 0 76.80 81 79.22 NEW hPupR.SCY++IL+s+sCHSlPEGhAsLp+lD.slQDHFW-GcGCEhlCYCNFpELLCCPK-lFFGPKISFVIPCNsc .MPuNRLSCYRKILKD+NCHNLPEGVADLTpIDV.NVQDHFWDGKGCEMICYCNFSELLCCPKDlFFGPKISFVIPCNNp....... 
0 1 2 5 +15077 PF15225 IL32 Interleukin 32 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P24001 Family \N 27.00 27.00 69.50 63.00 22.50 18.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.45 0.72 -4.09 2 18 2012-06-29 14:50:41 2012-06-29 15:50:41 1 1 7 0 1 29 0 92.30 85 50.78 NEW E.TPLL.c.RptLRsRspRSsVPslED.uhE..-PtESFhD+shRhFQthLppLQphWpuVLAWV+chVs....ALspAVpAlWp.FQsFCs.luplhhSuhQu .ELTPLLEKERDGLRCRGNRSPVPDVEDPATE..EPGESFCDKVMRWFQAMLQRLQTWWHGVLAWVKEKVV....ALVHAVQALWKQFQSFCCSLSELFMSSFQS... 0 1 1 1 +15078 PF15226 HPIP HCF-1 beta-propeller-interacting protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NWW0 Family HPIP is a small cellular polypeptide that binds to the beta-propeller domain of HCF-1. HPIP regulates HCF-1 activity by modulating its subcellular localisation. HCF-1 is a cellular protein required by VP16 to activate the herpes simplex virus- immediate-early genes. VP16 is a component of the viral tegument and, after release into the cell, binds to HCF-1 and translocates to the nucleus to form a complex with the POU domain protein Oct-1 and a VP16-responsive DNA sequence. HPIP-mediated export may provide the pool of cytoplasmic HCF-1 required for import of virion-derived VP16 into the nucleus [1]. 22.10 22.10 24.20 22.10 19.20 17.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.66 0.71 -4.05 9 35 2012-06-29 14:59:46 2012-06-29 15:59:46 1 1 28 0 17 37 0 118.50 66 81.84 NEW ILQQPLERGP.GtAQRDPRAAoGsohGLDs...................REPLRKQFLSEENMATHFSRLSLHNDHPYCSPPhsFPP.ALPPLRSPCSELLLWRYPGsLIPEALRLLRLGDTPoPaYPAoPAG- ......................................ILQQPLERGP.GtsQR.PRAA.GsotGLDAs........................pEPLRKQFLSEENMATHFScLSLHNDHPYCSP..PhsFP...P.ALPPLRSPCSELLLWRYPGsLIPEALRLLRLGDTPoP.YPAoPAGD..................... 
1 3 3 5 +15080 PF15227 zf-C3HC4_4 zinc finger of C3HC4-type, RING Coggill P pcc manual Domain This is a family of primate-specific Ret finger protein-like (RFPL) zinc-fingers of the C3HC4 type. Ret finger protein-like proteins are primate-specific target genes of Pax6, a key transcription factor for pancreas, eye and neocortex development [1]. This domain is likely to be DNA-binding [2]. This zinc-finger domain together with the RDM domain, Pfam:PF11002, forms a large zinc-finger structure of the RING/U-Box superfamily. RING-containing proteins are known to exert an E3 ubiquitin protein ligase activity with the zinc-finger structure being mandatory for binding to the E2 ubiquitin-conjugating enzyme [3]. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.66 0.72 -4.08 13 3078 2012-10-03 15:03:13 2012-08-03 15:33:49 1 84 232 5 1547 3331 31 42.10 42 9.14 NEW CPICh-YLccPsolpCGHsFChsCIsshpc-scut....hhCPhC .....CsICL....-.hh....p..-.....PV.o.l.s.C.GHsFCpsC.l....p...p...h.......a...p...p.....t....p...t.t..............hsCP.C.................... 0 267 479 886 +15081 PF15228 DAP Death-associated protein Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A0PJW8 Family \N 27.00 27.00 31.40 30.90 26.20 26.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.73 0.72 -3.26 49 159 2012-08-06 10:09:36 2012-08-06 11:09:36 1 2 102 0 88 149 0 90.50 37 87.26 NEW KuGHPPAVK.AGGMRIs.p++.........t..pscpcp.psc.sppcs.hpphssss.h.......lhloGslsKts+DFP...spAspshH.p....KPpPul-ptsss...pth....h...............pQPRK ....................KuGHPPA...VK.AGGMRIs.pKp..........sscpcpppp.ttppp..ps.ssss.h.......shloGslsKhs+DFP...ssuspsAH.p....KPpPul-Khsss....cp....I.....QQPRK......................................................... 
0 22 29 56 +15082 PF15229 POM121 POM121 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NNC1 Family \N 27.00 27.00 27.50 27.20 26.70 26.80 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.36 0.70 -4.93 32 248 2012-08-06 10:20:27 2012-08-06 11:20:27 1 5 42 0 92 193 0 179.20 33 32.00 NEW L+Es.t++ss.ccE-.hhs-u.......-s+RRps-ostpu.SAFcPLhssGs.uSFVP+PGsLKRulpspsS-cshsK.RSpsSShSSh..ssh.sthP..opRNAIoSSYSSopGhsphh..KRs.........suoopsQhPcpPsKKtpcc...tpp..s...ssPhhosp..ttt............s..p..s..sussosuuStp.++.....RKlPLL..ssppG-.LsLPPPPQLGasVTsEDL.-LEKKAulphhNpsLEs .............................................................................................................p...t...p.c-.h..pt.......psp+p..psstt.h.Ss.cPL.tpGs.sshhspP..tsLptsh.pspp.o.-.p.sp.+upsS.hSSh............spRNAIs..SSaSSotGhsthh..+Rps...............Puoupsphs.psuKphpcc...............ss.hssst...ptptc+......ss.sshpp.s.tsu.s.pst..uSt..p+......+.K..h....Ll..........pttt-.l.LPPPspLGa.lstcDl.sht+cuthph.pphh................................................. 0 25 28 36 +15083 PF15230 SRRM_C Serine/arginine repetitive matrix protein C-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A7MD48 Family This domain is found near to the C-terminus of Serine/arginine repetitive matrix proteins 3 and 4. 27.00 27.00 30.50 30.50 24.00 24.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.59 0.72 -3.89 14 73 2012-08-06 10:34:30 2012-08-06 11:34:30 1 2 35 0 39 59 0 67.70 64 12.36 NEW hSt.p+p...sRE+-pcscspc.pcsctsRtRRRpRSYSPl..RKRRRDSPSahEsRRIT.......SA......RKRPIPYYRP ......................................hS+ao.....uRERDschpp+c.pppc+pR.ARRRRRSYSPh..RKRRRDSPSHLEARRIT..........................SA...RKRPIPYYRP........... 
0 3 5 16 +15084 PF15231 VCX_VCY Variable charge X/Y family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O14598 Family The variable charge X/Y (VCX/VCY) family of proteins has members on the Human X and Y chromosomes, is expressed in male germ calls and may play a role in spermatogenesis or in sex ratio distortion [1]. 27.00 27.00 43.00 28.30 26.50 26.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.17 0.71 -3.80 5 104 2012-08-06 10:46:14 2012-08-06 11:46:14 1 6 8 0 12 91 0 92.90 61 104.49 NEW MAPKhRASGPPAKAKETRKRKSSSQsSPSoPKK....sPKlAKKGKAuRGGRGGKKRAA.cKM..AAVsAPEAGSGPAPPGPScPPSQELPQHEL.P.PEEPVSEGTQPDPLSQETQL....................EDPLSQETQlEEPLS-tt....lpps.p..opl.p.........-PLS .............................................................................................................................-PLSQEoplEEPLSQEoplEEPLSpEop.lE-P.pQpoph.................................. 0 12 12 12 +15085 PF15232 DUF4585 Domain of unknown function (DUF4585) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:E7EW31 Family The function of this protein domain family is yet to be characterised. It is putatively thought to lie in the C-terminal domain of the DNA nucleotide repair protein, Xeroderma pigmentosa complementation group A (XPA). The function of XPA is to bind to DNA and repair any mismatched base pairs. This domain family is often found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved DPE sequence motif. In humans, this protein is encoded for in the chromosomal position, Chromosome 5 open reading frame 65. Mutations in the gene lead to myelodysplastic syndromes, where there is inefficient stem cell production in the bone marrow. This suggests that the protein may have a role in forming blood cells [1]. 
27.00 27.00 36.70 39.10 22.90 18.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.68 0.72 -4.12 13 64 2012-08-06 11:01:28 2012-08-06 12:01:28 1 2 28 0 39 60 0 71.60 48 6.08 NEW pssassTQ+KlL.DPcSGcYalVDhPl..Qs+hKphaDPETGpYVcVslPsSttu..tssss.shs.uPhsLhPuhaPs ...p.shstTQtKlLlDPpoGpYYlVDsPl..QPph+pLFDPETGQYV-VslPs.sp.s...shssh.hs.sPlALuPGhYss......... 0 3 9 13 +15086 PF15233 SYCE1 Synaptonemal complex central element protein 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A8MT33 Family This family of proteins includes synaptonemal complex central element protein 1, a component of the synaptonemal complex involved in meiosis, and synaptonemal complex central element protein 1-like, which may be involved in meiosis [1-2]. 27.00 27.00 27.20 27.20 25.20 25.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.83 0.71 -4.23 17 64 2012-08-06 11:54:32 2012-08-06 12:54:32 1 2 29 0 32 68 0 134.00 58 50.35 NEW tGSLEPpIE-LIsRIN-LQQ...AKKKusEELGEspslh-ALp+ELDSLsuEKlHLEElLs..KKQEsLR.ILQLHCQ-KEoEuphp............................RhcF...Ep.QLEpLhpp...........HKcL......WEF+h.sppLupEIssl-su..KEQLLpE .....................................GSLEPplEsLINRINElQQ...AKKKusEELGEApslhEALp+ELDSLpGEKV.+LcElLs..KKQEsLR.ILpLHCQEK.......ESEApRp..psh...h.....................RLsFEE.QLE-LMGQ...........HKDL......W-FHh.sc+LA+EIssL-SS..KEQLLpE....................... 
0 2 3 5 +15087 PF15234 LAT Linker for activation of T-cells Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O43561 Family \N 27.00 27.00 28.90 36.50 23.70 23.70 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.73 0.70 -4.88 10 45 2012-08-06 12:03:20 2012-08-06 13:03:20 1 2 25 0 17 36 0 221.60 68 95.17 NEW MEsslLl.PssLGLLLLPLLAVLLhALCVRCRELPG..SYDSA..........ooDSLaPcSIlIKpPt...TluPWPPAs.Ys.VTSaPPLSQPDLLPIPRSPQPPGGSHRMPSSRQDSDG.ANSVASYENE...........................EPACEDsDEDEDEEDYHNE...GYLlVLPDSoPATSou....VPs.APVPSNPGLRDSAFSMESG-DYVNVPESEESA-ASLDGSREYVNVSQELQP...sARTEPATloSQ.......ps---EEEsAPDYENL..QE ...........MEtshLl.PslLGLLLLPlLAh.LhALCV+C+cLPG..SYDSs..........SSDSLYPRu.I.lK+Pp...TlsPWPPAh..P.VTSYPPLSQPDLLPIPRSPQPLGGSHRhPSSRpDSDG.ANSVASYENE..............................................EP.ACED.-tD..EDEDDYpN....GYLVVLPDSoPAoSou....ssu.APs.SsPGlRDSAFSMESh-DYVNVPEStESA-ASLDGSREYVNVSQELpP...sA+TEPAslsSQ.......Et.-psE-EtAPDYENL..QE........... 0 1 1 2 +15088 PF15235 GRIN_C G protein-regulated inducer of neurite outgrowth C-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O60269 Family This represents the C-terminus of the G protein-regulated inducer of neurite outgrowth proteins [1]. 
27.00 27.00 63.80 59.70 20.30 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.10 0.71 -4.09 16 86 2012-08-06 12:19:08 2012-08-06 13:19:08 1 1 34 0 51 88 0 133.70 48 21.15 NEW +SVATuPhpss.............s...pststsa...............PEVplc.........s.pcspp.PVR-VsWDEcGMTWEVYGASlDsEVLGhAIQKHLEhQIEpa..QhtP.....st.t.pts.....................ppssspppt+Rps...FRshhpslR+PsCCsRuusss..E ...............................................................+uVtsuPhhst.................sutsthh...............Ppsphc.............s..ctspPVRDVsWDEcGMTWEVYGASlDsEVLGlAIQKHLEpQIcEatcphts.....t.....ts........................................................p.spssspttt+Rt.sh.FRuhlQslRRPsCCsRuusu.............................................................................. 0 4 9 16 +15089 PF15236 CCDC66 Coiled-coil domain-containing protein 66 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A2RUB6 Family This protein family, named Coiled-coil domain-containing protein 66 (CCDC) refers to a protein domain found in eukaryotes, and is approximately 160 amino acids in length. CCDC66 protein is detected mainly in the inner segments of photoreceptors in many vertebrates including mice and humans. It has been found in dogs, that a mutation in the CCDC66 gene causes generalized progressive retinal atrophy (gPRA). This shows that the protein encoded for by this gene is vital for healthy vision and guards against photoreceptor cell degeneration. The structure of CCDC66 proteins includes a heptad repeat pattern which contains at least one coiled-coil domain. There are at least two or more alpha-helices which form a cable-like structure [1]. 
27.00 27.00 27.00 27.40 26.60 26.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.18 0.71 -4.61 19 118 2012-08-06 12:25:34 2012-08-06 13:25:34 1 2 69 0 69 126 0 148.20 35 16.22 NEW sssssttspsspptpsspsspssshps.p....p.saLRuhoA.LLDsup.....lpER-cRRpKthEaQcAIttQlEE+c+hKphEcpp+ppEEppEEpRltRE+pphpcpaEc-hhcp+pKEEhhppKTptLhpshp+ApEhApc.K.pc......................pRh+cltpcsc-hpph ..............................................................p.......tptp......saLRu.ss..lDstp.....ltE...R-c+RpKQhEappAlptQlEEK+p++phEcEp++pEEpcEE..pRLAc-Rp.chQcp..aEE-hh.+p+pKEEh......phKspcL....hpshpcApc.Ap+hK.pc................................................pRh+chtpctpt....h....................................................... 0 20 24 41 +15090 PF15237 PTRF_SDPR PTRF/SDPR family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95810 Family This family of proteins includes muscle-related coiled-coil protein (MURC), protein kinase C delta-binding protein (PRKCDBP), polymerase I and transcript release factor (PTRF) and serum deprivation-response protein (SDPR). MURC activates the Rho/ROCK pathway [1]. PRKCDBP appears to act as an immune potentiator [2]. PTRF is involved in caveolae formation and function [3]. SDPR is involved in the targetting of protein kinase Calpha to caveolae [4]. 
27.00 27.00 28.60 28.10 24.40 24.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.65 0.70 -4.97 30 215 2012-08-06 12:39:37 2012-08-06 13:39:37 1 2 43 0 103 180 0 216.10 41 68.95 NEW QlsAlTVloLLDKlsshlDsVQpsQpphEpRQt-hEsu....V+sIQu-lsKLo+uHssTSsTVsKLLEKsRKVSsplKsVRsRl-+QusQVKKLEsNcscLL+Rs+F+VlIaQ-EsElPuplsspp.psh......................t.t..t.....pch.tshcL.SSDEE..h.l.............................p.hEESRAcRlKRSuL++VDsLKKAFS.........................................................................Rp..................sl-KKhs+luT+I.VsPERREKI+......cKShpss+t.stc..KposhKlsPh ...........QlsulhVloLLDKlhshlDplQtsQtphEpRQ..tphEsu....VpuIQs-LsKLo+uHssTSsTVsKLLEKsRKVSspsKsV+tplE+QssQlK+LEsNcscLL+RppF+VlIa.Q-EsclPupl.hpt.t.h............................t..s.ptpt.cct.tshpLSSD--.h.l...............................-ESRAp+l+RSuhc+VDslKKAFS....................................Rp..................sl-+Khs+lsT+l.VssERRE+h+........pShp..+...tp..Kps..pssP......................................................... 0 4 15 39 +15091 PF15238 FAM181 FAM181 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:A6NEQ2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 256 and 426 amino acids in length. 
29.00 29.00 29.00 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.20 0.70 -4.35 20 80 2012-08-06 12:48:17 2012-08-06 13:48:17 1 5 39 0 48 70 0 248.20 35 73.36 NEW tsscpss+sLLsFls.ASSsIKhALDKsu.s+RpVsHRKYLQKQlKRhSthhsthP.t....Pspsspsps...+...tstststp...pspssspppustus.........shp.........pcSLuu.......................................t.h.tt.s.s...scp.VPLR+RsLPsSFapEP.....psspsh.....sussssslss...............scsschh-lLGP-..................sshsscps.h..ss...t...olss+..ssshss.....sPh.acupslhtu...hstsh....s.....sssshssls.hp..ss.s...........sscshp.sttss..up.hh.pssl.cs.sspsu ................................hsscpss+.LLsFls.ASSsIKhALDKsu.s....+RpVsHRKYLQKQlKRhSthhuthP.s............Pspssts.h.......+........us.ts........s.ht..h.s.ssspp.ustss...........tsshp.........pcsLut......................................tt..ttpsstu....stpVPhRpRpLPsSFapEP......p.spuh......sG.psuLss........cutcsschhE.LGP-...................ts..tp-shhh.ss.......uhssh..sss.ph.....pPh.acspshhsG.....s...sh......s.s.hssLsh.+..ss.............shc.ha.sttss..up......................................................................................................................................................................... 0 12 15 25 +15092 PF15239 DUF4586 Domain of unknown function (DUF4586) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A7E2U8 Family This protein family, refers to a domain of unknown function. The precise role of this protein domain remains to be elucidated. This family of proteins is found in eukaryotes and are typically between 256 and 320 amino acids in length. There is a single completely conserved residue, phenylalanine (F), that may be functionally important. In humans, the protein is found in the position, chromosome 4 open reading frame 47. 
27.00 27.00 27.00 31.40 26.80 26.20 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.38 0.70 -4.87 41 143 2012-08-06 12:48:46 2012-08-06 13:48:46 1 5 80 0 94 135 0 260.60 27 87.27 NEW csch-RhGLFS-.sa......lolGDpYhp.tpp......scssscsKpMLsuusKpts.shpsuYFs.pahplhpGEsY........tD.s+hcRphclcptKK..slu.KsFhPusGtKp.s.GhGsaaGsass.h...h........hss.p+spcthh.sts+........NhhTNPuK+Go.aGYsslTlup...sass..DsY-pt+chtccctpc+cphh......cG.ssF+h..shaspcaFDss.Pa.........h........pppsl..sPhcp.t.ttpphstP........FKPosP.....sc..t......ut+sGsFss.aPpapsDPhs.chpp..t..hpspp..............tt+hF+PssssK.o........pPssSIhstslp..+ph .....................................................................hhGlFSp.sa......lsl.G.....DpY.p.htt.......tstsps+phls.s...ssKp.h...phpsuaF-.pah.plhpG-sY........hs..........p..hp+ptthcptKK.........sls.psFhPss..s.Kcss.GhGsaYGshst.h...h..............hsstt+spcthh....s.s+..............NlhTsPuK+Go.YGY..ss...l..slup...pat..........-.Y-ttc...cpppppccphh......hu..ssF+....shhstt...hFDts.sh........h.............ppsh....s.pp.....ph..ht.s........a+P.pP.....s...t...........uhhtGshs...aPpa.tDPh.ht.tp........pt...............tc.ahPssssp.s........h...Sl..........h................................................................................. 0 50 62 78 +15093 PF15240 Pro-rich Proline-rich Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P02810 Family This family includes several eukaryotic proline-rich proteins. 
27.00 27.00 28.10 27.00 26.50 26.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.38 0.71 -12.12 0.71 -4.18 16 217 2012-08-06 12:57:24 2012-08-06 13:57:24 1 10 16 0 45 325 0 107.60 39 99.71 NEW MLlILLoVALLALSSAQs.sE-VupE-ssSllu.....tt...........tsps.t.spp.Qt...........PP.GG..spsP...........................................................................................P.sGGPQp.pPPQsGp....................PpGPPP.GG...........................................ts.......tp.QGPPPQtGsp.....tPP.Ps......................pPQGPP....................t.........................s.sGss...................QtPPP.........PPsGpPQG..PP........................tPPQsG.pPptPPQ .................................................................................................................................................................................................................................................................s..t..tsQt..PPpsGp..............pGPP.PQGs.sp.......t..sspP.....Gp.QGPPPQGGsQ....tPP.ss.........pPpG.PPsQt...s......................................................................................................................................................... 0 21 21 21 +15094 PF15241 Cylicin_N Cylicin N-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P35663 Family This is the N-terminus of cylicin proteins, which may play a role in spermatid differentiation [1]. 27.00 27.00 31.90 30.20 25.30 25.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.40 0.72 -4.18 22 69 2012-08-06 13:08:30 2012-08-06 14:08:30 1 1 23 0 27 48 0 108.90 52 24.53 NEW QclNhsTYDNhIPlSE.S+KSWNQpHFuLsFPKPPpPG+K+RS+PSpLp...sVP....+p-ccKlccspKs..lWh++SLh+I.pRPSlYLAsRRQs.Ph+.shsspscscpApsc ...................cVNhtsYDN.IPlSE.S+KSWNQpHFALsFPKP.pPGpK+RS+PSplp..TVs....hhDccKLcpspKs..lWh++SLh+I.pRPSlYLAAR+Qs.Ph+.sassKscsKpAE....................... 
0 2 2 3 +15095 PF15242 FAM53 Family of FAM53 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q14153 Family The FAM53 protein family refers to a family of proteins, which bind to a transcriptional regulator that modulates cell proliferation [1]. It is known to be highly important in neural tube development [2]. It is found in eukaryotes and is typically between 303 and 413 amino acids in length. 27.00 27.00 28.70 27.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.39 0.70 -4.88 20 155 2012-08-06 13:28:35 2012-08-06 14:28:35 1 2 41 0 67 126 0 256.80 38 76.63 NEW MVhllTcpLpppuh...DDlss+ohshs..apsccho.pussLFshsls..-s..pWpslstsssl.....ptcss...usShtshhssh.t......h..tuhpW.........p.-uss.ssslouLlpcLSLs....-ssu.....sPsAPPSKRpCRSLShsDELupC.RSsWRPtGSKVWTsVcKRRCpSGGSlphts.......p.tsG.sshQRSoShSLPupus......h....hspt..h..shsutss.sssstuupsss......t.t.hs.pRpLSLSpEpl.shsc....suAs..SoPsSTPELuRRsGt.......LsRSRSQPC..VLsc+KsuhKRRRsEDs+hpRPSLD..hsKMTQ .......................................................................................MlhllocpLpppsh...Dpltpptht.t........scphs.pussLhshtl....-s.s.W+sLs.ttssh....p.pts...s.sh...................u..W.........p.puss.sst.pshhpsLshp............-pts.....................sPsAP.PoKRpCRSLS.s-E.hu..psRosWRPtuSKVWT.PVpKRRCtSGGSsph.p..........ss..thpp.pshsLPppss.hs.........h.pt.hh..s.t.tsu.hs...supusps.u....s..t.s.hs.pRphSLSpEph..phsp.......PSAs................SoPsSTPELsRRtu........L.RsRSQPC..VLss+KsthKRRR.EDs...p..pRPSLD..hhKMsQ................................................. 0 5 10 25 +15096 PF15243 ANAPC15 Anaphase-promoting complex subunit 15 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P60006 Family This is a component of the anaphase promoting complex/cyclosome [1]. 
27.00 27.00 30.50 37.60 26.80 26.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.53 0.72 -3.92 25 114 2012-08-06 13:34:13 2012-08-06 14:34:13 1 2 81 0 67 91 0 93.20 45 78.43 NEW Ms.....shFPsLhPRls-shWFsl.D...............cPCsEEsELp.........p.EQpHQsWLpSIuc+sssLlPIGKsssE.............pct-s---....-sp--s..-csEptpp-p-Eh--h ........Ms..shFPSLhP+lT-oLWFNl.D...............+PCsEEoELp.........ppEppHQsWLpSIuc+ssNL...lPIGKPsoE................tp-c--pD-E....-s--Du......--sE-..pD.DEhsD....................... 0 11 20 40 +15097 PF15244 HSD3 Hydroxy-steroid dehydrogenase Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9P0W8 Family This family also goes by the name of Spermatogenesis-associated protein 7 or SPAT7. It is an aldo-keto reductase (AKR) human type 3 3-alpha-hydroxy-steroid dehydrogenase (H3-alpha-HSD3, AKR1C2), and it plays a crucial role in the regulation of the intracellular concentrations of testosterone and 5-alpha-dihydrotestosterone (5-alpha-DHT), two steroids directly linked to the aetiology and the progression of many prostate diseases and cancer [1,2]. Mutations in the gene cause Leber congenital amaurosis (LCA) and juvenile retinitis pigmentosa (RP), the most common hereditary causes of visual impairment in infants and children [3]. 
27.00 27.00 28.10 27.50 26.90 26.90 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.49 0.70 -5.23 10 108 2012-08-06 13:52:06 2012-08-06 14:52:06 1 3 48 0 44 107 0 234.10 43 66.25 NEW +VRuosVLPRYuPPCLFKGHLSTKSNAFCTDSSSLRLSTLpLlKNHMAVHYNKILSAKAAVDCSVPVShosSIKYADQQRREKLKKELARCEKEhKLoKouhQANSKssSKSlhNoLQKPuGEPQsctshllEchNpFsSFu+SllsSSE+LcLu.LsKSscslosGocKNuSSS.oSh.............-hssSsPR+ssSussauR...+PRSshssSHR.FQLVlSKAPSGDLLDKHSEhFSNKQLPFTPRTLKTEAKSFLSQYRYYTPAKRK.KDhoDQcIEAETQTEL.SSF+S-hsTAEpKshT-SElNIpQA..SsCsTaGTK-KhsPhstptpsLsW-cl.K-ssLQpSSsRu.lCpYSlQ..Pus+KIaS-EEELLYLSFIEDVTDEILKLGLFSNR.FLERLFERHI+QNKHHLEEcKMRHLLHlLKVDLGChS ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p....t..c.......-EhhYlpFhtslTp-llphGlaos+..LpplFcpHlppp+.pLpttph...h..Lp.p.................................................................................. 0 11 13 27 +15098 PF15245 VGLL4 Transcription cofactor vestigial-like protein 4 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14135 Family These proteins act as transcriptional enhancer factor (TEF-1) cofactors [1]. 
27.00 27.00 28.30 27.20 24.00 25.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.90 0.70 -4.50 18 112 2012-08-06 13:52:32 2012-08-06 14:52:32 1 2 63 0 55 101 0 186.70 47 66.05 NEW LLNaQYLDKM.NNNIG...lLpYEG....pstLRuEsRhpoLs......................thoscRTuPPPlsPoKRKhSt-QuDsch-p-s-HhoKMSRhFusp..Lsp....s.st-..Rpc......php+u+......SPh-phsss.o.ulhus.HlYuoh............sshu.hDQPLAL.TKsoh.-us+o.....tthsspssssER.QNRPSVITCAPAssRNCNLSHCs..shsusss....s.++.s....sTuCDPVlEEHFRRSLGcNYKEs ...........................................................................................................................................h....ht.......pt..............pstL+.u-sRhpsLsh.....................shospRTsPPPlsPsKRKhSh-.uDpch-p-s-HhoKMSRhFss+..Ls+....ssNGDp.Rc-.........R-RSR..........SPIER.usus.ohoLHus.HlYsSh.................h-QPLAL.TKNSh.-uuRs......ths.shsssERQQNRPSVI...T.CAsAs.sRNC..NLS.HCs.hsHsuCss.hss............................t.ssssssCDPVlEEHFRRSLG.KNYKEs........ 0 9 14 35 +15099 PF15246 NCKAP5 Nck-associated protein 5, Peripheral clock protein Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:O14513 Family NCKAP5 is short for Nck-associated protein 5, which is also known as the Peripheral clock protein. NCKAP5 is a protein family, which interacts with the SH3-containing region of the adaptor protein Nck. Nck is a protein that interacts with receptor tyrosine kinases and guanine nucleotide exchange factor Sos. The role of Nck can be thought of as similar to Grb2. The role of NCKAP5 is to assist Nck with its adaptor protein role [1]. 
27.00 27.00 82.30 38.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.26 0.70 -5.05 20 107 2012-08-06 13:52:54 2012-08-06 14:52:54 1 2 37 0 48 83 0 261.60 41 20.62 NEW slpsoIEEKVMhGIpENVpKsQtQsKu.sosEsKQKsuPSlASWFGhRKSKLPALSuRKsDsuKsK-EKK-hKhhuhssppthscc++ccKctppphplss.hs+sp-htc..p.-puhhspps.cpspcstsp.........hppttpph.s.sstsspDsFMpELLNRVDcKsstpscpssspsshpshp+uSspupsh.ssshuspusp++.hps+hphptsppsh......htpts-phpc-Epss.sDos....hQsHhlsSusQhRTLDSGIGTFPLPDSssRusuRpsspppps.cp-s.sshp.shsssssl.+....ApTLEREVPSss .............................................................................................................................................................................s..pssIEEKVMhsIpENV.+hQsQp+u.susEsKp+s.ssShASWFGh+KS+LPALs.R+h-soKsK.t....thhu....s.pp..schctc.Khtt.phplpp.ht+spc.tc..p.-puh.spps..pspshhst.........ht...tph.u.ht..stDsFMppLLN.R..VDtKth..pp.tpspsphtsh..soopsps...suhuspss....hhs.hphpt....th.......................ssEshpc-E.ss..-sh.....psHhhtsss.hRTLDSGIGTFP.PDpGsp.ussph.h.tsps.ch-s..sl.su..t.sssh.+....ApTLEREVPu..p.......................... 0 2 5 16 +15100 PF15247 SLBP_RNA_bind Histone RNA hairpin-binding protein RNA-binding domain Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14493 Family This family represents the RNA-binding domain of histone RNA hairpin-binding protein [1]. 27.00 27.00 37.20 39.80 26.90 24.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.61 0.72 -4.21 52 232 2012-08-06 14:32:12 2012-08-06 15:32:12 1 2 135 1 151 227 1 76.20 47 23.06 NEW hEsD.p......pLtpRQKQ....I-aGKNThGYppYlppVP.+ppRp..p.....tHPpTPsKapphSRRuWDupl+hWR+tLHp.a......D...........Ps ......................EpDpp....hLtpRQKQ....IsYGKNThuYc+YlctVP.+c..tRp.......shHPcTPsKap+hS+RuWDtpl+lWRptLHh.aDs........................ 
0 62 84 119 +15101 PF15248 DUF4587 Domain of unknown function (DUF4587) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:P58505 Family This protein family is a domain of unknown function. The precise function of this protein domain remains to be elucidated. This domain family is found in eukaryotes, and is typically between 64 and 79 amino acids in length. There are two conserved sequence motifs: QNAQ and HHH. In humans, it is found in the position, chromosome 21 open reading frame 58. 27.00 27.00 44.40 44.40 19.90 19.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.13 0.72 -3.38 28 70 2012-08-06 14:40:19 2012-08-06 15:40:19 1 2 34 0 36 68 0 68.60 47 28.36 NEW RsGuI..KEDMVEhMLhQNAQMHQllMpshMl+ALPPhs....s..................stssslcspc.....pcPssVHHHHaa .......+sGplKEDhVEhMLhQNAQMHQllhpNhML+ALPPs.s....st........p....s...p.hhhcspc...pcPssVaHHHa................ 0 4 7 14 +15102 PF15249 GLTSCR1 Glioma tumor suppressor candidate region Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NZM4 Family This domain family is found in eukaryotes, and is typically between 105 and 124 amino acids in length. There is a single completely conserved residue F that may be functionally important. Mutations in the gene for this protein in humans leads to the development of oligodendrogliomas [1]. There is evidence that these protein interacts with SH3 domains [2]. 
27.00 27.00 27.60 27.40 26.90 26.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.51 0.72 -3.95 54 193 2012-10-03 11:11:44 2012-08-06 15:41:14 1 6 115 0 126 197 0 109.40 33 10.45 NEW htpphttcptt.......shpP...........Dh.p...sPFpo..hpDAlcRL..LPYHlhtp.p.s..........pDhpt........................hDcp......hcspssp.h..hp+hpphhp+aphllhccut......cts...spEtlh...lpphhhp-E+pplpc ...............................................hppLppcpst....................shpP...........Dh.poPFpShcDAlpRL..LPYHVhpss.sst.................pDhpp.......................................................................hDpc......aEshusp.h..hc+hpthlsKaphlLhc-uh......chss..osEhlM...l-phhlp-E+tplt............................................................................ 0 38 57 88 +15103 PF15250 Raftlin Raftlin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14699 Family This family of proteins plays a role in the formation and/or maintenance of lipid rafts [1]. 27.00 27.00 30.90 41.60 24.80 24.80 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.61 0.70 -5.96 19 145 2012-08-06 14:45:02 2012-08-06 15:45:02 1 6 38 0 65 124 0 348.00 40 86.42 NEW MGCGLRKLEcsDDsSPGKIaSTLKRPQVETKsDsAYEYhhLDFTl..pu..ss.sssl+luSlhDlPsplp-hYpQGalluAlHPhlpPsscpcplPtpplaRAlLl+hp.p..ppcss.hspct.pLhlEECs.stcshss-hlppLIcK.lp-AAcpGh+FVGhl.........spths.spssssos....sstsh-.c.s..........................................tthcpss-css+s.......spush.sspss-suhcpchptpss......h.......ss.s.sp.........ch+LaslFNt.c......c.spsChpYapsslsh+VoRpGpslSoL-AsWLEhhohaa+pG.hSLVDuasphshs+.Dpls+sl-GlFIaEEtuousstosp.G.DAIVVEQWTVIEGsEVKTDYsPLLpoLApFGWhLTCVLPTPIl+ps.SEGNLATKQlVFLQRPsh.pp..stpp.p..........cppsRph+p..cpppsuscp.s...tp..ssE 
...............................................................MGCtL.KLc.c.--.p.pPGpIaSTL+RsQVET+ht.sYpYhhL-F.h....tu..stspslplsSlh-lsspl.-hY.pGa.lsAlHPhlpPsttpcphP.phlaRslL...+.p.p...ppp.ss..tpp..t.hL.l-pCs.h.p..spchh.thlcK.lp.uAppGh+FVGhl.........................p.hs..t.ss.ss.........p....................................................................t.....................spsp...s.t.p..pthp..pp.pt.pt..............t..p.shst.........c.claslFNt.c......s.pppshpY..shlsh+Vo+pGps.losL-AsWLEhho.aa+pG.h.Llsuhhhhth.s.p....-p.h.t.h-GlFIaEt.ustssto.p.G.DAIVVE.QWTVlEG...sElpTDYsPLLpoLAtaGW.LTsVLPTPll+ps.pEGsluTKQlVFLQRPsh.pp..htpp.p..........cht.t+..pt..t.t..sttp.t..........tt................................................... 0 3 9 26 +15104 PF15251 DUF4588 Domain of unknown function (DUF4588) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14CZ0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 200 and 274 amino acids in length. There is a conserved LYK sequence motif. There is a single completely conserved residue A that may be functionally important. 
27.00 27.00 27.20 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.82 0.70 -4.38 18 156 2012-08-06 14:50:59 2012-08-06 15:50:59 1 2 115 0 102 137 0 208.80 31 89.72 NEW EppCl..s-up.ccp..........................t.pssphpsppp+LWptFpsuAsAVApLYp...................t...................t.thohWssFpsAAtuVTsLYK-Ss.DuppRSh-hGhpsGhpc......Rs+DllpWs+.+tRphIRRE-LluaLsG+sPPs......+s.sps..............sscs........ss..sssp.....p..ts.t.s.s......l-ssLpsF+-AluLtt..hsshhsshuh.susPsossp.pt.t.......................uhpcscLsshhs...--lshp.............pRKRppspss-s.............sSPoHKRsR.hh ........................................................................t....t.....pplh..FptsApulApLYp.........................tp.shuLWssFQsAAsuVTsLYKpSs.-spp+oh-hGhphGhpc............................cs+-lhsWs+.KpR.ph..IRRED...LluaLsGKssPs......ps..pt.............................ssp................st.......t.....tssss........spsshp.sh.ppsluh.....st.hss.phtss...Ps.sst....t.............................t...............tpht.................p...t.............................h....................................................................................................................... 0 24 37 70 +15105 PF15252 DUF4589 Domain of unknown function (DUF4589) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q2T9L4 Family This protein family is a domain of unknown function. The precise function of the protein domain remains to be elucidated. This family of proteins is found in eukaryotes and are typically between 215 and 293 amino acids in length. The protein contains two conserved sequence motifs: SSS and KST. 
27.00 27.00 97.90 60.20 20.50 25.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.70 0.70 -4.53 13 97 2012-08-06 14:54:27 2012-08-06 15:54:27 1 2 37 0 58 73 0 209.90 48 85.56 NEW EVVsQID+LTSDh-aELE..sDDWTTuTlSSTSSS-+...sGss.-Lt+LDFhsuDhLSD..........SW-FCSFL-tSsPsssssssssspsts...................ssapLMNGGl................IPNGPth.TPDSSSEEA.sss........Ksp.....pRTsGTRERVRFSDKVLYHALC..CDD-p-tsppp...ctp..t....p............................sPc..sssuhssutstshsst.....s..++lhRNoSTQTVuDKSTQTlL ........pVVsQIDpLTSDhphE.E..sDs.pTsTlsSoSSSsp.........s.sLt+lchhsss.L.c...........s.thhohLchssPssssst.pss+st.ss..................sshpLhpsGs................IPNGshsphPsSs.-cA.sts....s.Ksp.........p.p.sGsRERVRFs-KV.YHuhC..CDsc..s.tpcEht....ct.-..st....sp...p.........................sh.h.ssshshs...pp.p.shssh....Ptps..sTtspsppTl.cKSTpTsl................. 0 2 7 22 +15106 PF15253 STIL_N SCL-interrupting locus protein N-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q15468 Family \N 27.00 27.00 28.10 27.40 21.90 21.10 hmmbuild -o /dev/null HMM SEED 410 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.48 0.70 -5.76 17 97 2012-08-06 14:56:12 2012-08-06 15:56:12 1 3 58 0 54 91 0 313.50 46 33.79 NEW LWsssPhG-hhh.+lthh.RNs+.Lh..lsEKslRLApRHs+Q......sptpshsCFLlGolhVDpDEEGlolslDRFDPGRE........stt.t+hPss.LPGDhllPChl.tpshusspsl.sHsspDhstsFpsLQppls.o+psL-hspLl..slRuplpsp-s.Dtl.hshpWuuVThusshcssPV+slPIIPTALARNL.so.hs.lsplpGshKpGaLTMDpTRKLLL.lLESDPKshoLPLVGlWLsGlsHlpoP......VWusCL+ahaSuulp-R...VhS-sspFlllLaslsa..ppPpFYcC.hspsspt..LpaQLl...ospcslpLa.ppVcsspcp.lphE..LSupspsspts......lFpps.tsh..oh........psssppssss.phssoc...pDSulEDcDlSPR.......P.PoPHPssQpssplhPpVPELSllhDssFhs 
.......................................LWsshPhG-hhhLHlsha..RsP+...Lh..lsEKslRLAhRHA+Q.......s.cps.sCFLLGolhlDpDEEu.....lolslDRFDPGRE........sts.thsPss.L.PGDhllPChlphpt.sspphh.scssp-hstshpsLpppls.u+p.l-hsphh..th+sphh.pp...t.h.hphphtuVs.usshchsPl+slPlls.TALARsL.so.h........s.luplpGshKhGaLTMDpTRKLLL.LLESDPKshoLPLVGlWLuG.lh..HlhoPp.............VWAsCLRYhasuulp-R....Vho-sGsFlllLashT+..pp......PpFYcC.hsppsp...hpaQLl...osppslpLa.ppVcssppp.lphE..Lospspshpsp......hFpph.psh..sh........ppsspp.sss.phshpc...psSslEDEDhSPR.......P.PsPHsssQphptl.sphscLulhhs.ph..t.................................................................................................. 0 17 20 34 +15107 PF15254 CCDC14 Coiled-coil domain-containing protein 14 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q49A88 Family This protein family, Coiled-coil domain-containing protein 14 (CCDC14) is a domain of unknown function. This family of proteins is found in eukaryotes. Proteins in this family are typically between 301 and 912 amino acids in length. 
27.00 27.00 28.40 28.30 21.10 25.90 hmmbuild -o /dev/null HMM SEED 861 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.69 0.70 -13.43 0.70 -6.17 11 77 2012-08-06 15:03:12 2012-08-06 16:03:12 1 3 40 0 44 93 0 449.60 39 86.45 NEW DSESQs-sVp.GLDGCASLL+DIL+NEDo..GsEhsaScs......Rssu+PLEuKtst.KKKG.cK+hsPshVpKEILSSssKK.IsN-uSsusc+DsSsltQpWSLQDH...Yph..YSPlIYQALCEHVQTQMSLMNshuSKsssNGIPslPCH..ssSsu-oQu.ostSsYGLsTSssVhSPQpPsCP.hVHSEV....QTsu-sQhuSQspssSVsss.slspsshsspPulsC.uLPtsspsAlPshptLshsstlhP.Q...pphsKEsDLLKChQTahuLhpuH......s..hpsDsQsppSsophQsu.hlAosEEcsAcEpIt-ssSEtc-LN.hpltDuchsKslQ........KucNlscTAcKV+hlKYLLGELKALVs-QEDSElpRLlTElEAClSlLPAVuGsTNlQVEIALAhQPLRSENAQL.R.RQLRILNQQLREQEKTcKsoGsh-CNLELhSLQSLNhSLQsQLpESLKSQELLQSKNEELLKVIENQK-ENK+asslFKEK-QTLLENKQQFDIEhT+lKIELEEALsNhKohQFKLEoAEKENQILGITLRQRDAEVsRLRELTRTLQsSMAKLLSDLShDoARsKstssLTKSLLNIY-KQLQcDPsPupTS..IMSYLsKLEss+oh.sHuEsl.shcscEshtPs+.YEssLsScsPp...pusststEEhSAstllsshSKp-SDp-SEohTLlE-csNL.DpTlYIPFARSTSKK+SsLScR.......lSPQPQhsVAssQLssssGl.sSc+Es+hssPsVCSuppp-u...E-uspcLuRsu-hEDcQLLcKIKEsIsKIPsuh.......................-c.c-puspHuPuApppsslplK..GssVsDuSFLNSDLM...SDWSlSSFSTFTSRDEQDFRNGLAALDANIARLQKSL+oGLLcK 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..-.s.Lh..+................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 11 13 23 +15108 PF15255 CAP-ZIP_m WASH complex subunit CAP-Z interacting, central region Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5SRD0 Family This domain is found on WASH complex subunits FAM21 and CAP-ZIP proteins, as well as on VPEF (vaccinia virus penetration factor). This family of proteins is found in eukaryotes. Proteins in this family are typically between 305 and 1321 amino acids in length. The exact function of this region is not known. 
27.00 27.00 27.60 27.40 26.40 26.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.75 0.71 -4.25 19 184 2012-08-06 15:04:32 2012-08-06 16:04:32 1 4 61 0 91 153 0 131.50 38 14.78 NEW PhKsKEPS.oRIGKlQANLAINPAALLPGAsPpluGsKslhPthuhssucPstspuscssss.stsuuEtGVSFDtPAQADTLaSANKoRlKhpGKRRPQoRAAR+LAAQ-SsEs..--sssscss..s.htpsssssssppP ................................................sS.shItK.lQ.A.NLAlsPAALLPu..A...u..Pp.sslKssls.hs....P....sSpss...tu.ts.........hsh....s.........tutEsuVSF.D.Ps.p.u...s.TLp.S.s.N.K...sRs+hpuKRRP.oRtuRR.Au.pcSsts.....pshs.scts..................................................................... 0 16 23 45 +15109 PF15256 SPATIAL SPATIAL Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q53FE4 Family SPATIAL (stromal protein associated with thymii and lymph node) proteins may be involved in spermatid differentiation [1]. 27.00 27.00 48.40 28.50 24.20 26.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.56 0.71 -4.24 23 106 2012-08-06 15:09:08 2012-08-06 16:09:08 1 2 40 0 49 103 0 171.60 38 56.60 NEW FFSRHsPHP+RVsHIpGLNGhPICsVsD-..............shh..p..hP.sth....ssshhthP...hs.lus........PRsspcP.h.........psW+cEL+-huSclslshKcpEhKsKc...................p.s.YSspTGRlhPsuopusoccssp.t.+schptpsh..phssa.DQElhILEhLCQILpTDSLutlQpWLLpAusK..EK-LV.uLlpoAlA ..................................................FhsRHpPHPp+VsHIpsLssh..PlCsVpDc..............s.h......lP.uph....Sts.ht.Pshsls.lus...........PpssppP..........................pthp+EhhchuScsshhhK.cpEhKsKc....................................................sspss+hhss...ssps.uchpop.......pspsptps...ph..h.DQElhlLthLCpILpTDSLstl..WLL.AssK..EK-hs.uLlpstlu............................... 
0 11 13 19 +15110 PF15257 DUF4590 Domain of unknown function (DUF4590) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q5RHP9 Family This family of proteins remains to be characterised and is a domain of unknown function. This domain family is found in eukaryotes, and is approximately 120 amino acids in length. There are two conserved sequence motifs: CCE and PCY. In humans, the gene encoding this protein lies in the position, chromosome 1 open reading frame 173. 27.00 27.00 35.50 34.40 21.60 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.57 0.71 -4.28 11 71 2012-08-06 15:24:02 2012-08-06 16:24:02 1 3 46 0 40 56 0 114.30 64 13.59 NEW VHLuac....DhRDEIKVYQQHCGGENLCVa+G+.LLEsETFpFlS+RHhGFPFSLTFaLNGlQV-RLSSCCEYKH...++GSRLGG..+pGaFGFlsVEGASPCY+C.Il.........uhG.LDKKPpPPh++ ...................................VHLupDp...DaRDEIKVYQQHCGGENLCVYKG.....K.LLEp...ETFQ..FISKRH+GFPFSLTFFLNGhQVsRLSSCCEYKH...RKGSRLGG..K+GYFGFVsVEtuSPCY+C.II.........AMG.LDKKsosPp..c............. 0 14 16 22 +15111 PF15258 FAM222A Protein family of FAM222A Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q5U5X8 Family This protein family, FAM222A are a domain of unknown function. This family of proteins is found in eukaryotes and are typically between 411 and 562 amino acids in length. In humans, the gene encoding this protein domain lies in the position, chromosome 12 open reading frame 34. 
27.00 27.00 93.40 51.40 20.70 24.70 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -13.00 0.70 -5.28 19 161 2012-08-06 15:29:42 2012-08-06 16:29:42 1 3 37 0 87 116 0 305.50 34 95.79 NEW KWDTTQ+MRS....ApYPTPAELDAYAKKVANNPLTIKIFPNSVKVPQRKHlRRTVNGLDT..SGQRYSPYP.sQuuu+sGLLAIVK.............sP.sKGllK-h-GsR..sRLh..scuhMNPssuPY..susSTLs......H....Pt......tl.h..Qt..............HsQ.............................oLtp...................................................h.H......sQuh.p...............................................................p..shtp.ps.st........t................h..GuRKhPDuD..AP.PNVT..VSTSTIPLSMAAsLpQ...sc.sDLuSIVHQINQaCQARA.GhusTShC.EGQIANPSPISRNLLIsAsoRVSsHs......sss...suC.hlss.-p...u.AslPsust.s.s.hshsthssuY.s..p.....................sWsQH....QLsahQphspsut.........sppsthctstspsFss+s..sYP.chs.huQsaslKs...sh-+ssPSsPV..Ns..ushs......YsNGpYap..PhWssI....LsTPsSDuuGu.QDLshsFpGutsuuss.................................................ssGs+YRhusuu...su.....QsshMQohDYLu.GDFQ..PCF+-QshuhhtKhp.....R.sshs+ss.-sscupshHIQHPGYR 
.........................................................s.aPosApLDAaAp+sAppPLoIpIFPssl+VPQ+pplpRTVNGhDT..os.RaSPYs..psss.tGLLAll+..................ss..sKullKs.cGtR..s+h.....s..p...ssY..s..sshs.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sPh..ss............hssGpYht..s..stl.....sssssss.ss...h..hs................................................t...t...............sp.hpsh-hh..t-hp..s.hp-p.h............c......h........t...t...p............................................................................... 0 3 13 41 +15112 PF15259 GTSE1_N G-2 and S-phase expressed 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NYZ3 Family This family is the N-terminus of GTSE1 proteins. GTSE-1 (G2 and S phase-expressed-1) protein is specifically expressed during S and G2 phases of the cell cycle. It is mainly localised to the microtubules and when overexpressed delays the G2 to M transition. the full protein negatively regulates p53 transactivation function, protein levels, and p53-dependent apoptosis. This domain family is found in eukaryotes, and is approximately 140 amino acids in length. There is a conserved FDFD sequence motif. 
27.00 27.00 30.30 29.20 21.20 20.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.99 0.71 -4.23 25 105 2012-08-06 15:40:42 2012-08-06 16:40:42 1 1 43 0 46 87 0 128.60 40 24.70 NEW DlhLLsDEKFDFDLSLS............su...........Susc.....DDEVFhGPVGHKERClAssl.....st..stts..sstssphoWSPLsGEKFsElaKEAHhLALQlEssu+sptspssp.....ptstspssEpFlp-octKlslhppppchcpSPhslKRETasl........p-S .....DlhhlsDEphDFs.lsLSso...................supE....pDspVhhuPht.hcRslutth.....sp...sptss.sspss+hohuPLosEKh.ElhcEAphLAhQlEpsuhpcpppusp...............h.pt.t.plsh..t.pchp.SP...+RETahlp-.......................... 0 5 7 17 +15113 PF15260 FAM219A Protein family FAM219A Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q5XKK7 Family This protein family, FAM219A is a domain of unknown function. This protein family has been found in eukaryotes. Proteins in this family are typically between 144 and 191 amino acids in length. There are two conserved sequence motifs: QLL and LDE. 27.00 27.00 30.30 29.00 25.80 23.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.09 0.71 -3.78 11 124 2012-08-06 15:42:48 2012-08-06 16:42:48 1 2 49 0 67 112 0 120.40 62 71.42 NEW QK+R-hAR+uL+K+Gh..GsslspQP+pss+....R.oV+aNKGYsuLSQ.os-Es.LVoLDSD.SDsEl-.ppa.SSGYSSA....QVspDLo...+QLLpDGY+LDEIPDDEDLDLIPPKsluossCsC.....s-ssSCslQ .................................................................................pKpR-LARpu.h+pGsh.GuslspQP+pssh.....R....Vh..NKGYouLsQ.SPDEp.LVuLDoD..SD--h-.S......RY.SSGYSSA.........EQlNQDLs...hQLLpDGY+LDEIP......DDEDLDLI.PPKshsso...shsCp...hssSouCplQ........... 0 11 15 32 +15114 PF15261 DUF4591 Domain of unknown function (DUF4591) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6NUN7 Family This protein family is a domain of unknown function. It is found in eukaryotes, and is approximately 120 amino acids in length. 
In humans, the gene encoding this protein lies in the position chromosome 11 open reading frame 63. 27.00 27.00 29.30 27.40 25.80 26.80 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.24 0.71 -3.90 15 75 2012-08-06 15:52:54 2012-08-06 16:52:54 1 2 55 0 47 70 0 117.60 37 20.44 NEW KLGGLGPDh.-uh+s..KhpKLppQKEYA+plKEaNhKsluh..h.ppspss+s-scssls+....+K.ALEYAKoIPKPK...stp...................sQtsKc...tpp..s..stppsslPcIohLEhLpsRHE+EKpsVAAFKsLHIl ................+LGGLGPDh.ps.c...KhpKlhpQKEYAcpl+EhNh+slsh..p...pst+spspsslsc....p+.ALEYAKsIPKP.K...ssp.s...........................................cpssKc....ppp..p...stc-tshsclo.hLEhLpsRHE+EKpsVAuh+hhhh..................................... 0 19 24 29 +15115 PF15262 DUF4592 Domain of unknown function (DUF4592) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6NV74 Family This protein family is a domain of unknown function, which lies to the N-terminus of the protein. This domain family is found in eukaryotes, and is typically between 114 and 130 amino acids in length. There are two completely conserved residues (L and A) that may be functionally important. In humans, the gene that encodes this protein lies in the position, chromosome 2 open reading frame 55. 27.00 27.00 28.10 28.40 26.80 26.60 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.64 0.71 -3.60 27 118 2012-08-06 16:00:45 2012-08-06 17:00:45 1 2 40 0 57 103 0 116.20 39 13.35 NEW tpsI+hGps....Ph.hptpc.psussssE-DslPpSP.Ehss.p-sh.ssspppsssh.t.pp....stsupp.p..pssss+sspshps................suuo..slshsusspshupLDNSAA+HKLulKP+pQRsup+ ........................................pNlKhG.s....Ps.s.lshK+sp....susso.E-DhlspSP.Ehsh.p-lh.usspppsss.s.sp.p....st.tsc.p..tssss+sSps.t.hu..............suush.slshsssPpu.upLDNSAAKHKLuVKP+pQRsS+............... 
0 5 13 25 +15116 PF15263 DUF4593 Domain of unknown function (DUF4593) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6ZR54 Family This protein family is a putative uncharacterised protein family. Its existence is uncertain and its precise function is unknown. This family of proteins is thought to be found in eukaryotes. Proteins in this family are estimated to be around 155 amino acids in length. 27.00 27.00 279.40 279.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -10.97 0.71 -4.33 2 2 2012-08-06 16:06:49 2012-08-06 17:06:49 1 1 2 0 2 1 0 155.50 50 89.11 NEW GpLLushGPSTlPhh.suGuCpPpPLuPGGppPPPPPRAHhuP.EAs.utsPus..LPPTRGl.hKsh.opsuPshpLGGPGLPG+uGPCGPRupPsQs.uGptsssGtGspoPhhTLPCSlstSptlh+GRSHLttsLuslGEARGshAhhsWGQ GpLLushGPSTlPhh.suGuCpPpPLuPGGppPPPPPRAHhuP.EAs.utsPus..LPPTRGl.hKsh.opsuPshpLGGPGLPG+uGPCGPRupPsQs.uGptsssGtGspoPhhTLPCSlstSptlh+GRSHLttsLuslGEARGshAhhsWGQ 0 1 1 1 +15117 PF15264 TSSC4 Tumour suppressing sub-chromosomal transferable candidate 4 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9Y5U2 Family This family of proteins is expressed from a gene cluster where in humans the TSSC4 gene is not imprinted [1,2]. This same cluster is associated with the Beckwith-Wiedermann syndrome [3]. This domain family is found in eukaryotes, and is typically between 120 and 147 amino acids in length. There is a conserved YSL sequence motif. 
27.00 27.00 28.20 27.30 26.60 26.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -11.01 0.71 -3.64 50 121 2012-08-06 16:07:49 2012-08-06 17:07:49 1 2 89 0 70 104 0 142.80 25 47.58 NEW pculFssL-sspc.....................hsssshsp...sssps......................................................puhFK+P.sPssp..............................................hphsplPDYh.tcP-+WTKYSL-D..Vs.p..hS....-poNpAAAluFLtphctp+t............s..sshsp-sssssp...............s+ ..........................................................................................pslFspLpsstp.........................sh.p......sp..p.................................................puhFc+P.sPssp.s..........................................sps.slPDYl.tpPc+WT+YSL-D...Vsc..sS..........-poNptAAhuFLpphppppt.................tph.sp.p.........tth.................................................................... 0 22 36 54 +15118 PF15265 FAM196 FAM186A; FAM196 family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6ZSG2 Family This protein family is a domain of unknown function. This family of proteins is found in eukaryotes and are typically between 441 and 534 amino acids in length. 
27.00 27.00 62.30 52.30 23.50 23.50 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -13.01 0.70 -5.26 27 99 2012-09-25 05:53:12 2012-08-06 17:11:43 1 2 35 0 61 62 0 405.70 40 97.84 NEW Msp+............ps-scPsh..........h+.uLDssctlK....+RNKu.QVRFKD.spuQNppt.spls.p....st+-upshssK.tA.R+ahsss.sppSlPpspK.....ShulQTSPsL+K+a.oF..cRKK...upsl+phssssshptQsNGhLs-tc.ht...p.ht.st....-uscpst.spshtsps.hhpSstshsht............h.sutpsosuspsP-...pSss.p...sh.ssspssttpsph.......hhstsptcttth.sp-...tsssshsps.t...........ppssplhhPpssspsspssh.s...spscpstsPusspp+pps.s.Ls..shpp.shsttsu.Css.s....pshoscs.ust..shps..............ps...tp.....t..ss........tostthpptsQphls+sE.lsDLpupLQshEphlpSsQEpIKVLLsVIQ-LEKucAhpEGho.YRTGQDhsNCsTCpNoACIIYSVEhDF+QQ..EsKhpsV..L+pLc.shEpsphss.PhppEshsssPcpKoKscp..KKctRhphWaL .......................................................................................................spp.............p.cscPsh.........h+.uL-ssc.lK....+RsKu.QVRFK-.spupN.pt.tplsst.....ttc-stsh.h+.tA.R+ahsss.hphShPpspK.....shulQTSPsL+Kpa.sF..cRKK.......up.lhphsssps.t.QsNG.L.-.c.h.......t..t....-u..pst.stshhsph.hhpost.hshh.......ss.pssts.hpsP-....u.s.t....h.p.sps..st.psph.......h.shs.tc..ttt..spp....sssshtth.t...........................pthsplhhP..t.ssps.t.s..s...stsc..hsPus.pp+pps...hsuhp..t.s.st..t.Css...s....phho.psspt..s..s...............p.............................ss.............sshth.st...sp.hl...spsE.lsDLpupLQ.hEp.lpSsQEpIKVLLsVIQ-LEKucAhpEGhs.YRTGQDhsNCsTCpNoACIIYS.VEhDF+QQ..Es+hp.l..LppLc..hE...sp..s.P.p...t.s..sssP..cp..pshpcp..KKht+hphWal.......................................... 0 4 12 25 +15119 PF15266 DUF4594 Domain of unknown function (DUF4594) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6ZUT6 Family This protein family is a domain of unknown function. 
The protein family is found in eukaryotes, and is typically between 170 and 183 amino acids in length.In humans, the gene encoding this protein lies in the position, chromosome 15 open reading frame 52. 27.00 27.00 35.30 35.30 19.50 19.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.63 0.71 -4.14 14 111 2012-08-06 16:19:33 2012-08-06 17:19:33 1 2 45 0 55 87 0 163.10 40 33.26 NEW Ps.pscssRpEGScctsRNWuGssacsV+phs-pp+.c..utRss........stsshDhshuhotpEptEYlRWKpEREQIDpERLARHRsupGpWRRtWDh-Ks-sMh+-ssp.tsptsshstpptt.....cc.p+PPpsPshts.h.pstp.upsp.pstspS+upGpph.......ot+ccRWEtcp-.tpc.p ...........................Ph..scpptpEGSptpsp.shusss.ptlp...-pp+....GtRts..............tssschsh.uh..s.sccptEYhpWKQEREpIDp-RLARHRcupGpWRRtWDh-K...s...csh.hpDs.st.t...tcss....pchss.............ptht.PPhsPshtt...hhspttttspsp.t..ttpS+spu+tt.......os+scRW-h+Et.t...tt................................................................ 0 5 8 21 +15121 PF15268 Dapper Dapper Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5SW24 Family This is a family of signalling proteins [1-2]. They act in a diverse range of signaling pathways and have a range of binding partners. They act as homo- and heterodimers [3]. 
27.00 27.00 27.10 27.10 20.30 24.60 hmmbuild -o /dev/null HMM SEED 748 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.49 0.70 -5.74 24 169 2012-08-09 10:45:45 2012-08-09 11:45:45 1 4 40 0 81 188 0 446.60 32 88.35 NEW ERLcAsLAGLpELphLRpRQphhVpusLt.....s..ss.hs.....................hpEphLEtslhsL+cQL.....spLRRpDsGLhspLppLDpQIS-L+LD....VcKsopEtL-oDSRPSSGFYELSDGuSsSLSNSssSVaSEslSS.........hhsusph....csphshuDsRP+S.AD................s............................hGpL-p.h.upst.ppsut.c.p.ss.hspuhcl.sc.lcPKYQs.DLVSKsGp-VY.YPSPLHAVAlQS..PhFhLsh..........Ep.tsssp..ssttst.sss..sssh.p....hsssh.sss..hcuYI.pLLQ+ppptspss+spsu.pus...thhtt.......sss..tpupsss.csptphss.ssGtsshs.....ushpphstcppu.p.pstt...........ssssp.pssshscptspssspphttps.sss...........................psp..ssssptp..pssh................ppspshtP.ppls.ps.........................................s..t.G..thVpupalsu.psp.s+l+p.GsppsKssK.+ppsopKstt.utpt..ts.pp.Rp...............+spthsphsRhP....o.tppspt..pSspppp.hssshh.shlsuR..u....uptpuh..hcutshu.ts..uts++Kp....RRW........................pSosEIShcpt.pp..............s.RRstt.ph..uhhts.hsh..sps.h..u.S-...SEYSAECtSLFHSTls-TSED..EpSsaTTNsFGDSESShS-s-hsspSosoS.o............-uusLlWsphs.t...shp.sssuu.....c.t.sPs.phs+IKAS+sLKKKIhRFpsuSLKlMThV 
........................................................................................................................................ptL.....GL...l.pLs.Ql.scLpL-.........tt....h-p-St.SSGFY-.ssususs.t....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................aG-tESS.up.p.s..........................t....hs................................hhs+IKAS+sLKKKIhRFpssuLKlMThV........................................................................................................................................................................ 0 5 15 30 +15122 PF15269 zf-C2H2_7 Zinc-finger Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q32MQ0 Domain this is a family of eukaryotic zinc-fingers. 27.00 27.00 27.00 117.80 26.90 20.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.90 0.72 -4.10 2 69 2012-10-03 11:22:52 2012-08-09 12:40:46 1 2 37 0 49 53 0 54.00 86 8.43 NEW ptRKPKKPHYIPRP.GKPapYpCFQCPFTC..KSHLaNHMKYsLCKNSlSLl.p ..KERKPKKPHYIPRP.GKPapYKCFQCPFTC.EKSHLaNHMKYuLCKNSloLl.p. 
0 2 6 19 +15123 PF15270 ACI44 Metallo-carboxypeptidase inhibitor Coggill P pcc Pfam-B_261362 (release 26.0) Family ACI44, a metallo-carboxypeptidase inhibitor, is one member of a battery of selective inhibitors protecting roundworms of the genus Ascaris, common parasites of the human gastrointestinal tract, from host enzymes and the immune system [1]. 27.00 27.00 28.10 154.10 26.50 18.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.37 0.72 -3.99 3 3 2012-08-09 11:46:34 2012-08-09 12:46:34 1 1 1 1 0 4 0 64.70 93 86.22 NEW DQVRKCLSDTDCTNGEKCVQKNKICSTIVEIQRCEKEHFTIPCKSNNDCQVWAHEKICNKGCCWD DQVRKCLSDTDCTNGEKCVQKNKICSTIVEIQRCEKEHFTIPCKSNNDCQVWAHEKICNKGCCWD 0 0 0 0 +15124 PF15271 BBP1_N Spindle pole body component BBP1, Mps2-binding protein Wood V, Coggill P pcc Pfam-B_31027 (release 26.0) Family This N-terminal domain of BBP1, a spindle pole body component, interacts directly, though transiently, with the polo-box domain of Cdc5p. full length BBP1 localises at the cytoplasmic side of the central plaque periphery of the spindle pole body (SPB) and plays an important role in inserting a duplication plaque into the nuclear envelope and assembling a functional inner plaque [1]. Although not a membrane protein itself, BBP1 binds to Mps2 as well as to Spc29 and the half-bridge protein Kar1, thus providing a model for how the SPB core is tethered within the nuclear envelope and to the half-bridge [2]. 
20.00 20.00 27.20 66.80 19.40 19.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.02 0.71 -3.83 14 27 2012-08-09 11:50:23 2012-08-09 12:50:23 1 2 27 0 16 22 0 154.40 47 40.10 NEW sus.sGla+WThDALFGs+lSPSpKY.+-.........auQDDTNYphpt............pstps+oR.SsSWss............DssFhc+YDLLssppppshp................................................ptLhsPlclhsp.............................s..pcsTDTFup+............t.pphsstph.hcsPptD....DsllS+LFt+t .....u.GGhhGLFKWThDALFGoclSPShKY.K-.........aAQDDTNash+hspsp.......s+pssshSRSNSWSGl...........DSoha+KY-LLP-hsEsshs........................................s.cspcclcSLhSPsslsPR............................pPhps-PTDTFupR.........pRpshschussplsFhsPpcD....DPLlSKLFsK........... 1 2 7 13 +15125 PF15272 BBP1_C Spindle pole body component BBP1, C-terminal Wood V, Coggill P pcc Pfam-B_58229 (release 26.0) Domain This C-terminal domain of BBP1, a spindle pole body component, carries coiled-coils that are necessary for the localisation of BBP1 to the spindle pole body (SPB) [1]. 
Although not a membrane protein itself, BBP1 binds to Mps2 as well as to Spc29 and the half-bridge protein Kar1, thus providing a model for how the SPB core is tethered within the nuclear envelope and to the half-bridge [2] 22.90 22.90 22.90 22.90 22.40 21.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.38 0.71 -4.75 12 35 2012-08-09 11:51:47 2012-08-09 12:51:47 1 3 34 0 23 31 1 176.50 32 43.31 NEW c-aos-YlcLLDpLspNs+pLcpLpp-lcp+ppchpppEpoY+pKYhphRtELIpELKQSK+laDNYacLapKYppLKc........hspcs.chppplssL-splVppslpKs+chpphpcclhplcl+tpchp.t+ch-thtYEoRIc-LEppL...pNp.ptpshhup.s.os........s.............p.hpc.N.solDopFlcpls ..........t.capptYhcLhsphshNs+sLccLsc..-lcppccphccpE.....poY+pcYpphRsELlsELK+SKpLa-NYYpLhpKY+sLK+.............................shcpshshpsclu..s.cccLhpcts.KshcIpsLp.pcL...shcl+hppLp.t+p...hpc...sYEScIcDL.hpLp..pss.ptssshsophhSs........s.................c.s.tshssph.cp.............................................. 0 4 12 20 +15126 PF15273 NHS NHS-like Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5SYE7 Family This family of proteins includes Nance-Horan syndrome protein (NHS) [1]. 
27.00 27.00 27.20 27.00 26.00 26.90 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.51 0.70 -5.89 26 237 2012-08-09 11:54:02 2012-08-09 12:54:02 1 3 40 0 110 185 0 389.90 30 41.26 NEW stp+SpTRDSuCQTE-lhI...ssPShRRIRAQ+GQ.GIAAphSp.o......GNhSl..LoDsusshF.ssphstssp.hpp.LPRpGuRss.ps.pthts......p........phuphps.-shh..tss.R.....tscspEspspput.......suhhhS.Hus...........Pstolsppucshh.+p.tshGps-hpssSsohsSs.hp.psshs..ttth..K-sHpSSSGsW..sposssppo.pohsssuusshssSShsDSplSLNsss.st-ss..........sps.thpsppssSFsSpstD..scspsutossst.s..tspcph..aps.ssttsps..sphssP....uhoossop.Stss.cpsSsKsDosShYSVDs-GYYT.SMHhDsGL+uspph..s...s.s.thttstpsh.shh-httppp.t...htpc+php+sISL+KsKt.PhPPpRosSLR+hsp............Kpp.p.uph.pEshluohppoLQLsL.tpss.....SSsspSssssh-s.hlhpscSposlussSS.hS...........phhShsssossapDsSulpS-YAD.Whh..Dhpusstc.ts.ossuoAoussshp..pustus.sps............SRuosPsl........PSspsc.KluSPEKhttLsSPSSGYSSQSpTPTuuhPl...shF.pshs.usGtGKhKPKVPERKSSLhSssphSSSSTSLSSsoSc- ...........................................................................................s....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..........p.p.ht.shSh+KsKh.P.PP.RosSLhc......................................................................................................................asc.Whh..Dhps.p.s.ts...sssssssus.ssht..ps..pp.p.s............sRss.Pp.........uh.psc..+.hsSPt.+.ttlhSPSSGYSSQSpTPTs.hsh...sh.h..t..s....ssu....ts+.+PhVPERKSSL............................................................ 
0 4 14 43 +15127 PF15274 MLIP Muscular LMNA-interacting protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VWP3 Family MLIP is a Muscle-enriched A-type Lamin-interacting Protein, an innovation of amniotes, and is expressed ubiquitously and most abundantly in heart, skeletal, and smooth muscle. MLIP interacts directly and co-localises with lamin A and C in the nuclear envelope. MLIP also co-localises with promyelocytic leukemia (PML) bodies within the nucleus. PML, like MLIP, is only found in amniotes, suggesting that a functional link between the nuclear envelope and PML bodies may exist through MLIP [1]. 27.00 27.00 81.60 48.00 21.30 25.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.66 0.70 -4.89 2 110 2012-08-09 12:06:34 2012-08-09 13:06:34 1 3 32 0 43 83 0 137.00 44 42.14 NEW ptMQQSDLFKAEYVhIVDSEGE-EAsuRK.-ptPssG.GpuhsRPKSLAlusu.losl.+P+..tsDhpsssps-h.pshAs.QKphQQYKhKSSYKAFAAIPTNTLLLEQKALDEPuKoEploKDsTL-s.lEhh.PAQLRQQTEELCAsIDKVLQ-SLSMHSSDSPSpS.pThLGSDssKhPsTlPRAAGRETKYANLoSPSSThu.SQLTKPGVIRPVPsKS+IlL+KE.EEsYEPNPFSKYLEDsSsLFutQD .........................................................................................................s.h.t...P........p.t..s.st.h..uhus.ppK+sp....................................................................................................................................................................................................................................... 0 2 4 10 +15128 PF15275 PEHE PEHE domain Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q68DK7 Family This domain was first identified in drosophila MSL1 (male-specific lethal 1) [1]. In drosophila it binds to the histone acetyltransferase males-absent on the first protein (MOF) and to protein male-specific lethal-3 (MSL3) [2-3]. 
27.00 27.00 27.00 27.00 26.60 26.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.04 0.71 -3.67 27 274 2012-08-09 12:08:56 2012-08-09 13:08:56 1 2 88 5 145 277 2 121.70 32 15.40 NEW lhsP..sWRsl..slpshsssp...t.t.........hEsLSDpsFspRHpKYE..EpERpRWshhp.pc.pRppp...Rphpcs.............s.sp.ss.shsp..............sps.psschsscsss.shtcsh...lptsp-...................hslPW ........................lhsP..oWRts....slpslcsps.......st-................................hEsLSDssFstRHtKhE..Ep-R..p.....RWsh.pp..pc....p+hpp.....R.hcpp.................t..s.p...spsphoo...hs.................psc.s..ts..h..hs.....s..shtps....h..pp...............h...W........................................................... 0 36 50 92 +15129 PF15276 PP1_bind Protein phosphatase 1 binding Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q69YH5 Family This domain contains a protein phosphatase 1 (PP1) binding site [1]. 27.00 27.00 28.60 32.40 26.40 20.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.19 0.72 -3.95 12 102 2012-08-09 12:15:31 2012-08-09 13:15:31 1 17 45 0 50 96 0 62.80 49 4.09 NEW tK+KRVoFGtcLSPElFDcsLPsNTPL++GuTPs+ppssts.ssslh.cu......t..c.h.QP..sF-s .....KRKR.VoFGscLSPElFDEsLPsNTPL++GsTPs++pshsst..o...s...lLcc...........c..sQP........................... 0 10 13 21 +15130 PF15277 Sec3-PIP2_bind Exocyst complex component SEC3 N-terminal PIP2 binding PH Wood V, Coggill P pcc Jacckhmmer:Q10324 Domain This is the N-terminal domain of fungal and eukaryotic Sec3 proteins. Sec3 is a component of the exocyst complex that is involved in the docking of exocytic vesicles with fusion sites on the plasma membrane.This N-terminal domain contains a cryptic pleckstrin homology (PH) fold, and all six positively charged lysine and arginine residues in the PH domain predicted to bind the PIP2 head group are conserved. 
The exocyst complex is essential for many exocytic events, by tethering vesicles at the plasma membrane for fusion. In fission yeast, polarised exocytosis for growth relies on the combined action of the exocyst at cell poles and myosin-driven transport along actin cables [1]. 25.30 24.10 25.30 24.10 21.80 21.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.09 0.72 -4.12 101 335 2012-08-09 12:23:49 2012-08-09 13:23:49 1 8 228 7 232 344 0 91.70 31 10.66 NEW pp+Ks.Rhlhlu..lpps.s...lplp...KsKpss........sGs...aplu+oWsLc-Lptl-shsst..................sFslsh.sK.......sYhWpussspE+p.hFlpsLh+lhpca .......................tcKsphlsls..Vppp...s.........lplp...KsKpss.........sus...aphtcsWtLc-Lptl-uhss................psspFsLph.sK.......sYpWhAsospE+s.tFlpsLh+lhp+Y............. 0 54 101 171 +15131 PF15278 Sec3_C_2 Sec3 exocyst complex subunit Coggill P pcc Jackhmmer:Q10324 Domain This small Sec3 C-terminal domain family is based around the fission yeast protein, and is rather shorter than the budding yeast/vertebrate domain Sec3_C, family. Pfam:PF09763. In fact it is only this coiled-coil region that they carry in common. The full length fission yeast, UniProtKB:Q10324, protein Sec3 is redundant with Exo70 for viability and for the localisation of other exocyst subunits, suggesting that these components act as exocyst tethers at the plasma membrane. Sec3, Exo70 and Sec5 are transported by the myosin V Myo52 along actin cables. The exocyst holo-complex, including Sec3 and Exo70, is present on exocytic vesicles, which can reach cell poles by either myosin-driven transport or random walk [1]. 27.00 27.00 29.40 28.40 25.70 19.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.81 0.72 -3.85 2 2 2012-10-03 17:31:52 2012-08-09 13:28:37 1 1 2 0 2 3 0 86.00 29 14.06 NEW hp+L.pLcWsttsshccs.pph..ShstuhhpsVEpLhcpchphpplps.LpDshhGC-ol.STlNLaShpLSssLssVINhEQQ. 
...hp+L.pLcWsttsshccs.pph..ShstuhhpsVEpLhcpchphpplps.LpDshhGC-ol.STlNLaShpLSssLssVINhEQQ.. 0 1 1 2 +15132 PF15279 SOBP Sine oculis-binding protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9Y5P3 Family SOBP is associated with syndromic and nonsyndromic intellectual disability. It carries a zinc-finger of the zf-C2H2 type at the N-terminus, and a highly characteristic C-terminal PhPhPhPhPhPh motif. The deduced 873-amino acid protein contains an N-terminal nuclear localisation signal (NLS), followed by 2 FCS-type zinc finger motifs, a proline-rich region (PR1), a putative RNA-binding motif region, and a C-terminal NLS embedded in a second proline-rich motif. SOBP is expressed in various human tissues, including developing mouse brain at embryonic day 14. In postnatal and adult mouse brain SOBP is expressed in all neurons, with intense staining in the limbic system. Highest expression is in layer V cortical neurons, hippocampus, pyriform cortex, dorsomedial nucleus of thalamus, amygdala, and hypothalamus. Postnatal expression of SOBP in the limbic system corresponds to a time of active synaptogenesis [2]. the family is also referred to as Jackson circler, JXC1. In seven affected siblings from a consanguineous Israeli Arab family with mental retardation, anterior maxillary protrusion, and strabismus mutations were found in this protein [1,2]. 
27.00 27.00 28.30 27.80 26.90 26.90 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.80 0.70 -4.04 17 146 2012-08-09 12:36:43 2012-08-09 13:36:43 1 5 65 \N 86 127 0 243.80 35 40.11 NEW sCDWC+HlRHsssYVDFQDGtpQLQFCSsKCLNQYKMpIFh+ETQAHLsh....sPHl+stupstt.........sLITP-LW......L+sC+SpSsus.sssh..ssussPu.............s.ptSP..............pshlosu.sopLhs.p...st...sss...........................................................................................hshsshss...............hppph.tp.psPhhs.ss..............hs..........psshhs.sPttt.hhpPa.p.ss.......s.PPh.h..ssPtshh..sh.s......Ps..............................lPPVTlLVPYPl..lIPLPlPIPIPlPl ............................................................................................................................................................................................................................................................................................................................................................................................................................................................ts.s.hlhs.sp.u.Ps.lP.hhh...E.....pphhQplp.sPhlh...Pspt.........ssssss.shsN.h.s..........ssss...t.....h..s.ss..p..htPa.tss.......sh..s..Psth.h...s.PP..Pst.....suhs.P.s.s........hP.s....P...........hs.h........................VPPsTlL.VPYPV..IVPLPVPlPIPIPl............................................................................................. 0 14 23 43 +15133 PF15280 BORA_N Protein aurora borealis N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6PGQ7 Family This family of proteins is required for the activation of the protein kinase Aurora-A [1]. 
27.00 27.00 28.00 27.30 22.60 26.20 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.56 0.70 -4.98 25 110 2012-08-09 12:38:03 2012-08-09 13:38:03 1 4 81 0 72 106 0 195.70 37 39.28 NEW husptps.hphTPpssuh..............plhNPFE.ss.hppL+pshh.SPSlF...poss.........spposspFcWSIDQhAhLtPs-I.Dpc.-hpppuhhh......s.........-hE-+hQpAI-pFF.opssIVPSPW......................................s.thh.pp..hhphs..t.oshspph..p..stppssusQ..TsLoLPssh..-LEclLG.caapspcss..ct.............tt......hosSSLRRKLFhsssss.tsspsssssus .......................................................thphTPpo.sshh.................lhNPFE..ss.hspLHpthlsSPSlF...+o..............osspFcWSIDplAhlpPs-I..Dsc.-hpppshhh......s.t........-lEcKtQcAI-pFF.scssIVPSPW.........................................ts..hpphh..tshp.....sp...oPhspph...t..spppssssQ..ThLSLPssh..sLEplLu..sYFpsc-hs..-ps.......ts.........lSsSSLRRKLFhsssssh..........ss..................... 0 24 30 53 +15134 PF15281 Consortin_C Consortin C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6PJW8 Family Consortin is a trans-Golgi network cargo receptor involved in targeting connexins to the plasma membrane [1]. 27.00 27.00 33.00 31.90 23.20 21.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.54 0.71 -4.58 12 60 2012-08-09 12:38:45 2012-08-09 13:38:45 1 3 36 0 31 52 0 109.40 64 19.29 NEW ElsPupGLVSILKKRpss.Gpp......lsp.ppcpoKRRVRFpEs-DshDQ.DEsuGsSClLLlLLClsTVhlSlGGTALYCThGDhcSsVCpDFusNhDFYhsplhQslpcL+HWls..hS ...............................ElsPsEGLVSILKKRs-olGcp......sAQhQpKsSKRRVRFQEhDDsLDQ..DEVGGGSCILLlLLCIATVFLSlGGTALYCTFGDMESPVCTDFAcNhDFYYT+LLQGhAELKHWIh.lS....................... 0 2 5 12 +15135 PF15282 BMP2K_C BMP-2-inducible protein kinase C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6ZSR9 Family This family represents the C-terminus of BMP2K and related proteins [1-2]. 
27.00 27.00 27.00 54.70 26.10 26.10 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.76 0.70 -4.17 17 79 2012-08-09 12:40:07 2012-08-09 13:40:07 1 4 35 0 45 64 0 232.80 47 35.62 NEW tD-hDVFoKAP..........FspKs.............pssstpPcp.sDVFhpuPFp.........................................Kp+ShpcLoshQtpo+p.ssp....u.pphusssss.hs.pPsapo.E+shppptsu.Rsp.SsspFlphos.spcslps..shssups+usshps..............cEshlsPhu.uKPF+PQsLu+aupH.u.pD..................s.shpup.huAa+sssphp.ps.hGuVsh.TsLsspo..........sphsph.DPFuuAPFPSKt ....................--hDVFoKAP..........FspKs.............pshPspPcp.sDlFhpsPFp....................................Kp+SlpcLoutQppo+p.sup.....u.pphGsssssphs.pPsapo.ERAhpppt.stRsp.SospFlphSs.spcslps..sLssupsRGssLps..............cEullsPhu.uKPF+P.sLuhaspH.u.pD..................tpshpsp.huua+sssthp.hsshGuVPh.TpLsspo..........op.spl.DPFGAAPFPSKp.. 0 3 5 14 +15136 PF15283 DUF4595 Domain of unknown function (DUF4595) with porin-like fold Godzik A adam JCSG target SP16885A/PDB 4ghb Domain Large family of predicted secreted proteins mostly from CFG group, but also from Burkholderia, Pseudomonas and Streptomyces. Function of these proteins is not known. A 3D structure of a representative of this family from Bacteroides uniformis was solved by JCSG and deposited to PDB as 4ghb. 
There is some overlap with RHS-repeat (PF05593) family despite lack of obvious repeats in the structure 23.10 23.10 23.10 23.60 23.00 23.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.42 0.71 -4.65 26 98 2012-10-09 19:20:05 2012-08-14 01:41:03 1 6 71 0 9 96 5 188.50 20 52.61 NEW apYsspG+lsuhsts.pphsst.p....phpss.sshsYssspl...lh......Tcctssss.....sah..LsppGalppCs.....p..phs....u.....phcsasFoYs.....scspLhplscst.........ss...ht.phslsYpsGslsplppphss.tp..............pshhths.hss..sphss.h.s...hshlhlt..-hhs.lsh...hhhAhYuthLGcssppLsh.phhsc.....ss..p..cspsY..oYshDpcGhsos ............................................hst.splhphh...pp..t........hppp.hsh.sassspl......sh.........ss.cttshh..............shh....LsspGasppss.........phss...........p.csapFoYs.....sps.Lsplpcsh.........ss......hp...phslsYps.Gslsplpsp.ps.p................................p.hhphtYss..sphts..hhp...hshlh....hh...chhs.lsh.....h..uhauth..lG....cssppL.h.p.hsp..................ss......pptpa..oaphcppuhss....................................................................................................... 0 4 8 9 +15137 PF15284 PAGK Phage-encoded virulence factor Coggill P pcc Pfam-B_45688 (release 26) Family PAGK represents a new of virulence factors that is translocated into the host cytoplasm via bacterial outer membrane vesicles (OMV). Members are small proteins composed of ¡­70 amino acids. In Salmonella they are secreted independently of the SPI-2 type-III secretion system, T3SS. The OMV functions as a vehicle for transferring virulence determinants to the cytoplasm of the infected host cell. OMVs are released from the cell envelopes of Gram-negative bacteria and comprise a variety of outer membrane and periplasmic constituents, including proteins, phospholipids, lipopolysaccharides, and DNA [1]. 
22.80 22.80 23.30 22.80 19.80 19.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.19 0.72 -4.23 3 81 2012-08-20 15:00:40 2012-08-20 16:00:40 1 1 70 0 3 21 0 65.20 61 93.70 NEW MK+hNSVFLALVLlLSAoTFSALsMAA-SusHph.psIFPh...WCplWPAGIshPE..h.KhCp ........MKKLKEMAAISLFTLLAAGFSASVMADDQA.....P.....ERVPAAEVKPVGE....HVHWCT.LFDPDGMELPPLP......GMEW..... 0 0 0 2 +15138 PF15285 BH3 Beclin-1 BH3 domain, Bcl-2-interacting Coggill P pcc Manual Domain The BH3 domain is a short motif known to bind to Bcl-xLs. This interaction is important in apoptosis. 25.50 25.50 25.80 25.50 23.90 22.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.56 0.72 -6.70 0.72 -4.39 8 70 2012-08-31 13:46:58 2012-08-31 14:46:58 1 3 47 9 34 58 0 25.10 89 6.09 NEW suush-sLS+RLKVTocLFDIMSGQ ...DGGTMENLSRRLK.......VTGDLFDIMSGQ.. 0 5 7 15 +15139 PF15286 Bcl-2_3 Apoptosis regulator M11, B cell 2 leukaemia/lymphoma like Coggill P pcc Jackhmmer:P89884 Family Pfam:PF02180. Bcl-2_3 is a small family of eukaryotic proteins associated with autophagy. The family is found in association with Pfam:PF00452, 25.00 25.00 25.00 25.00 24.70 24.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.70 0.71 -3.68 3 4 2012-10-03 11:38:54 2012-08-31 17:02:54 1 1 4 5 0 9 0 125.20 59 73.14 NEW TYWATLITAFLKoVSKVEELDCVDSuVLsDVSKIITLTQEFRoHYDSVY+tDYGPALpNWKssLo+LFTSLFlDsINpGRIVGFFDVGRYVCEELLCP.GSWTE-HDLLN-pMTpFFIENNLMNaFo ...............................TYWATLITAFLKoVSKVEELDCVDSuVLsDVSKIITLTQEFRpHYDSVY+...tDYGPALpNWKpsLo+LFTSLFlDsINpGRIVGFFDVGRYVCEElLCP.GSWTE-H-LLN-pMTpFFIENNLMNaFs............................. 0 0 0 0 +15140 PF15287 KRBA1 KRBA1 family repeat Coggill P pcc Jackhmmer:A5PL33 Repeat KRBA1 is a short repeating motif found in mammalian proteins. It is characterised by a highly conserved sequence of residues, SSPLxxLxxCLK. 
The function of the repeat, which can be present in up to seven copies, is unknown as is the function of the full length proteins. 27.00 0.10 51.90 0.30 17.70 0.00 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.11 0.72 -4.60 21 189 2012-09-03 08:46:19 2012-09-03 09:46:19 1 6 22 0 74 223 0 44.00 32 25.27 NEW Gshup...u...sSs.u.SSPLQGL.sCLK-I.lsG..Pp..pPps.....sss..hsPtP ....................tp...u...sss.u.sSPLpGL.sCLK-Issst...Pp..tsps.........ss.......................... 0 6 6 9 +15141 PF15288 zf-CCHC_6 Zinc knuckle Coggill P pcc Jackhmmer:A6NNH2 Domain This Zinc knuckle is found in FAM90A mammalian proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.38 0.72 -4.28 25 289 2012-10-03 11:39:54 2012-09-03 09:47:41 1 25 154 0 191 274 0 37.10 43 3.39 NEW RVKCKsCGAFGHpu+SpRCPhKp..WpusLsPQslGs+c..tKE ..+lKCssCGAhGHh+TsKtCPlhp..tp..ss.ss.shu.pp..pE.................... 0 65 91 131 +15142 PF15289 RFXA_RFXANK_bdg Regulatory factor X-associated C-terminal binding domain Coggill P pcc Jackhmmer:O00287 Domain This C-terminal domain of Regulatory factor X-associated protein binds to RFXANK [1,2], the Ankyrin-repeat regulatory factor X proteins. RFXA is part of the RFX complex, Mutants of either RFXAP or RFXANK protein fail to bind to each other. RFX5 binds only to the RFXANK-RFXAP scaffold and not to either protein alone, and neither the scaffold nor RFX5 alone can bind DNA. The binding of the RFXANK-RFXAP scaffold to RFX5 leads to a conformational change in the latter that exposes the DNA-binding domain of RFX5. The DNA-binding domain of RFX5 anchors the RFX complex to MHC class II X and S promoter boxes [3]. 
26.10 26.10 26.70 26.70 24.70 25.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.75 0.71 -3.93 7 62 2012-09-03 08:49:35 2012-09-03 09:49:35 1 1 48 1 31 50 0 118.50 71 58.86 NEW oCTYpGCsETooQsAKQRK.PWMCK+HRNKM.YKDKYK.KKKsDQAhussuh.................hp-....ss-sslSlsKQRsGuhG-RPARPTLLEQVLNpKRLSLLRSPpVlpFLQpQQphLopQshtQppQphpG .......................................................................oCTYEGCpETToQVA...KQRK.PWMCKKHRNKM..YKDKYK.KKKSDQAlssuGsus........................usss.s+hEESsDshl............SlsKQRTGShG.DRPARPTLLEQVLNQKRL.SLLRSPEVVQFLQKQQQLLNQQVLEQRQQQFs.G................ 0 5 8 14 +15143 PF15290 Syntaphilin Golgi-localised syntaxin-1-binding clamp Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O15079 Family Syntaphilin or Syntabulin is a family of eukaryotic proteins. Syntaphilin binds to syntaxin-1 thereby inhibiting SNARE complex formation by absorbing free syntaxin-1. So it is a syntaxin-1 clamp that controls SNARE assembly. 
25.00 25.00 25.60 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.28 0.70 -4.93 19 130 2012-09-03 08:50:22 2012-09-03 09:50:22 1 3 41 0 54 139 0 285.30 55 52.53 NEW +sssuPsss+cs..YGsoS.s.....SSSNSuShKGSDoSP.phpRssRYpoCGDNHGI+PPsPEQYLTPLQQKEVsIRHL+sKLKESpspLp-..REoEIcELKoQLuRMREDWIEEECHRVEAQLALKEARKEIKQLKQVlETM+sSLs-K.....DKGIQKYFlDINIQN+KLEoLLpSMElAQsGs.hpDEssh-hhC..sSPu+oLs.SsshsKlu-.........uhtht-Q..usEc..huDSGlLssD-h...sspsDlhpp..hhos...sssc...........sssulhpstsh.st..ps..h.th..........ptsh........h.pEpulQTDs......lshSsDlcsll..plhp .............Rpp.sPlsh+su..YusS.S.s.....SSSN.SGShKGSDsSP.......hhRRS...............s+YhsCu-NHGl+PPsPEQYLTPLQQKEVslRHLKs+LKEopcRLp-..R-oEIs-LKoQLuRMpEDWIEEECHRVEAQLALKEARKEIKQLKQVIETh+ssLh..DK.....DKGlQKYFVDINIQNKKLEoL.LpSMEhApsGs.h+-Ehsh..........-........ss.sSPt+SLshusshs+huD...........shshpppssE-..sADSthlssDsh...sstsDlh-p..hlou...sss-...........phpLhpohshssh..h.t..sh.hh.....................t.psus............h..EpAlQTDh......V.YsPslspllpplh......................................................................................................................................................... 0 2 7 21 +15144 PF15291 Dermcidin Dermcidin, antibiotic peptide Coggill P pcc Jackhmmer:P81605 Domain Dermcidin is a family of peptides produced in the sweat to protect against pathogenic Gram-positive bacteria. 22.00 22.00 22.00 23.20 21.90 21.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.34 0.72 -3.81 2 11 2012-09-03 08:51:23 2012-09-03 09:51:23 1 1 10 1 2 10 0 94.50 73 82.66 NEW AYDsEAASAsGSGNPs+EASAAQcENAGEDPtLARQAPKPRKQRSSLLtKuLcstcKulsGLtpLGK-AV-sLEssGKGt..........Vp.sp.sLsSV ...................................AYDPEAASAPGSGNPCHEASAAQKENAGEDPGLARQAPKPRKQRSSLLEKGLDGAKKAlGGLGpLGKDAVEDLESVGKGA...........VHDVKDlLsSV................ 
0 2 2 2 +15145 PF15292 Treslin_N Treslin N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z2Z1 Family This family represents the N-terminus of treslin, a checkpoint regulator which plays a role in DNA replication preinitiation complex formation [1-2]. 27.00 27.00 29.80 29.80 24.90 24.50 hmmbuild -o /dev/null HMM SEED 803 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.59 0.70 -13.42 0.70 -6.67 11 59 2012-09-03 08:52:17 2012-09-03 09:52:17 1 3 43 0 36 60 0 621.90 46 46.02 NEW LYWVDTTEhuKLh-SPDHhGYWTlsELLpplGGTlLPsEohstshscstpslhssshcho....spPpLSsW.....hosLPhDSoLNsLLhssscYcAoFPphEGsLFLslctGKc.Q....cossVTLEPLuMpQRphppPVpI.hLKGoVsp......WshPtuuoLGT-SWlLpSs.-p.spssp.....phLFQQLspcLssEcLHLVA-Vsss-uhPPhTGllSPLSsoAslLTVhpsccs.EhQcahLQssssE.sspDssshhsDlVpsVLsplcsu.....-sssss....ssPVPEWsQQEL.....uRTs.PWosAVlE+W.FPhSNlSGASSsLMESFhLLQAsSss.cEEuS+oEuELT+pLSEhYQRKScE-us.susQc-s+KKRG.lPRTPVRQKMpTM....sRSLcMLNVARL...NVKAQKLpPDGuPssu.uEKuhQKssttRosDKlEs+GRsL+SSKsp-FKTEEELLSaI+ENYQKsVusG.-hhLhoCApshloTIKtFLKSpsTK-lEhsC..lsplKspLLKTSKsLRQp.lGpc..hDKEsKVRECQLQVFLRLEMClQCPSlppssD-hEQlVEEVT.-LLRhlsLTEDsuYLucFL.EEILpLYIsSIPcTL.......GsLYpSLGh.IPpKLAsVLPsDFFSDDSMTQEscSPh.shshsSs.sppulssuoEoDQLEELRTRSAKKRRKNALtR...H+SluEsSQNLRQIElPKVuKRss+pE.NS+ss......QQ..PhPpK-sVQEVTKVRRNLFNQEhlSPSKRshK+.hPRS+SVSAVEGLcaK.tphp+s+spth..sa+KLLTKpVuETPlHKQlS+.RLLHRQIKGRSSDPGPDIsVVEESPEKs 
.....................haWlDotp..phhtssDHhGa.ph.clLt.hGGsllP..sh...hsp.h..................t.s.......hp.lPh-uslsh.Lh.ps..aptsFP.hpGhLhhsht.sp.......pshsVhLEPlshpQ+.h.psVpl.hL+Gshtp......hshs..sthso-oW..hLpss.tt.ttttp.....t.hFQpLhppLsscplahlupVs...uhsshTullSPlSssshlLTlhpsc.s.thpthhhpsshsp.ssp-s..ss.hs-llsslLs.h.p....psssss....sssVPEWsQQEL.....upst.saosullEpW.FPhSshSGuSSsLMESFhLLpAsstp..p...-..-sScs-sELhptLuEhYQppsp-pss...ttpcppKKRG.lPRTPVRQKMpTM....sRS.LpMLNVARL...NVKAQKhpPDussssu.spKshp+hsttRss-+hcs+u+hh+o.ut.t-F+o.EEELLSa.lpcsYQKsVust..-hhL.ssApshlSslKhFLKScssc-lEhsC.......lspl+.spLLKTSKuLRQphupc...hDc...EsKVRECQLQVaLRLEhChQsPSlpps.--hEplVEEVs.-LLRhls..LTcD...suYLucFL.EEILtlYlsSIP+sL.......GplYpSLGh.lPpc....LAuVLPsDFFSDDShop-shSP....s..Ss.sppusssuscuDpLpELRsRSA+K......R.R.pssLhR...H+SlsEsSQsLRQIElP.......KhoKRs.s+pc.sspss.......................p.s.shKpslQEVTKVRRNL..FN..QEhhSPSKRuh+p.hPRS+SVSAlEGLcp.K......hsp.tp...t.sh++LLT+pVsETPhHKQlSp.RLLp+QhhGR.p.Ssss.-htlVEESP.K.s....................... 
0 5 8 17 +15146 PF15293 NUFIP2 Nuclear fragile X mental retardation-interacting protein 2 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z417 Family \N 27.00 27.00 34.40 34.30 20.90 23.00 hmmbuild -o /dev/null HMM SEED 599 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -13.23 0.70 -5.79 8 69 2012-09-03 08:53:03 2012-09-03 09:53:03 1 2 37 0 43 47 0 452.40 49 85.05 NEW ppKKTGaG-lNGsAs-tts.oPKsLsos-sssPs.ShVhNGsQphsDoNlp.KsotKuhThuKsGl+sKshhpKssMDKKN..-KoaESKs+Es.plDKpEslsl.NGVloNNSGYITNGYsGK.GADNDGSGSESGYTTPKKRKAR+NusKGsEsLshhp-KhhQQc.ss..slt.pL-s.Ks..shscttGsRlEGhKPsaKh-sssuG..hupuc.sss-lQRKNSDsKs.GssuKKFEDRsKGKhuossuSKEDSWTLFKPPPVFPVDNSSAKIVPKISYASKVKENLNKsAQss....u...........GEs.s.s.ts.......sRLSQVPMSAhKSVTSAoFSNGPVluGsDGsshss...ustslhss.AAuolss..ssuutssshs.-tussou.......AsEp+KsSLFIY...PSNMQslLPussQ....lshsup.TNQQsLGDIFQNQWGLSFINEPSAGPEsusscssccch.sEVoFQGE.pssshsopusphhPoGsp.hPsFPKAYELDKRTsPQ...uuhlKsu..ossE...uuuhspchph.h-.pKsDsuupGu.hVF.S+sp-l-..ss.AsPossLhuSAK-Q+Yp+GLER+-SWGSFDLRAAVlYHTKEME.IaNLQKQDPKRllTYcE ....................................................................................p+sG.htclNssss-tth.s.+s.sus-hspP..S...NG.spph.cssl....K.o.Ks.o.ststlps+sh.pKpsMDhKN..tKohE.pstEspshDKp-shsl.NGVls.NuGhITNGYhuK.uADND...GSGSESGYT.TPKKRKARpNusKusEslshhp-KhhQpcssss..lt.tL-t.+s..shscptGsRl-usKshaKhEstsuG..sspG+shhu-h.RKsSD.Ks.....GhsuKKhD-Rs.KuKtuosssSKEDSWTLFKPPPVFPVDNSSAKIVPKISYASKVKENLNKssQssos.uss...........GEo..ts.ss...........sRLSQVPMSAhKolTSAsFSNGPVluss-ssh.ss......shhss.AAsolss...hssu.sss...-hshsos..............AhE.hKsoLhlY...P.NMQshL.ustp.....lshPup.TsQpsLGDIFQNQWGLSFINEPSAGPEsshupsscpph.hploh.tc.hst..shsopusphhsoGsp.hssFscAh-L-KRTsPp...hushhpsu...spE...utsh..c.p.h.s-.pKs-spopGu.hsh.Spsh-lc..ss.usPossLhuSsK-ptap+shERp-SWGSFDl+AAlhYHTKEME.lhpLQKQDPp+llhYpE.............................. 
0 1 6 20 +15147 PF15294 Leu_zip Leucine zipper Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86TE4 Family This family includes Leucine zipper transcription factor-like protein 1 (LZTFL1) [1] and Leucine zipper protein 2 (LUZP2) [2]. 27.00 27.00 27.10 27.10 26.80 26.20 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.87 0.70 -5.29 18 148 2012-09-03 08:54:27 2012-09-03 09:54:27 1 4 59 0 78 120 0 222.00 42 84.38 NEW FA+pKRshpLKoVDsCFpDLK-SRLs--TaTsDEVs-hLDGLpsVV+uEVEsELINou+TNVLLLRQLFpQAEKWaLKLQ.sDISELENRELLEplAcFEKp-houus.....p.s.-..ps.K....LpPLsEu.GsutLLs+EIpRLQEEN-KLKsRLKolEppATssL-EKsKLcpsL+-LQh...sp.cs....hhcuQ-ls-LEsplAslKsEhEKshpcposppKsLc-sLhssKHcLL+VQ-Q..LphAEKELEKKFppTuAY+NhKchLopKN-QIK-LR++Lp+ ........shpchthpLKpV...Fp-.+pohLh.p.p.TaT.cEl.phLsGLpsslpo.hc.ELhNssaoshLLl+pLhppAcK.h.h+Lp.p-luElEs+pLlEplt..chEKtpho.s.......................Lts.s..Et.......hsK.ItcLQpENcpLKs+Lhohph.ss.th-EppKlpttLp-Lph............s.....hh...+uQplssLEpplAsh......K............s-hpKs.h.Dppp....p.KuLcEslths...hps....lh+sQ.....Lp.hhppp.pp.......hh..p...pshhpph.phhsppp.Q.pt.................................................... 0 18 24 40 +15148 PF15295 CCDC50_N Coiled-coil domain-containing protein 50 N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IVM0 Family \N 27.00 27.00 27.00 29.30 22.00 24.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.92 0.71 -4.23 11 129 2012-09-03 09:22:21 2012-09-03 10:22:21 1 1 71 0 74 106 0 127.30 47 30.86 NEW MuElpIDQSNLP.tVpEVCpsFAVLEDtuLAHNLQEQE..IEpahuoNlp+N+LVQ+DlpVAKpLQ-EE-....psps.hpppp+-LEcpDsEhAp.IQEcLhppAEctRppEpcDE-IA+pLQEcEhpEp+...R+p+ph ........................LP...tV.pEVC+-FAVLEDtsLAHsLQEQE..............IE.cHhuoNlpRNpLVQcDlpVA..KpLQ.EEDh...+Aps.p...h...p+ch+.....-l..Ep.........pDsElApEIQ..EcL.th..-....AE.c.pR....p..pE.c..cD.EcIA+hLQEcEhppc.++pp...h..................... 
0 13 22 50 +15149 PF15296 Codanin-1_C Codanin-1 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IWY9 Family This domain is found near to the C-terminus of codanin-1 [1]. 27.00 27.00 31.90 29.20 21.60 20.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.59 0.71 -4.25 23 96 2012-09-03 09:23:55 2012-09-03 10:23:55 1 1 75 0 64 95 0 117.00 46 10.95 NEW ppslDshss..lspplLhssCPaLsEh+slltsshsstsps+....ushh+.pIT.......................Psusp.........t.sppplQt.pLE-AFF+sQPsSlRRoVEFVsERlsSNsVKchpssllhshhppupshlpphh ........p.puLDshsl..VDppLLYsCCPaluEhRpLL.uuhlusosu+...........suGhhR.KIT.............................................Psosp..t.hssps.spspptLQt.pLtpAFFHsQPsSLRRTVEFVAERluSNCVKHIKATLVs-llcpA-shLp-.h........... 0 19 24 43 +15150 PF15297 CKAP2_C Cytoskeleton-associated protein 2 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IYA6 Family This family includes the C-terminus of CKAP2 and CKAP2L. CKAP2 is a microtubule associated protein which stabilises microtubules [1]. 
27.00 27.00 47.50 30.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.45 0.70 -5.63 19 145 2012-09-03 09:24:35 2012-09-03 10:24:35 1 2 41 0 87 116 0 252.30 36 51.18 NEW Apsssppsp+hhtps+shsppphpsspusss...........pRsstsKEoscE...R.+A+LsEW+suK.G+slKRPPhshh.......t..p.ptpp..EcsstSFWTThsEEDEQ+L....FT-KlNpThuECLpLIs.-GCP+EElLshLsclIpsIP-A+KLsKYWlCLARLEshpuslpclIuIYEcAILuGAp...PIEEhRcsls-ILp.hKsptp.sphupNhpptsssppplp-lp.........pt.shs..cstc.tp-ppcc+ssh.ppspppp-cppc.ssssslp.Tssp-sptuslIKYNVSoTPhLQShKKKhQh-pssu..sh+-LKFLTPVRRSpRlpcKss+LPDMLKDH.PCVSSL-QL.........sElssp.TssFlhR.NsAL ..............................................t.p..t...t.p..ts.h..................pst...Kpsst-....c.+tpLpEWptuK.G+shK.....RPP.phh............tttp..cp.shSFWpoh.....tcE-Ep+h....hopKlNpohoECLpLIp.pGs.tp-lhshLp....pIP.pA.cKhsKaWlChstl...push..pll.slYEcAlhsGAp....PlpEhRcslhsI.Lp...............stphp.pt..t........................................................ts.p...c.tpppp.....p.....p.pppp.th.sss.pht.psppcst..shIKhpls.shPhlpuh..................thp-hKhlTPVRRStRlpct...ss+hP-MLp-H.......sVuSLppL..........-ltt....spsalhR.NtAL............................... 0 10 16 33 +15151 PF15298 AJAP1_PANP_C AJAP1/PANP C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IYJ0 Family This family includes the C-terminus of adherens junction-associated protein 1 (AJAP1) and of PILR-associating neural protein (PANP). AJAP1 inhibits cell adhesion and migration [1]. PANP is a ligand for the immune inhibitory receptor paired immunoglobulin-like type 2 receptor alpha [2]. 
27.00 27.00 29.70 33.30 24.80 25.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.60 0.71 -4.44 9 85 2012-09-03 09:25:14 2012-09-03 10:25:14 1 2 35 0 49 74 0 163.40 48 56.79 NEW IsWGPTusD..lE-ss.suhhsssssssst..shTssossuToTstssps.sh.hThp..G...........P.thSThtss.s.sts.....sh.Pphhscsu.GLAVHQIITITVSLIMVlAALITTLVLKNCCuQSupsR+sSHQRKIpQQEESCQNLTDhoPupVsSslDIFTAYN-SLpCSHEClRsslPlYT-Etlp..ossaKouFNGNR ........................IsWGPTssc...Esss.Ps.hsPsh.sl......sh.suspshsTstss.ssth...phpspGlh.s.sP.........................ss.h.PhlhGspu.Glssp.hlTITlSlIhVlsA..TsllhK..C..hspStppRRsStQpth.p...QEESpQsLTDhoP....uulslhsAas-o.pso.-p.cspsssh...sh...pschtssFphNR.................................... 0 2 6 17 +15152 PF15299 ALS2CR8 Amyotrophic lateral sclerosis 2 chromosomal region candidate gene 8 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N187 Family This domain is found in amyotrophic lateral sclerosis 2 chromosomal region candidate gene 8 protein [1]. 
27.00 27.00 27.30 28.50 26.90 25.80 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.60 0.70 -4.76 33 110 2012-09-03 09:26:01 2012-09-03 10:26:01 1 15 36 0 86 153 0 199.60 31 36.09 NEW psshspsssclpWctt...........s.hlshsuhP............Fh........lhspthht.Cpa...Gpc+ctttpc+pppp.........................ptphphptocKhsCPA..plhl+...................clhpFP.................ca+lp...................................................................cspptp+cpshppL+pslhpt............hpsph.caalphPs.ppH.psHshtp.s...............................................shpptlc..............splhpKIccLVppG..lssl.plcc.pL.....ctalpcpha.pspphP.hpsp............pahPohpDl.....cschtpsppphc ..............................................s....ttstp+lhWcsp.............hlPa-GlP...............Fl...........htup..tsh...CQa...G.cRcthphc+hppp...........................pppspp.hKtsCPA..pIhl+...................cV.+FP.................-Y+lsst.................................................................c.h+hppccshphL+pslhs.............hsshh.RaYlpLP..sppsH.phH.htp.sh..............................................sh.pplc..............spltcKIp-LVupG..lpplhtl++.pL.....+pFVcc-LF.cscphPpppNh............paaPTspDI.....pN+htpsphph.p........................... 0 53 55 69 +15153 PF15300 INT_SG_DDX_CT_C INTS6/SAGE1/DDX26B/CT45 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N7B7 Family This domain is found at the C-terminus of integrator complex subunit 6 (INTS6), sarcoma antigen 1 (SAGE1), protein DDX26B (DDX26B) and members of the cancer/testis antigen family 45. 27.00 27.00 33.30 32.50 26.50 24.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.90 0.72 -4.52 36 206 2012-09-03 09:26:55 2012-09-03 10:26:55 1 3 71 0 105 190 0 64.00 48 9.46 NEW cchNs-l+pplhKElR+sGRsYcpIFplL.cpVpGsl-l+ppFlchsIKEAsRFKR+sLlpp.LEch .....pclNs-lKtQlhKElR+.GRcYE.+IFpLL.cpVQGslcs+ptFlc.sIKEAuRFK+RhLIpp.LEc...... 
0 23 29 55 +15154 PF15301 SLAIN SLAIN motif-containing family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8ND83 Family The SLAIN motif containing family is named after the presence of a SLAIN motif in SLAIN1 [1]. They are a family of microtubule plus-end tracking proteins [2]. 27.00 27.00 33.20 30.90 26.80 25.20 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.75 0.70 -5.29 13 195 2012-09-03 09:27:58 2012-09-03 10:27:58 1 3 43 4 93 159 0 316.00 44 81.31 NEW hLY......ss.pp.hssspKshSPlpWC.RplL-pPoP-hEsA+.psLph+L-Qh............................................................................h.s...ppsslSPQSSlD.....SELSTSEh--..su.uhsY............KLpDlTDVQIhARhQEES....LRQ-YAoTo..ussSRRSSShShpSh+...............R.ushSDQEhDt.ShE.-c-EphcpLshPpsphhssSP.........hp..s.P+Spo.Soh.................cpsR+Sspu..hs.tt.s.hpp.........................DKLRRShPNLs.Rssuhss.......................spsV+NSpShDSsLpsssuGluRhQSp.s.....l.pps+...usup.PlulRQPhKAsu.husslp..................us...t.hsssu.....shsspss.......s.s.....ossRSuLPRPusssssu.ssPRSKluQPsR..........................phLssPKo...h......usl+DsuW+DGCY 
...........................................................................................p.up.psLh.+L-.h..................................................................................................................................................................................p...htssLSsQSSlD.....SELSsS-.....DSIu.sY.........................KLpDl.TDVQIhARhQEES....LRQ-YAuTo...........SR+SSusShpSh+...............+..GThSDQEhDt.SL-.D.--phcphs.Ptsph.psSP..........p..s.P+Spo.uph.................ppsRhS.pup.hs..ppp..ppp.........................-KLRRShPNLu.Rhssh................................oV+sSpS.DSshpssssGls.RhQsp.s...................l..p.h+...............ssu..PlslR.QPl...K...Ahu.husss..s....................st......sssu............tsstss..............h.......hhhR..SuLPRPuhs..h..s...G.sl..PRSKluQPsR..........................p..L.sPKs...h......ushpDtsW+DGCY........................................... 0 17 24 54 +15155 PF15302 P33MONOX P33 mono-oxygenase Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96A73 Family This family of proteins contains a flavine-containing mono-oxygenase motif. It may have a role in the regulation of neuronal survival, differentiation and axonal outgrowth [1]. 
27.00 27.00 77.10 32.50 19.00 18.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.82 0.70 -4.80 13 91 2012-09-03 09:29:03 2012-09-03 10:29:03 1 3 39 0 48 87 0 232.10 60 95.71 NEW uGhLG+MSLPlGhpRRAhSYDDsLEDsAPMTPPPSDhuSplhWKpPVIP-+KYQcLucs..E-scssh......ssls.Suus-shsKlPVVKAKATplIMNSLITKQTQESIQ+FEpQAGL+DAGYTPHKGLosEETKYHRlAEAlH...KL+hQSG-.sKE-+QsoSAQS....TPSoTPpSSPKp............+.RGWFsp.......GSSsuLsGP-hS....ohDuGus-ts+ssu-KWSlFGPRo.lQKSso..G.......GFolQuY+GAQKPSPMELh+AQATphuEDPAsF.KPPKM-IPs.hEuK+..sPRsHNLKPRDMNVLTPTGF ..........hGtMSLPIGhhRRAhSYDDsLEDsAPMTPPPSDMuS.l.WK.PVIPERKYp.cLuc..hE-Gpsshs.....suhs.usuh-s..hppsPVVKAKATplIMsSLIT..KQTQE.SIQ+FEpQAGLpDuGYTPHKGLTsEET+a.hRluEulp...KLphQSGE.s+E-+.suSuQS....TPSoTPpSSP+p............p.R.GWFsp.uuosslsuss.s.....ohD.G....ssp......sts.s-+WShFGPRs.lQKsso..t.......uFuhQuY+GAQKPSPMElh+sQAsRhs-DPAs.h...pPPKM-lPs.hEu++...ss+sHpLKPRDhNVLTPoGF........ 0 1 4 14 +15156 PF15303 RNF111_N E3 ubiquitin-protein ligase Arkadia N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96B23 Family This domain is found at the N-terminus of E3 ubiquitin-protein ligase Arkadia [1]. 
27.00 27.00 30.50 30.40 22.30 21.90 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.03 0.70 -4.87 13 116 2012-09-03 09:29:48 2012-09-03 10:29:48 1 3 41 0 57 120 0 236.70 51 37.61 NEW hKuE.ssDAspptpsLct..ssPcshtsucshsschEshsuKsGs-h.p....-ot....t.susphpo..sLlhch+RcSpputsussp.....................-psSssctpc-SS.ScChpSPSSShHhGDSDTLSSs-Eptsstttuttt.ssts.............upopuuRpp+hsRSEoEoss..hMtt.p.+.........tpp++sssRhphVK......upRoQKQKERIhL.RpKREshAR+KYsLLpsSSoSsp.DLosDSSoSSSpEuE--l.....SGsS+s.ouslP ..............................................................tucshPttsEh.suKsust.sc.Lp.sp.ppptshsus..pp..sLlhch+RcSpput.u.sp.....................ppPos....pssS.scCh.SPSSShHhuDSDTloSsE-pEss.tcots...sut.............utoh.uRpp+hsRoEoEos...hMtt.p.+.........pS.c+.ssRhphVK......upRoQKpKERIhl.RpKREshAR+KYsLL.sSSoSsp.DLos-SSoSSS.-s-E-l.....oups+p.suplP..... 0 2 6 23 +15157 PF15304 AKAP2_C A-kinase anchor protein 2 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96FF7 Family This family includes the C-terminus of A-kinase anchor protein 2 (AKAP2). It includes the site where the regulatory subunits (RII) of protein kinase AII binds [1]. 
27.00 27.00 28.00 29.40 26.90 26.10 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.28 0.70 -4.83 18 145 2012-09-03 09:30:27 2012-09-03 10:30:27 1 7 47 0 79 126 0 242.10 23 39.72 NEW I.sRPLLopsuhs...ssP.c...+-RuRs.SlalQRDltpETpREcDhRRp.Gh.......ttsusPshhspssp.stLpRthSS......cslLu.ssDupstsPsPEs...++Vs+hsscuYQshLssGss..chss....uhstPst..stttsps.t.spuths.t..hs.sssps.spp.t.....p..t...pst.shl..-..hlp...ht.ph.s....sph....p.hs.chs....ussslRht+sQuSsLLEcElcsVLcRERElpEpRRsshh.sE.......shsPss..s..hc.psup..uSopuuG..hsGohSVSES.hasslphaStLsasspsPs.ts...tpppctthYAGIps.Dtls.EllpuoRVoRHKNuhApRWEuGlas ..........................................................................................................................................................................................................................................................................................................................................................................................................h...h.s.t......tph..........t..t.........ushhlRtp+ppo.shlEcEl+uspEREcELpcQR.phh..sp..........................hhtPs..............c.................Ssotpss..........psshuspps...shtp.....p..ps.h.hh.h.......pt...tt......tpphp.................s.spllcuhRVsR+KsshA.RWEutIhs..................................................... 0 8 11 34 +15158 PF15305 IFT43 Intraflagellar transport protein 43 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96FT9 Family Intraflagellar transport protein 43 (IFT43) is a subunit of the IFT complex A (IFT-A) machinery of primary cilia [1]. 
27.00 27.00 27.90 27.70 25.70 25.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.95 0.71 -4.17 34 125 2012-09-03 09:31:47 2012-09-03 10:31:47 1 2 100 0 81 136 0 137.10 37 64.49 NEW Wu-c.......................u......pcpsppsscphp.tppphpstshp........t..............DIPl..IPDL.--lp.....-EDhhsplAsPPolpss.+VhTh+-LD.sDLhptsuhpsl.........DstlD............LpLLT+.sLsscpplc.EcDpsW-WDpLFTEVouElps-h.stpt .......................................................................................t.tp..t.......tpthpt...p..........t.t.....................DIPl....IPDL--.lp..........-EchshpVAsPPolpss.RVhoa+-LD..sDLhc.hsAapsl........................DG-lD............LpLLT+.sLsPEcp.lc.EcDssW-WDpLaTEVoSElhs-hp...h........ 0 33 39 60 +15159 PF15306 LIN37 LIN37 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96GY3 Family LIN37 is a component of the DREAM (or LINC) complex which represses cell cycle-dependent genes in quiescent cells and plays a role in the cell cycle-dependent activation of G2/M genes [1-2]. 27.00 27.00 38.90 28.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.32 0.71 -4.22 48 143 2012-09-03 09:33:57 2012-09-03 10:33:57 1 3 106 0 95 141 1 153.20 28 60.56 NEW hpps.sps.tt.ppo.............hlhcLa-RuV-Luphppp..ss..........LYs.lCRuW..hcNpsp...hstt.t..............................tsttspplhpLPpP..........tss.h.phPs.hp.ppppttppt..............................hotpsLlppHhpRWKclRp+....Wppp.pppphpR....YppohplLpt ......................................t.pp.....ttt.p.pps..............alh+LFDRSVDLupasps..oP............LYs.lCRAW..h+Ns..Pps.tht.tpst.s.............tp.............psscspslhpL..P.P.........ssssshs.plPsshp.ppptptspt.....p..........................o.ssLltpphpRWK+lRp+Wpct.pppp..RYppshplLp................... 
0 31 41 71 +15160 PF15307 SPACA7 Sperm acrosome-associated protein 7 Coggill P pcc Jackhmmer:Q96KW9 Family SPACA7 is a family of eukaryotic proteins expressed in the testes. Proteins in this family are typically between 104 and 195 amino acids in length. There is a conserved DEIL sequence motif. The function is not known. 27.00 27.00 81.90 81.70 19.40 18.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.50 0.72 -3.92 8 19 2012-09-03 09:34:46 2012-09-03 10:34:46 1 1 11 0 5 25 0 104.90 59 62.33 NEW WQt.sphpPpp....pssGSsTEts..FsSppEDluElLDEILVQEIL-......uKTTspEhsSTuTTLpTth....AGh-ENYQtsu..oENYHEhLENlpaSSGTccclSsD-tsAsANLHu ........WQ..sp.cPpp....tssGSsTEIP..FSSKpEDhuELLDEILVQEILDh.....NKTTPoEMPSTASTLST.lH...AGIDENYQAGG..SENYHELLENLQFSsGhEsKlSNDEAsANANLHu. 0 2 2 2 +15161 PF15308 CEP170_C CEP170 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96L14 Family This family includes the C-terminus of centrosomal protein of 170 kDa (CEP170) [1]. 
27.00 27.00 61.90 48.60 20.60 22.80 hmmbuild -o /dev/null HMM SEED 691 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.26 0.70 -6.27 14 215 2012-09-03 09:35:26 2012-09-03 10:35:26 1 6 39 0 63 196 0 476.50 42 45.21 NEW suchshphp.ss.s.p.sshusts.p...u.FVRQESFTKEpuSusls.sKLPHISSHPh..........LpDLpss+usph-h.opDT+LlLKETEosLAALEAKLhsppspt-st..........sssst..-DSLSG-SDVDTASTlSLlSGKstsssos...pt..shuuhQKEKSSossusQD....ss.sSARERLoEKp+p..ssts.tpsEss+R.hph+RupGspGSLDhTDD-+uSuhsa.PsoDhsoSDpEp.....uuRstsR+KP.st.....ssK-Epu+sotssp+s..........QQsLTRSNSLSTPRPTRAS+LRRARLG-ASDsEsADs-+us.u...ss.sussuKssspsKKLSRLDILAMPRK.RAGSFTsPSDoEus.s.RouFSGR..SlEthhsuRKs.osu-u...+suu++sAsus......s+QPh..oRsRouSs+YoSsosp....RRQpGSDYoSTSE-EaGSs+sSPK.H.....pRSHsSTAhQTsRspss.utsp.h...ss.u.....sppp---.pcEs-sY..hhs.TuchA.EI.ARL.SQsLsKDlAILAREIHDVAGDGDShoSuGsu.Sool..SolP..sTPASTISsREE..............LVQ+IPEASLNaQKVPPGuhu........sc.shDQs..hsDsp-cshup+pRshsR-E...............VlhDNLMLNPVSQLSpsIRENTEpLApKMKILFQNptRsWEElEAKIsoENEVPILKTSNKEISSILKELRRVQKQLEVINAIlDPsGsLDlhsuN+usu 
..........................................................tp.................t..s........t.hlRQtSFTh-.soss...s.thlP+Isp...............tphtttptth....s.DTthlLK-oEsshAhLEA+l.pp..................tss....psSlSs-SDVDTuSTlS.hsucst.ps............s..+-pss.sss.ps.......sss+-p.pc+t+.....................puth..sShDhss-ppsss......s-.hsss.p.......s...spp....................p.pts+..s..................ts.s+psoLstPRPTRsShLRRARLG-sSDs-.s-s-+hs.t....t.....sst..s.tt+tlSRlDhLA.PR+.RhGShss.SDsEss.s......pss.SsR..ss..E.h.t.th.....sts....+.ss+httssh........s+.s...s+s+s.ssthssss...............................................sshp..p.t..s................t.pcc...cE.-.Y..hhs.Tsc.t.EI...A............Rl.......SQ....DlAllAREIpDVAG-hDS.sS..su.su..uso....tshs.ssPu..osl..ss+-E........................LVp+l.-tS.......LNapKlPPhshs...........st.s.c.p.....s-..c.....+pRsh..sR-E...............shhDsLhLssV.QhSptIRpsh-phAtKh+ILFpsptRsW--lEuKlpuEsElPllKTSshEISSILpEL+RVpKQLpsINshlDPsGsL-h.h.s...h..................... 0 4 11 30 +15162 PF15309 ALMS_motif ALMS motif Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96L16 Family This domain is found at the C-terminus of Alstrom syndrome protein 1 (ALMS1), KIAA1731 and C10orf90 [1-2]. 27.00 27.00 27.00 27.00 26.90 26.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.76 0.71 -4.33 17 152 2012-09-03 09:36:21 2012-09-03 10:36:21 1 5 71 0 88 160 0 123.80 34 9.40 NEW shl+hoLQEALph+RPDFIS+StcRlKRL..cLlscER+hQphhppERctLFs.s....cp.tt....s..hshp...hth.+..pRslsKKEMlpRS+RhY......cpLPEVp+++EEE+RcscYpo.RL+AQLYKK....KlpN...plhG.++s.hp ...........................................phoLQEuLph++PpFluRSppR...hK+L...chhsp..p.R+hp.phhpt...cpp..thhs....................tt.......h.h.shp.s.hhhs+.....pRhloc+EMptRo+RlY......ppLPEVpp++.EEp+..+ctt.......hpoNRL+AplF+K....plhsplht+p....s..................... 
0 24 31 49 +15163 PF15310 VAD1-2 Vitamin A-deficiency (VAD) rat model signalling Coggill P pcc Jackhmmer:Q96LK8 Family VAD1-2 is a family of proteins found in eukaryotes. The family is expressed in testes and is involved in signalling during spermatogenesis. 27.00 27.00 165.10 48.60 22.30 21.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.65 0.70 -4.71 8 28 2012-09-03 09:37:07 2012-09-03 10:37:07 1 3 21 0 13 30 0 218.50 53 65.13 NEW EE-QV.sssHRSIRVQTSKHLFWADKLIQASEHSLQ+thshQ.t+posscTsup.spp.l.pcshsSc.cQLQsPSspPs.PuTsS.Q..P.oPp..SS..sLoPAIGLAELlNFASSLAlASSSKhDLPNLEHMIKuPPQ...KAppPST-Pss...p.As-pp-tcpp.....sch.sEKP.LEss..EspKuaKQEDKNls+PYLDFSKPGhKRATIEGEVKLLQsPAhSPp.QGApKDSVPGTKKGoPLLLKIHFKLSSPoSPE ..........EEpps..Sus++SIpsQTSKHLFWAsKLIQASEHSLQ+slshQ.pptSsspshpp.hppslspsshsSc.cQlQ.PsupsuPPsTuSQt..PSPh..SS..sLsPsIGLsELIsFASSLAhASSS+hDLPsLEHMhKAPPQ...cAhEPSTEPhh....p.ss-cp-scpp.........sEt.sEKP.tEAt...ts.KuWsQEDKNhspsYhDFSKPGlKRATIcGplpLLQsPApSP.LQGuccDSVP.GpcKtsPLLlKIHFKLSuPo.PE............ 0 1 1 1 +15164 PF15311 HYLS1_C Hydrolethalus syndrome protein 1 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96M11 Family \N 27.00 27.00 27.10 27.10 26.60 25.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.22 0.72 -3.75 20 80 2012-09-03 09:37:52 2012-09-03 10:37:52 1 2 70 0 59 82 0 79.30 40 26.71 NEW IRPthpt.pppt....tKsDPVs+YapY+cpWcph+sP.GEcpRpsLRWplREpMhpps....s..+s.+hhl.PNsYlVPTpKKRpALRWplRspLA .............IhPhhsp.pppp.....tKsD.Vu+YapY+c-Wcph..+..hP.GEccRppLRWsVREpMLp+s....s..hs.+hhl.PssYlVPotKKRpslcWtlR..hs................... 0 20 22 41 +15165 PF15312 JSRP Junctional sarcoplasmic reticulum protein Coggill P pcc Jackhmmer:Q96MG2 Family JSRP, junctional sarcoplasmic reticulum protein 1, or junctional-face membrane protein of 45 kDa homologue, is a family of eukaryotic proteins. 
The family is to the junctional face membrane of the skeletal muscle sarcoplasmic reticulum (SR); it colocalises with its Ca2+-release channel (the ryanodine receptor), and interacts with calsequestrin and the skeletal-muscle dihydro-pyridine receptor Cav1. It is key for the functional expression of voltage-dependent Ca2+ channels. 25.00 25.00 30.60 29.70 23.40 18.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.11 0.72 -4.01 17 32 2012-09-03 09:38:57 2012-09-03 10:38:57 1 1 26 0 19 27 0 67.50 50 25.24 NEW s-ELPWG-lTLNKCLlLASlVALLuSuFQlh+......DslsGptssst..sPt.Wl..Pou.ss+c..st.sPtP......c ..p-ELPWGDloLNKCLVLASLVALLGSAFQLC+......DAVsG-ssh.t..sPtsWV.PPSS.sP+c.ss.uPhPc......................................................... 0 1 2 7 +15166 PF15313 HEXIM Hexamethylene bis-acetamide-inducible protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96MH2 Family HEXIM is a transcriptional regulator that functions as a general RNA polymerase II transcription inhibitor. In cooperation with 7SK snRNA it sequesters P-TEFb in a large inactive 7SK snRNP complex preventing RNA polymerase II phosphorylation and subsequent transcriptional elongation. HEXIM may also regulate NF-kappa-B, ESR1, NR3C1 and CIITA-dependent transcriptional activity. 
29.00 29.00 29.70 41.00 28.90 28.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.59 0.71 -3.95 25 120 2012-09-03 09:39:46 2012-09-03 10:39:46 1 2 80 4 79 116 0 132.50 41 43.16 NEW WK.PY.K.LoWc.............E+pph-Epps..pRAsRl............hu+GpPlAPYNTTQFLM-DH-tpE................................scLssssthpcttss.........................sspst.hsup-Dp....................................................FLp+-FS-sYEph+.sEpLpsMSKpELlpEYLcLE+chuclc .............................................W+PYhc.LoWp...........................E+pph-E+po.....pRAoRlRt.....pMFA+GpPVAPYNTTQFLM-DH-.cE........................Pc...Lcssph.p+ss..sps.................................................ssssp.tspscsct-.........................FLp+DFSEsYE+hH.sEpLQsMSKQELlpEYL-LE+phSph...................... 0 23 29 50 +15167 PF15314 PRAP Proline-rich acidic protein 1, pregnancy-specific uterine Coggill P pcc Jackhmmer:Q96NZ9 Family PRAP, or proline-rich acidic protein 1, is a family of eukaryotic proteins. PRAP is abundantly expressed in the epithelial cells of the human liver, kidney, gastrointestinal tract, and cervix. It is significantly down-regulated in hepatocellular carcinoma and right colon adenocarcinoma compared with the respective adjacent normal tissues. In the mouse it is expressed in the epithelial cells of the mouse and rat gastrointestinal tracts, and pregnant mouse uterus. This article describes the isolation, distribution, and functional characterization of the human homologue. PRAP was abundantly expressed in the epithelial cells of the human liver, kidney, gastrointestinal tract, and cervix. PRAP plays an important role in maintaining normal growth suppression [1]. 
27.00 27.00 54.30 30.00 24.00 21.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.76 0.72 -3.67 15 33 2012-09-03 09:44:09 2012-09-03 10:44:09 1 2 22 0 16 31 0 44.80 55 28.18 NEW PE.D+DsLYH..PtspE...sQtEscPhsp...sl.s+pVLp..GPEED+DHIYH ..PEPD+DuLYH..P.s-E...sQsE-pPhhh...shss+QVLp..GPEEDpDHIYH 0 1 3 5 +15168 PF15315 FRG2 Facioscapulohumeral muscular dystrophy candidate 2 Coggill P pcc Jackhmmer:Q96QU4 Family This family of proteins is found in eukaryotes. The family is localised close to the D4Z4 repeats on chromosome 4 and 10 that are associated with the autosomal dominant facioscapulohumeral muscular dystrophy (FSHD). FRG2 are transcriptionally upregulated in FSHD myoblast cultures suggesting involvement in the pathogenesis of FSHD [1]. 27.00 27.00 29.00 28.50 22.10 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.39 0.71 -4.31 8 40 2012-09-03 09:44:56 2012-09-03 10:44:56 1 1 15 0 18 49 0 172.50 58 68.15 NEW GSEPppcE.-sSpEo-hptps.SsusoEsESuophEusRKRKhSS+DSopsssGus...st..EsSsohcpK+.Rsss.us+ssEspcsusu+pcspu..tsG+pcRpRsRs.sscPPPlRKSLVToLRuhSEAIYpDlsQhpAQQttSPLT.EQLstLuQLpGPLsAslQTlYoMAsQAAaAFPAEGWL ..............................GSEPsPNc.ENScEoKL+usN.SoAsSEsESSShpENsRKRKISS.+DSspDpAGNC......Pc.cEsSloLpKKu.RuST.uVHsSEIQETs-uH+RGpSRApoG+u+RHRSRuhuspsPsLRKSLVTSVRuhSEAlYQDLAQVhAQQh+SPLTpEQLohLoQLRGPLCAtVQThYoMAoQAAasFPAEuWL..... 0 3 3 3 +15169 PF15316 MDFI MyoD family inhibitor Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q99750 Family Members of this family inhibits the transactivation activity of the MyoD family of myogenic factors [1]. They affect axin-mediated regulation of the Wnt and JNK signaling pathways [2], and regulate expression from viral promoters [3]. 
27.00 27.00 50.20 49.80 25.30 24.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.44 0.71 -12.10 0.71 -4.27 9 115 2012-09-03 09:45:47 2012-09-03 10:45:47 1 1 42 0 61 106 0 162.80 51 76.62 NEW QPps..........lPh.ssustcs.ptEhGphps.......sGs.....Gsh....ss..............p+hp+KlposhSlsSsuu+KSKsuo.....sppsuphP..tp...DCCVHCILACLFCEFLTLCNlVLspAoCGh.CoSEs....CCCCC.GsshGsDC....sCPCDMDCGIhDACCESSDCLEICMECCGICFPS ......................................................................................p..thps.....lsNGs..Gh.p..Gstphuus........h.u..Assu..pKhH.RKlQ.op.SlsSpsSKKSKssu.....pppsup.P...tE..DCCVHCILuCLFCE...FLTLCNIV...LspAoCG.......CoSEs.....CC......CCC.......Gst......s...sDC....shPCDhDCGIhDACCESuDCLEICMECCGlCFsS..... 0 2 9 21 +15170 PF15317 Lbh Cardiac transcription factor regulator, Developmental protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BQE6 Family The family of proteins are cardiac transcription regulators, named Lbh, short for Limb, bud and heart. They regulate embryological development in the heart [1]. More specifically, in humans, they may act as transcriptional activators in MAPK signaling pathway to mediate cellular functions [2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 92 and 116 amino acids in length. 27.00 27.00 28.10 33.50 26.60 17.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.47 0.72 -3.73 18 80 2012-09-03 09:46:51 2012-09-03 10:46:51 1 2 40 0 42 78 0 86.90 56 67.14 NEW MTEV.hso..ssh--hsls.......PccspLohQIFPDsu-h....-cssK......LKcRLPSIVVEPTE.uEVESGELRWPPE-hLlp.p-pc-ptt-pthpsppp ........................MTEVhMso..ssM--huLs.......PcKDtLSaQIFPDPSDF....-RsCK................................LKDRLPSIVVEPTE.GEVESGELRWPPEEFLlpED-p-pspEsttpsp.................. 
0 2 8 19 +15171 PF15318 Bclt Putative Bcl-2 like protein of testis Coggill P pcc Jackhmmer:Q9BQM9 Family This family of proteins is found in eukaryotes. The family may represent a set of Bcl-2-like proteins involved in apoptosis, see UniProt:Q9BQM9. 25.00 25.00 69.60 40.30 19.90 19.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.37 0.71 -4.28 6 22 2012-09-03 09:50:31 2012-09-03 10:50:31 1 3 17 0 10 20 0 140.00 60 88.05 NEW MGNsSSHKRTKAPKQA+KE+PPDMDKA+t.+QFFSHLKpK........KPus..................uhhsKIVLLFPLDKRQQLAEA.......ut.s......-cssG.....As.s....uhPA.APMLRGAGDus-RR........c.......ch.plhlLL...hlt.-uhhttc.........G.......GuKuAQsWQcLas+LLocuEA-sEuss.AEEQPRKRR+CPR .....MGN.SSHKRTKAPKQA+KERPsDMDKAha.KpFhsHLpRK.............KPus..........................+IVLlhPLDKRQ.LApA........sp..-.ssG.....As.h....u.PA..APhLRGAG-Gs-Rc..........ch..lhlLl...h...-sh..t-..........G.......GA+us.sW..Lhs+hho.tcsstEuts.tcpQPRKpp+s.R........... 0 1 1 2 +15172 PF15319 RHINO RAD9, RAD1, HUS1-interacting nuclear orphan protein Coggill P pcc Jackhmmer:Q9BSD3 Family RHINO, or RAD9, RAD1, HUS1-interacting nuclear orphan, is a family of eukaryotic proteins [1]. Under genotoxic stresses such as ionizing radiation during the S phase, RHINO plays a role in DNA damage response signalling. It is recruited to sites of DNA damage through interaction with the 9-1-1 cell-cycle checkpoint response complex and TOPBP1 in a ATR-dependent (ataxia telangiectasia and Rad3-related) manner. It is required for the progression of the G1 to S phase transition of breast cancer cells, and it is known to play a role in the stimulation of CHEK1 phosphorylation. It interacts with RAD9A, RAD18, TOPBP1 and UBE2N [2]. 
25.00 25.00 37.30 32.90 20.40 20.10 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.76 0.70 -4.55 18 48 2012-09-03 09:51:53 2012-09-03 10:51:53 1 2 35 0 26 42 0 219.60 49 98.14 NEW MPP+KKRpppspKAQLLF+ppPLEGPKHchuSPQhsh.THT..RQVPoKPID+sTlTSWVSPQFDsTsEoaFPuppK....H.R.......c........pA+pSSRKSsos..KFPpLoFEospoS...S.S.-.sL..uls..hs+......-s.PsQscKDlopRPLVPhLSPQSCGElSs+.....sLpohPhVFlPPDIQTPEsS......sl+p....s......slPs-p+csuLsu.CshHssoPpSP-PGPVLVcDTPEEKYGlKVTWRRRcHLhsYLRERGKLS+SQFLV .......................MPP..+..KKR...pp.opKApLLF+ppPLEGPKHphuSsQhsh..THT+QVPSKPIDps...TlTSWVSPQFDssupoh.FPstpK....Hp..............s...............pA++uSR+osos..KFPcLoFEospsS...S.S.-.sL..uhs....hh+...............cs.Psps...cKslsp..RPLlPhhSPQSs.uphSsp.....t.pshshVhhPPDlQTPE.S.......s+p............p..ls.spppsph.s.s..tssoPt.ss-PusVLVcDTPEccYGlKVTWRRRpHLhsYL+-RGKLspuQhLV........................................ 0 4 5 10 +15173 PF15320 RAM mRNA cap methylation, RNMT-activating mini protein Coggill P pcc Jackhmmer:Q9BTL3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 102 and 154 amino acids in length. There is a single completely conserved residue D that may be functionally important. RAM is a family of eukaryotic proteins that are an obligate component of the mammalian cap methyltransferase, RNMT (RNA guanine-7 methyltransferase). RAM consists of an N-terminal RNMT-activating domain and a C-terminal RNA-binding domain. Either RAM or RNMT independently have rather weak binding affinity for RNA, but together their RNA affinity is significantly increased. RAM is necessary for efficient cap methylation, maintaining mRNA expression levels, for mRNA translation and for cell viability. 
25.00 25.00 25.90 28.30 22.30 22.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.06 0.72 -3.72 24 82 2012-09-03 09:53:27 2012-09-03 10:53:27 1 2 67 0 60 71 0 82.00 43 64.28 NEW pchE-hFssRFT--DpEapcahp..+PscsPPlV-sW..p.....u...p....suGsp.........+sp.ssphp.s..pcph....pGpstpt...shpstsR..pp.pa.ps.R...saup ...saEEhFAsRFT--DcEYQEYlc..+Ps-sPPIVEpW...p.....u..R....uGGsp.........................Rsc.sNphp.s..sRpa....cGcss+t...GWsscs+..tp.pa.ps.R..sht.................................................... 0 10 14 33 +15174 PF15321 ATAD4 ATPase family AAA domain containing 4 Coggill P pcc Jackhmmer:Q9BU68 Family ATAD4 is a family of proteins is found in eukaryotes. The family is also known as PRR15L, or proline-rich 15-like. ATAD4 is expressed almost exclusively in post-mitotic cells both during foetal development and in adult tissues, such as the intestinal epithelium and the testis. Its expression in mouse and human gastrointestinal tumours is linked, directly or indirectly, to the disruption of the Wnt signaling pathway. 25.00 25.00 25.00 60.40 24.70 24.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.29 0.72 -2.98 28 78 2012-09-03 09:54:15 2012-09-03 10:54:15 1 2 41 0 44 68 0 89.60 47 75.69 NEW WWK.LThhRKKps....ps.+VhhpsP.................................sp.....us.........ss..sspp.......s......s.....p....sst.stcs........phssRL....-Kls.ccp...sppRplKVS+SGRFKEK+KVRATLs..p ...................WWK.LTh.RKKcu....ps+Vhh-hP.................................sph....us............ss..uusc.......t......s...ps......sst.sssu.........shssRL....EKls-cp...optRplKVS+SGRFKEK+KVRATLs-p. 0 4 8 19 +15175 PF15322 PMSI1 Protein missing in infertile sperm 1, putative Coggill P pcc Jackhmmer:Q9BUN1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 249 and 341 amino acids in length. 
25.00 25.00 26.10 26.10 24.70 24.10 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.20 0.70 -5.34 11 35 2012-09-03 09:55:01 2012-09-03 10:55:01 1 2 23 0 14 27 0 272.00 60 89.90 NEW AQG.Tpo.Tu....hQRVSFRFG.....GP.ho..RoY+oTu..RT...shPR+hR..VThEDEsDssAsADRLAGPAAAELLAoTVuTGhu+sp...ss.s....-EDGSLEEGVVIsARKsso....shct.sssspT.stusoo..RFhANoQEPEIRLTosl.suoh+sTt..-.lsS-sTLopWSTAGSTPsRWPsPSPTAMPP.PEDLRLVLMPWGPWHCHCKSGTMSRTRuGKLpGLSGRLRVGALSQLRTEHRPCTYpQCPCNRc+EECPLDouLCsDosCoopsosp...ohsshPslahRhpPs.h....s...SP..SPALAFWKRVRhGLEDIWNSLSSVFTEMQPl ..........AQG.TpTsTt....MQRVShRFG.....GP.hs..RSYRoTu..Ro...slPRKhR..lhLEDENDAhAsADRLAGPAAAELLAuTVuTGhSRSS...s..s....EEDGShEEGVVIsAtKsso.........stchsSssssT.sGuSST..RFhANoQEPEIRLTosLPposh+sTp..D..sS-sTLopWSTsGSTPsRWPsPSsTAMPs.PEDLRLVLMPWGPWHCHCKSGTMSRoRuGKLpGLSGRLRVGALSQLRTEH+PCTYppCPCNRh+EECPLDouLCsDosCuopsTTp...ossshsslHhRppP..h...Pss...SPsPALAFWKpVRIGLEDIWNSLSSVFTEMQPl............... 0 1 1 1 +15176 PF15323 Ashwin Developmental protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BVC5 Family This family of proteins are found in eukaryotes. These proteins have an important role to play in developmental biology, particularly embryogenesis. It plays an important role in cell survival and axial pattern [1]. It is also thought to be a crucial subunit in the tRNA splicing ligase complex[2]. Proteins in this family are typically between 141 and 232 amino acids in length. There are two conserved sequence motifs: HPE and PQR. 
27.00 27.00 32.40 32.40 24.90 24.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.46 0.70 -4.34 17 75 2012-09-03 09:55:43 2012-09-03 10:55:43 1 3 67 0 50 70 0 183.50 38 84.61 NEW ss.hLLHPEL..LSc-FLlLhLp.p+sIhscs....p.sKDpLT-LYlpHslPLPQRcLPcsRWGKhhEKpRt.p.th......tspp............ppssss-s.RKRPLI.........VFDGsSoso.slKl+Ks-su....ss.DRLKPPP.s..hossl+KLSssospsSsss..ss................p.s........spsshts.ps.P.o.........hstssssKL.....................KRsuspp.-sssss-hKssEsK+KIQHV..TWP .........s.-hLLHPEL..LSpEFLLhhLc.p+....sIsVEs....+hsKDsLsDLYlQHAlPLPQR-LPcsR..WGKhhEKcRp.pp.ch......pspp..........................ppss..ss-s....RKRPhI............VFDG.sSooo...olKl++s-su......ss.DRLKPPP.t.......hossh++lptsss.psus.....s....................p..........tpt.....p....p......ht.sssspL.....................KRssspp.t.p..tphpsspsK+KIp+l..TWP.............................................................................................................................................................. 0 12 15 32 +15177 PF15324 TALPID3 Hedgehog signalling target Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BVV6 Family TALPID3 is a family of eukaryotic proteins that are targets for Hedgehog signalling. Mutations in this gene noticed first in chickens lead to multiple abnormalities of development. 
27.00 27.00 34.00 33.50 24.10 25.60 hmmbuild -o /dev/null HMM SEED 1252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.39 0.70 -14.02 0.70 -7.21 10 106 2012-09-03 09:56:21 2012-09-03 10:56:21 1 3 35 0 35 97 0 709.20 48 82.78 NEW sDIFISQYosGQKDALRAVLKQKAQShPVFKEVKVQLLEDAusEKcsl.uQEsRhSPuGIDSATTVAAATAAAIATAAPLIKVQSDLEAKVNSVoELLsKLQETDKQLQRVTEQQTsl.QsKpEKlHCHDH....EKQMNsFMEQHIRHLEKLQQQQIDIQTHFISAAL+ouS...aQPsslPsSRsVEKaslKs-psslGuushSS+ssFss+p..As...................h+psEDhuFDcQKSPLETPAPRRFAPVPVSRDscISKRENPhEEKENh-hssp+GssRLLEQILNspDo.oRKSESS-.tTSLopSKhGWNPE...+ps........pFPSsEELGTAcVTVQKu-DlLpDLGQK+KEoculLQ...KpS.shlcLuDhPQsss.+.............LQoTpsTRSlLKDAEKILRGVQNNKKVLEENLEAIIRAKDGAAMYSFINALoTNREhoEKIRIRKTVDEWIKsISAEIQDELuRKDYEQKRFDQKspRsK+ApsM.......SK-IKsNTQ-Ks...lN+ssh.tK..pKQlE..-phcs..lpshs.Sshp.+cR+-GhLKussllQDEDYhhQlYGKPVYQGHRSTLKKGPYLRFNSPSPKSKPQRPKVIEpVKGTKVKShRTQTDhaATKPlKhDSKhpHSlshLP+u-.QQYLFSPSREMPThSGTLEGHLIPMAILLGQTQSNSDShPPAGVlVsKPHPVTVTTSIPPSSpKscTGVKKPNIAVlEMKSEKKDPPQLTVQVLPNVDIDSISNGSu-sS.s.ssSPcEAS.sPlpsWIQsPEhhKsDEEElKFPGoNFDEVIDVIQ-EEKsDE.IPEaSEPlLEFNRSlKVVSTKYNGPPFPPVASssQPTsDILDKVIERKETLENSLIQWVEQEIMSRIISGhaPlQ.pQsssslSVSsSEsSEPlTSDIVEusuGGuLQLFVDAGVPVNSDMIsHFVNEALAETIAlMLGDREu++ssPsAssVPGsloosET.L.ARlsTPVATPQPTPPpS....PsSssKEhVLVKTP-SSPCsS-HDsshPlKElhAEcGsD.hPAlTLVsTPsVTPsoTPPPA.AuhTPThSEhSI-KLKhsSPELPKPWuDuDLPL-EENPNslpEEshaPRAlVMSVAKDEEPES.chPs.PusPc.PlPhpPhPsus+APSsspsPSS-SST.ESoLShT.sTETETLDRPISEGEILFSsGQ+LAs+.luDuG.LaLsNLNDSLSSTLpDAhEMEDDPPSEGQV.IR+PHKchHpDslLSLLAKQNQcslsSQQulYHSEDLENSVGELSEGQRPpLsAAAEsIhMGp 
.................................................................................Psh+tV+Vplhtsst..+t...s.c.t.s.pthpsssolAAATAAAIAssAPLlKsQpphpAplspVsphLpKLppspt.l..h.t.t.t....p.p....tp................................sphIsuAhp.tt...h.......hssst...h...c.t......t.....p.h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 3 5 11 +15178 PF15325 MRI Modulator of retrovirus infection Coggill P pcc Jackhmmer:Q9BWK5 Family MRI, or modulator of retrovirus infection, is a family of eukaryotic proteins that regulate the activity of the proteasome in the uncoating of retroviruses [1]. 
27.00 27.00 27.70 41.30 21.10 25.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.51 0.72 -3.07 14 32 2012-09-03 09:57:11 2012-09-03 10:57:11 1 1 25 0 17 32 0 99.20 66 70.21 NEW TVYCMNEAEhVDVALGILIE.uRKQEKPhEpsslAGADKPEhSP....ssStSPtsSS.GusSE-EDsGpDu.sP..GLuPspsPuGScSACScSPEc...-EDsLKYVREIFFS .TVYCMNEAElVDVALGILIE.uRKQEKshEQsuLsGADpPEhSP....ssSsSPaoSS.GSoSE-EDuGc-u.sP..GLSPSQtPuuSsSACSRSPE...E..E-EDsLKYVREIFFS... 0 2 2 2 +15179 PF15326 TEX15 Testis expressed sequence 15 Coggill P pcc Jackhmmer:Q9BXT5 Family TEX15 is a family of eukaryotic proteins that is required for chromosomal synapsis and meiotic recombination. TEX15 regulates the loading of DNA repair proteins onto sites of double-stranded-breaks and, thus, its absence causes a failure in meiotic recombination [1]. Two polymorphisms in the TEX15 gene could be considered the genetic risk factors for spermatogenic failure in the Chinese Han population [3]. 27.00 27.00 307.70 44.90 19.00 18.60 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.58 0.70 -4.73 12 63 2012-09-03 09:57:43 2012-09-03 10:57:43 1 4 25 0 32 57 0 220.00 40 16.72 NEW N..hsKR..c+pGchKs.SpcsQo.spth..phshh.SKPulh.tlsphPlhct..+Schscs.s....pss...s-LcEpHsosspsuhlscLSpILQRADEASSLphLQEEocsCQNlLPLFVEAFERKQcCShcQILISR-LLVEpNLWsN.CKa+LKPsAlDoLVELQMMMETIQFIENKKRLLtGEPTFRSLLWYD-oLYuELLttP+GaQQQSsFYPuFQsRLKYNAFsELQpYHsQLIELh ...........................................................................................p........p...hh.tcst.h..thsp.slh.tppcsspls........pcsss.hpphK.pps.h.s...Hss.IsslSpILcpA-.susLphLQE.ThhCpshL.lhhchFphhQEssl-pIhIocE...llD..sl.pN.sp.hhLKPpAl-ohlEl.Mh.ETIpFlcNpht+hhsc.pFRuhLWaD.SLhsELltp.c.............................................. 
0 2 3 5 +15180 PF15327 Tankyrase_bdg_C Tankyrase binding protein C terminal domain Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BY89 Domain This protein domain family is found at the C-terminal end of the Tankyrase binding protein in eukaryotes. The precise function of this protein is still unknown. However, it is known interacts with the enzyme tankyrase, a telomeric poly(ADP-ribose) polymerase, by binding to it. Tankyrin catalyses poly(ADP-ribose) chain formation onto proteins. More specifically, it binds to the ankyrin domain in tankyrase [1]. The protein domain is approximately 170 amino acids in length and contains two conserved sequence motifs: FPG and LKA. 27.00 27.00 28.00 28.00 21.40 21.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.49 0.71 -4.08 17 95 2012-09-03 09:58:28 2012-09-03 10:58:28 1 3 38 0 50 89 0 166.70 50 15.10 NEW s.cFuF..tpTslLDS.SAL+oRsp....Lu++.pp..+RAPsStuhRt....sR..ts.sthshs--ss.sWhFpDS....sppp+ss.p--u-.t.E.t+s.+ocpssssps.+lslFPGhDPSALKAQLRKRs-u........-u.s-t...ussQhSKSPKsPh.h.G..usRVLPPus-K-suSE-.sSPpWL+-LKSKK ...............................t..DFSF.I-pTplLDS.osh+oRsp.Lu++.pt.+RAPs...........Rsuto..t.s.ts-sD.sWhFpDS....sptp+sP.pp-p-..EE.pspRochosso+s.+lslFPGhsPSALKApL+pRscu..................-S.upp...ShsQpuKSsKsPh.h.G..tshlLPspsEKspt.....SEt.sSPpWLptLK.KK...... 0 2 5 16 +15181 PF15328 GCOM2 Putative GRINL1B complex locus protein 2 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BZD3 Family This protein family is named Putative GRINL1B complex locus protein 2. GRINL1B is short for: glutamate receptor, ionotropic, N-methyl D-aspartate-like 1B. The name indicates what sort of receptor it is thought to be, a ligand gated ion channel specific to the neurotransmitter Glutamate. This family of proteins is found in eukaryotes. Proteins in this family are typically between 325 and 463 amino acids in length. 
The protein is thought to be the product of a pseudogene with a role in helping assemble a gene transcription unit [1]. 27.00 27.00 27.00 27.00 26.40 26.70 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.46 0.70 -4.60 48 192 2012-09-03 09:59:30 2012-09-03 10:59:30 1 2 70 0 83 176 0 245.50 28 63.40 NEW hLcRQp+lLpN.........++hlpcLPDKGcKIpchhp+lpstlsc+.c-l-pp............uphhpsLslssp....tth..sphphpttt.pcphhphsshh.tsp..sppcpstht.hsQ...........ss.ctpthhhhpsssshhst...........................ts.t-htphh......t......t..............................................p....................p..p+ht.hhpppp++..................................cshchcsKhpshpssp ..................................................................................................hpcp.phlpp.....................................cchhtphs-htp+l.p.htcl.ht.tpp.c.lcpp............sth.psls.s..sh....tth..sshphpttt.p-thhphpph..tss..p....p-ppp.hpssQ..s.hh+hp.tpopctsspVhhEhotplhsp...........................tshpEuspthpt...tEhshp.p+-pcluEl.......p+hltthppEpptL..ph...........................................h..t...s.u-ppc....pp.LEc.tsssL+E+I+HLcDMlcsQQ+KV+tMl.............................tEs.Ehps+hc........................................................................................................... 0 13 17 43 +15183 PF15330 SIT SHP2-interacting transmembrane adaptor protein, SIT Coggill P pcc Jackhmmer:Q9Y3P8 Family SIT, or SHP2-interacting transmembrane adaptor protein, is a disulfide-linked dimer that regulates human T Cell activation. 
27.00 27.00 27.40 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.69 0.72 -3.90 36 122 2012-09-03 10:10:56 2012-09-03 11:10:56 1 3 34 0 64 132 0 102.10 28 42.21 NEW hhLhslhullL.Ll.Lshslhtahtt++pp+ssph...........................tslE-sPlYGNLshhps...sshspsshcphpspPppSspspp...Ehss.s..hscpphsYASLshss.sp.c ...hhLhsllulLL.Ll.lsssLhtWphh.p+ppKhsph............s......ppsttsh.E.ss.h.YuN..Lshh.s...tshsppshpphps..Pppusptt....Ehss.s..hsccphsYASLshss.tt..................................... 0 5 5 14 +15184 PF15331 TP53IP5 Cellular tumour antigen p53-inducible 5 Coggill P pcc Jackhmmer:Q9Y2B4 Family TP53IP5 suppresses cell growth, and its intracellular location and expression change in a cell-cycle-dependent manner. 27.00 27.00 77.00 38.00 23.80 21.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.46 0.70 -4.43 14 41 2012-09-03 10:11:55 2012-09-03 11:11:55 1 3 28 0 22 33 0 185.80 51 80.98 NEW pQPVSKlIERNRLKMVLKNLSLLKLLKSSNsRIQELHNLA+RCWNSLLRVPKILpISSGsssVssKscQNNcEhQE.AsssccpLESKKhE.SsuEPK.....t.csc.tst.pstsptSPuAVsp+EcQhEsElP+TS+ucGL..s..PGApu+QssTtsPpVlhLKTapp+TPhtDhKQL-.sADQasWFEGLPTRIHLPGPRVMCRuSoLRWVKRCCTRFCSASL ...........................QslsKhlERNRL+hVLKNLSLLKLLKSSN.RIQELHpLA+RCWpSLLp..VP+ILpISSupsssss+scQsscEhQE.hssspcpLcSK+l-.usu-PK.........c.ps..t.thpsttptS.uAhs.+-cphcs-lP+Ts+spuL..s..ssApu+Q..TcsPpllhlKsappRsPhtch+Qh-.hAcQahWFEGLPTRIHLPuPRVMCRuSsLRWVKRpCTRFCSASL........................................................ 0 1 1 5 +15185 PF15332 LIME1 Lck-interacting transmembrane adapter 1 Coggill P pcc Jackhmmer:Q9H400 Family LIME1 is a family of eukaryotic transmembrane adaptors. It plays an important role in linking BCR stimulation to B-cell activation and is expressed in primary B cells. 
LIME localises to lipid rafts in T cells in response to TCR stimulation [1], and is phosphorylated by Lck and recruits signalling molecules such as Lck, PI3K, Grb2, Gads, and SHP-2 [2]. LIME acts as the transmembrane adaptor linking BCR-induced membrane-proximal signalling to B-cell activation [3]. 27.00 27.00 32.50 35.30 26.00 21.00 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.71 0.70 -5.12 8 37 2012-09-03 10:12:49 2012-09-03 11:12:49 1 2 22 0 14 35 0 187.90 51 73.69 NEW LCTsC....HR.D-.L..RKpupRQpuRLQGohMPuEhSLLRQspLCSLSKSDTRLHELHRGPpsS+A...RPASMDLL+PpWLEsSRussRs...PsAFsHRELPQu.PuA....s.husEATYSNVGLAAIPRAu...........LAAsP.VV................AEYAClQK.hKGT-pGP.....Qs...LpQuKAclpPAsQVDILYSRVsKPKRRssuPs.....sDQ.DPKupusILsL........GSD.uYEsL.PLpGpuh-su.LENVYESIQE .................................Ktspcppst.psshhss-hsLLRps+LCSLSKSDTRLHELHRGspsohA...RPASMDLL+P+WLEhSRusops.ssssAFs.ppLPpA.PAA..hssshusEATYSNVGLAAlPRuS...........LAASP.VV.....................AEYAslQK.hKGTcpGs.....Qp....tQ.KschhPAsQhDlLYS+lpK.t++s.tss.....ss..s.pstts...h........tss.tYpsh.s.hu.thppt.hENhYESlpE........... 0 1 2 2 +15186 PF15333 TAF1D TATA box-binding protein-associated factor 1D Coggill P pcc Jackhmmer:Q9H5J8 Family TAF1D is a family of eukaryotic proteins that are members of the SL1 complex The SL1 complex includes TBP and TAF1A, TAF1B and TAF1C, and plays a role in RNA polymerase I transcription [1,2]. Alternatives names have included 'JOSD3, Josephin domain containing 3'. 
27.00 27.00 153.10 70.30 23.90 23.00 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.71 0.70 -4.55 12 58 2012-09-03 10:13:26 2012-09-03 11:13:26 1 2 34 0 33 42 0 201.10 57 79.67 NEW SuSSLF+TQClP.oPpp+pR.ss..Rp.slpustslp.pDSSSDSS.hE.P.pP..LsLKA.IFE+FKK+....KK++.K..RKYKP...Tt+.h.GRPcG++s..s+hSp..h-K.+plKDKG.pFPFlESE.s.+KsLPW+KILoaEQAVARGFFNYlEKLKYEpHLKESLpQMcsGEDLEcEDh-sR+YKYLDDDGuISPIEEsssED-.ssss..cp.s..-CDlKLV.E.sS.FIlSo-hP+K ...............SsSSLFKTQClP..hSPKpppR.Ns.hRK.hVcss.tsVpspDSSSDSS.hE.P.hP..LTLKA.IFER.FKp+.p...K.K+KK..RK...Y+P....oGRP+.GRP........cG++s.........sphS...l-K...KQh+-+GssFPFLESE.s.cKslPW+KILoaEQAVARGFFNYlEKLKYEaHLKESLKQMsVGEDLEcEDhDSRRYKYL.DDD...GSISPIEEStsEDE.sssph...cp.s.-CDIKLV-sopFIlSSEhPp..... 0 1 2 6 +15187 PF15334 AIB Aurora kinase A and ninein interacting protein Coggill P pcc Jackhmmer:Q9H7T9 Family AIB is a family of eukaryotic proteins necessary for the adequate functioning of Aurora-A, a protein involved in chromosome alignment, centrosome maturation, mitotic spindle assembly and aspects of tumourigenesis. AIB is likely to act as a regulator of Aurora-A activity. 
27.00 27.00 191.40 185.70 17.90 17.10 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.13 0.70 -5.37 12 28 2012-09-03 10:14:07 2012-09-03 11:14:07 1 1 23 0 14 29 0 319.50 66 92.02 NEW QTHLIKsuTKMLTLhPGERKspISFTQRphPsAGsRQTSIASFFTLQPGKTNGGsQ+SVSSHtESQhNKESKcDsTQL.DHLhQGLcDDChAsPLATSTPADIQEA.GLSPpShQ.sSGHHphtTPhLThhSL.QP-TL.VCAG-SKASLAhSFTQDlEsSCLLDQKEuc...DSShK+EWLpGSK.K.N.hQuhERHs+PsGGKsHQsLDKsKlE.KVSAKENRQuPV.lQTYR-.SaSGcNT.uVKQSPCPVslFSWDSE+sDKDSWSQLFTEDSQGQRVIAHNoRAPFpDVTNspNQGLGQFPsSPtAQsQtsssph.NLQPDLLFTQDSEGN QTHLIKPGTKMLTLLPGERKspIsFTQRRsPssGI+QpSIASFFTLQPGKTNGuDQ+SVSSHsESQlNKESKKsATQL.DHLI.GLtcDCMuuP..LATSTsADIQEA.GLSPQSLQ.TS.GHHRhtTPFLT.L..SLhQPDTL.sCAG-SpssLAhSFTpDLESSCLLDQKEtc..tDSupKtEWLHGSK.K.N.YQuME+HsK.PGsKCpQPLsKTKLE+KVSAKENRQAPVhLQTYRE.SWsGENs-uVKQSPCPVSVFSWDsE+NDKDSWSQLFTEDSQGQRVIAHNoRAPFQDVTNNhNpsLG.FPNSPWAQCQ-c....ssp.NLpPDLLFTQDSEGN. 0 1 1 1 +15188 PF15335 CAAP1 Caspase activity and apoptosis inhibitor 1 Coggill P pcc Jackhmmer:Q9H8G2 Family CAAP1, or caspase activity and apoptosis inhibitor 1, is a family of eukaryotic proteins involved in the regulation of apoptosis. It modulates a caspase-10 dependent mitochondrial caspase-3/9 feedback amplification loop. 25.00 25.00 25.10 27.30 24.80 21.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.12 0.72 -3.77 14 90 2012-09-03 10:14:51 2012-09-03 11:14:51 1 1 71 0 60 86 0 61.70 53 16.01 NEW shKPluhYIpDK+EMLcQCFpVlGEKKLpKMLPDhLKssolc-lccLCh-QLphlScKplh..pIL ...sLKPlSaYIsD++EMLpQCFsllGEKKLpKMLPDlLKssSl-EIKcLC.EQLEhLScK+lLpIL........... 0 13 17 36 +15189 PF15336 Auts2 Autism susceptibility gene 2 protein Coggill P pcc Jackhmmer:Q9HAH7 Family Auts2, or FBRSL2, Fibrosin-1-like protein 2, is a family of eukaryotic proteins associated both with a susceptibility to autism [1] and with influencing the number of corpora lutea produced by breeding sows [2]. 
23.00 23.00 23.10 23.10 21.40 22.50 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.48 0.70 -4.30 17 176 2012-09-03 10:19:03 2012-09-03 11:19:03 1 3 48 0 86 152 0 197.20 53 25.25 NEW KPGKWCAhHVplAWQIh+....H.QQKlK..QMQLDPHKL-.huhKhDhhSRPPuPulFsuhHaPpDLARP..LFSu...o.GuuHPussPFGPssHH.uuFL.PsuHL..DPFSRsuoFuGLGsLuSsAFG.....GLGs.sLo..ssSlFu.K-GP.slpsh...uuP.HEsWNRLHRTPPSFPT.PP......PWPKsuDsERsuusssH-tc..........R-s-K.........uK--+-..RDhL-KsR.Hss+uSP ....................KPGKWCAMHVpIAWpIY+....H.QQKhK...MQhDPHKLD.huhK.-hLuRPPu...Pu.......l..F....uu.......lt...aPpDL.ARP...sLF.S..u...s..G..A.sHP.u.usPF.GP.s.P.HH.usFL..PuuHL...-PFuRPooFuGLuuLuusAFG.....GLGsPols...ssSlFupK-uP...ultsF.......usP.H-.PWN.RLHRoPPSF.P..TPP......sW....KPs.-h..ERou.u.hss+D+-................R-s-+c...ph.sK-....-+-...R-.l-+p..h.s+sSP................................................ 0 8 15 39 +15190 PF15337 Vasculin Vascular protein family Vasculin-like 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9HC44 Family GC-rich promoter-binding protein 1-like 1 or Vasculin-like protein family 1, is likely to be a transcription factor. The domain family is found in eukaryotes, and is approximately 90 amino acids in length. 27.00 27.00 27.90 48.40 22.40 21.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.33 0.72 -3.52 16 123 2012-09-03 10:19:58 2012-09-03 11:19:58 1 1 40 0 69 117 0 95.20 68 21.42 NEW -s-hLSpSLEAEH.RLLKuMGWQEYsENDEshLPLTEDEL+EFphKoEQL++NGht+NGhLtp.pshs.hFssWRsoscschpcspDoETSS.S-TS.DDD ...............t.s-VLSpSLEAEH..RLLKtMGWQE.sENDEsChPLTEDEhREFQshoEQLp.+NGhpKNGhLps..hhss.hFuPW+sosht.s.-s..pDTETSS.S-TS.DDD. 0 5 8 20 +15191 PF15338 TPIP1 p53-regulated apoptosis-inducing protein 1 Coggill P pcc Jackhmmer:Q9HCN2 Family TPIP1 is a family of eukaryotic proteins whose expression is induced by wild-type p53. 
Ectopically expressed TPIP1, which is localised within mitochondria, leads to apoptotic cell death through dissipation of mitochondrial A(psi)m. Phosphorylation of p53 Ser-46 regulates the transcriptional activation of TPIP1, thereby mediating p53-dependent apoptosis. 27.00 27.00 66.20 66.20 23.40 17.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.68 0.71 -4.26 2 7 2012-09-03 10:20:35 2012-09-03 11:20:35 1 1 5 0 1 11 0 101.70 89 91.40 NEW MGSSScsSFRSAQASCSGsRRQGLGRGDQNLSVM.PNGRAQTHT.GWVSs.LVLGAQVHGGCRGIEA.SVSSGSWSSATVWhLTGLGLGLS+PFLPGshVLRDRPLtSAhELSYDQKKA.LpLQ .MGSSScASFRSAQASCSGARRQGLGRGDQNLSVMPPNGRAQTHTPGWVSs.LVLGsQVHGGhRGIEA.SVSSGSWSSssVWhLTGLGLGLS+PFL.GshVLRDRPLtSAhELSYDQKKA.LpLQ........ 0 1 1 1 +15192 PF15339 Afaf Acrosome formation-associated factor Coggill P pcc Jackhmmer:Q9NQ60 Family Afaf is a family of single pass type I membrane proteins. Afaf is a vesicle factor derived from the early endosome trafficking pathway that is involved in the biogenesis of the acrosome on the maturing spermatozoon head. 27.00 27.00 91.10 90.50 21.50 19.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.32 0.71 -4.72 11 38 2012-09-03 10:21:37 2012-09-03 11:21:37 1 1 22 0 16 39 0 182.00 62 66.58 NEW PANEKoGNYYKDIKQ.YVFTTQNPN.GopSEISVRAT...TDLsFuL+NaKhlstosh....tppusE-csshpEspcsphp+oT..PN.PAFWTMLAKAlNuT.ssp...--KDQLFpPIPsSDlNATsEDplu-Lp-lKLKLMLGISLMTLhLFlhLLAhCsATLYKLKpls.K.spES.QYSlNPELAoLSYFHPSEGVSDTSFSKSAESS ....PANEKoGNYYKDIKQ...YVFTTQNPN.GTpSEISVRAT...TDLsFuL+NaKhhNtTsh.....tpssptpt..pE.pcpp.pcsT..PN.PAFWTMLAKAlNuT.sst...--+DQhFpPIPsSDlNu....TpE.Dphu-Lp-lKLKLMLGISLMTLhLFVsLLAhCsATLYKL+plphK.sCES.QYSVNPELATLSYFHPSEGVSDTSFSKSAESS.............................. 0 1 1 1 +15193 PF15340 COPR5 Cooperator of PRMT5 family Coggill P pcc Jackhmmer:Q9NQ92 Family COPR5 is a family of histone H4-binding proteins expressed in the nucleus. 
It interacts with the N-terminus of histone H4 thereby mediating the association between histone H4 and PRMT5, PRMT5, the Janus kinase-binding protein 1 that catalyses the formation of symmetric dimethyl-arginine residues in proteins. COPR5 is specifically required for histone H4 'Arg-3' methylation mediated by PRMT5, but not histone H3 'Arg-8' methylation, suggesting that it modulates the substrate specificity of PRMT5. This family of proteins is found in eukaryotes. 27.00 27.00 93.50 93.30 19.20 18.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.17 0.71 -3.84 8 37 2012-09-03 10:22:24 2012-09-03 11:22:24 1 1 26 0 19 38 0 146.60 76 88.73 NEW EAGhATADHSG..EpETEtAsDRLupGAQSlPs-sPs+GEGopuEEEGa...AhDcc-uDGEhNsWELs-Gs..ssPPpEpsus.lFNEDWDhELphDQGN..PYDADDIQGSISQElKPWVCCAPQGDMIYDPSWHHPPPLIPHYSKMVFETGQFDDAED ........AuFATuDHSu.pERETEKAMDRLApGAQSlPND.uPA+GEGTHSEEEGF...AhD-EDSDGELNTWELSEGs..sCPPKEQsuD.LFNEDWDLELKADQGN..PYDADDIQtSISQElKPWVCCAPQGDMIYDPSWHHPPPLIPHYSKMVFETGQFDDAED 0 2 2 4 +15194 PF15341 SLX9 Ribosome biogenesis protein SLX9 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NSI2 Family SLX9 is present in pre-ribosomes from an early stage and is implicated in the processing events that remove the ITS1 spacer sequences. In eukaryotes, biogenesis of ribosomes starts in the nucleolus with transcription by RNA polymerase I of a large precursor RNA molecule, called 35S pre-rRNA in yeast, in which the 18S, 5.8S, and 25S mature rRNAs reside, while RNA polymerase III transcribes a 3'-extended pre-5S rRNA. The 35S precursor also contains external transcribed spacer elements (5' and 3'-ETS) at either end as well as internal transcribed spacers (ITS1 and ITS2) that separate the mature sequences [1]. 
27.00 27.00 28.30 27.30 25.00 26.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.86 0.71 -3.67 125 252 2012-09-03 10:23:41 2012-09-03 11:23:41 1 5 220 0 174 248 0 135.90 22 65.24 NEW psththoKK-....Kpph.K+ppFlpK....lpps..t.sp..........p....pp+p...++R........tpp...tls.s.....shcsLtcuL..Pp...........................................ttptth.pptt.....................hpppthst.........+p+cplh.ppEtpRFsplLsp.sa.................................................pssPhuAlcpalppsh ............................................................t..thh.oKK-....Kpph.++ppalpK....lptt....sp...................p....pp+p...++R...tpp.....tls.s...sLpsLt-uLP-................hp...t.............................tttp.t..ptts.............................hppp.hst....................+p+cplh.ppEppRFpplLsp.sa...............................................................pssPhuAlppalppp........................... 0 49 92 138 +15195 PF15342 FAM212 FAM212 family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NTI7 Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. 27.00 27.00 29.60 29.60 24.90 16.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.13 0.72 -4.36 8 105 2012-09-03 10:24:18 2012-09-03 11:24:18 1 2 41 0 47 82 0 59.60 62 20.65 NEW Rs+P+SopsssL-phpuhs.ts-ucDWTooLhSpSRNRQPLVLGDNsFADLVtNWhDLPEss ..........................hP+Sps.ssshpphpuhc.hs-s-DWTooLhSRGRNRQPLVLGDNsFADLVtNWhDLPEh... 0 2 6 17 +15196 PF15343 DEPP Decidual protein induced by progesterone family Coggill P pcc Jackhmmer:Q9NTK1 Family DEPP is a family of proteins expressed in various tissues, including pancreas, placenta, ovary, testis and kidney. High levels are found during the first trimester. Its expression is induced by progesterone, testosterone and, to a much lower extent, oestrogen. The family is alternatively known as fasting-induced gene protein, FIG. 
27.00 27.00 30.40 29.60 19.80 19.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.21 0.71 -4.68 10 32 2012-09-03 10:25:33 2012-09-03 11:25:33 1 1 24 0 16 26 0 159.20 52 85.86 NEW GGPspEPP.......uSPSLDDYVRSICQLAQPTSVLD..cAsstspss+spR.PAp.............usEKSsPssSLQDITs+FSGQQPsLPtssTsDPLDWLFGESQEKQsS+RDhsRRTGsSAssWGsHRQhDoGKutusPRGRhC-ARsPGHSLuRhSpDtpQutp..Sh..suppstpssuSstpsRsSSlLRTLa.HLPVIHEL .......GuPGQEPP.......sSPSLDDYVRSIspLAQPTS.VLD..cATApupPpsPaR.PAp................uscKupPAsSLpDlTs+FSuQ....QPsLPhssssDPLDWLFGcSQEKpsspRDhsRRTGsSus.hssHRQh-ssKs.sssRGRhs-A+hstaSLsR.spchpQs....S...s.tp.tpthuu..tsp.uShL+oLh.cLPVIaEL............................. 0 2 3 5 +15197 PF15344 FAM217 FAM217 family Coggill P pcc Jackhmmer:Q9NTX9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 329 and 507 amino acids in length. There is a conserved YPDFLP sequence motif. 27.00 27.00 85.50 33.40 20.10 23.60 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.39 0.70 -4.80 21 73 2012-09-03 10:26:04 2012-09-03 11:26:04 1 2 32 0 40 62 0 213.10 41 51.55 NEW SDLSDSERls.lssSPhTPPDLsLRAEEIDPVphp...cPtpupsps-ahYPDFLPPPFNSWDL+chAlhhpoEshpsslPpssuhLtKYI-RLLQLEWLQhQTIQsEKsK.ss+uRssousus....ptshKSPG.+oKhhsusls.cs.s.hQ.p.Gs.sKsssp.....R.....KpshtppcscPshht.apsSspshchhus..oR....sS.+Qosph+p.....ctK++cssK.ss+.QphshsCs-...susKhposuNlRhPp ...SDLS-sE+hs.h..S.h...p.sDLNL+sEpI-sVp..............c....sh...ptcsp..a.YPDFLPPPFsohDL+chAl..poEshptsls.stu.lt+hIsRLLpLEhLQh.TlQpE+s+..hs.sp.sTssuo....pts.pS.u.psKlhtstls.csLs.hQ.p.us.sKot.p...........R...........Kps.tppchc.stht.aphSsts.chhhs..oR....sS.+posps+p..pppcssK.ssK..phhs.sCp-....SsKspsstshp............. 0 2 5 10 +15198 PF15345 TMEM51 Transmembrane protein 51 Coggill P pcc Jackhmmer:Q9NW97 Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 233 and 253 amino acids in length. 25.50 25.50 25.60 25.50 25.10 25.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.68 0.70 -4.66 17 64 2012-09-03 10:26:44 2012-09-03 11:26:44 1 2 40 0 32 46 0 218.10 49 87.87 NEW puuGSHYALsALGlGhlALGllMhVWslVP..Gtus..u.ssssssu.ss.ts..sss......tt+oKoSSVAaVLVGsGlhlLLLSlCLulRsK++pppst.p....ptpsss....pp.spppEc.t.-.tsupYsVPSYEEVlsost...ss..ppssh..ptspS...pLPSYEsLsshsp............spss.....s.....sssth.ssssps..........PsRpsSRsuRhL+PL+VRRIKS-KLHLKDhRlslt....p.ss..p.sslTIEPLTPPPQY- ................uNGSHYALTAIGlGMLVLGlIMAhWNLVP..GhSs..up+ssspu.N.po.suuG......hhKSKTFSVAYVLVGu.GVhLLLLSICLSIRcKR+pRQu--...chQptsussPpspcpcuQpE-....E-suuRYhVPSYEEVMsosh..sps..Rttpp..psphS...hSLPSYESLsulDE.................ssss.....ssputspssssps..................PsRpsS+..hu..++L+PLKVRRIKSEKLHLKDFRlsLs....s..ts..h.sPsoIEPLTPPPQYD................................................................ 0 2 5 13 +15199 PF15346 ARGLU Arginine and glutamate-rich 1 Coggill P pcc Jackhmmer:Q9NWB6 Family ARGLU, arginine and glutamate-rich 1 protein family, is required for the oestrogen-dependent expression of ESR1 target genes. It functions in cooperation with MED1. The family of proteins is found in eukaryotes. 27.00 27.00 29.20 30.20 26.80 22.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.29 0.71 -4.43 19 163 2012-09-03 10:27:43 2012-09-03 11:27:43 1 6 106 0 113 158 1 130.50 45 54.99 NEW pEhEtKllEEEsA+RVEchlcKRVEEpLp..p-EIcpElpRRlEEu++th-cphht-lE+c+cttlp-t+p+EEcE+pc+E-LEclhpENpRKlEEAQ++.AEEchph.......hEEph+.hE.......-Rt+hcp-cc++h+-.E.....Qth...lLGKppoRPKLSFuL ...........pEhE.KhlE.EE.sA+RlEchltK+VEEcLp..p-EIcpEl.RRlEEu++hhccplhtpLE+p+pttlttt+t+.....E............EcE+pc+EELEchLEENpRKltEAQtKhAtEp.th.EEphp..E.......pR.+hcttcp+p.cc.E.....Qhh.....lLGK.s.p.....uRPKLoFsh................................... 
0 37 59 85 +15200 PF15347 PAG Phosphoprotein associated with glycosphingolipid-enriched Coggill P pcc Jackhmmer:Q9NWQ8 Family PAG, or Cbp/PAG (Csk binding protein/phospho-protein associated with glycosphingolipid-enriched microdomains) is a transmembrane family that has a negative regulatory role in T-cell activation through being an adapter for C-terminal Src kinase, Csk. This family of proteins is found in eukaryotes. 27.00 27.00 30.30 94.20 25.70 19.70 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.24 0.70 -5.38 5 46 2012-09-03 10:28:35 2012-09-03 11:28:35 1 2 37 0 27 40 0 401.30 61 99.17 NEW MGP-GGLLSoGpVHIIlWGSLAAVuThLlITFLIFLCSSC-REKKPKQQNGDHENLMNVPSDKEVFSHSVTSLATDsPASSEQNGGLSNGDILSEDSTsAChQPYEEVQTSlSDLLEuQDSlGKSlKCHQSRELPRIPPNsTlETILSTRNsEsDQGLGMEGPYEVLKDSSSQENMVEDCLYETVKEIKElGAoAssEKupsG+us..uusAs+Es.uslsts+lESAEYASVDRNKKSRQSsNuESlLGNosDlEEEAPPPVPVKLLDENENVQEKEs-Es......EtpATEGsu-ssKRLSSLSYKSREEDPoLTEEEISAMYSSVNKPGQA.R..hsPESoYTCIpElAPpRSPSSCNDLYATVKDFENsPsu..ThPPu..u.RsNGEPEPDYEAIQoLu+DEERss.hPpos+lshspENDYESIGDLQQsKDVTRL ...........MGP.GuhLu.uGQh..Q..lsLWGSLAuVuhFhlIo.FLIFLCSSC.-REKKP+..p..puGDHENLMNVPSDKEhFS+SVTSLATDAssSSEQNGuLTNGD....ILSEDSTh.TChQHYEEVQTS.uSDLLDSQDSoGKs.KCHQSRELPRIPP-uAVDThLssRssDuDQGhGhEGPYEVLKD..SSS..QENMVEDCLYETVKEIKEVuAssp.-+G.....psu+u+.......Ss.sulKElsuPpsps+...s-F.AEYASVDRNKKCRQSsNsES.........lL.Gso..sDs........EEEuPPPVPVKLLDENENlQEKEs.tps.......EcpAsEsT.u.-ssK.....Ra.SSL..SYKSREEDPTLTEEE.IS.A.MYSSVNKP.G.......Q.....s.....h.....p......p.............ssES......s....YosIpt..sspRSPSSCNDLYATVKDFEKsPsu...ohPPA..u..RPstE.PEPDYEAI...Q...oLsREEEKss.tssu.ppshssK.ENDYESIGDLQQsRDlTRL..... 0 1 3 10 +15201 PF15348 GEMIN8 Gemini of Cajal bodies-associated protein 8 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NWZ8 Family GEMIN8 proteins are found in the nuclear bodies called gems (Gemini of Cajal bodies) that are often in proximity to Cajal (coiled) bodies themselves. 
They are also found in the cytoplasm [1]. The family is part of the SMN (survival motor neurone) complex that plays an essential role in spliceosomal snRNP assembly in the cytoplasm and is required for pre-mRNA splicing in the nucleus. GEMIN8 binds directly to SMN1 and mediates the interaction of the GEMIN6-GEMIN7 heterodimer [2]. 27.00 27.00 31.30 31.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -4.28 39 92 2012-09-03 10:29:33 2012-09-03 11:29:33 1 2 65 0 64 84 0 197.70 35 87.38 NEW psWaspssYu+.................YWpHYppAMtWhppHptA.hptthpthhssshhhsss.....psphsppstp.........................t.t....s....p.tttp.....t.pp.cc-t.-oco-sc.hcsDhs...NMElTEEL+QaFA.pTE+HREEh++.....QpQ...l-scp.-s..........YVpAD+sLhhs.h+.SspsPsEpPucpRpAE.MK+LYG.cuAs....KI.AMEsAlQLsFD+pC.Dc+pPKYWPlIPLKh .......................................................................................Whtp.hatc.......aWpHYppAhtWhppHp....p..ht.hht.shhhs.s.....psphsppttt...............................................................t.tp...t..t.p.ptpp......t.pp.c--.-o-SDs-.lEsDlo......NMEITEELRQYFA.pTERHREEh++......QQQ...l-scchcs.....................YVsAD+sLhhstp+..SspsP.sEcPu..c+RpAE.MK+LYG.csAs....KI.AMEsAlQLoFD+ps.DcKpPKYWPlIPLKh..................... 0 18 22 40 +15202 PF15349 DCA16 DDB1- and CUL4-associated factor 16 Coggill P pcc Jackhmmer:Q9NXF7 Family DCA16 is a family of eukaryotic proteins that interacts with DDB1 and CUL4A. The family may function as a substrate receptor for the CUL4-DDB1 E3 ubiquitin-protein ligase complex [1]. 
27.00 27.00 349.80 349.70 20.50 18.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.68 0.70 -4.98 2 16 2012-09-03 10:30:57 2012-09-03 11:30:57 1 1 14 0 8 12 0 207.10 95 100.00 NEW MGPRNPSPDPLSESESEEEtNANYLNESSGQEWDSSEtEDPVVPNloPLESLAWQVKCLLKYSTTWKPLpPNSWLYHAKLLDPSTPVHILREIGLRLSHCSHCVP+LEPIPEWPPLASCGVPPFQKPLpsASRLSRDHATLNGALQhATKQLSRTLSRATPIPEYLKQIPNSCVSGCCCGWLTKTVKETTRTEPINTTYSYTDFQKAVN+LLTASL MGPRNPSPD+LSESESEEEENhSYLNESSGEEWDSSEEEDsMVPNLSPLESLAWQVKCLLKYSTTWKPLNPNSWLYHAKLLDPSTPVHILREIGLRLSHCSHCVPKLEPIPEWPPLASCGVPPFQKPLTSPSRLSRDHATLNGALQFATKQLSRTLSRATPIPEYLKQIPNSCVSGCCCGWLTKTVKETTRTEPINTTYSYTDFQKAVNKLLTASL..... 0 1 1 1 +15203 PF15350 ETAA1 Ewing's tumour-associated antigen 1 homologue Coggill P pcc Jackhmmer:Q9NY74 Family This family of proteins is found in eukaryotes, where members are expressed at high levels in the brain, liver kidney and Ewing tumour cell lines. Proteins in this family are typically between 648 and 898 amino acids in length. 
27.00 27.00 75.90 40.40 21.10 20.50 hmmbuild -o /dev/null HMM SEED 814 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.85 0.70 -13.37 0.70 -6.52 8 53 2012-09-03 10:31:34 2012-09-03 11:31:34 1 2 34 0 31 50 0 614.10 41 89.43 NEW RYETPKRlLKMDLLSSTFSSP.NDPDGQNDIFWDQNSPMTKQLGKGRKKQIYoTDSDEISHIVNRIAPQDEKPTTNSMLGhWIGETAIPCTPuVAKGKSRAKlSCTKLKTQNQEEELMKLAKQFDKNMEELDVIQEQDKRNHDFIQMISEsETLNNYKDNVQMQhLp-I...VPEIDsuIIKKPhKtNTKISVs.NDQsSSQKPFDQNAEAAFNAIFDGSTQKCSGQLSQDLSDAFLNTSNTTFGKKssLKEEKIIoNETlVTE+L.NK.......TPsSLSsQVDTPsMTKSCVTSsTKEPcshsKalDsFsTSDFEDDWENLLuNEPFVMQNlEhsELhP.u.KTsQhsDQKuICoFN.uKNDKSKSthNoSLDsRLRDSKILQDLPScTpNpELhDAtKapF.P...sPNDKPNKL.S.TGNKhKFEKSFNKlVlQDKIQD...sAlASsLTKVKEDhpT...K...Fs...ss..SpKKSuLNTGYSN-Q.......KNKsIFNQSFKsPAslcPFGSAsLusET.SVsNsNQTNASKLsSFFDDWNDPSFANElVKACHQLEsTWEADDVDDDLLYQACDDIERLTQQQDlRKDSKTSEShL-lNNSSpHGA.KNhFTTSKQuSQLlQSKHLNLuSISsp.T.SlTNSSQlsKSVKMEKGEhCGNSPuFLGATTNLoIYSKNSssQhsN..VuhNNTcVPlQVNSSKSlLsGSSSLNVsSDHMoTEIATspKKLSTppLSHsTlTDEuQocLN+sV+hSKYTFTKhKNsQhhSQhNQNCl.sGSlSDTKIoQsLEKNK..T..VNsLhGcAVQQQSLh+.hSESLKQPSKEEEEKNRKYSPEEIQRKRQEALVRRMAKA 
............................................................................hcTPKRhhphchhSss.FSSP.NDsDsQp-IFWD.sSPhTppL.GcuR.+KQh.hspsosEIScIVN..RIAPQDEK.Ps.s..s..ShLshWIG-sAIPCTPsVsKtKSRsKhsss+.h......K.spspEEELMKLAKQFDKNM.ELDslQEQspcsasFhQhhSEsthL.ssa+D.shphp...shpsh...lsEls...ps..t.cshctso.pluh...pspsSSQKshD.sAEAAhsAlFDGSTQKCSGQLSQtLs.-s.hssopshh..hcpss....LhcEp........st....cohl...-ph.sc............oshulos......psDoshhspSp.lT....ppc..s....t.h.c.....s.....hsssDF-DDW-..s.hLss-sFsMQ.sp.sELhss..cosp....s.....ppth..ht..spsspshsthsts.....th.htssp.h.shsSph.stph.ss..p..h..pst.c...pl....tNp.ph.pps...h...sch.p...h..stshpphpEs..hs.p.........s..stccsshspt..sppp.......ps..s.h....pshpsssphp..sShtlspps....thsp.spppssK.s.s...aDDWNDPphusEllcthpp.-shW-u.sD..DDDLLYQsCDDlE+LTQpQshpcssctoEs...hs.sS.pGt.ps.hshScpt....p...hhQs.cHh....N.ssh.Shp.h...o.hpps.phsK.hthp.c.t.htsssp.hss.pNLoh..t.ss...p...h..sssssshthstop.hhstp.s..h....tpsthtsphs...pphpsppLstpshsst.sp..st.sp...st.s.casFp+hKssp.h.phsps.h.ssphs.sschh..pthtpp+..s..ls.shhtp...s......pp...h+..hSESht.ssp..t..pEE+N+KhS.EEIpRK+QEALsRRhu+....................................................... 0 2 5 13 +15204 PF15351 JCAD Junctional protein associated with coronary artery disease Coggill P pcc Jackhmmer:Q9P266 Family JCAD is a component of VE-cadherin-based cell-cell junctions in endothelial cells. The cell-cell or adherens junction is an adhesion complex that plays a crucial role in the organisation and function of epithelial and endothelial cellular sheets. These junctions join the actin cytoskeleton to the plasma membrane to form adhesive contacts between cells or between cells and extracellular matrix. The junctions also mediate both cell adhesion and cell-signalling. JCAD localises close to the apical membrane in epithelial cells. This family is found in eukaryotes. 
27.00 27.00 191.40 27.20 17.50 17.20 hmmbuild -o /dev/null HMM SEED 1356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.15 0.69 -14.27 0.69 -6.96 15 60 2012-09-03 10:32:28 2012-09-03 11:32:28 1 3 35 0 36 53 0 1002.50 39 97.42 NEW MYSVEDLLISHGYKlSRclPAP+E-ch-G+ppARocsRAGpG.LLNGCEDGsAAhspSK....suhG+GalS.soE......s++p.sPRuHsEsQS.sSAuRsSEuGFYcQPsLsWSSQPQoucDpAY.RRR.GQ-sSu.lLGPRDR--LEsRGMAQAHSLPsHsREGPWEVGGRTEpVMKKAVWEEELRMuGPuKWQNlSLESWNQPRKLGRQMSDGDGEKLFQD....LYPFhpGEcsLsSQsKGKSQSLPRVLSPESLSChEIPIPLNDGHhPuVPKMPsYPPNCAPsLEsTRNsEKuGSSsPLPRPKFGRPLKPPS..YsSHppSRuGsEsSchpDSpQsD...hsssh...sppP.RpEhsssDsGLEPPVYVPPPSYRSPPQHIsNPYlE.DssP+sVsuGppQQpps...sE+suAusplPuGshuoGsEYGAusp.SPpGh.spPRPsssa-uSVQYIPFDDPRIRHIKLApPpGFs--sKh--psYsuuPlsssEPApGptQpDGAlhsPpulssssGsERu....ssh.AsPSPpWLWGQLPRDuENuGhPDQRDHCss.RGQ.PsspGSp+t+sEG.VSSPsPQ.GESTCETpTKLKKFE..TGhQTKKSSKKKhNETIFCLVSIPVKSESHLPDpDpNNNDLKQSADKKNGhDKSsALQEQSLLSMSSTDLELQALTGSMuGRTEhQKQ-LGEPE-sKQTNDLRFIHPsKHRELKYSGSWPGHQYRDQQTQTSFsEESKSsQhLPupKsGuss.cssLoP+psDP.......sA.S-.sphHsALuSSDpsQRPsAhsLKGQhS.LSPSSNSAFSRTSsshsQAPsPKAupoQP...shDspG+suuPsP+uEVVKGEo.T.GPCNS+QLFGQFLLKPVSRRPWDLISQLESFNKELQEEEESusSS......SsSuSE....-S-sEhppEspAssps+shGhpcsSt-hRsctts+tls.EcPsh+SGRVKSKSESWSEE.csG..aspupP.S..GsspstsGRGpshhsAcGSLlsEp+cQEscsRhschslSPuPV+R.h..SSRu..SDs+PsssupsAE.REPQEspcLss.....shsSVplSpuuPPcssuutERuo..slsLSLuuKsRGLSAPDLRSVGLs.u.EpSAscLDGSLG-AsAIEIPP..NESLQARAARILGIEVAVESLLPGspRsGQsQsPEPDuSApssEuPREE.osuSsA.ss..sPososDAFYGRRKCGWTcSPLFV..GERDushRuP.AsE....pSsVDuslsSpsssPEPpPss.EspshppKDhts+PPFRSTLFHFIERTPoVuGSEKRLRSTSKVIESLQEKLASPPRRADsDRLMRMKEVSSVSRMRhLSsRSADSsEEAEELKA..RG.us.PtG.sohssuDhup+sGpssulSKGslSLEEsGHPAupR.EKs.scQDFWCP 
.........................................................MaSVEDLLlSHGYt.....spp.s...s............th.........t..t....u.t...NGh..........................t..t.ssp......tt..................................thhpps..hhsup.s..tps.hh.hpR.ttp...u...t.p.t.tt....t.tthut.s.uhshp.......+Eu.h-VutpsEpVh..+pshhp-ph+hss.t+WQslshESWppP+clGRQMSDGsGE+hhp-....LYshh.u-pslsopsKtKSpSLPRsLSPESLphhElPhshs-ta......sKhs....as..Pspsssh-.ss+p.c.pssp.sPhP+PKFGRPLKPPS..Yp.pppoRussc....s..pDpp....phc...............sRp-hsh.D...s...G...LEPPVYVPPPSY+S..PPp.ph.ssPah..stsPh.hssspppQpp....hE+stsst......ssss.ususphssssp..P.uh..pP..p.ss.sasu.VQYIPFDDPRlRHhKlApstsh.hp-hchtcp.hss.us.ssst-sshtthp.cuAhhs.spshhs.sssp+u....ss..sss..SspWLhst..stss...Ess.uhPcQR-ps..ss.psp..Pss.psu..ptpstt.sos...sp..u-u.....o.CET.TKLKKFp..TG....hpoKK.SS...KKKhNETIFCLVSIPVKSESpL...PD.pDpNNNDLK............us-..ppsG.cp.s.suLQEQ.SLLShS..STDLELQALT.GSMus+schp+QshtcPctt+QssDLpFlc.sKHRELp.aSGSWPGcQYRDQQTQTSFsE-sposQ.hPus+.Gtss.sss.oPp.t.-s...............................ss.oc.sthpsuhsssD.pp+spu.slKGQhS.LSPSSNSAFSRTS.shsQuPhsKus.uQs............th.s.pt.sspPss..+tEVVKGEs.s.usCNSppLFGQFLLKPVSRRPWDlISQLESFNKELQEpEESpssu.....p.sssE....-u-tc...p..shs.st.cs.t..t.t.t.cht...thhs.-tPshpsGRhcspSpSWS.cc.p.s..t.ts.....hs....s..p.ttutu.sh..spush.s-...ptp.-..pthpp.shp.shPs.c.h..u....stps.s.....t.+.stt..c..t......ht.sp.ut.s.s.p.sth.ptts...h.LsLss+spGhStPDlpsltL..s.t.ts.p.....s.tsshEIPs..sESLQtRAsRILGIEVAVESLLPsstp.sttp..stsssssht.tsstpt....t...s.....th.tsua.uRRKCGWTcSsLFV..G-.......hs.t.......sthpt..ssp...PE.............ps.t.......sPh+uslh...phh-+ssss.ssEKRlRssSKVIEoLQtKLsSsPp+ss.-RLhRMKEVsSlSRMRhLS.+ss-S.--s-t.K.........................t............tt.t................................................................................................................................................................................................................. 
0 1 7 15 +15205 PF15352 K1377 Susceptibility to monomelic amyotrophy Coggill P pcc Jackhmmer:Q9P2H0 Family This family of proteins is associated with a susceptibility to monomelic amyotrophy. 27.00 27.00 32.60 32.40 21.20 23.00 hmmbuild -o /dev/null HMM SEED 982 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.99 0.70 -13.80 0.70 -6.61 18 67 2012-09-03 10:33:30 2012-09-03 11:33:30 1 3 37 0 32 60 0 648.90 38 85.13 NEW QQRKQKFEEVTEKFQRAHlPhSQRRRu...Va....QKPVPPLEEALKQIQcSsLKSElNL.PsS+RPTlNWRsI...DsAL.PSuLS+N-a++Q+pLhS+l...sC-KEMpENs+ssLsos+.ssFQLKLEETQ+LLEDQ.HLSsLQ+Fp-E.VNQITNSETLSSIDSLEAGE...+.EEIYlTL.sKEs..SoSsQ.pNo.lSLcS...u.NlQSsN..hsCFDcDKLsaSKTQ.HINNWLpNLcspNTQosoPFoDILuKsNVL..PshEa....hNuKEQNs..sshspss-..RsTsTu.sNoluFV.pSPssFlpscKsE+sSEoSohpoTDu.oStsFK+E+PhVTESPsFKFSKAWsTPDSlTpEhsphSDQE+hSElTQcNRTTSl.TShlPhATPLlL.Po...........NpQSu.+sLsKsslH.lKEIcPlQ..CSDpLsELcDhKDE..+lKahssscccLP.LFSDshpsuhl...sp...Ns-scD.cKp+.hscTu.oL.SshhSNhDLVuQHKKhKaNIaERNGV+FLKSILKKESKYEHsYhKALllNpGhphGNQKAAAIRDSIELTK..cKGcsuEIsKT.IKKLRWFDEsuph.cpss--spSLKNpstlopQhSQ.hH..sp...SuAsSNlhSlPAsAlNSAsscpsK-............s.u.lS.csssslGtSspDpVPLNsF.lPSGYshAKQAW.sSK+EEu+sPV+.s.DSKsQKssPQRGtsKlIRRs+SAKVQSu.hlppsRK..GTllRPQSAS+AsshlQsQGKLllPHPPP+ssoN.RuuKshpsSp.CQsVhP.-sSQN.hhTp.ssh.NS+alLPsEaplNphsQESS.Pl.sssC.SDhVTVhPSLP.YsoSECpTlAK.lNaS.susQslApQDuolaCT.pRsPVhEEuh.oloL+sT-EEsssLWK+t+ssLsQN-+uADS.TVsRRKpIl...EN...KpRsLLEQKRQssGSlupKasEQhsNFup....oVpLSSSEPKQosRGTSsh.EEVS-STSpFLMAENLVKuSVPEDEILTshsSKQhpKssLsLNKTQph.NICALSAEEQKILQSLs+LNERLa 
..........................QQR+p+hpEsTEpFQRAHlP.SQR+p.s...s.....p+ssP.LE-ALcQIptSs.lp..hsh...tp+shh...sh...cps...Sshsp.t....pt..t.....t.pc.hpEs.hhslsspp..hFp.pLcEhQphLccp.Hlsslpphpc-.lpphspSEoLSSlDSLEss-...p.pp.h.oh.s.p...u.shp..sp.h.hts.....p.p.tp...sh...h.ts.hthu+st...pph.hshts.sspss..ht-ll.K.s.......ph....hpspctps..s..pt.hp..t.hss..sst.s.s..p...h..shpt..tssp..th..st.............hst....p.s+s.sssp..s.th.....pt...p.p..pt......s..s.h..hhL.ss...........s......t...pt.h....thps....s.sthtph..hppt..php.hps.ttt....h.s..ps.............p..p.pt.....s..sh..t..st.s..tp.p..p.s.hc.pts+hl+uILKKcu+.c.s.h+slhhspth.htpp.stsl+DSlELsK.+t...E..+s..+KL+WhDEht........ph...t.t.tt..psh.t.......................p......ts...tp..t................h..p..........ttp....s.h.hssG.p.s+pAW.sst.tt.......ttsp..pt..pps..+hh+psts..s+sp.s..h.tpp+..ssh.p.QSspcsp...tsQu+h.hPpPP.ts...............................................t..h......ps.......s..s...s.....s....s......p......p.......ppt............tpt...............................s.....h.p......t....p.t.l.pQ++p.st.pht.+.....pph......sh......ss.p.Phps.ps..h.....tplS-STspFLhAEpLsptshsEscILshhpshp.tp..h..ppstp..sh.ssLShEEQ+lLpSLpcLNpRL....................... 0 5 9 16 +15206 PF15353 HECA Headcase protein family homologue Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9UBI9 Family HECA was characterised first in Drosophila where it regulates the proliferation and differentiation of cells during adult morphogenesis. In humans, HECA affects cell cycle progression and proliferation in head and neck cancer cells. It by slows down cell division of oral squamous cell carcinoma cells and may thereby act as a tumour-suppressor in head and neck cancers. 
27.00 27.00 31.10 31.10 21.30 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.34 0.72 -10.86 0.72 -4.34 14 102 2012-09-03 10:34:47 2012-09-03 11:34:47 1 4 77 0 66 93 0 102.00 54 20.64 NEW hstCCsPhus..............p.tt.hphschpss.....+VhCsNEpCst.upaMH+pCF-tWEpslLs......hL+ohGRARSWS-+QRpQNLWTKKGYDLsaKhCuC+CG+GpL+KDh-Wh ......................................s..CssPhhs..............shhtslchpc.cDh.KVlCNNEpCPh..upaMHhpCF.cWEuolLs......phps.....hGRA.RSWsEKQpRQNhWTKKGYDLAF+hCSCRCGpGHLKKDhDW.h....... 0 17 22 45 +15207 PF15354 KAAG1 Kidney-associated antigen 1 Coggill P pcc Jackhmmer:Q9UBP8 Family KAAG1, kidney-associated antigen 1, or RU2AS (RU2 antisense gene protein) has been found in mammals. It is expressed in testis and kidney, and, at lower levels, in urinary bladder and liver. It is expressed by a high proportion of tumours of various histologic origin, including melanomas, sarcomas and colorectal carcinomas. 27.00 27.00 88.30 88.30 25.00 19.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.95 0.72 -3.65 2 9 2012-09-03 10:36:12 2012-09-03 11:36:12 1 1 9 0 3 2 0 79.60 84 100.00 NEW MDDDAAPp.EGVPVAVHpHALH.tLRQVsGPGAuAsHLPRhsP.pLAAs.t.AP.LSQhPHRTpGAGSsPETNtp.TNPpV+EK MDDDAAPR.EGVPVAVHKHALHDGLRQVAGPGAAAAHLPRWPPPQLAAsRREAPPLsQRPHRTQGAGSPPETNEKLTNPQVK.... 0 1 1 1 +15208 PF15355 Chisel Stretch-responsive small skeletal muscle X protein, Chisel Coggill P pcc Jcakhmmer:Q9UHP9 Family The murine X-linked gene Chisel (Csl/Smpx) is selectively expressed in cardiac and skeletal muscle cells. It localises to the costameric cytoskeleton of muscle cells through its association with focal adhesion proteins, where it may participate in regulating the dynamics of actin through the Rac1/p38 kinase pathway. Thus it is implicated in the maintenance of muscle integrity and in responses to biomechanical stress. 
27.00 27.00 82.10 74.60 20.30 16.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.02 0.72 -3.97 9 43 2012-09-03 10:37:14 2012-09-03 11:37:14 1 1 39 0 28 31 0 84.40 77 98.29 NEW MSKQPuSNV+ulQANINIPMGAFRPGAGpPPKRKEhTsEs.Eps.P...sstt.EEKK.lPGAhKLPGPAVNLSEIQNlKSELKaVPKAEp .....MSKQPsSNVRAIQANINIPMGAFRPGAGQPPRRKEsTPEs...EE.u.s.P.....PTo-.EEKKPlPGAKKLPGPAVNLSEIQNlKSELKYVPKAEQ.. 0 1 3 11 +15209 PF15356 SPR1 Psoriasis susceptibility locus 2 Coggill P pcc Jackhmmer:Q9UIG4 Family SPR1 is psoriasis susceptibility locus 2 protein family. 27.00 27.00 123.90 123.70 18.30 17.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.10 0.71 -3.73 5 32 2012-09-03 10:38:06 2012-09-03 11:38:06 1 1 23 0 15 26 0 113.20 79 84.46 NEW SpDHPSPPusEA+EEsuuPTLPQGPPIPGDPWPGAPPLFEDPPPPGPSRPWRDLPESGVWPPEPPoTDPPQPPLPDDPWPAGPQPPENPWPPAPElDHcPQcEPDLDPPREEYR ....StDHPS.sssEspEEtGSPTLPQGPPlPGDPWPGAPPLFEDPPPPGPSRPWRDLPEoG..VWPPEPPpTDPPQPPRPDDPWPAGPQPPENPWPPAPEVDHtsQEEPDLDPPREEYR.. 0 1 1 1 +15210 PF15357 SEEK1 Psoriasis susceptibility 1 candidate 1 Coggill P pcc Jackhmmer:Q9UIG5 Family This family is considered a candidate for susceptibility to psoriasis. 27.00 27.00 28.40 32.70 21.70 20.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.95 0.71 -4.30 3 13 2012-09-03 10:38:49 2012-09-03 11:38:49 1 1 6 0 1 14 0 128.30 85 94.99 NEW TDQKSHSQRALCTQTPALQGPQLLNTDPSSEETRPPHINPDRLCHMEPANHFWHAGDLQAMTSKEFHLAATQDDCRKsRTQEDILVPSSHPELFASVLPMAPEEAARLQQPQPLPPPSGIHLSASRTsAPTLLYSPPPSHSPFGLSSLI ...TDQKSHSQRALsTQTPALQGPQLLNTDPSSEET+.P.l.PDRLCHMEPANHFWHAGDLQAMhSKEFHLAATQDDCRKGRTQEDILVPSSHPELFASVLPMAPEEAARLQQPQPLPPPSGIHLSASRT.APTLLYSsPPSHSPFGLSSLI............ 
0 1 1 1 +15211 PF15358 TSKS Testis-specific serine kinase substrate Coggill P pcc Jackhmmer:Q9UJT2 Family TSKS, testis-specific serine kinase substrate, is expressed in the testis and is downregulated in cancerous testicular tissue, in comparison with adjacent normal tissue. TSKS expression is very low to undetectable in seminoma, teratocarcinoma, embryonal, and Leydig cell tumours, while high in testicular tissue adjacent to tumours which contain pre-malignant carcinoma in situ [1]. Recently it has been shown in human testis to be localised to the equatorial segment of ejaculated human sperm. The finding of a TSKS family member in mature sperm suggests that this family of kinases might play a role in sperm function [2]. TSKS is localised during spermiogenesis to the centrioles of post-meiotic spermatids, where it reaches its greatest concentration during the period of flagellogenesis [3]. 27.00 27.00 87.10 87.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 558 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.87 0.70 -6.35 3 39 2012-09-03 10:39:39 2012-09-03 11:39:39 1 2 26 0 19 39 0 447.70 73 95.07 NEW ESRopLuPEsPGusooPsKGhsKKKKAVSFHGVEPRMSHEPM+WCLNLKRSSACTNVSLLNLAAsEhsDSosp-oTTDD..PshsASGspSsPl...PPuuPo.sWAsDDPDIo-lLsGVNSGLLRAKDSITSLKEKTTRVNQHVQoLQSECSVLSENLERRRQEAEELEGYCoQLK...........ENCRKVTRSVEDAEIKTNVLKQNSALLEEKLRFLQRQLQDEsPRRQEuELQELEQ+L-AGlSRpsLuhousSouss.sPPsSE-cPsPP.........cuLulAthsupsRAGEGsEVS.pEhQKVoAGL......EELR.........REVSSLTARWaQEEGAVQEALRLLGGLGGRLDGFLGQWERAQREQAQAARGLQELRGRADELCTMVERSAVSVASLRuDLEGLGPVKPlLEELGRQLSSuRRGS-LSMsLDR..sGSCuRCuSQ.GQQLSTESLQQLLERALTPLVDEVKQRGLA.PACPSCQRLHKKILELERQALAKHVRAEALSSTLRLAQDEALRAKNLLLTDKMKPEEKVAoLDYLHLKMCSLHDQLSsLPLEGSssohGGGSuGGAPPKRGGPsPEQ 
................................SpsQLsPEsPtusos.sKGIsKKKKAVSFHGVEPpMSHpPMHWCLNLKRSSACTNVSLLNLA.Ah..Es.DSoGTDSTsED........SG.hsLss...PPuSPo.PWss-DsDIoElLSGVNSGLV.RAKDSITSLKEKToRVNQHVQoLQSECSVLSENLERRRQEAEELEGYC.QLK...........ENChKVTRSVEDAEIKTNVLKQNSALLE...........EKLRYLQQQLQDETPRRQEuELQE.E..............Q.K....EAGLSRpGLuPss.s.GCs.GPPGSP-cPsRP.........RuLsPuGWGMGsRAGEGPhlSEQ..ELQKV.sGl......E.ELR.........REVSSLTARWHQEEGAVQEALRLLGGLGGRlDGFLGQWERAQREQAQTARGLQELRGRADELCTMVERSAVSVASLRSELEGLGPlKPILEEhGRQhQsSRRGsDLSMNLDRu.QGsCuRCASQ.GpQLSTESLQQLL-RALTPLVDEVKQRGLs.PACPSCQRLHKKILELERQALAKHVRAEALSSTLRLAQDEALRAKNLLLTDKMKPEEKhAsLDaLHLKhCSLHDpLSpLPLEGSTGoMGGGSuuGsPsKpGG.ssEQ..... 0 1 2 5 +15212 PF15359 CDV3 Carnitine deficiency-associated protein 3 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9UKY7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 128 and 251 amino acids in length. CDV3 is also known as TPP36 - tyrosine-phosphorylated protein 36. The function is not known. 27.00 27.00 30.90 30.00 25.60 25.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.82 0.71 -4.06 9 101 2012-09-03 10:40:36 2012-09-03 11:40:36 1 1 63 0 53 93 0 118.10 51 57.56 NEW KE--EWKEFEQK.EVDYSGLRlQuhQh..sEKE--EsEK+--.t-shEEsGth........uuDKSSGPWNKSA.A.sPsAssh..VpEs.EP.spsuGVYRPPGARhTs.pRtssQGPPEIaSDTQFPSLQSTAKHVEoR .............---EWKEaEpc.chDYSGL+lQshQI..s.EKE---s.EKc.p-.su-sh-EsGus.........Gs-Ku.S..G.PW.NK...........oA.s.......s...pAPsussh.......VsEsP.cP...sh..TuGV....YRPPGA......Rhos............sR+ss.........Q...G...P...PEIaSDTQFPSLpSTAKtl-oR.............................................. 0 13 17 31 +15213 PF15360 Apelin APJ endogenous ligand Coggill P pcc Jackhmmer:Q9ULZ1 Family Apelin is among the most potent stimulators of cardiac contractility known. The apelin-APJ signaling pathway is an important novel mediator of cardiovascular control [1]. 
Apelin is an adipokine secreted by adipocytes where it is co-expressed with apelin receptor (APJ) in adipocytes. It suppresses adipogenesis through MAPK kinase/ERK dependent pathways and prevents lipid droplet fragmentation, thereby inhibiting basal lipolysis through AMP kinase dependent enhancement of perilipin expression. It also inhibits hormone-stimulated acute lipolysis through decreasing perilipin phosphorylation. Apelin induces a decrease of free fatty acid release via its dual inhibition on adipogenesis and lipolysis [2]. As a vaso-active and vascular cell growth-regulating peptide Apelin is a target of the BMP pathway, the TGF-beta/bone morphogenic protein (BMP) system - a major pathway for angiogenesis [3]. 27.00 27.00 27.00 27.00 27.00 27.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.76 0.72 -3.75 8 19 2012-09-03 10:41:47 2012-09-03 11:41:47 1 1 18 0 11 19 0 54.80 67 72.82 NEW GPLhpssDGK-LEE.GolRpLVQP+suRsGsGsWQGGRRKFRRQRPRLSHKGPMPF .GPLhpssDGpsLEE.GNlRaLVQPRsuRsGPGsWQGGRRKFRRQRPRLSHKGPMPF 0 1 2 3 +15214 PF15361 RIC3 Resistance to inhibitors of cholinesterase homologue 3 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z5B4 Family RIC3 is a protein associated with nicotinic acetylcholine receptors (nAChRs), neurotransmitter-gated ion channels expressed at the neuromuscular junction and within the central and peripheral nervous systems. It can enhance functional expression of multiple nAChR subtypes. RIC3 promotes functional expression of homomeric alpha-7 and alpha-8 nicotinic acetylcholine receptors at the cell surface. 
27.00 27.00 27.10 27.30 26.70 26.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.10 0.71 -3.98 27 152 2012-09-03 12:43:49 2012-09-03 13:43:49 1 4 75 0 80 140 0 158.40 28 37.52 NEW lVL.ClulllP+hh.....hsts....pp.....p..............hspssssphsPhhp........tts.ss..ss.t...ss..t.s+scshcuhspscsts.tt.ts.....supppuhhupllPlYuhGllLallYhlaKl.p.+scp..p.ppppps.............................s.t...........pss.........t...+pc..hs-hcLspLQ-RLppTEchMppIlo .......................................................................................hhs.............................h..hths...P.hhc...........................pts.ss...ss.s..sthh.tRst.h.uht.shGuu.stt.ts............tsptpuhhG..lhP...l...YshGIllahlYhLhKl..ptKp..s.pc.ttss..............................................................................................tt.............tp...........hcpt...sphcLtpLpp+LppTEptMppll.s................................................................................................................ 0 16 26 50 +15215 PF15362 Enamelin Enamelin Coggill P pcc Jackhmmer:Q9NRM1 Family ENAMELIN is involved in the mineralisation and structural organisation of enamel. It is necessary for the extension of enamel during the secretory stage of dental enamel formation. The proteins are expressed in teeth, particularly in odontoblasts, ameloblasts and cementoblasts. 
27.00 27.00 34.50 34.50 16.30 16.20 hmmbuild -o /dev/null HMM SEED 906 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.82 0.70 -13.77 0.70 -6.46 36 250 2012-09-03 12:45:25 2012-09-03 13:45:25 1 2 199 0 21 246 0 720.00 60 93.29 NEW PYYSEEMFEQDFEKPK.EcDPPKsESPs.TEPouNSTVsETNSTQP...s...sGGSQGGNDTSPTGNss.G.NsGsNPssQNGlhssPsVNlSG...QulPRSQIPWtPsQPNIaENYPNPN...lRsFPsGRQWpPTGTshGpRQsGPFYRN.psQRGspWNSFAhEuKQsA+PGNPsY+KsYssToRuN...PNaAGNPANFRRKPQGPNKpshGTNls...PluPKp.GTls+sEKlQNP+EKsl.GQKERhVhPT+DPoGsWRNSQsYG.lN.KsNYKL..PpPEGNh.lPNFNSlDQ+ENSYYPRGDSRRsPsSsuQsQoQNh.PKGIsLEPRRIPYEoETspPELKHuTapP.hYPEEIPSPsREaFPsGRNTWNcQEISPsFKEDPt+QEcpLPa......PShGSRGsVaYsEYNPYsPR....ENSPYhRSNTWDERsDSPNTh...GQPcNP.Y.PhN...TP.D.QK-TlsYNEEDPlDPT.GDE.FPGQs+WG.E.E.suFKtuPTVRaY.EGpQYs...S...NQP...KEYLPYSlD.NPsKPREDhPYuEFYPWsPDEsFPSYNsuPTloPPsEsRGYYs.NNAltQEESolFPSWNSWDa+IpsQuQKE+pPYFNRNaWDQuTNLHKsssssPsQKENpPYSSNsPAGLQKNPTWpEG.EN.LNYsM.QITRLNSP-+-HLuFsDllPQsYPssQcEsaLFH..SQRusCCAGuSsG.+D.NPLALQDYTPSaGLsPGEsp-osPhYTEuSHTKHARalISPTSILPuQ..RNSSEK+L..PGESQs..P..FRDDVSTL++NTPCSl+sQlGQtGhh.sFPEAuSLQSKNTPCLKuDLuGDGNN.lLEQIFEuNQ.lNERTl.sLTPEQLlIsTP-EuPKPEuIQSEl.QGsEGE+QQpR.PPSILQlPCFGS+LsKa+sSSTGTPSopGRpGsaDGDssMPTEpP.sTLsGLATGEQFpulNVD.LNA 
...........................................................................PYYSEEMFE.QDFEKPK.EcDPPKsESPs.....o-PosNoT..ssETNSTQs...s...stGuQGGNDTSP....TGsss...u.NsssNsssQNGl.s.PsVNsSG...QssPto.QlPh......t..PuQPNIaEN.aPNPN...hRsFPsGRQWp...............TGo.shGpRp..suPFYRN...........sQRu..pWNoaAhpuKQs.s+PGNPhY+KsYsss.uRuN.....PNaAGNPuNhR...R...KPQuPNKpPh.sTNs.u...sh.uPKp.s.T.ss+sEplQNP+EKsh.uQKERhlhPT+..sPousWRNSQpYt.sN.K............sNYKL..P.PEush.sPsFNSlDQ+ENSYYPRsDS..R+sPsSssQhQsQNh.PKGlhLcP+RhPaEoEsp..pPElKHuoap.P.s..............Y.......sEthPsss+EpFPs.G+NTWNpQE..ISPsFKEDPGRQEEpLPH......PS.hGSRGsVaYP-...YN.PYsPR....ENSPYhRuN.oW-ER.sDSPNTh...tQscsPhY.PhN...TP.D..KcTlsYNEEDPlDPT.GDE.FPGQsRWG.-E..SFKt..sPTVRaY.EG.cQYs...S............NQP...KE...YLPYSLD.NPsKPRED.FPYuEFYPWsPDEsFPSYNsuPThs.PPsEspGYYh.ssA.htp.E.Eus.h.PSWsSWDp+hpspspcEptP.YasRNhWspsspLpKs.sp.spQ+cNpPYsss.PsG...LQ+NPhWpEu.Es.LNYsh.QhsRlNsP-tpp.sF..-hls.sYPssQpEuphFH..SQRusCCsGushG.+-.s..PLALQDYTPuaGLsssEst-ssPhYo-sSHoKasR.hlSPsuh.ssQ..RNSSEKp......tEsts..P..FRDDsuoL++ssPCShps....plsQhthh.shsEssS.Qs+NhPCL+sDLuGDGss.lLcplF-ssQ.hsERTs.sLsPEQLlh...ssP-EsspPEsIpsEh.ttpEschpQpR.sssIhplPCFsSplsphhsSosGsPsu.uR.sshst-.h..hPTc.P.sohstLATttphpshNlD.ls....................................................... 0 1 1 6 +15216 PF15363 DUF4596 Domain of unknown function (DUF4596) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9UPP5 Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. There is a conserved ELET sequence motif. There are two completely conserved residues (S and E) that may be functionally important. 
27.00 27.00 37.00 37.00 25.90 18.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.28 0.72 -4.04 10 87 2012-09-03 12:47:34 2012-09-03 13:47:34 1 3 38 0 49 63 0 45.80 71 4.71 NEW -ThuRWuELhSPLDDSosSl.T.VoSFSP.ED.ssSPQGEWTIlELETaH ...pLuRWuELhSPLD-SpASI.T.VsSFSs.-D.suSPQG-WTllElET.H 0 3 8 19 +15217 PF15364 PAXIP1_C PAXIP1-associated-protein-1 C term PTIP binding protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BTK6 Family This protein domain family is the C-terminal domain of PAXIP1-associated-protein-1, which also goes by the name PTIP-associated protein 1. This family of proteins is found in eukaryotes. The function of this protein is to localise at the site of DNA damage and form foci with PTIP at the DNA break point. Furthermore, studies have shown that depletion of PA1 increases cellular sensitivity to ionizing radiation. Proteins in this family are typically between 122 and 254 amino acids in length [1]. 27.00 27.00 42.40 42.00 24.70 24.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.17 0.71 -4.27 21 81 2012-09-03 13:14:09 2012-09-03 14:14:09 1 1 71 0 53 71 0 129.70 44 61.23 NEW -WplssSD-Eh......................sW.PsPpEItcL...Ychl...spstsLELpaps.sRRs....PoPp.tsp..csppsp....pttctt.-pcs.ps.oEFDF.DD-.s..p............sphhsRRpssuo..ttSspK+pAphDcVLscM+RH+..........+l.ppthpps ....................-WslsCSD-Eht.s......p..................sWhPsPpEIpcL...YEhL...sspssLELQhchhsRRs....PoPEspsptpcs-c......cscEpt.Ec+PphP.oEFDFDDE..s.osp..........sshhsRRRTPGo..ss+Sp.KRpA+hDKVLSDMKRH+..........+l.pppI...hcp......... 0 16 20 35 +15218 PF15365 PNRC Proline-rich nuclear receptor coactivator Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NPJ4 Family The PNRC family, proline-rich nuclear receptor coactivator, is found in eukaryotes, and is approximately 60 amino acids in length. There is a conserved YAG sequence motif. 
27.00 27.00 27.10 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.06 0.72 -3.96 36 209 2012-09-03 13:16:56 2012-09-03 14:16:56 1 1 132 0 140 210 0 56.20 38 22.51 NEW ssssYAGu+FopsPsPosLPhPP.cWhtsss....................p..shpthop.............pL+tlL.clp ....t.tppYAGup.F.us.sPuPSsLPhPs.ahhsss..............................................t.stp.hs.........................................pL+tlL.pl....................................................... 0 27 56 92 +15219 PF15366 DUF4597 Domain of unknown function (DUF4597) Coggill P pcc Jackhmmer:Q96GX8 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 63 and 76 amino acids in length. There is a conserved TPPTPT sequence motif. 26.50 26.50 27.60 82.30 25.30 18.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.15 0.72 -4.39 4 25 2012-09-03 13:30:01 2012-09-03 14:30:01 1 1 22 0 15 24 0 61.10 81 62.28 NEW MCVSS.suSp-EAPVLsDKHLDVPsIIITPPTPTGMs.lPRDSp+sVWhDEsGShsDDGElDsE MCVSSsSSSHDEAPVLsDKHLDVPsIIITPPTPTGMM.LPRDSppTVWLDETGSCPDDGElDPE.. 0 1 1 2 +15220 PF15367 CABS1 Calcium-binding and spermatid-specific protein 1 Coggill P pcc Jackhmmer:Q96KC9 Family CABS1 is a family of proteins found in eukaryotes. It is also known as NYD-SP26. It binds calcium and is specifically expressed in the elongate spermatids and then localised into the principal piece of flagella of matured spermatozoa. 
27.00 27.00 50.90 50.90 18.90 18.50 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.29 0.70 -5.60 9 23 2012-09-03 14:10:36 2012-09-03 15:10:36 1 1 20 0 11 26 0 382.60 67 87.66 NEW MAEDG.PKIYSHPPTESSKTsTEATIFFGADNTIPKSETTITSEGDHITSVNDahhEuDFSTTssNKLTssKE+lKSEDDlEu.hlKSoThhEKEITTLTuTsNShAs-SITENFIPVKIGNISSPVATVSLIDFSTNhAKEDILLsTlDsGDc-Vs.hTSElSGol+-STsslsDTPsLPscpscsDs...sSSsKSsssADtsVQlTDS.lPEAEIsPoTE+NlTTIPDITslTEEKITEIDLIlsEDDPssVsKLTDSDEEKFITVFELTsoAE+DKDNPEDh.LsDEESTDElNVWMER-pssEAEoHsVLLTAVESRYDFlVPsSlAhNlpE-Ss..T..c..EDLsENspsESVTKsTEshS....tsTs..Ds.spcEDs.TTEoGlFKLLKE-PDEFhI .MAEDGLPKIYSHPPTE.SSKTPTtATIFFGADNsIPKSETTITSEGDHVTSVN-YhLESDFSTTTsN..............KLTssKEKLKSEDDhtochIKSTT.HLpKEIToLTGTsNShs+DSI...TENFhPVKIGNISSPVsTVSLIDFSTsIAKEDILLsTIDoGDtEls.ITSEVSGTLKDSoAulADoPAhPccKDEuDhsNYsSSlKSNVPADEAVQVTDShIPEAEIPPusEcsFTTIPDITALpEEKITEIDLsV.EDDssAVupLTDSDEEKFITVFELTTSAEKDKDN.EDTLLTDEEST-GANlWMER-oANEAETHSVLLTAVESRYDFVVPASlATNL.sE-SST-..EDLSEs-pTEoVsKlTEPFS....uTTSlLDTPsaKEDTSTTETsIFcLLKEEPDEFMI.......... 0 1 1 2 +15221 PF15368 BioT2 Spermatogenesis family BioT2 Coggill P pcc Jackhmmer:Q96M83 Family BioT2 is a family of eukaryotic proteins expressed only in the testes. BioT2 is found abundantly in five types of murine cancer cell lines, suggesting it plays a role in testes development as well as tumourigenesis [1,2,3]. 
27.00 23.80 57.00 23.80 24.70 19.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.08 0.71 -4.49 9 46 2012-09-03 14:16:37 2012-09-03 15:16:37 1 2 17 0 13 36 0 153.80 67 46.74 NEW MKsuKa.hsoSsKl.ssVPELs.KKGlh.s.shSscsKEK+SuKll+sKlEPMVLRS.PPTGESllRYALPIPSSKT+-llucDEhl++IT++LKMlVSTLE-TYGhs..hpsGEcshsKsEpE-h...sLSVGDDlsSFLhpCSphAuQLEEAVKEEcsILESL..FKWFQtQVNQMEE ...MKPsKHLLToSsK..ssVPtLshKKGLh.N.PlSPchKEKHNAKLl+DKIEPMVLRS.PPTGESIVRYALPIPSSKTKsLlsc-EMItKIhKHLKMVVSTLEETYGtC..spNGEcshlKpEpEEL.........oLSlGDDhsSFLhhCSQFAuQLEEAlKEEpN.....ILESL.....a.KWFQhQVNQMEE............ 0 1 1 1 +15222 PF15369 KIAA1328 Uncharacterised protein KIAA1328 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86T90 Family This function of this protein family remains uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 28.00 28.00 23.80 23.20 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.40 0.70 -4.62 8 73 2012-09-03 14:28:52 2012-09-03 15:28:52 1 4 37 0 31 57 0 214.80 46 55.11 NEW DLCPEDKRRIANLIKELARVSEEKEVTEERLKsEQESFEKKIRQLEEQN-LIIKEREALQ...QYRECQELLSLYQKYLSEQQEKLohSLucLuAA+tpEQplSu+KSshpsus.c.....LDGSYLulAtspshhpsp++sKuuspupuupohsshpNs.sh+sphhppPp-shcc.P.Es...RoCssctssh+.tsst.hcp.....h...-.+hpEh.shpPsssocsCuH+putsuspl+-upasophu.ppstophcoCsasphshsS.hptpsh.ts.ETpluKplSE-RRQQLLLQKMELEIEKERLQpLLAQQEsKLLLKQQQL.HQSRLDYN ........................................sRlscEKE...pch+sEppphE..lp.Lcppp.hh.pE+..h............................................tts...pp..osccs..pps..p.....LDGSYLulutst.h...ptt.pss..s.ss.s...hpNp.s.p...h..spcthtc.s.cs...tsss.t.sshc...s..hc..........-hphpEh.phpss.p.psCu.c+hu..ss.spps.pstphs.phstop.coCsasthshsS.hpstsh.ts.Ep..t+p..E-++ppLhhQKhpLEhE+E+LQthLspQEtKLLhppQQL.pQSphpYs.............. 
0 5 9 15 +15223 PF15370 DUF4598 Domain of unknown function (DUF4598) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N5I9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 159 and 251 amino acids in length. 27.00 27.00 27.60 27.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.51 0.71 -3.78 7 138 2012-09-03 14:34:31 2012-09-03 15:34:31 1 2 125 0 98 135 0 109.00 27 54.16 NEW sLLSRLSAFLPpMKsAN--LpREItAGR..uKDlpLD-lD-.p-GQ....YIEMNLGLGVLEEKR....ssDssusstp.scupc......s.stpts...pDSslL-+LhGp+csossp.......KPoIpEhs ...........................lLs+lpsFLPphtpAN-cLccchss.us..stchsI..E..sl--..scsp......................hIEM............sluLG.lh-p..pp...............stcs.ppssp.spp.sspc............ts.tppp.p.ttpt...........................................t........................................................................... 0 32 44 69 +15224 PF15371 DUF4599 Domain of unknown function (DUF4599) Coggill P pcc Jackhmmer:A6NFA0 Family The function of this family of eukaryotic proteins is not known. 27.00 27.00 28.70 28.10 25.00 25.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.12 0.72 -3.92 56 185 2012-09-03 14:35:58 2012-09-03 15:35:58 1 4 23 0 114 144 0 88.00 33 9.50 NEW tRs+pRp+s...ssh+u....pR..p..spcEsE.cspcLlSlLcS.h.u...........hLspc...s...shRpLLCsDPsCpVCNssssElp.....pLLhtEshp.s.uss.ss.usssu.sus ...........................t+s+pRt+s...ts.+u....pR..pspcEhE.ch.......pcLlSlLpS.h.G...........hspcu...sh+pLLCsDPssclCpsssschp.....phh.tEshp.s.uss.sluPhsusss....................... 
0 15 15 15 +15225 PF15372 DUF4600 Domain of unknown function (DUF4600) Coggill P pcc jackhmmer:A6NNP5 Family \N 27.00 27.00 29.60 29.60 26.40 26.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.74 0.71 -3.93 11 65 2012-09-03 14:38:42 2012-09-03 15:38:42 1 3 40 0 35 65 0 99.30 45 50.06 NEW cEuNEWKTRYETQhElNcQLE+QIhhL+c+lEph+s.NspD..+hu....SlRs............h-ph..ospuLpphlKpLE+EKptLpsQL+-hEhRL-QEuKAY+Kss-ER+tYlsElspsp.ss.php++Qphs...thpcsscs.scs .......s-hphpYcs.hEhNppLpcph..hppphpph+u.sspD.ths....sl+s............h-pM..ss..-SLNpLLKQLEcEK+sL-sQlK.hth+LEQEuKAYpKhssER+sYLuEhopsS.s..phs++Qphsth.chpEp...s.............. 0 9 11 15 +15226 PF15373 DUF4601 Domain of unknown function (DUF4601) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NA69 Family This protein family is a domain of unknown function, which is found in eukaryotes. In humans, the gene encoding this protein is found in the position, chromosome 19 open reading frame 45. 
27.00 27.00 31.60 27.50 26.20 26.30 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.46 0.70 -5.62 11 46 2012-09-03 14:42:42 2012-09-03 15:42:42 1 2 32 0 25 42 0 319.10 42 85.36 NEW LFppDsRWsupE+VSEs+RAF..PPPs..shp..pEpsRERohAhQuSsL+lHADuRstssLSoARusYGWPElPs+A+EpIRGARLlFDRDSlPsGDR-KLRIPsTTaQshFPP+DA.sPQPRAPspHL.GGPNsL+W-YcpQ.-sTSYQ+QFQALPuPPALMCKRASSSVcLGDsKIGYuPhCSEQKpAYpPQsLPPD..RYDKAQAuAHIHpVNIpPGDuLF+DRTTpu-HFYu+EP.EPFVLHHDpTPESHILcGNWsPG.PGSLsTShphFYG.QPsPsTpPPuRHlsH-pLQSHl.sLG-PcLLGpFFQToMuoDYsPs....phs.......p.p+AsNL+LhpSpLPpsouEhDFLToN.QpMlKPHphssASsTEEhLQRCKYSHlEPPLGpQRFFSTQYcDEFPaKYQGPsVL+.hushQESHVPLGTs+phGChtcKlDPpAPQhPhYPCPSQ ..........................................s.....a..................t.......sphph.ts.p.t...o.hp.t..h.t...t.....ttst.hatpDohPsGD+p+lchP.osaptha.sa-h..psts.+sPshHh.uGs.sslphs.hptp......tToYpppFp..uh.usPAh....p.+ch.t..Spl..hGD.phsht..so..pphhts.t..s.t..R.Ys+tpAsu+lp.ss..ltsGDshh..c.p..TThscpFh.spcs.-Phhlc+c.ppstSpIhcGshsPG...suu...ls....T...hphFau.psss.s.......spPsscclspc+Lp..SHV.pLG-scLhtpFFpTohsoDYhss....ph...........p.p+s..sshph..Stl..s..ssp.salThs.pthh.hPH.t..h..t..hoc-hlQ+s+hSHh.sPhst.+aFSTpap-pas.K.Y.uPhs.p..tp.Qcu.lPlGo.t............................................. 0 5 7 11 +15227 PF15374 CCDC71L Coiled-coil domain-containing protein 71L Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N9Z2 Family The protein family, Coiled-coil domain-containing protein 71L, is a domain of unknown function, which is found in eukaryotes. 
30.00 30.00 30.40 32.10 27.60 29.80 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.17 0.70 -5.14 18 76 2012-09-03 14:45:56 2012-09-03 15:45:56 1 2 35 0 40 87 0 232.80 33 89.74 NEW sEEKAVHSWSR..ISoAGpKALEEAL+VFNPMSpDLosTEsQLVAFLQGLR--GFQPTILRSKDVYGYSSCTAssPSQTKhpspssssss..sSsPs+ssppuhthssu+ustlshslssp.uKsustslsK.tsoTNLLLsSLKQopuspupssshuFPsphYPGVYPAMRLSVVLEALVP..LKssssCLpuKh......pphtLulusSslKLhKssu.s..ps..Ks.......hpsKu.pplhp+ust..............GPptsshppSpssKuoG.LsGhhhpsuSphssptspspt......................................................................................+spt.s.sphsh+stpsp.EshG.pKRK+s-EsK-hss+K+s+.hP....s.K.....splspuThsLL+FpsIKVs+....psSDDEVRp+AQ+ILRVNLSPVIRlQPL .........................t+sVhuhup....ssuppsht-AhhlF.PhSp-h.so-tphhsFlptL+cp.hpPhlLpStDVYGYsSsps....................................................................................................................................................................................................................................................................................................................................t....hh..t............................h.tt.........t..........us..phhph.sI+Vst....p.S.stsRppApplLpVNLpPhlplp.h............. 0 3 5 10 +15228 PF15375 DUF4602 Domain of unknown function (DUF4602) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NDD1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 173 and 294 amino acids in length. This family includes Human C1orf131. 
27.00 27.00 27.00 27.00 26.60 26.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.55 0.71 -4.10 42 158 2012-09-03 14:46:48 2012-09-03 15:46:48 1 3 126 0 111 162 0 137.30 22 56.05 NEW VsFps.p..........p+cpp.........................p.stspspst.t...t..tp...................ptp.tpchslc+.s.......+h-VpcFGhouapt.cp++phctppslpLGAKsP.KpphlNYKhlhpppKppKtcccc.ctphtptsshhptppp......tpppppc+ppcpcsss .....................................................................................................................................................t.tt.pptpt..t.t........t.................................ptpppchslcK.s.........+hEVccFuhos.h..st.tc++phEpccsltLGAKPs..KpphlNYKhLppphKcpKttccc.ccch..tptss..hhppppp..........ptpcpppp.ppp.....t........................ 0 43 58 85 +15229 PF15376 DUF4603 Domain of unknown function (DUF4603) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q92628 Family This protein family is a domain of unknown function. In particular, this domain lies at the C-terminal end of a protein found in eukaryotes. 
27.00 27.00 39.70 38.70 19.00 19.00 hmmbuild -o /dev/null HMM SEED 1286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.37 0.70 -14.36 0.70 -7.37 11 83 2012-09-03 14:47:49 2012-09-03 15:47:49 1 3 49 0 49 72 0 742.60 54 88.25 NEW GPlSsSEMSLL+ALGPVQTWLGQELEKCGIDAMIYTRYVLSLLLHDSYDYDLQEQ..ENDIFLGWEKGuhKKWGKSKKKC.oDLoLEEMKKQAAVQCLRSASDESSGIETLVEELCS+LKDLQSKQc.EK..IpKKhEGS.SPEs-.SPoAKDQVEMYYEAFPPLSEKPVCLQEIMTVWNKuKlCSYSSSSSSSTAP.TSTDTSSPKDCNSEuEssKERsspA.s...ssspE+uQ.pRSKpEKEN+hssuss...EEK.sshhKKQsRH+SEGKhRPRSWSSGSSEAGSSSSGNQGEhKsu.hKhVKVRHKsREs.RNKKG.RuGQsRhshKss-KsER+stuG.....sSuSuuuGul+.QLCKRGKRPLKEltRK-uGspEuK-lhsEu+s-KEYKEEPLWYTEPIsEYFVPLS.RKSKLETTYRs+p-ssshs..SEAVE-LSEuV+GLCISNsNlH+TYLAAGTFIDGHFVEMPA.VlsEsh-LsGTShCS.PEDspaLDDlHLSELTHFYEVDIDQSMLDPGAS-shQGESRILNMIRQKSKEcsDFEAECCIVLDGhELQGESAIWoDSsSSlGAEGhFlQ.DLuNLAQFWECCSSSSSsDADGESFGGDSPlRLSPlLDSThhsschLAGNQE..LFSDssEGS.GlNSs.FSVFEVQCSNSVLPFoFEoLNLGsEN.TDSsSosNhLGKTQSRLLIWTKNSAF-ENEHCSNLSTRTCSPWSHSEETRSDNETlNlpaEESoQFsuEDINYVVPRVSusYlDEElLDFLQ--oCQQppcoLGEhPo..LlFpKKSKLESVCGIQLEQKsEsKsaETsps.sssuS.pGDsYSSGVIKDIWTshuDRsSsAsl-s-ch-..-cLFSsDVNsY.CCCLDsEAKhEslQ-.spKAVQRSEYHLWEGQKtshEKRAFlss-LSKVDG.GDYTTPSKPW-lspDKE.sSFILGGVYGELKTFuSDGEWAVVPPucs+GSLLQCAA.......SDVVTIAGTDVFMTPGNSFAPGHR.LW+PhVSFEQuEQs.KuG-sGLNKGFSFIFHEDLLGuCGNFQsEEPGLEYsFSSFDLsNPFSQVLHVECSFEPEGIASFSPuFKPKSILCSDS-sEVFHPRICGl-RTQYRAIRISPRTHFRPISASELSPGGGSESEhESEK-EuSlPlsSQsDVF-DPQADLKPLEEDAE+EGHYYGKSELESGKFLPRLKKSGMEKSAQTSLDSQE-SuGlLPhscQs.CL-Cshp-olpss.hESScusCKlhpppppEhschCSCcsuCphPshp-s..luust.hpEFPlLNsDlQshstuQpcpsWWpKALaSPLFPuSpC 
...............hsss-MS.LpuLGPVQoWLGQELEKCGIDAMIYTRYVLSLLL+DSYDYD..L..p-Q..EpsI.LGhtc.......Gsh++.hs...+u++..Ks...shs..hE-...hKKQAAVQCLpSASD-sS......GIEoLVEELCs+LKDlQ...scQ........c...EK...hp+K.-tu.sP.Eht..S.ss+...DpsEMYY-AFPsLspp............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 8 10 24 +15230 PF15377 DUF4604 Domain of unknown function (DUF4604) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96AT1 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 141 and 174 amino acids in length and contain a conserved LSF sequence motif. 
27.00 27.00 28.70 28.10 23.50 23.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.42 0.71 -4.02 68 196 2012-09-03 14:49:01 2012-09-03 15:49:01 1 3 171 0 140 196 0 148.90 26 88.71 NEW ppsloYsc..sEPsFLp+h+.sphGhppss..................s+cpth......................sscscss.s.-..cp.....D-tPpVVllc.ss.-Lot.-Ehpthhpt...........................ttppppptsstsuclha+ps................pKRpsscthssts.......................t.ttt..............tt.tp+ppp+pp+..tpshLSFs--- ..............................tpsloYsc..sEPuFLp+h+.pphGhppss.....................spcp.h........................sscscsss.-.......cc.........D-tPpVVshc.ps..cLot..EEhpthhpp..............................................................tttptppp...ssssuclhh+ps..........................................tKRps..schh...ss.s......................................ttt.t.............ppp.ptp+pppKphK..p..LSFs---....................................... 0 42 66 108 +15231 PF15378 DUF4605 Domain of unknown function (DUF4605) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96D05 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 82 and 137 amino acids in length. 27.00 27.00 27.30 27.00 24.40 26.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.63 0.72 -4.27 13 84 2012-09-03 14:50:09 2012-09-03 15:50:09 1 2 46 0 54 67 0 59.00 54 46.72 NEW olFuplNcpLlshGFPphphGs+lVEPlsslhhhhlLhhlGlpGLLLVGllhlVhhhs.Qc ...o.Fu-LN+pLlNMGFsphahGp+lVEPVhsIhhhhhLhhLGlpuLhLVullhLV.hhsQp.... 0 8 11 25 +15232 PF15379 DUF4606 Domain of unknown function (DUF4606) Coggill P pcc Jackhmmer:Q96LL4 Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. 
27.00 27.00 31.90 31.50 22.10 19.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.35 0.72 -3.96 13 38 2012-09-03 14:51:17 2012-09-03 15:51:17 1 1 33 0 26 26 0 100.60 56 38.32 NEW CsVPcELlNRIahKNhRsslKQsusu+pHlsSQCPsCN+KRAELAQusFLRpKKTLLEShLLpEKIDEHLaTKDhLThIGEAHpuLPRLSDDP+IIWKRLsEKu ...........................CTVPDELlNRIYhKNhRso.KQhusAKQHlSSpCPsCN+KRAELApuAFL+pKKTLLEShLLQEKIDEHLaT+DFLThIGEAHp.s.hPRLSDDPRhIW+RLspK...... 0 5 6 9 +15233 PF15380 DUF4607 Domain of unknown function (DUF4607) Coggill P pcc Jackhmmer:Q96LP6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 207 and 359 amino acids in length. 21.00 21.00 95.20 95.10 20.40 19.10 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.82 0.70 -4.94 4 19 2012-09-03 14:52:14 2012-09-03 15:52:14 1 1 13 0 6 20 0 243.90 63 81.77 NEW hAhKRLL.sTpQhllPRs.sVSTsSF---SYtEhtspPsPSSEhDEsPhhFTsctEhp+csRtsP+QAWSSshLEQ.hst+PshsHSVNPlHLEAtGhHIpRHsRPpsQPLsssKtsSGSsARPaTAIGLCRRSQTPhA.QSsu.SsoE.E.EERhAAPAGu.AHPD.lQSRLLGAoGssVs+GAVAMAPEMLPKHPHsPccRRPRADoSLHGNLAGAPLPLLAGASTHFPSKRLIKVCSSAPPRPoRtFHTVCSQALSRPVVNAHLH ......huhKRLL.pTCQYIVPRS.oVSTsShDEE..ShtEhpSSPuPSSETDEAPLIFTAcGEsEcRARGsPKQAWsSSFLEQ.hspKP.shs+SVNPlHLEAtGIHIsRHTRPKuQPLSssKpNSGSuARPhTAIGLCRRSQTPsA.QSsusSsoEhE.EERhA.sPAGu.AHPD.lQScLLGASGNPVGRGAVAMA.PEMLPKHPHsPccRRPpADoSLHGN.....LAGAPLPLLAGASTHFPSKRLIKVCSSAPPRPoRRFHTVCSQALSRPVVNAHLH.. 1 1 1 1 +15234 PF15381 DUF4608 Domain of unknown function (DUF4608) Coggill P pcc Jackhmmer:Q96MR7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 85 and 157 amino acids in length. 
25.00 25.00 26.90 116.20 22.20 16.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.57 0.72 -3.84 3 10 2012-09-03 14:53:25 2012-09-03 15:53:25 1 1 8 0 2 4 0 72.90 90 60.65 NEW SSAETLRTVSRRSVPSSSMPYLALAHSRVSSLNHAASVDGsuTSHRNVADSFSRTSRSCSRFLKGTAGSAGR-Gs SSAETLRTVRRRSVPSSSMPYLALAHNRVSSLNHAASVDGWGTSHRNVADSFSRTSRSCSRFLKGTAGSARR-..s.. 0 1 1 1 +15235 PF15382 DUF4609 Domain of unknown function (DUF4609) Coggill P pcc Jackhmmer:Q96N06 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 70 and 139 amino acids in length. 27.00 27.00 122.40 122.30 19.60 17.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.32 0.72 -4.27 7 23 2012-09-03 14:54:21 2012-09-03 15:54:21 1 1 18 0 10 23 0 69.40 74 58.25 NEW EKPDsKtKSSKKKsVIPQIIITRASsETLlShuS.sS-EQRTI+EpADWGPYtRHRNPSTssAYssQs+E .EKPDVKQKSS+KKsVVPQIIITRASNETLlSsSSoGSDpQRTIREpcDWGPYtRHRNPSTsDAYssHhKE.. 0 1 1 2 +15236 PF15383 TMEM237 Transmembrane protein 237 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96Q45 Family This protein family is found in eukaryotes. The function of this protein is to aid the production of new cilia in ciliogenesis. Mutations in the protein cause a disease, named Joubert syndrome type 14 (JBTS14) and also affect cell signalling using the Wnt pathway[1]. Proteins in this family are typically between 203 and 512 amino acids in length. There are two completely conserved G residues that may be functionally important. 
27.00 27.00 27.50 31.90 25.50 26.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.54 0.70 -5.25 22 115 2012-09-03 14:55:22 2012-09-03 15:55:22 1 7 77 0 76 122 0 207.20 32 51.64 NEW sstpt.sscltscs-Dhhs..........-.t....pp.s.hsssph.SQP............ss+laVE+s..pcFpssc+schh+spp.h........cshh-.csh....aooh-lAlplacuF+hlulasHGFLAGaAlhphlllasLsupphst..............sLLppYphLAhPhpulhYhLLslSoVSAFD.............Rl-lu+sshs...lRshlplsssuLs.hlYFsuLlloLSpphhs-+lshh......s.ssshasssscpp...........llpsWhhlNllhAlLsuLuWlhluhpPspDhs .....................................................t.................................................s.P.........................splalpts..ttFpthc.......hpt......................p...c.c......hoshclAhsVp+sa+hluh....hsHGhLAGhAlhpllhlasLsspphstl.................................................................sLLppYpslAhPhpshhYhLhsluhlSsFD.........................Rh-hu+hohu...hpth.ls.hcsssls.h....lYhssllLolspp.hs-+ltLh.................ss.N.olh..thtpp.............llpsWhslslshulhshhuWlhluhpPt.Dh.................... 0 19 27 53 +15237 PF15384 DUF4610 Domain of unknown function (DUF4610) Coggill P pcc Jackhmmer:Q9BUH6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 164 and 206 amino acids in length. There is a conserved NPG sequence motif. 
25.00 25.00 30.50 59.20 22.60 23.10 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.30 0.71 -4.78 6 40 2012-09-03 14:56:59 2012-09-03 15:56:59 1 2 28 0 21 47 0 176.00 56 94.80 NEW ssLCTLs.sGsspPRaVCYCEspputt..stGlFNltlTsus-lWSTphos-SLuphKu+FGLsuTEDhssRFRsACpQQsVuloLQED..pAsLpLuGsPSsLoF-LSKlPssEAtPRLpALhLpLA-+VpsLEpRLAssEpoAhSPRKSsp.uusp.FhP-.-+QRGGsGsGVR+RhPGESLINPGFKSKKPAoGVDFD- ....................................................sLChls..sGstsP+aVCYCEs-tst..hstusaslhVTDAsELWSTsFoPDSLusLKA+FGLousEDhssRFRuACcQQAVsloLQED..+ASLTL.SGusSuLsFDLSKVPuPEAAsRLQALTLuLA-+VpsLERRLA..AsEcs..A.......sSPRKSsp.uGsQ.LFLPDsD.QRG.GsGsG....VRRRC..P.GESLINPGFKSKKPAuGVDFD-......... 0 5 6 7 +15238 PF15385 SARG Specifically androgen-regulated gene protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BW04 Family This family of proteins is found in eukaryotes, the function of this protein is still unknown but it is thought to be an androgen receptor. Protein expression is up-regulated in the presence of androgens, but not in the presence of glucocorticoids. SARG tends to be highly expressed in prostate tissue [1]. Proteins in this family are typically between 340 and 587 amino acids in length. There is a conserved EETI sequence motif. 
27.00 27.00 33.60 27.90 23.30 26.70 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -13.05 0.70 -5.13 31 124 2012-09-03 14:58:22 2012-09-03 15:58:22 1 4 37 0 65 113 0 341.50 32 92.15 NEW ps.sDpShcaL....otEEK-sLLFhEETIsSL.-s-h-s....slss..Dputpsp................uPp.........s.EEosspp...sEPcclh-hsps.......u.shput..shuLPcshp.hu...pspsh+cs....s.......t.pppth....s.P....pst.s.......s.Pstsssustsss.ch-hl..sPPtt....pPchtp.......psspPhs.upp.........................pcc.susEuhS.pusp+sscss.t..ps.su.sspsu.............................t.pt..sP.sAPK.s++hPsNIslpsSpu...shp.s.t......Nh.pR+spshss.sus.....sps..pcppKuRhpuLcKhGLPpDp.p-suht.......................................................................................ssuptssstshsh.hsppttspus.op....................sss........hsGhp..p.shKSpohchsusGhoshhps-...hssp.p..pp.uptShh-+hsP.sshpssR.RsuSLs...............................Rsth.RP...sloVphS....s+Gss-EpRREAL+KLGLLKE ..................................................................................sDpShcaLotEEK-sLhFhEETIsSL.-t-h-p..h.s..Dps..s...................ss.......t.cts.sp........tspphhp.sps..........s.t.t....tl.p..t.hs.....s..hppt...........................s.....t...................s......chp.h..sPP......p.p..t..............t....s...upp.........................pcp.htspshs..s.ptt..p.......t...ss...tph...............................t...ss.ssPK..p+hPsNI.lpsstu...shp.p.t......ph.ptpsps..t..s.................................p.p..t.tp.s..tp.............................................................................................s.........................................................suhp......p.shKSts..h.shGho.hhpst.................sh.t...h.s.sh.ps.R.RssSLs................................................ts.h.RP...sloVphu....s+GsspEtRREAL+KLGLL+E................................................................................... 
0 3 8 27 +15239 PF15386 Tantalus PRR14; Drosophila Tantalus-like Iyer LM, Aravind L, Eberhardt RY, Coggill P, Hetherington K pcc Manual Family An alpha+beta fold domain found in metazoan proteins such as Drosophila Tantalus [1]. Drosophila Tantalus binds the chromatin protein Additional sex combs (Asx) and also binds DNA in vitro [2]. 32.80 32.80 32.80 32.80 23.20 20.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.25 0.72 -4.21 13 121 2012-10-09 15:10:48 2012-09-03 16:02:35 1 2 64 0 68 111 0 61.80 60 8.38 NEW sslTPh.GLP+stR.l...pKKEhSL.EEIYTNKNYKsPsst+sLETIFEE..Pcp................+sGslhhh.s+Kh .............NLTPM.GLPRP+R.L......pKKEFSL.EEIYTN.KNYKSPsspRsLETIFEE..P+E...................RNGoLI.hSppKh............... 0 9 12 35 +15240 PF15387 DUF4611 Domain of unknown function (DUF4611) Coggill P pcc Jackhmmer:Q9BXV9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 71 and 100 amino acids in length. There is a conserved AKR sequence motif. 27.50 27.50 27.50 57.60 24.40 27.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.14 0.72 -3.84 9 26 2012-09-03 15:05:01 2012-09-03 16:05:01 1 1 20 0 11 34 0 90.10 53 94.44 NEW LlGEaVspDGppQ+lRlsCEusu-usshQuLLSGlAQM+EhVoELhusLVppEtpstlsuss-Eul-.GsDEDDuED.EsNhcs+T.....sSsGPsAKR.KP .LLGEYVGp-GpsQ+LRVsCEAPGDuDPFQGLLSGVAQM+ELVoELFusLVppEsQppVAAuP-EuLD.G-DEDDuED.EsNhss+T.....s.sGPsAKR.Ks........................... 0 1 2 2 +15241 PF15388 FAM117 Protein Family FAM117 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9C073 Family This protein family is a domain of unknown function found in eukaryotes. Proteins in this family are typically between 269 and 453 amino acids in length. There are two conserved sequence motifs: RRT and TQT. 
27.00 27.00 27.60 35.90 22.30 26.10 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.52 0.70 -5.20 19 194 2012-09-03 15:06:59 2012-09-03 16:06:59 1 3 59 0 110 184 0 263.60 45 73.36 NEW lRRTuSLDsIsusYLpGpWP+-.scs..hssp.hpDKuTQTPsuWsEpstp+p...........t....tH+RSASWGSs-pL......+E.........luKL+QQ..........LQ.Ro+p..us.........tspc+cppush.Gspus.....hstsp.........t....shss...sshtphssphRpSlEGLNQELEplal+......ppsc-phht.......l-lPDGHRAPlPs.t..t..................susspo.sspss.........u....Ssssuss.s.........s...ps.ssssp.tpsh...hs..........................................tsssP....h.thuoSP+PN+ohhFpREPPEGCE+V+l.hpE.ts.sp.spth...sCPDcNKVsFpP...sGSAFCPV.ol.hpPLh....Pos-hhh.ps. .................................................................lRRT.SLDsl.husYL.GpWPR-..sph..hssh.hpDKAT...QT.psWsEptt-+p..............................sHpRSASWGSsDpl......KE..........IuKLRQQ..........LQ.RoKp..sSR.........pt+-K-RpSPhp.....GsHss..........hspsp..............ssPhs.h..s.s..hs..tp.ssphtsSlEGlNpElEtlhlK......tptcEplh...........-lPDG+..RAPhP.....p....................SusopolsopoPs...........tpsSs......ssp.pssss........h.....tstptSPpst.cshh..p.t.............................h-sGssSP.......l.taAoSP+PNpSYhFKREPPEGCE+V+V.FEE..h..sst..hth....h.CPDKNKV..sF.P...oGSAFC.V.pl.htPLh.....Ps.shhh...st.................................................. 0 18 29 58 +15242 PF15389 DUF4612 Domain of unknown function (DUF4612) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H246 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 109 and 323 amino acids in length. 
27.00 27.00 31.70 28.90 24.20 23.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.78 0.71 -3.48 10 88 2012-09-03 15:07:59 2012-09-03 16:07:59 1 3 69 0 58 69 0 104.90 46 53.32 NEW GCsSAKpVusV.s--............-.spuKsauNGDshsD...EY+hKsVEcVKYh+s.....EEp+hsupsppsh........cppsptt+s+spscsuutsst.h...............slHlSESQQEFFRMLDEKIEKGRDYCS..tEE .................................................................................................GCspuKphstl.ppc............cst+t+satsuDshsD...Eh+hcssEcsp..ps................-Ep+hsupspcsh.....ccpsssph+hpsstchsu.spt.h..............tshcI.ScSQQ-FFRMLDEKIEK.G+D..YsS-pE..c..................... 0 11 15 34 +15243 PF15390 DUF4613 Domain of unknown function (DUF4613) Coggill P pcc Jackhmmer:Q9H6R7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 625 and 725 amino acids in length. 25.30 25.30 134.10 25.30 23.90 23.90 hmmbuild -o /dev/null HMM SEED 671 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -13.21 0.70 -6.35 23 69 2012-09-03 15:11:50 2012-09-03 16:11:50 1 3 41 0 44 65 0 614.70 48 94.99 NEW 
ELGKGKLLRTGLNALaQAlHPlHGlAWTDG+QVVLTsLpLpsGEsKFGDSpVIGQFEHVpGLoWuPhssuDs....PuLLAVQHKKHVTVWQLs.SssEpuKhLhSQTCEIpEshPlLPQGCVWHPKpslLsVLTu+DsSVhhsVHsDuoRVKADI.pspGhIHCACWTc.DGpRLVVAlGS..uLHSYIWDssQK.TLptCSFCPVFDVsu..hlCuIpATVDSQVAVATELPLDKICGLNAutsFDlPssuE...sss..hto.s.lhs.-.hshD..ptussSEss.St.sSlu....SS....osPLDLTHIhhs+p+S-sSuLlpLR+KDaLTGoGQDSSHLlLVTF-+.cVTsTRKVoIPGILVPDlIAFssKuQlVAVASNTCNlILlYSlhs.SshPNIQQIQLEssERPKGlCFLTDKLLLILVGKQK.sTDsAFLPSScSDpYhIRLhl+Elhh---sSsosstsQsshss......hsshLspus++KhhEsLSs.-hp.ps.+sLLLsusss...pSspstRpL.IcEI......+SP.........soSss..s.....uS....l..................s...L-...scPsspssol......................s....psS......usP..s.....tssp...Es............................ss..lPp..p...psL...ppEKEsppLo+pLEtLSpshs-lQpsLSELpDhLpNGK..Ks..sssYPhSpDPPYVHIsYQKshssssls.EKRuVLLCcGKLRLSsVQQhFGLSLVEMhHu.SpWILLsADoEGFIPLTFoAsQEllI .................................ELGKuKLLRTGLNALaQAlHPlHGlAWTDGpQVlLTsL.hpsGE.sKFGDSpV...IGQFEHVpGLtWuP.sssDo....PsLLAVQHKKHVTVWQLs.SssEp.sKhLhS.QTCEIp-shPlLPQGCVWHPKpslLsVLTtpDsSVh.sV+sDsoRVKADI.pspGhIHCACWTp.DGpRLVVAlGS..uLHSYIWDssQK.oLptC..SFCPVFDVsu..alCuIpATl.-uQVAlATELPLDKICGLNAu.sF-lPssuc...sss.....s.s.lhs.E..shD..ctussu-ps.u...Slu....SS...ssPLDLTHlhhspp+S-sssLlpLR+KDhLTGoGQDSSHLlLVTF-+.tVTpTRKVoIPGILVPDllAFs.+uplVAVASNTCNhlLlYSlhs....SshPNIQQIpLEpsERPKGlCFLTDKlLLILVGKQK..sD.ssFLPSSpSDpYhl+Lhl+Elhh-c-sososstsppsh..ps......hss.lshssc+KhhEsLus.-hp.ps..+sLLlsssss...QSspstRpL.IcEl..+Ss..........ss.sssSh..................s.Ls...scPsspssol...............................sp.o......sss.s..tsph...ps...............................sphsp..p...psh...tpE+phtpLo+plEhLstshs-lQpsLSELp-hhpNG+..+s..sssYP.Sp-sPaVplshQKs.h..sssh..E+RsVLLCcGKL+LsslQphFsLollEMhas.s.WIlLsADs-GFlPLpFpupQElhl................................... 0 7 9 21 +15244 PF15391 DUF4614 Domain of unknown function (DUF4614) Coggill P pcc Jackhmmer:Q9H6X5 Family This domain family is found in eukaryotes, and is approximately 180 amino acids in length. 
There is a conserved EALT sequence motif. 25.00 25.00 26.60 26.60 19.20 18.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.30 0.71 -4.51 21 53 2012-09-03 15:12:47 2012-09-03 16:12:47 1 2 44 0 35 45 0 166.20 47 32.84 NEW psossssYS.-DFEpSsp.spsspss..spSpposspohs..opS-tS...ushposhs........tshps...pcppsps....spclhlK.EsAVQT.....csshsh.Wsp...ssuhAslGPslGuuYVDPsPIAoHVlSsDAlEALTAYSPAslALsDMLKQQLsLTQQFlEsSR+LHtSLlpSL-t-saHYpTLEETKEYIRcHRss ..................................s.s.sssYS-DFEpSsp..psopsp..spScpS.s+Tls..shS-.S.........uShposhs..................pshps...p+cpscp....ssRlhVK.-sAVQT.....-PuFsYpWsc...suuhAshGPuLGu.uYVDPsPIAsHVlSADAIE..ALTAYSPAslALpDhLKQQLsLTQQFlpsSR+LHtSLLpSL-t-sFHYHTLEEsKEYIRpHRP.s.... 0 9 11 18 +15245 PF15392 Joubert Joubert syndrome-associated Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H799 Family This family of proteins is domain of unknown function, which is found in eukaryotes. However, mutations in the gene lead to Joubert's Syndrome, indicating that the protein that the gene encodes for is vital for correct ciliogenesis[1]. 
27.00 27.00 30.50 30.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.12 0.70 -5.21 12 56 2012-09-03 15:16:48 2012-09-03 16:16:48 1 1 35 0 30 46 0 293.90 54 20.35 NEW PLQMTGLTDIADII-DLITKcGVSS-ELGLTEpQA+sISRIQ+oSGR+PQRT-cERREIQlWMKRKRKERMAEYLNQLAEKRGQEHDPFCPRSsPFYMTSREIRhRQKMKHEKDRLLLS-HYSRRISQAYSLMNELLSESsQLPssAQKPLPs+PpTsphsRpQ+ssSPRRENpHGHNFPlNRPGKsRaIS.KsSahpKG+PhspspGSsh.................................+GSssPspShQps+s+tuAGLsP.ocQVClEYEREETVVSPWhlPS-I+cILH-sHuSLLQDlSPs.EEEPEsP.....htluGhDSlSESTGSILSKLDWsAIEDMVAuVEDKuLSVHWA ................................................PLQMTGLTDIADII-DLIsKcGVSS-ELGLTE.QA....pshSR..I.Q+sS.G.R..+s..Q...R.T-KERREIpsWMKRKRKERMAcYLspLAEKRuQEHcPFC.PRss.P..hY...MTSREIRh+QKMKcEKD.RLLLS-HYScRISQAYuLMNELLSESVQLPsss.pK..PLPs+s.ps.sp.sppQ+.ssSPRt.ENpaGHsh.lspsuKs+ahS.+PSahpKtcshs..spGss..t............................................................cuSssPs.p.p.h+pptsstlsP.sppsC..lEhERE-pVVSPWtlPs-I+pIL+cspsSLLQDhS...ss.EEc.c.s.......hsGhDSlSESTGSILSKLDWsAlEDMVAuVEDpt..s................................................................................ 0 4 6 14 +15246 PF15393 DUF4615 Domain of unknown function (DUF4615) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H7E9 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 161 and 229 amino acids in length. There is a single completely conserved residue F that may be functionally important. 
27.00 27.00 28.80 28.80 26.40 25.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.94 0.71 -3.89 33 85 2012-09-03 15:17:45 2012-09-03 16:17:45 1 3 70 0 55 85 0 127.30 38 57.03 NEW cQhcpELsWClpQLEhuL...pstK...opKQtc...-sh+sl+sL+SspsPLlKKRQlM+sthGDYRsKMppEc+Khh+shpshtho.upsps....s+Ku.Fh++uthh..................................sotc.s.phsa.hspp..................................................................pF+FNF ....QhppELsWCVcQLElGLcppKs...o.KQtEpuhpsI+oL+Sp+sPLs+KRQlM+shFGD....YRupMctEppchh+shcsstho.upsps......s+K...h.++sthh.................................pst.t...phphshshspp....................................................................................pF+FNF.............................................. 0 20 25 39 +15247 PF15394 DUF4616 Domain of unknown function (DUF4616) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H972 Family This protein family is a domain of unknown function found at the C-terminal domain of the proteins. This protein family is found in eukaryotes. Proteins in this family are typically between 166 and 538 amino acids in length. 
27.00 27.00 27.50 27.50 23.30 26.60 hmmbuild -o /dev/null HMM SEED 537 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.88 0.70 -5.49 7 55 2012-09-03 15:20:01 2012-09-03 16:20:01 1 2 30 0 25 44 0 360.90 66 96.47 NEW SFSATILFSPPsG.uEA+CCCCuCKSEssuusous.sGs.P...ssTPITVTGcGLAVQSoEQLLHlIYQRV-KAVGLAEAALuLA+ANNELLK+LQEEVG-LRptpssp.--ssputtpssPscpss.hctSsGcA.pshs..u.EEEs-ulGoGVQVVIEELRQLGAAuu.ssGsLG.FsssQcchchPGCsLAu...sEuuPLLNPh.....DDYluoEGslQRVLsPuaAKQLSPuoQlAhppusu-su.EshschssspPcshLuusAsL....DuAL-..-ssPGuo.......GElphSLG.......assoPsRsRGoGQKNSRRKRDLVLSKLVHNVHNHITNDKRFNGSESIKSSWNISVVKFLLEKLKQELVoSsHNYTDKELKGACVAYFLTKRREYRNS...LNPFKuLKEKEEKKLRSRRYR.LFANRSuIhRpFuPEDQ+LWcsVTEELMSDEEDSLsEPGVWVARPPRFRAQcLTcLCY+LDANSKHGTKANRVYGPPSDRLPSAEAQLLPPcLYNPsFQp-.s-uGspsuPsSsshspsHKohCPDLNSFIEIKVEKDE ......................................SFSATILFSPPuG.SEA+CCCCACKSEssuussGSpGGs.PP...suTPITVTGHGLAVQSSEQLLHlIYQRV-KAVGLAEAALuLA+ANNpLLKpLQEEhG-LRptpss..tct...s.ttt.P....t..c.u.Gcs.p.......................................h.assspp.hRhPGCsLsu...s-usPlLsPh.s...DDYVASEGulQRVLVPAYAKQLSPATQLAIQRAouEousEsGsKLPssRPEDhLsusAsL....DuALE..EusPGus.......GELphSLG.......hssSPsRsRGoGQKNSRRKRDLVLSKLVHNVHNHITNDKRFNGSE...SIKSSWNISVVKFLLEKLKQELsoSPHNYTDKELKGACVAYFLTKRREYRNS...L..NPFKGLKEKEEKKLRSRRYR.LFuNRSpl.hchhusE-QphWpsVTEELMSDEE.DShsE.PGVWVs+sPpaRu......LTpLsh+lDAts.cpG.sKtpRh.G....S-RLPSsEsQhhP.cLaN.pap....tt...............tss.......+cs.s...sSFlplKVEcD............................................... 0 6 7 11 +15248 PF15395 DUF4617 Domain of unknown function (DUF4617) Coggill P pcc Jackhmmer:Q9HCM1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 702 and 1745 amino acids in length. 
27.00 27.00 48.00 48.00 15.90 15.40 hmmbuild -o /dev/null HMM SEED 1068 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.01 0.70 -14.29 0.70 -6.51 14 45 2012-09-03 15:22:00 2012-09-03 16:22:00 1 2 30 0 25 50 0 865.30 45 67.36 NEW SMELLATCLSLWKKQPSEssE........EKQsN....pSppNpT..ulGlS+Psc.lss+uspSssGN...SQsKhl..s.ppThLSsllQNaESSusslsKGTELQIAVVSPLILS-l+TLS.hKslsPpslPEssYPVIKEGSlCSLQsQ.hsENs.lsA.sLKssl....spPVuSossSsKl.s.LhQ....KEKQ..scsopuss-sssssspGpp.t....p....sshuspQss.co+...........................................cSslVsuDhLQI-sICSLVEGDsSYNSQIAcIFNSsPhp+VEPQKsShPsppslSstpQcEQl-ps...TEs+DhuhQc-ch.lpsTDlSpc.ls-.scs...P.......tc.u..............Stphlcsssuhl.EEuslE+..............hspc...sssslsSs..AuhpQDspspEsDssu...Nhsu.ps.PstsEl.s-.ppslhYL+DQLSELLKEFPYGIEulN..s....+csSVupphspplsc-QTssKps..sDSK-...ssDQIpITlLsS-QhKELFPEpcppss.......csD+lsEsQp.EKsls.....EsusQCDsQss.ppsEopDSs.hsSEKDcl+CC.ALGWLSMlYEGVPpCpCsSlcp.sSpE-Ksc-Qsush.-oNSsKQGEpsScsDlslhc...sss.lsss...PKss....ss...cs+hschcp..c...sh...........................KDtopT+...csoph+sEQ-...ss..QhpS.....KsD.Kh................-shQspK+p.pLpaHEVsFpousK.pt...c.hSQEu......Lp+KhhsQsspPlKsKsshhss..K..D......hh+cssSlhQolSsEKhKlKhpu....hp....pt....EKRKlDpsphh-..ElKKKKp-KQEQN+NsG.ss....hKLssplspsNERA.lpEp.s.....luss.....cSSD.K...sSSsKhp+VlospEY.LQRpKcK-shu.ppsuK+hpl....csVsscSpah+sS..Khsspl.tSssKss-+..psSuhpTsKEshNshss+uKsLKhH+..SE..pSKoaslSpNsKGpsDGKQsDKhhh-Koh.DK.lsplsNEhp.s.hs.QsK-QRKpYLNRVAFKCTERESICLTKL-sSP+K..Lp..K-..K+..p......-sKscs...lssK-socKssMLEFKLCPDlLlKNTsosE-ppshpspPcKEQAsVQVSGIKSTKEDWLKClss+.K+M.EusQE.....D.s.l..sS+Lu+RShSADGhEsLQNPVKDSKsMFQTYKKMYLEKRSR 
...............................SMElLATCLuLWKKQPS-ssc........-Kp.s....p.ppspT..ssuhSpPsp.hp.csshSshGN...Spschs.s..ppTsLshlsp....saESouhslsKGoELQIAVVSPLlLS-l+ols.sKslsPt.s.lsEslYPVIKEGSVCSLQsQ.hsEN..ssA.uLKssl....stsVsuossuschhs..hp....KEcp..scsspsss-ssss.spspp......p....sshsspQ.s....ps+...........................................sossVuuD.hLQIssICSLVEGDsSYNSQIAcIFsS.PhphlEspKsShPsppshus.p.ccQl-ph...sEscDhshppsp..lpsTDlspc.ls-tsc....P........c.u..............o.p.lcsstshh.Ecssh-+..............hscc...sssssCS...uuhppD..spEhDsss...........shss.pc.PstsEl....s-.ptslhYL+DQLSELLKEFPYGIEsls..s.......+csSVsQQhs.phsc-p.TssKss..sDSK-...ssDQIpITlLsS-QhKElFPEpcpps.........sDcLtcspp...-csls.....-s.s.ss...phstttc..s.Dsh..so-KD-ltCCALGWLSMlYEGVPpCpCsuhpp.sSppcKtcpQsu...cssSscpsE.psucpslshhp....ps.lsss....Pchs.....st...cschschpp..p...sh...........................+phspp+...cps..psEpc...ssph.S.....psc.p.................ss.pppKtt..lph+ElpFpspsK.hh......SQEs.......ppKhhsps.tPl...KsKsth...ss..p..D.......hh+psShsQolSsEKhKLKhtu....hp.h....pKRKlDpsphhD..ElKKKKa-KQEQppssG.so....hKLssplopsNERAhlpEK.tloss.....cSscsK.....sSSsKhs+llTspEY.LQRQKcKEshu.spsSKchpl.....csl.scSpah+sS......Khsspl.tSsGKssE+..psouspTsKEShsshosHGKslKhH+.....Sc.....-S+oasl.+NsKtpssGKQsDKhah-Ko.h.DK..sphsN-hphsphs.Qs...K-Q+K.YLNRVAFKCTE+ESICLTKL-sSP+K..Lp..+-+c..p....-sK.ps...hss+css-KssMLEFKLCPDhLl.KNosos--ppchpstPcKEQAsVQ.VoGIKSTKEDWLKtlspcp+h.c.ssQch...D.sl..sS+ls+RohSADshEhLQNPlKDS+tMFQTYKpMYhEKRSR................ 0 1 1 5 +15249 PF15396 FAM60A Protein Family FAM60A Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NP50 Family This protein family, FAM60A is a family of proteins is found in eukaryotes. It is known to be a cell cycle protein that binds to the promoter of a gene transcription repressor complex, named SIN4-HDAC complex. This means that FAM60A has an important role to play in 'switching on' gene expression [1]. Proteins in this family are typically between 179 and 324 amino acids in length. 
27.00 27.00 58.70 45.90 20.80 20.50 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.78 0.70 -4.59 17 107 2012-09-03 15:23:46 2012-09-03 16:23:46 1 2 64 0 60 90 0 190.90 61 71.84 NEW FuFHKPKlYRSssGCCIC+AKSSSSRFTDS+KYEc-FhtCFtLpE..pRsG-ICNACVLLVKRWKKLPsGScRNWpH.VVDAR.AGPGhKshspthpK.+pht....................hcp+ph..+http++pp.............ss...shsp.........spssSpspSPs.SspS--sspt...................................c.............tptpsp.sl.ss..FlD.oYWKRpclCCGhIF+G..aGEl..llDPcLhKPCssspc ..........FGFHKPKhYRSh-GCCICRAKSSSSRFTDS++YEK-FppCFGLpE..sRoG-ICNACVLLVKRWKKLPsGSKKNW.NHVVDAR..AGPuhK.hsh+PKKhKshs.........................tp....thppp.hp+lpKchKpp..................................sSD......ApSo..........TSSsSPsQSPshSNpSDDuu-s.........................................................Eh............tstpscssl.hS..FLDhoY..WKRp+lCCGIlaKG.paGEV..lIDs+LaKPCCsp+.p................................................................................................................................................................................ 0 14 17 36 +15250 PF15397 DUF4618 Domain of unknown function (DUF4618) Coggill P pcc Jackhmmer:Q9NUD7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 238 and 363 amino acids in length. There are two conserved sequence motifs: EYP and KCTPD. 
27.00 27.00 27.00 30.80 26.20 25.80 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.67 0.70 -5.05 17 62 2012-09-03 15:26:35 2012-09-03 16:26:35 1 2 38 0 31 53 0 217.00 45 74.36 NEW l+sR+soL....pELppHcshLschNt-LhcpIQDhEcoTshpVRphLpQQ-lhsollslLEauNc+cLpphKsELQEWcEKpcs+hspLcpQlcpLcu+IcKspEElsFLSTYMDHEYPVKuVQIAsLhRQlQplKDsQQ-ELD-LsEhpctVLtolus+hppKccplLpulshKs.pPhppsLlp+s.hcsQshhKphspaRchIcphcE-lPhL+AEVcpLpsphp.csRElIFtDl.LLR+PKCTPDMDVlLNIPsEEhLP ..............l+st+suLpELps+csaLsKhNp-LlcsIp-hEsSTsLpVRtlLQQQ-hhsollDILEYsN+K+LQph+uELQEWEEKccsKhshLpQQs-QLsu+IcKspEEVsFLSTYMD.HEYslKuVQIusLhRQLQplKDsQQDELD-LsEMR+pVLpoLSs+IQcKc++lLpolVscsppshpp.sLlphh.hcspthh+ph.ta+c.htthpp.h..L.tplptht.p...p.pp.ha.ph.hhphsKCpsp.tl.lpIs.pp.................... 0 9 10 15 +15251 PF15398 DUF4619 Domain of unknown function (DUF4619) Coggill P pcc Jackhmmer:Q9NVL8 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 128 and 299 amino acids in length. 27.00 27.00 33.60 33.40 25.60 25.00 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.00 0.70 -5.00 10 52 2012-09-03 15:31:30 2012-09-03 16:31:30 1 2 30 0 27 46 0 211.30 53 96.47 NEW MGLuaSKuHPRVTKVAPLQsKEtETPsAGslDFshsQNLEE+S.aohARLQDpsKuLEGQLPPLREThYGRYSo...ssRsMYFDIPLEpGETSIIKRHPPRRhQKLEPlDLPQVlTScRLLSQpEAc.ssp+AK..QELEK+MQospYoSGKRQYLHKMQMLEMNRKRQEAQhELKKoLH+EARINKQ+.R-HKAKKlLpolPRNDDt.DllThLP-c...sLNRsPGNupNuEFL.pHQstNsYsPRKsGKhEsWlpEQEApGQLhWDSSSSDSDEhG+-EKK...PRALVRTRTERIPLFDEFFDQE ..................................MGLu+SKsH.RVhKVAPLQs+E.Eossss.ltFshppsLEEco.h.hsRLQ-pp+shEGQLPPLpEshhGRh.s...ssRsh.FDIPL.-p.tETSIIK+HPPpRhQ+LEPh.shPpshTutph.spppst.thp+t+....hEK+hQs.hasstpRQaL+KMphLEh.p++QE.....sQhE.....L.K+sLpt-s+lsppp.c-cpucKhhpshs+...sssh....chhshlPsE...hlsc.ssGs..stth..t.p......s.p.s.ht.h.hpp.hptph.h-sSSosS-t.tc.p++.....sLVRT+Tp+IshaDpFhD............................... 
0 4 4 7 +15252 PF15399 DUF4620 Domain of unknown function (DUF4620) Coggill P pcc Jackhmmer:Q9P2W6 Family \N 27.00 27.00 149.60 51.40 25.40 19.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.70 0.71 -3.94 2 9 2012-09-03 15:33:08 2012-09-03 16:33:08 1 2 6 0 1 5 0 98.40 84 76.38 NEW VPRWPHLS.QSGlcsPshWTtoPG.PSRDQpAPGs.MPPsAAQPSshG.LVPPATA.E.lDcPA.HWLACsCCLuLPuQLPLAIhLGhsL.LcuuP.sGKLCP+ARRWQPLPS .VPRWPHLSSQSGVcPPDRWTGTPGWPSRDQEAPGShMPPAAAQPSAHGALVPPATAHEPVDHPALHWLACCCCLSLPGQLPLAIRLGWDLsLEAGPSSGKLCPRARRWQPLPS.... 0 1 1 1 +15253 PF15400 TEX33 Testis-expressed sequence 33 protein family Coggill P pcc Jackhmmer:O43247 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 147 and 280 amino acids in length. There are two conserved sequence motifs: NIRH and SYT. The function is not known. 27.00 27.00 46.70 45.70 26.10 19.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.60 0.71 -3.78 7 37 2012-09-03 15:36:24 2012-09-03 16:36:24 1 1 29 0 23 35 0 134.30 63 56.81 NEW SlIPsNIRHKFGSplV-pLlSEEQstp....slschhEtppt.souhPs.hTcs.scl.oohhssYaDLGhshRssLh..GsspETKSLMpuSYTsEVhE+uVRDlEHWH.GRKTDDLGRWHcKNAhshNLQKALEE+huEpsKs+us ...........ShIPsNIRHKFGSphVDpLVSEEQAp+....AIsEshEGQKR...sSuWPS.RTQoPhcl.oSlFSDYYDLGYNMRSNLF....pGuPp.ETKSLMKASYTPEVIEKSVRD..lEHWH.GRKTDDLGRWHpKNAMNhNLQKALEEKYGE+SKS+u... 0 3 4 6 +15254 PF15401 TAA-Trp-ring Tryptophan-ring motif of head of Trimeric autotransporter adhesin Coggill P pcc Jackhmmer:Q48152 Domain TAA-head_Trp-ring is the tryptophan-ring motif of some Gram-negative Enterobacteriaceae. The Trp-ring folds into a beta-meander type on the top of the head domain of its trimeric autotransporter adhesin proteins. In conjunction with the GIN domain it is thought to be the region of the head that adheres to fibronectin. 
18.60 17.30 18.60 17.30 18.50 17.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.38 0.72 -3.75 23 175 2012-09-05 12:42:40 2012-09-05 13:42:40 1 29 47 1 31 163 2 64.50 44 11.54 NEW VssspKsNGKtsc.....VKlGsKh.hhpcc-hclhTGKs.................p.ss.sssssssssssspGsGlVTupsVh-..Al ..............hpsKsNG+pTp.....VKlGAKT...uhhp.cKDucl......hTGKsh....................................................s..sss..ssTspGpGLVTAppVl-Al................................ 1 1 1 31 +15255 PF15402 Spc7_N N-terminus of kinetochore NMS complex subunit Spc7 Coggill P pcc Jackhmmer:O59757 Domain \N 27.00 27.00 27.60 27.60 24.60 26.40 hmmbuild -o /dev/null HMM SEED 927 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.08 0.70 -13.83 0.70 -6.54 19 122 2012-09-06 11:41:45 2012-09-06 12:41:45 1 10 72 0 106 126 0 496.30 28 58.18 NEW hDKENsTsDluusp..ts.....hsup-KKSRSKSLGPGGLD.ALpsuNGNRRKSsss...hPLKSILKPTlPVSPlpsIPoF-ETR++oPs.........t.t...ptptt.hIshsssstsshsus-p.sNPFDsFsssu..p.......stt-.p.AAAREREE+ER+E+E+c...sILE+R-ARRKSM...................ANRRVSFAPEATLHTWNVVEls-DSToSSuu.NSTRRASSLs.ts......ppsststp.sps.s....pt.t-sshu.SPsp..cLpphppp..pu..s...ss...pphuSSPaSGuSssus-.tulps.t............-.sssSosss.DupuTsMSh--hTspSsuosts.s..tsSosSSu+L-EuLRpAApc.AGTpuID.D-s.s-hSMEhsspEIsGAFpPWlKKu.pppsh-hE.........................DlouchDQENlsP...sstshp.ppsssppsss-..pts.................p.....DhSMDlTpAlGGIlsptpsppp.....shspRpS.u...............t.oshs-QTMEFTsVVGGItpstSstputsssps.ttDE-MTMEFTSslGGVLspshspssspptptt.s......t.........ppssstpss.-MEMTuAVGGILsshpEptp..p..........DpThGM-hT.sAlGtILssthsspsc..ppthhthps-tst.p.sSSPFQtslh.SPs+.ssss....+hsslsSEsGSPoluSl+s+psR.pS.sppsusosssp.os.psSPh+.sshpss............pptsspsspssTPs+TPPSp.phshRuuSP+Klhps-hptsuop......ttssspc......sLFppss.TGpoTPtFVLpPp.....pRRSSGlGID+-GLGSPRVAtlLDRRRSIGE-AtcFlPpp.....ppuVRF-DPhcLpEElD+ERp-EEp+Esu+..lpt.sst...-+DsThNL+-MISSLT...PKKNKL+GRKSLHVGAA+GLLGKRPsELDpD----ssps...sKRLKG.+p.uSPVKsI+LPAPPSKsETs 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................-hsM-hTpshG.tI..h....................................................................p.sh-h...h..tu.........................................................................................................................hc.p..........................................................t...hchp..s.ttlh.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 21 52 87 +15256 PF15403 HiaBD2 HiaBD2_N domain of Trimeric autotransporter adhesin (GIN) Coggill P pcc Jackhmmer:Q48152 Motif HiaBD2_N may represent the GIN domain of the Head region of TAAs - trimeric autotransporter adhesins. Not all TAAs carry this domain; however, in those that do, the GIN in combination with the Trp-ring domain is necessary for adhesion to fibronectin in the host cell. 
25.00 25.00 30.40 30.40 18.80 17.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.35 0.72 -3.82 19 46 2012-09-06 14:40:45 2012-09-06 15:40:45 1 14 15 3 0 47 0 53.10 45 7.47 NEW TNVsSGL+sYGDTs...hsssssussshs+ps-..sAapGLlNLsEKuu.....s.ps...shVuD .TNVsSGLKsYGDsN...FsshssSsssls+psD..sAYcGLlNLsEKuu.....sKp..hVAD.......... 0 0 0 0 +15257 PF15404 PH_4 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:Q03868 Domain This Pleckstrin homology domain is found in some fungal species. 27.00 27.00 30.50 35.60 22.80 19.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.26 0.71 -4.76 32 91 2012-10-04 00:02:25 2012-09-10 15:36:12 1 3 89 0 65 98 0 169.50 38 15.94 NEW hsGhLYpKs++HosFpchhVlLssGhLllFpshhRs.hoGhthp...plcap+hhslsLp-CYlYSGhlTc.DLLppspsaDs...........hsPGp+ulPRlY..sDGWpSs---sspsFslWasp++shhcppts.tp.........................................t..tpstsph+hVspLGssG+S.hVFhARSRtERDhWVhuItsEl .......hSGhLYpKs++HusFpch.VlLssGhLllFpshhRp.hoGhths...psaap+hhslsLpDCYlYSGhl..Tp.DLLhpspohDs...........spPup+ulPRlY..sDGWpSsD--sspsFslWasp++slhpsppp.tp............................................tpsppph+plspLG.ls.G+S.hVFpARSRtERDhWVhuItsEI.......... 0 21 37 56 +15258 PF15405 PH_5 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:Q9Y7U6 Domain This Pleckstrin homology domain is found in some fungal species. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.87 0.71 -4.22 42 284 2012-10-04 00:02:25 2012-09-10 15:41:14 1 12 152 1 212 344 0 136.40 27 11.08 NEW llaKG.LKK+sssptps....DlpsYLFDHALLhlKhKtlsKtEpaKVY+RPIPLELLhls..........h--hsst.t......pps...........sshlsppssssst.s......pt..pssasITFpaLG+pGYp.lTLYAushsuRppWlEpI-pp.Q .................hlhputLp+tsspp...s...-l.psaLFD+h...LlLs.+..........h...+s......s......s......K...p...c...p..ac..Va+.......cPIPlcLLhlp.....................................shp-s.sthuh..........................s...............................sshhs.stssssst................tps...p.haPlshp+LG+psh........hTLYAssttsRpcWh-pIppt.................................................... 0 62 116 179 +15259 PF15406 PH_6 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:O94356 Domain This Pleckstrin homology domain is found in some fungal species. 30.00 30.00 30.20 30.00 29.80 29.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -4.02 13 72 2012-10-04 00:02:25 2012-09-10 16:46:12 1 1 72 0 58 96 1 108.00 51 19.22 NEW NFlYoKcFFWFGo.-A...VE..sKsLSuahcu.....-h..tss...AHHssAWAocTGKGLLFFuc.tsDKA.uPsGlIpLADASEPssDGssKFHFTu.KGHKHTFKAuosAERDNWVuQLKsK ............................hhFoK+FFaFus.-A....VE..sKpLosYhps.....EK.....ps.AHsssAWASQTGKGLLFauK.+spDKu.sPsGlINLAD..s..o..-lsp-G.usc...FpF...ph.pGpKHoFpAussuERDsWluslcsK................................... 0 10 27 47 +15260 PF15407 Spo7_2_N Sporulation protein family 7 Coggill P pcc Pfam-B_51974 (release 26.0) Domain Spo7_2 constitutes a different set of fungal and related species from those found in Spo7. This domain is found in general at the N-terminus. In many members the domain is associated with a Pleckstrin-homology - PH - domain. 
25.00 25.00 26.10 45.50 24.20 24.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.97 0.72 -4.51 30 67 2012-09-10 15:57:22 2012-09-10 16:57:22 1 2 66 0 46 70 0 65.60 35 6.14 NEW cthsIP+sSFTAhRLsYsSspcLShoS+slhLGsIPphWhps+ppshh+sh..hph.sppps+ppspssp ......p..hslsppSaTA.+Lp.........asSPpcLphoSRphFlGsIPctWhps++cpha+sh..hph.pphpsRptohss.p... 0 6 20 38 +15261 PF09061 Stirrup Stirrup Sammut SJ Bio::Pfam::PfamLiveDBManager=HASH(0x4ef6130) pdb_1dq3 Domain The Stirrup domain, found in the prokaryotic protein ribonucleotide reductase, has a molecular mass of 9 kDa and is folded into an alpha/beta structure. It allows for binding of the reductase to DNA via electrostatic interactions, since it has a predominance of positive charges distributed on its surface [1]. 25.00 25.00 25.20 150.50 24.60 23.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.62 0.72 -4.07 3 5 2012-09-10 18:18:52 2012-09-10 19:18:52 1 3 5 1 3 5 0 79.00 80 4.54 NEW GuFGLoLNFNAFKEWAS+YGVEFKTNGSQTLAIIsNEKVSLGQWHsRGRVSKAVLVKMLRKLYEsTKsEEVKRMLHLIE GsFGLoLNFNAFKEWAS+YGVEFKTNGSQTLAIIssEKISLGQWHTRGRVSKAVLVKMLRKLYEATKsEEVKRMLHLIE 0 1 1 2 +15262 PF15408 PH_7 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:P78953 Domain This Pleckstrin homology domain is found in some fungal species. 20.00 20.00 20.10 20.20 19.90 19.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.23 0.72 -4.15 2 2 2012-10-04 00:02:25 2012-09-11 13:50:59 1 1 2 0 2 11 0 103.00 27 10.99 NEW EGYLYh.Ecuulp+RFshLpuK.hsh.t-KGG..LsoFpLppolluhPhsphppAssN.GlsA.GhLhhusstc+lphFAsStcshppWlpshNp.uhtls+uo ..........EGYLYh.....E..cuulp....+R..FshLpuK.hsh.t-KGG...L.soF.pLppolluhP.hs.phppAs.sN.....GlsA.Gh...Lhhuss.tc+lphFAsStcshppWlpsh.Np.uhtls+uo........ 
0 1 1 2 +15263 PF15409 PH_8 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:O13944 Domain This Pleckstrin homology domain is found in some fungal species. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.96 0.72 -3.91 11 458 2012-10-04 00:02:25 2012-09-11 14:08:28 1 13 185 0 226 895 10 89.10 43 10.24 NEW GlLLKKRRK+hQGas+RaFsLshptuTLSYa+sssus..sLRGphPlslu.lIuANtcsREI.IDSGhEVWpL+ApNcpDFpsWscAhp+s+ ..............GalLKK.R+h....LKGW+KRaFsL...-s..GhLpY...h+...sp...pc..h.........+l..+....Gu....I.s......l.u....Lu....lh...S.....l.p....K....+.u...+...c........I...-...L....D.o...t....p..c...I...YHLKspopc.FptWVspLcp..pp.......... 0 44 84 151 +15264 PF15410 PH_9 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:O13690 Domain This Pleckstrin homology domain is found in some fungal species. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.46 0.71 -3.95 16 917 2012-10-04 00:02:25 2012-09-11 14:15:48 1 73 189 3 531 1121 1 111.50 33 7.42 NEW YKpGalhRKhhh-ssup+sPhG+RuW+hhYshL.+shVLYLaKs......Ep.................thcsuphpp..o...........hpNu.....IplHHuLApp..AsD....YsK.KpaVF+LpTA-tupaLFQTssscEhQpWlcsINaVAA ..................................................................cGhLtRK.p.c..h-s..s..s.K+..s.........upR..uWcph....YsV.L..cst....LhhaKD.......pp.............................................................................p..tpu.h.st.....t............................................hcps........lsl....+.c.A.lssh......As-..........Y.p..K...+paVF+L.....p.hs...D..sspaLFQApsp--MpsWlptI.................................................................. 0 135 197 357 +15265 PF15411 PH_10 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:P40995 Domain This Pleckstrin homology domain is found in some fungal species. 
27.50 27.50 27.70 27.50 27.10 26.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.47 0.71 -3.91 39 249 2012-10-04 00:02:25 2012-09-11 15:43:54 1 24 148 0 185 253 0 117.60 29 12.77 NEW l-pFGpLLLaGhhsVh..K......s.......ct..E+EaclYLFEclLLCCKEhpss..+......ppsp....p.uht....p.t..........p....ppps+LpLKGRIahp.s.l.sclhsh.u.c........s..GpasLpIhW+uDst.l-sFhl+FpsEEphcpWtssl .............................ppFGcLhhaGphpVt..+.....................s.....pp..pE......hcsYL.....F..pchLlCsKEhpss...p...................pptp....ttt.t.t........................................tppps+hpLKGRIhhp..pl.pclhsh..s.p........................sphpLplth..............usst..lssFhl+aps-cphcpWppsl...................................... 0 52 101 157 +15266 PF15412 Nse4-Nse3_bdg Binding domain of Nse4/EID3 to Nse3-MAGE Wood V, Coggill P pcc PF08743 Domain This family includes Nse4 and EID3 members [1], that bind over this region to the Nse3 pocket, in MAGE family Pfam:PF01454 [2]. 18.50 18.50 18.70 18.50 18.40 17.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.40 0.72 -4.12 87 205 2012-09-12 13:26:22 2012-09-12 14:26:22 1 7 173 0 145 200 1 64.60 27 16.30 NEW DuchLV.so-Lutc+sppLphs...ssuhDs-cFls+hhsaMttp..............ssttsssppp ....Du+hLV.AoDLutc+sppLp.s...tsuhDhccFlsphlsaMttp.ht............tp..........ttt.p.....pp........................... 1 37 65 107 +15267 PF15413 PH_11 Pleckstrin homology domain Coggill P pcc PF00169 Domain This Pleckstrin homology domain is found in some fungal species. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.50 0.71 -3.82 7 262 2012-10-04 00:02:25 2012-09-12 14:36:45 1 17 124 6 153 1119 8 108.70 26 18.63 NEW hcGalh....+pts+..hpsW+pRahlLc......sshhcYYcs.................................pcshhptshphuhphhplps.t....sshphphppphth.............s.tpphhhpscsp-pphsWlptlppt ..................pGhhp.....Kh..s....sp.....tc....ua+pRWFsLc.......-tpL.YaKs............................................................................................s......h.c..sh..t.p..........tp..s....t..h.....u.s.c..t..p..t........h..s....s..hpsh.........ssp..u.p..p....h....t.c.s..h..ol.......................................hTs.p.+.phhhpscocc-pt-Wlpulp..s................................................................................ 0 36 80 107 +15268 PF15414 DUF4621 Protein of unknown function (DUF4621) Coggill P pcc JCSG:Target_394740-GS13541A Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 350 amino acids in length. 
27.00 27.00 767.00 766.80 21.10 18.70 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.15 0.70 -5.75 4 5 2012-09-14 08:43:32 2012-09-14 09:43:32 1 1 5 0 1 6 0 329.00 99 93.47 NEW YDLSDVNTDDAVMGESWVAPLGTGYVTSDDVVNVEKVPSIREVDGAYVMIYDGEMKIKGKSLRAASsKVEIASEDITTGDIDGLFDGDFVLALTNPHITLKSNVKNASLDCSLSIEAENTSKKEATSSDFTLSTVSPNIWIGPLDPKTDAFKFVKNEKLPGIVQIVPQKIHLSLSADSKQWTNAPADALSELRYAVELPLTPAPEFSAVSVERIEDAFDEDFVDYIFSDGSARIYGEVTNEMPFDMSIEMVIMDENNVPVDIQFPAQEVKGQSGEVIFEITKEDMPKMKDARHIDLNLHLTGRDQGEALKKGQKTTFNLKLKKEGGISI YDLSDVNTDDAVMGESWVAPLGTGYVTSDDVVNVEKVPSIREVDGAYVMIYDGEMKIKGKSLRAASDKVEIASEDITTGDIDGLFDGDFVLALTNPHITLKSNVKNASLDCSLSIEAENTSKKEATSSDFTLSTVSPNIWIGPLDPKTDAFKFVKNEKLPGIVQIVPQKIHLSLSADSKQWTNAPADALSELRYAVELPLTPAPEFSAVSVERIEDAFDEDFVDYIFSDGSARIYGEVTNEMPFDMSIEMVIMDENNVPVDIQFPAQEVKGQSGEVIFEITKEDMPKMKDARHIDLNLHLTGRDQGEALKKGQKTTFNLKLKKEGGISI 0 0 1 1 +15269 PF15415 DUF4622 Protein of unknown function (DUF4622) Coggill P pcc JCSG:Target_390149-GS13960A Family This family of proteins is found in bacteria. Proteins in this family are typically between 348 and 360 amino acids in length. 
20.00 20.00 21.90 495.10 18.10 17.70 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.03 0.70 -5.65 3 9 2012-09-14 08:50:26 2012-09-14 09:50:26 1 1 9 0 2 9 0 313.90 71 89.46 NEW GKVpV-sQLGGpYuIhS....PDAQTRAssssG.ssL+D....splhLLs-GSTLWLshhcpAKsGTTptT...QGYVVR.TGTGGssoLYPC.psDE.NGcl.I..su..SuTPLYLcAGTYpF+hISPAKAlsoDGKssIcNGEYLlATDsRYTQTtSTslsIotlsp....sNVQslhLNPIIsQTARMpFTI+uG-GVaTLEhLpAGIEISGIQpPlDs....TouaNWos...GDsLPsKlGDKpupVplPupphpTsA-GoLsG-TGVLPTDsRSTPVuVLlNltVNGVPTQYphLlsGpaLLsGHSYNYTsTVKIcsG.ITVlTWQNpSWTsDI- ..DRVRIDPVAGGYYPSIS....PSAQTRGATPDG.ETLKD....RPIFLLEDGSTIRLVVYDDAKNLLEEYS...KAYLVRNAGTSGSSLLYPC.EVDD.NGAV.I..SS..SSTPLYMKAGTYYFRILSPAKALNSKGFVNIGNGEYLLATDDRYTQTAMTAVTITKIDEGGTLNNVQTLYLPPIINQTARMQFTVRAGEGVHTLEMLAEGIEISGIQQPLDN....TTSFDWVN...GDVLPVKVGDQSASVRIT..pATpNADNSLVAHTGVLPTDARSHSISVLLNLKVNGNPTQYQMLLTGLYLTAGHSYNYTATVKISNG.VTVLTWQNRSWTENV.V 0 0 1 2 +15270 PF15416 DUF4623 Domain of unknown function (DUF4623) Coggill P pcc Jackhmmer:269656 Family This family of proteins is found in bacteria. Proteins in this family are approximately 470 amino acids in length. There are two conserved sequence motifs: HLL and RYL. 27.00 27.00 699.10 698.90 26.80 25.60 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.49 0.70 -6.11 6 9 2012-09-14 09:54:18 2012-09-14 10:54:18 1 1 9 0 2 9 5 445.00 55 94.19 NEW -DYPtShs.uPY-T-LLSIKIlNAGssGspVVEGTIDE-sKpINFPRLDstTNFSAL+lEAcLSsGApLpppVhDhoM-EtsspKTlVLRVlNpsRYK-YhhKlRK+VPVaGADFEpPTVYsFS....GDs....lYssFsohLTRsAuFDGcaVLlVoRsss....tPHLLKVS-LKtGcIsPI.LDlTGVoGGTasYNMGALsNGHlYlASLSGups.SPLKIYYW-TPTSsPEVIAcINVusIPGAGsRHGDNhSlNLDcNGNGYIFFGDNAuoclLRlsluNaKols.s-spllPssscsshhoslaRltNTupYLaoGlchPloLlDpuhsspa...phs...ls...sEulAPRlhsFNpERYLIsCTAGpGuAoKAossLhVYDITKGsTlp-ALppF-pu-pHpPVYpFlLGGuGNsAPusQTsaYIEKDANGKDAKLhlFASRo-SGFVIsEFPlK. 
-DYPcSh..uPYDT-LLuIKIlNAGssGspVVEGTIDEspKpINFPRLDstTNFSALplEAcLS-GApLpppVhDhoM-t-sspKTllLRllNpsRYK-YhhKVRK+VPVaGADFEcPTVYsFS....GDN....lYsDFs.sh.hTRCAuFDGcHVLlVoRsss....tPHLLKVSDLKtGcINPI.LDlTGVoGGTFsYNMGALsNGHlYlASLSGu+s.SPLKIYYWETPTSpPEVIAsINVusIPGAGsRHGDNhShNlDcNGNGaIFFGDNAAochLRlsluNaKTVs.spsplLPucscsshsoNlYRltNTspYLaSGlchPlTLlspuhsppa...p.s...ls...sEAVAPRlhsFNpERYLlsCTAGhGuASpAo.sL.VYDloKGsTlpEALc+FDpu-pHpPlYpFhLGGuGNGushsQTsaYIEKDtNGKDAKLhlFASRosSGFVIsEFPlK.. 0 1 2 2 +15271 PF15417 DUF4624 Domain of unknown function (DUF4624) Coggill P pcc JCSG:Target_390388-GS13780A Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. 25.00 25.00 122.10 112.60 22.40 21.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.48 0.71 -4.54 4 12 2012-09-14 09:57:47 2012-09-14 10:57:47 1 1 11 0 1 10 0 126.80 68 85.31 NEW KsNtsphNsu-stEstpholEMEhstNYssSDPF.NuRLFCVScDl-sLsAEloFQMDG-pGIVEIKDpcoD-VLWSNsWcG+VsuDTaolSLsNLpK-KEYsVpFTGTKINHAlVcVSFESsLVpEKERPS KoN-S+hNshsscEoupTTIEM-LDKNYDTSDPFVNuRLFCVSpDIDlL-sElSFpMDGDSGIVEIKDNKTDEsLWSNTW+GpVssDTFoISLsNlQKEKEYslpFTGhKINHAVVKVoFESsLV+EKE+PS. 0 1 1 1 +15272 PF15418 DUF4625 Domain of unknown function (DUF4625) Coggill P pcc JCSG:Target_390125-GS13882B Family This family contains a likely bacterial Ig-like fold, suggesting it may be a family of lipoproteins. 
27.00 27.00 27.00 27.00 26.90 26.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.81 0.71 -4.03 44 136 2012-10-03 16:25:20 2012-09-14 11:44:52 1 3 77 0 34 134 4 125.00 28 71.81 NEW sSC.sc--..........Ds.pcPsIsht................PpssphhptGcs..lpFchploDsttLuuasl-IHpNF.DtHoHsspstts...............spsasappsa...........slststp.....shph+pcIsIPs-.....sssG-YHhhlplTDpsGpppht.shsIpIp .........................s.uCsps-p......DsppP.hIphh.................pssphhphGsc..lHhc.hplss.sstlpshpl-I.......HsHstp...................stsasappsa...........shsstpp.....shpa+ccIsIPus.........sssGcYHhhlhlsDtsGspsh.hstslpl......... 0 19 30 34 +15273 PF15419 LNP1 Leukemia NUP98 fusion partner 1 Eberhardt RY re3 Jackhmmer:A1A4G5 Family This family of proteins includes leukemia NUP98 fusion partner 1, the gene encoding this protein is involved in a chromosomal translocation with the NUP98 locus in a form of T-cell acute lymphoblastic leukemia [1]. 27.00 27.00 56.30 56.10 19.30 23.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.33 0.71 -4.18 12 48 2012-09-14 13:29:50 2012-09-14 14:29:50 1 2 25 0 18 37 0 150.80 60 94.99 NEW DDDDV...SFAKWMSSFWGHSWh-EcE+tlRc+..+puQpsupR+sSLPCP...h..ssL.........P....................phhsS-ph...................PRRHS+EDQsFRs+sHh+shRcsSsDuSF+-PhcscsRSHS.IQcFSESFEQQLChRTKRSVSLuPEuRKERpEREsLRh.ch+S+KKscERRsS+KEEctEA.hssLhcK .DDDDV...SFAKWMSSFWGHSWp-EcpRuLRc+..+psQssscRKsSLP..CP....h...shh..........P..............+h.SSDph.PRRHSHEDQcFRC+oHh+shRchStDuSF+-PhcscsRSHSKIpcFSESFEpQLCFRTKRSsSLGPEuRKERsEREpLRh.ch+S+KKscEcRsS+KEEcuEAhMusL.EK... 0 1 1 6 +15274 PF15420 Abhydrolase_9_N Alpha/beta-hydrolase family N-terminus Eberhardt R re3 COGs (COG4425) Family This is the N-terminal transmembrane domain of a family of alpha/beta hydrolases which may function as lipases. The C-terminal domain (Pfam:PF10081) is the catalytic domain [1]. 
27.00 27.00 37.50 28.00 26.00 26.20 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.14 0.70 -4.58 27 350 2012-09-14 14:16:58 2012-09-14 15:16:58 1 4 283 0 95 257 8 196.00 35 35.63 NEW LTPSLlPRsalhQGlluGlshAlGYulGs......hhthlh+hhtlsphtshssp.....thphshsssssssslhhlh.ustWQsslpslMGhtshsshshhtsshlAllshssllhluRLlpthh+hlsthltRh..lPtcl...utslGlslllsLhhslssGVlhRthhpshspsapthsthhcsssstPppPt+SGSssSLlsW-sLGppGRsFVuuGP ............................................hTPSLlPRs.hhQullsGhstAhGYulGs.........hhhhlhc...hh.h......p.t....s.p...................................hhh.shhshsshhhlhhh.lhs.s....tWQcslpclhG.ls.phsh.h.shsl.sshlullshsshlhluRhltth.hR....hLsp.t.lpRh....ls.t..l...usslslslslsL.hlh.l.hNsVll.+tshsuhspshutsNs.psssspssPs.oPhRSGSPuSLhoW-u.LG+pGRsFVssGP.............. 0 22 67 84 +15275 PF15421 Polysacc_deac_3 Putative polysaccharide deacetylase Coggill P pcc JCSG:Target_416920-SP13771A Family \N 27.00 27.00 28.00 84.50 26.10 23.50 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.26 0.70 -5.79 8 18 2012-10-03 16:37:10 2012-09-14 15:30:07 1 3 16 1 3 20 1 428.50 40 52.98 NEW uEIsIshsssl.stsslslEIPPLKYNKchlhhhTQDDCp.AAashTWAAINGKPlosp................haacls.......HLptsDhP......PDhYhhs+sLusTDGsGpEVRFuhTsolus-...-pYMspcstlphGaTp-YaRFah+shlhWc-l+-hhNYGsuIAaHDVssh-h-ss.splht+aslupsl.Ihc+LsGRsCKhLuE.PNGs+sYlsAAhspDsIQTITAQuGs...hKlYPFpsstsLpK..sslpRhFhsss-.........h+psItcphu..pssE-RpAI..sIGsHs.......TDusWss...hLhWLNssYG+cGsDslWFPspEEYYEY.YhRpaopss.Kplspssl+LTVplPutcs..FYYPSlTlNLsGlp.h-hsSlpSs-sVTGLSYu......stcstlMlNlDCR+hLsEHAE+FVcpYEAs.ostus+ADAhYFVsMLK-SsKKstLhpRIp 
..sEIsIchpssl.stptl.p..lEIPPLKYNKchlhhhTQDDChpuAashTWAAINGKPlSpp................haacls.......HLptsDLP......PshYshs+oLusTDGsGsEVRFuhTsTlus-...-paMspcohlp.GaTp-YaRFah+shLhWc-l+EhhNYGsuIAaHDlpsh-hcss.splhtcasluQsh.Ihc+LsGRsCKhLuE.PNGs+sYl....pAAhshDsIpTlTAQuGs...hKlYPFpsstsLpK..sslcRtFhsusc..........hKpsItc.hp...Ps.E-RpAI..sIGsHs.......TDspWss...FLhWLNcsYG+DG-DSlWFPopEEYYEY.YYRhpophs.KplsssolKLplpLPup.cs..FYYPSlTlNlpGlphh-IhSlpSs-sVTGLSYu......sacstlMlNIDCR+hLhEHAppFVcpYEAs.ost.s+ADAhYFVsMLK-SsKKstLhpRIp..... 0 2 2 3 +15276 PF15422 DUF4626 Domain of unknown function (DUF4626) Eberhardt RY re3 Jackhmmer:A1A4T8 Family \N 27.00 27.00 57.80 57.70 25.60 22.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.91 0.71 -4.22 3 11 2012-09-14 14:52:08 2012-09-14 15:52:08 1 1 8 0 2 6 0 102.50 74 87.51 NEW MTGRMATLEKSHSSACWRKRSSRTCVEPDRTQDAIHEPRGLSRSHTVLRHRHFVFLPLSSGA+PSVPPR.....shhtscssGslphssPu.hhhhps.hhpthpphhh..hL....tKQhAcscsppPRuspSlhcsAD+LsW hTGpMATLEKsHoSACWRKRSSRTCVEPDRTQDAlpEPRGLSRSHTVLpHRHFVFLPLSpGA+PSVPPR.....s.hh.c...............................................................h. 0 1 1 1 +15277 PF15423 FLYWCH_N FLYWCH-type zinc finger-containing protein Coggill P pcc Jackhmmer:Q96CP2 Family This family is the N-terminus of some FLYWCH-zinc-finger proteins, found in eukaryotes. The family is found in association with Pfam:PF04500. There are two conserved sequence motifs: EQE and QEPS. 27.00 27.00 29.90 29.80 18.80 17.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -3.88 10 49 2012-09-14 15:06:59 2012-09-14 16:06:59 1 5 20 0 23 51 0 98.40 64 23.19 NEW MPLPEPSEQEGESlKAGQEPS...scPGT-VVP..AAPRKPcEFScLVLLTsSpps.sDsssocPp-VHCVhSLEMuuPATLAs.TLQILPsEEQspllQPsPp.sEQK+SKlD ..MPLPEPSEQEGESVKAGQEPS...PcPGTDVVP..AAPR...K..PRcFSKLVLLTASpps.tcssGuK.ptVHClhSLthsGPATLAp.sL.hl.sEtQ.RslpsuPptPEQKRSK.D..... 
0 2 2 2 +15278 PF15424 ODAM Odontogenic ameloblast-associated family Eberhardt RY re3 Jackhmmer:A1E959 Family \N 27.00 27.00 43.60 43.60 19.10 18.50 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.82 0.70 -4.90 12 37 2012-09-14 15:07:17 2012-09-14 16:07:17 1 1 24 0 16 27 0 246.60 65 94.37 NEW APLIPQRLlSASNSNELLLNLNNuQLh....PLQlQ.GPhNSWIPPFoGlLQp.QQQAQIPGLoQFSLsoL-.pFAGLhPNQlsFPGQsuFAQGsQssQlDPSQ.QTPsQTQQGPspVMPYVhSFKhPQEQuQMhQYYPVYMLLPWEQs.QTsspSP.QTGp.Q.F...EEQlPhYspFGYIPQQsEPshPGGQQQhsFDP.hlGTAPEhAsMPsttVlPYLQKEhINF+HssAGlahPSTS..KPSTsshFTSulDP..TIsPchhEcKAKTDSL+EP ......APLIPQRLhSASNSNELLLNLNNuQLh....PLphQ.GPhNSWIPPFSGlLQQ...QQQAQIPGLoQFSLSsLD.pFAGLhPNQIPFPGQsSFAQGsQAGQhDPSQ.QTPsQTQ.GPs.p.VMPYVFSFKMPQEQuQMhQYYPVYMLLPWEQPQQTlspSP.QTtQ.Q.a....EEQlPFYsQFGYIPQ.AEPslPGGQQQLAFDP.hLGTAPEhAlMssGt.lPYLQKEsINF+HssAGlFhPSTS.sKPSTTNsFTSAlD...TITPcl.EcKsKTDuL+EP..... 0 1 1 2 +15279 PF15425 DUF4627 Domain of unknown function (DUF4627) Coggill P pcc Jackhmmer:217257 Family This family of proteins is found in bacteria. Proteins in this family are approximately 230 amino acids in length. There is a conserved WYK sequence motif. 21.80 21.80 23.80 23.40 20.30 19.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.64 0.70 -4.82 3 20 2012-09-14 15:08:15 2012-09-14 16:08:15 1 1 20 1 2 15 0 213.20 76 92.30 NEW sGlpAQNLIKNscFD.T-LsTEhTsAspsTsG-WFAhNDEssGsTTISactTsDcK+GNAlcl..SussKssSWYKAFLGQRlpsGlEKGlYsLoFaAKAc-sGsQV+VFIR.....+ssNGKssspFFMRcsaDh..-SQPNpSuApYstsIKKA...GKWTKVolsFDFGKVVNAIuShKu..Ncst-VT-TDsssAhLKDFsIsIQ.SQoKsSsVLIDsVSLKK ....VSVSAQNLIKNEKFA.TEVKTKVTNANKATAGEWFIMNNEADGVTTIAWEpTGD..AKYPNAMKLDNSGAEKNlSWYKAFLGQRITDGL-KGIYVLTFYAKAKEAGTPVSVYIKQTNEEKNDNGKYNTTFFMRRDYDA..DAQPNASGAQYNFKIKDA...GKWTKVVVYYDMGQVVNAISSKKA..NANLEVSDTDDDAAILKDCYVAIL.SQNKGGVVEISDVTLKK....................................... 
0 1 1 2 +15281 PF15427 S100PBPR S100P-binding protein Coggill P pcc Jackhmmer:Q96BU1 Family S100PBPR is a family of proteins found in eukaryotes, and localised to cell nuclei where S100P is also present, and the two proteins co-immunoprecipitate. S100P is a member of the S100 family of calcium-binding proteins and there have been several recent reports of its over-expression in pancreatic ductal adenocarcinoma. In situ hybridisation shows S100PBPR transcripts to be found in islet cells but not duct cells of the healthy pancreas. An interaction between S100P and S100PBPR may be involved in early pancreatic cancer. 27.00 26.40 48.50 26.40 26.00 25.70 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.44 0.70 -5.39 9 68 2012-09-14 16:09:37 2012-09-14 17:09:37 1 3 29 0 29 67 0 252.40 51 84.12 NEW NAPF.SWu..SL..DEDtLDDSLLELS-GEEDDGHFSaTEEEIpELLKDDD.SsEpp.ht..hhcDD..uscsEKGE+tSQILLDTPQEKNSLYSLGPsAETPulaKLPQLSTSlGHGPoPoKsLNR+FsLEKNLIKlT.V.APFsPTVCDslLDKDKTD....SSKDTEK.......sSSLGE-hRE-sLusNESKLCTESEsISPsNSAW-GPshsS.SNssFpQTVSDKNhP-SK+PTPVFSQI.DHSEoPs.su.Sh+NuGSHKSusEhR.PVVSSSSp.K.csLDKDSGKhKGpERRLGKVIPVLQsKsRTNVsTFSQS-LEQQKQIYL+sVIAHIEDPhDSNQGsLGELhALMDQV....HHMQN.+WQHPSDLTMRNYARFRQKPLQRYSLTQWVDRNhRSHHRFQRL ......................................................................................................................................ssh.SWs..o...DEDtLDDSLLEhS-GEEDDGchsaTEEEIp.LLK-D................t..st...p.Eht.+tSpIh.-sPpE.N..S.hSLGPsAETs.shhKLPQLsss.upt.......sR..s.EKshlKlT.V.sPFpshlhDshLsKcch-....SSK-hpp........usht-phpcts.t.sptp.hot....pst...........................................................p.stthp..hspospt.p..hhsh.....s.tp..........h.pp+hupl.sh...sttR......hSpspLEpp+.p.YlppVhtHlpp..t.pps..hEL.sLhsp.....h..p....p..hQHPSDhThRNY.h.ppp.h..hSLppW....................................... 
0 1 3 6 +15283 PF15428 Imm14 Immunity protein 14 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with mostly all-beta fold and several conserved hydrophobic residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI1 or Tox-HNH family [1]. The protein is also found heterogeneous polyimmunity loci. 23.30 23.30 23.30 23.40 23.10 22.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.90 0.71 -3.74 70 172 2012-09-18 20:10:21 2012-09-18 22:06:20 1 3 152 0 38 165 2 121.50 18 51.88 NEW ..GDlFslsLs..sspauhGpll...shtths+............hhshhhsps.......p.hph.hpphp.shs..lcth.....hh..hhh.hcpshhpGc.....Wc.lIGppshtptptt........slhahh..shs................hpthp.h....sh...ph..p........t..... ........GslFshsl...tspauaGhll...shttht+............hhshhhshs.......p.hph.hsp.p.sl.s..lp.l...t.hh.sthh.hcphhhpGc.....a..IIGphshtptch.........sh.a.h..shs..............htthpth....s..ph..p......t.......................................... 0 15 24 26 +15284 PF15429 DUF4628 Domain of unknown function (DUF4628) Eberhardt RY re3 Jackhmmer:A1L170 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 152 and 673 amino acids in length. 
26.00 26.00 27.60 26.20 19.50 19.10 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.04 0.70 -4.97 10 42 2012-09-19 13:37:54 2012-09-19 14:37:54 1 2 32 0 24 34 0 220.80 60 57.31 NEW MFENhNsApsPKLQpS+ShspLo+ssususu.shGssEPuGPulhsGSSQHLK...NLGKAVGAKVNDFLRRKEPuuLGs.lGVhEVNKoAuApLuuusssspsshh....psctSh..EuFPRLDPPPPss+KRTPRALKTTQDMLISuQPVlSShEtuppsssupspco.spsps....sptsstpsE.ussppttpscsLsNG...................EsoL.SVPDLIHK........Dup--s+l+so-sR+uSoPs.hEpsGlKlSLS.hsLtE...psuuPsspuRTsSLDsEGPHPDLLSFE .................MFEN.Nss.sPKLQtS+ShspLo+ssusssh.s.ussEPuGPulhsGSSQHLK...NLGKAhGAKVNDFLRRKEPouLGu.VGVhElNKTAuApLuuGscsssssh.....pscpSs.pEuFPRLDPPPPlT+KRTPRALKTTQDMLISSQPVLSShEhusp.ssupsp-o.stsps.......t....c.sshth.tpscsl.NG...................E.sL.SlPDLIHK........-sp-.shhp.sp.R+sSSss.hcp.uhK.SlS.hpLhE...psssss.tsRssSlDsEuPHPDLLSFE...................... 0 3 5 10 +15285 PF15430 SVWC Single domain von Willebrand factor type C Coggill P pcc Jackhmmer:P0C5F3 Family SVWC is a family of single-domain von Willebrand factor type C proteins from lower eukaryotes. The canonical pattern of most von Willebrand factor type C (VWC) domains is of ten cysteines, however this family, largely but not exclusively of arthropod proteins, contains only eight. SVWC family proteins respond to environmental challenges, such as bacterial infection and nutritional status. They also are involved in anti-viral immunity, and all of these functions seem linked to SVWC expression being induced by Dicer2. 
27.00 27.00 27.30 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.59 0.72 -3.53 127 301 2012-09-19 13:48:04 2012-09-19 14:48:04 1 5 52 0 121 307 0 67.30 24 53.12 NEW Ch..a.p..sp..hlss.Gtt.....hp.....pp..sCpphpCp.....pp..tplp..lpsCsth..s....st..sC...phtp...sssts.a.PpCC...phhC ........................C.hp..st...hltsGpp.....hp......ps...CtphpCp......pstplp..lpsCshh..s......sssC...chts.......shsts.a.PcCCs..phhC........ 0 34 41 86 +15286 PF15431 TMEM190 Transmembrane protein 190 Coggill P pcc Jackhmmer:Q8WZ59 Family \N 25.00 25.00 89.10 80.50 23.90 21.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.72 0.71 -4.23 3 18 2012-10-03 17:26:12 2012-09-19 14:49:08 1 1 17 0 10 16 0 125.60 72 76.72 NEW GNGIQGFFYPWSCEGDVWDREACGGQAAIENPNLCLRLRCCYQEGICYHQRPDENMRKKHLWTLSLTCuGLLhLIFLICFFWWAKRRGLhKpLKMPGhLS+hKKsKlSRsVSpoSssphoLpcKcpSPLLus.sG ......GNGIQGFFYPWSCEGDVWDRESCGGQAAIEsPNLCLRLRCCYRDGVCYHQRPDENMRRKHMWALGWTCGGLLhLIsSICLFWWAKRRDh...LHhPGFLtG.+CD.LS+oVSLLSKcRGTtcp...........susss....... 0 1 1 1 +15287 PF15432 Sec-ASP3 Accessory Sec secretory system ASP3 Coggill P pcc Jackhmmer:Q9AET7 Family Sec-ASP3 is family of bacterial proteins involved in the Sec secretory system. The family forms part of the accessory SecA2/SecY2 system specifically required to export GspB, a serine-rich repeat cell-wall glycoprotein adhesin encoded upstream in the same operon. 
27.00 27.00 39.30 39.30 26.00 21.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.50 0.71 -4.18 38 390 2012-09-19 13:50:34 2012-09-19 14:50:34 1 2 375 0 22 205 2 123.30 46 50.80 NEW oYhYGSplpapspspVpFpNsLhPSGphI+sWtStsNaQu.sRtsPsLPLL++GppYclphsh-sp..Ppsulal+lhFaD+hscplpphlh+spp..hpFsYPccAYsYpIpLlsAGhpplsF+plpIp-hp ......TaMYGSpVsaps.scVpFhNPLMPSGhsIHpWh.hppFpp..h+.osPsLPlL+RGpcYpLphsF-sp..PtsoVYhhIhFas+pupcLuppIlKsps..hshpYP-EAYuYclphhsAuspSLhF+sloIpEh.s....... 0 7 8 18 +15288 PF15433 MRP-S31 Mitochondrial 28S ribosomal protein S31 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q92665 Family MRP-S31 is the mitochondrial 28S ribosomal subunit S31. This family of proteins is found in eukaryotes. Proteins in this family are typically between 246 and 395 amino acids in length. There are two conserved sequence motifs: RHFMELV and GLSKN. 27.00 27.00 30.40 30.40 20.30 25.20 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.18 0.70 -4.99 29 122 2012-09-19 13:52:14 2012-09-19 14:52:14 1 6 87 0 76 120 0 254.30 40 76.53 NEW spccLhslltuMKl-.lssp......p.h......pps.c..tpp.h.cshpsssuhhp+spp-us...p.ps-slss-LlsAApsVAsoLs...s-+p.pTESELLppLhpHcspspspppt....................pppcls.......hu.slIusMK.......ls+ssst+...hss...................Rssp...pIpFc-p.tcshs....pcc........c.stsh+p..........chsLFsG+pLsIFssts....cpsspstst.olW-h.ht+pLuhsspQPPtNtFEEMIQWTcpGKLWcFPIsNEtGh--E.ss.cFpEHIFL-+HLE.sa.P+pGPIRHFMELVssGLSKNPYLoVcpKhEHItWFR-YFppKcDlLKEs 
.........................................................................................................................................................................s...................................................tt...t.t.hpct.t.t.......tc..shs.plltAspslAs.Ls.....cpp.p...scSELLtpL.t+pptststttt.............................................................................tt.ph..p.......hp.pll.sshK.......ls+ptt.p....t.........................................p.p........p.phcpt.pt......tpc...................p.t.hpp.....................................phslap..u..c...Ls..IFp.tt.....t.....s.st.shWsh..t+...pLsh.hs.........p.ps..s.tNtFEEhIpWTcpGKlWcFPIsNEt.Gh---.ss.pFpEHlFL-cHLE..sa...P+pGPIRHFMELVssGLSKNPYloVppKh-HItWFRsYFppKc-lLp-........................... 0 27 33 55 +15289 PF15434 FAM104 Family 104 Coggill P pcc Jackhmmer:Q969W3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 113 and 185 amino acids in length. There is a conserved SLQ sequence motif. 27.00 27.00 43.40 43.30 25.20 25.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.57 0.72 -3.86 10 68 2012-09-19 13:53:57 2012-09-19 14:53:57 1 1 32 0 30 93 0 113.20 66 80.69 NEW RKRRR............sGsEEDsalsPQsKRss......+NslhQDsaDTE.....................SSSSDstp.......SSSuINSP-RA...........SG...PEsSLNQhssssusNh.PQ.hp.EpSAlC..QGPYsHINQlLKEAHFaSLQpRGRP.PT ........................RKRRR.NGsEEDNHlsPQoKRSS......RNPlFQDSWDTE.....................SSuSDSGG......SSSSSINSPDRA..............SG...PEuSLsphhsGSuPNT.PQsh..EQSALC..QGsYFHINQTLKEAHFHSLQHRGRPsT.... 0 2 4 9 +15290 PF15435 UNC119_bdg UNC119-binding protein C5orf30 homologue Coggill P pcc Jackhmmer:Q96GV9 Family UNC119_bdg is a family of eukaryotic proteins that probably plays a role in trafficking of proteins, via interaction with unc119 family cargo adapters. The family may play a role in ciliary membrane localisation. 
27.00 27.00 221.20 221.00 18.30 17.10 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.26 0.71 -4.96 5 39 2012-09-19 13:59:18 2012-09-19 14:59:18 1 1 37 0 25 31 0 196.10 83 95.14 NEW MDINGsSRoslSlLP..AAElpSTLKPEAEKPRCSSTPCSPIRuTVSGYQILHMDSNYLVGFTTGEELLKLAQKWSsG-ssKu...EAhPS....slsKsVDlGLHRSSRIYKuKSRYYQPYDIPAVNGRRRRRMPSSGDoCpKSl.Pa.EPtKALHGPLPLCLLKGKRAaSKSLDYLNLDKMsIKESuDTEVLQYQLQHLTLRGERV .........VDINGESRSsLoTL..PhPsAEssSPGKAEAEKPRCSSTPCSPMRRTVSGYQILHMDSNYLVGFTTGEELLKLAQKCTGGEESKG...E.Ah.Po....LRSKQLDuGLARSSRLYKTRSRYYQPYEIPAVNGRRRRRMPSSGDKCTKSL.PY.EPYK....ALHGPLPLCLLKGKRAHSKSLDYLNLDKMsIKEPADTEVLQYQLQHLTLRGDRV 0 1 3 10 +15291 PF15436 PGBA_N Plasminogen-binding protein pgbA N-terminal Coggill P pcc Jackhmmer:O25249 Family PGBA_N is an N-terminal family of bacterial proteins that bind plasminogen. This activity was identified in In Helicobacter pylori where it is thought to contribute to the virulence of this bacterium. Both PgbA and PgbB are surface-exposed proteins that mediate binding to plasminogen such that it can be converted into plasmin in the presence of a Pg activator. 27.00 27.00 69.90 69.60 23.00 22.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.48 0.70 -5.07 27 196 2012-09-19 14:39:47 2012-09-19 15:39:47 1 5 194 0 21 117 2 213.50 51 74.30 NEW hpslcsslhtV-stt.sh.lphpu.clpVGpSGhVlpph.ss.pssIlApAsVhph..p..sGhApl+apsF-sLpQcALPpPphhPptGDcllLsahYsRullIAPsp-hYpplpssas.slpFlHsDlhuuaL...ss....sPp+cDF++hCst.ulGLlalshssch.hlDCpSFplLp.pphs.ts.spphphPFYSRlpsI-sshas......hsu.pchpsY.sYYcpLl ....................l.+PlKscLlcVDDhh..Ga.I+.DSsDIKlsSSGVVlp+F.ssspSIIARAuVIsK..c..sGlAKLcFoVFsuLKQ-ALPLPsllP+tGDEVVLNFLYDRuLlIAPD-pTYsclstsFP.pIhFsH.DlhGApLhhsss.h.uPKRuDFRcFCs-sAVGlLhlAL-NpupllDCQsFshL..hEls.lScsossQlPFYSRIuGh+osFFD......FNS.pclsNYYcYYDALl................... 
0 5 17 21 +15292 PF15437 PGBA_C Plasminogen-binding protein pgbA C-terminal Coggill P pcc Jackhmmer:O25249 Family PGBA_C is an C-terminal family of bacterial proteins that bind plasminogen. This activity was identified in Helicobacter pylori where it is thought to contribute to the virulence of this bacterium. Both PgbA and PgbB are surface-exposed proteins that mediate binding to plasminogen such that it can be converted into plasmin in the presence of a plasminogen activator. 25.00 25.00 27.10 25.60 21.50 20.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.31 0.72 -3.85 7 118 2012-09-19 15:07:48 2012-09-19 16:07:48 1 5 44 0 3 126 0 85.50 60 25.41 NEW hcEssspsNuo.....ppptpcN.....APs.KEsNA.KEtsKLssKEEKRRLKEEKKKAKAEQRAREFEQRAREHQERDEKELEERRKALEhNKK .................................t......ppspts.....ppptscN.....APs.cEsNspKtEpKLsuKEEKRRLKEEKKKAKAEQRAREFEQRAKEHQERDEKELEERRKALEhsKK.... 0 3 3 3 +15293 PF15438 Phyto-Amp Antigenic membrane protein of phytoplasma Coggill P pcc Jackhmmer:Q7M1T6 Family Phyto-Amp is a family of phytopathogenic wall-less bacterial antigenic membrane proteins [1]. The bacteria are limited to the phloem and pose a major threat to agriculture worldwide. They are transmitted in a persistent, propagative manner by phloem-sucking Hemipteran insects. Phytoplasma membrane proteins are in direct contact with hosts and are assumed to be involved in determining vector specificity. Phyto-Amp is thought to be one family of proteins that mediates such specificity. The proteins appear to be encoded by circular extrachromosomal elements, at least one of which is a plasmid [2]. 
27.00 27.00 48.60 47.80 21.50 20.40 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.26 0.71 -4.67 3 24 2012-09-19 15:22:09 2012-09-19 16:22:09 1 1 22 0 3 23 0 179.30 70 82.31 NEW MQNQKTQKSLVAKVLVLFAAVALMFVGVQVFADsPLDLSTLcC-ssLELTANDASDAEKVVsQWKVQNTSLDKKVTKDSVKVtVADNKVTVTPVDuSATSALoGSKVLoLVGVCELNKLTLGTDKKLTLTVKDDKVDAEAGLKALKEAGAKVPATVTKDDLTFTVGKGDDANKVTVKAVDGKTTVSGQVTFEFNV MQNQKsQKSLVAKVLVLFA...AVALMFVGVQVFA.DDKLDLuTLEC.KssLELTAA.DAuDAEKVVKQWKV..QNT..SLsAKVTKDSVKVsV....AD.NKVTVTPADuDAuKALoGSKlLsLVGVCELNKLTLGTDKKLTLTVKDsKVDAEAGLKALKEAGAKVPATVsKDDlTFTVGKsDsANKVTVKAVDGKTTVSGpVsFEFsV...... 0 1 1 2 +15294 PF15439 NYAP_N Neuronal tyrosine-phosphorylated phosphoinositide-3-kinase adapter Coggill P pcc Jackhmmer:Q6ZVC0 Family NYAP_N is an N-terminal family of eukaryotic proteins that are substrates of tyrosine kinase in the brain. When first identified, the family members were referred to as unconventional myosin XVI, or Myr 8 [1]. However, proteins have now been identified as being integrally involved in neuronal function and morphogenesis. The family is involved in both the activation of phosphoinositide 3-kinase (PI3K) and the recruitment of the downstream effector WAVE complex to the close vicinity of PI3K; it also appears to regulate the brain size and neurite outgrowth in mice [2]. 
27.00 27.00 137.40 34.80 20.80 19.70 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.86 0.70 -5.28 20 174 2012-09-19 15:53:14 2012-09-19 16:53:14 1 16 37 0 86 128 0 321.70 41 37.94 NEW AREsDRlRschstshhpcphp.t...stpp-........ssK.tp-tsuht.............RHFR...ushshPhu.Dplsps......su.u.uhRS.SLHSVhSh.D-uuuhs...........SsRKQPPPKPKRDPsTRLSAShEA...........Vu..AsLss......usKcuupt.....hs+PRPHS...D-..ohKKIPP.KPKRSPNT+LSuSaEElsuts...P...u.....spsu.ss...........sht.ph.spsspsssp....................pp.cs.E-EPVYIEMVGssh+stu.........................scuss-puEAVYEEMKY.hs.E-ss..sts...shssuS.Pshpsp..p.hs..h...psth.sttpus..ppssCDIPsPFPNLLsHRPPLLVFPPsPsTCSPsSDESPLTPLEVpKLPVLEs.............s.pashQssuSSP.Ss.Qhstsp.Kusss....Psus.shsshss..usRspSpuTPhP.Pp ...........................................................................t+Ep-thRpp.p.sh.tt..t.....th.t...........sh+.ht..-.tsht.............+Hh+h..u.hohPs.s.-p....h.....sps...........ss.u.usRS.Sh..HSVtuh.Dss.uuhs...........tsp+.pPPsKP+RcPsT+LShS.cs........................................ss..uphss...................................spptssp..........+PpPcu...c-....+KlPP.KPKRsPNTpLSsSa-E.hh.t.......s.........stt...s...sh...........sht.t.....upsss........................sp--EPVYIEMVGshhRsht..........................tpps-puE.ulYEEMKYsl..--hs.......t....s..t...hss...........sS.....s.p.p..................s.h.t...ss..pt..C-IPPPFPNLL.HRPPLLsFPsusspsSssuDpsPlsPlpVp+LPVLp..............s.pa.c.pssuuoP.u......phstppptps.........tsshhh.ss....ssps.opusP.Ps.s......................................... 0 3 11 30 +15295 PF15440 THRAP3_BCLAF1 THRAP3/BCLAF1 family Eberhardt RY re3 Jackhmmer:A2AJT9 Family This family includes thyroid hormone receptor-associated protein 3 (THRAP3), which is a spliceosome component and a subunit of the TRAP complex which plays a role in pre-mRNA splicing and in mRNA decay [1]. It also includes the transcriptional repressor Bcl-2-associated transcription factor 1 (BCLAF1) [2]. 
27.00 27.00 27.30 27.30 25.60 25.60 hmmbuild -o /dev/null HMM SEED 646 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.67 0.70 -13.46 0.70 -5.86 22 245 2012-09-20 09:01:36 2012-09-20 10:01:36 1 8 42 0 94 181 0 501.30 34 75.82 NEW uRSRSRSP..K+RSh.SPtsR.spp+..acp+pSps+h.ssp.......R.....+csppss.phR.hcs-K.pspscp+hssctsst..h+ph..E..c+ususshc.tsh-shhs.ps.p..................p.sh.Pt...tsD.sspRuph.Pphsps.sshccppps.......shs.psps+hh.sshp.ptstpst+ssp.........RSh......usS......+apsp............h+.........psphc.tpph..pppphspS..+cu....s--hpsRoSFpK..RYP..E..Dp-hcphs+ppcRsp..DhE+h.........c.....s+cssR.....ssch......c.cc.uhss...hp-cc...cphshu.psc.RhstR-h.-p.....ups..sK......ohDYc+K++p.hs..cs-pcFsDs........csp+hsccEDpKappptt...s.hs.+.EussFs......ssRsccoc-tps........+c.s.K...scKcshsps....p..ssps-lchcssppKhcp+hc..pp....shR+p.ssspppphspspch..sthKsSssph+c.....cplslKlDst..shDphR..suuShssERphu+DLVtsu+KpppF+slF-HlcS.sQs.pppPotpFsQcIlTIlHpVKtpYFsSsshTLpERFo+hp...css..p-s.+pppsPEIHRRIDhS.upLpp+pph......hpE.......oppshhK........h...DPsDLRHDIERRRKcRhpsc-c+thp......hsSuspRspppS..phpph.ps.cs-tFpKss+hh.+.sphRph.ppPpps........oppppcshpp...ctathcsppcs..cp..ppsFc 
..................................................................................................uRSRSRSP..K+RS..S.tsR.spp+..p..+pS.....p..s....ppp.....p.......sp.ss.p.R.hpppK.ptptt.t.........s.ptps....tsh.p...ppt.psp.p..s.ts..php..p..................t.t..st.....ths.sp.pu....s..tt..s.pppp.p.......s.s....s.....s..s..p.t..tsh..tts..p....................puh..................ttu.................phpsp......................p............ps.hp....h..p.tph.ts..pt..............pt.tttuua.........Y...-.....-pcht.......phtch.p+.p..-..cp..................p+tshp.....p.th......................t.c.s.........tc.pp...ths..st+.t..+p.t..pp......sph..tc........p...D.pphcp.t..hh...tpptc...st.............t..p..s.tts.+.p.t.t..........hp.h.E..hht..........ptct.chpp.t.....................tt+pp.t.tt....p..s....h..ph..........ph.......st........c.p..p.......hp...s.s.pp.h....s.ttp.....s.hc....S......h+..........tp.pV+...hc.h..s.D..p...............su..hsp-R.hspsLVtss+Kpp.pF+slFpHlp..sQs.ppssSc.FhQcIlollH.pVK...tpaF.SsshTLpERFoph.......+usppc...p.ppsPEIHR..R.IDl.S.ushpp+.thh......htE........ppts.K.............h.tDssDLRhDI-RR+K....c+.pp...cp.hp......htSs.s...pRppppo..p.pp......cscthpK..p...p...........tp...s...........p.................................................................................. 0 3 12 34 +15296 PF15441 ARHGEF5_35 Rho guanine nucleotide exchange factor 5/35 Eberhardt RY re3 Jackhmmer:A5YM69 Family This family includes Rho guanine nucleotide exchange factor 5 [1] and Rho guanine nucleotide exchange factor 35. 
27.00 27.00 69.70 36.70 23.10 18.80 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.70 0.70 -5.69 11 32 2012-09-20 09:25:48 2012-09-20 10:25:48 1 5 22 0 18 32 0 418.10 51 33.63 NEW MEAEEPpHGASTPlPAlcEhullPEulMRSSQIPAL-PEAQEGpDPSapWsEGHRLltsQp+-LRDssDaAs-Shs.FPK.EuSsssEosQEsLVAEAsD.TPEpQEuVsQSLsDcpARTlAsPEhhACPlQuEaLDh.sshSocLsoR.VEsEhpPELTSLsLusupA-E..cEEsSPssSspstaaPsCc.cHPuETsps.csutussl+QGEcLQhctsQES...p...G...L...LpPQEAQGLEEQttQEsthQtEGTlpEslCsDGLhGE.p.QhsEp.ssGsEtEQ+QKQEQlQ.D.sh.LG+QGEppGLs..GELEGLssuEhs.EppEpcspspcssEptEppcpEhpuPEEpcssoQstEsQSLlEKSEcVotKQEscGlp...GcshsVEGQE.......E.......EEPGsWDuushushscppsspEpcE+cGPSsL.AhVAPEVsSPsDLFP-sShPhopIPGTQpEP..tAEELSPtALsPsLEPscWSaQPlS.PuSFPstESLDscT ..MEAEEsQHGASsPIsAltEhSlIPEA.MRSSQlsALs.EAQEscDPSYKWpEtHRL.tTQQp-LRDVsDaAhEoMsSFPK.EuSu-VEssQEslVAEssD.TPEp.EAsPQSLAsRQARTlAsPELhACPlQuE+LDh.sshSS-LGSc...EVEFhPtLTSLsLGouQAEE..cEEoSsDsSuQTpaassC-.-HPsETsQs.csutSGolRQGEEL..c-hQESp...GLlpP..QEsQsLEEQGpQE.sshptEGTLtEslCuDGLLGE.p..QMhEQ.lNspcGEQ+QKQEQlQ.DshLG+QGER.tLs..sE.EGLNsuEhtpEphEpcspsQts.cptEtpcpEhphPEEpcssuQspcsQ.ohhtKSEcVotKQEspGhp...tcshslpsQE.t.......cE.tshDusthss.s..ttpstEEtpEHcGPsh..s.lAPEsss.sDlFPsssh..ho.pI..PtTQpEs..pAEELoPtA.sPsLEPhthSaQPhS..uSFsstES.DpE.......................................................................... 0 2 2 3 +15297 PF15442 DUF4629 Domain of unknown function (DUF4629) Eberhardt RY re3 Jackhmmer:A6NCI8 Family This domain family is found in eukaryotes, and is approximately 150 amino acids in length. There are two conserved sequence motifs: MHML and LGKK. 
27.00 27.00 44.60 42.40 19.10 18.20 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.08 26 89 2012-09-20 12:35:51 2012-09-20 13:35:51 1 2 25 0 50 72 0 142.30 49 19.49 NEW usS-psRKNKHKASE.lsuAPcAKIQscs.-sLltGEss.lssAsuS-+AstshAK+pssKs.KsAsuRss..ps+upGQ-+s++spENsoKKstEpKQSss+V.KAEEKPsIPKpKRK+s.PELSQEsFKKPRosLGMHMLESVQVFHsLGKKs ..........s.SDQsRKsKHKASEPlpGAPcAKIQscss-sLltGEss.lssAsuS-+AssNhAK+sssKspKuAuSRss...psKupGQE+sKpsp-NsSKKspEpKQSss+V.KuEEKssIPphKRK+s.PELoQEoFKKPRosLGMHMLESVQVFHsLGKK.......................... 0 1 1 5 +15298 PF15443 DUF4630 Domain of unknown function (DUF4630) Eberhardt RY re3 Jackhmmer:A6NCS6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 124 and 286 amino acids in length. 25.00 25.00 25.50 25.50 24.00 24.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -10.98 0.71 -4.41 6 29 2012-09-20 12:49:22 2012-09-20 13:49:22 1 1 25 0 18 23 0 138.90 47 63.95 NEW GA..ALVGVLVAEAuPEDAVAP....tLRLLEALLRsVFGRQAGG....PVQAAAYsPGpPA.SSLAVQsAACRALQAAGPu+PAEGAWERPGLPuLLACFSWGPWS.RtKs.ssous.suPsQschQ-sEEELALTulaPNGDCED.sG+GStApDGlsp.sPs-PsGDo ....................................hlGlhhh.st.c-u.su........tlplLEuLLRsVF..G+psGG....sVQAAsYsPGpPA.SsLuVQtAACRALQAAGsucPs-u..AhERPuLPuLLsCFSWGPhp..++Kspsssus.psPup-shQ-sEEELALTul.aPNGDCED.hGpGocApDGshH.sPs-PsG-................ 0 1 1 5 +15299 PF15444 TMEM247 Transmembrane protein 247 Eberhardt RY re3 Jackhmmer:A6NEH6 Family This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 197 and 222 amino acids in length. The function of this family is unknown. 
27.00 27.00 30.50 30.40 22.00 22.00 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.58 0.70 -4.52 7 30 2012-09-20 13:25:11 2012-09-20 14:25:11 1 2 23 0 16 41 0 198.90 71 95.41 NEW MAsEDREhMEuRGAGESCPThPKhVPsDshsEGKP+ApLEAEs.KPDSSYDaLEEhtsCEDGuCsGPPK........s.sstuuPs.TKGQAGDGP..EssELsh........sPGsEHssEMELEKlRMEFELTRLKYLHEENERQRQHEpVMEQLQ.....pQAss................FSGGLQDLLLPQNQFAMFLYCFIFIHIIYVTKEMlFFLFSKHYLFCIAAILLCLIKTLWS ..........MAsEDREMMEARGAGESCPTFPKhVPuDshSEGKPRAsL...EAES.KPDSSYDaLE..EMEsCEDGGCsGPsK........SLSsKusPs.TKGQAGDGP..cs.uELP.........sPGT...E+.NsEMELEKVRMEFELTRLKYLHEENERQRQHE.VMEQLQ....pptp.....p....l.p......h.s..FSGGLQDLLLPQNQFAMFLYCFIFIHIIYVTKEMVFFLFoKHYLFCIAAILLCLIKThWS............ 0 1 1 2 +15300 PF15445 ATS acidic terminal segments, variant surface antigen of PfEMP1 Rask T, Coggill P pcc Rask T, [1] Domain ATS is the intracellular and relatively conserved acidic terminal segment of the Plasmodium falciparum erythrocyte membrane protein-1 (PfEMP1) [1]. this domain appears to be present in all variants of the highly polymorphic PfEMP1 proteins. 
27.00 27.00 29.60 29.10 21.30 20.80 hmmbuild -o /dev/null HMM SEED 437 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.78 0.70 -5.37 35 168 2012-09-20 15:35:02 2012-09-20 16:35:02 1 33 2 0 22 173 0 348.00 56 19.28 NEW STlsaulGluhuohsahaLK...KKTKosV.sLFplLpIPKuDYsIPThKSsNRYIPYsSs+YKGKTYIYMEGD....ouuD-.KYsh.scooDIT.SSESEYEElDINDIYVPuSPKYKTLIEVVLEP.................................o.p........sTQNDIssDsl.PSs..................lTDsEWNpLKcDF......ISphLQsp..Ns.P.pshhssslshNTpP..Th.ccsh............EKPF......IhSIH.......DRNLYoGEE....YoYNl....sM........STNohs............D.shsssN..........................sl.......YSGIDLINDoL..Gsp.lDIYDElLKRKENE....LFGT.NHsKp.TSspSVAKpT.sSDPIhNQLsLFHKWLDRH...........RDMCEKW...NNKEElL-KLKEEWpp-sp........SuNh............................................................spN......................+sLNTDVSIQIcMDNPKPhNEF............oshDo.sptssMDoI..L-DLEK.aNEPYa...hpDD.IYYDVNs-cssosps...................................hspNs.hDV...PoKVpIEMcl.s..p..cllcEcYPIuDVWsI ........................................Tl.aululuhu.uhsahalK..KKs.KosV.sLhpllpIPKuDYsIPThhSsNRYIPYsSspY+GKpYIYhEGD....pusDp.tYh..schoDIT.SSESEYEEhDINDIYsPtuPKYKTLIEVVLEP.................................o.s...........c.T.p.NDI.sDsh.Pos.................phTDsEWNpLKc-F......ISphLQsp..ps.P..shhpssls.NTps..Th.hcs..............-EKPF......IhSIH.......DRsLYoGEE...hsYsh.............sNs.p...........................p..h.tpN....................................YSGIDLINDsL.sGs..hDIYDEhLKRKENE......LFGT.pasKp......To.psVuK.s.psDPIhN.QlpLhHp.WLD....RH...........RsMCEpa...pscp-hLsKLpEpWpp-sp.............ussh..................................................stN.........psLNTDVSIpIcMDpsKshpph............o.........sMDshhps.hc..................sD.IYaDVp.p.t..........................................spN..h-h...PpKlplEMpl.s.......t.hc.paPI.......................... 
0 22 22 22 +15301 PF15446 zf-PHD-like PHD/FYVE-zinc-finger like domain Wood V, Coggill P pcc Pfam-B_5236 (release 26.0) Domain This family appears to be a combination domain of several consecutive zinc-binding regions. 27.00 27.00 27.20 27.80 26.20 25.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.84 0.71 -11.46 0.71 -4.98 28 70 2012-10-03 17:27:21 2012-09-20 16:37:26 1 6 70 0 57 73 0 166.90 44 10.80 NEW sCcoC...usstp+G..sLlaCQGCosSYHKsCLGsRSsR-HhVTKVGs-sFVLQCRhClGlh+cKDstAP+putCpsC+ppGhuCtPFpp+pTs+QEpplR--NuGsDPITsV-ssL..lNNs-NVLFRCs....pC+RuWHhcHLPshup.s.sssssps..........p.h..pppRhcEYShcWpC+-Ct .................................CcsC...uss.p.+G..sLlaCQGCosuYHpsCLGsRusR-HLVTKVup...-pFVLQCRhClGhs+pKDspAP+putCptCpp.Gt..hscPh.Rp+hTs+QEpplR--NuGtDPITsV-.sL..lNN.....s-NV.....LFRCs....sC+RuWH.hcHLPshspss.t....t.p.p...........p.htppRhpEYohcWpCc-C.......................................... 0 10 27 45 +15302 PF15447 NTS N-terminal segments of PfEMP1 Rask T, Coggill P pcc Rask T, [1] Domain NTS, the N-terminal segment, is the most variable part of the variant surface antigen family of Plasmodium falciparum, the erythrocyte membrane protein-1 (PfEMP1) proteins. PfEMP1 is an important target for protective immunity and is implicated in the pathology of malaria through its ability to adhere to host endothelial receptors [1]. A structural and functional study of the N-terminal domain of PfEMP1 from the VarO variant comprising the N-terminal segment (NTS) and the first DBL domain (DBL1α1), shows this region is directly implicated in rosetting. NTS, previously thought to be a structurally independent component of PfEMP1, forms an integral part of the DBL1α domain that is found to be the important heparin-binding site [2]. This family is closely associated with PFEMP, Pfam:PF03011, and Duffy_binding, Pfam:PF05424. 
25.00 25.00 25.40 25.20 22.90 22.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.49 0.72 -3.56 205 318 2012-09-24 07:58:16 2012-09-20 17:12:31 1 36 2 2 14 324 0 37.30 39 3.36 NEW sAKclLDpIGcpVa.cc.l+...p.........c.....A..cpY.ps.pLKGsLspApa ..SA+slL-pIGcplpcc.scp.........c....A....ppY.pspLKGsLopApF.. 0 14 14 14 +15303 PF15448 NTS_2 N-terminal segments of P. falciparum erythrocyte membrane protein Rask T, Coggill P pcc Rask T, [1] Domain NTS_2 is a family of the most variable part of the variant surface antigen family of Plasmodium falciparum, the erythrocyte membrane protein-1 (PfEMP1) [1]. However, in this group of proteins conservation is high. PfEMP1 is an important target for protective immunity and is implicated in the pathology of malaria through its ability to adhere to host endothelial receptors. 25.00 25.00 29.60 98.10 22.50 17.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.60 0.72 -3.67 6 14 2012-09-20 16:26:11 2012-09-20 17:26:11 1 6 2 0 0 16 0 50.70 79 1.93 NEW MDSKoTIAsKIEAYLccKSs-ScIDQSLKADPSEV-YYsSGGDG.YL+pNI MDSKoTIA-KIEAYLttKSsDSKIDQSLKADPSEVpYYpSGGDG.YLKNNI 0 0 0 0 +15304 PF15449 Retinal Retinal protein Eberhardt RY re3 Jackhmmer:A6NGG8 Family This family of proteins is found in the photoreceptor cells of the retina [1,2]. Mutations of the gene encoding this protein have been associated with retinal disorders such as retinitis pigmentosa and late-onset progressive retinal atrophy [1-4]. The function of this family of proteins is unknown, but it is likely to be important in the development and function of the retina [2-3]. 
27.00 27.00 60.30 40.20 17.70 26.80 hmmbuild -o /dev/null HMM SEED 1287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.27 0.70 -14.14 0.70 -6.91 14 48 2012-09-21 07:43:49 2012-09-21 08:43:49 1 4 32 0 29 36 0 990.40 43 96.82 NEW MGCTPSHSDIVNSVAKSGIQFhKKPKAILPG+QusSE+sSIPLLVpSSTCaDsGtshptG.......pc.tpEpPus+hsQosucuhpQhstsPhsuptKDhEGhhPEscoo.SQLscSQSHhAcDlsh+TQuSHtoQGuuFuG-EucEssoQcoSpht+KP+CHpsucQ.GHs..sQT....hhPAhtscuKVDFPEPLVKAHQ+AYsYLHosLSKYEAILplscQAoQTpELLQPMVoFLLLCF-ElNQLLGEISKDGEsLLQEV+tDLAWP.+KGEPpEQPDLLQQLLQYTVuKLQlLpGTVAoLTuShLEGSuSYLposASHL-sKLSsKRGhDE+LLRALGQLESLASGHuDPGlpshPLCSEDSGIGADNESVpsl...DKLGKQuSWDhssEPuEWKsshsP.psEA+hsGpuWQpuPahhGuDRPQDCPLSRPhtAKlQPAAQGpAsssssSuouPEssoo.RPhthuKSssp....-Shusssss-A+hsKu.StlhsoPSLSEsEDSSsE.E.-ED-huuhs.sshpcpss.sRPpSSPAssESsFQP+s+RLRSPQAQEMILKMKEAISERIKFVPVPSu+QDWAEE.EEt+ThVssRPSTsSGSRRAPtRQRRSQSEuCLKSasEDPTLQELRRVQ+DLSQRLEsFYALGs+pQGQu........pEplL.PRAAs.LhP-sssRVsPSoTISKLKASLTKNFSILPSQDKSILQ+ssPpP.......EuEps.Q...scAEtLPss...hPsuE+......s.EA......PGspDhssRGCPTRTSVKKLIETFSPsEuLRT.GDS+ssGsSPCLRKWGlPIMPPRFPIYRGLAPLYPKPQISPAuGp-sLphG.uWRPhAPhFPPL.sAEAscs-D..h..ssEs-ED.EcLPPPPLEILMDKSFsSLEsPESSpsuGSSsEsTtsPGLupssss.+RTWASPKLRASMSPhDLLPSKsssoPsRspuTGPGsoKsssssRKLsLDLsp...sPAsutsPEsEu...tu...Q..sp..A-+AsSLuKpPpKAlPWHHsSpTSGQs.RTpEPSlARPopGP.....+SPEAs......RpopERSPsllRKASPTRuH.WsPpuD+Rp.S.PSoHRP........AQPSlPsVpuSP....SPPl.........SP....Rs............hSPPss++hsSPPsp+KhPSPP...s.............huSPPsQ+sEAuSPuSuPSsSPPsSPSQGpK-sp...cS.EDupuusu+suuNTpSIFCPATSSLFEA+...sP.Sss+PhoP....PEuGGshtpPsGsWRuSuGPRhRu-SQRpsuLCALNPQPFlRRTASDRpP....Gs..+LpLP.usuhsupsspsthSpSSS.S.EESPKK-sEPWsuPsuPEL+G.uuRtASPPELCVLGHGLQ 
.......................................MGCoPSHotIlpolA+sGlphh+KPKslhss.ttss-+hslPLLspsSohhs.st................ppt..t.ph..thtps...........ptcthpt..stsps...ph.cuQp+hscch.h+pQuSptoptsshss-pspEpsspt....t+p.+spppupQ.s+h...pT.....hss.tsctKVDFPEsLVKAHQpAYsYLHssLSKYEAILplhcQAoQTp.lLQsMloFLlLpF-ElspLLtEIucDGEhLLpEVttcLAWP.pKt-spEQ..............PDLLQQLLQYTVsKhQhLpuTVAsLTuohLEsSsuYLposAs+LppKLpsKRshDEpLL+sLupLEuhAsupucPthpshPLpSEDSGIGADNESlpth...DKLG+QsSaD.ssc...phc.hhts.phEst...psWppsPhhhu.tpspDs.Lpt..hshhpPtspststsss.S..shsstshss..shthtpStst....sShshshshcsch.cs.st.hsssShs-sEDSos-.E..--p.ushs.p..pcpt...RPpSSPAsh-usapsps+Rlcu.QApEMILKMK-AISE+IKFVPs.sspp-WsEE.E-tpshlssRPSTssG..up+sst+Q+RSpSEtsLpSpsEDPTL.ELpRlQ+DLSp+LEhFYshst.+tpsps........pcp.hpPRsss.lh.ss.spsssSsohS+LKASLoKNFSILPSQDKslhQ+sssps....csc...p.spAEtL.ss...hsssEh......pcsstspchsscuC.ss.RsSVKKLIETFSP..sEplth.tso+s.GsssClR+hGhslhPPRhPhYRGLAPLYPK.pIoPusutp..phs.uW+PhAPhFPPl.tuttscpp-..h..pt.Eh-t.s.EpLPPPPLElLMDpSFsSLEsPEospsstsSscts.t.Pu.spssss..++ThsSsKL+AShsPhDLLPSKssssss.t.pus.usGss+stspsRc.sL-lpp...sPssstssEhps...tt....sps-cAsuL.+pspKslshppsu..s..SGps.ps.Esuh...uRs...............+pupE+SPshsRKsSPoRsp.Wssps-+R..S.PusaR.........uQPS.sslpp..P....SPPh.........SP+s............hSPP.st+c.sSPPhp.KhPoPP....s............pp.sSPPsp+.EsusPsshso..PSPPhSPSps.Kpsp....co.--spss.uKsuuNspSIFCPAoSSLFEA+...ss.s.sp..ss....PEsGs..ttss.shRsS.tsh.hu-pQRphslsAhNP.PFlRRohSD+p.....t.t.t......t.....t....p.t.spssu..t.EEus.p-sts.hssspss-lpt.us+.su.P-hhVlGpGLQ................................................................................................................................. 0 1 6 14 +15305 PF15450 DUF4631 Domain of unknown function (DUF4631) Eberhardt RY re3 Jackhmmer:A6NI56 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 394 and 668 amino acids in length. 
28.00 28.00 29.70 28.60 27.90 27.60 hmmbuild -o /dev/null HMM SEED 532 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.85 0.70 -6.22 8 50 2012-09-21 08:26:51 2012-09-21 09:26:51 1 3 32 0 23 51 0 415.00 42 75.89 NEW LsSosssPEQDTsKRWcQLEQWVA-LQAEVssLRGHKpRCE+AoLSLLRELLQVRA+lQlQsSELcpLppEl+.psAhsPEKEA.EhSGsQ..sQNQMQALDKRLVEVREALTQIRR+QALQDoERKGuEQEAsLRLs+LoshL+QEEQuREsACSuLQKsQE-uSQKVDcEVARMQAQlTKLGEEMSLRFLKREAKLCGFLQKSFLALEKRMKASEooRL+sEsuLREELEuRWppLQELsEERLRuLpGQpEVususp.QpEEu+LLEQCRGLDpAVVQLTKFVcQNQsSLNRVLhAEQKAR-AKspLEESpApELAuYlQENLEAsQLA......u-LApQETpssLELLQ.EKSQsLEsSVApLspQLKDLsDHhLALSWRLDLQEQTLuLRLoEs+s.EWEGsER+SLEcLAphpcEspAHL+EVpEKVD.uLPQQIEuVSDKClLHKSDSDlKIsAEG+AREaEVcslRQELAsLLSSVQLL+E-NPGRKIAEIQGKLA..TsQIhKLENSIQsNKTIQNLKFNTETKLRoEEhAoLRESh .......................ppt...pQDs.pph.Q.pp.hspLps-lsplRtpppp..s-pthhtL.p-lhpl+sphphQso-Lhplpp-h...hs.s......-..st.....psthphLDpRLhEl+EtLsplp+p.s.pps-RctsppphshRLs+Lssh...LpQE-psREsACusLpKsQE-sup+ls.E...sA+hp.AplscLuEEhSL+FL+REAKLCuaLQKsFh.ALEpthKspEssR..hEtsLttELEs+WptlpthhE-+ltuLpuQp-..........tEcu+LlEQCpuLDtAVstLT+FVppNQsSLs+lLhAEtKAh-u+splEcopst-LsshlppslEAhphu......uc.ApQEhpspLplLp.EKspsLEsSl.spLsppl.+-Lss...+h.ALS.+...............lDLQEQhLsh+Ls-s............ps.Eapusc+csLcclsphpcEspscLctlpEKV-....ulPppIcslSsKCllhKsDsDh+IssEu+sRch-ltslRQELAslLpulQLL+EcsPu.....RK........IAEhQGcLs..ppQIhKLEsslQssKTlQNL+FNsEs+hRhp-hAsL+Eph........... 0 6 7 11 +15306 PF15451 DUF4632 Domain of unknown function (DUF4632) Eberhardt RY re3 Jackhmmer:A6NIN4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 59 and 190 amino acids in length. 
27.00 27.00 126.50 126.40 24.00 18.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.45 0.72 -4.05 5 14 2012-09-21 10:53:32 2012-09-21 11:53:32 1 2 12 0 8 17 0 69.60 77 70.53 NEW AGuPAKESGDsDGEAD.EEGESEKGAGPRSAGWRALRRLWDRVLuPARRWRRPLPSNVLYCPEIKDIAHMTR hGsPAKESG-.DGEsD.-EtESEKGAGPRSAGWRALRRLWDRVLAPARRWRRPLPSNVLYCPEIKDIAHMTR 0 1 1 1 +15307 PF15452 NYAP_C Neuronal tyrosine-phosphorylated phosphoinositide-3-kinase adapter Coggill P pcc Jackhmmer:Q6ZVC0 Family NYAP_C is a C-terminal family of eukaryotic proteins that are substrates of tyrosine kinase in the brain. When first identified, the family members were referred to as unconventional myosin XVI, or Myr 8 [1]. However, proteins have now been identified as being integrally involved in neuronal function and morphogenesis. The family is involved in both the activation of phosphoinositide 3-kinase (PI3K) and the recruitment of the downstream effector WAVE complex to the close vicinity of PI3K; it also appears to regulate the brain size and neurite outgrowth in mice [2]. 
25.00 25.00 33.40 33.20 23.70 18.80 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.17 0.70 -5.42 3 80 2012-09-21 12:13:01 2012-09-21 13:13:01 1 6 35 0 40 66 0 245.10 47 38.50 NEW LASPHSLPDPTusPL.sPLWTYPoT.AGLKRPPAYESLKAGGlLsKGCGVGAPuPMVKIQLQDQGTsGGAFASISCAHVIAS..SGTPEEE...EEEVGsuTFGAGWALQRKVLY.GRKsKDp.-TEssEGuRAWNGSuEGPSKsEREEK...GsLoSGIPVRSQGAEGLLARhHHuu+....GGSRTGLPlPCQTFPACHRNG.DFTGGYRLGRSASTSGVR..psslHTPRPCSQPR-ALSQsHPAL..sLPLPPQPu+ERDGKLLEVIERKRCVCKEIKARHRPDRGLCKQESMPILPSWRRGPEPRKSGTPPCRRQQTVLWDTAI ...............................tSPHuhP..cspuus..oPl.h.h.u.s.shuLKRPPsY-Sl+uGul.ppu...usPpshs+.plQ-tu.p...suAh.so.h..usupshup...utTPpp.....EE.hsuhFsuGhuL.RKs....GtRptc...h.-..cspDth+shstSs.Es.sKlEpc-R....Gs..su....St.P....V+....uQth-Gh.....tss............suSRhuhs.ss.Th.Asp+su.-.psuh.RLGRSASTSGV....su.l.p.s.R.sSps.p.s................................................................................................................................................ 0 1 3 13 +15308 PF15453 Pilt Protein incorporated later into Tight Junctions Coggill P pcc Jackhmmer:Q5JTD0 Family Pilt is a family of eukaryotic tight junction-proteins that binds to guanylate-kinase. Pilt is a component of TJs (Tight junctions) rather than AJs (Adhesin junctions). The protein is incorporated into TJs after TJ strands are formed, thereby suggesting the name Pilt for 'protein incorporated later into TJs'. Pilt binds to the guanylate-kinase region of hDlg otherwise known as Disk large homologue [1]. 
27.00 27.00 27.00 27.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.36 0.70 -4.74 15 99 2012-09-21 13:25:27 2012-09-21 14:25:27 1 3 38 0 48 81 0 224.40 47 54.09 NEW Moss..ss..........sus..tstpsspQ.pNGpCpSpuoh-uss..E-.hslPuFEKLNPYPTPsPPHPLYPGRKVIEFS.-DKV+IPKNSPLPNCTYATRQAISLSLVQs--..c+.s.........+s.ussPs.......o..P..uSs......s..........uSsQssP.S...............................s.PQ.sPSshASSuSSEEDL.LAsWQRMFVEKssPou-tull.pRTuFS+-TAtELQ++h.sh...us..ushspsssAa...........................................................us..u.-Eus.pll.........sttssstps+-c...p-lslPsSspEE..+..p.LLptpcttppsss.stp..scs....................stssSuRPQRSPKRMGVHHLHRKDSLTpAQEQGsLL ......................................................................................................................................st...........us..hstpst.Q..NGpCps.uostuss..E-..shPuF..EKLsPYPTPsPPHPLYPGR+VIEFS.-DKV+IP+NSPLPNCTYATRQAISLSLVppss..cp.t.........p..sssss.............s..s..tus..................tuspppP.s......................................s.s..h.Ss.ASS.uSSEEDL.LusWQRhFV-+hsPsu....s..h..spRTuFuccshs-L..Q++h...sh....us....sshst.h.s........................................................................h..............s......ptt.............th.ls.Sst-p..pp.l...t...t.h...t.....t..............shss.pRPp+SPKRMGVHHLHRKDSLTpAQ..GsLL................................. 0 1 7 18 +15309 PF15454 LAMTOR Late endosomal/lysosomal adaptor and MAPK and MTOR activator Coggill P pcc Jackhmmer:Q02205 Family LAMTOR is a family of eukaryotic proteins that have otherwise been referred to as Lipid raft adaptor protein p18, Late endosomal/lysosomal adaptor and MAPK and MTOR activator 1, and Protein associated with DRMs and endosomes. It is found to be one of three small proteins constituting the Rag complex or Ragulator that interact with each other, localise to endosomes and lysosomes, and play positive roles in the MAPK pathway. 
The complex does this by interacting with the Rag GTPases, recruiting them to lysosomes, and bringing about mTORC1 activation. 25.00 25.00 25.20 25.50 24.20 24.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.53 0.72 -3.69 45 181 2012-09-21 14:43:52 2012-09-21 15:43:52 1 2 170 0 125 166 0 75.80 28 44.32 NEW psEpo+LLscs...ps...shstht....st.s........pssspphp.Rc.pAL.ssIlpcTu-slIDlsuhpsps...ht..ppp.hs.s.phhst ...............sEpp+LLt-s.....ps..s.stht..pssp.s...........pppspphp..cE.pAL.psIltcTusslIDlsuhsspsh...tpp.h...p....s........... 1 25 52 91 +15310 PF15455 Pro-rich_19 Proline-rich 19 Eberhardt RY re3 Jackhmmer:A6NJB7 Family This family includes proline-rich protein 19. 27.00 27.00 40.10 40.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.14 0.70 -5.42 8 30 2012-09-21 15:10:35 2012-09-21 16:10:35 1 2 24 \N 15 29 0 307.60 61 73.26 NEW MDPRGPAPQPFQQPEKPGRVRRRKTRRERNcALV.......GSRRPLA+Q-PPVASRDP......P......VuPsAPKLVVITQGRLSREHRGLFNHEVKSLDVARLLSSGoLEPsTPsLsTKPSPSPGRuQEPu...QSRGKENQVPGGSGPGPPSsPELPGlGQLLpELQCQLlLPQAFPRRNLVQEARDAIVGTLQACHGCVPDLuLVLRGCQPPLP..GsKPpssERpRMTPSWINSPEQ..APtEGRQRRp..QGTKElTFsMPHT.SSTPTsHRsSLsPP+GPW.....PPsLPSLsSPSGsAWGPPTAFDLLKSIWLVATPP...PP+PWGVGPPQPLPQPPSPLLPRTSALDWSPSPPAPLPSLSWVVAQSSPEAWSFPPMRLY .........MDspGPssQPFQpPEKPGRVRRRKTRRERNcALs.................GSRRPLs+pDPslupRDP.............P......VsP..sAsKLVVITQGRLSREHRGLFNHEVKSLDVARLLSutsLpPsoPsLPsKPSPS.......PuRuQEPu...QSRuKE....NQVPGGSGPGPPSs..P-LPulGQLLtELQCQL.LPQAFPRRNLVQEARDAIVtTLQACHGCVPDLuLVLRGCQPPLP..GsKPtssER.+MTP.WINsP-Q..sPtptRQR+p..pGTKEhsFshPaT.SShPssHRsSlsPP+uPW.....PP.hs.hsSPSGsAWGPPTAFDLLKSIWLVATPP...PPpPWslG.sQPLPpPsSPLLPRTSsLDWSPsPPAPLPSLSWVVAQSSPEAWSFPPMRLY.................. 
0 1 2 2 +15311 PF15456 Uds1 Up-regulated During Septation Coggill P pcc Jackhmmer:Q9P6S3 Family Uds1 is a domain family is found mostly in fungi, and is typically between 120 and 138 amino acids in length. The GO annotation for the S.pombe protein describes the protein as barrier septum assembly involved in cell cycle cytokinesis, GO:0071937. Many of the uncharacterised members are listed as being involucrin repeat proteins, but this can not be substantiated. 27.00 27.00 27.00 33.30 26.90 24.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.42 0.71 -4.13 55 148 2012-09-21 15:33:05 2012-09-21 16:33:05 1 4 90 0 122 149 0 126.70 34 15.67 NEW tlcLLscsAlsDSppaElLohEEV-sLKKEhphLspRlcss+cpLs.lcsKhR-.AAtS..ls+Lhsstptt.......................t.sppshpcs-cpLstss++s-Ehsp-LhplEpRhtclcp+LLEHsAulLphop+ ............hcLLpcpAhs-upcaElLshEEV-sL+......+.....Ehph..............LspRl-hh+cphs.lctchRc.usts.lsphhpssptt.............................................sphspcshhcpEEtLup.-pph-EhspcL.phEpRttcl+p+LLEHsAulLphs........ 0 32 66 100 +15312 PF15457 HopW1-1 Type III T3SS secreted effector HopW1-1/HopPmaA Coggill P pcc Jackhmmer:Q8RP17 Family HopW1-1 is a family of bacterial modular P. syringae Avr effectors that induce accumulation of the signal molecule salicylic acid (SA) and the transcripts of HWI1 (HOPW1-1-INDUCED GENE1) in Arabidopsis. Thus HopW1-1 elicits a resistance response in Arabidopsis [1]. 
27.00 27.00 64.80 64.70 22.20 21.30 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.11 0.70 -5.39 5 28 2012-09-21 15:54:11 2012-09-21 16:54:11 1 1 20 0 1 31 0 288.10 65 38.93 NEW MMPSQITRSSHSSLP-sSPASuDAsSSpEQSPQQsRT+AFVASGELusAFGRTSTAPAQDsVRLLusLQREL-KcsPSaP-VApLuppLAEAAMTEQGhHhLAoEE.Q.psLKslLDRCT+QLADTPAScASHDuLSQACEGLKTARLHQSlApLTGcoHApsRGVPDLLALsHLDP-VLA-KPsuhsSYspFGSFIpTAKsRTA-Ls-SLpcsuuEVsuLLRuHADTLpuLE+LPuALAALTENCPDsPTpcDLRuLAEsAGELLQQLRssDLLPRSEEISSEsGEosV+u+EsVE...P+LTpuQALLhAGGNLVRKFDAYGAl .MMPuQITRSSHSSlP-susASuDAsuspEQoPQQsRThAFhASGELusAFGRTSsAPtQDsV+LLusLQREL-KppPSa.sVApLsppLAEsupTEQGhH.LAoE-.Q....sL+-lLDRCpppLADhPAucASHDsLSQACEGL+TARLHQSlApLTucspuhsRulsDLLsLsHLDP-sLutcPsshoSYshFupFlpTAKpRTA-Ls-sLpcpPstVsuLLRuHADTLp-LEhLPuALtALTENC.DsPsps-LRpLAEsAGuLLQhLREpDLLPR.EEIS.EsGEuPs.u+EssE...P+LTppQALLKsGGNLVRKFDAYGAl...... 0 0 0 0 +15313 PF15458 NTR2 Nineteen complex-related protein 2 Coggill P pcc Jackhmmer:P36118 Family NTR2 or Nineteen complex-related protein 2 is a family of largely fungal and plant proteins that form a complex with the DExD/H-box RNA helicase Prp43. Along with NTR1 it is an accessory factor of Prp43 in catalysing spliceosome disassembly. Disassembly of the spliceosome after completion of the splicing reaction is necessary for recycling of splicing factors to promote efficient splicing [1]. NTR2 and NTR1 associate with a post-splicing complex containing the excised intron and the spliceosomal U2, U5, and U6 snRNAs, that supports a link with a late stage in the pre-mRNA splicing process [2]. 
27.00 27.00 27.80 28.00 26.30 26.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.71 0.70 -5.14 41 137 2012-09-24 10:30:19 2012-09-24 11:30:19 1 6 131 0 107 140 0 244.70 26 44.95 NEW spYsp-YlsELpsuTsss.P..............tp-tp...........hsl-shph.st.h.................spsstIPocuEIcEtKpRRsRL...spcp...............pc.alSLpD....p-ctth..............cpppt-oRLsc-D--h..sEGh--asp....Ds+luL.G++sc+pppp++RcphpEhIps...........p-s-sEcpts..aEssQh+tGhsshp.................th.p.s..................PphssLP...pLsssl........p+Lppsl...sshptpppphppplppLpcE+tcIspRcp-lpphlpc ...................................................YspcYlpELpsso.ss..Ppt......................ts.p......................hpl-..t....th.sthh..................spsssIPocucIcctKpRRs+h.........tpcp..................tc.aIuLp-......tsp.pph............t.cppptcoRLlp-D--h..sEG.h--asp....DspluL.Gc+sc+cpcc+...+Rppht-hIsc..................ttts-s-sEcptt....aEssQhRtGhsshp.................................t...t..p.s...................................sphssLP...pLsssl.............t+Lpptl...sthctppsphttplpp.LccE+t-lttcct-lpthlp.......................................................................................................................... 0 34 59 89 +15314 PF15459 RRP14 60S ribosome biogenesis protein Rrp14 Wood V, Coggill P pcc Pfam-B_10508 (release 26.0) Family RRP14 is a family of nucleolar 60S ribosomal biogenesis proteins from eukaryotes. RRP14 functions in ribosome synthesis as it is required for the maturation of both small and large subunit rRNAs and it helps to prevent premature cleavage of the pre-rRNA at site C2 [1]. 
It also plays a role in cell polarity and/or spindle positioning 2], 22.40 22.40 23.20 23.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.32 0.72 -3.90 54 167 2012-09-24 12:25:37 2012-09-24 13:25:37 1 6 153 0 128 164 0 56.80 45 11.77 NEW c-cL+pHuptFDsLlsLIPAKaYhs.-csp.............cphKp..KKpo...KcptKpA...K+sKLDP- ...p-RL+sHupuFDuLLuLIPAKaYas.--ss................cQh+p......KKpT...K-ps+pA...KRuKLDP-.... 0 43 79 112 +15315 PF15460 SAS4 Something about silencing, SAS, complex subunit 4 Coggill P pcc Jackhmmer:Q04003 Family SAS4 is a family of largely fungal silencing regulators. This silencing is mediated by chromatin. SAS4 specifically silences the yeast mating-type genes HML and HMR [1]. SAS4 is found to be one subunit of a complex, the SAS complex, that interacts with chromatin assembly factor Asf1p, and asf1 mutants show silencing defects similar to mutants in the SAS complex. Thus, ASF1-dependent chromatin-assembly may mediate the role of the SAS complex in silencing [2]. Co-expression of Sas2, SAS4, and Sas5 in Escherichia coli leads to formation of a stable SAS complex that acetylates histones. SAS4 is essential for the acetyltransferase activity of Sas2, and Sas5 is also important [3]. 22.50 22.50 22.90 23.20 20.90 22.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.33 0.72 -4.14 48 116 2012-09-24 12:56:10 2012-09-24 13:56:10 1 1 114 0 88 119 0 97.90 40 18.86 NEW DPLsDshYpshH++hcRpE+phpNt-+tRuppEt-pLpclL-tLpsh.............DWlRsh.slTs.Is-.s-cc-hEsKRphhlcplpslLcKFcpW+ccccchp.tcpc ........DPLsDshY.hhH++hERpE+plRNtE+tRupaEp-pLp+LL-pLpuh.............DWLRlh.Glou..ls-..s-++phEsKRphh.......lctlpulLcKFctW+ccE++p+hc..c.............. 
0 15 43 73 +15316 PF15461 BCD Beta-carotene 15,15'-dioxygenase Coggill P pcc Jackhmmer:Q4PNI0 Family BCD is a family of bacterial and archaeal proteins is found in bacteria and archaea that catalyse or regulate the conversion of beta-carotene to retinal [1]. Characterisation of BCD proteins shows them to cleave beta-carotene at its central double bond (15,15′) to yield two molecules of all-trans-retinal. However, the oxygen atom of retinal originated not from water but from molecular oxygen, suggesting that the enzyme was a beta-carotene 15,15′-dioxygenase, rather than a mono-oxygenase that catalyzes the same biochemical reaction [2,3]. 27.70 27.70 54.90 54.90 27.60 27.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.98 0.70 -4.97 56 106 2012-09-24 13:25:51 2012-09-24 14:25:51 1 1 93 0 33 115 1588 261.10 23 85.69 NEW llullhlGlPHGAhDthlsh+h.h.........phpthhtahshYlslushhlhlWhhhPshuLhlFlhlohhHFGpuDhtth...........ssh...chh..th..........ls+GGhlhhhls.hh.ph.....sc........shtlh.shl.ss......ss.tsh..............shp.......s.....lhh.hhhh....hh.luh.h...h......hh...htthp........tc.tht.h.....hsEhs...hLhhh.FhhlPPlluFulYFslhHShRHltchh................ppl....st.ts...st.t...........h.............hhp.puhsh...olsuhlh..hs...slhh..hhss...shsh......tss...h....lthhhlhlAuLTlPHhlllshhc+c .................................lullhlGlPHGAhDhhlsh+h.h..........php.hhhahshYlslsshhlhhWhhtPshuLhlFlhlohhHFGpsDhthh...........sph..phh.h...ls+Guhhlhhss.hh.phsp........shtlh.shlss.......ss..sh..............hht..h....hhhhhhh....shlsh.h...h..hh...htt.ht........tptht.h....hs-hhlLhhhhhhlPPlluFulYFslhHSh+phhphh................ttl.....pt.tt....ht.............h...hhtpshsh...shhuhhh..hs...shhh..hhss...s.........tst...h....lthhhlhlAuLTlPHhllsshh................... 
0 12 25 29 +15317 PF15462 Barttin Bartter syndrome, infantile, with sensorineural deafness (Barttin) Coggill P pcc Jackhmmer:Q8WZ55 Family Barttin is a family of mammalian proteins that are chloride ion channel beta-subunits crucial for renal Cl-re-absorption and inner ear K+ secretion. Bartter syndrome is a term covering a heterogeneous group of autosomal recessive salt-losing nephropathies that are caused by disturbed transepithelial sodium chloride re-absorption in the distal nephron. Mutations in the BCD proteins lead to sensorial deafness. 27.00 27.00 47.90 46.70 18.70 17.30 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.71 0.70 -4.69 11 35 2012-09-24 13:54:09 2012-09-24 14:54:09 1 1 25 0 16 28 0 204.70 63 69.83 NEW SHDRPQVYGTFYAMGuVMVIGGVIWSMCQCYPKITFVPADSDFQGlLSP..KALGLLENGLusEhK...u..PQPPYVRLWEEAAYDQSLPDFSHIQMKVhGYSEDPRPLLAPc.up...p.tuuDGGcG.uPp-uQAWlEAAVVV......HRGSDEcEGcRss.oQSpsuPPssPQGP...APLASFQDDLDhGSSEGSSPsPSPPstEEPpsPs.tEP..hAsRs.LDRFcDFALID.uP.TsED ...SHDRPQVYGTFYAMGulMVIGGlIWSMCQCYPKITFVPADSDFQGILSP..KAhGLLENGLusEhK..S..uPQPPYVRLWEEAAYDQSLPDFSHIQMKVMuYSEDPRsLLAP-huQ...chGsSDGGEG.GPtDsQAWhEAAVVl......HRGSDEsEGERph.oQShPuP.u..CPQGP...APLASFQDDLDhuSSEGSSPssSP.-tEEspsPp.pEP...uCRs.LDRFpDFALID.APThED................................... 0 1 2 4 +15318 PF15463 ECM11 Extracellular mutant protein 11 Coggill P pcc Jackhmmer:Q04110 Family ECM11 is a family of largely fungal proteins. ECM11 interacts with Cdc6, an essential protein involved in the initiation of DNA replication, and is a nuclear protein involved in maintaining chromatin structure [1]. It was previously identified as a protein involved in yeast cell wall biogenesis and organisation, but is also found to be required in meiosis where its function is related to DNA replication and crossing-over [2]. 
25.00 25.00 26.20 25.50 22.80 22.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.47 0.71 -3.89 53 98 2012-09-24 14:36:33 2012-09-24 15:36:33 1 3 98 0 76 99 0 135.80 26 27.89 NEW DYssptLpphsas-LpspsFD..hsPssht...............................ttsssss...shpp.+....lpphhpt..sppp...pc....phFushohc-W--sGDhhl-pFspllp+lpcsRpp+RchhphFEsEIscRpcsVptcsptlscKLpch+ptGpcll ......................................................................................................DYssthL.phsas-LpspsFD..hsPssst..........................t..ss......slp-.+....lpphhsh....scpp...pc..........paFusholsEW--sGDhhl-pFsplhp+h+csRpp+RphsthFEsEIpcRt-sVctcspt...lsc+Lcch+ptGtcll... 0 12 36 63 +15319 PF15464 DUF4633 Domain of unknown function (DUF4633) Eberhardt RY re3 Jackhmmer:A6NJI1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 94 and 123 amino acids in length. 27.00 27.00 69.70 69.50 22.20 21.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.52 0.71 -4.24 6 20 2012-09-24 14:54:23 2012-09-24 15:54:23 1 1 18 0 12 20 0 109.70 68 95.26 NEW MGTuLRSQSLRGPpPSYGKLQEPWGRPh.....EGRLRRALSLRpGREKSRSp..-.tGPEtLDssGQEhLPGoLGDTEQLIQuQp.usSRRWLRQYQQpVRRRWcS.FVuSFPsVTLSpPAS MGTGLRSQSLRGPRPSYGKLQEPWGRP......EGpLRRALSLRQGREKSRSQsL-...tGsEGLDssuQEtLPGoLGDTEQLIQAQR.uGSRRWLRQYQQpVRRRWcS.FVu.FPSVTLSQsAS 0 1 1 2 +15320 PF15465 DUF4634 Domain of unknown function (DUF4634) Coggill P pcc Jackhmmer:Q8WWF1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 98 and 133 amino acids in length. 
27.00 27.00 33.40 48.00 18.80 17.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.76 0.71 -3.94 9 32 2012-09-24 15:04:50 2012-09-24 16:04:50 1 2 22 0 14 26 0 117.90 64 98.13 NEW MDVLFlAILAVPLILGQEYEDEEtLEED-YYQVsY.YYTVTPoYDD..FusNFTlDYShFESEDRL.N+.LDK-V..TEAVETTI...SLpTcttD+p+PVTlKPVThE.P...........SPDLNDA.VSoLpSPlPLLLShsLVQuGMY .MDVLFVAILAVPLILGQEYEDEEtLtE.D-YYQVlY.YYTVTPsY.DD..FuusFTlDYShFESEDRL.N+.LDK-l..TEAsETTI...SLcTttsD+.KPVTlKPsThE.P.............SPDhNcA.VSuLpSPlsLLL.hshlQsGha................................................................. 0 1 1 1 +15322 PF15466 DUF4635 Domain of unknown function (DUF4635) Eberhardt RY re3 Jackhmmer:A6NLE4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 120 and 154 amino acids in length. There are two conserved sequence motifs: LEQ and DLE. 27.00 27.00 110.20 110.00 22.10 21.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.78 0.71 -4.88 6 26 2012-09-24 15:10:22 2012-09-24 16:10:22 1 1 19 0 12 15 0 130.80 70 94.84 NEW MslQ......cVGsRtRsuA..........E.....lhERRccS.+C-.....DKKQoLLuLLILVLYLGTGI..SG+SWEVSERIRECNYaQNsVsSQGhEYQTsEPuEEPlKslRpWLKENLHVFLEKLEcEVRELEQLV+DLE.WLDALLGDsahEEPCS .........MshQQVuuRtpVAA...............E.....LlEpRRsS.HC-.....D+KQTLLALLILVLYLuTGI..SGSSWEVSERIRECNYaQN.VsSQGhEYQTsEPSEEPIKslRsWLKENLHVFLEKLEcEVpELEQLVpDLEhWLDALLGE..s+.EE.Cu.. 0 1 1 1 +15323 PF15467 SGIII Secretogranin-3 Coggill P pcc Jackhmmer:Q8WXD2 Family Secretogranin_3 is a family of vertebrate proteins that is one of the granin family. Granins are rich in acidic amino acids, exhibit aggregation at low pH, and possess a high capacity for calcium binding. Because granins are restricted in their localisation to secretory granules of neuroendocrine cells, two interesting characteristics of their sorting mechanisms have been observed. 
These are, first, that they aggregate on low pH/high calcium concentrations and second that two of them carry an N-terminal disulfide loop, mutations in which lead to mis-sorting. Thus, granins are thought to be essential for the sorting of secretory proteins at the trans-Golgi network. Chromogranin A (CgA) binds to SGIII in secretory granules of endocrine cells [1]. SGIII directly binds to cholesterol components of the secretory granule membrane and targets CgA to secretory granules in pituitary and pancreatic endocrine cells [2]. Mutations in the SGIII gene may influence the risk of obesity through possible regulation of hypothalamic neuropeptide secretion [3]. 27.00 27.00 60.80 60.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.79 0.70 -5.40 8 53 2012-09-24 15:34:09 2012-09-24 16:34:09 1 2 36 0 27 59 0 404.80 72 96.68 NEW plpAFPsPsuu.cDKulYNRELoEERPLpEQIAEADolKt....ucopsttspp-pcN.pDsDDLslLKSLAEppK.sKcsuslpsoh.-cphssDDoDSTKsRRLsDDYDSTKsGhDY.KYQDDP-uhRQLDGTPLTAEDIVQKIAs+IYEEsDRGVFD+IVSKLLpLGLITESQAcTLE.EVAEALQcLITKpAKNNEh-stshsttss+u-p......................ssscpscp+h..........h..ts.p........--o......susoWs..pshEcRNElsPEDshpDLQYFPNFY+LLKSLsS.EpDscE+ETLITIMKTLIDFVKMMVKYGTITPEEGVSYLENLDAMIAlQTKNKLGKuLu...ssshssPs-..Ksh-EsDsTKsEAAKMpKEYEsLKDSTK-E.Qsso-.s-cP.GKSEoYLEAIRKNIEWLKKHNK-GNKE..DYDLSKL+DFhDQQsDuYl-KGIL-KEEuDsIKRIYSSL ..........lpAFPpPsuu.pDKslHNRELSsERPLpEQIAEAEuDKh....+cshssENKsuppNYSFlDsLNLLKAlsEKEK.EKE+pSlRSSsh-s+Lsl-DsDSTKNR+LlDDYDSTKSGLDa.KaQDDPDGLHQLDGTPLTAEDIVpKIAsRIYEENDRGVFDKIVSKLLNLGLITESQAaTLEDEVAEsLQcLIoKEApNhEc-....scPsS+oEs........................psschsEc.h.....TshAuhQ.DuhspsEND-T..lSNThTLoNsLERRscsasEDsFE-LQYFPNFYALLKSIDS.EKEAKEKETLITIMKTLIDFVKMMVKYGTISPEEGVSYLENLD-MIALQTKNKLEKNsTp..hS+LFsuPoE..KSpEEoDSTKEEAAKMEKEYsoLKDSTK--psssus.T-EPKGKTEAYLEAIRKNIEWLKKHsKKGNKE..DYDLSKMRDFINQQADAYVEKGILDKEEA-sIKRIYSSL...... 
0 1 3 11 +15324 PF15468 DUF4636 Domain of unknown function (DUF4636) Coggill P pcc Jackhmmer:Q8WWF3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 196 and 244 amino acids in length. 27.00 27.00 59.00 58.60 21.50 21.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.47 0.70 -4.91 8 36 2012-09-24 15:38:09 2012-09-24 16:38:09 1 1 27 0 19 25 0 224.10 63 98.88 NEW MG-LhSLFWEVDPPPlPLoaoIPsQDaECpKDDSCGAIGSFLLWYFlIILVLMFhSRASVWMS.EpKcDEDSGTSASVSKASKDsSYKRQsK-GsWDSsQhM..KKPKQsQLoPVTDSEVALVNAYLEQRRAp+HSQhsQVNQlppDSDTTEsDSEESNSGASSWKESESEHHPSPAuI+RRKlAQRQ+slGSYQIRERPCLHCKAMRTNEWLTRHFLQssSsssPhKuDhQEENSlPEINTKFSK ....................MGcLaSLFWEhDPPshPLshsIPsQ-aECt..KDD..SCGslGsFLLWYFlIlhV...LMahsRASlWMS.EsK+...DEc..SGT.SsSluKASK-sShKcQSK-u.sWDs.QhM...K.KPKQsQLoPV..TDSEVALVNAYLEQRRAR+pSQFspVNQsQ+DSDTTECDSEESNStASSWKESESEHHPSPsSIKRRKhAQRQRNl.GSYQlRERPCLHCKAMRTNEWLsRHFhQpsSlssPMKG.D.QEEsShs-.INTKFSK..... 0 1 1 4 +15325 PF15469 Sec5 Exocyst complex component Sec5 Wood V, Coggill P pcc Pfam-B_353125 (release 26.0) Family This Sec5 family of eukaryotic proteins conserved is not representing the Sec5-Ral binding site. 
30.00 30.00 30.60 30.70 29.40 29.80 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.89 0.71 -4.55 80 361 2012-10-03 17:31:52 2012-09-24 17:03:10 1 14 260 0 261 467 2 197.60 24 22.04 NEW scplcpLlcsNFspalpsKsslDslapph..............pp...pptp.ttpt..........................................shppLpppl.pp...stppusphhpslLcppp+scphRsslshlp+h+hlFsLP...pplccs..lpc.s-Ycthlp-Y.......p+u+slhppp.................................psplhc+.....las-VEphh..pph+pplhppLhsss....................pshcp............hpphI.chLlcL........ps..........pt............cPhhhhlssp ...................................p.tthphlVcsNaspFlpspsolcslhpch............................................ct......ptpp....tt........................................................................hsppLpsslpp...ssspupplapslLsp+p+t-pl+sslshlp+h+hl...FsLP...sslpcs..Ipp.t-Y-tslp-Y...............p+A+slhtps..................................pl.lhp+..............................hhp-VEphl.....pph+c..pLhc+Lhpss.................................pshcp......................ttchl..phLhcL....................ps......ps................sPhW.hl.................................................................................. 0 99 153 221 +15326 PF15470 DUF4637 Domain of unknown function (DUF4637) Coggill P pcc Jackhmmer:Q8WW18 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 142 and 178 amino acids in length. 
27.00 27.00 43.00 42.30 21.70 21.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.32 0.71 -4.64 5 25 2012-09-24 16:10:32 2012-09-24 17:10:32 1 1 22 0 13 24 0 151.70 67 90.85 NEW MDKHGVKTPLWKKElEEPRAcEsEtEEAcEGSE-EDEsppRPPEESAAEGEt-uR.AEtsEGRERRSVSYSPLRQESSTQQVALLRRADSGFWGWLSPFALLGGLAAPADRKRSLPEEPCVLETRRRPPRRGGCARCEILFCKKC+oLHSHPAYVAHCILEHPDLGKAsAuGu ................MDK+..GVKTPLWKKEsEEscAcEsEtE..c..tcEGS....E.-c-p..pRsstESAsEGE..E.s.R.A-EsEGRERtSVSYsPLRQESSTQQVALLRRADSGFWGWLuPhALLGGLsAPsDRKRShPEEPCVLEhRRcPPRcGGCA+CEILFCKKCRoLHSHPAYVAHClL-HPDLG+AtAuGu..... 0 1 1 2 +15327 PF15471 TMEM171 Transmembrane protein family 171 Coggill P pcc Jackhmmer:Q8WVE6 Family This family of proteins is found in eukaryotes. TMEM171 is also known as parturition-related protein 2. Proteins in this family are typically between 242 and 326 amino acids in length. 25.00 25.00 25.20 172.40 24.50 24.40 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.04 0.70 -5.64 10 40 2012-09-24 16:12:44 2012-09-24 17:12:44 1 1 32 0 22 37 0 297.50 63 97.65 NEW SPsAAAEPDG-ppDRpVSKLIFFLFVFGAVLLCVGVLLSIFGFQACQYEsLsDCShVLKlAGPuCAVlGLGsVILARSRARLQl+ptphQGpQsDPDpAFlCGESRQFAQhLIFGFLFLTSGMLISILGIWVPGCuSsWsQEPLN-TDouDuEPQICGFLSLQIMGPLIVLVGLCFFVVAHVKKRNNLNsuQDASEsEEtpoQSsEPVQVTVGDAVIIFPPPPPPYFsEoSASAso.pSPGANuLLPsENPPSYYSIFsaG.RT..P-..sQGssSERDpESIYTISGTssSSEISHsPHLsSE.PPRYEEKETssuTsLSsSS ..........SssusAEPDG-ppDR+lSKLIFFLFVFGAlLLCVGlLLSIFGaQACQYcslscCuhVLKlAGPuCAVlGLGAVILARSRA+LQLRptphp.GpQhDsDpuFlCGESRQFAQCLIFGFLFLTSGMLISlLGIWVPGCuSsWs.Q-sLNETDou-uEPphCGFLSLQIMGPLIVLlGLCFFVVAHVKKRsNLNsuQDu.SEpEEtpsQosEPVQVTVGDuVIIFPPPPPPYFsESSuSA.ls.pSPGuNuLhPsENPPSYaSIFNaGpT.hP-sQGsAoERDpEoIYTISGssSSSEhStss+LsSELPPRYEEKEsssss.Ls.SS..... 0 1 2 7 +15328 PF15472 DUF4638 Domain of unknown function (DUF4638) Coggill P pcc Jackhmmer:Q8WTQ4 Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 240 and 272 amino acids in length. 27.00 27.00 41.20 41.20 22.40 21.80 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.02 0.70 -4.80 7 31 2012-09-25 09:50:52 2012-09-25 10:50:52 1 1 25 0 16 34 0 224.40 51 86.44 NEW MoEps.DhcsLMPTERKShWRTAEERRMSDLTRVLEWLERRQGKK+Qs.p+pKpcsps...ss.tKtpKKspGhpc.tpttsp+ss.hspphlp.ht...+cs..ssth++h.ts-.KG+RLShlsusYs+Dus+K....S............DlDIKDsIALES.sQRsssaRRQSo.lDPhlQEs.hFGsRRuolLR-W.osKsPDssYERKLKSLMEKGhEPKhEhsKMLKPEEVLSCRYLRLSKNNIRTLLKLCKDAGMsVDIHPHMVEuEIDAKKVFupp.SVA ..............................MoEps.sh+slMPTcRK.hW+TsE-RRMSDLTRVLEWLERRQGKKKQs.pK.pKscshs......ctp.tKctKKspGhhptp......ttsp+.........ts.hs...p....psht.......++s...ssta++h.sl-.KG+RL..Shl.s.usY.h+Dus+K....S............-hDIKDsls.ES.TQRsssaRRQSh.lDPhLQ-...s.hFuuRRsohhRDW.ssKhPD.sYERKLKSLMEKusEPKhEsh+MLKPEEVLSCRYLRLSKpNI+TLlKLC+DAGhsVDIHPHM.lEt-IDs+KlFst..shA........................ 0 3 3 4 +15329 PF15473 PCNP PEST, proteolytic signal-containing nuclear protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8WW12 Family PCNP is a PEST-containing nuclear protein that is ubiquitinated by NIRF, a Np95/ICBP90-like RING finger protein. PEST sequences, which are rich in proline (P), glutamic acid (E), serine (S) and threonine (T), are found in a number of short-lived proteins, such as transcription factors and cell cycle-associated proteins. Their function is generally controlled by proteolysis, mostly via ubiquitin-mediated degradation. Thus, NIRF and PCNP are a ubiquitin ligase and its substrate, respectively, that may constitute a novel signalling pathway with some relation to cell proliferation [1]. 
27.00 27.00 28.30 31.90 26.70 26.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.99 0.71 -3.90 11 113 2012-09-25 12:41:17 2012-09-25 13:41:17 1 5 66 0 63 106 0 125.80 61 77.26 NEW GPEEcuEcsh+p+slSS.........osGGcuSS..RSuE+tust..............-tspsssssPsPsKlSKhGFuh..............sothh+Ks..ssISIKLGAo.....KEss..lss.p.slAuVFNtD.DDSEPEEMPPEAKMRMKNIGR-TPTSAGPNSFsKsKaGFsDspKlaERclK .....................................................................pEtttp.hppphssS............ssuGEuoS..+SuE+pusp...........--stshssc..PsKhSKhGFu...l................uoQ...ssKKu....suISIKLGus...KPKEss......PolsPK...phoVAusF..NED...-DSEPEEMPPEAKMRMKNIG....R...DTPTSAGPNSFNKGK+GFSDsQKLWERslK.............. 0 16 20 37 +15330 PF15474 MU117 Meiotically up-regulated gene family Coggill P pcc Jackhmmer:Q9Y7V0 Family This protein was identified as being up-regulated during meiosis in S.pombe. This family of proteins is found in largely in plants and fungi. Proteins in this family are typically between 128 and 920 amino acids in length. 27.00 27.00 32.10 31.30 25.00 24.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.18 0.72 -10.87 0.72 -3.50 16 50 2012-09-25 12:57:57 2012-09-25 13:57:57 1 5 20 0 46 50 0 101.90 29 25.76 NEW shsCKGSulCus........ts......ssuCssAlspa..psssl............Y..............pshsu.............hspus.....ChA............hapCss..s.ss........uho......Gpplhsthpsl...csssCptCGohtass.....pCplslsYCo.sC ....s.hsCcGSuhCus.........st.....tssCcpAlsp.h...ssssh............Y.......sshou...........................hssGp.....Chs............hapssG..s.ss........slo........Gsplhssappl...cspsC.p.hCGstta..ss.....uCphslsYss.sC.............. 0 11 31 44 +15331 PF15475 UPF0444 Transmembrane protein C12orf23, UPF0444 Coggill P pcc Jackhmmer:Q8WUH6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 94 and 119 amino acids in length. 
27.60 27.60 27.70 71.40 25.50 27.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -3.93 8 68 2012-09-25 12:37:30 2012-09-25 14:34:08 1 1 40 0 44 43 0 91.50 75 79.89 NEW sKDHPpsps....GMlWRVTGGLFuVTKGAVGATVG....GVsWlGGKSLElT.KTAVTo...VPuMGVGLVKGGVSAVAGGVouVGSuVAuK.VPhT.uKKKDKo ............KDHPQQ.QP....GMLSRVTGGlFSVTKGAVGATlG....GVAWIGGKSLElT.KTAVTo...VPSMGlGLVKGGVSAVAGGVoAVGSAVssK.VPhT.GKKKDKS.... 0 2 8 23 +15332 PF15476 SAP25 Histone deacetylase complex subunit SAP25 Coggill P pcc Jackhmmer:Q8TEE9 Family SAP25 is a family of proteins found in eukaryotes. SAP25 is a core component of the mSin3 co-repressor complex whose subcellular location is regulated by PML. mSin3, the transcriptional co-repressor, is associated with histone deacetylases (HDACs) and is utilised by many DNA-binding transcriptional repressors. SAP25 is a nucleo-cytoplasmic shuttling protein that is actively exported from the nucleus by a CRM1-dependent mechanism. It binds to the PAH1 domain of mSin3A, associates with the mSin3A-HDAC complex in vivo, and represses transcription when tethered to DNA [1,2]. 27.00 27.00 155.10 155.00 19.80 14.90 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.49 0.71 -4.81 5 25 2012-09-25 13:48:04 2012-09-25 14:48:04 1 1 22 1 14 27 0 186.90 67 79.86 NEW MTLLAPWDPNYEAKAGPQLVWGPSCGSGASFSGRTLCHPSFWPLYEAuSGRGhRPps...PuuGHQsGEQAPRDAGFPVMCsEDVFLLDPLLPsGQRVPLYLSEAPQQVMGSLKLLLPPPIMSPpVhPhPSpspGCSTAWLSGPELIALTGLLQMSQGEPRPuSSGuP..ussussPussS-sPusS...GGP.SCSHuTDPSLPpTPDTHCP MT.LAPWDPpYcAKAGPR.VWGsuCuSGASFSGRTLCHPSFWPLYEAASGRuLRPhs...PuTGH.NGpQAPtDAGFPVMCsEDVFLSDPLLPpGQRVPLYLSEAPQQVMGSLKLLLPPPIMSPhVLPp.SsspGsSTAWLSGPELIALTGLLQMSQGEPRPuSS..uus..ss.sGsPsssS-.PuPs...Gus.SsSpssDsSlPtTPDsp............. 
0 1 1 2 +15333 PF15477 SMAP Small acidic protein family Coggill P pcc Jackhmmer:G2TRQ9 Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a single completely conserved residue G that may be functionally important. 23.10 23.10 23.60 23.60 22.60 22.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.49 0.72 -3.71 65 309 2012-09-25 13:57:31 2012-09-25 14:57:31 1 6 154 0 194 283 0 74.90 31 22.26 NEW p.W..ssu...s.......h...G.ssp+p..pKFhRLMGupK.......................tsss.s..ts......p..s...............stu....tsppcppclppcLEpQaptu..hph+......G..p+.+..GLG .......................Wcss...sF....s..sc-pp..tKFh+LMGstc..................................................csss.sts.......p.s...............................sth....tstcppcclppsL-pQYptuhstp.......tpp.+...GLG........................... 0 51 78 127 +15334 PF15478 LKAAEAR Family of unknown function with LKAAEAR motif Coggill P pcc Jackhmmer:Q8TD35 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 119 and 235 amino acids in length. There is a conserved LKAAEAR sequence motif. 27.00 27.00 37.50 36.10 24.00 19.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.76 0.71 -3.91 8 31 2012-09-25 14:00:14 2012-09-25 15:00:14 1 2 25 0 19 27 0 117.80 48 65.91 NEW .PKNWpphospcLpKhuPQpRSRYLAYE-ssK-lt-..shuhshKRl+-..hptch.scPR.shsp-h.l-+-+ps+LIGQLKAAEARNRlRlMRLRYpthRApElpHLIuCQPTAhKAlRLEuhlPPplcsspssDhLDKh- ...........s.s+shhtho.ptLtth.stp+p+hhhatc..ccl.t..thuhhspcShE.........sh+h.sDPR.shopslplssc+pspL.lG.LKAAEARsRlRshRLRYs+hR.A-EIshLIppQpSARAAIRLEhFLPPQLKPs+IsDPLDRpE................ 0 8 9 11 +15335 PF15479 DUF4639 Domain of unknown function (DUF4639) Eberhardt RY re3 Jackhmmer:A6NN90 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 161 and 601 amino acids in length. 
27.00 27.00 29.70 29.20 24.20 23.80 hmmbuild -o /dev/null HMM SEED 576 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.86 0.70 -5.86 12 53 2012-09-25 14:32:57 2012-09-25 15:32:57 1 2 36 0 32 52 0 368.20 40 88.05 NEW QERQsR.DRGVTRSKAEKARPPTVPVPQVDIVPGRLsEAEWhALhALEEGEDVVGDILADLLARVMDSAFKVYLTQQCIPFTISQAREAMLQITEWRFLARDEGESAVAEDPTWGEDEEPhACTTDuWAQGSVPVLHAPAs.GlE-pFQuE......D.GusDph.LGRSWhsRGSQE.hpShE.SsEh+hh..susssTsELFQEAGPtssLEEsDsQtp...........uth.....AtS.ssShQ....SsEhssstSPcsSLELo.VASsQAsscRuQPhuSpLSLEDLYhChPQ.DAAGDRL.chcpEGhPplASss..ussShGssT.hsPSsShps.ppPh......ss.p.Rhs+KssssRLDPARLPRHWVRPLAEVLlP............DSEs+PLEsYRGRpRspKTcA...pAtPQusssGsRVSsAsF.....FPLpPusPFRALGss...tlp.PTLNLu.sSPshsSKlPFPSPGlRFLssHPshPDVARSPSPKLWPuAKWPSGWEtEAELLGELWAGRTRVPPQGL-ssD+..cuQDsttWPpssPQVLEATSQVhWKPhLLPEAhKLAPGVSMWN.oTQVLLSS....uVPppEcccGosssP....-QpPIQTGsPKP .................................................s+o+sEKs+........s.ss..VP....pV..D.......IVPGR.lsEuEWhshhshEEGEDhVuDIluDllscVM-ssa....clYLspQslPFTlsQA+-AMLQIsEWRFLARDEGEssVApDP........TWsEDEE.P.sssTDuWAQGSVPV..Lpsss........u.cp..........hpsp..............................p.tt......ht...hht..ptSpp...............................t..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 12 13 15 +15336 PF15480 DUF4640 Domain of unknown function (DUF4640) Eberhardt RY re3 Jackhmmer:A8MTZ7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 99 and 306 amino acids in length. 26.00 26.00 26.70 26.70 19.10 18.60 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.97 0.70 -4.84 11 31 2012-09-25 14:47:49 2012-09-25 15:47:49 1 2 21 0 18 37 0 230.50 51 92.00 NEW SSscSNLSLSVGYFPCE-TaSYE-TsSpEDssS.ssSlHFLPPIQGoWpTESTtRLhtRRDQlpDsPEQFCKLSITLAWDlDVuSssSDSluNWDLsucsQWhDKaPE-cTpLTLuKLDsLVQKLEpFLENcKssccDDSlhPESsQE......EDhpLsSSoPPchAQl...SHQEHsoCQDLPph+s.ENE-lhQhPphP.RLpcpE.sc.hoQuoGSptssosETSSlSoGpsEc-s.....ssSshQsLSCLNFtW.VFRWLRpQVhSSLhRRccPp.cATcusHphAtK+RhSaRuKRIQPQE ...........................SpspSNLSLSVGYFPCEDT..hE-TsuhED.ssSpss.SlHhlPPIQGsWtTEphtRhhpRpDQIQDpPEQFCKLSIhLAW....DlDluSsso.DShsN........h.LsucNpWhDKhPc-cTpLoluKLssLVQchppFLEN.Ks......DDulhPcosp................cDhQLsouSPPchsQs...SHQEassC.QD.s.hps.cNcclh..........t.Eh.........sphhsQuTuSQpTsssEhSSl.StpPEc-D.....TsSpTps.sCLNFtW.sFpWLRpplhSsLhRRccP..pATcusHp.A.p+Rh.aRuKRIQPQE.......... 0 2 2 2 +15337 PF15481 CPG4 Chondroitin proteoglycan 4 Coggill P pcc Jackhmmer:O16883 Family CPG4 is a domain family found in nematodes of one of nine core chondroitin proteoglycans. Vertebrates produce multiple chondroitin sulfate proteoglycans that play important roles in development and tissue mechanics. In the nematode Caenorhabditis elegans, the chondroitin chains lack sulfate but nevertheless play essential roles in embryonic development and vulval morphogenesis. CPG4 has the largest predicted mass of the C. elegans CPGs at 84 kDa. The majority of its 35 predicted glycosaminoglycan attachment sites reside in the COOH-terminal half of the protein, of which four sites were confirmed by DTT modification [1]. The family is rich in conserved cysteines. 
27.00 27.00 28.00 27.00 26.50 26.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.52 0.72 -11.24 0.72 -3.65 15 49 2012-09-25 14:50:01 2012-09-25 15:50:01 1 3 10 0 44 41 0 91.60 21 27.44 NEW sCh+pChsslhcslpthhp.hsp..ps-phcslCspaspuppCl.pp.ttpCcp...phFsshTSuhcahCl-pc-sassphsClptssuslppcC-ppCpsp ....................C.ptChpshhp.hpthht..tp.......shpphpplCptasputpCl.pp.ptpCsp...tthhpthosuhcahCs-pct........tFppphsClst...ss.h.ptCcppC...t..................... 0 17 22 44 +15338 PF15482 CCER1 Coiled-coil domain-containing glutamate-rich protein family 1 Coggill P pcc Jackhmmer:Q8TC90 Family This is a family of coiled-coil family proteins found in eukaryotes. Proteins in this family are typically between 160 and 397 amino acids in length. 27.00 27.00 38.10 37.50 23.20 23.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.68 0.70 -4.70 7 24 2012-09-25 14:58:47 2012-09-25 15:58:47 1 1 22 0 14 25 0 187.80 59 55.95 NEW Tlsp+EDPLNL........GG..GWApSssLpoWSSCHRRRRGAPhY+RRaRYGPKsEYEPPRKQPKQQHuPGsWFQ.PPRpP.h.VhSNWG+WGGPW+PPPssFhKPPs.VQhIRVYGLpPlCL.CCCSCWsGPWNPGWhRPPGRKKRWGRR...GRGLRRpPRRShPRuPP...sDLShLLRPVNLYGWRAPGMRAPpNTTQFIMNQIYEDMRQQEKLERQQEALRAQQA .................................Tlsp+EDPLNL..........Gs.GWApSssLpoWSSsHRRR.GAPhapR..aRYuPKsEYtPPRKQPKQQHuPG.WFQ.PP......lhsNWGphGGPW+PPPssFp+..s.VQhhRVYGLHPLCh.CCCSCWsGsWNPGWsRPPGRKKRW.GRR...GR..GLR+H...PRRS.P.RsPP......sDlSpLLRPVNLYuWRAPGMRAPpNTTQFIMNQIYEDMRQQEKLERQQcALRAQpA........ 0 1 4 6 +15339 PF15483 DUF4641 Domain of unknown function (DUF4641) Eberhardt RY re3 Jackhmmer:A8MYA2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 201 and 519 amino acids in length. 
27.00 27.00 122.20 85.00 19.30 18.80 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.43 0.70 -5.77 16 39 2012-09-25 15:14:33 2012-09-25 16:14:33 1 2 19 0 19 45 0 376.40 41 87.67 NEW pEGcPGo.PsD-cts.slD..h...Lu..-psAAIhp.LoshpshtVp+pPSPEus....ss-suslWA-lEsGPuuRGA.u.............SssEup.pASAusLalsGPttGRAWsss+Ruop.SRhshsss.QpssscG.sthsS-sE..SSDEho-lQhMRVoIphKc...GuQAKssSsccsuDosRHosspsREsFlpVPGshLoSAsRGLoss.lERQA.sGEh-sS...s.KKhpShlWGKtsuRPSasGuAs..................uGuLP+uoPR+KhAQEKKSLGsASchsLGR...sFPsWGQRlSAuPsEPATFPPlSGVsLLG+Sp+.u.ls.tPKpsKpsusGKKsu.u++sREsps...sutEDNDPsRDssspuQlPTHRPtss.....shpsH+GEhSSG-sNhRusQsPGsSpssALSQuulhPRtsAsSGD....QpPss+ssRP-....R.QQpPPGtQGCPRClhLQ+EI-DL+-QLuuhpuLu-+FQ ........EGpsuo.PsDcpts.sl-..h...Lu...pssslh..ho......lpc.Ps......P-ut....sttsus.hssh-sGsstRuuhu.............ussctp.psouushahsG.t.tGRAWtsPcRusp.uRhshp.s.Q.Ps..........o.c.s-..SoDE.s-l.hh+VslphKc...ssQA+ssu.pc.s-ssR+sshps+-sah.lPushLoSsspGLoss.hER.u.sGE.-sS...s.KKh.uhhWGK.tsRPShsssss..................sGsLPpsssR+K.sQEKKS.ussSphs.G+...sFPsWGpRlpuAP.-PAThPPlSGVsLlG+op+.......sKpsKptssG.KKss.u++sREsps...sstEDN-PsRD.sspuQlPTHRst.s.....shsha+GEhSSGD.NhRusQlPGsSp..uhSptslhPRt..uPSGD....Qps.scsstPc....R.QQ.s.GspGCsRChhLQ+EI--L+-QLushp.h.t........... 0 1 1 1 +15340 PF15484 DUF4642 Domain of unknown function (DUF4642) Eberhardt RY re3 Jackhmmer:A8MZ97 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 115 and 196 amino acids in length. 
27.00 27.00 88.90 88.80 20.20 19.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.13 0.71 -4.25 11 23 2012-09-25 15:24:08 2012-09-25 16:24:08 1 1 20 0 13 25 0 142.90 58 83.00 NEW Capu+pcEETEKsPCTsuNsGEDC..sAANsEpsNscDQEK.hlhphhshshPhRPGILVQRpsK-.hsTsLtNpc-hEsccEs+hK-+QcPcsutEssQE.sD.lpKs.Isloto.SVs-sQKRPLKGVTFSREVIVVDLGpEa.PsPRSYTREHKERK .......hhpsppscETcKsPChssstG.cC..sAAps.psNstDpE+..llhQlhshssPMRPGILVQRpoKEshsT.PL-N+c-hEsEEEsphpEKQEPc.NAGEsuQE--D.LQKh.IslT+TsSslEsQKRPLKGVTFSREVIVVDLGpEY.PsPRSYTREHKERK 0 1 1 1 +15341 PF15485 DUF4643 Domain of unknown function (DUF4643) Eberhardt RY re3 Jackhmmer:A8MZF0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 254 and 462 amino acids in length. 27.00 27.00 163.60 129.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.07 0.70 -4.92 10 19 2012-09-26 07:55:05 2012-09-26 08:55:05 1 2 16 0 10 13 0 231.20 53 73.08 NEW spDuDtuPssPosQPLlPVAHIRPLPsuAQ......ssSPpPEEPsV.......s+sPPuFQAS...VsREuusRVVV.PIAP..ThRSsuPSsaSlsPsuP-up+lEc.Ph....AuPusEAcpVsSsstuSosu.sSsPHPsPsP+VAPKP.............+hSGWTRLKKQLhEEAEEP.aPp.ptsLEsp..t...ppEsstP.........ss+sPsSRAS+MWDAVLY+MSVAcs.ps+.sGPssuE+oh..............uul..oRLP.FLYRPRFNARKLQEAA.RPPPTlpsllEL........sPpPKNFNRTAsGWRL .....tDuDtsPssspsQPLlPVAHIRPLPTusQ......usSPhPEEPsl.......sRPPPuFQAS...VsREuusRVVV.PIAP..Th+S.tsSsaShsPhuPttcplE-.Ph....AuPAsEscpVsSsshASuPs.sSGPHPsPsPKVAPKP.............+lSGWThLKKQLhEEs.EP.hPt.p.sLEsp..t.....cssts.........sspsPsSRAS+MWDAVLY+MSlscs.psp.sGsssup+s...............usl..oRLP.FLaRPRFNARKLQEss.RPPPTlpsllEL........sspPKNFNRTAsGWRL.. 0 1 2 4 +15342 PF15486 DUF4644 Domain of unknown function (DUF4644) Eberhardt RY re3 Jackhmmer:A8MZG2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 143 and 191 amino acids in length. 
27.00 27.00 54.80 54.60 16.90 16.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.07 0.71 -4.28 7 34 2012-09-26 14:07:21 2012-09-26 15:07:21 1 1 24 0 17 35 0 151.20 72 86.36 NEW .AVShAtG+PuHsDsPPNIYEGGLGspQpQCPSsQGSKPKNFRLRHLRGLALYLPuHMQPAGQCESHWLuRLMuGGCLPp....PE....GsAWsLc..LPQGsLuPsNShCoAhLEAplPRDSLGsTASSSShDPsKGs.sQPuPsEG.GlRPKRSWGshEEuhCPLCKR ....................DAVS.AtG+PuHPDsPPNIYEGGLGuQQp.QCPSAQGSKPKNFRLRHLRGLALYLPGHhQPAGQCESHWLGRLMAGGCLPp....PE....GoAWsL.D..LPQGTLGPGNShCSALLE.ApLPRDSLGsTASSSSMDPsKGs.sQPuPsEGLGLRPKRSWGs.EEshCPLCKR.... 0 1 1 3 +15343 PF15487 FAM220 FAM220 family Eberhardt RY re3 Jackhmmer:B1ANY3 Family This protein family is a domain of unknown function which is found in eukaryotes. Proteins in this family are typically between 217 and 277 amino acids in length. There are two completely conserved residues (S and L) that may be functionally important. 27.00 27.00 41.40 38.80 22.90 22.20 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.70 0.70 -5.03 11 40 2012-09-26 14:59:18 2012-09-26 15:59:18 1 5 26 0 18 42 0 241.00 47 83.03 NEW +DtRsslslsLhsh+suct.......DsD+Lhpslppp..............pcssh...........p-..ssshsspPsst.pGsSptpth.Ssch+pstShu..u.lh+uuppsLP....l+posptsSsuuuups.................cuVsh..tP..tcE.pFssl.ssls-AllsDWLt+ss+ssssh+sWsppG-s.......WlStlPspQKl....hEhGllcDE..sAhhcGlusc.lp.ssLculLSAllppYP..psLhssEs+pVFL-hLpshhScpThEYKKhLSsl+soosshQlshhLLAlpuFsLh ...............................................................RD+RGsLGoCLApVptutGu......DSDKLshuLKKR...............EusaP..........uD..sPSWhsKPsV..DGNSQuEuL.SlEM+ps.StA..uLhLHuGsslLP...al+ESlRRNsASAAo.S.................pAVuLhsAP..oE..pFApVuCussEALh.p.DWLutusRATcupRGpC.KGEs........hlScLPp+QKL.....EMG.hpD-PPsAhscGLGoE.LE.SsLHSlLSAsL+shP..-VLLs-ETKplFLDpL.............pPhFScQThEaKKML...KsTocsLQIsLuLLALpsFpLh.................... 
0 8 9 11 +15344 PF15488 DUF4645 Domain of unknown function (DUF4645) Eberhardt RY re3 Jackhmmer:B1ATL7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 200 and 298 amino acids in length. 27.00 27.00 52.20 52.10 20.00 19.60 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.85 0.70 -5.25 7 21 2012-09-26 15:18:55 2012-09-26 16:18:55 1 1 18 0 11 21 0 273.50 64 98.88 NEW MAClENVLGGHAsSPhsVss-cNuspEs+..sh.LQC.SSh.+...-DsEsWG+PpVsLRPPhsVLoDLsRpQLEpPSERTGSCIPVsssRAL+pPYsPPPAlAEESLATAElNSSEGLAGhRQcGQDSI.NVSQEFSGuPPALMlGGTRVSstGTERGGNNA+hYssLPRGQGFFPPRGPQlRGPPaIPTLRSGIhMElPPGNsRMAs+c+LA+VSFP.GuPRHPhcNWPRP..lPLSSSTsGLPspooAHCFIPPp.PPSFNPFLAMPhAFAPPPlFuPPLPSYFupFPShGMPsPussN ..MACIENVLGGHAPSPhlVssDcNGNpE.+..DhPLQC.SSh.c...DDAEsWGpPpVsLRPslNVLTDLspcQLEtPSERT.GSCIPlcS...RAh+HPYGPPPAVAEESLATAEVNSS-uLAGWRQcGQDuI.NVS.EhSGuPPALhlGGT+VsNtGTERGuNNARLaVALPRGpGFFPPRGPQVRGPsHIPTLRSGIhMEVPPtNTRhAs.+t+LAHVSFPLsGPpHPhcNWPRP..lPLSSSTPGLPsCSTAHCFIPPR.PP.FNPFLsMPlsFAPPPIFGPPLPSYFupF.ShGMPAPAssN.. 0 1 1 1 +15345 PF15489 CTC1 CST, telomere maintenance, complex subunit CTC1 Coggill P pcc Pfam-B_19246 (release 26.0) Family CTC1 is one of the three components of the CST complex that assists Shelterin to protect the ends of telomeres from attack by DNA-repair mechanisms. Mutations in human CTC1 have been recognised as contributing to cerebroretinal microangiopathy. 
25.80 25.80 26.50 25.80 25.30 21.30 hmmbuild -o /dev/null HMM SEED 1144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.30 0.70 -13.98 0.70 -7.22 13 86 2012-09-26 17:22:21 2012-09-26 18:22:21 1 5 45 0 49 95 0 701.10 36 91.14 NEW sLPLSYSFVSVQ-L+TaQ+LPCCSHLSWSSosYQAWApEAt........PsGsPLPREpLLLLGTLTDLsuDhEpEsRsGuLYVRDNTGsLsCELlDLDLSWLGHLFLFPoWSYLPPAphsSst...pGHLELhusPVPVhPLslSPsPhTPlPVLYPEpASpLLphRsKhRsspsNLAG+LVRlSALV+oppKsYFlLSLG.t.s....sAsopVslIVQlPuQLVWH+ALpPGcuYVLTpLRVoplRG.+ppRVWsTosSScLhsLcPpsVREhEl-htt..........shL-AsspshstPosSQDt......t.psLlRpS+lLsYpGTVTcVLNpsAGLYELDGQLsLCLAYQQhpuhRRVlRPGssLELpDVHLLQSlGGGTp+PVLAsCLRGuVhLpuFSppc.PtspsSaps.GAuLaEpLlWE+pLGLPLYLWAs+ALcELupKLCPHhLRHcQhLpaSuPGsPuLGLQLLAPsL-lLtsP.ssstRNsaoEILEEPH+CPLQKYopLQTPsSFPTLssLpEE..uQ..p+AWASFDP+uLLPLPEAuHLTSsQLNpRLAWSWlCLhPosFpPA..VLLGVLVASScKGsLQLRDpo..GSLPCLsLscspQ...PhhDs......plIGCLVRlE+FQLVlERpV+SsFPSWKELshssFIQK+pARVYlQFhLuDALILsVPRs....ssssso.Ppst.........................spPEGPphGQSRLFLLpHKEuLMKRNFssssusSs-sspPsLSFpVsGoWLGGTQR.KEGouWGPPEspt-...EspDQKVhLlFhGSSVRWFtFLHPGpVYRLVAsssssPtlhcttsuSslSpRsLELuussSCLTVQc-WTLEhtuup...DlsssLshs+sLs..ESSLo-LLSuN.ssSLVSFS.AEILSR...sLCpslssshhh+s........Gsusus+psVKLTVAL-sADschPPH.LDVYIE-PHhPsPLGLLPGARVaFSpLEK+VSRupNVYCCFpsoThVpVLSF....PsEoshSsPLPHIaLAELhpGspsPF.pAosSCHlVsVhSLpLhWVCAHCTSlCsQG+CoRQuPsCPoQooVSQAsI...RLLVEDGTAEAsVTCpsHpVAsALGLsPoEWsuLL-tVRsPG+VALQFpGsGA..QsESsucs--PLohFLpTL.CTSsuVLRPlhLoFcLER+Po.clhPLEPPRLQRFQCGEhPhLTRVNP+LpLoCLSI 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................shhlpa.GhlTtllp...s.ulY..hDt..plhLChsa..h.t.hthhR.Gsplpl.psHhh...t......h.h..C.hutl.l.taot...s..............................................h........................................................................................................................................................t.......hl.Gh...tt.pu.h.l.Dtp..ttl.Cl......t...s..ss.......................hhushl.phpch.llhEp...sthso..................pph.hYl.h.hscshll..s.s..................................................................................................................p.......h.hhh.thchl...............................................................................................................t..h.......sh.l..t...p..........................h.phhtsp....sphlshp.s.h.......hh......................s...t..h..thtltlphtt.t.....lplYhp.sths...hGLLPGstlhhpthp++lS+.ttslYsp..s.so.lplhuh...............s...............PhhhLh....ttt.............u.h.splsslh.lpl.WhCshCt.........slh.ts.t...........C........C.s.tsh.ps.h...phhh-.DGoupAhh.hps.t.lh.hLtls...htsl.t.h...Gpl............................................................................................................................................................................................................................................. 
0 16 19 31 +15346 PF15490 Ten1_2 Telomere-capping, CST complex subunit Coggill P pcc Jackhmmer:Q86WV5 Family Ten1_2 is a family of primarily plant and vertebrate telomere-capping proteins that is evolutionarily related to the mostly fungal family of Ten1, Pfam:PF12658. 27.00 27.00 27.90 28.10 26.20 26.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.19 0.71 -10.21 0.71 -4.51 29 82 2012-10-03 20:18:03 2012-09-27 10:45:23 1 5 69 0 56 78 0 112.70 36 82.16 NEW hspsGshhhh.E.l...uusth.pGpolRshGRLppYDhtpuhusl...........ssspppltVsTph..lcshps.........plGSlY.hlGElppp...pssts..............llpARVhpsVDGlDlsLhcpAlp.pRpahpc+ ....................................h..psGhhhh..E.l........uus.h.pGtolRshG+LptYDhtpuhssl............tssppplhVsTph....l.cshph.........plGSlY.hlGELptp....pssss..............................llcARVhpsV-GhslsLhcpAlp.pRpa.pc.......... 0 19 30 42 +15347 PF15491 CTC1_2 CST, telomere maintenance, complex subunit CTC1 Coggill P pcc manual Family CTC1 is one of the three components of the CST complex that assists Shelterin to protect the ends of telomeres from attack by DNA-repair mechanisms. This family largely represents sequences from plants species. 
27.00 27.00 35.20 39.70 24.10 19.90 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.89 0.70 -5.40 11 18 2012-09-27 10:28:16 2012-09-27 11:28:16 1 3 14 0 13 19 0 276.80 36 26.22 NEW sslspPslLsGpLtLs.....ots.s.sspppChthoD..........uuusVCCsllch-hsslG+cIpVhuWNalPstp......ssG.....hLEllchphs-sss........hs+...........ssslcolPLt.....ssppssuKsRhslpGlLcSVSPlhslPpt.stpsu...............................ss.hGFlsplhsCtCc.hhtt.............psHpFsp.hh.........VYFsGu.uupW+PVLs+hVGphVslSGLKKKlVhlGccsSpl.haVuTpcohlthss.t.phh...hpps.h.h.GcChGpYpGlVoGlYhQGhllELDc....pVWLLlTDp.Ls....ssHuLRVGAl ......sPlspsslLsGsLsLP.....ups.s.sspppChphSD..........uuuoVCChlLcF-.cAlG+cIpVLuWNaLPshpp.....uuG.....hLEllcW+hs-sss........hsc...........suhlpolPLt.....ssppsctto+hpVhGVlcSVSsVhsVPptsttusus..............................sN.sGFlsclhsCtC+.hph.............spsHpF-tphF.........VYFssu.uupW+PVLs+hVGc.VslSGLKKKlV.ls+csShh.hhVoTccohlphssst.thh...hsps.....GcChGsYsGhVoGlYhQshllELDc....pVWLLlsDQhLs....ssHSLRVGAh.................. 0 4 7 11 +15348 PF15492 Nbas_N Neuroblastoma-amplified sequence, N terminal Coggill P pcc Jackhmmer:A2RRP1 Family Nbas_N is an N-terminal family of metazoan sequences. This domain lies at the N-terminal of several WD40-containing proteins. The human protein is over-expressed in neuroblastoma cells [1]. 
27.00 27.00 27.00 29.90 26.90 26.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.78 0.70 -5.30 4 74 2012-10-05 17:30:43 2012-09-27 13:19:37 1 8 57 0 54 74 0 243.70 51 15.14 NEW WHLVLASNGKLLAsVQDQCVEIRSA+DDFGSlIGKCQVPKDPNPQWRRVAWSHDCTLLAYA-STGTVRVFDLhGSELFVIsPusSFPGDhSYAlAGLlFLEYpuSAQWSAELLVIsYpGpLKSYLV..SVGTNQuFQEsHoFSFSSHYsHGIToAIYHPuHRLLLVGGCEou..-sGhSKASSCGloAWRVLSGSPaYKQVTShEDDlus.sp+pGhh+h.Sh+haSRpupEpDGVF+MSLSPDGTlLAsIHFSG+LSlWsIPSLKQpGpWtQsEQPGaD-INPEW ...............................................................................W+llLu..ssGKLLAslQDpslEIRSA+D-FsSllGKC.Q.V.PKDPpPQWRRlAW...S...DCTLLAYA-SoGTV+lF..DLhG.oc.LF.lIsP...u...s....o....h...s....G.....D........l....ShAIAGLlFLEY.+.u.S...A...QWSAELLVIsYcGpL+SYLV..SluT.NQuaQEsHs.FSFuuHYspGIsoAlYHP.uH.R...LLL.....VGGCEss......-..suh.S+...AuusGLoAWRlLSGSPYYKQ.Vo.sttDplsssspphulh.+..hh.s...........hphao.+p...spcpDGlF+MSLSPDGplLAsIHFSGcLSlWslPSL+.ptpWp.sEQPGaschsP....................... 0 18 22 35 +15349 PF15493 YrpD Domain of unknown function, YrpD Coggill P pcc JCSG:Target-418961/SP17457A Family This family of proteins is found in bacteria. Proteins in this family are typically between 236 and 351 amino acids in length. The member from Bacillus subtilis, UniProtKB:O05411, is named YrpD. 
26.40 25.50 28.00 26.20 25.90 23.80 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.55 0.70 -4.69 8 35 2012-09-27 13:15:57 2012-09-27 14:15:57 1 1 27 0 9 35 0 192.80 53 73.28 NEW AstlscGIGGRshlNSo....GuhlsTKlpLPosspls.....ssGssYIYoGF..oG...........ssEuDhGLpYS.......ssYsV..WKP..hhKlGucsppsstYlp.....GhschTYpNGF+PGosVQLTlYKNhNGsT.....RholhGT..........ssDsYsspIIpElssoNluSlopWKhLATlAsossstpp.......hpupFoNIslDupuhTPV......lDspDaApVTl.SGNoVolsV ..........ASQLscGIGGRAYLNST....GulhsTKIpLPoohpl.......SsuTsYIYSGF..oG.........GTEADIGLQYS.......cpYNV..WKP..lMKVGSKspsp..YlE.....GtspFTYsKGFRPGSTVQhTIYKNLNGNT.....RhThWGT..........NNDGYTGRIIoElptTNlGoISKWKsLATlAsotpsQp.h....usFSTuFsNITIDNKAlTPV......lDTQDFAKloV.SGNsVohSV........................................................ 0 2 3 3 +15350 PF15494 SRCR_2 Scavenger receptor cysteine-rich domain Coggill P pcc Rawlings ND Domain SRCR_2 is a scavenger receptor cysteine-rich domain family found largely on vertebrate sequences up-stream of the trypsin-like transmembrane serine protease, Spinesin. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.44 0.72 -3.75 47 411 2012-10-03 20:35:02 2012-09-27 15:58:16 1 29 62 0 195 605 1 92.80 28 15.67 NEW utshlLpVhsstpssWhsVCp-sWssshGptsCppLGa.....p.sappuhslsshpht.uppFhpL.......ssshs..ltpthp....pssCsSuplVSL+Ch.-CGhps .................p.lLpVhsstpss.WhsVC.tD.sWspshu.ptsCpplGat.....p.s..tp...p.s.l....t.l.....s.s.....h.p.t...............p.ahpl..............ssp.s.s...l.phh.........pp...sC...sStp..l.VSLp.C.....t..CGhp.s........................................................... 0 14 28 87 +15351 PF15495 Fimbrillin_C Major fimbrial subunit protein type IV, Fimbrillin, C-terminal Coggill P pcc JCSG:Target-417041/SP13489F Domain Fimbrillin_C is a C-terminal family of major fimbrial subunit protein type IV proteins largely from Bacillus species. 
The family is associated with family P_gingi_FimA, Pfam:PF06321. 27.00 27.00 27.10 27.10 26.90 26.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -10.01 0.72 -3.83 35 193 2012-09-27 15:57:20 2012-09-27 16:57:20 1 3 69 0 26 187 4 85.60 27 16.60 NEW lphYpsGls.YYphhI+H..Dstssssh.................thuca.GVVRNshYslslsslps.GpP................sssssspss-cscsaLslcIpV.PWshhsQsh ............................hY.puhs.YYphhlcH..sstsssth.................thhca.ulVRNNhYplslsslps.GpP..............................tssssssss-ppp.s.alslplplhPWshhpps......................... 0 7 23 26 +15352 PF15496 DUF4646 Domain of unknown function (DUF4646) Coggill P pcc Pfam-B_61885 (release 26.0) Family This is a family of proteins largely from fungi. The function is not known. 23.10 23.10 23.10 23.50 22.90 22.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.77 0.71 -4.06 20 64 2012-09-28 09:46:54 2012-09-28 10:46:54 1 1 49 0 50 68 0 125.70 22 31.09 NEW PsuFuRsPssslsYs..sF.pPhplhuh..usp....LssGFshl........PpsL.ssHDVsppDWtRFlcDlptAuhhospp..hhuuhhPlh.hls.........lsuhhhstthccthcs+c....sshlsclI-hWNpsFFpPRt ...........................................h..tshpl.oh..ssp......lssGFsh.........sphL.hppcVs.p-WppFhc.......-lstAspLostp.....phstss.uhs.shhlu....................hG...hsuhhsu+..shcptt.tcp....................puplpshlcpWNpshFpsRt............ 0 19 27 41 +15353 PF15497 SNAPc19 snRNA-activating protein complex subunit 19, SNAPc subunit 19 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O75971 Family SNAPc19 is a family of proteins found in eukaryotes. It is one of the five core components of the snRNA-activating protein complex or SNAPc that helps direct the nucleation of RNA polymerases II and III. 
The core RNA polymerase II snRNA promoters consist of a single essential element, the proximal sequence element (PSE), whereas the core RNA polymerase III snRNA promoters consist of both a PSE and a TATA box. The SNAPc binds to the PSE of both of these. SNAPc recognises the PSE sequence common to all human snRNA genes, irrespective of polymerase specificity. SNAPc is also known as the PSE transcription factor (PTF) or PSE-binding protein (PBP). The human SNAP19 and SNAP45 subunits are dispensable for transcription in vitro and are not as widely conserved as the other three, SNAP190, SNAP43 and SNAP50, suggesting that these vertebrate-specific SNAPc subunits may have adapted specialised regulatory roles for snRNA gene transcription [3]. 27.00 27.00 29.10 29.10 26.20 26.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.22 0.72 -3.78 22 69 2012-09-28 13:02:45 2012-09-28 14:02:45 1 1 56 0 43 67 0 82.80 46 84.02 NEW EL+pEEcpLL+lhsslp-QLN+LKVEELtL+Shlsspp..tps.sttss.............sppp.shhh...plDsp....spINpppLp.Lshtssh...........pcptEEEc-sD ....EL+KEEEsLLRl+ssLpDQLNRLKVEELtLpShlsupp..sph.ss.ss.............sppphch.h...tl-sp.........spINQppLc.Lshtsph............cp.EEE--p................... 0 7 10 24 +15354 PF15498 Dendrin Nephrin and CD2AP-binding protein, Dendrin Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O94850 Family Dendrin is a family of eukaryotic proteins found in the podocytes of the kidneys. Dendrin, originally identified in telencephalic dendrites, is a constituent of the slit diaphragm, SD, complex of podocytes, where it directly binds to nephrin and CD2AP. Kidney podocytes and their slit diaphragms (SDs) form the final barrier to urinary protein loss. SD proteins also participate in intracellular signalling pathways. Dendrin appears to prevent programmed cell death (apoptosis) through its binding to nephrin. 
The SD protein nephrin serves as a component of a signalling complex that directly links podocyte junctional integrity to actin cytoskeletal dynamics. Thus, dendrin is identified as an SD family with proapoptotic signalling properties that accumulates in the podocyte nucleus in response to glomerular injury. 27.00 27.00 109.60 47.40 20.10 19.50 hmmbuild -o /dev/null HMM SEED 657 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.28 0.70 -6.02 4 28 2012-09-28 13:42:25 2012-09-28 14:42:25 1 2 22 0 17 28 0 508.80 67 94.99 NEW MDhQASaWApGPQSRTCt.RPGSPEPPPRRPWASRVLQEATNWtsG.PsElRAREQEKRKAASQEREAKETERKRRKAGGsR..RSPLGp......sR.EPRNu.RsAQPsGhssPsRPERhG.sGRsPRPuAQPQuDPG.AAWAGsWuGRRPGPPSYEAHLLLRGAAGTAPRRRWDRPPPYVAPPSYEGPHRTLGoKRGPEhspuPssuAPAPT.suRTEGGRsKKRLDPRIYRDVLGAWGLRQGRGLLGGuPGCuuAtsR.EhuKGuuEKSsGlssAGLNSuuDu+.QucssuuPGsptA.AtuAsuosusPRssPRsR.pL+GScEGKEupEQ.WLPcCWlPSPK+QPsRHSQTLPRPWAPGGTGW+ES.GpR-G..suPch.EsWKtTRRAHTLPRSuRGsAttEGVFVIDATCVVIRSQYVPTPRTQpVQLLPSGtsRssGDu.u.PpPu.cEEGEtAuAhsSsCQKLL.SSRlhcQ.StG....htpEAEuGpstDSSL.cERuS+lLGhPlGEVsltsAP....uQPGSPE+sA.GsAAsssAuss+GSE.ssusPRRAGsGWARTPGPYAGALREAVSRIRRHTAPDSDSDEA.tELSV+SGSSDGSDTEAsGASWpsEpo.PthusspPt-GGKTAELpDSIREILDVISQsEEuLFtscDoptsPQGsRc ...................MDhQASaWARG.QsRTCtPRPGSPpPPPRRPWASRVLQEATNWRuG..AEsRAREQEKRKAASQEREAKETERKRRKAGGAR..RSP.Gp......PR.EPRNA.RsAQ.sGhsus.RPERhu.sGRsPRP...sA.PQusPG.uAWAGPWGGRRPGPPSYEAHLLLRGuAGTAPRR.RWDRPPPYVAPPSYEGPHRTLGTKRGPt.SQsPsSSAPAPs.sARTEGGpsKKRLDPRIYRDVLGAWGLRQGpGLLGGSPGCusu+sR.EsuKGssEKS.GLAAAsLsSGucuHsQA+.ssGusuochsPAGuAsussssPRPsPRSR.HLcGSpEGKEG.pEphWhPKCWlPSsK....KQPsRHSQTLPRPWAPGGTGW+ESLGhtEG..sGPEsLEsWKsTRRAHTLPRSSpG......sutGEGVFVIDATCVVIRSQYVPTPRTQpVQLLPSGssRlVGDuPoQsKPs.KEEGEGAsshPSsCQKhL.SSRl.HQPutG....+shEAEGGcsuDSSL.EERssRILGLPssEVNLpDAP....oQPGSPEHpALGPAAsustutscGSE.ssss.RRuGtGWARTPGPYAGALREAVSRIRRHTAPDSDoDEA.tELSVHSGSSDGSDTEAsGASWRsERT.P.hus..spPpEsGKTAELSDSltEILDVlSpTEE.sLFtscD.+tT.QGsRc...................... 
0 1 1 3 +15355 PF15499 Peptidase_C98 Ubiquitin-specific peptidase-like, SUMO isopeptidase Rawlings N, Coggill P pcc Jackhmmer:Q5W0Q7 Family Peptidase_C98 is a small family of SUMO - small ubiquitin-related modifier - isopeptidases found in eukaryotes. Reversible attachment of SUMO is an essential protein modification in all eukaryotic cells, The family neither binds nor cleaves ubiquitin, but is a potent SUMO isopeptidase, and the invariant residues required for SUMO binding and cleavage, in UniProtKB:Q5W0Q7, are Cys-236, His-456 and Asp-472, all of which are fully conserved in the family. Member proteins are low-abundance proteins that colocalise with coilin in Cajal bodies. Peptidase_C98 depletion does not affect global sumoylation, but causes striking coilin mis-localisation and impairs cell proliferation, functions that are not dependent on the catalytic activity. Thus, Peptidase_C98 represents a third type of SUMO protease, with essential functions in Cajal body biology. 27.00 27.00 27.60 30.00 24.90 21.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.99 0.70 -5.11 14 62 2012-10-10 12:56:15 2012-09-28 15:54:51 1 5 41 0 33 64 0 238.60 57 27.06 NEW hCLQW+NspuLCWLDCILoALVHhEsLKcsl.sphssc.........pcSlht+Lhp+YsQAstLLpssppst.hp.s.t.s..............hhphss-lLscsEo.pLN-lRppIFtpLQPpL+CcL.GchESPVFAhPLLL+h-splEcLFhaSFSWpFECspCGapYQsRshKoLsTFTNllPEWHPLNAsHhuPCNsCpsKSQ+R+MVLE+lsslFMLHFVEGLP+NDLppYSFpFEGshYQlooVIQYpsp.cHFlTWlhsuDGo.WLECDDLKGP.hCp+Hp+hEVPASEIHIVIWE .............h.slQW+Ns.uLCWLDClLoALVH.csL+psl.sthssp.........pcSlhhpLhpcYspAstLL.spphsthp.s.tp...............hhphssplhsch-s.pLsclRtplF.pLQPpL+C.pL..GchESPVFAhPLLL+h-shhEpLFhhSFoWpF-CspCGapYpsRphKoLsTFTNllP-WHPLNAsHhuPCNsCssKSQhR+MVLE+VSslFMLHFVEGLPpN.DLp..cYuFpFEGshYQlToVIQYpsp..pHFITWlLsuDGo.WLECDDLKGP.pscRHc+FEVPASEIHIVIWE............... 
0 5 7 16 +15357 PF15500 Toxin_39 Putative RNase-like toxin Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and conserved cysteine, 25.00 25.00 148.20 146.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.25 0.72 -3.86 3 3 2012-10-01 01:44:25 2012-10-01 02:44:25 1 2 2 0 3 3 0 100.30 33 10.51 NEW tE+cssAc+KPADGGHDVVVTP-GVG+CSPPPCPVI+VEYKKELuAcP-LKpWNEoVQuhRKsDPchAAD.............EAAKLIpALEsARsNGG+APsEchV+ tE+cssAc+KPADGGHDVVVTP-GVG+CSPPPCPVI+VEYKKELuAcP-LKpWNEoVQuhRKsDPchAAD.............EAAKLIpALEsARsNGG+APsEchV+ 0 1 1 3 +15358 PF15501 MDM1 Nuclear protein MDM1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8TC05 Family This family of proteins is present in the nucleus [1]. The function of MDM1 is not known. 27.00 27.00 27.30 27.30 23.30 26.60 hmmbuild -o /dev/null HMM SEED 577 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.29 0.70 -13.10 0.70 -5.57 20 126 2012-10-01 08:54:23 2012-10-01 09:54:23 1 4 65 0 65 137 0 377.30 29 69.79 NEW SEYpRsFpW+cshhscphsss.t...........pKssaAGL+S....Dph..GIs+EPshhuKRRVPhpcs.plscphcWptsss.p.ssss..csps..sspsppsppptshsptph..s.-ssphs.+spuhussSt.c...s............stpp.thssstts.spps.h..ssp...spphssths+hLpt+AGls.sh.tpphh+s.SEYQpQFsWKssp.ctuPhhsAp.pshtsps.t.hs..saps.sph.tcoEYpppFps.t.sc..p.cpphtt..............ppshcths.pp..pspKts-.hph.csctts.hstsp.c+p.p.h.hp..ph............t+hsoEY+u+FlSPupYhYccGuWs+s................................+pssssp.......st...lsshWYtEVpELREKActYRpRs.pGTHFSR-HLs.QlLocssslWDsSSsoSS-uslSsslpshcLus.............t.cp.p................................h.st...cpp.shccpspcssTtclshustsssPs+..R+LsWtpscss.pcctpppsptptp.........pppppttttpp.hpp.pc.c.pp..................psptsh.stuossuS.lS.........................uscsGRLPTPcL+...ph.sGspRoHaDhTTPAsG.GAVLlSPoKh+ 
...............................................................................SEYppsahW+p....pt..st.............t....su.h..................u.spEP.h.p+++sP.hs............t.p...........st........t..tt...................................................................................t........ptt.....s..........t..t.p......ss...........h..oEYp....pa...................hhsts..................sthptt.........................................t.hs..t.....p.tc...........php........t...p...t.....................tph..p.t.p.....p.t.a.hptttahh........................................................................lhELRc+At.Y+......pRs..Gschs.c.....lhpcppphW-.sSppSS.t.......slS.s.p...hts.........................................................................................................sh.....t...pcp..psst.pht.st..s.Ps+..c+Lh.s.tptt..p...pp.t.................tt...t.......p.t...........................st.p..tts.tss...t.........................sttstRhspPphh...th....pRh+ts.h.ss.t..t.hss........................................................................ 
0 20 23 44 +15359 PF15502 MPLKIP M-phase-specific PLK1-interacting protein Eberhardt RY, Coggill P, re3 Jackhmmer:Q8TAP9 Family \N 27.00 27.00 27.00 27.00 26.90 26.40 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.74 0.71 -3.51 24 90 2012-10-01 09:08:51 2012-10-01 10:08:51 1 2 77 0 63 81 0 150.70 23 75.75 NEW P.hs.......t.s....ssph..tt..........hu..tt.......t......s.P...p...........tP..........tt.......................shpptptht.sttp+pstpss...........ps.hG.......pt.t.pt....s.ppYa+PSMLEDPWtpLh.hp.sthspphsssptss ............................................................................................................s.tsssst......................t...t.........p....hG..tst.....s................ts.P...................tttPt.......p.stsPss.........................................................spppthu..hssu...tpppt.pss............sths.ssh.G....phpsRppphsp...........sl-pYa+.P.SMLEDPWtsLpshp.hths.p.ppp....ht............. 0 15 25 44 +15360 PF15503 PPP1R35_C PPP1R35; Protein phosphatase 1 regulatory subunit 35 C-terminus Eberhardt RY, Coggill P, re3 Jackhmmer:Q8TAP8 Family This is the C-terminus of protein phosphatase 1 regulatory subunit 35. This protein interacts with and inhibits the serine/threonine-protein phosphatase PPP1CA [1]. 
27.00 27.00 30.40 29.50 26.00 24.90 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.68 0.71 -4.68 21 62 2012-10-01 08:54:13 2012-10-01 10:52:41 1 1 54 0 42 66 0 138.20 29 58.61 NEW PphpoolsLppELp.....httt.........chsut+ulpcpLppo...ps+stlp...spsscslNhspsppla+sLVslsVscppll...pptlpc+........pt.s..pchstspPsltpFhcs..p.hhhpssphtscths..phphpspPs.cshhshac+hp ...........................................................Pthpoolslt.Elp....thttt.........phsu.+slpcpLcpo...ps+sulp.....tplscsLNhstscplF+sLVsLpVs-ppll...sttlp-+.thh..t.ptss..pcsssstPchs.hhcs..pphhhpssphhhpsls..plphp.pPs.tshF.ha+ph...................................................... 0 12 16 25 +15361 PF15504 DUF4647 Domain of unknown function (DUF4647) Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAL5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 282 and 480 amino acids in length. 27.00 27.00 30.50 29.90 20.30 20.30 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.60 0.70 -5.63 13 45 2012-10-01 10:03:11 2012-10-01 11:03:11 1 2 27 0 21 50 0 351.10 52 88.73 NEW MDLPDESQWDETTCsLAlCQHPQCWAolRRIERGHPRILuSsCKTPl-sEDKLPsLTlVNIoDSCFtA+R.lspppLstFTFTKs+SLLSpuSKFcSKFQG.Rs.KuLPDK-LIsposRsPKLSVLNLNETpLPsspDVRNMVVlWIP.......EEsEpp...........sSQsGKK+RKKSss.......Ks+s.lhhsG+Qpscoph..psPuhlVPPPSPV+hhEQLSoEulPhWsQhDhLPQDLLK-LLsscGKohPssEMKhQLAMMKKshPLEKsRPDSAISSKMFLSlHRLTLQRPuLRYPEHLKKLa.NLKoE...uh+KQQthQQpp.......VKTPsKKQEAKKKuKuDsGSQsTS+KpsusssaDPhh..spRTL.sppS-hK.QQpthpptGsshppsSTcRsph-hu-pshs.h.....ppcsPELschEsopKDlssphEslLEu.t.....sscsLSsShuphuWNPELKLLRILQATD-EDEE 
.......................................................................................McLPDESQWDETTCshAVCQHPQCWAolRRIERGH......P.......RILuSssKT.Ph-sEDKLPsLTlVNIs..DSCFts++......LsthTFoKs+SLLS.tSKh.SKFpG.Rs.KsLsDKslhspspR.PKLsVLNLNETpLPsspDV+NMVVlWIP.......EE.Ehp...........spQptKc+RKp.sscp+..h.....hstpQ.s.t....tsPuhhVPPPoPVph.EQhss-.lPhWsQ.-hLPQDLLc-LLssttpoh.s.EhKh.pLAhMKKshPLE+sRPDSAIS...oKMFLolHRLTLpRPuLRYP-+LKKLa.NLKpE......uhpcppth....Q....p..pp.........sKTs.++QEsK+KuKu-st.Qsspccpssshhhs.hh..t.csL.tppsphc.pp..htp.u.t.ppsppcpsh.ph.ch...........hpssch..hEsspcDlss.h-hh.cs....ptp.shshot.hsphuWNsELKLLRILQsTD-E-EE.................................... 0 3 3 4 +15362 PF15505 DUF4648 Domain of unknown function (DUF4648) Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAG6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 115 and 207 amino acids in length. 29.00 29.00 95.90 95.60 28.60 28.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.27 0.71 -4.05 8 38 2012-10-01 10:06:50 2012-10-01 11:06:50 1 1 27 0 20 35 0 148.90 73 78.47 NEW VVI-SDLY.spRPLELLPHRoDRhp..ssEuc..RhGRLQsupQ.Gs.sAKTsARPVGISEPKsopLCGNRAYGKSLlPPs....uRIoVKAPs..shEssAsGo-psAlLsRGSRHLKKMsEEaPsLPQGAEASLPLTGSuSCGsPSILRKMWh+HKKKSEYVGATNSAFEAD ...VVIESDLY.sppPLELLPHRuDRRD..sGDuR..RFGRLQsARPPssHPAKssARPVGISEPKoSNLCGNRAYGKSLIPPV....sRISVKsss..slEAsAhGoEpGAVLhRGSRHLKKMTEEYPsLPQGAEASLPLTGSASCGVPuILRKMWTRHKKKSEYVGATNSAFEAD... 0 1 1 3 +15363 PF15506 OCC1 OCC1 family Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAD7 Family The human member of this family, overexpressed in colon carcinoma 1 protein (Swiss:Q8TAD7) has been shown to be overexpressed in several colon carcinomas [1]. 
27.00 27.00 44.80 44.60 25.30 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.98 0.72 -4.09 3 14 2012-10-01 10:37:31 2012-10-01 11:37:31 1 1 11 0 8 20 0 57.00 77 93.44 NEW MGCGNSTAuSAGsGpGPAGoAKDVsEESVoDDDKRRNYGGVYVGLPSDAsNMVSGQTKos+K MGCGNSTATSAGAGpGPAGAAKDVTEESlTEDDKRRNYGGVYVGLPSEAVNMVSSQTKTVpK... 0 2 3 4 +15364 PF15507 DUF4649 Domain of unknown function (DUF4649) Coggill P pcc Pfam-B_83 (release 26.0) Family This family of Firmicute sequences has members that are annotated as ribose-phosphate pyrophosphokinase; however there is no evidence for this attribution. Member proteins are all shorter than 100 residues in length. 27.00 27.00 27.00 28.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.34 0.72 -4.21 14 556 2012-10-01 12:54:17 2012-10-01 13:54:17 1 1 311 0 28 149 0 67.10 50 97.61 NEW IElTYLDAhKpERploFEsYpEF.pu.puC.lslsD.asVpKlsYpG+cLsYpGsYGDlahahhc.DLopYc ..............ElTYhDu.+sERTlpaEsaE-h.ho.puC.lslsDhh.VppLTa+G+plsYpGhhGDlahaL.+tDh.......... 0 4 6 14 +15366 PF15508 NAAA-beta beta subunit of N-acylethanolamine-hydrolyzing acid amidase De Vivo M, Coggill P pcc Jackhmmer:Q13510 Family NAAA-beta is a family of vertebral sequences that form the beta subunit of vertebral N-acylethanolamine-hydrolyzing acid amidase, a member of the choloylglycine hydrolase acid ceramidase family. The alpha subunit is represented by family CBAH, Pfam:PF02275. 
27.00 27.00 27.00 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.00 0.72 -3.64 85 283 2012-10-01 15:26:22 2012-10-01 16:26:22 1 8 132 0 173 266 14 88.30 28 24.28 NEW sslP.ha.slsLDhPPpcRWpplhpsap..splpt.lhp.hlcpllss..hhst...hlhphlsphh.sthh..phl..sps....ascEl+GIAcss...ulsLG-llhhNlhYEl ........................s..sPhaslsLDhsP.cRWtplhppht..stlp...lhp.s.hpphlss.....hhPp...hl.hphlsphl.stlh.....pth...P..pP....ap...-El+GIAsss...slsLu-llhhNlhYEh....... 0 62 81 126 +15367 PF15509 DUF4650 Domain of unknown function (DUF4650) Coggill P pcc Pfam-B_31507 (release 26.0) Family This family of vertebrate proteins lies to the C-terminus of Ubiquitin-specific peptidase-like protein family peptidase_C98, Pfam:PF15499. It might be acting as the exosite for the peptidase. 27.00 27.00 27.20 28.70 22.60 23.10 hmmbuild -o /dev/null HMM SEED 520 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.66 0.70 -5.58 16 46 2012-10-02 09:43:10 2012-10-02 10:43:10 1 3 31 0 24 45 0 485.10 45 49.01 NEW plLSGscsLVDssllsLTLpEh........QVsSEuh..LEspPVt......EssslscssoLQsQ-...ShhuS.louPscEKlspsphlshSF.oQslssshpssQ.Nstsossstslss.spsss.LhQtlKslch............E......+ssphcphLsscsE...KLpscpsspSQsS.NLKc+pssA.pSpsssupSsQNp...shK-sQKKsFVGSWVKGLLS+GsSFMPsCVSApsRst.........lTDLpsoVKG..AsNFsGFKTKGl.pQ+up+sS+Ksp+ssspssssupssPt..tsssoustsp.ssssAsstlh++p-ssStssphs+sopssEsslSSus+s-usEuQlHKLRLKLLKKLKAKKKKLAuLhSSsps..............GphsS-shEplSps.......GSPNDCESl-DLLpELQaQIDlADsKSusTTssssS.hSuQoHEEILAELLSP.oTlsSoEhScssEsDhRYLEM..GDsphsuPs.PoEhsslspsshLpQDHNYCSPsK+sQsEVQsDSLsNsuCl+TLNLESPhKTDIFDEFFSTSALNSlANDsLDLPHFDEYLFE 
.............................................lLSGscsLVDssIlsLTLEEs........plsSEuh..LEs+PVu......ENssllcssohpsQE...ShhuS.lSuPsc-KlhpsthlDhSFsSpslssshp.sQlNstsossspslss.spsos..LlQslKslch............E......+DsphcphLosKsE...pLcPcp.lsSQsS.NL++ppssA.pSQossucShQNp....shKEsQKKsFVGSWVKGLLS+GuSFMPsCVSApsRss............lTDLQsSVKG..AsNFGGFKTKGl.pQ+up+sS+Ksp+ssscPsshupsPst..usssssuhs..spspAsstshcKstssS..t.............suphsHsopsp.sulSSus+t-.sEuQlHKLRLKLLKKLKAKKKKLAuLhSSspp..............tsssS-slEplsps.......GSsNDCESl-DLLpELQYpIDhADscSusTosssso.hSoQoHEEILAELLSP..oTssSoEhScssEsDh+YLEM..GDsphssPs.PoEhsslspsspLpQDHNYCSPsK+s.pEVpsDSlhssuCl+TLNLESPhKTDIFD-FFSoSALNolAsDs.DLPHFDEhLFE............................ 0 1 2 9 +15368 PF15510 CENP-W Centromere kinetochore component W Coggill P pcc Pfam-B_49340 (release 26.0) Family CENP-W is a family of vertebral kinetochore proteins that associates directly with CENP-T. CENP-W members are histone-fold proteins. The histone fold region is critical for binding to centromeric DNA. Importantly, the CENP-T-W complex does not directly associate with CENP-A, but with histone H3 in the centromere region. CENP-T and -W form a hetero-tetramer with CENP-S and -X and bind to a ~100 bp region of nucleosome-free DNA forming a nucleosome-like structure. The DNA-CENP-T-W-S-X complex is likely to be associated with histone H3-containing nucleosomes rather than with CENP-nucleosomes. 25.00 25.00 38.10 38.10 24.80 24.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.42 0.72 -4.02 6 38 2012-10-10 12:36:46 2012-10-02 12:47:25 1 1 26 0 16 43 0 87.60 64 99.88 NEW MAlSTTVSQRK.IKRKAPRGFLKRVFKR+KPHLRLE+ssDLL...............VHLNCLLFV+RLAEESRTNACEsKCGVIKKDHVLAAAKVILKKSRG ..........................MA.oshVsp+K.IKRKAPRGFLKRlhK+pKP+LRLppsuDLL.......................VHLNCLLFlHRLAEEoRTNAsEsKCtlIKK-HVlAAAKVILKKSRG.. 
0 1 1 2 +15369 PF15511 CENP-T Centromere kinetochore component CENP-T Coggill P pcc Pfam-B_9162 (release 26.0) Family CENP-T is a family of vertebral kinetochore proteins that associates directly with CENP-W. The N-terminus of CENP-T proteins interacts directly with the Ndc80 complex in the outer kinetochore. Importantly, the CENP-T-W complex does not directly associate with CENP-A, but with histone H3 in the centromere region. CENP-T and -W form a hetero-tetramer with CENP-S and -X and bind to a ~100 bp region of nucleosome-free DNA forming a nucleosome-like structure. The DNA-CENP-T-W-S-X complex is likely to be associated with histone H3-containing nucleosomes rather than with CENP-nucleosomes. 30.00 30.00 30.80 30.70 29.90 29.50 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.63 0.70 -5.23 40 140 2012-10-10 12:36:46 2012-10-02 17:33:53 1 3 109 5 91 148 1 349.70 26 78.83 NEW ssTP+uhtu...hphh.t+Rsuh.TPt..psRR+Sspt.............................p+-TPcslL+sLu+sLA.sSpslssSssps......................................ss..ppthp.scp-cs-sp.plptPRLSlPlst..............tcccp.............................-ssc.tPP+hShL.sts............Thp...SlEhsRRsho....p.tshhpchs......ussphSc.....h.s.spl.....ttspsh....sss.....hpssssppshsps.tstsphsppsss....................................................sh..tht........--.tphshths.ssssusstsphtpsps.....s-hptt.sstssstsp.s.t.s..tssssssstpsssptsss..........................stt..ph.hpssssssssspsstsh...sh.......................ppp+lS++.GlshPshPsssVK+lAppau+h.uthuKs+ls+-uLpAlppuo-hFFEQlu-DLpAYAcHA.GRKTI-EoD 
.................................................................................................................................................................................................................................................TPht.tu......pRshh.Ts.......psRRhS.p............................pcpTshslL+plhhshs.po..h.pSs.......................................................t.cp.ptp....hPcLp.shs.................tcp.........................................ps....sschS.h.p...s...............ohp..p.thsR...Rs.s......tshhpchp.........ss...t.........ss.s.h......ttppsh...shh.....h.p.p..ps.sp...ptst.st..tsts..............................................................................................................................c-...hsh..th.ssts...s....tptt......tp.t.t..s..shts..p........t..ststtsstpsp..t.......................................................................................psssssttt..h.tst................................................htp+hs+......t...thshPphstssh.Kphhppas+..shhuKh.ls+cslthlpps.-haFcpls-DLpsaApHA.GRKTlc.pD.................... 0 15 33 59 +15370 PF15512 CAF-1_p60_C Chromatin assembly factor complex 1 subunit p60, C-terminal Coggill P pcc Pfam-B_74766 (release 26.0) Domain CAF-1_p60_C is a family of vertebral proteins that is involved in chromatin assembly. CAF-1_p60 is one of the three subunits of the CAF-1 complex, and this domain binds to the C-terminal region of CAF-1_p150, family Pfam:PF12253. The N-terminal part of the CAF-1_p60 proteins is a WD-repeat structure, Pfam:PF00400. 
27.00 27.00 29.40 28.50 19.10 20.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.32 0.71 -3.84 16 48 2012-10-03 13:48:41 2012-10-03 14:48:41 1 6 35 0 27 42 0 156.60 48 29.99 NEW GIPLKEKPVLuV.RTP-TA.KKsK.uQsppsSSPGPR.sEGTPoSRspDPSSPssTPspu+.uPAssu.KDsPuossus+ushsssSEEK.s.QP.ouQNsKup.PRRVTLNTLQAWSK.TTP..RRINLhPLKTDoPssusPssls.SoPSoEclQ......s...EsPuDPpssPPE.KRPRLsEpptuspu.- ........GlPLKEKPVLsh.RT.Ps.TA.KKsK..uQopps.SSP.GPR.sEGTPsu.RspDPSSPssTPspup.uPAsss.h+DsPshssus+us.ss..sSEEK.s.QP.uuQNsKut.sRRVTLNTLQAWSK.TTP..RR.IsL....hPLKT-oPssusssss..osPusEphp......spsPsDs.spPPE.KRPRhpEp.tsspu............................ 0 1 4 11 +15372 PF15513 DUF4651 Domain of unknown function (DUF4651) Godzik A adam JCSG target SP18156A Family family of short, secreted proteins specific to the Streptococcus genus, with distant homologs, not recognized by this HMM, found in other cocci. In all sequenced genomes, proteins from this family appear in a conserved genomic context with an thioredoxin, tRNA synthase and tRNA binding protein, but the functional implication of this is unclear 22.00 22.00 22.10 61.00 21.10 18.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.81 0.72 -4.52 20 335 2012-10-09 15:12:14 2012-10-04 05:16:11 1 1 332 0 21 126 0 61.70 63 63.01 NEW -+c+EclhpplRpaFSshGpIuVLYlssaESspcplsGGlVhEDGRpapFsYcpGplsYEEE ..M.KTQEELTpIVRDaFSDMGEIATLYVQVYESSLEoLVGGVIFEDGRHYTFVYENEDLVYEEE. 0 1 5 12 +15373 PF15514 ThaI Restriction endonuclease ThaI Eberhardt RY re3 PDB:3ndh Domain This family of restriction endonucleases belongs to the PD-(D/E)XK superfamily. It cuts the recognition site CG^CG leaving blunt ends [1]. 
25.00 25.00 42.40 42.40 21.30 19.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.37 0.71 -5.03 2 3 2012-10-11 20:44:47 2012-10-04 10:36:20 1 1 3 2 2 4 1 154.70 49 96.47 NEW LFpDc.IIsKlpp+LPYhFQLAELESSRAGKlGMpVGShREcIlSuLLIYKFGEpNVETslPITEPElDVKLFGs.ISIKTIoG+EPsGVKLIWTVDupKA+.FLETW+P+aDhILsHINWuS.GulYYIPs.VQpplFDplG+-KYIKLPKQGTNPRGVElSN-Ah+.lhss-cTMSIpIEWKKT.lpYssFKRWV-LWu- ...LFpDc.IIsKlpp+LPYhFQLAELESSRAGKlGMpVGShREcIlSuLLIYKFGEpNVETslPITEPElDVKLFGs.ISIKTIoG+EPsGVKLIWTVDupKA+.FLETW+P+aDhILsHINWuS.GulYYIPs.VQpplFDplG+-KYIKLPKQGTNPRGVEISN-AL+Ellsc--TMSIsIEWKKT.NlcYSPFKRWVELWu-. 0 1 2 2 +15374 PF15515 MvaI_BcnI MvaI/BcnI restriction endonuclease family Eberhardt RY re3 PDB:2odh Domain This family of proteins includes the restriction endonucleases MvaI and BcnI. These enzymes both function as monomers. MvaI cleaves the sequence CC/WGG, where W is an A or a T nucleotide, leaving sticky ends. BcnI cleaves the sequence CC/SGG, where S is G or C, leaving sticky ends [1-2]. 
25.00 25.00 25.90 25.00 24.10 23.10 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.57 0.70 -4.33 21 77 2012-10-11 20:44:47 2012-10-04 10:53:55 1 2 72 13 23 80 38 220.60 24 68.37 NEW llschcplpptGalp.....pRsusTGlGcTLEcLLGIpcNN.ctPDat.slELKopR.....p..ssohlTLFopsP....ss.h+usppl.hp+YGY.pcttp........tpLasTlpssphNsh...u...........hh.Lc.......l.c..pspp..l....p.lhhp.c.............phhs.hhshp......tpKppchhaV.Acoc.......hpsspEpF+atcu..hhhss..h..s..p..hhpLlEpGhItlDlcIsph.s..t.s..t.s+D+GsuFRIppcch.shLF ..........................hltpl+pltphsal...........stttss.s.u.l..........GpTLEshLGIttNs.ptPDht.s.hElKut+.....t..ssohhTLFs.pP............ss...pps...s.tth.hppaGa.pppt..........tplapoh..ps..sp.....ss.s...........hhlp.......................hs...ppcp..l....hthp.s..............................th.sh.ph.....................tpKhtpshaV.Acsc.............hpsspchFp.as.....ch...hhh..ps.....h....s..p..FlphlcpGhlhhDhchppt.t..h..t.h+c+GstFRIp.ppsl.thLa......................................................................................................................................................................... 0 5 13 19 +15375 PF15516 BpuSI_N BpuSI N-terminal domain Eberhardt RY re3 PDB:3s1s Domain This is the N-terminal (nuclease) domain of the BpuSI restriction endonuclease [1]. 27.00 27.00 57.30 247.20 20.40 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.78 0.71 -4.54 4 4 2012-10-11 20:44:47 2012-10-04 14:08:44 1 2 4 0 1 4 0 159.80 43 18.68 NEW .sYsDDEVusFHPICcoALNpALpphGLDspYcVlH..HpslGol.sDFVlhpKsTcKalLhlEVKRp.utVpSTRY+hQApSYl.EAsp.plEpP....YYslTNLEVhshFKaDusRPsVspQllpPSPhcsGsFscshSEhaspLVcsFpslIshsVsDoGoa ..sYsDDEVusFHPICcoALNpALpphGLDspYcVlH..HpslGol.sDFVlhpKsTcKalLhlEVKRp.utVpSTRY+hQApSYl.EAsp.plEpP....YYslTNLEVhshFKaDusRPsVspQllpPSPhcsGsFscshSEhaspLVcsFpslIshsVsDoGoa. 
0 1 1 1 +15376 PF15517 TBPIP_N TBP-interacting protein N-terminus Eberhardt RY re3 PDB:2czr Domain This is the N-terminal restriction endonuclease-like domain found in several archaeal TATA-binding protein (TBP)-interacting proteins [1]. 27.00 27.00 146.40 146.10 21.50 20.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.44 0.72 -4.35 9 13 2012-10-11 20:44:47 2012-10-04 14:39:59 1 1 13 1 11 16 0 99.50 55 43.95 NEW YpELss+lKKVYu+VRhLDDYHWcIps-pIhGIHKKSsl+lcIclAcu+EcA-KLu.c.K.cssGIcIlVlPsKGTFYl+NGsFIhoh+aL+uTLtDIsDHI YpELss+lKKVYu+VRhLDDYHWpIp--pIlGIHKKSslRlcIclA-u+EcA-KLAc.c.cssGIcIlVlPsKGTFYl+NGAFIhohKaLRuTLtDIsDHI 0 1 1 6 +15377 PF15518 L_protein_N L protein N-terminus Eberhardt RY re3 PDB:2xi5 Domain This endonuclease domain is found at the N-terminus of many bunyavirus L proteins [1]. 25.00 25.00 25.30 25.20 24.20 24.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.04 0.71 -4.90 11 334 2012-10-11 20:44:47 2012-10-04 15:37:54 1 2 46 8 0 196 0 165.00 60 28.09 NEW MDstchsQahsRIpss+DsplAKDIssDLLhsRHNYFGRELCpulsIEYRNDVPhhDIlL-hlPuhsshshcIPNlTPDNYllhsG+lhIIDYKVSVusESophThEKYsclhtclhspLslsaElsIIRhsPVspplHlsS-sFhptaPsIslslDFohFF-L+shLYcKFtDDEEFhhhVu .................................Mp...ht.ah.RI.tsppsp.AKDI.sDLLhsRHsYFG+ELC+SLNI..EYRNDVPhlDIlLDlhP.............slD..P..hsl-sPaITPDNYLalNsh......LYIIDYKVSVSNESShITh-KYhElhcDIts.LulshEIVIIRIDPlScsLHIsS-cFKchaPslslDIsFspFF-L+phLa-KFtDDEcFL.hls......................................... 0 0 0 0 +15378 PF15519 RBM39linker linker between RRM2 and RRM3 domains in RBM39 protein Godzik A adam Jackhmmer:Q14498, residues 339-411 Domain A conserved linker between the second and the third RRM domain in human RBM39 (CAPER) protein, also present in other RNA binding proteins, especially those involved in RNA splicing. This linker was implicated in interactions with ESR1 and ESR2. 
Preliminary results from JCSG suggest that this is a structured domain with a well defined fold. 22.60 22.60 22.70 22.60 21.70 22.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.67 0.72 -3.69 26 417 2012-10-05 14:20:47 2012-10-04 21:53:55 1 17 220 3 225 369 2 75.20 32 17.17 NEW LD..-sDsuGlshNshSRsALMpKLAR..o-p...ssss...sst.thht.t.th.......sh..s.........hASRCVlL+NMFDPuEEsspsW .......................................-.-hsGlshss.t.uRhtL.Mt+LAc.......sss....hphss...............ssp..psh.th...sssh.................................s..s................................lAopChhLpNMFsPp...pEppss.............................. 0 66 110 164 +15379 PF15520 Toxin_40 Putative toxin 40 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the type 2 secretion system [1]. 25.00 25.00 32.00 303.10 22.50 19.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.09 0.71 -4.83 2 4 2012-10-05 01:29:56 2012-10-05 02:29:56 1 2 4 0 0 3 0 177.20 72 11.02 NEW RsPhoPhs.Thst.L.sA.AlAhcpA.Gphs.hphlG...GphAhDssspsshAhhs.GhthhputphlsNslPpphARVIPsth.hs.LGhPspSDVFlTAAcDItGLsshQIup+LTIPtSso.FplIcF.TP.sGlASPI.RssPGFlGtGRTLGGAREFsIPNt.IP.sAhhpIl RSPMSPLGSTFNQ.LDVAQAIAYDKANGTMTSAEVIGNQWGDIAFDAVTTAVGAGVGRGTGLYKGQTLVTNSIPQKVARVIPDGIKTSMLGAPNQSDVFVTAAKDIKGLNAMQIANKLTIPQSSSGFKVIEFRTPMNGLASPINRTNPGFVGKGRTLGGAREFTIPNQQIPKDAIIKIV 0 0 0 0 +15380 PF15521 Toxin_41 Putative toxin 41 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin contains two structural domains, an N-terminal alpha/beta domain and a C-terminal all-beta domain. The domain contains conserved GxR, RxxxoH GxE and GxxH motifs and a conserved histidine residue. 
In bacterial polymorphic toxin systems, the toxin is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 25.00 25.00 48.30 46.40 16.60 15.80 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.97 0.70 -5.24 12 19 2012-10-05 01:45:39 2012-10-05 02:45:39 1 2 16 0 7 21 0 254.30 26 28.37 NEW phhtpppcutplLppVhhlLptG.lphhssptp.............tschphhstsVApsLuHGGRVsIplPshsstpt.........................................shhlsphL.Glspssh................p.sstVhpRhhuTHchplspN................tpFKEptu.....................................hsslpstl...................tpschaGhNlulGGlGppD......................asGDllLPD.GuHGHhalsa+sPosc+sGuL.lGlETsuPuthsshGahHsh+uoEt..ossPhusht.hKsDKlGsh.........thstRhVDLsphst......tDWpthlcchtppatstlt...ttpstppptthhpEhl .............................................t.h.tpt.pthtlLpplhhlhp.G.h.hhptpt................hsphp.hp.slsphLuHGuRV.IplPshsstpt............................................l..hL.Ghp.tt...................tttV.pRhhuTHphphtps.................phcEptu.....................................hhsltstl....................t.chaGhNluhGGhGppc......................hsGcllhss.GtHGHhalhapsPssc+.GulhlGlETotP.......uthsshGhhHshpuopt..phsPtush...Kss+lGs..........phsshhVDLstht........chhthhcph.ptatttht....tp..ppth.....-............... 0 3 5 7 +15381 PF15522 Toxin_42 Putative toxin 42 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
163.70 163.70 303.20 302.10 25.20 16.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.37 0.71 -5.13 2 2 2012-10-05 01:46:05 2012-10-05 02:46:05 1 1 2 0 2 2 0 201.00 29 8.90 NEW PtlsAchpsL..AK.RltllGhtssssIa-pcch+hps..AF.++hR-hl-.........AlsusplssWppYspTs..Ss.......VGhHGQl.Gh.hG.hs.c.ssGpthG.......tsRtSHHhsQYLLlpYLpNh+us.p.hsH....+.hh.PshssoGspspuhou.sG+phD..Rl-.Gss.sDRGtshPAI.lAt+THQhGplHIsAt PtlsAchpsL..AK.RltllGhtssssIa-pcch+hps..AF.++hR-hl-.........AlsusplssWppYspTs..Ss.......VGhHGQl.Gh.hG.hs.c.ssGpthG.......tsRtSHHhsQYLLlpYLpNh+us.p.hsH....+.hh.PshssoGspspuhou.sG+phD..Rl-.Gss.sDRGtshPAI.lAt+THQhGplHIsAt 0 1 2 2 +15382 PF15523 Toxin_44 Putative toxin 44 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha helical fold and conserved [DNE]xxH motif and arginine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 26.90 26.90 29.20 28.00 25.70 24.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.06 0.72 -4.10 26 28 2012-10-05 02:16:07 2012-10-05 03:16:07 1 16 27 0 6 33 0 83.80 26 8.40 NEW tssGsC..sstppscLpc-hs+h...Cp.pst..sCtss..sptplht+hcthppCtsuRpplspKCFsG.GDpGHppphspAapshspCpphl .....t..sGsC..sstppscLps-hsch...Cp.pst..sCtss..sptplht+hcthppCtsuRpplspKCFuG.GDpuHppphspAapshssCpph.. 0 2 4 5 +15383 PF15524 Toxin_45 Putative toxin 45 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly all-beta fold and a conserved ExD motif and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 7 or TcdB/TcaC-type secretion system [1]. 
25.00 25.00 27.00 57.20 23.00 16.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.28 0.72 -3.85 4 13 2012-10-05 02:17:50 2012-10-05 03:17:50 1 4 13 0 0 17 0 79.20 69 8.50 NEW WolTuRIphAcLPspG+IRYIPP+sYpsStsLP+GPsNGYlDKFGNEWTKGP...SRT+GQpFEWDVQLScpG+cQhGhho+DGp.HLNlSlDGpITH .WSITARIQYAKLPRQGRIRYIPPKNYSPSAPLPKGPNNGYlDKFGN.WTKGP...s.p+s.pFEWDVQLScpGhcphGhho+sGp.alNlu.DGpIoH....... 0 0 0 0 +15384 PF15525 DUF4652 Domain of unknown function (DUF4652) Godzik A adam JCSG target SP18005A Family This family of uncharacterised proteins from Clostridia and Bacilli classes has an unusual structure of three beta propeller repeats that do not form a barrel, as in well known 6-, 7- etc beta propeller barrels, but instead are stacked in a three-layer beta-sheet sandwich. The function of all the proteins from this family is unknown. 22.60 22.60 23.70 22.60 21.70 20.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.29 0.71 -4.88 7 33 2012-10-05 17:30:43 2012-10-05 06:04:42 1 1 33 0 5 28 0 166.80 35 75.33 NEW NTspppspccs.ssstcpssscKpspcst...KspssssKphothpFlKKc..lscsocspFsTpWKsSpsspaSACIEGKGs-A.EEGlGKIaIKs.pspphathpIppsp.K.oPKYIEWhDDcNLhVlIupuaGTVSpGGsLYhlNlpTGpso.lhpspD+KpQllSApKpGDp.....lsLpl.VY-DDsh.cSHhEshTIssh ............................................................................................................EuIGclYlKppsssch.hhLpIsp............pp...........phoPK.alhWlDDcNLhVIIGpuaGTVSKGGNLYplNlpssphs.lYcsp-p.KppVlSscpst-p.....L.LplslY-sDsh.csH.EphsIt..t..................................... 0 2 5 5 +15385 PF15526 Toxin_46 Putative toxin 46 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family An RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, with two conserved lysine residues and and [DS]xDxxxH, RxG[ST] and RxxD motifs. 
In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 4, type 5 or type 7 secretion system [1]. This is also referred to as the E. cloacae CdiAC and has been shown to target tRNAs. 25.00 25.00 32.80 32.50 20.50 19.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.57 0.72 -3.82 12 100 2012-10-05 10:45:44 2012-10-05 11:45:44 1 7 98 0 10 56 0 71.80 61 27.41 NEW AppAAKcLGY.pcsp......hSHGphlFhppKs.....shYIo.DhDu......Hs.GGsWKtAso.+sL........spKpsR.GTYDtsLpRIGD ..........A.htAAKKLGY.pthKc.....sppGttlFKKspp.......YIShDhDu......Hp.GGAWKtASS.KNL........spKcTRsGTYDtNLp.RIGD.... 0 2 3 6 +15386 PF15527 Toxin_47 Putative toxin 47 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly beta fold and two conserved histidines, two aspartates and a glutamate residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 5 secretion system [1]. 25.00 25.00 218.70 218.70 24.20 19.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.66 0.71 -4.36 4 4 2012-10-05 10:46:14 2012-10-05 11:46:14 1 4 3 0 3 4 0 128.80 57 4.74 NEW ltPDYATlssGlLSuuuSulVNLYDGTpYlAGGVuQosPSuVSapPGlouTlGWIaGApsApusNSFLNGDGNQAFVSIPTPashNVlGAVTHAYGGuTAIElGlGpPGsloaGlsPWSHosPVsssuK ltPDYATlssGlLSuuuSulVNLYDGTpYlAGGVuQosPSuVSapPGlouTlGWIaGApsApusNSFLNGDGNQAFVSIPTPashNVlGAVTHAYGGuTAIElGlGpPGsloaGlsPWSHosPVsssuK. 1 0 0 3 +15387 PF15528 Toxin_48 Putative toxin 48 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved ND and DxxR motifs and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or TcdB/TcaC secretion system [1]. 
26.20 26.20 26.80 47.80 23.00 25.50 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.41 0.71 -4.41 9 16 2012-10-05 10:46:39 2012-10-05 11:46:39 1 5 13 0 6 13 1 172.10 31 18.04 NEW lshGhNhshsaps.G......sFphusGlGlts....sh.GaGcsu..httuathua.....t.sphuYs.sshh.ph.stQpoGslshth.scaplhhEND........hh.Gs.s+D+aRTuAlplsls.....phohGhtlaTGcstcps.hcs.s.shst...................shhshppp.hGshppG...ppY.phushalGhps.....hRlGhsu-p.lRphhQN.lhHc ........................................................shGhshthsaps.G......sathssGhslhs.....h.G.stps..hhhuhhhsa.....t.hphuah.th.h.ph.ttQpoGhltht..scaphhhEND........hh.Gs.spD+aRTuAlplsht.....phshGh.laTGct.p..ps..ct.p...st...................GhhstphphGhshphG.........cpY.RhushalGhss...........aRIGhDu-p.VRchlQNhhhHp. 0 3 6 6 +15388 PF15529 Toxin_49 Putative toxin 49 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved ND and DxxR motifs and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or TcdB/TcaC secretion system. Interestingly, the toxin is also found in type-II toxin-antitoxin systems [1]. 38.30 38.30 51.40 51.40 25.30 24.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.16 0.72 -4.05 15 25 2012-10-05 10:47:04 2012-10-05 11:47:04 1 8 24 0 6 28 1 88.70 40 7.83 NEW tpuRstssushPputGpsc.sl.hc.cs..G..plspYsTYsscGh......l.KcaRss..G+sHGslssPpVhE.tppNpsPcushhh..sptc..sRtspPcElP ....h.puRs+ssGP.PcApGssH.Th.a+t+s..G..+lTpYuTYspsGs.....hhKQaRGp..GKPHGsVPRPNVKp.sphNpsPtsppth..PKp+..VRpPh.cEhP... 0 1 5 6 +15389 PF15530 Toxin_50 Putative toxin 50 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. 
The toxin possesses a mostly all-beta fold and conserved FGPY motif and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 5 secretion system [1]. 25.00 25.00 143.30 142.70 22.30 20.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.81 0.71 -4.34 5 24 2012-10-05 10:48:05 2012-10-05 11:48:05 1 9 20 0 2 21 0 155.40 71 9.16 NEW sPPRVQQDsSLAhDhuQcGhsptEIscAlDtSHlGPSWGTEYKV+ssVKupVuuGhusGYpl-GoIDsc+lSVNsG-ThulGA+AGASIGLoFGPYFPGllsss-RDYShulGhGVhSVulSsGKDGlGFSFGVGPSWGaSuopo.stscpVDlNGooToElY+YDFK SWNQYAQDNNLTPEQVQEGMNRIAIGE.......GPSWGTTYKVHPVVQAGGDVSFIRGYTLSGTIDDNHISVNQGDIYSIGAHGGASIGLSFGPYFPGLINSNDNDYSINGGFGVGAVGLSTGKDGVSFTFGFGPSWGWSATEI....KGVDVNGTSTSEVYRYDFK..... 0 0 0 2 +15390 PF15531 Toxin_51 Putative toxin 51 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and conserved aspartate and glutamate residues, and an RxW motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system [1]. 25.00 25.00 27.70 27.70 22.60 21.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.63 0.71 -4.34 6 8 2012-10-05 10:48:58 2012-10-05 11:48:58 1 5 6 0 5 10 0 125.20 25 7.22 NEW hGlNspu-sL...sus.Gu..cTasspsYuts.stuhsls-...tPlWpssVcsAltNPsV+IuloLDGh.su..........pshsEAhpsshp+Gcuht.scWc...ttGhGTAWEMscltpAlRhts..............RsW-SIcWYhssccs ...............hGhNshs-tL...sus.Gt..coasshsYups.shshsssp..stshWhssVcsAltNPsV+lpVsLDGhssu..........psh.-AFhsshppGtsh..tsa....ttGtGTsWEMuplupAlRht-..............RsW-SIcaYhsscp...... 0 2 2 5 +15391 PF15532 Toxin_53 Putative toxin 53 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. 
The toxin possesses an all-beta fold and two conserved histidines present in an RxH and THIP motif. The domain additionally has a highly conserved arginine residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6 or type 7 secretion system [1]. 25.00 25.00 49.00 46.50 24.30 23.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.35 0.72 -3.81 11 20 2012-10-05 10:50:16 2012-10-05 11:50:16 1 6 20 0 5 21 1 103.50 39 15.15 NEW scPhshlscssppps..h......psss..pshchKWt......ss..shsaclRlHssDsssstGsNuusG.IaRlp.............hGhcY.hDssGpaa+putlp......hp...sPpasppAuNcTHIPl. ..................................u.sPtchls.sh+ppG..Lp....scshP...thKtKas......su..shpYcVRsHsssPoAPsGSNuusGsIYRIut........tpQGhGhEY.hssDGsWaHpSsL+......spSPsYNssAANDTHIPls. 0 5 5 5 +15392 PF15533 Toxin_54 Putative toxin 54 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and [DN]xHxxK and DxxxD motifs. It is usually exported by the Type 2 secretory system [1]. 26.30 26.30 26.30 30.30 26.10 23.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.20 0.72 -3.72 8 17 2012-10-05 10:51:18 2012-10-05 11:51:18 1 2 17 0 3 16 0 62.90 47 25.94 NEW NstQDKhLohuDIc+LKcuGhDhH-LK....Gt+.NuS+hDLYKD+c.GNIYlKPKGGpGtG-PTGlNIN ...hptsK.lshs-.chLKcsGlDhH-LK....Gtp.ssSKhDlYhDcp.Gplalh.KGGpGts.sTs..Ip.. 0 2 2 3 +15393 PF15534 Toxin_56 Putative toxin 56 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains a conserved histidine residue and a KH motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2 secretion system [1]. 
25.00 25.00 29.90 29.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.55 0.72 -3.49 14 39 2012-10-05 10:52:08 2012-10-05 11:52:08 1 4 37 0 3 32 0 76.20 35 26.79 NEW hssNKh.pHIhssKHsWsplhct...sapplpslhppsh+pGppsshppushpc.....shphsGpslsVThth.hcGphcI.SsuWVp ...psNKh.pHIht....sKHsWsplhKt...sappVpslhp+sM+pGpposYppS...uhp+.....shphsscsVslThsh.pcGpl+I.SsuWVp.... 0 1 2 3 +15394 PF15535 Toxin_57 Putative toxin 57 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and a conserved glutamate residue, and [KR] and Hx[DH] motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system [1]. 25.00 25.00 32.00 32.00 24.70 23.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.81 0.72 -3.93 15 22 2012-10-05 10:52:50 2012-10-05 11:52:50 1 5 21 0 8 21 1 79.00 29 16.33 NEW ucssus.sspEhht....hssccGhp.ass.sPs...tsG+.uuWhcscsGcsahPssptssst....ptGPHWDVpcPpGt.....apsshPpsh .................ucs.ss.sspEhh.....hssccG.p....ats.sPs...hsG+.suWhssc.Gpsah.shttsstt....ptGPHWDVpcsput.....acsshPtt..... 0 3 7 7 +15395 PF15536 Toxin_58 Putative toxin 58 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved aspartate, arginine, histidine and cysteine residues that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
25.00 25.00 32.60 160.10 24.70 18.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.72 0.71 -4.30 3 5 2012-10-05 10:53:14 2012-10-05 11:53:14 1 2 2 0 4 5 0 128.40 56 13.54 NEW RAGIuVPsAthRYERQEPsAcLQtRARIlASPDVRVAV.PGTRITYsLs+su-.LHApuu.YpYQWYhLNDP+TopTaG+P..ARV-Gu-GP+u-FRAGFVGNHKIICK.Vh..AuGDuQAPVFYEaPQsVVSEGK RAGIAVPDAALRYERQDPAAELQGRARIVASPDVRVAV.PGTcIoYALAHGoQuLHASSSPYRYQWYhLRDP+ToRsHGEP..ARIDGPDAP+A-FRAcFVGNHKVICK.VTPRAGGcAGVPAFYEFPQTVVPEGK. 0 4 4 4 +15396 PF15537 Toxin_59 Putative toxin 59 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold with two conserved histidine residues. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2 or TcdB/TcaC-type secretion system [1]. A of this, the Pseudomonas RhsT-C has been experimentally characterized. 25.00 25.00 45.80 72.00 20.00 17.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.61 0.71 -4.04 4 7 2012-10-05 10:54:46 2012-10-05 11:54:46 1 6 7 0 1 9 0 117.90 37 7.14 NEW GohhutsshpcGulssppush+.GSGPssGhlEVS-uhpSoKul+Nats...uGs.-FVFDPppppFshGsst......hpsHptlA+uhG...As-usllGGRh.R.psGplhTsEaSGHYGcpW..osEhRpQ .......t.ht.hhhhpGuls..sp.shhp.GSGPssGhltVS-pspSstAl+NatP...pGs.-FVFDPppspFssGssp......htsHptLAculG...AscusllGGRhhRsssGpl.TsEaSGHYGppW..ssphRp............ 0 1 1 1 +15397 PF15538 Toxin_61 Putative toxin 61 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin domain found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold with a conserved glutamine residue and a [KR]STxxPxxDxx[ST] motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system [1]. 
25.00 25.00 27.70 27.10 22.80 19.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.07 0.71 -4.23 8 33 2012-10-05 10:57:02 2012-10-05 11:57:02 1 3 32 0 5 32 0 150.90 47 35.76 NEW RRsYLNcKFGRTGDLs+DINIRGN+EsAssFaKSpGhspsc.hEsYMsGIDFocPVpVETIN+GKpLaQaQsPGuhQ.GsWYSLossVtPTcLGINPpGplauT-hlVPKVhssYQSpcKVplLRSTAAPsLDTWSV.cpPYpAcGGApQhhSspK-lF ..................................RRtYLNpKFGRoGsls+DIshRuN+EhAscFF+Sc..sls.tc.hcsYMpGlDFs+PVpVETlssGKpLWQaQsPG..u..hQ.GNWYoLoPsVpPT-LGINPhGphhtssh....hhsKVLNsYpoTpKVpVLRSTAAPslDpWS....V..tpsYsAcGGApQlhSsp+t.F................. 0 3 3 4 +15398 PF15539 CAF1-p150_C2 CAF1 complex subunit p150, region binding to CAF1-p60 at C-term Coggill P pcc manual Domain CAF1-p150_C2 is part of the binding region of the CAF1 complex p150 subunit to the p60 subunit. The CAF1 complex is essential in human cells for the de novo deposition of histones H3 and H4 at the DNA replication fork [1,2]. 27.00 21.60 28.10 21.60 26.30 18.60 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.01 0.70 -5.29 4 69 2012-10-05 10:57:55 2012-10-05 11:57:55 1 8 48 0 39 64 0 238.00 41 28.17 NEW AKEWDEFLAKGKRFRVLQPVKIGCVWAAD.RDCAGDDLKVLQQFAACFLETLPAQEEQTPKASKRERRDEQILAQLLPLLHGNVNGSKVIIREFQEHCRRGLLSNHTGSPRSPSTTYLHTPTPSEDAAIPSKSRLKRLISENSVYEKRPDFRMCWYVHPQVLQSFQQEHLPVPCQWSYVTSVPSAPKEDSGSVPSTGPSQGTPlSLKRKSAGSMCITQFMKKRRHDGQIGAEDMDGFQADTEEEEEEEGDCMIVDVPDAsEVQAPCGAASGAGGGVGVDTGKATLTASPLGAS 
....................................................................AKEWD.EhhuKGK+h+VLQPVhlGClW..tut...ts.s.....ss-LclLQpFs.ACh......L-s..s.......spE.-.p......s.P.c.s...o.p+p....p.+..DpQlLsQLLPLLHGNVNuSKhIIpEFQEh.C...Rp.....Gh..h..u..p.t..ss.......S..s................Pup.......oh...p..oPt.sps.sslPSKu+LKR...lISENSV..Y.E.KRPca..R.h.CWYVHsp.VLc..pF..pQ-cLP.VPCQWsYlTp........s........Ps.ss...+.-.D.sG.s.....s..ss.ss.s.p.usP....h....ShKRKssuSMsITpFMK+.tt.t.h.....-.DGFQuD............-Chhht......................................................................................... 0 8 10 20 +15399 PF15540 Toxin_62 Putative toxin 62 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains a two conserved aspartates, a glutamate, a histidine and an arginine residue and an RT motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6 or type 7 secretion system [1]. 26.60 26.60 27.20 84.40 26.00 24.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.42 0.71 -3.97 6 18 2012-10-05 10:58:54 2012-10-05 11:58:54 1 6 18 0 5 18 0 113.50 57 17.75 NEW stspaphNsspDlDhRGpGpoY+-....ALDEAF+RTGlPKcpFsVTK...WuKDcsGKShPVEa...pGPN.GApVNlDlP+hs.....sshssGPcpPHlGYQTsGK.uGuutsRGHIhlDslPssR .N.LYpYAPNPIRWIDPLGLAILEHQ....SNFDAARRTGFENAGMTNPEDVTFSKVDPKTGTVVEF...KGPN.GAKVAYDAPHAD.....MDVTAGHDKPHVGWQSAGKRGSGGANRGNITYDGPQHPH........ 0 1 1 4 +15400 PF15541 Toxin_63 Putative toxin 63 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
25.00 25.00 140.00 139.00 23.40 18.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.37 0.72 -4.07 4 4 2012-10-05 10:59:14 2012-10-05 11:59:14 1 2 4 0 2 4 0 97.80 47 14.68 NEW LKGWERAH..GsGhGsEAKtGIhYAP.tVNQElQNRGhEKaIRELa.lspssslKlFhpscshAaP..........GpLhLcolpYcV.tEttstRRIl..F-ASl LKGWERAH..GsGhGsEAKtGIhYAP.tVNQElQNRGhEKaIRELa.lspssslKlFhpscshAaP..........GpLhLcolpYcV.tEttstRRIl..F-ASl 0 1 2 2 +15401 PF15542 Toxin_64 Putative toxin 64 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains two conserved histidine, a serine, two lysine, and a threonine residue and a HxVP motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6, type 7, and MuF-type secretion system [1]. 26.90 26.90 26.90 28.10 24.00 26.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.48 0.72 -3.45 34 374 2012-10-05 11:00:33 2012-10-05 12:00:33 1 12 254 0 20 174 0 87.00 40 46.17 NEW psp+Qs+HhhGpp.Ycpph....hsstth.Sahh.lsscclpcLlpphuusGpllhs.ptG.appKElIDF.sphIGcsa....hsGca.l.cTshGplHYSK.sG.sHIVPth ......................tQt+H..spp.Y.ppK.....sst.hh....SYhh..ls.pphpclhcc.hhsGsllhs..ccs.FchKplIsa..pplIGKsa.....hsspY.l.ETphGKlHYSK.TG.sHIVPal.. 0 6 9 18 +15402 PF15543 Toxin_65 Putative toxin 65 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
25.00 25.00 206.90 206.10 21.90 21.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.75 0.71 -4.52 2 2 2012-10-05 11:01:05 2012-10-05 12:01:05 1 2 2 0 2 2 0 130.50 37 12.91 NEW MRRR.ht.hASs.-p.LsFLLstcGpa.......RStRsh+t+.hhpcPD..IVQhGHhhSs....thucc-alMLpsu..N.hsshT....chpGuVhspsAV.ItGhsVDl.TAphWEshGhL.tGTVusuPhVp.P MRRR.ht.hASs.-p.LsFLLstcGpa.......RStRsh+t+.hhpcPD..IVQhGHhhSs....thucc-alMLpsu..N.hsshT....chpGuVhspsAV.ItGhsVDl.TAphWEshGhL.tGTVusuPhVp.P 0 0 1 2 +15403 PF15544 Toxin_66 Putative toxin 66 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 25.00 25.00 486.60 485.90 17.50 16.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.59 0.70 -5.13 2 2 2012-10-05 11:01:34 2012-10-05 12:01:34 1 2 2 0 1 2 0 269.50 35 70.37 NEW hsTulsPph............ulhupspstsppspuhGQslhA.sEl+LhDERshhssLsGR..NLGu-GTYhINPpISATLGts.uch.usPplFhG-polGuHV+LMVpNPQT.pch.sGGapth+FoApYpLKNucoWpTPNMshNhtstNpRtLcGSlsAsusL.Itt.lsVpusApIGss.ooEpshcAsl-EFpLGGosoYasHhHclphhlsSuG.Asup.h-SsGcG.pTscsVaspphstucph-spcGS+sh.p.tphP.RVpMPL.+ hsTulsPph............ulhupspstsppspuhGQslhA.sEl+LhDERshhssLsGR..NLGu-GTYhINPpISATLGts.uch.usPplFhG-polGuHV+LMVpNPQT.pch.sGGapth+FoApYpLKNucoWpTPNMshNhtstNpRtLcGSlsAsusL.Itt.lsVpusApIGss.ooEpshcAsl-EFpLGGosoYasHhHclphhlsSuG.Asup.h-SsGcG.pTscsVaspphstucph-spcGS+sh.p.tphP.RVpMPL.+ 0 1 1 1 +15404 PF15545 Toxin_67 Putative toxin 67 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and HxR and HxxxH motifs that is usually exported by the type 2 and type 6 secretion system [1]. 
25.00 25.00 44.10 43.20 24.30 23.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.36 0.72 -4.10 7 8 2012-10-05 11:02:03 2012-10-05 12:02:03 1 4 8 0 4 8 0 69.90 47 10.05 NEW LAsD.KQsphlRGWl+pElp+lcp....sp..........+pslRlP........PGh-LAHhRGhEupKGYuYpYosLQstcLHKhQH+hD LAsD.KQsphlRGWl+pElp+lcp....sp..........+pslRlP........PGh-LAHtRGaEupKGYuYpYSsLQstDLHKhQH+hD. 0 2 3 3 +15405 PF15546 DUF4653 Domain of unknown function (DUF4653) Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAB5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 93 and 229 amino acids in length. 27.00 27.00 90.20 81.70 23.40 22.70 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.78 0.70 -5.08 6 45 2012-10-05 12:37:08 2012-10-05 13:37:08 1 2 32 0 24 42 0 196.70 64 97.68 NEW MF.AlQPGhs......ctupaLGu.PsuVsp.EhpPDSNSNFVppuhDANENWpth.utl-shh.+SpSE............ScNpshtu.t...........P.......EutVRSPPEGAEIs...GscPEKsscsssss...SPLEDNGYASSSLSlDS.sSSP-susuosp......uP.u.P................sDsLhPoVupAhQQLQApERYKEQEKEKHHVHLVMYRRLALLQWIRuLQppLsDQQsRLQESFDTILDNRKELIRsLQpttsPotsQ-pu ..............................................................................................................................................MF.hhpPshs......ctup.lGs.Psulsp.EhpPDsN.NFhtp.shDsNENWpth.uts-shh.pSpsE............psNpshts.t..................sEttVR..SPPEGAEls...GscP..Ec.sssussp...SPLEDN..GYASSSLSIDS.uSSP-suCuTPp.......sP.u.P.........................sDsLhPoVApAVQpLQspERYKEQEKEKHHVHLVMYRRLALLQWIRGLQppLlDQQsRLQESFDTILDNRKELIRCLQQttsPstspptu........... 0 1 3 9 +15406 PF15547 DUF4654 Domain of unknown function (DUF4654) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NEQ6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 169 amino acids in length. There is a conserved IDC sequence motif. 
27.00 27.00 28.00 71.60 21.10 18.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.87 0.71 -3.85 12 24 2012-10-05 12:39:56 2012-10-05 13:39:56 1 1 21 0 14 21 0 132.00 62 78.91 NEW AppQKAlPsAHLTFlIDCupGKQLSLAAPssPPpuP....pPp.GPVsPPMKTYIlFCGcsp.P.hTQcssLGtssLApA+sTLPPpRGhsAPsS.PsSshsPQEsPcAKGsPlKssss.RSSsWGTVKGSLKALSSCVCGQA- ..At+QKAlPsAHLTFVIDCspGKQLSLAAssuPPpAP....uPspGsVTPPMKTYIlFCG-N..PpLTp.sPhGGGsLAQARuTLPPCRGssAsAS.PlSPhsPQEsPEAKGpPlKssPs.RSSsWGTVKsSLKALSSCVCGQAD.. 0 1 1 1 +15407 PF15548 DUF4655 Domain of unknown function (DUF4655) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NEP4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 533 and 570 amino acids in length. 27.00 27.00 439.50 198.90 24.50 21.00 hmmbuild -o /dev/null HMM SEED 532 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -13.08 0.70 -5.60 13 28 2012-10-05 12:43:12 2012-10-05 13:43:12 1 3 21 0 15 27 0 485.20 57 91.37 NEW lKTNKsGSKVAVSAp+GuEV.ossoPQRGpGY.lhASSQRSAAlSlSPsspR..RSEAApsTo.+SsSDaPRosSPQoGPulSussoP.RGTE......TRoRsEs.............RpsSPpRKsQQsQ..TssSHsluh....pRNVSPsREEuTRRGuEsKsGR.ElopRsSlssDAKSSRRLSFlDpKDNlp.......s.l..E-DPPSKVQsPQGVRVPRRh.laPKDEAVQT......EPIR+.hTsuElRSP+sPsoPE+uSSRhssD.RsspR+lPuQEuEhGPpSSIhoEPKAL+RN..hsLESSLKLSVLKDLDuGHRV.ShRs-PESh+KpSVYsETKPSsKVLIuS-VEosh+SShRuDsEluRRVTISsGsQSlpsspRsTuRssSESP.+KSolasoP-..aKt....p..+Pscsshh.stss.RYs......Es.SpKPSlHA..ELELTPRPLPPRSLPRYGPDSSWWALLNPElEhP.QSpPTT.........PD.....FEPK...SPP.....PLDPhlShaEMDSSPFCEDLhFQREKA........ossP....PssPKESPSpA.PLREVPQ...uP..........KaTsKQPlQRFSAFFLDVSEEMYNRVIWWL 
..VKTNKsGuKVAVSApRGuEV.TsToPQRGpGY.llASSpRSAAlSlsPS.pR..RSEAAHsTo.+SuSDYPRSsSsQsGPGh.ussoP.RGoE......TtsRoEu............uRHsSPHpKoQQ.............oQ..ThASHssoh....pRNVSPsREEusRRGGESKsGR.-lup+sS.hPDAKSoRRLSF.DQKsNLp.......o.l.hE-DPPSKVQNPQGVRVPRRhLsaPKDEAVQT......EPIp+hhTsuElRSP+sPo.PEHuSSplusD.pTAQRRlPspEuEsuPhuSI.oEPKALaRN..hNL-SSLKLSVLKD.DGsHRV.Ss+s-PESl++aSVYsETKPSsKVLlSSpVESNVRosIRGsoEVGRRVTISPusQSlpssp+VTupuVSEus.+KSShaVTPEshYKQ....pTp+PscsshMS.GPs.RYP......E..SpKPSlHA..ELELTPRPLPPRSLPRYGPDSSWWsLLNPElEhP.QSpPTT.........PD.....FEPK...ssP.....sLDshLShachDSSPFCEDLhFQREKAS.SP............PssPKEsPShA.PLpEVPQ...sP..........KaTsKQPIQRFSAFFLDVSEEMYNRVIWWL.. 0 1 1 2 +15408 PF15549 PGC7_Stella DPPA3; PGC7/Stella/Dppa3 domain Iyer LM, Aravind L, Eberhardt RY, Coggill P re3 Manual Family The domain belongs to a fast evolving family known only from the placental mammals [1-3]. The PGC7/Stella/Dppa3 protein protects imprinted regions from demethylation post-fertilization [3]. This suggests that it might bind methylated DNA sequences directly [4]. The conserved core includes a postively charged helical segment and a C-terminal CXCXXC motif that is predicted to chelate a metal ion [4]. Most placental mammals contain 3-6 paralogs of this domain family. The CXCXXC motif is also conserved in a subset of fungal MBD4-like proteins [4]. 
32.30 32.30 32.30 36.00 22.70 27.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.22 0.71 -4.25 11 48 2012-10-08 22:59:17 2012-10-05 13:55:24 1 1 21 0 18 71 0 155.40 37 88.99 NEW MD.P.QKhNPshss-S.....S.htstEsSp-ssus..SQPshSEpLhhsLusLosuPGsc.s.sPLPEuLLpQ+YR--..KThpERphERlt....asQRK+phLtplRR++hcHhAPY.....plcc....cu...+lssu.sD..RcQNtFRCcC+YCQSpR.NhsG.Pu-pp......SW-sLspGLouLslSlGTNpss .....................MD.P.QKhNPs..scS.....S.MsstEsSp-ssusSQP.hSEpLhhsLSsLo...sPusp.s.sP..LsEuLLppphRpc.....cohpE+hhE+lt....F.pR+hthLtpVcRc+hs+Mtsa......tlt+...........ct...+hs.h...s-..+spp...FRCpCpYCpupt.........................W-......su.shplGsspss................... 0 2 2 3 +15409 PF15550 Draxin Draxin Eberhardt RY, Coggill P re3 Jackhmmer:Q8NBI3 Family This family of proteins inhibit Wnt signaling and act as chemorepulsive axon guidance molecules [1-2]. 27.00 27.00 33.60 33.60 25.80 25.70 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.44 0.70 -4.77 10 51 2012-10-05 13:15:45 2012-10-05 14:15:45 1 2 36 0 31 52 0 272.10 61 88.17 NEW tpshssNshppppPththspusHp...........+RpGhuKKcRusGhhu..t.thssscsss-uoslpuLssVtLEt.tP...uuLhp..cK-sahGhchP.stR-Np.sG.pc..+s+K+uR-H++..........sRRD+lRpp+Gc...stP..SSlh...Kcscs.EDth.ss..............s-ssosluPslh.hoshtss.shoEcsssLPssSs+PQh..u.Rs+ssGDVMPTLDMALFDWTDYEDhKP.-hWPSuKK..KEK+RSKshSsGNpTosA-s.EPCDHHLDCLPGSCCDLRcHlCKPHNRGLNNKCaDDCMCpEGLRCYAKFHRpRRVTRRKGRCV-PEolNuDQGoFIsV ......sps.PEN+lsh.tPtLapspuuHH...........+R+G.uKKcpusGhsu..t.ptssspss+puotLstht..h.tp.uP...AuLhQ..cK-lhLGhthPasE+ENpsPG..E...+s+K+uREH+R...........RRDRL+hHRG+shstuP..SSLh...KKsEhsEsth.su............shpEuSTSLAPThhaLToh-su.suTEEs.lLPsTShpPQt....psRsDGEVMPTLDMALFDWTDYEDLKP.-sWPSAKK..KEK++uK..sSsGNETSsAEG.EPCDHH.DCLPGoCCDLREHlCpPHNRGLNNKCFDDCMChEGLRCYAKFHRNR.RVTRRKGRCVEPEoANGDQGSFINV.............. 
0 2 5 12 +15410 PF15551 DUF4656 Domain of unknown function (DUF4656) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NAX2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 286 and 398 amino acids in length. 27.00 27.00 37.50 37.50 18.40 18.30 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.31 0.70 -5.27 3 51 2012-10-05 13:20:37 2012-10-05 14:20:37 1 2 35 0 29 42 0 317.30 55 90.92 NEW sh+cuuQPPssRpTRRP.sPKDPGt+tPESITFISGSAEsPPEsPACC.LsRPWVWshCKAVFCFR+CRuClQRCGuC.VRuC-sCLuusssPEuhuEtuWuKEHNGsPP..SPshAPPSRRcGpRLKsTs..GSSFSYPDVKLKGIPVYRYRsuhuPusDsDSCsKEPsAEsPPhRaSLPuTLuuS...PRSSE.EYYSFHESDLDLPELS..SGSMSSREIDVLIFKKLTELFSlHQIDELAKCTSDTVFLEKTuKISDLISSITQDYHLDEQDAECRLVRGIIRISTRKSRoRPu..TsEGRpoRsutsuGsAPDSGsDTMluSVL.SQDDLsVQISQETTSDAIAR+LRPYGTPGYPsSHDSSFQGTETDSSGAPLLQVYC .......................................................cp.sQPPssR+TRRs..DPKDPGt+GPESITFISGSAE.sss.Es.PsCC..hpshsWshC+AsFC......Cl.pCGsC.h+uCssCLuss..-ss..Ests-tshsc-H..NGh.Ps..SPspusPsc+c..upc...hKp.sh....GSSFSYPDVKLKGIPVY.Y..s.u.h.DsDSCsKEPlscssPhRpSlss.ThssS...PRuSE.EYYSFHESD.LDlPEhu..SGSMSSREIDVLIFKKLTELFSlHQIDELAKCTSDTVFLEKTuKISDLIsSITQDYpLDEQDAEsRLVRGIIRISTRKSRsRPp.....sspG.ppp+.tutssusAPDSGpETMlsSsL.SQD-..LsVQISpETTADslARphR..sau....ssGhPhu+DS.SaQsT-TD.SSGAPLLpVYC......................................................................... 0 2 5 12 +15411 PF15552 DUF4657 Domain of unknown function (DUF4657) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NAV2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 305 and 370 amino acids in length. 
27.00 27.00 27.60 27.00 19.10 18.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.99 0.70 -5.13 14 41 2012-10-05 13:34:09 2012-10-05 14:34:09 1 1 27 0 22 38 0 262.30 55 77.22 NEW MuVGDSsLAs.PGLSQDSLshEPsGSP........EPPApluRLLAS+KLEQVLERS+pLPouPASLStpppshp..uKPcsEhslhuAGcQEuTcAEo-LEAGLEEuEVV......tGhtP-AWACLPGQGLRYLEHLCLVLEQMARLQQLaLQLQoQRP.tDPEEEEp......slAPuPsPS+sPGspVpGshEhLSQTccT.GuKsAuPsKVGsPSssPPRLsEAPsEPAHohPSSQGHKpDLSHWsKVKVLLNRIRWRSP+pPEPsAPPs..Gs.uPRhESR-.LPE+P.spshRKTFMPSLVVKKQR.AKNLSV ..........................................................................MsVGDSsLAs.PGLSQDSLshEssGSs........EPPAplsRhLASpKLtQVLERS+pLPssPsolStpppshp..sKPcpEhPlhuAGpQEuhcA-o-LEAGLEE.EsV......uuLsPtAWACLPGQGLRYLEHLCLVLEQMARLQQLYLQLphQRPPsDPtEEEp.........shAP.PsP.+sPGstspsPhELLSQTcp......T.GAcuAuPPKVtlPoANPPRLsEsPsEPsaphPSSQGHKpDhSHWsKVKVLLNRIphRS.+tPEPsAsPD..Gs.sPRh-S+c.LPE+P.spshRKTFMPSLVVKKQRAKshs..................... 0 2 3 5 +15412 PF15553 TEX19 Testis-expressed protein 19 Eberhardt RY, Coggill P re3 Jackhmmer:Q8NA77 Family This family of proteins is expressed in testis [1]. 27.00 27.00 61.40 41.80 25.80 25.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.24 0.71 -4.37 12 30 2012-10-05 13:48:42 2012-10-05 14:48:42 1 2 17 0 16 24 0 142.80 58 56.18 NEW MCPPVShRastEGMSYLasSWhYpLQHGsQLplCFuCFKuAFL-lK-hLE.EDW.....E-E-WDsE.ME.hoEAtsEQtussG..t.uW.GpG.hp.spusS.shGsGsLssus.t.p-..hs.phVPTELtPQ-AVPLuLGsEDADWTQuLPWRhsslPsCoHWPo ..........MCPPVShRattEGMSYLYuSWhYQLQHGsQLplCFsCFKAAFL-hKchLE.EDW..........E--sWDsE.ME.hoEAtu...........EQtu..ssG....t.uW.Gpu.GpsspusS.uhGsGsLssus.s.E-h.LssphVPTELtPQ-AVPLuLGsEDADWTQuLPWRF-GLssCSHWPo... 
0 1 1 2 +15413 PF15554 FSIP1 FSIP1 family Eberhardt RY, Coggill P re3 Jackhmmer:Q8NA03 Family \N 27.00 27.00 30.80 28.00 24.90 23.10 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.61 0.70 -5.66 19 68 2012-10-05 14:32:29 2012-10-05 15:32:29 1 4 46 0 43 69 0 301.10 34 73.22 NEW DIIKGsLDuIS+PASs....SRs+PuS.RsSsu..............SLEVLoPEP...s.........htl-o.s.p.susc-................spopssssEccR.........spts.p....tt............p..phsccupD-chshsppph.sppoc-.c...........c...phDspLpcAIcKM++LD+ILs+K....h+EKEVK+QGlEh+hKLWEE.Lp.php....ppshpSsEEhcNT+KFLuLTsssppss.....ph..p.pssahsVFtTQ.....lPs-c...hEpphpphtp.chTtssppscShh+sc+p.hspppp.ch....................K+sQDFIKRNIELAKsuss.lsMhDcEK+RLtELLcDlD-tsushssspu.Dp.sshLlPGcGYT.ss.spppQLAEIDoKLQp.lssss.th...oShs.sthpspssQ....cs.hppp.psschsPGE+lLRsTKEpRD.QppRL+EI-pcLcchcEs ..............................................h.cGs.c.h.p.tts........stp.R..................slEhLss-s................hp....t.p.tpp...................tpsppp.tEppp..................s..........................hscptpcpp.sh.ptph.sttscp.c..............tphDsplpcAIpKMc+LDpILsc+....h+EKElK+psh-h+h+LWc-.lp.t......ttt.t.pcp.tNTppFLuLss..tt...........p.ptsh.slF.TQ............ls.pp....ctt.pt....s.ohcsptscuhhcst+p.hppppt.p.t..................t++spDFIKRNI...........E...........LAcputp.l.sMh-cEKcRL.ELL+Dl--tsss.sstpu........s..sshlssspGYs.ss.sppppLAEI-.KLQp..hssts.h.....s.s.st.tpp.sp.............................t.....p.tt.p.tsG-plL+ssKcpR-..ppRL+cI-ppLp.hpc......................................... 0 11 17 27 +15414 PF15555 DUF4658 Domain of unknown function (DUF4658) Eberhardt RY, Coggill P re3 Jackhmmer:Q8N912 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 161 amino acids in length. 
27.00 27.00 88.90 88.90 22.20 20.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.60 0.71 -4.27 9 27 2012-10-05 14:35:39 2012-10-05 15:35:39 1 1 19 0 12 30 0 122.60 61 69.82 NEW hsRAtREssR+CPsSILRRS.sE+ps.GsEPQRToRRVRFREP.EVAVHYIAsRssT..sTspu....PuRPRP+tGSLLLRLoVClLLllALGLYCGRAKPlAhALEDLRApLLsLlLRLpHsALoCW+sLLpL ......sRApREssRKCPPSIL+RSpPE++pstAEPQRTSRRVRFREP.tVsVHYIAs+ssT..uTl+s..........PuRP.R...P+G..........GSLLLpLslClLLslALGLYCGRAKPVAtALEDLRApLLsLlL+LhHsALoCW+sLLpL..... 0 1 1 1 +15415 PF15556 Zwint ZW10 interactor Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95229 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 127 and 281 amino acids in length. 27.00 27.00 81.60 61.20 24.20 23.30 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.82 0.70 -5.13 8 45 2012-10-05 14:41:10 2012-10-05 15:41:10 1 2 23 0 17 49 0 188.50 66 88.54 NEW QEEAELPAQILAEFVMDSRKKDKLLCSQLQVVDFLQNFLAQEDssQGLDPLASEDTSRQKAlAAKEQWKELKATYQEHVEAIouuLTQALs+hEEAQRKRsQLQEALEQLQAKKQVAMEKhRsAQKQWQLQQEKHLQHLAEVSAEVRcRQoGsQQELEpLhQELGsLKQQAGQERDKLQRHQTFLQLLYTLQGK.LFP....EAEAEhPQ......L-LPEDK....sQ..TpPQEQ......NTGDTMGRDtuVopKAsG.hQPAGDsoLP ....QEEAELPApILsEFVhDSpKKDKLLCSQLQVVDFLQNhLAQEDTs.puLDPLASEDTSRQKAlsAKEQWKELKATYpEHVEAIphuLTpALsphEEAQRK+sQLQEAhEQLQAKKQhAhEKh+hAQpQWQLQQEK+L....QpLAcsSAEV+pRpptsppcLpt..QcLtsLpQQAtpcp-KLQR.QoaLQLLhoLQuK..hsEu-sEh....................................................................................................... 0 1 1 3 +15416 PF15557 CAF1-p150_N CAF1 complex subunit p150, region binding to PCNA Coggill P pcc manual Domain CAF1-p150_N is part of the N-terminus of the CAF1 complex p150 subunit that binds to PCNA - proliferating cell nuclear antigen. The PCNA mediates the connection between CAF-1 and the DNA replication fork. 
The CAF1 complex is essential in human cells for the de novo deposition of histones H3 and H4 at the DNA replication fork [1,2]. 25.00 25.00 37.10 34.40 21.00 16.10 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.54 0.70 -5.02 4 49 2012-10-05 14:44:19 2012-10-05 15:44:19 1 4 31 1 24 45 0 203.20 44 23.92 NEW MLEELECGAPGARGAATAMDCKDRPAFPVKKLIQARLPFKRLNLVPKGKADDMSDDQGTSVQSKSPDLEASLDTLENNCHsGSDIDFRsKLVNGKGPLDNFLRNRIETSIGQSTVIIDLTEDSNEQPDSLVDHNKLNSEASPSREAlNG...QREDTGDQQGLLKAIQNDKLAFPGETLSDIPCKTEEEGVGCGGAGRRGDSQECSPRSCPELTSGPRMCPRKEQDSWSEAG ................................hAM-CKD+suhP..sKKLlQARLPFKRLNhVPKEK..s-......-t..u..-..s.....p.........s....s.S.....spS...psPD...L-...s...S...L..DsLENcCchuSDlcFpPKLVNGKGPLDsFL.pp.cl.csSl.upo..hVIIDLTEDSN..-p.s..........Du....scHscL.socASsup.cslNG....tpcstsp.u...pu.ppsphs..tET.oslPscsctt..s.t....tpp..s.t.u..shP.L....p.....t..sWSt................................................ 0 1 3 9 +15417 PF15558 DUF4659 Domain of unknown function (DUF4659) Eberhardt RY, Coggill P re3 Jackhmmer:Q8N715 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 427 and 674 amino acids in length. There are two completely conserved residues (D and I) that may be functionally important. 
27.00 27.00 32.50 31.10 25.50 26.70 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.45 0.70 -5.80 21 78 2012-10-05 14:49:01 2012-10-05 15:49:01 1 2 41 0 48 66 0 313.20 35 59.61 NEW tslPpRD+KIsALMLuRhcctphhpE.pptAthtWEp.++p-pph+.p.E+E+phtLtQupctWppphEpR+s+h.........spEp+sptccppppsh......pE.p+W+ctsEcQEp.Rp-+l-+s..phpscp+KppQEppL+ppEchtpsh+Eppp.hthpc+hppAspK+ph+p.hptQ+clptpNh.uphlpHpAhKhhl-pQp+sEc.hh+hSLEpphp+uQEsappLlccRs+EL+EKApKE-pQhp+A+htAEcpEc.p+pc+hcsLsclu-pKlppApphuccsspcKAp+ltp.ph.+E+sp+l.KpKlEcEEcs+hctlppuIc+K-pRsEQIp+EKcsslEpuRplARAShpsR-+l+p.htspoFD+MAhEApLpAshpct ..........h.slspRDpKIsALhLsRhpctphhtE.pttAthtWEp.+tpspphp.p.E+E+phhLtpupctWptph-pR+sp..........s.Epctttccpppp........sEpch+t.s-cQp..Rtp+h-cs..ttpsch+K.pQ.ppL+ppEchhpphREptp...hpc+htpAspp+phpt.hphQ+chpphNh.uphhpapAhh.hhspQtctEc.hh+hSLEtshtRuQ...E.aptLh.cpRp+EL+E+...Ap+E-.QhppA+.tAccpEc.pppt+hchLschu-p+lppApphsccssppKsp+ltp.ph.+E+sp+h.K.KsEc-Ecs+hctlhpuIt+K.pRscplpcp+csshpphpphupAShp.+ctsht....pohDphshEApl.Ap.tt........... 0 9 14 23 +15418 PF15559 DUF4660 Domain of unknown function (DUF4660) Eberhardt RY, Coggill P re3 Jackhmmer:Q8N6N3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 93 and 189 amino acids in length. 27.00 27.00 32.20 31.90 18.20 18.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.14 0.72 -10.72 0.72 -3.97 14 53 2012-10-05 14:52:51 2012-10-05 15:52:51 1 1 37 0 28 47 0 103.20 61 53.36 NEW SSDS-scupscpppt..ts.sssssSsupsttt.........hLPcPDELF+SVS+.PAFLYNPLNKp...IDWE...s+......sl+APEEPsKE......FKsWKTNAVPPPEoYssc..-pK......ssPPuhDMAIK ......SSspE.DssEPE-ssp+sss..supuuG..Gptspuc.......tRLPGPDELFRSVoR.PAFLYNPLNKQ...IDWE...RH......VVKAPEEPPKE......FKlWKoNhVPPPEoYosE....KK.....PPPPthDMAIK.... 
0 5 7 13 +15419 PF15560 Imm8 Immunity protein 8 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and several conserved charged and hydrophobic residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family [1]. The protein is also found in heterogeneous polyimmunity loci. 25.00 25.00 154.00 153.80 22.70 22.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.40 0.71 -4.49 7 13 2012-10-05 15:01:24 2012-10-05 16:01:24 1 1 12 0 1 11 0 133.00 45 92.86 NEW MElhlssVlGGplssp....+slpsllhcMRKplKcpFtshphEuLs+hKIslahSGDlSpYsspoGIhpsRYapcKKEalspFCID+phWoup.h.Dlp.KFLhhlps.hlpLGslI+pKLcptsYsFDschaK-hl MElslshVhGGplssp....KslpshlhpMRK.lKcoFpsh-.EulsphKlsLahsGDlSSYhspTGIYQsRYastKKEalspFCIDRspWousKhcDls.hFlhhlpshhIchGsLIcpKLtKhpYsFDhpsYKchV.. 0 1 1 1 +15420 PF15561 Imm9 Immunity protein 9 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and several conserved polar and hydrophobic residues. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 34.30 33.70 21.70 21.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.86 0.71 -4.64 5 11 2012-10-05 15:02:01 2012-10-05 16:02:01 1 1 10 0 1 10 0 140.10 38 90.75 NEW KIlTspt.......hcEIpchlc.+sstFEEIPIIoRasRtchLp+sIu+E-lslaLLNlALFaLNNIpLaA+clLsp.psssLFICITlVD...lE-EhsDl.GFsIPNILIS+cK.htsplppcstlNL-cpcaLpcphupluhpshFphY+opoDDGaGt-IhRIYllPK ..Kl.+shQ.cpI.phlt.hsstFEEIPIIophtR.chhhcshu+c-hslhhLNsuLFhLsNlpLhs+clLspppppsLFhCIThss....p..hsDl.GF.lPNIhISppt.hhphhppt.thslcp..aLpp.hppluh.shFthhhshosDsaG.-hhRlallsK................ 
0 0 0 0 +15421 PF15562 Imm10 Immunity protein 10 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with two transmembrane helices, and a WxW motif and a conserved arginine between the two helices. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 27.00 26.70 24.40 23.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.14 0.72 -4.21 46 149 2012-10-05 15:02:37 2012-10-05 16:02:37 1 2 105 0 21 102 0 59.30 30 73.33 NEW llhlhlGllhLluuIhsWcWhhssp.ss......ph.....hl.chhGcpshRlhh.ul.lulllIhsulhha ....lhlhlGllhLluulhsWc.Wh..asspss......ph.....hl.cphG+pttRlhh.ul.LGlllIshulahF...... 0 4 16 18 +15422 PF15563 Imm11 Immunity protein 11 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved HxxRN motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 65.90 65.60 22.00 21.60 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.53 0.70 -5.25 3 9 2012-10-05 15:03:45 2012-10-05 16:03:45 1 1 9 0 2 8 0 221.80 47 96.85 NEW MN+IpshEhshSNshFWNFhhsaa.RGaDEE+EsNlDEVh-......lVEscchKuY.W..sWFsQlDlKTsEsttElE..NPRTlshPIsDDlsaslEFHPsEThYFlNDTYIGNlGGHFHLpaLTaoELhuI..pcEKYuSLLFaLLLPLTuIcEsEKsIl+sEIspHLpcIPlFKsHSsYIuDC..ILNGLllu-SsFQppcGIGhlsspNHShRNthsY---+csIpELN+LLV+ .MNhIpphEhshSNshFW.Nhhhpaa.cuaDEE+DsNhDEVh-......ll-pcchKpY.W..sWlsQlDlK....TsEspGEIE..NPRTlsLPIssDlsholEFHPssThYFLNDThIGpluGsFHL+aLTaoELhcI..TcEKYuslLFaLLLPLouI+EpEKshhhstIhp+LppIPlFKpHSsYIucC..ILNGLllusSshQph.tIGhlsspNHShRNthhY---+ppIpELNhLLs+... 
1 1 1 2 +15423 PF15564 Imm13 Immunity protein 13 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in heterogeneous polyimmunity loci of polymorphic toxin systems [1] 25.00 25.00 235.30 235.00 19.80 19.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.69 0.71 -4.44 2 3 2012-10-05 15:04:31 2012-10-05 16:04:31 1 1 3 0 0 3 0 131.00 66 79.39 NEW .ps-FtSVlsEFtpLls-asFpCPcKLWYssLlsLSKpl.DIYYCYlIsRVYKpsGSLEsThWVGsIsRPDDGLENLSANIKhQIGYsQshD.hFF+pCESKIl.lIESG.LhsLlssSQ.EhKh.ShHNh ..oDDF+SVVAEFGNLINDFGFSCPEKLWYsNLISLSKNVcDIYYCYVIARVYKsDGSLETTLWVGPINRPDDGLENLSANIKIQIGYTQVsDPLFFRNCESKIIsLIESGILKTLLssSQNELKaPSIHNh. 0 0 0 0 +15425 PF15565 Imm16 Immunity protein 16 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly alpha-helical fold and a conserved DxG motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-SHH family of HNH/Endonuclease VII fold nucleases [1]. 23.00 23.00 23.00 24.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.30 0.72 -4.18 6 55 2012-10-05 15:06:40 2012-10-05 16:06:40 1 1 47 0 13 56 0 98.40 38 65.46 NEW ccLhuscphtsQs-CccF.ullcAlpcpl-tcllcsLLpTFuDc.cDa.GlQ-plVpVl-ps-sphahsclstthsplh-pAs.+EWAhhLlGRllNSsstcpthttlh ....................Lhp.Rhhcsp.p-.h.c.FppsLtclhphsssplIs-LCllh-D-.....pp......-VMFsLlahlE...p.....th...-ct...LhplsculP+MlpsA...EWsclLahRILNscthR.tYscl.h......... 0 0 5 7 +15426 PF15566 Imm18 Immunity protein 18 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved histidine residue. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox12 or Ntox37 or Notx 7 families [1]. 25.00 25.00 51.80 51.40 22.60 22.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.63 0.72 -4.63 9 13 2012-10-05 15:10:02 2012-10-05 16:10:02 1 1 13 0 6 12 0 52.50 36 65.01 NEW tcGLthLhcpLppLps+.....spHhHLMTsuWuGsELo....-ppQupcs.pLlsHl+l ..-GLchLtcplppLtp+.....scHhHLMTPuWGG-ELo....EccQup-s.pLlpHl+l. 0 3 4 4 +15427 PF15567 Imm19 Immunity protein 19 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a protease domain such as Tox-PL1 and Ntox40. In some instances, it is also fused to a papain-like toxin, ADP-ribosyl glycohydrolase and a S8-like peptidase [1]. Based on these associations the domain is likely to be a protease inhibitor. 25.00 25.00 31.20 30.50 23.40 22.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.87 0.72 -3.98 20 224 2012-10-05 15:12:48 2012-10-05 16:12:48 1 6 217 0 16 64 2 85.00 73 70.53 NEW hccAhtpAppaLpt....ss-hs.........lll..ststchshGWlFsasotcaLcoGD.tspLsusuPllVc+ssGclt.........shusspP..htcalpcacspt .................Y.HDAFAKANHYLD.....DADLP.......VVI..TLHGRFSQGW..YFCFEAREFLETGDEAARLAGNAPFIIDKDSGEIH.........SLGTAKP..LEEYLQDYEIKK............ 0 8 11 13 +15428 PF15568 Imm20 Immunity protein 20 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved GR, and GxK motifs. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family of nucleases [1]. 25.00 25.00 43.50 43.50 24.40 19.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.48 0.71 -4.25 2 3 2012-10-05 15:13:23 2012-10-05 16:13:23 1 1 3 0 1 5 0 96.70 50 94.16 NEW MAcpRhhl.GGVuLhhGRV+pu..shh..pDclE.hL.D.sFhp-hPFKhluhlhhaGpKs.LhPpY-.INK+au-LPltlELchthLchAsp...-llhshFhhusl-sLlcVucKYtLPst.Lcp.suh MAHNRKYVPGGVALVMGRVRNS..GKVVEQDElE.hL.D.sFhp-hPFKhluhlhhaGpKs.LhPpY-.INK+au-LPltlELchthLchAsp...-llhshFhhusl-sLlcVucKYtLPst.Lcp.suh... 0 0 0 0 +15429 PF15569 Imm21 Immunity protein 21 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved phenylalanine and tryptophan residues and a GGD motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox19 family [1]. 22.20 22.20 24.20 24.20 20.30 17.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.12 0.72 -4.21 11 48 2012-10-05 15:14:00 2012-10-05 16:14:00 1 1 45 0 6 26 0 84.30 53 77.70 NEW sLt.lGltphAashpDsLchlshhcppphsILGGDVYphpsstlclTYDuWaas...cpss-sshp...psh.hAh-YIspYhs..psup.phlasl .......s.LhulGIcEsAhPlshAIDILNLalsERILVLGGDIY.IKKDN..YFYQTYDNWYYE...GSNLFNSI-.......KAMHYLSQlKhENAYVSFV...Lph.............. 0 3 6 6 +15430 PF15570 Imm24 Immunity protein 24 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold with conserved tryptophan, proline, aspartate, serine and arginine residues. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-AHH family of HNH/Endonuclease VII fold nucleases [1]. The gene for this toxin is also found in heterogeneous polyimmunity loci. 23.40 23.40 24.10 27.00 22.00 22.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.69 0.71 -4.39 11 23 2012-10-05 15:15:18 2012-10-05 16:15:18 1 1 15 0 11 23 0 119.60 29 55.27 NEW hshpW.......aphs.stphs.......p.....hPppLa.lCs++hctLpFDah..as.shaIlS-cFlchlpchphspphptshlplss+cGc.pls.cchYYhlRhhahsp-.F...ctSph.pp-hpcsh...........l..Y.clpL+-ps. ....h.phpW.......apspstth........phPspLYhlCsKKsttLpFDah..as.phhIlS-cFLchlpc.shsccashupltlls+Ksc..ls.cchYYhlRl.ahsp-.Fh..ct.ph.pp-hpcpl...........l..Y.chpL+-psh...................................... 0 0 7 11 +15431 PF15571 Imm25 Immunity protein 25 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI1, Tox-URI2 or Tox-ParBL1 families [1]. The gene for this toxin is also found in heterogeneous polyimmunity loci that show variations in structure even between closely related strains. 25.00 25.00 27.10 27.00 23.00 22.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.81 0.71 -4.01 3 30 2012-10-05 15:15:50 2012-10-05 16:15:50 1 1 30 0 3 20 0 125.40 37 95.19 NEW MKaFIGGEIE....psIsDKFR+sRNsV.....I-aLDsL.GscIpSINElSFhVas.LKcFphsPlo+YpKKpNRlELEIhI-F-pFEsANDspssELLKQsILAVIpcYcNKsI.psSlDlIhoKlEsEIpp .M+hFloGELE....psIu-c....FRcsRspl..........hch..ssh.ts-l.ppIs..lshhVhh..hKpap....t.pEhphap+Kppch-hRLhI-a-pF.oANDSppspLllpsIlpuIcpht...sKs..pp.h-s...KhctsI....................... 
0 1 3 3 +15432 PF15572 Imm26 Immunity protein 26 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved C-terminal tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-ColE3 family [1]. 25.00 25.00 26.70 26.50 22.20 18.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.12 0.72 -3.86 19 24 2012-10-05 15:16:50 2012-10-05 16:16:50 1 3 21 0 11 26 0 95.40 35 75.88 NEW hKLl-apschlpRGslhRhsut...............aPaE.clVDFMVh-h.s..tt....shuLllsSGaKAGhlh.hhhPpEuhsppst..ulSTpWllcNWp+WlYscCslccVal .....+Ll-atschlpRGslhRhsuh...............aPa-.plVDFMVh-t.s.tt....shuLllsSGaKAGhlh.hhhPpEuhsppsh..ulSopWllpNWp+alYspsssccVhl.... 0 2 8 10 +15433 PF15573 Imm27 Immunity protein 27 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved KxGDxxK motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 26.20 101.60 23.80 18.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.50 0.70 -5.18 10 46 2012-10-05 15:17:35 2012-10-05 16:17:35 1 1 40 0 5 43 0 234.00 55 96.89 NEW sLhsuIWaG-loshols-LKpsLLsspTE+EsllLIlELhKtGDFoVKsLLIpLMNpTcDEsVLNLCIRLFCSVuTHDDL+-oNNL+FLusASEhuVaTFsuuAlsTLSYEVIPYLLsLLEEWE-..oDlEhuIRDuLDhFLNacspluE-ATlEElGshYh-hlcspDhspYYY+ssLAFPGDLTKcLhpclhlAAp+EEpa+hhlIPSLLSIaTGEKVPlDhcTIIoscch+chlsYIDsLocKsWccGpKYFYGahV ..LhpsIWaG-ho.hshpplKpplhcusTEpEslh.lhELhKtGDFo.KsLLlpLMNpT+DEsVLNLCIRlFhSVATH-DLc-oNNL+FLSpsoE-sVcTFsuuAssoLShEVIPYLLALLEEWE-..o-stphIRDulD.alsacDpluE-Aol-ElGphYhcaspppDsppYYapppLuFPGDLsKclhpcVMhAApsccphph.lIPSLLSIWoGh+sPh-YsTIIospph+-hhsYIs.LopcpWccGpKYFYGahl..... 
0 1 4 4 +15434 PF15574 Imm28 Immunity protein 28 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an all alpha-helical fold and a conserved HRG motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 70.50 70.40 22.30 18.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.59 0.71 -4.56 8 15 2012-10-05 15:18:14 2012-10-05 16:18:14 1 1 15 0 0 16 0 117.70 43 93.88 NEW alsEuhphsschlphlpl..DlpDE......hpRQlluuYlFGhLNuLAh-cshsPsDlQusMIclhIcpLsYsspsAsphspFlIcuTDKpFHPThaAIIHRGlEGYahYp-c.+.s-LpcDFp-IlpllK ..lsEulchlschlpllpl..DlpD-......h-+QllAuYlFGMLNGhAa-csIsPsDlQuhMIcIuI-KLsYosEsAsQhoQFlIcuTD+pFHPThaAIIHRGlEGYahYp-p.c.ppLpcDFp-IlpllK. 0 0 0 0 +15435 PF15575 Imm29 Immunity protein 29 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an all alpha-helical fold and a conserved proline residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-REAse-1 or Tox-REase-6 families [1]. 
26.00 26.00 26.00 26.30 25.30 25.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.29 0.70 -4.73 81 217 2012-10-05 15:19:04 2012-10-05 16:19:04 1 2 129 0 57 210 3 198.40 19 70.51 NEW csshsph...cphthhssthphhsshhtsst...........................h............h...s.t.hhs...sts........Wh..ssh..hhAllscspp.hhshLsphss.........ph.hppstsths..............hshshhttLpshhhsps......shhcplhpthp..tsts.....................phhpthhh.s.hphahslh...p.pDpssappuLtptLph+pthhttpcttt............sscs...........hlshhsluhAplAacp.G.....hplpl-SsYlPppLl ...................................................................tth.hh.sththht.h...t..............................................s.....hs...hhs........h...ssh...ahslhscs.p..hhchLsph.s.........sh.hppspshhs.............thshshhhhhpshhhsps......t.hccLhpphpts.hs.............ts..tchhpphhh...hchhhuls...p..pDtpshctuLpthlch+pthhttpcp.p............hhct...........alshtllshAplAhpp.G.....hplsl-sthlPptLl................................ 0 11 28 42 +15436 PF15576 DUF4661 Domain of unknown function (DUF4661) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95873 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 281 and 302 amino acids in length. 
27.00 27.00 56.40 29.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.68 0.70 -4.73 7 26 2012-10-05 15:20:41 2012-10-05 16:20:41 1 2 20 0 13 29 0 232.20 69 83.25 NEW RRKss+PD.ssPEsRhhDSSPENSGSDWDSAPETMGDsGPPKsKDSGsp+ssGsAPE.SR....csps-QLGS+RMDShKhspsssshpEStRLEAuGslsplGpDPhsusGshc.tVs.Etthss.GPEAPVEKsuR+Q+LLGWLpGEs....GAPspYLGsPEEhLQISTNLTLHLLELLASALLuLCSRPLRAsLDALGLRGPLGLWLHGLLSFLAALHGLHAVLSLLTAHPLHFACLFGLLQALVLAVSLREP .............RRKsh+PD.PsPEPRplDSSsENSGSDWDSAPETMGDVG.PKTKDSGshRsStAAsEPS+....EspVEQLGS+RMDSLKh-pssSsTQESGRLEAGGA.P+LGhD.VDSuGs++sGVSPEGshSsPGPtAPlEKPGRRpKLLGWLRGEP....GAPspYLGG..PEEsLQISTNLTLHLLELLASALLuLCSRPLRAALDsLGLRGPLGLWLHGLLSFLAALHGLHAVLSLLTAHPLHFACLFGLLQALVLAVSLREP........... 0 1 1 1 +15437 PF15577 Spc7_C2 Spc7_C2 Coggill P pcc manual Family Spc7_C2 is a short family to the C-terminus of fungal Spc7 proteins. The Ndc80-MIND-Spc7 complex plays a role in kinetochore function during late meiotic prophase and throughout the mitotic cell cycle [1]. The N-terminal region of Spc7 co-localises with the mitotic spindle, and it has been argued that Spc7 has the potential to associate with spindle microtubules and that this association is regulated by the C-terminal part of the Spc7 protein [2,3]. However, this family represents only the conserved region towards the end of the C-terminus; the majority of the C-terminal part is in family Spc7, Pfam:PF08317. 22.30 22.30 24.00 22.90 21.70 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.70 0.72 -4.32 16 47 2012-10-05 16:37:54 2012-10-05 17:37:54 1 7 47 0 42 48 0 66.50 29 5.52 NEW hchLs.lpptWspuphlsp-l+tlphpaP.ssssc.so.Dpolplpss.lhlsslco+Vclshslst.pltp .....t.plLphVpsuWspAptlsppl+hlshsaPTsls.......c.oS..Dsol...sls....uS.lLLsslpT+VclshsLpt.....p........ 
0 10 26 37 +15438 PF15578 DUF4662 Domain of unknown function (DUF4662) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95567 Family This family of proteins is found in eukaryotes. Proteins in this family are approximately 290 amino acids in length. 27.00 27.00 201.10 200.90 20.20 19.60 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.76 0.70 -4.81 8 21 2012-10-05 16:38:14 2012-10-05 17:38:14 1 1 20 0 13 24 0 259.30 75 91.71 NEW HPIYVRR-PSIPTYGLRQSILLNTRLQDCYVDSPALTNIWsuRTCAcpNIpAPsPGTTSSWEVVKNPLIoSSFSLVKLVLRRQLKDKCCPlPsKFG.EAKspK..RLKsKDsSssKAT.puRhRNSIssKSKpPuGQhPG..SscpR+PAGulpESKESSKEKK.uTVsQDLE-RYAEHVAATQuLPpDotTAAWKGQA.LPETpKRQpLSEDsLTIHGLPsEuY+ALYHuVVEPMLWNPSGTPKRYSLELGKAIKQKLWEALCSQAAsPEsAQcD ............HPI.VRRDPSIPhYGLRQSILLNTRLQDCYVDSPALTNIWhARTCAKQNIsAPAPuTTSSWEVV+NPLIASSFSLVKLVLRRQ.LKsKCCPsPpKFG.EuK.SK..RLKpKDsSshKATQpuRhRNSISSKSKpPAGp.........RRPAGGIpESKESSKEKK.lTVRQDLEDRYAEHVAATQALPpDSGTAAWKGpsLLPETpKRQQLSEDTLTIHGLPTEGYpALYHAVVEPMLWNPSGTPKRYSLELGKAIKQKLWEALCSQuAhsEGAQ+D.. 0 1 1 1 +15439 PF15579 Imm32 Immunity protein 32 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved tryptophan and phenylalanine residues, and a GT motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-REase-5 family [1]. 25.00 25.00 36.10 35.90 22.60 22.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.24 0.72 -4.45 36 138 2012-10-05 19:59:56 2012-10-05 20:59:56 1 1 70 0 44 152 0 95.40 43 46.47 NEW hpllpsls.ttasshalplsspsY..hpcpVF.sD+suVGWMLYLP+hITtpQVPEApsLlPV.ssscp..pGTIIVS.TDtsFoscNsEHlchANcIEIRLlDtsLL ....................h..hlpshh..hhps.hh.sssptY....hpcplF.sD+.uVGWMlYLP+hloppQVPEAcALIsVss.....t.....scp..pGTIIVS.TDsPFoscNPEHVtlAN+IEIRLlDtsLL. 
0 4 9 25 +15440 PF15580 Imm33 Immunity protein 33 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved tryptophan, and WE and PGW motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox24 or Ntox10 families [1]. 25.00 25.00 60.60 59.90 21.40 19.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.18 0.72 -4.50 33 42 2012-10-05 20:00:33 2012-10-05 21:00:33 1 2 40 0 18 47 1 89.90 36 81.91 NEW lpWLpcWYtspCDG-WEHpaGlcIsTLDNPGWplpIDLscTshpshpht.lplcps..........c.sDWhtsplcssp.....FpusuustpLpclLphF+pWl ..lpaLQsWYtspCDG-WEHpaGlpIsTLDNPGWplpIDlspTshttpthtplplcps..........c.sDWhtsplcstp.......FpuhsusppLpclLphFhpW........... 0 7 12 16 +15441 PF15581 Imm35 Immunity protein 35 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and YxxxD, WxG, KxxxE motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 25.00 25.00 137.30 137.10 21.80 19.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.04 0.72 -3.97 4 5 2012-10-05 20:01:14 2012-10-05 21:01:14 1 1 5 0 3 5 0 93.00 41 86.43 NEW l.sslhhAYhhlD+SIoLoYhpQuhEoucuslcsLppLIEcEWRGLsptQVh+KLcsVAtppsttchVlK+..EsslIWF-pVRFpFspGRLspV VssslhFAYhWIDRSISLSYucQGpETAcuoV+sLppLIE+EWRGLPEsQVh+KLcAVAApustu+IVVKK..EGslIWFD-VRFNF--GRLcSV 0 1 3 3 +15442 PF15582 Imm40 Immunity protein 40 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved YxC motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-JAB1 family [1]. The immunity protein typically contains a signal peptide and a lipobox. 25.00 25.00 33.10 32.00 18.90 17.60 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.17 0.70 -5.73 2 16 2012-10-05 20:02:21 2012-10-05 21:02:21 1 1 13 0 1 14 0 222.90 40 87.58 NEW lpQtVttQsclLlDSGhlhspYl.l.Elh.sDSNplY.IttuDsPus.thEhPS+lIKYK-+YLCFIELDEs.MStpEhhEt.....SsaptNLslp..tG+sWLLslpKht-K+ILlch...hhh..hFphsELWsYFSG.l.ptpss.MGlhSHDl.lssSYlss.h-..hcsL.......pshlcph.Gphal+NpTDSVlhLSpsot+p..YAVlsG.DoLhLsLpDSLPlhluPp-hK.LcYcS.P.ps..Fhp.h.ccD.W..hYpLFscSTasFlNlNsh.pph+lMapD.ssYu.slp.o.sph.h.IhN+GlYDKc.t.hphF+a. ....................................................lpQtVttQsKlLlDSGhl.s.Yl.l.Elh.NDSshIY.IpsADsPus..thEhPSKllcYKs+YLCFIE.LDE..MStpEhhEt.....SsaptNLhlp....csWlLslpKhtptphLlch....ha..hFphspLW...............................................hshlpph.GphahpN.hDphhh.spsoh+p..aAVlsG.DoLhLsl.Dols........p..........................hYpLhtcSTa.hhs.psh..thpl.apD....a......t..s..h...l.pcuh.......................... 0 1 1 1 +15443 PF15583 Imm41 Immunity protein 41 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved glutamate residue. The domain is often fused to one or more immunity domains in polyimmunity proteins [1]. 
25.00 25.00 30.00 39.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.92 0.71 -4.42 13 63 2012-10-05 20:02:45 2012-10-05 21:02:45 1 12 59 0 9 44 1 147.30 48 27.96 NEW MYI-.KYWGsaIGGoDDSLsLlsYLtspc+-cIoLsEIFsDhGLDcL.....shsF+pTsp..ltap....sp-G.h-h-FcaAIDllsDLAAllLECptsGpVsLp-L.t.ss.psRhIRIsAosEEhshlspALtDFspsPhcYDLsEMhs--DhtEhApcs-pLRpEL ....MhI-.pYWGpahGsSsDShsLspYLtspc+E.lslsEIFpDhsLDcL.....shNapps............phDG.sthcFc.AhplVhDLAsLlLEsctsGphNLtcl....tss.sRhhRIsATsEEhhslshALpcFAhuP.-YcltEhhD--.-hhEhuphsEplRtpL....... 0 3 5 8 +15444 PF15584 Imm44 Immunity protein 44 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly all-beta fold and GxxE, WxDxRY motifs and a glutamate residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox48 family. This domain is often fused to the Imm71 immunity domain [1]. 25.00 25.00 28.90 51.50 22.40 17.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.36 0.72 -4.13 17 44 2012-10-05 20:03:57 2012-10-05 21:03:57 1 2 19 0 19 51 0 100.50 46 32.10 NEW sFPssLPtlP..s.stssllpSGcclPssGIWEPhss.psKl.................GChNYhltGohAPphttpsss.................pthsspWRLlWEDpRYpDGoIP-EEpsYlh .......hFPssLPcVP...pstsslIpSGpclPC-GIWEPVshtpsKlhulhsh...spt.h..sGChNYFltsstAPphtp-sss.................t.lsspWRLLWEDcRYtDGsIP-Epphal.... 0 0 0 5 +15445 PF15585 Imm46 Immunity protein 46 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved GxaG motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a Tox-REase-3 domain [1]. 
25.00 25.00 27.90 27.60 21.20 20.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.43 0.71 -4.09 17 27 2012-10-05 20:04:40 2012-10-05 21:04:40 1 1 21 0 9 26 0 115.60 35 94.69 NEW MaEaHGWhTIptospppDs...tph-pllsclpphlschshs.......shlcLphhNGphhlphuGhsNH+spph.pllslappluclAPGSYGLLYh+DDE..-.....thsNpFRVahhtRGplocppDsaLSPslPhlED ........MhEaHGWholptostttDp...tp.cthhpclpthlschsh........shlsLphhNGphhlphuGhsNH+st.h.pllslappluclAPGSYGLLYh+DDE..-.....thsNpFpVhhhsRGplspppDsaLSPslPslED.. 0 5 8 8 +15446 PF15586 Imm47 Immunity protein 47 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved Wea (a: aromatic) motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox7 family [1]. 26.90 26.90 28.30 38.20 25.30 25.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.38 0.71 -4.26 29 121 2012-10-05 20:05:12 2012-10-05 21:05:12 1 1 87 0 25 68 1 107.40 38 93.43 NEW +splKulpss..chs...LcsahP-csssFshhlplpIGs.pspp.Gu-.FplhlCTPcWLppphtptthh.htR+hLlVpc.Y.chcpIhshlpchlspCpupsWtclupKLuRhhtWEaEDYp ...........+lcLKshsh...DhD...hEcFsP-h.-NFphhlsLsIGh.-spp.G.ush.FplhICSPcWlpp...phpc.t..h......h...pstllhpp.a.shchIhp.IscIL-hC.sp.poW-cohspLtRaFsWEFEDYp.... 0 7 14 22 +15447 PF15587 Imm48 Immunity protein 48 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved lysine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family [1]. The protein is also found in heterogeneous polyimmunity loci. 
25.00 25.00 35.30 35.20 20.70 19.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.98 0.71 -4.84 7 30 2012-10-05 20:05:35 2012-10-05 21:05:35 1 1 29 0 2 21 0 153.90 43 97.65 NEW Mc.clpl...hl..plPpL.Ehsslcsl....+.tLpsalpslhstINlpc.Ls-WpLhlhl...shhus..NtIGlaK+uppaoSsKEhphSIulslPspccshaGlsch.cpuahs.lscppFh....lLpss.FspYsNLtcYIlEsuKluIh.hhppGhphpGhKIph. ..................Mp.Qltl...LlssplspLhEhspl.pl....pc.lpsYhpDlsNtIphD-.L...sDWpLhI.l...shhss..stIGIaK+uh.pasSsKEhploIuIPlPspcpsRaGlsch.....puahs.lsccsFh....lLsss.FuKYDNLhpYIl-suKhAIhshFphGFThpGhKIKK+. 0 1 2 2 +15448 PF15588 Imm7 Immunity protein 7 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly all-beta fold and a conserved arginine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a Pput_2613 deaminase domain [1]. The protein is also found in heterogeneous polyimmunity loci. 25.00 25.00 32.80 31.90 23.80 21.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.20 0.71 -10.31 0.71 -4.05 32 42 2012-10-05 20:13:00 2012-10-05 21:13:00 1 2 40 0 14 45 1 117.50 21 78.64 NEW .hpFpAphlsspt-sc...hhhlGhAD.cc.ssppallhQRsh....c-pDtptshsuha..sps....sstssYsslcplpLppspltlplpssshh.....slsls..plplsh........ph......hphhpphhpplLp. ...thpFpAphlssph-sc...shhlGhAD.cc.ssppYlllQRsh....--pD.p.shssha..hch...tsspssYsslccltLppsplhlplpcss.t.....slshs..plplsh........sh.......hphlpphhpplhth......... 0 5 8 12 +15449 PF15589 Imm12 Immunity protein 12 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved WxG and YxxxC motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the NGO1392-family of HNH/Endonuclease VII fold nucleases [1]. 29.00 29.00 32.50 32.20 25.70 24.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.46 17 45 2012-10-05 20:14:09 2012-10-05 21:14:09 1 1 43 0 16 45 1 144.10 33 88.74 NEW pWVEShGGPLlsVPtosLshWsGss........s.spsssDYDRACsVDGhsGllslGs..up.....ALVLGDEPAsTsYL..PcHtsF..lRW......hAADS.....Es-Llusscssls..uspW-..spspWpss...GPsVLhDuA..........aPG.usss...sphtVsLssGRatVRAscscs-tcoh. ..........pWlcotGGPllhl.psshthWpGsp.....................s.sDYsRACsVsshlGllslss..tp.....ALVLGDEPhsTsa..l...scc..tsF..lRW......hsA-s........Es-llsuscsshs....sstWp..spsp..Wpss...GslVLFDSA..........hPssphs...spl.lsL...sGpatV+ssphpss..s.............................................. 0 4 10 15 +15450 PF15590 Imm15 Immunity protein 15 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved aspartate and GGxP motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox10 or Tox-ParB families [1]. 25.00 25.00 69.80 69.60 19.20 16.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.29 0.72 -4.89 5 9 2012-10-05 20:14:46 2012-10-05 21:14:46 1 2 9 0 1 8 0 69.80 37 13.46 NEW Dsls-RIEsLIsNp..LpclAh.....pPD..uWEsLYpDPcDGRaWE+lhscSchcGGGPPpLpslSQsss+s+Ypl ..Dsls.RIcsLlsNp..Lpcluh.....pss..GWpphapDPsDGRaW.hshspSchcGGGPPpLpslSQ.ps+spYph..... 0 0 0 1 +15451 PF15591 Imm17 Immunity protein 17 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly all-beta fold and a conserved GxS motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox17 or Ntox7 families [1]. 25.00 25.00 73.40 73.30 23.10 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.27 0.72 -4.21 6 26 2012-10-05 20:15:26 2012-10-05 21:15:26 1 1 26 0 3 9 0 77.40 76 97.53 NEW M...KhsFYp.Vclhp..ssst.....cltGc+GVVlGhSE-DsthaGYuVlIa.-hcpsh.l-cc-ltsTGphhsR--FY ..MTNLKLDFYSEVIIKDSCPNDLLENGETIKGKKGVVLGISEEDGIIYGYTILLF.DIKYCIYIDKKYIIPTGKKFSRDDFY 0 1 2 2 +15452 PF15592 Imm22 Immunity protein 22 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved SF motif and tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox21, Ntox29 or Tox-ART-RSE-like ADP-ribosyltransferase families [1]. 23.30 23.30 25.40 24.90 21.70 21.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.65 0.71 -3.77 13 133 2012-10-05 20:17:06 2012-10-05 21:17:06 1 1 76 0 12 78 1 110.90 58 90.11 NEW +Nlsps.pa-EsSFIuplh-pupWscccYW+LEpsLhpl...sp...phphsc-lspplhhuhhpllshlh.thsstc.apIps..h...........cshc....I.shhERhchlhptlFs..scslcpstF.hh ............RNIPsFEcYDENSFIGKWYD.DGVWDDEEYWKLENsLIEV...R+...KYPYPMDIPRDIVIGIGTII-FLM..VsNWKLFcIKu..SP.......WLPcSVt....IpERYERh+sMLRYIFT..-hDl.NspF.Y..................... 0 5 8 9 +15453 PF15593 Imm23 Immunity protein 23 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox18 family [1]. 
25.00 25.00 26.00 25.80 24.40 24.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.98 0.71 -4.00 26 70 2012-10-05 20:18:30 2012-10-05 21:18:30 1 1 56 0 16 50 0 150.50 23 94.87 NEW MIaG-...P.pFulhh-h.lppWs.sss..a.....ppGhFsaaIsuchhPs.hhsso..LtssltpL..pph..........thhsthpsppLFshsstchhphl..............hphh.sphts....pt.ch.......aphhloh..shsDs...Gh.lFh..lps.s-ph+llatp.................pstslp..-hhLptsphppllppl.t .......hhGp....pcFuIhh-p..pchs..ts..a.....h.GhhphhIssphaPp....t...hho..LssshpsL..csp..........th.pphhssthhsL-htc.thh.l...............hchsp.phss.......ph.......hshhhsss..shp..-...shslhh..hps.s-p-RLhath..................sspshc..Ehhhc+Gplppll.pl.p....................... 0 2 7 11 +15454 PF15594 Imm30 Immunity protein 30 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an all-beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-HHH or Ntox24 families [1]. 23.20 23.20 25.00 25.00 21.40 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.47 0.71 -4.03 40 70 2012-10-05 20:22:57 2012-10-05 21:22:57 1 1 61 0 24 80 1 120.60 23 86.37 NEW pplhssptlpplasp.hPshpcsclhslplc+pss.........pLslph.....hhpchsps..Pp+W..spass........hhlslsFhslppLpl.....suhuppshlsplplppssst.h..........plslps.sssh......phpC....cahplpsls.sa .....hhssptlhslaGp.hPshccs-lhslplpRsss.........pLslph.....hspc.spsh.Pp+W..scass.........hhlplsFhslpsLpl.....puhuspsllsphclp.ph--p.h..........hlcIcs...psph......hlpC....cahclcsls.s................................. 0 4 11 16 +15455 PF15595 Imm31 Immunity protein 31 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved tryptophan and Dx[DE] motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-RES or Tox-URI1 families. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 29.80 29.80 35.20 34.10 25.50 25.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.41 0.72 -4.28 33 97 2012-10-05 20:23:36 2012-10-05 21:23:36 1 2 91 0 16 77 0 106.30 27 70.46 NEW p.hhhlcpc..tshslsL.l.tctchp.hsp+hc-..Gh-u..sGYsWtulhpsalpcct..PcL..h-clcaDPEAGhFsA.Y....upst-sLcchsthlcchh-scphlhcll..pts-lp. ...................................h.lhphp...shslsh...pppcshthsphhpp..shcu..sGYsWcuhhphalpcht..Pcl..h-thphDPEAuhFsA.Y....upsh-slpclsphlpshh-s-chlhphl..pts...t........... 0 5 12 14 +15456 PF15596 Imm34 Immunity protein 34 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly alpha-helical fold and conserved aspartate and cysteine residues and an SE motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the LD-peptidase or Tox-Caspase families [1]. 26.50 26.50 26.50 70.30 25.60 18.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.32 0.72 -3.89 3 76 2012-10-05 20:25:01 2012-10-05 21:25:01 1 1 76 0 2 35 0 91.60 92 74.39 NEW MA-+uILsSLlusposEu+uACSto.aACsuA-hAELGLAhluuSc............................ScAuscuLVNLhRaRlDGALSE-YsCYLLo+G+ulsstLc+LsAKpLAucChpTFsclK+R.sht ......................hADKuILWALISASspEGRKACSLSYFuCKAAE.AELGLAYMAAND............................NKEFLTSLSNIMRYKIDAGLSESYTCYLLSKGKIIRPYLKNLNPLQLAADCIETVNKIKDKNKK... 
0 1 2 2 +15457 PF15597 Imm36 Immunity protein 36 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved [DE]R motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox13 or Ntox40 families [1]. In some proteins this domain is fused to the Imm38 -like (PFAM:PF15599) immunity domain. 25.00 25.00 26.60 26.40 19.70 19.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.22 0.72 -3.86 31 66 2012-10-05 20:00:33 2012-10-05 21:26:13 1 3 33 0 6 64 0 92.00 35 93.27 NEW .slcp.Kp-l.ppIcchGY-SL+YslFs..Epp..pt.Wts+l.Ycs..sca.V.upcDRu.lsG+.hEFssFp-AKc+FlchL-hhVptN+htlcpGhss.YsSPLWD .................phtp.+t.l.ptIp.hsapsL+.YslFp..cpp..ph.ats+l.hpp..spa.VYsTsDRuShs.GK..hpFpsFp-AhcpFlphLp.hVh.N+htlcpG.sspYssPLWp.......... 0 3 3 6 +15458 PF15598 Imm37 Immunity protein 37 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved arginine. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox40 family [1]. 
25.00 25.00 30.30 29.60 22.00 23.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.76 0.71 -4.37 27 97 2012-10-05 20:27:06 2012-10-05 21:27:06 1 2 64 0 16 59 0 153.10 32 84.76 NEW ..tchhphsttuGhshssssss................hhhssssptthhlhppsut....ah.h.....sps-Rupttt.....hh...hssshsslc+allhhhusplRts..hpLPtlthPhs.pslssuaslpphs.t.........ht.lhsspssslshshsst.....hphssLSahhshshp-Lhs.ualc....ssG....tPLh .....................................scl.sahphuGhshhtsscs...............hhhhspGucsthalpchsGh....ah.l......spo-Rhsccs....hph...uuuShsllEKYLhshaGsslRuc..+cLPsl...psPaps--lhPpaolsshshs........hss.LlsssGosluhssssp.......LlpLSHhLslolsslcc.SaLDs-G...pPLh............. 0 8 13 16 +15459 PF15599 Imm38 Immunity protein 38 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved E+G and ExxY motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox40, Tox-CdiAC and Tox-ARC families [1]. The protein is also found in polyimmunity loci in polymorphic toxin systems. 25.80 25.80 25.80 26.80 25.40 25.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.43 0.71 -4.59 30 130 2012-10-05 20:27:51 2012-10-05 21:27:51 1 2 118 0 19 109 1 116.40 27 77.94 NEW .tclpsclhclut+lusss..ths.hhstp....t....hs-uts.lhhssst....Y+hhhhERGphhpccpTssh-ElLYWlhcslspshAhchthpptss.........c.pshR+lhascph..cLlsslssc...WupcttpchsthL .....s.chpphh.chttchsh.p...phs.hhhtp.....t......stGp..h.hsppt....YhhhhhERGpt.schpTssh-EhhY.lhpclshh.huhcathc+tht.........c.pctR+ltFpKpl..pLhstlNss...Wtccstpphsthh..................................... 
0 2 8 12 +15460 PF15600 Imm39 Immunity protein 39 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved DxEA motif and arginine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-ColD family [1]. 25.00 25.00 27.40 32.10 24.60 24.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.36 0.71 -4.43 8 16 2012-10-05 20:28:56 2012-10-05 21:28:56 1 1 16 0 4 13 0 175.80 43 95.36 NEW MuSaIulG...........hVYc..s....csphpslIcha...l-LlusIssl.........Kh.s.hscsh-u-lhpshsl-chch.cssauplo.......cIpscllp.cslphhlRh.cEpcY.GlLhDIuE-cLltspSs-plE...pcLIsllhpIYpsssY-auFCDsEAEl-h..sPpslcPl.cssYullhhPc..Nsch+VhhssWcIDGlTpRpc+... MuSalSsG...........CVYK.......puShKNllKhY...IDLluShANl.........-IENllChssEu-lppTcsLEElch.-ssFSplT.......cIsCK.lc.csl-hslRhhcEpsYpGVLFDlS.DaL.EhhSscpLE...NcLIsIlhsIYshhPY.FuFhDoEAEl-h..NPpslcPl.csPYAhLlhP+..Nsch+VYhssWcIDGlopRpc....... 0 2 4 4 +15461 PF15601 Imm42 Immunity protein 42 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved tyrosine and tryptophan residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-REase-10 family [1]. 
25.00 25.00 30.90 30.90 22.10 22.00 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.59 0.71 -4.74 33 74 2012-10-05 20:38:09 2012-10-05 21:38:09 1 1 69 0 17 69 1 125.60 35 92.37 NEW VGhclshhha-lGpsshlauFFSTlshpLEpstWGo+aPhLhscL.YpGpLph-clppAhpELcpIcptLpphs.PspVl..WDh-sl.sppPPW.usplusslssLusYFhTscGcshh-lltcAl-pA.............hcpphclhIp ........VGhtlshhhaplGpsshlauFFSTlshplEsptWGo+aPlLMpcL.YpucLphcclppAlpELcpIpp.Lpphs.PspVl..WDh-cl.sppPPW.GssIssclssLusYFhTssGpshhcllhculppu.............hc.phsl.I.p......... 0 4 10 13 +15462 PF15602 Imm43 Immunity protein 43 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly alpha-helical fold and conserved arginine and phenylalanine residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox48 family [1]. This domain is often fused to the Imm72 immunity domain. 27.20 27.20 29.70 29.20 25.30 19.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.10 0.71 -4.30 15 39 2012-10-05 20:41:16 2012-10-05 21:41:16 1 2 20 0 15 42 0 171.40 31 51.90 NEW hss-hpRRplFaLLK+hoSaThWpRhh-hapsFAsshEcsl+phsts...-.stl.sschssILcsLA+hEcGlpcLp+G......................s+pla+hstshE.....huhsc.....t...pss......uaW.cth.phtshts.pls.sh.th.......hphsPhtsth.ppsltpLtssht.lssshhEstFh-.shP.ha ..s--hsRR+lFaLLp+hTSaohWcRth-hattFAstaEpsV+shPps........-.ptl.ssplstIhchLAth-cGlpcLt+G......................sRhVaphGpsh-.....huhcc.s.hsshhasps......saWccth.phtsh.s.pls.shsth.......hphuPhtst..ppshtpltshth.l.sstY-ptFhshshP.sa...................................................................................................................... 
0 0 0 6 +15463 PF15603 Imm45 Immunity protein 45 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-ARC family. This domain is also found in heterogeneous polyimmunity loci [1]. 25.00 25.00 29.20 28.20 23.80 22.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.59 0.72 -3.84 16 55 2012-10-05 20:41:55 2012-10-05 21:41:55 1 1 53 0 12 43 0 75.10 44 98.95 NEW Mhlss.ssutlpl-lspGthhph.pGEhlhs.............spFllahsolppW-sPa-s..lotsEhppIlctlpcphscpshplsF- ........MKIoG.spStlpFDLENGallKA.pGEhLls.............GcFVVa+DSMcsWEPPaEscplopsElpcIIppVcppps-pTlplsF-.... 0 6 8 9 +15464 PF15604 Toxin_43 Putative toxin 43 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a most all-alpha helical fold and a conserved HxxD motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion systems [1]. 
25.00 25.00 27.70 27.00 24.50 22.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.16 0.71 -4.17 30 88 2012-10-05 20:43:07 2012-10-05 21:43:07 1 8 64 0 11 97 2 123.50 36 39.38 NEW ..sppcttEFcRQLcsQEcGLNcLTV-EalcNRptahsss...psp-sssspcARcc..hpcclc-phpppl....utp.....h..pAcppApcphpolAALHNPDhlAGGcsh......................IushGD+plNSSIGuQW+.......sRIssL-ptAccsspsh....ppssthNlKLp .......................................t.p.tEFt+QLpsQppGhNch...TVcEalps.t.hhtts...........hthpsthtppstpc.....hppclpchhppth...ph.........tApptAtch......hps.AALHNPD.lAGGps.......................IsshGD+plNSSIGsQW+..............RlttlDtthpp.hscph....htsshhNl+L........................ 0 3 6 8 +15465 PF15605 Toxin_52 Putative toxin 52 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all alpha-helical fold and conserved aspartate and glutamate residues, and K[DE] and[DN]HxxE motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5 or type 7 secretion system [1]. 25.00 25.00 27.70 27.40 20.90 20.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.21 0.72 -4.14 8 23 2012-10-05 20:49:50 2012-10-05 21:49:50 1 7 23 0 4 26 0 91.40 45 7.60 NEW stLosuQpssls+l-Nh.......IpsHLTDtDhoGThRDLsGsPV.PKss..GGYWDHlpEMpDoh+GLpNhp+sLcs....LsNPshsppstthhQutLscAspplcKIEshhcs. .......stLsppQcuulc+IDNs.......IcstLpDpDl.GTL+DhsGpPV.PKps..GGYWDHhQEMpNoh+GL+NptcTLcs....lsNPp........hQutascAoctlsKIEshl+.u. 0 1 3 4 +15466 PF15606 Toxin_55 Putative toxin 55 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha helical fold and conserved lysine and cysteine residues, and GNxxD and WxCxH motifs. 
In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system [1]. 25.00 25.00 28.40 27.60 20.90 17.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.49 0.72 -4.01 5 65 2012-10-05 20:50:46 2012-10-05 21:50:46 1 10 64 0 3 72 0 75.50 87 9.54 NEW GhAlu...shMAuPGNQADTGIsccVs-lhs-thhsuGK+PDRC-VLQpLID....sGsIuAKs.AKuTQKAWGCRHSRHS+D+ .....GLALD..ITMIASRGNVADTGITDRVNDIINDRFWSDGKKPDRCDVLQELID....CGDISAKD.AKSTQKAWNCRHSRQSNDK....... 0 3 3 3 +15467 PF15607 Toxin_60 Putative toxin 60 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha-helical fold with conserved DxK, GNxxxG, and DxxxD motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6 or type 7 secretion system [1]. 25.00 25.00 32.20 29.50 24.60 23.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.48 0.71 -4.04 62 137 2012-10-05 20:52:01 2012-10-05 21:52:01 1 12 114 0 26 125 1 105.80 30 31.36 NEW shhhahppVpssusWDaK.pht.th............................h....tth.hhh-sauNhHYGalGpAh..GhspshLhpuAGhsQhhssstp......................................shhDDssDphsIphGlchapp. ..........................................................shahttVpssusWDaK.pht.th...........................hhhp..tphthhaDsauNlHYGYVGhuh..Ghs-shLLtuAuhtQhhsstt.......................................shhDs.sDptsIphGhphapp.p.... 0 10 18 24 +15468 PF15608 PELOTA_1 PELOTA RNA binding domain Anantharaman V la_psag Anantharaman V Domain This RNA binding Pelota domain [1] is at the C terminus of a PRTase family [2]. These PRTase+Pelota genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo-nucleoside involved in stress response [2]. 
25.00 25.00 25.10 47.60 23.60 23.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.34 0.72 -4.24 65 343 2012-10-10 14:40:03 2012-10-06 20:01:06 1 7 333 0 72 266 2 102.60 39 24.86 NEW psps.sp..ht.hhs..spps..phpuh.pslcpltpcasIsslNhlKPGlGEsTRVLLRRVP.+lLl+.stss....s-lsHlhhLAcc+GVs..Vcphs..sh..sYsslulI+slt ............................tsh......hth.hpsspshphpuhpslppl.uccapIsslNhIKPGluEsTRslLRRVP.+lLVR.sh..ss....PDlshllhLAc-KGlsVcEhs.sh..tYpslslIKplh........... 0 20 44 60 +15469 PF15609 PRTase_2 Phosphoribosyl transferase Anantharaman V la_psag Anantharaman V Family This PRTase family, with C terminal TRSP domain, are related to OPRTases [1], and are predicted to use Orotate as substrate. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 35.20 35.20 39.00 40.70 31.40 30.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.26 0.71 -5.00 67 330 2012-10-10 14:25:38 2012-10-06 21:18:51 1 4 322 0 70 261 4 196.20 43 44.89 NEW slscL.....hshAtR.+NPKRuFLFVS+VLGKHlPlsPsshhsshppLApthstpl...........................................................................sts................slhIGhAETATuLGpuVacsh..p.s..tp.........YLHoTR+s.lss.....p..hspFcEpHSHAosHlla.sss.shhps....su...s.LVLVDDEhoTGpThlNllpuLppt..hP...h.cchllsoLlDW............ps.t..ssltpcls.l.....lssVSLlpGph ..............................s.lcsLhslAtR.R.NPKRAFLFVS+VLG+HlPlpPushhpshppLApphss.sL...............................................................sss.............lLhIGhAETAsGLGtuVacplptpp......hYLpSTR+slsu.....shhspFcE-HSHATsHLlYhsss....th.hpp.....uc.slVLlDDEhTTGsThlNllpAL+sp..hs...hcphlssoLsDW........pt.......sslscch.s.l..lssVSLlpGp......................... 
0 20 44 60 +15470 PF15610 PRTase_3 PRTase ComF-like Anantharaman V la_psag Anantharaman V Family This PRTase family is related to the ComF PRTases [1]. These genes are found in the smaller biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 110.50 110.50 192.20 191.90 29.10 28.50 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.79 0.70 -5.48 17 23 2012-10-10 14:25:38 2012-10-06 21:57:01 1 1 23 0 9 24 0 273.30 30 91.63 NEW Mhs.thSLHpItsss..phsFssssYSRaKaGDsphAcpFGppLhcuFIsp...........htstphps-plVhlsSsapplPTAopsLppaFlppLNpaLhppstpsslpsKlhRsto.hp..DYusLshE-R.pl.ts-saaIDpphl...pG+sllFlDDI+lTG.S+Ectlhp.hpphtlcs...chhaLhhs-..ppsh..csslEspLN.hsVcohpD..ltplhpussFphNpRhVKalLsts.s-.ashFlp..p.scshhppLhchAluNsYH.p.ppYpsNlphLt..l.....shtuh................. ......h...thuLapIhsss..phsFssssYSRaKaGDsphActaucpLhcsFlsp...........apsthhps-plVhlsSsapslPTAossLppaFVccLNpaLscput.sslpsKlhRhto.sp..DYusLshE-R.pl.tsssaaIDp-hl...sG+plIFlDDl+lTG.S+EcpVhp.htphslcu...chhalhhs-..spsh..csslEs+LN.hsVcshhD..ltpllsussFphspRhlKalLutsppc.hssFlp..phssshhppLachAluNpYa.p.ppYtsNLshLp..l......h....... 0 3 5 7 +15471 PF15611 EH_Signature EH_Signature domain Anantharaman V la_psag Anantharaman V Domain This domain with a strongly conserved glutamate at the N-terminus and a histidine at the C-terminus [1], is found in a SWI2/SNF2 four gene operon [1]. Its strict-neighborhood association with\ \ SWI2/SNF2 ATPase strongly suggests a function in conjunction with it [1]. The other genes in the operon are a OmpA protein and a TM protein [1]. This has a DNA related function along with the TerY-P triad [1]. 
25.00 25.00 41.50 40.40 23.20 23.10 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.60 0.70 -5.41 79 135 2012-10-09 15:52:58 2012-10-06 22:16:26 1 1 129 0 40 115 7 383.70 22 79.64 NEW h.phshcplhstlppht...hstplps..........phhhhshs..h.h.....p...th.t..sp.........t.hhshlhp.........ht...ph..pptshtsLhpsYhstas............pthtshh..........lttph...thppt..........hhphhpphp....tlhsspss.th.lspphhp..st.ts.hptl..t....phslss....ssahtpshhthhlpphtphp....spthl.............ccl.ph.h...lh....ps......hhphtsphhpthLphah.p..pt..t..pphcpt.lpphl....ls...hhG-Ppltss..p......Wtt........lsc........pstphlptWLscp...slchFFpllp..t.st................h.ptRtpFWhpahcp..lspshlhhuss..Ahp.hppthtt...tth.s.phu.php........p.s..spsshlhpl...s....sh.hhVE............asssupshhhap.ps...ph...........s..pthp....h.....thstpl.....+pts..........sh.t................ph....sH.ps...............sWpt+hpphL ................................................................................................................................ht....shtplhstltphp...htp.lss..........plhhhslsh.h.tt.tp..tph....sc....plhstlhc.........hhpp.pphshpuhhptahspas..s...t.cshtsah..........lttph..pspcp.............ph.p....pllsssus.phlucphhp..sp..hsh.chl..t....phtlpp...tupahstshhhhhlcplspls.....-cshl..............-cl.ch.h..lh..cu.........phctpshlhptlLphhh..t..ps...s..sphpcshhphl....Ls...hhGDP+lsssssp....Wpp.........lp.............shhphlhsWLsch...DLchFhchlpths.t.............thh.sRcpFhpthhct..lsps+lhLu.c..Atphhpppht....cph.spauplp......csspuhlhhph.s....sh.hhVE...........a.csu.ssh.asYp.hs.p..............s.pphp..h.....shssph...........pchs.p.....sht...............+lsHss...........uWppKhht................. 
0 10 19 29 +15472 PF15612 WHIM1 WSTF, HB1, Itc1p, MBD9 motif 1 Iyer LM, Aravind L la_psag Manual Family A conserved alpha helical motif that along with the WHIM2 and WHIM3 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins [1].Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [1][2]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. The conserved basic residue in WHIM1 is involved in packing with the DDT motif. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA [1]. 16.70 16.70 16.70 16.70 16.60 16.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.02 0.73 -8.48 0.73 -3.75 36 528 2012-10-09 18:07:10 2012-10-07 01:56:28 1 70 195 2 305 505 1 49.50 21 3.66 NEW pptshspthpptsa.............hplsspp+lplLphLschh...hsssshpshlppptctt .....................................h....hh.thpppsa.............hplssppKlplLphLscph...lsottl+shlppp....h........................ 7 53 121 197 +15473 PF15613 WHIM2 WSTF, HB1, Itc1p, MBD9 motif 2 Iyer LM, Aravind L la_psag Manual Family A conserved alpha helical motif that along with the WHIM1 and WHIM3 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins [1]. Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [1][2]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. The acidic residue from the GxD signature of WHIM2 is a major determinant of the interaction between the ISWI and WHIM motifs. 
The N-terminal portion of the WHIM2 motif also contacts the inter-nucleosomal linker DNA. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.77 0.73 -7.97 0.73 -3.82 15 298 2012-10-09 15:57:33 2012-10-07 05:24:23 1 45 177 0 195 286 0 41.60 35 3.52 NEW RppsLGhDRpts+Yahh........................tsspslhltcp.s ...RtpPLGpDR.tNRYWaF.....................................s.....t..tpsthahp......................................................... 0 38 95 146 +15474 PF15614 WHIM3 WSTF, HB1, Itc1p, MBD9 motif 3 Iyer LM, Aravind L la_psag Manual Family A conserved alpha helical motif that along with the WHIM1 and WHIM2 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins [1]. Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [1][2]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. WHIM3 along with WHIM2-N constitutes the inter-nucleosomal linker DNA binding site in the major groove of DNA. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA [1]. 
18.10 18.10 18.10 18.10 18.00 18.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.32 0.72 -4.18 35 259 2012-10-09 17:50:33 2012-10-07 06:11:50 1 43 130 0 154 252 0 77.50 32 6.14 NEW WthhpstcplcpLh..csL.................................................................................................................................................................................sscGhR...EppLpppLtphh....pplpssh .....................................................................................................................................................................Wthhscscpl-pLl...puL..............................................................................................................................................................................................pPpGhREtpL+ccLpp............................................................ 2 27 51 96 +15475 PF15615 TerB-C TerB-C domain Anantharaman V la_psag Anantharaman V Domain TerB-C occurs C terminal of TerB in TerB-N containing proteins. This domain displays multiple conserved acidic residues (TerBC) [1]. The presence of conserved acidic residues in both TerB-N and TerB-C suggests that they, like the TerB domain, might also chelate metals. These two domains might also occur together in the same protein independently of TerB [1]. 
26.30 26.30 27.20 27.40 25.40 24.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -11.01 0.71 -3.94 140 224 2012-10-09 17:52:00 2012-10-07 20:41:01 1 11 214 0 64 226 6 149.20 21 22.62 NEW slcshts........................tpptst.ss...................lplDhsclsplcp-sspspthlss...........lhs--c............tp.........................................-.t...p......................ht.p..p...ts..............pssss.......................................................G..L-s.......sctphLchL..........lsc.pp..W.ctchpphstph.............pLhhsssl-pIN-hsa-ths-sllE...s..s-shtlst...-hhccLp ..................................................................................................................................................s........................p.......hplDhsplstlcp-sppsp....ss...........lhs--p.....t...............................................p..p..................t.t.ps....s........t.tss..........................................................h.u.Lsp.......schphLchL.....lsc.ps.a.htphpphspsh.............plh.lsshl-pIN-tha-hhsssll-...s..s-shtlsp-htpcL........ 0 21 41 52 +15476 PF15616 TerY-C TerY-C metal binding domain Anantharaman V la_psag Anantharaman V Domain TerY-C is found C terminal to TerY-like vWA domains in some proteins [1]. It has 8 conserved metal chelating cysteines or histidines [1]. It occasionaly occurs as solos [1]. 28.40 28.40 29.30 75.20 27.60 27.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.31 0.71 -4.22 48 97 2012-10-09 18:02:15 2012-10-07 22:23:15 1 2 92 0 17 68 6 127.90 48 40.28 NEW ssshD-shVllsG+C....s+T++PYLhKYER......t..sh.t....h.plphpta..p...lsGsaPlD.csYhsa.SDspssstplNTo-LhGs..PGCPaCGNthuaAhC..s.CG+lhC.l.sG.sc......pssCPWCsp.sssau.sss....ss.FDlsRGcG ..spAaDENCVTLsGRC....SKTRRPYLhKYERP....sspl.ouLs....F.pLNlsuF..NluGCYPID.EDYFuWS.Dtosou.QVNTS-LhGs..PGCPaCGNtsAFAhC.s.CGKLhClsG.sc......cVhCPWCtpshsausss...tssFDlsRGRG..... 
0 5 11 14 +15477 PF15617 C-C_Bond_Lyase C-C_Bond_Lyase of the TIM-Barrel fold Anantharaman V la_psag Anantharaman V Family This family of TIM-Barrel fold C-C bond lyase is related to Citrate -lyase. These genes are found in the biosynthetic operon, with other enzymatic domains, associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 217.60 217.60 221.30 220.40 214.70 214.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.11 0.70 -5.63 70 344 2012-10-10 15:06:27 2012-10-07 23:05:45 1 2 337 0 72 251 6 324.30 42 93.39 NEW phhsauLGATLYhPATRpclAcslhcp+hsGlpSlVlCLEDAlu-p-VshA.pNLhphLppLsst............................ptsshPLlFlRsRssc.hppLssth......slphlsGFVlPKF...............stpshpsah-hltp..ss...............LhhMPsLEo.-lh....csppLtpltphLspa..+-plLAlRIGusDLhulhGlRRs+chTIYD.ssluslIsslVslFutt....uaslouPVaEaa.ss.p..............................................sLhRElsLDhANGLlGKTsIHPSplssVpuhhhVopE-apDAhcILs.s.s.p......uVhKSs..spMsEstsHppWApclLtRAclaGV.hscstshsphh .......c..sasLGATLYhPATRpcIA-sllcp.Kh.....s.GlpSlVlCLEDAlu-s-lshA.pNLhphLppLusthtt.........................stsshPLlFlRsRpsp.hptLhsch..............slptlsGFVLPKF...............T.sshssah-hhsssp...............LhhMPsLEot-lh....cstphtpluptLcpa..+-RIlALRIGusDLhulhulRRs+chTlYD.sshu.lIt.lVsVFu.t....sFslTuPVaEahsspp...........................................sLh+ElsLDhApGLlGKTsIHPuQIpllpshhhVopc-asDAhcILss..sp......uVhKSp..stMsEstsH+pWAppILpRAchYGlhs-pss.....h........ 0 23 45 62 +15479 PF15619 Lebercilin Ciliary protein causing Leber congenital amaurosis disease Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95447 Family Lebercilin is a family of eukaryotic ciliary proteins. Mutations in the gene, LCA5, are implicated in the disease Leber congenital amaurosis. In photoreceptors, lebercilin is uniquely localised at the cilium that bridges the inner and outer segments. 
Lebercilin functions as an integral element of selective protein transport through photoreceptor cilia. Lebercilin specifically interacts with the intraflagellar transport (IFT), and disruption of IFT can lead to Leber congenital amaurosis. 26.00 26.00 26.00 26.00 25.50 25.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.94 0.71 -11.35 0.71 -4.88 31 158 2012-10-08 11:36:17 2012-10-08 12:36:17 1 4 81 0 93 142 1 180.10 39 32.43 NEW hls+RlLSARLhKIp.ELcN-ls-lph+L--ltpEN+hLKpLQhRQcKALs+aEsopsElPQLlspHpsEl+sLRccLRKoQE+ERshc++L+-s-scLh+s+ssLp+Lp+Lu-D+pLsER-ELsc+Lsthps+hpps-++IppLEKpLcLsspuFpRQLtsEp+KstpApppsctLppElppLppKLKEKEREL ..............h.hs+RlLSARLhKIp.cLpN-ls-lph+LpplhpENchLKpLQhR.ppKAL..sKaEsspsclsQLl.t+HpsEl+sL+ppLRKSQEcERssp++l+-s-s-Lh...+.......s+ssLp.......c....LpcL.u-s+pLsE.R..--L...sc+Lsthpt+h-ss-++Ipp.........Lp+...pLcLsspsapRQLhsEp+KshpspppschLphElppLppKLKEK-+pL............................... 0 25 30 57 +15480 PF15620 CENP-C_mid Centromere assembly component CENP-C middle DNMT3B-binding region Coggill P pcc Pfam-B_64009 (release 26.0) Family CENP-C is a component of the centromere assembly complex in eukaryotes. CENP-C recruits the DNA methyltransferases DNMT3B, in order to establish the necessary epigenetic DNA-methylation essential for maintenance of chromatin structure and genomic stability. This middle region of CENP-C is the binding-domain for DNMT3B. Binding of CENP-C and DNMT3B to DNA occurs at both centromeric and peri-centromeric satellite repeats. CENP-C and DNMT3B regulate the histone code in these regions [1,2]. 
27.00 27.00 183.80 183.80 25.00 25.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.95 0.70 -4.67 13 40 2012-10-08 12:56:00 2012-10-08 13:56:00 1 5 23 0 16 32 0 248.70 59 31.10 NEW hKL.LEDEFIIDESDpSFASQSWITIPRK.ussLKQpoVsPsESTAlLQuKKSREKHH........slSPpTLTSDKHScKA+PVEKSQPScp++LupSCsLosEhENssRSTKpEhaSENAcKsSGsKRTlcQK.Q++KhKsNlsEEplchtQSK-cNlNh..SHIsQDKLQRNSDRNMc-sEEhpNsslSKKQhPsV.GsKKpoo......ppKK-K.cuKKK+FSstSpKNKlVP-EVT.TlTRSRRISRRPSsWWVVKSEpSsl.SN.oSlRNELsV ....hKLlEDEFIIDESDpSFASpSWITIPRK.uGsLKQ+slS.PAESTAlLQu+KSREKHH........sl.PpTLsssKHScKsHPVEpSQPS-cphLssShALosEhENshRSTKaEMaScNAcKsSusKRTIKQK.QRRKaKApsuEEQLDhGQSKDENIph..SHIsQDKhQRNSDRNMEEpEEhtN-sl.SKKQMPPV.GSKKsSs........+KDKtEuKKK+FSsES.KNKLVPEEVTSTVTRSRRISRRPS-WWVVKSEESsV.SN.SS.lRNELsl... 0 1 1 1 +15481 PF15621 PROL5-SMR Proline-rich submaxillary gland androgen-regulated family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:P02814 Family SMR is a family of proteins found in eukaryotes. The family of SMR proteins is expressed in the submaxillary gland. SMR members may play a role in protection or detoxification. 24.90 24.90 25.00 24.90 23.10 21.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.82 0.71 -3.64 16 60 2012-10-08 13:28:38 2012-10-08 14:28:38 1 1 16 0 13 58 0 99.30 40 68.32 NEW MKsLsLlhGLWsLhuCFpsuEspRGPRt.asPt.......ssPPs..PatsthhPsPsPPsaGsGhssP.P.......husGhh.Pss..............s.htsh.......l.ssss.P..sPuhP .MKsLshlLGLhALhuCFpsuESpRGPRtPYsPG........sPP...PatPtaVPPPsPPPaGsuphsP.s.......assshh.Pss....s.s..........s.................................................................................................................. 0 3 3 3 +15482 PF15622 CENP_C_N Kinetochore assembly subunit CENP-C N-terminal Coggill P pcc Pfam-B_21609 (release 26.0) Family CENP-C is a vertebrate family that forms a core component of the centromeric chromatin. 
On depletion of CENP-C proper formation of both centromeres and kinetochores is prevented. The N-terminal of CENP-C is necessary for recruitment of some but not all components of the Mis12 complex of the kinetochore [1,2]. 27.00 27.00 126.40 104.90 17.80 17.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.74 0.70 -5.16 10 50 2012-10-08 13:43:13 2012-10-08 14:43:13 1 6 23 0 19 35 0 223.80 58 35.09 NEW DHLKNsYRRRFCRPS.RAPsINTEQGQNlLEILQDCFEEKShANDFSsNSTKSVlhSTPKlKDhClQSPSKE..CQKSHP....KSlPVSSRKKEusLQhhlEPSEAusRSVQAaEVHQKILATDVuSKNTPD.+KhSS+KhcD+HuEuDEEFYLSVGSPSVLLDAKsSsSQNAlPSsAQKRETaoScNSlNhLSSSTEIShKT+KRLNFEDKslLKKlEIEscVSclEDKlSEt.QE+KsScTSQKRlQDoEhEIQPQAKKSFSTLFLETVKRKSESSsVVRHsATsPPHsSP ...........DHLKNtYRRRFCRPS.RA.sINTcQGQNlLEILQDCFEEpShAssFSsNuTcSl..ST..KhKD.plQSsSKE..sQcSHs....KSlPVSS++KEusLQh.....sEP....sEssscSVQAHEV+Q+hlusD.VtS+sTss.p+hSS..+ph.pshcs-AsEEFYLSVGSPsVLLD.AKsS..s.QpslsSsAQKRETYT.cNSVNhhsSST-lShKT+KRLNFEDKshL+phEItNpVSc.EDKhSEt.QE++sStoSQpRhpDoE.EIQ.puKKSFSoLFLETVKRKScSSslVRHhAssPsp.sP............. 0 1 1 1 +15483 PF15623 CT47 Cancer/testis gene family 47 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:P0C2W7 Family CT47 is a family of proteins found in eukaryotes. Proteins in this family are typically between 262 and 291 amino acids in length. There is a conserved HIL sequence motif. The function of this family is not known. 
27.00 27.00 149.70 149.60 22.50 22.40 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.89 0.70 -4.66 14 37 2012-10-08 14:01:00 2012-10-08 15:01:00 1 1 15 0 12 35 0 262.80 50 96.59 NEW MSsTGDtDPspssQEuPso.tGuQuttutAt-uhutDSuPssG-.....ssPtscsuGlsGP.tG.pEt....EG............GsAEEDSDItss.........E-tptpppstuhshhVsA++FPMsGFRhhFLDLVHShLpRlYaNDHILIcspp.shlhht.ptst.............up.tpstlhhhsphhssussshEGpu.sLh....p.ht.hP..EPs..u-.sE.ApE.........tcssEEusthEsApctscE.....................u.E.tspEsstPE..........chsctQsEc.cEEAQsstu-tcccp.pccppc ..MSATGDtcPsQsDQEAPVSQEGAQAEAutAGstEGuDSGPcSuD.....hVPsAEssGVAGPhcGLuEE.....EGEQt.....tuLAAsPhsGuAEEDSDItss....E....EEEEEtppAsNhDlsssuRRYPhsGhRhhFLDhVHSLL+RlYHNDHILItsRp.uRLMhts+sus............Ps.h.spsslLLlspRLGsGAsuhEGcuLGLl......pEAAoVP..EPsVPA-.AEhApEPt..............EEuAEEt.sEEsAE-.spEc...........u...........uEEPs.....spEAsAPE..........ElTK.Q.EKW-EEAQsusuEEcK-ptpcKstc........ 0 2 2 3 +15484 PF15624 Mif2_N Kinetochore CENP-C fungal homologue, Mif2, N-terminal Coggill P pcc PB002175 (release 26.0) Family Mif2_N is a family of fungal proteins homologous to mammalian CENP-C. On depletion of CENP-C proper formation of both centromeres and kinetochores is prevented. The N-terminal of CENP-C is necessary for recruitment of some but not all components of the Mis12 complex of the kinetochore [1,2]. 
23.50 23.50 24.00 24.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.99 0.71 -3.57 44 123 2012-10-08 14:07:44 2012-10-08 15:07:44 1 7 115 0 94 122 0 132.60 24 21.73 NEW cahslGhpuR..........KTGlslc-pGpRDEaGMEslDshFSSP-c..oslp.........................ttt.ppuSp..sMchsss.ssssPsshlss.........tpp.th.sPpu.poP...........t...h....p.tht...toSshppsstpss...................ts.hppch-..h ................................phhplGhtuR..........KTGlsl+cpsp+DEaGMEsl-shFuus-c.ushp........................................p...tp..ssptsSp....shshsss.stssPsshlps.........tpp.hh.s..Pps..pos.............ht...s.ht..us....spR.........s..ht.........pSsh.p....tt.............................................................................................................. 0 20 50 80 +15485 PF15625 CC2D2AN-C2 CC2D2A N-terminal C2 domain Zhang D, Aravind L; la_psag Mannual collection Family Many ciliary proteins are involved in ciliogenesis and implicated for ciliophathies. A recent study has shown that many of them contain various new versions of C2 domains which are predicted to mediate membrane localizations for Y-shaped linkers of transition\ zone of cilia [1]. This is the first C2 domain of ciliary CC2D2A proteins which also have another C2 domain (CC2D2AC-C2) and a new inactive transglutaminase-like peptidase domain (CC2D2A-TGL). 
25.00 25.00 26.80 25.90 24.60 24.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.47 0.71 -4.78 20 138 2012-10-10 12:23:49 2012-10-08 22:33:14 1 4 88 0 84 110 0 160.10 33 11.78 NEW tPG-sphhspLsssssls......ssspsPpsEtsRRpphpphphalplhlNs.......ppVspopsp.L...tssFssphscthplpl.p+.hPcslplplaEsss...htsphlupVhlPlPsstssh.hs...............tspphpFoocp.hp.sh.h.s....................ppssphhsGslhhsssWsptpstt.s .............................PtEshlhs.Lshssslo......ssptssp.sEh....RR.pclp+pphalKlhaNs.......KcV.u.pTps+sL..ssDF+lpFsplFsl........pl.hp.hPESlsLplaEsss...sssslLApV.alPlPpsosspups.............................shcphEFSSsptlthsapusGu.............hpsssssphhh...hTSGplshssuWuhscss..l...................................................... 0 31 39 59 +15486 PF15626 mono-CXXC single CXXC unit Iyer LM, Aravind L la_psag Manual Family This is a solo version of the zf-CXXC domain with a conserved CXXCXXCX(n)C, zinc-binding motif. This is, thus far, only detected in the plant lineage in diverse chromatin proteins [1]. Structural comparisons show that the mono-CXXC is homologous to the structural- zinc binding domain of medium chain dehydrogenases [1]. The regular zf-CXXC domain binds nonmethyl-CpG dinucleotides. 20.00 10.00 23.30 13.20 18.10 9.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.49 0.72 -4.02 7 7 2012-10-09 00:30:47 2012-10-09 01:30:47 1 4 4 0 7 8 0 44.10 22 4.97 NEW cssuC.hCpsCtt.s.ssps.s.ttt.tsp...............tsspsspClplcsh cssuC.hCpsCtt.s.ssps.s.ttt.tsp...............tsspsspClplcs.. 0 4 6 7 +15487 PF15627 CEP76-C2 CEP76 C2 domain Zhang D, Aravind L; la_psag Mannual collection. Family Many ciliary proteins are involved in ciliogenesis and implicated for ciliophathies. A recent study has shown that many of them contain various new versions of C2 domains which are predicted to mediate membrane localizations for Y-shaped linkers of transition zone of cilia [1]. 
This is the new C2 domain that is contained by ciliary CEP76 proteins [1]. 47.00 47.00 47.60 81.10 40.00 46.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.73 0.71 -4.72 16 82 2012-10-10 12:23:49 2012-10-09 01:37:56 1 2 63 0 49 79 0 157.60 51 25.06 NEW hsphssscphLalclhsG+AFl-als.......................ssssophplplpatsQRFsSpsVssssEPsFsEpFLhcLcppshtts...............htsllslspPl+llllpsc.stpp..............pLlusphl-WRplLppss.............shslELtGhsspt.phssGlLplplELlPshpshh ...........p..plDPsRRYLYLpVLGGKAFLEHLp-s-s.................LsGQssSTFTLsLHFRs.QRFRS+PVP.CACEPDFcDGFLLEl++-uhGcusch..........................ssuTTMLSIuDPlHhVLIKTDhhGET..............TLVuSaFLEWRoVLuSpsGhs..........sLoVELhGVG...sEu.KVsVGILsl+LEhaPsLspsL... 0 18 23 34 +15488 PF15628 RRM_DME RRM in Demeter Iyer LM, Aravind L la_psag Manual Family This is a predicted RRM-fold domain present at the C-terminus of Demeter-like glycoslyases [1]. These proteins are involved in DNA demethylation in plants where they catalyze removal of the 5mC base and subsequently cleave the backbone through lyase activity. Orthologs of Demeter are present in plants and stramenopiles. The RRM fold domain is predicted to facilitate interaction of the catalytic domain with ssDNA or regulatory RNA [1]. 34.60 34.60 34.60 35.00 18.20 34.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.40 0.72 -4.34 21 84 2012-10-09 00:46:22 2012-10-09 01:46:22 1 10 25 0 52 90 0 95.50 62 7.39 NEW GTlLIPCRTAMRGSFPLNGTYFQVNEVFADHpSShNPIsVPRshIWsLsRRhVYFGTSVsSIF+GLosEcIQtCFW+GaVCVRGFDRcoRu............P+P...LhARLHhsso ............GTlLIPCRTAMRGSFPLNGTYFQVNEVFADHsSShNPIsVPRphlWsL.RR..hVYFGTSlsoIFKGLosc-IQpCFW+GaVCVRGFDppoRu............P+PLhs+LHhsss................................ 
0 10 30 41 +15489 PF15629 Perm-CXXC Permuted single zf-CXXC unit Iyer LM, Aravind L la_psag Manual Family This is a permuted version of a single unit of the zf-CXXC domain that is detected in the Demeter-like proteins of land plants. Structural comparisons show that the mono-CXXC is homologous to the structural-zinc binding domain of medium chain dehydrogenases [1]. The classical zf-CXXC domain binds nonmethyl-CpG dinucleotides. 21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.79 0.72 -3.95 10 47 2012-10-09 00:22:00 2012-10-09 02:18:17 1 4 17 0 28 56 0 31.40 54 2.09 NEW PcppCsS.pEoG+LCs-pTCFSCNSlREspSQT ...PcppCsS.pEsG+LCsppTCFSCNSlREspoQh... 0 4 17 23 +15490 PF15630 CENP-S Kinetochore component CENP-S Coggill P pcc manual Family CENP-S is a family of vertebral and fungal kinetochore component proteins. CENP-S complexes with CENP-X to form a stable CENP-T-W-S-X heterotetramer. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.57 0.72 -3.80 46 231 2012-10-10 12:36:46 2012-10-09 13:40:56 1 9 190 12 158 473 3 72.70 38 43.37 NEW p+LKuAlaasVGc.ls-E..p.sh....cpshs...so.phl......uALoEllapQ....lp.sl.upDLEsFA+HAtRoT.Iss-DVhLluR+Ns...sL ...........................pcLKuAlahsluplsc-.....s............ctshp........ho.phI......uAlsElsapQ.....................hc.sh.upDLEhFA..............+........HA.tRsT.lss-DVhLlARRsssL........... 0 47 84 126 +15491 PF15631 Imm-NTF2-2 NTF2 fold immunity protein Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein of the NTF2 fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-NucA family [1]. This domain is also fused to ankyrin repeats and the PFAM:PF14025. 
26.00 26.00 26.00 32.10 25.90 23.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.32 0.72 -3.99 17 170 2012-10-09 13:50:12 2012-10-09 14:50:12 1 1 121 0 11 48 1 67.30 70 58.81 NEW sAlclAEhhlh.lYGppl..ppKPahlshpssp..WllpGoh........ssp........hhGGshhIhIpKpDG+lLplhHsK ...AlpLAEIYV+.RYGpch.AEEEKPY.ITELssS..WVVEGsh.......hs.p.........lAGGVFIIEIsKpsGplLNFhHuK... 0 10 10 11 +15492 PF15632 ATPgrasp_Ter ATP-grasp in the biosynthetic pathway with Ter operon Anantharaman V rdf Anantharaman V Family This ATP-grasp family is related to carbamoyl phosphate synthetase. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo-nucleoside involved in stress response [1]. In press. Mol. BioSyst. 2012, DOI:10.1039/C2MB25239B. "Ter-dependent stress response systems: novel pathways related to metal sensing, production of a nucleoside-like metabolite, and DNA-processing" Anantharaman V, Iyer LM, Aravind L; 182.70 182.70 192.60 190.40 175.00 167.50 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.94 0.70 -5.69 99 206 2012-10-10 13:17:03 2012-10-09 14:55:16 1 1 201 0 43 148 9 327.20 42 96.59 NEW hlWFhcGhSupppllpul+s..s.................................tslplhASHpppcstlhthADhuhh..EPp....sspc...............hlpalLchscppslclllsu+psph..httpRspFput.....GscLhs.ssshpslphh--Kspahtthcp.....t.Gl...sssssht.lsos-EL.psuh.ss....lth...s......sp.lClKPssGlhG.hGFhhlcp........sssshc.shtp...ssspp.lshpthlsuhpts....-ph.....shllMPYLsGsEhSVDhls.cp................Gc...llAAV.sR.+Ks.....G..hh.Qplppcsph.hplApchActhshcGlhNlQhRpc.ssGp.Ph.LLEINsR.SGGlshot.ts....GVNLPtlhs.thtLGhht..-....h............h....psh.pVpslosslth.s 
...........pIWFhcGhSSpR-lIpul+shs.............................pshulplaASH+sp+spILphADhuhh..EPp.....Dspc.................hlpalhchspsasIchIhsGRssph..hEpHRuth-st.....GspLsTGusssshlslsD-Kspasphhcp.....pGL.PVlPohp.VsohsEL+stlus.....sas......sp.lClKPVoGIYG.hGFW+hDc........osushssFs+...s-pRhVosppYlsAhptu....EshpPhlLMPYLPGPEaSVDhLs-+................GcllAAVuRRKp......G..slQhLh.p-usAhELApcsAchhpsDGLVNVQTRsD.ssGp.PlLLEhNhRPSGGlsYTh.co....GVNLPGLFA.thpLGLhsp-h..sh......h.pslsVRulTssl.h.p.................... 0 13 28 36 +15493 PF15633 Tox-ART-HYD1 HYD1 signature containing ADP-ribosyltransferase Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the ADP-ribosyltransferase superfamily present in bacterial polymorphic toxin systems. The domain has characteristic histidine, tyrosine and aspartate residues that comprise the active site. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, or type 7 secretion system [1]. 25.00 25.00 45.30 43.40 23.00 22.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.31 0.72 -3.18 18 25 2012-10-09 14:47:52 2012-10-09 15:47:52 1 12 21 0 9 25 0 97.30 27 14.38 NEW lYHYTsccGhsuIh-Sshlp.....hpu.s.hp.thssGpYhoshuPGp......hp.ucuht+hsh.sss+ss+.al..ElDsss..Lphlcs..pcsshh.h.psslDlssct .....laHYTsccGhpuIhcosplp.....hpupsshp.utssGtYloshuPsc.....................hphucuhtphsl.sssKsoH.aI..ElDsss..Lphlcs........pcsshhhh..ptslDlssp............... 0 4 8 9 +15494 PF15634 Tox-ART-HYE1 HYE1 signature containing ADP-ribosyltransferase Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the ADP-ribosyltransferase superfamily present in bacterial polymorphic toxin systems. The domain has characteristic histidine, tyrosine and glutamate residues that comprise the active site [1]. 
25.00 25.00 530.80 530.50 22.00 21.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.96 0.70 -5.44 2 8 2012-10-09 14:52:20 2012-10-09 15:52:20 1 1 8 0 1 2 0 284.60 86 32.52 NEW GLsL.pMlRNss.TLps....tPpKsEDGYYYHlopupNL.ShlppGFhPQGS.GPTLSttDhppRKhGlI+hIYohlATplN+schuK.KISpsNFhMPp-FWpEFK..hpsIsspssI-..sphLpcSIscuIst.LDpscFhcKHs-+Kpp.IsppR.tlhppDE.INcIIop+shh.QQREAtNTcGYIYLAsp+NTL.cYhhphpppps.hllLAl.-sIFotKhLEpD.QEPcsAVRYpGul.ot-LpFVNpEGQlssh.hShu....G-hILs..pVIsFh+K GLNLFRMVRNNPITLRDDDGLEPKKAEDGYYYHLTNARNLMSIVEKGFIPQGSQGPTLSAGDLENRKKGVIRYIYTKVATELNRGEISKNKISSNNFKMPSDFWSEFKLEGSSITNDNDIESVNKKLNDSIVRGISAVLDKGEFKKKHNEKKSEDITRSRMRVMDDDE.INKIITQKPLLEQQREAANTKGYIYLAATRNTLEKYAINYKSSNDDMILLAIPDSIFSAKILEEDEQEPDCAVRYSGGVLSADLRFVNREGQVVPFEYSAS....GEIILDYPSVINFIRK 0 0 1 1 +15495 PF15635 Tox-GHH2 GHH signature containing HNH/Endo VII superfamily nuclease toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic s[AGP]HH signature motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type secretion system [1]. 25.00 25.00 49.60 48.70 22.40 19.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.50 0.72 -3.68 21 50 2012-10-09 14:59:33 2012-10-09 15:59:33 1 7 48 0 12 40 2 100.90 30 23.52 NEW pTGHHLIsspphpstst...............ptYspspAPslCs-Gssps.sGoHGphHsthpshththt............t.t.spastusspsssuhs..csastspCsccClctQLssaac ...QTuHHLIsspth+stup...............ssYocscAPslCsEGssps..sGoHGthHsthsshhtcpp...........hstp.tsassAcspshsuhu..pshstspCs+cCLcuQL-saap.......... 
0 1 8 11 +15496 PF15636 Tox-GHH GHH signature containing HNH/Endo VII superfamily nuclease toxin Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic sG[HQ]H signature motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, type 7 or TcdB/TcaC-type secretion system. The metazoan teneurin proteins possess an inactive of this domain at their C-terminus [1]. 25.00 25.00 27.50 26.40 24.20 24.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.36 0.72 -4.34 38 392 2012-10-09 15:03:10 2012-10-09 16:03:10 1 43 112 0 171 362 1 78.50 52 3.34 NEW -cE+pRllccA+pcAlcpAWpcE+ptlcsGtt.GstsWocsE+ppLLssGpVs............GY-Ghahhsl......ppYPcLA-sstNIpFh+ .......-EEKsRlL-hARQRAlspAWs+EQQ+l+-GcE.GsRtWTEGEKpQLLosG+Vp............GY-GaaVlSV......EQYP...ELADSusNIpFhR............ 0 29 40 94 +15497 PF15637 Tox-HNH-HHH HNH/Endo VII superfamily nuclease toxin with a HHH motif Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with characteristic conserved s[GD]xxR and HHH motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 
27.10 27.10 29.80 29.00 25.30 23.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.37 0.71 -4.31 9 31 2012-10-09 15:05:23 2012-10-09 16:05:23 1 6 29 0 6 30 1 111.30 43 22.97 NEW phsshssthlsDP+hsV-MsahG+GpssTNut.GWhRssKhFWpchhctpP-hhSpsNptpI.cpGh....uPhlDssalKaFPpa..sshhsDsLhHHHlGtGupAsslPssLHsG....GGlHshc ................chsph.sphhhDP+lsV-MsahGKGptsTNAt.GW.RssKhaapphhcp.pPE.haStpNpt+I.cpGh....sPllDppFlKHFPQY..sshhsDsLhHHHIGt.GGQAsAlPpsLHPGh...GGIHNhE....... 0 0 1 2 +15498 PF15638 Tox-MPTase2 Metallopeptidase toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 25.00 25.00 33.30 32.50 22.80 22.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.34 0.71 -4.78 6 10 2012-10-09 15:10:23 2012-10-09 16:10:23 1 2 9 0 2 10 0 186.80 24 14.59 NEW DYFsssG+FI+ss.ps+ssNIYIpsssGN.llhSsYsh+.......pucNhQhhANIsuHYAKtlGlc.........ttsolGspu.tcpsspuchssss.pGtpIpsushNGahcphhsNhYNLpSTLtHEshHp..spRstsc.....scll+..Vlh+phps.cFu+TTpsF+puhAthhpchlpsAYtpssp..Ntshscs.Khlsphspssshphh ..........................DYFsspG+FI+ss.ps+ss.IYIppspsN.llhSphshp........tpshphhhpIshHYAptsGlp.........ttsslGssu.tspsspuphsAss.pstpIphtlpsGahs+phsNhYNLpSTLhHEshHp..ptRGssc.....scllc..Vlh+phcs.cFs+TTpsF+cupAshhpchLpphhtpssp..pthhtcs.chlst.thpss.ph.......................................... 0 1 1 2 +15499 PF15639 Tox-MPTase3 Metallopeptidase toxin 3 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 
25.00 25.00 26.50 157.80 23.60 20.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.86 0.71 -4.41 6 6 2012-10-09 15:10:47 2012-10-09 16:10:47 1 2 5 0 5 6 1 138.70 31 33.31 NEW pt+KYP+LsaYLKasls..sl.l+sPsllpAhtKhutlscuplp-uLsWGKGPpIpIsshss......................AhGthoPsppuppl.pIctcLVspaEpups.c..pththallttTlLHELlHWuccpsGlD.PGEE.......GctFEc.lYGp ..t+KYP+LsaYLKasls..sl.l+sPsllpAhtKhutlscuplp-uLsWGKGPpIpIsshss......................AhGthoPsppuppl.pIctcLVspaEpups.c..pththallttTlLHELlHWuccpsGlD.PGEE.......GctFEc.lYGp. 0 1 5 5 +15500 PF15640 Tox-MPTase4 Metallopeptidase toxin 4 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 25.00 25.00 31.90 31.80 24.90 24.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.64 0.71 -4.51 7 110 2012-10-09 15:11:08 2012-10-09 16:11:08 1 13 105 0 3 60 0 125.40 78 16.88 NEW -th.VSGQs.+ls.sGpRlhslp-hKsa+K-MsphGIKV.IDKKspIL.....PpssAuGFDPhTGcIhLRpcsohlsshHEshHAcQWhcLGKEsYhpQotLEREEaVaNEIMKNKthFscuEIhaup+YIapLRst ..........DGRRVSGHTGFL..DGVRLSR.SQINNIAKEMEKLGIKV.IRKADKYL.....PPNARAAFDYGLRNIYLRKNATLYEVYHEVIHAKQFAKIGREAYEALGRLSR..........EEHVLNEILKSK...NLFNEAEIAHAIKYVEGLREK... 0 1 2 2 +15501 PF15641 Tox-MPTase5 Metallopeptidase toxin 5 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 
107.40 107.40 190.70 189.70 25.20 18.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.35 0.72 -4.08 5 5 2012-10-09 15:11:30 2012-10-09 16:11:30 1 3 3 0 1 4 0 109.20 79 12.30 NEW DLGHVTGSTARARNKEITAILKEDFAYLKLTYIPQYNPFMRTGIAKLGEGTQFGKNSFSSRAELRDVIIHEELHHRWWKRGLNDHHPK.GTEMEQKFYETIRRYKRMRGW DLGHVTGSTARARNKEITAILKEDFAYLKLTYIPQYNPFMRTGIAKLGEGTQFGKNSFSSRAELRDVIIHEELHHRWWKRGLNDHHPK.GTEMEQKFYETIRRYKRMRGW 0 0 1 1 +15502 PF15642 Tox-ODYAM1 Toxin in Odyssella and Amoebophilus Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted all-alpha fold toxin present in bacterial polymorphic toxin systems of the endosymbionts Odyssella and Amoebophilus [1]. 25.00 25.00 665.80 665.80 23.20 21.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.22 0.70 -5.80 3 3 2012-10-09 16:09:44 2012-10-09 17:09:44 1 3 1 0 3 4 0 385.30 52 22.38 NEW LKEHFKTtAoQuo.GAWKAulPV..KDI-psLSlpEQcALQQADKIVLDYNAKYpQsVp.NLSRFS+uKQQFSKAFuKTAQVoKDAVLGP-INSLp+LQQcVNuAYGSEVlKLDpGRhERNHcERps-lTTSHQQ+EEuLE+K+-cISuQL+..lsP++RlEL++cpspLssQLtEEEElYclALKRLsNPAQoALEDFYDWKRSVFHDFTKLHLGNaLITYEALALGTVARHPEAYEpsLQGSTAIGSVLADAFLPLGGLLGNVIGKIAEcGAELYADKQIRIKAAKIGSLYGHKGLEGMVALTREVANGLLFRLKDVIIDLTPESLDKLAKVATTQMIDYALKh..WEcNs.sQTINAhNLLL+GTQHQPSWhKuFTQsssLEpEDGRaVDG LKEHFKTtAoQuo.GAWKAulPV..KDI-psLSlpEQcALQQADKIVLDYNAKYpQsVp.NLSRFS+uKQQFSKAFuKTAQVoKDAVLGP-INSLp+LQQcVNuAYGSEVlKLDpGRhERNHcERps-lTTSHQQ+EEuLE+K+-cISuQL+..lsP++RlEL++cpspLssQLtEEEElYclALKRLsNPAQoALEDFYDWKRSVFHDFTKLHLGNaLITYEALALGTVARHPEAYEpsLQGSTAIGSVLADAFLPLGGLLGNVIGKIAEcGAELYADKQIRIKAAKIGSLYGHKGLEGMVALTREVANGLLFRLKDVIIDLTPESLDKLAKVATTQMIDYALKh..WEcNs.sQTINAhNLLL+GTQHQPSWhKuFTQsssLEpEDGRaVDG 0 3 3 3 +15503 PF15643 Tox-PL-2 Papain fold toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A papain fold toxin domain found in bacterial polymorphic toxin systems [1]. 
25.00 25.00 26.00 90.80 24.00 23.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.37 0.72 -4.03 10 11 2012-10-09 16:10:18 2012-10-09 17:10:18 1 3 9 0 4 16 0 98.80 33 18.26 NEW s.spl+QtIusIusc.F.p..laQChcCApAlcpaLppptIsGKlI+LpTt.spssa..Ias-tl..p...poIopNGpH.GItVt....shVFDNlascGlsRc-Wlpshps ....spltQtIspIssp.a.t..lhpC.pCApAltpaLppptIpGKlIcLpTt.pptsa..Ihs-tl..p...poIopNGpH.GltVt....shVFDNlpspGlsRc-Wlpsht... 0 1 4 4 +15504 PF15644 Tox-PL Papain fold toxin 1 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A papain fold toxin domain found in bacterial polymorphic toxin systems. In these systems they might function either as a releasing peptidase or toxin [1]. 24.10 24.10 24.50 24.10 23.40 22.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.64 0.72 -3.32 46 104 2012-10-09 16:13:10 2012-10-09 17:13:10 1 16 86 1 28 91 0 112.80 19 12.33 NEW pNChsCslAstssht..GtshsAtPph.tt..................sppsshshhpphhu..............tp.ht.h...................sshptltptlpstspGupullhsph.............tsts......GHshsll..ppsGs..lhalDsQsGpss ........................................................NC.sCshuhthpht..GhshpAtsth.t...................stpsshshhpphhu..............t...h......................sstphhpphlpshspGutshlhhth..............tsts........uHshssh..hpsGp..lhalDsQsGp..h...... 0 11 22 26 +15505 PF15645 Tox-PLDMTX Dermonecrotoxin of the Papain-like fold Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A papain fold toxin domain found in bacterial polymorphic toxin systems [1]. 
26.10 26.10 26.80 27.00 20.70 25.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -11.04 0.71 -4.12 14 38 2012-10-09 16:45:02 2012-10-09 17:45:02 1 5 35 0 8 29 1 134.70 47 14.49 NEW ssPhspCcsuhp.lsshh+.ptsh..psl+htslhh......W.psu...sc-hshNHalVlu+hsshcYVhDsTAcQFpsht................sp..sPlltstssWh.t...+aptuhpcKhlhhtchss...sphutssapt..t...hss.cslpssphLspPpWY ..............h.NPlGQCESLMTPVSsFMp.-KGF..-NIRYRGIaI......W..Dcs...TEElPpNHFAVVGsK-GKDYVFDlSAHQFcN+Gh...............SsLsGPLILoAD-Wls....KYRh..Ao++KLIhYsDFuN...uulAusuacALsc...chpsEohsGclhlTSPRWF....................................... 0 1 4 6 +15506 PF15646 Tox-REase-2 Restriction endonuclease fold toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 7 or PrsW-peptidase dependent secretion system [1]. 67.30 67.30 109.90 109.20 25.30 21.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.60 0.71 -4.25 7 7 2012-10-09 16:47:01 2012-10-09 17:47:01 1 2 6 0 4 7 0 127.40 32 25.86 NEW spsAYQhclAGYPEhclslP...upcpslhsDGhRsp...DGhhl-AKaVscPspsph..........p.lcplchpppupht.....DhhhcpDccELp+YstAhsss.spph+GlEIVTNs..................-ussYWpshMAhhGVs spsAYQhclAGYPEhclslP...upcpslhsDGhRsp...DGhhl-AKaVscPspsph..........p.lcplchpppupht.....DhhhcpDccELp+YstAhsss.spph+GlEIVTNs..................-ussYWpshMAhhGVs 0 1 2 4 +15507 PF15647 Tox-REase-3 Restriction endonuclease fold toxin 3 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, type 7 or PrsW-peptidase dependent secretion system [1]. 
25.00 25.00 36.00 68.00 22.20 20.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.19 0.72 -4.25 5 39 2012-10-09 16:47:57 2012-10-09 17:47:57 1 7 38 0 3 32 0 108.70 62 15.12 NEW puuKh+sstscpYEDhlREKLGGpS+lltu.....REaDAVTDchlAQsKcolSuIcpPKNFLNKKsRcQIKATIEAApQQGK+ApFWFKhc....VpscV+EYIEp+GGcVIIsocs ........ppuKhREALDlHYEDLlRRKLGG.Sp.IsG.....REaDsVTDchIAQsKRThSuIDpPKNFLsKpsRsQIKtTIEhAcpQGKcApFWFKYG....VpsKV+EYIESKGGcVllGhG.s 0 1 2 2 +15508 PF15648 Tox-REase-5 Restriction endonuclease fold toxin 5 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, or PrsW-peptidase dependent secretion system. Versions of this domain are also found in caudoviruses [1]. 25.00 25.00 34.30 33.40 24.80 23.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.38 0.72 -3.70 66 128 2012-10-09 16:49:27 2012-10-09 17:49:27 1 5 85 0 41 164 2 94.30 33 27.11 NEW tshpYQtpITG.hs.......hshphtht..t.............................h.h.slcFDGapss...pslLhEAKu..sY.c.pFhc.pst...p.st.h.............hpuhcsh...hcQupcQspssps.t.s.splcWaFtp. ....shcYQtplTG.hs.......hsh.-.ht.hs...pp............................h.h.slcFDGFpss...cChLhEAKu..sY.c.pFhs.ssh...p..t.h.............hpGhpsh.hppApcQsssscs.s.sssplcWaFtp........... 0 3 9 21 +15509 PF15649 Tox-REase-7 Restriction endonuclease fold toxin 7 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, or type 7 secretion system [1]. 
25.00 25.00 27.60 28.40 23.30 22.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.87 0.72 -3.85 21 52 2012-10-09 16:49:57 2012-10-09 17:49:57 1 10 43 0 15 60 0 86.20 34 11.40 NEW tGptuEppss.sht.......tspcplsstss.......tpRIPDshsp.spplsEVKNVpcl..shopQl+s.sphApppGhphsLlVs..csTplS.psL ........................h.GctuEhtAuhhhs.......us++plssshs.tt....tppIPDhls..p.sc..slsEVKNVpc...uhTcQlptplchApp.thchhLlss..chTclst......... 0 4 9 14 +15510 PF15650 Tox-REase-9 Restriction endonuclease fold toxin 9 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system [1]. 25.00 25.00 30.10 29.10 23.10 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.81 0.72 -3.78 11 16 2012-10-09 16:50:23 2012-10-09 17:50:23 1 7 15 0 9 17 0 83.00 43 6.76 NEW thopSuh+hGRphHKsYKsu.h.....st.KEFpLPSG+RhD..FlDa-s..KhIYELKPsNP+uIKcGhKQLcsY+pElcphh........GtsWpslL-TY .t.hopSuh+hGRphHKsYKhs.......shpKEF+LP..SGK..RhD..hlDhps..+hIYELKP.NP+uIKpGhKQLcsY+pElpph.........GtsWpshl-sY...... 0 4 9 9 +15511 PF15651 Tox-SGS Salivary glad secreted protein domain toxin Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family An alpha+beta fold domain with four conserved cysteine residues and a conserved [DE}xx[ND] motif. This domain is mainly present at the c-terminus of RHS repeats containing proteins in insects and crustaceans. Although no bacterial homologs have been identified, the domain architecture suggests an origin from bacterial polymorphic toxin systems [1]. 
25.00 25.00 29.20 27.80 18.20 17.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.35 0.72 -4.23 18 21 2012-10-09 16:50:45 2012-10-09 17:50:45 1 2 6 0 14 21 0 97.10 33 3.39 NEW pNCa....ssss-tsplpCYppcupsplFs+ss........phhspDpassChPlpapGpPSluCcGppooalYTPhps....s+hFDhlDGWLhLApVuPsslRNlppsh ......................NCa....s.ssphshlhCapppuhshlFs+ss........pshspDpassChPlpapGpPSluCcGppooalYTPhps....schFDhlDGWLhLApVuPssl+slpph................ 0 3 4 14 +15512 PF15652 Tox-SHH HNH/Endo VII superfamily toxin with a SHH signature Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with two conserved histidine residues. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6 or type 7 secretion system [1]. 22.70 22.70 23.80 24.60 21.60 21.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.63 0.72 -3.42 14 38 2012-10-09 16:51:09 2012-10-09 17:51:09 1 12 37 0 9 38 1 91.60 36 20.28 NEW .suhlQSHHsIQDtWAc....pslssYspc..tAPshLl.Ssst.sHAtloA...tpRscRsupG......WsoslpcEFstuhpE.Mh-.AGVsppssc+sh+c..uY+YFD ...su.lpsHHul.DhWAc....pNlssYstc..tsssslh.otc...Hshhpss.pthhhc+sucs.sGK..Wpos.pcEhpphscc.Mh-.AsVscpstpphhcs..pYpah... 0 1 5 8 +15513 PF15653 Tox-URI2 URI fold toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the URI nuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system [1]. 
25.00 25.00 25.60 25.20 22.10 20.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -10.04 0.72 -3.79 13 44 2012-10-09 16:52:12 2012-10-09 17:52:12 1 9 34 0 16 47 0 76.20 23 13.91 NEW .chupccshuYphhcKs-....laKlGEos.....ppps.sh+sRhstphLc+ssup...................hphlpcss....t..sKpsspphEst+lppapcppG .......p.spcpshsYthhccss....lhKhGcos.....tpts.ssctRhphphLpcpssp...................hphhspss........s+pphpphEptclpth................ 0 7 13 14 +15514 PF15654 Tox-WTIP Toxin with a conserved tryptophan and TIP tripeptide motif Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin domain with two membrane spanning alpha helices and RxxR, Wx[ST]IP motifs. The domain is present in bacterial polymorphic toxin systems. The toxin is usually exported by the type 2 or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 25.00 25.00 81.00 79.70 21.00 19.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.82 0.72 -4.35 7 9 2012-10-09 16:52:36 2012-10-09 17:52:36 1 5 9 0 1 10 0 54.00 48 9.63 NEW .phsssssshsss-sussssAusGsuYllYRslRhlPSLhPPLW.TIPANlAhP .p.phssssoshoTs-sussssAslGsGYlIYRslRhlPSLhPPLWhoIPANlssP 0 0 0 1 +15515 PF15655 Imm-NTF2 NTF2 fold immunity protein Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein of the NTF2 fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-JAB-2 family [1]. 
25.00 25.00 27.70 27.40 21.70 20.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.00 0.71 -3.68 32 92 2012-10-09 18:50:36 2012-10-09 19:50:36 1 3 74 0 15 92 2 130.10 21 66.80 NEW .ppscphlhs...FhttMppWEp...h.....ppht.pp.t.............hppclpsIappasTcK...pRptut....shSh...G.PspYs.ppt....hl-h..pptspsplhl.hp..pps...thptc.acahlhhpsscWhIDp....hppc........-pWppsh. .............................................h...hpphl.tFhtthppaEt.h.......pt.s.pp......................t.hppclttlappaso-K....+htst...hshSa...u.supYss.p.....hl-h..cphscsclhlht..ppp....hphc.hpFlhhhhsspWhIDp....hppc........ctWpps..h....... 0 2 9 9 +15516 PF15656 Tox-HDC Toxin with a H, D/N and C signature Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted alpha/beta fold peptidase domain with a strongly conserved triad of a histidine, aspartate/asparagine and cysteine residues that are predicted to comprise the active site of the predicted peptidase. Proteins bearing this predicted toxin domain are particularly common in both intracellular and extracellular pathogens [1]. 25.00 25.00 28.30 27.40 20.40 19.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.73 0.71 -3.90 9 20 2012-10-09 19:11:59 2012-10-09 20:11:59 1 5 16 0 5 19 0 116.40 28 18.40 NEW tlaGscsphtshc.lptslpsIs+p.S..spsIhIlSGoHGhssG.........pNWhtps.........lRcPpl.h-hpFhtpDhpshpt....hscplhlhDlssss.tchss..lpss......ssphILuYCaStsDpsht. .........................lhstptth.sht.lptshphlt+p.s.sstcIhIlSGoHGtssG.........pNasups...............lRcPsl.hE+tFahEDlpsap.t...........hstpV+lhDlushop.pEhss.plpss......spplIhGaCaSpsDchhh.h.... 
0 1 2 4 +15517 PF15657 Tox-HNH-EHHH HNH/Endo VII superfamily nuclease toxins Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic conserved [ED]H motif and two histidine residues. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 25.00 25.00 26.70 26.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.70 0.72 -3.83 33 80 2012-10-09 19:15:32 2012-10-09 20:15:32 1 26 69 0 26 75 0 66.90 30 8.65 NEW tlhcpsspshhsRpYcFpsscG.....ppllIp-HotGHths.....tspsPHFNsts.cs...........................hpsGphsspcsHYsa .......hhsspspshhsR.acapshcG.........pKhlIp-HotG+th.......spuPHapsts.pp............................h.c.puphstp.psHY.a....................... 0 2 9 18 +15518 PF15658 Latrotoxin_C Latrotoxin C-terminal domain Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A toxin domain present in arthropod alphaproteobacterial, gammaproteobacterial endosymbionts and also at the C-termini of the latrotoxins of the black widow spider. The domain is characterized by a conserved, hydrophobic helix and is predicted to associate with the cell membrane [1]. 25.00 25.00 46.20 43.40 24.70 22.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.46 0.71 -4.31 8 23 2012-10-09 19:27:40 2012-10-09 20:27:40 1 16 8 0 8 25 0 126.00 38 6.65 NEW phDsNuslhLLDlLIRKlTspKYhsT....sc.polSPLEApGYALsIsKcFEcVlc.QAulKSGISh+cLNIDhlElQcpIssK..ItSGKFsEISulLsSYlEcAhPs......ucLS.Kph-KFhspFNscl-.....slLNp ......hDVNGslhLLDlLIRKhoupKYhss....sc....pSISsLEAQuYALsIsc+FEcVLp.psulKuGlShcpLNlDhstlQpcIhtK..lhuG+FsEIuthLsSasccAhPt......upLp.KphcKFh.pFppthc.hlp............. 
0 0 2 2 +15519 PF15659 Toxin-JAB1 JAB-like toxin 1 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family \N 26.40 26.40 26.40 26.60 25.60 25.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.36 0.71 -4.36 13 24 2012-10-10 14:49:21 2012-10-09 20:38:55 1 2 16 0 6 24 1 153.20 26 34.55 NEW p+ss+las....phtsusps-phhssp.ts..psIpl.spsll...uphsp..tpsppG....thpoas....oTssspsAtslFcFsA-NTo..VEWpLsshp.-sGs.pshsltTspcptuspss..hsph.pc.htssuphlhc..IHSH...........Ptss.....tsS.....hsDhphup.tps..........huhYhpc.......tthhphYs ......................ppssclhp.....hhpssps-phh.hp.ts..psIpl.sppll...sthhp..tpsppG....hhpoas....sTss.csAtslFcFsA-NTo..VEWpLssht.-p....Gs.pshhlsTsppppuVt....hs.htpc.h.stsphlIc..hHSH...........Phss....tsS.....spDhpshp.hps...........shahcc........t.h..Y................ 0 6 6 6 +15520 PF15660 Imm49 Immunity protein 49 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family \N 25.00 25.00 168.90 168.80 24.80 19.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.79 0.72 -3.94 4 10 2012-10-09 20:02:34 2012-10-09 21:02:34 1 1 10 0 0 10 0 82.90 99 96.96 NEW hRALVALKRELLPGVTTFIDSVRLEAIDDKADRLMVTTSVGEEARLVYFNPDFAGTPTFGRRLYRLRDWTDDLADWVDRLRRER VRALVALKRELLPGVTTFIDSVRLEAIDDKADRLMVTTSVGEEARLVYFNPDFAGTPTFGRRLYRLRDWTDDLADWVDRLRRER 0 0 0 0 diff -r 000000000000 -r 68a3648c7d91 pfam_annot/pfamA.txt.gz Binary file pfam_annot/pfamA.txt.gz has changed diff -r 000000000000 -r 68a3648c7d91 pfam_annot/pfam_annot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/pfam_annot.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,13 @@ + + Generate synthetic reports + /home/inmare/galaxy/tools/pfam_annot/annota.pl $prot $pfam $out + + + + + + + + + This tools produce a synthetic report, in fasta format, containing a description of PFAM domains annotated to each protein. A separate report is generated for each fosmid. 
Fosmid names are inferred from the heades of the input fasta file, using the naming convention described in the "Sanger Fosmid End" annotator tool. Therefore contigs should be assigned to fosmids prior to perform this step and ideally prior to the annotation of functional domains. The output file contains links to the PFAM wiki, where a more comprehensive description of the domains can be found. A pfamScan output table is required as input. The output of this tool is compatible with the PFAM search utility, which might be used in order to perform keyword searches on the annotation + diff -r 000000000000 -r 68a3648c7d91 pfam_annot/pro --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/pro Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,646 @@ + + + +Proteins with PFAM domains: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 68a3648c7d91 pfam_annot/prots --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/prots Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,80 @@ +>PROKKA_00001 Mercuric resistance operon regulatory protein +MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSA +QRLGFSLDEIAELLRLDDGTHCEEASSLAEHKLKDVREKMADLARMETVLSELVCACHAR +KGNVSCPLIASLQGEAGLARSAMP* +>PROKKA_00002 MerT mercuric transport protein +MSEPQNGRGALFAGGLAAILASTCCLGPLVLVALGFSGAWIGNLTVLEPYRPLFIGAALV +ALFFAWKRIYRPVQACKPGEVCAIPQVRATYKLIFWIVAVLVLVALGFPYVVPFFY* +>PROKKA_00003 Mercuric transport protein periplasmic component precursor +MKKLFASLALAAAVAPVWAATQTVTLAVPGMTCAACPITVKKALSKVEGVSKVDVGFEKR +EAVVTFDDTKASVQKLTKATADAGYPSSVKQ* +>PROKKA_00004 Mercuric resistance protein MerC +MGLMTRIADKTGALGSVVSAMGCAACFPALASFGAAIGLGFLSQYEGLFISRLLPLFAAL +AFLANALGWFSHRQWLRSLLGMIGPAIVFAATVWLLGNWWTANLMYVGLALMIGVSIWDF +VSPAHRRCGPDGCELPAKRL* +>PROKKA_00005 Mercuric reductase +MSTLKITGMTCDSCAVHVKDALEKVPGVQSADVSYAKGSAKLAIEVGTSPDALTAAVAGL +GYRATLADAPSVSTPGGLLDKMRDLLGRNDKTGSSGALHIAVIGSGGAAMAAALKAVEQG +ARVTLIERGTIGGTCVNVGCVPSKIMIRAAHIAHLRRESPFDGGIAATTPTIQRTALLAQ +QQARVDELRHAKYEGILEGNPAITVLHGSARFKDNRNLIVQLNDGGERVVAFDRCLIATG +ASPAVPPIPGLKDTPYWTSTEALVSETIPKRLAVIGSSVVALELAQAFARLGAKVTILAR +STLFFREDPAIGEAVTAAFRMEGIEVREHTQASQVAYINGEGDGEFVLTTAHGELRADKL +LVATGRAPNTRKLALDATGVTLTPQGAIVIDPGMRTSVEHIYAAGDCTDQPQFVYVAAAA +GTRAAINMTGGDAALNLTAMPAVVFTDPQVATVGYSEAEAHHDGIKTDSRTLTLDNVPRA +LANFDTRGFIKLVVEEGSGRLIGVQAVAPEAGELIQTAALAIRNRMTVQELADQLFPYLT +MVEGLKLAAQTFNKDVKQLSCCAG* 
+>PROKKA_00006 zinc-responsive transcriptional regulator +MSAYTVSQLAHNAGVSVHIVRDYLVRGLLRPVACTTGGYGVFDDAALQRLCFVRAAFEAG +IGLDALARLCRALDAADGAQAAAQLAVLRQLVERRRAALAHLDAQLASMPAERAHEEALP +* +>PROKKA_00007 MerE protein +VNAPDKLPPETRQPVSGYLWGALAVLTCPCHLPILAAVLAGTTAGAFLGEHWGVAALALT +GLFVLAVTRLLRAFRGGS* +>PROKKA_00008 Phytochrome-like protein cph2 +MTSSQPAGWTAAELAQAAARGQLDLHYQPLVDLRDHRIAGAEALMRWRHPRLGLLPPGQF +LPLAESFGLMPEIGAWVLGEACRQMHKWQGPAWQPFRLAINVSASQVGPTFDDEVKRVLA +DMALPAELLEIELTESVAFGNPALFASFDALRAIGVRFAADDFGTGYSCLQHLKCCPITT +LKIDQSFVARLPDDARDQTIVRAVIQLAHGLGMDVIFRRRLHQLIGRNGCCAASS* +>PROKKA_00009 Transposon Tn7 transposition protein TnsB +MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVY +VLIRRARQGSGLVTDLVPGQSGGGKGKGRLPEPVERVIHELLQKRFLTKQKRSLAAFHRE +VTQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQV +QIDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVAC +DKRPWLEGLNVEMDWQMSGKPLLLYLDNAAEFKSEALRRGCEQHGIRLDYRPLGQPHYGG +IVERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVH +NGLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHC +CK* +>PROKKA_00010 Integrase core domain protein +MNPFKGRHFQRDIILWAVRWYCKYGISYRELQEMLAERGVNVDHSTIYRWVQRYAPEMEK +RLRWYWRNPSDLCPWHMDETYVKVNGRWAYLYRAVDSRGRTVDFYLSSRRNSKAAYRFLG +KILNNVKKWQIPRFINTDKAPAYGRALALLKREGRCPSDVEHRQIKYRNNVIECDHGKLK +RIIGATLGFKSMKTAYATIKGIEVMRALRKGQASAFYYGDPLGEMRLVSRVFEM* +>PROKKA_00011 DNA-binding transcriptional regulator LysR +MKLRHLDIFYAVMTCGSLTRAAEVLHISQPAASKALKHAEH* +>PROKKA_00012 hypothetical protein +MPSRFLTPYIPLVNLFSLHVYELILVTTKPKFEL* +>PROKKA_00013 Sodium/glutamate symport carrier protein +MILDASYTLLVACIALLIGMFVVKFTPFLQKNHIPEAVVGGFIVAIVLLIIDKTSGYSFT +FDASLQSLLMLTFFSSIGLSSDFSRLIKGGKPLVLLTIAVTILIAIQNTVGMSMAVMMNE +SPFIGLIAGSITLTGGHGNAGAWGPILADKYGVTGAVELAMACATLGLVLGGLVGGPVAR +HLLKKVSIPKTTEQERDTIVEAFEQPSVKRKINANNVIETISMLIICIVVGGYISALFKD +TFLQLPTFVWCLFVGIIIRNTLTHVFKHEVFEPTVDVLGSVALSLFLAMALMSLKFGQLA +SMAGPVLIIIAVQTVVMVLFACFVTFKMMGKDYDAVVISAGHCGFGMGATPTAIANMQTV 
+TKAFGPSHKAFLVVPMVGAFIVDISNSILIKIFIEIGTYFT* +>PROKKA_00014 Antibiotic biosynthesis monooxygenase +MIAVIFEVQIQPDQQTRYLTLAEELRPLLSHVAGFISIERFQSLATEGKMLSLSWWENEY +AVLQWKNHVLHAKAQQEGRESIFDFYKISIAHITREYSFKKDKDNV* +>PROKKA_00015 hypothetical protein +MFDVHVVLDNQIGQLALLGKTLGNKGIGLEGGGIFTVGDECHAHFLVEQGKEAKIALEQA +GLLVLAIRTPLIRKLKQEKPGELGEIARVLAENNINILVQYSDHANQLILITDNDSMAAS +VTLPWAIK* +>PROKKA_00016 Helix-turn-helix domain protein +MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFR +LAGKDIAELMESMMGISLNHGVHARVSTPVHLRKARTCYDHLAGEVAVKIYDSLCQQQWI +TENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKG +WLTRHLGYREVTITEKGYAAFKTHFHI* +>PROKKA_00017 Tetracycline repressor protein class B from transposon Tn10 +MSRLDKSKVINSALELLNEVGIEGLTTRKLAQKLGVEQPTLYWHVKNKRALLDALAIEML +DRHHTHFCPLEGESWQDFLRNNAKSFRCALLSHRDGAKVHLGTRPTEKQYETLENQLAFL +CQQGFSLENALYALSAVGHFTLGCVLEDQEHQVAKEERETPTTDSMPPLLRQAIELFDHQ +GAEPAFLFGLELIICGLEKQLKCESGS* diff -r 000000000000 -r 68a3648c7d91 pfam_annot/prova --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/prova Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,536 @@ + + + +Proteins with PFAM domains: +

+
+
+
+
PROKKA_00001
+
+
+
 
+MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSAQRLGFSLDEIAELLRLDDGTHCEEASSLAE
+HKLKDVREKMADLARMETVLSELVCACHARKGNVSCPLIASLQGEAGLARSAMP
+
+ 
+
+

+ PF00376 +

merR;
MerR family regulatory protein. Prosite & Pfam-B_3021 (Release 7.5).

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF09278 +

MerR, DNA binding
Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold ..

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00002
+
+
+
 
+MSEPQNGRGALFAGGLAAILASTCCLGPLVLVALGFSGAWIGNLTVLEPYRPLFIGAALVALFFAWKRIYRPVQACKPGEVCAIPQVRAT
+YKLIFWIVAVLVLVALGFPYVVPFFY
+
+ 
+
+

+ PF02411 +

MerT mercuric transport protein
Pfam-B_1796 (release 5.4). MerT is an mercuric transport integral membrane protein and is responsible for transport of the Hg2+ iron from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE)..

+ +

+
+
+
PROKKA_00003
+
+
+
 
+MKKLFASLALAAAVAPVWAATQTVTLAVPGMTCAACPITVKKALSKVEGVSKVDVGFEKREAVVTFDDTKASVQKLTKATADAGYPSSVK
+Q
+
+ 
+
+

+ PF00403 +

Heavy-metal-associated domain

+ +

+
+
+
PROKKA_00004
+
+
+
 
+MGLMTRIADKTGALGSVVSAMGCAACFPALASFGAAIGLGFLSQYEGLFISRLLPLFAALAFLANALGWFSHRQWLRSLLGMIGPAIVFA
+ATVWLLGNWWTANLMYVGLALMIGVSIWDFVSPAHRRCGPDGCELPAKRL
+
+ 
+
+

+ PF03203 +

MerC mercury resistance protein
Pfam-B_2720 (release 6.5).

+ +

+
+
+
PROKKA_00005
+
+
+
 
+MSTLKITGMTCDSCAVHVKDALEKVPGVQSADVSYAKGSAKLAIEVGTSPDALTAAVAGLGYRATLADAPSVSTPGGLLDKMRDLLGRND
+KTGSSGALHIAVIGSGGAAMAAALKAVEQGARVTLIERGTIGGTCVNVGCVPSKIMIRAAHIAHLRRESPFDGGIAATTPTIQRTALLAQ
+QQARVDELRHAKYEGILEGNPAITVLHGSARFKDNRNLIVQLNDGGERVVAFDRCLIATGASPAVPPIPGLKDTPYWTSTEALVSETIPK
+RLAVIGSSVVALELAQAFARLGAKVTILARSTLFFREDPAIGEAVTAAFRMEGIEVREHTQASQVAYINGEGDGEFVLTTAHGELRADKL
+LVATGRAPNTRKLALDATGVTLTPQGAIVIDPGMRTSVEHIYAAGDCTDQPQFVYVAAAAGTRAAINMTGGDAALNLTAMPAVVFTDPQV
+ATVGYSEAEAHHDGIKTDSRTLTLDNVPRALANFDTRGFIKLVVEEGSGRLIGVQAVAPEAGELIQTAALAIRNRMTVQELADQLFPYLT
+MVEGLKLAAQTFNKDVKQLSCCAG
+
+ 
+
+

+ PF00403 +

Heavy-metal-associated domain

+ +

+
+

+ PF07992 +

Pyridine nucleotide-disulphide oxidoreductase
This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain..

+ +

+
+

+ CL0063 +

FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD.

+ +

+
+

+ PF00070 +

pyr_redox;
Pyridine nucleotide-disulphide oxidoreductase. This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain..

+ +

+
+

+ CL0063 +

FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD.

+ +

+
+

+ PF02852 +

pyr_redox_dim;
Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain. This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases..

+ +

+
+
+
PROKKA_00006
+
+
+
 
+MSAYTVSQLAHNAGVSVHIVRDYLVRGLLRPVACTTGGYGVFDDAALQRLCFVRAAFEAGIGLDALARLCRALDAADGAQAAAQLAVLRQ
+LVERRRAALAHLDAQLASMPAERAHEEALP
+
+ 
+
+

+ PF13411 +

MerR HTH family regulatory protein

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00007
+
+
+
 
+VNAPDKLPPETRQPVSGYLWGALAVLTCPCHLPILAAVLAGTTAGAFLGEHWGVAALALTGLFVLAVTRLLRAFRGGS
+
+ 
+
+

+ PF05052 +

MerE protein
Pfam-B_5840 (release 7.7). The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2]. .

+ +

+
+
+
PROKKA_00008
+
+
+
 
+MTSSQPAGWTAAELAQAAARGQLDLHYQPLVDLRDHRIAGAEALMRWRHPRLGLLPPGQFLPLAESFGLMPEIGAWVLGEACRQMHKWQG
+PAWQPFRLAINVSASQVGPTFDDEVKRVLADMALPAELLEIELTESVAFGNPALFASFDALRAIGVRFAADDFGTGYSCLQHLKCCPITT
+LKIDQSFVARLPDDARDQTIVRAVIQLAHGLGMDVIFRRRLHQLIGRNGCCAASS
+
+ 
+
+

+ PF00563 +

DUF2;
Alignment kindly provided by SMART. This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function . The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site ..

+ +

+
+
+
PROKKA_00009
+
+
+
 
+MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVYVLIRRARQGSGLVTDLVPGQSGGGKGKGRL
+PEPVERVIHELLQKRFLTKQKRSLAAFHREVTQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQV
+QIDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVACDKRPWLEGLNVEMDWQMSGKPLLLYLDNAA
+EFKSEALRRGCEQHGIRLDYRPLGQPHYGGIVERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVH
+NGLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHCCK
+
+ 
+
+

+ PF13518 +

Helix-turn-helix domain
This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding..

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF00665 +

Integrase core domain
Pfam-B_10 (release 2.1). Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site ..

+ +

+
+

+ CL0219 +

Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H.

+ +

+
+
+
PROKKA_00010
+
+
+
 
+MNPFKGRHFQRDIILWAVRWYCKYGISYRELQEMLAERGVNVDHSTIYRWVQRYAPEMEKRLRWYWRNPSDLCPWHMDETYVKVNGRWAY
+LYRAVDSRGRTVDFYLSSRRNSKAAYRFLGKILNNVKKWQIPRFINTDKAPAYGRALALLKREGRCPSDVEHRQIKYRNNVIECDHGKLK
+RIIGATLGFKSMKTAYATIKGIEVMRALRKGQASAFYYGDPLGEMRLVSRVFEM
+
+ 
+
+

+ PF13610 +

DDE domain
This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26..

+ +

+
+

+ CL0219 +

Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H.

+ +

+
+
+
PROKKA_00011
+
+
+
 
+MKLRHLDIFYAVMTCGSLTRAAEVLHISQPAASKALKHAEH
+
+ 
+
+

+ PF00126 +

Bacterial regulatory helix-turn-helix protein, lysR family

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00013
+
+
+
 
+MILDASYTLLVACIALLIGMFVVKFTPFLQKNHIPEAVVGGFIVAIVLLIIDKTSGYSFTFDASLQSLLMLTFFSSIGLSSDFSRLIKGG
+KPLVLLTIAVTILIAIQNTVGMSMAVMMNESPFIGLIAGSITLTGGHGNAGAWGPILADKYGVTGAVELAMACATLGLVLGGLVGGPVAR
+HLLKKVSIPKTTEQERDTIVEAFEQPSVKRKINANNVIETISMLIICIVVGGYISALFKDTFLQLPTFVWCLFVGIIIRNTLTHVFKHEV
+FEPTVDVLGSVALSLFLAMALMSLKFGQLASMAGPVLIIIAVQTVVMVLFACFVTFKMMGKDYDAVVISAGHCGFGMGATPTAIANMQTV
+TKAFGPSHKAFLVVPMVGAFIVDISNSILIKIFIEIGTYFT
+
+ 
+
+

+ PF03616 +

Sodium/glutamate symporter
TIGRFAMs, Griffiths-Jones SR.

+ +

+
+

+ CL0064 +

CPA/AT transporter superfamily This Clan contains transporter proteins that belong to the CPA superfamily and AT superfamily according to TCDB .

+ +

+
+
+
PROKKA_00014
+
+
+
 
+MIAVIFEVQIQPDQQTRYLTLAEELRPLLSHVAGFISIERFQSLATEGKMLSLSWWENEYAVLQWKNHVLHAKAQQEGRESIFDFYKISI
+AHITREYSFKKDKDNV
+
+ 
+
+

+ PF03992 +

Antibiotic biosynthesis monooxygenase
This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. It's occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue..

+ +

+
+

+ CL0032 +

Dimeric alpha/beta barrel superfamily This superfamily of proteins possess a Ferredoxin-like fold. Pairs of these assemble into a beta barrel. The function of this barrel is quite varied and includes Muconolactone isomerase as well as monooxygenases.

+ +

+
+
+
PROKKA_00015
+
+
+
 
+MFDVHVVLDNQIGQLALLGKTLGNKGIGLEGGGIFTVGDECHAHFLVEQGKEAKIALEQAGLLVLAIRTPLIRKLKQEKPGELGEIARVL
+AENNINILVQYSDHANQLILITDNDSMAASVTLPWAIK
+
+ 
+
+

+ PF01842 +

ACT domain
This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine . Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585.

+ +

+
+

+ CL0070 +

These domains are involved in binding to amino-acids and causing allosteric regulation of linked enzyme domains . The relationship between these two families was first noticed in .

+ +

+
+
+
PROKKA_00016
+
+
+
 
+MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFRLAGKDIAELMESMMGISLNHGVHARVSTPV
+HLRKARTCYDHLAGEVAVKIYDSLCQQQWITENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKG
+WLTRHLGYREVTITEKGYAAFKTHFHI
+
+ 
+
+

+ PF12840 +

Helix-turn-helix domain
This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins..

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00017
+
+
+
 
+MSRLDKSKVINSALELLNEVGIEGLTTRKLAQKLGVEQPTLYWHVKNKRALLDALAIEMLDRHHTHFCPLEGESWQDFLRNNAKSFRCAL
+LSHRDGAKVHLGTRPTEKQYETLENQLAFLCQQGFSLENALYALSAVGHFTLGCVLEDQEHQVAKEERETPTTDSMPPLLRQAIELFDHQ
+GAEPAFLFGLELIICGLEKQLKCESGS
+
+ 
+
+

+ PF00440 +

tetR;
Bacterial regulatory proteins, tetR family.

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF02909 +

tetR_C;
Tetracyclin repressor, C-terminal all-alpha domain.

+ +

+
+

+ CL0174 +

TetR protein, C-terminal domain-like This clan features families of transcriptional regulators for multidrug efflux pumps, which belong to the TetR superfamily. They are induced by the presence of a variety of factors, such as antibiotics or organic solvents. The C-terminal region featured in these families is thought to contain the inducer-binding site; the divergent sequences in this region allow for the binding of a variety of different inducers [1-4].

+ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
PROKKA_00001
+
+
+
 
+MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSAQ
RLGFSLDEIAELLRLDDGTHCEEASSLAEHKLKDVREKMADLARMETVLSELVCACHARK
GNVSCPLIASLQGEAGLARSAMP* +
+
+

+ PF00376merR;
MerR family regulatory protein Prosite & Pfam-B_3021 (Release 7.5) + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+

+ PF09278MerR, DNA binding
Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold . + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00002
+
+
+
 
+MSEPQNGRGALFAGGLAAILASTCCLGPLVLVALGFSGAWIGNLTVLEPYRPLFIGAALVA
LFFAWKRIYRPVQACKPGEVCAIPQVRATYKLIFWIVAVLVLVALGFPYVVPFFY* +
+
+

+ PF02411MerT mercuric transport protein
MerT mercuric transport protein MerT is an mercuric transport integral membrane protein and is responsible for transport of the Hg2+ iron from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE). + +

+
+
+
PROKKA_00003
+
+
+
 
+MKKLFASLALAAAVAPVWAATQTVTLAVPGMTCAACPITVKKALSKVEGVSKVDVGFEKRE
AVVTFDDTKASVQKLTKATADAGYPSSVKQ* +
+
+

+ PF00403Heavy-metal-associated domain
+ +

+
+
+
PROKKA_00004
+
+
+
 
+MGLMTRIADKTGALGSVVSAMGCAACFPALASFGAAIGLGFLSQYEGLFISRLLPLFAALA
FLANALGWFSHRQWLRSLLGMIGPAIVFAATVWLLGNWWTANLMYVGLALMIGVSIWDFV
SPAHRRCGPDGCELPAKRL* +
+
+

+ PF03203MerC mercury resistance protein
MerC mercury resistance protein + +

+
+
+
PROKKA_00005
+
+
+
 
+MSTLKITGMTCDSCAVHVKDALEKVPGVQSADVSYAKGSAKLAIEVGTSPDALTAAVAGLG
YRATLADAPSVSTPGGLLDKMRDLLGRNDKTGSSGALHIAVIGSGGAAMAAALKAVEQGA
RVTLIERGTIGGTCVNVGCVPSKIMIRAAHIAHLRRESPFDGGIAATTPTIQRTALLAQQ
QARVDELRHAKYEGILEGNPAITVLHGSARFKDNRNLIVQLNDGGERVVAFDRCLIATGA
SPAVPPIPGLKDTPYWTSTEALVSETIPKRLAVIGSSVVALELAQAFARLGAKVTILARS
TLFFREDPAIGEAVTAAFRMEGIEVREHTQASQVAYINGEGDGEFVLTTAHGELRADKLL
VATGRAPNTRKLALDATGVTLTPQGAIVIDPGMRTSVEHIYAAGDCTDQPQFVYVAAAAG
TRAAINMTGGDAALNLTAMPAVVFTDPQVATVGYSEAEAHHDGIKTDSRTLTLDNVPRAL
ANFDTRGFIKLVVEEGSGRLIGVQAVAPEAGELIQTAALAIRNRMTVQELADQLFPYLTM
VEGLKLAAQTFNKDVKQLSCCAG* +
+
+

+ PF00403Heavy-metal-associated domain
+ +

+
+

+ PF07992Pyridine nucleotide-disulphide oxidoreductase
Pyridine nucleotide-disulphide oxidoreductase This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. + +

+
+

+ CL0063 FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD. + +

+
+

+ PF00070pyr_redox;
Pyridine nucleotide-disulphide oxidoreductase Sonnhammer ELL, Griffiths-Jones SR This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. + +

+
+

+ CL0063 FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD. + +

+
+

+ PF02852pyr_redox_dim;
Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. + +

+
+
+
PROKKA_00006
+
+
+
 
+MSAYTVSQLAHNAGVSVHIVRDYLVRGLLRPVACTTGGYGVFDDAALQRLCFVRAAFEAGI
GLDALARLCRALDAADGAQAAAQLAVLRQLVERRRAALAHLDAQLASMPAERAHEEALP*
+
+
+

+ PF13411MerR HTH family regulatory protein
MerR HTH family regulatory protein + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00007
+
+
+
 
+VNAPDKLPPETRQPVSGYLWGALAVLTCPCHLPILAAVLAGTTAGAFLGEHWGVAALALTG
LFVLAVTRLLRAFRGGS* +
+
+

+ PF05052MerE protein
The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2]. + +

+
+
+
PROKKA_00008
+
+
+
 
+MTSSQPAGWTAAELAQAAARGQLDLHYQPLVDLRDHRIAGAEALMRWRHPRLGLLPPGQFL
PLAESFGLMPEIGAWVLGEACRQMHKWQGPAWQPFRLAINVSASQVGPTFDDEVKRVLAD
MALPAELLEIELTESVAFGNPALFASFDALRAIGVRFAADDFGTGYSCLQHLKCCPITTL
KIDQSFVARLPDDARDQTIVRAVIQLAHGLGMDVIFRRRLHQLIGRNGCCAASS* +
+
+

+ PF00563DUF2;
Alignment kindly provided by SMART This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function . The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site . + +

+
+
+
PROKKA_00009
+
+
+
 
+MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVYV
LIRRARQGSGLVTDLVPGQSGGGKGKGRLPEPVERVIHELLQKRFLTKQKRSLAAFHREV
TQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQVQ
IDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVACD
KRPWLEGLNVEMDWQMSGKPLLLYLDNAAEFKSEALRRGCEQHGIRLDYRPLGQPHYGGI
VERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVHN
GLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHCC
K* +
+
+

+ PF13518Helix-turn-helix domain
This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding. + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+

+ PF00665Integrase core domain
Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site . + +

+
+

+ CL0219 Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H. + +

+
+
+
PROKKA_00010
+
+
+
 
+MNPFKGRHFQRDIILWAVRWYCKYGISYRELQEMLAERGVNVDHSTIYRWVQRYAPEMEKR
LRWYWRNPSDLCPWHMDETYVKVNGRWAYLYRAVDSRGRTVDFYLSSRRNSKAAYRFLGK
ILNNVKKWQIPRFINTDKAPAYGRALALLKREGRCPSDVEHRQIKYRNNVIECDHGKLKR
IIGATLGFKSMKTAYATIKGIEVMRALRKGQASAFYYGDPLGEMRLVSRVFEM* +
+
+

+ PF13610DDE domain
This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26. + +

+
+

+ CL0219 Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H. + +

+
+
+
PROKKA_00011
+
+
+
 
+MKLRHLDIFYAVMTCGSLTRAAEVLHISQPAASKALKHAEH*
+ 
+
+

+ PF00126Bacterial regulatory helix-turn-helix protein, lysR family
Bacterial regulatory helix-turn-helix protein, lysR family + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00013
+
+
+
 
+MILDASYTLLVACIALLIGMFVVKFTPFLQKNHIPEAVVGGFIVAIVLLIIDKTSGYSFTF
DASLQSLLMLTFFSSIGLSSDFSRLIKGGKPLVLLTIAVTILIAIQNTVGMSMAVMMNES
PFIGLIAGSITLTGGHGNAGAWGPILADKYGVTGAVELAMACATLGLVLGGLVGGPVARH
LLKKVSIPKTTEQERDTIVEAFEQPSVKRKINANNVIETISMLIICIVVGGYISALFKDT
FLQLPTFVWCLFVGIIIRNTLTHVFKHEVFEPTVDVLGSVALSLFLAMALMSLKFGQLAS
MAGPVLIIIAVQTVVMVLFACFVTFKMMGKDYDAVVISAGHCGFGMGATPTAIANMQTVT
KAFGPSHKAFLVVPMVGAFIVDISNSILIKIFIEIGTYFT* +
+
+

+ PF03616Sodium/glutamate symporter
+ +

+
+

+ CL0064 CPA/AT transporter superfamily This Clan contains transporter proteins that belong to the CPA superfamily and AT superfamily according to TCDB . + +

+
+
+
PROKKA_00014
+
+
+
 
+MIAVIFEVQIQPDQQTRYLTLAEELRPLLSHVAGFISIERFQSLATEGKMLSLSWWENEYA
VLQWKNHVLHAKAQQEGRESIFDFYKISIAHITREYSFKKDKDNV* +
+
+

+ PF03992Antibiotic biosynthesis monooxygenase
Antibiotic biosynthesis monooxygenase This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. It's occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue. + +

+
+

+ CL0032 Dimeric alpha/beta barrel superfamily This superfamily of proteins possess a Ferredoxin-like fold. Pairs of these assemble into a beta barrel. The function of this barrel is quite varied and includes Muconolactone isomerase as well as monooxygenases. + +

+
+
+
PROKKA_00015
+
+
+
 
+MFDVHVVLDNQIGQLALLGKTLGNKGIGLEGGGIFTVGDECHAHFLVEQGKEAKIALEQAG
LLVLAIRTPLIRKLKQEKPGELGEIARVLAENNINILVQYSDHANQLILITDNDSMAASV
TLPWAIK* +
+
+

+ PF01842ACT domain
This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine . Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585 + +

+
+

+ CL0070 These domains are involved in binding to amino-acids and causing allosteric regulation of linked enzyme domains . The relationship between these two families was first noticed in . + +

+
+
+
PROKKA_00016
+
+
+
 
+MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFRL
AGKDIAELMESMMGISLNHGVHARVSTPVHLRKARTCYDHLAGEVAVKIYDSLCQQQWIT
ENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKGW
LTRHLGYREVTITEKGYAAFKTHFHI* +
+
+

+ PF12840Helix-turn-helix domain
This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins. + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00017
+
+
+
 
+MSRLDKSKVINSALELLNEVGIEGLTTRKLAQKLGVEQPTLYWHVKNKRALLDALAIEMLD
RHHTHFCPLEGESWQDFLRNNAKSFRCALLSHRDGAKVHLGTRPTEKQYETLENQLAFLC
QQGFSLENALYALSAVGHFTLGCVLEDQEHQVAKEERETPTTDSMPPLLRQAIELFDHQG
AEPAFLFGLELIICGLEKQLKCESGS* +
+
+

+ PF00440tetR;
Bacterial regulatory proteins, tetR family + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+

+ PF02909tetR_C;
Tetracyclin repressor, C-terminal all-alpha domain + +

+
+

+ CL0174 TetR protein, C-terminal domain-like This clan features families of transcriptional regulators for multidrug efflux pumps, which belong to the TetR superfamily. They are induced by the presence of a variety of factors, such as antibiotics or organic solvents. The C-terminal region featured in these families is thought to contain the inducer-binding site; the divergent sequences in this region allow for the binding of a variety of different inducers [1-4]. + +

+
+
+ + diff -r 000000000000 -r 68a3648c7d91 pfam_annot/prova2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/prova2 Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,660 @@ + + + + + + +Proteins with PFAM domains: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
PROKKA_00001
+
+
+
 
+MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSAQRLGFSLDEIAELLRLDDGTHCEEASSLAE
+HKLKDVREKMADLARMETVLSELVCACHARKGNVSCPLIASLQGEAGLARSAMP
+
+ 
+
+

+ PF00376 +

merR;
MerR family regulatory protein Prosite & Pfam-B_3021 (Release 7.5)

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF09278 +

MerR, DNA binding
Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold .

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00002
+
+
+
 
+MSEPQNGRGALFAGGLAAILASTCCLGPLVLVALGFSGAWIGNLTVLEPYRPLFIGAALVALFFAWKRIYRPVQACKPGEVCAIPQVRAT
+YKLIFWIVAVLVLVALGFPYVVPFFY
+
+ 
+
+

+ PF02411 +

MerT mercuric transport protein
Pfam-B_1796 (release 5.4) MerT is an mercuric transport integral membrane protein and is responsible for transport of the Hg2+ iron from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE).

+ +

+
+
+
PROKKA_00003
+
+
+
 
+MKKLFASLALAAAVAPVWAATQTVTLAVPGMTCAACPITVKKALSKVEGVSKVDVGFEKREAVVTFDDTKASVQKLTKATADAGYPSSVK
+Q
+
+ 
+
+

+ PF00403 +

Heavy-metal-associated domain

+ +

+
+
+
PROKKA_00004
+
+
+
 
+MGLMTRIADKTGALGSVVSAMGCAACFPALASFGAAIGLGFLSQYEGLFISRLLPLFAALAFLANALGWFSHRQWLRSLLGMIGPAIVFA
+ATVWLLGNWWTANLMYVGLALMIGVSIWDFVSPAHRRCGPDGCELPAKRL
+
+ 
+
+

+ PF03203 +

MerC mercury resistance protein
Pfam-B_2720 (release 6.5)

+ +

+
+
+
PROKKA_00005
+
+
+
 
+MSTLKITGMTCDSCAVHVKDALEKVPGVQSADVSYAKGSAKLAIEVGTSPDALTAAVAGLGYRATLADAPSVSTPGGLLDKMRDLLGRND
+KTGSSGALHIAVIGSGGAAMAAALKAVEQGARVTLIERGTIGGTCVNVGCVPSKIMIRAAHIAHLRRESPFDGGIAATTPTIQRTALLAQ
+QQARVDELRHAKYEGILEGNPAITVLHGSARFKDNRNLIVQLNDGGERVVAFDRCLIATGASPAVPPIPGLKDTPYWTSTEALVSETIPK
+RLAVIGSSVVALELAQAFARLGAKVTILARSTLFFREDPAIGEAVTAAFRMEGIEVREHTQASQVAYINGEGDGEFVLTTAHGELRADKL
+LVATGRAPNTRKLALDATGVTLTPQGAIVIDPGMRTSVEHIYAAGDCTDQPQFVYVAAAAGTRAAINMTGGDAALNLTAMPAVVFTDPQV
+ATVGYSEAEAHHDGIKTDSRTLTLDNVPRALANFDTRGFIKLVVEEGSGRLIGVQAVAPEAGELIQTAALAIRNRMTVQELADQLFPYLT
+MVEGLKLAAQTFNKDVKQLSCCAG
+
+ 
+
+

+ PF00403 +

Heavy-metal-associated domain

+ +

+
+

+ PF07992 +

Pyridine nucleotide-disulphide oxidoreductase
This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain.

+ +

+
+

+ CL0063 +

FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD.

+ +

+
+

+ PF00070 +

pyr_redox;
Pyridine nucleotide-disulphide oxidoreductase This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain.

+ +

+
+

+ CL0063 +

FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD.

+ +

+
+

+ PF02852 +

pyr_redox_dim;
Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases.

+ +

+
+
+
PROKKA_00006
+
+
+
 
+MSAYTVSQLAHNAGVSVHIVRDYLVRGLLRPVACTTGGYGVFDDAALQRLCFVRAAFEAGIGLDALARLCRALDAADGAQAAAQLAVLRQ
+LVERRRAALAHLDAQLASMPAERAHEEALP
+
+ 
+
+

+ PF13411 +

MerR HTH family regulatory protein

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00007
+
+
+
 
+VNAPDKLPPETRQPVSGYLWGALAVLTCPCHLPILAAVLAGTTAGAFLGEHWGVAALALTGLFVLAVTRLLRAFRGGS
+
+ 
+
+

+ PF05052 +

MerE protein
Pfam-B_5840 (release 7.7) The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2].

+ +

+
+
+
PROKKA_00008
+
+
+
 
+MTSSQPAGWTAAELAQAAARGQLDLHYQPLVDLRDHRIAGAEALMRWRHPRLGLLPPGQFLPLAESFGLMPEIGAWVLGEACRQMHKWQG
+PAWQPFRLAINVSASQVGPTFDDEVKRVLADMALPAELLEIELTESVAFGNPALFASFDALRAIGVRFAADDFGTGYSCLQHLKCCPITT
+LKIDQSFVARLPDDARDQTIVRAVIQLAHGLGMDVIFRRRLHQLIGRNGCCAASS
+
+ 
+
+

+ PF00563 +

DUF2;
Alignment kindly provided by SMART This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function . The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site .

+ +

+
+
+
PROKKA_00009
+
+
+
 
+MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVYVLIRRARQGSGLVTDLVPGQSGGGKGKGRL
+PEPVERVIHELLQKRFLTKQKRSLAAFHREVTQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQV
+QIDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVACDKRPWLEGLNVEMDWQMSGKPLLLYLDNAA
+EFKSEALRRGCEQHGIRLDYRPLGQPHYGGIVERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVH
+NGLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHCCK
+
+ 
+
+

+ PF13518 +

Helix-turn-helix domain
This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding.

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF00665 +

Integrase core domain
Pfam-B_10 (release 2.1) Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site .

+ +

+
+

+ CL0219 +

Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H.

+ +

+
+
+
PROKKA_00010
+
+
+
 
+MNPFKGRHFQRDIILWAVRWYCKYGISYRELQEMLAERGVNVDHSTIYRWVQRYAPEMEKRLRWYWRNPSDLCPWHMDETYVKVNGRWAY
+LYRAVDSRGRTVDFYLSSRRNSKAAYRFLGKILNNVKKWQIPRFINTDKAPAYGRALALLKREGRCPSDVEHRQIKYRNNVIECDHGKLK
+RIIGATLGFKSMKTAYATIKGIEVMRALRKGQASAFYYGDPLGEMRLVSRVFEM
+
+ 
+
+

+ PF13610 +

DDE domain
This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26.

+ +

+
+

+ CL0219 +

Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H.

+ +

+
+
+
PROKKA_00011
+
+
+
 
+MKLRHLDIFYAVMTCGSLTRAAEVLHISQPAASKALKHAEH
+
+ 
+
+

+ PF00126 +

Bacterial regulatory helix-turn-helix protein, lysR family

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00013
+
+
+
 
+MILDASYTLLVACIALLIGMFVVKFTPFLQKNHIPEAVVGGFIVAIVLLIIDKTSGYSFTFDASLQSLLMLTFFSSIGLSSDFSRLIKGG
+KPLVLLTIAVTILIAIQNTVGMSMAVMMNESPFIGLIAGSITLTGGHGNAGAWGPILADKYGVTGAVELAMACATLGLVLGGLVGGPVAR
+HLLKKVSIPKTTEQERDTIVEAFEQPSVKRKINANNVIETISMLIICIVVGGYISALFKDTFLQLPTFVWCLFVGIIIRNTLTHVFKHEV
+FEPTVDVLGSVALSLFLAMALMSLKFGQLASMAGPVLIIIAVQTVVMVLFACFVTFKMMGKDYDAVVISAGHCGFGMGATPTAIANMQTV
+TKAFGPSHKAFLVVPMVGAFIVDISNSILIKIFIEIGTYFT
+
+ 
+
+

+ PF03616 +

Sodium/glutamate symporter
TIGRFAMs, Griffiths-Jones SR

+ +

+
+

+ CL0064 +

CPA/AT transporter superfamily This Clan contains transporter proteins that belong to the CPA superfamily and AT superfamily according to TCDB .

+ +

+
+
+
PROKKA_00014
+
+
+
 
+MIAVIFEVQIQPDQQTRYLTLAEELRPLLSHVAGFISIERFQSLATEGKMLSLSWWENEYAVLQWKNHVLHAKAQQEGRESIFDFYKISI
+AHITREYSFKKDKDNV
+
+ 
+
+

+ PF03992 +

Antibiotic biosynthesis monooxygenase
This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. It's occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue.

+ +

+
+

+ CL0032 +

Dimeric alpha/beta barrel superfamily This superfamily of proteins possess a Ferredoxin-like fold. Pairs of these assemble into a beta barrel. The function of this barrel is quite varied and includes Muconolactone isomerase as well as monooxygenases.

+ +

+
+
+
PROKKA_00015
+
+
+
 
+MFDVHVVLDNQIGQLALLGKTLGNKGIGLEGGGIFTVGDECHAHFLVEQGKEAKIALEQAGLLVLAIRTPLIRKLKQEKPGELGEIARVL
+AENNINILVQYSDHANQLILITDNDSMAASVTLPWAIK
+
+ 
+
+

+ PF01842 +

ACT domain
This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine . Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585

+ +

+
+

+ CL0070 +

These domains are involved in binding to amino-acids and causing allosteric regulation of linked enzyme domains . The relationship between these two families was first noticed in .

+ +

+
+
+
PROKKA_00016
+
+
+
 
+MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFRLAGKDIAELMESMMGISLNHGVHARVSTPV
+HLRKARTCYDHLAGEVAVKIYDSLCQQQWITENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKG
+WLTRHLGYREVTITEKGYAAFKTHFHI
+
+ 
+
+

+ PF12840 +

Helix-turn-helix domain
This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins.

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00017
+
+
+
 
+MSRLDKSKVINSALELLNEVGIEGLTTRKLAQKLGVEQPTLYWHVKNKRALLDALAIEMLDRHHTHFCPLEGESWQDFLRNNAKSFRCAL
+LSHRDGAKVHLGTRPTEKQYETLENQLAFLCQQGFSLENALYALSAVGHFTLGCVLEDQEHQVAKEERETPTTDSMPPLLRQAIELFDHQ
+GAEPAFLFGLELIICGLEKQLKCESGS
+
+ 
+
+

+ PF00440 +

tetR;
Bacterial regulatory proteins, tetR family

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF02909 +

tetR_C;
Tetracyclin repressor, C-terminal all-alpha domain

+ +

+
+

+ CL0174 +

TetR protein, C-terminal domain-like This clan features families of transcriptional regulators for multidrug efflux pumps, which belong to the TetR superfamily. They are induced by the presence of a variety of factors, such as antibiotics or organic solvents. The C-terminal region featured in these families is thought to contain the inducer-binding site; the divergent sequences in this region allow for the binding of a variety of different inducers [1-4].

+ +

+
+
+ diff -r 000000000000 -r 68a3648c7d91 pfam_annot/script.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/script.js Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,11 @@ +function show(elementID) { + var ele = document.getElementById(elementID); + if (!ele) { + alert("no such element"); + return; + } + var pages = document.getElementsByClassName('page'); + for(var i = 0; i < pages.length; i++) { + pages[i].style.display = 'none'; + } + ele.style.display = 'block'; diff -r 000000000000 -r 68a3648c7d91 pfam_annot/table --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_annot/table Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,51 @@ +# pfam_scan.pl, run at Fri Jun 19 13:56:11 2015 +# +# Copyright (c) 2009 Genome Research Ltd +# Freely distributed under the GNU +# General Public License +# +# Authors: Jaina Mistry (jaina@ebi.ac.uk), +# Rob Finn (rdf@ebi.ac.uk) +# +# This is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later version. +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . 
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# query sequence file: /home/inmare/galaxy/database/files/000/dataset_62.dat +# cpu number specified: 2 +# searching against: /home/inmare/galaxy/tools/pfamScan/hmm/Pfam-A.hmm, with cut off --cut_ga +# resolve clan overlaps: on +# predict active sites: off +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# +# + +PROKKA_00001 9 46 9 46 PF00376.18 MerR Family 1 38 38 43.1 2.2e-11 1 CL0123 +PROKKA_00001 51 113 51 113 PF09278.6 MerR-DNA-bind Domain 1 65 65 67.9 7.2e-19 1 CL0123 +PROKKA_00002 1 116 1 116 PF02411.10 MerT Family 1 116 116 214.7 1.7e-64 1 No_clan +PROKKA_00003 25 85 25 86 PF00403.21 HMA Domain 1 61 62 60.3 1.4e-16 1 No_clan +PROKKA_00004 8 121 8 123 PF03203.9 MerC Family 1 114 116 81.1 7.4e-23 1 No_clan +PROKKA_00005 3 62 3 63 PF00403.21 HMA Domain 1 61 62 37.8 1.5e-09 1 No_clan +PROKKA_00005 100 410 99 412 PF07992.9 Pyr_redox_2 Domain 2 199 201 140.8 4.8e-41 1 CL0063 +PROKKA_00005 271 345 271 354 PF00070.22 Pyr_redox Domain 1 74 80 51.6 9.3e-14 1 CL0063 +PROKKA_00005 440 548 440 549 PF02852.17 Pyr_redox_dim Domain 1 109 110 104.5 2.9e-30 1 No_clan +PROKKA_00006 4 72 4 73 PF13411.1 MerR_1 Family 1 68 69 44.0 1.3e-11 1 CL0123 +PROKKA_00007 2 74 1 75 PF05052.7 MerE Family 2 74 75 142.4 3e-42 1 No_clan +PROKKA_00008 13 218 11 219 PF00563.15 EAL Domain 3 209 236 169.6 7.3e-50 1 No_clan +PROKKA_00009 26 78 26 87 PF13518.1 HTH_28 Domain 1 46 52 26.9 3.4e-06 1 CL0123 +PROKKA_00009 173 313 172 313 PF00665.21 rve Domain 2 120 120 85.4 2.9e-24 1 CL0219 +PROKKA_00010 75 212 73 213 PF13610.1 DDE_Tnp_IS240 Domain 4 139 140 170.7 1.6e-50 1 CL0219 +PROKKA_00011 3 40 3 41 PF00126.22 HTH_1 Domain 1 38 60 48.8 4.1e-13 1 CL0123 +PROKKA_00013 2 366 1 366 PF03616.9 Glt_symporter Family 2 368 368 544.8 6.1e-164 1 CL0064 +PROKKA_00014 1 76 1 77 PF03992.11 ABM Domain 1 77 78 48.0 8.8e-13 1 CL0032 +PROKKA_00015 78 98 74 115 PF01842.20 ACT Domain 9 29 66 20.5 
0.00024 1 CL0070 +PROKKA_00016 5 55 1 55 PF12840.2 HTH_20 Domain 11 61 61 35.6 5.6e-09 1 CL0123 +PROKKA_00017 10 55 9 55 PF00440.18 TetR_N Domain 2 47 47 53.5 1.3e-14 1 CL0123 +PROKKA_00017 68 201 68 201 PF02909.12 TetR_C Domain 1 139 139 165.1 6.5e-49 1 CL0174 diff -r 000000000000 -r 68a3648c7d91 pfam_search/annota.Filter.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/annota.Filter.pl Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,206 @@ +#!/usr/bin/perl -w + +use strict; +my $d_file="/home/inmare/galaxy/tools/pfam_search/pfamA.txt"; +open(IN,$d_file); +my %decode=(); +my %clan_decode; +my $id=""; +my %c=(); + + +my ($prot_file,$pfam_file,$prefix,@search_T)=@ARGV; +my $searchP=""; +while() +{ + if ($_=~/^\d/) + { + my @vl=(split(/\t+/)); + $decode{$vl[1]}="$vl[3]
";#$vl[8] $vl[9]"; + my $cc=0; + my %repeated=(); + foreach my $v (@vl) + { + $v=~s/\[\d+\]/ /g; + $cc++; + last if $v=~/hmmbuild/; + last if $cc>10; + next if $v=~/anon/; + next if $v=~/Bates/; + next if $v=~/Cogis/; + next if $v=~/Bateman/; + next if $v=~/Sonnhammer/; + next if $v=~/Finn/; + next if $v=~/Studholme/; + next if $v eq $vl[3]; + next if $v=~/Kerrison/; + next if $repeated{$v}; + #next if length($v)>=30 && $cc<=10; + $decode{$vl[1]}.="$v " if length($v)>=20 && $cc<=10; + $repeated{$v}++; + } + } +} +close(IN); + +my $clan_file="/home/inmare/galaxy/tools/pfam_search/clans.txt"; +open(IN,$clan_file); +while() +{ + my @vl=(split(/\t/)); + #$clan_decode{$vl[1]}="$vl[3]"; + my $cc=0; + foreach my $v (@vl) + { + $cc++; + $v=~s/\[\d+\]/ /g; + $clan_decode{$vl[1]}.="$v " if length($v) >=30 && $cc<=10; + } + +} +close(IN); +open(IN,"$prot_file"); +while() +{ + if ($_=~/^>(.*)/) + { + $id=$1; + $id=(split(/\s+/,$id))[0]; + }else{ + chomp; + $c{$id}.=$_; + } +} +close(IN); + +foreach my $s (@search_T) +{ + $searchP.="$s "; +} + +open(OUT,">$prefix"); +print OUT "\n\n\n"; +my $color="\"#czb9dz\""; +my %printed; +open(IN,$pfam_file); +print OUT "Proteins with PFAM domains matching the keywords:\n

\n"; +print OUT "
\n\n"; +my $ntokens=0; +while() +{ + next if $_=~/^\#/; + my ($name,$domain,$clan)=(split(/\s+/))[0,5,-1]; + next unless $name; + next unless $domain; + $domain=~s/\.\d+//; + my $sd=$decode{$domain} ? $decode{$domain} : "MagnottaPantaleo§§"; + my $sc=$clan_decode{$clan} ? $clan_decode{$clan} : "SciarrattaCalogero@@"; + my $continue=match($searchP,$sd,$sc); + #print "$name $domain $clan $sd $sc\n"; + next unless $continue; + unless ($printed{$name}) + { + my $seq=$c{$name}; + + $seq=~s/\*//g; + $seq=form($seq,90); + print OUT "\n"; + print OUT "\n\n\n\n"; + $ntokens=2; + } + my $hd=uc $domain; + #www.canoro.altervista.org + if ($decode{$domain}) + { + my $ddes=$decode{$domain}; + if ($ntokens % 2==0) + { + print OUT "\n\n\n\n"; + $ntokens++; + } + if ($clan_decode{$clan}) + { + my $clanD=$clan_decode{$clan}; + my $ddes=$decode{$domain}; + if ($ntokens % 2==0) + { + print OUT "\n\n\n\n"; + $ntokens++; + } + $printed{$name}=1; +} + +#print OUT "

Proteins without PFAM domains:\n
\n"; +#foreach my $seq (keys %c) +#{ +# next if $printed{$seq}; +# print OUT "<>$seq\n\n

\n$c{$seq}
\n"; +# print OUT "
\n"; +#} +print OUT "
\n"; + print OUT "
\n"; + print OUT "
$name
\n
\n"; + print OUT "
\n"; + print OUT "
 \n$seq\n 
\n"; + print OUT "
\n"; + }else{ + print OUT "\n"; + } + + print OUT "

\n"; + print OUT " $domain\n

$ddes

\n\n"; + print OUT "

\n
\n"; + }else{ + print OUT "\n"; + } + + print OUT "

\n"; + print OUT " $clan\n

$clanD

\n\n"; + print OUT "

\n
\n
\n\n\n"; +close(OUT); + +sub form +{ + my $string=$_[0]; + my $len=$_[1]; + my $outS=""; + my @vl=split('',$string); + for (my $i=1;$i<=$#vl;$i++) + { + if ($i % $len==0 && $i>0) + { + $outS.="$vl[$i-1]\n"; + }else{ + $outS.=$vl[$i-1]; + } + } + $outS.="\n"; + $outS=~s/ //g; + return $outS; +} + +sub match +{ + my $terms=$_[0]; + my $d1=$_[1]; + my $d2=$_[2]; + $terms=~s/AND/ /g; + my @t1s=(split(/OR/,$terms)); + my $print_out=0; + foreach my $t (@t1s) + { + #print "$t\n"; + last if $print_out==1; #OR non serve se 1 è verificata; + #permute + my @vl=(split(/\s+/,$t)); + my $nm1=0; + my $nm2=0; + foreach my $v (@vl) + { + $nm1++ if ($d1=~/$v/i); + $nm2++ if ($d1=~/$v/i); + } + $print_out=1 if ($nm1==($#vl+1)) || ($nm2==($#vl+1)); + } + return $print_out; +} diff -r 000000000000 -r 68a3648c7d91 pfam_search/clans.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/clans.txt Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,515 @@ +1 CL0001 EGF \N EGF superfamily Finn RD, Bateman A anon Members of this clan all belong to the EGF superfamily. This particular superfamily is characterised as having least 6 cysteines residues.\ \ \ \ These cysteine form disulphide bonds, in the order 1-3, 2-4, 5-6, which are essential for the stability of the EGF fold. These disulphide bonds are stacked in a ladder-like arrangement. The Laminin EGF family is distinguished by having an an additional disulphide bond. The function of the domains within this family remains unclear, but they are though to largely perform a structural role. More often than not, there domains are arranged a tandem repeats in extracellular proteins. 2008-09-03 15:50:29 2004-03-17 16:02:08 26 325 6259 696 88541 1 +3 CL0003 SAM \N Sterile Alpha Motif (SAM) domain Finn RD anon SAM domains are found in a diverse set of proteins, which include scaffolding proteins, transcription regulators, translational regulators tyrosine kinases and serine/threonine kinases [1-3]. 
SAM domains are found in all eukaryotes and some bacteria [3] . Structures of SAM domains reveal a common five helical structure. The SAM domain is involved in a variety of functions. The most widespread function is in domain-domain interactions. The SAM domain performs domain-domain interactions using multifarious arrangements of the SAM domain. More recently, the SAM domain within the Smaug protein has been demonstrated to bind to the Nanos 3' UTR translation control element (Rfam:RF00161) [3]. This clan currently only represents the diverse SAM domain family and does not contain the more divergent SAM/Pointed family (Pfam:PF02198). 2008-09-03 15:50:29 2004-03-17 16:21:50 20 126 742 467 11010 1 +4 CL0004 Concanavalin \N Concanavalin-like lectin/glucanase superfamily Bateman A anon This superfamily includes a diverse range of carbohydrate binding domains and glycosyl hydrolase enzymes that share a common structure. 2008-09-03 15:50:29 2004-03-17 16:44:11 19 1631 2750 3131 34755 1 +5 CL0005 Kazal \N Kazal like domain Finn RD anon Kazal domains are found in both serine protease inhibitors and extracellular regions of agrins. The structure of the Kazal domain is a small alpha/beta fold. Typically the Kazal domain consists of 2 short-helices and a 3-stranded anti-parallel sheet. The fold is contains several disulphide bonds. 2008-09-03 15:50:29 2004-03-17 17:00:11 26 106 337 450 6552 1 +6 CL0006 C1 \N Protein kinase C, C1 domain Finn RD anon The members of this clan are all variations of the protein kinase C1 domain that is characterised by a rich cysteine and histidine content. The C1 domain is the N-terminal region of conservation found in protein kinase C domains. This domain is involved in binding many ligands, which include diacylglycerol, phorbol esters and zinc [1]. 
2008-09-03 15:50:29 2004-03-17 17:47:56 19 30 728 396 10495 1 +7 CL0007 KH \N K-Homology (KH) domain Superfamily Finn RD anon The KH domain is thought to be the second most prevalent RNA binding motif in proteins. The motif is characterised by a conserved GXXXGXXG in the middle of the domain. Structures of KH reveal that the KH domain is arranged as either a beta-alpha-alpha-beta-beta (mini-KH domain) or beta-alpha-alpha-beta-beta-alpha (maxi-KH domain). The secondary elements are separated by at least four loop segments. The second loop is located between beta-1 and al The KH domain can be found either as single or multiple copies. The KH domain usually binds RNA as a multimer. 2008-09-03 15:50:29 2004-03-17 17:58:30 17 312 491 5344 38636 1 +9 CL0009 ENTH_VHS \N ENTH/ANTH/VHS superfamily Bateman A, McMahon H anon This clan includes the related ENTH and ANTH domains as well as the VHS domain. The ENTH domain is approximately 150 residues in length and is a solenoid of alpha-helices. The various ENTH domains have various lipid specificities but the key feature that distinguishes it functionally from ANTH domains is its ability to bend membranes. It does this by folding an additional N-terminal helix on lipid binding. The ANTH domain is approximately 300 residues in length and is a PtdIns(4,5)P2 binding domain. It has no membrane bending properties. The VHS (Vps-27, Hrs and STAM) domain is a 140 residue long domain present in the very NH2-terminus of at least 60 proteins. Based on their functional characteristics and on recent data on the involvement of VHS in cargo recognition in trans-Golgi, VHS domains are considered to have a general membrane targeting/cargo recognition role in vesicular trafficking [5]. 
2008-09-03 15:50:29 2004-03-18 10:53:33 19 75 85 345 4028 1 +10 CL0010 SH3 \N Src homology-3 domain Finn RD anon Src homology-3 (SH3) domains are comprised of about 60 amino acids, performing either an assembly or regulatory role.\ For example, SH3 domains in the Grb2 adaptor protein are essential for protein-protein interactions and\ \ signal transduction in the p21 Ras-dependent growth factor signaling pathway. Alternatively, SH3 performs a regulatory role in the Src family of tyrosine kinases. SH3 domains bind a variety of peptide ligands, many of which contain a PxxP motif. This PxxP motif is flanked by different specificity elements [1]. Structures of SH3 domains, both free and ligand complexed, have provided insights into the mechanism of ligand recognition. The SH3 fold consists of two anti-parallel beta sheets that lie at right angles to each other. Within the fold, there are two variable loops, referred to as RT and n-Src loops. When SH3 binds to its ligand, the proline rich ligand adopts a PPII helix conformation, with the PPII helix structure recognised by a pair of grooves on the surface of the SH3 domain that bind turns of the helix. The SH3 grooves are formed by a series of nearly parallel, well-conserved aromatic residues [1]. 2008-09-03 15:50:29 2004-03-18 11:12:55 20 564 2030 4044 40209 1 +11 CL0011 Ig \N Immunoglobulin superfamily Bateman A, Finn RD anon Members of the immunoglobulin superfamily are found in hundreds of proteins of different functions. Examples include antibodies, the giant muscle kinase titin and receptor tyrosine kinases. Immunoglobulin-like domains may be involved in protein-protein and protein-ligand interactions. The superfamily can be divided into discrete structural sets, by the presence or absence of beta-strands in the structure and the length of the domains [1]. Proteins containing domains of the C1 and V-sets are mostly molecules of the vertebrate immune system. 
Proteins of the C2-set are mainly lymphocyte antigens, this differs from the composition of the C2-set as originally proposed [1]. The I-set is intermediate in structure between the C1 and V-sets and is found widely in cell surface proteins as well as intracellular muscle proteins. 2008-09-03 15:50:29 2004-03-18 16:23:40 25 3370 4292 3474 136715 1 +12 CL0012 Histone \N Histone superfamily Bateman A anon Members of this clan all possess a histone fold. Generally proteins in this clan are DNA binding. 2008-09-03 15:50:29 2004-04-19 14:28:04 17 742 154 9904 23976 1 +13 CL0013 Beta-lactamase \N Serine beta-lactamase-like superfamily Finn RD, Bateman A anon This superfamily contains proteins that have a beta-lactamase fold. This includes beta-lactamases as well as Dala-Dala carboxypeptidases and glutaminases. 2008-09-03 15:50:29 2004-04-19 15:42:28 17 961 303 5474 58501 1 +14 CL0014 Glutaminase_I \N Class-I Glutamine amidotransferase superfamily Bateman A anon Most members of this clan are glutaminase enzymes. This superfamily is shown to be related in [1]. The clan also contains the DJ-1/PfpI family that includes the peptidase PfpI that has a catalytic Cys-His-Glu triad that differs from the class I GAT Cys-His-Glu triad. 2008-09-03 15:50:29 2004-04-28 09:27:01 21 334 418 7216 67224 1 +15 CL0015 MFS \N Major Facilitator Superfamily Bateman A anon The major facilitator superfamily (MFS) is one of the two largest families of membrane transporters found on Earth [1]. It is present ubiquitously in bacteria, archaea, and eukarya and includes members that can function by solute uniport, solute/cation symport, solute/cation antiport and/or solute/solute antiport with inwardly and/or outwardly directed polarity [1]. All permeases of the MFS possess either 12 or 14 transmembrane helices [1]. 
2008-09-03 15:50:29 2004-04-30 16:48:27 19 22 846 6356 249360 1 +16 CL0016 PKinase \N Protein kinase superfamily Studholme DJ anon This superfamily includes the Serine/Threonine- and Tyrosine- protein kinases as well as related kinases that act on non-protein substrates. 2008-09-03 15:50:29 2004-06-11 14:28:37 21 3279 6514 9586 173964 1 +18 CL0018 bZIP \N bZIP-like leucine zipper Studholme DJ anon This family of eukaryotic transcription factors contain a basic region adjacent to a leucine zipper. 2008-09-03 15:50:29 2004-06-16 18:30:26 14 321 111 611 8901 1 +20 CL0020 TPR Tetratrico peptide repeat superfamily Studholme DJ anon Tetratricopeptide-like repeats are found in a numerous and diverse proteins involved in such functions as cell cycle regulation, transcriptional control, mitochondrial and peroxisomal protein transport, neurogenesis and protein folding. 2008-09-03 15:50:29 2004-06-21 18:12:39 24 947 20914 6771 404043 1 +21 CL0021 OB \N OB fold Studholme DJ, Bateman A anon The OB (oligonucleotide/oligosaccharide binding) was defined by Murzin [1]. The common part of the OB-fold, has a five-stranded beta-sheet coiled to form a closed beta-barrel. This barrel is capped by an alpha-helix located between the third and fourth strands [1]. 2008-09-03 15:50:29 2004-06-22 18:31:10 17 1592 988 7656 210543 1 +22 CL0022 LRR Leucine Rich Repeat Studholme DJ anon Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains. This Pfam entry contains Leucine Rich Repeats not recognised by the Pfam:PF00560 model. 2008-09-03 15:50:29 2004-06-23 16:13:12 31 250 9511 3145 175606 1 +23 CL0023 P-loop_NTPase AAA; P-loop containing nucleoside triphosphate hydrolase superfamily Studholme DJ anon AAA family proteins often perform chaperone-like functions that assist in the assembly, operation, or disassembly of protein complexes [2]. 
2008-09-03 15:50:29 2004-06-23 17:05:20 33 5523 12211 50680 1511292 1 +25 CL0025 His_Kinase_A His Kinase A (phospho-acceptor) domain Studholme DJ anon This is the dimerisation and phospho-acceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. It is usually found adjacent to a C-terminal ATPase domain (Pfam:PF02518). This domain is found in a wide range of Bacteria and also several Archaea. It comprises one of the fundamental units of the two-component signal transduction system [2-7]. 2008-09-03 15:50:29 2004-06-29 14:19:46 13 497 5781 7648 242300 1 +26 CL0026 CU_oxidase \N Multicopper oxidase-like domain Studholme DJ, Finn RD anon Many of the proteins in this family contain multiple similar copies of this plastocyanin-like domain. 2008-09-03 15:50:29 2004-06-29 16:37:59 19 1015 245 19953 63536 1 +27 CL0027 RdRP \N RNA dependent RNA polymerase Bateman A anon This clan represents the replicative RNA dependent RNA polymerase. from a variety of RNA viruses [1]. 2008-09-03 15:50:29 2004-08-26 14:33:23 14 852 1801 12549 220781 1 +28 CL0028 AB_hydrolase Alpha/Beta hydrolase fold Bateman A anon This catalytic domain is found in a very wide range of enzymes. 2008-09-03 15:50:29 2004-08-29 17:32:06 21 1989 2396 7428 180167 1 +29 CL0029 Cupin Cupin fold Bateman A anon This clan represents the conserved barrel domain of the 'cupin' superfamily [1] ('cupa' is the Latin term for a small barrel). The cupin fold is found in a wide variety of enzymes, but notably contains the non-enzymatic seed storage proteins also. 2008-09-03 15:50:29 2004-09-06 15:03:53 19 945 1162 6529 112082 1 +30 CL0030 Ion_channel \N Ion channel (VIC) superfamily Bateman A anon This superfamily contains a diverse range of ion channels that share a pair of transmembrane helices in common. This clan is classified as the VIC (Voltage-gated Ion Channel) superfamily in TCDB. 
2008-09-03 15:50:29 2004-09-08 16:21:26 15 718 809 5250 44250 1 +31 CL0031 Phosphatase \N Phosphatase superfamily Bateman A anon This family includes tyrosine and dual specificity phosphatase enzymes. 2008-09-03 15:50:29 2004-10-26 13:53:48 12 480 558 2966 20482 1 +32 CL0032 Dim_A_B_barrel \N Dimeric alpha/beta barrel superfamily Bateman A anon This superfamily of proteins possess a Ferredoxin-like fold. Pairs of these assemble into a beta barrel. The function of this barrel is quite varied and includes Muconolactone isomerase as well as monooxygenases. 2008-09-03 15:50:29 2004-10-26 16:31:17 14 510 186 4601 39476 1 +33 CL0033 POZ \N POZ domain superfamily Bateman A anon The POZ domain is found in a variety of transcription factors. POZ domains are also found in the tetramerisation domain of voltage gated K+ channels. In general these domains mediate homo-oligomerisation. 2008-09-03 15:50:29 2004-10-27 13:52:11 13 223 1167 1178 26677 1 +34 CL0034 Amidohydrolase \N Amidohydrolase superfamily Bateman A anon This family includes a large family of metal dependent amidohydrolase enzymes [1]. 2008-09-03 15:50:29 2004-10-27 17:19:50 14 704 479 5687 79783 1 +35 CL0035 Peptidase_MH \N Peptidase clan MH/MC/MF Bateman A anon This clan contains peptidases belonging to MEROPS clan MH, MC and MF. We also include Nicastrin that is part of the gamma secretase complex and not known to be a peptidase. 2008-09-03 15:50:29 2004-10-28 13:48:22 15 680 539 5405 63766 1 +36 CL0036 TIM_barrel Common phosphate binding-site TIM barrel superfamily Bateman A anon This large superfamily of TIM barrel enzymes all contain a common phosphate binding site. The phosphate is found in a variety of cofactors and ligands such as FMN [1,2]. 2008-09-03 15:50:29 2004-10-28 15:12:01 23 3964 973 10099 253453 1 +37 CL0037 Lysozyme \N Lysozyme-like superfamily Bateman A anon Barley chitinase, bacterial chitosanase, and lysozymes from phage and animals all hydrolyse related polysaccharides. 
The proteins little amino-acid similarity, but have a structurally invariant core consisting of two helices and a three-stranded beta-sheet which form the substrate-binding and catalytic cleft [1]. 2008-09-03 15:50:29 2004-10-28 15:30:21 13 1502 527 5612 33680 1 +39 CL0039 HUP PP-loop; PP-ATPase; HUP - HIGH-signature proteins, UspA, and PP-ATPase. Bateman A, Anantharaman V anon The HUP class contains the HIGH-signature proteins, UspA superfamily and the PP-ATPase superfamily [1]. The HIGH superfamily has the HIGH Nucleotidyl transferases and the class I tRNA synthetases both of which have the HIGH and the KMSKS motif [1],[2]. The PP-loop ATPase named after the ATP PyroPhosphatase domain, was initially identified as a conserved amino acid sequence motif in four distinct groups of enzymes that catalyse the hydrolysis of the alpha-beta phosphate bond of ATP, namely GMP synthetases, argininosuccinate synthetases, asparagine synthetases, and ATP sulfurylases [3]. The USPA superfamily contains USPA, ETFP and Photolyases [1] 2008-09-03 15:50:29 2004-10-29 14:36:02 11 1108 838 6582 177746 1 +40 CL0040 tRNA_synt_II \N Class II aminoacyl-tRNA and Biotin synthetases Finn RD anon Aminoacyl-tRNA synthetases are key components of the protein translation machinery that catalyse two basic reactions. First, the activation of amino acids via the formation of aminoacyl adenylates and second, linking the activated amino acid to the cognate tRNAs. The aminoacyl-tRNA synthetases generate AMP as the second end product of this reaction, which differentiates them from the majority of ATP-dependent enzymes that produce ADP. In addition, there is a specific aminoacyl-tRNA synthetases for each of the 20 amino acids and there are two structurally distinct classes of aminoacyl-tRNA synthetases, each\ \ encompassing 10 different specificities. 
The two classes have alternative modes of aminoacylation: class I aminoacylate the 2'OH of the cognate tRNA; class II aminoacylate 3'OH (with the exception of PheRS). Each class contain a conserved core domain that is involved in ATP binding and hydrolysis and combines with additional domains that determine the specificity of interactions with\ the cognate amino acid and tRNA. The class II core domain consist of a mixed-beta sheet, similar to that found in the biotin synthetases, hence why this family has also been included in this clan. The core domain contains three modestly conserved motifs that are responsible for ATP binding. The class II aminoacyl-tRNA synthetases can contain additional nested domains, found inserted in the loops of the core domain [1] (and reference therein). 2008-09-03 15:50:29 2004-11-08 11:24:57 16 514 340 5927 72316 1 +41 CL0041 Death \N Death Domain Superfamily Finn RD anon The death domain superfamily is composed of three families: the death domain (DD); the death effector domain (DED) and the caspase recruitment domain (CARD). All of the members perform a pivotal role in signalling events that regulate apoptosis. Protein-protein interactions are mediated by self-self associations, in which CARD-CARD, DD-DD and DED-DED contacts are formed exclusively The three families possess remarkably similar structures, each comprising an antiparallel six helical bundle in the Greek Key topology. Structurally, the DD and CARD families are the most dissimilar. The former is comprised of two perpendicular three-helix bundles, whereas the latter CARD domain contains six helices that are almost parallel with each other. Interestingly, the interactions in CARD or DD containing heterodimers are quite different [1]. 2008-09-03 15:50:29 2004-11-11 10:28:31 12 135 742 249 6544 1 +42 CL0042 Flavoprotein \N Flavoprotein Finn RD anon Members of this clan are FMN or FAD-binding redox proteins. 
Flavoproteins act in various electron-transport systems as functional analogues of ferredoxin. They are characterised by an open twisted alpha/beta structure consisting of five parallel beta-sheets connected by alpha-helices which surround the sheet. 2008-09-03 15:50:29 2004-11-12 15:13:12 11 497 221 5019 35666 1 +43 CL0043 Chelatase \N Chelatase Superfamily Finn RD anon Metallated tetrapyrroles are used as prosthetic groups in proteins involved in biologically important processes such as photosynthesis, oxygen transport, drug metabolism and nitric oxide synthesis. In living organisms, metallation is catalysed by a group of enzymes called chelatases. This clan contains ferrochelatase (heme) and cobalt chelatase [1]. 2008-09-03 15:50:29 2004-11-12 16:49:49 11 77 45 4178 7625 1 +44 CL0044 Ferritin Ferritin-like Superfamily Finn RD anon The members of this clan all share a distinctive four helical bundle. The four helices are arranged antiparallel with a left-handed twist. This helical bundle is distinguished from others by the long connection between the second and third helices. Some of the members contain a Fe or Mn dimer at the centre of the helical bundle. The ferritin fold was first described by Murzin AG and Chothia C, Cur Opin Struc Biol 1992, 2:895-903. 2008-09-03 15:50:29 2004-11-12 17:32:27 13 2058 121 6054 34310 1 +45 CL0045 Rubredoxin \N Rubredoxin-like Finn RD anon The Rubredoxin clan is comprised of three families:Rubredoxin, COX5B and desulforedoxin.Rubredoxin domains are small domains (5-6 kDa) and bind one iron atom tetrahedrally bound by four cysteine residues.Similar, desulforedoxin domains are small (4 kDa), but usually form homodimers. Each monomer binds one iron atom, but in a distorted tetrahedral arrangement. COX5B domains are membrane-anchored rubredoxin-like domains. The domain in the Rubredoxin clan are usually comprised of 2 alpha helixes and 2-3 beta strands. 
2008-09-03 15:50:29 2004-11-15 11:37:59 11 172 52 2672 4680 1 +46 CL0046 Thiolase \N Thiolase-like Superfamily Finn RD anon Thiolases are ubiquitous and form a large superfamily. Thiolases can function either degradatively, in the beta-oxidation pathway of fatty acids, or biosynthetically. Biosynthetic thiolases catalyse the formation of acetoacetyl-CoA from two molecules of acetyl-CoA . This is one of the fundamental categories of carbon skeletal assembly patterns in biological systems and is the first step in a wide range of biosynthetic pathways [1]. Thiolase are usually dimeric or tetrameric enzymes. Within each monomer there are two similar domains related by pseudo dyad. The N-terminal of these two domains contains a large insertion of about 100 amino acids. 2008-09-03 15:50:29 2004-11-15 12:47:24 15 575 1810 7516 102610 1 +47 CL0047 CuAO_N2_N3 \N Copper amine oxidase, domains 1 and 2 Finn RD anon Copper amine oxidase (CuAO) are comprised of three of four domains. In the case of the four domain CuAO, the N-terminal domain (termed N1, and is not present in the three domain CuAO) and the C-terminal catalytic domain sandwich two repeated domains (termed N2 and N3). The function of these two homologous domains is uncertain. N2 and N3 both have a cystatin-like fold [1]. 2008-09-03 15:50:29 2004-11-15 13:04:24 11 186 31 504 1887 1 +48 CL0048 LolA_LolB \N Lipoprotein localisation factors LolA/B Finn RD anon Gram-negative bacteria lipoproteins are anchored to the periplasmic surface of the inner or outer membrane depending on the sorting signal, which is the residue at position 2 of the polypeptide. Five Lol proteins are involved in the sorting and membrane localisation of lipoprotein. An ATP-binding cassette (ABC) transporter, LolCDE, releases outer membrane-specific lipoproteins from the inner membrane, causing the formation of a complex between the released lipoproteins and the periplasmic molecular chaperone LolA. 
When this complex interacts with outer membrane receptor LolB, the lipoproteins are transferred from LolA to LolB and then localised to the outer membrane. The structures of LolA and LolB are remarkably similar to each other. Both have a hydrophobic cavity consisting of an unclosed beta-barrel and an alpha-helical lid [1,2]. 2008-09-03 15:50:29 2004-11-15 13:13:26 11 10 5 2158 3976 1 +49 CL0049 Tudor \N Tudor domain 'Royal family' Finn RD, Bateman A anon This clan covers the Tudor domain 'royal family' [1]. This includes chromo, MBT, PWWP and tudor domains. The chromo domain is a comprised of approximately 50 amino acid residues. There are usually one to three Chromo domains found in a single protein. In some chromo domain containing proteins, a second related chromo domain has been found and is referred to as the Chromo-shadow domain. The structure of the Chromo and Chromo-shadow domains reveal an OB-fold, a fold found in a variety of prokaryotic and eukaryotic nucleic acid binding proteins.\ More specifically,the chromo-domain structure reveals a three beta strands that are packed against an alpha helix. Interestingly, a similar structure is found in the archaeal chromatin proteins (7kDa DNA-binding domain). These are sequence neutral DNA binding proteins.\ The DNA binding in these archaeal proteins is mediated through the triple stranded beta sheet. These archaeal domains are though to represent an ancestral chromo domain. Homologs of the chromo domain have been found in fission yeast, ciliated protozoa and all animal species, but appear to be absent in eubacteria, budding yeast and plants [2]. The precise function of the chromo domain is unclear, but the chromo domain is thought to act as a targeting module for chromosomal proteins, although the chromosomal contexts and functional contexts being targeted vary. In all cases studies, the chromo domains are found in proteins that are involved in transcription regulation, positive and negative [2]. 
2008-09-03 15:50:29 2004-11-16 16:02:09 14 325 682 504 17304 1 +50 CL0050 HotDog \N HotDog superfamily Bateman A anon The HotDog fold was first observed in the structure of Escherichia coli beta-hydroxydecanoyl thiol ester dehydratase (FabA), where Leesong et al. noticed that each subunit of this dimeric enzyme contained a mixed alpha + beta 'hot dog' fold. They described the seven-stranded antiparallel beta-sheet as the 'bun', which wraps around a five-turn alpha-helical 'sausage', This superfamily contains a diverse range of enzymes. Membership includes numerous prokaryotic, archaeal and eukaryotic proteins involved in several related, but distinct, catalytic activities, from metabolic roles such as thioester hydrolysis in fatty acid metabolism, to degradation of phenylacetic acid and the environmental pollutant 4-chlorobenzoate. The superfamily also includes FapR, a non-catalytic bacterial homologue that is involved in transcriptional regulation of fatty acid biosynthesis [1]. 2008-09-03 15:50:29 2004-11-19 13:30:09 11 673 1411 5349 52323 1 +51 CL0051 NTF2 NTF2-like superfamily Bateman A anon This superfamily contains a variety of enzymes such as Scytalone dehydratase, Delta-5-3-ketosteroid isomerase, Limonene-1,2-epoxide hydrolase among others. The family also includes presumed non-enzymatic homologues such as NTF2. 2008-09-03 15:50:29 2004-11-19 15:35:00 13 603 332 4350 23892 1 +52 CL0052 NTN \N NTN hydrolase superfamily Bateman A anon In the N-terminal nucleophile aminohydrolases (Ntn hydrolases) the N-terminal residue provides two catalytic groups, nucleophile and proton donor. These enzymes use the side chain of the amino-terminal residue, incorporated in a beta-sheet, as the nucleophile in the catalytic attack at the carbonyl carbon. The nucleophile is cysteine in GAT, serine in penicillin acylase, and threonine in the proteasome. All the enzymes share an unusual fold in which the nucleophile and other catalytic groups occupy equivalent sites. 
This fold provides both the capacity for nucleophilic attack and the possibility of autocatalytic processing [1]. 2008-09-03 15:50:29 2004-11-19 17:25:18 17 2263 255 5468 47927 1 +53 CL0053 4H_Cytokine 4-helical cytokine superfamily Bateman A anon Cytokines are regulatory peptides that can be produced by various cells for communicating and orchestrating the large multicellular system. Cytokines are key mediators of hematopoiesis, immunity, allergy, inflammation, tissue remodeling, angiogenesis, and embryonic development [2]. This superfamily includes both the long and short chain helical cytokines. 2008-09-03 15:50:29 2004-11-21 12:08:45 14 272 33 865 5160 1 +54 CL0054 Knottin_1 \N Scorpion toxin-like knottin superfamily Bateman A anon This clan includes a number of toxin families that share the knottin structure. These families come from scorpions, plants and arthropods. 2008-09-03 15:50:29 2004-11-22 17:39:28 12 164 12 369 2025 1 +55 CL0055 Viral_ssRNA_CP \N Positive stranded ssRNA viruses coat protein Finn RD anon The clan contains a set of viral coat protein families and peptidase A6. The only known peptidase activity is an autolytic cleavage releasing a 44-residue C-terminal fragment. The reaction is very slow and only occurs within the assembled virion. There is debate whether this is actually a true peptidase. The virion with these coat or capsid\ proteins are icosahedral viruses containing sixty triangular coat protein units, each unit consisting of three proteins. The coat protein consists of two subdomains, an eight-stranded beta-barrel on the surface and a three-helix bundle on the inner face. 2008-09-03 15:50:29 2004-11-24 13:16:32 12 790 144 9475 53300 1 +56 CL0056 C_Lectin \N C-type lectin-like superfamily Bateman A anon This clan contains domains that have a C-type lectin fold. Many of these are known or expected to mediate interactions with sugars. 
2008-09-03 15:50:29 2004-11-24 13:34:58 11 581 825 748 15749 1 +57 CL0057 Met_repress MetJ/Arc repressor superfamily Bateman A anon This superfamily contains the MetJ and Arc repressors that feature a ribbon-helix-helix DNA-binding motif with the beta-ribbon located in and recognising the major groove of operator DNA [1]. 2008-09-03 15:50:29 2004-11-24 15:10:49 13 185 88 3932 18127 1 +58 CL0058 Glyco_hydro_tim Tim barrel glycosyl hydrolase superfamily Bateman A anon This large superfamily contains a range of glycosyl hydrolase enzymes that possess a TIM barrel fold. This CLAN merges clans GH-A, GH-D, GH-H and GH-K from CAZy. 2008-09-03 15:50:29 2004-11-25 11:37:58 15 2459 2481 7032 114169 1 +59 CL0059 6_Hairpin Six-hairpin glycosidase superfamily Bateman A anon This Clan includes CAZy clans GH-L, GH-M and GH-G. The members of this clan share a common structure composed of 6 helical hairpins. Most members of this superfamily are glycosyl hydrolase enzymes. 2008-09-03 15:50:29 2004-11-26 15:47:32 14 530 867 4822 41246 1 +60 CL0060 DNA_clamp \N DNA clamp superfamily Bateman A anon Sliding DNA clamps are ring-shaped proteins that allow DNA polymerase to achieve high processivity during chromosome replication by tethering the polymerase catalytic subunit to DNA. All of the structures share a 12-fold symmetry around the ring consisting of a simple structural repeat, though there is structural divergence in some of the repeats. Bacterial beta-clamps contain six repeats per subunit with two subunits per ring while the eukaryotic and bacteriophage clamps contain four repeats per subunit with three subunits per ring. Pairs of these repeats form a domain, which has been termed the 'processivity fold'; thus the ring of the sliding clamp contains six domains and therefore is often described as having 6-fold symmetry. A structural representative of a fourth family of processivity fold proteins, namely the herpes simplex virus UL42 protein, is also available. 
UL42 does not form a ring-shaped clamp, however, but rather functions as a monomer and interacts with DNA quite differently than do sliding clamps; it has been suggested that UL42 resembles a primitive ancestor of sliding clamps [2]. 2008-09-03 15:50:29 2004-11-26 17:13:41 11 241 64 5243 18116 1 +61 CL0061 PLP_aminotran \N PLP dependent aminotransferase superfamily Bateman A anon This superfamily contains a variety of PLP-dependent enzymes. 2008-09-03 15:50:29 2004-11-29 11:27:35 12 1560 574 7712 152775 1 +62 CL0062 APC APC superfamily Bateman A anon This large superfamily contains a variety of transporters including amino acid permeases that according to TCDB belong to the APC (Amino acid-Polyamine-organoCation) superfamily. 2008-09-03 15:50:29 2004-11-29 18:13:15 12 36 417 5587 126928 1 +63 CL0063 NADP_Rossmann FAD/NAD(P)-binding Rossmann fold Superfamily Finn RD anon A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site [1]. In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD. 
2008-09-03 15:50:29 2004-11-30 13:45:28 24 8681 8330 18998 984446 1 +64 CL0064 CPA_AT \N CPA/AT transporter superfamily Bateman A anon This Clan contains transporter proteins that belong to the CPA superfamily and AT superfamily according to TCDB [1]. 2008-09-03 15:50:29 2004-12-02 10:12:55 11 10 163 5252 48799 1 +65 CL0065 Cyclin \N Cyclin-like superfamily Bateman A anon This Clan contains cyclins, Transcription factor IIB (TFIIB), and the Retinoblastoma tumour suppressor proteins. These were predicted to be related by sequence [1]. 2008-09-03 15:50:29 2004-12-02 13:15:02 14 236 144 804 14080 1 +66 CL0066 Trefoil \N Beta-trefoil superfamily Bateman A anon This family corresponds to a large set of related beta-trefoil proteins [1]. The beta-trefoil is formed by six two-stranded hairpins [2]. Three of these form a barrel structure and the other three are in a triangular array that caps the barrel. The arrangement of the secondary structures gives the molecules a pseudo 3-fold axis. 2008-09-03 15:50:29 2004-12-06 15:34:20 13 659 720 1705 12929 1 +67 CL0067 SIS \N SIS domain fold Bateman A anon This catalytic domain catalyses isomerisation reactions of a variety of sugars [1]. 2008-09-03 15:50:29 2004-12-06 16:11:44 12 275 84 6149 42474 1 +68 CL0068 RIIa \N RIIa-like fold Bateman A anon This clan includes both the RIIa dimerisation motif as well as the Dpy-30-like motif [1]. 2008-09-03 15:50:29 2004-12-07 15:58:47 11 37 67 303 1405 1 +69 CL0069 GFP \N GFP-like superfamily Bateman A anon This superfamily has an unusual fold of an 11 stranded beta barrel enclosing an alpha-helix. This superfamily includes green fluorescent protein as well as a domain from nidogen. 2008-09-03 15:50:29 2004-12-08 17:25:21 11 623 171 194 602 1 +70 CL0070 ACT \N ACT-like domain Bateman A anon These domains are involved in binding to amino-acids and causing allosteric regulation of linked enzyme domains [1]. The relationship between these two families was first noticed in [2]. 
2008-09-03 15:50:29 2004-12-08 17:26:00 12 241 221 4906 47783 1 +71 CL0071 His_phosphatase PGM; Histidine phosphatase superfamily Finn RD, Rigden DJ anon The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue.\ Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches [1]. 2008-09-03 15:50:29 2004-12-09 14:54:21 11 292 168 4773 26470 1 +72 CL0072 Ubiquitin \N Ubiquitin superfamily Bateman A anon This family includes proteins that share the ubiquitin fold. It currently unites four SCOP superfamilies. 2008-09-03 15:50:29 2004-12-09 18:04:44 19 1053 1522 6169 107825 1 +73 CL0073 P53-like \N Beta-sandwich DNA-binding domain Bateman A anon This clan contains a variety of DNA-binding domains that contain an immunoglobulin-like fold. It includes the DNA-binding domains of NF-kappaB, NFAT, p53, STAT-1, the T-domain and the Runt domain [1]. 2008-09-03 15:50:29 2004-12-10 09:44:45 12 340 136 553 5385 1 +74 CL0074 Matrix \N Retroviral matrix superfamily Bateman A anon This clan brings together matrix proteins from a variety of retroviruses. 2008-09-03 15:50:29 2004-12-10 10:07:15 12 41 88 280 38433 1 +75 CL0075 Defensin \N Defensin/myotoxin-like superfamily Bateman A anon This clan includes diverse defensins as well as myotoxins. 
2008-09-03 15:50:29 2004-12-10 13:40:46 12 146 13 137 1375 1 +76 CL0076 FAD_Lum_binding \N Riboflavin synthase/Ferredoxin reductase FAD binding domain Finn RD anon Riboflavin nucleotide coenzymes and flavin adenine dinucleotide (FAD) are essential cofactors for a large number of flavoproteins involved in a diverse set of redox reactions. There are thought to be four different FAD-binding folds [1].The FAD-binding fold of this clan is a cylindrical beta-fold. More specifically, the domain forms a flattened six-stranded antiparallel beta-barrel organised into two orthogonal sheets (1-2-5 and 4-3-6) separated by one alpha-helix. The cylinder is open between strands strand 4 and 5. This opening of the cylinder makes space for the isoalloxazine and ribityl moieties of the FAD, to which hydrogen bonds are formed from the open edges of the strands. The other end of the cylinder is covered by the only helix of the domain, which is essential for the binding of the pyrophosphate groups of the FAD [1].The structural differences in the FAD-binding domain are manifested mainly as loops of different length and extra extending structural elements, which may be important for interactions with their redox partners [1]. The structural core of all clan members is highly conserved. 2008-09-03 15:50:29 2004-12-10 14:19:14 11 202 346 5130 31559 1 +77 CL0077 FAD_PCMH \N PCMH-like FAD binding Finn RD anon The FAD-binding domains contained in this family fall within the PCMH (p-cresol methyl-hydroxylase) family of FAD binding proteins as defined in [1]. In this family, the structure of the FAD binding domain is comprised of two subdomains. Both of these subdomains have an alpha-beta fold. The first subdomain is comprised of three parallel beta strands, surrounded by alpha helices. The second subdomain contains five antiparallel beta strands, also surrounded by alpha helices. 
The junction between these two subdomains forms the FAD bind pocket, where the ligand is bound by hydrogen and van der Waals bonds [1]. 2008-09-03 15:50:29 2004-12-10 16:27:25 11 247 229 5425 25538 1 +78 CL0078 DNA_ligase \N DNA/RNA ligase superfamily Bateman A anon This superfamily contains both ATP-dependent and NAD dependent DNA ligase enzymes. The family also includes mRNA capping enzymes. The members of this clan were shown to be related by sequence in [1]. 2008-09-03 15:50:29 2004-12-10 18:04:20 12 72 170 5269 10191 1 +79 CL0079 Cystine-knot \N Cystine-knot cytokine superfamily Bateman A anon The cytokine families in this clan have the cystine-knot fold. In this 6 cysteines form three disulphide bridges that are interlinked. 2008-09-03 15:50:29 2004-12-14 17:03:09 12 253 73 2754 10562 1 +80 CL0080 Mss4-like \N Mss4-like superfamily Bateman A anon This clan contains TCTP, Mss4 and SelR families [1]. 2008-09-03 15:50:29 2004-12-14 17:24:43 11 51 31 4242 6196 1 +81 CL0081 MBD-like \N MBD-like DNA-binding domain Bateman A anon This clan contains proteins with a distinctive three stranded DNA-binding domain [1]. 2008-09-03 15:50:29 2004-12-14 17:44:54 12 24 149 2355 10578 1 +82 CL0082 MIF \N Tautomerase/MIF superfamily Bateman A anon This clan groups 5-(carboxymethyl)-2-hydroxymuconate isomerase (CHMI) and 4-oxalocrotonate tautomerase (4-OT) with macrophage inhibitory factor (MIF). Interestingly they all share an amino-terminal proline. Members of this clan for homotrimers [1]. 2008-09-03 15:50:29 2004-12-15 11:44:36 11 347 29 3218 5290 1 +83 CL0083 Omega_toxin Omega toxin-like Finn RD anon This clan contains a set of related small protein toxins and what appears to be the functionally distinct Albumin I domain. All members of this clan have a knottin-like fold. Additional information about this clan may be found from [1]. 
2008-09-03 15:50:29 2004-12-16 13:53:46 14 83 23 290 1853 1 +84 CL0084 ADP-ribosyl \N ADP-ribosylation Superfamily Finn RD anon The members of this clan all represent ADP-ribosylating catalytic domains. The structurally conserved regions are located at the NAD binding region [1]. According to SCOP, the ADP-ribosylation domain is thought to have an "unusual fold". 2008-09-03 15:50:29 2004-12-16 14:31:35 12 208 320 842 3271 1 +85 CL0085 FAD_DHS \N DHS-like NAD/FAD-binding domain Finn RD anon The members of this family adopt a Rossmann fold, similar to CLAN:CL0063. However, the members of this family are distinguished in that the FAD/NAD cofactor is bound in the opposite direction. In this arrangement, the adenosine moiety is found bound at the second half of the fold.\ In addition, the conserved GxGxxG motif found in classical NADP binding Rossmann folds is absent. Finally, another distinguishing characteristic is the formation of an internal hydrogen bond in the FAD molecule [1]. 2008-09-03 15:50:29 2004-12-16 15:32:59 13 399 174 5116 30822 1 +86 CL0086 FAD_oxidored \N FAD-linked oxidoreductase Finn RD anon The members of this clan adopt a TIM barrel fold, which is reminiscent of flavin mononucleotide binding proteins, rather than one similar to other flavin adenine dinucleotide binding domains. However, the way the FAD cofactor binds in quite different compared to the binding of FMN in the TIM-barrel structures [1]. 2008-09-03 15:50:29 2004-12-16 16:43:38 11 60 30 4179 7837 1 +87 CL0087 Acyl-CoA_dh \N Acyl-CoA dehydrogenase, C-terminal domain-like Finn RD anon The Acyl-CoA dehydrogenase FAD binding domain forms an mostly alpha helical domain, comprised of four helices\ arranged in up-and-down bundle. In Acyl-CoA oxidase II this domain appears to have been duplicated. 
2008-09-03 15:50:29 2004-12-17 10:15:37 12 254 148 3538 31652 1 +88 CL0088 Alk_phosphatase \N Alkaline phosphatase-like Finn RD anon The members of this clan all share a common structure of their catalytic domains, which contain conserved metal binding residues [1]. 2008-09-03 15:50:29 2004-12-17 11:25:45 15 217 321 5028 38421 1 +89 CL0089 GlnB-like \N GlnB-like superfamily Finn RD anon The members of this clan are characterised by the fact the domains, each comprised of four beta-strand and two alpha helices, tend to form tetrameric structures [1]. 2008-09-03 15:50:29 2004-12-17 12:10:20 12 264 47 4170 11982 1 +90 CL0090 Globin \N Globin-like Finn RD anon The globin fold is an evolutionary conserved six helical fold that is found in bacteria and eukaryotes. 2008-09-03 15:50:29 2004-12-17 12:17:06 11 2247 121 4808 13282 1 +91 CL0091 NAD_Ferredoxin \N Ferredoxin / Ferric reductase-like NAD binding Finn RD anon The Ferredoxin / Ferric reductase-like NAD binding domain is adopts a Rossmann like fold. However, these families have been excluded from the classical NAD(P) binding Rossmann clan (CLAN:CL0063), due to a divergence of the GxGxxG motif. In this clan, the motif phosphate binding motif is G-T/S-G-A/I-P. The changes in the motif are a reflection of the different way that the NAD(P)H is bound by this fold and the classical Rossmann fold [1,2]. 2008-09-03 15:50:29 2004-12-17 14:54:00 11 192 335 4749 22088 1 +92 CL0092 ADF \N Actin depolymerizing Factor Finn RD anon For motile cells such as Amoeba to move, there must be the rapid recycling of their actin cytoskeleton to enable a dynamic change in their shape. Gelsolin (PFAM:PF00626) and Cofilin (PFAM:PF00241) are two key domain families in this process. Both of these domain are structural and functional similar [1,2]. In particular, the beta sheet found at the core of the domain is structurally well conserved, with the helices that surround this sheet less conserved[2]. 
2008-09-03 15:50:29 2004-12-17 17:36:22 11 154 175 529 7818 1 +93 CL0093 Peptidase_CD \N Peptidase clan CD Finn RD anon The members of this clan are all endopeptidase that have the catalytic dyad histidine followed by cysteine. The catalytic histidine is preceded by a block of hydrophobic residues and a glycine, where as the cysteine is preceded by a block of hydrophobic residues and a glutamine and an alanine. The members with a know structure adopt an alpha/beta fold [1]. 2008-09-03 15:50:29 2004-12-21 13:25:00 13 474 1163 1706 9782 1 +94 CL0094 Peptidase_ME \N LuxS/MPP-like metallohydrolase Finn RD anon All members of this clan are characterised by a HXXEH motif, which is is involved in zinc binding. Furthermore all members adopt an alpha and beta fold. More specifically, there us a four to six stranded antiparallel beta sheet surrounded by five helices. However, LuxS (PFAM:PF02664) is not a peptidase, although its hydrolytic mechanism of catalysis appears to be conserved [1]. 2008-09-03 15:50:29 2004-12-21 13:57:38 11 224 73 4739 30361 1 +95 CL0095 Peptidase_ML \N Peptidase Clan ML Finn RD anon This clan contains HybD-like domains. HybD is a nickel binding endopeptidase. Structural and sequences analyses have highlighted the presence of two highly conserved motifs that are shared with germination proteases and HybD [1]. Members of this clan adopt an alpha/beta fold, comprised of a central beta sheet, surrounded by alpha helices. 2008-09-03 15:50:29 2004-12-21 15:54:20 12 12 11 1706 3673 1 +96 CL0096 Pept_Inhib_IE \N Peptidase Inhibitor Clan IE Finn RD anon The members of this clan are all cystine rich domains, which form a knottin scaffold. This clan should also contain alpha-amylase but currently this family is a singleton and can not be put into Pfam. Also see [1]. 
2008-09-03 15:50:29 2004-12-22 12:16:32 11 34 2 22 52 1 +97 CL0097 TypeIII_Chap \N Type III secretory system chaperone Finn RD anon The translocation of pathogenic proteins into a host cell is mediated by the type III secretory system. A component of this system is a chaperone, which binds to the protein which is going to be secreted in the bacterial cytosol and is involved in translocation of the secreted protein, although the chaperone is not translocated itself. An individual chaperone associates with one or two specific proteins [1]. There are a large number of type III secretory system chaperones, which are small acidic proteins and exhibit significant sequence divergence. This clan groups type III secretory system chaperones. Members with a known structure form small compact globular domains with an alpha-beta(3)- alpha-beta(2)-alpha like organisation [1]. 2008-09-03 15:50:29 2004-12-22 12:58:31 11 45 14 1697 3343 1 +98 CL0098 SPOUT AB_Knot; SPOUT Methyltransferase Superfamily Finn RD anon A distinct class of methylases that includes the SpoU and TrmD superfamilies and two superfamilies of predicted methylases defined by the YbeA and MJ0421 proteins in bacteria and archaea, respectively [1] (PFAM:PF00588 PFAM:PF01746). SPOUT is structurally distinct compared to more classical methyltransferases [1]. More specifically, the members of this clan form alpha/beta knots. Knots are extremely rare in protein structures as they pose a\ folding problem. The mechanism that allow a domain to be folded as a knot are unclear, but are discussed in [2] and reference therein. All members with known structure form homodimers. 2008-09-03 15:50:29 2004-12-22 15:38:51 13 112 85 5028 32567 1 +99 CL0099 ALDH-like \N ALDH-like superfamily Finn RD anon The aldehyde dehydrogenases (ALDHs) are a superfamily of multimeric enzymes which catalyse the oxidation of a broad range of aldehydes into their corresponding carboxylic acids with the reduction of their cofactor, NAD(P) into NAD(P)H. 
The way that the NAD is bound is distinct from other NAD(P)-dependent oxidoreductases. The domain represented by this clan consists of two similar subdomains. 2008-09-03 15:50:29 2004-12-22 15:45:47 12 614 141 5455 48240 1 +100 CL0100 C1q_TNF \N C1q and TNF superfamily Finn RD anon The members of the C1q and TNF superfamily are involved in a diverse set of functions, which include: defense, inflammation, apoptosis, autoimmunity differentiation, organogenesis, hibernation and insulin-resistant\ obesity [1]. Both C1q and TNF domains form a compact jelly-roll beta- sandwich. The core of these structures are conserved between the two families and corresponds to the detectable sequence similarity. Proteins containing both of these domains, form trimers before they are active. However, the surfaces of the domains are quite different and this difference is thought to give rise to the function difference between the clan members[1]. 2008-09-03 15:50:29 2004-12-22 15:46:56 12 294 48 443 3534 1 +101 CL0101 PELOTA RNA_ribose_bind; Pelota - RNA ribose binding superfamily Finn RD anon The members of this clan are all involved in binding to ribose sugar of RNA[1]. Indeed, the key RNA binding residues are conserved across the different families [1]. Members of this clan form mixed alpha-helical and beta-sheet structures [1][2]. 2012-10-06 18:35:16 2005-01-04 15:00:27 11 212 56 5002 13986 1 +103 CL0103 Gal_mutarotase \N Galactose Mutarotase-like superfamily Bateman A anon This clan is composed of a beta-sandwich that was first observed in domain 5 of beta-galactosidase, then as the central domain of copper amine oxidase, the C-terminal domain of chondroitinase, the C-terminal domain of hyaluronate lyase, the N-terminal domain of maltose phosphorylase and in Galactose Mutarotase [1]. All these enzymes act on a sugar substrate. 
2008-09-03 15:50:29 2005-01-28 16:18:34 11 539 313 4717 24927 1 +104 CL0104 Glyoxalase \N VOC superfamily Bateman A anon This clan contains the VOC metalloenzyme superfamily [1]. The known types of reactions that are catalysed include isomerizations (glyoxalase I), epimerizations (methylmalonyl-CoA epimerase), oxidative cleavage of C-C bonds (extradiol dioxygenase), and nucleophilic substitutions (fosfomycin resistance proteins) [1]. 2008-09-03 15:50:29 2005-01-28 18:11:34 12 363 157 4720 43346 1 +105 CL0105 Hybrid \N Barrel sandwich hybrid superfamily Bateman A anon This superfamily contains proteins with a hybrid motif [1]. This motif is embedded in structurally diverse proteins. 2008-09-03 15:50:29 2005-01-31 16:15:18 12 394 669 6221 139176 1 +106 CL0106 6PGD_C \N 6-phosphogluconate dehydrogenase C-terminal-like superfamily Bateman A anon This helical domain is found associated with Rossmann domains. 2008-09-03 15:50:29 2005-01-31 18:34:00 12 232 123 5955 39015 1 +107 CL0107 KOW \N KOW domain Bateman A anon This superfamily includes proteins involved in translation that have a KOW like SH3-fold. 2008-09-03 15:50:29 2005-03-18 14:03:54 11 584 79 6487 23716 1 +108 CL0108 Actin_ATPase \N Actin-like ATPase Superfamily Finn RD anon The actin-like ATPase domain forms an alpha/beta canonical fold. The domain can be subdivided into 1A, 1B, 2A and 2B subdomains. Subdomains 1A and 1B share the same RNAseH-like fold (a five-stranded beta-sheet decorated by a number of alpha-helices). Domains 1A and 2A are conserved in all members of this superfamily, whereas domain 1B and 2B have a variable structure and are even missing from some homologues [1]. Within the actin-like ATPase domain the ATP-binding site is highly conserved. The phosphate part of the ATP is bound in a cleft between subdomains 1A and 2A, whereas the adenosine moiety is bound to residues from domains 2A and 2B[1]. 
2008-09-03 15:50:29 2005-03-22 09:34:28 15 1022 508 11461 141414 1 +109 CL0109 CDA Cytidine deaminase-like (CDA) superfamily Finn RD, Coin L, Iyer LM, Zhang D, Aravind L anon This clan contains both free nucleotide and nucleic acid deaminases that act on adenosine, cytosine, guanine and cytidine, and are collectively known as the deaminase superfamily. The conserved fold consists of a three-layered alpha/beta/alpha structure with 3 helices and 4 strands in the 2134 order [1,2].This superfamily is further divided into two major divisions based on the presence of a helix (helix-4) that renders the terminal strands (strands 4 and 5) either parallel to each other in its presence, or anti-parallel in its absence [2]. Structurally, the deaminase-like fold is present in four other superfamilies including the JAB-like metalloproteins, the C-terminal AICAR transformylase-catalyzing domains of PurH, Tm1506 and the formate dehydrogenase accessory subunit FdhD. The active site of the deaminases is composed of three residues that coordinate a zinc ion between conserved helices 2 and 3. The residues are typically found as [HCD]xE and CxxC motifs at the beginning of helices 2 and 3. The zinc ion activates a water molecule, which forms a tetrahderal intermediate with the carbon atom that is linked to the amine group. This is followed by deamination of the base. 2008-09-03 15:50:29 2005-03-22 09:57:40 11 208 1010 5319 27014 1 +110 CL0110 GT-A Glycosyl transferase clan GT-A Bateman A anon This is the GT-A clan that contains diverse glycosyltransferases that possess a Rossmann like fold [1]. 2008-09-03 15:50:29 2005-03-22 10:54:55 11 713 1496 7680 127750 1 +111 CL0111 GT-C Glycosyl transferase GT-C superfamily Bateman A anon This is the GT-C clan that contains diverse glycosyltransferases that possess 8-13 predicted transmembrane segments [1]. 
2008-09-03 15:50:29 2005-03-22 13:52:31 10 19 303 4345 19864 1 +112 CL0112 Yip1 Yip1/YIF1-like Finn RD, Mistry J anon Yip1 and YIF1 are members of an integral membrane complex which bind to Ras-like GTPases and are required for membrane fusion of ER derived vesicles with the Golgi [1]. 2008-09-03 15:50:29 2005-03-22 15:32:52 12 4 63 2706 5788 1 +113 CL0113 GT-B \N Glycosyl transferase clan GT-B Bateman A anon This is the GT-B clan that contains diverse glycosyltransferases that possess a Rossmann like fold [1]. 2008-09-03 15:50:29 2005-03-22 17:19:24 12 539 1601 8102 141567 1 +114 CL0114 HMG-box \N HMG-box like superfamily Bateman A anon This clan includes the DNA-binding HMG-box proteins as well as the YABBY-like transcription factors. 2008-09-03 15:50:29 2005-03-23 13:44:43 11 62 241 1493 10953 1 +115 CL0115 Steroid_dh \N Steroid oxidoreductase superfamily Bateman A anon This clan includes several enzymes, including steroid dehydrogenases and isoprenylcysteine carboxyl methyltransferase enzymes. These protein contain a varying number of transmembrane regions. 2008-09-03 15:50:29 2005-03-23 14:15:40 11 1 60 2542 7438 1 +116 CL0116 Calycin \N Calycin superfamily Bateman A anon The calycin structural superfamily [1-3] includes the lipocalins, the fatty acid-binding proteins (FABPs). 2008-09-03 15:50:29 2005-03-23 14:57:38 12 662 102 3233 10920 1 +117 CL0117 uPAR_Ly6_toxin \N uPAR/Ly6/CD59/snake toxin-receptor superfamily Bateman A anon This superfamily contains snake toxins as well as extracellular cysteine rich domains. 2008-09-03 15:50:29 2005-03-30 14:52:26 10 228 28 268 2910 1 +118 CL0118 Ribokinase \N Ribokinase-like superfamily Bateman A anon All of these enzymes are phosphotransferases that have an alcohol group as an acceptor (EC:2.7.1.-). 
However, 4-amino-5-hydroxymethyl-2-methylpyrimidine phosphate kinase (HMPP kinase) catalyses two phosphorylation reactions: one to a hydroxymethyl group of hydroxymethyl pyrimidine (HMP) and the second to the phosphomethyl group of HMPP [1]. The common structural feature for the enzymes in this superfamily is a central eight-stranded sheet that is flanked by eight structurally conserved helices, five on one side and three on the other [1]. The active site is located in a shallow groove along one edge of the sheet, with the phosphate acceptor hydroxyl group and -phosphate of ATP close together in the middle of the groove, and substrate and ATP binding at the ends [1]. 2008-09-03 15:50:29 2005-04-01 13:48:37 11 400 155 5041 42135 1 +121 CL0121 Cystatin Cystatin-like superfamily Bateman A anon This superfamily includes cystatins and cathelicidins [1]. The cystatin superfamily comprises cysteine protease inhibitors that play key regulatory roles in protein degradation processes. The progenitor of this superfamily was most probably intracellular and lacked a signal peptide and disulfide bridges, much like the extant Giardia cystatin. A primordial gene duplication produced two ancestral eukaryotic lineages, cystatins and stefins. Stefins - included in Pfam:PF00031 - remain encoded by a single or a small number of genes throughout the eukaryotes, whereas the cystatins have undergone a more complex and dynamic evolution through numerous gene and domain duplications [2]. 2008-09-03 15:50:29 2005-04-05 16:56:37 11 86 26 392 2322 1 +122 CL0122 UTRA Chor_lyase; Chorismate lyase/UTRA superfamily Bateman A anon This clan includes chorismate lyase as well as the UTRA domain 2008-09-03 15:50:29 2005-04-05 17:05:44 10 71 33 3574 14169 1 +123 CL0123 HTH Helix-turn-helix clan Bateman A anon This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. 
2008-09-03 15:50:29 2005-04-05 17:52:07 17 2812 5699 10949 1020775 1 +124 CL0124 Peptidase_PA Peptidase clan PA Bateman A anon This clan contains a diverse set of peptidases with the trypsin fold. 2008-09-03 15:50:29 2005-04-06 15:44:18 14 2540 1159 7202 60527 1 +125 CL0125 Peptidase_CA Peptidase clan CA Bateman A anon This clan includes peptidases with the papain-like fold. 2008-09-03 15:50:29 2005-04-06 17:40:48 14 814 1796 6922 76571 1 +126 CL0126 Peptidase_MA Peptidase clan MA Bateman A anon Clan MA is one of two zinc-dependent metallopeptidases that contain the HEXXH motif. The two histidines are zinc ligands. The structures of this clan show the active site is between its two sub-domains. 2008-09-03 15:50:29 2005-04-07 08:52:20 17 981 1676 6188 88418 1 +127 CL0127 ClpP_crotonase ClpP/Crotonase superfamily Bateman A anon This family includes several peptidases of peptidase clan SK as well as crotonase like proteins. 2008-09-03 15:50:29 2005-04-07 12:12:33 11 1093 331 8610 70137 1 +128 CL0128 vWA-like von Willebrand factor type A Finn RD anon To add. 2008-09-03 15:50:29 2005-04-07 17:59:38 11 223 1340 4970 35661 1 +129 CL0129 Peptidase_AA \N Peptidase clan AA Bateman A anon This clan contains aspartic peptidases, including the pepsins and retropepsins. These enzymes contains a catalytic dyad composed of two aspartates. In the retropepsins one is provided by each copy of a homodimeric protein, whereas in the pepsin-like peptidases these aspartates come from a single protein composed of two duplicated domains. 2008-09-03 15:50:29 2005-04-08 09:36:39 13 1842 629 2629 138509 1 +130 CL0130 Peptidase_AD \N Peptidase clan AD Bateman A anon Members of this clan are peptidases that are integral membrane proteins. The catalytic aspartate is in the conserved GXGD motif. 2008-09-03 15:50:29 2005-04-08 11:13:31 10 5 33 3841 6449 1 +131 CL0131 DoxD-like DoxD-like Mistry J anon The families in this clan are all membrane proteins. 
The DoxD family is found on enzymes involved in elemental sulphur oxidation [1]. The other families in this clan are poorly characterised. 2008-09-03 15:50:29 2005-04-08 11:57:52 10 0 56 3235 10608 1 +132 CL0132 AbrB \N AbrB/MraZ DNA-binding domain Bateman A anon This superfamily includes the DNA-binding domain of AbrB as well as the presumed DNA-binding protein MraZ (per. comm. A Andreeva and A Murzin). 2008-09-03 15:50:29 2005-04-08 13:53:50 12 50 24 3586 11690 1 +133 CL0133 AT14A-like \N AT14A-like Mistry J anon This clan contains plant proteins. DUF677 family members are AT14A-like proteins that have sequence similarity to fungal, insect and human integrins [1]. The other members of this clan are poorly characterised. 2008-09-03 15:50:29 2005-04-08 14:00:57 10 0 14 26 879 1 +135 CL0135 Arrestin_N-like \N Arrestin_N-like Mistry J anon The families in this clan are involved in vacuolar protein trafficking, G protein signal termination and sporulation. The Arrestin N terminal domain has an Ig-like beta sandwich fold which binds to receptors and impairs their capacity to active G proteins [1]. Arrestins have also been implicated in the endocytosis of receptors and cross talk with other signalling pathways [2]. 2008-09-03 15:50:29 2005-04-12 14:27:53 10 34 59 768 5476 1 +136 CL0136 Plasmid_toxin \N Plasmid toxin-antitoxin system Mistry J anon The families in this clan are plasmid encoded toxins involved in plasmid maintenance. The plasmid encodes both a toxin and an antitoxin. Upon loss of the plasmid the antitoxin is inactivated more rapidly than the toxin. This allows the toxin to interact with its target thus killing the cell or impeding growth. 2008-09-03 15:50:29 2005-04-14 09:55:24 11 91 44 3558 18077 1 +137 CL0137 HAD HAD superfamily Bateman A anon This clan represents the haloacid dehalogenase (HAD) superfamily that includes a diverse range of enzymes that use an asp carboxylate as a nucleophile [1]. 
2008-09-03 15:50:29 2005-04-15 16:57:28 14 780 821 6664 120193 1 +139 CL0139 GADPH_aa-bio_dh \N Amino acid biosynthesis and glycosomal dehydrogenase Mistry J anon This clan contains the C terminal domains of dehydrogenase enzymes involved in the biosynthesis of arginine, aspartate and aspartate derived amino acids. It also contains the C terminal domain of GAPDH, a dehydrogenase involved in glycolysis and gluconeogenesis. 2008-09-03 15:50:29 2005-04-18 13:42:09 10 525 39 9289 24271 1 +140 CL0140 Viral_NABP \N Viral nucleic acid binding Mistry J anon This clan contains viral nucleic acid binding protein families. Two of the families in this clan are known to contain zinc finger motifs [1][2]. 2008-09-03 15:50:29 2005-04-18 14:24:28 10 0 5 71 578 1 +141 CL0141 MtN3-like MtN3-like, vesicle-trafficking cargo-receptors Mistry J anon The clan forms a large and diverse family of proteins with seven transmembrane helices, common topology and, most likely, similar function. Their coding genes exist in all eukaryota and in several prokaryota. Some are responsible for metabolic diseases (cystinosis, congenital disorder of glycosylation), others are candidate genes for genetic disorders (cleft lip and palate, certain forms of cancer) or solute uptake and efflux (SWEETs) and many have not yet been assigned a function. Comparison with the properties of well-annotated clan members suggests that the proteins could be involved in protein trafficking and serve as cargo receptors in vesicle trafficking [3]. 2008-09-03 15:50:29 2005-04-19 09:24:39 11 0 74 1425 7738 1 +142 CL0142 Membrane_trans Membrane and transport protein Mistry J anon This clan contains membrane proteins involved in the transport of molecules including amino acids sugars and signalling molecules. It also includes integral membrane cell cycle proteins and some putative ammonia monooxygenases. 
2008-09-03 15:50:29 2005-04-20 13:47:04 11 10 78 4928 72098 1 +143 CL0143 B_Fructosidase Beta fructosidase superfamily Mistry J anon This beta fructosidase superfamily [4] is composed of glycosyl hydrolase families. The members of this clan adopt a five-bladed beta-propeller fold [2-3]. The beta-fructosidase superfamily is also known as furanosidase superfamily [4]. 2008-09-03 15:50:29 2005-04-25 11:24:48 15 191 281 2992 11034 1 +144 CL0144 Periplas_BP Periplas_BP-like; Periplasmic binding protein like Mistry J anon This clan includes proteins involved in chemotaxis, membrane transport of sugars and allocrites, and the LacI family transcriptional regulators. It also includes some antigenic basic membrane lipoproteins. 2008-09-03 15:50:29 2005-04-25 15:04:24 12 462 379 4786 70533 1 +145 CL0145 Golgi-transport \N Golgi-transport Mistry J anon This clan contains families that are involved in intracellular transport and signalling.\ \ Arfaptins are proteins which interact with small GTPases involved in vesicular budding at the Golgi complex. They form an elongated dimer of three helix coiled coils and are structurally very similar to the BAR domain [1][2]. The Sec34 family is involved in tethering vesicles to the Golgi [3]. 2008-09-03 15:50:29 2005-04-25 16:18:47 14 49 165 766 6140 1 +146 CL0146 Herpes_glyco \N Herpes glycoprotein Mistry J anon This clan contains herpes envelope glycoproteins [1][2]. 2008-09-03 15:50:29 2005-04-25 16:48:44 11 0 3 78 288 1 +147 CL0147 Traffic \N Trafficking protein Mistry J anon The members of this clan are involved in protein trafficking. The Sec20 family are integral membrane proteins involved in ER to Golgi transport [1] and V-SNARES are involved in membrane fusion [2]. 2008-09-03 15:50:29 2005-04-26 10:46:36 10 7 29 336 2129 1 +148 CL0148 Viral_Gag \N Viral Gag protein Mistry J anon This clan contains Gag proteins which are involved in viral assembly and replication [1][2]. 
2008-09-03 15:50:29 2005-04-27 12:13:23 10 185 102 374 45347 1 +149 CL0149 CoA-acyltrans \N CoA-dependent acyltransferase superfamily Finn RD anon All characterised families in this clan are involved in CoA-dependent acyltransferase. All families have a characteristic HXXXD motif. 2008-09-03 15:50:29 2005-04-27 14:03:23 11 193 1986 4892 38354 1 +151 CL0151 PK_TIM \N Pyruvate kinase-like TIM barrel superfamily Bateman A anon This superfamily consists of a number of TIM barrel domains found in enzymes such as pyruvate kinase, malate synthase and citrate lyase. 2008-09-03 15:50:29 2005-05-03 14:40:00 11 507 138 6403 40855 1 +153 CL0153 dUTPase \N dUTPase like superfamily Bateman A anon This clan contains dUTPase and many viral proteins that appear to be related. dUTPases are important in virus replication. 2008-09-03 15:50:29 2005-05-04 09:16:52 10 268 42 4817 8241 1 +154 CL0154 C2 C2 superfamily Bateman A anon This superfamily includes C2 domains and C2-like domains. 2008-09-03 15:50:29 2005-05-04 15:46:49 10 222 791 571 27805 1 +155 CL0155 CBM_14_19 \N Carbohydrate binding domain 14/19 clan Bateman A anon This clan includes two different carbohydrate binding modules. 2008-09-03 15:50:29 2005-05-04 15:50:34 10 1 158 335 6959 1 +156 CL0156 Nucleocapsid \N Mononegaviral nucleocapsid superfamily Bateman A anon This clan contains paramyxoviral and ebola type virus nucleocapsid proteins. 2008-09-03 15:50:29 2005-05-04 16:11:33 10 4 2 470 5978 1 +157 CL0157 Kleisin \N Kleisin superfamily Bateman A anon The kleisin superfamily includes ScpA, Scc1, Rec8, and Barren [1]. Scc1 interacts with SMC proteins through N- and C-terminal domains to form a ring-like structure [1]. 2008-09-03 15:50:29 2005-05-04 16:26:02 10 4 11 3187 3510 1 +158 CL0158 GH_CE \N Glycoside hydrolase/deacetylase superfamily Bateman A anon This superfamily contains diverse enzymes that act on carbohydrates including both hydrolases and deacetylases. 
2008-09-03 15:50:29 2005-05-04 16:44:41 11 125 249 4786 20178 1 +159 CL0159 E-set Ig-like fold superfamily (E-set) Finn RD, Bateman A anon This clan includes a diverse range of domains that have an Ig-like fold and appear to be distantly related to each other. The clan includes: PKD domains, cadherins and several families of bacterial Ig-like domains as well as viral tail fibre proteins. it also includes several Fibronectin type III domain-containing families. 2008-12-15 16:59:57 2005-05-09 16:19:14 15 1072 9593 6497 213896 1 +160 CL0160 Methionine_synt Cobalamin-independent synthase Finn RD anon The N-terminal and C-terminal cobalamin-independent synthase domains are structurally similar, adopting a TIM beta/alpha barrel. However, the two domain perform functionally different roles. The N-terminal domain and C-terminal domains both define a catalytic cleft in the enzyme. The N-terminal domain is thought to bind the substrate, in particular, the negatively charged polyglutamate chain. The N-terminal domain is also thought to stabilise a loop from the C-terminal domain. The C-terminal domain contains the active site residues[1]. 2008-09-03 15:50:29 2005-05-09 16:58:23 10 56 28 4455 12453 1 +161 CL0161 GAF \N GAF domain-like Finn RD anon A clan of related transcriptional regulator domains. 2008-09-03 15:50:29 2005-05-09 18:32:14 11 220 3047 6850 47618 1 +162 CL0162 FBA \N F-box associated Finn RD anon Clan containing related F-box associated families. 2008-09-03 15:50:29 2005-05-09 18:41:19 10 0 47 50 1557 1 +163 CL0163 Calcineurin Calcineurin-like phosphoesterase superfamily Bateman A anon This clan contains the calcineurin-like phosphoesterases. This clan also includes the apparently inactive homologues from the small DNA polymerase subunits [1]. 2008-09-03 15:50:29 2005-05-10 16:59:39 10 274 576 5495 51255 1 +164 CL0164 CUB \N CUB clan Bateman A anon This clan contains the CUB domain [1,2]. 
2008-09-03 15:50:29 2005-05-10 17:23:26 12 31 892 190 13225 1 +165 CL0165 Cache \N Cache-like domain Finn RD anon The Cache domain an extracellular domain that is thought to have a role in small-molecule recognition in a wide range of proteins, including the animal Ca(2+)-channel subunits and a class of prokaryotic chemotaxis receptors [1]. 2008-09-03 15:50:29 2005-05-10 17:26:33 10 40 407 2883 12801 1 +166 CL0166 PRD \N PRD domain superfamily Bateman A anon The PRD domain (for PTS Regulation Domain), is the phosphorylatable regulatory domain found in bacterial transcriptional antiterminator of the BglG family as well as in activators such as MtlR and LevR. The PRD domain is phosphorylated on a conserved histidine residue. PRD-containing proteins are involved in the regulation of catabolic operons in Gram+ and Gram- bacteria and are often characterised by a short N-terminal effector domain that binds to either RNA (CAT-RBD for antiterminators (Pfam:PF03123, see also comments for this family)) or DNA (for activators), and a duplicated PRD module which is phosphorylated on conserved histidines by the sugar phosphotransferase system (PTS) in response to the availability of carbon source. The phosphorylations are thought to modify the stability of the dimeric proteins and thereby the RNA- or DNA-binding activity of the effector domain. 2008-09-03 15:50:29 2005-05-11 14:46:29 11 9 110 2060 16551 1 +167 CL0167 Zn_Beta_Ribbon Zinc beta-ribbon Finn RD anon A clan of zinc-binding ribbon domains. 2008-09-03 15:50:29 2005-05-11 16:19:40 14 628 1382 6834 87461 1 +168 CL0168 PAN \N PAN-like Finn RD anon PAN domains have significant functional versatility fulfilling diverse biological functions by mediating protein-protein or protein-carbohydrate interactions [1]. These domains contain a hair-pin loop like structure, similar to knottins, but the pattern of disulphide bonds differs. 
2008-09-03 15:50:29 2005-05-11 16:56:50 14 59 399 404 5383 1 +169 CL0169 Rep \N Rep-like domain Bateman A anon This clan includes replication proteins for viruses and plasmids. This domain is known to bind DNA. The members of this clan have three motifs. The central HXH is conserved in most families in the clan. 2008-09-03 15:50:29 2005-05-13 18:23:32 10 25 62 3938 11895 1 +170 CL0170 Peptidase_MD \N Peptidase MD Finn RD anon This clan is comprised of carboxypeptidases and the N-terminal domain from Sonic hedgehog proteins. The structure of the latter is similar to the peptidases, but the N-terminal domain of hedgehog has been demonstrate not to be involved in peptidase activity, but is more likely involved in signal transduction [1]. 2008-09-03 15:50:29 2005-05-19 09:30:19 10 53 142 3407 9216 1 +171 CL0171 Phospoesterase \N inositol polyphosphate 1 phosphatase like superfamily Bateman A anon Members of this clan show metal-dependent / lithium sensitive phosphomonoesterase activity. The clan includes inositol polyphosphate 1 phosphatase and fructose 1,6-bisphosphatase [1]. 2008-09-03 15:50:29 2005-05-19 17:12:45 10 289 56 4585 15156 1 +172 CL0172 Thioredoxin Thioredoxin-like; Thioredoxin-like Mistry J anon This clan contains families related to the thioredoxin family. Thioredoxins are small enzymes that are involved in redox reactions via the reversible oxidation of an active centre disulfide bond. The thioredoxin fold consists of a 3 layer alpha/beta/alpha sandwich and a central beta sheet. 2008-09-03 15:50:29 2005-05-20 15:54:37 16 2327 1079 7198 142836 1 +173 CL0173 STIR \N STIR superfamily Fenech M anon Both members of this clan are thought to be involved in TOLL/IL1R-like pathways, by mediating protein-protein interactions between pathway components. The N-termini of SEFIR and TIR domains are similar, but the domains are more divergent towards the C-terminus [1]. 
2008-09-03 15:50:29 2005-08-11 10:18:06 10 27 1046 1412 7537 1 +174 CL0174 TetR_C \N TetR protein, C-terminal domain-like Fenech M anon This clan features families of transcriptional regulators for multidrug efflux pumps, which belong to the TetR superfamily. They are induced by the presence of a variety of factors, such as antibiotics or organic solvents. The C-terminal region featured in these families is thought to contain the inducer-binding site; the divergent sequences in this region allow for the binding of a variety of different inducers [1-4]. 2008-09-03 15:50:29 2005-08-11 10:47:22 9 244 37 2464 10523 1 +175 CL0175 TRASH TRASH superfamily Fenech M anon TRASH-like domains contain well-conserved cysteine residues that are thought to be involved in metal coordination. These domains are thus expected to be involved in metal trafficking and heavy-metal resistance. It has been suggested that the members adopt a 'treble-clef' fold, with 3/4 beta strands preceding a C-terminal alpha helix [1]. 2008-09-03 15:50:29 2005-08-15 13:59:13 10 149 389 2803 12513 1 +176 CL0176 Chemosens_recp \N Chemosensory 7tm receptor superfamily Finn R, Fenech M anon The members of this clan are families of various gustatory and odorant receptors. They are described as being seven-transmembrane receptors, and in fact all show characteristic regions of hydrophobicity on the alignment. 2008-09-03 15:50:29 2005-08-15 14:20:45 9 0 50 129 5871 1 +177 CL0177 PBP \N Periplasmic binding protein clan Bateman A anon Periplasmic binding proteins (PBPs) consist of two large lobes that close around the bound ligand. This architecture is reiterated in transcriptional regulators, such as the lac repressors. In the process of evolution, genes encoding the PBPs have fused with genes for integral membrane proteins. 
Thus, diverse mammalian receptors contain extracellular ligand binding domains that are homologous to the PBPs; these include glutamate/glycine-gated ion channels such as the NMDA receptor, G protein-coupled receptors, including metabotropic glutamate, GABA-B, calcium sensing, and pheromone receptors, and atrial natriuretic peptide-guanylate cyclase receptors [2]. 2008-09-03 15:50:29 2005-08-22 13:15:50 15 1664 837 5964 242652 1 +178 CL0178 PUA \N PUA/ASCH superfamily Bateman A anon This clan consists of the RNA binding PUA domain and ASCH domain. It also contains uncharacterised protein families. 2008-09-03 15:50:29 2005-08-22 16:11:13 15 169 216 5007 19669 1 +179 CL0179 ATP-grasp \N ATP-grasp superfamily Bateman A anon The ATP-grasp domain is found in a wide variety of carboxylate-amine/thiol ligases [1]. It is composed of two subdomains, with ATP being bound in the cleft between the two. 2008-09-03 15:50:29 2005-08-23 14:07:58 13 397 518 7468 61819 1 +181 CL0181 ABC-2 \N ABC-2-transporter-like clan Fenech M anon These families are similar to the ABC-2 transporter subfamily, as described in [1] (Pfam:PF01061). Members of this family are involved in drug transport and resistance. CcmB protein family (Pfam:PF03379) members are also transporters; they are required for haem export into the periplasm [2]. 2008-09-03 15:50:29 2005-08-23 16:06:40 9 3 239 5481 49701 1 +182 CL0182 IT \N IT (Ion Transporter) superfamily Bateman A anon This superfamily of secondary carriers specific for cationic and anionic compounds, has been termed the ion transporter (IT) superfamily [1]. 2008-09-03 15:50:29 2005-08-23 18:40:04 12 0 126 4842 49966 1 +183 CL0183 PAS_Fold PAS; PAS domain clan Bateman A anon This clan contains PAS domains that are found in a wide variety of bacterial signaling proteins. 
2008-09-03 15:50:29 2005-08-24 15:20:49 13 311 6949 5998 88093 1 +184 CL0184 DMT Drug/Metabolite transporter superfamily Bateman A anon This clan contains a variety of transporters which have 4, 5, 9 or 10 membrane spanning helices. Many of the 10 membrane spanning transporters appear to be a duplication of the 5 spanning unit [1]. Many of these families contain a characteristic glycine rich motif close to the C-terminus. 2008-09-03 15:50:29 2005-08-25 11:36:38 10 12 341 5620 114318 1 +186 CL0186 Beta_propeller Beta propeller clan Bateman A anon This large clan contains proteins that contain beta propellers. These are composed of between 6 and 8 repeats. The individual repeats are composed of a four stranded sheet. The clan includes families such as WD40 Pfam:PF00400 where the individual repeats are modeled. The clan also includes families where the entire propeller is modeled such as Pfam:PF02239 usually because the individual repeats are not discernible. These proteins carry out a very wide diversity of functions including catalysis. 2008-09-03 15:50:29 2005-08-26 09:32:54 13 1163 8240 11212 356413 1 +187 CL0187 LysM \N LysM-like domain Fenech M anon The LysM domain (Pfam:PF01476) is thought to be a general peptidoglycan-binding module. Although originally described in bacterial proteins, it has been also found in some eukaryotic sequences. It takes up a beta-alpha-alpha-beta conformation, with the beta strands forming an antiparallel beta sheet and the two alpha helices packing on one side of this sheet [1]. 2008-09-03 15:50:29 2005-08-26 09:44:43 10 8 614 4599 29820 1 +188 CL0188 CH \N Calponin homology domain Fenech M anon The calponin homology (CH) domain is found in a variety of contexts, ranging from proteins involved in signalling pathways to cytoskeletal proteins. 
They seem to have diverse cellular functions, which are thought to include actin binding, involvement in the MAP kinase signalling pathway, and regulation of GEF activity in Rho family GTPase pathways. Structurally, they are organised into three layers, with two parallel alpha helices in the core being sandwiched between another two helices, one on each side [1]. 2008-09-03 15:50:29 2005-08-30 18:32:49 9 107 636 512 11687 1 +189 CL0189 Endonuclease \N Endonuclease V-like superfamily Wuster A anon This clan contains DNA repair proteins. In E. coli endonuclease V initiates DNA repair of deaminated DNA bases and has similarity to motifs required for the catalytic activity of the UvrC endonuclease [1]. 2008-09-03 15:50:29 2005-08-31 14:14:38 9 20 27 4568 5628 1 +190 CL0190 HSP20 HSP20-like chaperone superfamily Fenech M anon The small heat shock proteins (sHSPs) prevent protein aggregation during heat shock and oppose regulated cell death. A conserved arginine residue in the HSP20/alpha-crystallin domain (Pfam:PF00011) has in fact been implicated in the development of cataracts and myopathies [1]. The CS family (Pfam:PF04969) includes proteins that are known to bind HSP90 [2], as well as p23 (Swiss:Q15185), which is an HSP90 co-chaperone [3]. 2008-09-03 15:50:29 2005-09-01 14:25:02 11 201 170 4085 13158 1 +191 CL0191 POTRA \N POTRA domain superfamily Fenech M anon The polypeptide-transport-associated (POTRA) domain is predicted to be organised into three beta-strands and two alpha helices, the latter being found between strands 2 and 3. It is usually found associated with a beta-barrel outer membrane domain. It is thought to have a chaperone-like function; the proteins it is found in are involved in processes as diverse as bacterial septation and protein transport across membranes [1]. 
2008-09-03 15:50:29 2005-09-01 14:31:22 10 20 53 4354 20069 1 +192 CL0192 GPCR_A Family A G protein-coupled receptor-like superfamily Fenech M anon This clan contains various seven-transmembrane receptors and related proteins. A major member is Pfam:PF00001, members of which have been considered to be typical members of the rhodopsin superfamily. Many members of this clan are Caenorhabditis proteins, suggesting great expansion of the relevant families in these nematode worms. 2008-09-03 15:50:29 2005-09-02 12:58:56 12 304 924 9484 92105 1 +193 CL0193 MBB Outer membrane beta-barrel protein superfamily Bateman A anon This clan gathers together a large set of beta barrel membrane proteins.Although these proteins have different numbers of beta strands in the barrel they have significant sequence similarity between families. 2008-09-03 15:50:29 2005-09-05 10:13:31 13 356 515 4665 118601 1 +194 CL0194 DNA_pol_B-like \N DNA polymerase B like Mistry J anon DNA polymerases replicate DNA by adding nucleotide triphosphate (dNTP) residues to the 5'-end of a growing chain of DNA. They use a complementary DNA chain as a template.` 2008-09-03 15:50:29 2005-09-05 13:42:41 9 181 97 2623 7651 1 +195 CL0195 DBL \N Duff-binding like superfamily Bateman A anon This clan includes DBL (Duffy-binding like) domains from a variety of plasmodium surface proteins. 2008-09-03 15:50:29 2005-09-05 15:25:46 9 4 55 9 832 1 +196 CL0196 DSRM \N DSRM-like clan Bateman A anon This clan contains RNA-binding domains. 2008-09-03 15:50:29 2005-09-08 18:03:50 11 267 170 5456 15069 1 +197 CL0197 GME \N GME superfamily Bateman A, Shirai H anon This superfamily contains a number of related enzymes such as AstB, peptidyl-arginine deiminase, arginine deiminase and amidinotransferase [1,2]. 2008-09-03 15:50:29 2005-09-15 15:15:35 9 117 27 3437 6472 1 +198 CL0198 HHH Helix-hairpin-helix superfamily Bateman A anon This superfamily includes Helix-hairpin-helix DNA-binding domains. 
2008-09-03 15:50:29 2005-09-16 17:04:15 15 655 587 6368 81507 1 +199 CL0199 DPBB \N Double Psi beta barrel glucanase Bateman A anon The DPBB fold is often an enzymatic domain. The members of this family are quite diverse, and if catalytic this family may contain several different functions [1,2]. This clan represents the barwin like barrels. 2008-09-03 15:50:29 2005-09-16 17:23:52 11 29 104 3343 9071 1 +200 CL0200 Prefoldin \N Prefoldin GriffithsJones S, Finn RD, Mistry J anon The Prefoldin domain forms a coiled-coil structure that is involved in substrate-binding in the the chaperone co-factor prefoldin (PFD). Each PFD is assembled from two alpha and four beta subunits. Each alpha subunit contains two, and each beta subunit one, central beta-hairpin that is flanked N- and C-terminally by coiled-coil helices. The N-terminal regions, the prefoldin domain, are found facing into the central cavity of the chaperone. Here exposed hydrophobic patches form an interaction with the substrate (an unfolded protein) [1]. 2008-09-03 15:50:29 2005-09-19 13:51:59 9 9 52 526 2970 1 +201 CL0201 Peptidase_SH \N Peptidase clan SH Bateman A anon This clan includes the serine peptidase assemblin from herpes virus as well as other viral peptidase families predicted to be related [1]. 2008-09-03 15:50:29 2005-09-20 12:38:50 9 47 16 1555 2100 1 +202 CL0202 GBD Galactose-binding domain-like superfamily Bateman A anon This large superfamily contains beta sandwich domains with a jelly roll topology. Many of these families are involved in carbohydrate recognition. Despite sharing little sequence similarity they do share a weak sequence motif, with a conserved bulge in the C-terminal beta sheet. The probable role of this bulge is in bending of the beta sheet that contains the bulge. This enables the curvature of the sheet forming the sugar binding site [1]. 
2008-09-03 15:50:29 2005-09-20 16:50:13 10 807 2917 4159 38880 1 +203 CL0203 CBD \N Carbohydrate binding domain superfamily Bateman A anon This superfamily includes several carbohydrate binding domains. These domains have a beta sandwich structure. 2008-09-03 15:50:29 2005-09-21 11:02:43 11 97 423 950 3904 1 +204 CL0204 Adhesin \N Bacterial adhesin superfamily Bateman A anon This superfamily includes a variety of bacterial adhesins that have a jelly-roll beta-barrel fold [1]. These domains are involved in sugar recognition. 2008-09-03 15:50:29 2005-09-21 11:17:50 10 156 136 1489 19890 1 +205 CL0205 Di-copper \N Di-copper centre-containing domain Bateman A anon This superfamily includes tyrosinases and hemocyanins that share a di-copper centre [1]. 2008-09-03 15:50:29 2005-09-23 12:59:27 10 106 90 1801 4482 1 +206 CL0206 TRB \N Transcriptional repressor beta-barrel domain Bateman A anon This beta-barrel domain is found at the C-terminus of a variety of transcriptional repressor proteins. 2008-09-03 15:50:29 2005-09-23 14:02:56 10 132 32 4239 8501 1 +207 CL0207 Rhomboid-like \N Integral membrane protein / protease Mistry J anon This clan contains proteins from both bacteria and eukaryotes. The Rhomboid protein is an intramembrane serine protease which is involved in epidermal growth factor (EGF)-dependent signalling pathways [1]. The DER1 family is involved in degradation of misfolded ER proteins [2]. 2008-09-03 15:50:29 2005-12-01 11:27:30 9 16 91 4052 9025 1 +208 CL0208 UBC \N Ubiquitin conjugating enzyme like superfamily Bateman A anon This superfamily includes a diverse set of proteins that bind to ubiquitin [1]. 2008-09-03 15:50:29 2005-12-02 15:22:57 10 294 238 870 12063 1 +209 CL0209 Bet_V_1_like \N Bet V 1 like Mistry J anon The Bet_V_I family is composed of sequences related to the major Birch (Betula verrucose) pollen antigen Betv1. This allergen is known to cause hayfever, dermatitis, asthma and occasionally anaphylactic shock. 
The other families in this clan share the same structure as Betv1 which is composed of antiparallel beta sheets and alpha helices. There is a cavity between the beta sheet and a long C terminal helix. The cavity appears to play roles in the binding of lipid molecules [1][2][3] which seems a common feature of the families in this clan. 2008-11-07 17:26:25 2005-12-02 17:50:32 10 443 245 4418 23609 1 +210 CL0210 HNOX-like \N Heme NO and oxygen binding like Mistry J anon This clan contains families that bind small molecules and are predominantly involved in signalling. Members include the heme NO binding domain. This domain is related to soluble guanylate cyclases and is mainly alpha helical in structure.\ Other members of this clan include V4R, which is predicted to be a small molecule binding domain, and a domain often found adjacent to this that is found on activators of aromatic catabolism, and on signalling molecules. 2008-09-03 15:50:29 2005-12-06 09:55:10 10 54 47 793 1692 1 +212 CL0212 SNARE \N SNARE-like superfamily Bateman A anon This clan includes part of the SNARE like superfamily. 2008-09-03 15:50:29 2005-12-08 17:07:43 8 44 96 372 5553 1 +213 CL0213 ShK-like \N Sea anemone toxin k like Mistry J anon Members of this clan include the Crisp domain which is involved in ryanodine receptor Ca2+ signalling, and the ShK domain which is named after the ShK channel inhibitor toxin. Both domains are cysteine rich and contain multiple disulphide bonds [1][2][3]. 2008-09-03 15:50:29 2006-01-03 16:32:02 8 27 121 194 2516 1 +214 CL0214 UBA \N UBA superfamily Bateman A anon This superfamily includes domains related to the UBA domain. These domains are often involved in ubiquitin binding. 2008-09-03 15:50:29 2006-01-05 15:54:42 12 148 350 5009 18121 1 +217 CL0217 Rotavirus_VP7 \N Rotavirus VP7 protein Bateman A anon This clan consists of several Rotavirus major outer capsid protein VP7 sequences. The rotavirus capsid is composed of three concentric protein layers. 
Proteins VP4 and VP7 comprise the outer layer. VP4 forms spikes and is the viral attachment protein. VP7 is a glycoprotein and the major constituent of the outer protein layer [1]. 2008-09-03 15:50:29 2006-01-24 14:08:46 8 28 2 702 4849 1 +218 CL0218 ox_reductase_C \N Oxidoreductase C terminal like Mistry J anon This clan contains the C terminal region of oxidoreductase proteins and putative oxidoreductase proteins. Families in this clan form an alpha/beta structure and are usually found adjacent to an N terminal Rossman fold. 2008-09-03 15:50:29 2006-01-26 11:16:09 11 193 37 3633 12363 1 +219 CL0219 RNase_H \N Ribonuclease H-like superfamily Bateman A anon This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H. 2008-09-03 15:50:29 2006-01-31 17:56:29 13 1007 2352 7614 190937 1 +220 CL0220 EF_hand \N EF-hand like superfamily Bateman A anon The EF hand is a calcium binding domain found in a wide variety of proteins [1]. 2008-09-03 15:50:29 2006-02-01 09:23:33 11 1170 2215 3113 56360 1 +221 CL0221 RRM \N RRM-like clan Bateman A anon This clan contains families that are related to the RNA recognition motif domains. However, not all these families are RNA binding. 2008-09-03 15:50:29 2006-03-05 12:30:44 10 736 1162 5818 73704 1 +222 CL0222 MviN_MATE \N MviN, MATE-like superfamily Bateman A anon This superfamily consists of a variety of integral membrane protein families. The MATE family are known to be transporters. Other proteins have been implicated in virulence and polysaccharide biosynthesis. 2008-09-03 15:50:29 2006-03-05 13:21:19 7 4 109 5014 53746 1 +223 CL0223 MACRO \N MACRO domain superfamily Bateman A anon This superfamily includes the Macro domain as well as the amino terminal domain from peptidase M17 proteins. 
2008-09-03 15:50:29 2006-03-06 10:58:15 7 147 130 4353 8980 1 +224 CL0224 DHQS \N Dehydroquinate synthase-like superfamily Bateman A anon This superfamily includes Dehydroquinate synthase and Iron containing alcohol dehydrogenase which have a similar active site organisation [1]. 2008-09-03 15:50:29 2006-03-06 13:57:58 7 116 47 4875 18804 1 +225 CL0225 FtsL \N FtsL-like superfamily Bateman A anon This clan includes two proteins that are known to interact, FtsL and DivIC which are part of a trimeric complex with DivIB [2]. DivIC and FtsL are bacterial proteins essential for cell division. 2008-09-03 15:50:29 2006-03-06 15:46:31 7 0 9 3920 6384 1 +226 CL0226 M6PR \N Mannose 6-phosphate receptor Bateman A anon This clan includes cation dependent and independent mannose 6-phosphate receptors. 2008-09-03 15:50:29 2006-03-06 16:32:14 8 48 95 319 2892 1 +227 CL0227 Enolase_N \N Enolase N-terminal domain-like superfamily Bateman A anon This domain is found at the N-terminus of the catalytic Tim barrel-like domain in enolase and other enzymes. 2008-09-03 15:50:29 2006-03-06 16:37:34 8 887 42 5636 14408 1 +228 CL0228 Acyltransferase Acyltransferase clan Bateman A anon This clan includes several families of related acyltransferases. 2008-09-03 15:50:29 2006-03-06 16:43:34 7 2 169 4885 24321 1 +229 CL0229 RING \N Ring-finger/U-box superfamily Bateman A anon This clan includes the Ring zinc finger domains as well as the U-box domain that appears to have lost the zinc coordinating cysteine residues [1]. 2008-09-03 15:50:29 2006-03-06 16:54:46 10 159 1983 4407 49901 1 +230 CL0230 HO \N Heme oxygenase-like superfamily Bateman A anon This clan includes the Heme oxygenase family as well as the TENA/THI-4/PQQC family that are less well characterised [2]. 2008-09-03 15:50:29 2006-03-06 16:57:26 7 195 41 2847 5166 1 +231 CL0231 MazG \N all-alpha NTP pyrophosphohydrolase superfamily Bateman A anon This superfamily includes MazG, HisE and dimeric dUTPases (Not yet in Pfam) [1]. 
2008-09-03 15:50:29 2006-03-06 17:03:44 8 112 46 4768 13122 1 +232 CL0232 NifU \N NifU C-terminal domain-like superfamily Bateman A anon This clan includes the C-terminal domain of NifU as well as a large family of uncharacterised domains. 2008-09-03 15:50:29 2006-03-06 17:10:22 7 28 49 4450 9837 1 +233 CL0233 SufE_NifU \N SufE/NifU superfamily Bateman A anon This clan includes iron sulfur cluster assembly proteins. 2008-09-03 15:50:29 2006-03-06 17:18:24 7 30 26 4741 7304 1 +234 CL0234 CTPT \N CTP transferase-like superfamily Bateman A anon This clan includes the integral membrane CTP transferase family as well as a large family of uncharacterised proteins that may also function as nucleotidyltransferases. 2008-09-03 15:50:29 2006-03-06 17:30:58 7 0 20 4821 6518 1 +235 CL0235 PspA \N PspA/ESCRT-III Bateman A anon This clan includes PspA like proteins that are transcriptional activators as well as Snf7, a protein involved in cellular trafficking. 2008-09-03 15:50:29 2006-03-07 09:10:43 8 24 36 2194 5578 1 +236 CL0236 PDDEXK PD-(D/E)XK nuclease superfamily Bateman A anon This clan includes a large number of nuclease families related to holliday junction resolvases [1,2]. 2012-10-03 14:09:06 2006-03-07 10:09:50 16 584 895 6041 71985 1 +237 CL0237 HD_PDEase \N HD/PDEase superfamily Bateman A anon This clan includes a range of phosphohydrolase enzymes with a common helical fold. 2008-09-03 15:50:29 2006-03-07 17:08:35 7 446 682 5155 45938 1 +238 CL0238 PP2C \N PP2C-like superfamily Bateman A anon This clan includes the PP2C family of phosphatases as well as the SpoIIE family. This suggests SpoIIE proteins may also be phosphatases. 2008-09-03 15:50:29 2006-03-08 17:11:21 7 81 723 3947 17476 1 +239 CL0239 Insulin \N Insulin-like superfamily Bateman A anon This superfamily includes the insulin like hormones. 2008-09-03 15:50:29 2006-03-09 13:10:30 7 895 8 341 1563 1 +240 CL0240 PFK \N PFK-like superfamily Bateman A anon This clan includes two SCOP superfamilies. 
Strong similarities between NAD kinases, DAG kinase, sphingosine kinase and PFK have previously been shown[1]. 2008-09-03 15:50:29 2006-03-09 16:35:37 7 146 173 5088 20115 1 +241 CL0241 ABC_membrane \N ABC transporter membrane domain clan Bateman A anon This clan includes families that are the membrane components of ABC transporter complexes. In general these regions are composed of six transmembrane helices [1]. 2008-09-03 15:50:29 2006-03-09 17:38:52 7 22 211 5258 54224 1 +242 CL0242 DNA_primase_lrg \N DNA primase large subunit like Mistry J anon This clan contains the large subunit of archaeal and eukaryotic DNA primase, an enzyme which synthesises the oligoribonucleotide primers essential to DNA replication. The large subunit of DNA primase forms interactions with the small subunit and the structure implicates that it is not directly involved in catalysis, but plays a roles in correctly positioning the primase/DNA complex, and in the transfer of RNA to DNA polymerase [1]. The clan also contains the Lef-2 family, which is required for the expression of late genes. There is some evidence to suggest that LEF2 binds to both DNA and the DNA primase small subunit LEF-1 [3]. 2008-09-03 15:50:29 2006-04-21 14:57:17 8 11 11 506 609 1 +243 CL0243 AEP Archaeo-eukaryotic primase Mistry J anon This clan includes the small subunit of 2 and eukaryotic DNA primase, and primase-helicase proteins from bacteriophages and plasmids. All known cellular life forms use primases to synthesis a short RNA primer which is extended during DNA replication by a polymerase. Bacterial DNA primase adopts a different fold to archaeal and eukaryotic primases and belongs to a different superfamily. 2008-09-03 15:50:29 2006-04-24 16:40:55 9 21 52 1612 2766 1 +244 CL0244 PGBD \N PGBD superfamily Bateman A anon This clan consists of small putative peptidoglycan binding domains composed of three alpha helices. 
2008-09-03 15:50:29 2006-05-16 17:51:06 8 23 345 3348 11009 1 +245 CL0245 EDD \N EDD superfamily Bateman A anon The EDD superfamily was identified as an evolutionarily conserved domain (EDD) common to three different folds: mannose transporter EIIA domain (EIIA-man), dihydroxyacetone kinase (Dak), and DegV [1]. Both Dak and EIIA-man perform similar phosphotransfer reactions, suggesting a phosphotransferase activity for the DegV-like family of proteins, whose function other than lipid binding revealed in the crystal structure remains unknown [1]. 2008-09-03 15:50:29 2006-05-17 09:35:18 8 92 62 3559 18450 1 +246 CL0246 ISOCOT_Fold NagB-like; Isomerase,CoA transferase & Translation initiation factor Superfamily Bateman A, Anantharaman V anon This superfamily contains a variety of enzymes and non-enzymatic ligand binding domains. 2008-09-03 15:50:29 2006-05-17 13:20:27 7 279 161 5233 53370 1 +247 CL0247 2H \N 2H phosphoesterase superfamily Bateman A anon This clan includes a number of phosphoesterases that contain an internal duplication. 2008-09-03 15:50:29 2006-05-17 15:53:15 8 24 78 2901 4878 1 +248 CL0248 ParBc \N ParB-like superfamily Bateman A anon This superfamily includes nucleases related to ParB as well as uncharacterised proteins. 2008-09-03 15:50:29 2006-06-02 16:12:35 7 28 97 4788 13574 1 +249 CL0249 Phage_tail_L \N Phage minor tail protein L clan Bateman A anon This clan includes the phage minor tail protein L as well as a group of uncharacterised proteins that are also presumably phage components. 2008-09-03 15:50:29 2006-06-02 18:35:28 7 0 7 706 1564 1 +250 CL0250 GAD \N GAD domain superfamily Bateman A anon This domain is found as an insert within aspartyl-tRNA synthetase as well as GatB proteins. 2008-09-03 15:50:29 2006-07-27 15:00:10 6 16 17 4522 4847 1 +251 CL0251 MORN \N MORN repeat Mistry J anon The MORN (Membrane Occupation and Recognition Nexus) repeat is found in multiple copies in several proteins including junctophilins (See Takeshima et al. 
Mol. Cell 2000;6:11-22).\ A MORN-repeat protein has been identified in the parasite Toxoplasma gondiis as dynamic component of cell division apparatus [1].\ It has been hypothesised to function as a linker protein between certain membrane regions and the parasite's cytoskeleton [1]. 2008-09-03 15:50:29 2006-07-31 12:59:47 7 27 334 1412 30871 1 +252 CL0252 NfeD-like \N NfeD like Mistry J anon This clan includes the NfeD family which contains several proteins described as nodulation efficiency protein D (NfeD). The nfe genes (nfeA, nfeB, and nfeD) are involved in the nodulation efficiency and competitiveness of the Sinorhizobium meliloti strain GR4 on alfalfa roots [1]. The specific function the NfeD family is unknown although it is unlikely that NfeD is specifically involved in nodulation as the family contains several different archaeal and bacterial species most of which are not symbionts. 2008-09-03 15:50:29 2006-07-31 13:35:37 6 3 8 2902 4042 1 +254 CL0254 THDP-binding Thiamin diphosphate-binding superfamily Mistry J, Bateman A anon This clan includes pyruvate dehydrogenases, branched chain alpha-keto acid decarboxylases, phosphoketolases and the pyrimidine binding region of transketolases. 2008-09-03 15:50:29 2006-07-31 17:23:48 8 598 271 5650 93998 1 +255 CL0255 ATP_synthase ATP synthase F0 subunit Mistry J anon This clan contains subunits of the F0 complex of ATP-synthase. The F0 complex is the non-catalytic unit of ATPase and is involved in proton translocation across membranes. 2008-09-03 15:50:29 2006-08-01 13:45:46 8 29 78 10710 23627 1 +256 CL0256 Enolase_TIM \N Enolase like TIM barrel Mistry J anon This clan contains enzymes which adopt a TIM barrel fold. 2008-09-03 15:50:29 2006-08-01 15:00:35 6 1004 67 6025 21543 1 +257 CL0257 Acetyltrans Acetyltrans-like; N-acetyltransferase like Mistry J anon This clan contains families related to N-acetyltransferases. N-acetyltransferases catalyse the transfer of acetyl groups from acetyl-CoA to arylamines. 
2008-09-03 15:50:29 2006-08-02 14:16:08 8 664 967 6524 128815 1 +258 CL0258 DALR \N DALR superfamily Bateman A anon Members of this family are anticodon binding domains from various tRNA synthetases. 2008-09-03 15:50:29 2006-08-17 18:00:51 6 13 28 4898 11747 1 +259 CL0259 OstA \N OstA superfamily Bateman A anon This superfamily includes the OstA family as well as a large family of uncharacterised proteins. 2008-09-03 15:50:29 2006-08-22 10:05:45 6 11 32 2407 6093 1 +260 CL0260 NTP_transf \N Nucleotidyltransferase superfamily Bateman A anon This clan contains a diverse set of nucleotidyltransferase enzymes. 2008-09-03 15:50:29 2006-08-22 16:46:46 7 338 577 5508 42917 1 +261 CL0261 NUDIX NUDIX superfamily Bateman A anon This superfamily contains the NUDIX family and one related family. 2008-09-03 15:50:29 2006-08-24 18:58:30 6 360 321 5236 48986 1 +262 CL0262 Trigger_C \N Trigger factor/SurA domain Bateman A anon This helical domain is found in two families of chaperones. It is found at the N terminus of the SurA proteins and at the C-terminus of the trigger factors where presumably it shares a common but as yet unknown function. 2008-09-03 15:50:29 2006-08-30 10:54:44 6 20 54 4464 9322 1 +263 CL0263 His-Me_finger His-Me finger endonuclease superfamily Bateman A anon This superfamily defined originally by SCOP contains a diverse range of endonucleases. Later Grishin identified the MH1 domain as belonging to the superfamily [1]. 2008-09-03 15:50:29 2006-09-06 17:41:01 7 141 462 5328 22334 1 +264 CL0264 SGNH_hydrolase \N SGNH hydrolase superfamily Bateman A anon This superfamily contains a diversity of hydrolytic enzyme activities. 2008-09-03 15:50:29 2006-09-07 09:23:58 6 122 427 4610 21200 1 +265 CL0265 HIT \N HIT superfamily Bateman A anon The HIT superfamily are a superfamily of nucleotide hydrolases and transferases, which act on the alpha-phosphate of ribonucleotides [1]. 
2008-09-03 15:50:29 2006-11-09 16:05:44 6 147 99 4930 15333 1 +266 CL0266 PH \N PH domain-like superfamily Bateman A anon Members of this clan share a PH-like fold. Many families in this clan bind to short peptide motifs in proteins and are involved in signalling. 2008-09-03 15:50:29 2006-11-10 10:42:30 8 419 1526 2586 41992 1 +267 CL0267 S11_L18p \N Ribosomal protein S11/L18p superfamily Bateman A anon This superfamily includes two ribosomal proteins S11 and L18p as well as a domain from eukaryotic peptide chain release factor. This superfamily is likely to share an RNA-binding function. 2008-09-03 15:50:29 2006-11-10 14:46:27 7 451 37 5925 12981 1 +268 CL0268 Pec_lyase-like Pec_lyase; Pectate_lyase; Pectate lyase-like beta helix Bateman A anon This superfamily all contain a right handed beta helix similar to that first found in pectate lyase [1]. 2008-09-03 15:50:29 2006-11-10 15:10:57 6 217 1681 4399 45644 1 +269 CL0269 Maf \N Maf/Ham1 superfamily Bateman A anon This superfamily includes the Maf-like proteins and ITPases related to YjjX [1]. 2008-09-03 15:50:29 2006-11-10 17:10:08 7 21 29 3802 6399 1 +270 CL0270 Iso_DH \N Isocitrate/Isopropylmalate dehydrogenase-like superfamily Bateman A anon This superfamily of enzymes form dimers and have an active site between the two halves. 2008-09-03 15:50:29 2006-11-10 17:50:17 6 323 67 6496 27220 1 +271 CL0271 F-box \N F-box-like domain Bateman A anon This clan includes classical F-boxes and the PRANC domain found in pox ankyrin proteins. 2008-09-03 15:50:29 2006-11-22 13:28:07 6 29 805 512 18595 1 +272 CL0272 RGS \N RGS-like superfamily Bateman A anon This clan includes RGS domains that possess an alpha helical fold. 
2008-09-03 15:50:29 2006-11-27 17:23:03 6 92 152 303 4401 1 +273 CL0273 CYTH \N CYTH-like phosphatase superfamily Bateman A anon CyaB like adenylyl cyclase and the mammalian thiamine triphosphatases define a novel superfamily of catalytic domains called the CYTH domain that is present in all three superkingdoms of life. The catalytic core of these enzymes contain a novel alpha beta scaffold with 6 conserved acidic residues and 4 basic residues [1]. 2008-09-03 15:50:29 2006-12-05 13:50:23 6 48 23 3410 3949 1 +274 CL0274 WRKY-GCM1 \N WRKY-GCM1 superfamily Bateman A anon WRKY and GCM1 are metal chelating DNA-binding domains (DBD) which share a four stranded fold [1]. We present evidence that they share a stabilising core, which suggests a possible origin from a BED finger-like intermediate that was in turn ultimately derived from a C2H2 Zn-finger domain [1]. 2008-09-03 15:50:29 2006-12-05 13:57:43 7 5 215 443 6439 1 +275 CL0275 HAS-barrel \N HAS-barrel superfamily Bateman A anon The HAS barrel is named after HerA-ATP Synthase. In ATP synthases, this domain is implicated in the assembly of the catalytic toroid and docking of accessory subunits, such as the subunit of the ATP synthase complex. Similar roles in docking of the functional partner, the NurA nuclease, and assembly of the HerA toroid complex appear likely for the HAS-barrel of the HerA family [1]. 2008-09-03 15:50:29 2006-12-06 11:13:24 8 401 41 11124 22310 1 +276 CL0276 Nucleot_cyclase \N Nucleotide cyclase superfamily Bateman A anon This superfamily includes adenylyl cyclase and the GGDEF domain [1]. 2008-09-03 15:50:29 2006-12-06 14:41:38 7 119 2538 3679 52683 1 +277 CL0277 FAD-oxidase_C \N FAD-linked oxidase C-terminal domain superfamily Bateman A anon This clan consists of a duplicated subdomain in a variety of FAD-liked oxidase/dehydrogenase enzymes. 
2008-09-03 15:50:29 2006-12-07 10:48:02 6 129 128 3530 14475 1 +278 CL0278 AIG2 \N AIG2/ChaC-like superfamily Bateman A anon The structure consists of a five-stranded beta-barrel surrounded by two alpha-helices and a small beta-sheet.\ Conservation of residues in a hydrophilic cavity able to bind small ligands in some members suggests that this may also serve as an active site. 2008-09-03 15:50:29 2006-12-12 14:39:36 7 20 48 2219 4183 1 +279 CL0279 GatB_YqeY \N YqeY-like superfamily Bateman A anon This superfamily includes a domain from GatB as well as one from YqeY. Although being structurally distinct they share a common sequence relationship. 2008-09-03 15:50:29 2007-01-24 12:42:11 6 38 17 4060 6337 1 +280 CL0280 PIN \N PIN domain superfamily Bateman A anon This superfamily contains a variety of nuclease enzymes, including PIN domains and the FLAP exonucleases. 2008-09-03 15:50:29 2007-01-25 17:50:12 6 117 241 5231 29721 1 +281 CL0281 CCT \N CCT like-motif Bateman A anon This clan includes the CCT motif as well as a related motif that is similar to the first half of the CCT motif. 2008-09-03 15:50:29 2007-01-29 13:51:28 6 21 28 152 1759 1 +282 CL0282 Serum_albumin \N Serum albumin superfamily Bateman A anon This superfamily includes serum albumin and related families. 2008-09-03 15:50:29 2007-01-30 15:32:33 6 113 8 71 738 1 +283 CL0283 LigB \N LigB-like superfamily Bateman A anon This clan includes the LigB subunit of the aromatic ring opening dioxygenase LigAB [1]. The clan also includes the Memo-like proteins. 2008-09-03 15:50:29 2007-02-12 17:08:37 6 13 17 2285 4015 1 +284 CL0284 Allatostatin \N Allatostatin superfamily Mistry J anon Allatostatins are pleiotropic neuropeptides. In some insects they are known to inhibit the synthesis of juvenile hormone, an important regulator of development and reproduction. The full role of allatostatins in hormone production is still unclear [1]. 
2008-09-03 15:50:29 2007-05-04 14:22:41 5 0 16 41 455 1 +285 CL0285 YycI_YycH \N YycI/YycH superfamily Bateman A anon Both, YycH and YycI are always found in a pair on the chromosome, downstream of the essential histidine kinase YycG. Additionally, both proteins share a function in regulating the YycG kinase with which they appear to form a ternary complex. Structural studies show that these two protein families share two related domains. 2008-09-03 15:50:29 2007-05-14 11:16:06 6 13 4 723 1328 1 +286 CL0286 GCS \N gamma-glutamylcysteine synthetase/glutamine synthetase clan Bateman A, Pei J anon This clan represents a superfamily of carboxylate-amine/ammonia ligases [1] that includes Gamma-Glutamylcysteine synthetase (gamma-GCS) and glutamine synthetase (GS). Gamma-Glutamylcysteine synthetase (gamma-GCS) catalyses the first step in the de novo biosynthesis of glutathione. 2008-09-03 15:50:29 2007-08-10 14:04:49 5 394 88 9114 22193 1 +287 CL0287 Transthyretin \N Transthyretin superfamily Bateman A anon This clan unifies several SCOP superfamilies that all share a 7 stranded beta sandwich fold. 2008-09-03 15:50:29 2007-08-14 16:23:44 6 929 1227 4080 40864 1 +288 CL0288 DAP_epimerase \N DAP epimerase superfamily Bateman A anon This superfamily includes DAP epimerase and proline racemase as well as the PrpF protein. It has been suggested that this fold may have evolved from the HotDog fold [1]. 2008-09-03 15:50:29 2007-08-17 11:58:37 5 61 43 3955 13393 1 +289 CL0289 FBD \N Folate binding domain Bateman A anon This folate binding domain is found in the GCV T protein as well as the sarcosine oxidase gamma subunit [1]. 2008-09-03 15:50:29 2007-08-17 13:17:41 5 63 59 4863 12811 1 +290 CL0290 EPT_RTPC \N EPT/RTPC-like superfamily Bateman A anon This superfamily includes Enolpyruvate transferase (EPT) and RNA 3'-terminal phosphate cyclase (RTPC). 
2008-09-03 15:50:29 2007-08-17 13:33:07 5 195 54 4992 12126 1 +291 CL0291 KNTase_C Nucleotidyltransferase substrate binding domain Bateman A anon This alpha helical domain is found associated with a variety of nucleotidyltransferase domains. 2008-09-03 15:50:29 2007-08-20 15:49:55 6 35 91 3566 11834 1 +292 CL0292 LysE LysE transporter superfamily Bateman A anon This clan includes a diverse range of transporter families [1]. 2008-09-03 15:50:29 2007-10-04 12:03:53 5 0 68 4993 52424 1 +293 CL0293 CDC \N Cholesterol-dependent cytolysin superfamily Bateman A anon This superfamily includes the MACPF domain as well as the Cholesterol-dependent cytolysins [1]. 2008-09-03 15:50:29 2007-10-19 12:42:17 5 21 68 720 2339 1 +294 CL0294 Sec10 \N Sec10-like superfamily Bateman A anon This superfamily includes large proteins that are parts of the conserved oligomeric Golgi complex and exocyst complex. 2008-09-03 15:50:29 2007-11-20 17:02:32 5 0 34 298 1198 1 +295 CL0295 Vps51 Vps51 domain superfamily Bateman A anon This clan includes an N-terminal domain from several vesicle transport proteins that are related to Vps51. 2008-09-03 15:50:29 2007-11-20 17:11:52 6 11 168 380 6736 1 +296 CL0296 GroES \N GroES-like superfamily Bateman A anon This superfamily includes the GroES protein as well as the N-terminal GroES-like domain from Alcohol dehydrogenase. 2008-09-03 15:50:29 2008-02-08 16:40:31 4 569 527 6115 49027 1 +297 CL0297 PhoU \N PhoU-like superfamily Bateman A anon This superfamily includes PhoU and its relatives that contain a three helical bundle domain structure. 2008-09-03 15:50:29 2008-02-08 17:40:34 4 21 32 4083 13356 1 +298 CL0298 tRNA_bind_arm \N tRNA-binding arm superfamily Bateman A anon This domain is found in Phe and Ser tRNA synthetases at the N-terminus, and at the C-terminus of Val tRNA synthetase. The domain is composed of two helices. 
2008-09-03 15:50:29 2008-02-12 16:57:18 4 43 35 4877 13679 1 +299 CL0299 Peptidase_SF \N Peptidase clan SF Mistry J, Rawlings N anon This clan includes the peptidase S24 and S26 families. These families adopt a mainly beta fold. Members of the family S24 have an additional C-terminal domain containing a bundle of three helices presumably important for binding DNA. 2008-09-03 15:50:29 2008-02-25 12:55:51 4 47 75 5096 20383 1 +300 CL0300 TAT \N Twin-Arginine Translocation Motif Bateman A anon This motif is found in a wide range of secreted proteins. It is named after the conserved pair of arginines that is followed by a hydrophobic stretch. 2008-09-03 15:50:29 2008-02-29 15:32:23 4 26 172 1437 2441 1 +301 CL0301 PA14 \N PA14 superfamily Bateman A anon This clan includes the PA14 domain and related families. 2008-09-03 15:50:29 2008-03-03 17:31:48 5 32 336 731 2126 1 +302 CL0302 Arginase \N Arginase/deacetylase superfamily Bateman A anon This superfamily includes arginase enzymes as well as histone deacetylases and related enzymes [1]. 2008-09-03 15:50:29 2008-04-30 14:26:28 5 410 86 3456 11530 1 +303 CL0303 H2TH \N Helix-two-turns-helix superfamily Bateman A anon This domain is thought to play a role in binding nucleic acids. It is DNA binding in nucleases and RNA-binding in ribosomal S13. 2008-09-03 15:50:29 2008-05-28 16:24:30 4 289 53 5430 13968 1 +304 CL0304 CheY \N CheY-like superfamily Bateman A anon This clan includes the CheY-like response regulators from bacteria [1-2]. 2008-09-03 15:50:29 2008-06-04 14:50:30 4 475 3478 5545 154594 1 +305 CL0305 PTH2 \N Peptidyl-tRNA hydrolase II superfamily Bateman A anon This clan includes Peptidyl-tRNA hydrolase II as well as a large family of uncharacterised proteins called DUF2000. A structure for DUF2000 shows a similar structure to PTH2. It is not clear if the DUF2000 family are also Peptidyl-tRNA hydrolases. 
Both families contain a conserved positively charged residue close to the amino terminus that may be part of the active site. 2008-09-03 15:50:29 2008-06-05 14:00:28 4 24 15 963 1328 1 +306 CL0306 HeH \N LEM/SAP HeH motif Bateman A anon This superfamily includes protein domains with the helix-extended loop-helix (HeH) structure. 2008-09-03 15:50:29 2008-09-03 10:49:37 3 74 240 4102 7665 1 +307 CL0307 FUSC Fusaric acid resistance protein-like superfamily Bateman A anon Members of this clan are likely to be integral membrane bound transporters. 2008-09-05 13:12:16 2008-09-05 14:12:16 3 0 73 3485 13824 1 +308 CL0308 DMSO_reductase \N Dimethyl sulfoxide reductase type II family Coggill P anon This clan includes members that are type II dimethyl sulfoxide reductase families, all of which are also membrane anchor proteins belonging to the iron-sulfur molybdoenzyme (CISM) family [1]. 2008-09-23 15:56:29 2008-09-23 16:56:29 3 8 25 1456 4376 1 +310 CL0310 DinB DinB-like superfamily Bateman A anon This superfamily are thought to be metalloenzymes. They possess a four helical bundle core structure with a beta hairpin. Members of the superfamily have a predicted active site composed of three histidines that chelate Nickel or Zinc. In some cases these histidines are replaced with Aspartate or Glutamate. Mostly they form a dimeric structure. The dinB gene is one of the DNA-damage-induced genes and the corresponding protein, DinB, is the founding member of the clan. The protein contains a four-helix up-down-down-up bundle that has previously been described in the literature in three disparate proteins: the enzyme MDMPI (mycothiol-dependent maleylpyruvate isomerase), YfiT and TTHA0303, a member of a small DUF (domain of unknown function). Most (but not all) clan members seem to have the ability to coordinate a metal ion using a conserved histidine-triad motif. 
The proteins that share the fold exhibit four different quaternary structures: monomeric and three different dimeric forms [1]. 2008-10-08 12:01:30 2008-10-08 13:01:30 3 36 73 2472 9715 1 +311 CL0311 SCP2 \N SCP-2 sterol transfer superfamily Bateman A anon This superfamily includes the SCP2 family as well as a domain from the mycothiol dependent maleylpyruvate isomerase. 2008-10-13 14:19:21 2008-10-13 15:19:21 3 29 62 2115 4976 1 +312 CL0312 HemS_ChuX \N Heme iron utilization protein-like superfamily Bateman A anon This superfamily includes HemS and ChuX like protein families. 2008-10-22 16:49:13 2008-10-22 17:49:13 3 13 11 760 1473 1 +314 CL0314 PP-binding \N ACP-like superfamily Bateman A anon \N 2008-11-19 12:30:53 2008-11-19 12:30:53 3 127 3226 5406 42314 1 +315 CL0315 Gx_transp Gx transporter superfamily Bateman A anon This superfamily includes a wide range of transporters that contain many conserved glycine residues in the presumed transmembrane regions. 2008-11-21 16:40:49 2008-11-21 16:40:49 3 4 132 4338 23854 1 +316 CL0316 Acyl_transf_3 \N Membrane acyl transferase superfamily Bateman A anon This superfamily includes a wide variety of integral membrane acyltransferase enzymes that often acylate sugars. 2008-11-24 13:14:42 2008-11-24 13:14:42 3 0 72 4090 17608 1 +317 CL0317 Multiheme_cytos Multiheme cytochrome superfamily Coggill P anon This family includes cytochromes that contain multiple CxxCH motifs. 2008-12-10 17:38:20 2008-12-10 17:38:20 3 247 280 1655 11292 1 +318 CL0318 Cytochrome-c Cytochrome c superfamily Coggill P anon This family includes proteins where a covalently-bound haem completes the core. The core is three helices in an open folded leaf formation. The members are monodomain cytochromes. 
2008-12-12 14:30:33 2008-12-12 14:30:33 3 552 457 3452 28903 1 +319 CL0319 SHS2 Rob_SOUL; SHS2 domain Bateman A, Anantharaman V anon SHS2 is a novel domain with a simple fold containing a core of 3 strands, forming a curved sheet, and a single helix in a strand-helix-strand-strand (SHS2) configuration [1]. SHS2 is found in the bacterial cell division ATPase FtsA, the archaeo-eukaryotic RNA polymerase subunit Rpb7p, the GyrI superfamily, and the uncharacterized MTH1598/Tm1083-like proteins [1]. The fold exists as single copy versions in FtsA (where it is inserted into the RNAseH fold), Rbp7p and Dodecin [1]. It is found as a diad in the GyrI superfamily. In MTH1598/Tm1083-like proteins two copies of SHS2 are found with one inserted into another [1]. The single-copy versions in FtsA and Rbp7 mediate protein–protein interactions, while the one in Dodecin is a small molecule binding domain. The GyrI also binds small molecule, while the MTH1598 is predicted to be enzymatic [1]. 2008-12-12 19:39:43 2008-12-12 19:39:43 3 250 83 4602 13173 1 +320 CL0320 PepSY \N PepSY domain-like superfamily Bateman A anon This family includes the PepSY domain as well as a family of uncharacterised proteins. 2008-12-15 13:39:08 2008-12-15 13:39:08 3 53 124 3218 10624 1 +321 CL0321 PLAT \N PLAT domain like superfamily Bateman A anon This domain has an 8-stranded sandwich structure. 2008-12-15 14:40:00 2008-12-15 14:40:00 3 91 192 284 3462 1 +322 CL0322 RND_permease \N RND permease superfamily Bateman A anon Different members of the RND superfamily have been shown to transport hydrophobic drugs, fatty acids, bile salts, organic solvents, heavy metals, autoinducers and lipooligosaccharides in bacteria [1]. 2008-12-18 12:52:06 2008-12-18 12:52:06 3 91 154 4816 43065 1 +323 CL0323 Patatin \N Patatin/FabD/lysophospholipase-like superfamily Bateman A anon This superfamily of enzymes contains a Ser/Asp catalytic dyad. Members of this superfamily are all serine acylhydrolase enzymes. 
2008-12-18 16:43:32 2008-12-18 16:43:32 3 75 1413 5254 25679 1 +324 CL0324 Homing_endonuc \N Homing endonuclease-like superfamily Bateman A anon This superfamily includes a variety of LAGLIDADG-like homing endonuclease like families. 2009-01-08 17:06:53 2009-01-08 17:06:53 3 109 309 2676 6450 1 +325 CL0325 Form_Glyc_dh \N Formate/glycerate dehydrogenase catalytic domain-like superfamily Bateman A anon This superfamily includes the catalytic domain of a variety of dehydrogenase enzymes. The domain has a flavodoxin-like fold and contains an inserted Rossman fold NAD-binding domain. 2009-01-09 10:02:56 2009-01-09 10:02:56 3 370 93 4952 24576 1 +326 CL0326 Reo_sigma \N Virus attachment protein superfamily Bateman A anon This superfamily includes virus attachment proteins that share a common beta sandwich domain. 2009-01-09 10:44:19 2009-01-09 10:44:19 3 291 20 282 678 1 +327 CL0327 Pilus \N Pilus subunit Mistry J anon This is a clan contains bacterial pilus subunits and proteins involved in secretion. Pili proteins enable the transfer of plasmid between bacteria. The families in this clan adopt an alpha helical structure which is packed against a beta sheet [2-3]. 2009-01-12 11:15:14 2009-01-12 11:15:14 3 115 552 3218 21160 1 +328 CL0328 2heme_cytochrom Transmembrane di-heme cytochrome superfamily Bateman A anon This superfamily includes a variety of different heme binding cytochromes. 2009-05-08 19:48:23 \N 3 111 270 42880 138619 1 +329 CL0329 S5 \N Ribosomal protein S5 domain 2-like superfamily Bateman A anon This superfamily contains a wide range of families that possess a structure similar to the second domain of ribosomal S5 protein. 2009-05-08 19:52:37 \N 3 764 344 6126 79017 1 +330 CL0330 AVL9 Late secretory pathway transport machinery Bateman A anon Members of this clan are involved in vesicle formation/trafficking. 
2009-05-08 19:52:58 \N 3 2 129 304 2924 1 +331 CL0331 EpsM General secretion pathway protein M Coggill P anon These families are involved in the general secretory pathways of bacteria and are normally membrane-bound. 2009-05-08 19:53:19 \N 3 6 33 1882 7823 1 +332 CL0332 AcetylDC-like Acetyl-decarboxylase like superfamily Coggill P anon These families are double psi-beta barrel structures. 2009-05-08 19:53:39 \N 3 175 114 3736 19949 1 +333 CL0333 gCrystallin gCrystallin-like; Gamma-Crystallin-like superfamily Coggill P anon This superfamily includes a number of mammalian crystallins as well as ancestral beta gamma-crystallin precursor structures. 2009-05-08 19:54:54 \N 3 107 65 320 3580 1 +334 CL0334 THBO-biosyn Tetrahydrobiopterin biosynthesis-like enzyme superfamily Coggill P anon The families in this clan bind purine or ptein in topologically similar sites between subunits. 2009-05-08 19:57:58 \N 3 423 52 4568 17543 1 +335 CL0335 FumRed-TM Fumarate reductase respiratory complex transmembrane subunits Coggill P anon This superfamily constitutes two distinct families: in one family the common fold is contained in a single-chain subunit, in the other it is formed by two chains. 2009-05-08 19:58:20 \N 3 154 20 3092 7355 1 +336 CL0336 FMN-binding FMN-binding split barrel superfamily Coggill P anon This includes those related to the ferredoxin reductase-like FAD-binding domain and those that are Pyridoxine 5'-phosphate oxidase (PNP)-like. 2009-05-08 19:58:45 \N 3 224 139 4634 22171 1 +337 CL0337 RF \N Release factor superfamily Coggill P anon These families are peptide chain release factors. 2009-05-08 19:59:09 \N 3 19 24 4802 12227 1 +339 CL0339 PFL-like PFL-like glycyl radical enzyme superfamily Coggill P anon The N- and C-terminal halves of the structure have similar topologies but in some cases only one is represented by the members here, viz; the C-terminal domain of the R1 subunit of ribonucleotide reductase, and the N-terminal of PFL. 
The full-length structure is modelled by NRDD. 2009-05-08 19:59:52 \N 3 156 130 5379 23959 1 +340 CL0340 PTase-anion_tr Phosphotransferase/anion transport protein superfamily Coggill P anon The families here are the cytoplasmic regions of anion transporter proteins. 2009-05-08 20:00:18 \N 3 25 126 3482 19584 1 +341 CL0341 LDH_C \N LDH C-terminal domain-like superfamily Bateman A anon This superfamily includes the C-terminal domain of lactate/malate dehydrogenase as well as the C-terminal domain of the glycosyl hydrolase 4 family. 2009-05-08 20:00:39 \N 3 454 19 7644 15125 1 +342 CL0342 TolB_N TolB, N-terminal domain Bateman A anon Members of this superfamily appear to behave like the N-terminal fold of the TolB transport-portal complex protein, which is beta-stranded. 2009-05-08 20:00:59 \N 3 23 67 2109 6978 1 +343 CL0343 MHC MHC antigen-recognition domain Coggill P anon This superfamily includes all the Class I-related antigen-recognition domain families. 2009-05-08 20:01:29 \N 3 1002 27 843 46029 1 +344 CL0344 4Fe-4S 4Fe-4S ferredoxins Bateman A anon Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 2009-05-08 20:01:52 \N 3 366 2266 6690 129668 1 +345 CL0345 Aerolisin_ETX Aerolysin/ETX pore-forming domain superfamily Coggill P anon This superfamily includes pore-forming venoms and toxins from bacteria, plants, insects and fish. 2009-05-08 20:02:15 \N 3 26 17 136 416 1 +346 CL0346 Ribo_L29 \N Ribosomal protein L29, L29p, superfamily Coggill P anon Superfamily includes Ribosomal protein L29 family and its corresponding mitochondrial ribosomal family, L47. 2009-05-08 20:02:37 \N 3 242 12 4897 5412 1 +347 CL0347 Tetraspannin Tetraspannin-like Mistry J, Finn RD anon This clan includes the tetraspanin family which contains four transmembrane regions. 
The CD20 family also has four transmembrane regions, but its members are not considered true tetraspanins as they lack nearly all of the key functional tetraspanin residues [1]. 2009-05-08 20:04:53 \N 3 8 50 746 6471 1 +348 CL0348 Phage_tail Phage virion morphogenesis superfamily Finn RD, Coggill P anon Families involved in joining the tail to the head of the phage as well as those completing the head are included herein. 2009-05-08 20:05:12 \N 3 0 8 1841 2993 1 +349 CL0349 DprA MoCo carrier protein-like superfamily Bateman A anon Known family members of this superfamily are required for natural chromosomal and plasmid transformation. DprA is a new member of the recombination-mediator protein family, dedicated to natural bacterial transformation [1]. Superfamily includes lysine_decarboxylases. 2009-05-08 20:05:29 \N 3 59 53 4587 10964 1 +350 CL0350 PRC-barrel \N PRC-barrel like superfamily Finn RD, Coggill P anon The PRC-barrel is an all beta barrel domain found in the photosynthetic reaction centre subunit H of the purple bacteria [1]. 2009-05-08 20:05:48 \N 3 122 34 4098 7238 1 +351 CL0351 CHCH Coiled-coil helix coiled-coil helix superfamily Bateman A anon The conserved [coiled coil 1]-[helix 1]-[coiled coil 2]-[helix 2] domain (CHCH domain) superfamily members include NADH-ubiquinone oxidoreductases, some cytochrome oxidases and yeast mitochondrial ribosomal proteins. Within each helix of the CHCH domain there are two cysteines present in a C-X9-C motif. 2009-05-08 20:06:07 \N 3 58 39 353 2801 1 +352 CL0352 EsxAB WXG100-A/WXG100-B dimer Finn RD anon The WXG100 protein secretion system (Wss) is responsible for the secretion of WXG100 proteins (PF06013), such as ESAT-10 (6 kDa early secreted antigenic target) and CFP-10 (10 kDa culture filtrate protein) in Mycobacterium tuberculosis or EsxA (ESAT-6-like extracellularly secreted protein A) and EsxB in Staphylococcus aureus. 
These two proteins, generally encoded in the same gene cluster, form a 1:1 heterodimeric complex. These proteins are virulence factors involved in host-pathogen interaction [1], as demonstrated in Mycobacterium tuberculosis, Staphylococcus aureus or Bacillus anthracis. The Wss is encoded in many other Gram-positive (monoderm) bacteria. This superfamily contains a number of DUFs which are closely related and may or may not represent the same family of proteins. 2009-05-08 20:06:25 \N 3 34 135 901 12295 1 +353 CL0353 TIMP-like \N TIMP-like superfamily Bateman A anon This superfamily consists of the C-terminal domains of netrins, complement proteins C3, C4, C5, secreted frizzled-related proteins, and type I procollagen C-proteinase enhancer proteins, as well as the homologous N-terminal domains of tissue inhibitors of metalloproteinases (TIMPs). 2009-05-08 20:06:43 \N 3 69 60 208 1675 1 +354 CL0354 bBprotInhib \N beta-Barrel protease inhibitors Coggill P anon Superfamily consists of both metalloprotease- inhibitors and staphostatins. 2009-05-08 20:07:02 \N 2 12 6 424 573 1 +355 CL0355 CheC-like CheC-like superfamily Bateman A, Tuff TJ anon The chemotactic response regulator superfamily are CheY-P phosphatases. Their structure is two intertwined alpha-beta-(X)-beta(2) motifs. This superfamily comprises two classes of proteins each shown to interact with the chemotaxis response regulator CheY: the FliM switch proteins and the CheC-type phosphatases [1]. FliM is a component of the flagellar switch found across the bacteria and is responsible for binding CheY-P and changing the rotational direction of the flagella. The N-terminal domain is CheC-like and the C-terminal shares the SpoA domain with FliN and FliY. The CheC family is broadly broken down into three phosphatase subfamilies: CheC, CheX, and FliY. All three have an active site consensus sequence of D/S-X(3)-E-X(2)-N-X(22)-P. 
2009-05-08 20:07:19 \N 3 20 27 2027 5059 1 +356 CL0356 AMP_N-like Creatinase/prolidase N-terminal domain superfamily Coggill P anon Bacterial amino-peptidases and creatinases, where the fold is a ribonuclease H-like motif, are grouped in this superfamily. 2009-05-08 20:07:37 \N 3 91 34 4808 10134 1 +357 CL0357 SMAD-FHA SMAD/FHA domain superfamily Bateman A anon Superfamily members carry a few short helices inserted in loops within the 11 strands in 2 sheets (greek-key) of the parent fold. 2009-05-08 20:07:54 \N 3 135 399 2102 12462 1 +359 CL0359 Intron-mat_II \N Type II intron maturase-like superfamily Bateman A anon Superfamily includes a variety of transcription factors that bind intron RNA during reverse transcription and splicing. 2009-05-08 20:08:30 \N 3 14 51 26168 39344 1 +360 CL0360 MTH1187-YkoF MTH1187/YkoF-like superfamily Coggill P anon Putative cell-wall biogenesis proteins and HMP-binding proteins, all with the same Ferredoxin fold, are included in this superfamily. 2009-05-08 20:08:47 \N 3 35 7 1666 2146 1 +361 CL0361 C2H2-zf Classical C2H2 and C2HC zinc fingers Coggill P anon Superfamily of classical and closely related C2H2 or beta-beta-alpha zinc finger DNA-binding domains. 2009-05-08 20:09:05 \N 3 241 8637 2337 328673 1 +362 CL0362 RAMPS-Cas5-like CRISPR-associated (Cas) Repair Associated Mysterious Proteins Bateman A, Coggill P anon This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats [1]. It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. 
The Cas proteins from the host use the CRISPRs to mediate an antiviral response. After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation [2]. Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers [3]. 2009-05-08 20:09:23 \N 3 24 24 1529 4925 1 +363 CL0363 H-int Hedgehog/intein (Hint) superfamily Bateman A anon This superfamily includes Hedgehog C-terminal (Hog) autoprocessing domain and Intein (protein splicing domain) families. 2009-05-08 20:09:41 \N 3 29 477 1142 3298 1 +364 CL0364 Leu-IlvD \N LeuD/IlvD-like Bateman A anon Superfamily includes LeuD-like, IlvD/EDD C-terminal domain-like, and AF0055-like families. 2009-05-08 20:09:58 \N 3 36 36 4411 10971 1 +365 CL0365 MurF-HprK_N MurF and HprK N-domain-like superfamily Bateman A anon This includes both the MurE/MurF-ligases N-terminal domain and HPr kinase/phosphatase HprK N-terminal domain superfamilies. 2009-05-08 20:10:16 \N 3 14 40 3206 4809 1 +366 CL0366 JAB Mov34-like; JAB-like superfamily Bateman A, Iyer LM, Zhang D, Aravind L anon This superfamily includes a number of proteasome regulatory subunits, eukaryotic initiation factor 3 (eIF3) subunits, regulators of transcription factors and ubiquitination-assisting protein families. In eukaryotes and in prokaryotic cognates of the ubiquitin-based modification pathway, they function as ubiquitin isopeptidases/ deubiquitinases. JAB domains are also found in diverse metabolic pathways in prokaryotes such as siderophore and cysteine biosynthesis. Other distinct versions of the JAB domain, such as RadC are predicted to function as nucleases. 
Structurally, the JAB domain is related to the nucleotide deaminase and binds a Zinc ion in a similar structural location. 2009-05-08 20:10:33 \N 3 35 121 4285 10806 1 +367 CL0367 CI-2 CI-2 family of serine protease inhibitors Bateman A anon This superfamily includes a range of universally found subtilases, that are serine proteases. 2009-05-08 20:10:50 \N 3 54 6 411 761 1 +368 CL0368 PhosC-NucP1 \N Phospholipase C/P1 nuclease superfamily Coggill P anon This superfamily includes the Phospholipase C and P1-nuclease families. 2009-05-08 20:11:07 \N 3 26 29 2543 5327 1 +369 CL0369 GHD Glycosyl hydrolase domain superfamily Bateman A anon This domain is C-terminal to the catalytic beta/alpha barrel domain. The superfamily includes the C-terminal domain of a number of sugar-lytic families. 2009-05-08 20:11:24 \N 3 345 276 4003 18678 1 +370 CL0370 Uteroglobin Uteroglobin-like superfamily Coggill P anon Members of this superfamily are disulfide-linked dimers of two identical chains, with 4 helices in each. They constitute important new cat, rat and rabbit allergens that are contributing to asthma world-wide. 2009-05-08 20:11:41 \N 3 12 2 47 309 1 +371 CL0371 Inovirus-Coat \N Inovirus (filamentous phage) major coat protein Coggill P anon Superfamily contains a number of filamentous phage coat-protein families. 2009-05-08 20:11:58 \N 3 29 3 67 82 1 +372 CL0372 Hy-ly_N \N Hyaluronate lyase-like catalytic, N-terminal domain Bateman A anon This contains virus envelope protein, Chondroitin AC lyase and hyaluronate lyase families. 2009-05-08 20:12:16 \N 3 44 53 774 1055 1 +373 CL0373 Phage-coat Phage coat superfamily Coggill P anon A number of different phage coat-proteins are collected together in this superfamily. 2009-05-08 20:12:33 \N 3 76 25 2757 4895 1 +374 CL0374 PEP-carboxyk \N PEP carboxykinase-like superfamily Bateman A anon This includes the PEP carboxykinase C-terminal domain and HPr kinase HprK C-terminal domain families. 
2009-05-08 20:12:50 \N 3 100 16 5373 7630 1 +375 CL0375 Transporter Transporter superfamily Bateman A anon The members of this superfamily are probably all transporter protein domains. 2009-05-08 20:13:08 \N 3 0 42 409 5210 1 +376 CL0376 Oxa1 Cytochrome oxidase biogenesis family Bateman A anon The cytochrome oxidase biogenesis families are membrane transporters akin to the E coli protein YidC. For those proteins whose N-termini must reside in the intermembrane space, export is mediated by the Oxa1p export machinery, machinery that depends upon the membrane potential. Qxa1p homologues are found in all living organisms. TCDB:2.A.9. 2009-05-08 20:13:27 \N 3 0 30 4817 6863 1 +377 CL0377 FAH \N Fumarylacetoacetate hydrolase, C-terminal domain, superfamily Bateman A anon Superfamily contains fumarylacetoacetate hydrolase and related enzymes, 2009-05-08 20:13:45 \N 3 104 39 3513 10941 1 +378 CL0378 ANL Ac-CoA-synth; ANL superfamily Bateman A anon This superfamily consists of enzymes including luciferase, long chain fatty acid Co-A ligase, acetyl-CoA synthetase and various other closely-related synthetases as well as a plant auxin-responsive promoter family. The name ANL derives from from three of the subfamilies - Acyl-CoA synthetases, the NRPS adenylation domains, and the Luciferase enzymes [1]. Members of this superfamily catalyse the initial adenylation of a carboxylate to form an acyl-AMP intermediate, followed by a second partial reaction, most commonly the formation of a thioester [1]. 2009-05-08 20:14:02 \N 3 149 2269 6257 68785 1 +379 CL0379 PgaPase \N Pyroglutamate aminopeptidase superfamily Bateman A anon This is a collection of pyrrolidone carboxyl peptidase or pyroglutamate aminopeptidase families from bacteria and archaea. 2009-05-08 20:14:20 \N 3 60 16 1654 2070 1 +380 CL0380 IDO-like Indolic compounds 2,3-dioxygenase-like superfamily Bateman A anon Superfamily contains bacterial tryptophan 2,3-dioxygenase and indoleamine 2,3-dioxygenase-like families. 
2009-05-08 20:14:37 \N 3 60 18 837 1644 1 +381 CL0381 Metallo-HOrase \N Metallo-hydrolase/oxidoreductase superfamily Bateman A anon This superfamily of enzymes including beta-lactamases, thiolesterases, members of the glyoxalase II family that catalyse the hydrolysis of S-D-lactoyl-glutathione to form glutathione and D-lactic acid all bind two ions of zinc. An additional family of competence proteins essential for natural transformation do not appear to bind zinc, and might be a transporter involved in DNA uptake. 2009-05-08 20:14:54 \N 3 403 305 5380 50665 1 +382 CL0382 DNA-mend \N DNA breaking-rejoining enzyme superfamily Bateman A anon This is a superfamily of DNA recombinases, topoisomerases and integrases. 2009-05-08 20:15:13 \N 3 116 209 6017 46990 1 +383 CL0383 PheT-TilS Phenylalanine- and lysidine-tRNA synthetase domain superfamily Coggill P anon Families here are thought to contain a putative tRNA-binding structural motif. The families are the C-terminal domains of tRNA-Ile-lysidine and the phenylalanine-tRNA synthetases. 2009-05-08 20:15:30 \N 3 31 54 4735 8465 1 +384 CL0384 PLC PLC-like phosphodiesterases Coggill P anon Superfamily consists of Glycerophosphoryl diester phosphodiesterase and phosphatidylinositol-specific phospholipase C families. 2009-05-08 20:15:48 \N 3 108 279 4173 15000 1 +385 CL0385 Hydrophilin \N Hydrophilin-like superfamily Coggill P anon This superfamily includes plant and bacterial hydrophilin families. 2009-05-08 20:16:05 \N 3 0 17 752 2456 1 +386 CL0386 Ant-toxin_C \N Superantigen toxins, C-terminal domain superfamily Coggill P anon Superfamily contains bacterial super-antigen toxins and the MAP family. 2009-05-08 20:16:23 \N 3 216 9 194 4767 1 +387 CL0387 DHFred \N Dihydrofolate reductase-like Coggill P anon Superfamily contains the dihydrofolate reductases and the RibD C-terminal domain-like including HTP reductase families. 
2009-05-08 20:16:40 \N 3 474 41 4883 12238 1 +388 CL0388 FadR-C \N Fatty acid responsive transcription factor FadR, C-terminal domain Coggill P anon Superfamily includes C-terminal domain ligand-binding GntR families and families of fatty acid responsive transcription factors. This C-terminal domain, an antiparallel array of six alpha helices, forms a barrel-like structure, while a seventh alpha helix forms a 'lid' at the end closest to the N-terminal domain - a separate, DNA-binding winged-helix, domain. 2009-05-08 20:16:57 \N 3 24 47 3168 21611 1 +389 CL0389 TRAF \N TRAF domain-like superfamily Coggill P anon Superfamily has a circularly permuted immunoglobulin-fold topology with extra an extra beta-strand. Families include the Math and the SIAH, or Seven in absentia, members. 2009-05-08 20:17:16 \N 3 106 201 1094 6315 1 +390 CL0390 zf-FYVE-PHD FYVE/PHD zinc finger superfamily Coggill P anon Superfamily contains a number of zinc-fingers, of the FYVE/PHD type, which are found in several groups of proteins including myelin-associated oligodendrocytic basic proteins (MOBP) Rabphilins, melanophilins, exophilins and myosin-VIIA and Rab-interacting protein families. 2009-05-08 20:17:33 \N 3 184 1096 1231 20485 1 +391 CL0391 CAP_C-like Adenylate cyclase associated (CAP) C terminal like Mistry J anon Families in this clan adopt a beta super helix structure [1-2]. The clan includes the C terminal domain of adenylate cyclase which binds binds actin [1]. 2009-05-08 20:17:50 \N 3 14 29 324 1064 1 +392 CL0392 Chaperone-J \N Chaperone J-domain superfamily Coggill P anon The J-domain is found in a number of stress-response proteins. It is found at the N-terminal of Hsc20, DnaJ-chaperone in E. coli, and viral large T-antigen proteins; it is also in Hsc40, mammalian auxilin and in both animal and plant DnaJ proteins. It is also found in degenerate form in Pam16 proteins. 
2009-05-08 20:18:07 \N 3 72 636 5592 28318 1 +393 CL0393 FucI-AraA_C \N FucI/AraA C-terminal domain-like [50443] Coggill P anon The enzymes in this superfamily function as a hexamer, which is the largest structurally known ketol isomerase, that has no sequence or structural similarity to other ketol isomerases. 2009-05-08 20:18:26 \N 3 24 7 1558 2470 1 +394 CL0394 DsrEFH-like \N DsrEFH-like superfamily Bateman A anon This is a superfamily of small proteins from phototrophic sulfur bacteria that are involved in oxidisation of intracellular sulfur. 2009-05-08 20:18:45 \N 3 54 21 2423 6201 1 +395 CL0395 Tubby_C Tubby C-terminal domain-like Bateman A anon This superfamily contains the scramblase protein family, the Tub family and the DUF567, a family of plant and bacterial proteins of hitherto unknown function. All members are membrane-tethered transcription factors. 2009-05-08 20:19:04 \N 3 9 44 886 2617 1 +396 CL0396 Marvel-like MARVEL domain containing superfamily Bateman A anon The MAL and related proteins for vesicle trafficking and membrane link (MARVEL) domain is a module with a four transmembrane-helix architecture that has been identified in proteins of the myelin and lymphocyte (MAL), physins, gyrins and occludin families. 2009-05-08 20:19:22 \N 3 0 28 346 3075 1 +397 CL0397 TusA-like SirA-like; TusA-like superfamily Bateman A anon Member families include sulfurtransferase TusA. 2009-05-08 20:19:39 \N 3 8 61 3075 6124 1 +398 CL0398 RMMBL_DRMBL \N RNA/DNA-metabolising metallo-beta-lactamase motif Mistry J anon This clan contains the fifth motif found in RNA and DNA metabolising metallo-beta-lactamases. The fifth motif appears to be specific to function [1]. 2009-05-08 20:19:56 \N 3 52 66 3672 6801 1 +399 CL0399 Asp-glut_race Aspartate/glutamate racemase superfamily Bateman A anon Superfamily contains aspartate racemase, glutamate racemase, hydantoin racemase and arylmalonate decarboxylase families from fungi, plants, bacteria and archaeal species. 
2009-05-08 20:20:14 \N 3 108 17 4346 8931 1 +400 CL0400 GG-leader \N Double-Glycine leader-peptide cleavage motif Coggill P anon This is a collection of short bacterial families that carry a distinctive GG-cleavage motif. Conservation C-terminal to the GG-motif is not apparent. However, the families are all interconnected with critical virulence attributes of one kind or another. 2009-05-08 20:20:32 \N 3 5 10 475 1960 1 +401 CL0401 AsmA-like AsmA-like OmpF regulator protein superfamily Bateman A anon Families in this collection are AsmA-like. Mutations in the AsmA gene restore the assembly of OmpF, a trimeric outer membrane porin from E coli and related bacteria necessary for the cytotoxic action of group-A colicins. 2009-05-08 20:20:49 \N 3 0 60 2432 11866 1 +402 CL0402 Cdc48_2-like Cdc48 domain 2-like Coggill P anon Superfamily contains C-terminal domains of N-ethylmaleimide sensitive fusion proteins, VCP-like ATPases, membrane fusion ATPase p97 domain 2, peroxisome biogenesis factor 1 (PEX-1), domain 2, and ubiquitin fusion degradation protein UFD1 families. 2009-05-08 20:21:06 \N 3 52 46 547 1899 1 +403 CL0403 ADC-like \N Acetoacetate decarboxylase-like Finn RD anon Superfamily contains the acetoacetate decarboxylase enzyme family EC:4.1.1.4, and a family of uncharacterized proteins from bacteria. 2009-05-08 20:21:23 \N 3 20 14 682 968 1 +404 CL0404 BPD_transp_1 BPD_transp_1-like; BPD transporter like Mistry J, Finn RD anon This clan contains families that are involved in transport of molecules across membranes. It includes the bacterial binding protein-dependent transport system inner membrane component, Pfam:PF00528, which is ATP dependent system involved in transport of a range of substrates [1-2]. 2009-05-08 20:21:41 \N 3 36 162 5100 199771 1 +405 CL0405 DNA_b-psBarrel DNA-bdg_psBarrel; DNA-binding pseudo-barrel domain Finn RD anon Superfamily consists of type II restriction endonuclease effector (N-term) domain and plant B3 DNA binding domain families. 
2009-05-08 20:21:58 \N 3 5 51 174 2348 1 +406 CL0406 YjbJ-CsbD-like YjbJ-CsbD-like superfamily Finn RD anon CsbD is a bacterial general stress response protein. It's expression is mediated by sigma-B, an alternative sigma factor [1]. The role of CsbD in stress response is unclear. YjbJ is a hypothetical protein with a similar structure. 2009-05-08 20:22:16 \N 3 2 5 2526 5640 1 +407 CL0407 TBP-like TATA-binding protein like Mistry J, Finn RD anon TBP is a transcription factor whose DNA binding fold is composed of a curved antiparallel beta-sheet [1]. This fold is also found in the N terminal region of DNA repair glycosylases. The N terminal domain of DNA glycosylase has only a single copy of the fold, whereas TBP contains a duplication of this fold [2-3]. 2009-05-08 20:22:35 \N 3 138 32 2007 3848 1 +408 CL0408 PUP \N Purine and uridine phosphorylase superfamily Bateman A anon superfamily contains a number of purine nucleoside phosphorylase, uridine nucleoside phosphorylase, and various nucleosidase families of proteins. 2009-05-08 20:22:53 \N 3 825 229 4830 16130 1 +409 CL0409 GAP \N GTPase activation domain superfamily Finn RD anon Superfamily contains BCR-homology GTPase activation domain (BH-domain) and p120GAP domain-like, including the GAP related domain of neurofibromin, families. 2009-05-08 20:23:11 \N 3 42 410 365 9451 1 +410 CL0410 LEF-8-like LEF-8 like region of RNA polymerase Rpb2 Mistry J, Finn RD anon Late expression factor 8 (LEF-8) is one of the primary components of RNA polymerase produced by polyhedrosis viruses. LEF-8 shows homology to domain 6 of the second largest subunit of prokaryotic DNA-directed RNA polymerase[1]. 2009-05-08 20:23:28 \N 3 138 141 12554 17032 1 +411 CL0411 Vir Antigenic variants from Plasmodium cell-surface Finn RD anon Several families of paralogous proteins are included in this superfamily, largely from Plasmodium species. The genome expresses great numbers of them, and they vary subtly from each other. 
2009-05-08 20:23:45 \N 3 0 7 9 1914 1 +412 CL0412 Frag1-like \N Frag1 like Mistry J, Finn RD anon This clan contains the Frag1/DRA. +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# query sequence file: /home/inmare/galaxy/database/files/000/dataset_578.dat +# cpu number specified: 2 +# searching against: /home/inmare/galaxy/tools/pfamScan/hmm/Pfam-A.hmm, with cut off --cut_ga +# resolve clan overlaps: on +# predict active sites: off +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# +# +10FR_NODE_6#PROKKA_00001 25 131 23 132 PF14691.1 Fer4_20 Domain 3 110 111 91.1 3.2e-26 1 CL0344 +10FR_NODE_6#PROKKA_00002 28 394 28 394 PF00310.16 GATase_2 Domain 1 361 361 446.4 5.4e-134 1 CL0052 +10FR_NODE_6#PROKKA_00002 471 755 468 755 PF04898.9 Glu_syn_central Domain 4 288 288 358.5 2.3e-107 1 CL0036 +10FR_NODE_6#PROKKA_00002 815 1178 815 1178 PF01645.12 Glu_synthase Family 1 368 368 503.8 2.4e-151 1 CL0036 +10FR_NODE_6#PROKKA_00002 1259 1450 1256 1452 PF01493.14 GXGXG Family 4 200 202 228.0 5.3e-68 1 No_clan +10FR_NODE_6#PROKKA_00003 6 81 6 82 PF13740.1 ACT_6 Domain 1 75 76 81.8 2.1e-23 1 CL0070 +10FR_NODE_6#PROKKA_00003 196 364 196 364 PF12710.2 HAD Family 1 192 192 66.9 2.5e-18 1 CL0137 +10FR_NODE_6#PROKKA_00004 95 263 94 264 PF02222.17 ATP-grasp Family 2 171 172 138.3 1.5e-40 1 CL0179 +10FR_NODE_6#PROKKA_00005 5 153 4 154 PF00731.15 AIRC Domain 2 149 150 217.4 4.8e-65 1 No_clan +10FR_NODE_6#PROKKA_00006 98 355 93 355 PF02353.15 CMAS Family 5 273 273 296.5 1.4e-88 1 CL0063 +10FR_NODE_6#PROKKA_00007 44 78 43 79 PF07676.7 PD40 Repeat 2 38 39 35.2 6.5e-09 1 CL0186 +10FR_NODE_6#PROKKA_00007 99 131 98 135 PF07676.7 PD40 Repeat 2 33 39 14.8 0.017 1 CL0186 +10FR_NODE_6#PROKKA_00007 344 368 343 372 PF07676.7 PD40 Repeat 7 33 39 19.4 0.00057 1 CL0186 +10FR_NODE_6#PROKKA_00007 391 409 375 412 PF07676.7 PD40 Repeat 8 26 39 12.3 0.098 1 CL0186 +10FR_NODE_6#PROKKA_00007 481 501 473 505 PF07676.7 PD40 Repeat 11 33 39 
12.2 0.1 1 CL0186 +10FR_NODE_6#PROKKA_00007 681 748 680 749 PF14684.1 Tricorn_C1 Domain 2 69 70 76.4 1.1e-21 1 No_clan +10FR_NODE_6#PROKKA_00007 761 851 761 852 PF14685.1 Tricorn_PDZ Domain 1 87 88 80.6 5.1e-23 1 CL0466 +10FR_NODE_6#PROKKA_00007 881 1036 881 1036 PF03572.13 Peptidase_S41 Family 1 169 169 68.4 4.5e-19 1 CL0127 +10FR_NODE_6#PROKKA_00008 4 214 4 225 PF02811.14 PHP Family 1 170 175 43.3 3.2e-11 1 CL0034 +10FR_NODE_6#PROKKA_00009 1 48 1 48 PF13793.1 Pribosyltran_N Domain 70 116 116 63.1 1.8e-17 1 No_clan +10FR_NODE_6#PROKKA_00009 130 237 120 238 PF14572.1 Pribosyl_synth Domain 74 183 184 84.5 7.3e-24 1 CL0533 +10FR_NODE_6#PROKKA_00010 6 90 6 90 PF01386.14 Ribosomal_L25p Domain 1 88 88 84.9 2.9e-24 1 No_clan +10FR_NODE_6#PROKKA_00010 98 180 97 183 PF14693.1 Ribosomal_TL5_C Domain 2 85 88 63.7 1.3e-17 1 No_clan +10FR_NODE_6#PROKKA_00011 4 185 4 185 PF01195.14 Pept_tRNA_hydro Domain 1 184 184 190.2 2.3e-56 1 No_clan +10FR_NODE_6#PROKKA_00013 53 132 53 132 PF08545.5 ACP_syn_III Domain 1 80 80 102.2 8.2e-30 1 CL0046 +10FR_NODE_6#PROKKA_00013 184 271 184 271 PF08541.5 ACP_syn_III_C Domain 1 90 90 104.4 2.3e-30 1 CL0046 +10FR_NODE_6#PROKKA_00014 129 287 128 287 PF13377.1 Peripla_BP_3 Domain 2 160 160 108.8 2.5e-31 1 CL0144 +10FR_NODE_6#PROKKA_00015 25 111 25 111 PF13715.1 DUF4480 Repeat 1 88 88 54.3 9.9e-15 1 CL0287 +10FR_NODE_6#PROKKA_00015 121 232 119 232 PF07715.10 Plug Domain 3 108 108 50.6 1.9e-13 1 CL0072 +10FR_NODE_6#PROKKA_00015 626 885 626 885 PF00593.19 TonB_dep_Rec Family 1 277 277 62.5 5.6e-17 1 CL0193 +10FR_NODE_6#PROKKA_00016 41 435 41 435 PF00474.12 SSF Family 1 406 406 242.4 6.5e-72 1 CL0062 +10FR_NODE_6#PROKKA_00017 23 197 3 221 PF12899.2 Glyco_hydro_100 Domain 19 223 436 47.4 8.8e-13 1 CL0059 +10FR_NODE_6#PROKKA_00018 37 156 35 186 PF00485.13 PRK Domain 3 130 194 23.7 2.9e-05 1 CL0023 +11FR_NODE_14#PROKKA_00020 4 317 4 317 PF00389.25 2-Hacid_dh Domain 1 133 133 116.2 6.5e-34 1 CL0325 +11FR_NODE_14#PROKKA_00020 110 285 110 285 PF02826.14 
2-Hacid_dh_C Domain 1 178 178 208.1 5.2e-62 1 CL0063 +11FR_NODE_14#PROKKA_00021 304 326 303 326 PF00037.22 Fer4 Domain 2 24 24 31.1 1.1e-07 1 CL0344 +11FR_NODE_14#PROKKA_00022 189 342 173 346 PF00158.21 Sigma54_activat Domain 21 163 168 138.4 1.5e-40 1 CL0023 +11FR_NODE_14#PROKKA_00024 21 165 21 165 PF13492.1 GAF_3 Domain 1 129 129 32.7 6.4e-08 1 CL0161 +11FR_NODE_14#PROKKA_00024 199 365 198 365 PF00158.21 Sigma54_activat Domain 2 168 168 229.7 1.3e-68 1 CL0023 +11FR_NODE_14#PROKKA_00024 462 505 461 505 PF02954.14 HTH_8 Domain 2 42 42 27.5 1.6e-06 1 CL0123 +12FR_NODE_3#PROKKA_00026 38 98 36 100 PF01584.14 CheW Domain 3 63 138 55.5 3.9e-15 1 No_clan +12FR_NODE_3#PROKKA_00027 4 115 4 116 PF00072.19 Response_reg Domain 1 111 112 97.1 5.9e-28 1 CL0304 +12FR_NODE_3#PROKKA_00028 10 121 10 122 PF00072.19 Response_reg Domain 1 111 112 94.8 2.9e-27 1 CL0304 +12FR_NODE_3#PROKKA_00029 4 121 3 121 PF02951.9 GSH-S_N Domain 2 119 119 147.8 1.1e-43 1 No_clan +12FR_NODE_3#PROKKA_00029 125 297 125 299 PF02955.11 GSH-S_ATP Domain 1 172 174 236.0 1.3e-70 1 CL0179 +12FR_NODE_3#PROKKA_00030 204 274 203 284 PF03544.9 TonB_C Domain 2 70 79 71.7 4.2e-20 1 CL0428 +12FR_NODE_3#PROKKA_00031 16 172 16 172 PF02622.10 DUF179 Family 1 161 161 176.0 4e-52 1 No_clan +12FR_NODE_3#PROKKA_00032 11 141 10 143 PF03652.10 UPF0081 Family 2 133 135 126.1 9.4e-37 1 No_clan +12FR_NODE_3#PROKKA_00033 9 125 3 126 PF00156.22 Pribosyltran Domain 8 124 125 52.4 3.8e-14 1 CL0533 +12FR_NODE_3#PROKKA_00034 20 166 20 166 PF02729.16 OTCace_N Domain 1 142 142 131.3 1.8e-38 1 No_clan +12FR_NODE_3#PROKKA_00034 172 318 171 319 PF00185.19 OTCace Domain 2 157 158 139.2 1e-40 1 No_clan +12FR_NODE_3#PROKKA_00035 47 378 47 378 PF13147.1 Amidohydro_4 Domain 1 304 304 90.3 2e-25 1 CL0034 +12FR_NODE_3#PROKKA_00037 10 97 9 98 PF00216.16 Bac_DNA_binding Domain 2 89 90 105.8 8.2e-31 1 CL0548 +12FR_NODE_3#PROKKA_00040 272 695 267 697 PF06808.7 DctM Family 10 414 416 73.1 1.5e-20 1 CL0182 +12FR_NODE_3#PROKKA_00041 3 465 1 466 
PF01293.15 PEPCK_ATP Family 4 466 467 681.2 5.8e-205 1 CL0374 +12FR_NODE_3#PROKKA_00042 4 271 4 272 PF01430.14 HSP33 Family 1 279 280 305.2 3.4e-91 1 No_clan +12FR_NODE_3#PROKKA_00043 15 60 14 60 PF01479.20 S4 Domain 2 48 48 43.1 2e-11 1 CL0492 +12FR_NODE_3#PROKKA_00044 23 198 22 201 PF13419.1 HAD_2 Family 2 173 176 74.7 9.7e-21 1 CL0137 +12FR_NODE_3#PROKKA_00046 2 115 2 116 PF01149.19 Fapy_DNA_glyco Domain 1 115 116 117.4 4.1e-34 1 No_clan +12FR_NODE_3#PROKKA_00046 130 219 130 222 PF06831.9 H2TH Domain 1 90 93 85.2 1.9e-24 1 CL0303 +12FR_NODE_3#PROKKA_00046 242 269 242 270 PF06827.9 zf-FPG_IleRS Domain 1 28 30 34.9 8.1e-09 1 CL0167 +12FR_NODE_3#PROKKA_00048 13 216 12 226 PF00487.19 FA_desaturase Domain 2 231 257 67.7 1e-18 1 No_clan +12FR_NODE_3#PROKKA_00048 267 388 263 390 PF01610.12 DDE_Tnp_ISL3 Family 81 203 249 42.1 7.1e-11 1 CL0219 +12FR_NODE_3#PROKKA_00049 66 298 66 299 PF12697.2 Abhydrolase_6 Domain 1 227 228 146.3 1.3e-42 1 CL0028 +12FR_NODE_3#PROKKA_00050 25 201 25 202 PF03602.10 Cons_hypoth95 Family 1 182 183 199.7 2.5e-59 1 CL0063 +12FR_NODE_3#PROKKA_00051 125 201 124 201 PF02881.14 SRP54_N Domain 2 75 75 62.2 3.5e-17 1 No_clan +12FR_NODE_3#PROKKA_00051 220 420 220 421 PF00448.17 SRP54 Family 1 195 196 266.1 1.4e-79 1 CL0023 +12FR_NODE_3#PROKKA_00052 18 165 18 166 PF00005.22 ABC_tran Domain 1 136 137 116.0 1.4e-33 1 CL0023 +12FR_NODE_3#PROKKA_00053 201 317 199 321 PF02687.16 FtsX Family 3 115 121 40.0 2.6e-10 1 CL0404 +12FR_NODE_3#PROKKA_00054 17 49 16 50 PF00140.15 Sigma70_r1_2 Family 4 36 37 29.8 3.4e-07 1 No_clan +12FR_NODE_3#PROKKA_00054 54 123 54 124 PF04542.9 Sigma70_r2 Domain 1 70 71 64.2 5.9e-18 1 CL0123 +12FR_NODE_3#PROKKA_00054 229 280 229 280 PF04545.11 Sigma70_r4 Domain 1 50 50 55.7 2.1e-15 1 CL0123 +12FR_NODE_3#PROKKA_00055 6 163 4 163 PF03446.10 NAD_binding_2 Domain 3 163 163 170.7 2.2e-50 1 CL0063 +12FR_NODE_3#PROKKA_00055 165 282 165 285 PF14833.1 NAD_binding_11 Domain 1 119 122 97.3 5.9e-28 1 CL0063 +12FR_NODE_3#PROKKA_00056 21 220 21 
220 PF01261.19 AP_endonuc_2 Domain 1 213 213 107.9 4.2e-31 1 CL0036 +12FR_NODE_3#PROKKA_00057 27 136 27 137 PF00072.19 Response_reg Domain 1 111 112 87.8 4.5e-25 1 CL0304 +12FR_NODE_3#PROKKA_00057 171 246 171 246 PF00486.23 Trans_reg_C Domain 1 77 77 96.1 8.1e-28 1 CL0123 +12FR_NODE_3#PROKKA_00058 166 234 166 234 PF00672.20 HAMP Family 3 70 70 49.9 2.6e-13 1 No_clan +12FR_NODE_3#PROKKA_00058 245 301 240 303 PF00512.20 HisKA Domain 8 66 68 28.2 1.3e-06 1 CL0025 +12FR_NODE_3#PROKKA_00058 348 460 348 462 PF02518.21 HATPase_c Domain 1 109 111 88.4 2.3e-25 1 CL0025 +12FR_NODE_3#PROKKA_00063 111 370 110 370 PF02353.15 CMAS Family 2 273 273 301.3 4.9e-90 1 CL0063 +12FR_NODE_3#PROKKA_00064 9 123 5 124 PF00903.20 Glyoxalase Domain 5 127 128 24.6 1.9e-05 1 CL0104 +12FR_NODE_3#PROKKA_00066 17 140 17 141 PF00892.15 EamA Family 1 125 126 59.3 3.7e-16 1 CL0184 +12FR_NODE_3#PROKKA_00067 56 134 56 135 PF12833.2 HTH_18 Domain 1 80 81 90.6 4.9e-26 1 CL0123 +14FR_NODE_9#PROKKA_00069 9 553 2 554 PF00821.13 PEPCK Family 38 586 587 522.6 9.4e-157 1 CL0374 +14FR_NODE_9#PROKKA_00071 108 361 108 361 PF00425.13 Chorismate_bind Family 1 257 257 280.6 9.9e-84 1 No_clan +14FR_NODE_9#PROKKA_00071 388 570 387 577 PF01063.14 Aminotran_4 Domain 2 208 231 112.0 2.7e-32 1 No_clan +14FR_NODE_9#PROKKA_00073 33 257 30 258 PF13531.1 SBP_bac_11 Family 4 229 230 49.3 4.5e-13 1 CL0177 +14FR_NODE_9#PROKKA_00075 320 601 313 604 PF00128.19 Alpha-amylase Domain 8 312 316 56.3 3.1e-15 1 CL0058 +14FR_NODE_9#PROKKA_00076 3 72 3 73 PF00076.17 RRM_1 Domain 1 69 70 82.8 9.4e-24 1 CL0221 +14FR_NODE_9#PROKKA_00077 3 72 3 73 PF00076.17 RRM_1 Domain 1 69 70 84.0 4e-24 1 CL0221 +14FR_NODE_9#PROKKA_00078 3 72 3 73 PF00076.17 RRM_1 Domain 1 69 70 81.0 3.4e-23 1 CL0221 +14FR_NODE_9#PROKKA_00079 8 88 6 89 PF00381.14 PTS-HPr Domain 3 83 84 99.0 1e-28 1 No_clan +14FR_NODE_9#PROKKA_00080 19 143 19 144 PF05524.8 PEP-utilisers_N Family 1 122 123 85.1 3.1e-24 1 No_clan +14FR_NODE_9#PROKKA_00080 167 242 162 242 PF00391.18 
PEP-utilizers Family 7 81 81 85.9 1e-24 1 No_clan +14FR_NODE_9#PROKKA_00080 275 558 267 559 PF02896.13 PEP-utilizers_C Domain 9 293 294 351.7 2.1e-105 1 CL0151 +14FR_NODE_9#PROKKA_00081 13 86 11 105 PF02357.14 NusG Family 3 80 92 41.1 1.5e-10 1 CL0439 +14FR_NODE_9#PROKKA_00083 10 183 8 183 PF00753.22 Lactamase_B Domain 3 194 194 59.5 3.3e-16 1 CL0381 +14FR_NODE_9#PROKKA_00083 374 453 362 454 PF00581.15 Rhodanese Domain 12 112 113 68.5 5.7e-19 1 No_clan +15FR_NODE_1#PROKKA_00087 53 390 53 391 PF00344.15 SecY Family 1 345 346 377.1 6.4e-113 1 No_clan +15FR_NODE_1#PROKKA_00088 1 37 1 37 PF00444.13 Ribosomal_L36 Domain 1 38 38 75.0 3.1e-21 1 No_clan +15FR_NODE_1#PROKKA_00089 3 108 3 108 PF00416.17 Ribosomal_S13 Family 1 107 107 122.9 7e-36 1 CL0303 +15FR_NODE_1#PROKKA_00090 19 128 19 128 PF00411.14 Ribosomal_S11 Family 1 110 110 157.1 1.5e-46 1 CL0267 +15FR_NODE_1#PROKKA_00091 2 97 2 97 PF00163.14 Ribosomal_S4 Family 1 94 94 87.5 5.5e-25 1 No_clan +15FR_NODE_1#PROKKA_00091 99 145 98 145 PF01479.20 S4 Domain 2 48 48 76.5 7.5e-22 1 CL0492 +15FR_NODE_1#PROKKA_00092 28 227 28 228 PF01193.19 RNA_pol_L Domain 1 65 66 67.0 5.5e-19 1 CL0509 +15FR_NODE_1#PROKKA_00092 64 153 56 156 PF01000.21 RNA_pol_A_bac Domain 15 108 112 81.8 3.6e-23 1 No_clan +15FR_NODE_1#PROKKA_00092 245 306 241 307 PF03118.10 RNA_pol_A_CTD Domain 5 66 67 82.6 9e-24 1 CL0198 +15FR_NODE_1#PROKKA_00093 20 116 20 116 PF01196.14 Ribosomal_L17 Family 1 97 97 125.2 1.1e-36 1 No_clan +15FR_NODE_1#PROKKA_00094 5 307 5 311 PF07992.9 Pyr_redox_2 Domain 1 196 201 76.9 1.7e-21 1 CL0063 +15FR_NODE_1#PROKKA_00094 175 225 175 237 PF00070.22 Pyr_redox Domain 1 51 80 28.3 1.7e-06 1 CL0063 +15FR_NODE_1#PROKKA_00094 343 445 340 447 PF02852.17 Pyr_redox_dim Domain 4 108 110 53.1 2.7e-14 1 No_clan +15FR_NODE_1#PROKKA_00095 23 182 14 182 PF13489.1 Methyltransf_23 Domain 10 161 161 68.0 7.3e-19 1 CL0063 +15FR_NODE_1#PROKKA_00097 2 102 1 103 PF00939.14 Na_sulph_symp Family 364 466 471 30.7 1.5e-07 1 CL0182 
+15FR_NODE_1#PROKKA_00098 235 263 230 265 PF07849.6 DUF1641 Family 5 33 42 22.6 5.5e-05 1 No_clan +15FR_NODE_1#PROKKA_00099 7 322 6 324 PF07992.9 Pyr_redox_2 Domain 2 199 201 34.1 2.4e-08 1 CL0063 +15FR_NODE_1#PROKKA_00100 39 537 4 537 PF03600.11 CitMHS Family 38 351 351 115.1 2.5e-33 1 CL0182 +15FR_NODE_1#PROKKA_00100 219 287 218 289 PF02080.16 TrkA_C Domain 2 69 71 47.1 1.3e-12 1 No_clan +15FR_NODE_1#PROKKA_00100 309 370 309 375 PF02080.16 TrkA_C Domain 1 61 71 35.0 7.6e-09 1 No_clan +15FR_NODE_1#PROKKA_00101 797 859 746 860 PF00005.22 ABC_tran Domain 84 136 137 23.2 6.6e-05 1 CL0023 +15FR_NODE_1#PROKKA_00102 14 459 12 472 PF13520.1 AA_permease_2 Family 4 419 426 134.3 3.9e-39 1 CL0062 +15FR_NODE_1#PROKKA_00103 130 164 124 183 PF13188.1 PAS_8 Domain 10 43 64 17.5 0.0027 1 CL0183 +15FR_NODE_1#PROKKA_00103 249 357 248 357 PF00989.19 PAS Domain 2 113 113 44.6 1e-11 1 CL0183 +15FR_NODE_1#PROKKA_00103 384 485 382 486 PF13426.1 PAS_9 Domain 3 103 104 45.2 9.1e-12 1 CL0183 +15FR_NODE_1#PROKKA_00103 500 581 499 608 PF00989.19 PAS Domain 2 85 113 31.6 1.1e-07 1 CL0183 +15FR_NODE_1#PROKKA_00103 622 777 622 780 PF00990.16 GGDEF Domain 1 158 161 138.4 1.5e-40 1 CL0276 +15FR_NODE_1#PROKKA_00103 801 973 799 974 PF00563.15 EAL Domain 3 174 236 129.8 1.1e-37 1 No_clan +15FR_NODE_1#PROKKA_00105 4 334 3 334 PF00456.16 Transketolase_N Domain 2 333 333 518.3 6.2e-156 1 CL0254 +15FR_NODE_1#PROKKA_00105 354 526 353 527 PF02779.19 Transket_pyr Domain 2 177 178 155.3 1.1e-45 1 CL0254 +15FR_NODE_1#PROKKA_00105 548 656 541 656 PF02780.15 Transketolase_C Domain 7 124 124 36.4 4.1e-09 1 No_clan +15FR_NODE_1#PROKKA_00106 2 155 2 155 PF00044.19 Gp_dh_N Domain 1 151 151 201.6 5.6e-60 1 CL0063 +15FR_NODE_1#PROKKA_00106 160 315 160 315 PF02800.15 Gp_dh_C Domain 1 157 157 210.0 1.1e-62 1 CL0139 +15FR_NODE_1#PROKKA_00107 6 379 5 379 PF00162.14 PGK Domain 2 384 384 466.3 5.2e-140 1 No_clan +15FR_NODE_1#PROKKA_00108 9 344 8 349 PF00224.16 PK Family 2 341 348 392.0 1.6e-117 1 CL0151 
+15FR_NODE_1#PROKKA_00108 363 476 362 477 PF02887.11 PK_C Domain 2 116 117 97.0 4.9e-28 1 No_clan +15FR_NODE_1#PROKKA_00109 65 165 62 166 PF00034.16 Cytochrom_C Domain 4 90 91 26.0 1.2e-05 1 CL0318 +15FR_NODE_1#PROKKA_00109 332 407 331 407 PF13442.1 Cytochrome_CBB3 Domain 2 67 67 39.7 4e-10 1 CL0318 +15FR_NODE_1#PROKKA_00109 437 510 435 510 PF13442.1 Cytochrome_CBB3 Domain 4 67 67 52.4 4.3e-14 1 CL0318 +15FR_NODE_1#PROKKA_00110 7 356 6 358 PF02322.10 Cyto_ox_2 Family 2 323 328 172.6 1.1e-50 1 No_clan +15FR_NODE_1#PROKKA_00111 86 839 84 846 PF01804.13 Penicil_amidase Family 3 722 735 358.9 8.7e-107 1 CL0052 +15FR_NODE_1#PROKKA_00112 4 154 4 178 PF01656.18 CbiA Domain 1 163 195 70.7 1e-19 1 CL0023 +15FR_NODE_1#PROKKA_00113 10 198 1 199 PF01734.17 Patatin Family 21 203 204 53.0 4e-14 1 CL0323 +15FR_NODE_1#PROKKA_00114 1 285 1 287 PF03668.10 ATP_bind_2 Family 1 282 284 276.2 2.3e-82 1 CL0023 +15FR_NODE_1#PROKKA_00115 4 95 2 95 PF02482.14 Ribosomal_S30AE Family 3 97 97 101.0 3.5e-29 1 No_clan +15FR_NODE_1#PROKKA_00116 70 263 63 264 PF04963.8 Sigma54_CBD Domain 7 193 194 165.8 7.3e-49 1 CL0123 +15FR_NODE_1#PROKKA_00116 277 435 276 435 PF04552.8 Sigma54_DBD Domain 2 160 160 198.0 6.4e-59 1 CL0123 +15FR_NODE_1#PROKKA_00117 20 166 19 166 PF00005.22 ABC_tran Domain 2 137 137 120.4 6.3e-35 1 CL0023 +15FR_NODE_1#PROKKA_00117 215 236 214 236 PF12399.3 BCA_ABC_TP_C Family 2 23 23 26.2 4.2e-06 1 No_clan +15FR_NODE_1#PROKKA_00118 40 151 40 153 PF03968.9 OstA Family 1 111 113 60.7 1.1e-16 1 CL0259 +15FR_NODE_1#PROKKA_00119 19 189 16 189 PF06835.8 LptC Family 4 176 176 104.3 4.4e-30 1 CL0259 +15FR_NODE_1#PROKKA_00120 76 143 71 151 PF08282.7 Hydrolase_3 Domain 179 246 254 29.6 4.7e-07 1 CL0137 +15FR_NODE_1#PROKKA_00121 44 174 43 175 PF01380.17 SIS Family 2 130 131 106.9 5.4e-31 1 CL0067 +15FR_NODE_1#PROKKA_00121 204 260 204 261 PF00571.23 CBS Domain 1 55 57 35.6 5.7e-09 1 No_clan +15FR_NODE_1#PROKKA_00121 273 322 270 324 PF00571.23 CBS Domain 4 54 57 33.9 1.9e-08 1 No_clan 
+15FR_NODE_1#PROKKA_00122 10 74 8 75 PF01722.13 BolA Family 2 75 76 63.0 1.8e-17 1 No_clan +15FR_NODE_1#PROKKA_00123 7 407 6 407 PF00275.15 EPSP_synthase Family 2 419 419 391.1 4.2e-117 1 CL0290 +15FR_NODE_1#PROKKA_00124 3 76 3 77 PF13417.1 GST_N_3 Domain 1 74 75 60.1 1.7e-16 1 CL0172 +15FR_NODE_1#PROKKA_00124 117 183 86 183 PF00043.20 GST_C Domain 28 95 95 28.0 1.6e-06 1 CL0497 +15FR_NODE_1#PROKKA_00125 8 105 6 107 PF04386.8 SspB Family 5 111 155 98.2 3.2e-28 1 No_clan +16FR_NODE_10#PROKKA_00128 28 374 28 377 PF13458.1 Peripla_BP_6 Family 1 339 343 198.0 2.5e-58 1 CL0144 +16FR_NODE_10#PROKKA_00129 5 60 4 62 PF01842.20 ACT Domain 2 59 66 37.5 1.1e-09 1 CL0070 +16FR_NODE_10#PROKKA_00129 72 123 70 136 PF01842.20 ACT Domain 3 52 66 21.6 0.00011 1 CL0070 +16FR_NODE_10#PROKKA_00130 3 141 3 141 PF01118.19 Semialdhyde_dh Domain 1 121 121 103.3 9.8e-30 1 CL0063 +16FR_NODE_10#PROKKA_00130 158 316 158 316 PF02774.13 Semialdhyde_dhC Domain 1 184 184 75.1 5.8e-21 1 CL0139 +16FR_NODE_10#PROKKA_00131 82 174 69 183 PF01551.17 Peptidase_M23 Family 4 88 96 24.1 2.7e-05 1 CL0105 +16FR_NODE_10#PROKKA_00132 246 272 244 273 PF13174.1 TPR_6 Repeat 6 32 33 12.0 0.22 1 CL0020 +16FR_NODE_10#PROKKA_00132 315 338 312 341 PF00515.23 TPR_1 Repeat 8 31 34 13.4 0.043 1 CL0020 +16FR_NODE_10#PROKKA_00133 27 197 27 198 PF01734.17 Patatin Family 1 203 204 60.8 1.6e-16 1 CL0323 +16FR_NODE_10#PROKKA_00134 94 231 94 231 PF13365.1 Trypsin_2 Domain 1 120 120 75.7 2.8e-21 1 CL0124 +16FR_NODE_10#PROKKA_00134 267 357 267 358 PF13180.1 PDZ_2 Domain 1 81 82 62.5 2.4e-17 1 CL0466 +16FR_NODE_10#PROKKA_00136 3 63 2 65 PF07879.6 PHB_acc_N Domain 2 62 64 80.9 3.8e-23 1 No_clan +16FR_NODE_10#PROKKA_00137 8 285 8 286 PF03007.11 WES_acyltransf Domain 1 262 263 127.4 6.8e-37 1 CL0149 +16FR_NODE_10#PROKKA_00137 326 468 322 471 PF06974.8 DUF1298 Family 6 149 153 56.4 2.8e-15 1 No_clan +16FR_NODE_10#PROKKA_00138 12 310 11 345 PF02374.10 ArsA_ATPase Family 2 273 305 104.5 5.1e-30 1 CL0023 +16FR_NODE_10#PROKKA_00139 32 187 
31 194 PF02374.10 ArsA_ATPase Family 2 154 305 78.9 3.3e-22 1 CL0023 +16FR_NODE_10#PROKKA_00139 194 258 185 267 PF02374.10 ArsA_ATPase Family 193 256 305 24.7 1e-05 1 CL0023 +16FR_NODE_10#PROKKA_00140 3 61 2 66 PF07879.6 PHB_acc_N Domain 2 60 64 56.3 1.9e-15 1 No_clan +16FR_NODE_10#PROKKA_00142 9 178 9 179 PF01734.17 Patatin Family 1 203 204 72.1 5.7e-20 1 CL0323 +16FR_NODE_10#PROKKA_00144 55 308 54 309 PF01761.15 DHQ_synthase Domain 2 260 261 306.5 9.6e-92 1 CL0224 +16FR_NODE_10#PROKKA_00145 87 330 69 335 PF00793.15 DAHP_synth_1 Domain 9 267 272 262.7 2.1e-78 1 CL0036 +16FR_NODE_10#PROKKA_00147 14 75 12 75 PF00392.16 GntR Family 3 64 64 76.3 8.8e-22 1 CL0123 +2FR_NODE_5#PROKKA_00149 53 83 11 83 PF00005.22 ABC_tran Domain 107 137 137 23.1 6.8e-05 1 CL0023 +2FR_NODE_5#PROKKA_00149 133 155 133 155 PF12399.3 BCA_ABC_TP_C Family 1 23 23 31.2 1.1e-07 1 No_clan +2FR_NODE_5#PROKKA_00150 39 182 37 183 PF00005.22 ABC_tran Domain 3 136 137 96.1 2e-27 1 CL0023 +2FR_NODE_5#PROKKA_00151 72 372 72 373 PF01546.23 Peptidase_M20 Family 1 188 189 118.9 1.7e-34 1 CL0035 +2FR_NODE_5#PROKKA_00151 175 268 175 279 PF07687.9 M20_dimer Domain 1 97 112 79.2 1.7e-22 1 No_clan +2FR_NODE_5#PROKKA_00152 531 547 530 547 PF00353.14 HemolysinCabind Repeat 2 18 18 16.6 0.0069 1 No_clan +2FR_NODE_5#PROKKA_00152 548 565 548 565 PF00353.14 HemolysinCabind Repeat 1 18 18 15.6 0.014 1 No_clan +2FR_NODE_5#PROKKA_00152 566 583 566 583 PF00353.14 HemolysinCabind Repeat 1 18 18 21.4 0.0002 1 No_clan +2FR_NODE_5#PROKKA_00152 584 599 584 600 PF00353.14 HemolysinCabind Repeat 1 16 18 13.1 0.09 1 No_clan +2FR_NODE_5#PROKKA_00152 2919 2935 2919 2936 PF00353.14 HemolysinCabind Repeat 1 17 18 8.9 2.2 1 No_clan +2FR_NODE_5#PROKKA_00152 2937 2954 2937 2954 PF00353.14 HemolysinCabind Repeat 1 18 18 16.4 0.0078 1 No_clan +2FR_NODE_5#PROKKA_00152 2955 2972 2955 2972 PF00353.14 HemolysinCabind Repeat 1 18 18 18.0 0.0025 1 No_clan +2FR_NODE_5#PROKKA_00152 2973 2989 2973 2989 PF00353.14 HemolysinCabind Repeat 1 17 18 12.0 
0.2 1 No_clan +2FR_NODE_5#PROKKA_00152 3035 3051 3034 3051 PF00353.14 HemolysinCabind Repeat 2 18 18 7.7 5.1 1 No_clan +2FR_NODE_5#PROKKA_00152 3052 3068 3052 3068 PF00353.14 HemolysinCabind Repeat 1 17 18 12.1 0.2 1 No_clan +2FR_NODE_5#PROKKA_00152 3190 3362 3182 3367 PF13229.1 Beta_helix Family 31 149 158 22.7 6.6e-05 1 CL0268 +2FR_NODE_5#PROKKA_00152 3488 3504 3487 3504 PF00353.14 HemolysinCabind Repeat 2 18 18 15.1 0.022 1 No_clan +2FR_NODE_5#PROKKA_00152 3496 3513 3496 3513 PF00353.14 HemolysinCabind Repeat 1 18 18 12.2 0.19 1 No_clan +2FR_NODE_5#PROKKA_00152 3506 3522 3505 3522 PF00353.14 HemolysinCabind Repeat 2 18 18 10.0 0.93 1 No_clan +2FR_NODE_5#PROKKA_00152 3871 3888 3871 3888 PF00353.14 HemolysinCabind Repeat 1 18 18 10.0 0.94 1 No_clan +2FR_NODE_5#PROKKA_00152 3890 3905 3889 3905 PF00353.14 HemolysinCabind Repeat 2 17 18 8.3 3.3 1 No_clan +2FR_NODE_5#PROKKA_00152 4250 4266 4249 4266 PF00353.14 HemolysinCabind Repeat 2 18 18 12.6 0.14 1 No_clan +2FR_NODE_5#PROKKA_00152 4280 4294 4278 4294 PF00353.14 HemolysinCabind Repeat 4 18 18 10.8 0.5 1 No_clan +2FR_NODE_5#PROKKA_00152 4296 4311 4295 4311 PF00353.14 HemolysinCabind Repeat 2 17 18 7.2 7.4 1 No_clan +2FR_NODE_5#PROKKA_00152 4367 4383 4367 4384 PF00353.14 HemolysinCabind Repeat 1 17 18 9.7 1.1 1 No_clan +2FR_NODE_5#PROKKA_00152 4428 4444 4428 4445 PF00353.14 HemolysinCabind Repeat 1 17 18 13.6 0.066 1 No_clan +2FR_NODE_5#PROKKA_00152 4564 4693 4557 4717 PF13229.1 Beta_helix Family 9 137 158 34.5 1.5e-08 1 CL0268 +2FR_NODE_5#PROKKA_00152 4728 4852 4720 4868 PF13229.1 Beta_helix Family 2 132 158 21.8 0.00013 1 CL0268 +2FR_NODE_5#PROKKA_00152 5013 5030 5013 5030 PF00353.14 HemolysinCabind Repeat 1 18 18 9.5 1.3 1 No_clan +2FR_NODE_5#PROKKA_00152 5031 5047 5031 5047 PF00353.14 HemolysinCabind Repeat 1 17 18 8.4 3 1 No_clan +2FR_NODE_5#PROKKA_00152 5102 5117 5101 5117 PF00353.14 HemolysinCabind Repeat 2 17 18 12.4 0.15 1 No_clan +2FR_NODE_5#PROKKA_00152 5162 5179 5162 5179 PF00353.14 HemolysinCabind Repeat 
1 18 18 13.4 0.076 1 No_clan +2FR_NODE_5#PROKKA_00152 5180 5196 5180 5196 PF00353.14 HemolysinCabind Repeat 1 17 18 14.2 0.043 1 No_clan +2FR_NODE_5#PROKKA_00152 5270 5433 5258 5438 PF13229.1 Beta_helix Family 5 158 158 31.4 1.4e-07 1 CL0268 +2FR_NODE_5#PROKKA_00152 5845 5862 5845 5862 PF00353.14 HemolysinCabind Repeat 1 18 18 13.0 0.097 1 No_clan +2FR_NODE_5#PROKKA_00152 5864 5876 5863 5877 PF00353.14 HemolysinCabind Repeat 2 14 18 8.1 3.9 1 No_clan +2FR_NODE_5#PROKKA_00152 6026 6122 5998 6129 PF05048.8 NosD Domain 80 169 236 25.3 7e-06 1 CL0268 +2FR_NODE_5#PROKKA_00152 6498 6514 6497 6514 PF00353.14 HemolysinCabind Repeat 2 18 18 13.0 0.099 1 No_clan +2FR_NODE_5#PROKKA_00152 6515 6532 6515 6532 PF00353.14 HemolysinCabind Repeat 1 18 18 14.0 0.047 1 No_clan +2FR_NODE_5#PROKKA_00153 56 244 56 244 PF02321.13 OEP Family 1 188 188 79.2 2.7e-22 1 CL0105 +2FR_NODE_5#PROKKA_00153 265 439 264 445 PF02321.13 OEP Family 2 179 188 92.6 2.1e-26 1 CL0105 +2FR_NODE_5#PROKKA_00154 54 176 50 178 PF03412.10 Peptidase_C39 Family 10 129 131 37.1 2.4e-09 1 CL0125 +2FR_NODE_5#PROKKA_00154 209 469 206 475 PF00664.18 ABC_membrane Family 4 269 275 49.9 2.7e-13 1 CL0241 +2FR_NODE_5#PROKKA_00154 538 687 538 687 PF00005.22 ABC_tran Domain 1 137 137 123.0 9.8e-36 1 CL0023 +2FR_NODE_5#PROKKA_00155 66 358 63 364 PF00529.15 HlyD Family 4 296 305 85.0 5.3e-24 1 CL0105 +2FR_NODE_5#PROKKA_00156 56 283 41 303 PF07470.8 Glyco_hydro_88 Family 22 210 336 32.8 3.6e-08 1 CL0059 +2FR_NODE_5#PROKKA_00157 1 219 1 221 PF09140.6 MipZ Family 32 258 261 232.4 4.5e-69 1 CL0023 +3FR_NODE_4#PROKKA_00158 2 129 1 132 PF02653.11 BPD_transp_2 Family 136 266 267 69.5 2e-19 1 CL0142 +3FR_NODE_4#PROKKA_00159 32 292 31 294 PF02653.11 BPD_transp_2 Family 2 264 267 115.4 2e-33 1 CL0142 +3FR_NODE_4#PROKKA_00160 19 169 19 169 PF00005.22 ABC_tran Domain 1 137 137 100.7 7.5e-29 1 CL0023 +3FR_NODE_4#PROKKA_00160 217 239 217 239 PF12399.3 BCA_ABC_TP_C Family 1 23 23 39.0 4.1e-10 1 No_clan +3FR_NODE_4#PROKKA_00160 280 423 280 424 
PF00005.22 ABC_tran Domain 1 136 137 94.9 4.8e-27 1 CL0023 +3FR_NODE_4#PROKKA_00162 30 154 29 155 PF08240.7 ADH_N Domain 2 108 109 96.3 8.4e-28 1 CL0296 +3FR_NODE_4#PROKKA_00162 197 316 197 326 PF00107.21 ADH_zinc_N Family 1 120 130 111.4 2e-32 1 CL0063 +3FR_NODE_4#PROKKA_00163 15 470 15 471 PF00171.17 Aldedh Family 1 461 462 520.8 2.2e-156 1 CL0099 +3FR_NODE_4#PROKKA_00164 5 139 1 139 PF12276.3 DUF3617 Family 8 162 162 94.3 4.9e-27 1 No_clan +3FR_NODE_4#PROKKA_00165 30 426 28 427 PF00501.23 AMP-binding Family 3 416 417 314.2 8.6e-94 1 CL0378 +3FR_NODE_4#PROKKA_00165 436 509 435 509 PF13193.1 AMP-binding_C Domain 2 73 73 48.3 1.4e-12 1 CL0531 +3FR_NODE_4#PROKKA_00166 6 117 5 117 PF02771.11 Acyl-CoA_dh_N Domain 2 113 113 101.5 3.7e-29 1 CL0544 +3FR_NODE_4#PROKKA_00166 121 172 121 172 PF02770.14 Acyl-CoA_dh_M Domain 1 52 52 81.9 1.6e-23 1 No_clan +3FR_NODE_4#PROKKA_00166 232 378 230 379 PF00441.19 Acyl-CoA_dh_1 Domain 3 149 150 168.8 8e-50 1 CL0087 +3FR_NODE_4#PROKKA_00167 28 77 28 79 PF09339.5 HTH_IclR Domain 1 50 52 57.9 5.3e-16 1 CL0123 +3FR_NODE_4#PROKKA_00167 145 264 142 269 PF01614.13 IclR Family 4 124 129 44.6 1e-11 1 CL0161 +3FR_NODE_4#PROKKA_00168 3 267 1 268 PF00108.18 Thiolase_N Domain 3 263 264 256.4 2e-76 1 CL0046 +3FR_NODE_4#PROKKA_00168 276 399 275 399 PF02803.13 Thiolase_C Domain 2 123 123 149.2 3.3e-44 1 CL0046 +3FR_NODE_4#PROKKA_00169 19 129 17 129 PF02771.11 Acyl-CoA_dh_N Domain 3 113 113 109.8 1e-31 1 CL0544 +3FR_NODE_4#PROKKA_00169 133 184 133 185 PF02770.14 Acyl-CoA_dh_M Domain 1 52 52 65.7 1.9e-18 1 No_clan +3FR_NODE_4#PROKKA_00169 235 380 234 382 PF00441.19 Acyl-CoA_dh_1 Domain 3 148 150 94.1 8e-27 1 CL0087 +3FR_NODE_4#PROKKA_00170 7 166 5 169 PF02776.13 TPP_enzyme_N Domain 3 164 172 157.0 3e-46 1 CL0254 +3FR_NODE_4#PROKKA_00170 234 362 232 362 PF00205.17 TPP_enzyme_M Domain 3 137 137 56.3 2.7e-15 1 CL0085 +3FR_NODE_4#PROKKA_00170 426 568 424 572 PF02775.16 TPP_enzyme_C Domain 3 149 153 70.9 8e-20 1 CL0254 +3FR_NODE_4#PROKKA_00171 26 383 16 
396 PF00144.19 Beta-lactamase Domain 8 311 330 229.7 4.4e-68 1 CL0013 +3FR_NODE_4#PROKKA_00172 20 443 16 444 PF00501.23 AMP-binding Family 5 416 417 305.0 5.3e-91 1 CL0378 +3FR_NODE_4#PROKKA_00172 453 527 452 527 PF13193.1 AMP-binding_C Domain 2 73 73 50.5 2.8e-13 1 CL0531 +3FR_NODE_4#PROKKA_00173 59 137 59 137 PF03061.17 4HBT Domain 1 79 79 56.7 1.8e-15 1 CL0050 +3FR_NODE_4#PROKKA_00174 8 178 7 180 PF00106.20 adh_short Domain 2 165 167 109.7 1.3e-31 1 CL0063 +3FR_NODE_4#PROKKA_00175 12 137 12 163 PF13279.1 4HBT_2 Domain 1 110 121 26.3 7.9e-06 1 CL0050 +3FR_NODE_4#PROKKA_00175 230 300 224 304 PF01643.12 Acyl-ACP_TE Family 72 142 261 23.4 2.4e-05 1 CL0050 +3FR_NODE_4#PROKKA_00177 21 70 21 71 PF09339.5 HTH_IclR Domain 1 50 52 59.6 1.6e-16 1 CL0123 +3FR_NODE_4#PROKKA_00177 140 261 136 263 PF01614.13 IclR Family 6 128 129 41.8 7.2e-11 1 CL0161 +3FR_NODE_4#PROKKA_00178 11 428 7 429 PF13609.1 Porin_4 Domain 3 310 311 64.8 8.8e-18 1 CL0193 +3FR_NODE_4#PROKKA_00179 30 453 28 455 PF00501.23 AMP-binding Family 3 415 417 340.3 1.1e-101 1 CL0378 +3FR_NODE_4#PROKKA_00179 463 540 463 540 PF13193.1 AMP-binding_C Domain 1 73 73 61.6 1e-16 1 CL0531 +3FR_NODE_4#PROKKA_00180 98 286 98 293 PF00528.17 BPD_transp_1 Family 1 177 185 78.3 4.9e-22 1 CL0404 +3FR_NODE_4#PROKKA_00181 99 312 97 315 PF00528.17 BPD_transp_1 Family 3 175 185 68.4 5.2e-19 1 CL0404 +3FR_NODE_4#PROKKA_00182 36 326 28 327 PF12849.2 PBP_like_2 Family 9 280 281 165.7 1.5e-48 1 CL0177 +3FR_NODE_4#PROKKA_00183 58 125 53 125 PF05598.6 DUF772 Domain 4 77 77 56.5 1.9e-15 1 No_clan +3FR_NODE_4#PROKKA_00183 370 434 358 435 PF13751.1 DDE_Tnp_1_6 Domain 60 124 125 51.6 8.2e-14 1 CL0219 +6FR_NODE_12#PROKKA_00187 9 201 9 201 PF02223.12 Thymidylate_kin Domain 1 186 186 158.8 9.1e-47 1 CL0023 +6FR_NODE_12#PROKKA_00188 170 427 166 444 PF04616.9 Glyco_hydro_43 Family 7 257 286 39.4 3.4e-10 1 CL0143 +6FR_NODE_12#PROKKA_00190 666 746 666 746 PF06429.8 Flg_bbr_C Domain 1 74 74 70.0 1.2e-19 1 No_clan +6FR_NODE_12#PROKKA_00191 23 225 17 
225 PF00753.22 Lactamase_B Domain 7 194 194 56.0 3.7e-15 1 CL0381 +6FR_NODE_12#PROKKA_00192 8 117 7 118 PF02771.11 Acyl-CoA_dh_N Domain 3 112 113 97.3 7.8e-28 1 CL0544 +6FR_NODE_12#PROKKA_00192 122 173 122 173 PF02770.14 Acyl-CoA_dh_M Domain 1 52 52 62.7 1.6e-17 1 No_clan +6FR_NODE_12#PROKKA_00192 229 378 229 378 PF00441.19 Acyl-CoA_dh_1 Domain 1 150 150 178.1 1.1e-52 1 CL0087 +6FR_NODE_12#PROKKA_00193 28 180 23 185 PF01012.16 ETF Domain 7 159 164 126.3 8.6e-37 1 CL0039 +6FR_NODE_12#PROKKA_00194 11 171 10 172 PF01012.16 ETF Domain 2 163 164 137.8 2.6e-40 1 CL0039 +6FR_NODE_12#PROKKA_00194 211 293 209 294 PF00766.14 ETF_alpha Domain 3 85 86 97.6 2.3e-28 1 CL0085 +6FR_NODE_12#PROKKA_00195 35 152 34 161 PF00293.23 NUDIX Domain 2 125 135 63.7 1.3e-17 1 CL0261 +6FR_NODE_12#PROKKA_00197 9 259 8 260 PF00378.15 ECH Family 2 244 245 159.0 1.1e-46 1 CL0127 +7FR_NODE_8#PROKKA_00202 44 150 40 159 PF00156.22 Pribosyltran Domain 5 110 125 31.2 1.4e-07 1 CL0533 +7FR_NODE_8#PROKKA_00206 63 139 61 139 PF03703.9 bPH_2 Domain 3 80 80 32.1 8.2e-08 1 CL0266 +7FR_NODE_8#PROKKA_00209 22 225 19 226 PF01791.4 DeoC Domain 4 235 236 105.3 2.8e-30 1 CL0036 +7FR_NODE_8#PROKKA_00210 3 437 1 440 PF03972.9 MmgE_PrpD Family 3 442 445 391.4 3.1e-117 1 No_clan +7FR_NODE_8#PROKKA_00211 19 73 19 73 PF01381.17 HTH_3 Domain 1 55 55 52.0 4.4e-14 1 CL0123 +7FR_NODE_8#PROKKA_00211 85 369 84 370 PF03480.8 SBP_bac_7 Family 2 285 286 150.1 6.6e-44 1 CL0177 +7FR_NODE_8#PROKKA_00212 19 399 18 400 PF03435.13 Saccharop_dh Family 2 385 386 256.9 2.9e-76 1 CL0063 +7FR_NODE_8#PROKKA_00213 13 65 11 66 PF12911.2 OppC_N Domain 3 55 56 28.7 5.8e-07 1 No_clan +7FR_NODE_8#PROKKA_00213 112 295 112 296 PF00528.17 BPD_transp_1 Family 1 184 185 107.3 6.2e-31 1 CL0404 +7FR_NODE_8#PROKKA_00214 112 303 112 304 PF00528.17 BPD_transp_1 Family 1 184 185 152.4 9e-45 1 CL0404 +7FR_NODE_8#PROKKA_00215 73 420 72 425 PF00496.17 SBP_bac_5 Domain 2 369 374 316.6 1.8e-94 1 CL0177 +7FR_NODE_8#PROKKA_00216 32 182 31 183 PF00005.22 ABC_tran 
Domain 2 136 137 113.9 6.3e-33 1 CL0023 +7FR_NODE_8#PROKKA_00216 234 299 234 299 PF08352.7 oligo_HPY Family 1 64 64 64.8 6.7e-18 1 No_clan +7FR_NODE_8#PROKKA_00217 6 164 5 165 PF00005.22 ABC_tran Domain 2 136 137 97.3 8.4e-28 1 CL0023 +7FR_NODE_8#PROKKA_00217 216 282 216 282 PF08352.7 oligo_HPY Family 1 64 64 69.9 1.7e-19 1 No_clan +7FR_NODE_8#PROKKA_00218 209 374 209 379 PF00270.24 DEAD Domain 1 164 169 35.3 7.1e-09 1 CL0023 +7FR_NODE_8#PROKKA_00219 2 74 1 74 PF02082.15 Rrf2 Family 10 83 83 75.8 2.1e-21 1 CL0123 +7FR_NODE_8#PROKKA_00220 13 56 13 57 PF08369.5 PCP_red Family 1 44 45 44.1 1.3e-11 1 No_clan +7FR_NODE_8#PROKKA_00220 97 266 87 281 PF01077.17 NIR_SIR Family 15 122 157 46.4 2.5e-12 1 No_clan +7FR_NODE_8#PROKKA_00220 176 218 175 219 PF12838.2 Fer4_7 Domain 2 51 52 35.6 8.4e-09 1 CL0344 +7FR_NODE_8#PROKKA_00222 13 87 13 95 PF01584.14 CheW Domain 1 75 138 61.7 4.7e-17 1 No_clan +7FR_NODE_8#PROKKA_00222 91 159 90 159 PF13682.1 CZB Domain 2 70 70 60.8 1e-16 1 No_clan +7FR_NODE_8#PROKKA_00222 185 269 133 271 PF01584.14 CheW Domain 65 136 138 30.6 1.9e-07 1 No_clan +7FR_NODE_8#PROKKA_00223 23 104 23 104 PF13466.1 STAS_2 Domain 1 80 80 41.1 1.3e-10 1 CL0502 +7FR_NODE_8#PROKKA_00223 163 376 162 376 PF02405.11 Permease Family 2 215 215 235.4 4.6e-70 1 No_clan +7FR_NODE_8#PROKKA_00224 23 172 23 172 PF00005.22 ABC_tran Domain 1 137 137 105.2 3.2e-30 1 CL0023 +8FR_NODE_2#PROKKA_00225 1 255 1 255 PF03054.11 tRNA_Me_trans Family 97 356 356 267.1 1.5e-79 1 CL0039 +8FR_NODE_2#PROKKA_00226 27 209 26 209 PF07947.9 YhhN Family 2 185 185 147.3 2.9e-43 1 No_clan +8FR_NODE_2#PROKKA_00227 70 264 70 264 PF12695.2 Abhydrolase_5 Domain 1 145 145 81.3 5.5e-23 1 CL0028 +8FR_NODE_2#PROKKA_00228 322 426 233 426 PF00795.17 CN_hydrolase Family 98 186 186 33.7 2e-08 1 No_clan +8FR_NODE_2#PROKKA_00229 9 158 8 158 PF00881.19 Nitroreductase Domain 2 165 165 123.5 6.8e-36 1 CL0529 +8FR_NODE_2#PROKKA_00230 10 94 9 95 PF02357.14 NusG Family 2 91 92 66.1 2.4e-18 1 CL0439 +8FR_NODE_2#PROKKA_00231 
6 153 6 153 PF02674.11 Colicin_V Family 1 146 146 104.2 5e-30 1 CL0292 +8FR_NODE_2#PROKKA_00232 53 146 39 147 PF00657.17 Lipase_GDSL Family 142 233 234 36.0 6e-09 1 CL0264 +8FR_NODE_2#PROKKA_00234 220 403 132 407 PF01609.16 DDE_Tnp_1 Domain 43 209 213 38.5 8.1e-10 1 CL0219 +8FR_NODE_2#PROKKA_00235 203 328 203 329 PF10017.4 Methyltransf_33 Family 1 126 127 124.0 3e-36 1 No_clan +8FR_NODE_2#PROKKA_00236 10 114 9 116 PF00581.15 Rhodanese Domain 2 111 113 43.6 3e-11 1 No_clan +8FR_NODE_2#PROKKA_00236 149 262 144 264 PF00581.15 Rhodanese Domain 10 111 113 55.3 7e-15 1 No_clan +8FR_NODE_2#PROKKA_00236 297 417 294 418 PF00581.15 Rhodanese Domain 4 112 113 51.8 8.8e-14 1 No_clan +8FR_NODE_2#PROKKA_00237 1 88 1 90 PF04248.7 DUF427 Family 1 93 95 98.9 9.1e-29 1 No_clan +8FR_NODE_2#PROKKA_00239 12 96 6 98 PF00355.21 Rieske Domain 6 95 97 48.5 4.8e-13 1 CL0516 +8FR_NODE_2#PROKKA_00239 113 277 112 280 PF02776.13 TPP_enzyme_N Domain 2 165 172 139.0 9.8e-41 1 CL0254 +8FR_NODE_2#PROKKA_00239 305 430 304 432 PF00205.17 TPP_enzyme_M Domain 2 135 137 111.7 2e-32 1 CL0085 +8FR_NODE_2#PROKKA_00239 494 640 494 640 PF02775.16 TPP_enzyme_C Domain 1 153 153 115.2 1.9e-33 1 CL0254 +8FR_NODE_2#PROKKA_00242 7 67 6 67 PF03625.9 DUF302 Domain 2 65 65 53.7 1.1e-14 1 No_clan +8FR_NODE_2#PROKKA_00245 12 77 12 80 PF04542.9 Sigma70_r2 Domain 1 68 71 43.6 1.6e-11 1 CL0123 +8FR_NODE_2#PROKKA_00245 123 175 122 175 PF08281.7 Sigma70_r4_2 Domain 2 54 54 32.7 3.7e-08 1 CL0123 +8FR_NODE_2#PROKKA_00246 3 386 3 386 PF00162.14 PGK Domain 1 384 384 518.4 7.7e-156 1 No_clan +8FR_NODE_2#PROKKA_00247 10 355 8 360 PF00155.16 Aminotran_1_2 Domain 3 358 363 187.3 3.7e-55 1 CL0061 +8FR_NODE_2#PROKKA_00248 14 159 6 161 PF00795.17 CN_hydrolase Family 12 177 186 56.5 2.1e-15 1 No_clan +8FR_NODE_2#PROKKA_00249 10 172 10 175 PF12804.2 NTP_transf_3 Domain 1 158 160 94.4 7.3e-27 1 CL0110 +8FR_NODE_2#PROKKA_00250 48 150 33 152 PF13185.1 GAF_2 Domain 23 141 148 30.3 4.9e-07 1 CL0161 +8FR_NODE_2#PROKKA_00251 20 317 8 335 
PF00144.19 Beta-lactamase Domain 18 307 330 108.0 4.6e-31 1 CL0013 +8FR_NODE_2#PROKKA_00252 40 165 9 181 PF01209.13 Ubie_methyltran Family 37 161 233 71.4 5.7e-20 1 CL0063 +8FR_NODE_2#PROKKA_00253 2 78 1 78 PF01797.11 Y1_Tnp Family 44 121 121 85.1 3.2e-24 1 CL0481 +8FR_NODE_2#PROKKA_00254 1 115 1 115 PF02915.12 Rubrerythrin Domain 15 137 137 84.1 1e-23 1 CL0044 +8FR_NODE_2#PROKKA_00255 9 155 6 159 PF01613.13 Flavin_Reduct Domain 4 150 154 90.7 8.1e-26 1 CL0336 +8FR_NODE_2#PROKKA_00255 197 243 197 243 PF00301.15 Rubredoxin Domain 1 47 47 85.0 2.2e-24 1 CL0045 +8FR_NODE_2#PROKKA_00256 2 1029 2 1030 PF00873.14 ACR_tran Family 1 1020 1021 1080.8 0 1 CL0322 +8FR_NODE_2#PROKKA_00257 3 288 2 288 PF00529.15 HlyD Family 6 305 305 98.9 3.1e-28 1 CL0105 +8FR_NODE_2#PROKKA_00258 4 110 3 112 PF05635.6 23S_rRNA_IVP Family 2 108 110 98.1 2.6e-28 1 No_clan +8FR_NODE_2#PROKKA_00259 42 233 42 233 PF02321.13 OEP Family 1 188 188 96.2 1.7e-27 1 CL0105 +8FR_NODE_2#PROKKA_00259 262 444 260 444 PF02321.13 OEP Family 8 188 188 91.6 4.4e-26 1 CL0105 +8FR_NODE_2#PROKKA_00260 1 240 1 241 PF03824.11 NicO Family 50 281 282 177.1 4.5e-52 1 CL0549 +8FR_NODE_2#PROKKA_00261 14 125 14 125 PF00072.19 Response_reg Domain 1 112 112 90.2 7.8e-26 1 CL0304 +8FR_NODE_2#PROKKA_00263 14 153 14 155 PF01242.14 PTPS Domain 1 121 123 105.0 1.9e-30 1 CL0334 +8FR_NODE_2#PROKKA_00264 61 134 58 134 PF03544.9 TonB_C Domain 4 79 79 35.2 1e-08 1 CL0428 +9FR_NODE_13#PROKKA_00266 76 162 75 162 PF04468.7 PSP1 Family 2 88 88 107.0 3.3e-31 1 No_clan +9FR_NODE_13#PROKKA_00267 5 372 5 372 PF09334.6 tRNA-synt_1g Family 1 391 391 390.4 6.5e-117 1 CL0039 +9FR_NODE_13#PROKKA_00267 577 669 577 670 PF01588.15 tRNA_bind Domain 1 94 95 94.0 3.5e-27 1 CL0021 +9FR_NODE_13#PROKKA_00268 186 398 185 404 PF12695.2 Abhydrolase_5 Domain 2 137 145 54.1 1.3e-14 1 CL0028 +9FR_NODE_13#PROKKA_00269 67 105 67 105 PF00805.17 Pentapeptide Repeat 1 39 40 45.0 4.1e-12 1 CL0505 +9FR_NODE_13#PROKKA_00269 137 168 136 171 PF00805.17 Pentapeptide Repeat 6 
37 40 44.0 8.7e-12 1 CL0505 +9FR_NODE_13#PROKKA_00269 303 357 289 359 PF07885.11 Ion_trans_2 Domain 23 77 79 42.6 3.4e-11 1 CL0030 +9FR_NODE_13#PROKKA_00270 27 246 27 247 PF12697.2 Abhydrolase_6 Domain 1 227 228 139.9 1.1e-40 1 CL0028 +9FR_NODE_13#PROKKA_00272 222 401 213 421 PF01435.13 Peptidase_M48 Family 31 189 226 44.0 1.8e-11 1 CL0126 +9FR_NODE_13#PROKKA_00272 474 530 469 534 PF13371.1 TPR_9 Repeat 12 68 73 21.2 0.00018 1 CL0020 +9FR_NODE_13#PROKKA_00272 564 595 563 596 PF07719.12 TPR_2 Repeat 2 33 34 17.0 0.0037 1 CL0020 +unf122_NODE_11#PROKKA_00276 1 64 1 64 PF06805.7 Lambda_tail_I Family 24 82 82 54.4 7.7e-15 1 CL0072 +unf122_NODE_11#PROKKA_00277 6 75 1 86 PF14464.1 Prok-JAB Family 29 96 105 39.7 2.7e-10 1 CL0366 +unf122_NODE_11#PROKKA_00277 88 200 88 201 PF00877.14 NLPC_P60 Family 1 104 105 51.5 6.9e-14 1 CL0125 +unf122_NODE_11#PROKKA_00278 29 230 29 230 PF05100.7 Phage_tail_L Family 1 206 206 324.2 2.1e-97 1 CL0249 +unf122_NODE_11#PROKKA_00279 1 109 1 109 PF05939.8 Phage_min_tail Family 1 109 109 127.7 1.6e-37 1 No_clan +unf122_NODE_11#PROKKA_00280 50 257 49 258 PF06791.8 TMP_2 Family 2 209 210 224.4 1.1e-66 1 No_clan +unf122_NODE_11#PROKKA_00280 626 703 626 703 PF09718.5 Tape_meas_lam_C Domain 1 78 78 72.5 1.8e-20 1 No_clan +unf122_NODE_11#PROKKA_00281 11 113 11 113 PF06223.7 Phage_tail_T Family 1 103 103 173.9 8.1e-52 1 No_clan +unf122_NODE_11#PROKKA_00282 1 125 1 126 PF06894.6 Phage_lambd_GpG Family 1 126 127 192.8 1.3e-57 1 No_clan +unf122_NODE_11#PROKKA_00283 161 238 160 241 PF02368.13 Big_2 Family 2 76 79 44.8 8.2e-12 1 CL0159 +unf122_NODE_11#PROKKA_00284 1 131 1 131 PF06141.6 Phage_tail_U Family 1 133 133 210.9 5.1e-63 1 No_clan +unf122_NODE_11#PROKKA_00285 3 192 3 192 PF06763.6 Minor_tail_Z Family 1 189 189 304.9 1.7e-91 1 CL0504 +unf122_NODE_11#PROKKA_00286 2 117 1 117 PF05354.6 Phage_attach Domain 2 117 117 207.3 3.1e-62 1 CL0504 +unf122_NODE_11#PROKKA_00287 2 129 2 129 PF14000.1 Packaging_FI Family 1 125 125 142.1 9.2e-42 1 No_clan 
+unf122_NODE_11#PROKKA_00288 4 339 4 340 PF03864.10 Phage_cap_E Family 1 328 329 215.8 6.9e-64 1 No_clan +unf122_NODE_11#PROKKA_00290 149 297 148 300 PF01343.13 Peptidase_S49 Family 2 151 154 158.6 9.5e-47 1 CL0127 +unf122_NODE_11#PROKKA_00291 30 401 29 401 PF05136.8 Phage_portal_2 Family 2 355 355 350.7 5.6e-105 1 No_clan +unf122_NODE_11#PROKKA_00292 1 68 1 68 PF02831.10 gpW Family 1 68 68 125.4 4.3e-37 1 No_clan +unf122_NODE_11#PROKKA_00293 39 600 39 607 PF05876.7 Terminase_GpA Family 1 547 557 592.4 6.8e-178 1 CL0023 +unf122_NODE_11#PROKKA_00294 1 164 1 164 PF07471.7 Phage_Nu1 Family 1 164 164 301.5 9.7e-91 1 No_clan +unf163_NODE_15#PROKKA_00295 7 407 2 408 PF01053.15 Cys_Met_Meta_PP Domain 17 385 386 378.4 2.2e-113 1 CL0061 +unf163_NODE_15#PROKKA_00296 17 234 12 239 PF07690.11 MFS_1 Family 5 234 352 75.9 2.2e-21 1 CL0015 +unf163_NODE_15#PROKKA_00297 35 87 34 88 PF01381.17 HTH_3 Domain 2 54 55 34.7 1.1e-08 1 CL0123 +unf163_NODE_15#PROKKA_00298 219 463 217 465 PF00082.17 Peptidase_S8 Domain 3 277 282 129.0 1.8e-37 1 No_clan +unf174_NODE_16#PROKKA_00301 1 195 1 196 PF00872.13 Transposase_mut Family 187 380 381 216.5 4e-64 1 CL0219 +unf174_NODE_16#PROKKA_00304 105 218 103 228 PF05140.9 ResB Family 143 260 464 26.7 1.9e-06 1 No_clan +unf174_NODE_16#PROKKA_00305 113 255 86 260 PF01497.13 Peripla_BP_2 Domain 32 175 238 51.6 6.5e-14 1 CL0553 +unf185_NODE_17#PROKKA_00311 2 187 2 189 PF02305.12 Phage_F Domain 1 200 510 244.0 2.6e-72 1 No_clan +unf185_NODE_17#PROKKA_00311 187 425 185 425 PF02305.12 Phage_F Domain 259 510 510 298.9 6e-89 1 No_clan +unf185_NODE_17#PROKKA_00312 1 56 1 56 PF12283.3 Protein_K Family 1 56 56 111.5 1.2e-32 1 No_clan +unf196_NODE_18#PROKKA_00317 48 104 33 106 PF07883.6 Cupin_2 Domain 16 69 71 34.7 8.5e-09 1 CL0029 +unf207_NODE_19#PROKKA_00320 23 231 21 234 PF02596.10 DUF169 Family 3 213 217 107.7 5.2e-31 1 No_clan +unf207_NODE_19#PROKKA_00321 24 173 23 173 PF00005.22 ABC_tran Domain 2 137 137 112.1 2.2e-32 1 CL0023 +unf207_NODE_19#PROKKA_00322 8 
245 8 247 PF12704.2 MacB_PCD Family 1 230 232 98.2 5.1e-28 1 No_clan +unf207_NODE_19#PROKKA_00322 287 409 283 409 PF02687.16 FtsX Family 5 121 121 57.2 1.3e-15 1 CL0404 +unf218_NODE_20#PROKKA_00324 9 60 3 62 PF00571.23 CBS Domain 7 55 57 33.2 3.1e-08 1 No_clan +unf218_NODE_20#PROKKA_00324 72 119 72 127 PF00571.23 CBS Domain 1 49 57 44.9 6.9e-12 1 No_clan +unf218_NODE_20#PROKKA_00325 80 222 79 224 PF02518.21 HATPase_c Domain 2 109 111 62.4 2.8e-17 1 CL0025 +unf218_NODE_20#PROKKA_00325 279 448 279 449 PF00204.20 DNA_gyraseB Domain 1 172 173 146.6 4e-43 1 No_clan +unf218_NODE_20#PROKKA_00325 476 587 476 588 PF01751.17 Toprim Family 1 99 100 85.5 1.9e-24 1 CL0413 +unf218_NODE_20#PROKKA_00325 618 690 617 690 PF00986.16 DNA_gyraseB_C Family 2 65 65 78.3 2.5e-22 1 No_clan +unf240_NODE_22#PROKKA_00331 1 57 1 115 PF02203.10 TarH Domain 1 59 171 24.1 2.6e-05 1 CL0457 +unf240_NODE_22#PROKKA_00331 670 859 667 862 PF00015.16 MCPsignal Family 4 207 213 176.0 7.1e-52 1 No_clan +unf240_NODE_22#PROKKA_00332 3 85 1 86 PF02596.10 DUF169 Family 135 215 217 55.4 5e-15 1 No_clan +unf251_NODE_23#PROKKA_00333 19 156 15 160 PF01077.17 NIR_SIR Family 6 149 157 73.5 1.2e-20 1 No_clan +unf251_NODE_23#PROKKA_00334 51 157 51 160 PF06073.7 DUF934 Family 1 107 110 133.1 3e-39 1 No_clan +unf251_NODE_23#PROKKA_00335 53 221 52 222 PF01507.14 PAPS_reduct Family 2 173 174 122.2 1.9e-35 1 CL0039 +unf251_NODE_23#PROKKA_00336 20 100 19 100 PF03951.14 Gln-synt_N Domain 2 84 84 99.3 6.5e-29 1 No_clan +unf251_NODE_23#PROKKA_00336 108 383 107 383 PF00120.19 Gln-synt_C Domain 2 259 259 338.4 2.1e-101 1 CL0286 +unf251_NODE_23#PROKKA_00337 17 65 5 65 PF00543.17 P-II Domain 54 102 102 62.9 3e-17 1 CL0089 +unf262_NODE_24#PROKKA_00338 4 151 2 151 PF00005.22 ABC_tran Domain 10 137 137 76.9 1.7e-21 1 CL0023 +unf262_NODE_24#PROKKA_00339 21 105 20 108 PF01895.14 PhoU Domain 2 85 88 67.6 9.2e-19 1 CL0297 +unf262_NODE_24#PROKKA_00339 125 208 123 208 PF01895.14 PhoU Domain 3 88 88 67.3 1.2e-18 1 CL0297 
+unf262_NODE_24#PROKKA_00340 9 139 9 143 PF02915.12 Rubrerythrin Domain 1 133 137 61.3 1.2e-16 1 CL0044 +unf262_NODE_24#PROKKA_00341 11 186 11 186 PF13419.1 HAD_2 Family 1 176 176 69.8 3.2e-19 1 CL0137 +unf273_NODE_25#PROKKA_00343 40 324 39 325 PF02653.11 BPD_transp_2 Family 2 267 267 105.3 2.5e-30 1 CL0142 +unf273_NODE_25#PROKKA_00344 18 163 18 164 PF00005.22 ABC_tran Domain 1 136 137 111.3 4.2e-32 1 CL0023 +unf273_NODE_25#PROKKA_00345 91 113 91 113 PF12399.3 BCA_ABC_TP_C Family 1 23 23 31.0 1.4e-07 1 No_clan +unf284_NODE_26#PROKKA_00346 2 80 1 89 PF04126.8 Cyclophil_like Family 34 111 120 75.9 1.8e-21 1 CL0475 +unf284_NODE_26#PROKKA_00347 5 112 4 115 PF02771.11 Acyl-CoA_dh_N Domain 2 110 113 71.9 5.8e-20 1 CL0544 +unf284_NODE_26#PROKKA_00347 120 171 120 171 PF02770.14 Acyl-CoA_dh_M Domain 1 52 52 68.0 3.6e-19 1 No_clan +unf284_NODE_26#PROKKA_00347 227 379 227 380 PF00441.19 Acyl-CoA_dh_1 Domain 1 149 150 97.8 5.8e-28 1 CL0087 +unf284_NODE_26#PROKKA_00348 6 120 6 120 PF02771.11 Acyl-CoA_dh_N Domain 1 113 113 100.7 6.9e-29 1 CL0544 +unf284_NODE_26#PROKKA_00348 124 175 124 175 PF02770.14 Acyl-CoA_dh_M Domain 1 52 52 62.0 2.6e-17 1 No_clan +unf284_NODE_26#PROKKA_00348 236 382 236 386 PF00441.19 Acyl-CoA_dh_1 Domain 1 146 150 150.2 4.3e-44 1 CL0087 +unf295_NODE_27#PROKKA_00349 145 330 143 331 PF00149.23 Metallophos Domain 29 198 200 25.8 5.8e-06 1 CL0163 +unf295_NODE_27#PROKKA_00350 48 121 24 122 PF07238.9 PilZ Domain 27 101 102 29.8 4.4e-07 1 No_clan +unf295_NODE_27#PROKKA_00351 18 173 18 174 PF00300.17 His_Phos_1 Domain 1 157 158 116.5 1.1e-33 1 CL0071 +unf306_NODE_28#PROKKA_00352 23 143 23 144 PF13380.1 CoA_binding_2 Domain 1 115 116 126.7 4.5e-37 1 CL0063 +unf306_NODE_28#PROKKA_00353 23 261 20 262 PF00378.15 ECH Family 6 244 245 173.4 4.1e-51 1 CL0127 +unf306_NODE_28#PROKKA_00354 80 155 80 157 PF03061.17 4HBT Domain 1 77 79 54.7 7.7e-15 1 CL0050 +unf306_NODE_28#PROKKA_00355 13 128 12 144 PF01814.18 Hemerythrin Domain 2 131 133 48.4 9.7e-13 1 No_clan 
+unf317_NODE_29#PROKKA_00356 16 156 11 160 PF12849.2 PBP_like_2 Family 104 277 281 84.4 8.9e-24 1 CL0177 +unf317_NODE_29#PROKKA_00357 113 313 109 316 PF00528.17 BPD_transp_1 Family 1 183 185 90.1 1.2e-25 1 CL0404 +unf317_NODE_29#PROKKA_00358 83 279 82 288 PF00528.17 BPD_transp_1 Family 2 175 185 77.9 6.5e-22 1 CL0404 +unf328_NODE_30#PROKKA_00359 21 98 20 105 PF00005.22 ABC_tran Domain 2 78 137 63.3 2.7e-17 1 CL0023 +unf328_NODE_30#PROKKA_00360 32 350 30 381 PF13458.1 Peripla_BP_6 Family 3 308 343 154.0 6.2e-45 1 CL0144 +unf328_NODE_30#PROKKA_00361 1 193 1 194 PF01266.19 DAO Domain 170 357 358 57.2 1.3e-15 1 CL0063 +unf339_NODE_31#PROKKA_00362 2 267 1 267 PF07592.6 DDE_Tnp_ISAZ013 Domain 46 311 311 409.3 6.7e-123 1 CL0219 +unf339_NODE_31#PROKKA_00364 12 221 9 249 PF00378.15 ECH Family 5 215 245 179.9 4.3e-53 1 CL0127 +unf350_NODE_32#PROKKA_00367 3 227 3 228 PF01370.16 Epimerase Family 1 235 236 137.7 3.9e-40 1 CL0063 +unf350_NODE_32#PROKKA_00369 25 104 5 105 PF01266.19 DAO Domain 279 357 358 56.4 2.1e-15 1 CL0063 +unf361_NODE_33#PROKKA_00371 108 224 64 238 PF01695.12 IstB_IS21 Family 48 162 178 54.1 1.2e-14 1 CL0023 +unf383_NODE_35#PROKKA_00375 12 72 10 72 PF00392.16 GntR Family 3 64 64 64.5 4.2e-18 1 CL0123 +unf383_NODE_35#PROKKA_00375 82 201 82 203 PF07729.7 FCD Domain 1 123 125 92.2 2.9e-26 1 CL0388 +unf866_NODE_7#PROKKA_00376 25 84 24 100 PF01926.18 MMR_HSR1 Family 2 57 116 56.4 2.5e-15 1 CL0023 +unf866_NODE_7#PROKKA_00377 5 219 2 221 PF01902.12 ATP_bind_4 Family 4 216 219 107.8 4.7e-31 1 CL0039 +unf866_NODE_7#PROKKA_00378 7 184 6 185 PF02737.13 3HCDH_N Domain 2 179 180 226.0 2.4e-67 1 CL0063 +unf866_NODE_7#PROKKA_00378 187 283 187 283 PF00725.17 3HCDH Domain 1 97 97 118.2 1.5e-34 1 CL0106 +unf866_NODE_7#PROKKA_00379 6 198 3 242 PF00753.22 Lactamase_B Domain 4 157 194 65.4 5.2e-18 1 CL0381 +unf866_NODE_7#PROKKA_00382 14 293 12 294 PF01180.16 DHO_dh Domain 3 293 295 240.8 1.4e-71 1 CL0036 +unf866_NODE_7#PROKKA_00383 36 254 36 257 PF02358.11 Trehalose_PPase Family 
1 232 235 87.8 4.8e-25 1 CL0137 +unf866_NODE_7#PROKKA_00384 5 469 4 470 PF00982.16 Glyco_transf_20 Family 2 473 474 483.7 4.5e-145 1 CL0113 +unf866_NODE_7#PROKKA_00385 5 108 4 109 PF01740.16 STAS Domain 2 116 117 48.3 5.4e-13 1 CL0502 +unf866_NODE_7#PROKKA_00386 14 137 13 137 PF13581.1 HATPase_c_2 Domain 2 125 125 101.7 2.2e-29 1 CL0025 +unf866_NODE_7#PROKKA_00387 204 305 204 305 PF03448.12 MgtE_N Domain 1 102 102 89.0 2e-25 1 CL0436 +unf866_NODE_7#PROKKA_00387 311 365 307 368 PF00571.23 CBS Domain 5 54 57 16.9 0.004 1 No_clan +unf866_NODE_7#PROKKA_00387 378 425 371 427 PF00571.23 CBS Domain 8 55 57 30.6 2e-07 1 No_clan +unf866_NODE_7#PROKKA_00388 35 387 32 387 PF01566.13 Nramp Family 4 358 358 286.2 2.8e-85 1 CL0062 +unf866_NODE_7#PROKKA_00390 19 154 18 156 PF08327.6 AHSA1 Family 2 122 124 57.6 1.2e-15 1 CL0209 +unf866_NODE_7#PROKKA_00391 9 68 9 68 PF12840.2 HTH_20 Domain 1 61 61 59.1 2.7e-16 1 CL0123 +unf866_NODE_7#PROKKA_00393 12 427 10 427 PF00275.15 EPSP_synthase Family 3 419 419 342.8 1.9e-102 1 CL0290 +unf866_NODE_7#PROKKA_00394 11 236 9 236 PF13505.1 OMP_b-brl Domain 3 176 176 51.2 1.3e-13 1 CL0193 +unf866_NODE_7#PROKKA_00395 55 188 54 188 PF13492.1 GAF_3 Domain 2 129 129 64.5 9.6e-18 1 CL0161 +unf866_NODE_7#PROKKA_00395 217 352 217 352 PF13492.1 GAF_3 Domain 1 129 129 71.3 7.3e-20 1 CL0161 +unf866_NODE_7#PROKKA_00395 415 602 414 603 PF07228.7 SpoIIE Family 2 192 193 132.6 1.3e-38 1 CL0238 +unf866_NODE_7#PROKKA_00396 20 134 14 135 PF00156.22 Pribosyltran Domain 8 124 125 60.3 1.4e-16 1 CL0533 +unf866_NODE_7#PROKKA_00397 4 242 2 243 PF01790.13 LGT Family 5 255 256 151.8 1.6e-44 1 No_clan +unf866_NODE_7#PROKKA_00399 51 235 51 236 PF01612.15 DNA_pol_A_exo1 Domain 1 175 176 111.4 3e-32 1 CL0219 +unf866_NODE_7#PROKKA_00399 267 646 265 646 PF00476.15 DNA_pol_A Family 3 383 383 431.2 2.5e-129 1 No_clan +unf866_NODE_7#PROKKA_00400 9 383 9 384 PF01053.15 Cys_Met_Meta_PP Domain 1 385 386 438.0 1.8e-131 1 CL0061 +unf866_NODE_7#PROKKA_00401 253 312 233 312 PF13517.1 
VCBS Repeat 1 61 61 34.1 2.6e-08 1 CL0186 +unf866_NODE_7#PROKKA_00402 41 209 38 209 PF02572.10 CobA_CobO_BtuR Family 4 172 172 136.4 6.9e-40 1 CL0023 +unf866_NODE_7#PROKKA_00403 78 252 71 256 PF13535.1 ATP-grasp_4 Domain 9 180 184 35.9 5.8e-09 1 CL0179 +unf866_NODE_7#PROKKA_00404 58 131 56 132 PF03061.17 4HBT Domain 3 78 79 38.8 7.3e-10 1 CL0050 diff -r 000000000000 -r 68a3648c7d91 pfam_search/f2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/f2 Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,3012 @@ +>10FR_NODE_6#PROKKA_00001 +MGDPKGFMTVPRKEASYRPRNERIYDFGEVEQTLNEEDRKLQTSRCMDCGVPFCHWGCPV +GSKIPEWQDAYYRGNEEVAYEILHSTNSFPEITGRICPAPCEKSCVLSINEEPVTIRENE +AANVENAFTQGWIKANPPSIRVV* + +>10FR_NODE_6#PROKKA_00002 +MEKKRKEMAFRFPKPDGLYNPANEHDNCGIGFVAHIKGEASHDIVERGLEVLRNLDHRGA +KGSDNASGDGAGVMVQIPHEFIKKVLKIDVPAKGSYGTGLIFLPQLEAEANACVDILSNI +IQEEGLQLIGYRDVPTDSSIPGEIARTTEPRIKQVFIKANLEEDILEQKLYIVRKRAEKA +VQASDLSQKEVYYHSSLSAKTMIYKGMLTPDQMKDYFTDLQHPLFKSALILIHSRFSTNT +FPTWDLAQPFRLVAHNGEINTIKGNRLWTQAREGLLKSEVFGDDLPKILPVLEEGKSDSA +SFDNVLEFLHRTGRSLHHSLCMMIPESFNEKNPIPESLKAFYEYHSTIMEPWDGPASIVF +SDGRYIGGTLDRNGLRPSRYVITKNDLIVMASETGVQDFAAEEILEKGRLRPGKILLVDT +RLGIIIPDEEVKEQLSRRNPYGMWLKENRLLMEDIKVRQRVPSTMDDFLTYAKVFSYSKE +DMEFLIQSMSNTAVEPINSMGNDTPAAIFSRQPQRLFNYFKQTFAQVTNPPIDAIREGLV +MSLTNYIGSLNSNILKESPDHCKLIKFPDPIVTNTDLGKIKDLKDEMFSHEIISIVFPVD +QGFEGFKKAFDEMLERAEKAVDDKKNFIILSDRAIDSKHAPFPSLLAVSAVHHHLIQKKK +RMQVGIAVETGEAREVNHYALLLGYGASVINPYLAFAAVDHLVKEGKLDMEYKDARRNYI +KSIKKGLLKIFSKMGISTVRSYHGAQIFGAFGLSKELVDKYFKGTSSPISGIGLEEIYEE +YSQFHKDAFREEATQEKFRFETTGVYAWRKNREDHAWNPDSIGLLQWATRTNSYEKFKEY +SRTVDEYNRKPSFIRGCFQVKRNSISIEEVEPVEEIMKRFVTGAMSYGSISKEAHESLAV +AMNTVGGRSNTGEGGEDHNRFGTEKQSAIKQIASGRFGVTSNYLTNAREIQIKIAQGAKP +GEGGQLPGYKVNEVIAKLRNSTPGITLISPPPHHDIYSIEDLAELIYDLKATNPKAKISV +KLVSQDGVGTVAAGVAKAFADLIIISGGEGGTGASPISSIKHAGLPVEIGIAEAQQTLVK +NNLRGRVKIQVDGQLKNGHDIVTMACLGAEEFGFATSALITLGCVMMRKCHLNTCPTGIA +TQDETLRERFTGNPQLVINFFRFLAGEVRELLAEMGFKKFDDIIGRADLLEENKEVFGWK 
+MKNVDFSAVLNRPAEADKFDIRYVPGSASLNLDGHLDHTLIEESGKAIKGKEKVWLHHPF +ANTDRAIGAMLSGVISQKYGEFGLPEDTIHATFDGSAGQSFGAFLAKGVTFRLEGDSNDY +IGKGLSGGKIIVVPPTGSTFTPEENIIIGNSTFYGATGGEAYIQGVAGERFCVRNSGMEA +VIEGAGDHCCEYMTGGRVVVLGKTGRNFAAGMSGGIAYVLDEDGDFDFYCNKGLVELLEV +EDKKDIKELQGLISKHLTYTQSPKAAKILTQWEEYLPKFVKVIPYEYRKVLRERELRELE +QKMKMTEDANVMQE* + +>10FR_NODE_6#PROKKA_00003 +MLEQKELILLNISGEDKPGLTASLTEILSQHNVNILDIGQSVIHKDLGLGILFEVPKKYR +SASILKDLLFKAYELKSHIKFTPIPIEEYEKWVAEQGKERFIITLLAHKLTALHLSKVSS +LIASQKLNIDTISRLSGRKSLNGNNKVTNSVVEFSVRGTPLDINAMKQSLMNIASETGID +IAFQEDNIYRRSRRLVCFDMDSTLIQTEVIDELAQKAGVGDEVKKITESAMRGEIDFKES +FKKRVSLLKGLDESVMKGIAENLPITNGAERLLSTLKQYGYRTAILSGGFTYFGNYLKTK +LGFDYVFANELEIKNGKLTGKHLHEIVDGKRKAELLELLAFKEDIHLEQVIAVGDGANDL +PMLEKAGLGIAFHAKPKVKASAQHAISATGLDTILYLLGFRDREINAS* + +>10FR_NODE_6#PROKKA_00004 +MLIQEASKWDIITYVLDNDETCPANSLATHFVKGSNLDFDSVYRFGKMVDLLTYEMENIN +IEALKKLKSEGHQIIPDPDILELIQDKGKQKEFYQDNNVPTAPFKIYSSRQDIVQAIKNG +EIKFPFVQKLRTGGYDGRGVAVISDENDLDKLLDGASIIEDKVNIAKEIAVIAARNKQGE +IKCFPVVEMVFDPEANLVDKLICPSKITAEQSEKAIEIAGKIIGLLGMQGLLAVEFFVDE +NGEVIVNESAPRPHNSGHHTIESIITSQFEQHLRAIFNLPLGSTRPKLPAVMVNILGGEG +YEGPVRYEGLTEIMAIEGVKIHLYGKKITRPFRKMGHITVLSDSLETALEKAEKVKQLIK +VKSWDKN* + +>10FR_NODE_6#PROKKA_00005 +VGQKLVSIVMGSDSDLPVMKPAAEMLEQLGVEYEIDIVSAHRTPEKLFDFASNAHKRGIQ +VIIAGAGGAAHLPGMVASMSPLPVIGVPVKSSNSIDGWDSVLSILQMPGGVPVATVALNG +AKNAGILAAQIISVSDSQVREKIIEYKAGLKEAVMKKAKNLKG* + +>10FR_NODE_6#PROKKA_00006 +MDPKTVIRELLTGTGVHLNGPHPYDVQVHDERAYERWLSEAELGLGESYMDGWWDCLALD +EFIERILRAGLEEKVKRNFSTAFYVLSKRLFNQQTRVKSKRVGREHYDLGNELFSKMLDR +RMVYSCGYWQRAKNIDQAQEAKLDLICKKLNLKPGMKVLDIGCGWGSFAKYAAEKYGVEV +LGVSISKRQIELGNELCKGLPVTLLYKDYRDVEGKFDAVVSVGFFEHVGYKNYDTYMKIV +DRCLTDNGISLLHTIGNNTTTHYVNRWTNKYIFPNGMLPSIAQVAKAAEPYFVIEDFHNF +GPDYDKTLMAWYDRFNKAWKELKNQYDERFYRMWRYYLLSSAGGFRSRATQLWQFVMTRT +GRQQPDCRFA* + +>10FR_NODE_6#PROKKA_00007 +VAHAATTGNNETLLCRFPTLHNNTIVFEAGGNLWRVDRTGGVATRLTTDPGYDMMPRFSP +DGKTIAFTGQYSGNVDVYTIPADGGAVTRLTYHSDVVRKAPTRWGPDNMVMTWTPNGKDI 
+VFLSRRDTWNSWFGQPFEVSKMGGLPTHLPLPKGGVMSYSPDGSKIAYNRIFRNFRTWKR +YKGGLAQDIWIYDFKTKKIQRVTKWKGTDTYPMWYKNTIYFASDRGANHRLNIWAYSLDT +KTFRQITHFKNYDVDWPSLGNNGIVFQDGGSLYVLDLPSEQLHKINVKVPTDGTQTLPRW +INASKMIRSLDISPNGKRVLFGARGDIFTVPAKHGATRDITQTSDAQEQYPAWSPNGKWI +AYLTDASGVNELAIRPSDGSGHQTYITNAKTGYYYNPTWSPNSQMLAYSDNNHVLWYISL +KDKKPVRIAQDKYNAMRDYHWSPDNNWISYSKTNASGLSQIYIYSLADHKSYKVSDGIYS +DNDPVFGPNGKYLFFVSARHENPLFSESESNVATEKMDGIYMVTLQKNEKSPFAPVSDEG +MPEAKKASSSASKKTESAKDVKIDFNGLMNRVIMLPIKSGDYGNIQVTGNKVFYQTRPLI +TIEGFLHGTGQSSIMVYDLKSKKGHTVVANGARTYGLSADGKTLVYMRRGKFFLMPSASV +NAKGSEPVNTSHMKMKIYPHQEWSEMFHQAWRLFNNFFYNTKMNGVNWNEVGANYGKLVP +LLGCREDVNYLVGEMIGELDNSHCYVWGGDDNYLGKTNPTGVLGVDFGLNKSSGRYYFKK +IYAGDNSRPGYGSPLDRPGVNVKTGDYLLAVNGHQLKAPMNPYSLFVNTVGQQTTLTLAD +KPDGKGEHTVTVKPINNSLNLRLLNWIRTKRAYVNKKSDGKIGYIYMSDMESLGMTQFIH +QFYPQLSKQGLIMDDRFNGGGFIDQIVLERLRRVLIGMSTNRAHAAMRYPEQVLHGYKAC +LLNHYSASDGDMFPFYFRKYGLGPLIGERTWGGVRGYNRVWTLLDGGNLVVSQNSIYGLD +SKWAIENHGVTPDIKVDNLPGQVMEGKDPQLDTAINYIMKKLKEHPMPIPQPPAEIPAYP +SGNDAGGTN* + +>10FR_NODE_6#PROKKA_00008 +MIRYNLHQHSIFSDGAAEPEAYVQSALNLGFEAMGFSEHSPLPFPTKFSLKAERVEDYIR +ETERLKEKYNDRIDLYRALEMDFIPGYSENFTEWRKKAQLDYAIGSVHMVQPEDDGELWF +IDGPDRSIYDDGLQNFFGGDIKKAVKTYFHQVNRMVETQDFEVVGHVDKIKMHNQNRYFT +EEEKWYRDLVEETLHLIKEKDFIVEVNTRGLYKKRSNRLFPDDYALQRISELGIPVLISS +DAHKPEELNLLFETAEKRLLDMGLGAVVRFDHGKWKDFPLS* + +>10FR_NODE_6#PROKKA_00009 +MIDAAKRASARKIVAVIPYFGYARQDRKDKPRVSIGAKMIANLLTTTGIDRLITMDLHAD +QIQGFMDFPVDNLYASVIFYPYLKKLNLPNLMMASPDTGGTRRAANYAKALDTGFVICYK +QRTRPNVVEQIQLIGDVAGKDVVLVDDIIDTAGTITKAARVILDKGANSVRAMVTHPILS +GDAFKIIADSPFTEVVVTDTIPVKDDLGGKIKVLSTAQLFSEVIKRVENYKSISSLFNLG +NQSNK* + +>10FR_NODE_6#PROKKA_00010 +MNTVSLSGSLRENVGKKDAKKQRRLGKVPCVIYGGSEQKHFTLDQLEFKPLVFTPEASVV +NLTLGEKTYECILQDVQYHPVTDEILHADFLEIHSEKPVNIALPVELTGTAPGVVKGGKL +RLKMRKLRVNGIIKLMPEHIVLDISKLDIGRSIKVRDINQANLTFLDPGNQVVVAVVAAR +GLSAEEEAEEAEEGEEGEAAEGGEEGGEKSAE* + +>10FR_NODE_6#PROKKA_00011 +MKYLIAGLGNIGVEYANTRHNIGFIVADALVNELKGKFETERLASVASVKHKGRTLVVIK 
+PTTYMNLSGKAIKYWIDKEKIPIERVLIVVDDIALPLGTLRMRKKGGAAGHNGLSDIIMK +LGTEKFPRLRVGIGDDFAKGYQVDFVLGQWTDKEVNVMIPRVQKAVEIVQSFVSAGIDNT +MNLYNNK* + +>10FR_NODE_6#PROKKA_00012 +VFQGKHQLNVFINILTQIKTMLRIRNFNNFVSNFLKIFAKFYNQAKIISYRLIILLQASV +TGQPNHSIEMEKQRQQSGLLFKIK* + +>10FR_NODE_6#PROKKA_00013 +MAVEAVRGLLEKTGTKPEEVDLVICATVTPDMQFPATANLVSYKLGINNAFSFDMNAACS +TFIYALITGSKYVESGEYKKVIVIGADKMSSIVDYTDRATCVIFGDGAGAVMLEPTASDE +PGIMDHRFYTDGAGWIHLHQKAGGSLKPASHATVDAKEHFIYQEGQPVFKFAVTRMADVA +AEIMERNQLKSEDIAWLVPHQANLRIIDATARRMGVSKDQVMINIQRFGNTTNGTIPLCL +YEWEPQLKKGDNIVLAAFGGGFTWGSVYLKWAYDGKK* + +>10FR_NODE_6#PROKKA_00014 +MHYNPNAFALSLRNSYSKIIGLIIPEITLYAFPSMIRGVSEFCYNAGYNVLILSSNESYK +REVQNTELMLSSQVDGLLVAITKETRNHKHFDQLEKEGIPVVFFDRVFNNYGTSKVIIDD +RRAAYEATEHLIKTGRKNIAYFGGNAALYITQQRLMGFRKALSDYNLEEHDLVFADDSHM +ARNKALQIFKRKNYPDGIMSISDEVLTGIIPALQELNIKIPNEVGVISFSDGPISQMYKP +AISIVHHSLARVGQVAVDLLIQRIEHPEDMHQQIHIIDTELIARGSTAIGK* + +>10FR_NODE_6#PROKKA_00015 +MKKVVQAFMVLLALTITTGLMAQGTIKGTLKTTKGKTVPGVNILLKGTTTGTTSSLNGSF +VLKVPAGKHVLLVSFTGFKPINYSFTIKDGETLTKNFVLHEDLLALDQVVVTGVQNKQTK +LQSSVAITTLSPQKISQIAPRSAADLLKAIPGFYVESSGGKGNANVFARGLPSSGGLRYV +QFQEDGMPVFEYGDLMFGNTDIMVRIDQTMSRMEAVRGGSASVLTSDAPGGIINIISKTG +GPTTKGVFMQTIGLTYMHARTDFDIGGPVSKHLRYNIGGFYRADNGIRSPGFLANNGGQI +KANFTYTFNKGYVRFRTKILNDKTIAYLPFPMMGNPAKSIPGFNANYGTMKSLDLLHLHA +TTPTGNSVNESLADGMHPKIFAFGGEAFFDLGNKWSLKDNFEKTFTHIQFNSIFGVNAPE +SASAYATAQGLTNYHYAFADGYNAGKPITNMSSLNGNGLVATYGWWSVGLNLQEFGNDFK +LTKQSTNNTFTAGWYFSTNQVGGNWWWHNMLVDISGHNTRKLNLINDNTGESLTTNGYSQ +YGTLYADYNALTVINAPYVYDEIDLGRLTINAGLRWDMGTITGRVENTGSYSYDVNGDGI +ISPAEKNIQYGNGTYTPFHYDYSVLSYSLGLNYEFNKSTAIFARASQGHRSPADRAYVFG +ATTSTPNGFPSSAKDESIEQYELGLKYNSSKVALFATGFYSFFNHIDFTDFVNVGGNLTA +IQQYYNTSAMGLELEAAAQLGKLNLSLTGTAQSAKYHNWVYHDQSGNLHDFNNHFIQRLP +KLYFTFRPSYNFGKLNVSAAWEYFGKRYTNPENKQVLPQFSQINAYIDYTVSPHITISAA +GNNLFNVIGLTEGNPRSGLVSTGGSQYFYARSILGRSAILSFKYSF* + +>10FR_NODE_6#PROKKA_00016 +VKHIDISPIDIAIIVIYIVGIAVWGLMYSKKKSKGKGHEGYFLAGRNMTWPIVGITLYAA +NMGSPALVGLAGDAYSTGISVFNYEWMALVVLVFFAIFFLPFYLRSRVYTMPEFLQRRFD 
+IRSRYYFSFITLVGNIIIDTAGVLFSGALIVKMIFPAMALWHIIAVLAIITAAYTITGGL +SAVMYTEAVQGVLLMLGAVLLTFFALKRIDFNVARIFTETPHHMMSLIRPNSDKAMPWLG +LVLGVPLLGFYFWGTNQFMVQRVLSAKNTNHGRWGALFAGILKLPGLFIVVLPGIIGRLI +FPHLSDPDLIYPMMLFHLLPVGILGIVLAGLIAAISSSISATLNSASTLMTMDFVNNLKP +GLTPKQLVRIGQIFTGVFVVISAAWAPMIAGFPSLFKYLQQVLALISPPVVAVFLLGLFW +KRANAQGAFYGLMGGLLMTIFAVIVRYVNPDIFPWLGHIQFLLVAPVLLVGTMAIIVPVS +LMTPPPPEEAVAQFTWSFKFFNAESMELAGTPWYKNYRYQAIGALLATAILVYIFR* + +>10FR_NODE_6#PROKKA_00017 +MEEAKNTYSKALDLLKNGLLKEGFVAALDQQANYRRVWARDSIITGLSALLADDTTLIEG +MKKTLISLKQHQHANGMIPSNVSFDADGNVTMVSYGTLTGKVDTNLWFIIGVMVYVRKTS +DTDLLKEMLPAIEKVFELLLSWEFNGRGLLYVPQGGNWADEFILEGYNLSEQLLYYWALS +EASAMDEKFSTKAKKLKDLIEINYWPTESNRSKVYHKTAFERQLEKGQTSHWLPGFKPAG +YHTFFDCFAHGLSFVLQFNSPEQEGEIIETLVRTTSETSGSLLPSFWPPVRETDAQWETL +QTNWIYKFRNQPGAYQNGGIWPISNGLLIAGLYRSGHKGMADKMKEALFLATALPENQFG +FYEYIDAFSWEPGGAKHQLWSAAGVIFAEKAAQNVFIV* + +>10FR_NODE_6#PROKKA_00018 +MEGDIINLNDEHRSTARKIVAPLIEEIKNCQTIYTFSVAGESGAGKSITAAAIAEQLELA +GFSVKVFQQDDYFFLPPFTNDQKRRKDLEWVGIKEVDLALIDEHLKAAKDGVKTIKKPLV +IYGKNKITSEVFDMKGVNVCIAEGTYTSLLKNVDKRIFIDRDFFDTHNDRKKRGRDLIDP +FTEKVLEIVQCS* + +>10FR_NODE_6#PROKKA_00019 +MEAITNGMMNLNRQRHQHLVALRIIFAHGENGGEEVVHIGHVAIKTGDTFPGTGRVDLLC +LTHCLTSTD* + +>11FR_NODE_14#PROKKA_00020 +MFKVMIRDSMSPVAREILTATGKIEVVTDNDKAANAPEVLAEMIGEFDGLAIRSGTQVTR +AVMEKAGRLKIIGRAGIGVDNIDLEAATRQGIVVMNAPGGNTVTTAEHTVSMMMALARNI +PQATASLREGAWDKKKLIGVEIAGKTLGIIGLGHVGRIVADRARGLKMRVIAADPYVSCD +AAARINVRLVSLDELFSASDFISLHVPRLKETVNMINADTLSRMKPGVRIINCSRGDVVN +VDDLYRALESGRVAGAAIDVFPKEPPDASLPLLKHPRVVLSPHIGASTGEAQVKVARMIA +EQMAACLIDGVITNAVNFPSVSMEEMARVV* + +>11FR_NODE_14#PROKKA_00021 +MNDPIYQKLATVLDTLPNGFPATEDGKEIRLLKKIFSPEEAELFCDLKLTFETAEQIANR +TARPVEELKARLSVMQEKGQIFGIDMGGVGIYKMLPWAFGIYEFQLPHMDRELAELCEQY +GKTYGKQFFANKPQLMQVVPIESEIKAEHEALPYERVSTIIENSRSFMYFDCICKKEKGL +MDEPCDKPVQVCTAFAPIPGVFDDHPYGKTMTKEEAYQLLNKAEEAGLVHLTWNVKSGHF +FICNCCGCCCGVLRGINELGIDASKVINSYYYAQIDAEACVACGTCADERCQVNAIMEGD +DAYTVIAEKCIGCGLCITTCPGDAISLVRKPAAQIETPPDDEMDWYEKRAQLRGVDISDY +K* + 
+>11FR_NODE_14#PROKKA_00022 +MELTNDEREFFALVNRASLLNPFSDERNDVDLKLAGLPSAAPGTGRVKKAIQSVNERIRQ +LETDGRADISQYTGRDRELVEKAFLFELFYRFRKQFDELIESQIASDDVPARIPFYNDAF +SAMQKRGFTEEDFRRYFALAFQIRRAFYFIGRSLVGNSASMKSLRLNLWNNVFTHNMDLY +DRYLWNRMEDYSTLILGETGTGKGAAALAIGRSGFIPLKKKSFEESFTRSFISLNLSQFP +ETLIESALFGHKKGAFTGAIENYQGIFEQCSPYGAILLDEIGEVSKPIQIKLLQVIQDRV +FTPVGSQTRSRFNGRVIAATNRPLETLRGKGFFRDDFYYRLCSDIIVVPPLRQRVQEDPT +ELDVLLDFTINRLVGRSSPELVQIVREVIDRHLGNDYPWPGNVRELEQCVRRVLLKGIYT +GDAAVADIDLCRSLTTGIEQGNIDANSLTSGYCYLLYQRHRTFEEVARRTGLDRRTVKKY +IQDWTSSHSTDNPPETDIPG* + +>11FR_NODE_14#PROKKA_00023 +MKIQQIIIREFEEMMAELKEVLAKMTCPLLGEDWLPM* + +>11FR_NODE_14#PROKKA_00024 +MAVDKNEFFRQATIRICGSLDIETALERCFHYLEQMLPVDEIGLYLYDPGLNVFQRIAGV +KSHGKNEFSPVSPLPEASKEKWSAIWADMGDITIINRVEERPEIQEVIEMYGLEHDISLM +SMRLELEGKRVGLLLLRTRGRDRYEEKHARLMLLLHEPFAIAMTNALQHQELIRLKDILT +DDNRYLRRQIRDLSISEIVGADLGLRHVMEMVQQVTQLDSPVLLLGETGVGKGVVAHAIH +DASPRKNAPFVSVNCGAIPESLFDSELFGHEKGAFTGAIAQKKGRFERADKGTIFLDEIG +ELPPHAQVRLLHVFQEKIIERVGGTTPISVDVRIISATHRNLEEMIRSGKFREDLWFRLN +VFPIHIPPLRQRKEDIPALVHHFIEKKTIDLKMQEQPRLSPGAMDQLMAYDWPGNVRELE +NIVERALIQFKGGMLRFDGLIFSPIASSRGGGHETTDRFLSIDEVNAIHIRRALKVTNGK +INGPGGAAELLGINPNTLRKRMNKLNIPYKKKEIGQGVD* + +>11FR_NODE_14#PROKKA_00025 +MRFKTLKYILVPVLAIGLSGCASTLTLLSPPSSRLVQGKNTAGAFNSYEYQYAVRGNKIY +IKRTPLCDEVKHVMRVEQKREIGYGPALLELPLFGLGLVDIANAHAISVNSKKVTPLADY +NTGKLMACGPLQPAANEKVIIENKNLNLYRMVRTDKNGVVNLDKVLSGIGNNVNLSVRLA +NNHNVAFSCMYIANR* + +>12FR_NODE_3#PROKKA_00026 +MSAQAAPFAVLSDIATRSRSQSRGLPAQEEAVELWNGIGFSLAGQLYVAPMGEVVEILHL +PRYTQVPGVRAFMVGVSNVRGRLLPLVDLGLFLDFPRSVV* + +>12FR_NODE_3#PROKKA_00027 +MARILIVDDSPTEVKKISSLLEKHNHEVLTADNGADGVAKARAESPDLVLMDVVMPGLNG +FQATRQLTRSPDTADIPVVIVTTKDQETDRVWGTRQGAKGYLVKPVKEDELIKTIDDLLA +* + +>12FR_NODE_3#PROKKA_00028 +MEDNFENLKIMVIDDSKTIRRTAETLLKKVGCEVITATDGFDALAKIADSHPDIIFVDIM +MPRLDGYQTCALIKNNSAFKSTPVIMLSSKDGLFDKAKGRIVGSDQYLTKPFSKDELLNT +IRQHIPSREST* + +>12FR_NODE_3#PROKKA_00029 +MSVKLGIVMDPIGAIHYKKDTSLAMLLAAQRRGWELHYMEMQDLYLRDGEPRARTQALTV 
+AANPDDWYSLGEPSDRALASLDVILMRKDPPVDKEFLVTTWMLEAAERLGTLVVNPPQAL +RDCNEKLFATWFPQCTPPLVVSRDAARLRAFHAEHGDVVLKPLDEMGGRSIFRVREDGDN +LGVIIETLTKDGSHQIMAQKYLPEITQGDKRILLVDGEPVPYALARIPSQGEHRGNLAAG +GRGEGRLLTDRDRWIVEQVQPMVREKGLLFVGLDVIGDYLTEINVTSPTCVRELDREYDL +DISDQLMQVIADRLARR* + +>12FR_NODE_3#PROKKA_00030 +MAASAVRVSDTDRLTFTLFLALVLHAIVVLGVTFTAHKPQPSARTLDITLAQRDDQKAPK +HADYLAQTNQKGSGTLSKKAQITTRHRAPINASQVHKVKPIPRSQPQHSQAKPEKRHVVT +TVSQQATQQVDSDDKEQKAHQKHNHKSLMSRALEIASLEAKLDQETQRYAKRPRVLRVTA +ASTLKSTDAWYVQAWVNKVTRIGNLNYPEAARRRGIHGTLRLLVDILPNGHVKDIQVLQS +SGYKVLDQAAMRIVRLAAPFAPFPPELRKRKDVLEIIRDWSFEPRGLSTNG* + +>12FR_NODE_3#PROKKA_00031 +MSEANPYIRNQFLIAMPYMQDPNFNGTLTYICDHNDQGALGLVVNRPLDFSLGEILEQLD +IECGHLDVPVYSGGPVKVERGFVLHRSRGEWQSTLEISGDLSVTTSRDVLEAIAEETGPE +DYLVALGYAGWGAGQLEQELAGNFWLTCPADPDILFNVPWQQRLPAALARLGIDWSQLSD +SVGHA* + +>12FR_NODE_3#PROKKA_00032 +MPDLKPEGQRQVMAFDFGLRRIGVAVGQEMLGTASPVTMIGARDGIPRWDEVEALIADWK +PDFFVVGLPLNMDGSESEMCRRARKFARRLHGMYHRDYAMMDERLTSFAAKSAIVEREGG +RDFGVKGVDDLAAVLILEGWFLQQRDQQPKIPTP* + +>12FR_NODE_3#PROKKA_00033 +MTAQINVERLLETMCSQLEQTLEARGAVNPVLTGIRTGGVWLADYLHKRLRLEEPLGELD +ISFYRDDFSRIGLNPRVKPSNLPFATEDRHIILVDDVIMSGRTIRAAMNELFDYGRPASI +ILVTLLDLGARELPIQPDIVGQQMQLQRDQRVKLMGPDPLRVELRENVRENPAKDASTKS +H* + +>12FR_NODE_3#PROKKA_00034 +MIDTCEAARQLQLNAAGSLRHFLTLDGLDRPLLTEILDTADSFIEVGERRIKKVPLLRGR +TVVNLFFEASTRTRSTFELAAKRLSADVLNLNISTSAASKGESLSDTLLNLEAMASDMFV +VRHAQSGAPHFIARHVTPGVGIVNAGDGRHAHPTQAMLDMLTIRQHKGGFEGRVVAIVGD +ILHSRVARSQIRALEILGADEIRVIGPNTLLPRDVESLGVKVFNDMQRGLKDVDVVIMLR +LQNERMEGALLPGEREFYRLYGLTTEKLRYAKPDAIVMHPGPINRGVEIESAVADSPRSV +ILNQVTNGIAVRMAVMSMVMSGQLAQLNQGDAAQEQSRTL* + +>12FR_NODE_3#PROKKA_00035 +MKLLIQNGQLLDSRTGQVRSGAVLIEDQKIVAVGEQVLDQAADRVFDADGAWISPGFIDL +CCFVREPGDDQKGTLASETRAAAHGGFTTVCASPESSPVNDSGAVTTLILERARKQGCVR +VLPVGALTRGLQGELLSDMASLARAGCVALSNGSLSRGNARVLRRCMAYAKTFGLTLFMR +PENPDLAADGYAHEGVVATRLGLPGIPEIAETIAVGELIQLAEDTGVRLHLSQLSAARSV +ALLRSARERGVPVTADVAIQQLAFNEGWLADFDSRFHCRPPLRTEADRQGLLAAVNEGWI 
+DAIVSQHQPHDPAAKQAPFGETEPGLSTVESLLGLGLKLVNAGELELPRFLQALTLGPAQ +VLNLPEPRLEAGSRADLTLFNPNGQWIPAPETLLSAGKHAPVLDQPLPGRVMLTLSRGKV +AYADPQTEFGL* + +>12FR_NODE_3#PROKKA_00036 +MSFLIFVALLVALWLLWTISRNTADALDKQTAVQYEIIALEKRMEELSEALKAQSGETAK +PARRSSSSRSKKEDEKEKKEGE* + +>12FR_NODE_3#PROKKA_00037 +LENSTQNALHKPELAGKIADQTQLTRAQAHEVITAFTDQVSAAMARGETVALAGFGSFNV +RERQARTGRNPRTGEALQIPAHKTVGFRPGKAFREAIE* + +>12FR_NODE_3#PROKKA_00038 +MRWLSHSAFLFSHGRSAVKALSLLLLFSLGGCSAVNNMMYKTTGEVMVGYAKAHAVPYVL +SSDDLGMSCAMSEALTPLLMSFGQVTAKPDQLGVMMQMSAGTCAEEKGWNAELAYMKELR +NQHPQNAEDDMIVEKRHYIEAADRYYSAWKHLVAYYGDPSTGQCPTFKNDEGQFIYMAGL +LAGVQALAAEIQSTSDEGVPKNIGSTVAQASGCLSDDKWWGVPMALRATVWSMIPGAKPD +GENPFQRLDESDKKANKARVRLAYVLHVIAAWNKGDTKLVKKLIREQQAQEAKYPADPRW +KMIDKLSTLYLRSISDRMWVEHTGHRTPIGGLGTFWDDSKGSGEVIDLDSVM* + +>12FR_NODE_3#PROKKA_00039 +VKPSGKRFSLKTWSLASLTLAGMLALQPAANAGSLPKRSFCVFDPVGANGPLFNLMKSTK +PAALDWGVDLQMRAYTDEKIAAEDFKGGQCDSVLLTGTRAREFNKFTGTLEALGAITSNK +EERVLMDTLNQPKAAKLLTNGDYEVAGILPAGAVYLFTRNRNIDTVNKLQGKKIATLSYD +RASLTMVRHVGASVVGASSASFAGLFNNGSVDLAYAPAVAYTPLELYKGLSHDGGVLQYP +LAQMNFQIILHKSRFPKGYANHVREYAREHLNQAFSIINKATDEIKKKYWMYPTDKQTAS +YDQMLQSVRLSLRDKGVYDAKALKLMKIIRCKVQPSRGECSQNAE* + +>12FR_NODE_3#PROKKA_00040 +MSEAIEQSSMYLQKRTIGGRSAREWFSSLPACILLMAVVLFTTSSDIHNKALQLGQVLWS +GYYKLRVDPVKPDCNPNVNVDAQVKRQIAAQAAQQDSMLGSLVGSSPVNPAAVRQSVINA +KQACEAQFADYNATKGRITEGVRVYRSVELFISDVVAFGLASQRYILALLVLVCAATATF +SRHHIAMRGMETRLDHIVSHFMQFIANTMLLISSFMYRQMSHNSGAVVTTGQEISHDIWI +AGFLLLTIVSLVQLFRVPEDAEEGGTLGHAFLCVPLYTTMCLISGTFFAFVGSPAGIGIY +LDKMMELADQFLNVGLYVWAGMMLKQTRLASLVFNVLRPLKLPPELLAVVAVMVAAVPTA +YTGASGIFVIAAGAVIYSEMRKAGARRQLALASTAMSGSLGVVLNPCLLVVVIAYLNREV +TTDSLFHWGGWVFLLTSTLFLITSLVVNRQKGFKVAPMNEALPEMVMRLKPLIPYVLVIA +GVVFFYWLLLGVTMNEFSAPRILPIIMVGILVYEHVHFRGDRNKVSGEVDHQGLEKSLRT +ATSETTAEIGALLLLFGLSVSIGGVIERSQVMSLFPQALPSPWLAMMLMVVILVILGMIM +DPFGAVILVSATIADLAYQSGIAPVHFWMVTLVAFELGYLSPPVALNHLLTRQVVGESEM +NLSYRESGSFYQRHERVLMPLLVMGSALLIVAFVPLLFYAR* + +>12FR_NODE_3#PROKKA_00041 
+VSNTYTDLSSARLVELALERNEGKLAANGALVVNTGRRTGRSPMDRFIVEDPATAELIHW +GPVNRPFDAAKFDALWERVESHLEERDQFVSYVHVGADPEHYLPVKMTTETAWQNLFGRN +LFIRPDNYNPIDKGEWQILNAAGFVCEPERDGTNSDGCVILNFAERKVLIAGMRYAGEMK +KAMFSVQNFLLPEQDVLPMHCSANVGEDGDTCLFFGLSGTGKTTLSADEDRYLIGDDEHG +WGRGTVFNLEGGCYAKCINLSKKNEPIIWDAIRFGAIVENVVIDNDSREPDYDDVSLTEN +SRCAYPLEHVEKRVLENRGGEPRAVIFLTCDMTGVLPPVSILNKEGAAYHFLSGYTALVG +STEMGSSAKLRSTFSTCFGAPFFPRPAGVYANLLMKRMEEFGSRVYLVNTGWTGGPYGVG +KRFSIPTTRAIIRGIQTGALENVQTQHLDDLNLDVPVEVPGVDSNLLNPRNTWQDKEAYH +HKAQELIAQFVENFKKFDVSDAIVNAGPKLKD* + +>12FR_NODE_3#PROKKA_00042 +MTGQDQLQRFLFENSNIRGSIVRLDDTFQQATGQQDYPTVVRNLVGQSLAACALMGDSLK +FQGSLSLQAQGEGPLRLLVSDSTDQLTLRGLAHWNPEAAEAETLPALIGNGHLVITITPD +AGQRYQGIVPLEQDTLAGCLEDYFRLSEQLATFMCLFADEKGAAGLLLQQLPGELAGPDT +DLWPRAIKLAQTLTTEEALQLPSEELIHRLYHQEQVRLFPARATRFGCSCSRERTRLALE +SLGQDDCMALLDEQEVIEIDCHFCGQRYRYDRADVRAVFGGPRLH* + +>12FR_NODE_3#PROKKA_00043 +MTAHSDQQQQEARIRLDKWLWAARFYKTRTLAKEAIEGGKVHYNGQRTKPGKVVELGARI +RLKQGWAEKEVVIQGLSDRRGGAPQARELYQETDDSQQRREDEHWQRKMMQAAQMPPARR +PNKKQRRELQRLKSGQG* + +>12FR_NODE_3#PROKKA_00044 +VLACFYFFADSPMLNWSQIDTALLDMDGTLLDLHFDSHFWLEHLPRRYAELKHLDPEHAR +QSLLSKIEQLRGKLDWYCIDFWSDLLDLDVVALKRETRDRIAWRPHSKAFLERLRACGIR +RVLVTNSHPDGLNLKIETTGIDQHLDRLFSSHSFGQPKEGPDFWEQLAQQEPFDPERTLL +IDDSLPVLESARRYGIRHLLAILSPDSQQPPRQPSHHPCVHDFDELFQSLDQFAHQKNRI +DGLSD* + +>12FR_NODE_3#PROKKA_00045 +MRPKLSHALAAILAACVMSLAPMTQAETTQSTPSALAMTGDALFARPALLAMTLVGSAVY +VVSLPFSLLGGNASEAGKVLVVDPAKATFTRCLGCTMNQNRQNEQKNQNQVATADNTDTT +SN* + +>12FR_NODE_3#PROKKA_00046 +VPELPEVETTRRGIEPHLVGHTVTQLQVRESRLRWPVPDKLDQMLPGQKVGQVARRGKYL +LVHLERGTLLVHLGMSGSLRVVTRAEALRKHDHIDLTTDAGTIIRFNDPRRFGAWLWTED +WQHHPLLASLGPEPLSPAFSGHYLHRQSRRRKAPIKQFIMDSHMVVGVGNIYANEALFIS +GIDPRRPAGRISAARMEALVLAIQQVLENAIAVGGTTLRDFVNSEGQPGYFRQSLQVYGR +EGQPCRRCGKPLRQLRLGQRSTVFCGHCQR* + +>12FR_NODE_3#PROKKA_00047 +MENINRITTGNCSIDPIDNTEPQAYQLRVF* + +>12FR_NODE_3#PROKKA_00048 +MWLYGLFHHLPAWQLILIALAMTHVTIVSVTVYLHRHSAHNSVDLNPVVAHFFRLWLWLT +TGMVTKEWTAIHRKHHATCETEEDPHSPVVKGFSEIMWRGAENYRAAISDEICERYGQRT 
+PEDWVERNVYSRYRLGGVALMAVIDLLLFGVNGIWIWAVQMMWIPIFAAGVINGIGHFWG +YRNFECADNARNIVPWGILIGGEELHNNHHTFPNSSKLSRRWWELDIGWGYIRLLQLFGL +AKPKGYRPIAHQIPGKMDMDVETVQAIANNRFHVMRLYRKRVLEPVLRQQRSVVEKDIKP +LYRRVRKLVFREESLIKPQERQSLEQVLQNSAVVRLIYEKSHELQAIWQRRPGMRPQDKL +NALVEWCHQAEESGVRYLEEFAATLRSYSLRPQTA* + +>12FR_NODE_3#PROKKA_00049 +MHLWRIVWVLIVLVAVLGGVYFLFPGTIVNADKSFELWRAGLAVHDINVDDQHIHYVDSG +GQGRVVLMLHGFAADYYSWPRMARYMKAGYRVIAPDLPGFGQSSRIAADNYGISQQAQRM +HDFLRALNVDKVDIVGNSMGGWIAAEFAARFPAQTRTLTLIDTGGITAPHPSPFMQAVEK +GENPLVVHNRAQFNHLLTIVFHHQPFIPGPLKGYFAKQAVEHAAFNEKVFKDLTDDYVDL +EPLLPKLTMPTLVMWGRYDQILDPSCVEVLKAGLPNATIKWFDTGHAPMLEQPKASAEVL +KAFLQANRGD* + +>12FR_NODE_3#PROKKA_00050 +MPPTRRSPPRQQRVTAKKAGGLSRLRIIGGQWRSRQVPFPPVEGLRPTPDRVRETLFNWL +AGDIPASRCLDLFAGSGALGLEALSREARHLVFVDTASEVIRTLRENLRTLGCQQADVFQ +QDAEQFLQRPPATPYDVIFLDPPFRQGWLDKVIPLLQQPGWLKPGGWVYVEHEAELNARP +WPSHWHEHRQKEAGQVVYRLFHVADALKDTAEGVERAS* + +>12FR_NODE_3#PROKKA_00051 +MTAEWISVGLLALLVLAFVIDIGLRLRKPPQKPQPPVAERPPAAPEQREAVPEAKAPPPR +PEAPAAEEKPAKAEPEAEVVEPEAEAPPAPPVAEEAPPVEAPPAETPAAEPEVEEAPVNW +FARIKQGLGRTRGNFSEGLSNLLKGQKAIDDELMEDIETLLLTADVGVTATTEIIDTLTE +KLERKQLKDGDALKQALREELHGILAPSTAPLNIDDGHKPYVILMVGVNGVGKTTTIGKL +ARRFQDQGKRVMLAAGDTFRAAAVEQLQVWGERNNVPVIAQQTGSDSASVIYDAVQSAQA +RGFDVVIADTAGRLQNKENLMSELEKVVRVMKKLDPEAPHEVMLVLDAGTGQNALSQAQI +FQQAVGVSGITLTKLDGTAKGGIIFAIARQLKLPIRYIGVGEQVGDLRPFQAEEFVEALF +DEPA* + +>12FR_NODE_3#PROKKA_00052 +MIRFEHVTKRYEGGHVALRDVSFALERGEMAFLTGHSGAGKSTLLKLIMLMERASEGQVV +IGGQVLDKLPRRRIPYIRRHIGVVFQNHQLLFDRTVYDNVALPLEVMGIAPREVGRRVRA +ALDKVGLLSKERMNPMELSGGEQQRVGIARAVVNKPPLLLADEPTGNLDPELSASIMHLF +EAFNQVGVTVLVASHDISLIRHLGHRVITLDGGRLAQGDRMPDEEALYG* + +>12FR_NODE_3#PROKKA_00053 +VDESREKSGRRGAGVAENPTRELLDAYASHHRKIARDSLIRLLRNPIGSLMTWLVMGVAL +ALPLGLMLLLASAQSLGEGWSDSSRINLYLKQNVDETAAMNLQGKLRSRGDVRDVQLVTR +KQALAQLRKDSGLSAAFDYLNDNPLPNTLIVAPALQDPGAVQSLSQSLKQLPQVAEVQVD +LAWLKRLRAMIGLVVNAVWALGVLLALAVLLVVGNTIRLAIENRRDEIVVAKLVGGTDAF +VRRPFLYTGAWYGLGGSIVAIILVALFEAWLDGPVNRLASLYGSHFQLQGAGFGDFLLVI +MVGVLLGWMGSWLAVKRHLDAIEPR* + 
+>12FR_NODE_3#PROKKA_00054 +MGTSLQVMDKLIPGANVQAYIQGVNAIPMLTVEEERELAARLQQDNDLEAARRLVLSHLR +FVVHIARSYSGYGLAQADLIQEGNVGLMKAVKRFNPDYGVRLVSFAVHWIKAEIHEFILR +NWRIVKVATTKAQRKLFFNLRSAKKRLAWLNNDEVTAVAADLGVEPRVVREMEGRLAAQD +TAFDAPTDDDDDNAWQAPAYYLEDRRYDPAQQLEAADWTEDSNSRLLEAMDSLDERSQDI +LRERWLSESKSTLHELADKYGVSAERIRQLEKNAMKKIRKMMGEESIA* + +>12FR_NODE_3#PROKKA_00055 +MTDLPRIAFLGIGLMGRPMATNLINAGYPVTVWNRSPEKARALAGQAGVAESAAQAVAQA +DRIITMLENGDAVQQVLVEQGVAEAIQPGAVFLDMSSIAPEMAKSHAGRLKARGVGYIDA +PVSGGTVGAEQATLSIMAGGSHEDLEAVRPLLETLGRVTHIGPAGSGQLAKLANQAIVGI +TIGAVSEALLLAAKGGADPEAVREALLGGFAGSRILELHGQRMLARDFEPGAPSRIQLKD +MRMILDQARAEDLTLPLAQQAFQSYRALIALGEGECDHSALLLQLEHLNQTRMSDPSDGQ +ER* + +>12FR_NODE_3#PROKKA_00056 +MPRLAANLSLLFNEVPFLERFEQAARAGFRAVECQFPYAWAPEAIAAQLQGQGLQQVLFN +LPAGDWDGGERGIACLPGREADFREGVERALRYAEIMKCRQINCLAGPLPTGAPPEPYWA +TFEANLRWAAPRLAEQDITLLIEAINSKVDVPGFLLDHSKLALDLIDRLNLPNLKLQYDL +YHMQIMEGDLLRTLGANLPQIGHIQFADNPGRHEPGTGEINFRRIFEQLDAWGYEGWVAA +EYVPEVGTFDGLSCLKSWL* + +>12FR_NODE_3#PROKKA_00057 +MFGITATEAQSEYEPTEEPQGETKLRVLVIEDDQDVAAYLIKGLKESDYVVDHAADGKTG +LLMAAGEDYDMMIVDRMLPGMDGLNIIKTVRATGNTTPVLILSALGDVDDRVEGLRGGGD +DYLTKPFSFTELLARMEVLVRRTRSSNEPETVLKVADLEMDLLARTVKRAGQSIDVQPRE +FRLLEYLMRHAGQVVTRTMLLEKVWDYHFDPQTNVIDVHISRLRAKIDKGFEKPLLQTVR +GAGYMLREDT* + +>12FR_NODE_3#PROKKA_00058 +VKIHSQLRNSTFQIALLYMVVFATSVFLLLAFIYWRTAGFMTAQTDETIEAEITGLAEQY +RSRGINGLIAIVRERVARDPNGKSLYLFTTSDYDKLAGNLSAWPQNVQASNGWINFTLND +SVGWRGEPHLARARVFKVQGGLRLLVGRDVQELTTLKHLIERAIDWGMGITLALSLFGGF +MISRSTAKRIEVINQTARKIMNGHLSLRIPARGTGDDFDQLADNLNQMLDRIVHLMEGIR +HVSDSIAHDLRTPLTRLRTQLENTLLTVEGDAARDQVARAVAETDQLLATFNALLRIARL +EMTGHSADKSPVQLGPLVHDACELYEALAEDKEQEFVLDIPQDVTIEGDRDLIFQVVSNL +IDNAIKYTPPEGNIRVLVTQEEDDAIFQVEDSGIGVPDSEKDKVFERFYRVAKSRSQPGN +GLGLSLVSAVVDMHQGRIELADRYTDGRENPGLKVTLRFPRLKPNRRKEIKPTSTTEPEG +SAS* + +>12FR_NODE_3#PROKKA_00059 +MNTQKLINRALSGLDDLGYQLDSMGITSKVNRHNVIAYVMAEQKHWEGEYDSLVARIDQQ +RFRVEQIVGRVEGLVRGGAEFALKPVSGLRSLVKA* + +>12FR_NODE_3#PROKKA_00060 
+VQWMWSYWDVLVDLYTEREGASDLVLTRRMDEVEGKPQFTVGLVYVPKPQPNHAFQFVPW +LTATPPDVLKRAAEFRGRL* + +>12FR_NODE_3#PROKKA_00061 +MLSRAIMAPMDLVLDTAEAQRMALFFDHILIWKLSRRTFNKEDNQRYSSELRYLRERGVA +LLCGLDIPNLISFGRADGTTWNPMEEMKKDCDLLLPFQVGTGVPDQAENEAHADRLIRHL +SSRLMYNDKPVVAHAEAVNLNTQGNELNALEITINNIPMPPENIPWEDLIQFRNEEETVA +KLRALRIWLKDRSSAGQSPREIQEELEHLLYEYRKYMEIQHKKFRQGILSTLISSTPEIV +ASVATLNFGAAIKSVFDIKGRYLGLSEAELSAPGREVSYIAKARDFLTS* + +>12FR_NODE_3#PROKKA_00062 +MQTLNIPSLATVTAITLLTTACANNPTPETLQSRGLHPLDTTQLHQLYSKTLQFDWRNAR +SRSGSGEYQPNGEISIEWSGESFNGKWRILNNHFCATYASIHNGQEQCYMVYQTGARRYV +AFLNGDYSYSFNVKKVK* + +>12FR_NODE_3#PROKKA_00063 +MSHTSSDHSIAFPLRQKSKLRHLAQELLAKADIRIDGDRPWDMRILKEGVLERILGEGSL +GLGESYMDGEWDAERVDEFVYHLIRAQLDREVRPWNLILHGLRYRLFNMQSLRRAWMIGQ +RHYDLGNDLYEAMLDPLMTYSCGYWKTATHLAAAQEAKLELICRKLQLKPGMRLLDIGCG +WGSLMAYAAQHYGVECVGVTVSEEQVKWARHQYKGLPVEFRLQDYRTLDEQFDCIASVGM +FEHVGHKNYREFMQVAHRCLDDGGLFLLHSIGNNSRDSGSDPWIDKYIFPNGELPSVGQI +GDAADDLFVIEDLHNFGADYDKTLMAWHANFEAAWPKLAYLGERFRRMWTYYLLSCAGTF +RARDIQLWQWVLSKRGVQSGYIRPYF* + +>12FR_NODE_3#PROKKA_00064 +MNKGTVLRVARPTDQLEKIAQMYMEGLGFERLGEFREHDGFDGVMLGLRSHAYHLEFTQC +QHEKAGRAPTQDHLLAFYIPDAVEWVRTCEAMVKAGFVCKPSFNPYWDRLGKTFEDVDGY +RVVIQKEQWLD* + +>12FR_NODE_3#PROKKA_00065 +MKDIQLESTIICPACGHQKTEQMPTDACQYFYECESCHTLLKPRAGDCCVFCSFGTNPCP +PVQQGDDCCASD* + +>12FR_NODE_3#PROKKA_00066 +MSSPVRTATFIGAISVVLWGTLALLTKLTGGRIPPFQLMSMTFGIAFLLMAVRWWSRGES +GLGYIRQPFPAWLLGVGGLFGYHLAYFKAMTLAPAVDVSLIAYLWPLFIVLLSALLPGHS +LRAQHLVGAVLALAGCWLLVGRNSQGFDWTYADGYLVAFGCSLIWSSYSVLSRLVRSVPT +DAVGWFCGVTALLALGCHLLWETTVWPVGTLQWLGVIGLGLGPVGIAFFTWDHGVKYGNL +PLLGTLAYSAPLISVVLLLLAGFGQASGMLFLASALIVAGSFVAGRAKHASPELAEEPVP +E* + +>12FR_NODE_3#PROKKA_00067 +MTMITPSYLGETIEYSSLHACRSTLEDPTRKQPRNTSTAKAYLDEHFQHKLTLETLAGVA +HLSVRQLNELFRRQIGMTPHHYLTEVRMQQAWQLLEGTDLSVQAVAERVGYSSLAAFSDR +FHQHFGHPPSHFRRTGKTLRQNR* + +>12FR_NODE_3#PROKKA_00068 +MEAITNGMMNLNRQRHQHLVALRIIFAHGENGGEEVVHIGHVAIKTGDTFPGTGRVDLLC +LTHCLTSTD* + +>14FR_NODE_9#PROKKA_00069 
+VKAIDGLEELQLTTNGHTVHFDNYYDQARKKEVTKYLVPADVELDKNLNQMEREAGVKEM +DDLQRGSYKGREMFVRFFCLGTTGSSFSIPCLQITDSAYVVHSEELLYRRGYEEFKRQNA +ADPNFEFFKYLHATGEVTERMTSKNVELNRVYMDYTRNCVRSVNTQYAGNTVGLKKLSLR +LAIRKADKEGWLAEHMFIMRCNGPNGRKTYLAGAYPSACGKTSTAMIPGENIVGDDLAYF +KVIDGEFRAVNVESGIFGIITDVNSKDDPVIWDVLHTPGELIFGNILVKDGKPYWQGMGE +DIPATGMNYCSTEWTEGMEGPDGKVASCSHKNARYTIRINDLANKDPEWDKPEGMPMGGI +IYGGRDSDTNAPVREAYSWEHGVCTMGAMLESETTAATIGAEGVRKWNVMSNMDFLSMSV +GRYIQNNLDFAKDIERPKVFGTNYFLKKDGSYTNGKLDKSVWVKWMELRIHGEADAIDAG +YGLIPKYEDLAKLFKQVLKEDYAKEDYAFQFRVNIPALIAKLDRMEEIYSTKVTDTPEIM +KAEMKAQRERLEAIKAAKGEIVSPFDLD* + +>14FR_NODE_9#PROKKA_00070 +MTQGLGPGLVSPAPFGAEEKPRGGLGEMGAFYAKGVTYWRSVTTP* + +>14FR_NODE_9#PROKKA_00071 +VKPASVLLATPDGEGFYRFSDPVRVVTAGSLEEVLPTVTAVEAAVAQEGVFATGFVSYEA +GPAFDRALAAYPPGEFPLVWFGLYRNREVVPKTEMQDVPPLAWRPCLDQDEYVAAIRRVR +EYIEAGDTYQVNYTFRLHAPFAGDPEALFARLASAQACRYAAYVDTGRYVVCSASPEMFY +TQNADVFRSRPMKGTRPRGMTLAEDRAHREELLESEKDRAENVMIVDMVRNDLGHIAEAG +TVHVPELFSAEPYPTVWQMTSLVEARSRAGFGQTLKALFPPASITGAPKPRTTEIIRELE +TTPRHIYTGTIGYLGPEDARFNVAIRTVLIDRQTQQAEYGVGGGIVWDSDPLAEWEECMT +KTRVLRTVRPEFSLLESLLWTPDEGYALLDRHLARLCDTAEYFGYPVDVVSVRQKLEELA +GNLEPVPNKTRLLVDRHGEITVEGSPLGPAPDALVWRVCVHPERVDSHDPFLYHKTTHRA +VYTQAAAAHPDCDDVILQNERGEITESCRANVVVEMPEGRFTPPVSCGLLAGTQRAELLA +RGEITEKVLTPEDLYAATKVFLINSVHGWVVAELSDSSD* + +>14FR_NODE_9#PROKKA_00072 +MTAFPRTDVGGVSVSRLIIGTNWFLGYTHSTSAQSRTNSERVNHRDVVAGIVETFVEFGV +DSIMCPHTDTVIPEAIEEARQRTGKPLVVISTFALPVTKRTALDGFDLGEVERILDEQVA +RDVDIAMPHQSVTDIMLDKCSREIRQMAPVCALIRDRNMVPGLSTHAPETVIYSDETGLD +VESYIQPFNLMGFLMQVEVDWIARIIQNAKKPVMTIKSMAAGQVRPFQALTFSWNVIRPQ +DMVTVGTSSKHEARELCEMSLQILDRRATTQELQRTRSKASISPA* + +>14FR_NODE_9#PROKKA_00073 +MRWQRAMLFVAVLVACGGCSWRGKPRANHVRMYCLVDLEPVAEKLIQGFTESQGVRVDVR +YIRPDELNKRLQRDEYVGLFLYANTWSGSAEENLLRARGNGGGRPAELGRFTPCLIVPKG +NPKGIRSAFDLGKPGMVHGRTRQGACLLARISESGYRKPPKGPDTEPSNIRVRDTDYDVA +RLVAGGSIDGAVVWSFTQSAMADEVEEVESKGLKRYGNVQHMVVGTPMSTPQPGLVKAFI +EFATGPDGQAILRKTGLDVDPKCR* + +>14FR_NODE_9#PROKKA_00074 
+MRIATFLLPLLVAIAGAAPKVETGTDGLRVRTARYVATFSTDSGLLASLALVDGTPLLTS +SRLYADVLPDGRKNFSAKAKAAPKAKPQPDGSLLVEVAGALLDKDGKPHPTYPFTYTASY +RFDDTAQVRVSVSVIPGFDSDAVFGFLGQVLSTASQREFFVNTADGLISEMAATHSGRTY +QSESEPLDLKDPYLGVLLKTGQILQFRLVSGAESLLNVFFHDSGAGPTHLFLCPLSGSNP +RQAKTGKAWQQELVIEAMPLAEWTKSR* + +>14FR_NODE_9#PROKKA_00075 +MKPHRLAIFLGTFPMLFSLLLAGEGTMKEAGKSLRFGNGRLILTFDRDTGIWTGLEASGG +AVCFRRTADSPSLNVQVDGKPVFGADRKMSLREQKVVQLPTASRLELTMGQGDWAVTAAY +TLWDSGTLQRQATFVYSGPKPEGDHEVRNALFVLPDVGLAGTDAFWFATAEYPPRDHPFR +HTDSGRRFGFPFSESTFHGFLARDPQAKLSLVSAYYTEDERAKLLVQEGKGTATAFHTHL +LAETLRPGLRFEVGSQLLRVVPGTRQDALRALQGFYDLPGLRTKIGMPPDTGRQIFYSAH +PGGTIDSSFRDVGGFANFTKLLPSIRDLGVNTLWLMPFWYGPVYAPYDYYRLDPKRCGTP +AELKALTDKAHALGMRVLGDLIPHGPREEPGAKPSFAEQHGDLVCRDKDGKMIQWWGCHY +CDYANPGWQDYMAKHAAYWVRECGLDGYRVDVAAGGAPNWRPYDHNRPSFSGLHGGLALL +RKARAACLKENPNTIFLAESTGPTMYSAVEHGYHWAFSTLLEDHVLKDAPADFVQAMSGY +LENQTYAFPADAFPIRFLTNHDKLRARYRYGPNLHRTLLALCAFMKGAPLLYEEEEMGNE +DFIAKLYRIRQTYDELSVGTVSYRSIPVEPKHVFCIEREYKGKRSVVLINFSNQMSEVKL +SLPKSDLKNPGIYEAVSGQRVDYAQDLTQSLDPYAYAVLVIRQRDELPPSVPKERGESPA +APDGRAMDIKITQEDSLTRVSTPLYSAVIDSARGGLLQEVRGADGKLLVNGVELKEGRRK +LFVGHDSVDFADCSVPLRILARDRQFPDGGKVSLLRGRAELRDGDGHAWMDLTVLYSLRA +KSLSLNVSLTPQYRLSPSKSDLGMKIHFVPTTHWFAETAEGNLLGHVIRRHPASHGFSGR +YWHGAGEAFFNGSLYPVVGEFGVLDTNRRIALGSMALRLDGAPLPVRLLEDEPPGSPVVL +GGPAAATADIPLLRGSQRAVWQQGKAKGMVVTLDFRSVPARFTQYPDSFGVRGWDTGTPK +LCYRGGWCTFGPEYLFRGYGMRATVVRSHGGELTALTDAAGNGLRVTDARFYTDQGLFGD +WRDPRGVLRKMSASNVNDPEPDTQLLHLFEGPQDSAPLRFRSFFRHPHAGGRSLLNPRVE +YEISYTPPTEKGKGLRIDCGVRPHLVKIGTGGFLAYKISLGGCDQWQVDGGEWQPLPAKG +GRLWENKEAGHLPKTLLLRNSKTGLWTRFSDFVGGPDQVENVFLHAGQGQVHLFVAFYDA +EPTDVRPVWRRAAFTMQAGGKQ* + +>14FR_NODE_9#PROKKA_00076 +MNIYVGNLPYSVNDDELRGVFEEYGSVDSARVIMDRDSGRSKGFGFVEMGNDTEANAAIE +ALNGQDFSGRPLTVNEARPRADRGPRRGGGGGGFDRRPRY* + +>14FR_NODE_9#PROKKA_00077 +MNIYVGNLPYSVSDDDLRTAFEEYGAVDSARVIMDRDSGRSKGFGFVEMGNDNEAQAAIE +GLNGQDLGGRPLTVNEARPRADRGPRRGGGGGGFDRRPRY* + +>14FR_NODE_9#PROKKA_00078 +MNIYVGNLPYSAGDDALRTAFEEYGSVDSARVIMDRDSGRSKGFGFVEMGNDDEAKAAIE 
+GLNGQDLDGRALTVNEARPRSDRGPRRGGGGFDRGPRY* + +>14FR_NODE_9#PROKKA_00079 +MAQLQTRTTQVIVQNQLGLHARPVTLIVKLAKTFSSRIAFERGGTVSDAKSVMALLLLAA +GKGTELTITAEGHDAEEAIEALERLFSDKFGEE* + +>14FR_NODE_9#PROKKA_00080 +LSDSSRTSSARNEPGETVLAGTGVSPGTVIGKAVVVGASVVSVREHELPVSQLEDEVGRF +RAALEKSRRELEELRDRARDEKNQDLVDILEMQVMVIEDGMLDQEVSDRIRDTRRNSGFV +LKNYVDEFCDQLVKAGSAFFAERTNDIQDLAGRILRQLLGSESVDLSELPEPCIIIAHDL +SPSDTAGMDRDNVLAFVTAMGSRTSHTAIMARALGIPAVVGLGESLAQVGDGVRLVVDGT +QGRVVVSPENATLVKYHERIEQEKAWRAKLEVNALLPAETRDGFHVSVAANVELPEEVER +IRRVHRVGIGLFRTEFLFVKGGSISDEEQQYAAYRRVAEDVAPHSVVFRTLDIGGDKFLS +HLDVPVEINPFLGMRAIRFCLRREDVFRSQLRAILRASAHGGVRILFPMITTMEELHATL +AILDDVKAELERQGIPHNPDLDVGIMIEVPAAALIADKLAPHVDFFSIGTNDLVQYMMAV +DRSNPDISYLYQPGHPSVVRLLDRVVRAANEHGRWVGICGEMAAEPLFVPLVLGLGIHEL +SMSPVAIPIVKDLVRDINMLEAEELVDQAMACGSAEEVTQLCRSFVERIAPELFLD* + +>14FR_NODE_9#PROKKA_00081 +MSISIASQPGLTWHPAYCRPRTEKVVDDYCKRHDIPCYLPLLRQRKRYQRRTVETYLPMF +PGYVFVQLGPDTRTTFLECHRIVHIVEVREAQERTLVAELTELQHLETAQATVDLEVMPD +IKPGTQVTITDGPLAGITGVVEKRKGKTRVTVNVELVGRSVVAEMDLGELELDGDA* + +>14FR_NODE_9#PROKKA_00083 +VYFRQYKVEGLGCYSYLIGCPAAGTACVVDPERHTGQYIQTAEHQGLRITHVFDTHLHAD +HITGSAELAAATGATICVHPAIGAEYEHEDLLDGQHYRFGAAELEVVETFGHTPNSVSLA +LTDHGRSEDVFALLTGDLLFVGDVGRPDLAGADLLEEQIHNLYESLYTKLGRFPDWTEVY +PAHGEGSLCGKGMSAKPMTTLGFERLNNPLLADLEFAEFHRIMTEAFQVRPDNFAAIVAK +NQRGPQLLREAPAFMELSVLQAERALAAGAQIVDTRAQSAFGAAFLPGSLNIGVSPSSVN +WLGMLVPADTDIIIVADSKDVACQVADQFRRAGYDRLIGYVPDGVASWALQGKPMDHLPQ +LTPASLKHVVGRYGNHVILDVRTDAEWATGHIEGAIHLPLPRLVREGIDLGKDRHITTVC +RSGYRSNVAGSFLKSQGYEHVFSLIGGMTAWQAANR* + +>14FR_NODE_9#PROKKA_00084 +MERSSLALPCSGGSCLHAAVRAQMTMQRTAFSLSIALSAVLASALLVGIVPLLRGGDNYE +LAVKSALLVLVVGYAWLHWRAVTRRGRCLLKAAFCLNALVLVPLGATAALCHVFGGPKIV +PREAGAIGGAIAILAAAAAAMQVILLTRCRTIELTVPKGD* + +>14FR_NODE_9#PROKKA_00085 +MAQPYEGLVGTSGRVQVNRNGRPAFSISTGAFADGWRGASLTPAKVGETTDGVCLGKISL +PDKLTIASALRATAAGKAMELRYTLTPKADAKLNSLHVSFGLPASFLKGASYTIEGETKE +VPAVLGATHLRAGDHVPSVRFTWPNGDWLQVDILSKTPVLFQDNRQWGDSFDLRLGPQMV 
+PAQTLPANQPVEIAMRVSAKDGMKLDFDRPVTITAGKDWVPLDLELDIEPGSALDFSGLG +QFDAPSGKHGWLQATPDGKFAFADSLDTPRRFYGVNLCFTAQYLSHDEAERLAERFLRLG +YNTVRFHHHEYPLIDRKNGCSTDLKPESIDQLDYLFAQFKKRGIYVTTDCYVSRPVYASE +IWDGAKGNVEMNEFKMLVPVNERAFENWKTYNRNFLTHRNPYTGMRYADDPTLAWLSMIN +EANFGNYIRSVSDRARPDWERAWGAWLKARYGSAEAITKAWGSTFDGDLSKPTAKLAKSF +TDDNRQSRDFAVFLADTERTMFLKMKKFLREEIGTKAMLTNMNGWTNTPQSQLARAEFDY +VDDHFYVDHPQFIEKSWRLPSRCPNTSPVLAGAPGGRGTAFNRLMNKPFTISEYNYSGPG +RYRGVGGILTGCMAALQDWSVVWRFAYSHRRENVLKPSTAGYFDMATDPLNQAAERASMC +LFLRGDLDPAPRSAAITLNPETLEKGDSHQGRTPPSWDELVPVIQVGTFLGDRQSKVPAD +IALPTTDAAPAAADVVMPKPYDSGKGSAILKELRAKGWLDAANKTDLDRKRGQSASDQFL +MDGEKDMMVLDTPRTAGGYAEAGQTIHTQAADFSILDTGATVWISSLDKQPITSSKRLLL +THLTDLQNTEVRYAERGRKTLLAWGKLPHLVRVGEAKISLHRSGAKLPKVYVLATSGHRL +GEVPVTKGKNGTLELAISTKGEAGAQLMYELDFR* + +>14FR_NODE_9#PROKKA_00086 +VDHALLDRHFPRYTEYDPQVPVWDLTPQCPGAFHRFFDTSPLSPSGRYLAVTRYQPERLP +EPGEAAEVVLVDLHTGKSQVIWESRGWDTQLGAQVQWGATDEQLFFNDMDPADWQPFGVC +YNPLNGTSCRLAGTVYMVSPDGKQAVSPCS* + +>15FR_NODE_1#PROKKA_00087 +MFLAGALIVFRIGIHVPVPGVDPTAYAHLFNQNAGGILGIFNVFSGGALEQMSIFALGVM +PYISASIIIQMLTSVVPSLEALKKEGQAGQRKITRYTRYSTVALALFQSLGAAFALQSQG +VALTAGPGFIVTATVSLVTGTMFLMWLGEQVTERGLGNGISMIIFAGIVAGLPSAIASTL +ELVRNGELSSIVAILIFVGVLLITAFVVFVESGQRRITVNYAKRQQGRRMYAAQTSHLPL +KLNMSGVIPPIFASSIILFPVTLAGWLGQSSGFGWLNTLQLWLSPGQPLYVALFAVLIIF +FTFFYTALTFNSDETADNLKKSGAFIPGIRPGKQTAGYIDTVLTKLTLWGALYLTAVCLL +PEFLIAYAHVSFNFGGTSLLIVVVVAMDFMGQLQAHMMTHQYEGLLKRARMRGLQR* + +>15FR_NODE_1#PROKKA_00088 +MKVRASVKKICRNCKIIRRRGAVRVICSDPRHKQRQG* + +>15FR_NODE_1#PROKKA_00089 +MARIAGINIPPHKHTVIGLTAIYGIGRTRAAEICATAGVDPTRKVKDLSESELEAIRQAL +TAYKLEGDLRRELNMNLKRLMDLGTFRGIRHRRGLPVRGQRTRTNARTRKGRVRRSAKR* + +>15FR_NODE_1#PROKKA_00090 +MAKPAAKARKRIKQQVVDGIVHVHASFNNTIITITDRKGNTLSWATAGGSGFRGSRKSTP +FAAQVAAERAGAVAQEYGVKNLEVEIKGPGPGRESAVRALNNLGFRVLSISDVTPIPHNG +CRPPKKRRV* + +>15FR_NODE_1#PROKKA_00091 +MARYIGPKCKLARREGTDLFLKSPIKALDQKCKIDRIPGQHGQATRRGRMSDYGLQLREK +QKLRRMYGVLERQFRRYYKEAARRKGATGALLLQLLESRLDNVIYRMGFASTRAEARQLV 
+SHKGVTVNGQLVNIPSFEVKGGDEVALTERARKQNRVEMALEISRQIERPAWVEVDEKAC +KGTFKAMPEREELLPDINENLVVELYSK* + +>15FR_NODE_1#PROKKA_00092 +MEISVSEFLKPRIAGLTELGENRTRIVLEPLERGFGYTLGNSLRRVLLSSMPGAAVVEAE +IDGVLHEYTAIDGVQEDVVEILLNLKLLAIRMHAREEATLTLNATGAGVVTAGDIQVDHD +VEIVNKDLVIAHLAKNGKLSVRLKVMRGRGYMPVVKRYADESQGRKIGKLKLDATFTPIR +RVAYYVEAARVEQRTDLDKLILDIETNGTIGAEEALRRAAGILTDQLSVFADLSSVSSHT +PTESRSVKPILLKPVEELELTVRSSNALKAERIRFVGDLVQKSEDELLKTPNLGRKSLTE +IKDVLARHELALGMKLEDWPPAALAERRAS* + +>15FR_NODE_1#PROKKA_00093 +MRHRNSGRALSRTSSHRAALMRNMSKSLIEHEQIRTTVPKAKELRRVVEPLITLAKSDSV +ANRRLAFSRLRDDAIVAKLFTDLGPRYRERPGGYLRILKAGFRPGDNAPVAIVQLVEEQE +TTSAAT* + +>15FR_NODE_1#PROKKA_00094 +MRKVRVAIVGAGTAGLTALAQVRRRTDEFVIVNDGPYGTTCARVGCMPSKALIHIANDFH +RRRRFAEVGIAEGETLRIDLSKALAWVRAYRDSRTADSIKLTDPLGERNIPGRAELLSAH +ELHIRRADGGEERIAADAVILAPGSTPVIPKSWDGFSARILTTDTLFEQRDLPRRMAVLG +LGAIGLEMGQALARLGLQVHGFELRDRLGALTDPQLIAPAIEHFSREFDLHLGAPAELHP +TGEYWRVETADAQVEVDAVLAAFGRRPRLDGLGLERLGVPLDAKGLPPVDPHTQRVADLP +IFLAGDANARSPIMHEASDDGYIAAVNALDGPTPLNRRVPLVMAFTDPEMAIVGASFESL +PAGSFDAAGYDFSRQGRAIAMRHAEGRLRVYAERNSGRLLGAEIFAPEGEHLAHLLALAL +DRGLNVAELLRMPIYHPVLEEGLRSALRALARRVYDQPPQEFRRLPEGCPGMSSSSP* + +>15FR_NODE_1#PROKKA_00095 +MNPYERYLLPWLIDAVCALPAAARERAKIVPRARGEVLEIGIGTGHNLPYYAPRRVAGVT +GIDPGVLRRRIMRRAHAAGIEVKLLSLSAESIPAEDASFDTLVSTFTLCSIPDVERALAE +MRRVLKPTGRLLYLEHGTAPDPRVRRWQDRLTPWWKPLAGGCHLNRDIPRLITGAGFDIV +EQHSEYIRGPRILSYVFRGEAQPIAVAGSK* + +>15FR_NODE_1#PROKKA_00096 +MPKFILKDLSDYQQYRDNRELISSFFENLPNKTLQLIAKPLCGSSAAKLIALVAFAVL* + +>15FR_NODE_1#PROKKA_00097 +MLVVIYALTTILTAFIGNNAAAVLVFPLAYAAATKLGQPFLPYAIAIAMAASASFTTPIA +YQTNLMVYGPGGYRFSDFVRFGLPLNLIVGVISVVVIAWLWMP* + +>15FR_NODE_1#PROKKA_00098 +METKTLSGNATPLTDEQWAGLARLGDLGNRLGALVDGPLSGPASAALDRIGALDGQYDLT +ALAEKLVGTLSALDRAGLLDLLRDNAQFIADNLNTLTPMLDQWLAHIAELPADEFKADAK +FALALLRKARLVTTFIQEKLAGELTTKAVEVTEFMQRNDTDEAVAEALVQLGRIYRSGLL +ARLGDLADTVAGLEEGTDLDDQIDVLIKSSTAGGLGTFLIFLKSVSIAMQKVGQEPEPKL +GGYIGMLHLLRDKEVQKGLRMLTVLPIYLEKRLEKSAS* + +>15FR_NODE_1#PROKKA_00099 
+MNAKPHVLVLGGNFAGLGSAQKIREFAGDAVDITVIDRKNYLLFVPNIPADVFENKDPAV +GQRLDLPPVLVKDDIYFVQGEVTELDVDNRIVHYTPSERPGAAPQKIAYDYLVVALGNRL +AFDKIEGFDEFGDSVSDIYLGNKLRKKLWEGGYKGGPIAVGSAMFHQGDGAKGLEPYPGG +SIPDALAACEGPPVEVMLSAATYLKKTGQGGPEKITVFTPAELIAEDAGEKVVGQLLDIA +SGMGFNYVNNAKDITRVTAEGVELANGQTIEAELKILFPDWVAHDFMRGLPISDSEGFVI +TDLLMKNPKYPEVFAAGDAAAVTMPKLGAIGHQECDIVGRQIACAVGRMNEAAANTPLQP +VVYCIGDMGDNQAFYIRSNSWFGGDTQVLKMGHTPFLLKMQYKNLFFRTQGKMPDWGLDF +SELMAEKIAS* + +>15FR_NODE_1#PROKKA_00100 +VSWEAILTLVVLGAVVLGLAWPRMPPDLPLVGGLAILAVTGCAPIDKVFSGFSNPGLIAI +AALYIVAAGLRHTGAVTAPARWLFGRSRRLWVAQLRIMLPTAVVSAFINNTPVVAALLPA +VLDWGKRHRFAASRLAMPLSFAAILGGTCTLIGTSTTIIVNGLLTSTTHGPGMGFFTIGA +VGLPVAIAGFIYILLFGRRLLPDRQGAMGEFTNPREYTVEMRVAAGSPLAGQTLEAAGLR +HLPGLYLVEIERGGNLIPAPGPEELLEENDQLVFAGIVESVADLQKMRGLIPTTGQIFKL +DTPRPDRRLIEAVIAPENPMVGRTVREGRFRSRYGAVVIAVARAGHRVTGKIGAITLIAG +DTLLIEAPSEFQRRYRHSREFLLLRPLEESVQPHYERAWIAWLILAAVIGLVTARIVPLA +PAAIFAAVAMVVTRCINLAAARRAIELQVILVIGAAFGIAAALVHTGAAALIAQPLLALA +EGSPLGMLVVVYALTTILTAFIGNNAAAVLVFPLAYAAATKLGQPFLPYAIAIAMAASAS +FTTPIAYQTNLMVYGPGGYRFSDFVRFGLPLNLIVGVISVVVIAWLWMP* + +>15FR_NODE_1#PROKKA_00101 +MDTIRIRGARTHNLKNINVELPRGSLTVITGLSGSGKSSLAFDTLYAEGQRRYVESLSAY +ARQFLALMEKPDVDSIEGLSPAIAIEQKASSHNPRSTIGTVTEIHDHLRLLFARAGTPRC +PHHGLTLDAQTVSQMVDTVLADPSERRVMLLAPVVHGRKGQYQELLEDLKSRGFIRARID +GTVYELDPLPRLDGHQPHDIEIVVDRFRIRSDMAARLAESFETALALADGSALIADIDKP +HTSEMVFSARHACPQCGWSIPELEPRLFSFNNPAGACPQCAGLGRESYFDPGRIITQPSL +SLAGGAIRGWDRRNPYYYRLIESLARHYEFDTEAPWSELSERTHRVLLYGSGEEEIDFTY +VSARGQKQQRRHTFEGVLNILERRYHETGSQAVRDELVRYQSSRACTACNGTRLGEIARN +VFIADTTLPDISNLAIDAVWRFFNDLDLPGRRGEIAKRIQHELHSRLGFLVDVGLGYLTL +ARSTETLSGGEAQRIRLASQIGSGLTGVMYVLDEPSIGLHQRDNRRLIDTLTRLRDLDNS +VIVVEHDEDAIRSADYLIDMGPGAGAHGGEVVATGTPEEVMNNPNSLTADYLSGRRTIPV +PQARRHPQPGQAIIIRDAHGNNLKHIEVSIPLGLFTCVTGVSGSGKSTLVLDTLQAAGER +LLNRARTEPAPHASIEGLDALDKVIAIDQSPIGRTPRSNPATYTGVFTAIRELFAQTPEA +RARGYKPGRFSFNVRGGRCEACQGDGLVKVEMHFLPDIYVPCDLCHGSRYNRETLQIHYK +GRTIEEVLNMTTEQAREFFANIPSIHHRLDTLVTVGLGYVKLGQSATTLSGGEAQRIKLA 
+RELSRRDTGRVLYILDEPTTGLHFHDIRQLLAVLLRLRSHGNTVIVIEHNLDVIKTADWI +IDLGPEGGHGGGQIIGEGSPEDIARLEHSHTGHYLMPLLKPHKLQKETTT* + +>15FR_NODE_1#PROKKA_00102 +MAGSGKLRKDAGIIGLLYFSLGGIIGSGWLFGPFDAAKAAGPWSIASWIIGAAVVMLLAL +VFAELATMMPKSGALIHISHIGHGELIGRIWSWILFLSSVVTPPIEVMAVLTYLNNKIPY +FVDPSTHVLSTIGFFAAIVLLGVVVVVNFFAIRFVLWINNIATWWKMFIPAISIIVLMSY +SFHPGNFHLDLGSVNAAGMLTAVSTAGIVFSFLGFRLAINLGGETKNPGKYIPIAVIGSV +LIATLIYVGLEVTTITSVRPSDFANGWPSLAFKGDAGPFAALAVTIGAVWWSWVLYADAI +VSPFGTGLIYTTNTSRLGYAMAEVGSAPKKMQKLSRQGVPWISLLVTYVIACIFFFPFPS +WHQLVGYVSDITVLSYGIGPVVLLIMRKRRPEEPRPFRLKGAKVIAPMAFIASNWVIFWT +GCTTVTFLFGLLGSLFAVYAIWYYIIARKPSKEFGWKYAWWVFPYFIGMWLLSYIGPSTL +GPAHVSLFNVQPLDILPLGWDMIAVAVFSLAVLYTATSSALPREEADRYFDELKKLNLPE +EYSEGTESP* + +>15FR_NODE_1#PROKKA_00103 +VQATDWLHQIAGPALVLNFADGTVIDINAAGRRLVGIEGQGLIGQDFCGFFVSSDADCCW +PTLQRSINLQGGFRYEGLHLRTPGGAMRRVNVSAELLQSEQERAVLMLLQPGTASSPQAT +DHEKELAQYATVGLYRLDAEGRLTHANHALARLLGYETVGQLLDSAAVQRSQWYVCDGVS +EERVSDVNDAAIYRCKVQLRRAHGAAFRAVEAIREIRDVRGQLMSRIGTLREISDQSSSE +QALAISEDKYRSLVEHSQDGVFVIRDGVYVFVSQVYSSMLDYAPEEMVGESFLRFFAPED +RQKIVDVWHERQAGHWEQGAYEAHLLKKDGTRVLVSVRAGPIRFAGAMASTGTVRDITAY +RDTQQQLSMAEQRYRDIFEHAVIGIYQTAPDGRLLAANPAMAQILGYDSVEELQEQVDDV +GELFFDRIERDTLIEKLEAEGRMYGAELRLRHRKGTQLWVQDSARVVYDANGKLVCYEGM +VADITARKIVEQALHRSEQLFRTLVEHTHVGVIMVREGVVTYANRALAHMLDYAESDLLE +QPLASLFAPESRDCVERLEQELKTAVGSNIYESSMLAADGTRRVRANLSVASVIFEDNPV +MIITAHDLTREKRAEARLRRLATHDPLTNLPNRVVLRERLAQVLKKTRETGNVDWAVLFL +DLDAFKLVNDSLGHAAGDELLRQVAVRLRRAVRHDDLVCHHGGDEFVVLAFNITHEIDAV +ELAEHIETAMAEPFRISDHEIYNQITIGIALGRQEYELPEEVLRDADSAVAAGKRLGKVC +HVVFSSSMHVAAMERLELETTLRAGLTRGEFDCYYQPIFNVKNNRIESLEALLRWHHPEQ +GVLRPHSFLQVAEESGAIVPLGWIGLRRALAACSQWQSLGLEREVSVAVNLSDAQFRLPQ +LPEQLAHELEQAQLPFHLLHLEVTERVFLETPGLARRTLGRLHALGVKLYLDDFGTGYSA +LSYLRELPFDALKDRS* + +>15FR_NODE_1#PROKKA_00104 +MLSDLLPPPVIEVFATGLAIHGLTVVIPLKTIAPHISEYARLFGLFHPFGNDQELQVIGQ +* + +>15FR_NODE_1#PROKKA_00105 +MPTRRELANAIRALAMDAVQKANSGHPGMPMGMADIAEVLYNDFLRHNPANPHWPGRDRF 
+LLSNGHGCMLQYAALHLSGYDLSMDEIRNFRQLHSKTPGHPEYGHTPGVEVTTGPLGQGV +ANGVGLALAEALLAAQFNRPGHKVIDHHTYVFCGDGCLMEGISHEAASIAGTLGLGKLVM +VYDDNGISIDGEVKGWFHDDTQKRFEAYGWHTIGPIDGHDAEALKKAFAEAQIETKRPSL +ILARTIIGFGAPDKQGTAEAHGSALGDAEVAKARKELGWKFPPFEIPESIYAGWDARARG +EQAETEWHERFAAYAKAHPQLAKELKRRLAGELPADWATTVEQHIAHVARNGKAQATRKA +SGATLAALAPTLPEIVGGSADLTPSNDTCWPEAKAVKPGTPEGNYLHWGVREFAMTAILN +GMAVHGGFVPYGGTFLTFSDYARNAVRLAALAHYPTILVYTHDSIGLGEDGPTHQPVEHV +ASLRAMPNLTLWRPADDVETAVAWRDAIERRDGPTMLVLTRQSVPHYERKAQQIEAIHRG +GYILHEPQNAPRALIIATGSEVDLAMQAARVLTEENLPVRVVSMPCQELFLAQDVDWQEH +VLPAQVTARVAVEAGVSMPWYRFVGIHGRVVAMERFGESAPAKQLFEEFSFTAERVAAAV +REAVAAAAG* + +>15FR_NODE_1#PROKKA_00106 +MIKVGINGYGRIGRNVMRALYESGRRDQLQVVAINDLGDAQTNAHLTRYDSVHGRFPGDV +QVEQGHLVLNGDVIQVLAERDPSKLPWGKLGVDLVLECTGLFTSREKASLHLQGGAKKVL +LSAPAKDDVDATIVYGVNHKTLEPEKHVIVSNASCTTNCLAPIAQVMHELAGIEGGIMNT +VHAFTNDQNLLDVYHKDLRRARAATASMIPTSTGAAKAIGLVLPELDGKLDGFAIRVPTQ +NVSFVDLTLNLTREVKVEDINRAMREAADGRLKGVLAYNEVPLVSIDFNHNSHSSTYDAG +FTKVKGRLVKVCSWYDNEWGFSNRMLDTAAVMFGRG* + +>15FR_NODE_1#PROKKA_00107 +MGVKSLKDCELQNRRVLMRVDFNVPVNDGAIADDTRIRAALPSIHEALKAGARLMLMSHF +GRPEEGKPESRFSLHPVARRLGELLGFDVPLVTDYLARDPEPGSGRAVLLENVRFNVGEK +RNEETLARRYANLCDVFVMDAFGSAHRAQASTYGVARFAPQAVAGELLCAELKALGRALK +APDRPLIAIVGGSKVSDKIGVLDALIERCDGLVVGGGIANTFLAAAGHPVGRSLYEPGFV +DEAKRLMIAARERGVNFPLPVDAVVAEALAEDAEADVKPVHAVGAGDMVLDIGPETAVLY +RPLLANAATIVWNGPVGVFEIDQFAEGTRAVAEAVASSGAFSIIGGGDTIAALAKFGVTD +RVSYISTGGGAFLEFLEGKTLPAVDILEARASD* + +>15FR_NODE_1#PROKKA_00108 +VTETMARFRRTKIVATLGPAVDEGDVLARMIAAGVDVVRLNLSHGTHAEHRKRVKAVRKA +AAEQGRDVGVLIDLQGPKIRIECFRDGPIELKEDDAFTLDCGLGSNAGDSKRVGVAYKNL +PRDVQAGDMLVLADGEIVLEVREVIGEQVHCRVETGGALSDHKGLNRRGGGLSAEALTQK +DQVDIQLAAELEADFLAISFPRVAADVERARALLRAAGGTAAIVAKIERAEAVENLDEII +DASEVVMIARGDLAVEIGDAPLPGVQKRIVRHARARNTVVITATQMMESMVTSPTPTRAE +VLDVANAVLDGTDAVMLSEETAVGRHPVKVVEAMARVCLGAEAEPREDRDRRIGGDRFEL +VDEAISMAAMSVSQHTDVTALVALTESGRTPLYMSRVRSGIPIYALTRHECTRRLLTLYR +GVYPIAFEDEHESDEVLPDVAAALLERGLVTPHALIIVTRGKLRHVSGGTNSLHLVQVAD +VLPEQVL* + 
+>15FR_NODE_1#PROKKA_00109 +MNGHWAREKRRMRVINRLLGLSAVVMVLVLVSPTASASIASDAVAYPPVSLSNVPPAKAE +EIRKGEYLTKLSDCMACHTDHGNGKAGKPFSGGLAIKTPFGNIYSPNITPDKKTGIGNWT +FKQFDDAVRYGEGPNGYLFAAMPYNYYSMMNKDQVHAIWEYLKHVPAVNRRNKPLGMPPP +FRWRWLQFGWRFMFVKPTQGEFKYDPKHSKAWNRGRFIVEGPEHCGACHTPHNMLGGSEK +RFFLGGSDITGFWAPNISGLATKPHPIATIMRVFREGKGLGGGDLKGPMIDAIANSMRYM +TPADMRAVAVYIQSVQSEVPPGPRPVAMDEVNLARGEKTYQTDCAACHATGIGGAPRVGV +AKDWDALGKSPLFILFENVWHGVSIMPPKGGCKACTRDDVTSAIVYMLKRSTSRSSKPAV +QATTSKSGIPRDTVSLAVGDKIYHAHCAACHASGAAGAPRHGDIKEWASRLKLGLDKLHH +NALDGIGMMPPKGGCTSCSKDQILSAVDYLVDGSGGKALVEKSLSGKQGG* + +>15FR_NODE_1#PROKKA_00110 +MSGDSILRIFWWLVLGAWMSGIGVMLGRELGLTVLLRYLGRNESERRELLAPHIERPSEG +HQVWLLLGGGALMAAWWPLFTATLFGGLWLVLLFMVLAVLVGPVGHGYRKRLSEHTRGPW +DLLWAGISLAALLVFGLAIGATVSGVPLHFDAHMDAMWGGFFSRFTPYSLLVPGLMAITF +GLWLAAARAAHECTGAVAARARALLLPVGGVTLLIFAGGAAWATQLPGYAVGGLPKVGAS +PLDGTTFAVGGAYLERFLSHLPLVIVPVLTALAIVGALFFSWRGRLQRVGPLVVIAVVGM +VATLGAMTYPVILPSFAEPAQSLTLWNAAAERPVLVAFLVWLGILVPVVLGYELWLRRRN +AQTVVAGSTAR* + +>15FR_NODE_1#PROKKA_00111 +MNATAPLQEQEPEPGNRRFPGLLMGGARQVARWSWQHRRYGRWPLRILLVLLLVLIILVG +AGYGLLRGSLPQTEGTVRLPGLGGRVVVTRDAQGVPTIRAHNALDAWRVLGYLEAQDRFT +QMDFMRRVAAGDLAALVGPAALPLDRIHARFDLRARAERIYLDAPSVERARLEAYTLGVN +EGLDNLSVRPWAYLLLGERPRAWEPADSVLVIYAMGWMLQNPLGPRMRARAALRSLYPPA +VTAFLGAPDTHWAAPMAGQPPALPPVPGTQLINLSASGKSRATAPVPSTAMYADTVAKLI +LPQPFPGSNSFAVSGDLTGTGHALLANDPHLSLRVPATWYRARLIYPAPGATASQPVELT +GVFLPGVPALVIGTNGHIAWGLTNSGGDWTALVRVKATAAGSRGGPLVYATPSGTATLAI +QHVLLKVRGQTARPMSIRRTIWGPVIGTTADGALLVSHWALAQPGGVNLRFMQLDSQTTV +KQALMVAGSAGIPVQNFLVADDQGHIGWTLAGRIPVRKAGCDYAVPQSWADGSCGWTGWL +APGSYPSIVDPAQGYLATANNRVDARTAAVLALGDENFADGARAHQIVSDLKALAKRGKI +TANDLHDVQLDDRAQFLQRWHDLLLNVLSPSALEFHPHRQALREAVVNWGARAAVDSVGY +RMVRAFRNEVAASMFMPILKRLHTRDPGAGLPFSNQLEGPLWRLLQVRPHNWLNPAYPTW +NALLVHAADAVIHRFWNPVSGLADATWGARNTVRINQPLAVALGPLGHWLDMPPTQLPGD +SNMPRVQTPDFGASMRMVVSPQPSAPGLFELPGGESGHPLSPWYSDEFKAWAEGLLTPLA +PGPARKTLRFIPWSRRVSDRPTVSTGSVVPAQSGQ* + +>15FR_NODE_1#PROKKA_00112 
+MKKIVIANLKGGSGKTTVSTTLAAFWASEGYKTCLLDLDPQRAATSWLRRRPESLPSIHT +LSLPNQTSGVTLSYALRIPRDTERLVVDTPAGLSGIALADTVRGAAAVLIPVLPGTMDSD +AAARTVADLLLIAKLGRHSGRMAVIANRVRRGTLGAERLQKFISALDIPLIATLHDLQAY +SHAILSGLGLHELPRRRIGGERMAWVPLLEWLERRELEITAQTALGPRSLLTQSAGQTPS +DSME* + +>15FR_NODE_1#PROKKA_00113 +VLAGIAEWYGEDEAVPFRVISGTSAGAMNAAYLSANMENFAHGTQRLAQVWSQLEAQQVY +RPEYRKVFGALLHWAWSLLSGGLGDSNPRSLLDNSPLRALLAENIDFDAIARNIERGLLR +GVSVTVAGYSTERSLSYFQAETGVQSWWRQRREGRPVQMTLDHVMASLGLPIIFPAVKVA +GEWCGDGSTREFAPLSPAIHLGAKRVLVIDTQYPAPQHVLGQDQAYPSLSKIMGYLFDSV +FSDSLYADLERTKRINRTLDYIKRQSGHEPPELGLSHIDTLVIAPSRRPLEIASRYESHL +PKSMRWILRSLGGDVSSGDQLLSYMLFQSGYCSEMVALGRHDAHARREEIGQFLGLSKIK +VR* + +>15FR_NODE_1#PROKKA_00114 +MNLILLGGLSGAGKTGALDMLEDLGYQIVDNLPLSLIEPAIDAMLGDDARHHSRLAIGIA +PHNTPEEFEALARQIEIWRTRPHGCTVIYLFCEPGTLVKRYRATRRRHPLTGPDTDLAAA +IEIETTLLEPLAQLADACIDTTHTNIHQLREIIRARVNEGGDHPMALQIESFGYRRGLAQ +DADLVFDMRCLPNPYWEPTLRELTGLDQPIAEYLETHGTVTRMLSNLVNFLNAWLPSYAA +SNRSYLTIAIGCTGGRHRSVYMAEQLAAQLAHGGWAVTVRHRDLDTPTRDVKPILADD* + +>15FR_NODE_1#PROKKA_00115 +MQNTITGRHLDVTPALKDYVNTKLSRLGRHHEPPTSTQIILSVENLDHKAEGILQVRGGT +VYAEANETDMYAAIDILADRLDRQLVRHKERHASHHATPTARLNMEN* + +>15FR_NODE_1#PROKKA_00116 +LLALPRPDLEQILETALEENVMLERLEPETGEGDPEVATVMEQTEPAGEWDELSWSSSAG +TGERPDMQTFEDIRPPDLRQHLIEQLVLERFSDRDFLIALALVDSLDDNGYLREDLDTVS +QELDALDPSPELIEIEAILHRVQRLDPIGIGARDTAECLSLQLEALPPDTAGLVVARELI +DGHCARLTQADMATLASLTCSDEDSVRRALSLIQSLNPRPGNDYSAQTAEYLIPELRTYR +TPDGWQVELYPGNHPRISINATYVAWLSANRLNEASQSLTRQLEEARWLIRSLAQRENTL +LRVARVLVRRQTAFLDQGVMHLAPLTLREVAKELDMHESTISRAVQGKAMSTPRGVITLR +HLFSNALSNDNDEAISARAVHERLRHLLNHEDPAAPLSDAALAAALARDNMPIARRTVAK +YREALGFASTRARKRPAHSVAISKG* + +>15FR_NODE_1#PROKKA_00117 +MSQLRAENLHKRYRTREVVRGLNLNVNSGEIIGLLGPNGAGKTTTFYMILGLVPTDSGNI +YLDQRDITGLPIHARARAGLGYLPQEASIFRNLSVRDNLIAVLELGGHGTRAEQQRRADE +LLDELGVTHLAKDKGISLSGGERRRVEIARALANEPAFMLLDEPFAGVDPVSVADIKRII +DHLAKRGIGVLITEHNVRETLDICNRAYVMNRGTMLAEGSPKEIADNQTVREIYLGDKFT +L* + +>15FR_NODE_1#PROKKA_00118 
+MHPKLFACCTALLACCAFPALGTPAPGSANASSSVSGVMTINADHSSMANTTGQGTEVTY +SGHVIVTRGALRLYGHSAVIHGRSNTIGKVVVTGTPARFELREPGKPHVLGEADSITYNG +KTDILQLDGQVHFSRPGEHFSAAHITYRIATRQLEASGNGNGRVHAVLSPAARTSP* + +>15FR_NODE_1#PROKKA_00119 +VRLSRGWAVASAWAALLGAAALTSWLFLRSHDHSPATDLASHAVERPDYLLHQAIVTRFA +KDGSRRYIIKARRIAHMPRNNIALLTRVDLDYFPVHGKPWHLQSDNGRLFANGTRLNLIG +HVRAHELDTPIPVHFLTTEVTVLLPEARLASRYRVILRQGHRETRGTGLAANLQTGTLSL +LKDVTSQYAP* + +>15FR_NODE_1#PROKKA_00120 +MSTPLAAIRLLALDVDGVLTDGRLWYSETAGEIKAFNAHDGAGIKRLMREGIAVALISAR +QSPIVTHRARELGIIQVHQGVKDKGHCLTETAQAVGVRLAFCAFMGDDEADLPAFAIAGL +RIAPANAVARVRDEADWCTQATGGQGAVREVCERLLAARQANAGGKS* + +>15FR_NODE_1#PROKKA_00121 +VNRPASATGLIESGREVIQIEAAAVSALESRLNETFAAACGLLLACRGRVVVTGMGKSGH +IGRKLAATMASTGTPAFYVHPAEASHGDLGMITSEDVMIALSNSGQTPEVVTIVPLIKRL +GVALIALTGEPDSMLARASDCHLDISVSREACPLNLAPTASTSATLAMGDALALAVSAAR +GFTPEDFARSHPGGRLGRRLLVRVADIMHTDDAMPIINESTRLGDALVTMSAKGLGMAMI +TDVQGRLAGVFTDGDLRRHLDQGVNLDTPMQHLITRECTVATPDMLAAEALRLMETRHIN +SLPVVTDDKPVGAFNMHDLLQAGVV* + +>15FR_NODE_1#PROKKA_00122 +MDRTDLQQLLEQAFPNAEIEVQSEDGVHFAARLIDAGFSGQGRLARHQTVYAALGARVGG +EIHALSLQTLTPEEAGARA* + +>15FR_NODE_1#PROKKA_00123 +LNRLLIRGGGRLDGEIRISGAKNATLPILAASLLAETPITIGNVPHLHDVTTTVTLLRRM +GVDVTVGEHMALEVDSNTIKDMVAPYELVRTMRASILVLGPLLARFGCAEVSLPGGCAIG +SRPVELHLKGLSAMGADIDVRNGYIYAKANRLRGARIFMDMVSVTGTENLMMAAVLADGE +TVIENAAREPEVVDLANCLNKMGAEIEGAGTETLHIQGVAKLEGCYYEVMPDRIETGTYL +VAGALTGGRVRVKRTRPDMMESVLEKLREAGAEITSKDDWIELDMKGRRPQAVTLRTAPY +PAFPTDMQAQFTALNAVAEGSGAVTETVFENRFMHVQELVRMGARIQLEGNTAMIQGVER +LTGAPVLATDLRASASLVLAGLVAEGETVVDRIYHIDRGYECIEEKLAQLGAHIRRVTS* + +>15FR_NODE_1#PROKKA_00124 +MILYSRPDDPAAHSIRLVLAEKAIGVKIVEVEPDSPPEDLLHLNPYGTLPTLVSREVVLY +DPRIIAEFIDERYPHPSLLPSDPVLRARARLFVSEIGGSWYELCDEVANGAGRGRTRARR +ELTEAVVSSDELFTGTAYLLGGDYGLADCVAAPVLWRLPHLGVRLPREAKAIRGYMQRVF +KRPTFVYALVASERAMIES* + +>15FR_NODE_1#PROKKA_00125 +MTEKPRPSRKPYLVRALHEWMGDASLTPQIIVDATVDHVDVPVEHVHDGKIVLNLSLEAV +RDLELGNDAITCTARFGGVARSLWVPMKAVLGIYARETGEGVAFACS* + +>15FR_NODE_1#PROKKA_00126 
+MEAITNGMMNLNRQRHQHLVALRIIFAHGENGGEEVVHIGHVAIKTGDTFPGTGRVDLLC +LTHCLTSTD* + +>16FR_NODE_10#PROKKA_00127 +MTTPDINFTDLLQFLFMGIQRGSIYAMVAMGVV* + +>16FR_NODE_10#PROKKA_00128 +VKRTLLSTAIAVLFLLSAVSVYAAQKKPYKVGCVFAITGGASWLGGPERNTAEMLAKQIN +AAGGINGHKLELFIEDTQGDNTRAVNAVKKLIKKNHVCAIIGPSRTGTSMAVIPIVQQAK +IPMISCAAAESIIAPVSKRKWIFKTTQNDSDAVRRIYENMDKKGIHKIGIITGTTGFGAA +GREQLKALAPHYKIKIVADETYDPTDTDMTAQLIRIRNSGAQAVVNWSIVPAQSIVPQNM +RQLKMKIQLYQSHGFGNIKYVEAAGAAANGLIFPGGRLLAADTVSASNPQKAILMKYQKE +YEAAFKEPPSTFGGHAFDAISILAKALKKVGDNPAKLRNVIEHTNFVGITGVFHYTKTNH +CGLNQNAFEMLTVKNGKFVVLHQ* + +>16FR_NODE_10#PROKKA_00129 +MKISQLSIFLENRSGRLARIATVLGNAGINIRAMSLADTSDFGILRLIVSDTEQAEKTLK +DQGFTVLISAVVAVAIPDSPGALGNVLSIMEHAGLNVEYMYAFVEKDMGQAIVIFRFDDV +DRAISTLIENDIAVLESKRVLRL* + +>16FR_NODE_10#PROKKA_00130 +MIRVGVAGATGYAGAELVRILAGHREVRITALTSRQYAGVPFAKVFPALAGVVSNECEAF +DVERVCGQTDVIFTALPHKLPMAIVPGLIKNGKKVIDLSADFRFSDVRLYESAYQPHSSS +DLLSRAVYGLSEVYTDDIRKADLIGNPGCYPTSTLLPLVPLLKNRLVDSGGIIVDSKSGV +SGAGRSPSLTVHFSEVNESFKAYKVAAHRHEPEIESILTKSAGTPVDITFVPHLVPMTRG +MATTIYAGLAGNVTKHDIAACLCDYYAGRQFIRIDRDGHPPDTRNVRGTNYCDIAFVVDE +NNRRLILMSVIDNLVKGAAGQAVQNMNLMMGFEETAGLSAPPFPV* + +>16FR_NODE_10#PROKKA_00131 +MSRSFKSAGRMADPSDVAADICPVPAFIPDLMAVNGLADAGFSRLIFFPGMLFGSTEKWW +GDGGVRPSAHEGLDLCFFETSDGLRYRLDETVSVPAAFDGKIVRIMDDLLGRTVVVQSRC +APSDAPFYTFYAHIRPDNGLRQGDTLAAGTVFAAIARIVSPKIRLPAHLHITLARANDLP +PVDTLSWPVMNRLDRSVFLNPLDLLMCDYAIEDDTRFTPGSDAVKPVRRIRQDRKGA* + +>16FR_NODE_10#PROKKA_00132 +MGLFSIFGGKPPEELERRGDFHYEAGAFGDAKMAFEKAIDRIERRFPEKKHLLPRIMEKY +HLARNALAKMHVENGDHMITVRDYEEAGALYRLAMELTSDEAFAAEIHGKIAKLKDLIAD +EDEPEMEWVGDAREEAYVDDWGGAEEDAGGAEDDDAYADDDMTADAEGMADETDADAQLY +DSPENLFHVLVSALPEAVQDAYLGYGEAFAAGYIALNHGEFRKAVKELSRALEENASAKT +LIPVELATAYMHLNDPDHARQILEDFLKENPGEIRGYQLLCEILWEAGNTADARNLLSGA +PNDIQTTRPMQMLQGETLFQVGLYDEAEKVFTRCLEIHGKDEIVNRGLAKTYEAKGQIEK +ARDLYADILNRCIMCGSAADPIIKRRYADLCIKSGDKSLKLLELYFGLAKEDPDNRADYF +FRIADLYEAQGKDIEARKYRKLSTQAGGGKRPQ* + +>16FR_NODE_10#PROKKA_00133 
+MNPIEAISHTGRSVRSRLKGFPRKKVLVLEGGGMRGIFTVGVLQAFSERGYAPWKTIIGA +SAGALSGVVYAAGQIHMARDAFFTELISGRFIRMSNIFRPEKHILNLDWLVDHIIGGDEP +LNIRRLRTTACPVLITVTRFSRDFPPDTLYLSTKTDSVPQALKATAAIPFFYRGFVHYRN +DLLLDGGVLDSVPFKKALSMGFPERDILVVLTRPKGYRKERDSFWIKTLYESYYKDSQYR +YLVNSLEHHFGNYNRMLDDLETNYDFDIIYPPDNFKVNRLTRSEDKIVDGFEQGVAAAKA +YLKPKS* + +>16FR_NODE_10#PROKKA_00134 +MRKKIHLFLVIGCIVFCFSPAVFAKTGGPKVLHYPREDAVVRAVKLVSPAVVNISTQYEV +RTRVNPFANFGANDFFNNFFDQGIERKEKLTSLGSGVIIDGRRGFILTNAHVVVRGAKIT +VVLKDGRKFHADIVGIDPESDLAVLKIKTKSPLPSIAMGNSSDLMIGETVIAIGNPFGFS +NTVTVGVVSAVDRSFRIKNRIYRDLIQTDASINPGNSGGPLLNIDGQLIGINTAIYKNAE +GIGFAIPINRAKKIISDLIKYGEVVPGWIGLSVQNLNSRLAAYLNLPQHSGVVVRSVDPS +SPAGAAGIREGDILLAIDGHKIESIDDYKTAMRGYRKGQHAVVKIDRNGRHLTLSVRIEV +FPESLAPELVQRLLGVKVVGIGQKVRFNQTINADKGVIISEIDPQSSLAGIGVRPGDVIR +KVDAEATNTVQSFYKAMIKDRWKQSIVILLQRGDQGYYITLKLS* + +>16FR_NODE_10#PROKKA_00135 +MFQFLLFVGSITAFIIGGLIVLIGIGAITGCAGGILAMCSGAIIAVLGAWSAITFFLPSP +DPSVPARETINLIRRNGRWM* + +>16FR_NODE_10#PROKKA_00136 +MHLVKKYANRKLYDTTDKQYITMEKLAELIKSGSEVMIIDNETGDDLTAQVVSQLLAREK +NEDDTALPSSVLMQMLRKGRGTLFGYGKKYISLWQSAVLMSRDENEKLINTLVKDKELSE +TEGRTLKKEITAYTNGLKTWIRENIDQRVNEALNMMNLASKEQVKELIDQVESLSLKVQS +LEREIRRKN* + +>16FR_NODE_10#PROKKA_00137 +MRKQVEIMSSIDNFWLYMDHPTNLMIITGFLQFDKPINFERLKQTIKNRLLCYDRFKKRV +IRPMTGVGNATWELDPRFDLRSHLHRVALPAPGDKETLQELISDLTATPLDPTKPLWQLH +YIENCENGGSVLFARIHHCIGDGISLIRLLLSLTDTEPNAVWSDCLNEPKIEKETSFNLF +PPLESAMKKVTRARRRAQKVTRFVSREIEKSFSNPYHIVKRTRTVTKFALDVATVMSKIL +LLPADRKTVFKGELGVRKSVAWSDPLPLDDIKVIGKYFNATINDILVALVTGALRRYLQQ +CNNLVGDLDIRVAMPINIRPIDGDIELGNQFSLILVALPVHIDDPVLRIREVQRRINDLK +EAPDAAVAYAVLNALGVSSAKLAKTAATMFANKTTGVFSNVPGPRQQLYFCGEKINNIMF +WVPRIGGLGIGISIISYNNEVSLGIATDSGLVQDPKAILDHFANEFRMLLGMYKAGQMEK +EPLVINDRSVEPPVFAFNTEKIASVQAIRCKAITRSGTQCHNRAATNSMYCTLHLSKYET +IASREENDMPAEADNTLPAEDQAAG* + +>16FR_NODE_10#PROKKA_00138 +MNAPAPTPNNPRIVVCCGSGGVGKTTISAAIGLCGALMGKKTVVLTIDPARRLADALGIS +ALNMEAQRVPLEASVPASGELYAMMVDAKRTFDRLIGRYSSAGLRDRILENRYYQHVSNN 
+MAGSHEYMAMERLYEIYHEKRFDLIVLDTPPSRRALDFLEAPQRVINLLGHPYFLKLFKP +YIKAGQLSGRLFNLLAMPVLRAVGQVVGGQTISDIFSFFQLFNDMLFDGFSKRASAVESL +LSDPMTTFFAVTTPQEYPIQEATYLFRQLQQRNMPFGGFIVNRVHSDTADSPFDSEAADR +KRVLMEKIADKPIFQRLEIADRMDRKLARSDAAAIDRISSISPGLAVFPILFADETVNDI +SGLRVISTQLMKHPEFKI* + +>16FR_NODE_10#PROKKA_00139 +MAKADESDRTQLEALFSIGAGVRSLDELLTRRLIFLMGKGGVGKTTLSVALALTAEMMGK +RVLLTEIGDSQGIGRYFDAQPDVRPRQVSSAIWAARVDPKDELTAYLHYHMKSGFIANRI +TQSRLFDYLLAATPGLKEIMTLARIWRWEKAKNKAGTPLYDTIIVDAPATGHGLSLLRLP +KMLVEMIRVGPIASQVNGVQQMLLNPERTALTLVTLPEELPVNETREMIDIAVDEVGIPV +QAVFINGVHPVFVTPDEFSRIQELDRDCPDADPDCPDLRFALDVARRQIVRNAAQQVQMN +EVHAAAPGHVIHVPYYYTNDLGPEEIRTIAASLHRQISEAPRGGGR* + +>16FR_NODE_10#PROKKA_00140 +MYQIKRYANGRFYDTVEKNYVTREQISKLLGAGKKISIIDTRTEKDITDDIVSRIKAKKQ +NPSKSKKAGKSNKAVDDSTGMLVQLFRKGGDALFDYGKRYASMWQNMVTMSRDEVDKLVN +MLVKDNKLTELEGSKLKKEIDRYRTNIQGWITRNIDNRVNEVLNRMNLANRDQILELTGK +IEELNKRINRLGKEKKGPAKTKKTS* + +>16FR_NODE_10#PROKKA_00141 +MAKTATKKGETAQTKITGKIQKAAESVTDKVKGYNEKYVAKNIEKGKATLKEYNEKYLVK +TVEKGKDTLKEYNDKYITKAVEKGRSYVDGPYKKLSGTMDQWLEKGRSFEKDAWKKMDGY +VENGKKFMYKLPLVETVEKKVTSSLNSVPSVVNLPGKGDIEKLTLAMEALNSNIEALRKQ +SAQ* + +>16FR_NODE_10#PROKKA_00142 +MGKTIRRALVLSGGGARGAFEVGVMRYLNEVNWQPDLICGTSIGAINGAAFGSGMSVDEL +AHLWKTYHRKQMYKITFPAFFRTLLSGRKFSPLSDNRPTRSLLEKTIDIDALRNSTTEII +ISVLNMRTSQVRYFTHKAIGIEHLMAAGGIPMMFPWQYIDGDPYWDAGVMVNTPIMPAFE +RGATEIIVVLLSPLGAIPQRLPSTHREVSELVFEQFLIGSYTACLPNAGWRTNPEADVYD +TPLPDSPQLQLSMKGVRMATVYPTRMLGFRSLLDFSPRQAKTLLRDGYVNARMQLKSFFK +* + +>16FR_NODE_10#PROKKA_00143 +MPTATIRQQLIELLSENKYDARDLSQRLGVRETVVYDSIPHITRSVTSMGKKLKIVPSRC +TSCGYTFKDRKRAAKPSRCPTCKSERIAKPKFYIV* + +>16FR_NODE_10#PROKKA_00144 +VKTVSISGQTGASKIVIGERLENLSNYLPDRRIVVITDTNVAGHYGKMFPDVEVITIGCG +ESIKTLDTAKMIYERLVSMAADRSVFIVGIGGGIVCDITGFIASTYMRGVRFGYVATTLL +AQVDASVGGKNGVNFMGYKNMVGVFNQPEFVICDPYVLGTLPPRELACGFAEIVKHAAIS +DKDYFADLEESHEKACARDPETLERIIRKSVVIKAGVVNADEKERGERRKLNFGHTLGHA +IEKTLGVPHGEAVSAGMVMAAELSANRGHLPRPDIRRLKDLLTHLDLPTALPIDPERIID +AMARDKKRQGEKIHFVLLSAIGAAFVDSISLAELEAVVTG* + 
+>16FR_NODE_10#PROKKA_00145 +MLIVMRQDASREQIDAVIRAIEARGYTARSIPGGDRVSIGILNNRTAIDAAWFQDMPGVK +ETIPVTRPYKLVSREIQPHDTIIRVGGVEIGNGHLVIIGGPCAVESEAQVMATAERVKKA +GADIFRGGAFKPRTSPYAFQGLGEEGLKILARAREQFGMPIVTEVMDLEYFDMVEAYADI +VQIGTRNMQNFSLLRRAGESKKPILLKRGMSATIDEWLMAAEYVLSQGNPNIILCERGVR +TFVRHSRNTLDLSAIPVVQRESHLPIIVDPSHATGFRDQVIPLSRAAAAARAHGLMIEVH +NAPDTAQCDGSQSLYPDQFETLCRQVRSIFRILGETDETR* + +>16FR_NODE_10#PROKKA_00146 +MILIDILRRNWYDLLPLNEIGNICAVKFGNKAADP* + +>16FR_NODE_10#PROKKA_00147 +MTNATNDQPFRPARFTEQRLITAILDGTCPPGSVLPAERRLAEQFGVTRPTIRETLQRLA +AEGWITIRHGKPTRVNDFWETGGCS* + +>16FR_NODE_10#PROKKA_00148 +MEAITNGMMNLNRQRHQHLVALRIIFAHGENGGEEVVHIGHVAIKTGDTFPGTGRVDLLC +LTHCLTSTD* + +>2FR_NODE_5#PROKKA_00149 +MIGNYGGLEHGLLFNLFRRRAFRRQYEEALEEARGLVATFSRNLADRLFDPVGALPMIER +RRIEICRALIAHPKLLLLDEPSAGMTHDETHQLMDDILSVRDRLDGLAIIIIEHEMGVIE +RVSDHCVVLNYGRKIAEGSYQDVASDRLVQEAYLGSA* + +>2FR_NODE_5#PROKKA_00150 +MTAVSSAVSDSAAVPQTADVAVEIEGLFTGYDKADVLLDVSRTVPKGQITCLLGSNGAGK +TTLIRSILGLTPPRQGTIRLFGEDTTGLPTHKVVARGVACIPEGRRMFSKLTVEENLRLG +AYQEPSEAKIRTSLEDVYQTFPRLAERRNQLSGTLSGGEQAMVSIGRGLMGAPRLLMIDE +PSLGLSPLYVQENFRIIENIRTRGITVFLVEQNVHQTLAISDYGYVVSGGRLVAQGAAAS +LQNDPEVHAAYFG* + +>2FR_NODE_5#PROKKA_00151 +MPAKIPDPVELTRRLVAFETINPPGQEQACAAFLADLLAGAGFDCVLHPLGDDRASLVAR +RGRPTEQRLPLAFTGHIDTVPLGAVPWKHDPFAGEIVDGRLYGRGSSDMKSGVAAFVVAA +IAEAERIGDGAGVELVITAGEETGCDGARALADGGHLGRAGALVVAEPTANRICVGHKGA +LWLKAITHGVTAHGSMPEHGDNAVYKAARAIGRLADFDFNVARHPVLGRPTLNVGTVSGG +LNVNSVPDRATVGIDIRTIPGMDHAPLRDGLGGVIGEGADLETLCDLPGIWTEPELPWVQ +RAAAAVAAVTEQPFAPESVAYFTDASVITPAYGDIQTLVLGPGEPSMAHQTDEYCEVERI +VEATDIYRRLIADWMKPES* + +>2FR_NODE_5#PROKKA_00152 +LEGLGAEGEKAVATLELIVERASGDVLKVELKPGTNVPAIHPGDKVQIVTPAGETLNAVV +VGQDVQITPVDASGTVGETIVFKNLALYLHDGQSEVAVVNADTGQTTEITDVASLADLGT +VPLQMASGEGTGPVSPGTSSPFQNSDAIDHGGETAGNAAGTLGDILNRGAAGTDGGRAQL +AGTGETGAGTGTGSSTTDHVETPISESTEGGGSGTGTGTGTTTSGHAVDGYIVGATVFAD +ANANGVLDSGEASTTTSYNGTFELSGSSGQLVMTGGVDQATGESFKGTLTAPAGSTVVTP +LTTLIQSLVEAGQSAADAQAAVKSALGLTGNSIDLTTANPVEDVENGVSGADDVLAAAIK 
+IQNTVVQAASVLQGAGGSTVAMSTATNAVFAQLATTLQNNPGSNPITDATAVQNLITGAA +NSSSLGLSSTAKTQVGNAASDAASVIDAGNSHINGLSSTGSSLLTDLASAARVAQNGAAE +ALHDALNAVQGTSNSANLSTATSSYTGANLTSEIGNASSGLGTVGTASSVGTSGDDTIQG +TSGNDTLNGGAGNDTISGGTGNDILIGGAGNDTLKGEAGNDTLDGGAGDDSLYGGAGTDK +ALFDGNFSGYQIATDSGSSGVITVTGSGTDTIDTTEVLKFKDLTVRMVGDSGGTANGYTS +LSSALSAASVGERILILDSATDPSTLTLSKKVSVQKIGEDPLISIASDGALVVDGSQLSA +VTTLDLSSLPGTTTVRFTSLGSIASISTASTETLNLSASQLDGLTVSGSGKIQTSGIVAT +SADLSNLSSDLSVASGQSLELTAAQASGKTIAGAGNVTVNALGSSAVDLSGITASGTLTA +NVPSSATLNTNTDLGTFGVSVASGQTLTLSATLADGTTIGGDGNVTVTGLAAATDLSSVS +ASGTVTATVTSTVDISSNTHLGSVDAYQVTGALTLTAAQVGDSTISGSGNVTVSGLAATT +DLSGIASSLSLTASVTSSIDISANTNLTTVDTYQVSSGQTLTLSAAQAAGHAISGGTVTI +SGNITANTDLTDISSTLSFDDGDSGAISVASGTKLTVTPSQAAALQTAGQTITGDGTVLI +DGNVTADTNLTNISAAVDFNGNSVSVDSGHTLTLTATQASDTTISGAGAVALSGSDTNAD +LSSITADITVASGQTLSLSTAQLATLDSNAIPIGGDGTVSLTGNATSALNSDLSTYLGSS +LNLAVPSGESLSLTAIQANGLTMEVAGTANITGPAGTTAADFSNISFTGSGAATFTVGAD +LVISEANADFGSVSINIPAGRTLTIDAADASGVTISGDGTLAVTGTLDSSVNMSNWGTGA +IDLTNVSASNFSLTQLDLNGSADYHLTYAQVQALSDGIDGNNSDNTLIIDVSTAGGVTYT +NNAATIDLDISLLGGADRVKFDFGGTTDSGNTLTIRGPLGFGDGSDTLESRHGTISLTDP +GLTLSGGPEALVANSGFSLTATLFDLLYGSSGVTLQGEGTYIVSIDAGFASGASPTLDLT +VLDNFVPAGGVLPTLQIVATGYSVVGGDDGTGDGIATLSDGTHTITIKLPDDPDNAGTFN +PGNTPVIIEIDNGTSQFFLGGLDDQRAYYESETTVYTGSQFADLATAIATDASSLGINAG +DIQTIKLGDSVILDSDSAVDLSAFGGVIDYNGQTIQVTSGDSLTLTAAAADGGTISGAGS +VTITDLGTTAVDFSGLTAASVTLAISSGTVDLSAISGLDLGNVGITVTTPGAVTLTAAQA +DGLSISGNGDVTLTGLGASEVDLSGLTATNATAEISADLTLSSLTNLGSLDLTLDDGVTL +TLSRAQLDGRDISLASGSATLAFGGNASGLDLSQIASGIAFEVVAGRTLTLSTAQINDGH +TITGAGSLVVVLSGTEVDLASTAIQVTGTRTAVVSTSATLDGNTDLGDFSVSILSGQTLT +LSTAQAAEHAISGAGNVTVTGIGSTAVDLSTITVTGTKTVSLSADATLDPDTNLGNFAID +TAGHSLQLTVAQANGLSITGSGTAVVTGLGSDTVDLSGITATASTTVSDTVALAAGTNLG +SVAVTVDGTGSLTLSASQADGHTISGTGAVTVTGLTAGIDLSSLASSLDVTATVPGTVDI +TGNTAQLATVDTYEVAGALTLSASQASGHTIEGSGSIVVSGLDGSAAYDLSGITASASTT +VSGTVTLDSGSNLGTVAVSVTGSLTLAAAQATGHTLSGDGSVTVTGLAATTNLSGLASTL +SLTAAVTGTVDISSNTLLGTVDSYTVSQSTDALSLTAAQASGHAISGSGTVAISGLGAAV 
+VDLSGIAATVDATATLSGTEVILTAGTDLGTVALSVGSGQTLTLGAAQASGHAISGGGDV +VVNGLSASTDLSTVTVTGTVTATVSADLDISASSQLGSVDTFQVVAGKTLTLTATQANGQ +AVEGGGNVTVTGLAAATDLSGITATGTLTATVTADTDITANTHLGAVDSFQVASSTTLTL +TADQADGQAVTGTGNVTITGLGTTAVDLSAIAATGTLSAAISGDITLAGDTNLGDVTLTV +GAGNTLTLTAAQADGNTITGTGAVVITGDVAGYDLTHIAGTLDLTLPVTGDVLTLTDGET +VHLTVAEANAYDSITGDGTIQLSGNATANFDHLTSILGDGVSLAVADGDTLYLTATQASG +VTIGGIGTVDASGTVTDGDFSGISADLNLTDATLDGTTTLPTVGAGHTLSMTSDQINAAS +IALADSTATLHVAVSFDALSSSNDALPEIDISEIRVDGSNSPEAVWNSVDVASGSIVDKF +KLFWISADKQYYDSTPLGQDVDANRAFVELGNLYAAYLAGADGELGTADDGTPILDVVQT +KSGGVADYDARQQSLHDNLLGNLSDGAIAGRFGTDDPRSDLAKLFGDRPYLAGSVDGNGL +YTNDDSVAAVVGWDLYHGLDYTASLSGGYAVLDGDNSVTGTSGSDYIYAGGGDDTVSGGD +GADVLYGGSGDDTLNGGAGDDTLYGGSGDDTLTGGAGADTLSGGDGTDTASYAASTEGVT +VDLETGVGTGGDAEGDTLSGIENVTGSAEADALSGDANANVLDGGAGDDTLTGGAGDDTL +SGGDGTDTAVYTAALTTDNITFDTDHWVVATDGAEGTDQLSGIEVIDHGGSGNILLVGGD +GFASIQDAIDVASDGDTIMVAPGTYAESLTIDKSLTLIGDPATGDAGAGTSAPQILGSTD +WTLATVSIEAENVTFSGFDVTNDTGPYGIHIKAGDADVSDNYVHDINGALSGDGIRAIFI +NPVDNVTVSNNIVEDFGNADNPSAASYTKTAAGIYYWARGGTLPGGTADIAELHNVTIEN +NVIHNDGLPTFTGTSVLGIWVGSSQGGSVLDTVSIAGNQISDLHTDNADRLTGGILVNHG +SNPDGVDPLASLDTPGVTTGLEISGNTIDDVSGASVFAVGLRGQTPDASVIDNVISNLAL +ASGSSDGLLASISFQYNTTTHSVSLSGNDLGGYDLLQVGHDTSDDTLTPATADDTLTAID +GYDNILVGQSGDDTLTGGDGNDTLLGGNGTDTLTGGGGTDTLDGGDGTDTAIFTGTRADY +TIAVDTDGHLTVTDTGGTDGTDSVSNVETLEFTDTSVSVLTVTETGANGTYSSIQAAIDA +AADGDIIYVADGTYTQTGTLNVDKAVTLIGQSEAGVVIDASAVHGYGILLTADGATLSDF +TLNGPQGGDETVWSSYRVDYGIKVSPNGTASSLSDITLQNLTVSGSHNTEIDFNGIHDST +LSNITVDGGTGVAGNGISLTDSSNITVNDVTAANNPWGGVAIYTDGTHYAGGSDGVTFTG +DYTYDAGSTGASPIYIQATGNTYPVTNLTLPDGYDFAVTNSEYRADGNEFTFFFTSESDA +TAFGNSLGAGSFVSTPDADTLTGTANADYLYGGGGDDHLSGAAGDDRLVGGSGNDTLDGG +DGMDTAAVEGNRADFTFTDDGSGHLVMSDTQGTNGTDTVSGVETLSFTDGNVLVVGAGSE +YATIQSAIDAAASGDTIVIAGGTYAESLSLDKALTLQAVSGADVVIDPASGNGLTVSGDL +AGGDVTVSGLTFTDGTMGIQVAANADVGTLTLDGVTVEDNLQYGLRTDSGSMAAVIVTDS +TFGDNGTQNVNGSAQMKLYNFDGDATFTRVDLVGAPAGTDQNSRPDYGIELTGLSNTGLA +EGGTSPDLGTVVFTDVTVSGEFHKIGVAVYNYGQIDGLDIQSLDLSGTETNWGPVFNIDG 
+VEDSTVDARNYNITYPAGDAIVAELQGEVPDQTATDTTIYGTDANERLMGKAGDNVLHGG +GGNDELYGADKPGNPAEDDSGNDKLYGEAGDDLLAGGAGADILDGGDGIDTASYARAGAT +EGVAVDLANGTASGGDAQGDVLSNIENLVGSSYDDTLTGDGNDNVLTGGAGADALDGGDG +SDTVSYAGSSAAVNVDLATNTVSGGDAEGDILSNFENVTGSSHDDTLSGTSGDNVLVGGE +GNDTVVYTTTVAASDVSFDTDHWVVTTASAGTDQLSGIETIEHGGGSNILLVGGGGYATI +QAAIEAAGAGDTILVAPGTYAPFATSFGGPANITVQAMGDPGDVIIDATGGAPSNGRILD +LRADGMTLDGFTIEGPGHAGVGISINGQGITVENNVISNVLTGIQTGTQYDTGNVTITGN +TVDADYGISLQNTANTVTDNTVHATTEGLGVLDVAATLSGNSFTVDAGGEGLALYGGATS +STFTTSGNTVTVGEGANLQHATDLAGTDGTLNIGAGTYEQVISIAKDGLTVNGSDATLVV +DGSSSDVNGIARVDAVTIYGDNVTLQGLTIVDSLVDQSYVTYGWPETTRGIVVKNGAENF +TLTGNTIESTRNGILINGIDNTGSVTDNVIDNTKSGISVQYTDASGIIIAGNQEGTYGNE +WGLNLHLNGYWDGTTYTSNNADNYPILGTAPTADWQASLLGLSTGNDGWAVMDQAYALYN +RTLVTVDPDGSPSSFSNQGSQRSPISTIQNGVDLVVAGGTVHAHAGDYSGESVTVHVDNL +ILDGDAGATGITVQLADGLSNLTLAGEADYTATGNAADNTLIGGAGDDVLTGGGGADTLT +GGDGSDTASYAASAAAVDVDLGSGTASGGDAAGDTLTSIENVIGSAYDDMLTGDAGDNVL +QGGAGADSLIGGAGSDTASYADSSAAVDIDLAAGTIAGGDAVGDTYSGIENLTGSAYADS +LTGDSGDNVLTGGAGNDTLTGGAGNDTIDGGAGTDTAVLSGNRATYTLGVNADGNITLSG +EGTDVVKNVETLQFADDSASILVVDPGTSGAYATIQAAVTAASAGDIILITGGTYTENVT +LDKQVTLLGAQAGVDADGRTGVTESVIEGNITVSGAADNATIDGLTIHNGASVGGDLAGV +YLASGATGTEITNTIFTRDGTVDGDSSRGILTTYNGGNTDVTIAHNSFSGWATGTYVNPG +SQDIQITDNQFDGNYVGLSVDGPNGAVVTGNSFTGNQFEGLGIGPGTGISGITLSNNSFA +DNASQVGVYTDAIDVNALSGNTFDGAVVISGSDTVYASIQDAVDASSDGDTILVYPGEYS +ELANYNPTTGENSGTGNPLGLLINKSVTIQGVTADGTYITDAGDVAATVTSGAQSNWGTN +FFVTADDVSINGLDLVATGSTGQPYVNKSIEVVGDGFTLNHSVLGAADGLPMYTAVYVND +WSVDSGFTASAIASYAVLNSQLYGDMVVTNGPGTGYTADQLDMRIVGNSFLTIDGGIPND +GILVTGNDDNIAWRNASAALPTEISGNDFGDASGVLWVRGDGTQDFPTTAEVNGILADNS +VPAYAYAVDGNGDLAAGTYGSSSIPSLAIRATAADFAPSELSGAGAESLMVQQAGETTPH +SYSLIVGADGVADSLTGTSGDEALIGGSGDDSLSGGGGNDILVGGDGNDTLTVGDGSAVV +YGGDGTDTTAYSTTVSADDISFDTDHWVVNTSSGTDQLTGVEAIDTGGSNKILLVGGDSG +YATIMEAVDAASAGDTILVAPGTYEPFSMGYWSPSDLTIQGMPGAVIDATSISTPARIVD +LTAEGTTFSGFTIVGPGDVDDAGISVGISISAQGVTVSDNTISDITTGIQNHTPADQTGA +SSILDNTISGANVGISLQNVNNTVSGNTVTTVEAHTLGVGEVALGVLGGDNTITHNTFTV 
+SNSGKAIGLPDLPAVANLTTSENVVTVGEGADLQNAADLAGTNGTLHVGAGTYAQELTIT +TDGLTVTGDDGATIQVADPGVYSPSSDAFAARTIAFTIAATGVSVSGFEINGPLSAYTYT +TTDFATLGYTYGFFINDGVQNTTLHDITIQDIRTGMSFEGDNTATVYDNVIDNTRGAFLV +RSDGVDLHDNSFGSTGNEWDLTMLAGTPSDYFGDPLTDPGTYGDNMMALSAANNDMTIAD +RMYGEGGVLARAASDPDLADQYAAVANRSHVEVLAGADNDTSAGLGETRGNGFGTERLPV +GTLQDGVNAVVQGGSVHVQGGDYSGESVTVHSDNITINGEASAIGIDVHLGIGLSAITLE +GSADFTATGNDLDNTITAGAGDDILSGGDGADILFGGDGNNTLTGGDGADKFMISAHTDG +SKDTITDFGQGDSLDFHDVLSDPTDVVFTDDGSGNTQITTNAAPTIVLAVVEHVEPASLT +VDDHGNVTLAQTS* + +>2FR_NODE_5#PROKKA_00153 +VKSFSDAGVAANRRNGHRNATPSGRLGHSLSWGSLLLSGLVVFGGPFMADAQAQQLREAV +EMAVQSHPMVASTEAEYRAAERSVDQVEAGFYPSLDLTADSGYQHARRVNESSIKENQWR +NKQRLAMTQMLYDGEGTANRAESAKASAQSAHFDVLSAATKIAQRAIRAYLDVARDRKLV +QYAVDNIDLHRRILADVEEAARSGGGSETRVTQVKTRLYNAQSQRRRAEGNLRNSISDFQ +EAIGETPETLEDYPMPTVAIPASVDEARDEALKNNPSFQAAVETERARTLTANAERSGYF +PQVDVEVAHEQRDGVDGVSGFETDSTALLTLSWNLYGGGADQAKVRRALEQSSAAMYRIH +EVERKIRRELEVALTDYEVARDQVALLRERAATAKEVTAAYREQFRLGQRTLIELLDSGN +ELFLARSDLTTAEYRQISAAYDFLAVRGTLLKDMGVKVATGKAPKAP* + +>2FR_NODE_5#PROKKA_00154 +MTADNASNIKVVHDPQTDEAPARPLGVDTPAGEAPGNDDRPAGPLEDALPGAEDDDSLLS +CLLFLVAYHGQPKSPSVLLSALPKPDGPIPVDLFKRAAARAGLSVQVVRRGLGRIHAWTL +PAVLLLRNRGAVVLTGKSDTGHFLVTAGDAGHGTTELSPEELERAYTGFAILVKPEVDLG +GERTGADLAKPRSWFWGTLAKNGWTYAQVGMASVVINIFAVANPLFTMNVYDRVLPNNAV +ESGIALAIGAATALLFDFILRNLRGWFIDFVGRRADVVLACRIFDQVLDLKASHRPQSSG +AFASMLREFETVRDFFTSATLAAFIDLPFSLFFLAVILMLSPEIGMVLGSAMALVLFWGM +FVQFPIAKSVKKLMVHGEHKHGVLVESLFGYETIKMVGAEGRMRAKWETVVGQSAAVGQR +MRLFNNLGVNFVQFVQQGTIIAMVVTGIFLVKDGTVTSGGLVASVILCGRALGPMAQVSQ +LMMRFHQTWTSLKSLDAVMRGPVERPPEANFLHRPRLRGQIEFQEVTFRYPGTDHDVLRD +ISFVIEPGERVGIVGRVGSGKSTIAKLLAGLYAPTSGTVLLDDTDLRHIEPSDARANVGF +VPQDVFLFKGSIKENIAISVPRATDDEITEVSQALGLHDFITQHPLGYDLEVGERGGGLS +GGQRQAVALARTLLKTPTILVLDEPTNSMDTGTEKKVVDTLARTTGGRTIILVTHRTSVL +SLVDRLIVMDAGRIVADGPKATILGGLAKGQVKTGK* + +>2FR_NODE_5#PROKKA_00155 +MASPDIEYMSELRAAVSRRPTILANIILIAVLVFFILAILWASWAKIDQVSTGEGRVIPS +GHVQVVQNLEGGILSELYVAEGDHVKQGQVLMQLDDTQFSSDFMENRLKFLGLKAAVTRL 
+QAELEGTALKFPAEVEKQLPAVAQAERSLFEARRSEEDASLAKLQAQYQQKLHEVDETKA +KIEHLRTVIKLAKEEMAILEPLVKKGINAPIELIRLKREESQDAGDLAVARQQLLKLAAA +IDETKEAITEAKAQFRSRALKELNEAQVNMAALGQVVTSRDDRLRRTVIRAPVTGVVKQI +FLTTIGGVVRPGMDLMEIVPAEENLLVEAKIRPSDIAFMHPGLEATVRFTAYDYTIYGSL +KATLDQISADTIFDEAKKERFYMIRLRTKTNSLLDKTGKPLPIIPGMTVSVDVKTGRRTV +LQYLMKPFHKLSIRAFHER* + +>2FR_NODE_5#PROKKA_00156 +MITIDQTLTPKDLTEDLARFWTLSGQKIRQLASRWNAGDGAPVFTVEGRYTSRGWTEWTE +GFVYGSALLQFDATGEDWFLDYGRRGTREHMGGHVTHTGVHDHGFNCVSTYGNLRRLMDE +GKLPENADERAFLDMALASSGAVQAARWTNLGHGKGYIHSFNGAHSLFVDTMRSLRSLAV +AHMLGRDLKGEHDQSISLIERLAAHARTTAETAVFFGKGRDAYDEWGRTCHEAIFNVKDG +HFRCPNSQQGYSPFTTWTRGQAWITLGYAEQLEFFRALEAAGRPEADDCSDLMGIMRDGA +RATADHYIANTPTDGVCYWDTGAPGLAAMPDHKDRPADPFNEHEPVDSSASAIMAQGLLR +LAAVLAEEGDDSAERYRQAGLTVARALLKAPYLSEADGHEGLLLHTIYHRPNGWDHIPAG +RKVPCGESCQWGDYHVRELALMIGREAAGQEPYRFFNGLV* + +>2FR_NODE_5#PROKKA_00157 +VASLDLDVRQASLTRYLENRAAFAARRGVSLPMPSHRRGNDPTAPDDVAEMIRAQAAGHD +VLLVDTPGRVDAVSMAAHVVADTIVTPVGESHLDLDLIGNVDRNQGRAIRPGPYAEFVWA +VRQERARAGRTPTDWVVCHNRRRAPQTRVGREVERTLADLSKRFAFRLVPGFSERTIFQE +LFPDGLTLLDLRESETGVGLTLSHVAARNEIRHLVDALDFR* + +>3FR_NODE_4#PROKKA_00158 +VIAWVVLLALWLFVTRSRPGKAMLAASMSRTGLALVGYDIGKVYLQVWGLYGLLAGIAGV +LLASFTGASASIAISLTVNAFIIVVLGGLGNVAGSLGAAYIIGLLGTLTAYLISPSVREI +PGLLVLILILYVRPQGLFGRH* + +>3FR_NODE_4#PROKKA_00159 +MASRIHWDWRAGLVLLALIVLAFLPFGVSGYILGVMTVAFYLAVYAMSWDLLFGYAGEVN +FGPTFLVGLGAYGAGLSNSVFNISVWPSVAIGTLAAVIGGLVLAGPALRLRGPYFGLVTL +VAVILLEKVIGLLSSYTGGEIGLTVMDVLTISQSGNYYYAFGFMVISAVILRIIARSSIG +LILEASGQDPVATEALGFNVTKFKFMAFTLSAFFSGLAGALTVFYLGSASPGTVVSVFVT +IQIIIATLVGGRRSIIGPILGAVFLIAAGEILRPLGQLSNAVVALIALLVVLFAPNGFIG +LFSRTGGAR* + +>3FR_NODE_4#PROKKA_00160 +MSVLKAQGLYKRFGGLQAVNNVSFSVDRGEVLGLIGPNGSGKSTTLSLLMGVTRPDRGSV +QLDGQEMAGWRTHRIAKQGLSMVFQHSRPLHRQTVLENIKLALLPDTLWQLFPPHTLDRR +AREIAERVGLHNVIDTLPGNLPFADLRRLEIAKALAQDPSVLLLDEPFAGLSPRETREFA +ELVHLFREEGRAVILVDHNVKEVAGLVDRIVAMHAGQVIAEGTPDEVTRDPKVREVYFGQ +SLENASGIHADGDRRSEGNGSEALLEIDLRSVRYGLAEALRDIQIQINQGECVSVVGING 
+AGKTTLFKSILDFQGYEGDVRWQGTSLTGQGPGQVASQGIALCPESRELFGFMTVRENLE +LGGHKLDRQAHESQMDRVFDLFPVLRQRQAQAAYTLSGGEQQQLTIGRALMQQPKLLILD +EPTLGLAPLVIENISEALHKLQQDSGMTLLLGEQNLTFALRHSQRIYLLETGNLRWHGPA +ERFIEEVGEDVL* + +>3FR_NODE_4#PROKKA_00161 +MFCNGNQGYDFSINSHPGHAHQMQRRSHATNSQSGPVP* + +>3FR_NODE_4#PROKKA_00162 +MQRTARAALCREWNGPIQVETIRVDPPRRNEITIKLRACGVCHSDLSAATGVIPFPPPLV +LGHEGAGTVIAVGEGVTDFQEGDHVVSSFIYMCGKCRQCSRGRPVLCEQAHKALHHLPDG +TVRTHDGDGNPLNVFGACGVMAEYATLHVNNAVKIDPDVPLERAALVGCAVMTGAGSVFN +TAQLEPGSTAAVFGVGGVGLNAIQGCAIAGARVIVAVDTNEEKLAMARQFGATHTVNARE +HDDAGKAVKKMTGGVDYAFECVGSGVTVAQAYGSLGRGGTAVVVGVADVKDKTTFRTLSL +PADERTLKGSWLGSARPQFDFPRLLGLYQGGRLKLDELVTHTYTIDEAPQAFEDLKAGRN +ARGVILFD* + +>3FR_NODE_4#PROKKA_00163 +MTIKSYDSVYFDGRWQPVDGERLSVYESGTGEVMASIPGAAPAVMQQAIDAAHNAFDSWS +RRPLKERLKYIEALHGQLVARAEEIATTISREVGMPLKLSRNIQAGLPIAITDSYLKLLP +DFPFEEKVGSSLVQYTPVGVVGCITPWNYPLHQVILKVVPALAAGCTVVLKPSEVSPLSA +FMLAEMFDAIDLPPGVFNLVSGLGHVVGDSLTGSNKVRMLSFTGSPGTGRRIFHAAAEDF +KRLALEMGGKSASVILPDADLATAVKGSVNNCYLNSGQTCIAWTRMLVPADKHDEACELA +VAAAKKLTLGDPLDENTRLGPLASKEQLERVRNYIRVGIEEGAKLMTGGPDAPAGLDKGY +FVEPTIFANVDPQSRIAQEEIFGPVLCIIPYRDEEEAIAIANGTPYGLSGGVWSADQDHA +IAVASRLRTGQVTVNGGAFNPEAPFGGFGASGLGREFGRWGLEEFLEVRSLQL* + +>3FR_NODE_4#PROKKA_00164 +MLRFIVALMLLFPLVAQAEDAIKPGQWKQTIHVTIPGSSVKIPPHSSTNCVKPEQAGSIK +SIIEEAQQPGCKLNEYSRSGNKVHWKMTCTGKSQASTEGVFTLQSKTSYHIHMNALMQTP +NGPYKTVVDSDGKWVGPCK* + +>3FR_NODE_4#PROKKA_00165 +MRAPDELINGRWASEITAALPARIHQGVAQHWEADPEAEALVDHQVRWSYRELSAAVAAA +RQWLVGQGVRPGDRLMLVSENGRALVALLLAASGLDAWAAIINARLADNEIDAIRDNCDP +RLLIYTTEVSPDARNHAQRHDADIVHLDPLGEFAVGPAAAQSLPQPVVKDGTQVAAMIYT +SGTTGQPKGVMLTHRAILFIARVSGGLRNLRPGRHVYGVLPSSHVFGLSSVMLGSLANGA +CLHTVPRFEAGALLDALAGERISVLQGVPAMYARTLEYLHQHNRKLVAPALDYLSAGGAP +LDTDLKTRVEATFGTTLHNGYGLTEASPTISQTRIGEDSEASSVGRILPGLDYEVVHLKS +RQPVLQGEVGELRVRGPSIMRGYFRKPEATRAVLDDAGWLDTGDLARIDPDGQLHIVGRA +KELIIRSGFNVYPPDVEAVLNEHPAVTLSAVVGRQITGNEEVVAYVQLAPGHDMTESALS +EFAAARLSAYKRPSEIHILDQLPVTPAGKILKARLRALANQTPDR* + +>3FR_NODE_4#PROKKA_00166 
+VIRDKETLNQLIDTISRFVRERLVPSEEQVAQDDAIPEDILQEMKDMGLFGLSIPEEYGG +LGLTMEEEALVAMEIGRTSPAFRSIFGTNNGIGSQGILIDGTDEQKRRYIPRLATGELIS +SFCLTEPDVGSDAGSLRTTATRDGDHYVLNGTKRYITNGPEAGLFTVMARTDPDNKGAGG +ITAFIVEGDTPGLHRGRPDRKMGQKGAHTCDIIFDNCRVPAENIIGGREGVGFKTAMKVL +DRGRLHISGVCVGVAERVLDDALHFAMERTQFGKPIAEHQLIQALLADSKSEAYAGRCMV +LDAARRKDAGENVSTLASCAKLFCSEMVGRVADRAVQVHGGAGYMAEYAVERFYRDVRLF +RIYEGTSQIQQLVIARNMVREAD* + +>3FR_NODE_4#PROKKA_00167 +VVDDNSDLGLDAALDPQNGAKDRKFVTALARGLEVLRAFRPGDGFLGNQEIARRTGLPKP +TVTRLTYTLTKLGYLSYSQRLERYSLGTGALALGYATLSTFGIRQIARPLMQELADDVDA +SVSLGARERLSMIYLENCRGSGAVTLRLDVGSRIPIATTAVGRAFLAALPEGERNYLMDH +IKRHAGNRWPPVRRGIERAIRQYQETGFVKTVGTWERDVNAVGVPLVQSDNGNIYAFNCG +GPSFVLPEERLDAELGPKLKQLVQNVEAALRRL* + +>3FR_NODE_4#PROKKA_00168 +MLDAYLYDGLRSPFGRHAGALSPLRPDDLLATVIQALIARSGFAKEQIEDIVIGCTNQAG +EDARNVARHAGLLAGLPVETAALTVNRLCGSGLAAVADAARMITCGEGELILAGGVESMS +RAPFVMAKAESAYSRQLRTFDSTIGARFPNPKVLAEFGSDTMPETADNVARDLGISREAA +DAYALQSQQRYEAARQDGFYREEVLPVEVPQGRKQPPRWVSADEHPRPDTDTAKLARLSP +LFEEGVVTAGNASGINDGAAALLIGSRSVGQRLDIKPRARILSAAAAGVPPRVMGLGPVP +AAQKALARAGLSLNDMDIIEINEAFAAQVLGCCQQLGIAGDDPRLNPNGGAIAVGHPLGA +SGARLTLTAMRQLERINGRYALVSLCIGVGQGVAAVIERM* + +>3FR_NODE_4#PROKKA_00169 +MKPFTWEDPLLLDLALDSDERMVRDSAHDYCQNKLMPRVLEANRHEVFHREIMNEMGELG +FLGPTIPEQYGGAGVNHVCYGLIAREVERVDSSYRSAMSVQSSLVMHPIYSFGSETVKQK +YLPKLASGEWIGCFGLTEPDHGSDPGSMITRAKKVDGGYRLSGAKTWITNSPLADVLVIW +AKLDDTITGFVLERGMEGLETPKIEGKFSLRASVTGQIMMDDVFVPEENRLDVTGLKGPF +SCLNKARYGISWGSMGAAEFCWHAARQYTLDRKQFNRPLAANQLIQKKLADMQTEITLGL +HGALRLGRLMDSGDWAPEMVSLLKRNNCGKALDIARTARDMHGGNGIADEYHVIRHVMNL +EAVNTYEGTHDVHALILGRAQTGLQAFTG* + +>3FR_NODE_4#PROKKA_00170 +MVERKHGGLIVAEHLQAAGISHLFALCGGHISPILVQAKALGIEVVDVRHEASAVFAADA +MARLTGRPGVAAVTAGPGVTNTITALKNAQMAQSPVVVIGGATPTVLKNRGSLQDIDQLA +LMKSLVKWQTSVGTLAQLDEAMRYALEVAAQGVPGPVFVEAPIDLLYPRDLVHSLYADQA +GLDKMKGPVGRLLRGGLDLYLLRQERQPALSVHPNLKTLTEPAAEWHAARQLGEVVKRLA +SAQRPALVLGSQVLVNRTAEQAKDIADAVERLGLPVWTGGMSRGLLGAEHDLLFRHHRGR +ALAEADLVIVCGFPLDFRLKYGRGFAKGATLVSVNLSLHDLLLNRKPTVPVLAHPGDFLQ 
+ALADRMSSRAAQWRAWLGELGKRENAREEEIDQQAAAPADKVNPLHFFRTLDRHLGEQDV +LVVDGGDFVATGAYTLKPRGPLAWLDPGVYGTLGVGGGFTLGAAAARPGSRIWLIYGDGS +SAYSLAEFDTYRRLGLAPIAIIGCDASWRQIAREQVEMLGDPVGTDLRDTDYHLVAEGYG +GHGILVEHNHQIDAALAEAIKLSDAGTAVCINLRLAVSEFRKGSISM* + +>3FR_NODE_4#PROKKA_00171 +MGRSDVIAGLSLERLDNIERHIDRKYLKPKRLPGTLTLVARRGQVAYVKAQGLMDVERNK +PVARDTIFRIYSMTKPVTSIAMMQLFEQGRFLLNDPVHKYIPAWKNLRVYQSGVYPQFLT +TPTLRAMTIRDLFTHMSGLTYGFMCRTNLDAAYRELKLDGGKEMTLDLLVERLSQLPLEF +SPGSAWNYSVATDVLGYLVQLLSDRPLDEYFREHIFDPLEMADTGFMVPESKRERFAACY +QFDPEQGYALQDDPADSHFTRPIKFLSGGGGLVSTVDDYYRFAQALNNGGQLNGARIIGR +KTLDFMTMNHLPGNQDLPGLSIGPFSETPYEGSGFGLGFSVKVDVAKSQTNGSVGEYGWG +GLASTNFLVDPVEDLIMVFMTQLIPSSTYPIRQELRSIINGAIVD* + +>3FR_NODE_4#PROKKA_00172 +MHGLMMNRQLLISQILEYAAVNYPEQEIVSRTTEGPIHRYRYPELRDRSCQLAHALAGLG +VTQDDRVATVAWNNYRHLEIYYAVSGMGAICHTINPRLPAEQFQFIVDHAQDQYLFVDLT +FVPLLEKLHPQLNSIKGYIIMTDEAHMPETGLPNAHCYETLIKDQPTRYDWPEFDENQAS +SLCYTSGTTGNPKGVLYSHRSTLLHAFSVMAFPGVDFGEESSLLPVVPMFHVNAWGMPYF +ALITGSKLVFPGPRLDGASLAELINSEGVTDAWGVPTVWLGLLRHMNESGERFSKLEHVQ +IGGSAAPRAMINEFQERYGVEAIQGWGMTEMSPVGSVSQPTPFMRERMSAEEQLTVRGKQ +GRALFGVEMKIVDADGKALPRDGKARGELLVRGPAITSGYYRNDEANAKAFDDEGWFRTG +DVATIDPDGYMEIVDRVKDVIKSGGEWISSIDLENEAVGHPEVAEAAVIGVRHSKWAERP +LLVVVRNPDSAVTAEAIVEYLSERVPKWWLPNDVVFVDELPHSATGKLQKTKLRDDFKDH +RFSDDEA* + +>3FR_NODE_4#PROKKA_00173 +MTQNPDQKAPIWKRPATVEALNAHAKNTMVEHLAIEYLELGPDFLRARMPVDKRTHQPFG +LLHGGASVALAETLGSVGANLCIADPDKAGVGLEINANHIRSARSGWVYGTARPFHIGGA +TQVWEIRIQDEQDRLICISRITMAIVSAR* + +>3FR_NODE_4#PROKKA_00174 +MKQMQNRVAVITGAASGFGLEFARVGAARGMKLVLADVQAEPLEQARAEMEAAGAEVLAM +LCDVRKSEQVQALADKTMERFGTVHLVFNNAGVGSGGLVWENTEQDWEWVLGVNLWGVIH +GVRIFTPLMLEAARRETDYEGHIVNTASMAGLLCPPTMAVYNVSKHAVVALSETLYQDLK +LVNAPISASVLCPYFVPTGISDSHRNRPAELQNDSGPTASQMIAQAMSQKAVSSGKVSAA +EVAQRTFEAIGEDRFYIYSHPEALGNVKHRMEDIVAGRNPGDPFAEAPQIGQMLRDKLQG +* + +>3FR_NODE_4#PROKKA_00175 +MGELVDVYRNSVQTWECDQMGHMNVQFYLDKADAGLLALTRMLGLNRRFLNERQARVRVL +ENHVRFLREQHAGSPLTLRAGLIDIRPDQLKLYFELTNPIQQAVAASFITQAVLESTAGK 
+DHLTLPQSALEKAQQYQIDWPRPEGPMGLESTPPRTPPTLQEADDLGMMPTYLGAVSAGM +CDADGHLAIRSYMGIVSDAVPHLLSRIRHDTREVPRPGGAALEYRWIYHQRPEQGDLVTL +RSAITHLGNKAYRLGHWLFDAETGHCLATTEAVAVMMDLDERKALVIPQTARASLEEMLV +KGFSI* + +>3FR_NODE_4#PROKKA_00176 +MKAGMVAPLDATQQSRLRVLLALFGLVWLINAGFQAVAWLAAPNASTHFIHALAKSTTVV +PRWVQPLLMTGLHSAQSLGLGIVAAIMVLLAILLGLALLTQRKVAFAARVGIIYSIICWI +FLDGFGFPYANGQTDPGVFVAYAIAFLFVLSVAPVFDREGTKAPEIDERLWHWARIAFGL +LWLFDAVLKWIPPFLLHFSSQITSVIPGQPHWIAAWLSFVAELVHAIGPIPVAVVVALAE +TAIAIGLLSGRWMRLVIPVGMLYSVAVWTTAEAFGGPYSTAGTGVRGNVLGNVLIYLIPF +LFLWVGNSSQRSAAETTGRTLTD* + +>3FR_NODE_4#PROKKA_00177 +MVKKAPEVDVPSAKDRNFVTALARGLELLRAFGPEDDYLGNAELAERTGIPRPTVSRLTY +TLIELGYLRYCERLEKYRLGAGVLALGYRYLSRMGLRELARGPMQALADRTDCLVALGTA +DRLDMTYVETCQGAGPLVLRLEVGSRIPMATSAMGRAYLAALPDARRNEYREKIREVYTD +DYEAIWQGVEQGVEQYQKLGFCTALSDWNPHIAGVGVPLVLDGGSQIMAFNCGGAAMRLS +RSVLEKKLGPQLVEVVAEVQRQMHGRRLEAVS* + +>3FR_NODE_4#PROKKA_00178 +MQRRYLATLLAGLMAVPAVAVADSGSSSSMPDMNIKLHAHLHGSVDFSNTGGRAVPNTEA +YGEAAGTPARATNLSNNNSTIGFTGQHLVPGAFMAIFQVELALPGSETGVNNSYGKGNHV +SKNVGLHDTYFGIANPLGTLLFQPSFENQGAYLSRPFNMFKDTVGDFNSIIDTANFPNGG +PNGLPAISFAGQANYAISYASPKVKGFDAVLSYTEDANGGDFGTNNTYGSGYCAGTPNTN +HYPNCYGYPNANQHNNAWSFGVQYENEFSSLQSKVNGLINYSQINVQGNTSGVPLGGFTF +ADATQPNQTPPSSKLQLKALELAGKWDYEPTGTTAIAVWERSTGLYSRDAYSLGFSQAVP +GNNDLMVSWIHAGNLSSPLANICDPTKTVCSGSEVKQSGANEYVAGIKHHFDKQVSAYLI +YAYTRNNAEGLYGLGGPNHGQSVYPLNPGDNPQSLSLGMTWDF* + +>3FR_NODE_4#PROKKA_00179 +MFDRHHQVWPEFAPLHLTLPETSICYNLEVTAHRYPHKDAIIFYDRRISYGEFQRQVEIL +AGFLAREMGVEKGDRVLLYMQNSPQWMIAYYAILRANAVVVPVNPMNRRGELEHYASDTQ +ARVILCAQELFDQVSPLLGEEGLSRAVVAAYSEYLPDQTDLPLPDAVSEPARAINQSGVV +PWRQALAGEPAAPKALVGPEDHCVFPYTSGTTGAPKGCIHTHQSVMATLVGAVAWNPATA +DSVTLVSLPLFHVTGMQVSMNAPIFVGATMVIMTRWDRRVAGALIERYGVTEWRNIVTMV +IDFLSDPEARNYDLSSLRAIGGGGAAMPKAIAERLHEMTGLTYIEGYGLSETIAATHVNP +VDNPRAQCLGIPVFDVDCRVLDVASGQQQDVGEVGEIVINGPQVFKGYWNRPQATAEAFT +EVDGKSFFRTGDLGYYDEQGYFYLVDRVKRMINASGYKVWPAEVESMMYQHPAIRESCVI +SAPDERRGETVKAVVVLTDDAPADVTEAAIQQWCQDNMAAYKVPRIIEFRDSLPRSATGK +IQWRVLQEEERERAAG* + 
+>3FR_NODE_4#PROKKA_00180 +MQYSSDEISNSGLDRPRAGGYRLRLALSGLGWGSTALMFALLALALIAIIAFVVIRGGAH +VNWATLSQTTQGYHGLLNAIEGTLLVTVGSLLIAAPVGVVTGIYLSEYQHRRSARFFSFL +CDVMIGVPSIVLGMFGYIAMVNFFGWQFSLLAGCITLSFMIMPYIARTSELALLQVPNSV +REAAYALGAGDRVVIFRVVLASCVPQILNGLLFAAAISMGETAPLIYTLGWSNYMWGGEF +FHHPVGYLTYVIWSFISEPSSAAHQLAYVAALLTTGFALLINILARSTIRKQSQHQSQ* + +>3FR_NODE_4#PROKKA_00181 +MKTNRLFRWSITGIASVIPLALLAIFIFLLINSWPAIKFSGWHFLTGSQWSLGNEYGDLV +TVNGQEVPPGADYGIGFLIAGTLLSSFLALLIALPISVAASAFLAEAVPKRLQNTLALFV +ELLAGVPSVVFGLWGLVVLVPFMNHYIYPGLVHVLGDVPFFQPPTGAGYGLLTSSVVLAV +MIAPLITSTVRGAIERVPMVQREAGLALGATRFEVLWKTVLPSVRRVVIGAGILALGRAL +GETMAVLMVSGNALGYLPHNIYSPISTMAAFIVSQLDSALEDASGMATHALSEIALILFF +ITLIVNVIARLLLWLARD* + +>3FR_NODE_4#PROKKA_00182 +MKGKFSLKSTRTGSMMAKGALAAALLTAGMGVANASTTLQETGSSLLYPLFNQWIPAYSK +AHSDIQVNAASTGSGTGIAQSIAGNVQMGGSDAYLSGAMMKKHSDMLNIPVAISSQMVNY +NVPGLNDKHLKLSGPVLSRIYEGTVKYWDNKEIKAMNPGVDLPHHRIVPVHRSDGSGDTF +LFTQYLSFSHPYWHKKLGYGTTVNWPAVQGEIGATGNPGMVQALKDNPYSVAYIGVSYKG +QIDKDNLGEAMLKNKAGNFVLPNSTTVPAAAAAMVPKTPKDERISLIFAPGAKSYPIINY +EYVIMHANQGDLAAPLKQFLNWAVSPNGGNASQYLGAVNFMPLPKKAEELTKAQIAKIHS +* + +>3FR_NODE_4#PROKKA_00183 +MGTTFRPYSPDQELLLPPSLNEWLPEGHLAYFVSDVVEELDLSALYARYDGDGRRNSPFD +PRMMLKVLIYAYATGTFSSRKIARKLEEDVAFRVLAAGNFPRHRTICDFRKQHLAAFKAV +FIQVVRIAQEAELITLGTLAIDGTKVRANASKHKAMSYGRMQEEEKRLSKEVDELCRQAR +RTDEEEDQQFGPDQRGDELPEELQHRQARLDKIRAAKEKLEADQKERDKARGRSPDDDRR +SPRGGRNFKRDYGVPDDKDQSNFTDPQSRIMKTSDGFQQCYNGQLAVDGEFQLIVANHQG +SNPSDNGCLLPLLNDVKDTLGTYPRQCLADAGYRKEGDLQTLEVNGIDGYVSLRREGRKP +GEIDATRYPATARMAEKLATAAGRSVYGQRKHLVEAVNGWIKHVLGFRQFSLRGLNAVQG +EWDLVCLSLNLRRMSTLMRMV* + +>3FR_NODE_4#PROKKA_00184 +MTMITPSYLGETIEYSSLHACRSTLEDPTRKQPRNTSTNISMRPGEEDKDPLGDFLASLE +RLSALPENIRVLPSHGLVFEGLHQRLKALQRHHELQIDRLLERCEHPQSARDVLSLMFRR +PLDEHAILFAMGESIAHLHHLRLQGKLSQVEQAPFRYIRN* + +>3FR_NODE_4#PROKKA_00185 +MEAITNGMMNLNRQRHQHLVALRIIFAHGENGGEEVVHIGHVAIKTGDTFPGTGRVDLLC +LTHCLTSTD* + +>6FR_NODE_12#PROKKA_00186 +MTAPRWSATRASASSQSSRAPPGREACSCRRPSSRRRRAGSSSRARPREPPLMAALRRAS 
+WGSSPRRVSSSTTSSGSRGEKFTRRQRLTRVGSTSRGSCTVSTRMLLGGGSSRVLSRAPE +LARLSRPASRITTMRQPPPKGAKLSVRITSRTCSTLIWAEGESEGGSTTKKSSWLPAARR +RQGPQAPQGVSPGSWQLAAANRARAVVVRPTPRGPTKR* + +>6FR_NODE_12#PROKKA_00187 +MKRAPFITLEGGEGCGKSTHASLLAQRIRELGLPALLTHEPGATELGGALRRLLADPAGP +DPCPQAELLLYLADRAQHLEQVIRPALAAGEAVVCDRFADSTQVYQGLARGLGADRVREL +NRWLCGDTWPDLTIVLDLDPALGLARARHRQGKQGLDRLEQAGGEFHRLVREGFLELARQ +EPERVRLIEAAGSRPEVARRIWEVARPLLESWRKTREA* + +>6FR_NODE_12#PROKKA_00188 +VQFVQSSYPLAGLWRPGQETPPASGDTSGSGGVSGGAGFARMLDGKLRQGEPADLSTGAT +RQTGGPVRYSPHSPLLVGASIGAAPGLFTAPVVWSDPTPAAPKGAYRMHSANPRSTFPLR +TVPPPPAPPPSSEHENEDVGLPSAISTYHGSRFRAEVQVGERQEVNADVEQFHFPHLVQK +NGLTYAYFIDHSHGSENDVGLAVSKDGVNFQYQGKVLTKGPEGFDAQMASFPAVQYDGET +NTWYMLYEAKADHDDLNTVCLATSPDGRNWTKHGPVIEPGDAGEISAVDVGTPTMFKEGG +QWHVYFHTLAKDGRVRIGYAHGENLQDLTVNQGPLLDVDPQGIEGGTVGARSNVVKVGDF +YYMAYEVCSPNTDFHRSQWGTNLARASSPGGPWVKMSGRPLLVNDRPGMGMDGPELSLQD +GKLYLYYRHGANATARVELSGLGDSSKMYLAHQSSPGVPV* + +>6FR_NODE_12#PROKKA_00189 +MSKLFGILAGLIGIFLLVASVISFGHLVEDAWVRGGMLASLLFALLLLLGSAAFLLTAVL +LFRMRSHYLPRLYELEELEGLEEPPPKTKDSGESNGPRLA* + +>6FR_NODE_12#PROKKA_00190 +MEARTLFNGALGVKAHVRGLESVSDNIANVNTYGYKATRAQFSDLLYQEMAGGAGFPQQV +GNGALTAVENMMMQAPLEPTENVLDMAINGRGFFTVKHPDRNEGNRYTRAGQFYLDKDYF +LVNSEGYRVQGFAVDADGNVNVNQVQDIQIDNQIQDATATTSVDLAVNLDASDTTEFRQA +VAIDPTDSGTYNFRMGFQVVDEDGDTQDIAVFYQKLESYTGDAPAGSQSVWKAATFHNDS +GTLTADPSYPDNTFFLHFDTNGQLVGVTTGTPATGDSYTSNAEVSSTSASVSDRLGETFA +YTGAGNTQTLRSTATITFSGTTTAGDTVTIGGTNYTFAALSPSDAAAWLADQINANSAGS +YYAQDDASGTVTLYAKDGTAAAEVSASSVVISTDDTMSLTELVNTVDSGRKATGSLFVNI +AGLTAGSSTVTVAGHTFTYGPAQDFTTLSELTTLINDLSEVDATSSGHNIYITAASVGTS +GNSLGLATNDAANVAVSASTLLNGLDDSDATNIDASATTGSGGGQALKLDRTDVGASATI +DVATTNTLGSNLGLDFTGGNFTQNSTASDGNGTSNTTGEVPLTFTFTKSGSTLTQQVTLD +YSPTDGDDSTMLAGDYETFYLKTDGRGTGYLKYLEIDDQGLITAHYTNGQGVPQAALALT +TFIAPQELLREGDNLWRATAAAGVPTVAQAGDAQTAMGEVKSYALELSTVDLAQEFVNLI +NYQRSFQANSKSIITGDEMLKTAINLKG* + +>6FR_NODE_12#PROKKA_00191 +MDHAALPQTLAPGLYRLGSYHLACFLVETPDAALLFETGMSLVAPLILAQLDELGVPREK 
+IRWIVHSHAHSDHSTGQAALLEALPRAELLLSPTSRRHLAKPSTAEQFAKEDDSTRRALE +RIGALPPGSLPDPLPLLPARHRTVEPGDTLDLGGLTVELRSAAGHVPGGLLAWLPELGAF +LASDSAGFHMAARPNYPLYFTGYREYLRTLEEIRRTNPELLCLGHQGWFRGGEARRYLEA +LKAHLAFEHATIWEAHRRGEDEESQARRLVERYYHDELAIYPRDILWYCCRLLVRRSLEA +GA* + +>6FR_NODE_12#PROKKA_00192 +VNYEPCVKHKVVRNTVRDFAEAELRPIAHEVDQNSRFPWEVVEKMRGLQYFGLQAPRELG +GAGLDSISYAIAIEELSRVCAGIGLCVTVHNSVALYPLLKFGSPEQIERLALDLISGRRI +GAFCLTEAGAGSDAGAVETLALPCDEGYLINGTKIFVTNGGVCGLALIFAKTDLDHPRGA +PSVLMVEKERSGFAVGEIEDLSGMRCNPVSSLFLEDCLVPPENLLGRRGDGLRIGLSALD +TGRLGIAAQALGIAQGAFEAAVRYAKERQQFGKPIARFQTIQNYLADMATKIDAARMLLY +RACAAKDQGQPFSAEAAKAKLFCSATAREVCNLAVQIHGGYGYSKEYEVERYYRDAKVTE +LYEGTSEVQRMVIARAILSAPA* + +>6FR_NODE_12#PROKKA_00193 +MKLVVFLKQVPGVTEIPWDPASGHLRREKAPGMMNPACRHALEAALILKEQHGGELTAIS +MGPPAAEEILREALALGADRAVLLSDPRLAGADTPATSYTLSLAVRAVCPDCDLLLLGNQ +TSDSETGQVGPHLAEELDLPSAINVEELELDGEVLRVKRLCDNFLETLEMDLPALVTINT +QGHPPRQVPLGGVEDAFSRGEFLVLNAEDLKADLARVGMTGSAGRIVKVYPAGGERKGEL +IKGAPKRCVLELLERHGDLLGGYLRKDLGGGR* + +>6FR_NODE_12#PROKKA_00194 +MSRRQQENGAVWVFGDYRNYFQNRVTLQLLARARDLASHLDTKVAVVVMGYRVGRWVREY +VAHGADVVYVLDHPSLKYYLVQTYTRLMERLAGEHQPQIILVGATGFGKELAARLASRLG +TGLTADCVDLTVDDQGRFIQTAPSFGGNLLAQIMIPQARPQMATVRPGTFQELPHDADRR +GEIIKLPLPDDLPPEKARLIHSRRIKPRRRKLEKARVVICGGRGMGSKKKFKNLYALARL +LGAQVGATRPVVYQGWAPEDALVGQAGRDVHPEVLFSFGVSGAIQHTAGIHDAQFIVAVN +KNPAAQMMKMADVAIAADANQVCLALIRELKARLEKKK* + +>6FR_NODE_12#PROKKA_00195 +VNYTDPQELLPVVDAQDRVIGTMTRQEIHAKGLLHRAAHVLLFDPAGRLYLQKRSAAKDT +YPGKWTSSASGHVDPAESYAQCAARELAEELGLEAELRPLGRLPAGPRTENEFVEIFTGV +SAEPPRPNPQEIETGRFFTPAQALKLAADPTRACPSLGAVLELWQELEGD* + +>6FR_NODE_12#PROKKA_00196 +MESEVLQRVAEMLRSPGAVARNRNLLEFESEAGQRAWRCYRLFLSLLAELERAAQSPEVR +VSAQETEGGLQLVLVDPRVSYRRSCLVPPELVELFLDKLTALGLLGGEKT* + +>6FR_NODE_12#PROKKA_00197 +VSRPRYVRTQRHGPVTVVVMDNPATMNAMDQDMGPRLVGALESLAADRSVRAVVLTGAGG +RFSAGGNLTRAEEFLEENPGRGAAPVFAQYTIWVHRLLAVLTRLPQPVVAAVERAASGGG +LGWLLACDLVVLAEDARLSTGFLAIGLAPAAGVSWHLPRLVGLPRAAELLMLGRTLGADR +ALELGLADQLTPPGGTLEAALELAGELARGPAQALAATKQLLGGAARRGLFPQAEAERRA 
+VLHTADQEEFARRLERFRQRRRRS* + +>6FR_NODE_12#PROKKA_00198 +MPMDWTPPPRGGGREPDINQVVQNLKNRLPVFKKARGLWLAVAVVLAIILGASSYYTCS* + +>7FR_NODE_8#PROKKA_00199 +MMYGDSSAGAGIEAQSLLCPRCGKAQPVRKKLLLVLPEGDKYAYFCAVCGEEVGSKLEES +QGGPSFIPR* + +>7FR_NODE_8#PROKKA_00200 +MRSIVIDELSSPDVDRLSEHLDQTLTPSGLSGVYWLELPEDLLLPLQQEHRQSCGPHRVA +VVVEEGCLRLELLVRAQESLRCNCTAYASSAQRDFLLDYLDRLIEELGLRT* + +>7FR_NODE_8#PROKKA_00201 +MSLPKRITAPLLSGLVLPGLGQLINRQLGKGALLICLMSLFFMSFLFLTVYQVSHAMSAL +GEAAAQSADKWQALRAQLARQGTGWLWGLGTAGLGIWLFAVIDAARVGARLNRSGAEGGQ +GES* + +>7FR_NODE_8#PROKKA_00202 +VVMEEYQKTLARLLAESGGLFFQEGLRLKDGRPTPYFVNLGVFRTGRLALELGRCFSLWI +HHHGLDQDLDCIVGPSYKGSAIAQATAIALYELHGKEVAYDYDRKEAKTHGEATGHGYLF +VTGAALQGGKVLIIDDVGTSMSTKLELLKKLSWLKPRLERPMELLGVVLAVDREQTQAVY +DAQGRVREGVRGPDAMESFRQESGLEVWSLLGIRQALDYLYKEGIPVLIQGEMRPLDELT +MQIAREYLELYGREEA* + +>7FR_NODE_8#PROKKA_00203 +LNRSLQNQISARYWSSHISSRNNIQNNENCACDPRLAPCLLVCRASLIPFLPFLEGAKLK +KLGAFYLGRRRRLQAGKHAGSSRHQSGPGAPRRFLFGGNCDPPWRATHPPPRRQGRPAGR +PLSTMQNKPGKGITLLSSRPPAVPPRRGRSPAAPPSPWRPAWP* + +>7FR_NODE_8#PROKKA_00204 +MVALVGGLAALILGIIGLIGWWDEFIWLLKGAIPPILILGGVLAAYLGLEEMKDKRQAED +ESAREPFTPEEQDVDKYKREVEELKAKLAAMEKEEQQDSSPAEEAPQEDEKKEE* + +>7FR_NODE_8#PROKKA_00205 +VSRDWFPAKVRFMSGGRAQQTPLAIRVEGRWLEVRLLGEELVAPESGLAYVRRYRLEDRR +GRRWELRQRQEGWFCRELH* + +>7FR_NODE_8#PROKKA_00206 +MDQSKALAKYREFRPAWKSFLPYFLGVVIFTVGPRVNPQAPISPDLSHLIATCFLAFILI +TRFSNLYELADGRLGWRRSFPRALERQAPVEQITRIDLRRGIFHRLAGVAHVHVYLENQQ +EPYLKLFGVSEPEELRRLLLDLGASDQRVTGAWRK* + +>7FR_NODE_8#PROKKA_00207 +MALLVSFFGKGRRGACLLGLLGLLLLAPPAVAAGAGYVVAPVNKGADLRLLPSEAWPPPE +GSRADILIKLAYLRGIMDALQYLELAPRSAERVLNAYQGLSLQDLAARIDAFYLTDPRRR +DLPPAVVLFRLLAPAGKDAPAVKGPARPGGQGGRSGK* + +>7FR_NODE_8#PROKKA_00208 +MQRSSGSITQLARLVESLAELLRLHYGPDGLPAAQEEDLCSQEVEGRRLGELLAELWPDS +GPWRGPCLGQGAIRETERLLRYRLQQAEAQNRALATRLERVRRQVAAQRRTLLEQLRAAR +LQVEQGQARLEALSAELGGLRAELERRRERARRRRRYLEGVVPRGMNRFADAGGRILDQA +TLRRATRELRELQALVVRTKDACARLEHRWARARLELARARSHEAGLRAELARLEPYWQA +KTHRLARAKVVLAARQEELGPLERNLHRLRVMGLAHAEVVSRGRAALEPLLAPLASGESP 
+DPVESLEESLTQAGEQARRGRRLTALMERLGRRLERRLEAIGPVLKEQRRLNKEITRLEN +ELPGLLEPLLAGDGADPRNRQEAGARFSLLIARLEDLIPQARATQEQLDELRQALTIGLS +RGKAWQAAWRRAGKAERAALHQAQALVEEVRLAARQAARQAEHLRRRAEPAVKALSPLRS +QDLLPSLAALAQGVSRGQLKARQLEARAAELEGRIPRPYFGNLSKPPVALKPVSAGLRRL +SGKQVELERLAALERAARRWQGLLDGPLVEEIRRPVEQVALRLARSLTLLERQKNLLASR +HQKQGRELSTLKAELDQRRRREELARRRLEQVRDRNRRQQRTIRNYETELKQAQTRAALA +QRLEDELARLGEHAQTLARRLERSDKLAAALKRKSLERHRLYRRSQYAVEWLDYWRERAL +EQEKLLSSARAELELARREYQQARSLLASAVSERDQALKELATERAARARQALDLLGGKA +LSVELAASRSEAGRWAKLAQDMALALAASGEHHRQETADLRAQVDQLSAEAAMLKRQLER +IAAMVEVQVPGLEELADLPPAPSWRRPVALRLVPLGPKQVAQALDRLSAARRRLQNLGRG +TLGHWALIAALTCGLVLTPPGTPSKATRADAPLKPPRPVLRHLAQGSPLTPIYQVPAQAR +LLGDKVARGSLELNLLPLRGQPVAVPQSVKRRLKELAREAGLSPKVLLTSARALYAGQAA +VDPSALEELAHTARQLARRHPLIFRELSRRGLPPAASAVAALAPEPEKAQHLFLDRLYRE +YRSLGFSAEEALGALAANQRAFHRLTRQWTPPRRFIGKVQPVEAVEKMGLREFLQKITPY +IQSKLKVFLRQRGMSYSGDLTLYAKNLAFDMYCAAKKFQVPVTLLLAIAHQETWYANVLG +DANRSASPFQIYEPTRELIIKSMAEAGFVPPPKRIKLQRHLTMATFMASFHLRELMQRAY +TPPRRGRQAVVNLDRVLQRYNGSSRYAAQVALRKRQLARFLRRQG* + +>7FR_NODE_8#PROKKA_00209 +MIGKRIRLERVMDRGTRRTVLVPLVHGVGMGPIEGITDVLNTVDTISMGGANAVVLHKGI +VAAGHRRGGADIGLVIHLSATCADGSQTLVTEVEEAVCLGADAVSLRIEVGGADEDESLA +LLGAVSRVAADWGMPLLALMNPAPIPPAKMPKLLMRAARIGAELGADVVLVPYHKRFAEV +VAATPVPVVAIGGVKKTPPKQMLEMARAAVDAGAYGVSVGRTVFQYEKPGNMIKAICQVV +HRKATVKKAMEILAKKPIESTLYGGTVIW* + +>7FR_NODE_8#PROKKA_00210 +LTTLETLGRFAAGLKADELPPRLGEAVNRCLIDLLGAACAGHGSGAARMVRAVAGPLFAA +GPAWLWFSGRRLASPGAALANAAAASAWDLDDGHRAAAGHPGAAIIPACLAAAQETGASA +RELEAALVLGYEVACRVAAARDLARLPTMASGRWVAYGVAAAAGRLHGLDAAGLAQAMAV +AGVLSPDLAAAGYSRLMGNLVKEGIPWATLTGLVAVKLAAHGFSGPLDILDHPDYYQAPG +ITAGLGGGQWAIEQVYFKPYACCRWCHAAIDALLALQDEQGLDADEIAEIQVHTFERALR +LSNETGPATLEGAQYSLPFTLAVAAVEGAAGLLPLRPELLGREDLVELAGRVRLEVDPEL +EAMFPERSPARVVAMTRSGRRHEHTVLDPLGDPANPLSTARLEEKFRALTAGLLPPSRQQ +ALLVRIHALEPEGLPPLLEELGRPLQPEK* + +>7FR_NODE_8#PROKKA_00211 +VARLSEDERRIRRMVGSRIRELRQALDLRATELASRAGISQSQLSKLENGKAAISIPVLT +SLCRVLDRPLSYLFQKEEEIPRVLGTMTTVSGPENRGLEWFAAEVNRRSGGRMSIIPLWA 
+TMLGSAPDQVAMLRQGVIDLFIEELIFYQHIAPAVKLISLPYVFADDAHLLSFLESPFFQ +ERVHGPLTKSGIRILNRRWNWRRGLERVLVARQPVTRPEEVKGLKVRIFDSPALARFWEE +LGARPVVVPWPRVREAWEAGEFDLLPTHRSHLYPLGFCRQGRFVTLLGDVPPALAVTVNE +QKYLSLPPAVQAALEESCDAAGGFFSIEIRRAEVDNQAANLAEYGAVYLKVDLEPWRRAA +GRVVERMAREGAVDLDAWQAVQELRPAGEGA* + +>7FR_NODE_8#PROKKA_00212 +MSNRDIHNATGDSVKMNALVLGAGLQGKAVIHDLSRSELVDRILVADLDLAAAQRFLDKG +GYHKVRAVQADALDPAVLRRLISENRSDIVVCMLPAHLSGRIAEVCVECGVPFVNTSYAQ +WLGELDQRARDKGVILLPEMGFDPGIDLIVGRMALDELDQVEGFYSYGGGLPDPAACDNP +LKYKITWTFDGVLKAYCRPARLLRQGRPVEIPGDEIFQEENIHFIEVPELGRLEAYPNGD +ATRFVEVFGLGPELKEMGRFATRWPGHSAFWRIMAKLGFLGDQPVELGEGVSVSPREFLV +KLLEPRLQFRENERDVVVLRVKVWGRRGGRKRTVTYDLVDYRDLATGLFAMNRTVGFAAS +IGAQMVLKGEITGAGVLSPVKVVDGQRFLDELAARGIKVQRRLEEE* + +>7FR_NODE_8#PROKKA_00213 +MTLRVAALVPTLRLWADTLTELKRSRSAVVGGIMLALIVGISLTFPLYYPVDPLAQDLMA +RLTPPAWQAGGSLAHPLGTDNLGRDVLARILYGSRVSLLVGFASVLVAEALGIVLGLLSG +YYGGRTDSIIMRIADVFMAYPFMLLTISIIAVLGNSIFNLILVLGVSDWVTYARTVRGSV +LALKEKEFVKAAHSLGTRNRTIIRRHILPNILSPLLVLGTVRVANIIIWESGLSFLGMGV +PPPQPTWGRMLAEGRAYITDAWWLVTLPGLAIMLTILAINLLGDGLRDALDPRLRNV* + +>7FR_NODE_8#PROKKA_00214 +MGRYILKRLWHTVYVVVGISVIAFFFIHLSGDPVMLMLPADASHQEIEELRQQLGFNDPL +YVQYWRFATKAVQGDFGESLYYHVPAMELILERLPASLELALAAMAIALVVAIPLGILSA +VKRGSFIDLGSMLGALFGLSMPHFWLGIMLILLFSVKLGWLPTSGRGGWEHLIMPSLALG +MSLMAMFARLTRSVMLEVLGQDYVRTARAKGLKERLVIGKHALKNALIPLVTVAGMQFGF +LIGGTVIIETVFAWPGVGRLVVQAIFNRDYPLVQATVLVLAVLFVLVNLLVDLLYVYLDP +QISYLEEK* + +>7FR_NODE_8#PROKKA_00215 +MKKFKRLCLALGVAALGLAILAGPALAKKDVLVVIQEAEPVGLDLMTSSIQTTMSVCYNI +HDTLFAPQEDASVKPRLAESWEKVDDLTWKIHLRRDATFHNGEPVNAQAVKFSFERSFKP +SIKNPHKGKLSAFKEVKVLDDYTLLISTKEPYAPGLYILGYYLPIVPPGYIKKVGDAKYN +TNPIGCGPYKLEKWVRGEEIVLTAYDKYYGPKPAFKKVIFKGVPEEASRIAALLTGEADV +ISGVSIHQRKRILASGKAYLTNQMGVMPYLGLNTYKPPFNDVRVRQAMNYAVNRELINKA +LFGGKAILCAGPISPRTFGHDPNLKPYPYDPAKAKKLLAEAGYPNGFQTRLAYPTYMSQI +QEQAEAIAADLAKVGVKVRLEPYERAVMWQRYKARKHAMYIYWWDDAPEPDRYMYSLFNS +KVRDYYYKNPEVDKLLDLGRTILDRKKRAEVYHKIDRLLYNDAPWVYLYVIPEVFAVSNQ +VAYQGRRDGFLDMRTAKPK* + +>7FR_NODE_8#PROKKA_00216 
+VAEVLRVKELVKHFPVRQGFFGRRQGVVHAVDGVSFTLEENQTLGLVGESGCGKSTIAFC +LLRLIDPTAGEVWFQGRNLAAAGSEELRRLRRDIQIVFQDPFGSLNPRMTVAQIVEEPLL +NHLELSAARRRELVAEGLSMVGLLPEHAQRFPHEFSGGQRQRICLARALVLRPKVIICDE +PVSALDVSVQAQVLNLLSRLQRQLGLSYIFVSHDLAVIRYVSQRVAVMYLGRIVEQAGVK +ELYARPMHPYTQALLSAVPVPNPRRRRRRIILEGDVPRPLDPPSGCHFHPRCGRAMEICR +HQAPELRPLADGHLVACHLYDEVRSAPGGTVEGG* + +>7FR_NODE_8#PROKKA_00217 +VARAVDGVDLTVGRGEILGLVGESGCGKSALALSVLRLLPMPPAFFAGGQIRFKGRDLLK +MDPEELRRLRGNQISMIFQEPMTALNPVFTIGNQLGEVFRVHQGLARREARRRAVEMLEM +VGVPAPARRVREYPYQLSGGMRQRVMIAMALACRPALLLADEPTTALDVTIQAQILELIL +ELRDELGTAVVLITHDLGVVAETTERLAVMYTGRIVEQAPTVELFDHPLHPYTRGLLEAI +PSAEAELADKELHEIRGVVPSLLDLPPGCNFAPRCHLADERCARQEPELVEVRPGHRVAC +WRVDRG* + +>7FR_NODE_8#PROKKA_00218 +MTLMVDVALAAPLWQPLTYAVPAELAPLVKPLSRLLVPLRGGARLGFALGEPLAAGGGQD +ALKPVLDVLEDGKGPQVWPPELLPFFQRAAAYYHVPLGQVLAWCLPAGMGSARPAKALAP +KTQQVAVESWRRGEDSRLPRPESQAARILRRLKARGPLPLPELREEFPRAAALCRDLEKR +GWVTISHRPLVKDLLGRPLLPEPEPEHYTPDQQRALDELLPAVHSGGFKSFLLHGVTGSG +KTELYMACVKAALEAGRTALLLTPEIGLCLRLEGLLRQRFGAGQVAVLHSGLSPAARRGQ +WLAIARGRARVVVGARSAVFAPLREPGVICVDEEQDEAYKQEDRFRYHARDLALLRGREQ +DCPVVLGTATPAVTTYHRAQEGNTVCLRLPRRVREAPLPRMELVDLRREGRLVGGFLSRR +LLAALEQTLEAGEQAILFLNRRGFAPAYLCTACGQTVGCPACAVSLTLHQGSDRLVCHVC +GHQRPRPRSCPACGAGEEKLRPLGLGTEAVAQKLGELLPGARIARLDRDTADDPRRLGEL +LRAIAERRVEVVVGTQMITKGHDFPGIGLVGVLSADQALALPDFRAGERAYGLLTQVAGR +AGRQGGKSRVIVQAYDPDHHALRAALAQRPDEFYQTELAERRALGYPPFMRLVALRLEAV +DDRRCQRAAQALAAGLEEARRRLEPGARVLGPAPAALPRAKARHRWMILLKAPTAAAAGR +TLRLGLHRSPPLPAGVRLLVDVDPVSLI* + +>7FR_NODE_8#PROKKA_00219 +LALHTMAYLAAHPGRLISNRVIARDLGVSAAHLSKVLQRLARAGLLESLRGPTGGFRLGR +PAGEISLMEVYEAIDGKFQPSSCLLGRPVCRGGKCVLGELGRNLERQTREYLLNTKLSEF +EDFMCFEEGN* + +>7FR_NODE_8#PROKKA_00220 +MPLGKGRGGALPWDPRAEAALGRVPFFVRSLVRRKVEERVAEAGGRRVGLEDFQEAEAAF +RAVRAGKSQKELEAMLPAENRPGVEMVVVQACRSRLSNCPNPLIDTQKWLERVQAWVEEL +DLSERLRRRVADDKILFHHKLKIAIAGCPNGCSRPQIADLALVGMTRPRLVEPEVCTACG +ACAEACPDGAVSQDDGPPEFHRELCQGCLSCSRACPVGGIELDPPGVRVLMAGKLGRHPH +LARPVMEATGPEPVLAYWTRELEEYLASAPPGRRFSAWWLEQHPAG* + 
+>7FR_NODE_8#PROKKA_00221 +MPIPGRLLTTAMAVMPHTDVDQALASALSLDIPFWPQLPRVNYYEDMYVQASEHFPGMVV +DHKERTLVFSMDKFMVELEETLAHLEEPEYFDISPEYSVVYHRFLELELADRPAIRGQLE +GPISFGLNVKDQDDRPILFDDTVRPFLLEVMARRVNVQLTRLKARNPNAFMFVDEPGLQF +IFSGLSGYSDRKAKEDLDQFFAAIERPRGIHLCGNPDWDFLLNLDLDILSLDVYSNGEVF +SSYARSIKRFLDRGGVLAWGLVPTNFEPFSAEDHVSLKARLKEIWSALESKGVDRELMLE +RSLLSPATCCLVNPDGEKTVDKAFALVRALSAELRDEYGLDG* + +>7FR_NODE_8#PROKKA_00222 +VEVDISLFSNQNQFVILRVGEQAYALPAAQVREMQVLPEVTEVPRAPAHLRGIISPRGEV +LPLFDLRRRLGMRSLAEEADELLKILEAREQEHKQWLEELESCIREEREFTLPTDPEKCA +FGQWYQNFTTEDLALASVLERLAAPHRRVHEVAGAALEALEKEGQAAAQEVIDRARRIIL +PKLLELFAELKRLIRETHQEIAVILESGRHTLALAVDNVDSVELLQPKDLQNLERFGPVD +GSQDLLESVGRRANGETVYILKTAEFFQAATDLTF* + +>7FR_NODE_8#PROKKA_00223 +MAHGQEPAYEMKSEPAGNGELRVDLSGRLDMNALEGAVDQFGRLLKEQRPRRVELAVGGI +DYLDSGGALALTLMEEAARKAGTKFQLVQAGPEVRGMLALVDMDKIRRRPLRPAERGLGF +VEQVGQASLEVWRDFVELVTFLGDFLIALGRSLRRPRLVRWQETFFYMEQVGVNGLPIVG +LISFLLGLIIAFMSSLQLKTFGADAYVAALVSVAMVRELGPIMTAILVAGRSGSAFAAEI +GTMRVNEEVDALEVMGFDPTDFLAMPKVLAALAVVPMLTIYSCVAGILGGMVVGIWGLGL +TPYTYLHHTIDSLSAYGIVTALIKSVAFALIIAGIGCQRGFMVRGGAQAVGSATTSAVVT +AMFLIIVADSAFAILFYYVF* + +>7FR_NODE_8#PROKKA_00224 +MSAEDPIIEVRGLKAQFGEQVILRGVSFAVARGEVVVVAGGSGCGKSTLLKHMLGLYQPA +AGSVLIDGVDIAQADAAQLEWVRRRIGVLFQSGALLGSLTLLENVMLPLVGFTPLSRRGA +ELVARLKLSLVGLSGYENHLPSELSGGMQKRAGLARAMALDPQVLFFDEPSAGLDPVTSA +ELDLLIKRINRNLGTTMVIVSHELASIFEIAHRVILLDKQAKGIIAMGPPQELSGVPRLF +PGGIL* + +>8FR_NODE_2#PROKKA_00225 +MCNLLIKFGAFNDRFGKDFDRISTGHYATRYNTDEGVFLSTAADRVKDQTYFLGQITPEQ +LAKTMFPIGHLQKKEVRKIASDMKLPSAHRPDSQGICFLGKINYTDFIKKYAGEKPGEII +ELETGKVLGTHKGFWFHTIGQRRGLRLGGGPWFVVKKDIEKNIVYVSNGYDPIAQYDDKI +WLEDLHFLNKVHDYSKLNEIKFKIRHQPEFNSGKLVRDEKGIRIVSENKISGIAPGQFAV +VYDEEERTCIASSVIAENPEIAV* + +>8FR_NODE_2#PROKKA_00226 +MNPLLLFILIVSGGLYLAGHYLHKPILKYIFKPFTTFIILFFAFMQLPDVSVQYKDYILI +GLLISLIGDIFLLWPEKRFIHGLGAFLLAHVLFILAMVSDFGPYYNWQYLIPIALYMVIF +LWIILPKSGKFVIPIIVYALVLMVFFWQAAGRAIYLAESSSMQAMFGATLFVASDSILAY +NKFVKNYKWAEFFIIITYWAALYFIALSV* + +>8FR_NODE_2#PROKKA_00227 
+MDWIIVIILAILLVAFIILFLLIQKGFRNPVSEHTIPEDLPFDVQEVEYPTKNGKTIYGW +WIPADPKAATVVFVHGWGRNAQRMMPYLRKFCCGKFNLLAFDARGHGNSDHDGFSNMLQF +SEDIIASMNFIEQEHKAENNMFYLIGLSIGGAASIYAAGHDPRVKKVLTVGAFAHPASVI +TKQIKDRHIPYFPMIWFLYRYMKYVKNLDVDAIAPEKHIAKAQAHFLLVHGEIDQTVPVE +QGKRLKKAAGDKADLWLMPERGHSDCHLEHGFWEKLMEFFEAPKTKVQKS* + +>8FR_NODE_2#PROKKA_00228 +MKKFHLLLLSLLSGLLLAAAWPLHGFTPLIFIALVPLFFVQQQMGDTGKRGMLLYAWLTF +LVWNGLTTWWIWNATPVGAIVAIVLDSLFLAIVFQVFHLSKKWLFNNKQGFFILIFYWIA +WEHFNANWDLSWPWLTLGNVFASKHLWIQWYEYTGVLGGSLWILSVNILIYNIIKSFLEK +RKQRALYTTILTVLFIAVPIIISLNIYHHYKETKNPVNVVVVQPNTDPYTEEFNLPPSAL +IKRNLKLAEQKVTDSTDYVVFPESTIQEQIWEGSLNRSQSIKTLRNYVMEHPNLSMVIGA +STFRWLKPGEHRTNAARFYKKGLYYYAYNTAFFIDHSPYIQVHHKSKLVPGVEKMPSWPI +LKPLEHLALNLGGTVGTLKEDDHVSLFTNDSSGTKIAPMICYESVYGDYVRQYVAHGAEL +IFVITNDGWWGNTPGYRQHFSFAILRSIETRRDVAQSSTTGYSGFVNQRGDVLQRTKYDE +KAALSQTLNLNDKLTYFMKKGDYLAHLAGFFSILILLAAIVQGFLKKRNLPH* + +>8FR_NODE_2#PROKKA_00229 +MKTFLDLVNQRQSDRKYIDKPVEKEKLMRCLEAARLAPSASNSQPWTFVVVNQPELCQDV +GKAAMGPLYSFNKFASQAPVILAIIMEKPKVITEVGGRIKKKEYPLIDVGITAEHFCLQA +AEEGLGSCMLGWFDEKKVKELLHVPEEKSIPLLITVGYTPENYKHRKKIRKPIDSAVKFN +TYG* + +>8FR_NODE_2#PROKKA_00230 +MAKQKTQEKIWYAIYVKSRAEKKVAIELEAEGIDFYLPLEKRLKQWSDRKKWVEEPLFRS +YIFVHISHKDYYRALVQNTVKYVTFEGKAVPVPPEQIEAVKVYLEEKEPIQPNDEDWETG +KEVEVISGKLTGLKGVLMEVKGRSRVKVEIEVVSSSIILHIPKSKLRLLE* + +>8FR_NODE_2#PROKKA_00231 +MGVNLLDIILAVPLIFFGYHGYRKGLIIEVTSLAAFILGLYFAFYFSNFTAGILKEYFTI +QTKYMAAIAFVVTFIVVLLIVLAVGKIVEKFIDILLLGFLNKLAGGLFGVLKGALFLSII +IFVINYFDASHSIIKQKAKDNSVLYKPVESIAPALYSWLHLKNFDFHLPSEESVIKTITH +RANPD* + +>8FR_NODE_2#PROKKA_00232 +MGLSYRINFKLGANAMVISDPSMPVPASLQFRQLKSDEYVMLDIPTDSLEVAYWGTKKPV +PAQYVLTEAQTAKVESAITSYNAEIKSLAKKYNLAFVDFNSIMKSIEHGGLTVDGIHFTT +AFITGNLFSLDGVHLTPQGNAVVANYFIQAINKQYGSHIPSVMVSDYPSVVFP* + +>8FR_NODE_2#PROKKA_00233 +MSGKLQAEFRSDLKWQPAILRKILILPSKQVLIILSLMAVVVVQARPPLF* + +>8FR_NODE_2#PROKKA_00234 +MKIIKLAMFSKTQTMLQHKDINKLSELKNGFTQSWVEPDFIFRSLKCFSFSSLNKGLSPL +KAKGYSFEWVMSLLISLPFMGISSVNRLAGVVEAKKDVFYRLKNNSSISWRYIQWLFACK 
+FNTITSESTGNNIQPRCLIFDDTVIEKTGRFIEKVSRVWDHVQNRGVLGFKLLVMGYWDG +TSFLPLDFSIHREVGKNKEYPYGLRKKDYRKQFKKKRSSQTHGYDRSKEAGQSKIDNMIK +MFKRALSHGFSIDYVLVDSWFTCEAIIQAVTQVKNQTVHLIGMYKIARTLFEYQGTKQTY +SQIRNRLGRPTRCRKLRLYYLQATVGFKGHQLQLFFTRQGKNGKWKVLLTTDCSIGFIRL +VEIYQTRWTIEVFFKESKQLLGLGHCQSNDFDAHIADLTITMIQHMLLTLRYRYDTYESK +GALFENVKETIAIRKLNERLWGLFVELLQILTDLFEIVDAMELLEHIITNGQALERLKLL +FDLVPENNEAA* + +>8FR_NODE_2#PROKKA_00235 +MSQAEAPVIIKPNFKITNFLSKEPRSSLIHEIFSGLTAKQKYISSRFFYDRKGSALFEEI +TKLPEYYPTRTEKSILSAHAKEILGNPESLVIIELGSGDCSKISILFDNFPEQKMSNVKY +IPVDVSESAIIKSAEILSSRYEGLKIHGLLADFLKHLDLLPGATPRLICFFGSTLGNMTR +NQATDFLWNLKNIMNPGDRLLLGLDRVKGPEILYKAYNDKQGITAQFNKNILNVVNDVSG +TNFKTSDFGHLAFYNQNENRVEMHLKALYDMRITSKHFRDDIFIVKGESIHTENSHKFLP +EQIEQLALSSGLKFQASFTDVNKYFSLNLFEYPKLK* + +>8FR_NODE_2#PROKKA_00236 +MPQFFKQLSTRELTERLNNKDVQLIDIRPVDAYNGWASRGESRGGHIRGAKSLPFAWTKY +VDWIEMVHRKKILPENEIVIYGYPDEGFRLVANRFKKSGFEKVSIYLNFLNEWVPDTTLP +MEKLFRFQNLVPASWVNELISGGKPQHFENDKYVIVHAHYRNRDAYLSGHIPGAIDMDTL +AVEDPETWNRRSPEELKQTFEQHGITVDTTVIVYGKFMFPDNSDEFPGSAAGDIGAIRIA +LIMMYAGVKDVRVLNGGFQSWLDAGYEVSYADEPKKPVADFGATIPAHPELAVDTPEAKE +MLASSRAELVCVRSWPEYIGEVSGYNYIEPKGRIPGAIFADCGSDAYHMENYRNFDHTTR +EYHEIEDIWKSNGITPDKHLAFYCGTGWRGSEAWFNAWLMGWPKVSVYDGGWFEWSADPA +NPVETGIPENYPNRN* + +>8FR_NODE_2#PROKKA_00237 +MKAIWNNTILAESNDIVKIEGNAYFPINSVKKEYLKTSETHTVCPWKGTASYYSLEVNGK +ANPDAVWYYPEPSDLAKGIKGRVAFWKGVQVVKD* + +>8FR_NODE_2#PROKKA_00238 +MIKYYLLLAFEKKTLIRAIRVAILVGIILNLINNPDFIFHFSTNYLSLGRVLLTFIVPFL +VSTYSSVLSNSALRTGSVSHIDAILKCKSCNKTHIHVPIGHEVEECPICKKETRWRPVRI +FSGSGNRDELLKSLALFARYNPTPLFRINSDSIINEANQAAKDIFGSDELTGKNLAGIIP +EIKDINLNQLIHDGAIKKTIIHKEGRDYNITLKGIPELNSVHGYLNEVTNFVEPEQKG* + +>8FR_NODE_2#PROKKA_00239 +MKENLEWHKVLEKKEDLPENRIITVNAGSKQIALSHFEGKICALDNHCPHQGGPLGEGSI +ENGILRCPWHGWDYHPCTGKAPGFDDGVATYRVEERGNGIFVGIPPKKPHKTTLSDIMVE +TMVHWGVDTVFGMVGHSNLGLADAFRRQEEKGKLKYIAIRHEGAGAFAASAYGKLKGKPA +ACFSIAGPGATNMFTGLWDAKVDRSPILALTGQVATQVVGTGNFQEVDLVRAFQTVAAFN +HRVQKDSKHAELMSLAIKHALLKRDVSHLTFPDEIQEILEGKEESQTPEGRMGELQISPA 
+AGSMDKAVDFITKSKRPVVIVGHGARFVMEQIISFAEHLNAPVLTTFKGKGLIPDDHPLA +AGVLGRSGTPVASWFMNESDLLIVLGASFSNHTGITPKKPIIQVDFDPLALSKFHKIDVP +VWGELSTTVNILMKRLPVKPNTVDQRTELAQRWKIWRTEKQKRLLEDRGKGISSIAVFDN +LSKLIHPEAVVCVDVGNNAYSLGRYFESKNQSFLMSGYLGSIGFAFPAALGAWAATRGKR +QIVAVAGDGGFAQYMAELATSVKYNMNIKLVLLNNSELGKITKEQRSGGFKKFATDMHNP +DFAEYARGCGALGIKVSKRKDLKTKMKEFLDYKGTALLEIVTDVLLV* + +>8FR_NODE_2#PROKKA_00240 +MQEYIKTDRQIHWLSQAIAKVNRDYVPAKKDDSHTNLFLDAAGKRLFGRWINTPKGKFIL +ALNLKTLSFEWLDNPLSVKTSISVFDKEGSSIEKEIREFPVSMGMSSKDISKPLHFEIPD +YGFSIIKSNRISSFGIKQWIYYRGLANFACLSVLGYLQSESEIRIWPHHFDTGVYAQVTD +SLGFGFGWAMADSMIGEPYYYLSGYKNSSIIIYNNLSKLNFGRWVTGEQWNGTVLPLHVL +ADNSTAKALEIINTYIKESIDWFLNL* + +>8FR_NODE_2#PROKKA_00241 +MNQEKKKLVLLAFVLLLAGISPNIFPAAQSGIASMSSLAVVLLIPSVVLIFILAILSQAL +GYNDLRKQILNGILAGLAGTVGLEIVREIGFHLGGMPGDMPKLLGVLLLNRFASGPDFWS +NVAGWSYHFWNGAAFGIIFSLIIGRGKIWMGIVYALLIGTGFMVSPATTSLGIGVFGLHF +KDGYQFLTTVYLAHIAFGSIVGLVVYKKNKDAPNIFKRLKLAFS* + +>8FR_NODE_2#PROKKA_00242 +MMVMSQLDQGKILSMTGLSVKSHSFFIGNPNVGNKAFSADPAAGLVIPVRVNVYEENGKT +YVSYFKPSDLFGSFKNAKVKMIGQMLDKKLGMMLKMVTR* + +>8FR_NODE_2#PROKKA_00243 +MSVLMHNNQDGAVRFVFNNLSNKQTKQKSAETLKKLDLYKTMSKFKSTGVVYFFDAKNKS +LISHISLARSNKQLAEALTNSEKMAK* + +>8FR_NODE_2#PROKKA_00244 +MMKKLMNILFLSCLKATELIEKKLYFKLSLKEKVQLKAHKMMCDACTNYEKQSIFLDKGI +SHLNQSKIKKEDLEEIKKSIQQKLNELK* + +>8FR_NODE_2#PROKKA_00245 +MPQTKTTDLTYLVETYTEEMVSWAMYKVSDAELARDLVQDTFLAAAEKMDAFKGESSPKT +WLFSILNHKIIDVYRNKVKQPVSFDSQVFSTYFNERGDWKKEKEPKDWHQEEKQLLDDSA +FQQVLQKCLESLPEKWSTCVKMKYLSEKKGEIICQELGLNPTNFWQIIHRAKIKLRDCVD +QNWFRS* + +>8FR_NODE_2#PROKKA_00246 +MKTIDNYDFKGKKVIVRVDFNVPLNDQFEITDDTRIRATIPTIQKLRESGGAVILMAHLG +RPKSGPEDKFSLRHVVKNLSEKLQTEVQFANDCIGDEAREKAAALKGGDVLLLENLRFYK +EETAGDEAFAKKLADLADVYVNDAFGTAHRAHASTTIIAKFFPNDKMFGYLMENEVKSLD +KVLHHAERPFTAILGGAKVSGKIEIINHLLDKVDNLLIGGGMMFTFIKGDGGKVGSSLVE +DDLIETANAAREKADKLGVSLFIPKDAVTADKFANDANQKCRPSGEIPDGWMGLDIGVET +SETFRQVIENSATILWNGPMGVFEMDAFAEGTVDVAQAIVRATEKGAFSLVGGGDSVAAI +NKYNLQDKVSYVSTGGGAMLEYMEGKTLPGVAAIKDE* + +>8FR_NODE_2#PROKKA_00247 
+MSQLANDHKAINLSQGFPDFPISEELIDLVHYYMRKGYNQYAPMQGVLPLRKAISTMFQK +NYGIHYDPVSEINVTAGATQALFSAISAFIKDGDEAIIFEPAYDSYAPAVKINGGMVKYA +HLEFPDFNINWEDFPRLITNRTKLIIINTPQNPTGSVLSEDDLQRLERITSGTDIIVLSD +EVYEHLIFDGITHQSVCRFPELAKRTLVIGSFGKTFHATGWKTGFVLAPERLMKEFRKVH +QFTVFASNTPIQHAIADFIGNEDNYKNLGKFYQQKRDMFVKSLNGSKFNVLPCYGTYFQL +LDYSNISDKNEMDFARWLVEKHNIAAIPIAPFYHKKDDHKVLRFCFAKKDETLVEAGEIL +SKI* + +>8FR_NODE_2#PROKKA_00248 +MNDLKILYIQSRLAWEDAETNRKHFEEIIQKEAQHHDLIVLPETFTTGFPVDPVPFAETE +DGESVLWMREMAAQTCAVVTGSMLLKNDGVYTNSLIWMRPDGTYERYNKRHVFRMGGEHE +KIHPGDKILLVELKGWKIRPMVCYDLRFPVWTKNHYEKDAFEYDLALFVANWPAVRAYPW +DQLLIARAIENEAYVLGVNRIGKDGLGNDYNGHSKVVDAKGNVISEAPENEEAAISVKLS +YEALQKFRAKFNVGQDWDSFTIQK* + +>8FR_NODE_2#PROKKA_00249 +MKRLGKHFAVLILAAGYSGRMGMPKAFLPYDANRTFLEKIVSEYLEFGCNLVGVVLNEEG +MKLYEKMQLEHKNNITAILNPAPEKERFFSLQTGLKRLKSEGAVFLHNVDNPFLTQDILQ +ALASAFKTQAYVVPTYHEEGGHPILLSQEIVKALIETSDYEQNLRVFMESYDQIQVPVSD +PNVLANINSPQEYERLFGRSF* + +>8FR_NODE_2#PROKKA_00250 +MDKEKKQKRYQRLYKQIQDLIVKSSNNPLSNMATINAVLYHKMETFFWCGFYLYQDGKLQ +VGPYQGSLACINLAEGTGVCQAALTQQKTLTVPDVEAFPGHIACDSRSKSEIVIPVRNRE +NELVGVLDVDSKEHNSFDEVDEAELEKIVRLVYFPEG* + +>8FR_NODE_2#PROKKA_00251 +VLIKKAMNRVERGKYGEVHSVLIYKDGKLVLDEYFKGHDYKWEAKKHYGPMVVWDADRAH +SAHSVSKSITSLCVGIAVDKGLIKDIHQSIFDYLPEKYQYLNVGDKKYITVENLLTCSSG +LLWQEWSAPLSSKRNDQVGIYFHKKGPLDFVLNRPFVAVPGQRFNYSGGGVEVLGEIVKN +VSGMAFDEFSQKYLFEPMGIKTASWALKYPTGEVHAAGSLKIRPRDMIKIGAMMLNNGIW +NGKRIVSEDWVEKSRKPWGNNRGIDLPGEDLRDMGYAYNWWTKNEKINGKAVHWFSANGW +GGQQIIVLPEINTVVVLTGANYNRKVKQYALLADYIFPAIK* + +>8FR_NODE_2#PROKKA_00252 +MQQMEILERYLNENKTINEFKKVLWIYDFWGKLTEGKAAKKVLEFAGIKNGISVLDVACG +TGEMLEKVVKLNPDGQNSGIDLSPDMIAKARKKLSKTGHLNFNLKQGSALDLPFPDNSQD +LLINSYMVDLLPVDCFDKVATEFFRVLKPGGKVVMSTFSFGTKKVHRFWFWVARKFPALL +TGCRPVSFKHFLIKAGFEIVKDVEISQNTFPSQVLMALKKS* + +>8FR_NODE_2#PROKKA_00253 +MELNVQIDHVHLVVSVPPKVSVSRLMGILKGKLAIKLFKSYPSLKEKPYWGNHFWARGYF +VSTVGIDEDVIKRYVKYQEEEEKKIETQQQRFDF* + +>8FR_NODE_2#PROKKA_00254 +MRYDYFASQAKKEGLEQISALFTETSLNEKEHAKRFFKFLEGGPTEIVAAYPAGIIGTTL 
+ENLRAAAEGEHEEWTELYPEFARVAEEEGFKEVAAAFKMIATVEKAHEARYSKLYKNLEA +GKVFQRDGVVVWKCRNCGYLHEGKKAPKKCPACLHPQSFFEVETFGY* + +>8FR_NODE_2#PROKKA_00255 +MNIEAFYSLSYGLYIIGTASKGKKNGYVANTAFQVTASPEQIAISCNKDNLSEQMIDESG +YFSLSVLEKDASKEIINRFGYKSGKTLDKFEGTKYFETNNGIPVVTEECVAWFECKVEQK +VDVGTHIIFIGRVLNGEYLDENKESLTYTYYRQVRHGLSPKNSPTYVDKSLLPEKEKKEE +KAEETPAEKPKGKSMQKWECIVCGHIYDPAVGDPEQNIPPGTAFEDLPDDWVCPDCGAEK +EDFEPIG* + +>8FR_NODE_2#PROKKA_00256 +MFSKIFINRPITAIVISLFIIIVGIISIFKLPVAQLPKVTPPVVSVSGHYTGANASDVEK +AVATPVENSVNGATGMLYMNSTSANSGSFNLNVTFKIGTDVNVDAMEVQNRVNLATPILP +AEIRQTGLSVKKASTSMLEIVGLYSPHGTHDEKFLSNYAALYIQNALSRVDGVGDVHVFG +NSFAMRVWLNPQKMANLHLTTQDVINAVREQNAMIPAGSVGASPAPKGQTFQVTVQVKGR +LVTAKEFGNIVVGTNPATGSVIRLKDIARVKLGSSSYAGTPRLNGKVGCGLAVYQTPGGN +ALETADLVKAKMEQLSKNFPTDVAWTTMVDNTRFVQSSIDEVVKTLFEVLLLVIIVVFFF +LQTWRPTLITMLAVPVSIIGTFAIFTLIGFTINTLTLFAMVLAIGIVVDDAIVVVEAVQH +NIDRYGLTAKEAAIRAMSEVGGPVVAIALILTAVFIPVTFMPGITGMLYKQFAFTIAISV +LLSAFVALTLTPALCSIMMRPNPVNENSKGLNRMFYKFNIWFDKTVENYGATVRKTIKHA +PLMFILLGAIYIGTGLFSKYTSTSFLPNEDQGMVMAIAQLPPDASTQRTVKVLNQFGKIL +NHNKNVKRYFLAPGFSVLQGAQMSNFGTAFIRLTNWSKRKGKNSSIQAVIGQLMGASSQI +KGAKFMIIAPPPIRGLGRTNGFSFVLKQSTGSIQDLEKVQNKFLAALNKRPEIQMAYSTA +TFNYPDIRVTIDRVKAKKMGVSLSALDNTIQTFLGGYYINDFTLFNRTFRVYAQADSSYR +ANINDLSEYYVRNNQGNMVPVSALVNITRGTSAPVITHYNMDRNVNISGNAAPGYSSGDV +IKVLRQVAQQVLPEGYSYEFSGTTLQEIEGGKTSTFIFILAIVFVFLFLSALYESFAVPF +AVLLAVPIGIFGAYLSLHIGGLSSSIYAQIGIITLIGLAAKNAILIVEYCKMKYESGVPL +VQAAVEAAKLRIRPILMTSLAFDLGVIPLMIATGAGANARINIGYTVFGGMLTATLLAIF +FIPLFYVTIIKIRDRKKKPELVKTED* + +>8FR_NODE_2#PROKKA_00257 +MKSEVNGRVEDILFKEGGSVKKGQPLYTINKSLYQAAYDQAAAQLNIAETNWATDTTDAR +RYKNLWAHNAVDKIQLDHAIAKVNVAKASVIAAKANLESAKTNLDHATVRAPFSGSTDVS +KVRLGDVVVAYQTPLVTIVDNSNMNADFFITENDYLQLGSSDKSIKEKLSHFRLVLPNGK +LYPYKGKLYAVDNRVDPTTGTLMVRLKFPNPEDLLKSGMNCVVRSTQNSAGKVVVIPQQA +VTQLLNEFFVYTVNSKGIVSQQKVELGAEYGNMQVIKSGLKPGTKVIVEGIESVRPGAKV +KTVPMKTGGMAKQSKPE* + +>8FR_NODE_2#PROKKA_00258 +MEFKFEKLIIWQKAMEFGEEINSIAHKFPKDEVYNLSSQIRRAVDSIALNISEGSIGQSN 
+LEFKKFMSYAIRSLAEVVSCLHKAKRRNYITEDEFKKQYEFAYNLMNMMVAFREKIK* + +>8FR_NODE_2#PROKKA_00259 +MLLKKLNQNKFFSAFLAVSFLLSGFSVSLKAQGQKAVYQFNLNDCIHYALQNQASVKNKI +LSEKISRENVKEAYSKLMPQVSAGAKYQYTIKRQVSFIQGNPVLFGVPHQLQGYLNVDQT +LFDPSVLGSAKAAHLSENLSKENTQLSKIDVAANVKKAFYGVLVFREQLNLLNANIKRDT +KSLADTKNQYKNGLAQKVDVDRIQVLVNNDVTARANASRNLNTLIQTLKYHMGMPIKDSL +VIKGTISDAMLTEILPENNPMFYKNRVEFQQAQTTLAATKLLKSNVIRSYFPTLSAFYTL +EAPYNSNTFPGLFKDKLYPTSFVGLQLSIPIFSGFNKHYQYQAAKMNIQISKNNISDLEN +NIKLEYGNYFRQYKSDIANLKTQKENTKLAKLNYDNLKYQYDNGVQPLIEVLNAETTLLQ +AQDNYINALYQALVDKVDLDKSLGKLKY* + +>8FR_NODE_2#PROKKA_00260 +VVLLLSLAIIIAFRKFSSNIHFLENIGGVLGTVVSASFLTLIGIINFFILKNLYRMFKLY +KKGEGAEKRIEEITENLLNKRGLLNRFFRFAYRSIDKSFKMYPLGFLFGLGFDTATEVAI +LGISATVAKDSQLPIWGILAFPLLFAAGMSLMDSLDGLIMMRIYDWAMVDAVRKVFFNMV +ITGTSVFVALAIGTIEWLQVVSIEAKESLSFFSFLNHLDFSVLGVGVVIIMLISWLSAFV +YYRKVLS* + +>8FR_NODE_2#PROKKA_00261 +MDAGEEKASPKKTILIAEDDETSFFFLKFVLAKENVNILYAQSGQEAVDICEAHPEIDLI +LMDIKMAGMSGIEATQLIKKRNPRVPVIAQTAFALSSDKENILKAGCDDYITKPIRKEEL +LEKVNFFLYSKKES* + +>8FR_NODE_2#PROKKA_00262 +MLAISIVILLILSFQVMPVALGIDSHKMNVFHVASSIFLLILGQVLLFLLGILLGDKFMY +LMSGFKRFVLFIGFFIIATRMIMEALEIRKGKRTYLLDKAKQFILPSIAQAINTFLAGIL +FQLLIFNLSKDLIYLGIFALAFSVPFIFIKNEKQSMLAVSLLYMVGGGILSILSFYFLFI +* + +>8FR_NODE_2#PROKKA_00263 +MKKHLNINEPTRIRITKEFKFEMAHALKGYDGLCRNIHGHSYELMVTVSGFPIEEENHPK +LGMVMDFGDLKKIVKEEIVGQFDHALVLSKKMPVPLVDELKNQFERIILTDYNPTSEMML +IDFAARLKARLPENITLKHMLLRETVTSYAEWFAEDQD* + +>8FR_NODE_2#PROKKA_00264 +MYKKLLLLSLTFFMFTSFTNIASAQSNNIEIARLKQIHKLLDYRFVGGFYGFEKLFFQTV +SYPDEARQNCTLGIMIASFTVNCDGDLVGIRIRNSLGKPLDNQVSKFLKATKGHWNPCQD +KKFTHFEIPIQFTLKGTETDSTAAALVYVGKSAGYSCYPDSYYSVPRLFPGGIL* + +>8FR_NODE_2#PROKKA_00265 +MEAITNGMMNLNRQRHQHLVALRIIFAHGENGGEEVVHIGHVAIKTGDTFPGTGRVDLLC +LTHCLTSTD* + +>9FR_NODE_13#PROKKA_00266 +MGKIVGIRFKKGGKVYDFDAGHFVLSVGDMVIVETEQGQALGEVVRPPVSHVLPELAPKN +RCCEGCEDSGDEPAQLKQVYRLATEEDLRQLVENAKLEKEAFRYCQERIAARRMDMNLVK +VECFFDRSKLMFYFTAEGRQDFRELVRDLVSRFRTRIEMRQIGVRHEAKLLGGLGSCGRE 
+LCCATFLRDFEPVSVKMAKEQNLSLNPTKISGLCGRLMCCLTYEFETYKGLKQGMPKLGK +RVSLNSGLEGKVIRQNVLKRQLTVILSDGREFTGTPEELEQLEPLAKPQAPPKPRGGQRQ +QRNQQQGKGQQQTNSGGKSRSRNRRRKKKGS* + +>9FR_NODE_13#PROKKA_00267 +MSETFYLTTPIYYVNAEPHLGHAYTTIVADVACRYQRLAGRQVRMQTGTDEHGDKIAQAA +AKEGIQPKEFTDRISGKFRSLWPQLNIQFDNFIRTTDPQHMQVVQAILSKVHEAGDIYFA +KYGGHYCVGCERFLTEHEMIDGKCPDHGTEPVYQEEENYFFRMSDYTQPLKEYIRANPDF +IRPERYKNEVLAILDQGLEDLCISRPKTRLTWGIELPFDQNFVTYVWFDALINYLSGLDW +PDGELLERFWTGPKADPQHLIAKDILKPHGIYWPTMLMALAKAEGRPLDHYLYRHLNVHG +YWQVGEGKMSKSRGNVVKPLDLAGIYGVDPFRYFLLREMTFGLDASFSEDLLVERYNADL +ANDLGNLFSRVLNMLSRYRDGLLPELHPQELTEADREMKGALAASLGPGAEHDFQAQVRE +FRFHTALADLWSQVRRANKYIVAREPWVMAKDPDRAAELDNVLYILVQLLASVTHLAWPV +MPATAEKMAAMLGLELVVPVDWQRLFALELMTPGAKAEKPQALFPRVQTDKVKAKAARKE +AKQAQQQPAAKGGGKQKAKPQDKAGLITIDEFAKVELRLGRVLEAGAIKGADKLLKLKVD +LGEPEPRQIVAGIARHYRPEELVGRQVVVVANLKPAKLRGEISQGMVLACVAEGRVRLVA +PEEELPPGSVVR* + +>9FR_NODE_13#PROKKA_00268 +MRARLFKWAGVFFLSLAMAGPAWAASRATPHRRPVLKLHFADVSYDYELKRAMSYAVSGG +ADINECLTAARAITAGDGESWYRGWHRMARRLDQMADQALKAGHRQTARQFWLRASNYYR +AAEFFLHGNPKDPRILSAWGASRRCFRQAARLMDHPVEVIAIPYEGHKLPGYLVKPDASL +KPRKTLLLQTGFDGTGEELYMEVAWYAIQRGYNVLIFEGPGQGGALREQHLYFRPDWEKV +VTPVVDYALTRPEVDPKRLALMGLSMGGYLTPRAAAFEHRLAALVADPGDFDMMVGHRPT +PAEWAGMKKYPKQANQALRAKMKHDTGFRWLVNNGMFTTGRKTPLAFLEFFSRFELTPKI +AAQIKCPTLVVVGAGDHFASPKWQRLLYDNLTAPKTLLRFGPDNPARQHCQVGGLLWGNA +KIFDWLDQVLR* + +>9FR_NODE_13#PROKKA_00269 +MSETPRRPRDSRYMWRGIRPSEEELKTILEDHAQWLERLRSWEYSWREFIEEIPPPHDLS +GADLLEADLSDADLTWAKLSNAILFEADLSNADLREADLSNAKLWWADLSDADLTWAKLS +NAKLLAADLSNAELWWADLSNAKLIKADLSNADLTGADLSNCDVTGVRYHGPWLGIPFIQ +IRKPNKLTCRGIRADTCYGSPRFRRDVMDQDFLEEMRETTGGRWLYRLWWLTSNCGRSFI +RWAFFSLSLAVAFACVFCSSLGPECFDLHRAEGSRWVAEVAGRYLEVDPAYLGTTAASRG +LPGDFWTMLYYSLVTFTTLGFGDVVPLTPWAAFWVTIEVVTGYIMLGGLVSILASKMARR +AG* + +>9FR_NODE_13#PROKKA_00270 +MWQEHIAGRKIGLETGPRPFDPHKPCLLMVHGSGGRGETFRPQLSGLAPYLNPAAIDLPG +HGNTPGPGRDQVAHYADWLAEFIRRGPLRPALLGHSLGGAIVMQLALDHPDLAPALVLVG +TGSRLRVLPAILDGLLSDFDATLDLVLKYAYAPGADPRWVQAGREIMSQPGPRVVHDDFA 
+ACDRYDITDRLGEITAPTLLIYGDQDQLTPPKYGRFLAERLPDARLEIVAGAGHMVNLER +HAEVNRLIPPFISAFSPPASS* + +>9FR_NODE_13#PROKKA_00272 +VLFTQIIAFVLVMVVYQAYDPAPPDYGWGWGLLLFITGPLLEWLLASVIARSGLRRLARP +AADPARSLQRSEILLHLSALTVFFLFMVSYDLKAGLIATPLLAASETLSGLAALFYYALL +LIPVWGHCHRLERAAGRALALDRRRYILEQARFVAPVAFPWFLVSALRDLLTLAWPGLTA +WLETPAGDLAFLGFFLLVISWLFPPLVRSWWGCPPLPPGRAREICQMVLKVARVRVGGIL +SWDVLQGRLVTAGILGLFPRFRYLLLTPALLEALSPTELAGVVAHEAGHVRLKHIPAYLM +FFMAFFLLAYALAEPLDILLRLALLTLAQSDWGAGLLNSPDAGSTLSITFALPLLALMIV +YLRFVMGFFMRHFERQADLFALNLMGEAAPLVGALEKLALMSGQTRDLPSWHHFSVAQRV +SHLLTAQANPPAWLHRQGRLIKKALAVYLAGMVLVLGLGWGMAGLDWSRQVNQELALELV +RHQLAQHPDDPRLRFQAGMLCYQLGREDRALSHFRRAFLAAPDNPELLNAMAWIFATSQD +PRRRRPQVALVLARRAVSLSPLPHIWDTLAEAYFAAGQPVKALAAARAALEAGPKARLDY +YRAQLERFKRAVEDLKKKGPAGRRPRPAAPAPGGRQG* + +>9FR_NODE_13#PROKKA_00273 +VEPTVPSPLLDRRQRGDAFLRRYWKLAAPLLILFMLALFVLPWVWFSFVEALCLQVGGAG +LLYILGRLFTTAFNPAYHQAPEPQDGDAGRRDPSSSDPPPRA* + +>9FR_NODE_13#PROKKA_00274 +LPLAFHSSSHGVVAFGFFHIETPMLLLEELFFFAPEFCDALVRLAQAPPERDWQGSWTGF +EIPDPASRGDLHGAIAGRRLEGFIGALYARWPFPRDPHQFRQRTRGAAPREVVEQEAESF +GRRREVVLEARAGGGEFAIGQYRFHRTGFLALVDYVWRGGMPGWEGGRRPEWLLEAARRI +QESDSPWLAGLDWDPARLGFTI* + +>unf122_NODE_11#PROKKA_00275 +MISVYDDQSVLFCSWSVVLSGVILRIRILNGRGGRLPITTEARYCEVVLPLMVMSFSVTQ +PPLRCI* + +>unf122_NODE_11#PROKKA_00276 +VKTGAEAIRALATQLPAFRQKLSDGWYQVRIAGRDVSTSGLTAQLHETLPDGAVIHIVPR +VAGAKSGGVFQIVLGAAAIAGSFFTAGATLAAWGAAIGAGGMTGILFSLGASMVLGGVAQ +MLAPKARTPRIQTTDNGKQNTYFSSLDNMVAQGNVLPVLYGEMRVGSRVVSQEISTADEG +DGGQVVVIGR* + +>unf122_NODE_11#PROKKA_00277 +VVSTPEGERYFPCVNISGEPEAYFRMSPEDWLQAEMQGEIVALVHSHPGGLPWLSEADRR +LQVQSDLPWWLVCRGTIHKFRCVPHLTGRRFEHGVTDCYTLFRDAYHLAGIEMPDFHRED +DWWRNGQNLYLDNLEATGLYQVPLSAAQPGDVLLCCFGSSVPNHAAIYCGDGELLHHIPE +QLSKRERYTDKWQRRTHSLWRHRAWRASAFTGIYNDLVAASTFV* + +>unf122_NODE_11#PROKKA_00278 +MQDIRQETLNECTRAEQSASVVLWEIDLTEVGGERYFFCNEQNEKGEPVTWQGRQYQPYP +IQGSGFELNGKGTSTRPTLTVSNLYGMVTGMAEDMQSLVGGTVVRRKVYARFLDAVNFVN +GNSYADPEQEVISRWRIEQCSELSAVSASFVLSTPTETDGAVFPGRIMLANTCTWTYRGD 
+ECGYSGPAVADEYDQPTSDITKDKCSKCLSGCKFRNNVGNFGGFLSINKLSQ* + +>unf122_NODE_11#PROKKA_00279 +MKTFRWKVKPGMDVASVPSVRKVRFGDGYSQRAPAGLNANLKTYSVTLSVPREEATVLES +FLEEHGGWKSFLWTPPYEWRQIKVTCAKWSSRVSMLRVEFSAEFEQVVN* + +>unf122_NODE_11#PROKKA_00280 +MAEPVGDLVVDLSLDAARFDEQMARVRRHFSGTESDAKKTAAVVEQSLSRQALAAQKAGI +SVGQYKAAMRMLPAQFTDVATQLAGGQSPWLILLQQGGQVKDSFGGMIPMFRGLAGAITL +PMVGATSLAVATGALAYAWYQGNSTLSDFNKTLVLSGNQAGLTADRMLVLSRAGQAAGLT +FNQTSESLSALVKAGVSGEAQIASISQSVARFSSASGVEVDKVAEAFGKLTTDPTSGLTA +MARQFHNVSAEQIAYVAQLQRSGDEAGALQAANEAATKGFDDQTRRLKENMGTLETWADR +TARAFKSMWDAVLDIGRPDTAQEMLIKAEAAYKKADDIWNLRKDDYFVNDEARARYWDDR +EKARLALEAARKKAEQQTQQDKNAQQQSDTEASRLKYTEEAQKAYERLQTPLEKYTARQE +ELNKALKDGKILQADYNTLMAAAKKDYEATLKKPKQSSVKVSAGDRQEDSAHAALLTLQA +ELRTLEKHAGANEKISQQRRDLWKAESQFAVLEEAAQRRQLSAQEKSLLAHKDETLEYKR +QLAALGDKVTYQERLNALAQQADKFAQQQRAKRAAIDAKSRGLTDRQAEREATEQRLKEQ +YGDNPLALNNVMSEQKKTWAAEDQLRGNWMAGLKSGWSEWEESATDSMSQVKSAATQTFD +GIAQNMAAMLTGSEQNWRSFTRSVLSMMTEILLKQAMVGIVGSIGSAIGGAVGGGASASG +GTAIQAAAAKFHFATGGFTGTGGKYEPAGIVHRGEFVFTKEATSRIGVGNLYRLMRGYAT +GGYVGTPGSMADSRSQASGTFEQNNHVVINNDGTNGQIGPAALKAVYDMARKGARDEIQT +QMRDGGLFSGGGR* + +>unf122_NODE_11#PROKKA_00281 +VFDGELSFALKLAREMGRPDWRAMLAGMSSTEYADWHRFYSTHYFHDVLLDMHFSGLTYT +VLSLFFSDPDMHPLDFSLLNRREADEEPEDDVLMQKAAGLAGGVRFGPDGNEVIPASPDV +ADMTEDDVMLMTVSEGIAGGVRYG* + +>unf122_NODE_11#PROKKA_00282 +MFLKTESFEHNGVTVTLSELSALQRIEHLALMKRQAEQAESDSNRKFTVEDAIRTGAFLV +AMSLWHNHPQKTQMPSMNEAVKQIEQEVLTTWPTEAISHAENVVYRLSGMYEFVVNNAPE +QTEDAGPAEPVSAGKCSTVS* + +>unf122_NODE_11#PROKKA_00283 +MPVPNPTMPVKGAGTTLWVYKGSGDPYANPLSDVDWSRLAKVKDLTPGELTAESYDDSYL +DDEDADWTATGQGQKSAGDTSFTLAWMPGEQGQQALLAWFNEGDTRAYKIRFPNGTVDVF +RGWVSSIGKAVTAKEVITRTVKVTNVGRPSMAEDRSTVTAATGMTVTPASTSVVKGQSTT +LTVAFQLEGVTDKSFRAVSADKTKATVSVSGMTITVNGVAAGKVNIPVVSGNGEFAAVAE +ITVTAS* + +>unf122_NODE_11#PROKKA_00284 +MKHTELRAAVLDALEKHDTGATFFDGRPAVFDEADFPAVAVYLTGAEYTGEELDSDTWQA +ELHIEVFLPAQVPDSELDAWMESRIYPVMSDIPALSDLITSMVASGYDYRRDDDAGLWSS +ADLTYVITYEM* + +>unf122_NODE_11#PROKKA_00285 
+MAIKGLEQAVENLSRISKTAVPGAAAMAINRVASSAISQSASQVARETKVRRKLVKERAR +LKRATVKNPQARIKVNRGDLPVIKLGNARVVLSRRRRRKKGQRSSLKGGGSVLVVGNRRI +PGAFIQQLKNGRWHVMQRVAGKNRYPIDVVKIPMAVPLTTAFKQNIERIRRERLPKELGY +ALQHQLRMVIKR* + +>unf122_NODE_11#PROKKA_00286 +VADFDNLFDAAIARADETIRGYMGTSATITSGEQSGAVIRGVFDDPENISYAGQGVRVEG +SSPSLFVRTDEVRQLRRGDTLTIGEENFWVDRVSPDDGGSCHLWLGRGVPPAVNRRR* + +>unf122_NODE_11#PROKKA_00287 +MTKDELIARLRSLGEQLNRDVSLTGTKEELALRVAELKEELDDTDETAGQDTPLSRENVL +TGHENEVGSAQPDTVILDTSELVTVVALVKLHTDALHATRDEPVAFVLPGTAFRVSAGVA +AEMTERGLARMQ* + +>unf122_NODE_11#PROKKA_00288 +MSMYTTAQLLAANEQKFKFDPLFLRLFFRESYPFTTEKVYLSQIPGLVNMALYVSPIVSG +EVIRSRGGSTSEFTPGYVKPKHEVNPQMTLRRLPDEDPQNLADPAYRRRRIIMQNMRDEE +LAIAQVEEMQAVSAVLKGKYTMTGEAFDPVEVDMGRSEENNITQSGGTEWSKRDKSTYDP +TDDIEAYALNASGVVNIIVFDPKGWALFRSFKAVKEKLDTRRGSNSELETAVKDLGKAVS +YKGMYGDVAIVVYSGQYVENGVKKNFLPDNTMVLGNTQARGLRTYGCIQDADAQREGINA +SARYPKNWVTTGDPAREFTMIQSAPLMLLADPDEFVSVQLA* + +>unf122_NODE_11#PROKKA_00289 +MTSKETFTHYQPQGNSDPAHTATAPGGLSAKAPAMTPLMLDTSSRKLVAWDGTTDGAAVG +ILAVAAD* + +>unf122_NODE_11#PROKKA_00290 +VTAELRNLPHIASMAFNEPLMLEPAYARVFFCALAGQLGISSLTDAVSGDSLTAQEALAT +LALSGDDDGPRQARSYQVMNGIAVLPVSGTLVSRTRALQPYSGMTGYNGIIARLQQAASD +PMVDGILLDMDTPGGMVAGAFDCADIIARVRDIKPVWALANDMNCSAGQLLASAASRRLV +TQTARTGSIGVMMAHSNYGAALEKQGVEITLIYSGSHKVDGNPYSHLPDDVRETLQSRMD +ATRQMFAQKVSAYTGLSVQVVLDTEAAVYSGQEAIDAGLADELVNSTDAITVMRDALDAR +KSRLSGGRMTKETQSTTVSATASQADVTDVVPATEGENASAAQPDVNAQITAAVAAENSR +IMGILNCEEAHGREEQARVLAETPGMTVKTARRILAAAPQSAQARSDTALDRLMQGAPAP +LAAGNPASDAVNDLLNTPV* + +>unf122_NODE_11#PROKKA_00291 +MKTPTIPTLLGPDGMTSLREYAGYHGGGSGFGGQLRSWNPPSESVDAALLPNFTRGNARA +DDLVRNNGYAANAIQLHQDHIVGSFFRLSHRPSWRYLGIGEEEARAFSREVEAAWKEFAE +DDCCCIDVERKRTFTMMIREGVAMHAFNGELFVQATWDTSSSRLFRTQFRMVSPKRISNP +NNTGDSRNCRAGVQINDSGAALGYYVSEDGYPGWMPQKWTWIPRELPGGRASFIHVFEPV +EDGQTRGANVFYSVMEQMKMLDTLQNTQLQSAIVKAMYAATIESELDTQSAMDFILGANS +QEQRERLTGWIGEIAAYYAAAPVRLGGAKVPHLMPGDSLNLQTAQDTDNGYSVFEQSLLR +YIAAGLGVSYEQLSRNYAQMSYSTARASANESWAYFMGRRKFVASRQASQMFLCWLEEAI 
+VRRVVTLPSKARFSFQEARSAWGNCDWIGSGRMAIDGLKEVQEAVMLIEAGLSTYEKECA +KRGDDYQEIFAQQVRETMERRAAGLKPPAWAAAAFESGLRQSTEEEKSDSRAA* + +>unf122_NODE_11#PROKKA_00292 +MTRQEELAAARAALHDLMTGKRVATVQKDGRRVEFTATSVSDLKKYIAELEVQTGMTQRR +RGPAGFYV* + +>unf122_NODE_11#PROKKA_00293 +VNISNSQVNRLRHFVRAGLRSLFRPEPQTAVEWADANYYLPKESAYQEGRWETLPFQRAI +MNAMGSDYIREVNVVKSARVGYSKMLLGVYAYFIEHKQRNTLIWLPTDGDAENFMKTHVE +PTIRDIPSLLALAPWYGKKHRDNTLTMKRFTNGRGFWCLGGKAAKNYREKSVDVAGYDEL +AAFDDDIEQEGSPTFLGDKRIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVAC +PHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGI +WTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTT +LGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEES +WLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMSISRICWDTGGIDPTIVYERSKK +HGLFRVIPIKGASVYGKPVASMPRKRNKNGVYLTEIGTDTAKEQIYNRFTLTPEGDEPLP +GAVHFPNNPDIFDLTEAQQLTAEEQVEKWVDGRKKILWDSKKRRNEALDCFVYALAALRI +SISRWQLDLSALLASLQEEDGAATNKKTLADYARALSGEDE* + +>unf122_NODE_11#PROKKA_00294 +MEVHKKQLADIFGASIRTIQNWQEQGMPVLRGGGKGNEVLYDSAAVIKWYAERDAEIENE +KLRREVEELRQASEADLQPGTIEYERHRLTRAQADAQELKNARDSAEVVETAFCTFVLSR +IAGEIASILDGLPLSVQRRFPELENRHVDFLKRDIIKAMNKAAALDELIPGLLSEYIEQS +G* + +>unf163_NODE_15#PROKKA_00295 +LPDPLPRARNTPTYQPAGYVFEDVEHAADLFNLQTFGFIYSRLTNPTVAVLEERIANLEN +GRAAVCAASGHAAQFLTFFTLLEPGDEFVASRNLYGGSITQFGLSFKRLGWTCHFVDPRD +PENFRRAITPRCKAIFLEQLANPSGIVIDVEPVVDIAHEAGLPLIVDNTVPTPYLFQPFD +WGADIAVHSTTKFLGGHGLALGGAVVESGRFDWGQNDKSPGMVNPEPAYHELVFHETFGD +FGFTTKARAVALRDFGPALSPANALYTITGIETLPLRMERHVQNAQAVAEFLDGRQAVAC +VSYAGLPSSPYRALASKYLPKGAGAVFTFGLKGGFEAGVKLGGAGELFSHLANIRDTPSP +VLHPASTTHRQLSEEQQLASGAGPEVIRLSVGIESVDDLIRDLEKGLAASQS* + +>unf163_NODE_15#PROKKA_00296 +MSDRSAQLSLGFSCLGHAYSHMFAPIFYVAALTLENDLGLTHGEVVGLIVIGNVLFGVAA +PLAGWLGDRWSSTGMVGLFFIGTGGGMVMTGLAASPIQIGLSLAVTGLFASIYHPVGIAW +LIRNARARGKALGINGVFGGAGPAAAPLIAGALIEISGWRAAFLVPGAVIAGTGVLFYWM +MARGMVVETKQDALPLPPASRQDTVRAVSVLAVTLLCTGLIYQATQPALPKVFSERLAEI +LGEGVFGISVFVAAVYFTAGGLQILAGHLADRYPLKFVYLVCFILQVPLLVLASAVSGAA 
+LVVVAMAMVSVNVGALPAENSLVARYAPSNWRGLAFGIKFILAFGISGLGVKLEGVLYDH +TGGFAGLFIVLGASAAVGTAAALLLPSDSRVKAPVAAE* + +>unf163_NODE_15#PROKKA_00297 +VIIIGTTWHSLRDMRNPLKDEGPDPIDRYVGSRIKGKRTGLRISQGNLGKSIGVTFQQIQ +KYESGANRVGSSNLYKISKALGVDVSFFFEGIEAQLEASKSAAAAGMSDQPMVRLEMEPM +NSRHASELAHNYFRIADPQVRKRLFYLVRALVGDESLSDDSGGDTDDDIILDDDDMSMKN +PYDMGGSGD* + +>unf163_NODE_15#PROKKA_00298 +MRNIMIVAGLLVVSAASGGGLYLAQSEPESLSLEVASKQGARAEKARVFGGRAKATAPTL +DGKDGVDSRSRATVARTGASQPGQVGGSGPEGGRIRGSAGGPIAARPSGVSETHHQPGEV +LVANPPPNFENMVVSQGYRIGDLLQLRSLDMRVLRLRTPSNVSVASAVQRLRQQFPRTTT +DANTLYELSQGQGFPESYARMLIGWPGVPQNCGAGVHLGMIDAGVQPDHPALAGTDIRYR +SFHQPGRQPGPANHGTAVAAMMVGKPLPGKGWSGLLPGARLSAANIFQLGANRKIAASAR +GLLEAIDWMAEQRVDVVNMSIAGSNNKVVREVVEKARAKGLVLVAAAGNWGTAKRPAYPA +AYGDVIAVTAVDADRRIYEFANSGQYIDFSAPGVRVWTAIPGGGQYQSGTSFASPYVSVL +TGLIVSRGHPRDPEAVRKTLRAKVVDLGAAGRDDVFGWGLVDMQPRCVKLARAEVWSDTV +NR* + +>unf163_NODE_15#PROKKA_00299 +MPGVVWSIVASGVAVQNRWHTLAITMATVVEQMVANKVVGMMPAGSVEPAATLKAMTPVG +SRGTLEVLMARNRAMALVAVPLMGFSLSSSCRARMPNGVAAFPRPSTLADMLRIIAPIAG +* + +>unf163_NODE_15#PROKKA_00300 +MAGSLMMDARMPYAPEPESGRNRAMGSASAGNPNSSVSDPSGVIKRASAPLARNIATATR +MATRYGMMRIATWKPSRAPSTNTS* + +>unf174_NODE_16#PROKKA_00301 +VVIGVNDRGEKHFLAIEDGVRESTQSWREVLLDLKRRGLTVPPKLAVGDGALGFWAALDE +VYPETRHQRCWVHKTANVLNCLPKAVQPKAKKALQEIWMAEDRASAHKAFDHFVQMYQAK +YPKAVACLEKDRDALLAFYDFPAEHWVHIRTTNPIESTFATIRHRTDRTKGCVSRNTMLA +MIYKLGMSAEKRWRKIRGFSYLAKVINGVKFKDGTEVNSKDRNSREAA* + +>unf174_NODE_16#PROKKA_00302 +MTIKNIFQVRLKPTRITFYATGLGILSIFLSLMLNIFPTDLGHWIFRIIFQISIVGLVAP +LYLLSNNNELKPAGLRYNKAYVYLLISIGIAGSLLFLFWTEDNKLFSKFNTQSFEPAAYI +AVANVYEVIFFVVFLRYYFEKAFGILPAIVLSSLFYSFHHAGFQPEFAKLFFVGLVFFSI +FRVANHWLICFPFWWVGGIVDVLTKAKDISDISGLTWGKSLFVLTVIIVSVAYFQRSSRN +AFASR* + +>unf174_NODE_16#PROKKA_00303 +MSPFSLWISNSFYLATIFYLLSLAVLVWPRRWLENLLLGLGLACNLVSMGIRLYYSWPMQ +APYQEPFWLTACLAAFALCLSLARRQRLVRWLIPVIAGLALLAALFPKDYYLPFPRSNTI +FSHLFLFLSASGKACLWVAGMDALRFALGVDEYRAAGNRRPLFLKLIVWGFVVYTLSLFV +AETWSYLGWSSPVIWEDNNLPATMGTWFYYGCFLHLYLLRGWGLRRRAWFAVVGMALLFY 
+FNYLPETGEFKLPVSR* + +>unf174_NODE_16#PROKKA_00304 +MALLKSIWDFWGRPGLTFVLLLCLVADAYAGFFLVRAQPDIFASLNRLMHHDWLATYGWH +SLEITWWFFLLLVLMFLLVMNTLVCTLQKVLLLIRHRRRNQDRLGYVLRCSPHVMHLAFI +VILASHLISYAVGINSQNNVVRKGGVIPLPGSPYRLRLDEIQVKFYEGRRLDFFRGRAVS +QRISLTLFDRQQRPTSKVLAINPPIWYRGYSIHLKRYYPSRRSGMRRAPYANLIIRKDPG +ITMFFTGTAIFTLGLLAYLWQAVRERRRETPGDPAREATHAVV* + +>unf174_NODE_16#PROKKA_00305 +MRWFSLFCLALILLIPPAAAWAKAGGCESFGPFSVRTLPQGRSLVPDASGREFHLVPRGQ +KPPAGVALVQVVFIPVKRVAMSGGQDVSLLITLHAMDTRVALTGGRPEDWVLEPVRKGLD +QGRIVSLGGNHAIDYEQLATIKPEVFFTWDESLIPTAAALQVPVVITYGELARNLETQIK +FVRFLAPFFGREKEADQYVARVRAAVARVRERAKGVKHRPKVIWGDIYEKRVLVEPGHSW +AAEIVRLAGGDYLFDDVRGTS* + +>unf174_NODE_16#PROKKA_00306 +MITYRTPKQGITTKAQLAKINPEMAAIRPMTQGRVFYPRKIYKQSGHRLDEIIEEVAAML +HPELYPGHKLKFFGELR* + +>unf185_NODE_17#PROKKA_00307 +MIIPMLCVTIFVILVVWFLLPRVARLMIHTPTAISIFVCLSMVQLMAVFISMRCTLCGHF +LQVALTLILVVGYAIAAS* + +>unf185_NODE_17#PROKKA_00308 +MAVNIHITIIELNALHTKRQNLFQELDAVIHLLMEAKHWGLRKSRNATSLNSRFKSLDTL +KVKIISVTFRRVIRTNHKKASKIWRHENIQLGGCQS* + +>unf185_NODE_17#PROKKA_00309 +MKNINHTRSSISDDIRNILCSSANMRRAIPLILRLLMNSVNLSTNLASHFFDLVIGKILT +SRLSLSKHLAHNLGNLLLLGKIGVFHNRRNASSRLLLLISKHLILCIYLVFRILA* + +>unf185_NODE_17#PROKKA_00310 +LHLGNLFLIVQLHFSKLFLILKSGVNHTSRGSISTSTLPSIKLRKCSSKIITSILSFISG +RLATKSNQIKQLIRNGRSASLQRTFKKSFTSFSHSTRNKTRDGLIRVRNIRALNGRFNTS +ITHAYSIVIGSKHITLNATGGGFLTASKQFRHGATSKSRSNTASNSTKHKSPHLSGWRQI +ISLIT* + +>unf185_NODE_17#PROKKA_00311 +MSNIQTGAERMPHDLSHLGFLAGQIGRLITISTTPVIAGDSFEMDAVGALRLSPLRRGLA +IDSTVDIFTFYVPHRHVYGEQWIKFMKDGVNATPLPTVNTTGYIDHAAFLGTINPDTNKI +PKHLFQGYLNIYNNYFKAPWMPDRTEANPNELNQDDARYGFRCCHLKNIWTAPLPPETEL +SRQMTTSTTSIDIMGLQAAYANLHTDQERDYFMQRYHDVISSFGGKTSYDADNRPLLVMR +SNLWASGYDVDGTDQTSLGQFSGRVQQTYKHSVPRFFVPEHGTMFTLALVRFPPTATKEI +QYLNAKGALTYTDIAGDPVLYGNLPPREISMKDVFRSGDSSKKFKIAEGQWYRYAPSYVS +PAYHLLEGFPFIQEPPSGDLQERVLIRHHDYDQCFQSVQLLQWNSQVKFNVTVYRNLPTT +RDSIMTS* + +>unf185_NODE_17#PROKKA_00312 +MSRKSIVIKQEFRLLGYELNRSGLLAENEKIRPILAQLEKLLLCDLSPSTNDSVKN* + +>unf196_NODE_18#PROKKA_00313 
+MDPNLVIPVQQIEDDLRMLSRYNRCIRTYGVGQGLDRVPEVAQSLGMTVMLGAWISPNAS +DNSRELGLAIDVANRFPDTVSALVVGNEVLLRGDLSEQKLVEHLRETQNAVSVPITYADV +TDYWFRHRGVADSVDFVTIHILPYWDDDPVGVEDTMVRVRTLWEQARAVFPDKRVFVGES +GWPSAGRMRGEALPSRVNQARFVRELLNLAQSENMDINLIEAFDQPWKRVNEGTE* + +>unf196_NODE_18#PROKKA_00314 +LFSEPREEKWPLMGPVSDDASWRAHLAWSIAIAVLPMLSVLVFLGNLTPVRWLMLALASH +TAACTLVVAMLNVFQTTMTPIDWAIGLARWGLEAAAFALTVAAFVEASKKGGGKPWVPAF +RDIVVALRARTFRAFRGLGFALGLVRALTLFGAAVVTLGLIFDARYRDFPVAVYAVPAVA +FAVLALYRLDARSADFREEVLLALILAAGGIAITVLEGVANHQAMAWAAVNFVLAGTIAY +DVFSRRRAG* + +>unf196_NODE_18#PROKKA_00315 +MKPPIFSFPVIVLASAIAAMTVGAAVRHGIVQYKAIHNICADAIDFWPCEIRSLVIMTLM +NTPVLGLLALALGLAALIGNRRALIIAALVTGGLGLFLYNTELGACGLLLGALGAVRADV +RSTG* + +>unf196_NODE_18#PROKKA_00316 +MRIKIAITIVLTVSVVLVAMQPIRSPAAERLGSFAIDPGAISISGVSSGGFMANQFHMAH +SATLMGAGIIAGGPYQCARVNSGFGDYLGSYKELWNAIYICSHQAGQIPFIGFMQPFLGP +PDPKKSVGAARAEAETGAIDALENMRGDRVWLFSGTRDTLVPTSVVDAVDASYTELQRAF +TDRPEDTIVYVNDVDVHHAMVVAHKGDNNCLEFALPYINDCDYDAAGNMLTFFYRRPETS +LNPPGEWDRRSLSAFDQTEFFPDGSGRDENGDGSISMNDIGHVYVPANCRAGTTCRLHVA +LHGCEQYQEKVERECGEEGKCEPRLFFETAGYNPWAEANDTIVLYPQTIPWDGPSVTRTN +PKGCWDWWGFSGEDYATKNGKQIRAIKSMIDRLTGS* + +>unf196_NODE_18#PROKKA_00317 +MESGNLFDGMPGTPIAEEAMATVLETPGFRLVRIVSTGQATPYGDWYDQDEAEWVMVARG +RAGLLIEGEGDARTMAAGDHVLIPAHVRHRVEWTDHDQPTVWLALHFSE* + +>unf196_NODE_18#PROKKA_00318 +MREFLAEKWTQILIIASIGVAWITFTALVG* + +>unf196_NODE_18#PROKKA_00319 +MRTAGGGNKRGEKATIQGGWPEGPDEGAMQATTYKTLSQENIADVFPGTAKGQATSASRA +RLKALLSKIGGRDTPLERPGVWHQIPYEPPCDEPVRSLPGREERAKVGPLPGPLAKNAAR +* + +>unf207_NODE_19#PROKKA_00320 +MSQPNQAKSAPEGEMSYRQASEQMIWNLRLDFDPVGIRFVFDENERDFLPVSHRAKARIT +YCQFLAAARQAHYSFFMEPHQCVCGNAQPVFGFRELDKESDTKRHMKYLCDEELAWQAPQ +EKARLEVGALKGIYIAPLSKFDDLEYSPSLAFVMCLPYQAYHIFNDYMGAMRRPNLTFFH +TPNSAVCSGSVYAFNHDTANMTTMCAGSKTSGKSEMAYVNVFIPGDQFLRTAQQQKYRIE +EMGGPSLLGKGGQPWPGLDACKGCPLFKFEALS* + +>unf207_NODE_19#PROKKA_00321 +MALVTLENVGKTYHMDEVEVTALAEISLSIEQGGFAALVGPSGSGKTTALNLMGSLDKPT +TGKVLVAGQDVSALDRKNGAAFRGQKLGFIFQDFNLLPVLTVAENVEYPLLMIRNLPKSQ 
+RGPAVARVLEAVGMNDQADKYPAQLSGGQKQRVAIARALVGEPALVLADEPTATLDGATA +QRVIELMKRMRDEFGTTFVFSTHDPRIMDQAEALFHLEDGRLVDSPQTGEEVGHA* + +>unf207_NODE_19#PROKKA_00322 +MLKVIRLALKNLLRYKRRSLLTGLLIAFGVVALIIFVGLSDSFKRAVVGQITDSVLSHLQ +VHRKGYMASIDNLPLDRMLHPKAYKKLTSVLSREPGVEAFSPRIKFGAMLSNYAQTTNVR +LNGIDPAREQAAVPLLKSRIKSAAHPDVLLAPGEVLLPETLAKGMGLKTGDTVVLVANNK +DGSVNGMTFKVAGVVESLMGPGGRDGYLPIKGADKLLRLAEPELSEVAVRVRNFVRLGEA +AARLRGVLEPMTNQKGKPMFELHTWTQLTPFNNVVRMIDLMNVGIKVILVAVVLISILNV +MMMSVYERVREIGALAAMGTSPGRIRGLFVAEGFCLGLASSLAGAAIGLGVLAVMSLAGV +EIAFGRSNEVFALAPSIAPTEVISAVLIVLAVSVLASLQPAAKAARLEPVEALRHV* + +>unf207_NODE_19#PROKKA_00323 +MFKNTEKALVNITLRLFLLGCIILSMMLAAPAWALSGQEILEQVDHNLLPGDLEMYRKII +NIEPSGAKKEFVLWFLRKDKDKVVVRVPRLFPGGIL* + +>unf218_NODE_20#PROKKA_00324 +MNVEAVLKTRGANVVTVAPGDTVVAVARLFGEKKSGIAIVCDAASDVIGVVSLGDIVHAV +GAKAADALEQPVETIMTKDPAVCKPGDDIESALNTMEELGIRHLPVVEDGKLKGFVEQRA +ALETLYEDAALDFAQLRSYVIRPGGRR* + +>unf218_NODE_20#PROKKA_00325 +MDDLFSRITTASEKARTRTQPKAPAKPRAKPESNARPRAPGRARDEEQGQERGYTAEDIE +VLVGLEPVRRRPAMYVGGTDDRALHHLVAELLDNAMDEAVAGHASRIELELALDNRVTVR +DNGRGIPVDPHPKFKDKSALEVILTTLHSGGKFSNKVYHTSGGLHGVGVSVVNALSDILT +IEVARNRNLWTQSYARGTPVGPLANQGQVSNRRGTTVSFHPDPDIFGEKATFRPAALYRL +ARSKAYLFRNVEIRWSCDPALLTETDTTPAMDVLRFPGGLRDFLVSTLKNRPALTPTPFA +GQSDTAGGIGRVEWAVAWPEDEEAFFRSYCNTVPTPEGGTHEAGFRAALTKGLKGYADLV +NNKAGCKITPEDVVGGACIMLSLFIPEPQFQGQTKEKLTNSDAARLVENSVRDHFDHWLS +GAPDMANVLLDALVDRADLRLRRRQDKQTNRKSATRKLRLPGKLTDCSSTSAAGTELFIV +EGDSAGGSAKQARERKTQAVLPLRGKILTVASASADKQRGNQELNNLTEALGCGLGAAFD +LSALRYEKIIIMTDADVDGAHIASLLMTFFFKKMPKLIEKGHLYLAAPPLYRLSRGTTTL +YARDDAHKDEIMSTAFKGNGKVEVSRFKGLGEMPPQQLRATAMDPATRTLLRVTLPEGTE +SPGPDDAEAAENLVETLMGRKPELRFQYIQKHARFVDEIDV* + +>unf229_NODE_21#PROKKA_00326 +MTSSHHGPYDQGYTRATMAHTKRSDLARASGPHKVRRSPDWSLQLDSMKSESLVIVDQNA +TVNTFPGLVHTARHTMGVGCKRSR* + +>unf240_NODE_22#PROKKA_00331 +MFKKIRLGTKLLVAFLVVGIIPFATIGLYSLSMSSKALSEQAYRQLESVREIKKAQINKF +FLERRGDMGVLVDTVSAMTHEAFQKLAVVQQLKKTALEKLFGQIEKDVLALSKTTDIVRF +FRALRRYSKETDCKPDGPFDVTSQRYQAISSIYSGFLHAYVKIYGYYDVFLIDAATGQVM 
+FTDAKENDLGANLKYGKLKDEGLGRLWRKVVESKRVAIEDFSPYSPSKNQQAAFVGAPVF +EGTGELVAVVALQIPIEPITSIVDQRQGLGRTGESYLIAKVGGRYQFRSNLKTMGGGKYV +IGHDASVIATDYLNMALEGKQGVGIFLDSRQQPNFVAYAPLDIKGLHWALISKITTEEIL +ASRLRGEKKDFFAKYIDKYRYHDLFLIAPNGYCFYTVAHEADYHSNLVSGRYAASNLGAL +VRKVIQTRRFALADFAPYAPSNGKPAAFIAQPVLRQGKVELVVALQLSLDAINEIMKQRA +GLGKTGETYLVGPDKLMRSDSYLDPKNHSVKASFANPAKGRVDTEAVREALAGRTGEKII +IDYNGNPVLSAFCPVKVGGITWALLAEINQTEAFQAVYHLKRAMLIIAVVGIVVIILVAF +FMGRSISKPIKRVAESLGGGAEQVAAASEQVSSGSQTLAQGASQQAAALEETSGSLEEMS +SMTRQNAESAQQARDSSQQVAQALEEANELMAQTAESMTEIKTAGEETSRIIKTIDEIAF +QTNLLALNAAVEAARAGEAGAGFAVVADEVRNLAMRAAEAAKNTAKLIEGSVRNINKGAE +LLDKTRESFETVVKQNRRVGELIEEIAAASSEQSQGIEQINRAVAEVDKVTQQVAANAEE +NASASEELSSQAELMKEMVGELVSLVQGAAARRERASLNDDAPKRGNGRQRKLALLPGPQ +KGRDLEVEGDSSDF* + +>unf240_NODE_22#PROKKA_00332 +LACVRRPEIMCGLHQRASFVTGDLEAVRSPMGAACSNLITWPLHYLARGENKAVLGGWDP +AARKFFKTDELSLTVPWSMFLAMLERWQDSFLSTNAGRTVLKKVARSARTWGED* + +>unf251_NODE_23#PROKKA_00333 +VWRTLEDLDLATPNVGLVTDIISCPGMDYCSLATARSIPVAQRITARFGDIARVHDVGEL +KIKISGCINACGHHHVGHIGILGVDKKGEEFYQITLGGDASENAAVGKIVGPAFSYDEVV +DAVETVINTYVEQRKEGERFLDTYARLGLEPFKEKLYGAH* + +>unf251_NODE_23#PROKKA_00334 +MALIKNGQPVQDPWIAVADDGGLPSSGSVIVSLARWRSDRDALSNRADPVGIRLASHETA +SDIASGLNRVSLVALEFPTFRDGRAYSTSRLLRERYGFAGELRAVGNVLRDQLLFMHRCG +FDAFEIASDDAVETCRKAFEEFSAWYQPASDGRAPVTALRHIRRAAE* + +>unf251_NODE_23#PROKKA_00335 +MTTMRSATAAAENEQSVVAEAEIRARLLAEQYRGADTRQLLAAMISEEYQGRIVLVSSFG +AEAAVLLHMVSEIDTATPVVFVNSGKLFGETLRYRDQLVERLRLSDVRTVGPEPDRVDQV +DPDGVLWYGNPNMCCYIRKVEPLQRALDGFDAWITGRKGFQGGDREGLPIIEAGDDGRIK +VNPLAGWEKQAVDSYFADHDLPRHPLEADGFLSIGCMPCTDRVAPGEDARAGRWRGKSKT +ECGIHLPRANWKVMGID* + +>unf251_NODE_23#PROKKA_00336 +MALNCKTPADVLQVVKENEVNYIDLRFTDPRGKWQHLTMTSDFVDEDAFADGIMFDGSSI +AGWKAINESDMALIPDATTAVMDPFSAQAELILFCDVVEPSTGQPYGRDPRSVAKKAEAY +LASTGVGDTAYLGAEPEFFVFDDVRFAVEMNNCFYEFSSDEGPYVTGRILPEGNAGHRPP +IKGGYFPVPPVDSCQDLRAEMVTVMRDMGLTMDKHHHEVAPSQHELGMTFDTLVRSADNV +QIYKYCTHMVAHTYGKSATFMPKPVAGDNGSGMHTHQSIWKDGNPTFAGSGYADLSDTAL 
+YYIGGIIKHAKALNAFTNPSTNSYKRLIPGFEAPVLLAYSARNRSASCRIPFSSNPKGKR +VEIRFPDATANPYLGFAAMLMAGLDGIQNKIHPGDAMDKNLYDLPPEELSGVPTVCGSLR +EALDSLSADREFLKKGDVFTDDMIDGYIDLKWEEVYNFEHTPHPVEFQMYYSS* + +>unf251_NODE_23#PROKKA_00337 +LHACRSTLEDPTRKQPRNTRIELIVDDDLVEQAIDSILQAARTGRIGDGKIFVSTVEEAI +RVRTGERGTEAI* + +>unf262_NODE_24#PROKKA_00338 +VCPQNRLTALIGPSGCVKSSFLLLWSRMNALIPGTGVEGRVLLKGVDLYGPGVDPVEGRR +RVGMVFQKPNPFPKSIYQNVAYGPRLHGLRDRRELDRLVEESLKAAALWDEVKDMLHRSA +LGLSGGQQQRLCIARAIAMQPEVLLMDEPTSALDPIATARIEELVGELKKRYTVVIVTHN +MQQAARVSDTTAFFYLGRLIEVGPTEKIFTRPDQEQTERYITGRFG* + +>unf262_NODE_24#PROKKA_00339 +MTPDESSRFHRRMEEVREDLLGMAGLVEAAVQDAFAALSQRDSTRARHVLEGDRRIDLME +NQIDEKALVLLATQQPVAVDLRFLSAALRICSFLERIGDQAVNLAWRALALQEMEPKELP +AKLHDIYTISRGMVRSCLDALVGGDRELARQVIELDDEVDDLTRDMLVEGIEAMQQGRED +LRRGVELILCSRHLERIADEATNIAEEVVFLVEGRVIRHGGPEPSVGPL* + +>unf262_NODE_24#PROKKA_00340 +MQDFASVNEILDFAIEREKEARDFYRSLAEEVSARHMKELFKSFAAQEQKHADKLLAVKS +GGGLTPSGETVPNLKLSDYLAEADMSEQIDYQDALVMAMQREKLSFKLYINLAALAVNPV +VKDTLEALAQEEARHKLYLELEYDEQVLTEN* + +>unf262_NODE_24#PROKKA_00341 +VSRTKTEIQGLIFDFDGTLAELNLDFEAMRQGVHRLAREAGFDGDWPGGYLLEEIERLDR +RPELAETGFATRALELIQAIELEAAGRGRLFDFTRPMLARCRELGFGLAVVSRNSAAAIR +RVFPDLERAVDAFLPREAAPRPKPHPDHLLRACRRLRLEPSRCAMVGDHPTDLEAARAAG +CMAVGVISGRTPAAALEEAGADLVLPDASALAVVLGSEAACMSIARQS* + +>unf273_NODE_25#PROKKA_00342 +MNQVISAPKMPLRRRVKSPVNSPSRAAETAPANMPTQGEIPRRTASRAEV* + +>unf273_NODE_25#PROKKA_00343 +MLRSLLARLFDWLRTAAGMTILATIVITVAATIDRGPCILVKGIVTGGMLALVSMGLALV +FGVMSIPSFVHGEYFMIGGLTAYFIITPLQTYLQAHPSGLLNLLAPFIAIGGATVVGAVA +GSLTERLVFRELRRRNRTNWVMNCFLLTVGLNVILVNGHQLLFGADFKGIVNYFTGAPLQ +VMGVYVSPARTIAFTLSILVIVGFGMFMRYTKTGRAIRAVSQDETGAMIVGINLEGIMML +TMALGCALAAVAGSSLLFMYPSYPTSGLEPLYLAWFVVIIVGLGNVMGALVGGFMVALFK +VLTVEYIGSGWDFVVPSALIMLVLLLKPNGIFGSEVRGVLDQ* + +>unf273_NODE_25#PROKKA_00344 +VLLEVKNLNAGYGFLQILRDVSLKIDQGEYVCLIGPNGAGKSTTMKTLAGLIKPISGEIT +FEGESIAGLPGNQVTSRGISYISEEMNLFTNMTVHENLLMGAYTIKDKQAIARQLDFVYS +LFTRLEERQKQLAGTMSGGERKMLAIARGMMSRPQLVLVDEPSFGLSPQMTQVVFDSLES 
+LKQSGVTILVVEQNVNLSLSVTDRGYVIENGRIGIEGKSSDLADDPYVRKVFLGV* + +>unf273_NODE_25#PROKKA_00345 +LVESPGRENPISETLTPAHPKRRARARARAGSPRLIFLDELAAGLTEGELKDIIKIIRKI +QDSGVTILMVEHIMQLIMNVCDRLVCIQFGTKIAEGPTEEVARDPKVSEAYLGAAH* + +>unf284_NODE_26#PROKKA_00346 +VKLSRWGEEYYGDLGEPLGSFEGETQTVMSVGDLAYWEPGNAFCIFFGPTPASSGPEPVA +ASQVYPLGRVEGDWQALSALGASVTARISAA* + +>unf284_NODE_26#PROKKA_00347 +LHLDQDQQDFLRHIQQVVETQLAPLALEIESQSRFPQQAREIFARAGLFTLAVPRSYGGQ +GADATRLALMVENIARVSPSAALLVFPSNAVLRTIALTGSEEQKERLFGELVQAGDQCLA +FCLTEPDYGSEAFNLQTRAERQGDHYVVNGTKTFITLGPNARYYLTFVRTGPAPKAGGIS +ALLIPHDAPGLGFGPPEKKMGLHGSVTTNMYMKDVPVPVANRLRGEGEGWQVLTRVCNPM +RVWGAAAMALGTAQGLFDQTLAYVKANAERLNPADRQSRDFALADMKMRIEACRSLIYRV +CRMVDDPRTPPQQVDAFVSMSKCYAADTGMETGELASRILGMDLMRPDCLAGRLYLDAKA +IQIFDGTNQIQRLVVAKSLALG* + +>unf284_NODE_26#PROKKA_00348 +MDFELSEELKMLREMAADFAKEQIAPYADKWDEEHYLPHEEVIKPMGELGFFGTVIPEEY +GGNEMGWLAAMILTEEIARASSSLRVQINMLELGCAFTIYRYSLHDEVKKKYIPKLVSGE +WLGGFGITEPNAGSDVMGMKSTAVDKGDYWLLNGSKTWISNADIANCIIYYAYTDREARG +KGLSAFVLDLKNEDGSRPEGISVTRLDKLGSHSSPTGEIYLDNVKVPKENILGKPGDGAK +IVFSSLNQTRLSAAAGGVGLAQACLDESIKYANEREQFGKPIAKQQMIQDQIAQMAIEVE +AARLLVYKAACQKDAGNLGNTLEVAMAKWKAGEVANFCAQQALRILGAYGYSTEYPVARF +YRDAPTYFMVEGSANICKWIVALDALGLRKANR* + +>unf295_NODE_27#PROKKA_00349 +MSHRPRNRKLLPRFIALLVLALVLLLCSVAPAAAWQDFSFLALGDTRTEPYLAGGRDQAT +RMKQILRQRYRTEAQLFFTPDGSALERAELQHKGARYTLYYQDGWPERIEVTKGGATRTI +MHAAGRRWVFREMLNDIKPGAPLPEEGARFILHGGDLVLNGYLGTSPQSPYWGLLKKELL +DRLPPADAALGLPGRVMVCVGNHETWEDPKLAGLLGTMPWLRELGFSEHNRIYAVDYQNC +RFIFLDTGGYSPLGTRWLGQYPPFKAQMAYLRRLLKDAVIQGLDHVVVLFHKPAFVKVGH +DPLPADQNPHYLLKGFAPLIDILVISSHTHTTERYQVDGINYLVLGAGGAPQKFKDCKHP +SPQPELYWRGKPRVEEYNYLKIDVKGKEMVGWLHRWRPGGDKPHGQWVEFFRSRTP* + +>unf295_NODE_27#PROKKA_00350 +MTQEDEQAYRRLLVDRVMAHHGLNRRAYQRAWPSSTRPARLWLEGQPYRVKDLSQGGCSF +LVQEAPPDGGWVRRGALELPDGGVPLPVTVVVVAFQPGGLVRGAFLGLDDEAKRRIQRFV +AGRTGELVAEATPENEPS* + +>unf295_NODE_27#PROKKA_00351 +MPLARKRKSAQKISDFTRVYLMRHPEVVGHYEGKFWGHSDVSLSRYGKAQMKAMAQRMGQ 
+EKLAAVYCSNLQRTRQVAEAIGRIQRPRLKPQADPAFRELNLGIWEGLTYQDISERYPDQ +LAARARDLANFAIEGGESLAQLSQRVLPAFWSMVEQNRGKEVCLVAHAGVNRVILVEIMG +APLENVFRLDQEYGCLNIIDIFEDGIPLIKLLNQAIEV* + +>unf306_NODE_28#PROKKA_00352 +MTAATDVLHYDNANLRRILRDYRNIAMIGASANWVRPSNFAMKYLQGKGYRVIPVNPKDA +GETIHGEPVYARLADVPGPIDMVDIFRNSEAAGSATDEAIALAGALGIKVVWMQLHVRNA +AAARRAQAAGLEVIMDRCIKIEYGRLFGEIGWSGVNSRIISAKRPVLHP* + +>unf306_NODE_28#PROKKA_00353 +MTESTPETTTESDAPVLLRHDVDGIATLTLNRPDKFNALSVALLSALETELEALADDASV +RVVILAGNGKAFCAGHDLQEMRACPGRQSYKALFEQCGRVMLAIPRIPQPVIARVHGVAT +AAGCQMVATCDLAVAVEQARFAVSGVNLGLFCSTPMVALTRNMPRKQAMEMLLTGDFIDA +ETALSYGLVNKVVRRDRLDETVAELADKIAGKSSAAIALGKQLFYKQLEAGMEAAYELAS +ETMTCNMLTEDAETGIDSFINKQPLPPWKGR* + +>unf306_NODE_28#PROKKA_00354 +MREGADEVDVYVNVKYLRRQRTNQKYGERADVPAISVEEFNEIIRTDLTWAFDMGMQADK +IGAGTAVLRLPYRASMLRPGGVIAGPMMMALADACMFAVALSLIGEVKLAVTTSFNINFL +HRASPGDLLAEGRVLKKGKRLMVADVTVHSEGHDLAVAHATGTYSIPPINGGR* + +>unf306_NODE_28#PROKKA_00355 +MPIIHWTEDLSVGTDTIDADHKVLIDMINRLDEAIKSKEPQGTVNRVLSELLDYTGYHFD +REEALMKAAKYPDYEAHARTHGILKAQVADIRSRYERNPNSIHEREVLAFLKNWLTAHIL +GRDKLYAPFMESRREDVDKANEAFTQSSGV* + +>unf317_NODE_29#PROKKA_00356 +MIVTLSPRRRHTSNLHPSNPVKDLTLEQLKGIYTGKIRNWKEVGGQDKPIVVISRDSSSG +TFEVWNHKVLKKARVRPDAQLQASNGAVAQAVAGNKYAIGYVGIGYLNPKLKALTVNGVK +ASPNTAMSGTYPVARSLYMFTPGEPKGEVKKFLDFVKGPEGQKIAAQEGFVPLK* + +>unf317_NODE_29#PROKKA_00357 +METSAQAAAASPTGTAAGAAGETPSYRRGARSDAWMRRVFLGAGLVSVGTLALIVLFLFR +EGGGIFKVTSLSDFLSGHYWYPTYDPPDFGILPLVVGSLAVTALSSALAVPLGVATALYL +AEVASPRTRELLKPAVELLASLPSVVLGFVGMVVMAPLLQDWLDIPTGLNLFNASLMLAL +MAIPTITSIGEDALQAVPRELREASLALGATRWETLSRVVMPGALSGLGTAVILGMSRVM +GETMVVLMVAGGAAQIPHSIFDSVRPLPSTIAAEMGEAPVGSDHYVALFAIGIVLFLITL +AFNLVAARISARFQQKGAATL* + +>unf317_NODE_29#PROKKA_00358 +MTGFSLRARRLRQAVAFGLLRLAVLMVLASLGGILFFILVHGAGAITWEFLTDAPRDAMT +KGGIFPALVGTLYLTVGAILVSLPLGVAAAIYLSEYARPGRLLNLVRLGIQNLAGVPSVV +FGLFGLALFCVVFGLGTSLAAGSLTLGLLILPTIIGAAEEALRQVPDTFREASLALGATR +WQTIRKVVLPAALPGILTGGILGLGRAAGETAPIMFTAATFYTLHLPSSPLDQVMALPYH +IYVLATAGTHIEQTRPLQYGTVLVLIGLVLGLSLTAMVVRSRMRRKQRW* 
+ +>unf328_NODE_30#PROKKA_00359 +MTTVLETHNLTKRFGGLVAVNQVSMHVDEGEIVGLIGPNGAGKTTLLNAIAGLNPPTSGK +VSFFGEDTTGLPPEAMCHRGLSRTFQIPQPFPQMTALEAVMTAAIFV* + +>unf328_NODE_30#PROKKA_00360 +MSGKKRYWVIVALAAALMMMLPGQALAGKVFKLGVLGPFTGPSAKTGAEFKGSVKMAMEK +IGYKIGDYKIELVWIDSQSDPAKATSAYSEACERAGIQAGVLNWHSSVAIAVMEVAAQYK +VPHFFGFGASEVVNAKWHKDKKYHYWGGKGWPVPGKLMAGYVDCLNYAVKQGKLDKNNKT +VAIYGEDSDWGRSVGKAFRERFTATGWQVISADYFPLTQTDFYPLLSKYKKAGVAVVAGT +STSPPSISAFVKQASEVGLKATIVADGLGWIGDWYKMTGNGSNYVLDMIPQLTTKKSREW +AKEVKAKFGYNPSPSSGGLSYDGINFMIKLLKRTLEREGKLDKESVYKVMIDEVNTGKLT +YTKADGAIIMNKYQYTPKTMPDMVLGPGAYFFPVIQYMNGKGAIVYPKDWATKEFEAKK* + +>unf328_NODE_30#PROKKA_00361 +VVDIRSQNGKVQGVTLKDGTKIDAPIVINAAGPHSFVVNAMADGVLEGMNIKTKALRHEV +HHVPSPEGFDFLNDGYHTSDGDNAIYFRPEVGNMILVGSEDPECDPQQWIDDPDNFNRQI +TDAQYKAQVYRLARRIPSLRIPNKHIGIADLYDCSDDWIPIYDKSDLGGYYMAIGTSGNQ +YKNAPVVGALMAHLIEACEKGHDHDSDPVQYPLKYVVGRTLDVGFFSRKREINPESSFSV +NG* + +>unf339_NODE_31#PROKKA_00362 +VVGNLLRDMGYSLQANRQTREGTNHPDRDAQFGYINDQVKQALAAGQPAISVDTKKKELV +GDFKNGGREYRAKGQPEPVRVHDFVIPELGRAAPYGVYDIAGNAGWVSVGIDHDTASFAV +NSIRRWWQTMGQARYSQATQLLITADCGGSNGARVRLWKRELQALANELGIAITVCHLPP +GTSKWNKIEHRLFSFITQNWRARPLVSYQTIVQLIAATTTDTGLRVQCEIDTNTYPAGVK +VTDAEMDSINIQRHHFHGDWNYTINPQIPIRSDSS* + +>unf339_NODE_31#PROKKA_00363 +MTDDDLFLCNWRNLFHPDRAFMLRWLPPPQRAALEARMLQDYHATIANLTGTDHGKPRAD +* + +>unf339_NODE_31#PROKKA_00364 +MATMNVHIDHRPGGVGVLTLARPPVNALNPHFLKDIENALQELKADPGIRALVITGAGKT +FSAGMDLKEVQAFTVDDQTAMVEVLSRVMARLYGFPKPVIAAINGHAIAGGLLLVLGTDF +RVASAGAVVGLAEVRVGVRFPLAPLAIVRRELSPGALRRLVLSGNTVAAEAAERMGIVDD +VVETEAVIDHARAVAEELAKTPPETFAAVKSQLRSETLKSMDHIMDRRLDPLLRGWFSDE +TKDAARAILAARTK* + +>unf339_NODE_31#PROKKA_00365 +MFGLFKKSDKPKIDAAEIERREREQRKREMAEQMKQTVQSEKEDQQASQKS* + +>unf350_NODE_32#PROKKA_00366 +VLARILVPGGVLVINVPHPRPGHWLERLRRRPGLTDQWHGHLRPGYRLDQLRAMLGGRFE +LERHTHYHGLFCELVDTALNFMYMSRQKNKQTSAKGLVVTAEDWGRRSKEAKLLKAVHPL +LKAFCALDALVPRGWNYSLVVRARRR* + +>unf350_NODE_32#PROKKA_00367 +MKVLVTGGTGFTGSALVRRLIQEGHQVVALDYREGIMLDRLRRLGAEIMIGSVTDRELVR 
+RAMHGVEVVHHLAAAFRELNVPESHYYHVNVDGTRNVVREALEHGVRKFIYCSTCGVHGN +IDNPPGGEDAPIQPADYYQQTKYLGEEVVHELGDPAGLDYTILRPAAIYGPGDPERFYMI +FKWVKKGYFPMFGSGKTFYHPLYIDNLIDAFMLAMEPEAGRGQAYLIADEHFYPIKELVK +RVAKAMKLPVKILHFPLPPVILAGHVCEKVCKPLGVVPPIFPRRVDWYRQNRAFKIDRAK +ADLGYRPRVGIDEGLRNTAIWYREEGYL* + +>unf350_NODE_32#PROKKA_00368 +MVRLYSYVIVYDTGFAPNPFWDFCTLACCKARLRNSARLGDWIAGFASKTYELVFAMRVD +EILSFAEYYLDKRFALKIPDFSKNNPVYHCGDNIYKPLPNGGYQQLRSFHSKNCSPEENH +QTKADDLKSENVLISQHFHYFGSKATPVPPKFDQLVYKGRGYKYNFPSDLVKQFLDFLDT +LPMGVNGSPSGLQADICSDRGNGCSPVCSAKMPPSCC* + +>unf350_NODE_32#PROKKA_00369 +LLDCHSAVTPFRDDVSINGLLELCGRDLSIDPRRVAGIQAAVSRYLPWVKGLKPELVWTG +LRPCCPDGLPIIGRLGGLSNLLVAGGHDQKGISLAPVTGQLIARLLQGRPLGGELDEALS +PNRF* + +>unf361_NODE_33#PROKKA_00370 +MQWFRLYQEFANDPKVQIMSEAMQRRLVMLMCYRAQDMITDDGLPMDEEELAFALRISDE +ELAETKKLFIRKGFIDENWRILNWDKRQQRSDLSTERVRKHRLKKKQEKQPEEAVLGTPE +ETGGNGDETFHETDETVTETGRNVTETEGETKCNGLDKTREEKTREEKTRETMEFTSPSY +SSTTSQNKTTKSSKAALSTSAQKSIIEAYHEECPDLPRVSKLTDQRKSKLKSRCREGPER +LEPDWWRSYFRKAASQPFLVGRNDRRWKANFDFLIRSEDVVVRVLEGSYGDGRSAENPYA +DL* + +>unf361_NODE_33#PROKKA_00371 +MATAAVLKIHTRTCEEHVCPQCGGEVKPVSLPPMLGMSAREHWPALCGRCADLAEQEQER +MQRRADMERLFEASNLPPAARRWNLITTAAMADRQGLNAVEAWEYGPMGLYLHGPAGSGK +TVLAWGLIIREIAVQLRSCLFLGVPELLYRVRMGFSKADAVDWLARGRQVRLLVLDDIGA +EKPTEWVRETLLSLINHRIDYELPTVFTSNYALGELHEHIGDETGRLTNRIAGNCRVVEV +KTRSFRLDKARAMKGLH* + +>unf361_NODE_33#PROKKA_00372 +LPRKRRLIPKGTPGRLIWLLFAWFEHRTVIEHPKARWPRFAKRWFFDGNALVGVDGVCLI +PSTAPPEVSDVEEFGVGFNKLPDLDRKILEALVFNAAQGWPPEPDNSEDEDWITALDKLN +LSSRSYERLLRDALRRLTDETRRRGLLEA* + +>unf372_NODE_34#PROKKA_00373 +MEQALCAHGAYDVWERIRDCGTDRKRCGSRYCINCFNRYVTSQTVLVKKLFDNYATEVDQ +RTNIRQLTVLFDAFDFDLHSKPHPTICAQRLDKAREYARKELDALKRHFPSIRIVGALEL +EPINGSTKALRKDSTAIDALLAIGAHRDTTPSLYRIFTAPGMQRWTKHLILYHGHFVVDL +NGTDDKDFRVWCHERWGHNRPKNPVKRGAHVQRLYKDRTIGQSLRTLARYPLKTPFNYHV +PKNNCGQPSTGLIRYEDAILSAMIVDDDRIGIRGVKVKKGT* + +>unf372_NODE_34#PROKKA_00374 +MTGGLGHVHGERPGRRVQPLGLGAVGVAPPQRRPFIVLGAEEAFTLDPHRQIEQRGEDHA 
+HILRSICDQLFHKGLNGRIVLAPHVWFSMLDWSLSWNTKKTDRPTEGHAPRWGSKRKLQP +NFQTSGYTSGMRAWELKEDYDLEKHVLGEPNSRSGIRGPKLTLHHINKLKRMKAARRAEH +NAKQALWQMMYGHEEIRERDLDRREADLEARQHELRLREIVAGIDKAIADAEAEEESKQH +LHAMATREINRRKKKS* + +>unf383_NODE_35#PROKKA_00375 +MRKIETVRDLRGEAYKILRQAIADGTLPPGQRLKETDLARDLSVSRTPIREALQQLSKEG +FVVITPRQGAYVRRWTREEALEILLIREVLEGLAARLAASSMSFPDIERLARHIEDFESG +RIDYAESDRRFHEDILRACGMRRLIGLIRNLYDGIQMFKVLNTSFQSPERIRQSIEEHRS +IIRAFRAKDPDLVEQAMRNNFRHTRGFIAKFF* + +>unf866_NODE_7#PROKKA_00376 +MQVYPKFLLSATAPAHFPPPSAPEFAFLGRSNVGKSSLINALLGSRQAKVSSTPGRTRAI +NFFSLTTSPNRQQPNFLFADLPGYGYAKISKSISAEWPKVV* + +>unf866_NODE_7#PROKKA_00377 +VKKVWLSWSSGKDSAWALHILRQQKDVEVVALLTTLNEHFDRVAMHSTRRDLVEAQARAA +GLPLVRVPLPWPCSNEQYEAAMGKACAKAVAEGVEAVAFGDLFLEDVRQYRVEKLAGTGL +EPLFPLWGLDTRALAREMIAAGVKTRLVCVDPRKMPREFAGRDLDEAMLRELPEGVDPCG +ENGEFHSFVYGGPMLGEEIPVESGEVVERDGFVFADVKLKH* + +>unf866_NODE_7#PROKKA_00378 +VSEIQIVGVVGAGTMGNGIAHVFAKSGFQVRLCDVEQRFLDRGMDTIRKNLGREVTKGKL +MQEEADAAVKRIEGTLARAALADCDLIVEAATEQLEVKRQIFEDLDRVAKPEVILASNTS +SISITKLAAFTERPERVIGMHFFNPVPVMKLVEVIRGLATTQETFEMVKALAERLGKTAV +EVNDAPGFVSNRVLMPLLNEAMYAVMEGVATPEAVDQVFQLGMAHPMGPLTLADFIGLDV +CLDIMRVLQEGLGDPKYRPCPLLIRMVDAGWLGRKSGRGFFEYGNA* + +>unf866_NODE_7#PROKKA_00379 +VLASGSKGNSTVVSSSRTRLLVDAGLSCREIFRRMQAAGEDVESLDAILVTHEHQDHVQG +LAVTARKLGIPVYFTEATHRAWMRWMTPRKRLTYAQWLEQQKAIVAAGKEPAAAGAEAQD +AAEEDISGAKPKKDPCALPAVEYFSSGTDFQVGDIAITPYTVPHDAADPVGFVFEAEGVR +LGIATDLGYVTPNVHLHLKKCDVLMLESNHDVDMLRDGPYPWSVKQRVMSRVGHLSNDAA +ADFLENSYDGRAAFVVLAHLSESNNLPELARVSAERALRDRMNLLGNKLMLASQQTPLEP +IVL* + +>unf866_NODE_7#PROKKA_00380 +MKTCIDPVVGDILSSWRYDISGITPEMRIDYEEHLASCSVCRSRQRLHRAIDVVLIGLTT +LSTIVFVLALAVIHHVEPLRTFALFIFHIRDFSVVLTLQAVAFAGLVVSMFAWLLVAVAT +PVPTFLSGVAREQARELQSRIPEEWRNRFQRGAL* + +>unf866_NODE_7#PROKKA_00381 +MAKGRMEIRPFAFAVDPVCGWGLLPGLHAANQL* + +>unf866_NODE_7#PROKKA_00382 +MSATRGIQHGVDMRVEVADIQLPNPVLAASGTFAYGIEFEDVVNLDHIGGFVTKGISREP +LSGNPAPRLIETAAGMINAIGLQNIGADAFVQQKLPALARYQCPVIVNIFGYQMQDYIAV 
+IRRLNEAEGIAAYELNVSCPNTHAGGIAFGIDRAALSDLVAHARHYSRRPLIVKLSPNVT +SIATMARSAESSGADAISLVNTFVSLAIDVETRRPRLSNITGGLSGPAIKPIALRMVWEA +AQAVKIPVIGMGGITTPEDAVEFLLAGASAVQIGTASYADPRAVEHIGQGLAHWCSRHHI +AKVSELIGGMQTGQ* + +>unf866_NODE_7#PROKKA_00383 +MTKNQDCMTLPVRWEQAPAREDFWQELRTASRWLLLLDYDGTLAPFHQDRMKATPYAGVK +ERLEQLLKIEKGRIVVISGRQIEDLKQLLQLSQPVEIWGSHGREHLLHDGSYRLVDLTED +ERRVVEAVTARMSERGWAGQLERKPTAIAVHWRGLPVSEQKELREAAEQYFAEANPPDTL +EMMPFESGVELRSRSRTKGQVVAEVLAEEPADIPTAFLGDDWTDEDGFAELRGRGVGILV +RPEARESCADYHLTPPEELLEFLDRWLENAKESIR* + +>unf866_NODE_7#PROKKA_00384 +MSENQVIIVSNRLPLSMTVKFGSLKVGRSSGGLVTALQPILKSRGGTWIGNGGTREDKRM +ARALEEEARRSGFDCVPVFVTEQEDRNFYEGFSNQVLWPLFHDFIGECRFEPEYWDFYRK +VNGKFADAVMRVYNGKQILWVHDYQLMHVAASLRERGCKGRVAFFLHTPFPSYDVFAKLP +WRRHLLLAMLEYDLIGLQTERDVRNLVSCLRRLVPEASMSTDHGVHRVSWHGRTVVIQDF +PISIDFDEFARAANQPAVEERMRTILARMGMGQVIFGVDRQDYTKGIPHRLRAYGELLRR +RPEMVGKVKLVQIVVPSRQNIPGYEALKSRIEHLVASINGEYTQPGWVPIHYIHRAIPRE +ELLALYRAAHVGLVTPLKDGMNLVAKEFCASRIDDRGVLVLSEFAGAAAEMYRGALLVNP +FDLEGVADALEQALQMPGAQQQERMRKLRRFLKHANVHRWVEDFMEEIESVKAPRSRRG* + +>unf866_NODE_7#PROKKA_00385 +MSMKVTTRQVDGVTILDLSGRILLGEGSVQLRDAVRDLLAKGQKKILLNLGDVTYIDSSG +IGEMASALTAVRNQGGDLKLLNLTKRVHDVLQITKLYTVFDIKDDEASAIASYN* + +>unf866_NODE_7#PROKKA_00386 +LRGQQETRVTYTLASSLDSVDKVEQTAEQMARNAGIDEDEAFRVSMAVREAAVNAVLHGN +AYAPDKRITVTFENNGSDLIIHIMDQGEGLDPAALPDPLKPENLLSGSGRGIFLIRSFMD +EVHFKQLHPGTELTLVKHLGTAKQSLQGGNSL* + +>unf866_NODE_7#PROKKA_00387 +MPVRLMPMPEVDAKVTLTMLLGTPVTDATGKLRGKVRDVAVATGAEAGRVAGLVVKNRDG +LQVVTSVDLRRTPSGTLELRADAQMRPLTGEESFLLLRQDLLDRQIIDVHGRKVVRVNDV +ELDWWNQERGAAGQQESLRVTGVAVGLRGALRRLLLGLMPQATLDRLARKVPQRSIPWEF +VDIVEVDPARRVKLKIEHERLARLHPSDIADILEDLAPAEREALLRSLSEELAADALEEL +DPKLWRSLLQSMDSETAAGIVEEMDPSAAADLLADLSKADSEAILGEMDPEERQEVKELL +EFREDSAAGRMTTEYVAVPEDATVADCVAALREFEGDPETITEIYLLGEDDLLVGVVPLA +RLVLAREETRAQVLSEPETITCELEAHQNEVAELFDKYNLRALPVVDEQRRLAGVVEADH +VIAFLRERR* + +>unf866_NODE_7#PROKKA_00388 +MLKRWRTRILLFLAVLGPGIITANVDNDPSGIFVYSQAGAKFGYELLWTILPVTLALIVI 
+QEMCARMGVVTGKGLSDLIREEFGLRITFVMMVLLVVVNFGNVIGEFAGIAGSLELFHMT +KYASVPVCALLVWLLAVKGDYKRVEKIFLVGSVFYIAYVATGVLAAPNWHLSILKTVTLP +HRSVWRQDGYLFMVISIIGTTIAPWMQFYLQSSVVEKGIRVQDYAASRADVVVGSFFTDV +VAWFIIVACAGTLWVHGLGKINLPSDAAVAMRPLAGNYAFLLFALGLFNAGFFAASVLPI +STAYTVCEGLGFESGVDKKFSEAPFFYWLYTLLIVGGAAVVLIPHFPIIEFSIFSQMLNG +ILLPIVVVFMLLLINRKDLMGEYTNSRWFNAVAWVTAVVVTVLSVVLMVQSIRQV* + +>unf866_NODE_7#PROKKA_00389 +MPNQCRHVRANGVQCRAHRVWKEDYCFFHLHHRTPNGTAKRSEDPPPPPPKNGIEIPLLE +DLASIQIAIGRVLTALAQGKITSAEARTYLYGLRLAASNVKQKDFAPVNTVETYVQYDNG +DTLGPEQFHAEKQPQHPLMDSGLLALRHLSNRLTYEATLDAYLTQGQEPPSTLRPPVAGP +PADKSELQNWIKTGWKACQSRAHALELARKAIDQPIPDPIDPKAAFSINANIQRTA* + +>unf866_NODE_7#PROKKA_00390 +MEKPTVIHSSFTLERSYPYPAEKVFAALADPEKKRRWFADSPNHEVVKFAMDFREGGAER +MEYRFNEGTPFPGVMLVNSGHYEDIVPGRRVVLCSTMTIAGRKISVSLVTFDLLPQGEGS +ALLCTFQGVFFEGADGPEMRKAGWEFLVERLGEEVAREG* + +>unf866_NODE_7#PROKKA_00391 +MRKRKPKVDRIFHALGDPMRRVMVELLRKRPYSVSALAEPLGITLTAVGQHLKILEEAGL +VRTEKLGRVRTVQLEPEGFAVLEAWAAEHKGEWALRLDRLGDVLADDG* + +>unf866_NODE_7#PROKKA_00392 +VTTANTFRANWPFVLIAAIFLGTALVGALTGRIWLQMRTITRTNDKVMYWATIWCSVIMA +GFMLLVAAHYAPVINLIKEL* + +>unf866_NODE_7#PROKKA_00393 +LHLSVSERTIAPARNIEGSLRLPGDKSISHRYALLSGLAEGTSRFTNFSTGADPSSSLAC +VEALGAKVVRGEDGSVEVTGVGGQFQPSSSPLDCGNSGSTMRMLAGFLAAQQGEFTLVGD +ESLSRRPMERIRKPLMQMGANISLTEGHAPMVIHGIPLKAMEYATPVPSAQVKSAILFAG +LQASGTTTVRESVRTRDHSELALRAFGAELERTIDSITIAGGQKLSAIEAKVPGDISSAA +FYLCAAALFPGSNLVFDDLGMNPTRASLLDVLTALGAHIGVIDLEDKASELIGTVKVNAP +PDGLTGTTVSGALAAQLIDELPVLAAIGPYTNNGIRIRDARELRVKESDRIDLVVKNLRA +MGAEVEEFEDGLDVPGGQTLHGAEIDSGGDHRIAMAFSVAALRAEGETLIRGAECASISF +PEFFDLLDAIAQR* + +>unf866_NODE_7#PROKKA_00394 +VNVLRKSFVVAVVLLSFGASAAVAQTTVAASVYGAFRSSTRTGGISNFTVENPSNAAGFL +LELRHISNPLMGYELTYSYHRANEAYSNTLKVLCPISPGGSCPEQITTAGVSANAQEVTG +DWVVSFPLANLKPFVLVGGGVIVTSPATGSVTATITDFDPVTNMMSQTTSSMPTQTQTKG +VFQYGAGLDWTVLPHIGLRFQYRGNVYKAANLTKVFTSTDKFTQTAEPVVGVFFRF* + +>unf866_NODE_7#PROKKA_00395 +VYESRPQTTPEQHSSSIEGSHPTDAAAAAGSRSVELAQTDFLLRLTDALNTTLNLQTLLQ 
+RTADLVRTVIDYRIFAILLLDNRTNDLRMRFQTGHRPEVERMRIRLGQGVTGQVALTRKP +MLIPDVRDVENYINANPDVHSEIAIPLIVKNRVIGVIDIQSEQPNYFQPDHLHLLTLTAS +RIAHAIDNARLYTRVSRQAQTLEVLNEISRDLSSILDTDRLFERISQLLRRLFDYQMFTI +WTVRPIEHVLENRFALRFGERYYPNETIPVERGIVGAAIAERRPMNIPDVRRDPRYHKVN +EETRSELAVPLMYKSKVVGVLDVEHTQPHYFSEDHVRALTTLAAQIAIAIENAQLYQRVI +QQEQRLDHDLQMAREVQLRLLPPSLPSRPHAEFAARFLPARTIGGDLYDYLNYDDQRGAL +AIGDVSGKGAAAALYGAVVSGTMRSQASLKPSPAAMLQALNASLHERRLDSQYVAMLYTV +WNDENLTLQIANAGSVQPIYCRSGEIETVPVEGFPLGMFPQAEYEEISLSMRPGDSVFFF +SDGITDGENEAGESFEERLTDSIARHHHLPAEEAVNAVFEELQEFQGDCDRFDDETLIAL +RVV* + +>unf866_NODE_7#PROKKA_00396 +MPDQTQFPPAESLRIRFTRDQIQSRVREMGRHIREDLHGESVVLVGVLKGAAIFLADLAR +AINLDCTFDFVAVSSYKSGTRSSGAVQLIKDLTEPIEGRHIIVVEDILDTGVTLSFLQSH +FERHNPRSIRVAALLDKPSRRIRPIQGHYIGFSIPNEFVVGYGMDYAERYRNLPDIRILD +SIPAS* + +>unf866_NODE_7#PROKKA_00397 +MYPFIHIGHFTIPTYGIMMWLAAVAGCIVLYRNFKRWKVEGDAITIVAFATVIGIIGGKL +YHVLEKPVLLMHHPALLISRSGFAWYGGMIAGILALLFQAGTYNIRPLRMLDLCVPSAAL +GYGIGRLGCFFSGDGGYGPPTKMWFGMSFPHGTVPTTQKVYPTPLFEFVAAVIIFYILWR +RSRPAAERKLGHMTAEYLLLAGGARFLFEFIRINPKIFLGLSNAQWASIAEMLGGTALLW +WSRKYASTPQPGQQGRQPKEEPALVAAGDSGGPPTAEQTQ* + +>unf866_NODE_7#PROKKA_00398 +VSACSHRPDARPLRIGQAQKDLRIPANKLHQEASPAGPQQILGRHLSQLPRRPRPAPPPE +DIEDNAGPDKLKNRRRIHFLCCRHNSVRIAHPEPHLRRNAVIPVSGQLASNAPDSVPQRR +RGHAQIQHPQGTNLIGPGLEQQRQNPGNHPAKPGKPGPADQQRWMMHQQHWLFQHMVELC +AHHSSHRRKGDDADRIGINLPALEVLVKEVAPDHRGKPHHDAVCANR* + +>unf866_NODE_7#PROKKA_00399 +VRLAQDAPVSFDPDTFKVTPPDPAVITPVLAQLEFNQLLNQFAAPPPKADYRRLSDPEEI +EDFLKPVARKKRLAIDTETTSIQPMLAELVGVSLCHQAGEAVYIPVAHNLTPGQSQADKE +AVLQTLAPVLADPAVTKIGQNIKYDLIVLGRCGMEINGPLFDTMVASYLLNPGKTSHNLA +SIAAEFLGRSVISYQEATGGKNRPFADTDLDQATDYAAEDADVAWQAAQVLEKKLAESHL +DGLFRDLEMPLVPVLARMERNGVGLDVQGLEDLGKELAAKLDEIERTCYRLAGHEFNLNS +PKQLAQVLFEELGLTPVKKTKKGKTSSTDVSVLTVLAAKHPLPAEVLNYRTLSKLKSTYI +DTLPKLVNPQTGRLHTSFNQAVTATGRLSSSDPNLQNIPVRSEIGERIRACFIAEKGNLL +VSADYSQIELRVLAHLSRDPLLVEDLTKGLDVHTQTAARLFDVMPELVTKPMRARAKTVN +FGILYGMSAFRLAREQGISRKEAQQIIDKYLGRYQGVARFQEENLRQAREKGYVTTLLGR 
+RRYLPAINAGDRLARQAAERMALNTPIQGTAADIIKLAMLAAHRLLEERFPQALMILQIH +DELLFEVPASQAEDLAQAVKQAMEGVIELAVPLVVDIGIGPDWAQAH* + +>unf866_NODE_7#PROKKA_00400 +MDHGKHKLATRLIHAGEPQEPVEGAVTLPIFQSSTFVYQGQASYHDLKYIRLNNTPNHRA +LHHKLAALENAESALVTASGMAAISATLLALSKSGDHLLCQDCLYGGTLDLITKDLAALG +IGHDFLDPERPESWAELLKPTTRLIYVETISNPLMQVMDLEAVVGFAREHGLVSVIDNTF +ATPVNFRPAEWGFDLSLHSGTKYLNGHSDIVAGAVIGRAELVERITHKLNHLGGSLDPHA +CFLLHRGLKTLAVRMDWHNRSAQRLAEFLAGHPAVTRVNYPGLPDAPDHQRASHLLDGFS +GMLSFELRGGVAAAERFLSRVRLPYLAPSLGGVESLVTRPATTSHAGLSPQERQAAGISE +SLIRVSVGLEDPDDLVADFDQALA* + +>unf866_NODE_7#PROKKA_00401 +MSAMGISRTVRRTVVLVAALSLLLSASAWAAPQRVAVLPFTANAKEDISYLVKGVRDMLA +SRLAWQDKVVVIEPDLVAPVMKEVPPPYNEAKARKLGNKLSADVVVYGSITALGSTVSVD +AQVIKVKGKQPPLSTFVQAADLNQVIPQINDFAKRINAEIFRRPEAVAALQKQGQQAEKG +KQEAGSSGKPLVEAPKTPAAEWQQKRAVEVGKLPPNISPLNPLFLRSLSGVDSDRYWRSP +RIDGVVSSLAVGDIDNDGRNEMVVLLHKRIRVYRLDGQRFGLIHEFKKGPDGEYLFVDIA +DLDGNGRPEIFVSSIINGEIVSFVLEWGEGGLGIKAKDIPWFFRVQPNPTGKGNIVWGQG +KSINAAFAGPVYRMKYENGQYVPGEPIRLPEYANVFNFVKADLNGSGRPMTVMVAPGFRL +KVFGKPDDELFASGEMYAGSSKFIEVPSHSDPSNPGDEPAREFLPTRLIVNDLDKDGRSE +IVVVRNKDSLQGIMENMQFFYQGTIYSLYWNGMSLLENWRTPRISGYLTDYTIADVGNVG +RPALVLSVVQTKYGGMVEKGFSHIVAFTLKPQAKKKKHYIKRTKGL* + +>unf866_NODE_7#PROKKA_00402 +MTPLPRLTSRRGFPWPRKVHEEMERTYYQERSQHGHITGPGLVHVVYGQGVGKTSRCVGL +AVRAAGAGLKVAWVQFMKDDTSSEVKVLRDLAGVHYFCPGPHAFITKKGPGDEHRQHARR +ALEHARELVEKDRVQVLICDEILNTLLFKVLPLEEVMALVELCRGRVELVMSGADVHPDL +LDAADYATELVQRKHPYYRGIEARKGIEF* + +>unf866_NODE_7#PROKKA_00403 +MPKPVISYHPALEADQSFLLRSRRPLERRDLLAILRAGAVLLPQAPRADLYLLVAGMGRP +HFPRAAVYFSLDGKVGNHRLFSALGLPQPPTLSFENLEQALAAWREGGLEAAGITPPLVV +KGAGGGEGSNVFLVRDIGELAGLAGRVETFCARGPSGLVVQKYLDGGGGDARVVLLGRSC +EAFWRRSAPGEFRSNLSSGGRVDRRWRPAELERAVEPARRPQGATGVVVAAVEILVPPGG +EPLLLELNFYFGRRALGGSETFLRRYLAAVRRWLEGLGLDPRRVQLYE* + +>unf866_NODE_7#PROKKA_00404 +MTTPRETPLENPYSQSGCFFCGQDNPVGLKLRFARVEGKEELVCRWRPDRRYLGLGRVLH +GGIQCGLFDEIMGWTAHHFSQGPGVTQEVSVRYLAPLFIDRPLELRCRVVERKERRIFME +AEIRDHQGRVCSRARGSYALMDPERFARLVQDQPEPPPAE* + diff -r 000000000000 -r 68a3648c7d91 
pfam_search/lipase --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/lipase Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,162 @@ + + + +Proteins with PFAM domains: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
15FR_NODE_1#PROKKA_00113
+
+
+
 
+VLAGIAEWYGEDEAVPFRVISGTSAGAMNAAYLSANMENFAHGTQRLAQVWSQLEAQQVYRPEYRKVFGALLHWAWSLLSGGLGDSNPRS
+LLDNSPLRALLAENIDFDAIARNIERGLLRGVSVTVAGYSTERSLSYFQAETGVQSWWRQRREGRPVQMTLDHVMASLGLPIIFPAVKVA
+GEWCGDGSTREFAPLSPAIHLGAKRVLVIDTQYPAPQHVLGQDQAYPSLSKIMGYLFDSVFSDSLYADLERTKRINRTLDYIKRQSGHEP
+PELGLSHIDTLVIAPSRRPLEIASRYESHLPKSMRWILRSLGGDVSSGDQLLSYMLFQSGYCSEMVALGRHDAHARREEIGQFLGLSKIK
+V
+
+ 
+
+

+ PF01734 +

Patatin-like phospholipase
Pfam-B_2206 (release 4.1) This family consists of various patatin glycoproteins from plants. The patatin protein accounts for up to 40% of the total soluble protein in potato tubers . Patatin is a storage protein but it also has the enzymatic activity of lipid acyl hydrolase, catalysing the cleavage of fatty acids from membrane lipids . Members of this family have been found also in vertebrates.

+ +

+
+

+ CL0323 +

Patatin/FabD/lysophospholipase-like superfamily This superfamily of enzymes contains a Ser/Asp catalytic dyad. Members of this superfamily are all serine acylhydrolase enzymes.

+ +

+
+
+
16FR_NODE_10#PROKKA_00133
+
+
+
 
+MNPIEAISHTGRSVRSRLKGFPRKKVLVLEGGGMRGIFTVGVLQAFSERGYAPWKTIIGASAGALSGVVYAAGQIHMARDAFFTELISGR
+FIRMSNIFRPEKHILNLDWLVDHIIGGDEPLNIRRLRTTACPVLITVTRFSRDFPPDTLYLSTKTDSVPQALKATAAIPFFYRGFVHYRN
+DLLLDGGVLDSVPFKKALSMGFPERDILVVLTRPKGYRKERDSFWIKTLYESYYKDSQYRYLVNSLEHHFGNYNRMLDDLETNYDFDIIY
+PPDNFKVNRLTRSEDKIVDGFEQGVAAAKAYLKPK
+
+ 
+
+

+ PF01734 +

Patatin-like phospholipase
Pfam-B_2206 (release 4.1) This family consists of various patatin glycoproteins from plants. The patatin protein accounts for up to 40% of the total soluble protein in potato tubers . Patatin is a storage protein but it also has the enzymatic activity of lipid acyl hydrolase, catalysing the cleavage of fatty acids from membrane lipids . Members of this family have been found also in vertebrates.

+ +

+
+

+ CL0323 +

Patatin/FabD/lysophospholipase-like superfamily This superfamily of enzymes contains a Ser/Asp catalytic dyad. Members of this superfamily are all serine acylhydrolase enzymes.

+ +

+
+
+
16FR_NODE_10#PROKKA_00142
+
+
+
 
+MGKTIRRALVLSGGGARGAFEVGVMRYLNEVNWQPDLICGTSIGAINGAAFGSGMSVDELAHLWKTYHRKQMYKITFPAFFRTLLSGRKF
+SPLSDNRPTRSLLEKTIDIDALRNSTTEIIISVLNMRTSQVRYFTHKAIGIEHLMAAGGIPMMFPWQYIDGDPYWDAGVMVNTPIMPAFE
+RGATEIIVVLLSPLGAIPQRLPSTHREVSELVFEQFLIGSYTACLPNAGWRTNPEADVYDTPLPDSPQLQLSMKGVRMATVYPTRMLGFR
+SLLDFSPRQAKTLLRDGYVNARMQLKSFF
+
+ 
+
+

+ PF01734 +

Patatin-like phospholipase
Pfam-B_2206 (release 4.1) This family consists of various patatin glycoproteins from plants. The patatin protein accounts for up to 40% of the total soluble protein in potato tubers . Patatin is a storage protein but it also has the enzymatic activity of lipid acyl hydrolase, catalysing the cleavage of fatty acids from membrane lipids . Members of this family have been found also in vertebrates.

+ +

+
+

+ CL0323 +

Patatin/FabD/lysophospholipase-like superfamily This superfamily of enzymes contains a Ser/Asp catalytic dyad. Members of this superfamily are all serine acylhydrolase enzymes.

+ +

+
+
+
8FR_NODE_2#PROKKA_00232
+
+
+
 
+MGLSYRINFKLGANAMVISDPSMPVPASLQFRQLKSDEYVMLDIPTDSLEVAYWGTKKPVPAQYVLTEAQTAKVESAITSYNAEIKSLAK
+KYNLAFVDFNSIMKSIEHGGLTVDGIHFTTAFITGNLFSLDGVHLTPQGNAVVANYFIQAINKQYGSHIPSVMVSDYPSVVF
+
+ 
+
+

+ PF00657 +

GDSL-like Lipase/Acylhydrolase
Prosite & Pfam-B_543 (Release 7.5)

+ +

+
+

+ CL0264 +

This superfamily contains a diversity of hydrolytic enzyme activities.

+ +

+
+
+ + diff -r 000000000000 -r 68a3648c7d91 pfam_search/lista --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/lista Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,1 @@ +a diff -r 000000000000 -r 68a3648c7d91 pfam_search/pfamA.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/pfamA.txt Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,14831 @@ +1 PF00389 2-Hacid_dh 2-Hacid_DH; D-isomer specific 2-hydroxyacid dehydrogenase, catalytic domain Finn RD, Griffiths-Jones SR anon Prosite Domain This family represents the largest portion of the catalytic domain of 2-hydroxyacid dehydrogenases as the NAD binding domain is inserted within the structural domain. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null --hand HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.50 0.71 -4.69 98 16860 2012-10-02 14:31:05 2003-04-07 12:59:11 25 50 4524 180 4691 12854 6366 308.20 17 85.12 CHANGED lllhp....sh..pptshphlcc.........plphtp....shsp-..clhcthps..s-ulhstsps.....plspcll.pth..spLKlluptusGhDslDlcsAsc+GIhVsNsPs.ssspulAEhsluhllulsRclspspppl+pGpapppthhshphtspshsllGhsthGttssthtpthththhhhshhhs.pttpttthhhht.thhh...................psspllshps.tshppthhhtpptttthhsshhlsssttsshhststhtshtpptthsssshssppp.sshtp.LhshsNVllTPHluus..TpEAppshutpsspslhphhpG....psstssVs 
.......................................................................................h.t.hpt..............h..pl.phhp.............hspp....ph..h....p...t..l...t.s...........s..-..s.l.h..s.p.st.s...........l..s.p..c..ll...pth............spL+..hl.u....p....h.u...s.G....h....-....s..l....D....l.s.s....A...p.c..........p...........G...........IhV............s........N.s......P.s......s...s..s....p......u.....V....A..EhsluhlLshsRplspsppph+pGpapttshhshthtstphtslshsthsthhtthtpthththhhh.hh.ttttttttthh..t..h..........................................................................................................................................................tst.hh.htshts.ppthhhspphhthhhsshhlhssttsshhppshhtshhpttthsssshss.tptsshtpsLhphsNVllTPHluus..T.tE.up.pp.hs.p...t.s.hpslh.p.hh.ps......t....p.l.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 1361 2755 3878 +2 PF00198 2-oxoacid_dh 2-oxoacid dehydrogenases acyltransferase (catalytic domain) Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain These proteins contain one to three copies of a lipoyl binding domain followed by the catalytic domain. 
23.00 23.00 23.10 23.50 22.90 22.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.30 0.70 -5.09 100 10039 2012-10-02 12:01:53 2003-04-07 12:59:11 18 49 3997 65 2920 7770 5825 224.80 35 47.20 CHANGED ssstpplPlsshR+slAcphspStp.shPphshs...s-l-hspLhplRp.p..................lppph..tt........KlohhshllKAsuhALccaPhlNush..s..s-s.....llhpcplslGlAVsos.....pG..LlVPVl+sscptulhplupclpcLsp+ARss+.Lpss-hpG.GTFTlSNlGsh.G.sphhoPIIN..PpsAILulGpl.pcpP.ls.tssp.........lshpph.....hsloLohDHRllDGAsuucFlppl+.chLE..sPttll.l .......................................................t....ppl.h.stlR+tlApphhcuhp.ss.s..p...lThh...s-lDhst...lhshRp.p.......................................hpp.t.htcpps..............KLohhsahsKA.lstAL..+..c.a.P.tlNuuh.........s...............scs........llh+ph...hs..lG.......lAV..s...T..s............pG.....L.lV..P...V.l+..ss...-.p.h.ultpls.p.........c.l...pcLup....+.......AR.cG...K..Ls.....s...p-h..pG...GTFTI....oNhG...s........h....G..uh..h..T.PI....l.Ns..Pps.A.I.L.G.luph..tp+......P.ls....hssp................................lshcsh...h.LuLSaDHRllDGtpuu.pFLspl+.phLE..sPtthl............................. 0 940 1801 2459 +3 PF04029 2-ph_phosp 2-phosphosulpholactate phosphatase Kerrison ND, Finn RD anon COG2045 Family Thought to catalyse 2-phosphosulpholactate = sulpholactate + phosphate. Probable magnesium cofactor. Involved in the second step of coenzyme M biosynthesis. Inhibited by vanadate in Methanococcus jannaschii. Also known as the ComB family [1]. 
21.70 21.70 24.00 23.70 21.40 20.80 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.39 0.70 -5.40 61 532 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 495 17 238 524 260 226.60 27 93.52 CHANGED plplhhsscth.tt........pssVVIDVLRAToTIssALpsGAc.tlhPsssl--Ahthtpt......cshLluGE.RsGh+ltGFDhGNSPh-hp......tppl.....p.GKpllhoTTNGT+Alpcsp.sAp..pllhuuhlNtpAVschltpt.....scs...lhlVsuGhp.GpaolEDhlsAGhlhptLhpptt...........thsDtshsAhtlapp......ttshhphlppusHup+LtpLGh....pcDlca.CsphDhhslVPhh..psGhlhtt .............................h.....................spsslVlDVLRAooslssAlssGAp.....pl.hss..t.sh..-.c.Ahthtpp.............pshllsGE..Rsu..h..pl.p.......GFDh.uNSPhphp...................t..ptl.......p..G.+p.llhoTTNG.Tpu.lppst...pu..p....plls.u.uhhNspAlAchltpt.......scs..............lhllsuG..p..c.........G..........p...........h...........ul...............EDhlsAGhlhptLtpptt..............................hsDtuhsAhthapp...............ttshh.p.hlp..p.uspupcLhp.h.Gh........pcDlph..Cs.ph.chhshVPhh..ptt....th...................................................... 0 95 178 222 +4 PF03171 2OG-FeII_Oxy 2OG-Fe(II) oxygenase superfamily Aravind L anon Aravind L Domain This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily [1]. This family includes the C-terminal of prolyl 4-hydroxylase alpha subunit. The holoenzyme has the activity EC:1.14.11.2 catalysing the reaction: Procollagen L-proline + 2-oxoglutarate + O2 <=> procollagen trans- 4-hydroxy-L-proline + succinate + CO2. The full enzyme consists of a alpha2 beta2 complex with the alpha subunit contributing most of the parts of the active site [3]. The family also includes lysyl hydrolases, isopenicillin synthases and AlkB. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.51 0.72 -3.71 138 7390 2012-10-10 13:59:34 2003-04-07 12:59:11 15 72 1609 66 3461 10006 2827 101.90 28 28.36 CHANGED sspthplspYs...................thshuhssHsDs...................shlTllhp...........psuGLplhpps.....................thhsssshss..uhllshG-.h.hhossthpushH+lhssp...............tpsRhShshFhps ..................................................................s...hpl.tYP.h.........................................psphshG.h...s.s.....Ho.Dh........................................................shl.TlLhp.......................................sps.uGLQ..l..hpps.............................................................pWls.l.s.s.h.s......s.....u.......hllN.l.G.......Dh..h.....p............h...h.......o.....N.....u.....t..h......c......S....s...h...HRVhsst....................................ttsRh.SlshFhp........................................................ 0 752 2020 2858 +5 PF01073 3Beta_HSD 3-beta hydroxysteroid dehydrogenase/isomerase family Finn RD, Bateman A anon Pfam-B_504 (release 3.0) Family The enzyme 3 beta-hydroxysteroid dehydrogenase/5-ene-4-ene isomerase (3 beta-HSD) catalyses the oxidation and isomerisation of 5-ene-3 beta-hydroxypregnene and 5-ene-hydroxyandrostene steroid precursors into the corresponding 4-ene-ketosteroids necessary for the formation of all classes of steroid hormones. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.68 0.70 -5.70 12 1802 2012-10-10 17:06:42 2003-04-07 12:59:11 14 22 886 0 831 37083 19903 231.50 24 68.31 CHANGED lVTGGuGFlGppIlphLlptc..ltElRlhD......hthssph.p.chtphpst....hlpGDlpDtpplcpAhpGsslVIHTAullDlhG.....hhpppplhcVNlpGTpsll-AChpsuVphhlYTSShpVlGPN.hucslhsGcEppsapss..apcsYspSKthAEchVLtANG..h+sGu.phhTCALRP.hIaGcGsphlhstl.pshcps.hhhthuptpsh.s.VYVsNlAWuHlLAA+sLpss......tstltGphYalsDsoPpppYt-hshplh+shGhchss.h....hPh .......................................................................................hVTGus..G...F..l...G.......t..p...l..l.......p..t.L.....l...p......p......s.............................h..........p....l......p.....s.....h....c.......................................h..t........t..............t................................................t..........t..h..t.....................hh.p...u...D.......l.......p.........s...............p.........t.......l.........t...........p..............A..........h...............p...............s...............s...............c...........s.............V.......h.......H.......s..........A...........u.............s...........h...h.s............................s...t.....p...........h.......h....p...s.....N.....l.......p....G.......T.....p..s...l.....l........c..............A.........s............t............p...............s..............s................V..............p.............p.......l..........V.........a........T...S...o......h..........s......s............l...........h.............s..................................................t...........p...........s............l.........h..............s.........s................s...............E.............p..........h........s........h................................h.........s..........h........Y........s.....p......o...K....h....h........A..E.......p....h....l........h.........t.........t.........s.....t......................
.......................................t...........................h....h.....o..s..s.....l...R........P...h........h..I........a..........G.......................s...........p...........p........................h........h..........s..........t............h.......h........p........h..........h.......p.........t............t........................h..........h.......h.......h.......s.........................p............................h......s..........h.....s....a........l...............t......N...l..s........h......H...l..........h.........A....h..........p....t......h...............................................G.....p.................a........h.....s.........t.................h..............................h........................................................................................................................................................................................................ 0 251 444 652 +6 PF04419 4F5 4F5 protein family Bateman A, Wood V anon Wood V Family Members of this family are short proteins that are rich in aspartate, glutamate, lysine and arginine. Although the function of these proteins is unknown, they are found to be ubiquitously expressed [1]. 22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild --amino -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -7.95 0.72 -3.22 40 536 2012-10-03 11:22:52 2003-04-07 12:59:11 9 10 268 1 333 500 2 37.30 34 47.06 CHANGED MuRGsQ+ptsRc+stKKptt.t......spsspsphpssp+t..p .....MuRGsQ+chuRpKNtKKpsp.t......t+ptp-thsssp+t..p.............................. 0 100 162 256 +7 PF03061 4HBT Thioesterase superfamily Bateman A anon Pfam-B_2758 (release 6.4) Domain This family contains a wide variety of enzymes, principally thioesterases. This family includes 4HBT (EC 3.1.2.23) which catalyses the final step in the biosynthesis of 4-hydroxybenzoate from 4-chlorobenzoate in the soil dwelling microbe Pseudomonas CBS-3. 
This family includes various cytosolic long-chain acyl-CoA thioester hydrolases. Long-chain acyl-CoA hydrolases hydrolyse palmitoyl-CoA to CoA and palmitate, they also catalyse the hydrolysis of other long chain fatty acyl-CoA thioesters. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.14 0.72 -3.85 140 18406 2012-10-02 20:54:35 2003-04-07 12:59:11 17 57 4420 294 5240 15763 6423 79.30 17 47.56 CHANGED hGhlaGGsh....hshh-pusshhhtphstt..............hsssshphslsahcs.sphup.hlpspuplhchG+sshhhphclhspssthhs .................................................GhlaGGhh......h.s.h.h.-.....p.ss.u....h...s...s.t..phstt.................................................tsss.shph........s.lsa..hc...s..s..p..h..........u........p...h.....l.....ps......pu.....p.....l..h......c..h.....G..p..p.sh..hhphclhsppt....h.......................................... 1 1455 3083 4266 +8 PF02872 5_nucleotid_C 5_nucleotidaseC; 5'-nucleotidase, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_1318 (release 3.0) Domain \N 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.11 0.71 -4.25 119 7487 2009-01-15 18:05:59 2003-04-07 12:59:11 13 118 3115 27 1573 5671 648 166.60 21 29.23 CHANGED slu.......psth.......................t.sh....tp-ss....luslls-uh................tttsss...............c.............lulhss.G.sl........Rs..s........................tG.s...lTht-lh.slhPFs.Np........lhshplsGp.plpphLE...........................tsssthh............phu.........G.lch.ph.Dhsps.................upRlssl..................sup....sl..-sspp..Yplssss.......a.hs....sGucs..a..h......htp 
...............................................................................................th........htpss.....hspllssA......................tth.s.ps........................s...................luhh...ss..G..sl..........................Rs..sh..............................tG..s........lThp...slh..sl....hP....as...Ns..............l.ssh.c.loGp.p.l.+..ch.LE..........................................psthh...................................................plh.......G..lpa...p.h....chs.ps.............................................up.R..ltsl.................................................pGc.................sl......-....s..s...pp....Yplusss.............a..hu...........s.G.Gst...a......h................................................... 0 507 918 1282 +9 PF00003 7tm_3 7 transmembrane sweet-taste receptor of 3 GCPR Sonnhammer ELL anon Prosite Family This is a domain of seven transmembrane regions that forms the C-terminus of some subclass 3 G-coupled-protein receptors. It is often associated with a downstream cysteine-rich linker domain, NCD3G Pfam:PF07562, which is the human sweet-taste receptor, and the N-terminal domain, ANF_receptor Pfam:PF01094. The seven TM regions assemble in such a way as to produce a docking pocket into which such molecules as cyclamate and lactisole have been found to bind and consequently confer the taste of sweetness [1]. 
25.80 25.80 25.80 26.30 25.70 25.70 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.91 0.70 -5.02 109 4297 2009-09-14 23:22:24 2003-04-07 12:59:11 17 59 213 0 2615 3557 7 227.40 29 32.05 CHANGED hGhhh.slhlh...........slFhpappTPlV+usst.pLsallLhulhlsahsshhal.ucPs................shs.........Chl+phhhulsFslshSslLsKohp.lhh..........hFc..tst..stt..hhh......................sttphhhlhhhshl.Q..lllsslWls..hsPP..................................ht.......sht..tptp.llltC.........p..s.sshshh.............hhLuYhshLhllshhhAa........................hsRclP...csFNEAKaIsFoMhlhshlWlsFlP.hahss......pu.p.....................................apssl.shullhSuhuhLhslFhPKsalI ..............................................................................................hhhh..shhlh...........hlFl.....p.a..p..s.T...Pl.V+...Asst...pLsallLhulhls.ahs.........s.hh...a.l....u.c.P.s.......................................hs........................C......h.lRph...h.hGlsFslsh..S.....slLsKThp..lhh.................................................h.Fc.....tsp..s.tp....hhp.............................................stt.hhl...lh.hhshl.Q...............ll....lsshWls.......hsPP.......h.............................................................pt............shp....sp.tp....ll...l.....p...C..................p...p....s.oshuh..h....................................ssL.G..Y.h.slLh.l.h.s.hhhAa........................hsRplP...-sF.NEAK..a..I.sFo..M...............hs..h..s....h.l..W....l.sF..l...P...h...ah.s.o...pu.p.....................................hh.s..s..s.h...hu..ll.hSu..h.sh.Lss...lFhPKsall......................................................................... 0 448 660 1746 +11 PF01661 Macro DUF27;A1pp; Macro domain Bateman A, Mistry J, Wood V anon Pfam-B_434 (release 4.1) Domain This domain is an ADP-ribose binding module. It is found in a number of otherwise unrelated proteins. It is found at the C-terminus of the macro-H2A histone protein Swiss:Q02874. 
This domain is found in the non-structural proteins of several types of ssRNA viruses such as NSP3 from alphaviruses Swiss:P03317. This domain is also found on its own in a family of proteins from bacteria Swiss:P75918, archaebacteria Swiss:O59182 and eukaryotes Swiss:Q17432. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.40 0.71 -4.19 124 5297 2012-10-02 00:07:53 2003-04-07 12:59:11 16 119 2806 93 1542 4535 236 112.30 29 10.96 CHANGED VNAANssLhs.....GGG.VsuAI++tuG.spltpts.pph......tt......stsGpAhlTsuhpLs......s+aVIHs..VG..Phap.....spppptchLtssYcssLpl........s..pcpslc.....................olAhPsISoGlaGaPh-cusplh ..........................................................VNAANsp.h.h.t.........G.uG..V.s..sAI.........pct..u..G..sp.l..t..p.t.s....pp..h............s...........................h.s.Gp...A..h.l..T...p...u..h..s.Ls...............s+hlI..Hs..VG...Ph.hp...................spppptphL...ts..sY...cssLph..............s..pp.p.s.hp...................................................................olAh.....P.s.I.SoGl..a.uhPhppusph..................................................................................................................... 0 543 862 1216 +12 PF02177 APP_N A4_EXTRA; Amyloid A4 N-terminal heparin-binding SMART anon Alignment kindly provided by SMART Domain This N-terminal domain of APP, amyloid precursor protein, is the heparin-binding domain of the protein. this region is also responsible for stimulation of neurite outgrowth. The structure reveals both a highly charged basic surface that may interact with glycosaminoglycans in the brain and an abutting hydrophobic surface that is proposed to play an important functional role such as in dimerisation or ligand-binding. 
Structural similarities with cysteine-rich growth factors, taken together with its known growth-promoting properties, suggest the APP N-terminal domain could function as a growth factor in vivo [1]. 22.10 21.60 22.10 31.90 19.70 21.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.61 0.72 -4.10 15 356 2009-01-15 18:05:59 2003-04-07 12:59:11 11 16 88 12 114 317 3 98.20 60 14.97 CHANGED a-P.........pVAhhCG+hshahs...hpsGpWhsDssst...tsChpscp.-ILcYC+KlYPchsITNlVEuu.p.VpIssWCchsp...spC+s....sHhVpPYRCLsGcFhS-ALLVP- ............A.EPQlAMFC.G+LN....MHhN...........lQsGKWEsDPoGT...KoCltoKE.tlLQ.YCQE.hY..PELQITNVVEAN....QP..VoIpNWC++u+....KpCKs.....HhVlPa+CLVGEFVSDALLVP-............................... 0 20 28 61 +13 PF00962 A_deaminase Adenosine/AMP deaminase Bateman A anon Sarah Teichmann Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.85 0.70 -5.52 15 4245 2012-10-03 00:45:34 2003-04-07 12:59:11 17 22 2418 67 1678 3809 1081 319.30 23 76.53 CHANGED sl.Ks-hHlHtsushs.cpLLchh+pphht..stssh.p.tphlphtps.csLphhhhshslsshsl+.t........................................................................................h.hhpssRhhsh.ptcchhtstsphlEshapPhhps..........shssp.hsthclhpthlsGhDps-cEst..hss+hhhsch.p.....sppaspph...scshshYh.h..hhsshslhs................phppptthsslhhpsHuGEsushpplhsAhhh....htuccIuHGltltccPhL.thht.p...........QIslphsPlSNssLthhtshccpPlhpahccGlsVSLuTDDPhhFphT...LhcEYslAspsa...thop..s-hsclA+NSlhtSuhscctKpchLuc 
........................................................................................................................................................................................h.t.s-lHhH....lsGslp....phl...hc.l..sp....p..t..t...........h.....t..............................................h.........p..........h.......h....h.....p.........h....h.....t....t...........t.......s....L..............p...a........h....p..h..h..s..h.s.h.tslt..............................................................................................................................................p.ps.hp.t.hs.h.t....h.h.p.c.h....t.....p...s...s.ltahElh...a.s.P.....hhs....................................pts.........h....s...h...t......t......s...l..p...s....l.......h.p..u..h...c......p....u.......p...c...p.hs..............lps.p.h.....l.....h.s..h...Rp..............stph...s...p....ph................hp...hh....h...t...a....t....p............hlsul.sl..s..u.s.E...s....h.sst................ahp.h..h..p..p...s....p.p.t.......s..h..t..hssHA.......G..E........s........s........s........s........p........p...l..h..p...Alth.........................hts.c..R..I.uH....Gl.....p.....h......h.....c..........c.......................t..........L.....h......p..h...l...h..p..p........................................pIs..l...Eh....CPh.....S..Nh.p............h...........t..h...............h..............t....s.......h.............t....p........H........P....l.....t.......p.....h...........h.........p.........t.G.........l.....................s........o...lsoD.DPsh........sss.....................l.cEat.hs.t.....pth....shs.........pp...ht.p.h................s.h.Nul..phua.hst..p.p.Kpthh......................................................................................................... 
0 516 944 1371 +14 PF01490 Aa_trans Transmembrane amino acid transporter protein Bashton M, Bateman A anon Pfam-B_419 (release 4.0) Family This transmembrane region is found in many amino acid transporters including UNC-47 and MTR. UNC-47 encodes a vesicular amino butyric acid (GABA) transporter, (VGAT). UNC-47 is predicted to have 10 transmembrane domains Swiss:P34579 [1]. MTR is a N system amino acid transporter system protein involved in methyltryptophan resistance Swiss:P38680. Other members of this family include proline transporters and amino acid permeases. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.16 0.70 -5.99 24 6390 2012-10-03 01:44:59 2003-04-07 12:59:11 13 58 501 0 4195 7221 166 337.70 15 78.72 CHANGED pssoshpushpllsuhlGsGlLuLPaA.hpphGhlsGlhhllhhuhlohaohhlLspshph.........tppttoYt-ls...........tphhGsth.hhlhshs.hlp.lGhsluYhlhsupsl.slhpshh....tt.......h.h..ppshahhlhshlhhsLo.hlsshstLuhhSlluslhhhh.....................hshlhhhhhshshhsshshshhsh........thphhphhhulGslsFAasspshlhsIpssh+s.Ppp........cshhhuhhhsslhahhhGhhGYhsFGsssts..sllhshs.p......hhshsplhlsltllhuhsl.hhPlhphl-phlh...................................................................................s.hps.hhthh...chhhRshlVlhohhlA.lhhPhhstllullGuhushsloalhPshhahphhtsphhshphhhh...hthhhhhhulhhhhhushshlh 
........................................................................................................................................................................t...t...shh.hhp..........sh.....l..Gs.G.l...L.u..h...P.....h...u....h.t..................p.........h......G.....h.....h.................u..h.....h....h..h..h.........h...h...u..h.hs..h....a.s....h.h.l.l....h.....p.s.h....................................t....h..sa...t..phs.......................................................thh.h..G.....hh....th....hh.t....h..h.h........h...h........h.......s.h..............sh.s....ah....lh....h.u.p..h....t...h..h..t.....t..........................................................h...p...h....h....h..h.....h...h...h..h..h.h...h...h...s....h.......s.....h......h.....t.....p...h...p..........l....t..h....h.u..h....h.uhh.hhh.h...........................................................hhh...h..h..h...h..h....................h......................t.h...........................................t.............t....h......h.....s....h....s...h....h...h.....a........u......a.............................s...p.......................h...h...l.................s....p....h...............p...............p....s......pp..........................ts.......h....h......h..s.....hh.......h...s..h.h.h.....a.....h..h...........h.........u.h...h...u.............Y............h....t.....a.........G......s..........t......s....t...s...............s.....l.hh.s..h.s..............................h.h.h...s.......h....h...h....h...h....t.h...h.h...s.....h....s........l.......h....h.........s..h...h..t...h...l...t.hhh.......................................................................................................................................................................................................................................t.........h.......h.h...........hh.h.h..p..h...h..h..h....h..h...s............hh...lu....h....h...h..P.........h..s...........l....h...........u...h..h...........
...G.uh...h..ss................hsh...h.......hP..s.h..h...hh.......h...........h..h......................t..........................................................h.............................................................h..h.h..h..h.h..s.hh....h........h.............................................................................................................................................................................................. 2 1380 2401 3501 +15 PF00004 AAA ATPase family associated with various cellular activities (AAA) Sonnhammer ELL anon Prosite Family AAA family proteins often perform chaperone-like functions that assist in the assembly, operation, or disassembly of protein complexes [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.75 0.71 -3.95 207 52090 2012-10-05 12:31:07 2003-04-07 12:59:11 24 426 6164 264 20937 92205 29927 130.80 25 20.90 CHANGED lLLhGPPGsGKThlA+ulAsp...............h.............s....hs.................a.hplsusplh.t................ahGtutppl+plapp.A....cpts........s.sllFlDElDulssp+................t..tppshspLLsthDshpst..........................s.lhlluATNc..s-tl-sALh.h.uRh-phlhhsh 
....................................................................................................................................hlLhGP.P..G....s.........G....K.........T....h........l....A....c..........u...l...A...pp...........................h..............................s................tp.......................................................................h...h......p...l....s...s...u....s....h.....h.....pt..............................................................................................................................................................a..h..G......p....h....t....p......c......l......+......p......l.....h..cp....s..........cpps...............................sll...F...l.....D...E....l....c...s...ls.sst.......................................tt.t....t.t...p...s....h.....s....t...L....l......s..t......h...-...s.h.p........................................................................................h...h...h..l.....u...A.....T....st.............p..........l.....s........s.lh.........tR.hph......h................................................................................................................................................................... 0 6957 12667 17600 +16 PF00696 AA_kinase aakinase; Amino acid kinase family Bateman A, Birney E, Griffiths-Jones SR anon Pfam-B_100 (release 2.1) Family This family includes kinases that phosphorylate a variety of amino acid substrates, as well as uridylate kinase and carbamate kinase. This family includes: Aspartokinase EC:2.7.2.4, Swiss:P00561. Acetylglutamate kinase EC:2.7.2.8, Swiss:Q07905. Glutamate 5-kinase EC:2.7.2.11, Swiss:P07005. Uridylate kinase EC:2.7.4.-, Swiss:P29464. Carbamate kinase EC:2.7.2.2, Swiss:O96432. 
24.90 24.90 25.00 24.90 24.70 24.80 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.76 0.70 -4.83 135 25691 2009-01-15 18:05:59 2003-04-07 12:59:11 23 74 5328 286 5789 16900 9693 240.60 20 64.39 CHANGED phh..V.lKlGGssls.st...........t.lp.plspplt......t.......................hhpt........shcl.llVsu...uG.....s....hssthhpt.............t..........................t.h..ptt.................tltp....................httlhst.hpphss.thhshhhps......tsh.shth....................................tptlpphlp..p..s.hlsllsuhh.....ss....ps....th..................sthssDtsAshlAttlpA...c..lhhh..oDVsGlastpP.....su.phlsplshp-htphh......................psGhtshh.tuhpss.pp..s..shplhlhs .....................................................................................................hlVlKhGGsuls.st......................................p.lp.p.ls...p.p.lt.......p...........................................ht.pt.....................................Gh..cl..llV....pu.....uG........s.....hssthhtt..........................................................................................................t..h.h.h.pt.t.....t..........................................................thtp..................................................hh.s..hlsth.....h......p.t...h...s...s.......hs....h.sh..h.hpp................ts.h...s.t.t.h...hth..........................................t.....hs.tp.tlp.p.h.Lc......p.......s.tls..l.h.suh........ss...............ps...th....................................shhssDpsAu.hlA...t.....t..l....p...A.............D...hlllh........T-V..-.G..l....as.s.s...Pth............s....s....u...p........h...l...s....p..l..s..h..p..-..hhchh..........................................................tG.ht.sh.hts....Ahphs.pp....t.s..h.shhlh.............................................................................. 
0 1858 3683 4867 +17 PF03109 ABC1 ABC1 family Bateman A anon Pfam-B_339 (release 6.5) Family This family includes ABC1 from yeast [1] and AarF from E. coli [2]. These proteins have a nuclear or mitochondrial subcellular location in eukaryotes. The exact molecular functions of these proteins is not clear, however yeast ABC1 suppresses a cytochrome b mRNA translation defect and is essential for the electron transfer in the bc 1 complex [1] and E. coli AarF is required for ubiquinone production [2]. It has been suggested that members of the ABC1 family are novel chaperonins [1]. These proteins are unrelated to the ABC transporter proteins. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.40 0.71 -4.13 32 5521 2012-10-02 22:05:25 2003-04-07 12:59:11 11 49 2872 0 2412 4867 2734 116.90 31 21.56 CHANGED ppLG.pshcchFppF-ppPlAuASluQVH+ApLps........GppVAVKVQ+PslppphptDlthhphlsphhpth..t........clttllc-hpcpLhtElDahpEApssc+htcthpch.....shlplPpla .................................................t.thG.tsh.p.p.hF..s..pF-...t...p.P..lAuASIu......Q..............V..HpAp.L+s.....................................Gc..c....V.sVKVp..+Psltp.................hlpt.Dlpl....l......p...hl.u.......p.h....h....p...t....h.h.st...................hhc.hp...pll.......cEhp..........c.......plh.........pE...........lDhhpEAtNs.pphp...c..p..hp.sp............s.hl..hlPcl........................................................ 0 828 1543 2052 +18 PF01842 ACT ACT domain Bateman A anon Bateman A Domain This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine [1]. 
Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585 20.80 20.20 20.80 20.20 20.70 20.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.83 0.72 -4.43 180 20885 2012-10-02 00:29:19 2003-04-07 12:59:11 20 130 4518 161 5944 23355 10616 64.10 16 16.98 CHANGED thlt...l.tst.-c.sGlluclhshlucpslNlpplpttsstt...........thhhhhhhssppshpphhcplcphhs ......................h..l.h..h...Dc.PGllucls.shlu.c..p..slsls.ph.p...psstp......................hthhhhhh...hspt.t.tthht.h.....t............................................. 0 1838 3898 5060 +19 PF04083 Abhydro_lipase abhydro_lipase; Partial alpha/beta-hydrolase lipase region Wood V, Finn R anon Pfam-B_267 (release 7.3); Family This family corresponds to a N-terminal part of an alpha/beta hydrolase domain. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.17 0.72 -4.61 82 1362 2012-10-03 11:45:05 2003-04-07 12:59:11 11 14 288 4 980 1460 9 63.00 36 14.19 CHANGED sscllppaGYssEpHpVpTcDGYlLslaRIstsp...........................................t.tspPs...VhL.HGLlsSSssWlhss ........................phlptaGYssEpHp.VpTcDGYlLslaRIPpsp.......................................................................................................ttt.ttt+..sVhLp..HGLlssSssWlhp.h........................................................................... 
0 284 440 775 +20 PF00583 Acetyltransf_1 Acetyltransf; Acetyltransferase (GNAT) family Bateman A anon MRC-LMB Genome group Family This family contains proteins with N-acetyltransferase functions such as Elp3-related proteins. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.44 0.72 -3.95 241 50804 2012-10-02 22:59:21 2003-04-07 12:59:11 19 363 5227 314 15580 72453 11235 83.90 18 39.27 CHANGED hptsspllGhsththhtp................................................ttstltslhVp......ssa+spG.lGptLlpthhchstp..hs.......hpplthts.tts.Ntt...uhphYp.+hGFp ......................................................................................................h...tspllG...h....h..h..h...hh..p.............................................................................ttht.l...t....p..l..h..V.p...................s...p...h...+........s..p..........G....l......G....p......t....L....l........p.....t......h....h.....p..h....upp.....ts..............................h.p..p..l....h....L..t.s.....tt.......p....st............uh..t.hY.p..chGa................................................... 1 4938 9501 12964 +21 PF01648 ACPS 4'-phosphopantetheinyl transferase superfamily Bateman A anon Pfam-B_1679 (release 4.1) & Pfam-B_3672 (Release 7.5) Family Members of this family transfers the 4'-phosphopantetheine (4'-PP) moiety from coenzyme A (CoA) to the invariant serine of Pfam:PF00550. This post-translational modification renders holo-ACP capable of acyl group activation via thioesterification of the cysteamine thiol of 4'-PP [1]. This superfamily consists of two subtypes: The ACPS type such as Swiss:P24224 and the Sfp type such as Swiss:P39135. The structure of the Sfp type is known [3], which shows the active site accommodates a magnesium ion. The most highly conserved regions of the alignment are involved in binding the magnesium ion. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.84 0.71 -4.27 154 7983 2009-01-15 18:05:59 2003-04-07 12:59:11 15 48 4744 99 1916 5342 1894 110.90 21 48.33 CHANGED slGlDlEph.....pt................................thtp..lhpph.h...sspEhphltphss....................................thhhthWohKEAhhKuhs....t..................h..hsthphphtss.................................................h..htttttphpht...hphttptthssshs .....................................................................lGlDltcl......pph..............................ppspp...h..h...c+l...h...os.s.E.h.phhp.shpt...........................................................................h.p.hh.s.thausK.EAhhKAhG......pG.......................................th..hshpp.l.plhps...............................................t.................................................................................................................................................................................. 0 622 1194 1612 +22 PF01064 Activin_recp Activin types I and II receptor domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_338 (release 3.0) Domain This Pfam entry consists of both TGF-beta receptor types. This is an alignment of the hydrophilic cysteine-rich ligand-binding domains, Both receptor types, (type I and II) posses a 9 amino acid cysteine box, with the the consensus CCX{4-5}CN. The type I receptors also possess 7 extracellular residues preceding the cysteine box. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.21 0.72 -11.34 0.72 -3.34 26 942 2012-10-03 01:43:02 2003-04-07 12:59:11 18 17 116 67 464 892 0 83.70 25 16.38 CHANGED lpCh.Css..........Cs..pss.....ppCps...................suhCasthphspst...hphhpp....GChshppts......hhCps..................sss...spshtCCp.sDaCNcshp..h ........................................................Ch.Cp.......C....pss.........tpCps...................sstCasshpp...sss........hphhpp..........GChstt.tps.................hpCts.............................psps.........spsh.h..C.Cs...s-hCNpph....h................................................. 0 82 118 270 +23 PF00441 Acyl-CoA_dh_1 Acyl-CoA_dh; Acyl-CoA dehydrogenase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain C-terminal domain of Acyl-CoA dehydrogenase is an all-alpha, four helical up-and-down bundle. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.47 0.71 -4.11 58 26994 2012-10-01 23:33:27 2003-04-07 12:59:11 19 118 3319 208 9422 25135 11877 150.80 24 33.41 CHANGED spGathshps.lshpRhslustshGhsppsl-pshpascpR..psFG.p.lhshpslpppLAchtsclcus.+hhshpsup...thcpspss........tttsuh.sKhhuschstclsppAhplhGuhGh.......h.pchs..lp+haR-s+shpIht.GosEl.tttllu+phh 
..........................................................................................................................spGhthhhp...t..LshsR..l...s........l....u....s....t.....u....l...G..........h....u........p..t......u.....h..c..t.....s.....h..p.....Y...u...p..p....R....................p....p...........F............G......p.............s...............l......s........p......h......p...s..........l..............p......p...p........L.......A.......c...h...t.....s...p..l..p....s.......u.....+.......h.......h.......s......h....p.s.A.t..................th...-...p...upss................................shtsu.h....s........K........h.....h...s....s.....c...t..............u...h........p....s....s..s...p...A.l.Q......l.....h...........G....G.h..Ga..............................h..p-hs..........lt+..h...h.....R....-....s.......+....l....h...p...lhp....G.ospl..p.t...hluc...h................................................................ 0 2614 5667 7871 +24 PF01757 Acyl_transf_3 DUF33; Acyltransferase family Bashton M, Bateman A anon Pfam-B_708 (release 4.2) Family This family includes a range of acyltransferase enzymes. This domain is found in many as yet uncharacterised C. elegans proteins and it is approximately 300 amino acids long. 
31.00 31.00 31.00 31.00 30.90 30.90 hmmbuild -o /dev/null --hand HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.29 0.70 -5.61 111 12610 2012-10-02 17:00:17 2003-04-07 12:59:11 17 41 3761 0 3737 10989 1698 328.50 15 74.75 CHANGED hhtlD.....hlRsluhlhllh.hHshh...hhhh.shhh......................................hhsthslslFhhlS..Gahh...................................hpttshhphhhcRhh+lh....lshlhhshlhhhhthhhhththhthhhhhhhh.............................................................................................................................................hhshhW........alhsh......hhhhllhshh...............hhhhpphtthhhhhhhhhhhhhhhhhhhhhh...................hth..hhh.............................hhhahhGhhhsphttphptthh..............hhhhhhhhshhhhhhhhhhhhhht................................h..hhthhhhhhshhhhhhhshhhtpht.....thh..............hlthlu.phShslYlhH...........hhlhhhhtthhhhh.................................hhhhhhhhhhslhlsh....lhhh ...................................................................h...l-slRuluhlhVlh...hH....hhh...............hh.h.......................................................................................................huh..h..u...V.s..l..FFhl.S..Galhh.................................................................t.tp..h.....s.h..t..p....a...h..h.......+.....Rh.hR.lh................hsh.l.h...h...h.........h..l.....h.........h.........h........h......h.........h......h........h.....h......t......h.....h....h...h....t.....h..t...h.....h.hh.h..............................................................................................................................................................................................................t....h..h..h..p.....h...W.................hLss...........hhha..ll...h..sll............................................hhh.h.....t......p........h....t.......t.........h.........h......h......h.........h......h.......h.........h...h....h......h.....h....h.....s....h...h...h.
..h...h...h..h..h................................................h....................h..h..........................................................hhha..hhG....h........h......h....s....h....h....h...h...t....h...t...t..t...h........................................................................h.h.h..h.h....h..h..h.....h......h....h.s..h..h..h..h.h.h..h.h.hhhh.....................................................................h..h.....h....h...h....h......h....h.....h.....h..h....s..h....h....h...h...h....h...h.h..h...t.h....p..ht.................hh........................................h.ht.hlu..p.h.S..a.u....l....Y....L..h..H................................h..l...h...h..h..h.t..t..h..h.hth...................................................................hhh.hh.hh.h.h.h.s.hhh.sh....hh.................................................................................................................................................. 0 1161 2171 3133 +25 PF00928 Adap_comp_sub Adaptor complexes medium subunit family Finn RD, Bateman A, Coggill P anon Pfam-B_1007 (release 3.0) Family This family also contains members which are coatomer subunits. 
20.80 20.80 20.90 20.90 19.80 20.70 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.86 0.70 -5.29 46 2208 2012-10-02 01:13:52 2003-04-07 12:59:11 16 30 497 31 1352 2129 25 258.90 25 54.82 CHANGED hsh+sss.hp....appNElalDllEc....l.ss.lhsp.sGp...llpu-.lpGplphpshLsG.hPplplulsc.h.............................................sh.l--hpFHtCVphsp.a.cp....................p+hlsFlPPDG..cFpL......hpY...+l.......................................................spp....hthPhpl.hsphp........................................................p.tst.schchhlphctp......htp...sspslhlplPl....Pps......................................ssssshps.stG....pspapsppp...hLhWsI....tclss......p...hp......................................................u.plphssthp...................phss............................lslpFplshhssSul...........................pVchlcl..h..t......pshpsh+hV+YhTpus.paplRh ................................................................................................................................hp.t.s.hp....apps.ElalDllEp....................l.s.........h.l..............hst.....s.....G..p....................llpuc....ltGpl....phps.h..Lo...G...hP..-...lp..lulsst................................................................................................................ttshtl-D..spF..H...CV..chsc..F...-s....................................................c+hl......uF......l.......P.....P.....D......G.....pFp......L...........hp........Y....Rh.........................................................................................................................................................................ssp.........h.l.h.pl..ps.hp.......................................................p....up..s+h-h.hl...p.hctp..........................htp.....hspsV.lpl...P.h....Pps............................................................................ssssp..hps.....s.G................php.aps.ppp....slhWpI.....tphss..........tp.....sh
p.......................................................................................................u.ph..ph..s.s..t...st.t..........................th.s.........................................................................................................lslpFp.....l....s......h..s...sS.Gl................pV..chlpl....p................tthpsh.h.V+a.spst...h....................................................................................................... 0 456 720 1076 +26 PF00107 ADH_zinc_N adh_zinc; Zinc-binding dehydrogenase Sonnhammer ELL anon Prosite Family \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.35 0.71 -4.60 108 42002 2012-10-10 17:06:42 2003-04-07 12:59:11 21 474 5352 463 12979 36140 9081 125.70 20 28.57 CHANGED ulGhhulphA+shG.......spllssstspp+hchAcphG...Astslsspptt......................hhppltchssu.........GsDsll-ssG.ssshppulphlphsGplshlGhss............tshshshhtlhhpphslhGshhss........pphtphlphlhp .......................................................................ulGhhul.p.h....A.+..t.hG...........scV.l....u....s........s....t...........s.........s......p.........+........h.........c.......h.........A.....c......p...........h.....G..........A......s........p......s....l....s......ppts.................................................hhptl..t..c..h.sss.......................G.s..D...h..s..l...-.....s.......s.....G..........................s............t...........s............h.....p............t.......s.....l.....p...........h.....l........p................s........G.........p......l......s.h.l.G.hss........................t..h...h...s.....h..................h....h........h..p...t..h.p..l.h.....G..h..hht............t...........t................................................................................... 
0 3503 7483 10666 +27 PF02682 AHS1 DUF213; Allophanate hydrolase subunit 1 Mian N, Bateman A anon COG2049 Family This family is the first subunit of allophanate hydrolase. 20.40 20.40 20.40 21.10 19.70 19.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.08 0.71 -4.85 158 3092 2012-10-02 15:38:38 2003-04-07 12:59:11 11 20 2321 14 753 2183 1289 203.30 32 52.43 CHANGED h..p.lpshG......-pulllphs.pt.hs.......shtppl....hsltptlppt.......hsulh.-llPuhsolhlpa-.shth............sht..plt....stl....pph.hpp...ttspttsst..............p...........lclPVtYssphG......................DLtpVAchsGLo.hc-VlphHosspYpVhhlG.FtPGFsYLuulstp...L.t.sPRpssPRtplPuGSVuIAGpQoulYPhsSPGGWplIG+Tsh..tlF.Ds.pp.psP...sll ...............................................................chh.hG-pulllc....ht........lsh..........shp.tcl....hsltptLppt................hssll...-l.lPuhs....slhlhac..stt.l..............................shp...slh..ptL..............pph..hpp.............tpshpsss.............................................+h...............lcIPVhYs..s.th...G..........................DLp.VAchsG.Lo...hcpVlchHoss...pY....hVhhlG.FhPGFP....YLsulspp..........L..t.sPRpspPRhtlPuGSVGIuGspoGlYP.hs.oPGGWQlIG+T.s..l...plFcs.pc.p..h....................... 
0 200 423 624 +28 PF03915 AIP3 Actin interacting protein 3 Wood V, Griffiths-Jones SR anon Pfam-B_38461 (release 7.2) Family \N 24.50 24.50 24.80 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.37 0.70 -5.57 18 245 2009-09-10 22:05:34 2003-04-07 12:59:11 8 6 183 3 166 277 0 303.00 27 37.23 CHANGED FL+lssKsKKshlsh...slo.hspLRLLFlEKFAYSPGss..sFP-IYIpDPpouVpYELE-..Lp.DlK-GolLsLphcp..................c.ssshssLpc.hlcsl+pplscppssl..................hpclpsh.tsssh.hsp.ssuss.....sttspppssstt.ttps..................hpElpslc+-Lull+QhasstpsslpsolsslhpKlssh+s.sulsss.....ssSsRuYh-pupscLu--SDpLls+VDDLQDllEsLRKDVApRGVRPsc+QL-slsK-lppApc-LpcMppaIspEKPhWKKIWEsEL-+VCEEQQFLsLQEDLshDLp-DLcKApETFsLVEQsocEQtKssups................+s+sl..lslscPG.ohpph+DtVLsEVpuLpPsHESRlEAIERAEKLRp+E+chp+sscFpcELusFVEcuKLKKSGGlEElERhR+tKD- .....................................................................................FLQhtscsK+hhlsp........tlo.hsplphh....FlcpF....shp..t.p....chP..p..IYIpDs..ps.lhaELED......lp.-l+D+olLhL..c................................................................................................................................................................................................................................................tphptlp.-lt.h+p......h.pt..tthtt.htshh.p.h.t.t..htt....s..t..............ttt.hhpttpp.htt.s.tl.tc...lp-LpshlEth+tD....VhpRtsps..ptlcth...ltthptpltphp.hh.t.p..hpphhptEhp.l..sptpp.hp....c.h....th.t.htth.thh..ht...h.................................................................................................................................................................................................. 
0 57 95 141 +29 PF00842 Ala_racemase_C Ala_racemase; Alanine racemase, C-terminal domain Bateman A, Finn RD anon Pfam-B_1496 (release 2.1) Domain \N 25.00 25.00 26.50 25.50 24.90 24.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.61 0.71 -4.73 121 5638 2009-01-15 18:05:59 2003-04-07 12:59:11 16 16 4223 90 1060 3928 1372 125.90 36 32.58 CHANGED lhs.Lcucllpl+pl.psGps..lGYGtsapspcss.plAslslGYADG..hs.Rthus.ps................l....hlsGpps..Pll....GRlsMDhhhlDlo...........ph...ssp.G........-pVhlhG.tp......hss--lActssTIsYEllsplut.Rl.Rhah ......................shoLpoplltV+pl.p.s..G.-..s..VGYGs.sap..u..pc.cp..pluslslGYAD..G..as.Rph..ss..ts...........................V.......ll.......s....G....p....cs....sll....GRVsMD.hhVDls.......................p.sphG......sp.VhLaG..pp........................lsl--lAph.s.s.TI.sYEl.l.stl.sp..RlsRha.h.......................... 0 358 686 893 +30 PF01168 Ala_racemase_N UPF0001; Alanine racemase, N-terminal domain Finn RD anon Manual Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.87 0.70 -4.79 613 12691 2012-10-03 05:58:16 2003-04-07 12:59:11 15 33 4649 104 2910 10839 5318 218.10 20 66.44 CHANGED lsLsslppNlpthppts....s..........ss.lhAVlKusuYutuhhpsut.h..t.u.....s..c.....shulsplpEAl........................tLc....ps..............tl.hh..........ppl..hhp.......slhhslsshcth.p.tlsptttt................tpslplhlpl-o.Ghp........RhG.........hs..s........pth.........phhp...tlp...........s.....lplpGlhoHhus.u............-....p.t........tpp.p.hpph....pphhptl........t.....t...............hhshusSushhht.........h......p...hsh.VRsGhslaGh...ps 
.................................................................................................................shttlppNlpthppts...ts............ss.lhA..VsK.s.......hsts.h...h.p.hs...h.....................t......t..u.....h.....p.....................t.h..u.....s.p..l.p..Euh........................................................................pLp..psh.................lh....h....h.....................pp....l...........h.hp..........t...sh...h.....h....s...l.s...s.h.chh...p..tlpptt................................t.ps.lp..l..hl..p.....l.....s............s...uhp.................RhG..................lp...s......cch................phhp.......plpp..................h..ss......lp.l.t.Gl..hohhuts........................................-c.p...............htp....p...h....pp.h......................pphh.ptl..............pt..t......................hlS.h.u.sS..ushhht..................p...hhsh...VRhGhslaG.t.............................................................................................. 0 960 1857 2470 +31 PF01315 Ald_Xan_dh_C Aldehyde oxidase and xanthine dehydrogenase, a/b hammerhead domain Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.35 0.72 -3.99 30 5239 2009-09-12 04:57:33 2003-04-07 12:59:11 17 77 1902 114 1766 4735 2068 108.20 30 12.95 CHANGED TGcAhYsDDlshssst.ahshltSshAHA+IhuIDsspAhshPGVlullTucDlsstspshh....s.hsp.......lhAc-cVphhGpslAhVlAcspttAccAAcLVpl-YEcLs ............................................oGcupYs...-D...h...s....h....s....s....h....h...a..sth.l.pSs.h.A.H.A+......I..t...........u.lDs.st.A..t.......s.hPGVl..s..llT..tp..D...lss...h........t....hssh...........h..tsp........................hh..s.......s...s.......c..V.c....ahGpslA.hVlAcotttAccAsphl..c...l-Y-.L............................................ 
0 567 1035 1411 +32 PF02738 Ald_Xan_dh_C2 Molybdopterin-binding domain of aldehyde dehydrogenase Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 19.60 19.60 19.70 19.60 19.10 19.50 hmmbuild -o /dev/null HMM SEED 547 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.43 0.70 -6.19 30 9467 2009-01-15 18:05:59 2003-04-07 12:59:11 13 89 2059 116 3266 9151 5213 368.20 19 64.65 CHANGED Alc......ppshhsph............hphhppGD...sspuFspADt.......................llcuphphupQpahahETpuulAl.c.p-sclplasSTQsPphspphlApsLGlPtp+lclhspclGGGFG.pcscshshsshsALAAh+htRPV+hhhsRpEchhho.utRHshhhph+lGhsp-G+ltAlchchhssuGshsshu.slh.puhhts....ssYchsslchsuhsstTNhssss.AaR.GaGssQuhauhEshlcclAppLGl-s.ElRchNhh...cts.tsha.spphsshshhp...th-cshppspappRp.......ttlcp.....hshsp.pahc+Gluhsssphshshsss...tusutlplhsDGo...lhlphGus-hGQGhpTphtQlsAcsL.........ulshccIplppss.TspsPpusuTuuSpuoshsGtAltpAscplpc+Lp.htsphht.........shsacch........sssuhsssls.sApuhatss...........s.shphtt...s..spsh.hasaGssssEVElDshTGcscll+s.hlhDsGpslNPtlshGQIcGuhsQGhGhhhhEElhas.ssG.lhspuhhsYcIPshs-lP 
......................................................................................................thh.........................................................................................hp.........h................hEs....hs...............t...............t......t..........t.....h.......hh...s.Q.........................t.....h.s...h.....h.....t.....h..................t..................t....l.....h............hG.GuFG.....t..........................................................s...h.....h.....h......s..h...h.....h.........t.....t.......s.....lp.................h...sR..p.h..t....t....................p.h..p......h...u......h..p.......t.......p.......G............l.............u....ht.....h......t........................h.....................t......................h....................................t.......................................................................................................Y......h......t..h...h..t...................h...s............................s..h.....h.R.....u.....u.s.....s.hh.h.E.hhp.h.A..tht......h..ssht.hR.hph......................................................................................................................................................................................................................................................................................................................................................................................................................t.........s.....l..........h...........s..Gp......h..l...sh..p..GpG.h...Thh.t...h..hup.h................................th.s....p..t..l.......h...................s......t..............t...................h.................s............................s.....h........u......S.t.............h.s...h...................s....s........h..t........h..h....h....t.........................................................h........................................................h......................................................
.................................................................s.......s.....s......h..s.....l...t.l..s....................s..h...........p..l.p..h.................h.s.h..G.......lNs.h..tQ..............h.Guh.h...u.h.u..h.tt..........h.................................................t..........h..................................................................................................................................... 0 967 1893 2579 +33 PF00248 Aldo_ket_red aldo_ket_red; Aldo/keto reductase family Finn RD, Griffiths-Jones SR anon Prosite Domain This family includes a number of K+ ion channel beta chain regulatory domains - these are reported to have oxidoreductase activity [2]. 20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.57 0.70 -5.58 61 28932 2009-09-12 09:01:48 2003-04-07 12:59:11 16 110 4596 377 9885 23584 7566 269.00 25 87.49 CHANGED pluhGshthst.............thspppshph...lctAl.ctGhs.....hlDTAph.........Y...........utuEph.lGc...hLpp.........+cplhlsoKs..................................ht.shstp.....plppslcpSLccLph.-hlD.lhhlHtsssths..........................hp-sh......ptlcchhp....pGpl+tlGlS...shs.hpplpph........tpttht.....hsssQspashh...........tthppplhp..hspp..p.slsll.uauslut...............GhLptt..........................................................................thhphlpplucp.hsh.ohsplulpaslp..............s.....................tsssslsGs..sshpclcpslpsh...ph.pLsppphstlcplht 
.........................................................................................................luhGsh.th.........................ths.t.p.p.s.h.ph.......lctAl....-t...G.hp...........................hhD.T.A.ph.........................................Y...........................p.sE...p......h....lGc.....ulpp.................................h.R-c....l....h......l......sTKh..........................................................................................................h.....t..s....h..s.hc...................plh.p.u.l-.p.S.L...c.........+...L..........t.........h....D..........Y.....l..............D....L...a....h....l.H...h.....s..s...s..t..ss...............................................................................hp-..sh...................pAh.p.c.l...hc........................pG...+..l.+.t.........l.....G.....lS.....................Nas...sp.p...lpph.....................................t.ps.tht..................hs.s..s..Q.s.c.hshh................................h..p..p..p...l..ls.............hspp.......p..sl.s...hh..ua.....o......P.Lu.p..........................................................G...hl.t...t....................................................................................................................................................................h...pt.l.p..p.....l..A.....p.....c...h.....s......t...o...s...u........Q........l.....s.....L...t.Wh..l....................................t......................................................................................................t.s.s.ssIs.uu........sp.p..p..l...p..p....N.h.tsh........sh...pLotc-httlpth..t................................................................................................................................................................................................. 
0 3142 5960 8241 +34 PF01263 Aldose_epim Aldose 1-epimerase Finn RD, Bateman A anon Prosite Family \N 21.10 21.10 21.20 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -12.04 0.70 -5.31 66 8201 2012-10-02 23:57:29 2003-04-07 12:59:11 15 27 3660 74 1995 6054 1148 286.20 18 90.64 CHANGED phlslpss.pthpsplhphGAplhShpsss.t...t-llhshsp.....ta.tt........ushlsthAsRl.ttGta.hpGtshplshNs................stpshHGtsps...phWpltphp.........stsphphh..sssc.ps...aPtplpsplsapLsspst...Lplphcupsst....cshshshs.HsYFNls.........hptpplplpu..pphhphss...hs....hshptssh.sh...ppsphlspch............thDpsahhss..........hphshhhhptspshtlchpsspsshhlassshht.................hhhpttuhshpsphhssssspsphs....hhLpsGEpaptphthp ..........................................................................................................t..hplpss...thps..p.l.h.s.h.GAplhu.hphss.....................pcll..h..s..hss..........t..h.tt......................hu..s.h.l.s.hus..Rl..tt........u...t.h...........h............t...G....p....sh.p.l...shsp......................ssp.s.h.HGh..scs........................p.Wp.lhpps............................ptlphs.h.p..................ssp..ts..................a...P...t......sh............plplpap.....L............ss.......s......s................lp.....lp.h..psp...s......s.............psh......shsh..s.Hs......Y.Fs.ls.....................hp.p.pp....lp....l...su.....t.thh..........h...sp...............h....s............t...........h...t.h.t...s....s.....s....h....-h........ppst...l.s.tp.............................................th..Dcs.ahh.t..............................................phthh.h.h..p...........p...p..t....h..t.l..p..h....t....s...s...t.......s..l.......l..a....ss...s.ht.....................................................................hhth..s...h.s.h..ts.p..h..h..s.s.p..s..t.......p.....s.....ths........hhLtss.p..hp..........................................................................................
....................................................... 0 599 1167 1644 +35 PF03155 Alg6_Alg8 ALG6, ALG8 glycosyltransferase family Mifsud W anon Pfam-B_3941 (release 6.5) Family N-linked (asparagine-linked) glycosylation of proteins is mediated by a highly conserved pathway in eukaryotes, in which a lipid (dolichol phosphate)-linked oligosaccharide is assembled at the endoplasmic reticulum membrane prior to the transfer of the oligosaccharide moiety to the target asparagine residues. This oligosaccharide is composed of Glc(3)Man(9)GlcNAc(2). The addition of the three glucose residues is the final series of steps in the synthesis of the oligosaccharide precursor. Alg6 transfers the first glucose residue, and Alg8 transfers the second one [1]. In the human alg6 gene, a C->T transition, which causes Ala333 to be replaced with Val, has been identified as the cause of a congenital disorder of glycosylation, designated as type Ic OMIM:603147 [2]. 25.00 25.00 26.20 26.00 22.80 23.30 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.88 0.70 -5.65 9 764 2012-10-03 03:08:05 2003-04-07 12:59:11 10 16 267 0 527 725 13 353.20 28 86.10 CHANGED hsslhl+hh..LhPh.tp......sDFEspRpWhtIThsLPlupWYhpuosp....WsLDYPPhhAYapalLuhlupa.h.sschls.hpshGaES.sp..hlF.RhoVIhs-lLhhls.lhhaspsh...th.+..spppphhsuhhlLhpPGLllIDHhHFQYNulhhGlhlhuIsslhpcp.alhuAhhFslulsFKphhLYhAPsaFsaLLtp.Clp.sp....ht...........taspllpLussVlusFslhahPahhh....ppl.QVLpRLFPFsRGLhcshhA.NFWshaNsh.Kltphlsh.................................................plt..lohhhTllu.LPuhlhLahcPp....+hFhhuLshCuhu.FLFuapVHEKuILLslLPhslLhsttst..h.hhhhhsssuhaSLaPLLh+sshhh.phlhhhsa.hh.....hs........................p...thhh..hphlphhalluhhslhhh.phlp.hl...s.spKaP.LhllLsushuslshhhha...lah.h 
.....................................................h....hh+hh..l..s...t............sDaEspRpWhtlT.pLPlppW....Y..h.t..........WsLD.YPPhhA............aapahhu...huth....h...ssthht..................p..s.....h.shts..th...hhF.RhoVlhs-hlhhhs......lh.......h.h......h......................................t....................................t........h...............h.h.....llh.suhlllDahHFQ............YNs.h..hGhhlhul..hh...t.............tp..................h....h.....uu.hhFshhlshKph.LYhu.sh.hha.LLtt....sh.............t...........................................................hhphh.tluhhshhshhh.h.hPa.h............................tth.QlhpRlFPh.tRGLhc...shhA.NhWshhsh..h......+hh..hh.....................................................h...hshh..hT.....h.h...h....................shh.....l....h.h......p..........sp...........................th.h.hslh.su...hu....FhFuapVHEKulLls......hlPh...sll..h...t.................................................hh...hhh..huhh...ohhP..LLhps..t.h..h.hhhhhha.hhh......................................................................................................t.....h.h........h..h..h..h......h..h...h......th...h.....hh...............p...h..l..hh.s.hs..th...h........h............................................................................... 
0 184 291 438 +36 PF00245 Alk_phosphatase alk_phosphatase; Alkaline phosphatase Finn RD anon Prosite Domain \N 19.80 19.80 19.80 19.90 19.70 19.70 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.42 0.70 -5.79 10 3561 2012-10-03 20:55:17 2003-04-07 12:59:11 15 33 1898 200 1070 2985 602 329.70 28 81.45 CHANGED AKNVIlhlGDGMGVuolTAARhLKsptcsphs.-h.LuhDchPhsGhu+TYssDpp......VsDSAAoATAahCGVKTspssIGVo............spGs-V..hSVLEtAKcAGKusGlVTTTclsHATPAuhhAHsssRshhussss.....ssthhptG.....ptDIucQLlssh...cIDVlLGGGR+aFh.pspss.......tttGhRhDGRNLlcEa......K+pGapYVhsRp...pLlpspssp.sp.LLGLFtsuchta-l.....................pccsspsPSLtEMT-sAl+lLo.+Np......+GFFLhVEGuRID+ucH-scshtALsEsltFDpAlctAh-hs..p-c-TLllVTADHoHs......aohGuYs.+sspshGLusupsssDttshpll.asp...GsGtslcs.........................tt+.slst.scthts.apppuslshsocsHoGEDVsVaAhGPpAc.......hl+GlpEQTplu+sMstAhsL .....................................................................................KNlIhhlGD.G.Mu.s...hsssR...h......t..t.....t.................t......s................................................................................ht...........h.......-t...........h....h.u...........psas.st.........................................lsDSAssuTAh.sGhK..o....ss..sl..u.ls...................................pt...p...h........olh..c..h.A.c..t..t..G.h.ss.GhVsT.scl...pcATPAuh.hAHsspRp..s..s...............................................................p.hhpt.......................h-..V.hhGG..G..t..p...a................................................................t.hp.s.t.s.L..h.pth.............p..t....t....u.....h....p....h.s......s.t.t...........th......t.............p.....t..s.....p....................l.l...G....L.....F......s....s...s.p........h.......ht...h................................................................................p..t....t.....t.....t.....P.........o..L...tpMsptA..l..p..h..L.....p.....p..st....................pG.FFLh....VE....G...upIDhttHsss......h..t.....
......ht-hhth-pAlphA....hp......h.s.........p...p.......t......p....TL.llsTADHsps.......................s.t...............................t.......................................s.........h.........t............t.................................t.....................................p...........h..h..................h.........................s....................................................................................................................................................................................................t...s...tt...Hsutp..l...lh.......A..h.G..s.t..t.t........................h...G.h-ps.h.hh.hh.h...................................................................................................... 0 299 560 845 +37 PF02806 Alpha-amylase_C alpha-amylase_C; Alpha amylase, C-terminal all-beta domain Sonnhammer ELL, Griffiths-Jones SR anon ref [2] Domain Alpha amylase is classified as family 13 of the glycosyl hydrolases. The structure is an 8 stranded alpha/beta barrel containing the active site, interrupted by a ~70 a.a. calcium-binding domain protruding between beta strand 3 and alpha helix 3, and a carboxyl-terminal Greek key beta-barrel domain. 21.20 17.00 21.20 17.10 21.10 16.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.74 0.72 -3.85 246 4792 2012-10-02 20:10:03 2003-04-07 12:59:11 13 63 3353 197 1256 4323 339 94.60 25 14.30 CHANGED W....ssssslluFtR.............stt....hlllhN...hss..hppsh.psulP..sG....p........Yp-ll....susttph....us.......s......t.......h..............h...pGp........h..plpl....Pshusls.ht.h..p .........................Wl.hpcsspslluFhRt.......................spt.t...llslhN...Fs.s.s.....s.p.p...sY..pl.GlP...sG..........p........Yp.-ll................NoDs.tpa....uGs......sh..spttp................lpt...............................hs..cGp..............l.plsl....Psh...uslhht...tt............................................................. 
0 397 756 1059 +38 PF05111 Amelin Ameloblastin precursor (Amelin) Moxon SJ anon Pfam-B_6419 (release 7.7) Family This family consists of several mammalian Ameloblastin precursor (Amelin) proteins. Matrix proteins of tooth enamel consist mainly of amelogenin but also of non-amelogenin proteins, which, although their volumetric percentage is low, have an important role in enamel mineralisation. One of the non-amelogenin proteins is ameloblastin, also known as amelin and sheathlin. Ameloblastin (AMBN) is one of the enamel sheath proteins which is though to have a role in determining the prismatic structure of growing enamel crystals [1]. 25.00 25.00 30.00 29.90 16.90 16.90 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.48 0.70 -5.73 4 141 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 79 0 27 129 0 207.90 45 100.34 CHANGED MSASKIPLFKMKDLILhLsLLKMShAVPAFPQQPGs.....PGMASLSLETMRQLGSLQGLNsLSQYSRaGFGKShNSLWhpGLLPPHSSFPWhRPREHETQ...............QPSLQPpQPGhKPFLQPTAsTuhQsTsQKsuPQPPhH.GpLPLQ-uELPhscp.QVAPSEpPPsPELPshDFADPQhPo..........................lFQIARhISRGPMPQNKsS.LYPGMhYhoYGANQL...............GGRssPhAYGulFPGFGGhR.slcthPpNPsMGGDFTLEFDSPVAATKGPEKGEGGAQGSPl.EApssc.ENPALLSplAPGAHsGLLAhPpDsIPuhARGPuG+.pthL.uVTPAAADPLhTPELAEVYETYGADhTTsLG.ptEATMDoTMoPDT.QT.MPGNKspQPQhhH-AWHFQEP .....................................................................................................................................................................................................................................................................................................................................................shtuhP.NPshGGDFTLEaDSPsAuTKGPEpt....EGGspssPh.-spsss.EsPAhLsEht..PsshuG.LLA.PpsslPsLsRuPuGp.+t...tVTPAsADPLhTPtLA-sYcTYsuD.TTsls.pcEs..Th..DsThsPso.po..hPtN+sppPphhpcs.................... 
0 2 2 6 +39 PF01510 Amidase_2 N-acetylmuramoyl-L-alanine amidase Bateman A anon Pfam-B_735 (release 4.0) Domain This family includes zinc amidases that have N-acetylmuramoyl-L-alanine amidase activity EC:3.5.1.28. This enzyme domain cleaves the amide bond between N-acetylmuramoyl and L-amino acids in bacterial cell walls (preferentially: D-lactyl-L-Ala). The structure is known for the bacteriophage T7 structure and shows that two of the conserved histidines are zinc binding. 21.10 21.10 21.10 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.26 0.71 -4.10 117 7328 2009-01-15 18:05:59 2003-04-07 12:59:11 20 149 3627 173 1411 5665 2129 140.50 22 44.18 CHANGED ttsttth..psphlllHpTssss..sts.t..............hhphps..............pssuhH...ahlst......cGp.....l..h.ph...hshs..........thuhH.su............................................h........Nst....s..lGl.phht.u....................................................sssQh.ps...ht..pL..hthl.........ppa.....s.ls.p........................plh..uH.......pcls...................sPush ...................................................tt.........phphlllHpTu.s.ss..spstt.h....................h...ts...............................pplusHal.lsp...................cG..p............l...h..ph...........lshs......................phuWH.AGs...........................................................................................h.Nsp.........S....IGI.Eltsst.........................................................................ttsh.ss.sph..ps.........ht....pL....sttl.............hppa...s..ls.p.............................plh.uHp-luspp...............sPu..h..................................................... 0 418 807 1119 +40 PF01520 Amidase_3 N-acetylmuramoyl-L-alanine amidase Bateman A anon Pfam-B_888 (release 4.0) Family This enzyme domain cleaves the amide bond between N-acetylmuramoyl and L-amino acids in bacterial cell walls. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.22 0.71 -4.49 103 7380 2012-10-02 19:46:12 2003-04-07 12:59:11 13 197 3575 11 1463 5592 1358 194.90 29 50.10 CHANGED lslDsGH.....G....Gp.Ds.....................G.....Ahu....ss....hh.EcslsLpl.upplt.ph.LpppG....hpVhhTRss.D.....t.h................sLppRsph..........A.....sp...hpuDlalSlHtN.uh............ssss......pGsplah...................................................ptptspupplApt...ltpp.ltpththt.......scG.......l+tss....................hhll..cp..sph.....PulLlEhGFloN.p-tphlpssphppp.lApuIspults ...............................................................................lhlDsGH.....G......Gp...Ds.................................G........AlG..................su......hp..EKclsL.p.l.Ap.clp...ph.Lpp.pG.......hclh.h.T.Rss..D..............hhl....................................................sL..p.p.Rs.ph...............A.........pp..........ppAD...lFlSIH.ss..uh...............................................ss..ss..s........pGsplah................................................................................................................................t.tph..ppuhplA....pt....lhp...p...lt..p..h.s.t.hp...................s+s.......................scp.us............................................hhVL....+p.....ssh............P.ul.LlEh..GFloN..sp.-..p.p.h.L.ts.sp.ap..pp.lApuIhpGlh.p....................................................................................................................... 0 519 1014 1265 +41 PF01593 Amino_oxidase Flavin containing amine oxidoreductase Bashton M, Bateman A anon Pfam-B_606 (release 4.1)b Domain This family consists of various amine oxidases, including maze polyamine oxidase (PAO) [1] and various flavin containing monoamine oxidases (MAO). The aligned region includes the flavin binding site of these enzymes. The family also contains phytoene dehydrogenases and related enzymes. 
In vertebrates MAO plays an important role regulating the intracellular levels of amines via there oxidation; these include various neurotransmitters, neurotoxins and trace amines [2]. In lower eukaryotes such as aspergillus and in bacteria the main role of amine oxidases is to provide a source of ammonium [3]. PAOs in plants, bacteria and protozoa oxidase spermidine and spermine to an aminobutyral, diaminopropane and hydrogen peroxide and are involved in the catabolism of polyamines [1]. Other members of this family include tryptophan 2-monooxygenase, putrescine oxidase, corticosteroid binding proteins and antibacterial glycoproteins. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.44 0.70 -5.42 84 8963 2012-10-10 17:06:42 2003-04-07 12:59:11 19 111 2576 276 3844 15154 7049 357.50 13 77.77 CHANGED luGLsAAppLtpt....Gh..............pVhllEAp-RlGGRlt..ohptt.....shhh-hGsphhh.s.ss.hhphhtphshp.phthh.tstthhhthphttt.hhthssth.........................................t.hhshtphhphttthtsthhthtpthhphtttsh...tphhhppshtphhht.....................................................................................................htshhhsthth..hthttust.hthhthth.shpptthhhshtt....hshhsthhsthts..ts...plphsppVpplphps.............ttststttsu..........thpsDt.Vlsssshsshp..............thhPthst.t.phh.pththssss.plhlphppthh.pt...................th.shhhsss...............ththhhhshphsss.....ttsshhshlhssts.....thhtshsccphhptlhpplpphhs.....th.ssh...............t..hppWtpsthstusashhsstss.....hhsthtssh.....................slahAGstsstth..sslpGAlpSGhpsAppll 
..........................................................................................................................................................................................................................................................uGLssA...h.....L.....t.p.t...........uh................................plhl.hE......t......p....s........p.h.G.G.+....h.............s..h.....t..t...................sh..h....h..-.h....G...s...t.....h....h................................h.........t...h.....h.....t...............t.......h....h......................t................h.............................................................................................................................................................................................................................................................................................................................................................................................t.................................................t....h.............................................................................................................................................................................................................h...........h...h...............................t.................h............................h......................................................................................................h..................t........................................t..h........t.......h.......t...h...h...h...t........t..............plp..h..s...p.........V...pp.lp.h..ps......................................tt.h..t..l..h..h..t..sst..................................................hp.s-h..l......lh.s.s.s.h.t..hht........................................h...s.......h.s..t.................t....h........h...t........t.....h................h.....t...s.........s....hh..h....h..........h....t..t.......h.h..............................................................................th.
hh.t......................................h......hh......t..................................h..h.....h......h..h...h..t........................................h..t...h....s.............p.....t.....h....h....t.........h.h.....t......l.t.p....h..hs.................t.................................................................t.h..........................h...........h..............h...t................t....................h..............h..t...............................................................................tlh.hsGt....h..t...........................h.h..ssh.su...s...................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1229 2344 3195 +42 PF00501 AMP-binding AMP-binding enzyme Finn RD, Eberhardt R anon Prosite Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.67 0.70 -6.09 354 67453 2012-10-02 15:58:18 2003-04-07 12:59:11 23 2256 6205 141 23415 62718 26362 376.00 19 52.12 CHANGED 
hppps...pptP.cps.....Al....h..............tspplTYp-LppcusplAphL..t.p..h.Gl.p..tsc.h..Vulhh.pss-hlluhlAlh+sGushlPl..ss......shss.....cclthhlpcu.......psphll...s.............................................t.......hhhhp.......t....tt...................................................t.....sch...Ahll..Y..TSGoT....GpPKGVhloH.psl....hsh.hs.............ht.......tt........hh.hth.ss.......hsashu...h.htlhssLhtGu............................slllhpt..t.hs....thh...hp....hlp.c......hplT...hhhssPshhp.hl.......hp.t...t........................................ths...................s.L......+h..................................................l..h.......s..uGpslssphht.hpphhtt........lhss.YG.TEssshs.....................hhh...................ttt...hs.l.G...pP..l.....s.ssphtll..Dtt.tp.l...s.....sG.s...GE...lh.lp....Gs....s..lhcGYh........s.c...P.ch.....Tscp.ah......t...........................tt.ha+TGDlup.h.p.....D.G....h.lthlGRhc.c.lKlp .......................................................................................................................................................................................h..........s...pt..Al...h..............................tttpho....a...t..p.....l....t....p..........p....s....p..p.h....A....t....h....L...........h.....t.................h.....G......l....t..............................s.......sh....................V.u...l.h...h...........p..s......................p..............h....h...........h......s.....h..h..u.h...........h.........p....h...............G....s......s....h.........s....s.....l.......s...s.....................................t.h..s..........................p...p...l.....t.....h.....h..l.p...p..s........................t..s.p....hll.................................................................................................................................................................................h.....h.....h.........h....p.............................t.............................................................
.......................................................................................................................................................................ts......pch.....s...h....l..h......a........T........S.....G..o.....T..................G......p....P..........K............G..........V...h.....h.....s.....+.....tsl....................hs.t.h.h.t.h.........h.............t.......................................-....hh..hth...hs......................h.h..a..s....hu................h....h.t..hh..s..s.......l.....h....t.....G..u...........................................s.h...h.l.h.......s.......t.....h...............tth..................hp...............................hlp....p........................h.....p..s..o............h.h.h.s..s...P.s..hhp..hl.............................hp..t....................................................................................................................................shs...............s...l...+.h.................................................................................................................................................................................h...h.......................s.......uG..p....s...l..s...s......p......h..h......p......t......h..t..p.hhs.....................................l.h..p.s...Y.....G.................T.Essshs...............................sh.................................................tt....p.h.s.s...h..G.................ts.......h........................s...s...h......p...h...t......ll................D.t...p................h..p..h...........s..................................ut..............GE................l..h....l.p...............Gs.............s......l.h..p.........G......Y...h...........................p...p.........s...ph.....................o..t..pt...hh...s...........................................................................tt..h.a..c...T...G..D..luh..h..p..............t.........c..G..........................h...l....h....h..h.G.Rh..c..c...lhh.t....
............................................................................................................................................................................................................ 0 6394 13492 19232 +43 PF05195 AMP_N Aminopeptidase P, N-terminal domain Finn RD anon Manual Domain This domain is structurally very similar [1] to the creatinase N-terminal domain (Pfam:PF01321). However, little or no sequence similarity exists between the two families. 20.40 20.40 21.80 20.90 20.20 19.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.84 0.71 -4.77 154 2593 2012-10-02 11:23:57 2003-04-07 12:59:11 11 16 2060 51 934 2167 1097 135.80 28 29.60 CHANGED hstptatpRRpcLhppht................sulsllsuuspthRs.sDscYs.FRQ-Sp.........FaYL..................TG...asEP-ulhll.........ptp......t.lLFspt+-............ptElWsGhRhG.-sAhctausDpAasls-...lsphL.sphlpstp..t...laash..s.pssthc ...........................................tppatp.RR.ptLhp.pht......................suhsll.uus.ph...h..R....s..sD.s.c.Y.s.FRQ..sSs..............FaYl...................TG....h...s...E...P-.ulllL........................ststs......cslLFscs..+c.................ptElWh.Gt.Rh..u........-..t...A.pph.G.l..Dcsh..shs-.......ls..phL..phl..tths..h...lahs............t......................................................................... 
0 292 563 795 +44 PF03098 An_peroxidase Animal haem peroxidase Bateman A anon Prosite Family \N 19.30 19.30 19.70 19.80 19.00 19.10 hmmbuild -o /dev/null HMM SEED 530 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.83 0.70 -6.13 123 2338 2009-01-15 18:05:59 2003-04-07 12:59:11 10 150 421 314 1401 2365 62 385.50 22 55.96 CHANGED YRoh-GpsNNh..ppP.thG..ssss.asRh.lsstY..tcuhspsts............tth.LPs...........sRt.l..Spplh...tptp.......................hssssph....ohhhh.au.QalsHDhshssps.................C.t............t..shs....l.ls.sDshhs..........sh.hhRoh.s...ss.t.....................tpQ..............................lNphTualDuS.lYGsspptsppLRs...hpt.....GpL+...........ss.s...hhshs.........p.h..................hhhsG-tRssp.suLsshpslahREHNplAcpLpp.............h......................sPc..........................................................................................WsDEp..laQcARpllsA.........................h.Q+.................................IshpEalPtlL.Gp.p...shph.hh..............................................................hps..............Ys.slssslssEFtsuAaRahHohlssthhhhtpph................................tl....lpcthassp...hhpp.........................ls....hl.hGhhs.....................................psutphc...phhssplpppl..a...phstt........h.DLsulsIpRuR.D+GlssYNpaRchhs......Lpshpoac-ltt..........hssppl.............................hppLpplY...................................sss-slDlalGulsEp.h...Gu..............hhGsoh......ssllspQFp+h+pGDRFaa....E...ss..........................s.FTtpplpplp.....ps.oLscllscs.....h.th...tp..ht..ssF 
..............................................................................................................................................t..h.R...h...s...................................................hP............st..l....hh.....................................................................................................................t......s.hh.......h.au...phh.a.ph..s.....................................................................................................................................................................................................hs..hothlDhu.lYGp.......s........t..t..tl.....R.......t..........G.hht............................................................................................................................h..h..u..p...h.h.....s......hl.....h.hthlahR.HN................h...lsp..L.tt......................................p..p......................................................................................................................................................h..ss-p.....l.....a....ppu.......Rhl.hh...u..........................h..p......................................l.sh.-al.hlh.s..........ht............................................................................................h.s................hp.....thsstl..s.F.shs.a+.a......Hshl.s.h.hhstph..........................................h........p.h..h....t..........h..h.t.......................................t...........hh...th.t............................................................................t.hst......t..ltp.h........h.........t......................cl..shsltcuR.c............h.....s.ls.shsphR.t...h.h.s....................................h......hp....s..ap-h..........................p.....t..h...............................................tthtthY...........................................................................tp..pt..l-la.GhhhEt...........st.................h.G.hh..............................phhh.ph.....t..............
h.h...uD...+...hhh.t.....p...........................t.h.s...th..t.l.........p...sh.plhh................................................................................................................................................................................................................................................................................. 0 473 646 1103 +45 PF01821 ANATO Anaphylotoxin-like domain Bateman A, SMART anon Prosite Domain C3a, C4a and C5a anaphylatoxins are protein fragments generated enzymatically in serum during activation of complement molecules C3, C4, and C5. They induce smooth muscle contraction. These fragments are homologous to a three-fold repeat in fibulins. 21.40 21.40 21.40 21.50 20.90 21.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.24 0.72 -3.82 65 552 2009-01-15 18:05:59 2003-04-07 12:59:11 13 71 77 20 202 520 0 35.50 37 3.14 CHANGED CChDGhptss..hspoC-cRsthl.........tpsppChpsFhpCC ...CCt-Ghptts..hspoCcpRsthl............ps.tCtpsFhpCC..... 0 14 34 79 +46 PF01094 ANF_receptor Receptor family ligand binding region Bateman A, Finn RD anon Prosite Family This family includes extracellular ligand binding domains of a wide range of receptors. This family also includes the bacterial amino acid binding proteins of known structure. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.96 0.70 -5.66 119 7015 2012-10-02 13:57:41 2003-04-07 12:59:11 23 144 390 116 4127 13698 3388 312.90 15 40.55 CHANGED thttAhphAlcclNpsss.hh...sshplshthtss.............................sstt.....ssshstphhtpptlhull..Gsssos.sstssuplssth..pl.Phloh.........................u.usssthss....tp..ashhhR..sssssptt.spuhscllpp..................asWppluhlhssssh...spth.......hpslppthpph.th.hhth.........tth.spspshp..shhptlpp.....pps+lllhtstssph......ptlhpps.......t.th..ttta.halhhshh.pt.....................................htstphtcshpuslshphhtstt.thpphhpphp...........................................................................ttt.shhshhhYDulhhhApAlpphhpppssthtst......................................................................s.tlh.phhps...........spa.......pGhsGplphsptssph.shthhhhphp ...........................................................................................................................h.huhpt.h.s...t...p.......h.................t....h..p..l..t....h.....h....p.......................................................................h............................................t.................h...h..u...lh.......G..s......s.st..................s..........................h......s....p..h...h.....t....h..h............pl....P................l.....o...h....................................................s...s....s...s.......h..ss...................t.p...as...........h.hp....hh.s..........s.....t...............t..............s....t....u......h...h....p....l.l.pt...................a.t..W.....p......h...l..s.h.l...h.....s...s.s..ph.........h..h.........................................hp.t.h.pp.....h...pp.....t........h.shth...............................h...t.....t..t...p.............p.......th.......p....l..p.p.......................t..............p.....sp...l.l..l..l..h...s..p..t.p.th.............................................
ttlhpt.h....................th.......t....t....h....h..a....l.h..s.s....h.h....t...............................................................................................h.h.t............h..t....s.....h...h......s.....h.....t.......h..................p............p..................t...h.....p...p....a...h...t.p.h.p...............................................................................................................................................t..h......t.................s..h...h....a....s..AV.....h...s...h...A..h.....A.l..p.p.h...h...t...p..................................................................................................s.....p...l..h...p.hh..h.p.......................................h..p.h.................................................p.s....s.G..p.....h...p..ts...t..............h.................................................................................................................................................................................................................................................... 0 856 1347 2917 +47 PF00023 Ank ank; Ankyrin repeat Bateman A, Sonnhammer ELL anon Swissprot_feature_table Repeat Ankyrins are multifunctional adaptors that link specific proteins to the membrane-associated, spectrin- actin cytoskeleton. This repeat-domain is a 'membrane-binding' domain of up to 24 repeated units, and it mediates most of the protein's binding activities. Repeats 13-24 are especially active, with known sites of interaction for the Na/K ATPase, Cl/HCO(3) anion exchanger, voltage-gated sodium channel, clathrin heavy chain and L1 family cell adhesion molecules. The ANK repeats are found to form a contiguous spiral stack such that ion transporters like the anion exchanger associate in a large central cavity formed by the ANK repeat spiral, while clathrin and cell adhesion molecules associate with specific regions outside this cavity [2][3]. 
20.60 14.70 20.60 14.70 20.50 14.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.19 0.73 -7.67 0.73 -3.89 1072 8812 2012-10-02 12:10:21 2003-04-07 12:59:11 25 2020 897 214 5230 139773 9187 32.40 29 5.11 CHANGED pGpTsLHhAspps......................phpllchLlp....tuushshpsp ..............................................................................G.TsLH.h.A..stps............................................................................phcll.chLlp.........pGAshph......................................................... 1 2564 3256 4303 +48 PF00191 Annexin annexin; Annexin Finn RD anon Prosite Family This family of annexins also includes giardin that has been shown to function as an annexin [1]. 20.90 20.90 20.90 20.90 20.20 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.96 0.72 -4.07 177 6936 2009-01-15 18:05:59 2003-04-07 12:59:11 15 27 307 347 3328 6752 29 64.80 29 64.50 CHANGED hDAp.hLcpAhcshG...Tc-psllcIlsoRoptphppIpptYpptau.+sLpcslp.s-hSGcacchLlsL .....................................-Ap..hLc.pu..h..+.G..h.G.............TD.E....ps...l...l....c..Il..s..s...R..o.p..t..p...l..p....p....Ip.p...tY.........p........p........h..a..s.....+......s..L.tc.....slc...s.......-..h....S.G.ca.cchllsl..................... 1 791 1389 2220 +49 PF03861 ANTAR ANTAR domain Yeats C anon [1] Domain ANTAR (AmiR and NasR transcription antitermination regulators) is an RNA-binding domain found in bacterial transcription antitermination regulatory proteins. The majority of the domain consists of a coiled-coil. 
20.10 20.10 20.10 20.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.33 0.72 -4.39 179 2458 2009-09-15 11:05:26 2003-04-07 12:59:11 9 45 1401 6 888 2155 145 56.00 30 24.33 CHANGED phpplcptLpsRc........hI-pAKGlLMpp.ps..ls.EpcAachLRctuhppshpltclAcpll .................................htplpptLpsR+.............ll-pAKGlLMpp.ps..lo.EpcAachLpctuMcpphplt-lAptll................ 1 317 595 768 +50 PF04729 ASF1_hist_chap Anti-silence; ASF1 like histone chaperone Mifsud W anon Pfam-B_3167 (release 7.5) Family This family includes the yeast and human ASF1 protein. These proteins have histone chaperone activity [1]. ASF1 participates in both the replication-dependent and replication-independent pathways. The structure three-dimensional has been determined as a a compact immunoglobulin-like beta sandwich fold topped by three helical linkers [2]. 20.70 20.70 22.80 23.80 20.60 19.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.76 0.71 -4.58 24 449 2011-09-19 02:26:53 2003-04-07 12:59:11 8 4 313 29 308 425 2 145.60 53 66.27 CHANGED M.S...hVslpslplh.sNPusFpsPapFcIoFEslpsLp-D.....LEWKllYVGS...............................Apspca...............................................DQhLDslhVGPlst.GhppFsFcussPshppIP....p-llGVTllLlossY+spEFlRVGYYVNNpYp-pELcENPPs.....ps.l-+lpRpIlsscPRVTRFsIsW- ................................MuhVplhsVtVL.sNPu.FhsPapFE.IoFEClE.sLpcD...............LEW...KllYVGS...............................................AcSc...ca..................................................DQ.LDSlLVGPl.Ps.Gh........ppF.lFpA..............Ds.Psssh.IP...psDhlGVTVlL.lT.C.oYcGpEFlRVGYYVNNE.Y.s..s..t..EL......+.....E.s......PP.s..............KP....h.-+lpRNIL.AppPRVTRFtIsW-........................... 0 115 168 246 +52 PF02822 Antistasin Antistasin family Bateman A anon PDB Family Members of this family are inhibitors of trypsin family proteases. 
This domain is highly disulphide bonded. The domain is also found in some large extracellular proteins in multiple copies. 21.20 21.20 21.40 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.67 0.72 -3.63 60 460 2009-09-11 13:25:17 2003-04-07 12:59:11 9 70 93 8 298 535 14 27.10 43 9.73 CHANGED Csthp..C...phtCshGappD.p.sG.Cp...h.CpC ..................Cstlt....C..........phtC.s.aGFppD.p.pGCp...h..CcC 0 107 126 218 +53 PF00847 AP2 AP2-domain; AP2 domain Bateman A anon Pfam-B_409 (release 3.0) Domain This 60 amino acid residue domain can bind to DNA [2] and is found in transcription factor proteins. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.71 0.72 -3.81 206 5868 2012-10-03 08:51:45 2003-04-07 12:59:11 15 52 728 5 2816 5799 189 52.10 39 17.05 CHANGED sth+GV..phcp.hs+...Ws..Aplh................t...pphhlGpass..-..cAtpshptsthphpst .....................tYRGV....Rp+t....hG+...........Ws..AEI+c..s.........................p....pRlWLGT...FsT..sE....cAA+AYDtAAhph+G....................... 0 596 1740 2384 +54 PF02424 ApbE ApbE family Bateman A anon Pfam-B_1963 (release 5.4) Family This prokaryotic family of lipoproteins are related to ApbE from Salmonella typhimurium. ApbE is involved in thiamine synthesis [1]. More specifically is may be involved in the conversion of aminoimidazole ribotide (AIR) to 4-amino-5-hydroxymethyl-2-methyl pyrimidine (HMP). 
20.00 20.00 20.20 20.10 19.00 19.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.54 0.70 -5.10 245 4122 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 2989 11 985 3311 1196 240.90 28 74.58 CHANGED lpclsp.hSs..a..c..ss...S-lsclNpsss.hp..h..tlss-hhp..llppuhclsphosG...sFDsTl.GPLh.sLW.......uFssp............thPs...tppls................pthth..sshpp.hh............ttphhhppsshtlDLuuIAKGausDplsph.L..ppt..........Glp.shlVphG.G-ltsh..Gpp....pup.........s.......Wplul..p..s..P..............................ppt..........phht..hl.....plps.t...uluTS..G.sYcpaa.....s..Gp+apHllDP...pTGhPhp...pphsSVTVl..usss....ht.....ADuhuTulh..lh.G.....cp.uhphscp....ps.........ls.s......hhl ..............................................................................tthcphhSsa.p.....ts.....Spl...clNp.....ts......u....h......p.....sh........tl.s.t.phhp.llp.huhchuttosG....thDlol.GPLl.phW.........thG.pt................sphPs.......tp.pl..p............................................thh....t..h.....s.s.hpplhh.....................tppp..l.hhpp....ss.htlDLuulAKGYusDplsp.....h.....L..cpp.......................................Glp...shllsl.G.....G.s.lhsh......Gtp........sp.sp.........................s...............WpluI...p..c...P..p............................................tps.shhs.....hl....plss...t................ulsTS...G.sYc.pah..p....hs...............Gpc..apHllDP..........pTGh.....Plp...pp.....lsS.V.T.Vl.....u.s..s.u.............h.p............A.DuhuTulh.....sh..G.........pp....uhphhpp......ts....lt.shhl............................................... 2 336 675 848 +55 PF04049 APC8 Anaphase promoting complex subunit 8 / Cdc23 Wood V, Finn RD anon Pfam-B_13808 (release 7.3); Family The anaphase-promoting complex is composed of eight protein subunits, including BimE (APC1), CDC27 (APC3), CDC16 (APC6), and CDC23 (APC8). 
20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.99 0.71 -4.46 26 298 2009-09-11 06:08:17 2003-04-07 12:59:11 8 107 258 0 221 308 6 146.10 29 24.42 CHANGED Mssp.......slpcl+ppLhtushpho-+sLhQSAKWu........AEhL.uLs.........................................ps.hsspsstpsttp...........spEh.-tYLLAKSYFDs+EYDRsAahLcssp...............................................SpcuhFLtLYupYluGEK+cpE-sptslssts.........sstNcchs ................................................h...........htpl+hpLhtuhhphscRsLhpuuKWu.....................AEh.L.uLs..................................................................................s....h.ts.ss.ptt.t...............................p-h.stYh..LAKoYFDs+EYDRsAahlps..sp....................................................op+uh..F.LthYupYLuGEK+..+p-.-p.t.hss.p................................................................................................................. 0 76 124 184 +56 PF04106 APG5 Autophagy protein Apg5 Wood V, Finn RD anon Pfam-B_12134 (release 7.3); Family Apg5 is directly required for the import of aminopeptidase I via the cytoplasm-to-vacuole targeting pathway [1]. 
20.30 20.30 21.40 21.20 20.20 19.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.15 0.71 -4.83 34 341 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 279 5 222 339 2 200.20 30 65.17 CHANGED WahPlGlLaDLhsssssh........................................................PWplplHass....aPsptLl.hssc..........cslcstahsslKEAsal+sGs.......scsIhshscpDpppLWpulhs..cshcpFhpIspKLh............................s.psth+pIPlRlYlsss..................hthlQphlpsh.................tpsp.h.TLt-hLpphLPshh.................................................stp..........ptphspsllpGIclsh..csPlp.lscphsasDsFL....aIslhhh ........................................WahPlGlLaDlhs.sss.h..................................................................................................................................................................................................................PWplsl.Haps....aPpp.Ll..hssp.......................cslcstFhsslKE.......ADhl+pts........psl.s.hp+cDp.ppLWtul.s..c........cac.pFh..tlsp+Lh.....................................................................................s.ttssh+pI.....PlRlYhsss..................................................................h.hlQphh..t..sh...........................................tpsp.p.TLschLpplh.Pshh...............................................................................................................................tp..................t.thspshlpG..l.pssh....csPlt.lscphu.asDsFLalslh............................................. 1 76 119 183 +57 PF04602 Arabinose_trans arab_transf; Mycobacterial cell wall arabinan synthesis protein Waterfield DI, Finn RD anon Pfam-B_4670 (release 7.5) Family Arabinosyltransferase is involved in arabinogalactan (AG) biosynthesis pathway in mycobacteria. AG is a component of the macromolecular assembly of the mycolyl-AG-peptidoglycan complex of the cell wall. 
This enzyme has important clinical applications as it is believed to be the target of the antimycobacterial drug Ethambutol [1]. 25.00 25.00 28.00 27.90 17.90 22.20 hmmbuild -o /dev/null HMM SEED 657 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.09 0.70 -6.34 7 497 2012-10-03 03:08:05 2003-04-07 12:59:11 7 7 176 8 90 457 0 565.40 47 61.09 CHANGED cussps.RIARhVAslAGlhGhlLslssPLLPVpQTTATlpWPQ....sGplsplTAPLluhsPpsLslTlPCpAsAsLPsss....GlVlSTsPttGh-AscsGLhlcsspssVsVshRssVsssAPRstVsu....ssCphlchhAsssussA-FVGlst.....suGp.......ss-hRPQlsGVFTDLpsPAssGLuhpssIDTRF.TuPTsLKhhAMlLGlssslhAllALthLDphsttth+h...hlPhtW.................hssthsDssVlusLllWHllGAsoSDDGY.LThARVup+AGYhANYYRaFGosEuPFsWYaslLAhhApVSTAulWMRLPshlsuluCWLlISRpVLPRLGsu...lsssRsAhaTAGuhFLAAWLPFNNGLRPEPlIALGlLlTWsLVERAluhpRLhPsAlAhllAhhTlsluPpGLIAlAALLsGuRsltplltRR+ptsGhhs.lAPLhAAsoVlhhllF+-QTlATVlEus+lKhsVGPolsWapEaLRYYaLhlto.sDGSluRRFAVLshlLCLFsslhhhLRRu+lPGsApGPsWRLIGhThhohLhLhFTPTKWshpFGsFAGLuGulGALsAhAsophuL+oRRNhslalsAlLFVLAhuhAulNGWaYVSNaGVPW.st.PhlstaslTohFLsLollsuLlAuWhHFph 
.............................t.........h..+hhAhluG...llGh..lLslhsPlLPVpQssAplsWPQ...............sG...p.h.s.....sls..APLluhsPhshsholPC.ssht.....s...Lss..st................slllu.ThP.tt..u...c.A...st...p...uLhlps........s........t......s......t......lsls.Rssslh.s.sshppl...u....ssC.ptlpltustst....stA......phsGl.t.............sGs.....................................s........shRPQlsGlFTDL.......p.......s.....s.......A......s....s.....G..........L...shpsslDoRasooPTsLKhhshll.ulhh..slsuLlALhhLDp.h.-Gt..pth.c..................hhP.spW...........................................................hp....phhDssVlusLlhWHhlGANoSDDGYlLsMARVupcAGYhuNYaRWFGsPEuPF.GW.YYslLAhhspVSs.Aol.W.MRLPsLhsulssWhllSRcVlPRLG.u....ltss+..sAh..WsAAhVFLuhWlPaNNGLRPEPlIALGsLl.TWs.lERulutp.RLhP.uAlAslhAuhTLssuPoGLhAlusLls.uhpslh.pllh.+.............R.............p..................t......h.........................G.............h.h....s....h...lA......PhhAAuollhhslFtDQTLuslhEus.+l+ttlGP...ohsWapEhlRY.hLh.h.o.sDGSluRRFulLhhhlsL.hssl......hhhLR......+..t.RlP..GsAtGPs.hRl....lGhh....hhohhhhhFTPTKWsHHFGlaAGluuul..uAls.slshuth.sh+.sp...RNRshah..u.u..l......l.F.l..hAhuhuuhNGWWYVusaGVPW.st.Ptltuhsh...oohhL...s....Ls.llshhhAsW.Hhh.......................................... 0 15 61 81 +58 PF03079 ARD ARD/ARD' family Mifsud W anon Pfam-B_2276 (release 6.4) Family The two acireductone dioxygenase enzymes (ARD and ARD', previously known as E-2 and E-2') from Klebsiella pneumoniae share the same amino acid sequence Swiss:Q9ZFE7, but bind different metal ions: ARD binds Ni2+, ARD' binds Fe2+. ARD and ARD' can be experimentally interconverted by removal of the bound metal ion and reconstitution with the appropriate metal ion. The two enzymes share the same substrate, 1,2-dihydroxy-3-keto-5-(methylthio)pentene, but yield different products. ARD' yields the alpha-keto precursor of methionine (and formate), thus forming part of the ubiquitous methionine salvage pathway that converts 5'-methylthioadenosine (MTA) to methionine. 
This pathway is responsible for the tight control of the concentration of MTA, which is a powerful inhibitor of polyamine biosynthesis and transmethylation reactions [1,2]. ARD yields methylthiopropanoate, carbon monoxide and formate, and thus prevents the conversion of MTA to methionine. The role of the ARD catalysed reaction is unclear: methylthiopropanoate is cytotoxic, and carbon monoxide can activate guanylyl cyclase, leading to increased intracellular cGMP levels [1,2]. This family also contains other members, whose functions are not well characterised. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.82 0.71 -4.28 11 964 2012-10-10 13:59:34 2003-04-07 12:59:11 9 12 776 3 436 1144 153 149.50 30 82.56 CHANGED phahh--psp.Dp+lPccpsPtchhphpcLsclu.hhha+lsscsppsscchpplhch+shshhsh..........spssscphPpa--KlcpFaEEHlHsD-ElRahlcGsGYFDV+..sps-sWIRlhlcpGDLIllPsGIhHRFTsssssalKAhRLFsssstWss .................................................................................................................p.t.t...p.hs..t......t...........tlt..h..h......ch.s..t.s..................s.s.p.p..h..t.hltthp.sh..t.h..................l.hshps.s..t..P.s....h....-....p.h....lppF.hpE....H.hH.s.D.-.E.lR.ahl.sGpG..h..F.s.l+.....sp.s.....ct....ahclhscp.GDLIslPAGhhHh.F..s.h.s.s........s.s.h..lp.A............lRlFssp.sWss.............................. 0 130 246 354 +59 PF01412 ArfGap Putative GTPase activating protein for Arf Ponting CP, Schultz J, Bork P anon SMART Domain Putative zinc fingers with GTPase activating proteins (GAPs) towards the small GTPase, Arf. The GAP of ARD1 stimulates GTPase hydrolysis for ARD1 but not ARFs. 
27.50 27.50 27.50 27.90 27.00 26.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.97 0.71 -4.31 141 3394 2009-01-15 18:05:59 2003-04-07 12:59:11 13 165 341 30 2020 3186 33 114.20 32 18.59 CHANGED pphlppltp...h.ssN.phCsDC...s..stsP.pWsSls.hG.....lalClcCSGl...HRs.LG..s.......HlS+V+SlsLD..p..Wsscplphhpth.GNp.pssphacsp.....................sst..........ssppphcpaI.ppKYtp+t.as..pst .....................................p..hhpplhp..h.ssN...phCsDC....s............u.s....P...pWASls.hG............lhlClcCS..Gl.H.R..s.LG.s.............................H...lS.+V.......+....S..ls.LD........s.....W.ss.p.......p..l......p......h..........h.pph...GN.s.t.ss.p..haEsp................................t...s.ss.......................ssppphcp.aIpsKYpp+tah..t.................................................................................... 0 652 1022 1510 +60 PF01388 ARID ARID/BRIGHT DNA binding domain Bateman A anon [2] Domain This domain is know as ARID for AT-Rich Interaction Domain [1], and also known as the BRIGHT domain [3]. 21.00 21.00 21.00 21.30 20.80 20.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.86 0.72 -4.13 53 1730 2009-01-15 18:05:59 2003-04-07 12:59:11 16 96 289 17 1077 1669 7 89.90 29 8.00 CHANGED pspspppppFlcplhpFhcppGss..........h.tchPt................lst+.lDLapLaphVpchGGhc......pVscp+p........WsclupcL..shss.....ttssu...........ppL+stYt+hLhsYE ..................................................t.....tctpFlppLhpFhc.p.p...G...s.s................................h..p+.h.Ph....................................l..s.t+.LDLapLaphV.p.ch.G.G.hp....................p.V.sp..p.+t.............................W.p.clsppL..sh..s.s........s.pssu...................s.sL+ptYp+h.L.hsaE........................ 
1 277 474 782 +61 PF04683 Proteasom_Rpn13 ARM_1; Proteasome complex subunit Rpn13 ubiquitin receptor Waterfield DI, Finn RD, Coggill P, Wood V anon Pfam-B_4497 (release 7.5) Family This family was thought originally to be involved in cell-adhesion [1,2], but the members are now known to be proteasome subunit Rpn13, a novel ubiquitin receptor. The 26S proteasome is a huge macromolecular protein-degradation machine consisting of a proteolytically active 20S core, in the form of four disc-like proteins, and one or two 19S regulatory particles. The regulatory particle(s) sit on the top and or bottom of the core, de-ubiquitinate the substrate peptides, unfold them and guide them into the narrow channel through the centre of the core. Rpn13 and its homologues dock onto the regulatory particle through the N-terminal region which binds Rpn2. The C-terminal part of the domain binds de-ubiquitinating enzyme Uch37/UCHL5 and enhances its isopeptidase activity. Rpn13 binds ubiquitin via a conserved amino-terminal region called the pleckstrin-like receptor for ubiquitin, termed Pru, domain [4]. The domain forms two contiguous anti-parallel beta-sheets with a configuration similar to the pleckstrin-homology domain (PHD) fold [5]. Rpn13's ability to bind ubiquitin and the proteasome subunit Rpn2/S1 simultaneously supports evidence of its role as a ubiquitin receptor. Finally, when complexed to di-ubiquitin, via the Pru, and Uch37 via the C-terminal part, it frees up the distal ubiquitin for de-ubiquitination by the Uch37 [5]. 
25.00 25.00 25.70 25.50 22.50 24.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.14 0.72 -3.90 78 393 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 279 4 246 369 1 86.10 40 26.93 CHANGED G+hphc....s......phVpPcscKG.hlhlhpsc.-....sl............h+FpWpsRss.......spsEp.......-lll..hPs-spFpclsps............psGRVasL.+Fpsusp+.hFFWhQ-t ..............G+hplc....s......phVpP-t+KG.hlaltp......s-.D.......sL.................lHFsW+sRss.........spsEc................DLIl..hPsDspFp+V..sps............soGRValL.KF.pu.....uspR...hFFWhQ-................... 0 78 132 204 +62 PF00514 Arm Armadillo_seg; Armadillo/beta-catenin-like repeat Bateman A, Ponting C, Schultz J, Bork P anon SMART Repeat Approx. 40 amino acid repeat. Tandem repeats form super-helix of helices that is proposed to mediate interaction of beta-catenin with its ligands. CAUTION: This family does not contain all known armadillo repeats. 20.60 20.30 20.60 20.30 20.50 20.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.45 0.73 -7.89 0.73 -3.96 242 12724 2012-10-11 20:00:57 2003-04-07 12:59:11 18 321 451 497 7518 13312 326 40.80 24 18.23 CHANGED ssp.ptphll..psGslshLlpLLp......sps.plpppAshuLsNlss ..................tptphll..psGs.lshLlpLLp...............sss.pl.pp.p.AshALsNls....................... 0 2088 3612 5404 +63 PF00339 Arrestin_N arrestin; Arrestin (or S-antigen), N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Ig-like beta-sandwich fold. Scop reports duplication with C-terminal domain. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.74 0.71 -4.39 42 2044 2012-10-02 22:29:00 2003-04-07 12:59:11 24 33 301 26 1377 2143 4 138.80 19 29.63 CHANGED hslhls....p.chlaasG..-sl......sGpVll.sspp.hcs.RtlhlplpGts+sshpc.cshshphpc.............t...p....shhchpphLhpp..............h.hGsasaPFpaplP...ss.PsShpuphG........tlcYpl+shl....-ts.hchspp...pppshhVlphlshs ..........................................................................................................................h.ht....p..t......h..ss.......p.l............sGhlh..l....p......t.....h......ph...p..t.............l.hlphput..hcht....h...p...p.....ps..h...s..p...tt.......................................................................p.t....th.h.p.hp.t.t....l.h.tp..........................................h.........G..p.a..p.....a.P.F.ph.p......l.....P.......t.s..h....P.....s..S..h..p....h.p....G......................................tl.pYp.l.cuhl...............cps.....t..h...p......hpp.hhllp.h........................................................................... 0 452 648 1092 +64 PF02752 Arrestin_C arrestin_C; Arrestin (or S-antigen), C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Ig-like beta-sandwich fold. Scop reports duplication with N-terminal domain. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.69 0.71 -4.06 173 1981 2012-10-02 22:29:00 2003-04-07 12:59:11 17 29 291 26 1311 1936 0 151.20 19 30.82 CHANGED tsstlphplslsppuas.Gpslslplpls.......N.pos.hs.........lpplphpLhpphpahspt.......................................................................ppp.ptpphlsp..pttssl....................................tstppsphp..hpl...........plP...........................................................slss...os.........................................................psphIplpYplclplphs........................t..plphplPlhl..hps...sh ..............................................................................................................................................t.ps.lplphsls+..ps.as.....G.-s...lsl.p.h.pls.......N.po..s.hs.....................lp...plcsp...lh.pph...shhsp...............................................................................................................................................................................................ppp.phpphlsp..tpstsh............................................................ssss..shp.......hpl......lP..........................................................................................................................slss.......ohh.........................................................psphIpl.pYp.lc....lplpls.........................................t...httcltl..clPlhlhp...................................................................................................... 0 417 606 1041 +65 PF04959 ARS2 Arsenite-resistance protein 2 Moxon SJ anon Pfam-B_5269 (release 7.6) Family Arsenite is a carcinogenic compound which can act as a co-mutagen by inhibiting DNA repair. Arsenite-resistance protein 2 is thought to play a role in arsenite resistance [1]. 
21.10 21.10 21.20 21.70 20.90 21.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.52 0.70 -4.15 8 282 2009-09-11 08:45:49 2003-04-07 12:59:11 8 12 212 1 189 287 1 200.40 25 26.08 CHANGED MPNRCGlIHVRGPhPsN.RITpsE..............................VsEapKoaEEKLuPLLuh+-sLSE-EApKMG+KDPEpEVEKFVoANTQELuKDKWLCPLSGKKFKGPEFVRKHIhNKHu-KlEEV+KEV-FFNNYLtDAKRPuLPE.KPh.sPGsst..ssshuP.uhs.YsPQ..sPQuhhPaGtP...RPPhhGa........s....GGPsFPPs.aG.......................uGRGNYDsFRGQssthuhP+s.R.hcGs .........................................................................................................................................................................h......p..a...+s..h-pKl..h.Lp....p...p.................-....h..pK..hGt.K...s.....s...-....p....tl.Echlp...t.s..pc..scs..K.....ahC..Ph..s.......u..KhFK..us-Fl+KHlhpKHs....Ehl....c....c..l...+...p....E...l...t...aaNN...Yl.....h...D...Pp+..P.s.h...sc.........t...t.....s....s.s..............................t.s......s.......a....t.....................h.................................................................................................................................................................................................................................................................. 0 65 107 155 +66 PF01037 AsnC_trans_reg ASNC_trans_reg; AsnC family Finn RD, Bateman A anon Pfam-B_773 (release 3.0) Family The AsnC family is a family of similar bacterial transcription regulatory proteins. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.17 0.72 -4.27 263 13775 2012-10-02 00:20:33 2003-04-07 12:59:11 16 36 2996 126 3987 9832 2190 73.20 21 46.93 CHANGED ltlplp.pp.........hcphtctltp......hPEVhpsatloG.shDall+lhs.pshpshpphltp...ltplssltps.poplllpp .......................................................lplp.pp.........hpphtptlpp...........hs....E..V..h.psahl.oG..chDall+lhs.p......sh.pphp..chltch....l...t.p...lsu...l.pps.pohllhp............................ 1 944 2215 3156 +67 PF05118 Asp_Arg_Hydrox Aspartyl/Asparaginyl beta-hydroxylase Finn RD anon Pfam-B_2775 (release 7.7) Family Iron (II)/2-oxoglutarate (2-OG)-dependent oxygenases catalyse oxidative reactions in a range of metabolic processes. Proline 3-hydroxylase hydroxylates proline at position 3, the first of a 2-OG oxygenase catalysing oxidation of a free alpha-amino acid. The structure of proline 3-hydroxylase contains the conserved motifs present in other 2-OG oxygenases including a jelly roll strand core and residues binding iron and 2-oxoglutarate, consistent with divergent evolution within the extended family. This family represent the arginine, asparagine and proline hydroxylases. The aspartyl/asparaginyl beta-hydroxylase (EC:1.14.11.16) specifically hydroxylates one aspartic or asparagine residue in certain epidermal growth factor-like domains of a number of proteins [1]. 20.10 20.10 20.10 20.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.92 0.71 -4.41 11 1101 2012-10-10 13:59:34 2003-04-07 12:59:11 10 40 713 5 388 906 1501 146.90 31 44.69 CHANGED pLppNWptIR-EuLtLts...httuttas-tuasshhKssWcpahLhhhsstpssAtphs.PpTssLLcplPpspu.................shFupL.PGuclhPHpDPasus.....lRhHLGLsTP.s-tChIcV-spcpsW+-GEsllFD-oa.H.utNpo-psRllLhhDl.+P. 
............................................................................h..ptpW..IRpEh..tl................t.........t...........s.........s..........s........h......ct...sWcp..F..hL..h........hh...s.t............t.h....ss.....s.t.............p..hC....P.....pTs.pl..l.p..p..l.Pshpu............................AhFuhL..sPGscls.Hc..s...P....hsup.........lRhH.L..GL.h.......sP.....p.............s......t............C....hl.....p..V.....s...s.....p.....p.....h....s....W.c.....-.....Gcs.llFD.-o.a.H.shN..c.....s...s.....p......s...RllLh.sDl.+P............... 0 129 195 286 +68 PF01177 Asp_Glu_race Asp/Glu/Hydantoin racemase Finn RD, Bateman A anon Prosite Family This family contains aspartate racemase, maleate isomerases EC:5.2.1.1 [1], glutamate racemase, hydantoin racemase and arylmalonate decarboxylase EC:4.1.1.76 [2]. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.87 0.70 -4.43 177 8372 2012-10-03 04:39:14 2003-04-07 12:59:11 17 13 4320 108 2004 6434 2979 210.50 21 83.24 CHANGED lGhl..sstushshh...ppl.........tphhsst.......................hlhhtssp.............hhptt.............................h..hhh..............pssptl.t.......tssc................................sllluCsos.shhtshp.....tt....thP...l.ls.hscss............ht........th.t......thp+...lullus..hsshtsthhpphlpp..G.....l........p..t.......th........shhp...hup.hsst...htthhpthp............ppss....-sllLu.CTphshh.phhpth..t..........h.......llDsspshsp....ts..h 
...................................................................lGhh..Sshuslohh....cpl....................hc.hlsppp...................hl.hhtsss...........thP..........ppstpp......................................lhph.hh....................p.h.sp.tL.p........tssc.............................................hlllAC..N..Tu..p..s......s.s.h.thh........................pp.h..slP...............l..lu..lhpsu............................hc..................shtp......stsp+..........lull...uT......psTh...p.s..s...h...a...p...pt..lpph..s......l........................p..t..................th..hh............pls-.........tup..hpsp........shphltphlp................h.........ppss.............-sllLG.CT.chsll.hs....l..pphh.t............l.......llDostthsct..h..................................................................... 0 548 1190 1650 +69 PF01400 Astacin Astacin (Peptidase family M12A) Bateman A anon Swissprot Domain The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. Members of this family contain two conserved disulphide bridges, these are joined 1-4 and 2-3. Members of this family have an amino terminal propeptide which is cleaved to give the active protease domain. All other linked domains are found to the carboxyl terminus of this domain. This family includes: Astacin Swiss:P07584, a digestive enzyme from Crayfish. Meprin, Swiss:Q16819, a multiple domain membrane component that is constructed from a homologous alpha and beta chain. Proteins involved in morphogenesis such as Swiss:P13497, and Tolloid from drosophila Swiss:P25723. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.34 0.71 -4.91 26 2029 2012-10-03 04:41:15 2003-04-07 12:59:11 19 142 328 13 1290 1935 24 172.70 33 37.99 CHANGED ppWsps......IPYhlssshsspp+shlppAhpcacppTCl+Fh.ps.......scsshlhhhpssGCh.ShVG+p..sGt.QslSlG.pGC..phGllsHElhHAlGFaHEpoRsDRDcalsI.apNI.sut.tsFpKhs.spssshGhsYDYsSlMHYsstuFSpss.hsTlhs+sst...hpssIGQRhphShhDlpplNphYpCss .................................................................................................h..W.........lPYh.l....s.....s..p..........h........s..t...t...........p..p......t......h.l..hpAhppa...p.p......p......T......C.lc..Fh.ps.........................pppt....h..l.........h......h..............p......s.......s....G...C..h....S...h.l....G..............+............p.......s...........G............t........Q...t..l.....S......l............s.......t............s........C.................p........h.......G.............h.l....hHEl...hHulGFaHEpoR......s.......D......R......D.........p...a....Vp.I.......h...p........N...I.....................s.......s..h........p.......h................N...............F.......t.....c.......h..............p.....t....p.s........s...............s................h.........s.h.........s..........YD..asSl.....MH..Yu.t......h........s.............F.....o.......t.....s.......s.........................s........T.......Ihsh.s...........................ts.l.G...Q..R.....t.hSt..hDlh.......pls..thY.pC..t................................................................................... 0 532 658 1119 +70 PF02178 AT_hook AT hook motif SMART anon Alignment kindly provided by SMART Motif At hooks are DNA binding motifs with a preference for A/T rich regions. 
14.50 0.10 14.50 4.00 14.40 -999999.99 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.40 0.75 -6.23 0.75 -3.59 40 2152 2009-11-26 15:21:48 2003-04-07 12:59:11 14 167 542 4 1233 2051 224 12.60 52 7.35 CHANGED p+tRGRPpKstsp ...tRtRGRP+Kst..t.... 6 380 638 979 +71 PF03029 ATP_bind_1 ATP-bind; Conserved hypothetical ATP binding protein Griffiths-Jones SR anon Pfam-B_1301 (release 6.4) & Pfam-B_2154 (Release 8.0) Family Members of this family are found in a range of archaea and eukaryotes and have hypothesised ATP binding activity. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.56 0.70 -4.83 30 1967 2012-10-05 12:31:07 2003-04-07 12:59:11 12 16 660 9 1105 2459 404 206.90 26 79.02 CHANGED VsGssGuGKTTassulschh.hpth...hlNLDPus..h.a.s.hslc-hlohsclhp-..hslGPNGulhhuh-...........................ahphsl...h...tccl...........................tt..s...hhLFDTPGQlEhathhsshs.hhp.h....phshssVhLlDopphsc...sssahushh.s..hhh.hplPalsslsKhDlhs.........h.hh.t........................hppLspslsphl-phshs.phhssuspsppuhpsLlshl-pthpt 
............................................................lhGshGsG..........K..........TTas......t.slpp.hh..................h....p.......................................................................................................................................................................h....c..-......h.ls.h.t.slhp-..................hslu.s.p..u...u.h.h...hu.....h.-......................................................ahph...sl................pppl.....................................................................hh..LFss.....P...G...Q..h..............c......hah.h.h.s..s..hs..............................phsl.s..slh.L....l...D.o....p..p.h.s.s.........ss.cahss..h...............h...h..............hc.l.P.al.ss.lsKh...Dhhp...........................................................................................................................pthspsltthlp.h.....s..s.....hhs..s...t...p.p.t.pu.hptlhthlcps...h...................................................................................... 
0 386 688 948 +72 PF00306 ATP-synt_ab_C ATP synthase alpha/beta chain, C terminal domain Finn RD, Griffiths-Jones SR anon Pfam-B_15 (release 1.0) Domain \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.85 0.71 -3.60 150 20853 2009-09-12 08:32:44 2003-04-07 12:59:11 22 34 11148 406 3509 16060 5135 103.40 33 21.66 CHANGED tplusplpth....LtpacELpshsp....h..hG.-tL.u-pD.+h...h...Lp..+.uc+lcc.hLpQ....stassh...........s.................................lpc...plphh.....h.tllph..hpphs.pth.ttl........t............hpphcphtp ......................................csAppl+psLppY+ELp-lls........l..lGh......D-Lo-pD.+hp...lsRA++lpc.aLpQ...shassp.....s..t.phhs........................................................................lt-...slpsh.....h.tlhp..G...hD..pl....P.ppth..hhh.t..sh..s.......................tt......................................................................................................... 0 1226 2225 2960 +73 PF02874 ATP-synt_ab_N ATP synthase alpha/beta family, beta-barrel domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes the ATP synthase alpha and beta subunits the ATP synthase associated with flagella. 20.90 19.00 20.90 19.00 20.80 18.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.90 0.72 -3.76 196 22044 2012-10-02 13:55:04 2003-04-07 12:59:11 18 33 11118 393 3707 16715 4934 65.20 30 14.01 CHANGED lsplhGsVlslthst.s.hsslhsslplp..c...........tp.hh.h.uhshpLtsspVtslshssTsGlpc.GspVhsTG ..................................................p.hGsVlsV.......t.......s...........s......p.....h......sl..s..shtschhc.................................s..ss.h.tlt.pLtssp..Vtslshu.s..o..........cGlpc.G.pVhcTG.................. 
0 1311 2362 3125 +74 PF04718 ATP-synt_G ATPsynth_g; Mitochondrial ATP synthase g subunit Waterfield DI, Finn RD anon Pfam-B_5977 (release 7.5) Family The Fo sector of the ATP synthase is a membrane bound complex which mediates proton transport. It is composed of nine different polypeptide subunits (a, b, c, d, e, f, g F6, A6L). The function of subunit g is currently unknown. The conserved region covers all but the very N-terminus of the member sequences. No prokaryotic members have been identified thus far [1]. 25.00 25.00 26.40 26.20 24.20 24.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.28 0.72 -3.37 37 369 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 264 0 237 356 3 99.30 30 72.48 CHANGED sscssphsst.slhau+stlphh+phh+sE.LsPP.s.uchpphhpshhphhp............hpsss......hh..hp..pls..hc-ushsulhusEllsaFhlGElIGR.RpllGYps .................................................................h.thssthsst.slhau+...stlp.hhhhhh+sE..lsPP.osuch...phhpslhphhp.................hpsss............h+...pls.............sc-shhsul..husEllsWFhlGElIGR.tpllGYp................... 0 67 119 188 +75 PF03768 Attacin_N Attacin, N-terminal region Finn RD anon Pfam-B_2791 (release 7.0) Family This family includes attacin and sarcotoxin, but not diptericin (which share similarity to the C-terminal region of attacin). All members of this family are insect antibacterial proteins which are induced by the fat body and subsequently released into secreted into the hemolymph where they act synergistically to kill the invading microorganism [1]. 23.90 23.90 25.30 35.40 23.70 23.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.85 0.72 -4.10 17 105 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 32 0 30 120 0 66.10 44 30.45 CHANGED htGolTsNscGuussss+lshs.sscshhsAlGuV.hsssp+....hussTtGlshs.NssGHGhSLo+o+ .......huGSlouNPsGGusAplclshs.lGssptsslGpVFAuuNop......uGPVTsGssluhNssGHGhSLT+TH.. 
0 7 9 20 +76 PF03797 Autotransporter Autotransporter beta-domain Bateman A anon [1] Domain Secretion of protein products occurs by a number of different pathways in bacteria. One of these pathways known as the type V pathway was first described for the IgA1 protease [2]. The protein component that mediates secretion through the outer membrane is contained within the secreted protein itself, hence the proteins secreted in this way are called autotransporters. This family corresponds to the presumed integral membrane beta-barrel domain that transports the protein. This domain is found at the C terminus of the proteins it occurs in. The N terminus contains the variable passenger domain that is translocated across the membrane. Once the passenger domain is exported it is cleaved auto-catalytically in some proteins, in others a different protease is used and in some cases no cleavage occurs [1]. 22.00 20.80 22.00 20.80 21.90 20.70 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.69 0.70 -5.04 212 8209 2012-10-03 17:14:36 2003-04-07 12:59:11 14 136 1526 14 904 6497 851 238.90 16 26.88 CHANGED sstpsshWspshushtp....pspsstsuhp......tphsGhtlGs-tth.......s.sshhlGhshuhspuphp....................s.stsupspspshtsuhYsph............................psshhlsuths..auhhcp.......thts..............tptpupapspshssplpsuaphth..........pshtlpPhsplpahtsptss..apEps.....t.hshpht.pshpslpsplGlchphpht..............................thpsh.hphshtap.hsstppsstss.....t......hsht..hscsshthpsGsphpls.pphsl.hhshs.....tptspss 
..............................................................................................s...t.shWhphhusp.p........ts......s..s........thp...................tphtshpl..GhDhhh.................s.sph..hh..Gh.h.huhspsphp..............................ssttup.s.ps...pu...ht..huhYush...............................tsshalcsh..hp..hshh.pt..........t.ts...........................t.psph.p.u.pshsss..lcs..G.h.p.ath..............pshhlpP....s.p.l.pa....t..hpt.ss..ap-ss.......................s..hp.h.p..s.pshpshts....plG...hch.shp.hpht......................phpsh..hphshhpp...hssstp...ht...hss..............................shttt.hst.s..th..phthGh.ph..p.hs....pphsh..hhshs........h......ttt................................................................................................. 0 205 439 657 +77 PF03547 Mem_trans Auxin_eff; Membrane transport protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs & Pfam-B_5261 (Release 7.5) Family This family includes auxin efflux carrier proteins and other transporter proteins from all domains of life. 
25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -11.80 0.70 -6.17 25 9093 2012-10-02 17:06:44 2003-04-07 12:59:11 13 27 3153 0 2760 7555 2542 180.00 10 85.92 CHANGED sshpsll.lhllhllG.Yhus+h.plhss-pspslNchVhhhulPsLhFpplupssshphhhphhhhslhhhlhshlshhlsall.....s+.hh+hshchptslhshushsNssslGlPlLtulaGs...................tuhuhhlhhsslsslhhaolhh..................hlhp.putph.h.tt.s........................................................................................................................................................................................................................................................................................................................................................hhh.h..hhhh+hlhsPsshusllullhuhl.sahhsh.h..................phltpolslLusuulPhshhsLGlhLuhp.hhhshssthsth.........hhlRhllsPhlhlshshhhu..l.s..hhpshllp............sAhPsuhsshlhAppaslp.chsSsslhhuhllulhshslah 
............................................................................................................................................................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..........................................................................................................................................................................h................................................................................................................................................................................................................. 0 820 1729 2289 +78 PF02310 B12-binding B12 binding domain Bateman A, Mian N anon Pfam-B_359 (release 5.2) Domain This domain binds to B12 (adenosylcobamide)[1-3], it is found in several enzymes, such as glutamate mutase Swiss:Q05488, methionine synthase Swiss:Q99707 and methylmalonyl-CoA mutase Swiss:P22033. It contains a conserved DxHxxGx(41)SxVx(26)GG motif, which is important for B12 binding [2]. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.47 0.71 -4.24 133 8147 2009-01-15 18:05:59 2003-04-07 12:59:11 14 80 2972 71 3031 7333 2597 115.00 22 16.83 CHANGED hpslhssstschaslGht.hlushlcp.t.Ga.........-Vh..hl.......shps...ps................................cpllp.tltph.ps-l..l.ul.....Ss..........hssshsth..tclhct......hcphtsplhlllGGshsshs.ph...t........hu.sshhhGpG.csshtslh ...................................................................................................s.phlhuphttDsHD.l.G....tp.ll....ush....l....ps....t...Ga............-.Vh......l.......s...h.hh.....ss.....................................................................................-cllc...tA.h.cp...ps-..l......l.Gl..........Sul.....................hss...s...h...s.t...h....hp.l.hct...............hcp..t....t....s....l.h..ll.lG.G.s.h..s...s....tsh.....t...................t.s...................h....................................................... 
0 1180 2176 2679 +79 PF02607 B12-binding_2 B12 binding domain Bateman A, Eberhardt R anon Bateman A Domain This B12 binding domain is found in methionine synthase EC:2.1.1.13 Swiss:Q99707, and other shorter proteins that bind to B12. This domain is always found to the N-terminus of Pfam:PF02310. The structure of this domain is known [1], it is a 4 helix bundle. Many of the conserved residues in this domain are involved in B12 binding, such as those in the MXXVG motif. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.39 0.72 -3.95 255 3838 2009-01-15 18:05:59 2003-04-07 12:59:11 12 34 2594 16 1269 3424 1047 78.30 34 9.66 CHANGED ttphhpplhpullph-ppthpphlpp...........sl.p..........hsshpllpchlhsshpclGchap.psphhlsp.hhuuphhcpslshlhs ...........................................ltc+LppullcG.pchlppssc-..........................Ah.pp.s.........................hpPlclIpssLhsGMsh.VG-LFs..pGchFLPp..VlpSAcs.MKpAVuhLpP............................. 0 482 904 1109 +80 PF02362 B3 B3 DNA binding domain Bashton M, Bateman A anon Pfam-B_582 (release 5.2) Family This is a family of plant transcription factors with various roles in development, the aligned region corresponds the B3 DNA binding domain as described in [1] this domain is found in VP1/AB13 transcription factors [2]. Some proteins also have a second AP2 DNA binding domain Pfam:PF00847 such as RAV1 Swiss:Q9ZWM9 [1]. DNA binding activity was demonstrated by [3]. 
30.10 30.10 30.10 30.10 29.90 30.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.41 0.72 -4.37 114 2169 2012-10-02 12:51:43 2003-04-07 12:59:11 16 44 101 2 1310 2264 0 98.00 24 23.90 CHANGED Fhphh.....h.ssshpp.....shl...slPppF..sppp.........th..........tt.plhlps..pG............ppWphph.........pppspt.....hhls.......pGWppFspspsLp.sGDhlsFp..h..tt.psh..hlplhpts ..............................................Khh.h.oss.spt......s.th......slPcpaucph..........................t................tt..plh.h.cD..hpG.............ppWph+a...h.....................pspspp.......hhLs.......sGW.pp.FVpsppLt.sGD..sllFh......h....ps...pph..hltlh...s......................... 0 219 746 1021 +81 PF01313 Bac_export_3 Bacterial export proteins, family 3 Finn RD, Bateman A anon Pfam-B_898 (release 3.0) Family This family includes the following members; FliQ, MopD, HrcS, Hrp, YopS and SpaQ All of these members export proteins, that do not possess signal peptides, through the membrane. Although the proteins that these exporters move may be different, the exporters are thought to function in similar ways [1]. 25.00 25.00 25.00 25.30 23.60 24.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.49 0.72 -4.30 202 3289 2012-10-03 02:46:00 2003-04-07 12:59:11 14 2 2171 0 652 1501 334 75.20 37 85.64 CHANGED p.l..l...slsppAlhlslhluuPhllsuLlVGLllulhQAsTQIpEQTLoFlPKllulhlslhlhusWhhspl.hsaspp .......t..llthsppAhhlsLhluuPhlllAlllGLllulhQAsTQIpEpTLoFlPKlluV.hlslhlhusWhhshLhsasp.t........... 0 203 390 513 +82 PF02673 BacA Bacitracin resistance protein BacA Mian N, Bateman A anon COG1968 Family Bacitracin resistance protein (BacA) is a putative undecaprenol kinase. BacA confers resistance to bacitracin, probably by phosphorylation of undecaprenol [1]. 
20.80 20.80 20.90 20.90 20.60 20.00 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.61 0.70 -5.19 287 4568 2012-10-03 02:02:08 2003-04-07 12:59:11 13 2 3924 0 995 3043 2915 255.80 36 94.07 CHANGED pAllLGllpGlTEFLPlSSoGHL.llssplluhp................s.uhsFslllQlGollAVllhFhcclhplhtshhpt...................................tptst+hhhhllluslP.ssllG.ll.hcchIc.....shh.t..shh..........h...luhsLllhGlllhhu-phspp......................hcshcplshpsAlhlGlsQslA.L.l.PGsSRSGuTIsuuLhlGhs.RcsAAcFSFlLulPshhuA..uhhclh....................c.hhp...........hss..s.sh....h.lhlGhls...uFlsuhlslchhlcalp+p...shhsFuhYRlllG....lll ................................................................................AllLGlVEGlTEFLPlSSTGHl.Il.s.s.c.l.lshp...............................shspsFpllIQLGulLAVllhahc+lhtlht.thhtt.........................................tptshphhhpllluhlPusl..l.G....ll.......hc..D..h..Ic............shh........ssh..................s..VuhsLllsG.llhlhs-...phppp.........................................................................thpslccloappAhhIGhh.Q.sLA.l.l.PGhSRSGuTIsGGllhGh.s.Rp.sAu-FSFhLul...PshhGA.ss.lclh.........................K..hhph..............hss....s...sh...........shh.hlGhlsAFlluhluI+hhlpalp+t....satsFuhYRlllGhl............................... 0 336 666 853 +83 PF01011 PQQ Bacterial_PQQ; PQQ enzyme repeat Bateman A anon Pfam-B_1319 (release 3.0) Repeat The family represent a single repeat of a beta propeller. This propeller has been found in several enzymes which utilise pyrrolo-quinoline quinone as a prosthetic group. 20.30 20.10 20.30 20.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.89 0.73 -8.05 0.73 -4.17 130 3998 2012-10-05 17:30:42 2003-04-07 12:59:11 16 133 1153 135 877 8732 2893 36.40 26 10.04 CHANGED sthhhs..shsGtlhAlDspT.GchhWphpsssss.stshs ................h.hh...shsu.lhAlDhpT..GchhWphphssss.................. 
0 220 462 667 +84 PF03704 BTAD BAD; Bacterial transcriptional activator domain Yeats C anon Yeats C Family Found in the DNRI/REDD/AFSR family of regulators. This region of AFSR (Swiss:P25941) along with the C terminal region is capable of independently directing actinorhodin production. This family contains TPR repeats. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.57 0.71 -3.89 64 2191 2012-10-11 20:00:58 2003-04-07 12:59:11 12 200 672 3 901 2383 154 140.00 25 19.33 CHANGED lDlppFcch.sttGptshttG.c.tpAsptLppALuLW+GssLuslts......tshhpspst+LcEtRlpsh-tph-....scLpLG...Rtp..chlsELpsLlspaPh+EphatpLMhALaRsGRpu-ALpsYcclRptLs-ELGl.-PuspLppLpptlL ..................................................................hDh.tFtth...htt..u..thh..t.t....t....p............t.t.uh..th..h..pp..AL..s.........L.........a..+..G...s.s.Ls.s.hss....................h..h...p...s..t..t.t....p...L..c....c...h...pl..p....s..hc..p...hs..c.....ttl.thG..............ct.s......ps..ls.t..L.pp.lls........tcPh..c..EphhttLhtALh.psGR...p....uc...ALps.Ycch+phL...t.c.E.LGl.-.PusplptLhptlL.................................... 0 399 719 858 +85 PF01426 BAH BAH domain Bateman A, Aravind L anon [2] Domain This domain has been called BAH (Bromo adjacent homology) domain and has also been called ELM1 and BAM (Bromo adjacent motif) domain. The function of this domain is unknown but may be involved in protein-protein interaction [3]. 
20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.87 0.71 -4.35 41 2185 2009-01-15 18:05:59 2003-04-07 12:59:11 13 109 308 34 1324 2175 2 128.50 19 14.46 CHANGED tphplG-hVhlpsss....................................scs.....hhlsplhcl.......................................................................................................................................hpsssss.......thlc..spha..........hRPp-s....hhspthsp.......p..Elahosc...ptsh.hpslpu+CpVhhtsch.stp..h....................tsshF...aCchhYss.pptsFppls ...............................................................................................................h..hpl.G.DhVhl.psps.......................................................................sp.........h..lu..cIpcl.......................................................................................................................................hpsppsp................hh.hp...spWa...............................................h+Pc-s...............t.tcthtp..............................pElF.h...osp..........h-shs....h.s.s.l..h.GK..CpVhh.h..p..c.....h.pp.h...psht..................................................ppcsa...hhphhYs..pptth..................................................................... 0 367 643 1003 +86 PF01145 Band_7 SPFH domain / Band 7 family Bateman A, Finn RD anon Bateman A Family This family has been called SPFH [1], Band 7 or PHB domain. Recent phylogenetic analysis has shown this domain to be a slipin or Stomatin-like integral membrane domain conserved from protozoa to mammals. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.28 0.71 -4.37 120 13903 2012-10-01 22:02:33 2003-04-07 12:59:11 20 43 4514 5 4366 10740 5942 188.90 21 54.61 CHANGED hhlss..................sphull...hp..hGchpp..............................shpsG....................hph.hhP....................hhpphh...hhshp.hpphphss................................................sshopDt...........hslsl...shsl.pa..+l..........................scshphhtph..............t.ps.......hpphlpshlpssl+shl....uphsh...p-lh.........ss.....................+.splspplppplp..........................................................pp.....................hpphG......................lp.lh..slp.lpclphsp...phtps...lppphtupppt......................ptphtpuctcs ..............................................................................................................hVtpsptull..........hp........hG+hpp..................................hh.psG....................................lph..hlP.................................hl..cplt......................hlshc...hpshchss..............................................................................................................................................psl.Tp...D.p................................ssl.p.l.......ss.s..l..ha....+l................................pcstph..hhsl.........................................................ps..............hc.p.t....lpp....h...spss...L....Rsll..............................Gph..sl.............-cll....................................................sp...........................................R.pp....ls.pp.l.pp.t..ls........................................................................................pp............................h..s..s...a..G.....................................lp..lh.....clp...l.p.......clp..sp..........plppu...............ht.pph.pAcppp......................pAthhpAcu..t...................................................
.......................................................................................................................................................................................................................... 0 1454 2620 3579 +87 PF03594 BenE Benzoate membrane transport protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.30 20.30 20.30 20.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.20 0.70 -5.91 9 1251 2012-10-03 01:44:59 2003-04-07 12:59:11 8 2 1036 0 255 994 220 346.90 45 93.78 CHANGED sshs.SsllAG.hlAslluYuuslsIhapAA...psAtsossQhuSWlhululuhulsulhLShRa+sPlloAWSTPGsALLlouhsshslsEAlGAalVuuhLlhLsGlousFs+llppIPtulAuAMhAGILhtFulpshpAlsspPhLshsMlhsYLLsRpauPRYuVhhlLlsGlshuhhhGphphsslsh..clupPpalsPsFShtAhlsLALPLhLVuhsuQ.lPGhAlL+usGY.psPsuPllssTGLAShlsAPhGuholsLAAIoAAICpGP-AH.Dss+RYhAulhsGhFYllhGlFuuolluLFuuLPtsllshLAGLALLGulusuLttAhp--ppR-..........AAllTFllTASGhohlGlGuAFWGLlhGhlshhl .......................................s.sslhAG.hlAlLlGYuS.ShsllaQAA...tsuG...AosuQluuWhhALuluMGloolh.Lo.l..hYRsPl.lsAWS.TP.....G...A...ALL...l.o..u..lt..G..h........o.....h.s....-AlGsF.l....lsssLlllsGloGhFsRLhp.h.IPtulA..uAMLAGI...LLp.............FGl..psFsu...lssp.s...L....shsMlhsaL.....l...s...+...th...s...P...R..Y..........A.....l.....lu.....s.h.....lh.....Gl...s.....l...s....h..h.....t....G...p.l..s..h...s.s......l.t.h.....ph.s...hP..sa..l...sPpFS.hushlul.A.l.............PLFlVTMASQNhPGlAsh+AsGY.....p..........sPsuPllshTGLhuLlhuPFGsaulslAAITAA.IC.tus-AH.D.scRahAuhssGlFYllsGlFGuslsulhsALPhshlthLAGLALLuoIuuuLhpAh.p..s..p.cpR-..........AAllsFLVTASGloLhGIGSAFWGLluGhlshh.................................................................................................. 0 54 114 193 +88 PF02944 BESS BESS motif Bateman A anon Bateman A Motif The BESS motif is named after the proteins in which it is found (BEAF [2], Suvar(3)7 [3] and Stonewall [1]). 
The motif is 40 amino acid residues long and is composed of two predicted alpha helices. Based on the protein in which it is found and the presence of conserved positively charged residues it is predicted to be a DNA binding domain. This domain appears to be specific to drosophila. 20.80 20.80 20.80 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.34 0.72 -4.37 43 441 2009-01-15 18:05:59 2003-04-07 12:59:11 15 14 44 0 223 465 0 36.70 27 9.80 CHANGED pDsDp.hFLhSlhPtl+pLsspp+hch+hclhpllhch .....ssDp.hFLhSlhPhl+pLss.pp+hch+hclhpllh-...... 0 53 68 167 +89 PF02369 Big_1 Bacterial Ig-like domain (group 1) Bateman A anon Bateman A Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in bacterial surface proteins such as intimins and invasins involved in pathogenicity. 30.40 30.40 30.40 30.40 30.30 30.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.43 0.72 -4.10 20 5706 2012-10-03 16:25:20 2003-04-07 12:59:11 11 124 515 5 385 5116 64 96.30 26 34.44 CHANGED hls..phpAsloplh.......Asss-ssTlTAoVpDtsGsPlssppVoF.....ssstssLsss.....shTcssGhAploLouo......psGstsVoAols.sssss.pspsV .................................s.....tuph.s......h.h...t....hhA..s...sss.s.so.lpA..sVp.D...s...p.G...N.s.l..s..s..t.sVsF............ssssss...ls.s.s......................spTss.sGh.A.p..l.o..l....o.u.s.........p.s..G.s..hs.V.oAo.ls..ssssp.p.................................. 0 82 201 296 +90 PF02785 Biotin_carb_C Biotin carboxylase C-terminal domain Griffiths-Jones SR anon ref [1] Domain Biotin carboxylase is a component of the acetyl-CoA carboxylase multi-component enzyme which catalyses the first committed step in fatty acid synthesis in animals, plants and bacteria. Most of the active site residues reported in reference [1] are in this C-terminal domain. 
20.70 20.70 20.70 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.09 0.72 -4.06 109 10242 2009-01-15 18:05:59 2003-04-07 12:59:11 14 97 4650 113 3077 8078 3751 106.20 38 14.54 CHANGED EsRlhAE..DPtps.....F.hPus.G.plsthp.h...............P............susulRlDou.ltp.....Gs.plssaYDsMlAKlIsa..ussRppAlp+hppAL.schp..ltG..lpTNlsaLppllppspFtsu.....phsTsal- ................................................EsRIsAE......DPtps.........F..hPus.G.......c.l.p..php..h...................................P.........................uG..GV...RlDou..l..h.s................Gh..slsPaYDSMluK.l...............Is..a......G.p..s.....Rpp.Alt+hppAL..p.E..h..h.............l..c.....G.......l......p......T.Nlsahh.p.llps..sF.tsG.........shsTpal-....................... 0 942 1879 2586 +91 PF02012 BNR BNR/Asp-box repeat Bateman A anon Bateman A Repeat Members of this family contain multiple BNR (bacterial neuraminidase repeat) repeats or Asp-boxes. The repeats are short, however the repeats are never found closer than 40 residues together suggesting that the repeat is structurally longer. These repeats are found in many glycosyl hydrolases as well as other extracellular proteins of unknown function. 23.00 12.80 23.00 12.80 22.90 12.70 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.75 0.76 -6.08 0.76 -3.20 188 830 2012-10-02 00:45:24 2003-04-07 12:59:11 15 46 547 34 224 1738 836 12.00 57 2.20 CHANGED hhSpDsGpTWpt ...hSsDsG+TWp... 0 106 173 212 +92 PF00528 BPD_transp_1 BPD_transp; Binding-protein-dependent transport system inner membrane component Bateman A anon LMB bacterial genome group and Prosite Family The alignments cover the most conserved region of the proteins, which is thought to be located in a cytoplasmic loop between two transmembrane domains. The members of this family have a variable number of transmembrane helices. 
23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.97 0.71 -4.78 81 156339 2012-10-03 05:18:07 2003-04-07 12:59:11 17 117 4892 36 36523 106567 36248 195.90 15 65.39 CHANGED ulhlGhhsu...hhhsphhcphlhshh.hhh.....slPshh.......lhhl......lh..............................................shht.tshhs.h..lhlhhhhhsshshhhptthlp.tlspshhcsucshGhsphphlhphhlPsuhsslhsshhhshsts.ltssshhphlhs.........lGhhhhpuhhshshs.h.................................hhhhhhslhhlllsllhshlhphls.+hpt ............................................................................................................................................................................................h.hGh.hhu.................h..ht..h....h....h....p..t....h...h...t...h...h...h........l...h.h.........s.l..P..sll............................h.hhh.................h.h.hh.....................................................................................................................................................................................................................h.t.h.ht....h.....s...h...h...s....s..........l..l...s....h....s....h....h...h......h.......s......h......h.......h...h......h....h..t....s....s....l.p....s....l...s..p...s....h.......h-..A............A.p.s.....h.G.s.............s.p....h.p.hh.h.clh..L.P...t..s...h....s...s...l.l....s...s.h...h...h.s.h...s......t...s....l...s......s......h....s...h...s...t......h......l........s......................................h.s.t...h...h....h....t.....s.......h.....h.....t....h....t...h.s.hh.......................................................h.hh.h.s..h.s..l.h...h...l...l...l..s.h.l..h...h.h....t.h.h......t............................................................................................................................................ 
0 10630 22134 29422 +93 PF02237 BPL_C Biotin protein ligase C terminal domain Bateman A anon Bateman A Domain The function of this structural domain is unknown. It is found to the C terminus of the biotin protein ligase catalytic domain Pfam:PF01317. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.15 0.72 -4.27 50 3072 2012-10-01 19:11:18 2003-04-07 12:59:11 12 11 2996 75 760 2212 402 47.40 30 14.56 CHANGED hlGcpVplpt..tstpl..pGhspuI.DcpGtLllctssu.............hctlhuG-l.ht ...................lG+tVplhh.....ssppl.....pGlupuI.DcpGtLllcpssG.......................hp.s.lhuG-lsl................ 0 221 459 621 +94 PF03099 BPL_LplA_LipB BPL_LipA_LipB; Biotin/lipoate A/B protein ligase family Bateman A, Reche P anon Reche P Domain This family includes biotin protein ligase, lipoate-protein ligase A and B. Biotin is covalently attached at the active site of certain enzymes that transfer carbon dioxide from bicarbonate to organic acids to form cellular metabolites. Biotin protein ligase (BPL) is the enzyme responsible for attaching biotin to a specific lysine at the active site of biotin enzymes. Each organism probably has only one BPL. Biotin attachment is a two step reaction that results in the formation of an amide linkage between the carboxyl group of biotin and the epsilon-amino group of the modified lysine [2]. Lipoate-protein ligase A (LPLA) catalyses the formation of an amide linkage between lipoic acid and a specific lysine residue in lipoate dependent enzymes [3]. The unusual biosynthesis pathway of lipoic acid is mechanistically intertwined with attachment of the cofactor [5]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.78 0.71 -4.11 66 11826 2012-10-02 14:22:40 2003-04-07 12:59:11 14 48 4791 113 3031 8364 4175 120.70 19 40.71 CHANGED huppt.sst............hthpthppsslhlsccpouGp.....tsacs.hu.slhaollht.t....p...h..h..shthshssh.csL.....................t..s..h......h.sDlhh............ss+Kl..uGlhhch......pt...ttthpthslslshs ................................................................................thh...p.p...s.....s...l......l.......s..c.p..p..o..G....Gp..............ts.a...p..s..............ts...s...lhhol.....l.....h......h......t.......p...t.......................................htth...........shhht.h.shl..csL.........................................................t..u.s.p.sp.......hp..h.N.D...l..hl.........................................ss.+.Kl......uGlhhch................pp......sst.hp....t..h.s.lslshs........................................................ 0 1020 1916 2578 +95 PF02485 Branch Core-2/I-Branching enzyme Mian N, Bateman A anon Pfam-B_842 (release 5.4) Family This is a family of two different beta-1,6-N-acetylglucosaminyltransferase enzymes, I-branching enzyme (eg Swiss:Q06430) and core-2 branching enzyme (eg Swiss:Q02742). I-branching enzyme is responsible for the production of the blood group I-antigen during embryonic development [1]. Core-2 branching enzyme forms crucial side-chain branches in O-glycans [2]. 
23.40 23.40 24.30 23.50 23.10 22.30 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -5.22 47 1645 2009-01-15 18:05:59 2003-04-07 12:59:11 16 15 375 10 869 1568 50 236.40 20 55.03 CHANGED lAFhals.+GsLPhh.lW-pFhp...sH.cs..haslYlHsp.ss.hpp.h.t........sshF.sR.Is.SptVsaGphohlsA-++LLAsALh.D..sNchFlLLSpSClPLhsFsplapalhps..spSFl-shspss.ttp.sRas.p......hhP..clphp...............caRKGSQWhtlsRphAhhl..l..tDshhashFppaC...................................ssChsDEHYhsTl.........lshh..hsstssNpol.......TalcWs........pttsHPtpat....hpslo.chlpplp ......................................................lAahhhs..p...s....h.hh....hhhphh.....h....t...............pshas.lalDtc....s..t.ht.p.th.t......................ps.h.....s.s..h.h..h...s.......p..........t..l..ha..ush.ohlpAphphh..........tsh..........L.....p......................s......h.....s......h....caal.L.SusD..hPlpo.p...p...........l.h...phl..........p.....t...........................t......p...p.................s.....h..h...p..p..h...p......t..s............s...h.h...........h.p....t+....hp.h.............hh......hthp.................................................................thphhtG....S.t............Wh..sLo.....Rpas....pal...l..........pD..ph...hhphh...phhp....................................psh.h..s...-Epa..atTl........................lp.....t........ht.....p.....s..h....h...s.ps.l.............................phh...pWs.............................tph.......t.hs.t.h....h.................................................................................................................... 0 213 446 651 +96 PF00533 BRCT BRCA1 C Terminus (BRCT) domain Bateman A anon [3] Family The BRCT domain is found predominantly in proteins involved in cell cycle checkpoint functions responsive to DNA damage. The BRCT domain of XRCC1 forms a homodimer in the crystal structure. This suggests that pairs of BRCT domains associate as homo- or heterodimers. 
BRCT domains are often found as tandem-repeat pairs [2]. Structures of the BRCA1 BRCT domains revealed a basis for a widely utilised head-to-tail BRCT-BRCT oligomerisation mode [3]. This conserved tandem BRCT architecture facilitates formation of the canonical BRCT phospho-peptide interaction cleft at a groove between the BRCT domains. Disease associated missense and nonsense mutations in the BRCA1 BRCT domains disrupt peptide binding by directly occluding this peptide binding groove, or by disrupting key conserved BRCT core folding determinants [5]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.45 0.72 -3.83 114 8489 2012-10-02 11:51:29 2003-04-07 12:59:11 21 297 4740 142 3410 8863 1950 78.30 25 11.50 CHANGED tttphhpshphhl.......tthpphp+ppl...pphlpphGupl.........................tsph.sppssall....ssps...................tt.tc.............httshphshtlls.pWlhcsl .............................................................................................s.....htGhshll...........sush.p..p..h.s.R..s.ch..............cph.l.p.p.h.G.u.+l................................................................................ssol....o...c.c...T..s...a.ll..............sGcs................................................su..sK.......................................ht.cA..p........p....h....G......l...p....l..l.s-p.hhp............................................................................................. 0 1124 1906 2771 +97 PF04089 BRICHOS BRICHOS domain Sanchez-Pulido L anon Sanchez-Pulido L Domain The BRICHOS domain is about 100 amino acids long. It is found in a variety of proteins implicated in dementia, respiratory distress and cancer. 
Its exact function is unknown; roles that have been proposed for it include (a) in targeting of the protein to the secretory pathway, (b) intramolecular chaperone-like function, and (c) assisting the specialised intracellular protease processing system [1]. This C-terminal domain is embedded in the endoplasmic reticulum lumen, and binds to the N-terminal, transmembrane, SP_C, Pfam:PF08999, provided that it is in non-helical conformation. Thus the Brichos domain of proSP-C is a chaperone that induces alpha-helix formation of an aggregation-prone TM region [2]. 21.10 21.10 21.10 21.30 20.90 20.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.12 0.72 -4.12 40 591 2009-09-14 09:13:14 2003-04-07 12:59:11 9 13 95 6 287 508 1 93.50 25 36.19 CHANGED huuucsushlaDappslsAh+shstppCYlhphspstlPshpsLtchhhph......ptpsth.ssphhcpphh.....sspplpDhshLG.tI.tpLCtshPhYhl .....................ssucsushl.aDFp..ps..loAhhs.h.......shp.......pCalhthsps.hl.s.cslhchhhph...................pttshh.p..ohh..hpcphh.....sspplp-...h...s..L.G...hI.hpL..CpshshYhh....................... 
0 42 57 123 +98 PF04427 Brix Brix domain Bateman A anon Dlakic M Domain \N 28.10 28.10 28.30 28.30 27.70 28.00 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.40 0.71 -4.64 149 1923 2010-01-08 13:34:18 2003-04-07 12:59:11 13 24 441 1 1305 1849 23 196.60 24 58.96 CHANGED l..hsu................p...psspphpphhcDLpplh..Psup........phs+tp......h.php......chhchst.ps....ssslllh..pppct.psstlhls+h.....spGPoh....pFp............l......ps...hphhc-l..........t...............Phllhsshtsp....................hphlpphhpshFss........hs..t.......phc+llshp.t.....................p.........pch................................IhhRpa.h...............................p........................................................................................................hpLpElGPRh.slc...........lhcl ................................................hpup..pss.tphpphhp-lpt.....lh...Psup.....php+tp...................t..plp....................phl.phsp.ps....soslllh..............pp....p..cp.....p....s.......ss......L......hls+h........PpG.Poh........pFpl......pshphhc.-.lhtthptsh.....spp..................................................PhLlhsshtsp........................hphltp.hh.tp.lFss...........sh.............thc+Vlshs.t...................p...cch.................................................................I.hRpa.h.......hhp.............p..............p.................................................................................................................................................................hpLtElGPRhslplhc........................................................................................................................... 0 469 738 1083 +99 PF03097 BRO1 BRO1-like domain Bateman A, Kim J, Mistry J anon Bateman A Domain This domain is found in a number proteins including Rhophilin Swiss:Q61085 and BRO1 Swiss:P48582. It is known to have a role in endosomal targeting. 
ESCRT-III subunit Snf7 binds to a conserved hydrophobic patch in the BRO1 domain that is required for protein complex formation and for the protein-sorting function of BRO1 [2]. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.12 0.70 -5.71 70 1087 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 297 31 739 1088 8 341.70 21 47.18 CHANGED shlslshKcop..pl-..hspsLppaIpppY...spsssh...a.....pcclpplspLR....pshhs.........psstsulch........................LhcYaspLphLpt+h..P....sppht.l...........pFsW.aDuh...........................tpst......shspps..ltaE+uslLFNluAlaoplAspp..........sp...p.ss-GlKpAsshFQpAAGsF....palp-s...........hhpsPoh....D..lstpsLpsLtpLhL..........................AQAQEshhtKslt...s................shcsullAK.......LuspsuphYpps.........hpshppsshtt...............................................hp........tpWhshlphKtta......apAlApYatuhsh.....................................pppppaGct.......l...................................Ac...............................LptAhptlp-Ahphtth...................ttttlhpslpthpsplppcLpptp+D.........NDhIYhp.lPs.ssls...slt.shshs+sl.shsp.h............pt.p...ssclFppLlPhsltput......ohas-cpsphlppph 
............................................................................................................................................+....s......hp..h.....h.thl.t.h...tp...................h............tpth.t.thtt.hR....................pthht............t...thhph...................................lh.pYhs..L.hl....t..+h.....s..............tpp.t...l.............................................Fp.W...hssh..............................................................ptt.................hs.ps.....ltaEhsslLaNluulhophus.pt................................sp.....s..sp.p...u.h....+.t.A...hphap....p......A.A..Gha........palpcp..............................................h..psss.......D.....hs.pslp....sL.plhL................................................................A.QAQE..shh.tKsht.......p...........................phpssl......lA.+.......lus...ps...sphYppu..........hpt.hp..p.....s..s..t..t..................................................................................................................h.........ppWhthl........phKtth......atAhAph..atuhth......................................ppppph...Gptl...........................................................up...............................L.ptA.phhp.puht.tt.............................................t...h.t.hpthh....ptlp...p.phpphp+-.........N-hlY.h.p.lPs......t.h...................ht...h.hst..sh......................................a..thhP.th....t.ht...........h........................................................................................................................................................................................ 0 248 387 590 +100 PF00439 Bromodomain bromodomain; Bromodomain Finn RD anon Prosite Domain Bromodomains are 110 amino acid long domains, that are found in many chromatin associated proteins. Bromodomains can interact specifically with acetylated lysine [3]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.74 0.72 -4.06 69 7921 2009-01-15 18:05:59 2003-04-07 12:59:11 20 370 355 282 4778 7646 90 84.20 26 10.39 CHANGED hpplh........p...phhppt...st.F.t.......stpthssYhp.......hl...ppPhsLspIpp.......+l...csspYp...s...................................h..tpahpDhphlhpNuhpa........st.tss.hhp....tupp ................................................................h...........h.ppp........huh...s...FhpPV........................stp..p..h..P..-.Y.a.c.........................lI............+p.PM....DLs.Tl.c.c..................................+l...............cs..p....p....Y.p.......s............................................................................................................................h...pca.h..........p.....Dh.pL.......hhpNshtY.............Nt..sso.lhp.u..h............................. 0 1623 2367 3612 +101 PF03909 BSD BSD domain Yeats C anon [1] Domain This domain contains a distinctive -FW- motif. It is found in a family of eukaryotic transcription factors as well as a set of proteins of unknown function. 20.40 20.40 20.70 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.85 0.72 -4.23 81 1184 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 308 4 822 1161 12 60.20 21 16.77 CHANGED hst.ppph.sh...t.p........tcphpplLppsss.LpphhpchV.......PptlscppFWppYFhththhptptspp ....................................s......ph.ph...p.p........hc.hppllppsPt...lpphatchV............Pptls.c....p.p....FWpp.aFhthhhht.ptt................ 0 250 448 674 +102 PF03092 BT1 BT1 family Mifsud W anon Pfam-B_1804 (release 6.4) Family Members of this family are transmembrane proteins. Several are Leishmania putative proteins that are thought to be pteridine transporters. 
One such protein Swiss:Q25272, previously termed (and is still annotated as) ORFG, was shown to encode a biopterin transport protein using null mutants [1], thus being subsequently renamed BT1. The significant similarity of ORFG/BT1 to Trypanosoma brucei ESAG10 (a putative transmembrane protein and another member of this family) was previously noted [2]. This family also contains five putative Arabidopsis thaliana proteins of unknown function. In addition, it also contains two predicted prokaryotic proteins (from the cyanobacteria Synechocystis and Synechococcus). 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 433 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.50 0.70 -5.77 12 726 2012-10-03 03:33:39 2003-04-07 12:59:11 11 11 173 0 486 924 508 341.70 21 71.34 CHANGED pGlus.lhphss.hhhp-chGlssuthQtLsslushsWslKshhuhlsDs.ashhGYp+R.YhhlSslhG.h.uhhauLLsuh.sSsshAuhhlhLuohuhA.sDlls-uhhschhRppPps..usuh.ShhWhh.hlGullushhsGsLs-thtsphshhloAsl.hlshl..................suhhlhcp..........................................................................................Eshthscs...sphhs.....+cslsp.W+hhhh.............ssIhtssL..............hhshashslslspA...................hFYhsTspht.....FohpFhspVt.llGsluuLlGVslasthhpphsaRhhhhloslhpsluulhDlIlVc+hNhhlGIsDa.hallGDullhplshhltaMPhlVLhuRLCPpG.EuslaALlhuhhsLGpssSutLGulLhcahh...hTpss.....asNLshLlllsslss.LlslPLshLL.s.t...c.hDtsschspcts 
........................................................................................................................................................................................................................................h..hsh..hhpp.hthp..ss..t.hth.....ht..shh.t.h.PWs.h.Ks...hhGh.loDs..h..s..l..h.G.a...+.....R+sY.h.h.l..u....t...hl...s.......h....h.........h..h............h.u..h..h...............s......t..............................................h.............................h......h......h..........h..h....h...h......s......h....s..hs..........h.....-.....l.h.-..uh...h..s.......p.......................t...............s..................................t............s............t............h.........................o...hh......h.h.........h...u.s..l.....h..........s............h.h.....s..........G........h.......h.......p.................h.......................p..................h.......h..h............hh...h.h...h............................................h...h........................................................................................................................................................................................................................................h.t...hthh.....................................................lh.shh........................................hh.h.h...h.........ts...........................................h..ah..t....h..............h.s...............hhs......hht..hhs.......thh.hhuhhhap...h..h.....t..p...........h...s....h..+.............h..h....h....h..t.h.h.....h...h..h.h.h.........l......t...h.s.....h........h............ls....st..hah.hh..t.hl.....phh..hthhPhhhh.........sphsP...u......E..ushauhhhuh.shu..h.ut.hu.h.l.thh..............hstt................aptl.hhlhht.h....hhsl.h......hl..........................tt...........................................................................................................................................................................................
....................................................................................................................................................................................... 0 200 338 460 +103 PF00651 BTB BTB/POZ domain Bateman A, Bardwell VJ anon Prosite Domain The BTB (for BR-C, ttk and bab) [1] or POZ (for Pox virus and Zinc finger) [2] domain is present near the N-terminus of a fraction of zinc finger (Pfam:PF00096) proteins and in proteins that contain the Pfam:PF01344 motif such as Kelch and a family of pox virus proteins. The BTB/POZ domain mediates homomeric dimerisation and in some instances heteromeric dimerisation [2]. The structure of the dimerised PLZF BTB/POZ domain has been solved and consists of a tightly intertwined homodimer. The central scaffolding of the protein is made up of a cluster of alpha-helices flanked by short beta-sheets at both the top and bottom of the molecule [3]. POZ domains from several zinc finger proteins have been shown to mediate transcriptional repression and to interact with components of histone deacetylase co-repressor complexes including N-CoR and SMRT [4,5,6]. The POZ or BTB domain is also known as BR-C/Ttk or ZiN. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.48 0.72 -4.08 96 15670 2012-10-02 01:20:04 2003-04-07 12:59:11 26 989 511 79 10101 15120 88 105.10 20 20.57 CHANGED hsp.pppt...hsDlslhlt.......pphpuH+slLuupSsYFpshFpsp.................tpts...........l.hpslssp..shptlLcah..Ysupl.h.............tpsltplLphA.phhpltslhptCpphlhpph ..............................................................h.pt....hs.D..ls.l..hlt...................tpph....sH......+...........s................l...............L...u....u...p.......S.s......a..F.....c..s...h..hpss........................................tcpp...........................tlp.l......p...s.......l......s.......s..p...........s...hp...t.l.....L..c...a..h.......Y...o....u...p...l.phs..................................tps....l...........p....l...l......t...s...A...p....hh......p..l......t..t..l.hphstphh....h..................................................................... 0 2968 4235 7306 +104 PF03437 BtpA BtpA family Finn RD anon Pfam-B_4453 (release 6.6) Family The BtpA protein is tightly associated with the thylakoid membranes, where it stabilises the reaction centre proteins of photosystem I. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.39 0.70 -5.13 6 562 2012-10-03 05:58:16 2003-04-07 12:59:11 10 3 505 0 178 689 382 246.50 36 93.56 CHANGED pcKPlIGVVHLhPLPGSspasu...........sLstVID+Alp-Apslp-uGhDAlIlENaGDtPa.Kp.Vs.tTVuAMolIssclpp-VulPlGINVLRNDuluAhuIAhulsAcFIRVNlLoGsthoDpGIlEGsAtELh+h++hL....su+.lclLADVtVKHAhahus..lpsslhDTlER..uhADAVIloGpsTGucsDl--LchAKcsss...sPVllGSGVs.cNlpphhphADGhIlGThlK+sGph.N.lDh-Rspplschscc ...............................................................................................................................pKslI.uMlHLpsLPGsPtass...........shptll-+Ahc-htsLpsGGVDulhhpN.ashP.a.h..p..c.....l.t.scssuuMuhl...htpl..tp.p.l..p........l.P.....h.....GVNVLh.DshuuhslAhAssA.cFIR......p.ha.sG.sas.uD....Glh.-.sssu-hlRa...p+pl....s.As.pl+llh.slhsctu..sh..L...u..s........c..s..l..........s..p.h.s+.s.sh.p...stsDAlh...Vo...G...hs.sG..s....p....s....s.........t...........L....c....p.l..+csss..........tsPVl.ssoGV.s.h.-.N..l.pc...L.s.h..ADGsl...luos.h....Kc....c...G.hhtN.....................V...D.tRVppFMctlp.p........................... 0 60 104 151 +105 PF03131 bZIP_Maf bZIP Maf transcription factor Mifsud W, Eberhardt R anon Pfam-B_482 (release 6.5) Family Maf transcription factors contain a conserved basic region leucine zipper (bZIP) domain, which mediates their dimerisation and DNA binding property [1]. Thus, this family is probably related to Pfam:PF00170. This family also includes the DNA_binding domain of Skn-1 (Swiss:P34707), this domain lacks the leucine zipper found in other bZip domains, and binds DNA is a monomer [2,3]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.04 0.72 -3.69 15 969 2012-10-02 13:17:30 2003-04-07 12:59:11 12 8 158 8 512 2370 5 91.10 34 20.20 CHANGED hSD-cLlohoVRELNRpL...+GhocEElh+LKQ+RRTLKNRGYApsCRhKRlpQ+csLcpc+scLppplcpLppEhuthppEpDulpt+hptLt .......................................os-pllshsVc-h...NchL........ctLo..c-plthl.....+.p....hR...Rp..h...KNR..shAQs....CRp++l...pphppLE....p...-h.pp.......L.ppc.h-cLh.p.Ep....s.p.h.tp.phcth+p+hpth.h................................ 0 105 151 298 +106 PF00168 C2 C2 domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.90 4.50 20.90 6.10 20.80 -999999.99 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.89 0.72 -4.14 382 23398 2012-10-10 12:23:49 2003-04-07 12:59:11 25 644 543 119 13594 21474 315 86.10 20 17.18 CHANGED LplplhpApsL.shc..........................................hps..psDPYVplplts.......tp...........................h+Tcshcps..hNPhWs.....Epatap..h..sphpp......LplpVh.Dpcthst...............cchlGpsp ............................................................................................................................................ltlhpu....p...sL.....s.h.s....................................................................................hts....ts..D....P...a.l.p..l..p..lt.s...........pp......................................................................................t+T......c...s.....h....p.....p....s.....h.........s..P..h....as..............-.p..h..t.ap...h.............t.p.hpp.......................Lp.l.p...Vh....D..t..c.t.htt...................schlGph.t............................................................... 
0 3560 5729 9404 +108 PF02743 Cache_1 Cache; Cache domain Bateman A anon [1] Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.49 0.72 -4.42 19 6574 2012-10-01 23:40:40 2003-04-07 12:59:11 13 246 1855 16 2046 5858 159 78.30 20 11.74 CHANGED lT-PYh-ss...........ssphVlThuhPlhs............ttphhGVluhDlslcsLhphhpplplGtpGYsFllstsGpllsHPspcshscpttt. ............................................................otsYh..s.t............................st..p..h..h..l..oh..u..t..P.l..hs.....................................................s.u.p.h..h...G...V....l..u....h....D..l....s....l....s........p....l....t...p....h....l...p....p.....h.......p..........h......s...p..s....G....a.s....h.l...l..s.....p.....s.....G.p..ll.s...+..sp.p.........h................................ 1 687 1216 1644 +109 PF04857 CAF1 CAF1 family ribonuclease Bateman A anon Pfam-B_1567 (release 7.5) Family The major pathways of mRNA turnover in eukaryotes initiate with shortening of the polyA tail. CAF1 Swiss:P39008 encodes a critical component of the major cytoplasmic deadenylase in yeast. Both Caf1p is required for normal mRNA deadenylation in vivo and localises to the cytoplasm. Caf1p copurifies with a Ccr4p-dependent polyA-specific exonuclease activity. Some members of this family include and inserted RNA binding domain Pfam:PF01424. This family of proteins is related to other exonucleases Pfam:PF00929 (Bateman A pers. obs.). The crystal structure of Saccharomyces cerevisiae Pop2 (Swiss:P39008) has been resolved at 2.3 Angstrom…resolution [3]. 
23.50 23.50 23.60 24.00 23.30 23.10 hmmbuild -o /dev/null --hand HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.68 0.70 -5.06 27 1134 2012-10-03 01:22:09 2003-04-07 12:59:11 15 22 328 16 721 1069 11 271.30 26 67.58 CHANGED pcVWppNhpcphphlpphlcphs...alAhDTEFsGllscshtp......htsss-.pYptL+pNVsthpllQlGLohh.....sppuphss..........shs.........................sapaNFp.Fshcp........chh.stcSIchLpppGhDFpcppcpGlshtphs.......................................................................................................................................................................................................................................................................................................phlhsSsllhst.p..lpWlsapusYDhuaLl+llp.s......tpLPppht-FhphlpthF.Pp....lYDlKhlhp..h..........................................................................hp...................ph....uLpclA-tLplpR.......................................................................................................sGp.tHpAGuDoLlTstsFhc 
...................................................................................................................................................................................................-VhtpNhppp.hthlpp..h.l..p..phs....alAh........Ds.EFs..Gls....s..p..shst................htsss-.p.Y.p...t.l+p..s....s.c..hh.pl..lQlGlohh...............sppsphs..........t.hs...................................................................................sapFN.Fp...Fshpp.........................chh..stsSlph.Lt......p......p......G.h....s......Fpch..pp..G..Ishhphs..................................................................................................................................................................................................................................................................................................................phh.h..h.Ss.l..l.ht.......p.....h.h.h.lsa..p..uhhDh.salh+hhhs..............sLPps...t.pFhphl...phhF..Pt..........laDhKalhp..t......................................................................................................................................................................t.psuL.pplt..c.t.L.thpp...........................................................................................................................................................................................................................................s....HpAG.DuhhTu.sah........................................................................................................................................ 0 236 379 557 +110 PF03135 CagE_TrbE_VirB CagE, TrbE, VirB family, component of type IV transporter system Mifsud W anon Pfam-B_843 (release 6.5) Family This family includes the Helicobacter pylori protein CagE Swiss:Q48252, which together with other proteins from the cag pathogenicity island (PAI), encodes a type IV transporter secretion system. 
The precise role of CagE is not known, but studies in animal models have shown that it is essential for pathogenesis in Helicobacter pylori induced gastritis and peptic ulceration [1]. Indeed, the expression of the cag PAI has been shown to be essential for stimulating human gastric epithelial cell apoptosis in vitro [2]. Similar type IV transport systems are also found in other bacteria. This family includes the TrbE Swiss:P54910 and VirB Swiss:P05353 proteins from the respective trb and Vir conjugal transfer systems in Agrobacterium tumefaciens. Homologues of VirB proteins from other species are also members of this family, e.g. VirB from Brucella suis Swiss:Q9RPY1. 20.50 20.50 20.60 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.95 0.71 -4.56 15 1506 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 797 0 316 1404 70 190.30 20 25.11 CHANGED phLuphpp.psltas-lLpFhsphls.Gppp.shtlsps..hlDshls......uplhhscct...........hhhcpssp....ppasuhlul+-Y.sscopsshlsslLptchEhllhpoFshhs+ppupshlp...ppphhstscputspltclsptlcphsusphshGhap.olhlaAcshppLccpstcspstLpspGhlustEoluh-suaaupLPuNhphpsR .....................................................................................................t.........pth.hsc.hpahphhls..sp.p...lhhspt....lsthls.......sphh..hstcp...............h...h.p..t.t.t......................ppasshl.s.lp....p.Y.s.phpsshhsh.hh......t........h.........s......h...p...hhhhpsa....p...hhs.......ppp....s..................hsh....lp...t....................p.............p...p.....................hh..............p....h....................s.................s.....s...u....ts...p......h....................t.-.hs.p......ul..p..p.lss..sph.s.h.G.a....pholh............lh..............u.......c....s............h....c.....plcppspts.t.sh.l...p.s...t.G....hhs..h.h-..sls.h.s..u..aauplPuphhhp.R...................................... 
0 58 171 237 +111 PF02515 CoA_transf_3 CAIB-BAIF; CoA-transferase family III Mian N, Bateman A, Heider J anon Pfam-B_887 (release 5.4) Family CoA-transferases are found in organisms from all lines of descent. Most of these enzymes belong to two well-known enzyme families, but recent work on unusual biochemical pathways of anaerobic bacteria has revealed the existence of a third family of CoA-transferases. The members of this enzyme family differ in sequence and reaction mechanism from CoA-transferases of the other families. Currently known enzymes of the new family are a formyl-CoA: oxalate CoA-transferase, a succinyl-CoA: (R)-benzylsuccinate CoA-transferase, an (E)-cinnamoyl-CoA: (R)-phenyllactate CoA-transferase, and a butyrobetainyl-CoA: (R)-carnitine CoA-transferase. In addition, a large number of proteins of unknown or differently annotated function from Bacteria, Archaea and Eukarya apparently belong to this enzyme family. Properties and reaction mechanisms of the CoA-transferases of family III are described and compared to those of the previously known CoA-transferases. 
20.70 20.70 20.70 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.21 0.71 -5.06 118 8297 2009-01-15 18:05:59 2003-04-07 12:59:11 12 30 2042 90 3017 7416 4895 185.50 29 47.03 CHANGED lsLDL+sspG+tlhpcLlp....pADVllcNh+PGshc+lGLsh...csLc....phN.....PcLlhsuloGaG.p....sG..Phss..psuaDh.shpAhoGl.....hsh.s..........sss.s...P.hhsuhs..lsDhsu.uhhuuh...ulLAALh....pRp..c.oGp..Gph....l-lohh-s.shth.hs.hhhthhts.s.th.t..........Gstp...........sssssh..shaps....tDG.......alsl..u.shssphWpth .................................................lsLDL+..s..scGpphht.cLl.t..............pA....D....Vll..EN..a+....P.G..sh.....c.+..h.Glsh......-.s.Lp......th..N.........P.+....Lla..ss.l.o.GaG..p............s.G.....P.h.ss....p.s..u......a.............Dh.l.h..p.A.h.uGh...........................hsh..s........G...........................ss....s...............P...h.....h.su...ss...........lu.D...hss.G.h.h.ush...ulL.u....ALh..............pRp......c......o........G.....c..............G.....ph..............lDsuhh-s...shs..h........h.t............h..h..h....t...h..h..t..s..u....t.st................t.h.................uttp.........................sshssh...ssaps...............tD..G...............hlhl..u...shpst..at.............................................................................. 0 656 1710 2447 +112 PF02888 CaMBD Calmodulin binding domain Bateman A anon Psi-blast P70604/413-489 Family Small-conductance Ca2+-activated K+ channels (SK channels) are independent of voltage and gated solely by intracellular Ca2+. These membrane channels are heteromeric complexes that comprise pore-forming alpha-subunits and the Ca2+-binding protein calmodulin (CaM) [1]. CaM binds to the SK channel through this the CaM-binding domain (CaMBD), which is located in an intracellular region of the alpha-subunit immediately carboxy-terminal to the pore. Channel opening is triggered when Ca2+ binds the EF hands in the N-lobe of CaM. 
The structure of this domain complexed with CaM is known [1]. This domain forms an elongated dimer with a CaM molecule bound at each end; each CaM wraps around three alpha-helices, two from one CaMBD subunit and one from the other. 25.00 25.00 28.80 28.10 23.70 22.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.63 0.72 -4.17 5 334 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 83 7 163 297 0 74.40 61 13.92 CHANGED DoQLTKEhKNAAAsVLQETWhIY....KHT+h.p+t-ppRlRKHQRKFLpAIHp...FRoVKhEpRKlsEQsNohsDluKs+pl ............DTQLTKR.lK.NAAANVLRETWLIY....KaT+Ll.......KKhDpu+VR+HQRKFLpA.I..Hp........LRpVKh-QRKLsDQANTLVDluKhQsl.................... 0 31 45 98 +113 PF01302 CAP_GLY CAP-Gly domain Bateman A, Finn RD anon Prosite Domain Cytoskeleton-associated proteins (CAPs) are involved in the organisation of microtubules and transportation of vesicles and organelles along the cytoskeletal network. A conserved motif, CAP-Gly, has been identified in a number of CAPs, including CLIP-170 and dynactins. The crystal structure of Caenorhabditis elegans F53F4.3 protein Swiss:Q20728 CAP-Gly domain was recently solved [2]. The domain contains three beta-strands. The most conserved sequence, GKNDG, is located in two consecutive sharp turns on the surface, forming the entrance to a groove [2]. 
24.90 24.90 25.10 24.90 24.60 24.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.56 0.72 -4.27 146 2400 2009-09-10 15:43:18 2003-04-07 12:59:11 20 120 319 45 1448 2206 31 68.20 38 10.48 CHANGED lG.pRl.pl...........tstthGsl+a......lG.psp.hss............................G.............h......WlGlEh.Dp....s....h.....GK....NDGo..l.pGh+YF..pC........p.sp.....tGhFl+spplp ...................................lGsRV.l............ssschGsl+a......lG...tsp.hss....................................G................h......WsGVEL..Dc.....P....t.............GK...................NDGo........V......p.........G......h+....YF..pC...........p..sp......tGlFspss+l.p.............................. 0 487 694 1078 +114 PF01039 Carboxyl_trans Carboxyl transferase domain Finn RD, Bateman A anon Pfam-B_299 (release 3.0) Family All of the members in this family are biotin dependent carboxylases. The carboxyl transferase domain carries out the following reaction; transcarboxylation from biotin to an acceptor molecule. There are two recognised types of carboxyl transferase. One of them uses acyl-CoA and the other uses 2-oxoacid as the acceptor molecule of carbon dioxide. All of the members in this family utilise acyl-CoA as the acceptor molecule. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 493 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.40 0.70 -6.15 34 11662 2012-10-02 13:07:06 2003-04-07 12:59:11 17 72 5959 146 2951 11205 7464 323.30 27 68.57 CHANGED tpascG+hssc-Rl-lll-sGS.Fsph-shhtpcssphuh...c..hPssullTGhGsltGptshlhupDhsshGGshushputK...lsch.chAlps...............GtPhlslsDuuGA....c.t-GVpsLpGhGpIFtpsspASu.sIPpIollhGsssGGuuY.PuLsDhslhVcs.tuhhalTGPsllc..............pVhG..EphospphGGuptHhtpoGluHhsupsD.-ulphl+chlSalP...sst.......ssPlhtshDssc+..............shlPss.ppsYDsRplIptlsD.............pupFhEhpssaApslVsGhARlsGhsVGllANpsp......................AG.sL..cSu.KsAcFlchCss.hslPllhLsshsGFhsGpcpEasGIl+aGAKllhAhucusVPtlolIst..cuaGGualVMsupthsschh...huassAcluVMGscGAssIlaRcchttts.........p.pth....................hppphtchccphsssYhssuptasDsllcPschRs+lshshphhhpcpt.hh.h+p+tpl .......................................................................................................................................................c..................p....h...................h...............t.....................................h...........t............................t.............................t......t....................................ssul....l.sG....h...G....p....l......p.............G.......h...................s.......h.l.hs.....D..hs.............hh.uGo.hush.sucK..........ls+hh-hAhpp...................................p.h.P.l..l.hl..s.s..S.GG.........A.......R...........h.....Q.......E............u......hh..............u......L.......h..............t...h..u..+...l..............t......s.........t..p.....h..p...........t..t...........ls.I.o.l....l.h.sP..s..sG..G..s.A...h.s..h......h.s..D.......h.......h.......lh..p.....uhl.hhsGPcVl.c...............................p.s..h....s......E......h....................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 955 1849 2497 +115 PF00755 Carn_acyltransf Choline/Carnitine o-acyltransferase Bateman A anon Pfam-B_438 (release 2.1) Family \N 19.40 19.40 20.50 19.40 18.70 19.20 hmmbuild -o /dev/null HMM SEED 591 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.77 0.70 -6.43 17 1813 2012-10-02 12:01:53 2003-04-07 12:59:11 15 34 362 41 1101 1697 26 442.50 26 79.93 CHANGED 
sLP+LPVPsLpcTls+YLc.ulcPlhs-.-paccopplspcFss...shGppLQchLhphuttp......sNWlscaWhphhYLphR.hPlslNSs.h..sshsph.hpsp.........psQhtpAuplhpshlpahptlcpcpl...........sphhtG....................ttPlsMsQapplFsosRl...PGhp...pDslhphtcsc.......HllVlp+spaaplclhcs..suphlo.s-ltpQLppIhppupp.psh.tsluhLTopsRspWApsRptLhpsss..NpcsLctI-pulFslsLDcs..................pss.cs....t..hphh.........htss.p...ssutNRWaDKohphllspsGpsGhshEHossDGhsllplh-h.h.hspphhc......................phhts.s......lshPp+Lpaplssp.hpspIpputpphpthhs-L-lhshpFpsaGKshlKp.pplSPDuFIQlAlQLAaY+.haG+hssTYEoAosRhFtcGRTETlRosos-uhcFVpuM...........s..sstott-.............+hpLhpcAsppHsphhcpshpGpGlDRHLhuLphlupt.....pslphP..c...hahcpsathu...sshhlS.....os..plssphhhhhs...aGPVssDGYGlsYphp.ppplhhslSuapSsspTsup+asphLpcAhp-hts ..........................................................................................................................................................LP.p.lPlP.lppThp...pa.lt..shps..l..........h........s...................p..............p..............h..............t......p..........h..........pt...h....sppF.t.......................t.u.........LpthL....ht....t.........................psa.........l..............t....p....hW.p.......h.h....ht....c.........sl.hss..............h.........................................................t...t....h..h.uuth.h..s.h...h..ah..t..lcpttl..........................s.h.hht...............................................Phs.tpa.h...hFsssRl...P...t.t........tD.h..t.......tsp......................HlhVhpps.pha..hh...h..ht.................s...p........h..p.......tpl.....p....hp........l...h...........p....t..s.t.....................................................t..........................lu.hLTst....tRs....Wu.ph+p....hh.t...tt.......NtpsL.p.h.lcp.uhhhlsLDp.............................t.t...............................................tt.h.h......tp.s...hsRWaDK....s.hp...h.ll...tsG...............p.h.uh..hEH.s.....h.Du..hhphh...p.hh..........t...........................................t.t.
...............s.s..ptl.pa..p.....hs..t........h..t........lt.......u.....t.th.t..t.hhpphp.h.h..h...a.t.t.......a.G..+.t..h..hKp...th..S.PDuhlQhs..hQ.hAa.a.p........h.s........p......................sY..Ess.h+hFhpGR.T-.....sh..R.sso.pshta...sps..h..................................................p.......t...s...tp.............................................................................hhthhptAhptH..th.h..t......s.....GtGhDRHLhsLh.h.t...............................................t....P.....t................h......s.......ath.......tp..h.p...................hs...........ph.s.s...h.......h...h...s....a.us.................s...................pG..aG..hsY......p...h...hthssh........................h..pst......th...l.t.h.ph......................................................................................................................... 1 386 559 894 +116 PF03378 CAS_CSE1 CAS/CSE protein, C-terminus Mifsud W anon Pfam-B_3786 (release 6.6) Family Mammalian cellular apoptosis susceptibility (CAS) proteins are homologous to the yeast chromosome-segregation protein, CSE1 [1]. This family aligns the C-terminal halves (approximately). CAS is involved in both cellular apoptosis and proliferation [2,3]. Apoptosis is inhibited in CAS-depleted cells, while the expression of CAS correlates to the degree of cellular proliferation. Like CSE1, it is essential for the mitotic checkpoint in the cell cycle (CAS depletion blocks the cell in the G2 phase), and has been shown to be associated with the microtubule network and the mitotic spindle [3], as is the protein MEK, which is thought to regulate the intracellular localisation (predominantly nuclear vs. predominantly cytosolic) of CAS. In the nucleus, CAS acts as a nuclear transport factor in the importin pathway [4]. The importin pathway mediates the nuclear transport of several proteins that are necessary for mitosis and further progression. 
CAS is therefore thought to affect the cell cycle through its effect on the nuclear transport of these proteins [4]. Since apoptosis also requires the nuclear import of several proteins (such as P53 and transcription factors), it has been suggested that CAS also enables apoptosis by facilitating the nuclear import of at least a subset of these essential proteins [5]. 19.60 19.60 19.60 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.31 0.70 -6.18 16 387 2012-10-11 20:00:58 2003-04-07 12:59:11 10 14 287 3 271 370 12 387.00 29 44.71 CHANGED ERlLhlpcsssp.slhsss-lsPhsppLLspLFphlphssS....pENEalMKClMRVLhllp-ullPh.ssslLs+LssIhphluKNPSNP+FtaYhFEulushlRhsspusss...sFEpuLaPsFssILppDVp.....................EFhPYV.........FQlhu......tLLEhss.ssslPs.sYhsLhsslL..................sPshWEppGNlPuLlRLLpAhIt+uuppI..sssspLpslLGIFQKLluSKs.sDppGF.LLpullpphPssslp.YhtpIhpllFpRLQsSKTs+FhpphlhFhphhss+p.....GushhIphh-slQsslFs.lh.plllP-hpKlutsl-RKlssluhTKhL.sEo.Ahhsp.YtKhWuhhhpuLlpLhchPspsss....c--hls.tDss.hGassuFopLssstpptcDPhP-..lsDsKhalup.LpchsptpsG+lsshlsppLss-uppsLtpY 
...............................................................................E+lhhh...p...t..s....t..tt...l....h.stscl.tPhs.p.LLppLFphl.ph.sso...............tENEalM+..slMRs.h.hhp........-sh.h..sh.hs.sllsp.Lhp.......hl.thls+N....PS.pPcFsHYhFEuluhh.....l+..h.ssp...s...ss.s..h.....phEps..LassFp.tILppDVp.....................EFhPYl..............FQ...lhu......hLLEh...p..................s......sl........P.p.sYhsLhs.LL..........................p.P.slW..-.pp.G....Nl.PuLlRLLpuhlp+ss..ptl..............ht..t..s.pltslLGlF.Q.+LluoKs...s-....pt...GF........LLpslltph...s...p...lp...a...h.tp.IhhllhpR.L.Q.s....u....+T...s.......c..ahpphl.hF..hs..hhshph.....................usshllphh.-.pl...Q....s..t............hFs.lhp...pl...h...lP....ph..p.p..l....................s.t...h...-.+Klssluho+hL..s.p.s.....h.hs....p.....h.tphWs...hhpuLlt...lhp...........s............t.s.sh...................pcc...h.........h..s....t.............-.s.....................u.......a.ts.u.......aspLshst.t.t..h..Dshsp....l...t...s...sp..alsp...Ltphs.tt.p.s.sth.thht..h................................................................................... 0 103 159 224 +117 PF00690 Cation_ATPase_N Na_K_ATPase_N; Cation transporter/ATPase, N-terminus Bateman A, Griffiths-Jones SR anon Pfam-B_138 (release 2.1) Domain Members of this families are involved in Na+/K+, H+/K+, Ca++ and Mg++ transport. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.10 0.72 -4.51 168 9625 2009-01-15 18:05:59 2003-04-07 12:59:11 21 104 3314 73 3641 8446 271 66.10 25 7.18 CHANGED hpphsscplhppL.......p.ss.......ppGL.op....ppsppRh....ppaG.Nplt.ptctpshhthhlppa.psshlllLlhuullS ...............................................s.pplhpph.................p...s......................ppG....L...os...............p-s......p....c..Rh.................ppa.G.....N.........p..l......s...tpc.....tpsh.......hh..hhhpp.a.ps..shh.h...lLhhuAllS................................... 
0 1148 2096 2947 +118 PF01545 Cation_efflux Cation efflux family Bateman A anon Pfam-B_232 (release 4.0) Family Members of this family are integral membrane proteins, that are found to increase tolerance to divalent metal ions such as cadmium, zinc, and cobalt. These proteins are thought to be efflux pumps that remove these ions from cells. 22.80 22.80 22.80 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.80 0.70 -5.34 89 12806 2012-10-02 19:55:49 2003-04-07 12:59:11 16 54 4824 10 4122 10055 1117 268.50 19 81.33 CHANGED lhlulhhshh.hslhclhsuhhs.sShulluDuhashhDhhuthlslhuhphu...................p+.sssppa.......sa..Gat+hEslsulhsulhllhh.uhhhhhp....ulpphlps....................tphphshh.hhh...................h.sllu.....lslshhhhhhhpp.......................................................................................................................................hp..ots....l.pusthchhs...Dshso....luslluhllhhhh.......................hhh..................hDslsulllulhllhsuhplh+puht.Llsts.sssthh...pplpphl...........hsslh.slpcl+hhphGs...phhlslplp.hsssh...shpphcp..........ltpplcptlppphstltp...h..lp.hpsttptp 
....................................................................................................h.hhuhhhshh.hh.lhch...h...s...u...h...hs...s.......ShulluDuhc.lsDh............hu.hl.............s..lh..u..h......phu...............................................................p+...s.s....s.tpa.......................sa...........Ga......t.+......hEh.l.uuhhs.u.l.........h..lh..hh..u...h.h.l.lh.c.......ulp+.l...hps.................................................................ptlp.ss...hh...hhh............................................................................uhlu........ll.s...s..h.h..hhhhhtc.......................................................................................................................................................................................................................................................................................t......................................................................................................................................hp...s.s.......l...p...ush..h..c..hhs....Ds.lso....................luslluhllhhhs.........................................................shhh..........................................hDslhulllul.hllh.s..uhplh..pc...uhphLhpt...s....st....ph..........................ppl.p.phl.p.........................hstlh...s.l...c..c..l..+..s.h.......p..hus...........phhhsh+l....h.s..sph...............h..ph.pt.........................................lhpplpp.hl.....tp.p.h......t.lt.t....hh.lp..h-.....t.......................................................................................................................................... 1 1327 2487 3430 +119 PF04586 Peptidase_U35 Caudo_protease; Caudovirus prohead protease Waterfield DI, Finn RD anon Pfam-B_4836 (release 7.5) Family Family of Caudovirus prohead proteases also found in a number of bacteria possibly as the result of horizontal transfer. 
20.70 20.70 20.80 20.90 20.60 20.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.00 0.71 -4.22 36 1699 2012-10-01 19:43:34 2003-04-07 12:59:11 12 10 1302 0 286 1376 536 154.10 23 52.03 CHANGED psh-l+stp...pst.....lpGYAshFsp.sp.........htEhltsGAFspsLtp.ts....lhhLasHDt.sp.slGpsps.........cLppDspGLchchcl..sssstuc-l.hphl+pGslsuhShGFpsh.....ppphptps.........hRplpcl-.Lh..ElSlVoh.PAhscuplp......tpshpphtph ............................................................................t.....p..t........tt...hhhpGaushas....s.................ht-hlt.s.sA.h.p..tsh.tp...s..........lhhLa.......pH........c..........s.........p.........s...l...Gpsp...........pl.p.......D.......spG.L.hhcsch.......ss.......s.....s.......tuc-h...h...t...t...l..+....s..G...s.l.suh..S..hGFpst...............p.phppss.......................t........hpplpchc.Lh..ElSl...V..o...h...PA..spsplp......hpt......t.......................................... 0 81 174 230 +120 PF01607 CBM_14 Chitin_bind_2; Chitin binding Peritrophin-A domain Tellam RL, Hutter H, Bateman A anon [1] Domain This domain is called the Peritrophin-A domain and is found in chitin binding proteins particularly peritrophic matrix proteins of insects and animal chitinases. Copies of the domain are also found in some baculoviruses. Relevant references that describe proteins with this domain include [1-3]. It is an extracellular domain that contains six conserved cysteines that probably form three disulphide bridges. Chitin binding has been demonstrated for a protein containing only two of these domains [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.46 0.72 -4.04 209 6920 2012-10-01 20:20:38 2003-04-07 12:59:11 19 153 308 1 4290 7189 6 53.80 23 23.27 CHANGED Cst..........tsGhhss......ssC.spa.hhCt......supsh.......hhpCssG.hhFst.....ptptCshspp.........spC .............................C..........tu.h.hsp......spsC...spa.ahCh.........s.G.p.sh................hhpC.s........s........G........h......hF...st...................ptp...t....Cshspp..........tC....................... 0 1391 1754 3585 +121 PF00942 CBM_3 Cellulose_bind; CBD_3; Cellulose binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1126 (release 3.0) Domain \N 20.60 20.60 20.70 20.80 20.10 19.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -9.79 0.72 -3.96 29 651 2012-10-01 21:34:18 2003-04-07 12:59:11 13 120 165 35 192 639 2 83.10 27 12.10 CHANGED l.hcsusss.ussNtlcs+hplpN.oGssuhsLsclplRYaash-t.thstsahsc.sslu.........susl.hshsphss.ssu.ssaYlEls ................h.hpsssss..sssspIpsphplhN.oGssslsLsclplRYaash-.t..st...s.tshs..sDauplu..............su..sl.p.....tp.hsp.lpsstss..AshYlElu............................ 0 101 152 164 +122 PF02018 CBM_4_9 CBD_6; CBM_4; Carbohydrate binding domain Bateman A anon Chris Ponting Domain This family includes diverse carbohydrate binding domains. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.94 0.71 -4.14 47 1727 2012-10-03 19:46:52 2003-04-07 12:59:11 12 250 613 41 590 1782 213 134.50 15 21.89 CHANGED sshlhsssFEs...................shssWtspsss.............ssssss.....sGphslplsspss.....sasuhhhphs..stlppGpsYplShhsptsss........pplplplphpss.........shpthts......thshs.spWpplpss.aThs...sssssshlhlps..........ssss ...........................................................................thl.NssF-p..............................sh.ss...Wp..s....hs.ss...........................sshsss..................sGs..h.s..l..t....l......s...s...tss..........shss...h..h.h.p..hs....hslp..t....Gp.s.YplShhs+ssss.................tplplplptsss...................htth.ts...............tsshs..spWpp.h..p.hs.ashs.....ssssps.lhht.......sts.............................................................. 0 271 468 540 +123 PF03422 CBM_6 Carbohydrate binding module (family 6) Bateman A anon Pfam-B_1231 (release 6.6) Family \N 21.00 21.00 21.20 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.65 0.71 -3.97 41 1755 2012-10-03 19:46:52 2003-04-07 12:59:11 10 402 465 60 713 1810 331 121.80 21 17.74 CHANGED cAEsastt..uGlshpppss..t.....sGhslshhssG-Wlsast..lchssuushphp.spVAsssus...usl-lclsu.....suslluolsl.ss.TGuWpsa.......tsspsslshss.Gs+slhLshsust......hhNlDahpFsp ...........................................................ht.......t.u.hphtspst....t...........sGhsl...u..h...h.p.s...G....-..a..l..pas....Vs.hs...s..u...u..s......h.....s.hp..h.+s..A.ss.sss...........uslpl.p.lsu...............ss.h.l...u..s.hs.l...s......s.....TGu.......W.p....s..a......................ps.h..s..s.s..l..s..hss...G.s.p.s..lhlhhsuss............hNlDhhph........................................... 
0 338 629 686 +124 PF00571 CBS CBS domain Bateman A anon [1] Domain CBS domains are small intracellular modules that pair together to form a stable globular domain [2]. This family represents a single CBS domain. Pairs of these domains have been termed a Bateman domain [6]. CBS domains have been shown to bind ligands with an adenosyl group such as AMP, ATP and S-AdoMet [5]. CBS domains are found attached to a wide range of other protein domains suggesting that CBS domains may play a regulatory role making proteins sensitive to adenosyl carrying ligands. The region containing the CBS domains in Cystathionine-beta synthase is involved in regulation by S-AdoMet [4]. CBS domain pairs from AMPK bind AMP or ATP [5]. The CBS domains from IMPDH and the chloride channel CLC2 bind ATP [5]. 24.00 16.50 24.00 16.50 23.90 16.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.50 0.72 -4.16 850 77225 2010-01-08 14:28:41 2003-04-07 12:59:11 23 464 5124 378 23594 56987 15502 57.10 19 26.04 CHANGED lpcl..hsp..............sshs..ls.s....sslp.cshp..hht.....cpp......hp.tls.V.....ls......cp...........scll..Gl.lohpDllpthht ............................................................................th.hp.................phhs....ls.s..........tslp..cshp.....hhp...........................cpp............................hptlsV.......................ls.......................................-p.............................spll.....GllohcDlhp....t........................................ 0 7547 14981 19849 +125 PF02754 CCG DUF224; Cysteine-rich domain Bateman A anon Bateman A Family The key element of this family is the CX31-38CCX33-34CXXC sequence motif normally found at the C-terminus in archaeal and bacterial Hdr-like proteins [2]. There may be one or two copies, and the motif is probably an iron-sulfur binding cluster. 
In some instances one of the cysteines is replaced by an aspartate, and aspartate can in principle also function as a ligand of an iron-sulfur cluster [2]. The family includes a subunit from heterodisulphide reductase and a subunit from glycolate oxidase [1] Swiss:P52074 and glycerol-3-phosphate dehydrogenase. 20.60 11.80 20.60 11.80 20.50 11.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.31 0.72 -3.98 61 13777 2009-01-15 18:05:59 2003-04-07 12:59:11 11 93 2942 0 4139 10623 3794 86.00 20 33.05 CHANGED lsaasuC..hhcst........h.pstphhtplhshhshch.h..ptptCCGusuhhsst...pt......huh....plsppplpphpc...h.s.....s-hllssCssChhplc ...........................................lsaassC...hhchh...........................ttps.t.p.s.h..t...p.l.h...t..t...t........s..h.....c.....l...h.....h............t.....t....p...t....C...C.Gt.su.t.h..s.sp..............pt.........hup..............ph..s.pp....p.l....c....t.h.pp...............t...t...........................sc..h..l..lsss..s.s.Chhtl.t....................................................... 1 1454 2943 3646 +126 PF03379 CcmB CcmB protein Mifsud W anon Pfam-B_3059 (release 6.6) Family CcmB is the product of one of a cluster of Ccm genes that are necessary for cytochrome c biosynthesis in eubacteria. Expression of these proteins is induced when the organisms are grown under anaerobic conditions with nitrate or nitrite as the final electron acceptor. CcmB is required for the export of haem to the periplasm. 
23.10 23.10 23.10 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.43 0.70 -5.13 13 1869 2012-10-03 10:13:34 2003-04-07 12:59:11 8 2 1817 0 379 1383 1059 208.50 38 96.40 CHANGED hhsllpR-L+lAhRssushhssLhFFLlVlsLhPlulGP-splLuRIAPGIlWluALLusLLuL-RlFtsDaEDGSL-...lhhsshPLthllluKshAHWllTGLPLllsuPLhuLLLsLshsuassLhhTLLLGTPsLShlGulGuALTVGL+RGG..lLLuLLlLPLhIPlLIFusuAlpsuuhs...hshss.hhlLuuhhlsslsLuPFAsAAALRl ..................................................................htlhth-Lpluh..R...p...tuplh....ssLhFF.Ll.V....Is..L.F.PL...u..l....G....P......-.......s..p.....L.......L........s..+...lu.PGll.......W.luALL.uuL.L...u.L..-RL.FR.....sDhpDG..oLEp..lhL...t..s..hP..........L..s.hllLuKlhA...H...W....l.lo.G.L..P.L.ll.lu.P.l..l..u.l.h.L...s.....h.....s...s..t....u....h....t....h....h....h.L....o..L.L..L...G.T..P.s.L.u.h..lG.AlGsA.LTlGL++G.G....lL...Lul..LlLP.L.hlPlLIFu.su...Ahsuushs......hshs...u.......L....t.l....LuAhhhh....s....h.s.L.s.PhAhuuuL+........................................................ 0 109 232 305 +127 PF04103 CD20 CD20-like family Bateman A, Moxon SJ, Pollington J, Finn RD anon Pfam-B_1979 (rel 7.3), Pfam-B_10092 (rel 9.0) Family This family includes the CD20 protein and the beta subunit of the high affinity receptor for IgE Fc. The high affinity receptor for IgE is a tetrameric structure consisting of a single IgE-binding alpha subunit, a single beta subunit, and two disulfide-linked gamma subunits. The alpha subunit of Fc epsilon RI and most Fc receptors are homologous members of the Ig superfamily. By contrast, the beta and gamma subunits from Fc epsilon RI are not homologous to the Ig superfamily. Both molecules have four putative transmembrane segments and a probably topology where both amino- and carboxy termini protrude into the cytoplasm [1]. This family also includes LR8 like proteins from humans, mice and rats. 
The function of the human LR8 protein is unknown although it is known to be strongly expressed in the lung fibroblasts [2]. This family also includes sarcospan is a transmembrane component of dystrophin-associated glycoprotein. Loss of the sarcoglycan complex and sarcospan alone is sufficient to cause muscular dystrophy. The role of the sarcoglycan complex and sarcospan is thought to be to strengthen the dystrophin axis connecting the basement membrane with the cytoskeleton [3]. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.79 0.71 -4.28 80 1022 2012-10-02 01:14:40 2003-04-07 12:59:11 10 9 113 4 533 1008 0 133.70 19 47.50 CHANGED sLGsl...QIhlGlhplslG..hlhhhhh.s.........hhhhsGhPaWuulhalluGsLu.lsuppcss....phllpsslshNllSslsAhsulllhshslthtp.hhth.........................................tshhhstph.......htulhsslLlhslLphhlulshshhss ........................................LGhh...QlhlGlhhh.sh.G...hh.h.hh.h.hs..................hhht.uh.sh.W.u..uh..h..hll.uG.lu.lhs.....tppsp...........phh.hp..s.phshs.l.lus.hh..u.h.......s........uhhlh..shsh......th.t.h.............................................................................................................................h..............hhsl.hhhh.hhshlphhlsh..shh.................................................................................... 
0 125 173 258 +128 PF05179 CDC73 Cdc73; RNA pol II accessory factor, Cdc73 family Wood V, Bateman A anon Pfam-B_6394 (release 7.7) Family \N 25.00 25.00 25.70 27.20 22.40 24.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.79 0.70 -5.04 6 336 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 288 1 245 334 3 259.70 26 58.02 CHANGED Tcsplhlpstchh.thslhshhpuspspp-.s+sp.ssss.sostscc.plctpptp..upaspaspcphss..p.scthcIsshGSh+GssLsulcpG...........................h.....ssupu+tstss.suK+ssRsPIIllPSAsoSLIThhNlK-hLp-hpaVP..sst++tpGsp+ssplslQ++psp.p........ThsacVlDsspKLp.P--WDRVVAVFVhG.sWQFKsW.W...s.sPs-IFs+IpuFal+aspD..cssssVppWNVchlplSpsKRHhDRsVhpphWEoLE+altp+ ........................................................................................................................................................................................................................................................................................t........................................h.st.hh..t.........t...htthp..s.shup....h...t.t....s....h........hppt..........................................................................h.....s.s.t..s..t.....sp...p.........t.......p.....p.t.p....p..s..PI...I...lls.s..u..s........o..S..LlshhNsKp..hL.p-..tp..a.ls........spp..t...p...t....p.........s....s...t...p.p..s.l.h........lp..+......p.h.p.ph..............................shp...ah...lV..Ds..s...t.ph..........p.....s...........c.WsRVVAVF..sp.....Gts...W...QFKsa....W.........s.s.P.....s.....-lFp+...........lpGaalpacsp.......phstp.V..ppW.................s..Vphl..p..l.......sc................p..K...............RahD+tsh.pFWcpl-.chhh............................. 
0 84 131 200 +129 PF01066 CDP-OH_P_transf CDP-alcohol phosphatidyltransferase Finn RD, Bateman A anon Pfam-B_651 (release 3.0) Family All of these members have the ability to catalyse the displacement of CMP from a CDP-alcohol by a second alcohol with formation of a phosphodiester bond and concomitant breaking of a phosphoride anhydride bond. 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.44 0.72 -3.71 124 11822 2009-01-15 18:05:59 2003-04-07 12:59:11 16 43 4879 0 3729 8699 4901 147.30 20 63.29 CHANGED hhhhPNh..lT...h....hslhh.uhhsshhhhhs.....ph..........................................hhushhhhlshlhDslDGtlARthspsSt....hGthlDshsD........thshshhh....................................................................hslhhhhshhhsshhhhhhht .............................................................h...hPNhlT.l....hp.lhh.....s...h.h..h.hh.hh.hs.....th..................................................................................................hhusllhhlusl...hDhl.....DGhlAR....p.h..s.t.s..op............hGthLDs..luD.....plh.hs..sshhhhh.........................................................................................................h.h...............................hhhhhhhhhhhhh.......................................................................................................................................................................................................................................................... 
0 1273 2397 3165 +130 PF00150 Cellulase cellulase; Cellulase (glycosyl hydrolase family 5) Sonnhammer ELL anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.05 0.70 -5.20 65 5309 2012-10-03 05:44:19 2003-04-07 12:59:11 13 205 1466 165 2106 7158 527 265.90 14 56.85 CHANGED lstsGpsh.....phhGhsst.........W..tsth.....stpshhphhpshGhNslRlshs......h.........ssah.....s.s.sh........s.......thhsclcpllshu.hspGhYl.IlDhHp........................................ts.tsshss............spsh................Fpp..........h..lAs+a...ssss.pllaE..lhNEPps.st..........stW...............stlpshsppslssIRss.uss...phIllsssp..............................Wus..........ssst.shs.......P..................ts..........tsplhaohHhYs.ssphs.t....................................................spthpsthphh.hspGh.slhluEaGss.....ssss.................tspsstW......lshh......ppp.slshsh.Wshssps ...........................................................................................................................................h.............................................................pp.h.p...h.h....p..s....hG.h.shl.R.l..s.ht.........................................h.................tth.........................t..sh.....................................hst........................................s.h.h.p..t...l..c..p........h...l.p.h......s...pp..t...G..l..h....l.....l...l.....-....h....Hs.................................................h.......................................................t..t.............t.....p.......tt...............................st.ph.............................................................................ht.p.............................h.h....p....t....l....u....p........p........a................s.....p.....s...s....hl.......h........a.-...l.hN....E...Pts........................................................................t..h...t....t.h..h....p...phh...p..t..l+..
........t...h....s.sp.........thl....hl..s...s....t..............................................................................ast.................................t.t......hhp................P.....................................t.p..h.h.....h......s.h.H.....Y....s.................................................................................................................t.t.......h.p..t..h...h....t.......h.......h..p..t...s......h....s...h...hls..E......aGh......................t.......................................h....................ht.h...................t.......................h......t................................................................................................................................................................................................................................... 0 829 1465 1881 +131 PF04218 CENP-B_N CENP-B N-terminal DNA-binding domain Bateman A anon Bateman A Domain Centromere Protein B (CENP-B) is a DNA-binding protein localised to the centromere. Within the N-terminal 125 residues, there is a DNA-binding region, which binds to a corresponding 17bp CENP-B box sequence. CENP-B dimers either bind two separate DNA molecules or alternatively, they may bind two CENP-B boxes on one DNA molecule, with the intervening stretch of DNA forming a loop structure. The CENP-B DNA-binding domain consists of two repeating domains, RP1 and RP2. This family corresponds to RP1 has been shown to consist of four helices in a helix-turn-helix structure [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.33 0.72 -4.54 6 534 2012-10-04 14:01:11 2003-04-07 12:59:11 8 19 145 3 370 898 32 51.80 26 11.42 CHANGED ++pRssLThcEKlclIpphE-sp..S+sslA+caslstoTlpsIlcpKcplLpth ..........................p.+ps..Lolc-Klcll.pp.l..-p.Gp......s.pspluccaGl.scoTlps.lhK.s.+cclh...h.............. 
1 79 158 248 +132 PF04734 Ceramidase_alk Neutral/alkaline non-lysosomal ceramidase Mifsud W anon Pfam-B_3385 (release 7.5) Family This family represents a group of neutral/alkaline ceramidases found in both bacteria and eukaryotes [1,2,3]. 19.40 19.40 19.40 19.50 19.10 19.30 hmmbuild -o /dev/null HMM SEED 674 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.05 0.70 -6.65 42 810 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 474 3 419 771 118 444.80 26 82.17 CHANGED sYllGsGpADITGPssElshhGYAshpQhusGl+pRlauRAFIlu..ppss.........pRhValshDsshhspuV+hsVLctLps..phs.shYscpNVslouTHoHuGPGGahpYhLhpl.....oohGFscpsapAlV-GIlhSIp+AHpsLp....PGplhhups-ltsAslNRSshAY.sNPpcERupY......stsVDKphTlL+hsc.ssspslGslsWFuVHsTSMsssNpLlSGDNKGhAAalaEcphp.....................................................................sp.ss.tssFVAuFuQoNsGDsSPNlhGshC.p.oG..Cph.pSoCs.utsthChupGP..tt..sthcSsplIGc+QaptAppLas.....susp.lsGs..VcshHtalDhsshshs..........sthssss..h+TCsAAhGaSFAAG.ToDGP........G................h....FsFsQuss....pssP.............hWphlpshl..t.PotcpppCQtPKPI..LLssGphs.PYsWsPsIlslQllRlGpLhllusPuEhTTMuGRRhRcsltsshtssh................p.pVVluGhuNsYupYlsT.EEYslQRYEGASTLaGPaTLsAYhp.htphhsuLssu......tss....ssGP......pPPs.p.scplohhsuV.laDstPhspsFGDVhsp..ssss.YphG.-s.VsssFhuuNPRN..sL+p-sTahtVE+hp...................tsssWpsVtsDsDWshhacWc.......Rsssh.s..tScsTlpWpI.........PpsstsGsYRl+aaG.shKshhsu....lpsapGsopsFpVt 
..................................................................................................hGhshhDhT...s.........h..thshhG.Y..up......p.ph..s..tGlcp+lhuRualht........tt.....................tp+hlh.ls.hDhhhh......p.....t.....lp.....tlhp.p.ltt.......tht.s..a..p..pp...sl...hlsuTHoHu.u......P......u....u..h....h....t.....h.h..h.th.........s.t.....u...a...p.t..........h..p.h.l.V.s.u.....lh...pul.pApps..lt..........s.upl.h.sp.s..............pl....s...ss..h.N.R.......s.........................s....a...........t....NP........t...pt.tth.......................ttssDtphshlph.p.......ss....p.....h.Ghl..sa.ass.Hs....s.s..h.t......s.s......s...p.....hlouD...hG.h.sshhhEp.ht.......................................................................................................................................................................h.......up.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s................................................................................................................................ 
1 157 259 363 +133 PF03859 CG-1 CG-1 domain Bouche N, Bateman A anon Pfam-B_18451 (Release 7.1) Domain CG-1 domains are highly conserved domains of about 130 amino-acid residues containing a predicted bipartite NLS and named after a partial cDNA clone isolated from parsley encoding a sequence-specific DNA-binding protein [1]. CG-1 domains are associated with CAMTA proteins (for CAlModulin -binding Transcription Activator) that are transcription factors containing a calmodulin -binding domain and ankyrins (ANK) motifs [2]. 23.10 23.10 23.80 30.40 22.80 22.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.57 0.71 -4.56 15 270 2009-09-10 19:07:15 2003-04-07 12:59:11 11 34 93 0 157 282 2 111.00 51 10.85 CHANGED lhpEs+pRWL+sp............ElhtILt..sac+athh...pssp+PtSGSlhLasRKVlRhFRKDGHsW+KKKDGKTl+EAHE+LKVGs...................l-sLpCYYAHu..-psssFpRRsYWLLppshpcIVLVHYhcVp ........................................h...p.phRW.psp............EIsthLh....saccap...h.....pP.sRPt............sGSlhLasRKhl+..aRK.DGasW+K+KDGKT...s+EsH.KLKVtu...................h-s..LashYsHu......p.sPsFpRRsYWLLp.ps..cIVLVHYhpV.............................. 0 32 82 120 +134 PF00307 CH actinin-binding; Calponin homology (CH) domain Finn RD anon Prosite Domain The CH domain is found in both cytoskeletal proteins and signal transduction proteins [1]. The CH domain is involved in actin binding in some members of the family. However in calponins there is evidence that the CH domain is not involved in its actin binding activity [4]. Most member proteins have from two to four copies of the CH domain, however some proteins such as calponin and Swiss:P15498 have only a single copy. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.54 0.72 -4.01 194 10780 2012-10-03 10:10:54 2003-04-07 12:59:11 26 564 496 152 5898 10465 47 104.80 20 12.85 CHANGED ccshlpWls.ptht..t....................................................................th.tlp.......sh.tpslpDGhhLspLlctl.pP...ph.........h.shppl..........p...........pphcNhphslp...hspc.hGhshhh........sspDlh.....pss.p........llshlhplhphht ................................................................................................................................................................................................pthhpWhp.ptht....t..................................................................................................th.plp.................sh..tpsh.p......D.G.hsL..s..tL....l....c.t..l...p.P.....sh........................................l..s.h.ppl..................................................p..................p...thc.Nl..p.h.slp...............hsc......p..........h.Gl.phhh...............sscDls............css.p.............llshlhplhth..t.............................................................................................. 0 1688 2487 4051 +135 PF04420 CHD5 CHD5-like protein Bateman A, Wood V, Mistry J anon Wood V Family Members of this family are probably coiled-coil proteins that are similar to the CHD5 (Congenital heart disease 5) protein. In Saccharomyces cerevisiae this protein localises to the ER and is thought to play a homeostatic role [2]. 
29.30 29.30 30.00 29.50 28.70 29.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.77 0.71 -4.57 27 271 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 233 14 174 269 0 152.20 27 75.98 CHANGED lhslFhl.lhphLlsshtsutlspllhhhh.t.......spphpptpphpcElhpl+pEhsshSuQDcaAKWsKLpRch-Klpp-lcphspplsuppsphcthlphhhhlhssshhhhLphaatKsPlahLP...pshhPhhlchllu.....hPpushG..........................uVSlshWhhssss .................................lhhh.lhh.llssh.ss...hlsphl..h..........ppssppppph+tEltph+cEhssl...Ss...tD...EFA+aA...K...L...cR+hsKhpccLcs...........hspphsuppsphchhlshshhlhpsshhhhLhhhat.psPlhhlP...psah..h.lphllu........FPpsshG........................uVulssWhhsCt........................................ 1 44 83 135 +136 PF03067 Chitin_bind_3 Chitin binding domain Bateman A anon Pfam-B_2364 (release 6.4) Domain This domain is found associated with a wide variety of cellulose binding domain. This domain however is a chitin binding domain. This domain is found in isolation in baculoviral spheroidins and spindolins, protein of unknown function. 
21.20 21.20 21.40 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.76 0.71 -4.02 128 1556 2010-01-08 15:51:16 2003-04-07 12:59:11 10 34 838 15 481 1278 28 176.70 28 53.03 CHANGED HGalpp.....PsuRshhCth.ss....................t....t...............................tsu.sspap.psh.Eusts..............h.pts.s..........DGplCuA.Gs.........s...phss....lD.ts...ss.pWtps...sl..........pssts..hshpaphT.AsH..tsshacaalT...Kssasssps..LshssL-hh.sh.........tt.t...........ts...ssstpa....ssslPp.......RsGppVlhshWphu...Dsss...................uFYsCsDVs ...............................................................................HGalpp.....P.s.SRshhCt.sss.......................................................s.....psG...ssta.ps..pul.Eusts...........................h.tts..s.......DGplsuu.Gs................t........thss.L.D.tps....us...cW.h+s..sl......................psG.s......hshpWphT...As..H....ts........upacaY.l..T........Kssa....s...s...sps....Lsh..ssh-hh...sh.t..h.....st.t...............................................................s.....sssssa......psslPss........RsG.hpV......l.hshWphu...D..oss..............................................................uFYsshDVs........................................................................... 0 173 287 412 +137 PF00379 Chitin_bind_4 insect_cuticle; Insect cuticle protein Finn RD anon Prosite Family Many insect cuticular proteins include a 35-36 amino acid motif known as the R&R consensus. The extensive conservation of this region led to the suggestion that it functions to bind chitin. Provocatively, it has no sequence similarity to the well-known cysteine-containing chitin-binding domain found in chitinases and some peritrophic membrane proteins. Chitin binding has been shown experimentally for this region [1]. 
Thus arthropods have two distinct classes of chitin binding proteins, those with the chitin-binding domain found in lectins, chitinases and peritrophic membranes (cysCBD) and those with the cuticular protein chitin-binding domain (non-cysCBD) [1]. 21.50 21.50 21.50 21.50 21.00 21.40 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.69 0.72 -3.70 157 3539 2009-01-15 18:05:59 2003-04-07 12:59:11 18 25 87 0 2490 3895 1 53.00 32 25.39 CHANGED YpasYpsssu.tt.....pppup...............tsstslp.GsYoahssD.Gp.hhsVp.Y.sA.Dcs.G.F ..........................................YpasY..ps...pD..s...ps..............pppsp............................sussVp.GsYo..h..hssD.Gp..hpsVp.Y.sA.Dc.s.GF.......... 0 942 1256 2169 +138 PF04968 CHORD CHORD Finn RD anon Pfam-B_1217 (release 7.0) Family CHORD represents a Zn binding domain. Silencing of the C. elegans CHORD-containing gene results in semisterility and embryo lethality, suggesting an essential function of the wild-type gene in nematode development [1]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.57 0.72 -3.73 47 678 2009-09-11 00:43:51 2003-04-07 12:59:11 7 14 243 3 422 649 3 62.90 45 38.49 CHANGED hstpCpNtGCsppap.ssc....s.scssChaHPGsPlFH-uhKtWoCCpp+...shDFs-FhpI.GCsp.GpHs .................h.htCpppGCu..p....pas....s..s..p...............s...pc-sCtYHPG..sP..lFH.........-uh...K..............uWSCCcc+...shDFspFLsI.GCsp.GpH.................. 0 135 212 319 +139 PF02017 CIDE-N CIDE-N domain Bateman A anon [3] Domain This domain is found in CAD nuclease Swiss:O76075 , ICAD Swiss:O00273 the inhibitor of CAD nuclease. The two proteins interact through this domain. 
21.80 21.80 21.90 22.20 20.80 21.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.51 0.72 -4.21 7 382 2012-10-03 10:59:06 2003-04-07 12:59:11 10 9 81 7 199 371 0 76.00 38 26.28 CHANGED p.+Ph+lpshcpsh++GVsApSLpELlsKst-hhtlsp..tssoLsLtEDGT.V-sE-aF.sLscsTchhlLttGppWps ....................+Ph+lpstpRsh++GlsA.soLpELhp..K..........sp..........c..t..L.tls................tsssL.VL..-.EDGTtV-sE-YFpsLssNTphhlLppGppWp................... 1 36 51 107 +140 PF02487 CLN3 CLN3 protein Mian N, Bateman A anon Pfam-B_1060 (release 5.4) Family This is a family of proteins from the CLN3 gene. A missense mutation of glutamic acid (E) to lysine (K) at position 295 in the human protein (Swiss:Q13286) has been implicated in Juvenile neuronal ceroid lipofuscinosis (Batten disease) [1]. 24.50 24.50 25.00 25.10 23.60 24.40 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.29 0.70 -5.69 2 477 2012-10-03 03:33:39 2003-04-07 12:59:11 12 13 234 0 293 463 9 272.00 30 81.25 CHANGED .hhhFWLhGLhNNh.YVVhLSAAhDIlu...............P..shs.......pSl....VLLADIhPoLhIKLhuPhhlchl.YS.Rlhs.hhhushuhhLVuF.+slhssLhGlshASISSGhGEVTFLpLTtaY.phslshWSSGTGGAGlhGuhSYhhLTp.htlssphTLLsh.hlP..hhh.aaFhLpSs-sp.shtp.pt.spAcps.lss..ss.o+ss......Sop.pl.pphphh+tLla.YhVPLshVYhhEYhINQulh.hLhF..s............hpatp.YhhYthLYQhGVFlSRS.hHhhRhR.halLAhLQslNLshhllpsWF.hh.S.ahVhllIhYEGhLGGAuYVNTFhNIh.ppsspcpEFAMuAssIuDohGl.LuuLLuLsLcshLC+hp 
.....................................................s.......................................................................................................................lllsslhPshhh+h..h..P.h.hh...hl.a.......Rhhh.hh.hthh.uhh.......hl....uh..........s..............t.......s......h................h..t......l..hGlhhsShuuuhGEhshLt.h.st.ha.........................s...l.shauuGTGhuGlhGuh.a.h.hhpt...hths.t...shh......h.s....h.a......hh.....h.....l..............................................................................................................................................................................................................................................h.......t.....p.h......h....h..........h....hh..hh.hPLh.hVYh...hE...Yhlspul..h..lha................p.h..t.t...Y.hh.thhY....Q.......h.....G....VFhSRS....o.........h...............h.........h..........p........h...........p......hhh....shhQh...h.hhh...h.h..........hts...........h...h..................h.......h.........p....................h.......hlh.hl....hhh.GhhGGhsYVNs.a.............h............h........p.........................................t..............p..phshshhs.s.shuh..hushh.......................................................... 0 118 172 253 +141 PF02861 Clp_N Clp amino terminal domain Bateman A anon Pfam-B_102 (release 6.0) Family This short domain is found in one or two copies at the amino terminus of ClpA and ClpB proteins from bacteria and eukaryotes. The function of these domains is uncertain but they may form a protein binding site [1]. 
20.70 7.40 20.70 7.40 20.60 7.30 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.23 0.72 -4.03 167 16656 2009-09-14 13:11:56 2003-04-07 12:59:11 15 55 4576 80 4358 12959 3258 51.20 24 11.11 CHANGED A.pphApphscpalssEHLLluLlppspuhstplLpphGlshpplcptlpphhs ...................Atp.Apphsc.palssEHlLluL....l.......c....p....s..p.......u........s..s......s....p..l....L..p....p.h...G.l.s..h..p.pl.cpt.lpphh................................. 0 1453 2953 3792 +142 PF02353 CMAS Mycolic acid cyclopropane synthetase Bashton M, Bateman A anon Pfam-B_862 (release 5.2) Family This family consist of Cyclopropane-fatty-acyl-phospholipid synthase or CFA synthase EC:2.1.1.79 this enzyme catalyse the reaction: S-adenosyl-L-methionine + phospholipid olefinic fatty acid <=> S-adenosyl-L-homocysteine + phospholipid cyclopropane fatty acid. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.72 0.70 -5.32 12 4148 2012-10-10 17:06:42 2003-04-07 12:59:11 15 22 2427 44 1236 14323 7479 263.10 35 68.06 CHANGED pchcpphcslptHYDlSsDFFpLaLDPohTYSCAYFcc.......sDhTL-EAQhsKlDlhLcKLpLpPG.pLLDlGCGWGuhhh+AspcYDVpVlGlTLScpQhphspptlsphshtcphclhLtsac-hs-....hD+lVSlGhFEHh.......Gh-pYssaFptsapll.ssGhhLLHoIsshc.cphs.tth..........cFIsp.IFPGGpLPolptl.p.pspcsGFplhchpsLp.HYAcTLchWs-sLptph-cAhsl.huEchhchahhYLsuCAttFRhGhlsltQh 
......................................................................................p...ppshpsl.ptH.Y...D...l....u....N..-...a..ap.h.a..L..D.s.s....M....p..Y.S......C..Aa...a....c.s......................................s..c...p....o....L.....-...p...A....Q............s.....K..l....c...h.......l....h......c...K.....L.....p.....L.........c.......P......G.........t......c.....l.....LDI..G..C..G...W...G...u...h.....s......h...h...A.....A.....c.....c.........Y......G...s....p...V...s..G.......l.......T......l......S.p......-......Q....h.......p.........h....u............p.......p.....+..........h....c............p......h.........s......h......t..........c.........p..........l...........p.......l.......h.....L......p.....D...........Y......R........-......l.....s..s.................pa..D..+.......I....V......S.....l....t...M..h...E...H..V.......................G...h.....c.....p.....a....s...s....a....F....p...p...l....p...c...h.L.c.s.......s...G..h..h..........l........l......H.....s....I....s....t..t....c......p....t....h...t...s............................................sa.....I..p...+...Y.....I......F.....P......G......G.....h.....L...P.....o.....l.....p.....p...l...h.....p...........h......p...c...s.......s.....h........p.....l......p...c.h....c..s......h....t....h....H...Y......s......c..T.L.pt..W.......tcp..ap............s.......p...h...............s..................c............l....h...................t............h........h..............s...........-........c........a........h.......R........h....W.ph....Y....LsuCssuFct..t.tlplhQh.................................................................................................................................................... 0 353 757 1043 +144 PF00780 CNH CNH domain SMART anon Alignment kindly provided by SMART Family Domain found in NIK1-like kinase, mouse citron and yeast ROM1, ROM2. Unpublished observations. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.79 0.70 -4.98 62 1933 2012-10-05 17:30:42 2003-04-07 12:59:11 17 73 285 0 1105 1799 2 265.10 21 23.92 CHANGED sssh...stspplllG.T-pGlalhsh.............t..ht.sphhph.t......lpQltVlpphslLlhl........u......s....cpLhsasLssLpstp.........t.....................plsts+ssphhs...........sshstsphLssuh+p.....slhlhchhp.shppht......................chhp-..hths................s.shslphh.......csplClGs...........ppsFcllsls.p......sts..tslh.p.sstt..........h....tthpslshhpls.......p.s...chLLCasphuhaVst...pGt......psc....shtlpWss...tPpuhsh.htsalluFps.sh.lElRslpss.....cl...hpplssppl+hlssss ............................................................................................................................................h......t...lllG.s-.p.G...l...hhhph........................................t.sph..p.hhs................hpQltl....l..pt..s........l.Llsl................u...................................s......pplhha.LstLpsp..t..............................................................................................pl.schc.G.sthhp.............hsth....pt....h....p........h...L..s..sAl+p.............plhlhp.h..t.....t...s..htph..............................................................p.hp-...h.p.hs................s.shslshh............p.p...lslGh.....................................sss.Fp..hl.sls..p...............uss.....slh.....sp.p..........................................t..h.....p..s.h...t.lh.pls...............................p..s........chL.l....s....a.....s.....p....huhaVst.............pGc....................hsc......p..lpW.s.t.........h.Pp.u...h..............s..h...t.........t......s.......lhu.a...tp....pu...l-l+.s...lpss......pl........hppht.h.pphphh....s............................................................................... 
1 320 476 785 +145 PF00027 cNMP_binding Cyclic nucleotide-binding domain Sonnhammer ELL, Finn RD anon Prosite Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.62 0.72 -4.17 342 26013 2009-01-15 18:05:59 2003-04-07 12:59:11 24 502 4562 267 10760 22035 3366 90.20 18 23.38 CHANGED hpphptGphlhppG-....sstlallhpGplclhptspssp............................hthhtsGshhGchshhtsp.........tsssshA.....hsssplhtlsppphpplhpppsp .......................................................h...httGph.l.h..p...p.......G.-...........ss..p...h.......ah..l.h.p..G.......p......l..c......l...h.t.t...s...p..s...s.pt......................................................................................................hht.hh.t..s....G....s.....h..h..G...c......h....u..l..hpsp..........................psss.spA.............hs.s..s.p.l.h.tl.s.t.p...phpp..lhtp...t......................................................... 0 4474 6939 9042 +146 PF02629 CoA_binding DUF184; CoA binding domain Mian N, Bateman A, Griffiths-Jones SR anon COG1832 Domain This domain has a Rossmann fold and is found in a number of proteins including succinyl CoA synthetases, malate and ATP-citrate ligases. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.99 0.72 -3.47 70 6798 2012-10-10 17:06:42 2003-04-07 12:59:11 14 37 4404 64 1756 6726 3805 96.70 32 28.56 CHANGED scpsclhlhGhosp..phs.aphppthphshp.hlhulsPp.csG.pplt.........GlPlatslc-hhcch......t..sssulIhV....PushAtcsl.Etlc.Asl+slVslo.G .......................................cpo+Vll.G..ho...G....p..pth..a...+.s....p...p...t....h....s......h.......G.......sp....hlh..G...l.sPt...+....s....G....p.p..ht..........................................GlP.V...a...s..s..l.c...-..shpph....s......ss.sulIhV......P...s...sh...s...t...c...ulhE.ul.....-..AslchllslTtG.............................................. 
0 610 1119 1487 +147 PF01144 CoA_trans Coenzyme A transferase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.19 0.70 -5.16 43 6881 2012-10-04 00:26:15 2003-04-07 12:59:11 18 14 2405 102 2146 5554 1112 208.60 24 73.96 CHANGED hpphspAlu.plcDGthlhlG....hG.hP.sLlsslhcps.....hsshshlpsss..Gh....hGluslhhstplccsl....ssh.hsp.......shhhspphhsuph.phchhsQGshh-thhhGGstls..u.lsssslG.shhttt......................phhshs..........Gtthllptuhpsslshl+ttptDthuplhacsostphs.shhs...........sthlT.hpV.cls...hsph.s.phhhPGlhl-pllts ..................................................................................h..phtptlu.tl....pDGthl.s.....l...G........hG..h.........P......sLl....s.s.l.h..pp.......................psh.shl.psps.....Gh...............hG.l.G...h.......h...p.p..t..p....l..cphl..........ssh..ssp.............shhhspt.hhs...u...p...h....ph.t.hspGs...hs-hhh.hGuht.....ls..G.lss.h.u...l...s...s.htst................................................................................pl.h..s..hs.................G..t..h...h.L.h..t.t....u..h.p.h.s.l.sl......h..c.t.pp.s....Dt.....pu.....plhh....c....psh..h..s......sshs.......................shlT.lpV.c..ls........h.s...p...h.......s....p...hhPGlhV-plh..t.................................................. 0 620 1257 1756 +148 PF02514 CobN-Mg_chel cobN-Mg_chel; CobN/Magnesium Chelatase Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_647 (release 5.4) Family This family contains a domain common to the cobN protein and to magnesium protoporphyrin chelatase. CobN is implicated in the conversion of hydrogenobyrinic acid a,c-diamide to cobyrinic acid [1]. Magnesium protoporphyrin chelatase is involved in chlorophyll biosynthesis [2]. 
18.40 18.40 18.50 18.50 18.20 18.30 hmmbuild -o /dev/null --hand HMM SEED 1098 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.88 0.70 -13.70 0.70 -7.17 129 1875 2009-01-15 18:05:59 2003-04-07 12:59:11 11 21 1069 0 690 1853 1652 877.50 30 85.15 CHANGED lhpYhttGG......tcNhpshlphLspph..........thts..........tsPhthPp.h.GlY+.st.........thhtt.t.h.t.....................t..tssVGllh.aRuhhhuushshhDul..lcsLEpcG.hsslslassu.........tstst..ltphhht...........tlD.sllshsuFulsssss.t............uhphL.ppLsVPllpslhh..hpohcpWpsuspGLs..sh-luhpVslPElDGtltsh.slut+pt........s...t..h.....tthtPhs-Rlchlss+stpalpL+cpsss-K+lAlllaNaP.sp..pupl.GsAs...hLDs..sSlhplLptLpppGYsl..........s...lPps.scpLhctlh.tthsss..h.st.t.h.p......hh.hshpcYhpaasp...Ls.thppclpppWGssPG..........t....hlh....t...............hhlsGlphG.NlhlulQPsRG.....hpt..Ds.h...phhHssslsPsHpYlAaYhWLcct.....apADAllHhGsHGoLEaLPGKplGLSssC.aP-hllGslPplYsYhlNsPuEGs...AKRRutAsllsHLoPPlspAuL..YssLtcLcphlccYppst....t.sst..ctptl...tppIhphsp.phsLsp.-...........................................................................................................................shtphlsp..lcs...aLp-lcps.IspGLHlhG..psPp.......s-ph...sphlhshhphs............sh.t..h.....................................thtttphhphlpthstth......t..................................................................................................................................................................................................................................................................................................................................................................................h..hh.thh.tl.....ssspElsuLLpALsGcalsPGPuGsPsRs..-lLPTGRNhauhDPptlPopsAaphGpchA-pLLc.cahp-p..Gc..aPcslulsLWGossh+otG-slAQhLtLlGV+P.....laD.ssGRVsslElIPLs-L.........GRPRIDVslplSGlFRDhFPphlpLlDcAlphlAs..................h.D.Ess-.Nhl+ppsht..............t.t.s.....p.uthRlFussPGs..YGu.G.lsthl-uu.sWcscs-Lu-sYlstpuaAYG.......t.s...s........tu...........p...pspc................s
hcptLpsl-sshpsp-opE...hslhDsDcYapahGGlstAlcpls..Gp.pPshYhuDps...pssps+l+oLp-pls+.hRoRlLNP+WlcGMhcHGYcGAtElutpl-thaGasATs.stVscahacplscsalhDcp...hpcalpp.tNPhAhpshspRLLEAtpRGhWp.sssphlc ..................................................................................h...ah..Gu.......tNhtthh..hh.t.h......................................s..hs..t......u........l..ht..........................................................................................................tts.hlullh..+shh.ss.s.hshhp...slhptl......ct.tG.hps.lslas.u...........................t.............l.ph.h.t.....................ss.hlls.hh.t....hs.hss.......................t.thh...tth..s.hP.l.hpsl.......htshttW.t...t...s.....pGls.......sh...phshpls.l.PEhDGtl.sh.shuhpp................t......h.......th.s..cRhthlsthshpahpLpphsstp++lAllhhsaP..s.......t.....psplusu..s....hLDs..t..S...hhtlLpthp..ttGYpl.............t.......h..........P...............t.................s....sptLhp.lh...........s.....s...............................hsht...pY.th..htt...l....thtpth....tt.WG.s.s...............................t......hh..........t..tt.......................hhlsuhphG...NlhlhlQPstG........ht.t....ss...t.hHs...shsP.HtYhAhYhWlpp..............................atu-AllHhGpHGsLEalPGKthulSt.t.C.aP-hhlGslP.lY.ahsssPuEus...AKRRuhA..sllsalsPPhtpAth..Yt.t.ltcL.cpllsca.ph.........sst......ch..l..............tppIhph..ht..thtlpp-h............th...............................................................................................................................................t.thltc..lcthLt-lcp.t........l.tGLHlhGps.s.p.......s.pt.h...hthlhshhp..................sl..............................................................................................................................t....ht.ht......t...................................................................................................................................................................................................................................
.....................................................................................................................................................................................h.....hhtt.h..h.tl...tsstEhtslhpuLpGtal.suPuGsPhRs..plLPTGRNh.auhDsptlPo.hAhphGhthAppllp.pahtc........p..Gp.aPcslulslWGossh+otGpsl.AphLhLhGlcP.....hhs..st............tRl.......s.sh.cl.ls.....tpL......................sRP......R.l..DVslp.lSGhFRDhFs.p.hthlDpAlphsAt...........................................h...-.Es.c.N.ltt+s.t...................................t.s.................ptuthRlFustsGs...Y.Gu.G.lpthl-s......t...tW...........ps...............ct-....Luc.sahshtuaAYu..............t..t................u...........t......ttt.........................hhpttLpphphshps.-spE...hslhsss...caapa.GGhhtsspthp...........G......p...t.......st....h......a..hsDp.s...p...pp..sphcslp-phth.hRs+hlNPcWhpuhhpHGYc.Guh-h.s.t.pls.hhGa...........sAT...s.thl.....s...sahap.tltpsalhD.t...hpphhtp...N.PhAhpphs...t+hlEA.pR......uhWp.ss.p...t............................................................................ 1 220 463 581 +149 PF02492 cobW CobW/HypB/UreG, nucleotide-binding domain Bateman A, Mian N, Bashton M anon Pfam-B_428 (release 4.0) & Pfam-B_1247 (release 5.4) Domain This domain is found in HypB, a hydrogenase expression / formation protein, and UreG a urease accessory protein. Both these proteins contain a P-loop nucleotide binding motif [2,3]. HypB has GTPase activity and is a guanine nucleotide binding protein [3]. It is not known whether UreG binds GTP or some other nucleotide. Both enzymes are involved in nickel binding. HypB can store nickel and is required for nickel dependent hydrogenase expression [1]. UreG is required for functional incorporation of the urease nickel metallocenter.[4] GTP hydrolysis may required by these proteins for nickel incorporation into other nickel proteins [1]. 
This family of domains also contains P47K (Swiss:P31521), a Pseudomonas chlororaphis protein needed for nitrile hydratase expression, and the cobW gene product (Swiss:P29937), which may be involved in cobalamin biosynthesis in Pseudomonas denitrificans [5]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.41 0.71 -4.83 62 9942 2012-10-05 12:31:07 2003-04-07 12:59:11 14 44 3597 7 2693 11049 3134 177.40 27 56.79 CHANGED sshslsG.lGuGKTTLLpcll.....pp...h+lAVIhN-hs..sshDuphl..........pps..ss...............llplssGshCpo.lptDh.hslpsltcht.......................................phDhllIEssG.lssPssh......................shthDshls....llDs.........................scscchspph.ptlthADllllNKsDLssts.tshcthtpphcplssptpllh ......................................................................................................................hhllsGaL.......G...uGKT....TL...LpclL........................p.pps....h.+lAV...I..h....N.....-.......hu.....t......s.....s...l.D.....u.....p.....l....l.......................................................tps......ss.....................lhp.l.s...s....G......C...h.....C.C....o.....hp.......t....D....h....h.s...l.p.s....l....hcht...............................................................................................................................................................ph.D.....h.l.l.....I....Eso.G....lsp.P.s.sl.......h.....................................h.........h..t..l......c...s......h..l.s.........l.lDs....h...........................................................................................sp.t..p..s...h..s...p...h.h...h..p..Q..l...t..h..A...D.........l.........l.........llNKs.D...L..ss.....t.......p........p..thp...p.th.p.p.lsspt.h..t..................................................................................................................... 
0 789 1689 2240 +150 PF00135 COesterase Carboxylesterase family Sonnhammer ELL anon Prosite Domain \N 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 535 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.71 0.70 -5.80 123 8736 2012-10-03 11:45:05 2003-04-07 12:59:11 23 92 1650 371 4719 13851 2441 405.50 22 85.47 CHANGED hhhhhhhhhhhhhhhsstss.........................ltst......GplcGh.hts.tt.....t..h.sFhGIPaAcPPlGpLRFctPpP.....sps....W.pslhcuophsstChQ.sphh...................hpsshs...................SED.CLYLNlasPphspps............................................hPVhVaIaGGuFhh..........Gsu........s.hsusthhhpcsVllVohsYRLGshGFLohsspphs......................GNhGLhDQhhALcWVppNIssFGGDPsplTlFGcSAGuuSVshhhlS..........ts...cuLF++AIh.SGsshssas.hpsss..hphspplApthGCsssssp..................plhcCL+.ptsspcllsspt..hhhthhshhs...............hhP.............sl-us..................hlscpP......tphhppu.hp.plshll.GssppEGhhhhshhhtt.tt.........t.p...................................hhphhtthhh.hspht........tpplhptYhss.pt..st.pph.pthtplhuDhhahts.hhthhtphppsssss.YhYpFsapss........hthh.thtsssHus-ltalFstshhtt.th.........................ppcctt...cphhphassFApsGsP...............s...t.stWsshspp.....t.hthhhtstpthhtpp.......hpaa 
..............................................................................................h........................................................h..h.........u.h...G.................................................h...........ahGlPa.A.tsP..h.....t....................................R.a.......t......................P.....................................h.....p...t.......h......p....s.......p.....p..................s........s..........s.Q.................................................................................................sE...D...C...L...h..L........N..l..a..s.P..t......t..t..tt...........................................................hP...V.hla.........l.....a................G.G..u..a....................................Gsu........................h..t.....s.....t...........h.....h.............t......t......s......l......l.....l......Vohs....YR.....l......G........h.......h.............G....F.....h...s....h......s.....t.t.............................uN..h.....G.......l...h........D....Q......h....h.....A.L....p....W..l........p........c.......N........I..t...........t..........F.........G............G...........D....P............p...p..l..T................l....h....G.p...S.A.......Gu........h...u..l..t..h.h...hhs....................s.s...............ps........L...F..........p...p..u........I..h......S...............G..........s.........h...h....................................h.....h..t...................thh..th...s.....p.....h..s.ht....tt...................................................hp.p..L....p...t.h.s......p..p...l..ht...........................................................P...........................................h.l.-.s......................................................hls...p.......................th..h......t.......t.......t.............h.............p...........h.......s.......h..hh...G......s.p....p..Es................h..h.h..........................................................................................................
h..........h....................................t.h..h......a........t.................................................................................................................t.h......t...hhs....-..h......h.h.....h....s.........................h............................................h................t...........t.............................t....t..............................ahY.ath.s.................................................................t.s..H.s..-..h....hh...a..t.............................................................t..h.......t.h.thhhs.Fs..t..t..s....ps...........................................a.................................................................................................................................................................................................................................................................................... 2 1694 2505 3925 +151 PF01484 Col_cuticle_N Nematode cuticle collagen N-terminal domain Bateman A, Bashton M anon Pfam-B_200 (release 4.0) Family The function of this domain is unknown. It is found in the N-terminal region of nematode cuticle collagens, see Pfam:PF01391. Cuticle is a tough elastic structure secreted by hypodermal cells and is primarily composed of collagen proteins [2]. 20.80 20.80 20.90 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.38 0.72 -4.28 113 1198 2009-01-15 18:05:59 2003-04-07 12:59:11 12 22 21 0 1054 927 0 51.90 23 16.32 CHANGED hss..usshSslullus.llslshlasplpshpsplps-hpth+spuc-hWs-h .....h..shhusshSslAllss.llslPhlashlpplpsplps-lshh+sp.......u...cshWpch........... 0 359 519 1054 +152 PF01391 Collagen Collagen triple helix repeat (20 copies) Bateman A, Eddy SR anon Swissprot Repeat Members of this family belong to the collagen superfamily [1]. Collagens are generally extracellular structural proteins involved in formation of connective tissue structure. 
The alignment contains 20 copies of the G-X-Y repeat that forms a triple helix. The first position of the repeat is glycine, the second and third positions can be any residue but are frequently proline and hydroxyproline. Collagens are post translationally modified by proline hydroxylase to form the hydroxyproline residues. Defective hydroxylation is the cause of scurvy. Some members of the collagen superfamily are not involved in connective tissue structure but share the same triple helical structure. 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -11.88 0.72 -11.88 0.72 -4.43 41 44425 2009-09-12 04:47:45 2003-04-07 12:59:11 13 899 1492 46 19240 35044 5977 63.30 39 36.42 CHANGED GssG.sGssGssGssGssGssGssGssGssGssG.sGsPGssG.sGssG.sGssGssGts .............................................................................................G........G........s......G................s.....G.....................G...............t......G...........s.....G..............t........G............s......G...............t......G....s....t......G................t.......G............s......G........s.....t...G.........s.....G..........s....G.................G........G..G..h............................................................................................ 0 3964 5810 11551 +153 PF03772 Competence Competence protein Bateman A anon COG0658 Family Members of this family are integral membrane proteins with 6 predicted transmembrane helices. Some members of this family have been shown to be essential for bacterial competence in uptake of extracellular DNA [1,4]. These proteins may transport DNA across the cell membrane. These proteins contain a highly conserved motif in the amino terminal transmembrane region that has two histidines that may form a metal binding site. 
20.20 20.20 20.30 20.60 20.10 20.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.75 0.70 -5.34 123 4251 2009-01-15 18:05:59 2003-04-07 12:59:11 11 14 4131 0 907 3604 477 262.70 21 39.09 CHANGED LlhG-+s.tlspphhptappsGlsHLlAISGhHlullss....hhhhlhphh................hssptht..hhhulhhhhh...Y.shlsGhss.sshRAhlMhslhhhuhhh...t+c....hsshssLshuhhllL.lhsPhslhssGF.LSFhAlhullhhh........thhpphhthhthhhh............................sshss...plssh..PlhhhhFtplSh.hullsNLlsl.Pl.huhll....lPhhlhuhlhhhh..............shuthhhh....hh..th......lphlhthh.phhup........hsh.....lhh..stsshhhhhhhhhhlhhhhhhhthh ..............................................lhhGc.p.p.tls.p.c.......h.pphhppsGlsHLhAlSGhHluhhhs.......hhhhl.httl...................................................h.s.ptht........hhhshhhhhhY.s.h.L.sGhss...uslRAhlhhs.lhh.h.h.h.hh........t.p.p..............hs.shp.s...lshshh.h.ll.lhsPhhlhshGFhLShhAshsllhhh............................thh...t.h.hth..h......hh..h..............................................................................hs.hsh.plshh..PlhhhhFtphsh....hu.......hls...Nllhl.Pl..hshlh...lPh...hlhshlh.hhh..........................lsthhhh.................................hh......shh.......hthlhh.h...l....phhsp..............h.........hhh...s.t..sh.hhh.lh.h.hhhhhhh.hh.....h......................................................................... 0 309 604 770 +154 PF05071 NDUFA12 Complex1_17_2kD; NADH ubiquinone oxidoreductase subunit NDUFA12 Bateman A anon COG3761 Family This family contains the 17.2 kD subunit of complex I (NDUFA12) and its homologues. The family also contains a second related eukaryotic protein of unknown function, e.g. Swiss:Q9BV02. 
21.30 21.30 22.00 21.50 20.40 20.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -11.06 0.72 -3.57 88 843 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 595 0 467 747 993 102.70 29 67.27 CHANGED GphVGcDchGNpYYcst...........stc+RWVlYs.uhs.......E..ASpVsP-WHGWLHaphDpsPo...pp.shst+sWpc..sHp.NhT.GTstA..YtPtG.....uh.....t......psh.ptD.YcAWsP ..................phVGpDchGNpYYEst...............pthhtp+RWV..Y..s.stt............-........uopl..P......P..pW...........HuWL+at..t-pP.Po........pp...t...h...t.......c...p.a.pp....+.p..Nho...so...tt..Yhs.s...................................................................................................................... 0 150 256 364 +155 PF00329 Complex1_30kDa complex1_30Kd; Respiratory-chain NADH dehydrogenase, 30 Kd subunit Finn RD anon Prosite Family \N 21.10 21.10 21.30 21.70 20.60 21.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.32 0.72 -3.78 131 5889 2009-01-15 18:05:59 2003-04-07 12:59:11 14 20 4153 16 1220 4082 2058 105.60 34 33.03 CHANGED plhphL+pp...thshLsslsusDhhptt........chplsYp.lhsh.....tpspp........ltl+stl.st..cssplsSlsslasuAsahEREsaDhaGlpFpG.HPc......h+.Rllhs.-s...a.s.......aPLRK .........................................................hLh........tashLh.slsuhDhtsts..................caslhYp...Lh.s.h......p.p.s.pc....................lpl.K.l.....hs..st...........ps............s......p.......lP...Sls..la..uAsWhEREsaDMaGIhasG.HP.c........L+.RIl...h.s..ps...W.G.......aPLRK.......................... 0 403 782 1023 +156 PF02950 Conotoxin Conotoxin Bateman A anon Pfam-B_529 (release 6.4) Domain Conotoxins are small snail toxins that block ion channels. 
25.10 25.10 25.20 25.10 25.00 24.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.34 0.72 -3.18 112 926 2012-10-01 22:06:18 2003-04-07 12:59:11 12 1 54 27 2 953 0 70.10 26 96.65 CHANGED hKLshlhllslLLlo.......ss..stsspt.sp.......p.tpthpphpssp..pphh...ptp..h.....................hsppCCsh.....hC........hCh ........................KLss.lLhlslLLhs.....hsshthsu-pssc....................p.scp.t.p.chpssc....p.p.th...hh..........................htppCCsh......hCt.......h.sC...................... 0 0 0 1 +157 PF05019 Coq4 Coenzyme Q (ubiquinone) biosynthesis protein Coq4 Wood V anon Pfam-B_14948 (release 7.6) Family Coq4p was shown to peripherally associate with the matrix face of the mitochondrial inner membrane. The putative mitochondrial- targeting sequence present at the amino-terminus of the polypeptide efficiently imported it to mitochondria. The function of Coq4p is unknown, although its presence is required to maintain a steady-state level of Coq7p, another component of the Q biosynthetic pathway [1]. The overall structure of Coq4 is alpha helical and shows resemblance to haemoglobin/myoglobin (information from TOPSAN). 
20.50 20.50 20.50 20.80 20.00 20.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.36 0.70 -5.44 27 475 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 363 8 319 505 118 198.00 31 79.73 CHANGED YsuHlPL....sshp+hhLssGSulsulhcPcR.uDhIAsLGEsTuh.hhLppL+cpMhsDspGRpIL+-+PRIoopoLshstLpphPcsThGtsYspaLccpsVoPDoRssV+alDD.EhAYVhpRYREsHDFhHslhuhPhsh.GElAlKhhEhhNhGLPMshLuulhushRL+spp+pphhphYlPWAl+sGhpu+.......sLlNVYWEchhEpDls-LRcELGIp.sPc ............................................................................tph.hhhhuuhhu.hh.pP.c.tp.....l.shhuE.so.uh...hl..phh.ppMhpsspGpplLp-+.Pclssps...l...........s..hstL......ps.L..Pps...olG........tsYhpaLcp..............ps.lo........P...D........s.........R.s..s......l........c.......a.l....-.....-..........-......h.....A..Ylh..pRaR-sHD.haHslh.G.hs.h.s.h.....GElulKhhE...hh........p...h.......t..l......P.......h...sh...L....s.....ul...h.u...s.h.......c.........L............p.....t...p..t.p...........t........h........hp...........h.h..l......P...W....A..hcs....G....hp.u.c..............lhslhaEc.hh-pslpclRpcLsIp............................................................................................ 1 90 172 272 +158 PF03471 CorC_HlyC Transporter associated domain Bateman A anon Bateman A Domain This small domain is found in a family of proteins with the Pfam:PF01595 domain and two CBS domains with this domain found at the C-terminus of the proteins, the domain is also found at the C terminus of some Na+/H+ antiporters. This domain is also found in CorC that is involved in Magnesium and cobalt efflux. The function of this domain is uncertain but might be involved in modulating transport of ion substrates. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.46 0.72 -4.20 174 11377 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 4360 35 2334 7354 2259 80.60 23 18.84 CHANGED pphssssallsGphslc-lpchhsl.p....lsp.c..c.....hc.TluGhlhpplsclPp..G-......ph........p...s...............hp.hpVhchcsp+..Ipplclphhp ....................................p..hscssallcGpss.lc-...l.sch.h....sl..c.........ls-..-............-..............hc..TluGhl...hp..tl...s..c.l.Pp...hG-.......pl................p....s..................................hp..hplh....ch.-.sp+....lpplplph..t................................ 0 682 1425 1903 +159 PF02389 Cornifin Cornifin (SPRR) family Bateman A anon Pfam-B_1215 (release 5.2) Family SPRR genes (formerly SPR) encode a novel class of polypeptides (small proline rich proteins) that are strongly induced during differentiation of human epidermal keratinocytes in vitro and in vivo. The most characteristic feature of the SPRR gene family resides in the structure of the central segments of the encoded polypeptides that are built up from tandemly repeated units of either eight (SPRR1 and SPRR3) or nine (SPRR2) amino acids with the general consensus XKXPEPXX where X is any amino acid [1]. 
24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.63 0.71 -12.31 0.71 -4.54 5 373 2009-09-13 06:27:35 2003-04-07 12:59:11 10 30 87 0 232 424 33 118.10 16 44.10 CHANGED cQHQVKQPCQPPPQEsFVPp.TKEPCHocVPpPGNT........................KlP-sGsThV.EsshT........................KVPEPspTKVPEPCpoKVPEPspTKVPEPCsTKVPEPsYPKVPEPupsKVPE.G..................................PsHsKsPEPGasKVPEPGhPKVPEPCQo+VPEPCPSTVTPusAQQKTK ................................................................................................................................................................................................................t........th.tst.....st.hs.t....t.....th.p....st..ts..ths..t.t.....th.t..st........th.s..t....t....th.....t..................................................................................................................t................................................................. 0 106 118 167 +160 PF02628 COX15-CtaA Cytochrome oxidase assembly protein Mian N, Bateman A anon COG1612 Family This is a family of integral membrane proteins. CtaA is required for cytochrome aa3 oxidase assembly in Bacillus subtilis [1]. COX15 is required for cytochrome c oxidase assembly in yeast (Swiss:P40086). 
21.50 21.50 21.50 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.24 0.70 -5.55 146 2337 2012-10-03 10:28:09 2003-04-07 12:59:11 10 13 2048 0 837 1935 3330 287.80 24 85.08 CHANGED thhhhhshhhshsllllGuhTRLTsSGLuCs-.WPsChGt..hsPhsptp.............................hth....ahEahHRhhuthlGllhlshhlhsh.............h.h.h...tthh.hh.shhlhhLlshQuhlGhhhVp.s....l...............p.hh.lssHLhhuhhlhuhlhhhshphhth............................hsh..tthphhshh.sls.llhlplhhGu..hluu.pAuh.ss...................st.h..h..................t.h...........................phhHRhhAhlshhhhlhlhhhhhhtt.........tthpphuhhh..hsllhh.QlslGlsslhh.tlPl.h...lulhHphsAslLlsshl .........................................t.hhhhhsshhhhslllhGuhs..RlTsSGLuCss.W.PhCt.Gt...hsPhstt.t.................................................................hphhhEasHRhhuthlGl.hhlhhslhsh..............................................tt.tth......tpphh...hh.shhhhsh.lshQ...............uhl..G...hhs..VthsL.....................................................................sshh...lu...h...HLh.hu..hl.h...hus.hh.h.hshtlhp.....................................tp.hh..tth+..h.h..s....hh.hhh..hlhlt.l...h....h.Gu....hV..uu....p...A.uh.sh...............................t..h.h...........................h.....hp..thl..........................................phsH.RhhAhh.shshlhhhhhhshpth..........................tthtthshhh..hhllhl..Qs.hlG..lhslh.........h.....tls...........l...h............lu.hhHthhushLhshh.h................................................................................................... 0 262 541 716 +161 PF02936 COX4 Cytochrome c oxidase subunit IV Bateman A anon PSI-blast P00423 Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit IV. The Dictyostelium member of this family is called COX VI Swiss:P26310. 
The yeast protein Swiss:P53077 appears to be the yeast COX IV subunit. 23.50 23.50 23.60 24.00 23.30 23.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.77 0.71 -4.45 10 463 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 283 51 246 436 0 129.20 29 76.99 CHANGED chVuptshupPshsDl.-pWtshsp..pEpssllssLR-+pKssWpsLSh-EKKAlYhISFspphsc.httssGEhKhlhusslhslulohslhslh+hhshPphP+Ths+EWQcussEhhhspchNPlsG.huptYchcsph.p .......................t.t..u.shhhshp.ch..s...........p.-...ss.hpsL+-+pKssWppLohpEKpAhYhluFspphsc....hpt.sssEhp...plhs.ss..hhhluhohhlhhh..+ha......s.h..s...P.pThsc.E.Wpttp.chhh...s.hphNPlpG..uttashpt..................................................... 0 62 111 185 +162 PF03626 COX4_pro Prokaryotic Cytochrome C oxidase subunit IV Finn RD anon Pfam-B_3217 (release 7.0) Family Cytochrome c oxidase (COX) is a multi-subunit enzyme complex that catalyses the final step of electron transfer through the respiratory chain on the mitochondrial inner membrane. This family is composed of cytochrome c oxidase subunit 4 from prokaryotes. 25.10 25.10 25.20 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.75 0.72 -3.89 250 2280 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1772 0 517 1230 725 81.10 30 74.43 CHANGED ahhsallhllLTs..lshhls...........................hts.......hh.s.hshshsll..hh.lAhlpshlhlhaFMHlpt.spp.........hhphllhshllhsslhlhh ........hhGFlLullLTl.lsFhhs...........................hts.......sh...o..ssh..lshl...ls.hAllQshlpLlhFhHhsp.psctt.p....hhthlFsll..l..lsh.lllG.olWlh................ 0 131 290 402 +163 PF02937 COX6C Cytochrome c oxidase subunit VIc Bateman A anon PSI-blast P04038 Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIc. 
21.40 21.40 21.40 22.60 20.50 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.37 0.72 -4.09 8 161 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 101 51 89 163 0 69.70 43 80.44 CHANGED ushLsKPQMRGLLu+RL+hHlVsAFlluLGVsAhaKFusA-PRK+AYADFY+NYDAMK-FEcM+cAGlFQSVK ..........s...lsKPt.MRGLLs.+pl+hclssAhslo.lssusha......KasVs-.PRK+AYADFY+NYDuhK-FEcM..+c..AGlFpSs.............. 0 23 28 53 +164 PF02935 COX7C Cytochrome c oxidase subunit VIIc Bateman A anon PSI-blast P00430 Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIIc. The yeast member of this family is called COX VIII Swiss:P04039. 25.00 25.00 25.10 25.00 24.70 24.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.21 0.72 -4.17 14 253 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 202 51 155 269 0 68.00 35 72.16 CHANGED Mlupp.....sRRssspulRt.+...........atpGPsp.........NlPFpVpN.KahlhshhhsFhusGFusPFlllcaQLhKp .............................tpt.....sR.p.h.ss..s...s.h..Rt.p.................asEGPtp............NLPF....s..l.....ps..K.ahhh..hhahsahusGF.usPFh.ls.haQhhK........... 0 42 76 124 +165 PF04516 CP2 CP2 transcription factor Mifsud W anon Pfam-B_2156 (release 7.5) Family This family represents a conserved region in the CP2 transcription factor family. 
25.00 25.00 28.30 28.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.70 0.70 -5.31 29 661 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 171 0 367 552 0 208.40 37 38.82 CHANGED oh..p.p.h....h.shhs.tspphshst............tpphcFphsLpAPTAhhp+tsE.PlTYLNKGQuYslolh-s..ss.sshpssp...hR.ohlRlsFc--cpppc.tppWphW+.......u+Q+suK.psl-hsDhpp.......shsplp.lEpsua..suhsVhWssst.s...EssIhlphNhLSTDFSppK..GVKGlPlRlphcT.hhss..........tssssctschsaCpVKlF+D+GAERKlpsDht+lpKphtKh+ ......................................................tttpt.......................................ttspt.sFpYsLpAspS.t.K.t-tshTYLNp...G..Q.Y...t.l.plh-s..tt..s.t.htthp...........V+.SllhVVFc-c+.p...p.phpphchW+........pp.s....pRllDl.D.h..........Shshl.p.......hp.psu...NulpFhWsssc......cspl.FI..pVp.C.lST-..FospK..G...KGlPh....plQIDTap.sp................................p.sc.lHpA.CQIKVF.p.sKGA-RK.h+s-ccKhpK+psp..t.......................... 0 66 102 219 +166 PF00118 Cpn60_TCP1 cpn60_TCP1; TCP-1/cpn60 chaperonin family Sonnhammer ELL, Finn RD anon Prosite Family This family includes members from the HSP60 chaperone family and the TCP-1 (T-complex protein) family. 
21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 485 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.32 0.70 -5.88 94 23232 2009-01-15 18:05:59 2003-04-07 12:59:11 19 54 11481 758 4711 17190 6810 316.10 36 86.46 CHANGED lu......chlpooLGPpGh......sKhlhp..tstphhlTsDGsoIlcclcl.pcPs....AcllhpsupspscpsGDGTTosllLsspllppu.phl...pt.GlcPpplhcGhchAhptslchL......pph..shpsp..............pplhplupsuhsu+..........phlupllscAl...h.............................hclsp....ltlhphpsuph..pcoplh..cGlh.l..c+shhss.......ph.........pph...css.plllhstslph..ps.........t..hts.tth.th.t.p.t.lhphlcplhcts.hsl.................l......lspcsl.....ss.uhphLspsslhulpcl............ccpplccluhhoGup.hlsp..............tslp.s.s.......LGpsppl....plsp..cphsh.lp................................................s...................hsspssolllpGsopthlcEhccslcDAlsss+sslc..tst..llsGGGss.hpluptL.........chsps............hssc.........pph....ulchhupALcths+tLApNuGh.ss..........hpllsp.lps..............tpsp..............thGlshtssp......hh.D.hhptG......llDshpVpppslptAspsAs.hlLplDplltstp 
..........................................................................................................................................................................................................................................................................................................................................................................To...........AT..VL......A..p..u.l.lp....E.G.h+.sV....................sA...Gh.......NPh.....s..l.....+.R..G..I-.+.A.l......tssl..cpL....................cph.u.h...spsp................................ctIA..pV.usl.SAssc......................................p.p.lG..p..l...IA-AM..................................................................cKVGp........-GVITl.E..-.ups..................-...p..Lph........sEG.Mp.F......D+.GYlSsYh.............................pph................t.htl.........-sP.aILlh...-.c........K.lSsh..pp....................................................................................l..l..sh.L.-.tlh....p.tu..ps.L.............................l.........Il.u.-.......c.l........-s-AL.s.sL.....l.h.N.....p.h.p..uhhp......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................... 0 1615 2762 3850 +167 PF02787 CPSase_L_D3 Carbamoyl-phosphate synthetase large chain, oligomerisation domain Griffiths-Jones SR anon ref [1] Domain Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. 25.30 25.30 25.30 25.80 25.10 25.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.40 0.71 -4.35 95 8742 2009-01-15 18:05:59 2003-04-07 12:59:11 14 76 7559 40 1436 7541 2587 120.70 43 14.93 CHANGED lhp.p...LppPsspRlahltcAh.+pGhol-claclT.pID.aFLpplcpllphE.cplpp.....hth.sp.......hLpcsKphGFSDppIA....pl.......h..st........scpplRph.RpchslhPsaKhVDTCAuEFtu.pTPYaYSTY ......................................pcLpcPsDcRhFhlAs.......Al......+t....G..a..o........l-cla...-LT.KIDhW.FLpKhcpIlchp...pp......Lcph...................pslsh-...............hLppAKphGFSD+pIA......th.....................................h......ps...........................oEhs.VRch.R.c..c.h..sI.pP...hhKplDTsAAEasu...sTsYhY.TY................................ 0 471 910 1212 +168 PF04969 CS CS domain Finn RD, Fenech M, Eberhardt R anon Pfam-B_1217 (release 7.0) Domain The CS and CHORD (Pfam:PF04968) are fused into a single polypeptide chain in metazoans but are found in separate proteins in plants; this is thought to be indicative of an interaction between CS and CHORD [1]. It has been suggested that the CS domain is a binding module for HSP90, implying that CS domain-containing proteins are involved in recruiting heat shock proteins to multiprotein assemblies [2]. 
Two CS domains are found at the C-terminus of Ubiquitin carboxyl-terminal hydrolase 19 (USP19) (Swiss:O94966), these domains may play a role in the interaction of USP19 with cellular inhibitor of apoptosis 2 [3]. 21.30 10.00 21.30 10.10 21.20 9.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.72 0.72 -3.43 100 2605 2012-10-02 21:54:05 2003-04-07 12:59:11 11 115 372 25 1627 2732 72 77.20 20 23.00 CHANGED spasWtQo.spVtlsl.l.sss....tcslplphpp.pp...lplt.............lp.st...p.hhhpsc.LhspIss-cSpaplpss........plplpLpK ...........................................phpWtQ...ots.p.......V.h.....lpl...l.sst..............tc.s..l......p.....V...phpp...p.p.......lplp..................................................hp.ttt.......pthhh....p..h......c.....L..h..pt.Ip..s.....-..pS...p..a...plpss.........plp.lpLpK....................................................... 0 581 857 1254 +169 PF00988 CPSase_sm_chain Carbamoyl-phosphate synthase small chain, CPSase domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_345 (release 3.0) Domain The carbamoyl-phosphate synthase domain is in the amino terminus of protein. Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines [1]. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00289. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. 
21.10 21.10 22.80 22.30 20.70 19.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.42 0.71 -4.64 134 6372 2009-01-15 18:05:59 2003-04-07 12:59:11 17 41 5635 40 1365 5095 2458 123.10 45 25.61 CHANGED hpAhLlLEDGolapGp.uhGu...p..u..pshGElVFNTuMTGYQEllTDPSYsGQIlshTaPhIGNhGlNtpD...........hE..S......p.....pstspGlll+-hspt..sSsa+up.poLspaLpppslsGIsGlDTRuLs++lRp.pGshpuhIs ...................................thLhLtDG.s.hapG..u.hG.u..............p......t.............t.sh..GEl............VFsTuMTG.YQEhlTDP...SYp..sQIlshTY..Ph...IGNhGl.ss-.D..........................................hE..S............p..........plpssGllVp-h.s.pp....sSNaR.sp......poLscaLcpcslsGIu.GIDTRtLT+hl.R-.p.G.shpGtI.h.................................................................................................. 0 437 856 1143 +170 PF03178 CPSF_A CPSF A subunit region Mifsud W anon Pfam-B_1224 (release 6.5) Family This family includes a region that lies towards the C-terminus of the cleavage and polyadenylation specificity factor (CPSF) A (160 kDa) subunit. CPSF is involved in mRNA polyadenylation and binds the AAUAAA conserved sequence in pre-mRNA. CPSF has also been found to be necessary for splicing of single-intron pre-mRNAs [1]. The function of the aligned region is unknown but may be involved in RNA/DNA binding. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.87 0.70 -5.34 80 1132 2012-10-05 17:30:42 2003-04-07 12:59:11 10 23 315 30 802 1130 24 296.80 22 26.18 CHANGED puslpllss........hp.........sl......s..shpL..p.p.sEtshulppsphp.spt....................t.cp....hllVGTuhshspc..sp............Gplh.lacltp...................cLchltcpcl.cGs...ssAlsth..pG+.llsuhG.pplhlaclsccp.....Llthu.hptsh...hlsslps.......h.......ss....hllluDhh.cSlthl...tapp.........-sp.hhhhucDhp..sh.lsssphL.Dt.c.ollsuDptsNlallch...s.cssps.ssp...........................+L.ppupFalGchsssh..hsll.....t.p.........................llauohpGolG..hllP.lsccshchhppLppplpsph..............slsGhs.pta.....Ruhh................s+sllDG-Llcp.......ahp ..............................................................................................................................................l.lhs..p...hp......hl...p..thph...t...t...tEhsh.......shh.s.p...http..........................................pthhhVGTuhsh..s..p.tsp........................Gtlh.laph.p..............................................................cL.phl..tcp.ph....c.ss............shulsth........pG....+.llsul....u....pp......lh...laph..spcp..............................Lh..ths.h.tph.........hlhtlpsh...................................ts.....hll.lu..Dlh.c.S.lhhl......tapt................-sp.....l..........hu.c..Dht.........s..t..h..ss...s..s..phl..Dt....c..sh.......lsuDp...................sNlhlhph..........s..s.psspp.ptp...................................................................................................+Lthhsp..aal..G....ch.ls.sh...p....ss....lh.......tpps..........................................................................................llau.T..l.p.GslG..hl...hs..l...scp..p........hchh.tl..ptp.lt..p.....h........................................................s..h...s.Gh.s..h...ta.............Rs..hh.s.......................................scshlDG-Lhppa..
................................................................................................................................ 1 286 462 673 +171 PF00313 CSD 'Cold-shock' DNA-binding domain Finn RD anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.10 0.72 -4.19 40 14886 2012-10-03 20:18:02 2003-04-07 12:59:11 17 69 3908 61 3740 7958 3141 65.20 46 67.33 CHANGED hpGsVKWFNscKGFGFIs.--Gst.DVFVHaSuIpssG.....a+oLpEGpcVpF-lpp..Gs+GspAsNVpsh ..............................pGpV..K.WF...Ns.p..K....G..F..GFIs.......s.......-..........s......G..........s..........t.....D..VFVH..a........SA..I.p.s.sG..........a.+oL......p..E.G.Q..c.............V.pF-..lpp.........Gp+G..s...p.A.sNVh..h............................. 0 1082 2120 2935 +172 PF04442 CtaG_Cox11 Cytochrome c oxidase assembly protein CtaG/Cox11 Kerrison ND anon DOMO:DM04116; Family Cytochrome c oxidase assembly protein is essential for the assembly of functional cytochrome oxidase protein.\ In eukaryotes it is an integral protein of the mitochondrial inner membrane. Cox11 is essential for the insertion of Cu(I) ions to form the CuB site. This is essential for the stability of other structures in subunit I, for example haems a and a3, and the magnesium/manganese centre. Cox11 is probably only required in sub-stoichiometric amounts relative to the structural units [1]. The C terminal region of the protein is known to form a dimer. Each monomer coordinates one Cu(I) ion via three conserved cysteine residues (111, 208 and 210) in Saccharomyces cerevisiae (Swiss:P19516). Met 224 is also thought to play a role in copper transfer or stabilising the copper site [2]. 
25.00 25.00 26.00 27.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.05 0.71 -4.42 135 1018 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 946 2 456 917 1462 143.90 41 68.61 CHANGED hVP.LYclFCc.sTGhsGpTp..............t.ssstph...........ts..RplpVcFsAssssshPWcFcPpppplpV+PGEsshshYpApN.occslsGpAs.sVsPspAutYFsKlcCFCFspQsLpsGEph-MPVhFalDP-lscD.....lcslTLSYTFFcs .....................hVPLYchhCp...sTGhsGpstp.............pttssp.............hsts.....Rp...l...pVpFsAssssshPWcFpPpppplp...............VpPGEsshshYpApN.o.scslhGpAshsV..sPtpAut.........YFsKl.pCFCFpcQpLpsGE.ph-MPVhFalDP-hspD.....lcslTLSYTFFc.......... 0 129 252 360 +173 PF01148 CTP_transf_1 Cytidylyltrans; Cytidylyltransferase family Finn RD, Bateman A anon Pfam-B_921 (release 3.0) Family The members of this family are integral membrane protein cytidylyltransferases. The family includes phosphatidate cytidylyltransferase EC:2.7.7.41 as well as Sec59 from yeast. Sec59 is a dolichol kinase EC:2.7.1.108. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.96 0.70 -4.73 48 6283 2012-10-02 14:06:56 2003-04-07 12:59:11 15 17 4727 0 1821 4702 2431 255.40 24 86.77 CHANGED htRhlsshhhlslhhhhlhhut..........hhhhhllhhlshhshhEhhphhthphhthhhhhs......................h.hhhhhhshhhhhh..h..hhthhhhhh.hhhhlhh.....................htthhhhhh.hhhhshhh.............................h.hht..htGhhhhlhshhllhssDshAYhhG+taG+p....hhPplSPsKTlEGhlGGhlsullhuhlhhhhht..........................................................hshhthlhlullsulhuhhGDlhcShhKRphtlKD.GphlPGHGGlhDRhDuhlhsuslhahhhhhh ....................................................................................................................................................................................Rhhsu.h.lhls.hhl.h.h.l.hhss...................hhhh.h.hh.h...h.l....s......h...luhh.E.hh...p..h.h.....t.h.p.t...h...p.h........h..hhh........................................................h..hh....h..h...h..h.....h...h....h.....h.............h...h.....................h........h...........h.....h.........h.....h......h.......h.....h....h....h..h.....h.h...h.lhhhh..........................................................t..ht.p..h..h...h...s..l..h...s..h..h..h.h.shhh.....................................................................................hh.h.h..t...s...h.........t....G..h......h...h....l.....l..h..h...h...h.l.lh..s....sDsuAYh.....h.....G.....+.....h......a.....G......+......+....Kl..............hP........p.........l.......S....Ps.K...ThEGhlGG.ll..s.uh...ll..u..h..h..h..s.h.h..hs...............................................................................................................................................................................................hs.hht...h.l.l.h..u.ll..h.u.l...h.u...hGDLh.EShhKRphG...l.K.........DS...G...p...llP.G.HG.....G....lLDRhDS..h........hhssPlhhhhh...h...............................................
................................ 1 615 1145 1531 +174 PF04145 Ctr Ctr copper transporter family Bateman A anon Pfam-B_3006 (release 7.3) Family The redox active metal copper is an essential cofactor in critical biological processes such as respiration, iron transport, oxidative stress protection, hormone production, and pigmentation. A widely conserved family of high-affinity copper transport proteins (Ctr proteins) mediates copper uptake at the plasma membrane. A series of clustered methionine residues in the hydrophilic extracellular domain, and an MXXXM motif in the second transmembrane domain, are important for copper uptake. These methionine probably coordinate copper during the process of metal transport. 22.10 22.10 22.20 22.20 21.80 22.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.75 0.71 -4.14 116 1128 2009-01-15 18:05:59 2003-04-07 12:59:11 10 21 303 3 804 1068 7 123.20 22 63.53 CHANGED MsMh....Fphsh.ssslLFpsWpss..otutahsohlhlhlluhhhchLchhppphcpthh......tp........thtt......t...............................................................................thphhtclhp.....uhLah......lphsluYh..LM.....LlsMoaNshlhlulllGh....slGhhhF ...................................................................................h.Mh.....F.hsh.p....hsl..laptWphp...............o..s..thhhuhlslhhluhhhEhLphhpt.ht..t.t.hh........tt.........................................................................................................................................................................................................................................................h..h.chhp.....shlah......lphsluYh..lM........LhhMoaNshlhlullhGhhlGhhh................................................................................................................... 
1 242 446 686 +175 PF00394 Cu-oxidase Multicopper oxidase Finn RD, Griffiths-Jones SR anon Prosite Domain Many of the proteins in this family contain multiple similar copies of this plastocyanin-like domain. 20.30 19.90 20.30 19.90 20.20 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.94 0.71 -4.33 72 9871 2012-10-02 17:41:00 2003-04-07 12:59:11 17 57 2516 325 2092 9906 260 140.70 23 37.09 CHANGED cphslhluDWY.cpsspsh...h.ts............s..sDuhlINGpst...s....................................hhslsVpsGKpYRlRll.ssuspsshsFpIsuH.phollEs......DGsasp..Phss-slpIhsGQpaulllsA...sp.s...sssYaltst........h..s....htsssssulLcYpsu ........................................................................................................................t..hhlh.pDaa...pstps....h.p...h.t..tt.s...th...............................thhssp..h.lh..N.G.............................................................................................................................sss...h.s.s.p...sh..p...s..t.h.s...............p.l.h...ll..s..t.u...s.p...c.s....h....s......a.....hI..........s.........G........H..............h.l..ltu..........c.....G....p.a.ss..............s.h..s...l....-.....s.....h..h........l.t.s.G.............pthssl...hsh......pp............ss.Y....hhhs..........................................s.................................................... 0 504 1249 1757 +176 PF03712 Cu2_monoox_C Copper type II ascorbate-dependent monooxygenase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of members of this family adopt the same PNGase F-like fold. 
20.50 20.50 20.60 20.90 20.20 20.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.01 0.71 -4.88 25 590 2009-01-15 18:05:59 2003-04-07 12:59:11 10 37 157 18 376 631 214 149.50 26 25.41 CHANGED pa.AGlhhLhsshh....sIPPspssash-ssC.hpp.scshaP......FAhpsHsHthG+hlsuhphRssp.....hphlu+psshsP..QtFYslc..phlcVpsGDtLsscCoY-op..sp.........spssthGtospDEMCshYlhYYs.....-ssphpshtsChustssp...hhpshssts ...............................................AGhh.hsh........hIPPsppsh...p.shCphpp..sp.hhs...............FAhhhH.....sHhhG+..tlps..h.phR.ssp.......................phphlscss.......sas......thQt...h.h.....lp.......p.lsl.hs...........GDhL.....hscCsasop....s+.............................spsThhGh..ssp-EMChhalhY.Ys......t...p.h..h....t..t................h.................................... 0 189 216 300 +177 PF01082 Cu2_monooxygen Copper type II ascorbate-dependent monooxygenase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of members of this family adopt the same PNGase F-like fold. 21.10 21.10 21.60 21.50 20.10 20.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.20 0.71 -4.20 38 534 2009-01-15 18:05:59 2003-04-07 12:59:11 15 24 136 18 344 574 28 124.70 27 21.58 CHANGED spph-lthpsl..hPsp.sssYaCphh+lss....p+aaIlpaEPlhst......shlHHMlLatCs......pshsp..................sstC.....h.tphthC.......spllhAWAhGutshta.PccVGhslGtst.s.s+allLEVHYss..sshpsups .............................h...h-lhh.sl...hPpp..pssYhChhh...cls......pcpal..l.p.a.-Phhs................shVHHhll......at...Cs........psh.sph......................st.C...........hs.tph.t..sC................ppll..hAWAh.Gut....s..hph..Pc..csGhslGsss...sspYlhLplHYss..sth....th.......................... 0 156 180 269 +178 PF02845 CUE CUE domain SMART anon Alignment kindly provided by SMART Family CUE domains have been shown to bind ubiquitin [3-4]. 
It has been suggested that CUE domains are related to Pfam:PF00627 [4] and this has been confirmed by the structure of the domain [5]. CUE domains also occur in two protein of the IL-1 signal transduction pathway, tollip and TAB2 [2]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -7.57 0.72 -4.44 132 1581 2012-10-01 23:03:33 2003-04-07 12:59:11 11 43 284 10 1034 1491 4 41.20 24 8.01 CHANGED ppstl..pplpphFPslspphIptsL.ppsssl-tslstlLphs ............thl..pplpphFPs.lspphlppsLt..tpsssl-tslspLLph.......... 1 287 499 788 +179 PF00190 Cupin_1 Seedstore_11s;Cupin; Cupin Griffiths-Jones SR anon Prosite Domain This family represents the conserved barrel domain of the 'cupin' superfamily [1] ('cupa' is the Latin term for a small barrel). This family contains 11S and 7S plant seed storage proteins, and germins. Plant seed storage proteins provide the major nitrogen source for the developing plant. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.78 0.71 -4.76 225 3563 2012-10-10 13:59:34 2003-04-07 12:59:11 17 24 631 221 1395 6143 837 141.10 18 62.47 CHANGED hshtpssp....hhpspuGtl...pthsspphs.hhtphthsththth.pssulhsP+ap..sAspllaVhcGpuhhshlssss...........................................................................................p.hppc......lppGDlhslPtGhshhhhsst..psshthsshssssss...................lst.hhtpsFhlusppsp 
........................................................................................................p.t.su..h...p...h.s.s..pphs........tth..t..h..s..tsth....h.tss.u.h.h.....P...Hap..pA..s....c..lhal...l...p.G.......p..uhlsh.l.ssss......................................................................................................................................ph.hspp......lptGD..lhh......hPt..G...h..s..a.a.h.h...s.......s........t......s....s...sh..t.l..h.h...h..p...sss..t...................................hs..hh.psa.hs.................................................................................................. 0 198 814 1141 +180 PF04889 Cwf_Cwc_15 Cwf15/Cwc15 cell cycle control protein Mifsud W anon Pfam-B_6589 (release 7.6) Family This family represents Cwf15/Cwc15 (from Schizosaccharomyces pombe and Saccharomyces cerevisiae respectively) and their homologues. The function of these proteins is unknown, but they form part of the spliceosome and are thus thought to be involved in mRNA splicing [1]. 
27.20 27.20 28.10 27.50 27.00 27.10 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.91 0.70 -4.66 30 382 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 282 0 279 376 9 193.80 37 96.06 CHANGED MTTAHR....PTac.PA+GtpspuG......otpasSRsLPuHTpLK...........hRps.GQssp-ch.......t...p+DLRsEL.ctEtpptscc..........................thhth-stspppshch....spcp.cc..h.........................K+.h.............tpspshDADc........tp.............ssssD--.................uD...........s--DEsttLhtELE+IK+ERtEcct+c-cEtttpctcp+cpclhpGNPLLN.................ttssFslKRRWDDDVVFKNpA+s..cpt+c.pFlNDslRS-FHKKFhsKYl+ ...................................................MTTAtRPTacsAp.Gtpst..tt........otthpsR.LP.uH...........TplK..................hRps..sQ.s.....s.t-h.................p..t+-h+tEL.ptEttthtpp...................................................ttt.t......t.t.................................................................................................st..shDuDs.s..............................................tppp-p-................tc.........psc-D-s.ttLh..tELp+IK+ERtpcpt+cEtcpttp-tc.+.tplhpGNPL.LN.........................tsh..plKR..RWDDDV.V..F......K...NpA....+u....cptc......c....FlNDhlRS-FH++FhpKYl+....................................... 0 99 157 228 +181 PF04677 CwfJ_C_1 CwfJ_N_1; Protein similar to CwfJ C-terminus 1 Kerrison ND anon DOMO:DM04663; Family This region is found in the N terminus of Schizosaccharomyces pombe protein CwfJ (Swiss:Q09909). CwfJ is part of the Cdc5p complex involved in mRNA splicing [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.63 0.71 -4.46 8 549 2012-10-01 23:45:21 2003-04-07 12:59:11 10 23 278 0 412 1265 265 125.80 29 20.81 CHANGED spcp++hpps.csChFClsssslccHLlVSlGppsYluLPc.ssL...........spsHslIlPlpHhsss.hols-....-lh-EIppF+KuLstM.asupspDslFaEhs...spRs.HhplpsIPlPpphuch..ushhFp ..................................h...pp.ttt.spChaC....h...s....s.....s.....p.....h.....t....p...H....l...ll.o.l.Gsc.............sY.L.s..lsp..ssL....................ssG..HslIlPlpH.hssh.......sphsc................-sh.c......E....l....p......p....a.....+....p....u....L....p.c..M.....a..p....s.......p....s....p...s.......s.......l.....h..a...Eps..............t+.p......hHhthps.....lP..lPtphsphh...a............................................................ 0 144 225 337 +182 PF04676 CwfJ_C_2 CwfJ_N_2; Protein similar to CwfJ C-terminus 2 Kerrison ND anon DOMO:DM04663; Family This region is found in the N terminus of Schizosaccharomyces pombe protein CwfJ (Swiss:Q09909). CwfJ is part of the Cdc5p complex involved in mRNA splicing [1]. 21.30 21.30 21.50 21.30 21.10 20.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.14 0.72 -3.50 48 528 2009-01-15 18:05:59 2003-04-07 12:59:11 9 19 269 0 399 536 6 99.10 26 16.57 CHANGED castpt.hchtshpt...........hp..hspshsYFhVpls......................hsp..shhphl-.....................cpp..cFslp..FuRcVlus..lLsL.pRhpW+c..stpsppcEctcstpF+ptacsF.DaT ................................................h.ph.t.........pshcp..hs.pshsYFtVphs.............................hss....shs.Hhlc..............................................cpt..c.FPhp..FGR..-Vluu..hLs.lt.s....+...tW+p.....sppstc-cppcstpF+ct..ac.aDaT.......................... 0 140 219 326 +183 PF01705 CX CX module Hutter H, Bateman A anon Hutter H Family This domain has no known function. It is found in several C. elegans proteins. 
The domain contains 6 conserved cysteines that probably form three disulphide bridges. 25.00 25.00 25.80 26.40 24.00 24.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.31 0.72 -4.00 14 114 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 8 0 108 106 0 59.90 32 25.21 CHANGED YYWsspa....lpsscpP.shCEYpIs-..-DtELpNVsFsNGo+PpSlhFuCsst.ppCCGh-CCssh ....................YYWttta.......hts.pts..hCpa.lsp..pDhp..lpslpFssGop...P.pslhFuCtts.ppCCGhcCCt....... 0 30 40 108 +184 PF04673 Cyclase_polyket cyclase_polyket; Polyketide synthesis cyclase Mifsud W anon Pfam-B_5596 (release 7.5) Family This family represents a number of cyclases involved in polyketide synthesis in a number of actinobacterial species. 23.70 23.70 25.00 53.00 23.30 23.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.28 0.72 -3.95 23 122 2009-09-11 03:24:20 2003-04-07 12:59:11 7 2 90 1 40 120 0 96.20 46 87.19 CHANGED MsPususcVAclFuEpD...sTELP+lhGspRRpLFpa+s.LYFHLh-.ssscssssltpA+sHPpFhclSccLpsalssYDP.sTWRuPpD.AMAppFYpWsA .....MsPususcVAclFA-SD...uoELP+hhGVpRRpLFpacs.LYhHLlE..s....c....csssttltps..+.s..cP...cFhclSccLpsalssYDP.tTWRuPpD.AMAppFYpWp... 0 10 30 38 +185 PF00134 Cyclin_N cyclin; Cyclin, N-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Cyclins regulate cyclin dependent kinases (CDKs). Swiss:P22674 is a Uracil-DNA glycosylase that is related to other cyclins [4]. Cyclins contain two domains of similar all-alpha fold, of which this family corresponds with the N-terminal domain. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.54 0.71 -4.57 127 6213 2012-10-03 00:42:12 2003-04-07 12:59:11 18 86 547 190 3707 6602 83 123.90 20 31.34 CHANGED -IapahpphEtp...t.s.saht.pp.....ls..pMRsILlDWLlc.VppcacLh.ETLaLslshlDRFLutp......l.+pcLQLlGlsuhhlAuK........aEEh...............hsPplp-ashlo.....Ds.sas...............pcpllpMEphlLpsLpapls ......................................................................................................................hp....th.Rt.h.h...h...p..a....l............h..........p........l..t....p.p....h...p...........l........t..t...p..T.h..h.h...u.l......s.....h.h.......-.......R...F......htpt..s.....................lpp...p....p......h.......p...L....l....u......s..ssl.h..........l.....Au...K.............................h..E..Eh...................................................s.s..p.l..p.c.h..h.....h.hs.........cp..shs.................................................tpp..lhphEh.hlLpt.Lpapl....................................................................................... 0 1218 1928 2867 +186 PF02276 CytoC_RC Photosynthetic reaction centre cytochrome C subunit Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_5109 (release 5.2) Family Photosynthesis in purple bacteria is dependent on light-induced electron transfer in the reaction centre (RC), coupled to the uptake of protons from the cytoplasm. The RC contains a cytochrome molecule which re-reduces the oxidised electron donor. 
22.40 22.40 22.50 26.80 20.30 22.30 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.26 0.70 -5.32 25 104 2012-10-01 23:37:15 2003-04-07 12:59:11 13 3 88 20 28 104 73 250.30 32 79.94 CHANGED h.ppP.ss-shQoG.RGsGMptsp.scslsp.sss..shss.stsssss..usPpAu-lYpNVp.VLGDLosupFsRLMsAhTpWVu.Pc-GCsYCHss.p..sausDshYTKhVuRpMlpMTQplNssWss.HV...upsGVTCYTCHRGpPVPstlWap..ssshssuhtGhsssQNtuss..............ss.saoSLPsDshp.aL....L-.....scsI......+Vpshsuhs...ssssc...s.lppAEhTYuLMhHhSsSLGVNCTaCHNoRAFa-W.sQuTPQRssAahGIcMVR-lNssYltPL.pshhPspRLGPh.GDssKlsCtTCHpGshpPLtGsshlcDaPELAss .......................................P.....s.QhG.RGhuh......t...t......t..s.s...s....stshAsp.hapNVp.VL.tcls.spFs+lMsuhopWlu...pps..CsYCHs....t.......shAsDshapKhVuRpMlpMTpplNs...pW.....ps..Hs.......stsG....V...oCYTCHRGpshPt.hW...hp.......tt.h....t..ps..t.................s.thsuLP.Dshp.aL.....t......tt.l......pl.s........puhs.....t..tt...s.hppsEhoauLM..ahSsuLGVsCsaCHNopsFhsh.p..p..ssPp+shuhhulpMsp-lNtpah.sh.t..hPt..phh......G...s-s.+hsChTCHpGh.pPL.G..h.phhtpa..L..s................................... 0 14 24 25 +187 PF05038 Cytochrom_B558a cytochr_b558a; Cytochrome Cytochrome b558 alpha-subunit Moxon SJ anon Pfam-B_5327 (release 7.7) Family Cytochrome b-245 light chain (p22-phox) is one of the key electron transfer elements of the NADPH oxidase in phagocytes [1]. 
25.00 25.00 26.50 25.80 21.30 23.30 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.15 0.71 -4.66 4 85 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 62 1 43 84 0 159.20 60 95.33 CHANGED GpIEWAMWANEQALAoGLILlsGGIVusAGpFppW.FGAYuIAAGVLVCLLEYPRGKRsKG.oThERsGQ+hLTtsVKshGPLoRNYYlRAhLHLulsVPuGFlLATILGssCLsIASlIYLhAAl+GEpWpPI.s+.c-.Rs.VGtoIKpPPoNPPPRPPsEhR+KsuE-.sssA.........NPhsVTspsV ............GpIEWAMWANEQALAuGl.lLlsGGIVu.......sAG.pFptW.F....uA...YuI..sAGVhVCLLEYPRG.KRpKG.oTMERsGQ+YhTslVKhFGPLT...RNYYlRAhLHhh.LuVPuGFLLATILGTsCLuIAShIYLLAAlRGEpWp.PIE......s.+scp..RsplGsoIKpPPoNPPPRPPs-sR+K.s-t.t.ss...........NPhsVp.................................... 0 14 17 26 +188 PF01820 Dala_Dala_lig_N Dala_Dala_ligas; D-ala D-ala ligase N-terminus Bateman A, Moxon SJ anon PSI-BLAST 2dln Family This family represents the N-terminal region of the D-alanine--D-alanine ligase enzyme EC:6.3.2.4 which is thought to be involved in substrate binding [2]. D-Alanine is one of the central molecules of the cross-linking step of peptidoglycan assembly. There are three enzymes involved in the D-alanine branch of peptidoglycan biosynthesis: the pyridoxal phosphate-dependent D-alanine racemase (Alr), the ATP-dependent D-alanine:D-alanine ligase (Ddl), and the ATP-dependent D-alanine:D-alanine-adding enzyme (MurF) [3]. 
21.60 21.60 23.00 21.60 21.30 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.29 0.71 -3.78 176 7730 2009-01-15 18:05:59 2003-04-07 12:59:11 16 22 4347 83 1553 5359 3003 82.10 29 32.44 CHANGED h+..lullhGGpSsE+-VSltSApslhpuL....p.p..ppaclh.......l.hls.cpG.tahhhtt..t.ht..t......................................................htphDllFslLHGs..hGEDGslQGlL-hhslPYsGs ............................................................plsllhGGhSsE+plSl.Sutslhtsl..................p......tth.p.sh...............h...h.....p....t................................................................................................................................................thsh...ha.hlHG....GE..DGslQGhhchhtlPasGs........................................ 0 540 1043 1317 +189 PF01113 DapB_N DapB; Dihydrodipicolinate reductase, N-terminus Finn RD, Bateman A, Studholme, DJ anon Prosite Domain Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The N-terminal domain of DapB binds the dinucleotide NADPH. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.49 0.71 -4.20 99 5206 2012-10-10 17:06:42 2003-04-07 12:59:11 15 17 4187 43 1251 5533 3347 112.90 28 41.03 CHANGED l+lslsGAsGRMGppllcslpp.p......sshpLsuul-css......t.shs...........t.....sl.l...sclppshsp..sD..........VllDFT.pPpushpplchshpt.shshVlGTTGa...opcphpplpp.hu...cc..lsllhusNa ........................................h+lslhG.stG+MG....p....t.l.l.p..slt.p...t........p.s..h.p.L..s..u..s..l-pss...............................................................t.ht......h...s...l.....l............ss..l..s...s.l.hsp.........sD.......................................V.ll.......D.F.......T.....p..P..p...s....s........h.....p..p...l..p...h....s...h...pp...s..........h.....s......h.V..l......G...T.T.G.h........sptt..h..t...t.lpp...hu...pp..........hsh.lhusNh........................................................................................... 0 372 808 1057 +190 PF01682 DB DB module Hutter H, Bateman A anon [1] Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 12 conserved cysteines that probably form six disulphide bridges. This domain is found associated with ig Pfam:PF00047 and fn3 Pfam:PF00041 domains, as well as in some lipases Pfam:PF00657. 25.00 25.00 28.80 25.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.97 0.72 -11.48 0.72 -3.96 26 351 2009-01-15 18:05:59 2003-04-07 12:59:11 14 38 33 0 314 315 0 96.30 27 28.50 CHANGED CCpspt.lsstChp.hCsapsh......h.shhhtsspCsh..phsplhpCAupscDHosCCtcpuVs.......spChshCp.p.s......hsshthshhsChp.phsshhpCFh ..................................CCppps..l.sstChs.hC..sapth.......................p.h.hhtsspCsh..phsphhpCAup..G..+.............D..HopCCt..cpGls.....................spChshCpsp.s......hsslthshhsChp.thpshhpCa............................................ 
0 111 144 302 +191 PF05011 DBR1 Lariat debranching enzyme, C-terminal domain Wood V, Bateman A anon Pfam-B_9676 (release 7.6) Domain This presumed domain is found at the C-terminus of lariat debranching enzyme. This domain is always found in association with Pfam:PF00149. 21.10 21.10 22.50 21.40 20.20 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.26 0.71 -4.31 24 333 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 279 0 244 330 6 132.20 28 26.79 CHANGED lPc....os+s..T+FLALDKCLPtRcFLQll-lsstsss............hpLpYD.EWLAIh+shsphhp......l.sshss.sstspG....c.tacshhE-ppphVpEcl.ttscLpl.PcNFshTAPsacsu..sth...sst.PstatNPQTspFCcLLGlcshh .....................................h......tsps...T+FLALDKCLPc....R...c....F.....LQl..l-ltstssss............................................pLpYD.EW...L.....uIh+shpshhs..............................l.t.p.........stp.t..................hts.....p.pthp.l..pp...l...........t...t..p......htl....P.p....sF.....T.ss.s.ass....s...............................hhNPQTttasthlsl....h....................................................... 0 86 135 205 +192 PF03107 C1_2 DC1; C1 domain Bateman A anon Pfam-B_16 (release 6.5) Domain This short domain is rich in cysteines and histidines. The pattern of conservation is similar to that found in Pfam:PF00130, therefore we have termed this domain DC1 for divergent C1 domain. This domain probably also binds to two zinc ions. The function of proteins with this domain is uncertain, however this domain may bind to molecules such as diacylglycerol (A Bateman pers. obs.). This family are found in plant proteins. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.83 0.72 -3.83 90 558 2012-10-02 13:15:50 2003-04-07 12:59:11 11 57 24 0 370 664 0 31.30 34 11.07 CHANGED hhCslC.c+phssh......hYpC.ppss........aslHsp.Cu ..........CslC.ccp..lssp.....hhYpC..pcCs........aslHsp.Cs. 
0 312 351 358 +193 PF00383 dCMP_cyt_deam_1 dCMP_cyt_deam; Cytidine and deoxycytidylate deaminase zinc-binding region Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.79 0.72 -4.37 59 17292 2012-10-02 00:10:39 2003-04-07 12:59:11 17 96 5079 154 4759 12505 5590 105.20 26 47.17 CHANGED hp.cchhhphAhthAppuh..stsphsVGAllVp..sspllupGhNtp.tuts......................................................shHAEhsAltpAsp....t.ph........................s..sslal.............ThpPCs.h......Csphllpt.u.lp+Vlhst ..................................................................................................................................................p.hhphAlp.h.Appuh.......st.sphsV.G.A.ll..V...........p....................s........s........p..........l...........l..........u.........p..G.h.N.t.p.....t.s.ts.............................................................................................................................s.hHAEh.p...Alpp......Aspt....t...t.ph............................................................................................................t.s.....sslYV.......................................T.l...pP..Csh................Cspsll...ps...t..ltclhhh.t............................................................................................................ 0 1638 3063 4050 +194 PF05026 DCP2 Dcp2, box A domain Wood V anon Pfam-B_10622 (release 7.6) Domain This domain is always found to the amino terminal side of Pfam:PF00293. This domain is specific to mRNA decapping protein 2 and this region has been termed Box A [2]. Removal of the cap structure is catalysed by the Dcp1-Dcp2 complex [3]. 
28.00 28.00 29.10 29.90 26.80 27.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.02 0.72 -4.09 23 313 2009-09-11 21:45:37 2003-04-07 12:59:11 8 12 259 7 225 319 2 85.10 41 13.28 CHANGED cchL-DLssRFIlNlPpE-LsolERlhFQlEEApWFYpDFlR.hsP..tLPshsh+sFupplhp+....CPLl.pa......ttshcpALpcFtpYK ...........clLDDLssRFIlNlPpEE...l....p....sh.........RlhFQlEpAaWFY.DFhp.............t.......t.s......P......sLP.sh.s.l+sFut..t.............l.F....p+......CPh..Ltta......ttchcchhpcappYK............... 0 83 130 190 +195 PF03607 DCX Doublecortin Griffiths-Jones SR anon PROSITE Family \N 21.40 21.40 22.10 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.58 0.72 -4.38 53 1063 2009-01-15 18:05:59 2003-04-07 12:59:11 12 41 119 7 602 931 2 60.20 32 14.44 CHANGED lllsp.+phpoa-slLsclTc....plpLs.tG.VRplaThcG.+plsslcc.LpsGpsYVsu.upE.tFK ......................hhlsp.+ph+oF-slLs-lTc.......tlp..L.....s...pG..V+plY.Th-G.+p.....l..ssLp-..l...t..-...u.ps..aVss.u..E..F+........... 0 142 196 349 +196 PF03455 dDENN dDENN domain Callebaut I anon Callebaut I Domain This region is always found associated with Pfam:PF02141. It is predicted to form a globular domain [1]. This domain is predicted to be completely alpha helical. Although not statistically supported it has been suggested that this domain may be similar to members of the Rho/Rac/Cdc42 GEF family [1]. 
21.10 21.10 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.47 0.72 -3.90 46 1393 2009-01-15 18:05:59 2003-04-07 12:59:11 14 84 197 2 782 1322 6 70.50 27 5.94 CHANGED lsp.plpcsFLphhsp..lhtsYcsalph.p..tt...............tFpppuFlcs.pspshp.....pFlpphhco...QhFspFIcc+ ...........................sttlptsFl+hhsp...lht..s......Y....c.palphhp...tt.p............................................sF.pp....p.uFLps...psps.p.......pFl.pphh.cT...QhFttFIpp+............................................ 0 198 284 494 +197 PF02791 DDT DDT domain Iyer LM, Aravind L, Bateman A anon [1] Family The DDT domain is named after (DNA binding homeobox and Different Transcription factors) and is approximately 60 residues in length [1]. Along with the WHIM motifs, it comprises an entirely alpha helical module found in diverse eukaryotic chromatin proteins [2]. Based on the structure of Ioc3, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [2][3]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. In particular, the DDT domain, in combination with the WHIM1 and WHIM2 motifs form the SLIDE domain binding pocket [2]. 20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.84 0.72 -4.20 42 658 2012-10-07 04:36:59 2003-04-07 12:59:11 12 81 241 0 419 641 5 61.50 26 4.04 CHANGED sp.shschL......tlacFLpsFuclLpLssF.....oh--FtpAltspssp....LhsElHhsLl+h..llps.ps ............s.pthuchL................hlacFLpsFuclLtLs.sh...................ol.-.s.h.ppAL.......h.spssp......................hhsElphsLL+h..lhpt.t..................... 2 113 213 327 +198 PF00270 DEAD DEAD/DEAH box helicase Bateman A, Bruskiewich R, Sonnhammer ELL anon Published_alignment Domain Members of this family include the DEAD and DEAH box helicases. 
Helicases are involved in unwinding nucleic acids. The DEAD box helicases are involved in various aspects of RNA metabolism, including nuclear transcription, pre mRNA splicing, ribosome biogenesis, nucleocytoplasmic transport, translation, RNA decay and organellar gene expression. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null --hand HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.48 0.71 -4.76 182 63415 2012-10-05 12:31:07 2003-04-07 12:59:11 24 708 5596 153 23464 73599 16995 166.80 22 21.58 CHANGED osl.Q...tpsls...hlh..p.s............cD.ll........stutTGoGKThsahl...shl...ptlht............................................................................................................t................................................................................................................pslllsPT+pLspQhhpph...pphhph..............h.l...........psshl...hG........shs..hp.pptthl.........ps.scllluTPsc.....lhshlpp....th.th........hp.lph..lVlDEscphhs.........sats....plpp......lhpt..h....................t.................hp.hlhhSAThs.pslpcl 
............................................................................................................................................................................................................................................................Q...tp..s.l.........hh......p..s...............................ps....h..l.....................st.u...t..T...........G...o....GK............T.....h.....s...h..h..l........s.h..l........p..t..l.....pt...t...........................................................................................................................................................................................................................................................................................................................................................................psl..l...l...s...P...s...+.....t....L.......A...t....Q..........h.......h.......p...p....h...........p.p..h.h.tt................................t.h.......................................................ps.s..hl................hG...................................s.hs.........hp.......p..p...hphl...............................pt.t....s...c..l.........l....l..u.....T.s..ut.............................l..h..s...h...lpp...............h...th..................................pp.....l....p....h.........l.........l.l..D.E...s..c..c...h.h.s..................................sh.t..................................p.l.p.p..........................lh.ph.....h.....................................p........................................hp..hlhh..S.A.Ths..tt....h................................................................................................................................................................................................................................................................................................................... 0 8187 14068 19550 +199 PF00531 Death death; Death domain Bateman A, Griffiths-Jones SR anon Reference [1] and [2]. 
Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.70 0.72 -4.21 88 3125 2012-10-01 21:41:45 2003-04-07 12:59:11 17 542 171 79 1743 2836 0 80.60 19 10.09 CHANGED pplhphl..sphhsppWpplu.cpL...slspsplcphcp...ps......ptshplLptWtpp......puolssLhpsLpphstpcssct.lpphh ................................t......h....sp.hspsW...p..pL....A..ppL.......sh.s..ppplp..tlcp.............pp...t....ptshp...lLptWtpp..........t.....t.s......u..........s......l.......s..p..LhpsLpp..hs..ppchsc..lt...t.......................... 0 804 885 1205 +200 PF04626 DEC-1_C Dec-1 protein, C terminal region Kerrison ND anon DOMO:DM04594; Family The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing [1]. Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). Alternative splicing generates different carboxyl terminal ends in different protein isoforms, so this is region is the most C terminal region that is present in the main isoforms. 25.00 25.00 88.40 36.70 24.70 24.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.84 0.71 -3.99 2 24 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 11 0 11 30 0 120.90 52 10.72 CHANGED MPSMMQREVEDEDNKAEDDLVGEAGPQMPENEGTARHKVDALGVGGNKRKKSKSKSAPPTVINYYYAAPQRPVVQSYGTSYGGGGYGSNAYGVPRPVNSYQSQGYRAAVGNDEVDEMLRQHQTMARTINPKQ ....................t.....Qpt.p.tppsct.p..-sll.GEAtPQMPEstGpARHK.VDhLGlGGs+..RKKSK....upou.PsVINYYauuP....p.............h...........h.....s.............SYGTSYG...GGGYGS...NAYGs..........h.s...NsYQ...t......GYR.AAVGNDElDpMLRQHQTMAps......p............... 
0 3 3 8 +201 PF02141 DENN DENN (AEX-3) domain Mian N, Bateman A anon IPR001194 Family DENN (after differentially expressed in neoplastic vs normal cells) is a domain which occurs in several proteins involved in Rab- mediated processes or regulation of MAPK signalling pathways [1]. 23.40 23.40 23.40 23.40 23.30 23.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.14 0.71 -4.57 40 1791 2012-10-02 14:18:06 2003-04-07 12:59:11 16 97 243 2 1055 1806 19 178.90 27 16.35 CHANGED hsPcshCllS+hsahssF.cchLstlhphhtpus..............hslcsaltshltpsshPsPGpshph.p...husp-hhhhspPt.sspL..Ph..pssshp.....tLapsLuscNllpLasssLh.Ep+IlhhSpchshLopsscAlsuLLaPhpWQasYIPlLPspLl-sL.sAPsPFllGlpuphhs.....hhps.s-lllV..DLDss ......................................................................................................................................h...pshsl.lS.p.hshhpha.p..........c...hLt...lhphhh.ps.......................................................l.p.t...h..l..tp......l..hph.s...h..P.s.s....u..ps.hhh...........................ts.t.thh..h..h....pt..P......sspL.......sh.......pshsh.p.........................l...h..p.tL.u..l..c..sl..l..p........lh.....sshLh..Ec+l..lhhSp.ch.................s..h......Lo....t.ssc...u........l..su..ll..........a..P..hp....W...p....a...s..a...IP...lL..P..s...p.....h.....h...c.........h.l....s...............u..PsP.........al.lGlp.u..p..hhp.....................t-l..l.hl..DlDs............................................. 0 352 490 745 +202 PF00610 DEP Domain found in Dishevelled, Egl-10, and Pleckstrin (DEP) Ponting C, Schultz J, Bork P, Martemyanov K, Thorner J anon SMART Domain The DEP domain [1] is responsible for mediating intracellular protein targeting and regulation of protein stability in the cell [2-3]. The DEP domain is present in a number of signaling molecules, including Regulator of G protein Signaling (RGS) proteins, and has been implicated in membrane targeting [4-5]. 
New findings in yeast, however, demonstrate a major role for a DEP domain in mediating the interaction of an RGS protein to the C-terminal tail of a GPCR, thus placing RGS in close proximity with its substrate G protein alpha subunit [6-7]. 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.43 0.72 -4.23 133 2616 2009-01-15 18:05:59 2003-04-07 12:59:11 16 134 308 16 1545 2411 15 72.20 25 9.41 CHANGED ulplpcc+h..thps..atpsFsGs-hVcWLh........h..pt.htsRpcAlpluptLl..............ppGhlpplpscp.t..............Ft-s.thhYpF .......................lps+ch...hh..ps..h..psF...s.....G.s...-lV-WLh......................p.h.....p....hps.RpcAlpluptL.l....................................................cpG..hlpp.lssct.p...........................Fp-s.thhYpF..................................... 0 410 638 1072 +203 PF02272 DHHA1 DHHA1 domain Bateman A anon Bateman A Family This domain is often found adjacent to the DHH domain Pfam:PF01368 and is called DHHA1 for DHH associated domain. This domain is diagnostic of DHH subfamily 1 members [1]. This domains is also found in alanyl tRNA synthetase e.g. Swiss:P00957, suggesting that this domain may have an RNA binding function. The domain is about 60 residues long and contains a conserved GG motif. 21.20 16.00 21.20 16.90 21.10 15.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.45 0.72 -4.27 133 12856 2009-01-15 18:05:59 2003-04-07 12:59:11 14 38 4786 16 3060 9901 3277 66.90 22 10.25 CHANGED ppsllhhs......pssphpsssRs.pslshp.......pllpphtt.hsh...tGGG+spsAuushppsp...........lpphlptlpp ...................................t...hllhs.........ttss....p...l...p...hssR....S..h.ps..lshp...........pllc.ph..t........h.h.....pGGGHsts..A..us.s.hpsss.............lpphhpth..t........................... 
0 1073 2042 2615 +204 PF04922 DIE2_ALG10 DIE2/ALG10 family Wood V, Bateman A anon Pfam-B_9570 (release 7.6) Family The ALG10 protein from Saccharomyces cerevisiae encodes the alpha-1,2 glucosyltransferase of the endoplasmic reticulum. This protein has been characterised in rat as potassium channel regulator 1 [2]. 19.70 19.70 29.00 23.10 18.30 18.20 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.52 0.70 -5.44 5 365 2012-10-03 03:08:05 2003-04-07 12:59:11 7 9 246 0 265 367 4 294.00 27 77.66 CHANGED s+hVPEPYMDEIFHlsQAQpYCcGcao..pWDPhITTPPGLYllSlu...sLpPu..hhusSsloh........LRhlNhLsuV..hhhshLlhRhIplhN.t.u..slsahAloLusaPlLaFFoFLYYTDluSlhhVLhuh...LshsaGsh+s...SAFhuslSshF....RQTNIIWsuFlAso.hhs.phu.tp.pllQcphs-...........LRohlpaLK.........hFl+.....SlccFSsLlL.....................PYhhlhluFhlFllWN.GuIVLGDKSuHpAuLHluQIFYFhsFsAhFSaPhaISsNhl+Hh++p..lp+phsppShlllulVhLlsaF.ThVHPFLLADNRHYTFYlWRRllsp..+hlh+ahLsP.......uYlauh....ashtslosp...........hs+loWpLLahlsTlloLVPuPLlEFRYYILPYllWRL 
.............................................................s.......ah....DEhFHl.Qs.tYhp........t.....pa.............WDshITT.PGL....Yh.h.uhh............h.sh..h......t..h..s.....................................LR.hs..h..hh..h.......h..h..hh.h.....p.......h............................h...t........................................................................................h..........s....h....s..l.....hhP.haha.hLaYTDhhShhh....ll..h...................h....h...........t.....h.....................s.s.......h....h...u.hh.......uhhh........RQTNlhWhh....h.h.hs................................h..........................................................................................................................................................h...........................................sah.h.hhhFhhFlh.hN.tulsl..................Gc+ptH.s.s..hHhsQhhYh...hhh.h.h...s.h....s...h............h...........................................h......h..............................h.........................................................h........h........h............h....h....h....h........hhh..ohh......H.ahLADNRHYhFYlaphhh............hhthh..hhs.......................hYhhsh...........h..h.h...h......................................................h.......hh..h..h.shhhshhst.LhE.RYahlPhhhh...................................................... 2 82 142 217 +205 PF01843 DIL DIL domain Bateman A anon [1] Family The DIL domain has no known function. 
20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.40 0.72 -4.15 56 1158 2009-09-11 05:00:57 2003-04-07 12:59:11 14 77 276 3 698 1062 2 99.80 30 7.74 CHANGED plhsQLapaIss.hhNsLlhR+sh...soappGhpl+.hslspl-cWscstshpts...shppLcplhQAspLLpl.pKpphp.-hchltphCs...sLsstQlh+llstYpssshp .................................QlhsQlFhhlss.hhN..........s.Llh..R+ch..................soasp.Ghpl+.hsls.pLE.cWhcs...ps.......ht.u.................uhppLp............l.hQAspLLph..p.....K..tp.p...shphlpshC...........................tLost.Qlh+lls.Ytssp..t....................................................... 0 167 330 521 +206 PF03018 Dirigent disease_resp; Dirigent-like protein Griffiths-Jones SR anon Pfam-B_835 (release 6.4) Family This family contains a number of proteins which are induced during disease response in plants. Members of this family are involved in lignification. 27.60 27.60 28.30 28.30 25.30 27.40 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.86 0.71 -4.68 86 847 2009-09-13 07:42:01 2003-04-07 12:59:11 9 14 70 0 466 810 1 136.60 29 66.27 CHANGED phscl+hYh.HDhls......G.sssTu.stVssssssst.................tFGslsVhDDsLTcGsshs..Sp.h....VGRAQGhYlhsupp.sh.....shhhuhohVFss..Gcas....GSTlslhGts.h.hsps..REhslVGGTGpFRhARGashh+Th.............phpssssllchs.lplh..h ..........................................................................................phphYhH-hht...........u.sssos.s.lstsstss..............................hFGs.l.sVhDDsLTpGss.hs........Sp..h........VGRAQGhYhhs.u...ts...sh....shhhshs.hsFp....s...s..cap............G...STlslhGt.s.......h..hsps....+Eh.ulVGGT..GpFphA+Ga...shhpoh.............t.tsssshhchslhl......................... 0 35 260 373 +207 PF02377 Dishevelled Dishevelled specific domain Mian N, Bateman A anon Pfam-B_1381 (release 5.2) Family This domain is specific to the signaling protein dishevelled. 
The domain is found adjacent to the PDZ domain Pfam:PF00595, often in conjunction with DEP (Pfam:PF00610) and DIX (Pfam:PF00778). 19.70 19.70 19.70 19.70 18.70 18.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.64 0.72 -3.77 4 215 2009-01-15 18:05:59 2003-04-07 12:59:11 10 14 74 0 107 198 0 70.60 48 10.92 CHANGED pR-RsRRRsp.EpAshhpGpsh.GcpcRcsus.h-SSSTlLSSELEooShhDS-EDDohSRhSSSTEQSSuSRL .....................pRERsRRRpp.....E.cs...s....+.h...NGps..t...u.c.p..c..R...c.hu.u....h-SSSTlMSSELESTSFhDSDEDD.o..h.S...RhS......SSTEQSSuSRL..................... 0 17 27 58 +208 PF02916 DNA_PPF DNA polymerase processivity factor Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 25.10 26.10 24.70 24.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.20 0.72 -4.00 11 530 2012-10-02 11:47:48 2003-04-07 12:59:11 10 9 359 26 20 382 453 114.60 54 23.78 CHANGED hKhpKhThslLh.huhlsusshhuhtphl..hs+hsssosYSE..hshSsllstDssIpDlsplpSlluPsssDs-.....lstlpp.stslplssspos.sa.sA.pShlsucs+ulVh ..............KKAcKFTlhLLVhSlLVSSVoLFAlQQFVsLTNRLNuTSNYSE..Yol..SVsVhADS-IcNVoQL..o..S..Vs..APTus.DsENI.pcLLuDIK.o.opssDLTVs...p.So.SYLAAYKSLIuGE.oKAIVL........................................... 0 2 4 14 +209 PF01965 DJ-1_PfpI ThiJ; DJ-1/PfpI family Bateman A, Enright A, Ouzounis C anon Enright A Family The family includes the protease PfpI Swiss:Q51732 [1]. This domain is also found in transcriptional regulators such as Swiss:Q9RJG8. This N-terminal region of the full-length AdpA proteins is necessary for dimerisation of the molecule. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.94 0.71 -4.74 39 9251 2012-10-03 00:28:14 2003-04-07 12:59:11 19 42 4024 121 2389 11671 1005 150.20 21 67.66 CHANGED plsslusppst......hpsp.s...........hplpsDtslsclsspp........aDslllPGGhssscpLt............ttlhchl+chhpp.uphluAICpuP.hlLhsssllp....................G++hTua.shps........hpttstphh-..t.Vs....hss.............llTutssssshpFshpllctLt ................................................................................t.........................h.t.t.t.................ht.l.t..s..s.t.s.......l..s..c.l.p....s.sc....................aD.ull..l.P....G.....G...h.u...s..s..p....s..Lp.c.....................................ppl.h.p...h....l.....c.p....h....h..........p..........s.....s..........K............lu...A.lCpuP..th....L....s...s.s.....s.l.hp.........................................G.+.p..h.T.u...a..s....s..h.pp..........................th.p.t..h..u..s...p..a...h...c.............t...tss.......................hDt............................................pllTupsPsss.thuhtllp.l............................................................................................ 0 717 1413 1949 +210 PF00751 DM DM-domain; DM DNA binding domain Bateman A anon [1] Family The DM domain is named after dsx and mab-3 [1]. dsx contains a single amino-terminal DM domain, whereas mab-3 contains two amino-terminal domains. The DM domain has a pattern of conserved zinc chelating residues C2H2C4 [2]. The dsx DM domain has been shown to dimerise and bind palindromic DNA [3]. 21.70 21.70 22.20 21.70 21.00 21.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.05 0.72 -4.43 26 1027 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 215 1 380 930 0 45.10 62 16.36 CHANGED RsPpCuRCRNHGl.hs.LKGHK+h.Cpa+sCpCpKCtLlt-RQ+lMAAQ ........R.PpCARCRNHGl...hosLKGHKRa.Cc..........aR-CpCpK.......CpLlsERQRVMAAQ........... 
0 112 146 271 +211 PF01068 DNA_ligase_A_M DNA_ligase; ATP dependent DNA ligase domain Finn RD, Bateman A anon Pfam-B_788 (release 3.0) Domain This domain belongs to a more diverse superfamily, including Pfam:PF01331 and Pfam:PF01653 [3]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.23 0.71 -4.95 46 3956 2012-10-02 00:43:09 2003-04-07 12:59:11 16 83 2053 23 1751 4137 1269 187.60 23 33.16 CHANGED PMLAp.hp..shtphhtph...................ttshhsEhKaDGtR..splH..t.pssphphaSRshcshTt........phsclhphlt.pthhssh.p..hlLDGElls..hs.pptphhPFttltp+h.+pph..........t..............phslshalFDlLhh.........sGps.L.......hphslpcR+plLpphh...................psclhlspthpssshc-lpchhcpslppGtEGlllKs...ssuhYcsu..+R.spsWlKlK .................................................................................................................................stsahhEhKaD..G.hR........s.ht....................h...ps.....s.......p........h........plh.S......R....s....sc.shos..................................th.s.p.l..h...t....hh..........th.....h...........h..........t..t.............p.............hlLD..G..Ells.............h........t..p.........t.....t......h...h.s...F.pt.ltpp.h......ptph...................................................................phtlphhsFDllhh..................................sG.ps...l....................hph...sh...pcR.+...p...h...L..cp..hh......................................s..s..s.p...h...p.h..s....t...t......h...............s......p............s..............p...p.........h..t.......p.........h.......h.....p........p.......s....h.......p.......t...u.......h.......E..GlhhKp..............hsu...Y.....psG.....+R.....tsWhKlK.................................................................................................. 
1 555 1043 1451 +212 PF04679 DNA_ligase_A_C ATP dependent DNA ligase C terminal region Kerrison ND anon DOMO:DM04655; Family This region is found in many but not all ATP-dependent DNA ligase enzymes (EC:6.5.1.1). It is thought to constitute part of the catalytic core of ATP dependent DNA ligase [1]. 23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.41 0.72 -3.58 150 2662 2009-01-15 18:05:59 2003-04-07 12:59:11 10 60 1269 10 1353 2742 442 102.70 27 15.67 CHANGED ppsth....GuLLLusacs...........sp.......LthlG+VGTGFsspphpcLtppLp.slt.....tsps.Ph...........t........sshW.l..........cPpl...VsEVpa.s-hTts..................G.....pLRaPp.ah.tlRpDK ........................................tuhhuuhLlGsacst...........................sp.......................................hhhlu.+s.uoG.ao-ppl.pp.lp..pp.Lp...sht...........hppt.sh...................................t..........tssha..l................................cPph....VhElpa.s.ph.stu...........................G...slRa.Pp.ah...+lRpDK.............................. 0 429 801 1111 +213 PF04675 DNA_ligase_A_N DNA ligase N terminus Kerrison ND anon DOMO:DM04655; Family This region is found in many but not all ATP-dependent DNA ligase enzymes (EC:6.5.1.1). It is thought to be involved in DNA binding and in catalysis. In human DNA ligase I (Swiss:P18858), and in Saccharomyces cerevisiae (Swiss:P04819), this region was necessary for catalysis, and separated from the amino terminus by targeting elements. In vaccinia virus (Swiss:P16272) this region was not essential for catalysis, but deletion decreases the affinity for nicked DNA and decreased the rate of strand joining at a step subsequent to enzyme-adenylate formation [1]. 
21.40 21.40 21.40 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.99 0.71 -4.46 154 1767 2009-01-15 18:05:59 2003-04-07 12:59:11 9 63 1048 8 992 1785 366 162.80 20 23.21 CHANGED h.atpls.chhppl....pp.....souRhph.................sphlsshh...cp...h.ttp................lssslalhhspl.hPshp.sp.clGlup.phLh+.slupshs..hs.pplcp......thpp...........................hGDlu.sstphh......pppp.h.h...........psLTlp..c.VhppLpclAphsG..........ps......SppcKhphlppLhsps..ss.................tEt+allRhlhscLRlG.luctslhsAlup ........................................................h.ths.phhptl.........pt..................sstp.th.........................hp.hltphh.....pp...h.t.t........................hh.shhhhht.h.l.hs..........ht..t.................h...hsl.tp.p....h.lhc.hhsp..hhs......hs.....tthct..................phtt..........................................................hG..Dlu.tshtthh..............................pppp..h..................ssLTlp....p.V.ptLpplup...h.st......................................t..........spppph.p.h.......ltpl....hp.ps..ss.......................pEt+all.Rh.l.t.t.............c.l.R..l.G.hupphlhpAlu........................................... 1 319 572 820 +214 PF00875 DNA_photolyase DNA photolyase Bateman A, Griffiths-Jones SR anon Pfam-B_777 (release 3.0) Domain This domain binds a light harvesting cofactor. 
24.70 24.70 24.80 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.76 0.71 -4.50 143 3841 2012-10-02 18:00:56 2003-04-07 12:59:11 13 21 2592 52 1233 3283 3027 163.30 23 33.49 CHANGED slhWFR.cDLRlpDNsALh....tA......hps...s..t.......llsla..lh-sp.........................ussptt..aLhcuLpsLpppL....pphG...spLhlhpG.s.......stphls....pl.sp........phs.sssVahscchpshtpppDpplpptLp.p.........slph..ppapsp.hLhp...Ptpl.......s...psu.psacVFTPFh+thhp....ph.............shss..Pp .................................lhWFR.cD.LRlpDN.ALt.....................tA............hps.....s..st................................l.ls.la..lhsPp..........t.t.......................................ustphp...alhpsLpsLppsL....tp.h...s..............hs..L...hlhp.u.s.................................shphl.....pl...sp....................................ph.....s.....ss.....pl.....a.hs.tp.h.........p.s...p.p.pR.D..p..t.l..p..p...t..hp..p.................t..s..l.th..pth.p.s.p...hlht...s.tpl...h............s......tps..p.aplFosF.hpthhp.hh.................h.h.................................................................... 0 406 772 1048 +215 PF00136 DNA_pol_B DNA polymerase family B Sonnhammer ELL anon Prosite Family This region of DNA polymerase B appears to consist of more than one structural domain, possibly including elongation, DNA-binding and dNTP binding activities. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.64 0.70 -5.71 67 6234 2012-10-02 01:06:00 2003-04-07 12:59:11 16 79 2425 290 1405 5186 2614 299.90 23 42.52 CHANGED Gpph+lhshLLctstpcshllPspppt......................................................................pptsYpGAhVl-Php.Ghapsslh....llDFuSLYPSIIhsaN.....LCaoTllts......................................................................................................thsphp.p........chhp..hh..t....hhsp..splppulLspLLcphlp......hR+th+pthtpsps.hp............phlhDtpQhAlKlssNSsYGhhGsssu.hL.shslAtolTshGRphlppTpchlcph.........h...................................shcVlYGDTDSlalphss.sh......................ptshphucchsptlsppl........hhpslcLEhEKsaptllLls.KK+YhGlhh..........t..sch.hKGl-hVR+sssphspphhppllchlhpcpsssps.......httlp............hhhchlps.......................htp.h.tttl..slsphlhoptLo..+shssYpspp...........Hlplst+htpcs...........tphPtluDRlsYVllps......................................................tptsh.hchAccsp.ah............l.lDscaYls.plhtslpplhpslhhs.................sshtpsphltthh.pp 
.................................................................................................................................................................................................................................................................................h.s.h.V....hp.s.......s.h.....a...t......l......hhDatuLYP.SIh.shp.............l.s.sshh..........................................................................................................................................................................................................................................................................................................................p..h.s...l...l...sth...ltphht.........................................hRpt.h.+...p......h.tt...t..............................hhs...Q....Ah.....KlhhNuh.YGhhG.s.t............t.u.....h.h...s.h...lA.s.slThhG......R.phlppscph.lct....................................................................................................................th.p.lhY..GDTDSh.hl....h.......................................................................t....t..h.t.t.h...ht...p.........................................t......phch-..th....a...........hhh.......................t....K.........K........p...Yh.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................................................................................................................................................................
. 0 474 778 1158 +216 PF03104 DNA_pol_B_exo1 DNA_pol_B_exo; DNA polymerase family B, exonuclease domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family This domain has 3' to 5' exonuclease activity and adopts a ribonuclease H type fold. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.10 0.70 -5.57 32 3652 2012-10-03 01:22:09 2003-04-07 12:59:11 14 66 1910 166 1392 3971 1871 275.30 17 26.07 CHANGED hsp-stsshhpshshpsYFYhts.cspp..phtp......................................hhcthpsthhplchlp+.............psh.h.ts......hh+l.hss...h......plpp.h..............sthpha................................EhslshhtRahlDp.slhshsWhplpts............................ht.sphch.h.hpsl.sh.tpt......sthplhuFDIEshtttt....FP..-sppD.lltIShhh.t.s.......................hssshpphhaoltsps...............................sstlh.a..ssEhclLptahphlpphcP-llsuYNhssFDhsYlhsRsptl.......hshthp.htph..tth..............................ppppphphsGtlthDhathhpcch........phsSYpLssV 
......................................................................................................................................................................h...........................................................................................................................................................................................................................................................................................................hhtl.hhtt..t.h....................tht..p...ltt.........................................thhp.ha..................................................................................................Es.sl..................+ahh-p....t....lh.....s.......s.W.h..php.t.................................................................................p...h...h.........t...p.h.....s.h...t.................................sshp.h....h.uh..DI.E.s.ttp.t..........................t...p...lh.t..I..u..........................................................httshtp.hh.h.h.l..t.s.p.s............................................................................................................shpl.....h......ssE....hpl.Lp.........thh.........t..hh....t....p...h..........c...P...D....l....l.h..GaN......l..p..FDh.h.lhpR..........s.p...t.h...........................h.t..h.t..........t..........ph.....tt.t..t..h.......................................................p.t..thh.p.htGplhhDhh.p.hhppth.........phtoapLpsl.................................................................................................. 
0 451 750 1140 +217 PF04081 DNA_pol_delta_4 DNA polymerase delta, subunit 4 Wood V, Finn RD anon Pfam-B_25322 (release 7.3); Family \N 25.00 25.00 27.80 27.60 24.50 24.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.68 0.71 -3.85 3 194 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 154 0 132 178 1 116.20 30 71.61 CHANGED sG+KssI+DVaPVVVRcEusQuHp.K...uEpuP..............h+p-.ELu.tlEEP.......WNQIcuERhuEsl...HsEslTclEhlLRaFDlou+YGPClGITRLQRWpRAKphGLNPP.EVhpVLhL+EGDsEsRh+ESLaH .....................................................................................................tt.tt.................................................................................................................................................................pt..sthE.clLRpFDls.pYGPClGloRLcRWcRAppLGLs.PP.EVhslL.tcps.c................................ 0 38 71 100 +218 PF00772 DnaB DnaB-like helicase N terminal domain Bateman A anon Pfam-B_1000 (release 2.1) Domain The hexameric helicase DnaB unwinds the DNA duplex at the Escherichia coli chromosome replication fork. Although the mechanism by which DnaB both couples ATP hydrolysis to translocation along DNA and denatures the duplex is unknown, a change in the quaternary structure of the protein involving dimerisation of the N-terminal domain has been observed and may occur during the enzymatic cycle. This N-terminal domain is required both for interaction with other proteins in the primosome and for DnaB helicase activity [1]. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.18 0.72 -4.15 156 5852 2009-01-15 18:05:59 2003-04-07 12:59:11 16 42 4498 45 1143 3975 2460 101.40 33 21.41 CHANGED t.Pps...h.-AEpulLGulLh..s..sc..shsplhshL.p...s-cFapttHphIFcshhcLhpps...pslDhloltppL.....cppsplcp.h.uGhsYLspLspssso...ss.slptYAc.llp- ................................Ppsl.-AEpuVLGulhl......c......s-..th.s...p.l...t..-..h..l...p....scDFYpts.HchIF.pshhcLhpps........csl.Dh.lT.l...tppL.........pp..ps.p.L.c.p....l....G.....G...hsYL.....s.cL....s.ps.s.Po....uA...NlphYAc.IVp-............................................................. 1 376 752 973 +219 PF03796 DnaB_C DnaB-like helicase C terminal domain Bateman A, Eberhardt R anon Pfam-B_1000 (release 2.1) Domain The hexameric helicase DnaB unwinds the DNA duplex at the Escherichia coli chromosome replication fork. Although the mechanism by which DnaB both couples ATP hydrolysis to translocation along DNA and denatures the duplex is unknown, a change in the quaternary structure of the protein involving dimerisation of the N-terminal domain has been observed and may occur during the enzymatic cycle. This C-terminal domain contains an ATP-binding site and is therefore probably the site of ATP hydrolysis. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null --hand HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.57 0.70 -5.50 36 6233 2012-10-05 12:31:07 2003-04-07 12:59:11 10 50 4730 57 1194 6420 5596 259.00 43 57.26 CHANGED GlsTGah-LDphos..Ghppu-LIIlAARPuMGKTAFAlslApslAhcp..............p.tsVulFSLEMuscQLshRhluspupl.......ssppLRs.Gpls..cc-ap+lspshspLscts.laIDDosslolsplRu+sRRL+pp..tslslllIDYLQLhpust...ts-s.....RppElopISRuLKsLA+ELslPVlALSQLSRslEpRs.DKRPhLSDLRESG.............................................................................................................................................................................................................................................................................................................................................................................................................................................uIEQD.....ADlVhFlaR--hYpp................cspttt..................................hsElI.........luKp..RsGssG.............oVp.....LtFpspas+Fssltt 
...................................................................................................................................................................................................................................................................................GlsTG.ap-L.D.c.........h.T..u..G...h....p.t.u-.L.....IIlA........ARPuM..GKT.sFA....lNlups.s.A...h....p.p......................p....tsVs.lF..SLE.....M.u.u...c.......Q.....l...s...h....R......h.........l...u.....u.....t......u.pl.........................st.p+..l.....R......s......G......p......L...s.............-...-...-...............W............t.....+...l......s...t....s...h....s.......p......L........p....c...t........s......l.a..I..D......D.....o...s...u........l...o....h...s..-....l..R.........u.+....s....R....R....l....t..............p...................c....................t.........s.......l.s......lI.lID.Y....L....Q....L....h.p.uss.............ts-s...........Rp...pE.l.uE...I.S...RsLK.s...LA..K....EL...plPVlALS....QLs.R..u...l.............E....p......R...............s................D.......K....R.....P....h...h.........S......D..LRE..S...G..............................................................................................................................................................................................................................................................................................................................................................................................................................................u...I.E..Q.D.....ADllh...F.lY....R....-....-..h..Y.p..p...............p...s...p.t..ps........................................................hsElI...luKpR.N.G.s.h.G.....o.Vc...LtF.tpas+Fssh..t......................................................................................................................... 
0 402 793 1014 +220 PF00226 DnaJ DnaJ domain Birney E, Finn RD anon Prosite Domain DnaJ domains (J-domains) are associated with hsp70 heat-shock system and it is thought that this domain mediates the interaction. DnaJ-domain is therefore part of a chaperone (protein folding) system. The T-antigens, although not in Prosite are confirmed as DnaJ containing domains from literature [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.02 0.72 -4.23 257 27964 2012-10-01 22:35:57 2003-04-07 12:59:11 26 628 5589 64 12023 23598 6923 62.00 36 16.78 CHANGED -aYclLGlsp.........sAopp-IK+AYR+LAhpaHPD+Ntsss.......................ApccFcclscAYclLoDsp+.RptYD ............................................shYplL..Glsp.............................sA..s.....t...p.-....I....K.........+...A...YR...+L..u..h........c...a.....H.....P..D+sssss...........................................................Apc+.....F.p..........c.....l.......s....c.....AY..-........l.........L.s.D.pp...Rt.YD.................................................................... 0 4164 7147 9951 +221 PF03351 DOMON DOMON domain Aravind L, Coggill P anon Aravind L Domain The DOMON (named after dopamine beta-monooxygenase N-terminal) domain is 110-125 residues long. It is predicted to form an all beta fold with up to 11 strands and is secreted to the extracellular compartment. The beta-strand folding produces a hydrophobic pocket which appears to bind soluble haem. This is consistent with the predominant architectures where the protein is associated with cytochromes or enzymatic domains whose activity involves redox or electron transfer reactions potentially as a direct participant in the electron transfer process. 
The DOMON domain superfamily, of which this is just one member, shows (1) multiple hydrophobic residues that contribute to the hydrophobic core of the strands of the beta-sandwich, and small residues found at the boundaries of strands and loops, (2) a strongly conserved charged residue (usually arginine/lysine) at the end of strand 9, which possibly stabilises the loop between 9 and 10, and (3) a polar residue (usually histidine, lysine or arginine), that interacts or coordinates with ligands [1]. The suggested superfamily includes both haem- and sugar-binding members: the haem-binding families being the ethyl-Benzoate dehydrogenase family EB_dh, Pfam:PF09459, the cellobiose dehydrogenase family CBDH and this family, and the sugar-binding families being the xylanases, CBM_4_9, Pfam:PF02018. The common feature of the superfamily is the 11-beta-strand structure, although the first and eleventh strands are not well conserved either within families or between families. 25.40 25.40 25.50 25.50 25.30 25.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.66 0.71 -4.19 83 1117 2009-09-13 17:03:55 2003-04-07 12:59:11 12 71 205 0 875 1100 7 114.80 18 23.84 CHANGED ps..sphplpWphs.tspplphplp..sp.....ssasulGFSs...pst.MsssDhllshsp.s...upsplpDta..........sssts.sphD...p.....pshphhs...st.psshhphpFpRplso.....s-s.pDhtl.tsssh.plla.AhG ............................................t...th.ltaph.....t.....s.p...t.l.phplp........sp........ssaluhGhSs............stt..M.......s.su.D.hhlshhs.s........sps..h.lp-ha....................sssp..s....sphD....t..............psh.ph..hp.................s.t.....p...s.s..h..h.hhpF.p.....R.lts.........s-s..t-h.l.....stsh..hllhu...................................................................... 
0 378 488 741 +222 PF04124 Dor1 Dor1-like family Wood V, Finn RD anon Pfam-B_12640 (release 7.3); Family Dor1 is involved in vesicle targeting to the yeast Golgi apparatus and complexes with a number of other trafficking proteins, which include Sec34 and Sec35 [1]. 19.80 19.80 19.80 19.90 19.70 19.70 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.94 0.70 -5.98 7 407 2012-10-03 17:31:52 2003-04-07 12:59:11 7 12 287 0 282 582 28 253.10 23 60.75 CHANGED lpcLsspsl-pLc+...c.ttLsp-ttpl.tphpsLuhsNY+shlcsAcsppshhpphss.ctphssLhppl......................scLpptspcF.ppupplsEp.....p+hsphhhpppsplh-lLElPpLMs............................pClcpu..haccAL-LpuaspRLppphsp...PllpslssplcpshtphLspLlppLcss.lpLspsl+llsYLR+h.sshscsp.LRhpFLps..p-thLps....hlp.ls.sss..hlpphIphhRsphachlhQYhulFs..-ssh........h.sp.shssstlhs.ashsphoshhphlEthl.+t...lus.lcplhhphhhht.uFthsstDhcuhhs.hhpphlhpphppslppsh- ..............................................................................................................................h.p.......lt.p.tth..phptls....tpht.hlts...tpt....th..t..h...t.h....ttp.htt.l.....t.t.l.....................................................................thtt.......t.h....th....p.t..t.p......pp.........................p.p.s....hhpp.pp..l...-....lL-.lPpLhp....................................tshpt..s.............app.u.l.p..l..ta.hp.p.L...tp.............hh........s....p..............s......lhp..........tl.....tps.t..t...h.t.hh.t.p.Llt...L.p.t......lpL....stsl..+.hlsaLR+.h....s................h............p................t................p....hp...........hhtt......ptthhtt............................................................t.s..........hl...p+hlph...hR.phas....hls.Ypsl...Fs....pt....................................t.........th...........s.tlhp...a.....p.ht.h.h...ph.lct..L.ph............t.thpplh.phhhhs.uhshhshDht..h...........................................................................................................
.................................... 0 96 157 238 +223 PF04173 DoxD TQO small subunit DoxD Kletzin A, Studholme DJ anon [2] Family Swiss:P97207 is a subunit of the terminal quinol oxidase present in the plasma membrane of Acidianus ambivalens, with calculated molecular mass of 20.4 kDa [1]. Thiosulphate:quinone oxidoreductase (TQO) is one of the early steps in elemental sulphur oxidation. A novel TQO enzyme was purified from the thermo-acidophilic archaeon Acidianus ambivalens and shown to consist of a large subunit (DoxD) and a smaller subunit (DoxA). The DoxD- and DoxA-like two subunits are fused together in a single polypeptide in Swiss:Q8AAF0. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.96 0.71 -4.57 7 130 2012-10-02 13:32:46 2003-04-07 12:59:11 8 6 104 0 41 859 161 154.60 30 51.50 CHANGED ah..lRlslGhhahsAhlR+tlLpPuKLsPsSouYVGtKhlpFLPpu.ushKshL.hll.s.sLLhshLlhFohlEhlhGLhhllGhhTRLsulsshshuhGhhLuAhWLGoTC.DEWQIuhLhsuuuhhlhhoGutph.ulDhlLh+Khpp...t.hlhlhp.........al.Lh ................................................................h.lRhshGahahsuhhR+h...l....h...t....s.....KLsP.s.u....stalG.Kh..p......aLP.pu.....hs.......h.....+....s........hl.thll.s.sl....L..a.......th.h....l..lF.oh...l...Eh.lhG.Lh.lllGlhTR....L...s...u.......l.......s...s........h...h....L....u....h....s.......l...h........L........u........u........u.....W..h..G.s.......T...C...h.....D.......E....W.......p....l.....u......h...l..h.....h.....u.su....h.sl..h..h..s..G...uGt.a...SlDhh.lh.p+ht......................hhhh............................................ 0 17 29 37 +224 PF00930 DPPIV_N DPPIV_N_term; Dipeptidyl peptidase IV (DPP IV) N-terminal region Finn RD, Bateman A anon Pfam-B_1017 (release 3.0) Family This family is an alignment of the region to the N-terminal side of the active site. The Prosite motif does not correspond to this Pfam entry. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.13 0.70 -5.86 43 2539 2012-10-05 17:30:42 2003-04-07 12:59:11 16 64 1098 252 1068 2839 947 293.20 19 41.08 CHANGED SsDtphlLlths........hpp.aRaShpusaalaDlps...........tphppLsss...........t.lphspaSPs..GptluaVhsNNlYlp...phsss..pthplTs..DG.....psslh.....NGlsDWVYEEE.huspsuhWWSPDup..........tlAahchs-otVshhphshassptt..........YPcshph+...YPKAGssN.spVpLhlhsl...psspsh......pltsss..tsp..DhYlspVsW..ssc.....sc..lhlphhsRtQsthplhhs...-h............ts.spsshhp.......p-ssssWl....chpptshhh.........pssspalhhsp.+sG....apHLhhassssp.....................t...tlTpGsW-V....hp.lhuhDtppshlYFtusc..cs....Ptp+plYplshp.......ssph..ppLosstspp........sssFSss.spaalhsapuPssPh ..................................................................................................................................................................................................................th...sst...s..........p........l........s....ah..................p......t......s.....l....ah......................t..tt..........t.htlT..........su...........hh...........................G....s.....t...h.....hh......p...............-..E..........h......t......................u.......h.....h...Wussup...................................hlhh.hphs.p..s.l..h..h....................................................t..th.........Y.Ph..........sG...t.....s....s.hp....l.hh.h.ph.........tt.t............................................h.....................t.............h...l......s..t...htW......ss......................tp..hh.h.hhsR....p..p...lhhh.....s................................................................................tp...tt.....hh.............................ppp.st..tal.........p....t.t.hhh....................................................sspph..hhh.p....+s....G.......ap....HLahhshsstt..................................................................hp.....tlT.....p.G.p.apV........
..................tp...hht...hs...pp...pp....h...la...Fhusp.....pp............sh..p.....p.....p...lYp.l.shs................................ssth....ppl.o..s.p.t..spp..............................s.ss.hSs......s.....t.p.....ahl...hp.s.t.................................................................................................................................................................... 0 360 605 877 +225 PF05186 Dpy-30 Dpy-30 motif Wood V, Bateman A anon Pfam-B_13490 (release 7.7) Motif This motif is found in a wide variety of domain contexts. It is found in the Dpy-30 proteins hence the motifs name. It is about 40 residues long and is probably formed of two alpha-helices. It may be a dimerisation motif analogous to Pfam:PF02197 (Bateman A pers obs). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.99 0.72 -4.47 14 749 2012-10-01 20:11:07 2003-04-07 12:59:11 8 40 244 4 498 708 14 41.20 37 11.81 CHANGED ss+pYLspsVsPhLlpGLstlA+pRPpDPlpaLApaLh+pps ..................h+pYL.p.p.p.Vs.PsLhpGLs.pls......+..p........+P...s......DPl...c.aLApaLhcpp....... 0 197 267 394 +226 PF01414 DSL Delta serrate ligand Ponting CP, Schultz J, Bork P anon SMART Domain \N 28.90 28.90 29.50 28.90 28.40 28.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.66 0.72 -3.97 18 522 2012-10-03 09:47:55 2003-04-07 12:59:11 14 157 113 2 298 473 0 61.00 42 8.53 CHANGED Wppshasusps....clcYphRssCD-pYYGpuCspFCRPRDDtFGHaoCsppGpKhC.sGWpGp.C ..................Wpt...tsths.....plcaphRlhC.c-pYYGp..sCspaC.+P....R.D.D.h.FG.HYsCs.p.s.Gs+..sChsGWpG..C........... 0 117 141 227 +227 PF01666 DX DX module Hutter H, Bateman A anon [1] Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 6 conserved cysteines that probably form three disulphide bridges. 
25.00 25.00 25.70 25.70 24.00 24.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.98 0.72 -3.54 5 33 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 5 0 33 30 0 78.20 28 17.46 CHANGED PYhTppKCospcsIPhc.t.auFCDsDTGRluILGchpl......cGs-Np-sc.RYCooN+DCoso....oVCVh........hssssupCascP .......YpTshsCssspslstsap.auFCcs-Tp+lhllGphsh......sGpch...pchp..ppCshNpDC...up.s....pV.CVh...........s.ppthCa.sP............................. 0 7 12 33 +228 PF00782 DSPc Dual specificity phosphatase, catalytic domain SMART anon Alignment kindly provided by SMART Domain Ser/Thr and Tyr protein phosphatases. The enzyme's tertiary fold is highly similar to that of tyrosine-specific phosphatases, except for a "recognition" region [2]. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.73 0.71 -4.57 24 6758 2012-10-02 20:12:17 2003-04-07 12:59:11 15 122 1439 108 3888 7101 596 125.70 20 31.76 CHANGED lYLuutssst....shhpthslshllNlstths................sphphhplP.lpD..........ppssplstahspshpFIc.ps...ppputpVLVHCtAGlSRSAolslAYLMpppsh.shs-Ahphl......+p+R.stlsPNhsFhtQLhpacpp ................................................................................................................................................tt...tlph...l.l..s.hs.....t..................................................t.t....h....p.h..h.p.l...h.D.............................................................tt....sh.......t..h....h......p...p....s....h..p..h...lc....ps...................pp..p...s........s........p.........V....l..V..H.........C....t....s...G........l.uRS........us..........l.........l....h.....A.....Y.....L....h.......p............p......t.............t.............h....s.............h...........p..-.A.h..p.h.l.....................+pp.R....s...h..h.........s.....s...ahttL......t.................................................. 
0 1325 2005 2947 +229 PF00035 dsrm Double-stranded RNA binding motif Eddy SR anon Published_alignment Domain Sequences gathered for seed by HMM_iterative_training Putative motif shared by proteins that bind to dsRNA. At least some DSRM proteins seem to bind to specific RNA targets. Exemplified by Staufen, which is involved in localisation of at least five different mRNAs in the early Drosophila embryo. Also by interferon-induced protein kinase in humans, which is part of the cellular response to dsRNA. 23.00 21.00 23.00 21.00 22.90 20.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -9.21 0.72 -3.39 110 8915 2012-10-02 17:51:51 2003-04-07 12:59:11 20 142 4747 68 3095 7369 1675 65.70 27 21.06 CHANGED sts...hLpc.hspptt......hthththhtppssspt...tFts...plpl........ss.pths....pGtu.......p.sK.KpAcppAAppuLppL ...............................................KotLpE.hhp.t.pt.....................hshtap.h.lp.ppGs....s+p....p..pFss..............plpl.......................ss..pp..hs.........pGpG.............p.SK.KpAc......ppAAppALp.............. 0 830 1497 2299 +230 PF01951 Archease DUF101; Archease protein family (MTH1598/TM1083) Enright A, Ouzounis C, Bateman A, Anantharaman V anon Enright A Domain This archease family of proteins [1], has two SHS2 domains [2], with one inserted into another. It is predicted to be an enzyme [2]. It is predicted to act as a chaperone in DNA/RNA metabolism [1]. 
25.00 25.00 41.10 41.10 24.60 24.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.78 0.71 -4.22 96 503 2012-10-02 11:08:51 2003-04-07 12:59:11 11 6 472 2 280 485 93 134.80 29 85.80 CHANGED acal-H..TADltlcuhGsoLcEsFpsAuhAhhslhs.-hsplc...........sc.cp...hclplp.upDh-sLLacaLsELlahhcsc.thlhpc...hclp.hs..........................................thplcupshGEphc.p+Ht..hts-lKAlTYathc............................lpp.psst..........................apspsllDl ..............achl-H..TADlt.lcuaGsol-EsFppuuhuhhshhs...D.hsplc...........st..pp.....hplchp....ucDh-sLLacaLsEllahhss.c...thlh.pc....hclp..hc.............................................thplcupshGEphsht..+H....t..sElKAlTYpthp............................lpp..ppst..............................acstlllDl...................................... 0 104 172 233 +231 PF02575 YbaB_DNA_bd DUF149; YbaB/EbfC DNA-binding family Mian N, Bateman A, Eberhardt R anon COG0718 Family This is a family of DNA-binding proteins. Members of this family form homodimers which bind DNA via a tweezer-like structure [1-3]. The conformation of the DNA is changed when bound to these proteins [3]. In bacteria, these proteins may play a role in DNA replication-recovery following DNA damage [1]. 25.20 25.20 25.20 25.30 25.00 25.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.97 0.72 -4.10 139 4505 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 4084 9 1047 2557 1477 92.10 35 83.07 CHANGED hcQApp.hQpp....hpchQccLsphplpGpuGuG....hVpVphsGctclhslpIcspll.c......-DhEhLpDLlhuAhN-Ahp+scphtpp...chsphou.Gh..shP ..................................MKQAQp.MQcc....MpchQc-L....up...hclsGpuGuG....lVpVoh....s....G....ppplpclcIcssll...-...............-DhEhLpDLlhuAhN-Ahp+s-ctppc...chuphou.Gh.t......................... 
0 372 713 898 +232 PF02580 Tyr_Deacylase DUF154; D-Tyr-tRNA(Tyr) deacylase Mian N, Bateman A, Moxon SJ anon COG1490 Family This family comprises of several D-Tyr-tRNA(Tyr) deacylase proteins. Cell growth inhibition by several d-amino acids can be explained by an in vivo production of d-aminoacyl-tRNA molecules. Escherichia coli and yeast cells express an enzyme, d-Tyr-tRNA(Tyr) deacylase, capable of recycling such d-aminoacyl-tRNA molecules into free tRNA and d-amino acid. Accordingly, upon inactivation of the genes of the above deacylases, the toxicity of d-amino acids increases. Orthologues of the deacylase are found in many cells [1]. 25.00 25.00 25.50 25.50 24.10 24.10 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.71 0.71 -4.21 37 3993 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 3782 94 988 2599 777 142.60 45 94.69 CHANGED +sVlQRVppApVsV....-sc........llGpI....spGlllLlGltcs.....Dop-chchhscKllslRlF.....-D-psK.hNhSlpDls.GplLlVSQFTLhu-spKGpRPsFppuuss-pAptLY-pFsphl+pts..............................pVcsGpFGAcMpVpLsNDGPVTlll-s .........................................+sllQRVppAsVs..V..........................-uc............lsGp.I.........spG....LllLlGlpps.....................Ds....c.....p.....cs.....c.....hl...........scKlhshRlF......................pD..-...p....G..........K.....MNhSl.p-l.............s.....Gp.....lL...l................VSQ..FTLhADT.+K.Gp.R.PuFsp.....AAs....P-tAptLY-hFs.p.p.h+ppt...h............................pVpTGpFGAcM.pVs.LlNDGPVTlhL-............................................ 0 335 604 813 +233 PF02583 Trns_repr_metal DUF156; Metal-sensitive transcriptional repressor Mian N, Bateman A, Eberhardt R anon COG1937 Family This is a family of metal-sensitive repressors, involved in resistance to metal ions. Members of this family bind copper, nickel or cobalt ions via conserved cysteine and histidine residues. 
In the absence of metal ions, these proteins bind to promoter regions and repress transcription. When bound to metal ions they are unable to bind DNA, leading to transcriptional derepression [1-5]. 21.20 21.20 21.30 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.84 0.72 -3.79 147 4323 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 2741 7 851 2245 340 83.60 30 88.61 CHANGED tpp+ccll.pRL+RIc...GQlcGl.p+Ml.Ep...cc..Ch-lLpQluAl+uAls....pltthllcsHlcpClt.pshps.......tppp...ptlcElhphlp+h ...........p+ppll...sRL+RIcGQlcul.p+Ml.Ec.....c.c...Ct-lLpQluAl+uAls....slhtt..llcpHlp.cCls..pshpp.........tppc...pplc-hhphlpp.............................. 0 298 568 728 +234 PF02588 DUF161 Uncharacterized BCR, YitT family COG1284 Mian N, Bateman A anon Yeats C Family This is probably a bacterial ABC transporter permease (personal obs:Yeats C). 21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.64 0.72 -3.68 189 12563 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 2439 0 2056 8039 1861 84.60 21 58.10 CHANGED phlhhlhGuhlhuhGhshhhtss.hssGGhssluhllp.phhs...................lshuhhhhlls.lslhlhuhhh...................................h............ph.........................................................slhollshhlhuhhl .....................hlhhlhGuh.lh.ulG.l.s.hh.htss.h..ssGG...hssl.uhllp..p..hhs...........................l.s.h.uhhh..hh...lN.hsllll.uhhh.........................................................................h............sh.................................................................................slhollshhlhuhh...................................................................................... 
0 729 1389 1730 +235 PF02639 DUF188 Uncharacterized BCR, YaiI/YqxD family COG1671 Mian N, Bateman A anon COG1671 Family \N 24.30 24.30 24.30 24.70 24.10 24.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.41 0.71 -4.70 167 2006 2012-10-03 20:43:45 2003-04-07 12:59:11 9 4 1980 0 382 1203 282 129.60 39 85.25 CHANGED hlh+sApRtpl.lhlVANphlphPs...............p.shlctlhVssGhDsADshIscpspsGDlVlTuDIPLAucllpKGuhslsP+GchaopcNItptLshRchMpclRsu..Gh.......o.GGPssauppDRppFtssLD+hlt+ ...............................lh+sAcRhp.l.lslVANps..h.t.s.ss.............................p..phlcslhV.ss.GhDsADpcIVppscsG..D.....l..VITtDIPLAutllcKGu.hsLsP+GchYosssIcptLshRshhsplRtu.....Gh....p....TuGP.sshopcDRptFtspL-+hl................ 0 115 236 310 +236 PF02641 DUF190 Uncharacterized ACR, COG1993 Mian N, Bateman A anon COG1993 Family \N 20.50 20.50 20.50 20.50 20.20 19.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.33 0.72 -4.14 5 725 2012-10-01 21:59:08 2003-04-07 12:59:11 10 8 534 4 284 539 18 99.30 27 76.82 CHANGED lKpKLLRIYouEs-+aEGcPhYKsllc+L+E.cGIcGATVaRGIsG.YGK++clHuc-lFpLSscLPVllElVDccEsIpRsLccl+EhhKs.GLITlEcVcVh .............................t....hLplahuEp...c+......h..p..u.cP.la.c.tllchh+c.tul.A.G.AT.VhRGltG..aGp..pp.h.l.Hs..schh.pL...u..p...c.lP.l.slphVDstcclpthls.p.l.p.phh....pp....u...LlTh-.s...h................................... 0 107 189 248 +237 PF02958 EcKinase DUF227; Ecdysteroid kinase Bateman A, Eberhardt R anon Pfam-B_2081 (release 6.4) Family This family includes ecdysteroid 22-kinase, an enzyme responsible for the phosphorylation of ecdysteroids (insect growth and moulting hormones) at C-22, to form physiologically inactive ecdysteroid 22-phosphates [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -12.02 0.70 -5.27 54 1726 2012-10-02 22:05:25 2003-04-07 12:59:11 15 28 286 0 1069 4192 1235 241.30 19 65.13 CHANGED pG-NYsShhhRlplch.........p.spsppp.......hohllKs............h..tpstttphhp.phtlFppEhshYppllPchcplh.cps.............s..hphhscshhhp.....tp.p....phllhEDLs.pGapsssR.hpsLsh-cschslcKLApaHAsShshtp...p.s..htph...pGhhppthhps......pshh...pshhpshhchh.pphsththht.....p+lpplts..phhcphhphhp..........sssspasVLsHGDhWsNNlMFcYcsps...p.p-shhlDFQhspauSPuhDLhYhlhoSsp.-h.+hpph-pllphYappLhc.pLctLsa .............................................................................................................................................................thhs.hhph.h.h..................t...t.t.....p..................hshhlKh.........................................tp.......hht....t....t..h..a....t..p..Eh.....hY..pp..hhP..thtth.tt.............................h..hspshhsp.................................t............hll..h.EDL....p..........t....u...a..p......h.s.c...ht.s.h......s......hpc....h.ph.slppLAphHAhoh...shtp..........p..........................t......h........s...h.h....t..t........h.....t..t..........................t.....h..h........t..th..h.p..t..hh.p...h.h.......t..t...........t.h.........................................tth.p.t..h....t......p.h..h...p....h..h.p.hht................tttth....p...s...l....s.....HGDha..............hs..Nhha.......p.......h.........s......tt......t........................................p...........s............h...hlD...........a....Q..hsthu....s....s....shD....l....h...a....h...l...h......s...s...h....p......ph.....p...............pp...h....pt..h.l...phYaptLhp.lt..................................................... 1 287 431 878 +238 PF02995 DUF229 Protein of unknown function (DUF229) Bateman A anon Pfam-B_1566 (release 6.4) Family Members of this family are uncharacterised. 
They are 500-1200 amino acids in length and share a long region conservation that probably corresponds to several domains. The Go annotation for the protein indicates that it is involved in nematode larval development and has a positive regulation on growth rate. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.73 0.70 -6.08 17 540 2012-10-03 20:55:17 2003-04-07 12:59:11 12 17 54 0 424 1427 722 390.00 22 71.40 CHANGED c.hphppCspsp.hhspphs.phpphhlphpt.....hh.....tshpChY.pphtRtpst...pshhhh.phhth...hppsh.l.ss...................................s-hhpspChp.shsphh.............pDshtalpp.....ph..p.............ptpss.ptcc.SVhllGlDSlS+hphhRphP+shpalp.phsahEh.GYNKVGDNohPNLlslLoG...........hsp.thptsp....psttshDphsaIWKpFppt.GYtThauEDt..sshssFsY.....p.GFpcpPsDaYhRPhhhthEpphphhtp.ths....ChGp+.tpphlh-ahtpFh.+apsph....hFuahWssphoH-.hphssth.................DcthhpaLpphccpuhh-solllhhSDHG...........................................................hRaGphcpsh....pGhhEERhPhhhlhhPpah+..cpaPphhpNLphNpc+LoosaDlHtTLccllpLsshsctphps...t.ps.............+shSLFhPlPpp.RoCtpAsI.p+aCsCpshpplssss.....hhpphuptlVpplNchlts.........pthCpsLpLpplp 
..........................................................................................................................................t.................................................................h.C.h.....h.h.........................................................t.....................................................................phh.h.C......t.............................hh..............................................t...sVhhhslDShSphp.hh.R...............p..hPcshp........al........p...p...............................s........h......h.ph..uaN...p........l.u..-soh..sNhhslhsG............................stt.th.t.h.......t.t.hDth.s.hla.ppapp.......t.GYh....Th....a....uED..............ths.h.ap..................h.GF...p..P.s.....D..aYh.....R.sh....h....h....t..hp..p......ht..............th...................C.h.s.tc..hphhh-..a.h....t............phh.tt.a....pp..................hFuh....h..a.....s...p...h....sH....s...h.p.hh.thh.............................................................Dpt...hh.phl.pp.h.pp..p.t..................................hpp..ohllhhuDH.G.......................................................................................................................................................hR......a..u.......t..h.t...p.t.................pG..h.h...E..c....p.....Ph...hh.l...hlP.hh+..............pp..h.sphhp...sLp.Npp+Ls...............os...a.Dl+tT.L.......hcll.p.htt.....t......................t.......................................ps.S.Lh....l.P.p..RsCt.ps.s..I.....p.aChC......ht..t.h....p.p............................h.thuphh...lt.......hNphh.t....................Ct.hplt...h......................................................................................................................................................................................................................... 
0 173 212 373 +239 PF01697 Glyco_transf_92 DUF23; Glycosyltransferase family 92 Bashton M, Bateman A, Eberhardt R anon Pfam-B_1694 (release 4.1) Domain Members of this family act as galactosyltransferases, belonging to glycosyltransferase family 92 [1,2]. The aligned region contains several conserved cysteine residues and several charged residues that may be catalytic residues. This is supported by the inclusion of this family in the GT-A glycosyl transferase superfamily. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.81 0.70 -5.10 52 812 2012-10-03 05:28:31 2003-04-07 12:59:11 22 17 132 0 706 894 66 240.60 15 52.56 CHANGED +slslCluPlass-s....phhpalphh+lt..Gus+hhlYhts.hspphhpllcpY.pct...Gh.lplp.a..................................hp.ptph.h+spthuhsDChlp..h+ttscahuhhDlD-hlhsps..phh.....pphpphhpshts.phhthphpstthhp..tp.hssh...sh..h............................t..................................KsllcPpplsthhhHhshph.t.....................s.thhplpp..h.....cs.p.............................................................................hhpht...phhtch.hpphhhshhhpshppphht 
.....................................................................hhhCh.t.shhht....t....plhpalth.hphh................Gs..s+h.hh.Y....ts......h...sp.p..h..hc..l..lc....Y..pt...........G.h..l..p..l...h..sh................................................................t.p..t.th..h..hts..phhsh...sDCl..h+................p.............t......p...s.......c........ahshhDlDEhl....h..s....hp..........................p..h...t....ph.h.p...p...h...t...............t...........h........t...t......h...t........hpphhh....t..........t...t..hs..th............t....t...h...p.h.hh.th........t..t...............................................................K.hlhps.....ptl.....t..h.h.h.H..h.....sh.phh................................t.h..h..........h...Hhp...................................................................................................................................................hh.......................................................................................................................................................................................... 1 240 340 659 +241 PF02996 Prefoldin DUF232; Prefoldin subunit Bateman A, Moxon SJ anon Pfam-B_1664 (release 6.4) Family This family comprises of several prefoldin subunits. The biogenesis of the cytoskeletal proteins actin and tubulin involves interaction of nascent chains of each of the two proteins with the oligomeric protein prefoldin (PFD) and their subsequent transfer to the cytosolic chaperonin CCT (chaperonin containing TCP-1). Electron microscopy shows that eukaryotic PFD, which has a similar structure to its archaeal counterpart, interacts with unfolded actin along the tips of its projecting arms. In its PFD-bound state, actin seems to acquire a conformation similar to that adopted when it is bound to CCT [1]. 
23.20 23.20 23.20 24.00 23.00 23.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.16 0.71 -4.46 37 1254 2012-10-02 17:27:01 2003-04-07 12:59:11 12 25 488 2 812 1220 90 116.60 19 50.84 CHANGED hcttlpplpsphsplppthsclcpshpslcslpp........spshchhlslusslahpupl..cssc.....lllplGsshhlEhshc-Ahchlcc+lpplpcphcplppplpplppphsphttphpphtpp ...................................................p...tphppphpplptthsc....hppsh.cslpplpp..........................tppsh.c...hhl.sl.s......ss..h.a......sp.............upl..ss..s..sc............................Vhlt...lG..ss..hhlEhshc-Ahphlcc+.....lptlp.pplcp....lp....pplpt....h....ppphp...hht...htph......................................................... 0 274 444 656 +244 PF03080 DUF239 DUF239; Glucoamylase; Domain of unknown function (DUF239) Griffiths-Jones SR anon Pfam-B_913 (release 6.4) Family This is a family of plant and bacterial proteins, a small number of which are putative carboxy-terminal peptidases (see for example Swiss:Q9XIN9). 
22.00 21.50 22.40 21.90 21.70 21.30 hmmbuild --amino -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.71 0.70 -5.28 54 602 2009-01-15 18:05:59 2003-04-07 12:59:11 10 22 47 0 366 574 2 189.70 33 56.66 CHANGED aaGspAslsVasPpltp..t..paShuplal.sGs..stp...hssIpAGWtVpPplaGDspsRhasaW...cshpts.GCYNhhCs...GFV.Qsspp.lslGsslpssSs.hsst.p.htlphhlh+...D.psGNWWLp..h.ts........hlGYWPupLFsp..lsst.AshlpaGGpVhsst......st..pssP.MGSG+.Fstp.s..htcAuahpslp.llDtss...phhss......psh..hssp.p.CYslpsht.......................hGth..haYG...GP .............................................hhGspuslslap.Pplpt...s...phShuplalhsGs...ttp..............hss.IpsGWp..............V.........PphY...s...Ds..ts+hahaW....o.............t............-............sh....pts....GC.a....Nh...Cs....GFl.Qs..s.pp..hshGss.lt...shSs....hsut.p....htlplhlap.................D.tpGsWWlt....h.sst......hlG.....YWPttLFs..........lt.p....t....As.hlpaGGplhssp.....s..sss...MGSGp....astp...s...htpuuahpslp.hhstst...phhs.........h...hsppsp.CYslt.h...................t..t.h.hhaGGP................................................. 0 63 183 236 +245 PF03087 DUF241 Arabidopsis protein of unknown function Griffiths-Jones SR anon Pfam-B_1563 (release 6.5) Family This family represents a number of Arabidopsis proteins. Their functions are unknown. 
23.10 23.10 23.20 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.62 0.70 -4.87 11 556 2012-10-01 23:20:42 2003-04-07 12:59:11 9 4 17 0 392 492 0 161.50 25 80.30 CHANGED huLusLpELYcslpchLchssoppthtppp....hhEchLDuSlclLDlCussRDlhspl+EplpsLQSuLRR+c..ut.....lcsclcuYls.RKpl+KEhpKhltuLKphpst.............t...ptsslsslhcpshtholslh+olhphLSs.....scs.h.hpstLhshhhhppstt.pstt...............hcsEhpplDtthpt....sppphhcclcphEhs...........lc-lEcpLEuLa+pLIpsRVSLLNI ..............................................................t.....h.tlh.thtphh.hs...st.....................h-t.h-t.l.llDhpsh.hp-.h.th+t.htphp.sl++tc.....................tspl.t..t.hht......h.+ph.p...Ktht+..h.tthtt.t.............................................hhthhtps...hsh....shh.pshh..hlst...................th.h.hsphh........p.t...................................................................................................................................................ppl.p.tlc.s...............................................ltthEttht.laRpLlpsRs...slLN....................................................................................... 0 16 176 308 +246 PF03103 DUF243 Domain of unknown function (DUF243) Bateman A anon Pfam-B_1157 (release 6.5) Family This family of uncharacterised proteins is only found in fly proteins. It is found associated with YLP motifs Pfam:PF02757 in some proteins. 25.00 25.00 51.50 46.00 19.40 23.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.57 0.72 -3.87 31 401 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 29 0 246 417 0 96.70 39 34.43 CHANGED llpKchYlHssP.--.E-......tptth.susspKpY+llFIKAPss..ssspAslthspstsEEKTllYVLsKKs-ttp.sttls.s.tsspsuKPEVaFIKY .........lpKcFYsasAP.E-s.-p.....ts......tphh....susspKsh+VlFIKuPps..sh.psA.s.lthsp.tssEp+TsIYVLsKps-.sshsp.pls...s.psspssKPEVaFlKY............. 
0 45 60 165 +247 PF03140 DUF247 Plant protein of unknown function Mifsud W anon Pfam-B_1292 (release 6.5) Family The function of the plant proteins constituting this family is unknown. 24.80 24.80 24.90 25.50 24.10 24.50 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.35 0.70 -5.46 48 1014 2009-01-15 18:05:59 2003-04-07 12:59:11 10 19 25 0 634 924 0 301.50 22 82.37 CHANGED IaRVPtpl+.ch..sp.cuY.pPplVSlGPY.H+u..pppL................psMEpaKhRhLpphls+ss.............tslpphlsslpslEpcs.Rss....Ys-sss................hssc-...FlcMllLDGCFlLchhhthsp......................stsDs.lashphhhsh...l..ppDhlLLENQlPaFVLccLhphhp.................................ttpspssLsplshp....ah.............t.shthss......thhtptps............pHlLcLh+pshl......ss.tp......tsttshpp.t...........................................phlhsAs-LcpsGV+F..+ppcs.............sp........................hhD.lpF..c....pG.s......LclPplhlc-sTpplhtNLlAFEQs.psss...............sshlTuYlhFMssLlsoscDVslL.pccGllcshlGs-.....p-Vuc.hFsp.Lscs......ss.hs.hcs...salss....lhpplspappp....php.....phhApl++.paFssPWshhuh...lAAlllllhT 
.........................................................................................................h...h.........p....t.a..PthlulG..P.h...+tt......t.th...........................h-phKhhhhtthhtt.t.......................htthht...ht..t........ph..+t.............Ytt...............................ht.tpp.....hh.hhhhDus...Fll.hh....h.................................................................................t..................h...............l.....DhhhlENQlP....hhl.l.pt.lhth.........................................................................h....thhht......hh...................................................................................Hhh.chhh...hh..............................................................................................................................................................h.s.stpLt.t.sG.lph...t.ttt.........................................................................hhs..lph...p............tu..h....................lp..lP.l.....l.pt..st.hhhNhhAhEtt..t...........................t..hssY...h.hhs.l.lss..pDlthL..hpptl.l......p.h...h...t......s.p............pt.ssp..hhpt.lspt.............................hh..hs..............hh.t.........hht.tlpt....ah.pp.........h.............................hhh.t...hh......a...h...s...h.hhs..hhshhhh.......................................................... 0 33 377 521 +248 PF03141 Methyltransf_29 DUF248; Putative S-adenosyl-L-methionine-dependent methyltransferase Mifsud W, Moxon SJ, Eberhardt R anon Pfam-B_1462 (release 6.5) Family This family is a putative S-adenosyl-L-methionine (SAM)-dependent methyltransferase [1,2]. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.73 0.70 -6.17 22 712 2012-10-10 17:06:42 2003-04-07 12:59:11 11 15 46 0 435 2409 358 380.90 35 73.10 CHANGED -YhPChDsppshph....spcphpapERHCPs..........pcphpCLlPsP.cGYKsPlpWPcSRDhlWasNlPHs+...LsppKusQNWlphpG-hhpFPGGGTpF.pGAspYI-pluphls.......hsuplRssLDlGC.GVASFGAaLLs+sllTMShAP...+DsHEuQlQFALERGlPAhluVh...uTpRLPaPSpuFDhsHCSRChIsWppp-GhhLhElsRlLRPGGYalhSusPlh.........+pcp-.ppthcchpslscplCWchlsccsp......hsIWpKPhss.sChtp.Rpt..spPslCc.sc-sDssWYsphcsClo.hP-s.......ptsustlp.WPpRLpssPsRlps.....h.t.sh-tFcpDschWpcpVstYhclhp.hlppsclRNVMDMsAshGGFAAALtch...lWVMNVVPs.....ssssTLslIY-RGLlGsYHDWCEuFSTYPRTYDLlHAssLFShhp......pR.Csh.psILLEMDRILRPpGtlIIRDph-llscVcc.lspuh+Wcsphp-p...ccss....pEplLhspK .................................................................................................hhPC.s.......t.......ht.h..hERpCP..............t...Clls.P..sYt..............Ph.WPtS.......+.......c.h...hhtNh..sa.t...ls....p..t.ppW....h....ptp..hh.....F.P.u.u...u.o.....F...t..G.s..........t..Y...l....p.................l......t.p......hhs...........................st..th...R...s.....sLDh...GC...G...................V....A.......S....a....G...u..h...L....h.....p.....c......s....l...l..s....h...S..h......AP........p..D..t....H....p....s....Q....l......Q......F....A......L......E....R.....G.......l....P......A....h....l...u......s..h.......u..o..p....+.....L.P.a.P.u.p.s...FD..hs......H..C..u..R..C..h...l.....s.....W...t....t......................s....G..h...........h...L.h.Els.RlLRPGG...a..a..l...ho...u...s..h...............................t...........t..t.....pt..h....t.h.....s.pt....hC...Wphh....p..tp......hslapK..sp..pCh......pt........hCp..t.p.ss..s.t...sW......................h.hpsCht..s.............t....aPtRh...s....hltt........t...t.p.att.Dpph.....Wpphltthh.h...........l.........t.......sp...............hRNlhDMpAhhGGFAAA.L..h..p........lW.VMNVVP.......
............tt.sTLslIa-RGLl......G....hhp........DW...CEuFsT.YPRTY.DLlHAstl..hoh.h...p......p+....Cp.h...lhlEhDRILRPtGh.hllRDp.thl.plpt...hhtthpWp...s...h..h.....t......p................hh............................................................................................................................. 0 54 278 354 +249 PF03164 Mon1 DUF254; Trafficking protein Mon1 Bateman A, Wood V anon Edwards YJK Family Members of this family have been called SAND proteins [4] although these proteins do not contain a SAND domain. In Saccharomyces cerevisiae a protein complex of Mon1 and Ccz1 functions with the small GTPase Ypt7 to mediate vesicle trafficking to the vacuole [7]. The Mon1/Ccz1 complex is conserved in eukaryotic evolution and members of this family (previously known as DUF254) are distant homologues to domains of known structure that assemble into cargo vesicle adapter (AP) complexes [5]. [3] describes orthologues in Fugu rubripes. 20.40 20.40 20.50 21.20 19.60 20.30 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.53 0.70 -5.87 7 443 2009-09-11 23:32:11 2003-04-07 12:59:11 9 13 291 0 297 444 4 359.80 32 71.63 CHANGED s.hsEthptpcKplFlLSEAGKPIaopa.Gs--tlsShhGlhpAlVSahpsst...sslpShpupup+lsFLp+SPLlLVusScospS.stpLhppLthlYtQIlShLTtsplp+lFpp+.pNaDLRRLLuGoEphhcsLl......pphsps..shLhsulpslPLssohR-tlossltp.......sphcsLlFulllAt.s+LlshVRhKchh....LHstDLpLlhsLlus..ps..csuEsWsPlCLP+FNssGFhaAalua.Lss....s.ssCLlLlSscR-sFFshpss+pclhp+Lcc.pthhpsLtcshpp........staplpplG..hPpL+HFLYKs.......KpssQassPthchshpstpEpp.......RL.ulYppLHs+l+p......sRshphhhchsp+-s.................................LhAWVTssF-LYhhhs.PlsoKshslpsVpKLl+Wl+KEEsRLFIhsshoa 
..........................................................................t......hptppKHhalLSpA.GKPla........o.pa....G...........s.p....p.....hl.................s.shh....Glhpsl..l....S......ah.p.s..st.......................stlp...sh.......p..u..ss......h...+hV.al..p.cuPLhLVul..............S....p...........h..........t..........po..................t.p...........Lp.....tpL.ph...lahQIlShLThsplp+lFpp+..saDLR.+hLt.Go...-thh.ssLl..................................pthsps....s...s....hl..hs.ul.ps..l...Lt..t.s...hRptlsshl...p.................................spstsllaullhst..spllsl.lp.....+.p..h........................L+PsDLpLlhshl.s.............pt..s....h.........p..............s..u...........EsWhPlCLPp.FNssGahasalsa..lp.......................................thsl....lLlSs..p..+....-..s..F.at....lpp.....h+...ppl........pt...lpp.tsshtt.....lt..puh.pt..........................................hp...h..t...l.s........hs...l.pHFl..YKs........+t.s....Qa..s.....sph......psshtp...t..ppp.................................+.Lhth..Ypp...La........s...ph+s..........................................t.t..h.......+..hhhh.....h..s...ppts..................................................................hhsWh............Ts.FEL..Y......hsh.s.....s.......s.s+.s......hhts.hpclhpWhpppcp.clFl......................................................................................................... 0 103 166 245 +250 PF03194 LUC7 DUF259; LUC7 N_terminus Mifsud W anon Pfam-B_2902 (release 6.5) Family This family contains the N terminal region of several LUC7 protein homologues and only contains eukaryotic proteins. LUC7 has been shown to be a U1 snRNA associated protein [1] with a role in splice site recognition [2]. The family also contains human and mouse LUC7 like (LUC7L) proteins [3] and human cisplatin resistance-associated overexpressed protein (CROP) [4]. 
33.00 33.00 33.50 34.40 32.30 32.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.90 0.70 -4.85 8 689 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 292 0 416 616 10 242.10 34 76.98 CHANGED sDphRphLDQLMG...osRsscpp+st..l+asDccVC+uaLlshCPHDlFssT+hD.LGsCsKlH-.ph+t-YEpAs+pccah..aEh-hhchlpp.........................hlsDsD++lchu+pRLccspE-pss.sss.p..scpltslscc...........................IschLscsEsLGccGcV--u.clhcclEcL+sc+pcltc..................................................ps+sssPusu.h....................................spQKL+VCElCGAaLultDsDcRLADHFsGKhHLGYsplR-pltELccstsc.........+pc-Rcc+t......hsspRph .......................................t....tthL-pLMG...........tt.p....s.....h..s.pctp...........lpa.s.D.cV............C+.aLhshCPH-l..hs..sT............+....D......LG................C..............K.l.H...s...........t.....L+tpY....E...t...us...c.p...t....ch.........h.....aEh-...hhchLpp.........................hls-s-R+lchuccRL.....tpop...........cc.....h....s...s...t....s....s...s.p.......tcclppLscc................................Isp.hL.......tc.sE.pL.Gt...pGpV--up.ph.hpclE...pl+tc+cchcp...............................................................................................................................h....p..s.....t.h..s.....s...........................................................................................tppKLcVC-VCuAa..Lul..tDs..-..pRl....s..DHhhGKhHlGahpl..RcplpcLpcphtp......................................ppptptt.............................................................................................................................................. 0 139 215 329 +251 PF01657 Stress-antifung DUF26; Salt stress response/antifungal Bashton M, Bateman A, Eberhardt R anon Pfam-B_980 (release 4.1) Family This domain is often found in association with the kinase domains Pfam:PF00069 or Pfam:PF07714. In many proteins it is duplicated. 
It contains six conserved cysteines which are involved in disulphide bridges [1]. It has a role in salt stress response [2] and has antifungal activity [3]. 25.00 25.00 25.20 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -11.24 0.72 -3.70 175 2349 2009-01-15 18:05:59 2003-04-07 12:59:11 12 43 34 4 1517 2447 0 101.20 22 40.19 CHANGED hphC.....sssth.s.....sss.a.ppslpsllssLss..p.ust..............hassu.......s.......sss....pVYuls.Cps.D.l....s....s....ss..CpsC.lspuhppht....p.C......st....ppsuhlh.....hss.ChlRYs.ts.F ..................................................t...............t........sss.a..ppslp...pl.hs.s..Lss.pssss.....................ttFsssp..................s...st..sss........plYu.lspC.....p..s..D....l.....s......s.....ss....Cp..sC.lss.uhs.pl.......ph.......C.............ss.....pps..u...tlh........hsp...ChlRYp...................................... 0 213 869 1163 +252 PF03195 DUF260 Protein of unknown function DUF260 Mifsud W anon Pfam-B_2998 (release 6.5) Family \N 21.50 21.50 22.10 24.70 21.00 19.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.22 0.72 -3.73 46 709 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 35 0 506 727 0 98.50 43 47.36 CHANGED sCAACKaLRR.+Csss.ClhAPYF.....Ps..sp.s...ppFtslH+lFGsuNlsKhLppl..sspp.R.....scuhsSlhYEApuRhpDPVhGssGhlhpLppplpphps.-lshspppl ...............................sCuACKhLRR.+Csps.ClhAPYF................Ps......sp..s...p+FsslHKlFGASNlsKhLp....cl.............P..pp..R.....s-AssSlsYEA...........puRl+DPVY.GCVGhI.tLQpplpplps-Lshhpt..h............................................ 
0 69 316 409 +254 PF03268 DUF267 Caenorhabditis protein of unknown function, DUF267 Mifsud W anon Pfam-B_4201 (release 6.5) Family \N 25.00 25.00 48.50 36.40 21.30 20.90 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.07 0.70 -5.60 7 50 2012-10-01 21:54:26 2003-04-07 12:59:11 9 2 6 0 49 42 0 323.90 31 88.29 CHANGED lLGsa+allKhosLDCSppu........+lpulhsplhslshlshhhhRhhhhhph-upsLohuWAEsNhFuFhulpuhshulsLauWTKsuhlspahp+LtclR.LRlpsNpc.hDpYtph+hchFlaSh.allshhupAIashlp.pKIhhussshs.shhhhh.hh..hhshahshlpLshahLlphulsREhcaFNpELEcAppsKpLpsssllpcFsaRQtcllchlp.sscpLpsasussPLFhahuLhNulalho...hhs.lsslYhIhlhh.LhulIhhshhhLhPAuhVQ-plhpTo+ILMssp-FcpSKDspVYpTYRhMlDRSh+spophhVlsuhsIs.pshphAhFlIPNls .................................................lhG.achhh+hohLDC.shhs........phpthhstlhulhllhhhhh+.hhhh.hth..pup.LShsWAEushauFhuhpuhhsuhslhsWTppshl.pa.cpLsplRhLRlpss.pp.hDsYptl+h+Ahlhsh..hhsshhupu....lashhp.p+lhhu......sspss....hhahhhshlshlshah..sslsLshYhLlpsulsRElcaFNcELccApccKpL.................ps.slLpcFshRQt-llchlphsNcpLssasshuPhFhhh......uhlNusYlso...F.hsslPslahlhLhh.lhuslhhshhhLhPsutlQcplppTucILhsscphcpspDsplapTYRlMlDRs.+scopltVlsuFslsppshstAhFhlPNls........ 
0 15 20 49 +255 PF03269 DUF268 Caenorhabditis protein of unknown function, DUF268 Mifsud W anon Pfam-B_4252 (release 6.5) Family \N 21.10 21.10 22.00 27.00 20.90 20.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.14 0.71 -4.69 6 72 2012-10-10 17:06:42 2003-04-07 12:59:11 9 5 28 0 60 76 45 152.30 40 44.35 CHANGED DGhSGVVlGShpPWVEVpALppG.......sspILTVEYNs..LsIpEcF+-R..lSSIhPhDFspNacpYusoFDFAASFSSIEHsGLGRYGDPlDPIGDLREMLKIKCsLK.GGLLFLGlPlGpDAl.aNsHRIYGslRLAMMhhGFEWIsTFSG-oEpuhDLoupcL+ccsLFuhsQpTLVLRKL ........................................................pshlhuS...PhhEh.ul.pG..............AtplLolp.s...lph....psp..hooh...-Fspp..a.p.pastp.FDFsuohooIEHsGLGRYGDPlD.PhGDl+thhcl+ClLKpGG................LLFLulPlG.s.DultaNAHRIYGslR.LsMhh.Ga-hlsoaut.ppp...ph.................................................................. 0 18 24 60 +256 PF03312 DUF272 Protein of unknown function (DUF272) Mifsud W, Pollington J anon Pfam-B_3609 (release 6.5) Family This family of proteins is restricted to C.elegans and has no known function. The protein contains a ubiquitin fold. The GO annotation for the protein indicates that it has a function in nematode larval development. 25.00 25.00 25.10 27.90 23.50 24.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.81 0.71 -3.91 15 49 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 5 0 48 51 0 125.30 30 28.11 CHANGED FLWlsDpppcolh.pospasLphGHFF-GhFpcpssG.p..WpChcYl+pIctll.cGtlsss......KlplpsslppapPtsssp+.aPpsaucalGhllDsc...sKLstsCs.G+pVplptp+ls...tcpasWhVocll ............................aLWlhDpppculh....ho..........p....papLthGHFFEGhFpcptsu..+............WpC...p.....cYh+tl.pt..L..l..cGslsts......+I.lplplpp.a.pPs.sss.pc.aspshucalGcll...-tp..................s+Ls.tsss.G+...plplphtp...lt...ppsahWhVscl.................................................. 
0 14 16 48 +257 PF03409 Glycoprotein DUF274; Glycoprotein_Ce; Transmembrane glycoprotein Finn RD, Pollington J anon Pfam-B_4416 (release 6.6) Family This family of proteins has some GO annotations for positive regulation of growth rate and nematode larval development. This is probably a family of membrane glycoproteins [1]. 19.50 19.50 19.50 21.00 19.30 19.20 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.11 0.70 -5.71 17 125 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 5 0 124 122 0 283.90 24 79.27 CHANGED s-sshplYl.AuuDcsphLppI...hlTssspshoLspLpss...pssG.hpsas..lsssshloTo.sssshtsLsGhIYloospQtpDss.FtVYslpss.pslshs....sspoTllhLNT.h........stPhtoShlophsQsssoslhhYtGhPtDshpphs......sphFsNPhhhps.........tsphFFssVEshpl.uLssaYl+ohs.slsFhlcstahs...hsshsTTus.sTTGhhMp..spssssh.sVNhtpDss.hs.GsSGs.lsutls..tusloVsh.ssssshppshsssp.hs..saphshhu.pshplsST..sshsGpaYlQYFshpGs..s...oooshssp.sssp..........................................................lpTTTKuusslplhhShhlhhhh.h ...................................................................................sshplYl.ApsDs..sthLppI...hhsssst.ph...........oL.pLtt.....ps.sG...p.sh...lp..s.shhlsos.sss.hptLsGhlYloo.tQhp.....ss................s.F.Vhslptt.p.lpht.......psThlhLNoth...........P...sohlophtQs.ss..h.ha.uhPtst.p..t......p.hFpNPh.h............................haFsplE.hpl.sh.haYhps..s.shph.lpstahs...................ps.hs..Touh.sTTGhhhp..s....pph.slphhp.....D.p.....hsGhsGh.l..........s...h....splsh.....t..s.....t....t..sh...tt.........h.h.h.s.pp..hplps.s..s...s.G.aah.....QYahhps............ss.s..s..............................................................................ooTt......................hh..................................................................................................................................................................................................................................... 
0 26 36 124 +258 PF03357 Snf7 DUF279; SNF7; ESCRT-III; Snf7 Mifsud W, Moxon SJ, Mistry J, Wood V anon Pfam-B_1641 (release 6.6) Family This family of proteins are involved in protein sorting and transport from the endosome to the vacuole/lysosome in eukaryotic cells. Vacuoles/lysosomes play an important role in the degradation of both lipids and cellular proteins. In order to perform this degradative function, vacuoles/lysosomes contain numerous hydrolases which have been transported in the form of inactive precursors via the biosynthetic pathway and are proteolytically activated upon delivery to the vacuole/lysosome. The delivery of transmembrane proteins, such as activated cell surface receptors to the lumen of the vacuole/lysosome, either for degradation/downregulation, or in the case of hydrolases, for proper localisation, requires the formation of multivesicular bodies (MVBs). These late endosomal structures are formed by invaginating and budding of the limiting membrane into the lumen of the compartment. During this process, a subset of the endosomal membrane proteins is sorted into the forming vesicles. Mature MVBs fuse with the vacuole/lysosome, thereby releasing cargo containing vesicles into its hydrolytic lumen for degradation. Endosomal proteins that are not sorted into the intralumenal MVB vesicles are either recycled back to the plasma membrane or Golgi complex, or remain in the limiting membrane of the MVB and are thereby transported to the limiting membrane of the vacuole/lysosome as a consequence of fusion. Therefore, the MVB sorting pathway plays a critical role in the decision between recycling and degradation of membrane proteins [1]. A few archaeal sequences are also present within this family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.96 0.71 -4.87 34 2816 2012-10-03 05:15:35 2003-04-07 12:59:11 16 31 380 24 1871 2693 44 169.10 19 70.47 CHANGED cshhpLccshctlc+cpcplEpplcc.ctpl....+chtpp.....ts...........KctAhhhLKc++phEpplsphtsphssl-phthslcshpssppshsuMptuscshKsh..ppphcl-clcclM--hp-ph-htcpIpEslucshs.....sshDE--lptEL-pLtpE.h.pp...................sspLPssPossh .......................................pLcpshc..pLp+p...t...p.........pl..cpp.cp....p..tpl.........+ch..h.pp......sp..............................pptA.hh....h.h+pthp....h.c.pp.hpphh..........stts.p....l.ct.hthplp.................st.ps...........ppps...hpu.hpt....uscs....hc.ph...pp..p.h..s......l.p..........c......lpplh..c-h.p.c...p..h.ph....h....s............l..p-....h..l..ss...s....h.s...............sp.h-.....E........-...E....l......-...t...E..l.p......p..l.h..pE...ht.p.................................t.ph.s.ths....h......................................................................... 1 636 1020 1514 +259 PF03380 DUF282 Caenorhabditis protein of unknown function, DUF282 Mifsud W anon Pfam-B_2840 (release 6.6) Family \N 22.80 21.80 22.80 21.80 20.30 18.00 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.08 0.72 -4.21 29 67 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 7 0 65 80 0 37.50 43 23.36 CHANGED PCosCsK.IYDssCQGhGlPShtsaCssAu-VslsYolGs .......sCssCsp.lY-s..sCpGhGlPshhsaCsTAuElslpYolG...... 0 23 24 65 +260 PF03368 Dicer_dimer DUF283; dsRNA_bind; Dicer dimerisation domain Bateman A, Mistry J, Eberhardt R anon Bateman A Domain This domain is found in members of the Dicer protein family which function in RNA interference, an evolutionarily conserved mechanism for gene silencing using double-stranded RNA (dsRNA) molecules. It is essential for the activity of Dicer [1,2]. It is a divergent double stranded RNA-binding domain [3]. 
The N-terminal alpha helix of this domain is in a different orientation to that found in canonical dsRNA-binding domains. This results in a change of charge distribution at the potential dsRNA-binding surface and in the N- and C-termini of the domain being in close proximity [4]. This domain has weak dsRNA-binding activity. It mediates heterodimerisation of Dicer proteins with their respective protein partners [4]. 22.60 22.60 23.10 22.70 22.50 22.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.00 0.72 -4.21 29 491 2009-01-15 18:05:59 2003-04-07 12:59:11 9 58 217 1 308 546 1 94.40 28 5.81 CHANGED AlshLt+YCupLPpDsasphpPpaphtpts.s...........thhs.plhLP..lsusl+p.lhGp.shpsp+hAKpsAAapACptLachG.LsD+LlPl..hccphtt .............................................ulshlp+YCupL........P.s..D.t...........a....s.p.h.p..P.paphpphp..s.............................................thhs.plhLP.....hsuP..l...+p...lhG....shs........spchA+psAAhpACppL+ch...G...tLs.D.pLhPh..hpc...t.......... 0 93 165 251 +261 PF03382 DUF285 Mycoplasma protein of unknown function, DUF285 Mifsud W anon Pfam-B_2864 (release 6.6) Family This region appears distantly related to leucine rich repeats. 
25.00 10.00 25.20 10.00 24.70 9.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.06 0.71 -11.96 0.71 -4.54 147 2904 2012-10-02 21:32:02 2003-04-07 12:59:11 9 141 295 0 944 3084 4291 103.50 29 50.46 CHANGED oshpthF..tss...p.......ph........s.psl.......ssWD.....TSsVTsMssMFtsAp...sF..Nps.I....u........s.W.sTSsV........ssM.stMF....tsAps..............F.NQ...sl....us............WssSsVpsMppMFps...AssF.Nps...l....u..sW..ss.usV.pshp.....tMFtsu..ps...Fsps.l......s.sW ......................................................................................................................................................................................................sl........spa..s......s.S.pV.....s.s..M.pt........M......F..t........s......s.....p..........s.h.........s.ts....l......u................s.W..s.s...S..s...V..............ssM..s..tMF..........ts.s...p.s...................Fsp......sl....us.............................W..s.....s..S..p.V.........s.....s......M.p..tM.Fts............sp.sa...s..ts..........l....s...pW..sh.....pp..h.........t.....h................................................................................................................. 0 393 623 794 +262 PF01060 DUF290 Worm_family_2; Transthyretin-like family Bateman A anon [1] Family This family called family 2 in [1], has weak similarity to transthyretin (formerly called pre-albumin) which transports thyroid hormones.\ The specific function of this protein is unknown. 
24.90 24.90 24.90 25.20 24.80 24.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.84 0.72 -3.97 60 474 2009-01-15 18:05:59 2003-04-07 12:59:11 18 15 21 0 419 374 1 79.90 33 48.62 CHANGED +G+LhCsscP..spslp.V+La-c-.p........s.DchLspspT..cssGpFplpGspsEl....os.........I-P..hlpIaHcCsstsh.......Cp.+chplpIP .................pGpLhCs.spP.....ss...slp.V+La-cD.p..........................s.D..-lls.p.shT....cssGpFplpG.s..p.sEh.......ss..................I-P....hlpIaHcCsstsh................Cp.+phph.lP............................... 0 150 203 419 +263 PF03478 DUF295 Protein of unknown function (DUF295) Bateman A anon Pfam-B_790 (release 7.0) Family This family of proteins are found in plants. The function of the proteins is unknown. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.81 0.72 -4.60 168 1123 2009-09-11 23:18:59 2003-04-07 12:59:11 13 33 21 0 656 979 2 57.40 22 14.95 CHANGED hhclp..sl....Gc.cslFl.up.spshsss..s...pp...............u.lc.sNslY.....Fs....cs.............................shsl..asl ................hhclp..sL....G.s.csl.....Fl....Gp..s......p.uhs.hs....u.....pp......hs.......u..lc..ssslY...Fh.....cs......................................h.................................................... 0 78 186 369 +264 PF03556 Cullin_binding DUF298; Cullin binding Finn RD, Mistry J, Wood V, Eberhardt R anon Pfam-B_3021 (release 7.0) Family This domain binds to cullins and to Rbx-1, components of an E3 ubiquitin ligase complex for neddylation [1-3]. Neddylation is the process by which the C-terminal glycine of the ubiquitin-like protein Nedd8 is covalently linked to lysine residues in a protein through an isopeptide bond. The structure of this domain is composed entirely of alpha helices [1,2]. 
27.90 27.90 28.80 28.30 27.80 27.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.92 0.71 -3.95 46 733 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 292 14 456 685 5 115.20 37 44.82 CHANGED lpcl+pplsplcp.....cL..........ptDspt............F+clYcaoFsaup...cps.....Q+sLsl-hAlshWpLlh..........................ssp.h..............shlctWhcFL.ppp...............pc+u....................Is+DoWshhLpFspph...p.tshosYD.--uAWPsllD-FVEahc ................................................................................pclpspl.spLcppL......................p...-.ssp...........F+chYpasFsau+.....pts.......Q+uL..cl-hAlshWpLlL............................stp..a..........shlshWhpFL.ppp...........................ph+s...........................Is+DpWs.lL-Fsppl.......s..s-hSNYD..-...-...........G.A.WPsLlD-FVEah.............................. 0 137 223 342 +265 PF03619 Solute_trans_a DUF300; Organic solute transporter Ostalpha Finn RD, Eberhardt R anon Pfam-B_3382 (release 7.0) Family This family is a transmembrane organic solute transport protein. In vertebrates these proteins form a complex with Ostbeta, and function as bile transporters [1]. In plants they may transport brassinosteroid-like compounds and act as regulators of cell death [2]. 
20.70 20.70 20.80 21.10 20.50 20.60 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.75 0.70 -5.08 55 925 2009-01-15 18:05:59 2003-04-07 12:59:11 11 17 295 0 664 890 27 242.80 29 59.14 CHANGED sphshh..luuhhslhAhhlShatIhpHLhpYppPppQRhllR.........ILhMVPlYAlsSaluLlh.ptu.........hah-slR-sYEAFlIYsFhsLLhsaLGG....Ep....slhthhp.t+.shp........ashPl.................t.hh.cs.............................hhs.pahphsKtGlLQY...........................sllKPlhsllsllhphhGhY.tpuphshstuahalsllhNhSlolALYsLslFYhshcc..-LpPacPlhKFLslKsllFhSFWQGlllulLsth..Gllpsttthtt..........plusulQshLlClEMhhhAlsHhaAFshpsY .................................................................h.....hhush.hshhshhlohh.lh...HhhtYp...p...P......................Q.+..h..ll...R.........Ilhh.....lPlY.uh..Salu..Lhh..pts.............hahsslR-sYE..Aa.slY...sFhtLhhp.alGu..............cp...........sl.ht...hp...sc..hp................psh.sh....................................hhh...h....s...............................................................................................hhhs.....thht.......hhKhu.slQa...........................s.ll+Plh..sll..s..llhph.h...G...hY..p.........-...up......h......s....hp.t..ua...halsllhNlS..........h..sh.AL....YsLhlFa.hsh..+c.............Lp...s....h.p.P..l.hKFl.s..lK.h..l.lF..h...........oaW..Qu.hhl.u.lL.th.....Gh....l.sh...t................p..lssulps.h.llClEMhhhulhthauFshpsY............................................. 2 236 372 538 +266 PF03703 bPH_2 DUF304; Bacterial PH domain Yeats C, Bateman A anon Yeats C Domain Domain found in uncharacterised family of membrane proteins. 1-3 copies found in each protein, with each copy flanked by transmembrane helices. Members of this family have a PH domain like structure [2]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.37 0.72 -3.96 219 4794 2012-10-04 00:02:24 2003-04-07 12:59:11 9 20 1523 0 1147 3777 643 80.00 18 42.58 CHANGED h+sht..apl..osc+.ltlpt.Ghhs..+cppplshsclpslph.p..l.RhhGh.uclslphssss....................plphhslspsccltphl ..........................h.hp...atl....pscp..lh..lpp..G.l.ht.......+c..p..thlshpR.lQslsh..pp.u.hltRhhGl..sslplpouuss...................................h.lsh.lsh.ppspplht.................................................. 0 385 845 1038 +267 PF03713 DUF305 Domain of unknown function (DUF305) Yeats C, Bateman A anon Yeats C Domain Domain found in small family of bacterial secreted proteins with no known function. Also found in Paramecium bursaria chlorella virus 1. This domain is short and found in one or two copies. The domain has a conserved HH motif that may be functionally important. This domain belongs to the ferritin superfamily. It contains two sequence similar repeats each of which is composed of two alpha helices. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.20 0.71 -4.39 86 1321 2012-10-01 21:25:29 2003-04-07 12:59:11 8 8 713 3 551 1532 484 122.80 24 68.56 CHANGED DhsFhppMlsHHpQAlcMuphsh...p+upssplcsLAppIhsuQpsEIspMpuWLptWstssssss................................................t.ttttttssMs.......GMhostphspLpsupG.............sshDphFLphMIsHHpGAlsMApstl.........p...pu.pssplp.plApsIlssQpsEIspMpphL ............................................................................a...h..ap..u.l.hs................t....th.ths.tl..tpt.pht.h..hh.................................................................................t....tth...............th..hpt........h.t...h.t.t.h..t........................sphDhhFhph.M.l.s.H.HpGAl.t..MA.p.h.t.l............p......u..ps.s.p...h.cplApp...llpsQptEIttMpth............................................................................................................................. 0 158 358 476 +268 PF03754 DUF313 Domain of unknown function (DUF313) Finn RD anon Pfam-B_2540 (release 7.0) Family Family of proteins from Arabidopsis thaliana with uncharacterised function. 24.40 24.40 24.40 24.50 24.30 24.20 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.27 0.71 -4.16 14 95 2012-10-02 12:51:43 2003-04-07 12:59:11 8 4 11 0 70 93 0 102.50 29 36.72 CHANGED pphPcWLlpsM+chpG...pcP.+LIhcKs.LhpTDl.ssspuRL...uhPhspllpsDFLT.-EpRhl.......................c-ct.sscshGlsshLV-sctpcaslpLK+WsM......pushpYsLspGWNsVVcs .................................P..lhphhpphpu...p-s..+hlhpKp.LhtoDl.ssspsRL....hPhp.plhp.p..-......FLTppEpchl..................................p.p.pt...t..pcthGltshlls........ph....pph.tlph++WpM........pts..YsL..psWp.pllp.s............ 
0 21 40 47 +269 PF03761 DUF316 Domain of unknown function (DUF316) Finn RD, Bateman A, Pollington J anon Pfam-B_2972 (release 7.0) Family This family of proteins with unknown function are from Caenorhabditis elegans. The protein has GO references indicating the protein is a positive regulator of growth rate and is also involved in nematode larval development. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.91 0.70 -5.06 9 172 2012-10-02 13:45:52 2003-04-07 12:59:11 10 6 7 0 171 227 0 232.30 16 67.50 CHANGED hlhlhhhllhhshssspcLo.cENppRhpoCGpchls.ps...........ssshhsptpppsWhlhsthpstsptphh........suohISsRHllTsuplhhsscptW.h.....cpssspssCsus..pL.VP.-lLcclcl......sphpsppshptpls+AhlhshC......h..t.phttsPMllElcts.....sssshsCLuscspp.hccs-hlcsYGl......psstchhcpphslsss........s.hphsosthhsptctuGsLlpphss+hsllGhtussshtspt......sthFaslsp.hpppICchsGIC ............................................h.........................tpLs.pENt.h.tpCG.......................................................................................................ts..Whh..h...h..t...................................sushISsRHllsss..p...hh..h.......p.....t........p........t.....h....t......................t..h....t.p.Ct..t.t......h..........ls.p.p..h.h.p.p.h.h....................t................h....hp.hhhhthC......................................t....h....ll...Ehpps........................h.....t....sCl.s.......p.......p....................tp......h..p.hath......................tt...h......t...hph.t.h......................t.h.ht......t.t.....s......tp....Gus.hlt........h..s.sc...p..l.lGl...hs.tss..t..........................hah..p.hth.htpplCphsGlC.................................................................................................................................................................................................... 
0 23 38 171 +270 PF03777 DUF320 Small secreted domain (DUF320) Yeats C anon Yeats C Domain Small domain found in a family of secreted streptomyces proteins. It occurs singly or as a pair. Many of the domains have two cysteines that may form a disulphide bridge. 20.20 20.20 22.10 20.50 19.90 19.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -8.88 0.72 -4.20 17 305 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 44 0 123 327 0 60.30 52 54.99 CHANGED usuusAtu..susApGuAssSPGVlSGNsVQlPVHVPVNVCGNTVsVlGlLNPAhGNtCsNs .................s..uuhAtu..suuA.pG.....s.As.u..SPG....VlSG..NsVQlPVcVPVNVCGNoVsVlGlLNPAaGNsCsN.............. 0 47 99 123 +271 PF03860 DUF326 Domain of Unknown Function (DUF326) Yeats C anon Yeats C Repeat This family is a small cysteine-rich repeat. The cysteines mostly follow a C-X(2)-C-X(3)-C-X(2)-C-X(3) pattern, though they often appear at other positions in the repeat as well. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.56 0.73 -7.95 0.73 -3.88 20 528 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 287 12 108 324 6 22.60 39 31.24 CHANGED spchpppCt....chCpcCuctCppps .....pchCphCA....chCppCA-tCtch..... 0 38 77 102 +272 PF03935 SKN1 DUF338; Beta-glucan synthesis-associated protein (SKN1) Finn RD, Moxon SJ anon DOMO_DM01831 Family This family consists of the beta-glucan synthesis-associated proteins KRE6 and SKN1. Beta1,6-Glucan is a key component of the yeast cell wall, interconnecting cell wall proteins, beta1,3-glucan, and chitin. It has been postulated that the synthesis of beta1,6-glucan begins in the endoplasmic reticulum with the formation of protein-bound primer structures and that these primer structures are extended in the Golgi complex by two putative glucosyltransferases that are functionally redundant, Kre6 and Skn1. 
This is followed by maturation steps at the cell surface and by coupling to other cell wall macromolecules [1]. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.72 0.70 -6.35 26 512 2012-10-02 19:29:29 2003-04-07 12:59:11 10 17 128 0 415 1147 311 346.50 30 68.38 CHANGED asLhh--cEsDDaLHNPDP..csph-+p+hhh-hchhs+RuhsslhGllhLhluhlslFlshPlLTao...sssp+tss..............pshphloshpYPhLuuIR.ouLlDPDTPpsAho+pup.c.GppWpLVFSDEFNs-GRTFY-GDDQFaTAsDlHYsATpDLEWYDPDAl.TTtNGTLplRhDAFpNHsLhYRSGMlQSWNKhCFT.tGhlElSAsLPshGsluGLWPGlWTMGNLGRPGYhAoTEGlWPYSY.-uCDsGITPNQSSsDG.....................ISYLPGQRLssCTC.sGEDHPsP.....G.sGRGAPEIDllEuphssshs.........lGsASQShQlAPFDIWYhPDY...salplYsposTshNoYsGGPFQQAlSulTsLNssWY..tt..su.ttFQpYuaEYh...N-ccsG.......YlsWaVGcpsTaTlhupALtPNG..NIutRhIScEPMSlIhNLGlSsNWAhIDWtpLh..FPssMpIDYVRlYQspsphs....VsCDPs-YPTh-YIppH.ssYpNsNLTsW....ppuGY..oaPKNpLhss.C .......................................................................................................h....................................................................................................................................................................h...lD..pT...P......th.........h.......s......p.......s.........tth.L..............lhSDE..F.....p........................s.....R......o.......F..........h....G.....-..D.........h.a...p...u......c.....h.........................s.s...t..s....h.E.....h.....Y.............s............p...h...h.......s.T...............t................s.....G.......h............h..................h........p..................t..........................................................................t................t..................h..........................a.......puu................M................l...p............o.....W................N.................p.................hC..ap.....t........G..hl..Eh.............p.........hpL.........P..............s..............................
.....s..t.......h...G....................h......WPuhW.h.G..N..Lu....Rs.h.a..uospt..h......WPaoY.ptC-.....shh...s.......................................................................+lstCss.....s....t.........t.s............uRuAPEIDhhEuth..................................hSpS..hQhu...P.............................................................................s.....p..h........s......................h........p..........p........uhSs.........ht.th.a..............................a.hathE......ah......................p.u..........lpW.h.tt...p.......a...p.....h..s..........t.u.....h......t.....s..ss..........p........s..........+.......h...........E.......P.......M...................hlhN..huh..S...sa...............t.....................................p...............l................hP....h.lDalRlYQ...........s...t...p.......lsCDP.s..a.P..T.p.aIt.t..a...t.Y..s..p......................................................................................................................... 0 198 310 400 +273 PF03990 DUF348 Domain of unknown function (DUF348) Yeats C anon Yeats C Domain This domain normally occurs as tandem repeats; however it is found as a single copy in the S. cerevisiae DNA-binding nuclear protein YCR593 (Swiss:P25357). This protein is involved in sporulation part of the SET3C complex, which is required to repress early/middle sporulation genes during meiosis ([2]). The bacterial proteins are likely to be involved in a cell wall function as they are found in conjunction with the Pfam:PF07501 domain, which is involved in various cell surface processes. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.69 0.72 -4.38 167 1769 2009-01-15 18:05:59 2003-04-07 12:59:11 9 27 678 0 449 1328 21 42.70 28 24.80 CHANGED sVslsl.-Gcpp.plh...TtusTVs-lL....pptsls.l........s.ppDplsPuhss ............Vslsl.sGcpp.plh...Tss.sTVs-hL......pptsls.l........s.spDtVsPuhs.................. 
0 167 335 408 +274 PF03993 DUF349 Domain of Unknown Function (DUF349) Yeats C anon Yeats C Domain This domain is found singly or as up to five tandem repeats in a small set of bacterial proteins. There are two or three alpha-helices, and possibly a beta-strand. 21.40 21.40 21.60 21.40 20.60 21.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.62 0.72 -3.82 128 2482 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 722 0 644 2200 886 77.50 24 48.45 CHANGED -sLWpR.FcsApcta.cp+pthhpphcpppppNhptKctLlpcA.Eplts.ssc.....hpsstpph+pl.ccW+slG....plP.Rcps .................................-tLWp+.Fpsspstahct+pt.th.......pptcppppp.shptKc....tLlpc....A.c.tLss..ssc................hpsstpph+pL.h....c....cW+.s.lG....tls.+c.............................. 0 229 508 614 +275 PF04008 Adenosine_kin DUF355; Adenosine specific kinase Bateman A, Lott S, Mistry J anon COG1839 Family The structure of a member of this family from the hyperthermophilic archaeon Pyrobaculum aerophilum contains a modified histidine residue which is interpreted as stable phosphorylation [1]. In vitro binding studies confirmed that adenosine and AMP but not ADP or ATP bind to the protein [1]. 25.00 25.00 34.70 34.70 20.10 17.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.85 0.71 -4.72 34 283 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 277 26 124 228 14 154.30 59 95.54 CHANGED Vsl-hP-.ssNlIlGQoHF.IKTVEDLaEsLlsoVPul+FGlAFsEASG.pLlRhsGND-cLhclAhcNAhtIuAGHsFllhl+su.aPINVLNslKsVsEVspIFsATANPlpVIVAcTc.pGRGllGVlDGhsPhGVEs-cDhpcR+chLRc.IGYKh .....Vsl-pP-.ssNlIlGQoHF.IKTVEDLaEALVsssPul+FGlAFCEASG.RLVRaoGNDt-LlcLAscNAhsIGAGHsFlIaL+cG.aPI.NVLNslKsVPEVCpIaCATANPlpVlVApTs.pGRGllGVVDGhoPLGVEo-cDltpR+-LLRp.I.GYK......................... 
0 45 78 104 +276 PF04013 Methyltrn_RNA_2 DUF358; Putative SAM-dependent RNA methyltransferase Bateman A, Eberhardt R anon COG1901 Family This family is likely to be an S-adenosyl-L-methionine (SAM)-dependent RNA methyltransferase [1]. 20.50 20.50 20.60 20.50 20.40 20.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.12 0.71 -4.77 7 215 2012-10-01 22:53:19 2003-04-07 12:59:11 7 2 203 7 89 165 66 185.90 39 94.48 CHANGED MR.FllhuscAhTssshsLcDLPGsuGRlDllCRshssAhalSHuhRcsVplallLhGsPsPP+olphcusclc...hpPDEtslAhhlp+ALpuht....tttcph.pspPGlaVsphuFEsllptlhc..hslhhLcEcGtDIpssphs....NPsFllGDHlshsccphphL-ch...sh+lSlGPhsLhssHslsllpthLD+hth ...................................MRsFll+A+sAsTsup.h..Lc-lsG.pu+h-lLs+shhsulFhupuhRcDVhlaLVLpus.....D.s.+TIphcus.-lpp..sh.-pt.lAhll+th.tust......hsc-ph+.spPGl..pVcs.hoFEtLlt-lucc..t.......pLYhhcccGcsIR-hchss......NPsFlLoDHIshsccsts.lc+lG..scKI.SLGPchLaASpClsllHNElDpt...................... 0 20 51 70 +277 PF04020 DUF360 Membrane protein of unknown function Bateman A anon COG1950 Family These proteins a predicted transmembrane proteins with probably four transmembrane spans. The function of these bacterial proteins is unknown. The sequences do not appear to contain any conserved polar residues that could form an active site. 21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.72 0.72 -3.70 107 1355 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1191 0 468 1052 200 109.60 31 76.40 CHANGED h...hphllphll.sAluLhlsuhlh..s........G...lplsu.....................hhsslluAlllGllNsll+PlLh.llolPlhl....lThG........LFsh.........VINAlhlhLs.uhll.s....G.....Fpl.pu.Fh.sAlluulllollshll ..............hphllphll....sulslhlluh..lh.s.........G.....lplsu.....................hhsAlluAlllullNsll+PlLh..ll....oLPlsl....lTL....G........LFt.h.........VlNAl.hlhls.uhlls...............uFpl....su.....Fh.sAlluulllSllshll.......... 
1 163 349 428 +278 PF04037 DUF382 Domain of unknown function (DUF382) Wood V, Finn RD, Moxon SJ, Sammut SJ anon Pfam-B_10232 (release 7.3); Family This domain is specific to the human splicing factor 3b subunit 2 and it's orthologues. Splicing factor 3b subunit 2 or SAP145 is a suppressor of U2 snRNA mutations. Pre-mRNA splicing is catalysed by a large ribonucleoprotein complex called the spliceosome. Spliceosomes are multi-component enzymes that catalyse pre-mRNA splicing and form step-wise by the ordered interaction of UsnRNPs and non-snRNP proteins with short conserved regions of the pre-mRNA at the 5' and 3' splice sites and branch site [1]. 21.90 21.90 22.30 24.60 21.60 21.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.57 0.71 -4.32 24 357 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 295 0 253 336 8 123.00 59 20.84 CHANGED Vp+P-lVEhaDssupDPhhLltlKup+NsVPVPpHWspKRcYLpuKRGlEK.PFpLPcaIcpTGIschRssl.-c....-scpoLKQKpRERVpPKhG+lDIDYp+L+DAFF+aQo..KPc.LopaGDl......YYEGKEh ........Vt+P-VVEhaDsoApDP+LLVpLKuhRNoVPVPpHWshKRcYLp..............GKRG.IEKPPFpLPcFIpcTGItEMRp.........Al.EK............--ppoLKpK.RE+VpPKMGKlDIDYQKLHDAFFKaQT..KPc.LThaGDl......YYEGKE........................... 0 88 143 209 +279 PF04075 DUF385 Domain of unknown function (DUF385) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of Mycobacterium tuberculosis proteins. 
20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.68 0.71 -4.19 10 1560 2012-10-02 11:35:36 2003-04-07 12:59:11 9 13 225 27 512 1351 399 123.90 24 82.54 CHANGED s.hh.Rt.sthaRcssG.tlGpph...G.hP.hlLLpTsGR+oGpsR+TPL..htsccssp..ahlVAShGGss++PsWYhNl+AsPcVclplGscphssTA.R.ls..s--+Achathsstshsu.YssYQstTsRp.lP...VhVLps .....................................................h.............h.t..s....h.u.t...h.............s...h.......s...hhl..LpssGR.KS.GpsRpoP..L.............hh.h..c.c.s.sp.........hhl.Vuo.t...Gu.....p...p....ss....WhpNlp.A.s..P.p...s..pl....p....h.....s.s.....c.....p.....h.s...s...s.A...c.ls..ss.-.+....s.phh..h..h.t.......s...s...h...h...t........s.t.....t..ls...hh.......................................... 1 139 367 466 +280 PF04087 DUF389 Domain of unknown function (DUF389) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of hypothetical bacterial proteins with an undetermined function. 22.60 22.60 22.90 22.70 21.90 22.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.89 0.71 -4.23 127 1004 2009-09-11 12:27:41 2003-04-07 12:59:11 9 4 851 0 318 852 168 140.40 38 35.44 CHANGED MlIuPLhuPlhuhuhuhshsDhpLhp+ulhs.....lhlGhhlulhhuhlhu.hlhs.........l..p..hssElhuR..opPs....lhDlhlAlsuGsAGshuh.spp............h.usulsGVAIAsALlPPlsssGluluhu...................................p.hshuhGuhhLahsNl.luIslu ....................MLISPLMuPIlGluhululhDhcLl++Shps.....Lhltlhlullsuslah.hlsP......................ls....ssuElluR.........TsPs........lhDllIAlsuGlAGhluhppc.........ptssslsGVAIAsALhPPlsssGhulAtu....................................................s.hphhhGuhhLahlNsshIsls...................................... 2 94 200 279 +281 PF04155 Ground-like DUF398; Ground-like domain Bateman A, Moxon SJ, Burglin TR anon Pfam-B_3296 (release 7.3) Domain This family consists of the ground-like domain and is specific to C.elegans. 
It has been proposed that the ground-like domain containing proteins may bind and modulate the activity of Patched-like membrane molecules, reminiscent of the modulating activities of neuropeptides [1]. 35.40 35.40 35.70 36.60 31.30 35.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.44 0.72 -3.67 41 278 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 8 0 267 225 0 77.80 24 27.83 CHANGED spsspCsstcLcplhpcshpt.........pshstspctlppsspppass..papVICupscFuasspss..paCph.ppsshsChsap .................................ps.pCsstcLcplhppshp...........................sssppupctlppsscpphss...pFsVICupscFuahspss..taCph.ppsshsChsat........... 0 89 124 267 +282 PF04241 DUF423 Protein of unknown function (DUF423) Kerrison ND, Finn RD, Pollington J anon COG2363 Family This family of proteins with unknown function is a possible integral membrane protein from Caenorhabditis elegans. This family of proteins has GO references indicating the protein is involved in nematode larval development and is a positive regulator of growth rate. 25.00 25.00 25.90 25.60 23.20 22.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.16 0.72 -4.06 133 2074 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 1956 0 507 1237 289 88.50 40 68.64 CHANGED ulGAFuAH...uL......c..sh.lsschhph.a.pTuspYphhHulALl.slu....hhht.......ssphhthuuhhhhhGllLFSGSLYhluLouh.....+hlGh.lTPlGGl ........................hLGAFGAH...sL..................p.....pp..lust.....t.h.sh...h.pTGlpYQhaHuLAlLslu....lhhtp...........huhhh.thu..uhhhhhGhlLFSGSLYsLuLotl.........+hhuh.lTPlGGl............................. 
0 160 296 414 +284 PF04255 DUF433 Protein of unknown function (DUF433) Kerrison ND anon COG2442 Family \N 21.40 21.40 21.40 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.31 0.72 -4.55 140 905 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 315 2 319 945 159 53.80 31 47.88 CHANGED Ish...sPslhuG+PsI+GTRlsVpsllphl.usGhoh-EILps..YPp.LstcDlhAALtYA ................IshsPphhtGpPsIcGpRlsV.tsllphl.ssGh.o.h--llpc..aPt.LshcslhsALtYh............ 0 106 251 305 +285 PF01862 PvlArgDC DUF44; Pyruvoyl-dependent arginine decarboxylase (PvlArgDC) Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family Methanococcus jannaschii contains homologues of most genes required for spermidine polyamine biosynthesis. Yet genomes from neither this organism nor any other euryarchaeon have orthologues of the pyridoxal 5'-phosphate- dependent ornithine or arginine decarboxylase genes, required to produce putrescine. Instead,these organisms have a new class of arginine decarboxylase (PvlArgDC) formed by the self-cleavage of a proenzyme into a 5-kDa subunit and a 12-kDa subunit that contains a reactive pyruvoyl group. Although this extremely thermostable enzyme has no significant sequence similarity to previously characterised proteins, conserved active site residues are similar to those of the pyruvoyl-dependent histidine decarboxylase enzyme, and its subunits form a similar (alpha-beta)(3) complex. Homologues of PvlArgDC are found in several bacterial genomes, including those of Chlamydia spp., which have no agmatine ureohydrolase enzyme to convert agmatine (decarboxylated arginine) into putrescine. In these intracellular pathogens, PvlArgDC may function analogously to pyruvoyl-dependent histidine decarboxylase; the cells are proposed to import arginine and export agmatine, increasing the pH and affecting the host cell's metabolism. 
Phylogenetic analysis of Pvl- ArgDC proteins suggests that this gene has been recruited from the euryarchaeal polyamine biosynthetic pathway to function as a degradative enzyme in bacteria [1]. 21.00 21.00 21.70 23.30 20.80 20.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.93 0.71 -4.64 33 246 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 222 50 144 240 51 163.40 30 93.55 CHANGED hlP++hFhTpGsGcpc.spLs.......uF-hALt-AGItphNLVpVSSIlPPps..chlscppuhthLsPGpllasVhu+..tsoscsschIuAulGhAhsp........pspaGhlsEacuhsppcc.utchucchApphhpsp....................utclhcsttlsppscl..p.ucasoslAAsValh .........hPpphFhTtGsGcuc..stLs........uF-hALhcAsItshNLVplSSllPsps...pllshpps...c.hl..Gpll.slhAp..ssos.csschIuAulGluhsp.c.......ppthGhlsEapuhsp.s.pc..stchscchspthhppp....................shclt..-hphhspptpl..p.sthsTslussshh............................................... 0 48 93 125 +286 PF04332 DUF475 Protein of unknown function (DUF475) Kerrison ND anon COG2899 Family Predicted to be an integral membrane protein with multiple membrane spans. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.00 0.70 -4.84 7 451 2012-10-03 02:02:08 2003-04-07 12:59:11 10 2 409 0 130 677 107 296.80 50 83.95 CHANGED FDNAllNAuIL+pMS.hWQKhFLTlGILIAVFGMRLlFPllIVuhoAtlsPlcshcLALps...........sspYpcllpsAHPpIAAFGGhFLLMlFLsFhhc-cc.h+WLphlE+PLs+lG+lshlssllshlhLllhuthhstsuc.ht........tVLhAGLhGllsYhlVsulsphFc.....................ss....shthuG+AGlAhFLYLElLDASFSFDGVIGAFAITsD.VlIALGLG.IGAMFVRSlTlYLV+pGTL-cYVYLEHGAHaAIGsLAlIhLlolp..aclsEllTGLlGlsaIuhuhhtSlhtNRpp .......................................................................................................................................FDNAlVNAslL+cMoshWQ+hFLTlGILIAVFGMRLVFPllIVulsA..tlsPlcAlcLAlps...........PspYppllscuHstIAAFGGhF...LL..MlFLsFhF-ccc.l+WlphlE..p...h.....u......+.lu......p......l......s..slplhlAll.....hll.lhuthlst.spp..t...........olllAGl.hG....l.lT...a.lsVpu..luph.h-st............................................t.ts.......shsss..s....+..u.G...h..u.....t.F....L....Y....L...EV...LDAS...FSFDGVIGAFAITssl..llIAlGLG.IGAMaVRSlT..laLVc.+GTLscYlYLEHGAH...Y..AIG..s....L.A.l..Ihll.shh........h....cl..P.E..l.l.T.G.L..l..Gl.s...h.Iuhuhh....oSlphN+t.t............................................................ 2 35 82 106 +287 PF04720 DUF506 Protein of unknown function (DUF506) Waterfield DI, Finn RD anon Pfam-B_4111 (release 7.5) Family Family of uncharacterised plant proteins. 
25.00 25.00 26.30 25.50 24.80 19.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.48 0.70 -4.71 10 299 2012-10-11 20:44:42 2003-04-07 12:59:11 7 7 31 0 209 295 2 182.20 30 59.25 CHANGED Ls-lVpuFlE-sspt.pp.p.........pusus-uSs--s-psssu.p.tcsp-clcpLL.spshc-pp...l.scltpsscps.t..................stosh+cclsshLRp.hGYDAulCKS+WcpSsc....lPAG-YEYlDVlhsss.....t.RlllDlDFpSEFEIARsTcpYpplLppLPtVFVGps-RL+pll+llCcAAKpShKc+GlalPPWR+scYMQuKWhGsYcR ................................................................................................................................................................................................................h.....p....p.t......hh...t......pt.t.......h.tph.thhpt...........................ttsshhphlsptLpt...hGasuu.l.CcS.+W..p....p..o.sp........hP.u.G.................caEYlD...V.l.h...........t...s..p................Rlll-lcFRupF..ElA.R..soppYpt.ll.p.tLP....t.lFVGps-+LpplVplhspAu+pSh+........ppuhplPPWR+.pYMpuKWhus..R......................... 0 62 148 177 +289 PF04784 DUF547 Protein of unknown function, DUF547 Mifsud W anon Pfam-B_3926 (release 7.5) Family Family of uncharacterised proteins from C. elegans and A. thaliana. 
20.20 20.20 20.30 20.20 20.00 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.49 0.71 -4.22 101 769 2009-01-15 18:05:59 2003-04-07 12:59:11 9 30 360 0 388 743 150 124.20 29 25.55 CHANGED pls.....pph.scpEpLAFWINlYNAhslchlLpph...........lp.Slp...h.................s.a.pc.................................thhsluGpthoLssIE+tILRtp...................................hs-.PRlHFAlsCuohuCPsLc..saoupplcppL-puscc.Fl ...................................................t.ls..pph.scp-plAFWINlYNAhsh.............+sh.Lphh......lsts.tch...h........................shh.pc.................................sshsl....u.......G...p..ph.ohssIEptILRs........................................................................hs-.Ph.l+FALsCuuhos.Pslc....sYoupp...lcppLcpAtccal.................................................. 0 122 263 342 +290 PF04484 DUF566 Family of unknown function (DUF566) Finn RD anon Pfam-B_3992 (release 7.5) Family Family of related proteins that is plant specific. 
25.00 25.00 27.60 27.40 23.60 23.60 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.95 0.70 -4.91 18 186 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 20 0 122 170 0 289.00 27 56.20 CHANGED t.....................phuosssstspssspssutcthtst...............s.ss+spshsssupt.sossspsssSpSs........................s.hspthhuPspstss.....s..h....................sPs+tppsss..........................tsssssslh...shhsshh.+uKpstschE.csHpLRLLt..NRhLQWRFsNA+s-sshhspphsAE.ptLassWhplspLps.Vsh+RltlQhh+QclKLhtlLptQhsaL-cWstlEpcHuoSLsuAhcuLcAuTLRLPlssGApuDhpulKpAlsSAVDVMpshuSSlhpLLu.KV-thsslloELuplsspEphhLp .........................................................................................................................................................................................................................................................................................................................ttsts......................................................tsp.pth.s.pssp..ss..ss.p...h.s.s......................................................t..ss.h.s.pthh.oPsp.....tt.ss.........ss..st...h...........................sPsp.ptsss....................................................................................t.stssslh........shhs.shp..+s+t..stsp....hE.-sHpLRlLt..NRhLQWRFs.....NA+A-sshtspp.hsAE.ppLassWhphspLpcsVshKRhplQhh+pchK...Lhsl..LpsQ.......hs........hL-pWu.lEpcausuLssshcuLpAuoLRLPlssGA..........psD.hts.lppulsuAlclMpshtusltt.hhs.+spthsslls-LutlstpEphhh......................... 0 19 72 95 +291 PF04526 DUF568 Protein of unknown function (DUF568) Waterfield DI, Finn RD anon Pfam-B_4977 (release 7.5) Family Family of uncharacterised plant proteins. 
25.00 25.00 25.80 25.00 21.60 20.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.40 0.72 -3.71 19 208 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 26 0 118 209 0 101.80 32 29.55 CHANGED MsGoQullAap...sus..GshpshT.slsSYss..Ltpus...LuFsVspluAp...ss............uchtIFATlpLP...sNssslNpVWQsGshst.GsshshHshSGsNlpShusLDl .......................MsGoQALlAap.......sss...Guht..lh..Ths..lsuhss.....l..ptus......l.uasls.s.luAph..ss.............uphpIaATlpLs.....sstoplNpVWQsGshs...ussPthHs.h..su.sNlpShuslDh.................................. 0 14 75 98 +292 PF04502 DUF572 Family of unknown function (DUF572) Finn RD anon Pfam-B_3967 (release 7.5) Family Family of eukaryotic proteins with undetermined function. 24.00 24.00 24.00 24.00 23.40 23.80 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.22 0.70 -5.21 18 653 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 303 0 474 629 5 286.40 27 91.35 CHANGED MuERKslNKYYPPDFD.............PuKlP+h+psK....spQhsVRhMhPFNhRCsoCspY...IhcGpKFNARKEcVtsE.YLGl.IaRFYhKCspChsEIshKTDPcNsDYshEsGAoR.sapsc..........cttp-phcphcccccp........................EptsssMctLEpRTtDu+pEh-shtsL-EL+chppR..csslshsuhLcph..tcpcccptpcp-E-Dpphhcpltht......pppcp+Rhhs-..........p-tpc-.................t.sstspsspsutspsoshhspsstppspssstpphptss...................t.+sthssllhtKKptssssppst...................................tpsttpsspsssssuLstLst....hssS--s 
...................................................uERKshsKYYPPDaD....................................................sph...sht.cp.s+..................pt...hh.l.R..h.hPFsh.hCssCs..pa..........IhpGh+FNAcKcpVss...........Yh.u.h.Ia+FhhKCs.tCss.pIph+TDPcN....s....D....Yshpp.G.ApR..shc.........................................ttp...ppp.h...t....t..h.....p..c..c...c....cc.........................c.ttscsMt.tLE....pcttD..pc.pthc.........h.h.ttLp..cl.pp.hp.pc......pssh.s...hs...p...h...Lcph.............hcp.pc..c....t......h.pp.....pp...c.pcp..t..h...hpph..h......................pp.tp..p.chht................................tp................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 176 266 389 +293 PF04504 DUF573 Protein of unknown function, DUF573 Mifsud W anon Pfam-B_2087 (release 7.5) Family \N 29.50 29.50 29.90 29.50 29.40 29.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.20 0.72 -3.72 21 208 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 21 0 126 194 0 93.60 31 25.96 CHANGED thFQRl.Wo--DElslLQGhlDa.cscpGt..sshp.DhsuFa-hlKppIshcsSKsQhssKlRpLK+Kapsphp+......tsp-.sFspsHDpcsFcLu+hIWGs ...........................t..hpRl.WoccDElhlL......pGhlsa..psppGh.........ssht..Dhssha.-tl+pp.l.p.h.c.h.op.sQLh-KlRRLK+KYpsthp+........tu....tt..shppsH-pphacLu+plWs............... 0 23 58 87 +294 PF04535 DUF588 Domain of unknown function (DUF588) Bateman A anon Pfam-B_1439 (release 7.5) Domain This family of plant proteins contains a domain that may have a catalytic activity. 
It has a conserved arginine and aspartate that could form an active site. These proteins are predicted to contain 3 or 4 transmembrane helices. 21.10 21.10 22.30 21.40 20.80 20.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.80 0.71 -4.64 84 771 2012-10-03 17:26:12 2003-04-07 12:59:11 7 12 74 0 461 799 0 135.20 22 69.99 CHANGED ppshphsplsLRlsshshslsuhslMuospps..h...............h.hpspasshsuapahVsusulsssYsllphshshhthhh.......tsh.hhhhhhhhDtlhshLhhuAuuAAuuls.ls......ppGstpht.h....hCpthspF.Cpps.suulshuFluhlh ............................t..hthhslhLRlsshshslsuhslMusspp........................................h.tspa.sshsu...a...pa...hlsusulsssYslhphhhshhthhh................tsthhhhh..h.....hhhD................t.......l..hshLhhuuuuAAsuls..lh..............tpu.t.t.............hC..........t....h..........s..pF..Cpph.ssulshuFhuhh................................................ 1 61 279 376 +295 PF01883 DUF59 Domain of unknown function DUF59 Enright A, Ouzounis C, Bateman A, Eberhardt R anon Enright A Family This family has an alpha/beta topology, with 13 conserved hydrophobic residues at its core and a putative active site containing a highly conserved cysteine [1]. Members of this family are involved in a range of physiological functions. The family includes PaaJ (PhaH) Swiss:O84984 from Pseudomonas putida. PaaJ forms a complex with PaaG (PhaF) Swiss:O84982, PaaI (PhaG) Swiss:O84983 and PaaK (PhaI) Swiss:O84985, which hydroxylates phenylacetic acid to 2-hydroxyphenylacetic acid [2]. It also includes PaaD Swiss:P76080 from Escherichia coli, a member of a multicomponent oxygenase involved in phenylacetyl-CoA hydroxylation [3]. It is found near the N-terminus of the chloroplast scaffold protein HCF101 Swiss:Q8LD16, involved in the assembly of [4Fe-4S] clusters and their transfer to apoproteins [4]. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -9.16 0.72 -3.99 74 6019 2012-10-01 19:25:19 2003-04-07 12:59:11 14 30 3431 23 1809 4321 2549 73.70 27 34.66 CHANGED cpplhsALppltsP.sG.tsllstshlcs....lslps.......spVphslpls.Psps..thpsl+ppscpslt.slsG..sspVpl ......................pplhpsL.c.pVhDP.....E....ls.hslssLGl..l..h..s.........l.p.l.cs...................spl.p......l.....s.hTl..T..s.suCP....hsshltpplppslp..tlst......ps......................................... 0 581 1153 1528 +296 PF04547 Anoctamin DUF590; Calcium-activated chloride channel Mifsud W anon Pfam-B_2735 (release 7.5) Family The family carries eight putative transmembrane domains, and, although it has no similarity to other known channel proteins, it is clearly a calcium-activated ionic channel. It is expressed in various secretory epithelia, the retina and sensory neurons, and mediates receptor-activated chloride currents in diverse physiological processes [1]. 
28.00 28.00 28.60 28.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.64 0.70 -5.84 102 1393 2012-10-02 00:51:22 2003-04-07 12:59:11 7 39 257 0 866 1250 37 444.10 27 58.75 CHANGED I+sYF.....GEc...luhYFuaLsaYophLlhsAllGlhsalhthh...................................................................................................................t..hp..hsshFulhhslWuolFlEhWKR+.psplshcWshhshp.............p.p..R....spFputhh............................ths.lTs......................c..hhsthc...phh+h.lhshsllhhhlshhhshhluhhh............................hchhltphht...................hph.h.shlssll.hlhl.llshlYpplAphLTchENa+...........opspY-suhhhKhal..............................hpFlssasslFalAFhps........................................h....shpcLphplhhlhlspQlh...p.lhEhllPhlhph.......................................................................................................................hpph..............phppptpppp.......................................................................................................................pt.hhpphcp-h.phpsas..................................shh....s-YhEhhlQFGalsLFusuFPLuslhuLlsNllEl+sDthKhhp...h+RPhsp.+spsIGsW.pllphluhlulloNshllshsp................................tphpshhst...............................................................hlhhllhhE.................alhhhl+all....phhls-h.....Pptl......ptphcRpca 
....................................................................................................................l+pYaGp...+luhYFuaLsh..YTthLh.sulhGlhhah.h.shh.....................................................................................................................................p......hslhFul....h.h.sl..W.....u...s..hFlE..h...WK.Rp.ps........tls.hpWshhshp.............t.cp.R...spaps.hh..............................................................hhs.hst.........................................................p.....c...h.hs.hp...ch..hhh...hhsh.shh.hhhl..hh..h......hshhhshhh......................................................hc....hhhhthht..........................................ph.hhshhsshl....hlhl........l.h..s............lY.......p.p.l.A.h.hLTph....E..+................Tp.s...pa-pphhh...Khal.......................................hpFlN...as.shFY..l.A.Fhht........................................................................h...........shhc..Lp.plhhl.h......l.spQ.hh...s.shhE...hh.hPh.h..h..ph.........................................................................................................................................................................................h.pph.........................hppt.tt.t..................................................................................................................................................................htphc.-h...pL.ts.ht...............................................shh.-..YhEh.......h....lQFGal.olF....ss.uaPL.As.lh...ALlN.NllElRhD......AhKh.sp..............h+R..........P............h................s..t.....+......s...p.....s.........I............G.........h......W...t.hhphluhlullsN.s..hl...luhsp.....................................................................tphpthhst....................................................................................................................................................h.tstlhhlllhE.................pl.hhhlphhlthhlPch....Pt.l.t.pht
+.............................................................................................................................................................. 0 302 410 651 +297 PF04576 Zein-binding DUF593; Zein-binding Mifsud W, Eberhardt R anon Pfam-B_2802 (release 7.5) Family This domain binds to zein proteins, Pfam:PF01559 [1]. Zein proteins are seed storage proteins. 21.00 21.00 21.20 22.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.14 0.72 -3.86 26 261 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 22 0 164 232 0 91.70 43 16.67 CHANGED lptL+ctlctp+cshpsLhpELEcERsAuAsAAsEAMuMIhRLQcEKAslpMEApQapRhhEEptpaDpEtlphLpcllhcREc-hptLctcl- ......................h.ttL+ctlptp+cshpsLhtELEcERsAuAsAAsEA.MAMIhRLQcEKAuhpMEApQapRhhEEptpaDp-slptLp-llhcREp-hppLctcl..................... 0 23 93 133 +298 PF04642 DUF601 Protein of unknown function, DUF601 Mifsud W anon Pfam-B_5475 (release 7.5) Family This family represents a conserved region found in several uncharacterised plant proteins. 21.70 21.70 24.00 23.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.95 0.70 -5.06 4 14 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 3 0 8 16 0 185.00 38 47.09 CHANGED MuGTLEDSKLRAh-KAKp.ps-scutoRQcs...Ss.AuKsscsPTS-utossKsssuKDssKRtADKKRKpsEcDupSPsRSSRsRpEEKssu.sppKtcsKcu.sQsLV..VLSSp.SEscpSphRoo.PlPAPPhsFADhhRTLVtPGusIsPhcEhKtsN+ENYLRFAtKLGchl.EFN.sFhSHEDQL.DKDpEIESFKpsE-ENA+hV-RANpVLsRM+sAEs+VQpLElsNhDLsAKLcuGKNAY.ssI-pEspuRA-LhsCEEKh+KLEEtQAshlssARpEERRKVRAQF+DFSSKYGsFatpSEEV ...................................................................................................................................................................................................................h.th-.Elpsa+.sE--Nu+hVp+AspVhsRM+tsE.plppLElsNhDL.tKLp..........................hpth.EtQt.hl..tR............................................ 
0 6 6 6 +299 PF04641 Rtf2 DUF602; Rtf2 RING-finger Mifsud W, Pollington J anon Pfam-B_5482 (release 7.5) Family It is vital for effective cell-replication that replication is not stalled at any point by, for instance, damaged bases. Replication termination factor 2 (Rtf2) stabilizes the replication fork stalled at the site-specific replication barrier RTS1 by preventing replication restart until completion of DNA synthesis by a converging replication fork initiated at a flanking origin. The RTS1 element terminates replication forks that are moving in the cen2-distal direction while allowing forks moving in the cen2-proximal direction to pass through the region. Rtf2 contains a C2HC2 motif related to the C3HC4 RING-finger motif, and would appear to fold up, creating a RING finger-like structure but forming only one functional Zn2+ ion-binding site [1]. This domain is also found at the N-terminus of peptidyl-prolyl cis-trans isomerase 4, a divergent cyclophilin family [2]. 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.05 0.70 -5.19 23 575 2012-10-03 15:03:13 2003-04-07 12:59:11 7 9 286 0 402 606 6 231.20 20 66.07 CHANGED MGsDGGoI..PpRcELV+h.........+++scpl...............D.ppt.ptt+WphCuLopc.Lpc...PIVsspLGpLYNK-sllchLL-Kp....thscsss......HI+uLKDlhpLplosNPs.pttt............psspps.alCPloul-h.sGph+FhhLhsCGCVFSE+AL+-l.....K...sch......................C.hCspsas.pc............Dl...ls........lNsocE.-h-hh+tch.cccpuc......pKpcKcpK+pKpstssusstpsstsstsst.................................tt..tt..t..phtptpshsppsppscsap.........SLFsoppppK 
......................................................................................................................................................................................................................aphCsL.S.p.hp................P....l..ss....GplashpsllphLh..ptt.........s..ttht.....................................+lpsl....K-lh.cL.p.hsts.................................................pcspa.h.CPls..t...pth.....sspp...+hl.hl...t..ssG....s....V..ao....tcAlc..cl......c.........sph..................................................C...hh...sspsap...cp................D.l...Is.......................lp.....s.........sp.........p.......-h..........th......h.p.p..ph...pthp..p...............tp.t...pp....t.ct..t...t..................t......t......................................................................................................................t................................................................................................................................................................. 0 152 233 342 +300 PF04652 DUF605 Vta1 like Mifsud W, Mistry J, Wood V anon Pfam-B_5537 (release 7.5) Family Vta1 (VPS20-associated protein 1) is a positive regulator of Vps4. Vps4 is an ATPase that is required in the multivesicular body (MVB) sorting pathway to dissociate the endosomal sorting complex required for transport (ESCRT). Vta1 promotes correct assembly of Vps4 and stimulates its ATPase activity through its conserved Vta1/SBP1/LIP5 region [1]. 
24.70 24.70 24.70 24.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.38 0.70 -5.06 39 629 2009-09-11 15:36:18 2003-04-07 12:59:11 11 22 297 9 412 611 7 210.40 18 49.99 CHANGED pslp.alppApEh-ptc.PllAYaC+haslpphlchst..pssEsppah....hsLhDpLE...................phKpphs......sp.......cslh.scssupualEpFALclFspADpp.Ru.sp..hs+.sss+sFasAuhhh-llphF...Gp...................l....sp-stp+hKYAKa+AscItKul+pG........csPssss..hp-pcpt.............................................................................................................................................................t.sstssssssstsssp..s...........t.s.t..s....sss.PssP...........................................Pss......s.ss...sst.....st...............................................tstPsspp.s................................................sht.sscplspAQKaA+aAlSAL....sa-DlpTAhcpLppALclLs ..................................................................................................................................................................................................................................h.hu.pht.............................hhsahh.......thahhp.....h...ht.............ts.t..thh........lhp.....lE...................t.p.tt......................................................t.....hh.tah.thh..t.pt..p........t...................t.....hhp.h.ss..hhphh.t.h.......t........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 142 230 338 +301 PF04678 DUF607 Protein of unknown function, DUF607 Mifsud W anon Pfam-B_5620 (release 7.5) Family This family represents a conserved region found in several uncharacterised eukaryotic proteins. 23.80 23.80 23.90 24.20 23.40 23.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.14 0.71 -4.35 41 400 2009-09-11 03:12:19 2003-04-07 12:59:11 8 11 197 0 257 399 0 179.00 27 53.31 CHANGED ht.........hpstscutchssslcsuushl.slpssV..hlp.................Ppt...............hhthlt.hhsh...t..t.pp..............hlcphppclpslpchKpphcptAc......ppsp+lhWuGluhlu.sQhullh+LTFa.-huWDlMEPVTYalohushhsuYsaFLhppc-hoYpuhhcpphst+.c+hhptpsFDlpc.......YpcLp ....................................................................................................................s........tt.tpuhphsthhptsushl..hpshh........hL.....................................Ppp.................................hhphltthhsh.............tpt...h.............plcphcppL.psLp.phKtclcptAc......ppsptlh.WuG.Luhhs..sQhGllh+LTaW..-aSW...DlMEPlTYFlThushh.s..s..YuYFlhTp.p.-hsYpshhppphhtt.c+hhppppFDlp+YppL.................................. 0 78 135 199 +302 PF04685 DUF608 Protein of unknown function, DUF608 Mifsud W anon Pfam-B_5657 (release 7.5) Family This family represents a conserved region with a pankaryotic distribution in a number of uncharacterised proteins. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.21 0.70 -5.74 46 661 2012-10-03 02:33:51 2003-04-07 12:59:11 8 24 450 3 246 857 87 284.20 22 30.49 CHANGED GcFshhEuh-h.thhsohcsphY..uShuLhhLaPcLEcolhpshspsh.pps.........................GslPHDlGhs............cpPh.tss...hp....ssttWpDLsssaVLpVYRDa..hhoG-...................................................................ps...aLcphaPsltpul-ahh..p......aDpD.sDGl.-ssst.D....................................pTYDsh...hhGsouYsuuLalAALpAuhphuchlsc.........................ttptppapphlpcu+phhcch.................LWNGc..Yat..hD....................................ptphupulhuDQLhGpa...auphhGL.....ssllsc-phcoALpslachNhh.............hhhsuchGssNuhhP.......cGp............hc.ssspspE.VWo.GlsaulAAhhl.cGhh-c.uhclscsshpphh...phGh.appsEsh...t.......hhuspYhRshuhWuhhh ...............................................................................................................................................................................................................................................................................................-........hh..hh..h.............t....................................................................................................................hh.t..ha................h.ht.....hht........t......hs.p...t..sG..h.....p..s...t.h......................................................................................................................................................................p.h....h....D..s.......h.........h.....t..G......S...u....a.........s.....u..tL...a...h.s..AL.....p...u...hhthA........chhs.c...............................p.t.t.sp.pa.pthhpps+psh-pp........LW..s...Gc......aat..h...............................................................................................s.p.ptsc.h...hs-QltGph...ahp...s.uL...........s.h..hspc.+sppAhpslhch..h............................h.s..sphGhssthh........
.......sst.................s...hp...................................................................................................h......................................................................................................................... 0 82 143 207 +303 PF04727 ELMO_CED12 DUF609; ELMO/CED-12 family Mifsud W anon Pfam-B_3095 (release 7.5) Family This family represents a conserved domain which is found in a number of eukaryotic proteins including CED-12, ELMO I and ELMO II. ELMO1 is a component of signalling pathways that regulate phagocytosis and cell migration and is the mammalian orthologue of the C. elegans gene, ced-12. CED-12 is required for the engulfment of dying cells and cell migration. In mammalian cells, ELMO1 interacts with Dock180 as part of the CrkII/Dock180/Rac pathway responsible for phagocytosis and cell migration. ELMO1 is ubiquitously expressed, although its expression is highest in the spleen, an organ rich in immune cells [1]. ELMO1 has a PH domain and a polyproline sequence motif at its C terminus which are not present in this alignment. 
20.60 20.60 21.80 21.90 20.40 20.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.03 0.71 -4.71 60 873 2009-01-15 18:05:59 2003-04-07 12:59:11 8 24 242 0 533 801 13 171.00 26 34.37 CHANGED pphphlcphtphshDspp...............................spHpch.........LppLaphhhss........................................tphssphsp................pWcclGFQ.upsPsTDFRus..GhLGLhsLlaascpaps..thpclLppspptt...........................................................................................paPaAlsuIslTt................hlhphl..ch.sph...........stcthpsh.............................htsthpsFpcLashshhtFschWh...................pptsslh-Fsplhp ................................................................................................h...t.lcphhph.hDsps................................p..H...pch.........LhpLaphhhss.................................................p.hpshhsc..................pWc.clGFp.s....s..sP......ssDFcs.s..GhLuLtsLlYFuc..p...ats..thpc.llhcspppp........................................................................................................................................h....phPFAhsuIslTp................hlhphL.ch..sph.......................pscthpth...................................................hhsp.psFppha.shsh.hh.s.+hWh...................phts.oh.-FspVh......................................................................................................................................... 0 181 274 395 +304 PF04747 DUF612 Protein of unknown function, DUF612 Mifsud W anon Pfam-B_3614 (release 7.5) Family This family includes several uncharacterised proteins from Caenorhabditis elegans. 
18.30 18.30 18.80 27.70 18.20 18.20 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -13.08 0.70 -5.60 2 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 4 0 25 45 0 198.90 36 72.46 CHANGED MRSPKSVRRPHIRQQLTNRRKNLGRVAKSQRNQFRQWLLTAVLPNSINDQRKEAFASLELTEQPQQVEKVKKSEKKKAQKQIAKDHEAEQKVNAKKAAEKEARRAEAEAKKRAAQEEEHKQWKAEQERIQKEQEKKEADLKKLQAEKKKEKAVKAEKAEKAEKTKKASTPAPVEEEIVVKKVANDRSAAPAPEPKTPTNTPAEPAEQVQEITGKKNKKNKKKSESEATAAPASVEQVVEQPKVVTEEPHQQAAPQEKKNKKNKRKSESENVPAASETPVEPVVETTPPASENQKKNKKDKKKSESEKVVEEPVQAEAPKSKKPTADDNMDFLDFVTAKEEPKDEPAETPAAPVEEVVENVVENVVEKSTTPPATENKKKNKKDKKKSESEKVTEQPVESAPAPPQVEQVVEp.......................VEcPV..APsSKKPTADDsMDFLDFVTAKP-+oEss.......h.VEss+s-ppTAssuts+KKNKKsKpKppSEp...Ess ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tts.t..ss....ss.s.....SpKPTAD.sMDFLDFVTAKs-cs-.s............h....p.....................................t.......................................................................... 0 3 9 25 +305 PF04749 PLAC8 DUF614; PLAC8 family Mifsud W, Bateman A anon Pfam-B_3635 (release 7.5) Family This family includes Swiss:Q9NZF1, the Placenta-specific gene 8 protein. 
21.40 21.40 21.40 21.40 21.30 21.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.97 0.72 -3.48 114 1148 2009-01-15 18:05:59 2003-04-07 12:59:11 12 23 216 0 794 1125 22 103.20 24 44.26 CHANGED WssGLh...cCh........s.Dhs...sChhshh.sPChhhuph......uphhsttt................ssCshhshhhhhh............................h.....shhRs.plRpcasl............pu..s.sssDshhph.hC.sCuls........Qpt...RE...l ............................................................................................Wpsul.h.s..Ch..............s..D..h.s.........hC.hhshh..CP...C..h.hhuphtptlt.t....................................spChhhshhhshh...........................................................h......hh..hs.....shhRtplRp+a..sl...............................ps.....s...ss...sD..hhs.th.hC..sCulsQthRE................................................. 0 276 496 660 +306 PF04759 DUF617 Protein of unknown function, DUF617 Mifsud W anon Pfam-B_3842 (release 7.5) Family This family represents a conserved region in a number of uncharacterised plant proteins. 20.90 20.90 21.40 22.00 19.20 20.60 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.07 0.71 -4.60 23 195 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 19 0 135 184 0 163.50 47 61.51 CHANGED plTGTlF.....GaRRG+VshulQ-sspus..Psl.LlELAh.sTusLh+EM..uuGhVRIALEs-+p..sspsstt...........................LL.-EshWshYCNGRKsGYAlRR..Eso-sDhtVLchLcsVShGAGVLPu............ttsssst-G-lhYMRA+FERVVGS+DSEoFYMlNP-.......Gsu......G......PELSIFhlRs .................................lTGTlF.....GaR+G.+VphulQccs.+us.......Ps.l.LlELAh.sTusLs+EM..uuGlVRIALEC-+ppspp.s.........t..............................Ll-EslWshYCNGRKsGYAlRR...-so-sDhpVLchlcsVShGAGVLPs.................ssssus-GElhYMRA+FERVVGS+DS..EuFYMhNP-.......usu..............G....PELSIFhlRh.................................... 0 15 75 104 +307 PF04818 CTD_bind DUF618; RNA polymerase II-binding domain. 
Mifsud W, Eberhardt R anon Pfam-B_3687 (release 7.6) Family This domain binds to the phosphorylated C-terminal domain (CTD) of RNA polymerase II [1,2]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.19 0.72 -3.73 41 1005 2009-01-15 18:05:59 2003-04-07 12:59:11 8 25 269 19 650 939 0 63.20 28 8.67 CHANGED KLshlYLsNDVlQpu.K++...ph.pFh....psFssllssshpplhpph..spchcpplpRllslWp-Rslas ...+LshlYLhN-llp..pu...++p.............pt.pFh.............tpFppslssshpp...l...h...pph.......spcs+ppltR...l...lsl...Wpcpslat................ 0 167 294 487 +308 PF04789 DUF621 Protein of unknown function (DUF621) Waterfield DI, Finn RD, Fenech M anon Pfam-B_6219 (release 7.5) Family Family of uncharacterised proteins. Some (such as Swiss:O01625) are annotated as having possible G-protein-coupled receptor-like activity. 20.80 20.80 20.80 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.99 0.70 -5.42 14 52 2012-10-03 04:04:29 2003-04-07 12:59:11 10 4 6 0 48 64 0 200.60 40 89.57 CHANGED huElPsp-KsslYYhlloLFVlSTlsoTLLTuuFLllullLWu+FKs........MKFFWFLoQLTlSVFIlSsLNLlINVPATLFSLlTK-FlpSclFhhlSYlIDFCHYoILFSNLVIAIQRhhVFFaRpl.TsphF-S.lIYhWLl.VWlluhhlhhhhhhsNC+Ypapp........ps..p+Y.LpCpst...sslVshssPphIQl................lEhllQhGIPlhILslYlAllhKIhhMKpooLsKsEhplLKQAIFlFllFQsSSsVFLhsQTlphsssTAFLIKRhINT..............hEIhAGAATPCFFFFTSKEIRKllSoKlSAsSSQGsS 
............................................................................................pt.haa.hl.hFlh..hhsolLohsFlh.h.hhhW..h+...............h+aFWFL.pLThuhFlhS..NhhlslPAsL.ulho.phhpo...............t.a.hh....ph.h..hlhsNhhhu......................................................h.h................h.h.C.........h.........h................h-.h.hQhhlPhhI.h.hhYhulhh.......+lh.h.h..Ktss.p...p.......EhhlLhQuhhlFhhFQhss..lhhhs..h....t....AFhlK+hlpT...............h.-............................................................................................ 0 14 17 48 +309 PF04802 SMK-1 DUF625; SMK-1_Ce; Component of IIS longevity pathway SMK-1 Waterfield DI, Finn RD, Wood V, Mistry J, Pollington J anon Pfam-B_6319 (release 7.5) Family SMK-1 is a component of the IIs longevity pathway which regulates aging in C.elegans. Specifically, SMK-1 influences DAF-16-dependant regulation of the aging process by regulating the transcriptional specificity of DAF-16 activity [1]. SMK-1 plays a role in longevity by modulating the transcriptional specificity of DAF-16 [1]. 
21.40 21.40 21.60 21.90 19.90 20.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.26 0.71 -4.78 25 450 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 272 0 288 477 3 185.10 41 22.89 CHANGED l.scsYlp+L..lslFctsE-l.-shcsLHhLpsIlK..........sllLLNcsslhEhllu.D-hlhslVGsLEY.....DP-hPp.sK.pHRpaLpppu.+FKEVIPIpssplppKIHQTaRlQYLKDVVLs..RlLDDsshusLsohIahNps-IlshLQcDp.pFLpELFuhh..................pssssss-+++-hlhFL+-hCshu+sLQs.....s+ppha+sL ......................scsYIcKL..lpLFchCE-L.Esh-sLHpLapIlK..........uIl...h..L...N.c..ssl..hEhh.hS.D.-sIhcVlGsLEY..............DP.sh.s.p....s+....p....H.R.caLpcpu....+FKE.....................Vl..PIp...-s...p..l...h...pKIHQTYRlQYlpDllLs...pl..h--shhSsLsShIFF.N.pl-I.Vs..hL..Q..cDp...pF...Ls-LFuth........................................................................s.sp...ss.s.p..c++c-hV.Fl+-hCshup.sLQs.....s+pshacsL........................................... 0 91 148 218 +310 PF04783 DUF630 Protein of unknown function (DUF630) Kerrison ND, Eberhardt R anon Pfam-B_2481 (release 7.6) Family This region is sometimes found at the N-terminus of putative plant bZIP proteins. Its function is not known. Structural modelling suggests this domain may bind nucleic acids [1]. 21.30 21.30 21.30 21.70 20.80 19.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.85 0.72 -4.07 33 295 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 25 0 203 284 0 60.00 42 8.62 CHANGED MG.CstS+.l-.......s-........-sVthC+-R++hlKpAlctRpsLAuAHsAYhpSLRssGsALpcFspsEs ..................................MG.CstSK.l-.......p-..............-uVshC+-R++hl+pAlctRtsLAuAHsuYhpSL+ssGsALppFsps-................. 0 21 111 159 +311 PF01683 EB EB module Hutter H, Bateman A anon Hutter H Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 8 conserved cysteines that probably form four disulphide bridges. 
This domain is found associated with kunitz domains Pfam:PF00014. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.12 0.72 -3.90 63 858 2009-09-13 14:03:54 2003-04-07 12:59:11 13 76 43 0 729 809 1 57.00 25 22.16 CHANGED C.s...........sh....hhhsspChsp.................................................st.Gps.Cp.hspQCt....ss..otC..hs....u.....pCpCssu.........hpthsst..C ............................................................................................................................tstCh.t..................................................s.t..Gps.Cp..ts...ppCt..............ss..ShC...hs......u.........hCpCss.u.........hh....s.shC.................... 0 278 372 715 +312 PF04782 DUF632 Protein of unknown function (DUF632) Kerrison ND anon Pfam-B_2481 (release 7.6) Family This plant protein may be a leucine zipper, but there is no experimental evidence for this. 19.50 19.50 20.20 19.60 19.00 19.00 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.95 0.70 -5.33 33 381 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 30 0 243 367 1 266.10 28 43.51 CHANGED slh-llc-I-chFh+Aupuup-VochLEs....s+......h.appp.sphpt.............................uuplhpslohs...hs.ts....................htptsssttss.ts.tstoauoTL-+La.AWEKKLYcEVK.....................upEpl+hpY-KKhppLpph-t+Gscttpl-+T+sslpcLpo+lhlshpslsshSppIp+LRDcELhPQLl-LlpG.............................Lh+MWcsMhcsHchQhpllpps+..lst.....stsp..............so-tappsThpLEsplppWpssFssalssQ+sYlpuLssWLphsl........tpppt..s.s..p..sPslashCppWppsl-.cls-ctstcAlcuFssslpsl 
...................................................hhphhp-lcp.F.+AupuupcVuthLEs..s+.......ht..hptp...t..t.th...........................................................ttphh.pslsht......hs.ts.............................t..s...s.h.tt...s....stshuoTL-+LhsWEKKLYcEVK....................................................stEth+hta-+Kht.Lpp.-.+G.tp....pl-+T+s.lccLpo+.lhlshpshpshoppIpclRDpELhPQLh-LlpG.............................hhpMWcsMhcpHphQhphlpphp..hs......t.....................psc.pt..ptshpLc.tlppWtssFsphlptQ+sYlpuL.suWLph..sl..........t...t......s......p.h.ssslhshsppWtttlc..plspp.......s.pulcshhtslp............................................................. 0 26 142 192 +313 PF04826 Arm_2 DUF634; Armadillo-like Kerrison ND, Eberhardt R anon Pfam-B_2700 (release 7.6) Family This domain contains armadillo-like repeats [1]. Proteins containing this domain interact with numerous other proteins, through these interactions they are involved in a wide variety of processes including carcinogenesis [2], control of cellular ageing and survival [1], regulation of circadian rhythm [3] and lysosomal sorting of G protein-coupled receptors [4]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.57 0.70 -5.27 11 392 2012-10-11 20:00:58 2003-04-07 12:59:11 8 14 78 0 216 672 4 217.60 29 44.07 CHANGED sssphcsh.Lssp-LcKLLslLcpTcDPhIpEhALlsLGNsAAashNQ-IIR-lGGlslIushlssssspl+.pKALNALNNLSsNsENQpclKsYlsQVC-DlhssPLNSsVQlAGL+LLsNhTVTs-YQHllss.lssFhpLLstGNscTKhplLKlLlNhScNPuMT+cLlsspVsSSLluLFspp.s+ElLLplLplFpNIscplKtcuplssppcFocuoLF.hlhp-sp.hspKlpuLssH.pDs-V+tKVl ...........................................................hsstph.pc...llt.llchopDPhIpphuhhsh.up.s.s..s......a.s.h......s.p...........sh.I+c.h..Gsl.sl.ltsh.l.s.p.s.s......s....pl+....ppA....Lpsl.s...sh.oss....h-N..pt.p..l...c...h......a..l.s.p.Vsc....-h...h.s...t..s.l.......s..S...s...l.Q..h...u...G..L+hLsph..T..........l...s..sp..h..p..p...h..l...s...s.......h...s...s.h.h...p.....L.Lst.G.st..p.....s+.hplLK.lLhN.loc.NPsh....sc..c.Lls.s..p..s...ushh.sLas.....pp.spp.llphlshhppltpth+.t......s....p.......as.tsLh..hhtp.t.....hspph..t........................................................................................................ 0 33 46 80 +314 PF04859 DUF641 Plant protein of unknown function (DUF641) Kerrison ND anon Pfam-B_6069 (release 7.6) Family Plant protein of unknown function. 23.10 23.10 23.10 24.10 22.90 22.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.60 0.71 -4.40 13 143 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 20 0 98 139 2 124.70 33 29.11 CHANGED spstspsspsh-ulluclFuslSSlKuAYhpLQpAHsPYDs-pIQuADpsVVuELcpLS-LK+tat+ppsssu.....spssthhAclpEpppLl+TYElhl++..LctElcsKDsEIcpL+pplppthtssppLEK+l ..........................t....pphtsh-ulluclFsslSulKuAYhpLQtApsPYDs-pIpuADphlVuELcpLS-LKchahcpphs.ss...............................st.sh.hhuph.pEppsll+o.YEhhsc+..LpuplptKDuElttL+pplpctt.ptspplpc+l................................ 
0 14 62 80 +315 PF04884 DUF647 Vitamin B6 photo-protection and homoeostasis Mifsud W, Eberhardt R anon Pfam-B_6523 (release 7.6) Family In plants, this domain plays a role in auxin-transport, plant growth and development [1,2] and appears to be expressed by all cells in the plant as well as in plastids. The family has been shown to play a role in vitamin B6 photo-protection and homoeostasis in plants [3]. 28.30 28.30 31.30 29.20 28.20 28.20 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.45 0.70 -5.20 27 424 2012-10-10 14:40:21 2003-04-07 12:59:11 9 9 206 0 293 410 13 219.60 31 52.45 CHANGED sptshph.pthpphhshlhphFLPpGYPpSVosDYlsYtha........cslQshsSslsusLuTpAlLp.u...................................lGVGs.......ssAsssuAslpWlL+DuhGtlupILFAthh............................GotlDs-sKpaRlhADllsDhAhhl-l.loPhaPp......hhlhlhssusls+ulsuVAuGu.o+AuLotHF.AhpsNLA-lsAKssuQpTls..sLlGhhlGh.......hlsshls......shtsshhs.hhhlsslHlhsNYpAVRuVphcTLNcpRsslllppalps.upl. ..............................h.............hthhhthhlPtGaPpSVos-Yh..Ytha.....................cslQ.shsoshtusLuTpulLt.u................................................................................lGV.Gp........tsus..stuAs..lpWll+.Du.hGhlupllFuhh..............................................................G..sphDscsKp......aRh...hA...........DllpDhuhhlEl.hoP.....h.h.Pt..............hhl.lhshuslh+ul..........suVuu................uu.T+uulptHa...A.................h....p.....s...Nhu-lsAKs...............tuQpsls..slhGhh...lGh...................hlhphls........................s..hhhhs..hhhlshlHlhssapul+ul.hpTLNttRhpllhppahpp.s........................................ 0 105 187 246 +316 PF04900 Fcf1 DUF652; Fcf1 Mifsud W, Wood V, Mistry J anon Pfam-B_6634 (release 7.6) Family Fcf1 is a nucleolar protein involved in pre-rRNA processing [1]. Depletion of yeast Fcf1 and Fcf2 leads to a decrease in synthesis of the 18S rRNA and results in a deficit in 40S ribosomal subunits [1]. 
21.40 21.40 21.50 21.50 20.80 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.51 0.72 -3.92 60 807 2012-10-03 20:43:45 2003-04-07 12:59:11 7 10 439 0 568 803 47 95.30 35 44.72 CHANGED hcsLhu...cspshlTpCVltELcclu..phpsulplsps.p.hpRhpCsHpss.......Ac-CIhphV..s........pc+.........YlVATpDp-L+++lR.+lPG.VPlhalp.psphhlEph ................................chLhu.csh.hlTpCVhtELEcLG....t.c....hp.hAh.....p...........lA..+...c...............t...hpc.h..p....CsHpss.....................h.A--Cllphl..p......................................ppc...........YlVAT......pDp...........cL+++lR.clPG.VPlhalp..ppphhlEt.................. 0 188 312 453 +317 PF04949 Transcrip_act DUF662; Transcriptional activator Finn RD, Eberhardt R anon Pfam-B_6952 (release 7.6) Family This family of proteins may act as a transcriptional activator. It plays a role in stress response in plants [1]. 27.20 27.20 27.20 29.60 27.10 27.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.09 0.71 -4.60 6 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 19 0 44 72 0 147.50 58 88.85 CHANGED Qh.plhsppoGulShsG.usthc-c-EEhSRSALSsFKAKEEEIER+KMEVRERVQAQLGRVEEEoKRLAhIREELEuhuDPhRKEVsslRK+IDsVNRELKPLGpTCQKKE+EYKEsLEAaNEKNKEKApLloKLhELVuESE+hRMKKLEELSKolEol ....................h.t.su.uhuh....us.thsc--EEhSRSALSsF+AKEEEIE+KKMEVREKVpsQLGRVEEETKRLAhIRE.............ELE.......uhADPhRKEVuhVRK+IDsVN+ELK.PLGpoCQKKE+EYKEsLEAFNEKN+EKspLlo+LME...LVuESE+lRhKKLEELsKsl-oh................. 0 11 27 34 +318 PF04950 DUF663 Protein of unknown function (DUF663) Moxon SJ anon Pfam-B_5147 (release 7.6) Family This family contains several uncharacterised eukaryotic proteins. 
20.20 20.20 20.50 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.82 0.70 -5.45 11 954 2009-01-15 18:05:59 2003-04-07 12:59:11 7 33 507 12 524 951 15 256.70 33 35.98 CHANGED El-hs.c.sAcpchpKYRGL+uhhsSsWDssE........s.Pp-apclaphpNhcpoKpphlpp..............pt.hpsshsGtaVplhlcpVPhphhpsass....tp...sl.lsauLL.aEpKhullshpl++apthccPl+o..p-pLllphGaRRFhs................pPlaSp....sssss+pKhpRahpscpsssAThYuPlsassssllsh+p.psssp.............hplsAoGsllsssssphlhK+hlLsGHPaKIaK+suslRYMFassEDVpaFK..............................slplhT+sGR+GhIKEsl.GTH.GthKsoF-s+lpspDsVhhpLYKRV ...........................................................................................................................................................ht+hp.tl+thhsu.a-stE.........................st........c....a..pclh.p..h..t....ch....c.t.....p..+.....p.p.hlpc..........................t.h.tGhpsGh..hVcl..plcs..V.P..sphhp.php.............................Pl..hlhuLLtpEp+.hshlphplp...t..p..........p.h....c..c.s.lKo..+-.LIhphGhRRh.s....................tPlaSt.............pssssh...p+h.+ah....t.h....p.shAo.ahG.............Plsh..s...s...s...shls.a..pphp.sps...................................................thcl....huTGsllss..D.......t....sphl...sK+l...hL.sG..........h..PhK.Ih+..p..hshl.+.MF.s....-lthFc..............................shtlh.Th.t.G.hpG.IKcs............l....t...................s..............c....Ghh............+s.Fpspl..p...Dhlhhphah.h................................ 0 197 301 442 +319 PF05057 DUF676 Putative serine esterase (DUF676) Moxon SJ anon Pfam-B_5941 (release 7.7) Family This family of proteins are probably serine esterase type enzymes with an alpha/beta hydrolase fold. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.53 0.70 -5.05 14 1193 2012-10-03 11:45:05 2003-04-07 12:59:11 9 58 403 0 795 2452 363 177.00 20 25.94 CHANGED spptsHLVVhlHGl.us..........ssDh........phltpplhp......phspc..hlhhhsppsstspThsGlch.hGcRLspEllphlpctpst..........KISFluHSLGGLltRhAluhlh.t......................httlcshsFlolAoPaLGshhspss.........ptlptshuhthlspoGppL.hh..............Ds.cspsshLh+l.tptst..hhps.......LthF.............KpRllauNs.pDthsshho.....sph .......................................................................................................................t....pl..hlhhHG....l.us.................................tc..h............................................h.ht..p......h................................t......tt.......h....h....h..........................p....t......s.................p.....T..........t....s........hc.......h.....s..p...+...l.......h..p.....p......lh.p..h.l.p.t..h..t.t....................................p+.l..S...a..luHSLG.G.L.l.h.R.h.u.lshhhtp...........................................................p.....h..s..F.....l........o....l..uo.....P.H.......lGs............hh.tp.........h.........................................................................h...h................................h...shp..........................................................................t...........hh.h...............................................h..F......................p.h.hhs....D..s...........s.h.............................................................................................................................................. 1 242 449 666 +320 PF05097 DUF688 Protein of unknown function (DUF688) Moxon SJ anon Pfam-B_6331 (release 7.7) Family This family contains several uncharacterised proteins found in Arabidopsis thaliana. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.90 0.70 -5.54 7 218 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 22 0 135 189 0 203.30 14 61.22 CHANGED ME-..KpLshstPLhSlRRhsph...s...pp....p..s.hp.....hP.hcs-h......pp..s.VppPuoVPFhWEQtPG+PK............st.ts...........................................................................................tcss--p-sth.-AhDTlSpstSF.hNCS.sSGlSthttss......ss.ss-..hpspDhMMuRFLPAAKAhslcp.spasu++ps.....sh.pp.hpQltchh...........sucp+ss.s.Yc.phsss....p...hcD-Ep--ps--Dt........h..ophhup+sCGhL.PplChKsSlshLNPVsuht.t....lpssSpctsK..u+hs....t..o.pK........uhs.h.c+KLpthhp.sht.s.spphpspSpp.p.h...Sps.psSp.hht...ttth.P.acsss..S..........................hpphpc..tELhhs+sshptst..Sshh-+Tlassp.p......................................hlE ..........................................................................................................................................................h.p...su.u......VPFhWEppPGpP.K........................stt..p..s..................sP................................................................sP...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.ht.tt...............................................................................................................................................................................................
... 0 23 78 104 +321 PF05108 DUF690 Protein of unknown function (DUF690) Moxon SJ anon Pfam-B_6322 (release 7.7) Family This family contains several uncharacterised bacterial membrane proteins. 19.20 19.20 22.70 20.60 18.00 18.40 hmmbuild -o /dev/null HMM SEED 483 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.67 0.70 -6.12 7 613 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 233 0 136 440 0 410.80 30 94.49 CHANGED hshshoT+hQVoGaRFlhRRlttAlshtDsRMhsDPLRtpopulslGhllslshhhGshlhuhl+PpGtlGsssllsDRsouALYVRluspLaPVLNLTSARLlsGpssNPtsVKSSELs+hPpGsLlGIPGAPtths..sssssSsWslCDolutstSp....sstsTVIAGPh-.su.ptssLpsspulL..Vsh....susTWllhsG+RutIDLsD+AVT.uLGlss....sstPpslupuLFNAlP-ss.lpsPhIPsAGuPsShGlsss......IGuVlph.......ps.ssusQYYlVLsDGlQplssssAslLRsssSaGhstPPslsPshls+hP.sppLssphaP-pPlphVD...tstsPshChhWp+sAusssuplslluGpsLPVssu....sVpLsps.t.s...sAspVhlssGtutFVsspu..ssutsotSLaaVossGVRaGlssscs............scALGLs..ss.sAPWslLpLhssGPsLSRssALLtHDTLssDs .........................................................................................ss+.QVoGapFlhRRhttulshtcs.c.Mht-....Ph+ppstulhhGhll...sllhhhGsh...lhu.hh.....p...P...s..u.....t...h.....u.....s.s...s..l..lss+pouuLYV..h........l..........s.........s.........p........L+PVhNLsSARLl.s..................G.......p.s..s..s.P..t..h..Vp...ss....pL..s..p....h..P.pGshlGIPG..APtth.s........ps.s..s..s.u..............s..Wsl.C....D......s..s.s.ts......ut................st.s.osl........ss.....s..h.p.ss......th..........t..Lt..s..sp.ul..L..lph..................sus.s.a.l.l........h...........s...........u.....p...........R.....t....l.....-....................s....s....p.......A....l.h......u.LGls...........stsp..lupulhsulPtss...s...lt....sP....t...l...s.......t..u...G..s..s..s..s...h...s.h..ss........lGsVhth....................................s...ss.s...paYllL.DG.lttl.s.h.hAtl.L.......p.....t....s......s.......u.............G............s...t....s.......t...ls..s..s......l...s....t.........h.....P.....s.....p............l......s...
.s..p......hP.s..t.....ph...ls.........htt.t.s.....s.hCh.Wp........s.........s...s..............t...t.......tsp...htl.....h.s.Gt.th.P.ls..s.........htls.ts.................s........sstVhlssG..t..u..t.aVt....stu................s...sts.s....t..o.h..a.h.lo-sGlRault.s..s..ps.............................hcsLG...Ls..............ss..........sPWsllp....hlssGPsLS+ssAhhthss.............................................................................................................................. 0 31 85 123 +322 PF05127 Helicase_RecD DUF699; Helicase Bateman A, Eberhardt R anon Pfam-B_884 (release 7.7) Domain This domain contains a P-loop (Walker A) motif, suggesting that it has ATPase activity, and a Walker B motif. In tRNA(Met) cytidine acetyltransferase (TmcA) it may function as an RNA helicase motor (driven by ATP hydrolysis) which delivers the wobble base to the active centre of the GCN5-related N-acetyltransferase (GNAT) domain [1]. It is found in the bacterial exodeoxyribonuclease V alpha chain (RecD), which has 5'-3' helicase activity. It is structurally similar to the motor domain 1A in other SF1 helicases [2]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.22 0.71 -4.64 106 1317 2012-10-05 12:31:07 2003-04-07 12:59:11 9 23 1199 2 421 2768 389 161.80 45 21.72 CHANGED llTAsRGRGKSuulGluhutlhtpth.......plhlTAPshpslpslapashpshp..........................ttt....htht.......ttplpahsP-shhptt.ts............-l.......LllDEAAAIPlPlLcpl...ltta...+llh...........uoTlpG....YEGoGRGFsl+Fhpt.Lcpp.......................................ts.phpplpLppPIRautsDPlEpW.l.chLlLc ............................................................................t.lTAsRGRGK.SA....h....h....G..h.h...l..u..p.h....s.up.............................hhVTAP...u...hsusc...s.L.h.p.F.u........................................................................................................................................tpph.p...F...l...u.PDsLltp.....s.....pA....................................................Dh.......LllDEA.......A.A.I.Ph.P.L..LcpL......lt.t...a.....+s.lh.....................o..oTlpG.....................YE..GTGRGF.LKFhsph......................................................hs.pl+chpLppPI.RaAtsDPLEph.lschLlhD...................................................................................................................................................................... 0 132 223 339 +323 PF05129 Elf1 DUF701; Transcription elongation factor Elf1 like Bateman A, Wood V, Mistry J anon Pfam-B_8884 (release 7.7) Domain This family of short proteins contains a putative zinc binding domain with four conserved cysteines. Swiss:P36053 has been identified as a transcription elongation factor in Saccharomyces cerevisiae [1]. 
27.40 27.40 27.40 27.50 27.30 27.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.27 0.72 -4.29 37 383 2012-10-03 10:42:43 2003-04-07 12:59:11 8 4 317 1 269 345 1 79.40 41 60.55 CHANGED G+RKppp+...s.tp....+th.pLspt....FsCsFCsacpSlplpl....c+p......pthuplpCplC..stpa.psp.lstLspslDVYucWlDtscpss ..............................G+RKppp+......s.stp....KhttpLs..opFsCPFCNH-pSlpV+h......D+p..........pshGplsCplC...........tppF.pTs..Is.h.......LopPl...DVYS-WlDAC-th................ 0 89 147 218 +325 PF05197 TRIC DUF714; TRIC channel Bateman A anon Pfam-B_9855 (release 7.7) Family TRIC (trimeric intracellular cation) channels are differentially expressed in intracellular stores in animal cell types. TRIC subtypes contain three proposed transmembrane segments, and form homo-trimers with a bullet-like structure. Electrophysiological measurements with purified TRIC preparations identify a monovalent cation-selective channel [1]. 25.40 25.40 25.70 27.50 24.70 25.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.23 0.71 -4.89 13 194 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 104 0 124 182 0 184.70 41 61.19 CHANGED l+c...pPGAlshS++sPlAsWLSuMLaCFGGulLushLLGEPPltsLsNsssILLAoslWYLVFYCPhDlhY+hssaLPl+LllsuMKEVsRshKIsuGVsHAtchYpcualVMlllGasKGAGuullpshEQLlRGVW+PEoNEhL+MSFPTKsoLlGullFTLQ+sphLslupHsLhahaTl.FhVhhKlsMhhhpsp ......................tsGu.thuR+pPlusWLssMLhs....FuuslLushLLG.EP.......ls.hpNssslLLAosl.WYllFasPhDlhY+sssa.....LP.V+llhsuM.KElhR.spKIhs.GVpHA....t+.hY....puall....MI.hlGhl+...GuGuul.hpshEpLlRGs.W..p.P.p..s..s.EhL+.SasTKsoLhuu.l.l.FsLpppp...h...Ls.huppsLhahhhl.FhVhhKl..hhht................................................................. 
0 36 46 84 +326 PF01031 Dynamin_M dynamin_2; Dynamin central region Finn RD, Bateman A anon Pfam-B_220 (release 3.0) Family This region lies between the GTPase domain, see Pfam:PF00350, and the pleckstrin homology (PH) domain, see Pfam:PF00169. 24.60 24.60 24.60 24.80 24.40 24.50 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.70 0.70 -5.64 93 2249 2009-09-16 15:19:57 2003-04-07 12:59:11 15 54 534 28 1262 2222 40 229.30 26 35.58 CHANGED slDlLpsclhsL+hGYlsVhsR.uQpDIpspholpcAlcpEcpFFpsHstYps.hsctpGT.shLAcpLsp.LhpHI+csLPplcspIppplpphppELppY.Gssss...tstsc+sthllphlspFspsapsh.lcGpps..........ssp-LpGGu+IphlFpchhsphhps.hps..hpphhcc-lcphlcptpGhchshFlstpuFEhlVKpQIcpLc-PulcslchVh-tlpclhpphssp....phs+FPsLppplpshlpsllpcphpsscphlpshlchEtsalsTpc.sahsshp..thpp........ppppp ............................................hplltsc..h.h.......L+.h.G..........ahsVhN.........R.......uQ.t.....-...........l.........p.....t.......p.......h.......s...hp...t.........u.h.p.t.EptFF..t.p...c........................a.......p....p....h......s......p......p...hGo...hLtcpLs..........p.L.hp+Ic.pp.LP.t.l...csplpptht.phpp-lp.ph.sp.hs.........t...tpptthl..lph.....hppF...spt..hpph..l.cGptt............................pht.h.h...s....+l....................F....pp.h....h...........h...t....h......................t...............p...l..h...p.p.G............p.............hhhs...sac.ll+tpl.thlctPshptlphV..........h.p.l.phhpp.htt...............phtp.........a...P..t....Lppt..h..phh.phhp..pt...sp.p....l...........h.hthp.....h.h.......s....h..................pt..................................................................................................................................................................................... 0 336 647 994 +327 PF03028 Dynein_heavy Dynein heavy chain and region D6 of dynein motor Griffiths-Jones SR anon Pfam-B_928 (release 6.4) Domain This family represents the C-terminal region of dynein heavy chain. 
The chain also contains ATPase activity and microtubule binding ability and acts as a motor for the movement of organelles and vesicles along microtubules. Dynein is also involved in cilia and flagella movement. The dynein subunit consists of at least two heavy chains and a number of intermediate and light chains [1]. The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This C-terminal domain carries the D6 region of the dynein motor where the P-loop has been lost in evolution but the general structure of a potential ATP binding site appears to be retained [2]. 21.70 21.70 21.70 22.00 21.50 21.60 hmmbuild -o /dev/null HMM SEED 708 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.38 0.70 -13.22 0.70 -6.50 18 2690 2009-01-15 18:05:59 2003-04-07 12:59:11 10 239 285 14 1923 2507 199 542.40 27 19.03 CHANGED hpsshssPh.-aLospsWsulptLsphpp...........FpsLscclcpss....ppW+pah-p-sP.EcpclPppWc..................ppsshpKLhllRslRPDRhshAlpsFlpptL..GpcaV-s.psl-hucsac.....-osssTPlhFlLSPGsDPhpcVEsLu+chG......ppapslSLGQGQth.lA-ptl-pAsKpG+WVlLQ........NlHLsspWLs.pLEKhL-phs....ssHscFRLFLoAEPsss.....lPhulLcsSIKlTNEPPsGl+ANL++uhss...hsp-hlEhss+p.sEa+sllFuLCaFHAVVtER++FGP.........hGWNhsYsFNpuDLphSlpsLpsaL-ts.........sKlP...W-sLRYlhGEIhYGG+IoD-aDRRLhpoaL-cahpsphh-s..-hhLsP......t.uatlPss.scacpYhpaI-p.LPsps.PhhaGLHsNAEIsahTppscplhsslLchQP+puus..ttGuu..hop.............................E-hlpsllc-ll-+ls...chFsht-lhtKhs....s.+sPhhhVhhQEh-RhNhLlp-l+pSLp-LshGl+GclshosphEsLhsuLhhspVPspWsphuYPShhsLsuWhsDLhpRlcpLppWspc........shPpslWLuGFFNPQoFLTAlhQshAR+N..pWsLDchsLps-VTK+pp-phss..s..............sc-G...saVaGlalEGARWDhp.pshlh-uc.K-LhsshPVlahKslsh..pctcpcshYcCP.VY+TcsRus.........saVashpL+o+c..psu+WlluGVALlhp 
..............................................................................................................................tal.t..h.........t.h..h.........h....t.t...h.....t...................a.tt.....l.....p.pht..tt.............................ttW.p..t.h.h.pp..t.t..s........p....t........h..P.t.....hp.................................................ths.h.p......+....l....hll..+sh....RsD+..hh..sh........p.pa...l.............p................t..h..........G.............t.p....a....hps..........h.....s....h.tt....hp.....-osst....s.Pl.lhlL...............S.....s.......G......s......D........P.h..t..t..l.......t........hu.....pp.t................................tp....hpt..lu.h.G..Q.G..Q.t...hA....t..p...hlptu..h.p..p.G.p.WlhLp...........NsH.....L.s...p.a....h.....p.L..........-.c.h...h.p.p.ht............................c......sF...Rl.al..o..s..s..p..sp.............................hP...slLpt......u.....l.K.hs..E.....................sPt..........G...l.+tslhpsh.t...............................s....p.......h....p.......................s...t.....p..........t.....hptll..as..ls......ahHull.ER....++aus...........hGWsh.....Y..t..FspuDhph.........uh....p......l..p.....a.lpt....................................................t.p.ls...a.pslp.hhh.uph.......YGG+lsDsh.Dp+hh..shhpt....hhs....t...h...h..p.............t.h.h.........................................h.......h..P...............t...............p.........h..p...t.............a....h...p...a.......l...pp...L..............P.....................p...P.t.haG..hp...NA.chsh......t.p...s.p.t.....hh......pp..lh.h......p....st...st...........ttt...........s............................................................................pph...h...th...ht...p...hhpphs.............................h..s..........h..................p...h.............................p...s.h...............hhh.h..........pEh............php.Lht.lp..psL.p..........l.....h............s............l.p...G.....h......h...h.o..t.......t.h...c........t....h..pshh.s........
..p.l..P.......t........W.............t......h.............s....h.s...s..............s.....ls.................aht....chh.R...h.t......hp.tWh.t..............................h.s.....hWlsuha..sp.u.aLT.....uhhQth..........sRt..........th..s.l.Dp...h.......hth.phh.....................................................................................................st.p..u.....hhl.G..Lhh-....................G.....Ata..................s.....t.......t........................l....................p.t......t......h..h......h.s...hh...h.h.....s....................................................................t..................h.......sP..lYh.......Rts.........................................hlh...h......l..............st.............th.ah.tGsshhh..................................................................................................................................................... 0 816 1017 1584 +328 PF04261 Dyp_perox Dyp_perox_fam; Dyp-type peroxidase family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family of dye-decolourising peroxidases lack a typical heme-binding region. 
20.40 20.40 20.40 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.00 0.70 -5.69 15 2768 2012-10-02 00:20:33 2003-04-07 12:59:11 7 7 1901 38 529 1917 88 307.60 30 85.21 CHANGED QsGIloPhsssulalshslsutp.t......thh+phsutlstLptut................Dth.shtttt.sucLolslGhGsshac.....Rhuhs.pcPpcL+saschsssphpAssTsGDLhlaIpApct-lsFpsh+slhcphssslpVptchcGFph.........hssRsLhGFlDGTcNPpuscstct...sll......sscssshsGGSYVsVQ+ahHsl-sW-+lslp-QEslIGRcK.sssELstp....................stPssSHlsLss.c..-u.suhpILR+uhPauc.....sGph-tGLhFluYs+shcstct.lppMh.G...GssDtLh.-aopsloGuhFFsPsssph .............................................................................................................................................................................QsGIhs......t....ptssha..lshslt..st............................thht.....hstthstL..ptut.h.............................-ph..hthhh.ssspLohsluhGsshac..............ph.uhs...t..t..........P....p....c.L.c..c....hs..t..a.ss.s..p.h.ps.........s.h.o.puDlhlpIpus.cpslsapshpslh..c..t..h.....s.s...h..l...pVc..chc.GFth...............................hssR..s..LhG..FhDG.......TtNP.....pup......pstph..........shl.....................t-t.........h.htGGSYhhVp....+hpap...lct.W....cR..............h........s....l..........c-.......Q..........E..............p.....h...hGR.sKp....s.st...lstp...........................................................hhPtsSHlphss........c...t..........p............s....p......s..........h.....p.........I......lR+uhsYus................................sut..h.-t..GLhFluYppshcph.h.lpshh................u.....p.tDtL.....c....a.sp.sl.s..GuhFFsPshs..h.................................... 0 143 307 438 +329 PF02221 E1_DerP2_DerF2 ML domain Bateman A, Yeats C anon Yeats C Domain ML domain - MD-2-related lipid recognition domain. This family consists of proteins from plants, animals and fungi, including dust mite allergen Der P 2 (Swiss:P49278). 
It has been implicate in lipid recognition, particularly in the recognition of pathogen related products. A mutation in Npc2 (Swiss:Q15668) causes a rare form of Niemann-Pick type C2 disease. This domain has a similar topology to immunoglobulin domains. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.33 0.71 -3.91 137 1034 2012-10-01 19:31:57 2003-04-07 12:59:11 10 11 326 68 586 1089 2 125.90 20 73.86 CHANGED shshpsC...s.t.....sphpplsls........ss..hslhpGpsh.slshsh.ss.pp.spshps...plpsphss.......lplsh............csCc.........................pCPl.ptG.phhshphsh.s.........ls.p.hPss.....phplphplhsp.s.sp..tl..sChphsspl .................................................s...hp.C.....st......hp.lpplsls..........ss....hslhcGpsh.s.....lphsh.ss......ppssps.h.ps..plpsphss.........lpl.sh.................shCc..............................shpC.....P.....l.ptG...ph...........s.hphsh.s........................l..p.hPss...........ph..plphplhsp.s...tp...pl....sChphsht................................ 0 239 316 476 +330 PF03271 EB1 EB1-like C-terminal motif Mifsud W anon Pfam-B_1529 (release 6.5) Family This motif is found at the C-terminus of proteins that are related to the EB1 protein. The EB1 proteins contain an N-terminal CH domain Pfam:PF00307. The human EB1 protein was originally discovered as a protein interacting with the C-terminus of the APC protein. This interaction is often disrupted in colon cancer, due to deletions affecting the APC C-terminus. Several EB1 orthologues are also included in this family. The interaction between EB1 and APC has been shown to have a potent synergistic effect on microtubule polymerisation. Neither of EB1 or APC alone has this effect. It is thought that EB1 targets APC to the + ends of microtubules, where APC promotes microtubule polymerisation. This process is regulated by APC phosphorylation by Cdc2, which disrupts APC-EB1 binding. 
Human EB1 protein can functionally substitute for the yeast EB1 homologue Mal3. In addition, Mal3 can substitute for human EB1 in promoting microtubule polymerisation with APC. 21.10 21.10 21.10 21.50 20.10 20.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.18 0.72 -3.91 61 559 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 308 17 346 525 3 42.60 52 14.59 CHANGED slEcERDFYFsKLR-IEllsQp..............ctppps..lh......................pcIpcILYu .....LEKERD.FYFuK...LR-IEllCQc.p............-t-sss....ll......................pcIhcILYA............. 0 102 172 257 +331 PF05009 EBV-NA3 EBNA-3; Epstein-Barr virus nuclear antigen 3 (EBNA-3) Moxon SJ anon Pfam-B_4674 (release 7.6) Family This family contains EBNA-3A, -3B, and -3C which are latent infection nuclear proteins important for Epstein-Barr virus (EBV)-induced B-cell immortalisation and the immune response to EBV infection [1]. 24.40 24.40 274.20 273.20 22.60 22.40 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.64 0.70 -5.13 11 121 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 6 5 0 56 0 235.00 84 38.12 CHANGED MAlRQRlpDlRRsPhshcs...QR+W+LsSPupoW.MGYRTtolhhhoassstssss.lhLsAThGCpsGtRstsTFSAGsapPP+sust-QchhsspupVtplRppsscRY+hFFD.hlsltpSLptIWpslLps-.pRlsFhcFlGaLs+s-pshl+hWaccslGsh+sppPWhpssPst.sac..cslsscshstAah+GpshGlshLtssu.E.tcssssETssEpE......DsEs-u-D-..........plPpIlPp.t..shppRPslFlpR ..................................WPMGYRTATLRTLTPVPNRVGADSIMLTATFGCQNAARTLNTFSATVWTPPHAGPREQERYAREAEVRFLRGKWQRRYRRIYD.LIELCGSLHHIWQNLLQTEENLLDFVRFMGVMSSCNNPAVNYWFHKTIGNFKPYYPWN.APPNENPYHARRGIKEHVIQNAFRKAQlQGLSMLATGG.EPRGDATSETSSDEDTGRQGSDVELESSDD..........ELPYIDPNME..PVQQRPVMFVSR. 
0 0 0 0 +332 PF00679 EFG_C Elongation factor G C-terminus Bateman A, Griffiths-Jones SR anon Pfam-B_40 (release 2.1) Domain This domain includes the carboxyl terminal regions of Elongation factor G, elongation factor 2 and some tetracycline resistance proteins and adopt a ferredoxin-like fold. 24.00 24.00 24.00 24.00 23.60 23.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.83 0.72 -4.12 172 18629 2012-10-02 20:07:24 2003-04-07 12:59:11 19 79 5330 69 5120 13346 6308 88.10 29 13.46 CHANGED hlhEPlhplplpsP.....p-hhGslhsplsp.RRG.plhshpsp.........ss......tshlpsplPhsphh.satscL+ohTpGputhshpa..spYp.sssphhpph ......................lhEPhhplp..l...sP...................c-a.hGs.Vhst.ls....p.RRG..ph.hsMp.p...................ss........................................tstlp..hpl.PlsEhh..Ga...tscL+ShT...pGpu.s.h.s.hp.F..scYp.sshsh.t..h............................ 0 1815 3287 4370 +333 PF03764 EFG_IV Elongation factor G, domain IV Bateman A, Griffiths-Jones SR anon Pfam-B_40 (release 2.1) Domain This domain is found in elongation factor G, elongation factor 2 and some tetracycline resistance proteins and adopts a ribosomal protein S5 domain 2-like fold. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.12 0.71 -4.54 81 9390 2012-10-03 01:04:38 2003-04-07 12:59:11 13 49 5414 57 2657 7106 2810 117.60 36 17.30 CHANGED sp.Vua+E.hhpssc.tptphtcp.sGhcspau+hhlphcP..........ssuh.Fhstsp....GstlscEahsulppGhppuhppGsLusaPlhsl+lsLhDushHs..lcSoshsap.Auphuh+puhhpAp ....................................................................PpVuYREThp.pssc.hc....tpat+Q...o.GG....c.G.Q....a.u..clhlp..h...p.Php........................spGh....p.F.......s..plh...................GG....s...l.P...c......E...............a..lsuVpcG.l.p.p.uh.c......p.GlL....A..G....aP...........lh..Dl..+ssL.hDGuaHs............VDS..o..p...hAF.+hAuphAh+puhpcAt..................................... 0 986 1715 2277 +334 PF00036 EF-hand_1 efhand; EF_hand_1; EF hand Eddy SR anon Prosite Domain The EF-hands can be divided into two classes: signaling proteins and buffering/transport proteins. The first group is the largest and includes the most well-known members of the family such as calmodulin, troponin C and S100B. These proteins typically undergo a calcium-dependent conformational change which opens a target binding site. The latter group is represented by calbindin D9k and do not undergo calcium dependent conformational changes. 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.44 0.74 -7.46 0.74 -4.24 893 3781 2012-10-02 16:17:27 2003-04-07 12:59:11 27 216 892 544 1838 18263 638 28.50 33 12.77 CHANGED plcphapthDp-ssGtIshpEhtphhpph ...........lccsFphaDpDssGhIshpEhhphhp............ 0 649 934 1356 +335 PF04189 Gcd10p eIF3_gamma; Gcd10p family Wood V, Finn RD, Bateman A anon Pfam-B_8933 (release 7.3); Family eIF-3 is a multi-subunit complex that stimulates translation initiation in vitro at several different steps. This family corresponds to the gamma subunit if eIF3 [1,2]. 
The Yeast protein Gcd10p has also been shown to be part of a complex with the methyltransferase Gcd14p that is involved in modifying tRNA [3]. 21.00 21.00 22.70 22.80 20.60 19.80 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.77 0.70 -5.28 34 336 2009-09-13 16:20:58 2003-04-07 12:59:11 8 6 291 0 241 322 10 288.60 26 61.49 CHANGED pshIp.spaVllc.hsu-thKlVclpsss....hlpLGKhGsF.lssllGhsaGpoFEIh.sp.................ph.h.l.psph.tpph.sp......................t.ttt......t.tpsscsN+sll.......Dsup.sQK..Lot--IppLKcp.stu...Gp-IIppllpsppoFcpKTtFSQ-KYl+RKccKahcha...Tl.h.ssshLsphahcc..-s.......t+lhclRt-oLuhlLshuNl+s..............sG+YLVhDc.TuGLlluAhhERMuu..........................................pGpllthapspt.sshs...hLchhs.....t..ts.p..pttlpslshhthhcs........ttpppppp.sshspc-hsshcss ...................................................................hIp.spaVll...h.sphh+.hh.p.lp..st....................plp.lGK.t.h.hspllGpsastoFE..lhspt............................................p.h.t.t...........................................................t.....................ptpp...s.spsNR.sll.......Dssp...sQp..Lot--IcpLKcp..uhs....................Gc-IIppLlpspsoFcpK.TtFSQpKYlp+KpKKYhp...hh..............plh..sssp.lsphahtc..-s..............s+lh.plR.h-oLu.lLshuNl+s....................................................................su+hLlh-s.suGLlsuAhhERhGu...................................................................................................pGsllphh....ss...s...sshs...hlphhs...hs.p...........p...t.......h....th.l.plp.h...p.......................................s........................................................................................................................ 
0 83 133 201 +336 PF03610 EIIA-man PTS system fructose IIA component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.00 21.00 21.00 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.41 0.71 -4.02 36 8406 2012-10-02 12:41:15 2003-04-07 12:59:11 11 38 2718 39 912 3993 258 113.60 21 41.09 CHANGED ulllsoHu.chApGlhchhphlhGc....sltslshhss..tshsshtppltpAl.pphsts-tllllsDlhuuss.stsst.hhcc.t..ht...hlsshslPhlhpuhsst...sphshsphhsplh ........................tlllsoHG.phApulhpssphl...h...Gp....t..p.lt.s.l...s..h.h..ss..........p.ssssh...hpcl.pptl.....p..p..h....s....s....sc......s.lllLsDlhu.u.o.shst...ss..p...h...h...h..c...t.p......hp.............llsGhNlPhll...pshhtp.....tths.h.tt.ht...h....................................................... 0 255 550 740 +337 PF01448 ELM2 ELM2 domain Bateman A anon Bateman A Family The ELM2 (Egl-27 and MTA1 homology 2) domain is a small domain of unknown function. It is found in the MTA1 protein that is part of the NuRD complex [1]. The domain is usually found to the N terminus of a myb-like DNA binding domain Pfam:PF00249. ELM2 is also found associated with an ARID DNA binding domain Pfam:PF01388 in Swiss:O82364. This suggests that ELM2 may also be involved in DNA binding, or perhaps is a protein-protein interaction domain. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.86 0.72 -3.32 53 960 2009-01-15 18:05:59 2003-04-07 12:59:11 19 46 123 0 518 897 2 55.30 31 7.90 CHANGED IpVGsc.aQ.....AcIPphtspspppppppt.........................ccpl...lWsP.........s....tlsc...ppl-p.......alp.hupspt .............IpVGsp.aQ...........A.-..IP-.hhsps...ptspcp...pp...............................................................................tcpL......VWsP.........p...sls-.......pcl-p...aLs.hApp..s...................................................... 
0 109 160 306 +338 PF02990 EMP70 Endomembrane protein 70 Griffiths-Jones SR anon Pfam-B_1312 (release 6.4) Family \N 22.80 22.80 22.90 23.10 21.20 22.70 hmmbuild -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.91 0.70 -6.04 15 1526 2009-01-15 18:05:59 2003-04-07 12:59:11 11 27 425 0 910 1437 39 423.80 34 82.92 CHANGED hsYcYYs..LsFCpPpp..lpcpsEsLG..EVLpGDRlhsSPaclchhccppCpslCpp.......plspccschlpctIppsYahpallDs.LPlsspltphcs...........h.h..Gah.s.p.......................................pspKhYlaNHlchhlpYH....cc.........RllslpVpshSlc...............p...........ppP.hlcEsp-.......lhFTYSVcWpcoc.lcWuo...RWDtYLc......hpshpIHWFSIlNShllVlFLoGlVuhIlhRTL+pDlu+YNp.-ptp.............DspEEsGWKLVHGDVFRsPp....pshLLSsllGoGsQlhhMlhsslhFAhlGhLuPusRGuLhTshlllYslhGhluGYsSuplY+phcGp..................pWKpshlLTuhLhPGslFshhhlLNhllWupsSSuAlPFuThlhllslWhhlSlPLohlGullGh......Rppsh..csPsRTNpIPRpIPp..QsWYhpslhuhLhuGlLPFGuIaIELaFIaoSlWtpc..hYYhFGFLFlVFlILllssuploIllTYFpL..CuED....Y+WWWRSFlsuG.uoulYlFlYSlaYahsKhp.lsGhh 
...........................................................................................................................sYpYat..hs.a.Cts....t..........................t................................t..........sLG..p............lL...Gccl....o..a.p.....hp..h....p......p....p..t.....hh..Cp....................t.h..s.t.t.p.hp.ht.phlppt.Y.hphh.....................l.Ds.LPhhh..h....h....tt................................................................................................t.tt.p.h.h.l.sHhph.......l..ap..................t.p..........................................................pllt.hpl.s.......t........................................................................t.......l.t....t..tt...........l.aoYsVt.a.p..sp....hta..ts..............R.aD..h..ah.t.......................p.plHWh.....................SlhNSh..h....h.VhhLs..........u.h.l..shI.h.hRs.L.+.pDhs..pYs.p..pt.t...............................................t..t-....-.GW.K.llHGDV.FRs.Pp.........t.shlLsshlGsGsQlh...hhshhsl...h.h.......Ahl............G...........h.........ls......PspRGu.l.hT...shlhlashhu.hh.uGYsuu..p......h...Y+ph....t...Gp..........................................pWh..pshh....hT.uh..lh...Pu..hlFsh.hh.hl...Nhhhhs.......t...S...ouAl..PFsThl.s.llhlW.hhlslPLshlGuhhG..........................+p.sh...p.Ps......+..s.....s.pI.PR...p......I......P............p........a....ah........p.....s.........h..h....l..hu..GlLPF..u..sl....alE...L....a.FI.h...............s.S...l..W..pp.....hYa...ha..G................FLh.ls..h..h.........l....L....h.lssu.plo.l.lhs.Y.h.L........s.sE.s..................a.pW.Wpuahsuu.ssu.halhh..YuhhY...hh..c.p.h.t..h......................................... 
1 327 520 748 +339 PF01223 Endonuclease_NS Endonuclease; DNA/RNA non-specific endonuclease Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.68 0.70 -4.70 59 2367 2012-10-05 18:28:12 2003-04-07 12:59:11 18 35 1457 25 866 2278 269 199.70 19 53.76 CHANGED hhptaslsaspp..pchshasttplsssshttspsp.....................................spat.-.slsst.hpst.ssatsth.......aDRGHlss.......sspthspss.psTFhh.sNhsPQ.htshNp..ss.........WttlEshsRphsp...............ptpslhlhoGshhh.s.........................................................ttlslPphhaKllh..ts.t.................htuhlhspts..pth...............tt.............h.hsl.tl.t.......sG..lsahsslsst .........................................................................................................................................h....as.sastp.....pchs.asu.p...l.pt..ps.htttptp................................................................................ttspa....t....-..s...pl..s....t......t.....h....p..s.t...ss...Ypt.su......................hDRG.Hhss...............sshph...u..p.ps.psoF.hh....oNhsPQ..t....t......shNp.....sh.................W......s.t.l.E.p.h.l.R.p...h...s.p....................................ph.p.slhVhoGslhh.p.........................................................................h.h.t.tspl....lP...phaaKllh.s.pttt....................................h.uahh.pt.....t.........................................................................h.httlpt.......................ss....hphh......t.......................................................................................................................... 
0 233 407 665 +340 PF00812 Ephrin Ephrin Bateman A anon Pfam-B_1390 (release 2.1) Family \N 25.00 25.00 25.20 25.70 24.80 23.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.06 0.71 -4.70 14 554 2012-10-02 17:41:00 2003-04-07 12:59:11 12 4 91 31 288 430 1 131.80 39 52.16 CHANGED tsuuscasVYWNSoNs+Fhp.pshsltlpIsDhLDIlCP+hEss..........spshEhYhLYMVsh-thcsCph.tpcsphhhpCs+Pps.......................sPl+FopKFQcFoPhshGhEF+tG+-YYYISosssshtsp.......ChphphpVhh+sup ..............................h.......h.Ws.s..t..N..+...............Fh......t...t............sh.sl...tlp.l.s......DhLDIhCP+hcss.........sttphE..hYh.LY...............hV.s.t-tacsCph...p...pp..s.h.hhhp.Cs+Ppt........................slKFo.KFQcFoP.s..hGhEF+sG+-YYaISosssst...tth.........C...hphp.hhlhhps................................. 0 47 74 164 +341 PF05139 Erythro_esteras Erythromycin esterase Bateman A anon COG2312 Family This family includes erythromycin esterase enzymes [1,2] that confer resistance to the erythromycin antibiotic. 
26.30 26.30 26.80 26.80 25.40 26.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.09 0.70 -5.25 73 695 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 516 4 246 666 50 310.70 22 67.58 CHANGED slAlEuDWPDAhpls+YVpst.ss..................sspt............tsapRFPtWMW.RNpEhhchlcWLRsaNsshs.....tpc+...lsFaGlDlY....ShtsShpsVlp...YLcpsDPptA.......ptA+ppYushs.h.sccsttYGhtsh....t..hpssccpllppLpcLhppthphttp...ts....-chh.ApQNAplltsAEpYY.....+sMhtstsps.......WNlRDpHMs-oLptLhcph..stp.....uKslVWAHNoHlGcApuTphu.........ts.hslGQll+-paGcc.shhlGhsohpGsVsAuspW......ssshphhplpPuhssSaEplhcpsuhspallch...........ssthtphL..tp...RhpRuIG.VlYpPco..Eh.SaYhpssLscQFDullah-cT .....................................................................................................................luhEssassuh.t.l.scYlpst.ts..............................s.p....................hppa...th..ha..psp-hhshl.cWhRpaNtp..............ttpp....lthhG.hDl...........s.tp.s..h.p.t.lhp.......Yl.c.p.h.s...sphh...............ttscpt.h..tth....p....h...tppsthh.sh................htppppphhpthpclhphhtp..ttt....s.................cphhhs.ppsAphltphtphh.................pshht.s..s.pp.....................hsh.....RD.ptM.h-slthlhcph......t.......................u+hlVWAHNuH..l.ucspss.hh.................................t.hshGphlpc.p.hGcp....hhslGhsshpGphsshpt.........st.hthhtlts.s..ssShE.t.h..h.t.p...s...s....h......s......t.hhlsh................pt.ht.thl..tt...t..hhh.....G....shhhst..........hhh....ht...ptaDshlahpp........................................................................ 0 83 170 220 +342 PF04800 ETC_C1_NDUFA4 ETC_CI_21; ETC complex I subunit conserved region Waterfield DI, Finn RD anon Pfam-B_6275 (release 7.5) Family Family of pankaryotic NADH-ubiquinone oxidoreductase subunits (EC:1.6.5.3) (EC:1.6.99.3) from complex I of the electron transport chain initially identified in Neurospora crassa as a 21 kDa protein [1]. 
25.00 25.00 25.10 26.80 20.80 24.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.35 0.72 -4.15 6 625 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 542 2 330 566 1035 98.40 40 65.94 CHANGED RpsRIYpPA+sshQSGpu+spcWpl-F-.spt...RWENPLMGWooouDPlup...hcLoFsT+E-AluFAE+pGapYpVcEPps.+hKsKuYusN.......Fuas++pp..h ...............psRIYpP.A+.sAMQ..SG..p.s....pT.......c......p.......Whl-a-....ppt....R.h..sPLMGWsSS......uDshsp.......lpL..pFso+E-AluaAc+pGhsY.pVp.E..Pp.............p...............p....h..+.......+...u...Yus..N.......Ftas+.t.h..p....................... 0 107 195 261 +343 PF04716 ETC_C1_NDUFA5 ETC_CI_29_9; ETC complex I subunit conserved region Waterfield DI, Finn RD anon Pfam-B_4159 (release 7.5) Family Family of eukaryotic NADH-ubiquinone oxidoreductase subunits (EC:1.6.5.3) (EC:1.6.99.3) from complex I of the electron transport chain initially identified in Neurospora crassa as a 29.9 kDa protein. The conserved region is found at the N-terminus of the member proteins [1]. 20.10 20.10 20.20 21.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.69 0.72 -4.44 32 283 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 238 0 190 282 2 58.80 37 34.55 CHANGED ssP+tsLhsLYschLcpLp.phPcsusYRptTEtlsppRLplVcp......ppshcclEpp...lss ....sP+psLhhLYschLctLp..phPcsu.sYRptTEtlTpcRLslVcp..............-tslpclEcpls...................................... 0 66 104 156 +344 PF04621 ETS_PEA3_N PEA3 subfamily ETS-domain transcription factor N terminal domain Kerrison ND anon DOMO:DM04577; Family The N terminus of the PEA3 transcription factors is implicated in transactivation and in inhibition of DNA binding [1]. Transactivation is potentiated by activation of the Ras/MAP kinase and protein kinase A signalling cascades. The N terminal region contains conserved MAP kinase phosphorylation sites [2]. 
30.00 30.00 30.10 30.10 28.10 29.90 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.48 0.70 -5.21 10 264 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 49 0 96 260 0 269.80 47 68.50 CHANGED MDGFYD...QQVPFhVss.sppu...ppstsRsss-RKRKF.lso-LAp.......DoEELFQDLSQLQEtWLAE......AQVPD.DEQFVPDFQS-N.LhFHGPP.sKIK+E.pSPup-h.SsCSpcps.shsaGEKCLYsh..........................SAaDpKPssuhKPsoPsoTPsSPhss.sth....uotPhpcsosss...........................hthPh.sps.P.hls.Ps.s..sssass-pR.......FpRQLSEPChsFPs.suth................s+-sRPsYaRQMSEPhlPh...PPQGFKQEhhDPlYscpGhPs............stsap.....tshsIKQEPRDFsaDSEVPsCpSsYh+ssuF...sstp-..Gata-+-s+hFaDDoCVVPERhEGclKQE.suhaREGPPYQRRG ........................................uhhDQp.V....Pah.st..pp.s.....tt.ttt....h.sp.t+hh...sp.......s...............DSE........-LFQDLSQhQEsWLAE...................A.Q.VPD..DEQFVPD.apu-s.......L..................sFHu.P.hKIK+E.pSPso-..puCSpcps.h.hsaGEpCLYs......................................sAY-p.p..suh+s.sPssss......oP...................h..hpttu..s...................................................................................Ph.....spt.................h..s....c.pp.......................................a.p.Q.sEs.CpsFss..s................................sttsts.YpRQhSEP....hPh...P.QuFKQEYhDPlYEp..t.shsu...............................s.stp........sshhIKQEPpDasaD...S..-...VPsC.t.......S.Yh+tt.sF.......st.pt.........Gh.a-Ks.R..a.DDsCVVP.E+h.-.G.clKQE.ssha..REGPsYQRRG................................................................................................... 0 7 15 38 +345 PF04777 Evr1_Alr Erv1 / Alr family Finn RD anon Pfam-B_5005 (release 7.6) Family Biogenesis of Fe/S clusters involves a number of essential mitochondrial proteins. Erv1p of Saccharomyces cerevisiae mitochondria is required for the maturation of Fe/S proteins in the cytosol. The ALR (augmenter of liver regeneration) represents a mammalian orthologue of yeast Erv1p. 
Both Erv1p and full-length ALR are located in the mitochondrial intermembrane an d it thought to operate downstream of the mitochondrial ABC transporter [1]. 25.60 25.60 25.90 25.80 25.40 25.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.63 0.72 -4.09 59 881 2009-09-10 22:39:07 2003-04-07 12:59:11 8 18 378 43 544 854 401 96.80 29 29.08 CHANGED puhWshLHshuupaP..pp...Potpp..........pp...shpsahphhsphaPC.ppCupcFpphht....p..tssp.............lso+cshstWLhphHN..pVNp+LuKs...................s..sph.pppats ..................................puhWp.lhHohssp.as....cp.......Po..tpp....................pp...shpshhphh.sph.aP.C..p.............cCupc..Fp.phhp............c......p.sp.............................lp.o.ppshshWlhphHN..pVNp+LuKs...................ps.sph.p.pa..s............................. 0 190 296 442 +346 PF03372 Exo_endo_phos Endonuclease/Exonuclease/phosphatase family Dlakic M anon Dlakic M Domain This large family of proteins includes magnesium dependent endonucleases and a large number of phosphatases involved in intracellular signalling [1]. This family includes: AP endonuclease proteins EC:4.2.99.18 e.g Swiss:P27695, DNase I proteins EC:3.1.21.1 e.g. Swiss:P24855, Synaptojanin an inositol-1,4,5-trisphosphate phosphatase EC:3.1.3.56 Swiss:O43426, Sphingomyelinase EC:3.1.4.12 Swiss:P11889 and Nocturnin Swiss:O35710. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.30 0.70 -4.44 326 20400 2012-10-02 01:25:08 2003-04-07 12:59:11 18 388 4759 145 7593 17817 4636 258.00 13 59.21 CHANGED h..oa..Nlpt....................................................................................ltp.........hlppts.................sD..lls.....................lp....Esp............................................t.h...tthhtt.......hs...tt.h........................................shslhs+h................s..httthhthh............................................sttththhhthtttth...............................................................lh...ssH..............................................hss................tstttp...t.phhpth........tthhtt................................................hllhG.DhN...........................................................................................tshh....stth......pshpthh...h.................................................................................tl........Dalhs.............htththtththht...........................................SD..H 
................................................................................................................................................................................oaNlts....................................................................................................lhp.....................hl......p.p.s................................sD......lls.....................LQ..........Esp..............................................................................................................................................................................................p.h........tthht.h.......................h..................h.............................................................................................................G.lu.l...h.s...+.h.................................................................h..t..h..h.t.thh............................................................................................................................................................................stt.t.h.h..............h..h.h..h.t..t.........................................................................tt.tlh........lh......ssH........................................................................................hss......................................ttpttp............ht......thh.phh......................p.phh.ptt.............................................................................................................llls.G..Dh.N.........................................................................................................................................................................................................................................phh................p.t.h......................p.h.h.p.t.h...h..t................................................t..h...shtht..................t..h...............................................................pl...............D.alhh......................................................thh
.h.t.t...........................................................SDH............................................................................................................................................................................................................................................................................................................................. 0 2415 4456 6161 +347 PF02609 Exonuc_VII_S Exonuclease VII small subunit Bashton M, Bateman A anon COG1722 Family This family consist of exonuclease VII, small subunit EC:3.1.11.6 This enzyme catalyses exonucleolytic cleavage in either 5'->3' or 3'->5' direction to yield 5'-phosphomononucleotides. This exonuclease VII enzyme is composed of one large subunit and 4 small ones [1]. 20.80 20.80 20.80 21.30 20.70 20.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.35 0.72 -4.19 148 4033 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 3983 6 847 2207 1003 52.90 37 66.86 CHANGED FEculpcLEpIVpcLEsGclsLE-ulchaccGhpLhcpCpptLppAEp+lphl ..................FEculp-LEpIVpcL.E.s.G.-ls.LE-ulshac+GhpLs+pspppLppAEp+lphl........... 
0 283 566 723 +348 PF00929 RNase_T Exonuclease; Exonuc_X-T; Exonuclease Bateman A anon Pfam-B_1153 (release 3.0) Family This family includes a variety of exonuclease proteins, such as ribonuclease T and the epsilon subunit of DNA polymerase III.; 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.07 0.71 -3.96 57 17634 2012-10-03 01:22:09 2003-04-07 12:59:11 19 169 4800 65 4662 12914 5053 160.30 21 36.06 CHANGED lslDsEsTGhss..............hspllEluslplsstt...........htt.hphalpPpp...lsshsschsGIosth..lpstsph.p..shpthpphhp.t........phhlsps..hsh.hshhhpp.hchhhh.h......ht.hhcththspthhtthtt...........sLstlspphthpt.pp.......HpAlpDsptshplh ............................................................................lhlDhE.T.T.Ghss...........................ttsc.ll...-..l.u.s....l..pl..pssp.............................hssphp.h..h...l...p..P..p......p..........................l.....s....s..h...sh...p....l....p.....G................Io....s...ph.............l..p..s...t..s...s.......htp.......shp.p....h.h.p.....h...l....p.....s...........................................th..hVu.....+......N.....s......s.....FD.....hs.hh..t.t.p.h..t..c..h.h.h.h...hs.........................p.s.h...h..c.s...h..p....l...s..p....t.....h......h....thtp................................hpL.s.p.l.sp...t...h....s..l...p.h.ppt.....................H.p.AltDsptospl............................................................................... 0 1427 2706 3817 +349 PF03016 Exostosin Exostosin family Mifsud W anon Pfam-B_2031 (release 6.4) Family The EXT family is a family of tumour suppressor genes. Mutations of EXT1 Swiss:Q16394 on 8q24.1, EXT2 Swiss:Q93063 on 11p11-13, and EXT3 on 19p have been associated with the autosomal dominant disorder known as hereditary multiple exostoses (HME). This is the most common known skeletal dysplasia. 
The chromosomal locations of other EXT genes suggest association with other forms of neoplasia. EXT1 and EXT2 have both been shown to encode a heparan sulphate polymerase with both D-glucuronyl (GlcA) and N-acetyl-D-glucosaminoglycan (GlcNAC) transferase activities [1]. The nature of the defect in heparan sulphate biosynthesis in HME is unclear. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.97 0.70 -5.38 33 1543 2010-01-08 15:44:36 2003-04-07 12:59:11 10 45 197 0 1043 1489 205 267.00 21 51.06 CHANGED tpsshpVYlYcls......................................................pshh.sppash-hhlhpplppsp........hhstcPscAslFalPhhsshp.......................pphh.t.l.phhpph..shW....pc.tGcD...Hllsssps......................ssthhh...thtspshhsl....h..s...................ptphh.spD...lslPshhpssshssthst.........tpRphLhhFsGshtht..t.................ttslRstllcph..........ts..sst.t.......................tptppts..p..........hhchhpsupFCLh.PtGt.p......sptplh-ulhuGClPVll.uc.......shp.....LPFtshl.DapchoVhlscpcl......splhphLcsl 
...............................................................................................................................................................h....hhlalY.........................................................................................................................pt......ht.......t..hh.hhttl......pst..................................hhstssppAshahl..s...h..sh.........................................................t.t........h.......t.l.....t....h....h....t....ph............sh.W......................pc......Gts......Hh..hhshts.............................................................t..........thh..shhsh.................t..........................................................................ptthh.thD.......lshP..h.....h..........ts....t....h........................p.......R..ph.L.h..h.F..tG.t....h...................................................................ttth+.h...lhp.p..h.....................ps....tt...p.......................................ht.t.t..t...p...............................hhphh..tpS.....pFCLh...Pt.Gt...p..................ss.Rlh-ulh.sGC..lP....Vll..uc......................ph......LPa....t...p.hl....sapphu...lhlsppcl..............pl.phLpt................................................................................ 0 337 682 871 +351 PF03124 EXS EXS family Mifsud W anon Pfam-B_605 (release 6.5) Family We have named this region the EXS family after (ERD1, XPR1, and SYG1). This family includes C-terminus portions from the SYG1 G-protein associated signal transduction protein from Saccharomyces cerevisiae, and sequences that are thought to be murine leukaemia virus (MLV) receptors (XPR1). N-terminus portions from these proteins are aligned in the SPX Pfam:PF03105 family. The previously noted similarity between SYG1 and MLV receptors over their whole sequences [1] is thus borne out in Pfam:PF03105 and this family. 
While the N-termini aligned in Pfam:PF03105 are thought to be involved in signal transduction, the role of the C-terminus sequences aligned in this family is not known. This region of similarity contains several predicted transmembrane helices. This family also includes the ERD1 (ERD: ER retention defective) yeast proteins Swiss:P16151. ERD1 proteins are involved in the localisation of endogenous endoplasmic reticulum (ER) proteins. erd1 null mutants secrete such proteins even though they possess the C-terminal HDEL ER lumen localisation label sequence. In addition, null mutants also exhibit defects in the Golgi-dependent processing of several glycoproteins, which led to the suggestion that the sorting of luminal ER proteins actually occurs in the Golgi, with subsequent return of these proteins to the ER via `salvage' vesicles [2]. 25.00 25.00 26.30 25.10 23.00 23.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.26 0.70 -5.42 68 815 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 326 0 551 823 12 287.00 27 50.36 CHANGED hhh.lapshhLlllhhahaulslahWpptplNa..s....hIhchs..........................ppplshpphhp.............lushhs.................................hhhslshhhhhhh.h.t...p.hth.................PhhhlhlhhhlllhPh....phh........htpuRhahlpslhRllhush....h.VpFsD..FaLuDtLsShshsltDlthhhChah..h.h.............ptt..............stCtpsphhh............sllsslPshhRhhQClRR.ah-spct...............sHLhNAhKYssshhshhhtsh..hchppspt.......................hhhhalhsuhlsShYshhWDlhhDWuL.........................hpps.t.s....................................cshhLR..ccLha.............sp.......hhYYhAhlhsllLRhsWhhp..hh.................pht.hh..pp....plhshl...luhlElhRRhlWsFFRlENEHls 
..................................................................sh..hath.hhhh.hhhhhshsh..h.hap..t..tl...sa..s......hlhphp...............................................tpthp.tphh...............................................hsshhh..........................................................hhhh.h.hhh.hh...............................................Phhhh..hhhhh.....hlh.Ph....thh.....................................hpsRhhhhhhh................hRlh.hush............h.ltFtD..haluD.lsSh......s......h..........sh...hsh..thhhChahh..........................................................t.C....t.t.hh..............................hl.slP...hhRhh.QClRc...ahc.s.tph..........................aLhNuhKYssshhhh....hhtsh....hph........t.t..t............................hhhhalhhshlsohYshhWDlhhDWuL..........................hp......s..................................................................................................................pthhLR......cplhh.................................p........h.hYYhuh...l.sllLRhsWhhp.h.h...................ph..h.....ps....phhs.hh...huhlElhR.RhhWsaaRlEsEal............................................. 0 185 336 476 +352 PF04554 Extensin_2 Extensin-like region Bateman A anon Pfam-B_1707 (release 7.5) Family \N 20.80 20.80 21.10 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -11.10 0.72 -4.10 14 490 2009-01-15 18:05:59 2003-04-07 12:59:11 8 22 25 0 352 530 3 49.70 73 84.32 CHANGED YcYpS.PPPP.hp........PPPP...Ya.......Y+SPPPP...s....YhYpS.PPP..Pha..........SPPPP.........YhYpSPPPP ................YlYsS.P.PPPhYp......Po..P+..spYKSPPPP.........Y...VYuS.PPP.......PhY.........SPSPp.........spYKSPPPP...................... 0 227 309 310 +353 PF00646 F-box F-box domain Bateman A anon Prosite Domain This domain is approximately 50 amino acids long, and is usually found in the N-terminal half of a variety of proteins. 
Two motifs that are commonly found associated with the F-box domain are the leucine rich repeats (LRRs; Pfam:PF00560 and Pfam:PF07723) and the WD repeat (Pfam:PF00400). The F-box domain has a role in mediating protein-protein interactions in a variety of contexts, such as polyubiquitination, transcription elongation, centromere binding and translational repression [3]. 20.50 15.60 20.50 15.60 20.40 15.50 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.28 0.72 -4.22 463 9939 2012-10-02 00:56:31 2003-04-07 12:59:11 28 388 416 10 7006 15392 101 45.20 20 10.27 CHANGED htl.pLPtc........................llppllpplshts.........hhphphls+phcphlpphphhtptht ...........................................................................h.pLPt-..........................................................lltcIl.p..p.L..s.hps..............................lh.p.hp..tls+pac.plhpp.........hh........................ 0 1706 3015 5562 +354 PF00754 F5_F8_type_C F5/8 type C domain Bateman A, Finn RD anon Pfam-B_478 (release 2.1) Domain This domain is also known as the discoidin (DS) domain family [1]. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.67 0.71 -4.31 185 10300 2012-10-03 19:46:52 2003-04-07 12:59:11 20 1197 1614 169 3934 9293 615 125.60 16 18.56 CHANGED phsuSo..phssth............stttlD...ss............t.stWps....t.......sssspalpl..DL.spstplsslhhpst..psttt...........hspsaplphS..sDG.....p.........sWpphtt................ss.ssss.hh..........hh.sshpu.....RalRlhsh...........h.stsphsthhEl .....................................................................s.....t.................tttl.D....us............................s.s..t..W.ps......t.........................sstspa.l..pl.................D.........L....s...p.......p.....p....l.s..t.l..t...h...p...sp....tsttt........................hspsa..p.l.t..hS........s..cu.........p..................sW.pphtp.........................................ss..s..s..s.s..hh........................................hh....t.s.h.p.u..............+a..l..Rlh.h................tt....h............................................................................................. 
0 2052 2661 3306 +355 PF00487 FA_desaturase Fatty acid desaturase Finn RD, Bateman A anon Bateman A Domain \N 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.16 0.70 -4.94 127 9036 2009-01-15 18:05:59 2003-04-07 12:59:11 19 46 2567 0 3138 8639 3621 228.30 15 66.62 CHANGED hs.hhhhhhhhhhhshhhthhhhltH-ssHt.....thhtpphhsphhuthhshh....hhhshht.a...p.......Hhh.HH......phhss.t........Dssthhh..............................hhhhtphhhhhhhthhhhhhhhhhthtttthhttthh........................................hhhhhhhshhhhhhhh.hhhhhhht..................shhhhhhshhhhhhhhhhhs...........h.t..Hhhh...........tttttstshhtpp....hh............hhshhhhs..hs.aHh..................tHHh...aP...sls....ahpLsphttthtphhtttthshhtht ...................................................................................................hh.h.h.h.h..h.h..h..h.hh.t...hhh..hh.tHphuHt......................th....t..p....t.....h..h..p..p..h.h..u.h.h..hhhh.........................h.sshht..a...thp...........................Hpt.HH......................phsss.t...............Dss.shh............................................................................t...t..h....h..h.....t..h..h.h...h....h......h...h..h....h...h..h..h..h......h.....t...h..t..h....t.p....h..t..phtt................................................................h...h.hh.hhh.hh..h...h....s.h..h...hh........hh...h...h.........h...h...t........................................................hh...h..h....h..h....l.......t..h..h.....h...h.t.h.h.h.hhs.........................ht.......Hhh.............................h.tpttpthhp.pt................h...........................t..h.h.h....s..h..h.hh..s........hs..hHs...................H...Hh..........aP............th........h..h........................t........................................................................................................... 
0 1025 1826 2610 +356 PF02913 FAD-oxidase_C FAD linked oxidases, C-terminal domain Griffiths-Jones SR anon Structural domain Domain This domain has a ferredoxin-like fold. 21.20 13.90 21.20 13.90 21.10 13.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.30 0.70 -5.06 62 9445 2012-10-02 00:48:38 2003-04-07 12:59:11 14 65 3272 53 2996 8280 3740 227.50 22 39.33 CHANGED hPpttssshshasshpsAs..psstplhpp.thhssshEhhDptshphshshhsh.........................tt.shlllchpusp...th.tpphp..tltphhptsssss................hhsps.tphp..plWp...hR+thhs.............hhhpttshsht.DsslPhsclsshlpphcphhsphs..........hhhshhuHsG.......DGNlHhhlhhs.ts......tthcphpphhtcl....hthshphGGolouEHGlGht+pt....ahttthuttslshh+plKpshDPpsILNPG+l .......................................................................................................................................................................Pt.ttshhhta..s..s.......hpsAh.....psl...t.h....h....t..........t.....t.............h..s....s..u..hE......hhDph....s.l.p..hs.pph.hth......................................................h.p.t..t.s..h.ll.lE...h.s...G.s.s.............t..t.h.....p.........p.......p........hp................hltph.hp..p.t.s..s.t.p............................................h.......hsps.tptp.........plWt.........hRcpshs..........................................h...t...p...h...t...s..hhh.t..p...Ds..slPh..spl.sphlpp..h.pthhp.phs...............................................ht.h.s..hhuHsG.............DGN..l.Hh..h..l..h....h...s..h..s..p......................tp..h..p..p.h....c..t..h.h.p.cl...........hp.h.s.h.c.h.G.Go.l....o...u.EHG..lGht+.t.........hh.h........h..u..ttt.......h.thh.ctlKtshDPpslhNPGK......................................................................... 
0 916 1869 2506 +357 PF01565 FAD_binding_4 FAD binding domain Bashton M, Bateman A anon Pfam-B_352 (release 4.0) Domain This family consists of various enzymes that use FAD as a co-factor, most of the enzymes are similar to oxygen oxidoreductase. One of the enzymes Vanillyl-alcohol oxidase (VAO) has a solved structure, the alignment includes the FAD binding site, called the PP-loop, between residues 99-110 [1]. The FAD molecule is covalently bound in the known structure, however the residue that links to the FAD is not in the alignment. VAO catalyses the oxidation of a wide variety of substrates, ranging form aromatic amines to 4-alkylphenols. Other members of this family include D-lactate dehydrogenase, this enzyme catalyses the conversion of D-lactate to pyruvate using FAD as a co-factor; mitomycin radical oxidase, this enzyme oxidises the reduced form of mitomycins and is involved in mitomycin resistance. This family includes MurB an UDP-N-acetylenolpyruvoylglucosamine reductase enzyme EC:1.1.1.158. This enzyme is involved in the biosynthesis of peptidoglycan [2]. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.52 0.71 -4.70 142 20637 2012-10-02 01:00:47 2003-04-07 12:59:11 18 164 4917 139 7390 17693 6472 133.60 21 26.34 CHANGED P.thllhPpotp-ltphlchup..pp...shs..lhshGuGsshs.uts..........psu...lllsh.t..p.hsp..lhpl-s....t...stpsplpuGshhtpltptlttp..uhhhs.p.sush.ss..olGGhlsssusGhtstta.G.hh..-hlhulcllh.ssGpllphu .........................................................hllhPpsh.p-ltth..........l....ph.st.............pp..................p..h.s.........l..h...s...h.G.u.G........o.s...hh..utsh.................tsu.................ll..l.sh..p...........p..hsp...........l..h.p...l..c...............................t...........s..t..h..l..p...l..p..u.....G.s.h.h.t..p............l.t.p..t.........h......t........t....p...........u...h..h..h......s................p....s............u....s...t....................s............s........o........l.G...G.s...l..s.s...........s...u...u.......G...h..t..........s...h.........t..........h...G.....hh......-h..lhs.l....cl..lh..ss..Gplhph................................................................. 
0 2058 4492 6188 +358 PF00970 FAD_binding_6 Cyt_reductase; Oxidoreductase FAD-binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_143 (release 3.0) Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.35 0.72 -3.87 115 14779 2012-10-03 00:38:56 2003-04-07 12:59:11 19 159 4278 144 4286 11998 1730 96.10 21 26.84 CHANGED tspLls+pplScDs+hF+F.tL..s.sssph...luL..PsGpalhlpss...ls.sc.hhRuYoPsSss..s..-hGh..h-LllKlY.........sGGtMSp.aL.-s.L.plGsh.lcl+GPhGp.htY ...............................................................................lhphp...s..t..s..h.h..h.....hp..h..th...........s....t..........t....t..........h..t.a......p.s.G....Q...a......l..t.........l...t...l...........t.................p.......s............p.........................h........h............R.s....Y.........S........h......s.................s.......s................s........s.............pp.s...t...............lc......l..tl...+t.h............................s..G.h....h..S..s....h...L....p.....p.....l.....p.......s...G.......D.....p......l.p....l.t.s....PhGp.h..h................................................... 
1 1115 2425 3487 +359 PF03441 FAD_binding_7 FAD binding domain of DNA photolyase Griffiths-Jones SR anon SCOP Domain \N 20.10 20.10 20.10 20.10 19.40 19.90 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.86 0.70 -5.19 163 4493 2009-09-11 23:21:46 2003-04-07 12:59:11 9 22 2644 55 1440 3950 7444 245.00 30 51.07 CHANGED sGEps..AhppL....psFl..p..p..plpsYpps..Rsh.P......ut.....s.........................................uoSpLSPaLpaGplSsRplhptspptt...........................shpsalpcL.hWR-a.hhphhhptPpht.....................t.shppta.ps.h...Wt..................ts.p...tthpsWppGcTGaPllDAuMRpLhpTGa..hHNRhRhhsuSFLsK.cLhl-WctGtc.....................aF...........hppLlDh-.usN.ssWQa.uu.sGhss...hRlaNPhpQuc+aDPsGpal+pWlPELtslssc.hl..Hp...........................Paphst......................s.......YPp.P..ll-hppsRctslctatphp ......................................................................................ucptAhppL.....ppFh.p............ptl.t.pY.ppt..Rsh...s.......sh....s..........................................uo....Sp....LSsaLt............h...G..tlS.sR..phhpplhpttt.......t................................................tsspsa.lpcL..hWR.EF.ah.tl.h.h.t.h.P.p.h.t............................................................................p.ts...h.ps..t..h..pp..h......W........................................s..tthpt..W...p.p...GcTGaPll.D.Au.....MRp.LhpTG.a..hHNRhR.h...h........s.A.oFLsK..cLh..l...cW.c...G..t..c.....................aF.......................hppLlD.h....Dh...us...N......ssW....Q......ahuu....s...G..............s..D....s.........h..............h....R.h.aNP..............h..t.Q.........u.c.+a.....DspGp..a...l..+.p.....al.PpL.tt...lss......p......l.ap.............................................sht...........................................................t..............YP..s..hl.phtttp...hthh....t................................................................................................... 
0 488 923 1231 +360 PF03101 FAR1 FAR1 DNA-binding domain Mifsud W, Bateman A anon Pfam-B_2535 (release 6.4) Domain This domain contains a WRKY like fold and is therefore most likely a zinc binding DNA-binding domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.27 0.72 -3.46 43 1076 2012-10-02 23:28:20 2003-04-07 12:59:11 10 54 116 0 699 1040 0 87.40 24 15.96 CHANGED pFYspYAtpsGFulRhsppp+o...ptss.hhpppFsCs+pGhppppppp.............................ptpp.....sps.oRsGCcAtltl+hpp...sucWhVsphs.....h-HNHtLss .....................................................aYptYAtphGFsl+hpp.s..p.ps........ts.t..t..hh..ppp..a.s...C..s..+pGhppppptp.....................................................t..pp............pspsR.suC..pA.h.h.t.l..p...hpt.........ss......p..W...hl....p.p.h...........hcHNH.l............................................. 0 82 399 567 +361 PF04300 FBA F-box associated region Bateman A anon Bateman A Family Members of this family are associated with F-box domains, hence the name FBA. This domain is probably involved in binding other proteins that will be targeted for ubiquitination. Swiss:Q9UK22 is involved in binding to N-glycosylated proteins. 
25.00 25.00 27.50 25.90 21.50 23.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.36 0.71 -4.32 6 396 2012-10-03 19:46:52 2003-04-07 12:59:11 8 16 84 7 192 352 2 158.30 36 59.10 CHANGED WKshYhhpshcRNLLRNPCu.............................EtshpuWp.lp.pGGDtWpVEpLPhssusphP.sstVpphFloSFcWCpKpQhVDLcAEGhW-ELLDoFpP-IVVcDWausRtDsGChYpLcVpLLuAD.psVLspFossPssh.Qhss.ssWpcVSHsFScYsPGVRalpFpHtGpDTQFWsGaaGsRVTNSSVlVcs .................................................................p.hh...h...hp.htRNLl+NP...su...........................................................................-.p.sh..phWp...lp..pG..G..s.tWtlEp....t.......p....t....t.hs...s...........tl..........p....ph.Fl.o.S.athChKp.Qll....DL.tEG.h....htclh.DphpPcIh...lpDW.as.s...Rt..ssus.h.Yp.lpVpLL...........stp...p.....ps.............l..............sp..........F...............p......ss..........................h...h.ph.ss.......tt....WpploHsFpsYs.sGVRalhFpHtGp..Dsp.aWtGaaGs+lTsSSl.l..h.......................... 0 32 57 98 +362 PF00611 FCH Fes/CIP4, and EFC/F-BAR homology domain Ponting C, Schultz J, Bork P anon SMART Family Alignment extended from [1]. Highly alpha-helical. The cytosolic endocytic adaptor proteins in fungi carry this domain at the N-terminus; several of these have been referred to as muniscin proteins [2]. These N-terminal BAR, N-BAR, and EFC/F-BAR domains are found in proteins that regulate membrane trafficking events by inducing membrane tubulation. The domain dimerises into a curved structure that binds to liposomes and either senses or induces the curvature of the membrane bilayer to cause biophysical changes to the shape of the bilayer; it also thereby recruits other trafficking factors, such as the GTPase dynamin. Most EFC/F-BAR domain-family members localise to actin-rich structures [3]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.07 0.72 -3.80 157 2299 2009-11-12 14:53:54 2003-04-07 12:59:11 18 60 270 55 1342 2138 1 91.60 21 13.78 CHANGED psFhsph.........tuhcslhp+hcpuhphh.c-ltpah.+cRuplEccYuppLp.clspchhptht.........ttpp......uol.................ppuapplhspscphuptHtphuppl ..............................hhtth.............tthcsL.pphppuhp.hh.c......-l...tpah.+c.Ru..plEcpYup....p.....Lp...cLucchtttht...................sppp.............soh......................................................tpuapslh.sp.scphuptHtplupph........................................................................................................ 0 361 566 953 +363 PF00111 Fer2 fer2; 2Fe-2S iron-sulfur cluster binding domain Sonnhammer ELL anon Prosite Domain \N 20.70 15.00 20.70 15.40 20.60 14.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.55 0.72 -4.23 206 17474 2012-10-02 17:47:23 2003-04-07 12:59:11 22 166 3695 266 5439 17310 7619 75.20 21 24.22 CHANGED hphpup..thphpsssspp.lLcshc...p.t.slslshuCps.......Gs....CusCtsplhtu.................hpspphttt.h.....................LuCtshsp .......................................h....sp...th.pl...p..s..s..s..s..p.s.....lLcshc.............p...t..........s....l...s......l......t......h.......uCpt................................Gs..C.Gs..Cp..lh.lh..p.Gps.........p...................ptp......t........................................................LsC.sh......................................................................................... 
0 1464 3188 4405 +364 PF01799 Fer2_2 fer2_2; [2Fe-2S] binding domain Bateman A anon Bateman A Domain \N 20.90 20.90 21.20 21.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.77 0.72 -4.08 157 5947 2012-10-02 17:47:23 2003-04-07 12:59:11 15 71 2180 114 2017 5159 1915 75.40 40 17.35 CHANGED TlE..GL..............s.p..ssplcslQpAFl-ppuhQCGaCTPGhlhuutuLLc...p..................ps.ps......occ-.lccu.l.sG..NLCRCTGYppIlcAlpp .............................................TlEGL.....................s.p.ssp.l..cslQpAahcp......pusQ..CGa.C.TPGhlMoshu.LLcp............................................ss...pP.............ocp-..I.c.cu.l..u.G.....N..LCRCTGYptIlpAlp.t......................... 0 620 1185 1606 +365 PF04324 Fer2_BFD fer2_BFD; BFD-like [2Fe-2S] binding domain Kerrison ND, Finn RD anon COG2906 Domain The two Fe ions are each coordinated by two conserved cysteine residues. This domain occurs alone in small proteins such as Bacterioferritin-associated ferredoxin (BFD, Swiss:P13655). The function of BFD is not known, but it may may be a general redox and/or regulatory component involved in the iron storage or mobilisation functions of bacterioferritin in bacteria [1]. This domain is also found in nitrate reductase proteins in association with Nitrite and sulphite reductase 4Fe-4S domain (Pfam:PF01077), Nitrite/Sulfite reductase ferredoxin-like half domain (Pfam:PF03460) and Pyridine nucleotide-disulphide oxidoreductase (Pfam:PF00070). It is also found in NifU nitrogen fixation proteins, in association with NifU-like N terminal domain (Pfam:PF01592) and NifU-like domain (Pfam:PF01106). 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.09 0.72 -3.91 190 6775 2009-01-15 18:05:59 2003-04-07 12:59:11 10 90 2902 2 1800 5000 364 54.70 27 11.81 CHANGED thl..CtCpsVoctplppsl.....................tp.............hp...s.....hpplpptsts.............GssCG.sChs....ht......pllpptt ...................hlCpCpsVocupItpAl.....................tp..............tuspo.......lppl+ppTcs.........................Gss.CG...sChs..........ht....plltt..t............................... 0 527 1115 1483 +366 PF00037 Fer4 fer4; 4Fe-4S binding domain Bateman A, Eddy SR anon Prosite Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.61 0.73 -7.60 0.73 -4.16 533 8206 2012-10-03 08:56:42 2003-04-07 12:59:11 22 662 2969 164 3310 39684 11065 23.50 39 7.69 CHANGED hhlstcpChsCGtChpsCPssAlp ......h..ls.ccClsCGhChpsCPhsAI........ 0 1442 2410 2918 +367 PF00210 Ferritin ferritin; Ferritin-like domain Finn RD, Bateman A anon Prosite Domain This family contains ferritins and other ferritin-like proteins such as members of the DPS family and bacterioferritins. 
24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.60 0.71 -4.43 243 10640 2012-10-01 21:25:29 2003-04-07 12:59:11 19 13 4559 1610 2459 7325 1350 137.80 20 82.92 CHANGED hptLNctlstE...htushtYhthuhhhc..shshtshsphhcppupEEh.pHApclh-hlhthG........G.....ph.......phhp...........h.................................s.ssh-hlctslptEpplspplpclhphup....pp......p..DhsotshL.pthlp-.p.cchthlpshlppl..cth ...........................................phLNctl...spp...hhuhh.hYhph........phhhc......shsh..slt..phh..cc.........ps.p-ph...p.....Hs.......-cl.h.-+lh..p..lG............u.....s...hphh...........thhp..........................h.....p..........................................................hs.ssp-h.lc...p...s...lp...t...p..p.t.l.s...........p.pl.p...c.h.h.p.h.up......pt................p..D......sotshl.pth.l.p-.pccphhhlpshlp................................................................................................ 0 681 1367 1912 +368 PF01839 FG-GAP FG-GAP repeat Bateman A anon [1] Repeat This family contains the extracellular repeat that is found in up to seven copies in alpha integrins. This repeat has been predicted to fold into a beta propeller structure [1]. The repeat is called the FG-GAP repeat after two conserved motifs in the repeat [1]. The FG-GAP repeats are found in the N terminus of integrin alpha chains, a region that has been shown to be important for ligand binding [2]. A putative Ca2+ binding motif is found in some of the repeats. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.94 0.72 -4.16 470 2456 2012-10-05 17:30:42 2003-04-07 12:59:11 18 220 351 67 1119 4198 1515 38.40 33 6.96 CHANGED hstslssu..DlsGDGhsDlll..........................ssts....stlhlh ............Ghulusu..DlNGDGhsDlllG.....................................stptp......GtlYl..................................... 
0 347 596 821 +369 PF02181 FH2 Formin Homology 2 Domain SMART anon Alignment kindly provided by SMART Family \N 25.20 25.20 25.20 25.20 25.00 25.10 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.25 0.70 -5.57 48 2361 2009-01-15 18:05:59 2003-04-07 12:59:11 18 102 317 21 1446 2150 30 329.40 23 32.57 CHANGED tppt.psct+LKsLpWsKl..............................p.ssp.ps...........lWsclpppp..............ph-ht-h-thFsststpthtpt............................sstpppspclplLDs++upNluIhLppl..phshc-lhpulhph-p.phLs.............h-hl-pLh.chhPsc-Ehptl..p........phps-......hppLucsE...pFlhplsp.lsthppRlpshhFptsFpsplpplptplpslppAscpL+pScphpplLchlLthGNaMNsGst..GsAhGF+LsoLhKLsDsKus.-s+pT..LLHalsphltc.............................................phsclhshs.s-LpslccAuplsh-slppslppLpcslpplcpplptttp.......t.tptFhphhppFlppucpclcplpsthppshpthcchscYa...spssp....phs.pphFshlppFlphac ..................................................................................................................................h.....tsphth.h.h.W..pp..l....................................................................tp.h..pt.......................lW..sp......hp...ptt..........................................t.ph..t..c..hpthFttpttt...t...........................................................................t.tppp.pplpl..............L.-s.++upN.............ls.I..hL.p........p......h......p........h.s..........cc........l.t.....p...s...l..h..p..h..-p....ph..ls.............................................h-hl..p..p..Lh..phhP.p.p..cEh.phl.......p..........................ph...ptp..............hppLsps-..pFh.ht...h................p..ls.....php...t.....R.l.pshha.p......hpFpp...........p..h.p.p.lp....p..........lp..s.............ltt.Asp...p.......l.....p...pS....p.....p....hpp..lLp......h....lLthGNah..N....s..u..........p..................up....A.......hGF.c.L.s........L.+........L......t-.....s....Kus....c........p.p...........h...o......LLHal..s.ph.h.tp...............
.....................................................................................................p.h.s..p..h..h..p.....h.........s-l..........ttl....p..p..A.u...p............l..s.hc.p.ltpp.............lppl.ppt.hpt.hpp.plph.tp.........................................h.....hp.h.h...........p......pFlpp...sp.tphp...tlpt..hp...p............h............p....hhpp..hhtaa..............s.ssp..........ph.p...p.phFthhtpFht.a....................................................................................................... 0 487 743 1102 +370 PF02661 Fic Fic/DOC family Bashton M, Bateman A anon COG2184 & COG3654 Family This family consists of the Fic (filamentation induced by cAMP) protein and doc (death on curing). The Fic protein is involved in cell division and is suggested to be involved in the synthesis of PAB or folate, indicating that the Fic protein and cAMP are involved in a regulatory mechanism of cell division via folate metabolism [1]. This family contains a central conserved motif HPFXXGNG in most members. The exact molecular function of these proteins is uncertain. P1 lysogens of Escherichia coli carry the prophage as a stable low copy number plasmid. The frequency with which viable cells cured of prophage are produced is about 10(-5) per cell per generation [1]. A significant part of this remarkable stability can be attributed to a plasmid-encoded mechanism that causes death of cells that have lost P1 [2]. In other words, the lysogenic cells appear to be addicted to the presence of the prophage. The plasmid withdrawal response depends on a gene named doc (death on curing) that is represented by this family [2]. Doc induces a reversible growth arrest of E. coli cells by targetting the protein synthesis machinery. 
Doc hosts the C-terminal domain of its antitoxin partner Phd (prevents host death) through fold complementation, a domain that is intrinsically disordered in solution but that folds into an alpha-helix on binding to Doc [3].This domain forms complexes with Phd antitoxins containing Pfam:PF02604. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.65 0.72 -3.29 221 6829 2009-01-15 18:05:59 2003-04-07 12:59:11 13 61 3371 61 1631 5456 731 97.70 21 34.91 CHANGED hls...hptlhp.lHp.tl......hppht............................Gth......Rs..h...................shhptshspsp.ph.thh..........thhs....................hhptuAth.thtlsphHPFhDGNtRsuthhhthhL ......................................................................................................................................p.t.lhplHp..hl..........hp.t.hht........................s................................Gph.........Rp.th...........................................................t.thp.h...h.h.s.t.st...pl..t.t.h............pphhphhp...................t..p...................htphu..hh.ahph.ttlHPFtDGNGRsuRhhhthhL.................................. 
0 529 1058 1363 +371 PF00254 FKBP_C FKBP; FKBP-type peptidyl-prolyl cis-trans isomerase Finn RD anon Prosite Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.95 0.72 -3.97 174 16739 2009-01-15 18:05:59 2003-04-07 12:59:11 23 143 5125 202 5251 12394 3280 99.80 27 37.70 CHANGED ssp.sppGD.pV......plcYpGpl.........s...Gph..........FDuo..........h........+spshpaplGp...sp...VIp......................Ga-pulhs...........MpsG-ctplhlss.phuYGp......................................puh.s.......IPss.........................usLhF-V-Ll ..........................................................................................................................................................tt....sppsc..tV.......plcYp.G.pl....................D.........Gph..................................F.Duo..........................................................................pups.h..s...a....h.l.....G..t.............sp....lIs...........................................................G.a...-cul..hu.........................h.p....sG...-.c..h.p.l.h.ls...s...p..........u..YGt...........................................................................psh....s.................st.................................................pslhFclcl..................................................................................................................................................................... 0 1593 3021 4223 +372 PF01003 Flavi_capsid Flavivirus capsid protein C Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family Flaviviruses are small enveloped viruses with virions comprised of 3 proteins called C, M and E. Multiple copies of the C protein form the nucleocapsid, which contains the ssRNA molecule. 
21.00 21.00 21.60 24.90 20.90 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.64 0.71 -4.55 27 4730 2009-01-15 18:05:59 2003-04-07 12:59:11 14 17 108 12 0 4102 0 108.70 56 4.51 CHANGED ts.ups+slNMLKRshscs...httsKRhhhsLhsG.GPhRhVLAhlsFh+...FsulsPTsuLhpRW+sVsKppAh+tLpsFKK-lGshlssl.s+Rt++.tth.s.sshlLhl....hhlshshA ..................................KKstpsshNMLKRsRNRVSTspQLsKRFSh.GLLsGpGPh.KLVMAhlAFLR...FLAIPPTAGlLsRWGohKKssAIKVL+GFKKEIusMLsIlNRR++o.........shhLlMl....hsss..hhhh.............. 1 0 0 0 +373 PF02832 Flavi_glycop_C Flavivirus glycoprotein, immunoglobulin-like domain Bateman A, Griffiths-Jones SR anon Pfam-B_146 (release 3.0) Domain \N 20.80 20.80 21.00 20.80 20.70 20.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.32 0.72 -4.10 12 9726 2009-01-15 18:05:59 2003-04-07 12:59:11 11 26 131 56 0 6580 0 95.50 58 6.56 CHANGED oYsMCss.KFpapKsPuDTGHGTVVlcVpYoGssuPC+IPlpsssphpshssVGRLlTsNPhlsss..putVhIEhEPPFGDSYIlVGhGpppLshpWa+ ..SYuMCTs.pFKlhKEsAETQHGTlVlclpYcGsDuPCKIPhsshD.hpshsslGRLITsNPlVosp..-ssVNIEsEPPFGDSYIVlGhG-ptLplpWaK............. 
0 0 0 0 +374 PF00869 Flavi_glycoprot Flavivirus glycoprotein, central and dimerisation domains Bateman A, Griffiths-Jones SR anon Pfam-B_146 (release 3.0) Domain \N 20.00 20.00 20.60 20.20 18.90 18.90 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.83 0.70 -5.43 35 10895 2012-10-01 19:42:26 2003-04-07 12:59:11 15 32 141 36 1 7165 0 269.90 59 20.34 CHANGED h+ClulpNRDFlcGsoGsTWVslVLEpGuCVTlhAcsKPolDlhLsshptps.AtsRcYChcAploshpssuRCPThGEApLsEEpsssaVCKRsaoDRGWGNGCGLFGKGSIVsCAKFoC...scphpGhhlptpplpYsVtlpVHsupttsstt......ssspttphphTspusppshsLu-Y..GplolsCcspSGlDhsphhlhpls......s+sWhVHR-WFpDLsLPWp.....tsusss...Wcsp-pLV-FcpPHAsK.sVhsLGsQEGulhpALuGAh.lphsusp...hpLpu.............GHlpC+lph-KL+lKG .......................................+ClGluNRDFVEGlSGuTWVDlVLEtGSCVTsMAKsKPTLDlcLhphEApp.AplRchClcAploshoTsuRCPTpGEApLsEEpDtsaVC++shVDRGWGNGCGLFGKGS.llTCAKFpC...tpphpG+llQhENlcYoVhlpVHoG-pptsss.......ssppusphpITPpusosphpLs-Y..GslTl-CpPRoGLDhNphllLphc......sKuWLVHRpWFhDLsLPWo.....uuusTp...Wpp+EhLVpFcssHApKQ-VVsLGSQEGAhHoALsGAs.lph..Suss.....plhu.............GHLKCRl+MDKLpLKG....................................................................................... 1 1 1 1 +375 PF00949 Peptidase_S7 Flavi_helicase; Peptidase S7, Flavivirus NS3 serine protease Finn RD anon Pfam-B_199 (release 3.0) Family The viral genome is a positive strand RNA that encodes a single polyprotein precursor. Processing of the polyprotein precursor into mature proteins is carried out by the host signal peptidase and by NS3 serine protease, which requires NS2B (Pfam:PF01002) as a cofactor [4]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.50 0.71 -4.75 50 3378 2012-10-02 13:45:52 2003-04-07 12:59:11 16 23 150 15 17 3915 116 149.10 68 4.59 CHANGED hc.lpsGVY..RIhptulh.GppQ.......hGVGhhppGVhHTMWHVTRGAsLph...sstthsPtWus....V+cDllsYGGsWcLpt+WcG.c-VQlhAh.Pst.hp.hQspPGhhph.tG.plGAlslDaPsGTSGSPIl...NppGcllGLYGNGlhhs......ssYlSuIuQu ..............................................................................................................................................................................u.-L--GlYRIhQRGlL.GpoQ.......lGsGVhpEGVFHTMWHVTRGAVLMa...pGKRL.....EPsWAS....VKKDLISY...G...G.G.W+....Lp...up...Wpp.GEEVQVIAVEPGKNPKNV..............................................................................QTtPGsFKTspG.....E.....l...G.....A.l..u..L.D..F.pPGTSGSPIl...sR.cGKVVG.L.YGNGVVTp.....sGsYVSAIAQs...................... 0 13 14 14 +376 PF01004 Flavi_M Flavivirus envelope glycoprotein M Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family Flaviviruses are small enveloped viruses with virions comprised of 3 proteins called C, M and E. The envelope glycoprotein M is made as a precursor, called prM. The precursor portion of the protein is the signal peptide for the proteins entry into the membrane. prM is cleaved to form M in a late-stage cleavage event. Associated with this cleavage is a change in the infectivity and fusion activity of the virus. 21.20 10.00 21.30 20.40 20.60 9.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.50 0.72 -4.55 30 4840 2009-01-15 18:05:59 2003-04-07 12:59:11 14 26 115 0 0 4095 1 71.00 55 2.92 CHANGED SVslssHsppsLssRsp.sWhcucpspcaLs+VEsWllRNPsaAlssssluWhlGsohsQRlllhlhlLLlAPAYu .......SVALsPHsGhGL-TRsp.TWMSSEGAW+plp+lEoWhLRpPGFsllAhhLAahIGoohhQR.slIFILLhLVuPSh.... 
0 0 0 0 +377 PF00948 Flavi_NS1 Flavivirus non-structural Protein NS1 Finn RD, Bateman A anon Pfam-B_157 (release 3.0) Family The NS1 protein is well conserved amongst the flaviviruses. It contains 12 cysteines, and undergoes glycosylation in a similar manner to other NS proteins. Mutational analysis has strongly implied a role for NS1 in the early stages of RNA replication. 19.90 19.90 19.90 20.40 19.80 19.20 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.14 0.70 -6.04 11 4700 2009-09-11 01:25:55 2003-04-07 12:59:11 16 26 120 0 1 3925 0 270.30 70 11.15 CHANGED hGCAIDhpR+EL+CGuGIFVas-VcsWh-pYKYhPEoPppLApslpcAac-Gl...CGlRSssRLEptMW+ultsELNhlLpENplcLoVVVscscshhppu..s+sLphpsc-LchuWKoWGKuhlaus-huNsoFllDGPpTpE..........CPsppRAWN.....sacVEDFGFGlhoT+laLclREcsTpECDotlhGsAVKss+AVHoDhoYWIES.phNsTWplE+A.htEVKsCoWPcoHTLWu.sGVlES-hlIPhoLAGPhSpHN+RsGY+TQspGPWcpsclclDF-aCPGTTVslsEcCspRGsSlRTTTsSGKLIs-WCCRSCTLPPLRFpspsGCWYGMEIRPl+ccEssLV+ShVsAssGp ................oGCVIsWKs+ELKCGSGIFVTN-VHTWTEQYKFQs.-SP.p+.L.As.uItpAa..c..-Gl...CGlRSsoRLEphMWcplpsELNhlL.EN.thchol..h..s..sc...G.hhttu...+.lpsps.chchuWKsWGKuh......hh......ss-.t..NpoFllDGPpT.E...............CPstpRAWN................hEVEDaGFGlhoTplaLKlR-t.s..CDp+lhusAlKsphAVHuDhuYWIES..th..Nto.WKlt+AshhEVKoChWPcoHTLWu.sGVLES-hIIPt.huGPhSpHNhRPGYhTQstGPWc..G+lEhDFshC.GTTVslsEpCGpRGPSlRTTTsoGKlIp-WCCRSC...TLPPLRahs-sGCWYGMEIRPhpccE.cshVpS.VsAhpu.p................ 0 0 0 1 +378 PF01005 Flavi_NS2A Flavivirus non-structural protein NS2A Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family NS2A is a hydrophobic protein about 25 kDa is size. NS2A is cleaved from NS1 by a membrane bound host protease [2]. NS2A has been found to associate with the dsRNA within the vesicle packages. It has also been found that NS2A associates with the known replicase components and so NS2A has been postulated to be part of this replicase complex [1]. 
25.10 25.10 25.20 40.20 24.80 25.00 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.28 0.70 -5.05 37 3339 2009-01-15 18:05:59 2003-04-07 12:59:11 14 19 108 0 0 3344 0 208.40 46 6.38 CHANGED Gllslhlshp.llR+Rh..Tu+thlhuulhlLslhlhGhlThpDLhRYllhVGhsFst.psGs-lhaLsLl.AsFclRsuhLsuahLRppaTsREslllhluhshlphuhst....h.hslhphhculAluhhll+Ahsptppssluh.llulhs.tthhhlhtuhphhlhllsssulhp.t+ssst+.Kshshllslu.....lhus.Ghh..hlshhtlhhtps.t+.R ................GlLslulhhEEVhRsRa..upKhhhsullssFllLlhGplTapDLhRhsIMVGAshoD.chGMGlTYLALh.ATFKlpPhFAlGhhLR.+LTS+EslLLslGluhhspsp......LPpsl.-lsDuLAlGhMlLKllTphpsaQLhssLluLossssthhLpsAW+shshlLulVSLhPLph.oootQ.Ks.s.WlP.lsLu.....hhGs.....sPlshFlhshs+s.p+R............... 0 0 0 0 +379 PF01002 Flavi_NS2B Flavivirus non-structural protein NS2B Finn RD, Bateman A anon Pfam-B_156 (release 3.0) Family Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. All, but two, are cleaved by the NS2B-NS3 protease complex. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.29 0.71 -4.55 30 3263 2009-01-15 18:05:59 2003-04-07 12:59:11 14 18 115 15 2 3330 1 127.00 55 3.81 CHANGED hoEslTAVGlhhsLsuulh+.sssphhsPhssuGllllsYlloG+sssLhlE+su-lpW-p-AphoGuSscLcVchDspGsh+Lh--ctsshphhlhhsshlshuAhaPhuIhhshsuWahhp...pss+R ......LNEulMAVGlVSILsSSLL+.NDlPMAGPLlAG..GLLhACYVIoGpSADLpLE+AADVoWE--AEhoGuScpl.VplsDDGoMpIKs-Ec-shLTlLL+ssLLslSGlaPhSIPsTlhsWahWp...pppQR............................... 0 0 0 2 +380 PF01613 Flavin_Reduct Flavin reductase like domain Bashton M, Bateman A anon Pfam-B_710 (release 4.1) Domain This is a flavin reductase family consisting of enzymes known to be flavin reductases as well as various oxidoreductase and monooxygenase components. 
VlmR is a flavin reductase that functions in a two-component enzyme system to provide isobutylamine N-hydroxylase with reduced flavin and may be involved in the synthesis of valanimycin [1]. SnaC is a flavin reductase that provides reduced flavin for the oxidation of pristinamycin IIB to pristinamycin IIA as catalysed by SnaA, SnaB heterodimer [2]. This flavin reductase region characterised by enzymes of the family is present in the C-terminus of potential FMN proteins from Synechocystis sp. suggesting it is a flavin reductase domain [1]. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.19 0.71 -4.37 141 7912 2012-10-02 11:35:36 2003-04-07 12:59:11 13 38 3556 70 2331 6180 2131 151.60 19 73.61 CHANGED htp..hsssVsllos.t........sspsh............G....hss.oshsslShc.P.Phlh.hsls.......pp...............................................................................spohshlpp...ssp............Fslsl....Lsp.sp.tp.lu....ptFus.......................tttc+..hs.shphpts.......................sus.......Phl..psuh.uthcC...clhp..p......hss.G.s............HslhlucVhsh...thtps..t.................Llahp.tpapshs .................................................h.....hstslslloo..st.............suths..............................s.........hss.oh......h...s.s...l....sh..s....P..Ph....lh..l....s.ls...........pp................................................................................ppohth.lpp...ssp..........................Fslsl........lst...sp...t.....p..hs.............pthus..s................................................tt-+......h..s..slshttu.........................................sus..........................P.hl........pp.uh..sth-C..clhp....h..................hph...G.s.................................................................as.lhl..ucl..h.s.h........thssp......................................................h...................................................... 
0 675 1440 1926 +381 PF00258 Flavodoxin_1 flavodoxin; Flavodoxin Finn RD anon Prosite Domain \N 24.40 23.20 24.40 23.20 24.30 23.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.84 0.71 -4.19 92 10556 2012-10-03 05:08:30 2003-04-07 12:59:11 20 121 3998 199 2717 8477 791 138.50 22 32.80 CHANGED llYuSpoGsscphActlscthpt.tsh.spshshtphs.........tpl.ptthllhsssThstGpss.s.......h.phhthh.............................................................tt..htshphulhGhGspsatt.......attssptlcpplpp.hGuppls......................shsts-pps...shcpthptW .....................................................................lhauS.p.o.Gssc.t....l....Ac.t...........l.t....ct.l............t..........t..................s............h...........p........s..........p........l....h...s..h.s...c.hs...........................tpl..t.p....t......c.....h...l..lls...s..u..T.a...G...p.G.-...hP.pp................h.h.c.hhp...h...........................................................................................tt...pL..s.s..h..p...hulh...G.h....G...D..psatt..................FstuscplcptLpp.hG.u.p.h.ls............................ttlth.D....p....t.......ct.......................................................................................................................... 0 813 1530 2194 +382 PF04500 FLYWCH FLYWCH zinc finger domain Krauss V, Dorn R anon Krauss V Domain Mutations in the mod(mdg4) gene have effects on variegation (PEV), the properties of insulator sequences, correct path-finding of growing nerve cells, meiotic pairing of chromosomes, and apoptosis. The occurrence of FLYWCH motifs in mod(mdg4) gene product and other proteins is discussed in [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.26 0.72 -4.08 128 1130 2012-10-02 23:28:20 2003-04-07 12:59:11 11 45 72 1 775 1222 0 60.70 23 26.35 CHANGED ahpop+.Gpth...LlhsGahatpsppp.....s.sptaWcC.sphpp......hpC+ARlhTp.....sspplhhhp.s....HNH ..............hhstp.Gt.h....L.l.hp.u.ahYptpppt.............s.sphhWpC.spppp...........pC+u+lhTp.............stplhhhp..t......HsH.................. 0 187 296 645 +383 PF03358 FMN_red NADPH-dependent FMN reductase Mifsud W anon Pfam-B_2010 (release 6.6) Family \N 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.81 0.71 -4.71 276 11795 2012-10-03 05:08:30 2003-04-07 12:59:11 10 50 3935 96 3325 13002 2408 145.70 18 69.81 CHANGED h+llsl.sGSs........R.......ps......uhsptlsphstp......hh.....t.........t.h-s...............cll.-....lsch..hP.....hhstch.............ssts.....pphtppltpAD..ulllsoPEYssuhsusLKshlDhhs........................tpthp.sKss...............uhl..ususu.tGuhpshtpL+thhs....hpshslss.thslstshp .........................................................................................plhhl...G..S....p...................tt.......u..h.sptl.s..p..h..htc...................th......tt..................................s..h-s..........................phh...p...........lsch......hs.............................hh.s.tsh.................................tssh...................tt.h....h....p..p......l.t.p.AD...ullhuoPpYht...uhs..........usl..Kshl.D.h.hs...................................h.tpsht...sK.ss........................................s.lh........ss.u..s....s..........t..u.......s...........p...s.........h...p...h.t..h.h...h...hth.hhh.s............t............................................................................................ 
0 1024 2182 2831 +384 PF02434 Fringe Fringe-like Mian N, Bateman A anon Pfam-B_1900 (release 5.4) Family The drosophila protein fringe (FNG) is a glucosaminyltransferase that controls the response of the Notch receptor to specific ligands [2]. FNG is localised to the Golgi apparatus [1] (not secreted as previously thought). Modification of Notch occurs through glycosylation by FNG. The xenopus homologue, lunatic fringe, has been implicated in a variety of functions. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.81 0.70 -5.17 27 1045 2012-10-03 05:28:31 2003-04-07 12:59:11 11 26 186 2 715 1675 8 183.50 22 50.40 CHANGED p.hphcD..lFIAVKTT+KaHcsRlsllhcTWhspA+cQTahFTDs-DppLppp.....hs.pllsTNCSssHsRps.........LsCKhuspaDpFlpSsp....+WaCHVDDDNYlNlspLlcLLssYshopDlYlG+PSls+Plpshct.............................p.spFWFA.......TGGA..........GFCl..................SRuLALKM..........P..aAStuphhssuctlphPDDCTlGaIlpshLslplh+osLFHSHLEsLtplsspplpcQV............................olSYuth....sphNhlphtts...Fs.ppDPoR 
............................................................................................................h..........................ph.......l....t..TW.t.h..................................h....................o.....s.........................p..t....h...................................h.........t.....tt....................t..............................h.t.h....h........h......h...t..h...h...h..p..s..pt................cWahhsDDDT.Y.l.....hpsLh.p...hL.....p......s......a....s.....s..s....p...s...l.Y.lG...p.............h......s.h...h.pt.........................................................a.h.........sGGA.........................Gasl..................S+t.hhp+.h........................hh.p.t.t..p....h....p....s..t....................-....Dh......h.Ghhhtt...hth.................................................................................................................................................................................................................................................... 0 208 292 533 +385 PF01534 Frizzled Frizzled/Smoothened family membrane region Bateman A anon Pfam-B_949 (release 4.0) Family This family contains the membrane spanning region of frizzled and smoothened receptors. This membrane region is predicted to contain seven transmembrane alpha helices. Proteins related to Drosophila frizzled (Swiss:P18537) are receptors for Wnt (mediating the beta-catenin signalling pathway) [1], but also the planar cell polarity (PCP) pathway and the Wnt/calcium pathway. The predominantly alpha-helical Cys-rich ligand-binding region (CRD) of Frizzled is both necessary and sufficient for Wnt binding [2]. The smoothened receptor mediates hedgehog signalling [3]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.19 0.70 -5.65 11 1043 2012-10-03 04:04:29 2003-04-07 12:59:11 12 9 159 0 557 939 0 269.20 38 53.08 CHANGED saFop-E+pFschWIGlWSslChhSTLFTVhTFLIDhc.RF+YPERPIIFLSsCYhhVSluallphhst.c-cluCst.....................................tttttpplp.sohpspuCTllFlLlYFFsMAuSlWWVILolTWFLAAGhKWG.pEAI-t+upYFHLuAWulPulpTIslLAlupVDGDsloGlCaVG.hshcuLpGFVLuPLslYLllGshFLLAGhVSLF+IRolhpppG.....spo-KLEKLMlRIGlFSlLYhVPAslVluCYhYEttptspWthshhsp.Ctphp...s.s.t....s...t+Pp.hsVhMlKYhMsLlVGITSusWlWSuKTlpoW+pahpRhps+ ............................................................happp-hphs.phaluhhuhl.C.h.huThFT..l.hTF.Ll...Dhp...R....Fp....Y.P..ERPIlaluhCY.h.hSlua...lhthhh.....tcplsCst................................................................................t.t.t.....p...h...l..h.....t......s.....p..stuCsllFhllYaFsMAuolWWVlL...olTWFL..AAu.h..K....W....u..pE...........AI...........p..t.......p......upY...F..........HhsAWu..lPul.hTlslLsh.sp......V...-....G.....D.l.o.Gl..CaVG...s..hp....s.LpsF....VLsPLhlhLhlGs..FLlsGh.l.u.L......h+lRp.hpptt...................ppscK.L.c...+hM...lR....IGlFohLYhlPsh..hlluCahYE.h.ht.W.....t...h.....h....t.......h.................t.................................................................................................Pph.lhhl+hhh..h.llG.IssuhWlh.o.t.KTh...pWtphhpt....p........................................................... 0 125 171 367 +386 PF01827 FTH DUF38; FTH domain Bateman A anon Pfam-B_67 (release 4.2) Domain This presumed domain is likely to be a protein-protein interaction module [1]. It is found in many proteins from C. elegans. The domain is found associated with the F-box Pfam:PF00646. This domain is named FTH after FOG-2 homology domain [1]. 
21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.74 0.71 -4.59 87 1268 2009-09-12 02:26:25 2003-04-07 12:59:11 22 47 6 0 1226 1298 1 138.50 17 38.99 CHANGED pphhp..thtphLcs....tpslpl+plp.lpthsh...........s.clhslLsh...FpsphLcpIpl.........................ptpphpphccls.pL-QWKpAKplp..hpsthh.......sI-plhHFppFpl.phsp..hoh.pDslclRD.......L.hpsss....Fppspl..............ph.phsshcls+lFp.Pp ...........................................................................................t....t.h.phlps...pp.l..p..lcplp....l..p.s...h..s.....................p...p.l..h...p.l.Lsh...hcs.p.hLc.plpl............................................tppt.p..h....h.p.h....p....c.l....h..ph....-...QWK....p.Ac.p.lp.........lp.shhh...t.............slc..p..h..h..Hhp....p.h.p..l...ph......p..........p........h..oh..pc........lh.......t....l+ch..................h..hpp.s.p.......h.p.h.h................................................................................................... 1 257 261 1226 +387 PF01913 FTR Formylmethanofuran-tetrahydromethanopterin formyltransferase Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain This enzyme EC:2.3.1.101 is involved in archaebacteria in the formation of methane from carbon dioxide. N-terminal distal lobe of alpha+beta ferredoxin-like fold. SCOP reports fold duplication with C-terminal proximal lobe. 
19.60 19.60 19.80 21.90 19.50 18.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.88 0.71 -4.47 36 245 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 155 24 95 260 64 134.40 49 47.64 CHANGED MclNGVpI-DTFAEAFshpssRllITAtoccWAhpAApcsTGFuTSVIuCssEAGIEp.hlsPsETPDGRPGssIhlh....shstcpLccQlhcRlGQCVLTsPTTAlFsul...........tpphslGtpL+aFGDGaphpc..clsG.....R....+hW+lPl .............l.-TFAEAFshtssRlllTAts.cWAhhAApshTGFATSVIuCssEAGIE+..LsPs.....ETPDGRPGlul.LlF.....uhutctLt..+Ql.pRlG.QCVLTsPTTAsFsGl..........................p...sscplslGtpLRaFGDGaQhuK..pl.sG.........+RaWRlPV.. 2 27 67 82 +388 PF02741 FTR_C FTR, proximal lobe Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain The FTR (Formylmethanofuran--tetrahydromethanopterin formyltransferase) enzyme EC:2.3.1.101 is involved in archaebacteria in the formation of methane from carbon dioxide. C-terminal proximal lobe of alpha+beta ferredoxin-like fold. SCOP reports fold duplication with N-terminal distal lobe. 25.00 25.00 37.30 36.40 19.60 18.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.72 0.71 -4.43 38 245 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 155 24 95 256 56 142.80 52 50.61 CHANGED -GEFlsE-phGhh.cGVuGGNFhlhucsp.uALtAAEAAV-AlppVpGsIsPFPGGlVuSGSKVGop...Y.+h.lsASTN-taCPTL+sp.s.cSclPssVsuVhEIVIDGlsccuVtcAM+sGIcAAs.....pssGVl+ISAGNYGGKLG.a+F+L+-L .-GEFlsE-shGhs.cuVGGGNhLlLAcspssALtAAEAAVsAhc.plssVIhPFPGGlVRSGSKVGS+...Y.+t.ltASTN-AaCPTL+uh.s.cScLs.....s....-lpuVhEIVIDGLsptsVttAM+sGIpAus.....thsGlhcIoAGNYGGpLG.h.h.L.t............. 0 27 67 82 +389 PF01728 FtsJ FtsJ-like methyltransferase Bashton M, Bateman A anon Pfam-B_1791 (release 4.1) Family This family consists of FtsJ from various bacterial and archaeal sources FtsJ is a methyltransferase, but actually has no effect on cell division. FtsJ's substrate is the 23S rRNA. 
The 1.5 A crystal structure of FtsJ in complex with its cofactor S-adenosylmethionine revealed that FtsJ has a methyltransferase fold. This family also includes the N terminus of flaviviral NS5 protein. It has been hypothesised that the N-terminal domain of NS5 is a methyltransferase involved in viral RNA capping [2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.27 0.71 -4.29 239 10200 2012-10-10 17:06:42 2003-04-07 12:59:11 14 62 4426 60 2263 11482 3102 173.80 25 13.42 CHANGED ahS..RuuaKLhph.cpa...........p........l..............................................p....ph.slDlGuuPGGaopshhpps.......................................................sspVhulDlt.h............................phshtl..........puDh...............tsh....hshp.plhph.....................................................................................................sDlllsDh.......................................p.phhuh....pLshsslt.hs................................p...................ssG...................hlsKlhp.....................stph.p.pllpplpp.......tFph.....................................................lthh+ss..........so+s.pu.sE.....a...llshth 
................................................................................................................hSRuuhKLpth....hcph........h........l.........................................................................................ps.......tp.l..lDL..G...susGG.Wo...hhsptt.................................................................................................................stcVh.u..lDhG...s.s..........................................................t...............hs....p.h..................pt.sh...........................hch.........pshp...sl..h....h....s...t........................................................................................................................................................................................................................................tpsD.h.l.l.s..Dh...........................................................u.....................s......shl..p.h...............t.h.s...l...h.s....Lp...h..s......h..........................................................................................................L..p..............................ss..s.......................................h.s...lK....lhp............................................................hh......pllc.ph..cp...............h..p.p......................................................s..t..lh.+.ss..........hSRs..ps..pEh....ahls...h................................................................................................................................................................................................. 0 771 1367 1878 +390 PF02687 FtsX DUF214; FtsX-like permease family Bashton M, Bateman A anon COG0577 Family This is a family of predicted permeases and hypothetical transmembrane proteins. Swiss:P57382 has been shown to transport lipids targeted to the outer membrane across the inner membrane. Both Swiss:P57382 and Swiss:O54500 have been shown to require ATP. This region contains three transmembrane helices. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.51 0.71 -4.35 133 37091 2012-10-03 05:18:07 2003-04-07 12:59:11 16 39 4654 0 8576 29605 7637 121.80 17 28.36 CHANGED hhhshlhllluhlslh..shhthtlt.....p+ppchulh+slGhsppplhthhhhcshll....shluhllG.hlluhhhshhhtphh................................................hhhshhhshhshhhshh...hshhlshlsshhsshphtp.hss .......................................t.hhshlhlllusls....lh......shhh.htlt.........pRp.cElul.h+s..l.G...ssp.pplht.hhhhE....shll......ullu..sllG...lh....l..u....h.h...h..s..h...h...l..t.t.hh.................................................................................................hths.h...t...h...s...h...h...s...h....l....h....s..hh.........hsh.l.l..s.l..l.s..u.h.h.s.shphhph................................................................................................................................. 0 3457 6175 7555 +391 PF04082 Fungal_trans Fungal specific transcription factor domain Wood V, Finn RD anon Pfam-B_306 (release 7.3); Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.79 0.70 -5.52 102 10823 2012-10-01 23:57:08 2003-04-07 12:59:11 13 234 216 0 8645 11592 1 237.30 12 31.77 CHANGED lphaaph....hp.sha............sll+c...ssahpph.............................spshLl...hsllslGsthtpptttpp.........................................................................phphhh.............pht.sspplhhlQ...uL....lllphathths...s..pphtht.....................apGhslp.hspshuLptpsst.p........................................hshp.tEh...p+.......RlaassahhDphhuhhhGpss......h...htspplphs....LPssss.h..stp.tt.t.............................................hhhhhpLp....plhsp...............................................lhs.lhshptphpppptp..........................hpplppplptWpp 
................................................................................................................................................................................aht..........h....h..h.......................h....lp................ph.tth...............................................................................................h..lh................hsl..h..s...h..u...s........h.....h......t...t......t........t................................................................................................................................t.t.thhh...................................................t...h.t..t..s..p..l.t...t..lQ.............uh.................ll..h....s......h...h.h...h....tts....................p....p....t..s.ah................................hh..u.h..u..hc....h..u...h......p....l..G...L....+..p..p.s.s.t..t........................................................................................hs........t...-h........+..+...................R..l.....aW.s.lah...h...D..........p.............h...h...u.....h...t...h..G..p..s............................h...............hp.p..p..p..h..s.h........................h..P.....t.............t..............................................................................................................................................................................................................................................................................................................................................h............................................................................................................................................................................................................................................... 0 1444 4077 7109 +392 PF01363 FYVE FYVE zinc finger Bateman A, Armstrong J anon Pfam-B_655 (release 3.0) Domain The FYVE zinc finger is named after four proteins that it has been found in: Fab1, YOTB/ZK632.12, Vac1, and EEA1. 
The FYVE finger has been shown to bind two Zn++ ions [1]. The FYVE finger has eight potential zinc coordinating cysteine positions. Many members of this family also include two histidines in a motif R+HHC+XCG, where + represents a charged residue and X any residue. We have included members which do not conserve these histidine residues but are clearly related. 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.60 0.72 -4.08 174 4289 2012-10-03 17:27:21 2003-04-07 12:59:11 16 325 331 13 2818 4121 81 71.10 32 8.02 CHANGED stWhsD.pps.spCht..Cpp.pF.s..............h..........h....pR+..HHCRpCGp....................lhC.......................................ssCS.........................spph.hh..............................................................t.ths.....psh..................................................RVCssCaptlpp ............................Wh.D.ppsspC..ht.......C.p..p.....p.F...s.............................h.........................................h.....pR.+......HH.C..R.........t.........CG.p........................lhC.......................................spCS................................spph.ls........................................................................................................h.t.t.pss...........................................................................................RVC.st.Capth..t.......................................................................................................... 0 898 1408 2169 +393 PF01392 Fz Fz domain Bateman A anon Bateman A Family Also known as the CRD (cysteine rich domain), the C6 box in MuSK receptor. This domain of unknown function has been independently identified by several groups [1,2,3,4]. The domain contains 10 conserved cysteines. 
22.60 22.60 22.70 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.53 0.71 -3.73 26 1926 2009-01-15 18:05:59 2003-04-07 12:59:11 17 130 149 11 1122 1701 0 112.50 29 19.98 CHANGED Cpsls..hshCpslsYstshhPNhLsHps..psEsthp.....hspahsLlph......pCpsshphFLCuhasPhC.hspht.....l.PCRshCEts+p.tCtslhpt...........ashtWP-hLcCschPh.pc.......hCh ....................................................Cpslp.....h.hC..p..s....l..s.....Y.s.h.T.h..h.P....N.....h.l..s..a.ps....pp-s.thp............................hp.t.a.h....sL.lph..........pC....p..s....p...l..ph..FLCuha.sPhC...p.p...h.......t.........................l.PC...R...slC...cps+p...tCp.slhpt.....................hs.htW.Pc..hhpCs.ph.P.tst.......C........................................................... 0 341 427 729 +394 PF01585 G-patch G7; G-patch domain Bateman A anon Pfam-B_585 (release 4.1) Family This domain is found in a number of RNA binding proteins, and is also found in proteins that contain RNA binding domains. This suggests that this domain may have an RNA binding function. This domain has seven highly conserved glycines. 20.30 20.30 20.30 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.14 0.72 -4.17 55 3420 2012-10-01 21:03:39 2003-04-07 12:59:11 18 129 386 0 2214 3446 25 44.30 34 7.29 CHANGED ssshGhchhp+MGact...GpGLG.....csppGltpPlpsphpppp......tGlGtp .............pshGt+hL.p.K.MGWpt.................G.p..GLG...............pp....t.p.......G.h...t..p..Plp..sphpp..pp.......tGlGh.............................. 
0 704 1102 1668 +395 PF01019 G_glu_transpept Gamma-glutamyltranspeptidase Bateman A anon Pfam-B_878 (release 3.0) Family \N 19.10 19.10 19.30 19.30 18.90 19.00 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.63 0.70 -6.02 52 5126 2012-10-03 21:14:07 2003-04-07 12:59:11 16 27 2591 76 1814 4680 4706 454.30 29 87.76 CHANGED hclLcp..GGNAlDAAlAsshsLuVlpPpusGlGGGuFhllhsssstps..sl..suREpAPtssot.....cha...........tsttphsthGsh.uluVPGtlsGhtphac+aG..plshtpLlpPAIcLAccGaslstthutshtpttth...hpppssht.......phF.....hs..sGps.hcsG-hhp.pPsLAcTLctlAp..pG..............scuFYpG.c..lAcpllcp..........hpp...t..GGllTtcDLssYc...s.chtpP.lpssa..s.........thtlhtsPPsouGhslh.hLslL-pas....tt............spthHhllEAh+hAaucRsp.lG........DPsass....s....hppLlsppaspphtptIssppshs..........................................................h.......t..sssToHhullDppGNsVShTpolshhFGSslhsstoGllLNNchsDFo...........ls.sstsN............tltPGKRPhSohsPsllhc...sGpshh..slGssGGspIhsshhpsllph.....lphshs.............lppAlsuPRhap.....p..s........plphEt.........shstphhptLp..ptG+plphhts.sshstshhhh......................tsushhuuuDsRpsG 
...................................................h.clLc.p..GGNAlDA.........A........lAsshs.LuVs.............p.P.p..us..G.l.G.G..suFhl.....l.......h......s.................s................p................s........t......p...........h.........................sl...................s.h.......p...thA....P...t....t..s.s..........pha.........................ttts.....t...t...h..s..h...h....u.h.......h..u.......l.u.l.PG..s.l....tuh....t........h.....h.....p.....+.....a...G.............ph.shppllpPAIp.lA..c.p...G.a....................ls........t....hs....p....h...h........t.........ptttt..........htp...........s..h.t.........................th.F........................h..........pG...ps...h......p...............G.......c..h.....hh..ps.....p.......LApTLchlAp..pG................................scu.F..Y..p..G..p.....lAc..plstt....................................hpp...........t.....GGh......lohpDL.ts..Y..p..................s..t...p.............p.P....lss.sa....t..............................................hhhl.h......P.P.s.u.s.G..........l.s.h.ht.h.L.s.lL-.ths.......htth.s.ts.......................................................spthHhhhEAh.+hA.au...........D.R.spa..l..u...............Ds.s..ahp....................hptLl.s.p..s.Y.h.tph..t.p......Is.p....p.shs...............................................................................t.....p.....tss...TsahsllDppG...s.sVS.......hTpolt.h..........hFGSul..h..s.........st.....s.....GhlLNNc.h.s.sFo..............................hs.ss..tsN......................tl.t..P....s.KRP.h.oohs.Psl...lhc.............s...............u.................p...s......hh...............sh.G.s.s...G.G..sth.....pshhpsllsh...............................ls.a..s.h.s....................hppAlst.PRhtt...............p........................plp.hEp........................sh.s.........p...h.h..p.tLp....thG.a.p....l....t...h....h...t........h........t....s...h...t...t..h.hhh............................ss..hh.uuuDsRpt................................................
.................................................... 0 561 1043 1459 +396 PF04114 Gaa1 Gaa1-like, GPI transamidase component Wood V, Finn RD anon Pfam-B_12685 (release 7.3); Family GPI (glycosyl phosphatidyl inositol) transamidase is a multi-protein complex. Gpi16, Gpi8 and Gaa1 for a sub-complex of the GPI transamidase. GPI transamidase that adds glycosylphosphatidylinositols (GPIs) to newly synthesised proteins. 19.50 19.50 23.20 20.20 18.90 19.20 hmmbuild -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.80 0.70 -5.80 4 376 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 269 0 274 373 2 347.30 24 66.68 CHANGED hpGpNlYGlLRAPRusuTEulVLsVPapsusu.phN.puuVuLhluLAcaFpthshWuKDIIhlloEpshhGhpuWLEAYHD.....shshh..pP..LphRuGSIQAAlsLElsosEssp..l-Vth-GLNGpLPNLDLhNlhppIht+cG.hsthph+hpspDhpopss..ps..L+pLhhhlhsQAsusspusHG..LF.pYRI-uLTL....R.h+uptphuaDhsshG..+AlEuhFRSLNNLLE+hHQSFFFYlllu.p+FlSIGsYMPullhLshshhLpAhptWhs.ttsshsL.cshu.....t..s.L.s...s.hhtl.asolsu.hLlophhthssalh.hLtpphht..hssh.hpssshhhLSl..huhhh................h.hhhlhsLlhhuhtlsslulhNFSLuhlsAhhhVP..lth.sKccspR.....olhhAsLshps.hlhhlslLhl..hphs.sPht.lhhcshphhhshls.uVhthlshsshla.VlshhahPhWllhhshohpK 
..................................................................................................G.slaulhp.A.PRusu....sEuhVlss.s...........................s.........................p..............s..............s.....tulslhlslhpa..hp..p....hW.u.KDllhl..h...s...................p..........p.........h.................u....h....p....uWlcsYHs............................................................l...h..puGs.l.puAlsl-..h.s....t..t..........t............lplhh-GhNGpLPNLDLh...N.hh...............th....s.t..t..t......................t.h.....t...............t................................................................t..t...t.................hpt....hh..hh.hh..p...tu.....u.h.s.p.ss..Hu.......Fh.aplpulTl...............................t.s..h...t..p.....................s.....h..t..hG........+hlEuhhRslNNLLE+hHQSaFhYll.t.ppFlSIu.Y.h...shhh...lhhs.hhl............u..h....h..t.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 92 160 233 +397 PF01590 GAF GAF domain SMART, Hughes J anon [1] Domain This domain is present in cGMP-specific phosphodiesterases, adenylyl and guanylyl cyclases, phytochromes, FhlA and NifA. 
Adenylyl and guanylyl cyclases catalyse ATP and GTP to the second messengers cAMP and cGMP, respectively, these products up-regulating catalytic activity by binding to the regulatory GAF domain(s). The opposite hydrolysis reaction is catalysed by phosphodiesterase. cGMP-dependent 3',5'-cyclic phosphodiesterase catalyses the conversion of guanosine 3',5'-cyclic phosphate to guanosine 5'-phosphate. Here too, cGMP regulates catalytic activity by GAF-domain binding. Phytochromes are regulatory photoreceptors in plants and bacteria which exist in two thermally-stable states that are reversibly inter-convertible by light: the Pr state absorbs maximally in the red region of the spectrum, while the Pfr state absorbs maximally in the far-red region. This domain is also found in FhlA (formate hydrogen lyase transcriptional activator) and NifA, a transcriptional activator which is required for activation of most Nif operons which are directly involved in nitrogen fixation. NifA interacts with sigma-54. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.70 0.71 -4.09 450 14959 2012-10-02 14:34:25 2003-04-07 12:59:11 21 1267 4851 93 4355 20618 2072 147.20 15 24.89 CHANGED shpplhpphhpplt.phhssspshl......................................................h..hptpthhh.hhhth.hsp............................................tthth.httsh............................hspshpsspslh...................h.........shhhtt...............ththpuh..hssP....h...hsp.......tp.........................lhGllsltppps....................ctasppc....hp....llpt.luppluhsl .............................................................................................................................................................................................................................hhpth.hptht.ph.h.s.h.ct...shl........................................................................................h...h..p..t.....s.....t....h.....................h....h...t.h...s..h.sp....................................................................................................t...t.t....h....h......h...t..p.sl............................................................h.t..p....h.h....p....s......s...p..s.h.s...............................................l.t.....s....h...t.........p....p.....h.........t...........s..h..s......t...p..t..................................................th.t.h.p.u..h...lshP..................l..ht.p..................sp...................................................lh..G.ll..s..h..pppps.....................................+hh..s...t...t.p..ht...........lhps.hutthsh........................................................................................ 0 1335 2627 3635 +398 PF01140 Gag_MA gag_MA; Matrix protein (MA), p15 Finn RD, Bateman A anon Pfam-B_229 (release 3.0) Family The matrix protein, p15, is encoded by the gag gene. MA is involved in pathogenicity [1]. 
20.80 20.80 21.00 22.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.88 0.71 -4.28 9 296 2012-10-01 20:35:47 2003-04-07 12:59:11 14 20 82 4 9 313 0 123.90 61 20.54 CHANGED GQolT....TPLSLTL-HWcDV+cRA+NQSVEl+Kt+W.ThCsuEWPTFsVGWPt-GTFNhslIhQVKphVFp.sPaGHPDQVPYIVTWcuLAhsPPPWVcPFlsPs......hsPos.stPsuPs.PSsP.pss..LY .............GQTVT....TPLSLTLpHWsDVp+hApN.QSVDVKK.RRWlTFCSAEWPTFsV....GWPpDGTFNlsII.QVKu+VFsPGPHGHPDQVPYIVTWE.ALAhDPPPWVKPFVsPK.........s..Po.AP.hPssPs..spsPspSs................................................................. 0 4 4 9 +399 PF01141 Gag_p12 gag_p12; Gag polyprotein, inner coat protein p12 Finn RD, Bateman A anon Pfam-B_821 (release 3.0) Family The retroviral p12 is a virion structural protein. p12 is proline rich. The function carried out by p12 in assembly and replication is unknown. p12 is associated with pathogenicity of the virus [1]. 25.00 25.00 33.60 33.60 24.50 21.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.06 0.72 -3.85 10 138 2009-09-12 05:17:05 2003-04-07 12:59:11 13 8 48 0 0 153 0 81.50 71 10.13 CHANGED PALTPolpsK..P.KPQVLP.DsGGPLIDLLTEDPPPYtsPtPsPPst-ssctEAssssEsP........sPSPMA.....SRLRGRREPPsADSTTSQAF ..........PALTPSIKsK..PsKPQVLP...DsGGPLIDLLTEDPPPYGsQ..PSSSst-sscEEAssTuEls........sPSPMV.....SRLRGRRDPPAADSTTSQAF..................... 
0 0 0 0 +400 PF02140 Gal_Lectin Galactose binding lectin domain Mian N, Bateman A anon IPR000922 Family \N 21.30 21.30 21.60 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.31 0.72 -3.67 126 1519 2009-01-15 18:05:59 2003-04-07 12:59:11 13 122 181 22 813 1468 19 79.30 33 12.15 CHANGED LpCs.s..phlplp.hAsYGRss..s.sC......s...hppsp................Cpus....s.ohshlpp.....pCpG+psCs.lsss.sssF...uD.P.C.sushKYLpV.....papC ..........................................................LpCsss....pll.tIp.sAsYG.......Rss.....sth..C............st.....t..p..h.p..p.h..p...................................Chss.......suhphl...pp........p.C.p....s+..p.....p.....Cs.lsss..sssF....sD...P....C..s.G.o.h.K..YLpV..pYpC...................... 0 314 482 674 +402 PF03127 GAT GAT domain Bateman A anon [1] Domain The GAT domain is responsible for binding of GGA proteins to several members of the ARF family including ARF1 [1] and ARF3. The GAT domain stabilises membrane bound ARF1 in its GTP bound state, by interfering with GAP proteins [2]. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.10 0.72 -4.04 33 1104 2009-01-15 18:05:59 2003-04-07 12:59:11 9 17 265 13 629 997 2 98.80 23 18.93 CHANGED tps-phuKht.scl-pVpsssclLsEMLsphssupppss-.-....Llp.......................-LhppCcphppplhcLh.........scspD--..sls-lLplNDsLspslp+Ycphscspps.tp....sts ...................................................t...t.htphh.splp.l.p.s.ss.clLsEMLpp.hs..s......s.....p..p..p...s..s.c..-......llp.................................-.LhppCcphpppl..hcLl........................................sp..sp-.--...h..l..tcl...LpsNDpLspsltpYcp.htpsp.......ssst......................................... 0 138 279 452 +403 PF00320 GATA GATA zinc finger Finn RD anon Prosite Domain This domain uses four cysteine residues to coordinate a zinc ion. This domain binds to DNA. 
Two GATA zinc fingers are found in the GATA transcription factors. However there are several proteins which only contains a single copy of the domain. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.18 0.72 -4.60 69 3798 2012-10-03 10:42:43 2003-04-07 12:59:11 22 92 384 40 2299 3654 15 35.30 45 8.26 CHANGED Cs..sCsTop..TPhWR+us..sGph...LCNACGLaa+hpthh .......Cs..s..C..t..osp.....TP.hW..R+..ss............sGph.........LCNACGLa.aKhps.......... 0 638 1149 1789 +404 PF00117 GATase Glutamine amidotransferase class-I Sonnhammer ELL anon Prosite Domain \N 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.44 0.71 -4.85 134 29637 2012-10-03 00:28:14 2003-04-07 12:59:11 23 151 6931 113 7451 26546 18089 186.40 22 46.33 CHANGED lllDshtuhptslh+tltphs......hplplh..s......pth........................psculllSsGPGsst........httthphlpphhp.....plPllGICLGhQhlshshGu......................plh..........................ptt.hshpGtsp.ltp..t............hhhshspshhstphHuhtls.....t.lsps..hplsshstss.......tlhulhcppt.....hhulQFHPEshhsspstphLhshhlph 
...............................................................................................llDhh...tht.tslh...ct..h.t....p.hs..................s.p.s...p...l...h....h..s...t.....t.tph......................................................................hpscu..l....l.ls...s....G......P.G.s.stt.......................hpt.t...h.p.h..l....p..t..h.hp..............p...l...P...l.....l..GICL.Gh.Q..h...h.sh.t.h.Gs.....................................................................................plh...............................................ptt....h.s.p..t.G.t....s..h.....l.t..p...t.......................................................hh..h..s...h..........p.....s....h...h...s....h.....p.......H...u....ht.lst.................................hs.ss......hp....l....s.....u....p.s......p.s........................thhu.....h.......h.......c......p..................s.....p..........hhulQFHPE..h....t..t..s..s..t..s..h..p.h.Lh.shh...h.................................................................................................................................... 0 2366 4699 6278 +406 PF02934 GatB_N PET112_N; GatB/GatE catalytic domain Finn RD, Bateman A anon Prosite Domain This domain is found in the GatB and GatE proteins [1]. 
23.40 23.40 23.80 23.90 22.10 23.20 hmmbuild -o /dev/null --hand HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.94 0.70 -5.55 147 4153 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 3865 37 1215 3100 3120 280.70 48 60.26 CHANGED Es..lIGLElHsQLsT.coKlFsssssp.aus.....p..P.NopssslslGh..PGsL...........................Pl.lNccAlchAl+huLALssc.I...sph..spFcRKsYFYPDLPKGYQIoQaphP....lspsG..hlc..lp......................pup.......+c...............................................................lt..IpRlHlEEDAGK.lHpts.................................thohlDhNRuGlPLlEIVo-P....Dh+oscEAtt....alpcL+pllpalulsDssM-cGolRsDsNlS.................l+......................Gp.....t...........hGsRsElKNlNSh+tlpcAlcaEhpRQhclLc.............................................................................................................................................pG..t....p.l.QETRtaDsspspThsMRsKEsApDYRYFP-PDLsPlhlscchl-cl ...............................EsVIGLEVHspLsT.poKlFss.....sssp...FGs............c......PNopssslshuh..PGsL....................................PV...lNcpsVchAl+huLALssc..I..s.p.p..stFsRKNYFYP.....D.PKuYQISQ......a....-....t....P....IstsGhl-.lp...............sGpp.........Kc...................................................................ltIpRhH.lEEDAGKshHts..............................................................shShlDhNRsGsPLlEIVoc....P....Dh....R....S....scEAhA....YlcpL+pllpalGlSDsp...M.......-EGShRs..DsNlS..lR..P...........................hGp..........pp............aGTRsElKNlNSh+hlp...cAl.c...YEhpRQhcl.L..c.......................................................................................................................................................s.G...G......p.l....h.....Q.ETRha.Dps........p...s....pThsMRs.KEsA.pDYRYFPEPDLsPlhlsc-al-p.h.............................................................................................................................................. 
0 421 796 1040 +407 PF02637 GatB_Yqey DUF186; GatB domain Bateman A anon Bateman A Domain This domain is found in GatB. It is about 140 amino acid residues long. This domain is found at the C terminus of GatB Swiss:O30509 which transamidates Glu-tRNA to Gln-tRNA. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.59 0.71 -4.49 103 3872 2012-10-02 13:42:24 2003-04-07 12:59:11 13 14 3640 37 1195 3139 2254 147.40 34 29.94 CHANGED -aFEpslpts..ss....s+tsuNWlhs-lhuhLscpshslpp...................lss...ppLupllph....IpcGpISsKhA.Kclh.pphh..ps.stss.........cpllccc.GLtplo..Dpstlppllc-llspN.spt.lcca+sGKp....+shualVGQVMKt.o+G+A.s..PphVsclLpccL ..................................................................................................caFEts.lttu....ss......sKhsuNW..l..hs-lhth...L.N.p......p.s....h.s.lpc........h..........................sl.oP...ppLupl.l.pl.........I.c.-G..sI.......SuKl......A.K.c.Vh.p.t.lh........ps...usss........................c.p.ll-c...c.G..L....h.Q.lo.............D......s.....u...s........l....psh.....lc-l..ls....s..N....spt...Vc....c...a...+........s......GKp........+u.....hGalVGQlMKs....o+.....G...p.A.NPphVscLLppcL............................ 1 410 790 1026 +408 PF03615 GCM GCM motif protein Griffiths-Jones SR anon PROSITE Family \N 25.00 25.00 31.80 34.40 22.30 21.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.60 6 146 2012-10-02 23:28:20 2003-04-07 12:59:11 10 1 79 1 94 148 0 139.50 68 28.86 CHANGED caDcFsEWsDGaVRaIYuscDccA+KHlSGWAMRNTNNHNspILKKSCLGVllCSpcCpLPsGuplpLRPAICDKAR+KQQsKpCPNptCc.G+LElhPCRGHsGYPVTHFWR+sGpuIFFQAKGsHDHPRPE.sKsooEARRuh .....................aDtFpEWsDGaVRaI.You.c.-+pAQRHlSGWAMRNTNNHNsp...ILKKSCLGV.VVCu.ps.CsLPsGs+l..pLRPAICDKARpKQQcKtCPN..Cs.GsLELlPCRGHuGYPVTNFWRp-GpuIFFQAKGsHDHPRPE.oKspsEARRu.h.......... 
0 21 27 55 +409 PF03074 GCS Glutamate-cysteine ligase Griffiths-Jones SR anon Pfam-B_541 (release 6.4) Family This family represents the catalytic subunit of glutamate-cysteine ligase (E.C. 6.3.2.2), also known as gamma-glutamylcysteine synthetase (GCS).\ This enzyme catalyses the rate limiting step in the biosynthesis of glutathione. The eukaryotic enzyme is a dimer of a heavy chain and a light chain with all the catalytic activity exhibited by the heavy chain (this family). 25.00 25.00 27.20 25.20 19.30 22.80 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.37 0.70 -5.40 10 455 2012-10-02 17:21:26 2003-04-07 12:59:11 11 6 292 4 308 460 7 297.10 42 57.32 CHANGED IYMDsMGFGMGCCCLQVTFQAsNIsEARaLYDQLusICPlhLALSAATPFaRGhLuDhDsRWsVISASVDDRTtEERGlsPL................psp.hcI.KSRYDSlDsYlSsss...EcYNDIsLsIscclY-pLl-sG...IDchLApHlAHLFIRDPLslF-EpIclDDssco-HFENIQSTNWQTMRFKPP.....PPcS.-IGWRVEFRPMEVQLTDFENAAYsVFVVLLTRsIlSa..+lsFhhPlShVDENMKhApcRDAlLppKFhFRKDI..Cps.s....hssKsssso--suE....MSIDEIING..KcGsFPGLIP........llRpYLEstclDsDTRC.lpsYL+hISKRAoGEl.TsAcWhRpFlssHPDYKcDShlTDcIsYDLlp+scpIAs 
...........................................................................laMDuMuFGMGssC.LQl.TaQuts.ls-uRhlYDpLsslsPlhhALoAAoPha+G.als-.DsRWshIusuVDsRT.cE..h...u......Ph..............................................tp.t.................hI.KSRY..sShs.Yl....u..t.............p....................................p.Y...sD.s.lshs..pplhp.p.LhptG........hDchLAp..HhAHLFIRDPlslFpEplp....s..pp.....s-.HFE....................NlQSTNWQphRFKPP...........P.s....s.s...s...lGWRVEFRshElQlTDFENAAassFlsLlo+.sI.....L.s.a..........plsh.hlPloplpENMphApt.psAshp..t.hFaFR.c.s.....................................................................p............p..........................holspIh.pG......p.............t........s.FsGLls..............l.l...pp.a..l......p..p.h......p....hs......p...pt.................l.tYLphIppRAs..Gpl......TsApahRpFlhpHPtY+pDShlspplsaDLl.th.tl..t.................................... 0 114 166 260 +410 PF03009 GDPD Glycerophosphoryl diester phosphodiesterase family Griffiths-Jones SR anon Pfam-B_4008 (release 6.4) Family E. coli has two sequence related isozymes of glycerophosphoryl diester phosphodiesterase (GDPD) - periplasmic and cytosolic. This family also includes agrocinopine synthase, the similarity to GDPD has been noted [1]. This family appears to have weak but not significant matches to mammalian phospholipase C Pfam:PF00388, which suggests that this family may adopt a TIM barrel fold. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.62 0.70 -4.88 34 9993 2012-10-01 22:17:21 2003-04-07 12:59:11 12 139 4031 48 2657 7515 2527 235.00 21 65.67 CHANGED HRGsusphP...........ENTltuappAhptGADhlEhDVplTKDGhsVlhHD..........tplscssssss.....................hlpchohp-lpphp................................................tst....h.tpth........................sTLp-hlph.........hssshphtlclp.hsphhthpt....hstthsthh.phh..........tstplhhpoFphcthphhpphts.....phshhhLhptss.ths...........tthphhts.shhsththhsts.......................hlpts+ppG.ltVhsaTlsst..................tphthhhchGVDGllTDpsst ..............................................................................................HRG..s....s...s.....h..hP.............................................ENT........l........t...A.hp.t.A.........h............p........h......G...s......c.....hl..E..h.Dlp..............hT+...Ds...p.............lVlhH.....D.............psl...c....R...so.ssps..................................................................................hlp-.hTh....p.-.l.pplc..................................................................................................................................................................h.t..............h.....s..p..t.h.................................hsoLc-hlch...................thp..h..t.h....t..l....c....l..c..............h....s....t.h.h.t................................t.h.h.......t.........h.h.....t.....hh................................................................................................................t....p...........h.h....lp..S..F..s...h.p...l.pth.pphts.......................ph..h...s..h...l.h...t........t.............................................h.t......h...t..t...........t..h..h..h.....t..h.t.......h.s...h..............................................................................................h...l.pt..h...+.p....t..s..h..l..h........s..a...Tlsp..
....................................t.ht.h.h....h.....p...h...s.......l......c....u.lh.TD.st.h.................................................................................................................................................................................................................................................................. 0 842 1578 2169 +411 PF00626 Gelsolin Gelsolin repeat Bateman A anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.46 0.72 -4.29 54 5387 2012-10-01 21:06:05 2003-04-07 12:59:11 17 133 440 183 3048 5007 35 78.00 19 21.63 CHANGED ph..s.ss.hstsplpsscsallDssh...........plahWhGpp..ss.tcpshus.hstplp............pph.shs.hhp.spu...pcsspFh ................................h....s..s.hsts.pl.ps..s....D.sa..ll-.sst...............................................................plahWhGpp..........ustp....E......c......t......t.........u..h..h...hs..tplp..................................pphts..h.....s...hhh....pG...tEs.tF............................................................. 0 854 1348 2192 +412 PF01408 GFO_IDH_MocA Oxidoreductase family, NAD-binding Rossmann fold Bateman A, Griffiths-Jones SR anon Pfam-B_342 (release 3.0) Family This family of enzymes utilise NADP or NAD. This family is called the GFO/IDH/MOCA family in swiss-prot. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.62 0.71 -3.59 52 20348 2012-10-10 17:06:42 2003-04-07 12:59:11 17 96 4230 256 6149 17689 10023 120.50 24 33.45 CHANGED h+lullGs.Gth...upha...htshhps.pth.......clsulhs.sttpucth....upphshs.....sass..........hp.plhsps..c...lDslhlssPs......th....HhphshthLptGh..aVlhEKPlsh......ohp-spclhphsccps..........hh.ltlsa ....................................................................................................h+lullGs....G...th.............up.t.a.........h.t..s...h..h..p..t......ssh................................cl.s..u..l..s..s....h....s.........p....p....s..p.t.h................................up.p.h....s..ht..............hh..s.s...................................................................hc...cllsps.........p.............lD...sVh.l..s.oPs.............sh.......Hhp....hs..h...tA..lp........u..GK........HVh.....s.....E...K...Ph..uh..........shp-..sc.c....l...h....ph...Ac...c...p.s....hh.lhh............................................................................... 0 2220 4074 5250 +413 PF02894 GFO_IDH_MocA_C Oxidoreductase family, C-terminal alpha/beta domain Bateman A, Griffiths-Jones SR anon Pfam-B_342 (release 3.0) Domain This family of enzymes utilise NADP or NAD. This family is called the GFO/IDH/MOCA family in swiss-prot. 
20.80 12.60 20.80 12.60 20.70 12.50 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.30 0.71 -10.30 0.71 -4.29 37 12324 2012-10-02 13:21:44 2003-04-07 12:59:11 12 35 3630 193 3449 9524 4058 106.70 15 29.99 CHANGED +chlppst.lGplhhhp..thhtspttt.spht.p............suGshh-huhH.lDhhphLhGt..spss...shh..hphsptsphtsst.......................hshthssht....ssthssshspshshphhhh .................................................phlppGt.lG...c...l...h....h....h.....p.................h...........t...h....h...............t.......s...............s......t...h....t....hp......................tuuG...s...l...hD.hu.sHhlDh......s...h.a...l....h....G.........p.............s.....p...h...ss..............sh..............h.......h...........t..t....t............s.........................................................................h.h..........h..............................hh.......................................................................................................... 0 1200 2256 2931 +414 PF00990 GGDEF DUF9; GGDEF domain Bateman A anon Pfam-B_112 (release 3.0) Domain This domain is found linked to a wide range of non-homologous domains in a variety of bacteria. It has been shown to be homologous to the adenylyl cyclase catalytic domain [1] and has diguanylate cyclase activity [4]. This observation correlates with the functional information available on two GGDEF-containing proteins, namely diguanylate cyclase and phosphodiesterase A of Acetobacter xylinum, both of which regulate the turnover of cyclic diguanosine monophosphate. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.97 0.71 -4.65 49 42746 2012-10-01 23:51:22 2003-04-07 12:59:11 16 1972 2995 26 12809 36762 3058 153.10 28 27.32 CHANGED AthDsLTsLsNRphhpppLppthpps.tp....ppthullhlDlDpFKplNDpaGHpsGDplLpplAptLppslRp....sDlluRhGG-EFsllLspss.p....stphtpthcphlpphphsh.thsshth.lohSlGluth..........tpspshppllcpADpALYpAKppG+Np .......................................................pDs.LTG.LhNR....p....h...h.p.....p...p.......l........p.......p.......h...h.........p.......p.....s......tt....................t..p.......t.....h.....u....l....l....h....l..Dl...D.p.F..K..p....l....N....D.....s..a.G....H..t..sG...D.....p.....lL.p.pl.A....p.t.l...p....p.......t......l......c...........p................s-..............h............l.............u...............R..............h......G.........G....-........E..F...s.....l...l..........l.........s.....s...s......s...t.p........................p.........h.....t....p.....h.........h.p....p...h...l...............p...................t.............h.........p...................h..............................h.................................................t............t...........h..............t...................h........t.................l.....o...s......S..l..Gl....u..h.h...................................p...s.....p......s.....h.....p....p....l.....l....pp...A.D...t.Ah.Y.p.A.KppG+s...................................................................................................... 
0 3883 8047 10652 +415 PF03321 GH3 GH3 auxin-responsive promoter Mifsud W anon Pfam-B_3652 (release 6.5) Family \N 19.60 19.60 19.60 19.60 19.50 19.10 hmmbuild -o /dev/null HMM SEED 529 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.79 0.70 -5.99 64 945 2012-10-02 15:58:18 2003-04-07 12:59:11 8 9 449 8 439 940 720 439.50 24 91.84 CHANGED ctlc......p..h-pht....................psst...plQcclLpclL.ppsssT-ah++ashp......uh..hs........c..........FpppVP.......lhsY--l.cPaIcRltsG-........ps.llhsps.lphFstSSGTouupsKhIPhoc-.lcphphhtuhthhh..hppphssh.hpGK..hLths...sphppp..GhhhGsloshhhc..s.h..hp............hhpsPsptlht.-sapshhppllpthh.cpclttluul.uhhllhhhchLEpphpch...........................................................ltclWPshc.lhshsssuht.Yhtphcthhsu.h.hh.psYsuSE.Gahulpsp....spcsuhhlh.shu.aaEFlPhcc.t............................sphlsLs-VclGppYtlllTThuG...................LaRYplGDsl+h.Tuhpst..plphssRpphhlslhu-+hstcclppAlppu....h.ptt.....shpls-aT.st.sp.h.stsu+ashhW.lpsc......st.........................shp..phsptlDpuL...NssYctpRptshplsslcl+llptGsFpchhc.thu+.Gu...QaKsPRhs..sspph..t...l 
.........................................................t.....hp.hh..ppst...thQpplLtpll.ppstpopahp.pa.tht...............t.h.shp.................p..........FppplPlhsY-ch.pshl.pR...h.hp.Gc................ps..ll.h.st.....lhhahh.o.S...G..........To.ss...t...KhlPhspc....hpphp..hshth...hh...........htp...s......p.G+..hh.h.....th.p...ss....h...ht.shts.....h..h..p...s.h......hp..............p.Pst........h...h.....sthp.p.h..httlhpthh.ppplthlu...uh.s.hhhhhhchlp.t.....phpp.h..........................................................................................................htcl..WPphp.l.hhssssht..Yhtph.c.t.hhs....t..h.hh.phYsuSE.u.ah.uhp.p.............................pp..s..s..h..hlh.shu..aaEFlPhcp.t......................psphlsLt-VchGppYtlllTT...uG...........................LaRYplGDslch.suh....t.s.....t............phphssRpph..hlsh.u.-c.h.tpphppAlppu.........h..t..........shplh..-ao....sh.....s........t...............................st...........s..c..........hhh.W.lpht......sp.........................shp...phsthl-ptL...sssYcttRht.hsltslclphl..p.ts..h..Fpphhc..hsp.Gu...QhKhPRh....p.......t......................................... 0 135 318 377 +416 PF00288 GHMP_kinases_N GHMP_kinases; GHMP kinases N terminal domain Finn RD, Mistry J anon Prosite Family This family includes homoserine kinases, galactokinases and mevalonate kinases. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -9.32 0.72 -3.91 455 15529 2012-10-03 01:04:38 2003-04-07 12:59:11 21 62 4900 114 3877 10927 3091 64.70 24 18.95 CHANGED clpl.pssl....Phu.uGLGSSuusssuslhu....lsphh.....s....h.s.............l........sp............pl...hphuhtsc..........tGsss....ssshhGs ....................................lplpssl.....P.h.u.uGLuSSuushsusl..hu.....hsphh........s...........h..s............................................L........sp.................................pcL.........hphu.t...ts..E............ts..ss..ssshhG.............................................................................. 0 1260 2384 3266 +417 PF00594 Gla gla; Vitamin K-dependent carboxylation/gamma-carboxyglutamic (GLA) domain Bateman A anon Prosite. Domain This domain is responsible for the high-affinity binding of calcium ions. This domain contains post-translational modifications of many glutamate residues by Vitamin K-dependent carboxylation to form gamma-carboxyglutamate (Gla). 20.40 20.40 21.20 20.40 19.90 18.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.92 0.72 -4.27 29 998 2009-01-15 18:05:59 2003-04-07 12:59:11 15 54 109 47 394 801 0 41.10 45 11.04 CHANGED lEEhctGslEREChEEhCshEEApEhapss.tTptaap+Yhs ..........hEEhppGsLEREChEEh.CsaEEARElFE.s.sp.cT...pt.FWppYh................ 
0 28 60 159 +418 PF00208 ELFV_dehydrog E_L_F_V_dh; GLFV_dehydrog; Glutamate/Leucine/Phenylalanine/Valine dehydrogenase Finn RD anon Prosite Domain \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.48 0.70 -4.80 124 6466 2012-10-10 17:06:42 2003-04-07 12:59:11 16 23 3999 221 1773 5816 1828 223.60 33 52.44 CHANGED GGS.hsRscATGhGlhahscchhcp..ttp...plcGppVslpG..GNVutasschhhch.GA+lVulSDupGslh....c.sGl......clptlhch.+pppts..lssas.............c.hh.ss..p....................ht.......l.......csDlhlPs.AhpNplstcsAc.hl.....+.......s+hlsEGANhPsT..-Ahc.lhpcpG..........lhhsPshhANAGGVssSuhEhsQN.tth.WopEcVsp+Lcpl...........Mpshacsshptupc..hshs........hhtuAslhuhp+VAcAhtspG .........................................................GGS..hsRscATG......hGlhhhsc......phh..cp....shs...................h.cG..t....p.V...s...l...p.G...GNVutas.s..c..h..h.h..........p.........h...G..A..........+..V....l..s..h......S.....D......u.....s.......G...h...l.h......s....s...Gl........cl.stLh..c....h....+pppts........lspas......................thp.hh...p.st....ph..............................................................ap........h.......psD.lhlPs......A.h.pN.plst...c.s.A.p.hl................................p.....................s...phVsEGAN..hPo.T.....-.A.....h.........c...lh.p..c..p.G...............ll.hsPshsuNA.G...........G.......Vs.s.S.h..h.EhsQN..........t.h.W..s..t..-cl..pt+Lcpl..........................................Mtshapt...s.h.ph.u.pp...ths.................hhhuA.hhuh.+lspAhh.pG...................................................................................................................... 
1 635 1175 1539 +419 PF00042 Globin globin; Globin Bateman A, Chothia C anon Structure_superposition Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.33 0.72 -3.71 73 6000 2012-10-01 21:46:00 2003-04-07 12:59:11 17 34 2886 1971 1261 5331 34 99.80 25 43.59 CHANGED ppthlpssWsp.lhs......ph.thGschhtclFpsaPps+shFsph.ts.p.....tsssphcsHut+Vlsulspslspl.....s.slpstlppLuspHtpct....lssspFphhtpslh ........................................hshlpsshsh.ltt................pssch.ssch........h..t....R....hF..t..saPp.s.p.p.hF..s....ph..........s.t...................ss..t.phpspu..ppl.hs.ulsphlppl..................sl...t...sslppL...u.phH...sshh........V.cPppapllupsL................................. 0 348 594 949 +420 PF04898 Glu_syn_central Glutamate synthase central domain Bateman A anon Pfam-B_455 (release 7.6) Domain The central domain of glutamate synthase connects the amino terminal amidotransferase domain with the FMN-binding domain and has an alpha / beta overall topology [1]. This domain appears to be a rudimentary form of the FMN-binding TIM barrel according to SCOP. 
21.50 21.50 21.60 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.48 0.70 -5.15 37 3372 2012-10-03 05:58:16 2003-04-07 12:59:11 9 34 3044 15 950 2992 3369 283.90 42 18.60 CHANGED lhppQpAFGYThE-lchlltPMAcsGpEslGSMGsDoPlAVLSc+s+hLYcYF+QhFAQVTNPPIDPlREp..........lVMSLpohlGscuNlLc.sspp..s+plpLcoPlLsps-............................hpp.l+sh..........pthpstslshsashp....pu............................hpu.LcsulcplsppAcpAlcsGtslllLSDRp........hstp.+ssIPu...LLAluAVH+HLlcpslRscsullVEou-sR-sHHFAsLlGYGAsAlsPYLAhEolpp..htccshh........phshppshpNYp+AlspGllKlhSKMGISTlsSY+GAQIFEAlGLup .................................................h.phQptFGYotE-lchlltsMupsG...pEslGSMGsDoPlA..VL.S.pc.s..+hLacYF+Q...hF...AQVTNPPIDs.lREp...........hVhSLtohlG.t.-.h.......N.l.L.s...........st..tp.........s+...+l.plpsPlLsp.s.-...........................................................................................................hpp....lpshpp................ppac..sppl..sh.sasss....................................................................................tps...Lctulccl..ssc.A.ppA.V.c.....s.G.s.s.lll.L.SDRs...................l.s.p.s..+hsIPu...LLA.luAVHp+Ll.......c.......p.......s......LRs.psullVEoGpsR-sHHaAsLlG.aGAsA.l.s....PYLAaE.oltc.........hhcpttl...........................sh.s.h.cpshpNYtculsKGlhKlMSKMGISTltSYpGAQlFEAlGLs.p........................................................................... 1 289 615 816 +422 PF04960 Glutaminase Glutaminase Bateman A anon COG2066 Family This family of enzymes deaminates glutamine to glutamate EC:3.5.1.2. 
19.80 19.80 19.80 19.80 19.70 19.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.78 0.70 -5.43 17 2528 2012-10-02 21:13:33 2003-04-07 12:59:11 10 25 1760 57 544 1601 173 277.00 43 80.87 CHANGED GpVAsYIPtLA+ssssphGlulsssDGphhpsGDschsFolQSISKVhshslAhtchGtc.lap+VGpEPSGpsFNSlstLEhcp.uhP+NPhINAGAlsVosllpucss.t-thphllphlcplsGspplshsptVhpSEhpTu.RNtAlAaah+phGshpp-l-...ssLchYF+tCulchoCpsLAhhushLAssGhsPhosEpVlstchs+phtAlMhTCGhY-sSGpFAa+VGLPuKSGVuGGIluVVPs..........hhululaSPsLDchGNSltGsthLcpLuphhshSlF .......................................GclADYIPtLApV.s.ssphGlAlsT.s.D.G.p.h.a.p.A....G....Du.....chp.Fol..QSISKVhoLslAhpc..h..Gtct..lap+VGt-PoGp.sFNSlltLE..h..cp.GhPpNPhINAGAlslss.h..l.......p.......u...........c..h.......s........p........th.....pclLchhcpLuG........p..p....l..shsptV....spS.Eh.p.p.u.t.RNtAlAah.h+..oh..G.h..p..s..Dsp..........psLcsYh.+tCulphssh-LAphushLAstGh.s.Ph..s....s......c.pV.lss..pp.s+plpAlMhTs..G..hYstuG..-F.Aa...............+....V...G.LPuK.SGVGGGIlAlVPs...........phuIulaSPtLDpt.GNS.ltGhthlc.p....Lspph.Ghsla.............................................. 1 146 287 427 +425 PF04488 Gly_transf_sug Glycosyltransferase sugar-binding region containing DXD motif Kerrison ND anon DOMO:DM04307; Family The DXD motif is a short conserved motif found in many families of glycosyltransferases, which add a range of different sugars to other sugars, phosphates and proteins. DXD-containing glycosyltransferases all use nucleoside diphosphate sugars as donors and require divalent cations, usually manganese. The DXD motif is expected to play a carbohydrate binding role in sugar-nucleoside diphosphate and manganese dependent glycosyltransferases [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.52 0.72 -3.63 22 1796 2012-10-03 05:28:31 2003-04-07 12:59:11 10 51 807 0 967 2109 434 94.40 21 24.68 CHANGED spchhtshcohh.phpP-...hphhlhscpht..............................shphlhpp.ss.......hh.phhs......hh.h.hthuDhhRhhlLa+YGGlYhDhDshslpsl.......sslhsppthh .................................................................................................................................................................................hpphh..phpPs.....aph.h.h.hscp........................................................................................................................................h.thh..hpp..hs.............hhpsap............p.h..h....h.h....tuDhhRhhlLa....phG.GlYhDhD.sh.sl+sl.......s.hh......t................................. 0 321 557 794 +426 PF00722 Glyco_hydro_16 glycosyl_hydro9; Glycosyl hydrolases family 16 Bateman A anon Pfam-B_759 (release 2.1) Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.48 0.71 -5.00 120 4255 2012-10-02 19:29:29 2003-04-07 12:59:11 16 172 1239 83 2173 4494 761 179.30 18 46.71 CHANGED hsppass.sasssps.h........s.......tslsLslc+......................to.GushpS...pptahaG...phpsplK..supusGlVouFal..s...o.ts...sspDEIDh.EFLGs.sssp......lpTNha....spGpus......p....E.phhh...haDso.psFHsYulhWssspIhahVDGhslRphpppps...s....s.a...Pp.pPMp.lhso..lWsus.......sausptG..........hhphDWst 
..............................................................................................................................t......h......ts..........h.l..thtp............................................hs...uutlpo...............ph.p....h....h.....a...G..............p.h...c.s..p..hK...........hst.....u.s...G.h.....hs.A.hah......p...................s............s..s..ps....E...IDl..E..h.......l.......Gs..p.ssp............................hp.s..s.....l..a..................sp.u.tss......................................p.........ptp.h.h..............h.....s....ss....s.saH.....s.....Y.....s.....l.........................W.......s.........s.......p........p........l..h...a....h.l.....D....s.........t.........h.t.....p....hpttt................................a...........P............ht....l....h...h....s........a.s.ut..................t..........................hta............................................................................................... 0 645 1357 1879 +427 PF00704 Glyco_hydro_18 glycosyl_hydro8; Glycosyl hydrolases family 18 Bateman A anon Pfam-B_574 (release 2.1) Domain \N 29.60 29.60 29.60 29.60 29.50 29.50 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.36 0.70 -5.00 165 8946 2012-10-03 05:44:19 2003-04-07 12:59:11 23 253 2235 288 3234 8695 370 279.40 18 59.57 CHANGED 
tpllsYas....pau.htts..................h.hppls.......pthoHlhauFsslssssth..............................................................tt.ststpsthpphspl+..ppss..slKlllSl...GGht.us.............sasthstss......pt+pt.FlpSshphlppat.....................hDGl.DlDWEassst...............................ctpsastllpcL+pthpp..........................phhLosA..........hssss....hhpth...shsphtph.lDalslhsaDahu......htt......suhpssLasss..............................................................hssp.slphahpts.ss.spKl.hGhshY.upuaphsss..t...............................................s.ttuhhsapplsphh.........................tht.thsss.spss.ahhps...............h..lsa-sspohttKspahpptslGGlhhWsl.s.hD ........................................................................................................................................................................................................................................h.sh.h......h.sh.h......................................................................................................................h..t.....h....t...ht.....t.......th..p...h.h....h...u.....l............G..Gh....tt.........................h...t.h..s.t.s.t...............tt.tpt..a.hp..shh.p.h.h..p..p..hs......................................................h.D..G..l...D............l...D....aE..hssst....................................................................stps.as.hL.lp.....pl+.pthsp.................................phhL.o.h..A...........................h..ss.s.t...............hhpth......ph......t.....t....h........s....p.......h.....l...............D......a.ls.l...M..sY....D...hhu.......htt................ss..ts..s..lht.........................................................................t...h.p.......s.l...p...h...h........h...t........t...s..hs...sp..K..lhlGh...s.ha..s..h..sat................................................................................................................................h....................................................
........................................h.....h...................................h.....h...s......h..p...........................t.h...G...h.............................................................................................................................................................................................................................. 0 1050 1817 2735 +428 PF02055 Glyco_hydro_30 O-Glycosyl hydrolase family 30 Mian N, Bateman A anon IPR001139 Family \N 20.20 20.20 20.50 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.55 0.70 -6.30 3 1151 2012-10-03 05:44:19 2003-04-07 12:59:11 11 52 589 84 517 1057 187 328.70 21 77.79 CHANGED AsDCspKTF.KTGlVCVCNlTYCDEI.PPlslshGQAApYTTS+SGARLHRDVlYATso-PhToLHhTIDSSKKYQTIQGFGSTFSDASGANLKSLPDphuDLILKQYFSDoGLNLQFGRVPIASsDFSuRVYTYsDss-DYsMpNFSLs+EDaQWKIPYI+pAQKYNpc.LKLFAuPWoAPGWLKTTsussGhGuLpGpsGDsYHQoYA+YFV+FLEEYuKsGIpFWGLSTQNEPTuGSDKKsKhQShGFTAEaQRDFIKpDLGPALAuSouGKNVKLLILDDNRGNLPKWADTVLNDhDAAKYVuGIAVHuYQD..uEoDsHLsETH+pHPNsFIFGTEASEGSKSKDppVDYGSWDRAtDYuSDILDNhNNWVTGWTERNLlLDApGGPSWVSsFsDAPVIAFPAhAQFYKQPMFYAIAHFSHFIKPGAVRIDHSLNhhN.ElEsoAFLNPDGSKVVVlLNKuSLsss.aoLoIKDsAcSpsHYphTLSP+sIlTLYIQ 
..........................................................................................................................t..........................................................................................................................................................................................................l....l..........t...p..h.Qp.h...GhG..ss...hsttsh........h.......t...h.......t.....p..t...........hh.......p.........ha....s..........................p.................t.......h.thshhR......h........sh.......su....s.....D.h..s................t..............a..s.h..........s....c.................t..........D......t............h.....t....p.....h...s......h...................................p......t..........................l...s.hl.p....t..h.......................t......h..........s............t....h.p..lh.u.oPWosP....saMK......s.......s...t............t.........h...........................................................s...................h...........................................................................................h......t..........s..........aApYh.hcal.pt.h.t.t.p.Gl..h.slohpNEP....................................h...s...hhhsst.....t..tta...hht....LtP.t......ht...tt................t.htl..........h............h..........h.....-.........p..........p..................................................t...............h..................................h......h..t.p......t...........s.....p...h..h.tGhuhHhY....................................h............h....t..h.P..p.h.......hh.oEt.......................................................................................................................................................h.......h..h...t.....h.....h..........t...hh..Wshhh....p....................s..........t...........................t.....................t........h.......h..............t............t.........h.....h.....p...........a.Y.hhuphu.+al.....G...u.p..lt.............................................................h........
........suh......p..s......-.s.p.h.s.h.l.h.h.N.t.t..........................................hh...................................................................................................................... 0 224 348 456 +429 PF01055 Glyco_hydro_31 Glycosyl_hydr15; Glycosyl hydrolases family 31 Finn RD, Bateman A anon Pfam-B_369 (release 3.0) Family Glycosyl hydrolases are key enzymes of carbohydrate metabolism. Family 31 comprises of enzymes that are, or similar to, alpha- galactosidases. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null --hand HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.43 0.70 -5.66 107 5543 2012-10-03 05:44:19 2003-04-07 12:59:11 21 109 2144 97 2001 5559 314 398.50 24 54.47 CHANGED YhhsGs...................s..s.pcllcpY.sploG..........+ssh.PhWuh.Gaap..sRasY..........ps....ps................................................cltplsppaccpsIPl-shahDh-..............Yhc.shc..sF...oactppF..........P......................pscphlccL+.ppGh+hlshlcP.........slptsps.......h.sapcuhppsh.al.......+.pssG.p..................hhuts.Ws.....................s.........ssasDF.....hsP.c.sppWW.tsphpp....hhsp.........ul-uh.WhDhsEsus..........h...........................................................................................................................................................................................................sh.shsshphsss............pphchHNlYuhhhspusacu..lhp..h.psscRsalloRSsasGup+auut.WoG......Dst.usW............pslph...olsthLshu.lsGlshhGsDluGFtus...........sst.....ELhsRW.hQhGuFhPahRsH.sshs....sptp..................EPahasp....spshh....+phlplRYpLLPYlYohhhcupps.GhPlh.RPLhhcaPp.D....tpshs.lccQahhGs.s...lLVuPVl.................pps.ssphpsY.LP......pG..p...Wa-hh......stcth....puG..p...hlp..l.suPl..splPlalR..sGsIl 
...................................................................................................................................................................................................................................................hh.G...p.....ttlhppa.tt.lsG.............ps..s.h...P.h.a.......u........h...Gh.h........sp.hth.................s.....pp...........................................................................p.l...p.hh.ct..h....p..p..t...s..l.P......h............s...s.h....h.hDh.......................................................ahp...shp..........sa................p.a..s.....t..pa..............................P..............s...p.thl.p.p.l..+..p..p.G..h..+.h.h.h.h..l...p.P........................hlttpps......................a.pc..h.h...p...p...s.h.hl.................p..p.pu...t...........................................................................................h....h...th...a.........................................................................................s........ssh...D..h.....hs..P....p...stp.a.a..t..p..h..h..p..t.......hhtt...................Ghs.s.h.h..h.................Dh.s.E...........................................................................................................................................................................................................................s.ssh.h.h.ss...............p..p...h..+N.h..........a.s..h....h................h..s..ps..shps................htp........................tp.....csh....l.hs......R.........u........u.h...s.Gsp+.a.......s.sh....W..sG...............Dsh....ss....W.....................ps..ht.pl.ts.Lshu.hsG.h.s...............h.h...........u...tD.l..G..GF..ss.....................sst................................ELh.h.RW.h.phG....s.a....P...h.h.Rh..H....s..s.s........t.........................................-PW..a.spt.............stsh..h..................+ph....h....p....l.R.h.p.L..h.PYlYshhh.p.....s......p.......p..p........GhP......l.h..RP..h...hh.c...asp..
..D.....................t..s.h..p.....l.........p.....p.............p.a........hh.Gs..s..................l.LVu.PVh.....................................pts...t......t.....p......h...ph.....Y..LP........pu...p...............W....hchh.......................ss.p.th...................pGu...p..............hhp...........h.tssh........t............p........l.P.l.ah+sssh.................................................................................................................................................................................................................. 1 765 1179 1663 +430 PF01532 Glyco_hydro_47 Glycosyl hydrolase family 47 Bateman A anon Pfam-B_958 (release 4.0) Domain Members of this family are alpha-mannosidases that catalyse the hydrolysis of the terminal 1,2-linked alpha-D-mannose residues in the oligo-mannose oligosaccharide Man(9)(GlcNAc)(2). 20.40 20.40 20.40 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.47 0.70 -5.83 139 1956 2012-10-03 02:33:51 2003-04-07 12:59:11 15 40 313 21 1367 1890 72 398.90 31 70.19 CHANGED 
hFh+uWsuYtc................aAaspDELpPlSpss.............pss..........................h....s...shGhTlVDuLDTLhlM...........shp..................cEapcuhcalt......ppl..s...............as.................st.t...l...........slFE...........................TsIR............hLGGLLSAatL..o...............................................tcsh..................lLc+Ah-L.u-pLh..sAF.pT..s.oGl.......PhsplN........................l.........tp...st.t..thsst.........................................................ts..s.....h.AchGoltLEFsh............LSpLTG-spYhchsp+shphlhp.........ttp...GLhPhhl...sspsGpas.t..............................................................................................................................................................phslGutuDS......aY........EYLlK.........talhh..u...t...p..................................p............................ahchap...........................puhpulpca.hhp.......t.............thhalup.....h..............t...th.hps...ph-HLsCFhuGhhuLuuh.h..........................sclchA.pclscsChphYpph..oGlhPEhhphs....................................................................s.......thhhp........................................................................................................................................................................tstpYhLRP..............EslESlaahYRhTsD.pYpchGWchapulpc.ts..+s....ps.............................GauulpDVp........................sp............hpD..........................pMESFa..LAETLKYhYLL.Fs...........-ssh..ls......................LccaVFNTEAHPl.h 
..............................................................................h.puapsYhp..................a.Aa.shDEL.pPlo.p.ps................................ps..........................h......ssh.uhTl...lDuL..D.TLhlh............s..p...................p-Fpcuhph..lt..........pl...s....................................as............s...t...l........slFE...........................ss.I.R..........................hlGG..LLoA.a.ls...............................................................................................................tpt.hLphAh-l.u.p.t.Lh..sAF..p...T.s.ou...l...........P...h.s.hls...........h.......tp.t.h.........sst.....................................................................................ts......h.Aphuol.lEFt.h..........................LSpl.TGssha..p..hs..p..p.....hhp.hl.p................G.Lh.s..hl......ssp..sG.pah...t...........................................................................................................................................................................................................................................................................................................................................................................................................................................tphslGu...hsDS....................aY.....................EYLlK............talhhs............c............................................................tt...........................hhpha...........................................pu.h.p....ul.pahhht......s...............hhhlst.hp...............................th.hp.h.hp.p.Lss..F.hsGhhsLu.....................................................tp...h.p..hAhpltps..hhth.a............pph...shhPEhhpht.................................................................hp............................................................................................................................................................................................................
.............................tptha.L....R....P........E..h.lEShahh.a..R.h....T..t....D..........h..Yhc.............hGh..p......hh..pu.lpp..hs..+s..ts.....................................GauulpsVh..........................s..sp......................hpD..................................................p..h-......SFaL.......uETlKYhYLLFs....pss........ls.............................................................hspaVFsT.....EAHsh........................................................................................................... 0 462 748 1108 +431 PF03200 Glyco_hydro_63 Mannosyl oligosaccharide glucosidase Mifsud W anon Pfam-B_2589 (release 6.5) Family This is a family of eukaryotic enzymes belonging to glycosyl hydrolase family 63. They catalyse the specific cleavage of the non-reducing terminal glucose residue from Glc(3)Man(9)GlcNAc(2). Mannosyl oligosaccharide glucosidase EC:3.2.1.106 is the first enzyme in the N-linked oligosaccharide processing pathway. 19.00 19.00 19.00 19.00 18.90 18.90 hmmbuild -o /dev/null HMM SEED 801 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.88 0.70 -13.48 0.70 -6.70 15 722 2012-10-03 02:33:51 2003-04-07 12:59:11 11 9 421 0 454 910 161 431.20 23 58.05 CHANGED 
lsllhlu..htshhhhthhhst..............+slssasss.h.p.s..hpsttssshaWGoYRPplYFGlRsRoPcSLlsGLMWhs...s.tsGtsslRHh..CEQGD.sLssYGWhcHDGRsFGpQcIpD.ps.hsLpT-FVKp.t.t..aGGDWusRIpupspsusps.s......psSLhaYsusEGps.....slsscl.stpspls........................hloGpop-LGpFplpl............ps.sss......hhcspahuhpsPs...lhplpD.......................lVhpsLppphpp....hss.t..tphlslss.h.........pspppuNllhh......QlohpsshphDIlFpSusstcpsp.............LTGpslsspLcc+pppF-cKFpppFsLppKh...............hsssphpFA+sALSNhLGGIGYFYGsSlVp.......................puhpptcslhYhPus..........LaTAVPSRPFFPRGFLWDEGFHpLLlt+WDscloh-lluHWhsLlNs-GWIPREQILGsEARSKVP-EFllQpspsANPPTLhLslccLl-shcsst..t..................................thhsaL++laPRLcsWFpWFpsTQ.pG.l.s..........oYRWRGRshs....LNPKTLsSGLDDYPRASHPossEhHVDLpCWhuluopsMsslAchLGpscshtc.......ptts.lssN..LschHWu-cppsYsDaGsHTc...................................tVtL....s....t.............h.R.s..hptPphphV.sphGYVSLFPFLL+lls.sDSs+LcplLclI+DscpLWosYGLRSLS+oushYhp+NoEHDsPYWRGPIWINhNYLhLpuL+HYu.....phsGPapspApplYpELRsNLlsNlh+QYppTGalWEQYDDp.oGcGcGs+sFTGWoSLVlLIMuEpY 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.hh............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................s..........t......h............h.h....h..............t.h.h...............s.........p................+L..........t.plL..p.hhhD.p.c.h...hos.....aGlR..SLS...+..............p...............s.....................h...............a....h.............p........t.............p..........s.......................................................................Y...................WRGP.lWhshNaLhlcuL.t.c.at.......................h.s..................................................................................................................................................................................................................................................................................................................................................................... 0 176 289 401 +432 PF03648 Glyco_hydro_67N Glyco_hydro_67; Glycosyl hydrolase family 67 N-terminus Finn RD, Moxon SJ anon CAZY Domain Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the N-terminal region of alpha-glucuronidase. The N-terminal domain forms a two-layer sandwich, each layer being formed by a beta sheet of five strands. A further two helices form part of the interface with the central, catalytic, module (Pfam:PF07488) [1]. 
22.00 22.00 22.10 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.72 0.71 -4.08 38 324 2012-10-01 20:56:08 2003-04-07 12:59:11 9 8 239 17 133 354 23 111.20 22 14.45 CHANGED sWLRYt..lsssth..phpshsspIVslsso..sslpsAtpELppGlpullGpsh..p.hssphsppss.......lllGThcshph.....ttshsp....Lss-Gahlpo....tpspllIsG.ps-pGsLYGsFca ...............................................................t.lh.h..tps.......................sslphAhpELppslptlhGpps...t...lh.s.p.ssppss............lllGohss..h......tth..t..h....s..t....t...ht....p...l....t..p....EGallps....................t.............s...pptllIsG.sscpGsLYGsach.............. 0 49 88 115 +433 PF03663 Glyco_hydro_76 Glycosyl hydrolase family 76 Finn RD anon CAZY Family Family of alpha-1,6-mannanases. 20.60 20.00 20.60 20.00 20.50 19.90 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.64 0.70 -5.08 90 1340 2012-10-03 02:33:51 2003-04-07 12:59:11 9 29 493 1 758 1302 51 320.60 20 73.21 CHANGED Sl....csAAsplAtshh.saYpGsp...G...shsGhhsss.......haW......WpuGuhassll-YWhh....TG..s..........soYNcllppulhaQsGps.........c.ahPsNtops...GND..DQuFWGlAsMsA.AEtsFsss..sst.p...WLsLAQuVFNs.s.sRWDsp....s.CsGGLRWQIasas.sGYsYKNolSNGshFpLAARLARYT..sNp..T.Ys-WA-KlWD.Whts.ssLl.......s....sp..apVaDG.s.....ssssN...C.os..hsph...pWoYNtGlaLsGAAaMYNh........T........pu...s.......tWpsRscslLsu..sh.....sh..F..Fsss...........lhhEh.uC......Es........t.p....CssDQtsFKuhhuRaLuhTstls.P..Th...-...pIhshLpsSApAAA.ppCoG...................us..ssp...hCGhpWh...ts.a.....DGhh.G...lGpQhuALpslpuhll......t.ssshoss 
.......................................................................................................................................................................................s...........................h...................W.tuth.hs....s......hlchh.hh......tt...s............................sphp.p...h..htp...thh.hth..stt............................h.s.h......s......s......t..............h......D....Dtsa.......hu.l.u......hhp....u......hc..........hshsp............p..........a.L.p.hAp.tl..asphh...s....tWssp..................s.......G..Glh.W.............p.................................p.......s.......h.....saKNs......h.........uNush...h.h.u.u....+....L..h.p........h.................s......tsp...................p..YhchApch.ac.Wh..tp......sll.......ts......pp......hh.l..hDGh........................phps.......s.tp.....hsp.h.........paoYNpGsh.lsus.s.l.aph.....................T................ts..s.....pah..p..csptlhps......sh.......................ph......h...h.ss.............................lh.p...................................................sssD..t.t.....F+uhhhRhhs.hh......phh...s...t....p............................thhsh.lttsupssh........tt...........................st.....hhuh.W.t.................................................................s............................................................................................................................................................................................ 0 203 429 641 +434 PF03644 Glyco_hydro_85 Glycosyl hydrolase family 85 Finn RD anon CAZY Family Family of endo-beta-N-acetylglucosaminidases. These enzymes work on a broad spectrum of substrates. 
36.00 36.00 36.50 40.30 31.30 35.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.04 0.70 -5.30 36 655 2010-01-08 16:09:57 2003-04-07 12:59:11 8 45 559 13 183 522 4 304.40 35 29.23 CHANGED hhusYt..ssph.......spGs.sph...sYs.FtaWphlDhFVYFS....H.....................thlslPsssalssAH+pGV.VLGT.....lhh...EhssthpphcphLp.sppus..hhh...........A-+Ll-luchYGFDG.WhlN.Essh.s.......................stspphhsFlphLpcphpp...........t...............hlhWYDu.hT.cGplpWQNtLsppNthF................hpssDuhFhNYtWst.....................................pp......lctStphApshshs.hc............lasGlDVauRu..............htuthpsptslcph.......t..psphSlulFAPuWsac.........................spFhpp-spaW.......................................shlsp...................................lsF.hTsFspGpGtpaah ...............................tsuhs..psph.............s.Gs.pp........uYs.FsaWQYlD.hVa.Wt....c..........................GlV..Psssh.l....sAu.HRNGVs..V........hG.T...........laF.............sas..s....u..h...t.....htph...L...c.....p...c.scus...asl.......................AcKLV-hA+aaGaDG.aFINpEssss.............................shspphppFhhYhpchutp......h........................................ph.WYDu.hT.ssth.aQs..u.Ls.-hNh...F................h..ss.DshFhNasWsp.................................................sp............cho.lttApth.G.ts.sa.p..............lasGl-lptsG..............hpsph..c.h...sshLcp...............t.phch....SluLF...AP...s.....hhhp.........s..................htpsatc.pEchaasGhpssPptp..................ssspsWh.Ghus.hlss+oshsu................................................sF.hTsFNsGpGhcaa.......................................................... 0 71 108 148 +435 PF04101 Glyco_tran_28_C Glycosyltransferase family 28 C-terminal domain Bateman A anon Pfam-B_1105 (release 6.4) Domain The glycosyltransferase family 28 includes monogalactosyldiacylglycerol synthase (Swiss:P93115, EC 2.4.1.46) and UDP-N-acetylglucosamine transferase (Swiss:P74657, EC 2.4.1.-). 
Structural analysis suggests the C-terminal domain contains the UDP-GlcNAc binding site. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.96 0.71 -4.59 34 6284 2012-10-03 16:42:29 2003-04-07 12:59:11 11 38 4635 6 1557 7997 2525 160.10 23 45.21 CHANGED hlhVhsG.Spstp.lNphlhphtthhtpt...............................hplhhtsGts.shpthptp..htph................hsahcphsphhppA-.llIo+uGAuTlhEhhthGtPsIllPh.ptht.tH.pppthplsp.thushhhhttphs....cpLtptltchhppppp....hppspt........hpphtshhcpl ....................................................................................................................................lLl.h..GG....S...G.Ap...h..lN....p..h.l.....p...s.h..s..t.l.t.pp...........................................................................h.p.ll.a.sG..cs...php.p.h.p.pt.....htpht.....................................thph..h.s.a.....h..c.....c.........h.......s.p.h.h.........s.........t.........A.........D...l..V..lsR.u.GAsTl.sElh..s.......h.....G.....h..P...s.l...l...l...P.h.s..tt....p..t.cQhtNA.t...l.tc...tG.u..u....h..h....l.....t....p.s....p...hs..........sp..tl...h..p.t.l.t.....p...h.p...ptt.......h.th.ttt.....................hh........................................................................... 1 542 1040 1326 +436 PF00852 Glyco_transf_10 Fucosyl_transf; Glycosyltransferase family 10 (fucosyltransferase) Bateman A anon Pfam-B_1677 (release 2.1) Family This family of Fucosyltransferases are the enzymes transferring fucose from GDP-Fucose to GlcNAc in an alpha1,3 linkage [1]. This family is know as glycosyltransferase family 10 [2]. 
21.00 21.00 21.10 21.60 20.80 20.80 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.29 0.70 -5.88 49 1458 2010-01-08 14:05:26 2003-04-07 12:59:11 14 27 340 9 858 1374 288 287.80 23 74.46 CHANGED pphhhshhhhshhhshhhhhhhh.htsssssh.t..t.............................ppphhl.LlWhaPFstphs.............hssCtphh.shss..ChlTssRshhscA...cAVlhHH+-lp.t...hssLPpt....................................RP..tQcWlWashESPops.....phsslps.lFNhThoYRpDSDIhhPYGhLhstpsttp....................hslP....pKs.......+..lVsWlVSN...astppt...RspYYpcLpcH.lpVDlaG+s............sp.lstsp...hhpslupYKFYLuFENS.lHpDYITEKLa.NsLtsGsVPVVLGPsRtNYEpF..lPs.-uFIHV-DFsosccLApYLhtLDcN-ptYhcYFpWRcphpl.....phht..htp...................hh....CpsCptlppt....pphps..hpsl.tsWah ...............................................................................................................................................................................hh....................................................................................................................................................................C..h.......p..pt..........t.......psllh...................h................h.s..............................................+....p...a.hh.ht..E..s...P......t..................h.......h....t..ha..N....h..Th.o.a......+.h.c....S....Dl.....h..s...h...t..h..h.....h...t....................................................................p.Ks..................t...hss.ah.sSs..................sp.......s.t..pt...............R.....p......a..h....p....p...L....t....ph.....l.....p......lDs.a...Gps...................................................................tt.h...s..t..tp..............hhp.h.l.s.pYKFhL....uF.EN.o.h......s...pD.YlTEKh.a...s....ul...hss.....s.....VPlhhG....p.....ssh..p..p..h.........hP..s...pS..a..I..plp..D.F.p.ssppLApalphLs..pspphY...p.Y.h...t.....W.+...tp.h....................................................................C.hC.hh.t..............................
................................................................. 0 422 516 726 +437 PF00982 Glyco_transf_20 TrehaloseP_syn; Glycosyltransferase family 20 Finn RD, Bateman A anon Pfam-B_1035 (release 3.0) Family Members of this family belong to glycosyl transferase family 20 [1]. OtsA (Trehalose-6-phosphate synthase) is homologous to regions in the subunits of yeast trehalose-6-phosphate synthase/phosphate complex, [2]. 19.40 19.40 19.40 19.40 19.30 19.20 hmmbuild -o /dev/null HMM SEED 474 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.53 0.70 -6.02 23 2717 2012-10-03 16:42:29 2003-04-07 12:59:11 16 20 1706 12 1143 2357 379 432.00 33 74.10 CHANGED s+hllVSN+lPls...hpRps...suc..aphshSuuG..Llsshpulppp......tph..hWhGhsGlpssppc.....pspls.......ppLppcas..........shPlaLs......-chhcpaYpG......aupslLWPlFHYhhs.......ptpa-cstWpuYhcsN+hFA-plsphhp...csDhlWlHDYHLMLLPphLRcchsc......hKIGFFLHsPFPSSEIaRsLP..sRpEILcGlLusDLlGFHTh-YARHFLSsCsRlLs..................lcspss....tlcatGRplsVtuhPIGIDssphtcslppssVtp+lppL+pca.pthKlIlGVDRLDhlKGlspKLhAa.EpFLppaPEapGKVVLlQlusPoRscsc-.hpplcpplp-lVsRINucaGsls..apPlhalcc.slsap-hlALashuDlslVoulRDGMNLVuaEYlssppc...............+cGsLlLSEFsGuApsLs.GullVNPWshcclAcAlppALsMoc-c+pt+ac+ha+hlspassshWucsFlp-Lpc 
.......................................................................................................................................p............................................ht...u...sG..G...Lss.u.l.shh.pt............tts.....hWhG.....W.s.........G..ss.tsp............tlt.....................t.s..p..h.s.............................................................................hhsl..L.s.............ppph...cpaYps..............................Fu..N..ssLWPlaH........Yh.................................hspa...p...c.p...tW.puYhcVNphFAcpl....hthhp.............cs..D.........h.....l.WlHDYH..Lhhlsp.hL.Rc.t..t.sp................................s+IGFFLHl..PF..PssElapt..L..P...................h+..ccl.Lc.....ullshD..............LlGFpT.s.t....ppF.lsss....p.phh.t............................................................hp.spss.........thph.....G....+....th......p....lts...aPI...GI....-s..pp..h.t.p.hs.........p...t.....s.....t.........l....tt..c......h....t.....pl..+..p...p.....h..........t......s.................h......p.l.l..luVDRLDYoKGlsp+lhAaEthL.....cp.hP..pa.p.............s..+..l.shlQ......lA..s.s....S.....R.......t...c.....l.p.........t.....Y............p....p....l+pplpphsGRING.c.a.........G...pls.........a........s.P..l.h.a........l.p....p...ph.s....h.pp...Lhulaph........uDlsLVTslRDGMNLVA+EYlAsp...ss.............................................................................psGsLlLScFAG.A.A.p.p..L...s...t..u............l............l.V.NP.............a.......Dhcth....As....Al...p...p...A...L...s....M...s.....t...E+.....tp.....Rhpphhc.hlhppsl.shWtpsFlpcLt.t................................................................................. 0 336 691 979 +438 PF01755 Glyco_transf_25 LPS_glycoyl_T; Glycosyltransferase family 25 (LPS biosynthesis protein) Bashton M, Bateman A anon Pfam-B_1857 (release 4.1) Family Members of this family belong to Glycosyltransferase family 25 [1] This is a family of glycosyltransferases involved in lipopolysaccharide (LPS) biosynthesis. 
These enzymes catalyse the transfer of various sugars onto the growing LPS chain during its biosynthesis. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.38 0.71 -4.67 22 1865 2012-10-03 05:28:31 2003-04-07 12:59:11 12 20 892 0 399 1567 1071 173.10 22 55.71 CHANGED h.thalISLcpu.cR+phhppphtp..........plsFpFFDAlsscphpp...........................p..tt.h..hhpphhhtp.LohGElGChhSHhhLWpcslcps....hphhhIhEDDlhlspph...ht..........t.tal.h.thh...a....h.h.ph.sah..shphcpp.h..............................................h..h.pthhGTsGYllo.puAcchlphh.pphh....hslD.hh ............................................................................................................h...halIs.L.tpt...p.Rhtth..ttp.htp............th.t.h...p...hh-Alsspthst......................................................t.t...h..h.......h..h.h.p........h........t.........l.o.....G.E.........lGCahSHhtlWcphl.....c...p...s..................phh..ll.hE.DDl..h..h.p.p..s.....h.....t...h.t..h..t...................t..th...lh.................................................hh.h.......................................................................................................................................................................................h.............htshuYhlo.puAphhlph.......h......hshD......................................................................................................................................................... 0 99 205 301 +439 PF00777 Glyco_transf_29 Sialyltransf; Glycosyltransferase family 29 (sialyltransferase) Bateman A anon Pfam-B_1020 (release 2.1) Family Members of this family belong to glycosyltransferase family 29 [1]. 
20.90 20.90 21.10 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.87 0.70 -5.25 105 2130 2010-01-08 14:04:21 2003-04-07 12:59:11 13 26 197 3 993 1851 142 224.50 21 69.43 CHANGED ss.tph..hhtppssthsp.....phhhs.hs.ppps..plspsltplhs.......t.Pht.pp...........h+pCAVVGNuGlLpsSphGpcIDsaDhVlRhNhAPlps.accDVGsKTohphhsPpsh.pp..............hspsshhhhls..hhstshhWlsuhhhpp.......sshhthhh.hhhphhhspppl.........................................hlhpP........pal+tltphahppsh........csphPS..........TGhlhlhhAlphC-c.VplYGFhs.....tsp.sh.pHYY..-phh.phht.......HshshEhthlpcL.tppG..hlchhhsp ................................................................................................................................................h...h..........................t.......................hppCA....VVGNuGhLhsSphGp.c.............I.Dp..tDhVh........R....h..NtAP.sp.s....ap.pDV.Gs+Ts..hhh......hsspsh....t.......................................ttp..h.h..h....h........hs..thh.ah.shh.pt...............................t....h..hh....h.......h...h.........h...t....p.h................................................h..h.h.p.P................thh.t...h...t.t.....hhhp..h.........................phph.o..........oGhh..h.hhhAh...ph.C-p....lplaGFhs...........pt.t...h..HYa.....-.p............t..h...................Hph.hEh.hh..ph.p.....th.......t.......................................................................................... 
0 297 410 626 +440 PF03360 Glyco_transf_43 Glyco_tranf_43; Glycosyltransferase family 43 Mifsud W anon Pfam-B_1447 (release 6.6) Family \N 20.20 20.20 20.80 20.60 20.10 19.60 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.28 0.70 -4.77 15 602 2012-10-03 05:28:31 2003-04-07 12:59:11 11 9 129 16 335 571 6 196.50 36 58.65 CHANGED LupTLppVs.sLHWIVVEDussssP.hVuslLcRoGLs..aTalst+TPpsac....t.p.hhsRGshQRNhALcalR.............ppppcGVVYFADDDNoYsLcLFp-.hRslc..+lGlWPVGLlG...........................GhhVEuPhlpss..KVluap....ssWcsc..RsFslD...........MAGFAlNlcllLp.....+ssAsFshcs.p...GhpEoshLcpLsh.chs-lEPh.ussCo...cILVWHTRTcp ...................................LupTLhhVs...sLhWlVVEDutpps......sspl...Lpp..oG.......l....asHLss.....p.s....tthp............p.h.h.sRusp..Q.R..NhALp.alR............................ppphpGV.....VaFAD..D.DNo.........YslcL.F-E......h................Rp...sc....plusWPVu.....h.lu................................shhhEuP.h.s...psu.....+Vl..Gap......................ssap.sp...R..s..FslD............................................MA.GFAlshp.llhp................................pspsh.hphps.p.....GhpE.o..sh..L..pp.ls....s.pc...l.Esh....u..ssCo.........clLVWHs+oc........................... 0 90 147 256 +441 PF01501 Glyco_transf_8 Glycosyl transferase family 8 Bateman A anon Pfam-B_730 (release 4.0) & Pfam-B_5903 (Release 7.5) Family This family includes enzymes that transfer sugar residues to donor molecules. Members of this family are involved in lipopolysaccharide biosynthesis and glycogen synthesis. This family includes Lipopolysaccharide galactosyltransferase Swiss:P27128, lipopolysaccharide glucosyltransferase 1 Swiss:P27129, and glycogenin glucosyltransferase Swiss:P46976. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.73 0.70 -5.05 67 5748 2012-10-03 05:28:31 2003-04-07 12:59:11 15 52 1943 49 1770 4515 1043 233.00 18 55.72 CHANGED slshshspsY..................................lhushsslpSllpssst...............................hthphhhhssslsscthpplpthhpth..h.......................................httphhshhsasRhhlsc.........................lh..sphcKllaLDuDllV.hpslspLaslshs.s.hhusstp...............................p....tth...t...sshFNsGhhlhshstWpppslppphhph..hphptp......h.hs-QshlshhFt................sphp.LshtaNhhshhh..................t..hp.h.ppstllHa....sG...ttKPWpphs ...................................................................................................................................................................................l.hshsts..a.......................................h.tshsh...htSlh..hp..sp....................................................hththh.l...h...s.s...t....l..s........t...p.........h....p..h...l...p....p....h..h...p...p....h....t....t....h..........h..h......s....................................................................................................ttph..h..s..h.h..s..a..h.R.h.h.l....p................................hh...sp....h.....c....+.llYLDu.D.lls....h........s..........s....l.s..c.L.......a....s....h...sht....sth...h.us....s.s.....................................................................................t...................h....t............ht..................ss.h.....F..NuG.l....hll.shp.t..h...p.....p......p....s...l...p.p....p.h...hph............hptptt....................h.hh.D.......Qs....h..L..N...h...h...ht......................................sp.h...h.....L...s.h.p..a....Nh.hthh.................................................t.......t.....h.....p..p..s...hl..lH.a........hu.........KPW.........................................................................................................
........................................ 0 480 946 1364 +442 PF00535 Glycos_transf_2 glycosyl_transf_2; Glycosyl transferase family 2 Bateman A anon MRC-LMB Genome group Family Diverse family, transferring sugar from UDP-glucose, UDP-N-acetyl- galactosamine, GDP-mannose or CDP-abequose, to a range of substrates including cellulose, dolichol phosphate and teichoic acids. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.86 0.71 -4.57 253 43678 2012-10-03 05:28:31 2003-04-07 12:59:11 21 600 5370 41 12098 45842 18220 163.70 16 45.40 CHANGED SlllPsaNc.......tphltpslpolhpp..........................h.phEllllDDuS.sDso..hplhcphht...........plpllppsps...Ghusuhstulpt.upGc....hlhhlDuDshhtss.hlppllphhpp.ssthshsshhthttpttththh..................................hhhttttttthththhhhhsstthhppphhtphh ......................................................................................olllPs.a..Np........tph...l....t...p...s....l...p...o...l...hpp......................................................h.shE.....l....l..l.....l.....D....D......u.........S....s.......D........s....o...........hp.....l...h.pphtt..................................p.l..p...h....l.....p........p.......p.........t.......s...........h......G.......h..........u......s.......u.......h......s......t....G....l.........p......t......A.......p...uc.....................hl.h.h.l.D...u.D......s..h.......h......s.........s....s....h....l.....p....p........h....l...p....h....h......p........p.................s........h.....t.....h.....s..h...s....s....h..h.h.h...t..t.........................................................................................................................................................hh........................................................................................................................................................................... 
0 4072 7912 10203 +443 PF00953 Glycos_transf_4 Glycosyl transferase family 4 Finn RD, Bateman A anon Pfam-B_534 (release 3.0) Family \N 21.40 21.40 21.40 21.70 21.20 21.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.31 0.71 -4.43 116 8422 2009-01-15 18:05:59 2003-04-07 12:59:11 16 18 4730 0 2028 5693 4534 172.80 31 48.36 CHANGED hhhhh....hssh...llshlGhlDD...h.........h......slsshh+lhhphlsuhlhhh..................................shttlshhhs.................................................h.hs.hh.h.....hh....slhhlsuhh.....NAhNhhDGlDGLu.......uuhulls.hlshshls..........................ssh.hhhh.............slhu.ul.lG....FLhaN.h.P.....A...+lFhGDsGShhlGhh..lu....hlulhht ................................................................................................h..hlLhshhshshlGhlDDh............................................h.u.Lss+..hK.hhh....Qhlsullhh.hhhh............................................thstls.h.hh...........................................................................................h.h.sl.u.......h.h...hl......hl....slhhl....luhs....NAl.NL.sD........GLDGLAuusssls.hsuhullu..............................................................hht.............sphpl.sl.h...............sh..............ullG....Ah..LG....FLhaN...hhP............A....clFMGDsGSLhLGhh.luslulhh.p.................................................................. 
1 688 1325 1728 +444 PF00903 Glyoxalase Glyoxalase/Bleomycin resistance protein/Dioxygenase superfamily Bateman A anon Pfam-B_1207 (release 3.0) & Pfam-B_5495 (Release 8.0) Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.44 0.71 -4.16 82 14717 2012-10-02 15:00:03 2003-04-07 12:59:11 20 46 3790 237 3666 25008 5991 128.70 16 62.42 CHANGED tlsHhs........lhlsDhpcuhpFYpcs...LGhplhpphs.............................hhhhhhsssttlplhhttsssstttth.....................................................................................................................................ththhshsscDlttthcclpppG..................sphhtt.scthhsthhhh.......................hpDP.sGhhlEl ...............................................................................................................................................................ltHhs..............lhs..s..-...h..p...c..s...h..p...FY...p...cl......L....G...h.....c...l......h.cp.hs....................................................................h.h.h....h..h..h....t......s...s.....s.....s.....t......l.....p....l....s.......h.....t...........s....s...s..t.t.t.t.t...................................................................................................................................................................................t..h.u....a..h..s.........s....h...c...-.....l...t.......t...h.....h......p....p....l....p.....p...t..G..................................h..p...h....p...t........h.............s.....p....h....h....h...t....t....h...h..........................................hpDP...sG.hhEh.......................................................................................................................................................................................................................................................................................................... 
0 972 2122 3010 +445 PF04464 Glyphos_transf glyphos_transf; CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase Waterfield DI, Finn RD anon COG1887 Family Wall-associated teichoic acids are a heterogeneous class of phosphate-rich polymers that are covalently linked to the cell wall peptidoglycan of gram-positive bacteria. They consist of a main chain of phosphodiester-linked polyols and/or sugar moieties attached to peptidoglycan via a linkage unit. CDP-glycerol:poly(glycerophosphate) glycerophosphotransferase is responsible for the polymerisation of the main chain of the teichoic acid by sequential transfer of glycerol-phosphate units from CDP-glycerol to the linkage unit lipid [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 369 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.14 0.70 -5.70 22 3039 2012-10-03 16:42:29 2003-04-07 12:59:11 9 50 1148 20 433 2103 385 317.90 20 61.59 CHANGED hhhhhthhhP..hcsshllFtuhttcshusNsptlac.hhp..c.tsshchlWshcp...........tth.tssphlthsShchhhhhhcuchllss........s.hss..hhh++psphalQsWHGs.sLKphGh-..t.....h.p.....phht....psppaD...ahlossphpsphap....csFs..hstt.phlphGhPRsDsLhpspttp..httlppphslshscp..llLYAPTaRcsttt.......................hthtlDhcplh....pthtpchhllh+hH.hhsstht.........hppsshllslsstp......clp-LhhsuDlLITDYSSlhFDaulLc+PllhYs.Dh-pY...ppp.RGhYh-.a.ptsPGslscs.p-Llchlpph.ppspt.tcphphhpccasp.apDG+uScRlhphlht 
...........................................................................................................................................h..h..............tp.lh..h..s.......t....t..hsss.t.hlhp...hh.......p...t..p..h...p..h..hh.hhp.........................t..th.....t..hl.....................p......h.....c....h.......h.....hh.....tp....A.c.h.hh.s....................s.h.........h..h....h..p..........p......p....h....h...lQ.........hWHus.sh.K.p.h.shs..t...............................h.h...thpph.s..........hhl.s.....s...s.p..h..t...p...h.at.........puat.......hpp...t....p....l..l...h.G......h.PR...Dhlh.....pp....p.tp.............tth...p....p..p.....h......s......l..........t..tcp..........ll.LYA..P.T..a..Rssttt..................................h.h...s..h.....p.tlt........tht.p.....sh....h..l..l.h+hH.......h...h...t..p..p.h.........................thtp.h..h..h.s.h.s.s..h..............................ch.p-..l..hhh..sDl..l.ITDY.SS.lha.-..a.h..h..h..c..+..P...h.l...a...a....s.............Dh....p...p....Y...............pp....R.s.h..h..s...h....p...........h..s.............u.....h........s..p.......s......pt.Lh.p.t....l..p..p......................................t....h............h.....hp.....t..h....hpst...p.s.spphhp....t........................................................................................................................ 0 162 296 399 +446 PF00958 GMP_synt_C GMP synthase C terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1137 (release 3.0) Domain GMP synthetase is a glutamine amidotransferase from the de novo purine biosynthetic pathway. This family is the C-terminal domain specific to the GMP synthases Swiss:P49915 EC:6.3.5.2. In prokaryotes this domain mediates dimerisation. Eukaryotic GMP synthases are monomers. This domain in eukaryotes includes several large insertions that may form globular domains. 
21.10 21.10 21.10 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.23 0.72 -4.27 161 4898 2009-09-11 08:43:03 2003-04-07 12:59:11 17 31 4651 24 1280 3535 2105 95.50 57 18.75 CHANGED h-Elccs.......GLYcclWQuFAV.Llsl+SVGVhGDpRoYsas...................................................................................................................luLRAVpSpDuMTA-as.....c.lP.........a-hLpclSsRIlNEVpuVNRVVYDITSKPPuTIEW ...........................................h.EElppAGL.cclaQsFsV.l.h.s.V+S..VGVMGDsRT.Y-..a.s...........................................................................................................................luLRAVsohDhMTAcaA.........+l.P..........................a-lLt+lSsRIlNEVc.slNRVVYDIoSK..........PPATIEW................... 0 423 820 1086 +447 PF01825 GPS Latrophilin/CL-1-like GPS domain Bateman A anon SMART Family Domain present in latrophilin/CL-1, sea urchin REJ and polycystin. 21.50 21.50 21.50 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.64 0.72 -3.85 139 2565 2009-01-15 18:05:59 2003-04-07 12:59:11 16 386 110 4 1472 2317 28 46.70 35 3.56 CHANGED tpstCsaWs..........pspusWu.ocG..Cps.hp.sst............................sp...s.pCpCsH...LT.sFulLh .............................h.pstCsaWs......................sttGtWo..opG..Cph..hp...ssp..................................sp.......o.pCpC...sH....L.T...sFAlLM............................ 0 524 628 925 +448 PF02893 GRAM GRAM domain SMART anon Alignment kindly provided by SMART Family The GRAM domain is found in in glucosyltransferases, myotubularins and other putative membrane-associated proteins. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.24 0.72 -4.45 33 2555 2012-10-04 00:02:24 2003-04-07 12:59:11 15 89 326 5 1481 2393 6 70.70 24 10.41 CHANGED pch+.phFp......lstsEpllss......asCtlhp................ph.hpGplYlospclsFpS.hhss.t...........shslPhsslpplcKhs ...........................................................................................ap.phFt......lst.sEpLhps...........................as.CsL.p.................................ph..h..pGplal.ospp...l.sF.t..S..h.h..sp.s..............................plslPhtplpplcK..t............................... 0 338 634 1024 +449 PF03514 GRAS GRAS domain family Bateman A anon Pfam-B_393 (release 7.0) Family Proteins in the GRAS (GAI, RGA, SCR) family are known as major players in gibberellin (GA) signaling, which regulates various aspects of plant growth and development [1]. Mutation of the SCARECROW (SCR) gene results in a radial pattern defect, loss of a ground tissue layer, in the root. The PAT1 protein is involved in phytochrome A signal transduction [2]. A sequence, structure and evolutionary analysis showed that the GRAS family emerged in bacteria and belongs to the Rossmann-fold, AdoMET (SAM)-dependent methyltransferase superfamily [3]. All bacterial, and a subset of plant GRAS proteins, are predicted to be active and function as small-molecule methylases. Several plant GRAS proteins lack one or more AdoMet (SAM)-binding residues while preserving their substrate-binding residues. Although GRAS proteins are implicated to function as transcriptional factors, the above analysis suggests that they instead might either modify or bind small molecules [3]. 
19.90 19.90 20.10 20.50 19.80 19.00 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.25 0.70 -5.74 31 1831 2012-10-10 17:06:42 2003-04-07 12:59:11 9 14 343 0 790 1894 0 305.00 31 67.57 CHANGED LhphLlpCAcAlss..sshp..hAptlLtclsph..uSstG.s.....shpRlAsYFsEALssRlss.....h.p.h.ssptss......sschhtuh..................ph...a.cssPhh+FuahTANpAILEAhcsp...ppl..HIIDFsIspGhQWPuLhpuLA.pRs....su......sP.plRlTGlus.t........stttLppsGpRLspFAcplslsFcFps....hsp.phspl...phptL....plps...sEslAVNhshtlH+hh.......................................................spssshtp.....thLphl+slsPclVTllEpE..ssp.Nss......sFhsRFhEuLcaYoulFDSL.css............hs.ts..ppRthlEchhlu+cIhsllAsEGsc.RhcRpEshtpWRp+htp.uGFpslsluspshpQAchLLphas.scG..aplc...-c......susLh.LuWps+sLlssSAW+ ................................................hphLhtCApAltt....ts.hp......hApt.lltp..l.t.h......us.stu...s.................s.hp...+.lAtaF.scuLtt.Rlht..............................h.....stt.........................ph.h.h.....................................h........hhphs...Pah+huahsANpAIl-Ah.t.sp...................ppl.....Hl.lDa.s.........h....t......G...h....QWssLhptLu..........Rs.......sG......................PP..thRlT.uls.s...................stttlppsG.+LsphAc....p..h.t..l...F..c.aps.......................hst....ph..t..s.l.............p.....p....L.........plp..............sE.sls...VN..h.hth....+plh.......................................................................................................................sp....ss.s.htp.............hL..........phl+.th........pPc..........lhs..lsE...p-.......ush....s...s.s...................FhsRFh-u...Lp.aYushFDuL.c..ss.........................h.s...ts........tpc.hhhpp.hhuppIhsllu..s..........-..G.p.....Rh...........E.....R....Et...........h.t..........pWptph..tt..uGF.t...h.ls........sht.p.s..p..hl.t..........h....................t..........a....h.......tt............t.t...hh...tW.t...hh..osW..................................................
...................................................... 0 78 520 682 +450 PF01465 GRIP GRIP domain Bateman A anon Sean Munro Family The GRIP (golgin-97, RanBP2alpha,Imh1p and p230/golgin-245) domain is found in many large coiled-coil proteins. It has been shown to be sufficient for targeting to the Golgi. The GRIP domain contains a completely conserved tyrosine residue. At least some of these domains have been shown to bind to GTPase Arl1, see structures in [4,5]. 20.50 20.50 20.50 20.80 19.80 19.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.90 0.72 -4.44 44 627 2009-01-15 18:05:59 2003-04-07 12:59:11 15 21 256 8 415 639 5 44.60 31 4.30 CHANGED pshshpYLKNVllpFLppp-..t.+ppll..sVlsolLcFopp-pppl. ............tsshEYLKNVllpFlpt+..-.....tpp..p..Ll..sllsslLpFoscEpptl........ 1 146 206 324 +451 PF02946 GTF2I GTF2I-like repeat Bateman A anon Pfam-B_101 (release 6.4) Family This region of sequence similarity is found up to six times in a variety of proteins including GTF2I. It has been suggested that this may be a DNA binding domain [1,2]. 20.60 20.60 20.60 21.00 18.90 17.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.42 0.72 -4.14 18 742 2009-09-10 22:54:23 2003-04-07 12:59:11 9 8 42 10 300 939 0 75.00 47 38.66 CHANGED LRcpVc-LFspKYucALGhspsV.VPYpphhppPpslhVpGLP-GlsFR+PssYslspLc+ILcspcpIpFslK+P ............LRcpVc-LFspKa...u..EALGh..spsVtVPYp+hppsPpslhVpGLP-GlsFRcPssaslspLc+ILcspppI+FlIKRP...... 0 17 38 98 +452 PF01018 GTP1_OBG GTP1/OBG Bateman A, Studholme DJ anon Pfam-B_875 (release 3.0) Domain The N-terminal domain of Swiss:P20964 has the OBG fold, which is formed by three glycine-rich regions inserted into a small 8-stranded beta-sandwich these regions form six left-handed collagen-like helices packed and H-bonded together. 
21.50 21.50 21.50 21.70 20.70 21.30 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.19 0.71 -4.58 8 5081 2009-01-15 18:05:59 2003-04-07 12:59:11 17 23 4657 3 1432 3621 2408 149.90 46 37.77 CHANGED FVDpscIhlpuGsGGsGhVSFRREKalPtGGPDGGsGG+GGDVlhEsDpslsTLlDaRap+HaKApsGppGtucNpsG+sGcDlllKVPsGThVhD.-ssplluDLVcsGpRhllApGG+GGLGNu+FtSssp+APpaApsGpcG-pRplpLELKl .................................................FlDpspIhlpAGcGGsG.sl.............u...F....R......R..E...K...al..P...h.....G....G........P..s.GG....DGGcGGsVhh........s..Dc..s..l..p...TL...lD......a..R......a...p...+.+....a....+A..pp.Gps....G...t.....u......p..s......p.p..G.+..........s.......G....c..D..lhl+VPsGT....h.......V........h...............D........t.............c..................T........s........c........l.................l........uD...Ls..c..p...G.p.c.hl.lA+GGpGG....h.G..Nt.+.F.....t.....o....s..s.........NpA...P.....ch.u.....p................GpsG.-c+plhLELKl.................................................. 0 493 898 1211 +453 PF00009 GTP_EFTU Elongation factor Tu GTP binding domain Bateman A anon Prosite Domain This domain contains a P-loop motif, also found in several other families such as Pfam:PF00071, Pfam:PF00025 and Pfam:PF00063. Elongation factor Tu consists of three structural domains, this plus two C-terminal beta barrel domains. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.24 0.71 -4.86 216 69868 2012-10-05 12:31:07 2003-04-07 12:59:11 22 241 24054 243 11959 72575 19001 186.10 31 41.29 CHANGED cphhNlullGHVDcGKTTLssplhhhsuh....t..t...t..t..............hDphppE+cRG.....ITIpsutsphp.....................................spptphsllDsPGHtD.FspphhpG..hu.hDuAlLVlsAs........cG.......stsQTccthhhApphsls......hll..hlNKhDps...........p.tchtclhcpltpph...........................................................................................lpth..hss...........................thsl.l.sSAhpuh..................slp......................................................................................................s.Ll-slsphlPs ..........................................................................................................................................................................p..st.....GKooh...h........t...t..l..h.............................................................................................................hD......h..t......E....c...p...R..G.......I..T...I...p..h..u..h..h.php.....................................................................................................htphhls.l...l..D.s...........P..........G............H.............t............D..........F............h...........t...........p...........h...........h...........p....u................hs.........s......D........s.........A..........l........L...l.....V...s..A.s..............................s.G.........................hp..s..Q......T.....+......c........t......h.........h........h.........A.........h........p.........h........G..............l................ll..V..s.l...NK.....h.Dps..........................ps.c..h..p....c..l....h...c...p...l.p......ph......................................................................................................................................................................
.................................................lp.th...shsst...................................tss.h.l...h.S.....u......hp.u.....................................s.h.......................................................................................................................................p.Ll-sl.t................................................................................................................................................................................................................................................................................. 1 4102 7392 10000 +454 PF03144 GTP_EFTU_D2 Elongation factor Tu domain 2 Bateman A anon Prosite Domain Elongation factor Tu consists of three structural domains, this is the second domain. This domain adopts a beta barrel structure. This the second domain is involved in binding to charged tRNA [1]. This domain is also found in other proteins such as elongation factor G and translation initiation factor IF-2. This domain is structurally related to Pfam:PF03143, and in fact has weak sequence matches to this domain. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.28 0.72 -3.86 305 58968 2012-10-05 12:31:07 2003-04-07 12:59:11 20 172 19933 232 11011 44749 11474 68.00 28 14.18 CHANGED GslssuRl.sGpl+hGsplhlh..t....t..............sclpsl.hhpt.thpps.....................hsusssGhhltshshcc...lptGpsls ...............GslsssRVpsGsl.+.s.G.ph.V..h..hh...s.s.....................................scV...p..ul..p.h...a...+.p....t...h.p.c.s.................................................ls.G.s..s..s..G....h.....l..p.s..h.s..hc-......lppGpsh........................................................... 1 3673 6761 9154 +455 PF03143 GTP_EFTU_D3 Elongation factor Tu C-terminal domain Bateman A anon PF00009 Domain Elongation factor Tu consists of three structural domains, this is the third domain. 
This domain adopts a beta barrel structure. This the third domain is involved in binding to both charged tRNA [1] and binding to EF-Ts Pfam:PF00889 [2]. 21.30 7.10 21.30 8.40 21.20 -999999.99 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.29 0.72 -3.83 82 23082 2009-01-15 18:05:59 2003-04-07 12:59:11 12 49 16442 102 2870 18678 2585 86.00 47 25.05 CHANGED slpsppcFcApVhlLs+.....poshhsGYpP.hah+Tsclss.hhpL.........stts......chlhsGDsshlplchhp.PlslEptt......RFAlR-GG+TVusGllpclh ..................................s+ssscFsAQ...VI.V...LNH............PGp.I.ss.G........Y...sP.V.Lc.s.H.Tu.clss.........+h..sEl....pt.h-.............sucs........................c.ltsGD..ss.h...l.p....l...........+...P.h....s...l.Eth..........................cF.................................................................................... 1 948 1617 2282 +456 PF01493 GXGXG DUF14; GXGXG motif Bashton M, Bateman A anon Pfam-B_428 (release 4.0) Family This domain is found in glutamate synthase, tungsten formylmethanofuran dehydrogenase subunit c (FwdC) and molybdenum formylmethanofuran dehydrogenase subunit c (FmdC). A repeated G-XX-G-XXX-G motif is seen in the alignment. 
19.80 19.80 20.60 20.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.42 0.71 -5.32 29 4251 2009-09-11 14:28:49 2003-04-07 12:59:11 14 50 3290 15 1375 3769 3934 186.10 39 14.86 CHANGED hphslhsscRslssclsstls+thGpps.h...............sslplphpGoAGpShGuahssGhp.............lplpG-AsD..alGpuhsGGcIllpsssss...........shtsspsshhGssshaGATGGplahpGpAG-Rhul+N...........SuuphVlEGs.GsassEYMsGGhllVLGcsG..........cNhGAGMoGGhsYlhsc.s..............shspphshchVchp...plps...........tstppL+tl .............................................................h....ltNssRsl.Gshlustl...sc..p.aGppG..L..........................s.t.s.slplph..p..G.o..A..G.Q..S.F.G...u..a.h..s..t...G.l.p.........................................lplpGDA.ND..YVG.K.G.h.uGG..pll.l.p..Pshss.............th.s.pcshIhGNs.sh.......YG........A..TuG.c.la.hp.Gp.A..G..E..RFuVRN...........................SGApsVVEG................s.G..D......a..uCE.YM........T........GGhVllLGp..sG....................pNFu...AGMoG.GhAYVh-pss..............ph.t.t..p.s.N...chlph............lpt..................h..................................................... 1 407 886 1167 +457 PF02756 GYR GYR motif Ponting CP anon Ponting CP Motif The GYR motif is found in several drosophila proteins. Its function is unknown, however the presence of completely conserved tyrosine residues may suggest it could be a substrate for tyrosine kinases. 20.60 20.60 20.70 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.19 0.73 -6.39 0.73 -4.01 4 377 2009-01-15 18:05:59 2003-04-07 12:59:11 9 37 13 0 227 370 0 17.80 73 19.15 CHANGED -DGY+YKTVRRL+hRhRH ...DGYcYKTVRRLKaRpR+.. 0 34 34 134 +458 PF03457 HA Helicase associated domain Yeats C anon Yeats C Domain This short domain is found in multiple copies in bacterial helicase proteins. The domain is predicted to contain 3 alpha helices. The function of this domain may be to bind nucleic acid. 
28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.41 0.72 -3.87 186 906 2009-09-13 12:23:34 2003-04-07 12:59:11 9 52 116 1 382 984 370 69.10 24 38.11 CHANGED hss...hpppWppta.ptLppatppcG...p..tp.lPpsh....................shpLGpWlppQR....pth+p..................sp......Lst...-+hptLspl..shh .............................pttWpp.ta.ptLppatppcG.................p...hp..lPp.sh..t.....................shtLGpWlspQR.............pphcp....................................................up............Lss......-RhptLcplGh.................... 0 258 341 371 +459 PF04408 HA2 Helicase associated domain (HA2) Bateman A anon Bateman A Domain This presumed domain is about 90 amino acid residues in length. It is found is a diverse set of RNA helicases. Its function is unknown, however it seems likely to be involved in nucleic acid binding. 21.30 21.30 21.30 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.36 0.72 -4.00 140 7005 2009-01-15 18:05:59 2003-04-07 12:59:11 18 172 2406 5 3299 6442 376 102.00 28 9.96 CHANGED pulphLhtLGAL..........st....ps.................pLT.tlG+phuphP.l-PpluKhL..l.husphsC.........hcchlsIsuhLos.pssFhp..............................s.t.tttsp.........................................................t..t+tph.............ps.........DalshL 
...............................................................................t.AhphLppL.GAL.........................ss......ps.......................................................pLT..slG...+ph...Apl...P....l.....-P+L..u.+.Ml.......l..t.u...t....p..h......s..C.................................hpp..sls...IsAhL..oh....ps..sh.p.................................................................................................s.t.t.pt.tsp............................................................................t..h+.th....................................uDhh............................................................................................................................................... 0 1063 1774 2655 +460 PF04774 HABP4_PAI-RBP1 Hyaluronan / mRNA binding family Kerrison ND anon Pfam-B_2044 (release 7.6) Family This family includes the HABP4 family of hyaluronan-binding proteins, and the PAI-1 mRNA-binding protein, PAI-RBP1. HABP4 has been observed to bind hyaluronan (a glucosaminoglycan), but it is not known whether this is its primary role in vivo. It has also been observed to bind RNA, but with a lower affinity than that for hyaluronan [1]. PAI-1 mRNA-binding protein specifically binds the mRNA of type-1 plasminogen activator inhibitor (PAI-1), and is thought to be involved in regulation of mRNA stability [2]. However, in both cases, the sequence motifs predicted to be important for ligand binding are not conserved throughout the family, so it is not known whether members of this family share a common function. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.94 0.72 -3.54 39 511 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 219 0 289 525 1 99.40 26 29.83 CHANGED s+RpFDR+SGosRu..............s-..KRpGuGptNWGosp--.h................sph.-.s.ss.t-ps.ssEp.s.....t-cs........scEssscptt...Epc..sKEMTL-Ea.KsLp-Kp+uh.s...hph.cctcss .......................................................pRta-Rp.S..G.os+s................tp.K+tG......uGttNW.GsspD-..l..........................sc.h.....-ps....s..tp.cs..s.....t....tEpts..........tppt............sppptspt.t.....Ept......spEhTL-Ea.Khhpp.pp+s.h.t...hphcc.t............................................................. 0 102 154 221 +461 PF02183 HALZ Homeobox associated leucine zipper SMART anon Alignment kindly provided by SMART & Pfam-B_1492 (Release 7.5) Family \N 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.13 0.72 -4.24 69 597 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 81 0 280 618 0 43.70 38 16.66 CHANGED KQLE+Da-hLKpsa-sLppc.-sLppEppc.L+uplhsLppphttp ....KQhEhDh-hLKcsa.-sLpp-sc.......pLp+E.......hpc.L+A.hht.p.h....t.......... 
0 43 169 230 +462 PF00672 HAMP DUF5; HAMP domain Bateman A anon Pfam-B_113 (release 2.1) Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.25 0.72 -3.79 222 60876 2009-09-13 11:30:41 2003-04-07 12:59:11 20 935 4667 65 16414 49005 5371 70.40 19 12.75 CHANGED hh.hhhhhslllshhhshhlspplhpPlppltpssppl.u.p...........G...cl...s.............s.h......ttsElupLupuhNpMtppLpp ...........................................h..hhhhhs.l.l.l.s..h.....h....h..s...h....h....l....s.....p........p.........l.......h....p........P.....l....p....p....l.t....ps....s....p....p.l..u..p..........................G............cl.......s..........................................................ppl.......shp...sp..c..E...l.up...Lu....pu...h....Np.MhppLp.t....................................................................... 0 5121 10085 13473 +463 PF04849 HAP1_N HAP1 N-terminal conserved region Mifsud W anon Pfam-B_4571 (release 7.6) Family This family represents an N-terminal conserved region found in several huntingtin-associated protein 1 (HAP1) homologues. HAP1 binds to huntingtin in a polyglutamine repeat-length-dependent manner. However, its possible role in the pathogenesis of Huntington's disease is unclear [1-3]. This family also includes a similar N-terminal conserved region from hypothetical protein products of ALS2CR3 genes found in the human juvenile amyotrophic lateral sclerosis critical region 2q33-2q34 [4]. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.93 0.70 -5.48 8 271 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 86 0 153 239 0 249.20 44 35.16 CHANGED hppp...............lssaulRADsltsa-.............ps-Wt.osthusstp.slos.phppsL+ah..hLCssRssQMTK.TasDlDulhpLLEE................KERDLELAARIGQSLLKpN+sLp......................EcN-tLEEpL..............................ppsh-plsQLRHELshKDELLQhYos..ssEEuEsESssossh..........................+.pcupstspph.pL-sLQcKLKpLEEENppLRpEAs+L+pETh..ThE-KEQpLlpDCV+pLc-ustQluuLoEELupKoE-hsRQQEEIo+LluQIVDLQ+KsKpaulEsEELpppLsuuK-uQcQLpAE.......LpELp-KYtEChtMLtEsQEElKsLRs+s .........................................h........................................................................p....tt.h..h....lLsspRVtQMTK.T.YsDI-sVT+LLEE........................KERDLELAARIGQuLLc+NpsL.p..........................................................EpN-hLEEpl........................................................................................tphh-plsQLpHELshK-ELLphhopss.-..-u-s-usssp.h..........................................h..t...s....pthhpL-.LQcKL+pLEE...ENh.LRs.EAspLcp-T...shEEc.EQpL.ltD.C..VcpL...p..-uN.QhsploEELutKs--hhRQQEEIopLluQll-LQp+h+ths..hE....pEELhtpLtus.K-sQ.pp.L..shE................................LpELp-+.tEshthL+EuQEElKpLRp+.......................................................... 0 29 43 97 +464 PF02184 HAT HAT (Half-A-TPR) repeat SMART anon Alignment kindly provided by SMART Family The HAT (Half A TPR) repeat is found in several RNA processing proteins [1]. 21.00 21.00 21.20 21.00 20.60 20.80 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.16 0.72 -7.40 0.72 -4.20 3 422 2012-10-11 20:00:58 2003-04-07 12:59:11 11 23 237 0 304 452 7 32.20 46 6.81 CHANGED KEIDRARuIYERFVaVH.P-VpNWIKaARFEEc .......Eh-RARsIYE.....R......a....l..h....V.H.Pc.lcsWIKYA+FE...... 
0 108 161 234 +465 PF02518 HATPase_c Histidine kinase-, DNA gyrase B-, and HSP90-like ATPase SMART, Griffiths-Jones SR anon SMART Domain This family represents the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.30 0.72 -4.35 659 129386 2012-10-11 19:05:54 2003-04-07 12:59:11 21 5116 7141 397 36085 119054 19749 113.50 24 18.95 CHANGED sstttLtplltsllsNAlchs.tt................splplplptpt.................................................................tphplpV...pD..................s..Gh...Gls..............................p.htplhp.hhphp......................t.tthsG..pGlGLslspp.lsct....hs.Gp.......lphpsp.......................................spGop.hpl.....plsht ................................................................................................................................h...ttlpplltNL...l..s...NA...l....c....astst..............................................spl..p..l...p....h....p...p..p.t............................................................................................................................................................................tp..l..t.l..p......V....p.D...........................s...Gh....G.l..st.....................................................................................................c.t.h..t...p...l....F...c......a.h...pspt.........................................th...s.G.........sG....l....G......L.u.....l....s.....c..p....l.s..c.t.........h...s.....G.p..........l.p.l...p.u.p.........................................................................sp.G.op...hplplP..t........................................................................................................... 
0 12473 24121 30935 +466 PF00955 HCO3_cotransp Anion_Exchanger; HCO3- transporter family Croning MDR, Finn RD, Bateman A anon Pfam-B_1004 (release 3.0) Family This family contains Band 3 anion exchange proteins that exchange CL-/HCO3- such as Swiss:P48751. This family also includes cotransporters of Na+/HCO3- such as Swiss:O15153. 19.70 19.70 20.00 20.10 17.80 19.20 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.61 0.70 -5.84 18 2233 2012-10-03 01:44:59 2003-04-07 12:59:11 16 22 295 7 1138 1987 18 300.50 27 52.48 CHANGED sLpRTGRlFGGLIpDlKR+hPaYlSDhpDALssQCLAAllFIYFAsLSPAITFGGLLG-tTcshhGV.E.llSTAlpGllFsLluuQPLlIlG.TGPlLVFEchhFsFCcspsl-YLshRlWIGhWlshlsLllVAhEuShLVRaITRFTpEhFusLISlIFIYEohtKLlclacsaPlptsa.............................................t.hsht.hshtps..hsuhhhsssssh..tstPNsALhSllLhhGTFhlAhhL+cFKNS+FFPuplR+lIuDFuVsIuIllMlhlDahls.shTpKLpVPsshpsTsss.RGWaIsPh...ss.PhWhhhAuslPALLlhILIFM-pQITslIVs+KE+KLpKGuGaHLDLLllulhhulCulhGLPWhsAATVhSloHlNuLpl.ScssAPGEpPchhtV+EQRVTGllVhlLlGLSllhsslL+hIPMsVLFGlFLYMGVoSLsGIQl................a-RlhLlhMPsKapPDhhYlR+VpshRhHLFThI 
...........................................................................................................................................................................................................................................................................................................................................t...h...................s................................h...............................h..h...h......h................a..h.hhh.sh.t.....h.h..h.p....tc.hs.h.sh.hh..s........................................................................................................................................................................................................................................hh.....................h...h.ths...................................................................................................................................................................................................................................................................................h.hlhhh-...........t.ls........hh.t...t.........h.......ps..........sh.th..sh.hhhs............h.shhu.hP.......................................t..............................................................................................................................................................................h...............EpRhs.....hh...........h..h.........................h..............l...t..h....P.s..........hh.GhFhhhu.....h.s.....h........................h........h......................................................................................................................... 0 277 517 835 +467 PF01966 HD HD domain Enright A, Ouzounis C, Bateman A anon Enright A Family HD domains are metal dependent phosphohydrolases. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.83 0.71 -4.10 154 24448 2012-10-01 20:28:14 2003-04-07 12:59:11 17 317 4881 70 6755 26042 7473 120.90 18 28.53 CHANGED hhpHslpVstlsttlutphs..........................p.thhhhuuLLHDlu+sh..p............................Hshhutphlpphtt...................lhphltpHppth........................................................hshtstllphADtlpsh .................................................................................hpHolp..V..h.p.h.u..t.t...l.u.p.phs.......................................................................s.h...p.l...l...t...h..A...u....L...lHDl.G..+.....s....h....hs.c...................................................................h..t...H.s...h..u...t....p....h...h..p.p.h.t..t..h.............................l...h.....p..hl..t..t.H.p.tth.t....................................................................................................h..........t.h....l.lthsD.ht..h............................................................................................................................................................................ 0 2723 4803 5965 +468 PF00271 Helicase_C helicase_C; Helicase conserved C-terminal domain Sonnhammer ELL anon Published_alignment Family The Prosite family is restricted to DEAD/H helicases, whereas this domain family is found in a wide variety of helicases and helicase related proteins. It may be that this is not an autonomously folding unit, but an integral part of the helicase. 
20.90 11.60 20.90 11.60 20.80 11.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.48 0.72 -4.12 480 89228 2012-10-05 12:31:07 2003-04-07 12:59:11 26 1286 6190 172 32236 78491 16395 81.30 23 8.77 CHANGED phhpttshpshhlaG...................shspppRpphlppFpsspt...................................plLluTslsupGlclsslshVl.h-h....................shs.pp...........hhQphGRsuRhu ................................................................................................................................t......th..t.h.h.h..l..cu..................................................p.h..s..p...p....p......R....p.......p.......h...h....p...p....F....p....p..s.ph...................................................................................................................pl.L..l..u.......T....s...........l....h.....s....p...........G.....l.........c..l.........s.........s..........l.......s........h.........V..l.......h..ch.....................................................................sh.s..hps.....................h.h.Q..p.......h.....G...RsGRs.......................................................... 0 11050 19238 26835 +469 PF02602 HEM4 Uroporphyrinogen-III synthase HemD Bashton M, Bateman A anon COG1587 Family This family consists of uroporphyrinogen-III synthase HemD EC:4.2.1.75 also known as Hydroxymethylbilane hydrolyase (cyclizing) from eukaryotes, bacteria and archaea. This enzyme catalyses the reaction: Hydroxymethylbilane <=> uroporphyrinogen-III + H(2)O. Some members of this family are multi-functional proteins possessing other enzyme activities related to porphyrin biosynthesis, such as Swiss:Q59294 with Pfam:PF00590, however the aligned region corresponds with the uroporphyrinogen-III synthase EC:4.2.1.75 activity only. Uroporphyrinogen-III synthase is the fourth enzyme in the heme pathway [2]. 
Mutant forms of the Uroporphyrinogen-III synthase gene cause congenital erythropoietic porphyria in humans a recessive inborn error of metabolism also known as Gunther disease [1]. 27.50 27.50 27.80 27.50 27.20 27.40 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.59 0.70 -5.12 137 4602 2009-01-15 18:05:59 2003-04-07 12:59:11 10 22 3824 15 1293 3545 2287 221.40 21 70.07 CHANGED sphspt...l..pphG.hpslhhPhlc.hts..........t.ltthh.ptht...phchllhTSssuVch.hhp.h...........................t..hphht....shplh.uVGppTupslcp...h.G......hps.h.....hss.phsu.csL..hp.hltt..........thts.pp...lLh.....hpuphs..pstlh..ptL.p.....ptG..hplpplh...sYcs......t......................httphhphlpp..tph..........cslshsSspsscp.hhphhtt...........hhpshplsu.IGssTucshp.chGhps.....hhu..cpsshcul..lp ...............................................t..h.thl.pt...h.G..hts...hth...Phlc.hps.......................t..t.l.p.ttl.....t.....t.......l.............................phchllhoSts...AVchhhphhp............................................................t...th.ph..t........sh.p.h.h..ul.GpsT.Ap..t.L..cp.........h..G.........hps.th......................hPp....p.tsu..-sL....lp..tltt................................phss...p+....................lLl...................hcustu............pph.ls.......psL.p.....................ptG.....h.p.Vpp.h....sYpp........t...............................htpht.p.t.hpp.....tph......................................ssl.shoSupslps....h.hphhsp.................................hhppspl..hs..lG.psActh....p...p....hG......h..p.s.........h.h..s..pp.s.tulh.t................................................................................................... 0 418 849 1111 +470 PF00173 Cyt-b5 heme_1; Cytochrome b5-like Heme/Steroid binding domain Bateman A anon Bateman A Domain This family includes heme binding domains from a diverse range of proteins. This family also includes proteins that bind to steroids. 
The family includes progesterone receptors such as Swiss:O00264 [1,2]. Many members of this subfamily are membrane anchored by an N-terminal transmembrane alpha helix. This family also includes a domain in some chitin synthases. There is no known ligand for this domain in the chitin synthases. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.61 0.72 -4.18 210 5879 2009-01-15 18:05:59 2003-04-07 12:59:11 23 159 836 98 3763 5749 252 82.90 25 17.81 CHANGED haohpElpcHs...........spcssWlllcG.+VYDlTp.FlccHPGG..............tcslhttAGpDsT-tF...................................................ssh...Hu..spA+phLcp...ahlGplt ............................hohpEltp+s............pppsh.al.s.l....p..G.......p......VYDlTp...ah..p....p.....HPGG.......................................tphl..h..p...h..A.......G...c...Du.Tc...t.F..........................................................................................................t.th.....Hs.......ph.h...p.p..hhpp.........hhlGpl.h................................................................................................................................................... 0 1318 2157 3127 +471 PF01814 Hemerythrin Hemerythrin HHE cation binding domain Bateman A, Yeats C anon Yeats C Domain Iteration of the HHE family ([2]) found it to be related to Hemerythrin. It also demonstrated that what has been described as a single domain ([1]) in fact consists of two cation binding domains. Members of this family occur all across nature and are involved in a variety of processes. For instance, in Nereis diversicolor Swiss:P80255 binds Cadmium so as to protect the organism from toxicity ([3]). However Hemerythrin is classically described as Oxygen-binding through two attached Fe2+ ions. And the bacterial Swiss:Q7WX96 is a regulator of response to NO, which suggests yet another set-up for its metal ligands ([4]). 
In Staphylococcus aureus P72360 has been noted to be important when the organism switches to living in environments with low oxygen concentrations ([4]); perhaps this protein acts as an oxygen store or scavenger. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.79 0.71 -4.00 189 6352 2009-01-15 18:05:59 2003-04-07 12:59:11 18 141 3149 37 2104 4919 320 127.90 17 51.44 CHANGED shhphlpppHcpltphht.....plpptssph.....................htthtphhpplhp....hptHhptEEph......l.....aPhhp..................thtt.h.pthpp..-H.....cphtph.lpplpphhppht............................tthhphhpthhphltpHlppE-..phlhshh ...........................................................................................................phlpppH.p.tl.t.c.hhs...........pltphhsph.......................................hptl.t.p.h.h.p...t...lhp...........h.p....t.H.h.p.p.EEph......h......aPhlp............................h..tt...h...pthpp......cH...................................cphtph.....lp.pl..p.phhp.shs..............................................p.th.hp.t..h....p....t.h...h..p.h.l.h.pHlthE-phhh.......................................................... 0 719 1432 1824 +472 PF00353 HemolysinCabind hemolysinCabind; Hemolysin-type calcium-binding repeat (2 copies) Finn RD, Bateman A anon Prosite Repeat \N 20.30 5.80 20.30 5.80 20.20 5.70 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.88 0.74 -7.19 0.74 -3.06 305 35811 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1002 944 173 11803 38527 14276 17.60 43 19.13 CHANGED hGusGsDplhGusGsDhl ..............GusGsDt.LhGGsGsDhl.......... 
0 2356 7679 9410 +473 PF00132 Hexapep hexapep; Bacterial transferase hexapeptide (six repeats) Sonnhammer ELL anon Prosite Repeat \N 20.60 16.00 20.60 16.00 20.50 15.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.13 0.73 -8.23 0.73 -4.23 216 70883 2012-10-02 11:29:45 2003-04-07 12:59:11 19 311 5065 687 17837 54606 32792 35.10 23 25.26 CHANGED sshIGpsshlsssshlhssspIGcsshlusssslst .....................shlGpsshl..ss..s..s..h.....l....h.....s...s....s.....p....I.....Gcs...s.hlusssslt..................... 2 5663 11374 14981 +474 PF03129 HGTP_anticodon Anticodon binding domain Bateman A anon [1] Domain This domain is found in histidyl, glycyl, threonyl and prolyl tRNA synthetases [1] it is probably the anticodon binding domain [2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.87 0.72 -4.04 116 17881 2012-10-02 17:25:11 2003-04-07 12:59:11 15 90 4984 142 5154 13645 6909 92.50 22 17.01 CHANGED pVhllslsp...........thh.....phstcltppLppsG.............lpsphD..pspplG..t+hccs-hhGlPahlllGpc-l..p.........p..splsl+pR....sst.cp.....h..plshsclhph.lpph ..................................................plhllslsp..............pht......phA.pc...l.....h...p.p.L...p.p..t..G...............................lc.l...p...h....D.........c...s......c....ph..G.....h+..h+.c.s-.h.h.shP..at..l....l.lGcc-l..p....................................s..spVsl+..pp..................pst...-p..................t....pl.sl..s.c..lhphl...t................................................. 0 1742 3245 4321 +475 PF03578 HGWP HGWP repeat Bateman A anon Pfam-B_220 (release 7.0) Repeat This short (30 amino acids) repeat is found in a number of plant proteins. It contains a conserved HGWP motif, hence its name. The function of these proteins is unknown. 
20.40 20.40 20.40 20.60 19.40 20.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -7.27 0.72 -4.63 91 1454 2009-01-15 18:05:59 2003-04-07 12:59:11 10 26 5 0 1162 1453 0 27.60 65 31.14 CHANGED AhsA.ADWChRLHGW.lhPPLL....GV..as.FTs ..........A.sA.ADWChRLHGWPIM.PPLL....GlYs.FTs............... 1 0 0 1 +476 PF00633 HHH Helix-hairpin-helix motif Bateman A anon Prosite Motif The helix-hairpin-helix DNA-binding motif is found to be duplicated in the central domain of RuvA [2]. The HhH domain of DisA, a bacterial checkpoint control protein, is a DNA-binding domain [5]. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.31 0.73 -7.19 0.73 -3.96 104 8740 2012-10-03 02:11:09 2003-04-07 12:59:11 18 51 4519 41 1840 8454 3232 29.70 42 10.45 CHANGED phpshhssoh--LtslsGlGttpApslhph ...........t.asGplPpsh-pLhsLPGVGcpTAsslhs....... 0 624 1195 1556 +477 PF01079 Hint Hint module Finn RD, Bateman A anon Pfam-B_766 (release 3.0) Family This is an alignment of the Hint module in the Hedgehog proteins. It does not include any Inteins which also possess the Hint module. 
29.70 29.70 29.70 29.80 29.50 29.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.45 0.70 -5.14 46 484 2012-10-03 10:25:13 2003-04-07 12:59:11 15 23 195 12 241 449 25 193.60 31 44.69 CHANGED uEs.......SsA.A+oGGCFPGsupVplpsGtpK.lp-LpsGD+VLA...sD.ssGpllaS-VlhFlDR-s......pppcpFhsIcT-ss.cplsLTssHLlFVucsp................ssss........................pssFAucVpsGphlhl........psssptlpsscVhclst.ppppGsYAPLTspGTlVV-sVlASCYAllcscsLAHhuFuPl.Rlhpt.........................ltshhhs..................................st.pppG.......lHWYuplLYpluphlLs .........................................................pts.GC..FP...usup.VplpsG.tp+hlc-L..psGD+VLu...hs...tpG..p.h.h..aScll...h.Fl.DR-.s...............p...t..p.ptFhsl..c...T......c...........s..s....cpLtLTs.sHLlasspps.......................sts.........................................pshFAsclpsGpplhs.......................sssttlhsspVtp...........l....sh.....t...pt....hG.hYAPLTtpG.ollVssV.lASCaA......hlp.......ppthuphsahPh.Rhh..t.........................h............................................................................................................................................... 
0 86 134 190 +478 PF01634 HisG ATP phosphoribosyltransferase Bateman A anon Pfam-B_1142 (release 4.1) Family \N 25.50 25.50 26.00 25.50 25.10 25.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.92 0.71 -4.66 18 3683 2012-10-03 15:33:52 2003-04-07 12:59:11 13 11 3572 24 1021 2521 1995 161.50 36 61.55 CHANGED l+stDIPtaVtpGtsDLGIsGhDllpEpp.....ts....VppLh-LsFGpC+LslA.lPps.t..h..pssc-ls..........thRIATpaspLsccYhccps.......................hssclhhlsGSlElu..PtlGlADuIlDLVpoGpTL+tNGL+.I..-slhc.SoAtLIss+ssh..cppcs..hlppllsRlpuVl ...................lRssDlPsaVtpGssDlGIsGcDlLhEpt.............ts....................hhp..lh..cL.....sF.....G.....t.....C.......+.....h.....sl.A.sPps......ts...h......ps..tslt............ttRIATpYPpls+cah.s.p.cG.......................ls.s.c.llp.LsGS.VElA.............P.h...h...GLADuIsDlV...............sT..GsTLcANGLc.l..-sIhc.SpApLIss...cssh.......pp..cpp...hlcpll.p+lpsh.............................. 0 333 672 875 +479 PF00512 HisKA signal; His Kinase A (phospho-acceptor) domain SMART anon SMART Domain Dimerisation and phospho-acceptor domain of histidine kinases. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.08 0.72 -3.98 265 79834 2012-10-11 19:05:54 2003-04-07 12:59:11 20 3878 5148 22 22919 67684 10793 67.20 24 10.76 CHANGED tppphlusluHElRoPLsulhshschLpp.................tphstpp..................pphlptltppspc.ltpllsclLphu+hcss ......................................hpphlusloHE......LRTP....L...s...s....l....h....u...h....s..c....hltp........................................tthsspp..........................................pch...l...p...t...l.t...p...p...s...p...c..lt.p.l.lsclLshu+hpt...................................................................................... 
0 7780 15044 19599 +480 PF00850 Hist_deacetyl Histone deacetylase domain Bateman A anon Pfam-B_343 (release 3.0) Domain Histones can be reversibly acetylated on several lysine residues. Regulation of transcription is caused in part by this mechanism. Histone deacetylases catalyse the removal of the acetyl group. Histone deacetylases are related to other proteins [1]. 20.40 20.40 20.60 20.70 20.30 20.30 hmmbuild -o /dev/null --hand HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.46 0.70 -5.13 148 5457 2012-10-01 22:40:15 2003-04-07 12:59:11 14 64 2207 138 2589 5145 3452 289.40 27 65.04 CHANGED H....................st......s.HP.Ess.....pRlpt....lhptLtp......s.s.........lh.th......................................................................sp.ss.pppl.t....tlH............................................................stpY.lppl......................................................pptt.pt..tt..................................................st.s.h................sstshpsu.....hhusGuslpAsc.tlh.....ps..........................................................t...cpu.....asls......RPPGHHA.p............................................t................................................................................................................sp............ut.....GFClaNslAlAAphh....................hp...hs.hp...RVhIlDhDVHHGsGTpchFh...............................................s................c.................spVhhhSlHp....s.....t..aP.....oGhh.pch.G..................................................................ttutu...tslNl.PLss........s..........ssDpsahphh...........pp.hlh.sh...hpp...............................F.pP..-llllsAGhDuttsD............sluph.slo.................spuatph...schlhphuptht...l.lhllEGGYsh.pslupsstthlpslh 
.............................................................................................................................................................................................................hhh..............t.HP..pstRlp.........hhp..t..Lht...........t..s...lhtph....................................................................htsp.As...tpcl....t...hh..H............................................................s.tc.Ylphl................................................................................................................................pphp....tt.tt...........................................................................stss.h.........thhc.t.s........thusG.usl.tus..c.tlh........p.s........................................................................................................................................................................................................................psu.....h.uls..tsshHHA..p..........................................................t............................................................................................................................................................................sp..............us............G.F.Chh...N.s.lulAhphh....................................................hp..........t..hp....RVhll.Dh.............D...lH...HGs..GspphFh........................................................................................................s......................................c.................scVhhhSh...Hp................................................tthaP........ssG.th.p-h..G......................................................................................t.u.s...............hslNl..P.Lss.....G.....................hsD...t.sa.h..p.hh..........................cphl....l....hp..t....................................................F...pP...-hllls..........sGh.....Du.htsD................................Lu.p...h..s..lo..............................................p
.s...as.p.h........sphl.hphu.........................................llhl...ht..GGYsh....pslspshth.ht...h.................................................................................................. 0 894 1512 2100 +481 PF00010 HLH Helix-loop-helix DNA-binding domain Eddy SR anon Unknown Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.60 0.72 -4.27 164 13830 2009-01-15 18:05:59 2003-04-07 12:59:11 21 125 1653 43 7025 12625 9 53.50 29 14.63 CHANGED pRpttsthE+pRppplNpsh.ppL+phlPp.s......................................tspK.lsKsplLchAlcYIppLp .................................+.t+s.h...hE.R.......p..R.R......p.c........lNpt.......h.p.....p.L.+pl..lPp..s......................................................................................................................spK..h.....s....K..sslLct.AlpYlppL......................................................................... 0 1550 2941 4943 +482 PF00403 HMA Heavy-metal-associated domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.90 0.72 -3.82 73 19664 2009-01-15 18:05:59 2003-04-07 12:59:11 21 116 4700 120 6028 15739 1631 61.20 26 16.34 CHANGED phtl.shsCssCspplppslppl.sGV...pslplshts.pplslph....sss............pp..lhptlp........p...huap .................hplpGM.oC.u..u.Csppl-c.sLp.p........l...s....G..V..............pp.s..p...V....s..l...s...s...p..p.....s..pV.p..h..............sssh...........................pp.....lhpslp..........p....hGa.......................................................... 1 1678 3706 5056 +483 PF00368 HMG-CoA_red HMG-CoA_red1; Hydroxymethylglutaryl-coenzyme A reductase Finn RD anon Prosite Family The HMG-CoA reductases catalyse the conversion of HMG-CoA to mevalonate, which is the rate-limiting step in the synthesis of isoprenoids like cholesterol. 
Probably because of the critical role of this enzyme in cholesterol homeostasis, mammalian HMG-CoA reductase is heavily regulated at the transcriptional, translational, and post-translational levels [2]. 17.10 17.10 17.80 17.30 16.70 16.40 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.14 0.70 -5.80 116 2199 2009-01-15 18:05:59 2003-04-07 12:59:11 13 21 1868 100 595 1829 689 348.70 33 68.69 CHANGED pculphRRphlp...phsstp..........................................lppls.tshDhp.....................................shspshENhIGhlplPlGlAGP.lhlsGct........ahlPhATTEGuLVASssRGs+slstuG.Gspstlhp-uMoRuPshth.....sltcAtphtpWlp..cshpplcphup.....sTopau+Lpclcs.hl.......sGphlalRFthsTGDAMGhNMlopusEtshphl....pcph..ss....hphlulSGNhssDKKsuslNhlpGRGKpVsAEshlst-llpp.hL...+ssscslsc.lsht+NhlGSAhuGuh...GaNAHhANhlsAlalATGQDsAplsESSpshThh-sp.ss...............-LhhSlThPoltVGTVGGGTtL.ssQptsLclLGlp.ss......sAppLAc.....IlAAsVLAGELSLhuA......Luuscls+uHhpLs..R ..........................................................p......Rhthlt........t..t.p..........................................hp.h.......pths..p.....................................lhs.phhENslGhhtlPlGls.ss.lh..ls.Gpp.........................YhV.PhsT.pEsul...VAusspGuKhlp.t...........u..G....G.hps...............hlhp.c.tM.htps...............slt.....slsc....s.t.p.h.p.thlp......pphpp..ltp..h.ss................hspRtGthpclps..ph................................ts.p..h..l.h..lcht..scTtDAMGtNMlsphhEslhsh.l....pp...h..ts.............tl..huI.uN.aso-....................phVssp.stI.shchl.pc.hh......c.s...scpls..c.lsh.spphstss.htA..h...saNtthhNslsAlhlATGpDhtsl.tu.upuh.st.h.-uphpu.............................tpLhsslolP.l..luTVGGuotl..PhtptsL.clLGst..............sAcpLAp.....lluussLAtphuhhpA......Lsu...stltpuHMpLp................................................... 
1 177 339 495 +484 PF02301 HORMA MAD2; HORMA domain Bateman A, Mian N anon [2] Domain The HORMA (for Hop1p, Rev7p and MAD2) domain has been suggested to recognise chromatin states that result from DNA adducts, double stranded breaks or non-attachment to the spindle and acts as an adaptor that recruits other proteins. MAD2 is a spindle checkpoint protein which prevents progression of the cell cycle upon detection of a defect in mitotic spindle integrity. 20.70 20.70 20.90 20.80 20.50 20.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.31 0.70 -4.85 95 975 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 319 45 638 916 11 183.40 20 60.33 CHANGED o.ptStpllpchlphul..............ss...........IhYhRG..........laPscsF..pphchh...............................................s......sspss...plhp.alpp...tlh-hLppp..........hlpplslsIh................ttc......slEpapFslph.............sp................................................................hstpphppplps..........llRpls.shsphL..s.L................ct...shshclhhas...........................s...........-...hpsPh.chsps.................................p...hhss..pphpltshsTshH 
............................................................tshthlpchhphul..............ps...........ILY.Ru.................lYPt...psF..pph..ph.h..........................................................................................................hspssplhp..alpp.......tltch.lppt.....................hlppl.slsIh.....................ptp...................slEpapFchph..............tt......................................t...........................................................................t.t.hs..p..plppplps...........ll+p..ls.sts.p.h.L..s.L......................ct.......h..shplhhas............................................s...........s.hpssh...p.tpp.............................hphtthts....................................................................................................................................................................... 1 196 321 493 +485 PF00104 Hormone_recep hormone_rec; Ligand-binding domain of nuclear hormone receptor Sonnhammer ELL, Griffiths-Jones SR, Bateman A anon Prosite Domain This all helical domain is involved in binding the hormone in these receptors. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.13 0.71 -5.20 86 7507 2009-11-03 19:52:29 2003-04-07 12:59:11 25 75 533 1034 3461 7141 2 187.90 19 41.59 CHANGED phhp.ptpppph....phhpthshpphh.phhppp...hhtsscahpp.hstFp.pL.shp-ph.tlLcshahhhhhlphhtpss...........htppphhhssstth.hhphpph......................th..c.lp...........hhpphhp.phtpL.ph....sphEhshhhshlhaph.....stpphp.......chpc..h.scphpptlsssLccYh............pthphsp....................stRlscl..lpllstlp............ph...............hphhpp..................hplscla ....................................................................................................hh.....................h.tphs..phh.sp.t...lht.hlcaAKp.lP.s....Fp...pL...sh..........pD...............Q............l...tL....L.c...s..sh.h.El....h.hl....t.hshp.h...............................................tpst.lh.h...s.s.s.hh...h.s...p.p.phpph.............................................h.hhp.hhp..........................................................................hhh.ph..s....p...php.pL..pl......sppEhs....h.l.p.AllLhss........................s.t..slp...............................pspp.........lcp...lp.cp..h.hps.Lppah....................ptt.t...t..p................................................................t+h..s..cL..lhhls...tl+........ph..........s..h.t...................h...................................................................................... 0 821 1092 2529 +486 PF03241 HpaB 4-hydroxyphenylacetate 3-hydroxylase C terminal Bateman A anon Pfam-B_3148 (release 6.5) Family HpaB Swiss:Q57160 encodes part of the 4-hydroxyphenylacetate 3-hydroxylase from Escherichia coli [2]. HpaB is part of a heterodimeric enzyme that also requires HpaC. The enzyme is NADH-dependent and uses FAD as the redox chromophore. 
This family also includes PvcC Swiss:O30372 may play a role in one of the proposed hydroxylation steps of pyoverdine chromophore biosynthesis [1]. 25.00 25.00 59.60 31.30 22.50 20.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.80 0.71 -5.48 87 994 2012-10-01 23:33:27 2003-04-07 12:59:11 8 3 749 14 219 738 251 197.20 33 41.46 CHANGED chssthh.ps.uhhphhsapu.sRhul+h-hlhGhutths-ssGs..schptVpp+luEllthtEhhhuhshAsttpuptt...sGshhPsh.hhpss+hhhsphhs+lhcllpcluuuullh..lPSps.Dhpss........-ltshl-+Ylpus.shsuc-Rl+lh+LhhDhssopauuRpphaEhahuGss.th+.htlhpths..hpsttchscphhsh ..................t.hspthh.cs.uhh+hhshQuhsRhuVKhDFlhGlhtpsh-ssGs..scaptVQscLuEllshpphhaAhu..uhsscup.h....hs...........G..salP-hthhpshRhhssphY.s+Ipcllcchsuuullh..LPSps.Dhp.sP........plsthLsKYl+Gstuhst.cRlKlh+LhWDhhuSpFGuRa.plYEhpauGs.pph+.hph.hpphp...tssphchhhshsp........................ 0 82 138 182 +487 PF02185 HR1 Hr1 repeat SMART anon Alignment kindly provided by SMART Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.42 0.72 -4.23 106 1359 2009-01-15 18:05:59 2003-04-07 12:59:11 11 60 288 5 738 1202 2 69.40 23 12.45 CHANGED ptlpplpcclphEhpl+pGuEshh+h......sssppp...........hppspsplpcopp+lphL+ppLcchptpt.stpsss .............h.hhpplp+clphEhpl+pGAENhhchh......ss-++t......................htpsptplpcSpp+..lphL+tpLpc.hptph...p...t................... 0 142 264 477 +488 PF00570 HRDC HRDC domain Bateman A anon Medline:98060076 Domain The HRDC (Helicase and RNase D C-terminal) domain has a putative role in nucleic acid binding. Mutations in the HRDC domain cause human disease. It is interesting to note that the RecQ helicase in Deinococcus radiodurans has three tandem HRDC domains [4]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.84 0.72 -4.26 147 6087 2012-10-03 03:05:55 2003-04-07 12:59:11 18 73 3520 22 1677 4778 961 67.30 27 11.64 CHANGED phslhcpLpphRcphA.ccp..s.lsshhlhs-psLhplApt.hPpshpcLtpl...pGluptplc.caGpp..hlphl ......................tlhptLtphRcph.A.ccp.......s..lsshh.....lhsDpsLhphA..c......t..h....Pp....o....hs-lhpl....pGlu.tp.+lc..+aGpt..hltll................................ 0 541 1053 1412 +489 PF03878 YIF1 Hrf1; YIF1 Wood V, Bateman A anon Wood V Family YIF1 (Yip1 interacting factor) is an integral membrane protein that is required for membrane fusion of ER derived vesicles [1]. It also plays a role in the biogenesis of ER derived COPII transport vesicles [2]. 25.00 25.00 27.70 27.00 21.90 24.90 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.54 0.70 -5.22 6 451 2012-10-01 22:34:14 2003-04-07 12:59:11 10 7 302 0 283 437 3 221.80 34 73.12 CHANGED hltsshss.huhtYGpslsspGpEhVcpshs+aluss+L+YYFsVsssYVspKLtLllFPahHps...............Wphphc.......p-s.lPPR.DlNAPDLYIPhMAFlTYlLlAGhtLGlQc+FoPEhLGlpASoALAalhlEllhlpLuLYLlslp..oshsslDLLAasGYKaVGlILusLssllah...uYYllhsassluhuhFllRoL+hslL...usssstshsshs...........tppp++hYhLhhlAAsQ.llhaWLo ......................................h...t.h...huhtaG.pshhspup-hhpppl.s..+al..s.s.....s.....t..L+aYFsVsstYVhpKLtL.llFPa.hHc.s.................................................Wphphp..........................ptt.hsP.R.D..lN.......u........P....DLYIP...................hMuhlTYlLluu....hhhGh.......p.......s............p....Fs.P.EhLuhtsopuls...hhhh.EllhlpluhYll.s..lp......ssh.shlD..LlA.asGYKaV...u.h.lls.l.ls.s.h.l......h......s.....t......hsa.....a.....h..s.hhashhuhshFl.........l.........RoL+hhll..........s.sss.t.t......t....................ttpph+hhhlhhhu.hh.Q.hhhhWL.......................................................................... 
0 99 159 232 +490 PF02793 HRM Hormone receptor domain Bateman A anon Bateman A Family This extracellular domain contains four conserved cysteines that probably for disulphide bridges. The domain is found in a variety of hormone receptors. It may be a ligand binding domain. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.80 0.72 -3.96 131 2035 2009-01-15 18:05:59 2003-04-07 12:59:11 17 119 140 37 924 1697 0 64.90 28 8.33 CHANGED s.hCstsaDsh.hCWPpsssGphsshsCPphh...t.pt.........................pu.....pspRpCsts....GtWsph...........sshspCtt. .............................t..hCstpaD.th..hCWP.potsGph.s.thsCPp.hh.......t.......................................................pG......pshRpC.sts......G....Wsth...........s.sshspCt..................................................................... 0 149 213 503 +492 PF01381 HTH_3 Helix-turn-helix Bateman A anon SCOP Domain This large family of DNA binding helix-turn helix proteins includes Cro Swiss:P03036 and CI Swiss:P03034. Within the protein Swiss:Q5F9C2, the full protein fold incorporates a helix-turn-helix motif, but the function of this member is unlikely to be that of a DNA-binding regulator, the function of most other members, so is not necessarily characteristic of the whole family [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.58 0.72 -4.12 175 36941 2012-10-04 14:01:11 2003-04-07 12:59:11 17 294 5111 176 7651 48830 5885 53.20 22 32.31 CHANGED l+phRp..ptshop..pplActhG..lspstlsphEpG....pppsshptlhpluchls.lshshl .............................lpthRp....pp..s..l.oQ........pcL..A.c.t.hG............lo.p..s.s....l....u....p..h....E......pG........pp...p.P...s....h....p..h....l...h...p.l...uphhs.hsht.................................... 
0 2481 4888 6400 +493 PF01402 RHH_1 Ribbon-helix-helix protein, copG family Bateman A anon Bateman A Domain The structure of this protein repressor, which is the shortest reported to date and the first isolated from a plasmid, has a homodimeric ribbon-helix-helix arrangement [2]. The helix-turn-helix-like structure is involved in dimerisation and not DNA binding as might have been expected [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.46 0.73 -7.61 0.73 -3.86 49 5913 2012-10-02 18:44:02 2003-04-07 12:59:11 16 35 2497 74 1378 3973 439 38.40 24 37.71 CHANGED plolplscchhcpLcchuccts.hS+Sphl+tAlpphhpp .............lslplsp-hhcpLcphu.......p..p...p...s..ho+.S.cll+chlpphl............. 1 404 897 1169 +494 PF02954 HTH_8 Bacterial regulatory protein, Fis family Griffiths-Jones SR anon Structural domain Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.63 0.72 -4.41 86 19848 2012-10-04 14:01:11 2003-04-07 12:59:11 14 176 2830 54 5558 15626 3713 41.30 31 9.21 CHANGED tlcc..lEcphltpALcpsssshscAAch.LGloRpoLth+l+ch ........................tp.hE+phIppsL..p...p...sp...G...s......h...s...c....A....Ach.LGl.s...RsTLhcKlcch................ 0 1884 3516 4654 +495 PF04005 Hus1 Hus1-like protein Wood V, Bateman A anon Pfam-B_12502 (release 7.3) Family Hus1, Rad1, and Rad9 are three evolutionarily conserved proteins required for checkpoint control in fission yeast. These proteins are known to form a stable complex in vivo [1]. Hus1-Rad1-Rad9 complex may form a PCNA-like ring structure, and could function as a sliding clamp during checkpoint control. 
20.60 20.60 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.81 0.70 -5.47 34 342 2012-10-02 11:47:48 2003-04-07 12:59:11 7 8 284 3 231 353 3 286.70 24 90.95 CHANGED M+F+splss...hpphpcllsslupluKhChl+LsssplhFll.t........sssGsQlWsplphss...lF-..pYp.lpSs.sp..spIsLEl.slssLhRAL+oups..............us..s.........................lpl+Ls++............tphPhLslphpssoh.........................................................tppslsp..-lPVclL.sppphpplpEPph.-sDlp...IhLP.sLhpL+sll-+h+pl................................ushlplpAs...........hpGpLp..........................Lslpo-.hlsloopa.psLtsstht.t..............................t.t.tttssspt.sptthsplpVDhKchsphLp.stplssspslhslsccpslhlhhhh.......pcsslpaalPuhs ..........................................M+F+s.l.s.........hphhpphhsslspL....t......K..h.s........h......l+lsspplphhht...............sssusplWs..pl.t..h.pt...............lFp..pap.hp..us..st...................NpIhlEl.shpsLtRAL+oups.......................................up......s...........................................hpl+Ls++..........................tphP.h.Lslshp.h.sh..................................................................tp.pphlsp...-lPVcll..p...pth...p.plp......p.Ph..s.s..p........s...-lp.....l.h.LP....s........Lt........p........l+..sll-+hcpl.........................................us.hl........lpAs...............hpG..pLp........................................Ltl.po-....h....hplsoha.ps.Lts....h........................................................................t..ttt....p..p.....t.tthspl..plch+ch.phLt....stph.p....spslhs....................lhppp...hhhh..hhh..................pss.lphhls...s.................................................................................................................................... 
0 77 128 192 +496 PF03810 IBN_N IBN_NT; Importin-beta N-terminal domain Griffiths-Jones SR anon PROSITE Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.70 0.72 -4.06 52 2930 2012-10-11 20:00:58 2003-04-07 12:59:11 14 77 330 59 2037 2826 22 72.40 20 7.63 CHANGED AEptLpphppp..Psah.tlhpll.p..ps.s.psR...hhAulhL+shlpp+..Wp..................plstpp+ptI+spllphlhp ..........................................ApptLp.phpp....p...s......s..hh..hLh......pl..........l............s.......p.............s................s......................t..........s.R.............hh...A...ul.hL+sh..l....p..pp....Wp...................................t.lsppp+t.tI+ptllphlh.p......................................................... 1 655 1084 1648 +497 PF01485 IBR IBR domain Bateman A, Mistry J anon [1] Family The IBR (In Between Ring fingers) domain is often found to occur between pairs of ring fingers (Pfam:PF00097). This domain has also been called the C6HC domain and DRIL (for double RING finger linked) domain [2]. Proteins that contain two Ring fingers and an IBR domain (these proteins are also termed RBR family proteins) are thought to exist in all eukaryotic organisms. RBR family members play roles in protein quality control and can indirectly regulate transcription [3]. Evidence suggests that RBR proteins are often parts of cullin-containing ubiquitin ligase complexes. The ubiquitin ligase Parkin is an RBR family protein whose mutations are involved in forms of familial Parkinson's disease [3][4]. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.68 0.72 -4.10 166 4521 2012-10-06 20:19:25 2003-04-07 12:59:11 16 133 321 4 3180 4384 29 66.80 23 18.68 CHANGED p+...a.ph...................tppph.........p..aCPs.ssCpthlthpptspp..................................................hpCs......Ct.tpaChpCppth....Ht..........sh..sC ..............................................................................................t.ph...........................+....hC.Pp........Cp.tsl..pt..s.s.sssp..............................................................................................................hpCs..t...........Ct..tpF.Ca..h..Ct..t.t.h......Ht..................................................... 0 984 1588 2459 +498 PF01614 IclR Bacterial transcriptional regulator Bashton M, Bateman A anon Pfam-B_755 (release 4.1) Family This family of bacterial transcriptional regulators includes the glycerol operon regulatory protein and acetate operon repressor both of which are members of the iclR family. These proteins have a Helix-Turn-Helix motif at the N-terminus. However this family covers the C-terminal region that may bind to the regulatory substrate (unpublished observation, Bateman A.). 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.35 0.71 -4.45 23 10733 2012-10-02 14:34:25 2003-04-07 12:59:11 13 33 2457 47 2964 8004 908 124.00 23 46.91 CHANGED tphpss.tlGp+lPlauouhGKsLLAah.sccthpphlcp..hphpphTptTlos.psLhcpLsplRpp.GauhssEEpptGltulAAPlast...puplluAlSlossssRhscpphpp...hhshlppsApplopp .............................................................................h....h..tlGp+.hP.h..a.s.o.uhG+slL...Ahh...sp....p.p...h.p.p.h...l.pp..............t.h...p....t...h......o.......t....p..........o.....l...s......s...............t.......t...L....h.....p.......p......L....t......p.......lR.......c....p......Ga.u......h...........s...........p..........p......E..h..p............G.....l.p.....slAs....P.l...h..st..........pG.p...sl...u...Al......olu............u....s...s..t...R..h...sp..p..p.hpp..........hhs.hlhpsAppls..t...................................................... 0 689 1722 2433 +499 PF04760 IF2_N Translation initiation factor IF-2, N-terminal region Kerrison ND, Laursen BS, Studholme DJ anon DOMO:DM04974; Family This conserved feature at the N-terminus of bacterial translation initiation factor IF2 has recently had its structure solved. It shows structural similarity to the tRNA anticodon Stem Contact Fold domains of the methionyl-tRNA and glutaminyl-tRNA synthetases, and a similar fold is also found in the B5 domain of the phenylalanine-tRNA synthetase. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.33 0.72 -4.31 141 7695 2012-10-04 14:01:11 2003-04-07 12:59:11 10 48 4486 1 1683 5556 2986 52.00 26 10.04 CHANGED .splpVp-LAccls.....h.ssp-llcpL.p.h....Glt..tshsssl-p-psphltpphshp .........pplsVtELAcchs.......h.pss-llcpLhp.h........Ghh...sohsps.l-p-.s.hphlspchsh........................... 
0 547 1098 1438 +500 PF05198 IF3_N Translation initiation factor IF-3, N-terminal domain Bateman A, Finn RD anon Pfam-B_629 (release 2.1) Domain \N 21.30 21.30 21.40 21.30 21.20 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.11 0.72 -4.05 181 4308 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 4162 1 1055 2633 2165 67.60 52 37.87 CHANGED ppp...plN-cI+.s..pcVRLl.spcGcplG..................llshp-ALc.hAc-tsLDLVclu.........PsAcPPVCKIMDYGKa+Y-ppK....Kp+Eu+ ......................hNpt.Ip.......plRll....s..p.Gcp.lG..................................lhshp-ALp.hAcct.sLDLVclu...........Ps..Ac.P.PVC+IM...D...YGKF..+YEppK....Kp+Et+....................... 1 369 701 898 +501 PF05004 IFRD Interferon-related developmental regulator (IFRD) Moxon SJ anon Pfam-B_4730 (release 7.6) Family Interferon-related developmental regulator (IFRD1) is the human homologue of the rat early response protein PC4 and its murine homologue TIS7 [1]. The exact function of IFRD1 is unknown but it has been shown that PC4 is necessary to muscle differentiation and that it might have a role in signal transduction. This family also contains IFRD2 and its murine equivalent SKMc15 which are highly expressed soon after gastrulation and in the hepatic primordium, suggesting an involvement in early hematopoiesis [2]. 
25.00 25.00 25.20 25.60 24.40 24.50 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.98 0.70 -5.34 7 396 2012-10-11 20:00:58 2003-04-07 12:59:11 8 7 219 0 250 334 1 275.60 27 66.11 CHANGED SD--us.pohS....s.s..ssuu.s-.hss.ht-thtppclE-pLcptlDtsp-KS...ApsR.psLctlphhl.s+hh.-Fl.-chhTLh-hlp+sh+KG+u.cEphLusplhuLlslQhGsG.cs-Elhcphts.LpsllpDu.otuhpsRtpCsoslulhsalussDspplht..tshc.lFthsa.h+sssss.Vlsst...slhusuL.AWuLLL......Tlsss.ph..phhppphstLsp......LLpusslslRlAAGEslAllaEhup............sh-p-.........F..hh-...shEpL..........sphLRpLAoDS.sKh+AK+D++tQRssFRDVlchlE .............................................................................................s.......t......t.p...............t...h...ptst...p...pph.p........t...Lpthl-th..h-Kp......sp.sRpsuLpulhph....ht.s+..h.h..........-...hl...c...+hhT.Lhcsht+sl..........K+.Gp......u...c.....E........p..t.h.A.hthhsllslph.s.s.....p.................sc.clhc..p........hts.hL.................pp..hlp.Ds....sss.hp.....s...........ss..hp..sLulssal..us.s..-...p-h....pshc.ha.h.h..hp..tss.p.........h..sst....tsslhsuA.lpuW.ulLl.........................Thh......s.s....plp....p......h....h........cpt........h....t.t..Lss..........lL.p.u.s..-hs....lRlAAGEsl.....ALl..aEhup..........................................s.pp-........................................h..hh.p......shptL....................................hphlppLAs....-......u..sKtt.uKpD++p.Q.RusFRsllphlE...................................................... 0 64 125 193 +502 PF00817 IMS impB/mucB/samB family Bateman A anon Pfam-B_1349 (release 2.1) Family These proteins are involved in UV protection (Swiss). 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.70 0.71 -4.41 113 9024 2009-01-15 18:05:59 2003-04-07 12:59:11 15 52 4250 224 2364 7309 2792 150.20 30 34.04 CHANGED lDhDsFaAosE...thhp.......P..pL..cscPlsVssttt.................................................uhlhsssYtARp.hGl+su.MslhpAtc..l...............C...P.p.l..........lll..................................ssshshYpchSpplh.pllpch.s............lchhS.lDEsal........Dlos..h.phhss...............................................................................................................pplupplccplhcph.t..lssSlGlusschlAKlAoc ...............................................................................................lDhcsFaAulE..........hhpp............P......pL...pspPl..sVuus.t.p...........................................................................................tGllh.sss.h........p.A.+.p..hGl.+.su.hs.h.h.p...Ahc...h....................................s........P...p..l..................................hhl............................................................................................................................sss.h.phYtc..hS.p.plh.plh..pc.a..o......sh........................lEs.h.S..l....DE.ual..............DlTs........shp..hh.s.s.....................................................................................................................................................................................................................................................................tp.l.u.ppl+ppl.h..p.c.h..t.....lssos..G.l.u.s..s..KhLAKlAs................................................................................................................................................................................... 
0 760 1436 1969 +503 PF04836 IFRD_C Interferon-related protein conserved region Waterfield DI, Finn RD anon Pfam-B_4453 (release 7.6) Family Family of proteins thought to be involved in regulating gene activity in the proliferative and/or differentiative pathways induced by NGF [1]. 20.30 20.30 21.30 20.30 19.20 20.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.71 0.72 -3.79 13 196 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 99 0 108 166 1 54.50 53 13.18 CHANGED NEhLR-lF-LGPslh...sssh+shKlo+hE+HhhNuAAFKARTpsRuKhRDKRusV ........NEhLR-lFpLGPslh...sssshKshKlSRhE+HLaN......uAAFKARTKuRuKsRDKRuDl.... 1 24 40 71 +504 PF04762 IKI3 IKI3 family Wood V, Bateman A anon Wood V Family Members of this family are components of the elongator multi-subunit component of a novel RNA polymerase II holoenzyme for transcriptional elongation [1]. This region contains WD40 like repeats. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 928 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.64 0.70 -13.77 0.70 -6.74 34 404 2012-10-05 17:30:42 2003-04-07 12:59:11 7 9 257 0 291 526 10 666.90 27 69.81 CHANGED 
MRNLpslspsphp.tsps.........hslhso..saDsso-slhhshu...ssps.sslElpphtpss........th.thlssa....................sclluhpahsD.......spplshshssGDllhsp.t.............ssspshlEIVGol-sGIpAusWSPDEEllAlsTtp.............psllhM.o+pFEslsEhsLs..s-Dlph..SpaVoVGWGKKETQF+G+t..uKA..................................h+DPThs.pl-pGtlot.Ds.tpssISWRGDGpYFAVool..css...........RRslRVasRE.Gp.L-SsSE.sVDGL...........EpsLuW+PpGsLIAusQ..R.pscp............h-lVFFERNGLRHGEFsL+h..st-Etl.....................hpLpWNuDSslLAl..hh.........p-c.............lQLWThsNYHWYLKQEl.h..t...t.............hpWcsE..+shplhssss..spl.hh-asapsspussh.stD.Gh......ssVIDGpplhlTPhphusVPPPMuhtclphs..........sslh-lAhutsspp............hAslspcs.lhhht..shpth..............................stpPhltsphsh.........t.tst.tph+plsalscs....plhllhsss.........................hsclhlhph.t-spp............hhhpphsphppllhhhspsshpt....hhhpsh.sGplh...........plssptphp.h.............phPphs.phclsthpsst................hhhhGL.ossG+LaAssphlu........sssTShhl..........T.................................psaLL.................aTTspch......................................LpFlcLs..............phpthtls.ssst.......DER....sRpIERGS+LVslhPoc.huVVLQhPRGNLETIaPRhhVLuslRph..lc....pccY+cAFhsCRopRlDlNIlaD.asP-hFhpNlth....FlcQlpclsalsLFlS..sLp-EDVTp..ThY+-sh...................................hp..ttttthhtsspsoKVNplC-AlLpsL........sphhspalpsIlTuasppsPPsLcsuLplltpLpt.......................sstps-pAlcalsFLsDVNpLYcsALGlYDlcLsLhVAQpSQ+...DP+EYLPFLppLpphsphc.R+FpIDcaLp+apKALpaLtphs 
....................................................................................................................................................................................................h...........................................................................................lhsh..h.t..........hhhh.ttGplhhh................................tt..h-hhs..htsGl.shtWSsDt-hlhhhTt.t....................tpllhh.s.pta-sl.....-..ht...pt.t..........tt.lshsa...Gpp.pTQFpGp..hp.............................................................ttspl.oWR.GD..utahul.s...h....s...................................................hRhh+la..s.....R......-.....h.........L...pss.....u...E...h.....ssh..........................................t....sl..sW+Pp..G..s..hIAshp...p..h.pp...............plsFaE.+.N.GL.h.Hup.Fs..l.............tp...h......................t.l.Ws..ssSplLAl...h.............tsh...........................................lplWs.h..t.NY.Ha....YLKppl...h.................................hhWc......pshphhhh.....st...............................th............hphhathst..u.....................p.u......................shVlDG...pplhlTs....hp.s.............lPPPhs.hph.h................t.s.h..tlsh..p.........................hshh.tpt.l.hh..............................................................................h............................h..hhh.ttp.....hhhh....t................................h.....................................................t.....h..s....t............hhhp.........tuphh.................................t.........................t..s...h...hth.h.httt......................hhhul.s.ppttha.h.....s..t............ls....................pshoSh.h..........h............................................tall.....hTs.....p.p.................................................................h.hh.lt................................h........t.........................t....................hRt.lERGuhllsshspp.htllLQhsRGNLEslh.RhhVLs.lt.p...lt....th.....atpAh.hhRp.+lshsllh..D.a.tsp...Fht.p..
ht......Flpp........l........tp.......p..hls.Fhs...................pL.........pppshst..shY...h...............................................................tpKlshlCpthht.hh.........................tp.h.sllTsasppt.s..t.lp.sLthltpht..........................................................t...t-pu...lpal.hhL..-sptlap.uLuhY-hpLshhlAppuQh.............DP+EYLPaLpp...Lpt...................................h.p.....pa.ID.hLtpatpAltpL.t.............................................................................................................................................. 0 106 165 244 +505 PF00478 IMPDH IMP dehydrogenase / GMP reductase domain Finn RD, Bateman A anon Prosite Domain This family is involved in biosynthesis of guanosine nucleotide. Members of this family contain a TIM barrel structure. In the inosine monophosphate dehydrogenases 2 CBS domains Pfam:PF00571 are inserted in the TIM barrel [2]. This family is a member of the common phosphate binding site TIM barrel family. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null --hand HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.10 0.70 -5.56 84 7826 2012-10-03 05:58:16 2003-04-07 12:59:11 20 15 4695 86 1800 6840 6242 406.30 38 94.26 CHANGED cuLTF-DVLLlPstotlh.scsclshp.....lo....+plpLslPllS.AsMDTVTEt.cMAIuhAptGGlGl.IH+.Nhohc.pQsptVccVK+.h-sshlpsshhhtPptpltps.thhtphthsuhslspttt.hhhlht....h.................................h.ppphlss..shshtps.phh.pp+htcl.lVscpspLhullohpDh.+sppaPpAs+....DppspLhluAAlGststshcRhpt..Lsc..A..Gl..DslllDoAHGaSpthlchl+tlKppaP..plplIAGNVsTs-uscs.LIpAG.ADulKVGlGPGSICTTRhVsGVGhP..QlTAlhcsAcsAcph......G....lPlIADGGIchsGDIsKAlAuGAssVMlGShhAGTpEuPGEhhh.hsG+paKpYRGMGSlsAMpc............GSscRYhpts......tpchVspGlpGtVPaKGslpchlhplhuGL+.uuhsYsGupslp-L+cps.......alRlosAth.EupsH 
............................................................................................................................................................................................................................................................................p.uLTF-DVLLlPu.c.......Sphlssps-lsop..........................ls....cs..l.pls.l...P.l.l.o.As.MD......T.V.T....-s..chA...IAhAp..p.G.GlGl.....l......H......+...s......h......o...h...p......pQ.A...p....p....V..ccVKc..csshh.ss.hh..p..h.th..h.tt.thsshsl.tt.....hh....................................................................hhh..s.t.......h.t.ch.ph..hstt..h.shlhhpch.p..phPp.us+.....D.p.pu.+..L..h..V...u..A...A..l.G..s.........s...s........s.....s.........h...-..R....s....c..t.........L.l.c....A....G.....l........D....h.....l.....l.............l............D...........o.....A.......H...........G...H...........S.......p..t...........V.......l.......p.........h...........l.....p.......c......l...+..p...p..a..P...........p......l.p...l...I...A.GN...Vu...Ts.-u.scs....L...h.c.A.............G..A.Du......V.K.V.G.I.G...P....G....S....I.C.T..T..R......l...........l......s.G..........V.....G.s.P....Ql.o.A...lh.-.s...A....p....A.....A..+.t..h...............s........h.PlI..ADGGI+huGD.l..uKA..l...A..u.GAs....s.V.MlG..S....h....h.....AGs.-Eo.P.G.....-h..hh....h...................pG.........+.p...a.....K...p.......Y.hGMGShsA.Mpp................................GutsRYFpts........................tpKh..VsEGlcu.p....Vs.a....+Gslpshl..hp..hhGGLR.S.uhuYsGutslp-Lppps........pFlclss.uthtEut......................................................................................................... 0 567 1087 1490 +506 PF03941 INCENP_ARK-bind Inner centromere protein, ARK binding region Wood V, Griffiths-Jones SR anon Pfam-B_67765 (release 7.2) Family This region of the inner centromere protein has been found to be necessary and sufficient for binding to aurora-related kinase. 
This interaction has been implicated in the coordination of chromosome segregation with cell division in yeast. 20.60 20.60 20.60 21.00 20.20 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.75 0.72 -4.12 20 272 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 207 13 206 269 2 57.30 33 6.18 CHANGED -IsSDs-s--sst.....hslsuWAcuspL.cptlhcQtth...DssplFGsIsPLclEEIFps ..........................-ltSDsps-D-sp.............p.p..slP..sWAc..uspL.pptlhpQtth.....-spplF.G.s...l.....s..hphE-lFt...................... 0 68 112 169 +507 PF04179 Init_tRNA_PT Initiator tRNA phosphoribosyl transferase Wood V, Finn RD anon Pfam-B_16986 (release 7.3); Family This enzyme (EC:2.4.2.-) modifies exclusively the initiator tRNA in position 64 using 5'-phosphoribosyl-1'-pyrophosphate as the modification donor. As the initiator tRNA participates both in the initiation and elongation of translation, the 2'-O-ribosyl phosphate modification discriminates the initiator tRNAs from the elongator tRNAs [1]. 
30.00 30.00 41.80 33.60 28.10 28.90 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.82 0.70 -5.62 20 202 2012-10-02 20:12:17 2003-04-07 12:59:11 7 6 165 0 148 203 4 381.70 35 89.56 CHANGED pL++ssLSlpNRLpSIhpDupFVcc.Vssta.pLPLVANERCGtWYlsPchcusSuYFKSTDGHTuQWsFShRRLNLHLLsllscpsGhlIVDSTRRGKhMPDALSKTlPIWCAVlNpsla.spsp.............................phLhhPPphVusSEcspItp+.lstFVppLppL.tlDlppLpppl......pKPlRPhWh...............sscshhpssh.p.ptpsahsllLCTuS++sp..............suphpc.......sG..YlQGAuDDcEtWAp......GLTPslFW........sspppL..lsp.........uE--LsshlspLlppppppssspsst...lts............................ssslhlGthss...................tspashVlshssp.h.t.......psppppsppLphshtuuKhGS+pLRptLsplpsFlptphus................spplllsC-oG..+DlSsGlhLsllChaaspchp.........................hpt.......t.splsKphl+ppLshlhp.h...plNPSRuTLpSVNuaLM ........................................l++psholhNRLpSI.tDutFlpp.lttha...t.................h......PLluN.RCG.WYhsPpt..h..t..s.o....sYFKSTDGHhspWsFShRRLNlpll.h......h....tpps.Gh.....llVDSTR+.G.KthPDALSKTlPIWssVlNpsl.h.pt...t......................................................p.lhh..Ps.hlstoEcspIpp+.lstastplp.......ph...ths...h.tpLtttl......tKPlRPhWh.....................tsps..h....s...p........t.....tt....sahsllhsoASp..psp...............ss.t...pc.........hu..YlQGAuDDpE.Wuh......GLoPslFW........tptp.L..hss.........scppl.sp.hl..tplltpppttts.ttt......lt..............................................................sppl..lsthts....................................t......hs.h.ll.hstp......................s....pt.thlph.h..s.ssKhupp.....t.Lcp.Lsph.thhtt.htt.................................t.tlllh..C....p..sG..p.Dh..SlushLsllshh...as.p.ph..........................h.t........t...h.s....K....l..+p+Lshlhp.h....pspPSRssLppVpsaL................................................................................................................................................... 
0 49 89 127 +508 PF02022 Integrase_Zn Integrase Zinc binding domain Bateman A anon PSI-BLAST 1wjb Domain Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. This domain is the amino-terminal domain zinc binding domain. The central domain is the catalytic domain Pfam:PF00665. The carboxyl terminal domain is a DNA binding domain Pfam:PF00552. 21.00 21.00 21.50 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.85 0.72 -4.51 92 14892 2009-01-15 18:05:59 2003-04-07 12:59:11 14 72 180 53 30 13552 0 39.60 77 6.93 CHANGED ApE-Hc+aHsNh+sLppcFslPthVA+cIVppCspCphpG ........AQEEHEKYHSNWRAMASDFNLPPlVAKEIVASCDKCQLKG..... 0 10 11 12 +509 PF00520 Ion_trans ion_trans; Ion transport protein Finn RD anon Pfam-B_33 (release 1.0) Family This family contains Sodium, Potassium, Calcium ion channels. This family is 6 transmembrane helices in which the last two helices flank a loop which determines ion selectivity. In some sub-families (e.g. Na channels) the domain is repeated four times, whereas in others (e.g. K channels) the protein forms as a tetramer in the membrane. 
A bacterial structure of the protein is known for the last two helices but is not the Pfam family due to it lacking the first four helices 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.53 0.71 -5.07 285 20399 2012-10-03 11:11:44 2003-04-07 12:59:11 26 434 1652 37 9781 20841 2116 196.60 17 31.06 CHANGED hhshlhsslFshEhll+hhuhsh......................hpYhpssaNhhDhlsllsshlshhht.ht.........................................................hthlchhRhh...R...hl+lhphhps....lptl.....ltslh.pshtslhplhllhhhhhhlaulhuhplatstht.t.................................................................................................................ptpssFcshstuhhhlapshTstuasslhhsh.............................shhshlahhhhhhlsshhllNlhlull ..........................................................................................................................................................................................hphhhhhhFs.h.Eh..h..l.....+.h..hshs..................................................................................ht.ah...p...s.....h....a.s....h..h....D...h..l....h...l...h...h...s....h.l..s.h..h.h.t.ht.............................................................................................................hp.hl.R.h.h.R.hh....R...............hl.....+..l....h..p..h...h....p...t.........................h..p..hl.................l......s..l..h....p......s...............h.......t...s.........l...h.....p.....l..h........l..l.........h....h...h....h...h.h........l...a........u....l..h..u..h...t....l....a...t...t...p...h....t.................................................................................................................................................................................................................................t..t..t....p.F...s...s...h......h....t..uh...........h.h...l.........h.........p.............h...............h.T..s...t....u....a.s..s..l.h..t.
............................................................................s.h.h.s.h...l..ah....hsh....hh..l..s.s...h....h..hl.sl.hlul.......................................................................................................................................................................... 2 2792 3895 6681 +510 PF03770 IPK Inositol polyphosphate kinase Finn RD anon Pfam-B_1382 (release 7.0) Family ArgRIII has has been demonstrated to be an inositol polyphosphate kinase [1]. 22.20 22.20 23.50 22.30 22.10 22.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.31 0.71 -4.67 109 1104 2009-01-15 18:05:59 2003-04-07 12:59:11 11 19 318 15 725 1054 13 210.70 27 40.30 CHANGED allLEsLstsappP...CVhDlKhGsRpas.-....As................................pKtpp.pp+sppoTStpLGhRlsGhpla.p......................................ppst..hh.....phsKhaGR.slstp.phpculppFltss............thppttthht.....thlp+Lpplpshlcppct....aRhYuSSLLhlYDuc...................................................................................................................................................................t..spscl+hIDFA+.ssh................hsssshDcGaLhGLcsLlphhp 
.....................................................................................................................................alhLEsls..t..t.......ap..pP...ClhDlKhGsRpasp-...ss.................................................................................................................................pKhpp..t.+pptooossLGhRlsGhpl...ht................................................................................................tpst.....h.......hhs.K........a.G+..s....h.s.hp..ph.p.p.s.lpp.Fht...................................tp.l..ht.........................hlp+Lpplpp...hlcppp....................achhuSSLL..hlYDup..................................................................................................................................................................................................................................................................................................................................................................................................................................................ttsplphIDFA+sh...................................................h.tts.Dc...GalhGLcsLlplh................................................................................................................................................................................................................................................................................................................................................. 0 250 366 558 +511 PF00612 IQ IQ calmodulin-binding motif Ponting C, Schultz J, Bork P anon SMART Motif Calmodulin-binding motif. 20.30 11.10 20.30 11.10 20.20 11.00 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.00 0.74 -6.85 0.74 -3.79 345 16654 2009-01-15 18:05:59 2003-04-07 12:59:11 22 705 467 44 8271 15224 358 20.70 32 5.40 CHANGED ppuslhlQshhRuahsR+pap .........pAAlhIQuhaRGahsR+ph........... 
0 2330 3800 5778 +512 PF01007 IRK Inward rectifier potassium channel Finn RD, Bateman A anon Pfam-B_18 (release 3.0) Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.12 0.70 -5.65 13 1452 2012-10-03 11:11:44 2003-04-07 12:59:11 15 11 294 52 764 1256 32 284.90 37 78.63 CHANGED VpKsGpCNVpasNlptc.ptRYLsDlFTTlVDlKWRapLllFshuFlsSWLhFGhlaWLIAahHGDLpt..................ssstTP......CVtsVpuFsuAFLFSlETQTTIGYGaRslTEECPlAIhlllhQuIlGsIIsuFMlGshhAKhApPKKRAETLhFScpAVIuhRDGKLCLMaRVGsLRKSHlVpAplRupLl+s+pTpEGEhlPLcQhDlsVshDsGs.DplFLVoPlTIsH.IDcsSPLa-luppsL.pppDFEIVVILEGhVEoTuhTsQARoSYlscEILWGHRFpPVloh..EcGhYcVDYSpFcpThEVs....TPtCSA+-LsEpK......Lhpshp..........sh-sph ......................................................................................................htKsGpsNl..h.t..p...h...t...p...................h.a..ltDl..aTThlD...h.p.W.R.a.hh.lFshsahho..W.hh.Fu...hh....aahlA...hh+G..D....l......................................s..h..ps.......C.l...p.l...p..u...FsuAFLFSlETpsTIGY.Gh....R...h.lT.-.cCP.ulhll.l.h..QsllGh.llsuFh............h..GshhsKhupP+...+..RA.p..Tlh.....FScpAV...............I...u...hR......c........G...+.L..CL....MhR..V..u..s......hR.......p..Sh..ll..p..upl..+s.p.L.l...+...s.p.......T.....E.G..E.....h..l....l.pQ.h-.l.s................l..sh.s.......t...u................c.....p..lFLl.P..l.....slh.H..ID.ppSP......lash.......s.t.p.....s.........l.....t.p.p..c..FE...........ll...............VhL-GhsE..uT....u.........h.o.s.QsRoSY.l........sp..E..IhW..............GaRF.s.ll....................c......p..s..hY..p...l.Dasp..Fp..psh.ps.......sP.......h...s.hh.............................t............................................... 
1 170 277 492 +513 PF02174 IRS PTB domain (IRS-1 type) Bateman A, Mian N anon IPR002404 Domain \N 21.00 21.00 21.10 21.10 20.60 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.14 0.72 -4.40 32 923 2009-01-15 18:05:59 2003-04-07 12:59:11 12 28 105 45 552 834 1 96.60 28 10.89 CHANGED ptstF.V.lp..........hptsupcssl.pGsh.hLplopcsLhL............hpsppshlpWPhptLRRa........Gp.s..pshFoFEuGR+CsoGtG.asFpspc...upp..laphlppshps..pp ...................................t....a.V...............h.....s.....pt.sh..hG.s.h...hLplTp..c.......slhl.................sptpsc.psh..hpWsLpslRR..a.............Gt..s..sshFohEsG.Rp..sssG...Gh.a.thpssc......upp...I..aphltphhpt...t......................... 0 117 160 311 +514 PF02922 CBM_48 isoamylase_N; Isoamylase_N; Carbohydrate-binding module 48 (Isoamylase N-terminal domain) Griffiths-Jones SR anon Structural domain Domain This domain is found in a range of enzymes that act on branched substrates - isoamylase, pullulanase and branching enzyme. This family also contains the beta subunit of 5' AMP activated kinase. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.28 0.72 -3.94 59 8560 2012-10-02 20:10:03 2003-04-07 12:59:11 13 125 3303 51 2144 6979 701 85.90 27 11.95 CHANGED sLGuph...........sspFplWuPsAppVplhh.hss.....h.tpphshp.....tpsGlWplhls.......sht.......t.hYtaclpsss......................G.....hhhhhDPYA ...........................hGuphpst............GspFsl.WA...P.s...A.p.pV..pls...s.hss..................ht..t.p..p...hs.hp............tpssGlWc.h.hls...........slt.G......................t.hYpYclpsst.......................G..........hhhhhDPaA............................................................................... 0 684 1346 1804 +515 PF00857 Isochorismatase Isochorismatase family Bateman A, Griffiths-Jones SR anon Pfam-B_566 (release 3.0) Family This family are hydrolase enzymes. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.87 0.71 -4.33 64 13319 2009-09-13 17:43:14 2003-04-07 12:59:11 15 81 4229 124 3488 9497 1254 172.10 20 81.06 CHANGED sALl..ll..DhQ.pshh............tstphpphltshpcLlcsu..+pts.....hs....llhspphhp.................................hhh.ss.ssplhsplps......s-hhlp...Kp......phsuF.......hsosLpphLcp...psl..c...plllsGhtTphCV...tsTshsAhpp.Ga.....plhllsDu.....suuhs......spt.pptulppht......hh.uhlhsspp .........................................................................................................................ALl.llDhQ..pshh...............................hsh..ss...t..sp..p....ll..s.s..l....s..pL...h.p..t..s......+.t.t.s..............hs...............llhspphtt............................................................................................................s....h...h..h......s..s.........s..t....p...........l..h.......s...........p...L.......ss.............schl..lp....................Ks................phuuF.......................................tsTs.Lp.p......h.L.+p........p.ul.....c...............pll..lsGltTchCV.................tpTs..h......s......A......h.......ph...Ga...............................pl.hllpD.A.....su.shs................tpt..pphul.t.tht...........thh.s...................................................... 1 1002 1990 2805 +516 PF02373 JmjC jmjC; JmjC domain, hydroxylase Bateman A, Wood V, Mistry J anon [1] Family The JmjC domain belongs to the Cupin superfamily [3]. JmjC-domain proteins may be protein hydroxylases that catalyse a novel histone modification [4]. This is confirmed to be a hydroxylase: the human JmjC protein named Tyw5p unexpectedly acts in the biosynthesis of a hypermodified nucleoside, hydroxy-wybutosine, in tRNA-Phe by catalysing hydroxylation [5]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.21 0.71 -3.84 32 2778 2012-10-10 13:59:34 2003-04-07 12:59:11 17 174 310 95 1692 3393 72 108.20 25 10.42 CHANGED hlYhG.hhophsaHhEspshhS...lNahchsus+hWaslPsppspphc...phhpcp.......h.tp.th.lpphsshhsPph..LhptslsshchsQcsGEhVhs.stsaHsshNhGashu.uhNF ..........................................................................hhhuh.hot..hs.h.Hh-.stt.hh.u.............ls..a....h............h.........h..........s....us......K.......t...W......a...hl.P.s.p...p..t...p...p..hc........chhpph..........................tp..h....lp.hh...hs...h..h....sph.........lhp.h...s..l......s......h.c.............h..hQps...G-hlhh.st..sh.H.t.......s..hNh.G.h.s.hs.uhNF............................................................. 0 478 771 1239 +517 PF02099 Josephin Josephin Mian N, Bateman A anon IPR002950 Family \N 19.90 19.90 20.20 21.00 19.40 19.60 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.19 0.71 -4.60 10 651 2009-01-15 18:05:59 2003-04-07 12:59:11 12 14 168 8 262 634 1 135.20 47 52.38 CHANGED +Q-us..LCAlHCLNsLLQ............................GsaFocs-LusIApcLDppEcsphsp.....sspsstoaht..csSpNhspsG.FSIpVLppALclWsLpllsa..psschpstph...cP-spsuFIhN..........hscHWFsIR+l.........supWaNLNSlLuA.PcaIucpYLusFLcplcupGaSlFlVps.s ........................+Qctp..LCA.HsLNNlLQ..............................................................................................u.p.hFo..pLspIsppL.s.pph.h..............................th....psp.tN.h...ss........G.aslp..........VI.......sALp.s.hu.hchl.a....s.c..h..phhtl................s...p.huFIhN.......................................hccHWhslRcl.........................................s.tpWaN...LsShLsu..P-..hI.u.-..s.h..L..t.hFL...s.p.l.p..p..p.G.h.plFlV..s........................................... 
1 94 133 200 +518 PF02214 BTB_2 K_tetra; BTB/POZ domain Bateman A, Eberhardt R anon Pfam-B_27 (Release 5.2) Domain In voltage-gated K+ channels this domain is responsible for subfamily-specific assembly of alpha-subunits into functional tetrameric channels [1]. In KCTD1 (Swiss:Q719H9) this domain functions as a transcriptional repressor [2]. It also mediates homomultimerisation of KCTD1 and interaction of KCTD1 with the transcription factor AP-2-alpha [2-3]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.34 0.72 -3.78 46 3855 2012-10-02 01:20:04 2003-04-07 12:59:11 17 108 285 59 2405 3406 33 92.00 29 21.51 CHANGED lhlNVGGh+FpTppsTLsph.PcohLuphhc....................hp.assspsEaFFDRsPptFcsILsaYRo...GcL+ts.phslptahcElpaatlsph...lcpC ............................................................lhlNVG.Gt.h.apTp.....h.p.T.L......sph...Pc.o..h.L.uph.hptp.....................................................ht.hc..s.p.ps..c.a.Fh..D..............R..c..st...h............FphILsa...hRs.............G.......cL.....h........h.....s.......t..p.....h.......s.....h.t....t.h........hcEhcaatlsth......................................... 0 702 944 1575 +519 PF02705 K_trans K+_trans; K+ potassium transporter Bashton M, Bateman A anon Pfam-B_677 (release 5.5) Family This is a family of K+ potassium transporters that are conserved across phyla, having both bacterial (KUP) Swiss:P30016 [3], yeast (HAK) Swiss:P50505 [2], and plant (AtKT) Swiss:O22397 [1] sequences as members. 
24.80 24.80 24.80 24.90 24.70 24.60 hmmbuild -o /dev/null HMM SEED 534 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.90 0.70 -6.15 84 2676 2012-10-03 01:44:59 2003-04-07 12:59:11 11 6 1808 0 887 2162 250 506.20 40 80.78 CHANGED slsALGlVaGDIGTSPLYshpssh...............thshssstpsllGlLSLIhWoLhlllolKYlhllhRADNcGE.GGIhALhuLlp.................................................ptsptttal.......................hhhlullGuuhhhGDGlITPAISVLSAlEGLp...lhsPslp.......shllslolsILlsLFhlQphGTsplGphFGPlMllWFhsluslGlhs..lhpp.PtlLpAlNPhYAlpFlhp..psh......huahlLGuVhLslT.GuEALYADMGHFG+psIphuWhhhVhPuLlLNYhGQGAhllppP...tsh........pNPFa...thh.Pp....hhhhPhl.lLAThAslIASQAlIoGuFSlspQAlpLshhP+l+lhaTSppptGQIYlPhlNWlLhluslhlllsFpsSssLuuAYGlAVohoMllTTlLhhhhhthha.chshhhshhhsshFhhlDhsFhsANlh.KlhcGGWhPlllusllhslMhoWppG...pphlhpt.....................................htpptl.............slsphhptltp..ps.s...............................+lsGs.....AlFh...oss........pslP.slh+h.lcp.pslaccslhlolhph.shPpls.scRhplp.....pls.......ssha+lhlpaGFh- 
............................................................................h.lsAlGVVYGDIGTSPLYsh+psh..................tsthshshspsslhGhlSLIFWsL..hll...solKYlhhlhR..AD.N.p...........G...........E..G..........GIhuLhuLst.................................................................................t.t.t..tptphh..........................hllhullGuuhhhG.....Dul....lTPAI.S..................VhSAlEGLc.......lssP..thp..................shl.ls.lol.l.lLshLFhlQ+aGTshVGph.Fu...P...lMllW...F...lhL...u..s...l...Glhs.....Ihtp....PpVLpA.l....NPh.aAlpFhhp....pst.....huah....s....L....G..uVh......L..........ulT.G...........uEALY..A..DhGHF.....G.....+....hsIp......l............A.Wh......h......lV...h.PsL...l.......L.NY....h....GQG..AhlLpps.....psh............pNP..FF..hhs..Ps..............hh.hhP.hl.llAs..l.A.s..l..IASQAlISGsFS..lspQAl.pLGh.lP+....h.+IhHT...Sc....pp.GQIYIPhlNWhL..hluslhl..llsFc...................s.....Ss.......pLuuAY...........G................lAVTsTMllTol.Lhs..hl...hhth......W..chshh.hs.hhhhlh.F..h.sl.-..h.saF.sAslh.K...lhpGGWlPlhluhlhhhlMhsWcpGpthhh..cp.............................................................................t..c..p..t.h...........sl..pthl.t.hpp....ps.s.....................................................RVsG.s.....uVah.....ops.........ss..lPhshhH..lpp.cslHc+slhlslhs..ssPhV...scR.hp.lc..........plu................tha+lhhpaGap-..................................................................................................................... 0 173 506 707 +520 PF01920 Prefoldin_2 KE2; Prefoldin subunit Enright A, Ouzounis C, Bateman A, Finn RD anon Enright A Family This family includes prefoldin subunits that are not detected by Pfam:PF02996. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.18 0.72 -4.28 56 1643 2012-10-02 17:27:01 2003-04-07 12:59:11 15 22 498 7 1100 1563 97 102.30 18 68.87 CHANGED QphlspapplppplppltpphpphctplpEhcpshcELphls--...ppla+hlGslhlcp.shpcspspLccct-tlptclcplcpphcplppchpchcpplpp.hhtst .................................................t..tphpphppphptlt...pp.h.p..p...lcpph........p.c.t..c.....h......s..hc........EL...p.....l...s..s-................pp.la.chl....G.s.l.........hlcp.shp.cshpplppchctlpp.c....l.........c.plc.pphpphppphpc....hcppl.thht..t.............................. 2 356 593 877 +521 PF01344 Kelch_1 Kelch motif Bateman A anon [1] Repeat The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase [1] for which a structure has been solved [2]. The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415. 20.10 20.00 20.10 20.00 20.00 19.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.11 0.72 -4.41 207 19043 2012-10-05 17:30:42 2003-04-07 12:59:11 20 753 1386 88 10669 22556 644 46.00 27 22.72 CHANGED tRsttussshssplYllGGhss..................tphhsslphacsp.....sspWpthssht ..........................Rtthusssl..s..s..p..l.Y..s.....l..G.Ghss.....................................tphh..ss..V...E.p...YDsp...............sspWp.hssh.............................................. 0 3018 4230 6832 +522 PF00013 KH_1 KH-domain; KH; KH domain Bateman A, Eddy SR, Finn RD anon Published_alignment Domain KH motifs bind RNA in vitro. Autoantibodies to Nova, a KH domain protein, cause paraneoplastic opsoclonus ataxia. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.88 0.72 -4.32 396 19558 2012-10-02 00:34:43 2003-04-07 12:59:11 24 384 4888 106 9071 18086 3141 61.70 24 16.12 CHANGED thplhls.spt..hutlIG+sGp.sIcplpcpo.usp.Ipl.......pp....................p.hlpl.p..G.s..pslppAtp.......hl .....................pl.ls..sph......hu..plI..G+..t......Gp..s.......I+p.....lpp...p...o..G....s....p...Ipl...............................spt........................................pt..h..lpl..s........G..s..pphptAtt........................................................... 0 2547 4378 6873 +523 PF00109 ketoacyl-synt Beta-ketoacyl synthase, N-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Dotter Domain The structure of beta-ketoacyl synthase is similar to that of the thiolase family (Pfam:PF00108) and also chalcone synthase. The active site of beta-ketoacyl synthase is located between the N and C-terminal domains. The N-terminal domain contains most of the structures involved in dimer formation and also the active site cysteine [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.80 0.70 -4.90 167 24208 2012-10-02 12:25:54 2003-04-07 12:59:11 21 1647 5964 191 6576 27004 6262 223.60 26 22.85 CHANGED scslAlsGhusphPsGss.........s-phWchltpGpsuls.........phs......hsth......ssphsGpshs..t...........tFDsthFshss+ps.tMDPppRl.hLpsuhEAlEcAGl.sstph...psst...........sGVhhGssts...........sh.......tt..hhttts.......htshs..hsss......ssushuu+luat...hGhcGPuhsVsTACoSu.hsuh+hAhpslRpGcs-hAlAGGssshhsPtsh.ssasptt..hhusp...s.....s+Aasstu.........-GalhuEGhGslllpcht-A ................................................................................................................................................................lslhGh........uhh.s.h.u.s.............................ptha.p..lhtGt.s.slt.............................hs......................t.t.....h....................t...s...t......h................s.................u...............p............h..........s...........................................................th.......-.......s.............h....................t............h...........s........c................................t..h....D...s.t.....p.........ph...hl.t.s.u...h..c...A..........l.c.c................A...G.l...ss.pph.........................pssp.............sGVh.h.Gsshs..........sh....................................tt.h..hht.ttt..................hp.s.s.....hsst..........................hs.sh..h...u..u..c.l.u.hh..........hs.........l...c...G....P..s.....h...s...l.s.T....A....Co...S..S..hs...ulchA.h........p.....sl..................p...t.........G......c.....s............-........h......s..........l...u..G....G....s.p....h....h....h....s.....s.....h.......s..h.....h.....s.F.......s.....th......t....hh....ots........s..................................s+shcsts..................DG..a.sh...GE...G.sGhllLccLpcA............................................................................ 
1 1853 4027 5613 +524 PF02801 Ketoacyl-synt_C ketoacyl-synt_C; Beta-ketoacyl synthase, C-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Dotter Domain The structure of beta-ketoacyl synthase is similar to that of the thiolase family (Pfam:PF00108) and also chalcone synthase. The active site of beta-ketoacyl synthase is located between the N and C-terminal domains. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.34 0.71 -4.26 165 23531 2012-10-02 12:25:54 2003-04-07 12:59:11 17 1627 5851 190 6174 21515 3921 108.60 35 10.85 CHANGED hAhltGsAsspsG.tssuhssPsusuQ..tcs.....lcpALssAsls.Ps-l-hVEAHGTGT.lGDshEspAltssaGptt...........p..lhluSlKSslGHspuAAGssullKslLAlccuhlPPolphc ......................................................................hA.lhGhussp..DG.......h...p...........s........hs.....sPs.utut.......tps..........lcp..A...L....p..p.....A.....u........l....s....s.....p................p.............l.....s....a....l....p..........AHG.T......u.T.....hG...Ds..........hEspA.l.tp.s.a.Gptt...............................h..hl.u.S....s........Ko.h.GHh...u.AAGss....th.l.....tslh..u.l....c....c.s..hlPsolph.............................. 
0 1709 3766 5251 +525 PF00225 Kinesin kinesin; Kinesin motor domain Bateman A, Finn RD anon Prosite Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.08 0.70 -5.74 84 9740 2012-10-05 12:31:07 2003-04-07 12:59:11 18 241 436 149 6035 9371 509 290.90 32 34.34 CHANGED RlRPhsspEpppsppthsph.........................................................tpttpshtssptpppsFsFDpVFss..sso........ptplapph.h.sllpssl.cGaNsolFAYG.QTGSGKTaTMtG.................................tp..GllPRslcplFpplpp.t..........................................................................................................taplp.lSalEIYNEplhDLLssp..............................ppp......Lpl+-.csp.t....shVpsLoph.Vpshp-lhpllphGppsRpsusTphNppSSRSHulaplplpppshstt.t.......................u+LsLVDLAGSERsscos.........stsp.h+EussIN+SL.sLGpVIsALsps................................psHlPYR-SKLTclLpDSLGG..su+ThMlsslSPusp......shpETlsTL+aAsRA+pl ......................................................................................................................................................................................................................................................................t....pta...t....F....D.t.l..a.s.......pss..................................Qpp.l.a.p...th........s.p.s.l....l.p...........s...s.h......p.......G.......a..N...............s...............s...l.FA.............Y.G...Q.......T..............G..................o............GK......T...a.T.M...Gs.......................................................................................pp.Gl.....l..s....p....s...........h.p...p.l.Fp...t...l...p.p...t...p...t..t....................................................................................................................................................................................................................................................p.a.plp..s...Sah.E...I.....Y......s.......E
.....p.....l....h....D.L.....Lssp.......................................................................tt..........tlp.lc.-...c...s...p.....s..........hhl.t......s...l....p....p.....h..................l...p.............s......h..p..............-...hh..........p......l....h...............p.......h.....G.............p...p.....p...............R.......p...............s.............u........s...T....t...h....N..p....p................S.......S.........R.....S............Hu...l....hpl.p...lpppph..t..t..........................................................hhuclpL...V.DL.A......GS........E......Rh............t..........p.......ots...................................Gp.........p...........h.....................c.........E..............ut..........p.....I.N.p..............S.L...s.L...G....p.V..Ip..u...Lspt..............................................................................pt..a..l..P....Y.R..............sS........K....LT.............p......l...L...............p................-........S..L..............G................G.......s.............u.............+.....T.............h...........hl.ss..l......o........P.......st.........................shpET.l.sTL..c.aAp.Rs+p.............................................................................................................................................................................................................................................. 0 2295 3451 4869 +526 PF00467 KOW L24;Ribosomal_L24; KOW motif Bateman A, Finn RD anon Prosite Family This family has been extended to coincide with ref [1]. The KOW (Kyprides, Ouzounis, Woese) motif is found in a variety of ribosomal proteins and NusG. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.06 0.73 -7.32 0.73 -4.05 235 10200 2012-10-01 20:16:17 2003-04-07 12:59:11 24 51 5026 253 2995 5949 3093 34.10 32 20.34 CHANGED hGchVhlhsGt.pG.ctGplhclpppp............p.Vhlc .......G-pVpVlsG..t.......pG..pp.Gp.Vhcl..cc...............sp.Vhl........................................ 0 1014 1819 2444 +527 PF01352 KRAB KRAB box Bateman A anon Bateman A Family The KRAB domain (or Kruppel-associated box) is present in about a third of zinc finger proteins containing C2H2 fingers. The KRAB domain is found to be involved in protein-protein interactions [2,3]. The KRAB domain is generally encoded by two exons. The regions coded by the two exons are known as KRAB-A and KRAB-B. The A box plays an important role in repression by binding to corepressors, while the B box is thought to enhance this repression brought about by the A box. KRAB-containing proteins are thought to have critical functions in cell proliferation and differentiation, apoptosis and neoplastic transformation [4]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -8.05 0.72 -4.51 69 10165 2009-01-15 18:05:59 2003-04-07 12:59:11 22 1426 46 1 4775 8709 1 40.40 54 7.64 CHANGED VoFcDVAVsFopEEWphL-suQ+sLY+-VMLENapNLlSlG ..............lTF.cDVAVsF..opE.EWph..L-..su..Q....+sL..Y.R-.VM.L.EN.YpNLlSlG............. 0 382 388 643 +528 PF05178 Kri1 Krr1; KRI1-like family Wood V, Bateman A anon Pfam-B_8372 (release 7.7) Family The yeast member of this family (Kri1p) is found to be required for 40S ribosome biogenesis in the nucleolus [1]. 
25.20 25.20 25.40 25.40 24.70 25.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.66 0.72 -3.74 75 304 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 266 0 220 306 7 112.60 31 16.75 CHANGED +cRKccE+cp........+cpElp+LKsLKhpElp-Klp+I+csuG.....tt..........................hsh...s-c-l.....-....s-FDspcaDppMpc.hFs.....-cYYsp...........................tc.cKPpa.--D.-l..t-hh......sstc..............c- ...................................+cRKcpEKpp...........+pcElc+LKsLKhcE.lp-K..LcKl+cssG......ps..............................hsl...s-c-l.........-....sDFDsscaDcpMpc.hFs..--.YYst.............................tctcKPpa..--D.-l..t-h.s.....t.......................................................................................... 0 76 123 184 +529 PF02735 Ku ku; Ku70/Ku80 beta-barrel domain Bateman A anon Bateman A Domain The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the central DNA-binding beta-barrel domain. This domain is found in both the Ku70 Swiss:P12956 and Ku80 Swiss:P13010 proteins that form a DNA binding heterodimer [1]. 
25.00 25.00 26.10 26.00 24.30 23.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.93 0.71 -4.91 29 1701 2009-01-15 18:05:59 2003-04-07 12:59:11 11 32 1062 4 808 1704 49 191.60 20 42.31 CHANGED lplG.....tlslsVpl..ashspppthhhhhhhtcc.........p.spscpcplstpssp.............................tlp...ts-hh+ua......hhsschlslsc--lcplp.hss.......tslclluFhshs..lhchhahcsuhalhPccpts....tpsassLh..cshhcppphAls+ash+s...p.plssLhPpt....................................................puhhlhpLsas--lR.shshhthtsstt.......sppplchh ...................................................................................................................phu.....hlslsVph....asss.p.p.p.p.h.......h.h..hhtpp.........................hshhc..hp..ph...s.p.ps.sp................................................................................pVp...tc-.lsKuY......hhs.s.c.....h..Vh......lsc--l..p.p.lt..tss...............................pslclluFls..ts..t...........l......h.h.hh....c..c.s.h..a.l...h.Psp..p...ss..........................................................c.s.as..hLh..cultcps+.l........Als+h..sh+p.......+.p.lssLhPtt............................................................................pslhlhpL.as--lR...sh.s...h.h..t.......................h..................................................................... 0 264 480 665 +530 PF03730 Ku_C Ku70/Ku80 C-terminal arm Bateman A anon Bateman A Family The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the C terminal arm. This alpha helical region embraces the beta-barrel domain Pfam:PF02735 of the opposite subunit [1]. 
21.20 21.20 21.60 21.40 20.90 20.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.21 0.72 -3.46 18 467 2009-09-10 20:09:42 2003-04-07 12:59:11 9 16 255 7 249 454 0 93.20 21 14.94 CHANGED lc.saposphtNP.LQpaapsLpslALchpEsh.slDtplschpphpc..+hssh..lsch+phhh.s-h....p.-spsutcpppcs..Eus.....suKKsKh-h ..................................tapssph.NPsLQpaa.p.....sLpshAL.....c.....p.....t...P......ts.....D....slsc..hpphs.c....chssh.........lp.......ch.+ph..h.....pph........p.ctt.htt.cp..t..t...tp...t...........hp........................................................... 0 75 127 191 +531 PF03731 Ku_N Ku70/Ku80 N-terminal alpha/beta domain Bateman A anon Bateman A Domain The Ku heterodimer (composed of Ku70 Swiss:P12956 and Ku80 Swiss:P13010) contributes to genomic integrity through its ability to bind DNA double-strand breaks and facilitate repair by the non-homologous end-joining pathway. This is the amino terminal alpha/beta domain. This domain only makes a small contribution to the dimer interface. The domain comprises a six stranded beta sheet of the Rossman fold [1]. 
20.60 20.60 20.70 21.00 20.30 20.40 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.24 0.70 -4.92 17 702 2012-10-10 16:07:06 2003-04-07 12:59:11 10 19 300 4 399 710 4 208.10 18 32.63 CHANGED ullallDsu.sMhps.pspps.......hp.shpshpphhtp+lhsps+.DhlullhauT-pscN.....sssapNlhl.........lp.....clshsshcplpclpphhps............tsstpsshhsuLhsshsl...hpc..spt+hs+++lhlhTs.csPh.......stsphchhhtc.................upDhpppthphs..hhhL......stt...phhap-.hp.utcp.pthhhs.pt........phpchhpplpshpphcRth ........................................................hlhhlDsu.sM....hp..........sttp................h...p.sh.ps..ltph.hppplhss....s+...Dh..lul..lhas.T......c...........p.......o......c.............s.............................sts..........a.p.plh.l.......................................hp.......pls.s.s.hctlpclpph..hps...................................s.....s..p..p..s.s.....hh.ss.L....h.ss.h.p.h.......h.p.p.........t........t...t..p....h.tp++.l.hl.hTstcsPh..........spsphp.t.hhpc............................................st-lpp.s..h.thp..hh.l...................t..p.p.h.hp.....-........p.......s...t...........t.................................................................................................................................................................................................. 1 128 216 324 +532 PF00014 Kunitz_BPTI Kunitz/Bovine pancreatic trypsin inhibitor domain Fenech M anon Prosite Domain Indicative of a protease inhibitor, usually a serine protease inhibitor. Structure is a disulfide rich alpha+beta fold. BPTI (bovine pancreatic trypsin inhibitor) is an extensively studied model structure. Certain family members are similar to the tick anticoagulant peptide (TAP, Swiss:P17726). This is a highly selective inhibitor of factor Xa in the blood coagulation pathways [1]. TAP molecules are highly dipolar [2], and are arranged to form a twisted two- stranded antiparallel beta-sheet followed by an alpha helix [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.26 0.72 -3.76 147 4915 2012-10-02 12:37:03 2003-04-07 12:59:11 18 353 273 213 2635 4772 22 53.40 35 18.48 CHANGED hCphsh.ssG..s.Cp...u......th....+aaas....spsppCp.pFhYuGCtGN..t.....NsFpohc-CpphCt ...................................C.hsh...p....tG.....s...Cp.....s..................hh........Ra..a...as............s.ps...ppCp...pF..h.Y...u.................G.C...t....G...N...t.......NsFtopcp....CpptC................ 1 878 1112 2046 +533 PF03521 Kv2channel Kv21channel; Kv2 voltage-gated K+ channel Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 42.50 29.90 18.40 19.70 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.92 0.70 -4.93 4 108 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 39 0 63 94 0 204.20 45 32.14 CHANGED KosEous+KDKspDNHLSPSRWKWs+RsLSETSSNKSF-sK.QtsspK.......spSSSSPQHLssQpLE-lYNchsKTQSpP.lNopp.sQsu+P...tEElEMpplssPps.LsssppEullDMRShSSIDSFhSCATDFsEopR.shoPhuu..hphphss..ss.cta.tupst.hLs.pttpusAs+-uhpEhtststshp.-sts...h..s..pus.hlESP+oSlKssNPL+.RSLKVNFh-ucsso..ssssshps.Plp.tsashtusttlsThhL.-p .................Kss-s.spK-psp.DNHLSPs+WKW.s.++.shSETSSsKSa-sK.QtsspK.................sp.oSS.....S.......P.......QHLssQpLE.hYNchsKT.Qsps...Nscp.pt.t.t+P...cEElEM-plssP.t.Ls..s.psEsl.lDM+Sh..SSIDSFhSCATDFsEspR.shsP.su.................................s.pttt..ssp..sh.th.s..s.sh..ps..ss............s.h.-SP+SShKssNPLK.RuLKVNFh-s.c.ss...ss...hh.....s.....hs....s........................................................................................................................... 0 2 8 24 +535 PF02828 L27 L27 domain SMART anon Alignment kindly provided by SMART Family The L27 domain is found in receptor targeting proteins Lin-2 and Lin-7. 
20.20 20.20 20.40 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.41 0.72 -4.28 20 1179 2009-01-15 18:05:59 2003-04-07 12:59:11 11 37 100 20 527 978 0 53.10 24 14.32 CHANGED slppsh-lL-cLps.ssss.....pchppLpplLpstahpullcla-pltppshssssss ........................hphsh-lLccLps..spst.....p-hptLpplLppsahp..u......L......l.csH-pltppphtss............ 0 84 132 273 +536 PF02448 L71 L71 family Bateman A anon Pfam-B_1976 (release 5.4) Family This family of insect proteins are each about 100 amino acids long and have 6 conserved cysteine residues. They all have a predicted signal peptide and are probably excreted. The function of the proteins is unknown [1]. 25.00 25.00 32.90 29.30 18.80 18.20 hmmbuild --amino -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.18 0.72 -4.14 10 105 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 12 0 35 105 0 71.40 36 65.23 CHANGED p.CccltcpCpcshpRLssssD.slchFNcpCRccsc..hpWRsVoRCELp+lsClh...cspshsCcNlA........chssh ...CccltcpCppshpR...LssssD...slshhNppCRccst..hpWRsloRCELpphsChh...cscphsCpslAchh............ 
0 10 10 23 +537 PF00753 Lactamase_B lactamase_B; Metallo-beta-lactamase superfamily Ponting CP, Bateman A anon [1] Domain \N 22.70 21.90 22.70 21.90 22.60 21.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.20 0.71 -4.65 305 34807 2012-10-02 15:46:01 2003-04-07 12:59:11 22 191 5226 350 10222 34846 12115 193.00 14 50.57 CHANGED hsthsssshllt........ss.spsl..llDsGhststtthh..............................hthpstplctll..lTHtHhDHhGuhttltpthshshhhttttttthhtthh.............................................................hhhttttththtththhhhts.stsssshhhhh.....ttppllhsGDhhh......stsththth...........................................................h.....................h.....hh..uH ...................................................................................h....ttsshllt...........ts..pph.l.....l..lDsGhshsthpth...........................................................hthp.s..t.p..l...p....t....l....l.......lT.........H.t.H...........h.........DH.....h.........G......u.........l........s......t......l.....h.......p........p......h......s.....h......s......h....h...h...s...t....t.t...t..t.h..h..ht...t.h...........................................................................................................................h..ht.p.t.t..t.h..t.h..t..t...h....t..h..h....h....h...s.....t...s.........s..........s.......s........s...t..........h..h.hhh..............................stt....p.....h....l..h..s.GD..hhh..............ttshhhh................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
1 3453 6690 8675 +538 PF02652 Lactate_perm L-lactate permease Mian N, Bateman A anon COG1620 Family L-lactate permease is an integral membrane protein probably involved in L-lactate transport [1]. 19.70 19.70 19.70 19.80 19.60 19.20 hmmbuild -o /dev/null HMM SEED 522 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.72 0.70 -6.01 8 3338 2012-10-02 15:12:49 2003-04-07 12:59:11 9 5 2259 0 540 2191 157 490.80 36 95.93 CHANGED LSALlALsPIlLFhhuLslhKhKGYhAuhlolAlolhIAlhha+MPlphshuSslhGhlhGLWPIuaIIluAlFLYKlSVKoGtF-II+pSlhuIosD+RlQllLIGFsFGuFLEGAAGFGsPlAIoAAlLVGLGFpPLaAAuLCLIsNTAPVAFGAVGIPIhusuusssls......................shEISphlGhhLshhollIPFalVhlhsG.a+GIK-saPAllluGhSFAlsQaLoSshLGPELPsIluuLlSLsshslFLKh.WpPKpla+.ssptpotsspst..........+hscllcAWoPFlLLossIllWs.PhFKAlap...psuhlhhssh.h.h..lsp....hlhphsPlss..puhshssVaKhcLlhssGTuIllAsllS.hhlh+lsspDshslFspTLKEhtlPIloIshVluFAhlsNaSG........MSsoLAluLAc.TGpsFTFFSPhlGWlGVFlTGSDTSSNlLFGuLQhhsApplGlsssLlLAANTsGGssGKMISPQSIAIACAAVGLsGKES-LF+hTltaSLIaslIsullshll 
............................................................................hSuLlAh.lPI..lhhhlsLs...........h........h........+...........hK...........u...h...h.A.u.h.h.o.ls..lsl.l..l..Alh....h..........a........c.....M...........P.......s.........s.....h...u.......h...u.us.s.Ghh....hG........la...PIuaIllsAlalY+lolcoGpF-lI+so.l.h.u.I..o..s..D.p.R.lQhLlIuF.sFGu..FLEGAAGFG..sPlAIsAslL.luLG.FpPLhA.......Ahl.sLIuNo.APVAFGAlGlPl..hss...u..p...s....s...u...ls.................................s.h.p..luth...s...u..h.p...L...s....hhsl.l.l.PF..h...l.l....h....l....h.......s..........G.........a............+G...........l............+.......E............s.......a.......P.sh...l.....luGh...oFuls.Qal...su..s..a....l...G...PEL...ssIl...uu..L.lol..h...sl.s.l..a...hKh....a...p....P...c....p....h....h..c...h......t.....s...t.....t....t...t..t...s...p.hsh.....................................................shtpl..l...pA....WsPal.lLss..h..l.h.l.hs.h.......h...K..s..lhs........................s.hl..........h..h...t...........lpt.....h.hphs.sl.su.................h..s.....h.shphshl.u.....ssGTsIl.l.u.s...l...lo....hh.........h.........h.........+...........h.........p.........hp..p........s........hp.s........a..spT............l+pl.thsllo....IshllAhA..hlhs..a.SG............................h.ossl.u.h.u.lAp..s.G..s.hFshh.S.....PhL.GWlGs.FlTGSsTuSNsLFusLQt.s.sA.p..p....l......G......l............u.............s........s........L........ll.AA.N.osGGs.sGKMISPQSIAlA.sA.A.l.....uh.s...G+..E.....u.....c.....lh+h..T..lp..a..S...l...h...ah.hllullshl............................................................ 
1 182 344 454 +539 PF03798 TRAM_LAG1_CLN8 LAG1; TLC domain Bateman A, Finn RD anon Pfam-B_1398 (release 7.0) Domain \N 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.84 0.71 -11.57 0.71 -4.76 168 2246 2009-09-11 23:05:38 2003-04-07 12:59:11 11 31 362 0 1453 2051 32 191.70 17 58.71 CHANGED casppshshlahhhsshhuh....hlhhpps...........................thht.h..........phhh....s....thh.hhhhphuYalhs..lhhhhh.pttpt...........................................................................h.hlhHHl.hslhlhshuhh..................................hshtph..shslhllh-hoshhLphs.......phhphh.th....................pt.............h.hhh...hhahls.FhhsRlh.....hhshh.....hhhshhthh...h.................................................................hhhhhhhhshhhs.L...pllplaWhhhllc ....................................................................................patpphhphhahhh.shhhuh.....hlh.hpps...............................................hh..h.............st..h...t...........h....p.hhhhhphuaa..htp....lhhh.h.h...chtcp..............-h...................................................................................................h.hlhHHl..hslhh.ls.huhh.....................................hsh.hph...s.sh.l.hh.l.t...-.ho.....shhLp.hs.......c.hhphh.th................................pt...............t.hhh......hl.hhhs.ahh.sRlh...........hhshh..........hhhs.h.h..h............h................................................................h.h.h...hh.h.hshh.hh...L.....hlplhWhhhlh.......................................................................... 0 465 733 1095 +540 PF03161 LAGLIDADG_2 LAGLIDADG DNA endonuclease family Mifsud W anon Pfam-B_3225 (release 6.5) Family This is a family of site-specific DNA endonucleases encoded by DNA mobile elements. Similar to Pfam:PF00961, the members of this family are also LAGLIDADG endonucleases. 
25.00 25.00 25.30 25.10 24.40 24.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.90 0.71 -4.36 18 250 2012-10-03 01:41:40 2003-04-07 12:59:11 8 26 160 6 57 270 38 167.00 22 52.22 CHANGED .hpllhGhLLGDualcpcs..pspp.......hphphphpp......tah.+hhhhhp..tasssss..hppphs..............ppGchhhshphpThsh.ssFshltphFYh...sspK....hhlPshlpp..aLTPpuLAaWhMDDG......p........sspulhlsTpuFohc-lphLhcsLpp+asLcsol+psp.......spahIal.spohthah ..............................................................hpllhG.LLGDup..lptps....ps..pp.......................hthp.phpp.........htah...a.h.h.hhp....thsp.p.....ht.ptht.....................s.p.htshpF..pThsh.spFshh.tchFY..........sstK...................hlP..p..p..l..tp..hLT.PhuLAhWhMDDG.........................ttsts.....lhlsTpsFo.hp-.h.hLhp...hLpppas....l.p.spl...pppt............tthhlhl.tpsh..h................................. 0 23 45 52 +541 PF04916 Phospholip_B Laminin_A; Phospholipase B Finn RD, Mistry J, Wood V anon Pfam-B_5721 (release 7.6) Family Phospholipase B (PLB) catalyses the hydrolytic cleavage of both acylester bonds of glycerophospholipids. This family of PLB enzymes has been identified in mammals, flies and nematodes but not in yeast [1]. In Drosophila this protein was named LAMA for laminin ancestor since it is expressed in the neuronal and glial precursors that surround the lamina [2]. 
21.30 21.30 25.10 23.50 18.50 20.60 hmmbuild -o /dev/null HMM SEED 553 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.76 0.70 -6.39 6 390 2012-10-03 21:14:07 2003-04-07 12:59:11 8 9 163 6 279 416 25 411.50 27 85.60 CHANGED p.hph..ullhhl.http..pshsss.....ppspsslshst.shp.................sth.stst......lAhupapssVNpTGWuhLEl-stp...shssplQuYuAGhLEGhLTtthlhhHhp.NohpshCcN...tspaCscLt-ahspN.+Whcppl.....pps.sD.aWpQlshslsQLsGLhsGYppRs.pscIshc....hasIhhhNhsGDlhDLt.........pphs+TcsPs........Fchs..G+CSA..LlKllPssc.......laFuHsThSSausMLRlhKpYcFs.......c.phsPGphloFSSYPGlLtSoDDFh.lpoutLsllETTlu..saNtpLh+phsP.spVhsWlRuhlANhlApsuppWsphFSRaN.SGTYNNQWhVLDh.Kp.hcspcpL..........sc..ssll...................allEQhPG.....hlsppDhTt..hLp+.oYWsSYNlPaaKplhclSGh....phscchG.aasastsPRA+IFcRDcusVTDlsShptLMRYNsYpc...-.hu+Cc..............CsP.PYoAchuIusRuDLNssuG............ThEh..u..GhssHuu.......lDsKs......sshcLh.......pphphhAh....uGP...shcslPsFcWsc.sshc-tssHhGpPDhWNFshVshK...........Wph .................................................................................................................................h.................................................................................................h....t.h......GWs.lplts....................hs...s....hh..suGhhEu.....ho....h...h...N..h.h.....t.....h.t...........................th...t.l.pahttp.....tahppph......................t.....ps...aWtphthh.h...Q.hpGl.tuhtt......................t............................h.....hhp..uDl..D...l............................................h.t.....t........................h......sp..CSu..ll...+lh.ss.p..............plhhuHs.oWtsY.ts.hhRlh.Kpa.p..h.th.........................h...upphsaSoYP.Ghl.ShDDFY...l....h...s........s.........sLhhh.pTT..s...shN.t.L..h..p....l....p..s....t.slhtahRsh.............hAshhAps...u.tpWscha.t.phN..S..GTYNNQWh......llDhptht.......t...t.......................................thl.......................................................................................
...hlhEQhPs.......hh...tD.Tt.....L....p..sYasSaNhP.a.a.pl.hp..huGh.....................t.ph.u................h...pap....ssRu.IFtRsts..tl.......ts.hpshhthhRhNs.a.p...D.hu....tp.................s........IssR.....DL..ts....t.........................................................s..su...........hDsKh........ss.hthh................................tt.ht.hhsh....sGP....s...t...t.....ssFpW.....t..h.......+.G.Pp.apF..h......h................................................................................ 0 154 189 239 +542 PF04031 Las1 Las1-like Wood V, Finn RD anon Pfam-B_10636 (release 7.3); Family Las1 is an essential nuclear protein involved in cell morphogenesis and cell surface growth [1]. 22.70 22.70 25.60 25.40 19.40 20.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.97 0.71 -4.56 26 316 2009-09-11 09:56:21 2003-04-07 12:59:11 8 8 274 0 222 321 1 147.90 32 29.95 CHANGED slsW..tshpEhpplhphla..............sp........................................................s.sppccAlp+lp.sWpsRu.....plPtsl-uTuhLlpshLtD..............................................ssthsstslc...htYuhAllR.....FVNsllDstQp..uphshshpplApplGlPphhV-LRHtsTHcp.LPsLshLRhusppsLpWLacpYWppp ..................................ssWhshsEh.pVpphha..............sp.......................................................................................s.ptpppAlpplp..sWctRs........plP..hsV-uTA.LlpshLtD...................................................sst.hsp....tlp...hhYuhAl.sR.........FVNtls-t..hpt.....p...t...ht...h.shtplApp..lGl..Ps..hhV-LRHpsTHcp.LPuLshLRpusphsLpWLhcp...YWpp............................................................. 
0 80 125 183 +543 PF00057 Ldl_recept_a ldl_recept_a; Low-density lipoprotein receptor domain class A Sonnhammer ELL anon Swissprot_feature_table Repeat \N 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.59 0.72 -4.05 53 19728 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1237 198 54 11586 17417 67 38.20 41 15.75 CHANGED ssCp.....sscFpCssup.....CIst.pahC...DGpsDCp.....DGSDE....psC .............................h.tCt.......ssp.F.p..C......s...s.up.............C.....I...st...p..hhC....D..G..p..s.DCs........D..s..S..DE......tsC............... 0 3071 3990 7792 +544 PF02987 LEA_4 LEA; Late embryogenesis abundant protein Griffiths-Jones SR anon Pfam-B_106 (release 6.4) Family Different types of LEA proteins are expressed at different stages of late embryogenesis in higher plant seed embryos and under conditions of dehydration stress. The function of these proteins is unknown. 36.00 10.00 36.00 10.00 35.90 9.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.72 0.72 -4.09 26 1180 2010-01-13 15:33:07 2003-04-07 12:59:11 11 27 134 0 431 1127 2 40.60 29 59.19 CHANGED cDhsu-KAtEAKDsThcKsGEhKDass-KAtEuKDpss-KstEh ........................stpKAt-..s..t.......-ss...t...c....Ku.......u-.......s.......pDts....t-K....As-..sK-...tst...pth............................ 0 117 228 375 +546 PF04004 Leo1 Leo1-like protein Wood V, Bateman A anon Pfam-B_11226 (release 7.3) Family Members of this family are part of the Paf1/RNA polymerase II complex [1,2]. The Paf1 complex probably functions during the elongation phase of transcription [1]. The Leo1 subunit of the yeast Paf1-complex binds RNA and contributes to complex recruitment. The subunit acts by co-ordinating co-transcriptional chromain modifications and helping recruitment of mRNA 3prime-end processing factors [3]. 
20.10 20.10 20.70 23.30 18.60 18.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.34 0.71 -4.55 86 331 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 272 0 230 324 0 165.30 31 32.43 CHANGED hahh+lPsF......ls.l-s..............cPFcPcoa.ts.p....................pt......................hpphh.php...NolRWRhsp..s........psGp...........tpSNA+lV+WSDGShoLplGs...Eh.aDl.......................................tppsh.tp.t........saLhsppst.........................................sllp..sputlspphshpPsoh....sStsH+..............phstulsp+ptpcstt....hhhtsspDPEhc+cctp+ .........................................ahl+lPNF......Lu.l-s..............cPFDPpsa.ts.t.p..............p..hscp.......................................................................ttp+hhhcsp...NolRWRhsp....s..........tpGp.h.........pcSNARlV+WSDGS..hoLplGs...E.h.aDl..................................................hptsh..p.sp............saLhlpps..........................................sh.Lp....s..p..uhhppplshp.Pp.os........sStsH+.......pho.hulssRstKppt.....thhh.sspDPEhp+pph..................................... 2 73 122 188 +547 PF00060 Lig_chan lig_chan; Ligand-gated ion channel Bateman A, Sonnhammer ELL anon Blastp NMZ1_HUMAN Family This family includes the four transmembrane regions of the ionotropic glutamate receptors and NMDA receptors. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null --hand HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.03 0.71 -4.49 44 3260 2012-10-03 11:11:44 2003-04-07 12:59:11 21 78 277 465 1868 3242 57 253.80 21 33.83 CHANGED oh-lWlslhhsalhluhslallt+.hoP.h-apt.........................................................................spaolhsuhWFshushht.pGsc...pP+uhouRllsslWahFsLllluSYTANLAAFLTlpchpss.IpuhcDLtpps.plpassht.uoohthappop.........hpphaphhpstps..........................ph.spssp-Glpclcpu..th.hAalhEsshl-ahsscp.......CchhplGp..shsspGaGIAhstsSPhpsplohAILpLpEsGclpplcsKWappp......pCsspsst..........sssspLslpshuGlFllLshGh ................................................................................................................................t..plW...h.h..l.hhs.h...l.h...l......u.h...l...la.l...l...t..........+....hss.hpapt.................................................................................................................tsph....s.l...h....s..u..h..W....a....s..h.u...s....h.h...............p.....p.....u.sp........hs..P.....+..u.....h.o..s.R.lls..............s.............l.Whh.F.s.......l.I...l..........luSY.TANLAA.a.LT...l...p....c.h...t...s.s...I..p..u..h.cDLtpps.hthhshtshsph.hhpppp.thhphhh.hh.ttt.t.hhtttttsh.thh...hhhh....................................................................................................................pp.pp.hh.hsth..tthhhhh...ts.ht.h.hhhhh..ptt.h.phhpchhhtts..tC.tptt...........tsstLslps.htGl..Fhllhhu............................................................................................................................................................................................................................................................................................ 
0 597 867 1417 +548 PF02900 LigB Catalytic LigB subunit of aromatic ring-opening dioxygenase Griffiths-Jones SR anon Structural domain Domain \N 19.80 19.80 19.90 19.80 19.60 19.70 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.68 0.70 -5.41 47 3175 2012-10-01 19:17:44 2003-04-07 12:59:11 13 7 1832 5 918 2499 472 260.20 23 90.78 CHANGED sshhhSHssshhhh.s.....sttph..t...ph......................ht.htppltp..h.............cP-slllhuscahs....phhss.hlssss................psltD........ah.......h.phphshsGss-LApplschlhtssls............spphslDHGshsPLphhhs..........shPll.lslss.....hhsspcphplGcultphht.....scpVlllGSGuloHplts.phs.............................................hstcaDchhlchlps....s...chptlhshpppt.ttttups.stchhshhhshGAhs...............phhsphhtatslsths..sshs ..............................................................................................shhluHu.s.sh..shps.........st.pt...........th..........................................h.th.t.pplst...................cPcsIll..h..........osH...Whs........phh.s..ls.s...ss.p...................................ps.laD.........as......th..P.t..h...hch..p..Ys.hsGsPpLAp..pls.ch...l.tt..s...s.lshth..............................p.shsl.D..HGs...h..sP.....Lhhh.hs...........................chc.lPl.l...p..lu......lss...........hhsstpphc...lGcslttht...........-csVhlluSG.s.ls.H..s.l...t....s......p.hstts.............................................................hs.tpFD.phlhphlpp....................s......ch.c...t.L...s...sh...h.p.......p..t..s..s.t...h..u...p...s...s...cchhshlhshGAhss.....................hphhs..chh.htshs.h.h.h...................................................................... 0 265 556 770 +549 PF03893 Lipase3_N Lipase 3 N-terminal region Birney E anon Birney E Domain N terminal region to Pfam:PF01764, found on a subset of Lipase 3 containing proteins. 
21.10 21.10 21.10 21.50 20.90 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.44 0.72 -4.16 10 181 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 56 33 114 168 0 71.60 22 15.93 CHANGED hlsslhslusupWuhph..............pcssaots-shspWspusssp...apshspss+shtsVasss....L+sPcltsshsshth ...............................h.hpth.shusApWs.t................plpaohtcshsp..WshussAth..a.s.hsphsphshslhpss....hpsPthts..t....t.......................... 0 14 65 97 +550 PF01764 Lipase_3 Lipase (class 3) Bashton M, Bateman A anon Pfam-B_893 (release 4.2) Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.84 0.71 -4.57 61 3914 2012-10-03 11:45:05 2003-04-07 12:59:11 20 76 1209 69 2118 4488 506 139.30 18 28.90 CHANGED lluaRGTp..shtpahssh.phshsshp.h..............................ssplcpGFhcsap...................................thhpplhsplpc.Llpphs......s..hplhlTGHSLGGulAsLsAh.........lhppt............tplplhoaGsPRl...Gshsauphhs.................t...........hhRllpppDlVP+lPshhh ...............................................................................................................lsh+Go............s..ph....h.....s.h......h............h.........................................................................................tshsc.t.Gh.h..p..h.hp......................................................................................................h...h...p...p..l...h..p..t...l...pp.....hh.....p.phs.....................p..hplh..l..o..G.......HSL.....GGul...As..lhuh........................................h.h.t.ph...........................................hp.s.hs..F..u.s..P.t.l........s..s.....t.h.t..p.hh.p......................................................................hh.p.h.s...t........t..D.lshh....hh............................................................................................................. 
0 783 1383 1855 +551 PF04571 Lipin_N lipin_N; lipin, N-terminal conserved region Waterfield DI, Finn RD anon Pfam-B_4929 (release 7.5) Family Mutations in the lipin gene lead to fatty liver dystrophy in mice. The protein has been shown to be phosphorylated by the TOR Ser/Thr protein kinases in response to insulin stimulation. The conserved region is found at the N-terminus of the member proteins [1,2]. 25.00 25.00 25.90 25.20 23.70 24.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.55 0.72 -4.41 10 549 2009-09-11 22:04:36 2003-04-07 12:59:11 9 10 268 0 306 483 2 107.30 51 13.02 CHANGED MsYV...uuplasoVpclYsuINPATLSGAIDVIVVEQpDGoapCSPFHVRFGKh.GVL+ss-KhV-IplNGp.sDlpMKLuDoGEAFFVcEs.--p.pclPshLsTSPlsssssu ....................MsYV....tplhsoVpc.h.ap...ulNsAT..L.SGsIDVIVVc.......Q.............D..G......o.......hpCSPFHVRFG..Kh..GVLRucE..K...l......V..-IclNG.....ps..l..D.l...pMKLG.-sGEAFFVpE.s....-.s.p........p....lPt.LtTSPl.s...s..................................................... 0 91 150 231 +552 PF03180 Lipoprotein_9 NLPA lipoprotein Mifsud W anon Pfam-B_1418 (release 6.5) Family This family of bacterial lipoproteins contains several antigenic members, that may be involved in bacterial virulence. Their precise function is unknown. However they are probably distantly related to Pfam:PF00497 which are solute binding proteins. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.38 0.70 -5.46 35 6507 2012-10-03 15:33:52 2003-04-07 12:59:11 9 8 3244 23 955 4106 60 229.70 38 86.36 CHANGED lplGshssscu-lhc..hlpphhKccs.lclclhpFoDYspPNpALscG-lDANhFQHhPYLcphs+s..pttpLVslussalpPlulYS........+KhKsls-L...cGusIAlPNDsoNpuRALhLLppsGLI+LKss.tshhAThpDIs-N.PKsLcl+..El-AutLPRsL-s..VDhAlINssYA.lpAsLsPpcDulhhE.s+.s..uPYsNllVsRp...ssccsstlKcllcAhp.oc-V+phlpcp...a.sGuslPua ............................lplGss..s.s..s..c..s..p..lhc.....hsp..p..h.h.c.c...c..G....lclclhpF.s.D.YshPNpALscG-lDANhF..Q.H...h...P.aL..-p..scc.....+.....Ghc......L..s.s..l..u.s..s.al......P...hu.l.Y...S..................+K...h..K..slc-l......c..Gu..p......l..AlPN.D.soNtuRAL..h.L..Lp.psGLI+L....K....ss....s....s......h....h......u..T....s..t...DI...s-....N....PK....pLKhh...El-.AuQlsR....sLsD....VDhA..l.I..N.s.saA...hp...u...G.L.s..P..t..c............Du.lhh.E.......sp...s..........s..P..Y....s.Nl.l.....ssRp.......sscc.s.pt....lKc.llcs.ap.Sc-Vpchlpcp...a.pGuslss................................................................................................................... 1 211 490 731 +553 PF02190 LON ATP-dependent protease La (LON) domain SMART anon Alignment kindly provided by SMART Family This domain has been shown to be part of the PUA superfamily [1]. 
21.90 21.90 21.90 22.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.53 0.71 -4.65 158 5691 2012-10-02 17:37:24 2003-04-07 12:59:11 11 68 3513 13 2086 4772 4482 199.10 23 30.32 CHANGED plPllPLtshl.l.aPshhhslp.lt..csp............hp......hlcpsh.......................pp.st...h...lhlh.....................................................ptp........s.......t.ssl..................................................aplGsl.....upIhp.h.....................phs..c..Gp..................................................hplllp..Ghp.Rh+l....pph................................................................................ppsa....h...............huclp.h..........................................ptptpptt.......ptltph...hpp..................................hhp.h................................................phhpth.s.phh.t.hpshppssp.Lsshl...ush.ls....hshpc+Qp.lLcthsspcRlptlhphLpp .......................................................................................lPllPL...cshl...l...aPthh...lPLh..ls....ct+s..........................lp.h.lcpsh...............................................pp..s..pt......lhls...............................................................spp.ps...t.p..ts.........s.h.s..cl...............................................................................................................................a.plGsl...upIhp.hh................cls......D...Gs..........................................................................lplllp...Ghp.R..h+l.........tph........................................................................................................................................................................t.pcsa..h.................................................tAcl..p.hl............................................................................tttppt.t..........ptl.tth...h.t.p.....................................................................................................
...............................................hhpth.............................................................................ph..p.ph.....s........c....h....h.t..s....h.p....s..h..s.c..ss....p..L...u...Dhl.......Aut..hs.........lph....p.c.+...Qp.lL....Eh.sl.....pcRLchlhthh..t.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 700 1277 1725 +554 PF00560 LRR_1 LRR; Leucine Rich Repeat Bateman A anon Reference 1 Repeat CAUTION: This Pfam may not find all Leucine Rich Repeats in a protein. Leucine Rich Repeats are short sequence motifs present in a number of proteins with diverse functions and cellular locations. These repeats are usually involved in protein-protein interactions. Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains. 20.60 9.30 20.60 9.30 20.50 9.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.84 0.76 -7.83 0.76 -3.07 2414 25597 2012-10-02 21:32:02 2003-04-07 12:59:11 28 4912 1389 82 13545 96601 2712 23.30 33 5.68 CHANGED pLppLslss..N..plp.p................lst...htph .....................pLphLcLSs.N..pLs.p................................lPs.p..ht...................................................... 0 2975 8467 11049 +555 PF01463 LRRCT Leucine rich repeat C-terminal domain Bateman A anon SMART Family Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. 
This domain is often found at the C-terminus of tandem leucine rich repeats. 20.00 4.20 20.00 4.20 19.90 4.10 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.26 0.74 -7.54 0.74 -3.10 59 1112 2009-09-10 23:15:08 2003-04-07 12:59:11 19 248 79 18 530 955 2 25.80 34 6.10 CHANGED shhCssPsthps..lhphs......phsCs ....sRCsuPtpL+shpltplp.....pcFpCp..................... 0 82 113 266 +556 PF04180 LTV Low temperature viability protein Wood V, Finn RD, Mistry J anon Pfam-B_15065 (release 7.3); Family The low-temperature viability protein LTV1 is involved in ribosome biogenesis 40S subunit production [1]. 25.00 25.00 26.50 26.60 24.90 24.60 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.55 0.70 -4.97 4 274 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 231 0 202 269 2 368.80 26 86.00 CHANGED sKKpAppFpLVHRsQcDP.haDEsAsp+VLlPspp.N..Kou...shpspDtp.s...shRspcGEAApYGlaFDDoEYDYhQHL+slGpssusulhlsupshspp.cpK......ctpc.h.hpspspcL..p-shhsphphph....shtsQQs......sPDtIuGFpPDMDPcL.......REVLEALEcpuhshND-E.s.--...........DucEhc-hDhp.........tthDEh-D.ut..s.p..cs.cc..a..s-E.sch..h-hspsupshshps-hpht.cca...chhpK+psDst.usus.u-hpsS.ppDs......l.-h.po..pKu+s+pKtuAhoshSshSMSSSALsRoEsholLDspa-clcEc.........Ysshh--hp.lc....................Qs....sVhsc....tppF-sMhD-FLssatsp......sRphucp+hcpp+hKpul-ElcchR+s.t+ARh 
.............................................................................pKKpAhpapLVHRuQpDPLht.DpsAsp+VLh......s..s.....................................................................t.................p...t.p.....................t...t.pcuEttpYGlaa.....DD..c.YDYhQHL+...........t....p.sss....s.hlt.sts.....ttp....ctc...............................ph.c..............t......t..........p......l.............ss...s......h.h.Sp.hppth...........shts...s.........sPD.lAuhps.D..h....D...c..............c..lLpA..cspsh....t.t....-..p.t.....pp..........................csp.Ehp..-...-.....t.............................t........t....p.-..p.p.t..........p.t................tt.....t..p.....a............c.-...s..p.........t........hp..........s.s..ss.s..h...p.h....s.tp.hh..h..pcF................c+.....c......p.h...s.............t...h.........u..sh.s.....s.....c....h.p.s.............p.........t...........s...s..................h.......ph..t...............pttc.....pp...h....+....h......ssh.ps............p.....s..........h.......s.hs..sst.h..cs...Et...ph.....l.ssph-..php.pt.........................Y.sp............p...c..p..p.....t.................................ps..s.sh.st.......tp...p...hpp.h.......pt.....ss.....p......................R.hsc..p.p..t.cp.tt.h.ppuhcp...pc.+t..t.sp............................................................................................................................................................................. 0 68 104 163 +557 PF02123 RdRP_4 Viral RNA-directed RNA-polymerase Mian N, Bateman A anon IPR001795 & Pfam-B_6212 (release 8.0) & Pfam-B_9867 (release 8.0) Family This family includes RNA-dependent RNA polymerase proteins (RdRPs) from Luteovirus, Totivirus and Rotavirus. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.79 0.70 -5.78 28 1146 2012-10-02 12:54:00 2003-04-07 12:59:11 11 8 405 10 1 4591 0 427.20 31 60.01 CHANGED sphhstspphhtphhshpttstsst+sslp.tplhtsthpphsps.........sth..shhphhh..............................................................ps.h.shh..............hh.......................................h.......ht...............................................tss.p.sshhsphpsl.G+hssshc.......hptEhpp+sssslsh.thsptspctl.......hshltplhppcl.................................p.h.tp.....ttt..............pthhpsc..hh..tsschs++sls-s......lchh.sptcscst........h..tpKl.Ep................................G.+sRhIh....usshlsalshchlhtstcc.........th.supshlshssttphhshtpcl.............hpttt.hhhhDhosFs.SpHshcshptlh....p.hpppls.shshttt..........h.h..hhpshtshhlhlsss..htp..........chhGslhSGp+sTohhNSllNhshhphshuthshsh.........ps.GDDslhuhpss..............hhcphpphsh+spshKpshu..tuE........FhRht..............hpptshtu..a....................lh..RssushloGsW 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...h.h.s....ph...............php.c.s.shs..th.t..tp............h..l.thh.p.l..............................................................................p.h.psp........h........s.phscphLt-s..................lph...sttpsc.p...................tK...c..............................................................................................G..+hRlIh......ph.lsp...phl....h.chhh...tpc.............h...t.p...hhs.........s..s.......t..p..h..........t.l......................h..hDhotas.S...p....sh....cch.....................lps.hh...s........s..ps..psh.........................p.Lh.t.t.h..hs....l....h.......sS...h.l....p..lsD.Gs..lltp....................................h....GshtSGphpTpusNSh.h.....p.....l...h...h....h...h.........t.....s.....h..........u.....p......h.....u...th........................................GDDshtshp...s..................hthYtp.h.s.hK..scs.............................................................t...thh....a.........................p..................................................................................................... 0 1 1 1 +558 PF01476 LysM PG_binding_2; LysM domain Bateman A anon Bateman A Domain The LysM (lysin motif) domain is about 40 residues long. It is found in a variety of enzymes involved in bacterial cell wall degradation [1]. This domain may have a general peptidoglycan binding function. The structure of this domain is known [2]. 
20.90 11.80 20.90 11.80 20.80 11.70 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.96 0.72 -4.06 156 27194 2012-10-01 23:00:54 2003-04-07 12:59:11 15 602 4537 7 7042 21185 3954 44.30 31 16.15 CHANGED apV+pGD..TLhpIAppa.....................shshpplhphN..........slsssp.....lhsGQpLpls ....................YsV.pp.GD...o.Lhs....I.A.p.ca.........................ss..s...hp...p....l...h..p....h..N....................s..l...s....ssp..........lhsGQpLpl.................................................. 1 2348 4523 5983 +559 PF03466 LysR_substrate LysR substrate binding domain Bateman A anon Bateman A Family The structure of this domain is known and is similar to the periplasmic binding proteins [1]. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.20 0.70 -10.87 0.70 -5.23 415 85941 2012-10-03 15:33:52 2003-04-07 12:59:11 15 39 4494 158 20277 62785 6990 202.80 15 67.81 CHANGED pst.pG..pL+lus..sshu...tthlsshlspFpp.p.aPt.....lplplptss..tplhchltpGplDlulhht.............tsssltsp.Lh.ppphsllss...ssa...............Lsp.......ttsl.shpcLtp..pshlhh..p..sss.......hpphhpphhppt......shp.........phthpssshpshhphltsGhGlullPphhht..p.h...tpsplh.hslss.........sht.tshhllhtpsp..h.ss......thpshhchltp.ths 
......................................................................................t....pG..plplu.ss...s.s.hs.....tth...l.s...s..h...l.t.p..a....t..p.......p.....a.....P..p...................l..p...l....p.......l.....t...........t.......s...........s........................p..........h.....h......c..........h............l..........t.....p.....s.............p....h..D..l..u...l..t..h..tsh.................................tss.s.l...t....s...p....L....h....p........p.........p..h..h.hl...s..s........ss..a...........................................................l...sp..............ttsl...s....h.....p...c....L.....t.........p.......t..s....h....l....h.h.....p...t..sss.....................hpp..hh.p....p...h....h..p.pt...................sh..p.........................pht...h...p...s...s...s...h....p...s....h...h....p..h.l......t...s......G......h......G..........l......u..h...l..........P....p...........h........h.......h....t...........p..h..........t.p.....s.......p..........L.......h........h.s.l..st.........................tht...hs..h..h...l....h...h...p.....p.....p....p..........h....s.s.......thp.thhphlht...t....................................................................................................................................... 
0 3804 9358 15032 +560 PF03816 LytR_cpsA_psr Cell envelope-related transcriptional attenuator domain TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.81 0.71 -4.30 37 6551 2009-09-11 05:59:27 2003-04-07 12:59:11 9 20 2068 19 1112 4741 444 152.40 32 38.41 CHANGED RoDohhllplssppcpsphlSIPRDohVpl............ss........p........hpKlssAhshG..................usphshcTlcphhu.lslDaYstlsapuhtpllDslG..GVcVssspshp.........sst.........................................hphpsGpppls.GcpALsasRhR+sss.G.....DhuRhcRQppllpulhpchts .....................................................................RoDohhlhslss..p.s..c..p..s..pllSlPRD.o.h.lp.Is...........................sh.............p.............tsKlNtAa.shG......................................Gsph.shcTl.cp.h........h...s.......l...s...I...-....aYsplshpuFtcllDslG...GVc.Vss.......sh.......s..hs...........pss...............................................hp..h.p.t.G.....p....p.....p.Ls..Gcp.ALs...asR...hR..........p............ss....s.....................G............Dh.........s....R.tp..RQppllpulhp+hh.s.......................................... 0 408 799 997 +561 PF02847 MA3 MA3 domain SMART anon Alignment kindly provided by SMART Family Domain in DAP-5, eIF4G, MA-3 and other proteins. Highly alpha-helical. May contain repeats and/or regions similar to MIF4G domains [1]. 
20.70 20.70 20.70 20.70 20.60 19.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.14 0.71 -4.37 29 1720 2012-10-11 20:00:58 2003-04-07 12:59:11 12 37 326 24 1096 1714 25 110.00 21 15.93 CHANGED h++chttllp.EYh..tDhpEAspsl.pcLths..phctclV+hhlshsh-c....pptpchhuhLLpchhptshlsspphppGF.chhs.sh-DlslD...............lPpshphlupFlu+hltsshLs ....................................c+plhhhlp.phh........D.hpEAhppl....pc.L....ph......s.....p......hp.....t......c..............l................lph.hlptsh-c...........pphpchhuhL.lppL.s.....p..t......s..h....htppp..h.ppuFh.chh....c...ph...c..-...l.tlD....................................................hs.thp.l.uphhuchltsshl.......................................................... 1 344 582 853 +562 PF01454 MAGE MAGE family Bateman A, Wood V, Finn RD anon Prodom_3141 (release 99.1) Family The MAGE (melanoma antigen-encoding gene) family are expressed in a wide variety of tumours but not in normal cells, with the exception of the male germ cells, placenta, and, possibly, cells of the developing embryo. The cellular function of this family is unknown. This family also contains the yeast protein, Nse3. The Nse3 protein is part of the Smc5-6 complex [2-3]. Nse3 has been demonstrated to be important for meiosis[3]. 
25.00 25.00 25.80 25.30 20.80 22.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.39 0.71 -4.97 47 1361 2009-01-15 18:05:59 2003-04-07 12:59:11 14 13 248 3 734 1407 0 168.80 36 45.07 CHANGED LVpalLhK.ptKp..PIp+u-hlctVl+ca.......+cpF..scIhpcAucpLchlFGhcLpEl-sp.............................................................s+sYlLlspLshs..................ssh..lsss...pshP..............................+sGLLhllLulIahpGNpssEpplWchLphhGlhss.c....cH.laG.-s+KLl.........op-hVppcYL.cY+.plssocPscaEFh.W..GPRAhsETSKhKV ....................................................................................LVpaLLhK.ptKp...PIp+u-ML..c..lhcca........cchF...PpIhp+AspplchlFGlcl+ElDsp..................................................................s+sYlLlspLshs..........................................ssh...lsss..pshP..........................................+sGLLhhlLulIFhp......G.......N......pusE...........pp.....lWchLp.t...h.G..lhsu..p...........cH.laG.-s+...+Ll........................................Tp-hV.ppp................YL.c...Y...............+....p.............V.............P..s....Sc..............P...........s..c..YEFl.W..GPRAhtEToKh+V.................................... 
0 103 147 224 +563 PF00390 malic Malic enzyme, N-terminal domain Finn RD anon Prosite Domain \N 19.20 19.20 19.20 19.20 19.00 19.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.02 0.71 -4.70 48 7633 2009-01-15 18:05:59 2003-04-07 12:59:11 14 36 4079 97 1959 5725 2415 136.20 38 27.44 CHANGED ps+pEhlah+hlpsp...-.hPllYTPsVupsCppauc.hcpsp..hahohssht...h.t.h.th.......plhVlTDGstILGLGDlG.huGhslslGKhsLasshAGlcPt.slPlhLDsGssp.t.hts..YhG....R..s....thlDcFlcslpphasspshIpaEDhusspAFplLc+a+ .................................tc.phh.hp.ltsp.....c.LslsYo.PsVutsCpphsct........pp.s.............h.....hahsh.s..sh............................................................VhV.los..Gp..u...lLGLGslG..huuhs..............l........GKhs.....La.............p.....thuGl.c...s.....hsltlDst..............................s-chlpslp..th..sh............uhlph..EDhtsspsF.l.p+h+............................................................. 0 602 1170 1596 +564 PF03949 Malic_M malic_N; Malic enzyme, NAD binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 23.50 23.50 23.50 23.50 23.30 23.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.45 0.70 -4.83 66 6923 2012-10-10 17:06:42 2003-04-07 12:59:11 10 36 4103 83 1803 5218 2322 236.30 38 43.02 CHANGED .QGTAsVslAullsAl+lsucs...l......p-h+lVhhGAG..uAGlulschlhpt.....Gls....t...cplhhlDpcGllpcsR.ss..h......s.....shptt....aA+ppsphp.............hsLt-slcss..slllGsSt.ssGshsc-hlcpMup.........+PIIFALuNPss..EhpP--Ahphssutsl.ATG...........+o.hPsQsNNslsFP..GlhhGslsspAppIs-cMhhAAAcAlAshsspc..............phst.shllPshtchR.luhplAhAVuctAh 
......................pGTAlVshAullsAL+....l.s....u+p..............l..............p-...h+lVh.GAG..uAGlu...hschllsh.........G..lp............hcplhhsDp..................p.....Gllhcs..R....s.s...L........s......thppt...........aApp.s.st.t.................................toLt-slcsu..Dlhl........Gl..........S....s....ss...s......hopEhl+pMup..................cPIIFALuNPs.....s..EhhP.cp.Ahphss...sts.hA....TG..............................................R..o.........t....a..PsQsN..N....sLsFP....GIhhGsLss.tA.pp.Is...--Mhh.AA..u.c...........AlA.phsp...t..p..........................ph.u.....s...hllPpshD......R..l.hplA.AVAcsA..................................................... 0 555 1086 1472 +565 PF00629 MAM MAM domain Bateman A anon Prosite Domain An extracellular domain found in many receptors. 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.29 0.71 -4.46 92 3415 2012-10-03 19:46:52 2003-04-07 12:59:11 18 308 158 2 1950 3145 308 149.40 23 28.95 CHANGED CsF...Eps.....hC.uappsps...ssh..cWpp.hps.ss.......suPttD+ohss........Ga.ahhlpssts....ttGp..........pA.pLhS.hh........stss..........stC.lp.FaYah.hGpsh....Gs.Lplhlcpp...........tsttph.....lWph.sGspu......spWppsplslss....sppapllFEu.htus..stt.GslAlDDlplp....ps....tCsp .........................................................................CsF.....-ps......hC..s...ap....p.......ttt...............ssh.....pWph...ps...s......................sP..hDpohts.........................Gh.a..h..h..lpssts......t.up......................pA..pL.h.Sshh.....stss.......................stC...lp..Fa.Y.ah.....h.....Gtsh......................Gs...L..p..l.hlptp..............................tth.tp.h.....................lW....ph.....s..u....s.p.u...................stW..p..p.s.p.ls.....lss.......sp.....ta............p..l..lF...c..u...h.hst................s.t...us..lA.lDDlplt..t.....C..t........................................ 
0 938 1052 1428 +566 PF03999 MAP65_ASE1 Microtubule associated protein (MAP65/ASE1 family) Bateman A, Wood V anon Pfam-B_12512 (release 7.3) Family \N 21.80 21.80 22.50 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 619 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -13.08 0.70 -5.92 6 605 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 273 3 390 587 4 449.20 20 74.33 CHANGED hpssplsclcpcspsshpctccEs..sptppullppIustcsElssL...........sphLc.......................................tthshhpp.cshshh....+Lccpl-pL....................RcphspRhsElpELhcQ.ppLCppLGp.sLsh.........h-sssLs.....Ech-phRp+lspLc-p+spRLc-hssl+psIpphhchLspp.t....sshEpclhs............ss+slo.EshspLpphhcphpsp+pcpt-+lcslpsplppLWsRLphosE.tp...............hp-Aoshoppohcl...lccElpRLptlKppplKphIEphRlEIpEhWDphhhSpEpR+p..Foshap-hhs........EpLLEtaEsElccLKphhsspKcIh-LlpcatpLhcchctLEtpusDsNRas.sRG....Gp..L.LKEEKpRKplsp+LPKlpppLptclpsaEscpspsFLhcGpslLEhhu.......spWEc+RppKppupspKKsssppss.hcss.ts..sssPpTPssp+sstslsssTsuht+s..hppsppptpsossstpoGsh+p+ssspRh.ssssp.suAppuptssh.RsttssssuptssSssptp.pos.sshhhspshshshcpssspsphpssspsspptcslppspsusssps-soostpsu 
......................................................................................................................................................................................................................t....................................................h..............................................................................................................................l....hp..h...............................................................tt.ttpRhpph...tt...l...tt...p....p....h......pt..............................................................h.t...t.s.ls............pplpphptplppl....ppchp.pRhpph..phh...pplhph...ht...Lsh.s.............p.ht...t......h..t.............................................................tt.pths.l.sp....pslt.pLpphh...........ppLpt...p+ppp.pp.....h...........pplttpl.pLW.phh..p...h..s...tp-pp...F........................................h.tt.s.t....s....hs....h...phlpt...........hp.ElpRLppLKtp..ph+ph...l.p.......h+..-lpphhcth.a......h....s...-...p...ttt.............h.....h...h....h.....-.s.hs....................................p...p...L...L.p.h-.tpl.tclcp.hts++plhchlp+a.phhpc.p.LEp....hs.pDss...........Rat...sRG........up........L..L+ct.cpt+tlh...pc.lPt..l.ppLt.tp.lptWE....p....cp.......s.p...s.......F.h.h.pG.....phlphlp.......................h...p..t.p...c............p.+.......p....p........+...p............p.+...p........h....t....t.t..............t...t...t.p.............t..t......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tt.................................................................................
................................................................................. 0 110 216 309 +567 PF00917 MATH MATH domain Bateman A anon Pfam-B_1602 (release 3.0) Domain This motif has been called the Meprin And TRAF-Homology (MATH) domain. This domain is hugely expanded in the nematode C. elegans [3]. 21.20 10.00 21.20 10.00 21.10 9.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.59 0.71 -3.86 52 3261 2012-10-02 00:06:50 2003-04-07 12:59:11 21 124 446 92 2107 3079 7 124.00 17 25.90 CHANGED lcNhSphp.......pspphhs.shpp+ashsWp................lplhppss.......ahulaLpCtpt..ps..........hpWslpschplpllsssGpp.........hppphp.....psFsps.................................hshGhsp.....alpacplppch...lhcDolhlcupVcI ................................................................................ht.h...............thh..s..s.h...h.t..h..tG...h.p...Wp..........................................lpla.pGss................pssalS.l.alpl..h.ps.t.ps.........................tWsh.p...s..p...h...p..lpl..ls..p..p..spp.................ptp.p...........ppF.s.pt........................................................................................psh.G.atp...........Fhp..hp.pl....p.ptt.......a..l....h..s..D.s.lhlpspl.......................................................................................... 0 726 1040 1659 +568 PF01429 MBD Methyl-CpG binding domain Bateman A anon Bateman A Domain The Methyl-CpG binding domain (MBD) binds to DNA that contains one or more symmetrically methylated CpGs [1]. DNA methylation in animals is associated with alterations in chromatin structure and silencing of gene expression. MBD has negligible non-specific affinity for DNA. In vitro foot-printing with MeCP2 showed the MBD can protect a 12 nucleotide region surrounding a methyl CpG pair [1]. MBDs are found in several Methyl-CpG binding proteins and also DNA demethylase [2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.56 0.72 -4.45 16 1074 2012-10-03 08:51:45 2003-04-07 12:59:11 14 66 135 6 573 1046 14 75.90 25 10.19 CHANGED hpppp.tcssL.tGW+Rchh.RpsGpph....................s+hDlhYhu...........P.sG++h+ohs-lhpYLppss...............................................hphppFsFssthhhsp ............................................................................................t.......p.sL..GW.pRchh.....R...p..t...Gt.ph.......................................tch.-lhYhu..................................P...sG+.chRoh..s-l....hcY...Ltp.ss...............................................................................................h..ppFsFpsh.....hht........................................................................................ 0 130 240 380 +569 PF03062 MBOAT MBOAT, membrane-bound O-acyltransferase family Bateman A anon Pfam-B_2359 (release 6.4) Family The MBOAT (membrane bound O-acyl transferase) family of membrane proteins contains a variety of acyltransferase enzymes. A conserved histidine has been suggested to be the active site residue [1]. 
20.90 20.90 20.90 21.00 20.60 20.80 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.26 0.70 -5.31 41 4677 2012-10-01 20:09:06 2003-04-07 12:59:11 14 27 2309 0 1785 4135 489 287.90 18 62.65 CHANGED hhthhhhthphhslhhhhslshhshphluhhhphtcs..................................................................................hhpthshhcahsYlhahs...................shhsGPhhpapcahpthc.t..hphthh..................t.hthulthlhhshhhhhhhthhlshhhhthlphhthhp......hshhhhhhhhhhhthhhha...th.hhshuhshhhGlt....................................ts.sh.psh.tspshpcahcpWphslspWLtcYlYhphh...........hhhtphhuhhhsahlsulWHGhphtallashhpslhlhspphhtphhphhthtttttththhthhhthhhhhhhshhhshhhh ..............................................................................................................................................................................................................................hhh.............thhh.hulsh....h....o...h......p.l.shh...h..-hhc..t.............................................................................................................................................................................................................................................hhtphsh.hpaht.al.haaP...................................................................plhu..GPlhcapc.hh.pp.h..p....p....h.p...ph..........................................h.t.t..ul.t..h.l....h..h..G.h..h..h.c.hl..l.......u.........h......l.s....h...h..h......h..t...h...h...p..t..........t...........................................................................................h.......h....h....h.h....sh...h...ta.s..h........l.....a..h.cFsu....a..s...h...A.l.uhuh..hhGhp..................................................................................s..NF......psa.h.u.psl..p-F...W.p.+WHholspWh.+..cY.l.Yh.hsh................................................pp.thh.t...p..h....h....s.....h.......hl.sah..lsul.WH.....G..h..........s.h.t..a....l............l...
.......a.....G..h...........h....p..u.h.hh..h....h...tp..h....h...h....p........h....h....t......h................h...................t....t...h....h...h.....h.....h..........h...h..h..h...h.......h.h.hh....h.......h...................................................................... 0 630 1009 1447 +570 PF02820 MBT mbt; mbt repeat Bateman A anon Pfam-B_526 (Release 6.2) Family The function of this repeat is unknown, but is found in a number of nuclear proteins such as drosophila sex comb on midleg protein Swiss:Q9VHA0. The repeat is found in up to four copies as in Swiss:Q9UHJ3. The repeat contains a completely conserved glutamate at its amino terminus that may be important for function. 20.20 20.20 20.30 20.60 20.10 19.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.19 0.72 -4.06 18 2104 2012-10-02 16:56:36 2003-04-07 12:59:11 13 50 103 159 1180 1975 0 72.60 33 29.03 CHANGED MKLEAlD.pssphlClATVspVhGpc.Lpl+aDGacsph....DaWsph-.SsDIaPlGWC-tsuctLpsP.shtppth ..............................................................MKLEul..D......p...s..s....p...h.h.p..lAT.V..s.cl.......h....upc...lhl.....+.....a..........D.G....h..c...ssh....................DaW...s.phs.S.scIaPVGWCpps..G..p....Lp.PP.sh.....h................... 0 218 298 671 +571 PF02470 MCE mce; mce related protein Bateman A anon Pfam-B_475 (release 5.4) Family This family of proteins contains the mce (mammalian cell entry) proteins from Mycobacterium tuberculosis. The archetype (Rv0169), was isolated as being necessary for colonisation of, and survival within, the macrophage [1]. This family contains proteins of unknown function from other bacteria. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.65 0.72 -3.98 125 12284 2009-01-15 18:05:59 2003-04-07 12:59:11 15 19 2490 0 2643 8745 850 86.00 22 29.09 CHANGED sshplpsthssu.sGLpsG.ssVphpGlpVGpVps..l.............p.sspp......lplphplp...............................................psh............................hlspsspuplpst.sll.Gt........................................palslpss .................t...plphphs.su..sGLp.sG..ssVphp..GlpVGpVpsl........t.............h........ssss.pp....................lplph.plp......................................................sph.................................hls.p.ss.p...u.slps...sll..Gt..........taltlp.......................................................................................................................................................................... 0 540 1469 2139 +572 PF05053 Menin Menin Moxon SJ anon Pfam-B_5848 (release 7.7) Family MEN1, the gene responsible for multiple endocrine neoplasia type 1, is a tumour suppressor gene that encodes a protein called Menin which may be an atypical GTPase stimulated by nm23[1]. 
19.20 19.20 19.70 19.20 18.70 18.40 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.08 0.70 -6.35 3 147 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 84 8 78 156 0 425.50 46 94.18 CHANGED MGl+usQKoLFPLRSIDDVVpLFcuELsSs.EPDLsLLSLVLGFVEHFL.........AVNRVlPVNVP-loFpPosusDPsuGNSs.FPVV-LsLIAALYcRFpAQIRGAVDlSpYPKPuGsSSRELVKKVSDVIWNSLSRSYFKDRAHIQSLFSaIT.....GTKLDSSGVAFAVVAACQlLGLKDVHLALSEDHAWVVFGpsGEETAEVTWHGKGNEDRRGQTVsAGIsERSWLYLKGSYM+C-RsMEVAFMVCAINPSLDLHTDSlELlQLQQ+LLWLLYDLGHLERYPMALGTLADLEElEPTsG+.SPLoLY+KAIESAKTYYRNEHIYPYMYLAGYHsRpRcVREALuAWA-AAoVIQDYNY.CREDEEIYKEFFDIANDVIPsLLKEsASutEAc..-Et.tEpp.tptuthSALQDPECFAHLLRFYDGICKWEEGSPTPVLHVGWATaLVQSLoRFDuQIRQKVsIlo+E..................sEAsEu-EsuGEEAREG.RRRGPRRESKs......................................-EPuGutSPNPcLPApNpNssospsuss.GucRKsuAoTsGsAssscNGSoosVPlPouSpP..................PphptG..................PVlTFaSEKMKGMKELLlAsKlNSSAIKLQLTAQSQVQMK...+QKsSAouDYTLSFlKRsRKsL ..........................................................................................ttthFPlpshssllplFtttLt......p...EPDLsLLSlllGhlE..L..................s.stshsss.....sth....t....................sh...FPshphphltuLYt+F.s....hls..h....s.h...t....tt.uoREllKKVSDVIWNSL...RS.aKDR.AHlQsLaSalo..........GsKLDs.GVAhAVVuuCQhLGh+........DV+LAlSEDHAWVlFG...p.t.pThEV.TWHG..KGsED+RGQslt.........sGl.tpt.SWLYltG..hhCsR.tMEVAhhVsAlNsSl.......shpo....DshElhpLQQpLLWlLYDhGaLc+YPMALGsLu-LE-...ht..........o...............s.....................p.ss...platcultuuppaYp.spHlYPYhY.usa.h.R......h.hp-....Ah......tsWAps...upVhp.YsY.sR-DtEIYKEhh-lAN-lIP.hh+.ts....p........................................phLppspsau.lLpFYDGIC.WEEsu.TPlLHl.sWAp.Lltsls+Fp.plRtpl.l....p..........................t..t...t.................t.........ps....pp..............................................................................................................................................................................................................
...................................................h...........................................sh..l.h.StKMtsh.thl..s.t.+lNstA.htL.lTAQS.sp................................h........................................................................................................ 0 32 35 60 +573 PF00149 Metallophos STphosphatase; Calcineurin-like phosphoesterase Dlakic M anon Dlakic M Domain This family includes a diverse range of phosphoesterases [1], including protein phosphoserine phosphatases, nucleotidases, sphingomyelin phosphodiesterases and 2'-3' cAMP phosphodiesterases as well as nucleases such as bacterial SbcD Swiss:P13457 or yeast MRE11 Swiss:P32829. The most conserved regions in this superfamily centre around the metal chelating residues. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.44 0.71 -4.92 324 31320 2012-10-02 19:15:56 2003-04-07 12:59:11 23 436 5141 215 11462 33578 5742 205.20 13 48.44 CHANGED hplhhhuDlHsphtph.............................................hthhphttptpsshl.lh.sGDhsspsthshtthhhh.....................................................................hhthphst.hhhlhGNH-htthsphhhhh.th............................................................................................................................hhht.thhthhhphhshh.htthtshpllhsHssh.stht...........................................................................................thhtthhtptphchlltGHpHt. 
..................................................................................................................................................lhhhuDl.H....h.p..htth.......................................................................................................thh.h...p...h....t...t.....p.........t...s.......s.....h........l....l.........h....s..........G.D....h......h......s........p....s...s.........s....t.s.h.thh...........................................................................................................................................................................hhhh..p..h......t............h..h.....h.....l.h...G..NH-....h.t....h.......h....h.....t....h....h.....t..........h.......t.....t...h...........................................................................................................................................................................................................................................t....h.....h.....t......h......t..................t.............h......h............h......h................h......h...............h......t.........t........................p......h........h......l...........h.....h............s.......H......t...s...h......s.t....................................................................................................................................................................................................................h..........t....................t.....h.......p..h....h...h.h.uH.h................................................................................................................................................................................................................................................................................ 0 4014 6855 9602 +574 PF01420 Methylase_S Type I restriction modification DNA specificity domain Bateman A anon Bateman A Domain This domain is also known as the target recognition domain (TRD). 
Restriction-modification (R-M) systems protect a bacterial cell against invasion of foreign DNA by endonucleolytic cleavage of DNA that lacks a site specific modification. The host genome is protected from cleavage by methylation of specific nucleotides in the target sites. In type I systems, both restriction and modification activities are present in one heteromeric enzyme complex composed of one DNA specificity subunit (this family), two modification (M) subunits and two restriction (R) subunits [2]. 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.11 0.71 -4.27 65 11631 2012-10-02 00:09:28 2003-04-07 12:59:11 14 41 3019 14 2039 9932 1120 165.50 14 72.39 CHANGED s.cWc.phpLs-lhcl....tpGpt.tpp..phtpsGph...........Palsss.thps...............thhsttpphhhp.tsslhlstp........u....shGpshhtst....t.....h..hssp..phtslpsh..ph.h....chhhhalt....thhp......phpp........thussh.plstsplp..phcl.lP.......sh.pp......QptIschlcphtp...plpp.phhpplcp .................................................................................................................................h...hpl..t...p....l.h..p.h...........t...u..t...t.......p.............t..t...t.s..t..h.....................................shl.phs.....t.............................th.....t............p.....p.....h..........h...h....p.....t.....ss....l.l..hstt.......................u..sh.G..p....s...t..h...h..pt.............p...............h.....h.h.s.p....t.h.hhl...psh.....ph...............p.a.l..hhh.lp.......th.hp............ph.p.ph......................sp.u..s..s..h.....p..l...s..t...s..p...lp.....p.h..l.s..l.P..............sh..pE.................Q....pc.Is....p....h....l....p....p....h....tp........th.t..........t.................................................. 
3 744 1450 1786 +575 PF01795 Methyltransf_5 DUF36; UPF0117; MraW methylase family Ayala JA, Bashton M, Bateman A anon Pfam-B_1376 (release 4.2) Family Members of this family are probably SAM dependent methyltransferases based on Swiss:P18595 [1]. This family appears to be related to Pfam:PF01596. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.69 0.70 -5.15 16 4779 2012-10-10 17:06:42 2003-04-07 12:59:11 14 11 4534 7 1122 4036 3223 300.00 42 95.42 CHANGED phtHhoVLLcEslchLp...........l+ssG...lYlDsTlGtGGHoctlLppLsp.t+LhulDpD.pAlttAppcLp.a..ct+hsll+ss..Fsplhphhtct...............hltclDGILhDLGVSS.QLDps-RGFSFpcD.uPLDMRMDpossl.TAtEllNshs.p-LscIhhcYGEE+auKpIA+uIhctRc......ppPhpTTt-Ls-llppshPsht.....++t.......ttHPAs+sFQAlRIhVNcELppLccuLppAhchLsssG.RluVIoFHSLEDRIVKphF+-hsph..p.P.uLPhh.pthp.......hlT+KPIhPSpcElcpNsRSRSAKLRlsEKh ....................................t..pHhsVLLpEsl-.s.Ls...........................................................l+.ss.G..........lYlDuThGtG.GHSchl...L.p.p..L.sp...tG......+......L..............luhDp..D....pA...lttu.....p....p....p......l.p..h...........ssRhshl..+us....Fs.p.l.....t....ph..lpch.......................................................tl..tclDGILhD..L..GVSS.Q......LDp......sER..GFSa..p..p.D..uP.L.DMRM.............D...............t.................s...........p........u.....h....oAt-llNphstp-LsclhhpYG..EE..+...a..u+pIA+t.Il.p.t.R.p......p...p....P.l..po....T.t-Ls-llp.p...u.....h...Pt...tt...................+.c..................t.t.HPA.p.RsF.QAlRItVNsELptlcpu.L.p....p.A.h.c.h.L...............ts........G.G....RluVIoFHSLEDRlVKphh........+........c...t..........u.........p.s............................p.s...P........t......s.......l.......P...hh..........p.p.h...p.sh..........................hphl.s.+...K......s...l.......hPoccElppNsRuRSA+LRlAE+.h.................................................................................. 
0 409 743 952 +576 PF05060 MGAT2 N-acetylglucosaminyltransferase II (MGAT2) Moxon SJ anon Pfam-B_6001 (release 7.7) Family UDP-N-acetyl-D-glucosamine:alpha-6-D-mannoside beta-1,2-N- acetylglucosaminyltransferase II (EC 2.4.1.143) (GnT II/MGAT2) is a Golgi resident enzyme that catalyses an essential step in the biosynthetic pathway leading from high mannose to complex N-linked oligosaccharides [1]. Mutations in the MGAT2 gene lead to congenital disorder of glycosylation (CDG IIa). CDG IIa patients have an increased bleeding tendency, unrelated to coagulation factors [2]. 27.00 27.00 27.40 27.20 23.10 22.60 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.42 0.70 -5.54 10 200 2012-10-03 05:28:31 2003-04-07 12:59:11 9 4 105 0 131 210 0 274.60 35 75.24 CHANGED sloLptRshl.shN..p....l.N.sDlaspLsp.........scllIVlQVHNRP...pYL+lLl-SLu+s..+GIscsLLIhSHDhYssElNcllpuIcFCpVhQIFaPYSlplaPssFPGssPsDC.pphcK-cAhcppC.Nuc.PDpYGpYRpu+hsplKHHWWWphNhVWDtL.ccs+sasGalLFlEEDHYlhPDhY+sLphlhshK.phCPsCtslsLGshsh.ops.uh-sthsEVhsahuscHNhGhAhsRslWcKI+sCucpFCoaDDYNWDWTL.tlshsChsp.l+sL..s+uPR..lhHh......G-C.GlHp....sssCpss.ssstplccls+..slpsphaPpslploc+ps.uhpush+sNGGWGDhRDRcLC .............................................h............hN....thhs..........................phhlVl.VHsR....pYL+hLl.SLtps..psIppsLllhSHDha..-hspll.t.s.lsFC.....V.hQlFaPaShphaPppFPG.sPpDC....t+ptAh.htC.su...PD.aGpaRps.ch.sphKHHWWWKhp........hVapt.....l...c.hp.asGhllFlEEDHalh.Dh.hhhhphh.php......p.CspCt.....hlsLGsh........h..sh.hs.tpps-hhsahSs........a..NhGhAhsRssapplh.t..psppFCpaDDYNWDhoL.hls..t.s...h..............p.....................hh......sh...h.psPR..lhHh........Gc.C....GhHp......ppsCtsp..t..tp.l....pph.p..p.pt.hh..........tl...hl...t.....t.......t........pt.GGWGDh.RD+pLC.......................................................................................................... 
0 44 65 105 +577 PF02142 MGS Methylglyoxal_synth; MGS-like domain Bateman A, Mian N anon Pfam-B_220 (Release 4.4) Domain This domain composes the whole protein of methylglyoxal synthetase and the domain is also found in Carbamoyl phosphate synthetase (CPS) where it forms a regulatory domain that binds to the allosteric effector ornithine. This family also includes inosicase. The known structures in this family show a common phosphate binding site [1]. 22.90 22.90 23.00 23.10 22.80 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.10 0.72 -4.05 164 10875 2009-01-15 18:05:59 2003-04-07 12:59:11 17 65 4646 116 2605 7914 4421 100.00 27 14.06 CHANGED sllshA+tL.tph.G..acllATuGTu.chLp.cs..Gls.hplsphstps.h.s......................hhchlpptpIp...lVlssh.shptsht.........-shs...lccss.phcIshsT ............................llphAptL.t.ph..G....aclhuTu.GTu....p.hL.....p....c.....s...........Gl.............s..........hpls..p...h..s.t.t..s......h.hsuc................................................................................thhshlpp...t.....c...Is.........lVlssh..hshptsst................-stsl.c.css.phcIsssT............................... 0 852 1659 2184 +578 PF01769 MgtE Divalent cation transporter Bateman A anon Bateman A Family This region is the integral membrane part of the eubacterial MgtE family of magnesium transporters. Related regions are found also in archaebacterial and eukaryotic proteins. All the archaebacterial and eukaryotic examples have two copies of the region. This suggests that the eubacterial examples may act as dimers. Members of this family probably transport Mg2+ or other divalent cations into the cell. The alignment contains two highly conserved aspartates that may be involved in cation binding (Bateman A unpubl.) 
21.50 21.50 22.00 22.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.48 0.71 -3.88 17 3706 2009-01-15 18:05:59 2003-04-07 12:59:11 11 18 2452 6 1251 2988 1805 127.60 28 31.59 CHANGED hlPhlhGhsGNhGsphuohlsptLslGplp........tph.plhhcphthshh....luhlhu..shshlhsshh.....sssshlshsVssslhhshhlus...husllslhhc+htlDPs.huuPllTolsDlholhlh.hhlu .......................................................hhPllsuhGGNs.....GsQsholllR....u.lA.....h....s..p.ls.......................ps.h.h.p.l.l.h+..E.l.s.....l.u......hl........................hGll.hu...........hh..h...h..l..h..s...h..h....h..........................................................h....t.....s.............h..h....l....u.h..s.....l....u.....h....u.h...h...s.s.l....l.husl......hGshl...P.hl..hc.+h.slDP.AlsouPhl.TTlsDlh..Ghhla.hsl.................................... 2 418 721 996 +579 PF00993 MHC_II_alpha Class II histocompatibility antigen, alpha domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1288 (release 3.0) Domain \N 20.60 20.60 20.80 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.69 0.72 -4.41 51 3084 2012-10-03 22:02:01 2003-04-07 12:59:11 15 5 277 142 138 2606 0 76.50 45 55.24 CHANGED DHlutY.hthhQshsssGpaha-FDG-EhFYVDLc+KEsVWpLPpFuchhoF-...sQsuLpsIussKtNLslhhKtsNsTssss ..........................hthhts..s.s....s....G...pasa-FDGDEhFYVDlpKKE.......TV.WpLP.F.u.catoF-.....sQGALsNlAssKtNLslhhKpSspss................................. 
0 7 18 38 +580 PF00969 MHC_II_beta Class II histocompatibility antigen, beta domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_331 (release 3.0) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.49 0.72 -3.99 121 17595 2012-10-03 22:02:01 2003-04-07 12:59:11 14 5 639 123 215 16212 0 69.80 52 67.66 CHANGED u-CaF.pNGTc+VRalsRahYN+EEa...lRFDSDVGcahAlTELG.....c.sAEhWNSpc-hLEppRAtVDThCRHNYtlhp ..................cChF...N..G..T-RVRalcRYhYNpEEa....lRFDS..D..V..GcahA.s.TELG......cssAEa........WNSQp.-lLEppRAtVDs..hCRHNYtl......................... 0 23 41 74 +581 PF02816 Alpha_kinase MHCK_EF2_kinase; Alpha-kinase family Egelhoff T, Ryazanov A, Bateman A anon Egelhoff T Family This family is a novel family of eukaryotic protein kinase catalytic domains, which have no detectable similarity to conventional kinases. The family contains myosin heavy chain kinases [1,2] and Elongation Factor-2 kinase and a bifunctional ion channel [3]. This family is known as the alpha-kinase family [4]. The structure of the kinase domain [5] revealed unexpected similarity to eukaryotic protein kinases in the catalytic core as well as to metabolic enzymes with ATP-grasp domains. 
21.30 21.30 21.50 21.50 20.90 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.47 0.71 -4.63 63 765 2009-01-15 18:05:59 2003-04-07 12:59:11 13 29 138 16 532 775 15 177.80 24 20.05 CHANGED FupGsh+tsathh.h....................................ttsphhVsKhhpp.....t.........................p..............................c.hhp-spsphhupphscpFNpph....tts..pp...lp..............ahshh.lhchpsps........................hhhsEta......lpG....capKYNsNt..Gah..............tppsthtphhpAFSHFTYphSptphllsDlQGls...s.......hLTDPtIpot..s.sph....h..u..sshGpcGltpFht.pH.pCsphC ......................................................................................................................................tGthR.shtsh.h....................................p..sp.hlhK.hh...........................................h....................................p.hhhp-..h.p.ph.......hu.phhsptFsph..............t.s.t......p....................................hl..hh..lhp.h.psp..........................hhhlEph......l..pG........ca.h.Kas..sNs..uhh...................................ttssp..h..tphhpAFoHaoYphops....phlVsDlQ......GVs....p........................hlTDPplhshp..tph................h......G..sNhG..pulptFht...pH..pCNphC........................................................ 0 305 370 431 +582 PF02854 MIF4G MIF4G domain SMART anon Alignment kindly provided by SMART Family MIF4G is named after Middle domain of eukaryotic initiation factor 4G (eIF4G). Also occurs in NMD2p and CBP80. The domain is rich in alpha-helices and may contain multiple alpha-helical repeats. In eIF4G, this domain binds eIF4A, eIF3, RNA and DNA [1]. 
21.00 21.00 21.10 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.33 0.70 -4.92 41 2682 2012-10-11 20:00:58 2003-04-07 12:59:11 14 57 339 22 1754 2678 51 203.50 17 26.69 CHANGED t+plpulls+ls....pphpphhpplhphhhpp........phhppllphl...hppshtps......shhshhApLsstlspphs....................pFsphllsphhppFpp...................................ppppptsppcthshl+FluELhphph...lspt.hlhp......hltpllpphsc..................pplchlhplLpssGphLp.....................................................ppsppthcphhpphpphhtp................................tphs.clcahlpslhpl+psp ......................................................h...hpshls+ls.....pphpplhpp.l.hph..hhpp..........................thhptllphl....hpts...ht..ps............phs...haApl.ss.t.ltpths..................................................................pht.p..hl...ls.ph..c.pFcp....................................................................t.p.t.c..t..t..s....p.....p.......c.t.hs..l+Flu-L.h..ph.ph.........ls.p........t...lhhp..........hl.t.p.....L.l..pp.hpc.............................................psl.-hlspl...Lpss.....Gt.Lp...............................................................................pp.s.p...t..t.......hc...phhp......phpphhpp...........................................tphs.c...hc.....ahlpshhplpt......................................................................................................................... 1 585 916 1394 +583 PF02815 MIR MIR domain Bateman A anon Ponting CP (EMBL archive) Domain The MIR (protein mannosyltransferase, IP3R and RyR) domain is a domain that may have a ligand transferase function [1]. 
24.70 24.70 24.90 24.80 24.40 24.60 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.31 0.71 -4.76 30 1582 2012-10-02 19:42:32 2003-04-07 12:59:11 14 76 302 11 968 1488 5 172.80 24 10.35 CHANGED GaL+utcslh........htucQpphhsh..........sch-pssc........................WclEs.pps.h...........puutscW..........GshhRl+HlsTG+YLtuc-cp..............................sslscp.chpp-sosYshpsh.ts............................sDt....lplhcpcsssshusspl+stcohhRLpHhtTusaLpupsscl..sthG.tppEpsshpp.....tssh.hhEpc-shs.Ls ......................................................hL+utshhh..........htu.p.Qpp.s.h.sh.......................................s.pt.D.p.ssp....................................Wh.l.c.s.....p..p..s.s.h..............pust.h.ph........................GshhRLpHlsTu.+hLtuccst..........................................sslspp....ptE.so..sa..s.h..psh...ts..............................................................................pDh......hpl.h......phc..s.....p.....p............t..........t...s....p...l..ps.hso.hhRlpHh..s.os...saLpspshpl....sphu.tptEhs...shtp........sph.................................................................................. 0 275 441 727 +584 PF04212 MIT MIT (microtubule interacting and transport) domain Bateman A anon Crosby A, Patel H Domain The MIT domain forms an asymmetric three-helix bundle [3] and binds ESCRT-III (endosomal sorting complexes required for transport) substrates [2]. 28.30 28.30 28.30 28.30 28.20 28.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -8.98 0.72 -4.09 82 1334 2009-01-15 18:05:59 2003-04-07 12:59:11 13 33 342 20 840 1237 38 69.40 27 14.39 CHANGED hlppAhchlppAlctDp...tu.......papp.AlphYppulchhhpulphc......p.sspp+ph..l+pKhppYlsRAEpl+phl ........................hpcAhphlppAl.ctDp.......ts.......pacc.AhphY.ppul-hhlp.....ulph-................................p.sspp+ct.....l+pKhppYLsRAEpl+ph............................ 
0 236 365 592 +585 PF00153 Mito_carr mito_carr; Mitochondrial carrier protein Sonnhammer ELL anon Prosite Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.00 0.72 -4.28 188 37736 2009-01-15 18:05:59 2003-04-07 12:59:11 22 243 698 9 25106 35824 767 94.40 21 75.41 CHANGED pphshhtphhuGuhAGsh.utssstPlDhlKsRlQ................................tts.tpa.puhhcs...........hpplh.+p-G........h.tuLa+GhsssllthsstsulpFs....sY-thKphhhptt .......................................................................t....h.phh.uG...u.h.u.....G....s....h.....u.......s.........h...........h..........s..........h..P....h........-..........s...l..K..s.....R.hQ.....................................................................................t.tt..tph....p....u......h...h....s.s......................................................h..p.p..l.h...+..p....-......G........................................h...tu....l....a.....+..........G...h.s........s..s.......l..h...p......h......s..s..h........s.......u.....l....has....sY-hh+phh....t............................................................................ 0 8054 13500 20193 +586 PF03637 Mob1_phocein Mob1/phocein family Bateman A anon Pfam-B_1830 (release 7.0) Family Mob1 is an essential Saccharomyces cerevisiae protein, identified from a two-hybrid screen, that binds Mps1p, a protein kinase essential for spindle pole body duplication and mitotic checkpoint regulation. Mob1 contains no known structural motifs; however MOB1 is a member of a conserved gene family and shares sequence similarity with a nonessential yeast gene, MOB2. Mob1 is a phosphoprotein in vivo and a substrate for the Mps1p kinase in vitro. Conditional alleles of MOB1 cause a late nuclear division arrest at restrictive temperature [1]. This family also includes phocein Swiss:Q9QYW3, a rat protein that by yeast two hybrid interacts with striatin [2]. 
21.90 21.90 23.80 22.00 21.10 21.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -10.99 0.71 -4.60 12 1280 2009-09-10 23:30:30 2003-04-07 12:59:11 12 11 310 3 872 1169 11 157.60 35 62.79 CHANGED pphspsoLsuu..slpphVpLPpGpDhs-WlAhpsh-FFsplNhlYGolsEaCT.poCPpMousspa-YlWtD..phtK.PsphsAspYlchlhsWhpsplsscplFPo+sul.FPpsFht.lsppIhpRlFRlauHlYh+HFcplhplphEsHlNspFpHFhhFspEFsLlcp+-htsLp .............................................hhptsltps..slcphVpLPp.G..Dhs-WlAhp...................s.....h........-....Fapp...lN..h.l.a.u..sl.......s........-........h........C....o.p..oC..PhMs...As...s.................p.a................pYhWt-...................pht.+....Ph.ph.........sAs......cYhchlhsWlps.lssc...pl.FP........o.+.......h........u.......h...........s..F......P.....p...s........Fhp......h....sp...pIh..+....RLFRlauHlYhpHa.p...hhp..lt.EsH.....LNopFpHF.hhFhpcasLlstc-htsh..................................................................... 0 273 432 669 +587 PF00994 MoCF_biosynth Probable molybdopterin binding domain Bateman A anon Pfam-B_1258 (release 3.0) Domain This domain is found a variety of proteins involved in biosynthesis of molybdopterin cofactor. The domain is presumed to bind molybdopterin. The structure of this domain is known, and it forms an alpha/beta structure. In the known structure of Gephyrin this domain mediates trimerisation [1]. 
25.10 25.10 25.10 25.20 24.90 25.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.80 0.71 -4.79 149 13076 2009-01-15 18:05:59 2003-04-07 12:59:11 19 66 4239 129 3598 9529 4365 147.50 23 44.05 CHANGED ullosGsElh...............tGpl..........hDsNu.hlsshlp..p.....hGh.plhths.llsD-.ptlppsl......htt.pp...DlllsoGGsusupcDhohc.............sl.tphhs........hhctlshp.st.................................................shh...hush...stt........s....lh.sLPGsPs...ushsshphhl..............PhLtp .................................................llosGcElh............................tGph...............hDssu....hl...tphlp...p.............hG...h...p....l...h.....t..hs....l...ls...D.c........p.tl.p..psl..............pps.h.pps.............Dll.lo....oGG.s.....u...s.s...p.......c.......D.hThc........................................sl..t....ph.h...s..p......................h...hp.pluhp...tt.........................................................................hush..h..h.spt........................t.s...lh..sLP.G..sPs.ust.s.s.hpthl....Phl..t...................................................................................................................... 0 1154 2293 3059 +588 PF03454 MoeA_C MoeA C-terminal region (domain IV) Bateman A anon Bateman A Domain This domain is found in proteins involved in biosynthesis of molybdopterin cofactor however the exact molecular function of this domain is uncertain. The structure of this domain is known [1] and forms an incomplete beta barrel. 
20.40 20.40 20.40 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.06 0.72 -4.02 47 4409 2009-01-15 18:05:59 2003-04-07 12:59:11 10 31 3053 40 1367 3598 1140 71.70 25 16.06 CHANGED puhlspshtsstsRpcalRshlp.....t...uphhspPht...tpsSuhlpolspAsuhlhlspsspt...lpsG-pVpVhhh ..................................uhhsps.h..p.p.s.s..s..RpcFhRutlp................tts.................Gp..h.h..lp.s...s.u........tps..S..u...hls.ohupANshlhls..tspss...........lpsG-hVplh.h.................. 0 406 862 1156 +589 PF03453 MoeA_N MoeA N-terminal region (domain I and II) Bateman A anon Bateman A Family This family contains two structural domains. One of these contains the conserved DGXA motif. This region is found in proteins involved in biosynthesis of molybdopterin cofactor however the exact molecular function of this region is uncertain. 22.50 22.50 22.80 22.90 21.40 21.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.74 0.71 -4.89 52 5148 2009-09-11 13:37:57 2003-04-07 12:59:11 12 42 3304 40 1547 4152 1544 162.50 33 36.85 CHANGED hhsl-EAhchlhshhtth.......sEpVsltcuh.GRlLA-DlhushslPsFcpSuhDGYAVRupDshsust...................tLpVluclhAGpssp.hplssGpsl+IhTGA.lPpGADAVVhhEpspttss........plplh..ptspsGpNlphtGpDlppGssllppGpplsP...tclulLAuhG .................................................hslccAlp...h...l...h.s....t..h.p.shs..........sEplsL......t....c......u.....h...G.......RlLA..............c.Dl.hushs.lPsFcsSAMDGYAlR..s.s..D..ht.s.sp.......................................l..V.h.u..c..h..h.A..Gps.................p.....t.......pl...t........s........G...........p........sl.R.I.hTG..A..sl..P......p.........G........u.......D...u.V.Vh.Epspttss..........................tlp.lt.....ps..s...p....s.G.....ps..lR....h............tGEDlp.tGsll.lttGptlss..splulLAohG.................. 
1 471 975 1318 +591 PF02493 MORN MORN repeat Bateman A anon Bateman A Repeat The MORN (Membrane Occupation and Recognition Nexus) repeat is found in multiple copies in several proteins including junctophilins (See Takeshima et al. Mol. Cell 2000;6:11-22).\ \ A MORN-repeat protein has been identified in the parasite Toxoplasma gondiis a dynamic component of cell division apparatus in Toxoplasma gondii [1]. It has been hypothesised to functions as a linker protein between certain membrane regions and the parasite's cytoskeleton [1]. 22.50 5.00 22.50 5.00 22.40 4.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.84 0.73 -6.99 0.73 -3.92 271 23538 2012-10-01 20:24:03 2003-04-07 12:59:11 15 292 1025 32 14186 22201 11255 21.80 32 29.18 CHANGED YpGpa.ppGhtcGpGhhpassGsp ..............YpGpa..tss..p...t..p...G.p..G.hhhassG............... 0 8471 10144 12192 +592 PF03476 MOSC_N MOSC N-terminal beta barrel domain Aravind L, Anantharaman V anon Aravind L, Anantharaman V Domain This domain is found to the N-terminus of Pfam:PF03473. The function of this domain is unknown, however it is predicted to adopt a beta barrel fold. 
21.00 21.00 21.20 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.28 0.71 -10.57 0.71 -4.40 16 1954 2009-01-15 18:05:59 2003-04-07 12:59:11 11 18 1430 1 823 1627 289 117.60 26 29.64 CHANGED huplssLalaPIKSscuhulpcAp...lsstGhh....tDRtahlls.sc.G............phlTtRpcPpLshlpssh............psshLplsAP.....Ghssltl.lt.....s.ththtssplathshsuhcsG-..........tsuc.WhosaLups ......................plspLalaPlKShp.Glsls.....puh............h.s.s.p.GL............aD...Rt...aM....lsc....ss......G.............................ph...lT...t...R..ph...Pph...s...h..lpssh..................................................ps...sL..t.ls...us....................s.t.s...s.h...hl.t.hs............s.ttp.t..s...s.p..V..a.t.......s....p..h......p..u....h...sss.............................................th.sp.WhSthhu.............................................................................................................................................. 0 218 430 658 +593 PF04643 Motilin_assoc motilin_assoc; Motilin/ghrelin-associated peptide Mifsud W anon Pfam-B_5485 (release 7.5) Family This family represents a peptide sequence that lies C-terminal to motilin/ghrelin on the respective precursor peptide. Its function is unknown. 19.30 19.30 19.80 19.30 18.30 17.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.78 0.72 -4.48 16 149 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 81 3 37 152 0 54.60 38 51.89 CHANGED hlDPs...pspt-E-phcI+hsAPh-IGl+loucQhpcattsLE+hLp-lLhpps.pt..scc ........................psttt--p.pIphsAPF-lGl+loutQhppautsLt+hLQ-lLh-ps................... 2 2 4 8 +594 PF04644 Motilin_ghrelin motilin_ghrelin; Motilin/ghrelin Mifsud W anon Pfam-B_5485 (release 7.5) Family Motilin is a gastrointestinal regulatory polypeptide produced by motilin cells in the duodenal epithelium. 
It is released into the general circulation at about 100-min intervals during the inter-digestive state and is the most important factor in controlling the inter-digestive migrating contractions. Motilin also stimulates endogenous release of the endocrine pancreas [1]. This family also includes ghrelin, a growth hormone secretagogue synthesised by endocrine cells in the stomach. Ghrelin stimulates growth hormone secretagogue receptors in the pituitary. These receptors are distinct from the growth hormone-releasing hormone receptors, and thus provide a means of controlling pituitary growth hormone release by the gastrointestinal system [2]. 21.90 21.90 22.40 23.10 21.20 21.80 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.82 0.72 -7.00 0.72 -4.14 6 118 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 53 1 32 126 0 27.20 59 26.77 CHANGED VPIFTauElQRh.QEKEppKsp+KSLplQ .GSSFLSPEHQ+h.QRKEsKK.PsuK.LpPR.... 0 2 2 5 +595 PF01398 JAB Mov34; JAMM; JAB1/Mov34/MPN/PAD-1 ubiquitin protease Finn RD, Bateman A, Iyer LM, Burroughs AM, Aravind L anon Pfam-B_738 (release 3.0) Family Members of this family are found in proteasome regulatory subunits, eukaryotic initiation factor 3 (eIF3) subunits and regulators of transcription factors. This family is also known as the MPN domain [3] and PAD-1-like domain [4], JABP1 domain [5] or JAMM domain [7]. These are metalloenzymes that function as the ubiquitin isopeptidase/ deubiquitinase in the ubiquitin-based signaling and protein turnover pathways in eukaryotes [7]. Versions of the domain in prokaryotic cognates of the ubiquitin-modification pathway are predicted to have a similar role [8]. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.47 0.71 -4.34 30 2882 2012-10-10 14:49:21 2003-04-07 12:59:11 16 61 451 19 1880 3177 141 110.70 23 23.48 CHANGED pssppVhlpslllhphlcHhp+sspt.........................clhGlLlGphps-.........................slcltssFulPhpsspcsspsht.shp...ph.chhcph...............................sc.EplVGWYHopPsh.s..ossDlpspp.apphtss .........................................................................pVhlpshslhphlc.....H..s.p.p....t..sth..............................cVhG.hL.lGph.s.sc....................................................slclpss..F....s.........h....P...p...........p.....ss..p...........s.s.....s.p.h...s.thp................phhchhcps..........................................sc...pllGW.Y....H....opP......sh...s....os........Dl.pspt.hpth...p.......................................... 0 655 1015 1500 +596 PF01853 MOZ_SAS MOZ/SAS family Bateman A anon Pfam-B_3994 (Release 4.3) Family This region of these proteins has been suggested to be homologous to acetyltransferases [1]. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.31 0.71 -5.06 15 1290 2012-10-02 22:59:21 2003-04-07 12:59:11 13 26 337 14 784 1296 19 175.50 49 25.71 CHANGED h++PPGsEIYRcssISlFEVDG+cp.................plYCQNLCLLAKLFLDHKTLYYDV-PFLFYlLTEpDcpG.........sHlVGYFSKEKcSspsYNlACILTLPsYQR+GYG+hLI-FSYELS++EuplGoPEKPLSDLGhLSYRoYWspsllclLhchcs......plTIc-lSphTulsppDIlsTLppLs.hl+YhKspalIsls ..............................................................pHPPGs.EIY......R.......c.............s............s.......l..SlF...E.VDGc..cp.......................+..h..YCQNLCLLAKLFLDHKTLY..YD.......V.-P.....FLFYlhT..c...h...D..p...p...G.................................tH..lVGYFS.K..........E....Kp....S.........p.s.........a.NVuC...ILTLP...........Y.....Q.R+..G..........YG+h.LIcF..........S................Y..L....S...+....h...E.....s..............p..h..........G.oPE...........KPLSDLGhl..S.Y+uYWppsllch.Lh......p..h.p..s................................pl.o.....I.p....clSphTuhp.pDllpTLptLp.hlphhpsp.....h...................................................... 0 270 410 629 +597 PF04117 Mpv17_PMP22 Mpv17 / PMP22 family Wood V, Finn RD, anon Pfam-B_8493 (release 7.3); Family The 22-kDa peroxisomal membrane protein (PMP22) is a major component of peroxisomal membranes. PMP22 seems to be involved in pore forming activity and may contribute to the unspecific permeability of the organelle membrane. PMP22 is synthesised on free cytosolic ribosomes and then directed to the peroxisome membrane by specific targeting information [1]. Mpv17 is a closely related peroxisomal protein. In mouse, the Mpv17 protein is involved in the development of early-onset glomerulosclerosis [2]. More recently a homolog of Mpv17 in S. cerevisiae has been been found to be an integral membrane protein of the inner mitochondrial membrane where it has been proposed to have a role in ethanol metabolism and tolerance during heat-shock [3]. 
Defects in MPV17 is associated with mitochondrial DNA depletion syndrome (MDDS) and Navajo neurohepatopathy (NNH) [4][5]. MDDS is a clinically heterogeneous group of disorders characterised by a reduction in mitochondrial DNA (mtDNA) copy number. Primary mtDNA depletion is inherited as an autosomal recessive trait and may affect single organs, typically muscle or liver, or multiple tissues. Individuals with the hepatocerebral form of mitochondrial DNA depletion syndrome have early progressive liver failure and neurologic abnormalities, hypoglycemia, and increased lactate in body fluids. NNH is an autosomal recessive disease that is prevalent among Navajo children in the South Western states of America. The major clinical features are hepatopathy, peripheral neuropathy, corneal anesthesia and scarring, acral mutilation, cerebral leukoencephalopathy, failure to thrive, and recurrent metabolic acidosis with intercurrent infections. Infantile, childhood, and classic forms of NNH have been described. Mitochondrial DNA depletion was detected in the livers of patients, suggesting a primary defect in mtDNA maintenance [5]. 20.30 20.30 20.40 21.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.11 0.72 -4.34 93 1477 2009-01-15 18:05:59 2003-04-07 12:59:11 7 21 327 0 1058 1437 41 66.50 24 28.41 CHANGED hu.hl....-ups..hppshpclccpahssht.....ssahlWPhsQhlNFthl.PsphRllasshlulh.WssaLShhppp ................................s.hh....pups..h.pphhpcl+ppa.hsshh...............ssa...hl...W...PhsphlNFt.hl...P.h.p...h.Rllasshl....ulh.WssaLShhtt........... 1 361 595 889 +598 PF03587 EMG1 Mra1; Nep1; EMG1/NEP1 methyltransferase Finn RD, Mistry J, Wood V anon Pfam-B_3290 (release 7.0) Family Members of this family are essential for 40S ribosomal biogenesis. The structure of EMG1 has revealed that it is a novel member of the superfamily of alpha/beta knot fold methyltransferases [2]. 
28.20 28.20 28.20 36.20 27.80 28.10 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.25 0.71 -5.03 60 459 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 385 15 316 463 33 199.60 41 81.56 CHANGED llLtcusLEhl.hp..................csphtLLssscHtshh..c+h...tc...........................s.ccuRPDIlHpsLLtlhDSPLN+.tGhLp.lYIH..TtcshlIpVsPpsRlPRsapRFtGLMtpLLc....+hplhss..utptLlclhc.sslsc.hlss...ssphlh..lSpcGphs...............................................phpchsp...........................................................................................................................pshs.hhlGuhs+Gc.thp.thsp....cp......hSIu.shsLoAtsssu+lssuhE ...................................................lVLppAsLEsh+h.....................t.p.pa.pLLNsDcHtshh...+Kh...s+............................-.ucsRPDIsHQsLLsLLDSPLN+..AG.hLQ.VYI+..Tt+sVLIEVsPpsRIPRTFcRFsGLM.............VQLL+....+hs..I+us.s.u.p.c+LL+V.IK.NPlo.-.aLPs....ss+Kls..lShs.u..thl...............................cs.p.-.hlp.........................................................................................................................tpcsls.lhlGAhA+Gc..hs.scas-.....cp........luIS.sYsLSAuhsCu+hspuhE................................................................................. 0 115 179 261 +599 PF03022 MRJP Major royal jelly protein Griffiths-Jones SR anon Pfam-B_1099 (release 6.4) Family Royal jelly is the food of queen bee larvae, and is responsible for the high reproductive ability of the queen. Major royal jelly proteins make up around 90% of larval jelly proteins. This family also the sequence-related yellow protein of drosophila which controls pigmentation of the adult cuticle and larval mouth parts. 
19.80 19.80 19.80 20.10 19.60 19.70 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.62 0.70 -5.20 20 978 2012-10-05 17:30:42 2003-04-07 12:59:11 11 6 294 8 455 1055 14 247.90 22 65.31 CHANGED sSsa+lslDcCsRLWVLDoGllshsps......hCsPpllsaDLpTcc..ll+phclPpslsp....ssotlsslsVDhhcs....sCttsasYluDttuc...uLIVYshsscpuWRlpsp..thpsDPthsphslsGpsFphpDG......laGhuLushp.s....LYFpPLuSpp.asVsschLpspshtpss....phpphpslGs+u..oQusucuhsp.sGlLFauhlspsuluCWspppshsppNhshls..psscsLpassslKIpc.............pphlalLS....s+hpphhtsclshschNFRIhtsslsclhcsohC ...................................................................................................................ss.phhhD.t.ps+LWllD.sG..h...h.......sh....t..............hs..sspllsh.DLp.o.s.p...........ll+ph.hstshhh.....soh.hssl...s.lDht.s........s.........tsasYlsDs...s...s...............ullVashtssp..u..aR..l...pp.......hhss..s....h......s..s.....h.s.....l.t.........s..........h....p...h....sG..........................h.u.hshs..s...................t...........s........phLYapshuopp.atls.TphLp........stsh..sp...pt..........tthp...l.G.p.+s....sp.ss..u.hhhDt...sGslahs.hppsulhphs..s...p.t........tp.h.th.lh...pssp....h.assshthst..........................pt.lahhs........sph................t...................................................................................................................................................................................... 0 144 221 388 +600 PF04707 PRELI MSF1; PRELI-like family Mifsud W anon Pfam-B_5792 (release 7.5) Family This family includes a conserved region found in the PRELI protein and yeast YLR168C gene MSF1 product. The function of this protein is unknown, though it is thought to be involved in intra-mitochondrial protein sorting. This region is also found in a number of other eukaryotic proteins. 
20.40 20.40 21.70 20.40 20.20 19.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.85 0.71 -4.66 16 922 2009-01-15 18:05:59 2003-04-07 12:59:11 9 17 286 0 571 815 0 145.80 29 50.50 CHANGED -pVssAahp+YPN.PhsspllusDVlcRc.lss.pGp.LhT.cRllphph.....shPcalc+..llGs..sp..sahhEpSslD.pc+ohslcopNloasshlpVcEpspYpsHs-Ns.shThhcQcApls..lhs.hhuhsstlEchuhppaspN.ssKG+cuhEallpp.lptp ..............-.VhtAhhp+.Y....P....N....Ph..s........spVlusDl..lpRp.lss...sGt.....Lto.cRllp.pp............thP....phlcp................l.huh.............sp..s.alhEcShl....Dsp.......p+....shp.hpo......pN..l.oa.ss.....h...l......p......VpEpssYp...s....ps.....c....s......s...s.....hTh.....hp.Q...pA.ls.......hh.u...h...hs.h.p.ptlEphhhppappN...spKGppshEhhlpp.....p................................................. 1 155 247 414 +601 PF00635 Motile_Sperm MSP_domain; MSP (Major sperm protein) domain Bateman A, Griffiths-Jones SR anon Prosite Domain Major sperm proteins are involved in sperm motility. These proteins oligomerise to form filaments. This family contains many other proteins. 22.80 22.80 22.80 22.80 22.60 22.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.16 0.72 -4.41 63 1788 2009-01-15 18:05:59 2003-04-07 12:59:11 21 69 335 35 1207 1567 9 102.30 22 37.32 CHANGED hlplpP.sptlhhss.h.........ssspl..plpNsosp.pluFKlKoTssppYpVcPshGllpPspshplpl..hhps.p...ss..p.......pDKhhlphshs.tstssstpshpphhppsps ..................................h.l.lpP.t.t.lh.Ftt.h.........tp...hpptl..pl..p.....N.......s....o..sp...t.lsF..K........l........K..TT...sP..c....c..Y..p.V....c....Ps.s.G.l.l...........cP.tps.s.p.l.s...........l.........hh.p.s.t.....st.p................pD+Fhlpss.s..tsts.t......ht........s.............................................................. 
0 442 671 1055 +602 PF03820 Mtc Tricarboxylate carrier TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 19.10 19.10 19.50 19.40 18.30 17.70 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -11.91 0.70 -5.77 9 716 2009-09-11 12:47:20 2003-04-07 12:59:11 12 12 283 0 439 638 7 268.20 38 91.85 CHANGED aD.STahGRs+HFhphTsPhsLlsopppLpcu+pIVpcY+pGphsssh.Ts--lW+AKplhDSsaHPDTGEphhhhGRMSuQVPhNhllTGGML..shYpsssuVVFWQWhNQSFNAlVNYoNRSuss.hospQLhsuYsuATouAhssALGLNphl...KphsP....LluRLVPFAAVusANslNIPhMRppElp...pGIsVhD-sGpplGpSptAAhhuIupVslSRlhMAhPsMslsPllhppLpKpsahpppPthts....PlQslLsGhsLhhusPluCAlFPQ+ssIpls...pLEPclppplc+..pssPscpVYaNKGL ........................................................aD.sTahGRhpHahplsDP............psl...hho...p...ppLcpA+pl.l..ppa.....+.......t........G........h......s......h......pppp...LWpA+hlh-SshH.P....DTGEhhhh..hRMSs.........lPhs.h.h.ls....u.s...ML..t.....h......................ps....ss.usl.FW.QWhNQSaNAhVNYsN.....Rsu...s...s....P.....h....o..........spp.......l.htu...Y..hsAss.....u.u...susA.....l.G...L....s.ths..........+phsP..........l..lsRhV...P..F.sAV...AuAsslNlshMRtp..Elp................pGIs.V.................h...................D.........c......s...G...........p..............p..............l....G...p.SppA.A.thulspsslSR.lhhshPsM..hl.sPllhph.L...p.+.....t.t.....h...h....p..p...h...s...h.h.t.h...................P.l..phh...ls.....hhs..hh.....hshPhuhu..l..F..PQ..p.......splpss.............pL...E...s..-hptth.t.t.............t.hlhaN+Gl.................................... 1 137 197 330 +603 PF00249 Myb_DNA-binding myb_DNA-binding; Myb-like DNA-binding domain Bateman A, Finn RD anon Prosite Domain This family contains the DNA binding domains from Myb proteins, as well as the SANT domain family [1]. 
24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.23 0.72 -3.90 194 12500 2012-10-04 14:01:11 2003-04-07 12:59:11 26 179 730 34 6476 17506 196 47.00 26 14.31 CHANGED +ssWTt-E-phlhphlpphGsp....................WptIupphs.........sRospps+p+Wpshh ............................................+ttWTt-.Ecphll.p..h..l.....p..p...hGpp...........................................................Wp.pI.u...p..p.hs...................sRT.s..pps+p+.apph................................ 0 1753 4075 5391 +604 PF02736 Myosin_N Myosin N-terminal SH3-like domain Bateman A anon Pfam-B_110 (Release 5.5) Domain This domain has an SH3-like fold. It is found at the N-terminus of many but not all myosins. The function of this domain is unknown. 20.70 17.00 20.70 17.30 20.60 16.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.82 0.72 -4.28 41 1520 2009-09-14 13:17:30 2003-04-07 12:59:11 14 67 230 150 754 1364 7 42.00 35 2.67 CHANGED KptsWV.sDscE..ualpucIp..sppG-pVoVpsts...Gc..pholKcDc ..........KptVWV.sDs+c..ualcu.plp...pc..cGsc.Vs.Vcsts...G+............plsVccDp.................. 0 141 253 484 +605 PF00784 MyTH4 MyTH4 domain SMART anon Alignment kindly provided by SMART Family Domain in myosin and kinesin tails, present twice in myosin-VIIa, and also present in 3 other myosins. 
19.20 19.20 20.20 19.20 18.90 18.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.18 0.71 -4.15 11 1375 2009-09-11 15:02:25 2003-04-07 12:59:11 12 160 222 5 806 1210 7 107.20 31 9.90 CHANGED ppsh-llppIlppulpp.sLRDElYCQlhKQhscNPs..ppStt+GWcLlhhssusFsPScplhtaLp.pFlpsp.t.......htshAhhs.cpL++ohpsG..sR+hsPuhhElpAhp .................................p...thhp.ll..phsh.pp....t.L...R..DElYsQlhKQho........p.N.ss.................ppS..t..tR..GWpLLsl.ssu.hFs.P.S.p.p..hht..........aLp.pFlppp..h...t......................................................................t....t....h........Ahh.C..c.pLp...+.o.hp..pG........sR..p.......sPohhElpAh.................................................... 1 262 339 583 +606 PF03485 Arg_tRNA_synt_N N-Arg; Arginyl tRNA synthetase N terminal domain Bateman A anon Bateman A Domain This domain is found at the amino terminus of Arginyl tRNA synthetase, also called additional domain 1 (Add-1). It is about 140 residues long and it has been suggested that this domain will be involved in tRNA recognition [1]. 21.30 21.30 21.30 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.78 0.72 -3.44 174 5094 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 4796 10 1267 3829 1849 86.10 28 15.38 CHANGED pplpptlppulpph................th...hlc.h..s.....pp.paGDausNhAhtLAKth+p.......sPpplApplsp...plpts....th......lp..cl-luGP....GFINhhL .....................................hpphltpslpph................hs.tts.ph..hlc..p....s.......cpspaG.D.....ausNhA..hsLAKhh+p.............sP+plApplsp......pLsts.................sh.......hp.....c.....lEl.AGP...G.FINhhL.................. 0 428 821 1065 +607 PF01699 Na_Ca_ex Na_Ca_Ex; Sodium/calcium exchanger protein Bashton M, Bateman A anon Pfam-B_1680 (release 4.1) Family This is a family of sodium/calcium exchanger integral membrane proteins. This family covers the integral membrane regions of the proteins. 
Sodium/calcium exchangers regulate intracellular Ca2+ concentrations in many cells; cardiac myocytes, epithelial cells, neurons retinal rod photoreceptors and smooth muscle cells [2]. Ca2+ is moved into or out of the cytosol depending on Na+ concentration [2]. In humans and rats there are 3 isoforms; NCX1 NCX2 and NCX3 [1] see Swiss:Q01728, Swiss:P48768 and Swiss:P70549 respectively. 24.90 24.90 25.00 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.57 0.71 -4.58 111 11392 2009-01-15 18:05:59 2003-04-07 12:59:11 19 59 3066 4 4750 9872 2859 136.70 20 50.60 CHANGED lhlhhhuchlssshs.slu.p..................hhs....lsstllGlsllAhusuhPEhhsulhushp...............s.......psslulusllGSslhslhlslGlsslhssh...................................htstshthsh.shhhhshhhlhhhhh......................ttplshhpullhlhhYhhalhhh .................................................h.hlhhhuph.hstshp.tlu.p...................................hhs..............l.sphlh.....Glsll..Ah..G.oS...hP........E.l.h....s.......u.lh..u.s.hp..................................................s..............psclu...lus.......ll....GS....s.l.h......N...l...h.l....l...l.u..ls..s.llsshh....................................................hps.t.s..h..p.h..s...h......sh..h.h..h.s..h.l.hh..h.lhhh.............................................tst.p...l...s.h....h....p....G..h.l...h.l.h..hahhalhh.h......................................................................... 0 1381 2625 3903 +608 PF00999 Na_H_Exchanger Sodium/hydrogen exchanger family Finn RD, Bateman A anon Pfam-B_312 (release 3.0) Family Na/H antiporters are key transporters in maintaining the pH of actively metabolising cells. The molecular mechanisms of antiport are unclear. These antiporters contain 10-12 transmembrane regions (M) at the amino-terminus and a large cytoplasmic region at the carboxyl terminus.\ The transmembrane regions M3-M12 share identity with other members of the family. 
The M6 and M7 regions are highly conserved. Thus, this is thought to be the region that is involved in the transport of sodium and hydrogen ions. The cytoplasmic region has little similarity throughout the family. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.36 0.70 -5.93 80 17250 2012-10-02 17:06:44 2003-04-07 12:59:11 16 72 4700 5 5319 13718 2497 371.20 18 66.67 CHANGED hllllh.uhlssh..h.hp+lp...lPs........lluhllsGlllGshshs...........h...............................hhphluplGlshllFhsGlchshpp.lppshts..............slhhulluhlhshhh............Ghhht.h...........h...hshhtulhhGshl.osoussllhtll......-pttlspphuplllutullsDhssllllslhhshh.........shhhshh...............hhhhhhhhhhhshlhhthlhshlhphhpch.......tstplhhhhslhhshhs..shhuchl.GlssllGuahsGlslu...........ptthppp..lppp....lpsht....shhhslFFlhlGlpls.................h...........hthhhhllhhllhlhl.s+hlsshhhtthh...............t.hshppsh...hlu.hshh.+Gthslslsthuhpt......tlhsp..............ph.hshhlhssllosl.....lssh.......lht 
........................................................................................................................h..hhhhh.s.h.lhsh.........l..hp+lt........lP...........h..ls...hl......l..s..G...l....l.......l......G....s...h..s..hs................h..ht.s...........................................................................hp.h.h..s...p...lu.l..s.hl...l...F...h............s.G................h.c.h.....s...h......p..p....l..h..p..h..ht...........................................hh..hhu...h.h..t.lhl.sshh.....................................huhhhhh.........................................h..hs.h.s.h.h.t..u..l.h...l.G...s.....hl...u.s......o.....s.s.......ss.l.ht....ll.......................cpt....t....h....s.p.p...h...s...p....h...lh...........utul.....h.s.........Dhsu.ll.h.....h..sh.hhshh.....................tstth.s.h.h..............................................................t.h.h.h..t.h....h.....h..h....h.h.h.....h....l....l....h..t..h...l....h...s.....h....l.....h.t..h.......hhph...............................tsp...ph.h...h...h.h..s..l.h....h..shhs........hh...............h..s........p......h.......l........s............h............S...........s.......h..l..u..s...........h...hu.G.lh..lu.....................ptth.pp..p.......hppp..........................................hp..shh..............hlh.slh.F...l.hl..Gh.tls.......................thl.....................hth.h..h.h....l.l.h....h....h.h........h.l...h..l....s+h...l...ss..h...h..h.h.hhh..................................................................t..hs.h.p..p.t.h.........hhu...hs...hs....pG....t...hu.h..s......l....s....h....h..s...tt.......................th..h.s..t.....................................ph.....h...s...h....h....h....h....s...l....l..l.ohh.h....s..h..h...hh............................................................................... 
0 1593 3099 4353 +609 PF02690 Na_Pi_cotrans Na+/Pi-cotransporter Bashton M, Bateman A, Haft D anon Pfam-B_509 (release 5.5) Family This is a family of mainly mammalian type II renal Na+/Pi-cotransporters with other related sequences from lower eukaryotes and bacteria some of which are also Na+/Pi-cotransporters. In the kidney the type II renal Na+/Pi-cotransporters protein allows re-absorption of filtered Pi in the proximal tubule [1]. 28.60 28.60 29.20 28.70 28.50 28.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.63 0.71 -4.47 184 5785 2009-09-10 16:47:48 2003-04-07 12:59:11 10 11 2359 0 1241 3956 693 134.10 26 49.88 CHANGED LhLhG.hphhssGlppssG.sp..l+plLt..phTs...s........htull....sGhhsTullQSSSssol...lsluFVuuGllsltpulullhGANlGTTlTuhllu.............hcl.uthu.hhlhlGsllhhh..pppphpthGthlhGlullFhulphhppu ..............................hhhhu.hphhspuh.p...hts..sp...h.c.p.l.ht....phss.....s.............hhull.......sGhhlTsl.........lQSSoAsss..............lssuhs..u.......s.......G...............l.l.......s.......lptAlsllhGANlGTslTAhlhu..................................................hcl...u.hh...ph.h.h..hl..Gsl.l.hhh....................................h.......................................................................................... 0 429 745 997 +610 PF02445 NadA Quinolinate synthetase A protein Bateman A anon Pfam-B_1915 (release 5.4) Family Quinolinate synthetase catalyses the second step of the de novo biosynthetic pathway of pyridine nucleotide formation. In particular, quinolinate synthetase is involved in the condensation of dihydroxyacetone phosphate and iminoaspartate to form quinolinic acid [2]. This synthesis requires two enzymes, a FAD-containing "B protein" and an "A protein". 
19.00 19.00 20.70 20.60 18.60 17.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.89 0.70 -5.61 144 2787 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 2689 2 795 2280 2221 307.90 41 89.62 CHANGED hpcIpcLKc-..+sAlILAH.YQpsEl.Q-lADhsG....DSLpLuctAsp.scA-sIVFCGV+FMAEoAKIL.s.P-Kp.........VLlP-h.pAGCshA-hhss-pl....pph+cpa...Ps......tsVVsYlNooAclKA....tuDlsCTSSNAlclVpp.l....sp..cc.IlFhPDcpLGpal..............................tcps.s+cl................................llWs...GhC.VHppassc.clpph+ppa.PsApllsHPEC...s.cVlchAD........hlGSTutllcasppsssp.c.alluTEhGllpcLpc..cs....Ps.Kpahshs.........s.pshCspM+hhTLcclhpsLc......shp.................sc.lpls..c-ltpcAppul-RMLcl ............t.pcIccLh+c..+sAVllAHa.YpcsEI.QplA-hsG..................DSLphA+h.u....sc....psA...ssllhsGV+FMu.ET.A.K.I.L.....o....P-...Kp......................VL...hP.....s.l.p.A.....sCSLs.usss.-ph........................ptap-ta.............P-...............tsVVsYsNTSAAVKA.....cuD.....hssTSSsAl.c.llcp....l.........sp......s...ccIlasPDcaLGpal...................................................................................pcpT..stch...................................lhWp...GtChVH-c.F.psp.plpch+.pp.a....P-.At.llVHPEs.............stsVl.phAD...........hlGSTotlIcts............p....sh...........ss...........p....c.......hIVuT-tGlha+hpp....ts....P.-...Kp..h..lt.ss.......................................................s..sshCPaMthssLpplhps...Lc.....p..tt.......................pElp.VD..ppltppAhhsLpRML-.h................................................................................................ 
0 274 536 683 +611 PF03822 NAF NAF domain Griffiths-Jones SR anon PROSITE Domain \N 25.00 25.00 26.00 26.00 21.80 24.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.19 0.72 -4.26 71 764 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 57 2 323 696 0 60.30 43 14.83 CHANGED pPss..lNAF-lI.ShSpGhDLSuLFtccpct....c..sRFsSppsAsslluKlEplAc..shs.apV..+K ..................PtshNAF-lI.S.hSpGhsLSuLF-ccp.p...................pcc......sRFsS.pp...PAspIluKlE-sAc...shu.apVpK............. 0 37 177 261 +612 PF05089 NAGLU Alpha-N-acetylglucosaminidase (NAGLU) tim-barrel domain Moxon SJ, Bateman A anon Pfam-B_6295 (release 7.7) Domain Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate [1]. Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations [2]. The structure shows that the enzyme is composed of three domains. This central domain has a tim barrel fold [3]. 23.40 23.40 23.70 23.80 20.50 22.50 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.06 0.70 -5.69 30 411 2012-10-03 05:44:19 2003-04-07 12:59:11 7 31 267 5 179 405 16 304.40 38 38.80 CHANGED RYahNsCTaSYohsaWsWpcWE+cIDWMALpGINhsLAhsGQEulWpcVapchGlocpElcsahoGPAahAWpRMuNlcG.WGGPLspsWhcppttLQ++IlsRMRpLGMpPVLPuFuGaVPpshpchh.PpAplschusWsu......assshaL-P.....pDPlFscluptFlcc.pchY.G.sschYssDsFNEhsPsss...ss............................phLussupulacuhppsDPcAVWl.hQGWhF..p..hWpssthcAhLsuVP.....ps+.hlVLDLauEphP.hWpp..........ocuahGpPaIWChLpNFGGshslaGslptlsssh.pAttps..ssslhGsGhoPEGlcpNslhYELhhEhuWpps. 
.......................RYahNhCThuYohsaWsWpcWE+cIDWMALpGlNhsLAhsGpEslWpclhp..c.hGh.opp-l.ppahsGPAahsWtpMuNlpu..W......u..G.....P.Lsp.....sWhppphtL.Q++IlpRM+phGMpPVLPuFuGhVPpsh...............p......c...hh..P..p......splhp..s..pWss........as...ps...h....hLpP..........pD.s...hFtplu.phFhcc.pcha.G....ssphYshDsFpEhtssss......ts.................................lsphupslhpuh.pts.cscAlWl.hQuW.a........W.ps.s.hpuhLpu.V.s.........p.sc.hllLDLa..uEttP.h......app...........ppsahGpPaIaChLpNFGGshshhGphptlspt.......hpAh......tps....sssh........hGhGhs.EGlppN.lhY-Lhh-huWppt.s................................................... 0 64 108 149 +613 PF02365 NAM No apical meristem (NAM) protein Bashton M, Bateman A anon Pfam-B_530 (release 5.2) Family This is a family of no apical meristem (NAM) proteins these are plant development proteins. Mutations in NAM result in the failure to develop a shoot apical meristem in petunia embryos [2]. NAM is indicated as having a role in determining positions of meristems and primordial [2]. One member of this family NAP (NAC-like, activated by AP3/PI) is encoded by the target genes of the AP3/PI transcriptional activators and functions in the transition between growth by cell division and cell expansion in stamens and petals [1]. 
21.20 21.20 21.20 21.50 20.80 21.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.25 0.71 -3.95 133 2410 2009-01-15 18:05:59 2003-04-07 12:59:11 10 26 114 15 1319 2332 1 125.10 42 37.77 CHANGED LPPGFRF+PTDEELlsaYLcpK.ltup............hsh..p................s.Is-l.D....lhch.-P.W-L..P...h.........................hh.s.......tpp..EW..Y....FFs.+-+KYs....s.Gt.........RsNR.u......................Tt.....uGY..WK.ATGp..D+tlhs.............ssth............lGhKKTLVFY....p.......G.+uPcGp....+TsWlMHE.YRL.s ..................................lPPGFR..F.+PT.DEELlhaYLpcK..ltspt..........hsh...p.................h..I.s....-l..D.....lh...+.h...-....P.W-L..P..t...................................thth....scp....EW......YF.Fs..+..-..+...K..Ys.........s...Gs........R.s..NR...u.........................................ss........uGa...W.K...A..T.Gp....D+s..lhs................ssph...................lGhK.K.sLVFY..............p...G..+....u..P...+......G..p........KTsWlMHEYRL.s.................................................................... 0 160 722 1039 +614 PF04095 NAPRTase Nicotinate phosphoribosyltransferase (NAPRTase) family Wood V, Finn RD, Bateman A anon Pfam-B_5038 (release 7.3) & Pfam-B_5422 (Release 7.5) Family Nicotinate phosphoribosyltransferase (EC:2.4.2.11) is the rate limiting enzyme that catalyses the first reaction in the NAD salvage synthesis. This family also includes Pre-B cell enhancing factor that is a cytokine Swiss:P43490. This family is related to Quinolinate phosphoribosyltransferase Pfam:PF01729. 
19.90 19.90 20.10 20.00 19.80 19.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.66 0.70 -5.09 19 4673 2012-10-03 05:58:16 2003-04-07 12:59:11 11 6 3929 55 1191 3401 285 219.70 27 50.59 CHANGED aplpDFGsRtt.Shcstthssp...........sthstFhGTuslhh.............stthul.shtppshphhtscpplss...hlpphpp.........suh-hWhttht...hG.sLpDhlshcuhhtths...........hslR.DSGDPhp.................hh-clht+atsh.shcsh.....phtllauDGlshcphhplhcthcs.th...sluFGlGosLhpclsp.h.t....ssslsIshKhhpsptpPlsclS.csstKuh.................hts.....hthl+psFpsst .........................................................................................lh-FGoRRt.phcstthssp...t..........................uh.h..u.u.hsuTSNlhs......................ucta...u..lsshGTtAHsa........h.......t.....s.....a.....t.....p.h..s.....s....................h.p.s.htp.......................................tsh..sh..hhs...pa.......slt....ulssslps...st.h.t.ph...............................................hhGlRhDS.....GD.sh........................................................................hucc....stphh...-c...h....t.......sh...............................ptpllhSssL.D...pp.l..h..pLhtphsp...............................ssaGlGT.....pLhs.....s.........h.......s..........p..............................h.ss..l.................p..h..s..h......p.h....h.................................s......h.hch.o..pp.tp........................................................................p...................................................................................... 0 381 694 967 +615 PF04970 LRAT NC; Lecithin retinol acyltransferase Finn RD anon Pfam-B_3758 (release 7.0) Domain The full-length members of this family, eg Swiss:P53816, are representatives of a novel class II tumour-suppressor family, designated as H-REV107-like. This domain is the catalytic N-terminal proline-rich region of the protein. 
The downstream region is a putative C-terminal transmembrane domain which is found to be crucial for cellular localisation, but not necessary for the enzyme activity [1]. H-REV107-like proteins are homologous to lecithin retinol acyltransferase (LRAT), an enzyme that catalyses the transfer of the sn-1 acyl group of phosphatidylcholine to all-trans-retinol and forming a retinyl ester [2]. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.81 0.71 -4.17 32 808 2012-10-10 12:56:15 2003-04-07 12:59:11 8 10 297 3 418 739 44 120.20 26 54.98 CHANGED h.......p.GDllpl.R.......hYpHaGIYlGDspVlHhs.......Ps..............t..ssstt...........hhushtptuhVchs.sL-shstGsshhlssh........t.......tptpshss-cllpRAct.LlG....ph.sYsLlhsNCEHFVsaC+aGhsh ......................................................hh.....t.p.GDhl....h...R...........hY.pHaulY..l......G.....c....s.....h......Vl..Hhs...................................................................................hhu..hh..t...p.p.u...hVcts..pLpsh....s..t.u...s.p...h...plssh................................t.................phps...hsscpllp..R.App..hlG..........ph...p.....YsLhts....NCEHFsshs+hGh..t........................................ 0 78 152 260 +616 PF04904 NCD1 NAB conserved region 1 (NCD1) Kerrison ND anon Pfam-B_6188 (release 7.6) Family Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors [1]. This region consists of the N-terminal NAB conserved region 1, which interacts with the EGR1 inhibitory domain (R1) [1]. It may also mediate multimerisation. 
25.00 25.00 40.20 39.70 21.60 20.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.57 0.72 -4.24 4 175 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 92 0 89 144 0 79.10 79 17.49 CHANGED hspPoTLuELQLY+VLQRANLLsYYDsFIppGGDDlQQLCEAsEEEFLEIMALVGMAoKPLHVRRhQKALpEWsTsPshFpp ........uLPRTLGELQLYRlLQRANLLSYY-sFIQQ...GGDDVQQLCEAGE..EEFLEIMALVGMAoKPLHVRRLQKALR-WsTNPGLFsQ..... 1 21 29 56 +617 PF04905 NCD2 NAB conserved region 2 (NCD2) Kerrison ND anon Pfam-B_6188 (release 7.6) Family Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors [1]. This family consists of NAB conserved region 2, near the C-terminus of the protein. It is necessary for transcriptional repression by the Nab proteins [1]. It is also required for transcription activation by Nab proteins at Nab-activated promoters [2]. 25.00 25.00 29.80 25.60 21.90 20.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.02 0.71 -4.45 5 176 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 91 1 93 153 0 152.80 49 30.99 CHANGED scppsSppshS.PusLGSPtSs...tlusp-ouuss....ssLssstltplsEsuERhu+oLP+uDhuEVscLLKsNKKLAKplGHIlEMS-sDP++EEEIRKYSAIYGRFDSKRR-GKpLTLHE........................LTVNEAAAQLCh+DsALLTRRDELFuLARQVuREsTYpYoh+oSRL+ ..........................................................s..tptup.shS..Pus.hGsPtus...............ttsu.........-tL-sthst.VsE.sVERhhtohP+.uDhsElppLLKhNKKLA+..plGHIFEMsDsDspKEEEIRKYSuIYGRFDSKR+-GKpLTLHE........................LTlNEAAAQlCh+Dss..LLTR.RcELFuLARQ.......luREsoYhhoh+so+............... 0 26 34 60 +618 PF03096 Ndr Ndr family Mifsud W anon Pfam-B_2481 (release 6.4) Family This family consists of proteins from different gene families: Ndr1/RTP/Drg1, Ndr2, and Ndr3. Their similarity was previously noted [1]. 
The precise molecular and cellular function of members of this family is still unknown. Yet, they are known to be involved in cellular differentiation events. The Ndr1 group was the first to be discovered. Their expression is repressed by the proto-oncogenes N-myc and c-myc, and in line with this observation, Ndr1 protein expression is down-regulated in neoplastic cells, and is reactivated when differentiation is induced by chemicals such as retinoic acid. Ndr2 and Ndr3 expression is not under the control of N-myc or c-myc. Ndr1 expression is also activated by several chemicals: tunicamycin and homocysteine induce Ndr1 in human umbilical endothelial cells; nickel induces Ndr1 in several cell types. Members of this family are found in wide variety of multicellular eukaryotes, including an Ndr1 type protein in Helianthus annuus (sunflower), known as Sf21 Swiss:O23969. Interestingly, the highest scoring matches in the noise are all alpha/beta hydrolases Pfam:PF00561, suggesting that this family may have an enzymatic function (Bateman A pers. obs.). 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.65 0.70 -5.93 10 725 2012-10-03 11:45:05 2003-04-07 12:59:11 9 5 129 8 272 2897 1291 225.90 42 76.06 CHANGED EHplcTsaGulpVoVpGs.ct..p+PsllTYHDlGLNHcSCFpsLFsp.EsMsEltc+FslhHVssPGpEpGAsshspsa.aPSl-sLA-plssVLsaFplcoVIGhGsGAGAYILsRFAlpaP-RVpGLVLlNssssAsGWh-WstsKlss..h...GhosslhDhllsHhFu+Etpps....ss-...lVppYRphlscshN.sNLphalpAassR+DLshctssht..........CsslLlVGDpSPah-sVlcsso+LDsppoolLKlscsGGhVpt-QPsKlscuhchFLpGhGahs ...............................................................................p.lcTsaG...lp.V.slh..Gs.c............+PsllTYH.DlG.LN.apoCFp.shF....p.........h..-sM.p.-....lh.pp...F...slhHV....-...A...P..G..p.....p.....G....A.s..s....h..P.....s......a........a...P...o.h..-p..L..A-..l.ssV.Lpa.h...s...h...p...s..l...I.G.h.Gl.GA.G..A.Yl.....L.s.+.F..A...........l..t.a....P.c....h....V.GL..lL.......l.......N............h.....s.......s.........s.....u........t........u......W........h...-......W.........h.............h.....p.........K....l.............................................................G......h...s.....p..s....l........-...h..l........h....t....H....h......F.u...p.p...htt..............p.-.......ll..p..t...a...R...t.....l...t..p....t.......s................N..l....t..h..a....h...p.....u...a..........p........p.....R........p....D....L..p.....p..h...h.h......................s..shll..s...G..s...p....u...P...h.......p..s..s...............l.....c....h....s..s..+...L......s...........p...p.........o......s.h.....l.c..h.t.c.....s.su.....s...pQPtp...lspuhchFlp.GhGhh...................................................................................................................................................................... 0 74 119 189 +619 PF03102 NeuB NeuB family Mifsud W anon Pfam-B_2572 (release 6.4) Family NeuB is the prokaryotic N-acetylneuraminic acid (Neu5Ac) synthase. 
It catalyses the direct formation of Neu5Ac (the most common sialic acid) by condensation of phosphoenolpyruvate (PEP) and N-acetylmannosamine (ManNAc). This reaction has only been observed in prokaryotes; eukaryotes synthesise the 9-phosphate form, Neu5Ac-9-P, and utilise ManNAc-6-P instead of ManNAc. Such eukaryotic enzymes are not present in this family [1]. This family also contains SpsE spore coat polysaccharide biosynthesis proteins. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -5.23 158 1576 2012-10-03 05:58:16 2003-04-07 12:59:11 9 19 1121 8 447 2406 3908 236.80 36 67.28 CHANGED lI-sAtc.AGADAlKFQTaps-slsspt..sppsp.....athpp....hssp.o.h-hhcchc.hsh-.appLhcascptGItahSTPFDhpul-hL.pplssstaKIuSuEloshPLlctlAp...psKP.....lIlSTGMuolsEIcpAlssl...........................tpssspplsLLHCsosYPsPh--...lNLpslpsLpptF...s..l.l.GhSDHTh...GhtsslAAlALGAsl..........IEKHFTLD+sh....tGPDHthS..L-PpEhcphVpslRplcpAL.Gsshc ................................................................hlcsAtc.uGADulKaQTaps-shhspt.......s.ps...........a.hpss.......hssp..ohh-h.hcch..c.hsh-.atpL..hcas.c.chGlhhhSoPF.Dhpul-hL..pph......s..sshaKIuSsE...ls.....shP.hlctlAp.....psKP..........lIlSTG.M.u.o.h.p.EI.cpAlphh.....................................pp.t.t..s.psls.l....L....H.......C..s....osY.P.s.s...h..-.-........s.N......LpsltsLpc....tF...s........l.........l........Gh..S....D..Hoh.....G.....h.......t.....s.....sl.....u.....A.....V.A.LG...Ap..l...........I..EKHFTLD+sh....tG.sDp.thS.l-PpEhcphlp.slRpsppAL.Gss........................................................................................................ 0 154 297 373 +620 PF02931 Neur_chan_LBD Neurotransmitter-gated ion-channel ligand binding domain Bateman A, Sonnhammer ELL anon Prosite Family This family is the extracellular ligand binding domain of these ion channels [1]. This domain forms a pentameric arrangement in the known structure. 
25.80 25.80 25.90 25.80 25.60 25.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.64 0.70 -5.19 92 6005 2009-12-16 13:50:00 2003-04-07 12:59:11 18 90 267 651 3245 5129 71 186.70 25 43.27 CHANGED pp+LlccLhps....YsptlRPs.....pssp.....s.lsVplsltlpplhslsEhsp.hosslalpppWpD.+LpWsstp....asslpplplssc......plWhPDlhLhN.....sssspapso.hpsplhlp.........sGpVhahsss.hhcusC.lclphFPFDpQsCslpFuS............asYss.......pclslphtp............................pphclssFh.supWslhph...shpttp.hs.hsp.........l.aplhl+R+s ......................................................................hhppLhtt......Y.s...t...h...hRPs......................st.........................sh...V....phsl.........lt..p..lhs..l................s.-hs........................hsh..............s......l.ahp...............p....p..............W.......p..........D.......+...Lp..a.s............p................s.s.h...p.t..l...p...l..ssp......................plWhP..Dhhhh..N............p..........t...........s.........t....a.....p....h.....s.............p...p.............h....lhl..h............................sG..p....l.....h......a.....s..............................h.....hh..p...............u.sC....h.c.........l.ph..............FPh.D..t...Q..s......CslphtS........................................a.s..Y.st............pc...lth.h.htp.....................................................pt.h..pl.s..p..a.h......p.......sp.hth....ht....................h...........hs..thst...............h..thhlcRp.............................................................................................. 0 1010 1272 2385 +621 PF01436 NHL NHL repeat Bateman A anon [1] Repeat The NHL (NCL-1, HT2A and LIN-41) repeat is found in multiple tandem copies. It is about 40 residues long and resembles the WD repeat Pfam:PF00400. 
The repeats have a catalytic activity in Swiss:P10731, proteolysis has shown that the Peptidyl-alpha-hydroxyglycine alpha-amidating lyase (PAL) activity is localised to the repeats [2]. Swiss:Q13049 interacts with the activation domain of Tat. This interaction is me diated by the NHL repeats [3]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.93 0.73 -7.24 0.73 -3.73 108 7067 2012-10-05 17:30:42 2003-04-07 12:59:11 16 402 688 32 4354 6356 1348 27.90 34 11.21 CHANGED hstPpulsls..sssplhVsD..ppspplhha ........hptPpGlAls...ssGp..laVuD........psNpRlphh.............. 0 2548 3068 3732 +622 PF03031 NIF NLI interacting factor-like phosphatase Griffiths-Jones SR anon Pfam-B_1405 (release 6.4) Family This family contains a number of NLI interacting factor isoforms (eg. Swiss:Q9PTJ8) and also an N-terminal regions of RNA polymerase II CTC phosphatase (Swiss:Q9Y5BO) and FCP1 serine phosphatase (Swiss:Q9PT70). This region has been identified as the minimal phosphatase domain [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.36 0.71 -4.57 120 2877 2012-10-03 04:19:28 2003-04-07 12:59:11 13 63 441 30 1934 3360 347 157.00 26 34.52 CHANGED hsLVLDLDET...LlHsp.t...................hh..............................hphhlth......RPhlccF...Lp.pl.uph..aElllaTuut..ppYAptllchlD.......s.....pphh...pppl....aR........c...pCh.......hps..h.l..K...DLshl.....................sR.s..........LspllllDssspsahhp.spNul.Ips......ahss.......ps.Dp.........pLhp...LhshL.ctlt..ph.pDVR ................................................................................................sLVLDLDcT..LlHsphp...........................................................................................hth.hlhh.............R..Pt..l..c...pF....Lp..ph...uph..........aE.ll..l.aTu.u.h..............p.t...Y.Ap.......slh.c.h.L.D.....................................s..........pphh.........ptp.l.............hR...................................................c...psh...............................hpu...ph..h.........K...................-L.s.hl.....................................................................sp..s.............h..sps.ll...lDss.s.t.s.ah.hp....s......p...N..u.l..lps.................ahss........................p.Dp.........tLhp......lh.hl.p.l................................................................................................ 0 769 1211 1633 +623 PF04923 Ninjurin Ninjurin Finn RD anon Pfam-B_5824 (release 7.6) Family Ninjurin (nerve injury-induced protein) is involved in nerve regeneration and in the formation and function in some tissues [1]. 
20.70 20.70 20.80 20.70 20.20 20.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.29 0.72 -4.30 50 257 2009-11-15 17:29:03 2003-04-07 12:59:11 7 5 73 0 176 264 1 98.90 39 59.48 CHANGED shNpYss+KolApuhhDlALLoA..NAsQ.....L+hllphspp...tthYhhh.lsLlslSllLQlllullllhhup...hslppt..................................ppppp.hpphNshsshh.............................lhllsllNllIouFs ............shNpYusKKolApuMlDlALLhA..NAsQ...........L+hllp.Gsp...htaYhsh.lsLIolSllLQlhlGlLLlhlup......hslpp...................................tcpt+.hshlNNhsshhlFllsllNlhIouFs........................................ 0 55 71 132 +624 PF02613 Nitrate_red_del Nitrate reductase delta subunit Bashton M, Bateman A anon COG2180 Family This family is the delta subunit of the nitrate reductase enzyme, The delta subunit is not part of the nitrate reductase enzyme but is most likely needed for assembly of the multi-subunit enzyme complex [1]. In the absence of the delta subunit the core alpha beta enzyme complex is unstable [1]. The delta subunit is essential for enzyme activity in vivo and in vitro [1]. The nitrate reductase enzyme, EC:1.7.99.4 catalyse the conversion of nitrite to nitrate via the reduction of an acceptor.\ \ The nitrate reductase enzyme is composed of three subunits [1].\ Nitrate is the most widely used alternative electron acceptor after oxygen [1]. This family also now contains the family TorD, a family of cytoplasmic chaperone proteins; like many prokaryotic molybdoenzymes, the TMAO reductase (TorA) of Escherichia coli requires the insertion of a bis(molybdopterin guanine dinucleotide) molybdenum (bis(MGD)Mo) cofactor in its catalytic site to be active and translocated to the periplasm. The TorD chaperone increases apoTorA activation up to four-fold, allowing maturation of most of the apoprotein. Therefore TorD is involved in the first step of TorA maturation to make it competent to receive the cofactor [2]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.73 0.71 -4.34 270 4923 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 2034 17 709 2437 138 149.30 21 69.62 CHANGED shhshht.h.shst..........................tpt....ltthhp..thtppshtplpt-YspLF........hsPasShYLs.......sc.......hhGpshhclpphhpptGlphss....pEssDHlulhLEhhuhL..................sphttthphlpp+Lt......sWhsth..htpl ..............................................................................hhh.........t..s.t............................................tttLt.shhp.......phts..t.sh..h....p..lp....scaspLF.............................t.s.hsS..halp...........................pc........hG.p...s....h...h.....cl+t.....hhp.ptGl.ph..s..s..........pE.........s....DHlslhL-hhuhL..t......................tht..................pphps.htphlstpLh...................sWsshF...h...h.................................................... 0 194 410 579 +625 PF00877 NLPC_P60 NlpC/P60 family Bateman A anon Pfam-B_292 (release 3.0) & Pfam-B_9022 (Release 8.0) Family The function of this domain is unknown. It is found in several lipoproteins. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.67 0.72 -4.22 28 11783 2012-10-10 12:56:15 2003-04-07 12:59:11 14 163 3617 17 2263 9076 1767 106.80 30 35.28 CHANGED Gp.PYtaGGsssp.............GFDCSGhsphsatph.GlpLPRsuspp.................hthupp..lstsphpsGDLlFFps....tssssHVGlYl................Gssphl+us..usslp..........hsslpssaWppphhtstR ......................................................................GpPY.h.aG....Gs.s.sp...................................GhDCSGhs......p...h.s........ap.......p....t.....G......l...p.....L...P..R.s.stpQ......................................................h.p.h..Gp...t.............l...s....h....s....p....h...psG....D...L.lF.Fps..................ttsssHVGIY..l........................G..s.s...p.h...l..Hus.......upslp.............hs.s..h..t..p.s.h.att.h............................................................................ 0 699 1509 1928 +626 PF04981 NMD3 NMD3 family Bateman A anon COG1499 Family The NMD3 protein is involved in nonsense mediated mRNA decay. This amino terminal region contains four conserved CXXC motifs that could be metal binding. NMD3 is involved in export of the 60S ribosomal subunit is mediated by the adapter protein Nmd3p in a Crm1p-dependent pathway [1]. 
22.80 22.80 22.80 23.10 22.20 22.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.82 0.70 -5.25 73 560 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 451 0 390 543 91 215.30 34 49.77 CHANGED CspCGt...shcss.s................s...hCscChhpph.clsc.lscph...plthCppCsphhps.spWhps..tuc-lhslh.p.l.cs.l..........pcsh..hh-sphlhsEs...........pupclcl+lslps..plhs...s.lppshtlphhlptptCscCp+ht..sshacAlVQlRpp..........h.pchshhhl-phhh+tsttt.lsplpctc-.GlDhahuspstAp+hschlpsth.ssphpcotcLlup-tp.upp.Y+hTauV+l ............................CspCGs...sh..s.s.s..su...................s...hChsCl+ppl..DIoc..sls+p.s...........slphC+pCp+a......hps....sp..........Wlpst....ES+ELLslCL++l.csL...........scs+......llDAtFlWTEP..............................HS+Rl+l+lolpt.........Elhs....ssllpQsh..V-ahlptp.CscCp+h........stshWcAsVQlRQ+...........s.cK+ThhaLEQLll.K.tstpppslpIp.c.hc-.GlDFaaus+...spAp+hl-Flpshh..Ps+h.pp.SpcLlSpDhcssphsY+hTaSVcl..................... 0 138 235 327 +627 PF01234 NNMT_PNMT_TEMT NNMT/PNMT/TEMT family Finn RD, Bateman A anon Prosite Family \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.75 0.70 -5.42 7 264 2012-10-10 17:06:42 2003-04-07 12:59:11 12 4 72 91 177 297 5 229.70 31 93.18 CHANGED h-u..shhuspsahp+FsPcsYLpsaYph.Stsss..tp..llhahL.s....lhpphs.sthts-sLlDIGuGPTlYphLuhp-shc-IhloDasspNhpELt+WlccE.usaDWosslpahsplEG.....stsphp-hEpKhRttV+p..VLcsDVppsssl..su........s.lP..sDsVlohhslEsuCssLssYppAl+shsuLL+PGGaLlhhssLctohY.hGt+c.Fosl.LpcEhl.cAlhcuGhplpphp.....t..ttshhstcGlhhlsA+K 
............................................................t..........t.h..pcFpPpsYLpp.Yt.....spt.........tt...............hh...a.hLpp....lhphF..s....s...tl....p.G.c..pL..lDlGuGP.TlY.p.lLSAschFc-IhhoDas-pNhpELp+W.L.............cc-..suua..D..Woshhpa.l.s.p.lEG......ptpp.hpE.+cccLR.ptlKp.....ll.sDV.ppspPl..ss............lP.....sDsl.l....oshCLEsss.s..hs.s..YppALcpl......ssLL+PGGaLlh...hs.s.....Lp.t...o........hY..hs...Gt.......p.c...asslslsc-.lcpAlt..cu..Ga.pl.php...........p.....t.sh.s.pu.hhhhhupK................................................................... 0 60 72 135 +628 PF04147 Nop14 Nop14-like family Wood V, Finn RD anon Pfam-B_8521 (release 7.3); Family Emg1 and Nop14 are novel proteins whose interaction is required for the maturation of the 18S rRNA and for 40S ribosome production [1]. 34.90 34.90 44.20 35.60 34.40 34.30 hmmbuild -o /dev/null HMM SEED 840 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.42 0.70 -6.62 37 404 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 278 0 286 416 7 609.40 24 92.83 CHANGED 
s+tp+ppsltt..........IccphNPF-h+ss..+sKa-l......hG+p.scssp...ucPGlo+uhup-pR+pTLhtEhpp+sKsGuhlD+RFGEp.DsshosEEKhhpRFs+E+Q+p...+K...cshFNLp-DD-p...................LTHhGpoLu..........pD-.p-tcht...t..................pt.chtt........pp....tpspPp..R+KSKpEVMcElIAKSKhaKtERQpt+-csp-hp-cLDcs..hp-lhshL..psspp..t..........................ht...........spcph.c-YDptl+pLs..a-+RupPsDRTKT-EEhAcEEtc+L+cLEp-RL+RMpG...tsps-cppcpt......................ts-Dl-Ds.........................tahhs.s.pppspcp.........................................................uhps---tppD-D.p....t.pp.p.........pt..ptts-.Esptppcpt.......................................tspspsplsaTassPpoa--hhphlp..shshp-hssllpRItts..a+PpLttsNKpKLspFhul....Llp+lhaLusps.............shpllpslhphl+sLucp.....................aPpshupshRshlcch.pphct....slps.........s-LlhlslluslFsTSD+aH.VlTPuhlhluchLupsthp...olpDlupGhalsslhLpYpphSKRalPEllsFltssLhhh.............hPpcspp..........................s.sh..shpt.sstLp.......lsss...................pt..thpspp..Lphhclh........spppssph+lslLsshlsllppsss.lapsp...suahElhpPhhslLpphsp..................tsh.splpphhpc.lsphhststht..R+PLsLppH+PluI+ohtPKF.E-sFsP.c+Kp.YDss+ERsEhsKLKtphK+E+KGAhRElRKDspFlARp+lcEppccDpEYccKh++llsplpspEGcp 
...............................................................................................t..............t.NsF-hphs.....t...Khph..............hst......t............t.ss...hs+t.u.ppRppsh...cht....p.t+..s.hhhD+R..h........G..E..s.p..h........s.E-+hhtRathEp.pt.......p+.....tshasLp-p-...........................................................LTHhGpsLt..............................t...-p.tpt.............................................t...htt...................................ttt.t....p.+o+pElhpElIsKSK...Khc+ptt+cp..t.p.htpLDpt..htpl.t.h...tt..t.................................................................................................................................................tt...ptYD.hh+..phh..h..-..h.+u.tss-RhKotEE.ht......tcp..tp+LppLE.............tpRhpRM.u......ppptp..t.t...................................-s.tct.............................................................h..t.........................................................................................t.t.tp.pp...tps...........................pt.t....................................tp...pt.........................................................................................................................ttthshsh..hPp......sh.pp.....h.thht...th........tp..........hhlp+l..th...pspLt.tN+t+ht.h..h....Llpah....hhpt..........................hth.h....p.lh..lhphsp.................................................s...hstthp.hltph.tp..t......................................t.LhhhphhuhlassSDhh.H.VhTPshlhh.......sphLtp..............h.h......shtphs.uhhlstlhhp.h.t...upRhhPEhh.ah.thlhhh......................h.t..........................................t......h......................l..t.................................................h.h..h...............t.phthl..hhthlpth....hhtt.....uh...hh...s.h..hlpth.....................................hpthhpt....htt...h.p.t.........ht...L....h..........p.p...p+...............shslt.......h.P+h.....p..s...hp..t..........c.......s.sp......p..ct....-h.t+hh..tphKcEhKus
..h+ElR+DstFhtp.phpp.htpptthppKhtplhs.lttpptt................................................................................................................................ 0 105 165 243 +629 PF04153 NOT2_3_5 NOT; NOT2 / NOT3 / NOT5 family Bateman A anon Pfam-B_2131 (release 7.3) Family NOT1, NOT2, NOT3, NOT4 and NOT5 form a nuclear complex that negatively regulates the basal and activated transcription of many genes. This family includes NOT2, NOT3 and NOT5. 20.50 20.50 21.10 21.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.04 0.71 -4.28 66 799 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 308 0 526 782 7 125.90 31 23.73 CHANGED lpsuhsss.pst...ss.pp..pap........Ppshh.......spps.........................phhp..phst-.....TLFalFYhh.s..shtQhhAApELppRsWRaHKchpsWhpR....t.....c.hs........sph...Ep...GsYhaFDhps...........Wpphc..c....sFphcYphL- ...................................................................sshts..tst...ss.....t..t...pap......Pps.h........h.spps.sh.............................................thhp..+h.sp-.....TLFalFYhh.u..............shhQhlAApEL.t...p........+sWRaH+chphWhpR..pt..p....pshs................................sph..Ep...GoY.haFDhpp............Wpphp..p.pFphcYphLp................................................................ 
0 187 299 442 +630 PF04065 Not3 Not1 N-terminal domain, CCR4-Not complex component Wood V, Finn RD anon Pfam-B_8081 (release 7.3); Family \N 25.80 25.80 25.80 31.30 25.50 25.70 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.69 0.70 -5.09 24 389 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 268 0 251 374 2 217.20 45 32.76 CHANGED Mup.RKLQQElD+shKKVsEGlptF-sIY-Klps..ssNsoQ+.....EKLEuDLK+EIKKLQRhRDQIKoWhuss-IK.DKss..Lh-pR+hIEs......tME+FKulEKthKTKuaSpEGLp..sss......pl.DPc-pc+p-sspalssplDELpcQlEphEuEh-pl.sthKKt.ptst.spppchp-hcpthERacaHls+LEhlLRhLpNspl-s-pVp-Ic-DIcYYVEsNp-..sDFh.Es....-slYD-.Lsl-pp .............spRKLQ...tEID+shKKVsEGlptF-sIapKlps.......ssNssQK.........EKhEs...........DLK+EIKKLQ.....RhRDQIKoWh...uus-IK...D........K..p...............Ll-....pR+.hIEs...........pME+FKsVE+EsKTKAaSKEG..Lu....tup.................................+l..D....P...tp.....+cKpEsspaLsssl-pLptQ.l-phEuElEsLps......p.......t.......+..Kt.....pts......p.....s..........c.....ppRlpclcphl-RH+aHlppLEhlL.RhLcNs.p.l.ps..-pl.p.c.lK-slcYYl-.....s......s...p-.....sD..F..Es.............-tlYD-LsLp.......................................................................... 0 87 147 217 +631 PF03060 NMO NPD; Nitronate monooxygenase Griffiths-Jones SR, Bateman A anon Pfam-B_2634 (release 6.4) Domain Nitronate monooxygenase (NMO), formerly referred to as 2-nitropropane dioxygenase (NPD) (EC:1.13.11.32), is an FMN-dependent enzyme that uses molecular oxygen to oxidize (anionic) alkyl nitronates and, in the case of the enzyme from Neurospora crassa, (neutral) nitroalkanes to the corresponding carbonyl compounds and nitrite. Previously classified as 2-nitropropane dioxygenase [1,2,3], but it is now recognized that this was the result of the slow ionization of nitroalkanes to their nitronate (anionic) forms [4]. The enzymes from the fungus Neurospora crassa and the yeast Williopsis saturnus var. 
mrakii (formerly classified as Hansenula mrakii) contain non-covalently bound FMN as the cofactor. Active towards linear alkyl nitronates of lengths between 2 and 6 carbon atoms and, with lower activity, towards propyl-2-nitronate. The enzyme from N. crassa can also utilize neutral nitroalkanes, but with lower activity. One atom of oxygen is incorporated into the carbonyl group of the aldehyde product. The reaction appears to involve the formation of an enzyme-bound nitronate radical and an a-peroxynitroethane species, which then decomposes, either in the active site of the enzyme or after release, to acetaldehyde and nitrite. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.86 0.70 -5.29 12 5886 2012-10-03 05:58:16 2003-04-07 12:59:11 10 43 3071 11 1913 12559 6372 309.60 26 84.17 CHANGED psthschh.hp.slhtshhuh.hssscLAuAVSpAGGLGllu..uuhhos........DtLtpplptlcphTsc.PaGlNlhlsp.ttsc....................h...h.phthchuls...................................lluhuhGsP.tphlccl+puGshlhshsuosppActstpsG.......sDsllsQGhEAGGHpG......csu....thhhLlsplscsls.......lPVlAAGGItDs+slAAALsLGApGVthGTtaLsopEussssht+pthlpustccThhopshs.........G+stRsLpsshhcch-.....p..shshs.t.................................hsts....lptusspu...shctu.hhsGQstthlsclhsscpllpplsp- 
..........................................................................................................................................................................................................................................................thh.thchP.l.l.p.u.....sM.uh.....l...u...ss.......cL.su.A..Vup.A.G....GL.Ghl.u.....u...u.t.h.ss...........................c..l.....c.....p.....p.l...p......t........h............+...........p......h.........s.......s.........p.....P............a.....u...l...N...l..h..h......t.......htt......................................................................................hp...h...hh.p.tsht..........................................................................................................................................................l..l...s...h.......u.......h.......G...............P................t.......p.......h..........h......p........t......h......+........p..........s......G........l.......h..........l......l.....s.......h.......l.....s....o.....s.....c..t....A...p....t....h..t....c....h.G..................................s.D.u....l....l.....s.......p..............G.....h.............E.......A...G..G.HhG.......................phs........................sh..s..L..l...s..p...l......s..s..s.hs....................................lP..Vl..A.AGG.I..s...s........G........c.u......l....s......A...A......h...........s...........L.......G.....A........s........uV....p....h............GT....t..F..l..so..p...E........u....s...s........p.t.......sa....K..p..t..l....l....p................u........p.........t.....p......D.........s......s..l.o..t.t.hs................G....h.s..s.......R.sl..p...s..p.....h..h.p..p.h.t...........t......t..h.t....h.........t.t........................................................................................hh.tt.........lpt.s..hhps........c..h..c....h.s........h.h.sG.pss...t..h.....l.....p...c..h...sstcllpplh..t..............................................................................
................................................................................................................................... 1 550 1189 1608 +632 PF05021 NPL4 NPL4 family Wood V, Bateman A anon Pfam-B_13681 (release 7.6) Family The HRD4 gene was identical to NPL4, a gene previously implicated in nuclear transport. Using a diverse set of substrates and direct ubiquitination assays, analysis revealed that HRD4/NPL4 is required for a poorly characterised step in ER-associated degradation after ubiquitination of target proteins but before their recognition by the 26S proteasome [1]. Npl4p physically associates with Cdc48p via Ufd1p to form a Cdc48p-Ufd1p-Npl4p complex. The Cdc48-Ufd1-Npl4 complex functions in the recognition of several polyubiquitin-tagged proteins and facilitates their presentation to the 26S proteasome for processive degradation or even more specific processing. 21.10 21.10 21.30 21.50 19.60 20.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.92 0.70 -5.26 36 376 2012-10-10 14:49:21 2003-04-07 12:59:11 10 22 298 0 260 392 4 274.70 36 51.89 CHANGED RhGaLYGpYccaspsPLGIKAVV-AIYEPPQ.psE.DGlsl.hs...pppppVDplApth..GLp+VGhIFTDLhsss.tpGoshhpRchcoaaLSuhEllhAA+hQhpaPp.s+aS...psGpFuSpFVTCllSG..s..suplshpuYQVSspAhuLV+ucllpsosp....Pshhhlpc........sspp..cYlP-VaYpchspYGtpVpc.A+P.FPl-aLlVslocuaPpsPp..........shF........psssFPlENRphhGp.........QshpsltcaLpsp...............sshhpplSsFHLLlaltph.thLs..pp-hthLscssp....pp..t.h.th.tpstsatsLlpIlp 
...............................................RhGaLYGpYppap...p.h...PLGl+..AhVtAIYEPPQ......sp.-..u.lpl...hp........tptptVDplA..pth............GLp+...VGhIFTDLhs....ts.......t......t......GpVhhpR......p.h-oaa..LSu.EslhAuchQspaPp.s..+hu...........sGp.FuSpFVTsl..loG...s..t.s.s.plphpuYQlSs..pshtlV+sshltsstc.....Pplhhlpc................................sspt...pYlP...-........V....aYpc.h.s....c.a.G..t.p..lpp.A+P...hPV-YLlVs.....lstuh.PpsPh...........hF........ppssFPl.ENRphhGp..........sQshpsLtphLpptt...........................tp.hhctlSDFHL.LlaLh.p..t..h..hs..p...........cchshLhcssp........pcs...t.....h.ph.hts.tWtpl..l........................................... 0 96 146 219 +633 PF01909 NTP_transf_2 DUF76; Nucleotidyltransferase domain Bateman A anon [1] Family Members of this family belong to a large family of nucleotidyltransferases [1]. This family includes kanamycin nucleotidyltransferase (KNTase) which is a plasmid-coded enzyme responsible for some types of bacterial resistance to aminoglycosides. KNTase in-activates antibiotics by catalysing the addition of a nucleotidyl group onto the drug. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.40 0.72 -3.66 117 8895 2012-10-02 22:47:23 2003-04-07 12:59:11 18 121 3228 80 3285 7676 1203 100.00 15 25.51 CHANGED lppltctlpphh......stplhlaGShsc.....Gphp.t.....SDlDlllhhspt.....................................hhhthtthhpphhshthDlh..............hhtthp.........hhhtphhppthhh ..........................................h.......h..t.h..........hhtl.h.laGShuc............G.p.h.pst........SDlDlllhhspt...........................................................................................h..th..t.....h...t.....h........h.....h..shh...................................................hh........................................................................................................................................................................................... 0 1095 2088 2757 +634 PF01759 NTR UNC-6/NTR/C345C module Bateman A anon [1] Family Sequence similarity between netrin UNC-6 and C345C complement protein family members, and hence the existence of the UNC-6 module, was first reported in [1]. Subsequently, many additional members of the family were identified on the basis of sequence similarity between the C-terminal domains of netrins, complement proteins C3, C4, C5, secreted frizzled-related proteins, and type I pro-collagen C-proteinase enhancer proteins (PCOLCEs), which are homologous with the N-terminal domains of tissue inhibitors of metalloproteinases (TIMPs). The TIMPs are classified as a separate family in Pfam (Pfam:PF00965) [2]. This expanded domain family has been named as the NTR module [2]. 
21.90 21.90 21.90 21.90 21.80 21.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.74 0.72 -4.50 103 1259 2012-10-01 21:39:20 2003-04-07 12:59:11 16 55 160 51 605 1076 0 105.20 21 14.37 CHANGED pc..hCps......-Ysh+spVhs.....hppps......shsthshpltpVhK....pu......th.ppsppthahtpt.....pC.phht...Gp.pYLlMGpt.........pptphphllsppohlchW.sptppphpchpp ...........................t.phCts......-a....s....l...ps+lhp..................h.ppps............shhph....s.s.p.l.p..plhK...............................pu..........ph.pcspp.p...hhh.pt........pCsplps..........sppYLlMG......................pptpsphlls.pohlthW.sphtpphpp.................................................................... 0 82 136 315 +635 PF04142 Nuc_sug_transp Nucleotide-sugar transporter Bateman A anon Pfam-B_2311 (release 7.3) Family This family of membrane proteins transport nucleotide sugars from the cytoplasm into golgi vesicles. Swiss:P78382 transports CMP-sialic acid, Swiss:P78381 transports UDP-galactose and Swiss:Q9Y2D2 transports UDP-GlcNAc. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.56 0.70 -5.27 10 1158 2012-10-02 19:55:49 2003-04-07 12:59:11 10 22 276 0 767 1530 427 213.10 26 58.73 CHANGED +hscpL+ctlhsps...tDoLKluVPShlYslQNNLhYVALSNL-AATYQVTYQLKILTTAlFoVlhLsR+LuphQWhSLlLLhsGVAlVQhssssu.p.ssspss...........................................spN...hlGhsAVLsAChsSGFAGVYFEKILKsos...sSlWlRNlQLuhhGlhhuLls..salpDtspIs-pG.........FFhGYshhVWhlVlLpAhGGLllAlVlKYADNILKuFAoSloIILSoluShh.LF.DFplohhFhLGAhlV ................................................................................................................hh..................p....hh.h....u.l.P.u..hl....Y.sl.p.N.sLh.a.l....u.......l.s...l..s..su.s.....a.Q.l.hhp.h.......K......Il.......sT.A..lhshh....hLp+p..Ls.h..........h.Q..W..h.uLh.l.L.h.h.G..lsl..l.p....hs.....s.st..s........................t.....t...........h.......................................................................................................................................tp.......hh....G...h......hhl...l....h...us....hhS..uh...A..u..............VY.............hEt..l...L.K..p..p.t.....................s.l....a...h..p....N.......h......l.....h..huhhhs.h..hh...............hh...h.......t....h....t........h.......p..tG..............................................h..h..G....a....s....h.....s.h..h.hl.h................p.u.h..s....Glh.huh....h..h..+ass.sl..h..Ksassshuh...lhss.hh.S.hh.hh...s...h...s........hhhs.................................................................................................................................................. 
0 334 453 632 +637 PF04096 Nucleoporin2 Nucleoporin autopeptidase Wood V, Finn RD, Rawlings N anon Pfam-B_5132 (release 7.3); Family \N 21.90 21.90 23.80 24.90 21.80 21.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.80 0.71 -4.37 57 473 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 294 19 304 507 1 137.20 35 10.65 CHANGED sYahpPolppLpphohpcLpp...VpsFslGR.csaGpIpF...tsVDLssls.L-..............plVphps.+pltV.Ys-s...p..KP.lGpGLNlsApITL.sshPh...s+sspt.h.pspp.....hpcplcplpp.tpsscFlSY-s.sGsWsFcVpHF ...................sYas.PSh--Ltphstpc....hpp......V..s.sFslGR.cuYGplpF...tsVDLs.s.l.s..LD.................p.....IV.phpp..+.....plhV.YsD-..................sp..KPPl..GpGLNh.AplTLcssaPh..........s+ss+p.hpp.spc.h.....acp+lc+lpc.p.psscFl.s.Ycsp....o.....GsWsFcVpHF................................ 2 113 182 263 +638 PF01733 Nucleoside_tran Nucleoside transporter Bashton M, Bateman A anon Pfam-B_2135 (release 4.1) Family This is a family of nucleoside transporters. In mammalian cells nucleoside transporters transport nucleoside across the plasma membrane and are essential for nucleotide synthesis via the salvage pathways for cells that lack their own de novo synthesis pathways [2]. Also in this family is mouse and human nucleolar protein HNP36 Swiss:Q14542 a protein of unknown function; although it has been hypothesised to be a plasma membrane nucleoside transporter [2]. 
22.90 22.90 23.10 23.10 22.20 22.60 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.89 0.70 -5.26 7 1065 2012-10-03 03:33:39 2003-04-07 12:59:11 13 12 303 1 710 1041 14 251.30 21 65.30 CHANGED hhtlslINu.hsAlhQsSlaGlAushPtpYosslhsGQuluGhhsolshl.lshAsssc....hptuAhhYF...hsuhlllllChlhh.hlphhcaY+hatphp.....................p.pt-hhpsccp...sp.s...stsshpp.............s..t.....h..hhsllppl.shshslshhaplslshFPuh.o..h.os....s....pp.aa..lssFLsFNlFDhlG+slsuhhhaPs...ssRhlshhshhRhlFlPLFhhCshtsp...+....hPshFcp-hhFhhhhhhFuhoNGYLsSLsMhhuP+pV..sccpEsAGtlhshFLslGLuhGulhSalhch .............................................................................................................................................h....hh.h.sh....ssuh...hpsu.h.huhsu...hs.....p.a........hpshh.Gp.uhuG...................hh.......s.u.hh..l.....h.s.h...h......hsp.................................pt.s....shhaF...hhuh.hh....h...hs...h....hh.h...h.h.h......h............h....ht....h.h...t.............................................................................................................................................................................................................h.h.lh.......pphh.....h...h....hsl.hhhahlT...h.lFPu.....h..............h.....t......s...........................s................................h....h...........h............h...h....hhhaNhhDhhG........+.....h..s.t.........h..........h....................p..p..h.l.h..h.h...sh.hR.hl.h..l.Phhhhs..h..t....................................hh.t..ps.....hhh.....h.h.h...hhhuhoNGahsshshhh...u......P............p.....h...s........tpt.........phu..G.hhsh....hl....h..hGlhhGuhhuhh...h................................................................... 1 294 433 600 +639 PF04880 NUDE_C NUDE protein, C-terminal conserved region Mifsud W anon Pfam-B_6501 (release 7.6) Family This family represents the C-terminal conserved region of the NUDE proteins. 
NUDE proteins are involved in nuclear migration [1]. 27.20 27.20 28.20 28.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.15 0.71 -3.89 3 274 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 162 6 163 251 0 178.10 37 42.72 CHANGED SLEDFEp+LNQAlERNALLEhEl....DEKESLplclQRL+DEsRDLKQEL.hVpER.ppsNRKSRPoP...........V.susSlPo...TPss...psShsSP....+SlPNGhVoSPL..TPss+lSL................pLAu..ssA+DsAsspStTSuSVN.shshsSshsh.ptSussSFssRu....h.ss.PphsQuHSRspS ...........SLEDFEp+LNQAIERNAhLEsEL....DE.KEsLhlplQRLKDEsR.............DL+QEL.tVp............c+.........p....c....h....s....p.p.....s...tsos......................................l.p.ssh..olPu.............T.Pss.........ps..sh.sos..............................puh..s...suhu..u....oPL....TP..s..s.+...l.Su........................................plAu...shsps..us...ts..h...sss.hp..........h....t..........s............s..............s............................................................................................................................................ 
0 39 64 109 +640 PF00293 NUDIX mutT; NUDIX domain Bateman A, Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.73 0.71 -4.52 197 43912 2012-10-02 00:00:35 2003-04-07 12:59:11 23 282 5206 325 11753 31604 9218 130.90 17 65.32 CHANGED hthusssllhspps..................clLlhcctps.................................................shaphPuGtl-..GEs.tp..................................uAhREltEEsGlph.......t.hthhthhthttsstt.....................................thhhhahsphtsst.thp.t.t.........Ehtphpahshpclhphhhthp.......hhtthhtt .................................................................................................................................................h....shh.h.l.hptpt..................clL.l..h....c..ctp.t.................................................................................................s.h.a.p.h.P.u....G....t.l.....-.......u...E..o.......tp...........................................................us.hR.El..t.E..E..s..Glph.........................p.hph..h..s.....h.....h...t....h.....h...stt...................................................................................h.h..h...h....a..h....s....p....h..........t.....s...t..............h..t..t.t.......................Eh.t.p....h..t....a..h....s..hp.ph..p.h........................thh.......................................................................................................................................... 0 3648 7193 9734 +641 PF03826 OAR OAR domain Griffiths-Jones SR anon PROSITE Domain \N 20.10 20.10 20.10 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.38 0.72 -6.56 0.72 -4.31 45 968 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 131 0 530 872 1 20.70 51 6.33 CHANGED s-.+soSIAsLRhK...AKEHsss ......sp+soSIAuLRhK...AKcHus...... 
0 82 133 288 +642 PF03137 OATP OATP_C; Organic Anion Transporter Polypeptide (OATP) family Mifsud W, Bateman A anon Pfam-B_626 (release 6.5) Family This family consists of several eukaryotic Organic-Anion-Transporting Polypeptides (OATPs). Several have been identified mostly in human and rat. Different OATPs vary in tissue distribution and substrate specificity. Since the numbering of different OATPs in particular species was based originally on the order of discovery, similarly numbered OATPs in humans and rats did not necessarily correspond in function, tissue distribution and substrate specificity (in spite of the name, some OATPs also transport organic cations and neutral molecules). Thus, Tamai et al. [1] initiated the current scheme of using digits for rat OATPs and letters for human ones. Prostaglandin transporter (PGT) proteins (e.g. Swiss:Q92959) are also considered to be OATP family members. In addition, the methotrexate transporter OATK (Swiss:P70502) is closely related to OATPs. This family also includes several predicted proteins from Caenorhabditis elegans and Drosophila melanogaster. This similarity was not previously noted. Note: Members of this family are described (in the Swiss-Prot database) as belonging to the SLC21 family of transporters. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null --hand HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.72 0.70 -6.46 16 1462 2012-10-03 03:33:39 2003-04-07 12:59:11 15 17 98 1 852 1674 81 439.80 25 82.67 CHANGED hKhFllshslshhsQ.shssuahsSslToIE+RFplsSspoGlIsusa-IushllllhVSYFGu+hHRPRhIGhGsllhulGullhuLPHFhhs...................Ypaspss.sssspstsspLC.........t....p.stsstsspcptpshhallhhhuQhlpGIGsoPlhslGlSYlDD.scppp.SPlYlulhhshshhGPAlGalLuShhhplYlDhspss.t....lplsssDPRWlGAWWLGFLlsuulslloulPhFhFP+pLPcs.......................th....thpt.pspcpppppsppscstt........................................tpl+cF.pslhplLpNslahhhllupshpssshsGhhoFlPKaLEpQauhouupAshLhGslslPssulGhhlGGhll++a+lsspuhsthshhssllshhhhlshhhlsCssssluGlsssh............ss.tt.s.hssCspsCsCspstasPVCussGhtahu...........................shsusssssstsstp.spshssssssssspss..........tGhCsss..CspphhhalhlhshsshhsshutssshhllLRsVp.--KohAlGlphhhhRlLGhIPuPIhFGhlIDssClhW.uppC.Gp+GuChhYDssshp .....................................................................................................................................hhhhhshh.h.......p....hh...s.hh.ssl..o.p..l.E+Ra...tl.S.t.sG.......hlsus...-l..u...shhhhhhlo.Ya..G.s..+..h..p.+P.phluh.G..........shlhuhushlhslPcFh....t............................................................................................................hp.ht.................................t.....t.......ht.....................................................................t.t...t......t............................h.h.hhhhhuphlhGhGtssl.slGhsYlD-...spp...pp.us.hY.....l.u..hh.s..hthhGPhhGah.l.uu.hh.hp...ha...h.......-.................t.l.s..p.c....s..pWlGA...W.Wh.....Ga..ll.suhhh...h.h...uhs.hhhh..P.+p...hstt...............................................................................................................t.............t....................................t..........................................................................................................
.................................h..p.th....s..hh..p...lhp.N....hahhhhh..sth....h....h...h.....h..uh.........hsF..........h.sK...al..E.........p.Q....a.........t...............s...........su............us.........hh.......h...G...h.....h.ls...sh..s.......h.GhhhGG.h...lh.p+...h....p.h..t..h..h....t...hh..t..h...shh.ht.h.h.......shhh.....h..hhhhtC........s.........h.u....G.ls.sh................................t.....s.Cs.ts.s..t..tshsh.ssh......................................................................................................................................................................................................................G..C.ts........t..t....h....a.hh.h...h..h.....h.hh.h.h.....s.ths........h....h..h....hl...R.........sl..p..+shulGh.thhhhR.h..h....u................hlPuPlhaGhhlDpsChhW...t..........p.......s....t...p.u....sChhYs...h...................................................................................................................... 0 328 378 620 +644 PF05005 Ocnus Janus/Ocnus family (Ocnus) Moxon SJ anon Pfam-B_4799 (release 7.6) Family This family is comprised of the Ocnus, Janus-A and Janus-B proteins. These proteins have been found to be testes specific in Drosophila melanogaster [1]. 20.20 20.20 20.20 23.60 19.90 20.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.43 0.72 -4.17 22 220 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 116 7 113 230 6 104.80 40 67.16 CHANGED Lst.lPpV-lD.cGh.FKYlLlpltspt.....scts+.lVRGhstspaHs...DIa-clptphcphGl..ssc......................CLGGGRIc+csppKp..I+VYGaSpuaG+A...cHphopclLp..spYscY.pI ..................................................................Lht.lPpVcI-.pGh.aKYlLlplpsts..........spts+.lVRGht...s.c....aHs...DIa-clp....tchc....p.h.Gh..ssc.......................CLGGGRIpHpspcKp..I+VYGhSpua...G+A...cHshop-lLp..spYs.-Ypl............................................. 
1 44 56 89 +645 PF00215 OMPdecase Orotidine 5'-phosphate decarboxylase / HUMPS family Finn RD, Bateman A anon Prosite Domain This family includes Orotidine 5'-phosphate decarboxylase enzymes EC:4.1.1.23 that are involved in the final step of pyrimidine biosynthesis. The family also includes enzymes such as hexulose-6-phosphate synthase. This family appears to be distantly related to Pfam:PF00834. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.61 0.70 -4.95 87 7126 2012-10-03 05:58:16 2003-04-07 12:59:11 19 18 4753 429 1546 4780 3008 216.50 23 86.65 CHANGED spLpluLDhtst..............cchl...plscclss..lshlKsshslhpshG........hpllptl+pp.s..hhlhhDhKhsDIGsTstpthp.....hhphsAchlslpshsG..sslpuhhcsupphs.....................................................................................................tllhls.hoshsthshtp........hspthlcpttctp.t.............hh.Ghlsssp.s............................chhhlsPGlph............ttusstutphtts.thht.ttuchllVGRuIhtus.sPttsucph 
..........................................................................................................................lhlAL..D..hpsh.......................................pps.h.............p.h.l.c..p....ls........s...ts....s....h.l.K..lGhthhhs.G.........................phl.c.tL.+.pts.............hhlhhDh.K............ht.DI.s.sTssptst.......................................................s......p........h.........u.......s........Dhlslpssu......G....hph....hcuuhcs..h.pphs.................................................................................................................................................................t.l.l.t.ls..hh..oshstp.phpp..hs..............tss..p...th.s.pp.h.tchttp.......................................t...Ghlsuspps......................................shhhls..P..Glp...................................psu.ss.s.s....pt..h...hhs..t.......s.t..............ss.......h.lllGRsIspAs...sPht.shct.h..................................................................................................................... 0 478 961 1295 +647 PF04084 ORC2 Origin recognition complex subunit 2 Wood V, Finn RD anon Pfam-B_7065 (release 7.3); Family All DNA replication initiation is driven by a single conserved eukaryotic initiator complex termed he origin recognition complex (ORC). The ORC is a six protein complex. The function of ORC is reviewed in [1]. 
19.60 19.60 20.10 20.00 19.40 18.80 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.09 0.70 -5.60 31 329 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 276 0 239 313 5 306.80 30 59.16 CHANGED SspoLuplt......shlspccahphhpph.pth...pppphptL.chapph..............FtpWhh-LppGFsllhYGlGSKcpLLppFspphLss......................hshlVlNGa.Pslsh+sllpsIsphlh.t.............thhsppstcplphlhchhpst...................psclhlllHNlDGsh..LRppcsQshLupLushspIallAShDHlsuPLlWD........................ptct.ppaNFlaa-sTTatPYstEh..saps.sl..hh..............................................s.....+osp.....hutpuhpaVLpSLTtNu+sla+lLlphQLp.....................sptss.....pthGl-acsLappCpcpFlsSsEhshRohLsEFhDHKhlppp+sssGhE...............hLhlPhspspl .......................................................Stpshtph....hlspp.phhphh.pp...h..t.....p.pchptL.p..a..p.p...............FspWhhpLpp.GFslllYGhGSK+pLLp...cFtpphhpp..................................................shlVlNG.ah.P.s...lsl+s...lLssIsptlhst...............................................tthhpps.ptlphlhphhppps...............................................................shplhllIHNlDush........LRp....s.ps.QphLupL....u..s.......hsp......IpllAShDHlN.sPL.lWD............................pspt...spaNalaa-sTTatPY............s.tEh....sats..pl..hl..............................................t........pssp.........hshpuhtaVLpSLT.Nu+sla+lLhphQLp.................................................sttss.....phhGlpapslYppspEpFlsoS-hslRs.LpEFhDHpllpp++.s.s.sGs.E................hLhlPhstt..h.................................................. 
1 83 134 198 +648 PF03392 OS-D Insect pheromone-binding family, A10/OS-D Mifsud W anon Pfam-B_3032 (release 6.6) Family \N 25.00 25.00 25.40 26.20 24.60 24.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.33 0.72 -4.01 91 701 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 96 9 258 733 0 92.40 40 73.06 CHANGED YTsKYDNlDlDEILps-RLlpsYhpCLl-cG..tC.TP-GcELKchLPDALcTsCuKCo-KQKpuscKVlpaLhcp+P-.WcpLpsKYDPcspYpc+Y ........................YosKaD..s.l.sl--ILpscRLlpsYhcCLLcc..G...C.TP.-............G+-LK.....ch.l.....P-ALc.s.pCsKCo-+Q+psucKllcaLhpp+P-.W.ppLtsKYDPpspYtp+a.................... 0 61 101 246 +649 PF04756 OST3_OST6 OST3 / OST6 family Wood V, Bateman A anon Wood V Family The proteins in this family are part of a complex of eight ER proteins that transfers core oligosaccharide from dolichol carrier to Asn-X-Ser/Thr motifs [1]. This family includes both OST3 and OST6, each of which contains four predicted transmembrane helices. Disruption of OST3 and OST6 leads to a defect in the assembly of the complex. Hence, the function of these genes seems to be essential for recruiting a fully active complex necessary for efficient N-glycosylation [2]. 
21.30 21.30 22.50 22.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.01 0.71 -4.67 19 672 2012-10-03 14:45:55 2003-04-07 12:59:11 8 15 309 3 385 597 4 142.80 26 50.64 CHANGED hhh.Fh..hhclsph+lhhPs.lshP.hhlshhllllo............................aFlhsuGhhashIpssPhlspshcs.....psVsFh.tpsptQahhEuhhsuhlashsul....Ghlhlsps...stshs+p................chhhhu..Ghshllh.FhshhhF .................................................h.ah..hhp..h........clh+Ps.h.shs.hh..h...sh.hl..s..l...l.u.........h..h.h..t............h....hh........p..+.hWhhhslhh.hhhhsSGhhashI+....tPPhstpsscs.....t.lsahts.pspsQ..ashE..shhsuh...la.shhuh....uhl.lL.s...p.......ss.........t...s..hsct...................c.hhhhh.....Gh.shlhh...Fh.h...h.......................................................................... 0 117 190 292 +650 PF01010 Oxidored_q1_C oxidored_q1_C; NADH-Ubiquinone oxidoreductase (complex I) subunit C-terminus Bateman A anon Pfam-B_41 (release 3.0) Family This sub-family represents a carboxyl terminal extension of Pfam:PF00361. It includes subunit 5 from chloroplasts, and bacterial subunit L. This sub-family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane. 
25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.63 0.70 -4.86 113 14159 2009-09-12 05:40:05 2003-04-07 12:59:11 14 14 12076 0 124 13554 281 225.10 56 42.75 CHANGED RIYLLTFEGHLs......laFpNYSG...p..+sssh..Y....SISLWG..K.cs.....Kt..ls+N...hhL.t............psscpsSF...Fs...K..phY..pl.........s.pN.l+phh..p...s.Fhsh.s..pF..ss..Kp..sh......YPaESDNTMLFPLLlLlLFTLFlGhIGI.P....Fs..Qtth.....slDILSKWLsP..SIN..................LL..HpNsss..S......h..DWYEFlp...NAlFSVSIAhFGIFIA.hLYpPlYSShpNhsLINSF..lK..hss..K.R..hhh..DKIlNsIYsW..SYNRGYIDsFYsphhotGIRtLAcLTpFFD ........................................RIYLLTFEGHLN.VHF..QNYSG......p...Kssuh..Y....SIS..L..WG........K..ct..sKt........lN+s....hhLhs..................ps..s..p......p......sS...F...FS..........p.......ch.Y.....p..I..............s..pN...s....R.....s.hh.....p........s..F..h..s......l...s......pF.....s.s.....Kp....sa....................sYPa..ES..DNTMLFPlLlLlLFTLFl......G.sI......GI..P...........F...s...Q...t..s..h.......-l..D..IL.SKWL..T.P..SI.N..........................LL........Hp..s...S...N..s....S......h....DW..Y.....E.F.....l..p.s..A..l..F..S..V..S.....I..AhF..G.IFIA...hLYpPl.Y..S..S.h....QNL..s...........L...l..NSF.........lK......t.uPK...R.........hhh...D+.I..h...NhI.Y.sW..SYNRGYIDs.FYsp.hhh..Gl.RtLuc.hh.pFFD......................................................... 0 38 87 111 +651 PF01483 P_proprotein P; Proprotein convertase P-domain Bateman A anon [1] Family A unique feature of the eukaryotic subtilisin-like proprotein convertases is the presence of an additional highly conserved sequence of approximately 150 residues (P domain) located immediately downstream of the catalytic domain. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.02 0.72 -4.20 75 1577 2012-10-03 19:46:52 2003-04-07 12:59:11 15 112 646 15 726 1507 250 87.90 31 11.11 CHANGED lEaVplplslsHs.pRGDLplpLsSPsGspShLhs.pRsp.........D.tpsGahsWsFh......osptWGEsspGsWpLcl..pD.......................sstpppG......plpsWpLtl .......................lEcVplpl.s.lsHs..pRGDLplpLhSP.s..GT.p.oh..Lhs..pcst...................................D...s.ps..G..h....h.sa.sFh........................................osphaGE....s.s...p.G...s...WpLcl...p.D..................................ps.stptG......plppWpLhh.................................. 0 252 371 555 +652 PF04062 P21-Arc ARP2/3 complex ARPC3 (21 kDa) subunit Wood V, Finn RD anon Pfam-B_6413 (release 7.3); Family The seven component ARP2/3 actin-organising complex is involved in actin assembly and function. 25.00 25.00 40.00 40.00 18.30 16.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.01 0.71 -4.49 29 364 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 276 15 231 331 3 170.20 48 94.42 CHANGED MPAYHSsFhs-st........phl..G..NhulLPL+TpaRGPA.................ssspshDIlDEsLsLFRANsFF+NFEIKusADRlLIYhhLaIo-CLpKL.....ptshstp-ApKtLhsLAl-.sFsIPG-sGFP..LNuhYphP.ps+s-uElLRsYLpQlRQELuhRLlc+lYs..sp.p................spPSKWWLsFsKRKFMsKSL ....................................MPAYHSth.s.t............phl..G..NhulLPl+..Tp.h....+....GPA.h................................ss.pchDIlDEslhhF+ANlFF+NaEIK.u.ADRsLIYlhLaIo-CLpKL.....pt.ssopspupKthhsLuls.pFsI.PG-s..GFP..LNuhYthP..ts+p.-...s...............-hhRpYLp.....QlRQEhuh.RLhc..+.Va..s..sp..s..................spPSKWWhsFsKR+FMsKSL............................................ 0 73 121 183 +653 PF02331 P35 Apoptosis preventing protein Mian N, Bateman A anon Pfam-B_13247 (release 5.2) Domain This viral protein functions to block the host apoptotic response caused by infection by the virus. 
The apoptosis preventing protein (or early 35kD protein, P35) acts by blocking caspase protease activity. 20.10 20.10 21.20 27.20 19.80 17.80 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -12.01 0.70 -5.30 7 25 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 14 10 0 29 0 301.10 58 92.61 CHANGED MCVlhPs.hcssQTlIhDst...spphR-LlYlNplh....ss.lsKsVLMhFNISGPl+sVsR.hsspht-hhKSKlD......cpFsphp+sh.....SsphsGhc...+YFcs-cYoVsC.stsshKsKatKhLpscshs-ccsIEsacK.CL.P.......Lhsc..........psshYV..sVCsLKPuhtNs.uppsLSFpYpP.ssKVIVPhtHEIs-sG..hYcYDVh...AhVcuVp.....phcc.lQsLhh.pta.cs.c.lhascsshNcphhhhs.-FpTcshasKs....hpI.CNu.I.DccpchLhVKL+NVTspLscsl................ILsplc ...........MCVIFPVEIDVSQTVIRDCp..VDc.QTRELVYINKIM....NTQLTKPVLMMFNISGPIRSV.TR.KNN-LRDRIKSKVD......EQFDQLER-Y.....SDchDGFHDsIpYFKDEHY..SVSCQ......NGSVLKSKFAKILKSHDYTDKK.SIEsYEKYCL.Pp......LVDc..........+sDsYV..AVCVLKPGFENG.SNQVLSFEYNPIGNKVIVPFAHEIN.DTG..LYEYDVl...AYVDsVpFD.GpQFEEFVQpLILPSoFpcSEKVLYYNEASKNKNMIYKALEFTTESsWsK.SpKaNWKIFCNGFIYDKKSKsLYVKLHNVTSsLNKNV................ILshIK....................................... 0 0 0 0 +654 PF02225 PA PA domain Bateman A, Mahon P anon Pfam-B_259 (release 5.2) Family The PA (Protease associated) domain is found as an insert domain in diverse proteases. The PA domain is also found in a plant vacuolar sorting receptor Swiss:O22925 and members of the RZF family Swiss:O43567. It has been suggested that this domain forms a lid-like structure that covers the active site in active proteases, and is involved in protein recognition in vacuolar sorting receptors [1]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.23 0.72 -4.32 69 5065 2009-01-15 18:05:59 2003-04-07 12:59:11 17 204 1352 75 2693 4930 531 99.80 17 13.82 CHANGED sstsstptsll................................tsstssssshtsssspGpl..lls.....pputs...............sh....hp+s....ttAp..ps.GAtulllhs.....................shststhtshslPsshl.spssGptlhphh ..................................................................h........................................................t.........t..t....tp..h..t.....s.h...s..l....p.G..K...I......sll...............pcG...s.s...................................sa..............spKs...............ppAp....ps...GA..hull.lhs.......................................t....st.............s...p.....s.h...t...l...P.shhl...s.ttuttlht.................................................................. 0 704 1487 2130 +656 PF00291 PALP S_T_dehydratase; Pyridoxal-phosphate dependent enzyme Bateman A, Finn RD anon Bateman A Family Members of this family are all pyridoxal-phosphate dependent enzymes. This family includes: serine dehydratase EC:4.2.1.13 P20132, threonine dehydratase EC:4.2.1.16 Swiss:P04968, tryptophan synthase beta chain EC:4.2.1.20 Swiss:P00932, threonine synthase EC:4.2.99.2 Swiss:P04990, cysteine synthase EC:4.2.99.8 P11096, cystathionine beta-synthase EC:4.2.1.22 Swiss:P35520, 1-aminocyclopropane-1-carboxylate deaminase EC:4.1.99.4 Swiss:P76316. 
26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.14 0.70 -5.19 148 31175 2009-09-12 08:45:30 2003-04-07 12:59:11 20 102 5380 356 8492 23404 13882 295.50 21 77.75 CHANGED lp.thstTPLhph...................pla.hKhEshp..s.sG..ShKsRsu...hhhltp..........s.p............................ptl...lsuouGN......................................puhulAhsus..th.G..lp.s....hlhl.....Pps.sst..............t+hthhcthGA...pllhhstt.t.t.stst....phsp...t......h..h.hpt.................................ssstshtGhtohuh..El...............h....pph.........................p.h............llsss.GsG.GhhsGlupshpp..........ts.c..l.luVps..pss.....sshhpthtttt...................................................hht.ulshsh........................ssth.hthhcph...............h.....sVs-p-shp.uhptlsppp............G.lhs..tsuuussluus.ht......h.........t.tp..ll.hl.lsu ......................................................h....ht.TPLhh..........................h.tt.h.h.ss....p.la..h...........KhEs.......hp....s...sG.........S.......h.K.sRsu.........hthltt...................A.tpt......................................................................................st....psl..........lp.so...u......GN.............................................pG.h.u.l...A..h.s.us......th..G.........hc.s............hlhM.................Pps...hst.......................+htt...h.c.t..h.G.A.........c.l....l..h...s..s.........s...s......s.........t..h..h..s..t.st........ph.s....p....ph.......h.t...hp...................................................................................ts.s..s.h.t....tp...t..s...hu........El...............................................h...........ppht..............................tt.D.h..................................llssl...G....s.G..G..shs..G....l...u.t.hhpp.....................................sl..cl..lul-s........pss..........................s.sh.....h...t.s..h.t..s.s.p...................................................
......................p......tht....Glshsh.......................................................................................sssh..s.hp.h..h..c.c.h..t............................hh....sl.s-p-....shp..shct....l....t....p....pc.....................G....l.ls.....s.s..u..u.us...u...l...sus...h.....p.h.t.t.p............................tspp...ll.hl.hs............................................................................................................... 0 2689 5306 7156 +657 PF00024 PAN_1 apple; Apple;PAN; PAN domain Bateman A anon Patthy L Domain The PAN domain [1] contains a conserved core of three disulphide bridges. In some members of the family there is an additional fourth disulphide bridge the links the N and C termini of the domain. The domain is found in diverse proteins, in some they mediate protein-protein interactions, in others they mediate protein-carbohydrate interactions. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.64 0.72 -4.22 89 2217 2012-10-02 11:41:37 2003-04-07 12:59:11 21 217 255 64 1525 2546 76 80.20 16 20.08 CHANGED C..........atthtstthts......tshpth.ts....sshppCtptCt.....pptp.........Cpuasapts.....................pppChLpspspts.......................spltt.....stshshapp.pC ............................................th.h.s.......shpth...ts..........ohppCtptCt.....pptp.............C.puas..apsp.......................pppChLpspspts........................................th...............h..h........................................................... 0 624 791 1265 +658 PF01569 PAP2 PAP2 superfamily Bashton M, Bateman A anon Pfam-B_486 (release 4.0) Family This family includes the enzyme type 2 phosphatidic acid phosphatase (PAP2), Glucose-6-phosphatase EC:3.1.3.9, Phosphatidylglycerophosphatase B EC:3.1.3.27 and bacterial acid phosphatase EC:3.1.3.2. 
The family also includes a variety of haloperoxidases [1,2] that function by oxidising halides in the presence of hydrogen peroxide to form the corresponding hypohalous acids. 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.46 142 14398 2012-10-02 00:53:37 2003-04-07 12:59:11 16 88 4426 44 4448 11648 1667 132.00 19 47.04 CHANGED thhhhhhshhh.............shhl....s.h........h....Khhhs..p..sR...Phhhhtth..h.....................................................uF...PSGHs...shuhshhhhlhhhhtphhhh...................................................hhhhhhshhluhuRlhhuhHahsDllsG....hhlGhhhsh.....hhhhhhtthth ...........................................................................................................................................................................................................................hhh....hhhshhh........shhl...........s.h..............h..........Kth.ht....c.......sR......Ph....h.h.h..h..h..h....hh...........................................................................................................................................SF.......PS..GHs...........s.h.u.h..s....h....s..h....h...h....h....h..h...h..t.th.h.th.......................................................................................................................................hhh..h..h.u....h.h.luh..S.R.l.....hh.G.h.Ha.sDl..lsG.....hhlGhh.hshhhhh.......hh...................................................................................................................... 0 1276 2479 3559 +659 PF03828 PAP_assoc Cid1 family poly A polymerase Griffiths-Jones SR, Wood V, Mistry J anon PROSITE Family This domain is found in poly(A) polymerases and has been shown to have polynucleotide adenylyltransferase activity [1][2][3][4]. Proteins in this family have been located to both the nucleus and the cytoplasm. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.09 0.72 -4.02 112 1402 2009-01-15 18:05:59 2003-04-07 12:59:11 14 49 312 22 919 1373 27 60.60 28 8.69 CHANGED sLGpLLhpFFcaYu.p............pFsapphsIul....ps.uth....hsK.pphthh.......................t.thlsIpDPhsssp ...........sLGpLL.hpFFcaYu..................cFsapcpsISl....+p..uth........ls+.pp.t.t.ht..............................................................phlsIE.....DPhp.s................................................................. 0 290 439 704 +660 PF04928 PAP_central Poly(A) polymerase central domain Wood V, Bateman A anon Pfam-B_1341 (release 7.6) Domain The central domain of Poly(A) polymerase shares structural similarity with the allosteric activity domain of ribonucleotide reductase R1, which comprises a four-helix bundle and a three-stranded mixed beta- sheet. Even though the two enzymes bind ATP, the ATP-recognition motifs are different. 
20.50 20.50 20.60 20.90 20.10 20.40 hmmbuild -o /dev/null --hand HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.59 0.70 -5.57 36 745 2009-01-15 18:05:59 2003-04-07 12:59:11 12 32 320 11 465 707 14 304.90 37 49.06 CHANGED pppa.....GlT.PlSts.Psst-hphoppLhctLpptssaEop-.-sp+RppVLppLppllp-aV.ppluhp+shspphspssuuplasaGSY+LGVhusuuDIDslslsPpplp+........pcFFssFhchLpppsplscltsVt-AaVPlIKhpasGIplDLlFApLslsplPc..sl.sltDcslL....+slD-pslRSLNGsRVsDpILcLVPsh....csF+hsLR.slKLWAp+RulYuNlhGF.GGVuWAlLVARlCQLYPNAssus...llp+FFplaspWpWPp....P.........VlLpphpcs.......hthp..VWsP+h...spD+hHlMPIITPAYPshsuT+NVopSThpllhpEhpRuhcIspclh.........hsp..tsWpcLaEth .........................................................................................................................................................hGho.slShs.Pp..-h.hsppL.p.Lp..sha.Espp.....Ehp+..............R..........lLt.pLp.p...llpcal.pplutp+shstthhttssutlhshGohplulhs.suDlsslslssphh.......................................................................................................................................................................................ppaFt.hhphht.t.plpclpsV.cAaVPlhchpasGlplDllaAplsl.plP..p...sl..clp...sc..s..lL.....csL...D..ps.lR.SL..N.GsRV..sDpI.Lc..LV.Psh.............psFRhsLR.slKhWA+.+.............Rul..YSNhhGFhGGVsWAhLVA.RsC...Q.............L.Y....P.N....A.....s.sus....................lVp+FFhla.sp..WpW....Pp..................P........................................VhLpt..-css..........Lth...VWsP+h.....................pD+.......hHhMPIITPAY.Pp.NuoaNVo...hSThplh............hcEhpp..G..htlspcl...............sp....tpWspLFp.............................................................................. 
0 142 239 368 +661 PF05028 PARG_cat PARG; Poly (ADP-ribose) glycohydrolase (PARG) Moxon SJ anon Pfam-B_5996 (release 7.6) Family Poly(ADP-ribose) glycohydrolase (PARG), is a ubiquitously expressed exo- and endoglycohydrolase which mediates oxidative and excitotoxic neuronal death [1]. 20.80 20.80 21.40 21.10 20.40 20.60 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.14 0.70 -5.20 23 357 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 168 8 240 364 7 279.40 30 52.28 CHANGED asc.h...p..ct..Fhp.h............LP..ths+lsLplsslh.t..............sl.lLtpppspplhlopcplusLLAsuFFshhspp.................ttt.sphPsh.NFspLap....s.ptpsphpKlKClhpYFcpls.......pphssGhVoFpRhth................t.pphs.WppsstsLp....plclhscutIEDp..tshLpVDFANKalGGGVLspGsVQEEIRFhIsPELlluhLFscshccsEAlhIhGApRaSsYTGYusoFpa......................pGpa.Dpp.shDphpR........+pTpIVAIDAlpa.......pshhp..QacpstlhRElsKAasGFh...ptptpspphs...................................................................sluTGNWGCGuFuGDscLKslIQhlAsS...................tspRshl.YhTFGDppLpshhp 
.......................................................................................................................................................................................................h.............ht...h............hs.hhphh...hph..h.h.................................................h.hl.....t...s..t.l...hop...husLlupsFhshh..t.............................ph...h.sF.sph..ht......t......tt.........pKlpslhpYFppht............tph.psh..lshp..Rp.h......................................................p.....ht..ppt.h.ht.....hph.............ps.....h......IE..pp.........th.lpVDFANch...lGG.Gsh.s.t.G.hl.Q............EEIhFhhsPELl.luh.Lh..hp.......hppsE.slhlhG..spp...a...SpapGYu.p.oapa..............................tt.p..h..ctt.....h....c..t....htc...........htspllAlDAhph...........t.t...p....Qat.ptl.RElpK..A.....h.s.G.Fh.............t..t..hs...........................................................................................................................................slu..TGpWGCGs..F.s.Gc.pLKhllQhhAuu....................s..t.+s.hh.Yhsatp..th.....hh................................................................................................... 0 114 147 209 +662 PF01734 Patatin Patatin-like phospholipase Bashton M, Bateman A, Dlakic M anon Pfam-B_2206 (release 4.1) Family This family consists of various patatin glycoproteins from plants. The patatin protein accounts for up to 40% of the total soluble protein in potato tubers [2]. Patatin is a storage protein but it also has the enzymatic activity of lipid acyl hydrolase, catalysing the cleavage of fatty acids from membrane lipids [2]. Members of this family have been found also in vertebrates. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.36 0.71 -4.40 116 10726 2012-10-02 11:19:24 2003-04-07 12:59:11 17 127 3394 5 3812 8861 1395 181.80 20 39.60 CHANGED LslsGGGs+.G....hhphGslpt....L..................................tththphchlsGoSsGulsu...................................................hhhshs...hs.pphhpthtphhtpthhshh..................................................................................hhhthhtttshhssphhtphlp......ph.......................lsptthpphttththh.............................................................................h.ht.tttthhhhttptts......stplhp..............................slh...ASs..uhPshh....................................tshph........ssp.......ha..........hDGG......lh.ss....hPh.phsh .............................................................................................................LsLsG.G.Gs+...G.....hhp.hGllcs................L.........................................................................................tp...tsh.h...c..h..l..sGoSs..Gul.su...........................................................................................................................................................u.hh..s.ss.............s..s..t....t.....h...h...th.h.t.....p.h...ht.p.hh.ht.h....................................................................................................................................................h.........h..h......t....t....s....h....h....p.....s...p...h....h.t..ph.lp..........................ph.....................................................h.s..t..t..t.h.p.p.h.h.t.....hhhh..................................................................................................................................................................................................t....t..s.....h..h....t..h.t...h..h.h.s...............................p.t.p.l.h.p................................................
...........................................................s.lt.............ASs...............ulPsha............................................................sh..ph........................................sup..................................................hh.............................................................lDGG.........lh..ss...hPht...h....................................................................................................................................................................................................................................... 0 1192 2259 3131 +663 PF02460 Patched Patched family Bateman A anon Pfam-B_2400 (release 5.4) Family The transmembrane protein Patched Swiss:P18502 is a receptor for the morphogene Sonic Hedgehog. This protein associates with the smoothened protein to transduce hedgehog signals. 19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 800 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.61 0.70 -13.13 0.70 -6.63 9 2194 2012-10-02 18:57:54 2003-04-07 12:59:11 13 37 648 0 1317 3841 1223 372.80 15 51.82 CHANGED 
hhs.chhahhs.sDhp.shtphtshspp.shs.pcahsucuhststa..hhlsupspss.......lLp.shLs-lhplschlhpsh.......................tl.pss....h.hsacclC.pa.phspsspphh..................lhpp.ps...pthslTYPhhphhsptlYlusphuGVphhs.......................ssplp.s+shhLhahschsscpscphuppaEppLtpalcpp.ssp..hlphshhpsphls-Elp+suhshhPhhslohhlLhsFohlssh..h.p...........lppKPhlAhhGllsshhAhlouhGhLhhhGh.assIssVhPFLlL.uIGVDDhFlhlsAWc+Tstpps...........hccRhucslsEuGsuIoITShTslloFulGshTshPulplFChhsulAlhFsalYQlTFauAlhulsschEhptppsh..h.......hs....p................pphpspsuh.....................................................t.upphpp..hhs.....chhhshYssFlhssps+lhslhlallYlsluhYGshshcpsLsPspLlhs-S.Llchhp.h-chlaptGttlplhVpNPPslsh.spshcchpphhscFEshsashGtpuTpaWLp-Ypp.h.p......php.hpspc..................h..hsthcpalthsttsh.Wtpshhhs......cssstlppFhFplu..hcshsshss+s+hhpphRslAcpas...FNVolFcp.thasDQh.plhssslpshlhsllsMhlVshlFIsp..sshslshulsSIslGVhGhhShWGlsLDPloMlsllMSIGFSVDaoAHIuYtahpsttp....sspcRlhsALpslGWPlhpuuhSTlLslhsLhhVsoYhlhlFhKTlhLVlslGhlHGLhhLPllLshhss 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..................................h.......t....h.....................a................................a..h..-..t..h.............h...............h..........h....h.s.h....h....h...h.h.h...l..s....h..h.h..h....................p...h.....s.......sh.h.........h..h..h.s..l...h....h...........h.....t...h...............h...G...h.......h..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......................................................................................................................................................
................................... 0 483 664 1096 +664 PF02170 PAZ ZAP; PAZ domain Bateman A, Song JJ anon Bateman A Family This domain is named PAZ after the proteins Piwi Argonaut and Zwille. This domain is found in two families of proteins that are involved in post-transcriptional gene silencing. These are the Piwi family and the Dicer family, that includes the Carpel factory protein. The function of the domains is unknown but has been suggested to mediate complex formation between proteins of the Piwi and Dicer families by hetero-dimerisation. The three-dimensional structure of this domain has been solved [2-4]. The PAZ domain is composed of two subdomains. One subdomain is similar to the OB fold, albeit with a different topology. The OB-fold is well known as a single-stranded nucleic acid binding fold. The second subdomain is composed of a beta-hairpin followed by an alpha-helix. The PAZ domains shows low-affinity nucleic acid binding and appears to interact with the 3' ends of single-stranded regions of RNA in the cleft between the two subdomains. PAZ can bind the characteristic two-base 3' overhangs of siRNAs, indicating that although PAZ may not be a primary nucleic acid binding site in Dicer or RISC, it may contribute to the specific and productive incorporation of siRNAs and miRNAs into the RNAi pathway. 
23.30 23.30 23.40 23.40 23.20 23.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.61 0.71 -4.86 55 1915 2009-09-11 14:14:06 2003-04-07 12:59:11 17 66 319 37 1147 1880 5 132.50 21 13.63 CHANGED tsll-..hhtphhpppptpt.p.ppp........hpcsltGlhVhspaps........+pa+lsslshcssspppFphp...........spphohs-Ya+ppYslplphs.p.Phlhsppppp................................................saLP.ELCplsshpchhppphsh..pshhh+tpsps .....................................................................................................................................hpctlp...G..hh.V.s.p...aps................+pa..p.lssl....sh..p.................s........s..p.p.....pF.hp...............................spphohh..cYapp+..Y..s..l...pl...p...hs..p.PhL..p.sspppc...........................................................................................saLP..E...l..C..p..l..s....s.......hp.chhpchtph...hshhh+hp................................................. 0 337 545 909 +665 PF00564 PB1 OPR; PB1 domain SMART anon Alignment kindly provided by SMART Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.68 0.72 -4.32 53 2276 2012-10-03 10:59:06 2003-04-07 12:59:11 19 126 339 38 1408 2199 10 83.30 18 13.97 CHANGED shplKhpatssht+........hphspsh.sap-Lhptltpththt........tshplpY.Dc-t.-hlslssDcDLppslcphct............t.t.pl+lhlhss ........................................................thKhpats.phht.........hths...t...s....h..sap....cLhpplpphhsh......................tsh...plpY...........h...D...-..-...s...-........h.ls.lss-pD...Lppul.phhpt.............................lplhl...t.......................................... 0 348 710 1054 +666 PF00786 PBD P21-Rho-binding domain SMART anon Alignment kindly provided by SMART Domain Small domains that bind Cdc42p- and/or Rho-like small GTPases. Also known as the Cdc42/Rac interactive binding (CRIB). 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.09 0.72 -3.55 44 1824 2009-01-15 18:05:59 2003-04-07 12:59:11 23 43 302 22 1078 1667 2 56.30 28 11.27 CHANGED tISsP.ssacHhsHVGaDspsG.hh.....GhPppWppllpss..........hsppc.tppspsshpshtahs ............tISsP..os.F....c.Hs.sHV..Ga.D.st.s.Gths........................GhPtpatp.llpps..................................hpp.p...p..t..h.......t...................................................................................................................... 0 294 490 775 +667 PF01161 PBP Phosphatidylethanolamine-binding protein Finn RD, Bateman A anon Prosite & Pfam-B_5394 (Release 7.5) Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.37 0.71 -4.35 128 4705 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 2779 40 1522 3605 238 145.50 26 74.89 CHANGED sshlshpa..............hp.Gssl..sPsh...hpsh..Pp..........................ssp..s.asllhh..............................DPDA.....................htsah.HWl......lsNIP....................................s..pshssh...................up.............................hsYhGPsPP.Gss.hHRYhFhlauhss................sh.ptssst...sp..htpsh.................ppa...Lsps.lsus..ahp .................................................................................................t...h..pa..........t.htsG.ssl.......oPph...hssh..Pt.................................ssc....t.as.lshh..............................DPDA..Ps...........................tpsahHW..l..........Vss.IP....t..t...lsts........................................uhh..ps..t.s.sh............Gp..............................ssYtGssPP...........G.............pt..hHR..YhFh..lauhss..............ph..sl...stssst....s....hthth...........................ptpt..Lups.lsuha............................................................................. 
1 420 841 1251 +668 PF01399 PCI PCI domain Bateman A anon [1] Family This domain has also been called the PINT motif (Proteasome, Int-6, Nip-1 and TRIP-15) [1]. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.03 0.72 -3.72 77 4727 2012-10-04 14:01:11 2003-04-07 12:59:11 22 54 409 7 3184 4579 40 109.70 16 22.45 CHANGED .shtpllpshppss.hppatphlpph.................................tthhtp.hhtt...........hhppLhpplhcpslhplhp.......applshppluptlplss.....................splEphlsphIhsstlp.upIDph....sthlhhpc ....................................................................................................................................................hhplhps.ht.psp.hpt.h.tp..h.l.p..ph........................................................tt.h.h.t..p...h..hhp...........................hh.p.p.Lh.p...plhpps.lt.p.hhp............................sappl.s.lsplupt...l.p.lss....................................pclEph.ls.phI.tsst....l.p..u..plD.ph.sthlhht.t................................. 1 1114 1760 2604 +669 PF03462 PCRF PCRF domain Dlakic M anon Dlakic M Domain This domain is found in peptide chain release factors. 20.80 20.80 21.00 20.80 19.30 20.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.45 0.71 -4.39 100 9136 2012-10-03 10:28:09 2003-04-07 12:59:11 13 15 4731 15 2219 6114 4472 113.30 38 32.37 CHANGED c...htppsDt-...ht..p.hppElpplp.pplpplcpc......hsh..sth..Dp.pssllEI+uG..sGGsEAs....aAp.LhRMYh+aA-.p.+........uacscllc..hstu-h.u.......GlKpsslplp.........Gc..........tAYuhLKhE ..............................................th....ttsDt-.ht..c...s..pp..El.pp.l........c.p.c.lpp.lE..c........L..l.p...........cst..Ds.....pssh.lEI+u........G...........uG..GsEAs....aAu.LhRMYpRaA....E....p.+..................Ga+sEll-.......hs.tu-h..u....................GhKplshplp.............Gc...........sAYGhLKhE.................................. 
0 757 1443 1875 +670 PF02153 PDH Prephenate dehydrogenase Bateman A anon PSI-BLAST P20692/1-290 Family Members of this family are prephenate dehydrogenases EC:1.3.1.12 involved in tyrosine biosynthesis. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.53 0.70 -5.50 10 4258 2012-10-10 17:06:42 2003-04-07 12:59:11 12 22 4077 29 1083 3244 2767 243.70 25 69.28 CHANGED lAhuL++pG.tsplhGhDhsstpsttAhcLGlhDssss..lptsp-A..DlllLAVPlcsstclLcclus.tlccusllTDluSVKscllcshcphls.phtpaluGHPMAGoctsGstuucssLF-spshlLTPspc.Tssptlppl+cllcthGA+lllhsPccHDpssulVSHLPHllAhuLsstht..chpsshc.sh+hAuuGFRDhT....RIAuusPhhWpcIhhpNscslh-tl-catpclsclpphlc....spDt-...sLhchh+p ..............................................................................pl..h...shs...p........t.t..h....t......u......t.......s...h..h..p...t.......h.......t...t......h............p.....h....l.....t....p.A..............-.llllus.P..l.p.t....s...p.h....l...p...c...l.......ss...........l......t.......t.....s.s.........ll.....sD.l.u.SsKs.s.l.l.p.t..h....t....p...h...h......s............................s............phl..u.u..HP....M.s.Gsp........s..G.....s..u.ps.sL.a.p.st..h.hl.ls............ss.........c............t.......s.........s.................p....s.......h....p....h....l...t.p...h.h...p...s.h.G.A.+.lh.p.h.s.s.p.-HDpshAhl...S...HLPH..ll....u...hulstt.ht..........pp.s....t....p..h..p.......h.....hp....h.....u....u..s....u....F....R..-....ho.....R....lA....u....s....s.P...p.h...ap...-Ihhs..Npp.llphlcpa..tppl.sphhp.hlp..........psD.t.p...thhphht.p.................................................................................................................................................... 
0 337 710 930 +671 PF04166 PdxA Pyridoxal phosphate biosynthetic protein PdxA TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family In Escherichia coli the coenzyme pyridoxal 5'-phosphate is synthesised de novo by a pathway that is thought to involve the condensation of 4-(phosphohydroxy)-L-threonine and 1-deoxy-D-xylulose, catalysed by the enzymes PdxA and PdxJ, to form either pyridoxine (vitamin B6) or pyridoxine 5'-phosphate [1]. 19.70 19.70 19.70 19.70 19.00 18.40 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.99 0.70 -5.21 16 2867 2012-10-02 21:08:39 2003-04-07 12:59:11 7 6 2310 19 692 2198 2820 293.60 39 88.25 CHANGED h..tpc..hpp.s.phVllu-tshL..ppttthhshp.l.........slcchp.....s..p..shh.hlt.hsL.th......s.sh........h.GcssstsGthslchLscAsphshsGphsullTuPlsKsslppA....Gh.asG+TEaLA-......hss......sc.....pslMML.ssp......cLRlsLlTsHlPL+-VssslTtctltphlcllpcsLppcaGItpP+IuVsGLNPHAGEsGhhGpEEh-pIh.....................Pul-phR..tpGlslh.....G.....PlPADTlFpts........tttth...DAVLuMYHDQGLhslKhhuF.spuVNlTLGLPalRTSsDHGTAaDlAGpG.hAcssShhsAlchAs ......................................................................ttt.......h.phllhu-hplL....p.p.t.s....th.l....s..ls....l..................................p.lps.hp......st.....s..p..........t.........s.s.hh....sl....h...s.h...tt..............s..lp........sGplsstsGphslcsl.pcAschsh.....sG.....ch.....sAllTuPl..pKt.s..l.pp.A...........G....h...s......FsGHTEal...uc.....hst...........................s.p........c.sl.M.ML..u..s.c.................................pLRVuLsTTH.lPL+clscslT.ptlppslpl....hppsL......+p..........cF....G.l..spP....R.IhVsGLNPHAGEsGhhG.p.EEh...-.h.Ih..........................................................Pulc..p..h..+......tp..G..hp.lh......G..........PlPADTlFp.t.........................hh.s.ph.........DuVlAMYHDQGL.sLKhhuF.....s...........c.........uVNlTLGL..PhIRTSVDHGTAhD.......lA....G.....pG..pAcssShhsAlphA............................ 
0 212 441 582 +672 PF00934 PE PE family Bateman A anon Pfam-B_253 (release 3.0) Family This family named after a PE motif near to the amino terminus of the domain. The PE family of proteins all contain an amino-terminal region of about 110 amino acids. The carboxyl terminus of this family are variable and fall into several classes. The largest class of PE proteins is the highly repetitive PGRS class which have a high glycine content. The function of these proteins is uncertain but it has been suggested that they may be related to antigenic variation of Mycobacterium tuberculosis [1]. 20.60 20.60 20.60 21.10 20.50 20.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.30 0.72 -3.90 135 4012 2012-10-01 21:44:22 2003-04-07 12:59:11 15 13 103 2 365 2559 0 90.80 46 26.20 CHANGED MSaV.hssPEhluAAAsDLuuIGSsluAANAAAAuP.TTuVlAAuADEVSAAlAALFuuHuptYQslSAQAAA.FHppFVpsLsuuAuuYAuAE.AAN ..................MSaV.hssPEhluAAAs-LusIG.Ss.luAAN.A.AAAus....TT.ull.A.A.uADE.VSuAlA.A.LFuuHuptYQulSAQAAA.FHppFVpsLssuuuuYAsAEAu...................... 
1 142 177 350 +673 PF01095 Pectinesterase Pectinesterase Finn RD, Bateman A anon Prosite Family \N 21.20 21.20 21.30 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.76 0.70 -5.85 35 2674 2012-10-02 14:50:22 2003-04-07 12:59:11 14 76 936 20 1177 2391 56 228.80 26 55.97 CHANGED slVVApDGSGpa+TIsEAlsssP.c+uspR...aVIYlKtGlY.cE.NVcVsKcKtNlhhlGDGhsKTlITG.phshhcGs.TTFcoAThAlsGcGFlA+DIsFpNTAGPpKHQAVALRVsuDhulFY+Csh-GYQDTLYsHSpRQFYR-CsIsGTVDFIFGNuAsVFQsCpIhs....R+PhssQ.pNhlTAQGRpDPNQsTGlsIQsCpIsuss-Lh....ss.sshtTYLGRPWKpYSRTVlMpShIsshIsPtGWhtWsG..sFALcTLaYuEYpNsGPGussupRVKWsGa+slhosp-AppFTlupFI.tGs ...........................................................................t..t...a.plttAl.t..h....................................................................hl....l..G...Y...................E.......l.......l..................l...h.......G...................s.................................s..........................h.....................................t...........................................................................t.....................s.....s...............h..............p.............o....A.......o..........h.......h.........s........t........u......s.....s.hh...h....p..s........lsh........p............N.............o..............h...............G............s..........t.......t............p..............p....A....V.........A..l..p..s..s..u..D..........p.s........hh.....ps..ph........h.Gh.QD.Tl........a...........s............p...........s............s............R.....p...........ha.p...........s..shIpGslDFlaG..p.u.s....s....l.F....p.ss.plhs.........h....t.....s.............t......t...........t.....s......h....l....s..A...u.p................s...t...............h..........Gh........hh.pspltus.....t........................................h.LGR..s....W..........t................h......u................p..............s..........h.......h..s.hs..........t...l......G........a......h.......
.........................................................h...............ph.s........G.s.................................................................t..................................................................................................................................................................... 0 261 757 995 +674 PF04710 Pellino Pellino Mifsud W anon Pfam-B_5882 (release 7.5) Family Pellino is involved in Toll-like signalling pathways, and associates with the kinase domain of the Pelle Ser/Thr kinase [1,2,3]. 20.00 20.00 20.40 34.90 19.30 19.90 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.47 0.70 -5.83 8 257 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 97 3 152 252 0 347.90 58 92.85 CHANGED up-ptsu.scpsl....+YGELIVLGYNGsLPsGDRGRR+.S+FsLpRRs+ANGVKPSshHhl.poPpsSKAlssKsQHSISYTLSRspoVVVEYsHDscTDMFQIGRSTEsPIDFVVhDT...........lPGu....t-sp-stspQSTISRFACRIlC-RsPPYTARIYAAGFDoSpNIFLGEKAsKW...psscGcMDGLTTNGVLlMHP+sGFo.E-SpPG...lWRElSVCGsVYoLREoRSAQp+GphV.p-sNlLQDGoLIDLCGATLLWRousGLp+sPT.+cLEtllpclNAuRPQCPVGLsTLAFPphp+.......tss.s-+QPWVYLpCGHVHGaH-WGpccEpt.p.cRcCPhCRsVGsY...VPLWhGsEPAFYlDsGsPoHsFsPCGHVCSEKTstYWuphPLPHGTpAFpAACPFCAs.LsGppGal+LIFQs.PLD 
...............................................................at.LllLG....YN..Gs......Lss.G...D.+....G.RR.........+.S+hsLh+RscANGVKssshHhh.soP.......suKA...lp.s.+...sQHSIS..YTL............SRspoVlVEYscDssTDMF..QIGRSTEs.IDFVVoDT.............................hsGu.........s.ss-s.tssQSTISRFACRIlC-Rps.P.aTARIaAAGFDSS+NIFLG..............EK..AsKW.....................+ss.D.Gp.MDGLTTNGVLVMH.Pp...sGFs...p-S.....tPG......................lWREISVCGsV.aoL..R..E......oRS...A..QQRGKhV............EsEoNhL.....QDGoLIDLCGATLLWRTu...........p..G.........L.+...........sPTh...+pLEuhRpElNAuRPQCPVGhsTL.....uF..Pohtp...............................pps.sc.cpQPWVYlpCGHVHGaHsW.........Gp.cp....-........p...t................s...........p.......p..........RcCPhCRtlGPY.VPLa.LGpEs.....uh.....a...lDsGP.PoHAFs.PCGHVCSE.KTstY.WuplPLP.HG...T...c.sF+AACPFCut.Ls.....G-pshl+LIFQs.slD..................................... 0 35 49 95 +675 PF02452 PemK PemK-like protein Mian N, Bateman A anon Pfam-B_2134 (release 5.4) Family PemK is a growth inhibitor in E. coli known to bind to the promoter region of the Pem operon, auto-regulating synthesis. This Pfam family consists of the PemK protein in addition to ChpA, ChpB and other PemK-like proteins. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.78 0.72 -3.85 306 4707 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 2273 13 846 2702 289 106.10 23 83.07 CHANGED c+G-lhhlsh.....ts..sh.GsE.......tt.hRPs........lllsss..hhs.ph..t......slllsPlT.o....pht.t...............shcltlp.s...pt..pt....................ohlhs-plpolc.+p...Rl....t.c........plGp...............ls..spphppl.ppslthhl ......................................................+G-lh.h.ssh......tP.......s...GsE.................st..t......RPs........lVlsss...hhN.ph...s..........slllsslT.o...........php.t.t......................................................................shcl.ls..sp.....th..t.......................uhlhh-plpols...+p.Rl..........t.c......................plsp.......................ls....p.p.h.h.p.p.l.pptlth..h.............................................................................................. 0 269 586 752 +676 PF01804 Penicil_amidase Penicillin amidase Bashton M, Bateman A anon Pfam-B_1410 (release 4.2) Family Penicillin amidase or penicillin acylase EC:3.5.1.11 catalyses the hydrolysis of benzylpenicillin to phenylacetic acid and 6-aminopenicillanic acid (6-APA) a key intermediate in the the synthesis of penicillins [1]. Also in the family is cephalosporin acylase Swiss:P07662 and Swiss:P29958 aculeacin A acylase which are involved in the synthesis of related peptide antibiotics. 
21.30 21.30 21.80 21.70 17.40 20.80 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.03 0.70 -5.50 21 1530 2012-10-03 21:14:07 2003-04-07 12:59:11 13 8 899 125 501 1655 2079 651.20 21 92.28 CHANGED tsVcIh+Du.aGlPHIhAssptslhauhGYspApDRLhpl-hh+tpApGphuchhGs...........stlssDhhh+phthsp......ssctphpshssp........tshlcuYusGlNtYlp....t.tp.hPh-ashhshp........Pc.asshDshtlt.hhhhphsus.............h......t.hhshasshh.t..................................h.tshs.hts................h.thstthsspsts.............hGSNsWsluup+TssGpslLhssPHhsatsP.shahphpLps.suaclhGsohsGhPhl.hhGaNsplAWuhTsstschhDhYt.pls.p.ssspYhas.Gpapshcp+ppsltV+s..sp.....shphplhcThcGPVlspsss.............sshshthsshtpsspsh.sahphscAcslpphpp.s.tpthtssshNhhaADtpGsIuahssuhhPh....pss.ps........hhPssG........uph-WpGhhs.hpt...hPphhsPs.......pGalhsuNppsh.........ssshsht.hssthhts.....hRspphhphlstpss.........hshcshhplphs.stshhschhh.................llcshp.........ssssssppAhstLtuWstphsssopuAs.........lathahcthsppshhsthst.hhsshhs.h.......................s.h.hssssshh.tttp.........sthlspuhspuhs..................hpsphG.................shshpWGchpp.........hh.hpshshpu.........sssshs.hhGshsshsssths.sstsssshtsshs.....hphss.hspsttlhssupSusPtSPHYsDQh.haspspahslhhs.pplptphpt 
.........................................................................................t..splhcDt.hGlPHI...hA.p...s...tt.c...hha...u...GYspApDRhaph-hhR+hut...Gpluchh..G.................ttlt....Dt.hhRt.h.thtp......ssptt..h.s.t.h.ssps...........pphl..p...uYssGlNsalt............t...t...hhPh.-a.thhsht........s.t.W.ps.Dsl.hh..h.....hhh.hh..ph...tss.......p.................h..............h.................t.....h.....s...h...h.s.t..........................................................................................................................t...........................................t.hh........h..........................................................hGSNsWslusp+o.tsGps.llss.sPHh.sh.t.h.P....shaa..........t.hp....L........p.......h....s..........t................h.......s......lhGsshsGh.Psl.hhGaNt...........c.lAWuhTs.sh.s.D..Dlah.phs...............t....p......s.......t..t.....Yh......h.s...s......t........h..hp.tp.pp.sl..tV+s...st...............shp.hslh....T..t.a..G.Pl.lpts.t.................................................thshuht.h.sh..t...s...s..t.s.h....shhthsp..A..psh..pphhp..s.h........pt......h.t..h..s.s.Nhlh...AD.......p...G..s..I..uahssG.thPhR.....tthps.....................................................hhPssG.........psp.hp..W.p........G.h.hs..hpt.........hPp.......hs.Pt..................pGalhsuNsp.h...........................s.shsh......hs.....thh.s..........Rs...pcl....t....p...h....l...t...t...t.tt...............hs.hpsh..t.hphD..p...hs.......h.u...p...hh......................................................h.hp..hhp..............sts..hp.ph....hp....hL..t.t..W...st.ph....sh.....sshush...........................................lhthah.pt.....h.h...p...t...h..h.t...s.t.h.s...........h...ht.....h..s.........................................................h.........s.....s......h...h.t..t.......................hsthh.tts.h.ttsht..................httt.h.G.............................pht.W..Gphpp...............hh.hts.h.s..h..s............................
........h.hs...s..G......s..t.t.s.h..t...s..........s..t...h...t.......................t......t....h............h...h..usu...ht...........hlhs...h..sc....s....tuh.h.h..hs.hGp.S.G.ps.hS.....saatDthp.ha.h..p.s.p.hhsh.hp.tth.....t.......................................................................................................................................................... 0 178 337 452 +677 PF01469 Pentapeptide_2 Pentapeptide repeats (8 copies) Bateman A anon Bateman A Repeat These repeats are found in many mycobacterial proteins. These repeats are most common in the Pfam:PF00823 family of proteins, where they are found in the MPTR subfamily of PPE proteins. The function of these repeats is unknown. The repeat can be approximately described as XNXGX, where X can be any amino acid. These repeats are similar to Pfam:PF00805 [1], however it is not clear if these two families are structurally related. 20.30 20.30 20.50 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -9.90 0.72 -4.32 126 12308 2009-01-15 18:05:59 2003-04-07 12:59:11 13 89 95 0 878 9127 90 40.00 46 47.97 CHANGED uNsGshNsGsuNhG.hNhGsuNhGshNhGsuNsGstNsGs ......................NhGhhN.sGs.sNhGh...hN.s...GssNsGhhNsGssN.sGhh.NsG............... 0 480 495 874 +678 PF00391 PEP-utilizers PEP-utilising enzyme, mobile domain Finn RD, Griffiths-Jones SR anon Prosite Family This domain is a "swivelling" beta/beta/alpha domain which is thought to be mobile in all proteins known to contain it. 
23.00 23.00 23.10 23.20 22.70 22.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.57 0.72 -4.41 179 11592 2009-01-15 18:05:59 2003-04-07 12:59:11 18 59 4567 47 2573 8381 2287 81.70 31 11.56 CHANGED pspp..t.hh.ps...ILVscc.lsPuphs..hshppltGlloppG.Ghs...SHsAIlARshGIP.sllGs.t.s.............shpt......lps..Gphlhl...DGppG .............................ts.....t.hsps..sILVsc.c.hsPsphs......s.h.p...ps..tGllTsp..G.GtT...SHuAIlARshGIPsVlGs..s..s.....................sspt..........l.ps......Gc..hlhl...DutpG................................... 0 893 1669 2174 +679 PF05131 Pep3_Vps18 Pep3/Vps18/deep orange family Bateman A anon Pfam-B_6057 (release 7.7) Family This region is found in a number of protein identified as involved in golgi function and vacuolar sorting. The molecular function of this region is unknown. The members of this family contain a C-terminal ring finger domain. 20.50 20.50 20.80 20.50 20.20 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.82 0.71 -4.77 31 318 2009-09-11 06:43:15 2003-04-07 12:59:11 9 7 276 0 229 324 2 145.80 26 15.40 CHANGED GlhaGplshsssss.........pplhppspl...........h.psplssspss............hulsLTpaHlLlLhts+lhAlNpL.stplVa-psl.........hpstpphlGlssDs..tpsThWlaospslFElslpcEsRclW+lalcppcF-tALpas+s.....ssp+DtVhstpu-ahhpc ...........................................................GlhhGpl..t..ts...........hhsptph...........h...st.hststss.................................huhsLTpaHhllLh..s..s..+..lhslspL..st...p..lVhcpth.........hpthtphhG...lspDs...............tp..s..shWlaTsp.slFclhlpcEs...................RslW+laLch............pc............F-tAhpas+s..........stphDtVhttpu-hhhp.p................................. 
0 82 129 192 +680 PF03051 Peptidase_C1_2 Pept_C1-like; Peptidase C1-like family Mifsud W anon Pfam-B_2136 (release 6.4) Family This family is closely related to the Peptidase_C1 family Pfam:PF00112, containing several prokaryotic and eukaryotic aminopeptidases and bleomycin hydrolases. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.53 0.70 -5.96 13 1894 2012-10-10 12:56:15 2003-04-07 12:59:11 10 5 1149 26 416 2017 382 362.80 34 93.53 CHANGED pLos-plppFspcasucPphtlspsAsp+sGlh-Ashsc.psptchspVFSh-lsT-..sVTNQKpSGRCWlFuALNshRHshhKcaclc-FEhSQuYsFFWDKlE+uNaFh-pllsTA..DcslDuRhVpaLLssPppDGGQWDMhluLlEKYGlVPKpsaPEo.asossSptLNshLsc+LRccAltLR.pLhppGss.cplput+-chLsEIa+lluhsLG.PP....csFsaEYRDKDKNYHp.+slTPh-Fac+YVs..hDLcsaVsLlNAPpsD+PYsKlYoV-aLGNVsGGpplhalNlsh-hLKchslsQlKsGEuVWFGsDVu+ph-RKsGlhDsclYph-plFslchph..oKA-RLcaGEShMTHAMVlTGVDl.ss..Gpsp+WKVENSWG-csGpKGYFVMSDcWFcEYsYplVVcKKaLPc-llcsh-p...pPIsLsPWDPMG .....................................................................................................................................................................................s.lp.p.htph.tph....ts.t.thhtpuh..p.s.sl.ts...p...ps..h.........p.....s....s.Fo..h..c.ls..pt.....tls..sQc.tS.GRCWhFuuLNshRh.thhpph.p..l.....c.....p..F.EhSQ...sahhFaDKhE+uNhFh...ppl.......lt..ou..........sp.hssR...hV...palhps.P......pDGG.Q.Ws.M.hsslhc.KYGlVPpps..h...P..Eo...h....s.o....p..so....pt.hs...thLs.ph....LRp.....u..h.LR...phh.t........p.............s..................t............s..........................t.................t.........l..........p..tt+pphL...ppla.phlshsLG....PP....cpFsa.t............a.......p......D.c...-.......p............p....a..p...............t.t......shTP.pFacc.als..........hs..........l.p.-....Y....V.s.lhN.s.....P...c.+sas+.asl-h.sNlhs..u....p....hpa.lNlsh-th+ch.shsp.l.p..sGc.s.V..........WFGsDV.u.p.....s.p.c....t...G........l..............h.........................s...h......sh
....a......ch........p.......t.....h........h........s........h.....p..........h.p...............sK..u..............p..........R......................L..........p...........ht..................-.......S............hh.o...HAMslsG.....Vc.......l.....ts.......G..............p.......s..p.+..W+V.E.....NS..W.....G.....c......c...s...G...p..........c.......G.aa.sh....o..-sW..hccas...aplVVcKchlst.....-...hth....hpt....ps.h...L.sWDPh................................................................. 1 131 242 329 +681 PF00112 Peptidase_C1 Cys-protease; Papain family cysteine protease Sonnhammer ELL anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.92 0.70 -4.47 157 7017 2012-10-10 12:56:15 2003-04-07 12:59:11 18 153 1160 373 3137 7739 473 199.90 29 53.16 CHANGED lPpshDWRp..ptss..VoslKsQG...pCGSCWAFSssuulEuthtlts.....tphlsLSEQpLlDCst....sp..........GCsG.GhhppAapalhp....sGls.oEssY.......PYp.....upc........................spCchp.ppp.h.....sphssass..lsts.......sEpsltptlsp..GPl.uVulsusp....sFphYpuG..........lapt...pCss......tlsHuVhlVGYGsps.....................................GpsYWll+NSWG.ssWGE.pGYh+htR...shs.....spCG..Isspsshsh 
..................................................................................................................................................................PtthDhRp..............h....l.s......l....+..s.Q..G..........p.C..........G....S...........C......W....A.....F........u......s.s.......u.........s...lE.......u...tht...h......t......s..........................t......p.......h.......h..............s..........L......S...t.....Q....p......l.....l.....s....Cst.......tst.............................G.C...s...G....G..h..........t...t.......A.a.....p...alhp............t.G..l...s...s.-...p..sY.............................P.Yp..........utp...........................................................................spCphp....tpt..................hphp.s.a..hp....ls..s.......................pptltp.t...l.h..p........G.......P...............l...............s...su...............l........p....s....t......p...............a..................h.......Y......p...s...G...........................lapt................ssp................th..sHuVhlV.G...a..G.s.p.t..............................................................................................sh...Y..Wll+..........N.......SW............G.......p.p.........W.....G.-...pG..Y.hph..tt...s.t.........Cs.lt.......h............................................................................................................................................ 0 1477 2033 2663 +682 PF01650 Peptidase_C13 Peptidase C13 family Bateman A, Caffrey C anon Pfam-B_1302 (release 4.1) Family Members of this family are asparaginyl peptidases [1]. The blood fluke parasite Schistosoma mansoni has at least five Clan CA cysteine peptidases in its digestive tract including cathepsins B (2 isoforms), C, F and L. All have been recombinantly expressed as active enzymes, albeit in various stages of activation [2]. In addition, a Clan CD peptidase, termed asparaginyl endopeptidase or 'legumain' has been identified. 
This has formerly been characterised as a 'haemoglobinase', but this term is probably incorrect [2]. Two cDNAs have been described for Schistosoma mansoni legumain; one encodes an active enzyme whereas the active site cysteine residue encoded by the second cDNA is substituted by an asparagine residue. Both forms have been recombinantly expressed [3]. 20.40 20.40 20.40 20.50 20.10 20.00 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.83 0.70 -5.09 34 952 2012-10-03 02:24:44 2003-04-07 12:59:11 13 35 545 0 478 982 50 226.40 30 53.62 CHANGED pWAVLVAGSsGa.NYRHQADlCHAYQlLc+.tGlcsEpIlVhMYDD.IAps.pNPhsGhlhNpPp.....u.....pDVYpG.VshDYsGpsVsscNFhuVLhGscstltt.usGKVlpSsssDplFlYaoDHGuPG...lluhP....ts.LaAp-LhcsLpphHspspYpchVhYlEACESGShFps..LPpslslYAsTAuNucESSausYC.............sssphsTCLGDhaSVsWhEDo-t.pslpt..cTLppQachVKpcT.......ptSHVhpaGDhslsp.hlsp.a....................................................hGs .......................................................................................pWuVllu.s.S.p...hahN.Y.RH..AsshphYphl+c.hGl.-ppIllhhhD..D.hAss..............cNPhsu.lh..sp...p.............t....slY..t.s..l...........hDYpu..cVssc................sFht..lLh...G.ph............spt+....hl....o..s.s.ps.p...l....h.............lYho.sHGuss...............hLth........sspplts.......t-Ls....cs.l.p...phat....cp+.................Ypph....lh..hl-sCpuuoh.............hpt...............t.s.......s......lh......s...hsuo.p.ssEs..Sa.up...h...............s..phtshlsD...h...a...o..h.......hhc...-p..............p.p....sltp.ap....h.....................................................t........................................................................................ 0 181 292 392 +683 PF00863 Peptidase_C4 Peptidase family C4 Bateman A anon Pfam-B_232 (release 3.0) Family This peptidase is present in the nuclear inclusion protein of potyviruses. 
20.80 20.80 20.90 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.49 0.70 -5.40 39 1088 2012-10-02 13:45:52 2003-04-07 12:59:11 14 30 161 9 0 1336 7 226.80 49 9.57 CHANGED aEupShhpGlRDYNsIus.......slCpLpNpSsutppslaGIGaGshIIoNtHL...F++NNGp....LplpopHGpF..plcNosplplp.lpG+DlllI+hPKDaPPFsp+l+FRsPpps-+lChVussFQppphsspVSEoShhhs...ssusFW+HWIoTp-GpCGhPlVuspDGtIlGlHS.....LsstssspN...aFsshscsF.pphLpshcphpWsptWpaNsspluWuulplhsspPpt.FphsKhls ............................................HESpShaRGLRDYNPIusslC+LsN.sS-utssshaGlGa..G..sh..IlTNpHL...FcRNN.Gp....Lpl+...SpHG-.F...slKNTspL+l.h.P.It.....s...RDlllIRhPK......D....hP...P.F......Pp+.LtFRp.PccsERlC.hVG..o.....N...F.....QpKSl.o....S..hVSETSshhP............lts..SpF..WKHW...IoT....cD....GpCGhPhV...STp...DG..p...IlGlHS.....LAN.p.sopN...aFuAFs--Ft.pcYLpsh-sp......cWl......KpWpYNsstlsWGoLplpsup.Pp..u...FKloKLlp................................... 0 0 0 0 +684 PF02902 Peptidase_C48 Ulp1_C; Ulp1 protease family, C-terminal catalytic domain Griffiths-Jones SR anon Structural domain Domain This domain contains the catalytic triad Cys-His-Asn. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.61 0.70 -4.72 27 3178 2012-10-10 12:56:15 2003-04-07 12:59:11 14 120 647 37 1892 3126 158 164.70 15 27.92 CHANGED palsstlhthhhchh........pstpppcsthhsohFhshLp.....h................pphhsuspp.hpt......hhchDhlahPlp.h........stHWshl....hlsltcp.......................................pIhhhDSlhshpp..........htshsthh.aLhpp..hhptp..t..............p.ht.hhth.plPQp.sstDCGsashpal-hhspshs.p.........l...ppchtth+pchuh..hchhtstl .....................................................................................................................................................................................................................................................................................................................................t...t.p.h..l.h.lPl.p..................ttHWhLh.....llsh.ttp.............................................................................................................................................................................................................pl.h..hhD..S.h..t.ttpp.........................thh.p.th.h....p..h.l.....tt.....h.....t.....................................................tth.t....h....h....h...............t....h..s..p..Q..t..s...s.......c..CGh.alh....t.hh.ch....h....hp...t.........................................................t.h..................................................................................................... 
1 468 899 1346 +685 PF03416 Peptidase_C54 Peptidase family C54 Bateman A anon MEROPS Family \N 25.00 25.00 25.50 25.40 20.40 23.30 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.91 0.70 -5.27 14 700 2012-10-10 12:56:15 2003-04-07 12:59:11 14 10 298 10 424 693 4 260.10 32 62.57 CHANGED pshhpsFhpDhtSRlWhTYRcsFssI.............................ssoshToDsGWGCMlRouQMLLApALlhp+LGRsWphstp.....................................................pppttapcIlphFsDp...susaSIHphlph..............................Gtpt.sKp......sG-WaGPsssupsl+pLscts.....shssl.plaVuhDsslhh--.hppss.....................................................................................................tstapPlLlllPlRLGlsclNshYlctLppshphspSlGIlGG+Ps.puhYFlGaQsDcllYLDPHhsQpslshspcshs......................................................poaHs.phhp+lphpphDPShslGFhC+scc-F-shppplp ..................................................................................................................t...tthhpDh.S+.l.Wh.TYRptFs.sl....................................................................................................................................s..s..s...s..hToDsGWGCMlRsG...QMlLAp........uLlhth.....LGR..s.Wp.h...tt....................................................................................................................................................t.t..thhpll.p.....hF..hDp.............p.....usaSlHph....sph................................................G.ht....GKp.......sGpWa...GP.ss....suphlcphs..t.........................th.ssl...tl..a...l.s...Ds.sl.h.h...p-hhphht...............................................................................................................................................................................................................tth..pslllLlPlRLG.h.p.p...l.N....h.Yhpslc.....thhp.h........tslGlhGG+P.p..puhYF.lGht........s..s..p.l.lYLDPH.h.s.Qsh.l...s....h.p.t..t.s..h........................................
...............................................................................................................poaHs...pp...p+hthtphDPS.....hsl........GFh..sp..s.pp-apphppth.t.............................................................................. 0 158 222 329 +686 PF01433 Peptidase_M1 Peptidase family M1 Bateman A anon Swiss-Prot Family Members of this family are aminopeptidases. The members differ widely in specificity, hydrolysing acidic, basic or neutral N-terminal residues. This family includes leukotriene-A4 hydrolase Swiss:P09960, this enzyme also has an aminopeptidase activity [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.50 0.70 -5.38 33 7280 2012-10-03 04:41:15 2003-04-07 12:59:11 15 83 3187 81 3135 6965 1743 332.40 23 43.69 CHANGED RLPssltPh+Yclplp...sphpsh.......sFpGpssIplps....pp.sssplslcspc.lsIp.slplps..psss............hphptppph....lplphspshtttp........hpLplpasu.lss....s..htGhYtohYhp....sGpp+hhssTQhp.sstARpuFPCaDEPshKATFsloltpssph..sulSNhs.hpsp.hp.ss..hthspFppT.shMSoYLlAahlu-hphl..pspsps..tl..lclaucsuttppup..aAL-hst+hlpahEcaashsYslP.....KhDhlAlP.-Fuu..GAMENWGLlTYREssLLh-.t..ostppcppluplluHELAHQWFGNLVThcWWssLWLNEGFAs.ahEahssctlts..phphhphalhsph..ppshttDuls....ss+Plp....plssPsplsshFs.slsYsKG 
................................................................................................................................................................................................................................................................................h.................................................h...u.........h.h..................................t........h......l....p.........t......hp..l........t....h.....h.................................................................................................................hh.l.............t.........h..............................h.l...l..t..ht..s.........tt.......................t........htG...h..a............h.........................t...p...t..........h..h..........h...TQ.....h.E.....s..p...........ss.R...p.......hhs....s....h....D....c..P.........s.h..+.........A..s.........a.s.h.p...l....ts..s...........psh..............hsl.....SN..t...s.....h......h.......p..........p...........t...............t.........h....t.....s.....................hthh....pa......p.p....s.......h....s.o..Y...L..hA....h...s.s.G....c....hphh.................php..sps....................t.h...lp.......l....a....s...p............s....t.........h....p..p.st...........as....h..p.......h...t..p..s.h.p.a...h.c....c......h...a...s...h.....Y......s....hs....................p.h..p.hl..sl.s.....p..F.....sh.......G...AME..N..h..G...L.......h.a..p...p..p...h....l....L....h......s......t........t....s..............s...................t.....t........h........t..............pl..tt.....l...........l.u..H...........E.h.u.Hp............W.h.............G..N..h..V..T.h.c...........W..p..p.L...W.L.pEGass.ah.p.h....h.......h..s.s...p.h......ts.........................h.ph....................p.....h...h.h..p.th.................t.h.t........Ds..s...........s.cslp.................t..p........p.h..p............ph...as..hhsYtKG......................................................................................... 
0 1008 1673 2600 +687 PF01431 Peptidase_M13 Peptidase family M13 Bateman A, Dudgeon T anon Swiss-Prot Family Mammalian enzymes are typically type-II membrane anchored enzymes which are known, or believed to activate or inactivate oligopeptide (pro)-hormones such as opioid peptides. The family also contains a bacterial member believed to be involved with milk protein cleavage. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.42 0.70 -5.15 21 3320 2012-10-03 04:41:15 2003-04-07 12:59:11 16 34 1562 10 1433 3043 301 191.10 36 29.63 CHANGED NAaYssshNplshstslh.sPaassphs..pshNaushGsVluHElsHsFsspGspas..h............tah.tpspssapstspCslcpasthsssstst.....shsGspThtEslADluGlphAhpshpp......psss-ppl.s..hpt.s..phhahshAtsaCp.pppspstlh....psHuPsphRlNsslpshPtFsssFsCp.Gs+MascPcpps ...............................................................................................NAaYsPtpNp..I.sFP......A...........u.I.LQs.P.F.a.....s...h.p.ts..........puh.NY.GuIG.s.VIG.HEl.oH.uFDs........p.G....t..p...a...D...tpG.s.Lp............................sWW...op.p...s.hp..tF.p..p...+sps...hlc.Qa.s.....s.h.......ps.........h...............s............s..................................................p.........l...........N...G....p..hT...lu..EN..IA......Dh.GG.lphA....hc..Aap........................th.p..p...t...p...h..ss.........h.p........h....s..tQ.........h.FF..........l................sa..A................p............l.Wp................t........p....h......p.......s.....c.t..........h......t.............h....l.t..s...D........s.......H.u.Ps.......c.h..R....sss.s..lpNhsp.Fh..csF.sspt.u.ss.Mh.s.cpcl.................................................................................................................. 0 539 753 1171 +688 PF05193 Peptidase_M16_C Peptidase M16 inactive domain Yeats C anon Yeats C Domain Peptidase M16 consists of two structurally related domains. 
One is the active peptidase, whereas the other is inactive. The two domains hold the substrate like a clamp [1]. 20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.01 0.71 -4.61 181 16090 2012-10-02 15:41:56 2003-04-07 12:59:11 16 64 4451 253 5211 13286 2764 179.20 14 37.15 CHANGED slstpplppahpphYpss.phslhlsGslshp..plhphhp....chhuplst.........................................................................tsp.hthsht..thttt.hhh.tt.........s...pspltlua.sssh...........................................sp.cthshpllspllust.........................htutLhpplp...............cpsh...shsstuh.hptht......ttuh.hslhhssssp......phpc............hhphhhptlppltpp.t...hsppc....lpcs+pth .......................................................................................................................................................................................................................................................................................................................................hshppl.hp.a.a.p.p..a.Y..p.ss..p.h..s..........lhl...s...G...s........h..s..........p....p...h.h...p.h...hp.......ch....h.u..p.httt...........................................................................................................................p..ht.h....s..h...p.............t.......t..t....t..t.h.h.h.tpt..........................s....p..p.spl....t..l..u.....a..p.....ss..sh..........................................................................................................................s.p...c.t....h..s...h...p......l...h....s....p.l.Lust.................................tu.t.L..hp.p.lp..........................cp.sl.......sh..s....s..t..u..t...hs.sht..................t.uh...h.....t.....l..t..s.s..s..s.sp...........phpp............................hhp.h.lhp.t.............l...p.pl.....t..pp..s........hspp-...lpphpt..h......................................................................
...................................................................................................... 0 1802 3187 4395 +689 PF02789 Peptidase_M17_N Cytosol aminopeptidase family, N-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_990 (release 3.0) Domain \N 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.42 0.71 -4.38 103 3629 2012-10-02 00:07:53 2003-04-07 12:59:11 12 10 3099 51 1002 2638 1020 122.30 21 24.73 CHANGED lls..lh..........cssthssss..........lc.pthsu.hlpphhpptshsGKhGpshh..lhshs.sh....hppllllGLGcppp.........hstcsh+pssusssctlps.hpsppsslhls...................ttspshspGhhLusYca.sca+opp ...................................................................hs..s..........tthc..th.su..hlpt.l.lcp...s...p.....h.p.G.K..Gpshh.....lhtss...s..................sp+llllGhG....cppp...............................h.s.t.c.ph+ps...hu..pshpsL...ps...t...........ts.tps.shhls.........ttts.................ttsptssc..usthutYpF.cpaKop............................................ 
0 310 592 816 +690 PF02127 Peptidase_M18 Aminopeptidase I zinc metalloprotease (M18) Mian N, Bateman A anon IPR001948 Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.40 0.70 -5.98 9 1876 2012-10-02 19:46:12 2003-04-07 12:59:11 10 13 1229 44 661 1854 254 401.10 32 91.82 CHANGED FlspusTPaHsVppltc+Lh.ptGFppLpEppsWt..l.pPts+YFls+NtosllAFulGsphcstsGhslluuHTDSPsL+lKPpuppp.sptah.lulpsYGG.Ih.poWhDRDLuluGhVhlKss..tpph.phllclccPlhpIPsLAIHLs+sh.pshphspcschhslhuh.s......p..h.sptc..........pth+hslLpllscchG...lps-D...llsh-LhlhDspsuplsGhcccFlhusRLDNphssasuhpuLhpstps......csphplsshaDpEElGSsoupGAcSsaLptVLcRlshthppss.....hhpptl....sKShhlSADhsHAhHPNYsspH-psatPhhstGsVlKlssNtR..YuTsusstshlcplAptss...V.lQhhhl+sssPsGoTIGPhhuucpGlcTlDlGss.LuMHShREhsustDlh.shphapuFF .....................................................FlstusosaH...........sltphtphLp..ps..GFp.pLpE.........p..c.p.Wt..........h..ps.....u..s+.aahs+ssusllAat...lsp...p......................t......p...G..................hpll.GuHsDSPsl+lKss....s..php.......ppG...ah..........l....sschYGG.lh.tsWhDRsLuluG+lhh+.................ss.......................t...............h..........st.ll.c.h.scPlhtIPsL.AIHLs+..p.h.p.p..s...h..ph.s..ppp...ch...Plhuhhs................................t..............................................hc.tl.hpl.l.sc..phu..........lp.tc-......llsh-L.lhssp.uphhGhs....p-....hltusRlDshsssasulp.Alls....stt........................................................................t...................p.......st..lh.s.haD+EElGS.totpGA....pSshlps....slpRls....t..s....h..s.tsp............t.h.pthh.........................spShhl.SuDsuHul.H....PNY...s.c...p.a-............s...p.ps.hlstGsVlKhss..spR......Yuosuss...s...s...h...h...p...p...l...s...p...p.ss........................lshQ.pahhpss...sGoT.lG.s.hh.s.u.p.h.Ghp....slDlGss.LuMH.ShRE..hsuttDlhhhhchhpuFh........................................................... 
1 270 456 592 +691 PF01546 Peptidase_M20 CO_pept_M20; Peptidase family M20/M25/M40 Bateman A anon Pfam-B_253 (release 4.0) Family This family includes a range of zinc metallopeptidases belonging to several families in the peptidase classification [1]. Family M20 are Glutamate carboxypeptidases. Peptidase family M25 contains X-His dipeptidases. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null --hand HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.37 0.71 -4.79 366 29469 2012-10-02 19:46:12 2003-04-07 12:59:11 23 76 4930 91 7433 26828 10363 326.10 13 78.41 CHANGED hltuHhDslP.......th......p.s...................................GhhaupGpsDtpuhlhuhhpslpph........ttphps.slpllhpssEEsu.....u........................Gsphhhcs....................................hth+....shsh..sc.s.hts...............shpushshhhhhhsspuptussshssssshhhsshhhhhhpthhspttsshpssshshs.......................................................................................................................thsssssssshsttshhtsthpshppthpthhpthhtthtpthstthttttthphhtshssshsss.h....hphlpps..spp.h...................t.ph.sssuu...pDhshhtt........s.sh.hlshusts...........thhHs.ssEpls..sslhtusphhtphltp 
.......................................................................................................................................................................................................................................htuHhDs.lP..........t...tth.........................t.................................................................................ps...s....h..h.....a....u....p....G....t....s....D.t...p.us....l.h...u.h...h....t....s.......l..pt.h..................t.t.p.h.p...s....s..lp.l..l..hp..s....s..E.Esu...t.........................................................................Gsp..t.h...hpp.........................................................................hthc.........shsh.........sc...s...t.th.......sttsssttththttttsttsstttsstshhtsshhhhthhthhtttttt.tttthhhhh.....................................................................................................................................................................................................................................................................................................................tsssstssh.thtthttthtthttphtthhttthtthttthstthththphphh.sh.hs.sst......h...hp.h.h..t..ps....hpp.hhh....................................ts.p.h.t.s.h.t..u.u...........pD..s..s...h.ht..................................t.s.h.s...h.l..h.h.usss.................tthHs....s...s...E...t.l....s....hps.l..tssphhhphh..t...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................... 0 2236 4510 6175 +692 PF00814 Peptidase_M22 Glycoprotease; Glycoprotease family Bateman A anon Pfam-B_1670 (release 2.1) & Pfam-B_4550 (Release 7.5) Family The Peptidase M22 proteins are part of the HSP70-actin superfamily ([1]). The region represented here is an insert into the fold and is not found in the rest of the family (beyond the Peptidase M22 family). Included in this family are the Rhizobial NodU proteins and the HypF regulator. This region also contains the histidine dyad believed to coordinate the metal ion and hence provide catalytic activity. Interestingly the histidines are not well conserved, and there is a lack of experimental evidence to support peptidase activity as a general property of this family. There also appear to be instances of this domain outside of the HSP70-actin superfamily (e.g. Swiss:Q9ZM49). 
24.80 24.80 24.80 24.80 24.70 24.60 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.90 0.70 -4.90 113 9668 2012-10-02 23:34:14 2003-04-07 12:59:11 20 26 4927 28 2530 7185 5233 230.60 26 77.47 CHANGED plluphhhspht.h.....G.GlhPp.suRpHtcplhshlcpsLpcu...slshp..-lDsIAsopGPGhhsuLhVGsshA+uLAhuhshP.lluVsHlpuHhhsshht..tt........P.........lsLllSGG+oplhhspt..tpYchhGpol........DsAsG-shD+su+hlshsh.......s..uu...tlcphu..tu.......hp..hs.s.h............hshSFS....Glcosshpth............................slshuhpcsshshLspts.cshth..........hssppl..llsGG..VuuNptL+p.......tht.hstp.....t.h.hhhPs.t.assDNuAMIuh ...................................................................................................t..ss...RpHs.cplh.shlpps...Lp.pu......................sl..s...hp.......-l...DulAho..tGP.....G.........h.....s.uL.h.l...G.lssA...+u.LA..........h.u....h....s....lP..lluVsplt....uhh.h...ss.hhp.....ps....................t.P........lslll....s....u....t....+......s....p........lh...h...s.........................p.....a.....p....h.....h....u..p.s..h...........css..h.u.-..s....h..s..phuchh.th..............................................tt........t............................................................................................thsas....G..h.s.h...h...................................................................................................h...t....h.............................................................................h...h.......................................................................................................................................................................................................................................... 
0 882 1633 2143 +693 PF04389 Peptidase_M28 Peptidase family M28 Bateman A anon Bateman A Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.54 0.71 -4.54 65 5682 2012-10-02 19:46:12 2003-04-07 12:59:11 12 79 2007 144 2682 7975 2221 184.50 20 35.77 CHANGED phllluAHaDo.hs.................htsGAsDsuoGs...AslLElA+hls...p..t.p....pp..................slhFlhhsuEE..............pGLhG..Sctast......tph................................hppltshlNhDh...h...Gs......ss.shhhtsss.........t........................h....ltshhpps.tshh.shs.psh..sth.spoDahsFh..ttulPul...phtsst..................hsttY.HospDshsp..lshsuh.p ..................................................hllluuHhDohs.........................................................hssG.A..s....D............s..uoGs..............uslL.E....lA.+....h....lt......p.........t..hp.........pp..................................................................................................slhFhh...as...u.EE............................................................................tG.L..h.G.....Sp..tasp.......t..tpt................................................................................................................t.p.p.h..h...h.hlNh....D.h.....h.........us.............sst.t.h..h..h.t.sss.....................t..t.....................................................................hpth..h.p..p.h...h.................h.....h......p.........t.....h.......s......h.......p......h...............................t...t......h......h....s.....p....o......D..a...h...sFh..............ptG.l...P..s..h....th.htss...............................................................s...ha...H...s...t...Dsh.pt.ls.....h................................................................................................................................................................................................................................ 
0 912 1571 2227 +694 PF01551 Peptidase_M23 Peptidase_M37; Peptidase family M23 Bateman A anon Pfam-B_291 (release 4.0) Family Members of this family are zinc metallopeptidases with a range of specificities. The peptidase family M23 is included in this family, these are Gly-Gly endopeptidases. Peptidase family M23 are also endopeptidases. This family also includes some bacterial lipoproteins such as Swiss:P33648 for which no proteolytic activity has been demonstrated. This family also includes leukocyte cell-derived chemotaxin 2 (LECT2) proteins. LECT2 is a liver-specific protein which is thought to be linked to hepatocyte growth although the exact function of this protein is unknown. 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.32 0.72 -3.94 276 16518 2012-10-02 20:27:15 2003-04-07 12:59:11 17 180 4065 22 3908 13013 6175 97.80 32 25.05 CHANGED phHp...GlDl....uus.....h..Go........s..VhAsssGpV..hhuuh.....t.....uh.GphlhlcH...ssu..hhohYuH..hsp.hh..Vc..tGppVppGphIGtsG.sTG..............uo...GP............HLHF.El..p.h............su.pslsP ..........................................................................t..HpGlDh...........us.s..............t...G.s................s....l..h..A..s.s.s....GpV.........sh.ss........tt.......................................ua..Gp.h.l..hlcH...................sss....hhohYuH........h..sp..hh.................Vp.................G.p.p...V....p.t.Gph..I..uhsG..soG.....................uo..........us...........................HLHF.El..p..h.............ps..p.lsP................................................... 
0 1317 2602 3339 +695 PF01434 Peptidase_M41 Peptidase family M41 Bateman A anon Swiss-Prot Family \N 20.60 20.60 20.60 20.60 20.30 20.50 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.37 0.70 -4.86 31 7039 2012-10-03 04:41:15 2003-04-07 12:59:11 13 33 4736 42 2254 5669 3966 203.00 39 31.13 CHANGED ohp-l--AlDRlluGhE++.shllopcpK+llAYHEuGHAllGhhhcssDPVpKVTIlPRGp.AhGhThhlPc-Dph....ho+ppLhsplssshGGRsAEEllFG..clTTGAusDlppsTplARpMVTcaGMScclGslshtppss.t..hh.tht.tptaScpTuphIDpEV+pllccuYc+ApplLpcp+ctl-tlActLL-+ETlcu--hppl .........................................................p..p-h-cAhD.+l.l....h..G.sE.+.+..o.t..l........h..o......ccE+..c...............hsAYHEuGHAll....uh.....h............l.....s.....p.....s............-......P.....V.........p..K.......V.....T......I.....l....P....R....Gp.....A.......L.......Gh......s...h..h...L..P.........c.....c..D..phh........ho+............p..p...........L.hs...............p.ls.shhGGRsAEEllaG.................cloT.....G....A....S....N....DlcpATp....l.AR.sMV.....Tp........a...G..M.....S...-..c..l.G..sl...ta...s.p.p.......ps...........................a.....h...h...t.....s.......h......t.................t.........p.......p.................s.hS-...p...TA..p...h.ID.pEV....+pll..ccsYpcApplLp.....c..pc................-t.lc...............tluctLlc..hETlsucplppl.................................................................. 
0 790 1454 1932 +696 PF02163 Peptidase_M50 Peptidase family M50 Bateman A anon IPR001193 Family \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null --hand HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.09 0.71 -5.34 68 8451 2012-10-03 04:41:15 2003-04-07 12:59:11 17 49 4572 15 2487 6748 4385 283.10 20 78.45 CHANGED hhllulhlhls.lHEhGHhhsA..+ptGl.plpphthhhG.hlhthh.ht.h.h............................................................................................................hssshphphtptshhp+hhlshAGPhsNhllA.llhhhlhhhhss...............................................................................................................................................................................................................................................................................................................................................................................................................h.............htsshhthhhhhuhlslsLulhNLlPlssLDGG+llh.hh.............hh.tt.hs.phtthhhhhshslhhhhhhhshhps 
.......................................................................................................................................................................................................................................................................................hhlslhlhlh....lHE.hGHhh.sA......+h.h.Gh..pst.ph..t......h..t..h..........u..................l.......ht..h............h............................h.............h........h......t.h.h............h.....h....h.h.....h............h....h.s................................................................................................................................................................t...........t..h..t....h..tp..ps.hh.p.+.h..hl.hhA..GPhh.N..hl...l.u.l...l...h...h...h...h....h....h....h...h...t.s...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.t...s...h...h........h.l..h......h....h....u....h....l....s.l.s..L....u.l.hN....L....l.P.......l.....P.....s....L....D.GG+llhhh.h.........................t.h.h.h.....t.p..........h.............p.....h...t..t..h..h....h...h..h.s.h.hhh..hhh.hhh............................................................................................................................................................................................................................................................................................................................................................................................................................ 
1 901 1712 2148 +697 PF02897 Peptidase_S9_N Prolyl oligopeptidase, N-terminal beta-propeller domain Griffiths-Jones SR anon Structural domain Domain This unusual 7-stranded beta-propeller domain protects the catalytic triad of prolyl oligopeptidase (see Pfam:PF00326), excluding larger peptides and proteins from proteolysis in the cytosol. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.28 0.70 -5.90 30 3044 2012-10-05 17:30:42 2003-04-07 12:59:11 10 23 1955 38 1041 2696 2777 371.10 22 55.84 CHANGED Psst+spttspph+Gpp......lsDPYtWLEDsc.....us-spAaVcApNphTpshLsph.sh+stltcclpphhshs+hosPa+cGpaaYYh+NsshpspsVlhppsss......pups.............cVhLDPNsLSc-G...TsulpshuhSpDGchlAYulstuGoDhhsI+hhclcsu..........................c.lsDsLcpVKaos.lsWssDscGhFYssaspspcsp.......pths.ppKlaaHtLGTsQSpDh.LlaEts-psp.hhhusplScDG+Ylllustpuss..NclahtDlppp..........s.hhphhhllsphcuphthVspc.......GsphhhhTN......csuPNh+LlssshssPs..pWcs.......lls-+ccclhh.shsltusaLllsYhc-spsplphachs............sthhhcphhhslGolsuhuuppcssclaapFoSahoPsslYch-hssschchhphp 
.........................................................................................................................................................................thaG.t......h.Ds.YhW.........Lc....D......s............ssc.shsa....l.p.tcNths.p...t.h.h......s..p.........................s....hp........cp.lhpch....t...s...........h...h..p...t...p..c....h...u.s.....P....h..........h....c....s...s........a..h.Y...h.th.h.p.s.....s...p..p...sl.h..hRpssh.............tt.t......................................cllL.....D...s.....Np.....h....u.......t...s...p..........t.....ah.h.....h...t...uh....s....l..........o.......s.....D....s.......p.......h..h......A.........hu.........hs...h.t.Gs.-hh.s.....l.+hhclp.o.s..........................................................ph.h..s...-.....h.....l....p.....t....s.p....s..s..........hsW....s...s.D.....s....p....s......h..a.Ysp...hct..t................................................sh.pl.a.h..H..pl.......G.....o....s...ts...p......D........h...ll....ac.c.....p.....-.....s.....s.....h...............hh........u......h.......t......t........o..p.....s......p...c.......a...........l..h.......l..p..h.s...s...tss.........s.c.l...hh..lchpt...................................tsp..hhl..h..s.c.pc....s.h...th...l..sph...........tp.p...h..a..l.....hoN.............pp.u...s..h...t...l...h....p...s.....t.....h....p....s.......t............p.hp.......................lh..cpt.h.....h.....l.....p......s..h.......s.h......h........p..s.......a.l.....l.lp...h.pp.s.hs.pl.ph..hshp..............................................tth...hh.p.........h.......s...h......h......................h....s.....h....s.......s.....c...........c.....s.....sp...lh.hsa..oShssPs..plachsht.s.t.p.phh...t................................................................................................................................... 
0 356 649 859 +698 PF04080 Per1 Per1-like Wood V, Finn RD anon Pfam-B_12918 (release 7.3); Family PER1 is required for GPI-phospholipase A2 activity and is involved in lipid remodelling of GPI-anchored proteins [2]. 21.50 21.50 22.40 28.50 20.40 21.30 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.05 0.70 -4.97 34 346 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 246 0 231 344 0 246.10 35 77.58 CHANGED pp.P..lhh+hhhWsC.u-CcYp.Cp..phhTppRhpp.s.....tls............................................QFHGKWPFhRl.....hGhQE.hSslFSlhNhhspap.Gh.tphpc.pl.pp.shpph...........alhhuhluhsAWhaSslFHsRDhslTE+LDYFsAuusVLhuhahshhRhhpL......pp.thtthasssslsh.assHlhhL.hhcaDYsYNMtsNlshGllQhllWhhauhhphpphtp.......................................................aslhPhllllhshhuhuLElaDFPPhhh.hlDAHuLWHhsTIs.shh.WYsFhlpDhp ..............................................s...PlhhphhhWsC.s-CcYp.C...hhhspthhpp..s.......ls............................................QFHG.KWPFhRh....................hGhQEPhSslFShhNhhsphh.sh.hphhp....l..tp...sht.h............................................ahhauhluhsuWhaSslFHoR.DhslTE..+LDYasAs.uhllau...lahshlRhhpl................................pp.thhhhhs..sh....hlhh.hssHlh.YL......p..h.h.caDYsYNMtsNlshGllphlhWhhash....h..ph....pth............................................................hphh..hllhhhhhuh.LElhDFPPh....hh...hlDAHulWHhsTls.shl.aa.sFlhcD........................................... 0 71 128 191 +699 PF04695 Pex14_N Peroxisomal membrane anchor protein (Pex14p) conserved region Waterfield DI, Finn RD anon Pfam-B_4121 (release 7.5) Family Family of peroxisomal membrane anchor proteins which bind the PTS1 (peroxisomal targeting signal) receptor and are required for the import of PTS1-containing proteins into peroxisomes. Loss of functional Pex14p results in defects in both the PTS1 and PTS2-dependent import pathways. 
Deletion analysis of this conserved region implicates it in selective peroxisome degradation. In the majority of members this region is situated at the N-terminus of the protein [1,2]. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.94 0.71 -4.12 53 413 2009-09-11 00:50:26 2003-04-07 12:59:11 8 13 263 4 278 398 0 121.40 28 31.63 CHANGED sRc-LlssAlpFLpDssVpsoP.lp+KlpFLcSKGLop-EIctALpcussss.....................sspshsssssssshtssssss............hp............................sss.....................................sW+DhhlhssshuGlsauhYthsc+YlhPhl ............................R-pllpsAlp........FLp.......csp........VpsuP.ltp+hsFLcsK.....GLTp-EI-tAhpc....sssss.........................................st...ss.....s......ts..s..s......................h..............................................ss.......................................sacchhhhs.hhhuGluhuha.hh+phlhPhl................................................................................................................... 0 88 153 229 +700 PF00294 PfkB pfkB; pfkB family carbohydrate kinase Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family This family includes a variety of carbohydrate and pyrimidine kinases. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.75 0.70 -5.29 64 26513 2012-10-03 06:25:16 2003-04-07 12:59:11 19 81 4887 268 6104 23264 9854 285.90 18 86.51 CHANGED hstlsslGts...h-hhhhs.t..........thh.pstphphtsGGsutNsAhslupLG....hpsshlutlGsDphGphhhptlpppslssshhhhssp.pposhshhhhsts...ppslhhhhssssphpht...........hltpsphlhluu.....hhsts....tshhpthtphtcpts....s.shhsshht......hhpthpchh.shssllhsNccEhphhhstp...........shpphhphhtp....th...phlllThGs....cGshhhp.................pstththssh...thplVDTs..GAGDuFsuuhlsslhp..........stslccslphusssuuhslpptGsts.s.h..ph ...........................................................................................................................................................................................h......hG.h....hp..h.........t.................t....tstp.h.p.h..t.s.GGsuhNhA.hs.h.u.p.LG...............................tpst...h...l..u.h...l...G.............s..............D........t...h..G.p........h..l.h..p.t....l..p.....p........t..........u....l.........s.....s............s..h.....l.....h...h.........t.....s......p.....t..........t.......o.........u........h...t......h...h..h....h......s.....tp....t...........p......p........p......h.......h....h.............t....s.........s....s..s...p..h..p....t..................................t..h.l...t...p......s...c........h....l..h...hsu.........................h.......t.h...s...........p...h..h...p...t...h.........h.....p....h..t..p.pt.s..................s.s..h.h.s.....s..hht.........................t.h.....h...t.....p...h....l....tt.s....s...........l........l...h...s.........N..c......p..........E..h....p..h..l..h.u.hp......................................................s.pp..h..h..p..t..h...p...t......................ss...........ptl..l.....l.....o.....h........Gs....................cG......s...h...hhp...........................................................tst...h.h..p..h..ss.............t.....
...s..p...l....l......D.......o...s...........GA...G......Duah.uu.h.l.t..u.l.hp.................................s.t..s..h..t..c.u...h...p...hA....ss...s....u......uhslpphGs.........t.......................................................................................................................................................... 0 1886 3711 5053 +701 PF00169 PH PH domain SMART anon SMART Domain PH stands for pleckstrin homology. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.45 0.72 -3.82 133 16347 2012-10-04 00:02:24 2003-04-07 12:59:11 24 818 416 205 9216 15741 151 107.30 17 13.94 CHANGED hhtpGhLhpps.........................ttppacc+ahhLps.......stLhhacspp....................................................................ttp.pttlsLpsh.plppssst.............................................................................ppptsFplttspp........................tphhlpssop.p-tppWlpslppshp ...................................................................................................................................h..hpGaLh+ps...................................hhps..W..c.c......Ra.hl..L..ps.........................st.Lh..a.acsp.p..................................................................................................................................................pp.p.s..p.s...t...l....s.L....p.....s.h....p....lptsspt....................................................................................................................................................................................................ppphs......F.p...lhstpp.................................................sh.hl...p.....A.....p..o...p..p-....h..p..pWlpslpp...t................................................................................................................................................................ 
0 2389 3501 5912 +702 PF05065 Phage_capsid Phage capsid family Finn RD, Bateman A anon Pfam-B_3186 (release 7.7) & Pfam-B_9481 (release 10.0) Family Family of bacteriophage hypothetical proteins and capsid proteins. 24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.98 0.70 -5.21 189 3200 2012-10-03 06:22:39 2003-04-07 12:59:11 8 11 2039 63 462 2535 684 261.30 16 68.16 CHANGED GGhllPpph.tsplhpthpptss.lpplssshs..ss.ssphph.hspss..............tushhu.....E...............tt.....spsss..phs.plsh...sscclsshh.tloppl..LpDushs..lp.sh...lssc....lspthutpcstshltG......ss...pstGhh......................hthssh...sssst..............h..............-slh....phh..slpsshp...t.ss....alhspssh.stlc..phK...D.ssGp....hlht...ssh...tssts......plhG....hPlhhspsh.............t.....................lhhG.Da....pshhlsc.....cts.lpl..hps......................s..ppshsthhshpRhssslhc..s.pAhth.hph ..................................................................................................................................hlstph..ppl.hp.hpppss..lt..p..hsphhp....ss..tt..phph...httts......................sushss.........E................................sp.....spsps....phs.pl..sh...sh.t.c.l......s.s.hh..tlo.p.c.l....lcsu..s....hs.............lt.sh........ltst..lsps...hstt..cp...sh....lpG.......................ss.....p.....shGhh...................................................thssh.....ssss.................s...sh..................................................................................cslh................shh.t....tlps.shp..............t.su........hlhssssh...ttlp...phc........s..ssGp......hlh...............s.h............ttsts.....plhG...........hslhhspth.........................................................................thhhG..sh....pthhl.sp.........pts..hphth.pp..........................tps.hh.hhstth..hshthhp...pAhhhh.........................................................................................
............................................... 0 140 299 389 +703 PF02899 Phage_int_SAM_1 Phage_integr_N; Phage integrase, N-terminal SAM-like domain Griffiths-Jones SR anon Structural domain Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.78 0.72 -3.83 37 10422 2012-10-02 14:21:04 2003-04-07 12:59:11 12 29 4035 13 2334 8796 3027 83.20 22 26.81 CHANGED lcp..ahpaLthc+shStpTlpuYppsLpthhpahpp....thtshppl...stpcl...p.talschtpts........hsssolsp..tluulRsaapa..hhpcs ...........................................pt.ah.ph.L..h..h.c...+..s..h..S..tpTlp.uYppDL.........pp.hh..p..a..........lpp.................th.....h.....s.......h...p.....p..l...................stpsl.....................p.sa.l.s.p....h.tpps................................hsssohsR......ploulRs.Fa.p.a.Lhpp................................................................ 0 747 1479 1975 +704 PF04860 Phage_portal Phage portal protein Kerrison ND anon Pfam-B_6050 (release 7.6) Family Bacteriophage portal proteins form a dodecamer and is located at a five-fold vertex of the viral capsid. The portal complex forms a channel through which the viral DNA is packaged into the capsid, and exits during infection. The portal protein is though to rotate during DNA packaging [1]. Portal proteins from different phage show little sequence homology, so this family does not represent all portal proteins. 
27.90 27.90 27.90 28.20 27.70 27.80 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.93 0.70 -5.78 127 3192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 1991 3 469 2728 646 315.20 19 80.95 CHANGED ht.st..ssh..tls.tpsshp.tssla..uslph.lupsluphPltlhcp....t.ttp.h.t....pslhpllp...pPN........ouhpahpthhhphlltG.Nua.shhhh..s......s.up..hh...p...Lh.lpspp.h.................ht..ss....phh.Yphp...........ss......tt...t.phstscllHl+.h..hshs.....shhGluslp.sutpslslshusppastphapNGspsuullphs..sth....sp..-phcp....hccphpp.thpGsssss...pshlLps...............G.hcapsl.....shssp-sphlp.s+phshc-IuphatVPsthluthtp.soh..usl-ptshsahphsLhPhlpphcpplstpLh..t.tthttthhhcFshpsllcs-.tsphphhtphhpsu....hho.N.EsRth.s ..................................................................................................................................................ht..........t..pshht..ssl...sslph..lupsluphshhhhcp..................................p.lhthhp....cPs...............suh.phh.pthhhp.hll..hG..Nua..l.hh...p....s..h..............t.Gp......sh.p.......Lh.h.sph.s................htstst.......................phh..Ythp............................................ts...................pt.......thpht.p....cl.lH.lp.s......h.p......tl.h.G.....hsshh.sshpsht.hstusppaptphapNG.u.p.suh.ll.h........stl....................sp...cshpp.........l+pphpp..t...p.G.....t..stt....+shhlps.....................G.h..chpsl.......s...ss..tc.sph.hp...hcphs...tc...c...ls....t....saplPst.hl.u...hh..s..p...ss...........uslEptst.sahpt.sl.hP.hhp.phpptlsp.hLh..................t.......hhcFs....sh..h..ths....t.ht.....h....hht..s.......hhs.s-hR....................................................................................... 
0 123 284 381 +705 PF05119 Terminase_4 Phage_sml_term; Phage terminase, small subunit TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 24.20 24.20 24.20 24.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.00 0.72 -3.85 76 1248 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 969 0 185 864 92 98.70 21 66.43 CHANGED A+ppW++lss.LpchtlLsphDhssLttYCpsYupahpspcplpc........pGhh.........hpstsG................thppsPhl.shhpcshpphpphusphGLoPuuRs+lsh....spspcscs .............................tt.apchh..L.p.p..h..sh.lsthDh.thlptY...spsashapps...pcplpp.........pGhh................hpspsG...........................shtps.Phl.shtpcshpphpphssphGLossuRt+lhh.....tttt...s........................................................................... 0 56 117 162 +706 PF03354 Terminase_1 Phage_terminase; Phage Terminase Finn RD, Mifsud W anon Pfam-B_3931 (release 6.5) Family The majority of the members of this family are bacteriophage proteins, several of which are thought to be terminase large subunit proteins. There are also a number of bacterial proteins of unknown function. 19.70 19.70 19.70 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.45 0.70 -6.08 7 2382 2012-10-05 12:31:07 2003-04-07 12:59:11 10 9 1576 0 301 2092 481 410.10 22 80.19 CHANGED PaQcFhhs.haGW....cptsss+pFscshhpluRtpGKohhhAhhslhphhlt..GpsstphhlsuhshcQAtclF...............p.stp.h+t....p..hphh.c.cl.tsppptl.hhh.pshh+hhospssphDGhp.phsIhDEhtphpscphspplhpG.ht..tN..hlhI.oTussshsu.hc-chchhpphh....phpsDsaashlap.Ds.--shDsppWhKuNPhLshs.phcslhcshptstsssh....phscFhsKphNLWh.pcossahshpsh-p......hs.hshpG+csalGlDhShhsDsTAlshla.htG.....+hahcshoFhP.tsttt...t..pptt....pYpphhppG...hphpssuhIs.pplhshlhpahppp.hplpthuYD...shtsp.hhsplppp.hsh...shhclpQshtshusshKhlpphhhpp+lp+sss.lhphtlhNsshKhDshs.lphsKcts...stKIDsssAlI.AhhpAhhst. 
...................................................................................................................................................................................................................ap..h.hhst...la.G...h...................t.p.t.....s..t.h....+h.a..p..c..shl.l.sR+sGKo.t.lh.u.sl.h.h.h.hh..h.hp..................sp.....s.t...c...l...h...h.s...AsspcQAphs.F....................p.s....pp..h.l.pt.................s...h.p.........h................p....h......................t.....p..p...l.........h.........h........t...s....t..u.....h..hp..h.....h..s....s....s.s.p.s.h.cGh.p.s.t..h...slhDE......h....+....t..h......t.........s.........p..........p.......hh...p....s....h....ps....Ghut...p...ps......h..hhh.I...oT..u..G.....s.....h.p..u....h.h.p...h.h.p.h.s.t.......p.......l...l......p..........t...................................p..........h..............ps..............s.....p.ha.s...h.l......h.....p..........h......-...........c.................t..........c................-.............h..p.......c.......p.s......W....h.....+.AN.....P....t.l.....s......h...s...h....t........h......c.t.l...h....pp.hpp...s.p...p...s.st................th.tt...a...hsKphN..h..h..h......t..t..p....t....s.ah.sh.....pph...pt................h....h...s..............p......h.......p....s.....p............s..h....h.........G.l.Dh....up.hpDhs..u..h..s..h...h..h...........p.s...................ph..h.h..h.s.h.....a.h.s..pt...h.p.................pp.p.............apt...a....c.......t.G.......h...lph.s.t...s...s...h...l.c......ht...lh.p......hhh.....p...hhpp.........................h....l..p..t..l.shD....h.ts.t.t.h.h.pt..l.p..p...s.h.............................hh.th.Qs...........h........p...................h......sss........h...c...p...hcphh...............h.............pt..p..........l..ha.s..s...........sPlhpWhhs..Ns..hh.c......c......s.s.s......h...h...sK...p.p.p....p.pK.IDshhAhl.Ah........th..........................................................................................
............. 2 82 188 254 +707 PF00628 PHD PHD-finger Pascual J, Bateman A anon Prosite Domain PHD folds into an interleaved type of Zn-finger chelating 2 Zn ions in a similar manner to that of the RING and FYVE domains [2]. Several PHD fingers have been identified as binding modules of methylated histone H3 [3]. 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.96 0.72 -4.27 92 10006 2012-10-03 17:27:21 2003-04-07 12:59:11 24 581 416 151 6059 10034 80 49.80 31 6.20 CHANGED hC.lCpp.............ssphltC..-..s.Cs.psaHhtCls.shp..ph.pt.........tWhCspCpsp ...........................hC.lCpp................tt....ssp..hltC....D............p..C...s...phaHh..tC...ls..s...hp.........p..hsps...................pW.hC.s.p.Cp..t.............. 0 1693 2722 4372 +708 PF01384 PHO4 Phosphate transporter family Bateman A anon Pfam-B_923 (release 3.0) Family This family includes PHO-4 from Neurospora crassa which is a is a Na(+)-phosphate symporter [1]. This family also contains the leukaemia virus receptor Swiss:Q08344. 
24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.01 0.70 -5.78 174 5075 2009-01-15 18:05:59 2003-04-07 12:59:11 15 11 3412 0 1413 3524 1377 368.90 29 86.21 CHANGED hGuNDsANuhuTuVuo+slohpp.AlllAulhphlG....Alhh.....GspVspTIsp.sllssshh....................................................shhuuLluuslWhhlushhGlPlSooHullGullGsulsss.....G...h........................ss...lpWs.......................................t.lhp...IlhuWllSPlluh.hluhllh...hhlphhhh..........t.h......................................hhh.........................................................................................................................................................................................................................................................................................................................................................hpph.......aphhtlhoushhuFuHGuNDsupulGslsshhh............ts.h...................................................................................................................................................................P.................h..Wl......llhuuhululGhhhhGh+llcTlGpcloc..lssspGassphuuAhslhhAoh.........hGl......PVSoT+slsGulhGlGhsc..............................................................................................t.hp..slph...phltpIlhu..............................................WllTlPh.....uullu 
.............................................................................................................................................................................................................................sGhpDsANuhATslu...............o+ulp.s.ph.Al.lhAulhshhG...........shlh..............G..s...p.VutT.Itp.sl...s.s.s...hhs.................................................................h.hhhh.usLlu..AhlW..lhshhhGlPsSoo..Hu.....lIGullGsulssu.............u....h..............................................su.lshs..............................................................t.lhp.........I.hh.........uhllSPll.uh.l...hu.hll.h....hllcthh.h.................th..............p.h..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pspph.........ap.hh..l.hoA.shh...uFuHG...uNDsppuhGhlh.hllh...............ht......ssh..........t...........................
.......................................................................................................................................t..sP............h..Wl..h..hh.sululu.lGshhh..G..h+lhpTl.GpcIsc..ls.s...p..G.huAphsuAhs.l.h.h.Ao.h...............h.G....h...............PVSTTHsls.uulhGsGhsc..............................................................................................t....t....ulph.....tssppllhuWllTlPsuullu........................................................................................................................................................................................................................................... 0 437 853 1191 +709 PF01663 Phosphodiest Type I phosphodiesterase / nucleotide pyrophosphatase Bashton M, Bateman A anon Pfam-B_994 (release 4.1) & Pfam-B_6150 (Release 8.0) Family This family consists of phosphodiesterases, including human plasma-cell membrane glycoprotein PC-1 / alkaline phosphodiesterase i / nucleotide pyrophosphatase (nppase). These enzymes catalyse the cleavage of phosphodiester and phosphosulfate bonds in NAD, deoxynucleotides and nucleotide sugars [1]. Also in this family is ATX an autotaxin, tumour cell motility-stimulating protein which exhibits type I phosphodiesterases activity [4]. The alignment encompasses the active site [3,4]. Also present with in this family is 60-kDa Ca2+-ATPase form F. odoratum [2]. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.38 0.70 -5.13 70 4764 2012-10-03 20:55:17 2003-04-07 12:59:11 17 83 2021 34 2144 12839 5667 283.30 14 54.15 CHANGED llllulDGhpschlpc.........tthPslppL.........hppGspst..hpssaPohThPsahollTGhhPspHGIluNphhD...........................................sppptp..athp.....shpsh............PlWhssp.cpshp..........sushhaPuspsshss......................Pphh..phhstp...hsh......................................................ptchsphhp.hhch..pscc........sslhhhah....ppsDpsG.H......................paGs..s..usc........hpp......slpclDphlupLhpsLccpth.tpssl.lllS.........DHGhsss....................................p+hlh...........lsph...........hs.sth...............hspsslstlhsp.............................ttthsclhptLpsh...........tthpshh...cp...phs.tchpht..........................spRhsslhlhscsGhhhhtpt................ttph..thtGsHGac .............................................................................................................................................................................................................................................................................lhh.hD..u....h..t......t....h.....................h.s.h..t.t..h...............................h.p.p..u.h....h...............h..............s...............h..........P..........o.h..o...h..s.s...h..s.lh....T..G......h..h...........s...t.........p....G......l...h..t..........h.h.......................................................................................................................................h...............................................................................shh....t...t...................................ss........h..............................h.......t....................................................................................................................................................................s........h.......
.................pt............h...................................................................................................................................................................................t.h..s...p....h..h............h.h.t...t.......h.....t...p.pp...................................s..h.h..h...h........a.h...........................t.h.......D....p....h.G...H.......................................................................................................................th.Gs.......p..........u.p....................................................htp.............................tlp.p.h...D.p...h...l...t....p.l....h......p........t.l.....p........p...t.............................t......h.......l.ll.su................D.....H...G..hssh...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 2 703 1239 1744 +710 PF02811 PHP PHP_C; PHP domain L Aravind, Bateman A anon L Aravind Family The PHP (Polymerase and Histidinol Phosphatase) domain is a putative phosphoesterase domain. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null --hand HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.25 0.71 -4.48 69 14520 2012-10-03 00:45:34 2003-04-07 12:59:11 14 119 4828 48 3478 11535 3589 213.50 16 29.40 CHANGED h-LHsHopaS.......DGts.slc-hl...cpAtchGhp....slulTDH..sshhs...shphhpttpt..............................hs....lp....hlh..GhEhpls.t.....................................................................................................................................................................................................tpt....htt.hchhlhuh.pt................................plsphtshthh.................................phlpph......ppshlslhuahpsththh...............hppscphlpth...................................ht.pshhhElpspth.t.......................phpp ................................................................................................................................cLHsH.o.....s.h.S....................DG....h.......h....s...h.......p.....c.....hl..............pp.At.p.h..Gh.p....slAlTDH.................ssh...h.u.................h.s..c.h...h..pt.spt........................................................................................ts......lc........hlh....GhEhthh.................................................................................................................................................................................................................................................................................................................................................................................................................p...t.tt...h...c..h.h.l....h.u..h...p...p......s.ht..................................................................................slhp.hhs.t.t.hh.........................................................................h............phlt.th..................ppG.h.l.h.hh...u...p........t...t.......h...h...h.................................
............................h...t.hht.h..............................................hhpl..................tthtth............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 1312 2409 3037 +712 PF02972 Phycoerythr_ab phycoerythr_ab; Phycoerythrin, alpha/beta chain Griffiths-Jones SR anon Structural domain Family This family represents the non-globular alpha and beta chain components of phycoerythrin. The structure is a long beta-hairpin and a single alpha-helix. 22.50 22.50 23.60 53.00 20.80 17.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.77 0.72 -4.37 5 25 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 4 6 0 31 0 56.60 54 46.87 CHANGED +APVITIFDHRGC.SRAPKEYTGsKAGGpDDEMCVKVuMsKlpV...SEusAstlLpEhLu .+APlITIFDHRGC..uRAsKEYTGuKAGspDDEMhVKVtusKlpV...SpusAsthLpphlu............... 0 0 0 0 +713 PF05023 Phytochelatin Phytochelatin synthase Wood V, Rawlings ND anon Pfam-B_9299 (release 7.6) Domain Phytochelatin synthase is the enzyme responsible for the synthesis of heavy-metal-binding peptides (phytochelatins) from glutathione and related thiols [2]. The crystal structure of a member of this family shows it to possess a papain fold [3]. The enzyme catalyses the deglycination of a GSH donor molecule [3]. The enzyme contains a catalytic triad of cysteine, histidine and aspartate residues. 
23.80 23.80 23.80 24.00 23.40 23.70 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.33 0.70 -5.37 20 280 2012-10-10 12:56:15 2003-04-07 12:59:11 9 7 183 4 123 295 16 197.80 39 55.01 CHANGED shsuhhppsLPs.shlsFuSsEGccLhtcu...ushcsaasLtspF.TQspsAaCGlASLsMVLNALu...............lD....PscthcG.PaRaFopssl.......pspshtc........lpcpGlTLsclusLu+sp..usslcsa...........................+us.chSlspFRpplhpshsssspalIlsYpRpslsQTGsGHFSPlGGYcttpDhsLILDVARaKYPPaWVshphLapAMpolDs.soucsRGahLIs ..................................................................................................................h......h.lst..hlthsS.tGpphh.pu.......sthpsaapLhs.tFpTQspsuaCGlAoL..........shVLNALt................................l-.....Ps..cta.c..u...PW..RaFspshLt..............sspshpt............lcp.pGlohsc....lssLApss........Gspspsh...................................+ss..ps.o...l.-pFRphlhp......s..s................s.......s..s.........s..........p.alllsYpR.ps.....l.t.......Qs...G..............s....G...H....F............SPlGuYcttp.D............hsLILDVARaK.YPPaW.Vsh.phLacAMso.hD.t..so.s.ptRGahll......................................................... 0 64 91 107 +714 PF02567 PhzC-PhzF Phenazine biosynthesis-like protein Mian N, Bateman A anon COGs Family PhzC/PhzF is involved in dimerisation of two 2,3-dihydro-3-oxo-anthranilic acid molecules to create PCA by P. fluorescens [1]. This family also contains Swiss:P28176, though there is no significant sequence similarity to Pfam:PF00303 members. This family appears to be distantly related to Pfam:PF01678, including containing a weak internal duplication. However members of this family do not contain the conserved cysteines that are hypothesised to be active site residues (Bateman A pers obs). 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.87 0.70 -5.19 14 3486 2012-10-03 03:02:41 2003-04-07 12:59:11 11 19 2256 24 972 3693 843 247.90 26 93.31 CHANGED lDAFTspPhp.GNPsuVsh.s-.....pls-stMQplAsEhshSETsFlhhss........pssch+lRhFTPssElshsGHuTluouhsLhppsh......ssspl.hpThuGhlss.............tppssstphhlphphPhhshhshsptt.s.h.hhhths................sh.hthtssGhtplhlsLpShculsslpPch........p.htchsstuhhshsstsssustcapuRhFu.PthGlsEDPsTGSAsssLutYLspchph.......phplhQGpu.uRsGplphph.....c.t...p+VplsGpAVslhcG ......................................................................................................................................................lDsF..o..s.p..s...ht..GNs.su.Vlh...sp...................L..s...c.t.p.MQtI...A.....p.-...h.....s....h......S........ET......s......F..lh..t.s..................................t.ss....sh......c.....l..Rh..F.T.P.p.t.E.lsh.sGH.sTlusu.h.s.lh..phht......................tssp.l...hh.ps.t.u.G..h..ltl.....................................................t....t..t.th..h..h.p......s....h....P...t...h.......t...s................h.......s.....t....t....t.....h....t...h....h.......t..h.h.h..p...................................h..s.t.h.h.s..s.G.......t...t..l..h..l.......l.p.....s....t...t....s...l..t......s..l.....p....P.sh..................................th.h..t...p...h.....s....t....t......s.....h.....h...s........h......h..........h........t.......s......s........t.......s.......t....t....c...........h..hu..Rh.F..s...P..t..h.G..l......s....EDPsTG.oAsus..l..u.s.a..hh.p...p..h.hh...................................h.p.h..pQ.G.p..t.h.s..Rsu..hlt.splt.............................tp........l...h...lu.....Gpuhhh....................................................................................................................................................................... 
0 306 597 824 +715 PF00454 PI3_PI4_kinase Phosphatidylinositol 3- and 4-kinase Sonnhammer ELL, Finn RD anon Prosite & Pfam-B_6771 (Rlease 7.6) Family Some members of this family probably do not have lipid kinase activity and are protein kinases, e.g. Swiss:P42345 [1]. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.34 0.70 -4.90 43 4451 2009-09-16 22:35:53 2003-04-07 12:59:11 22 163 790 95 2750 4248 185 225.50 21 15.05 CHANGED th..hhhKsG-DLR..QDthhhQhl...........................plhsplhpp-shshc.......lpsYtllshusptGhlp..................hlsss.ohtpl..tpthhptth................................................................sslhphFhcpssss.......tpahp................................................................stppaspShAshsllsYlLGluDRHssNlhl......................................ptsGclhHID.................FGhhhs.....t..p.-psPFcLop........shspsh.t.....ssstphthF+phshpshphlRcstshlhsll.........phhhpsslhphpp 
.....................................................................................................................................................................................t...hhhK..s..t.-..DhR.....pD.hhhQhh.........................................................phhp.pl..h....t.....p.p..s.h.s.hp............................htsY..t...llsh...us...t............G.......hlp........................................hls.ss.....sl..t........t....l............p..p..h.....h............................................................................................................................................................ssh.hph..h...h..p...p.....s..ss.............pt..a..h..p..............................................................................................................................................................t.pp..p..a...spS.........h....Ashsl.............lsYl.....Ls....ls...D.....R....H.ss..Nlhl.......................................................................................................ptsGc....l....hH....ID.................................................FGh.h..hs..............t...........t.....-ps.PF+LTp........................phhpsh.......................t.st......th..a..........p............p.......h..........s................p..........sh.......h...hR..p...p...t..p..h..lhsl.h.................phhh.ssh.t...t......................................................................... 0 1017 1545 2244 +716 PF00792 PI3K_C2 Phosphoinositide 3-kinase C2 SMART, Griffiths-Jones SR anon Alignment kindly provided by SMART Domain Phosphoinositide 3-kinase region postulated to contain a C2 domain. Outlier of Pfam:PF00168 family. 
20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.78 0.71 -4.41 24 929 2012-10-10 12:23:49 2003-04-07 12:59:11 19 41 276 79 559 820 7 139.70 21 12.87 CHANGED sp.plhlpssla...tssc.Ls.....sl.poshsshs..........phsWs.chlsas.lphpsLPtsAhLslsla.....shptsppt....................lGhssl.LFsp.+shL+p.G.ppLpl.Wsstpsst.h.h.......................thppGphpp...sphlDthsh ..............................................................s..clhVpstla..pss.c..Ls........sl...pTphhshs.......................tphtWs..c.....h..lpa.s..lplscLP+pupLslsl.a...........s.l..stspst.......................................................hsluh......sslsLFs........h...cshL+p.G.phtLpl.Wssstsptt..........................................................................t.................................................................................................... 0 172 248 399 +717 PF00640 PID Phosphotyrosine interaction domain (PTB/PID) Bateman A anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.75 0.71 -4.20 25 2909 2012-10-04 00:02:25 2003-04-07 12:59:11 18 102 112 50 1457 2821 2 128.10 21 22.94 CHANGED Fts+YLGsl-Vsps......t..h..sp-Alpplptt.................ht....h..puptpph.pplhLplSscslplhssco.........ppllhsaslcpISahus..Dss.p.ctFua...............htctsspphtCHlFhspc....ApslupulGpAFpltapphLct 
......................................................................................a.spaLG.hpl.p...............s.ppulpplptt.............................................st.t.p...ph...hpl.....h...L...p...l...o...h.p...s...lp...l..l...s..sps.......................pp..l.l...h..p..p..sl.p.p..I.Sa.h.ut........D...............p..p...............chFua..............................................................hs.p...c...t....s...s....p...p....hhCH..lFp..spp.............Aptl..s.s.ltpAF..p.hthp.h...t............................................. 0 345 463 898 +718 PF04987 PigN Phosphatidylinositolglycan class N (PIG-N) Moxon SJ anon Pfam-B_5307 (release 7.6) Family Phosphatidylinositolglycan class N (PIG-N) is a mammalian homologue of the yeast protein MCD4P and is expressed in the endoplasmic reticulum [1]. PIG-N is essential for glycosylphosphatidylinositol anchor synthesis. Glycosylphosphatidylinositol (GPI)-anchored proteins are cell surface-localised proteins that serve many important cellular functions [2]. 
20.80 20.80 20.90 20.90 20.20 20.70 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.54 0.70 -5.83 39 370 2009-01-15 18:05:59 2003-04-07 12:59:11 9 19 235 0 259 348 1 374.50 26 46.08 CHANGED cGLcYLQTYsWhaLpsllThGalGWIsashstllchash.......ptp.......p.....................pohhshhhFss.lhlsLhhlhhhQ+SPhpYYhYhhFPlhFWpp............lhscppsltsuhp.hhtt.p......hhphlhthlhh.lulhEsl..VhuaFcRplholhahhhuh.as..hhhshthht..ppthhshhWhlsslhhosFoLL....sss+hcshphI.......hhuuhLhhlsuhhhlh......................tsphoppl......hshQlhhlllshhsTptoshSLps..+pGLPhhsQlsuWhhLlsSll.lPhh....ss.schhtRLhlIFLshuPsFllLoISYEuLFYhsFshpLhtWlplEptlhh.stt.t.................................................h+slslsDhRlulhahhhlpsAFFuTGNlASlS............SFsL-SVhRllPlFsPFs.GALLl.lKLlIPahllossLGllscpLplsspslFhlllslSDlMoLpFFalV+scGS ..................................................................pGLpYhpsYshhhLhshlshualGWhhhhhhhllc.hs...............p..........p.....................................tthhhhhhhhs.hhhhlhhhhhhppsPhtaahYhhhPlh.....h.W.t.........................lhtp.ht....hh.tt....h..h......h.t...............................h.phhh.hhhh.hhh..hphl.....lhuaa.R..hhohhhhhhuh.hs...hhsh..h......tthh.............hhWhh.shhhusFsh....h....ss.h..sh.....h.l.............hhuuhlhhh.hshhhh.................................................h.thphhl.............hhhphhhlhh..shh...........sh.thpt...p.tslPhh.plhuW..hhlh.uhh...hPhh.............................s.pthhtRLh..lhhshhssallLohuYEu..LFhhhhshhhhhWlp.........lEpthh..pt.................................................................pthhhpshRhuhhhh......hhh.suFFuTGNlASls............SFshsolhphlslF.sPFhhuuLlh.hKlhlPahll....sshthl...........shh.hphs...............tsl..................a..........hhlh.......hhoDhhsLpFFahV+spGS.................................................. 
1 74 129 213 +719 PF01850 PIN PIN domain Bateman A anon [1] Domain \N 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.43 0.71 -3.78 170 8807 2012-10-03 20:43:45 2003-04-07 12:59:11 16 13 2111 52 2682 7029 1235 121.10 16 80.11 CHANGED hhlDTslllthh............pscsttthhtthhpt........tplhhsslsh..hElhhshpp..............................tttphhphhhh.htthplls...hstphhtpstclhtpht...........lshhDshhhAtA.......httsht.....ll.T.tDpc.......htchsth ..........................................................................llDTsll.lthh...................psp...s.t.t.t.t...h....tt.h.lpt..................tplhlssh..sh...hE.lh.hshpp.............................................t.ttp.h.hthhhh.....h....t..t....h..p....l.ls..........h.s.t.p....h.h....t..t....s.t.p...l...h.tpht...............................hshhDshlh.AhA.........ht...tshs...........ll.T.tD.p.c....htt....h........................................................................................................................... 0 858 1797 2312 +720 PF04696 Pinin_SDK_memA pinin_SDK_memA; pinin/SDK/memA/ protein conserved region Waterfield DI, Finn RD anon Pfam-B_4141 (release 7.5) Family Members of this family have very varied localisations within the eukaryotic cell. pinin is known to localise at the desmosomes and is implicated in anchoring intermediate filaments to the desmosomal plaque [1]. SDK2/3 is a dynamically localised nuclear protein thought to be involved in modulation of alternative pre-mRNA splicing [2]. memA is a tumour marker preferentially expressed in human melanoma cell lines. A common feature of the members of this family is that they may all participate in regulating protein-protein interactions [3]. 
25.00 25.00 25.70 25.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.86 0.71 -4.55 11 266 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 204 0 185 268 0 123.00 32 28.27 CHANGED pDpcp+.tRNRRhFGsLLGTLQKFpQEpshh..spppc+Rs-IEpKl......EcQtctERcplccc+cpLap-R+ccQhElRtLEpKhtchc..cpWppp.thltpa.I+T.KTcP+laahPtphsspppKhLcEs.pc-sp ...........spct+.pRN+R.....hFG.t.L.h.G.TLp+Fpp..E..p..p.h.............op....ppc..RR....pEIEp+h......-t...ptcp.E....c.cp.l...c....p....c++....p..ht....-.+R....t.c....ph.plR..th.-p..ch...t.....t..p....h.h...c....php.pp.tthtpa.l........+.....T..+scP.l.........aYhPhchs.tspphlppp..pp.................................. 0 68 102 150 +721 PF01504 PIP5K Phosphatidylinositol-4-phosphate 5-Kinase Bashton M, Bateman A anon Pfam-B_571 (release 4.0) Family This family contains a region from the common kinase core found in the type I phosphatidylinositol-4-phosphate 5-kinase (PIP5K) family as described in [1]. The family consists of various type I, II and III PIP5K enzymes. PIP5K catalyses the formation of phosphoinositol-4,5-bisphosphate via the phosphorylation of phosphatidylinositol-4-phosphate a precursor in the phosphinositide signaling pathway. 
20.20 20.20 20.40 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.86 0.70 -5.27 117 1752 2009-01-15 18:05:59 2003-04-07 12:59:11 13 60 323 8 1132 1665 41 260.10 28 29.93 CHANGED hSL......ssp.....h.phsosGK..........SGS.hFahopDp+FlIKTl.p+...pEhc......................hhhc..hLspYap.alppssp.........................TLLs+h....aGlapl.........................p.....ss...pcht...........................hlVMpNlF....st..hplc.......cpaD........LKG.Sphs...........................Rpsp.............................t....p........tt.............sLK......Dhshhp.......phlhlstpp+phlhcplppDspFLpphplMDYSLLlGl....................+........................................................................................................................................thh...t..........................ptlhalGIIDhLppYs..hpK+lEphhKsh.hhs.t.....pslSslsPppYucRFh.cFlpphh .......................................................................................................................h...sssG+SGu.hFaho.t....Dc+alIKol....p+.....pEhp..............................hhhc..hLs.tYap...a....l.pp.s..p.......................................TLLs+hhGlapl.............................t.h..su...pphp..................................................................h...l.VMpNlF............s........hph.p............................ppaD........L..K.G.Sphs...................................................Rpsp..............p.....pt.ptpt................................................tsshK..D.shhp...................phlhls.t.p.tp.phlh.cplpp.Dsp.....aLp........ph.plMDYSLLlGl...........c.................................................................................................................................................................................................................................................................................................................................................................................
.s......................pc.hha.hG.II..D..hLpp.Ys............h.pK+lEphh.K...sh.hhs..s........pphS.s.lsPp.YtcRFh.c.hhpph....................................................................................................................................... 2 374 629 900 +722 PF02171 Piwi Piwi domain Bateman A, Hammonds G anon Bateman A Family This domain is found in the protein Piwi and its relatives. The function of this domain is the dsRNA guided hydrolysis of ssRNA. Determination of the crystal structure of Argonaute reveals that PIWI is an RNase H domain, and identifies Argonaute as Slicer, the enzyme that cleaves mRNA in the RNAi RISC complex [2]. In addition, Mg+2 dependence and production of 3'-OH and 5' phosphate products are shared characteristics of RNaseH and RISC. The PIWI domain core has a tertiary structure belonging to the RNase H family of enzymes. RNase H fold proteins all have a five-stranded mixed beta-sheet surrounded by helices. By analogy to RNase H enzymes which cleave single-stranded RNA guided by the DNA strand in an RNA/DNA hybrid, the PIWI domain can be inferred to cleave single-stranded RNA, for example mRNA, guided by double stranded siRNA. 
19.90 19.90 19.90 20.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.82 0.70 -5.45 18 2067 2012-10-03 01:22:09 2003-04-07 12:59:11 12 30 421 62 1282 2033 12 263.30 32 35.31 CHANGED hllsllsc.pspsh.YtslKKhhps-hsl.oQslphcshhph..........tphhsNlhlchNsKlGGhN..hhlsshsh.......pshh.........llGhDlsHsssup..........tssoluuhluohs.ppsspahussphpppsp-hlss.lpp............hltchLpsapcsspp.pPp+IllYRDGsuEGph.plhp.Elspl+cshc.......plspsap..PplshIllpKpppsRhFspstsp........................tt.Ns.PGTlV...DstlspPpth-FaLsupsshpGTs+Ps+YsVlhD-hthss-cl.QpL......oYpLsahahpsh....pslulPsPlaYAcplAttstpphc ......................................................................................................................................................h.hhhll..st...pt....h..Yst.lK+h..s.p...h....h..sh..........oQsl...p..p.h.p....................ph...hss.l.sh.......plNsK..........l..........GGh..........hhh..p.........................tssh..................................hl...GhDVsHssssp................................................ts..Slsu.......h...Vu.Shs.......p....s.....c....a......h.....u...p.sp...h.........Q...........p......t......p.....p.......E.........h.l......ps...ltt...................................................h.hpph.L.h...ta........h............p........t...........s....t........t..................h........Pp...+I...lhY....R...............DG.Vo-GQh.t.pl.h..p..El....t.lc...pu.ht........................p.h...t...........s..hp............Ptlohl.lVp.K..R..H..poR.......h.......F....s...t..stpp.........................................t.t.Nh..s......GT.l..V.......................Dp.t..l..s......p..P..p.......t......a....D..F.a.L.s......S..+..t..........u..h................p......G..T...u.+.Ps+Y..pV..l...h...D....-..........s.............p..........h..s...........s...D.....pl....Qp....L..................Tap.L.Caha.h.pss.................p.slsls..sPshYAchhuhhst............................................................
.......................................... 0 440 645 1059 +723 PF00801 PKD PKD domain Bateman A anon [1] Domain This domain was first identified in the Polycystic kidney disease protein PKD1. This domain has been predicted to contain an Ig-like fold [1]. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.26 0.72 -4.23 93 5437 2012-10-03 16:25:20 2003-04-07 12:59:11 15 708 991 15 2785 5152 1955 70.40 22 13.50 CHANGED hsss.........shhstslpFssps....s................Gssssap....WsFGD.......................stsssptsssHsYsp......................sGsYsVsLssssssussss .........................................................s...........h..s.slpFssps.......s..........................us.s..s.s....ap.............WsFGD........................sssu.s.t..t...s...s...s..H.s...Y.sp.....................................................................sG...s.Ys.VsLol.ssshut...t......................... 0 1060 1676 2066 +724 PF01477 PLAT PLAT/LH2 domain Bateman A anon Bateman A Domain This domain is found in a variety of membrane or lipid associated proteins. It is called the PLAT (Polycystin-1, Lipoxygenase, Alpha-Toxin) domain or LH2 (Lipoxygenase homology) domain. The known structure of pancreatic lipase shows this domain binds to procolipase Pfam:PF01114, which mediates membrane association. So it appears possible that this domain mediates membrane attachment via other protein binding partners.\ The structure of this domain is known for many members of the family and is composed of a beta sandwich. 
20.80 20.80 20.80 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.52 0.71 -3.70 84 3359 2012-10-02 11:40:54 2003-04-07 12:59:11 18 189 280 91 1790 3171 14 109.70 19 17.37 CHANGED hplplh....utphtussst...h.h.lhs.spGcssphthhps.h............ssshphphch..t....slG.lttltlppt......................hps.paalppl.hltt.....hsst.sph.pFss......pp.Wltss.........phhhh .........................................Yplhlh.T...ut...t.u.u.Tssp.......lhltlhG...pp..G...c...o..t...p.h.....l.h.psttt...................................tsstppF.p...l..ps...t........slG..p.l.t.plp.lt+c.......................st.shps..sW........alc..pl..hlps...................s.t....p.p..h...hFss.......pp.Wlstpp............................................................................ 0 578 838 1202 +725 PF00614 PLDc Phospholipase D Active site motif Ponting C, Schultz J, Bork P anon Alignment kindly provided by SMART Family Phosphatidylcholine-hydrolysing phospholipase D (PLD) isoforms are activated by ADP-ribosylation factors (ARFs). PLD produces phosphatidic acid from phosphatidylcholine, which may be essential for the formation of certain types of transport vesicles or may be constitutive vesicular transport to signal transduction pathways. PC-hydrolysing PLD is a homologue of cardiolipin synthase, phosphatidylserine synthase, bacterial PLDs, and viral proteins. Each of these appears to possess a domain duplication which is apparent by the presence of two motifs containing well-conserved histidine, lysine, and/or asparagine residues which may contribute to the active site. aspartic acid. An E. coli endonuclease (nuc) and similar proteins appear to be PLD homologues but possess only one of these motifs. The profile contained here represents only the putative active site regions, since an accurate multiple alignment of the repeat units has not been achieved. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.70 0.72 -6.96 0.72 -4.02 48 1567 2012-10-02 13:01:53 2003-04-07 12:59:11 17 33 652 3 833 6708 176 29.50 40 4.30 CHANGED hshhhHpKhlllDcc............hsalGuushsssp ........hhsHHpKhlllDsp.................luFlGGlDLs.tp...... 0 219 475 677 +726 PF01690 PLRV_ORF5 Potato leaf roll virus readthrough protein Bashton M, Bateman A anon Pfam-B_1335 (release 4.1) Family This family consists mainly of the potato leaf roll virus readthrough protein. This is generated via a readthrough of open reading frame 3 a coat protein allowing transcription of open reading frame 5 to give an extended coat protein with a large c-terminal addition or read through domain [1]. The readthrough protein is thought to play a role in the circulative aphid transmission of potato leaf roll virus [1]. Also in the family is open reading frame 6 from beet western yellows virus and potato leaf roll virus both luteovirus and an unknown protein from cucurbit aphid-borne yellows virus a closterovirus. 
27.40 27.40 27.70 27.70 27.30 27.30 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.73 0.70 -5.34 14 317 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 35 0 0 332 1 377.50 41 71.26 CHANGED VDup.PsPsPuPsPtP...PsPoPEPsPs.p+cRFhs.YsGsPpshIpsRpsoDuIsltslssQphpYIEDEshshpslsutWhosNslpA.PhFlaPVPcGpaSVpISCEGaQussshusspcGphpGhIAYsssss.csWNVGshssssITNh+usNoa+.GHPDLclNuC+FspsQlVE+DuhlSFHlpsssp-usFaLsAPslpKouKYNYsVSYGsaT-+cMEFGhISVslDEp..-utpsupcs+pshRsGHh.hhspshc...sh.P.ssp............tpps.pTPssshspst..........................p....s.pcslspsp..ss.s.............pp.shth.thph.sstlsssspphh.sptt..................................ssc.lcs.sss.hs.shspssphhtsh.t...h..s.s.h.......ts.sPs.................slhsspp.pu............ShtuppLpGssh+tpst..pspshp.phosppptpYpRl+pohGhTsAcpahppht .......................st.s.sPpPsPsPp....PtPsPpPsP....pRFht.Y.GsPpshIpsRpNo-tIsltplts.sMhhhEDEshshpplsutahsNNphpA.shhlaPVscGpa.SV.IpCEGaQuspphusspcGph.GhIAYsssp..csWslGsYsGssIoNhhsssoa+.GH.D...hclNuspFsptQlVERDhshSFHlcss.ppupFhLhAPslpKos+YNasVSYGsaT-+hMEaG.ISVslDEp..tu.p.....sphscps.h+ss+h..hsps.p.......s...p...............................................t.ts.ppP.t.........................................................................ph..............................p.s.h............tst.....l......ts....h.tt........................................ssp.hc....t.hs...s.p...hss.............s...h..........s.ssp...........................ph..ttt....t.........................................................s..st.lpusp..tt....p.phh..phoppphtpYpplpps.s.ssAptah.t..t....................................... 0 0 0 0 +727 PF03126 Plus-3 Plus-3 domain Bateman A anon Bateman A Domain This domain is about 90 residues in length and is often found associated with the Pfam:PF02213 domain. The function of this domain is uncertain. 
It is possible that this domain is involved in DNA binding as it has three conserved positively charged residues, hence this domain has been named the plus-3 domain. It is found in yeast Rtf1 which may be a transcription elongation factor [1]. 21.20 21.20 21.20 21.60 21.10 20.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.23 0.72 -4.00 39 464 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 274 4 319 438 6 106.20 28 16.55 CHANGED c-lpplplsRohltchhhpP.tF-pslhGCFlRlslGsscpptt.....YRlspIpulppst...sYph.......tphtTshhlplppup..pp+.aphshlSspshscpEapcah.pphtps .....................plpclpLsRspltchhthP..hFc.csltGCFVR.lslGs..pppp............YRlspItslscss...sYpl...................sshpTsphLpl......ppGp......sp+.aphphlSs.ptFo-sEapcahpsh...h................................... 0 102 178 263 +728 PF04043 PMEI Plant invertase/pectin methylesterase inhibitor Yeats C anon Yeats C, McQueen A Domain This domain inhibits pectin methylesterases (PMEs) and invertases through formation of a non-covalent 1:1 complex [1]. It has been implicated in the regulation of fruit development, carbohydrate metabolism and cell wall extension (see [2]). It may also be involved in inhibiting microbial pathogen PMEs. It has been observed that it is often expressed as a large inactive preprotein [1]. It is also found at the N-termini of PMEs predicted from DNA sequences (personal obs:C Yeats), suggesting that both PMEs and their inhibitor are expressed as a single polyprotein and subsequently processed. It has two disulphide bridges and is mainly alpha-helical [2]. 
26.70 26.70 26.70 26.70 26.60 26.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.24 0.71 -4.31 142 1441 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 79 25 905 1470 0 147.00 18 43.47 CHANGED sss....tp.hlp...shCpp..Ts.......st.hChpsLss.......tttsss.p...s....lhphsl.phshspsppshshl.p....plhtp....................spstpt..ps......slp....sChchhssul.c.pLp.puhpsl.............................pt...sc..........spsh.lSuAh....sstpoCt..-uh.................pts..h.......p.h...pt......pslpcl.......sssuLul ..............................................................s....t.lpthCpt.....o...............st.hChpsLts............tstsss.p....p............lhphul..phshs..psppshshh.p......plhpt....................sts.pt....ps....ulp.......sC..h.c.h.hst...ul..s.pLp...puhp.sl.................................pt...ss..........spsh.lS.u..A.l....sstpTCh.....-ua...................tpsthp.....s.lt.....tt......pphtphsssuLuh.................................................................... 0 137 531 706 +729 PF04721 DUF750 PNGase; Domain of unknown function (DUF750) Waterfield DI, Finn RD, Pollington J anon Pfam-B_4045 (release 7.5) Family This family of proteins with unknown function shows similarity to PNG-1, a enzyme responsible for de-N-glycosylation of misfolded glycoproteins in the cytosol [1]. However, unlike PNG-1, this protein does not contain a catalytic triad in its transglutaminase domain [1]. 21.30 21.30 21.30 22.10 21.00 21.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.14 0.72 -3.98 23 98 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 42 4 71 108 0 62.70 37 13.79 CHANGED pNYlKFTYDllssoYS+sscDGSslp.PahlpNlcRl.-........pp.stsYlcp.pstc.u.........................ItWpFs ......h.......YsllcDpY.....s+sss.....s.....spslp.....sac.....hcsIhRKVE........pDWphVYLsR.cEuss.uh........................ISWKF-...................... 
0 42 46 55 +730 PF03726 PNPase Polyribonucleotide nucleotidyltransferase, RNA binding domain Griffiths-Jones SR anon Bateman A Domain This family contains the RNA binding domain of Polyribonucleotide nucleotidyltransferase (PNPase) PNPase is involved in mRNA degradation in a 3'-5' direction. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.72 0.72 -3.71 21 4259 2009-01-15 18:05:59 2003-04-07 12:59:11 9 35 4196 34 982 3077 2312 81.70 30 11.36 CHANGED sLhpcltslApsclupAapI..ssKp-RhsplDpIKspVhtthh...tppt......phsppclpslapsLcpclVRppIlsspsRIDGRc ............................................L.splpshs.p.s.c.l..pc.Ah..pl.......p-KppRpstlc.tlK....pclhs....phh.............t...p..p..p..................................................ph..s..t..p.-...ls...p...h.hc...plc+clVRptIlssch......RlDGRc..... 0 334 637 830 +731 PF01357 Pollen_allerg_1 Pollen_allergen; Pollen allergen Bateman A anon SCOP Domain This family contains allergens lol PI, PII and PIII from Lolium perenne. 20.70 20.70 20.90 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.82 0.72 -4.18 136 1558 2009-01-15 18:05:59 2003-04-07 12:59:11 16 11 171 13 632 1490 0 77.50 40 32.72 CHANGED lpFplsut...saa.hVLlpsluGsG-lssVplK.......................G............o....s....WhsMs+sWGssWp..hss...th.hst.sLSFRl..Tot-..Gpslsu.psVlP ..................l+FslsGt....s.aa.hVLlpNVuG.sGDltuVslKt..............................................st............o......s.Wh.s.Ms.R..N...........W...G.....t..s..........Wpssu...hh...st.....sLSF+l..Ts.s-...G..+..slsu...sVhP.............................. 0 78 341 494 +732 PF01522 Polysacc_deac_1 Polysac_deacet;Polysacc_deacet; Polysaccharide deacetylase Bashton M, Bateman A anon Pfam-B_502 (release 4.0) Family This domain is found in polysaccharide deacetylase. 
This family of polysaccharide deacetylases includes NodB (nodulation protein B from Rhizobium) which is a chitooligosaccharide deacetylase [1]. It also includes chitin deacetylase from yeast [2], and endoxylanases which hydrolyses glucosidic bonds in xylan [3]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.34 0.71 -4.49 104 13288 2012-10-03 16:37:10 2003-04-07 12:59:11 16 176 4258 46 3692 10795 3031 133.50 20 38.42 CHANGED ttsssp+slhLTFDDGs................................sshssplLslLpchplpATa.FllGp.h...sppt...............................................................................sphl+chhppG.aplusHo...................hsHs.......ph..........sshssp.phpp-lppspphlpph..sGp.psph..hRsPaGt......hssss....hphspphGhphs ..........................................................................................................t....tp.lhlT..F.D.D.G.................................ts.h.s..s...p.l.l.s.....l.......L.......c.......c.......h.........s........h......p.......A..T..a..Fl..hup..h..........hppt.............................................................................................................s.p...h.l...+..c.h.h..p......p..G...ac......l..u.s...Ho.........................................................................a..s..H.................ph............................................................sp.h.stp..ph....p.p....-.l...t....c...s....p......p....h...lp...ph................h...G...p....p.....s.ph................hp.......h...P..h..Gt..........hs.p..p..s.......h.p.hh.tt.th...h.......................................................................................................................... 0 1356 2490 3163 +733 PF04831 Popeye Popeye protein conserved region Mifsud W anon Pfam-B_3905 (release 7.6) Family The function of Popeye proteins is not well understood. They are predominantly expressed in cardiac and skeletal muscle. 
This family represents a conserved region which includes three potential transmembrane domains [1]. 28.10 28.10 28.10 28.70 26.40 27.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.83 0.71 -4.47 10 249 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 76 0 142 229 2 145.50 45 44.99 CHANGED EhcuLYpslapPLpVPlclF+cIstshtscVpoLpp-psYAlEGKTPI.DRLSlLLSGRl+VSh-GpFLHYIaPaQFLDSPEW-SLRPSEEGtFQVTLTA-s-C+YloWpR+RLYLLLsp-RYIuRLFSlLLGpDIu-KLYuLND..KlauKpGhRh ..............................Ehp.lYpslFpPLpVshphF+clstsh.pplhoLcptpsYAhEscTsl.D.+LSlLLSG..........+.........l+VS.c.GpFLHhI.aPhQFlDSPEWcShp......o..c-.shFQVTlsA-ssC+alsWpRc+LphhLtp-taltclFshLlGpDIscKLYuLN-..phhsphth..h................... 0 36 47 88 +734 PF01558 POR Pyruvate ferredoxin/flavodoxin oxidoreductase Bashton M, Bateman A anon Pfam-B_350 (release 4.0) Family This family includes a region of the large protein pyruvate-flavodoxin oxidoreductase and the whole pyruvate ferredoxin oxidoreductase gamma subunit protein. It is not known whether the gamma subunit has a catalytic or regulatory role. Pyruvate oxidoreductase (POR) catalyses the final step in the fermentation of carbohydrates in anaerobic microorganisms [1]. This involves the oxidative decarboxylation of pyruvate with the participation of thiamine followed by the transfer of an acetyl moiety to coenzyme A for the synthesis of acetyl-CoA [1]. The family also includes pyruvate flavodoxin oxidoreductase as encoded by the nifJ gene in cyanobacterium which is required for growth on molecular nitrogen when iron is limited [2]. 
20.70 20.70 20.90 21.30 20.60 20.30 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.99 0.71 -4.36 119 5899 2009-01-15 18:05:59 2003-04-07 12:59:11 13 69 3035 27 1854 4841 1225 177.10 23 24.80 CHANGED G..GpGlhosuplLupus.hppG...ht.lhshppausphR......GGss.hsplR.lus.c..h......sth..t.psDhlluh................ct.psh..t..phhst..lpss......Ghllh..ssshhsst.ht.th.........................................slsh...pclAt.........chhs.......shhhNhlhlGsh.....sthhs..lsh..-s.....lppslcpphs.tt......................thlctNhcAhctGhp ..............................GGpGslsuuchhsphh..tp.ps...................hh...spsh.sas..ucp+......G.G.so.huclR..huc..p.hh...................sshhh.s..psDhllsh..................s..thl.p.......phhpt...l+pG........Ghhll....No....hh...ss...p..th.tpth..................................................................phhhlsu.splAp.............................-h.sh...............stthNhlhhuuh........h..plss...lsh..........-s...........hhctlcpp.....hs.pp.......................pll-hNhp.AlctGh............................................................ 0 715 1316 1619 +735 PF01855 POR_N Pyruvate flavodoxin/ferredoxin oxidoreductase, thiamine diP-bdg Bateman A, Griffiths-Jones SR anon Pfam-B_323 (release 4.2) Family This family includes the N terminal structural domain of the pyruvate ferredoxin oxidoreductase. This domain binds thiamine diphosphate, and along with domains II and IV, is involved in inter subunit contacts [3]. The family also includes pyruvate flavodoxin oxidoreductase as encoded by the nifJ gene in cyanobacterium which is required for growth on molecular nitrogen when iron is limited [2]. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.34 0.70 -4.85 32 5386 2012-10-02 16:07:47 2003-04-07 12:59:11 14 71 2734 21 1633 4863 884 224.70 28 29.55 CHANGED puuhtss.cshuuYPITPuos..hsEthsphhspuph..........phlphEuEhuAhuslhGAuhuGu+shTuTSu.GlhLMhEslhhhuupchPhVlhlssRuhsusulslhs-psDhhtsR......-sshhhLsssslQEuaDhslhAatlA.c..splPshhhhDGahsoHphpplpl.s.-.....hhcp.h.shcchp.........hhsP.cpPlstsstts.sshhptcctst.shptstthhpphhc ...........................................h.suhtss.c.h.h.uhYPITPSSp............hsEhhs..p.h....t.spstt.......................phlp..h.p.s..E.h.uAhuss..h.G.As..h...u.G.uh..s..h.T.s.Tu.u.GL...L.Mh-..s.l.h.h..h...u...s.p..tl..P..h.Vlhs..ssRus.....supuL.sh.hs-...p.uDl.h.ts.p.......ps.s.h.s.hLss..uos.QEsh.Dhsh.hAaphu.c.......hpsPhlhhhD.......G.....h..h..s.......H...t....h....p....p.....l...p..h.......c..c..........h...........t.....h....h.....t.p.........h....h.t.t....p...p.......................thss...pt...s.........h..h.s.s...............s.h.h.h.p..tptth......p.......................................................................................................... 0 680 1192 1447 +736 PF04151 PPC Bacterial pre-peptidase C-terminal domain Yeats C anon Yeats C Domain This domain is normally found at the C-terminus of secreted bacterial peptidases. They are not present in the active peptidase. It is possible that they fulfill a similar role to the PKD (Pfam:PF00801) domain, which also are found in this context. Visual analysis suggests that PKD and PPC are distantly related (personal obs:Bateman A, Yeats C). 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.72 0.72 -3.01 151 2040 2012-10-03 16:25:20 2003-04-07 12:59:11 10 196 870 21 476 1650 202 70.40 24 12.12 CHANGED spchaphsls..suss.ls..ls...l.......suss............................ss-..Lh..lhtss......s.....shss......hssts..............tss.....................ssppss..hss.....spsGs.Y.altltu .......................................t..phashpls....sups..ls...ls.....l............ssts............................ssD.........la...lhtss.........s....................shss.......hssps.........hpsu.................................ssppss...hss.....stsG.p..Y.altV..h.......................................... 0 158 302 417 +737 PF01577 Peptidase_S30 Poty_P1; Potyvirus P1 protease Bashton M, Bateman A anon Pfam-B_364 (release 4.1) Family The potyviridae family positive stand RNA viruses with genome encoding a polyprotein. members include zucchini yellow mosaic virus, and turnip mosaic viruses which cause considerable losses of crops worldwide. This family consists of a C terminus region from various plant potyvirus P1 proteins (found at the N terminus of the polyprotein). The C terminus of P1 is a serine-type protease responsible for autocatalytic cleavage between P1 and the helper component protease Pfam:PF00851 [1,2]. The entire P1 protein may be involved in virus-host interactions [1]. 
19.70 19.70 20.00 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.39 0.70 -4.98 26 1446 2012-10-02 13:45:52 2003-04-07 12:59:11 11 18 127 0 1 1522 0 212.30 29 12.60 CHANGED tpppsphttchhppp.....huphchpcpuphhh+thstptltcpppptpppccpppp.....hphthsshssshshttttthscstssphppt.hhppospch+pptspcphttssp.lstlhcplhpIspccshsVElIuK++..tspspa++hpushhhplpltHhpGphp+hDlshsphtpplhthhutpttppppspshph..GsSGllhpspphhs.hthsptshFlVRG+psGp....Ll-ARs+lspt.hpplcH ..............................................................................t...t.p.pthhhc.c...........uhhh.tpp.p.tslhhp...h.tspp...l.+.ppcthcp+ctcEtp.......h.h.thp.s..h.stho.h.s.ss..sss.t..hp....s.ph.ppt.......hppo....ph.......+c.p...h.s.h..p.p.s.+.....h.spt..hp....hlh+plhpIhp..tp.s.ts.lEh.Iu++p....hclpahc.h.t..t.s.phstlpssHh...G.h.....+.....tp.....h...D..h.....hs......p.t...hhlphhuth..st.p....p..hpshslp.G.SGlllpst.l...ht...hs..+s........p.p.........hFllRGc..c.c.G+....LhDARt+lop...pcl.p...................... 0 0 0 1 +738 PF03291 Pox_MCEL mRNA capping enzyme Mifsud W anon Pfam-B_4078 (release 6.5) & Pfam-B_3482 (Release 7.5) Family This family of enzymes are related to Pfam:PF03919. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.17 0.70 -5.57 11 647 2012-10-10 17:06:42 2003-04-07 12:59:11 11 21 389 17 358 751 441 284.20 25 61.34 CHANGED ch..ppspshsshVssH...........Ysch............pcss..hchRppSsIhp.............................................................................................................................................................................L+pFNNaIKolLIuhasp+s..c....sshpVL-lsCGKGGDL.KathutIuthlusDIucsSlppspcRY...pphppt.cpchh+hsa.schlsuDsahsplpEhahss......hpF..DllShQFAhHYSFcoccpAchhLpNlschLssGGhhIuThPDushl......++Lpt..t.stpcphGNslYploF-cp..........s.aGphYsapLcsslpssPEYlV.FssLhclsccYshpLl.phsFp-hhcp.h.p.cp+hhlpphsul-spsupp........................p...tshpusEhEAsh.hYllasFcKp .................................................................................................................................................................................................................................................................................................................................................................................................................................................t............l.ta...........Ysth....................t...........+.tts.l............................................................................................................................................................................ht.hsN.a...l.KohLIp....a..................h..........p.............................................................................tt..............tV.LDlsCG..+G...u...DL.K.a.....h.....u..t...............l..th...............hl.shD...........ut.ul....p.p....spp....RY.................................t............................p.........p.............................a.......t....p...h.h.ht..D..............sh............p.....ltph..h..............................F.....cllsh.............F.shHYuFcspp
pu..........p.hhpNhs.thLp.GGhhluo..hssu.p.l...................................tpl....pt....................................t.........t.......ph...s......s....ph....h.tl..p..att..................................................hG.h.....ha...a....h........p.p...h.s......................t...........hsEalV.h..h..h.t.lhpcashp..Lh.....p.......sF.p.hhp.............................ht...................................................................................................................................................................................... 1 117 205 299 +739 PF00481 PP2C Protein phosphatase 2C Bateman A, Finn RD anon Prosite Family Protein phosphatase 2C is a Mn++ or Mg++ dependent protein serine/threonine phosphatase. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.72 0.70 -4.99 31 7190 2012-10-03 01:39:20 2003-04-07 12:59:11 16 293 1352 43 4244 8910 707 218.80 20 49.91 CHANGED hshshsphpuh+pphpDsphthtsh.t.......ssp.shshhslaDGpuGppsuchsspplt.....phlppppsh.ps.................thppsltpuahp..tc.thtp.....t..........s.puGsoAssshlpspc........LalANlGDS+slLspssssh...........tLopcHcPssts.EppRIppsGGhlp......sRlsGs.....................LulSRAlGDhphKs..............................hVsupP-ls..ptplsts.D-FllLAsDGlWDhlsspcsl-ll+pphst............spcsupcLhchuhshuop-sh 
..................................................................................................................................................................................h.........................................................................h....hs.l.h.DG.....h.....s......G...........t.......s.....up.....h.ht....p...h..................t..h....t............t...........tt.................................................htp.h..l...t...p..s...hht........h.s.tthhp.........................................................t..tt.s.G..o..T..s.......s...s...s..l..l.....s.pp...................................lhl...AN..s.GD.........S...R..s......l........L...s.....p....p..st.h..............................................L.o..t.DH.p................s.....t.........p......................t......E.t....t....R...l..p..t.....t..t...u......h.....................+..lt....u..................................................................................................l.sh....o...R......u......h.........G...D.h.....hc.................................................................................................hl.st.P-.lt...............hp.l..........p...........p.....c..............p........a.......lll......us...............DG..l......a.....c..............h........h.......s.s....p......p........h.s..p.h.l..t..p......t...............................................hs.p.lht.s............................................................................................................ 0 1476 2539 3487 +740 PF00823 PPE PPE family Bateman A anon Pfam-B_297 (release 3.0) Family This family named after a PPE motif near to the amino terminus of the domain. The PPE family of proteins all contain an amino-terminal region of about 180 amino acids. The carboxyl terminus of this family are variable, and on the basis of this region fall into at least three groups. The MPTR subgroup has tandem copies of a motif NXGXGNXG. 
The second subgroup contains a conserved motif at about position 350. The third group are only related in the amino terminal region. The function of these proteins is uncertain but it has been suggested that they may be related to antigenic variation of Mycobacterium tuberculosis [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.65 0.71 -4.40 146 3533 2012-10-01 21:44:22 2003-04-07 12:59:11 14 54 122 2 395 2191 1 151.60 45 28.01 CHANGED slP..PElNSuhhauGPGuushlAAAuAWsuLAuELssuAsshsullusLs...sss.WpGPuusuMsAA.ssPYluWLsssAupAppsAuQAcuAAuAYEsAhAuhVsPshlAANRsthss..LlATNhFGtNosu.......IAssEA.pY.scMWuQDAsAMhuYtusuuuAss ...................................hlPPElNSuhha.uGsGsuPhlAA..AuAWcuLAsELssu.Asuas.u.llusLs..........sts.W.pG.PuusuMsAA.ssP........Ylu.WLsssAupAp.tuAsQAcAuAu..AaEsAhAusVs.P.s....h.lA.AN...R.sthhs...L..lu..o.N....h....h..GQNosA.........IAs.sEA...pY...tcMWAQ....DssAMhuYtuuuuuAs.t.................................................. 0 97 190 347 +741 PF01535 PPR DUF17; PPR repeat Bateman A, Birney E anon Pfam-B_874 (release 4.0) Family This repeat has no known function. It is about 35 amino acids long and found in up to 18 copies in some proteins. This family appears to be greatly expanded in plants. This repeat occurs in PET309 Swiss:P32522 that may be involved in RNA stabilisation [1]. This domain occurs in crp1 that is involved in RNA processing [2]. This repeat is associated with a predicted plant protein Swiss:O49549 that has a domain organisation similar to the human BRCA1 protein. The repeat has been called PPR [3]. 
25.00 9.30 25.00 9.30 24.90 9.20 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.38 0.73 -7.57 0.73 -3.61 518 31410 2012-10-11 20:00:58 2003-04-07 12:59:11 15 3797 468 0 23088 82367 965 29.30 21 15.97 CHANGED hoassllsuascpGphccAhplhpcMpppGh ....................asslls.s..a..u......+......s....G....p.....h.......c.....c.....A..h...p..lacpM.t....................... 0 2628 15898 19987 +742 PF04193 PQ-loop PQ loop repeat TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Repeat Members of this family are all membrane bound proteins possessing a pair of repeats each spanning two transmembrane helices connected by a loop [1]. The PQ motif found on loop 2 is critical for the localisation of cystinosin to lysosomes [2]. However, the PQ motif appears not to be a general lysosome-targeting motif. It is thought likely to possess a more general function. Most probably this involves a glutamine residue [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.69 0.72 -4.42 255 3702 2012-10-03 12:15:12 2003-04-07 12:59:11 9 33 711 0 2429 3643 533 59.40 21 35.09 CHANGED huphlGhlshshhhlshlPQlhpNa+p+Ss.pGlShhhhhhhhhGshhthhhhlhpthsh.h .........................h...hhGhhshhhh.h..h...s.hlP.Q...lhpsa..+.......p..+Ss.pGlShhhhhh..hhGsh..hhhhhhhhtt.....h.................. 1 810 1368 2026 +743 PF05033 Pre-SET Pre-SET motif Bateman A anon Bateman A Motif This protein motif is a zinc binding motif [1]. It contains 9 conserved cysteines that coordinate three zinc ions. It is thought that this region plays a structural role in stabilising SET domains. 
21.80 21.80 21.80 21.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.44 0.72 -11.70 0.72 -3.75 86 1039 2009-01-15 18:05:59 2003-04-07 12:59:11 11 69 226 37 626 1020 2 109.60 28 14.91 CHANGED chsput-s....hslslh........NplD............t-ts..P..tsFpYlsphhhsp...............................h....hstphhh.............GC.sC..p.......................ss....C...sspsC.Chpts.....st...h.................s.Y...sp.ptpLt................p...........................................sssIYECsspCpCs..t.sCtNR .........................................................hs.uhEphslshh.........Np....lD......................sp.s........ssFp..Ylsp..hhsp................................sh.....hs.phh.........................uC..sC....p........................ss......C.......ssspC.Csphs..........tt...th.........................................s.Y..sp...pspLh.........................hpt...................................................sshIaECsptCpCs.......sCtNR............................................. 0 144 274 462 +744 PF00156 Pribosyltran Phosphoribosyl transferase domain Bateman A, Sonnhammer ELL, Finn RD anon Bateman A Domain This family includes a range of diverse phosphoribosyl transferase enzymes. This family includes: Adenine phosphoribosyl-transferase EC:2.4.2.7, Swiss:P07672. Hypoxanthine-guanine-xanthine phosphoribosyl-transferase Swiss:P51900. Hypoxanthine phosphoribosyl-transferase EC:2.4.2.8 Swiss:P36766. Ribose-phosphate pyrophosphokinase i EC:2.7.6.1 Swiss:P09329. Amidophosphoribosyltransferase EC:2.4.2.14 Swiss:P00496. Orotate phosphoribosyl-transferase EC:2.4.2.10 Swiss:P11172. Uracil phosphoribosyl-transferase EC:2.4.2.9 Swiss:P25532. Xanthine-guanine phosphoribosyl-transferase EC:2.4.2.22 Swiss:P00501. In Arabidopsis, At the very N-terminus of this domain is the P-Loop NTPase domain [1]. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.41 0.71 -4.40 71 30853 2012-10-10 14:25:38 2003-04-07 12:59:11 22 68 5141 299 7483 25977 11727 121.40 18 48.82 CHANGED pslh...hs.ctlpphsct......lupp....lpppths.........llultpuGlshustluptLsh...shhhhhh.h...........................................tthtppsspththhhthst.hcucclllV.DDllsoGtTlptshc.hLc.pts.sp........lthsslh ..............................................................................................................................................................t........t..htthhpt......lupt..............h..h..p..p........h....p....h...............llu.......l...............p.......t.......G.....h.............hu..t.......t...l...u...pt.l.sh...........s.h..h...h..s..h.h.ttp............................................................................................................................ttt..t.ppp.s.....s..t....t..h...h...h...t...h.......p....s.........h....p...G.......c.....c.....V.ll.V..DDl.lsoG.sT.hp.t.h.hc...hlc...ptG...up......hl.tlssh.h........................................ 0 2412 4803 6327 +745 PF00377 Prion prion; Prion/Doppel alpha-helical domain Bateman A, Finn RD anon Prosite Domain The prion protein is thought to be the infectious agent that causes transmissible spongiform encephalopathies, such as scrapie and BSE. It is thought that the prion protein can exist in two different forms: one is the normal cellular protein, and the other is the infectious form which can change the normal prion protein into the infectious form. It has been found that the prion alpha-helical domain is also found in the Doppel protein. 
25.00 25.00 36.90 36.70 19.70 19.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.62 0.71 -4.38 7 730 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 240 88 41 750 0 107.50 71 50.15 CHANGED hp+hshcFGs-.tsRYYptN.hpaPstlaYcshsphsVspptFVpsClNhT.stpphp............u..cs.schc.+Vhh+llcEhCstpapcahLt....Ruuulplhss.PhhLhlLshlhFlht .......MSRPLIHFGNDYEDRYYRENMYRYPNQ.VY...YRPV.D.QYS.NQNNFVHDCVNITVKQHTVTT..........TTKGEN...FTETDlKhMERVVEQMCITQYp+EupAa...pRGu.ShlL.FSuPPVlLL.lshlhhll.s..... 0 2 2 9 +746 PF00227 Proteasome proteasome; Proteasome subunit Finn RD, Bateman A, Valas RE anon Prosite Domain The proteasome is a multisubunit structure that degrades proteins. Protein degradation is an essential component of regulation because proteins can become misfolded, damaged, or unnecessary. Proteasomes and their homologues vary greatly in complexity: from HslV (heat shock locus v), which is encoded by 1 gene in bacteria, to the eukaryotic 20S proteasome, which is encoded by more than 14 genes [1]. Recently evidence of two novel groups of bacterial proteasomes was proposed. The first is Anbu, which is sparsely distributed among cyanobacteria and proteobacteria [1]. The second is call beta-proteobacteria proteasome homologue (BPH) [1]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.99 0.71 -5.02 174 10806 2012-10-03 21:14:07 2003-04-07 12:59:11 21 55 3712 1970 5186 9052 2184 173.50 21 77.89 CHANGED hppGs.TslGlc.sccuVllu...u.-p.+so..ts.hlhsppp..h..cKlhpl..scclhhshuGhsuDsphlhchhct.psp.hac.hp.hs.c.hslp........hushhpthh.t..pt...th.psh...s.s.shlluGhD.p.p.s...t.spLaph..-ssGshhph....hsuhG.suuphuhs.hl-ppa....pts..hoh--u...hclshcult.t..uh..c..pst...hss..ssl..plshl ........................................................................................................pus.Thlu......lp.....t.......p.......s.......t.....lllu.............u..Dp.p.....s.......o.......t..s...hl..........t........p.s..p....h..........cK..l..h..pl....s..............s........p........l....h..........h.u....h..............u............G...........hs.A...D..s...t..sl...............h....c.hh........c..........t........p.......h........p.............a........p........h...........................t......t.....h...s..lp.....................................h.........s.........t...h...h....t.....s.........h.....t..........pp...............ht.....p......s.h..................h.l....phlh...u...Gh....s...........t....s..........................t...s...p.la.h..l.sss..Gshh......p............p............h.......h......A........hG..uGu...phAhu.hh.......c......phh...................t...s........h.........s.......p..c..A..........hc..ls.h.culp.....uh....c....hs.h......hos..p.phpl........................................................................ 0 1761 2940 4236 +747 PF03371 PRP38 PRP38 family Bateman A, Winge P anon Winge P Family Members of this family are related to the pre mRNA splicing factor PRP38 from yeast [1]. Therefore all the members of this family could be involved in splicing. This conserved region could be involved in RNA binding. The putative domain is about 180 amino acids in length. 
PRP38 is a unique component of the U4/U6.U5 tri-small nuclear ribonucleoprotein (snRNP) particle and is necessary for an essential step late in spliceosome maturation [2]. 22.40 22.40 22.80 23.00 22.10 22.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.81 0.71 -4.65 41 560 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 309 0 382 526 4 168.00 32 51.79 CHANGED tsts..tlpspssshllppllRp+IhsShYaKpphaslss..........pollDchhp.lcalGuhhsust+...................PosFlCLLhKLLplpPsc-Ilhphlppp..................................-aKYlRALuhhYlRLshss...................t-laphhE....PhhpDaRKl+hpstsu...........plhahD-alDpLLsc-chhslhLPRl.tR..hhL-ppstL ..........................................tt......lhGsssphllppll+spIhsS.YaK.phatLss.........cpllDchhp...lcalushh.s.G.sp+...................................PoshhCLlhKhh.plp.s+c.lht.hlppp...............................................-.p..YlRALGhh....Yl.....Rhstss........................h-lapahE....Phhp...D.cclchpstsG..............................plhphschlcpLL.......pcpchhs..hhLPRl.hp..h.hp..t..h................................................ 0 142 214 311 +748 PF01789 PsbP PsbP Bashton M, Bateman A anon Pfam-B_1303 (release 4.2) Family This family consists of the 23 kDa subunit of oxygen evolving system of photosystem II or PsbP from various plants (where it is encoded by the nuclear genome) and Cyanobacteria. The 23 KDa PsbP protein is required for PSII to be fully operational in vivo, it increases the affinity of the water oxidation site for Cl- and provides the conditions required for high affinity binding of Ca2+ [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.09 0.71 -4.82 30 539 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 153 5 301 551 149 160.70 22 65.89 CHANGED hssshsshhshshsustutssuGhpsa.sssDGYpFLYPsGW...pcVpl....sGs-llF+Dll-ssEslSVslossscc...olc-LGoPpc........VGcpLhcphluspGus.....RpAcLlcAspRcss.G+sYYslEatl+hss.....t............sRHpLuolsVscG+LYTlssuosEcRW.K.scchhcpllsSFslh ..................................................................................................hs................................h...h....t....t.....aph..hPt.t.W.............................phth........G....................hh..acs..............t.....t.p.s......ls.V..hlss....s........spp...........sl.p..ch.Gs..scp...........lu...ppl...h..t..t.hhs.s.tt.....................tpupllps..pp......pp...h....s..G.....c.tY..Yp.hEhtsp.tss........t......................t+Htlssss..l....s..s.G.....+.......LYhhss...p...s...s.-...p...p...W...t....c...hcphlcpshsSFpl......................................................... 1 93 210 266 +749 PF01416 PseudoU_synth_1 PseudoU_synt; tRNA pseudouridine synthase Howe K, Griffiths-Jones SR anon swissprot Domain Involved in the formation of pseudouridine at the anticodon stem and loop of transfer-RNAs Pseudouridine is an isomer of uridine (5-(beta-D-ribofuranosyl) uracil, and id the most abundant modified nucleoside found in all cellular RNAs. The TruA-like proteins also exhibit a conserved sequence with a strictly conserved aspartic acid, likely involved in catalysis. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.52 0.72 -3.60 118 11373 2009-01-15 18:05:59 2003-04-07 12:59:11 15 22 4733 26 3331 8399 4001 104.70 20 69.27 CHANGED hthtahGp.sFpuap.........pp....................pshp........pshh+sltphph................................t.t.........................thhht.h+hhh.......sshhcs.spGhpuhsplt....ph.............................................Lspph.s...........tssshs.hhhcspas ......................................................................................hta.Gp..cFpuap.....................................pp....................................................pohp.........psh...+slpch.ph................................................................................................................................................................h+.h..hl........................sshscs....st...Gs.c.uhsp.ls..ch............................................................................................................................Lssp...s...s..........ss..sss..hhcsca................................................................................................................................. 0 1157 2111 2810 +750 PF01437 PSI Plexin_repeat; Plexin repeat Bateman A anon Bateman A Family A cysteine rich repeat found in several different extracellular receptors. The function of the repeat is unknown. Three copies of the repeat are found Plexin (Swiss:P70206) [1]. Two copies of the repeat are found in mahogany protein. A related C. elegans protein (Swiss:Q19981) contains four copies of the repeat. The Met receptor contains a single copy of the repeat. The Pfam alignment shows 6 conserved cysteine residues that may form three conserved disulphide bridges, whereas [1] shows 8 conserved cysteines. The pattern of conservation suggests that cysteines 5 and 7 (that are not absolutely conserved) form a disulphide bridge (Personal observation. 
A Bateman). 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.03 0.72 -3.81 95 2947 2009-01-15 18:05:59 2003-04-07 12:59:11 20 228 154 27 1472 2447 7 54.60 25 6.46 CHANGED sCsp......a..poC.s.sClsup.t..tCuWCst.....tppC....spts.........................pCtptp....................t.tpp..Cs ............pCst......a..poC.ssC.....l.....tup.....cs..hCuW...Cst........ttpC....spts..................................................pC.t.t.............................................................................................. 0 251 365 805 +751 PF04046 PSP PSP Wood V, Finn RD anon Pfam-B_PSP (release 7.3); Family Proline rich domain found in numerous spliceosome associated proteins. 18.30 18.30 18.80 19.00 18.10 18.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.35 0.72 -4.57 40 488 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 300 0 335 464 3 49.30 49 8.24 CHANGED +PGhlSpcLRcALGhssss.....PPPWLhpMpch....GhPPuY..PsL+IPGlshsl .....+PGpLS..-EL+pALGhs.sss............PPPWLhpMQ+h...................G.PPSY..PsL+IPGLNss........... 5 114 181 270 +752 PF04468 PSP1 PSP1 C-terminal conserved region Waterfield DI, Finn RD anon COG1774 Family This region is present in both eukaryotes and eubacteria. The yeast PSP1 protein is involved in suppressing mutations in the DNA polymerase alpha subunit in yeast [1]. 
21.10 21.10 22.60 21.90 19.80 19.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.78 0.72 -4.00 138 1828 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 1651 0 508 1317 149 88.50 40 24.33 CHANGED slppllRhA...o..pcDhpphppscpcp.ccAhphCpp+lpccs.............................L..sMKllcsEa....pFDpsKlhFYao......A-sRVDFRcLVK-LAphF+sRIEhRQI ............................................................................lKpllRhA...sppDlpphp.cscpct.ccAhplCpcplpcps........................................................................................................................L.......cM+LlcsEY..................shD.....p......sKll.FYFT.............A.-.sR...lDFRcLVK-LAphF.+.T.RIELRQI..................... 0 218 376 469 +753 PF04024 PspC PspC domain Bateman A anon COG1983 Domain This family includes Phage shock protein C (PspC) that is thought to be a transcriptional regulator. The presumed domain is 60 amino acid residues in length. 21.80 21.80 21.90 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -8.93 0.72 -4.50 182 3398 2009-01-15 18:05:59 2003-04-07 12:59:11 7 17 2305 0 732 2173 337 60.70 32 37.58 CHANGED pp+LhRsp....ps+hluGVCuGlAcYhslD...ssllRllhll...h.hl...........................h..sss...sl..l.........hYllhh..lllP..ppss .....................p+LhR.s..psphluGVCuGlAcaas.l.-...ss.lVRllhllhsh............................hsss.....ull.........hYl.llh..hllPtt..s.............................. 0 264 511 639 +754 PF04886 PT PT repeat Bateman A anon Pfam-B_517 (release 7.6) Repeat This short repeat is composed on the tetrapeptide XPTX. This repeat is found in a variety of proteins, however it is not clear if these repeats are homologous to each other. The alignment represents nine copies of this repeat. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.01 0.72 -4.63 15 489 2009-01-15 18:05:59 2003-04-07 12:59:11 7 108 180 0 258 482 734 37.20 41 7.69 CHANGED pPTspPospPTspPTupPospPTspPTspPostPos .............................PotpPTspPTsp.PT..spPTs.pPT.spPTspP.ottP..................................... 0 131 163 221 +755 PF01329 Pterin_4a Pterin 4 alpha carbinolamine dehydratase Finn RD, Bateman A anon Sarah Teichmann Domain Pterin 4 alpha carbinolamine dehydratase is also known as DCoH (dimerisation cofactor of hepatocyte nuclear factor 1-alpha). 20.80 20.80 21.20 21.00 20.40 19.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.29 0.72 -4.23 136 2229 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 1838 50 880 1778 2287 90.20 32 78.95 CHANGED s.s..s.hLoppchpph..Lp.pl..s.....sW..plt...s...........tplp+p.apFcsFtpAh...uFhspVuhlAEptsHHP-lts.sascVplphhTHcl....s..G.LocpDhhhAs+h-plhp ....................................................ht....lsttphtth..Lt..tl.....s..........sW...p..h.......s..............stlp+.p.a..pFp.s.......Fp.pAh..................uFh.s.c.lAhh.AE.......ph.s.......HHP.-.hhs....s.........as..p.........V.plsl..h..TH..sh............s...G...locpDhthAtph-tlh.t........................... 0 282 549 745 +756 PF03095 PTPA Phosphotyrosyl phosphate activator (PTPA) protein Mifsud W anon Pfam-B_2456 (release 6.4) Family Phosphotyrosyl phosphatase activator (PTPA) proteins stimulate the phosphotyrosyl phosphatase (PTPase) activity of the dimeric form of protein phosphatase 2A (PP2A). PTPase activity in PP2A (in vitro) is relatively low when compared to the better recognised phosphoserine/ threonine protein phosphorylase activity. The specific biological role of PTPA is unknown, Basal expression of PTPA depends on the activity of a ubiquitous transcription factor, Yin Yang 1 (YY1). 
The tumour suppressor protein p53 can inhibit PTPA expression through an unknown mechanism that negatively controls YY1 [1]. 20.10 20.10 21.30 20.50 20.00 19.70 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.06 0.70 -5.25 53 620 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 316 20 404 615 5 265.50 37 78.11 CHANGED tPsKRIhss.pDlphFtpStsapclhsFlpslsculpGpphs..........pshthSs...........................................slppllp.lLcclppllccsPP....hct.s.sRF......G...NhuFRsaa-clp...........ppsssllpphlss.................ttpsslsELssYlhpS..FGsppRlDYGoGHELsF.lsaLhsLhclu................llp.pp.........D.............hssllLtlFscYlclhRcLphsYhLEPAGSHGVWGLDDapFLPFlaGuuQhhscsh.hp........................Ppulpsppll.........-.papcpahYhssIsFI.splKou........shp.HSPhL.DISust.sWsKlpp................GhlKMYpsEVLuKhPlhQHFhFGs.Lhshscshss ..................................................h.P.+cIhs..tDhthahpS.......satphhsFlhp.lscultspphs................t...st..................................................................................slpplht.lLsplpphl......cchPP.........pt..s.....pRF..................G...N.huaRpahpc.lp...........pps....plht..ph..l.st.........................................................................t.tts..h.ElssYhhpS..aGstpRlDYGoGHEhsF.hhaLhsLhpls............................hhp..tp-...........................................sllhtlF..pYlp..lhRpLphsY..hLEP.AGSH....GVWGLDDapFLPal.aG..uuQ..ht.p..h...hp...............................................Ppphhptphl..........p.p..t.pphha.hpslt..al......pp.hKpu............shttHSs.L.sISus...sWsKlpp................GhlKMYp.sE..VLtKhPlhQHFhFGs..Lhsh.....s........................................ 
0 157 237 341 +757 PF02302 PTS_IIB PTS system, Lactose/Cellobiose specific IIB subunit Mian N, Bateman A anon Pfam-B_9339 (release 5.2) Domain The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The lactose/cellobiose-specific family are one of four structurally and functionally distinct group IIB PTS system cytoplasmic enzymes. The fold of IIB cellobiose shows similar structure to mammalian tyrosine phosphatases. This family also contains the fructose specific IIB subunit. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -10.36 0.72 -3.50 245 18949 2009-01-15 18:05:59 2003-04-07 12:59:11 12 57 2638 16 1606 8004 181 89.10 20 29.08 CHANGED pllssCssGhuoShhstpplcctscptG........l-s....pstsss...hptpphts.s.Dllllus...plp......phpphss............hhhlshps......hh..tsscpllpp ................llssCssGhuoShhstptlcctspchG................................l-s....psssss.......hp.tps.h..tp.s..Dll..l.hus.plp........phpphss........h.........h.hlshts....hh....phtthh.................................................... 0 438 856 1245 +758 PF01472 PUA PUA domain Bateman A anon Medline:99193178 Family The PUA domain named after Pseudouridine synthase and Archaeosine transglycosylase, was detected in archaeal and eukaryotic pseudouridine synthases, archaeal archaeosine synthases, a family of predicted ATPases that may be involved in RNA modification, a family of predicted archaeal and bacterial rRNA methylases. Additionally, the PUA domain was detected in a family of eukaryotic proteins that also contain a domain homologous to the translation initiation factor eIF1/SUI1; these proteins may comprise a novel type of translation factors. 
Unexpectedly, the PUA domain was detected also in bacterial and yeast glutamate kinases; this is compatible with the demonstrated role of these enzymes in the regulation of the expression of other genes [1]. It is predicted that the PUA domain is an RNA binding domain. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.05 0.72 -4.25 108 4603 2012-10-02 17:37:24 2003-04-07 12:59:11 15 42 3369 67 1812 3731 1805 74.30 29 20.31 CHANGED splhlDcuAscultp..Gu.sLhssGlhpscssh......ctG-hVhlhsp....p.G........chlAhGhuthsupEhtchp..tttusclcphl ....................GplhlDcGA.spAlhp.....Gs...SLLssGlh......p.....l......pGsF............................ptG-sVt..lh..st...................p.G..........................................ctlA.pG..lspasus.cl.p..p..ht...tth.utcltt............................................................. 0 564 1073 1503 +759 PF01480 PWI PWI domain Bateman A anon [1] Family \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.59 0.72 -3.88 34 907 2009-01-15 18:05:59 2003-04-07 12:59:11 12 44 271 2 602 867 4 72.20 28 10.85 CHANGED l-llKsWIsc+ls-lLGhEDDlVl-ashshLp......................tpc....DsKplplpLsGFLs.+suttFsp-LWcLLluApps..pG ........................-hlKsWls++lp-..l.lG.h.-...-......s..s.l...l-a.lhsh.lctt...............................s..c.p.h.h.p.pLss..F..Ls..ccs...pt..FlpcLWchll.tppt...t............................. 0 199 312 474 +760 PF00787 PX PX domain SMART anon Alignment kindly provided by SMART & iterated Domain PX domains bind to phosphoinositides. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.35 0.71 -4.28 71 7076 2009-01-15 18:05:59 2003-04-07 12:59:11 19 282 785 46 4186 6459 70 114.90 20 19.35 CHANGED sthhp..hthss.t.......spttthhhhplphpss..............tphpltRRYp-FhpL+ppLpcphs.....................thhlP.hPsKthh......................................................hppphlccR+.ptLppaLpplhppPhhtp.....schltpFLpsp ........................................................................ht............................tththhh..h.h..plp.hpss..............................pht..V...hR.RY..p-..F..ttL....cp..p..Lp...cpas.........................................................th.h.lP......lP.t..Kp.h.h.st...........................................................................................................................................................hppp...h..l...c...c...R+...p.tL.p...paL....p..............p...l..h....p....p..s..h..ltp......sphht.pFLp..t.................................................................................................................................................................................................... 1 1382 2076 3184 +761 PF02194 PXA PXA domain SMART anon Alignment kindly provided by SMART Family This domain is associated with PX domains Pfam:PF00787. 
21.10 21.10 21.30 21.10 20.50 20.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.17 0.71 -4.72 69 830 2009-01-15 18:05:59 2003-04-07 12:59:11 10 30 257 0 547 794 2 167.90 23 19.45 CHANGED sstlsptlppll..shll+-FVps.WY.ppIossp..pFss.plcpslppsltplppRlp...............phDhssllspclhsllspHlphappAppth.............................ht...spspthplhhshphpss.....csA.htsptpphp.........................................alRtlspslLshlLsppphpstslpsLl+ElLussVLhPllp.hlu-P-alN...phIlthhpss ............................................lsttlppllphllc-aV.s.WY....pp.........l.o.scp....pF.p.pl.cpslpthhtp.lpp.R....h.p...........................cl..D.h...sl..l.............sp.......cllshhspHlphhp.pAppth...........................................................................ttt...p...hp.h.ht.p.t.t............hc.u..h....psp....pppht....................................................................................................................aLRpl.sp.hll...hl.LP.......pt.............h..........p.......sp.......shphllpEllu....ssl.lhPh.ls.hlucP-hlN...phlhhhhp..s............................................................................................................................................................................... 0 173 277 432 +762 PF00070 Pyr_redox pyr_redox; Pyridine nucleotide-disulphide oxidoreductase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.49 0.72 -3.57 130 40648 2012-10-10 17:06:42 2003-04-07 12:59:11 22 455 5442 395 10595 41559 13431 81.90 22 17.40 CHANGED +llllGGGhIGlEhAshh....pp.hG...t..cV.ollcttsplh....hhstp.huphl..pcph.ccp....Glplhhssplpplp......tsss.t....lhhp......ss ..................plsVlGG.Gh.l....Gl....Eh.A.thh.............................pp..h..G.................s.......c..V..o.....l..l...c.t....t..s..........p...l.........h..............................h...........s.......................p......h...........s...........p.......h..........h............pc..p....h...p..pp...............G.l...pl..h..h..s..s....p.l..p..p.lp........tss.t..t.......l.....tttt............................................................. 0 3391 6626 8921 +763 PF02852 Pyr_redox_dim pyr_redox_dim; Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain Griffiths-Jones SR anon Structural domain Domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.34 0.72 -4.03 102 18533 2009-01-15 18:05:59 2003-04-07 12:59:11 17 94 4904 322 4408 13761 3778 109.20 27 23.24 CHANGED lPsslaopPElAsVGl...TE.ppApppsht......lcltphsas..........sss+Ahshtp......scuhlKllsct.cspclLGsHhlG.spuuE..hIpthulAlchuh...Thp.-.hspsltsHPThuEthtpss .....................................lPtshaopP.pl.AsVGl....T....E....pp....A....+....p....p..s.hp................hcstp..h..sas.......................s...s.+..A...h.shsp...............spGhh.Kl.l.......h......c.............t.....c......o.......pc..llGsph...l..G........s..u.sE....hIpt...hul...Al.....c..h.s.h...ohp...p..h..sp..sla.sHPThuEshh...h......................................... 
0 1486 2777 3700 +764 PF01729 QRPTase_C QRPTase; Quinolinate phosphoribosyl transferase, C-terminal domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_2063 (release 4.1) Domain Quinolinate phosphoribosyl transferase (QPRTase) or nicotinate-nucleotide pyrophosphorylase EC:2.4.2.19 is involved in the de novo synthesis of NAD in both prokaryotes and eukaryotes. It catalyses the reaction of quinolinic acid with 5-phosphoribosyl-1-pyrophosphate (PRPP) in the presence of Mg2+ to give rise to nicotinic acid mononucleotide (NaMN), pyrophosphate and carbon dioxide [1,2]. The QA substrate is bound between the C-terminal domain of one subunit, and the N-terminal domain of the other. The C-terminal domain has a 7 beta-stranded TIM barrel-like fold. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.98 0.71 -4.73 18 3824 2012-10-03 05:58:16 2003-04-07 12:59:11 14 9 3313 84 1170 3336 1907 167.10 40 57.32 CHANGED IATtTpchlctscus..ps+lhsTRKTtPG.LRhh-KYAVhlGGGs.......sHRhGLsDslhIKDNHIsssGu...lpcAl+psRphs.shs....lEVEl-sL--hccAlp.........uG....ADlIMLDNh.........ss..-pl+cAlc.h.ctpshc....hhlEsSGGloh-sltpaApsGVDhIShGsLT+uspslDlSLc ..................................................................................lAThTpphVct.lpus................pspl.hDTRKT.h.P...G...L.R..h.h.p.K..YAVhsGG.Gh................NHRhGL.....sDul.......LlK-....N....HI....t....s........s......G..u........................l.pp....A...l.ppA...+t...ht....st...h...s.........lEV...E.V...-...o....L....-.....p.....l.c.....-..Alp.....................................AG...........AD..lIhLDNh........................ss......-ph.+.....c.A.....V.p.........h..h..ss+...........shlEs.SGsl..oh.cs..............lpp..h..A...p..o...G...V....DhISlGu...L.T...+...ss.pslDlShc........................ 
0 388 754 980 +765 PF02749 QRPTase_N Quinolinate phosphoribosyl transferase, N-terminal domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_2063 (release 4.1) Domain Quinolinate phosphoribosyl transferase (QPRTase) or nicotinate-nucleotide pyrophosphorylase EC:2.4.2.19 is involved in the de novo synthesis of NAD in both prokaryotes and eukaryotes. It catalyses the reaction of quinolinic acid with 5-phosphoribosyl-1-pyrophosphate (PRPP) in the presence of Mg2+ to give rise to nicotinic acid mononucleotide (NaMN), pyrophosphate and carbon dioxide [1,2]. The QA substrate is bound between the C-terminal domain of one subunit, and the N-terminal domain of the other. The N-terminal domain has an alpha/beta hammerhead fold. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.54 0.72 -4.16 161 3866 2012-10-02 20:27:15 2003-04-07 12:59:11 11 8 3303 80 1158 2967 1621 88.60 32 30.01 CHANGED hs...tG.DlTo..shlsssppup...uplhu+ps.ullu.Ghphspp.lachl....................sl.plp..hhhpDGcplpsGs........hlhplpGsucslLpuERsuLNhLp+hS ......................................sDlTs.....tll...s.s..s.p.p...up....upll..s.+..-.s.GV.l.u...Ghp.h.sp.p.lFptls........................................ssl.plp.......hthp...DG-tl...ps..sp..........................sl..hp..lpGss+sLLsuERsALNhlpphS............................................ 0 387 749 973 +766 PF01424 R3H R3H domain Bateman A anon Medline:99003905 Domain The name of the R3H domain comes from the characteristic spacing of the most conserved arginine and histidine residues. The function of the domain is predicted to be binding ssDNA. 
20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.15 0.72 -4.27 402 3275 2009-01-15 18:05:59 2003-04-07 12:59:11 17 89 2043 7 1332 2719 355 63.80 24 13.76 CHANGED cppplpphspphhpps....h........pss...p.........shphp.P..h.ssh-R+.llH.phsp....th.....s...lpopS..t.......G.pt.sp..Rplllthp ...........................................................l.phspchhppl....h.....................css.........c..........shphp.P.....M..sshER+.l.........lH.phsp...hh.....s....lpopS.....p.........G...cc.sp...Rtl.ll...................... 0 444 749 1050 +767 PF03834 Rad10 Binding domain of DNA repair protein Ercc1 (rad10/Swi10) TIGRFAMs, Griffiths-Jones SR, Coggill PC anon TIGRFAMs Family Ercc1 and XPF (xeroderma pigmentosum group F-complementing protein) are two structure-specific endonucleases of a class of seven containing an ERCC4 domain. Together they form an obligate complex that functions primarily in nucleotide excision repair (NER), a versatile pathway able to detect and remove a variety of DNA lesions induced by UV light and environmental carcinogens, and secondarily in DNA interstrand cross-link repair and telomere maintenance. This domain in fact binds simultaneously to both XPF and single-stranded DNA; this ternary complex explains the important role of Ercc1 in targeting its catalytic XPF partner to the NER pre-incision complex [3]. 22.10 22.10 23.90 23.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.48 0.72 -4.28 26 301 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 268 3 216 287 4 67.50 46 21.96 CHANGED psILVsspQ+GNPlLpp..l+slsWca.s.-....IlsDYhlGpspslLFLSLKYH+L+PEYIapRlcpLpp.pash ...............sIlVu.sR.....Q+GN.PlLpa..l+.s..ls.WEa.u..D.....I.s.sDYll.....Gpo.oCsLFL.SL+YHpLH.P-YIapRlcpLsppat.h........ 
1 73 120 178 +768 PF04423 Rad50_zn_hook Rad50 zinc hook motif Bateman A anon Bateman A Motif The Mre11 complex (Mre11 Rad50 Nbs1) is central to chromosomal maintenance and functions in homologous recombination, telomere maintenance and sister chromatid association. The Rad50 coiled-coil region contains a dimer interface at the apex of the coiled coils in which pairs of conserved Cys-X-X-Cys motifs form interlocking hooks that bind one Zn ion. This alignment includes the zinc hook motif and a short stretch of coiled-coil on either side. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.55 0.72 -4.50 32 262 2009-01-15 18:05:59 2003-04-07 12:59:11 9 18 230 2 157 265 13 53.50 26 4.97 CHANGED phcspttphpctlpplppspt..sCPlCtRslss-cc.pclhpchppclpph.cchp .............htttht.hpphlppLsc..ppt..sCPlCpRshss-pc..pcllpchpsclcth.cp.................. 0 38 69 121 +769 PF04055 Radical_SAM Radical SAM superfamily Bateman A anon Bateman A Domain Radical SAM proteins catalyse diverse reactions, including unusual methylations, isomerisation, sulphur insertion, ring formation, anaerobic oxidation and protein radical formation. 
29.40 29.40 29.40 29.40 29.30 29.30 hmmbuild -o /dev/null --hand HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.13 0.71 -4.08 628 65493 2009-09-17 13:13:13 2003-04-07 12:59:11 16 316 5136 37 18526 49841 14153 169.40 14 43.97 CHANGED h.hsssCshp.....CtaCthtt...............................................................htttttthshcplhchhcph...ph...hp.tlhhs.....................uupshhh................................hshhthhhthttt.............................................................thplslposshhh............................tchlctltchs...........hstlslslcshspp.........................htchh...pttsh........cphhcslcthpcts......hp...........hhshhhhh.sps.p..pchtphhchl ............................................................................................................h..hhttCs.hp...........C.s.....a.......C.thst.......................................................................................................................ttt.pths...h.....c........c....l......h.......p........p........h.........p.........p........h..................h.........p.....................s.........h.........p........t........l..h..h.s..................................................G.G.p..s..t.h..h.........................................................................................................h..s..h..h..t..h....h.h.thtpt.............................................................................................t..thc..l..s..l...p..o..s..sh.h.h...............................tchl.p.t...l.tp.ts................hs.h.l..p..l...s..l...p...s...hspp..............................................hhphh......ptt..sh....................pp....h.h....c.s...l...c....t...h....p..p.t..s...........................h......................hh.hh.hh.h........s.....s........p..ph.......h............................................................................................................................................................. 
0 6768 12613 16003 +770 PF02145 Rap_GAP Rap/ran-GAP Mian N, Bateman A anon IPR000331 Family \N 21.50 21.50 21.60 21.50 21.40 20.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.25 0.71 -4.92 54 1221 2009-01-15 18:05:59 2003-04-07 12:59:11 10 45 216 7 719 1143 3 181.80 35 14.83 CHANGED lhsNppu..S..sapcFLshLGpp.................................VpLps..ap.tapGGLDspss..psGpao....lYhphc.........spElMFHVoThhPh....sts...............DtQplp+KRHIGNDhVsIlF....sE.ssps.as..................................shItSpFs...alhlllpsh....................ps...................................................hY+Vplhp+...sslP..FGPhhs.sthl.p..c..shtsalhspsINAppsshph.....spasphh..pcph.phlc.......sL ................................hhsNppu...S....sap-FLs.hLGpp.................................lcLcs.....ap....ta.p..G..GLDs.p.ss..pTG...ppo......h.Y..hs..h.p.........shElh..FHVSThh..Ph....p.ps......................cp.pp...lp.........+KRHIGNDhVsIVF.........pE....ssp...s....Fs.................................................................sh.I..t.SpFp.........alall..l.ps..ht...........................s...................................................................................................................hYpVsl.p.p+.......p-..VP.....hFGP..s.....hs.....s...s...hl..t.+..........thtpalhsphINAppAshps..........tpatphh..p..cph.thlcp........................................................................... 0 262 341 531 +771 PF04078 Rcd1 Cell differentiation family, Rcd1-like Wood V, Finn RD anon Pfam-B_5278 (release 7.3); Family Two of the members in this family have been characterised as being involved in regulation of Ste11 regulated sex genes [1,2]. Mammalian Rcd1 is a novel transcriptional cofactor that mediates retinoic acid-induced cell differentiation [3]. 
27.60 27.60 27.80 28.90 24.60 27.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.59 0.70 -5.56 27 453 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 304 4 291 421 6 238.70 58 77.46 CHANGED phltcLt.pPcp..REpALlELSK+REpascLAshLWpShGslusLLQEIlulYPhLs.........PPsLos..ttSNRVCNsLuLLQCVAS.Hs-TRthFLpA+IPLaLYPFLpTsu+sRs..FEYLRLTSLGVIGALVK.sDcsE....VIsFLLpTEIlPLCLRIMEhGoELSKTVATFIlQKILhDDtGLsYlCtTsERFhuVupVLspMV...tpL...spps.os........RLLKHllRCYLRLS-Ns....................RA+cALpphLPptL+..DsoFsshlc-..Dsss++hLtQLLhNlsst ........................................hl.-Lh.sspp...REsALLELS....KKR..E.p....hs-.LA.hLWaSF...G............shsuLLQEIlslYPh.ls.........Psp..Lou...ttSNRVCNALALLQCVAS.Hs-TRshFLp.................A...............HIP.LaLYPFL.pTsSKoRP..FEYLRLTSLGVIGALV.K..sDsp-.....................VI..sFLL...............sT.EIIPLCLRIM.E..s.G.SELSKTVAhFIlQKILLDD............s..........GLsYICtTh.ERFh.tVutlLupMV...hpL............scps....os..............................RLLKHllRCYLRLSDNs....................RARc.AL+.p.CLP-....L+.......DsTFs..psLc-....DssTK+hLtQLLhNlp.s..................... 0 108 166 239 +772 PF05177 RCSD RCSD region Guo JH anon Guo JH Family Proteins contain this region include C.elegans UNC-89. This region is found repeated in UNC-89 and shows conservation in prolines, lysines and glutamic acids. Proteins with RCSD are involved in muscle M-line assembly, but the function of this region RCSD is not clear. 
25.00 25.00 27.70 25.50 19.50 23.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.14 0.72 -10.92 0.72 -3.52 4 51 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 16 0 36 64 0 87.60 41 11.71 CHANGED cV+SPsKKEKSP..............EKoEpp.so......EEsKos..KEKSPEcsDtp.tSPTKK-KSPppSusE...-lKSPsKKEKSPE...KsEccPuSPTKKE............psppcEKSPE ....................................................tKp.pp....tss......EcsK.sspK.cc+S..sEc.s.-c...pshSPs.KK-K.SPppS.usE......cs+SPsccctusE...csE.c..uo.sKcE.......................p.................. 0 7 13 33 +773 PF01030 Recep_L_domain Receptor L domain Finn RD, Bateman A anon Pfam-B_244 (release 3.0) Domain The L domains from these receptors make up the bilobal ligand binding site. Each L domain consists of a single-stranded right hand beta-helix [1]. This Pfam entry is missing the first 50 amino acid residues of the domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.71 0.71 -4.10 68 2555 2009-01-15 18:05:59 2003-04-07 12:59:11 19 86 174 80 1269 2434 30 112.50 25 20.58 CHANGED sCohlpG....sLpIphtstp......................t..thlsslcclsGhlhItps..thpshshhpsLchIcGpphhp......paulhlhpN.p...............LpcLth.s..LppIpp.........GslhlppNs.......cLChtpp.h.hphlh .........................................................sCohl..p.G.....sLplh.hstp........................................phphh..p..slcclo.....Ga..lhIpps......thss.h.s.hhp.N.Lph.....I.....+..G...p.p..l.hp.........................pau..Lhlh.pstp.............................................LppL.t.h..s......L.p.p....l.p..............................Gtlh.hp.p...N............pL.Chhp.p.h.h....h................................................................. 1 289 375 1023 +774 PF02010 REJ REJ domain Bateman A anon Bateman A Family The REJ (Receptor for Egg Jelly) domain is found in PKD1 Swiss:P98161, and the sperm receptor for egg jelly Swiss:Q26627. 
The function of this domain is unknown. The domain is 600 amino acids long so is probably composed of multiple structural domains. There are six completely conserved cysteine residues that may form disulphide bridges. This region contains tandem PKD-like domains. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.36 0.70 -6.17 25 1075 2012-10-03 16:25:20 2003-04-07 12:59:11 10 184 317 2 592 1109 109 274.00 15 21.55 CHANGED Aplpl..pC....sstpspapWplhsssssssh.phsp.....................t..plsIPphsL....hGsYshsholoh.....sssslsspptsplplts.....osLhAhIcGGopcshuhp..pslh.........lDu.SpSh...DP...........Dhsstp...sslsatWhCpspsss........................................tsC..........t.t.h.hssssuslolsuspLpussp.YpFplslsKsu.....R.oupoppslhlhpGpsPplplpChssss.t...lssssclsLpusss.sss...sppspapWsl...........ps.tt..hh.....................................................................................................................................................................................................upssTshssstLsl+tsshpsstpYthslhltssshpt......hAshshps..NtsPpsGsCsl............sPspG...hsLpTpFslpCpsapDp-t.......PLsYphhh.............hpsps.hphlhpGspspp.....thhLPsG.ssp.taslslhVpVhDshGuss.ssshslpVpsssssss.........hh.hh.tssstlsshltpGDstputphhss 
.............................................................................................................................................................................................................................................s.........h..htl...........................t...h..spt.h.......l.....l.........................t..s.............s.h..h.....s.s...s....t.hh.hshs.......psl.s...........................................Ls..G..otS.......Ds...........st............lp..apWppss.ssts..................................................................................................................................................t..tp.s.sh....hph...sp.h.....t......st.......hpF.p...lplpcsp.................t.sstsph...sl..h...s.............t.......................h........................................h...t.......h..h.h.....t.....................h.....h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s........................................................................................................................
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 318 363 454 +775 PF03432 Relaxase Relaxase/Mobilisation nuclease domain Finn RD, Mifsud W, Bateman A anon Pfam-B_4002 (release 6.6) Family Relaxases/mobilisation proteins are required for the horizontal transfer of genetic information contained on plasmids that occurs during bacterial conjugation. The relaxase, in conjunction with several auxiliary proteins, forms the relaxation complex or relaxosome. Relaxases nick duplex DNA in a specific manner by catalysing trans-esterification[1]. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.43 0.70 -4.98 17 2990 2012-10-02 18:54:05 2003-04-07 12:59:11 9 15 1395 0 379 2707 170 226.00 16 49.85 CHANGED spsShsphlsYhpp...........................+shppsshp...hsschstpphhsspthas........cs........psspsacll.SFpssE..ss......cphppluhchhpplu..saQhhlhsHsD....p-plHsHIllNplchpo...G+phpsp............h..shcplccsscplspccGhphspt........st.+.s.s-tshhp.......................scsphctph+pslcptpt....spsap-h+ctLpcpGlclc..thsspplshphpspp..ppl+usplucc..ascsplcpchtcp .......................................................................................................t.....................................................................................t..h..............p.....h..................t.....t..........t...............................................pt.......................pt..sh..H.hhhSap...s....t.-...hss....................................cph....p....p....l....u.p....c......h....h....p...c..h.........t.....h.........s......p......a......p........a....l.lsp.H...p.D..............ps....+.......hHhHIlh.s.pls..pt...............G..+ph..p.sp............................................h....shcp.h...p..p.h...s...c.c..l.p.p....c...h.Gl...phspp......................t...p..p......p....h....t...pht.hhp..............................................................................................ttt.p...h...c..t..t...l...p.p.....t...lp.thht..........t..psh...p.p...hhp...h.h.t.p.....s.h..plp............tt....t...............h..............h................t..............................h....p...p..t..h.ttt....h.............t....................................................................................................................................................................................................... 
0 137 269 341 +776 PF03090 Replicase Replicase family Mifsud W anon Pfam-B_2424 (release 6.4) Family This is a family of bacterial plasmid DNA replication initiator proteins. Pfam: PF01051 is a similar family. These RepA proteins exist as monomers and dimers in equilibrium: monomers bind directly to repeated DNA sequences and thus activate replication; dimers repress repA transcription by binding an inversely repeated DNA operator. Dimer dissociation can occur spontaneously or be mediated by Hsp70 chaperones. 20.10 20.10 20.10 21.20 19.90 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.56 0.71 -4.31 15 361 2012-10-02 15:26:12 2003-04-07 12:59:11 12 7 242 0 25 357 7 124.40 36 38.95 CHANGED sulhhps+psALcp+YIQsNtsphsshLVhDlD+s..sA..............ththtctst.sPNhhstNssNGHuHLlasLs.PVpss-t.ARpKPlpYhAAlppuLpptLsuDhuY.SGLlsKNPtHs.......pWpspphpsc....YsLccLuchL- ..........................h.thhhts+thAlt.h.YhQhNp.sthhtaLVaDlD+t...su.................lsWp.-tshPsPshhsp.......N.p.s...G......Hu..pLhYuls.s..V.p.....ss.....ps.....ucsKslp.....YhtAlppuhst.+L...s.A.....DssY.uGhlsKsPhHs.........................tWpshchppcs...YpLs-LAshl-.......................................... 0 6 14 24 +777 PF03248 Rer1 Rer1 family Bateman A anon Pfam-B_3358 (release 6.5) Family RER1 family protein are involved in involved in the retrieval of some endoplasmic reticulum membrane proteins from the early golgi compartment. The C terminus of yeast Rer1p interacts with a coatomer complex [1]. 
21.60 21.60 22.10 22.20 19.90 21.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.21 0.71 -4.69 34 427 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 311 0 286 395 2 163.90 50 84.44 CHANGED sslsphhp+hpppYQphLDcosPasthRWhshshLlhlFhlRlhhhp.GWYlVsYuLGIYLLNLFLuFLTPKhDPuLpppp...t.-tG................LPt................cps-...EFRPFIRRLPEFKFWasss+AhhluhhhoaFshFDlPVFWPILlhYFIlLFslTM++QIp..........................................HMIKY+YlPFshG.Kt+Ystp ...............ss...thhscltphYQtaLD+.........oTPasthRWlsshslhhl...ahlR..lh........hhQ.GaYI.........VsYuLGIYlLNLFlAFLoPKh..DPslptpp.....ptGs..................sLPo...............................+ps-...EF.RPFIRRLPEFKFWausT+.AhhluhhhTaFph....FslP.VFWP...ILlhYalhLFslTM+RQIp..........................................HMIKY+YlPFshG.Kt+Ytt.p............................. 1 98 166 240 +778 PF02453 Reticulon Reticulon Mian N, Bateman A anon Pfam-B_2196 (release 5.4) Family Reticulon, also know as neuroendocrine-specific protein (NSP), is a protein of unknown function which associates with the endoplasmic reticulum. This family represents the C-terminal domain of the three reticulon isoforms and their homologues. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.94 0.71 -4.65 90 1346 2009-01-15 18:05:59 2003-04-07 12:59:11 12 20 297 3 619 1288 1 158.90 25 44.60 CHANGED ssDllh.WRch+touslhuushs..hLhp.l..tphsllolhspl.hlhsLssshshphhtpllps..............h...clslsc-thpphssslhthlNpslppl+clh...hsc-lhpolp.hsshhallohlGuhhshhTLlhluhlhhFolPllYc+apcpID...phlt....hspsplpc .........................h-llh.W+ch+..t..ouhlh..uushh..hLh..p...l.....hthol....lolhuhl...hlhhLs.sshs....h...p..l..h..p...p..llps..t............................ht....pls.l.sp......-.....t......hp.......phsss.l..htpl...Np...slttl+.c..lh...hsc...D...h...h.p............sl............p...h.s..l.h.h.a...llohl.G.u..hhs.sl.TLh.h.luhlhhao..lP.h.lYcc..hpspIDphlt.......hsppph..p........................................ 0 125 283 450 +779 PF04527 Retinin_C Drosophila Retinin like protein Waterfield DI anon Pfam-B_4914 (release 7.5) Family Family of Drosophila proteins related to the C-terminal region of the Drosophila Retinin protein. Conserved region is found towards the C-terminus of the member proteins. 22.20 22.20 22.50 22.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.51 0.72 -3.98 18 168 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 19 0 91 157 0 73.70 44 54.73 CHANGED lhpEPsVA+VGslV+slPoAVSHQSpT.VHs.pt.lhpPVl......APsVKsT.lhp....sPl..s.........................tAAPll+s ................llpEPslA+VGs..V..V+olPo...AVSHQStT.VHs.pt.llp.PVV.......AP..lVKoT..hhs......uPllts......................tuAPll..................................................................................... 0 17 20 58 +780 PF03732 Retrotrans_gag Retrotransposon gag protein Finn RD anon Pfam-B_3194 (release 7.0) Family Gag or Capsid-like proteins from LTR retrotransposons. There is a central motif QGXXEXXXXXFXXLXXH that is common to Retroviridae gag-proteins, but is poorly conserved [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.19 0.72 -3.92 57 4914 2012-10-02 13:37:57 2003-04-07 12:59:11 12 294 205 0 2568 5331 10 92.40 19 10.14 CHANGED phhshtLputAtpWapslhsspht...............oWpphpptFhppahs.tphsphppclhslpQ.sscolpEYhpcF.cplhppss......hsc...................pshlptahpGLpt ...........................................................h.htLp.s..A.t.tW.h.p..p..h..ppht..................................oW...p...c..h.p..pt.F...hpp.F......h..s...s....t...tt.s.t..t..c..p.p...l.p...s...l....p..Q..s...sc..ol.p-Yh.pcF..p.p..lt..p.ths..........hsp..........................t.hh...th..ah.tGh...................................................................... 1 853 1171 1588 +781 PF01694 Rhomboid Rhomboid family Sohrmann M, Bateman A anon Pfam-B_1399 (release 4.1) Family This family contains integral membrane proteins that are related to Drosophila rhomboid protein Swiss:P20350. Members of this family are found in bacteria and eukaryotes.\ \ Rhomboid promotes the cleavage of the membrane-anchored TGF-alpha-like growth factor Spitz, allowing it to activate the Drosophila EGF receptor. Analysis has shown that Rhomboid-1 is an intramembrane serine protease [2][3][4] (EC:3.4.21.105). Parasite-encoded rhomboid enzymes are also important for invasion of host cells by Toxoplasma and the malaria parasite [5]. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.89 0.71 -4.17 65 7854 2012-10-01 23:21:32 2003-04-07 12:59:11 17 73 4018 16 2797 6356 2105 148.00 23 48.95 CHANGED shtpsphWR.....llTshalHs..hhHlhhNhhsLhhhGh.lEphh.....GphRhhhlYlluGlhu...ulhshlhssts......................tlGASGulaGllGuhhshhhhshhhhh................................hhlhhhhhhlslshshsh...hstlshhu...........................HluGhlsGhhhuhhlhtthp ....................................................h....phaR......llo...sh.F..l...H.........h........u...........h.......h.........Hl..hhNhlh....L....h.............h.....h.Gth....l...E....p...h..h......................G...p.....h....+h......h.h....l...a.l.....l...uu...l..hu..........ul.h....p....h....h...hss.s............................................................hlGASG..u.....l...a....Gl.....h.us.h.h..h...h..h.h...t.s.t.t.h.hh..........................................................hhhhh.h.h.h.h.h.....l..h.h..sh.h.hsh.............hs...s..l..s..hhu............................................................Hl.u..Ghl...s..G..hhhuhhh.....hh............................................................................. 3 991 1753 2345 +782 PF05104 Rib_recp_KP_reg Ribosome receptor lysine/proline rich region Finn RD anon Pfam-B_3249 (release 7.7) Family This highly conserved region is found towards the C-terminus of the transmembrane domain [1]. The function is unclear. 
25.00 25.00 25.80 25.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.36 0.71 -3.69 6 77 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 30 0 24 79 0 127.50 54 14.53 CHANGED MKETSYEEALAKQRKEpuKTQ.ptKsDKKKK-KlsEKKsKuKKKEEKPNGKIPEpEsstEsocp........llltppP..sPsVsssPspVsls...PsVAshPcsotPs.opcpsuuP...tKuss..sPpspppKppKsA....KstPAPsptussP..hsuKuAPlsAp ...MKETSYEEALApQRKEhuKTp.ppKs-KKKK-K.ssEKKsKsKKKEEKPNGKIP-p-ssspso..........................lh.ppP...sPsVslsPssVt.........s.lh.tPhsssPs..s.pcchsus...........Pp.cpppKp.pKs.A........KscPAsuphsss..h.ssKuA.h...s............................................................. 0 2 6 11 +783 PF00636 Ribonuclease_3 Ribonuclease III domain Bateman A, Finn RD anon Prosite Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.55 0.71 -3.63 239 3186 2012-10-03 08:45:47 2003-04-07 12:59:11 21 88 1836 63 1255 5519 1999 113.60 26 26.36 CHANGED ppLtaLGDullchhlpphlh...........................tspLsp........h...psthV....sspshu..phsp............p.......................................L...tp....hlp......................................................................................................................................................................t...................phhusshEullGslaLsss 
...........................................................................................................pLtal.G....DAl..hchhlpp.aLht........................p........sph..c.......................sspLpp.........h....ps.phV.......ss.cs.Apl.h.t..........................p........................................................................hh.......................h..pp........hl.p.p...pp......phhcptcs..................................................................................................................................................................................................................................................................................tpt.tt..p.......pssht.sh.phu..sshEAllGhlalst.t................................................................................................................................................................ 0 428 728 1043 +784 PF04597 Ribophorin_I Ribophorin I Kerrison ND anon DOMO:DM04452; Family Ribophorin I is an essential subunit of oligosaccharyltransferase (OST), which is also known as Dolichyl-diphosphooligosaccharide--protein glycosyltransferase, (EC:2.4.1.119). OST catalyses the transfer of an oligosaccharide from dolichol pyrophosphate to selected asparagine residues of nascent polypeptides as they are translocated into the lumen of the rough endoplasmic reticulum. Ribophorin I and OST48 are though to be responsible for OST catalytic activity [1]. Both yeast and mammalian proteins are glycosylated but the sites are not conserved. Glycosylation may contribute towards general solubility but is unlikely to be involved in a specific biochemical function [2] Most family members are predicted to have a transmembrane helix at the C terminus of this region. 
25.00 25.00 27.80 25.30 24.50 24.50 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.41 0.70 -5.97 40 386 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 288 0 244 366 1 397.90 32 80.96 CHANGED sacNssltRsl-Lspsal+pohplslc.....Nhu...spP.tscYhhshss..c.hspluhlssp.ps.spttsph........slclppsp..t...........aplpLPpP.ltPusplslplp.ashspslpPhPspIsQs-pQhlhapssta.hhSsYtTcp.Qphpl+hs.sspl.saTpst.............sspppssslsYGPa..pslsu......aohpP.ltl+YEpspPlsplspLpRsIEV..SHW.GNlshEEpYpLpNsGAcLputFSRl...-appsphp................................................sssulpplchhLPs..supDhYYpDplGNlSTS+hpsspt.h.....................Lcl+PRaPlFGGWpasFslGashshppaL+psust..pYhLplPhlsuhp...-shh-plplcllLPEGApslclps.........P.hshtptphshchoYLD.ThGRsslslphpNls...--tp.pplhVpYca.......sthshhpKPlhIsuhhahlFluhhllsp .............................................................................h.s.sl.RplcLpp.ph.s+.shplhlp........shs...sps...spah.hsh.s........p..hspluhlpst.pttpptt................hphtt.tp..th.......................................................aplpLsps.ltsutphslhlp.hhhspslpPhPtpIsQ.s.-..p.Qhlhapssta....hhS.sY.Tpp...Qphpl+hs...ospl..s.aTphs..................................sspppssplpYGPa...cslss........ho.ps..hpl+aEpspPhhslsplp.R.sIEl.....SHW.GNlulE...Ephp.LppsGApL.cusFSRh...-appp...t................................................shsulpphchhLPs..uup-sYYpDpIGNlSTS+hh.tp.cpsp....................l-l+PRaPLFGGW+hpahlGYshP..........pa.Lhptss..........pahLph.hlstlh...sh.sh-plpl+llLP.............EGAcslclps...................P.hslpptt....p.h.chTYLD..ohGRsslshptpNll....-pah..ppl..hVpYsa.......st.hhhLp..cPLh.lsu.shahlFhsshlhh.............................. 0 87 140 202 +785 PF01775 Ribosomal_L18ae Ribosomal L18ae/LX protein domain Bateman A anon PSI-BLAST Q02543 Domain This family includes eukaryotic L18ae as well as archaebacterial specific LX. Ribosomal protein L18ae forms part of the 60S ribosomal subunit. 
22.90 22.90 23.50 22.90 22.50 22.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.37 0.71 -4.23 63 714 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 512 7 414 682 35 104.30 44 66.40 CHANGED l+pYpVhGchhss...........................................................................................................................p..................hsshcshhKEhRshsppsAl-phYs-hGu+H+s+tppIpIhcVp ............................................................................ppYpVlGRtlPo....tt.t..s.la+M+lFAsNplhAKS.....................................................................+FWYFlppL+KhKKusGEllthp..l..............................hE+pPhplKNa...G.I....W....l.R.Yc...SR.oG.p.HNMY+EYR-hohsuAVpphY..........p-MuuRHRsRhp..sIpIl+V.......................................... 0 140 231 342 +786 PF00828 Ribosomal_L18e Ribosomal protein L18e/L15 Bateman A, Finn RD anon Pfam-B_1295 (release 2.1) Family This family includes eukaryotic L18 as well as prokaryotic L15. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.86 0.71 -3.77 104 6291 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 5075 302 2015 4096 2465 118.70 35 75.86 CHANGED Ghhtppct+sp+ut+sp........ppc.ha.th.h....la+hlu+.+....sFpp.hh...hhsphphssl...........................Nls+L.p..thhtts.........ssc.hllshuhl.......ts.hKlLGtGtl..phs.lsVpAhphScsAcp+IcpAGGpshhh ........................................................................s...GKTuGR..GpKGQ+uR.uGs.......ts+h......GFEGGQhP....LaRRLP.K.h...........GFss.hh...........phs.V....................................NLscL..s...p........t..ps................ls.p..sLhtsullt.......................tshlKl.L...u.........s...G............c......l...s........p.t....ls.V.c........u........t.phS.cuAcpt.Ic.uAGGplp..h................................................................................... 
0 703 1254 1677 +787 PF01907 Ribosomal_L37e Ribosomal protein L37e Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes ribosomal protein L37 from eukaryotes and archaebacteria. The family contains many conserved cysteines and histidines suggesting that this protein may bind to zinc. 24.50 24.50 24.70 25.00 23.00 24.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.92 0.72 -4.17 46 667 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 505 75 372 579 23 50.80 56 57.37 CHANGED sKGTsSFGK+pp+.oHhhCRRCG+pSaHlpKcpCAuCGassu+ph.pYs.WspKshc ...................sKGTsSFGKRpsK.oHsLCRR.C.GR+SaHlQKpsCuuCGYPuu+pR.cYN.Wu.KAh................ 0 138 223 305 +788 PF00347 Ribosomal_L6 L6; Ribosomal protein L6 Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.30 21.30 21.40 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.42 0.72 -3.48 132 10746 2009-01-15 18:05:59 2003-04-07 12:59:11 18 5 5013 462 3006 6705 3987 74.40 26 80.83 CHANGED lshG.hclphps.....plhhshG.pt....lphphsps.lplph......phshhhh.puhc+pp.........ltphRshhs.hs+Glp.G ..................................lssG.hc.s.phpG...........pllhshG.p.ts..........lphp.h.s.ps...lpl.....p.h.......................................p........h....s........hhh....h...p.........G...t...c...+.phs...................thtshRs..s.h.scGVp.......................... 0 982 1857 2473 +789 PF02482 Ribosomal_S30AE Ribosomal_S30; Sigma 54 modulation protein / S30EA ribosomal protein Mian N, Bateman A anon Pfam-B_869 (release 5.4) Family This Pfam family contains the sigma-54 modulation protein family and the S30AE family of ribosomal proteins which includes the light- repressed protein (lrtA) (Swiss:P47908) [2]. 
23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.14 0.72 -3.55 144 5238 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 4060 21 1172 2968 717 96.10 30 63.58 CHANGED plploG..+p.l-lo-ul+palpc+ls+.lp.+ah.s....ph...hp.sc..Vhlsht.........t....ptpsElol.hh..sGh..hlpAps.pspDhYuAIDhss-KL-+Ql++aKc+..hp.s+ ..................................hsIpG..+p.l-.lTcA.lRpaVppKl..s..K..LE..+ah..s...pl......hp..sc...VhLps..............pp.p.stp...sEs..Tl...h...sus..........hlp......A......p..........s......p.........s......p....D.......hY....u......AID....hlh-K....L-RQL....pKhKcKhpp........................................ 0 368 735 983 +790 PF01201 Ribosomal_S8e Ribosomal protein S8e Finn RD, Bateman A anon Prosite Family \N 21.30 21.30 21.60 21.80 20.80 21.20 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.67 0.71 -4.01 93 1149 2009-01-15 18:05:59 2003-04-07 12:59:11 17 11 591 7 655 1075 110 194.20 35 92.35 CHANGED Muhscs..cp+R+tTGG+hcht+K.KRKaEhGRs.....................................................ss.T+l........u....tcl+plR.........................sRGGNp.KhR..............................uLRhcpuNhsasppssoc+s+IlsVhhNsuNschlRpsslsKGsIlplsss............-LG...............................................................................hAploSRPGQ...............sGhssuhll 
.......................................Ms.sc.t.pc+R+tsGt.+hchhcK...KRKhEh.G.cs...................................................................sssTpl............up...ppl+phR...................................................................sRGGspKh+.................................................ALRhcpuNho..asptssTt+scIlsVhhNsSNsphsRpssL....sKusIlplsss............pLGhhttst...............................................................................h..hthhAploocPGQ...............sGpssuhlL.............................................................................................................................................................................. 1 231 377 536 +791 PF00652 Ricin_B_lectin Ricin-type beta-trefoil lectin domain Bateman A anon Prosite Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.45 0.71 -4.09 97 3906 2012-10-02 19:42:32 2003-04-07 12:59:11 17 272 770 168 1967 4763 232 120.80 19 22.78 CHANGED ssshlpths..sshCLDst....tttpsssltlhsCss......sss..QhW...phsts......splpstst..........CLssssss..............s.ltlhsCps.ssst......QtWphps........ssplhstp.ss..hCL-stt.......sssplhlh.pC....sss....ss.QpW ...................................................................h.....t.ht...sshC..l-st............ttsst......l.t...lh.....s.Cps..............sss.......Q...h.a............th..s.t..s....................t....p....l...p...t..t.s...t...........................C...Lssssts..................s......l...tl..h.s.C....s......s...sss................Qp..W.phps.....................ss..p..l..h...ph....t..ss........hCLsstt.................sstt.l.hl....t...C....sts......s..QtW............................................................................. 0 706 1030 1517 +792 PF00355 Rieske Rieske [2Fe-2S] domain Finn RD, Griffiths-Jones SR, Eberhardt R anon Prosite & Pfam-B_31 (release 4.1) Domain The rieske domain has a [2Fe-2S] centre. 
Two conserved cysteines coordinate one Fe ion, while the other Fe ion is coordinated by two conserved histidines. In hyperthermophilic archaea there is a SKTPCX(2-3)C motif at the C-terminus. The cysteines in this motif form a disulphide bridge, which stabilises the protein [4]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.08 0.72 -10.67 0.72 -4.42 146 13057 2012-10-02 12:49:59 2003-04-07 12:59:11 21 95 3458 318 4393 11916 6165 94.20 19 31.33 CHANGED sahhlsppsclsp....sshhhhph.spp.llht.stcGphhuhtshCsHtGsh................lsts....tp......t......hpCshHGhpashp....GplhpsPs................hpth.sthphs .................................................................h...tpl.t............t.h..h.h...h........h....s....t.....t.........l..h...h...h....s........t.........s.....u...p.........l..h..uh.ts.hCsH..t..G.s........................................................lst..s.....ttp..................tt.............th.hCP.hH...G.h....pash.....s......Gph....h.p....s.Ps..t............................tt..................................... 0 1183 2635 3671 +793 PF00866 Ring_hydroxyl_B Ring hydroxylating beta subunit Bateman A anon Pfam-B_771 (release 3.0) Domain This subunit has a similar structure to NTF-2 and scytalone dehydratase. 
20.50 20.50 20.50 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.64 0.71 -4.51 44 1346 2012-10-03 02:27:23 2003-04-07 12:59:11 13 2 779 109 317 1106 162 141.90 28 83.48 CHANGED +EAcLLDs.....pcacpWhs.lhs-DlpYahPhppsc.ptsps............ptshhas-s+ttLcsRVtRlposhuWupsPsoRTpHhloNl.ltts.psssplcVpSsahlaRsRhc...tpschasGpppchLRpsssu.....hclspRpllLcpssl.sp ...........................................................pEApLLDc.....tcaccWLs.hhs--...h.pYhhPspss....ttphspc.th................hshla.p-s+stLccRlhRl.c.o.shua.u.p.p.PssRT.pHhl.oNlp...l...h....pt.....t.......p.....s...s....t...........hp.VRsNahlh.c.sRtc...........p.ps.h.a.s.Gpp.h.cplccss-s.........h+lhc+pllLcpshltt................................... 0 51 148 245 +794 PF04068 RLI Possible Fer4-like domain in RNase L inhibitor, RLI Kerrison ND, Finn RD anon COG2042 Family Possible metal-binding domain in endoribonuclease RNase L inhibitor. Found at the N-terminal end of RNase L inhibitor proteins, adjacent to the 4Fe-4S binding domain, fer4, Pfam:PF00037. Also often found adjacent to the DUF367 domain Pfam:PF04034 in uncharacterised proteins. The RNase L system plays a major role in the anti-viral and anti-proliferative activities of interferons [1], and could possibly play a more general role in the regulation of RNA stability in mammalian cells. Inhibitory activity requires concentration-dependent association of RLI with RNase L [2]. 20.60 20.60 21.20 21.20 20.10 20.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.63 0.72 -4.36 80 889 2012-10-03 08:56:42 2003-04-07 12:59:11 10 13 472 2 602 859 65 33.10 36 7.40 CHANGED h+lAllch-....cCc.PKKCst.cht+hssl......s+sGcp ..........+lAlhsh-................+Cc.PK+Cst.cht+hs.l......lRhGp............ 
0 200 348 505 +795 PF04437 RINT1_TIP1 RINT-1 / TIP-1 family Bateman A, Wood V anon Bateman A, Wood V Family This family includes RINT-1, a Rad50 interacting protein which participates in radiation induced checkpoint control [1], as well as the TIP-1 protein from yeast that seems to be involved in a complex with Sec20p that is required for golgi transport [2]. 24.80 24.80 25.20 25.10 24.70 24.70 hmmbuild -o /dev/null HMM SEED 494 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.68 0.70 -5.95 19 318 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 250 4 239 330 2 430.90 22 62.61 CHANGED hsl-hhlpPlclRFpYHFoup+.Tspl-K.PEaahshlhchlss.ssFhspplQPlhDc........hthshhss+ppFIsull.hlpcKlsspl..................plppc.phhsHLlcEllsFDpcl+psasY.s...............h..slplLs.cpsha-+WlplEcchAls+hcthlps.cshphp.pt.............l.sshssh+sscsAtph.cLLpslh-RhpsLsshs.clpFLhslQlplhcpFhppLppth.th.h................tphpssssL.+hspllsuspYlpphlc-WuscVaFl......p.t...............phscluscsou..................lFD-shsshc+.lchchpshIssslhcshcsth+sYh+.hspWsohssp......stt.shssSu.-lssshphLpsplshLppt.Lshsshtplh+plhtslppalassllhts.pFSpuGusQhthDh.cpLhslhsh..........stpspthhp+LsEulhLLsLphsts...tphh.tt.tc........................spsspshLsEl..ulppLop.........s-spslLpRRs 
...............................................................................hshphhhpPhthRFp...aHF.p.u.p.+...Tshhs......K.....P.....E.....aahs.lhphhpph.thhtphlpshhsp..........................t.....h.ss.h.pa..hpull....hlhpKltt..l.....................................................................p.h.tps..thhsHhlpphhtF-pp.lp.......ph....asYss.......................................................s.shplL.....ptp..h....h..ppWlphEpc....hAhp+hp.t.h.h.ps....ssh....p...p...........................t..s.ph+..ss.sA.phhpLLpslp..........c+.apsL.phs.clpFl.plQhtllcpah.pLhph................................................................tt.pt.sth.phstlhsuspaltphl...p-Wup..p.hhFl............................p..............................thtphss.ptss.....................................................................lF-phhs.hpp.htpph.p.lhp...tlhpphptth..+sYhp...ppW..sh.tt................................hs.Ss.phs.hlt..hLppplt.Lppt.Ls.hs..........hhth.hptlhptlsphlhp.pll....h......ts...pFst.........sGstQhthDh.ps..lhslhp....................ht...p..s...p...thh.pl.p.-uhh.lLs.h..ht.t..................t....t...................................................................................hpthhtph....t.l.t..ls..........tph..lL.hh.................................................................................................................................................................................................................................................................................................................... 0 83 134 198 +796 PF01163 RIO1 RIO1 family Finn RD, Bateman A, Wood V, Mistry J anon Prosite Family This is a family of atypical serine kinases which are found in archaea, bacteria and eukaryotes.\ Activity of Rio1 is vital in Saccharomyces cerevisiae for the processing of ribosomal RNA, as well as for proper cell cycle progression and chromosome maintenance. The structure of RIO1 has been determined [4]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.31 0.71 -4.95 28 1707 2012-10-02 22:05:25 2003-04-07 12:59:11 17 19 847 16 1056 4738 997 172.30 27 45.74 CHANGED sVYpuhs.....tsup.....................phAlKla+sutssF+c.hccYlss-hRap..h.+tsh+pllchWAcKEa+NLpRltptG.l.VPcPlshpcplLVM-alG.tpGhsAPpL+-sp.......tcscplatcllp......hcthYpcspLVHuDLSEYNlLlpcs.clhlIDhuQuVphs.HPpAhpaLcRDlpNltpFFc++uss....hhshcpl .............................................................................................lahs.s........pt.......................hshKla+.......h.......s.h............h.......t.......F+......p.......h.......cph.......h.......s....h..........p.......h.......+.....................h.p...p...hs.......h...h.h......c....h....h.s........p...+...E.hp...sL.t............p..........l.....t.....p...........s..................G.......l...............s......P...c.........P........l........t.........h..........p..........c......p........l........l......l...M......-...h....ls........................G..........h.....s.....h....s....p....L......p.-.s.p.......................tp.h....t...p........h...a...t..p.llp................hh...t...h.....h......t..ps.....s.......LlHu...D..L..S.E...a...N.lL..l...............p.............p............s.....p..............h.....h..lI...D..h...s.......Q..s...V..........ph.....s....p.......s.............p..............A..............t.........h........h..pR..Dlpslt..p.aFt+hh..........h.................................................................. 
1 337 599 872 +797 PF01000 RNA_pol_A_bac RNA polymerase Rpb3/RpoA insert domain Finn RD anon Pfam-B_172 (release 3.0) Domain Members of this family include: alpha subunit from eubacteria alpha subunits from chloroplasts Rpb3 subunits from eukaryotes RpoD subunits from archaeal 21.30 21.30 21.40 21.70 21.10 20.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.56 0.71 -3.91 59 7300 2009-01-15 18:05:59 2003-04-07 12:59:11 21 25 6097 183 1610 4907 2300 120.20 33 38.59 CHANGED VpI..su.......ltHchuhlshlpEDlhc....hlh.hKth...............psp-ps..hlpLpspGsup..................VhAuDlph............sssVcll....................ss-hhIspLscstc.lchchpsccGhGhs.Ac....................ao .........................hph..............VhHEa..SolsGV..pE.D..V.hc....IlLNl.Ktls....h.......................psppcp......hlplshp.Gsu.................................VTAuDIp.h....................................................sucVEIl....................................................N..P-.hhIssL...s..c............s.s..p..lphclplp+GRGYlsAp..tppptt.....h........................................................................................................................... 0 566 1019 1357 +798 PF04997 RNA_pol_Rpb1_1 RNA polymerase Rpb1, domain 1 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 1, represents the clamp domain, which a mobile domain involved in positioning the DNA, maintenance of the transcription bubble and positioning of the nascent RNA strand [1,2]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.06 0.70 -5.18 24 12239 2009-01-15 18:05:59 2003-04-07 12:59:11 7 127 9183 179 1981 10137 4485 254.70 33 31.57 CHANGED h+clcplpFulhSP-pI+.phShsclpps-ohp.sp...pPcpsGLhD.+lGs.........hcpch.............hCpoCthphtc...C.GHaGHI-LspPVaHIGahctlhplLcplChhCuplhhsppt......hhhp.t.......thsphp+hphlsphstppshpcts...............................................................thtp.pshhtc.G.hslhthhc........tcphpcpht......hps.pshplhc+Isccchhlhuhsspts.+P-hhILoslPVPPPslRPuV.h-utp...hu-DDLThpltcIlppNppL++hppp.uAPptllpcphphLQ.plsshhDN..thsuhs.st.pssRPlKSlspRLKGKpGRhRsNL ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p..phGaIcL.s...P.l..Hl.......h.ah..pt.l.p...ch....s..h.h..........s.h...........t...hp.........s.........-......p..............h..........h..h..h..ht...........................ss.h.p.p.h.ph......l.h....p......p.....h...h...p.....h...h...p.c...hs......................................................................................................................................................................p...............t....t......G...............t.l..h....t..h...hct......................t..pph....p...hp..................hp.s.tt..s...p....h....hp..+....h.........cc.............h.......h..........h.............u.......h...................t.........s..........h............s.........+..........P.........-.........W........M.l.........l.s.l.lPV.PPslR....P.........l..l..D....Gut......hu..p....sDL...s..hh
tc......l....Ip.......t.......N....s...............p......l..............p..+...h........................p.........................s....u.........P.................t..............h................l............l..p..................p...........................cp.........h............L...Q..........t.Vss.h.hDN......s.h...t.................s.....s.............h.........................t......t.....+.sh.Kuht..ph.lcGKp.....G.........RhRtsL..................................................................................................................................................................................................................................................................................................................................... 2 707 1252 1678 +799 PF00623 RNA_pol_Rpb1_2 RNA polymerase Rpb1, domain 2 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 2, contains the active site. The invariant motif -NADFDGD- binds the active site magnesium ion [1,2]. 
23.40 23.40 24.50 23.60 22.90 23.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.15 0.71 -4.29 33 13383 2009-01-15 18:05:59 2003-04-07 12:59:11 15 145 9364 137 1887 9901 3260 133.80 57 17.84 CHANGED GKRVDaSuRoVIss-PsLclcplGlPhphAhpLphPphlsphNhcclpphh.pt.p.a.th..h..p.pus+phlptt.........s.plh.................tlhR+lhcuDlVLhNRQPoLH+hSIhuHcs+llpt...+ohRLp.sVCssYNADFDGDEMNlHlPpo.pA+uEuhpLhhs .............................................GKRVDYSGRSVIlV.GPp.LpLaQCG..LP+c.hAlELFps.Fllp.tL....t.pp...lAs...................................................................................s....l........t..s..........A.......K.....ph......lccp................cs..hVW............................................................-lLpEVh.p..sHPVLL.....NRAPTLHRLGIQA..........FpPh....LlEG......+AIpLH.P.LVC....p.uaNAD...F...........D........G.DQ........MA.V.H.V.PLShEAQuEARhLMh.s............................................... 0 671 1188 1598 +800 PF04983 RNA_pol_Rpb1_3 RNA polymerase Rpb1, domain 3 Bateman A anon Pfam-B_288 (release 4.2) Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 3, represents the pore domain. The 3' end of RNA is positioned close to this domain. The pore delimited by this domain is thought to act as a channel through which nucleotides enter the active site and/or where the 3' end of the RNA may be extruded during back-tracking [1,2]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.91 0.71 -4.50 112 9996 2009-01-15 18:05:59 2003-04-07 12:59:11 13 133 7655 137 1909 7790 3140 147.50 24 14.34 CHANGED pIloPpsGcPlhussQDhlhGsYhlTtcc.........................sFhspp-shphhtts............................................................................................................pssIhhs.pt....................................haou+phhuhll.t..............pl.hpph..............................s.hhlppu.lhts.lsct.hu........tpshusllphlhcchG.ptssphlsplpplshtahtptG.holGlsDh ..................................................................................................................NllsPtsGcPlhsssQDhl...lG.hYhhThcp.........................................................................................hFhshpcsh.hh..h.................................................................................................................................................................................h..pst..l.hh....t..............................................................................................................hhhhc..p..hh...uhh.l.s..........................pl...th.........................................................................................................h.hlpps...h.................................s...........................pshupllphh.hp..c...........h....Ghp..ssthhsplppl....s.htahhh...tG.holGlsDh........................................................................................................................................................................ 0 675 1204 1625 +801 PF05000 RNA_pol_Rpb1_4 RNA polymerase Rpb1, domain 4 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). 
This domain, domain 4, represents the funnel domain. The funnel contain the binding site for some elongation factors [1,2]. 24.80 24.80 24.80 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.59 0.72 -4.31 106 8924 2009-01-15 18:05:59 2003-04-07 12:59:11 12 128 7522 137 1812 6872 2412 93.40 29 8.38 CHANGED lppscc.scl...ph.upshccshc.....thhpph.hs..pc............tp.................NslhhMstoGu+GShhNlsQlsuhhG..ssps.p.........p.h.............php.h.ptPhspGFh ........................................................h....cchpcl.....sh.spstcpshc......shhspl...s.....p.pp..................t..t....t.t.....................................NslhhMus.SGA.+GS.............sh.....plpQlsuhhG.hu.........h.s.t.......................................h.......................................................... 0 647 1141 1537 +802 PF04998 RNA_pol_Rpb1_5 RNA polymerase Rpb1, domain 5 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 5, represents the discontinuous cleft domain that is required to from the central cleft or channel where the DNA is bound [1,2]. 
20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null --hand HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.68 0.70 -5.14 27 9187 2009-01-15 18:05:59 2003-04-07 12:59:11 12 161 6820 151 2086 7646 5482 395.80 25 36.28 CHANGED GLsspEFFFHsMuGREGLlDTAlKTAcoGYlpR+LlKshEDlhlpYDsolRsustpllQhhYG-DulDshphcthshhhh.hhsh.hptphhhshtp.hhhttt.........................................................................................................................................hthh.csthtphplhp.hphsppthpphhshhpspappsllpsGEuVGhlAAQSIGEPuTQMT.LpTFHhAGsuupssT.GlPRl+Ellplspstppsshsshhhhshspcptphhthphttpphthtthhhtt.hhhs.s.tpshtpptt.hhhphhhh.pp.tpt.hh.......hh.......h.h......................................................................................................................................................h......t....t...p.hhtttttthhtpththpptppthshpthtthtphtth.hphhhhhtsspthhhthhhsptthsspshh.htstchllph.pGsslptlhp......hps...lpss+phoNp..lhphhchlGIEAuRpsllpElpplhttcGhtlshRHlsllsDhMThcGhlhul...oRtGlspp.pShh.h 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................GLsshEaFhpshGuRc.............G.L.s.DTAl+T.ApoG......YLpR..RLVcsh.p-lhlp.cs..s....s....p....p....p....s....h.........h.....h........h..h...G..t..-hhtsht.ch.thhhhp.hhts.ptthhh..tt.h..t................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.h.ttthpthhlpsshsppsp.hhshhshspshhptphlp...GEA....V.GhlAA...QSIGEPGTQhT.h+TFH....
.h....u..Gs..u...s...t....s....................h.s....s..........p.......hp.p.......hp.h......p.................h...t..............h..t....................................................p....t....t..t......h....h.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...h.h.....h......t.............t.....................t.......h.........................t.................h....h.....ht....p.....l.p.pV..ap.........pG..h..t....s+php.h..lp..ph..h..p.p..ht....l...t.s.t..p.o............h.h........h.p.....h...................h............s......ch...................h......t...................ht..hp.....lhu..l...T+.tu.ltpp..ohh..s.........................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................ 0 752 1323 1780 +803 PF04992 RNA_pol_Rpb1_6 RNA polymerase Rpb1, domain 6 Finn RD anon Pfam-B_288 (release 4.2) Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 6, represents a mobile module of the RNA polymerase. Domain 6 forms part of the shelf module [1,2]. This family appears to be specific to the largest subunit of RNA polymerase II. 21.10 21.10 21.90 22.40 20.60 19.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.11 0.71 -4.55 153 1575 2009-01-15 18:05:59 2003-04-07 12:59:11 9 78 1215 90 258 1468 151 157.90 39 18.62 CHANGED hDGshlE.pQpl-slphSsp...pF-............+ca+lDl......s.spshh...........tshlptsh..p-l.tussc...h........QphL-cEacQLppD.....................RchLR......plhssu-.sphs..LPlNlpRlI.NApphF+...I....cppps.SDLpPhcllpsVcpLhc.+.LllVp..............................G....cD............................tLS.........pEAQpNATLLFphhLRSpLAsKRVlp.Ea+LsppAF-WllGEIEsRFppu ............................hDuhhlE..QplshlphSsp...tF-...................+ca+hDh.............s.spphh.......................pch..hp.s......p-l..hsshp..s......................p.L-pEa-pLhpD.....................RchLR.........p.l..h..sp....u-...pph................LPhNltRhl.sApphFp....l.......ptp.t...osLp.Ph.........c.....Vl.s...VppL...c+..hhlVt.................................G.....pD.............................lS.........hpApp.NATlLFphhLRSpLs.Kcl.sp.Ea+Lsp.AF-allGElEsRFtpu................................................................. 
0 98 149 223 +804 PF05001 RNA_pol_Rpb1_R RNA polymerase Rpb1 C-terminal repeat Finn RD anon Manual Repeat The repetitive C-terminal domain (CTD) of Rpb1 (RNA polymerase Pol II) plays a critical role in the regulation of gene expression. The activity of the CTD is dependent on its state of phosphorylation [1]. 20.30 6.20 20.50 6.20 20.20 6.10 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.46 0.74 -6.69 0.74 -3.36 99 4609 2009-01-15 18:05:59 2003-04-07 12:59:11 8 80 222 10 2717 4635 45 14.20 79 15.78 CHANGED SP..s.SP.....s..Y..SP...o.SPsa ............SP..T.SP.......u......Y..SP.....T..SPuY.............. 0 913 1434 2266 +805 PF04565 RNA_pol_Rpb2_3 RNA polymerase Rpb2, domain 3 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Domain 3, s also known as the fork domain and is proximal to catalytic site [1]. 21.00 10.30 21.10 10.40 20.90 10.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -8.99 0.72 -4.22 31 16867 2009-09-13 23:49:08 2003-04-07 12:59:11 11 131 12522 133 1834 13597 2855 63.90 55 9.45 CHANGED Qhh-phN.loploHhR+lsh....Guls+cptshcsRclHsopaGhlCPl-TPE.GtssGLlssLuhhscls .........................Qhhsph.s.lSp.loHhR..Rls............uuL..s.R-p..t.thc....sRclHsTHa....GhlCPhETPE.G..shGL.lpsLuhhupls.................... 0 652 1156 1555 +806 PF04567 RNA_pol_Rpb2_5 RNA polymerase Rpb2, domain 5 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Domain 5, is also known as the external 2 domain [1]. 
20.40 18.00 20.40 19.00 20.30 17.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.28 0.72 -3.99 64 7512 2009-01-15 18:05:59 2003-04-07 12:59:11 12 60 5291 96 702 6482 353 64.00 49 13.61 CHANGED apsLlppG..llEYlDsEEEEsshIuhs..th.......................................stp.sppaTHhEIaP .............WcsLlcsG.....llEYlDAEEEETsMIsMTPEDL-...R..ptsh...............................................................ttt..p.hht.hsss..sHhaT..HCEIHP................................................................... 0 239 399 587 +807 PF00562 RNA_pol_Rpb2_6 RNA polymerase Rpb2, domain 6 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain represents the hybrid binding domain and the wall domain [1]. The hybrid binding domain binds the nascent RNA strand / template DNA strand in the Pol II transcription elongation complex. This domain contains the important structural motifs, switch 3 and the flap loop and binds an active site metal ion[1]. This domain is also involved in binding to Rpb1 and Rpb3 [1]. Many of the bacterial members contain large insertions within this domain, as region known as dispensable region 2 (DRII). 
23.40 23.40 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.31 0.70 -5.72 117 16703 2012-10-01 19:23:01 2003-04-07 12:59:11 23 139 12411 138 1943 12966 6003 307.10 35 46.19 CHANGED slhAphhshspaspusRh.......................hhps...scps.t...................................................................hhshs.phpRssps.s...phsQ+P..lVpst.tlctsp..............hs-lshGpNslVAhhsasG.YNhEDulllscphlccshasSlahcp.....hps..........ccpc..hG..-chhpphsss...........scpshppL..Dcs....Gll.............................pl...................Gst......VpsGDlLlGKh.....s............hpthhsppspp.h+DsSlphtts..pp.GhV.cVtl.............................................................................................................................................................................................................................................p.sss.s.....hphl+VhltppRp.plGDKhuuRHGpKGlluhlhPpEDMPahp.cGh..ssDlllNPhGlPSRMsl...................GQllEshhGhAush...........................................................................pG.hhhssssFs...........................................................................................p..................ppl...............................tchLp.....................c.....t.....................................................................................Gpp..............hlasGpTGc.h.......cs.lhlGhhYh.KLpHhVs..DKhHAR.upGPhs..hl.....TpQPltG+up 
.............................................................................................................................................................................................................................................s.hhuph.shl...sp......us+h.......hhts.....ttt.t.....................................................................................................................h.h..h.a.p.RoNpss.....shsQ+P..hVp..hs........l...ctsp............hs..........t......hs.ELshG.....pNhlVAhhsa.....pG..YN.EDul.........lhspp.l.cshapShahcp...h.............................p.pp...........t....p.c...hh.t.c..p...............................ttthhppL.D.c.p......Glh...................................................Gs.......l......t.t..c..lllu+h.................................................t.h...t..p......h+-...sh...h.....s.......t..Ghl....tl.h.......................................................................................................................................................................................................................................................................................................t....s.......hl+lh..hh.p+h.p...GDKhuuRHGp.K...Ghlu.........hhh.....pDMP..a..........pG.........h....s..........s..DlhlNPhulPSRMsl...................uplhEh.hhuh..sut...........................................................................................................................................................................s......h..t..sss.Fst................................................................hp.........................................t.l.t.l.....................................................................................................................................................................u...hh.s..Gh.oGp...h............h...................................................................................................................................................................................
.................................. 0 704 1234 1655 +808 PF04560 RNA_pol_Rpb2_7 RNA polymerase Rpb2, domain 7 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Rpb2 is the second largest subunit of the RNA polymerase. This domain comprised of the structural domains anchor and clamp [1]. The clamp region (C-terminal) contains a zinc-binding motif [1]. The clamp region is named due to its interaction with the clamp domain found in Rpb1. The domain also contains a region termed "switch 4". The switches within the polymerase are thought to signal different stages of transcription [1]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.99 0.72 -3.72 134 7219 2009-01-15 18:05:59 2003-04-07 12:59:11 15 94 6073 138 1845 5823 2697 79.50 52 6.84 CHANGED GGQRFGEMEVWALEAaGAAasLpEhLTlKSD....DlsGRsphhps...................IscGcsh.css.lPESF+VLl+ELpSLulslclhtp ........GGQRFG.EME.......VWALEAYGAAa.sLQEhLT.l.K.SD......D.V....sGR..s+hYcs.........................................................................Ilc..G...c...s....h.....cs...G..hPESF+..VLl+ELpSLulslcl..p................................................................................................... 0 651 1163 1560 +809 PF03874 RNA_pol_Rpb4 RNA polymerase Rpb4 Finn RD, Bateman A anon Finn RD & COG1460 Family This family includes the Rpb4 protein. This family also includes C17 (aka CGRP-RCP) is an essential subunit of RNA polymerase III. C17 forms a subcomplex with C25 [5] which is likely to be the counterpart of subcomplex Rpb4/7 in Pol II [4]. 
21.60 21.60 21.90 22.40 21.40 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.49 0.71 -4.09 88 790 2012-10-03 03:05:55 2003-04-07 12:59:11 11 11 467 61 545 755 50 113.00 23 70.54 CHANGED hLo.sEshplLp.phpppppt...................................................................phsplhp..cslc.Ylp..p......................huchppt-s.........................spplhptLp......php........L.pchEthplsslhPpshsElpsllsphcp....................................ch..........s-...-plpplLchlpchh ..............................................................................................................L...Esh.lLp.phpppppt................................................................................................ttptphs.phhh..colp.Ylp.p......................hu+hp.spEs....................................................................lppl.h.phLp..............php..............L.p..chEh..h......pl.......s...NlpP..po..spEhpsl...ls.ph.ct....................................+..h..........s.-.....-plpplLc.lpp..h...................................... 
0 176 309 446 +810 PF01351 RNase_HII Ribonuclease HII Bateman A anon Bateman A Family \N 20.60 20.60 21.20 21.00 19.90 20.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.10 0.71 -4.67 14 6064 2012-10-03 01:22:09 2003-04-07 12:59:11 13 19 4789 32 1433 4292 2775 182.20 35 75.83 CHANGED hGlDEsGRGslhGPlVsAushls.cp.....h.thGlcDSKKLocp+RptLtchIp...................pthhuhtlsh..hpssphsthsltpsshhshhc.tlpph.slpsc..tlhlDu.ps.....Pt.hththps.lht.............uDuths.luAASllAKVpRD.hhh-hhpchsth.shspssGYsoc.+tptlhchssss.......htRhoFtss ......................................................sGlDEsGRGsLsGPVVs..AAV.....l......L........s....s..p..........................................h............G...l....s..DSK+.L..o.c.p.+.R.p..p.LhptIp....................................................................pp....sl...sh..sluh......spsp.c.I.....D..p.......l.....s..I..hp...A.s....h....hA...M....t..........c....A....l.......t.......t.......L........s......h......p......P..c.....................hl..L..l...Du.ph...........................st...h.....s...h......s...t...p...s...l....lc..................................................GDupsh.sIAAASIlAKVsRD..th.......M...h...............c.................h............s..........p.....p...............a..........P............t..........Y...............s...............a.............s...........p......ptGY.s.T.ptHh.pslt..chGsss..........h.HRpSFts................................................................................................................................................. 0 496 923 1217 +811 PF01138 RNase_PH 3' exoribonuclease family, domain 1 Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family includes 3'-5' exoribonucleases. Ribonuclease PH contains a single copy of this domain, and removes nucleotide residues following the -CCA terminus of tRNA. Polyribonucleotide nucleotidyltransferase (PNPase) contains two tandem copies of the domain. 
PNPase is involved in mRNA degradation in a 3'-5' direction. The exosome is a 3'-5' exoribonuclease complex that is required for 3' processing of the 5.8S rRNA. Three of its five protein components, Swiss:P46948 Swiss:Q12277 and Swiss:P25359 contain a copy of this domain [1]. Swiss:Q10205, a hypothetical protein from S. pombe appears to belong to an uncharacterised subfamily. This subfamily is found in both eukaryotes and archaebacteria. 21.00 21.00 21.10 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.70 0.71 -3.84 192 13598 2012-10-03 01:04:38 2003-04-07 12:59:11 16 86 4722 247 4120 10174 6384 132.80 29 40.48 CHANGED phRslplch..G...hhppAs...............GSshlphG..................s..TpVlsslpsst...t..............................tt..s..............lslphpht...shussph.......................pp..st....ssccph..........hup....l.......l............ccslcssh..........h...........phh.......hplplshpllss................DG............................................shhssul....suushALhsuslP ........................................................................................hRslslcs..G......hh.t.p..u.c.................GSs.l.h...phG......................-....Tp..l.L.s..s.so..hsp...ts.........................................................................hh.....-.hs........................t...ltscYph.........sausGch...................................................tpt...Gp......................suc+ph..................thuR.......L......l......................................................cR.u.....l..+..slhs....................p..................................................................tth.....hsl.p.ls....s...pl.lp..u.....sG....................................................us.p..h..A.u.l....sG.uolALhsuslP............................................................................................................................. 
0 1408 2573 3461 +812 PF03725 RNase_PH_C 3' exoribonuclease family, domain 2 Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family includes 3'-5' exoribonucleases. Ribonuclease PH contains a single copy of this domain, and removes nucleotide residues following the -CCA terminus of tRNA. Polyribonucleotide nucleotidyltransferase (PNPase) contains two tandem copies of the domain. PNPase is involved in mRNA degradation in a 3'-5' direction. The exosome is a 3'-5' exoribonuclease complex that is required for 3' processing of the 5.8S rRNA. Three of its five protein components, Swiss:P46948 Swiss:Q12277 and Swiss:P25359 contain a copy of this domain [1]. Swiss:Q10205, a hypothetical protein from S. pombe appears to belong to an uncharacterised subfamily. This subfamily is found in both eukaryotes and archaebacteria. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.01 0.72 -4.04 107 12290 2009-01-15 18:05:59 2003-04-07 12:59:11 10 60 4627 243 3309 8916 5491 69.10 29 20.56 CHANGED shssuloluhl..s............sp.......hllDPsh.pE-.................htpu...slslsh........ssst..........ph.slhpt........ss....st....lspcplhcslchutps .................................................sslAuluhGl.l...s.....................................sp.............hlh..Dhph...pE-..............................hu.ts..........Dh-hhV.........AGop................cs.h...sshtt................................stth.......tloc-.hhpALthA+p.................................................. 0 1115 2087 2755 +813 PF02755 RPEL RPEL repeat Ponting CP anon Ponting CP Family The RPEL repeat is named after four conserved amino acids it contains. The function of the RPEL repeat is unknown however it might be a DNA binding repeat based on the observation that Swiss:Q9VZY2 contains a Pfam:PF02037 domain that is also implicated in DNA binding. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.67 0.72 -6.75 0.72 -4.50 54 1765 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 175 8 892 1495 0 25.80 41 10.30 CHANGED cpLp++lspRPsh-ELlc+sILppps ........pLp+KLupRPshcELhc+sIL.tp..... 0 145 243 514 +814 PF04059 RRM_2 rrm_2; RNA recognition motif 2 Wood V, Finn RD anon Pfam-B_4981 (release 7.3); Family \N 21.00 21.00 21.00 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.47 0.72 -3.98 4 387 2012-10-02 20:46:34 2003-04-07 12:59:11 7 16 140 0 313 408 9 92.10 44 16.81 CHANGED RTTLMIKNIPNKYTppMLlutIDE+sKGTYDFLYLPIDFKNKCNVGYAFINhlpPppIlsFhcAFNGKpW-KFNSEKVAoLuYAcIQGKsALIu+FQ ...........................RTTlMl+N..IPNKaop.p.hLhshl..D.c.........p.....p..............p............G........p.......YDF..h.YLP.ID........F............p..........N.c......C.....NlGYA..FINhhsstthh............tFhp......sFp..........sp+W........p.t..F.s.........S.......c..........K..lsplsYA+lQGp.psLlt+Fp................................ 1 154 233 283 +815 PF01137 RTC RCT; RNA 3'-terminal phosphate cyclase Finn RD, Bateman A anon Prosite Domain RNA cyclases are a family of RNA-modifying enzymes that are conserved in all cellular organisms. They catalyse the ATP-dependent conversion of the 3'-phosphate to the 2',3'-cyclic phosphodiester at the end of RNA, in a reaction involving formation of the covalent AMP-cyclase intermediate [1]. The structure of RTC demonstrates that RTCs are comprised two domain. The larger domain contains an insert domain of approximately 100 amino acids [1]. 
20.60 20.60 20.70 20.80 19.90 20.40 hmmbuild -o /dev/null --hand HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.31 0.70 -5.57 29 1194 2012-10-02 15:27:11 2003-04-07 12:59:11 16 17 881 19 569 985 32 309.70 31 89.51 CHANGED lDGShtEGGGQlLRouluLSsloGcPl+I.hsIRAsRsp.PGLppQHLsulcslpclssAplpGhplGSppLhFpP...........uplcGG.shphDl......GTAGSlsLlLQslLPhhhFucpssclplpGGTcsthuPslDal+pVhLslLc+hGhpsc...lcll+RGaYPcGGGcVhhplp........Psp.hs.lphhchupltplpGhuhssplssphupcptcssttthsphh.tshhhttt........................ssusshhhshpppsshhuusulGc+GhsAEtVGccAAppLlcplpsuusVDcahuDpllhahALuss...phpsuclos..HhhTsltllcpF.hshca.clct .......................................................................pGs.tcGGGQllRpuLsLShloGpPhpI.ppIR........u..s..............R...s...............p...P..............GL.............h..............ppHlosl+hhscl.........s.s.....u..s..l..........u.....s.............phuup.plhFpP....................................Gt.l.p..G......G....php.....ash................so.A..u.ShsLlLpsl.LshhhF..........u................c...................s.......s.................p.....lpl..p.G......G.T....s..s..s.....uP....s.....hDalcpl.hhP.l....L....t.+.....h...G.l...p.tp.................hp.l...h++...G....a............h...P.....t....G....G....G.Vthpls....s.st.t.h.p.slpLh-pGplhphpG.shhssls.phspRthtshts.hthh..shh.hspp................................................................................................................................................................................................tssssss.hthpspshttth.shGpptsssEslutpsspplhc.ltpsusVsc.ahtDplll.MAL........u....s........s..............................u........chp...l........u.......p.........o..........HhhosltllcpF.hsspFt...h........................................................................................................................................................................................................ 
0 191 322 470 +816 PF05189 RTC_insert RNA 3'-terminal phosphate cyclase (RTC), insert domain Finn RD anon manual Domain RNA cyclases are a family of RNA-modifying enzymes that are conserved in all cellular organisms. They catalyse the ATP-dependent conversion of the 3'-phosphate to the 2',3'-cyclic phosphodiester at the end of RNA, in a reaction involving formation of the covalent AMP-cyclase intermediate [1]. The structure of RTC demonstrates that RTCs are comprised two domain. The larger domain contains an insert domain of approximately 100 amino acids [1]. 20.60 20.60 20.80 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.14 0.72 -4.01 99 1028 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 852 19 465 818 15 100.70 29 28.85 CHANGED h-pGplp..c....lpGlu..asspl.ssplApR.hcuAcp.hL.....th.h.-lplps..................tpsuh.usGsGlsLhAcspp..shhuusul..................GpcGhsAEcVGccAAppLlcpl.psu .......................h.-pGplhp..hRG.s............h..s..s.s.V..shpl..AcR.lssstuhh............sh.l.t-hpIps............................hspsp..u..PGsuloLhsE.....ops............h.hhthpsl...................................................Gcctss..AEsVutpsucplhc.ltp............................. 0 153 268 382 +817 PF00301 Rubredoxin rubredoxin; Rubredoxin Finn RD anon Prosite Domain \N 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.78 0.72 -4.09 43 2943 2012-10-03 19:45:42 2003-04-07 12:59:11 15 40 1967 86 844 2104 307 46.10 48 23.38 CHANGED +ahCpl..CGYlYDsspGDstpslsPGT.Fc-LP--WsCP.C..Gss.KcpF ........pa.Cpl..C.G.alYD..s..stG-P..........p...p......s..l..s.PG.T.......a...p....-l.P.-.c.WhCP.C..usu.KssF........ 0 288 568 732 +818 PF02759 RUN RUN domain Bateman A anon [1] Family This domain is present in several proteins that are linked to the functions of GTPases in the Rap and Rab families. 
They could hence play important roles in multiple Ras-like GTPase signalling pathways. The domain is comprises six conserved regions, which in some proteins have considerable insertions between them. The domain core is thought to take up a predominantly alpha fold, with basic amino acids in regions A and D possibly playing a functional role in interactions with Ras GTPases [1]. 20.70 20.70 21.00 21.20 20.40 20.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.78 0.71 -4.40 50 1686 2009-01-15 18:05:59 2003-04-07 12:59:11 14 59 103 6 970 1562 5 138.00 21 18.38 CHANGED sLssslEtlLpHGL+t...........................................................................................................p..thhhtpppshashhpph.................tph.hsssppl...hpplp.plpplpss..........................................................pu+......................t+AWl+hAL.c+hLspalphLhpspp...hl.............s........paYpstAllhsstts......hlhslLsuLss.lsFslshps ..........................................................................................................................................................................................hssslEthltHGL+t...................................................................................................................................................................................................p....hhhh..t.p.pp..shas..hh.pth.................................................tph...hsp.tp.ph............hpp.l......p....sl...pp..l..p..ss....................................................................................................................................................................................hu+.......................................t+Aalp..hu...L...........c..+h....Lspa.l....ph.Lhp..spt.....hh..................................p.........c.aY.c.t.t.Ah....l..h...sppts..........lhshL.hsL.ss.lpashsh...............................................................................
............................................................................... 1 257 332 591 +819 PF00853 Runt Runt domain Bateman A anon [1] Domain \N 20.50 20.50 21.10 26.40 19.70 19.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.74 0.71 -4.71 6 488 2012-10-03 00:25:27 2003-04-07 12:59:11 14 7 118 24 206 469 0 119.90 74 31.17 CHANGED ERols-hlsEaPGELV+TuSPsFlCSsLPuHWRSNKTLPlAFKVlALGEVsDGThVTI+AGNDENaCuELRNsTAVMKNQVAKFNDLRFVGRSGRGKSFTLTIoIuTsPPQlATYs+AIKVTVDGPREPRp+ppp ............................Rshs-hlu-Hsu.ELVRTsSPsFLCSlLPoHWRsNKTLPl.AF.K.........VVAL..G..-......VPDGT...l............VTVhAGNDENYsAELRNAoAlMKNQVARFNDLRFVG....RSGR.....GK.S.FTL.TITVh.TsPP.Q.VATYpRAIKVTVDGPREPRp+pp......................... 0 47 63 141 +820 PF00665 rve Integrase core domain Bateman A anon Pfam-B_10 (release 2.1) Domain Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site [1]. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.26 0.71 -4.00 230 47519 2012-10-03 01:22:09 2003-04-07 12:59:11 21 846 4253 244 8847 47451 3435 111.40 27 21.34 CHANGED psppshphhphDhs...hhp..s.t................................talhshlDshS+hhhshhhppc........ssptshphhpthhtthtsh...............hhlpoDpGspassp...............phpphhpphG..lphphspstsPpssuhsEphpppl+pph ...........................................................................................s..tts.thap.hDhs......hhp.......t......................................th...lh.sh.l.....c...s..h.S...t.h...l....u.......s..l.s.sc...................................sp.p...s..s..h..t.h...L.......c.............l...t...s.p.h.s...............................................sphl.+oD.s.G..s.p.Fsot.........................................th.p.t.h.....t...h.h..t.G......l...p..p......p.h....u........h.s.......h.......s.......P.p...s......p...G.h...sEsh..sppLKp..h............................................................... 0 3423 5434 6849 +821 PF00077 RVP rvp; Retroviral aspartyl protease Eddy SR anon Eddy SR Domain Single domain aspartyl proteases from retroviruses, retrotransposons, and badnaviruses (plant dsDNA viruses). These proteases are generally part of a larger polyprotein; usually pol, more rarely gag. Retroviral proteases appear to be homologous to a single domain of the two-domain eukaryotic aspartyl proteases such as pepsins, cathepsins, and renins (Pfam:PF00026). 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.29 0.72 -4.09 50 124336 2012-10-02 15:32:34 2003-04-07 12:59:11 15 164 892 1164 420 112085 121 94.30 86 28.21 CHANGED sh.pcPhlplplsGp.............hpsLLDTGADcollpptphshp......hpsphltGlGGth.pscphpphhlplttcphps.....shllhP..sPls......llGRslLsphsspLs ...............................................T.LWQRP.LVT.IKIGGQ...............LK.E.ALLDTGA.DD.TV.L.....E....E...h......N...L....P....G....+.....................W.K...P....K........M.I....G....G....I.......G...GFI.......K..V....R....Q.......Y.......D....QI........I.E.....I...CG.HKAIG.............TVLV.G..P...TPVN..............I.IGR.NLLTQIGCTLN..................................................................... 0 190 263 349 +822 PF00078 RVT_1 rvt; RVT; Reverse transcriptase (RNA-dependent DNA polymerase) Eddy SR anon Published_alignment and HMM_iterative_training Family A reverse transcriptase gene is usually indicative of a mobile element such as a retrotransposon or retrovirus. Reverse transcriptases occur in a variety of mobile elements, including retrotransposons, retroviruses, group II introns, bacterial msDNAs, hepadnaviruses, and caulimoviruses. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.44 0.70 -4.98 152 172360 2012-10-02 12:54:00 2003-04-07 12:59:11 22 1060 4989 405 8257 157991 1195 171.90 64 42.06 CHANGED l.Kps...........uphR................ph+hlschhtp.............................hp.sphuh.shtsh.................htps.phhhplDlpsuFpplshs.hhp.hhpshshst..........................................................ttphphpslPQGhhhSPhlaphhhpplhp.lpp.........................................................................thhhhtYsDDlllhsps.tpp..........hpphhptltphlpp...hGlpls.cKsphh.......................................tpphcaLGhpl ........................................................................................................................................................................................IKKKD...........STKWRK..L.........................V.D..F.R.E..L.N..K.R.T.Q.D.........................................................................F...W.E...V...Q..L.G..I..PHP.A.GL....................................................................K.KK....KSV...T..V...L.D......V..G..D..A...Y........F.....S........V.....P.....L....D...c.....-....F.........R....K..Y....T...A...........F....T........I...P.........S......h....................................................................................................................................................................................................................................................N....NE.....T....PG....I.......R...YQY.N.....V....LP...Q..G...............W..K...G...S...P...A..I.F....Q....u..S....M.T..K.....I....L..E....P..FR+pN...............................................................................................................................................................................................................................P-I...V..I..Y...Q.YM...DD..L....Y.....V...G....S..DL...EIGQ...................HR.s.K....I..E
....E..L.R.p......HL....L+.....W......G....F..o.....T.....P...D.....K....K..HQKE..................................................PPFlWMGYEL.............................................................................................................................................. 0 3604 5373 6662 +823 PF03501 S10_plectin Plectin/S10 domain Barker W, Wu C, Bateman A anon Pfam-B_2138 (release 7.0) Domain This presumed domain is found at the N-terminus of some isoforms of the cytoskeletal muscle protein plectin as well as the ribosomal S10 protein. This domain may be involved in RNA binding. 22.20 22.20 22.20 23.00 21.90 22.10 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.25 0.72 -4.13 39 605 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 384 4 306 562 4 93.60 56 26.87 CHANGED lPKpsRptIYcaLFcEGVlVAKKDhphs.pHs-........l...slsNLpVl+shpSLcSRGaV+EpFuWpaaYahLTsEGIcYLRcaLaLPs-.lVPuTh++pspst ...........hPKpsRhtIYch.LF.+.E.GVhVAKKDhphs...KHP-..........l...slsNLpVlKAhQSL+S+G.Y.VKEpFuWpHaYWaLTNEGI-YLRpYLHLPsE.IVPATL++pt+............... 0 106 165 242 +824 PF01479 S4 S4 domain Bateman A anon Medline:99193178 Domain The S4 domain is a small domain consisting of 60-65 amino acid residues that was detected in the bacterial ribosomal protein S4, eukaryotic ribosomal S9, two families of pseudouridine synthases, a novel family of predicted RNA methylases, a yeast protein containing a pseudouridine synthetase and a deaminase domain, bacterial tyrosyl-tRNA synthetases, and a number of uncharacterized, small proteins that may be involved in translation regulation [1]. The S4 domain probably mediates binding to RNA. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.81 0.72 -4.53 155 36010 2012-10-01 23:15:27 2003-04-07 12:59:11 20 39 10093 225 6709 24712 7637 47.10 28 18.58 CHANGED hRLDphl..hchshspopppAcplIppGcVtVNGchls.suhtlp..ss-hl ..................RLDphl.......hch...u..h..s..s....oRspAc.pllpp..G..c.l.t..V..NG....c....h.lp..suhplp..stDhl...................... 0 2224 4271 5635 +825 PF04382 SAB SAB domain Bateman A anon Bateman A Domain This presumed domain is found in proteins containing FERM domains Pfam:PF00373. This domain is found to bind to both spectrin and actin, hence the name SAB (Spectrin and Actin Binding) domain. 20.90 20.90 20.90 20.90 20.70 19.40 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.27 0.72 -4.27 6 436 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 42 0 117 282 0 48.50 54 6.12 CHANGED Khc-LDKoQD-llKHQASISELKRoFhEo..sspsRssEWE..KRLST.SPhR ..................hc-LDKsQ--lhKHQASISELKRsFhEo..sspsRssEWE...KRLoT.SPhp....... 0 4 12 42 +826 PF03399 SAC3_GANP SAC3/GANP/Nin1/mts3/eIF-3 p25 family Mifsud W, Moxon SJ, Waterfield DI, Finn RD, Bateman A anon Pfam-B_2845 (release 6.6) & Pfam-B_4388 (release 7.5) Family This large family includes diverse proteins involved in large complexes. The alignment contains one highly conserved negatively charged residue and one highly conserved positively charged residue that are probably important for the function of these proteins. The family includes the yeast nuclear export factor Sac3 Swiss:P46674, and mammalian GANP/MCM3-associated proteins, which facilitate the nuclear localisation of MCM3, a protein that associates with chromatin in the G1 phase of the cell-cycle. The 26S protease (or 26S proteasome) is responsible for degrading ubiquitin conjugates. It consists of 19S regulatory complexes associated with the ends of 20S proteasomes. 
The 19S regulatory complex is composed of about 20 different polypeptides and confers ATP-dependence and substrate specificity to the 26S enzyme. The conserved region occurs at the C-terminal of the Nin1-like regulatory subunit [4,5,6]. This family includes several eukaryotic translation initiation factor 3 subunit 11 (eIF-3 p25) proteins. Eukaryotic initiation factor 3 (eIF3) is a multisubunit complex that is required for binding of mRNA to 40 S ribosomal subunits, stabilisation of ternary complex binding to 40 S subunits, and dissociation of 40 and 60 S subunits [7]. 26.10 26.10 26.20 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.39 0.71 -4.76 105 1100 2012-10-04 14:01:11 2003-04-07 12:59:11 11 24 325 2 789 1485 13 199.20 22 28.32 CHANGED ppssPppl+shcllhhphphlhtph....................salhsph+uh+pD..lpl...........slplhEhtshhtl.p..............tc...........ltpasp.sht...pLh.hYtp.............................................p.pps-hhuhhLL.hhL.ps.....................s-hppplph.L.............................................tt.h.psthlphslplpphltpus.........................Yp+Faplh......p.................pss.hhstlhc.h.ahsplRhpshpsls+uYpp...........lslphlpphLsh 
..............................................................................................................spplR.....s..hL.tshphlht.h..............................t..a.tal.h-ph+ulRpDlshQt...h.s........slplhEtts+htl.p...................t-....................................hpphs.p...s.p.........pL..pt.hYpp..h......................................................................psppsEFtuY...h...lL.h.h.l....p.s.......................s-ht.p..p.ltt.l..............................................stp..hpps..........tlp....h....Alp......lppu.ht..pus.............................at..+aaclh...........p..........................................................................pss.....h....hshlhc..h...ahsph...R..tpulpshp.+uYp.................lslp.ltphLh............................................................................. 0 272 441 657 +827 PF03435 Saccharop_dh Saccharopine dehydrogenase Finn RD anon Pfam-B_4166 (release 6.6) & Pfam-B_6325 (Release 7.5) Family This family comprised of three structural domains that can not be separated in the linear sequence. In some organisms this enzyme is found as a bifunctional polypeptide with lysine ketoglutarate reductase. The saccharopine dehydrogenase can also function as a saccharopine reductase. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.07 0.70 -5.49 82 3742 2012-10-10 17:06:42 2003-04-07 12:59:11 13 32 2288 17 1319 4625 2619 321.10 19 76.28 CHANGED llllGs.GslGpsshthltcphch........plslsspstpphpphhtt.........tshphpshslsssshps.Lssh.lpps...chllslussh..tsh.slhchChcpGstYlD....Tuh.....................hppthhphcpptt.....tuGsTslsssGhsPGlsshhstpulpclt.............................sphtphhulc.hasutt............s.tassoWSscGhlpE.hps........t...sElshtspppph...shhhtsGsshhhah.pcsushspshshhst.....shhhs...slphssahshhp..htshshhc.ssh.hh..................................................................hss.csltulhc.hssht.phpsh.hplh.hhc....Ghtc.Ghhhhtphpps..hspths.hppsppthshssusshtlsutllssshhs....ptGllps-ph.hp.sh...hhshlt...Gl.hs ..................................................................llllGu..G.hlu.p..h.s.s...p.h.....lsp.psph....................cl.s.l.A.....u..R...s.....t...p.+.h..ptl.ht.................................tth.p.h.p...s...h.t..l.....D....s....s....s.....s.....p....s....l...t.....th...lppt...........clV...l...s..ss......s.....Pa.............hsh...slh.cA..C..l..p..s.G.s....c.YlD.........ssh............................................................................h.ph.h..t..h....c..cp.hc......p.uGh.s..hl.us.G....h..D..P........G.hs...sh...hs.t...hhhp.....phh................................................................t..........l..t.....h.......s...uth........................ht.h.sa..s..s.t....h....h.t.........................h.......t....t....t..........t.h..........h..t............h.......................h...........h.h.........p.t....h...........................h..h...........h.........t.h.t.hht...........hht.................................................................................................................................phh..thh............................t............h..h.....h.......................h......s.................................
................h.h.s.s....h....hh.......................................th.................................t......................................................................................................................................................................................................................................................... 0 431 806 1113 +828 PF00536 SAM_1 SAM_1; SAM domain (Sterile alpha motif) Bateman A anon [1],[2] Domain It has been suggested that SAM is an evolutionarily conserved protein binding domain that is involved in the regulation of numerous developmental processes in diverse eukaryotes. The SAM domain can potentially function as a protein interaction module through its ability to homo- and heterooligomerise with other SAM domains. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.98 0.72 -3.80 65 6213 2012-10-02 20:42:54 2003-04-07 12:59:11 25 406 310 71 3366 7526 101 63.10 23 9.07 CHANGED shhshpsVs-WLcu...l.th....spYtcpF.psshhshctlhplot-D..Lhp.lGlshhGHp++Ihpulptl+ .........................h...t.tpVspW.Lp.s...........l...sh.........sp.Yt..p.tF..p........p.....p......h......h...s.......s..c......t.....l..h.....p...l.o..p..p-.................Lp...p..lu.....l.p.t.....Gcpt+lhtulpth............................. 
1 782 1136 2102 +829 PF02198 SAM_PNT Sterile alpha motif (SAM)/Pointed domain SMART anon Alignment kindly provided by SMART Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.87 0.72 -4.22 15 1002 2012-10-02 20:42:54 2003-04-07 12:59:11 11 15 115 23 513 1149 0 82.10 32 19.96 CHANGED hsuappppp+ltlst.....cPphWocscVhpWLpWuhcE..FsLsslshspF.pMsG+pLCsLs+E-FhptsP...hsGDlLapHLphLpcps .....................................phtppphhlst......-P....ph....WoppcVtpWL.p.Ws.hcE....as..L.t..s.l.s..h..p..p..F...shsGctLCtho+--Fhph.s..P.......hsG..-..l..L..appLphlppt.s............................ 0 109 148 306 +830 PF01342 SAND SAND domain Christensen J, Bateman A anon [1] Family The DNA binding activity of two proteins has been mapped to the SAND domain. The conserved KDWK motif is necessary for DNA binding, and it appears to be important for dimerisation [2]. This region is also found in the putative transcription factor RegA from the multicellular green alga Volvox cateri. This region of RegA is known as the VARL domain [3]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.68 0.72 -4.27 27 491 2009-09-11 01:11:04 2003-04-07 12:59:11 16 30 96 4 249 486 2 78.20 33 13.91 CHANGED scs.sh....sstlPVsCGpspGhLhhc+h.ptGhpt+CI............................phc...spahTPpEFphhu.G+upuKcWKpuIRh......sGhsL+pLh-cthLs .........................t....sh...p..hPlsCGps.puhLhhp+h..ps..GhpsKCI............................php......spahoPpEFEt.hu.G+.ussKsWKpuIRh.....................sGhsL..p.plhcps.L..................... 0 51 98 148 +831 PF02037 SAP SAP domain Bateman A anon [1] Family The SAP (after SAF-A/B, Acinus and PIAS) motif is a putative DNA/RNA binding domain found in diverse nuclear and cytoplasmic proteins. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.04 0.72 -7.33 0.72 -4.37 182 2887 2012-10-03 03:04:30 2003-04-07 12:59:11 22 173 629 14 1622 2724 591 34.30 34 5.81 CHANGED hsphpVs-L+phLcppuLsssGp..KspLlpRLpphh ...........pphpVs-L+ptLcp+uL.s..ss..Gp..KspLlcRLpph............. 0 558 853 1248 +832 PF05184 SapB_1 Saposin-like type B, region 1 Finn RD anon Manual Domain \N 20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.81 0.72 -4.05 161 1396 2009-01-15 18:05:59 2003-04-07 12:59:11 10 105 174 31 654 1402 4 38.80 27 19.67 CHANGED sshCshCphhlphlpphl.p.sspopppIhphlcp.hC.shlP ..s.shCphCchlVshlpphL.c.sspTcpcIhphlcc.hC.shLP..... 0 247 355 514 +833 PF03489 SapB_2 Surfactant_B; Saposin-like type B, region 2 Finn RD anon Manual Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.44 0.72 -3.89 103 1536 2009-01-15 18:05:59 2003-04-07 12:59:11 12 112 171 39 716 1502 4 34.80 28 17.26 CHANGED sppCpplVppYtshllphlhpphs..PpplCp.tlslC ..........ppCcphVspYtshllphlhpp.....hs...PpplCs.tlslC...... 0 242 367 546 +834 PF04499 SAPS SIT4 phosphatase-associated protein Mifsud W anon Pfam-B_2011 (release 7.5) Family This family includes a conserved region from a group of yeast proteins that associate with the SIT4 phosphatase. This association is required for SIT4's role in G1 cyclin transcription and for bud formation. This family also includes homologous regions from other eukaryotes. 
24.90 24.90 24.90 24.90 24.50 24.80 hmmbuild -o /dev/null HMM SEED 475 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.59 0.70 -5.85 51 896 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 267 0 509 895 0 267.70 20 48.43 CHANGED .ppLlspKspphlsF.l+ppcslVcphLpHI-ssslMDhLL+lIS...s-+....s-sspGll-hLpp.....QcLIs+LlshL...........................................s.schsts.hQosAuDhLKAlIolSu.Nss.p...pssIGP.................NpLoRpLsStphlcpLl.shML..............................................................................................................ps.usuLssuVuIlIElI......RKN............NSDYD...................................hshtspsP.os+D...............................................PlYLGp........lL+hFup+lscFhpLlhps..................ptt.lposhGs.hcPLGFcRFKlsELlAELLHCSNMsLhNp.t..thht.RD..R.t...t...ht...tp........t.t....p.......................t.........................................................................................................................................t........t.t..............t...t............................................................................................................................pPs.................lGDhlKlpLh-spllssILchFF+aPWNNFLHNVVaDllQQlhNGshc....................................huaNphLshcLF..................................cpsplsptIlcutcpspch.ppp..............thRhGYMGHLTLIAEEVVKFsphhss..phlo....hlhc..tlpspcWptalppsLs-.TRc.p.ss 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................h............................................................................................................................................................................... 0 148 248 376 +835 PF04000 Sas10_Utp3 Sas10/Utp3/C1D family Bateman A, Wood V anon Pfam-B_6555 (release 7.3) Family This family contains Utp3 and LCP5 which are components of the U3 ribonucleoprotein complex [2][4]. It also includes the human C1D protein and Saccharomyces cerevisiae YHR081W (rrp47), an exosome-associated protein required for the 3' processing of stable RNAs [3], and Sas10 which has been identified as a regulator of chromatin silencing [1]. This family also includes the human protein Neuroguidin an initiation factor 4E (eIF4E) binding protein [6]. 20.50 20.50 20.60 20.60 20.10 19.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.78 0.72 -3.79 122 865 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 313 0 604 854 4 85.50 20 23.39 CHANGED hlpslspslsplpspl..ptlhpth..................httths.................lhphKhphhhuYhtslsha........hhL+hp.shsspp............................HslhpcLhcl+phhc+ ..........................................................................................lpphppplsplpstl...pslhpts...............................ttthss.......................h.hcsKhpllluYhhslsaa..........hhL+sp..uhssps............................HPlh.p.cLhclRphhc+........................................... 1 186 316 487 +836 PF01547 SBP_bac_1 SBP_bacterial_1; Bacterial extracellular solute-binding protein Bateman A, Griffiths-Jones SR anon Pfam-B_269 (release 4.0) Family This family also includes the bacterial extracellular solute-binding protein family POTD/POTF. 
20.50 15.00 20.50 15.00 20.40 14.90 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.87 0.70 -4.70 129 9759 2012-10-03 15:33:52 2003-04-07 12:59:11 20 37 2534 267 2818 28939 7759 292.70 15 65.96 CHANGED ssshpp...shpthh.pp....a....pc..pss..lcVp..hp.s...sssshtp.....h.hthtsus...ss.Dlhhs........h.....hhpt..shhtslsshhtst.hh.h...........hh...h.............................shha.........ppphht............................sWs.-hhphstphttpstsh........................hhh.h.tthhhhh..h....s............................................shthh.thh.......thtshtshstshstshshht...........pGpsshhhsh.ht..........h......................thhtsttpst.phshh.....................sshs.hslspsupp....tct....AppFl........pahhss........................pspt ..........................................................................................................................................................................................................................tttt..........htphh..pt.............F.........pc....p..s.s......lc.lp..........hp.hh..........s....s...s..htp..................cl..p.s.th..s.u.Gs....................sP...D.lhhh...........sssths.............phspp...........Gh.l..h..s...l...s...s....h...h..sp.........t.....t........................................................................h......t...h.hp.....pht.........................tpha..ul....P............h..........t....s......s....t...........s........l..a..Y................................s.+c.lhc.................................chG.lp.................................................Pp...Ta......-......-....h....h...p....s....s.....c.....p....l....p....p...ts.hss......................................hhh...t.t...t...h.....t.....t..t....h....h.....h...h.............h...h...t...s.hG.........s....t.h..h..s.t.pht.....................................................sh.sss.thhp..............shph....htp..h..h........................................t.........t.....h....h......s...
...........s....s.....t....s......h......s...h...s....p....s....h....s..h.ht.......................................sGc..s..u...h...h..h.......s..s...h.....h........................................................................................................................h.t...t..t..t.t..t..........t...t....p....h..s..h.....h..sh...P.t....h.t.sst.t...........................hss.s..h..s..l....s....p....s...o..c..p...............ct............Ah.c.al................pahhs.t.............................................................................................................................................................................................................. 0 1094 1940 2361 +837 PF00497 SBP_bac_3 Bacterial extracellular solute-binding proteins, family 3 Finn RD anon Prosite Domain \N 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null --hand HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.47 0.70 -5.10 483 31454 2012-10-03 15:33:52 2003-04-07 12:59:11 15 351 4322 113 7607 27422 5780 223.60 19 63.80 CHANGED lpl.usss.shs.Pasahct............sGphsGa-lD....lhpt....luc.ch.Gh....c.hchh......h...sasshl.s.uLpsG.c.h.Dh.lhuuhsh......Ts-R..p..c.p...........ls.F.op.P..Yhts.stsllsp..p.....s.....................................................................................................................ph...p...t.........-L...pG....................+..p.....lu.ltpG.os....tpth...htp.......h................thpl....................h.....h.sshsps.ht........s.LpsG...clDAhls-tsshthhhpppsttthhh......................ttshtspthuhshpcssspLhptlspultpl.ppsGphp.....c.l.hp...Ka.hst 
....................................................................................................................................................................................................................................................lhlu.h.ps..s..h.s..P.a...p...h...hst.................ss..p...h..h...G.h..-.lD................ls....p.t........l....sc....ph...sh.....................c..hp...hh....................h..sa.s....s...hl.....s....s.L.....p.....s........s.....c........h......D...h....l.....h......u.....s......h....o....h................T....s....c...R.......p...c...p....................................hs....F....os......s......Y....h......p......s......s.....h.......t......l.l.spp...s....................................................................................................................................................................................................................................................................................................t..sl...p.....sh.t............-..L........pG...........................c....p.......l.u.....s..t...p...G...os.................tp.p.h........lpp...........h.........................shp.l........................................................h.....p...h....s...s....h...s...ps...hp...................s....L....p....s...G......+...l...D.....u.h......h...s.....D....p....s....s....h....t...t....h...h..pp.p.s.ththhh................................ttthtsps....h...u...h......s......h.....p.......+.....s...........s.......s.......p.......L....h...p....t...l...sp...sl.t.p.....h...p.....p.....s.....G......p......h..p......p...l...p+a.h..t.......................................................................................................................................................................................................................................................................................................................... 
0 1937 3962 5901 +838 PF04144 SCAMP SCAMP family Bateman A anon Pfam-B_1298 (release 7.3) Family In vertebrates, secretory carrier membrane proteins (SCAMPs) 1-3 constitute a family of putative membrane-trafficking proteins composed of cytoplasmic N-terminal sequences with NPF repeats, four central transmembrane regions (TMRs), and a cytoplasmic tail. SCAMPs probably function in endocytosis by recruiting EH-domain proteins to the N-terminal NPF repeats but may have additional functions mediated by their other sequences [1]. 23.20 23.20 23.30 24.00 23.10 22.60 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.13 0.71 -4.39 39 634 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 162 0 330 557 0 161.50 37 57.61 CHANGED cpsNWP.Ph.t.h.htPhhYpDIssEIPs.caQ+hsphhahlWhhh............shsLhhNlluslshahts..ss...ussFuLullahllhsPsualsWYRPlY+AaR......................oDSuFsFhhFFhhahhplshslhtulGhPsh......GhsGaIsulshhps.shs..lulhhhlsshhFslpuslulhhlp+VaphaRtoG .......................psNWP.Ph............htPhhap.DhssEIP....ch..Q..+hsphhYhhWhhh..........................slsLhhNll.usl.uha.hts.............su.....sssF......sLulla....h......l........lh..s.Psoal.sWYRPlY+AFR.......................................oDSuhpFhhFFh...hahhphshplltAlG.hs.....sh........................G.h..sGhlsu.l...sh..............hsp.....shs.....lulhh.h.lsshhFshtul.hu.......hhhlp....cVathaRtsG..................................................... 0 89 145 226 +839 PF02023 SCAN SCAN domain Bateman A anon Pfam-B_1614 (Release 5.0) Family The SCAN domain [1] (named after SRE-ZBP, CTfin51, AW-1 and Number 18 cDNA) is found in several Pfam:PF00096 proteins. The domain has been shown to be able to mediate homo- and hetero-oligomerisation [2]. 
20.20 20.20 20.30 20.40 20.00 20.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.10 0.72 -4.51 62 2273 2009-01-15 18:05:59 2003-04-07 12:59:11 12 322 50 7 1114 1819 0 93.00 48 19.04 CHANGED .ssEshRp+FRpFpYp-...ssGP+EALupLpELCppWLRP.EhHoKEQILELLVLEQFLoILPtElQsWVppppPcSuEEAVsLlE-Lpcphpcs.t..pts ....................st.EshRp+FRpF.pYpE.....ssGP+E.ALup.....LpELCppW.LR..P..Eh..+.TK.EQIL.E.LLV.....LEQFL..oILPtElQsWVpp.pp..Pc.SuEEAVsLlEcLppphpp.....h............................... 0 119 159 371 +840 PF02404 SCF Stem cell factor Mian N, Bateman A anon Pfam-B_2598 (release 5.4) Family Stem cell factor (SCF) is a homodimer involved in hematopoiesis. SCF binds to and activates the SCF receptor (SCFR), a receptor tyrosine kinase. The crystal structure of human SCF has been resolved and a potential receptor-binding site identified [1]. 20.00 20.00 20.50 20.90 19.70 19.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.77 0.70 -5.20 4 137 2012-10-02 01:28:15 2003-04-07 12:59:11 10 3 70 16 36 126 3 171.70 49 93.19 CHANGED MKKsQTWIITChhLQLLLhNPLVKTQu.CtNPVTDDVpDIsKLVuNLPNDYhITLKYVPtMDsLPsHCWL+.MVschShSLpsLLpKFSsIS-h...LSNYSIIspLs+IlsDLhACht.cpsKs.lKEsu+h.EpcpFhPEpFFplFNRoI-saK-.Fhsu.DpsDClh.Sos.TPEpDSRVuVTKsh.hPPVAASSLRNDS......SsSN+cAhs.IpsSSLQhhulALsuLlSLlIGFhhGAlYWKKppP.ShscosEsIQhp..pE-NEISMLQQKE+Ea.pV .........................h....hhh.......tt..hN.lTsslpclshLhtNlPpDYhIslpYlst...lsshCWl...V.php.SLpsLhpKFsphSp......NhsIh.ph...h...............h.t..t.....t.h.stpaFthh..p.....hpt...s...s.cpu-Clh...sos.sPEp-scs.shopsh.hs..h.................................................................................................................................................................................................................................. 0 2 5 15 +841 PF00188 CAP SCP; Cysteine-rich secretory protein family Finn RD. 
Yeats C anon Yeats C Domain This is a large family of cysteine-rich secretory proteins, antigen 5, and pathogenesis-related 1 proteins (CAP) that are found in a wide range of organisms, including prokaryotes [2] and non-vertebrate eukaryotes [3], The nine subfamilies of the mammalian CAP 'super'family include: the human glioma pathogenesis-related 1 (GLIPR1), Golgi associated pathogenesis related-1 (GAPR1) proteins, peptidase inhibitor 15 (PI15), peptidase inhibitor 16 (PI16), cysteine-rich secretory proteins (CRISPs), CRISP LCCL domain containing 1 (CRISPLD1), CRISP LCCL domain containing 2 (CRISPLD2), mannose receptor like and the R3H domain containing like proteins. Members are most often secreted and have an extracellular endocrine or paracrine function and are involved in processes including the regulation of extracellular matrix and branching morphogenesis, potentially as either proteases or protease inhibitors; in ion channel regulation in fertility; as tumour suppressor or pro-oncogenic genes in tissues including the prostate; and in cell-cell adhesion during fertilisation. The overall protein structural conservation within the CAP 'super'family results in fundamentally similar functions for the CAP domain in all members, yet the diversity outside of this core region dramatically alters the target specificity and, thus, the biological consequences [4]. The Ca++-chelating function [3] would fit with the various signalling processes (e.g. the CRISP proteins) that members of this family are involved in, and also the sequence and structural evidence of a conserved pocket containing two histidines and a glutamate. It also may explain how Swiss:Q91055 blocks the Ca++ transporting ryanodine receptors. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.57 0.71 -3.63 98 6534 2009-01-15 18:05:59 2003-04-07 12:59:11 21 194 2233 38 2946 5991 336 123.50 18 43.36 CHANGED lshhNphR.....t......................stshhs.Ls...asss.LsphApppupphsp...........................tt.htpshspshthhthsst......t....................spshhtttpphptt..............htshh........sss..........sHhpplltssssplGsuhspssstt...............hhhlsta ......................................................................................................................................phhNthR....................................................stpt...hs.lp..........assp.LsphAp..t..a.u.p....ph.st............................................ps..hsp...s...t....s...p..s....h.t.....h.htt.hpt.....hsh..h........................................spshh.s.pt.pshshs....................................h.p.s.hh................ss.s....................GHh..s...p..ll.............s......s......h....s..plGsu....h....shsspt....................hhhsp.a...................................................... 0 1119 1733 2458 +842 PF03803 Scramblase Scramblase Finn RD anon Pfam-B_3893 (release 7.0) Family Scramblase is palmitoylated and contains a potential protein kinase C phosphorylation site. Scramblase exhibits Ca2+-activated phospholipid scrambling activity in vitro. There are also possible SH3 and WW binding motifs. Scramblase is involved in the redistribution of phospholipids after cell activation or injury [1]. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.44 0.70 -5.26 10 783 2012-10-02 20:44:47 2003-04-07 12:59:11 10 15 285 0 512 771 22 198.70 28 69.98 CHANGED Mousht.hsssPsuL-hLsphDsllVpQplEhlElhTGFETsNRYsl+sstGpplhhshEc.....SsshsRQhhGscRPFshclhDshGpEVhplpRPFpCss...p.....hssshppt-l....ps.sGssIGhVtQpWchhcspaclhsucp.psshplpuPssshs.sssspsF.V+oh-s.pllGpIsRpWsGlhREhFTDADsasl+FPh...........................DLslchKAVlluusFLIDFsYFEc ...............................................................................................................thp.hL.t.h.stlhlp.pp..h.E....hhp....................h....h.....h.....s..h..E..psN+Ytl.........h.......s.........s.............G.......p........t...l...h.h.....s.....hEc...................sshhsRp.h.h..t.....s.t.R......s...F......p..h..cl...............h......D.....s........h...........s....p...........c.........l......h.p..hp.R....P....hp..h.s......p....................................h.s.h..hpph.pl...................ps.s.s.Gph.......l..G..hlt...Q.....p..a....p.............h.....h................p..a....s.l.s......tpt...pp.............h........h............p...............l.......................G..........P..........h........h..h.........h..........s.....s...............h....ts....h.............s....F............pl.....hs...h.........s......t..p.............l......GpI.s....+....p....W.s..G.......h...h...p..-.h......h....T-..ss....pas..lp.F.sh...................................................................................................................sL..s.l..c.....+AlhluushhIDh.aFp............................................ 0 214 284 400 +843 PF01390 SEA SEA domain Ponting C anon [1] Family Domain found in Sea urchin sperm protein, Enterokinase, Agrin (SEA). Proposed function of regulating or binding carbohydrate side chains. Recently a proteolytic activity has been shown for a SEA domain [4]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -10.06 0.72 -4.36 94 1758 2009-01-15 18:05:59 2003-04-07 12:59:11 15 244 103 4 1017 1679 0 104.30 16 14.60 CHANGED sstthhphshpl.sshp.....asschpsssStpapslspplpptl...pphapps......ttahpsplhsh.........................pp........Go...llsphhlha.ptssststtshtptltpthpp....tthhslthssp ................................h.....hthsh.pl.sshp........aspchtsssStpa...ps...h...ppp...lp.phl....pp.hapss.........stat..ssplhsh................................ps.....................uu.....lhVphhhha..p.s.s.s.t.s...h..tt..h.p..h..p...t................h....................................................................... 0 253 329 554 +844 PF04091 Sec15 Exocyst complex subunit Sec15-like Wood V, Finn RD anon Pfam-B_7871 (release 7.3); Family \N 22.40 22.40 22.40 22.50 21.80 22.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -5.10 31 418 2012-10-03 17:31:52 2003-04-07 12:59:11 7 15 255 1 266 408 1 281.90 32 39.47 CHANGED -Y-p.Vlphpahp........sptts.s...FPhhhPFSphhP.sCh.l+..........palsphhtFhsphhp.p.spls-.hl++shDpLLsch....lscslhphlpss....shpQlsQIllNLpaaEhAspplpphlsptptssps.ss...sph....tLpupcpapss++tAEpplhphlsoKID-hl.-hs-YD.Whs.spsss.-........sstalp-lspaLcshhsSshhsLPtplpphlhhcuhcHloppllsl.lLsssl+plsspultshshDlpaLEpFsspl......................................p......sssLppsFsEL+QplsLlhossh-.-a..........h-sshRt++YsRl.cspsuhhlL-Khp 
....................................................Ych.lht...hp...........p.ph...pp.s.......FPthhPFSphhPthhh..p.l+.....................pFl.thhpFsps..hhh....p.splc-.hl+KuhshLLocp....lspsL.phlppt.....hsLspllQIllNhsahEpACp.L-phlsphp.sh.tp.ss..........tth..............................pLhut.ptF.+.ss...+.c.sAEppIh...phl.spKID-hl..ph...s-Y.D.Whs...scs..ss...c.................sSsYl...-lltaLcshh.ss.hptLP.t..........cltptshhsAhpHlusplh.....................ph.l..L.ss.....-.............l+plshsAltphslDlh.h..EtFsss.............................................p.......tspLp.shh-LRQhlsLhhs....shp.pa..........h-..u..p..p..+Y.+l.ss.pshhlLEKh................................... 0 77 136 208 +845 PF04815 Sec23_helical Sec23/Sec24 helical domain Bateman A anon Pfam-B_3055 (release 7.3) Domain COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is composed of five alpha helices. 27.80 27.80 28.00 27.80 27.60 27.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.03 0.72 -4.59 121 1420 2009-01-15 18:05:59 2003-04-07 12:59:11 10 28 326 24 936 1370 10 102.20 26 11.83 CHANGED Q-Ahsshhu+pulp+shs....ss.....hp-........s+chLsppllclhspY+..phhtssssu.............lhLsp..shchhPhahhsLhKsthl.ps...tssssDcRsahhphlhshslpphlhhl .....................................-Ahsslhu+h..Al.t+s.s.........ss......h.t-........................spc.hLsppllclhspat.......cht...tssssu.............................hhLsp....shplhPh...ahhsLh+o.phl.ps......h.......s.......s.......ssD-puahhphltp.slsp.hhhl....................................... 0 285 492 755 +846 PF04811 Sec23_trunk Sec23/Sec24 trunk domain Bateman A anon Bateman A Domain COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. 
This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is known as the trunk domain and has an alpha/beta vWA fold and forms the dimer interface. 21.20 21.20 21.20 22.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.23 0.70 -5.01 22 1546 2012-10-10 16:07:06 2003-04-07 12:59:11 10 35 327 24 1024 1513 23 236.50 26 28.15 CHANGED P.PslFlFllDlohs...su.LpslppulhpsL.shLP........spshVGhITasshl+hapls.sh.......ptsp.hsssclp-hh.......................hPhs.sphLlslpcschslpsLLcplsp.hasss..+csppshGsALpsAhtlLpss...sGG+lhlFtuu.sohGs.uhlpsc..cp...shhshcK-ttphhppsstaYcsLApcssspGhslDlFhhs.s.sslAplpslsphTGGplhhhsuFs.......sshFppshpRhh ............................................................................................................PshalFllDs.s..hp..........ps.lp..........slpps.lhts.L.....shLP.....................................sps.....hlGhlTa.s....ph..l.......ph.a...p..lttsh........................................................ptsp..h....s...s..s.p...l...p.-.h.h.............................................................................hPhs...sp.h.L..l..s.l..p..c..s..c.......................................l.ps....l.Lp...p...L.................p....ha..s..s..s.............................p........c..s........t........p..s......hG...sA.......LpsAhtl.lp..............................sG.G..+lh.....l............Ft..uu..............soh.Gs......s...h..lpsc..cp........................shhsh.s..+..-....................t.................t......h..h.......p..p..s.s...........t...aYc.pl.Atcssp...............p...............sh.......slD.lF.....hh.....s..........p...........ss..........l..................u..p..h................ts..............l.sphTGGphhhhssFp.............s..phhtpshp+................................................................ 
0 340 560 837 +847 PF04136 Sec34 Sec34-like family Wood V, Finn RD anon Pfam-B_16464 (release 7.3); Family Sec34 and Sec35 form a sub-complex, in a seven protein complex that includes Dor1 (Pfam:PF04124). This complex is thought to be important for tether vesicles to the Golgi [1]. 25.00 25.00 25.10 25.70 24.80 24.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.88 0.71 -4.53 3 293 2012-10-03 12:17:00 2003-04-07 12:59:11 10 9 255 0 211 309 1 151.10 29 19.38 CHANGED cscch+cYLQsFpp...pCDpILsQlNuAhp+LtSLp-cY-FVSpKTSsLsEACEQLlcEQpRLsELA-sIQ++LoYFupLEpLNp+LpSPTLSVA.S-uF+EpLsKLD-CIsYIEENPcFKDuPtYLlKYKQCLSKAhcLhKsYslslIpQsT-QlLKcc ................................................h....h.p.Lpth...pp...ph-...t...l..Lsp.....sssslptLpsLpppaptVsscTsshpctC-pLLp-Q...pcLtclu-pIpppLpYFspL-slsp+L.s........u.........P..s...h..........u....V..s...scs.F..h.s.h.Lp+LD-Clsalp..s..H..........P..p......aK-ussYhh+a+phLo+AlpLl+sahsssLpph.spth....t..................... 0 71 115 174 +848 PF02889 Sec63 Sec63 Brl domain Bateman A anon Ponting CP Family This domain (also known as the Brl domain) is required for assembly of functional endoplasmic reticulum translocons [2]. 
33.60 33.60 33.60 34.00 32.60 33.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.13 0.70 -5.21 133 1940 2009-09-17 06:06:13 2003-04-07 12:59:11 11 53 348 4 1383 1943 35 302.80 21 32.16 CHANGED sppLGcI....ASpYY..........lpapohphasp.....lps..pt.s.tc...........ll......pllotusEFc.....pl.lRppEcppLpcLh...pp.....hs....h.h.................p.tshpssps.....KsslLLQAalSR......h.plp...hsLhs................Dhthlhpsus.RlhcAhh-l.h.....hpcs........ahpsshtslpLsphlpp...............phW.s..pp........sPL.+Qhs.........................thsppl....lc....plcp...ps.h..ohpclh....ch........ssp-ls.......pllp...........p.upplhphlp...pa.Pplcl.psplpshscs..l.............................lplplplpsph.hp...................................th.st............t-saalhlt..D.scspplhthcphtl.....pp.............................tt.h....................lphslPhs.ss.......ph.hlhllSDp.alss.-pphslsh .................................................................................hthGhl....suhYY..............lpapT.h...pha.p..........lps......ph...s.tc..................................llpllu.tus.EFc..............................p.l....l....R...p..p.E..c.....l.p.pLh.......pp......l.......h.h.....................................................ptph.p.s.s.ps........KsplL.LQ..........AaloR........h..pls.........hsLh..s..............................Dtt.hl...hp.puh.Rll..........p...Ahh-l..s..........................hpps..................ahpss..htshpLsphltp...................thW..s....pp..................ssL...pQls...................................................................................p.h.s.t.p..h..........lp..........phpp......................ts...h.......sltclh........ch..................psp.cht......................pllp..s...............p.hpplhphhp.......ph..P.p.lpl..ph..p...h.p..s.h.spp...h..............................................lplplplp.ph....pp...........................................................................................................
............................................hh.tt..............t.csaa..l.hlt.............-.sps..ppl......h.t....h....chh.h............p........................................tth..............................................................................................................................hph....hP....t....................ph..hlhhhsDp.ahuh.-t.h.h.................................................................................................................................................................................................... 0 481 779 1155 +849 PF04048 Sec8_exocyst Sec8 exocyst complex component specific domain Wood V, Finn RD anon Pfam-B_9576 (release 7.3); Domain \N 25.50 25.50 25.50 26.20 25.30 25.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.55 0.71 -4.55 22 297 2012-10-03 17:31:52 2003-04-07 12:59:11 9 8 250 0 211 305 0 126.60 26 13.09 CHANGED pLppllsplphpWsthhpcsssPlplALphhD-oSlGhu+cht-FpphpcphppsLppVVs-HapsFNsuIuoYpplhsslpsSppcltplKptLppupptl.pscpspLpcLspsShcapchIplLstIc-lpplPs+l-ph .......................................p..........................................D..p..s...s...s.ptpchtc....hpcthcphppsLcplVspHapshssuItoappIpppls................sSpp+lcpl...KpsLtpsKthL..ps........++.............s.............-L+cLhhcuhp......accllplLcpIE....plpplPp+lEt............................... 0 73 119 178 +850 PF00856 SET SET domain Bateman A, Huang S anon [1] Family SET domains are protein lysine methyltransferase enzymes. SET domains appear to be protein-protein interaction domains. It has been demonstrated that SET domains mediate interactions with a family of proteins that display similarity with dual-specificity phosphatases (dsPTPases) [2]. A subset of SET domains have been called PR domains. These domains are divergent in sequence from other SET domains, but also appear to mediate protein-protein interaction [3]. 
The SET domain consists of two regions known as SET-N and SET-C. SET-C forms an unusual and conserved knot-like structure of probably functional importance. Additionally to SET-N and SET-C, an insert region (SET-I) and flanking regions of high structural variability form part of the overall structure [5]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null --hand HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.85 0.71 -3.93 267 8450 2009-01-15 18:05:59 2003-04-07 12:59:11 23 382 879 153 5532 8290 1539 133.10 21 17.69 CHANGED GhGlhApcsItpGp..hlh....phtupl..lstppspppttt.............................................................................................................................................................................................................................................tthh................................................................................................................hhhthtpththsstthsshucalNH...............SC.....................Nsthphh.................................tttplhlhAh+sIpsGEElshsYs 
...............................................................................................................................................................................................................................................................................................................................................................hGlh.s.h...p......l...t...sp....hlh..........hs...h......ht.t.h.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................t.............p....t......h......h........h.....c......u....t........t....h....s.....s....h...u....c.......a..l....N..H.........................SC....pP............................N.st.hphh................................................................................tthpl.hl..h..A....h.....+...s.........I.....p..s.GE.ElshsY....................................................................................................................................................................... 0 1814 2953 4471 +851 PF03749 SfsA Sugar fermentation stimulation protein Bateman A anon COG1489 Family This family contains Sugar fermentation stimulation proteins. Which is probably a regulatory factor involved in maltose metabolism. 
SfsA has been shown to bind DNA [2] and it contains a helix-turn-helix motif that probably binds DNA at its C-terminus. 25.00 25.00 29.70 26.60 23.80 24.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.26 0.70 -4.88 13 1762 2012-10-11 20:44:43 2003-04-07 12:59:11 8 9 1701 0 410 1213 1569 211.30 41 87.65 CHANGED l+RhpRFlu-lpl-.GchhpsHhsNTGphptlhpsGspValp+u-sspRKhsashthspts..sphVslsTthsNcLstcAl..sttlspLs..astlctEVcaGppp...lDFLLspst..thaVEVKusTLscsslA.....hFPDAsTsRGpKHL+ELtpls+cG.aRullLFllh+sshcsFpPspclDPcauchlpcAhpsG.Vcllsapsphshp..lchsphlt ..................................lpRYKRFL..ADV....h.h.......s......s.......G...............c.....t...lThHCsNTGuMpGsh.p..PG.ss..Va.hS...p...S..-...s....s.K.....R...Khs.aohEls....ps......p...................G....t.....hlslN..Tt.hsNpLscEAlt...s..............t..p..............I..s........p........Ls..uY..s.......pl+pEV+..Y..G.....p...p+.......RID..F.h.....L.............p.....s...s.........s...............p......c....s....Y.lEVK......u......VT..L........t...c.....p.....t..h.u..................hFP........DA........lTpRGpKHL+ELh...shst..p........G.........p............RAlll..FsV.+ss.lpp.FsPuc.clDspY...AphLpcAt.ppG.VElLAYpsclospt.htltp............................... 0 139 263 348 +852 PF05002 SGS SGS domain Finn RD anon Manual Domain This domain was thought to be unique to the SGT1-like proteins [1], but is also found in calcyclin binding proteins. 
21.90 21.90 24.50 22.70 21.20 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.97 0.72 -4.23 48 564 2009-01-15 18:05:59 2003-04-07 12:59:11 10 29 330 3 345 531 2 76.50 36 24.80 CHANGED spsWspLs..............h...pscc--cssp.t....................lsshF+plYpsuD-Ds+RAMhKSahESsGTsLoTsWt-lttt.h.....spsPcGhEhKca .................................................t.psWDpLst...................hpc....p.p.pc-.ct-s..ssu................................LsphF+clYpcuD--h+RAMsKSahESsGTsLSTsWp-Vtptphp.....t..sPp.................... 0 113 182 275 +853 PF03983 SHD1 SLA1 homology domain 1, SHD1 Finn RD, Wood V anon Pfam-B_ (release 7.2) Domain NPFXD peptides specifically interact with the SHD1 domain. NPFXD is a clathrin-facilitated endocytic targeting signal. NPFXD was originally discovered in the cytoplasmic domain of the furin-like protease Kex2p [1]. Sla1 is thought to function as an endocytic adaptor [1]. 21.20 21.20 21.70 21.90 21.00 21.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.21 0.72 -4.32 18 166 2009-01-15 18:05:59 2003-04-07 12:59:11 7 29 137 1 116 180 15 68.70 47 6.66 CHANGED ossKScP-PpKlRoWoDRSGoFKVEApFLGhtDGKIHLHKsNGVKIAVsssKMSh-DLEYVE+lTGpSL- ........t..tcshPsst+sRpWoDRoGoFpVEApFlG.l.p.-GKl+LHK.h.NG.VKI.AVPlsKhSh-DL-YVE+lTG.SL-.............. 0 44 72 103 +854 PF04925 SHQ1 SHQ1 protein Wood V, Bateman A anon Pfam-B_11411 (release 7.6) Family S. cerevisiae SHQ1 protein is required for SnoRNAs of the box H/ACA Quantitative accumulation (unpublished). 
20.80 20.80 21.00 21.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.08 0.71 -4.85 27 208 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 176 5 136 213 1 179.00 31 36.24 CHANGED Lp...h.ppptp...............lpFocEEpcpht.plsp+paL.ls..p....tplhhsllslLaAYsYD.pps-G-pssE.SuWTIuKLoPplSaLDs.....................hssl+ssllsshRRuLsYPLaRsasLsp+sap-shhhLpu..GKphll+sLLcl+clF.p.hp-hhYlhsclalsDhhsWl..pss.s-phlpsLApclcpth..lscpp.......lp ...................................................................hpFoc-.Epp.ht.plsp+paL..ls..............pt..tppplhhsLl-lLhAYsY-hR.sspG-.pssE...SuWols.KLSssluah-p.......................hsslccslluhhRRsLsY.P.L..aRp...a...pLshpsh.pDshplLph...G..Kphll+sLL-l+clF..p..pp-........stYlhN...clalsDhshWl..pps.ppphlttLupplcph...lpKtt..s................... 1 53 77 115 +855 PF01549 ShK DUF18;ShTK; ShK domain-like Bashton M, Bateman A anon Pfam-B_662 (release 4.0) Domain This domain of is found in several C. elegans proteins. The domain is 30 amino acids long and rich in cysteine residues. There are 6 conserved cysteine positions in the domain that form three disulphide bridges. The domain is found in the potassium channel inhibitor ShK in sea anemone [1][2]. 20.90 4.80 20.90 4.80 20.80 4.70 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.66 0.72 -3.59 157 2278 2012-10-02 17:51:16 2003-04-07 12:59:11 19 119 113 2 2009 2028 124 36.40 29 22.98 CHANGED sCh....D.p......ssCsthssh....Cp...ss.....shhppp.CspTCs.hC ..................................Ch.......Dtt.......ssCst.h...sp.........Cp..............ss.............shh......p......p......p...CspoCs.hC............... 0 920 1083 1978 +856 PF04542 Sigma70_r2 sigma70_r2; Sigma-70 region 2 Finn RD anon manual Domain Region 2 of sigma-70 is the most conserved region of the entire protein. All members of this class of sigma-factor contain region 2. 
The high conservation is due to region 2 containing both the -10 promoter recognition helix and the primary core RNA polymerase binding determinant. The core binding helix, interacts with the clamp domain of the largest polymerase subunit, beta prime [1,2]. The aromatic residues of the recognition helix, found at the C-terminus of this domain are though to mediate strand separation, thereby allowing transcription initiation [1,2]. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.24 0.72 -4.37 259 39743 2012-10-04 14:01:11 2003-04-07 12:59:11 9 171 5036 49 11330 31962 8852 69.50 22 26.33 CHANGED lhpph...hthl...hphutch....hsss....h.ss-D..LlQ-uhlthhcu.hcpac.ppu.p..hpsahhthhc....pthhc.....tlRc.pptt .............................................hpth.hphl.....hp.h.u.t.+h.............hsss...............h..p...h..c..D....ll.Q-.shltl....h....c.......u....h.........c.......p.....a........c...............p.....p.....u................p........hpsah...hthhc....sthhc......tl+c.pt................................... 0 4421 8178 10016 +857 PF04545 Sigma70_r4 sigma70_r4; Sigma-70, region 4 Finn RD anon Manual Domain Region 4 of sigma-70 like sigma-factors are involved in binding to the -35 promoter element via a helix-turn-helix motif [1]. Due to the way Pfam works, the threshold has been set artificially high to prevent overlaps with other helix-turn-helix families. Therefore there are many false negatives. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.05 0.72 -4.65 130 17028 2012-10-04 14:01:11 2003-04-07 12:59:11 11 79 4776 54 4045 30085 7549 51.60 33 15.50 CHANGED sLspLspRE+pllphRa....tpshTLpElGpthulocpRV+QlcpcAlp+LRp .............................hLppLscR.Ec.pll...p...hRah.......................tpstT.LcEluc....p.h..s......l.oc.pRl+Qlcpc..Alc+LRp..................... 
0 1461 2782 3486 +858 PF03145 Sina Seven in absentia protein family Mifsud W anon Pfam-B_1854 (release 6.5) Family The seven in absentia (sina) gene was first identified in Drosophila. The Drosophila Sina protein is essential for the determination of the R7 pathway in photoreceptor cell development: the loss of functional Sina results in the transformation of the R7 precursor cell to a non- neuronal cell type. The Sina protein contains an N-terminal RING finger domain Pfam:PF00097. Through this domain, Sina binds E2 ubiquitin-conjugating enzymes (UbcD1) Sina also interacts with Tramtrack (TTK88) via PHYL. Tramtrack is a transcriptional repressor that blocks photoreceptor determination, while PHYL down-regulates the activity of TTK88. In turn, the activity of PHYL requires the activation of the Sevenless receptor tyrosine kinase, a process essential for R7 determination. It is thought that thus Sina targets TTK88 for degradation, therefore promoting the R7 pathway. Murine and human homologues of Sina have also been identified. The human homologue Siah-1 [1] also binds E2 enzymes (UbcH5) and through a series of physical interactions, targets beta-catenin for ubiquitin degradation. Siah-1 expression is enhanced by p53, itself promoted by DNA damage. Thus this pathway links DNA damage to beta-catenin degradation [2,3]. Sina proteins, therefore, physically interact with a variety of proteins. The N-terminal RING finger domain that binds ubiquitin conjugating enzymes is described in Pfam:PF00097, and does not form part of the alignment for this family. The remainder C-terminal part is involved in interactions with other proteins, and is included in this alignment. In addition to the Drosophila protein and mammalian homologues, whose similarity was noted previously, this family also includes putative homologues from Caenorhabditis elegans, Arabidopsis thaliana. 
28.70 28.70 28.70 29.00 28.60 28.50 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.27 0.71 -4.65 10 1309 2012-10-02 00:06:50 2003-04-07 12:59:11 11 16 738 7 396 752 2 133.90 53 60.55 CHANGED IRsLAlE.+lAsplpFPC+aushGCslp.LPhpphscHEEpCca+PYpCPhss..ucCpWpGshcslhsHLhscHpshhp.pus.-lhaltsshshstussWhhsp............................pCaGcpFpLhhEth-ts..ssphaashlphhGspcpAcpFuYcLELtu..spR+LpWQuhPpSI+-shcpshcup.....DsLlhpscsuphFucsss.....Ltl+Vo ........................................IRNLAME.KVAsS....V.hFPCKYA.........SsGCclo...LP...aT....EKs-..H.....E.E.l.CEF....RP.Y.S.C...PCPG.......ASC.KW.QG.S.L..-.uVM.sH.Lhc...pH..........KS.I.T..T.L...Q...G....E....DI....V.FL.A.T.D.I..N....L.P..G..A..V..D..WV................................................................................................................................................................................................................................................................................. 0 90 187 279 +859 PF04938 SIP1 Survival motor neuron (SMN) interacting protein 1 (SIP1) Moxon SJ anon Pfam-B_5071 (release 7.6) Family Survival motor neuron (SMN) interacting protein 1 (SIP1) interacts with SMN protein and plays a crucial role in the biogenesis of spliceosomes. There is evidence that the protein is linked to spinal muscular atrophy (SMA) and amyotrophic lateral sclerosis(ALS) in humans[1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.68 0.70 -5.01 12 252 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 189 1 164 243 1 206.00 24 72.25 CHANGED LhPl-tsD.s.t-aD.osPPcsstEYLR+V...........phEAppCPsVV.lAplssp+.hc+cQoV.hsl.s......ss.ssPcshsPohcWp....ppQlspFspsRpslsph+pchpsp.lDssss.P..pDp-tWc+FCLscp.................................c.uhsPhLohlp+hsQ.sslsplLEhLstW.hpEcshssp......luRWlYALLACL-pPL.s-spSslRpLAR+CuplRst...Lcpc.......c.-plsshNLlIslluRaFsQpD ...........................................................................................................................................thp......Ppss.EYLppV............................................ph.EAtphP...pVh...lu.p.h.....s.......p...p...h..ppp....ps.s.......s....s..............................sh.....ss.tt....h......P.s....pWp....pt.ltpFpphRp.pl..tp............t............p..h...t.t...t...hs...t...p.....h....................t..spptWp.p.hhhspp...................................................................................................................................Phh......p..h.l...p..hs..p.tpl.t...l.l.p.hh.p.a...htppt..h..s..p............................................hspWlauLLspl-..p..P..L..scspuhlRpLsRp.C..p.lR........hptp..............................th......h.....s..h..hlslh.sph.atQ............................................................. 
0 65 96 134 +860 PF03530 SK_channel Calcium-activated SK potassium channel Griffiths-Jones SR anon PRINTS Family \N 22.90 22.90 23.40 23.40 21.80 20.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.68 0.71 -4.27 10 342 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 90 0 179 301 0 110.00 53 20.18 CHANGED RLt+R+tLaE+RK+LuDaALshAhhGIllMVlEsEL.ohtlYs+..........................................uShYShsLKslISlSTllLLuLIlsYHApEI..........QLFhlDNGA-DWRIAMThcRlhhIsLELllCulHPlP..Gsa.hasas ............................................................+Lt+R+tL.FEKRKR.LSDaALlhuhFGIllMVlEsEL...ohs...s....Y..sK.............................................tShaShsLKCL.ISlSTllL.LGLIlhYHA+El.................................Q.LF..hlDNGAD.DWR.IAMTh-RlhhI.sLEllVCAIHPlP..GpahF.W........................................................ 0 35 51 113 +861 PF02437 Ski_Sno SKI/SNO/DAC family Mian N, Bateman A anon Pfam-B_2013 (release 5.4) Family This family contains a presumed domain that is about 100 amino acids long. All members of this family contain a conserved CLPQ motif. The c-ski proto-oncogene has been shown to influence proliferation, morphological transformation and myogenic differentiation [1]. Sno, a Ski proto-oncogene homologue, is expressed in two isoforms and plays a role in the response to proliferation stimuli. Dachshund also contains this domain. It is involved in various aspects of development [2,3]. 
20.00 20.00 21.50 20.40 19.60 18.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.50 0.71 -4.52 10 565 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 112 15 283 519 0 107.10 39 17.76 CHANGED pP..P.hh...............SsPshssuDsssNEs+hlcLcGt+luuFlVsG-chLCLPQlFshhLKch..uLcplaT+lccLcIsslsCTs-Ql+ILRuLGAI.suVsRCtLIT++DhEpLhsshhsu ..........................................................s.....s..s...pss.pht.s.LcG.plusFh.l...sGpchlCLPQlhss.lLKch...ulpplas+hccLtIhs.spCss-QLcIL+s.hG.uls.uss+Cs.LIT+cDsEpLhpuhht...................... 0 66 89 178 +862 PF01466 Skp1 Skp1 family, dimerisation domain Bateman A anon Bateman A Domain \N 21.20 21.20 21.30 21.20 21.10 20.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.51 0.72 -3.95 62 1119 2009-01-15 18:05:59 2003-04-07 12:59:11 14 21 383 62 707 1038 25 74.70 46 40.61 CHANGED lppWDt-Fl..pl..Dpp.....hLF-lIhAANYLsIcsLLDLsCppV.....AshIK.GKTPEEIRchFsIpNDFTP.EEE...pp.lRcENp..Wsh- ...............lppWDt.cFl...cl....Dps.....pLF-LI........l...........AA..NYLsIKsL.........LD.l.sCpsV.....AsMI.K.......G..K...o.......P.E.E...I.RchF.s.I.p.s.D.aTs..EEE..tp..l+cEstWs................................ 0 222 328 583 +863 PF03931 Skp1_POZ Skp1 family, tetramerisation domain Bateman A, Griffiths-Jones SR anon Bateman A Domain \N 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.74 0.72 -3.87 17 1315 2012-10-02 01:20:04 2003-04-07 12:59:11 10 19 380 90 843 1197 12 61.60 32 39.07 CHANGED phlhLpSSDscsFEV-ctlAhpStsIcpMlEDssssst......lPLsNVsucILsKVlEaCp+aph .....................hlpLhS.oD.Gc..pFpVc.c.p.h..A.p.p.St.T.I....+.s...M..l....c........s.......s...tpsp..................lP.ls.N.Vsup....lLp.K.V.lpa.CpaHt.t................................ 
0 254 396 691 +864 PF00395 SLH S-layer homology domain Finn RD anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.04 0.72 -4.21 175 10758 2009-09-11 16:01:23 2003-04-07 12:59:11 15 704 543 2 2552 9714 348 43.60 29 13.80 CHANGED sFsDlsstt..h....pslpthhptG.lhpGhs...ss........p....FpPspslTRu-hu ...........................FsDlssp..t.....ah.......ptl....ptl.s..p......t......G..llpGhs...........sG...............p..............FpPspslTRschA............................................................. 0 1376 2129 2319 +865 PF01423 LSM Sm; LSM domain Bateman A anon Psiblast SMD1_HUMAN Domain The LSM domain contains Sm proteins as well as other related LSM (Like Sm) proteins. The U1, U2, U4/U6, and U5 small nuclear ribonucleoprotein particles (snRNPs) involved in pre-mRNA splicing contain seven Sm proteins (B/B', D1, D2, D3, E, F and G) in common, which assemble around the Sm site present in four of the major spliceosomal small nuclear RNAs. The U6 snRNP binds to the LSM (Like Sm) proteins [3]. Sm proteins are also found in archaebacteria, which do not have any splicing apparatus suggesting a more general role for Sm proteins. All Sm proteins contain a common sequence motif in two segments, Sm1 and Sm2, separated by a short variable linker. This family also includes the bacterial Hfq (host factor Q) proteins. Hfq are also RNA-binding proteins, that form hexameric rings. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.91 0.72 -4.42 129 8102 2012-10-01 22:42:21 2003-04-07 12:59:11 17 62 2734 482 4356 6190 344 67.00 23 58.52 CHANGED phLpphh.......sppVhlpLps..G.pplpGpLpuaDpahNllLs-spEhhttt.....................................ptt.l....s..hllRGsslhhls.. 
.........................h.....hh.......pp...V..pl..h..Lps........G..h.p.lp.G......p....l..p....ua..D..p.a..h....N..ll.Lp..s.spph..................................................................................................................................h...........t..hh..l.pGpslhhl........................................................................................... 0 1475 2432 3549 +866 PF02463 SMC_N RecF/RecN/SMC N terminal domain Bateman A anon [1] Family This domain is found at the N terminus of SMC proteins. The SMC (structural maintenance of chromosomes) superfamily proteins have ATP-binding domains at the N- and C-termini, and two extended coiled-coil domains separated by a hinge in the middle. The eukaryotic SMC proteins form two kind of heterodimers: the SMC1/SMC3 and the SMC2/SMC4 types. These heterodimers constitute an essential part of higher order complexes, which are involved in chromatin and DNA dynamics [1].\ \ This family also includes the RecF and RecN proteins that are involved in DNA metabolism and recombination. 
40.00 40.00 40.00 40.00 39.90 39.90 hmmbuild -o /dev/null --hand HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.52 0.70 -5.11 29 12849 2012-10-05 12:31:08 2003-04-07 12:59:11 14 52 4778 48 3915 12683 5334 665.40 15 93.51 CHANGED hlcclhlpsFKSat.cplhhsFstsFssIlGPNGSGKSNllDAIhFVLG.p.supplRupphscLIat...............pstsssppApVplhF........sppspt.............p..hslpRphhpsGs...ScYhI........NGcsssh.p-ltcLLtptsIshpshphlh.tsthctlthp.spc+p.hc-tsuhsphcpttpcphpplhpppppttph.hphpphptphpphtpppppt.ch.t.ttt...ht..hhhhph..h.tp..p..pp..t.ptpp.t...t..ptptph.tp.pp..t.ppp..p..pt..t.htpt.tt.tpph.php.tp...ppthp.tppp.tphppphpttpt.h.p.pt..p.hthtp.t.ttthpphtp.tttt.tttpp..hthttttpt.ttt.p.ht.tht.h....pphpt..phtpp....hp.hhpt..t..tt.tp..ph..t..t.tptt....t.h..t.t...pt.tt.h.thp.h....t.....tt....t.....t....................h..................h............hh......................h............................................................................................................................................h....h.........t..................................................................p.........t.tt.......t.......h.p...t..t...t..ttp.t.h.tp..t...pt..ttt.h.th....pt....t.tp.tpt.ttp.pp.hpt.tpp.tt..tt.p.p....pp.t...pt.tthtpt.hpt.tht.....ptt..t..p.p...t..t.p...t.pt.p.h..t..t.htt.ttt.tt.pptpp....htp.t.t..pp.pt.t.p.t..h....t.tp..hc....t..p.t..t.p..t.p....t..ttht..tthp...h.th.p.t.p..t.t.phtph.tpcpchhphhhctpppphpphhtthsthspshpthhphhthuuputhphps.psshssGlphsspsssKphpplp.LSGGEKoLsALALlFA.....lpphcPsPhYlhDElDAALD.pNsptlAphl+..ppscp.uQFIVlol+pshhptA....-plhGlhh.csuhsp 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lpplplps.F.t..........sh..............c.p.......h.....p.....l........p......h.......p.....s.............u.........h.....s.....s.l...........s..G..........NGu...........GKSs...l...........l...-......A.......l.....t.......h.......s......L.......G................p........s...s......p.......p.................h........R......s.......s.....p.....h.......p.....s.....l...Itt.............................t.....s.h.......s....t......u.....p.......l...p....h...h..h.......................................pp.p.stt..................................................t.p......l.h.........l..p...R...p.....l.....t......p.....s..Gc...........o..p....h......h.....l........................N...s.....p..........t.......s.....p.....h......p....p.....l...t.....p....h...h......t.....p........h...h......l......s...h...cs.phhttt.p.chhhhp.tthpttht.ttthtphpphhcpppttlpptppptpphtthhp.hthphpthtttt.tttt..t...t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.tt.httthtthtthhpttpphhtthhtthhth.pth.h.hthhhhhsptt.tths.pshhtsshphssp.stpth.hhphh.S.G...G..E..h.........s..hh.s....L......A...L..h..h..u................................h.t...p......h........p...s......t......P....h.....h...l..h.DEl.-usL.....Ds..t...p.t.p.t....l...u.......p...h.l....p.................p.....h...s.............t.............t........s............Q..h...l...s...l..o.....p...........t....h..h.t...u......p...hhl.h.......t...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................... 0 1352 2437 3292 +867 PF04158 Sof1 Sof1-like domain Wood V, Finn RD anon Pfam-B_9404 (release 7.3); Family Sof1 is essential for cell growth and is a component of the nucleolar rRNA processing machinery [1]. 25.00 25.00 26.10 29.90 22.10 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.98 0.72 -3.96 28 344 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 289 0 240 346 7 85.70 41 18.60 CHANGED AsASE+lGllosRE+puhpYsptL+E+apahPEI+RIuRHRHlP+slhptpch+pthhcAc+R+EcNc+pHSKsup...shhsE+cK..pll ......ApASE+lGlhssRE+pthpYscsLK-+apahPEI+RIuRHRHlPKsIhpAtchc+hhhc.up+RKEtNcR+HoK.sp..hshhsE+cKhll............... 0 88 137 202 +868 PF01033 Somatomedin_B Somatomedin B domain Bateman A anon Bateman A Family \N 23.10 23.10 23.30 23.20 23.00 23.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.47 0.72 -3.99 73 1147 2009-01-15 18:05:59 2003-04-07 12:59:11 12 76 110 24 540 1024 13 45.40 33 10.31 CHANGED s.pSCpuR...Cspshppst..........tCpCcstChp.hssCCtDYcphChtphs ........................tSCp.sR.....Cscthttsp.......................sCpC-.spChp..hssCC.DYpphCttp.t....... 0 152 189 320 +869 PF03700 Sorting_nexin Sorting nexin, N-terminal domain Finn RD anon Pfam-B_29150 (release 7.0) Family These proteins bins to the cytoplasmic domain of plasma membrane receptors. and are involved in endocytic protein trafficking. The N-terminal domain appears to be specific to sorting nexins 1 and 2. 
21.00 21.00 22.00 29.10 20.10 20.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.82 0.71 -4.04 5 151 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 39 0 51 126 1 120.90 46 24.42 CHANGED AuEREPPPlsDscsp........EsE.........-pEEGEDLFTSssSs.cossSSP-suslPuE-uS.TsSNGPKssplLLDDDcEDLFAEAT-EVSLDSPE...RcsILSopsSPAlTPVTPoSlIsPRlE.hu......aDRShEElEEE .............AuER.PPPhsshcsp...........-...E.............p.p-GEDlFT.u.....ss.....s..s........c.......sp..ss.p...psu.LPhps.s.....S......ppN..G....+tpp...Dp-.pDLFA.....-..AT.ElSLDSsp.......+cs.....h.po.....S..ssp.s...T...s.o.oh.t.P................ohEElEEE.............................. 0 2 7 21 +870 PF04130 Spc97_Spc98 Spc97 / Spc98 family Bateman A anon Pfam-B_3531 (release 7.3) Family The spindle pole body (SPB) functions as the microtubule-organising centre in yeast. Members of this family are spindle pole body (SBP) components such as Spc97 and Spc98 that form a complex with gamma-tubulin. This family of proteins includes the grip motif 1 and grip moti 2 [1]. Members of this family all form components of the gamma-tubulin complex, GCP [2]. 
27.00 27.00 27.30 27.20 26.30 26.80 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.85 0.70 -6.19 120 1599 2012-10-02 13:20:28 2003-04-07 12:59:11 8 21 316 1 1118 1585 20 536.90 16 64.78 CHANGED -l......LhsL.hGhs.s...hhphpt.........................tpaplspsh....................shps.............llpplhchuphapplp..pahp..tptt...................................uhhhpuhssulpphLp.sahp.hlspl-pphhp...............................................h.oLtplhthlp..phhphlphLtplspp.......h.....................................................t.tthpGuplLshlhpph....t.p.Gs......php.p....hhpplhppstpsa...hc.................................hlppWl..hcG......p....lp....D.....................t...E................FFlpps........................................tttpspphWpp.........p..........apl......pps......................................hl....P..................................sFl..psh...uppIlhsGKslshl+phstptthtpstt............................................................................................................................................................................................t.hp.hpssphpph....................................................................................................................................................................................................................................................................................................................................................................................................lppthphss..pplhpl............lhpph.cLh...pc.Lpsl+pa.......hLl....spG-Fhsphhpphtsplpcsspphpstphpshhptshttsssp......................................................................................................................................................................................................................................................................................................tptpthpttsstthtptpshpuhsshsLpYpls..hPLslll..otp....
........................slppYpplF............paLhpl++sp........htLs.phWtp.p...........t............................t..tth...hpph.......hhl+pchhpFlpslptYhh............h-......VlpssWpp.hppplp....................................................................................................................................pspsl-p.l......hptH.ppaLsslhppsh ......................................................................................................................................................................................................................................................................................................................................................................................................................hlhhh..Gh..u...hh..hp.....................................................th..h..t.......................h..s.pp...................................lhp.p.lhthu.hhh...tlp...pahp....t.........................................................................................................................u.hhpuh.ssul.p.p.hLp..paht...hlh.p.lcpp.h................................................................................................................h.oL..plhhhhp.....hhh.hh..ph..Lhtl..spph.........................................................................................t.hpus..tllshlhpth.........tp.Gs........................ht.p.................hh.plh.t.tst...psa...hp.............................................hlppWl....hpG...............................l.......D..........................s.................hp.......E................Fh...lppp..........................................................................t..tpphapp.............p......................................................apl..........ptp..............................................................................hl.......P...............................sa..l....tph......upplhhs.GKt.l...p.h...l......+ph......st....t....t..h..tt.....t............
.......................................................................................................................................................................................................................................................................................h......th.th.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lp.pthth..s....s.ptlh.ph..................hh.....p...ph...p.....Lh........tp.Lp.sl.+p.a.......hLh.....tpG...-hhtt..........hhp................h.....p........tl...p.p..s......p..t...h....t....h.....t...hp.s...h...h..p....s..ht..t..t....p..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.....h...p...t...t...h..t..............................s.......t...shs.shpL.p.....Y.p.l.....hPlshll...s.p..............................shppYp
hlFpaLhpl+chph.Lp..ph....aht...........t.......................................................................................hpth..........................hhlptchhh.Fl.pplt.Yhh................hp............................llpspapp..h.pplp...........................................................................................................................................p.s.p.sh-p.l...hthHtpaLsphhtt..................................................................................................................................................................................................................................................................................................................................................................................................................... 0 419 621 910 +871 PF04435 SPK DUF545; Domain of unknown function (DUF545) Finn RD anon Pfam-B_429 (release 7.5) Family Family of uncharacterised C. elegans proteins. The region represented by this family can is found to be repeated up to four time in some proteins. 22.10 22.10 22.20 22.10 21.80 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.20 0.72 -4.09 37 379 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 5 0 372 368 0 102.60 21 29.69 CHANGED hs+hMsFLl-pTKDs.sE...Pls.spplFp-Fsch-ssshs...tpsYhp+F+ppLAPpMsphssYSIEpRlRlMFuhuucVp--.FLpplcppGs.VpLD-ppRIs+YsSpDGplpL .........................................h...hhpalhppspph..p.....Ph.....htplhcca.tpp.p.t.s..tts.............hpsh.hp....+hp..p..tl.t.....plpt...h.p.p....ashcs+l+hhFshus.sVs.p.....p...FL....p................c....L............c.p...p.u.p..V..p...l...D.c..p.pR...I..h.cYpu.pctth................................................ 0 69 86 372 +872 PF04014 Antitoxin-MazE SpoVT_AbrB; Antidote-toxin recognition MazE Kerrison ND, Bateman A, Finn RD anon COG2002 Domain MazE is the antidote to the toxin MazF of E. coli. MazE-MazF in E. 
coli is a regulated prokaryotic chromosomal addiction module. MazE antidote is degraded by the ClpPA protease of the bacterial proteasome. MazE-MazF is thought to play a role in programmed cell death when cells suffer nutrient deprivation [2], and MazE-MazF modules have also been implicated in the bacteriostatic effects of other addiction modules [3]. MazF toxin functions as an mRNA interferase, cleaving mRNAs at ACA sequences to inhibit protein synthesis leading to cell growth arrest [4]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.13 0.72 -4.18 145 4979 2012-10-01 20:57:08 2003-04-07 12:59:11 13 18 1997 32 1417 3448 534 45.50 23 42.49 CHANGED hpsssshtlslPpchtcph.....slct...Gcpltlhhp..ss..tlllp.hppttp ..............+hsppstlsIPpclc...cpl........slpt......G-plplhsp...ss..plllp.ht....st.......................... 0 482 933 1183 +873 PF00622 SPRY SPRY domain SMART anon Alignment kindly provided by SMART Family SPRY Domain is named from SPla and the RYanodine Receptor. Domain of unknown function. Distant homologues are domains in butyrophilin/marenostrin/pyrin homologues. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.61 0.71 -4.04 68 8383 2009-01-15 18:05:59 2003-04-07 12:59:11 23 306 478 24 4618 6876 377 121.50 19 17.99 CHANGED sG+aYaElcltstst........hplGhsppss..................................................................sttp...hsucc.tuauapstt.pphtssps........tthspthpp.........schlGshlDhpss....plsFt.tNGptht......hsFp.psphs...thlaPhhsht.....spthphphstht .........................................................................................................................................G+aYWEVclsspst..............................htl.Gl...s..ppsh....................................................................................................................................................t...s...t..t..p.....h..hu.p...s....t...s..a.s...h.....p.h...p....s.......p..t...a...t.s.tps....................tph..s....thpt............................................scplGlh...l.....D.....h..csG.................p..ls.Fh...p.s.s.pt...t.............hhsFp......h...phs.............ts.l..aPhhsh....................................................................................... 0 767 1604 2913 +874 PF03105 SPX SPX domain Mifsud W, Bateman A anon Pfam-B_502 (release 6.5) Domain We have named this region the SPX domain after (SYG1, Pho81 and XPR1). This 180 residue length domain is found at the amino terminus of a variety of proteins. In the yeast protein SYG1, the N-terminus directly binds to the G- protein beta subunit and inhibits transduction of the mating pheromone signal [3]. This finding suggests that all the members of this family are involved in G-protein associated signal transduction. 
The N-termini of several proteins involved in the regulation of phosphate transport, including the putative phosphate level sensors PHO81 Swiss:P17442 from Saccharomyces cerevisiae and NUC-2 Swiss:Q01317 from Neurospora crassa, are also members of this family [see 4,5]. The SPX domain of S. cerevisiae low-affinity phosphate transporters Pho87 and Pho90 auto-regulates uptake and prevents efflux. This SPX dependent inhibition is mediated by the physical interaction with Spl2 [6] NUC-2 contains several ankyrin repeats Pfam:PF00023. Several members of this family are annotated as XPR1 proteins: the xenotropic and polytropic retrovirus receptor confers susceptibility to infection with murine leukaemia viruses (MLV) [1]. The similarity between SYG1, phosphate regulators and XPR1 sequences has been previously noted, as has the additional similarity to several predicted proteins, of unknown function, from Drosophila melanogaster, Arabidopsis thaliana, Caenorhabditis elegans, Schizosaccharomyces pombe, and Saccharomyces cerevisiae [1,2]. In addition, given the similarities between XPR1 and SYG1 and phosphate regulatory proteins, it has been proposed that XPR1 might be involved in G-protein associated signal transduction and may itself function as a phosphate sensor [1]. 
26.90 26.90 26.90 26.90 26.70 26.80 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -4.62 168 2197 2009-01-15 18:05:59 2003-04-07 12:59:11 14 120 337 0 1617 2267 25 161.90 18 28.52 CHANGED MKFu+pLppphl.....PEWpsp.....YlsYcpLKKhl.+plpptttpt....................................................................................................................................................................................................................................................................................................................................................................................ttpthpp........cspFhptL.-pELpKlssF....................................ap.......p+tp-hhc............+hp..........tLppplpphttptstttttt......................................................................................................................................................................................................................h.pthtpLcptlhchhtplptLcsatpLNhsuFpKIlKKaDKhh...spp..hptpahpphh 
............................................................................................................................................................................................................................................................................................................h+Fup.h..t..................................ta....t........ahtYp..................hKp...l.h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.Fh....h.p.phtc.......l..a................................................................................................................p.tth...................................pht................h.......t.......................................................................................................................................................................................................................................................................................................................................................................................................................................h..th..thh.th..L.pa..............t.lNhhu.h.KlhK........KaDKhh..t..............t....................................................................................................
.................................................................................................................................................................................................................................................................................................................................. 0 468 969 1406 +875 PF03125 Sre C. elegans Sre G protein-coupled chemoreceptor Mifsud W anon Pfam-B_352 (release 6.5) Family Caenorhabditis elegans Sre proteins are candidate chemosensory receptors. There are four main recognised groups of such receptors: Odr-10, Sra, Sro, and Srg. Sre (this family), Sra Pfam:PF02117 and Srb Pfam:PF02175 comprise the Sra group. All of the above receptors are thought to be G protein-coupled seven transmembrane domain proteins [1,2]. The existence of several different chemosensory receptors underlies the fact that in spite of having only 20-30 chemosensory neurones, C. elegans detects hundreds of different chemicals, with the ability to discern individual chemicals among combinations [1]. 
20.00 20.00 20.00 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.17 0.70 -5.53 19 368 2012-10-03 04:04:29 2003-04-07 12:59:11 13 8 32 0 339 323 2 246.30 15 87.61 CHANGED Mllphtss.....hhalPlahhspsh....hphlhslh-llhallssYlhhhslhlhh+lphFHpNhhhlslshhshWaEhllGKhlshsYchtlh....sht........hphhthWTs-.schl.VpshsslphlhhuuFLphHah.hohlaulhslslERshAShhlcsYEpps+haIs.lhLlhltQhluIshu..hhhlhphls.hlhhhhsahlssslushhahhlcphNpphppchcs.p.+...+..haTLSpRFQlKENl+sl+lhp+llhsllshlslsshslhhLhacl.....hP...s.hhsahhEshlhl.PhhIshshhholstW+pcapphhs.hhh...hc...hhptphthp.h.h.psppcphphETchYFpQLscSWp ....................................................................................................................................................................................................................................................................................................................h.....h......h...............hh...h.h......hh.hhhhERhhAohhhtsYE...p..p............p.............h........l...h.h...l......h.....h.h.......................hh........s..h...........s................h.h.h.h.........................h........h..............h...............h....h....h............h.....h.......hh.......h...........hh...............shh.............h.....h......h..h.h.hh.....hN.phh.p..p...h....p..t............t.............Yo.Lu.t+aQlpENl+.shp................hh..pt.h....h..h.s.h..h..h.h.h.h............h.h.hh.h....h....h..h.........h.p.h.............................hhhh.h...h.p.....h....hh..h..sh...hh.....s.h...hhh...............tap.p..h......h.....................................................................................................p......p.....aF..h............................................................................ 
0 115 150 339 +876 PF04086 SRP-alpha_N Signal recognition particle, alpha subunit, N-terminal Wood V, Finn RD anon Pfam-B_7342 (release 7.3); Family SRP is a complex of six distinct polypeptides and a 7S RNA that is essential for transferring nascent polypeptide chains that are destined for export from the cell to the translocation apparatus of the endoplasmic reticulum (ER) membrane [1]. SRP binds hydrophobic signal sequences as they emerge from the ribosome, and arrests translation. 25.40 25.40 25.70 25.40 24.80 25.30 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.96 0.70 -5.02 27 323 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 265 2 237 320 1 243.40 23 40.50 CHANGED ssslNuLIpsVhlEE+s.............usss.ap+-....paTLKaphspEhsLlFVslYQplLpLsalDKLLssl+phFh-hY..................cspLp..p.psph.th....pFcch.Fcphlc-hEpsupt.tpssphhpphcpppcs.hospshh...s...t.........tpp.tttptt.pspsssssssp.t..s.........lhstc.h......ppRt+hhtphtst...........sssscptos+pstts+...pshKchRhWDtsG.....spc-stsLDaS....t.ss.tt..ssssss.ptlstpshtptTtKGp.hh.tDl......s-Esc .......................................................................s.tslNsLIpsshlp.ERs................usst.a..pp-.....thsL+aphsN-htLlFVssaQplLp..L...salDcLlssl+ph.FhchY........................tsplt...t......h.t............pFsp....Fpphl.....pph.Eppup....pts....t.h...pphpp....s.p.c.stt..s.stshh..t.st..t............................tttttttt...tt....t.....s...psp.s....s.ptt.....t......................h.ptchh...............ppRtchhppttt................................t.pts..p....t.s.....ppt.t..p...pttKp.Rh.Ws.su......pppstt.LDaS.........ssttt.....t....t....t.ht..ttphh.tp..p.G.p.h..h-h.........tp...t......................................................................................... 0 80 131 196 +877 PF00448 SRP54 SRP54-type protein, GTPase domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family This family includes relatives of the G-domain of the SRP54 family of proteins. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -10.97 0.71 -4.87 55 11362 2012-10-05 12:31:08 2003-04-07 12:59:11 17 39 5002 94 3068 9263 5175 195.50 41 44.83 CHANGED ssVIhhVGlpG..uGKTTThuKLAphhpcc...Gp+lhllAuDTFRuuAh-QLcphupphsl.hhs...tttts...DPsslshculppuKppt..hDllllDTAGRlppcpsLhcELpclpcll........pPs-slLVlDushGQsuhs.QA+sFp-sl.s.lsGlIlTKlDGsA+GGusLSlstthphPItFlGsGE+l.-DLcsFcscpals+LLu ...............................tVlhhlG.lp..G..sGKTTolGKLA...p...h.h.......p.....p........p...........s.............++....VhLsA.u..D..s....aR..s...A.A...l....-..Q..Lp.s.h.u...cp.....s..........s.........lslhs........................ttpss....cPss......l....sh.........c..Al.p.p..A..+....t..c.t.............hD..lll.....lD.T...AG.RL.p...h..c.......ps.LMpE.L.c.c..lp..csh...............................s.P..p.Esl.LVl.DAhT.G.Q.s.A....l..............s..p.Act...F..........s.....-........s............l...............s................l..o.....G..ll.L.TKLDG.s.A.+.G.Gs.slol..t...........p.....h.s.......h...P..I..+a.lGsGE+l..-DLcsFp..s-chsstll................................................................................................... 0 1055 2004 2604 +878 PF02881 SRP54_N SRP54-type protein, helical bundle domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.47 0.72 -3.87 189 9665 2009-01-15 18:05:59 2003-04-07 12:59:11 14 28 4872 82 2570 7341 3204 77.20 29 17.42 CHANGED L.ppsLp+shpplssp...hhh.sccpl..cchlc-lEptLlpADVuhpsspcllcpl+c.t....ht.............tptlsstpplpphlpcpL ...........................LpcpLpcohps..ltup...........shl..sccsl...c-hhcElctsLLpADVulplscchlppl+c.tlu................hcslssspplhcllp-EL................................. 
0 854 1649 2165 +879 PF02978 SRP_SPB Signal peptide binding domain Griffiths-Jones SR anon Structural domain Domain \N 20.10 20.10 20.10 20.10 19.40 19.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.52 0.72 -3.77 124 4974 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 4793 40 1356 3784 2145 100.10 43 21.33 CHANGED pFsLcDhhpQlpplpKMGslsplhpMlP.Ghsth......p.tp.ths.-+thc+hcuIIsSMT.cER..psPcll.................................ss...SR+pRIApGSGsslp-VscLl...KpapphpchM ...........................................pFsLsDFl-QlpQh+pMGshsslh....sMlP..G.huph.........t.t.p.phs..-+plp+hcAIIsSM.T.pER.....psP...-.ll.............................................ss.......SR++RIAtGSGsslp-..VN+Ll...KQFpphp+MM.............................. 0 465 879 1145 +880 PF00436 SSB Single-strand binding protein family Finn RD, Bateman A anon Prosite & COG2965 Domain This family includes single stranded binding proteins and also the primosomal replication protein N (PriB). PriB forms a complex with PriA, PriC and ssDNA. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.88 0.72 -4.16 70 10092 2012-10-03 20:18:02 2003-04-07 12:59:11 20 7 4898 110 2030 6702 2778 102.00 30 65.61 CHANGED lN+VhL..lGpLspD..P-lRhst.sGsslssFolAssc...phps.......tsptcpps-ahplslasc.hAEhhspYlpKGs.hlhVpGcLps.cpapspp.G....pc+hss-lhsc....plphL ...........................................hN+VhL..lGpLspD......P.E..l......R.hss....s....G......s.....s.....V..u..shslAssc.......paps...................tsp.tc..cp.s-..a..hsllh..a...s....+....h..........A...E....s.........s.s.........p.....a.....l........p.K.G..s...hlhlpG.cl......p.......o..Rp..ap..spp..G............pc.c..h.s...sElhsp.....pht.............................................. 
0 640 1330 1739 +881 PF04503 SSDP Single-stranded DNA binding protein, SSDP Mifsud W anon Pfam-B_2031 (release 7.5) Family This is a family of eukaryotic single-stranded DNA binding proteins with specificity to a pyrimidine-rich element found in the promoter region of the alpha2(I) collagen gene. 21.70 21.70 22.10 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.22 0.70 -4.67 5 325 2009-09-11 22:41:17 2003-04-07 12:59:11 8 5 59 0 134 249 0 206.60 62 77.49 CHANGED EHSSEAKAF.............................HDYSAAAAPSPVLGNhPPGDGMPsGPlPPGFF..........................................................................QPFM....................SPRYPGGP.....RPPLRhPNQPsGGVPGSQPL.....LPuGMDPT.RQQGHP...............NMGGPMQRMTPPRGMsPL..GPQ................NYGGuMRPPsNuL....uGPGMPGMNMGPGuG...................................RPWPNPsoANSI.......PYSSuSPGsYVGPP.................................GGGGPPGTP..IMPSPADSTNSuDNMYTlMNsVPPGssRuNFPMGPGSDGPMGGhGGMEPHHMNG..............................SLGSGDMDSlSKNSPNNhSGLSNs.....PGTPRD.........DGEM..GGN 
..........................................................................S.............AA.u.........A....PSPV......hG.shs.Ps.-u..M...ssGPh.s.P.GFF.....................................................................................................................................QPFM....................SPRas.GGP.....R..PslRhs.sQ....s.u..ul.PGoQPL.....LPsuMDPT.Rt..Q.G.HP............................sMGG.sMQR...M.sPPR.G..M..ss......h..GPQ.......................sYGu..uM..R..PP.Nu.L.............GP.u.MPuhNM.GPG.u..t.......................................................t.PWs..sP.su.NSI.......sYSSuSPGsY.s.GPP.................................GGGGP..P..GTP..IMPSPu.DSTNS......u.-NhYThhNs.lsP......GssR..sN......F....PMGPGs-.G.PM...uuh.u.uM..EsHHhNG....................................................SLGSGDhDul...sKsSPs.shu.hsN......PGTPR-.........DuEh..s...................................................... 1 13 25 69 +882 PF05030 SSXT SSXT protein (N-terminal region) Moxon SJ anon Pfam-B_4900 (release 7.6) Family The SSXT or SS18 protein is involved in synovial sarcoma in humans. A SYT-SSX fusion gene resulting from the chromosomal translocation t(X;18) (p11;q11) is characteristic of synovial sarcomas. This translocation fuses the SSXT (SYT) gene from chromosome 18 to either of two homologous genes at Xp11, SSX1 or SSX2 [1]. 21.00 21.00 21.20 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.11 0.72 -4.39 9 329 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 133 0 185 312 0 63.70 52 20.29 CHANGED RG............tsplsppslQ+LLDENspLIpsI.-YQNKG+AsECsQYQplLHRNLlYLATlADusps.hpspts ..................tls.ttIQ+hLDENppLIpsIh-hQN.....KGKss.....ECs.Q....YQphLH+NLlYLAoIADus.s.t...s............. 
0 45 78 128 +884 PF01852 START START domain SMART anon Alignment kindly provided by SMART Domain \N 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.20 0.70 -11.09 0.70 -5.00 18 2507 2012-10-02 19:24:03 2003-04-07 12:59:11 14 50 457 40 1330 2394 50 186.50 16 33.31 CHANGED plttpshpphhphspsspssWhp.s......t.pstspshpphs.s.........pscshRtpshV.hsss..lspplhcsh.....pWsppht....pspslpslssu.......sslphhhsphhs.sPlsP.R-ahhlRht+p.......ssssaslsshSlsssptss....ptshlRscphsSGhllpssssGh...........oplsalp+s-lcuphs..p.lh+slhpsuhshss+p..hhusLpptspp .................................................................................................................t....................t....W............................h.t...h................................................h.h.h.+.h..........s....h...l.....hs.st.........h.ht....plhpp..........pWspphh.........................ph.p.ll.p.p.l.s.ss...........................stl..h....h......h..h..h..p..h.......h..................s...h..s..........s..RDh.l...h..l.R..hhpp.......................tss.s.h...h.ls.....s..h.....Sl..p.............ts........p........h.........s............ttshl..R...up....h....h.s..u.s..hl...l....ps....h...s..ss............................s.p....lsal..h..p..s-.........p...u...h.h.s.....t.l.hp.hhttth........hhp..h.............................................................................. 0 408 676 997 +885 PF01740 STAS SpoIIAA; STAS domain Bateman A anon [2] Domain The STAS (after Sulphate Transporter and AntiSigma factor antagonist) domain is found in the C terminal region of Sulphate transporters and bacterial antisigma factor antagonists. It has been suggested that this domain may have a general NTP binding function [2]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.28 0.71 -10.12 0.71 -4.67 68 10190 2012-10-02 18:52:36 2003-04-07 12:59:11 16 126 3448 44 3587 10260 1791 111.40 16 25.99 CHANGED thphpttssl....hllclsGs.lshssustlpcpltphhtpt...................................................................................phlllDhssl..shlDosGltslhphhcphp.ppusplhlssspspltch.........lptsuhh.chh..........plhtolppA ..............................................................................................................................................................................h........sh.......hlh.p.l.p.G.s..l...as..s.u.p.t.h.pp.p.l....t..p...hhptt........................................................................................................................................................................................ptlll..D.........hs.s.l..shlD.sou.l.p.s...L....tp..h.h......c....p.....hp........p..........p....u....h..p...........l.h.l..s...s......l......p..s..p...lt..ph.........lp.t..s.s.h...t............................................................................... 1 1204 2247 3019 +886 PF03015 Sterile Male sterility protein Griffiths-Jones SR anon Pfam-B_1115 (release 6.4) Family This family represents the C-terminal region of the male sterility protein in a number of arabidopsis and drosophila. A sequence-related jojoba acyl CoA reductase is also included. 
21.40 21.40 21.40 21.60 21.10 21.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.03 0.72 -3.72 13 894 2009-09-15 11:21:16 2003-04-07 12:59:11 14 22 172 0 591 1002 19 92.10 26 18.78 CHANGED haHhLPAhhhDlhhhLh..GcKPhhl+.la+KI+...pslslLphFshspWpF-scNhpcLppphs.pD+c...hasF.DhsslsWc-YFpps.lhGhRpYLhKE ...............................................hahlPAhhhDhhhhlh.......Gp...c.....s..h....hh+...lh..p..+lp.......cshph...hpaFsh.p...p.....W......pF.ss.sN...h.pp.L.h..p..p.....h....s...s..p.Dc.c.............hFsa.Dhpp...lcW.p..c.Yhpsh.lhGh+p.Ylh+-............................. 0 181 263 487 +887 PF02544 Steroid_dh 3-oxo-5-alpha-steroid 4-dehydrogenase Bashton M, Bateman A anon Pfam-B_1713 (release 5.4) Family This family consists of 3-oxo-5-alpha-steroid 4-dehydrogenases, EC:1.3.99.5 Also known as Steroid 5-alpha-reductase, the reaction catalysed by this enzyme is: 3-oxo-5-alpha-steroid + acceptor <=> 3-oxo-delta(4)-steroid + reduced acceptor. The Steroid 5-alpha-reductase enzyme is responsible for the formation of dihydrotestosterone, this hormone promotes the differentiation of male external genitalia and the prostate during fetal development [2]. In humans mutations in this enzyme can cause a form of male pseudohermaphorditism in which the external genitalia and prostate fail to develop normally [2]. A related enzyme is also found in plants is Swiss:Q38944 (DET2) a steroid reductase from Arabidopsis. Mutations in this enzyme cause defects in light-regulated development [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -11.02 0.71 -4.44 10 1178 2012-10-01 22:51:20 2003-04-07 12:59:11 11 15 411 0 741 1677 1096 142.80 25 50.78 CHANGED sshPlhhlstAsh.FsshNGhlQuhahuaats........s-sahs..+hllGhhLalhGMhINh+SDpILR....pLRKsGpsuY+IPp.GGlFEYVSsPNYFGElhEWlGaALssWShsuluFAh...FohssLssRAhpHHpWYp+cF.ccYP+oRKhLIPFla ..........................................................................................s....h..hh.s....a.h.hs.h.............h..h...............................................h.....................t...h...h..h......u.h.hlF.h..h..u...hhNhpsc..hh.Lt..........................sL...R.....p......s......u......p..........p.......t...........a.....p......I.P......p......u......t....h.....F....p....h....VosPNYhhEl.l.t...W...l.u..............a...s......l.........h........s.........t.......s..........h........s.........s....h......h.....F....h...h.................h.s...h.....s.....p..hs..........h.....A.......h......t.p.....+....c...............h....Y...h...c..c.......F......p......c....Y....s.....p.p..R...t......s....llPalh.................................................................... 0 231 405 592 +888 PF02910 Succ_DH_flav_C succ_DH_flav_C; Fumarate reductase flavoprotein C-term Griffiths-Jones SR anon Structural domain Domain This family contains fumarate reductases, succinate dehydrogenases and L-aspartate oxidases. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.29 142 8079 2009-01-15 18:05:59 2003-04-07 12:59:11 15 23 3914 101 2187 5856 3967 117.20 28 20.41 CHANGED +pcLpchMpppsGlhRsppsLpcul........pclppL.pcch.p.p.ltl.t-puthhNssLhcslELtshlphAphhshuAlpRpESRGAHhRpD..aP.......................pRDD.................psah+HTlhhhttps..............l..phcsVphp.................PptRsY ..................................................pclpphMt.c.t.s....u.....l....h.....R.....s.....s.....c..tL.....pcu.l................ccl.p.t.l...p..pc.........h.........p.....p.........h.............t.............h....................c.............p.......s.............h.....h......N.............p.....s............h...h.p.sl...EL.t...shlplAphhshuAhtRcES..R..G...u..Hh..R.......D.....aP......................................cRs.D...................pah..ppsh.h........t.tt.....................h.th.p.l............................................................................. 
0 688 1387 1840 +889 PF00884 Sulfatase Sulfatase Bateman A anon Pfam-B_784 (release 3.0) & Pfam-B_7393 (Release 8.0) Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.00 0.70 -5.24 59 18763 2012-10-03 20:55:17 2003-04-07 12:59:11 18 130 3819 26 4719 15548 10237 301.40 17 53.82 CHANGED .sllllluEohtts....shshhuhsh...................ts...sPtl.........tths..ppGlhassha..ussshTssoh.shlouhs.tphs....................shhpp.sslsphhppt.GYpo.hht....shhspps.............................shp..hhsh.s.tsthtptt..shh................................................................................s.Dptlhsphhph.....hppt.scshalhlthhusH.................hhsscaspphtth.s.h.t.........................................................................phhssYssslthsDphlspllptL..pthtcsThllah.uDHG.tl......................................................th.htsttt.tt.ptthplPhllahssthtp...........ttscthhu....p.Dlhsollshs.uh ......................................................................................................................................................................................................................................................................Nllhl.h.....s-.shtht........................th.s.h...h..u.....th.............................................................TPpl.........stht................ppu...h...h...F.s.......s........h...a..........ss...s.........s......h....s........s..s.......o...h......s...s...h.h...o..u....h...h...sh.p....h...s...............................................................ths..tp...t..s...l.....s.p.h......L....p.....p...t....G..Y...p...o...........h....h.....t..t..................s....h.....h...s..pp.t........................................................................t.sa....p..........h...h......s.....................h.....t.....s....t...h.......t....t......t...........t...........................................................
...............................................................................................t..h..h.s...p...h....h.....h...p...p...s..h.phl...................ppp...ppPa.h.l....h.......lt.h.....hs.s..Hh.....................................................h.s...p..c.....a......p..p.....h.....t....t....h............................................................................................................................................................phhs.s..Y..h.s..s..l..p.....h..h....D.p...t.l...u...pl....l............p..t...........L.....c...p........p......s........h...........h........c...s...T.....ll..lah..uD..H.G..t.....h.tt...................................................................................................................................th.h.t..t..t..p..t.............h..h.....p..t..t..h....+...l...P..h.......h......lh..h..s..s..t..htt....................................th.pt.h.h.s..............thDl.hsTlhphh.s........................................................................................................................................................................... 
0 1780 2902 3885 +890 PF00685 Sulfotransfer_1 Sulfotransfer; Sulfotransferase domain Bateman A, Griffiths-Jones SR anon Pfam-B_87 (release 2.1) & Pfam-B_1885 (Release 7.5) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.73 0.70 -5.14 45 4587 2012-10-05 12:31:08 2003-04-07 12:59:11 22 172 680 97 2665 5498 3822 230.10 16 64.36 CHANGED ssDlllsoaPKSGTTWlpcllsh...............lhspsch....phhppshl....hsphPhlEh............................................................................shtphpph.uPR.lhp..oHhshthhs.shhpspsKlIYlsRNP+DshVShYaatphht.............hh.tsss..ap..........phh-hFhp..............GplshGsaacHlcsWhp..hppppplLalpYE-h+p-Pppplp+ltcFLGtshs......t.lppllcpsoFpshKss.ssshs...tt..........p.ssahRKGhsG-W...+sahTs.p.s-chDchac.......cphpsss .................................................................................................................................................................................................................phhlhsh..+..s.......G...T..s...h.h..p.hlt...............................................................................h.p.ht....................p.p...........h........................p....h..h........h......l.p...h.............................................................................................................................................................t.......t.....h....t.....t...h.......................h.........................................................................................................................t...............p............s...K....l....l....h..l.h..R.s.P..t.-...h...h...hS...h...a..p..h..h..t.t.ht...................................................................................................t......hp...........................................ph.h.....p.....h.h......t.................................................................................s..t...h.......h.....u...........
a...h...p....a....l...p...t...a...hp.................hh........t...p..p....l.......h..h....l..p..Y..E-........h........h........p......c........s....t..t...p...l...pc.l.h.c...F.L..Gh..s........................h...t....h...h..p..t..h..h.p.....h...p....t...........................................................t...h.h.tp..s......tth.........h..hs...t..t.hpt.h................................................................................................................ 0 952 1358 1975 +891 PF03567 Sulfotransfer_2 Sulfotransfer2; Sulfotransferase family Finn RD, Bateman A anon Pfam-B_3050(7.0),Pfam-B_5394(7.7),Pfam-B_7836(10.0),Pfam-B_5040(7.5) Family This family includes a variety of sulfotransferase enzymes. Chondroitin 6-sulfotransferase catalyses the transfer of sulfate to position 6 of the N-acetylgalactosamine residue of chondroitin. This family also includes Heparan sulfate 2-O-sulfotransferase (HS2ST) and Heparan sulfate 6-sulfotransferase (HS6ST). Heparan sulfate (HS) is a co-receptor for a number of growth factors, morphogens, and adhesion proteins. HS biosynthetic modifications may determine the strength and outcome of HS-ligand interactions. Mice that lack HS2ST undergo developmental failure only after midgestation,the most dramatic effect being the complete failure of kidney development [1]. Heparan sulphate 6- O -sulfotransferase (HS6ST) catalyses the transfer of sulphate from adenosine 3'-phosphate, 5'-phosphosulphate to the 6th position of the N -sulphoglucosamine residue in heparan sulphate [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.78 0.70 -4.70 66 2144 2012-10-05 12:31:08 2003-04-07 12:59:11 9 38 506 1 1196 2133 483 218.30 15 67.30 CHANGED pchhlssca.pllaChlsKsusoshpplhhhLts.................hpps.phs.....pshsp.p..h...ptthtphsphp.pph.tpthp.t........................................hhpFhhVR............cPhcRllSuapsKhh.........htphh.upphhphhc................t.....t.spapcFhphlhstt.......pht......hsp...H..............ats.....htp.hshsshh..cachluphEshp....cctshlhphhsh....shth....phsp.................pppppsssphttphhhphs.thhctlhclYph.DatlF.sYs ..........................................................................................................................h...........ll.ahhl.Ksusosht.phhh.h.h.t...........................................................ht.......th....................tth.h.......................................p............hh..ph....s.ph.....tt.......ht..p...............................................................................................................................hhhhhhlR.............cP...hp...R..h...lS..t...a.h..t.h.h..........................t..h.........t.t..h.hpthp..............................................................t....hshp..p...F.h.p...h..l.h.s.t....................t...........s.............p......................................................h.....................ht..h.p.....h..h......pa.s.h.l.Ghh...E.php............pp..h.t....h.l...p.h..h.sh...............h...........ph.t..............................................p.......t.......h..t.h......h..s.........t.hphh...phh.h.Dh.ha.ta............................................................................................................................................................................. 
1 489 622 928 +892 PF04935 SURF6 Surfeit locus protein 6 Moxon SJ anon Pfam-B_5497 (release 7.6) Family The surfeit locus protein SURF-6 is shown to be a component of the nucleolar matrix and has a strong binding capacity for nucleic acids [1]. 22.00 22.00 22.10 22.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.72 0.70 -5.04 29 306 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 262 0 222 303 4 204.60 26 49.88 CHANGED hppRptllcp+R++cctc+p+++.pcRpct+ppcp...spcsctpsspppttts................................pssstpstsslsFsplpFs-sptsspshphhcpptpph.......t..shKphLp+lEs++p+LpphcEp..............Ktt-lc-KppWppAhh+AcGhKl+DDp+LL+KALKRKEppKcKSppcWpERpppVpcphppRQKKRc-NLpcR+csKtpK+hppt.+pKG..+hh ...........................................................................................................s....thhpt++p+c.ctc+t++.+.pp+p.ct+tccp...........tpc.tp.t.t.spttpt.t..t.........................................................................tpsttp..tssslsF.uplt..hs-spths...pphpphp..p...t.t.t...............p....shcp.hL.pclct+cp+Lpplccp..............Kttclcc..KptWpsAht+AcG.K.....V+..D...D........cLL+KulKRK-ppKcKSpcc..Wc-RpctVpcph....pp....RQcKR...c...c.Nlc.pR+cpK....t..p++hppt.ppKt................................. 0 74 123 186 +893 PF01805 Surp Surp module Bateman A anon Bateman A Family This domain is also known as the SWAP domain. SWAP stands for Suppressor-of-White-APricot. It has been suggested that these domains may be RNA binding [1]. 21.10 21.10 21.10 21.10 20.90 21.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.50 0.72 -4.31 64 1689 2009-01-15 18:05:59 2003-04-07 12:59:11 15 52 309 8 1115 1679 12 53.40 30 10.94 CHANGED .hplIcpoApaVscpG.pFEt....hlhp+p...ps.spFsFLp..ss.sshasYYph+ltphp ................hplIc+hAp.F.VA..+..sG......p..FEp......hlh.p+p.....tpN..sp.........F..s.FLp......sp.ss....hasYYph+ltph.............. 
0 351 557 826 +894 PF02201 SWIB SWIB/MDM2 domain Bateman A anon Bennett-Lovsey R Domain This family includes the SWIB domain and the MDM2 domain [1]. The p53-associated protein (MDM2) is an inhibitor of the p53 tumour suppressor gene binding the transactivation domain and down regulating the ability of p53 to activate transcription. This family contains the p53 binding domain of MDM2 [2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.54 0.72 -4.30 41 1464 2009-09-11 13:54:42 2003-04-07 12:59:11 13 39 457 79 836 1368 324 74.50 28 17.21 CHANGED hs+.h.lo.pLtphlGssp...h.oRs-llctlWpYIKp+sLQDPpsK+hIhCDppLppl..Fts.cp.lshhplschLspHhh ..................................t.htlsspLtp..hl....Gtpp.......h.oRspllptl.WpYIK.........ppp....L.............QD.sps+.......+.h.....Ih..C.D.cp.Lpp.l......Ftt...pp......l...phh.pl.schl.stah....................... 0 228 451 658 +895 PF04434 SWIM SWIM zinc finger Aravind L anon Aravind L Domain This domain is found in bacterial, archaeal and eukaryotic proteins. It is predicted to be organised into two N-terminal beta-strands and a C-terminal alpha helix, thus possibly adopting a fold similar to that of the C2H2 zinc finger (Pfam:PF00096). SWIM is thought to be a versatile domain that can interact with DNA or proteins in different contexts [1]. 19.60 5.00 19.60 5.00 19.50 4.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.58 0.72 -4.45 88 3643 2009-01-15 18:05:59 2003-04-07 12:59:11 12 91 1791 0 1241 3096 130 38.50 23 6.69 CHANGED apVplt......................ptsCoCthaph......pthsCpHhlAlhhtht .............................................................tspCo.Cs........................tsh.hC+Hhlulhh...h........... 
2 294 711 981 +896 PF04433 SWIRM SWIRM domain Aravind L anon Aravind L Domain This SWIRM domain is a small alpha-helical domain of about 85 amino acid residues found in chromosomal proteins. It contains a helix-turn helix motif and binds to DNA [1]. 26.10 26.10 26.10 26.20 26.00 26.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.82 0.72 -3.86 79 1274 2009-01-15 18:05:59 2003-04-07 12:59:11 12 43 286 28 857 1242 5 86.30 27 11.90 CHANGED hhhsshu...shhshs.plpphE..pphhschhh..................................ssp...sYlphRshllppa..phpssphLshoss..........+csht......Dsstlt+lapFLpcaGhINa .....................h....shsshhsh..s...pl.pshE..pphhschhtt....................................................sPp....hYLshRshhl..pp..a.....phs..st.th..Lshsss..........+pslp.........DlstltRl.asFL-phGhINa........... 0 243 449 684 +897 PF00804 Syntaxin Syntaxin Bateman A anon Pfam-B_1158 (release 2.1) Domain Syntaxins are the prototype family of SNARE proteins. They usually consist of three main regions - a C-terminal transmembrane region, a central SNARE domain which is characteristic of and conserved in all syntaxins (Pfam:PF05739), and an N-terminal domain that is featured in this entry. This domain varies between syntaxin isoforms; in syntaxin 1A (Swiss:O35526) it is found as three alpha-helices with a left-handed twist. It may fold back on the SNARE domain to allow the molecule to adopt a 'closed' configuration that prevents formation of the core fusion complex - it thus has an auto-inhibitory role. The function of syntaxins is determined by their localisation. They are involved in neuronal exocytosis, ER-Golgi transport and Golgi-endosome transport, for example. They also interact with other proteins as well as those involved in SNARE complexes. These include vesicle coat proteins, Rab GTPases, and tethering factors [6]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.23 0.72 -3.86 171 1633 2012-10-03 05:55:03 2003-04-07 12:59:11 20 15 425 9 971 1702 19 101.60 21 33.55 CHANGED thspF.....hppspclppplpplppplp.clpphp....pphhshss..tp..................................phppclcplspplpp...hh.......pplppplc.........p......lppt..............................ttsssstphppsph.pslspcFpchh ........................................spFh.pp.spcIppplpp.lppplpclpphp...................pphlssss...sp......................................pphcpcl-pLstpIpp.ph................pplc..pplc.........................s...........................lppp......................p....................tppt.tspssssh+htpsp....hs.....sLupcFh-lh................................................................................................................................................... 0 281 496 740 +898 PF00907 T-box T-box Bateman A anon Pfam-B_363 (release 3.0) Domain The T-box encodes a 180 amino acid domain that binds to DNA. Genes encoding T-box proteins are found in a wide range of animals, but not in other kingdoms such as plants. Family members are all thought to bind to the DNA consensus sequence TCACACCT. they are found exclusively in the nucleus, and perform DNA-binding and transcriptional activation/repression roles. They are generally required for development of the specific tissues they are expressed in, and mutations in T-box genes are implicated in human conditions such as DiGeorge syndrome and X-linked cleft palate, which feature malformations [2]. 
20.50 20.50 20.60 20.60 20.10 20.40 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.08 0.71 -4.81 22 2143 2012-10-03 00:25:27 2003-04-07 12:59:11 17 23 269 9 1083 1755 0 155.30 47 35.04 CHANGED plpL-spcLWpcF+plsTEMIlTKoGRRMFPsh+lploG....LDPpuhYhlhlDhVPsDspRaK..assucWhsuGKAEPpsP.sRhYlHPDSPssGu+WM+pslSFpKlKLTNst...sssGp.l...lLpSMHKYpPRlHlVcs..ss..s..........ph.hpTasFPETpFIAVTAYQNpcITpLKIcpNPFAKGFRDs ...........................h.LptttLWtpFp........p..h..s..sEMIl..T.K.t.G...R.....R..........M....FPshchp..l..sG....Lc..s........pspYhl.hhD.h..lss..D...s.....p...R.......a+.............a..............p..s..u....p........W..h...s.....u...G...K.....A........-......st....h.......s...............s.R.....h...Y.............l....H..P....D...S...P...s...s...GtpW..........M+....p....l.SF.....pK..l..KL..........T...........N..............N....h.............s.....p....p......G........p....l.....................lLpS.h..HK.Y..pPR.l...Hllcs..sp..t..................................p.t.hpoasFs.ETpFhAVT..AYQNp..p................ITpLKI.cpNPFAKGFR-............................................... 0 218 311 758 +899 PF04719 TAFII28 hTAFII28-like protein conserved region Waterfield DI, Finn RD anon Pfam-B_4085 (release 7.5) Family The general transcription factor, TFIID, consists of the TATA-binding protein (TBP) associated with a series of TBP-associated factors (TAFs) that together participate in the assembly of the transcription preinitiation complex. The conserved region is found at the C-terminal of most member proteins. The crystal structure of hTAFII28 with hTAFII18 shows that this region is involved in the binding of these two subunits. The conserved region contains four alpha helices and three loops arranged as in histone H3 [1,2]. 
18.90 18.90 18.90 19.70 18.50 18.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.85 0.72 -4.19 10 332 2012-10-10 12:36:46 2003-04-07 12:59:11 9 8 271 2 230 363 2 87.60 40 33.70 CHANGED hQhLlosFocEQhsRYEsaRRSuhpKuslK+Llsp...lTG.polupsVlIulsGlAKVFVGElVEpAhcVpc..Epp-os.....PlpPcHlREuh+RL ............................tlLlssFoc-Qh.sRYEhaRRou..hsKu..slK+Llps.........ls.u.....p...SVspNVs..IshuGluKVFVGElVEc...........Ah.sVpc......cht..-ss.............................slpPp+h+cuhh...................................................... 0 73 122 187 +900 PF04177 TAP42 TAP42-like family Wood V, Finn RD anon Pfam-B_5735 (release 7.3); Family The TOR signalling pathway activates a cell-growth program in response to nutrients [1]. TIP41 (Pfam:PF04176) interacts with TAP42 and negatively regulates the TOR signaling pathway [2]. 26.00 26.00 27.00 26.70 24.90 25.60 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.09 0.70 -5.49 39 389 2009-09-11 05:02:27 2003-04-07 12:59:11 7 7 295 3 263 396 3 308.00 25 90.82 CHANGED sLpphFppuhphhs.plcp....ts..psosphQsplp...psIpphpcspphl.spLuLFSsNEslE-luTssLpYLhlsYaLupLhp+..................ssspRhphLptupphatpFLsphcsYc.llstp.tchhcph.............st.hs...shsssstpRptKIppaKpcKELcp+Lphlcpthpp.............................psD-E.............hRchaLspLphhhhcohppL-tlspElpl.Lpthtphttpst..........................................pphhc..............t.h...lsppspsLpPF.......hl..........hs.pRpphpppVFGsGas.LPTMol-Eah-pEhcpGthhcst......tpp.................ps.ppct.....c-cc.EpsDcEsh...+.sRpWD-aK-sNP 
...................................................l.thattu.phht.tht.........t.s.u.thpp.t.lp...pslthhppstphl....splu.LFSsNEsl--luTssLpaLh.lsahlupLh.+..............................hs.spRhphLptup.......thhh......pF...LphhcpYp.l..ls.p....phhpph.............................t.......shsshstpRptKItpa+pp.Kc....l...cp+Lp.........tlcpthcp...............................................................psD.-E.....................hhRch..alhplphhhhp......uhppl-.lppEhph.Lpth.t....ttt.stpt.....................................................................................t.h.....tttstsl.p.sF...........hl......................pRpph..ptpVFt...sGas.LPTMol-Eah-pchcp....Gth.ptt........ttp......................tt.ppp..........p-pp.......-.......psDpcph.....+..sR...tWD-....aK-sp............................... 0 91 141 208 +901 PF03943 TAP_C TAP C-terminal domain Bateman A anon Bateman A Domain The vertebrate Tap protein is a member of the NXF family of shuttling transport receptors for nuclear export of mRNA. Tap has a modular structure, and its most C-terminal domain is important for binding to FG repeat-containing nuclear pore proteins (FG-nucleoporins) and is sufficient to mediate nuclear shuttling [1]. The structure of the C-terminal domain is composed of four helices [1]. The structure is related to the UBA domain. 21.30 21.30 21.40 22.60 21.20 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.40 0.72 -4.54 8 365 2012-10-01 23:03:33 2003-04-07 12:59:11 8 19 218 4 263 372 0 49.80 36 8.36 CHANGED QEMlpAhSsQSGMpl-WSpKCLpDNsWDYs+AupuFspLpscscIPtEAFt ......phltthoppouMshcaSppC.L.p.p.s.sW...saspA..hpsFp.plp....spspIPt-AF....... 0 74 111 185 +902 PF03134 TB2_DP1_HVA22 TB2/DP1, HVA22 family Mifsud W anon Pfam-B_837 (release 6.5) Family This family includes members from a wide variety of eukaryotes. It includes the TB2/DP1 (deleted in polyposis) protein (e.g. 
Swiss:Q00765), which in humans is deleted in severe forms of familial adenomatous polyposis, an autosomal dominant oncological inherited disease. The family also includes the plant protein of known similarity to TB2/DP1, the HVA22 abscisic acid-induced protein (e.g. Swiss:Q07764), which is thought to be a regulatory protein. 27.20 27.20 27.40 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.26 0.72 -4.24 93 1350 2009-01-15 18:05:59 2003-04-07 12:59:11 14 26 310 0 844 1293 9 90.30 32 38.56 CHANGED hshhupllss.lhGhlYPuYtShKAlcos...stp......-sppWLsYWllauhholhEhh.sshhlt.WlPhYahhKhhFllW......LhhP.pspGAphlYcphl+Phh ........................................................h.....phls.lhGhhYPu..YtShKAlcop.....shp........................-.ppW.lhYWllauh.h.ol.h..Ehh..s.......chh....l.......s..W.........h..P....hYa.hK..h.........hFllW......LhhP...tsp...G...uphlYcphl+Ph................................. 0 265 450 669 +903 PF00352 TBP Transcription factor TFIID (or TATA-binding protein, TBP) Finn RD anon Prosite Domain \N 22.90 22.90 23.10 23.00 22.70 22.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.62 0.72 -4.62 135 1985 2012-10-02 11:58:57 2003-04-07 12:59:11 16 8 584 132 1141 1872 373 83.40 38 62.81 CHANGED tss.hpIpNlVAosslst.plcLcpl.uhph...css...-YcPE.pFPGLlaR....hp..cP...KsshL.IFsSGKlVlTGuKs.c-sppA.hcplhphLpchs ................................................................h..phplpNlV.uossl..........th..plc.Lcsl.Ahpt......ppu..........pYcP.......-....hF.su.l..I.hR.........hp....cP..................+....sshLIFsSGKlVhTG.AK.......s..cc-.sthA.hcphhtllpch................. 0 351 608 909 +904 PF03148 Tektin Tektin family Mifsud W anon Pfam-B_3069 (release 6.5) Family Tektins are cytoskeletal proteins. They have been demonstrated in such cellular sites as centrioles, basal bodies, and along ciliary and flagellar doublet microtubules. 
Tektins form unique protofilaments, organised as longitudinal polymers of tektin heterodimers with axial periodicity matching tubulin. Tektin polypeptides consist of several alpha-helical regions that are predicted to form coiled coils. Indeed, tektins share considerable structural similarities with intermediate filament proteins. Possible functional roles for tektins are: stabilisation of tubulin protofilaments; attachment of A and B-tubules in ciliary/flagellar microtubule doublets and C-tubules in centrioles; binding of axonemal components [1]. 25.10 25.10 25.20 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.31 0.70 -5.92 37 1118 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 349 0 335 991 0 261.70 40 82.81 CHANGED WppsNhtphppupspRspuc+lpp-oppLhpEsptpTppsQpDssc+LspRlpDIphWKpELpcplcchtsEhstLtp.KpRlEcALpshp.hPlslsp-CLphR-pRhuh.DLV+DpVEpcLhKEl-lIcsspplLpcslppshcQlph.Rsu+ppLEhDhsDKhpAhpIDspChpL.sNsSsslpapssss+hs.......ssss..o.csWtcFoppNlp+Acp-ctsStpLRphl-plLppsusDlcsQtsssshAFppRlpEhpcA+scLchpLtcshpEIsptEcsIptLccAlp-KpuPlKVApTRL-sRot.RPNVELCRDpsQhtLlsEVpplpsolptLpp+LtpAcssLpsLhcpphpLEc-lthKpNSLhIDc.+ChphRpphss ............................................W+cNN..pYphospupccActLch.po++.hpcshsshshp.cDSs+K.LspR.............IcDlshWKcclp+slpslscEIspLcc...........sRs+LcsAhchLh.hP.uI.ucECLcL...........Rs..sRhcs...DLV.+D.-uEpEL.l..KE.Vslltpl++lhhsTL.s+s-EQh...h....hN....................+uAKpslEhDaSD..KhsuLcl.....Dc....cstsL......ssp..S.shl...a.+s...G.ssRhs.....................ps...so....o......E.......Wtcas....pt......................................................................................................................................................................................................................................... 
0 101 127 224 +905 PF01397 Terpene_synth Terpene synthase, N-terminal domain Finn RD, Bateman A anon Pfam-B_728 (release 3.0) Domain It has been suggested that this gene family be designated tps (for terpene synthase) [1]. It has been split into six subgroups on the basis of phylogeny, called tpsa-tpsf. tpsa includes vetispiridiene synthase Swiss:Q39979, 5-epi- aristolochene synthase, Swiss:Q40577 and (+)-delta-cadinene synthase Swiss:P93665. tpsb includes (-)-limonene synthase, Swiss:Q40322. tpsc includes kaurene synthase A, Swiss:O04408. tpsd includes taxadiene synthase, Swiss:Q41594, pinene synthase, Swiss:O24475 and myrcene synthase, Swiss:O24474. tpse includes kaurene synthase B. tpsf includes linalool synthase. 20.20 20.20 20.60 20.30 19.90 20.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.24 0.71 -4.91 141 1399 2009-09-17 01:05:38 2003-04-07 12:59:11 16 21 224 56 399 1601 0 164.00 29 30.94 CHANGED WscpF.l..............sasst........cphhpplcpL.+pcV.+p.hl..s.sts.....phhppLpLIDslpRLGluaHFcpEIc.phLcplapp.htppp...tt...........DLassALtFRLLRpaGapVSsD.lFpcF.+-cpGp..Fpp.shh..sDs.+GlLsLYEAo.aLthcGE..s.lL-EAhs.FoppaLcphh...ttps..s.............L..spplppAL ................................................Wt..hh............tht............pphhphh.p.p.L..hpch.....pp.......hl..s..sh............shhppLphlDsl.....pRLGlshaFcpEIc.p.hLpp...la.pphtpps.........................DLh.ss.ALtFRLLRppG..a.p...V.........S..s..-......lFp.pF..pcppGp...Fp.........t....sh.........t.......pD.......s..cuhLsLYcAo.pl....t.h.p.uE..p..lL--Aht.FopphLcphht...p.p................LtpplphuL.............................................. 
1 37 241 326 +906 PF00440 TetR_N tetR; Bacterial regulatory proteins, tetR family Finn RD anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.07 0.72 -4.32 109 63222 2012-10-04 14:01:11 2003-04-07 12:59:11 18 89 4528 525 17576 49863 3840 46.40 26 22.51 CHANGED IlpuAhclhs.ccGapssohpcIAcpAGlupuslYhaFs.uKcpLhtsl ....................llpuA.h.p.l.ht..p....p....G.....h....p....s...s..o.lpc....lAc..pA...G...l...s..p..u..s..l.Y.p.aF.....s..sKcsLh.t..................... 0 5211 11622 15040 +907 PF03850 Tfb4 Transcription factor Tfb4 TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Domain This family appears to be distantly related to the VWA domain. 21.00 21.00 22.10 22.10 19.50 19.30 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.08 0.70 -5.14 26 353 2012-10-10 16:07:06 2003-04-07 12:59:11 9 8 279 0 253 344 4 246.40 33 81.47 CHANGED sSLLsllLDTNPptWsphppp.......hslopslsu........llVFlNAHLuhNpsNpVAVlAupsps.scaLYPssp...............tsspsshshst.st.............shY+tF+hV-Etlscclpplhppsstss.psp.....ohl.uGALohALsYINRh.p.spu..........................................pphpSRILl....lsuspDsshQYIPlMNsIFuAQ+hplPIDlspLsG..DusaLQQAuDsTsGlYl+.lsp.....scGLlQYLhhuFhsD.ptRshllLPspssVDFRAuCFC..H++llDlGaVCSVCLSIFCph..P........CsTCs 
...................................................................sLLslllDssPhhW.t....ht..............................hslsphlts................lhsFhNuHLhhstsNplAl.lA.u.p...pt....spaLYPs.s............................................p...p.t....t................................shap.hp.hpphlhpplpplhp.p.sp.t.........t.t.....s.t...........ohl.uGuLuhALsaIsRh.p...h....t...............................................................thpuRILl........lps..up-ts.QYhsh.MN.s.lFuAQ+.....plsIDsshL.....ss.....sushLQQAschTsGlYlp..s.t.p.....................tu...LlQYLh....h...sF.h...s.......-.......p.....Rp....l.......h.h.........P..s........s.p......VDa...R..A..u.....CFC.Hcpll-lGaVCSlCLSIaCp.....s..........C.TCt............................... 1 84 138 210 +908 PF02269 TFIID-18kDa TFIID-18; Transcription initiation factor IID, 18kD subunit Bateman A, Mian N anon Pfam-B_3681 (release 5.2) Family This family includes the Spt3 yeast transcription factors and the 18kD subunit from human transcription initiation factor IID (TFIID-18). Determination of the crystal structure reveals an atypical histone fold [1] 20.90 20.90 20.90 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.04 0.72 -4.17 6 572 2012-10-10 12:36:46 2003-04-07 12:59:11 11 6 281 2 396 577 1 87.00 33 36.04 CHANGED phappElpsMMYuhGDspcPhsETssllE-lVps.lhElhppAhclup.cttpplplEDlhFLIR+D.tKlsRl+chLohp-hhpKstKphD- ....................h..att-lpphMauhGD.......s.......p.......p.......PhsETssllE-lVhp.lh...............-hhp.....p.A....tpl.....uppp......t.t.p.......p...l.psEDllFLlR..+..D...tKluRlcchLphp.-.hp+stKs.-t.................... 
0 122 205 319 +909 PF03847 TFIID_20kDa TFIID_A; Transcription initiation factor TFIID subunit A Griffiths-Jones SR anon PRODOM Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.07 0.72 -3.69 8 348 2012-10-10 12:36:46 2003-04-07 12:59:11 8 10 262 2 243 431 8 68.60 45 16.48 CHANGED oK+KLp-LVppl.....DssspLDs-VE-lLL-IADDFV-SloshuCpLAKHRKScpLEs+DlpLHLERsaNh ...................sK++Lp-LV+pl................DssppLDs-.VEEh..LLplADDFl-sVsstAC.pLAKHR+Ss.....sLEl+DlQLaL....ERsaNh................. 1 81 131 200 +910 PF04494 TFIID_90kDa TFIID_WDA; WD40 associated region in TFIID subunit Bateman A, Wood V anon Pfam-B_9152 (release 7.5) Family This region, possibly a domain is found in subunits of transcription factor TFIID. The function of this region is unknown. 20.60 20.60 21.20 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.59 0.71 -4.13 35 427 2012-10-02 14:46:49 2003-04-07 12:59:11 10 17 260 14 300 411 3 132.80 27 19.44 CHANGED sp.hstttp-s..cPppYtpuYshL+sWl-soLDlYKsELp+.lLYPlFlasaL...-LlspshsppA+..pFasca+s-ap.hHsp-..lppLpslspspHlc-NphAptapssKYplplocsuasLLlpFLpcppss...llltllspals ...........................s........pssPtpYpptastL.+pal.cs.s...L.Dh.a+h.ELpp.l.LYPlFVahaL...cLVtsshtpcA+........pFap+..apspa.......tt....app-......lcpL.p.s..........lpp.p.c..........l..ppNph.h..p..tacssKahlplocsua.................phLhpaLpppppt....hlhtllppal............................................... 0 91 144 231 +911 PF04253 TFR_dimer Transferrin receptor-like dimerisation domain Bateman A anon Bateman A Domain This domain is involved in dimerisation of the transferrin receptor as shown in its crystal structure. 
21.00 21.00 21.40 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.45 0.71 -4.55 36 850 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 269 58 560 820 42 119.80 23 16.83 CHANGED slohp.lhpAhppapcsupphpphhcphc..........psshltl+h....h..NcplhplE+sFlss...hGlPs.+sha+HVlauPs.hs..ts........toFPultDultptp................WspspcQlsllshslpsAAstLtts .................................................t.lshp.LhpAhtpappsupphpp.hhp.php.........................................pp.s.h.th+h......h.......Nc+.....lhh..lE+sFls........................tGl.P.s.Rsaa....+Hl.lau......Put.hsthss.................psFPulh-ulpptc......................hspspcplshhshslpsAAthL...s...................... 0 150 267 427 +912 PF02824 TGS TGS domain Aravind L anon Aravind L Family The TGS domain is named after ThrRS, GTPase, and SpoT [1]. Interestingly, TGS domain was detected also at the amino terminus of the uridine kinase from the spirochaete Treponema pallidum (but not any other organism, including the related spirochaete Borrelia burgdorferi). TGS is a small domain that consists of ~50 amino acid residues and is predicted to possess a predominantly beta-sheet structure. There is no direct information on the functions of the TGS domain, but its presence in two types of regulatory proteins (the GTPases and guanosine polyphosphate phosphohydrolases/synthetases) suggests a ligand (most likely nucleotide)-binding, regulatory role [1]. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.68 0.72 -4.13 61 10557 2012-10-03 10:59:06 2003-04-07 12:59:11 16 41 4800 19 2820 7605 3711 61.10 35 9.24 CHANGED ltlah.PcGphhc........ltpGuT.shDhAhtIHpslucphhhAhVsG.......p.hlslsphLcssDllcIls .......................lhlho..PcGslhc................lspG.uT..slD...hA.h.sIH.s.s.lucpsluA+..VNG.......................................c.hVsLsp.Lp..sssplEIlT....................................................... 0 893 1705 2346 +913 PF00763 THF_DHG_CYH Tetrahydrofolate dehydrogenase/cyclohydrolase, catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_882 (release 2.1) Family \N 21.60 21.60 21.70 21.70 21.40 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.28 0.71 -4.06 105 5546 2009-01-15 18:05:59 2003-04-07 12:59:11 18 17 4681 28 1648 4097 2479 116.10 41 35.35 CHANGED pllDG+tlAppl...ppcl+pclpplptp..sh.PpLulllVG-csASphYVcsKp+tspclGlcsphhplspshopp-LlshlpcLNpDsslcGIllQLPL..Ppplspp..pllpsIsPcKDVD ...........................................llDGKtlAp...pl...cpcl...tp...cV............pt..l..........pt...........p........s....hsPu...LA..VlLVGs...c.........P....A.SpsYVpsKp+us..cclGh.pS............phh..cL..........P...p...s.....s....o.............p........p.......EL.LshI.ccLNpD.s.slc.GILVQLPL.......P.pp......I..D...pp...pllpsIsPcKDVD.......................... 
0 523 1025 1369 +914 PF02882 THF_DHG_CYH_C Tetrahydrofolate dehydrogenase/cyclohydrolase, NAD(P)-binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_882 (release 2.1) Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.02 0.71 -5.16 91 5651 2012-10-10 17:06:42 2003-04-07 12:59:11 14 24 4719 28 1679 4648 3203 157.80 45 48.31 CHANGED HPhNlG+Lsts..ps....shhPCTPtGlhcLLcp.hsls...lpGKpsVVlGRSsIVG+PluhLLhp.........p........sATVTlsHSpT...csLsphs+pADIllsAsGpPphlpu-hl...KsGAlVIDVGINRl..........tss+.LsGDV-F-sltch.AuhITPVPGGVGPMTlAhLhpNTlpuAcpph ..................................................................................HPhNlG+Lhhs........ps....shhPC.TPtGlh.p.L...L..c.........c......h....s.l......c..............lpG+.p..AVVlGRSs...I.......VG..+PhuhLLLp........................t.....................s.A.TVTl..sH...op......T...................+s...L..tp..h........s...c.......p.......A.D.....Il.l...sAl.....G.+.......P.p..h........l........s..u...-.h...l..............K............G..A...l..VIDV.GlNRh.................................tsu+..LsGD..V..c.a.......-.s..s..tch..A..u.h.I..TPVP.GGVG.PMTlAhLlpNTlpAscp.h.................................................................................... 0 539 1040 1395 +915 PF00899 ThiF ThiF_family; ThiF family Finn RD, Bateman A anon Pfam-B_59 (release 3.0) Family This family contains a repeated domain in ubiquitin activating enzyme E1 and members of the bacterial ThiF/MoeB/HesA family. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.59 0.71 -4.34 90 12207 2012-10-10 17:06:42 2003-04-07 12:59:11 16 111 4208 117 4344 10198 2664 134.50 27 37.94 CHANGED ppu+VlllGsGGlGuhssphLAts.Gl.....GpltllDtDpl-hSNLsRQhlaspsc..l...Gp..sKspsAppplpplNP.plp..lps....hsptl...................stpshppll.......t..sh..D.....lllDu........hDs..hssRhhlsp..hshpt.............th.P...hlpuush.shtups.hhh ............................pu+Vlll..Gh.G.G..lGu......ssphLA...t...u...G..l.........................Gpl..s..l............l..............Dh......D....s..V...c....ho.N.....L.pR....Q..h..l.h...........p.....p....s..s.....l....................Gp...........sK...s.........p.s.s..tc..p........l.......p............p..l......N.....P......p..l.p.......l.ps....hpp.tl............................................s.t..p..s..h...p..p...hh....................p...ph......D...............lV...l.D..s.................hD.s......h.p..s...+.....h...h...l..sp...hsht.t.......................p.h...P.....l.lpuush..uh.......hh............................................................................................................................... 2 1492 2600 3620 +916 PF02597 ThiS DUF170; ThiS family Morett E, Mian N, Bateman A anon COG2104 Family ThiS (thiaminS) is a 66 aa protein involved in sulphur transfer Swiss:O32583. ThiS is coded in the thiCEFSGH operon in E. coli. This family of proteins have two conserved Glycines at the COOH terminus. Thiocarboxylate is formed at the last G in the activation process. Sulphur is transferred from ThiI to ThiS in a reaction catalysed by IscS [1]. MoaD, Swiss:P30748 a protein involved sulphur transfer in molybdopterin synthesis, is about the same length and shows limited sequence similarity to ThiS. Both have the conserved GG at the COOH end. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.42 0.72 -3.55 145 6781 2012-10-03 10:59:06 2003-04-07 12:59:11 15 34 3470 44 1951 4749 1939 72.90 24 84.19 CHANGED lhh.uphpchss............tstTltpLlppL....s.hph.....................ptltltlNtchlpt.....sp.l..pcGDplsllPsVuGG ...................................................................hh.ut.tphhs...........................t.s...t..Tl.....t.pLlppL.....t.tpt..........................................ptl..t.lul.Np...p..hl..s.......p.................s.......p.......hl................p-GD......plsl..lssVuGG....................................... 0 625 1287 1677 +917 PF01833 TIG IPT/TIG domain Bateman A anon [1] Domain This family consists of a domain that has an immunoglobulin like fold. These domains are found in cell surface receptors such as Met and Ron as well as in intracellular transcription factors where it is involved in DNA binding. CAUTION: This family does not currently recognise a significant number of members. 20.90 19.70 20.90 19.70 20.80 19.60 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.09 0.72 -3.99 131 8379 2012-10-03 16:25:20 2003-04-07 12:59:11 19 516 691 126 5044 7498 871 86.60 18 16.26 CHANGED Pt.lssls..P...ttsssts.GstlolpG..psF........................tssssplhhusttsssh............hhssspl.................hspsPsts............su.shslplthss............tssshsapa .................................P.lpslp.....P.......ppu...sh....t.....G...G....sp.......loI....pG......p.s...h..............................................................t.s.ss..h..p....V.....h.l......u..st...t...sssh...................................hhss.spl..........................................................hCps..Psts..........................su..th..p.l.plthss............................................................................................................................. 
0 2483 3116 3961 +918 PF04280 Tim44 Tim44-like domain TIGRFAMs, Finn RD, Bateman A anon TIGRFAMs (release 2.0); Family Tim44 is an essential component of the machinery that mediates the translocation of nuclear-encoded proteins across the mitochondrial inner membrane [1]. Tim44 is thought to bind phospholipids of the mitochondrial inner membrane both by electrostatic interactions and by penetrating the polar head group region [1]. This family includes the C-terminal region of Tim44 that has been shown to form a stable proteolytic fragment in yeast. This region is also found in a set of smaller bacterial proteins. The molecular function of the bacterial members of this family is unknown but transport seems likely. The crystal structure of the C terminal of Tim44 has revealed a large hydrophobic pocket which might play an important role in interacting with the acyl chains of lipid molecules in the mitochondrial membrane [3]. 29.60 29.60 29.60 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.54 0.71 -4.22 140 1637 2012-10-03 02:27:23 2003-04-07 12:59:11 10 10 1251 4 675 1440 1309 139.80 19 44.40 CHANGED sutul.p.thpph-ssFssppFlpuA+stath.lhtAaspGDhcsL+shlop-hapthppslsp+.tptG..............shcsphlslpcsclhpuphc..sspshloVcFhsp.hth.hcpts.Gpll-G....s.spspchp-lWsFsRs......hsss.sssWpLsuhpp ..........................................................t.....t.hhths..ssFs....ptF..lptscphah..l.tA...a.s..p.s.-.........h....cs....l....+.p.hhotphas.thpt....tl...ppt..t.G.......................t.psp...l.tl.hps.c.l..spsphp.............sst..shlolp....h..p...h...h...h.t.t..s.....G.plhcG..........sts.ps....p.p......hp-lW..shsRs..........tst...tssWhlsshp...................................................................... 0 200 377 531 +919 PF04176 TIP41 TIP41-like family Wood V, Finn RD anon Pfam-B_12821 (release 7.3); Family The TOR signalling pathway activates a cell-growth program in response to nutrients [1]. 
TIP41 interacts with TAP42 and negatively regulates the TOR signaling pathway [2]. 25.00 25.00 27.20 26.60 23.00 22.20 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.01 0.71 -5.03 29 320 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 281 0 224 312 3 175.30 40 57.53 CHANGED MlFGcNhlpIpH.poGhsIcFNAhDALctVcppsp..sl+Vuhuc-WpcSRppsp.tp...........................-hlKPaDWTYTTsYpGTlhs...............ttthhhpsosccIslc+LpptDPILFaD-llLaEDELuDNGIShLSlKlRVMssphhLLhRFFLRVDsVLhRlpDTRlYh-Fsp......shllREapp+Essacplpp .........................................................MlFGsNhlpl....pH...........tsGht..lpFNAhDALctV.......p...p....pt.......t..............l.+V..uhupcWp...poRppsp....t.t...............................cll.+PaD.WTYTTs.YpGTlts..................................................t.h...ph...p.sosp..pIsh-tL..p.pp.-.s....ILF.a--lhLaEDELsDNGlShLSVKlRVMP.s.s..hhLLhRFFLRlDsVLlRlpDTRlYh-Fsp...........shllREapt+Esphppl..p........................................... 0 72 123 186 +920 PF01582 TIR TIR domain Bateman A anon Pfam-B_571 (release 4.1) Family The Toll/interleukin-1 receptor (TIR) homology domain is an intracellular signalling domain found in MyD88, interleukin 1 receptor and the Toll receptor. It contains three highly-conserved regions, and mediates protein-protein interactions between the Toll-like receptors (TLRs) and signal-transduction components. TIR-like motifs are also found in plant proteins thought to be involved in resistance to disease. When activated, TIR domains recruit cytoplasmic adaptor proteins MyD88 (Swiss:Q99836) and TOLLIP (Toll interacting protein, Swiss:Q9H0E2). In turn, these associate with various kinases to set off signalling cascades [3]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.52 0.71 -4.74 45 4612 2012-10-02 18:56:14 2003-04-07 12:59:11 15 676 330 18 1819 5111 86 129.70 23 17.89 CHANGED sFlsaps...........cscctFlsclhppLccp.....uhplalcp+.hhtGtphhss....lhpsIccS+hslllhScsaspSp..WC.LpELhphhcpthp.s..plllslFhpVcspclp.....pppscatpshtpthph.tsc.............tphthW+pshts ..................................................sFlsapt......................csp..ph.h.....h...s...cLh.....p....L...c...p.p.................s.h..p..l...h.l.c....c.+......-......h.........s..G....p.p..l...h.ss...............lh.p.u.I..c.........p.........S+...p.s.lh.VlS....ps....a.....sp.......Sp.............W..C...h..E..hh..h........h..h...p......p.............h..h...p...p......t.............t...p..l.l....l.h..l..h.....p.l..s.........p.plh...............tp.h.p.hh.h...thp...p.h....h....p...h.pp.....................tp....hWpphh..t................................................................................ 0 512 992 1303 +921 PF03920 TLE_N TLE_N-terminal; Groucho/TLE N-terminal Q-rich domain Finn RD anon DOMO:DM01627; Family The N-terminal domain of the Grouch/TLE co-repressor proteins are involved in oligomerisation. 24.00 24.00 25.20 24.00 23.60 21.00 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.83 0.71 -4.34 15 565 2009-09-11 00:33:27 2003-04-07 12:59:11 10 14 92 0 240 496 0 119.50 69 20.92 CHANGED MaP..tsRhsuPtp..........suQP..FKFTluEoCDRIKEEFQFLQAQYHSLKLECEKLASEKTEMQRHYVMYYEMSYGLNIEMHKQuEIsKRLNAICAQllPaLSQEHQQQVsQAVERAKQVTMsELNulIG..........pQ.phQpLppt ......................................s.s..p..........suQs..hK.FTl.sEohDRIK-EFQF..LQAQ.YHSLKl..EC-KLAoEKTEMQRHYVMYYEM.S.YGLNIEM.HKQ.sEIsKRLNs.IhA....Q.llPFLSQ.E.HQQ.QV.sQAVE.RAK.QVTMsELNAIIG..................QQ..hQ.hQpLSp.t................................. 
0 38 56 155 +922 PF05154 TM2 TM2 domain Bateman A anon COG2314 Domain This family is composed of a pair of transmembrane alpha helices connected by a short linker. The function of this domain is unknown, however it occurs in a wide range or protein contexts. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.55 0.72 -3.75 142 3044 2009-01-15 18:05:59 2003-04-07 12:59:11 11 50 1798 0 887 2110 350 53.80 28 26.67 CHANGED pt+shhhAhlL.sl..h..LG.hhGlHRFYlG..phtp.GllhLlhh........Gh.hsl........h.hlsh.l .......................pshhh.uhlL..ul...h..........lG..h...h....Glc+FYhG...phtp.Glla.Llhhh.................Gh.hsl...................hhllp..h.................................................. 0 311 541 742 +923 PF03348 Serinc TMS_TDE; Serine incorporator (Serinc) Mifsud W, Mistry J anon Pfam-B_3473 (release 6.5) Family This is a family of eukaryotic membrane proteins which incorporate serine into membranes and facilitate the synthesis of the serine-derived lipids phosphatidylserine and sphingolipid [3]. Members of this family contain 11 transmembrane domains and form intracellular complexes with key enzymes involved in serine and sphingolipid biosynthesis [3]. 
20.30 20.30 20.40 20.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.65 0.70 -5.77 45 847 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 289 0 501 781 10 343.80 29 92.34 CHANGED usCCGu...usCshhCosCsus...psShsTRlhYAhlLLlsollShIMlsshh.ppLp+.h.tas....................thsC....sp......hhGahAVaRlsFuluhFahlhullhlsV+SS+DsRAslQNGFWhhKhlhhlulhVsuFa.IPs.shFhhhhhaluhhGuhlFILlQLlLLVDFAHsWuEshlp+hE........-ssS....+hWhshLlssThhhYhsSlshslLhYlaas...ssuCshNphhIolNLllslllSllSlpPpVQEhpP+SGLLQuShlolYssYLTaSAloscP....D+pCNPh.............................hpspsshssshllGhllhhls.....lh.YoohRuussop....................................sshltLssc..............p.sh.p.cspshptssc-Gs.spps.......................hDsEcsuspYsYShFHhlFhLAohYlhhhLT.....................sWapP..tp.h...lsps.asusWVKIsSSWlChhLYhWTLVAPllhP-R.F ...............................................................................h.t...h..........psh...sRhh..Yshhhh.hsshlshlhht...h..h.t.l.....c.....h....h...................................................................................sC........t.........hhGh.uVaRls...huhsh.....Fah.........lhslh.h..ltlp..ss.......p.ss.R..u..t.lpN.G.aW.hhKhhhhhshhhhsFh.l........Pp...t.a..........h.........hh.hhluhhGu.hhFlllQLlLll-FAHpW..scthhtphc................pt..............phWhhhLhhsThhhY...h..h..uhsh.hs....lhaha..a......s..........................ts...Ct....Nphhls...hshhls.hhhohlul.P.......t...lp........t....P.p...........uGLh.Q..uuhlshYshYL..sa.SAhsspP.....pptCss...............................................................................t...tt....sh...llGhhlhhhs......lh.as..s.hps.uspst.............................................................................................tt........ts...............................tsE.pps.s.YsYuh.FH.h...hhhLA.ohalhhhLT..........................sW.....hp....................sp.....ht...ht..ts....h.shWVKl..s..ssWhshhlYh.WoLlAPh.h.h..pR......................................................... 
0 149 252 373 +924 PF03459 TOBE TOBE domain Yeats C anon Yeats C Domain The TOBE domain [1] (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulfate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.78 0.72 -3.86 81 5980 2012-10-03 20:18:02 2003-04-07 12:59:11 12 16 2667 79 1494 5716 620 62.60 25 27.64 CHANGED suspNhltupVtslcttu...sps.Vplplusst....lsuplot...pussp..ht.Gpplhshl+sspltl ...................SsRNtltG...pl...s..s...l..pp....ss.........spscV..p..l..p..ls..ssp..............l.h.At.l..os........puspcL..tL...p.sG..pplhAhlKuspVh......................... 1 339 843 1182 +925 PF04265 TPK_B1_binding Thiamin pyrophosphokinase, vitamin B1 binding domain TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain Family of thiamin pyrophosphokinase (EC:2.7.6.2). Thiamin pyrophosphokinase (TPK) catalyses the transfer of a pyrophosphate group from ATP to vitamin B1 (thiamin) to form the coenzyme thiamin pyrophosphate (TPP). Thus, TPK is important for the formation of a coenzyme required for central metabolic functions. The structure of thiamin pyrophosphokinase suggest that the enzyme may operate by a mechanism of pyrophosphoryl transfer similar to those described for pyrophosphokinases functioning in nucleotide biosynthesis [1]. 
20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.24 0.72 -4.26 86 1995 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1908 31 487 1399 315 67.10 24 29.30 CHANGED Gp.ahlphppsh..................paluhlP..lssssp.lohpGhKYsL.......s.spshph..Gsh.hssSNch......tpp..splolpsuhsl ...................................t....lp..tsh..........................pYluhlP..ls.s.sp..lolpGhKYsL..............s.stshth..sss..hssSNEhh.....spp...splohpsGh.................... 0 183 311 409 +926 PF04263 TPK_catalytic Thiamin pyrophosphokinase, catalytic domain TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain Family of thiamin pyrophosphokinase (EC:2.7.6.2). Thiamin pyrophosphokinase (TPK) catalyses the transfer of a pyrophosphate group from ATP to vitamin B1 (thiamin) to form the coenzyme thiamin pyrophosphate (TPP). Thus, TPK is important for the formation of a coenzyme required for central metabolic functions. The structure of thiamin pyrophosphokinase suggest that the enzyme may operate by a mechanism of pyrophosphoryl transfer similar to those described for pyrophosphokinases functioning in nucleotide biosynthesis [1]. 
30.70 30.70 30.80 30.70 30.60 30.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.54 16 2336 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 2211 33 581 1714 474 120.30 26 51.60 CHANGED N.p..thPt..hhps...ppschhl.usDuGusph...hth.......................slhPchhlGDFDSlsc.Ehhshhtpt...sphlhhs..-KDpTDh-hAlphs.......hphsts.clslhGuhGG.RhDHhh.......uslthLh..+h........s..hpshplhlhsp ........................................................................................ht......................pp....s..h.hl.usDtGu.hl...hch..................................................................slh...Ps..hhlG.D.....FDS.lsp..-..h.ph.h......tpp..........ht.p..h.h..h...s....-K...D..pTDhplAlphs.......................................hp.h.s...s...p..lh...lhG...A..hG...G..RlDHhl.......uNl.lhh..p........................................................................... 0 229 386 496 +927 PF00515 TPR_1 TPR; Tetratricopeptide repeat SMART anon Alignment kindly provided by SMART Repeat \N 22.90 13.00 22.90 13.00 22.80 12.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.34 0.73 -7.61 0.73 -4.14 555 18473 2012-10-11 20:00:58 2003-04-07 12:59:11 23 4566 3183 236 9338 112617 30616 31.90 20 6.62 CHANGED spshhshGhsahphscaccAlpsapcAlplsPsp .......................hhhphG.h.s.a..h..p....h....s.....c.......a.p...pA.l..p.sap.cAlphp....................... 0 3602 5574 7522 +928 PF01938 TRAM DUF90; TRAM domain L Aravind, Bateman A anon L Aravind Domain This small domain has no known function. However it may perform a nucleic acid binding role (Bateman A. unpublished observation). 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.89 0.72 -4.18 30 8506 2012-10-03 20:18:02 2003-04-07 12:59:11 15 36 4435 5 2131 6354 2315 60.00 24 13.59 CHANGED pphlGpphcVllcshu...ppGpshu+spt............sphlhlpss....ts...G-hVcl+lpcspp..phLpGcll .............................sphhp.lh.l.cshu..............ptGp....h..l...G+..sc.....................................spsV..hlpus.........................hs....GchV....cVcIsc..spp..p.hhucl................... 0 721 1386 1802 +929 PF00486 Trans_reg_C trans_reg_C; Transcriptional regulatory protein, C terminal Finn RD anon Pfam-B_94 (release 1.0) Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.37 0.72 -4.06 362 52522 2012-10-04 14:01:11 2003-04-07 12:59:11 23 271 4715 43 11355 34159 7239 75.80 30 29.75 CHANGED spp........lpL.ospEapLLphLhppssc.lloRcpLhc..plWstsh........ssspsl-lalppLRcKlcs....s.ts................phIpTl+GhGYph ..................................p...lpL.T.s.p.Eap.LL.t...hL.h....p.....p..s.......s....c......lloRcpLh...c..........pl....W.s.hsh...............sssp..s..l.-VaI..p+LRcKlpp.......s.ts................................ph.Ip.Tl.+.G.hGYph.......................................... 0 3440 7162 9460 +930 PF02458 Transferase Transferase family Bateman A anon Pfam-B_1540 (release 5.4) Family This family includes a number of transferase enzymes. These include anthranilate N-hydroxycinnamoyl/benzoyltransferase that catalyses the first committed reaction of phytoalexin biosynthesis [1]. Deacetylvindoline 4-O-acetyltransferase EC:2.3.1.107 catalyses the last step in vindoline biosynthesis is also a member of this family [2]. The motif HXXXD is probably part of the active site. The family also includes trichothecene 3-O-acetyltransferase. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.32 0.70 -6.04 11 3130 2012-10-02 12:01:53 2003-04-07 12:59:11 10 37 323 17 1766 3103 5 327.00 17 84.89 CHANGED hpVphpppplIpPupPTPpp...pL.LShlD..hltssh.hs.shaFYcpssphspph........shcpL+puLScTLstYYPhAGRL.....sschplsCNs-G..s.FVEApucsp.Lp-hhch...sssshphllsphssspps.s...hPLhhlQlTpFcCG.GlslGhshsHplsDuhShspFhpsWAchu+ut...t.sssPsas+phlhsp........tsPps....hs.h......hssspptsscchlpKphsat....pssplccLpp+ussppsss...........................TphEsloAhlWRshsputptssppp.......sslhhslshRsRls.P.LssuYhGNshhsssstpss..u-l.pp.lshssctlpctlpp.lsc-hhpsshshsts.........hhhpthscsshh..ssalloSWs.+hPhh-lDFGWGpPlassssssshtsts....hhhPsp....tsssGltlhlsLspppMstFccchphh .......................................................................................................................................................................................................................................h...........................................................................ht.tu.ht.h.L......a.....h..sGph...............................................................................l................................s...t.......u..............h......h.h.............s.......................................h....t........h.................................................................................................................h...............................................................................s....lh.h.......hQ.l..o.....h........t.s.........G.....u..........h.hluhthp..H.......hh.h.Duh..uhht....Fh..psh.u....p.h.s....ps........................................s............h....p....p...t........h..................................................................................................................................................................t......................................t.....................h.....h.h.........................p.......t.l.
t.....t........h..t....t...t.....h.......t....t...................................................................................oshps.l...sAh.l....W..p....s...h......s.....t...u...h....t....h...........t..t.............................sthh.h...shs.....h....R.....t....p.....h........t......s...............l.....s.........t......s......a.h.........G.....N........h.h......h........h....s........h...s....h.ts..................tp.l.......h......t.........t..........s..........l..t...............h.......s....t....h.l..p.....p...t.....h..t.......t..........h.....p.....t......t...h..h..p..t..h...h..p.hht..............................h..........h.........................h....h...h..s.s.........p..h..s......h...h..p..h..D...F..G...........h...........G.............p.............s......h.h...t...........................................hh.......................tt..t......h...h...h...h.....................h..................................................................................................................................................... 0 148 963 1430 +931 PF01336 tRNA_anti-codon Aspartyl_tRNA_N; tRNA_anti; OB-fold nucleic acid binding domain Bateman A, Mian N, Finn RD anon [4] Domain This family contains OB-fold domains that bind to nucleic acids [4]. The family includes the anti-codon binding domain of lysyl, aspartyl, and asparaginyl -tRNA synthetases (See Pfam:PF00152). Aminoacyl-tRNA synthetases catalyse the addition of an amino acid to the appropriate tRNA molecule EC:6.1.1.-. This family also includes part of RecG helicase involved in DNA repair. Replication factor A is a heterotrimeric complex, that contains a subunit in this family [2,3]. This domain is also found at the C-terminus of bacterial DNA polymerase III alpha chain. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.29 0.72 -4.12 640 22969 2012-10-03 20:18:02 2003-04-07 12:59:11 20 145 5024 111 6345 20485 6498 80.20 20 12.26 CHANGED lplt.Ghlpsh.................+st.sthhalp.....lpDts...G......plplhhhp......................tthhphtpplp......sshltlp..Gplptp..........tts..........................hplhspp......lphl .............................................hpltGhlhpp................+st.up..h..h...F..ls...............l.p..Dts.....G........................tlQlhhhs.............................................tthhp.h.t.p..p..lp..........ts..s.h.lt......lp...Gpl.ppp.............pts...............................lplhspp...lp..................................................... 0 2125 3961 5336 +932 PF01841 Transglut_core Transglutaminase-like superfamily Bateman A anon [1] Family This family includes animal transglutaminases and other bacterial proteins of unknown function. Sequence conservation in this superfamily primarily involves three motifs that centre around conserved cysteine, histidine, and aspartate residues that form the catalytic triad in the structurally characterised transglutaminase, the human blood clotting factor XIIIa' [1]. On the basis of the experimentally demonstrated activity of the Methanobacterium phage pseudomurein endoisopeptidase [2], it is proposed that many, if not all, microbial homologues of the transglutaminases are proteases and that the eukaryotic transglutaminases have evolved from an ancestral protease. 
[3] 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.81 0.71 -3.86 87 6914 2012-10-10 12:56:15 2003-04-07 12:59:11 14 194 2577 45 2597 6853 1021 111.30 20 20.05 CHANGED pphuppl......tstpssh..ptsptlhpalp.......pphpYs............hssssts........stphLt......spp.....G.pCtpaAslhsshhRuhGI.PARhlsGhhh.......s........................................sts.ss....................HuWsE....sah................shu.....WlshDs ....................................................................h...................s.h.....pth.h.t.lhphlt...............pp.h.pYs.........................t.s...sps......................stpsLt........ppp...........G...sC.psaAtlh.hs.hh.R.s.h.Gl...P...A...R..h.V.s.Gahh.............s...............................................................t....sh.....................................................................HuWs-l.ah................................s.ts......WhhhDs................................................................................................................... 0 827 1630 2120 +933 PF00927 Transglut_C 1005; Transglutamin_C; Transglutaminase family, C-terminal ig like domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1005 (release 3.0) Domain \N 26.60 26.60 26.60 26.70 26.50 26.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.28 0.72 -4.01 41 1042 2012-10-03 16:25:20 2003-04-07 12:59:11 17 15 108 79 564 1010 0 100.00 19 25.81 CHANGED sphplclhssss.hupchsltlphpNshspshpsh....sshslphsGlhh..ptppcphthsltPtpptphplphh.pchG..pphlsp...hppspltcVpuhtplhlpp ...........................thplcl..s.s.sh.lGpchslplphpN............shsps.ps.....sshslphs.........Glhh..phhp...........pthhssltPtpptphph...plhspch...pphlsp...hp.sth.t.plpshtplhh............................ 
0 94 147 303 +934 PF02779 Transket_pyr transketolaseD2; transket_pyr; Transketolase, pyrimidine binding domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain This family includes transketolase enzymes, pyruvate dehydrogenases, and branched chain alpha-keto acid decarboxylases. 20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.96 0.71 -4.78 93 20295 2012-10-02 16:07:47 2003-04-07 12:59:11 19 58 5038 173 5571 15647 10577 175.70 23 29.45 CHANGED pthshppu.spAlsplhpcs.pllshutDls................sushsttpuhhp.p.......................RhhcsslsEpuhsuhssGhAhpG...hhsatupFhsFss......hspstlp.htuhtphsss.lss+sshGhGtcGPsHpuhc.huhhpslPs........................hpVhtPssss-stthlptAlp....tt.tPshlhhs+p.ht.t .....................................................t..hshppu..spu.l......sph......h......p......p..s......s........p......l......hs..hutDlu...................su..s.h..t...t...p...p...u...h.tt...p........................................Rhh..cs.sluEtu....h..s.u...h..us.GhA..hp........G..............hhsht.upF.hs.F..hp.............hsps.t.l.t......h....t........u.h...h....p..h....s..ss....lhs+...s.s.h.u.h...G..t...c....G..s...s.H...pu.h..........p...huh..l..pt.h..P..s.......................................................................................hpVh..tPu...sss..-stthlpt...........ul.....p.....................p..s......P..s.hlhhs+p.h..................................................................................... 0 1847 3551 4709 +935 PF02780 Transketolase_C transketolaseD3; transketolase_C; Transketolase, C-terminal domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain The C-terminal domain of transketolase has been proposed as a regulatory molecule binding site [2]. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.45 0.71 -4.19 80 17209 2009-09-13 15:57:48 2003-04-07 12:59:11 15 48 4995 153 4660 13131 6413 117.50 22 22.65 CHANGED Gcuplh...+pGp-lslluhGshlphulpAAcpLtpp.....GlsscVlDhRolpPlDhcsl...hptsc..+ss+ll..lVcEuhh.huGhuutlsshlsc.psht.h.sslhplss...-sshshs...thththhshs..ppl ........................s.h.....ppu..s.-..lsllu.hG.s.h...l.ph.A.lp.AA.cpLptp.................Gh.ps.pll..ch.Rslp..P..h...D..t...-.h...l...hp.t.c...cs.t..+.h.l.........llp.-....s....s...h...p..s....u.....l...u.....u.....tl.st.hh.........t....c..........sh....t.............h.........h...s...........l..t..p...h....uh.....s.s..hs.........hht.hshs.tt................................................................... 0 1573 3034 3963 +936 PF00335 Tetraspannin transmembrane4; Tetraspanin family Bateman A, Finn RD anon Bateman A & Pfam-B_3109 (Release 7.5) Family \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.90 0.70 -5.07 212 4340 2012-10-02 01:14:40 2003-04-07 12:59:11 15 37 310 4 2488 3981 12 207.50 17 82.23 CHANGED hlKhh....lh.hhN..hlhhlsG.hsllu......hGlhh...........................................tthhhs..s........hhllh.lGsll.hll.uhhGs.hG.uh.......+csp..........shLhhahh.hlhllhlhplssulhshshpsphpsthtp.h........................................................................tthppssshppthstlp...pphp.C.CGhpshp-atp.......................................................................................................sstssssshhppGChptlt...phlp.pp..hhhl......sslulsl....s.hlplluhlhuhh.Ltpph 
....................................................................................phh.....hh.hhN..hlh..h........lhG...hhllu...hGlah....h.......................................................................t................h..h.h..s.s..h...lllh..h.Gslh.hll..uhhGs..hG..uh....................pc.sp......................shL...hh...........ahh....hlh..l..l.h......l..h.....p.l.s..s.u.lh.s..h...h...h..p...s..p.hppthtp.h...................................................................................tht...p..p..t.t..h..p.ph...hshlQ...............pphp..C....CG..h....p....s.....p..Da......t..........................................................................................................................................................................................................................................................................t..t.......t..t..t.t.....h....h..p..p...G.Chtt.l...........phhp...pp....hhhl..............shhshsl...h.h..h..p....l..hshhhuhh.h....h...................................................................................................................................................... 0 761 1047 1771 +937 PF00905 Transpeptidase Penicillin binding protein transpeptidase domain Bateman A, Finn RD anon Bateman A & Pfam-B_726 (Release 8.0) Domain The active site serine (residue 337 in Swiss:P14677) is conserved in all members of this family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.70 0.70 -5.40 42 25088 2012-10-02 21:13:33 2003-04-07 12:59:11 17 55 4539 218 4622 19829 10300 286.30 21 43.58 CHANGED ussVlh-s+oGplLuhsspsphsspp...........................hhtt.apPGSThKshss.hhul-sGhhpsppthpt.st.h..sstshpsapps........hshppuhppSsNhhh.pl.shclGtc..................p..htphhpphGhGspss.sh.tp..t...............................................hstsssuaGpu.lsloPlp.sphhsslsssG.h....hht........................t.....chshstpshpplpphhttsspss..................shpluuKTGTAphht.ts.h.............huhalGa.s.tsssphshslhls.......tssttsutssstlhpplhp ........................................................................................shlsh-spoGpllAhsu.s...s.a.ssst......................................................................................h..N.p.s.......h.......p......s.......h.......h........p....PGSohKshsh...ss..A..........l..........-..........p.........G............h................h...........s..............s.......s..........s..............h.............h............s.............s.........s..........s..............h.........h....................h............s.........s..........t.......p......h.......p.......s....astp...........................htG......l.s....hpp.......A....lt....p..S..sN....s....s....hs...p.l....s....t....p..l....G.h..s.......................................p......h.t.p.h.h....p.c.......h.G...h...s........p.......t....s...t..................................................................................................................................................................................................phhs.h.u.h....G....t.........G........h...sso......P..l.phup.sauslAN........s.Ghh.h..........p.......P..p...h.l.p....p...l....t.....p.sp.h...........................................................p................s...p....p...s....h....s...p....p....s..s..t..h.....l.p.p.....h.........h....pt.
.Vspps............................st...th..t....h.........s...h......p......l....u...G....KTG.Tu....p........h...................................................shh..sGa...................................s...........P.....p....h.s.h.s.lhht....................t..s....t........h.................................................................................................................................................... 0 1494 3005 3893 +938 PF01609 DDE_Tnp_1 Transposase_11; Transposase DDE domain Bateman A anon Pfam-B_1013 (release 4.1) Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction [3]. This family contains transposases for IS4 Swiss:P03835 [1], IS421 Swiss:P11901 [2], IS5377 Swiss:Q45620, IS427 [4], IS402 [5], IS1355 Swiss:O69604, IS5, which was original isolated in bacteriophage lambda [6]. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.43 0.70 -4.79 91 12598 2012-10-03 01:22:09 2003-04-07 12:59:11 16 50 2656 6 2783 18569 2527 188.80 13 58.95 CHANGED ttttphh......hlDuohlpss..........................................tth..thpstpthtuh.Khclhs.....sssshhhshtls.sush.pDtphhtp..lhct..............phthlhuDtGYsstp....hhpplppp.shph..hht....h+tstth..................................................................................................hhtpptpthth..............................hhptRhtlEpsFphlKph.hthsc..hptpshsphpshlhht.hlshhl ..................................................................................................................................................................................................................................................tt........hD...h...........................................................................................................................................................th...t......h.phhh....................tp....t.h....h..tht.h........h.....ss....ph...p..-..h..p....hh.t........llpt.................................p.t.t.h..l.h.s..D.........tu..at.s.tt.........hhp..t.l...p....p......t....t...hph..........hh......................h+.t.p.tth..................................................................................................................................................................................................................................................................................................................h....h...t.....t.....t....t.....t....................h.......................................................................................................hh.p.t.R.h.t.l..E.t..h...ap.h...+...p.......ht.h.pp......h..t.p....t..t.thp...hhhh.hhh....................................................................................................................................
................... 0 850 1859 2290 +939 PF02371 Transposase_20 Transposase_19; Transposase IS116/IS110/IS902 family Bateman A anon Pfam-B_280 (release 5.2) Family Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases for IS116, IS110 and IS902. This region is often found with Pfam:PF01548. The exact function of this region is uncertain. This family contains a HHH motif suggesting a DNA-binding function. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.69 0.72 -3.88 139 6999 2012-10-03 02:11:09 2003-04-07 12:59:11 11 25 2087 0 1815 6255 787 83.60 27 26.97 CHANGED sphLtolPGlG.lsAssllupl.s-h.pp.FpsscplsuasGLsPttppSGsptp.ps+lo+.tGsptlRphLhhuA.hssh.ph.......s..shhpthap ............................hltolPGl..G....h.sA.ssllu..........pl..u-h..pp..Fp.sscp..lsua.s.........GL.sPtp.h.pSG..sppt...ttphoK..tG.sptLRphLh..sA..hssh...ph.............p.................................................... 0 542 1135 1449 +940 PF01526 DDE_Tnp_Tn3 Transposase_7; Tn3 transposase DDE domain Bateman A anon Pfam-B_885 (release 4.0) Domain This family includes transposases of Tn3, Tn21, Tn1721, Tn2501, Tn3926 transposons from E-coli. The specific binding of the Tn3 transposase to DNA has been demonstrated. Sequence analysis has suggested that the invariant triad of Asp689, Asp765, Glu895 (numbering as in Tn3) may correspond to the D-D-35-E motif previously implicated in the catalysis of numerous transposases [2]. 
20.70 20.70 20.70 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.91 0.70 -5.86 50 1929 2009-09-11 04:42:20 2003-04-07 12:59:11 12 14 851 0 316 1636 192 297.90 33 44.98 CHANGED DlLh-VsphTuFsptFpclpst.....chpsttptllAsllA.GsNlGlpchAct.ssslohcpLshspppalct-slppApspllstppplsluphW..G...s.Gp...s..u...SSDGp+assstpsltu.phss+Yt...stGlshYsalu-passhauplIssstpEAhallDGLLppp..os.l...pscp..hh..sDTtGho-hlFulhcLLGapFsPRl+slp...c..p...+Laph...c..s..ss.pYsplssl.h..tp....pIshclIcppW--llRlssSl+tGplsuotll++Luuh.s+pssLtpAlpElGRl.+TlFlLcYls..-.slRRplpttLN+uEuhpsLsRAla.aGptGclps+sh-pQphpssuLsLlsNuIlhWNThalpcslpplcpp.G..pls.s...chltclSPlsapHINhhGcY .........................-llhtlst.s.tF.pthhthptt........t.t....lhsslh.u.uhNhGhp.hsp......ssh.shtpLt.htt.hhp..cshptA.s...lhph..ph....hsthW..G...t.up...hu...ouDG..ph.s.....ps...s.t.s.+Yh.....t........GhhhYphlscp.assh....s...................hlsss.p-uhall-Gll.p..p.....os...l...p.pp..hhsDTtG..................hochsFuhhtLLGaphsPRltshtp...p...phah......p..t........st..th..t.lt.h..h....tt....plsh.phIttpas-hhRlssSlp.Gpsssshhl++Lss..sptstlh.AlhEhGRl.+TlahLcalp...s.phRRplpttLN+GEuhpultRslh.as.ph.Gc.lpp+thcpQp.phssLsLlssullh.WNThhhppshpthp.....tp.G...hs.t.phlttlSPltapHI.NhhGcY.............................. 0 61 182 248 +941 PF01548 DEDD_Tnp_IS110 Transposase_9; Transposase Bashton M, Bateman A anon Pfam-B_646 (release 4.0) Family Transposase proteins are necessary for efficient DNA transposition. This family includes an amino-terminal region of the pilin gene inverting protein (PIVML) and members of the IS111A/IS1328/IS1533 family of transposases. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.69 0.71 -4.53 127 7132 2009-09-14 00:11:16 2003-04-07 12:59:11 12 28 2092 0 1856 6395 788 129.20 22 42.24 CHANGED hlGlDluKpphpls....hhssss............htptphsss.tuh.pplhshlpph...t..............hhlshEusu.htt.hltphLpp.tGhpVhllsstps+shtp....ptsKoDthDAphlAchspp..t...php....h.......htstptppLptLsptRcpLhpppsphpNc .............................................lGlDlu.K.p.phpss....hhstps..........hh.h.p.t.p.h.s.s.s....ts...h....pplh.phltph..t...................hhls.hEusuhht.....h.......h......h......p......h......L......p.p....h........G......h......p......l..hl..l.s.Pthh+tht............ptsKoDthD...Ath.l..A..chhpp.........t.....php........h.......h.st.t..ptlpt.Lh....p....h....+p.plhpphst.hs.................................................. 0 549 1162 1475 +942 PF00579 tRNA-synt_1b tRNA synthetases class I (W and Y) Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.88 0.70 -5.29 40 11971 2012-10-02 18:00:56 2003-04-07 12:59:11 20 46 5006 203 3414 8921 6461 279.10 24 74.98 CHANGED pp.tt.hplhsGhcPTus.lHlG.ahsslpphhphQ......tGacshhlluDhpAhhscss.....cplhpppphhtss......L.uph.lDsp+...............sp...lhhpSchhpph..phthhlpplushhpls+Mhphcs......hppchpp..............slshuthsYPlLQuhDhhhhpsc....l..sGsDQptplphuc-lsc+hspp...................hshslhss....llstssGp..KMSKStss...sIaLscptpsshchhpphhssscp..tltthhthhshlss..............lphhpth...ttpss......pthcclhsphhsthhpsschtptstpshpphhp 
.....................................................................hp......tlhsG..hcP.T.us..LHlG.alssl.hp.h.....h.p.h.Q....................ss...a..c..s..h.......h..h..l...u...s.....h...puh.hsc.s.......cp.l...p..p.p.p.t...htss................l...uth....l..D......pc.........................................................................................sp.....lh....p..u.c...h....t.ph.......phh.h.h..l..p..s.hsp..h..hpls+M.hphcs..................h..tpchpp.........................tsl.s.h.s.F..sYPl....LQ..uh.....D.hh.....h..h..pss........................l..sGpDQh.tplphu..p-.....l.s.c...+.h.stt..............................................................................s...t.sl..hss.................ll.st...h......D.........G...............s......................KMS.K.St..ss................slhL.........s.p.........p..sshc.h.h.p..t.hhsss-t.........lh.h......hp.h..h...s......hs.............................................l.p...h...h.p.th......stps............hpc.lhs..p.h..s..t....h.....htstchtptstchhpthh...................................................................................................................... 0 1164 2149 2869 +943 PF01409 tRNA-synt_2d tRNA synthetases class II core domain (F) Howe K anon swissprot Domain Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only phenylalanyl-tRNA synthetases. This is the core catalytic domain. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.61 0.70 -5.18 46 6642 2012-10-02 14:22:40 2003-04-07 12:59:11 15 27 5187 59 1844 6318 4237 222.30 45 68.66 CHANGED slDlolP...upthtsGshHPlspsh-clpclFhthGFphhp.GsplEs-aaNF-ALNlPppHPAR-hpDTFal....p....................................s.phlLRTHTosVphRth.....pppc...sPl+llu.G+VaRpDs..DATHps.FHQlEGlllDc..slohucLKGsLcpFhpphFGtc.hclRFRPoaFPFTEPSsElDl.t.................tcpstWlElhGsGMl+PpVLcss.......Gls...pchsGhAaGlGlERlAML+YGlsDlRphaps..Dl+FLcpa .......................................................................................................................................................................t.lDlohs.....sp..h.t...GthHPlsth.hcclpp...hF...h...t........h........G........a........plhp....G....s-lE..s............-aaNF-.sLN...hP.t.p.H.PA...Rs.h..p...D.TFYl............................................................................spphLLR..T..aT..S.s....V...Q...h...Rsh..................................ct...pp.......sP...l.+....h.IuP.G+V.YR...p.Ds...D...u...TH...os...FH........Q..l..EG....L...l...l.............D..c...................s..l........o...h.............u...c...L....K....G..s..L..c..t....hh+ph....F.........G.......p..........c....hp............lRh........R.P...SY.F...P....F.T.E.PSsElDlts..........................................t.ctssW.lE...l.LG.s.GMV+P.p.VL.c...s...........................GlD..sp.asGFA...FG.....hGh-RhsMl............+aslsDlR.haps..DlRFhpp................................................................................................. 
2 623 1139 1548 +944 PF01588 tRNA_bind Putative tRNA binding domain Bashton M, Bateman A anon Pfam-B_482 (release 4.1) Domain This domain is found in prokaryotic methionyl-tRNA synthetases, prokaryotic phenylalanyl tRNA synthetases the yeast GU4 nucleic-binding protein (G4p1 or p42, ARC1) [2], human tyrosyl-tRNA synthetase [1], and endothelial-monocyte activating polypeptide II. G4p1 binds specifically to tRNA form a complex with methionyl-tRNA synthetases [2]. In human tyrosyl-tRNA synthetase this domain may direct tRNA to the active site of the enzyme [2]. This domain may perform a common function in tRNA aminoacylation [1]. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.02 0.72 -4.15 43 10729 2012-10-03 20:18:02 2003-04-07 12:59:11 15 48 4863 53 2411 7871 3721 99.10 31 16.79 CHANGED l+VGcllcsEcaPsADK.LhhhplDlGpcp.....RplVuGlsphhs..-th.s+hlllls.NLcPt..............KhRGlpSpGMlluAp.......sssp..htllssstss.sGsc ..............................................lhVGcll..ps..c..p..h.P..s.......u..DK......Lhhhpl.......D....l............G.......s..........c.........p...............hpI.Vs.Ghs.shhs..........cs....l...luphss.hls....N....hp.p..........................Kl.R....G..h....SpG.Mlhuspp...............sspp....s..l.l.ph.s..p.s.h..sGt.................................. 0 822 1535 2024 +945 PF03250 Tropomodulin Tropomodulin Bateman A anon Pfam-B_3359 (release 6.5) Family Tropomodulin is a novel tropomyosin regulatory protein that binds to the end of erythrocyte tropomyosin and blocks head-to-tail association of tropomyosin along actin filaments [1]. Limited proteolysis shows this protein is composed of two domains [2]. The amino terminal domain contains the tropomyosin binding function [2]. 
21.80 21.80 21.80 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.67 0.71 -4.68 12 463 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 101 0 230 395 0 130.10 40 31.99 CHANGED Mu.s...hp+-LtcY+DlDEDElLtsLStEELcQL-htLpElDPENshLPAGhRQ+DQTpKsPTGPaDR-pLLcaLEKpAlEhKDR-DhVPF.TGEK+GKlFVPKp+stch..tcEploL-.P.ELEEALssATDsElCDlAAILGMaTLhssp .....................ap+cLpc.Yc.D.lD.EDElLusLSsEELcpL-pEL--lDP-s..shLPsGhRQ+sQTpKsPTGsFsR-tLlpal..E.Kp.A....hchh.-+E-h..VPh....st.h.+.G.+....a..h.......................................t..................t.......h..tl-.....s.-.hcp..uLtpAspt-hh-lA.tlls................................................................................ 0 29 46 105 +946 PF00992 Troponin Troponin Finn RD, Bateman A anon Pfam-B_62 (release 3.0) Family Troponin (Tn) contains three subunits, Ca2+ binding (TnC), inhibitory (TnI), and tropomyosin binding (TnT). this Pfam contains members of the TnT subunit. Troponin is a complex of three proteins, Ca2+ binding (TnC), inhibitory (TnI), and tropomyosin binding (TnT). The troponin complex regulates Ca++ induced muscle contraction. This family includes troponin T and troponin I. Troponin I binds to actin and troponin T binds to tropomyosin. 
23.20 23.20 23.30 24.10 23.10 23.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -11.04 0.71 -4.09 56 1116 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 175 22 337 1073 1 125.00 33 53.51 CHANGED K+pLKsLhhtpAtpcLcpEpcc+tpE+pphLp-+s.shp.hsu..sps-LQcl.s+ch+p+lspl-EERYDhEt+lsKpctEI--LphKl................hDL+GKFKKPsL++V+hossuhh+uhLGsKHpsshDLRusLKpVK ...................Kp..K.sLh.hpAh.-hchEpccKEEEc............hhhLp-.Rltthc......h...........ppu-...................Q..........cl.scc.+c+.s+l-..EE+h.chE.tc.t+pctE....cthppKs..........................t-h+t...hK+..sh+.ch+hph.u.h+t.Ls....c...+.h..s.p.pLRt...phKplh........................................................................ 1 71 116 229 +947 PF00234 Tryp_alpha_amyl tryp_alpha_amyl; Protease inhibitor/seed storage/LTP family Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family This family is composed of trypsin-alpha amylase inhibitors, seed storage proteins and lipid transfer proteins from plants. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.26 0.72 -11.45 0.72 -3.77 187 1632 2012-10-01 19:46:35 2003-04-07 12:59:11 17 6 243 52 214 4676 6 103.10 21 68.27 CHANGED Cstshhp..............................lssChshhps..........spppCCstlpsl...................ttttChCtslpshhhs................................................................................tht..tsttLPshCsl....shP.........tC ..................................................................................hht........................................h....s..hshhp..t.t.hs................sl.ps.CCppLtsl....................sphCpC..tsltshhpuh..............................................................................................................................tt.ls..hh.sAs..slPuhCs.l..sls.......................................................................... 
0 29 95 159 +948 PF00089 Trypsin trypsin; Trypsin Lutfiyya LL, Sonnhammer ELL anon SCOP and Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.76 0.70 -4.76 71 22248 2012-10-02 13:45:52 2003-04-07 12:59:11 21 596 2517 2044 9786 28849 4196 206.20 23 59.84 CHANGED IlGGppsphtsh....Pa.sslphps...........hCGGsLlspp..all.TAuHChps..................phpl.....hhGt..thppppss......phhplpp..hhhp.tassps..............tDlALl+Lpps........hphssslpslsLss.....ss...tssspshlsGaGp...stpsu........sphLppsplsllspp.pCpp....h..................lssshlCsss........tt..ssCpGDSGGPllstst........lhGlsS....aG..hsCupsph....sula.spls....thhsWI ..........................................................................................................................................u.....s.....t.ph....Pa...s..t..l.......htt...................................hhC..G...G...s...l.......l........s..........p..........p.......a......l...L..T........A..A...H.C.hts......................................tthpl................hhG....t.............t....h....p....t......t......p.s.......................t...h..h..t..lpp.............hhh..H.....p....a.s..t...ts..h............................................tpD..l......A..L.l...c...L...pp.s..................................h.t....h...s...p....t....l...p.......s.....l..sLsp.....................t.t...........s...s.....p....t........s.......h......l....s...G..W..Gp.................s.t.tss..........................sp.hL...p..p...s..............p.l.......l.....l....s....p...p....t...Cpp............h.....................................................lp.p.s....h..lCuuh.............t................t...ts....s...C.................p..............G....D........S...........G....G.P...L....h...sptt...............................lhG.l..s..S.............aG........s....C.....s....t....h.................ula......splt......hh.Wl........................................................
........................................................................................................................................... 1 2699 3841 6981 +949 PF02210 Laminin_G_2 TSPN; TSP_N; Laminin G domain Finn RD anon Pfam-B_4211 (release 12.0) Domain This family includes the Thrombospondin N-terminal-like domain, a Laminin G subfamily. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.64 0.71 -4.10 208 7915 2012-10-02 19:29:29 2003-04-07 12:59:11 19 1063 156 84 4277 8885 448 128.10 19 16.04 CHANGED F+T...p.pssGlLlassstss...................taltlplp.sGp.lthphshu................tss.hhhhsspp..........l.sDGp.WHp..Vplppptpp................hplt.....VDsptstttttsttt...................................thphsss.lalGGh.spt.t................ppsFpGClpslplssp ......................................................F+T..tpss...G.lLlhssstps...............................talt.lp.l..........p..s......Gp....lt...hp.h.shu..................................ssshh.h.h.h.s...s.t.t......................l...s..D..G.p..WH..p.....Vp.l..p.....p...p..s.pp...........................................spLp..........V.D..s..pt...s...t..t...thts..ttt....................................................................tl.s.hpss...lalG............Gh..sp.t.........................................tpsFt.GClpslhhss..................................................................................... 0 870 1227 2700 +950 PF03133 TTL Tubulin-tyrosine ligase family Bateman A anon Pfam-B_682 (release 6.5) Family Tubulins and microtubules are subjected to several post-translational modifications of which the reversible detyrosination/tyrosination of the carboxy-terminal end of most alpha-tubulins has been extensively analysed. This modification cycle involves a specific carboxypeptidase and the activity of the tubulin-tyrosine ligase (TTL) [2]. The true physiological function of TTL has so far not been established. 
Tubulin-tyrosine ligase (TTL) catalyses the ATP-dependent post-translational addition of a tyrosine to the carboxy terminal end of detyrosinated alpha-tubulin. In normally cycling cells, the tyrosinated form of tubulin predominates. However, in breast cancer cells, the detyrosinated form frequently predominates, with a correlation to tumour aggressiveness [3]. On the other hand, 3-nitrotyrosine has been shown to be incorporated, by TTL, into the carboxy terminal end of detyrosinated alpha-tubulin. This reaction is not reversible by the carboxypeptidase enzyme. Cells cultured in 3-nitrotyrosine rich medium showed evidence of altered microtubule structure and function, including altered cell morphology, epithelial barrier dysfunction, and apoptosis [4]. Bacterial homologs of TTL are predicted to form peptide tags. Some of these are fused to a 2-oxoglutarate Fe(II)-dependent dioxygenase domain [6]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.88 0.70 -5.49 21 2286 2012-10-10 13:17:02 2003-04-07 12:59:11 10 53 310 4 1558 2259 173 263.90 24 44.99 CHANGED hphpp......tptlNaaPsshplspKD.hhhpslpc.tpphthc.......................hthhspoahlst-hsphhphapcppt............shWIlKPsupu+GpGIhlhschspl.............................................pppshllQ+YIc+PLLlss......pKFDlRhYVLlosh.pPLplYlYc-u.lhRFuopcYs........hsshpshhhHLTNhulpKc.t.....tpc.pthpspcaoltshhthhpp..hstcplhpplhshlhcshlsu.......hpsst.shppthssFElaGhDlhl........DpslcPWLlElNhSPsh.psssthssplpstllpslls.....lsssptpsh 
................................................................................................hht............hsph..t...........l.spKt...hhhp..h.tp.h..t..h..p......................................................thhP..o.a....h.......s.....t.....c...h..t...............h..h...p........h.t..pttt......................................................................thWIh...KPs.s.....t...sp.G...p.G.........I..h..l..h...p.p.h..p.p.l..................................................................................................................ttpth.....ll....Q....cYI....pp.P.hL.lts.............................hKFD.l.RhY.lL.l..........s.....u.........h......p..P....L.p............la.h.a.........ccu.hsR........Fusp......Ys.......................ps..hp.s...hhh....HLT....N.hul.p+p.............................t..........t...................t............s.........p...p...h.s.....h.....p....p......h..........t......h.hpp.............h..p.....h.....p.....p.....h..h..p.p.....l....h....p..hhhp...hhlts...............................hpss.........h..............t...........p...s..F.ElhG......hDh.hl......................Dp.p.....h...c.....P.aL.lE..............l.N..hs.....Ps.h...t.t...s...s.....h...p.....h.p.l.h..thh.tshhp..............th................................................................ 
0 771 931 1276 +951 PF01167 Tub Tub family Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.70 21.00 20.20 20.50 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -5.39 11 847 2012-10-02 20:44:47 2003-04-07 12:59:11 13 23 166 7 509 787 14 217.80 36 43.04 CHANGED PAPpsshlpChIpRDKpGhD+GLaPsYahaL-p...EsG..hFLLAuRKRK+SpToNYlIShDPsclSRsussYlGKlRSNhLGTKFTlaDsGsssp+..sp................ospssshR.-LAsVsYEsNVLGa+GPR+MolIhPGhssssp.....RVssp...........................................Phs.sp-slLschpppst-slllL+NKsPpWs--sQsYsLNF+GRVTpASVKNFQllcs....................................scPDaIVLQFGRVucDhFTMDaRYPLsAhQAFAIsLSSFD ...........................................................................................................................pt....hpChlpR..s+p..s.........s.hh....h..a......h........h............cps.........hFLLuu.++h.++........s.t.p....spYlI.....Sh....s....s..s...s..h........S+....tu.........p...s..........a.......lGKl.R........S.N.h..hGTcFhlaDst.s.tt..t..........................................................p....s..th.p..plutl.Y.c.h..Nl.L...s.......+.G...P..R+.Mp..shh....s.hs.pt............h..hp....................................................................................................s....tt......t......s....h...t.....p..h.p..p.....p....s..hp.......phl....LpNKsPhWs-phQ..sasLNF.+........G..R...V......T.....ASVKNFQl.lts..........................................................................................................................................................s-..-.h....l.lh...QFG+l..u.............c..D.hFThDa....p.YP.lsAhQAFAIsLoSF-............................................................................................. 0 172 275 391 +952 PF03953 Tubulin_C tubulin_C; Tubulin C-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes the tubulin alpha, beta and gamma chains. 
Members of this family are involved in polymer formation. Tubulins are GTPases. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. Tubulin is the major component of microtubules. (The FtsZ GTPases have been split into their won family). 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.47 0.71 -4.09 66 8994 2012-10-03 12:11:42 2003-04-07 12:59:11 12 31 3147 105 2212 7432 55 113.00 52 32.23 CHANGED PRlHFhhsuaAPLsups.....ptsacphoV.-lTpphF-spNhMs...ssDP+p........G+Ylosss.laRGcVss+-Vccsltplps+ps..spFV-WhPsulKsulsshsPhshptu........ushluNoTuIpclFpRlsc ...............................................PRLHFhhsGaAPLoScu....................uppa.Ru.loVs.E..LTp.Qh..................F.-...........s+.N.MMs.....us...D..PR.+..........G+Y....losss....ha....RG.......c...hs............K.-V.cc..p.h.hs..lps.........K.p.o.......s.F.V-.Wh.P.s.s..hK..........suls.....PPp...sl.hu..................sshluNo..TuItE..hapRls.c................................... 
0 760 1134 1715 +953 PF00567 TUDOR Tudor domain SMART anon Alignment kindly provided by SMART Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.37 0.71 -4.28 48 3295 2012-10-02 16:56:36 2003-04-07 12:59:11 19 92 277 31 2089 3372 6 118.30 19 26.42 CHANGED hhhspthslhloplt.ssspFal.......tpstpplppltpplpphhpphtt.......ht.sthspsssshhstDs..pWaRApl.......tthsspphcVhalDYGspph.lshsclptls.pphtt........Ph.uhcspLss ..........................................................................h......pshlst.l...s...s..s...p..hah....................h.pp.t.pp....l....p.....p..........l.....p.p.l.p..p....hh.ppttt...................ht...p..h.sp.h.s.h....s...t.......h.....p....t......-.s.............pW.a.Ruhl....................t.hss.....p..p............s..pV....halD.a.Gspp......h...l...s...h......s...p......l......p..........l....s...p...p.hhp.....l...Ph.AhpspLt..................................................... 0 629 828 1457 +954 PF04906 Tweety Tweety Finn RD anon Pfam-B_5713 (release 7.6) Family The tweety (tty) gene has not been characterised at the protein level. However, it is thought to form a membrane protein with five potential membrane-spanning regions. A number of potential functions have been suggested in [1]. 
29.70 29.70 30.00 32.60 29.50 29.60 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.42 0.70 -6.06 14 332 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 97 0 157 301 0 341.60 35 69.90 CHANGED applsusFpPcDt.sYQQSLlhLusluussLuLsLLhLhhYLlphCCsR+.pcspssp....csCClsWssllssLlC...........CAuIGlGFYGNuETsDGlhploYSLcpAN+TluGIcshVpsosssLppslcppLtpL--laus+s-.......alpslphhQp.sssllpphsulP..hWpts....slslsplAppsshhEaYRWLuYLhLLlLDLlICLlsllGLARpS+slLlshsshGlLsLllSWuShGL-hAsAVGsSDFCssPDsallp.sppphs..sDllpYYLhCu.utsNPFQQpLohup+uLssMQpplpsLL+.Als.FPsuc..csLlulQtlLNooEhsLHQLTALlDCRuLHhDYlpALpGlCYDGlEGLlaLsLFShluAlhFoshVCusP+sWphhtsR-cDh--hp-psPh ....................................................................................................................................................................................................................................t.p...h.puLh.hlus.l..uhh.sLsl.s.Llhlh...hYh...hhhC...Cp+p..p...ttpt....................tssChshs.h..l..hssLl.s...........s.sul...u..l..GFYGNuEss.DGl.plh.ulh....psNpTl.....s.slpp.......hl.ts.sttLp...ps.lc...tpLtpLpc...h..h...stp..s-............hltshphhpt.hpslhttht..sls.....hhpts......shs.h.tpl.....up.psshh-.......Y.R....Wl..ua.lhLL.l.ltll.....lCLh.s.llGl...........s........+...........pS+hhll...hhsh..hu...lL..sLl.lSWsuhGlc.hAsuV...uhSDFCssPDsalhphsppphs.......s-llp.YYhh.Cs.....s..hs.....N...P.F.Q.Q.p..Lo..sp.+...uLsph...ptplt.tL....p...uh........ass.sp.......csLhtlpthL..N....soEh......shpp.LsAl...l....cC...RuLHtDYh.pAlp.GlC.DulpGL.laL....h..L.aS...hlsAhhhsshlCshs..+sWt.h......p.p.p.tp........................................................................ 0 39 56 101 +955 PF04564 U-box U-box domain Bateman A anon Pfam-B_2801 (release 7.5) Domain This domain is related to the Ring finger Pfam:PF00097 but lacks the zinc binding residues [1]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.36 0.72 -4.01 16 2544 2012-10-03 15:03:13 2003-04-07 12:59:11 10 170 360 21 1743 2959 196 70.60 32 11.07 CHANGED lPDEFhDPIhhpLMpDPVlLPSG.hshDRusIp+HLhs......spoDPFs.RpsLTpcpLhPNhpLKpcIspalpp+cp ...........................hPpt.F......h..C....PIo.hcl......MpDP.V.l.......h...s...o.....G....hTY-..Rps.I.p.c.a.l.p.p................................s.p.osP.h....T...p.......p...........s..L...s..p.....p.........p..L....h.PN.hsL...+phIppahtpp..h................................ 0 505 1047 1434 +956 PF00627 UBA UBA/TS-N domain Bateman A anon Bateman A Domain This small domain is composed of three alpha helices. This family includes the previously defined UBA and TS-N domains. The UBA-domain (ubiquitin associated domain) is a novel sequence motif found in several proteins having connections to ubiquitin and the ubiquitination pathway. The structure of the UBA domain consists of a compact three helix bundle [1]. This domain is found at the N terminus of EF-TS hence the name TS-N. The structure of EF-TS is known and this domain is implicated in its interaction with EF-TU [2]. The domain has been found in non EF-TS proteins such as alpha-NAC Swiss:P70670 and MJ0280 Swiss:Q57728 [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.47 0.72 -4.00 141 9566 2012-10-01 23:03:33 2003-04-07 12:59:11 26 206 4854 104 4154 7764 1831 37.30 34 8.50 CHANGED hspptlppLhph..G...aspp.pspcAL.ptsss..sh-pAhphL ...............stphlcpLp-h...G....hshh.cs++AL..p.....p..ssG...cl-tAl-hL............. 
0 1308 2242 3222 +957 PF01040 UbiA CytC_assmbly_fac; COX10_ctaB_cyoE; UbiA prenyltransferase family Finn RD, Bateman A anon Pfam-B_1357 (release 3.0) Family \N 24.60 24.60 24.60 24.60 24.40 24.50 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.17 0.70 -5.17 138 10787 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 4280 0 3435 8227 6621 264.00 17 84.83 CHANGED hhhsh...hssh..hu..hhhshtshhsh.........hhhhhhhshl............hhts.us.hh...N-..hhDhchDt.................tt..tp..........ls.sGtls....p.pshh.hshshhhlu..lh.....hu.............hhhs.hhhhhhshh..shhh..s.hhYo....................hh+phshhsplhhuls...hu......hhhh..hushh......hsth...............................sh.hhlhshh.hhhshslhht........thtDhcsD.ppsGhpo...........lshhh........t.thhh.......hhhhhsshhh...............hhhhhhhhhhhhhshhhhshslhh..h.h...........h.........................hhthhhhhhh ........................................................................h....hhssh.hu.......hhhu...t.s...h..sh...............hhh..hhhhlush.............lhtu.uussh...............Nc....hhD.t.chDtpht................................Rp..tpR..............s........ls..sGtlo...........p...puh...h...hshh.h.hl..lu....hh.....lh.................................hhhs...h...h...s...h....h......l......u.hh.....ulhh.....h..hhYo...........................hhK.+.hs.....h.....splh.suhs..........hu..........hssl..hGhss.........sssph......................................................shhhhl.hhhh.h.h..h....h....s....huhhhh....................................sht-.hcs.D.....tp....s...G.hto................lslhh...........Gtptshh...........................hhhhhhhhhhh...........................................................hh..h.h.hhh..h..s..h...h..h.h..h..h..s..h..h.h.....h.sh.h...h..h...h.......h...h...h...h..........p.....................................hhhhhhhhh.......................................................................................................... 
0 1089 2178 2911 +958 PF00240 ubiquitin Ubiquitin family Finn RD, Griffiths-Jones SR anon Prosite Domain This family contains a number of ubiquitin-like proteins: SUMO (smt3 homologue) (see Swiss:Q02724), Nedd8 (see Swiss:P29595), Elongin B (see Swiss:Q15370), Rub1 (see Swiss:Q9SHE7), and Parkin (see Swiss:O60260). A number of them are thought to carry a distinctive five-residue motif termed the proteasome-interacting motif (PIM), which may have a biologically significant role in protein delivery to proteasomes and recruitment of proteasomes to transcription sites [5]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.96 0.72 -4.58 76 11560 2012-10-03 10:59:06 2003-04-07 12:59:11 18 265 901 508 6203 11456 369 67.10 44 28.05 CHANGED +shssp........phslclp.ssolpplKpclpppps...lssspQ+LlasG+.L..cDppslt-aslppss..slplshc.p .........................................+olsGK......slsl..-.V.-..s.o.D........T..l....c.sl.K..s....K...I........p..-..+cG.............IP.......P..-.......Q.Q...RL...I........F.....u........G......K......p.....L......-D..........s...........+......T..L.....u.....D....Y.....s...Ip.....cpo......TLHLVl+................................... 0 2304 3483 4906 +959 PF00789 UBX UBX domain SMART, Mistry J, Wood V anon Alignment kindly provided by SMART Domain This domain is present in ubiquitin-regulatory proteins and is a general Cdc48-interacting module [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.56 0.72 -3.92 22 2190 2012-10-03 10:59:06 2003-04-07 12:59:11 15 58 327 23 1473 2158 19 82.40 21 19.37 CHANGED spstsssplplRhPDGp+htc+F..pscslpplhpalpspt.sts....p.........................FpLhsshP..R+.hspps..poLp-s.thhssuslllph ................................................p....stsplplRh.s...s...G...p...p..l.p.p..c.Fp..sscsl..p....s.lh.p.al...p...s...p...t....s...st.....t.............................................................................F...p...L....h.s..s.a.P.....p+.......h........s....p.p..s............t......oLp..-h...hh.ss.u.slh...h................................................................. 0 463 739 1131 +960 PF00443 UCH UCH-2; Ubiquitin carboxyl-terminal hydrolase Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null --hand HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -12.07 0.70 -5.28 53 8903 2012-10-10 12:56:15 2003-04-07 12:59:11 24 248 429 43 5798 8963 637 380.20 19 45.37 CHANGED 
hsGLtNhGNTCYhNSlLQsLht..ssthp.chlh..............................................p...............stpst.ttts..........................................................................................................................................................lspthpplhpphhps......................pppsltPpt.............................................................................................hhptlsthtpp..hp.......shpQpDApEahthLL-pLccshpt..t.....................................................................................tt.shl..pclFpGphpsplpChpCsppspshcshhslplslttppph....................................................................................................................................................................................................................................................................................................................................................................................................................phhphpttpthptppphhsspspppppuhKphplpcLPp.lLhlpLcRF......aph.ptth...ppKlspplpaP.....h.pLDls....shhttstt.................................................................................................................................................................................................................................................................................................................................................................hpYcLhuVlsHtG.s.......hpsGHYhuahhp.........ps..............................ptt.......WhpacDspVsphs................ppl.hp.............................psAYlLFY 
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hGL.N..h..G..N.T........C.ahNuh......l..Q..............s........Lh.t...h....hp...phhh...............................................................................................................................................................p.....................................tt......t....t..t.t............................................................................................................................................................................................................................................................................................................................................lhpth.t.p.l...h.tt.h.ps.....................................................................t.t.p.s.h..s..P...p..t.........................................................................................................................................................................................................................................................................h...h...p...t....l.....t...t.h.t..p.......ht.........................shp..Q.p.......D.......u........p........E.......h.h.............t.......h..l......ls..t..l...c..p.p..hpt................................................................................................................................................................................................................................................................................................................t...t.t..p.
..s.hl..........pp..h.F.....t...G.....p.....h...p.......s........p.......l...p.....C.......p...Ctphstp.pshh.lpl.h..ttt...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.httttthhstpspthtpsp+phpl.t.ph....P.......l.L................h.l.......p.......L.......p......R..F..........................hp.....t............................htK....l.sp.h..l.p.aP.....................t.Lc.hs...............hh..t...p...t......t......................................................................................................................................................................................................................................................................................................................................................................................
...................................................................................................................................................................................................hhY.p..L.h..u.V......l....s...H................G....s..................h.p...s.G...H.....Y..h......sa..hpp.......................tt...................................................................................................................................................ptt..............Whh...as..D....pp..V..p...h.p..............................ppl..p...............................................tpuY...hLhY................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 2079 3020 4452 +961 PF03456 uDENN uDENN domain Callebaut I anon Callebaut I Domain This region is always found associated with Pfam:PF02141. It is predicted to form an all beta domain [1]. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.27 0.72 -4.00 45 1381 2009-01-15 18:05:59 2003-04-07 12:59:11 13 72 202 2 776 1312 4 65.10 30 5.71 CHANGED pPcllppaPp....p.........pcp.h.pslshFCFPpGlshhspp........psphFsFlLTctDGs+h.aGhChphh ................splltpaPpps........................ps....h...pslshFChPpG.h.ph...ts.s......................tsphasFVLTs.t..........D.u..s.+p.a.GhChphh..................................... 
0 186 286 501 +962 PF03167 UDG Uracil DNA glycosylase superfamily Aravind L anon Aravind L Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.75 164 10115 2009-01-15 18:05:59 2003-04-07 12:59:11 14 32 4976 114 2700 7557 3938 160.00 22 68.49 CHANGED .shsssssc.llllGpsPGt.pp......st..sGtsFsst.........sGphLpp.hl.tph............................ulscpp...................lhlsssltp......................t..ttpssps-hpts.ps...hLhpplph.hp..Pclllhl.GptAhpthh.h...........h............................htth.lhshhHPSsh.tt.....................hp.t.h.phhp.pLp ..............................................................................h.....tss+llIlGpsPht..ss............pt........pG...hsFss..................sss.p.Lhs...hh...ppl..................................................................................uhs.cps...............................................lhlhNsl.......................................................hss.pp.s.s.p..sch..sts..pt....................tlhp..tl.sp...hp......t.........l.....l...hhL..Gph.Atpphhh..h..................................................................................sttphhll...ssHPSslstt.hth.................hsst.h.tps.t.L......................................................................................................... 0 865 1704 2249 +963 PF02809 UIM Ubiquitin interaction motif Aravind L anon Aravind L Motif This motif is called the ubiquitin interaction motif. One of the proteins containing this motif is a receptor for poly-ubiquitination chains for the proteasome [1]. This motif has a pattern of conservation characteristic of an alpha helix. 
20.40 2.90 20.40 4.00 20.30 -999999.99 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.07 0.74 -6.49 0.74 -3.89 46 2346 2009-09-16 13:18:06 2003-04-07 12:59:11 15 72 279 15 978 2205 32 17.30 40 7.58 CHANGED tpE-pcLphAlthShp-t .........pE-tc.LptAlphShp-....... 0 264 430 695 +964 PF01027 Bax1-I UPF0005; Inhibitor of apoptosis-promoting Bax1 Bateman A anon Pfam-B_1376 (release 3.0) & Pfam-B_5704 (release 7.5) Family Programmed cell-death involves a set of Bcl-2 family proteins, some of which inhibit apoptosis (Bcl-2 and Bcl-XL) and some of which promote it (Bax and Bak). Human Bax inhibitor, BI-1, is an evolutionarily conserved integral membrane protein containing multiple membrane-spanning segments predominantly localised to intracellular membranes. It has 6-7 membrane-spanning domains. The C termini of the mammalian BI-1 proteins are comprised of basic amino acids resembling some nuclear targeting sequences, but otherwise the predicted proteins lack motifs that suggest a function. As plant BI-1 appears to localise predominantly to the ER, we hypothesized that plant BI-1 could also regulate cell death triggered by ER stress [2]. BI-1 appears to exert its effect through an interaction with calmodulin [3]. The budding yeast member of this family has been found unexpectedly to encode a BH3 domain-containing protein (Ybh3p) that regulates the mitochondrial pathway of apoptosis in a phylogenetically conserved manner [4]. 
26.10 26.10 26.20 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -12.10 0.71 -4.72 511 5798 2012-10-01 20:22:31 2003-04-07 12:59:11 15 12 3474 0 1486 4003 2434 202.60 25 84.39 CHANGED pphl.ppsYshh......uhu....lh.lo.uhsuhhsh..................................h.....hhhhh.......hhhs.....luhl.h.h.s...h.tht............p.....sss........hs.hh......hhh...sasslhGhslus................lhhh.......ht................sshlspA..hh.....hTushFsuhohh...uhpo...++........D.......ho.th..GshL...h........hull..sl....lluulls.h.F......h...........ssshphsl.u....hlu....lllFs.Gh..h...haDT...p.pl....h..........p.....ththtsh.................................lhuAlsLYL.....DhlNLF...l.h...lLp...l.h .......................................thl.ppsYhhhulsLh...ho...uhsAhhs.h.h..................................hh....hh.h.hh............hhl.st.....lshh..h.h..h.h.h...h....thp.............................................p...h.sss..........hh.......hhh...........hao...shhGhslus...................lhs.h.Yh................ssslhtA.....hs.....hTus.hFh.shohh.....uhpo...++.........................D........ho....th....GshL...h........hull....sl.......llus...llN....h...F............................lt.................ssslthsl.o.............slulllFs.Gh....l...ha.DT......p..pl.....h............p.....tht.ss..................................lhuAlsLYlDh...lNlFl....lLplh................................... 0 437 786 1139 +965 PF03684 UPF0179 Uncharacterised protein family (UPF0179) Bateman A anon SWISS-PROT Family The function of this family is unknown, however the proteins contain two cysteine clusters that may be iron sulphur redox centres. 
25.00 25.00 69.70 69.30 20.10 19.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.06 0.71 -4.62 24 104 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 103 0 79 109 6 141.80 37 95.34 CHANGED hITLlGpcLA+sGsEFlahG.sscCcsC+h+psCh..NLp.G+RY+llpVRsst..pcChlH.-ssVpsVEVtcs.slhsllpo+pAhcGuplohpss.Cs..h-CpsachCpP-Glhpu-+hpIhcllGcht...C.tG.+plplVclthh ....hITLlGpcLAcsGtEFlahG.ss.cCcsC+h+phCh..NLc.G++Y+ls.sVRsst....ppCslH.-ssVpsVEVtcs.slhshlpu+tAhpGuplshpss.Cs..h-Cptac..hC..pP.-.GlhpG-+h+IhcllG-h.t..C.pG.csLphVclh.h...... 1 16 46 63 +966 PF03699 UPF0182 Uncharacterised protein family (UPF0182) Bateman A anon SWISS-PROT Family This family contains uncharacterised integral membrane proteins. 19.40 19.40 22.50 23.80 19.30 18.80 hmmbuild -o /dev/null HMM SEED 774 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.59 0.70 -13.39 0.70 -6.60 63 654 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 582 0 224 642 800 783.20 35 82.82 CHANGED p+s+hhhhlhhlllhlllhsh....hsshhs-hhWFpplG....atsVFhTplhsphhlhlhs.................................................................................hllhhlhlhlslhh.sp+st..........ht..h.t......ht.....................t.hhhhhhphhhhslshllullsuhhsuup.WtplLha....lssssFGpsDPlFstDluFYlFpLPhhchlhshlhshllluhlhsllsa................hlhusht.......hsstth...tlsptsptpLulLhulhhLlhAhuaaLcpapLLao..s..pGs.hhGAuYTDlpstLPshhlLshlullsAlh..h..................hhsh.................hppph+h................shhuhslhllssllhsslhPtllQpFhVpPNEhphEpPYIccNIshTRpAa.sL-p..lc.hpsasspss..Lsst.s.....ltpspsTlsNIRLhDspPLhpTapQLQQlRsYYpFsc.lDlDRYpl.sGp........................hpQVhlusRELs.ssLstpupoWlNcHllYTHGYGhlhuPlNpl.............os-GhPpa..hlpDIPsssph..................sltl...ppPRIYaGEh.......ossYsIVs.s..pst....EaDYP...............................................................pus.pN.shspYsGsGG....l.lsshhpRllaAhphp-hplLlSsslss-S+ILapRslt-RVcclAPFLphDuDPYhVl.sc........G+................lhWIlDAYTTSspYPYSp..Ptp......................pslNYIRNSVK
slVDAYsGoVsaYlh.DtpDPllpsap+lFPslF+PhuphPssLpsHlRYPpDLFplQsphlspYHh.....TDPpsFYsp-DhWplPp.....-..........................hsss.....pts...............htPYYllhpLPs........ppptEFlLh.sasPts......RsNhsAaLuARu......Du........psYG+lllaphP..+pchlhGPtQlpspIsQDspISpploLW ...................................................................................................................................................................h....hhhhh.hh.h.lhl.llhhh...hhsshasDaLWFsplG.......apuVFhThlhs+lslhlss...................................................................................sllhu.shlhhs.hhl.AhRs+.s.sh..s.s...s.........psltth........................................................................................................................ctsh.tp..h+hhhhslsl.llu.lls.G.hhspup..Wt.plhha....hpussFGhpDP.FGhDluFYsFpL.Phhchllshl...hshllluhlssllsa............................................................................alhGul+............hss.t.t.s.t.l.ops.A+hpLullsulhhLlhAsuYaLcRYpLLhs.......ppss...hsGAuYTDlpAsLPuphlLhs..lul.l..sAlh.....h..................h..ss.l...................hh..+..sh+l................................Ps.l..u.hslhl.lsul..l.lG..s..h..a..PhllppFpVpPNttphEpsYIp+NIpATRpAY.GL..ss...lp.hp..s....a....s...u..s..ss......h.s..sp.p......................l.tsstsTlsNIRLhDPpllssoapQhQQ..h+saYpFs-.LslDRYpl...sGp...................................hpshllAsRELs.ssLstp.ppsWlNcHhVYTH.G..Ghlsu.uNpV.................s.ssGhPta..hlp...slsspuph..................slsl...ppPRIYaGph.......ss-YuIVGssts.....EaDYs................................................................psp...ts...sphoY...s.G...sGG....lslushhsRhlaAh+at-hphLhSstlsspS+.ILapRssp-RVppVAPaLTh.....D.....u....csYPsl..lsGR..............................lhWIlD...uY..TTsssY..PYSp..hsshtt....................tpplNYIRNSVKAsVDAYDGoVsLYth.Dpp..DPll+sWt+lFP.G.hh+shu-hs..s..-LtsHlRYPcDLFcVQpplLs+YHV.....sDPpsFasspDhWplPp.....-.............................s.s..s..sp.....spt.......................psPYYlhhp..h..Ps........pspsp...Ft
..Lhosasshp......R.p.LsAa.luucS...Ds...........ssYG.....clplhp.l.P..p...s....s.l....GPtQspsphspssplSpplsLh................ 1 84 173 210 +967 PF03676 UPF0183 Uncharacterised protein family (UPF0183) Bateman A anon SWISS-PROT Family This family of proteins includes Lin-10 from C. elegans. 19.80 19.80 21.20 20.40 19.20 19.70 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.37 0.70 -5.88 8 356 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 220 0 262 349 4 251.20 23 75.90 CHANGED GsctWEFlLGMPIuQAluIIppQsplI+sVpVhYSDpsPLshDllIslPsDGl+LhFDPhsQRLKlIEVaslpplpL+YsushFNSPsllPTl-plpp.FGuTHPGlYDsu+plasLpaRGLSFtFPlsS.....+apssaucGLu...SLcFssGuoPlloRMSIYsGus.........lsEu+sP.sLPhuCahGslYhEpVcVl+puts.shGlcLpLsspG...shhhE.chpshpRplhFGDSsQDVhSsLGuPs+VFaKoEDKMKIHSsSsHR.spo+suDYFFNYFoLGlDILFDupTHcVKKFVLHTNaPGHasFNhYpRCpFpI.l.............sDpssssssspps........ITshoKWDplpchLusst...+PVVLpRuSospp.NPFGSTFCYGYQclIFEVM.Nsa.IASVTLY.sus ................................................h.........G....h.....phht.lp.t..t..h..hp..lha........t...tPht...s.lhl.hsttuhpLhF-s....QpLphI.........El........hs.......h..p........h.............hhat.............t...........................h.................t........................t............................s........s........h.tl....t...F.G.s.oa.......P..G..............t...................t......................ahL.a.GlsF.F..h....................................................ttst..........htp.h.la.utp.............h.ps........h.............................................................................................l.h.s.....t.o.s.p-lh..hG.......Pttha.Kt.pph.lH........................................................................................................................................Khlhhs.s...tp..h.......................................................................................................................................................................................................
......................................................ts...................................................................................... 0 89 144 210 +968 PF03671 Ufm1 UPF0185; Ubiquitin fold modifier 1 protein Bateman A, Coggill P anon SWISS-PROT Family This is a family of short ubiquitin-like proteins, that is like neither type-1 or type-2. It is a ubiquitin-fold modifier 1 (Ufm1) that is synthesised in a precursor form of 85 amino-acid residues. In humans the enzyme for Ufm1 is Uba5 and the conjugating enzyme is Ufc1. Prior to activation by Uba5 the extra two amino acids at the C-terminal region of the human pro-Ufm1 protein are removed to expose Gly whose residue is necessary for conjugation to target molecule(s). The mature Ufm1 is conjugated to yet unidentified endogenous proteins,[1]. While Ubiquitin and many Ubls possess the conserved C-terminal di-glycine that is adenylated by each specific E1 or E1-like enzyme, respectively, in an ATP-dependent manner, Ufm1(1-83) possesses a single glycine at its C-terminus, which is followed by a Ser-Cys dipeptide in the precursor form of Ufm1. The C-terminally processed Ufm1(1-83) is specifically activated by Uba5, an E1-like enzyme, and then transferred to its cognate Ufc1, an E2-like enzyme [2]. 21.70 21.70 22.30 25.90 18.70 21.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.53 0.72 -4.22 2 177 2012-10-03 10:59:06 2003-04-07 12:59:11 9 5 139 3 104 152 2 72.40 75 67.89 CHANGED uKVSFKlTLTSDP+LPaKVhSVPEusPFTAVLKFAAEEFKVPstTSAIITNDGIGINPtQoAGNVFLKHGSELRlI ............uKVoFKITLTSDP+LPaKVlSVPEu.TPFTAVLKFAAEE...FKVPs.sTS.AIITN.DGlGINPsQTAGNVFLKHGSELRLI............ 0 41 60 83 +969 PF00179 UQ_con Ubiquitin-conjugating enzyme Ponting CP, Schultz J, Bork P, Finn RD anon Prosite Domain Proteins destined for proteasome-mediated degradation may be ubiquitinated. Ubiquitination follows conjugation of ubiquitin to a conserved cysteine residue of UBC homologues. 
TSG101 is one of several UBC homologues that lacks this active site cysteine [4, 5]. 21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -4.75 71 9194 2012-10-02 15:28:41 2003-04-07 12:59:11 21 127 571 270 5880 8415 316 134.10 27 49.81 CHANGED Rlt+Ehp......ph.cpsssuhpst.hs....c.....slhpWpshIhGP.psTsY-uGhFplplpFPpcYPh.pPPpl+Fh..o......................claHPNlp.tsGpl......CL.sILp............pp.......WoP.shslpslLl.ol.uLL......spP.NspsPhss-sA...phapcs.pp...paccpsp.th ..........................................................................................................................l.p-ht........ph.....p...p......s.......s..h..p.s.t..s......p...................slh..p.W.ps..h.I..h..........G...P.......s.......s....o....s............Y...........c...G...........G..........h...........F.......p...........l..........p...........l.............ph..P.....p.............c.Y..P.........h..p......P...P....p........l......p...Fh...o..................................................p.l..a.....H....P.......N.............l......t....s...........s.....G..p....l....................C.L...s...l..Lp........................................p.p...................WsP...shs.l...p.......s......lLh...ul......p..u........LL.............................spP.....s....s...p....s...P.....h...s.....phu...p....h....h...p...p....t....ta.t.h....h................................................................. 0 1995 3131 4649 +970 PF02814 UreE_N UreE; UreE urease accessory protein, N-terminal domain Bateman A, Finn RD anon Pfam-B_6279 (release 6.1) Domain UreE is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid. 
20.80 20.80 20.90 21.40 20.70 20.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.02 0.72 -4.46 70 1294 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 1157 35 261 891 364 64.20 27 37.70 CHANGED hls.shpthtt.tsss..ttplsLsh--Rp+pRhRhpsssGp.-lulpLs+s.sh.LpsGDlLht--Gph.l .................h.........t...tttp.....s-plhLshp-ttKpRhRhsos...p.....Gp..-lulpL.tcs..hh...LpsGDlLht--sphl...... 1 57 144 203 +971 PF04192 Utp21 Utp21 specific WD40 associated putative domain Wood V, Finn RD anon Pfam-B_16350 (release 7.3); Domain Utp21 is a subunit of U3 snoRNP, which is essential for synthesis of 18S rRNA. 22.10 22.10 23.10 23.10 20.20 19.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.40 0.70 -5.26 28 330 2009-01-15 18:05:59 2003-04-07 12:59:11 7 21 284 0 242 331 2 226.30 30 24.85 CHANGED hlEu..Ah--pppc........pppth.......YpSh-QlscsLlTLSlhPcS+apsLLcLDlIKpRNKPKEsPKtPEpAPFFLP.ossslsspt....................t.sspp........scstsctsclsclp.sspht..tpSpFopLLcp............usps..s-YsphlcaLpshuPuslDLEIRSLs.......s..hsshsElhsFlculsptLcop+sFELspAahulFLK.........lHuDllhp.......ssp.........................LtcsLppWpstpccphp+Lc-LVtastuVluFl+o ..........................................................ttt.ht...h.o..cQl......sp.pLlTLS.h.lPcS+WpsLLpLDlIK........pRNKP..........p........Es.PKtP..c...p.....APFFLP..ohsslssph.........................................................................................ptpp........................pppp..spt...sph.sph.p......t........popF...sp.hLpp......................................................................ut..ps....ss......asshlphL+sL.u.P.S.s.lDhEl.RoLs.....................s...tssh..pt.hht....F....l....c....hlsthLp.s+csFELsQAahulFL+.........lHschl.h.p....psp.................................Lhptlpphpp.p..ppphp+lppLht.shsllsalp......................................................... 
0 84 137 203 +972 PF02151 UVR UvrB/uvrC motif Mian N, Bateman A anon IPR001943 Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.60 0.72 -4.32 72 12483 2009-01-15 18:05:59 2003-04-07 12:59:11 14 46 4610 17 2727 9003 4562 35.60 34 5.61 CHANGED pphlp.pLppchppAscppcaEcAuclRDplpplcpt ..............phlp.cLcpcM.ppAu.csh-FEcAAplRDplppLcp.h.......... 0 966 1868 2351 +973 PF05008 V-SNARE Vesicle transport v-SNARE protein N-terminus Moxon SJ anon Pfam-B_5492 (release 7.6) Family V-SNARE proteins are required for protein traffic between eukaryotic organelles. The v-SNAREs on transport vesicles interact with t-SNAREs on target membranes in order to facilitate this [1]. This domain is the N-terminal half of the V-Snare proteins. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.81 0.72 -3.82 89 491 2012-10-01 21:14:52 2003-04-07 12:59:11 10 8 290 5 319 473 11 77.00 26 28.67 CHANGED aptlpsplppclsphs..s...s..-..p++ptlpclcppl-EAppLlcpM-lEsp.sl..P...s.+sphps+lRpY+s-lsp.l+cchcp ..........................ap.lpsp.lppphsp.h.......phs.u..-.......p++phlpph-ptl-EAp-LlcpM-lEsc.sl....Ps.......stR...sthps+lRsY+p-lsp.l+pch+..................... 0 111 177 256 +974 PF00790 VHS VHS domain SMART anon Alignment kindly provided by SMART Family Domain present in VPS-27, Hrs and STAM. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.53 0.71 -4.55 15 1706 2012-10-02 18:21:09 2003-04-07 12:59:11 14 40 287 48 1025 1655 5 130.70 28 24.38 CHANGED ss.sp.lpthlp+ATcpsls-s..Dhuhhl-lsDhIsps....sssP+-AspsIpK+lss..psspsulhALslL-sCVKNCGppFHhclus+-Fhs-Llphlspcs.sc.....V+pcllcllppWspsh....cpcschphlpDhachLKhcG ............................s.....htthl...p..+ATs...t....t..h......pt...DW.s.h.h..c.lC...D...h...lspp............tsus...+....-Ah+u.lt++lpp..........pss...p...ht.........h.h.........A.........Ls.............lL-sslc...N..CGppFH..hp.l....u.........sc....c.F.l.s......c.l.l..c...ll.ps.ph.t.....st...................VppKllpll..psWs.p.s.h.....p.ps.ph..s..hltphap.LptpG..................................................................................... 1 272 484 768 +975 PF00654 Voltage_CLC voltage_CLC; Voltage gated chloride channel Bateman A anon wublastp P37020/1-588 Family This family of ion channels contains 10 or 12 transmembrane helices. Each protein forms a single pore. It has been shown that some members of this family form homodimers. In terms of primary structure, they are unrelated to known cation channels or other types of anion channels. Three ClC subfamilies are found in animals. ClC-1 (Swiss:P35523) is involved in setting and restoring the resting membrane potential of skeletal muscle, while other channels play important parts in solute concentration mechanisms in the kidney [3]. These proteins contain two Pfam:PF00571 domains. 
23.80 23.80 24.10 23.80 23.20 23.00 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.28 0.70 -5.43 139 7727 2009-01-15 18:05:59 2003-04-07 12:59:11 15 38 3515 57 2231 6126 1048 337.70 23 63.73 CHANGED lsuhls.......uhlhthh..sts...t...u.sGlsplhttl...pttpt.h.....lsh+shhs+hlus..llsluuGtSlG+EGPslplGAslushl..u...........ph......hth..ttsp.p+h.LlssGuAAGluAsFssPluGslFulE...............lh.tph..sh.......ps....hhsshluulsushlsphl........hu.t..sh...........as.....lss................hshhtlhhhlllGllsGl.hushFsphhhtsp.p.....hhpph.....h..............hh.pshlsuh...ll....uhlu....hhh.......................P.t..hhGsGh.......sh.........................................................................................................lpthh...sst...................................................hhhhhhhhhhlhKhlhTslohuuGhsGGlFsPsLslGAslGt.hh.....u.hlhthh........................h..................sshullGMuAhhuusspuPlouhlllhEhTu.shphllPlhlushluhhluph ................................................................................h..hhhs...shl...ht..hh.........stt...t...u......p...Gls.p.l...ht..tl......ps....h............................h.hps.hhhK.................h.lus..lls....lu..u...G..h..slG.............+...........E.............GPhlpluuslu.phlu..................................ch...............hp.h.......p.sc...p+h..llusGuA...AGluAsFsAPl..u....G.......slFsl.....E..............................lh..tph..ph...........................tshhss..hhu....ul.sushshphh..........hs.t..sh......................ht.............ls.h..........................................shs.h.t.p.h.h.hhll.l.Gl.l..s.....G..l.....hGhla.p...hhhthp....p.........................hhp..p....h........t.h.............hh..hshls..uh.lh.....ullu.....hh.h..........................................s..t..hhG...s.Gh...sh.......................................................................................................................ltthh.......tst......................................
..............................hshh.hlhhhhlhKhhhoh..lo.h...uuGhsGGlFhPsl......hl..G...uhhG.thh.....u..hhhshh................................................................................s..sh.hul....l...G.h.uuhhu..uss+s...................P.loshlllhEhT.......s..s.......h.p.......h...........l.hsh.hls.sh.huhhlst.h............................................................ 0 710 1253 1782 +976 PF04840 Vps16_C Vps16, C-terminal region Kerrison ND anon Pfam-B_6003 (release 7.6) Family This protein forms part of the Class C vacuolar protein sorting (Vps) complex. Vps16 is essential for vacuolar protein sorting, which is essential for viability in plants, but not yeast [1]. The Class C Vps complex is required for SNARE-mediated membrane fusion at the lysosome-like yeast vacuole. It is thought to play essential roles in membrane docking and fusion at the Golgi-to-endosome and endosome-to-vacuole stages of transport [2]. The role of VPS16 in this complex is not known. 36.00 36.00 36.20 36.20 35.90 35.60 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.73 0.70 -5.48 5 345 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 273 0 252 368 2 283.70 30 37.70 CHANGED lSascIA++AYpsGRs-LApKLL-hEscuup+VsLLLpMccsctALpKAIcStDssLIhpVLLcLKpchspSsaahsLpcpPhAlsLYpcasRcp-+cT.......LaDlYpQ-DcapclApaHlcsuhts.ccs-uRlouLppAuDAaupu+s.slEscss-Dph....+Ll+lQcoLpccasssFssLSl+-TVscLI.sGcsK+AccL+p-F+IPDKRlaWLKlcuLuct+KWEELEcaApS.KKSPIGYtPFVchCl+pcNhcEA+KYlsR...lss.p-KVchalpsssas-Au-lAh-cRDtssLp-lhp+hssss-ushss+Vpsslcp 
......................................................................................................................lSaspIAttAhppGRtcLAhp....LLphEspu.scQ..................VPLLLp........h.p.c...........pchA..LpKAlcSGDsDLlhhVLh.p.Lcp.c.h.s........h..u......p.....Fh.h.hl...pp....ps.h.........A...........s....L...ht.t.......hs.+..p....p.phph.................................Lc-ha......p.....sD...ph.....-hu...hh.lppuht............p.........p........................ps.........p..ht.......t..L....p.......At..c..h.h......t.p.s..+...p.......t..h...t...t...p...h..hp-ph..................pLLc.hQ.cpL..-....pc.....h...s.............t.............p.............F.h........s.h....Slp-T.lhpL...lh.....h.......G.p...t............+.........p.....Apcl.....tp-F.+l.s..-K+a.a.al+lpuLs.pt.cc.W...p..-L-....c.h........u........+......p...K.......K.S.......P.........IGapPFhphhh.p.t.s..p.t........t.p.At.pals+......lss...pp+hcha...hths.hhpAup.Ahct+s.t.Lt.lht.h....................................................................................... 0 95 149 215 +977 PF04841 Vps16_N Vps16, N-terminal region Kerrison ND anon Pfam-B_6003 (release 7.6) Family This protein forms part of the Class C vacuolar protein sorting (Vps) complex. Vps16 is essential for vacuolar protein sorting, which is essential for viability in plants, but not yeast [1]. The Class C Vps complex is required for SNARE-mediated membrane fusion at the lysosome-like yeast vacuole. It is thought to play essential roles in membrane docking and fusion at the Golgi-to-endosome and endosome-to-vacuole stages of transport [2]. The role of VPS16 in this complex is not known. 
22.00 22.00 22.60 22.40 21.50 21.20 hmmbuild -o /dev/null HMM SEED 410 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.35 0.70 -5.94 5 393 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 278 0 293 403 5 326.60 26 46.86 CHANGED Pou-WphLcDs.YYRKptLuos-WsLch-l.chhlulAPaGGPIAVsRsP.phpss..hs.hPh..IpIashSGplLu+.lsWsp..sslVuMGWocsEELIlVsKsGpVhVYuhhG-h...paSlGcsl...pssclpEs+lFpoptGcoGVAlLssucclhllsusucslhh+phP...-lPsspophpstsssspI.....LssDcshpIhlssGssLt.Is-pu..ps.............lsS.p+alKloVS.s+p+LALYTsoGplhllSsDhuccLCEacls...t+usPKQMs..WCGN......DAVVlua.EshLhlVG.....csG-pVsFhYchT.sh..LssElDGVRIlTpoopEFLp+VPAsoENIF+IuSpsP...GAhLlEAtpchEc...+SsKA-EhLpplp-..pLccAVs-CIpAAscEFpPEhQKsLL+AASFGKuaLcpasPD..cal+ ...............................................................................................................................................................................tW..h......hap....t.hh.t......a..............t...............h.......p...hhl.ss.us.uGsl....Alh.ps...................................t...........lplas.s.G......l.tp...h.a..pt.............s..lh.thsW.st....p.-pLl.sl..psGh.h.h.h.a........s.h.....uph.................h...s.h.sp.th....p.tl..p..s..............................h.sG.hsh.....l.h..t.s....p......h..h.h..tsh....t...............t..h.................t...st........................................................h.........p...p...h......l...hh..s.....s.....plh.l.....ttt.......................................h........................hhphslS.stp.hl.Ahh......s.....t.s....G.....h...l.....h.h..h........s.s.shp..p.h.h...-hss.p.....................pt..P.p..ph...WCup.................c..ulh.l........ta...p............p.......lhllG...............................stsp...h..pa..h.s....t......h........................lhsEhDG.lRlh..opsph-hlpc...VPt...s...s...........tplFplu.S.h..sP...........uuhLh-uhcphpp.................pu.....+A.--...lp.l..pt.........pL.pAVppClpAAst.E.a...psp.hQ+p...LL+AAsaG+sh.hp..h..p...................................................
......................... 0 115 177 250 +978 PF03635 Vps35 Vacuolar protein sorting-associated protein 35 Finn RD anon Pfam-B_3569 (release 7.0) Family Vacuolar protein sorting-associated protein (Vps) 35 is one of around 50 proteins involved in protein trafficking. In particular, Vps35 assembles into a retromer complex with at least four other proteins Vps5, Vps17, Vps26 and Vps29. Vps35 contains a central region of weaker sequence similarity, thought to indicate the presence of at least three domains[1]. 21.20 21.20 21.30 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 762 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -13.43 0.70 -6.39 23 430 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 295 2 300 460 7 646.70 38 90.35 CHANGED +hLpEAlssV+pQuhhM++sL-p.scLhDALKauSsMLsELRTusLSPKpYYELYMtlFDpLphLssaLt-p+spt.++..........................LsDLYElVQaAGNIlPRLYLhITVGosYlcsp-uPsKEILKDhlEMCRGVQHPlRGLFLRaYLsQto+-hL.Ps.s....................ppsssGslpDol-FllsNFhEMNKLWVRlQH.....................................QG.s..............+-+-+Rp+ERcELplLVGoNLVRLSQL........-GlshchYpcsILPplLEQlVpCRDslAQpYLhElIlQVFPDEFHLtTL-.LLsus.spLpPsVsl+pIlhsLl-RLusYsspps-sp.p..t..........................................................................................................................................................................................................................slclFplFhsplspl........lc.s+schs..........................................................................................................................................lpshlsLhsSLlsLsLpsYP..-pl-hl-plhshshp.hlpph........tsphpsspsppplh...................pLLhh.................Pl.pp........ahshhslLplpsa.sLlshhshps.pKsluhsllssll.....................cssoh...................................Issh-pl-plhpllpsLI...................p-psDp..t.....................................................-s--ht-EQphlu+llHll....p.s-........Ds-pphclLtssRKthhpGG.pRl+aThPsLlhshl.........+Lsp+hptpp..............................
...............................................................papspspplF+alHp..slssLhshss....s-LsL+LaLpsAtsADphs.............lp-...luYEFasQAFslYEE......................sluDS+sQhpAlthlluoL.p+h+shs..cENY-sLhsKsshauSKLLKKsDQCRuVhhCSHLaWssch.........................................................h+-uKRVLECLQ+uL+lAcsshp.......sssslpLFVElLN+alYYa...-pssspVoscalssLI-LIpssh ................................................................................................................................................................................hLt-ultsl+.psh.M.....++s...L-p..spLh-ALKpuophlsEL...........R.....T.....s.LuPKpYYEL..................Y.....MtlhDt.Lp.hLphaLh......-ph.t.....pp......................................................................ls..DLYE.lVQauGNIlPRL.YLhlTVGssYhp......t..t....sh+-lhKDhhEMsRGVQHPlRGLFLR.YL.phs...+.shL.Ps.s........................................ptstGslpDuh.pFlL.NFhEMNKLWVR.h..Q.H.......................................................QG.u..............+-+-pRppERpELplLVGoNLVRLSQL....................t.lsl-hYpphlLstlLEQlV.pC+DslAQpYLh-slhQ....V....FPDEaHLpTLs..hLpus.spLps.....pVslK..............pllhsLh-RLutastppsts..................................................................................................................................................................................................................................................tplclFphF.pplspl...........lp...sp.phs.............................................................................................................................................pshlsL.suLhslsh...p.haP...........-cl.-a.l.DplLthshp.hhpph..............tt.....psstsp.pplh...........................tLL.h........................Pl...pp........a.slhshLpL..pa..lh.ph.s.hts.p+thuh.llpsll................cp.p.s.h...................................lss..-p.....l-tlh.pllpsLl...................p-t....pt.s.t........................................................-s--hh-EQshluRhlHh.l........p..sp........
.....-s-p..phtlLphs++thh....tG.s...pRlpaTh..Psllhssh.........pLsh.phpt.p.........................................................................................................................................tpphptpspplapahpp..slssLhpt...................s-LsL+L...aLpsu.sAs.p.hs.....................hEp...luYEFhsQAFslYE-........................pI.u.DS+sQhtAlphIhush...pp..........hpshs....pE.N..a-sLhspsshhuuKLLKKsDQsRuVh..hsuHLaWssp..........................................................................h+sucRVh-CLp+uL+lAstshc..................sshplpLalElLs+Ylaaa.....-pt.stt....l.ohphlstLIphIppp.h............................................................................................................................................................................................... 0 117 178 255 +980 PF04129 Vps52 Vps52 / Sac2 family Wood V, Finn RD anon Pfam-B_10164 (release 7.3); Family Vps52 complexes with Vps53 and Vps54 to form a multi- subunit complex involved in regulating membrane trafficking events [1]. 
22.60 22.60 22.90 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 508 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.63 0.70 -5.98 5 461 2012-10-03 17:31:52 2003-04-07 12:59:11 7 15 279 0 310 506 9 381.30 24 70.45 CHANGED lEpcSIcDhIp...............................ESpNluoLHNQIsuCDuVLscMEshLsuFQs-LusISoDIcpLQEKSscMslpLcNRpuVcu+LuphV-DllVPP-LIcsIl-GcVsE......spasopLplLs+htusscDQu.....s+uopAsKDVc...slLDKLRsKAlsKIRcaIlpKIhsFRKPh.TNhQIlQs.sLLKaKaaapFLhcNsRclAhElRsAYI-TMsKlYhuYF+uYlppLsKLQa-cusop.DL.hGVE-sus.......uLFFS..............KssoL+p+sslFolGcRssIls.tpl-sPllVPHIApssp.+YshEsLFRShchALlDNuosEYhFlsEFFslsGsptc-l...FppIFu+TLohspKalpslIusCaDsIGlhLsIRllp+aQLhup+RsVP.sLDsYa-ulllhLWPRFchVhDhplpSLRcssloshst............h-o+PHYlTRRYAEFouSllsLulsassup...lspLLscLpp-V-sFlL+lAKpFsc+KcQllFLINNYDhlluVLpEtus-suKEscsFpEhLNusospFl ..............................................................................................h...t.h.tttl.t.t.spthl.t...p.hc.......phLttFpscLsslos-lppLQppS.ths.pLpNRpthpt.Ls.hlptl..lssthlp.Ihp..s.....lsc......................h...t....p..............p..ph.h...h...........tp..t.....................................pt...tu.....ht-lt...s.lppLp.+...........Aht+l+............cal....l........t....p...l.hthR..........ps..................hN.......hQ.....h.Qp...th.......l.+.h.+............hh.tFLhtpt.thstElpptYhpTh.ph.......h.ah....sha.p.pY..ttL....p....lphp...hspt...sl...s..tt...t...........................t..h.......................................................t..ttha....t.lspR.h.pllp.....p.......p.t..........s....h......h..........................s.......h........s..........pt..............s.......t.............h.....................h...EhhFRs.phhLhDssstEa...........Fh.pFF.........................tph.........................h.tlh..t.sht.hh.p..hp.........p..h.l..t..............t..s..h.D....shulhl..hlpl..p.h.t.........h.t.p.R.....p..l..s..sh-.....ta.hpth...lWP+hp.lhphphpS.lp...p..h.s......t.......................................t.ts.+....ls.ppa
uph.tuhl.l....s..............t..h....pt.........................h......h.pl......t.hpthh.th......s....t.........................p........p......t.t....haL.hsNY.hhhsh......l......p.............................................................................................................................................................. 1 127 191 270 +981 PF04100 Vps53_N Vps53-like, N-terminal Wood V, Finn RD anon Pfam-B_5601 (release 7.3); Family Vps53 complexes with Vps52 and Vps54 to form a multi- subunit complex involved in regulating membrane trafficking events [1]. 24.00 24.00 24.20 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.37 0.70 -5.68 7 406 2012-10-03 17:31:52 2003-04-07 12:59:11 7 9 287 0 282 410 14 328.10 31 47.32 CHANGED tschssl-hINplFPs........................-tSLssl-plh.......p+lpt-Ip+lDssltshVcspsNsGpcup.sLp-AppulppL.pcIp-l+s+AEpoEthVp-lTcDIKpLDhAK+NLTsolTsLp+LpMLssuh-pLpthhppppYuchhs.hpuhhplh.phFptYpsl.pItpLppplsphptphhpplhpsapphF....ustsppc.u..hphLsDuChVhssL-ssl+cpllchFhpppLp-YhplFt-spEhuhLDcl-+RYsWhK+hLpsa-pt.htslFPtcWchshRLshpFC+.TRppLtpIhhp..+ccc.slclLlhAlppThpFEphLs+RF.............................p...E.c.ht.......................FcshlSSCFEPHLslYIppp-ppLtchh-phsp-.ph 
..............................................................t..casslpalNthFsp..........................po...L.s.s.lsphh...........pplp.php..pl-ppIt....ph.lpt..Qs...............ss..ppu.ptlppA.p.tsl..................t.pLht+Ipcl+p+AtpoEphl...pphTpDIK.pLDhAK+pLTtohT...sLp+LpMLs...............suh-pLpt..hsp.p.....+.p....Yt.-sup.L.................p..ulhplh.paF.............p...pY.p.s......ls.p..ItpLppplpthpppLhpplhtD.Fc...h.a........................st.t......t..p..........p.Lt.-uCh.VhssLss..p..h+..pcllphahp.pp.LppYt.lF.p..................ts.p......-......su.L.....DplsRRYsWh++hLhsa-pp...h.....sthFP.pWphsp.tlshtFCchT+pcl.......t....tlhtp................ptp....ph..-...VplLL.slpcThpFEp.Lsc+F.................................................................................t........................................................................................Fp.thlSpsF-PaL.tlalctp-+pLtphl.phht....t................................................................. 0 109 172 243 +982 PF02204 VPS9 Vacuolar sorting protein 9 (VPS9) domain SMART, Eberhardt R anon Alignment kindly provided by SMART Family This domain acts as a GDP-GTP exchange factor (GEF). It activates Rab GTPases by stimulating the release of GDP and allowing GTP to bind [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.18 0.72 -4.00 49 1496 2009-09-12 00:55:57 2003-04-07 12:59:11 13 121 301 10 1005 1449 9 105.40 25 12.44 CHANGED hpputpcLppl.sph..poPp-Klphllpspchlhpsl................................tttstshuAD-hLPlLlalll+ups..pL..huslpalpp.........F........................h.psshhpu........EtuYhLTshpuAlpalcshsh ............................................h..tAhpcltpl...sph....psPpc.Klthlh.....pss...chIhpsh................................................................................................t..t.ppsp.shuAD-hlP.......lLl.Ylll.......+u............s...............pL..........huplpalpc.........F................................................................................h...tssh.hp.G...........E.tuY..hlTsl......p.uAlpalpph..t..................................................... 0 361 525 776 +983 PF03302 VSP Giardia variant-specific surface protein Mifsud W anon Pfam-B_4536 (release 6.5) Family \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.42 0.70 -13.07 0.70 -5.83 6 1081 2009-01-15 18:05:59 2003-04-07 12:59:11 8 16 13 0 430 1062 0 178.50 20 59.27 CHANGED CstCpsGYclSsDKTpCsuou..sCps-NCKsCSs-c+..psCp-CsSssYLTP.TpQCIDsCtKIGNYYssTsupsKplCKECssANCKTC-spGpCpsCsDGFYKsG-sCuPCcpsCKTCuuGTuSDCTcC.oGKsL+YGsDGTKGTCGtGCsTGsGuG.ACKTCGLTIDGsSYCSECAspTEYPQNGVCoSssuRAssTCpsuslAsGhCuoCssGah+MNGGCYETTKaPGKSVCpsAsuuG.TCQKtAsGY+LssssLssCS.GCKpCoSsTsCTsCh-GYVKo..osuCsKCDuoCcTCT.GusTsCcsCuTGYYKouospsuCTus-SD.pslTGVpsClsCAPPssspGSVLCYLIKDu...G.STNKSGLSTGAIAGISVAVllVVGGLVGFLCWWFICRGKA 
...........................................................................................................s...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 430 430 430 +984 PF00092 VWA vwa; von Willebrand factor type A domain Sonnhammer ELL, Bateman A anon Prodom Domain \N 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.17 0.71 -4.55 189 10632 2012-10-10 16:07:06 2003-04-07 12:59:11 23 753 2209 191 4154 16265 2130 166.70 19 31.03 CHANGED DllhllDuSsSlst.....tp.....app.s+palppllp.th.t.......hstpssplullpauspsp............h.hslssh..pstpphhptl...........tphhhhsusTp.hup.ALphshpphhpt..........ttssRt...sss+lllllTD.....Gpssss...tthstthppt................slhshulGh.......................psstppLpplu..s.tt..pphhhhpshpt................phhppl ............................................................................DlsFllDs...S..sS...lst.................................tp..............................a.pt.....h+...p...a.l..t.....p.llp..ph..p...........................hu..t...pp.s.+.......l.....u....l.l..p..a...u...s..p.sp....................................................s.h..p...ls..s.h.........p.s.t.p.....p.lhptl.................................................pph.h.h.h.u..u..s...T...t....s..u..t.......A..L...p...h...s..h...p.p.h.hpp.........................................tsR..........ps.s....p....l.......l.l.l.l...T.D.........................G...p......s.....s.......s...p.................h........t............h..t....thppt................................................sl.h.h.hulGh..................................................pss...p.p.L.ptls.........................h......................h........................................................................................................................................................................... 0 1087 1482 2603 +985 PF00094 VWD vwd; von Willebrand factor type D domain Bateman A, Sonnhammer ELL anon Dotter Family Swiss:P17554 contains a vwd domain. Its function is unrelated but the similarity is very strong by several methods. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.27 0.71 -4.06 62 4125 2009-12-18 15:39:34 2003-04-07 12:59:11 20 537 352 0 2288 3757 7 145.50 21 18.65 CHANGED Csl.Gs..chpTFDGtpYsaps....ss.....sah..lspssssp..........hphhlt.ppsstsssth...h.pplplhht.............shplphtts.......htlhlsspplsh....shttssh..tlphhs........tshhhlth....thshpl..hphcs...tlhlplstthpspssGLCGsassctpsDhhpssGp ................................................................................................................................CtshGss.HahTFDG..t....has.F.G............sC....................pYh.......Lsps.s.ts.p................................phtlp..h...p.p...p..s.t....s..p.pt.h..............h.hpp.lpltlt............................................shplpltps................pl.h.l.ss.p.t.hth...............Pht.tsh.....tlphht................................t.t...hhlth........thshtl....h....p.h...s.......s..............pl......h.l.p..l...s.s............p....a....t......s....p.....s..s...GLC..G.sa.ss..p..p.sDhhh.ss................................................... 0 497 686 1444 +986 PF00095 WAP wap; WAP-type (Whey Acidic Protein) 'four-disulfide core' Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.55 0.72 -3.99 72 1589 2012-10-02 12:37:03 2003-04-07 12:59:11 16 122 164 5 826 1565 1 44.60 36 19.27 CHANGED KsGpC.P............tttt.Ch......ppC.psDpcCss.spKCC.........s.u.C.Gpp.ChtP ..............KsGtC.P.........................tttstCh.............ppC.....p......sDpcC..ss.....tp.....K...CC..........s..u.C..Gpp..ChtP........... 
0 219 260 462 +987 PF00400 WD40 G-beta; WD domain, G-beta repeat Finn RD anon Pfam-B_2 (release 1.0) Repeat \N 21.00 12.10 21.00 12.10 20.90 12.00 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.75 0.73 -7.90 0.73 -3.89 1804 193252 2012-10-05 17:30:42 2003-04-07 12:59:11 27 2206 1701 1549 125805 189768 5414 38.20 23 20.36 CHANGED tphhtsh.puHpsslpslsasss........................sphlsouu..tDssl+lWc ............................................................................h......h.puH.p...s...s..V..p....s.l.s.asss.............................................................................sphlso..uu..............tDtsl+lWs........................................................... 0 44330 69386 100531 +988 PF00568 WH1 WH1 domain SMART anon Alignment kindly provided by SMART Domain WASp Homology domain 1 (WH1) domain. WASP is the protein that is defective in Wiskott-Aldrich syndrome (WAS). The majority of point mutations occur within the amino- terminal WH1 domain. The metabotropic glutamate receptors mGluR1alpha and mGluR5 bind a protein called homer, which is a WH1 domain homologue [2]. A subset of WH1 domains has been termed a "EVH1" domain [3] and appear to bind a polyproline motif. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.44 0.72 -4.34 8 1177 2012-10-04 00:02:25 2003-04-07 12:59:11 18 30 236 25 600 1088 23 104.80 28 24.22 CHANGED huh......psIsouhAplahh-sss+c.Whhs....ppsusVshh+DsspNoYhlhuhclp.cscllhspplhsshpYspsoshFHpapsscs..hhGLNFuSE-EAspFtcthpcsl ...................................................tt...........slhp.s.hApVh....hh......-.ss.s....pc.....Whsh.......pthusVs..h.....h.....p......c......s......s.....p...s.........s...........a....hlhuhc.h...........p...sp...p............lllsp......p..l.h.......p..s.........h..p...Y...s....p.....s.....s........sp.....F....H..p..W...........p.s.....s.c.p...........haG...L..sFu..Scp-Ap.....p...Ftcthpc..h............................................... 0 159 234 397 +989 PF02205 WH2 WH2 motif SMART anon Alignment kindly provided by SMART Family The WH2 motif (for Wiskott Aldrich syndrome homology region 2) has been shown in WASP Swiss:P42768 and Scar1 (mammalian homologue) to be the region that interacts with actin. 23.10 13.00 23.10 13.00 23.00 12.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.37 0.72 -4.21 57 1560 2009-01-15 18:05:59 2003-04-07 12:59:11 15 69 271 11 837 1462 8 26.60 38 5.24 CHANGED ssssRuALLusIppG..t..LKKs.posD+S .........sssRsuLLssI+pG....hpLKKs.pptp.......... 0 213 324 544 +990 PF02467 Whib Transcription factor WhiB Mian N, Bateman A anon Pfam-B_2249 (release 5.4) Family WhiB is a putative transcription factor in Actinobacteria, required for differentiation and sporulation. 
22.80 22.80 22.80 23.00 22.40 22.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.62 0.72 -3.67 8 2386 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 569 0 664 1645 791 66.10 42 61.73 CHANGED WphpAlCRssDP-hFF.P..-RG..pusR-..AKclChR.CPVhpE..CtAaALcscctaGVWGGlSE.....cERRtL .....................................WptpAhC...t.....p..t.....D....P.-....hFF...P.......ppG.........t.s..pcc.........AK...tlCtp.CP..Vppp....CLpa.....A......L........p......s....s........p......c......a........GVW...G...GhoE.......cERRtl........................ 0 218 497 614 +991 PF02019 WIF WIF domain Bateman A anon [1] Family The WIF domain is found in the RYK tyrosine kinase receptors Swiss:P34925 and WIF the Wnt-inhibitory- factor. The domain is extracellular and contains two conserved cysteines that may form a disulphide bridge. This domain is Wnt binding in WIF, and it has been suggested that RYK may also bind to Wnt [1]. The WIF domain is a member of the immunoglobulin superfamily, and it comprises nine beta-strands and two alpha-helices, with two of the beta-strands (6 and 9) interrupted by four and six residues of irregular secondary structure, respectively. Considering that the activity of Wnts depends on the presence of a palmitoylated cysteine residue in their amino-terminal polypeptide segment, Wnt proteins are lipid-modified and can act as stem cell growth factors, it is likely that the WIF domain recognises and binds to Wnts that have been activated by palmitoylation and that the recognition of palmitoylated Wnts by WIF-1 is effected by its WIF domain rather than by its EGF domains. A strong binding affinity for palmitoylated cysteine residues would further explain the remarkably high affinity of human WIF-1 not only for mammalian Wnts, but also for Wnts from Xenopus and Drosophila [2]. 
25.00 25.00 26.30 25.90 23.50 18.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.79 0.71 -4.71 10 223 2009-01-15 18:05:59 2003-04-07 12:59:11 13 21 84 5 144 241 0 122.70 35 26.76 CHANGED laIsccEV++Ll......GlsA-lYYl+sGtlNsYuhc.....F......hlsVPS-VsslsFTWpS.uppcYhYuFsl.hosDpplLstPslsIsppGcVPpshpsFulsL.CoGspuGpsshslsLplpsscsh..ssTsLph+p+KhC ...................als..ph.hhh......G..l.p.t-..lahVcpGhlspashs......F........hhslPuplpplpFT.Wp.uh.u....ph.phhYshps................s.c.............tshhstPplNIshpGpVPp....p....hp.sapV....tLsCo.Gphsupsshplpl.lpsspt....ssThLph+ppKhC.................. 0 30 41 95 +992 PF03106 WRKY WRKY DNA -binding domain Bateman A anon Pfam-B_85 (release 6.5) Domain \N 21.00 21.00 21.00 21.10 20.60 20.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.07 0.72 -4.13 34 3051 2012-10-02 23:28:20 2003-04-07 12:59:11 10 33 220 3 1178 2878 5 54.90 54 20.61 CHANGED hcDGYpWRKYGQK.VKGu.aPRSYY+CTps...sCssKKpVERustDsphs.hsYcGpHNHsh ...........sDGYp.WRKYG.QK.sl.KGsP......h.P....R....u..........Y...Y+...Cop......sCss+...K....pVp.......R...s.t.....p..........D......s....p......h......h.........hsTYcG.pHsH.............................. 0 150 687 953 +993 PF02206 WSN Domain of unknown function SMART anon Alignment kindly provided by SMART Family \N 19.50 19.50 19.50 20.10 19.40 19.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.99 0.72 -4.23 36 345 2009-01-15 18:05:59 2003-04-07 12:59:11 13 36 5 0 343 301 1 68.00 24 6.39 CHANGED ssLpphhcphphluRlsNuIsLQtulhssoIshc-lluELLphsss.shsplhslcs...spltptlpplpch ...........hpphhcchphlARlsNuIsLQsulhs.sol.shcclIuELLslssh..phsplhshc........pplpphlppl...h................................ 
2 57 110 343 +994 PF00397 WW WW_rsp5_WWP; WW domain Finn RD anon Prosite Domain The WW domain is a protein module with two highly conserved tryptophans that binds proline-rich peptide motifs in vitro. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.62 0.72 -4.06 547 8927 2009-09-12 21:03:02 2003-04-07 12:59:11 21 403 357 119 5200 8316 391 30.00 36 6.13 CHANGED hsss.W.p.thh...ss.....GchY.YaNppTppopW-cP ..........LPsG.W.c.pphs..sp....GchY.YhscpT+.pTpWccP..... 0 1588 2359 3759 +995 PF02825 WWE WWE domain Aravind L anon Aravind L Family The WWE domain is named after three of its conserved residues and is predicted to mediate specific protein- protein interactions in ubiquitin and ADP ribose conjugation systems [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.29 0.72 -4.55 14 1021 2009-01-15 18:05:59 2003-04-07 12:59:11 15 76 133 6 601 935 23 72.80 24 8.55 CHANGED hststtshhW.ap....sssspWpsYs.ps..pppIEsuaptpcp.....hlp.l..shhGtsYhlshpsMpQhpptss.....thRtV+R .........................s.......hhWpap..........scpup...WpsY......s.........tp.s...........spp.........lE.p.uap.pspp...............sls...h............ss..t....t...h...t.Yhlc.....hps..M..p...Qhsppos............ptRtl+R...................... 0 158 223 410 +996 PF02706 Wzz wzz; Chain length determinant protein Bateman A, Mian N anon Pfam-B_1977 (release 5.5) Family This family includes proteins involved in lipopolysaccharide (lps) biosynthesis. This family comprises the whole length of chain length determinant protein (or wzz protein) that confers a modal distribution of chain length on the O-antigen component of lps [1]. This region is also found as part of bacterial tyrosine kinases such as Swiss:P38134. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.63 0.71 -4.39 38 7377 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 3167 41 1494 5501 1022 184.60 16 39.45 CHANGED scIDLhcLlttLh+p+hlIlhlsllhshhuhhYs..hhssPpYousshlhVspppt..........h...........sl.ssh......p.hs.shhcllpSppllpclhpclplt........................................................................................................................t.lpp.lsl..s.tp.potllslshp.spssppspplssplsps .........................................................pl-lhpl....ht.h..l.......hc.......t....ph.h.l.l.hh.s.h.l.h...s..h...l..u.h....h.h...s....h..........h.......h..........s.....s.p...Y..pus..u..h....lh...l.tppstt...................t............t...........s.h....s.st...............................p..h.hs....s..h..h...p..ll.p....S....p.........pll....p....cs..h.p.p..h.slth.............................................................................................................................................................................................................................................................................................................................................................................pt.l..p.p...t..l.pl.....p......................t....po...t...l..l....s..l..s.hp....sps..p..s.t.plhsth...h............................................................................................................................................................................................................................................................................................................................................................................. 0 472 948 1240 +997 PF03254 XG_FTase Xyloglucan fucosyltransferase Bateman A anon Pfam-B_3419 (release 6.5) Family Plant cell walls are crucial for development, signal transduction, and disease resistance in plants. 
Cell walls are made of cellulose, hemicelluloses, and pectins. Xyloglucan (XG), the principal load-bearing hemicellulose of dicotyledonous plants, has a terminal fucosyl residue. This fucosyltransferase adds this residue [1]. 20.40 20.40 20.70 20.50 20.20 19.50 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.64 0.70 -6.00 29 257 2010-01-08 15:56:59 2003-04-07 12:59:11 8 7 32 0 171 248 4 376.80 37 79.45 CHANGED lllu..............ptthsshhspttsssss.hspssptspD....+LLGGLLusGFDEpSClSRYpSu.LYRK.sSsa+PSsYLlSKLRpYEsLHKRCGPGTcuYp+AlcpL...puu....pssuss.....-.CpYlVWlshpG..LGNRhLolASsFLYALLTcRVLLVD...up-hu-LFCEPFPsTSWLLPh.DFPlps...hpuasppsscsYGsML+scsIsss...............t...PsalYlHLsHshpc..tDKh.FFC-c-QshLc+VPWLll+oDsYFlPuLFLlPuFppELs+LFPpK-oVFHHLuRYLFHPoNpVWGllTRaYpuYLA+ADERlGIQlRVFsppsssappVhDQIluCTpcE+LLPcl..sspps..........ssssupss+.KAVLlTSLh.stYaEpl+sMYWc+PTsTGElluVaQPSHEchQppspphHstKAhAEhYLLSLoDslVTSuhSTFGYVApGLGGL+PWlLacPps..tpsP..sPPChRuhSMEPCFHsPPhYsC+u ...........................................hh.s....................................-....tLhsGLL..s.s..s..h.s.ct..oChSRapt..h.a...h+.................h.....sSsaLlp+LRpaEthp+.+CGPtTt.YppshppL.pps..........t.t..sss......t..C........pYlVh.h......s...h..p.G..LGNRhLshsSsFLYAlLTsRVLLV.....c........st.c.h.ssLFCEPFPs......o.oWhL..P.................c..F..Phtt...........httht...tt.csa.sshlpp.phht.ss.................PsahalpLtts...t............-.p.h.F...aC-csQ..th..L.p..plsW.ll.hp.oD..YasPuLF........hhPtappE.Lp.phFP.p..+-.sVFHaLuRYLhHPsNplWt.hlp+.a....apuY.Lut.u.s.cp..lGlQ.lRh...a..t......t.....s.......h..p.h....hc.Qlhs.Cs.pc.p.lLP..pl....tttt....................sst.tstp.puVLlsSL..s.Yh-pl+shYhp.............pssh..sG.....-..h.ltVaQ....PSH.....EthQ.p.tpp.Hs..KAhAEhaLLShsDhl.lTSu.hSTFGYVA.pGLuGlpPW.lhh.p..t........ttss.....p.ssChpshShEPCha.PP..htCp.................................................... 
0 33 90 137 +998 PF05181 XPA_C XPA; XPA protein C-terminus Studholme, DJ anon PF02186; Domain \N 25.00 25.00 25.20 25.40 24.20 24.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.47 0.72 -4.37 16 261 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 237 2 182 251 2 51.70 54 16.42 CHANGED -KYSLLTKTEsKpDYLLTDsELcDcE.LLs+LtKsNPHpusaucMpLaLRhQV ......-KauLlTKTEAKp-YLLpD.......s-LcccE..lL.altK.NPHpupWucMpLaL+hQV..... 0 58 94 146 +999 PF01286 XPA_N XPA; XPA protein N-terminal Finn RD, Bateman A, Studholme, DJ anon Prosite Domain \N 27.10 27.10 27.10 27.40 27.00 27.00 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.95 0.72 -4.27 10 119 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 95 2 72 115 0 33.60 52 11.78 CHANGED .-Y.hC--CsK.FMDSYLhspFDhsVCDsCRDs- ....sashCcECu+pFhDSYLhspFDlslCDsC.RDs-.... 0 23 31 52 +1000 PF00102 Y_phosphatase Protein-tyrosine phosphatase Sonnhammer ELL, Griffiths-Jones SR anon Swissprot_feature_table Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.81 0.70 -5.02 114 7626 2012-10-02 20:12:17 2003-04-07 12:59:11 22 291 589 328 3916 9772 240 210.40 29 34.46 CHANGED NhpKNRatslhshDp..oRVtL.................................t..s.sD..Y..INAsal....................uht.t...tppaIsTQuP.......l..................pTh...pDFW+Mlapp..p.spsIVMLsphhEt.........sp......KCspYWP.........pp..shpaGs.hpVphh.......ppp.tptshhl+phplp.....p....tspp...............................ppl.ppapap....sWP..Dps.......lP..ps.....spshl....phlcplcptp...................tsPllVHCS.....AGlGRTGsalslchhhpplcp.......pth...lclhp........hlpplR.ppRsthVQo......pQYhFlapslhc 
.................................................................................................................................N..KN.Rh......s.l...h.........s..h......-..p..sRVhL.............................................................................................................t.....s-.......Y.....I..N...Asal..........................................................pshp..p.........ptaI..so...Q......u..P...............l...................................................pTh.......tDF..WpMlacp....................p......s..t......h................I.VMlsph..Eh......................................sp...............KC.tpYWP.................................pt..........shp.a.....s....t......h.p.Vphh...................ppp....hsp.a..hhR..p.h...pl.p.....p.......tppt........................................................................................+pl....ppa.pah........s.W.....P....-pu................................................lP......pp...............spshl............phl.p.p.lpptp.............................................................................ttsP..l...l.VHC..S............................AG.sG.R........T.G.....s...a...h...s......l.s..h.h.h..p..t..hcp...........pth.........................lslh.p.......................................................hl..p.p..hR...p.p...R.s..t.h.lp..o.........................pQ..Y.Fhapsl....................................................................................................... 1 1277 1669 2915 +1001 PF04893 Yip1 DUF649; Yip1 domain Finn RD, Bateman A anon Pfam-B_5598 (release 7.6) Domain The Yip1 integral membrane domain contains four transmembrane alpha helices. The domain is characterised by the motifs DLYGP and GY. The Yip1 protein is a golgi protein involved in vesicular transport that interacts with GTPases [1]. 
30.90 30.90 30.90 30.90 30.80 30.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.47 0.71 -4.76 208 1661 2012-10-01 22:34:14 2003-04-07 12:59:11 12 30 827 0 929 2057 257 174.80 14 66.95 CHANGED lhhpPsph.appltppt..........................hhhhhhlhshlshlhshhhs.........h...h.........................................hthhshhts......hhshhlhh.......hlshhlhuhlhth.......hsphhs.......upss....htp....shslhuas...hhPhhlstlhshhhhhhh.........................................hh.hhlhshWsh..hlhhhulttshphsth..puhhhshh.......shllhlh .......................................................................hp.tth.hpchht.hh.........................................................tp.cLh.uslh..hs.l..shshhlhhs...................................................................................th.h.shhuh................lhhshh...........hlshhlhshlhhh.....................h......hs...................ushs.....ahp..............shs.l.h.GYs....hlP.h.s.lssll.sh.hh..hhh.h........................................hhh..shlhhhWsh....hhhshhh.tthh..t..h........t.hh....................hhhhh........................................ 0 325 549 754 +1002 PF03226 Yippee-Mis18 Yippee; Yippee zinc-binding/DNA-binding /Mis18, centromere assembly Mifsud W anon Pfam-B_2930 (release 6.5) Family This family includes both Yippee-type proteins and Mis18 kinetochore proteins.\ Yippee are putative zinc-binding/DNA-binding proteins. Mis18 are proteins involved in the priming of centromeres for recruiting CENP-A. Mis18-alpha and beta form part of a small complex with Mis18-binding protein. Mis18-alpha is found to interact with DNA de-methylases through a Leu-rich region located at its carboxyl terminus [5]. 
24.80 24.80 24.80 24.80 24.50 24.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.82 0.72 -3.94 54 862 2012-10-03 11:08:31 2003-04-07 12:59:11 9 10 301 0 600 712 1 97.00 39 59.44 CHANGED hVFpCtpCpshLuDShshls.......pspp........psllh......................p...plsssVhlscphhsup.p.t......sshpslhCptCstslGhhYhsospp.hshhcshFslphcplpsYp .........................................................p.hasCtpC+scLusps-lIS+.....................shpGtps.......+AYLF...................................................................sp......llNls.pu....s...p.-RhhhTGh.............HsVsDIhCpsCpstLGWKY-.....pAhEpoQKYKEGKaIlEhtth....h............................ 0 172 307 445 +1003 PF02757 YLP YLP motif Ponting CP anon Ponting CP Motif The YLP motif is found in several drosophila proteins. Its function is unknown, however the presence of completely conserved tyrosine residues and its presence in Swiss:Q15303 may suggest it could be a substrate for tyrosine kinases. 25.00 5.00 25.00 5.10 24.60 4.90 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.31 0.76 -5.60 0.76 -3.38 26 674 2009-01-15 18:05:59 2003-04-07 12:59:11 12 40 12 0 397 674 0 8.90 65 15.78 CHANGED usEYLPPsp ..usEYLPPsp.. 0 60 60 245 +1004 PF04146 YTH YT521-B; YT521-B-like domain Bateman A anon Pfam-B_1386 (release 7.3) Domain A protein of the YTH family has been shown to selectively remove transcripts of meiosis-specific genes expressed in mitotic cells [3]. It has been speculated that in higher eukaryotic YTH-family members may be involved in similar mechanisms to suppress gene regulation during gametogenesis or general silencing. The rat protein Swiss:Q9QY02 YT521-B is a tyrosine-phosphorylated nuclear protein, that interacts with the nuclear transcriptosomal component scaffold attachment factor B, and the 68-kDa Src substrate associated during mitosis, Sam68. 
In vivo splicing assays demonstrated that YT521-B modulates alternative splice site selection in a concentration-dependent manner [1]. The YTH domain has been identified as part of the PUA superfamily [4]. 21.40 21.40 21.40 23.30 20.90 21.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.53 0.71 -4.58 59 968 2012-10-02 17:37:24 2003-04-07 12:59:11 10 23 252 2 553 870 5 142.90 42 23.91 CHANGED su+aFlIKSho.t-slctSlchslWuoospss........c+LspAacpsp..........sVaLh.FSVNtSGpFtGhAcMhos.lshs...................................................................................................pss.hW..................................................................tp....casGs.FplcWlhlc-lP.pph++l.......hss...-s+sVphuRDspElp.phGhpllplFcph ........................................................................................s.sRaFlIKSho..c-cl+cSl+as.lWsST..psN........++LssAa.+psp...................sVaLl.......FSVNsSGcFsGhAcMtos.lcas.....................................................................................................................................................................................................................................ps.sshW....................................................................................................................................................................................................tt.......t+WsGh.FcVcWl..hl.+DlPssph+Hl.......p.p..N-N.....KPVstSRDsQEl..hc.pGtpllplht..h......................................................................................... 
1 150 268 403 +1005 PF00643 zf-B_box B-box zinc finger Bateman A anon Prosite Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.88 0.72 -4.06 106 7772 2009-09-12 06:38:22 2003-04-07 12:59:11 19 359 385 16 4488 6434 9 43.10 26 9.16 CHANGED pptthC..spHp.....p.p...hphaCpsCpp.hlCppChhtt.......H..ps......Hph...hsl ........................t....hC...spHp................c..p.....hphaCp..s..s..pp..hl.....C.h.h..Cphst.............H..ps...................Hphh............................... 0 1713 2306 3263 +1006 PF02892 zf-BED BED zinc finger Bateman A anon [1] Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.41 0.72 -4.20 41 1403 2012-10-03 11:22:52 2003-04-07 12:59:11 10 85 169 2 872 1516 7 47.30 26 9.97 CHANGED utsWcaFphhp.......tcppspCpaCtKthstt.........GTosLpcHL........hptpp .......................s.lWpaFphht.............tpppspC.......ph..C..p...ptlstt..........uTo.s.LppHL..........ptp....................... 1 185 394 653 +1007 PF01530 zf-C2HC Zinc finger, C2HC type Bateman A anon Swiss-Prot Family This is a DNA binding zinc finger domain. 25.00 25.00 26.10 27.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -7.84 0.72 -4.22 33 1621 2009-01-15 18:05:59 2003-04-07 12:59:11 13 44 89 6 769 1357 1 30.90 62 11.87 CHANGED thpCPTPGCcGpGHlsG.pas....sHRSluGCPhu ............h+CPT.P.GCDGpGHloG.pas....oHR...SL..SGCPhA.. 0 116 177 407 +1008 PF00097 zf-C3HC4 Zinc finger, C3HC4 type (RING finger) Sonnhammer ELL, Vella Briffa B anon Swissprot_feature_table Domain The C3HC4 type zinc-finger (RING finger) is a cysteine-rich domain of 40 to 60 residues that coordinates two zinc ions, and has the consensus sequence: C-X2-C-X(9-39)-C-X(1-3)-H-X(2-3)-C-X2-C-X(4-48)-C-X2-C where X is any amino acid [1]. 
Many proteins containing a RING finger play a key role in the ubiquitination pathway [2]. 21.00 20.70 21.00 20.70 20.90 20.60 hmmbuild --amino -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.37 0.72 -4.30 35 9094 2012-10-03 15:03:13 2003-04-07 12:59:11 20 329 3245 29 3178 27732 1125 40.40 44 7.09 CHANGED CslChchhppss................hhtCtHs.FCpsClpphhp........tptht..CPhC ...............................................CpIC...p...c.l..l...s..-Pl.......................................................pos.C......p........Hh....FC.+....s.....CIh.ctlc.............................hhssh.....C.PtC................................... 0 1053 1488 2303 +1009 PF00642 zf-CCCH Zinc finger C-x8-C-x5-C-x3-H type (and similar) Bateman A anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.40 0.72 -4.21 88 7686 2012-10-01 21:35:20 2003-04-07 12:59:11 19 244 456 18 4636 7265 193 26.00 35 7.49 CHANGED hcpthCphatcsG..tCpaGspCpFtHsh ..........p.hCpha..h.c.G.....tC.paG.s.p.Cp..FtHs...... 0 1486 2495 3614 +1010 PF00098 zf-CCHC Zinc knuckle Bateman A, Eddy SR anon Overington and HMM_iterative_training Domain The zinc knuckle is a zinc binding motif composed of the the following CX2CX4HX4C where X can be any amino acid. The motifs are mostly from retroviral gag proteins (nucleocapsid). Prototype structure is from HIV. Also contains members involved in eukaryotic gene regulation, such as C. elegans GLH-1. Structure is an 18-residue zinc finger. 20.80 16.70 20.80 16.70 20.70 16.60 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.41 0.73 -6.48 0.73 -3.78 184 50986 2012-10-03 11:39:54 2003-04-07 12:59:11 18 692 722 56 6654 45194 253 17.90 58 7.29 CHANGED tpCapCGp.GHhu+-Csp ....htCapCG.KEGHht+sCpt.... 
0 2202 3537 5115 +1011 PF02008 zf-CXXC CXXC zinc finger domain Iyer LM, Aravind L, Bateman A anon Bateman A Domain This domain contains eight conserved cysteine residues that bind to two zinc ions. The CXXC domain is found in a variety of chromatin-associated proteins. This domain binds to nonmethyl-CpG dinucleotides. The domain is characterised by two repeats [3], and shows a peculiar internal duplication in which the second unit is inserted into the first one [4]. Each of these units is characterised by four conserved cysteines, displaying a CXXCXXCX(n)C motif that chelate a Zn+2 ion. The DNA binding interface has been identified by NMR [3]. In eukaryotes, the CXXC domain is found in stramenopiles, plants and metazoans. Plants possess a mono-CXXC domain that is present in distinct chromatin proteins [4]. Structural comparisons show that the mono-CXXC is homologous to the structural-zinc binding domain of medium chain dehydrogenases [4]. 21.10 21.10 21.80 21.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.87 0.72 -4.15 31 1021 2012-10-08 21:21:50 2003-04-07 12:59:11 15 77 106 16 505 970 22 46.30 45 4.85 CHANGED pshp+pp+.CGhCcuCpps-.CGpCssCpD...hhKaGGsspp+Q.pChhR+C .........t.t++ppR.CGpCp.uCpps-sCGpCshCpD...hh.KF.G..Gssth.+Q.pCh.hRpC......... 0 110 148 279 +1012 PF01529 zf-DHHC DHHC palmitoyltransferase Bateman A anon Pfam-B_945 (release 4.0) Family This family includes the well known DHHC zinc binding domain as well as three of the four conserved transmembrane regions found in this family of palmitoyltransferase enzymes. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.00 0.71 -11.65 0.71 -4.87 97 4432 2012-10-03 10:42:43 2003-04-07 12:59:11 15 126 347 0 2951 4268 82 169.10 23 40.72 CHANGED hhsahhshhssPGhhspt.................................................................................................................................................................................................................................hhtss..tttptpaC.p...........Cphh.+PsRs+HCphCspClhchDHHCsWlssClGtpNa.+a.F....h.......hF...........lhhhslhhhhhhh.....hthhhhhphhtptphh.hhhh...........................................................................................................lhhhhhs........hhhhlhhshlhhh.........phh.hhh.ps...hT....ohEhhptt ...................................................................................................................................................................................................................................................................................................................................................................h...........Pu.......................................................................................................................................................................................................................................................................................................................................................tt..............thp...a.Cpp...............C..p........h.......h..+P.........s....R.u.+HC...p.....h..C..s......pCV..h.+h.D...H..H.....C.s..W.....l.s.NCl...G.............tp....Na..+a..F..h..........hF..................................lhh...h.s..l.h....s.h....h..h....h.h........h.s.h....h......h....h...h....t..h....h....t.....t....t.t...........t..h....h..h...............................................................................................................................
.....................................................................................................................hhhhh.hs................hhh.hh.h...l.s...h....l..hhh...............phh.hlh..ps............T....ohE.h...t....................................................................................................................................................................................... 1 1087 1605 2335 +1013 PF04438 zf-HIT HIT zinc finger Bateman A anon Bateman A Domain This presumed zinc finger contains up to 6 cysteine residues that could coordinate zinc. The domain is named after the HIT protein Swiss:P46973. This domain is also found in the Thyroid receptor interacting protein 3 (TRIP-3) Swiss:Q15649 that specifically interact with the ligand binding domain of the thyroid receptor. 20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.11 0.72 -4.30 100 1073 2012-10-03 05:12:49 2003-04-07 12:59:11 11 21 308 3 734 1067 14 30.60 38 9.10 CHANGED tpphCsl..Ctp.....u+YpCPpCsht........hCSLpCh+ ..................hCsl...Csp.....u+Y.pCs+C.sht........hCSLpCh+..... 0 221 385 573 +1014 PF02891 zf-MIZ MIZ/SP-RING zinc finger Bateman A, Mistry J, Wood V anon Psi-blast Q9UN16 Domain This domain has SUMO (small ubiquitin-like modifier) ligase activity and is involved in DNA repair and chromosome organisation [2][3]. 22.20 22.20 22.30 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.07 0.72 -4.45 36 975 2012-10-03 15:03:13 2003-04-07 12:59:11 15 16 294 1 586 1031 4 50.20 45 7.21 CHANGED lSLpCPlohpRlpl..PsRuptCpHlQCFDhtuaLphspptss....WpCPlCsps ............lSLhCP..ls...t.h.Rl.pl..PsRutsC.pHl.Q..C.FDh.phaLphNp..cp.so...........................WpC..PlCsK.t................ 
0 192 294 443 +1015 PF01753 zf-MYND MYND finger Bateman A anon Bateman A Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -9.03 0.72 -4.08 177 3934 2012-10-03 05:12:49 2003-04-07 12:59:11 13 252 370 26 2780 4039 204 41.10 35 7.26 CHANGED C..shCtp.....................tshp..pC.spC...........p...ts....hYCSppCQptcW.t......t....H+p.C ...................................................Ct.Ctp...................................tshp.......pC..stC..............p......ts........hYC...u...ppCQ.p.tcW..t............t.......H.+p.C................... 0 1218 1661 2317 +1016 PF05020 zf-NPL4 NPL4 family, putative zinc binding region Wood V anon Pfam-B_13681 (release 7.6) Family The HRD4 gene was identical to NPL4, a gene previously implicated in nuclear transport. Using a diverse set of substrates and direct ubiquitination assays, analysis revealed that HRD4/NPL4 is required for a poorly characterised step in ER-associated degradation after ubiquitination of target proteins but before their recognition by the 26S proteasome [1]. This region of the protein contains possibly two zinc binding motifs (Bateman A pers. obs.). Npl4p physically associates with Cdc48p via Ufd1p to form a Cdc48p-Ufd1p-Npl4p complex. The Cdc48-Ufd1-Npl4 complex functions in the recognition of several polyubiquitin-tagged proteins and facilitates their presentation to the 26S proteasome for processive degradation or even more specific processing. 
20.40 20.40 20.80 20.60 18.60 19.80 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.90 0.71 -4.55 22 277 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 241 0 198 293 0 135.90 48 23.39 CHANGED pVpp.slDshL-KpDGhIpRp+.ophC..+HGsKGMC-YCsPL.PaDcpYhpEpp..IKHlSFHAYL+KlspspNK.t.GoSalsPLppPsaplphsCssG.H.PWPcGICoKCQPSAITLQpQpFRMVDHVEFtsssllspFlshWRpTGs ................................................................s.V.pstlDphLsKpDGpI.Rp+Dsp.h.C..+HGspGhCs.aCh.PL..-Pa.DtpYLp-pc..IKHhSFHuYlRKlsuus..sK.............Gp..hh....sLpp...sh+l+psC.s.......H.PW..P.cG.ICoKCQ....PSAITLppQpaRhVDplpFpsssllspFLsaWRpTG..................... 0 67 103 162 +1017 PF04810 zf-Sec23_Sec24 Sec23/Sec24 zinc finger Bateman A anon Bateman A Domain COPII-coated vesicles carry proteins from the endoplasmic reticulum to the Golgi complex. This vesicular transport can be reconstituted by using three cytosolic components containing five proteins: the small GTPase Sar1p, the Sec23p/24p complex, and the Sec13p/Sec31p complex. This domain is found to be zinc binding domain. 23.10 23.10 23.10 23.70 23.00 23.00 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.35 0.72 -4.39 133 1393 2009-01-15 18:05:59 2003-04-07 12:59:11 10 29 324 26 939 1354 14 40.00 38 4.58 CHANGED sPlRCp..pC+ualNPasphs...suppWtCshCtt.pNplPspY ...............PlRCp......pC+uYlNPa.sph.t.......pu+p..WtCshCtp.tN.plPspY...... 
0 313 517 760 +1018 PF02148 zf-UBP Zn-finger in ubiquitin-hydrolases and other protein Mian N, Bateman A anon IPR001607 Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.77 0.72 -3.86 179 2273 2012-10-03 15:03:13 2003-04-07 12:59:11 14 45 510 19 1416 2222 31 68.20 29 11.39 CHANGED C.....spCs............................s..ps..................lWl....CLp..CGpluCG.....R.p...........s..sHAhpHa.p.pss.....................................Hslslslsshp........l.aCYss..-shVp..s.ph ..................................................................................C.ptCt...................................sp....ts.......................lWh....CLp..CGpluCG.............R..........................p.sHAhpHh.p...psp...........................................................Hslslslpshp...............................l..aCYss..cp.Vhp..h.................................................................. 0 429 703 1082 +1019 PF04704 Zfx_Zfy_act Zfx / Zfy transcription activation region Kerrison ND anon DOMO:DM04798; Family Zfx and Zfy are transcription factors implicated in mammalian sex determination. This region is found N terminal to multiple copies of a C2H2 Zinc finger (Pfam:PF00096). This region has been shown to activate transcription when fused to a GAL4 DNA binding domain [1]. 
25.00 25.00 25.30 25.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.01 0.70 -5.58 5 252 2009-01-15 18:05:59 2003-04-07 12:59:11 8 42 68 0 75 216 0 215.60 54 43.94 CHANGED EsVlIEDVcCSDIL-EsDV.SEuVIIPEQVL-uDVA........EE-SLuphslPDplLTS-LlosololPEpVhsuEuVosss.GHlEpllpssVlttEIlsDPLstDlVSEEVLVADssSEAVIDA......SGlPl+ppDDD....KuNCEDYLMISLDD.uuKlDH-GuoslTMuA-uEpDs..SKlDGsuPEVIKVYIFKADu-ED.DlGGTlslVESEs-NcHtssVhDspsSuRlPREKMVYMsVsDSQQEEDDLssuEhsDpVYMEVIVGEEDAu.......stHEpQM-DSElpKTFhPlAWAAAYGNNo.DuIEsRNGTASAhLHIDESsGLsRlsKQKsKKRRRGEoRQYQ ..............tl...-sVps.-IhtEs.l..o-.....sVIhP-tlLt.s-Vs.........-.sL...t........c.lLsu-lhtt....s.........thP-pVhss-.hp...................................................hsp...-slhp...............................tlslp..p..p.................cssspDYLMI.ShD-.s.Kh.......s.hphtst.......t....p.pt.s.E.VIK.VYIFKA-st-D..-lGGT..lsES-.pssH....tlhs..s.p.Rh..REKMVYMsVpDSp.E-..-.-........................htpcpt...............................p.hpts........s....hhh.hs................u................Ns....ssl.Es+sssAuthLpIs-uhshs+lhKQKsKK..++Rs-s+QhQ................................................ 0 3 9 28 +1020 PF02535 Zip ZIP Zinc transporter Bashton M, Bateman A anon Pfam-B_1189 (release 5.4) & Pfam-B_1903 (Release 7.5) Family The ZIP family consists of zinc transport proteins and many putative metal transporters. The main contribution to this family is from the Arabidopsis thaliana ZIP protein family these proteins are responsible for zinc uptake in the plant [1]. Also found within this family are C. elegans proteins of unknown function which are annotated as being similar to human growth arrest inducible gene product, although this protein in not found within this family. 
28.30 28.30 28.30 28.30 28.20 28.20 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.02 0.70 -5.42 40 6416 2012-10-02 19:55:49 2003-04-07 12:59:11 17 54 2513 0 3060 5517 747 223.60 18 74.42 CHANGED h.thhlhulhslhlsohhushhshhh.hhtt............................t..phhlphlhsFuuGllLusualHlLPcuhcshtpt.pht.............................................shsshhshhGhhhhhhl-phhphhhtt............................................................................t..t.....t..........t.t..............................................................................................................................t..htptptcpptpspppsppphtpthp.t....................uhllh...............huhslHslh-GLAlGsotsssp...shslhlAlhhHcl.cslGlsuhLlpushsptpshl...hshlhulssslGhslGlsl.........tustuthspull.uhoAGhhlYlull-llspthtpsp...........................h..lhphlsllhGhulMslls .........................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................t................................................................sh.h.hh......................................................................hu.l.s..lHs..h....-G................l..A..l...u.....s........s.......h.t.....ss..................uhslhlu..l..h..h......Hp...hscuh..u.............ls......h...l...h..............t....s...........s....h...........s....t.....h............p.....s..hh.......hshh...u..l......s...................p.sl.G..sh........l.u.hhh.................................s......................h....s..............h...h....h.............s.................h..............h..........h..........u...h...sAGh..hl...al.s...h.-...l.l...s....p.th.t..tp......................................................hh....t..h.h..s.h....h....h.Ghhlhhh..s............................................................................................................................................................................................. 
0 1027 1736 2521 +1021 PF00246 Peptidase_M14 Zn_carbOpept; Zinc carboxypeptidase Finn RD, Bateman A anon Prosite & Pfam-B_4832 (Release 7.5) Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.07 0.70 -4.80 114 5755 2012-10-02 19:46:12 2003-04-07 12:59:11 19 114 1951 160 2797 5760 1277 247.20 18 49.68 CHANGED lhsalcpls..tphPsls..phhsl.Gp.Sh-GRslhslclopt...................................tss+sslhlpushHupE..hluptsshhhl.....ppLlpt..............pstthppLlcphchallPshNPDGathst..p.......................................ttphWths........Rssst..............................GsDLNR.........N.....a........sht.h.........s.sspssssp.atu.ts.t..............pPEspulh.sa.l...pp...........tphthalslHuhuphhh.........aPas.............................hsh..........................shsssppp........hpplupthspshtp.ht.............t....a..phG.ssss............hY.ss.GuspD.....asa.phsh.......hsaThEltsps.......................hhhss.....spl..tsp-shtu 
......................................................................................................s..t...h.h......p...h.....l..G.p.....o........h...........p........s......p.................l..h.lplst.....................................................................................................ttsc...t.l.h.l.u..shH.u.p.E..hhus..t..h.hh.hhl.................p.p.Lhpt...............................ts....th...p....t.ll..c..p..h.phh...llPsh..NP...DG.ht.hst...........................................................................................................R.ss.ss.....................................................................................Gh.DL.N.R.............................s....................a..........................s.............................................................................s......................................................psEspulh..ph...h....pp.........................................hphthhlshH..u...h.....s.thh...............h.a..s............................................................................................................................................t........................................s...t...........................h...h.t.......h....h.......................................................t......................................................t...p...............h.h.....th.................hthth................................................s......................................................................................................................................................................................................................................................................................................... 
0 956 1446 2182 +1022 PF00100 Zona_pellucida zona_pellucida; Zona pellucida-like domain Sonnhammer ELL, Bateman A anon Swissprot_feature_table Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -12.48 0.70 -4.95 143 2880 2009-09-12 10:41:20 2003-04-07 12:59:11 18 163 237 17 1681 2596 0 221.40 16 41.22 CHANGED pCsp.sph.........pltlpps.h.ts..............th.phsplpL.......pssp...........Cps.............hpsssshhhaphslss.........CGop.ppp...............hhYp..Nplhhphps..........................t.lhp.....hphtl.pChY........................ppshts..sh...................................h..thss...hs......s.sshp.........hphplhs..........................s..htp......................phss.h.lspsla.lc.....sphhs...........h.l........hlcsChA..Tss...s...s.spasll..ppGCs.pscshhsh.h...............htsphtcFp.hpsFpFs.........................spsplalcCplplCtssp......Cpt......Cstt ...................................................................................................................................................Ct..sth..l.lthp.....................th........phhh.............tt.t..................C.s...............t.st..t..h.h.h.h.ph.shpt............CGsh.thp.........................................t.........hhap....stlhh...t....ts....................................h.h........hthth.pCta........................................th...h.........................................................................................................s......................t.s.ht..............hphph..hp..............................st.................................t.sh.htlsp.lh...hp....h...hp..p..............s.h.l......................hlppChA.........sss............st.................phtll......tpG........Cs....tp.h.th........................................tsphhphp..hpsFpFs...........................pts.lahpCpltl.C.ttt.......C..........C...t.................................................................
...................................... 0 525 708 1302 +1023 PF01262 AlaDh_PNT_C AlaDh_PNT; Alanine dehydrogenase/PNT, C-terminal domain Finn RD, Bateman A anon Pfam-B_4166 (release 6.6) Domain This family now also contains the lysine 2-oxoglutarate reductases. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.66 0.71 -4.69 96 5257 2012-10-10 17:06:42 2003-04-07 12:59:11 16 38 3489 97 1531 6780 5369 153.50 35 34.75 CHANGED hpchttthshLhsussuVtP...A+VlllGuGVsGhsAhphAtsLGA.VsshDhpssphcpl-slhuphlps..................................hhsstttlpctltpsDllIsssLlPGpcAP+LlocchlppM+...................PGSVlVDlAl-pG.......GshEsocsss..................sshshpGVhhhu ....................................................................................h..p.tthhshhl.su...ss..s.V....s....P.....u+.V.h.l.l..G.u.G..VAGh....s..AhthAtu.L.G.A...........V....p.s.h......D..h..p......s.....p...t.h.c.p........l..c..s.........h..........h.........u........p..h.lps..................................................................................hh......u.....p......t......h.l...t..ptlp.cs..DllIsssLI.P...G..t.tAP+...L.....l.....Tc-M..Vps.MK.....................................sGSVlVDl.Ah.-..p..G...........GshE.h.o..c..ss.p....................................ss.h.h.t.pG.Vhhhu.................................................................................................................... 2 518 968 1302 +1024 PF05222 AlaDh_PNT_N Alanine dehydrogenase/PNT, N-terminal domain Finn RD anon Manual Domain This family now also contains the lysine 2-oxoglutarate reductases. 
29.50 29.50 29.80 29.50 29.30 29.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.44 0.71 -4.16 363 5191 2012-10-02 14:31:05 2003-04-07 12:59:11 10 33 3449 95 1522 3903 4047 132.80 37 29.97 CHANGED GlP+E.hps...t...EpRVAlTPssVppL.....hpt.G.a..cVhVEsGAGh...uuuasDpsYptA...GAplss.ss.tp........lastu-lllKV+pP.........ssEhshl+pGphLhoaLahu...s.s.phsctLhppsloulAhEslsc.h..cu.slslLusMuplA ...........................GlP+E..hps..s...Ep.RVAhTPssVcpL...........lpt...G...a...pVhVEsu....A..G.....suuFsDp.sYtp.A.....GApIls...ss...c.......sas.u-lllKV+pP..................ssE.....hshl..........+p.....G.......thLho..aLa.A.......s.s.plhc.........tLh....p....p.........t........l..........o......s.lAhEsVsc...p........cup..sLshLusMuplA........................... 0 510 963 1296 +1025 PF05218 DUF713 Protein of unknown function (DUF713) Moxon SJ, Pollington J anon Pfam-B_6651 (release 7.7) Family This family contains several proteins of unknown function from C.elegans. The GO annotation suggests that this protein is involved in nematode development and has a positive regulation on growth rate. 21.20 21.20 21.20 21.30 20.70 20.80 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.55 6 53 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 5 0 50 59 0 174.90 23 27.54 CHANGED EELcchRpppKp+hhAFasClcLKhRFEEKEp-WuDWIpss.+pPIhRlhphFppF.ppht.....a++hsp-p.s-..lppEIhhlppclhshhsoLpphFppLppLppcap-sLFIKVLpKsIs-hAs+.......LhplhNsLsc...ssS+-.appLcshFppIssssIPoTopLR+ICKpsshSD.YpslcFPKl ...................................-hpph+pp.ptphth....hhpClhL+h+FEE+EccWucWlpsh.RpslsplhppahpFppphp..........hpc.hsp.c..cppp.....l.p.Eht.htpplhsshstlppsappLcpLpppasDtlFl+lL......Kslsphusp..................Lhplhptltph.....thspp....hp..pLcphhtplcs.ppIPoTspL+phspss.p.p-.Ypslp.s..h..... 
1 9 9 50 +1026 PF05210 Sprouty Sprouty protein (Spry) Moxon SJ anon Pfam-B_6527 (release 7.7) Family This family consists of eukaryotic Sprouty protein homologues. Sprouty proteins have been revealed as inhibitors of the Ras/mitogen-activated protein kinase (MAPK) cascade, a pathway crucial for developmental processes initiated by activation of various receptor tyrosine kinases [1]. The sprouty gene has found to be expressed in the the brain, cochlea, nasal organs, teeth, salivary gland, lungs, digestive tract, kidneys and limb buds in mice [2]. 25.00 25.00 25.40 25.40 20.90 19.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.29 0.72 -11.13 0.72 -4.04 18 429 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 86 0 238 353 0 107.50 48 29.34 CHANGED +sRCt.C...RsL.Sphhss+.shCs.....ucsslcpsoChsCsculhYHChsDsEG-........huDpPCSCs.......cspsCsRWhuLuuLSLhlPCLhCYhPhRuCh+huptC.......GC..Ct .................+s+Ch.C...........Rsh.sp..ss+.shCp......scsslchsoCMhCscuhhYHCh.s.DsEG-............................huD.PCSCs........................ps+hC......hRWhuhuuLSlhlPCLhCYhPh+uCh+hsptC.......GC..Ct.................... 0 32 51 123 +1027 PF05216 UNC-50 UNC-50 family Moxon SJ, Studholme DJ, Wood V anon Pfam-B_6607 (release 7.7) Family Gmh1p (Swiss:P36125) from S. cerevisiae is located in the Golgi membrane and interacts with ARF exchange factors [1]. 
21.10 21.10 35.20 28.40 20.30 20.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.58 0.70 -5.16 24 272 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 239 0 184 268 3 215.90 38 82.36 CHANGED ssph.chhRRlFK.hppMDFEhAhWpMhpLhhsP++VYRshhY+KQ.....TKspaARDDPuFllLhshhLslouluauls.Ys..uhhp............hlphhlhhVhlDFlhsGlllATlhWhlsN+.aLp..........................hspsscpplEWuYsFDVHCNuFhPhallLYllQahLlPllh...ppsaluhhlGNoLahhulsYYhYlTFlGYssLPFL..................cssphlLh.Plsllhllallul.......hGaNlsthhhphYht ...................t.ph.pahRRLh+...hpQMDFEhAhWpMhaLh.h.sPp+VY+shhY+....KQ...........................TKspWARDDPAFllLlshhLslooluauhs.hs...uhhp............h..lphllhhlhlDhlhsGlllATlhWhlsN+.aht.................................hppstctcVEWu.......YsFDVHhNuF...aPhhlhLahlQ.hhhh.s....hlh.....tp.........s.........aluhhlGNoLahhAluYYhYlTFLGYss.LPFL...................cpT.p.hlLh..Phhh......lhll.ahl.s.l......hhG.aNhst.hh.hY................... 0 69 102 153 +1028 PF00702 Hydrolase haloacid dehalogenase-like hydrolase Bateman A anon Pfam-B_566 (release 2.1) Domain This family is structurally different from the alpha/beta hydrolase family (Pfam:PF00561). This family includes L-2-haloacid dehalogenase, epoxide hydrolases and phosphatases. The structure of the family consists of two domains. One is an inserted four helix bundle, which is the least well conserved region of the alignment, between residues 16 and 96 of Swiss:P24069. The rest of the fold is composed of the core alpha/beta domain [1]. Those members with the characteristic DxD triad at the N-terminus are probably phosphatidylglycerolphosphate (PGP) phosphatases involved in cardiolipin biosynthesis in the mitochondria [2]. 
26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null --hand HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.66 0.70 -3.93 239 27368 2012-10-03 04:19:28 2003-04-07 12:59:11 21 179 5134 203 8521 48772 5819 255.00 26 31.25 CHANGED hcsllFDh.sGTL..hpsp.hl.................................................................................ttthhphh..tph........shthh.http.............................................................................................................sttphhpphhtt...h.tt.hhttt..................................httttthhhhh...................................................................................................................................................................................tthhthhthtstlhs......s.........stpslpp..........L+pt........uh....plsllo.s.....s....hpshtpths..l............................s.hhhuph...................pP.psc..h...tthhcp......lt................p.t........p....shhlGDuh.sDh.uhttAs ...............................................................................................................................................................................................................................................................................................................................................................................................sslshDKT..G....TL.....T.p.s..c...hplt.....................................................................................................ht...........................................ths.pp...p...h..h...p..hs...........ssh........p.....t......h.......s......p.....p....P........h.....u...p..u......l....h...t..h....s..p..t.......t...........................................................hp.t.h..t..t..h..........s...h...s..h..t....s.t.h.pst.......................................................................................hh..........us....p.h.h...t..p.t.s...hp......................t........t...........................
..................ttt.s.t..s.s.h.h.hs..................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................s...h.p.h..l..G..l.l...u...l..t......D....s....l...+..s................s..............................s..t..p..u....l...p.p...................L.+.p.t...........................................Gl..................c.s.h.M..l.T...G.................D.s..t...t...T...A...p...u...I..A..p...p...l..G..l..........................................................................................................................................................sc.....l.hAcl..........................................................................................................hP..cc..K...........hpl.lcp.............L.p.t.....................................................psp.....h..................VAM.......sG..D.Gl..ND...A.PALtpAD.......................................................................................................................... 0 2722 5262 7124 +1029 PF04227 Indigoidine_A Indigoidine synthase A like protein Kerrison ND, Finn RD anon COG2313 Family Indigoidine is a blue pigment synthesised by Erwinia chrysanthemi implicated in pathogenicity and protection from oxidative stress. IdgA is involved in indigoidine biosynthesis, but its specific function is unknown [1]. The recommended name for this protein is now pseudouridine-5'-phosphate glycosidase. 
25.00 25.00 30.30 25.00 20.20 21.30 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.78 0.70 -5.19 90 1192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 1117 6 398 842 476 280.60 54 79.09 CHANGED sEVppALssu.c.PVVALESTIIoHGMPYPpNl-sAppVEphlRcsGAVPATIAllsGcl+VGLsp--LEtLAp.....s.ps..lhKlSRRDlshslApsts.....GATTVAATMlhAphAGIcVFATGGIGGVHR.....G..A....EpoaDISADLpELu+TsVsVVsAGsKSILDlstTLEhLETpGVPVluY.....to-......chPAFaoRpSGhpsshcl-osp-lAphhcs+hp...LuLpu.GhllANPlPc-ttlstphlsshIppAlt-AcppGI..sGKslTPFLLs+ltElTpGcSLpuNIALVpNNAclAAcIAhsl .............................EVppAl..p..sp..pPVVALESTIISHGMPaPpNspsAhpVEphlRp...............pGA...lPATIA.II.sGhh...+...lGLspE-lEhLup...............p...cs....VsKVSRR....DLs.hllA.....t.....tp...s.................GATTVAuTMIhA.shA...........GIcVFuTGGIGG..VHR....................G..AEp.....ThD.ISADLpE.LupTs.VsVVCAGAKSILDlshThEYL.............ETh......GVPVlGY.......pTc..........phPAFasRp...S..u.....hclsh..pl-osp-lAchh...ts+hp...h.....s...L.p.G.G.lll.A.NPIPc.paAhsc.phIsssIspAltEA-........pp.Gl..p..GK-sTPFLLu+lsElTsGcSLpu..............NIpLVhNNAhLAucIAht................................................................................................... 0 144 246 340 +1030 PF04613 LpxD UDP-3-O-[3-hydroxymyristoyl] glucosamine N-acyltransferase, LpxD Kerrison ND anon DOMO:DM04521; Family UDP-3-O-[3-hydroxymyristoyl] glucosamine N-acyltransferase (EC 2.3.1.-) catalyses an early step in lipid A biosynthesis: UDP-3-O-(3-hydroxytetradecanoyl)glucosamine + (R)-3-hydroxytetradecanoyl- [acyl carrier protein] -> UDP-2,3-bis(3-hydroxytetradecanoyl)glucosamine + [acyl carrier protein] [1]. Members of this family also contain a hexapeptide repeat (Pfam:PF00132). This family constitutes the non-repeating region of LPXD proteins. 
22.40 22.40 22.70 22.40 22.00 22.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.20 0.72 -4.44 90 2398 2009-01-15 18:05:59 2003-04-07 12:59:11 9 36 2248 13 574 1707 1557 71.20 31 20.84 CHANGED sshtlpulAsLppAsssploFls.st..KYt.ptLpsopAuAlllstc......tp........stshshLlspsPYluaAplsphh .............shhlsulAslpp..A..psuploFls..Ns.....KYp.cp.....Ls.p.opAuA.Vllspc.......s.............hs.s.ps.shLll.c.sPYlsaA+luphh............. 0 177 363 480 +1031 PF00244 14-3-3 14-3-3 protein Finn RD anon Prosite Domain \N 21.40 21.40 21.40 22.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.50 0.70 -5.28 85 2097 2009-01-15 18:05:59 2003-04-07 12:59:11 15 15 551 104 918 1919 23 196.20 57 83.35 CHANGED R-chV..ahAKLAEQ.AER.Y....--MlptMKplsph............tsc.LosEERNLLSVAYKNVlGuR.RuoWRll..SSIEQK-..csc...........................ppphphl+pYRpKlEpELsplCs-lLsLLDppLlPs..........u.........s...ssE.S+VFYhKMKGDYYRYlAEas.su........-c+c.csscpuhpAYppAp-lAp...pp..LssTHPIRLGLALNFSVFYYEIhNsPccACpLAKpAFD-AIu-LDs..LsE.-SYKDSTL.IMQLLRDNLTLWTS-tpscp ..................................................................pp.l.hA+LAEQ...A.ER.Y.................--Mst.MctVsp.............s.EL...osE..ERNLLSVAYKNV....lGuR.RuSWRll..SSIEQKpcsp.....................................-pphphh+..........pYRpKlEpELpplCp.s.lL..p.lLDpaLIss...................u.................s.....ssE..SKV....F.YhKM..........KGDYa.RYLAEht.....sG.......................................s.cR...p........ps.s-po.pA..Y..pp.....A.-lAp....sc.....h...PTHPIRLGLALNF.S.V.F.YYEIL..NuP.-+..ACp.LAK.p...A...FD-AIA.ELDo..L...s.E....-...S..Y...K.DS.TL.IMQLLRDNLT.LWTS-.tt..t............................................................. 
0 308 472 682 +1032 PF02826 2-Hacid_dh_C 2-Hacid_DH_C; D-isomer specific 2-hydroxyacid dehydrogenase, NAD binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain is inserted into the catalytic domain, the large dehydrogenase and D-lactate dehydrogenase families in SCOP. N-terminal portion of which is represented by family Pfam:PF00389. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.91 0.71 -5.12 161 20266 2012-10-10 17:06:42 2003-04-07 12:59:11 14 74 4565 208 5776 22821 13988 171.60 30 48.83 CHANGED luhhLuhsRpl...hphppth....+pG....pW..........t...thhuh....clpG+TlGIlGhGpIGptlAchhp.uFGh.cllsasht.....t...........pthsh.phs.sl-cllppuDhlolpsP.loscTcpllstctlstMKssu.....hlINsuRGsllDcpALlcALcsGpluuAuLDVappE...........Phs....ts........p...sL....hp..hsN.....lllTPHlu ..................................................................................uhhlshhRph.....t...h...t.t.th............c.p.s.....pW............................t......t.hhuh..........cl...p......G.+...T...l.Gll.G.h.G.p.....I...Gp..t..............l.A....p..h.hp...ua..G.h....p.....l...l..h..a..s..h...................hp...............t......................tp..s.......h.......p......h......s.......s.....L.......-......c.....L..............l.....p....p.......u.....D...l...l.o...l...p.s..P....h........s.......s........p...................T.c..............p....l..l.....s...t....p..................t....l.s...t.M...K.s.....s..u...................hll.N...s..u..R...G....s.l.l.D.p.p.AL.l....pAL.....c....s.....G........c....l.t..u....A.u.l.DVa.p.p..E......................................P.hs.......t.s......................p.......s.L................hp.....h.s.N.............l.ll.TPHlu............................................................................. 
0 1662 3388 4776 +1033 PF02834 LigT_PEase 2_5_ligase; 2_5_RNA_ligase; LigT like Phosphoesterase Bateman A, Mazumder R, Anantharaman V anon Bateman A Family Members of this family are bacterial and archaeal RNA ligases that are able to ligate tRNA half molecules containing 2',3'-cyclic phosphate and 5' hydroxyl termini to products containing the 2',5' phosphodiester linkage. Each member of this family contains an internal duplication, each of which contains an HXTX motif that defines the family. The structure of a related protein is known [1]. They belong to the 2H phosphoesterase superfamily [2]. They share a common active site, characterised by two conserved histidines, with vertebrate myelin-associated 2',3' phosphodiesterases, plant Arabidopsis thaliana CPDases and several several bacteria and virus proteins. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.03 0.72 -4.00 107 1248 2012-10-03 21:31:47 2003-04-07 12:59:11 11 3 1026 8 425 2631 248 80.60 31 52.39 CHANGED ls..spph.ppltpltpplc....thsh+h....ct.s.HlTLthlt.phs....pppls...plhpthtph..t...h.lplpthuhass...tspsla .....................................tlPsphppplhp.hp..ssh.h............s.....tu..s...+.V......tspNhHlTLu.FLG.-ls...........scc.c..........sltp.h.....h..u...c.....l................c...p..ss..F..s...l.pLc.shGtah....csRVlW............................................ 0 167 289 360 +1034 PF03475 3-alpha 3-alpha domain Aravind L, Anantharaman V anon Aravind L, Anantharaman V Domain This small triple helical domain has been predicted to assume a topology similar to helix-turn-helix domains. These domains are found at the C-terminus of proteins related to Swiss:P32157. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.87 0.72 -4.24 49 1443 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 1316 6 193 768 20 46.50 31 19.76 CHANGED cloVtchhplhascphs..hptlccllsh.ssLutoW+pphp+Rlpptps .................cloVpcssp.lhap......hs......p-tlccl...Lss.ssL.SsoWpcphpKRhtpt.......... 0 52 97 146 +1035 PF01612 DNA_pol_A_exo1 3_5_exonuclease; 3_5_exonuc; 3'-5' exonuclease Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_659 (release 4.1) Domain This domain is responsible for the 3'-5' exonuclease proofreading activity of E. coli DNA polymerase I (polI) and other enzymes, it catalyses the hydrolysis of unpaired or mismatched nucleotides. This domain consists of the amino-terminal half of the Klenow fragment in E. coli polI it is also found in the Werner syndrome helicase (WRN), focus forming activity 1 protein (FFA-1) and ribonuclease D (RNase D). Werner syndrome is a human genetic disorder causing premature aging; the WRN protein has helicase activity in the 3'-5' direction [4,5]. The FFA-1 protein is required for formation of a replication foci and also has helicase activity; it is a homologue of the WRN protein [3]. RNase D is a 3'-5' exonuclease involved in tRNA processing. Also found in this family is the autoantigen PM/Scl thought to be involved in polymyositis-scleroderma overlap syndrome. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.15 0.71 -4.79 27 6914 2012-10-03 01:22:09 2003-04-07 12:59:11 15 92 3529 32 2350 6043 5677 173.20 23 27.78 CHANGED hphlpsppthpchhppltstt......slAlDTEppshph..h.p.hstllQlpst.....ptsalhsshthts..........hphLptllpssphhKlhasschDhthltp........aulhhpsla-s..lAstllshtpt..........auLssLspphls...hthspppttucap.....tcslhcpth........tYAutDshhhlp..lhppLpp.lp .................................................................................................................l.s.tth.tt.hh.t.t.l..pt.ts............hlAlD...T.....E....t...........s.......s.............h.....p..s........................h....p.............h....h....h.......hl.pltss...................pss.h.l.......s..s....h..s..h...t.s..................................................hp..Lp..s.lL..p..s...s.s..l.h.....K......lh..as.h.c......D.hth.ltp............................................h.G..l...t...h..p.....s.....l...h...........Ds...........l...t...u...h..l..h..s.h..stt....................................................huh.s.....s...L..s...p..c.....h.ls.........hs...l.............s...h..p....p......t.....u..cht...............................tcsh...c.pth..............................pY.AAt..Ds.hhhp...lhptlh.pl............................................................................................. 0 797 1441 1966 +1036 PF00803 3A 3A/RNA2 movement protein family Bateman A anon Pfam-B_1054 (release 2.1) & Pfam-B_6332 (release 7.5) Family This family includes movement proteins from various viruses. The 3A protein is found in bromoviruses and Cucumoviruses. The genome of these viruses contain 3 RNA segments. The third segment (RNA 3) contains two proteins, the coat protein and the 3A protein. The function of the 3A protein is uncertain but has been shown to be involved in cell-to- cell movement of the virus [1]. The family also includes movement proteins from Dianthoviruses. 
25.00 25.00 35.30 33.40 22.10 17.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.29 0.70 -5.30 27 894 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 89 0 0 507 0 209.50 48 84.13 CHANGED sppstusspppLlptlhu.ps.hpthtpcsshuphp.hpussphshh......sLlPssstsplsshhp......opacssphsosG...................hhpls+lllhlsPTls.sssGpVpltLhDsshss....hpslst.pphplphsss...PtlhsFhssYshPhpp.s...........pRCFtlshphsuhh..spusSshSlhhhWptphsspspsYp.pssthh.lp..hpRt.thpshsthpp...alcush...spssstthlhspshsh ....................s..plutcsuss.lsp.aSsss.chsl.spthDsu+t+hhhsspusSsh...........SlsEucuaDl.ARhlV-pspHlSNWKNDhhVGNGpppsshsIpIhPTWsSp+pYMhISRlVIWVsPTIP.sssGcLsVuLlDPNhPo.........-.cpVIL+sQGol.hDPhCFlFYLNWSIPKhsNTP............csChQL..pLhCs..pcY...thssohuSlh..WptpFsspsts.pp.ss......................................................................... 0 0 0 0 +1037 PF02829 3H 3H domain Bateman A anon [1] Family This domain is predicted to be a small molecule binding domain, based on its occurrence with other domains [1]. The domain is named after its three conserved histidine residues. 25.00 25.00 30.00 44.50 21.30 19.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.17 0.72 -3.97 57 945 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 913 1 147 544 7 98.00 42 55.62 CHANGED lssp..H.ss-chc-ELphIVc.pGGplhDVhl-HP.lYG.clpupl.tlpscp-VppFlcplpps.cup.....LppLTs.GlHhHTlpAssccsl-pIpctLcctGaLl ...............htsp.H..psEchcpELphI.VD.pGGpVp.sVhl-HPlYG..-lpu.L.plpsRcDVpcFlcplpps.pup......LSpLTc.G.lHhHplcA-oppsLctIccALcctGaLl................... 0 62 104 124 +1038 PF00725 3HCDH 3-hydroxyacyl-CoA dehydrogenase, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_743 (release 2.1) Domain This family also includes lambda crystallin. Some proteins include two copies of this domain. 
21.00 21.00 21.00 21.00 20.60 20.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.19 0.72 -3.59 77 10901 2012-10-02 19:36:46 2003-04-07 12:59:11 17 35 3006 55 3264 9099 4385 93.80 27 23.85 CHANGED GFlsNRllhshhsEAhpllpcG.Assc-lDpshchthGaP.MGPhpLsDhlGlDsshplhp.......hhtpths-p...thp.s.lhcphscsGchGcKoGcGFYpY .............................................GFlsNRl.l.h..s.h.l.s.EA.h.p.h.l.p...........-......G.......h..u.s...s.p.c......l......Dtsh.p.hs..h...Ga...P...h...G...Php..h.h...D...hl...G...l.Dsshp.l.hp...........................hhtpt.hspp............hp..s..s.....hl.pph.h....p.t.sh.h...GpKs.s..t..Gaa.............................................................. 0 922 1912 2683 +1039 PF02737 3HCDH_N 3-hydroxyacyl-CoA dehydrogenase, NAD binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_743 (release 2.1) Domain This family also includes lambda crystallin. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.95 0.71 -4.63 74 8383 2012-10-10 17:06:42 2003-04-07 12:59:11 13 46 3107 48 2683 14660 8387 177.00 33 34.30 CHANGED pVuVlGA.GsMGsGIAtlhAps.GhpVslhDh......spptlpcuhptlpp...thtphhpcGphs........ttphsthhsplp..hsschsshs.su.DlVlEAVsEslclKpclFtcl-tls.tscsIlAoNTSulsloclupshp.cPc+..hlGhHFF.N.Ps.lM.LVElltutpTuscslspshshscpl.GKtPVhV.pDs 
....................................luVl..G.u.G.h..M........GuG..I.......A..h.......h..h........A....t..........t.....G....h..s......Vh.lhDh.................st...p...s....l...s.......p...u........h...s...t...l.tp...........L.p..c..t....l..p..+...u...+..ls.................................t.t.p.hs..t..h..l..u....p..l.s..........s.......s...s.......c..........h.....s....s......h......s......c......s..........D.......l.V...............I........E.........A......V........s......E......s.......h......c...l.K......p...pla.t..c..l-.p..h.s.....t..........s.c......s..l..lASN.TSolsIs.pl..A.............s....s.......s.............p.......+........P.......cc..............hlGhHF..F..N...Ps...hM.LVEl.lt.........ut......p.Tusps.l.s.......pshsh.s.p.p.h..GKsslhl.pD......................................................................................................................... 0 802 1633 2233 +1040 PF02446 Glyco_hydro_77 4A_glucanotrans; 4a_glucanotrans; 4-alpha-glucanotransferase Bateman A anon Pfam-B_1924 (release 5.4) Family These enzymes EC:2.4.1.25 transfer a segment of a (1,4)-alpha-D-glucan to a new 4-position in an acceptor, which may be glucose or (1,4)-alpha-D-glucan [1]. 
25.00 25.00 26.80 25.60 23.20 24.80 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.45 0.70 -5.93 14 3053 2012-10-03 05:44:19 2003-04-07 12:59:11 12 17 2557 11 694 2467 761 483.60 30 78.89 CHANGED lplhSL.oc...GIG-hGppAh.hlDahpcsG...hplhQlLPlssou.ssu.......PYsShSuhAhNslalclpsLs-tthlc............hpchpphpstspVcYsplhthKhshL+csacthpppt...pppsFppFtccp...phWLcsaAhF..hAlK-phss......hsWspW.....pshptpctpslpchpcphp-clpFataLQalhapQhppl+saApp..psltIhGDlPIhluccSuDVWs..chFphs.......tsuGsPPDhFsppGQtWu.PlYsactLpccsapWWhcRlctshphasllRIDHhhGFhphW.lPt.................ucppAtsGcahhsPG...................................................................cclhshlhpths................................................................ph.lIuEDLGslss-VcthhcphulsGh+llpFshs.-spsshlPpsh...ptpsVshsuTHDssTltuWacst.....................pccpphhtpaLs......ph.pcthptsll+hshposuhhtIh.hpD....hLuLss-........tRhNhPuTssp..NWsaRlpss.......-.hssppphhphltplh ....................................................................................................................hplhoL.op...t.....G.I.GD.Fup.th..hl-.h.ht.c.sG.....htl..lhslhss.s..t.su..........................P.Yp.s...SthhhNshaIc.l..p..t....l...-.h..t.hhptt................................t....p..t.....h.....p.....t..h.....p.....s.....s...p......h.lD.Ys.tl..hthKh.s.hL..c.hsa.pta...........t....p....p..........s.............p...........t........s........F...ppF..ht..pp....................tth..Lts.aAha.............sl.+.....c..p.htt.............................h..s..W.....t.Ws........p.......th.......pp....hc.............s............s....sl.....pph......p.p..p..h.........t.-.clpaahalQahhhpQhtth..pp..hApp..hsItlhsDlslhVutsuu-sWsp.c.haths....................................sssG.sPP......D.ha.......u.s.tGQpWG..Psa..s.c....h....hp...t..pu.............Yp.alchl.Rtshphhs...........s..........lRIDHhhGhhchWh.IPt...............................up..s.AtpGt.a.lphPs.......................................................
......................................................................................................................pclhsh.lthE..t.........................................................................................................................p.h.lIuEDLGs..l..s..sp....lhthhcptGl.uh+lL..Ft..h........................p.......p......p.........s...........h......h....h....P..p.ta.............stpulshsuTHD.sTl...tG.aapst..c.................................................................................pp..ppph..h......t.t......h...l..s............................t..h.............t....p.......s.....h....t....p....s..h...l.....p...s.h...h....s....o......h.....sp.h..h...h.h...h..tD......................lLulp.p.........................t.hNhPGTsst...NWph+hsts.......p.h.t.tth........t.................................................................................................................................................................................... 0 231 453 586 +1041 PF01812 5-FTHF_cyc-lig 5-formyltetrahydrofolate cyclo-ligase family Bashton M, Bateman A anon Pfam-B_1555 (release 4.2) Family 5-formyltetrahydrofolate cyclo-ligase or methenyl-THF synthetase EC:6.3.3.2 catalyses the interchange of 5-formyltetrahydrofolate (5-FTHF) to 5-10-methenyltetrahydrofolate, this requires ATP and Mg2+ [1]. 5-FTHF is used in chemotherapy where it is clinically known as Leucovorin [2]. 
20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.10 0.71 -4.52 24 4810 2012-10-04 00:26:15 2003-04-07 12:59:11 15 19 4472 16 1259 3577 1998 177.40 26 87.98 CHANGED KppLRcphht.thctlspcphtptsptltp+lhshhthppu.ppl........uhYlsh.ssEhshp..tlhpphhptschlhhPhhp.ps..................................s.h.ps.ht........lhpPhpthth..s..........plDllllPulAFD.ppGhRLGhGtGaYDRhLuphpt.....p.htlulshcpQhhs..plPt-....aDhslchllss ...................................................................................KpplRpthhp..phps....l.....s.....p.....p..p...p...p....t.....s..p.....t....l.t.p...p....l....h...s....h...........t...h.......t....p....A.....p......s...l.........................................................uhals.h..s..s..El...s...Tp...........sl..l..c..p...h..h........p...........p........G.........K.....p.....l..h..l.Phh.ps.ts..................................................................................h...h...h...t...h.p..s....t.s....t...l....h.p.s..t.a.s................................lhEP.t.ts.h.t.h...................................plD.llllPslA.....F....D...c.......p...G...h.....RlGhGGGaY..D....RhL..s..phpt..........................hs.........l....u.ls.a...s..t..........Qhl........c........p..........l....P....s...Es.........a.Dl..slstllT..................................................... 
1 390 782 1045 +1042 PF02739 5_3_exonuc_N 5'-3' exonuclease, N-terminal resolvase-like domain Bateman A, Griffiths-Jones SR anon Pfam-B_716 (release 3.0) Domain \N 22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.04 0.71 -4.88 25 6210 2012-10-03 20:43:45 2003-04-07 12:59:11 11 20 4577 17 1278 4680 3819 164.00 35 23.04 CHANGED ppLlLlDGpulsaRAaFAhs......LpsspG.sTs.AlaGFhphLhpllccppPsalllsFDutsp..TFRcchYtcYKusR............spsP--LhsQlshl+EllctlGIshlEhsGaEADDlIGTLAcpAppc..GhcVhIlouD+DllQLls-...pVtlhps........tchphhss-pVhEK.a ...................................................................................LlLlDGpuhhaRAaaAhs...................h..p..s.s..p.G.........T.....s......Alh.G........h........hp....hLt....p...l...l.....p.....c.........................p.......P.....o.........H....hs.........VsF.D.....................s.t................t...............p......TFR..............p-h.as.cYKusR........................................s.s.hP..-....-........L......p......t......Q....h......s....h..........l.......+........ch..l....c....u......h........G....l..........s.h......h.p.h.....s.GhEADDlIGTL.Ap..p..u...p...p.p.......G.....h.......p.V.h.I.loGDKDhhQLlsc......p....l.t..lh.pp...........hp...phh.s.ptlhc+..................................... 
0 413 828 1081 +1043 PF01367 5_3_exonuc 5_3_exonuclease; 5'-3' exonuclease, C-terminal SAM fold Bateman A, Griffiths-Jones SR anon Pfam-B_716 (release 3.0) Domain \N 40.00 40.00 40.30 40.00 38.60 39.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.20 0.72 -3.78 27 6018 2012-10-01 19:52:02 2003-04-07 12:59:11 15 19 4488 11 1254 4486 2458 100.70 36 13.83 CHANGED lsPpQ....hhDhtALhGDsSDNIPGVtGIG-KTAhpLLpcaGSl-slhp..Nl-plps.ph+......ctLpspc-sshhScpLuslcsDlslplsh-sh.......thps.stpplhp .........lsPpQ....llDhhuLhGDoSDNlPG....V....sGl....GpKTAhpLLppa.G.ol-slhp.....pl..-....p....lp...s.......+.h+...........................cpL..p..p..s..p..-.............t..AhL..S+..pLAo.lps.Dl...s...l...s..h...s.h...cpl.......thp..t......t.................................................. 0 404 813 1059 +1044 PF03491 5HT_transporter Serotonin (5-HT) neurotransmitter transporter, N-terminus Griffiths-Jones SR anon PRINTS Family \N 20.30 20.30 20.90 22.20 20.20 19.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.17 0.72 -3.84 3 54 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 34 0 20 41 0 39.10 70 7.22 CHANGED ENGVLQKuVPTPADKVESGQISNGYSAVPSTGAGDDAcHSlP .............ENGVL..QKsVPsPGDKsESGQISNGYS....AV....PSPGAGDDspHSIP............. 
0 1 2 4 +1045 PF02096 60KD_IMP 60Kd inner membrane protein Mian N, Bateman A anon IPR001708 Family \N 25.40 25.40 25.50 25.50 25.10 25.30 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.46 0.71 -4.65 147 6198 2012-10-01 21:53:17 2003-04-07 12:59:11 15 26 4682 0 1609 4484 3134 199.40 31 48.09 CHANGED s.saG..luIIllTlll+.lllhPLshpph+Sh....t+Mpp....lp..Pclp....clpc..+a...............ps.ctpc.....hppEhhcLYccpslNPh...uGCL.PhLlQhPlhhuLatslpp...............................................................hp.....l...........pp.............ssFh.........................W......lpcL...........ussD............................................hhlLPllhulshhlpppl........s....................pst......pt+hhhh...hh.Plhhh.....hhhhphP..uGLsLYWhssslhslhQphhl.pphht ............................................................................................................................................saGhuIIl.lTl.ll..R...hll.h.P..L....sh.tp....hpoh..................tK.M.pt......l..p......Pclp.................tlp.c+a...........................................t-..-pp+...................hp..p....E...h...hp...LYK......c..p......s.l..N..Ph......uG..C....L.....PlL.l..Qh..Pl...ah.ALY.hslhp............................................................................................................................................shp...............l.............pp..................usFh.................................W.............l..cL............ussD.......................................................................................s.hh.lLPl.lh..uls.h.alpp.t.l......................................s..................................s.ps..htphhhh.................hh.Pl..hhh.....hhh.....h........hP....................uGLsLYWlluN.......lhsllQphllpp...t............................................................................................ 
1 548 1016 1354 +1046 PF00428 Ribosomal_60s 60s_ribosomal; 60s Acidic ribosomal protein Finn RD anon Pfam-B_151 (release 1.0) Family This family includes archaebacterial L12, eukaryotic P0, P1 and P2. 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.32 0.72 -3.59 107 2118 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 646 27 1183 2032 138 88.10 32 53.35 CHANGED psossslppllpuush.plcshthphhhptlp..uc.slc-llsssttt.....................uuuuusAAuuuuuuuusAspcpccEEcc...........EpD-..DMG.hu...LF ......................................................................sotsslppll.......puush.pl-s.hthp..h..h.......hptlp..uc.slc-llsssssths............................................uuuuAs.Au.u...u...u...u.u.u...u.A......s.s.sp...E.cccEEcc...........E..EoD-......DM..G..FG...LF....................... 0 390 647 956 +1047 PF01591 6PF2K 6-phosphofructo-2-kinase Bateman A anon Pfam-B_717 (release 4.1) Domain This enzyme occurs as a bifunctional enzyme with fructose-2,6-bisphosphatase. The bifunctional enzyme catalyses both the synthesis and degradation of fructose-2,6-bisphosphate, a potent regulator of glycolysis [1]. This enzyme contains a P-loop motif. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.39 0.70 -5.28 11 1160 2012-10-05 12:31:08 2003-04-07 12:59:11 13 22 311 13 696 1195 96 194.90 37 39.97 CHANGED tcsssssp.pss.hhIVMVGLPARGKTaISpKLTRYLs....WlGhsTKVFNVGpYRRpssps........pshcFFcs-NpEuhclRcpsAhsALcDlhsaLscEsGpVAlFDATNTTRERRchIhphsccps........hKshFlESlCsD.plItpNIpplphuSPDYtspss-c.AhcDFh+RIcsYctsYEPLD.-ppDcsLSaIKlls.lGpphllNpVpsalpSRlVYYLMNlHlpP ...................t........pss.hhllMV.GLPARG......Koal......u..pK...LsR..YLs.................WlG..h.o.+..l.....FN..lG.p..Y.R.R.pts.tt..............................................ps..h...p...F.F.p.ss..Nt..c.uh...p.lR...cphAhtslc..........Dlhp...al............p....p............p............s.....G..........p...........lu..l.................a......DATNoT.+cRRphlhp.h.h.p.p..p.s........................................h..c..shFlESlCsD....pllttNI.h.ph.p.h.s.s.........PD....................Yhs......h.s...cp...AhpDFhpRIppYct.....s.....Ypsl...s..................-.....p.....p..l.............s.....aIKhhs...sGpp.h.hl.s.p.l.p..salpo+lVaYLhNl+lp........................................................................ 0 187 351 557 +1048 PF00393 6PGD 6-phosphogluconate dehydrogenase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This family represents the C-terminal all-alpha domain of 6-phosphogluconate dehydrogenase.\ \ The domain contains two structural repeats of 5 helices each. 
20.40 20.40 20.80 20.60 19.70 20.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.91 0.70 -5.20 58 5458 2012-10-02 19:36:47 2003-04-07 12:59:11 14 19 3935 36 1173 4118 977 231.40 44 59.81 CHANGED GHaVKMVHNGIEYGDMQLIuEAYplLKpsLGLoscEluclFpcWN.pG-LcSYLIEITucILpt+D.....ps.G..p......sLVDhILDpAGQKGTG+WTs.sAL-lGlPlshIsEAVhARhLSuhKcERltAS+.hLsG...sp.....sthss-+ppal-sl+pALYsSKIsSYAQGFtllptASc-asWsLshupIApIWRGGCIIRutF.LscIpcAappsssLsNLLlssaFpptlpptpsuaRclVu.hAsptGlPlPuhSoALuYaDuYRsspLP.ANLlQAQRDaFGAHTYcRh..........D+pGs..FHTpWs ................................GHaVKMVHNGI.EYGDMQLIuE..uYplh..+..p..s....h..s..h......s.....sc.......-.......huphFpcWN...pG.-....L.s.S.aLl-IT...tclh.ptcD.......pp..u..p.....................ll.-h.I......h.....D.....p.....A.....u......p.....KGTGKWTs.sAL-lGhPlslIs.EuVFARhlSu.h.K.p.p.Rht..A.sp..h.h.t.....s................scctthl.cpl+pALahuKlhuYAQGF..hptAu.pp...tWs.lshuplA.laR.sGCIIRu.......F.Ltp.I.pp.Aa.tp....s....s....t.l.................s.Llhs.hFtth..h..tp.h........tuhR.....plls.h............u............hp.G...........lP...........hPshu.uu.l.s.a.aD.uhp.st.L.P..AsLlQ.......AQRDaFGuHsaphh.........s....t.........hH...W............................................................... 0 355 715 977 +1049 PF02495 7kD_coat 7kD viral coat protein Mian N, Bateman A anon Pfam-B_2886 (release 5.4) Family This family consists of a 7kD coat protein from carlavirus and potexvirus [1]. 21.10 21.10 21.40 23.50 21.00 20.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.89 0.72 -4.52 61 349 2009-09-10 16:31:06 2003-04-07 12:59:11 12 1 114 0 1 324 0 58.80 27 70.57 CHANGED h....hhhhlullssllslhhl.....psssssChllITGESlplpu..C..hsschlchl.psL+shst .......h..lhhhlslls.sllslhhl........ssppspCpllIoGculhIss..C.t.os-hlchl..shpPhp.s........ 
0 1 1 1 +1050 PF02294 7kD_DNA_binding 7kD DNA-binding domain Mian N, Bateman A anon Pfam-B_8148 (release 5.2) Domain This family contains members of the hyper-thermophilic archaebacterium 7kD DNA-binding/endoribonuclease P2 family. There are five 7kD DNA-binding proteins, 7a-7e, found as monomers in the cell. Protein 7e shows the tightest DNA-binding ability. 25.00 25.00 119.50 119.30 20.90 19.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.11 0.72 -4.29 2 33 2012-10-02 16:56:36 2003-04-07 12:59:11 13 1 18 30 10 32 0 61.50 90 96.62 CHANGED ApV+FKYKGEEKpVDhSKIKKVWRVGKMlSFTYD-.sGKTGRGAVSEKDAPKELhpMLt+tc s.TVKFKYKGEEKEVDhSKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEKQK. 0 2 2 9 +1051 PF00001 7tm_1 7 transmembrane receptor (rhodopsin family) Sonnhammer ELL anon Prosite Family This family contains, amongst other G-protein-coupled receptors (GCPRs), members of the opsin family, which have been considered to be typical members of the rhodopsin superfamily. They share several motifs, mainly the seven transmembrane helices, GCPRs of the rhodopsin superfamily. All opsins bind a chromophore, such as 11-cis-retinal. The function of most opsins other than the photoisomerases is split into two steps: light absorption and G-protein activation. Photoisomerases, on the other hand, are not coupled to G-proteins - they are thought to generate and supply the chromophore that is used by visual opsins [1]. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.81 0.70 -5.25 64 42866 2012-10-03 04:04:29 2003-04-07 12:59:11 16 290 7772 141 17156 63287 11 225.60 18 72.53 CHANGED GNslVlhslhtp+ch+....sssshalhsLAluDLlhslsls.hshhhhhh.........pWshGp...hhCclhshhhhhshhuSlhhLsslolDRYlAIscPhphtthps...tpuhhhhhhlWlhuhllulP.hhhhttpt.pps.............hhChhphsp.............hhlhhtlhsFhl.PlhlhhhsYhhIhppltpptt.................................................................................................................................................ppcp+ss+hlhsllslFhlCWhPatlhhhltshpphs............hthhh.lshhluhsssslNPllY ...................................................................................................................................................................................................................................................................................................................Nh.h.s..l....h....h.......h...h...t........p.....p......p..h+...................ss......h...p...h....h...l......h...s.....L....A.h.....u..D.....h.......h......h...............s...h............h.....h..................h........h.....h........h........hh................................a.........h......u.............h.h...C......p.....h.........h.......h..............h.......h............h.......h........h........................h...........h.......s......o.....l..........h......................l..s.h..l....u......h......-................R.............a...............h.........s................l......s......p.......s................h........p..........h................h.........t.............h.....h..............p...........................p..............t...............s.......h......h.......h......h.......h......h........h........W..............h......h.........u......h.........h..........h......s......h......s.........h........h.........h.....................h...........t............
...................................C.h........h.................................................hh.h..h..h...h...h.....h.....h.....a.....h.....l.......P.....h......h....l.......h......h......h......s.....Y...h...h...l...h...h...t....l...t.....p...t..t...t....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p..t...c....t....c......h...h....p....h........h....h............h....h....l...............h....h.......a....h...l....s........a....h...P.....h....h.....h......h........h.....h...........h.h..........................................................h..h......h.........h....h....h...h........s...s...hhsPhlY............................................................................................................................................................................................................................................................................................................................................................................................ 0 5181 6677 11247 +1052 PF00002 7tm_2 7 transmembrane receptor (Secretin family) Sonnhammer ELL anon Prosite Family This family is known as Family B, the secretin-receptor family or family 2 of the G-protein-coupled receptors (GCPRs).They have been described in many animal species, but not in plants, fungi or prokaryotes. Three distinct sub-families are recognised. Subfamily B1 contains classical hormone receptors, such as receptors for secretin and glucagon, that are all involved in cAMP-mediated signalling pathways. 
Subfamily B2 contains receptors with long extracellular N-termini, such as the leukocyte cell-surface antigen CD97 (Swiss:P48960); calcium-independent receptors for latrotoxin (such as Swiss:O94910), and brain-specific angiogenesis inhibitors (such as Swiss:O14514) amongst others. Subfamily B3 includes Methuselah and other Drosophila proteins (e.g. Swiss:P83119). Other than the typical seven-transmembrane region, characteristic structural features include an amino-terminal extracellular domain involved in ligand binding, and an intracellular loop (IC3) required for specific G-protein coupling [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.78 0.70 -5.29 31 4718 2012-10-03 04:04:29 2003-04-07 12:59:11 19 348 246 3 2544 4248 5 225.10 22 28.73 CHANGED hhslpllhhlGaulS.LsuLhlAlhlhshF.RpL+CsR.NhIHhNLhhoalLpshshhlt.tthlhspht....p...................................h....C+lssshhpYhhhsNFhWhLlEGlYLasLlshs.....ahs-.+thhhhahllGWGhPslhlssWshs+...............hhats............spC.Whsp..ptshh....WlhpGPlhhslllNhllFlpllplLhpKlp......ssphucpcptph.................................hs+ooLlLlPLLGlpall..hhhhsss......phlhhahphhlsSF....QGFhV 
.......................................................................h..hthl.hlG.h.sl...S...lh...s....Lh...h..sl.h...h......h......h......h..............h.......+................p...............l.................p............s.....p.....c......s......h.....l....p........h....s.L....h.....h.......u.......h....h..l.......t....t..h....h..h......l...lt........h....h..tt......................................................................................................................C.p...hhs.hh.h.aa.h.h.....lu.s..ah.....W..h...h..l..Eu...l..h.L...a.....h.....h...l.shs...................h..s.p....p.....t.h.h..h.....h.....a....h....h..l.....G.........W....G.............h..Ph.l..lls..l...h.sshp.......................tha..t..s.......................................p..t.....C....W..hp........pt...h..hh.............a....h...h...h..u.P...h...h....h..h.l.h...........l.N....h.l..h...h.l.h..h.....l.h..h..l....h...p.php..........tt.p.h.t...p.t.p.p.hh..............................................................................hhh.s.s..hh....L..h.....L...h..G.....l.....pahh..........h.h.....h....h.......tp..........t...hh....hh.hh.hh...hsuh....Q..Ghhl.............................................................................................................................. 0 852 1057 1672 +1054 PF02949 7tm_6 7tm Odorant receptor Bateman A anon Pfam-B_436 (release 6.4) Family This family is composed of 7 transmembrane receptors, that are probably drosophila odorant receptors. 
25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -11.67 0.70 -5.57 36 3836 2012-10-01 21:54:26 2003-04-07 12:59:11 15 33 114 0 2186 4221 0 248.70 14 75.32 CHANGED shsphhpslphhssshsshhKhhhhhhpppchtclh......ptLppl.tcsh.ppp-phhhtphhp...ptp....hlhhhhhhshshhshhsh.....lshlhttpt..............sh.hhh.......h.hsap.....sph.aah.thhhphhshshsshttlssDshhhhhhhhlshphclLpt+lcpltts................................................................................ptsppp.....ppLtpslppHppll..................chsptlpshhshshhspFhssuhllshshhplhhhss...shphlhh...lhahhshhhQlFhhCahuspltppupplspAla.pss.Whs......tshch++hllhhht+sQ..+shplpAss.hhslsLssahslhphua .....................................................................................................................h..........................p.h....hh................h......h................t...h.............................................................................hh................hhhh......h.h...h..h..h...h...h....h.h.......h..h.h.h...................h....h..h...............................h.....h..h................................h...h...............t........hhh....hhhh.p.h.h...h.h.....h...hshh.......h...........sh.s...s.h........hh.h...h...h.....h....h.t...h..p...hph..l..t....p.l.pp.h.t..........................................................................................t..t..................ttl...h.t....h...lphHttlh................................................................ph.hp..h....p....t..h.h.t....h....h..hp.h.h...h....h..h.h.l...s..hh...h.............h...h..h...h........................h...h....h...................h...h..h...h...h...h...h....h...h...p.h..h....h.hsh..h...up....l.................t............st....pl.hts..h........a...p........W....h.t..............hs......p.....pp.......lh.hh.h.psp....p..sh...hpsh..hh....h..s..h..hhth......................................................... 
0 582 712 1980 +1055 PF00207 A2M Alpha-2-macroglobulin family Finn RD, Sammut SJ anon Prosite Family This family includes the C-terminal region of the alpha-2-macroglobulin family. 20.40 20.40 20.40 20.40 20.10 20.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.07 0.72 -4.41 131 2819 2012-10-03 02:52:13 2003-04-07 12:59:11 17 148 1230 58 881 2534 98 88.80 26 5.98 CHANGED oWlWpsh.ls........t.s...GptslshplPDoIT.oWphpAhulosst.........................GlGlups...plpsh+sFFlslpLPYSlhRGEplplpssla.NY.hsps..lp.lpVpl ............................................hha...hhs.............tp....Gpspl.shh........lPDo...lT...pWc.lh.A.....h.u.h.s.s.s.......................................................s.hGhscs........plpshp...shhlphshPh..ltpG-p.hplthslh.Nh...h.sp.s...p.hpl............................................ 0 218 372 622 +1056 PF01835 A2M_N MG2 domain Bateman A anon Pubmed:16177781 Domain This is the MG2 (macroglobulin) domain of alpha-2-macroglobulin [1]. 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.41 0.72 -3.75 175 2926 2012-10-03 16:25:20 2003-04-07 12:59:11 14 140 1451 60 921 2616 150 95.90 22 6.25 CHANGED psalhTDRslY+PGcsVph+sl..shs.................t............slp...lplhcP.sGppl.pphhh............Ghhphs.aslspss.hGpaplpsph.........................tstthsstpFpV ..............p.halhTD+slY+PG-s.V.phpsl..hhchc.....................................hpsh.ss....pslp...lp.l.....h.cP...sGp.h.l.pph.s....................t..pp...Ghh..phs..at..L..sps..s....s..tG....taplpsph...............................t.sphhphpFpV............................................ 
0 235 407 669 +1057 PF01356 A_amylase_inhib Alpha amylase inhibitor Bateman A anon SCOP Domain \N 25.00 25.00 25.00 58.00 23.50 23.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.41 0.72 -4.18 6 15 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 13 7 1 19 0 65.90 51 70.74 CHANGED upPAPACVchapSWRYTpVsNsCupsVSVTVsYpDGspuPCtslsPGslsTFu.GYGTpuNaVpulshC .t.pPAPACVchapSWRYTsVsNsCussVoVTVsYpDGppuPCRslsPGshsTFu.GYGTpGNashulthC 0 0 1 1 +1058 PF02137 A_deamin Adenosine-deaminase (editase) domain Mian N, Bateman A, Iyer LM, Zhang D, Aravind L anon IPR002466 Family Adenosine deaminases acting on RNA (ADARs) can deaminate adenosine to form inosine. In long double-stranded RNA, this process is non-specific; it occurs site-specifically in RNA transcripts. The former is important in defence against viruses, whereas the latter may affect splicing or untranslated regions. They are primarily nuclear proteins, but a longer isoform of ADAR1 is found predominantly in the cytoplasm. ADARs are derived from the Tad1-like tRNA deaminases that are present across eukaryotes. These in turn belong to the nucleotide/nucleic acid deaminase superfamily and are characterized by a distinct insert between the two conserved cysteines that are involved in binding zinc [2]. 
19.80 19.80 21.20 19.90 18.90 18.60 hmmbuild -o /dev/null --hand HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.43 0.70 -5.12 59 781 2012-10-02 00:10:39 2003-04-07 12:59:11 13 25 275 2 446 770 16 308.00 27 55.17 CHANGED oluTGsKslss..pphuppGph....lpDsHAEllARRuhhRaLhpplthhhpt......t.tpsl....a..............h.p..tssth........acL+sslphaLYlSpsP.CGDAsl...tp.............................................................................................................................................sstthlsscsss....tsh.shpGll.htsspu...-phhoh..SCSDKls+...........WsllGlQGuLLuphl..cPlYlsolllu..t.........hpts......phpRAht..pRh.....................tth.tsaphpp.Phht.......t.......................................................................tspsppptpsss.............................hSlsWsh...............sshplplsss....ppGp.........sshtstSclsKtshaptatpl......ht............ppphts............sYt-hKpts...ppYppsKpplhpt......................tshssW.lpKs.- ...........................................................uluTGs+Clst.......phh.s....pG......h..............lpDsHAEllARRuh...h.R.aL....hpplthhh...st................ppol..a...........................................................t.....t..t..s..............................................................apL+.pslphahYh.S..psP.C...GDAphhs.hp...............................................................................................................................................................................................................................................ssttslss.p.s.s...s..............sh..ph.Gll.............p.u.....-.....p..hhoh..SC..SDKls+...........WsV...lGl...QG...uLL..ophl...pP.......lYlpol..llG........................hptt.......................................thpRA..h..h....pRh...........................................t.htth...athpp...shht...............................................................................................................tsptpt.hpsss..................
...........hS.ls.Wsh...................................ss.pl.pl..hss..ppGp............................tshtt.SplsKtthhthahp.l..................ht.h.....................ttth.t................................sYt...phKths..............ttYpt.s..pp.hhp........................shssW.htps.......................................................................................................................................... 1 123 188 317 +1059 PF00324 AA_permease aa_permeases; Amino acid permease Finn RD, Bateman A anon Prosite Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 478 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.47 0.70 -6.16 26 17705 2012-10-03 01:44:59 2003-04-07 12:59:11 16 47 2920 0 4452 25995 1856 417.10 27 85.04 CHANGED clhhhuluuslG..sGLalusuhsltpuGssus..llua.llsuhhhhhhhhuluEhushhP.....puGuahsaus+hlus.phGhssGhh......ahh.hhhslsh-lsssshhlpaW...hscls.hhsh..................sslhhsllhhlshhul+hauchEah..hshlKllsllsFlIlu...hhhhtsstssps.............t.hhtsus.....hsss.........htshhushshshFuas.GhEhluhsAuEscsPpKslP+uhppslhplslhYl....................huhhhlshhlshsssshhst...s...........................ushshhhphtslsshssllshslLsusLSuusuulasuoRhlhuLuccshhP.....phhpths+pusPhtulhlohhhuhlsh........lhsthsss..hlhsahlshsulsshhsahhhhhshh..taRpshthpstthstlshchhhhshhshhslhhlhhlhlhhshhhh......hs.......hssts.htsalshhlhllhhhshthhh+phhsphhh 
................................................................................................................................HlphIA..lG.G...s..I.......G...sG.L....F....l......G..u..u...t.........s.........l...p..............A......G................P...us............llu.Y..hl.s...G...h.........h............h......a......h...l..M...p.u.L..G............E..h..s......h.....t.P............ssG........S........F......s................s....a.......A................p......c......h..............l...........u......s......h..............h....G..........a....h..s.G.Ws.................................Y.a.h....t....a............h......l.....s.........s....h.........s........-....l......s....A......s.......u.....h.......h...h.......p......a.....W.............h.....P....s....l....P......t.W.l..h..................................................................s.h.h.h..l...h...l....l....h.....s....l....N.........l....h....u....V....+...h....a...G....E....h........E.....F..W........F...u...h....l....K...V....l....s....I..l....s.....h...I.lhG............h...h..h.h...h.s.s..tssst......................................................ht.h..h.p..p..Gu....................................h....s.....s..G.......................................................................h......h......u....h......h....h....s....h.....h...h...s....h..F........u....a.........t......G.....h....E...l..l............u.........l....s....A..........u.E....s..c...............s.........P....p......+................s...........lP...+..A...........l...p...p...l...h...h...R....I....h...l.....F..Y.l.........................................h..u...l....h...l...........l.......h...h...l..h........P....a....s....p...h....s....h....t..s................................................................................................S..P..F..l...h...h....h....p....t.......h.......G.............l...........s.....h....s.......u......s......l........h.....N....h....V.....l.......L....o.....u....s....l.........S..........u....s...
.N.....S.........u....l....a.....u.......s....u....R...h...L....a.u.L..u....p...p....G.........A..P...............................ph..h...s.....+..h.....s.....+.................p.........u....l..........P.....h.....h.....u.....l....l....h.....o........s....h..h....s..h...l.u.h..........................................................................l...h..s.h.h.sst...........ps..a..h...h.....l....h....s.......h...u....s..........h.....s.......h...l....h.....s......W...........h.......h..........I...h...l...u..p.h..............p.a...............R............+.............t...................................................p......s.............t..................h.........t.................p....h..................a........+.........h...............h.......h........s..........h.......s..........s..........h.................h.......s.......l.......h.........h......h...h.....h...l........l...l......h...h......h...h..t.............................................ht...........................................................................h............h....h...........h..h...h..h...h..h.......h....................hh...................................................................................................................................................... 
0 1009 2148 3513 +1060 PF03306 AAL_decarboxy Alpha-acetolactate decarboxylase Mifsud W, Sammut SJ anon Pfam-B_3661 (release 6.5) Family \N 25.00 25.00 25.70 25.60 21.30 21.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.12 0.70 -5.30 71 1480 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1246 4 233 763 52 214.90 42 90.92 CHANGED laQhSTluALhsGlY-Gsholu-LLcHGDaGlGTFspLDGEhIhlDGpsYQhcuDG...s.sp.ls.ssp.psPFAslThFpsc.hphphtpshshpplpphlpphhs.upNlFhAl+lcGpFppl+sRoVsp.QpcPYsshs-sscp.QshaphpslpGTllGFaoPpahp.GlsVsGaHlHFlo-D+phGGHlLDapl.psuplplshhsphplcLP.pstsF....hpuclshps ..........................LaQauTLuuLhuGLhcGTholsELL.cHGD.h.GlGThsulDGE.lIhL.DGcsY..p..sp..u..cG...p.hh.c.l....p....s.-............p...hsPaAslT.Fps-...hpa...p..................p..p.p.h.o.pcplpt....pIcph....h.....uc....NL....FtAlKI.pGpFp+..h+lRhhPp...QptP.Ys.phh-succ..QPEaptps.lpGolVGF.aTP-hac.Gl..us..AG..aHlHFlsD....D+saGGHVhDFtl...ccshl-..ltshsphc.+hPhpspsFhpAclshcs................................ 0 52 122 182 +1061 PF04611 AalphaY_MDB Mating type protein A alpha Y mating type dependent binding region Kerrison ND anon DOMO:DM04516; Family This region is important for the mating type dependent binding of Y protein to the A alpha Z protein of another mating type in Schizophyllum commune [1]. 
25.00 25.00 57.40 234.00 20.90 19.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.31 5 6 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 2 0 1 6 0 146.80 51 15.97 CHANGED MsDpLusL+uISAsAKuMsulAtSRGApssPsPhssTsV+..FDPLPoPsLDuLRoRLp-A+LPPKulKSALuAYEcACuRWRp-L-EuFcpTA+SVSP+NLHLLssLRaRLYTcQVEKWtsQVLQVPE+W+AEMEKQRAHIsATMGPu Ms-hLusLpuISAsAKsMhulAtSRGApss.pPhssTss+..FDsLPsPsLDhlRoRLp-A+LPPKulKuALuAYEpACuRW+p-L-EuFcpTA+SlSP+NhHLLspLRhRLYscQVpKWhhQVLQVPE+W+AEMEKQRAHIsATMGP.s 0 1 1 1 +1062 PF03417 AAT Peptidase_C45; Acyl-coenzyme A:6-aminopenicillanic acid acyl-transferase Bateman A anon MEROPS Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.28 0.70 -4.90 18 964 2012-10-03 21:14:07 2003-04-07 12:59:11 11 16 794 14 316 1078 61 218.40 21 58.87 CHANGED hpscss.....hlu+NhD..htPthhssphhlhts..............sGasslsh...supl..Gps.GhNcpGLshsh........Nhhph+ph.ssGhsphhltRhlL-.ssols-AlchLp-h....PctuuhsalhlDpstphshlElsssstsh......sl+css..thhhpTNHh..........................ppshppts......pphhcsShpRhp+hpphhspttss...ppshchhsDtpst.hs.....pphsshtsTla...oulaphtstphphslupsst 
..........................................................................hp....t.....hhs+N..aD........hps..t...h...h..s...t...t.h...hh.hh..................................suh.s..t..l..s.................su.tl....Gp...h...t.GhNcpGLsh..sh...................N.h...h..p.....t....c..p...s..........s..s.....G.......h......s..t..h...h....l...h..Rh.....lL-.spslsEA.lp....lL...cch..............sttu..u.......h.sh.hl..h...D...p.s.....t...s...t.sh..l.E...hssp.sh..........................slt.ss.......t.hhp...T.NH.a......................................................t....h....h...p...p....s......................p.h...hp..p...S..h...p..Rh.......t.+h...tp...hhs.pp.sh.............ttsh.......p.......h.h.p.s.t...t.....hs............................h...p.....p.....h.............s....T..lt.............oshap.p..phph.hh.st...h................................................................................................................................................ 0 115 199 275 +1063 PF02496 ABA_WDS ABA/WDS induced protein Mian N, Bateman A anon Pfam-B_2496 (release 5.4) Family This is a family of plant proteins induced by water deficit stress (WDS) [1], or abscisic acid (ABA) stress and ripening [2]. 25.00 25.00 28.10 28.10 23.00 19.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.93 0.72 -3.81 17 393 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 64 0 39 374 0 70.70 70 52.27 CHANGED .cY+KEEKHHKHhE+lGcLGAsAAGAaALaEKHcAKKDPEHAH+HKIEEElAAAAAlGAGGaAFHEHHEKKEAKcEpcEu ..........-YcKE.KHHKHLEclGcLGAVAAGAaALHEKHcAKK.DPEHAH+HKIEEEIAAsAAVGuGGaAFHEHHpKK-A+cctct............... 
0 1 18 30 +1064 PF00950 ABC-3 ABC 3 transport family Bateman A anon Pfam-B_1591 (release 2.1) Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.50 0.70 -5.09 16 7614 2012-10-02 17:14:55 2003-04-07 12:59:11 12 12 3931 0 1534 7023 2698 251.50 28 90.95 CHANGED pacahppAllsulllulssulLGsallL++hSLhGDulSHusLsGVuluahLuls.....hhhGAhhhulluAlshsalcppo+l+pDsslGIlhushhulGllllolhpt....spssLtpaLFGslLulsppDlh.htllssllLlllllha+chlhhoFD.shApshGlslphhphhLhhLlulslVsulpsVGsILVhAhLlsPuusAhhhs+shcphhllAsllGhloshsGlhlSahhs.suoGssIVlltshlFlluhhht .................................................................................................hpFh.pAh.ls.ulhl..u..l.s.s.u.h.l.G.s..a..l..l..lRp...h...ohh....G.D....ul...S.H..u....sL........s....G.....l....A...l...u..h...h......l...u.hs..........................hl..u....u.....h...l.....h...u...l.....l...s...A....l....h.......l....t....h......l.......p.......p.....p....s.......p....h...p.....p....D.......s.....s.....l......G.....l....l...h.....s.....s...h...h.....u...l......G.....l.....l...l.....l.o.h.hs...................sss.s.L.......s...h...L...F...G..........s..l...................L............u............l............s.........................p.............D............l............h...........h............l...........h...l....lu...s.............l......l.........l...h.....l..l.h.lh...a...+...p....Lh.hhoF...D.ph...Ap..s.t.G.l.s....s....p..hl.ch.l...hhhllulslssuhpsVG...slLlsuLL...l...hP....u.A....s.A.h.h.l....s..c..s...h..p.....p....hhh.l..us.hl.u....h....l....u.s....h....h......G........l....h........l...S.............a....h........h.............s......h...........s.....sG.....ss.I.Vl.h..ts...h.lFllshh.t................................................................. 
0 472 958 1282 +1065 PF01061 ABC2_membrane ABC-2 type transporter Finn RD, Bateman A anon Pfam-B_865 (release 3.0) & Pfam-B_31 (release 15.0) Family \N 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.52 0.70 -5.17 298 15925 2012-10-03 10:13:34 2003-04-07 12:59:11 19 140 4416 0 7264 20686 4522 204.80 15 46.78 CHANGED pphtslhtRphh..phh+ss.ht.h........hphhps.llhsllhGhlaht.............t....thh.th.................huhlhhslhhhsh.shssh.shhhpppshh.h+phstshhphhshhluph...lsp.lshshlhshlhhh.lsah....h...h....uhpht..........h.....hhhhhhhhlhhhhhsu.huhhluuhs.shp.sshls.shlhhshhhhuGhhhshspl....s.....ahp..a.hhhlsPhsashpu.hhhs.h ..............................................................................................................................h..hhhlhh+phh....thh+.s......h...h..h...................ht.hlt..s...h..l....h....h.l..l...h.....u.h..l..a.h.t.............h..........s..t.thh...h............................................................................................................................s.uh.lhh.h.h..h..h...h....s....h........s....h...h........s.......h........................h........h..h.....h.....t...p....p.......shh....h.......+............h...s....t...........s...h.........s....h.....h.s....h.....h.....h..u.ph..................l.st....h...s.....h.........s...........l..l...h...s...hl.h...h...h...l...s..hh...........................h.....h...........................u.h.pht.............................th...........hhh.hh..h.h....h...l...h.s.h....h.......hss....hu.......h.h...l....u.s..........h...s........s..h.....p....s...s..s..h........l..s....s....h.l....h..h....h..........h...h..........h...h.....u...G.....h..h.h...s......h...s.........th......P....................h...h.p.........h..l.h.....h.h.sP.htahhpu.ht.s................................................................ 
0 2247 4566 6342 +1066 PF00664 ABC_membrane ABC transporter transmembrane region Bateman A anon Pfam-B_2 (release 2.1) Family This family represents a unit of six transmembrane helices. Many members of the ABC transporter family (Pfam:PF00005) have two such regions. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.40 0.70 -4.99 70 51056 2012-10-02 13:23:42 2003-04-07 12:59:11 18 190 5230 28 16541 42384 7093 265.40 13 43.23 CHANGED lhhshlhthlsshhthh...hshhhuhhhsh.............hsssstp....................psshhhhhhh......................hlhhhhh..hshhhhsh.has..htthst+lptplhpp.lhc.phhp..h..s......pppssGplssRlosDssplpssl.stplhhhh.tslhhhlsshllh...hhh..uhpLsLlhl.hhslhhhhshhh.sphhpphpppppps.........hsp.hss....hhpEslsulcTVpuastppthhpcappthpp.hpptshptshhsshhhs.htphlhhh.h.shshhaGs..hl...shtsths..ssplh.sshthhthhttsl .................................................................................................................................h....hhhhh.h....s.h.hthh.........s.h...h....h..t.h..h.ls.......................h.tt.t..................................................................t..h.h..h..h..hhh....................................................................h.h..h..h...h....h..h.......t.s...h.......h...h.....h...h...h...h..hh...................h.t.t....h....u....h....p.....h....t..........t......p....l....+..............p.p.....l....h...p....+..l..hp.........hs.h....s.hh..........................pp.p.s....s......G...........p....l....h.....s..........+.......h....s.................s..............D..h....p....p....l.....p.....p................h....h...s.........p..h....l................t....h.................h......t....s..h....h..............h..h..l.....s.......s...l..h..h.h................h.h.h........s..h..t...l....s....l...............l..h...l...h...h...h..s....l....h...h...........h...h....h..........h.........h....h.....t......p..........t...........h.......p...p...h.
..t.......p.......p.h....p...p.t.....................................................hup...hs..s...............h.h.p...E..s....l....p....G...h..p.....s......l.......+...s...........a.....s......t.........p........p........p...........h....p.......p....h.....p....p..........t................p.......p....h.......h....p...t...........t......h............p.......h........t........t.....h....t.............s.....h..h.....t..s.....h....h..t.........h...l......h........h.......s.....h....s...h.....h.h..h..h....uu..........hh..............h.h.t.s......p.....h...s........hG..t..h...h....s.h..h...h.h..h....h...................................................................................................................... 1 5333 9845 13729 +1067 PF00005 ABC_tran ABC transporter Sonnhammer ELL, Bateman A anon Prosite Domain ABC transporters for a large family of proteins responsible for translocation of a variety of compounds across biological membranes. ABC transporters are the largest family of proteins in many completely sequenced bacteria. ABC transporters are composed of two copies of this domain and two copies of a transmembrane domain Pfam:PF00664. These four domains may belong to a single polypeptide as in Swiss:P13569, or belong in different polypeptide chains. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null --hand HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -3.87 55 363409 2012-10-05 12:31:08 2003-04-07 12:59:11 22 730 6447 300 93265 270413 81790 147.80 27 41.67 CHANGED LpslshslptGchlullGtsGuGKSTLLphlsGhhpP.....spGpl.....................hhp.spsht...........hpthcppluhl.Qp..splhspho.lt-slh.s..........................................hhthtppttpsc........hpphlpplshht..........................hhcphl..............tpLSGGp+p..RlslA+slhppsplLlLDEPTs .....................................................................................................................................................................................................................................................................................................................................................................................................cslsh.p.l...p...p..G..c.h....l...u....l.....l...G....s..s............G.......u....G..K........S.....T....L.....l.........c.....h.....l....s......G....h...h....p..s........................s.s.....G.....p...l....................................................................................................................................................................................h..l...s....G....p....s...ltp....................................................h.t....t..h.....p.....p.....p.....l.......u....h.....l......h...Qp.........hs.L..h...s....p...h...s....lt-slthsh......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..............thtttptppc....................h.p...p..h..l.....p...t......l....s..l.t.t....................................................................h....p....p...h......h........................ppL..S....G...G........p.......+......Q.............R........l..........s..l....A.....R.......A........L...........h............p............p...........P..........c..........l......l.......l.l.....D..EPT............................................................................................................................................................................................................................................................................................................................... 0 28903 57358 77150 +1068 PF00561 Abhydrolase_1 abhydrolase; alpha/beta hydrolase fold Bateman A anon MRC-LMB Genome group Domain This catalytic domain is found in a very wide range of enzymes. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.07 0.70 -4.80 48 9019 2012-10-03 11:45:05 2003-04-07 12:59:11 15 58 3408 175 3041 47284 15466 242.40 13 59.97 CHANGED acllshDhhGhGtSs..................................................hsthshpshsc.lptlhp.phshpp......hhllGaShGGhluhthstphsp....plpsll.hhss......................................................................................................................hhhtpshhtt............hh.thhhshhhshhhsshpthhstth................htshpp..hsphhptht...hpshhp....shthsshht.................htthhttshhthhpp......hpsPslllhuppDp...hhs.pst.t.htphhsp.......s..phhhhss..uHhs........hhppssph..sphlhs 
..........................................................................................................................................................................................lhhh.s...t.G..s.sh..us....................................................................................................................tht..h....t....p....h....s.......h......p....c.......h......s.....t.........s........h........c......t.......l........h...........p......t........h......G.......h....cp................hsh....l.G.t.S.h..G.G....h.....h.....s....h....t....h....h.....s......p...h.Pp.................pl....p....s...h..l...h.h.us.h..............................................................................................................................................................................................................................h...h...t..t..t.....ht...............................h...t..h...........h...t...........h......h....t................h.....h......t...............t................h....s.............h......................................................................................................h............t..................................t..................ht...................hh...................................h..h....h.......................................................h................t...s.......................h.tt.....................................ht...h....h....h....h..h.....t..h.....t.....p..D.................h.h....s................t..........................t......................t......h.........t...............................h........h.........t.......tH................................tt........................................................................................................................................................................................................................................................................................................................................... 
0 897 1658 2474 +1069 PF03806 ABG_transport AbgT putative transporter family TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 19.60 19.60 19.60 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.73 0.70 -6.50 9 1493 2012-10-02 15:12:49 2003-04-07 12:59:11 8 3 1103 0 203 1520 191 425.00 40 96.44 CHANGED RFLshlE+lGNtLPcPlhLFhhhhllLhVsSuIhSshuhSsssPhs.Gst...........hlplhsLLos-GLthlhsshlcNFouFsPLGlVLVsMLGlGlAE+SGLluALh+shlppsP++lloshllhlGllSpsAuDAuYVVL.PLuAhlFhulGRHPLAGLAAAaAGVSGGFoANllluthDsLLuGhTppAAphIDPsa..ssNPhsNWYFhsASshVls.luhaVT-KlVEPRLGsapss.tppptspht...lTstE+KGL+aAGluhllhlALh.shsllPtpuhLRs.cssslt.soPFhpulVshIhllFllPGlVYGhlstol+sp+DVsshMu-uMuoMGsYIVlsFFAAQFVAhFsWSphG.llAVtGAchLcshsLsG.sLlluhIlluuhlNLhIGSASAKWulhAPIFVPMhMLlGauPEhoQAAYRlGDSlTNlITPhMsYFsLlLshsp+Yc.chGlGTLlShMLPYSlsFhlsWhlhhhlWhh.LGlPlGPGushtY .....................................................................................................................................................................................................................................................................................hLshlEhhGN.tlPcPhhLFhhhhlhlhlhohlh..uh..hs.h.s..sh.pP.............t..t......................................l...hl...h...s..L..L..ss..-....G....l....p..h....hlss.hlpNFs.sFs..P.LG.hl.......LshhlG..l.G...lA..EcoGhlssh...hhthl.t...t..s....s....t...p....h..lo....hl...l...h..h.ulh....u.p....h..A...u.D..u.u.....h....V.....l.....l...PluAhlFhuhGRHPlAGlhsAaAuluuGas...ANl.l.lss.hDsLLsGhop......t......A...A.......p...h..l...s..s...s...h......pl..s..s...h.sNaaFhhsSshllshlshhl.T...-Kl....l...c....P+..L............u.p........h.....p......s..........s...................................t......p........p.........h................p.................................lT.t.Ep+uL+h.A.s.l.sh.ll.h.l......sl.l.hhh.h.....lP..................p...s.uh.LR..s...st..s...........t.............h..l............h...suPhh.p.uIl.slIhlhFhlsGlsYGhsstph
+....sp....p....Dlh.p...................hMsc....s....h....p.s..M..u.s.a.I.V....hsF..hsAQFlA.h.F......s.a.S..N..h.G..t..hhA.l.....tG..Ac..hL...c.s.......s..hsG......s..hhlu......h.l.ll.s.u.h.lNhhluSuSApWulhAPIFVPMhMhl...G..hpPthsQhhaRlGDSsoN.l....oP....h..sah..sLll.s.a....h..p.+Y.....p..............c.............p....ht..lG.T.lh..Sh.hlP.Yol.hh.hl.sWhlhl.l..hWah..lGlPlGPGs....h............................................................................................. 1 57 123 167 +1070 PF02230 Abhydrolase_2 abhydrolase_2; Phospholipase/Carboxylesterase Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_1382 (release 5.2) Domain This family consists of both phospholipases [1] and carboxylesterases with broad substrate specificity, and is structurally related to alpha/beta hydrolases Pfam:PF00561 [2]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.31 0.70 -4.90 12 2930 2012-10-03 11:45:05 2003-04-07 12:59:11 11 32 1915 13 1262 4011 3116 197.40 21 84.53 CHANGED hs..tlhsstp.tpsslIaLHGLGDsu.....cuhu.sht.thsh.pspaIhPpusp.hPlshstGht..uWF-lhshs.st..p.spssltputpplppLl-t-hcpG............lssscIllGGFSQGuhluLassLop.p...luGlluhSuhhshttph.pt.stss...phPlhpsHGppD.llPhthGttstchLpshht..psphpsapthuHu.sspphpsltpalpphl 
....................................................................................................t............t....t..p..t.h..l.l.h.lHG....h....Gsss.................ph.h......s.........h..................p...........................h.......................h...............t............h......p.h....l..h..s..p..u.....s.......h........................s..........s...............s............t.....G.......h....................................t....W..........a..............s......l.........s.......h........s...........ps........................c.....t.........t............tl...p..t.......s.....h.t...t.l..t.p.....h..l..c.p....t....h...p.p.s..........................l.s.s.p..c.l....h....l.sGFS.Q..................Gu.......s.h.u.............lt..s...s...........h...........p...........t............s.........t..........................h....u..........u...........l.....l...u....h....S...........u..............h..................h...................s.................................................p...............h................t.........t......................................p..........s.......................t.............t......h.........s.....l......h.h.h.HG.p..t.Ds.l.l...s.h.t..h.u.t...t.s.t..p....h.Lpphuh.............plph...c...h..........h...sH......t....l..s.......p..php..hhpalt...h................................................................................................ 0 362 705 1015 +1071 PF02517 Abi CAAX protease self-immunity Bateman A, Bashton M anon Pfam-B_1073 (release 5.4) Family Members of this family are probably proteases (after a isoprenyl group is attached to the Cys residue in the C-terminal CAAX motif of a protein to attach it to the membrane, the AAX tripeptide being removed by one of the CAAX prenyl proteases). The family contains the Swiss:Q03530 CAAX prenyl protease. The proteins contain a highly conserved Glu-Glu motif at the amino end of the alignment. 
The alignment also contains two histidine residues that may be involved in zinc binding [1]. While they are involved in membrane anchoring of proteins in eukaryotes, little is known about their function in prokaryotes. In some known bacteriocin loci, Abi genes have been found downstream of bacteriocin structural genes where they are probably involved in self-immunity. Investigation of the bacteriocin-like loci in the Gram positive bacteria locus from Lactobacillus sakei 23K confirmed that the bacteriocin-like genes (sak23Kalphabeta) exhibited antimicrobial activity when expressed in a heterologous host and that the associated Abi gene (sak23Ki) conferred immunity against the cognate bacteriocin. Interestingly, the immunity genes from three similar systems conferred a high degree of cross-immunity against each other's bacteriocins, suggesting the recognition of a common receptor. Site-directed mutagenesis demonstrated that the conserved motifs constituting the putative proteolytic active site of the Abi proteins are essential for the immunity function of Sak23Ki - thus a new concept in self-immunity [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.29 0.72 -3.85 1333 12845 2012-10-01 21:07:14 2003-04-07 12:59:11 11 37 3734 0 2636 9172 1289 110.90 21 43.17 CHANGED hhhh.......hhh......hlh.......................................................hslh...ss.l..........sEEllFRG....h...l........................................................................................................................hs...t............................................................................ltph............hh..........................hhulllou.llFul.hH..............h...........................hh.....h..................................................................................................lhh..hhh....G...l.....hh......u.....h....lh...........h........po..t..s...............lh.sslhhHhhhNhh .................................................................................................................................................................................................................................................................................................................................................hh...hhhhlh......................hslh...ssl........sEEl...laRG....h....l.........................................................................................................................................................................................................................................................................h..s..t.............................................................................ltpt.........ht..........................hhulllou..llFu.l.hH...hst..............................h.h.....h..................................................................................................lhh...hh..h....G...l.....lh......u........h....la...........................h......po...p..s...............lh....ss.l.hhHhhhNh...................
...................................... 0 995 1783 2287 +1072 PF03992 ABM Antibiotic biosynthesis monooxygenase Yeats C anon Yeats C Domain This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. It's occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue. 21.90 20.90 21.90 20.90 21.80 20.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.39 0.72 -3.94 114 8140 2012-10-02 00:20:33 2003-04-07 12:59:11 11 50 3128 85 2223 5804 1847 77.50 17 63.08 CHANGED hhhlhsphp.lpsspt.ppahph.hpphsp.......hhtspsGhlphplh.........p....s..hpsssp.ahlhphWcs...psu.hpsatpo.........spapphtpt .................................................hhhhsphp..lp.s.spt...pphhpt..h..p...p..h..ht........th..ppps...G.hlshplh.....................p...............s...hps.s..s.p..h..h...h...hp..t.....Wc.s.......ppu...h.p.s.a.t.p.o..........spapth...t.......................................... 0 615 1296 1802 +1073 PF00887 ACBP Acyl CoA binding protein Bateman A anon Pfam-B_864 (release 3.0) Domain \N 21.90 21.90 22.20 22.30 21.50 21.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.06 0.72 -4.32 101 1677 2009-01-15 18:05:59 2003-04-07 12:59:11 14 62 583 29 963 1596 101 85.20 32 33.64 CHANGED hpppF-pAsphl+pLsp........pPos-phLcLYuLaKQAT.G.css..ss+P..Ghh..............Dhhu+sKW-AWppl..cGh........S+--AhppYlchlpp....lhp......phu .................................ptpFptAs.ph.l.c.p..Lsp...................pPss.-phLcl..Y..uL..YKQ..ATh.G.sss.............ss+P.....Ghh.............................D.hh.u+s..KW.....-.....A.Wppl....puh.......................o.p--AhppYlphlpplht...t........................................ 
0 305 482 749 +1074 PF03255 ACCA Acetyl co-enzyme A carboxylase carboxyltransferase alpha subunit Mifsud W anon Pfam-B_1935 (release 6.5) Family Acetyl co-enzyme A carboxylase carboxyltransferase is composed of an alpha and beta subunit. 21.30 21.30 21.40 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.66 0.71 -4.64 15 3540 2012-10-02 13:07:06 2003-04-07 12:59:11 9 7 3419 3 719 2292 2130 133.30 51 41.80 CHANGED hhL-FEKPlhELEpKIspLcclup....cschslssclppLcc+hpcLp+cIausLoPaQRlQlARHPcRPoTLDYIptlh--ahELHGDRshuDD.AlVGGlG+lcG+sVslIGHQKGRDTK-pltRNFGMssPcGYRKALRLMchA- ........................................................p..L-FEpPlh-lct+IppL.p...h.tp............p.........t....l....s........h..s...p.ElptLcp.+.........p..htc.....c...l........a......s..sL..ssWphsQl....ARHPpRPhTLDYlptl..F..s..-..Fh..E.....LHGDRs.au....DD.t......AIVGGl..A........+.l.s........G........p........P........VhVIGpQ.K.G............+cT..K...........-plpR........NFGMPp...PEGYRKALRLMchAE.............................. 1 237 478 613 +1075 PF00871 Acetate_kinase Acetokinase family Bateman A anon Pfam-B_1595 (release 2.1) Family This family includes acetate kinase, butyrate kinase and 2-methylpropanoate kinase. 
22.00 22.00 22.00 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.10 0.70 -5.93 11 6003 2012-10-02 23:34:14 2003-04-07 12:59:11 12 14 3886 38 1171 4042 326 366.90 38 95.86 CHANGED KlLVlNsGSSSlKFpLh-s........ptpsLhpGLsEpIhlssuhhhhp..stcp.tthsshssHptAlphllssLpp...thhpphs-IsulGHRVVHGGphFosSslls--llcsI+DhhpLAPLHNPApl.GIctstclhPss...pVAVFDTuFHpThPptAYLYulPhshhpcaGlRRYGFHGTSHKYVup+suchLsKPh-pLslIsCHLGNGuSlsAl+sGKSlDTSMGhTPLEGLhMGTRSGsIDPAIlsalt-ppshSss-lsshLNKKSGlLGloGloSDhRsl.-shpc.ucccAphAlchasaRlt+aIGpYhAsLt.splDulVFTuGIGENuuhlR-hhlpsLchlGlclD.EpN.h..phGccslISossS+hplhllPTNEElhIAp ...........................................................................................................h.lLllNsGSSSlKatlh...chs......................s.t...p....h...l...h...p.Gl...s....E.....p....l...s...h...p..s...u...h....h...p...................h..p....................................s..............s.......c.......p..........t....t.............h.....s....h..s......s.....H....p....tA...l....p.h.l..l.p.tLhp..........t..h..h...p..t...h.....s......pl.s..ulGHRlVHGGch.F.s.posll.....s..-.....cV...lp.p.I.c.c..hs..s.L.......A..PLH.NPAslh..........GIcss....p...cl..h......P.s.ls.........pVAV.FDTuFHpTMPctAa.hYulPh.ch.Ycc.....hslRR..Y.GFHGTSHpYVu...pc.u.A...c.h...L.....s..+..............s.......h..................c...clplIssHLGNG.u.SlsAlc.....sG+SlD.TSMGhTPL-GlhM....GTR.SGDlD.P.ull.a....l....h..........p.........p.........t.............s...........h...o........h.c.............c.l..............p....c...lLNKcSGLLGl.o.G.ho.o.Dh...Rs.....l...c...p.............sh...............t...........p.....G.........c............c......c...........ApL.A.h-hasaRltKaIGuYs.A.sh................t.........lD..AllFTuGIGEN.ushlRph..lh.p.t.L.t....h..h......G.l.p...lD.c.tN..t.......hhG..p..p...t..h....I...s....p...t.s..o.p.....l.h.s.hVI.PTsEEhhIA............................................... 
0 390 748 994 +1077 PF02550 AcetylCoA_hydro Acetyl-CoA_hydro; Acetyl-CoA hydrolase/transferase N-terminal domain Mian N, Bateman A anon COGs Domain This family contains several enzymes which take part in pathways involving acetyl-CoA. Acetyl-CoA hydrolase EC:3.1.2.1 (Swiss:P32316) catalyses the formation of acetate from acetyl-CoA, CoA transferase (CAT1) EC:2.8.3.- (Swiss:P38946) produces succinyl-CoA, and acetate-CoA transferase EC:2.8.3.8 (Swiss:Q59323) utilises acyl-CoA and acetate to form acetyl-CoA. 20.70 20.70 20.70 21.40 20.60 20.60 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.20 0.71 -4.55 5 2652 2012-10-04 00:26:15 2003-04-07 12:59:11 10 14 1717 21 626 2072 95 179.70 26 42.46 CHANGED hpcpYp+Klts.PEEAAsLlcsGpHIchGGhhuAuTApulPcYLA+R+sEhspl+ssohIDLtshphloAuPps-hhcpcsuhlaR.ss+pouchsssslNpGllcasshaLSElut..hhspGFss........IDlAlIpTTshDcHG..alNhG..Vo......ssthKuIlEl........AElVllVlssssPalNG.YDthIsl-+V..DYIltDsEhsVshlP ..............................................................................................................................t........hhhs..st.cA.s.th.l..pps...........hlshut....su.sts.ls.....A..ls......p..h..............t...h.t...........h.....h..p......l..h.h.h.t.............s....s....t..t.h..h..t........t.hh.t.h+........sa....s.s.......h.....R.c......t....Is.....p......G..p......s....as.s.h....+LSc.lsp......hh.c.p.s...hhs.....................lDVAllp.........s..ush.sccG.....ah..s.....G..lu............sp.tpp.s.ll.Eh................uth.h...l....l.s.t.s.hP+.p.hu.hs...h..h....h..t..h..s+l...s.h.lh...ss........................................................... 1 243 430 562 +1078 PF00797 Acetyltransf_2 Acetyltransf2; N-acetyltransferase Bateman A anon Pfam-B_575 (release 2.1) Family Arylamine N-acetyltransferase (NAT) is a cytosolic enzyme of approximately 30kDa. 
It facilitates the transfer of an acetyl group from Acetyl Coenzyme A on to a wide range of arylamine, N-hydroxyarylamines and hydrazines. Acetylation of these compounds generally results in inactivation. NAT is found in many species from Mycobacteria (M. tuberculosis, M. smegmatis etc) to man. It was the first enzyme to be observed to have polymorphic activity amongst human individuals. NAT is responsible for the inactivation of Isoniazid (a drug used to treat Tuberculosis) in humans. The NAT protein has also been shown to be involved in the breakdown of folic acid. 21.00 21.00 21.10 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.49 0.70 -4.79 33 2001 2012-10-10 12:56:15 2003-04-07 12:59:11 12 6 1363 31 435 1384 26 229.20 28 85.40 CHANGED sL-oLpplhttHhpulPFENLslhhG.....csl.sL-lpslacKlVpp+RGGaCaEhNtLhthsLpplGFclshLuupVhhstsst..ssshoHhlLhV.sl-Gc..salsDVGFGus..phhtPlcLtsstsQspshu.hFRl.scpss....tahLpphppppWh.................slYpFslpPpshpDapsts.hahpopPsS.hFspp..hlsuhtss-.GphsLhupphohp....psst..thphphlsssEltcsLpphFslsls...tthl 
..............................l-sLptlhhtahtslPF..EN.L.s...lh.h.s...........................p.l..pl...s........psL..hcKl...l........h......p......p..R....G..G.aCaEhNslFthhLpp..lGF.sVphlhu.....pV.........h.....h.........s............s....s.............s.............t..............h........s........s...t.............s.......HhhLlV......s...........l.........c.sp...........palsDVGFGut...h.htPl..L.....t.s......s....h.....s................s....s....t....s.....paRl..h........ppss.................ha.hl..p....t.......t......p......p..p....p....Wp........................................shY..pF..s....h..p.......p.....p.D..a....t.........sp...hast..p..p..P..pS....hF.tpp.........hl..h.......s....p.....h..........h........s.........s.......G.............+..............h.....s...L.s.....s..t..........p......hshh..................tss....hpppp.h.s....tphhphLpp.F.ultl........................................................................................................................................................................... 0 133 234 337 +1079 PF00328 His_Phos_2 acid_phosphat; Acid_phosphat_A; Histidine phosphatase superfamily (branch 2) Finn RD, Griffiths-Jones SR, Rigden DJ anon Prosite Family The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue. Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches.The smaller branch 2 contains predominantly eukaryotic proteins. 
The catalytic functions in members include phytase, glucose-1-phosphatase and multiple inositol polyphosphate phosphatase. The in vivo roles of the mammalian acid phosphatases in branch 2 are not fully understood, although activity against lysophosphatidic acid and tyrosine-phosphorylated proteins has been demonstrated. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.16 0.70 -5.12 93 4055 2012-10-02 11:42:54 2003-04-07 12:59:11 17 54 1206 64 1934 3581 41 323.30 15 63.62 CHANGED pLchVpllhRHGsRsPppshstp..t................................................................................t..h.t.hta...................................................................................pashshGt...........LTst.GttpthphGchhR.p+Ys.................tLhsst........pplhlhoostsRsltSApshhtGhh.sps.............................pshshthlsc...t.sshhhhst.........tsCsthpp......................ppsspthpphppthtpshs........tclsph...................................su.....shshtcshshhshshhppt.................................phpshssl.hsp.....tchhphph..............................................................................................................................hpslpp......hath.............uhspt....htphhGsshhsplhpplppshsppp..........................................................................phphhhhhuHDoslhslhs.sL....Glhpt...........................st.sPauuplhhEha.p.......................ssp...t....hhV+lhhs 
................................................................................................................................................................................................lp.l.hl.RHG.Rs.Ph.tt...................................................................................................................................................................................................................................................................................................................................ta.....................................................................................................................................................................th.s..h.thGt.........LT....s....t....Gt......p....hht.hG.phhR.phas...................................................thhp..p.t..h........pplh.hh..u.s......s....pR...s.h.....to............A......ps.....h...h..tGhh.sps.....................................................................................h.sh....t.tht..p...................t..s.th......h.........................tt.ss.th..pp.................................................t.s...t..h..t......t......t....t..t..h...p.ht.....................phpp.........................................................................................tt...............h..t.h...t...h..ht..h...h......h.thhp...................................t.....h..s..p...h...hpt........tp.....tp.hp...............................................................................................................................................................h.t.ph.pt....hat...................................th.s.t.....ht...t.....h...u...t...sh.h...p.....l...hp.tl........tphtp.tt..............................................................................................................................................................................................................phh..hh..hu..........HDss.....l.hs.lhs..sL...........sh............................
......................................................................p.ss.h.uuplh..hphap..............p.....................stt.......t......hlph.................................................................................................................................................................................................................................................................. 0 702 1050 1602 +1080 PF03767 Acid_phosphat_B acid_phosphat_B; HAD superfamily, subfamily IIIB (Acid phosphatase) Finn RD, Selengut, J anon Pfam-B_2784 (release 7.0) Family This family proteins includes acid phosphatases and a number of vegetative storage proteins. 27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.47 0.70 -4.99 31 1773 2012-10-03 04:19:28 2003-04-07 12:59:11 9 10 1346 54 323 1170 350 209.70 26 84.65 CHANGED thpshhshtts.sthtCso....a+.hu...sEspNlhsapshstpshphstpYh..thppaps-.pslsppAhhhAcpht.ps.........sthsshlFDID-TlL.oNhPYhthps.h....Gsctass........pp........as.cWlppGpA.sulst..sl..chhptlhphGhcIhalosRpcs..p+ssThp.....NLpptG.................ap....shc+LlL+s.ps.ps...hpY...KsscRpplhcc..GYpI.............lt......hGDphsDlhG........sspu..................pRsh+LPNPhYhs. 
...........................................................s...........................p..................t.tt.s....l......phsu..........hh.ph...p.httt...........................t.hAlshDlD-TlL..ss.ss..a....h..h...........st.c..sa.s.s..........................p.s.......................ah..ch.hp...p..u..t...s...hulPt..sl..phlch.t.p+GspIaalos.Rs...........p..........s.........t..........s.........t..........Thp..........slhp.t.s........................................................ht....sh..s.tl.l.h.t..s...pc....s...........p..s..............................Kts+pphlpcc......hpl................................................................hhh.....hGDs.s.Dhsu..........s+pssscs.......................h+hlhhsNshYts........................................... 0 69 187 257 +1081 PF00330 Aconitase aconitase; Aconitase family (aconitate hydratase) Finn RD anon Prosite Family \N 22.30 22.30 22.40 22.40 21.60 22.20 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.53 0.70 -6.02 11 14106 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 4231 25 4215 11379 9019 338.30 28 64.53 CHANGED TLhEKlhcuHlst.p..........cpusphLhl..DRhlhp-sTus.Ah.sLtsuGpslppssh..........ohhshDHsIsspsttp......Dlphptshs+cphphLppssKcaulthas.su...GIlHpls.EphhshPGh............TlVssDSHTsstGuhGuLAaGlGsuEsEcVhAsQslp.tpPKshtlclsGKLssGlTuKDlILplhGhlsscGGTGplVEahG-ulssLShpuRhTICNMuhEhGApsGhhs.DEsThcYLcups+A.cutph-cAhshhctLpsDcsA....paDpllcl-hsslpPplshspsPs.shslschssssp.p......t.t.cc..htahshhPt..h.tlcVchuhIGSCTNSphEDlppAAullKpt.....ttlpshshhhVsPGSc.V+sphE+-GLscIFp-sGhphhssGCosClG.s...sDhlps.......tspssoouNRNFEGRpssss+TH..LsSPshssAhAluG 
............................................................................................................................................................................................t..h.p-..........h..h......h.......................t..............................................................h...DH..h.......................................................s...........h......h......................................tt..h...t.........................h.........ss.......Glh..H.h.........E..hhhshh............................................................shlssDSHTshhsu.hGhluh............G..........hGs......-.sth.shh.s.tsh.......hs.c.............s.....h..thphp....G.p.h.........thssp...............Dll............Ltl............h......t.......h....h.t......h......t.....t............s............h............G...h...hhE.....a.h..G........u..lt..t.Lsh.tt+hols.NMu.....................Eh.G.A.p.su.h.h..............D.p.............................s....h.......p.................Yl................p.................................p.......................h................t..........................t........................................................h.....................t.............................................................p..................h...........t.....c........ss...........as.t.h..l.p.lchsp...l.t.P.lshspp....Pp.t........hh...ls...th.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................t......ht..l......s.shIsSC.ossp...phhhuAtl...ltt..p.........................t..h..t........h..p..h.lsPuSt......l.....t.t......h........t.....t........t.....G.....h...........
...........thh..p.h...........Gh.t...................h.h.......................u.Cs.......ChG.t.................t...ht.....................................h.hs.h.ossRNF.sR....t.........s...ph......LsSs.hssshAlhG..................................................... 0 1274 2607 3544 +1082 PF00694 Aconitase_C Aconitase C-terminal domain Bateman A anon Pfam-B_224 (release 2.1) Domain Members of this family usually also match to Pfam:PF00330. This domain undergoes conformational change in the enzyme mechanism [1]. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.54 0.71 -4.04 18 9126 2012-10-01 19:37:30 2003-04-07 12:59:11 14 28 4295 33 2735 7034 4256 118.60 33 21.91 CHANGED hshalphcGhssstthSsssschth.tphpsshsphtlssthhtshchspspph.st-ps.......shhtss.tY+.tsscllVluscNaGsGSSREHAAhu.pthGh+ulIucSFucIacsNLhppGlLPLphsps ....................................................................................h...ah..pGh.h....t.....s.sspct.h..h.hh.tp.h..sp...hth..t..s....t....h..h...............s......h..........c........h......................t.....t.............................................................................shh.s..t.pY...p........s.s.s.l.........llhu.csaG....sGSSREaAAhuhp.hh.G...........l+sVIA....p..........S.....F.u.cI.ap...sNhls..G.l.L.Plph.t.s.................................. 0 835 1673 2276 +1083 PF01756 ACOX Acyl-CoA oxidase Bashton M, Bateman A anon Pfam-B_598 (release 4.2) Family This is a family of Acyl-CoA oxidases EC:1.3.3.6. Acyl-coA oxidase converts acyl-CoA into trans-2- enoyl-CoA [1]. 
21.40 21.40 21.50 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.98 0.71 -4.92 37 1066 2012-10-01 23:33:27 2003-04-07 12:59:11 14 21 433 8 692 1082 31 165.80 23 25.84 CHANGED chpss.tshlcAachhstchlpcuspphpp..t.t.s.tpuaspsu.hphhpsu+hHs+hhllpsFhc+ls.....thsstsl+thLppLspLa.uhahlpcpuutFLptuhh..ospplshlpp.tl..cLhsplRsssluLsDuFshsDhhLNSslGpaDGclYcshachsppssssp...tscssaacph..LcPhLp+. ..................................................t.hhpsaphhtt.thltpssp.plp.p...........t..t.t....s.t.hpu.aN.p.st...hphh.....ph.u.pAHsch.hllct.Fhctlp...................ptsss.shpplLppLspLa.uLt.tl..pp.....p..............hu..tal.....pt........s........hl.......os.pp.......h.p.....t.lpp....tl..pLh.spl.....RPp....Al.sLVDu..FshsDhh.LsS........s...............lGt.D.Gph...Ytthht..h.t...............................t.................................... 0 256 392 581 +1084 PF00873 ACR_tran AcrB/AcrD/AcrF family Bateman A anon Pfam-B_578 (release 3.0) Family Members of this family are integral membrane proteins. Some are involved in drug resistance. AcrB cooperates with a membrane fusion protein, AcrA, and an outer membrane channel TolC. The structure shows the AcrB forms a homotrimer [1]. 
28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 1021 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.68 0.70 -13.50 0.70 -7.15 16 19835 2012-10-02 18:57:54 2003-04-07 12:59:11 14 33 3142 84 5307 18191 12148 888.80 26 97.07 CHANGED hspFFIcRPlFshllAllIhlsGslulhpLPVstaPpIssPsVpVsssaPGASscslpssVTpsIEpshsGlcGlphhoSpS.usGpsslTlsF-pGsDhDhApspVQs+lptApspLPpsV.pp.ulsshcsups.lhshulsSs.......ssshsth-LpsasssslpstLuplsGVG-VplhG.sphuhRIhlDPs+LsphpLThsDVhsAlpspNspluuGpL......sspphphslhspsphpos--accIll+......stsGu.VRL+DVAcVElGu-phshhuphNG.cPAsslslphtsGANsl-sucuV+pclscLpth..hPpGlclshsYDsTsalcsSIcsVscTLlEAllLVhLVhaLFLpNhRATLIPslAlPluLlGTFulhthhGaSlNsLThhuLsLAIGllVDDAIVVVENlpRhltp.puhsP.........hcAshcuhsplptAlluhuhlLsAVFlPhhhhuGssGtla+pFulTllhAhslShlVAlTLoPAlCAhlL+stppscct...........thathFNphFctssptYscslstlL+psthhlllhllllsu.slhLasplPpsFlPppDpGhhhstlQhPsGsShppopplhpplpc.hlp.cpspVcslh.shsGhs..uG..pu.NsuhsFIpLKPhcER.t.pp.ostullp+hptphspl.suslhh.hss.slpt..htsssGhchplts.hhGsuh-sLppstpplhthhtphPs.L....scV+sstpsstPphplclDc-+ApplGlsls-IspslpsAhGu.phlssFhcpuRhhcVhVpsssphR.sP-slsplhVpsspGp.....hlPlouhAohc.shGssplpRhNuh.shpItups..ssGhSsGpAhpshpplhpp..LPsGhshpaoGhuhppptussptshlhslulllVFllLAAhYESashPlslhlslPLullGALLAhhlpGhs.slhstVGllsLlGlusKNAILlV-FAp-hhcpcGhslhEAllpAs+hRLRPILMToLAhlLGhlPLAluoGsGutsppslGhuVhGGhlsuTlLsLahlPlhYlllc 
..........................................................................................................................................................................phhlpp..hh.sh.l.ls.l.h.l.h.l.h.G...........h...........h......u...........h...........h.p..........L.....s..........l...p.t.h.Pp.ls..s.l.l.p.s.s.a.PG.A..........ss.pplpppVstslEp.t.h.s...s.l.s..s..l..p.....h..p..S..pS..ptG....u..ls..l....p....F...p.........s.....s...s.........s...h...A...t..........p...V.....pp....tl....p...t.....s...........t.....s...p.....LP....p....s...l...tps.s..l...t.......................p.....s.......s....s....s.......l.h..h.h.sl.hup...........................................................t.h...s........t...p......l...p...s...h......h...p....lp..pl.p.p.ls..G.Vu.p..l.p.h...h....G..t.t.p.hthpl.ls.s..ppLtth.s......l..o..........s.c.l.h....s.u........lps..p.N.h....p..h..s..s..G..tl...........................................pttt.h..p.h....l.p.s.......p.s...........p.....h.......p....s...........h.c....c.....h..tpl..hlt...........................tt.s...G.......s.......................l.p.....LtD.............l...A.p.l.................p......h.............G....................s..............p.............p...............h.............p................................h................s................t.........................h.......................s....................G........p..................................u............s..s.......l.s.l..h.h.t................s.u.u.N.s.lp...s..s..ptlc....t....t...l.t....p....l....p..tt...............h....P.........t.............u.........l.........p............h...t....h.....h...h....D...p.o......h..l...p.....t..u...l...p...p.l.h..p....s.....L....h......u.h....h....l.....V...h..l....V.h....h.l.F..L...t....................s....h...R...u....s....l..l.ss.l....u.l..Pl..sl..l...s.o...h..h..h..h....h..h......h......G......h......o..l.N..h....l...o....L...h..u..l.s.lA..lGh....lVD.DA.IVl.l.E....N....l......+.h...hpp.....s.hss............................
.......................................hp.A.s.hpuhpplshsl.lu..hsl.s.lhs........VFlP.l.h.h..h..s...G...h...............s.G..t....h.a...p..t.hu....holsh..uh.hh....Shl..lul.s..l.s.P..h........h......s.....u.......h.........h......L...+........t....t..t.tpp................................................................................s.......t.....h...........h....p........t........h.....t.....p...........t.....Y......t.......t....h...l.....t......h...h......l......p.......p.......p....h.................h.......h.....h...h.....h....h........h..............h.........h.....h......sh........s.....h.....h................h............h........h........h...............l......s.....p...................p........F..h...P...p..pD....p..u............h....h.....h..h.ph.s............us...........o.......h..pp.s...........t.p....h.h.......p.p.......h..................p.......p.......h.........l..............t.........p....................................s.........t.............l.......p..............p............h............h.....s.........h........s........G..........h.........s.......................s..........s..................................t..........s................s.................u...............h......h.......l.........t.........L....c....s....h.....p......................p..................R..........................................................................t...............................s.............h...............p......t...........l..........h...t................c................h..................p.t.........t......h.................t..................p..........h.....................s..........s.t............h................h.........h.......h.................t.h..h.............................s..s.u...h.......p....h....l........t...........h....h.h.u.........s...h...p.t.Ltp...h.s..p..p....lh...t......h...h........t......p........s..t...l..............ss...l...p.....s..s.....h......t.....t.s......tsp.hplplD..p.ppAtth......Gl..sh.ss.l.s.p....s....l......
...............p..s..s......................h...u.u....t..........l...........s...........p.....h....h.....p........t....s..........p...............h...p...l....h....l..........p..................h.........s.............t........p...........................R................s...................p.........s...l..p....p...h......h.....l.....t.....s....s...s...G.t....................................................................h..l...P...L...ush...s...p...h.p.....t...u.s.s.t.l.p....+..h...s.th..s..h..p....l.tu..sh.....................s.s...s...............h............s.....h........u.............p.......s....h................p........h......h............p........p..........h............h............p.................p......................................................L..............P......................s.....................G.....................h................t..................h................p................a..........s..................G.......t...s...........p.......p..p....u....h..............s.............p....h..........h...........h................h................h..sh.u....l.lhla.ll.Ls.................h..............a.c.S...hh..P.hh...lh.h...s.lP.hu..ll..G.........u.........l...........h..............u..l......h............l...............h..................s...............h......................s......................h...........s.............l............h....s........l..G......h.lh.L....h...Glss.+NuI.ll.......l........-.....a............h........p..........p.........h........h...........p..............................p..............G.............h..............s.............h...............h.............c............A.....h....h.....p.A.shhR.hRPIlMT.sh.shlhGhlP.l.............h............h............u..................t..................G...............s...............G.........u.t...h..p.............p.......s.......luhslhGGhlsuTlLsLh.h.lPs.h.ahhh..................................................................................................
.............. 0 1569 3236 4342 +1085 PF05058 ActA ActA Protein Moxon SJ anon Pfam-B_5981 (release 7.7) Family The ActA family is found in Listeria and is associated with motility. ActA protein acts as a scaffold to assemble and activate host cell actin cytoskeletal factors at the bacterial surface, resulting in directional actin polymerisation and propulsion of the bacterium through the cytoplasm of the host cell [1,2]. 23.80 23.80 31.70 23.80 21.10 22.90 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.74 0.70 -13.05 0.70 -6.52 2 1025 2009-12-16 10:43:18 2003-04-07 12:59:11 7 3 37 0 1 762 1 335.50 65 99.41 CHANGED MRAMMVVFITANCITIsPDIIFAsTDSEDSSLNTDEWEEEKTEEQPSElNTGPRYETsREVSSRDIKELEKSNKV+ssNKADLIAMLKtKAEKGPN.NNNN...SEQotNsAINEEASGADRPslQVERRHPGLsSDSAAEIKKRRKAIASSDSELESLTYPDKsTKsNKKKVAKESlsDASESDLDSSMQSADESoPQPLKANQQPFFPKVFKKIKDAGKWVRDKIDENPEVKKAIVDKuAGLIDQLLTKKKSEEVNASDFPPPPTDEELRLALPETPMLLGFNAsATSEPSSFEFPPPPTD...................................sELEIhRETASSLDSSFTRGDLASLRsAINRHSQNFSDFPPIPTEEELNGRGsRPTSEEFSSLsSGDFTDDENSETTEEEIDRLADLRDRGTGKHSRNAGFLPLNPFsSSPVPSLSPKVsKISAPuLlSDITKKsPFKNPsQPLpVFNKKTTTpTVhKK.TPVphAPKLApLPsTKPQETsltENpsPhhEKQAETNpQsIsMPSLPVIQKEsTEpsKEEMKPQTEEKMVtESEsANssNGKpRSAGhEEGKLIAKSAEDEKsKEEPuNHTTLILAMLAIGVFSLGAFIKIIQLRKNs 
.....................................................................................................................................................................D.LIAML......KtKAEKGPN.NNNN.....uEQotNsAI.N..EEASGsDR........Psl.QVERRHPGLsSDSAAEIKKRRKAIA.SSD.........SELE......SLTY..........DKPTKssK+.K.VAKtSVsDsSESDh.....tSphpSsD.ps....KtsppPFFsKsFcKIKtAGpWshDKlscNPt....................cpEEVNAS.DF..P..PPPTDEE....L......R...LAL.PE.TPMLL....GFNA..........P.ssSEP.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1 1 1 +1086 PF00976 ACTH_domain Corticotropin ACTH domain Finn RD, Bateman A anon Pfam-B_1057 (release 3.0) Family \N 20.70 20.70 21.10 21.00 19.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.14 0.72 -4.11 13 1273 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 737 0 33 1256 0 38.60 83 21.34 CHANGED SYSMEHFRWGKPsGRKRRPIKVa.sNuh..E..-ESpEsaPhEh ..SYSMEHFRWGKP...V.G.KKRR..PIKVF.PoDA...E..EESSEhYPhE.h.............. 
0 1 4 13 +1087 PF00022 Actin actin; Actin Sonnhammer ELL anon Prosite Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 393 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.26 0.70 -6.05 28 15168 2012-10-02 23:34:14 2003-04-07 12:59:11 14 83 3849 392 3857 12329 1523 241.30 43 92.30 CHANGED u--lsulVlDsGotss+AGaAG-DsP+..........slhPohlG.....+spsst......................................chhlGsp...thp.....csthplppPh.ccGllpsW-shcclWcashhpc.Lpss........P......................................p-+PlLlTEsshNspppREKhsElhFEpapsPAhalupssVLotaAsG+s..................TuLVlDsGsupTslsPVa-GasLp+ult+.sluGchLoppl.ppllpp.......h.h...........h...........................s.SapshtcppllpchKEslChVs.s....................phptus...........s.ssps......Y....cLP.DG...............................................p..phhhGs-RFplsEhL.....FsPshht.ptt................................GlscllhsulttsD..sDlRtsLhuslVloGGooLhsGhs-RLppElpph................s.ssh.....+l+lhAss...ER+ausWlGGSILASLu.oFp.phWlSKpEY-EpGss...lVc++Ch 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.....................h...t.h.......................................................................................................................................t..p....p...........h....l...l.........o...E.s.............s.........h.........s.........s...........................t...p...R...c......p...............h....s..........p........lh......F..E.....p.a.s......h...s.u.....ha.l.......sh..pu..l.L.....u..l.....a...u...s..G.p.s..........................................................................................o.G..l.Vh..DsGcu..so...p.s..lP.l..h...-......G........h........s..l...s...p..........u...l......h......+..l...s......h.........u.G.c..c.lTp.hh.hplL.tp.......................p................................................................................................................................................................t...h.....t.....t.p.h..cl..l...c...p...h...K.....E...p..h..s..a.l.u.shpt.....................t.t..ts.........................................................................s..t..p..p.........a..........p..l....P...D...G..............................................................................................................................
............................................................................................................................................p.......hl..p..l..u...t...E...R.a..p....s..s...E....hl............FpP.p...hs....p...........................................................................................................ul........h....h.h..p.....s..l..............p.......s...s................h.-....h....+....t.....t...l.h............t.s..h.lh....s.G.G...s...o.....hh...........s....h.........tRh.......tch......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1405 2078 3077 +1088 PF01643 Acyl-ACP_TE Acyl-ACP thioesterase Bashton M, Bateman A anon Pfam-B_928 (release 4.1) Family This family consists of various acyl-acyl carrier protein (ACP) thioesterases (TE) these terminate fatty acyl group extension via hydrolysing an acyl group on a fatty acid [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.67 0.70 -5.45 27 1578 2012-10-02 20:54:35 2003-04-07 12:59:11 12 8 1264 3 365 2638 846 228.20 24 89.64 CHANGED GLsa+psFslRsYElGhs+TAolETlhNhLQEsuhNHspslGh.sDGFutT.pMp+hsLIWVVs+hplplpRYPsWGDsVcl-TWstupGKhGh+R-WhlpDhpsGEllsRAoShWVMMNpcTRRLs+ls-EVRsEh.shh.pp..l...--sscKLpKl......-ssu-hhctGLsPRasDLDhNQHVNNVKYlGWlLES.hP.pll-o+ELpslTL-YRRECtpDsllcSlTsht......................sstppsthphpHlLplss......GsEIs+u+T-WR. 
.........................................................................................ap.phpl.h...csD.hs..t....p..hpl....sslhphh......p.sut...p.s.t.p.h.G.hs..ph..........hpchsh..sWl.lschplcl......p.R.h.P.p..h..........s.-.p.lp.lpTh.s.h.u.h.s..+...h..a.s..h..R.c.a..t..l...h....s......p.s......G.........p...............l.s.cspoh...alhhshco...R.....+.h...t..p...l..s...s....-.lh.s.a.....................tsp.....p.+lh.+h.h.........hpt..p.p.....h.t.ps.a..p...l..Ra...D...lDhNtHVNN...........sc...Y........l........p........W.......l.........h........-........s..........h....s.........h.......c.......h...........h........p.........p............t..........t.....................p....p...........l.........p....l.........c..Yh+E.s.t....Gs......lp....................................................tt......h...l............t.............h...a.......................................................................................... 0 116 276 331 +1089 PF02770 Acyl-CoA_dh_M Acyl-CoA dehydrogenase, middle domain Finn RD, Griffiths-Jones SR, Mistry J anon Prosite Domain Central domain of Acyl-CoA dehydrogenase has a beta-barrel fold. 20.60 15.00 20.60 15.70 20.50 14.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.35 0.72 -4.45 69 26220 2009-01-15 18:05:59 2003-04-07 12:59:11 14 120 3313 214 9308 22756 9383 53.90 34 11.57 CHANGED uhuhTEss.uGoDh........tshpTpAptssss.......ahls.....GpKtalos.u..shA....shhllhu+s.s ............................................shuhTE..P....s...uG..S..Ds.................s.u.l.p.....T..p..Ap..p.......su-t...................alls..................G.p.Kh...aIos...u......shu......s.h.hlVhA+s...................................... 0 2632 5550 7773 +1090 PF02771 Acyl-CoA_dh_N Acyl-CoA dehydrogenase, N-terminal domain Finn RD, Griffiths-Jones SR, Sammut SJ anon Prosite Domain The N-terminal domain of Acyl-CoA dehydrogenase is an all-alpha domain. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.79 0.71 -3.50 988 26175 2012-10-02 12:47:07 2003-04-07 12:59:11 11 97 3435 215 9054 22885 10305 112.70 20 25.73 CHANGED o.-.......-pc..hl+cs...sRc.Fspc..cl.h..P.....h........stch.....c..c....pp.p.........hP...tc..l..hcch.u.-.h.GlhGlslP......E-YG.G...s.G..h.............s...hhshsllhEElu+..ss...s....uhsh.hhss..pssls.s........l....hpaGo.--QKp...+..aLPtlssG- .............................................................p.p.hhp.t....scp..ahpp......pl...t.....s.........h...............................h.tph.............-.......c.............ps.....p.........................hP..........pc....h..........h.p.ph...s....c....h....G.h.....h.....u...l....t..l....P...............................cc.a......G.....G...........t..G.......h...................................s...hhp.hs.l.l.h.-El.up.....ss.................s.....u.hsh.......hhss.....t.s.s.ls.h..........sl..............ht...aG.o...c.c......Q..+p...c.a.LstlhpG................................................... 0 2413 5396 7525 +1091 PF02551 Acyl_CoA_thio Acyl-CoA thioesterase Bashton M, Bateman A, Griffiths-Jones SR anon SCOP Domain This family represents the thioesterase II domain. Two copies of this domain are found in a number of acyl-CoA thioesterases. 
20.70 20.70 21.20 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.75 0.71 -4.53 9 1320 2012-10-02 20:54:35 2003-04-07 12:59:11 10 4 888 10 170 2384 900 124.10 41 59.06 CHANGED hssthFhs-h..p.css.hppsasGphhu...Qshhtu.p.ssP.cDhhlppsh................................HShaFh+sscsschllYslpo........LtpGchF.......sQsGplluo......sspcGh .........................................................................l.pttFhs-pPhchcP.s.h+.p.s.h...p...GcV.st....pQ..lWl...+...AsG...sl.P....D....D..h....hlH....p...ah.LuYuSDh..sh.Ls.s...A...l.......p...s...Hsl..........uhhp...t.h...p....l...AT..l..DH..S..h..WFH.....R.........P.........F......s........h........s.-W..LLY.............u.VESssA..u..s.uRGhsR..G..cha.......sQsGt.LlAo.......ssQEGl......................... 0 28 69 120 +1092 PF00698 Acyl_transf_1 Acyl_transf; Acyl transferase domain Bateman A anon Pfam-B_250 (release 2.1) Domain \N 24.30 24.30 24.40 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.84 0.70 -5.10 12 13987 2012-10-02 11:19:24 2003-04-07 12:59:11 16 1271 5013 68 4402 12759 2619 292.50 26 23.26 CHANGED VaVFsGQGuQWtGMGhpLlpspslFtsultcs-cshps.hGaplh-lLp..sss.......................................................thcpl-hlQPsLhulpluLsplh.puhG..lpPsuVlGHShGElAAuhVsGALo.t-ushslshRuphhtpls.stGuMsuVsh....s..c.h.t.h.splslAssNuPpollluGsp-tlp-hlpphptcs.htsphlsVshAsHSsphsslt-sLtttLup.lsshtsplshauospss.t.....sssphsApYWhpNh+psVpFppAlpuh...h-suatsFlElSsHPlLttulpcshc.........tshssssllsphpRcpss...hppFhtshsphHssGsss 
...............................................................................................................h.alFs.G......QGu.Q.hsG....MG.p.p......Lh....p........p...........s....s....ht....p.....s....h....s....p........s....s...p...h........h.......t.....................h........s...........h.........s.......l...h...p....l..lh......s.s..s..t............................................................................t.t.L..s..p....o...p...h...s....Q.P....A.....l...h....s...h....p....l....A......l..h..c....l....h.....p....s.......h.......G.............lp.P...s...h...l....s....GH....S........l......G......Eh........u..Ah.h.....s.AG..........s..........l..o.h...p.....D.....A.......h....p........lV.......t.....h.....R.......u....p.....h........h...............p................p.............h.............s........s...........s...........G..........u.......M...s..A...lhs................................stt..t.....h....h...............t.......h........t...........s......t....................l.......s.......l.A....s.....h.....N...u......P...........s..p...........s............V..lu..G....s.t....p...u..........l....p.....p....h....h.......t...t.............h........p.........t........p............G.......h.............p.s..h...h........L....s..V.......s.h..A....h.H.o.s.......h.h....c.s........h....t............p...p......h.........t.....p......h.....l......t.......p.........l..............p.........h.............p.........s............s.......p...........l........s..........l......h....o....s...l..s...u.p.hh..............................ss.s.t.h.......t....p.h..h...h...c...p..lt...p...s.V...p....a....t...p....u.............l...........p..................th...................h..............t..............t..............G...................h...............p................h.................h..........l.................E...l..uP.t......s...L...t...t..hh.p...p......................................................................................................................h
......................................................................................................................................................................... 0 1131 2658 3767 +1093 PF02273 Acyl_transf_2 Acyl transferase Mian N, Bateman A anon Pfam-B_5787 (release 5.2) Domain This bacterial family of Acyl transferases (or myristoyl-acp-specific thioesterases) catalyse the first step in the bioluminescent fatty acid reductase system. 20.30 20.30 20.30 20.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.94 0.70 -5.40 4 59 2012-10-03 11:45:05 2003-04-07 12:59:11 10 4 39 2 8 132 21 269.20 60 83.59 CHANGED hsIDHVIcVsssRcI+VWEThPKppssKRNNTIlIASGFARRMDHFAGLAEYLSpNGFHVIRYDSLpHVGLSSGpIDpFoMSlGKpSLLTVlDWLp.p+sIsNlGLIASSLSARIAY-llu-lsLSFLITAVGVVNLRsTLE+ALtaDYLph.IDElP-DLDFEGHpLGScVFVpDCFEssWDoLDSTINKhtpLslPFIAFTANsDDWVpQcEVhcLlSsI+Sc+sKIYSLlGSSHDLGENLVVLRNFYQSlTKAAIAhDsshl-lss-IIEPsFEpLTIATVNERRLKscIEs ...........................................sI-HVlplsssppI+VWET.PK...pp.sp+psT.IlI.A.S..G....FAR..R....M.D.....H......FAG......LAEYLSsNGF..HVlRYD..SL.pH..VGLS.S...Gs....IspFoM.S.l...G.K...p..SLhsVl-W.Lp...s+...G...l..p..p..lGLIAuS....L.S.A..R...I..A.Y.-...l.su.-.l..s.LSFL...IT..AV.GV.V.NL..RsTLE+ALtaDY...Lp.h.Is-lPpD.LDFE.GHpLGScV...FVpDCFcppW.DoL.-ST.ls...chppL.slPFIAFTANsDsWVcQcEVh-hlusIpSs+sKlYSLlGSSHDLGENLVVLRNFYQSVTKAAIALDss.l-lss-h......l..EPpFEpLTlsTVNERRLKscIEs........................................................................................................................... 
0 2 4 5 +1094 PF00708 Acylphosphatase Acylphosphatase Bateman A anon Pfam-B_686 (release 2.1) Domain \N 20.80 20.80 20.80 21.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.99 0.72 -3.88 75 4748 2009-01-15 18:05:59 2003-04-07 12:59:11 13 31 3398 34 1333 3407 353 90.30 31 29.81 CHANGED M...............hphplhlpG.pVQ..GVGFRhastphApphG.lpGa.VpNhs-G...pVclhspG.spps.lcphlptlpp..ussh.....uplpplph.pphshpsp........ssFpIt .........................phphhlpG..+VQ..GV.G....F..R.h..h..shphApplu.....LsG.h....VpN...t.sDG.........pV-lh....s.p......G.sp.pp....lcph....l.p.t....Lcp....usss.......A+.V.splph...p...h.p.h..p..tt......ssFpI....................................... 0 404 800 1089 +1095 PF01553 Acyltransferase Acyltransferase Bateman A anon Pfam-B_128 (release 4.0) & Pfam-B_5069 (Release 7.5) Family This family contains acyltransferases involved in phospholipid biosynthesis and other proteins of unknown function [1]. This family also includes tafazzin Swiss:Q16635, the Barth syndrome gene [2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.90 0.71 -4.71 57 17338 2012-10-02 00:16:30 2003-04-07 12:59:11 16 145 4819 2 5587 13699 5618 134.80 20 34.96 CHANGED plplps....Epl.p..................ttssllluNHpSh...lDshhls.hhh...........pshhhlupcplhth.shhshhhp....hhsslhlcR.............................tp.pttttshpthhc...hlppG................ph.lhlFPEG.....Tcsps...................tph..h.aKpGshphshps.......sssllPlslp ..........................................................................................................h...h.s.....pp.l..p..................tts.h.l..l.l..u.N..H.pSh.............hD.h..l.h....l......s...hhh.......................................t..h.ph.h..u...c..p...p...l.....h......p......h...s.....h....h.....u.....h....h.hp.................hh.s..s.l.......l..c..R.................................................................................................................tt...t.t..s..t...t..s.h.cphhc.........hl.pp..u.....................................................................................ph...lhlF...P..E...G................o.+.sps.............................................................tph.......h.h.+.s.G..h.h..th..uhps......................sssllPlhh...................................................................................... 0 1763 3288 4613 +1096 PF02805 Ada_Zn_binding Metal binding domain of Ada Bateman A anon Bateman A Family The Escherichia coli Ada protein repairs O6-methylguanine residues and methyl phosphotriesters in DNA by direct transfer of the methyl group to a cysteine residue. This domain contains four conserved cysteines that form a zinc binding site [1,2]. One of these cysteines is a methyl group acceptor. The methylated domain can then specifically bind to the ada box on a DNA duplex [2]. 
20.90 20.90 20.90 21.10 20.70 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.24 0.72 -4.38 150 1999 2009-01-15 18:05:59 2003-04-07 12:59:11 11 24 1681 5 495 1548 76 64.90 47 18.48 CHANGED -ppap.AlhsRD..spaDGpFahuVpTTGIYCRPsCsuRhP+ccNVpFasouttApsAGaRPCpRC+P- .....................................ppap.AlhsRD..sphDGpFhhAV+TTGIaCRPSC.s......u......R..t.......P..p...c......cNV....p...FassAspAhsAGFRPCKRCpP-......... 0 126 281 399 +1097 PF01602 Adaptin_N Adaptin N terminal region Bashton M, Bateman A anon Pfam-B_491 (release 4.0) Family This family consists of the N terminal region of various alpha, beta and gamma subunits of the AP-1, AP-2 and AP-3 adaptor protein complexes. The adaptor protein (AP) complexes are involved in the formation of clathrin-coated pits and vesicles [1]. The N-terminal region of the various adaptor proteins (APs) is constant by comparison to the C-terminal which is variable within members of the AP-2 family[2]; and it has been proposed that this constant region interacts with another uniform component of the coated vesicles [2]. 
25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.50 0.70 -6.18 32 3862 2012-10-11 20:00:58 2003-04-07 12:59:11 15 87 445 26 2500 4196 107 460.30 19 58.89 CHANGED cpphhpp-ltphhsph..............s.p....p+ppslpKllhlhhhGp...chs.....hhhhsll+hht.opchshK+lsYLhlh.hscppsD.....lhhL...sssslp+D.lp...ssNphhpuhALpsLuslt..sscls..csltsslpphls.spss..aVR+sAslshh+laph.....ss-hlpp....hsclpchl..sDps..........................hVhtsAlshltcl...................pssc.hh....cllthhhppltph....................s.sssahplhllchL.p....phstpsspts...........................................................................t.hhcplhshlp....................ssssuVlhEslpslh........plsspss........hhhhssshLhphLs.spcpsl+YluLps.lsplspp..........c.....pslh..cpshhlhhlp.csshsl+hcsl-lLhtlsscpNlpp...llpcLhpa......l....-ht-.p................a+pthlpsItplu.+h....ssshphhlssLlcllp.....ps..shh..sp-hl.slhpllpphsshpths..lppL....hchlps............hppsp.......h..htshlWllGEaushlss.................ssphlcplhpphh.pss.l+tt.hLsshhKht...........httst.pht.............plhphlhphsp..s.ch-lp-RAh..........hhpllptsp 
................................................................................................................p.tt.hpp.....................p.t.....hptp.h.lt+.ll.................h.hh............h.h.G........s..hs..................h.h.phlp..hh.....t...s...pphp...K............+.l.......s...............YLhh..........hh..p.......p.p.-..............................hhhL..................ssssl.p.p......D..hp.........................s....s.....h....hp.uhA..lp..sl.s..p.lt...................ssp..hh............p..l....h...l..t.p.h..lt....s....t...s....s.......hV++p.A...sls..........h..h+l...hph.................................ss.p...h...h.......t.........hs.p....l..h..p.........hl......s-.ps..................................................................................hV.h.hs..u...l.sh...l...hpl........................p..sp...............phh....t...h...h.p.t.....l.th....................................................................................s.h.s...a.h.plh.l.l.c....h.l.t.............t.h.s...tp..ttt........................................................................................................................................................................................t.hhp..lhshlp.................................sts...tuV.l.h...pshps.lh......................................................ph..s.t..s.............................hhth.hs.s...Lhphl........p....sp..............p...p.............l.p....al...u...L..ps..lttl..h.tp............................p..............sl.........p..t...p...h........l......h...l....p...........ss..s.h.....l..+......h.t.....t..l....c.lhh.t....h...s........s...p.........p..N..l.pt..............l......l.p.....c..lh..p..a...................h............p.h...-.p.............................................htt..thl.pt..l.t...t..h..s..ph..............................s..sh.phh.......l.s.h.l.hp..l..l.p..............................ts.......s.hh.......hpph..h...l...h.p..l..h.........p.......p......................s........p.......h...
...p........t............h...............l.t...p....l..................h.p..h.l.pp.................................hp....p..............................h..htshh..a.llG.E..a.u....phh.p.................................................................................s.ph.h.p......h.....h....p..t.....h...h........p.....s................s..p..h.....hlsshhKhh.........................................................................................................................tlh....h.l..th.t........p.s.-.........lppRuh....h.tlh....t............................................................................................................................................................................................................................................................................................................................................................. 0 926 1396 2042 +1098 PF03352 Adenine_glyco Methyladenine glycosylase Finn RD anon Pfam-B_3953 (release 6.5) Family The DNA-3-methyladenine glycosylase I is constitutively expressed and is specific for the alkylated 3-methyladenine DNA. 
25.00 25.00 29.60 25.10 22.80 24.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.14 0.71 -4.75 190 3641 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 2986 18 794 2600 430 166.00 41 85.63 CHANGED Ws....ss......slYhpYHDpEWGhP.lp.DDppLFEhLsLEGhQAGLSWhTIL+KR-saRpAFssFDsppV...Apa.....s.-pclccL.htssGIlRNRtKIcAsIsNA+shlplpc..ch.....GSFusalWsFV.sspP.hhsp..hpshtclP.....u..pTshScslSKcLKKRGF+FVGPThsYAFMQAsGhVN.DHlssChpptp ....................Ws..tps..slYhtYHDpEWGh...P..p...D.-.ptLFEhLsLEuhQAGLSWhTlL+K.....Rcsa....RpAFts.FD.p+V.......Ath........s.-p.-......l....-c.L..h.p.s.s..uIIRpRtKIpAslsNAp..uhlpl.....pp..........p.h...................G.SFssalWuF........l....st.ps...hsp...ht..s..hp..p..hP......................s..p..o..shS.ctlSKsLKK+GF+FVGsThsYuFhQA.....sGhVs.DHh.sC.hh..t............................................................ 0 218 478 659 +1099 PF02438 Adeno_100 adeno_100; Late 100kD protein Mian N, Bateman A anon Pfam-B_1583 (release 5.4) Family The late 100kD protein is a non-structural viral protein involved in the transport of hexon from the cytoplasm to the nucleus. 
25.00 25.00 131.30 131.10 19.40 19.30 hmmbuild -o /dev/null HMM SEED 583 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.87 0.70 -6.30 19 136 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 100 0 0 144 0 588.50 65 76.54 CHANGED ssLhKHlpRQutIl+puLp........-p.psPhoVsplSpthEptLFs......P+sPsc+pcNuos-PsPRLNFYPsFhlPEsLATYHIFFhNppIPlSC.+ANRotuDphhpLppusplsslsshccssKlh.-GLG.pEspusst.Lp........psutLVpLcsDssRLsslK.RshploaFAYPAlsLPPKl.pslh-pLlh+psp...........pps-sscPVVSDEpLs+Wls.ss.......-st..tLp......c+..RKthhuAlhhTlpLEChp+hFoc.phl+KlpEsLHYTF+HGaV+.spcIosVpLSNhVoYhGlhHENRLspssLHsoLcuEs+RDYlpDolYLFLlaTWQTAMGlWQQsL--cNlcplpclLs+p++sLasthspcslApcLAcllFPtc.LlpThppuLPDFhoQS.lpNFRoFILERSGILPuhssALPSDFVPlsa+EsPP.LWsHsYLLpLAsFLhhHuDhh......EDsuspslhcsaCcCNLCoPHRslspNsuLhNElpsIGTFElQsPsspsst......LKLTPuLWssAYL+KFsspDYasapItaYcspsc.s.pspLTACVITpscILApLppIpcuRE-FLL+KG+GVYLDPQTGEpLN ..DVLLKHLpRQShIl+DALt.......DRocsPlSVcELScAYEhsLFS......PRVPP...KRQsNGTCEPNPRLNFYPsFAVPEALATYHIFFKNQ+IPlSC.RANRoRADtlLsLtsG.uRlPDIsSLEEVPKIF.EGLGpDEsRAANA.Lppp...........ctppSsLVELEGDNARLAVLK..RolElTHFAYPAlNLPPKVMpslMDpLlhp+spsLsc......pptpps--ucPVVSDEpLuRWLG.Tp.......-Pp..sLE..........cR..RKLMhAsVLVTlELECh+RFFoDP-TLRKlEEoLHYTFRHGaVRQACKISNVELoNLVSYhGILHENRLGQsVLHoTL+GEARRDYlRDCVaLFLsaTWQTAMGVWQQCLE-pNLKELcKLLpRshKsLWTGFDERTVAuDLA-IlFPER.LppTL+sGLPDFhSQSMLpNFRoFILERSGILPATCsALPSDFVPLoYRECPPPLWSHCYLLpLANYLuYHSDlh......EDVSGEGLLECHCRCNLCoPHRSLsCNPpLLSETQlIGTFELQGPpssst..us.........LKLTPGLWTSAYLRKFlPEDYHsHEI+FYEDQ...Sc.P..+A-LTACVITQusILAQLpAIQKuRpEFLLKKG+GVYLDPQTGEpLN...... 0 0 0 0 +1100 PF03052 Adeno_52K Adenoviral protein L1 52/55-kDa Mifsud W anon Pfam-B_2151 (release 6.4) Family The adenoviral protein L1 52/55-kDa is expressed in both the early and late stages of infection which suggests that it could play multiple roles in the viral life cycle. The L1 52/55 kDa protein interacts with the viral IVa2 protein and is required for DNA packaging [2][3]. 
L1 53/55-kDa is required to mediate stable association between the viral DNA and empty capsid [2]. 25.00 25.00 47.20 46.80 21.70 17.90 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.11 0.71 -5.09 18 154 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 99 0 0 150 0 187.10 52 54.87 CHANGED lsp.ssuhAsct.......h.+.Qhcc-shcutlPppNlF...R-tpsp.....-t.RchhapuGptlphsh..cRsLpscDF....s-.s.....ulSsApsHhpAAcLtpsactTtphEsshpcoFsscl+sLlhR.ElslGLhaLhDFlpshhppPs.shsLssQLhLlspHscscsshRctlhslucsc......ucWLhDLlshltsIl.pccphsls-pVuAIs ...........htph.....GA.uPER.......HPRVQLp+DsRtAYVPtQNLF...RDcSGE...EsEEhRctRFcAGREL...R..L..DR..pRlLRsEDF..EssE.o......GlSPARAHluAAsLVoAYEQTV+pEpNFQKSFNNHVRTLlAREEVslGLMHLWDhhEAhlpNPs.SKsLTAQLFLlVQHSRDNEsFREALLNIuEPE......GRWLhDLINILQSIVVQERuLoLu-KVAAIN......... 0 0 0 0 +1101 PF02703 Adeno_E1A Early E1A protein Bashton M, Bateman A anon Pfam-B_1193 (release 5.5) Family This is a family of adenovirus early E1A proteins. The E1A protein is 32 kDa it can however be cleaved to yield the 28 kDa protein. The E1A protein is responsible for the transcriptional activation of the early genes with in the viral genome at the start of the infection process as well as some cellular genes [1]. 
24.90 24.90 24.90 25.50 20.90 24.80 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.11 0.70 -4.95 15 313 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 91 4 0 292 0 158.20 37 97.87 CHANGED MRplhhhssh.h....-hus-lLE...plVsshhss-hPp.ssshhpsPSLHDLYDLE..V-ssE.DsNEEAVsshFsDuhLLAAcEu......................sssh.ssppssGspslP-L.ps--hDLpCYE-GhPPSDsEDEpppp..shpphuspushshppt.......FhLDsPplPGHGC+SC-aHRpsTGssshhCuLCYhRspspFlYSPVSDst..--cosss.........................-pps.........SPPclssssP.slh.+PVPVRsostRRsAV-slED.......LLpE....ssEPLDLSl.KRPRs ....................................................................t.h....p....hlpthh..p..................s.oLp-LYDl-..Vps.p..DsNEcAVs.hFs-uhhLtsp.u........................s......s...s...hPtL.....hDLhCaEtshPsSDsEspp.....................................................................................hSt........................................................................................................................................... 0 0 0 0 +1102 PF01691 Adeno_E1B_19K Adenovirus E1B 19K protein / small t-antigen Bashton M, Bateman A anon Pfam-B_1569 (release 4.1) Family This family consists of adenovirus E1B 19K protein or small t-antigen. The E1B 19K protein inhibits E1A induced apoptosis and hence prolongs the viability of the host cell [2]. It can also inhibit apoptosis mediated by tumour necrosis factor alpha and Fas antigen [2]. E1B 19K blocks apoptosis by interacting with and inhibiting the p53-inducible and death- promoting Bax protein [1]. The E1B region of adenovirus encodes two proteins E1B 19K the small t-antigen as found in this family and E1B 55K the large t-antigen which is not found in this family; both of these proteins inhibit E1A induced apoptosis [2]. 
25.00 25.00 40.80 40.30 18.20 17.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.31 0.71 -4.87 17 117 2009-09-10 16:39:30 2003-04-07 12:59:11 11 2 86 0 0 102 0 134.60 47 75.49 CHANGED M-....lhphLpsapphRpllctuSspsShhhRahFGupLuclVacsKp-acppFtclLs-s.GlasuLsLGapshappcllppLDFSoPGRssAulAFlsallDcWs.pcop...lSpsahLDhlshsLWcth...h.p...phhhh ........M-..lWslLpDFppTRpLlEsu..SsusShaWRah..FGos....Lu+LVaplKcDYppEFEclLs-CsGL.a-uLNLGHpuhFpE+ll+sLDFSoP..GRTsAuVAFlsFllDKWs.ppTp...lScGYhLDalAhtLWRsW..h+ppt...hph..... 0 0 0 0 +1103 PF01696 Adeno_E1B_55K Adenovirus EB1 55K protein / large t-antigen Bashton M, Bateman A anon Pfam-B_1728 (release 4.1) Family This family consists of adenovirus E1B 55K protein or large t-antigen. E1B 55K binds p53 the tumour suppressor protein converting it from a transcriptional activator which responds to damaged DNA in to an unregulated repressor of genes with a p53 binding site [1]. This protects the virus against p53 induced host antiviral responses and prevents apoptosis as induced by the adenovirus E1A protein [1]. The E1B region of adenovirus encodes two proteins E1B 55K the large t-antigen as found in this family and E1B 19K Pfam:PF01691 the small t-antigen which is not found in this family; both of these proteins inhibit E1A induced apoptosis. This family shows distant similarities to the pectate lyase superfamily. 
21.80 21.80 21.90 31.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.22 0.70 -6.00 15 139 2012-10-02 14:50:22 2003-04-07 12:59:11 12 2 88 0 0 132 1 334.00 51 78.03 CHANGED sssshhsp....Lshuhhs+pR.EpVpap-lps-a..pct..hht-+YsFEQlKTahLpPt-.DhEtsI+paAKlALRPsspYpIp+slsI+ssCYIlGNGApVclsspcpsA.Fcsthh.uhsPGVsGMpuVTFhNlRFps-s..........asGslFhusTplllHGCsFhGFssoCl-uhussplRGCpFhuCa+ul.su+s+ucloVK+ClFE+CslGlhsEGpu+lR+Nsuo-ssCFlll+GsuplcHNhlsussch.pps.hpMlTCu..sGpspsLpolHIsSHsR+pWPsFc+NllhRCslHLGsRRGsFpPhQCNhoaopllLEscuhs+VsLsGlFDhshplaKlLRa.--s+sRsR.....sC-CGupHhph.ssshpVTE-lRsD+hhhSCsssEFsSS-E ......................s.pt.hscLshSLMsRpRPEplhatElpp-h..pct..hhppKYuhEQlKTaWLpPt-.DhE.AIcpaAKlALRPDppYplo+plsIRpsCYI.GNGApV.lss.D+sA..FcCsMh.sMhP...G..VhsMpuhsFhNh+Fpuct...............asGslFhusophhLHGCsFaGFNshCl............EsW.utsplRGCpF...........huCWhul.suRsKSphSV.KpClFE+ChLGl..s.EGpuRlR...Hsuu.-ss.CFhLlKGsAslKHNMlpGss-..pp..hphLTCs..sGhCphLtslHlsSHsR+tWPhFEpNllh+CphHLGuRRGhF.PaQCNhspsplLLEs-AhS.RVsLsGlFDMslplaKILRY..DEo..+sRsR.....sCECGG+HhRhpPVsl-VTE-LR.PDHLlhuCsssEFuSSsE............... 0 0 0 0 +1104 PF04623 Adeno_E1B_55K_N Adenovirus E1B protein N-terminus Kerrison ND, Bashton M, Bateman A anon DOMO:DM04583; Family This family constitutes the amino termini of E1B 55 kDa (Pfam:PF01696). E1B 55K binds p53 the tumour suppressor protein converting it from a transcriptional activator which responds to damaged DNA in to an unregulated repressor of genes with a p53 binding site [1]. This protects the virus against p53 induced host antiviral responses and prevents apoptosis as induced by the by the adenovirus E1A protein [1]. The role of the N terminus in the function of E1B is not known. 
25.00 25.00 34.80 33.50 21.90 20.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.50 0.72 -3.49 5 123 2009-09-11 22:02:39 2003-04-07 12:59:11 7 3 65 0 0 110 0 68.70 40 17.64 CHANGED MERsNPoEpGl+uGLHusAsVEuhptuAEEEsL+LLAuAASs.pssussssstAthtuGGus.uuuGGE..Es ...MEstsPspQGl+sGh+upu.VEshttuAsp-NLcLLAusAuh.ts...ss....puss.s.....t.uth...tuGuts..uuuGtp....t...................... 0 0 0 0 +1105 PF04834 Adeno_E3_14_5 Early E3 14.5 kDa protein Waterfield DI, Finn RD anon Pfam-B_4148 (release 7.6) Family The E3B 14.5 kDa was first identified in Human adenovirus type 5. It is an integral membrane protein oriented with its C terminus in the cytoplasm. It functions to down-regulate the epidermal growth factor receptor and prevent tumour necrosis factor cytolysis. It achieves this through the interaction with E3 10.4 kDa protein [1,2]. 23.60 23.60 23.60 25.50 22.10 23.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.47 0.72 -3.57 8 110 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 68 0 0 79 0 104.20 50 79.38 CHANGED pC+FpcPWsFLcCYpccoDhsssWlhhlshlhlhssThhultIYPphchGWNuPsAhshPphPs..tthPLQ........PhPpP............-P.PpsPosISYFpLTGGDD ......KCKFpc.WsFLcCYccKsDhPshalhIlGI.lhlhuCT.hFu.lhIY.PpFchGWNuspAhsaP.-Ps..tphP..........Phs.Pht..........pY.pEP.PphPoslSYFpLTGGDD. 
0 0 0 0 +1106 PF03307 Adeno_E3_15_3 Adenovirus 15.3kD protein in E3 region Mifsud W anon Pfam-B_3512 (release 6.5) Family \N 25.00 25.00 45.80 45.60 20.40 16.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.76 0.71 -4.35 8 100 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 71 0 0 67 0 116.40 64 88.98 CHANGED DGphoEQRll..phR.tRppp-RpspELtsLhslHQCKKGlFCLVKQAKLoYE.lsu..psHcLuYpLstQRQoFssMVGssPIpVTQQuG-scGsI+CsCcsPEClYTLlKTLCGLR-LLP ..............DGpsSEQRll..QLR.lRQQQERssKELtDslsIHQCKKG.IFCLVKQAKIoYE..lsu..psHRLoYELPpQRQKFTCMVGlNPIVITQQSG-TcGCIHCSC-SPEClYoLlKT.LCGLRDLLP.......... 0 0 0 0 +1107 PF02440 Adeno_E3_CR1 Adenovirus E3 region protein CR1 Bateman A anon Pfam-B_1854 (release 5.4) Family Early region 3 (E3) of human adenoviruses (Ads) codes for proteins that appear to control viral interactions with the host [1]. This region called CR1 (conserved region 1) [1] is found three times in Adenovirus type 19 (a subgroup D virus) 49 Kd protein in the E3 region. CR1 is also found in the 20.1 Kd protein of subgroup B adenoviruses. The function of this 80 amino acid region is unknown. This region is probably a divergent immunoglobulin domain (A. Bateman pers. observation). 20.00 20.00 20.20 20.30 19.80 19.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.17 0.72 -4.48 20 217 2012-10-03 02:52:13 2003-04-07 12:59:11 10 5 50 0 0 184 0 58.70 42 36.33 CHANGED oVshGsNhTLlGP.ssspVoWY.....ssshpchCptsphc......hpaoCstQNLTLlNVopsasG .....VshGsNhTLlG...P...sss...VT.Wa......ssphp.chCstsphc..........hpaoC..N.tQN..LTLlNVspsapG... 0 0 0 0 +1108 PF02439 Adeno_E3_CR2 Adenovirus E3 region protein CR2 Bateman A anon Pfam-B_1854 (release 5.4) Family Early region 3 (E3) of human adenoviruses (Ads) codes for proteins that appear to control viral interactions with the host [1]. 
This region called CR2 (conserved region 1) [1] is found in Adenovirus type 19 (a subgroup D virus) 49 Kd protein in the E3 region. CR2 is also found in the 20.1 Kd protein of subgroup B adenoviruses. The function of this 50 amino acid region is unknown. 26.20 26.20 26.20 26.70 26.10 26.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.01 0.72 -4.41 9 114 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 48 0 1 96 0 37.90 49 13.63 CHANGED IPsShIuIIsAVllGhslIIlChhhYACCY+Kh+.p.Kh .IPsoTlAIlsuVlsGhhllIIshhhYhCChK+.+.a.+.... 0 1 1 1 +1109 PF03376 Adeno_E3B Adenovirus E3B protein Mifsud W anon Pfam-B_3736 (release 6.6) Family \N 25.00 25.00 27.60 27.60 20.60 18.50 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.33 0.72 -4.06 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 67 0 0 48 0 66.40 59 73.42 CHANGED VuclsPDCLsPFssYLlFsFVTClslCSIlCllIsFhQhlDalaVRIAYhRHHPpYRNppVAsLLpL ...VuHAoPDCLGPFsoYLLFAhlTChCVCSIVClVITFhQhlDWhhVRhsYL+HpPcYRspsVAtLLRL....... 0 0 0 0 +1110 PF04528 Adeno_E4_34 Adenovirus early E4 34 kDa protein conserved region Waterfield DI, Finn RD anon Pfam-B_4904 (release 7.5) Family Conserved region found in the Adenovirus E4 34 kDa protein. 25.00 25.00 49.30 48.70 21.60 21.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.97 0.71 -4.61 17 117 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 86 0 0 105 1 147.40 57 51.52 CHANGED Ltshh+phlhGshhNpcahWYRchVNpthPcElhYVGSVahRGtHLIYl+l.....haDuchhsllcphs...aGhshhshGlhsshlVLsCppCs.shoEhphRsCA+RTRplhh+slpllsppsh...............ppStsEccRQ+hL+tLhpap+slhh ..........................LAsWFR+lIhGsMhNQRhPWYRplVNh.pMPKEIMYhGSVFhRGRHLIYl+I.....WYDGHsGuIlssMS.....FGWSshN...YGLLNNhVIhCCTYCp.sLSEIRMRCCA+RTR+LML+AltIlsc-TssscP..........lsSSRTE.RRQRLLRuLMc+pRPI..h................................ 
0 0 0 0 +1111 PF00541 Adeno_knob adeno_fiber; Adenoviral fibre protein (knob domain) Bateman A anon SCOP Domain Specific attachment of adenovirus is achieved through interactions between host-cell receptors and the adenovirus fibre protein and is mediated by the globular carboxy-terminal domain of the adenovirus fibre protein, termed the carboxy-terminal knob domain. 25.00 25.00 65.80 65.70 17.40 16.80 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.40 0.71 -4.61 46 374 2012-10-01 20:11:45 2003-04-07 12:59:11 12 13 129 144 0 334 1 176.00 40 50.73 CHANGED LWTTPDP.SPNCpl...pp-pDuKLTLsLTK.CGSQlLuoVSLlsV.pGp.hpslssshp......shslpLhFDssGsLhs........sSslspsYWNa.................................RsssSss..ussapNAls...........................FMPNhsAYP+...........sppspu+spIhuplYLpGp...shpPlslploaNppsss.......sYSloFsashsps..Yt...s..sFsooShTFSY ....LWTTPDP.SPNCpI......ppspDuKLTLlLTK.sGuplhusVSLlsV..sGphphlsssps.....hshslcLhFDssGsLLs........sssh..KshWsa.................................Rss..sohs..ssshpsAlu...........................FMPshsAY.Pp...........spppps.cshlhussYhtup...stpPlslplshNpcsss.......sYSIoFsauhsps..ht.....s.hsF.sTosaTFSY... 0 0 0 0 +1112 PF00608 Adeno_shaft adeno_fiber2; Adenoviral fibre protein (repeat/shaft region) Bateman A anon Bateman A Repeat There is no separation between signal and noise. Specific attachment of adenovirus is achieved through interactions between host-cell receptors and the adenovirus fibre protein and is mediated by the globular carboxy-terminal domain of the adenovirus fibre protein, rather than the 'shaft' region represented by this family. The alignment of this family contains two copies of a fifteen residue repeat found in the 'shaft' region of adenoviral fibre proteins. 
20.50 20.50 20.60 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.35 0.73 -7.21 0.73 -4.10 63 1159 2009-01-15 18:05:59 2003-04-07 12:59:11 12 20 137 15 1 857 6 30.00 30 24.56 CHANGED sssLslshussLslss.ssLslplussLshs ...sssLoLshusPLslss.ssLsLpluusLsl............... 0 0 0 0 +1113 PF04881 Adeno_GP19K Adenovirus GP19K Kerrison ND anon Pfam-B_6142 (release 7.6) Family This 19 kDa glycoprotein binds the major histocompatibility (MHC) class I antigens in the endoplasmic reticulum (ER). The ER retention signal at the C-terminus of GP19K causes retention of the complex in the ER, preventing lysis of the cell by cytotoxic T lymphocytes [1]. 20.00 20.00 22.00 83.50 19.70 18.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.00 0.71 -4.43 6 78 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 49 0 0 58 1 133.40 56 81.67 CHANGED CsLTFAs-ssc.CsVLIKCua-Ccsl.cIpapNKThspol.sssWpPGs.sp.YTVoV.............pGsDGo.hhsNsT....FIF.spMC..........DlsMaMS+QYsLWPP.oKENIVsFSlAaslsACllouLlslslthhlph+PRpuN.EKEKhP ...CslThus-pSc.CsVlIKCta-C.h..pITaKNKThsNsh.susWcPGD..p.YTVoV.............+GsDG.....NpT....FIF.phMC..........Dlshahu+.auLWPP.oK-NhVsFSlAaslhAClhouLLssslhhhlpp+PR.uN.EKEKh.......... 2 0 0 0 +1114 PF01065 Adeno_hexon Hexon, adenovirus major coat protein, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_885 (release 3.0) Domain Hexon is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. The penton complex, formed by the peripentonal hexons and base hexon (holding in place a fibre), lie at each of the 12 vertices [1]. The N and C-terminal domains adopt the same PNGase F-like fold although they are significantly different in length. 
18.60 18.60 20.00 20.00 17.90 17.80 hmmbuild -o /dev/null HMM SEED 495 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -12.75 0.70 -5.64 36 2918 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 287 43 0 1583 0 236.80 39 78.17 CHANGED FRNPTVAPTH-VTT-R.SQRLQLRFVPVppEDspYoY.KsRFoLuVGDNRVLDMuSTYFDIRGsLDRGPSFKPYSGTAYNsLAPKuAsNNs.ap.................ssss.................spshAQAshhs...................sspsst.ttspthhsssshpPpPQlG..tss...Ws...tspsstptuuGRlL.....csssshhP......CYGSYApPTN.pGuQu................s.slpphaassssssss.........ssslhhsEsVs..LpsPDTHlVhh..ssss..sups.hs..uhPNRPNYIGFRDNFIGLMYYNSsGNhGVLAGQuSQLNuVVD.LQDRNTELSYQhhLssLsDRsRYFShWNQAVDSYDs-VRlI-NcGhED-hPsYsFPlsGlss..tsasslptsss.....tsapsssssssss.....hluhGNlsuMEINLsANLaRsFLYSNVAhYLPDchKaT...PsNls........LP.sNsNTYsYMNuRlPsssllDoalNIGARWSlDsMDNVNPFNHHRNsGL+YRSQLLGNGRYspFHIQVPQKFFAIKNLLLLPG .................................................................................................................................................................................................u.hs.........................................................t..t.h.t.....Ppsp.G.....tt.....................huRhh.....c..t............................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +1115 PF03678 Adeno_hexon_C Hexon, adenovirus major coat protein, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_885 (release 3.0) Domain Hexon is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. 
The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. The penton complex, formed by the peripentonal hexons and base hexon (holding in place a fibre), lie at each of the 12 vertices [1]. The N and C-terminal domains adopt the same PNGase F-like fold although they are significantly different in length. 21.40 21.40 21.60 22.10 21.30 21.30 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.60 0.70 -4.90 23 736 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 213 43 0 552 0 193.00 81 34.68 CHANGED NsTNDQoFsDYLuAsNhLYsIPAssTslsIsIPuRsWuAFRGWSFTRlKspETPslGusaDP.FpYSGoIPYLDGTFYLoHTF++VSIpFDSSVsWPGNDRLLoPN.FEIKRs..lD...uEGYshuQSNhTKDWFLVQMhANYNhGYQGY+lPssh+.hpYuFl+NFpPMoRQlPshsp.st.achlshs.p...............aNNSGahuhps....shtt+pGHsYPANWPYPLIGpsAlts..lTp ........NDTNDQSFNDYLSAANMLYPIPANATNVPISIPSRNWAAFRGWSFTRLKTKETPSLGSGFDPYFsYSGSIPYLDGTFYLNHTFKKVSIMF.DSSVSWPGNDRLL..TPNEFEIKRo..VD...GEGYNVAQCNMTKDWFLVQMLupYNI..GYQGFal..PEuYKDRMYSFFRNFQPMSRQVVDphpYpc.YptVslsaQ...............HNNSGahu.hu......s.ppGpsYPANaPYPLIGtpAl.s.....hp....... 0 0 0 0 +1116 PF02456 Adeno_IVa2 Adenovirus IVa2 protein Bateman A anon Pfam-B_1982 (release 5.4) Family IVa2 protein can interact with the adenoviral packaging signal and that this interaction involves DNA sequences that have previously been demonstrated to be required for packaging [1]. During the course of lytic infection, the adenovirus major late promoter (MLP) is induced to high levels after replication of viral DNA has started. IVa2 is a transcriptional activator of the major late promoter [2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.18 0.70 -5.81 11 128 2012-10-05 12:31:08 2003-04-07 12:59:11 10 1 98 0 0 181 10 359.20 69 84.98 CHANGED lDpp.ht.lp-hac+ltshppslpshshu.h.....tPh....ppFsSa-chhuhuG..llpcLpcsppplccthstsphaLpssGphsSLNhshQPlIull....YGPTGsGKSQLLRNLlSspLIsPsPETVhFIsPphsMIPPpEhsAWchQlsEGNYssss-GTlsPpouTh+PcFlcMoY--hTss-NhDlscPpNlFspAAppGPlAIIMDECM-cLssp+uIShhFHAhPSKLasRaspCTGaoVFVVLHNMNPRpshuGNIssLKIQAKlHIlSs+hpP.QlsRFlpoYo+uhspsl.slLLKsIFsahpppspYsWllYNssP.pEuhpWs.hLcsppulhPhhLNlQshlacslc+Ip+hh....p-RpRappth+pKhp ..........................................LDRDAlE+lTELWDRLpLLpQTLscMPMADGL....KPL.KNFuSLpELLSLGG-....RLLs-LVRENhpVRcMhNEVAPLLR-.DGS.C..SL...N...Yp.L.Q.P.V.IGVI....YGPTGCGKSQLLRNLLS.uQLIoPAPETVFFIAPQVDMIPPSEhKAWEMQI..CEGNYAPGPEGTllPQSGTLRP+FlKMuYDDLTpEHNYDVSDPRNVFApAAA+GPIAIIMDECMENLGGHKGVSKFFHAFPSKLHDKFPKCTGYTVLVVLHNMNPRRDLGGNIANLKIQAKhHIISPRMHPSQLNRFlNTYTKGLPlAI.SLLLKDIhpHHAQ+PCYDWIIYNTTPEHEAhQWs.YLHPRDGLMPMYLNIQoHLYRVLEKIHRsL....NDR-RWoRAYRARps......................................................................................... 0 0 0 0 +1117 PF01686 Adeno_Penton_B Adenovirus penton base protein Bashton M, Bateman A anon Pfam-B_1180 (release 4.1) Family This family consists of various adenovirus penton base proteins, from both the Mastadenoviradae having mammalian hosts and the Aviadenoviradae having avian hosts. The penton base is a major structural protein forming part of the penton which consists of a base and a fibre, the pentons hold a morphologically prominent position at the vertex capsomer in the adenovirus particle [1]. In mammalian adenovirus there is only one tail on each base where as in avian adenovirus there are two [1]. 
25.00 25.00 94.70 94.50 15.40 15.00 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.49 0.70 -6.25 19 217 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 130 39 0 205 0 436.00 67 95.37 CHANGED h-..hsPPRhhAPTEGRNSI+YopLsPL.DTTKlYhlDNKouDIsoLNaQNDHSNFhToVlQNsDhoPtEASTQoIpLD-RSRWGG-LKTlL+TNhPNVscFh.SNoh+s+lMss................ctssssspYEWh-lolPEGNao.stlIDLhNNAIl-pYLtlGRQpGVhcSDIGVKFDTRNFpLGhDPlTsLVhPGhYT.cAFHPDIVLLPGCuVDFTaSRLsNlLGIRKRpPYpcGFhIhYEDLpGGNIPALLDlpsYpts....................................................................................IpP.lhpDSpuRSYpVhpssos.....TtYRSWhLAY.N........spsuspspTLLTsPDlTsGltQlYWSLPDhhpsPlTF+ss.ppssshPVVGhcLhPlhu+shYNspAVYoQllcptTs.tTpVFNRFPENpILhpPPhsTlTslSENVPuloDHGTLPL+NSlsGVQRVTlTDsRRRsCPYVYKSLusVsP+VLSSpTh ............h.-s.hVPPRYhuPT-GRNSIRYS-LsP.aDTT+lYLVDNKSuDIsoLNYQNDHSNFLTTVlQNNDFTPsEASTQTINFDERSRWGGDLKTILHTNMPNVNEaMFTsKFKARVMVuR+.s..........tsDtupchLcY-WaEFTLPEGNFSETMTIDLMNNAIl-NYLpVGRQNGVLESDIGVKFDoRNF+LGWDPVTKLVMPGVYTYEAFHPDlVLLPGCGVDFTcSRLSNLLGIRK+QPFQEGF+IMYEDLEGGNIPALLDVcpY.cScpchtptttts...t.......................................................t.httt..tsstcclsIpP.lpcDspsRSYNVlpssp.......sThYRSWYLuYsYG..................DPEKGVpSWTLLTTPDVTCGuEQVYWSLPDhhpsPVTF+so.ppsssaPVVGsELhPh.huKSFYNt.AVYSQhlcp.Ts.hT+VFNRFP-NpILhRPPssTlToVSENVPAloDHGTLPL+sSltGVQ...RVTlTDsRRRTCPYVYKuLuhVsP+VLSS+Th. 0 0 0 0 +1118 PF03955 Adeno_PIX Adenovirus hexon-associated protein (IX) Finn RD anon DOMO:DM01967; Family Hexon (PF01065) is the major coat protein from adenovirus type 2. Hexon forms a homo-trimer. The 240 copies of the hexon trimer are organised so that 12 lie on each of the 20 facets. The central 9 hexons in a facet are cemented together by 12 copies of polypeptide IX. 
21.80 21.80 23.30 42.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.58 0.72 -3.53 12 121 2009-09-10 22:51:00 2003-04-07 12:59:11 9 1 96 4 0 85 0 109.30 59 79.56 CHANGED -GtlhosaLTsRLPsWAGVRQNVhGSslsGtPV.hPuNStshphtphs..ushcssAAAsu..ptutssspshstsht.hstlt.ph.u........pctLthllspLcpLpppLtth .EGGlFSPYLToRLPuWAGVRQNVMGSTVDGRPV.hPANSST.hTYATVG.uSoLDosAAAAA..uAAA.TAptlAushh..us.ussssSS.....ltE-KLhsLLAcLEALoppLus.L..................... 0 0 0 0 +1119 PF03910 Adeno_PV Adenovirus minor core protein PV Finn RD anon DOMO:DM04810; Family \N 20.30 20.30 30.70 24.40 17.50 16.80 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.56 0.70 -5.37 12 112 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 88 0 0 111 0 325.30 60 96.48 CHANGED MSKRKhKEEhLpslAPEIYGP..s.hpsch..KPRplK+V........KKcc+..tc-..........p......-stVEFVRpFAPRRRVQW+GR+VpRVLRPGTsVVFTPGERSshp..KRsYDEVYuD-DILEQAApphGEFAYGKRuR..............-.lulsLDpuNPTPSLKPVThQQVLPs.s......+RGlKRp.ttcl.PThQlhVPKRp+lE-VL-phKs............................................................-PslpPEVKV..RPIKpVAPGLGVQTVDIpIPs............-s............M-sp....sc.P.s.s.................................hsshpl....QTDPWh.hs..........ssppp+ppR+YGsAstlhPpYsLHPSIh........PT.............PGYRGpp.apsphptssRRRpssspRp+p........hsPstlhRshsRRG+..pls 
.......................................MSKRKhKEEMLQVlAPEIYGP...s..ts-p...KPRKlKRV................KK+c..c...tt................s........-stVEFVRpFAPRRRVQWKGR+VpRVLRPGTsVVFTPGERSusp..KRsYDEVYGD-DIL-QAA-RhGEFAYGKRuR.............p-hlulPLDcuNPTPSLKPVTLQQVLPsss....pRGlKRE..upslhPTMQlMVPKRpRlEDVL.-p.hKh.......................................................D.tlpPE.VKV..RPIKQVAPGLGVQTVDIQIPs...................s.....................M-sppc.P...................................................................ooohEV....QTDPWhsssss............sssp.pRRR+.aGsA.StL....hPsYsLHPSIl...............PT.............PGYRGop.Yt...sRp.psup+RRppsspRRR.........lsPu.tVpRVshRcGR..hlh................ 0 0 0 0 +1120 PF01310 Adeno_PVIII Adenovirus hexon associated protein, protein VIII Finn RD, Bateman A anon Pfam-B_1405 (release 3.0) Family See Pfam:PF01065. This family represents Hexon. 25.00 25.00 49.30 40.90 20.60 20.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.58 0.70 -4.53 17 163 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 109 4 0 124 0 206.60 56 98.48 CHANGED cphPTPYlWpYQPptGhsAGAuQDYoo+hNWl........sAGPpMhs+lpslpspRNpllhppAthspsPhss.N.ssWPAs.lhp.sss...Ppslhhs+spsh-pth.........TsuGhQLAGGut................................hthpG.hQLss........P.sttth..RPsthh..QLuGuuh......hs..ppshhhhppusS..lPRoGGluspQFlcEFsPsVY.pPF.SGsPssaPcpF.s.Ycssssshcuas .............................................K-IPTPYMWSYQPQhGlAAGAuQDYSo+MNWL........SAGPpM..ISRVNslRsp.RNpILLcQAAlTsTPRspLNP.sWPusLVYQEhPs...PTTVlLPRDAtAEVpM..........TNuGsQLAGGuphs.hts.......................................ttuIKpLhIRGRG.hQLNDEh.............VSSuhGL..RPDGlF..QLuGuGR....SSFTP.pQAhLTLpoSSS..pPRSGGIGTlQFVEEFlPSV.YFNPF.SGSPGpYPDpFIPNFDAls-uVDGYD..................... 0 0 0 0 +1121 PF02459 Adeno_terminal Adenoviral DNA terminal protein Bateman A anon Pfam-B_1602 (release 5.4) Family This protein is covalently attached to the terminii of replicating DNA in vivo [1]. 
20.60 20.60 21.20 243.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 548 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -12.75 0.70 -6.34 17 138 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 99 0 0 157 3 552.20 65 87.77 CHANGED MLhsLuPpsPsTtphPhhthPPPHLLlGYQYlhRshNDYlFDsRsYSpLpYpphhp.t...pplsWoshssCSYoINTGuYpRFl.Dh-....NFp-TlsplQpAlLh-RllADLulht...hRGhGhs.h......tttpls.......ptl.pcphpplsthpspuhGhupchRlp..........pAup+DhslLssIR+L+sAhhpFLlspth..........tstcshLsLP.....s-s.sWLsuFlccFuc....php............tphsspphh+sllosLoLsps..........tusshsGGsFp...........LRPRE.sGRAVTEo.MRRpRGchlcRFlDRLPlppRRRRh..ssP.P..s.ptt.tttth....-Ep.p.............tFt-EVhsolsEsIchLp-ELTssARpppFFsFAscFYcll.chcsh..uclsEthLRRWlhYFFlsEHIAoTL.YLappLphpt.FtRaVplphsQVlhRuRDtsGp.laoRVWsEpupsA..FppLhpRIhpDlhshlERAGct-..hpp-Eh-pFhs-Isap-sSGDlpEIL+QlslN-s-IDSl-lSFRhKhoG.VsaSspcpIps.sRRVlptA..LRppc.ps ..................MLEDLAPGAPATpRWPLYRpPPPHFLVGYQYLVRTCNDYlFDoRAYSRL+YoEllpPG..hQTVNWSlMANCoYTINTGAYHRFV..DhD....DFQsTLTQlQQAILAERVVADLALlQP..hRGaGlTRM......tt-ppls.........................lERLMpDYYKsLuRCQspAWGMA-RLRIQ..........QAGPKDlVLLATIRRLKsAYFNaIlSshsu......................ppst.tpTsLSLP...............CDC.DWLDAFlERFSDP..VDLpsl.........................htssPTtQLI+CIVSALSLPNG.s................s...phpchpGGVFp...........LRPRE.sGRAVTET.MRRRRGEhIERFlDRLPVRRRRRRs...PPP....Ps......sP.E..pt...........t.E..............hh.......E.......EEEEEt......................................ssFEREVRsTlAELIRLLE-ELTVSAR..NuQFFNFAVDFYEAMERLEAl..GDloEhsLRRWIMYFFVsEHIATTLNYLFQRLRNYAVFsRHVELNLAQVVMRARDs-GsVVYSRVWNEsGhsA..FSQLMuRISNDLAATVERAGRGD..LQEEEIEQFMuEIAYQDNSGDVQEILRQAAVNDsEIDSVELSFRFKlTGPVsFTQRRQIQDlNRRVVAHAStLRAQ+p......... 0 0 0 0 +1122 PF03228 Adeno_VII Adenoviral core protein VII Bateman A anon Pfam-B_3049 (release 6.5) Family The function of this protein is unknown. It has a conserved amino terminus of 50 residues followed by a positively charged tail, suggesting it may interact with nucleic acid. 
The major core protein of the adenovirus, protein VII, was found to be associated with viral DNA throughout infection. The precursor to protein VII were shown to be in vivo and in vitro acceptors of ADP-ribose. The ADP-ribosylated core proteins were assembled into mature virus particles. ADP-ribosylation of adenovirus core proteins may have a role in virus decapsidation. 21.20 21.20 23.80 23.80 20.80 18.30 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.78 0.71 -3.51 14 129 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 102 0 0 107 0 116.10 57 66.84 CHANGED MuILISPSNNTGWG.LGs.ppMaGGA++hScpHPV+VRsHaRAsWGuhp....GRssh..h..th....t.hp...ssthssTsDsVhcs...lsAsuRtht+p+RRhc....h.sRR+thttsotAhRt........ARu ..MSILISPSNNTGWG..Lss.....Sp.....MY.....GGA++RSsQHPVRVRGHaRAP..WGAhKtt..........ttsRTT..VDDVIDpV..VADARNYT...s..ssSTVDuVIDS...VVADARsYARRKpRpR....RhARR+ps...TsAMRAAR.Alh................................................................... 0 0 0 0 +1123 PF04439 Adenyl_transf Streptomycin adenylyltransferase Kerrison ND anon DOMO:DM04121; Family Also known as Aminoglycoside 6- adenylyltransferase (EC:2.7.7.-), this protein confers resistance to aminoglycoside antibiotics. 
20.60 20.60 20.60 20.90 19.60 20.30 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.90 0.70 -5.40 6 625 2012-10-02 22:47:23 2003-04-07 12:59:11 7 5 514 1 75 442 9 256.10 36 96.18 CHANGED MRoEpEMhDllLchA.pcEclRlVsLpGSRTNpNlsKDcFQDYDIsYhVoDl-sFIp-csWLppFGchIhhQcPEDh-h...Fss-hsptauYlMLF-DGNKlDLTLhshc-hpcah.......-sDsLhKlLlDKDshl.p.lsPsDppYalK+Pop+EFpcCCNEFWhVosYVsKGlsRcEIlaAhDHhppIlRsE.LL+MluWaIu.p+GaplShGKNhKahc+YLsschWpch.uTashsuYcchWpSLFhsppLF+phupcVupthsYsYP.-h-.csIpcYhcshhpp ......................................MRoEpEhhplllphA.p-c+IRsVhhpGSRsNs.......ps.+DpFQDY..DIsYhV..p..-..hp...s.a...hpst........s........WlppF.Gp.h.lhhQpPcc.hph.......hs...s..p....p...p...t..hsYL..MhF.p.D.G.s+IDLTLhPlcplcpah..................................pt......Ds.....LhplLlDKDshh.....t....s.s..o..-.psY.hl.pp...P..op.p-Fp.csCNEF.WasosYVsKGLhRcElhaAhshhpths+pp.Ll+hls.Wpl.uh.cpsa.p.l.s.lGKphKalppY.l.st-h..hcphhpoashssh-phWpuLhhhtpLF+phupclA.pp..htapYs.phs.cpltpYhpph...h......................................................... 2 29 51 60 +1124 PF01928 CYTH Adenylate_cyc_2; CYTH domain Aravind L anon Aravind L Domain These sequences are functionally identified as members of the adenylate cyclase family, which catalyses the conversion of ATP to 3',5'-cyclic AMP and pyrophosphate. Six distinct non-homologous classes of AC have been identified. The structure of three classes of adenylyl cyclases have been solved [3]. 
23.10 23.10 23.10 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.36 0.71 -4.61 51 3690 2012-10-01 23:11:28 2003-04-07 12:59:11 16 10 3229 34 930 2667 248 182.60 21 64.24 CHANGED hhElEhKhhl....t..t......htphpspthshhpppshYasss.....shphttpp.tsLRlR............................sptth.lThKsst..t......phpsths..........................h.t..hhtlhsths.....................hp.h..sslpppRptapl....tt........stlslDtschl............hs.hElEhpspstpph.phhcp...........t..t.hth.thsthtRsYhphhtt ...............................................hEIEhKhhls.........tthpt.l...............htph.....t.....h.p....t.t...s.....h.....p......t.sh....Y.a.-Ts........c....L...t.t.p...c...huL...RlR...........................h.....s..st....th...hTlKs..ss.......................hsshhp..t....Eh.ph.ls...................................................hst..hht.h.htths.........................lp.s..l...hs..hshh.R....pphhl...............st...................hplslD....p....schh..........................sh.s...-hEL..El....p..s...s..s....h....p...t....h..h..p.hhpp..........h.......t....h......ts.....schtRhh..h...st..................................................................... 
0 244 546 742 +1125 PF01295 Adenylate_cycl Adenylate_cycla; Adenylate cyclase, class-I Finn RD, Bateman A anon Prosite Family \N 21.80 21.80 25.30 25.30 21.30 20.70 hmmbuild -o /dev/null HMM SEED 605 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.99 0.70 -6.25 18 1028 2009-10-20 15:17:26 2003-04-07 12:59:11 13 6 915 0 113 599 28 520.90 64 68.95 CHANGED -WlDhGGhuplsApEYFGASLWQLYKGIDoPYKuVLKhLLLEAYStEYPNTcLluhphKp+hh........sschs.paphDsYhhMLE+VTpYLpplsDhpRLDhlRRCFYLKssEslu...p..tsssWRpphLscLlspWsWocpplppLDpRpsWKIcpVcps+spLl-tLMtSYRNLIpFAR+pplssSIsPpDIuILoRKLYuAFEsLPGKVsLlNPpIS.sLSEssLTFIpVpp.s+t...psGWYLhNQuPcstthsspphlEasc.LsKLVAWuYFNGLlTspTpLHlhspsscl..spLppFlsDL+toFPlp.sspsospsLppPCEIRpLslhlNLppDPTp+h..pslchchpsoDlFSFGpppcsLVGSIDllYRNsWNElRTLHFcGspAlLcALKsl.sKhHpsussPcSlcVFCYSp+hRuplcshVtsLlpcCIplpLus..ppptpphpsL+luGcsatlFFEc+GlSlQcLpsshsh..............aspIoppKht..sh...ttppppphPt.IDuFASEGhlQFFFEssc..puFNVYILDEsNclElYppCsGsK--hl+clN+hYsssp.cptpsstphl..NFNhPQFYpllps......sstlpllPFpuptptp ........................................EWLDLGGLSoLSAEEYFGASLWQLYKSIDSPYKAVLKT.LLLEAY........SW....E.YPNs.+LLAcDlKQRLa........sGEls...saGLDPYshMLERVTcYLstIp..D....TRL.DLVRRCFYLKVsEKLS....p.p.p.u.ssuWRRtlL.spLVpEWsWsc.sp.LshLDNRusW.KI-QV+csHscLLDAhMQSYRNLI+.FA...R.RN..slos.S.hSP...QDIGlLoRKLYA.AFEsLPGKVTLlNPQISPDLSEssLTF..I..VPs...G..Rs.....N+sG...WYLY..N..puP...........shc..sI..l.u.a..pPLEYNRYLsKLVAWAaFNGLLT.upT+Lal+ussh.s...sKLpchVuDl.pppF.P.LR....ls.sPT..sc...A.....LhoPCEI.RcLAllVNLEhDP.Tutapsp.sl+hDh+clDlFS.FGppQpCLVGSlDLlYRNSWNEVRTLHF.sGEpuhl-ALK.TlL.....G.KMHQDA...s...P.P..-.SV-VFCYSp+LRGlIRsRVpQLVuECI-LRLus..pppcssRFKALRVuGQsWGLFF.ERhs.VSVQKLENul-F.........Ysu.ISp.NKL+.Ghslpl....cssps+.....L.P..sVVDuFA.SE.......GlIQFFFE....-o....t..p.p.GFNIYILDEuNRlEVYHcC-GoK-ELV+-VsRFYoSu+..Dc.s.hu..ushI....NFN.LP.QFYQIVps......-G..ctpVlPFRsp...p.h.......................................................................................................................... 
0 16 40 81 +1126 PF00709 Adenylsucc_synt Adenylosuccinate synthetase Bateman A, Griffiths-Jones SR anon Pfam-B_690 (release 2.1) Domain \N 19.90 19.90 21.30 20.20 19.00 19.70 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.50 0.70 -5.82 92 5395 2012-10-05 12:31:08 2003-04-07 12:59:11 16 13 4698 55 1514 3933 4438 387.40 46 96.96 CHANGED sslllGsQWGDEGKGKlVDhLu.......pcsDhVVRapGGsNAGHTlVss....s.c+atLHLlP..SGllpsss.hslIG.NGVVlcP.sLlc.Elcp.Lp.ppG.lsh..scLhISccAHllhPaHptlDthpE....t..pupt....pIGTTt+GIGPsYpDKsuRp.G.lRlsDL..h.c......chhpc+Lcph..........lp.p.Nthh.ppha.........t....p.h.shcplhpchhphsc............................................plp....sh..ls.Dsshhlpculcp.sKplLaEGAQGshL.DlDaGTYPaVTSSsssuuGsssGsGluPppls..pVlGVsKAY.oTRVGsGPFPTE..lh........s-......hG-plp.............ct..............................................G...p....EaGsTTGRsRRsGWhDhVhl+Yus.plNG.hsslslTKLDVL....sshcclKlCsuYch....sGc....................hlc.thPss.httlpcscPlYEphsGW.p....pchssscp..a--LPtsAppYlctlEchl.uls...............lshlSsGPc.RcphIh 
....................................................................................................................................................................................s.lVVlGsQW.GDEGKGKIsDh..Ls................ppuchVsRa.Q...GGpNA..GHT..lVl................s............G..pcahLHL.lP..S.GIh.pts......h....hslIG........NGV..VlsPtsLhc..Elpt...Lc..p..p.......G.......lss..........ppLhI...ScpA...p...............l...IhPYHhtlDphpE................pt..+Gsp.........pIGTTt+GI.......GPu.Y.....t.....DKs.....u.............Rh...G.....lRluDL................h..-t.....-thtc+Lcps...............lph+..Nhhh...pha.......................................tt.......psl...sh.cclh..c...c.hh..t...hu.c............................................tlp.....sh...ls..DsshhLs.......p.Ah..........c.p....G...c.plLFEGAQG.............shL..DID.a.........GT.Y.P....aVTSSNssAG.Gs...ss.G.u.G.lGPptls....pVlGlsKAY.oTRV......G.........s......G..PF..PTE..Lh...DE...........................hG-.tlp..............ch..............................................................G....p....E.........a.Gs.TTGR.R..RsGWhD...sVslRhuh.p.......l......sG.l......os..lsLTKLDVL........s..Gl.c...plKlCluYch........-Gc.......................pls...hP..hs...hpth..ppscPlYEphP.GW...p....Eshtus+s.....hc-.........LP.psAppYlc..................RlEEls.Gl............................lshlSsGPcRppTh........................................................... 0 480 920 1248 +1127 PF00106 adh_short short chain dehydrogenase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family contains a wide variety of dehydrogenases. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.81 0.71 -4.33 230 80936 2012-10-10 17:06:42 2003-04-07 12:59:11 20 600 6093 916 30667 110825 42278 163.30 25 56.70 CHANGED GTlLlTGGoGuLGttlARaLspp.Gsc+LlLsSRp.............u.tusuuscL.....hs-Lp...thG..ApVplsu.sD...luDc.sslssllssls...t.tPlsuVlHsAGl...lc..Duslssh.....os-chs..pVhpsK....ssuAhpLcclstc....hs......LstFVlaSSsAulhGusGQusYAAANuaLDuLAcpR+upGL ...................................................................................ssll...T...G......u.........u.....p.........G.....I......G....t........u.......h.............A...........p.....t.......h....u........p........p.....u............u......................h.........l.........l....h...s....s.ps..............................................pt..h.pph....................hp.p.lp.............t....G......s........p.........s.........h..........s................h..........t......h..........D.................l....s......s........t.......p...........s.......l.........p..............p.........h.......h........p......p............h...........h...........t............p..........h..........G..........p............l.................D.............l.....L..........l...........N.....N.......A.........G....l.....................tt...............s........s.......h......t......c.h..........................s....c....p.......a....c............p...h.....h..........s..l..N.................l.h...u....s....h....h....h..s....p....t.hhshh.................................htps........tG.p..I..l..s..l........u...........S............h............s............u............h............h.............s..............................s............t..............................s............s............Y..........s.......A....o..K....tu....l..h.u.hscslsh-..t...................................................................................................... 
2 8711 17860 25336 +1128 PF04619 Adhesin_Dr Dr-family adhesin Kerrison ND anon DOMO:DM04566; Family This family of adhesins bind to the Dr blood group antigen component of decay-accelerating factor. This mediates adherence of uropathogenic Escherichia coli to the urinary tract. This family contains both fimbriated and afimbriated adherence structures [1]. This protein also confers the phenotype of mannose-resistant hemagglutination, which can be inhibited by chloramphenicol. The N terminal portion of the protein is though to be responsible for chloramphenicol sensitivity [2]. 20.60 20.60 20.70 22.00 20.50 19.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.76 0.71 -4.46 6 61 2012-10-02 17:35:21 2003-04-07 12:59:11 7 2 5 49 0 67 0 136.70 49 86.71 CHANGED uFTsSGoTGTscLTVTEECpVpVsspssoKpRu-LsDuAhlGslolsApGCsTcp.sAL+AsusNYcssp.hhLhp-stps+lsV....sltAsDGs.sWTsDsushYRsssGsWsGolhllVcGDQsspPsGsYTLNL-GGYWsp ..............FTsSGoTGTscLTVTEECpVpVssho..soKsRupLs.-ustIGslsVpApGCsscQ.lAL+AssDNa-psp.hahhp-NspDKLhV....slpusDGS.uWTsDsGVaY+s........csGsWGGplsl.hVcGDQTspPsGsYTLsLsGGYWsc..... 0 0 0 0 +1129 PF03257 Adhesin_P1 Mycoplasma adhesin P1 Mifsud W anon Pfam-B_4117 (release 6.5) Family This family corresponds to a short 100 residue region found in adhesins from Mycoplasmas. 25.00 25.00 42.90 42.90 19.10 18.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.06 0.72 -3.67 7 75 2009-09-11 06:42:52 2003-04-07 12:59:11 8 2 7 0 10 73 0 87.40 58 10.15 CHANGED pGSspptGS.oGo..SAGNPDSLcpDKlspSGpshTs.p......ouspssTNYTNLPP.s..........lTPTuDWPNALSFTNKN..NAQRsQLFLRGLLGS...IPVLVN+ ..................................ptttS...uo..SAGNPsSLcpDKlop..SGps.os.p.....shsppcsTsYTNLPs.s...........loPTuDWsNALoFTNKN..NsQRsQLhLRGLLGo...IPVLlNK........... 
0 9 9 9 +1130 PF00406 ADK adenylatekinase; Adenylate kinase Finn RD anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null --hand HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.92 0.71 -4.33 23 9734 2012-10-05 12:31:08 2003-04-07 12:59:11 17 83 7181 94 2694 6272 2767 160.60 42 74.26 CHANGED llGsPGuGKGTQuppIsccaulsHlSTGDhLRApl.puGTplGcpAKphMDpGcLVPDElslsll+-cltps..ssppGFLLDGFPRTlsQA-uL-c....tshplDhVlpl-Vs--lllcRlouRhlp.ssGpsYah.apPP.c..hhsDhsup.LhpRuDDstEol++RLpsYpppTpPlI-aYppcGh ..............................................hhG.PGuGKGTp.........sth.lh...p....p....h..........t..h......s..p..l..S................sG....DhL.....R....u.........s....l......c....s..............s..........T......-.......L..........G............p..............p..................A...........K............sh...h..D......t...................G..p......L..V.......s....D.E.....l...l...........l...u...l.......l.+....-.R.....l...........s...................p...............s...........D..............s................p........p..........G.........F.......L.L.D.....G.F...PR.......T...ls.......QA.........-...u...........L.............c............c........................................................h..........u..............h.........s..............l..............D..........h..........V....l....ph..-.V.s.-..-.lll.c....R..hsGRthp.ssGtsYH.hapss....................................................................h.tshsDhsuttLh.RsDD.p.pE..T..l.+.p...Rlt..a...h.....p.........s...t...PlltaYtt...h............................................................................................................................................................................ 1 926 1587 2197 +1131 PF05191 ADK_lid Adenylate kinase, active site lid Finn RD anon Manual Domain Comparisons of adenylate kinases have revealed a particular divergence in the active site lid. 
In some organisms, particularly the Gram-positive bacteria, residues in the lid domain have been mutated to cysteines and these cysteine residues are responsible for the binding of a zinc ion. The bound zinc ion in the lid domain, is clearly structurally homologous to Zinc-finger domains. However, it is unclear whether the adenylate kinase lid is a novel zinc-finger DNA/RNA binding domain, or that the lid bound zinc serves a purely structural function [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.93 0.72 -4.29 141 6209 2009-01-15 18:05:59 2003-04-07 12:59:11 9 17 5432 61 1176 2972 552 36.00 57 18.92 CHANGED RRlCts..CGpsYH........lhasP..PKhpGlCDhCGs..pLhpR.sD ....................RRsHhs..SGR.oYH........shaNP.........PK..........hEG.........K............D.Dl.TGE...-Ll.RcD.............. 0 385 706 964 +1132 PF05221 AdoHcyase S-adenosyl-L-homocysteine hydrolase Finn RD anon Pfam-B_157 (release 2.1) Domain \N 25.00 25.00 25.20 27.90 24.40 24.20 hmmbuild -o /dev/null --hand HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.67 0.70 -5.45 9 2525 2012-10-02 14:31:05 2003-04-07 12:59:11 12 10 2027 95 1084 2299 4318 421.00 48 95.54 CHANGED tcYKVtDIuLAsFGRKEl-lAEsEMPGLMshRccYGsSQPLKGA+IuGCLHMTlQTAVLIETLsALGA-VRWuSCNIFSTQDaAAAAIA.........tuGlPVFAWKGET.EEYhWCl-pslp.asDG..tssNhILDDGGDhThLlpc.........................................KasphhtsIhGlSEETTTGVH+LYch.ppGpLhhPAINVNDSVTKSKFDNlYGCR-SLlDGl+RATDVMlAGKlAVVsGYGDVGKGCAuuL+GhGARVlVTElDPIsALQAuMEGapVsslE-lspcucl..hlsTsssssIlsscch.hhppsslssshtah-h-.psthhphsupcphsIKPQVDcahh.sGp+IILLAcGRLlNLGCATGHPSFVMSsSFTNQVLAQlELWspt..............ucY....cssVYlLPKpLDEcVAtLHLupLss+LTKLoccQAcYlGlPlpGPFKsDHYR 
...........................................................................................................................capVtDl.u.LA..saGR+EIclAEpEMPGLM.u.lRccaus..pp.PL+GARIsG.s.LHMTlQTA...................VLIETLhALGA.-.V...R...WuSCNIa.STQDcA...AAA.l.A.............t.s.G.l.P.V..FAWK...GEo.-EYahshcpsl..a...........s.s...................ht.s.NhILDDGGDh.Thhl+..p.................................................ca..sch..hp.p...l............+G..loEETT......TGV.+.RLYphtcp....G...pL.hhPAI.NVNDSVTKSKFDNhYGCRcSLlDGIpRuTDlhlAGKhsVVsGYGDVGKGsAtuL+uhGApVhVTElDPIsALQAsM-GacVssh--ssppuD................................................................hhssssssphhhtphhthhptsshssssthtp.p..hthhthpshphhph+splcchhhssG+plIlLuEGRLlNLGsAoGHPSF.VMSsS.FsNQ...s..LAQI.ELapp.t.........................................s.pY......c...p.VY..h.LPKcL...DEcVApLHLpplGscLTpLop-QAcYlGlshpGPaKsphYR........................................................................................................................................................................................................................ 
0 369 670 896 +1133 PF00670 AdoHcyase_NAD AdoHcyase; S-adenosyl-L-homocysteine hydrolase, NAD binding domain Bateman A, Griffiths-Jones SR, Finn RD anon Pfam-B_157 (release 2.1) Domain \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.03 0.71 -4.40 16 2557 2012-10-10 17:06:42 2003-04-07 12:59:11 16 15 2063 95 1090 4107 3855 158.70 52 35.69 CHANGED NLYGCRESLlDGI+RATDVMIuGKlAVVsGYGDVGKGCAtuL+uhGARVlVTElDPIsALQAuMEGapVsTlE-ssccucIFVTTTGN+DIIps-HhppMKcsAIVCNIGHFDsEIDVshLpss.uhcp.slKPQVD+aphss.G++IILLAEGRLlNLGCATG ..............................NhYGCRcSLl..DGI...p....R..u..T.D.l...h...lAG...K.....s.....sV.Vs.G.Y.G.D.............VGKGsAt.uL+.uhG.A...p...V.h.....V..T.....E...l..D.......P....I...s........A......L....Q....A.........s...M....-......G.......a....c.....V.....s.............s.............h.......-............-.........s............s...p...............p........u.....D...I.....a.....V....T....s.....T....G....N....p...c....lI...st.-.Hh.c.t..MKc.pA..IV..s.N.....I...G..H...FD..sEI.D.....l..s.s.L.cph....th..........p.h.pp.l.K.P......Q......VD...c.........h..........h........h........s......s........G.............+....p........llLLAE..GRLVNLGsATG........................................................................... 0 375 679 903 +1134 PF03747 ADP_ribosyl_GH ADP-ribosyl_GH; ADP-ribosylglycohydrolase Bateman A anon COG1397 Family This family includes enzymes that ADP-ribosylations, for example ADP-ribosylarginine hydrolase EC:3.2.2.19 cleaves ADP-ribose-L-arginine [1]. The family also includes dinitrogenase reductase activating glycohydrolase [2]. Most surprisingly the family also includes jellyfish crystallins [2], these proteins appear to have lost the presumed active site residues. 
26.10 26.10 26.20 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.93 0.70 -4.63 176 3139 2009-01-15 18:05:59 2003-04-07 12:59:11 9 53 1913 22 1016 2670 767 273.20 22 79.26 CHANGED GullGtAlGDALG....h.shEh...hshs..plp.t.ht............................................................hh.ht..htsGph.....T....DDTphslslsculhp..........................................th.....s..ps.........hhpc.hht...........Whp...............................t.shshG.....ssstpul...............pph...ht..ut......t.t........................................................................ssus.GuhMRssPl.ul..hh...........s.pph...hp.hutpsutlTHssspuhtuuhhhAhhltth.lpGp.....s...htphhttt..................................th..tppphtptlpthhphtppst.............................................htthtssshshcslshulhshhp................sss........apculhtsls....hGGDoDosuAlsGulhGAha.G.h....ps...lPpp..W .................................................................................................................................................................................................................uslhGtslGDA.hG..........h.shEh..........hshp.........plp..t.hs.h..............................................................................h.h.....h.ttu...ph.......................T.......................DDTt..hslslscuLhpt............................................tth........s...ps.................hhpp.h.t..................Whp..................................h.ttshG..................ss..sptul....................................pth.......pp.....st.............t.......................................................................tuhsN....GusM..RlsPl..uh.......hhst..............shpph.................hp.hsths.......u....ph..THsp.....s..h.u.ltuuhsh...Ahhlths....lp..Gt.........s...hpphhpth..................................................................................................................tt..pht.t.plphhhphhpptt................
.............................................................hht.h..t.sss....t.....sh.-.sl.s.sAlhhhht.......................................................sss..........................hpcsl..hh.sss....h.G.G.........DoDThuA.........hsGu.....lsGAha.G.h.....ps........ls................................................................................................. 0 390 677 884 +1135 PF04587 ADP_PFK_GK ADP-specific Phosphofructokinase/Glucokinase conserved region Waterfield DI, Finn RD anon Pfam-B_4731 (release 7.5) Family In archaea a novel type of glycolytic pathway exists that is deviant from the classical Embden-Meyerhof pathway. This pathway utilises two novel proteins: an ADP-dependent Glucokinase and an ADP-dependent Phosphofructokinase. This conserved region is present at the C-terminal of both these proteins. Interestingly this family contains sequences from higher eukaryotes. [1,2,3]. 20.90 20.90 24.50 23.80 20.30 18.30 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.51 0.70 -6.27 9 234 2012-10-03 06:25:16 2003-04-07 12:59:11 10 6 153 7 154 223 1 386.20 27 87.69 CHANGED slh+shulhhAYNsNlDAIh.hLpsE.....slp+hIpchGtcclh+t.EEhP+cIpp..-hlupllaul+pGKsAEl.lhs-c.l+pahtcpat..aDp.R....MGGQAGIhANlLuultshpVIsasPhLuKh.ApLF...sslhhPshEpGcLhLh..+E.acts-...sscIphIaEFp+G..FKlh..phpAP+usRFIsuuc.ps.s+l.h+--hpchLpEIuc.ps-hAIlSGhQulp.....ucstph.l+ps+EclplLpp.spsI+sHLEFAShs-cclRccll..plLshspSVGhsEsElApllplLGhc-LAccIhshsh.l.-ss.lhtuhlhL.c..s..............lchlphHTlhYhhhlT+tcs....EclccuLtFuslhAAs+AphGsIps.--l+.GLcVPhscpuphlcth.chcap.tcsh..h..........chc-YplshlPT+lVpcPhSTVGlGDTISouA.Flo.h 
.....................................................................................................................................tlhluhssslDhlh.hl...........................ph.lpt...hs..h..p.h..........t.t..pph.....lps.t-l.htshhahhppGtuAEhhh.s...pp.hpphht...tht.......p..p........hGGs...Aulhupthu...h..t.s....hpV...lltsshhs+...tphh.....tt.l.hhP......tp...............................schHhIhEYptG....tphu....phpuPpuNRaIhspD.pN.sthp.....hh-.a.hp.........tLpc.......hp......sDlsllSGlp.hhc........s..p.s....tchhhccl...tc.l......p.lpp...spsl..l.HhEhAShsspclhpplhp.plh.PhVsSlGhNEpELh.lh...pshstsc.s.....h...thst..l..pss...l.chh...h...hlhcphs..................................LsR.lHhHTlsYphhhshpt.........hc.hppshthuuh.stssts.pt...........ss.hts...h-.s....c...h.s..L.chs..phhs.p.h..ct........c...h.h...p....tpsh.....h...........................pppshphhhsP.shlscpPh.p..TlGlGDsISutu.hh........................................................................................................................... 0 42 64 112 +1137 PF01117 Aerolysin Aerolysin toxin Finn RD, Bateman A, Griffiths-Jones SR anon SCOP Domain This family represents the pore forming lobe of aerolysin. 
24.30 24.30 24.60 24.70 23.60 24.20 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.28 0.70 -5.51 14 252 2012-10-01 20:43:00 2003-04-07 12:59:11 15 9 61 14 43 279 4 219.70 45 72.35 CHANGED ss.h-VptcllspcspF.l+PluhL.AHhLGYAWsGGspupaVG.EDhslpRs....uD.uW.lpusssGs...CsGYR.CsE+hphplsNFpaslsspshpaGssp.pc+chlpTlsuhAhNpoDpspp.hllshphspoTsW.....SKTssashu-plthcspFcaP.hhG....cT-lslphpAsQsaoso.Nusopops.shpspspVPs+Sph.lplcla+oplshPYchph.hsYDl..phsGFLRWGGNAhh.sHPs.........NRPshsaTFs..hupspptupslhYQasH+hIsGps+aWDWsWslsc..GhushpaAhu....ssl.R.atuhloGpFpAtupasusI-hupshslsup.....phsphstpspttssshpl.hsshshcpLst ......................................................................................................................................................................t.t....h..h.hhN.sp..tp....hpht..h..ptsp...p..h.....shossht..hupp..lth..p.pa.phP..LsG...........coElulEluA..s..QSWAop..pG..GSTT.pol..S...ps...R..s...TVPs+SplPV+ltLYKusISYPY-FKAclp................................................................................................................................................................................................................... 0 15 29 36 +1138 PF05110 AF-4 AF-4 proto-oncoprotein Moxon SJ anon Pfam-B_6407 (release 7.7) Family This family consists of AF4 (Proto-oncogene AF4) and FMR2 (Fragile X E mental retardation syndrome) nuclear proteins. These proteins have been linked to human diseases such as acute lymphoblastic leukaemia and mental retardation [1]. The family also contains a Drosophila AF4 protein homologue Lilliputian which contains an AT-hook domain. Lilliputian represents a novel pair-rule gene that acts in cytoskeleton regulation, segmentation and morphogenesis in Drosophila [2]. 
29.90 29.90 30.20 30.70 28.90 29.80 hmmbuild -o /dev/null HMM SEED 1191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.23 0.70 -14.19 0.70 -6.69 6 483 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 78 0 208 431 0 610.70 28 90.08 CHANGED Yc-DRNlLRh+E+ERRNQEsQQ-c-AFPsshPLFuEPYK....TuKGDcLSSRIQsMLGNYDEMK-FLos+Sp.s+LsuhPpssss.SsscKscPpah.s-pRuputsuS.......pQSs+sossGPsPs.....ushSpSpKtops.....sS.+s+uussPuo..uoQc+spp........ptspp+tpupptu...-pu.p+p.SPhhuplspssss............pppuSS+..s..puusssuKpphpuKSP+-h-sshps.ppP.suhs.......uSuQhssQsF.PPSLhSKousMQQKPTAYVRPMDGQDQAPsESPELKsS...tEsYusQSapss.s-hKssA..KAKLoKL+IPSQslEsshSs-ssCVEEILKEMTHSWPPPLTAIHTPu+sEPSKFPFPTK-SQalosuoppQKphDsss+opsssppsp.SMLEDDLKLSSSEDSDsEQss..............-KsssRssP...sssspspsEsussu+uuSu.ScSsSESSSuSDSESESSSSDS...EsNEP.csuoPEPEPPoTNKWQLDNWLsKVssppss........PsusssopsPppps.cscG+spususp..s-s+-s..pSos+sttcsRsspKuPcsG+s+.QKSPspu....................-..sstRRolGKKQPKKsEKssus-t.psuh+sEpEsu..shtpusphssc+sKspTKGsp+sus+KEPKsulsss................uEK+KaKuso+sssKS+EhlE..TDSSoScSspc.......cuLP.......ssopSsGsscSspt.........RTsss.pushspssht+pp.....hhlPhc-sELLSPL+Ds-shps........LhVKIDLsLLSRIPt+.P.tcus.s+sscccs.stsp+csScstSEKuSsKuKRK+Ks-p-sctsspKKs+lEccsp........osuss.uspcsSps+sSpppStsKcc-hLPsP.u...Plsspo.Kst+su.KRshsppsosusssPpusspspSussssossKp++sEuKGusopctt+ssSsssssp...................................................................hPlPsLosusoKspRsKLsFDD.spsADaaMQEAKKLKHKADAhsD+FGKAlpYL-AVLSFIECGNAhEpsu.EuKSPYsMYSETV-LI+YsM+LKsauuPsAostDKpLAVLChRCpSLLYhRMF+LKKDpAlKYS+TLsEHFK.NSSKsAQAPSPsh...........u+usGsPSPlS.PpsSPuoSVGspSuuuSu.uusu..uoVoIPQ+IHpMAASYVsITSslLpAa-lW-QAEpLo+ENKEFFu-LDplMGPLshNSS.Ms-LV+.......YoRQGLphLRpsA+ 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 20 34 93 +1139 PF03969 AFG1_ATPase AFG1-like ATPase Bateman A anon COG1485 Family This family of proteins contains a P-loop motif and are predicted to be ATPases. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.27 0.70 -5.65 8 2702 2012-10-05 12:31:08 2003-04-07 12:59:11 11 19 1983 0 969 3282 2551 309.70 33 88.96 CHANGED sTshpRY.pppLppsshhtDsAQusAlssL-cLtp...cltsushs+.t.....thhttLau+K...sttss+GLYlWGGVGRGKTaLMDsFFcuLPsppKtRsHFHcFMhcVH--LppLp..............ttsDPls.suDchss-splLCFDEFpVoDIuDAMlLupLhcALFsRGVoLVATSNhsPcsLY+sGLpRpcFLPtIshlcs+hpllsVDushDYRLRsLppA.hahhP.sttsptthsthapthh...ut.pstssL-lsuRtlps.ssssslstauFssLCpsspuspDYlsLuctFpTVhLssV.sMs.sccstA+RFIsLVDclYDpcl+LllS...AEs.hp-LYpuGch..tFEFpRshSRLhEMpS....t-alsRt 
.................................................................................................t.a..t.lt..th.....D.s..hQtthh.t.t.hp.hhp....................th......t.......t.........t.........................hht.t.h........h...t................t.......................t......................ts...........s.........+...............G...LYhaG.sV.G.pGKTh....L.M....D....h....F....ap..s.l......P..................h.............p..........p..........K.......h..R...h...H...F.....H...pF.M..h....c.lH.p..clpt.lp.........................................................................s..p..t..sP..l...t...l.A.c....p.h.......t....tc.......s..p..l.LCFD...E...F.......V...o....D.I...s....D...A......M....l..Lu..p.......L...h...p...t.L....F...t..p...G..l..s...lVAT.SNh..sP...-...p...L......Y.......c...s....GL.pR..t..p.F.L..P.t.....I....sh.lpp..phplhp.l..D...u......s.....h.DYR....h.........c...............s...h...p....t........s.......t......h.......ah.........P..........h..........s......t..............p..........s..........p.........t..........t...........h.p.......p.......h...........a.......t..lss.....................t..t....t.........t.....s......................s.............l...p.......l...t......t.......R...l.....ts....................t...h..s..s..........t......s...h..hhsFtpL..C............t...........s..hutt.DYltLuchac...s.lhltsVPhhs.................................c..stA...RRFlsLlDthY..-pcl+Llho..............................Atssh.cl....a............t....tth...................thcatRshSRL.EMpst............................................................................................................................ 2 276 558 798 +1140 PF01314 AFOR_C AFOR; Aldehyde ferredoxin oxidoreductase, domains 2 & 3 Finn RD, Bateman A anon Sarah Teichmann Domain Aldehyde ferredoxin oxidoreductase (AOR) catalyses the reversible oxidation of aldehydes to their corresponding carboxylic acids with their accompanying reduction of the redox protein ferredoxin. 
This family is composed of two structural domains that bind the tungsten cofactor via DXXGL(C/D) motifs. In addition to maintaining specific binding interactions with the cofactor, another role for domains 2 and 3 may be to regulate substrate access to AOR [1]. 19.60 19.60 19.90 21.70 19.00 19.50 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.39 0.70 -5.78 112 1171 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 639 10 535 982 251 383.30 27 62.78 CHANGED htchhpchhctlp...ssssht....shtpaGTssh.lphhs......phGshPs+Nap....puth..c............tAccluG-t.hpc.......phh.hppt....u......ChuCslsCu+.hschp.................................u.h..............................stsPEY..EshhuhGuss.....................GlsDlculhphNcLsschGlDoIShGssluhsMEhhE+Gh...ltct.h........shtlp.aGss-ullchlcclAhRcG.lGchLApGstchucch..G.s.-...hu..h.pV......K....Gh.EhPuaDPRuhp...G...huLuYATusRGusHl..cu........a.shs.-hh.......s.................thD.h.............th...........csKs.phlthhpchpul.....hDShslCtFssh.................................hshpth...............................schlsAsTGhc.hos--lhps.GERIasLcRhaNl.+t......G....hst.pcDsLPpRh..hcc.......Ph.pGs..scGp.....lsp..lcchLscYYchRGWc.ppGhPot-pLccLGL 
...............................................................................................................hhtp..chl....pss.sh.....s.tphGTshh..l.hhs......ctGtlPstNhp.............psta....p..............htpthshts....htc...........phh....s+....hp....u...........C......a.uCPltCtp..hp.h...........................................................ps.Ea..-slhuhGuss....................................................s...sc.ltss..h..hs.pl....s--hGlDs..hshG.p.hhu...hhh-h...hp+...Glhpc..........................h.tl..c...h...GD..sph..hh..c.hh...c.lApR......pG...hhshLA.c.G....sht....h....Acp..a........s..sp-..........hh......h..hs................................K.....sh.th.P.....sac.s.pthp.....G.....h.l.s.hhh.sp.c.s.t.sHh.ch...............................h.hlth-lh.................G.................tthD.h.....p....................-uKh.chsth.t.h.s.sl.....hsulslCt.ash......s.............................hsh.th.h............................................schhpA...h.TG.c..hot-c.L...hh.u-RIhsLcRtash+h..h...........h.pp.pcD..hlP..phh..hcc.............................sh.s...p..Gs..tp........hsct.hct.Ls.aYp.hGWs.p.G.hPstppLpcLsl....................................................................................................................................................... 0 220 395 470 +1141 PF02730 AFOR_N Aldehyde ferredoxin oxidoreductase, N-terminal domain Finn RD, Bateman A, Marshall M anon Sarah Teichmann Domain Aldehyde ferredoxin oxidoreductase (AOR) catalyses the reversible oxidation of aldehydes to their corresponding carboxylic acids with their accompanying reduction of the redox protein ferredoxin. This domain interacts with the tungsten cofactor [1]. 
20.80 20.80 29.70 34.50 20.70 20.60 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.13 0.71 -4.98 111 1162 2009-09-12 00:12:13 2003-04-07 12:59:11 10 5 646 10 533 979 183 199.10 39 32.36 CHANGED atG...+lL+VsLostphph-plstc.hh+palGG+GLus+ll.hccl.ss.........psDPLuP-.NhLlassGPLTGo..sPsuuRhslss.KS....PLTG..........slssSssGGtauspLKhAGaD..ulllcG+uccPV...YLhIc.cs........plElc-AscLW....G.pssh-TpctLpcch.......t.................spVhsIGPAGEphVtaAsIhs....-t..tR.......uAGRu.GhGAV.MGSKpLKAIsV+....Gsp ........................ahsplLRVsLos...s....slphE....chsc...hcpalGGhGhuh+lha-cl..ss.........sscP.h.s.p..NtLlFusGPLoGos.sPs.ouRhslss..hS..........PhTs..........hls-uphGG.hausphKhA..GaD..slllcG..+..u..c..pPV...a..LhIc..--...........c..Vpl....c..c.....A..s...t..LW.........G......csshtTsctlpcth.s...c......................ssVssIG.AGEN..hV...h.uslhs........sp......p+................uG.RuGsGAV..M.G.S.KNLKAIsVcGs................................ 0 222 394 469 +1142 PF02420 AFP Insect antifreeze protein repeat Bateman A anon [1] Repeat This family of extracellular proteins is involved in stopping the formation of ice crystals at low temperatures. The proteins are composed of a 12 residue repeat that forms a structural repeat. The structure of the repeats is a beta helix [1]. Each repeat contains two cys residues that form a disulphide bridge. 36.50 2.00 36.60 2.50 35.20 1.90 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.25 0.75 -6.12 0.75 -3.10 9 502 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 7 21 7 518 0 11.70 45 80.61 CHANGED TCTsSpsCspAp ..sCTsSssCspA.... 0 0 7 7 +1143 PF03756 AfsA AfsA_repeat; A-factor biosynthesis hotdog domain Yeats C, Bateman A anon Yeats C Domain The AfsA family are key enzymes in A-factor biosynthesis, which is essential for streptomycin production and resistance. 
This domain is distantly related to the thioester dehydratase FabZ family and therefore has a HotDog domain [2]. 21.30 21.30 21.30 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.60 0.71 -4.43 34 276 2012-10-02 20:54:35 2003-04-07 12:59:11 8 5 116 0 114 312 5 127.30 20 65.85 CHANGED hVHRtsss-VlLossppt.....sththphphspsHshhas+...ssDHhsuhLlhEuhRQuuhhlsHsth.tlshshthlhhshshsatchschss.....Pshlpspssssssp.......t...sphphplslhQsGpllusushssss .................................VtRt.pstpVllsshtph.........sth........ph.th.p.h........s..psHshhh-+.....ssc...H..hsshl.LhEusRQsuhh..hs...ptth.....hs...............t....h..lhp.s.h...s.hshpp.hs.phss........Psplpspss.ssshp..............tthph..phphhpsuphsspsphph..h................................................ 0 16 76 106 +1144 PF04671 Ag332 Erythrocyte membrane-associated giant protein antigen 332 Waterfield DI, Finn RD anon Pfam-B_4587 (release 7.5) Repeat To date many different Plasmodium antigens recognised by the hyperimmune system human sera have been cloned, sequenced and characterised. The majority contain tandemly repeated amino acid sequences which make up a considerable portion of the protein sequence. It has been suggested that these repeat-containing antigens may provide an immunological 'smokescreen' to the parasite in order to evade the human immune system. This repeat is found exclusively in the Plasmodium falciparum Ag332 protein and occupies most of its length [1]. 20.70 20.70 61.00 20.70 18.40 20.60 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.49 0.73 -6.65 0.73 -4.03 41 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 2 0 29 71 0 21.20 52 11.32 CHANGED sSlsEEllE.-GSsTE-lVppc .sSloEEIVE.-GSsTE-lVcpp. 
0 29 29 29 +1145 PF01453 B_lectin Agglutinin; MMBL2; MMBL; Agglutinin; D-mannose binding lectin Bateman A, Griffiths-Jones SR, Studholme DJ anon Prodom_2511 (release 99.1) Domain These proteins include mannose-specific lectins from plants[1] as well as bacteriocins from bacteria [2]. 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.15 0.71 -4.19 128 2897 2009-09-13 13:36:34 2003-04-07 12:59:11 19 158 299 65 1397 2960 33 103.90 30 18.28 CHANGED spshlW.....ANpt.....pPl..................s.pLplpt.DGs..LVLh.s..ss..........................t....lWuo..........ssssss..sssh....spL.c.sGNLVl.h....t.............sspslWpS.......F..cpP.scohLsstph....t.hs.t.p....hsuWposs...........ssu ...........................................thlW....hANpp...............pPl.............s........upLplst...s...Gs.........LVLh...s..ts..........................sps...lWoo.......................sos..sts.....tssh..........upLh...c.sGNLVlt...s..s................ssphlW...Q.S.......F...DaP..oDTlLPt.pls.h..s.hh...t.t........hhshts..................................................... 0 123 873 1179 +1146 PF05039 Agouti agouti; Agouti protein Moxon SJ anon Pfam-B_5381 (release 7.7) Family The agouti protein regulates pigmentation in the mouse hair follicle producing a black hair with a subapical yellow band. A highly homologous protein agouti signal protein (ASIP)is present in humans and is expressed at highest levels in adipose tissue where it may play a role in energy homeostasis and possibly human pigmentation [1] [2]. 
21.80 21.80 21.90 21.80 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.33 0.72 -10.99 0.72 -3.64 9 278 2012-10-01 22:06:18 2003-04-07 12:59:11 7 1 126 6 49 243 1 92.60 45 71.89 CHANGED tcu-pth.............plsshuLs.slcphstpptcpc....p+hshhc.phLs.......R.s..sPpRCltLt-SChu.tsPCCDPCAoCaCRhFpshCYCRph .................................................s....p.h..s..........pluhl..uLppc.Kphohp.EAEtp......p+.ph.pc..s..p..thphA.......tsRss....sPpsCVsp+-SChs.ssssCCDPCAoCpCRFFpohC.CRh.h..... 0 2 4 16 +1147 PF04647 AgrB Accessory gene regulator B Finn RD anon Pfam-B_5200 (release 7.5) Family The arg locus consists of two transcripts: RNAII and RNAIII. RNAII encodes four genes (agrA, B, C, and D) whose gene products assemble a quorum sensing system. AgrB and AgrD are essential for the production of the autoinducing peptide which functions as a signal for quorum sensing. AgrB is a transmembrane protein [1]. 23.00 23.00 28.30 28.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.36 0.71 -5.22 56 789 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 539 0 119 491 7 179.40 24 93.29 CHANGED pl.scphsphl....tpptslscpchthlpYGhpllltslhKhlhlllluhlhshhh.sllhhlshhhlRhauGGhHupo.hpChlholhhhlshshls.thhthshh......hhlhh.....hlhuhlllhhaAPssotp+Pltspc....hc+phKhtulhhhhlhhllslhlh.............hhphlhhulhhpsls..lhslsh..phhp .....................hhcphuphl....ppcpsl-.c...ph.hlcaGhpllhtslhphllhhhluhlh.shhh.ollshlsahhlRpautGhHApophhChl.olllalh....h....shlh.hp...h.p.lshh........hhlhl....hllulhllhha....APssocpp.Pl.stc....hh+p..h...Kh..h...ullh...h..h..lhh...l.ls.Lhlh.............hsphlhh.Glll.psls..lhPlhh.h..p............................... 0 69 107 116 +1148 PF04730 Agro_virD5 Agrobacterium VirD5 protein Mifsud W anon Pfam-B_3261 (release 7.5) Family The virD operon in Agrobacterium encodes a site-specific endonuclease, and a number of other poorly characterised products. 
This family represents the VirD5 protein. 25.00 25.00 628.50 219.50 17.30 20.70 hmmbuild -o /dev/null HMM SEED 774 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.53 0.70 -13.41 0.70 -6.48 5 19 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 5 0 4 20 0 432.90 50 93.94 CHANGED KLGAAERuAYEsWssusRPTWKDLIL+ARLDAIDSSAWLsDl.......................................................................................................................................uEsTSSTFcYEGlPLGEGER+AYEcWQEDAQPTWE-LVVNARlsELs+suuIssEasuLpEshEhRSDAsKRKRccsIDQ+so.SuSFpYDGMRLGuPERtAhtsWucspPPo.p-LllcuRlpuIssSsWh.sp.ssoS.FEYpGhPLGpsER.AhcpW.tsAQPsWEDLVVsARh...AELh.uu.h.sp+...p.tt-.s.ptp.Rh..hPI.tcp.sts.FlYDGhtLGAsERAAY-RWSKP-RPTWEDLILDARQAAIESDuVSsstIGKoSSSVFLYEGMSLGDAERQAYGRWRQPAQPRWQNLVVNARLAELDPSAWIPDEHDPFE-GEAsuhsSQASsANKSssuLGsQS-S+RPuhA+EssQEsTHlQsPoCSQLETRRALaFGSSGRDAspTESIAcsNRlDGVGKVKRLGoKSRRuVsATsHuVsSSscRLLS-EsGppAssSsPEKsVRSRoDNIGTYGSRKNERARLATETGKYESEHIFGFKVVHDTLRATKEGRRLERPMPAYLECKELHRQHVGTGRGRTRLVGRGWPDDASYRSDQRATLSDPVASuEGATASNGYQLNQLGYAHQLANDGLQSETPDGVsMPIQVATTSYNYTVSRDPVLSPPSKEQAPQLLHLGPRGQTEAVLARETALTGKWPTLEREQQVYREFLALYDVKKDLEAKsLGVRQKKpALVSALNRTAuLIGASPhKAQSssAEVEpusDEhDERRVYDPRDRuRDKAFsR 
......................ERtAY-sWstst.PTWKDLllpARlssIDussW..phtEsosSsFhYEGlPLGEGERpAYcpWtEsAQPoWEcLVlsARhtpLstst.IssEhssLpps.phRu-u.K+phpp.hDpcps.stuF.aDGM+LGpPE+tAYt............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 3 4 4 +1149 PF00578 AhpC-TSA AhpC/TSA family Bateman A anon MRC-LMB Genome group Domain This family contains proteins related to alkyl hydroperoxide reductase (AhpC) and thiol specific antioxidant (TSA). 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.35 0.71 -4.44 124 16547 2012-10-03 14:45:55 2003-04-07 12:59:11 16 104 4711 453 5334 26489 14345 127.90 25 62.99 CHANGED lGppA.P..-..Fplt.......st..p...s...pp.lsL.s-h...pG..phllLhF...ast.sasssCs.pEhsshpchh.pcap.pt.ss.pllulSs.Dsht.........stppahcphp.................lsaPlluD....sppluctYGlh............................hshtssalIDtc....Ghlphh ...................................................up.s.P..s...F.p.ht...............st....p.........h......pp...l.s..l....s-h................pG.......+h.s.l...l...h....F..................a.....Pt...s.....aT.......s....s...............Cs.....s....E..h..s...s....h...p......c....h...h......p.....c...a......p......p..........t......s.......s......p.....l......l..u...l......S......s.....D...s..ht.............................sp..p...s.a...t.p.ptp..................................................ls..a..s...l....l......u...D.............s...p.....p......l....s....c...t...a..s.l.h..t.................................................................h.s.hR...s....o....F.l...l.Ds.c...Ghlp..h.......................................................................................................... 1 1910 3591 4647 +1150 PF02626 AHS2 DUF183; Allophanate hydrolase subunit 2 Mian N, Bateman A anon COG1984 Family This domain forms the second subunit of allophanate hydrolase. In yeast urea amidolyase (Swiss:P32528) this domain is found between Pfam:PF00289 and Pfam:PF00364. 
25.00 25.00 25.10 25.90 23.60 24.70 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.44 0.70 -5.10 160 3169 2009-01-15 18:05:59 2003-04-07 12:59:11 10 21 2343 11 754 2303 1607 265.10 35 59.05 CHANGED chGlssuGAhDthAhphANtLlGNs..sssAslElshs..Gsplch.ppssh.lAlTGAshsspl..........sG...ps.....lsha.pshtlpsGshLpluts.p..pGsRu.YLAltGGl.sls.hlG.SpSThhtuslGGh........tGc.sLpsGDhLslsssssttstht.........................h..thhsthsp..pht.l.RVl.GPph..chFoppshppahsspaplosposRMGhRL..p....Gs.slp............spstslhS-ulsh.GulQlPssGpPIlLhsDpQTsGGYP+IusVlssDLs+LAQhpPG.spl+FphlshppA ............................hGls.uGAhDphAhchANhLVGNs..tssuuLEl.Tl.........t....sssl....cF....p....s....s.sh.lAlTG.As.h.p.spL.............................-s.....ps........l.....hh..pshhlctGphLpl.........sts...p......pG....hRu.YLAV.uG.G.l....s.........V.ss.h..h.G.SpSTphpuulGGh........pGR...hLpsGDhL.slspsttthttht..........................................h..h....p...h.st........sst..l..+ll.GPph..chFscpuhpsFh.p.s.s.........aplospSsRhGhRL....p.........Gt..hhp.....................tpsp-lhS.cu.hsh.GslQVPssGpPIlLhsDt.TsGGYPpIusl.hpsDL.+LAQ.htsG.spl+Fh.hshpp..................................... 0 197 420 621 +1151 PF01808 AICARFT_IMPCHas AICARFT/IMPCHase bienzyme Bashton M, Bateman A, Iyer LM, Zhang D, Aravind L anon Pfam-B_1613 (release 4.2) Family This is a family of bifunctional enzymes catalysing the last two steps in de novo purine biosynthesis. The bifunctional enzyme is found in both prokaryotes and eukaryotes. The second last step is catalysed by 5-aminoimidazole-4-carboxamide ribonucleotide formyltransferase EC:2.1.2.3 (AICARFT), this enzyme catalyses the formylation of AICAR with 10-formyl-tetrahydrofolate to yield FAICAR and tetrahydrofolate [1]. This is catalysed by a pair of C-terminal deaminase fold domains in the protein [3], where the active site is formed by the dimeric interface of two monomeric units [3]. 
The last step is catalysed by the N-terminal IMP (Inosine monophosphate) cyclohydrolase domain EC:3.5.4.10 (IMPCHase), cyclizing FAICAR (5-formylaminoimidazole-4-carboxamide ribonucleotide) to IMP [1]. 25.00 25.00 25.00 25.90 24.70 24.50 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.91 0.70 -5.33 9 4880 2012-10-02 00:10:39 2003-04-07 12:59:11 13 11 4399 36 1208 3788 3760 297.20 42 60.59 CHANGED AAKNHtcVsIlsDPtDYssllpchcp.pGp.lohppRpcLAhKAFpHTAtYDusIusaFpp.hu..............tt.s...shshhcp.sLRYGENPHQpu.hhhphs.tch....lssAp.LpGKthSYNNlhDADuAhphV+Eap.....PAsshlKHsNPCGlAlG.....................sslt-AYsRAhpAD.hSuFGGlIAhNccVDstTAchIpp...........FsEllIAPuYstEALclLs+K.KNlRllhhthssph...ph-h+pVtGGhLhQcpD...tshhstsphp.lVoKppss..EppLpDLlhAWpslKasKSNAlVhAKNshslGlGAGQ.SRVcus+lAh.KA .........................................AAKNats....VsllscssDYstllpclp........t.....t.....G..s.....hoh..pp.....RhpLAhKAFpHTAsYDuhIusYhsp......hs................................................t.t.h..P..p.t...h...sh.sh.....p....p.p.shRYGENPHQpAuaYhp..st..s.t.p................u.l..A.s.......A.....c...QL........p..G.Kt.LSYNNltDsDAAhch....V+-F..s............................pPAsshlKH.....sN....PC.GlAlu.......................................s.s.lhpAYp...+Aa......tsDshSuFGGIlAhNRpl.D.ttTAct...ltp.......................FhEllIAPuasp-A.LclL...st....K....K....NlRlLt......h..........s..............................s.....p...............t.................s.......t........h-.....h......+.p.V.s..G.G.lLlQ.........sp..D.................h.h.h...s.......t...p.h.........p...........lV.T..........c........+...p.................P.............o....-p................ch.....pDLh.FAW+ss.KaVKSNAI..lh...u.........+.....s...s.....h....TlG...lGAGQhsRV....tSs.+.lAht+............................................. 
0 404 789 1035 +1152 PF04548 AIG1 AIG1 family Bateman A anon Pfam-B_1846 (release 7.5) Family Arabidopsis protein AIG1 appears to be involved in plant resistance to bacteria. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.37 0.70 -5.23 32 1778 2012-10-05 12:31:08 2003-04-07 12:59:11 11 41 154 28 1094 11277 2366 173.00 24 40.10 CHANGED hsllLVG+TGsGKSATGNSI.....LGccs.....FpS+hpupuVTppCpttss.p.cG...pplsVIDTPG...L.F-hp.s..s...s-.hs+EIh+ChhLupsG.HAlL....LV..hslt.RhTpE-ppslpplptlFGsplhcahIllFTpt--Ltsss..L-DYlppssschLppllp.CssRhshFsN+..ssttc+tpQlppLLshVpslhppNs..G.t.aosc.hacphpppt....p.thpppp .............................................pllllG+.oGsGKS....uh....sN....ol...........................L..G..p....p.h.......................F...p.......s....p....h......s.........s....p......s.....s........T......p.....p....s......p.......t....t...........p.......t.......p........h......p..G...................p.p..l........s.......l.l..DTPG..........h....h....cs...p...h......p......p.p...p....h..h..p...p...l.....h...p.....t...h............h........p...t...s.....u.........c.....s....h.l......lV.....h.p..hs.....+h.o...p..p.....-.p.....p..h.....l.p....h.....l.p...p...h....F......G.........p...p......h..h..c...h...s..l.l..l..h..T..p......t..c...p.....h.....t..s...tt........hp..p....h...ht......................h......t.l.....h.t....h...t.............................................................................................................................ttt................................................................................................................................................. 1 345 637 860 +1153 PF00731 AIRC AIR carboxylase Bateman A, Griffiths-Jones SR anon Pfam-B_462 (release 2.1) Domain Members of this family catalyse the decarboxylation of 1-(5-phosphoribosyl)-5-amino-4-imidazole-carboxylate (AIR). 
This family catalyse the sixth step of de novo purine biosynthesis. Some members of this family contain two copies of this domain. 29.30 29.30 29.80 29.70 28.80 29.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.83 0.71 -4.93 224 5292 2009-01-15 18:05:59 2003-04-07 12:59:11 15 19 4504 106 1483 3728 2423 147.40 46 73.54 CHANGED shVuIlMGScSDhssM.......ccAsclLcc.hG..............lsaEscVlSAHRTP-chhcaAcsAcp.+GlcVIIAGAGG.AAHLPGMlAuhT.sLPVIGVPVp........op.sLsGhDSLLSIVQMPsGlPVATlAIG..GApNAuLLAspILuhp....Dsp.ltp+LppaRpphpppV .................................................hVullMGSpSDas.sM.......pcAu.p.h.L.-p..hu....................ls....a....E....s....c.V...lSAHRTP-hhhpaAcpAcp...+G...h..c..V.IIAGAGG.AAHL.PGMlAuhT.slPVIGV..P.Vp..............op...s...L...sGhDSLhSIVQMPs...Gl..P..V..A..T..l..AIG.....sGAtNAuLLAspILuh..p....c.p.....lt....p+lttacpt.pp................................................... 0 517 988 1277 +1154 PF00586 AIRS AIR synthase related protein, N-terminal domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome Group Domain This family includes Hydrogen expression/formation protein HypE Swiss:P24193, AIR synthases Swiss:P08178 EC:6.3.3.1, FGAM synthase Swiss:P35852 EC:6.3.5.3 and selenide, water dikinase Swiss:P16456 EC:2.7.9.3. The N-terminal domain of AIR synthase forms the dimer interface of the protein, and is suggested as a putative ATP binding domain [1]. 
21.00 11.40 21.10 11.40 20.60 11.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.29 0.72 -3.75 856 17593 2009-01-15 18:05:59 2003-04-07 12:59:11 19 63 4701 78 4854 13670 7415 103.20 23 21.07 CHANGED G.-DuAllchs....................................sttlshss.Dshsstsp...........h.sshs...hGtpulusslsDls..uhGA.cPl.shhsslsh.......t.........t.........................................l.pphlpGhscssp..phGs...sllGGco ....................................................................................tsuulhp.ls...................................tptt.ls..hssDth.s..s....psc.......................t.ps.hs....hGtpulusslsDlh................shG..A..c.P.........l...hh.hshlsh............sp.h...ssp.h...........................................h.ptllp.....Ghuptsp...ph..Gh.......sllGGcs....................................................... 0 1572 3133 4132 +1155 PF02769 AIRS_C AIR synthase related protein, C-terminal domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome Group Domain This family includes Hydrogen expression/formation protein HypE Swiss:P24193, AIR synthases Swiss:P08178 EC:6.3.3.1, FGAM synthase Swiss:P35852 EC:6.3.5.3 and selenide, water dikinase Swiss:P16456 EC:2.7.9.3. The function of the C-terminal domain of AIR synthase is unclear, but the cleft formed between N and C domains is postulated as a sulphate binding site [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.95 0.71 -4.21 169 19206 2009-01-15 18:05:59 2003-04-07 12:59:11 17 69 4747 77 5269 15059 7896 157.80 18 34.84 CHANGED psGDhllhlus........sGltusuhhtttphhttss.................hs-hhhp.phlhhhhhhhth....thlpuhpDlosGGLssslsEhs.t.sthGhplshs........................shhp.h............h-hhhs..................................tspGthhlslssppt....pphhphhp..pht......shhlGplssptththhht ...................................................................................sGshllhhus......................Ght..usu....h..s...h..h...p..p.h.h.t.tss.h................................................hscsh.h..p.......p.h..hp.....s...h..tt....hhth...............shl.....p..uhpDlo.sGGL...h.....s............slsc.....hs...t..s.....u......t......h........usp...l.chs..................................h.shh..p.h..............................t.h-...h.h.hs......................................................................................................tsp....u...t....h.....l.l.s...l...ssppt.........p..thh.t.h.hp........p..t.sh.........t.....shhIGplspptt......ht.................................................................................................................. 0 1677 3348 4466 +1156 PF04988 AKAP95 A-kinase anchoring protein 95 (AKAP95) Moxon SJ anon Pfam-B_5366 (release 7.6) Family A-kinase (or PKA)-anchoring protein AKAP95 is implicated in mitotic chromosome condensation by acting as a targeting molecule for the condensin complex. The protein contains two zinc fingers which are thought to mediate the binding of AKAP95 to DNA [1]. 
24.40 24.40 24.40 26.90 24.30 24.30 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.47 9 156 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 41 0 77 134 1 157.10 53 28.51 CHANGED FsCShCKFRTFp-c-IppHhpSshH+Epl+al..ps+LsK.sh-FLpEYlsNKhKKTtpc+pphpsppthhpph...tDlhcGlu.-cFMpKVEAsHCuACDlaIPshapslQcHL+S.sHspNR+thhEQhK+sSlhsA+SILNN+hltp+LE+YlKGENPFssssc- .....................FsCSlCKFRoFp-cEIppHL-S+FHKEph+aI..uTKLPKpTs-FLQEYlsNKsKKT-pR+pphpsppsshpph..-pDlhculu..hEHFh+KVEAA...HCuACDlaIPhpasllQ+HLKSsDHN+NR+hhhEQ.K+sSLhVA+SIL...NN+hIsc+LE+YLKGENPFssps..t................ 0 3 8 28 +1157 PF02983 Pro_Al_protease AL_protease; Alpha-lytic protease prodomain Griffiths-Jones SR anon Structural domain Domain \N 20.50 20.50 21.10 21.10 19.90 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.07 0.72 -4.01 35 302 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 88 14 99 313 0 59.80 28 15.82 CHANGED polucLcuspssL.s........sush...susuWaVDspoNcVVVsucsss...uusshschuussuutss ........................ohspLcuupspL.s.............sAsh......ssou..WhVDspoNpVVVs.sssss.....susphsphstssss.s.t...................... 
0 32 73 96 +1159 PF00490 ALAD Delta-aminolevulinic acid dehydratase Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.90 21.90 22.50 23.40 21.70 21.80 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.83 0.70 -5.54 165 3912 2012-10-03 05:58:16 2003-04-07 12:59:11 16 9 3698 72 1117 2899 2727 314.40 48 95.89 CHANGED hshpR.RRhRpstslRclVpE.TpLsssDLIhPlFVp-Gps...pp.IsSMPGlhRhS.l.DpLhcpsccshplGIsulhLFGl...s.pt..KDttGopA..as.ssGllpcAlRtlKcth.P-.lhllsDVsLstYTsHGHsGll......................t..s..pl.NDtTlchLs+tAlupAcAGADllAPSDMMDGRVuAIRpALDps.GapclsIMSYuAKYASuFYGPFR-AssSu..sphG.............D++oYQMDP.A.NscEAlREltLDlpEGADhlMVKPuhsYLDll+cl+cpa.s...lPlsAYpVSGEYAMlKAAAp..pGWlD.cccllhEoLhuhKRAGADhIlTYaAh-sA..c.hL .......................h..hpR.RRlRpsshhRphl.p..E..s....p.L.s.........s..sDLIhPlFVh..........-G..p.s......p..ptl..sSMPGVh.Rh......o......l..-.pLh.ccscchsp....hGItu.lhLFGl......s.pp....KDtt...GopA..as..-.Gllp+AlR.t.lKpp..h..P-.l.........hlhsDlCLstYTsHGHCGll....................csu..tV.NDtTlchLs+pAlu.....pAcAGADllAPSsMMDGplsAIRpALDp......s...G.a.p.s..ssIMuYosK..YASu...FYGPF.R.........DAssSu..sp.hG.............DRKoYQMDPu.N.p.cEAlREsthDltEGADhlMVKPuhsYLDllRcl+-...ph....p....lPlsAYQVSGEYAMlKAAAt...sGW.lD.cccllhE..oLhuhKRA..GADhIlTYaAh-sAch.......................................... 0 362 710 948 +1160 PF00171 Aldedh aldedh; Aldehyde dehydrogenase family Bateman A, Sonnhammer ELL anon Prosite Family This family of dehydrogenases act on aldehyde substrates. Members use NADP as a cofactor. The family includes the following members: The prototypical members are the aldehyde dehydrogenases Swiss:P00352 EC:1.2.1.3. Succinate-semialdehyde dehydrogenase Swiss:P25526 EC:1.2.1.16. Lactaldehyde dehydrogenase Swiss:P25553 EC:1.2.1.22. Benzaldehyde dehydrogenase Swiss:P43503 EC:1.2.1.28. Methylmalonate-semialdehyde dehydrogenase Swiss:Q02252 EC:1.2.1.27. Glyceraldehyde-3-phosphate dehydrogenase Swiss:P81406 EC:1.2.1.9. 
Delta-1-pyrroline-5-carboxylate dehydrogenase Swiss:P30038 EC: 1.5.1.12. Acetaldehyde dehydrogenase Swiss:P17547 EC:1.2.1.10. Glutamate-5-semialdehyde dehydrogenase Swiss:P07004 EC:1.2.1.41. This family also includes omega crystallin Swiss:P30842 an eye lens protein from squid and octopus that has little aldehyde dehydrogenase activity. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.37 0.70 -6.16 119 43773 2012-10-02 17:28:28 2003-04-07 12:59:11 17 120 5325 607 12917 36315 23835 407.20 25 80.35 CHANGED ahsups.......pp.hpshsPu.....sschlupls.tustp.Dl-pAlpuAcp.Ahp.......t...W.pphsstcRuplLt+hAchlc.p+t--lApl.sh-sGKslt...pupt.-ls.ts.....hshhcaaAsh..scc.hpG.ph......hs..s..........sthshsh+p.Pl.GVlusIsPWNaPlhhs.shKluPALAsGNolVlK.Pup.TPlou.hhluclhp-.A....GlPsGVlNll...sG..GppsGpsls.pcsplctloFTGSotsGcplhpt.Au..pp...l....K+lsLELGGKs.....shIVhsDA.D.l.-tAlpsslhut.ahssGQsChusoRllVpcu.la-c..Flcclspts.ppl.clGs...P.h-..........ssplGPllsppphc+lpphlppuhpcGAclls.GG..............tpG.halpPTlh...ss.VsschplhpEElFGPVlslhpac..s.......-EA.lphANsopYGLuuulaTpDls+uhchucpl..csGtValNsh...tsss.ts..PFGGhKpSG.hG..R-sG.ttulcpasphKsl 
.......................................................................................................................................................................................................h.sP.............st.p....h..t..t...h..........s...........s......t...c.l..p.t.A.l....t..s...A..pt...Aht.......................t.W...t.p..h..s...s..t.p...........R........s........p....h...L........t..........c.h........u........p............h..l..........p........p........p...........t..........c......p.....l..........u.......t........h................s........h.........-..........t........G............K..s..ht..............p.uts........-....lt....t.s.......................h.p.h.h.....c.a..h..u...th.......sc...p.....h......t.....s....ph..............................h....t...............tshh..s.h.h.h.p...p.....P..l....G....V........l....us..I............s..P...a..N....a....Ph......h...h...s...s..h.ph..u....s.A............L..s..s..G.N.s..l..l.l....K....P.u.....p....p....s.....s.....h....s....s.....h.....h....l.....s.....c.....l....h....p....c....A......................G.l.P...t...G...l....l..s.l.l............t.G.......u.....t....p.....s.....u...p...t...L........h....p..c.......s...t...l....sh..............l..............s......F.T..G....S.s..t.....s........G..c....t..l.....h........p.....t...uu........tp........h..................p....l..hhE......h........G.Gp..s.....s.h.l.....V......h.....s........-......A.......D......l.......-.....t.....A...l...p.......s.h...h..t..........u...t.....h.......h..s....s...GQ.......h.C.......s...u..s.p...p.........l.l.V..p.........c.........s.........l.........h......-.p.........hhpt...l.........h.......t.t.......h.....p......p......h.......p....h......G.s......s...hp........................ts...t...h..G.s.....l...l......s.t.t........t.h..p....p..l.............t.h...l...p...t...u...h....t......p.....G......u...p.....l...h.h....uG.............................tt........G....h.a...ht.P....T.....lh...................t...t....s.t............p....
..h.....t...l....h.....p............c.....E..........h..F.....G.....P.....lhs..........l.h.t.hp.....s...............................c...-..s.....l....t....h....s.....N....s....s....t...............a....G..L.....s.........u...s.........l.......a.....o......p......s........h......t......p......s..........h......p.....................h...t.......p...t.l..........cs....G.........h.....l..............h........l.......Nt..........................s................h................s........FG.................G....h........t..t..S...........G....h.G....pt.u....s.ht.a.p.p................................................................................................................................................................ 0 3478 7376 10497 +1161 PF01081 Aldolase KDPG and KHG aldolase Finn RD, Bateman A anon Prosite Family This family includes the following members: 4-hydroxy-2-oxoglutarate aldolase (KHG-aldolase) Phospho-2-dehydro-3-deoxygluconate aldolase (KDPG-aldolase) 22.20 22.20 22.30 23.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.02 0.71 -5.14 5 4204 2012-10-03 05:58:16 2003-04-07 12:59:11 14 12 2642 49 825 2763 739 192.20 33 90.12 CHANGED I-ulL+cA+llPVIslcct-DAlPlA-ALlAGGlRVLEVTLRTpsAlDAI+hlt+pRP-hlVGAGTVLsApQlupApcAGAQFlVSPGLTsDLlKtus-schPLlPGVuTPSEIMlGL-hGlccFKFFPAEssGGVsAlKAlAGPFuQVRFCPTGGIsPuNlRDYLAlPullClGGSWlVsuuhlpssDas+IspL ..........................................h....hppt.sllsVlts.ps.h.-cA.lshucAL.lpGG.lp.s.lElTh......p......o......s.....s......A....h......c........u....I....c....t...l....t.................c........p.....h.....s....c.........s.l.l..GAGTVLssp..psctsh.pAGA...pFlVSP.u.h.st...pllctsp.p.t....slP......hlPGssTsoElhtAh.-hGh.chlKhF..PA..p..sh..G.....ss..hl+Alt..u..P..h.s..p..l..p.h.h.P.TGGlo..s..s..N..htcalshs.s.lss.s.hG.S.hL.ss.ts....t.t.tpattlt.................................. 
0 228 513 683 +1162 PF00596 Aldolase_II Aldolase_class_II; Class II Aldolase and Adducin N-terminal domain Bateman A anon MRC-LMB Genome Group Domain This family includes class II aldolases and adducins which have not been ascribed any enzymatic function. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.46 0.71 -4.68 502 9699 2009-01-15 18:05:59 2003-04-07 12:59:11 16 32 3654 97 2299 6508 3435 184.40 24 69.68 CHANGED pc.l....stss+hhs.phGh.sp.ussGslSs..+..............................s...............tsthl.lsPo..Ghphsplp...s.scllhl.....c...h........c.G..phl.....................t.................t...pso............u-hshHttlapt.....ps.-spullHsHoshusuhu.h...................tp.l....s.....hstptst.h...........splsh.............h..hp.......G..tl....................u..pplspsht.......t...........................s.p...ulllpsH..GllshG......p.oltcAhhhhhhl....Epsschtl ............................................................................plhpsstths..phGh..s........p..hstGNl..Ss...p........................................t............psthl.I....pPo...Gh...t.ap...pls.......s..schl..hl...c...h........s.G.....chl....................................pt.......................thpPS......u-.hsh.HhtlYpt...............hs.ch.....tullHsHos..tusuhu.h.................................spsl...s...............hshptst..hhh......................sslsh..................h..h.hs........u.p..pcl..........................ups.ls..csht.........................................ptt..ulLlpsH....Ghhsh.G........c.s.hpcAht.hh.hlEpsAph..h...................................................... 0 668 1344 1864 +1163 PF03752 ALF DUF312; Short repeats of unknown function Yeats C anon Yeats C Repeat This set of repeats is found in a small family of secreted proteins of no known function, though they are possibly involved in signal transduction. ALF stands for Alanine-rich (AL) - conserved Phenylalanine (F). 
20.80 20.80 21.00 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.82 0.72 -4.10 42 301 2009-01-15 18:05:59 2003-04-07 12:59:11 8 24 42 0 140 347 0 41.90 30 20.81 CHANGED Rsslsplhssu..GssVppAAptAL....sGsspslcsFLpsG.htAt ...............Rhtssplhssu....GssV+pAAptAL....sGo.spslcpFLpsG.h.A........ 0 39 110 133 +1164 PF05208 ALG3 ALG3 protein Wood V, Bateman A anon Pfam-B_3416 (release 7.7) Family The formation of N-glycosidic linkages of glycoproteins involves the ordered assembly of the common Glc3Man9GlcNAc2 core-oligosaccharide on the lipid carrier dolichyl pyrophosphate. Whereas early mannosylation steps occur on the cytoplasmic side of the endoplasmic reticulum with GDP-Man as donor, the final reactions from Man5GlcNAc2-PP-Dol to Man9GlcNAc2-PP-Dol on the lumenal side use Dol-P-Man [3]. ALG3 gene encodes the Dol-P-Man:Man5GlcNAc2-PP-Dol mannosyltransferase. 21.50 21.50 21.70 22.00 20.30 20.90 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.58 0.70 -5.23 7 352 2012-10-03 03:08:05 2003-04-07 12:59:11 8 7 268 0 241 353 16 311.30 39 79.10 CHANGED lsshLlLsEshlshllIp+VPYTEIDWtAYMpplpuhLs.GppDYopLpGsTGPLVYPAuaVYIYouLYaLTstGsslhhAQhlFAslYllsLulVhhlY.ppt+lPPalhsLhsho.hRlHSIaVLRLFNDshAhLhLahul......hhhcp+.......WthGuhhaSlAVuVKMNlLLasPAl.LlLhLtshu.hltTl.pLsshullQlllGlPFLhpaPspYLppuFDLGRhF.a+WTVNaRFlscclF.s+pFplsLLhhHLhLLlsFsps+Ws..t......+h..plls.lhptt......hchpht........p..sssphhos...............................................hhsuNhIGlhhuRSLHYQFYsWYaaoLPYLlWpss...hshhlthllhslhEasWNsYPSTstSS 
.......................h...hlhhh-hhlsh.hll.pVsY.......TEIDWpsYMpp.lphhls.G...phDYsplc..GsTGPLVYPAGaVYlaphLY.alT...s.p........G................p.............s...lhhAQhlFuhlYlhsLhl..Vh.hhY....p...p.............tp.............l...PP.a...h....hh.hhs.ho.hRlHSIaVLRLFNDshAh.hhha.hul...............hh.h...p.pp...........................Wh..hushha.....Shu..VulKMNlLLahPul..hhlh.lh.t......hu..hhtsl.hl.h.l.h................shl....Q.llluhPFL...h.........p.p...P...h...........s........YlppuF-huR.FhacWTVNW+FlsEch...............Fhs+tFtlsLLh.hHlhhLhhFhhhpWh.........................p..t..t.h.ht..lh.p.........................................tst.hh...h........................................................................hhsuNhIGlhhuRSLHYQFasWahaslPaLLats....................hs..hh..lt.hhlhhh.EhsWNsaPSTshSS..................................... 0 90 136 201 +1165 PF03561 Allantoicase Allantoicase repeat Finn RD, Yeats C anon Yeats C Family This family is found in pairs in Allantoicases, forming the majority of the protein. These proteins allow the use of purines as secondary nitrogen sources in nitrogen-limiting conditions through the reaction: allantoate + H(2)0 = (-)-ureidoglycolate + urea. 
21.60 21.60 30.60 23.80 19.10 20.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.10 0.71 -4.75 13 1158 2012-10-03 19:46:52 2003-04-07 12:59:11 10 13 467 6 534 1098 1048 143.30 39 75.20 CHANGED GGpVlusSDpaFuss-NLlpPucsh..........chhDGWETRR+Rp................GH.DWslI+LGss.GhIcuh-lDTsaFpGNasstlSl-Ashhpsps..........................scsspWsplLstpcht...PcpcHhat...lsspp.aTHlRLsIaPDGGluRlRlaG .......................................................................GutslssoDpaF.u.s.spsl.LtPstuh..........phhD..GWETRR+Rp.....................sGa..DWsll+Luts...G......h.....Ict.l-lDTuaFpGNaPstsSlpAshhss...s..s..............................................................................................................sps.ttWpp.lls.p.sp.Lp......ss...p...c..Hha........t..............l......s......s......t.....p.......s....h......THlRlsla.....P.DGGluRLRlaG.............................. 0 146 279 433 +1166 PF04864 Alliinase_C Allinase Mifsud W anon Pfam-B_4527 (release 7.6) Domain Allicin is a thiosulphinate that gives rise to dithiines, allyl sulphides and ajoenes, the three groups of active compounds in Allium species. Allicin is synthesised from sulfoxide cysteine derivatives by alliinase (EC:4.4.1.4), whose C-S lyase activity cleaves C(beta)-S(gamma) bonds. It is thought that this enzyme forms part of a primitive plant defence system. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.28 0.70 -5.80 10 180 2012-10-02 18:26:03 2003-04-07 12:59:11 8 7 33 19 77 718 304 318.40 46 79.58 CHANGED sDlssGDPhFLEcYWp++t-soAVllSGWHRMSYF..tsss.pFlSsEL-+pI+cLHclVGNAsT--RaIVFGTGoTQLlpAslaALSPs..ssssusPspVVAssPYYusa+cQTsaFcuptY+WcGsAusah....cssssssaIELVTSPNNPDGtLRcuVl...cGopu..IaDhAYYWPHYTPITtsuDEDIMLFThSKhTGHuGSRFGWALVKDccVAcKhlcYlptNohGVS+EoQLRsLKILK.llssh+sppGo....clFpFGacph+pRWcpLsclVspSs.RFSLQch.ss-YCNYFp+h+sPSPuYAWlKCEh-c-pDCtphL+.su+IhTpuGshFtussRYVRLSlI+opDDFD.hlpRLsshl ...........................................................................................................................shspGDshhhE.aWh.p.ptttuslllssWppMSYh...ss...............Flp.EL-+pI+c.LH..c.hV.GN.A.s.s.cs.+.alVh.GsGsTQLhpuh..lhu.LSPs...........ssu..sp.VVu.t..sPa..Y...s.......s.a...p.......p..........Q......T........c.......a........h........c.........p........t........h........Y.......p........W.....s....G.......s.....A....s...s.a..........s.s...s..s..s.ppa...I.E......h....V..T..SPNNP.....-G......h......l......R.....c........u.....V...l........cG...s..p....u..........Ia.....D.....hs..Y.......Y........W..........P......H.....Y........T.....P........I....s......t.........t.......A........D.........c....D.......I.....M..L.F...T...hS.Kh.TGHuGS...............RhG.WAL....l.KD...c..pVh..p..+...hh...p.Yhp..h..NT..G.sS.....+-oQLRuhKlL...+...tVsshhc.......s.............p......p..st..........cl.pFGacphccRWhplp.sllspS..c.RFSlpch.ps......pYCsaFp+hpsssP................ua..AWlKCEhE..-...s.cDChphhp.sst.I.spsG...at......s.ssc.aVRlShlcpp-sFs.hhthLt.h........................................................................................................................................ 
0 14 57 67 +1167 PF04030 ALO D-arabinono-1,4-lactone oxidase Wood V, Finn RD anon Pfam-B_33547 (release 7.3); Family This domain is specific to D-arabinono-1,4-lactone oxidase EC:1.1.3.- , which is involved in the final step of the D-erythroascorbic acid biosynthesis pathway [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.81 0.70 -4.80 34 1395 2012-10-02 00:48:38 2003-04-07 12:59:11 9 15 826 5 608 1305 206 200.00 21 43.75 CHANGED lTlpslPuaslctppps.......hshppllcsh-phh..pps-ahchaWFPaocpshlhpsscssts...........sp.tpphhsphlsphhapsLhhlu.chhPslsPhlp+hshphhhu..........pptlssShcsFshsphh..p.h..EaulPhcpu.-sLpcLcshlspps.................hhlHaPlEVRhsssD......................................csaL..............SsshscsosYIssphYRshs.......hppaapthEslhpph.GGRPHWuKhaphsspp.............................lpphY.schccFhplRcchDPpshFls..sahc.+h .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................E.......a.......l..Ph.....p.......t....h.............t..............h..............l...p...pltthhptpt.........................h...hhs...l.phRhsts..s.......................................p...hL.................u..s.........t....p............s.....h.......hl.s.h...h..h.....t..s..t.................htp.hh..p....t.....h....-....p....h.........h....h.....c....a...u..G.......RP...HWu....K..........p..p..h..s.tpp...........................................................................................h..p...ph..Y....P...c.....h..pcFhtlRcc..h.DP..p..shFhs..sahcp......
............................................................................................... 0 177 387 524 +1168 PF00128 Alpha-amylase alpha-amylase; Alpha amylase, catalytic domain Sonnhammer ELL, Griffiths-Jones SR anon Swissprot Domain Alpha amylase is classified as family 13 of the glycosyl hydrolases. The structure is an 8 stranded alpha/beta barrel containing the active site, interrupted by a ~70 a.a. calcium-binding domain protruding between beta strand 3 and alpha helix 3, and a carboxyl-terminal Greek key beta-barrel domain. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.03 0.70 -5.03 31 28372 2012-10-03 05:44:19 2003-04-07 12:59:11 19 300 4623 473 6746 23482 3056 261.10 18 44.76 CHANGED GDLpGIhpKL-..YLKc........LGlouIWloPlac.........Ss......u.HGYDhpDYpplsspaGTh-DhcpLlscsHp..+GlKlIlDhVsNHoSsppth...........Fpcuhpptss...hpcahh..h.h..........t.psssshht......h.stosaph......ppps.....h.hhhshsDLNhcNss.........lhpthhc....shcaWlcpG......lDGaRlDsspclsc............................sah+patpphsph.........thhhlGEhatss...sp.hhhhstsshhshs...hhsahphthtpssp.....hp.ss.s....htchpphlschhpthsp...sthtssalsNHDpsR.......hhothusst................ph+hshshlhshp.GsPhIYhGpEhGhoGsp.........DP 
...............................................................................................................l.s....al.t.p........lGh.....stl.......l.....P..hhp......................................ss...........ts.h.G..Y..s........s..........h.......s.......a............h.........s.........l.........s.........s.........p.........a..............G..............s.........................p...........-...........h........c...........p...........h....l....pth.+p..........tG.......l...............p...l.l......hDh......Vh....N......H.h...u......t...p...p........h...................................................................................t........t.................t...h.......................................................................................................................h..t..............................................................................p.l..s....h....t....p............................................................hh.p...h..........h...p.......ht.a.h.h.p...h..................lc.Gh...R.....h.Dhh.t..h..h...........................................................................................................h..h.t....h...h..t....h.t...t.......................................h.hhh.u.E...h......t.............................................................................................................t.........................................t.....................................................................................................................h...........................................................................................................h.......t....s..H.D.................................................................................................................................................h.......h...h.......t.................................h................................................................................................................................................................
........................................................................... 1 2088 4020 5641 +1169 PF02903 Alpha-amylase_N alpha-amylase_N; Alpha amylase, N-terminal ig-like domain Griffiths-Jones SR anon Structural domain Domain \N 21.20 21.20 21.50 21.30 21.10 21.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.77 0.71 -4.14 38 1507 2009-09-11 01:10:01 2003-04-07 12:59:11 9 20 1336 56 147 988 29 118.10 30 20.02 CHANGED Mh.htAIaH+PpssaAYsYsppplHIRLRTcKsDlp.cVtLlauDsYph..................th.hpphsMcKhuos...plaDYW.splsssh+.R..l+YsFpLps.sscphhas-cG...hapctshs...hthaFpaPalp ......................................shaHhP.s...as...h...h...h...s...c...c.....p...LpIpLps.....p.p.p.D.l.s....c......lh.L.hhtcshp................................tshs...McKht..S.s..........sh..a..-..a..W....p..ssl..s..h..s...t..R...hpYsFcLhs..c....scphaas.spG.............hs...p..h.s.hc................................................. 1 45 79 112 +1170 PF02296 Alpha_adaptin_C Alpha adaptin AP2, C-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_8859 (release 5.2) Family Alpha adaptin is a hetero tetramer which regulates clathrin-bud formation. The carboxyl-terminal appendage of the alpha subunit regulates translocation of endocytic accessory proteins to the bud site. 24.20 24.20 24.30 24.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.39 0.71 -4.12 21 349 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 230 11 231 333 4 109.80 43 12.10 CHANGED FhcPspl.oupDFFpRWKQluusspEAQ+IFt..........uspshstshs+phl.pGFthulLpsVDPNscNhVuAullpop.uGp...lGCLLRLEPN.hpspMaRLTlRuoc-sVsptLhcl ..................FhpPsch.su.p-FFpRWKQLu.....ss...t..p.E..sQcIFp.............us+sh-....tp..h.s+s.tl....hGFuhslL...cs.V..DPNPp...N...hV.G.AGllpT...c......ssp........lGCLLRLEP....N...hpupMaRLTlRooc-.sVsptLsc............... 
0 80 124 184 +1171 PF02883 Alpha_adaptinC2 Adaptin C-terminal domain Bateman A, Griffiths-Jones SR, Mian N anon Pfam-B_8859 (release 5.2) Domain Alpha adaptin is a heterotetramer which regulates clathrin-bud formation. The carboxyl-terminal appendage of the alpha subunit regulates translocation of endocytic accessory proteins to the bud site. This ig-fold domain is found in alpha, beta and gamma adaptins. 20.60 20.60 20.60 20.90 20.50 20.20 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.62 0.71 -3.77 102 1431 2012-10-03 16:25:20 2003-04-07 12:59:11 15 26 301 45 862 1378 12 109.40 19 13.69 CHANGED ssshlapssslplthphph..........tpsshhplphhhsN...p.sspslsshshp..hulP.........pshplplpss.sssslss.......ssplpQhlhlts...........hph+l+lsaphs...up.th...p.t.h...hP .....................................hhlapps.slp.lthphph............tpsshhhlhlphsN.......p..sspslsshshQ....hs..ss.............................cshplp.Lpss...sssplss..........ssplpQhhhlt.s.......................t...p..hplclsap.hs.st.......p.h........................................ 1 253 405 648 +1172 PF00944 Peptidase_S3 Alpha_core; Alphavirus core protein Finn RD, Bateman A anon Pfam-B_266 (release 3.0) Domain Also known as coat protein C and capsid protein C. This makes the literature very confusing. Alphaviruses consist of a nucleoprotein core, a lipid membrane which envelopes the core, and glycoprotein spikes protruding from the lipid membrane. 
20.70 20.70 20.80 20.80 20.30 20.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.86 0.71 -4.81 5 862 2012-10-02 13:45:52 2003-04-07 12:59:11 14 3 54 46 0 511 4 104.80 71 12.60 CHANGED sMKLEuD+TFPlhL.DGKVNGYAClVGGKVhKPLHVKGTIDNPsLAKLKFKKSSpYDLEaAcVPssMKSDAFpYToEKPEGFYNWHHGAVQYSNGRFTVP+GVGGKGDSGRPILDNoGKVVAIVLGGANEGuRTALSVVTWNcKGVTlKTTPEuoEpWS ...............................................................................................................GRFoVP+GVGuKGDSGRPILDNKGRVVAIVLGGsNEGSRTALSVVTWNpKGVTlKhTPEGoE.WS.......... 0 0 0 0 +1173 PF01589 Alpha_E1_glycop Alphavirus E1 glycoprotein Bateman A anon Pfam-B_587 (release 4.1) Family E1 forms a heterodimer with E2 Pfam:PF00943. The virus spikes are made up of 80 trimers of these heterodimers (sindbis virus) [2]. 25.00 25.00 25.30 27.00 18.20 24.60 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -12.88 0.70 -6.18 17 1920 2012-10-01 19:42:26 2003-04-07 12:59:11 11 5 59 57 0 941 0 247.60 48 50.15 CHANGED .tu+Ats.hsEohsYLWspNQshFWhQhshPlAulllls.hlRsh.CCh...sFLllhu........ls.ssspAYEHssslPNhVGhP.YKAlV-RsGYuPLsLplpVlpopL.PolsLEYITCcYKTVVPSPhVKCCGssECss..........ppcsDYpC+VaTGVYPFMWGGAYCFCDoENTQhSEAYV-+S-sCptD..HAtAY+sHTASLKApl+loYGsss.psl-saVNG-ossphsshKhIhGPlSoAWoPFDsKlVVYKs-VYNhDFP.YGuGpPGpFGDIQuRTss...SsDLYANTsL+LtRPuuGslHVPYTQsPSGFchWhp-+GpsLspsAPFGCpIpsNPlRA.NCAVGsIPlSlDIPDAAFTRls-uPslopLpCpVssCTaSoDFGGlAslpYpo-+sGcCuVHSHSssAsLpEuslcVhp........sGphTlHFSTASspssFhVplCus+sTCpAcCcPPKDHIVsaPspHssthhsslSsTAhoWlptlhGGsusllslullllllVshlshpR 
..........................................................................................................................................................................................................FMWG.................................................tt.s...h.sasNG.pssp.tt.phhhGPhSouaoPFDp.......................................................................................................................................................................................................................................................................................... 0 0 0 0 +1174 PF00943 Alpha_E2_glycop Alphavirus E2 glycoprotein Finn RD, Bateman A anon Pfam-B_308 (release 3.0) Family E2 forms a heterodimer with E1. The virus spikes are made up of 80 trimers of these heterodimers (sindbis virus) [3]. 19.50 19.50 21.10 21.00 19.30 19.10 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.51 0.70 -5.84 23 2166 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 59 31 0 967 0 320.80 49 61.18 CHANGED PYlAhCscCutGtsCaSPlAIEpVpu-AsDGhl+IQsSuQhGls.puGstctsKhRYhts...pclcEushsslpVpTSuPCpllushGaFlLA+CPPGDolTVuFp.sssss+uCplsa+hc.p.VGRE+ashsPpHGp-lP.....CssYspssppost..l-MH..hPsshsDpoLLSpsuGsVpls.PuG+slpYcCpC.GstpsGTs.oocpplssCsts.QC+AahsspsKW.aNSshlPR.usspshKGKlHVPFsLssuTCpVPLA.pPsVpauh+ploL+LHPc+PTLLThRpLGs-PphocEWIotsssRshsVsccGlEYsWGNpsPlRlWAQ.cousGsPHGaPHEIlpaYYphYPssTlsslsusuhllhluluuSshhhspARpcCLTPYsLTPsAtlP......hsLulLCC .......PYlu+CssCtcu.sCaSPltIEcVhs-ucDGhlRIQTSu.aG.D.posptctp+hRYMss...hpltEu.hcplpVpTSuPCpllshpGYFlLs+CPPGDolTVuhh.pssshHSCTVth+Vphp.VGREpYpaPP.HGtplP.....CssYsphppsousY.lpMH..h.Gshs.oThLppppGpVhlssPuGpTVpY-CpC.uspp.GTT..upchplhhCccscQChAYlsspsKWVaNSscL.R.u.sscohKGKlHlPFsLssssChVshA.tPhVp.hh+tlhL+LpscpPTLLohRpLGt-sp.ptcWlpt.sphshoVstEGlEasWGN.sPhRhWuQ..os.GsPHGaPHEll.aYYphaPhhTlhslsshshhhhlshssohhhhspsRp.ClTPYtLsPsApVP......hhlulhCC..... 
0 0 0 0 +1175 PF01563 Alpha_E3_glycop Alphavirus E3 glycoprotein Bateman A anon Pfam-B_285 (release 4.0) Family This protein is found in some alphaviruses as a virion associated spike protein [1]. 25.00 25.00 67.70 67.70 21.40 20.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.83 0.72 -4.16 25 939 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 57 13 0 592 0 56.50 63 6.95 CHANGED MClLANlTFPCspP...PsCY-+pPppTLchLEsNVDpPGY.pLLcAsl+Css..pRp+R .MClLANlTFPCsQP...PlCYsRpPcpsLshLE-NsDsPuYDELLpusV+Csu..+RtKR.... 0 0 0 0 +1176 PF03229 Alpha_GJ Alphavirus glycoprotein J Bateman A anon Pfam-B_3350 (release 6.5) Family \N 21.00 21.00 21.00 21.30 20.90 20.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.96 0.71 -3.79 5 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 8 0 3 47 0 92.70 53 76.17 CHANGED MRSLLFVVGAWVAAlVTsLoPcAALATGsTsTsGsANu....sssssAoGANATSAsAuuThAAssssPPscST.sstTPGPaPPTDFALPLVIGGLCALTLAAMGAGALLHRCCRR.AARRRQRVSYVYA ...........................hu.ls...........................................................uslAATphuPAuso....TpAPsoPaPSPhus...FAl....PLVlGGLCAlVLuAhshhtLLRRsCRt.huR...........hs............ 
0 0 0 3 +1177 PF01120 Alpha_L_fucos Alpha-L-fucosidase Finn RD, Bateman A anon Prosite Family \N 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.20 0.70 -5.55 48 2021 2012-10-03 05:44:19 2003-04-07 12:59:11 12 80 870 58 664 1863 422 324.50 24 62.37 CHANGED hlllh.........hhhts...........spspapPsh-sls............ppt.pWFc-sKFGlFlHWGlYSVPuhs.....................sEW.......................hhpp..pthsstpYt........chs..........spFssp......pFDscpWsphhKpuGAKYhlhToKHH-GFshWcS.....chot.aNssc......ssPKRDlltELspAs+cp.Gl+hGh..YaS..hh.-Waps.a.htcppstht................phppa.............................................hp.hhhsQlpELlspY.ps-llWhDGsWtt.............................ssptWpt...schhshlhphpP...slllssRhtpss.tth..........sapss.EpthPsp..........l.ppsWEsChTl.ssoWGY...phpspsapospplIcpLlcsVSpGGNLLLNlGPps-GslsshtpcpLpphGpWhchNGEuIYu .............................................................................hhhh.h.............................................................ap....p.....chGhhhHaG..sh..s.............................pW.......................................t.................ta.t...................................p...........p.tF.s..s..p........paDs..cpWschh+puGhKalllTsKHH..D.GFs....La..s..o..........p.ho.......s...asshs..........pss...+...p...Dllt..ElspAs+c.......t..GlchGl....YhS.......h...D.......h....p..p....s......a....hh..st.pt.........................................h...........................................................hp.hhhsQlpE.Ll...s.......p....Y...sp...h.s....hlWhDGs.hst.................................................stpth.ph..........pchh.phl..c.p.h.pP....................ssll......s......s......+hss.s......h....................................th.p..-..p........s.t.t...................................h...h..a......E..s..sh...o.....l....p.......suWhY.......................tpspph..K..ohppllc.h.hh...c.sVu.+s....us.l.LLNls.Pst.sGhlsstthptLtphGp.aht..................
............................................................................................................................................ 0 276 488 583 +1178 PF02232 Alpha_TIF Alpha trans-inducing protein (Alpha-TIF) Bateman A, Mian N anon Pfam-B_1799 (release 5.2) Family Alpha-TIF, a virion protein (VP16), is involved in transcriptional activation of viral immediate early (IE) promoters (alpha genes). Specificity of Swiss:P23990 for IE genes is conferred by the 400 residue N-terminal, the 80 residue C-terminal is responsible for transcriptional activation [1]. 25.00 25.00 191.50 191.30 18.40 17.60 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.14 0.70 -5.63 13 66 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 37 1 0 62 0 333.00 47 75.87 CHANGED sssss+luhPP....PpsuuPssLYpRLlpELsFsEGPuLLutLEphNEDLFSClPtNtDLYpcstlLSssss-Vlcsspsssst....ssslsLssHGstshPpsPusc--LPsYVpulQpFFhuELRAREcsYs+LLhsYC+ALlcYL+usup+shRGht.ps.pstthpp+hRphltsRYYREsA+LARLLYLHLYLoloRElSWRLaApQstpQsVFsuL+acWtQtRQFpCLFpPlLFNHGlVhLEGcPLsutcLRclNYhRccLGLPLlRsuLlEEpsuPLsp.PsFoushPRosGaLsppIRuKh-uYSpt+sssPt.pspp-HsYu+thsss.NYGoTlEAhL.cPssP .......s.pttphh.sP....PhsssPsuLapRLlc-LsFs-GPuLLotLEpWNEDLFSslPtNtDLYp-sthLSssss-Vlptsps.sst....tstlDLpsHGshshPpsPusc-sLPsYVpulpcFFhuELRAREEuYsplLssYC+ALh+YL+uou+cthRththps.pstshtcphRphIusRYYREsARLARlLaLHLYLsloRElSWRLaApQhhppslFssLph-WpptRQhsCLFpPllFNHGlVhlcGtPlpAtcLRclNYhRccLGLPLlRsuLlEEssuPLsp.PsFpushPRuuGaLhphIRuKh-uYSphtsspPp.tshpEHsYu+thsts.NYGSTlEuhL.sPss.s.................... 0 0 0 0 +1179 PF02430 AMA-1 Apical membrane antigen 1 Mian N, Bateman A anon Pfam-B_2016 (release 5.4) Family Apical membrane antigen 1 (AMA-1) is a Plasmodium asexual blood-stage antigen. It has been suggested that positive selection operates on the AMA-1 gene in regions coding for antigenic sites [1]. 
20.10 20.10 38.70 21.90 17.70 18.30 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.96 0.70 -5.61 7 1565 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 33 38 37 1845 0 343.80 69 86.34 CHANGED WpcaMtKaDIt+sHGSGIhVDLGEDApVtspsYRhPsGKCPVhGKuIhlpNus.sF.LoPluotstpl+ptGLAFPp.......................s-splSPlThssL+ph.YKcp......sDlu.CucaAushVPusspNopYRaPhVYDcpschCYlLYsuhQ.N.GsRYCssD..tsp.csuhhChpPtKscp.ppLsYhopslcpDW-psCPh+sl+sAhFGhWsD..GpC.sht.hhpcpucsh.-CspIlFptSASD.p.cphpcchsDhpclppGhpptNhphltpA.FhPluuhpucphKS+GhGhNWANYDscspKChIhsspPoCLI.spsalAhTALSSP.E.sthsaPCsIhKschhh.............................E.+tpspsht.ss.s........hP.clahS.cK-oLcCss.spphssuoCs.aalCssVEtct.lppsp.l.l...............hsp+phllIhlhhssuslllslhhhaha+.......+pcsu-.............cYD+ht ...............................................................+CPVFGKGIII....E....N.......S......s...TTF.LTPV..ATtsQcLK....D...GGFAFPP.........................TpPhh..SPMTLssM.Rc.h...YKDNc.V.....KNLDELTLCSRHA...GN...M.sP....DsD.......c...NSN...YK.YPAVYD-K-KKCHILYIAAQENNGP.RYCNKD..pSK..RNSMFCFRPAKD.cS.F.p.NYTYLSKNVV...c....N....WE...KsCP.RKNLpNAKFGL..WVD..GNCE-.I.Pa...Vp.E...h...pApDLhECN+lVF..ph..S..ASD.QPpQYEpchTDYpKIppGF+ppNtpMIKSA.FLPsGAFpuDpaKS+G+GaNWuNa..sp.....p..pKC.IFNsKPTCLINspsaIATTALSHP.E.V-.pFPCSlYKsEI.+.............................EIc+pS+php.L.ssDs...c+IlhP.RIFISsDh-SlKCPCtPEhlSNSTCp.FaVCpCVE+RAElppNNpVVl................p........................................................................................................................................................................... 
0 13 17 33 +1180 PF03913 Amb_V_allergen Amb V Allergen Finn RD anon DOMO:DM04509; Family \N 25.00 25.00 66.90 66.60 21.60 20.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.72 0.72 -4.23 3 4 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 3 3 0 8 0 42.80 61 62.87 CHANGED lssCYhAGolCGEKRuYCCSDPGRYCPWQVVCYES+cICuKNCu ..hsChhAGslCGEKRuYCCSDPGRYCPWQVVCYESpcICuKpCu 0 0 0 0 +1181 PF02948 Amelogenin Amelogenin Bateman A anon Pfam-B_402 (release 6.4) Family Amelogenins play a role in biomineralisation. They seem to regulate the formation of crystallites during the secretory stage of tooth enamel development. thought to play a major role in the structural organisation and mineralisation of developing enamel. They are found in the extracellular matrix. Mutations in X-chromosomal amelogenin can cause Amelogenesis imperfecta [1]. 21.00 21.00 21.00 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.51 0.71 -3.64 6 451 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 281 0 28 407 0 113.20 52 97.18 CHANGED lPLPPHPpHPGYVNFSYE..............VLTPLKWYQShhpHQYPsYGYEPhGGWL+p.hlP.hsPQhPpQp....phhPpLsPHHplhhl.......PQpPhh.hPuHHPhhPh.tp...pPph......PsQcPlpPpssp.P....QPQQPspopPPhp.p.PhsPpss..........pPMaPlQPLPPhlPDhPLEPW.uADKTKQEElD ..........................................................................h+p..phhP..h........s.pa............hh.shHph..............P..Q.pPh.h.....PG..pps...hsPhptp....ps....hs.....PsQpPh.Q...Ppssp...P.........QPpQP.lp...P...p.Ph...HPhQPhsPp................P.haPhQ...PLPPhlPDhPLEuWP.................................... 1 2 2 6 +1182 PF04709 AMH_N Anti-Mullerian hormone, N terminal region Kerrison ND anon DOMO:DM04835; Family Anti-Mullerian hormone, AMH is a signalling molecule involved in male and female sexual differentiation [1]. 
Defects in synthesis or action of AMH cause persistent Mullerian duct syndrome (PMDS), a rare form of male pseudohermaphroditism [2]. This family represents the N terminal part of the protein, which is not thought to be essential for activity [2].\ AMH contains a TGF-beta domain (Pfam:PF00019), at the C terminus. 19.60 19.60 20.10 20.00 19.40 17.50 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.36 0.70 -5.51 13 109 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 66 0 28 87 0 261.20 30 66.38 CHANGED lpch.sAFlEul.pcuchsppDLspFGlCssuDssushs.Lp.Luphlscsttp..tLhVLHhpEVhW.......EsslpLpFphP...hsu..uss.s....sLLlhasGshts....ss.cVohousuL.PtsQSlClScDTpYLlLsscts..........puuhpsphhshs.p+ps-.GspLshs-LpthLhGscs+shTphTPlLLh............sus.hsucGplcssPhPps...........sPssT...............sFLspLpRFlRslLsP..pupsss.u.....LshsuLpoLPph.LsLSsotu.LtpLlsSppPolhlF.......sshsusLps.ppuphshpsuLlpcLtt+Lppstscl....Rshsuhsssss.hLpRLhsLssh....ssssuuuss....phRALLLLKALQoVpspWctcR ............................pth.tshhpsl..putht.ps.thFGhCss..tttsh...hp..ltt.ht....t..th.shH.tt.s.hh.......p....Lhhp.s....................hhLhh..s.........st.tl..htt..l....ttQslC.otsTpalhLsst.s..........tut....h...h..s.t...psp...tt..................h........ph...Lhstt.ts.hphpPhLLh................................ssph.hphhs.hs...................................ss.ps..........................FLppLpthlttlhs.......ptt............l..t.htshP...lshSpsts.Lttllpupt..lh.h........t..........s.ht...slhp.lh.cLptshhph....pt..s...sh...hht+Lhths............t...tssts..........hpuhLLLKALQslhttap.p................................. 
2 1 3 9 +1183 PF01425 Amidase Amidase Bateman A anon Pfam-B_191 (release 2.1) Family \N 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.52 0.70 -5.63 44 12240 2009-01-15 18:05:59 2003-04-07 12:59:11 16 79 4062 116 4833 11144 9300 393.40 26 83.65 CHANGED -llcthls+hpthppplsshhphhhppAhptApphcppht.......tssLtGlPlulKDslslp.GhsoTsGothhp..shhssh..DuslVcpL+puGAlllGKTNhsEhuhussspssha.........GsspNPashsc...ssGGSSGGuAAsVAushsshulGoDsGGSIRhPAuhCGlsGhK......PThGRluptGlh.....shssohDpsGshu+sVcDsulhhpsltGt-.tDssshs.............htt.httt.ptl+lGlhpp.h.......................................h.shsstlpcshppshttLpphG.tplhchphs.ht.hhs.hhhhhhssps.t......................h.chhsphp..s..phhsstlptpl.hsshshsth......hhpspph+t.hhpcphtphapp...............lDlllsPosstsshphsth..............t.sthhth.shs..ssssshsGhPulo......lPsuhs.pp.................GLPlGlQlhupthp-ptlL .................................................................................................................hhpthhtph...t.....t...h......p............t.........l.p...s.....h...h..t........h....................p.....t...A..h.t...t....A.......p..t.h......-.......t..............t...........h...................................t.............t.........................................s.............s......L.t......GlP.lulKD...h.....s...s...............t.....G.....h..To.s.G..o......t...h.h..t.................s..h.....h...s.........s.h...........D...A....s..l..........V..pp.L........p....p........u........G............ul................h.l...........GKT.NhsEa.....u....h.....u..s.....s.....s..p...s..s..h..a....................................G.....s.p...N...P......a...........s.........h............s.......t....................ssG.G..SSu..Go..AA......A..V...............A..u.....s.............h..............s............s.h.....ulGoD...s.G..............G..S.....lR....PA.u...hsGl.........sG.hK.........Po......h..G......t..l....s....p...h...Ghh.............
.....sh.s.s.s...h..-.....p.....h..G..P...hs..+...o......l...p...Dsu.hl...h.........p...s.l.............s.......u.......-.....s......t.....c.s.t..sh....................................h...........t................h.........t.....t.......s.........h..........p.........s...........h....+...l...u.l...p.p.hh................................................h....t..h...s...s...p...l.t...t.s..h.pp...s..h.p..h...L...p.p...h......G....t....p.............l........p.........h.....s.....h.......s..........h...p...........s......h.....t......hh...h.h....h..h.......s..-..st.t.t....................................................................h..p.h..h..h..t..t..p....s.........p.t...h....s..........t....h........p.......t....t...h.....h........h..u..t.....h..h...h.....s..s...t........................................h...hpu...t..ph...cp......hh...t..p..p..h.t.p..hhpp........................................h.D..hl..ls.Ps.s.s..t....s..s...th..s..t.....................................s.h...h....h...h.................h....h......s.......................h......h...s..h..s.......G...h.Pu.lo..............lP.s....uhs....p................................................................................Gh.P.l.G.lQlhu.t.hhp-ttl............................................................................................................................................................................................................................................................................................. 0 1337 2800 4004 +1184 PF01832 Glucosaminidase Amidase_4; Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase Bateman A anon Pfam-B_888 (release 4.0) Family This family includes Mannosyl-glycoprotein endo-beta-N-acetylglucosaminidase EC:3.2.1.96. As well as the flageller protein J Swiss:P75942 that has been shown to hydrolyse peptidoglycan [1]. 
21.70 21.70 21.70 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.93 0.71 -4.05 162 6435 2012-10-03 00:09:25 2003-04-07 12:59:11 15 195 2899 3 744 3961 1635 133.40 25 32.41 CHANGED htstApphtpc.hsl.s.slhlA..QAhLESuW......GpSt.....huppsp.NLFGIKut..........tst...htht.....TtEh.p.sth.phpspFRsYsohp-Shp-asp.hlpssst......Y...p..sshp..t..tpshpth....sptlpts.G....YA....T......D.PsYupKlhsll...pp.hsL ...............................h....h.phtpc.hs...l..s.shhlA..QAhLESua...........GpSp...........................hup.p.sp...NlFGlKu......................h.h...........................t.ch.s..spt.h.shtspa...+...t......a....sS.....hp.-u.lp.sasp..hlpps.t...................Y.........p..shhp....t.........ttshpps..........sttlppt.t...........YA.....T.................D..spYupc.lhsllppht............................................................................ 0 222 434 613 +1185 PF02274 Amidinotransf Amidinotransferase Mian N, Bateman A anon Pfam-B_5784 (release 5.2) and Pfam-B_1850 (release 5.5) Family This family contains glycine (EC:2.1.4.1) and inosamine (EC:2.1.4.2) amidinotransferases, enzymes involved in creatine and streptomycin biosynthesis respectively. This family also includes arginine deiminases, EC:3.5.3.6. These enzymes catalyse the reaction: arginine + H2O <=> citrulline + NH3. Also found in this family is the Streptococcus anti tumour glycoprotein [4] (Swiss:P16962). 
22.00 22.00 22.00 22.00 21.90 21.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.74 0.70 -5.32 55 3546 2012-10-01 20:45:11 2003-04-07 12:59:11 12 9 2331 60 857 2617 1976 321.90 24 89.76 CHANGED hhR.PsphthN.spostsshh.............sst.h.ppAhpEacshlptL+pp.GlcVhhlcch.t..................................................................................sshP-ulFspshhus....ts..uslslhPMhs.sRRpEpphshhhhhpp.....hth..hhchs.thc.psthLEG.Gshlhspppplshusts.Rosppslcphspphshp..shhhps...hssptpshhHhsshhslusphsllshsslhsst............................cphlhpt.......Lptss.hcl.Iplu.......tcphh.shuuNhLpl.....s......h..s.hAass.s.sppphhcp............tscllthslsplp.tGGGusRCMhstlah ......................................................................................................................................................hpPt..h.phh.s.hpphh...................ssh..lptA.p.c.EH-taspsL......+pp...G...l..c..Vhhlc.s.l.h.t.............................................................................................s.shPsh....h..F....sRDstsh..............ls....ss...l....slspM......h.t..sRppE....shhhp...hl...hphp.................................th.hhh...shp...t................ss............plEG.GDhlh..l...u...c.............s..h.....l.s.......h........G..........h........u.........p...R.Tst.pul-...............pl...sp..plhpp........h....p.......l............l...s.h.....th...s......p.......s...c......s........h............hHLDT..sh.o.hl..........-........h......c......p.........hs.h.a.s.t.lh.st...............................................hph.ltpt...................L.t...h...c..p...hpl...Ipsu...................................tcEta....s...u..sNsLsl................ssGsV...................................lsas..p..s.s..h....s..s.p..h.L.c..ct..............................................Gl..c..l..lt....ls......ss.ELs..+GtG...Gs+CMohsl.R.............................................................................................. 
0 307 557 719 +1186 PF01979 Amidohydro_1 Adenine_deam; Amidohydrolase family Bateman A anon Bateman A Domain This family of enzymes are a a large metal dependent hydrolase superfamily [1]. The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source [2]. This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit [3]. Dihydroorotases (EC:3.5.2.3) are also included [4-5]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.00 0.70 -4.79 58 18418 2012-10-03 00:45:34 2003-04-07 12:59:11 15 103 4730 152 4606 25313 10133 289.10 14 66.61 CHANGED hlhPGhI...DsHlHhhps..t..................................h.shtpshpphlpuGsTsstsh...tssss................................htchlts..s.ph...hthh..spsuhts.htphhhsts.......................................lchhpshtth.....thhsshstpsstthtpsthtstsphscc.....t.slhhshHhtpssss.....................................h..h.....................hh.h......sthphhhhshhls....p..th.......tsstlshpshusct............t...t...lspshhphhphhpp.tht....ht.t...s....................................hsstchlphsThssAcshGl.pphGslpsGchADlllhc 
...............................................................................................................................................................................................................................................................................................................................................................................lhPGhl......Ds..H...h.H..h...h...t.s....h.......................................................................................................................................................t...s......h....t.........h...h....p..t...h...h.....t....s.G...s.....T.s....h.h.s.h......ts.............................................................................................................h...p.....h.h....t........t...t........................................h...................................h.....s...........h.........h.....................................................................................................................h.c.....h.h.p....t.h....t....t...h......................................................t....h.....h............s......h......s...........p.............s.........s......t..........s..........h.........h..........p.........s..........t..........h.........h......s................t......h......t.......h....s.......p......t...............................................t..............t....h......h....h...p.....h...c...h...t..p...s...t..tt..........................................................................................h..h.............................................................................................t...hhh...h................................t....t....h.....t.....h......h.h....h...s..h...hlp.........................p.........ph...t.h..........h................t.......t...........t......s..t......h...t......t...t.............h........s...........s.....t....p.....................................................................................................
...................................................................h........t......p....s.....h.....h..........h....h....p.......h...h.p...t.........th..............................................................................................................................................................................................................hsht..p..h...l.t..h...s....o.h....s.s....A....p.......h...h......s.........h....................t.........p........h.......G......p....l.tsG.t.ADhllh....................................................................................................................................................................................................................................................................... 0 1418 2812 3769 +1187 PF04909 Amidohydro_2 Amidohydrolase Bateman A anon Pfam-B_4687 (release 7.6) Domain These proteins are amidohydrolases that are related to Pfam:PF01979 [1]. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.72 0.70 -4.84 132 6287 2012-10-03 00:45:34 2003-04-07 12:59:11 9 30 2257 66 2486 6470 2429 284.10 13 85.76 CHANGED lDsHtH...................................................................t.th...h.hthtsh..thh.hts.hsh.tts....shh....ttt.s..shspt............h..h.....tthh.p..t.s..sthhshsshs........t..p......shtphtcth......................................................tphshtG....................lp..ht..thtss...stth..............................................hhp..........thtch.s.l.....slt.la......................................................................hthtp.hhht.slh....ccaPp.......lplllsHhGhs..................htth..hthhtptsslah...chst...h........................h.ssh.t.....h.....htph...hphhs.s...-+llaGoD.....................aPhhsh.tts.hsthh.th.h.......................stptpcplhtsNAt+lath 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..................h.....................h.......t..h....s....p..............................h..h.......t..hh..t.......t...s.........s..t.h..h..s.h...s......s......h......s.....................t......t...................sh..tc.h.t..c....th..................................................................................t....p..h....u.....h.p..G...........................lp........l...........s.....t..h.......t.....s..h............t...s..tthh...............................................s.hhp..................................t.htc.h......s...h...........s.lh.lH.................................s......................................................................................................h.t.t..t..t....h..h...h..t...slh.........pc..a.Pp........lpl..l.......l....sHhGts....................................................ht.th..h..t.......h...h...t.....p.....h.....s.....s..l..a.h......c.hut..h....................................................................shh...h...t.....h...............h.t..th......h.p..t..h.u.............-.+...l.l....aGSD..........................................a.P...h....s..p.......h....tt....httht...th.h................................................stpt...tp..plhhpNAtclat........................................................................................................................................ 
0 680 1536 2082 +1188 PF00155 Aminotran_1_2 aminotran_1; aminotran_1_2; Aminotransferase class I and II Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.94 0.70 -5.51 48 52439 2012-10-02 18:26:03 2003-04-07 12:59:11 16 160 5588 558 14604 63947 26038 337.30 17 82.76 CHANGED sphlsLussphths.....hhtshhcst.c..........htttpshptYhshpuhsphcptlA.....chht........t.......h..ptp.slhhsuGssushtshhhhh......h.suct.lllssPsassatpshchsssp.lhhhth.............pshplDhpslppslpp...............phllhssspNPsGsshshcplpplhphsppp.shhllsDEsYtshshut...t..s....................hhhlpst.p......hhlstohSKsh.GlsG.RlGhlhu.ss...................pllstlpphspshhs...sststthssshLsssthh........plpphtpcltpp+phlhstLtph.....shshhtsp.uuhashhshsst.................th.ph........................Lhpc.hslhlhshp..th........s.hhRlsh.sshospplcthhptl ..............................................................................................................................................................................t.................................................h.h.tt.h...........................................t..t...s...h...h...h...Y........s.....s.........t.....u........h............t.......L......c...p....s...l..u...........phht......................................................h.......h....p...s.p..........p..l...h...h.....s...s...G.....s..p......t....s.....l..t.....h....h....h....t...h..h.............................h..t....s.......u......-........t........l.....l.....l....s.......s......P.......s.....a.............s.......s......h..........h.....t...........s.........h............p.....h.......s.........u............s...........p..........h...........h....t...h..s....h.........................................t...h....t......h....c......h....p...t......l....p....p....t....l....p..p..................................................................p..h.l.......l.
....l.s....s...s....p....N...P........o.............G........s.....s...........h....s.....h....p.......p....h..p...p....l......h....p.......h...........s.........p.........c.............h......s..........h..............h...........l..l................D......-....s...Y...s....p...h..s....h..s.tt......t..s.....................................................htt.h..s..t.t..p..................h.lh.h.s...o...h.......S......K.s......h...........u........h..........s....G.h...R..l......G...a..h..s...u...st.................................................pl.h...p.......t....l.......p.......p.....h....t.....p.......h....hh...............ss.h..s...t...h....s.......s....h..s...s.....l...p....s...s..t.......................................th.p.p..h..p....p.....t......h....t....p....p....p....p...h...h.......h..p...t....L.pph..........................................sh..p..h...h.....t....s......p......u.......u........h....h....h...h..h..t..h..s.tt.............................................................t.s...p..h..h..pt.......................................L..h.....p.....c...........s.....l.....h.......l...h..s..s....t....h.sh...............................tt..h.lR.lsh..........h.h...s...p...p.h.pthht........................................................................................................................................................................................................................................... 
0 4349 8681 12025 +1189 PF00202 Aminotran_3 aminotran_3; Aminotransferase class-III Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.97 0.70 -5.59 18 23135 2012-10-02 18:26:03 2003-04-07 12:59:11 16 134 4515 255 6601 18830 12804 322.10 28 72.23 CHANGED sls+ucGsalh.Dh-GpcYlDhhSuhussshGHsHPcllpAlppQtsplsass.tshspcshhpluccLsphh....st.sh-+lhhssoGoEAsEsAlKhA+...h....shsp+s....................cIluFpsuaHGRThuulSl.ssssth+phhtPhhssh........hhlPaschts...............t.......h..hhpphpsssl..AAlIlEP.lQGEGGhhhsssuaLptl+clCccaslLlIhDEVQTGhGRTGchFAt-a..hGlsP...Dlhshu.KuluGG.hPluAslsssclhpsh..psss.......HGsTasGNPlAsAluhusLcllpc-pllpsspchushLpptLp.pLpct..shltclRGt..GhhhulEhsc .........................................................................................................................................h..t+ucG.shlh....D.h.-...G......p....c.....Y...l..Dh.h.u..u.h.u................s.h.s.h.G.H..s....+...P...p.l....s...p.....A..l.p....c.........Q..h....p........p..h..h..p..h....................s.h...........s...........s.......p...s......t....h....c..LA....cpLsphs.....................................sh....c.....c....l....h..h.s..s..S....G...oEA....sEsAlKlAR.......................................s......h...s...t.....+s.................................................................................p.l.l.sh..p.s.u.aHG....co...h.......u....s...l.......u........h.......s......s.......t.....s......s.......h.......p.......p.......h......h......t...s.....h...h..s..s.h..............................ht..s..s..hs.s..hht.....................................................................................................t..............h..h......t......h......t.......s....p...p...l.........AA..l.ll.EP..l.Q..G.......p..u.......G..h..h.....s....s..s.....s..aLp.........tl.......Rc.......l.................C...c................c..............a..............
..s....hL.................L.....................IhDE...V.t....o......G..h....G...R....T....G.........p.....h.....a..........A..........h............p...p..................h...u.....l......p...P.................D....l...h....s..h...u...K.u.l........u..G.G...hP.lu.A.h.l..s..p....p........c..l..h..p..s...h....t.sss........................au.s.Ta..u.G...N....Pl.A..s.A....s...u...h..A...s...L.c..l.....l.........p....p........p......s.....l..h....p...p.......s.p......p....h.u....p....h....l...t.....p....t.....Lp.....pl.....t..p..........p.................s.......h.......l..t..c...l....R..uh......G..hhhulph..p................................................................... 0 1940 3976 5469 +1190 PF01063 Aminotran_4 AA_transferase_C4;aminotran_4; Aminotransferase class IV Finn RD, Bateman A anon Pfam-B_607 (release 3.0) Domain The D-amino acid transferases (D-AAT) are required by bacteria to catalyse the synthesis of D-glutamic acid and D-alanine, which are essential constituents of bacterial cell wall and are the building block for other D-amino acids. Despite the difference in the structure of the substrates, D-AATs and L-ATTs have strong similarity. 
20.50 20.50 20.50 20.80 20.40 20.40 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.44 0.70 -4.89 78 10610 2009-01-15 18:05:59 2003-04-07 12:59:11 14 29 4578 127 2921 7879 6007 228.80 22 69.44 CHANGED hhlcpHhpRLtposptl...........thshshpp..........lpphlpphl........cths...tpss.hl+lh..lsps.t.h........................thhhhtttts.phhhspphphs.tsh........................hsphKosshhtthh.ttctt.....t....uh-c..s.lhhsppGt...lsEustuNlh......................................hhcs...............sp....lhTPshsp...s.hLsGlsRptll.phstt..sh.....lp-pslshp-lt.puc.....phalssohtslhP...lpplsst.h...........t..ht.l....hpth ................................................................................................................................................h.cpHhpRLppS.uctl..............h.hs.h.s.h.c.p..........................................l.h.p..t..lpphl......................ptst...........hsss..hlR.h........l.h..t.s...s...s....t..h...u..ht.sstt....h................................sthhhh.h.th..t.s.h..p.h..h..h...s..p..t...h.p..h...s..s...ts.h......................................................hsthK.s.s..s...ph...s....ssl.hspptA......................tpp.......Ghs-........s..lhls....t..p.Gh...............lsEsussNl.F......................................hh....c..s.................................s.p..........lhT.Psh...ss...........s..lLsGI.TRp..sll..pl..Acp...h.....u..hp....................lp.E...c.p...l.s.....h.c....-Lh...pA..c................Esahsuos..s.....lsP.........ltp..l..p..ththt...............u.hh.t.h............................................................................ 1 919 1864 2466 +1191 PF00266 Aminotran_5 aminotran_5; Aminotransferase class-V Finn RD anon Prosite Domain This domain is found in amino transferases, and other enzymes including cysteine desulphurase EC:4.4.1.-. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.96 0.70 -5.72 45 23746 2012-10-02 18:26:03 2003-04-07 12:59:11 14 88 5192 153 7010 41460 18473 338.30 19 85.74 CHANGED lYLDuAAToppPpsVl-uhpcaYpphtuNlHR..uhHshuppuopthEpsRcpVApalsAps.cEIlFTpGoTc.ulNLlAtuhttt.....lpsGDcIllophEHHANllPWpplucppGsplc..hl.slsspGpl-..lcpl.pphl...s.scT+lVulsalSNlhGs.lpPlpclsphs+p.t.GAlllVDuAQulsHhslDlpplssDFhuFSGHK.hhGPsGlGlLYs+cchLpph...Ph.hGGpMlpplsh.psts....apphPh+FE.AGTPsluuhlGLusAl-al.ppluhstlpp+pppLtphshppLts.lsslpl..hG.s...pppsulluFsl.pslcs....pDluphLcpp.GIAlRu..G++CA.....Phhthhsls.......ushRsS.hshYNTp--l-pl .........................................................................................................................................ah...sus.s...h.s...t....l.h.p.t....h....t.......h..........t..................u..s..s.....p...........th.p...t.h..u....t........p....h...p.....p.....h..h......c.....p..u...R.p.....p.l..t.....p......h......l......s......s........p........s.............p........c......l.....l...a..s..p.....G.........u.T....p....u.h.........s.............h........s..............h..............t....s...h......................................................h...p.............t...........s........c..................c...............l..............l...........h.............s....t.........h....-.............+......h......u......s.....h.......................s......h..........p.....t......l..........t.........................p......t.....G........h........p.......l...p....................h.l......s.......s.....s.............t........s......u.......h.....l..s....................h.p.p..l.....p.t..t.l............................s...s......c.......s..........t.......l......l.....s.......l....s.....t..s....s.....s..................h...G....s....l....p.....s.....l..............t..........p.......l.........s..............p........h........s...........+................p......t.......s.......
....s...............h........h........h........l...D...........u............s..............p...........u............h...........u...........p...........h.............s...........l...........D..........l.........p...........p.........h..........s..........l.............D............h............h......s.......h......o..u........a.....K...h.....h......G....P.....t.....G.....l..........G.....h.l........h.s.....+........c....p......h.....h..........t..........p.....h.................t...s.........h........h......h......u..............u....t........................................................p.t.ts.........................................................h....c.....s.....G......T................s...h.....h.....s.......l........h......u........h..........s.....t.....A.....h......c.......hh......t..................p...........h................s.................h...................p................t.................h.............t...................t......+...t...p.............p..........l...t..............p..............h....h......h.......p........t...........l........p.....p......h.........s............t............h.....t...l...............................s.....................t........................t..................t.......p.......t..................s.......s.......h......h....s....h.....s......h......t......t......h...c.s...............................p..t....l...h....h....h....L...p...p.....t....s....l..t...lps........G....ptss..................................................................................sshRhS.h....h...s...t.plp.................................................................................................................................................................................................................. 0 2396 4497 5949 +1192 PF02293 AmiS_UreI AmiS/UreI family transporter Bateman A anon Bateman A Family This family includes UreI and proton gated urea channel as well as putative amide transporters [1]. 
25.00 25.00 28.00 27.70 24.90 21.80 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.30 0.71 -4.60 24 489 2009-09-10 14:59:15 2003-04-07 12:59:11 10 1 261 0 51 381 6 180.40 56 94.65 CHANGED Mh.GlsLLYVGAVLhlNGLhlLG+lss+ssulhNlFsGsLpllsshhhlhsu................s.ucstslhuuAsshLFuFTYLaVulNplhsh.Du+GLGWaSLFVAlsAlshuhhu.hsss..........tchhhulhWlhWulLWhhFFLlLuLt+.plpphsualsllpGlhTuhlPuaLhLsGha ....ML.GLlLLYVGhVLlsNGlstLsKVDsKSsAVMNhFVGGLSIlsNlllIsaS................hu..pchsoaYusATGLLFGFTYLYsAINahFGL.D....hRsYuWYSLFVAINslPuAILS..aoshh.......hscusWaAIIWLAWGVLWLTuFIENhLK.....h.sLG.KFTsaLAIlEGIlTAWIPAaLLFhp+W............................. 0 15 38 47 +1193 PF02461 AMO Ammonia monooxygenase Bateman A anon Pfam-B_2301 (release 5.4) Family Ammonia monooxygenase plays a key role in the nitrogen cycle and degrades a wide range of hydrocarbons and halogenated hydrocarbons. 24.40 24.40 24.40 26.10 24.10 24.30 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.88 0.70 -4.65 8 15067 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 915 12 14 15207 1 149.70 61 93.12 CHANGED uAutSsh+S+AEAstssRThDalhLshLFhllLGuYHIHhMLThGDWDFWlDWKDRRhWPTVsPIVuVTFPAAAQuaFWE+FRLPFGAThsVLGLLlGEWlNRYhNFWGWTYFPINhVFPouLlPsALaLDlVLhLSpSallTAlVGuhGWGLLFYPuNWPIlAshH.PsEhpGhLMSLADlhGFpYVRTGTPEYIRMVE+GTLRTFGKDVVPVAAFFSGFVShLlYFlWWalGKWFSTs.+alpp ..........................................................................................................................................................FahWo....aYPINF.VhPS.oM..IPG.ALh.hDslLLLT.pNWhlTALl.GGuuF.GLL...F.Y.PGNWP.IFGPTHLPl..Vs.EGs..LLSlADYpGahYVRT..GTPEY.V.RlIEQ.GSLRTFGGHTTVIAAFFuAFhSMLhahlW.WYhGthasss..a...h.................... 
0 6 11 13 +1194 PF00909 Ammonium_transp Ammonium Transporter Family Finn RD, Bateman A anon Pfam-B_596 (release 3.0) Family \N 19.20 19.20 19.30 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 399 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.45 0.70 -6.00 25 6224 2009-01-15 18:05:59 2003-04-07 12:59:11 16 40 3496 34 2233 5420 6440 367.80 30 87.31 CHANGED sahllsuuLVhaM.psGhuhhpuGhs+sKNslN..hhhhshhshsls..slhahhaGauluFGps....hsuhhGs..........hthshhshsssshsphh.............hhFQhsFAs.sssuIlSGulAERh+hsuallauslhssllYsshuHWlWs...........sGahsphssh..........DFAGusl.VHhsuGhuGLssAhllG.Rhs+h......pstshpsHslshshLGshlLWaGWaGFNuGSuhshsshsts.............sslsTshAuAsGulshhhhshlpp..GK...shltstsGslAGLVAI.TsusuhVs.PauAlllGllAGllshhuht....hLpp+l+lDDslsshslH...GhuGlhGslssGlFut.th................sshhtGss....t.LhhQlluhhsshsauhssshllhhllshhhs..LRlot-pEhhGlDhspH.u-suY ...............................................ahhh.sshLVhh..M...h.....s.......G.......hAh...h...uG.h.l.R..p.K.N.s..l..s.......hh..hp....shhshuls..sl........ha...h..h.h...G..aulu......F..ut......t.....................h.s......s..h.hGs................................................h.h..t......s...h............t.......s.......s...h......p..h.h................................h.h.hF....Q..h.hF.Ah.....l.ss...........u...l...l...s........Gu....hA...E..R...h.+..asshl.lFssl.hssl.lYsslua..h.lWu............................................s.G..a..l....s....p..h.Gsl..........................................................................DFAG....usV.VHhsu......G..hu......u....Lss..uh......h.....lG......t.Rh.shh.........................tpsh.....s...H.........s..l.sh.shl.Gs.h........l.L...W.hG...Wa.G.F.Nu...G..Ss.h.s.s...ssh.ush.........................shlsThlAs.Auus..l................s...h....h....h....h....p.....h...h...ht..................s.K......s.......s....hhs.hhsG.slAGLVu..........I.....T................su..s.u..............h.....ls....shuAlllGhl..u.G.hl.shhuhp..............................hlp...p.t....l....
.p....h....DDsl.s.shulH......GlsGlh...Gs....l...h...sGlFustsh.......................................................h..h.h..h....st..........pl..h...h...Qlhu..h.h.hsh...s.ashls....shll....h.hllchh..............hs.......LR..l........s..p-p..E..h...G.lDhs.H.ut.s................................................................... 1 776 1422 1922 +1195 PF05145 AmoA Putative ammonia monooxygenase Bateman A anon COG3180 Family This family are annotated by COGS as putative ammonia monooxygenase enzymes. 23.30 23.30 24.20 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.86 0.70 -5.38 8 1520 2012-10-02 17:14:55 2003-04-07 12:59:11 7 4 1297 0 296 1116 1205 282.70 30 90.48 CHANGED aMlGsllAuIlsspht.hslphshhhh.......tsuQsIlGltIGtplosslltslhspaslllsssllTlLuuslhualh+RhupsshsTAhauohPGGuotMlslu.pchGAsptlVuhsQshRllaVshssPhlsphhlss.......ussshshhhs....lslthlslLhhsuhlsuhsuphL+hPuPaLLuslLluAlVphG......hslphtLPsahhuhuQhllGhsIGsphs+shhtptsRhhhtsllsslhhlhhushhAhllohLssl-hhohhLuhsPGGlsphulsAtuLp.h-suhVsAhQshRlLhlLhlssslh+ahp+hts ....................................hhGshlss.llhs.h.hh.....hpl....p....h....Ph.hh...........hu.sllGs.lutsho..sl.ltsltt........h......l.lhlh.l.hsll..u.....h.l.hu.a.l....h.h....+.h..........u.............t..l.s....h............t.......Tuhhus.PGu...hu.tMl.shA..p-h.s.A.sh..thVuhhQhlRllhVshh.ssh.ls..th.h..hss..................................t..ss.s...t..h..h..hhs......................hsh.t.......l....llhh..l..h..u....h..l.u.u.h.h.....up...hl+.....lPusthLsPhl.s.su.l.h.phs.......thl.s.h.p..L..P...p...a.L.l.shA.hhlGhpIG.l..ta.s+th.hht..th.Rhlsth.llshh.hL..l.hh.sshhuhhlshhh.p..l.shhssaLusuPGGlsplsllA..hsss..ADhuhlhuhQslRlhhlLhh...s...P...sl.h+hh.p...s..................... 
0 74 166 237 +1196 PF04896 AmoC Ammonia monooxygenase/methane monooxygenase, subunit C Mifsud W anon Pfam-B_6611 (release 7.6) Family Ammonia monooxygenase plays a key role in the nitrogen cycle and degrades a wide range of hydrocarbons and halogenated hydrocarbons. This family represents the AmoC subunit. It also includes the particulate methane monooxygenase subunit PmoC from methanotrophic bacteria [1,2]. 25.00 25.00 34.70 34.70 22.70 21.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.61 0.70 -5.35 9 127 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 57 12 24 127 22 191.30 46 94.37 CHANGED sspssususA.hsps.laDh+hhhlGlhshhsFYlhlRhYEpsaGaptGLDSFuPEFppYWMslhahEhslEhlssLslhGaLWKTRD..cslsslsPREElRRhFhhlhWLsVYuhAlYWGASaFTEQDGoWHQTlIRDTDFTPSHIlEFYhSYPlYIIhGVuuFhYA+TRLPhFu.KGhSlsaLhhhsGPFMIhPNVGLNEWGHTFWFMEELFsAPLHWGFVhFGWhuLuVhGVslQllsRhpcLh.sh-hsts....c .................................h.s...hhhshhhh.hhhhhh.hYpthauht..tGhD.htPtapphWhshhhhph.l..h.h.hhhualhho.Rc.....cph.slsPp..El+Rhh..hhhalshYhhu.laauuSaFsE.QDuoWHQsllRDTsFTPSHll.FYhSaPhalhhGhushhYAhTRlPh.at...cuhshshhhhlhGPhMlhPNVGlNEWGHsFWFMEELFsAPLHWGFVhaGWsu.LuhhGlhhQhlschspLh.ch......h............ 0 12 19 23 +1197 PF03782 AMOP AMOP domain Bateman A anon [1] Domain This domain may have a role in cell adhesion. It is called the AMOP domain after Adhesion associated domain in MUC4 and Other Proteins. This domain is extracellular and contains a number of cysteines that probably form disulphide bridges [1]. 
25.00 25.00 25.60 26.20 24.20 24.90 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.45 0.71 -4.21 15 277 2009-01-15 18:05:59 2003-04-07 12:59:11 12 32 81 0 184 274 0 146.50 32 15.41 CHANGED hspspC.cWlpsc.chL.sah....p-LPo...CPCohsQuhhDp.........uRFhs.hcsst..+pphshapPGAhaClRS..sspuSst.Gu.................QpCCYDssGpLh.....................sRG+shGsPshhps.hsPtL.paphDlhPahhC........Chas-p.uh.Ctha.hcR ......................................shspCppWhppc.....chL.p.ah.....p-Lss...C..PC....ohspshhDp..........uRhh..hssst...........ppphs.h...apsuAh.a.ClRS.....hpuosh..uu...................QpCCYcssGpLl.....................sRG+s.hGsPshhps......h....sPtL.Ha.hhDllPahhC...............shhs.....s..pp...Cp....c................................... 0 57 70 120 +1198 PF04739 AMPKBI 5'-AMP-activated protein kinase beta subunit, interation domain Kerrison ND anon DOMO:DM04946; Family This region is found in the beta subunit of the 5'-AMP-activated protein kinase complex, and its yeast homologues Sip1, Sip2 and Gal83, which are found in the SNF1 kinase complex [1]. This region is sufficient for interaction of this subunit with the kinase complex, but is not solely responsible for the interaction, and the interaction partner is not known [2]. The isoamylase N-terminal domain (Pfam:PF02922) is sometimes found in proteins belonging to this family. 
21.50 21.50 22.20 21.50 19.70 18.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.39 0.72 -4.15 56 516 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 289 36 306 507 2 114.60 35 32.87 CHANGED ........tsP....tpas...splPsh..p.............................pph.pPPtLPPaLppsl........LNp.ss................................................................................ppc-.........sshL.....shPsHVlLNHLhspu..I+ss..VlslusTpRY+pKaVTpl......lYpPh ....................................................................................................ts...............................................................spcphpsPPhLPPaL.hpsl........LNpsss......................................................................................................................................................................hps-..............sulL.P.PNHVhLNHLastS...IKcu..VhslusTpRY+pKYVTslLYKP............................................ 0 85 165 252 +1199 PF02166 Androgen_recep Androgen receptor Mian N, Bateman A anon IPR001103 Family \N 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.48 0.70 -5.61 3 248 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 130 3 28 232 0 263.10 50 56.51 CHANGED GLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAASAAPPGAsL.......QQQQE.............TSPRQQQQQQpGEDGSPQAHpRGPTGYLALDEEQQPSQQQSALECHPESGCVPEPGAAsAASKGLPQQPPAPPDEDDSAAPSTLSLLGPTFPGLSSCSADLKDILSEAGTMQLLQQQQQE.................AVSEGSSSGRAREAoGAPTSSKDSYLGGTSTISDSAKELCKAVSVSMGLGVEALEHLSPGEQLRGDCMYAPLLGGPPAVRPTPCAPLAECKGSLLDDSAGKGTEETAEYSPFKGGYTKGLEGESLGCSGSSEAGSSGTLELPSTLSLYKSGALDEAAAYQSRDYYNFPLALAGPPPPPPPPHPHARIKLENPLDYGSAWAAAAAQCRYGDLASLHGGGAAGPGSGSPSAAASSSWHTLFTAEEGQLYGP 
...........................................................................................................pEslQsPusppsps.s.shAPPuspL.................Qp.....tp....................................pp.....pp....p.............QQ........s-ssSsps..Ru.suYLsL-ccp..p..su.....................................................................................................................p....................................................s......t......s....s.RsREssuss.S...SKDsalG......us..oTISDoAKELCKAVSVShGLuhE..........u........E......t.t...........s..s..c.t.........pt-pha.s................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 2 3 7 +1200 PF03139 AnfG_VnfG Vanadium/alternative nitrogenase delta subunit Mifsud W anon Pfam-B_1227 (release 6.5) Family The nitrogenase complex EC:1.18.6.1 catalyses the conversion of molecular nitrogen to ammonia (nitrogen fixation) as follows: 8 reduced ferredoxin + 8 H(+) + N(2) + 16 ATP <=> 8 oxidised ferredoxin + 2 NH(3) + 16 ADP + 16 phosphate. The complex is hexameric, consisting of 2 alpha, 2 beta, and 2 delta subunits. This family represents the delta subunit of a group of nitrogenases that do not utilise molybdenum (Mo) as a cofactor, but instead use either vanadium (V nitrogenases), or iron (alternative nitrogenases). V nitrogenases are encoded by vnf operons, and alternative nitrogenases by anf operons. The delta subunits are VnfG and AnfG, respectively. 
20.50 20.50 21.40 115.70 19.40 18.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.35 0.71 -4.28 20 108 2009-09-11 14:59:01 2003-04-07 12:59:11 10 2 80 0 25 83 0 111.80 49 82.15 CHANGED hcs+l-pLhDYIhKpCLWQFaSRuWDREcQNEGILsKTtplLsGEsscpsTP.tDRsYasDAlsLAcsaKp+aPWlsshsK-EI+pLhpuLKpRlDalTITGSLNpELTcppY ...ps+lDpLhDYI.cpCLWQFaSRoWDREcp.EGlLspsscLLsGE.s..huTP.pDRhaasDAlslAsDh+c+aPWhsphsK-EIptLhpGLKsRlshlTITuSLNcELscchY. 1 9 18 20 +1201 PF00212 ANP Atrial natriuretic peptide Finn RD anon Prosite Family \N 21.20 21.20 21.90 24.30 21.00 20.20 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.78 0.72 -3.87 39 340 2009-09-10 22:47:03 2003-04-07 12:59:11 13 3 149 7 90 346 0 32.00 49 25.16 CHANGED phhpstcps+..h....sGCFGt+lDRIGShSGLGC ................s......tts+.phhs....sGCFGh+lDRIGohSGLGC..... 0 3 11 36 +1202 PF03452 Anp1 Anp1 Finn RD anon Pfam-B_4441 (release 6.6) Family The members of this family (Anp1, Van1 and Mnn9) are membrane proteins required for proper Golgi function. These proteins co-localise within the cis Golgi, and that they are physically associated in two distinct complexes[1]. 
28.80 28.80 28.80 29.00 28.50 28.60 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.77 0.70 -5.25 49 428 2012-10-03 05:28:31 2003-04-07 12:59:11 9 10 142 0 314 409 3 260.40 42 61.82 CHANGED phstsslpa..YDLsphpuos..cuhtpcE+VLIloPL...upa.....lshaacsLhpL.TYP..HcLI-LuFllscop..DsshptLtptlpclQ.................................................p.ts..tpppFtplpIlcpDFsphl..uQshp-RHuhpsQt.RRchMA+ARNhLlhssLpPtpSWVhWhDuDI...csPsolIpDLhpHs+DVlVPNVap.h.s..s.....s.pPYDhNSWh......ES-puLcL.AssLs-D-lI...VEGYA.EasTaRshhAahtD..spGsscpphpLDGVGGsulLsKAcVaRs..GuhFPuFs ................................slphaDLsphpuos..puhp....p.cE+lLlloPL+s.uupa........LshaFs...pLhpL...TYP..HcLI-LuFLVuDop..DsThshLpptlpclQ..................................................p...ts...pptFtplo.Il....c+.DFsth.......l......uQshp-RHu..htsQs.RR+hMA+ARN.aLL.ssLc.....P.tcSWVhWhDsDl...psPso.llpDL...hp.....Hs..KD....llVPN.....lap.h..s..s.....stpPYDhNSWh......ES-pulpL.ApsL.sc..Dsll...VE.G.Ys.Eas...T.a........Rsh.hAah..t...D...spussctEh-LDGVGGsulLsKAcVaRs..GshFPsFs........................... 0 61 156 266 +1203 PF03374 ANT Phage antirepressor protein KilAC domain Mifsud W anon Pfam-B_3485 (release 6.6) Family This domain was called the KilAC domain by Iyer and colleagues [2]. 24.80 24.80 24.80 24.90 24.60 24.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -9.94 0.71 -4.02 66 1631 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1146 0 193 1289 13 109.40 28 45.26 CHANGED Lc...pp.....ltttpPKspahDtlssus.ssltlsplAKhl.......slu..tpcLhpaLp-.pshlh+...sssphhshQct.hctGhhphKpsshtpss.Gp.cht...hss+hTsKGp....talhphlhppshht ...........................ppp..ltt.tPKstasDtlhpu.....p...sslhlsphAKhl...................slu.tpcLhpaL+c.....pshlh+.....ssp............p.......hPhp.ch...hspGhF.pl..K..ps..shs....p...ss...Gt.hp.hs...........hs.s+lTsKGp....talhphhhptt..h........................................................... 
0 53 133 161 +1204 PF04715 Anth_synt_I_N Anthranilate synthase component I, N terminal region Kerrison ND anon DOMO:DM04829; Family Anthranilate synthase (EC:4.1.3.27) catalyses the first step in the biosynthesis of tryptophan. Component I catalyses the formation of anthranilate using ammonia and chorismate. The catalytic site lies in the adjacent region, described in the chorismate binding enzyme family (Pfam:PF00425). This region is involved in feedback inhibition by tryptophan [1]. This family also contains a region of Para-aminobenzoate synthase component I (EC 4.1.3.-). 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -11.08 0.71 -4.17 111 5751 2009-01-15 18:05:59 2003-04-07 12:59:11 8 30 3830 10 1512 4547 2202 145.90 22 28.89 CHANGED shhsPlshatplts...pt.......shlLESs.....t..ssp......uR.aShluhsP.........................ht....hpssththtt..........................tt.....p......................shph......Lcphhsphp.h.............t..P..FtGGhlGYhuYDh...hctlc.ph...........sshs.hP-.hthhhh..-phllaDHhppphhl ........................................................................................................sshthatp.Lpt.............tt.............hshLLESs...................t.........stp................uR.aS.llsh...ss......hhp.....................lp...........stspp.splpt............................................................tptph.ppts......................shph...............Lc.p.hh.p.p.hp.....................................t..s....F.sG.GhlGhhuYDh..................lpthE..pLs....p..........htsc.t..hP..D..hthhlh...-plllhDHhcpphh........................... 
0 477 968 1295 +1205 PF03497 Anthrax_toxA Anthrax toxin LF subunit Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 28.20 27.20 20.80 20.00 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.92 0.71 -4.44 7 96 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 91 65 11 94 0 173.00 35 16.76 CHANGED t+sh.....s.tshctsGlssE..ascuhtplAcEpNshlhhRsVN.huToLIcpGh.uTKGhsl+uKSSDWGPp..AGaIPhDtphSK.......htsssttlp+hs.tsppul........tutuhsplsLplscpRlsELppsGshshstcsht....hshphssupphEFclppssss.atl.hhth.ss....lpVhGsshsht ..............ps.t.hctsGlssE..aupshpplApchNslIGlRsVs.lupoLIcpGh.soKGhplKuKSSsWGPp..AGFIshDQphSK.......hsssthplp+hNhpstKul........pttuhsplsLpIo+pRlsELhcs.s.lshhtcph..........hssptsss.pp...hEF...c..lp..p...p..sp...s.h..l.hhsppsp.....lpVhsss.p..s.................................................................. 0 4 8 11 +1207 PF02522 Antibiotic_NAT Aminoglycoside 3-N-acetyltransferase Bashton M, Bateman A anon Pfam-B_1432 (release 5.4) Family This family consists of bacterial aminoglycoside 3-N-acetyltransferases EC:2.3.1.81, these catalyse the reaction: Acetyl-Co + a 2-deoxystreptamine antibiotic <=> CoA + N3'-acetyl-2-deoxystreptamine antibiotic.\ The enzyme can use a range of antibiotics with 2-deoxystreptamine rings as acceptor for its acetyltransferase activity, this inactivates and confers resistance to gentamicin, kanamycin, tobramycin, neomycin and apramycin amongst others. 
25.00 25.00 26.70 26.60 24.80 24.70 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.32 0.70 -5.11 52 757 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 561 30 138 557 210 200.70 28 82.97 CHANGED hVHuSlpslGhlsGGsp......sllpALh-slu.pGTllhPstoss.ssst............t..t.h+tphPsaDPsho.so.cshGhlsEhhRphPushRSsHPthShuAhGtpAptl.hsspshs.saGtpSPLu+lh..chsGplLhLGssh-ssThlHhAEthushstcphtphp.....slh.tsGpph.Wpphpchshss.........pt......Ftplupsh.pppsh..hppuplGsAp.spLhst+-hlchuhp.alppc .........................................hVHsslpphG...h.l..sus.......sllpulhcsls.pG.TllhPs.o.p.spst.............................saD.tho..o...shGhlsEhhRp.hs.s..shRSsHPhhShuAhGtpuc..l.h..c...th.s..........shGcpSPhtplh..c.hsupllhlG.s.s..h.p.ss.T.h.lHhuEtp..hs......hchhptht.......slh..ppGphh..hpph..pphs.hst.............-t.............h.plst.....h...pppt..............hppsplusut.hpl..h.ptpchlchshphhpt................................................................................ 0 48 95 117 +1208 PF03230 Antirestrict Antirestriction protein Bateman A anon Pfam-B_3190 (release 6.5) Family This family includes various protein that are involved in antirestriction. The ArdB protein Swiss:Q47057 efficiently inhibits restriction by members of the three known families of type I systems of E. coli [1]. 25.00 25.00 25.00 26.50 24.50 24.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.15 0.72 -4.00 27 1097 2009-09-10 21:28:14 2003-04-07 12:59:11 8 3 438 3 52 624 10 95.40 57 64.17 CHANGED Lss.cYsGGaWpFYpLusGGhaM.APss....ppchplhssh.NGapGplSA-AAGIlsoLashu+h...utcsp....sDthhcpYapLR-aA..tpHsEuusIapAID ......................LC--YsGGhWshYTLsN..G................G.AFM.AP-s....s-sahLFNuh.NGN.cAEhSsEAAGIsACLhsYS...H+.....As+TE..................saAMos....HYY+LRDYA..LpH.....P..ECuAIhRlID................... 
0 2 13 38 +1209 PF03589 Antiterm Antitermination protein Griffiths-Jones SR anon PRODOM Domain \N 21.60 21.60 21.90 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -10.19 0.72 -3.90 21 1401 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 423 0 52 693 1 94.20 30 76.57 CHANGED ss..losuspsssucu.......sAlshtpoctp.Ghsla.....suttGhschssppAhctlhphs.slspps.hh+phcshhcshVhpshtp.uaA-hshut ...................p.s.lossspsspucu.......sAhshspoctt.Ghsla.....CuthGhSp.ssptAhptltphs.slsshs.hh+plps.hcshVhphhtp.AaADhphsA......................... 0 2 8 26 +1210 PF01786 AOX Alternative oxidase Bashton M, Bateman A anon Pfam-B_1154 (release 4.2) Family The alternative oxidase is used as a second terminal oxidase in the mitochondria, electrons are transfered directly from reduced ubiquinol to oxygen forming water [2]. This is not coupled to ATP synthesis and is not inhibited by cyanide, this pathway is a single step process [1]. In rice the transcript levels of the alternative oxidase are increased by low temperature [1]. 
22.50 22.50 27.10 26.80 22.30 22.10 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.23 0.70 -4.92 60 637 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 343 0 287 637 966 195.30 41 67.21 CHANGED ssaspcphp.slphtH+pspshuD+lAhhhl+hlRhshDhhohh....................................................................................................hocc+ahsRhlhLEolAGVPGMVuGhlRHL+S......LRthcRDt...............................................................GWIcoLLEEA.NERMHLLTFlcls.cPuhahRhhlhsuQGVFaNhaFlsYLlSPRhsHRFVGYLEEEAVpTYT+hlc-l-s.G+.l..h...shsAPpIAlcYWphsc.p......................................................................................................soh+DllhhlRADEA+H..R.-VNHThus ......................................................................................................................t.H.p.stph.DphAhhhl.c....hlRh.h.Dh........................................................................................................h.h.p....p+a.h.pRhhhLETVAuVPGMVu......G...hlhHL+S......LRphc+..s.s.....................................................................G.W..I..+.sL.L-EA.NERMHLhTFh.c.lu....pPt......WapRhllhssQG..VFaNsa..FlhYLlSP+h.uHR.hVGYLEEEAlpoYTchlc-.l-p...Gp.l........shsAPtIAl.........pYWphsp.p......................................................................................................ssl+DllhslRADEAcH...R...clNHhhus....................... 0 104 194 247 +1211 PF01261 AP_endonuc_2 AP_endonulease2; Xylose isomerase-like TIM barrel Finn RD, Bateman A, Studholme DJ anon Prosite & Pfam-B_3980 (Release 7.5) Domain This TIM alpha/beta barrel structure is found in xylose isomerase (Swiss:P19148) and in endonuclease IV (Swiss:P12638, EC:3.1.21.2). This domain is also found in the N termini of bacterial myo-inositol catabolism proteins. These are involved in the myo-inositol catabolism pathway, and is required for growth on myo-inositol in Rhizobium leguminosarum bv. viciae [1]. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.09 0.70 -11.13 0.70 -5.05 311 17202 2012-10-03 05:58:16 2003-04-07 12:59:11 19 48 4117 343 4623 13647 4257 209.50 15 68.04 CHANGED lpts...tphG..asslElhht..tth.........................hpplpphhcctulp......................h..hth.......................................ttt........hpthpptlchu...ppl..............G..sph.......lshhsu.............................................................h.ttstp....pshpphhc..slpplschst..ptG....lp.lslEshstpth..h.......................pchhpllcplsptt...ltlthDssHhhht.................ttshhphlcphssp...............lttlHl..pDs..................................ttspch..GpG.pl ..................................................................................................................................................................................................t.stphG.hps.lc.h..h...h...........th...................................................................hp.ph.t.phh.p.pt....s..lp.............................................l.....s..h..s...h...h.t...hhh..................................................t..........t.t..hp.t.h....p.p..s..l...ch.u.......p.tL............................G...s..ph.............l..s..hh.su...........................................................................................................h.ht.t.s.p...................psh.pp.h.h..c....sl.p..t.h.s.c.h....h....t...ptG....................lp...lslE.s.....h.ss..t.h.h..tth..........................pph...h..p..l...l...c.....t......l....s.........p............t.......p........lt...l...t...h.....Ds....sHh.hht........................................tts...h..t...p....h...l......p.....p..h.t..st..................l.ttlHl.....pDs........................................t....t...h...GpG................................................................................................. 
0 1580 3025 3904 +1212 PF01636 APH Phosphotransferase enzyme family Bashton M, Bateman A anon Pfam-B_840 (release 4.1) Family This family consists of bacterial antibiotic resistance proteins, which confer resistance to various aminoglycosides they include: aminoglycoside 3'-phosphotransferase or kanamycin kinase / neomycin-kanamycin phosphotransferase and streptomycin 3''-kinase or streptomycin 3''-phosphotransferase. The aminoglycoside phosphotransferases inactivate aminoglycoside antibiotics via phosphorylation [2]. This family also includes homoserine kinase. This family is related to fructosamine kinase Pfam:PF03881. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.76 0.70 -4.64 244 16611 2012-10-02 22:05:25 2003-04-07 12:59:11 18 143 4228 100 5858 21791 6494 227.30 12 63.11 CHANGED phphhs.sG.hsst.sahht........ssp...t........phl.......l.+h........................hpss......htph.t.....tph.p............hh..phLs.ppth....s..........sPps...l........s....t...tthtth......................hhthh..phlsGpthtp.................t.tthhp..thuphLs....................................plHph.....................t...........................ttshs..................................................tht..t..........hht......h.h......................pth.hph.............th........ht.phhp.........phhp...............................hhsthtpths...................hslh.HuD.hpssNllhs.............tsspl...ullDa....ppushus.hhDl......uh..h..h.......t.hs.ths...................sphhtth....hptht.............hshtphphh 
...........................................................................................................................................................................................................................................................................................................t....................h...h.....................................t..............ph.h.......h.+h...............................................th..................t.h..t.....................................hh.......th....l.t...t.t.th.......................................................s..P.p.h....h...............................t..................t...hh...........................................................hhlh....p..h..l..t....G.p...htt..................................t......t.h...h.p........th...s...p..hl.t...........................................................................................................p.l.+p....h.....................................................................t............................................................................h.t...........................................................................................................................................................................tht........t........................h...............................................................................t.h....ph...............................th.................ht..th.t...................t...hhp......................................................................................h..h..p.t.h..t..p..t..h.....................................................ths.h.h...H...u..D..h.p............s.Nl.l.hs...........................tss..p....l....s...l....l..D....a.........p.t.s..s....h....u....s......h....h......D.l.................uh...hh...................................................................................................hh............................................................................................
................................................................................................................................ 0 1538 3487 4810 +1213 PF02558 ApbA Ketopantoate reductase PanE/ApbA Bashton M, Bateman A anon COGs Family This is a family of 2-dehydropantoate 2-reductases also known as ketopantoate reductases, EC:1.1.1.169. The reaction catalysed by this enzyme is: (R)-pantoate + NADP(+) <=> 2-dehydropantoate + NADPH. AbpA catalyses the NADPH reduction of ketopantoic acid to pantoic acid in the alternative pyrimidine biosynthetic (APB) pathway [2]. ApbA and PanE are allelic [2]. ApbA, the ketopantoate reductase enzyme is required for the synthesis of thiamine via the APB biosynthetic pathway [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.78 0.71 -4.77 98 4828 2012-10-10 17:06:42 2003-04-07 12:59:11 11 38 2913 28 1377 9328 3628 147.80 22 45.49 CHANGED ItllGuGAlGuhhustL..tp.uGpc.....VphlsRsp.phpt.lpppGlplpssp........tphhh.s.hthsss.............htthDllllssKuhpsps.slpt.ltshlsss...s......hllhlQNG...lGpt.-tlpphh.....st..p.pllhGlsh.huutpps.......supl....pps...u..tuphhlGthss ....................................................................lsll.G.s.G.A.l.G.s.hh.u.s....tL.....tp....u....Gp-..........................Vsh......l..s..R..t....s....p...hps....l..p....p..p.....G..l.p.lptts..........tphh.h........h.thsst............................................hsthD..ll.l.l.......s.......s.......K....u........h..........p........l.....s.....s.....s.lpp...l...t..s..h..l..sss...o.................hl...l..h.....l....pN..G.....................h..up...........-t.lt..phh........................................st....p..pllhGs.sh...hs.uthps.......................ss.tl....hhh....u...tsth.ls....s....................................................................................................................................... 
0 390 813 1149 +1214 PF03256 APC10 Anaphase-promoting complex, subunit 10 (APC10) Mifsud W anon Pfam-B_4273 (release 6.5) Family \N 19.90 19.90 20.10 20.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.18 0.71 -4.72 7 646 2012-10-03 19:46:52 2003-04-07 12:59:11 11 56 294 3 429 618 4 165.50 26 13.97 CHANGED .ptsssssu.t..pphphtGh........hh-luppAhWolSSsK.G.GVc.hLRD-sh-TaWQSDGu.PHhlpIpFpK+sslphlslYhsaphDESYoPSplplcuGsshpDhp.lphh-lsp..PsGWVplslp...................Dspt..p.l+shhlQltlhsNHpsG+DoHhRtI+lYuP..p..t.shp.hht.....oohth.phsol .......................................tt..tt............................................h...t..So...p......t.slt.....pL........pDssscoYWQS...DG...s................p...............s...H.hls.lphp.+.t..s...h.l....pplt..lhls.ptDcSY....hPs+.lslhuGsshs.s.....L.p................E........l..ps.....lpl.p...............ssGah.pl.l.................................................................................p.s...s....p.hph.hlQltlhps..pps....GhDT+lRtlclhu....................................hh.................................................. 0 128 193 305 +1215 PF04110 APG12 Ubiquitin-like autophagy protein Apg12 Wood V, Finn RD, Bateman A anon Pfam-B_9471 (release 7.3); Family In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells [1]. The Apg12 system is one of the ubiquitin-like protein conjugation systems conserved in eukaryotes. It was first discovered in yeast during systematic analyses of the apg mutants defective in autophagy. Covalent attachment of Apg12-Apg5 is essential for autophagy [2,3,4]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.89 0.72 -3.79 6 282 2012-10-03 10:59:06 2003-04-07 12:59:11 8 8 246 2 197 973 70 83.60 40 53.46 CHANGED KIsl+L+AlGssPlLKpppasVssocohutlIpFL+KhL+l.hs-pLFlYVNsoFAPSPDppltsLYcsFu...oDu+Ll.lpYCto.AaG ...........................Klslhh..+slG.s.sPll..K....p.p..haplsso.pphp..sllcFL.+.KtL.....+.........h...........s.......-........p.............lFlYl...Np.......s.F.A.PuP.Dp.tlusLa.c.s.Ft..........ssscLl.lpYsto.AaG.................. 0 65 107 165 +1216 PF04108 APG17 Autophagy protein Apg17 Wood V, Finn RD anon Pfam-B_71163 (release 7.3); Family Apg17 is required for activating Apg1 protein kinases [1]. 25.00 25.00 25.00 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.35 0.70 -5.67 19 172 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 138 \N 130 174 0 344.20 21 54.31 CHANGED hpahhtA+poLspspplCp.Apphlsss+ptLppplh.pt+.hpFLhpuLppQhchLhp.....shsphhhtpptspp.hsslhppLpsups+LcpplphLcpThVph.h..............sp-s+sLtDFlspcsl-.Lptslpphlccssth..ptpl-s.htta-s.lpplppth....tpph+phpp....pspht.t.pst......s.hsslsptlpuLE...pEhAslLpSLopHaDhChpuhchh.s.............................................pshstsEhcEhlpVLpsDAtEl.-VltELpsthsphcpthcs....lpsphsphpphastspslhpplpphtp.phstYlthhpshsphhpcppt.........plpptlsplppLspaY-pFlpuYpu.LlhElcRRpts..cp+hccllcpsppcLppl.-pDhcpRppFhhc.GDYLPp-l.WPGhhcpssh 
...............................................................h.tah.tuppsLtphp.lhp.Ap..h.thpt.hpp......p.htalhttltpQhthl.t.....hhtth....t.spt.h..thhpphp.h.tplpphht.Lcth.s...h..........................................tpt.ppLhDFlsppslp.l....p..lpph..h..ptth-t....thpp.hptlppth..............h.........................s..t.h.p..tphp...pthsphLpuLspHaD.shhh.c...........................................................t.hs.tpht.p.hpVl.pDstEh.slhhclpphhtphpt.htp....l..p.p....hsthpphhsshpphh.pplsphtp..p....h....spalt....thpshpphh.pptpp..................................plpsthpphp....pLpphYpsFh..p..uYss.Lll..EltRR+ps..ppphcslhcphpcpLsplh-....p-hcpRcpFhtchG-aLPp-l..hssh......s..................... 0 30 65 105 +1217 PF04111 APG6 Autophagy protein Apg6 Wood V, Finn RD anon Pfam-B_11747 (release 7.3); Family In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells [1]. Apg6/Vps30p has two distinct functions in the autophagic process, either associated with the membrane or in a retrieval step of the carboxypeptidase Y sorting pathway [2]. 
30.00 30.00 30.00 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.09 0.70 -5.38 99 445 2009-09-10 16:48:33 2003-04-07 12:59:11 7 12 305 4 274 429 5 294.70 35 65.03 CHANGED PlCp-Cs-hLlcpLcpplctspcEpcsYppaL......cplp.........................................tppstpp....h..pcclpp..lctEEpphhpELpclEpccpplsp-lpp.......hcpEtppLc..ppEc.paacchsphphphhchpc-hpSlpsphphspppL-+Lc+oNlaNssFcItH.....-.G.tFGT..INGhRLG+......LssssVsWsEINAAWGQslLLLtslAc+lshp...Fp.p.Y+LhP..hGSpSpIpchs..p.......................................phptLsL......auo.G......sh...phFh.....cp+FDpAMlAFL-slpQh....tppl...p..............ptss......t.....................h.........p....LP.Y..cI..p..cc+Itch.......SI+l..thspc-.pWT+AhKahLTNhKWhLAasSsp...tp .................................................PlC.-Csc.hLl-thcpplp.sppEppsYtphLcpLp......................................................p.t.p.tpp...pthpp......clpp..lpt-EpplhpcLpplE.ppcttlspp.ltp..........lct.......ctpp.Lc..ppEt.pah+-hsphphp..hhphpc-.......hculpsphchsptpL-+Lc+TNVaNss...F....p..I.H................s.G..tFGT..INshRLGR......Ls.sss..V.-WsEINAAWGQssLLLtslAp.+..hs.hp...Fp.p.Y+LhP.hGs.aShlpphp....................................................ptpplsL..ass.s...sh....phFh.....pp+FD.pAMsuFL-ClpQhtc.l...p.............ptst.......................................thpLP..Y.ch....c..psKItsh...........................SI+h..thspp-..pWT+AhKahLoslKahLsasss....p........................ 0 102 152 217 +1218 PF04109 APG9 Autophagy protein Apg9 Wood V, Finn RD anon Pfam-B_12479 (release 7.3); Family In yeast, 15 Apg proteins coordinate the formation of autophagosomes. Autophagy is a bulk degradation process induced by starvation in eukaryotic cells [2]. Apg9 plays a direct role in the formation of the cytoplasm to vacuole targeting and autophagic vesicles, possibly serving as a marker for a specialised compartment essential for these vesicle-mediated alternative targeting pathways [1]. 
21.70 21.70 25.70 23.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.15 0.70 -5.55 36 425 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 279 0 277 410 20 322.60 36 42.59 CHANGED Ds-ltTlsWppVlp+lht.Lcc..tsshosp..............t.ps.....Kp+LsApDIANRIMR+ENYhIALh.NKslLslsl.......................slP.......hlpsp...........hLT+oL-WNlphslh...salFs.ppGpl+ppal+sppRptLupcL++RF.........hhsGllNllLuPFlllYhlLhhFF+Yap-a+psPusl.GsRpaoPhAcW+hREFNELsHhFp+RLshShhhAscYlsQFP..pshhsl.lh+hluFluGulsAlLllholh...Ds-sFhsF-lo.s+olLFYlulhGslhuls+uhlsc.......................-shV.aDPEtsLcpllpaTHYhPscW...cs+hHopcV+pEFspLaph+lhllLcElhSlllTPFlLhFsLspsu.spIlDFFR-aoVcVDGlGaVCpFAhF-hp.cssts ......................................................................t-lpshsWt.cV.p+lht.lpc.pp..thsht.......................................ppcL......sthDIhpRlhRh............cNYh..lAhh..NK........slLshph..........................................lP.............hht.p.h...........................hho+sLcaNlphhlh....t.hFp..pphplp.tpahp.......stp.......c......L.......uptLpp+h.........hhhulhNll.lsPhlll...a.llahFapYhp.h+..p.pPusl.GsRpao.huchhhRcFNE.L.H.hppRLshua...........AscYlspFs..pshhsh.lA+.lsFhuGulhulL..lhh...slh.......Dpclhh......scplLhhlslhGslhsls.Ruhls-....................................................................-phV......asPEth.hptlltahHYhPpcW...puphHs.p.....pl+pEFs.pLaph.....+hl...hlLcElhS.llTPhlLhF.l....pu..cIlDFFRpFTVcVsGlG.lCSFA.hDh.cpts...p...................................... 0 94 148 221 +1219 PF04655 APH_6_hur Aminoglycoside/hydroxyurea antibiotic resistance kinase Waterfield DI, Finn RD anon Pfam-B_4369 (release 7.5) Family The aminoglycoside phosphotransferases achieve inactivation of their antibiotic substrates by phosphorylation utilising ATP. Likewise hydroxyurea is inactivated by phosphorylation of the hydroxy group in the hydroxylamine moiety [1,2,3]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.58 0.70 -5.23 9 681 2012-10-02 22:05:25 2003-04-07 12:59:11 9 5 456 0 126 779 46 215.90 33 83.38 CHANGED RWcLptcGtshsscoShllPVh...psDGssAhLKlth..tc..pEptGthl.LsaWsGcGAVRlLsp-..sushLLERhsGsRsLspls.cttDcpAstllAthhsRLauspstPhP..LpPLp-hhsuLhpts.spt.st......cttL.tssAusstpLlusPp-.RsLHGDLHasNVLsusc.....csWLAIDPcsLhG-sGFDhAshhsss.tc......shcstclc+ph-llstslslD.tRlhsWslAhss.SusWthEDG ............................................................................................................................Wtl.........s.........s.suhlh.V.....h..s..G.t...............A.hlKh..........pE.h.....h..Lh.....h..h.s...G.p...G..A.s.c.lls.........tc.....................p.......s..hhLLEhh..utR....L.u......p.....l.....s..................s...D...p.....A...s...pl..h....A........p...l..hu...+........La....u..s..s..............s.h.....P..s........s...L....h...s.......lp...-...h.....h...s.s......hh..pps.....tp.s.st...................sppphh..hpsAt.h..s..c...p...l...h.u......s...s..u......-.........p......h...L.....HGDLHa-N..ll...t.......us........................cs.W.....L....s....ID....P.p.s.L.s.G-.sGF-h....ssh..hh.s....h-c......shs.s..p...p..l.tph.h-.hhs.c.slsl..D.cR.hhsashuhss.sAhW.ht.................................................................................. 0 40 79 103 +1220 PF00807 Apidaecin Apidaecin Bateman A anon Pfam-B_1489 (release 2.1) Family These antibacterial peptides are found in bees. These heat-stable, non-helical peptides are active against a wide range of plant-associated bacteria and some human pathogens [1]. The Pfam alignment includes the propeptide and apidaecin sequence. 
25.00 25.00 31.70 30.20 23.50 17.60 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.47 0.72 -4.26 2 104 2009-09-10 22:49:59 2003-04-07 12:59:11 12 9 4 1 40 92 0 27.70 92 82.49 CHANGED hctcPEAEPGNNRPlYIPQPRPPHPRl. .REAEPEAEPGNNRPVYIPQPRPPHPRLR. 0 40 40 40 +1221 PF04711 ApoA-II Apolipoprotein A-II (ApoA-II) Kerrison ND anon DOMO:DM04862; Family Apolipoprotein A-II (ApoA-II) is the second major apolipoprotein of high density lipoprotein in human plasma.\ Mature ApoA-II is present as a dimer of two 77-amino acid chains joined by a disulphide bridge [1]. ApoA-II regulates many steps in HDL metabolism, and its role in coronary heart disease is unclear [1]. In bovine serum, the ApoA-II homologue is present in almost free form.\ Bovine ApoA-II shows antimicrobial activity against Escherichia coli and yeasts in phosphate buffered saline (PBS) [2]. 24.30 24.30 24.70 24.70 23.80 24.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.41 0.72 -4.09 5 38 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 26 44 15 41 0 73.20 61 73.18 CHANGED QAEEoslQSLsSQYFQTVTDYGKDLMEKAKuSELQuQAKAYFEKTQEQLTPLVKKAGTDLlNFLSpFl-L+cQPAT .............QA-EsslpSLhSQYFQTlTDYGKDLhE...KVKoPELQAQAKAYFEKo+EQLTPLVKKAGT-LlNFLS.Fh-Lc.p.pPA........ 0 1 1 1 +1222 PF04691 ApoC-I Apolipoprotein C-I (ApoC-1) Kerrison ND anon DOMO:DM04729; Family Apolipoprotein C-I (ApoC-1) is a water-soluble protein component of plasma lipoprotein. It solubalises lipids and regulates lipid metabolism. ApoC-1 transfers among HDL (high density lipoprotein), VLDL (very low-density lipoprotein) and chylomicrons. ApoC-1 activates lecithin:choline acetyltransferase (LCAT), inhibits cholesteryl ester transfer protein, can inhibit hepatic lipase and phospholipase 2 and can stimulate cell growth. ApoC-1 delays the clearance of beta-VLDL by inhibiting its uptake via the LDL receptor-related pathway [1]. 
ApoC-1 has been implicated in hypertriglyceridemia [2], and Alzheimer's disease [3]. 27.60 27.60 28.40 30.60 26.80 27.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.86 0.72 -4.28 6 41 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 29 5 13 45 0 55.00 47 71.03 CHANGED ss-hSSshpplsDKLKEFGNTLEDKA+sAI-cIKpS-lssKTRsWFoEsFpKlK-KlKss. ......tP-lussh....D+LKEFGsTLEDKA+psIp+I+QSEhssKTRsWFoEsapKVK-Klp.s..... 0 2 3 4 +1223 PF01333 Apocytochr_F_C Apocytochrome_F; Apocytochrome F, C-terminal Finn RD, Bateman A anon Pfam-B_1294 (release 3.0) Domain This is a sub-family of cytochrome C. See Pfam:PF00034. 28.50 28.50 28.50 28.70 28.40 28.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.48 0.71 -4.26 49 697 2012-10-02 20:27:15 2003-04-07 12:59:11 14 5 642 30 66 553 134 114.80 74 37.15 CHANGED NNslasAsssGpIopIpt..p-..............c....GGaploI.possGppls-plPsGP-Ll..VscGpsVpADQsLTsNPNVGGFGQs-sEIVLQsPsRlpGLlsFhhsVhLAQlhLVLKKKQFEKVQhA.EMNF ................NNTVYNAousGhVoKIlR..KE....................K....GGYEITI..scs.SD.GcQVVDIIPPGPELL..VSEGEsIKlDQPLTs...NPNVGGFGQGD.AEIVLQDP.LRVQGLLhFhASVILAQIFLV...LKKKQFEKVQLuEMNF...... 0 16 40 56 +1224 PF01442 Apolipoprotein Apolipoprotein A1/A4/E domain Bateman A anon Prodom_1521 (release 99.1) Domain These proteins contain several 22 residue repeats which form a pair of alpha helices. This family includes: Swiss:P02647 Apolipoprotein A-I. Swiss:P06727 Apolipoprotein A-IV. Swiss:P02649 Apolipoprotein E. 
90.00 5.00 90.00 5.90 89.90 -999999.99 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.09 0.71 -11.79 0.71 -4.81 46 1816 2009-01-15 18:05:59 2003-04-07 12:59:11 13 29 249 39 715 1765 2 196.40 12 86.35 CHANGED thlp-shcplssYtpcLpppLsPhs...p-htspLsc-sptl+pcl......ppDlE-l+s+lpPahsElpphlppph-chRp+lsPhspcL+c......phppchccLpppLsshsp-..................h+sph......cpsl-sl+spLtPhs-ph+p+ls..................p+Lcpl+ppssspsp-hpsplpppl...ppl+c+lpstsc-l+ppL...............................................................................pPhsEshcpplhphhE ...................................................................................................................................................................................................................................................................................................................t.......................................................h.......ttl....................ttt...h.tt.htt.......t.......l...t....thh....p....p...h...p...p......t.l....t.p...p.......h.cp....l..pp....p....l...............s...s.......hhpphp.p......................pl....s........pp...s......ppl.....pp.p....ls.......sh...s...pp........................................................................................hpppl..................pp...ps....p...ph...p...splssh.......s....cpl...p......pphs............................................................................................pplpphp...p...p.l...s..p...ts.....p....p....ht...s.p...l..p...p...ph...........pphp...ppl....st.t....hp.......slpppl....................................................................................................................................................................................................................................................................t..t.th...........................tt.................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................... 0 146 285 400 +1225 PF01583 APS_kinase Adenylylsulphate kinase Bateman A anon Pfam-B_578 (release 4.1) Domain Enzyme that catalyses the phosphorylation of adenylylsulphate to 3'-phosphoadenylylsulfate. This domain contains an ATP binding P-loop motif. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.78 0.71 -4.57 20 3100 2012-10-05 12:31:08 2003-04-07 12:59:11 15 31 2402 54 1024 3172 2394 154.70 50 43.89 CHANGED +GsslWhTGLSGuGKSTIAtALEcpLhppG.hpsYhLDGDNlR+GLN+-LGFSccDRpENIRRluEVAKLhu-uGllslsuFISPacp-RctARplhpp.........cFlEVFVDsPL-VCEpRDPKGLYKKARsGcIKsFTGIDuPYEsPpsPElhlcssppsl .......................usllW.h.TGLSGSG..K..S..TlAsALEc.......tL....h....p................p.G.....hp.....s....Yl....L.D....G....D......N..l....R.....+.....G.....L....s........p.................D..........L.......G.....F.......S..c.....t..D.R........p..EN........I....R..R.....l..u....E..V....A.....+.....L.h.....s....-.u....G.l..l.s.l..su...F..IS.P.a...+.t..-R..ph..sRchlsps.............pF..l.E.......V.a..Vcs.P.Lp..lC..E....p..R..D........P........K.................G.....LY.+KA.....R.....u....G...E........I+.............s........FT......GI....D....u..s............YEs..P..p..sP..-..lplcsp...ph........................................................................................................ 0 317 597 837 +1227 PF03440 APT Aerolysin/Pertussis toxin (APT) domain Griffiths-Jones SR anon SCOP Domain This family represents the N-terminal domain of aerolysin and pertussis toxin and has a type-C lectin like fold. 
25.00 25.00 36.70 41.30 24.80 24.80 hmmbuild --amino -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.83 0.72 -4.28 5 77 2012-10-02 16:37:33 2003-04-07 12:59:11 9 3 33 26 10 90 2 81.00 49 19.25 CHANGED sEPVYPDQl+lsuLG..pGVCuscYRPLTR-EApS...l+sNLVuhMGQWQIoGLADtWVILGPGYNGEIKsGoAGuTWCYPToPsou ..................lYPDQlhhhsLG..ptlCsstYRPlTRpEApu...lKusllshMGp..WQIoGLAssWVIMGPGYNGEIK...G.o.A.u.sTWCYPssPs..s.... 0 3 5 5 +1228 PF02610 Arabinose_Isome L-arabinose isomerase Bashton M, Bateman A anon COG2160 Family This is a family of L-arabinose isomerases, AraA, EC:5.3.1.4. These enzymes catalyse the reaction: L-arabinose <=> L-ribulose. This reaction is the first step in the pathway of L-arabinose utilisation as a carbon source after entering the cell L-arabinose is converted into L-ribulose by the L-arabinose isomerases enzyme [1]. 27.00 27.00 27.00 27.50 26.80 26.90 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.15 0.70 -5.49 38 1076 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 1019 9 200 697 49 349.80 58 71.66 CHANGED M.hph.cphEhWFlTGSQHLYGpEsLcpVupcoppIssuLNsoupLP.hclVaKslsoTs-pIpplhp-ANss-sCsGlIsWMHTFSPAKMWIpGLptLp..KPLLHLHTQaNp-IPWsoIDMDFMNLNQSAHGDREFGFIsuRhphpRKVVsGHWpDtcVpccIusWhRsAsuas-upplKVARFGD.NMRpVAVTEGDKVEAQIpFGaoVssaGlGDLVphlssVo-p-lcsLlpEYpspYslssshpp.ssppcpulpptA+lELGl+pFLc-GGasAFTTsFEDLtGh+QLPGLAsQRLMA-.GYGFGuEGDWKTAALlRhhKlMupGh...sGTSFMEDYTYchssGschlLGuHMLEVCPSIA 
..............................................................................................................................MhhhcsYEVWFVlGSQHLYGsEs..L+QVs.pHAcclVsu.L.NspucL..P.sKlVhKPlsTosDEIosls+-ANYD-cCAGllsWhHTFSPAKMWIsGLshLp..KPLLphHTQFNsslPWDoI....D.......M...D......FMN....L.NQoAHG....s....R....EFGFIsARMRpp..+.tVVsG.H.W.p..D.cp.s.pc+IusWMRtA...luhp-o+pLK...VsRFGD...NMR.cVAVT-GDKVuAQI+FGaSVNsauVGDLV.p.lVsu..lSDuD.l.sALl-E.Ycs.pYshssusp.....p.G.-.++psVh-AARIELGh+RFLEpGGa.cAFT.TsF......E.....D....L.........a..........G.....L.....K..Q...L..P.G.....L..A...VQ.R.LMpp.GYGF..uGEGDWKTAALlRlMKVMusGL....p....GGTSFMEDYTYcFc.pG..N-hlLGSHMLEVCPSIA................................................................. 0 70 136 171 +1229 PF02311 AraC_binding AraC-like ligand binding domain Mian N, Bateman A anon Pfam-B_12588 (release 5.2) Domain This family represents the arabinose-binding and dimerisation domain of the bacterial gene regulatory protein AraC. The domain is found in conjunction with the helix-turn-helix (HTH) DNA-binding motif Pfam:PF00165. This domain is distantly related to the Cupin domain Pfam:PF00190. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.58 0.71 -4.48 58 10833 2012-10-10 13:59:34 2003-04-07 12:59:11 14 20 2515 10 2189 13862 1372 132.60 12 46.69 CHANGED hsshtthpsph.tpta.s..HhHs..tatlthltpGssph.phsspp.....aplssGclhllsPsp.Hphtsss.........pt..saphphlhhpsshltphhtphthht...................hhpsspltphlpplhptl............ppttsshhtpshlhpll .....................................................ht..................h........HhH.s....th.p..l.h.h..s..h....pGs.s...p..h..t.h...s...s..pt......................ah..l.psGslh.h..l...s..s..s..p..h...H...p....h.t..sss............................................sh.p..h...h..h..l....h.h......p.......s...t..h.......h..............t...h...h...t...t...................................................................h.............h....hh........................................................................................................................................................................................... 0 712 1314 1755 +1230 PF03869 Arc Arc-like DNA binding domain Finn RD anon DOMO:DM07094; Family Arc repressor act by he cooperative binding of two Arc repressor dimers to a 21-base-pair operator site. Each Arc dimer uses an antiparallel beta-sheet to recognise bases in the major groove [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.28 0.72 -4.34 3 753 2012-10-02 18:44:02 2003-04-07 12:59:11 9 3 491 41 123 550 31 46.50 30 53.16 CHANGED uRcsP+FNlRhPcEVREpl+cVAEuNGRSMNSEIlQRVp-SLpKEGslsu ..............ph....lR.lPp..pl+-plcphAcpssRShNuE...l....lthLcpult.p......t................... 0 21 58 85 +1231 PF04659 Arch_fla_DE Archaeal flagella protein Waterfield DI, Finn RD anon Pfam-B_4437 (release 7.5) Family Family of archaeal flaD and flaE proteins. Conserved region found at N-terminus of flaE but towards the C-terminus of flaD [1]. 
20.80 20.80 20.80 21.50 20.50 20.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.47 0.72 -4.20 22 97 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 55 0 68 99 1 97.30 34 28.55 CHANGED .cspcps+LcslP.--shu.hlshcWLEFLhp+sGhsshs-sL-YYhslGWlS-cshscLhcas+Ghph.p-p.....pssscLohsDHllSLLaIE+LsGcp .........tppshLpplP.c-shuphlshcWLEFLlc+sGhpsh.csL-YYhslGWIS-cVhspLhcas+Ghch.t.c...............psstcLs....hpDHlhSLlaIp+LsG.................... 0 8 36 56 +1232 PF01917 Arch_flagellin Archaebacterial flagellin Enright A, Ouzounis C, Bateman A anon Enright A & COG3354 & COG3353 Family Members of this family are the proteins that form the flagella in archaebacteria. 29.30 29.30 29.40 29.40 29.10 29.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.30 0.71 -4.68 114 362 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 94 0 241 376 91 185.90 22 88.87 CHANGED GtsG.luslIlFIAhVlVAAlsuuVllsoushlpppupssucpsspplsoslplhssss...........ssssssshs...........lslhlp.NsGus.slclspspls..lh.ss............................................................................................................................................................................................................................................................tlhshpssstshsss....................pssls.G-hstls..ls...............................................hssssphph.plhschGu.ssslph 
............................u.sG.luolIlFIAhVlVAAlsA.uVllsoushlpppupssGcpusppluoslplhsssu...............................ssssss..s..lsp......................lslhl..p..ssG..us..sl...-lsps.plt....lssss..t.....ht.h..st..............................................................................................................................................................................................................................................................................................................t..thshhsl.pstssshtss.................shlp...u-...hhtlh.lsht....................................................................................................h.s.tp..thph.plhsp.Gssh.h.................................................................................................................... 0 43 130 195 +1233 PF01637 Arch_ATPase Archaeal_ATPase; Archaeal ATPase Bateman A anon Pfam-B_1507 (release 4.1) Family This family contain a conserved P-loop motif that is involved in binding ATP. This family is almost exclusively found in archaebacteria and particularly in Methanococcus jannaschii that encodes sixteen members of this family. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.46 0.70 -4.66 56 1765 2012-10-05 12:31:08 2003-04-07 12:59:11 13 92 762 3 830 5495 1741 203.60 17 44.02 CHANGED FhDREcELcclcchhccs.t.lhllYGPcssGKTuLL+phlpph..thsh....tslYhpshcphhttthcth........................tcpltcth...........................tpsl.psth...........phs.hsls.hhchlpccscc....lsllhDEl.phh..t.p................tstthlptLhphh-h..stphchhhl..lsuSS....-Glhhc..........lhGRppa.hplcsh.....hphhccsFcpl.....t....sc.ch.-clachsGGpPthLtpl .......................................................................................................hhsRppE...l....p.........l.........p....h.....h.......p........p........t.........................p....h.....h...l...l...h.G..RchGKTs....L.l.....p..p..h...h....p...ph....................................hsla....h.........s...h......p....p....t..t....t....p.....p.....h.p..p..h............................................tp..h....p..hh...............................................................................................................t......h....t.........h..........................t.......h...t.s..h..p..p...h.h...p..h...l....t.p....p..tpt................hll....h...l.....D.Eh...p.lh........p.......................................................................t..h.......p...h.ltp....hh....pp.................p......p.....p......h...h...hl........h...s..G...S..t.........s..hh.p............h.t.....p.t....s......h.h...s......p..h....p....h...h.l.p...h....................h.phh.........t...................................t....h...........hht...hhsGhs......h............................................................................................................................................................................................................................................... 
0 292 489 679 +1234 PF00798 Arena_glycoprot Arenavirus glycoprotein Bateman A anon Pfam-B_1047 (release 2.1) Family \N 25.00 25.00 28.70 28.60 18.10 18.10 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.84 0.70 -6.09 20 431 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 50 30 0 349 0 309.30 46 98.66 CHANGED MGQlloFFQEIPpllpEAlNIALIsVSLlAIlKGllNlaKsGLhQLlhFLlLAGRSCS...h.......pIGc+pphpolplshoplh....tphPtsCplNNoHaYl+sspsop.Gl-lolTssolls.......ph..tthsNlosChcssptt.....asLpWllsslHash.pssphl.stspspssuthpIQhNLoc....tpcstpaspplhsultclFGshpts.........................C......stsshpaLI.IQNoTWp...........scCphs...Hhsol+lLhpsstpphl.oR+LhuFFoWoLSDSsGNDhPGGYCLE+WMLluucLKCFGNTAlAKCNhsHDSEFCDMLRLFDFNKNAIcoLpspocpplNLls+oINuLISDsLLMKN+L+ELMsIPYCNYTKFWYlNHTtoGcHSLP+CWLVpNGSYLNEocFRN-WlhESDpLIoEMLsKEYp-RQG+TPLuLVDlCFWSTlFYosolFLHLlGIPTHRHIlG-uCPKPHRLsppGlCuCGhYpp.sK ..MGQlloFFQElPphlpEshNIsLlAlSllAllKGlhNlhpsGlhtLlsFLhLsGRSCo.......h.......hhtthhEhQolphsMspL......shP..CphNsSHhYlphGpp....hplohos.oll......................pp.sslosshppp........aslh.hhsshthsh...s.h..shspshstG.h.lQhNloh....s..shp.stphtsthhphahthh.s...........................C.........s.sh.hlh.hpspsW.spC.hs....hsh.thl.tptt.....tRplhuhFoWsLoDstG.p................................................................................................................................................................................................................................. 
0 0 0 0 +1235 PF00843 Arena_nucleocap Arenavirus nucleocapsid protein Bateman A anon Pfam-B_1333 (release 2.1) Family \N 22.70 22.70 22.80 33.30 21.70 22.60 hmmbuild -o /dev/null HMM SEED 534 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.76 0.70 -6.62 17 468 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 52 24 0 392 0 313.20 54 96.85 CHANGED ScpVPSFRWTQSLRRGLSsaTpsVKuDVl+DA+ullsuLDFspVupVQRlMRK-KRsDsDLs+LRDLNKEVDsLMpM+SsQ+sslLKVGsLo+DELM-LAoDL-KLKpKVhRsEt.ssssGVYhGNLTsoQLspRuclLchlGh....pt.pssssGVVRlWDVK.....Dso.lLsNQFGSMPALTIACMoc..QGuEshNDVVQuLT.sLGLlYTVKYPNLsDL-KLoppHssLplITp-cSuINISGYNhSLSAAVKAGAChlDGGNM.LETI+VpPssFoolIKslLpVKp+EuMFVu-sPGpRNPYENLLYKlCLSG-GWPYIuSRSQIpGRAWDNToVDLsscs.......ssspsPh+sGus.pLssLo.sQEt.l+cuhtpLDPssTTWlDIEGsssDPVElAlYQPsoGpYIHCYRcPpDtKuFKspSKaSHGlLlKDLtsAQP.GLlSslIctLPpsMVlTsQGSDDI+KLh-hHGR+DlKllDVchou-pARhFE-tVW-+FshLCcpHsGlVlpKKKKGss.s.o....pPHCALLDCIMFpuslsGt ......................................................................................................................................................................................................................................................................................................................................YIuSRopIhGRuW-NTsVDLss.c.s.......sssptP......pt.su.tshp........usLo.tQphhlK-.uhtpLDPssshWlDIEG.PsDPVElAlaQPsuupYIHhaRcPpD.KtFKpsS+aSHGI.lpDl.sApP.GLhShVIthLPpsMVlTsQGSDDIRKLhD.pGR+DlKLlDVcLop-puRpaEptVW-+atcLC+hHsGlVlpKKK+ttt....hp................................... 
0 0 0 0 +1236 PF00025 Arf arf; ADP-ribosylation factor family Sonnhammer ELL anon Swissprot Domain Pfam combines a number of different Prosite families together 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.12 0.71 -5.03 20 5460 2012-10-05 12:31:08 2003-04-07 12:59:11 16 68 556 115 3415 26010 2506 161.80 36 79.30 CHANGED hsslhupLhu....sKEhRILhLGLDsAGKTTILa+Lphs-lso...shPTlGFNlEolpa.....+Nl+FslWDlGGQcplRPhWRsYassTsulIaVVDSuD+-..RlsEu+pELpslLsE-ELp...sAslLlhANKQDLPsAhSssElpctLuLpp.......l+.....sRsWpl.sssAlpG-GLhEGh-WLussl ...........................................................................................h.......h.h......pc.c..h..+....l..l.h......l............G.LD..s.......A....G...K.......T.........T.....l.....L.......h.......p........L.....p.......h..........s........c.......h....s..p...................h.h....P.............T........l................G..........F.....N.....l......c......p.....l....p..h............................p...s......l........p.........h.......p...l..........W..............D....l.......G............G...........Q.........p.........p..........l............R.............s.........h..........W.............+...........p........Y........a.........p..........s.........s............p...........u...........l....l..........a..........V..........V................D..............S..........s..........D..........c.......c...................R......l............t........-............u.......+.......p........E.....L.....p.........p.......h.........L.............p...........c........c.........c.....L...p............................s..s.....s...l....L.....l....h..A..N.....K............Q.............D..l....s.....s.........A.............h.....s.....s......s......E........l.....p....c....t......L..s..L..p..p.................................lp...............p.R...t...h....h...l............s..s...s..A........h........p......G......p........G......l..........-.uh.cWL.
p..h............................................................................................................. 0 1187 1801 2657 +1237 PF01316 Arg_repressor Arginine repressor, DNA binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.06 0.72 -4.42 10 4229 2012-10-04 14:01:11 2003-04-07 12:59:11 16 5 2719 38 530 1777 256 69.40 34 44.70 CHANGED hsKspRpptI+cIIppcclsoQsELlctLpcpGls.VTQATVSRDL+ElshVKVt..ssGphhYsLss-sph ..........................p+ppRh.p....hI+.pl.lpccc..lp...oQpEllptLpc.p.Ghp.lTQuTlS...R...D...l...+-.....l....s...llK.lt.stps.p.....hhY.sLssp...h................... 0 156 308 440 +1238 PF02863 Arg_repressor_C Arginine repressor, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 25.00 25.00 30.40 29.50 21.60 18.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.18 0.72 -4.51 146 4296 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 2715 74 531 1750 261 69.90 29 45.54 CHANGED hlpphllslctspshlVl+Th..PGsAphlAshlDph.phsc..IlGTlAGDDTlhlls+stpssppltpplpph .................h.lpchllsl-hsst.hlVl+Th..PGs..Aphl.usllDp.........h..phcp.........IlGTlAG.D.DTlhlls.csspsupplhcplhp................ 
0 156 310 442 +1239 PF00491 Arginase arginase; Arginase family Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.00 22.00 22.20 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -12.03 0.70 -5.11 347 5883 2012-10-01 22:40:15 2003-04-07 12:59:11 16 18 2993 272 1841 4658 4174 269.20 24 85.75 CHANGED tclsll.GlPhDt...s.ssh.RsGs.chGPp......ulRpu.h.ht............hs...s................hplh....................................D......hGD.l........shs.............................p.tp.s...hcplpptspp.l.....lp..p.................uths......................lsLGGDHolohshlpuh...scth...................st..lullahDAHsDh.ps...............tsst..........hs.HGsshp...p..................................shp..hhlp.....tphhplGlRshsps....-...h.........t.h...cthGhp..hhshcclpphuh.t..th...hcphhptl.....t....t....slYlShDlDslD.PuhAPGoGTPts.G.Glohc-shtl.lct.l...ts.....hpl..lGhDlVEls.P....shD..s..................thTuhhAupllh..p.hhs .....................................................................................................................................................................phsllGhPhDh.....h..sph..Rs.Gs.....c......h.GPp........ulRps.h.ht.................p...s....................................hplh..........................................D.......hGD.l..........shs....................................................h...s.t.p..h.........hpplpptspp.l..lp..p.....................sths.......................................lslG.GD..H.plshshlpuh....scph.....................................s..lullahD.AHs..Dhps...................................ss.t...........hs..HGoshtp................................shpp...shlps........pphlpl.GlRshsss.........ph.................................phh.....cctG..hp.......h.hs..h..p.c...l...p...c.h.......uh.....t...........tl...........hpplh.phl.....t.........sp.....slYlohDlDsLD.P.u.hA........P.G..s...........G.....T...P....s.......s........G...G.......l.o...c.c.s.h.pl.lct.l.....tp..........
..hpl.....suhDlV.E....ls..P....shDt.s...................hTuhhuuplhhp...h............................................................... 0 557 1073 1504 +1240 PF00764 Arginosuc_synth Arginosuccinate synthase Bateman A anon Pfam-B_888 (release 2.1) Family This family contains a PP-loop motif [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.30 0.70 -5.42 12 4339 2012-10-02 18:00:56 2003-04-07 12:59:11 14 19 4054 37 1169 3498 3319 374.20 40 92.38 CHANGED VLAYSGGLDTSVsIshLp-chs..EVlulslDlGQst--lctscp+AhchGshcphslDAK-EFsc-Yha.uIpANAhYEs.YPLuTALARPLIAK+lVEsAcccGApAVAHGCTGKGNDQlRFEsshtshsPplclIAPhR-hshT.....RcctIpYAcp+GIPlssshcpPaSIDcNlWGRulEuGhLEDPhstPPc-lataTpsPtcssppP-hVcIsFcpGVPVuls......Gcphss...lplIpphNElAGtpGVGRlDhlEDRllGlKSREhYEsPuAhlLlpAH.csLEslTLsR-.h+FKchl-pp.auELlYpGLWasPL+csLcAFIs+oQE+VTGhVRV+La+GshhlhGRcSshu.LYstcLsoa-.scshDQptupGFlphaGLpu+lap ...........................................lLAYSGGLDTSlhlhWLpcch....t-VlAh.su.........D..l....G..Q..s............-....-..h...-.s..l.c.c....KA.hphG...A...p..c....s..hl...lDs+cEF...sc.-a.lhssl.pus.A........h...Y....E....s....p....YhL.sTul.u....R.P...l...I...u......+tLVc.....hA+..cpGussluHGsTGKGN.D....................Q..................VRFEhshh.u.ls.PpLcllAPWR-hs.h.h.....uRc-hlp....aspppslsl.t...h...........s........h...c........p....sYShDpNlht.t..ohE.......u.t.LE..ss..h.....s.....p.....s....s....c.s....h.a......th.o.h.ss.-.p.s.ss...........p.s-.lplpF-p..GhPVul..N......................Gcp.h.ss......spllhcLNclu..G+HGlG.R.lDhlENRl..lGhKSRGl.......YEs..................PGuslLhtAH.ctLEs.lsls+-.shc.h+.c.t.l.tt.c.hucLl.Yp.........GhWFsP..p...p.hLp.s.hl.pc.....s.ppt.V.oGpVplc.......La+G.s.hh.lh.u.pcS.s...p.s...L.Y...stc..hsTa.p..c..s...c.....s..a.stpcAhGFIplhuL..p...t.......................................................................................... 
0 359 740 981 +1241 PF01960 ArgJ ArgJ family Enright A, Ouzounis C, Bateman A anon Enright A Family Members of the ArgJ family catalyse the first EC:2.3.1.1 and fifth steps EC:2.3.1.35 in arginine biosynthesis. 25.00 25.00 34.00 28.60 21.50 16.80 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.42 0.70 -6.06 139 2527 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 2367 44 817 2155 2927 371.30 40 94.80 CHANGED FpsuuspuGlK..................ps.+hDLullh..sp...tsussAGVFTpNphpAAPVhlsccpl.......pss.p....h+AlllNSGNANAsTGppGhpsAtphsphsAptLslss............p..........pVlluSTGVIGp.L.Ph-pltsulsp..hst.tL.........s........ts..shpsA...AcAIM.TTDThsKtsuh....phpls.Gp...plpluGlAK..GuGMIcPNM.......ATMLuFlsTDAslsssh.LpphLcpulcpSFNpIoVDGDTSTNDolllhAsG..tu.ss............s.lsp...ts.t...hptFppuLptVstcLActIs+DGEGATKhlplpVpGAtscp-AcplA+ulusSsLVKTAlaGpDsNWGRIlsAlG.hu.....Gs...sh-ssclslhl.........s....s.............lhlhpsGthss.as.E..ppupphhpp.........p.-lpIpl-L.....st.Gs..upussWsCDLoa-YV+INA-Y..RT ...................................apsuuhpAGlK..................ss...+tDlullhss....ss..u.s.s.A.uVFT.pNphpAAPVhhs+ppl.......ssu..p.........hp...AlllNSGsANAs.T.........G........tp..Ghpcupphspts...Aptl...s..ht..s.............p...............pVhlsST....GVIG..p...LPh..-.+lhs.ul.sp..hh.s.sh...............tts........shtsAAcAIh...TTDThsKtssh.....p.h.p.....l.....s.....Gp.................slsluGhAK..GuGMIpPNM...................ATMLuF............lsTDAsls.....s.sh.LpphLppsscpoFNpIoVD...GDTSTNDslllhAs.Gtu...st............................................lpt.......tp.p......httFppALptVspp.....LAptls+DGEGATKhlpVpVpGAt.opp-AcplA+slspSsLVKTAla.GpDsN.W..GR.IlsAlGhu................us...........shD.s.splslhl......................ss......l.lscpGt.ss.....as...E....ptspthhpp...........................................c.-lpIplcL..........sh...G...s........u.p.ussWsCDLoa-YVcINusYRo............................... 
0 273 556 708 +1242 PF03308 ArgK ArgK protein Mifsud W anon Pfam-B_3540 (release 6.5) Family The ArgK protein acts as an ATPase enzyme and as a kinase, and phosphorylates periplasmic binding proteins involved in the LAO (lysine, arginine, ornithine)/AO transport systems. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.69 0.70 -5.62 9 1738 2012-10-05 12:31:08 2003-04-07 12:59:11 11 13 1333 12 625 2137 1076 266.50 40 60.95 CHANGED LARAITLVEsccs....pAppLLcclhPhsGpAphlGlTGsPGuGKSTLl-tLhtpLtccGh+VAVlAVDPoSPhTGGuILGD+hRMpchu......scsGsFIRshsopGsLGGluptTp-sltLh-AhGaDVlllETVGVGQSEV-lsphsDshlllplPGsGD-lQuIKtGlMElADlhVVNKAD...htsActstp-LphALcL.p......................c..ctsWpPPllcssAspGcGlcELW-tIc-H+chlptoGhhtp+RRppttc.hhpllpstlhs+lpuu.s ........................................................................................LA+AITLlES..pp.stc....................hu.p..pl..L...p...t.....l.......h........P...........t..........s..........G....p.........u......h.........R..l...G.I.T.Gs.PGAGKSThl-ulGh.t..L..........h...c..c...G...t...+..VAVLA.....VD......P..S......S...s......hoG.....G......SI...L......G....D.+T...R...Ms.cLu...................................p.c......s..uF.....IR..P...s.s...o...pG.....p.....L....G.....G...l.up.....t....s.....+E.....shh.lh.E.A.A.......G.....aDll....lVET.VG...VG.Q...SEs...s.V.sphsD...shl.hl......h......lsusG......DpL.Q................u...I.K....+....G.l..h....El...AD......l...ll.lNKuDh.....pps.Apt.u..t..p.-....h.p.s.u.L...+...lhpt..................................................................................p....tstWp.P...VlssS..A..h....p.....t........c..Gls-lW...pt.l..c..apph.h..pt....s...G...thp..t..pR....p..pQ.t.htahhp.hpptlhpph....s................................................................................................................... 
0 245 446 556 +1244 PF02374 ArsA_ATPase Anion-transporting ATPase Mian N, Bateman A anon Pfam-B_1201 (release 5.2) Family This Pfam family represents a conserved domain, which is sometimes repeated, in an anion-transporting ATPase. The ATPase is involved in the removal of arsenate, antimonite, and arsenate from the cell. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.70 0.70 -5.38 11 2686 2012-10-05 12:31:08 2003-04-07 12:59:11 10 13 1221 72 1090 14632 4072 239.70 25 76.97 CHANGED h+alFhuGKGGVGKTTsSsATAlp..Luc.G++sLllSTDPAHsLSDuFsp......chG+pPTKlp..-NLhuhEIDPp........hplpchhtpshc.hsssh...thphlpshlp-thsu.PGh-EhhuFsphh+ahc......sscaDlllFDTAPTGHTLRLLphPsshsphhc+hhchpp.....ltshhp.h.t...hGusshs....chhppl-pphEplcthp-hloDPspToFhhVshsEchSlhEocRshptLtpaGlcscslIVNp.llPEsspp....spahpu++plQpKaLcpI--hFpshslsclPLhppEltGlcsLpchuphLhs ...........................................................hl.hh.s.G..KGGVGKT..Th............us..u.h.Alt...........h...A...p...........t...G.......c.......c..ll.l..l.............o.o..D.............P.....A....p..s.......L..........u.........p...s..h...s......................................p..h..s..t..p...........h.........l..........t....................................s...........s........L..........s...........h....p...l..D.sp......................tt.h..p.c...h..h....t...p...h.....h.....t............h.....t...t.....h.........................t.....h........s....h.....h.....p....c.......t..h.....s.....u...............P...s.........h....p........E.....h....h.....s....h..t.........p....h..t..c.hhp.........................ptpa...-...h...llhDoAP...T.G..........c..TLc...h..Ls......h..P...p.th.........p...t..h..l.......t.........t...............t...............................................................................................................h..t.t..h.t.t.....................t....h..........t........t...........h............t..........hts...........t.
..........othhh.V..h.sp..s..l.Es.ph.t..Lt........t..h..t.ll..lNt...h..h............................................................................................................................t..................................................................................................................................................................................................................................... 0 393 721 952 +1245 PF02040 ArsB Arsenical pump membrane protein Mian N, Bateman A anon IPR000802 Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.53 0.70 -5.52 4 1841 2012-10-02 15:12:49 2003-04-07 12:59:11 10 4 1464 0 343 2231 187 367.80 44 95.11 CHANGED LAhsIFLLTLVLVIWQPKGLuIGWSAslGAVLALIsGVVshuDl.sVhsIVWNAThTFlAVIlISLlLDEhGFFEWAALHhu+.hupGpGh+hFsalsLLGAhVAALFANDGAALILTPIVlAMlhALtFsctsshsFlhAuGFIADTsSLPLlVSNLVNIVSADYFslGFhEYAShMlssslhSllASllVLaLaFRKsIP.sYsLppLKsPtpAIKD.shF+luWhlLhlLLlGaFs.E.luIPVSslAushAlIhlhlAp+u+AlpTt+Vl+tAPWpIVlFSlGMYLVVYGL+NAGLT-lLuslLsshu-pGLaAushGhGFluAhLSSIMNNMPTVLlsALuIsuSsssGhl+EAMIYANVIGsDLGPKITPIGSLATLLWLHVLopKGM+IoWGhYF+TGIllTlPVLhlTLsuLhLpL 
...........................................................................................hAhhlFlhT.l.l....h..l....l...W..p....P......+......G.....L.s...l.........u...h..s..A..h..h.G...A....h...l......s.l....l....h...G.l....V...p..h..s...D...l................V......h....s..........l........V.......W...s.A....o.h..s...F...l.u.l.I.lI.Sl.lLD...c....G.F.F.-....W..u..A...l+.h.s.+.......h...up.....G...p.....G..h.tLF.sal.h...LLG.AhVuAhFANDGAAL.ILTPIVlAhl...h..sLs.a..........s.p......t.s.h.......h...s.......Flh.A.u...GF..I..A.D.......o....u....S....L....P...Ll..V.S..N..L.V...N.I..l..o....A.....c....a.....F....s....l..u.F..h..c.Ys......u.....h......M.h........s..s.l.h.....u...l....l...A..o...l..l.....h...L...a...l.....a......F.........R....+.......s.....l....P....p...p...........a.....-.....h......t...............l..........p.........p.....P..t........p.........A......l...p.............D...................t.......h.....F......+.....h..u.....h.h.l..L...h......l.L.......L......h....G......a......h............l...h....E.......l........s........l...........P................l.........S..h......l....A...s..l....s....A....h.......l....h....h......h.......l.......A..t......+......u......p.....s......l.............s........s....t..........p..........ll.+..s..A...PWpIVl..FSlGMYlV.VaG.L+N..s.G..l....T....p....hLu....t.....l....L....p....h.....l.....u......p.....t......G.......l....h.s...uh....h....G..hG...h.l.....s...A.h.LSul..h.NNh..P.....s.....V...............L............l........s........s.....l.........u.........l...s.......t........s.....s..........s.....s....s.....h....h..p.....c.......u...........h......l....Y..A.N.l....I...GsDLGP.K.....lT.P.I.G.SLATLLWL+..V...L.sp....K....s........h...p..I.o..Wu........Y...h...+s.G.l.l...h.T.l.P.l.Lhh.T.LhuLhh.................................................................................................... 0 93 207 283 +1246 PF03960 ArsC ArsC family Bateman A anon COG1393 Family This family is related to glutaredoxins Pfam:PF00462. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.33 0.72 -3.99 36 7591 2012-10-03 14:45:55 2003-04-07 12:59:11 10 18 3508 24 1253 4303 1099 107.40 26 86.65 CHANGED YtsssCsos+KAhpaLcppslpaphhshhpsshscccLppllpphs.s.hcpllsp+upsa+pLs.....ls.pplot.scllcllhcpPpLl+RPIlhss......c+lplGass-plpth ......................Yt.PsCsos++A.h....p....h...L....c....p...........p....s....l..p.h.p.h.hph.h..p..ps...o..tcpLppllpph.......u.....s....hcpll..ss+u.p.ha+cLs.......hp...ss.l.ot..pp..h..l.plhhppP.tL.l+RP.Il.lss...........pp.h.p.l.Gasp-phtt......................................... 1 315 718 1014 +1247 PF01129 ART NAD:arginine ADP-ribosyltransferase Finn RD, Bateman A anon Prosite Family \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.53 0.70 -5.25 9 454 2012-10-01 23:25:29 2003-04-07 12:59:11 13 17 114 8 209 503 14 201.40 31 63.26 CHANGED LDMAssoFDDQYtGChccMct+.hPpL.+pEFstspthsssWcpAppcWp..........cR......psphs...........hPhsF+DpHGlALlAYTus....s.la+.FNpAlRpuGtS+pcYhtpFpFKslHFhLTcALQLLps....tsCpp.....VaRGl+.....GhRFp.st.GtoVRhGpFsSoSLp+psAp.....FGpsThFslcTChGssI+saShaPpE-EVLIPPaEsFpVsshpp.spu.spI.LcShs+pso ..................................................................................................shs.ssFDD...pY.sCtpp.h.tt..hs.p..h..hppEhttpp.htpsWppA..p.tWt.....................pp...........ttth............hP...s.h.pspau.l...AlhsYTss.....sslappFNpAl+....psutohp.pY.h.p.p...F...p..FKslHahLTpAlQlLpp.............thChp........V.a.RG..sp.............s....h.p..F....p......s..t...hG....t.p.lRhG....pFs...SoS...hpct.Ap................pF....G....p......p..........T......h...F......sl...h......T....C....h........G.......ss.......l...pta.S.hh...t.E..cEVLIPPhEhFpVhpht....ptt.ts.....l..L.t.tp............................. 
0 25 49 112 +1248 PF02497 Arteri_GP4 Art_glycop; Arterivirus glycoprotein Mian N, Bateman A anon Pfam-B_787 (release 5.4) Family This is a family of structural glycoproteins from arterivirus that corresponds to open reading frame 4 (ORF4) of the virus. 20.70 20.70 24.20 24.10 20.50 20.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.28 0.71 -4.35 3 241 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 9 0 0 248 0 176.90 80 99.36 CHANGED MAuoLLFlLVGFcClLVSQAFACKPCFSouLSDIcTNTTAAAuFhVLQDIuCaR.HRsSAu.EtIp..KsPQCRTAIGTPpYVTITANVTDEuYLHsADLLMLSACLFYASEMSEKGFKVlFGNVSGlVAVCVNFT-YVuHVpp+TQ+.pLVlDHlRLLHFLTPSsMRWATVIACLFAILLAI ............................................................................MAAuhLFLLVGFcChlVSQAFACKPCFSSSLSDIKTNTTAAuGFsVLQDISCLR..HGsS...uS.......sIR...KsSQCRTAIGT..PVYITITANVTDENYLHSSDLLMLSSCLFYASEMSEKGFKVVFGNVSGIVAVCVNFTSYVQHVKEFTQR.SLVVDHV.RLLHFMTPETMRWATVLACL.FAILLAI. 0 0 0 0 +1249 PF01481 Arteri_nucleo Arte_nucleocap; Arterivirus nucleocapsid protein Bateman A anon Prodom_2418 (release 99.1) Family \N 20.40 20.40 20.60 22.90 19.50 17.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.37 0.71 -4.00 4 700 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 11 6 0 662 0 110.20 70 93.69 CHANGED SQpK+Kp....GNGpssN.QLsQhLuthl.tpttQs+....Gptp+KKpPtKP.HFPlAs.sDlRHchT.sERphChSSlQThFNQGuGosoLuDSGtlsaTVpF.LPTppTVRLIpsoAss ...................pQpK+Kp....GsGQPVN.QLCQhLG+hI.uQppQsR.....G....p....psKKKp......PEKP.HFPLAsEDDVRHHhT.oERpLCLpSIQTAFNQGAGTsoLSsSG+lSapVEF.LPstHTVRLIRVTuos............... 0 0 0 0 +1250 PF01606 Arteri_env Arterivirus envelope protein Bashton M, Bateman A anon Pfam-B_664 (release 4.1) Family This family consists of viral envelope proteins from the arterivirus genus; this includes porcine reproductive and respiratory virus (PRRSV) envelope protein GP3 and lactate dehydrogenase elevating virus (LDV) structural glycoprotein. 
Arteriviruses consists of positive ssRNA and do not have a DNA stage. 21.20 21.20 21.70 256.50 20.60 21.10 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.67 0.70 -4.68 3 333 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 8 0 0 354 1 211.30 78 82.56 CHANGED MAcpCshFHhFL.CuFICYSsCCAVsANSoA..ThCFWFPLs+.GNTSFELolNYTlChhCsTsQAAtphLEPG+shWCRIGHDRCEEsDHDELsFslPsGhp.Lp.EGa.TSlYAWLAFLSFSYAAQFHPElFGIGNVSpVaVDh+HQFICA.HsG.NSTLscH.+NISAlaAlYYpHQlDGGNWFHLEWLRPLFSSWLVLNVSWFLRRSPAS+VSRRl ...MAsSCsaLHIFLpCuFL.YoFCsAVVANSNu..TaCFWFPLVR.GNFSFELsVNYTVCPPC.TRQAAuphLEPGRShWCRIGHDRCpEsDHDELGFhlPPG...LSSEGHLTSVYAWLAFLSFSYTAQFHPEIFGIGNVScVYVDIKHQFICA.HDGpNuTLPRH.DNISAVFQTYYQHQVDGGNWFHLEWLRPFFSSWLVLNVSWFLRRSPAS+VSVRV. 0 0 0 0 +1251 PF00951 Arteri_Gl Arteri_glycop; Arterivirus GL envelope glycoprotein Finn RD, Bateman A anon Pfam-B_425 (release 3.0) Family Arteriviruses encode 4 envelope proteins, Gl, Gs, M and N. Gl envelope protein, is encoded in ORF5, and is 30- 45 kDa in size. Gl is heterogenously glycosylated with N-acetyllactosamine in a cell-type-specific manner. The Gl glycoprotein expresses the neutralisation determinants. 21.80 21.80 22.90 24.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.97 0.71 -4.68 11 8906 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 12 0 0 8965 0 171.80 75 86.54 CHANGED .u.uhss.sss..p..p.h.ap.hhpphssTthhpphhsaulpohslh.hsp+lhphthLshsphl.hlshusl.uhhsahthchshossYuhsAlsALla....Flhp....ss..+.ChstRhhhst.ssFl.sscG+..lapscosl.lVc+sGpstVsGcLV.slKplVLsGpKAs.hstssAcpWp .............hss...hsNAsss.S.SSHhQ.LIYNLTLCELNGTDWLs.s.+.FDWAVETFVIFPVLTHIVSYGALTTSHFLDTVGLlTV.STAGFhHGRYVLSSIYAVCALAALlC....FVIR....hA..KNCMSWRYSCTRYTNFLLDTKGR..LYRWRSPV.IIEKGGKVEVEGHLI.DL.KRVVLDGSsATPlTRVSAEQW.G................................ 
0 0 0 0 +1252 PF01097 Defensin_2 Defensin; Arthro_defensin; Arthropod defensin Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.12 0.72 -4.23 15 314 2012-10-01 23:31:40 2003-04-07 12:59:11 13 1 158 14 58 362 0 33.90 42 40.60 CHANGED u.hh.lNcoACAtHCls+.G.p+GGYCsu..KsVClCR ............hhsh..spusCsuHChuh..G.p+GGYCss...ptlCsCh............. 0 23 27 51 +1253 PF04161 Arv1 Arv1-like family Wood V, Finn RD anon Pfam-B_9351 (release 7.3); Family Arv1 is a transmembrane protein with potential zinc-binding motifs. ARV1 is a novel mediator of eukaryotic sterol homeostasis [1]. 20.00 20.00 20.30 21.50 18.20 19.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.82 0.70 -4.42 21 319 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 253 \N 217 304 3 176.80 27 62.27 CHANGED hlCIpCspt.lpsLYppY...Ssshl+LTpCspCp.cssDKYlEaDsVllaIDll.LLKspAYRHLlaN.h......................................scacphh+lhllhlLhEsY...........LhWtppccp.pps..............................h.h..hhp.tshhhQhlhFhhthll....phhhhhhhhthhhhthhphs......................ht.thphlhhslLlSthuKlF.ILhlIWsas.shhuhtllpllshhphlpuL ........................hClpCsp..sppLappY...............s.t.......t.s.......l+lo.CtpCp.phs......DKYlEaD.lllhIDll.LhKspsYRHlLaN.......................................................t...p..hh+.lhlh.hl.L...h-.sY............lpWh.p.ppp.t.s..................................................hhp.....h.thhhh.h.hh..shh....ts.hhh....h.h.hht...hhhh...hh.h............................................t....l.hslhlu.......hKlh..l..h...h.lW.hs........hthhh............................................................................................................................................................................... 
1 62 112 171 +1254 PF01731 Arylesterase Arylesterase Bashton M, Bateman A, Fenech M anon Pfam-B_2101 (release 4.1) Family This family consists of arylesterases (Also known as serum paraoxonase) EC:3.1.1.2. These enzymes hydrolyse organophosphorus esters such as paraoxon and are found in the liver and blood. They confer resistance to organophosphate toxicity [1]. Human arylesterase (PON1) Swiss:P27169 is associated with HDL and may protect against LDL oxidation [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.71 0.72 -4.00 6 216 2012-10-05 17:30:42 2003-04-07 12:59:11 15 7 94 0 130 450 101 84.20 42 23.51 CHANGED NDIsAVGP-pFYuTNDHYFscsaLhhLEhhLshtWosVVYYSPsEV+sVApGFsuANGIslSsDcKalYVA-lls+sIHlhcKHss ...................................NDIlAVGPcpFYATND..H.YF...ss...s...h...L..p..hl.E.h.a.L.s..L..t.......Wus.VVY...Y...S.......P...p....E......V...+...l...V...Ac...G...a...s.ANGIs......lSsD...p....K...alYVA-..lhs+pl+Vhc+p.......................................................... 
0 37 50 79 +1255 PF00858 ASC Amiloride-sensitive sodium channel Bateman A anon Pfam-B_415 (release 3.0) Family \N 23.00 23.00 23.10 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.65 0.70 -5.83 114 2052 2009-01-15 18:05:59 2003-04-07 12:59:11 19 39 116 15 1489 1878 5 339.30 17 78.79 CHANGED FspsoolHGhpal..h.sspt..........hhc......................R.....................hhWhlhhlsulshhhh.hhphhppah.spPslssls..hht..pls.FPAVTlCshN.h.......chst..lpt....h.........tt.hthht.hh...t.................................................................................................................................................................................................................................................t.............................phshhphhpph........uhsh.c-hl....hpCpapup..........h..spp......ahp.......................................................................................................................................................................................h..h....hTp.hGh.CYoFNsttt...............................................................hps....ttsGhpsGLplhLph......p..pp-hh..sttss...............................................................................h....Gh+lhlHsts-hPhhsp........shtlssuh..p.shlulp.pphp.......pls.............sshpp.Ch.ss.....p..........h.h.....ttY.ohssChhcChsphhlcpCsCs.hhh................................tphsh..........Csh..pp...h.pC..........hpphhtphh......................tC..p.C.ssCsphpYs...hphohsphsspt...h......................t.p.s.t..ppshthlplaa...cphsa.pphccp.sashssLL...............................uslGG.hGLFhGsSlLollEllhah 
.......................................................................................................................................................................................................................................................................................thtu...hh....t.....................................p...............................hhWhhhhhh....hhhh....hh....h.......h...............h................a....p..................h.................hthp..........................................ph...F.Pu.lo....lC.s..s.....h.......p.p.......ht..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ph.ph.h...th..........................t...h..t.p..h.h.....h.C........a...t..t..............h.......pp..........h................................................................................................................................................................................................s....G....C.hshN.................................................................................................................h..........su..t...............G...l..phhl.t.h...........t....p.p....h........................................................................................................Ghp.l.l...at.........tp..s......t..t.............hh.......h.......s........t.s..h......t.hp....h............................h...............h.tp....Ch.tt..............................................Y....o..t....sC..hpC..p...h..h..p.....C.tCh....h......................................t...............C.s......tp..............h..C..........h.........h..........
.........................................................C..........C...sCp...................a.t.......h.t.h.t.......h......................................................................................t..t.t..h..hlplhhtt......h....h..pt..hth.thh.......................................................................................sshGu.hulhhGhShlshhEhh.................................................................................................................................................................................................................... 0 637 750 1219 +1256 PF01671 ASFV_360 African swine fever virus multigene family 360 protein Bashton M, Bateman A anon Pfam-B_1174 (release 4.1) Family The multigene family 360 protein are found within the African swine fever virus (ASF) genome which consist of dsDNA and has similar structural features to the poxyviruses [3]. The biological function of this family is not known [3]. Although Swiss:Q65137 is a major structural protein [1]. 22.50 22.50 26.90 26.20 22.40 22.40 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.46 0.70 -4.95 14 126 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 11 0 0 100 0 205.90 43 60.99 CHANGED EpT+cLCccLGAKptLpcpclhphFhc.lpcpKTuuNlILCHElFssN.PlLcsls.tcl+hhIahcLctl...h..hhsphohsphLsKYWYuhAlcasLpcAIpYFYQ+asHlppWRLhCALhaNNlFDLH-lYpp-+lcMDhsEMMplAC.h+DpNahTIYY.CalLGAsINpAMlsSlp.aN.................hsNlFFCIDLGAssFEEuhtlAtpcshphlhplLs ............cpT+cLCccLGAKptL.pcpclhphFhp.lpcpKTSsslILCHElFssN.PlLpsls.hch+hhIhhpLcth...h..hhsphohsphLsKaWYuhAlcasLpcAIpYFhp+atHhppWRLhCALhaNNlaDLH-hYppcclcMDhs-MMplAC.hpDtNahTIYY.CF.hLGAsINpAMhsSlp.as.....................hsNlFFCIDLGAssF-EuhtlAtpp.s.phlhphL............. 0 0 0 0 +1257 PF05170 AsmA AsmA family Bateman A anon COG2982 Family The AsmA gene, whose product is involved in the assembly of outer membrane proteins in Escherichia coli [2]. 
AsmA mutations were isolated as extragenic suppressors of an OmpF assembly mutant [1]. AsmA may have a role in LPS biogenesis [1]. 29.80 29.80 29.90 29.90 29.70 29.70 hmmbuild -o /dev/null HMM SEED 604 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.67 0.70 -5.90 12 3636 2012-10-03 05:41:17 2003-04-07 12:59:11 9 19 1503 0 623 2962 466 400.20 19 70.17 CHANGED M+phh+hlhhlLlsLllll....hulhhhhssp.hpsplpp.Vuttsshplshcus.t............chhhP..plpLpslplussst.t.......hsplcplclsLushsLhspplplsplhLcssslpLsh.spsphshst.lp.pp.shss.............................ss.ph.luplplssuplhhpscstpphh..phslphpssslht...................................hshspstlsGphcpctpsh.s.lsssh.spsshst.thpst.t.......................hhtthuss.hphsshclhssplth.sshhhpsh..phsh.shpLphssht...s..thpt..th..htt.hh.sthsh..sPtslsltphstpaptupspsshth.p.th.h.............................................cptulsGht.ss.ssh.......h.pshPphp.pLphppLslschh.lpts.........pps.hptsshsstsssh.Lspcph.hshWps..............slp..hssspssa.spl.lscsthphpspsGhlpLsplpuslttGhlpusuplshsphstphplphsucsl.lp.Lhpth...hslpG.hslshshpGtutssshlhtslsGplchshssuthp...............................tt..tst.h.hph.phshshppGhlohs.h 
............................................................................................................................................................................................................................................................................................hhhphhh.h...h...h...hsh..l.h.hl.........hhlh....h.h.h...p........p....h.p.sh.h.sp.........l...ptp..s.h..ht...h...p.u....t..................hth...h..P.......plhh..t.ph.....tl.....s.t........s.......t..........t.................hlphpphch.sls...hs.L.h....s....p....p....l.l.plhL.....ptsshp..L........p..t....s.....t........s...............h....t........p..p......t...ss..........................................................................t.................p.......h.....p.......l...s..t.h.h.s.....tuhl..h.p......p......s..p.........................p.h.p..h.......s........................................................................................................................................................................t.h...h.....t.........t.......h..p.........................................................h.......h.........th.........................................................................................................................................................................................................................................................................................................................................t............h.............................................................................................................................................................................................................t......t.ht.u......hh...t..............................................s...h.....pl.......hphtt............................................t..............................................................h....t..................ht.........................................ph...hphtt.h..h......t.h....h..pp....hth..p.......t.s.
....htlp..hph..h...tG.ht......h..t...h.......p...................p...........................h..t..h..p.th..p..th........h.....t..................l..........................................................................G.....h.sh...h.phpu.....s.....hh...p.tGphth..ttt........................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 138 316 463 +1258 PF00733 Asn_synthase Asparagine synthase Bateman A, Griffiths-Jones SR anon Pfam-B_443 (release 2.1) Domain This family is always found associated with Pfam:PF00310. Members of this family catalyse the conversion of aspartate to asparagine. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.71 0.70 -4.89 104 5876 2012-10-02 18:00:56 2003-04-07 12:59:11 16 41 3139 38 2130 12423 8100 248.10 24 44.92 CHANGED plcchLpcuVppphh............uDlslGshLSGGlDSShlsulspc...........................................ts.lpoFoluapsss.................................................-hthActsApplusc..Hpphhlstp..-hhsh..l.cllhth-pPhs.....sshshshahhu+ts+...................ptspVsLSGcGuDElF..uGY....thhpps.thttht..hht.hpthhhh.h......................................................................................t......hh..hthh.st.L.t+sD+hsMutulEsRsPFLDccll-hshp.lPsphKh.psstt................................KhlLRcsh........pphlPcplhp.........RtK 
...........................................................................................t.hpphl.ppuV...p..p..p.hh.....................uD..l..s..l.G.....s.h.L..S.G.GlDSS..l..l.s.u.l.s.pc...................................................................ht.......p..l....c..o.....F..s...l....u..h...p.s..s..s.........................................................................................-h.t.h..A..c..p.l....A..p.t..lGsp........Hpph....h......h..s..........p.................-.h...hpt.......l.p.h...l..h.t..h...-...p..s.....................tssh..s.h..a.h....h..s.c..hhp.....................................................................................tth.ph...s...L.SG-G...u....D....El..F...........u...GY...............................h.....h.t....p..t..........h...t......h.....t............h.....t...h...t.h....h.............................................................................................................................................................................h+s..s.+..h..........hu.h...u....l..........E....sR..s....PFL....Dp.chh.chs.hp....l.s.p..K..h...t.t.stt...............................................KhlLR.psh........................pt...h.l.P.t.p.lh.R.K...................................................................................................................................................................................................................................................................... 
0 735 1406 1834 +1259 PF03590 AsnA Aspartate-ammonia ligase TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.80 21.80 21.90 24.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.47 0.70 -4.94 6 1603 2012-10-02 14:22:40 2003-04-07 12:59:11 10 4 1542 4 211 863 13 236.20 56 72.62 CHANGED KoFIhQ.QppISFlKNTFTppLtE+LGllEVQGPILSpVGsGhQDNLSGtEKAVpVpVKpIPDAsFEVVHSLAKWKRHTLARasFupsEGLFVHMpALRPDEDsLDtsHSVYVDQWDWEKVIssGcRNluYLK-TVcpIYuAIRhTElAVsp+F.sltshLPcpIsFVHSEELVcRYPcLssKcREsAICKEaGAVFLIGIGGcLSDGKPHDlRAPDYDDWTT.SEstYKGLNGDILVWNslLscA .....................l.p.Qpt..IsaVKspFpppLpccLsLlcVpAPlhscsGsGhpDNLs..GsE+sVphcVKs...l.P..D.u...p...hE..VVHSLAKWKRhTLucasFstGEGLYTcMpAlR.DEDp....LsslHSlYVDQWDWE+Vls.c.s..cRs..hshLKpTVcpIYtuI+tTE..........t........tV.ptca...s...l........t....s...h.....LP...-.pIpFlHSpELlsRYPDL.s.sKpREpAIsKE.hGAVFLlGIGGcL.s.DG+..HDsRAPDYDDWoo..................st...........u..........h.................GLNGDILVWNslLppA....................... 0 102 148 179 +1260 PF00026 Asp asp; Eukaryotic aspartyl protease Eddy SR, Griffiths-Jones SR, Finn RD anon Overington enriched Family Aspartyl (acid) proteases include pepsins, cathepsins, and renins. Two-domain structure, probably arising from ancestral duplication. This family does not include the retroviral nor retrotransposon proteases (Pfam:PF00077), which are much smaller and appear to be homologous to a single domain of the eukaryotic asp proteases. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null --hand HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.08 0.70 -5.19 23 4762 2012-10-02 15:32:34 2003-04-07 12:59:11 18 56 583 656 3018 6175 72 282.30 24 71.72 CHANGED pYhupIsIGTPsQpFsVlFDTGSSsLWVP...SspCsss........................uCpp+spacPSpSSTapps.Gps...a.sIpYGsGS.hsGhlupDTVslGGlslssQtFu.usp...su..h.....htuhhDGILGluasulssst...........hssVasslhsQullspslFSlYLspss....ttsGpllFGGlDsu+YoGslpalsVo..........ppsaWplslculsluss......shsssssp..............AllDTGToLlhhPsshlspltpslGAp.sp.................................................................................................t.....GtYslsCsshsphPslsFhls.Gtpaslssssalhpsssss..........ChhGhps......sssshaILGDlFlRphYsVFD.hsNsclGhAts ...................................................................................................................................Yhstltl....G...o......P..........s.....Q......p.h...p.lhh..D.T..GS.u...s.hW....Vs..........s......s..........................................................................................................s...............p.....t.....h.....a....s......s...p........t...S....s........o..h......p...........................s.....p.t............................h...pl...p......Y......u....s..........G..........s.......h......p........G........h......h..........s.......p.......D.............s..........l.............p...............l...............u..................s....................h..........s.........l...........p..........p............t.......t..........h.........u.........s......p..p.................s..........h.........................s.....h...s...G....l....l.Gl..u......a..s...p..h....s.sst..................................................h.shh..t..s.h....h..p..........p.....t.....h...l....s....p....s....h....F....u.....h.....h.........L.........s....p....ts.....................s.....G.....t...l..h...h.....G.....u......h...............D.....t....s......h..........a......p...
.......G..........p......l....t...a...h...s...l............................pt..s.h..a...p......l...p.....h.s...s.......l.....t..l...s..sp...........hh.s...t...t.....s....h.t...........................................................................sl....l..D....oG......T..o..h..........h.h..........h..P....p....p....h....h....p....t....l.........p...t....l....s..u.t........................................................................................................................................................................s..h.h..l..s..C.....s......t.......h......................h........s.......s....l...........s.....h..........t........h..........s...........s..........h...........p........h..s.....l........s.....s.......p......p.....h.....h.......h.....p.............t...t....t...................................C..h............s.h.s.........................................................t.......s........s.....h.........h.....l.......LG.s.s.F....lpp.h.as..l.a..D....h...t....p....t....p..luhA............................................................................................................................................................................................................................. 0 1115 1670 2449 +1261 PF02261 Asp_decarbox Aspartate decarboxylase Mian N, Bateman A anon Pfam-B_3879 (release 5.2) Domain Decarboxylation of aspartate is the major route of beta-alanine production in bacteria, and is catalysed by the enzyme aspartate decarboxylase EC:4.1.1.11 which requires a pyruvoyl group for its activity. It is synthesised initially as a proenzyme which is then proteolytically cleaved to an alpha (C-terminal) and beta (N-terminal) subunit and a pyruvoyl group. This family contains both chains of aspartate decarboxylase. 
25.00 25.00 29.00 30.30 22.50 19.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.33 0.71 -4.51 163 2477 2012-10-02 17:45:13 2003-04-07 12:59:11 11 4 2416 51 550 1458 373 115.10 52 89.57 CHANGED MhhphLKuKIHRAoVTpA-LpY.GSlTIDp-Ll-AAsIl.hEpVpIhNlsNGpRFpTYsItG-+GSGhIslNGAAARhspsGDhlIIhuYuthscpEh.cs.acPplVhlD.cpNplhc ...........MhpTMLpuKIHRApVTcA-LpY.GSloIDpDLL-AAsIlpsEpVpIhNlsNGpRFpTYsIsGE...RGSGlIslNGAAARhspsGDlVIIhoYuphs.-.-Es..+s..acPcVlah-.t.s.Nchh.c......... 0 193 390 489 +1262 PF00710 Asparaginase Asparaginase Bateman A anon Pfam-B_652 (release 2.1) Domain \N 22.50 22.50 22.60 22.80 22.20 22.40 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.03 0.70 -5.56 157 5341 2009-09-16 23:30:57 2003-04-07 12:59:11 15 27 3806 135 1165 3548 565 305.20 30 90.15 CHANGED clh.......llsTGGTI...........uh.ttsspstthss.......ssptll...ptls....lpph...s.plps...............................................................................................................................................................................cthhs.lsSsshssppWhplupplppth...cs...cGhVlsHGTDThp.TAhhLshhl.ps.s....+PVVlTGu.+sh..........sphsuDu.tNLhsAltlAss............htGVhlshssc.......lhpupcspKsposshsA.F.pSsshsslu.hl...ss..plphhtp.hh.....................................t......................th...h.tthtsp.VsllthasGh.ss....phlcshl..ss.hcGlVlpuhGsGs......sssshhsslpcss.pp.G.......lsVVhsopshpGtVs...hshhusutplt.p...h.GslsusshssppAhltLhhhLupt..hshpcl.p 
..........................................................................lhlltTGGTI........................uh..tt.s.s...p.s.....hhs.................tspsll......ptl.s......lp.ph.....s...pl.cs...............................................................................................................................................................................c..p.h...h.p...l.s.SscMss.ppWhplucp..I..pp..th...cp.........hDG....hVIoHGTDThp.TAhhLs..h....hl.....ps.........s..........KP..VV.lTGu.pP............s.p.hsuD.G...hNL.hsA....l...hlAus...........................t.......spG....Vhls.hNs.c............lhpupcs...........o.K.sp..os.sh.s.s.F.pSs..s.hs.sLu.hl........st...clth.h.pssht.........................................................tt..t..........................h.....s..p.......p.h...s.p...Vsllt..hY..sGh.ss................phlcsh........l....pt.......s....h...........cGl....lltuh..GsGN..............hstshhp....t....l.p....p....hh...p.p...G.......................lsV.Vhsop..........shsG.tss........th.....s..s..u..h.tlt..c.................h.Ghl.su...t..s..hssppAhhhLhhhLsp..ths.pth..................................................................................................... 
0 339 683 964 +1263 PF01112 Asparaginase_2 Asparaginase Bateman A anon Sarah Teichmann Domain \N 19.40 19.40 19.90 19.50 19.10 19.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.99 0.70 -5.71 7 2653 2012-10-03 21:14:07 2003-04-07 12:59:11 13 21 1641 72 1019 2127 867 259.80 32 88.17 CHANGED sshhIs.lpG..utsslsps......hPh.hphht..hh+pssptuhcsLpuGsssLDsVE.ulchlEsp..chssGhGus.spsGpsph-AslMDGsshcsGAVuslp+l+NPIplARhVM-pTsHlhLsh-uAppFApspGhcp.s.ophtopp.hpchhtsppts....pl..D.osh.................l.ptpp...........hsTlGhVAlDspGplAuuTSTuGhs.KhhGRVGDSPl.GAGsYAschsu.soATGpGEslIRthsuhplsthMchGt...phsptshphlhcthPc.......hssGlIAlstpGcluhshss.uha..h.sAspss.h..Ysp.s...p .......................................................................................t.............................................th.h.......hp...thh.p.........t..u.............p..h..L............p......t......G.........t.................s............A.l................D....s...Vttulphl...E.....sss....hhNuGhG.ushst.............sG.p.h.EhDA......s.l..M.............D..........G........p...........s..........h.......p..s......G............A.V......uu..l..p.pl..+N.Pl.....hA..........+.........h..........l..........h..............c......p...........s...........................H..............s.........h.........h.........sGp.......GA....p.p.a.A.h....t.t.....G..h...t...h....h....s............t.....h...................o...........................thp.ph.t....t....h.ttt................................t....t...............................................................ttp..............................................................hsTV.G.sVA..l.D.t.p.G.pl.A...A....uTSTGGhs.Kh.G.R.VGD............o..P.ll.Gu..G.s.............a........A........s.............s........t............s........s.....s.....o.....s..T...GhGE..hhh+h.....h..h.............u...hp..l...s.t...h...h......c.tt............th.t...ps.s..th.l....p..t..h.t........................stsull..s.l..st.pGp...h...s..hs
h.ss......th................................tt....................................................................... 0 291 539 822 +1264 PF04958 AstA Arginine N-succinyltransferase beta subunit Bateman A anon COG3138 Family Arginine N-succinyltransferase EC:2.3.1.109 catalyses the transfer of succinyl-CoA to arginine to produce succinylarginine. This is the first step in arginine catabolism by the arginine succinyltransferase pathway. 29.40 29.40 31.90 36.50 25.30 29.30 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.86 0.70 -6.02 71 1133 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 909 1 204 701 163 330.90 51 97.92 CHANGED MhllRPlptsDlsuLhplAppo...GtGh.TSLPsscchLpp+Is+SppoFtspphspsupps...YlFVLEDocoGcllGssuIcAsVGhsp.PFYsY+lupllHsS......ccLslppphcsLsLssDhTGsoElCoLFLcssaRps..tsGcLLS+uRaLFhApapcRFu-pVlAEhRGlsD..-pG..pSPFW-uLuc+FFsh-FscADhLoGlssKsFIAELMP+aPIYVsLLsp-AQsVIGpVHssTcPAhclLEpEGFphpuYVDIFDuGPTlEAclcsl+oV+pS+thpVp...............tsu....pp..hLluNsphps.........FRAshsphths...s.splhlsscsAcsLplps.GDpVRhlsl ..........MhVlRPlcpuDlsALhpLAs...co...G.sGL.TSLPsNcspLusRIcRuhcoapuch...stuEpG...YlFVLEDoE...TGpVsGlsAIEsAVGLs-.PaYN.YRVuTlVHAS......+EL.sVaNslsTLhLSNDaTGsSELCTLFLcP-aRpp.....sNGhLLSKuRFhFhAuFR-+Fs-+VlAEMRGV.D..EcG..hSPFWpuLG++FFuhDFocADaLs.GsGpKuFIAELMPK.aPIYsphLSpEAQsVIG.pVHPpTtPAh.............s....l..LE+EGFcYcsYlDIFDGGPTLEC-l-clRulRcScllpVs.u.......ps..stsshss...hLVuNp.sacc.........FRssLscscss........sppllLostph-ALcsps.GDpVRlVt.L.......................................................... 0 43 78 145 +1265 PF04996 AstB Succinylarginine dihydrolase Bateman A anon COG3724 Family This enzyme transforms N(2)-succinylglutamate into succinate and glutamate. This is the fifth and last step in arginine catabolism by the arginine succinyltransferase pathway. 
20.10 20.10 20.10 38.90 19.60 20.00 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.46 0.70 -5.68 4 825 2012-10-01 20:45:11 2003-04-07 12:59:11 7 3 787 14 131 513 157 429.90 67 99.25 CHANGED M.sAaElNFDGLVGPTHsYuGLShGNlASppNuttVSNPRtAAhQGLsKM+sLhDhGhsQuVLPPptRPssslLRpLGFSGSDEtVLppsA+-A.sLLuAsSSASsMWsANAATVsPSADTADGRVHhTsANLssphHRSLEussTtpsL+tlFsDEp+FAVHusLPtsuphGDEGAANHsRLsucaGssGVplFVaGR...-u..stsuRaPARQThEAspAlARLpGl..spslFAQQssssI-tGsFHNDVluVuNtpVLFtHppAFtcpsplLAEL+A+lsG.....hhlpVspsplulpDsVuSYLFNSQLLShs.DGpMhLllP.EsR-NstsWshlppLhuuDsPIscVKVhDLRQSMpNGGGPACLRLRVsLsEtEhsAlNPAhhhsssLhDsLscWV-RaYRDRLosuDLADPpLLpEuRpALDtLoQlLsLG.shYsFQ.....ss ..............MsAaEVNFDGLVG.THHYAGL..SFGNcAST+HchpVSNPRhAAKQGLlKMK.ALADhGFsQuVlPPHERPhlssLRpLGFoG.SDEQVLcKsARQAPchLSulSSASsMWsANAATVuPSADThDGKVHhTVANLNNKFHRSlEAssTculL+AIFsDEc+FuVHsALPpVAlLGDEGAANHNRLG.....GcYGcsGlQLFVYGR.....c-s..s...c...s...c..PsR.YPARQTREASEAVARLpQlsPppVlFAQQNP-VIDQGVFHNDVIAVSNRpVLFCHppAFs..cQppllspLR....s+lsG......FhuIEVPsspVSVuDAVuTYLFNSQLLSRs..DGSMh...LVlPpECR-csuVWsYLN.c.L.l.A.u.D.sPIs-l+VFDLRESMtNGGGPACLRLRVVLoEcERpAV..NPuVhMND.sLFssLNsWVDRaYRDRLTsADLADPpLLcEuRpALD.LTQlLsLG.SVYPFQ.c.................... 0 24 49 91 +1266 PF04952 AstE_AspA Aste_AspA; Succinylglutamate desuccinylase / Aspartoacylase family Bateman A anon COG2988 & Pfam-B_15640 (release 10.0) Family This family includes Succinylglutamate desuccinylase EC:3.1.-.- that catalyses the fifth and last step in arginine catabolism by the arginine succinyltransferase pathway. The family also include aspartoacylase EC:3.5.1.15 which cleaves acylaspartate into a fatty acid and aspartate. Mutations in Swiss:P45381 lead to Canavan disease disease [1]. This family is probably structurally related to Pfam:PF00246 (Bateman A pers. obs.). 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.67 0.70 -5.53 33 2822 2012-10-02 19:46:12 2003-04-07 12:59:11 9 14 1568 43 782 2774 810 264.10 21 80.56 CHANGED sG.slllouGsHGsEhsGs.llcclhpplsttp....hsuphhllshsNs.Ahpssp......Rhhs........psLN.....................RsFsGctpu............spsc+lAphhhtthh.sps-hslDLHouspuhshh.hshsshtss.h.....hphlpsh..shshsh.hpptsss............sh.phsspphut.ulolElGts..tshstptlsphttsltslLphl.uhhpsp......p.hthhphstpssptpp..u..sGhhchphplsspV-ttshhshshlhsphsscph.hpu...spsshllhhp.tshsttssslhhltpt ...................................................................................................tlhlsuu.lHGsE.hsu....h....t....hlpp.L.l..........p....pl..t.ttp............hh.u......t...h........l.......ll.slsNs.u.hp.ptp...............................................R.a.h.....................................pDhN..............................................R.h.F..s..G.p.ps...........................................s...hpph...s..p.th.......h..p.t......s.........t............p..s.....ch..h.lD...L...Hou.......h.....p..s.......t...h..h....h.....h.s.h.....h.s.h.p..sh.....h...............hth.ht.sh......sh.s..hl....h...ppt..s.ss.......................................s.h.hp.h.....s.....s.....p..p........h.......u......t............u..........h.o.lEhGts.........h.hstpthpt....h...ttsh.ht...hhthh...u.hhpst..................t.....h.h..t.hh.t....h...t...t.......p............................p..h..t..h.......t..hupp...stthp.h..s..l..h..s.....s..t......h.....hhs..........psthllh.p..shht.tsslhh....t......................................................................................................................................................................... 0 179 407 606 +1267 PF03115 Astro_capsid Astrovirus capsid protein precursor Mifsud W anon Pfam-B_2957 (release 6.5) Family This product is encoded by astrovirus ORF2, one of the three astrovirus ORFs (1a, 1b, 2). 
The 87kD precursor protein undergoes an intracellular cleavage to form a 79kD protein. Subsequently, extracellular trypsin cleavage yields the three proteins forming the infectious virion [1]. 25.00 25.00 28.80 27.90 20.90 20.60 hmmbuild -o /dev/null HMM SEED 787 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.35 0.70 -6.28 11 1664 2009-09-13 08:26:15 2003-04-07 12:59:11 9 6 167 5 0 782 0 200.70 66 92.28 CHANGED MASKSsKQVTVEV..NNNGRsRSKSRsRSQSRGRs+uVKITVNS+....s+uR.RQsGRsKhQSNQRVRpIVNKQLRKQGVTGPKPAICQ+ATATLGTIGSNToGsTEIEACILLNPVLVKDATGSTQFGPVQALGAQYSMWKLKYLNVKLTSMVGASAVNGTVVRlSLNPTSTPSSTSWSGLGARKHLDVTVGKNAlFKLKPSDLGGPRDGWWLTNTNDNASDTLGPSIEIHTLG+TMSSYpNpQFTGGLFLVELuSEWCFTGYAANPNLVNLsK........STDKpVsVTFEGSsGsPLlMsVPEsSHFARhsspRSohsToLuRAGtpooSDTVWQVLNTAVSAAELVTPPPFNWLVKGGWWFVKLIAGRsRoGs....+pFYVYPSYQDALSNKPALCTGuss.uuhRspssVtTTLQFTQMNQPShG+GEoPAThGRulPpP.........G-phpVlhTlussh...uPNssspQsWltKshsuPps........hslKIGpssp+YhshpGFT..........lsuV-WYTs-aQsupcP...........sPIsGlt....VhsssoKKADVYAhpQatsspTNs+pQlTolaLV+V...Tp...............sapVssahshhaptussss......ssslplRssTtssslpFppGpWYLlTsTsl+s.ussssGWlW.shELtsspsYhhDpshsHlIsP.Pssoplha-hhTulPpspsth.shppcss......................ss.pss-s.......hss-ETDs-sEosEDEsD..ElD....RFDL+soSsSEsED...ENsRVTLLsTLlNQGholpRAT+IopRAaPT.....s-+h+RuVahDLLssGlsPuuAWSHACcpARhhuh....pp..phS.GSRGHAE 
............................................................................................................................................................................................................................................................................................................................................QRATATLGTl.GSNTSGoTEIEACILLNPVLVKDATGSTQFGPVQALGAQYSMWKLKYLNVKLTSMVGASAVNGTVlRVSLNP.TS.T.P.S.S.T.SWSGLGARKHLDVTVGKNAsFKLKPSD............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 0 0 0 +1268 PF04377 ATE_C Arginine-tRNA-protein transferase, C terminus Kerrison ND anon COG2935 Family This family represents the C terminal region of the enzyme arginine-tRNA-protein transferase (EC 2.3.2.8), which catalyses the post-translational conjugation of arginine to the N terminus of a protein. In eukaryotes, this functions as part of the N-end rule pathway of protein degradation by conjugating a destabilising amino acid to the amino terminal aspartate or glutamate of a protein, targeting the protein for ubiquitin-dependent proteolysis. N terminal cysteine is sometimes modified [1]. 25.00 25.00 25.30 25.40 24.50 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.53 0.71 -4.08 166 1296 2012-10-02 22:59:21 2003-04-07 12:59:11 10 12 1200 0 497 1090 425 134.00 35 43.75 CHANGED pEpasLYp+YhpsR..HscGsMtsss...h...........ppappF...lpso.l.............................................................................................sohhlEap.............tsscLluVulsD...lLscGl.SuVYoF..aDP-..h.pptS.LGoaslLppIphucc............hsLsYlYLGYaIcsstKMsYKspapPhEl....LssspW ....................................................................................................................................................EpasLYp+Y.ptRH.tcushp..s.....t..............ppa.p.p.F...lps.s.h..l.............................................................................................s.o.hhlcah....................hss..cLlAV.ulhD...lLs.....c......u.....lSuVYoF.............a..D..P..D....h.pp..h.S.LGsaulL.p.pIphu+p................hsLtalYLGYaIcsstKMsYK.spapPtEhLsspt............. 0 143 288 400 +1269 PF04376 ATE_N Arginine-tRNA-protein transferase, N terminus Kerrison ND anon COG2935 Family This family represents the N terminal region of the enzyme arginine-tRNA-protein transferase (EC 2.3.2.8), which catalyses the post-translational conjugation of arginine to the N terminus of a protein. 
In eukaryotes, this functions as part of the N-end rule pathway of protein degradation by conjugating a de-stabilising amino acid to the amino terminal aspartate or glutamate of a protein, targeting the protein for ubiquitin-dependent proteolysis. N terminal cysteine is sometimes modified [1]. In S cerevisiae, Cys20, 23, 94 and/or 95 are thought to be important for activity [2]. Of these, only Cys 94 appears to be completely conserved in this family. 21.10 21.10 21.10 21.50 20.80 21.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.91 0.72 -4.17 128 1240 2012-10-02 22:59:21 2003-04-07 12:59:11 8 8 1160 0 466 1031 317 77.30 35 25.38 CHANGED pahhTsshsCuYLss................................+pppphhst...stthssphYspLhptGFRRSGshhY+PpC...psCsAChslRlssspFpso+oQ.+R ........................hhTsspsCuYLts...........................................cp.tcp.hst.........spt.......hs........sphYspLhptGF.R.RSGshhYRPtC....csCp.........uChslRlssscFpso+oQ+R.............. 1 131 266 373 +1270 PF03078 ATHILA ATHILA ORF-1 family Mifsud W anon Pfam-B_2240 (release 6.4) Family ATHILA is a group of Arabidopsis thaliana retrotransposons [1] belonging to the Ty3/gypsy family of the long terminal repeat (LTR) class of eukaryotic retrotransposons[2,3]. The central region of ATHILA retrotransposons contains two or three open reading frames (ORFs). This family represents the ORF1 product. The function of ORF1 is unknown. 
19.50 19.50 20.20 20.50 19.30 19.00 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.52 0.70 -5.90 11 113 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 9 0 38 110 0 201.90 16 53.65 CHANGED p++tElARGKRshps...........c.pl.pE-h-...-..stsst+tpph.p+tcc.TspEYh+hhp..cFhuTRYPp.pThtpLGlhcDVpalhppspLpshhuh.asuaccEolpFLSTLclcha....s.th-.cG.lGalpFhVhshpYplsIppLpphaGFPotpuhp..casp-ELpsLWhpIuushPhs.u+SKSs.IRsPVIRYhppslANshasRcpTGTVoNs-hEMID.ALptlL....RcTKsGpslcGDh.NDsshohlLL.HLhuY+paAhs.ppp.htGuLChGGllT.ILhussVslts.sh.P+hhDhcaLptpc.hphthssDhhha+Fpcs..t.u.hhLPs.chTpIhptpNI-FpPphphL...............s...ppp..hhptttpppl...-tpscpcph-...h-Ts.aHFpEah.Psp.pu+sluEsH+p.uhLp+WsphQcKlltphhKtl+hhpptlpshussp ..........................................................................................................................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................................................................h............................................................................. 
0 3 3 6 +1271 PF03477 ATP-cone ATP cone domain Aravind L anon Aravind L Domain \N 21.50 21.50 21.50 21.50 21.40 21.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.95 0.72 -3.55 133 9433 2009-01-15 18:05:59 2003-04-07 12:59:11 11 53 4693 82 2010 5803 3748 89.30 26 18.76 CHANGED hhVlKRDG.pttsFchpKItpulpcAs.................t.h.....stp.p.spplspplhpp..lps...t....tl.ssccIp-hVpppLh.................cts.....thshs+thlh..YRph+ct ..........................hVlKRDG.p.p.psFst-KItpulh+As...........................................tt...ps.l..........sps.p...lp....p.l..sp.clp.pp....lps......t...............pl.sspcItphVhcpLh......................chs.....ph.s..hl+ht.sYRph+c.h............................................... 1 650 1237 1685 +1272 PF02222 ATP-grasp ATP-grasp domain Bateman A anon Pfam-B_157 (release 5.2) Family This family does not contain all known ATP-grasp domain members. This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.12 0.71 -4.91 23 5314 2012-10-10 13:17:02 2003-04-07 12:59:11 17 15 3695 55 1212 22317 9135 171.00 35 44.36 CHANGED l.pphGlssscahhlcsts...plppssppl....GhPsVlKtpphuYDG+Gphll+sps-lppuhcth......tstslllEcalsFctElollslR..shcGt..shhaPslcslpccGlh+hshsPAthstt...htppApplAc.clscpLsssGlauVEhFlh...sGc......lllNElAPRPHNSGHaTlsus ..........................................................................................................................h.cchuls.s..u...s...a...t..h.....l......s...o....t.p.........-.l..p....p.....s.....h....t......p....l...............G..h....P..s.llK.t.s.h.h.G.Y.D.G..+....G......Q....h...h...l.......+........s...t..p..-....l.p..t..u..h.phh.............................ssspsl.l.E.p.h..l.............s...............F...c.........h..................E............l..o..l.......l........s......s...R.................sss...G......p..............s......t........h.......a.......P.................s..............s......c............s........h.............p...........c...........s............G.............l.............h................p..........p..................S...........h.............s.................P......s....p.....h......s....s.t...................ht.p...p.A.p.p...h.Ap....pl.....s......p.....t.....L................s.........h.....s...G......l....h....u....V..E.h.Fls........p.G.s....................lllNElAPRPHNSGHaTlpu...................................................................................................... 0 320 712 992 +1273 PF00217 ATP-gua_Ptrans ATP:guanido phosphotransferase, C-terminal catalytic domain Finn RD, Griffiths-Jones SR anon Prosite Domain The substrate binding site is located in the cleft between N and C-terminal domains, but most of the catalytic residues are found in the larger C-terminal domain. 
21.90 21.90 23.10 23.00 21.80 21.40 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.44 0.70 -5.05 115 3578 2012-10-02 17:21:26 2003-04-07 12:59:11 14 21 2750 104 545 2666 91 185.20 48 68.74 CHANGED s.-s.chVlSoRlRsuRslcGhshPPph.ocp-pcplEphlpsALssl.....suchp.GpYasLss.....MostppppLl-cH......aLFpc..ss.hh.suthscsWPtuRGIahNcs+sFllWlNEEDHlRlISMppG...uslppVacRhspulptlE........cp.lcFuas-+LGYLToCPoNLGTGLRASVHl+LPtLuts....sc.hpphhpplpLphRGhtGEsoc.uhG...slaDISNppRLGhoEh-lVppl.hsGVpplIchE+ph ..............................................................................................sscallSoRVRsGRSlcGasFsPsh.T-t.p.h+E.hEpcV.sssLsuL.............cG-LK...Gsa..Y..P..Lou...........Moccs.Q..QpL.IDDH..............FLF...cE.......ss.hLp...s...A...sh.sRhWPsuRGIa..HN......-..s+....ohh.VhhNEEDHlRl.uMp.G....................hshtp...sapphsph...ptl-.........cth.a..a.pc....plGalh.....hCPoNlGTshRAuVhl...+l...PtLshp......tch.p.hh..t.ph...t......h..thR...Ghh......G....ts..........t................................................................................................................. 0 243 305 418 +1274 PF02807 ATP-gua_PtransN ATP:guanido phosphotransferase, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The N-terminal domain has an all-alpha fold. 25.70 25.70 25.70 26.10 25.60 25.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.48 0.72 -3.83 91 1908 2009-01-15 18:05:59 2003-04-07 12:59:11 10 20 1186 104 380 1509 23 67.60 62 24.46 CHANGED schsp...ppslhpKaLT.-lapcL+sKcT..shGsTLscsIpoGl-Nss.....psVGlhAuDtEuYp..lFu-LFDslIc-hHsGa .................................ppshhtphL..T...-l..acpL+s.+.pT..shGsTLhDVIQoGl-N.s........................psVGlYAs.DAEuYo..VFA-LFDPII-DYHuGF.................. 
0 158 187 276 +1275 PF01747 ATP-sulfurylase ATP-sulfurylase Bashton M, Bateman A anon Pfam-B_494 (release 4.2) Domain This domain is the catalytic domain of ATP-sulfurylase or sulfate adenylyltransferase EC:2.7.7.4 some of which are part of a bifunctional polypeptide chain associated with adenosyl phosphosulphate (APS) kinase Pfam:PF01583. Both enzymes are required for PAPS (phosphoadenosine-phosphosulfate) synthesis from inorganic sulphate [2]. ATP sulfurylase catalyses the synthesis of adenosine-phosphosulfate APS from ATP and inorganic sulphate [1]. 23.40 23.40 24.40 23.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.33 0.70 -5.03 113 1308 2012-10-02 18:00:56 2003-04-07 12:59:11 12 15 1032 36 520 1170 549 214.80 37 43.51 CHANGED asphpho....PtEhRphF.pc.pGWc..pVlAFQT.RNPhHRu.HEhLt+tAhct.h..s........ulLlpP.LlGtpKssDlssclRhcsacsllcp.Yh................PpspshluhhPhsM..RYAGPREAlhHAlhRpNaGsTHaIVGRDHAGlG..........s..aYGsa-AQclhcp..............ts-L..sIp.l.hcpshY.spcssphssh..........cssscspcp+.lplSGT...........clR.chLppGttsPshFsRPEVsclLhch.h .........................sphphsPtEhRppF....cc....hshc....sVsAFQo.RNPlHRu.HchLhppAhct..h...s.........sLllpP.LlG.TKssDlPtcsRhcsacs.ll..cp.Yh................stspshlulhPssM..+....YAGP+EulaHAllR+Nh.GsTHaI.VGRDHAGsG...........DaYssacApclhsp.............hss-l..sIphl.FcpshY.sppspthhs...........cpsscs...pcpc..lhlSGT...........clR.chLRsGttPPshF.pPEVsclLhc.................................. 
0 179 315 435 +1276 PF00231 ATP-synt ATP synthase Finn RD anon Prosite Domain \N 22.10 22.10 22.40 22.40 21.70 22.00 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.76 0.70 -5.06 161 5197 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 4653 59 1266 3418 3462 281.00 34 97.64 CHANGED AsL+-l+pRIpSlpsTpKIT+AMchV.AuuKl++Apcphpss+PYsp.plppllspl...htt.................................t.t..pshhttc.....pspcs.......hllllooD+GLCGuaNssll+tstphlpphpspt....ph...plhslGcKutsahp..p.h.............shp.lht.........phssls.cssshppsp.tlsppllpta...hpt..p.................................hDc..lhllYscF.hsslsQpspspplLP...............................................ttpttttttpsph.aEP..sscplLssLlspYlpsplapullEuhASEpuARMsAMcsATcNAs-hlpcLslpYN+sRQAuITpElhEIl.uGApAL ................................................................sh+-I+s+ItSlpsTpKIT+AMcMV.uuuKl++upcphpsu+PYup.phppllspl..sps..t.............................ph.p..pP.hl.ppR............psK+s.....uhlVloo..DRGLCGGaNsNlhKp...sh.ph..hp..p.hp...s.ps...............hp.h.pl.hslGpKGh..saap....p..t.............................uhp...lht..........phsslu....-pP..sh.p.cst..sl..s..pp.hlpta.....pps...p....................................hD....c...lallYs+F..l..s..s.h..oQpsphpplLPl....................................................t..t.s..p..t.tp..p.tph.sY...aE.P....s..s..cp...lLcsLLspYlpu.lYtull-uhASEpuARMsAMc........sATDNA......s...clIpcLsl.YN+uRQAuITpEloEIVuGAsAl......................................................................... 1 435 822 1071 +1277 PF05176 ATP-synt_10 ATP10 protein Wood V anon Wood V Family ATP 10 is essential for the assembly of a functional mitochondrial ATPase complex [1]. 
21.90 21.90 23.80 24.70 21.70 21.10 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.42 0.70 -5.11 23 186 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 169 0 137 174 20 237.20 27 80.37 CHANGED puh...........p..p....sspstptch...htLs+PlGh..PPp.s..pphs-spoLpp..htpDhhsh-K+tcRpKELst-hu+uhFt-hpsh+pppGKhFhuPsphFKpDKALYFPsht.....G+TL..............sus.....pssp.......shLcGKlSlVplFSoshuEppspoahp.....sshp-hhpps.t.........psQll-INl.-shl......KuhLl+lh.hsph+ppl.PtppapcY..Flsp.cs.hs.sl+Eplth.NshsGYlYLlDcps+IRWAuoGsAspcEhcsLh+sV+tL ..................................................hht...................t......l.+PlGh..sP..............s...pph..ptpohpt...........-hhs.-+....p..p+pccL..........t.....t...c.hs+.s...hat-hpsh.ph.p.pGKhFhuss.pha+t-puLaFPslh..............GpoL.............................ssp.......tshs...........shl..p...G.K.sSlVslao..o.hh.u-p.spoahp..........shhpphhpps......................tsQ.hlclNh.-shl......KuhLl+hh.hssh+..ppl..Ptppasp...Y..alsp.....ps.h...s.plR-plthhNshsGYlaLlDpps+IRWAusG.Aps..pEhcsLhpssptL.................... 0 41 82 117 +1278 PF00895 ATP-synt_8 ATP synthase protein 8 Bateman A anon Pfam-B_446 (release 3.0) Family \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.02 0.72 -3.76 66 6876 2012-10-02 21:03:42 2003-04-07 12:59:11 15 1 4964 0 41 6086 109 52.90 39 95.77 CHANGED MPQLssss......Wahhllhhhlslhllh.hKlhsah...ssss......sppspppppssPWpa.W ....MPQLNPsP.......WFhIhlho..W...Ls.ah.hl.l..sKlhsas.....sNsP...........ssp...s.....s.pp....p.s.ps..W..sWPW................................. 
0 8 12 21 +1279 PF00119 ATP-synt_A ATP synthase A chain Sonnhammer ELL anon Prosite Domain \N 21.70 21.70 21.70 21.70 21.40 21.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.44 0.70 -4.84 30 17567 2009-01-15 18:05:59 2003-04-07 12:59:11 15 12 11908 3 1063 15216 2921 204.40 33 91.02 CHANGED hslshpslhhhlhshllhhhhhhh..p......hPsphpshhphlhchlpshlpsphstps...httahslhholFlalhhsNhhG.LlPasFs................sTuclslThuLAlslaluhhlhGlppcshsha..phhls.hs...hlPhhl.hlEllS.hs+PloLulRLhuNlhAGcllltLlushh...............hhullshhlh.........lhhhhh-lhluhIQAalFshLshlYls- .......................................................h........h...h.h.l.h.h.h.l..l...h.h........h....h....h....h...................................t......h..........lss...R....h.p.s..l.p.hhl.phh.s.p.p....l.h.t.s.l.s.tpu...........c.p.au.hh.......l..hoLhlFll...hhNhLG...L.l..P.Y....s...F.T................................................................................PT.opLu.hshuL.Als.l.W.lus.ll.hG.h+.p...p.s.....tts.h.......u+h..l....P.....p.......G......o........P..............h....................L....l.....P.......h....L....l...l......IE.sl.S.h.h.h.RPluLulRLhANlhAGHL.L.lp.Llushshs........................................h..h.h.u.l.l..s.h.hlh...............hhl.shLEl.hVuhIQAY.V.FslLhslYlp................................... 0 343 689 893 +1280 PF00006 ATP-synt_ab ATP synthase alpha/beta family, nucleotide-binding domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family This family includes the ATP synthase alpha and beta subunits, the ATP synthase associated with flagella and the termination factor Rho. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null --hand HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.34 0.70 -4.84 157 33595 2012-10-05 12:31:08 2003-04-07 12:59:11 20 61 14564 479 5203 24829 10876 206.20 38 47.96 CHANGED TGl+sIDshhPlu+GQRhsIhussGsGKosLhtplspss....................p...ssss....lhsh..........................................................................................................................................................................................................................................................................................................................................................................................................................lGERscElp-hhcph..............hsp........tsh.......................p+.olllsususcss...ht+htushsu.hslAEaapp.pGp.cVLl.....lhDslTRaApAhREluhthuc..sPupcGYPssla...shlsclhERAGph...................................ttGSlTul.s...lhstusDh.s-PlsssshulhDGplhL..s+cLsppuha.PAIDl....htShS 
.....................................................................................TGlKslD.h.l.s..P..h.u+G.t.+.hs.lh.G..ss.GsGKTs.lhh.-.lIpNh.................................s...psts..........sVas.u..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lGE.Rsc..Es.s....-lh.pch..................................ccs.......ss.h............................pcsslVhu....p..hs-P.s.......stRh.psu.hou............loh.A..EYFR............D......p............u..........p............D............V.Ll.....h.....hDsl....hRaspAhpElS....hl...Ls.....R...Pu.thGY.sslh.....tp.h.upL.ERhsps..........................................................ptGSlTul.s...lh.s.....us.Dh..ocsh.P..ssshuhh......D..u..phhL..s+pLt..tpGlaPAl.sshtSsS.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 1807 3311 4366 +1281 PF00430 ATP-synt_B ATP synthase B/B' CF(0) Finn RD anon Pfam-B_137 (release 1.0) Family Part of the CF(0) (base unit) of the ATP synthase. The base unit is thought to translocate protons through membrane (inner membrane in mitochondria, thylakoid membrane in plants, cytoplasmic membrane in bacteria). 
The B subunits are thought to interact with the stalk of the CF(1) subunits. This domain should not be confused with the ab CF(1) proteins (in the head of the ATP synthase) which are found in Pfam:PF00006 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.33 0.71 -4.35 27 5963 2012-10-02 21:03:42 2003-04-07 12:59:11 13 9 4775 3 1260 3817 3365 130.10 24 75.06 CHANGED sl.htllshhlllhlLhhhhhpsLtplLcpRcptItsslppAEcphpputphhtctcppLppA+tcApcllppApppupphtpphlspuppchcplhpsAtsclpppcppuhppL+pplssLulphspphlp ................................hhhphlsFl...l.ll.h.l.l.h..+..a..s....a....s.....s.........l.h.sh...l-cRppcIt.sslssA-cs.+.pc.u.p...th.t.pctpppLppA+tE.Ap....pIl......pp....Appcup....ph....hcp...hh....s....p....A...pp....-.tpc....l....h....ppA....ptc....Ip.tp....+pcAhppl+pp....V.upLulthAp+ll............................... 0 418 853 1083 +1282 PF00137 ATP-synt_C ATP synthase subunit C Sonnhammer ELL anon Prosite Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.05 0.72 -4.07 167 10204 2009-09-13 08:00:21 2003-04-07 12:59:11 16 24 5907 292 3091 5928 2123 66.90 28 77.67 CHANGED hshl.GAGluhuluslGuulGhGhsuuushpuhu+pPc....lhsphllshuhsEshulauLllulll..lhs .................thluA.uluhu.luulG.uulGhGhsuu.s..hlpu.....sARpP-.........................lhsphllshuhsEulul.a......ulllulllh...................... 2 1106 1902 2580 +1283 PF01813 ATP-synt_D ATP synthase subunit D Bashton M, Bateman A anon Pfam-B_1304 (release 4.2) Family This is a family of subunit D form various ATP synthases including V-type H+ transporting and Na+ dependent. Subunit D is suggested to be an integral part of the catalytic sector of the V-ATPase [2]. 
22.30 22.30 22.30 22.40 22.00 22.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.17 0.71 -4.75 114 1524 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 1253 6 536 1171 151 187.10 30 85.70 CHANGED hpLhphKp+LphAp+GapLLKcKc-sLhhchhpllcchpch+pplpcth.pcAhtslshAph....utthhptlt.sstp..phplchppcNl.hGVplPhhchtp....................................ht.tsauhhssuspl-cuhcpapcslchllclAplEpshttLsp-lcpTpRRVNALEpllIPchcssl+aIphpL-EpEREphhRLKh.lKpp ....................pLtthKp+LpsAp+G+pLLKcKp-tLh.....hc......Fhpll.cchpch+pphpphh.ppuhtsh.shAph.....hutthhpplht.spp...phplchppcNl..hGVplPhhchhp.............................................................tts.h.au.h.hpsspplcc.shpphpchlphllcLAplc.....pshhhLscpIcpTpRRVNAL..............Ep..lh.IPphcpTlpaIphcL-EtERpphhRLK+lKp........................ 0 199 334 452 +1284 PF00401 ATP-synt_DE ATP synthase, Delta/Epsilon chain, long alpha-helix domain Finn RD, Griffiths-Jones SR, Kerrison ND anon Pfam-B_114 (release 1.0) Domain Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. This subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (Pfam:PF00213). 24.60 24.60 24.70 24.70 24.50 24.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.35 0.72 -4.08 179 3444 2009-12-15 10:59:44 2003-04-07 12:59:11 15 6 3304 16 491 1953 627 48.40 37 35.06 CHANGED c-IDhpcAppAhccA-ppLtpt..ps.ct....-hhcAphsLp+AhARLcshpth ........pDID.p+ApcAKc+AEpplppu....cu.ch...........-htcAphA.LpRAhsRlcshph.t................ 0 154 303 405 +1285 PF02823 ATP-synt_DE_N ATP synthase, Delta/Epsilon chain, beta-sandwich domain Finn RD, Griffiths-Jones SR, Kerrison ND anon Pfam-B_114 (release 1.0) Domain Part of the ATP synthase CF(1). These subunits are part of the head unit of the ATP synthase. 
The subunit is called epsilon in bacteria and delta in mitochondria. In bacteria the delta (D) subunit is equivalent to the mitochondrial Oligomycin sensitive subunit, OSCP (Pfam:PF00213). 20.10 20.10 20.60 20.30 19.50 18.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.29 0.72 -4.22 327 5637 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 5213 39 1228 3611 2337 79.90 32 59.02 CHANGED lplclVoP-chlassp.lc.lhssuspG-hGlLssHsPllosLc.s.Ghlclp..ts...u......pp........phh.hlsGGhh-.Vp..s.splolLu-sA ............................hplplVoP.-.p.h.lasuc..Vc.lhls.....s..s.p.G.-lGlLPsHsPlloslc.s....G.h.l+lp....ts.....s...................pp.......................chl.hls.G.Ghl-lp..s.sp.l.o.lLA-sA...................................................... 0 402 786 1040 +1286 PF04627 ATP-synt_Eps ATP-synt_E; Mitochondrial ATP synthase epsilon chain Kerrison ND anon DOMO:DM04624; Family This family constitutes the mitochondrial ATP synthase epsilon subunit. This is not to be confused with the bacterial epsilon subunit, which is homologous to the mitochondrial delta subunit (Pfam:PF00401 and Pfam:PF02823) The epsilon subunit is located in the extrinsic membrane section F1, which is the catalytic site of ATP synthesis. The epsilon subunit was not well ordered in the crystal structure of bovine F1 [1], but it is known to be located in the stalk region of F1 [2]. E subunit is thought to be involved in the regulation of ATP synthase, since a null mutation increased oligomycin sensitivity and decreased inhibition by inhibitor protein IF1 [2]. 
20.50 20.50 21.80 20.90 18.40 18.40 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.35 0.72 -4.41 30 309 2009-09-10 16:25:20 2003-04-07 12:59:11 8 6 257 30 199 292 1 49.00 38 53.74 CHANGED huuWRtAGloY.pYssIAA+sVRcuLKp-h+ssAt+Rsp.scl...............KascW.psG ................shWR.tAGloYh+YsslsAcsVRpuLKpph+spAt+Rst.spl...............+hscWpsG.............................. 0 63 113 163 +1287 PF01990 ATP-synt_F ATP synthase (F/14-kDa) subunit Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes 14-kDa subunit from vATPases [1], which is in the peripheral catalytic part of the complex [2]. The family also includes archaebacterial ATP synthase subunit F [3]. 23.50 23.50 23.50 23.50 23.40 23.20 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.29 0.72 -3.60 100 1179 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 1012 17 454 861 109 94.90 28 86.32 CHANGED luVIGDc.Do.lsGFtLuGlsph.sh...........................ssc-lccslcchh.pcp-luIlllspclu......cc.lcctlcph..pp.....shPsll..pIPspptshshttss..lpt.lc+ ..................................................luVIGDc......Do.lhGFpLhGlsthsst...........................stp-hccslcchs..............c............p-h.....u.........lIhlopplA..............pt.lpcplcpa..p.p.....shPs.ll...IPo+pt..sh.shuhsp..l.p.lc.................................... 0 171 280 387 +1288 PF04911 ATP-synt_J ATP synthase j chain Wood V, Bateman A anon Wood V Family \N 20.70 20.70 23.30 22.70 20.20 19.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.74 0.72 -4.51 18 106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 102 0 80 91 0 52.30 45 78.02 CHANGED +FPTPVlKPhhPFFsuGsllhYGlsphAss.hsocEFhNDPRNPpht..s.pttH ...KaPsP...lhKPhhPFFsAGhllhYGlsphtsAhhsocEapNDPRNPpht.tt.t...t................ 
0 23 47 70 +1289 PF02038 ATP1G1_PLM_MAT8 ATP1G1/PLM/MAT8 family Mian N, Bateman A anon IPR000272 Family \N 19.80 19.80 20.80 20.10 19.60 18.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.47 0.72 -4.95 23 266 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 47 12 108 287 0 49.20 42 44.01 CHANGED scsp..-PFaYDYcoLRlGGLlhAulLFllGIlIlLS..t+C+C.phsQcp+phs ..........-c..-PFhY....DYpoLRlGGLlhAulLFllGI.lllLS......t.+.C....+C.phspp.pt..s................. 1 9 11 27 +1290 PF03899 ATP_synt_I ATP synthase I chain Finn RD anon DOMO:DM04622; Family \N 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.35 0.72 -3.84 154 2035 2012-10-01 19:47:47 2003-04-07 12:59:11 10 1 1796 0 377 1164 184 94.90 20 74.16 CHANGED +lhhhphhlhh.lhsls...hhlh...........shp...huhu.....hlhGuhlullshhhhshps...httthst...pphht..........hhhG.thh+hhlshhhhhlshth...ttlph.lslhlGhh ..........................hhhphhlhh..lhslh...hhlh..............tht...hslu.....hllGshsuhlsshlhthhs....hhtt...thst....tpshh..........hhhG.phh+..h.h.hsllhhl.lshth...hphsh.lslhlGh.................... 0 107 223 309 +1291 PF03154 Atrophin-1 Atrophin-1 family Mifsud W anon Pfam-B_3427 (release 6.5) Family Atrophin-1 is the protein product of the dentatorubral-pallidoluysian atrophy (DRPLA) gene. DRPLA OMIM:125370 is a progressive neurodegenerative disorder. It is caused by the expansion of a CAG repeat in the DRPLA gene on chromosome 12p. This results in an extended polyglutamine region in atrophin-1, that is thought to confer toxicity to the protein, possibly through altering its interactions with other proteins [1,2]. The expansion of a CAG repeat is also the underlying defect in six other neurodegenerative disorders, including Huntington's disease. 
One interaction of expanded polyglutamine repeats that is thought to be pathogenic is that with the short glutamine repeat in the transcriptional coactivator CREB binding protein, CBP. This interaction draws CBP away from its usual nuclear location to the expanded polyglutamine repeat protein aggregates that are characteristic of the polyglutamine neurodegenerative disorders. This interferes with CBP-mediated transcription and causes cytotoxicity [2]. 26.00 26.00 26.20 26.60 25.30 25.80 hmmbuild -o /dev/null HMM SEED 982 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.13 0.70 -13.94 0.70 -6.41 4 230 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 80 0 118 188 2 495.60 34 60.53 CHANGED GKHSMRTRRsRGSMSTLRSGRKKQTsSPDGRsSPsNEDlRSSGRsSPSAAST....SSsDSKsEShKKssK...KIKEEAsSPhKosKR.REKsASDsEEs-RsouKKoKTQ....ElS+PsSPS..EGEuEGEGE.SSDSRSlN-EGSSDPKDIDQDNRSSSPSIPSPQD.NESDSDSSA.......QQQh.QsQtsPul.sPPusususus..........Ps.sPSusslPPQsSPsuupPss.s....tssshoLl.pAPoLHPpRLPSPHPPlpP.su..utssspPssPs..tPpusHHGPhPPhPHsLQs....uPl.L.aP..sPPQP.uL....upt.sP...sPupA..+o..t..suQssht.PpQPP...REQPLPPAP.uMPHIKPPPTTPIPQ.ss.QSHKHPsHlpuPoPF.pMsSNLPPPPALKPLSSLPTHHPPSAHPPPLQLMPQuQ.LpossAQPPVLTQSQSLPspuSspPpo............uh.phPsQssFspHPFhsushPuIsPPsssssShsussPsuSu..pPssS...shsuSsss......usssssLPsIQIKEEsLDEsEEPESPPPPPRSPSPEPTVVssPSHASQSARFYKHLDRGYNSCARTDhYFhPLuuSKLAKKREEAhEKAKREAEQKsREEREREKE+EK..EREREREREAERAA.............................KASSSuHEuRhu-sQLuussHhRsSFEsPPTTIAAVPPYIGPDTPALRTLSEYARPHVMSPTNRNHPFaVsLNPsDPLLAYHMPGLYNsDPolRERELREREhREREIRERELRERMKPGFEVKPPELDsLHPSsNPMEHFARHGAlTlPshAG.PHPFAuFHPGLNPLERERLALAGPQLRPEMSYPERLAAERlHAERMASlusDPlARLQMFNVTPHHHQHSHIHSHLHLHQQDPLHQ........................GSuuP.HPL.VDPLsAGPHLARFPYPsGsIPNPLLGQ.PHEHEMLRHPVFGssYPR-L.suI.P.MSAAHQLQAMHAQSsELQRhAMEQQWLHGH.HhHGG.LPSQEDYYphhhppus+pL 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hS....P.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................ 0 12 22 62 +1292 PF03769 Attacin_C Attacin, C-terminal region Finn RD anon Pfam-B_2791 (release 7.0) Family This family includes attacin, sarcotoxin and diptericin. All members of this family are insect antibacterial proteins which are induced by the fat body and subsequently released into secreted into the hemolymph where they act synergistically to kill the invading microorganism [1]. 28.80 28.80 29.70 28.90 26.90 28.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.28 0.71 -3.94 24 176 2009-09-10 15:59:06 2003-04-07 12:59:11 10 3 48 0 58 196 0 106.40 31 59.05 CHANGED hssa..GsplsssupsNlFpsssHcLsAsAFto+shss...ps.pFsphGGGlsY.atsthGAohusu+pshhs.sshulsG+hNlapSss..oSLDhsuGas+ahussacsSphp.shGhsho+pF ......................................thts.hptthpsslhps.t.cslssps.a..topsh.t.......app.GuGlsh.hh.sthuAuhusuphsshs.pplslsG+sNLWpSssttopLDhsGuhu+ahuGPassptssausGhshoapF.......... 0 12 17 40 +1293 PF03472 Autoind_bind Autoinducer binding domain Bateman A, Holden M anon Bateman A Domain This domain is found a a large family of transcriptional regulators. This domain specifically binds to autoinducer molecules. 
21.00 21.00 21.00 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.69 0.71 -4.91 188 2219 2012-10-02 14:34:25 2003-04-07 12:59:11 10 9 1181 48 438 1549 74 144.40 20 58.87 CHANGED shpp.lhphlpphhpthGacp.hsas.......s.ttstsshhlhs.saPs.sWhp...pYhppsahthDPllptsh...psht.P.hhWsch.ht...........stttp..phhppAppaGl.psGholPl+sssuph.uh..l.ol..stsppshs.hp.....pttplphluhhhapph .................................................t..pplhp.lpthspphuachhshs...............hhhshshstschh..hhs..sa.P.t..sWhp.......tYppp.sahtlDPllphst....psps..s..hhWscthhptt......................tlhp.t.A.p.p.a.....G..l..ppGl...ohsl...h...s...ss..ss....h..uh..L.Sh....upsstp.ssht......hphphphls..h....h................................................................. 0 65 185 288 +1294 PF00765 Autoind_synth Autoinducer synthetase Bateman A anon Pfam-B_881 (release 2.1) Family \N 20.10 20.10 20.20 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -5.04 13 807 2012-10-02 22:59:21 2003-04-07 12:59:11 12 2 478 5 184 757 67 174.20 26 85.33 CHANGED sashhscs+..-ElFsLRKcTFKDRLcWsVpCpsGMEFDpYDNpsTTYllGlp-.spVlCSlRFI-s+hPNMIT.GTFtsaFsclslPc.GsalESSRFFVD+u.Rs+sllG.sptPlothLFLuMINYA+spGYcGIhTIVS+sMhpILKRSG.WpIsllppGhupcpEplYLlaLPsDc-spctLh 
.......................................pphhph...Rpcl....F..h-....+Ls.W...c....l..s...s.....t.....s...u...h..E.hDpaD.s.s.s.sh..Yl.l...u.h.s..c.....up.lhGs.sRLL.......P..T.......s.....t......P.....h.....h.....L.......p....ss........F........s...p.......L...h.......t.......s........h........s.......h.....P....p.......s..s.s...lWEhSR.F.s...l..stt...h........s.......tt.............h..................s...s....t.........t...................................hs...h..h....lhhu.hlpauhppGhppllsVs.s.hs.h.t+lh+RhG.hthphlG..s...........t..t..p..hhhhhh.hs........h............................................................................................................................ 1 22 84 121 +1295 PF03987 Autophagy_act_C Autophagy_C; Autophagocytosis associated protein, active-site domain Finn RD, Wood V, Coggill PC anon Pfam-B_10019 (release 7.3) Domain Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the vacuole. The cysteine residue within the HPC motif is the putative active-site residue for recognition of the Apg5 subunit of the autophagosome complex [2]. 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.04 0.72 -3.79 57 558 2009-09-11 22:59:56 2003-04-07 12:59:11 10 8 307 1 391 549 6 65.50 35 23.68 CHANGED hIsYsphYpsPplaltGas.........p.st.psLs.ppha...-Dlsscatp+T....................lTh-paPh...............hsh...hhslHPC+ .............................aIsYsp..hYpsP+LalhGas........................p..st...pPLsscp...h...h......ED......l..s...t-atp+T............................................lT.h-pHPh.....................hshs....hholHPC+................................. 0 124 202 314 +1296 PF03986 Autophagy_N Autophagocytosis associated protein (Atg3), N-terminal domain Finn RD anon Pfam-B_10019 (release 7.3) Domain Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the lysosome/vacuole. 
Atg3 is a ubiquitin like modifier that is topologically similar to the canonical E2 enzyme [3]. It catalyses the conjugation of Atg8 and phosphatidylethanolamine [4]. 20.30 20.30 22.40 25.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.05 0.71 -4.43 29 337 2009-09-11 23:00:31 2003-04-07 12:59:11 8 5 293 3 233 333 4 156.20 35 49.50 CHANGED tl+ush.sltEaLTPVh+pSpF+pTG.ITPEEFVtAGDaLVa+hPTWpWu.susps+h+saLPtDKQFLlTRsVPCa+Rspphtts......ttt-cllc-p-t...........D..-GWVcTpthssttt.............psschcsls-hsstspp....................ptp-p----hs ...................h.l+uphhsht-hlTPlh.+pSpF+cTGhlTPEE.FVtAGDaLVa+hPTWpWs..s.....us..s......+t+sYLP.tsKQFLlTRs.......VPCh+Rspphths...........tpt-t.llc..-.s.ps...........................-.sG....WlpTtt.ss.ttt...................t.tt..c.sl....s-hs..spp.................................t.....t......ttt.............................................................................................. 0 78 126 188 +1297 PF02309 AUX_IAA AUX/IAA family Bateman A, Mian N anon Pfam-B_801 (release 5.2) Family Transcription of the AUX/IAA family of genes is rapidly induced by the plant hormone auxin. Some members of this family are longer and contain an N terminal DNA binding domain, such as Swiss:O64965. The function of this region is uncertain. 
22.70 22.70 22.80 23.00 22.30 22.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.72 0.70 -4.59 56 1709 2009-01-15 18:05:59 2003-04-07 12:59:11 11 17 106 4 612 1704 1 179.60 27 48.17 CHANGED LsLctT.E.........LR..LGLPGstt.t................................ptsstsssKRu..Fu-..................................................sstttpssssspppp........psspsP.............................sK.......AQVVGWPPVRSaRK.Nsht....................................ptpst..............haVKVSMDGAPYLRKlDL+hYpuYpELssAL-cMF.usF.olG..............E.phh-hhsGS.EYVPTYEDKDGDWMLVGDVPW-MFlcSCKRLRIMKuSE....AhGLusps ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.h.s..sh.....ssl.p..s.hp..p..s.ht.........................................................t.....t...............thaV.K.VpM-GsshhRKlDLph...a......suYp-LtpsLp.cMF...........sh..t................................................t....hc.......hp.ts...cahlsYc.D.c.....-GDhMLVGD.sPWp...........F........hpss...++l.+Ihptp-.h.................................................... 
0 67 360 485 +1298 PF02041 Auxin_BP Auxin binding protein Mian N, Bateman A anon IPR000526 Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.07 0.71 -4.81 4 80 2012-10-10 13:59:34 2003-04-07 12:59:11 11 1 41 8 18 138 46 144.30 65 86.11 CHANGED AupCslpGLPlVRNIS-LPQ-NYGRsGLSHhTVAGSlLHGMKEVEVWLQTFAPGScTPIHRHSCEEVFVVLKGpGTLYLu.SSHuKaPGpPQEFsIFsNSTFHIPVNDsHQVhNTsEHEDLQVLVlISRPPlKlFhY-DW.MPHTAA+LKFPYYWDE-Chpssp...DEL ........................................t.ss.ps.slVRsISch.QssYGt.GLSHhT..lAGu.lh..HGhKEV.EVWLQTFuPG.s.pTPIHRH.S.C.E..EVFVVLKG....sGTLYL....u....sSH.t....Ka.P.GcP.pE....a.sIFsNSTFaIPVNDsHQVWNTsE+EDLQVLVlISRPPlKlF.hY-DW.MPHTAA+LKFPaaWDEpCh.ts.....E................................................... 0 5 12 16 +1299 PF02519 Auxin_inducible Auxin responsive protein Bashton M, Bateman A anon Pfam-B_1263 (release 5.4) Family This family consists of the protein products of the ARG7 auxin responsive genes family none of which have any identified functional role. 20.90 20.90 21.10 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.50 0.72 -3.95 112 1334 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 37 \N 887 1284 1 94.40 31 75.65 CHANGED hstphpth.tttp.......thh.tts.tth...........................hP.+GahsVYV........Gc............tpp.RFlVPlsaLs+PhFppLLcpAcEE.F.Gasp...tGs..LsIPCcts.hFcp.lhshlp .........................................................................tt.......................................p..........................sslP...KGah.sVYV...........G-.............ptc..RFllPl.sYLspPhFppLLppA......EE..E...F.G.asp....tGs.....L.slP.C.cts.hFpplhphl.p...................... 
0 98 600 754 +1300 PF03708 Avian_gp85 Avian retrovirus envelope protein, gp85 Finn RD anon Pfam-B_3651 (release 7.0) Family Family of a vain specific viral glycoproteins that forms a receptor-binding gp85 polypeptide that is linked through disulfide to a membrane-spanning gp37 spike. Gp85 confers a high degree of subgroup specificity for interaction with distinct cell receptors [1]. 21.10 21.10 24.50 23.70 18.30 17.60 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.85 0.70 -5.14 5 283 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 22 0 0 304 0 223.70 64 47.01 CHANGED LNVSLWDEPpELQLLGSQSLPNlTNITplou..V......sGGClGFsP..pusGlY.GWsRctlo+aLLccPh.pPaFspsSNSo-PFTVVTADRHNLFMGSEYCGAYGYRFWcMYNCSQoRps........Ys.C..Gcssu..sG.PEsWCsu+GGcWVNQSpEhNESEPFSFTlNCTGSsLGNsSGCCGcssTILP.GAW.lDSTQGSFTK..........PKALPPAIFLICGDRAWQGIPSRPVGGPCYLGKLTMLAPNHTDI...LKlLuNSSRTGIRR.KRS ....................LNTTLPWDPQELDILGSQMIKNGTsRTCVTFGSVCYp...tNtSpVCHsFDGNhNGTGGAEAELRDFIsKWKucD.LIRPYVNQSWTMVSPINs.....ESFSISSRYCG.........FTSNETR.Y.........Y+....GshSs........WCsSKGGcW.........SAGYSNGTpCSu....NToGCGGNCT.......uEWNYYAYGFTFGKpsElL......WNNGTAKALPPGIFLICGDRAWQGIPpNALGGPCYLGQLTMLSPNFTTW.....hTYGPNI..TGHRR.+R.... 0 0 0 0 +1301 PF01382 Avidin Avidin family Bateman A anon SCOP Domain \N 20.10 20.10 21.90 21.20 19.70 19.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.64 0.71 -3.80 15 142 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 81 454 71 247 4 107.70 29 48.71 CHANGED s..slpGsWhNchGSshtIss..susGtloGTYhossusssstspsp..slsGah.......s..t..ssoshuFoVsW....spssSsTsWoGQshls..supstlpThWhLsssssssp..WpuspsGtDsFTp ...............................shpGpWhNchGS..phpIp.....pssGtlsGsYhoslsps.t....s...ss.....lsGhh.................stssl..uFoVpW..........sphpShT..sWsGQshhs..supphlpThWhhspts.sstp...p..WtuhpsGtDhFs................................... 
0 39 42 61 +1302 PF03377 Avirulence Xanthomonas avirulence protein, Avr/PthA Mifsud W anon Pfam-B_3936 (release 6.6) Family \N 23.10 23.10 23.10 23.20 22.30 23.00 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.43 0.70 -14.25 0.70 -5.74 2 801 2012-10-11 20:00:59 2003-04-07 12:59:11 8 12 45 7 116 807 9 173.70 53 73.42 CHANGED LsssQlltIAppGGhpAlEsVpthhssLsts..sLT.tQVVAIASNIGGKQALETVQtLLPVLCQsHGLTPDQVVAIASN.................................................................................................u.....GGKQALETVQRLLPVLCQsHGLT.-QVVAIASN.GuKQALETVQRLLPVLCQsHGLTP-QVVAIASp.GGKQALETVQRLLPVLCQsHGLTPpQVVAIASNhGGKQALETVQRLLPVLCQAHGLTP....................................................................pQVVAIASpsGGKQALETVQRLLPVLCQsHGLTP-QVVAIASNGGGKQALETVQRLLPVLCQsHGLTPpQVVAIASN .....................................................................................................................................................................................................................................................h.hlp.+.LhP..V...Lsps.HuLT.tQVVAIASp.GG...KQALETV.Q.R.LLP..VL.CQ.s.HGLTPcQVVAIASpsGGKQALETVQ.R.LLP..VL.CQ.s.HGLTPsQVVAIAS..N.GGKQALETV.Q.R.LL.P..V.L.CQ.s.HGLTP-QVVAIASpsGGKQALETV.Q.R.LL.PV.L.C...Q.s.HGLTPsQVVAIAs..psGGKQALEoV.p+hL.Ps.................................................................... 
0 5 22 22 +1303 PF03591 AzlC AzlC protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.80 22.80 22.80 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.64 0.71 -4.25 162 3558 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 2883 0 690 2484 1687 141.30 28 60.12 CHANGED lPl.hluhhshGlsaGllu.spsGlohhpshhhShllaAGuuQFlslsllss....uu...s...hhslllsshllNhRHllauholsshhp..t..hshht+hhhuahLTDEsaAlshs.phtp.ttttt.............hahhGhs..lh.hahsWhluohlGslhGshls ................hPlslualslGluaGl..h.u.spt.Ghohhpsh.hhShllaAGuuQFlhlu.llsu....uu....s............lhsls.loshhlNhRahLhuhultshhp....t..........hshhpp.hhhuahlTDEsaulshs.ph.sp..ppttt.................hahhGlsl.ssahsWshuollGuhhGshl............................ 0 197 409 559 +1304 PF02216 B B domain Bateman A anon Pfam-B_1782 (release 5.2) Domain This family contains the B domain of Staphylococcal protein A, which specifically binds to the Fc portion of immunoglobulin G. 22.40 22.40 22.90 22.40 21.50 22.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.29 0.72 -4.20 6 1484 2009-01-15 18:05:59 2003-04-07 12:59:11 11 69 165 21 12 1155 0 53.40 62 43.70 CHANGED ppspasc-QQNAFYplL+hPNLsEEQRNGFIQSLKDDPSpSppVLuEAKKLNDu ....s.sNpasc-QQNAFYEILphPNLNEEQRNGFIQSLKDDPSpSsNl.LuEAKKLN-u.......... 0 8 8 12 +1305 PF04182 B-block_TFIIIC B-block binding subunit of TFIIIC Wood V, Finn RD anon Pfam-B_68239 (release 7.3); Family Yeast transcription factor IIIC (TFIIIC) is a multi-subunit protein complex that interacts with two control elements of class III promoters called the A and B blocks. This family represents the subunit within TFIIIC involved in B-block binding [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.10 0.72 -4.13 35 324 2012-10-04 14:01:11 2003-04-07 12:59:11 7 4 262 0 222 703 46 74.90 27 4.73 CHANGED sshpaplLptIApsRhpGlhph-Ls.phss..pDsRslhh+hcpLpctsLIs+psl...................................ptpstposllhLp+Fhpp .....................s..hpaplLptlu..p.u.Rh.pG..h..h.Qp...-Lt..phhp.....hDs+plhhph+tLpcpGLIs+psh....................................................................................................ptpt......tpo.t.l.lhLp+Fh..t........................................................................................... 1 65 124 181 +1307 PF02246 B1 Protein L b1 domain Bateman A, Mian N anon Pfam-B_3438 (release 5.2) Domain Protein L is a bacterial protein with immunoglobulin (Ig) light chain-binding properties. It contains a number of homologous b1 repeats towards the N-terminus. These repeats have been found to be responsible for the interaction of protein L with Ig light chains [1]. 25.00 25.00 26.40 43.00 24.60 23.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.31 0.72 -4.15 3 19 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 2 23 0 33 0 69.50 78 44.52 CHANGED TPE.PKEEVTIKsNLIFADGoTQTAEFKGTFAEATAEAYRYADLLuK-NGEYTADLEDGGYTINIKFAGK TPE.EPKEEVTIKANLIaADGKTQTAEFKGTFEEATAEAYRYADhLtK-NGcYTsDltDtGYTlNIKFAGK............................ 0 0 0 0 +1308 PF03483 B3_4 S3_4; B3/4 domain Bateman A anon Pfam-B_1005 (release 7.0) Domain This domain is found in tRNA synthetase beta subunits as well as in some non tRNA synthetase proteins. 
23.30 23.30 23.40 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.76 0.71 -4.80 308 5864 2012-10-01 21:04:40 2003-04-07 12:59:11 12 45 4697 27 1517 4739 2917 167.50 30 24.80 CHANGED ssspahu+.llcslp.hssSP.WhppR..LhtsGlRs.INslVDlTNYlhl-hGp.....PlHAFDhc+l....p.........s.plh.VRh.Ap.su....E.plhsL..Dsp.chpL.ss...........................stlV.Is...D......s......p.p.sl.ulAGlMGGtpSpls.p.........sTpslhlEuAhFsshtlttsu+phs..lpo-uShRFERG.lDsphsthAhccAspLlh-ls.G ...............................................................................................................sstahs+.h.lc.slp.hts.o....P.W.hpp+....L.hps....G.h.....Rs...IsslVDloNYlhLchGp.....PhHuaDh-cl.p..........................................G.slh..VRh.Ac..pG............................E.phs.sL..D.sp...-tpL...ss.......................................splV.Is....D........s.......................pp..sl..uluGlhGG..p.po.t.ls.p........................pTps.lllEsAhFss..h.s.lttp..u+.pht....lpo-.uu...RaE+G.l-..t.h....uhphAstLl.phs............................................................. 0 509 955 1269 +1309 PF03484 B5 tRNA synthetase B5 domain Bateman A anon Pfam-B_1005 (release 7.0) Domain This domain is found in phenylalanine-tRNA synthetase beta subunits. 24.10 24.10 24.10 24.10 23.20 24.00 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.33 0.72 -4.15 85 4895 2009-01-15 18:05:59 2003-04-07 12:59:11 10 18 4795 27 1299 3944 2191 69.60 33 9.04 CHANGED tptlplshpplscllG...........................h..plstppltp...hLpplshpsph....................spsthpVpsPsaR..hDlppphDllEElsRhhGa ......................................t...lslphscls+ll.G.................................................h.....cl.s.tp....p.ltc...lL.p+LGhp.lpt.....................................................sssthp.V.ssPoaR..hDl....p.h.ct...DLlEEVARlYGY.............. 
0 443 837 1100 +1310 PF01603 B56 Protein phosphatase 2A regulatory B subunit (B56 family) Bateman A anon Pfam-B_984 (release 4.1) Family Protein phosphatase 2A (PP2A) is a major intracellular protein phosphatase that regulates multiple aspects of cell growth and metabolism. The ability of this widely distributed heterotrimeric enzyme to act on a diverse array of substrates is largely controlled by the nature of its regulatory B subunit. There are multiple families of B subunits (See also Pfam:PF01240), this family is called the B56 family [1]. 25.00 25.00 25.40 25.10 23.40 24.40 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.30 0.70 -5.87 54 1081 2009-01-15 18:05:59 2003-04-07 12:59:11 15 15 292 10 639 990 15 337.90 49 73.42 CHANGED .L.P.hp.csssscppsLFlpKlp.CshlhDF.sD.....ssp-hppKch.KppsLp-ll-hlsss.p........stls-shhstlhpMlusNlFRsl.......Pshsp.t........hss---ps...hh-suWP...H...LplVY-lhl+alpssshssp.hptalspsFlhpLLsLFcSpDsRER-hLKshLH+lYuKFhshRshIRpsIsslhh.................calh.....csppasGluELLElhuSIIsGFulPLKcEHphFhh+sLlPLHpscslshYatpLshslspFlcKDssLsp.llctLL+aWPhosopKElhFLsElccll-hhpsspFpphhhsLFpplucC.lsSspFpVAE+ALhhWsN-tlhsLl.....p..pNsphlhPllhsuL.csucpHWNpsl+shshssh+hhh-hssplFpc.http......hpppppccpppp...pp+cppWpplt.chAt 
.......................................................................s.hc.-s.sst-p.pLFhpKlp.CshlFDF..D.........................shtc............h+.KEl.KRtsLtEhl-al.......sps..p............shlsEshhschl.pM..h..........u..hNlF.Rsl...............................PPssssp..........................aDsEEDEP............slEsuWP...H..................L.Q.....lVYEh.....FLRFlpS..-hpsp......lAK+YIDp.pFV..L.....p.......LL-L.FDS.EDPRERD...aLKThLHRIYGK.FhshRuaIR+pINNlFh.................c.FlY.....ETE+aNGlAELLEILG...........S...II..NG..............FALPLKpEHK.FLh+lLlPLH..........KsK........s..........Lu..hYHs....QLu.Y.CllQFl.EKDspLs-................Vl.h..GLL+aWP...t...TpS.K..EVM..FLs.ElEEI.L-...l.....hc.P.........s.p....F........Kl..PLF+QlA+C.lsSsH.F.........Q........V.AERALah.WNNEalh.s...Ll.......................p..cNt...th...IL..PI.hFsuLh.cso+p.HWN..p.s.l.....huLhhNshKhFhE.hs.pLF--.hstp......aptcctp........tp....p...........tpRpphWtplp.p.................................. 0 209 349 494 +1311 PF02043 Bac_chlorC Bacteriochlorophyll C binding protein Mian N, Bateman A anon IPR001470 Family \N 25.00 25.00 28.60 28.40 24.90 17.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.79 0.72 -4.19 12 34 2009-09-11 07:33:55 2003-04-07 12:59:11 12 1 21 1 24 32 0 77.10 52 97.47 CHANGED shuGAFopuAtAYGRhlEVFlDGHWWVVGDhLENlGKoTKRLssNAYPalYGG..uuuuul+GSSPphuGYApPoKclcpRFpc .s..uGsFTchhuAhGRIhEVhl-GHW.sVG.hh-ulGKuThRlspNAYsphhGG....usuLRGSSPpsSGYAhPoKtlcS+Fs........... 
0 4 5 18 +1312 PF00216 Bac_DNA_binding Bacterial DNA-binding protein Finn RD anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.70 0.72 -3.96 158 11816 2012-10-02 15:10:05 2003-04-07 12:59:11 16 10 4455 45 2522 6533 4975 89.30 37 83.62 CHANGED hsKs-Llppl.........u.....pcs......s.l..o+ppspphlsshhctlppsL..ppucc...l....plsG.FGsFplpp...+tuR.pGRNPpT.G-.tlpl...ss+pslpF+suKpL+ctls ....................................................................MsKs-Llc..t.l.........A.........pps.......s..l.....oKp......cup....p.s.l-shhcs......lppuL....ppG-p........V................pL.h...G.FGoFpl..+c..............RssR...p....G..R......N...P....p...T.Gc..plpI....sAp..pVs..tF+sGKtLK-tV............................................... 1 797 1619 2112 +1313 PF00308 Bac_DnaA bac_dnaA; Bacterial dnaA protein Finn RD anon Prosite Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.18 0.70 -4.70 14 6279 2012-10-05 12:31:08 2003-04-07 12:59:11 13 14 4628 18 1289 7863 4434 203.50 39 51.24 CHANGED Lsp+YoF-sFVhGsSNchAaAAAhplAcs.Puc.tYNPLFIYGssGLGKTHLL+AIGNhshp.hP.shRVhYlsuEcFsp-hlculpc..schppFKcpYR.slDlLLIDDIQFlutKEpoQEEFFHTFNsLh-ssKQlVloSDRsPccLssh--RL+SRFphGLssslpPP-hETRlAILcKKhc....tcshs.....lPpElhpaIApplssNlRELEGAlpRlhsaushs 
................................................................................p.paTF-.s.F..V.........G..p.......u.N.......p...h......A.....h..A..........A..u....h....t.....V....u................c............s......s.......u..t.....s........Y.......N...P......L......F..l.Y.G.u.s.GLGKTH.L...........hp....Al.G.......st.l........h..................p....t.t...........s.....s......s......+.V....h......Y......l....s..u....E.....p...F...s...p.......-...h.....l.p.....u...l....pp...........pt...h...p...p.....F.....+.......c....t.....Y...R.....s.......l...D....lL......L.I...D........D.........I........Q....F........l..........u.....s.......K.........-..........p..........o........Q.........E........E........F.F....a......T..F.N.....s......L......h.........c.....s......s........+.....Q......I..........l......l........o....S.......D......R..s.........P........+.................c...l.........s....s......l....-.....-.....R.......L....p..S.....R.F...s...W..G.L...s..s.sl...pP.P...-.h.Eo.....Rl...A....I.L.p...pKAc.......................t.p.slp...............lP.s.-V...h.pa.l...Ap.p..l....c...s...N.l...R-LEGALs+lhA.up..s................................................................................................ 0 415 826 1072 +1314 PF01311 Bac_export_1 Bacterial export proteins, family 1 Finn RD, Bateman A anon Pfam-B_1442 (release 3.0) Family This family includes the following members; FliR, MopE, SsaT, YopT, Hrp, HrcT and SpaR All of these members export proteins, that do not possess signal peptides, through the membrane. Although the proteins that these exporters move may be different, the exporters are thought to function in similar ways [1]. 
27.80 27.80 27.90 28.60 27.70 27.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.37 0.70 -5.10 16 3358 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 2177 0 654 2099 515 239.20 26 92.63 CHANGED pshhshlsthhLs.hhRlhshhhhlPhhspphlsuhlR..hulshhluhhlhssl.hsssshhsst...hhhLllpElllGlhlGhhhshsFhshpsAGplIDsQtGhsh.ushhsPssuspso.luphLshhulllalsssGhhhlls.slhcSaphhPlsphhs..t..thhthlhpthsplatpulhlAuPllhhLLLh-lulGlluRhAPQlslhsluhPlKshlullhlhlhhsslhshhppthphhhshhs ..........................................th.lhshhhs.hhRl......huhhhhhPhh.u.pps.l.P.s.t.l+....hsluhh.lsh..h.l.h.P....t.l........s..s..t....s..h....s..h...h.........s..........hh........hhhlshpElllGlhlGhhhphsFhsh.phAGplIshQhGhuh..uohh.DPs..s.u.s..s..s.s..lluplh...shhu.hllFl.s...hsG.........Hh.hllp.hLhcSaphlPlu.s.....h....h..h....s..s.............h........h.....hlhphh..shhahhulhlAhPllshhLlsslulGllsRhAPQLNlFsluhPlphhlGl..h.l...l...hh...h.h..shlhshhtphht.......t.................... 0 202 393 515 +1315 PF01312 Bac_export_2 FlhB HrpN YscU SpaS Family Finn RD, Bateman A anon Pfam-B_1200 (release 3.0) Family This family includes the following members: FlhB, HrpN, YscU, SpaS, HrcU SsaU and YopU. All of these proteins export peptides using the type III secretion system. The peptides exported are quite diverse. 
22.10 22.10 22.20 22.80 21.30 22.00 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.85 0.70 -5.55 16 4381 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 2177 42 948 3009 756 267.10 32 91.14 CHANGED .u.-KTEcPTsKKLcDARcKGQlsKSpDlsshshllushthlhhhushhhpcLtuhlth.hhhtspsashuhp.thhthhhhlhhshshhl...hhsshlsulhushlQsG.hlhoscul+Pchp+lNP..lpu...hK+.hFSsculhEllKSlLKllhluhlhahhlhsphsplhpLshss.tshhshshpLhhplhlhshhhhllluhhDahaQ+hpahKcl+MoKpElKcEaKcpEGDPclKu+RRphtpElt.ppphtssl.....cAsllVsNPTHaAVAltYc.pchssPhllsKGsDttALpl+phAcctslPllcshsLARuLYppschsphIPtphacsVAclLthlh ...........................................t...pKTEtsT.++hpcu+c.cGpls+Sp-...ls.hh.hhs.hhhh.h.h....h.h...h.............h....h.............t....h.....t...h....h.t...............h.........................................h......................................h..h.................h...h.............h...........h...h.h.h....h.h.h..h.h.hhs.hh.s.shh...s.G..hhh.sscs.l..php+l.sP..hts.........hKp..hF.uh.pshh-hhKslh+hhhlhhhhhhhh...h..thht.h..........s............t....h..h.hth.hh.hhhhhhhhhhhhshhDh.hpha.ahcph+Mo+p-l+cEhKptEGsPplKu+hRphtpth...........t...t....p...h.h.ttl.......pushllsNPT.Hh..AV.ALpYc....p....p.....s...A...PhVlAKGts.lAhpI+plAcc..pslPllcs.sLARu.Lapps.c.ls.ptIP..t..p..L..a..pAVAclLsal........................................... 0 309 593 767 +1316 PF05088 Bac_GDH Bacterial NAD-glutamate dehydrogenase Moxon SJ anon Pfam-B_6291 (release 7.7) Family This family consists of several bacterial proteins which are closely related to NAD-glutamate dehydrogenase found in Streptomyces clavuligerus. Glutamate dehydrogenases (GDHs) are a broadly distributed group of enzymes that catalyse the reversible oxidative deamination of glutamate to ketoglutarate and ammonia [1]. 
23.00 23.00 23.40 23.00 22.60 22.80 hmmbuild -o /dev/null HMM SEED 1528 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.37 0.69 -14.31 0.69 -7.18 107 1221 2012-10-10 17:06:42 2003-04-07 12:59:11 7 6 1073 0 449 1248 467 1239.90 36 88.15 CHANGED l+lhs....htp.tshpus.pollplls-DMPFLVDSlphtlscpulsl+hlhH.PllplcRs.tsGplhpltstst........................ssttEShlhlEl-.+hs...sspphpt..LpppLppVLsDV+tsVpDWpsMpschpph.hpplp...pp...................chpEstsFLcWLtscpFoFLGa+cYpl....................tss.thtlt......ssouLGlL+..........t.httspphsshssttttthhp.sph.LlloK.ustpSpVHRsuYhDYlGlKc.a...cppG...pVlGEpRFlGLaTSsAYspssppIPllRcKlppllppuGasssoHsuKsLhplL-saPRDELFQhst-pLhchuhullp.Lp-Rt+lRLFlRpD.asRFlSsLValPR-+YsTplRh+lpplLtcshsu.pts-asshh.sES.sLARl+all+h........sssph.......h-hpclEppltchsRsWpDclpssLhpp...........hup..............................tputtlhp+YssuFPtuY+-passppAltDlpplcpL.........ss..ssslshplY......+s..pss....sp..l+LKlap.tspslsLScllPlLENhGl+VlsEp..Papl....ph..t.....st.....pshWlaDFsLp..hssst.......hclsp.............hpptF...p-AastlWpGcsEsDuFN+LVLtAuLsWRplslLRAYu+YL+QsGhsaSQsYltpsLsppPsluptLlpLFcsRF..-P.........................................ttppcspptptlpsplppsL-pVssLD-DRILRpahslIpATLRTNaaQt.......st........pGps...+salSFKlcsptlsslPtP+PhhEIFVYSPRVEGVHLRsGpVARGGLRWSDRpEDFRTEVLGLVKAQtVKNAVIVPVGAKGGFlsKphPss.s.....sR-uhhsEGhtCY+hFIpuLLDlTDN..l.lsGc.........lVPPs..sVVR+DsDDPYLVVAADKGTATFSDlANulut-.YsFWLGDAFASGGSsGYDHKtMGITARGAWESVKRHFRE.hGlDsQo.psFTVVGlGDMSGDVFGNGMLLSccI+LlAAFsHpHIFlDPsP.DsApSasERcRLFsLPRSSWsDYDpsLISpGGGVFsRouKoIsLSsch+thLGl..cps......phoPsELIpAIL+APVDLLWNGGIGTYVKAosEocu-VGD+ANDulRVsGp-LRsKVlGEGGNLGhTQhGRIEaAhpG.....G...........RlNTDhIDNSAGVDCSD+EVNIKILLsplVpsGcL.ThcpRNpLLtpMTD-VupLVLcs.NYtQopAl..SlsptpuspplstptRhhptLEppGpLcRtlEhLPs-cplscRtstGp.GLTRPELAVLlAYuKhsLp-pLlsS.-ls--s......ahsphLhsYFPp.Lpc+assp.ltpH.LRREIIATtlsNplVNchGh.....sFlhRlt-pTGsss.sclscAashucclFslsslWppI-uLD.splsuslQhphhhplp+hlc+sspWhLRs...ppt....h.s.lsstlspapsslppLp....splsshLsspp..tpthppptpphsp.tG
lPpsLApplAslphhhssl......DllclAppssts........l..tplApsYFtluppLslshlhptlsp.lsssspWpsLARt.....uhp--LtttpRpLstslLpt............t......spttlstW..hppppstlpRhpphls-lps...sssh-hAhloVAhRpLpsL ....................................................................................................................................................................................................................................................t..hshlpll..shPaLlDSlhh.ltp..shthph.hhp.s..htltRt...pu....tl.tl...................................t...pohhhlpl.........s...t...........sttthtt...lpptl.phLtplt.sspDh..hhtth.th...httlt....tt................t.....................pht-shthLpWlt.sppFhhhGhpchth.......................t....t..h...........tstLGlhp.................................................th.......h..p.......l.hlsK.ss..Shl+R..ssY.Dalul+.....hstpG............t.hhuEpRhhGLasussattss.pIPhl+p+httlhthu...uht...tuash+tLhpllpphPR--Lhphs.ppLhthshsllt.htp.+.ph+lhlRh......D.hs.+FhssllalP+-casoth+.thtthlhp.htu.t.h.-..hshh..s-.uslsphphhlch......t.tt...............t..hs.....ttlptplhphspsWpDph.thh.tt...................................................s......tphttsFs.sY+pthsstpAlhDlthlttL................................st.....p...lth.hh.p....ts..t..............tt.........hphKl....ap.tttsh.LSplhPhLpshGlcVl.sEp..sapl.h.................ps.........t.h.alh-Ftht....t...............hp.h..t.p......................ht.tthppAhtth.atG.p.h.EsDsFN.pL..lhtAuLsh+pl.slLRuhu+Y....l.....+Q.h.....u.h.....s.....aSpsYl..tp.sLp..p..aPp..l..sctL.lpLFpt+F.....sP................................................ttptpppptp..t.l....t..p.p...ltpt.L..-...p.V......s......s......L.D....-.....D.R....I..L.Rpah...sll.pA.oLRTNaaps....................t.................t....s....+shluFKhcP........ph.....l........s........p.l.....P......t.......P....p.......P.haE.....I.F.V.Y.u.PclEGVHLRhGtVARGGLR..........W.......S......D......R.p...ED.........F.......R........T..........El.......L.GL.......V....KAQtVKNsVIVPlGAKGGFls+p..P.s..s..............
.............sR-thht.cGh.tsY+h...F..I.puLLDl..TDN......l..h....p....up..............l..VsP.t..p..V.VRa..D.tDDsYLVVAAD.KG.T..A..T.FSDhA..Nslut.p..as..F.WLGDAFASG...G.S...s...GY....DHKtMGITA+GAWEuVpRHF+E.hG.....h......DhQ.s.psFTslGlGDMuGDVFGNGMLLScpI+LlAAFsHhHIFlDPsP.DsttSatERpRLFpL.PR.SSWtDYstpLIStGGGlasRstKsIslosphpthLslpts..........................phsPs-LlpAlLpu.VDLLWNGGIGTYlKuspEocspVGD+ANDslRl.sup-lRs+l..lGEGGNLGhTQhGRlEauhpG.....G...........+lNTDhlDNuuGVDsSDpEVNIKIhLs..th..l.t...........sGcl...s.p....pR...spLL.pMoD-VuplVLts.NhtQs.s..l..uhstt..puhthht..tchhp.LEpp.s.h.L.sRtlEhLPssppltcR....t......t..G..sLTpPELulLhuasKhsLtctllt.o.pls-...-s......hh.sp.L.tYFPt.....lp.pp...aspt.h.pH.L++EIlsT.lsNphlsphGh.....sah.Rlt-psGts..splhpsahhscplatlstlhpplp.t..L..s......lss.................thp.phhhthtchhpcsspWhLpp.....t.t.........s.ltt.ltpat.thttlt.....phsthl.....t.tp..htthptthtthhp..tGl.sttlAthluth..hhs.hh....sI.hpl......u.p.......ts..hs.........................h.ttsAchYatlupthththh.....hpthpt...s.hsshWpshAttshp--l....ptlshtsLt....................p.............tt.ht.W....t.t.pt..ltph.thltphtt.....tt.......h...phshhsls.t.h.......................................................................................................................................................................................... 0 153 284 373 +1317 PF01152 Bac_globin Globin; Bacterial-like globin Finn RD, Bateman A anon Prosite Family This family of heme binding proteins are found mainly in bacteria. However they can also be found in some protozoa and plants as well. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.47 0.71 -4.10 12 2140 2012-10-01 21:46:00 2003-04-07 12:59:11 16 28 1468 69 809 1739 270 116.80 24 78.59 CHANGED ola-tlGGpss.lcslsscFYspl.sDsp..htphhp.sschsspcp+.htFLsthLGGPshYht+pG+P.L+tpHtsh..slsstch-thlcphtpAlpshs.tshs.-phhchhphhttshhsh ...................a-hlGG....pps....lpplV-pFYs.t.l.tpDsp.................lt.h...F..t......s......s......-...h...s..........p....t...pp+...h...t...p...........Fl...st...h...h.....G..G..P.......s..h.Y....s.t..p..p...........G...p........P...........l....+....t....p.H..h...sh........sIst.tc.h-tWLpp...hpp.A.l.s.c.h...s.h..s..t.t...h...tp.....l...h.thht.htt.h...t........................................................ 0 348 595 732 +1318 PF00296 Bac_luciferase bac_luciferase; Luciferase-like monooxygenase Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.70 0.70 -5.30 155 15488 2009-09-11 11:56:34 2003-04-07 12:59:11 15 68 3031 48 4888 13559 5534 299.40 18 82.83 CHANGED h..........hhh...phth.h.st.................shpphhchAptA..Eph.GF-shhhs-phts........................h-shshhuslA.stTp+lclssultssh..p....pPshlAppsATl-.plSs......GRh.tLGlGs.Gttthp...hthh.....Gh.........shscphsthcEhlcll+tlh....tt......................pps.sacGca..hph...........................tpshhhPps......................P..lhluutu.spshchAuchuDshhshs..............ts.sthtphhsplcp.tstp...........tGRs.....shphsh.thhlhhs....cspppAtpth......hthh.........................................................t..thtthhthh..tthhtshsh...lG.osp.pl...tcpltp..htpsGssp 
...................................................................................................................................................shpthhplApt..A..E.......c.........h..G.a.cthal.s..-pht.h.........................................ss...hsh..lut.lA.s..t..T..p.......+..l....c..l......u.s..ush..hhs...p........sP.h.hlAcp.hu.oL.D.plos...........G......R.h..tl.s..l.....u....s....G.........t..t..t....p.........hpt.h........Gh.........................sh.s.p.p.h.p...t.h...p..E...h....l..p...llp.p..lh.....ps............................................t.h...sa....p...Gca....hph.......................................tssth.h.P.t.shpt...............................hP...lah.u.u........s..u..s.............t..u....h..p.h..AA.c..h....u.p.shhhss....................s.p.t..h.t.p.h...h.p.t......h.+p....th..t.t...............................tG+..t.........phph...h.....h...th....h......s...hs.s...............c..s...p..p..p..A.hthh.............tthh............................................................................................................................................h.......t.....h.........................................h..t.t.hhh..................lG..ssp..pl...tptltt....h.t.....t................................................................................................................................................................................................... 0 1231 3130 4170 +1319 PF01036 Bac_rhodopsin Bacteriorhodopsin-like protein Finn RD, Bateman A anon Pfam-B_1412 (release 3.0) Domain The bacterial opsins are retinal-binding proteins that provide light- dependent ion transport and sensory functions to a family of halophilic bacteria [2,3]. They are integral membrane proteins believed to contain seven transmembrane (TM) domains, the last of which contains the attachment point for retinal (a conserved lysine). This family also includes distantly related proteins that do not contain the retinal binding lysine and so cannot function as opsins. 
Some fungal examples are: Swiss:O74870, Swiss:P25619, Swiss:P38079, Swiss:Q12117. 21.90 21.90 21.90 21.90 21.30 21.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.64 0.70 -5.04 17 3174 2012-10-03 04:04:29 2003-04-07 12:59:11 13 10 618 161 242 3093 2566 141.90 35 89.93 CHANGED hshaslhulhhllushlFhhhshp.sss.p+phhssslhlsultuhuYhshA.slGhohsts..................cpla..hs+YlsWhlohPl.....lLhLs.lAsss...................tpslhshlhsshlhllshLlGulh..sush.....+asaashussshLhlhYhlh.sshtpsstshspp...shahhLp.hhlll...WhhYPlsWhlustGssl.sssspslhashLDllshlsashlLlhttsshsstt .......................................................................................................................................................................................................................................................................RYlDW....LlTV.PL.........h.hh....l....l...Ls...t...l....s...s.ss.................................................sul...h.a.+....L..l.h.uS.ll.MLls.G.Yh.G...E.......s........shh.s..............sh.h.h.a..l.l.u.h.h.u...alYI..l.Y.p...l...........a..G.....Es.u....c.s..u.s...p..u..u.ss.s....tsAasshph...lVhlG......W.u.lYP................................................................................................................................................................. 0 69 151 216 +1320 PF01103 Bac_surface_Ag Bac_suface_Ag; Surface antigen Finn RD, Bateman A anon Pfam-B_1201 (release 3.0) Family This entry includes the following surface antigens; D15 antigen from H.influenzae, OMA87 from P.multocida, OMP85 from N.meningitidis and N.gonorrhoeae. The family also includes a number of eukaryotic proteins that are members of the UPF0140 family. There also appears to be a relationship to Pfam:PF03865 (personal obs: C Yeats). In eukaryotes, it appears that these proteins are not surface antigens; S. 
cerevisiae YNL026W (SAM50, Swiss:P53969) is an essential component of the Sorting and Assembly Machinery (SAM) of the mitochondrial outer membrane. The protein was localised to the mitochondria [3]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.90 0.70 -4.97 70 6186 2012-10-03 17:14:36 2003-04-07 12:59:11 18 36 2849 0 1843 5261 3202 320.30 19 47.88 CHANGED NhhGpGpplshshs..............hu.............p.hppshshsaspP...ahhsp...............thshuhslhhp......................................t.st.pshshpshuhslshuh.....ls.......pphphs.....huhshppsphpstssps................................................................tttpstththshuhshsshssth.hPopGhh............hphshchs.....................h..huussp..ah+hpsps.phahsltp..pt.hhh.......hs+hphGhht.ths..................stc.lPhhcpFasGG..........ssSlRGaphsslGPps...............................ptt.lGGpthhsuShEhphPl.h.....t..t.hpsuhFhDsGs..............sassptpss.......................................psusGlGlpWtoP.hGPl+hDaAh.Pl....pc.ss.sp.................pp.papFulG.ppF 
...........................................................................................................................................................................................................................h.utGp.phshs.hp.........................hs.................t...ppth.phsh.p.P............hh.h.s.p......................................t.h..ph.s...h.shhhp..................................................................................................p.....sh..s...t..p......s...h.u..h..s..h.shsh....................hs.................................p..t..h.p...hp.......................................hs.h..s..h...p..p....s..p..h.p..p.h..p.ps..............................................................................................................ps.h.h..h..s..h..s..h..s..hs..h......s....p...t...c......s........s....h...h.....P....s...p...G.h.............................................................hph.s.hchs........................................................................h....h.u...u-.sp.......ah+.h..ph..ps...p.h.ah.slsp...........tpthsh.............................................hh+s..phG..h...ht..shs................................................st.p....l.P....h....p.p.....F.a.s..........GG...................s.p..o....l..R.G..........a.p...p.s.l.uPpt..............................................................................ttt..h...G.G.s..t..hh.su..ohE.....hp..h.....s..l...................tt..p....hp.....s.uh.F...h....D......sGs.....................................l.h.s.....s..t.t..p......s........................................................................................................th+h.usGl.G...l..pa.......h.......o......P....h....G......P.....l.p.....hs..hAh..sl......pp...pt..t..............................................tt..th.hslG.t............................................................................................................... 
1 578 1140 1548 +1321 PF01338 Bac_thur_toxin Bacillus thuringiensis toxin Finn RD, Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 26.00 26.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.51 0.70 -5.12 7 74 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 26 16 17 68 0 187.60 43 89.80 CHANGED pps..ppshRhIhLpV.ss.-.ssh.pIh.lp.Ppal.QAltLAsuFQsALsP........sLpFshpKuLplAssl.Ppuul.VshlsQolpQosspVSVMls+VlElLpsVLGlsLsuosh.pplpsuITsTFTNLssQps-AWIFWt+poAppTsYpYNIlFuIQNtpTGuhMhslPluFEIsVslpKcpVLFhTI+DpAsYpVplpuLplsQsL.pppthslhsl.pshs.sh ...............................................................h....................ph.pl..lp.spYl.QAlhlsssFQsAlsP.........sLpFsh.KuLpIANsl.PtuuV.luhlsQsVhppsspVSVMIsplhE.ll+..oVL.......Glslsu.ush..splsuAITNTFTNLssQpsEsWIaW..tpp..o.A..sQ.TsY.pYslLFuIQ.NppTGthMhhlPluh-ls.V.sht+pplLh.hsh.psptpatVphpth.......................................................... 0 1 6 12 +1322 PF01654 Bac_Ubq_Cox Bacterial Cytochrome Ubiquinol Oxidase Birney E, Bateman A anon Pfam-B_1148 (release 4.1) Family This family are the alternative oxidases found in many bacteria which oxidise ubiquinol and reduce oxygen as part of the electron transport chain. This family is the subunit I of the oxidase E. coli has two copies of the oxidase, bo and bd', both of which are represented here In some nitrogen fixing bacteria, e.g. Klebsiella pneumoniae this oxidase is responsible for removing oxygen in microaerobic conditions, making the oxidase required for nitrogen fixation. This subunit binds a single b-haem, through ligands at His186 and Met393 (using SW:P11026 numbering). 
In addition His19 is a ligand for the haem b found in subunit II 26.20 26.20 26.50 26.80 26.10 26.10 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.47 0.70 -6.07 178 4711 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 3032 0 940 3110 321 447.50 39 93.87 CHANGED LuRhQFAhTssaHhlFsslTlGLuhhlslhE..shaltTs.........cph....Yhchs+FWsKlFulsFAlGVVTGlshpFQFGsNWutaSchlGslhGssLAhEsLhAFFLEusFlGlhlFGWs..Rl.upthHhhushlVAlGoslSAhWILsANSWMQ.sPsGa..ph...............ps..G+hph...ssahsl.............................lhNPsh.h+asHslhAualouuhhVhGluAaaLL+.............................tc.....ch.....p.hh+....+uhphuhhhullsuhhtlh.sGDhpGhpshcpQPhKlAAhEuhW.-...T............ps......s..A....shslhulPsp...csp...c.......spat.lpIPhhhSlls...................s..+shsu.pVt.GLp-....................................................hst.........................................................................p.hP.s.VshlFauFRlMVGlGhhh.lhluhhuh.....ahth.+..................................p+ahLthhlhshPh.salAspuG.WhssEhGRQPWsVaG................l.........LpTspul.Ss...lsssplhhSLhsFhllYs.hLhs.stlhlhh+hh+....pGPps 
.....................LuRhQFAhTshaHalF.lPlTlGLuhhlAlhEslal....hTs................cph.........Y+....c....hs....+....FWu.K.lF.u.I.NFAl.GVsTGlsMEFQFG..T..NWShYSca....VGDIFGs...P.LAhEuLhAFFL.EuTF.lGl.ahFG.....Ws..R...l.....s+hh.HhhsTahVAlG..oslSAhWILsA.NuWMQ.sP..sGh...ch..............................................c..s...h..R..h..c..h....ss.ahtl.........................................................................................................lh.NPsh.s+Fs.H.slsuualouAhFlhuluAa..aLL+........................................................s+.........c.h........s...ht++.uht.l.uhhhuhhuslss.hl.hGD.tpGhphtch.QPhKlAAhEuhWc.T.....ps............s..A...shslh......u.h......Psp.......cpp......c....................spau..l.pIP..h.hL.ullu.....s..+.o..h..ss....V...GLc-........................hst.......................................................................................................................c.thP.s..VsslFauFRlMVuh.Ghlh..lhlh....hh.uh..h.hhh...+...pcl...p......................................p+WhL+hslhshP.h.salAlpuGWh.ssEh..GRQPWslhs..................lL.TtsAs..Ss....los.upl.hh..Shlhas...slYs.llhhs.t.la.lhh+hh+tGPt.s........................................................................................ 0 271 564 761 +1323 PF00936 BMC Bact_microcomp; Bac_microcomp; BMC domain Finn RD, Bateman A anon Pfam-B_1071 (release 3.0) Domain Bacterial microcompartments are primitive organelles composed entirely of protein subunits. The prototypical bacterial microcompartment is the carboxysome, a protein shell for sequestering carbon fixation reactions. These proteins for hexameric structure [1]. 
20.60 20.60 20.60 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.50 0.72 -4.30 128 7283 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 957 198 954 3181 404 77.20 31 65.78 CHANGED pAlGllEs.puhssulhAADthlKuAsVcllthctss..u.G..........hhhl.hlsG.DVuuVpsAlcuuhpsspc.h.........llsphl....l....s..ps .............pAlGllps.puh.ssul.AADshlKuAsVpl.ls.hcphs.....u..G..........hshl.hlpG.DluAV+uAlcAusssspc.................lhs.hh.h............................. 0 418 700 840 +1324 PF02397 Bac_transf Bact_transf; Bacterial sugar transferase Mian N, Bateman A anon Pfam-B_1538 (release 5.4) Family This Pfam family represents a conserved region from a number of different bacterial sugar transferases, involved in diverse biosynthesis pathways. 20.40 20.40 21.10 20.40 19.50 20.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.31 0.71 -5.18 461 7019 2009-01-15 18:05:59 2003-04-07 12:59:11 11 45 3624 0 1678 5749 1907 186.40 39 54.00 CHANGED KRhhDllhuhls.LllhuPlhl.l.lAlhl+l..ss...GP.lhFpQpRhGhsG+.....FphaKFRoM.hs...ss..-pt.............hptth........................c.sDs......R.lTp.lG+FLR+TS..LDELPQLaNVL.+G-MSlVGPRP.h.....tpsc...ph..h.s.....thttR.hpV.+PGlTGhAQVs....tpssh.shpp...............+lchDlhYlcshS.....lhhDlpIlhpTlhsll...h.c..GAh ...................................KRhhDllhu.h.lh..L.l.l.h.u.P.lh.l.l.l.u.lhl+l........s.t.......G.PlhFp.Qp.RhGh......s.G+...............Fp.laKF...RSMts..ss.....-pt........htp.hh..........................................................................p..s.Ds..R..lT.+........lGcFlR+TSLDELPQl.hNV.L.+G-MSlVG.PRP.h.s.........pt...h...c.....p...a.....p..................................phht.R.hpV+PGlTG...hAQls...........spssl..s.hp.c..............................+lc.hDlhY....l...c...p.h.S......lhhDlcIl.hpTlhhlh..t..p...t........................................................... 
1 576 1136 1441 +1325 PF01721 Bacteriocin_II Class II bacteriocin Bashton M, Bateman A anon Pfam-B_1954 (release 4.1) Family The bacteriocins are small peptides that inhibit the growth of various bacteria. Bacteriocins of lactic acid bacteria may inhibit their target cells by permeabilising the cell membrane [1]. 21.10 21.10 22.20 25.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.91 0.72 -4.43 19 138 2009-09-13 12:44:13 2003-04-07 12:59:11 13 3 75 11 5 112 0 35.30 47 58.83 CHANGED KYYGNGVaCsKcpChVsWGpAhssIsstsssuassG .+YYGNGVYC.s.Kpp..ChVsWucAhssIushshsuahsG............... 0 2 4 4 +1326 PF04798 Baculo_19 Baculovirus 19 kDa protein conserved region Waterfield DI, Finn RD anon Pfam-B_6291 (release 7.5) Family Family of Baculovirus proteins of approximate mass 19 kDa. 25.00 25.00 28.20 34.00 24.70 23.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.93 0.71 -4.81 23 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 60 0 0 60 0 142.40 44 76.49 CHANGED hhNPapphsp+LlcsatsoLpaGsaIcVaDhp....cs-RLFlIcPENlllYNssGsLYYYLE..uSuhhCP.sEFulVRFopsDIpsINEoGhashsCTslsSLsLlEHFho..ssls-tplhLs...p.phpaoIlDlINhLIpsGYlplp ...............h.hNPappphppLlcDatsTLpaGs.YIclYDLSsss.csERLFlIcPENVlLYNhsGsLaYYLE..SusVhCP.sEFulVRFops-IpslN-oGlasssCTsVsSLsllEHFhoLKNslsDppllLsh.....c..pIpaoIlDlINaLIhpGYVpl...... 0 0 0 0 +1327 PF04631 Baculo_44 Baculovirus hypothetical protein Mifsud W anon Pfam-B_5343 (release 7.5) Family This family includes several hypothetical baculoviral proteins, with predicted molecular weights of approximately 44 kD. 
25.00 25.00 29.90 29.20 17.30 17.20 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.35 0.70 -5.84 23 108 2009-09-10 23:58:53 2003-04-07 12:59:11 7 1 82 0 0 99 0 265.80 44 92.09 CHANGED LlllllllFLallY.pPlapAatpI+psQtpYspplD-ph-hhppsLpRRRYVPLcsLPslcasoshsTlst.Gph+ChSVPlhVosscTssFDCoplCDNssAsYFFVspaD+FVVNGphLspGGYCTTNSlPRNCNRETSllLaSlNQWTCIAEDPRYFAGpsNMlQlAGRQHuscIhPuplc+NVLaD+LLstpVslopNTFRpsWDElhpDGo.RRFEl+Cs.ALD.+pNpMFVNPlNPIECLPNVCTNVpaVHpsVRPsFEsG.C-CGDhslTRVpHlsssD+TShCASIVDphcpsstSa.......pFRV-ClshDoPlocasts..+LLCPsclFspNTDsAYsFsLsGs.hPlSuNGIcEPTaRhahDTRsR.lsass ......................................................................................................h..LPslphssp.hsphst.....st.cshp........sshhl.u..p....h....s..shD..CstlCssssu..hhF...alsp.sp..hVlNsphLt.GGaChssSlsR.CN.hpTS.hhlholspapCluEDPRYaAGstNh.QlAGRQH.spIhsup.s+NlLaDpLLstpVssspNTFRp.pWDEhh.DGo.RRFEh+Cs.AhD.ppN.MFlNPhNslECLPNVCT.s.VphlH.sV+P..sF..-...s...G......C-CG..t.hch.p......tohCssh....hp.................h.....Chth...h..h............................................................................................................................... 0 0 0 0 +1328 PF04786 Baculo_DNA_bind ssDNA binding protein Waterfield DI, Finn RD anon Pfam-B_6251 (release 7.5) Family Family of Baculovirus ssDNA binding proteins. 
25.00 25.00 44.80 43.80 19.10 19.10 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.59 0.70 -5.23 21 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 55 0 0 62 0 244.20 27 81.91 CHANGED Wh-phlaNL..pp.tNhollpCsssh..NpLtcsLshlppphsl..pahcchhP....pht..cplslhcPp..ss+lsYplGhpV+GGhhs.FYaFDhspl+Rscu.saGcFlolpWsshhhHNplauplhtpah....thp.-.thpLpssshlslPpc.........p.sp+pshlRKFFslppcpNpplYsTGc.....L..hctlpscPaoh-cFcplFphp.scs....sScEVpMlhuulI-GhKpuKp-hphco.lss+clpEKsYSLAl+PhlFhplE ............................W.cphhhNL...pp...tN..hollp.Csss...ppLpcpls.lpphhsl..phhcc.hhP........p.s...pplslhcsp..ss+ssYplGh+lcGssps.FaaaDhsplK+scu.sa..G.cFhslpasshttaNplauplhtpah.....t.ppc.slplpsslhlslPcc...........sp+phFlR+FaslpppsNtplasTGc.....l..scs..lpspshol-cFcclFphptspp....sSpEVcMlhsuhI-GlKpuKt.-hphps....lss+chp..EKsYSLulKPhlFhhlE.............. 0 0 0 0 +1329 PF04639 Baculo_E56 Baculoviral E56 protein, specific to ODV envelope Mifsud W anon Pfam-B_5446 (release 7.5) Family This family represents the E56 protein, which is localises to the occlusion derived virus (ODV) envelope, but not to the budded virus (BV) envelope [1]. 
20.60 20.60 20.70 28.60 20.20 19.80 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.66 0.70 -5.76 25 71 2009-09-10 14:49:23 2003-04-07 12:59:11 7 1 62 0 0 66 0 286.80 48 84.53 CHANGED ssPoohsl.ussphhPGYslsNspFVSsu-...lNplhRNNDlsulRplFss.sossQlNGLspLRRuDNlPDAslHuhphR+suVKssaPc.TssRT.pGVpsuLspNPRLssYL...KsAGhssLlGsGVYLlhsuAsLV.pDIlcALNRTGGSaYhpGpNGG-...sl-uClLpaRoCshshssl...ssslC........shDPLLss..spLpslCpGYNaEsEpoVCRuSDPNA-PsS.QYlDIS-LsssQTIpClEPYDhGDLIGDLGLDaLLGEsGllspSSNuSp....SlSssLhPlIllIGullhllhIGahIaKhlhppps ........................................sPoohsl.uNsphhPGYslsNNpFlSsu-...lNplhRNNDlsslRplFss.ssssQlsuLspLRRsDNlPDAslHutphR+suVKpsaPp.TssRo.pGVpssLppNPRLssYL.shKsAGhssLlGsGlYLlhsuAoLV.tDIlpAlNRTGGSYYhpGpNGG-...sh-sClLhhRTCths..ss...tslslC........shDPLlss..spLpslCpGaNa-sEpo...VCRuSDPsADPsS.QYVDIS-LsssQTI.ClEPYshGDLIGDLGLDtLLG-.cGLlspSSNsSp....SlSspLhPlIllIGullhllhIha..hlaKhlhp..s.... 0 0 0 0 +1330 PF04850 Baculo_E66 Baculovirus E66 occlusion-derived virus envelope protein Mifsud W anon Pfam-B_4624 (release 7.6) Family \N 25.00 25.00 113.00 112.00 19.10 17.80 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.41 0.70 -5.47 28 75 2012-10-02 15:11:41 2003-04-07 12:59:11 7 2 51 0 0 75 0 381.70 37 56.26 CHANGED cssVNhpslppuIphVuSscGllNPAlhSRNGopaS.sVIGpFl-Ys.uVaSADaSKVLThhocpYaGSVVGsTsclAY.....YEADssNshaAPLWAMsRRIWscs...utllsY...p............spolshESGVlL.pshsGl............hslPo.....TssSTpSFpPslupT.AlspTcssG.AMlsau+FsELN.LEFcShTLYacpGMaQLY.pltshpsshss.....suRsVVLsRDh....s.pTs.-.sas.......suSs.stoaNGVss++hsIsNh...slsshslR.s.sslp..............hlEQlIuh-slpsGsGssCYpLsVps.....sDsspshplssssh...................hhhssssslcslFcFPal.hlK-spptplo.sstsst...........lshsslpplLshlshsstsl..hss..ptssssFhhpss..s.QFhFch 
...ssVNhpNlppuIphVGSscGhlsPAlhSRNGopaS.sVlGpFl...-YssuVaSADaSKlLTlRocpYaGSVVGtosclAY.....YEAD.sNshHAPLWsMTR+IWsps...utllsY.p.................................sss..lshESGllL.pshNGl............hplPo......TssSTpSFpPsluhT.AlssTc.suG.shh.au+FsELN.LEFhSYTLYacpGMFQLYspI+slpshsss......uRCVVLsRDh.......s.sss.-.sap............ssSN.htsaNGVss+HaNIs.N.......slssFslR.shssls..............hlEQIIuhcslNsGsGsuCaoLhsps.....sDsspsh+lssssh..................hlhhsssssl.cslhsFPal.llK-spspplohssssst....st..H..lshspIpphLshlshsstsl..hss.hpc.psssFhhpss.tu.QFpFph............................................................................................... 0 0 0 0 +1331 PF03258 Baculo_FP Baculovirus FP protein Mifsud W anon Pfam-B_4275 (release 6.5) Family The FP protein is missing in baculovirus (Few Polyhedra) mutants [2]. 25.00 25.00 26.10 26.10 24.40 23.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.20 0.70 -5.22 12 67 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 56 0 1 62 0 186.50 49 94.67 CHANGED M-...ppLINVslLKsLIKsEIDcsVo-NIphlssKLK+LEc-pLNDoVEIYGIHDsR.LhsKKIRs.YlKKICuLLsL-aKtVl-SsacKNHIhV+LsDAsTA+EWQo+SREhRLKNaDLsI-aDGPVKIFVAAosEHK.LLKKTRDALLPaYKYVSLCKpGVMVR+s-+S+laIVKNEpDI.pLlsp.hpuhcsts......ts.t..t......pplI ..............................ppLINVslLKsLIKspIDcsVssslphhstKLK+LEpcpLscoVEIYGl+Dp.R.L.hsK.KlR.s.YlKKI.CsLLsLsaKtVl-osa...cKNHIhlKLsDAssA+EWQs+SREhRLKNaDL...........s..I..-a.D..GPVKIFVAA..osEaK.LLKKTRD.ALLPhYKYlSlCKpGVMVR+s-+S+laIlKNEpDIp.Lhsp...th........................h................. 0 1 1 1 +1332 PF04700 Baculo_gp41 Structural glycoprotein p40/gp41 conserved region Waterfield DI, Finn RD anon Pfam-B_4278 (release 7.5) Family Family of viral structural glycoproteins [1]. 
23.10 23.10 23.90 188.30 22.90 23.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.25 0.71 -4.86 21 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 57 0 0 60 0 190.70 50 57.94 CHANGED sKRF-SD-pLIcaYtpLpKchsssshsp......sIFpsSFVhS.lh.uYApKFYs+........plsEAAcpLS.ulpYQlApAVTpNpPlPLshspplsN-YlpLLhp+AsIPsNlppuls.....sps.spLN....hhssllNsll-DlFsG.tssYYh.hsLNpcsRu+VhshK-NIuaLs...P.Lot..SssIFpaluphATpsG+ ...sKRF-SD-sLIcaYtRLpKELGssslsc......sIFpsSFVhs.lLPuYAQKFYN+GuttlutsulsEAA+pLuhAlQYQlApAVTsNpPIPLPFspQLuNsYlTLLLp+AslPsNlQpulp.....SRphs+lN....hIN-LINsVIDDlFsG.uusYYh.YVLNEcNRARlloLKENluFLA...P.LSu..SsNIFpaIApLAT+uGK.. 0 0 0 0 +1333 PF03273 Baculo_gp64 Baculovirus gp64 envelope glycoprotein family Mifsud W anon Pfam-B_4223 (release 6.5) Family This family includes the gp64 glycoprotein from baculovirus as well as other viruses e.g. Swiss:P28970. 19.70 19.70 20.70 19.80 18.10 16.90 hmmbuild --amino -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.87 0.70 -6.33 6 42 2009-09-11 04:57:11 2003-04-07 12:59:11 8 2 26 1 1 43 0 366.50 45 89.09 CHANGED EHCNAQMKoGPY+IKsLsIsPPKEoLpKDlpIclsETDhsENVlIGYKGYYQAYAYNGGSLDsNTplpEsLhTlsVuK-DLLMWGlRQpCEVGE-LIDpWGSDSpsCaR-.sGRGhWV.....sGKELVKRpNNNHFAaHTCN+SWRCGVSTuKMYTRLpCss-oD-CpVpILDINGssINVotscVLH..RDGVSMILK.pKSchoRRopKlACL.........lKDDKsDPcoVTREH.........CLl-sDIFDLScNpWhCKFN+CIK....R+sEpVVKpRPpTWR+Dt.sKacEGs..oATKGDLMHlQEELhYENDhLRMNlELLHuHINpLNNhhHDLlsSlAKlDERLIGNLMsNSVSSTFLSDDTFLLMPCTsPPsHTSNCYNNSIY+EGRWVuNoDooQCIDFsNYcELAID..DDlEFWIPTIGNTSaH-SWKDASGWSFIAQQKSNLIoTME.TKFGG+sTSLpDlsshupGpLsupLtShhhGphhsalllhsVIlFLhCMlRstsR+ 
.....................................................................................Noth.t.h..hs.ttt-LLhWu.pppCphG-phh...DpWGSDS.sCht-..stt.Ws.tKELV+RpsN.+.FAaHhCNhoWRCGloTochas+Lpss.....sD-s..p..VhhL..sGssIslotspsLa..ccshShllK.ppophppcphKluCh.........htss.sDPc.lst.-.+..........Ch.sssha-lopspa.C+.stCh+.............cc.t..s.p.+P.pa+hs...c...h...sAohtDlhpl.ppLhYE..s-hL+hslp.hct+hsplsshhpsLl.SlAKlD-RLIGpLhspssuSpalsscpFhLhPChps.s..tsSNChssslYp-GRWsts.sDsspChshtt.p...lc.....hphhhs.ltsssh+tshpDtpGWSFlAppKpsLIpTMp.T+.GG.sTSLpslhths.G.h.tph.u........hhlhhhhhhhhhhhhp.....t............... 1 1 1 1 +1334 PF04735 Baculo_helicase Baculovirus DNA helicase Mifsud W anon Pfam-B_3393 (release 7.5) Family \N 25.00 25.00 116.10 48.10 18.20 17.80 hmmbuild -o /dev/null HMM SEED 1173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.23 0.70 -14.16 0.70 -7.20 25 73 2009-09-11 13:37:16 2003-04-07 12:59:11 7 2 60 0 0 73 0 1095.40 37 98.75 CHANGED tIhssIhpshp...pp-phsssshsss-pllh+Nssot..p++hlcsh-sFpKLlsshssptsts...........................t............ptt..h..........................tsHsWshpsNhF.shhV+PFlhtccY-p...lpcplshpcFhsopssthuNcCspuG-YhYWPN....hulsahGWplYLphpasIclsspIPLlHN+cLGsVNLFs.sschFLslEhsl.ssps..ppLFVNG+othscp.p........-cLFplshs....ssssusCKhsscLVaS.NKshFcYl+D-INLppChssscY+pllc.lDLpsLRhFsstt..sssslsst-ct..........pspppITPSSEptcpIpppIc-ClphIpEsMhcshspp-pucs..lLppYhppSsFhNFcYLIlslW+hlp+spch...pascTDI+LFlELLC...........Eplaus............ct..thcpshpcCcPYhphosplap+FCsphshFsssss.....h.uLutYaAIHahIYtKpss..........WsaThcsshp.Cslss-V.....lstGFFKKI..psspsshlFNGK.HYphVK..KDDDLhKlh-cssslslSslKFNNWKYhYhTEcGVaNVhppsY+ssCPFllGsTLhpsahc+s-..pYLscslhsaMhssscpEhsIaKhYHsAKlsR-l+hlKsNhptshhhs.sCtsCphptppcLN-LFRElWshccs-LllLulYLNp.KhsDllpNl.pC..Ct.t.t.....pCpClp.plc.....lDlpuhKlsLhhcLFhsss.plhcLhWuLla.osphYsphhtshh........sssthlcphsthaapN+p+IlchLapplc+l-alcphh.chssscphlp..plps....................................sscsDshp.cpp....................phlpsFatpYspshplLp+a.NVWWDKLIltRp.sDDLsoWLTRFYhRlhh
oK..lslpshsptalpplVpGYLYFRhFTNFNhsNShslhHFsASLuIPoDYEKhClYLsGcPssGKSShFELL-p.lllVHKpDs-pYshsK.+ETsEhEscKh.SQLYVINEhKhCs-oFFKopADSTKSsSssRKYpGppKYEANYKlLIlNNc..PLaIsD.YDKGVpNRFullYhDHpFp.-sh.FsGSVYcHhcsK+YP.E+shh-pLtsPVRlFLuHlLhY+RsP+DGYV.YKslLpsDssapHNLtCLslNNoslpALlYVLpV+...c.us.........thlsEsKlpchIchAssaVcshlH.ph+.....Kpss..........tphLhs-FK+Kap...KaYsscsKhahNLsMAhscpDFNhshPpFKs ......................................................s.Ihpplhpsh....p.c.p.shpshtsssplll+NstTt..p++hhcphp.Fppllsshpsp.stst................................t........t....t.t..h.................................................sHsWshpsNhF.shhV+PFlhp..cpY-t...lp.c..tlshpcFhtSppstasNcsspuGDYhYWPN....huloahGWplaLhhpFsIslsshIPlhHp+pLGsVsLFshsPc......h....FLslEhsl.sssc...ppLFVNG+ohFscp.p........-sLF.lphs....ssssusCKhtscLVhS..sKshFcYI+DsI.NLppClTsscYppllp.lsLppLRhFspps.hsstshsttpph..........phh..IosSSEphc.lpppIspslttIp-tMlcshsppp....tsss.plL......ppYhppSsahNFcaLlhllW+hlp+ppch..papcTDIKLalELLC...........-pl...ass..............ct..shppsht+CcPYhphshplFp+hCsphphFps.s......h.sLuhYauIHahIYhchss............WsaTacsshs...CtlsspVlstGFF+Kl..hsspsshVFNGK.HY..hVK...KDDDLaKl.hppss.shplsslKFNNWKYhYhTcpGVaNlhpspa+ssCPFllGsTLhpshpchs-..pYLPcsshsaMLssucpEhpIh+hYHhAKlCRDl+hl+sNhthh.hhs..sCtpCp.ptp.pLNplFR-lWshscppLlhlulYlNc.KhpDllpNl.+C..Cptt...........pCpClp.plclshhAhKlsLhh-LFssss.pl.pLhW.Lla.ssphYsphhh............sssphlpphsphhhpp+hcIlphLapplc+l-al-.hh.phss.phhlp.lpp....................................................ttpst.t.pps...................phlppFatpYspshplLpKa.NVWWDKLIltRp.sDsLsoWLTRFYMRlhho+..hs..lpsYs..alp..plVpGYLYF+haTNFNhsNShhhhHFsASLuIPoDYEKhslYLsGcPsSGKSSFFELL-p.lllhHKpDs-pa.shs.p.+-Ts-hEssKh.SQLYlINEhKhCs-SFFKspADSoKssSssRKYpG.pKYEANYKhLIlNNc..PLa..lsD.YD+uVpNRFsllYhDHpF..-sh.FsGSlYpHhhsKpYP.E+shh-p.LtpsVRlFLuHlLhY+RsPpsGaV.YKslLpsDshHpHNLhCLslNNoslpALlYlLpl+...csus.........thlsEpKlpchIphAs..alcphL.Hsthp.....Kphs..............tphLhspF+cKac...+hYpt.csphahNLsMuhscpDhshs.hPpF+s............................
........ 0 0 0 0 +1335 PF04838 Baculo_LEF5 Baculoviridae late expression factor 5 Finn RD anon Pfam-B_5141 (release 7.6) Family \N 25.00 25.00 27.80 50.60 20.60 19.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.19 0.71 -4.47 20 57 2009-09-11 05:38:18 2003-04-07 12:59:11 7 1 56 0 0 56 0 153.60 56 58.32 CHANGED shsLFplFpcFRcscsYpcLIpaLlpNYPsNVKNKTFNFsNTGHLFHSLYAYlPulos..KERKQIRLp.-CIcKLFpNTpNDFKLYsElFchlp...spth.ppCPCpLlppRhpphpsYVcslpsKpFDsKPsKLKKEsIDsILaKYSlNWKslLhKKKh ....hsLFhlFpcFRpppsYppLIcFLlpNYPsNVKNKTFNFsNTGHLFHSLYAYlPulosh.KERKQIRLp.-ClcKLFsNThNDFKLYsEla-hIp...ppth.ppCPCpLlhp+hpphpsYVcslpsKpFDsKPPKLKKEsIDsILaKYSlNWKslLhKKK..... 0 0 0 0 +1336 PF05073 Baculo_p24 Baculovirus P24 capsid protein Moxon SJ anon Pfam-B_6005 (release 7.7) Family Baculovirus P24 is associated with nucleocapsids of budded and polyhedra-derived virions [1,2]. 21.30 21.30 21.40 38.90 20.40 20.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.32 0.71 -4.72 26 59 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 53 0 0 55 1 180.10 39 86.02 CHANGED FpYssc.slEVhIIpN.....s-sDpDGYlElsAAA+LLuPhsp.+shsss.LWsNstsSaKLp+NNKNYlHsFuLsKYLSsYshssp..p.spYhsLKpLlsDLlhGsQsp.....shDP.......Ls-IKsQLCslQEslsps..........................................................................tsppsshhuslsuhL-hlKo.lpsDlssKlsFu ...FpYssc.slEVhIIpN.....s-sD+DGYlELoAAu+LLuPhlp...hstusLWsNstsSaKLp+NsKNYlHsFuLsKYLSsYshssp..p.s.pYhsLKpLlsDLLhGsQup.......lhDP..............Ls-IKsQLCslQEsLsps.sths...............................t...........s............p..p.htphlpsc.ts.p.hhs.shsshL-plKs.h................................................ 1 0 0 0 +1337 PF04766 Baculo_p26 Nucleopolyhedrovirus p26 protein Waterfield DI, Finn RD anon Pfam-B_6066 (release 7.5) Family Family of Baculovirus p26 proteins. 
19.10 19.10 21.80 21.80 16.90 19.00 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.53 0.70 -5.12 25 100 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 55 0 0 98 0 209.60 31 74.22 CHANGED hsVpY..sVsasp+plslhphcscsVpl+VhsPttps.....tD-s..Lstl.HpFPGVATsllFPpl.ppsstlpVhLss....GsLhcspss..+lahNaHsap+RhVYGQLsulslss...plts+lYlGAPIFpss.+.LVSVVTspa.sDhtcshslhPVTGhRtsuhlSGphphDst...VhVpchcsshSVYGppQLPY..........tlKtaAhst.sst...tshscslhlFas..cc-VpIslscGpFEIs+lRlsGPhlss ......................................h.lp...p.h......t....h.hh............p.t...................h-pl.HpFPGluSs.slhsp.l..pptoslpVh.sc.....h..hchhhs....+hha.saHphsKRalaG.LPshtss-......hh.hLhIGuP....................Iacpc..p..hlSlVTtRa..cs.t...tt.h....haPloGl...GhhSGplsl-ss...lphcpLcsGhuVYG+hQhsY............slKphAhphs...........shhlhhp..pp.l.lsh....s.thph.+hRh.s.hh.......................................... 2 0 0 0 +1338 PF05214 Baculo_p33 Baculo_P33; Baculovirus P33 Moxon SJ anon Pfam-B_6583 (release 7.7) Family This family consists of a series of Baculovirus P33 protein homologues of unknown function. 
25.00 25.00 63.60 63.60 22.70 21.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.75 0.70 -5.10 18 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 57 3 0 62 0 241.80 45 97.07 CHANGED Is.TPLhtRYKsSFhLasFRhLDhhRsuPSpcLpplLspElpYLYplsClIsY+-sQps-l-pLhpWshsLss-.hcL-phKhhah-KhppLNL+shpPpcYsaoFoTIWDoIHaLsLlsDDMVtNR..cphsh-hlptpl+phKhlFYNlFhhL.CshCpcHYLTVcuF.haplERIElALaRE+.hGEslhhVD.......E.lstspsscNlLh+athLYsSMlFHNHlNsYR.IQ+Nhcs.s..NapRM-WshYKpLLsl .................................IP.TPLhs+YKDSFhL...asFRhlDhlR.su.SpcLsplLusElTYLYclACLItYKDlQppElppLhpWuhslstc.hcL-Qh+lhFh-KhpELNLRuhQPKsasYoFoTIWDoIHFLuLllDDMVtsR..cKhsh-hltppL+phKslaYNlFFhLpCshCRcHYhsVKGalIhpIERIEluL...RE+......aGEsIhhVD.......................-..hstsssscNlLMKpthLYsSMlFHNHINsYRaIQ.............RNhcsPs..sap+Mc.........WspYKphLpl............................. 0 0 0 0 +1339 PF02961 BAF Barrier to autointegration factor Griffiths-Jones SR anon Structural domain Domain The BAF protein has a SAM-domain-like bundle of orthogonally packed alpha-hairpins - one classic and one pseudo helix-hairpin-helix motif. The protein is involved in the prevention of retroviral DNA integration. 25.00 25.00 25.10 33.00 24.40 20.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.86 0.72 -3.68 9 197 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 94 13 127 159 0 83.60 53 87.26 CHANGED MsoTSQKHRsFVuEPMG-KsVssLuGIG-sLGt+LcspGFDKAYVVLGQFLlL+KD.E-LF+-WLK-oCGANu+QAs-CasCL+EWCssF ..........M.sTSpKHRsFVuEPMG-KsVspLAGIGcsLGp+LpcpGFD...K.............AYlVLGQFLlLKK-.E-lFp-WLK-.sC.GAssKQupDCatCLp-WC-tF........... 0 50 58 88 +1340 PF05112 Baculo_p47 Baculo_P47; Baculovirus P47 protein Moxon SJ anon Pfam-B_6441 (release 7.7) Family This family consists of several Baculovirus P47 proteins which is one of the primary components of Baculovirus encoded RNA polymerase, which initiates transcription from late and very late promoters [1]. 
25.00 25.00 65.80 65.20 19.90 18.40 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.09 0.70 -5.03 21 58 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 57 0 0 59 0 307.50 47 78.87 CHANGED hFsphhphsSQhLPQCCKYLscsLshYhLa...hp....ths.stshahlccslpVctEGFVphppphVhFclspl.sp...uTPcDL-hYlcsoR..pl.osHctplhcLlh+DRWaKGDFsRL++hLs.pDsssLlsFsCNsLWERGYEsaYTLGQQLSIRITTKLIQSGLDFKH.Qsssss.....ts...............RGWssptFEKhluSIoSlSDlIKRHKhSpKYIlLElssspssphlchLhcppFslIpNsphsNVChIpl.D-DKNShpYLpKLupLIpp+llNVLFVTDlEaYl+pssahFYLYNSLKFYYYCLKNKFVF-hpDYEhlFL ....hFsphhphpoQ.LPQCCKYLs-sLshYhLY....hp....shss.ststhhlscslpVcs-GFVphppshVhFclspl..sp....uTPcDl-pYlchTR..sL.osHDtpllKLlh+.DRWaKGD.hsRL++lLppp-Vs..sL...lpFsCNVlWERGYEsHYTLGQQLSIRITTKLIQSGLDFKHQ.sssss..s.ss................RGWsstsFEKhlsSIoSlSDlIKRH+hupKYIsLElsssphsphlctLhpppFsllpNsphsNlChIpl.D-DKNSh.YLpKLspLlpsKllNlLFVTDVEaYh+pspahFYLYNSLKhYYYCLpNKFVF-tpDYEhlFL... 0 0 0 0 +1341 PF04878 Baculo_p48 Baculo_P48; Baculovirus P48 protein Mifsud W anon Pfam-B_6510 (release 7.6) Family \N 25.00 25.00 56.10 55.70 19.70 19.60 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.47 0.70 -5.52 23 56 2009-09-10 15:38:45 2003-04-07 12:59:11 8 1 54 0 0 53 0 376.70 46 98.32 CHANGED ppaplpYsLRFpK.....tsshppVsFpspLopsEIDSLsFLhucYFDQpphlsl.KGLTFFsEFNKCI-sIKpsFEs+p...-ss.-VKpIFslFL+cEFhsQVPsFphIMpYLppYYKPlssP.slst.l...Cs.pCs.hs+lpCLpCKssYlSsulohhDsulQ-GWDIFLRPMhGlPLhhalLlKT-as..cs-V.FNsDslITNsFsQFFYNLLCDKAsshYhsaKtCpPLlc-C++sshuLpsc-lEhLLshL.Nssohso.......KLasPFKpFM.chshpTKlK..KlNKlAuslFhGFYLRhYLEAtssK.......................shsssELElRNVCRhIh+cYs-cpFEphlpKLpsIKtDLhh.lhpphIVPEphIR+LhsKYsLDpDluhLLppsV 
.....s.hplpYsLRFsK.....hcshpsVsFpspLopsEIDSLsFLhS+YFsQpphVsl.+GLTFFsEFNKCl-sIKpsFEs+p....-Ns.....-lKpIFSlFL+.cEFhsQVPpF+pIMpYLppYY+PhssPslstl....Cs.pCs..........hs+...........l..pClpCKssYLSsulSshDsulQcGWDIFLRPMFGlPLhlalLl+T-as..csslFNsDs....LITNsFsQFFYNLLCDKAsstassa..KtCpPLlc-C++sssuLpsp-hEhLLshL.Nssohso.......KLasPFKpFMhchuppTKlK..KlNKlAusVFhGFYLRhYLEutssK....................................shosuELElR.NVCRaIhp.cYs-cphEphlpKLppIKtDLh..lhpphIVsEpaIR+LhsKYpLDp-luhLLppNh.... 0 0 0 0 +1342 PF04583 Baculo_p74 Baculoviridae p74 conserved region Waterfield DI, Finn RD anon Pfam-B_4744 (release 7.5) Family Baculoviruses are distinct from other virus families in that there are two viral phenotypes: budded virus (BV) and occlusion-derived virus (ODV). BVs disseminate viral infection throughout the tissues of the host and ODVs transmit baculovirus between insect hosts. GFP tagging experiments implicate p74 as an ODV envelope protein [1,2]. 19.40 19.40 19.50 20.40 18.30 19.30 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.58 0.70 -5.18 25 93 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 75 0 0 91 0 236.70 46 37.84 CHANGED s..psss-s-.LEsIIspFLEDauLlhGIhTslGF-hL.sslcsMLK+INosLIPtLK+hLLsoS+RlTsRLLGETYKAAllHshNRhAIKTlSsVAKAhsRhuhpAuSVlGIlLIhlTIuDLVLhlWDPFGYsNMFPR-a.DDLSpoFLoAYa-Sl.sssoRDlIEFhPca...Fs-lV.....-..pD-.......hhh-ohhall-YluuLEVNSNGQhLphscG-sIsD..FDEtoLVGuAL.ASouhYTph-FhtYTpRHNclLh...ssp .........................................st...s-pcL-sIIspFLED...aullhGIhsshGF-hLhsshKsMLK+INosLIPhL+phLlssop+VTsRlLGE.TYKAAll+shN+....lAIKTlossAKAhTRluIpAu...SVlGIlLIlhTluDLlLslWDPFGYNNMFPR-FPDDL....SpoFLoA...Ya-Sh.s.psoR-lIEFhPEa.Fs-hl........-..s--...p....hhhpohhalh-YluuLEVNSsGQhLphpcu-sIpD...FDEhoLVGs..AL.ASSuhYT+h-FhpYTtRpNpll...p.................................... 
1 0 0 0 +1343 PF04513 Baculo_PEP_C Baculovirus polyhedron envelope protein, PEP, C terminus Kerrison ND anon DOMO:DM04337; Family Polyhedra are large crystalline occlusion bodies containing nucleopolyhedrovirus virions, and surrounded by an electron-dense structure called the polyhedron envelope or polyhedron calyx. The polyhedron envelope (associated) protein PEP is thought to be an integral part of the polyhedron envelope. PEP is concentrated at the surface of polyhedra, and is thought to be important for the proper formation of the periphery of polyhedra. It is thought that PEP may stabilise polyhedra and protect them from fusion or aggregation [1]. 38.00 38.00 38.00 38.40 37.80 37.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -10.97 0.71 -4.35 23 57 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 54 0 0 54 0 137.80 34 43.32 CHANGED lsslsQL..slssuNQalELoNhLsAI+hQssplhupLssll-slpspLsslss-lppLlsplss......clsshsssLssAlNpLp...-slRN-LTslNSlLsNLsSSlTNINuTLNNLLpAlsul....shGslsphhsshhss.........lppllsh ...........hsslNQl.....slNsoNpalELoNhLsul+hQNsplhutlsphl-slpspLsslss-lpplls-lss......pLsshsssLssAlNplp...cslRNELsslNSlLsNLsSSlTNINuTLNNLLpAlssl..........shG.slss...thpshlsp.........l.pllp...................... 0 0 0 0 +1344 PF04512 Baculo_PEP_N Baculovirus polyhedron envelope protein, PEP, N terminus Kerrison ND anon DOMO:DM04337; Family Polyhedra are large crystalline occlusion bodies containing nucleopolyhedrovirus virions, and surrounded by an electron-dense structure called the polyhedron envelope or polyhedron calyx. The polyhedron envelope (associated) protein PEP is thought to be an integral part of the polyhedron envelope. PEP is concentrated at the surface of polyhedra, and is thought to be important for the proper formation of the periphery of polyhedra. It is thought that PEP may stabilise polyhedra and protect them from fusion or aggregation [1]. 
23.90 23.90 25.30 68.90 20.80 23.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.10 0.72 -4.00 16 82 2009-09-11 22:40:04 2003-04-07 12:59:11 7 2 53 0 0 79 0 113.00 28 41.17 CHANGED -sssVPlha..ssh.slWVGA-ElLpIL+ls.sstlp....slPpscKphhpcLps............ss-usKhFlTtlGlulLsuRs.s...........................................ctushhsshFlsDshp-hp.......ssp.hC ....psssVslhap.ssh..shWlusDEllplL+hs.tsshp....slPppc++hWpDhps...........sshsusKhFlshhGlulLssRs.s........................................................................phschhhshFls-shhphhtp................................................................................................................ 0 0 0 0 +1345 PF04501 Baculo_VP39 Baculovirus major capsid protein VP39 Kerrison ND anon DOMO:DM04345; Family This family constitutes the 39 kDa major capsid protein of the Baculoviridae [1]. 21.60 21.60 22.20 49.60 20.00 21.50 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.01 0.70 -5.42 16 65 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 0 63 0 287.40 41 92.57 CHANGED MALVssGssssRhp.NaCIFuuV.pP..FDsCtsYpSPCSsDAosDDGaFICpYHLShcFKMEKMsLPIPDu-ss..tYhRTlG+SLVsHpspuscRILIPTpsNYpoVLNlsuhsluEQLIhHhIYsNpsp.....hscICppL+hsE.pFps-hhplVEplYssTtplLuhTsPstYCopVss.sssRhassss..............ts.....ssu-psasshPsFl+NLIs+sVAPEshpIs....scsLhLRNCsTCtIs.ssGLVAss.......cLYNPVcP+YhhttN-NhLpIcNVLKFcG......NusALQ+.sLuRYEpYPlhVPLhLGpQllsops .........MALhsss.tspchp.NhCIFtul.ps....FssCtsYsSPCSsDA..sp..c..DGaFICsYHLuphFKhpKhslsIPDu..csN..papholG+SLVspp.s..pu....pRILIP.opsNYpsVLNlsshs.sEpLlaHhIYsNpsp...........................pplCptLphsE.pFpssh.tllcplassTpslLuhssPst.aCupVsp.sssRhassss.................sp.....s.sppsasshPsFLpNLIp+sVAPchhpIs....spsLplRNssTCtIs.ssGLVAss.......pLYNPlpPphh...p.hp.p..NpLpIcsVLcFcG......su.ptLQ+.sLspYEpYsl.lPLhLGpphlsp..s............................ 
0 0 0 0 +1346 PF04913 Baculo_Y142 Baculovirus Y142 protein Mifsud W anon Pfam-B_6688 (release 7.6) Family \N 25.00 25.00 30.60 30.00 19.10 18.20 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.57 0.70 -6.13 17 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 57 0 0 58 0 445.90 43 96.79 CHANGED hsL-cc.pLKYLFLuSYFcL.ts.phlss-scsFIt.-YlcsNFp.ls-ssLlpYlsYLpshpLKpll.sDposshFKYlKPQF+FlCsRc.slDIlcFD.s+lYlpPsTslYATNhFVpsPppFphhlY..ptFs+Vas.cRpFVsssppasl.hsGspGalF-cuYlDWsGl+hCpsspl...psspaPYRLYLlGEtMAp+FlcpNI.hs...........pschlLKNFaKGLPL...h+ssaclINSK+FsTcKPNclF-EhppELsspssalKFIQRDYIYDA.sFP-DLL-LL.N-YhTpTSlaKaIpKFh-sp...phts..hsEIVlDRYuVs+YRKh.l+h-ssshaP.shphspsuYIFlpsDhlQI+GTLNAFYlP+ppll.ILAsNsLFGusclL............cFD..p.LlsYppsssPh+lst-hYhlstppKlYLs+ahFus.slPAYLlIRGDYE..oSp.....hKsLc-LpNsWVpNTLLpLhl ................................................................sL-pc.phKYLFlsoYFcL.ps.s...h...lss-spsFIp...pY...lpsNFp.ls-tsLhpYlsYLsphpL+pll.s-p.ssshFKYlKPQF+FlCsRs.slDIlcFD.s+hYI+PsTsVYATNhFVssPpchhhh..lY..scFs+Vhp..p+hFl...s.sssphsl.lsGssGalF-sAYlDWsGV+MCsss+l.....psspaPYRLYLlGEtMApHFlc..sNIhhs..........sssshhLKNFYKGLPL...h+spapllNSKKFsTcKPNcl.F.sElcpELssp....ssalKFIQRDYIYDA.pFPsDLL-lL.N-Y.hTpTSlaKhIsKFhppp.......p.sshhpEIVlDRYuVs+YRKL.IKh-.sshhP.shtts-suYIFlpsDl.lQI+GTLNAFYsPp.plh.ILAsNpLFGuTclL............cF-..p..LlsYppspsPh+lss-sYhVstppKlaLs+ahFus.sVPAYLLIRGDYE..oSc.....hKoLc-LpNsWVpNTLLpLhl................. 0 0 0 0 +1347 PF04684 BAF1_ABF1 BAF1 / ABF1 chromatin reorganising factor Kerrison ND anon DOMO:DM04689; Family ABF1 is a sequence-specific DNA binding protein involved in transcription activation, gene silencing and initiation of DNA replication. ABF1 is known to remodel chromatin, and it is proposed that it mediates its effects on transcription and gene expression by modifying local chromatin architecture [1]. 
These functions require a conserved stretch of 20 amino acids in the C-terminal region of ABF1 (amino acids 639 to 662 S. cerevisiae (Swiss:P14164)) [1]. The N-terminal two thirds of the protein are necessary for DNA binding, and the N-terminus (amino acids 9 to 91 in S. cerevisiae) is thought to contain a novel zinc-finger motif which may stabilise the protein structure [2]. 24.90 24.90 25.20 25.10 24.80 24.80 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.98 0.70 -5.65 2 57 2009-09-13 16:19:24 2003-04-07 12:59:11 8 6 27 0 35 55 0 198.70 24 64.86 CHANGED MSLYEYpcPIINKDLAtsDPV.uQpRoFPTLEAWYDVINDYEFQSRCPIILKNSHKsKHFTFACHLKSCPFKILLSaQGs.sSspstDGSPtshsGDttuppptpN........HpNGHTN....utDshuEpE..pDDEDDDAAVTAAIAAAVAAVADSQETIKGPFsVTKIEPYHNHPLESNLSLQRFVLoKIPKILQVDLKFDAILESLCND-DNTVAKFRVAQYVEESGIlDIIKQRYGLT-AEMDKKMLSNIARRVTT.KARFVLKRKK-GVYhhPsuHQloGuDpHQhQhpH......pHQHQ....pQ+phQHpHQpQpQp.QHQpQHQpHVsssppVYQsRItS.SDpD-uulHNLDDsNVRV.AAAAAAAAAALQSR-sHsoE-LKhshtpsQD--ul-sss.sSKRQLHRp-RDRVAEALKMATRDILSNQsVDSDVNVDVDLVTGHKQLSPHDDMAEQLRLLSSHLKEVEAEENVSDsNLKKDDl.DENIQPELRGQ 
............................................................................ss....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pt...t............................................................................................................................... 0 3 17 32 +1348 PF02179 BAG BAG domain SMART anon Alignment kindly provided by SMART Family Domain present in Hsp70 regulators. 23.40 23.40 23.40 23.40 23.10 23.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.53 0.72 -3.87 63 795 2009-01-15 18:05:59 2003-04-07 12:59:11 11 16 274 24 498 779 0 77.20 24 25.51 CHANGED plpplhsclpp.ltsplpph.........tsp...ppcpchhhls.EhLhpt.LlcLDul.p..spGp......slRptRKphl+clQshlcpLD....thp .......................................................lpplhpclpp.ltspltph....................................ts.p...pppcphhtlp..EhLhp..LlcL.Dul.c......spGpt.......slRptRKphl+clQsllcpLDth.t................. 
0 102 206 335 +1349 PF02923 BamHI Restriction endonuclease BamHI Griffiths-Jones SR anon Structural domain Domain \N 22.80 22.80 27.00 27.00 22.70 22.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.01 0.71 -4.30 3 18 2012-10-11 20:44:43 2003-04-07 12:59:11 10 2 15 9 8 16 2 136.40 41 74.24 CHANGED M+VcNcEILl-sGclssc...IpSIhsEVcsSIcsslWPAsScsFsINsT+Ku.NGVKPIK-pCMpHLc.paGWaLEK+LDIhKs-pKP.GPIDAVK.l....usKtFAlEWETGNISSSHRAINKMlLGMLcGcIIGGILILPSRpMYsYLTDRVGNFcELcPYF-l ........................hcl.pp.hh.pttp..h...pp...htphhs-lhpuIps.haPssspsFhlNs..scKs...NGVhPIKp.ChphLc.phsWhhEp.lcl.hhptp..GPlDshK.h....p.+hhuhEaETGNISSuHRuhNKhlLGlhcG.l.hulllhP.+phh...Y...LTDRluNFcELEPYFp.......................... 0 4 6 7 +1350 PF00373 FERM_M Band_41; FERM central domain Bateman A anon Bateman A Domain This domain is the central structural domain of the FERM domain. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null --hand HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.53 0.71 -3.95 56 4264 2009-09-17 09:40:39 2003-04-07 12:59:11 13 246 174 84 2246 3702 4 127.30 24 12.59 CHANGED -.t...hp-shphphhYhQs+psllp...sch...........ssp.-pshhLAuLthQhphG.chs.tpptsts..htt..th...................................................................................................................................................................................ls....................................hhpphppcp.hppclhptap.phps.........h..ottcAch....palphspp......L.stYGsphF 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hp-phs..hhhha.l..Ql.+pcllp.......Gcl...........Cs....css..hhLAu..htl.Q..s..chG...Das...pt.p.tt..s..hh.tpt.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hhP.................................................................t...h...p...s..c.........p.......h........cc.c.lh....phap....ph.p.u..............................h.....o.tc.Ach.........paLp.h.sp.p......L.thYGlpha.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................. 0 564 750 1407 +1351 PF03114 BAR BAR domain Bateman A, McMahon HT anon Psi-blast P25343 Domain BAR domains are dimerisation, lipid binding and curvature sensing modules found in many different protein families. A BAR domain with an additional N-terminal amphipathic helix (an N-BAR) can drive membrane curvature. These N-BAR domains are found in amphiphysin, endophilin, BRAP and Nadrin. BAR domains are also frequently found alongside domains that determine lipid specificity, like Pfam:PF00169 and Pfam:PF00787 domains in beta centaurins and sorting nexins respectively. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.52 0.70 -4.85 28 2173 2012-10-03 12:17:00 2003-04-07 12:59:11 13 82 313 19 1287 2310 1 214.10 18 40.80 CHANGED hpKp....hsRssQhhppKhGtuEpTc..DccFcphEc+hcthpptspclhccsptYlps..shtscph......................hLupshhchucphupcss...u...........sh.phupshcpl.uphhcshptplcpshlcPlpphh.sphpslp+plcKhps+hLDaDsp+p+hp+hp...t.................--EhctAppca-chs-.lpppl.slhstcsphl.splpshlptphcaappshphLpplptpltt 
....................................................................................................................................................h..p.h...t...p..h..s.................s...p.t...Tp......-.......t.......p.a.pp.h...p..p..hp..hpp.........spcl.cph....p....t....h...lp...s..........s.h..ptpph.................................................................................hlup.sh..c..h...h..pth..s.......ttpt.......s.................................................................hh....ph.spshc..pl....sp....h........h......p..p.h...p........tp...............l...p...pphl...p.PLp.phh..spht....sl.......p..c...............th+.......K....hpp...+...h.......l.......D.aDt....t+p.+.h..p........p.hpt..tp..t...........................................................-cElc..pA..p.ppa-...phs.......ptl.....p.p.......cl........p........l...h...s..t.c.h.phh.sp.hps.h.h.....p...hp....h...p....aappsh....phhppl.....t........................................ 0 333 558 919 +1352 PF01337 Barstar Barstar (barnase inhibitor) Finn RD, Bateman A anon Sarah Teichmann Domain \N 21.00 21.00 21.30 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.93 0.72 -4.14 52 1251 2009-09-10 16:56:04 2003-04-07 12:59:11 13 10 1143 47 253 726 26 85.60 29 72.31 CHANGED ltlDhsphpscpslhptlupph.sFP.saaGpNhDALaDsLosh............sphshhlhhpthsphpph..cphtt...lhplhc-spcph...ss.t..hp .......................hhDhsclpspp-hacphupsh..uhs..pth..G....c.NLDuLaDsLts..................hhshPlpI.asphsp.t..p+............c.c..a.ss...Ll.lhc.-.Ap-ch...tt......thh......................................... 
0 59 152 209 +1353 PF00967 Barwin Barwin family Bateman A anon Sarah Teichmann Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.95 0.71 -4.43 8 153 2012-10-01 21:39:58 2003-04-07 12:59:11 12 6 58 2 47 226 1 108.50 60 69.81 CHANGED SAoNVRATYHlYNPtp.NWDLs..AsSAYCAT..WDAsKPhuWRpKYGWTAFCGPuGPRGQuSCGKCLRVTNTuTGApsTVRIVDQ..CSNG....GLDLD.ssVFpplDT.sGhGYQpGHLsVNYQFVsC ...........................................AsNVRATYHhYsPtp.......sWDLs..ssSAYCuT..WDAs...KPLu.WR.pKYGWTA..F.C.G...P...s..G......PpGpsuCGK...CLp......VT.....N.....T....u...TG..Aps.........TsRIVDQ...C....SNG.......G...LDLD.ssVFpplDT.sGtGhppGHLhVsYpFVsC............... 0 6 28 38 +1354 PF04865 Baseplate_J Baseplate J-like protein Mifsud W anon Pfam-B_4777 (release 7.6) Family The P2 bacteriophage J protein lies at the edge of the baseplate. This family also includes a number of bacterial homologues, which are thought to have been horizontally transferred. 
20.50 20.50 20.60 20.50 20.10 19.80 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.59 0.70 -5.03 136 2290 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 1421 0 441 2037 229 212.60 22 56.27 CHANGED pustL.Dphuth.hs..ltR..hsss.A.ssshh.sss...................................................................................................................usshEsD-s.......hRpRlhhshcshss....uGspssYphaAh...........................ss.ss..............Vscutshss...............u..............................sGs..Vplslls.p...................su......hssspllst.....................Vpshl...........pt-sl..........RPlsc.pVpVtusphhshslssphhhh..sss.spsht.sspps...lpsahtstpph..........Gts...lhhSt.l..hus ..................................................................................t....Lt.hst..hs..h..Rh.ht.u...p.s...h...h.........................................................................................................................................................................t............................................................Gss.....hEo.....Dps.......h.RtRh.t..s..hc.s..hss....uG.s...p..s.sYpha.Ah............................ss..st..........................Vs.cstshss............u...............................................................................................sus.....Vp....l.hllsp..........................su..........hsspsllst..........................................................................Vpphl.........................-sh....................+Pl....s.....s...pl..pV.tu.sthhshplpspl..hhh...sss.p.......sp..sht.....shp...tt........lpta.ht.t...th...........stt...l..stl..................................................................................................................................................... 
0 128 281 374 +1355 PF01586 Basic Myogenic Basic domain Bashton M, Bateman A anon Pfam-B_427 (release 4.1) Family This basic domain is found in the MyoD family of muscle specific proteins that control muscle development. The bHLH region of the MyoD family includes the basic domain and the Helix-loop-helix (HLH) motif. The bHLH region mediates specific DNA binding [1]. With 12 residues of the basic domain involved in DNA binding [2]. The basic domain forms an extended alpha helix in the structure. 25.00 25.00 27.10 26.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.47 0.72 -3.26 7 456 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 147 4 145 408 0 84.90 40 34.53 CHANGED MELh.s....hs.......hF...s.pp....FYDupsh.sschp..hhEsht...sp.ssLp.........P-s.ptppE-EHltAPs....cHpsG..pCLhWACKsCKRKosssD ...................................................................................................................................................................F.............ah.-...s...sh..sc.hp...hh-s..s....h.p....s..sL.p.........................s-.t....p...s.s....p...--..E...HVtAPs.............s.pHpsG....pCLhWACKsCK.RKosssD... 
0 21 30 72 +1356 PF02028 BCCT BCCT family transporter Mian N, Bateman A anon IPR000060 Family \N 20.10 20.10 20.30 20.30 19.80 20.00 hmmbuild -o /dev/null HMM SEED 485 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.54 0.70 -6.25 145 4820 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 2128 17 801 3301 1081 459.80 34 85.74 CHANGED VFhhSshllhhhllhsl.....hh.scthpshhsshhsalsspFGWaYllssshhllhslhlAhS+.aGpl+L.GsscscPEaSthoWhuMLFuAGhGlGLlFaulAEPlhHatss................P...ts......psto...pAA.p.AhthoaFHWGlpuWAlY.ulluLulAYata+.+shP................hplposh.hPllG.c..+.hpGs....lGcslDllAlluTlhGlATSLGhGshQlsuGLstlh.G.......l...s.s......shss.plhllsllsslhslSshoGls+..GI+hLSplNlhLAhhLllFlllhGP.ThallsshspslGsYlpsh............hthohhh...ts...hs......sss.................................................WhssWTlFYWAWWIuWuPFVGhFIARISRGRTIREFllGVlllPslhshlWaulFGuoAl.........thph.....puss.slss.............sttss.ssulFshlpph.............Phutlh....uhlsllLlhlFalTSuDSuohVluhloosG...st..sPsthh+lhWulhhuslAhs.LLhs..GG.......LsuLQsssllsALPFsllhllhhhuL.....................h+sLpp- 
.............................VFhhoh.h.ll.h.h.h.s.h.h.s.h.....h.h.s.c.....t.stthhss..................h.hsalspshGWhallhsslhlhhsl.hl.s.....hSc.aGpl+.L.G..t.p.pcPEaShhSWhuMLFuAGhGluLlFaGsAEPhhaahs.......................................P......sh.........ps.to.tpAh...p.AhthohFHWGlpuWAlY.ulsuLslAY.Ftap.+s.h.P................h.p...lpusl.hPllG..c....+....h....p.G........l.G.chlDlhullATlhGlAToLGlGs.QlssGLphLa..u.......................l....ss..............shth.phhlIslhshlhs..lSshoGlsK..Gl+hLS.plNhhLAhlLll.hl.l.....l.....h.....G.....P.....T..hal...hs..sh...ssslGs.Ylp.s.h..............hph.oh.t..ts...hs.....pss.................................................................W..h.p.sWTlFYWuW.WluWuPFVGhFlARISRGRTIREalhGslllPshhshlWaulFGs.sAl......................hh..h..............ps..hh....slsp...................................ttss..ptu.la.thhpph...................................P.h..u..p.lh....shlshllhhlFalTouDSuohlluhhosps......................s..cs...s.hhhRlhWulhhu...............lluhs.LLhs......GG...........LpuLQsssllsulPFshlhlhhhhuhhKslpt................................................................................................... 0 247 506 695 +1357 PF02327 BChl_A Bacteriochlorophyll A protein Mian N, Bateman A anon Pfam-B_38317 (release 5.2) Domain Bacteriochlorophyll A protein is involved in the energy transfer system of green photosynthetic bacteria. The protein forms a homotrimer, with each monomer unit containing seven molecules of bacteriochlorophyll A. 
25.00 25.00 234.40 234.10 19.20 18.50 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.33 0.70 -5.83 2 70 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 28 4 12 70 0 290.90 78 98.43 CHANGED TTAHSDYEIILEGGSSSWGpVKGRAKVNVPsA.PLLPsDCNl+IssKPLDstKGhVRFoshIESlVDSsKNpLsVEsDIANETK-RRIsVGEGSloVGDFSHSFSFEGpVVNhaYYRSDAVRRNlPNPIYMQGRQFHDIlMKVPLDNNDllDTWEGh.pulpusGu.FsDWIREFWFIGPAFsAlNEGGQRIS.I.VNS.sspuGEKGPVGVoRW+FSHuGSGlVDSISRWsELFPs-pLNKPASlEuGFRSDSQGIEVKVDGphPGVShDAGGGLRRILNHPLIPLVHHGMVGKFNDFTVDTQLKIVLPKGYKlRYAAPQFRSQNLEEYRWSGGAYARWVEHVCKGGTGQFEVLYA .........TTAHSDYEIlLEGGSSSWGpVKuRAKV.NVPsA.PLLPADCNlKIssKPLDstKGhVRFousIESIVDSTKNKLsVEsDIANETK-RRIuVGEGpVoVGDFSHoFSFEGSVVNMYYYRSDAVRRNVPN..PIYMQGRQFHDIlMKVPLDNNDLIDTWEGh.pulpusGA.FsDWIREFWFIGPAFTAlNEGGQRIS.IpVNu.hssESG..-KGPVGVSRW+FSHuGSGlVDSISRWAELFPsDpLN+.P.......ASlEuGFRSDSQGIEVKVDGshPGVShDAGGGLRRILNHPLIPLV.................................................................................. 0 2 3 9 +1358 PF00452 Bcl-2 Apoptosis regulator proteins, Bcl-2 family Finn RD anon Prosite Family \N 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.42 0.72 -3.89 72 878 2012-10-03 11:38:54 2003-04-07 12:59:11 14 11 175 146 362 880 0 97.20 28 43.78 CHANGED Lpplu-clppphpph..apshhpplpl.s.tp..shph....FppVupclFps...........slNWGRlVulhsFuutlsh................chhpptt...sth...lpplsphhspal.ppphssWIpppG.GW ..................................................LpphG--hppp.hpp...apshhppLp....h.......sstp......shpt.......FtpVsp..-....lFpc.................................slNWGR.lVuha..sFuutlsh..................cshppph......tsh.................lp.plspahs.p.al.pcplt.s.W..ItppG.GW.............................. 
0 81 111 190 +1359 PF04538 BEX Brain expressed X-linked like family Finn RD anon Pfam-B_3086 (release 7.5) Family This is a family of transcription elongation factors which includes those referred to as Bex proteins as well as those named TCEAL7. Bex1 was shown to be a novel link between neurotrophin signalling, the cell cycle, and neuronal differentiation, suggesting it might function by coordinating internal cellular states with the ability of cells to respond to external signals [2]. TCEAL7 has been shown negatively to regulate the NF-kappaB pathway, hence being important in ovarian cancer as it one of the genes frequently downregulated in this cancer. A closely related protein, TFIIS/TCEA, found in Pfam:PF07500 is involved in transcription elongation and transcript fidelity. TFIIS/TCEA promotes 3' endoribonuclease activity of RNA polymerase II (pol II) and allows pol II to bypass transcript pause or 'arrest' during elongation process. It is thus possible that BEX is also acting in this way [2]. 26.10 26.10 30.10 31.20 23.40 25.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.38 0.72 -3.79 20 315 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 27 0 152 306 0 126.00 29 85.30 CHANGED M-.......KsppENEtcsp.sps+sE-t.............................................................................ppPhtss-tpcscGshRc.+ltcsh.pF+tDI.sRplss-Ehhct.sD-hpRhhEEhRclRpKht...............shHacp+cspsh. ..........................................................................Mp.psppENEsc.p..stspsEct..............................................................................................pcPhtss.spcsc...ssh+c.thtpsh.pa+.EDl.sRplssE-MhRt.s--hpRh.EEhRcl+pKht...............shHapp+cphsh....................... 0 14 14 19 +1360 PF04714 BCL_N BCL7, N-terminal conserver region Mifsud W anon Pfam-B_5900 (release 7.5) Family Members of the BCL family have significant sequence similarity at their N-terminus, represented in this family. 
The function of BCL7 proteins is unknown. They may be involved in early development. In addition, BCL7B is commonly hemizygously deleted in patients with Williams syndrome [1]. 25.00 25.00 29.00 28.30 19.50 17.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.48 0.72 -4.39 2 196 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 90 0 108 180 0 45.70 73 23.78 CHANGED hsRShRAETRsRuKD-l++Vhpul-KVR+WEKKhVhIpDTsh+IYKWVPluu ........Ro..lRAETRSRAKDDIK+VMsslEKV..R+W.EKKWVTVG..DTSLRIaKWVPVo.p...... 0 23 32 61 +1361 PF01869 BcrAD_BadFG BadF/BadG/BcrA/BcrD ATPase family Enright A, Ouzounis C, Bateman A anon Enright A & Pfam-B_5854 (Release 7.5) Family This family includes the BadF Swiss:O07462 and BadG Swiss:O07463 proteins that are two subunits of Benzoyl-CoA reductase, that may be involved in ATP hydrolysis. The family also includes an activase subunit from the enzyme 2-hydroxyglutaryl-CoA dehydratase Swiss:P11568. The protein Swiss:O66634 contains two copies of this region suggesting that the family may structurally dimerise. This family appears to be related to Pfam:PF00370. 
27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.60 0.70 -5.10 19 4290 2012-10-02 23:34:14 2003-04-07 12:59:11 15 24 2197 20 1108 3723 293 259.00 21 58.92 CHANGED lGlDuGuTso+sllhs........cpup.lhspshssu..................................................................sshptss.p.s.t.h.culppshcpuGhshp-...............................................................................................lthh..sssGhGhsshs..hstchhht...........chsspscGshhhhsssp....uVlsIuGpsopslthc.sGpstsashtuhshuspGphhtlsuctlshslcphsulstpus............................ps.tthsutssshutp.hhsthuus.tutcIlsshspslutplhshhpphushtt.......lhhsGGlups.shh.tl...l.pphhph..h.h.s.sthsuAlGAuLhA ........................................................................................................................................................................lGlDsGSTss+s..l...l..h.s...........p.s...t...p...l...l......h...p.h..h...t...h.s..................................................................s.sh.......................................h.......p...sl.p.p.h.h...p...p...h....t....h..t.....t...............................................................................................................................................................h..t..h..h............s..s..s.G....h....G......t.t.t...h...t..............ht...ph.hh...................................Elhsp..sc...u..s.t...t.....hh......s..sss......................sll.-..IG.G..pDu...K.sl..t..l.........c..........s.......G......t.....h...........t......p.............h......t.h..............ss..............tCu.u.u.s.Gu..a..lc.h..h..u..p.t.....L.p..h...s.l..p.....p..h......s....p..hu..psp.........................................................................................................................ps...hsl..s..u..c....C...s.V...F...A...p....o.p..l...p...s...h.....p.t.ss......s.t.pcI.h...suls.p...u.lsp....p....s....h..s..h...l...t....p...h...p.......t..
.......................l.shtG...G.....s.....h..t..s.............h.....h....p.t.....h.............p....t..h.......t..............h..........s...t..h.......sshGAsh........................................................................................................................................................................................................................................................................................................................................................................................ 0 477 821 983 +1362 PF03170 BcsB Bacterial cellulose synthase subunit Mifsud W anon Pfam-B_3954 (release 6.5) Family This family includes bacterial proteins involved in cellulose synthesis. Cellulose synthesis has been identified in several bacteria. In Agrobacterium tumefaciens, for instance, cellulose has a pathogenic role: it allows the bacteria to bind tightly to their host plant cells. While several enzymatic steps are involved in cellulose synthesis, potentially the only step unique to this pathway is that catalysed by cellulose synthase. This enzyme is a multi subunit complex. This family encodes a subunit that is thought to bind the positive effector cyclic di-GMP. This subunit is found in several different bacterial cellulose synthase enzymes. The first recognised sequence for this subunit is BcsB. In the AcsII cellulose synthase, this subunit and the subunit corresponding to BcsA are found in the same protein. Indeed, this alignment only includes the C-terminal half of the AcsAII synthase (Swiss:Q59167), which corresponds to BcsB. 
20.10 20.10 20.40 20.20 19.80 20.00 hmmbuild -o /dev/null HMM SEED 605 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.74 0.70 -6.50 55 1096 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 931 0 189 915 40 545.50 33 75.33 CHANGED sss...sspspshsFtplGh..sslsLpGspsptslsFslpsD.clVosApLpLsao..SPuLlsshSp....lpVhlNsplluslslsppps...phsshplslsPt.hhs..saNplplchhu+asts.CpssssssLWssluss.SpLsLshpslsl..tscLuhLPsPFFD..............tpstptlslPhVhsssss.splcAAuhlASahGshAs.aRstsFPshhs.......slPsps.....ulVhuossphsshl....shPs..h...sGP.......slthlssPssshs+lLlVhGRssp-LppAupuLshus..sshsGssstlsp......sRpPYDAPsal.o-RslphuELst....spsLpssGhtssslplshpl......PPDLahhpspslslsLcYRYoss...stssSpLslslNsphlpohsLss.t........h...hhspsshssppplplP...shhlsupN.......pLphcFshss.psu....tCpsstss..s..+usIss-SoIDhSsh.HahthPsLssFAsuG.aPFoRhADLS..cTslllPsp.....PsssslsshLslhGphGttTGhPshslplsss.....ss.ssh.pcc.DlLllGshss..shhhph.........sspl.shhhssspth.ht.s.....h.tth.h...t...ssttssssthsls...usushuslhGhpSPhsspRo........lVulh 
.................................................................................................................................sts....ssppsploFsplu..ssshsLpG..hss.s.u.slpFshcuD..clVopAhLsLpYo.sSPuLl.ssp.Sp.........LpVh.lNsph..hus...l..s...lsccph.....p..........pshtphsIssh.hls..caNpl...pl..c...hl...u....+apc.....s...C.Esssos.............sLW.lclsps.SsLsLsapt.lsl...ps-LucFPsPFaD..............s.c.c.s.p..s...slsh.V.hs...s....s..Psss.......pAuu.llAS..WFGutus...aRuppFPVh.hs....................pLPsps............................ul..Vhuos-c..hPshLp.........stPs.....l......puP...............................slph...lsp..Pp.........s........s...h.sK.L.LVVh.G+s..-c-LhpAu+ulApus.........hhhpGpsssVsc..p.h.sRpPYDAP.sWlpoDRs.l.......shu...ELtp..........ppLp.ssGht.......ssslslslsL...PPDLahhcussls..hclpYRYo.s..........sp-sSphslslNsphlpohsLss.t...............hhpshh.ssctslsIP...slpluupN............pLchcFphh....s.....hsu..........sChshtss...ts.+ssIs.s-SoIDhSp.hhHah.hPcLt..hFuNuG.FPFoRh.ADL.S...pTh...sVhPcp.....PspsphpsLLshhGhlGs..tTGhsshsls.lscs.....ust....ts..cct.DlhllGshss....hhp...........................scpl..shhhpss.psh...htps............h...............tttttspst.thp....sssshsslhuhpSPhpstRsllsl.......................................................................................................................................................................... 
0 37 85 137 +1363 PF02138 Beach Beige/BEACH domain Mian N, Bateman A anon IPR000409 Family \N 21.00 21.00 21.20 21.80 20.60 20.80 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.94 0.70 -4.95 145 1432 2009-01-15 18:05:59 2003-04-07 12:59:11 13 80 283 6 960 1325 26 252.20 44 12.87 CHANGED T..pcWtptclSNF-YLhhLNhhuGRSaNDloQYPlFPWllsDYpSp..pl....Dl...s..sspsaRDLoK.PhG....uh.spcRhppht.pphpp.htp.t.....................................a.hYuoaYS..sshsVhhaLlR..l-PF.oshtlplQuGcFDhssRlF....pSlppsaps.s...ps.sDh.+EL...IPEFF.......hhPEhLhN.NphshG.h.....psp.lsDVhL...PsW..u..ps.st.....cFlhhpRcALEScaVSppLppWIDLIF....Gh+Q+GptAh.ps.NlFp.hsYpstht......thps...ppp...thpshlpshGQ....sPt.QLF.ppsHPpR .....................................................................pcW.p.tc..ISNF-YLhhLNslA..GRoaNDL.sQYPVF...PWllsD...Y..sSc.....pL..................DL.s.......sPpsa...RDLSK..PhGs...s.cR.hptht.c+Ycp..hcp....................................................................s.a.HYGoHYS.ou.t.hVhhaLlR.h...pPF.ophhl...pLQu.....G.p.FD...h...s...DRhF..................pSlt.psWps....s.......pshsDV..+EL......IPEFa......................a.h.P....E.hL.hNtst.hs.hGhh......................pssphlsDV.LPsW......A.......ps....Pc.................cFlt.hpR.pAL.......ES-aVSpp.....LHp.....WIDLIF....GY.KQ......p.G...tAl....cAhN.VFa.hoYpGsssh...............................ptlpD...h.+puh.s.IpsFGQ.sPp.QLh.ppPHP.R.......................................................................................... 1 439 568 771 +1364 PF00407 Bet_v_1 Bet_v_I; Pathogenesis-related protein Bet v I family Finn RD, Radauer C anon Prosite Domain This family is named after Bet v 1, the major birch pollen allergen. This protein belongs to family 10 of plant pathogenesis-related proteins (PR-10), cytoplasmic proteins of 15-17 kd that are wide-spread among dicotyledonous plants [1]. In recent years, a number of diverse plant proteins with low sequence similarity to Bet v 1 was identified. 
A classification by sequence similarity yielded several subfamilies related to PR-10 [2]: - Pathogenesis-related proteins PR-10: These proteins were identified as major tree pollen allergens in birch and related species (hazel, alder), as plant food allergens expressed in high levels in fruits, vegetables and seeds (apple, celery, hazelnut), and as pathogenesis-related proteins whose expression is induced by pathogen infection, wounding, or abiotic stress. Hyp-1 (Swiss:Q8H1L1), an enzyme involved in the synthesis of the bioactive naphthodianthrone hypericin in St. John's wort (Hypericum perforatum) also belongs to this family. Most of these proteins were found in dicotyledonous plants. In addition, related sequences were identified in monocots and conifers. - Cytokinin-specific binding proteins: These legume proteins bind cytokinin plant hormones [3]. - (S)-Norcoclaurine synthases are enzymes catalysing the condensation of dopamine and 4-hydroxyphenylacetaldehyde to (S)-norcoclaurine, the first committed step in the biosynthesis of benzylisoquinoline alkaloids such as morphine [4]. -Major latex proteins and ripening-related proteins are proteins of unknown biological function that were first discovered in the latex of opium poppy (Papaver somniferum) and later found to be upregulated during ripening of fruits such as strawberry and cucumber [5]. The occurrence of Bet v 1-related proteins is confined to seed plants with the exception of a cytokinin-binding protein from the moss Physcomitrella patens (Swiss:Q9AXI3). 
20.80 20.80 20.80 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.78 0.71 -4.60 34 1250 2012-10-02 19:24:03 2003-04-07 12:59:11 14 8 180 62 290 1394 4 142.70 32 95.24 CHANGED uhsGph.sElplpssAc+aa+hapt.cschlPcshsc.tIpulclhEG-hsss..GoI+pWsash.-Gc.cshKE+lEhsD-p.pslsapslEGclhpcaKpatsshphh.PKspGs.slsKhohcYEKhs-css.Ppchlch....stplsc-l-saLlus ...............................................shptEhss.s.lsss+la+u.hhh.-s....ssllPKl.hPp.slpolEh.l.E....Gs..G.GsG...........TI..Kcls.F.....s.............c....u..........s...h.pa.s..Kc+.l-tlDcs....NhthsYoll..E...G.s..s.l.s..c.t...lc.p.l.s.hc..hKl...s...sss......G...G...S.lhKhosc..Yc...s...ps..stt...............pc..hch........stslh+slEsYLlt........................................... 0 47 183 242 +1365 PF03494 Beta-APP Beta-amyloid peptide (beta-APP) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 40.80 39.70 19.20 18.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -7.73 0.72 -4.48 2 171 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 48 133 33 165 0 38.90 89 6.25 CHANGED .tc-pt.EVaHQKLVFFAEDVuSNKGAIIGLMVGGVVIA ...FRHDSGYEVHHQKL..........VFFAEDVGSNKGAIIGLMVGGVVIA. 0 1 4 11 +1366 PF00144 Beta-lactamase beta-lactamase; Beta-lactamase Sonnhammer ELL, Bateman A anon Prosite and Pfam-B_106 (Release 7.5) Domain This family appears to be distantly related to Pfam:PF00905 and PF00768 D-alanyl-D-alanine carboxypeptidase. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.82 0.70 -5.42 127 13942 2012-10-02 21:13:33 2003-04-07 12:59:11 19 160 3791 302 3910 15232 5643 321.50 17 75.55 CHANGED lpphhpphht.............................hsGhs............lslhpsGchhh.th...............u....................shss......cohFtluSsoKshsussl.hthscpG................t.........lsL-csl.......................pcals....................t.stptlTlpcLhsaouGh........h.tt........................phhthhtth...h............sGpth..Ysssuhs.Llutlltpss..G.ps.....htphlpcplhps.................h..shpcsthssststptphstsh..t..............................................................shhusspDlt+ah.....................h....tthhsttthpph..........hshhtsststssthttsh....uhGahlsstst............................hhtpsG..........shushhhhsspp..........slslsh...lsNptt..ttttttsthhhthst .........................................................................h.........................hsG.h.s................l.h.l...h..p....s...s..p..h.....h.h.p.ph.h...................................................Gh...................ts.h..t.t.t.............s.h.ss.................s.o..h..apl.u...SloK..s.h....s..u..s.hl..h......p....L.h..pc.G..............................p..............l.s..L..c..c...s..l...................................................................................................sca.lP.......................................t....t....s...t..p....t....lT.l...pp.....LL...s.H..o.....o..G.....l..s.........................t...t.h.h.ptt.................................................................tp.hhp..h...h..t..p...h.......h..t..ht................................................PG..s...p..ht...Y.u...s.........s..shh..lLuh.....l...........l..c.pso...........G..ps....................................hp..p.h...h..p...c..p..l.....hpP.............................................L.....u..h...p....p.....o........t.....h.......t.....s......s.....t.....s....t
.............t....t......h.....s...h......s...h.............................................................................................................................................................................................ssuulh.oos.pDh.....t+ah.................................................h...t.t.t..h....t.t..p..h..h.p..th........................................h...h...h...t....h....h..........s....s...t........t..t..s.h.........................uh..G.a..h.ht.t.tt............................................................................................thh...t.+s..G..........thus.....h..h...h...h.....Ppp....................................ph.sls...h....h.s.Nt...................thh........................................................................................................................................................................................................ 1 1339 2525 3318 +1367 PF01212 Beta_elim_lyase Beta-eliminating lyase Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.74 0.70 -5.27 53 4088 2012-10-02 18:26:03 2003-04-07 12:59:11 16 26 2725 70 1143 27402 8234 301.90 26 73.88 CHANGED DLtSDoht.s.TsuMpcsh.stA..hGD.-.s.YusD........sstcLccpss...........-..............................lhGh-.ss..l..F..............ss.o.GTtANplul....hshs.p....pphlstcs.....uHhahcEs.uuhs.l..uGspshsl...ssp...s.............Gshslsclcp.tlct..s.........a.spssllslpsTps...Gpllsh.cplcplttls+cpG.......l.lHhDGARhhs..Au............sslsssl+Ehs..uh.sDulshshoKshssslG.ullshscc...........................................................alppthch..p+....hLGuth....R..psG....llsA......................................uGlhul-ps......hhphtcDpcp..AcpLup...tlpt........lsh......s.hsp..hs....spsphshltts 
............................................................................................................................................................phhoDs.....ss.t.h...h.puh....h...t.....s......hG..D......-....s....Y....G....s....D..............s..s..t....p....L..p....c....t....h.t................c.............................................................................................l...h...G....h....c......su.......l....F........................................hs....s...G...T......t......A...N........l...s.l.......................s..h....h....c..........p....t...p....p.....h...l...s.sps............................u.H.h..h......h.....p......E..s......Gu..h....t..h........s........G......h....p....s...h....s....l.s.....s..pp.....................................................G..p...l...s...h...c..p...l.c...t...tlct.s.....................H.h.s.p..s..t.h.l.s...l...p..p..T.ps.........G..p..l.......h.....s.....h......p......p......l...c..p....l......t.p..h..s...+.....c...p...s...............ls...l..a........hD...........G.....A......R....l..h..p.......A.s..............................................ss..h..s...h....t..h...p...-...h.s..........p.h....s..D......s....h...s...........h........s...h...o....K....s...h...s....s...s....h..G....u......l.l.s...h.s.cc.......................................................................................................................................................h.l.t..p...t....h..p...h......t+.............h.h....u....u....t....h...........+.........p...t.....G........llsu........................................................................................................u..u..h...h....u..l...c.s.........h..h..h..t...c....s...p..p..p....A.p.p.l..sp....t..l.pt........h..s.h.....h..h...p.....s.....opthhs.....h...............................................................................................................................................................................................................
.................................... 0 371 694 965 +1369 PF02929 Bgal_small_N Beta galactosidase small chain Bateman A anon Pfam-B_592 (Release 6.3) Domain This domain comprises the small chain of dimeric beta-galactosidases EC:3.2.1.23. This domain is also found in single chain beta-galactosidase. 19.80 19.80 19.90 20.00 19.10 19.20 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.69 0.70 -5.38 21 2500 2012-10-02 23:57:29 2003-04-07 12:59:11 12 42 1440 273 453 2175 266 257.00 27 27.63 CHANGED slp.utshphhFs+psGtlpshh.hpGpchLhp..sspssFaRAPTDNDhGss.s.ch......tp............WpsAGh.phppcl..h.s.ptt..............s-shlplphsathsshh...htsphsYplpssGplplslshpssts....lP..lPRlGlphtlsps.hsslpaaGhGPtEsYsDRppuuphGpapsslsp.hssYlhPQEsG.+s-s+hlslpttt.........stlhlsup.....FsFus...YotppLpp.ssHhp-L.tpctshLplDttphGVGG.DSWussVpspYpLts.psapasaslp .......................................................................................................h.lpstshph.Fs+.t.s.G.hlspht...h........ss....p.......p.....h..lh..p........s..p........sF.aRA.s..hD.....ND....hs.....t.tsh...........................t........................W.p.s..s.s.ht.t..h.p.t...ph..t.h.ts.pp..s......................................................sts.....lh..s..p..h...s.h.....h..........h.s....s.....h..h...hpsphpYp...l....s..s.........s...G...p.....l...pl..s..h..p..h..p..h.ts.s...............hP....p...lP+lGhph....tl.....s......tp....h....sp...lpaaGhGPtENYsD+ppuuhhuhap...s.s...ls....p..h..a...p.....sY..lhPQEsGt+pssRahsl.tstt......................................sul.hlpup...............t.h.p.Fus............Y.......ot........p....p....L................p.................p......s........p......Hp...p..-L..p........t.....p..c..t...s..hL.slDttph.GlGu.sSWG..s..p..l..h..s..p..a..pl.h...pp..apaphhh.h......................................................... 
0 131 310 401 +1370 PF02180 BH4 Bcl-2 homology region 4 SMART anon Alignment kindly provided by SMART Family \N 20.50 20.50 20.50 20.90 20.10 20.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.60 0.72 -6.77 0.72 -4.42 11 223 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 58 92 80 246 0 25.40 54 12.65 CHANGED huhssRpLVhcFlsYKLuQ+Gashtpt .....u.sNRELVhcalsYKLSQ+GYsWst.h..... 0 5 12 29 +1371 PF02368 Big_2 Bacterial Ig-like domain (group 2) Bateman A anon Bateman A Family This family consists of bacterial domains with an Ig-like fold. Members of this family are found in bacterial and phage surface proteins such as intimins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.64 0.72 -4.09 46 5992 2012-10-03 16:25:20 2003-04-07 12:59:11 13 651 1569 19 1313 5130 392 78.80 23 17.92 CHANGED sssplslsss......ssuhh.utshphsssth.ssuss......tphoWpS.sNsplAoVs..........ssGhV...ouhs..pGsssIsssssc..spssshTl ............................................................................tlpls.s.........phsl..h..h.G.....t.s.....h.p.l.s..s..s..h.....t...sssuss.............ppl.s....W....s....S.....u.....s.....s.s.l.A...oVs...............tsG.tV...........s.uls.....tG...s.....s.s..Is..s...s.s.ss...s..sh.......................................... 0 641 1057 1180 +1372 PF04775 Bile_Hydr_Trans Acyl-CoA thioester hydrolase/BAAT N-terminal region Kerrison ND anon Pfam-B_2191 (release 7.6) Family This family consists of the amino termini of acyl-CoA thioester hydrolase and bile acid-CoA:amino acid N-acetyltransferase (BAAT) [1]. This region is not thought to contain the active site of either enzyme. Thioesterase isoforms have been identified in peroxisomes, cytoplasm and mitochondria, where they are thought to have distinct functions in lipid metabolism [2]. For example, in peroxisomes, the hydrolase acts on bile-CoA esters [1]. 
21.30 21.30 22.50 22.40 19.60 20.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.52 0.71 -4.42 37 429 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 155 4 254 436 2 126.40 34 30.24 CHANGED hD-.lpItlsGLsPsp.VTlpuphp..c...........-.....pGthapShAtapAsppGpVDLscsssl.uGoYpGl-sMGLhWShcP...p+hshthhppsl......h.pshhlplpshssp............lupsphcRhahusGVpRh.VcEs .......hDEslpItlpGLsPtp..VTlcuphp..c................-..pG.s.hapupAtYpADppG.plD..Ls+ssul...GGoYsGl-PMGLhWohcP........p+sh.h...+..hh+psV..........sPh.hVplplhsup................thlupsshcRha.huPGVpRhsV+-.................................. 0 37 72 171 +1373 PF03496 ADPrib_exo_Tox Binary_toxA; ADP-ribosyltransferase exoenzyme Griffiths-Jones SR anon PRINTS Family This is a family of bacterial and viral bi-glutamic acid ADP-ribosyltransferases, where, in Swiss:Q93Q17, E403 is the catalytic residue and E401 contributes to the transfer of ADP-ribose to the target protein. In clostridial species it is actin that is being ADP-ribosylated; this result is lethal and dermonecrotic in infected mammals. 
23.40 21.90 23.50 22.10 23.30 21.80 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.33 0.71 -5.09 26 558 2012-10-01 23:25:29 2003-04-07 12:59:11 9 24 360 74 64 557 16 184.60 17 40.09 CHANGED AcpWGpcthcpatpp........................ossE+pAlptYTptsYpcINshL..Rpspsph.th...splpcp...........lcpl-uAhpKs.slPcsIhVYRtss.t.ht...t......................ppthpthpshhhu+hhp-tuYhSTSLsps.........uuFutp...l.h+lplsKGopuuYlss..l...............................Ssa.ssEhElLls+uspacl..........schsh.......sssppclll-Ahhltp ...............................................................................................................................................................sttch.tu....lt....Y.s........s......t..s..............h........t...tlN......phL......Rt..sp...............th......ppl.pp........................l...p.p.l...ssuhp..+....h.....h....p......s..h....h.l....Y.Ru........................................................p.thp.hh.p.th.h.....G.p.hhp.p.pu.ahS..TS..hstt......................................ts..hstp..........hh.h...clp.l.s.p.G.p...p.......u....h.h.lss........l............................................................o...th..ssEtElLls+ssph+l...............ppht.........................ht.............................................................. 0 25 39 48 +1374 PF03495 Binary_toxB Clostridial binary toxin B/anthrax toxin PA Griffiths-Jones SR anon PRINTS Family The N-terminal region of this family contains a calcium-binding motif that may be an EF-hand. 
19.70 19.70 19.80 19.90 17.80 19.60 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.50 0.70 -5.85 8 100 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 61 59 4 126 0 353.70 41 51.40 CHANGED s.sDTDsDuIPDsWEhNGYTl..pNKlAV..tWDDshAp.pGYpKYhSsPhcupTsGDPYoDaEKsuGcIDpuluhEARcPLVAAaPsVsVsMEKlILSKNEclSsppst........olSsuTSoSpTasNT.GAslsAuhuhhs.....hShuVSANYSHo.psTVus-aosSp........oaupohulNTA-uAYLNANVRYhNsGTAPIYcVpPTTohVL.csp...TlATIKAK-NphApsIsPsppYPpKupsuIALNTMDDFNS+PIslNYsQLcp.LpssKslpL-TsQssGsYuphs.osGplhlss..sWusllspIpupTASIIlssustss.E+RVAA+-.sNPEDKT.PcLTLKEALKlAa..uhpE.csGlLaY...Ns......KsIsEs.slphhlD-pTuppl+cQLpshss.....KslY....clKLpsKMNIhI+ssohhhs ..................DpDsDuIPDshEhNGYTl......psphAh.....tW-sphtc.pG.......hpKYlSsPhcusTsuDPYTDaEK.soG.phDpsss.pshsPlVAAaP.ltVshE+lllSpscshosppst........olS+s..oosS..po..p..os......ssGssspssh..p.h.s........huhuVSssaSpo.ppTss.pcS.up........sWupshslNoucuAhlNsNlRYhNsGTAshYpVpPTTslVL....ssp...oluTIKA..ppsp.h.up.LuPsptYPp+shsslulsohDpFuSp.IslNhsQLcp.l-ps.c.lpL-TsQspG.hshhs.psG.plhsts..pWuthhspIpthoAplhhsht.s.sh..chRlsAhs.psP.-pT.PphTlt-Alphua..thpc..ss.h.a...ps......h.I.c.....h.hDppTspphcppLtphts.....pplh....phhlp.tMNhhl+.............................................................................................................. 
2 1 2 2 +1375 PF02084 Bindin Bindin Mian N, Bateman A anon IPR000775 Family \N 21.70 21.70 24.20 21.70 20.30 21.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.88 0.70 -4.31 10 599 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 43 0 2 613 0 138.30 52 94.71 CHANGED YGNt...NYPQhhsPphGGsNYs.......GQ.sQQGYuspGM........GGPVGGG...s.uss.sssGt.uGslsGGG....huP.....h.spu.tss.uphc-YSSsshp-s...........-TTISAcVM-cIKAVLGATKIDLPVDINDPYDLGLLLRHLRHHSNLLANIGDPEVREQVLSAMQEEEEEEEpDAANGVR-NVLNNlN..NuPGsGGaGGstu...............uGtsGGh...sshG..sQGtGGshth.Ghssp...usuYN...QGYRQG ...............................s.......thsQ.hs..MGGuNY..............GQ.sQQ.....GYussGM...............uGPVGGG.......AMA.t.Ph...........G.G.GGAMAt...P..VGGGu............................uGPst..........h.G.....t.h....................ts.p-YSSs.....t...........tthIss....................................................................................................................................................................................................................................... 0 2 2 2 +1376 PF00351 Biopterin_H biopterin_H; Biopterin-dependent aromatic amino acid hydroxylase Finn RD anon Prosite Domain This family includes phenylalanine-4-hydroxylase, the phenylketonuria disease protein. 
22.00 22.00 24.60 24.60 19.30 19.30 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.06 0.70 -5.68 6 1616 2009-01-15 18:05:59 2003-04-07 12:59:11 16 13 829 37 525 1403 197 213.20 33 70.19 CHANGED PWFPR+IsELD+CsphlhpYss-LDhDHPGFsDpVYRpRRK.hA-IAasYKHGDPIP+VEYTcEEhcTWtpVFpTLpsLYPTHAC+EYhcsFsLLp+aCGa+EDNIPQLEDVSpFL+-pTGFpLRPVAGLLSuRDFLAuLAFRVFpCTQYIRHuSsPMaTPEPDsCHELLGHVPLLAD.oFAQFSQEIGLASLGAoDEtIEKLuTlYWFTVEFGLCKQsGplKAYGAGLLSSYGELhHuLS-+Pcl+sF-P-sTAVQsYpsppaQPlYaVuESFpDAK-KhRpaAuoIpRPFuV+YsPYTpuIEVLDSsppIpphh-slpsElphLssALsK ................................................................................................................................pYo.t-.tsWt..lhp.p.................thh.thAs.ttahpsh..l.....h..cplPplt.-lsphLt..............tto...G..a.plhs..ls.uhls.ttFht..hLA...+.hF......sspal.Rp.pp...Yh.E.....P..DhhH-lh....GHsPhl.....h..s.....s.........a..A...p.....a.p........t...h..G............hu.....h.t.s.........s...........t................t...h.t...........h...........L.u...p.......h.......aWa..TVEFGL.h.........p....p............t.......t.........h+hY..GuGlLSS..uE.....................a.s...h.......o.......s..t........s..ph.h.s.ap...sh.h.p.a.hs.hQ.hYal.ps....h....t............................................................................................................................................... 0 167 254 398 +1377 PF00364 Biotin_lipoyl biotin_req_enzy; biotin_lipoyl; Biotin-requiring enzyme Finn RD anon Prosite Domain This family covers two Prosite entries, the conserved lysine residue binds biotin in one group and lipoic acid in the other. Note that the HMM does not currently recognise the Glycine cleavage system H proteins. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.22 0.72 -4.34 49 25099 2012-10-02 20:27:15 2003-04-07 12:59:11 17 167 4936 74 7239 20327 9244 71.80 30 15.82 CHANGED tplpsP.hGphhp......thhVcsG-pVptspslshlEu.Khph-lsust.sGslpplhsptGss.VpsGphlhpl ....................................h..ltsP.l..Gps..h.p.........s....l....hphhVpsGDpV.........pt.sps.L......h.hl...E.u.......................K.....h.....p...h.....-......l.......A..s...........t.....s...G...........s.........l.p...c...l.h.....l..p.....p..G.ss..Vp.sGphlhh.............................. 0 2248 4430 6064 +1378 PF03744 BioW 6-carboxyhexanoate--CoA ligase Bateman A anon COG1424 Family This family contains the enzyme 6-carboxyhexanoate--CoA ligase EC:6.2.1.14. This enzyme is involved in the first step of biotin synthesis, where it converts pimelate into pimeloyl-CoA [1]. The enzyme requires magnesium as a cofactor and forms a homodimer [1]. 25.00 25.00 30.50 30.40 19.90 19.50 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.48 0.70 -4.94 17 405 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 395 0 39 195 3 228.90 45 69.10 CHANGED aSl+MRAS......psspHISGAEcls.stpclcphlsphlp+uhsHpp...G.psDFlslplEclp.csIphlp.sLslcohpstshEcu+phApplLppt............GlscphhcpAh.chlscs......shRGAhllshcoGpRL-s.ctpRGVRVophDhs-pp....p.tlhpps...spRsh-AlAlAoKVhst.GVlAELChSDDssYTTGYVAscphGYhRIsslKptGs..GGRlFFlcsshcl.p..shIphLEppP..VlIp ......YSl+MRuS......................spshHISGAEols...p.hccIEQTVpphhp+uhhHpp...G.psDFlsl.+lpclh.psIppI..AL.l...................hc-s+t.hpcLh....p.c.s............GVo.cpAlppuh.phlpst.....sshpGAllLsAhoG+RLD....u..spRGlRsT+Fu..hpshss...........ps....hs...cRhp-ALslASplsAtPhVhuELClSDD.sYTTGYhAssclGYpRlhslKssso.hGGRlIFVD.s.s.h.....sl..s...phIoaLEspP......t..... 
0 13 23 34 +1379 PF02632 BioY BioY family Mian N, Bateman A anon COG1268 Family A number of bacterial genes are involved in bioconversion of pimelate into dethiobiotin [1]. BioY is a component of the BioMNY transport system involved in biotin uptake in prokaryotes [3]. 23.10 23.10 23.10 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.91 0.71 -4.56 153 2666 2012-10-03 02:46:00 2003-04-07 12:59:11 9 7 2228 0 579 1839 1577 149.40 30 79.42 CHANGED l.Ph....ss..VPlThQshuVhLuGslLGs+hGsluhllYLllGslGLPVFuGG..p.G.G.luhhhGPTuGYLlGahhuAhlhGhls.......c+hthp.................hhhhhhshlsGhlll.YshGhhaL..s.....hht..........t...lshsp....Alh.....huh.hsFlsGDllKsslAshlsht....l.pchh .........................sh.ss..VP..lTLQshulh..LsGhlLGs+hGslohhlYlllG.sl.....G.L...PVFuGG.......p...u...G.l.....usl.h..G.P.TuGYLlualls.Ahl.sGhlt.....cphtpt......................................h.hhhh.h.s.h.l.hG.hhll.al.h.Ghhhl..t...........hht....................s......hshtp......Alhhuh.hsF.l.ssDl.l.K.slluuhluhtlh..h....................................... 0 205 400 503 +1380 PF00653 BIR Inhibitor of Apoptosis domain Bateman A anon Prosite Domain BIR stands for 'Baculovirus Inhibitor of apoptosis protein Repeat'. It is found repeated in inhibitor of apoptosis proteins (IAPs), and in fact it is also known as IAP repeat. These domains characteristically have a number of invariant residues, including 3 conserved cysteines and one conserved histidine that coordinate a zinc ion. They are usually made up of 4-5 alpha helices and a three-stranded beta-sheet. BIR is also found in other proteins known as BIR-domain-containing proteins (BIRPs), such as Survivin (Swiss:O15392) [2]. 
22.90 22.90 23.00 22.90 22.70 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.81 0.72 -3.55 184 1889 2012-10-01 20:49:39 2003-04-07 12:59:11 16 35 348 189 901 1890 5 67.90 37 17.22 CHANGED RlpoF.p.t.......................WP...hth...psp...p.LAcAGFaYsGt................sDpVpCahCphtl....psW.p.sD......sPhpcHt+atP....p.Ct...alp ...............................RlpTF.p..........................WP.......h..........s.s.s.cpLAcAGFaYsGt...................sDpVpCFhCsstL....p.sW.c.sD.......................cPhp-Ht.+ahP.....p..CtFl.......................... 0 236 332 623 +1381 PF04197 Birna_RdRp Birnavirus RNA dependent RNA polymerase (VP1) Bateman A anon Pfam-B_2204 (release 7.3) Family Birnaviruses are dsRNA viruses. This family corresponds to the RNA dependent RNA polymerase. This protein is also known as VP1. All of the birnavirus VP1 proteins contain conserved RdRp motifs that reside in the catalytic "palm" domain of all classes of polymerases. However, the birnavirus RdRps lack the highly conserved Gly-Asp-Asp (GDD) sequence, a component of the proposed catalytic site of this enzyme family that exists in the conserved motif VI of the palm domain of other RdRps [1]. 
25.00 25.00 33.30 30.80 17.20 17.10 hmmbuild -o /dev/null HMM SEED 860 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.67 0.70 -6.51 5 338 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 26 27 0 228 0 401.50 68 97.83 CHANGED MSDVFNSPQsRuoIosALGhKssuupDlc-lLlP+pasPPcDPlsus...pcAApaL+-NpY+lL+PRuIPE..Np.l-TDsh.hP+Lsph..l-...sGcLtDT..VSlPtGoocaIPKYYPsHKPo+pcsssa.PPDlTLLKQhoYpLLpss-so-Npt......-Tl+pLpcAIsTppYGSGShpGQlsRLlAMKEVATGR...NPNKoPKclGY.ThEclAchLDpTLPIsPPtsDDc.hlsLossLSaLl.hTsDsuss...cDYLP+IThKSSAGLPalGKTKGETssp.ALsluDpFLR-VSphLK-GAsTuuss.........................cpcLc+lLsDYWYLSCGLLFPKuERY-pscWLTKTRNIWSAPaPTHLLLSsISsPlMcsShNNlhNs.cTPSLYKFNPF+GGM-sIVshI...Lus.sEslhLVYADNIYIhh-N.....TWYSIDLEKGEANCTP-HAQAsuYYLLTRGWTs--GoPtFNsTWAThAMplAPuMVVDSSCLLMNLQlKTYGQGSGNAWTFLNNHLLSTIVVsKWscsGpP...................NPsSKEFtcLEutTGINFKIERsIcsLRpKLpEAscpAspsGYL.S-GoE.PPcpPuPTVELDLLGWSATYS+ah-hFVPVLDKERLasSAAYPKGlENKsLcuKsG...AEQAYKlVRYEALRhVGGWNYPLIspAscssAps+Rs+....LcsKGhsLDchlu-..Wsc..h.SEFGEslEslol..ccsVTspsLt-LNtsscshcPsVs+shsRs...sL+cVoNALppGsYKsspossGchLsstA+SRI.....ps.h...+scslt-pls+LKPscscuDsWs-RsEptustlctLh+AsslhcppLcEsucALEsVQ......SscllsuKpPpEKssppAoNPVVGY+..uc+h...........sShP...p..sLoPsu+KttKRRpKQ+cp 
..................................................................................Pp....EhEsDQI.LPD.....LAWMR..QIE....GAVLKPT.....LSLPIGDQEYFPKYYPTHRPSKEKPNAY.PPDIALLKQMIYLFLQVPEAs-sLK...................DEVTLLTQNIRDKAYGSGTYMGQATRLVAMKEVATGR...NPNKDPLKLGY.TFESIAQLLDITLPVGPPGEDDKPWVPLTRVPSRMLshTGDsss-........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +1382 PF01766 Birna_VP2 Birnavirus VP2 protein Bateman A anon Pfam-B_946 (release 4.2) Family VP2 is the major structural protein of birnaviruses [2]. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) [1]. 
19.70 19.70 21.60 21.10 18.70 18.70 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.32 0.70 -5.56 4 2317 2012-10-04 01:49:40 2003-04-07 12:59:11 12 5 39 48 0 1558 7 234.70 75 85.90 CHANGED ppspspYL+SLLhPpsGsuSIPDDhhtRHsL+sEToTYNLpsusoGSGLIVhFPssPuSllGAHYphsuss.shhFDQhlhTuQ-LKcuYNYuRLlSRplsV+SSTLPAGVYALNGThNAVTFpGSLSElpDhSYNuLhShTuN.pDKVGNVLVG-GVsVLSLPsGaDhPYVRLGDcsPth.pSs.thsspCssuspPRtYpIssss.......V.ssGspsphauhNlDulsssslss-hphphpsps.hshshpsh.luhsGhssssRslohossshusTspa......hstlhspspITpPlsulKlphph....u.tsusshu.sssSSluloltGGNhPGsLRPlTlVAYEpVAsGSllTluGlSNYELIPNPELtKNlsTpYG+hDPtshsYsKhILScR-cLGlRoVWsht-Yp-hppYFpEloDhsSsL+hAuAFGatDll+uIR ...........................................................................................................................................................................................................................apIshusph......s.YQsGGV...TITLF.SANIDAITSLSlG.GELV.FQT....SV.Qu...LlLGATIYLIGFDG..TAVITRA....VAAsNGLTAGTDNL....MPFNlVI....PTsEITQPITSIKLEIVTS...........KSGGQAGDQMSWSAS.GSLAVTIH..GGNYP.GALRPVTLVAY................................................................................................................. 0 0 0 0 +1383 PF01767 Birna_VP3 Birnavirus VP3 protein Bateman A anon Pfam-B_946 (release 4.2) Family VP3 is a minor structural component of the virus. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) [1]. 
20.80 20.80 29.10 28.50 20.30 20.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.65 0.70 -4.75 5 161 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 30 2 0 176 0 219.30 65 23.46 CHANGED lcYLs.......cLtMsosuSutcPELEcslcAhMAcA+clPsup..................KlLsLhSWsRcNuLlDcMacWAp.........pDPcAlRhcRhLuNsP+cGpKspctKhhsts....uKGPTpcs.......AQ-AKAsRISpDAscsGt-FATPEWVA..pNsaRGPoPGQhKYYhtTGhsP-PG--YpDYl+psloRPss-sKItRLAsSlYGhPsQEPAP--FhDtVAcVas-NsGRGPsQ-QM+DLRctARcMK+R .....................L.PYLPPsAGRQ..acLAMAASEFKETPELESAVRAMEAAAsVDPLFQ..................SALsVhhWLcc...N..GllscMssFuh.........oDPsAcRhcshLussPptupK........ts....s+GPT.cc.......AQ+tKsTRIStch.shGh.FATPEWVA..LNGaRGPSPGQhKYapsTtclPDPNE-Y.DYV+s.ppoRhss--pIhRhAsSlYGsPsQs.sPpsFlDcVAcVY-.NcGRGPsQEQM+DLh.tAhcMK+R....................... 0 0 0 0 +1384 PF01768 Birna_VP4 Birnavirus VP4 protein Bateman A anon Pfam-B_946 (release 4.2) Family VP4 is a viral protease [1]. The large RNA segment of birnaviruses codes for a polyprotein (N-VP2-VP4-VP3-C) [1]. 21.20 21.20 21.20 22.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.62 0.70 -5.33 4 354 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 42 21 0 303 3 175.60 57 29.75 CHANGED VAsPVlSTLFP.AAPLIGAADphlssLhtssAuuGRhsu+AAuG+h+ss.cphtsho.suphuhplcspL........EssNapcs-l.+s.........ppuslFPVVhTs...cssPu-s.uphhsVI.Gth.-L.sPNQpshshaphsstpVaGhupD..lPhEssc.sYTsLPlc-l.hsGsIslpK..hsPlhGsSuQLAI.hhssslcpGV....Ph.hsFTGplsts..olh.IpGVsl...KhhsAHcLGLPLlGspPGls-hsssTSLAs+lh .............lAsPVlSTLFP.AAPLhtAhsphls.LhtspAtuuphpu+AAuG+h+ss.sphtpho....suchuhplhspL.........ss...htpstl..s.................lhPVlh............................................................................................................................................................................................................. 
0 0 0 0 +1385 PF03042 Birna_VP5 Birnavirus VP5 protein Bateman A anon Pfam-B_1772 (release 6.4) Family Birnaviruses are ds RNA viruses. Non structural protein VP5 is found in RNA segment A. The function of this small viral protein is unknown. The proteins are about 150 amino acids long and contain several conserved histidines and cysteines that might form a zinc binding site (Bateman A pers. obs.). 25.00 25.00 33.40 39.90 19.50 18.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.73 0.71 -4.66 7 113 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 16 0 0 102 0 123.50 64 94.44 CHANGED hpDEHppuNRNLhElHYASRDWss+...HSGRHNtEsHsKTRDLVlQh..RGhRlRKhsSCLhPWtohlpstCoLQsEsEPDGstlRPVAsDlsGPcEulQLhEAslpEIR+scLHsstWsLCochD.cRpcLRRpSl .sDc.s+uN.s.spVHstspDANsRTGVHSGRHPtEAHoQVRDLDLQhDCtGaRVR..AsCLFPWhPWLsCtCSLHs.AEQWEhpVRssAPDssEPstpLQLLQASEoEs+ppVKHTsWWp.LCTKhc+KRRDLPRKP.. 0 0 0 0 +1386 PF03493 BK_channel_a Calcium-activated BK potassium channel alpha subunit Griffiths-Jones SR anon PRINTS Family \N 26.80 26.80 27.50 26.90 26.00 26.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.24 0.72 -4.06 35 665 2009-09-11 05:04:26 2003-04-07 12:59:11 13 15 154 10 289 608 6 99.50 40 9.24 CHANGED shsphhhphs.-pllslpElKhsllApsslsPGhsTllsNLlpopp......phss.t..........ppWhspYhpGhtp-Iaphtl.sspF....hGhoFsplsthhacphsllLlGlE ..................................................s..spaphK.u..DcslChsEhKhuh..............lAps.C.l.sPGhSThlssLhphpp..........h.ch.pp..............................................-pW.p+hYhcusu.NEhYs.hL..sSsF.................hGhSFstss.hsatKh..t..lhLIulc......................................... 0 105 144 218 +1387 PF04940 BLUF Sensors of blue-light using FAD Yeats C anon Gomelsky M, Klug G Domain The BLUF domain has been shown to bind FAD in the AppA protein (Swiss:Q53119). AppA is involved in the repression of photosynthesis genes in response to blue-light. 
25.00 25.00 27.00 26.20 23.40 23.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.76 0.72 -4.26 154 831 2009-01-15 18:05:59 2003-04-07 12:59:11 7 19 584 55 218 692 223 91.60 38 33.13 CHANGED LhpLlY..hS.........ps.p.shs......tppltsIlppupppNtptslTGhLla......s..sshFhQhLEGscpsVppla.p+IppDsRHpslthlttp.l.spRtFss.WsMsh .............................LhpLlY..tS.........ch..pss.hs.......spclppllshApppNhpsslTGlLla........s..sspFhQlLEGs.c-pVptla.cpIppDsRHtsls.Lh.p.c...h...stRpFuc.huMt.h.......... 0 55 135 179 +1388 PF02608 Bmp Basic membrane protein Bashton M, Bateman A anon COG1744 Family This is a family of basic membrane lipoproteins form Borrelia and various putative lipoproteins form other bacteria. All of these proteins are outer membrane proteins and are thus antigenic in nature when possessed by the pathogenic members of the family. One protein Swiss:032436 is a transcriptional activator [2]. 20.80 20.80 20.80 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.58 0.70 -5.68 13 3407 2012-10-02 13:57:41 2003-04-07 12:59:11 9 10 1918 5 933 2720 1656 294.70 25 81.15 CHANGED hhhlulhssGslsDKuFsppua-Glp+hpcch..slchhhtsust...............sshhsshpplpcpshDLIhssGaths-slptlusch..........PchpFhllDuhhpp-...............NlsolsFRspEuuFLsGhhAAhho+psphualu....................GhphshhpsFhhG.FctGs+YsN.....clclhsphssoasDsstupshApphhp.cGlcVIas..huGhsshGVhpsA+-hG.p......hsIGhDpDQ.....uahuscsllsSslpslschhhphhpphhpss..h.sGpshphGL+cusVGhs................c..chh.schhcchlphtpKhl.ttl.lsspc 
.............................................................................................................h...shlhssG.slsDpSFNpus.h.p.G...hp..th...tcc....h........s......l...c....h..t....h..h.p.shs.....................................................t.s.-.h..t.s..s.l.p.p.h.....s.p....s.....u.h.....slIh..u.s.Gatht......s.s.l......t......p...s......Apca.............Pc..h.p.F..s..l.l...D....s...h...hp.sp............................Nlsohsa.pppE.uuYL.uG....hhAu..........h.....h.o.....K........o........s.......p.....l.....G.....a..lG...................................................G....h..p..h..s..h.l..pcF....sG..F....t....t.Gs....c....ss....s.........................ssl..p........l........t......s.......p.......a.............s......s...............s.......a...............s.....D......s....s....K....u..cphA.p..u.hhs...pGs......D..llap.........suG.ss.G.sGl...h.ps.A.cp.t.sth.......................................sIG.VDp.DQ..................s.t.h...s.....s.....p....s......l.....l.s..S.s.l.c....p.h.ss.shhphs.pp....hh....c...u....p...................t.......u.......p....h......h....h...h...GL.c-.s..u...V.s.ls.........................................p...h.s..t...p..h...h...p.t...h.......p.......t..............t.................................................................................................................................. 0 342 623 769 +1389 PF01722 BolA BolA-like protein Bashton M, Bateman A anon Pfam-B_1996 (release 4.1) Family This family consist of the morphoprotein BolA from E. coli and its various homologues. In E. coli over expression of this protein causes round morphology and may be involved in switching the cell between elongation and septation systems during cell division [1]. The expression of BolA is growth rate regulated and is induced during the transition into the the stationary phase [1]. BolA is also induced by stress during early stages of growth [1] and may have a general role in stress response. 
It has also been suggested that BolA can induce the transcription of penicillin binding proteins 6 and 5 [2,1]. 25.70 25.70 25.70 25.80 25.60 25.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.54 0.72 -4.02 238 4233 2009-01-15 18:05:59 2003-04-07 12:59:11 13 22 2088 10 1342 2705 2605 73.30 33 73.74 CHANGED p.Lppsl.ssp.p.lpVps.S...............................tsHFclhlVSstF.pGhshlpRH+hVassLt-.cl..sss....lHALul.+.shT.PpEa ................................................h.lppuh.sst.p.lcV...ps.u..........................tG..toHFplhlVSctF.pG.h.shlp+HphVYssLt-pl..sss....lHAL.u.l.+.shTPpEa.............. 0 378 743 1064 +1390 PF02044 Bombesin Bombesin-like peptide Mian N, Bateman A anon IPR000874 Family \N 18.10 18.10 19.60 18.40 16.80 16.60 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.87 0.72 -5.80 0.72 -4.41 12 105 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 53 2 39 125 0 13.20 79 12.60 CHANGED GspWAVGHhMGKKS .GNpWAlGHhMGKKS 0 2 5 9 +1391 PF02414 Borrelia_orfA Borrelia ORF-A Bateman A anon Pfam-B_1805 (release 5.4) Family This protein is encoded by an open reading frame in plasmid borne DNA repeats of Borrelia species. This protein is known as ORF-A [1]. The function of this putative protein is unknown. 
19.60 19.60 20.70 20.50 19.00 18.60 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.19 0.70 -5.30 25 681 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 33 0 37 578 2 235.80 31 82.34 CHANGED KhQaK...LIsLISTLpYlNpphKK.....................YoQpsILYaFNpNLKRNGQ+slplKTLQpYLY+L-K.hpVTp...NYa+HLG.lNh...GTEIYYKLpasKcc.CaphIN.pYFc-+Kcp+apsRl..........sshh..pcchsKpssVpht....................................................EChsNps.s..NK...cEc..pKptc..IEchplpKYhpKCNFhsp.........llpLplpK-tpIclhKhhK+hE.tlhK..................................p.hph.c...pphKsKpppLKpILpNh+..hphcpcsYspcQLcpphpchYc.pYKsKPHFIIEppKYs..DLspI.htKlKcshcpp.Kpsspcshp.pIKsNI.........aN ................ph.hK...ll.lhusl.alNpph.....cc.....................YsQpsILhhhNpNLp+ss.p.ssl+Thpp.LhhLp+hhtlp....sahpphG.hsp....Go..h.aYclph.h........p.s......aphIs.paFpppctphhpphh.........................pt.h......ppph....p.t.s.lp...........................................................................................................................................................................................................psh.spp......sh..hpp...+.....h.p....p.p...........p...h..tp.sh...................ppt.h..hphhp..p...c.tlh+.............................................h.....phpp.pppph+phL.shp..hph.pptYp.cpl...h.p.hp..YK.K.ahhhcptchp..Dh..l.htchccpht.p...Kpp..pps.p..php..h......................... 1 29 29 29 +1392 PF03183 Borrelia_rep Borrelia repeat protein Mifsud W anon Pfam-B_2029 (release 6.5) Repeat \N 20.60 20.60 23.00 20.90 20.00 20.20 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.02 0.73 -6.24 0.73 -3.84 14 96 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 5 0 0 96 0 18.00 64 43.37 CHANGED TlSILlS+SLhoDhp.h. TlSILLSRSLhSDFsShh.... 
0 0 0 0 +1393 PF00228 Bowman-Birk_leg Bowman-Birk serine protease inhibitor family Finn RD anon Prosite Domain \N 20.40 20.40 22.20 20.80 19.40 19.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -7.35 0.72 -4.08 55 570 2009-01-15 18:05:59 2003-04-07 12:59:11 15 4 80 50 85 586 0 25.90 43 37.76 CHANGED sCcsChCoc.....ShPPpCpCtDhhpt.Ch .sCcpC.h.CTc.....ShPPpCpCsDht.t.Ca.... 0 0 20 52 +1394 PF02653 BPD_transp_2 Branched-chain amino acid transport system / permease component Bashton M, Bateman A anon COG0559 & Pfam-B_654 (Release 7.5) Family This is a large family mainly comprising high-affinity branched-chain amino acid transporter proteins such as E. coli LivH Swiss:P08340 and LivM Swiss:P22729 both of which are form the LIV-I transport system [3]. Also found with in this family are proteins from the galactose transport system permease [2] and a ribose transport system [1]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.59 0.70 -5.43 51 33622 2012-10-02 17:14:55 2003-04-07 12:59:11 11 38 3614 0 9346 25680 15397 275.60 20 80.62 CHANGED tlhshlthus.lslhuluhslsh.hsGhhsluhuuhhshu.uhssshhhthhs.......................................hhlullhuhlsusshGhlsuhlhhthplspl.........lsolh..................hhhhhhulshhhhtthhts.tsss.....h.sshhsh.sshhh.hshthhh..........................................................................lhhllhsllhhhllt+TphGhtlpAlGps......puAchsGlslp+hphhsasloGhhAuluGhlhshhhsss........shuhs....hthtslsssllGGs........ssshGslluullluhlps.shshhth.....................sthttllhGhlllhsl 
.....................................................................................................h...shltt.ssh.h.sll..A...l..G..h..s..l..l......h..h.......s.......G.......h.......l.....s.......luhu.u.hh.slu...u...h..s....s..s...h.h...h..t..t.h.s.....................................................................................hhl.u.l..l.....h.u..h....l....s..u....s........l..h.....G.....h.....l....h.....G.....h.....h.....h.....h.....+.......h......p.......h.....s...........h.................l..s...o..l..h.............................................................h..h..h...h...h....h.....s...l....s.....h..................h......h.........t......t......h...h....s....s....s.....t....s.....s...................h.h.t...s...h....h....t.....h......h....h...h.....h.....h....s.....h..h.......h.hh....................................................................................................................................hl..h..l....l....l...s...h...h..h...h...h.....h....l....p....+....T...p...hG..pt...lhA..lu....ps......pu.A.chhG.lss.p+hphhsasluuh.lAul...A..G.s..l.h..u.t....t..h.s.s.sss..........shu..h.s.........h.t.l..p..u..lss...sl.......lGGh..................Gsl.h.G.s....l..l..G.u....l....l....l..u....h.....l....p.s......s...h...s..h.hsh........................................s...................s.hp.....l...l.h.uhlllhh......................................................................................... 0 2466 5546 7467 +1395 PF00634 BRCA2 BRCA2_repeat; BRCA2 repeat Bateman A anon Prosite Family The alignment covers only the most conserved region of the repeat. 20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.08 0.72 -4.65 25 1602 2009-01-15 18:05:59 2003-04-07 12:59:11 13 37 218 1 293 1604 4 33.50 36 11.38 CHANGED hcsshsuFpTASGKplsVScpoLpKu+plhs-hct ...........s.hhuFpTASG.K.plp.VScpuLpKu+pl..Fp-hp............. 
0 99 128 183 +1396 PF02498 Bro-N BRO; BRO family, N-terminal domain Mian N, Bateman A anon Pfam-B_1235 (release 5.4) Family This family includes the N-terminus of baculovirus BRO and ALI motif proteins. The function of BRO proteins is unknown. It has been suggested that BRO-A and BRO-C are DNA binding proteins that influence host DNA replication and/or transcription [1]. This Pfam domain does not include the characteristic invariant alanine, leucine, isoleucine motif of the ALI proteins [2]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.92 0.72 -3.70 257 2580 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 1532 0 314 2002 73 91.30 24 37.17 CHANGED pl+slh......psschWasup-ls.psL..sa........sss.p........csh......hpcl.cc............ptp..h..................................st........................h............hhlscsGlapLlhpS......c........hs.....p..AcpFppWlh...p-llP ......................................lRhlh....ssc..a.FsupDls.phL....sa....................sss..t...............csl......tppl.cp...............cpp.tth.....tp...........p..............s.ut....t.........................................................pp..h...............hhlsEsGlYpLl.h.pu..c...............hs...............p..AcpFppWlh....ppllP............................................................ 0 99 222 272 +1397 PF03032 Brevenin Brevenin/esculentin/gaegurin/rugosin family Griffiths-Jones SR anon Pfam-B_1232 (release 6.4) Family This family contains a number of defence peptides secreted from the skin of amphibians, including the opiate-like dermorphins and deltorphins, and the antimicrobial dermoseptins and temporins. The alignment for this family includes the signal peptide. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.63 0.72 -4.12 19 1689 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 76 0 1 1661 2 43.60 54 62.46 CHANGED shLKKSLhLlLFLGhVSLSlCEEEKc.-sE...sc-cpct-cpp...Eh+R ...........FThKKSLLLLFFLGT.ISLS.LC.E..c...ERs.AD.EE-..tc-tpp.c..........c........................................ 0 0 0 1 +1398 PF01318 Bromo_coat Bromo_CP; Bromovirus coat protein Finn RD, Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 25.50 245.80 20.40 24.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.01 0.71 -4.59 3 16 2012-10-04 01:49:40 2003-04-07 12:59:11 13 1 6 39 0 19 0 188.60 71 99.28 CHANGED STSGTGK.LTRAQRRAAAR+N.RhT+cVQPVIVEPLASGQGKAIKAhTGYSVSKWsASssAlcAKsTsAlSIoLPcELSSE+NKpLKVGRVLLWLGLLPSVAGTVKuCVTEKQsoAAASFQVALAVADSSKEVVAAMYsDAFKGlTLGDLpsDLoIYLYSSAALsAsuVlVHLEVEHVRPTFDDpFTPVY .STsGTGK.hTRAQRRAAAR+N...R..pTphV...QPVIVEPlASGQGKAIKAhsGYSlSKWpASssAhp.AKsTsAloIoLPsELSSE+NKpLKVGRVLLWLGLLPSVuGpVKuCVTEpQssAuAuFQVALAVADsSK-VVAAMYs-AFKGlTLtpLhscLpIYLYSStAlsAtsVlVHLEVEHVRPTFDDhFTPVY.. 
0 0 0 0 +1399 PF01573 Bromo_MP Bromovirus movement protein Bateman A anon Pfam-B_508 (release 4.1) Family \N 25.00 25.00 26.20 26.60 17.80 17.30 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.50 0.70 -5.42 16 150 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 28 0 0 151 0 263.30 43 94.53 CHANGED ssppulol...otc-.ppL.cplscshpsshtpshsh+tCsshthp.Nssuhpsh-LsoK-s+uhlo..+hssKl+pplhVcHshIaLlYlPhILpoTsussslKLhNhATG-chslGsphsLNcAFIlthsWPRSlhscss.pt+GLaLshpss.Assl.ssutlGphhPhW--sho.tK.hYpcsss.hshsht-s.ss+.lhocKhhpSLlpSphptshsuppcts.hlps.plp..cph....cFTlpphp.ssss...........tspstppstlsstpshh-pshsss ..................P.SoSsFSV...Sh.D-hspluc-lcclh.usphpslsTKtCahLpLl.NhstsssLcLsSKEpKuFLo..R.uDKVKp+lYhshutlaLlYlPlI.s..TTSGllTLKLpNssTGEhsDVsTDV-AN+AFllhsRWsRSLhtsA.....-LsLlholS..sscV+ssA+VGchhsFWDE+hS.+pQhY.-cuNslhFPItETcss+hlsscKlLhShVRSRlhsGspupc.hs.spslpscRlussc+s.....hTlp.tsshsc-............hcstspshsusscshhE-thh................................... 0 0 0 0 +1400 PF04450 BSP Peptidase of plants and bacteria Finn RD anon Pfam-B_5066 (release 7.5) Family These basic secretory proteins (BSPs) are believed to be part of the plants defence mechanism against pathogens [1]. 
22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.54 0.71 -4.86 24 266 2012-10-03 04:41:15 2003-04-07 12:59:11 7 11 162 0 155 275 4 193.70 30 72.57 CHANGED tsslphclps..s.sosGupp.......................Ftphl...s.uppsLssAsphlhphh.ps..ss-+......................ss..csVTl......hlcDh-GVAhooG.....................ppI+hSspYltshss..scs+....tElsGVLhHElsHsaQass..............ps.pssuGLIEGIADaVRL+AGhsss+WtpPusG.........scWD.pGYphTAhFL-ah-s..hs.GFVuclNcch.+ssY...........s-saahplh.Gcs..VppLWp-Y .........................................................h...h.hp.ps....pp.u.uthF.p.l......stphltpu.hthlhphh.ps..stsp...........................ss..ppVsh..............hl.c.s....h..sG.VAassG.....................ppIahSspaltphs.......sphp........tEltGVLhHEhs.Hs...aQass................................ps..psPuuLIEGIADaVRL.c...u.u..h..s..s..s..p.....Wt.p..ss..tu..................ppW-.pGYphTAhFLsalcs.....ht.uhVtplNcph....csta....................................pcshahplh.Gps.....lcpLWp-Y.................................................................................. 0 33 107 133 +1402 PF00779 BTK BTK motif SMART anon Alignment kindly provided by SMART Motif Zinc-binding motif containing conserved cysteines and a histidine. Always found C-terminal to PH domains. The crystal structure [1] shows this motif packs against the PH domain. The PH+Btk module pair has been called the Tec homology (TH) region [3]. 21.80 21.80 21.90 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.86 0.72 -4.33 26 515 2009-01-15 18:05:59 2003-04-07 12:59:11 14 28 86 12 259 461 0 31.70 38 4.63 CHANGED ppYHPshah..sGcWhCCppss+sAsGCshssst ...............pYHPuhah..sG+WhCCpQs....s+sA.GCphhps.t....... 
0 41 60 128 +1403 PF04514 BTV_NS2 Bluetongue virus non-structural protein NS2 Kerrison ND anon DOMO:DM04350; Family This family includes NS2 proteins from other members of the Orbivirus genus. NS2 is a non-specific single-stranded RNA-binding protein that forms large homomultimers and accumulates in viral inclusion bodies of infected cells. Three RNA binding regions have been identified in Bluetongue virus serotype 17 (Swiss:P33473) at residues 2-11, 153-166 and 274-286 [1]. NS2 multimers also possess nucleotidyl phosphatase activity [2]. The precise function of NS2 is not known, but it may be involved in the transport and condensation of viral mRNAs [1]. 25.00 25.00 83.80 37.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.37 0.70 -5.57 5 118 2009-09-11 00:56:28 2003-04-07 12:59:11 7 2 50 2 0 126 0 347.00 51 97.69 CHANGED MEQ..KQRRFTKNVFVLDtppKTlCGpIAstsupPYCQIKIGRshAl+sVsTPEPKGYVLEIs-sGuYRIQDGsDIISLMISs-GVEuTTERWEEWKFEuloslPMAssVslNG.......spsDAEIKYsKGMGlVPPYTRNDFDRREhP-LPGVp+ScYDVRELRQKIREEREKusccpsppsuhKsE....RhhttscsD-DpsshsptAs-hsPcTp+p-pccERRcsltp+lt-t.p....hphp.-c+cc-h++cpcppcsscscSD-s.......D-cGEDS-...--EscscoYITpsYIERIu+l+KhKDERLSuLAStMPQsuGcassMIFoKKpKW-NVPLYsIDEsuK+YELQSVGuC-RVAFVSKGhSLIILPVuu .................................................MEQ..KQR+FTKNlFVLDhstKTlCGtIA+tsSpPYCQIKIGRslAh+sVpsPEPKGYVLplsssGAYRIQDGpDlISLMlTspGVEuTpERWEEWKFEulSssPMATtVphNGshVDAEIKYsKGMGlV.PYhRN.D.F.DR.pEMP-LPGVh+S..sY...D...VRELRQKIKpEREuusch...p.pul....us+pE....sRWh......D-DEsplDEtup-hhP.tst+l-p.cEtRsslh+clttt.........hphs.pE+pct.+sEp-...cp-phps.SDc..........DEpsEDup...--E.cPcoaITcEYIE+luK.hKhK..DERh.SLuStMPQsuGsas+hIhoKKhKWpNVPLYsaDEuoK+YELQsVGuC-RVAFVSKDhSLIILPVG.V................................................................................................................................................................................................................................. 
0 0 0 0 +1404 PF04426 Bul1_C Bul1 C terminus Kerrison ND anon DOMO:DM04045; Family This family contains the C terminus of Saccharomyces cerevisiae Bul1. Bul1 binds the ubiquitin ligase Rsp5, via an N terminal PPSY motif (157-160 in Swiss:P48524) [1]. The complex containing Bul1 and Rsp5 is involved in intracellular trafficking of the general amino acid permease Gap1 [2], degradation of Rog1 in cooperation with Bul2 and GSK-3 [3], and mitochondrial inheritance [4]. Bul1 may contain HEAT repeats. 24.20 24.20 24.20 24.70 24.10 24.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.71 0.70 -5.20 17 182 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 62 0 117 183 0 186.50 20 30.29 CHANGED HDQENWhpLhphls-pE+psLpcLslpLoCIQSNNShsH.sPPcIpSlTTELlCITu+SDN.SIPIKLsuchLh.sc-Klssl+psFpsappplp-Ypp+FpcNhcKLN-LYNhs+sh..ssREL+FoDFIosQlhNDlESLuNLcVplpsLpplFKKQhpohcspsp......stssossshtsp..ttsp.......................s.tthhppplhppWhppss.pYcRpls..VNLphNp.cl+ET.........LVPoFESCLCCRFYslRVsIKF-pHlGo.splDlPVsV+ph ..........................................................................................................................s.t.....hptlpsphhshohpo.p..shPh.hp.thhh...tp............................h.............ht......................h.....h..ph..slpshtthp.p.........................................................................................................................................p..t....phpppht......lsl.p.h.tt.....p.s.................llPsFpoChhsRhYhl+lplch.t.p............s........s.......hplclPlplt.............................................. 0 11 58 112 +1405 PF04425 Bul1_N Bul1 N terminus Kerrison ND anon DOMO:DM04045; Family This family contains the N terminus of Saccharomyces cerevisiae Bul1. Bul1 binds the ubiquitin ligase Rsp5, via an N terminal PPSY motif (157-160 in Swiss:P48524) [1]. 
The complex containing Bul1 and Rsp5 is involved in intracellular trafficking of the general amino acid permease Gap1 [2], degradation of Rog1 in cooperation with Bul2 and GSK-3 [3], and mitochondrial inheritance [4]. Bul1 may contain HEAT repeats. 28.70 28.70 29.00 28.70 28.40 28.40 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.50 0.70 -5.92 27 164 2012-10-02 22:29:00 2003-04-07 12:59:11 7 7 69 0 105 170 0 361.50 28 49.70 CHANGED ssspshphss.spss....sps.lhDlLPSFcMY.solp++lspushs.Dh+shPPoYp-spsppss................sstsssshs.tp.sh.ssstp..sps.p............................p....p.sl.t.t..................................sht....Dshs..p.pshhlDplapLPKLo......o.Pl-lpI+lTKcsspP.pt.ps-.-ohLKEYosGDlIpGYslIpN+SspslpF-MFYVoLEGhhsll-+..........p+sKpplKRFL+MhDloASWoYsslshusGhp.....hhssphD.hDsshlGLsssRlLpPss+YKKFFhFKlPppLLDssCc.cphhsHsLlPPShGlD+hppts+hptIphNpsLGhGalsh+GoPlL...................TpDhu.-slSIsYol-ARllG+s..................pcsschsIhKEppY.LRlIPhsht........sshhsppsshp........hcslhchlpc+l..pthcclhpclcppcs.lss....-l+spsloush ........................................ttp.............t.ttp.l.slLPSapMa.pslhpph..s...sp...shp......PPsYt....pp.s.......................t.s.t....................t.t.........................................................................................................psht..t.pphhl-plhpLspls......s.slplpIhhT+phs..t......-s.lpEYppGDhlpGahhlpNpSspsl.F-MFhVshEuhhphhsp..............tt.hph++FLcMhDhsASW.s.spls..ss.p........hh...hD.hDsshhulsssRlLpPshpYK+FFsFKlPppLLDssCp.p..p......h......h......s......Hs.hLPPohGls+.hp..sphtthths..p...........................s.h.............................hpDhu.tshSlsYslpA+hlG+t...........................psschhlhcEtph.lRllPhsh.........t....t.ps.........hpsh.p.lpp+h......thhppl.pthp.t...................h.............................................................................................. 
0 10 51 98 +1406 PF03557 Bunya_G1 Bunyavirus glycoprotein G1 Bateman A anon Pfam-B_653 (release 7.0) Family Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase. This family contains the G1 glycoprotein which is the viral attachment protein [2]. 25.00 25.00 31.90 31.90 18.10 17.80 hmmbuild -o /dev/null HMM SEED 871 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.79 0.70 -13.63 0.70 -6.74 15 361 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 74 0 0 338 0 609.20 35 56.09 CHANGED YsslAppLtuhstIS.LDhshlshlP--IuuAL+hIEsp+TYHp.lhlEhshLo+YCDYYopFscNSGYSQTsWRhaL+oHcF-lChhaPNpHFC.RClpcusKCssusWDFAsEMpsaYouKps+Fs+DLNLhLpsh+pAFRGTopualsphlpcKcssslhphhsKl+pKaPsNsLLhullcahpYLhuLschosacLsp.Wc-hlapspspppsphs+spcS..YpasNAtssssoKsCcNhKsVsCLSPRuusshsslIACG-sss.plYchPs.plYpSNscpspaClsDoHCLp-FEslspEhLsulKKopCWts-hsshs.cppSsGl+SC+hKDpGsCsVsss+.WsIIpC-sspaYYS-t+csaDpspDIG.HaCLSs+CsT.RYPINPcpIpsCsWphppsphcpIsshsLE-lEpYKKAIopKLpsoLoIa+Yp..TcNLPHIKPlYKYITlpGTETuEGIEuAYIpSpIPALuGTSIGFKIsSKDGppLhDlIsYVKSAsYposYsKlYsTGPTIGINTKHDE+CTGsCPspIsHKsGWLTFu+ERTSoWGCEEFGCLAIssGCVFGSCQDII+sEhsVYRKssEEssslElClTFscKTYCT-lNAlpPIITDchElQFKTV-ohoLPpIlAVpNHclhsGQINDLGsaSpGCGNVQKsNuTshGsGsPKFDYlCHhASRK-VIlRKCFsNsYQuCKhLpp.sShhh--cpsTlTVhchK+lLGslphKhILGDl+YKsFAEslDlpsEGpCsGClsCFEsIpCpFsIcoolEsoCslcusCThFHDRIlIoPsc+cYAlKlhCpcKPssolpFKICNpKl-sshTlVDtcshIELuslDQTuYI+EKD-RCKTWhCRVRDEG 
...................................................................................p........hst...pht..t.hp.h.l.ppp....cshcsthhhEahhhthhs..shappa.psuG.sphpWRhhh+spphchC..assphhC.RCl.ptpcCsssphDhupphpphYptppphaptDlplhh..ht..h.t.s..hh....tptt......h..th..t......h..hh.hh.hh.............................................................................................................................................................................................................................................................................................................hsGhu.sa.lps+pupph.h-hlhYlKSsthphshschYsTuPh.uhsspa.phCTGsCs..s..pl.t..t..ssahsFshp.TS.WGCEEhGCLAIspGslhGpCpsl..hc...-..h..t.lYp..p...hppth.s-lClo...tt...shCs....plsshpPh.ss.....hphphp...s...spshslspllul.p.s.pclh.GpIsclGshs.p...h.GpsQh.s.p..s.....s..l.....h.....s...........us...s....c.FD...a....Cp...hhu+Kclhl+pCh.ssYpupphLpp.ssh.....hp.ppts.hhhphph.LGplphhh.Lsc.haK.h.s.p.p..pl.p.s.c.u.pCsGChtChpslpCphphhoshths.ssl...s.s.Cs.ap.s.pl..hlp.sppchslKhhCppp.t.ph.h.lsttp.pht.ph.h.tp.hl-...s..Dp............................................................... 0 0 0 0 +1407 PF03563 Bunya_G2 Bunyavirus glycoprotein G2 Bateman A anon Pfam-B_1048 (release 7.0) Family Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). The L segment codes for an RNA polymerase. This family contains the G2 glycoprotein which interacts with the Pfam:PF03557 G1 glycoprotein [2]. 
25.00 25.00 30.20 25.60 23.70 18.80 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.98 0.70 -5.52 9 412 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 74 0 0 340 0 233.50 57 29.30 CHANGED ssshsRCFtGGpLltphpSssuhoElClKDDIShlKSpsha..tKNcssIhussKhaRpahVpDWppCNPl.ssuGoh.Vl-VscshpLhsKsYsCpssCsIol-+-sApIlhposcLNHFElsGTTlpoGWFKsKsoloLDpTCEHl+VTCG+KolpFHACF+pHhSClRFh+poILPt.MhpShCQNIELIIlshhslhhhIhhlILTKTYIsYLLlPlFhPhsalYGhlYN+sCKpCpsCGLAhHPFTsCsopClCGh+apoo-ph+lHRputhCpGYKSLptAR .....sslhpRCFpsGsllKppsSppuloElCLKDDVShIKopupY..hKNsoGlausNhshRpWlVpDW+-CpPhcssGGpINVIEVscDLoLpTcoYlCoADCoIslDKETAQlhLQT-shNHFEluGTTlKSGWFKoTshITLDpTCEHl+VoCG.KolpFHACFpQHhSClRah+tohLPt.hhpShCpNhElIllh.h.hh.hhhh.lloKTYlsYlhhPlFhPhshhYuh.h.+.hK.C..ChLshHPFo.Cs..ClCG..atso-th+hHR.sh.C.GaKsL.hsR..... 0 0 0 0 +1408 PF01104 Bunya_NS-S Bunyavirus non-structural protein NS-s Finn RD, Bateman A anon Pfam-B_880 (release 3.0) Family The NS-s protein is encoded by the S RNA. This segment also encodes for the N protein. These two proteins are encoded by overlapping reading frames. 25.00 25.00 33.20 33.20 22.10 21.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.88 0.72 -4.51 24 277 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 72 0 0 221 0 84.00 44 97.52 CHANGED M.hpshpl+Lhp+psMh+Lplshsps.hhl.LESSoShcRRPKlsshtcpsthhhLhLtuuphpaLIpIFhpTushpC.TosLPSTus.sT ....M.h.t.phcLh.h.thhcL.lshs.p.hLl.LtSSSSh.pRP+LhShhppsshLhLpLtuuphph.IhIFhpTushQhhTTlLPSTss.sh.. 0 0 0 0 +1409 PF03231 Bunya_NS-S_2 Bunyavirus non-structural protein NS-S Bateman A anon Pfam-B_3018 (release 6.5) Family This family represents the Bunyavirus NS-S family. Bunyavirus has three genomic segments: small (S), middle-sized (M), and large (L). The S segment encodes the nucleocapsid and a non-structural protein. The M segment codes for two glycoproteins, G1 and G2, and another non-structural protein (NSm). 
The L segment codes for an RNA polymerase. 25.00 25.00 95.60 95.50 18.40 18.20 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.33 0.70 -5.91 9 245 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 30 0 0 244 0 410.40 68 94.52 CHANGED MSosphs..................spphhpsYGop.Ds+AlsDsYslas.GcGpphlphhhaoNuthKouFuhsphG+stDlch...........cEtEllsopc.ashF-c...hsLsIshssc.hpllVp+PslpspGsKaphHspIhsPspshLphsss.....hscc-Fhcp.plpppchhPsshhl-tspKsshhlssssphslcYua.sVMGKs...ssa.tpshs+EhllosKpcsh.ssstssNRhLsspsVKuIpIu..S-ltss..opshLps+pshshclpsQh.RlShsulhcEsuhsRhFhls.-spsRhlhh.ucslsspsNtcTTLlIKllsKsh..sphssshsp...shpsChcs......ltsphGlVc.hhssDPs.YNphIs+sLLuVHTphAhslScsLpKPlIVFplhD.pELpscps-lsG+plsYpcDupGshYFLSpTL-.....hhPps.oolsYLsSht.s.WK.shstpchhlp ..................MSSSVYE................SIIQTRASVWGSTASGKAVVDSYWIHELGTGSpLVQ...TQLYSDSRSKSSFGY..TAKVGDLPC...........EEEEILSQHVYIPIFDD...IDFSINIDDSVLALSVCSNTVNsNGVKHQGHLKVLSP..AQLHSIGS....hMNRSDITDRFQLQEKDIIPNDRYIEAANKGSLSCVKEHTYKIEMCYNQALGKV...NVLSPNRNVHEWLYSFKPsF..NQVESNNRTVNSLAVKSLLMSAENNIMPN..SQAFVKASTDSHFKLSLWL.RV..PKVLKQlSIQKLFKVAGDETNKTFYLSIACIPNHNSVETA..LNIoVICKHQL..PIRKsKAPF...ELSMMFSD......LKEPYNIVH.....DPS.YPQRIVHAL.LETHTSFAQVLCNNLQEDVIIYTLNN.+ELTPGKLDLGERTLNYSEDuhKRKYFLSKTLE.....CLPSNTQTMSYLDSIQIPSWKIDFARGEIKIS..... 0 0 0 0 +1411 PF00952 Bunya_nucleocap Bunyavirus nucleocapsid (N) protein Finn RD, Bateman A anon Pfam-B_587 (release 3.0) Family The bunyaviruses are enveloped viruses with a genome consisting of 3 ssRNA segments (called L, M and S). The nucleocapsid protein is encode on the small (S) genomic RNA. The N protein is the major component of the nucleocapsids. This protein is thought to interact with the L protein, virus RNA and/or other N proteins. 
25.00 25.00 27.60 35.70 20.50 24.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.52 0.70 -4.35 17 354 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 96 0 1 246 0 197.20 55 97.72 CHANGED FpDVPppssuTFDPEAuYluFpspastsLshsslRIFFLNt+KAKssLu+puc.pVsLsFGGhphsVVNsHFPt.psNPVsDsuLTLHRlSGYLARWll-pht.ss.pscpshl+opIlsPlAEspGhTWsDGsphYLuFhPGuEMFLpTFcFYPLsIshaRVh+ctMDspahcKslRQRYuslsA-pWhpp+hsslpuAhpsVspLsWu+ouhSsAARcFLupFGIpl ........F.DVsppssssFDP-suYlsFptpaspsLshssVRlFFLNttKAKssLp+psc.plslsFGshphslVNNHFPt.psNPlssssLTlHRLSGYLARWlh-php.ss.t.cpt.h+ssllsPlAEspGsTWs-.G..s....phYLuFhPGsEMFLpTFcFYPLsIshaRVh+shMDspahcKshRQR.Yu.thsAppWhppchstltsuhpsVtpLtWt+sshSsuARpFLtpFGItl........ 0 0 1 1 +1412 PF04196 Bunya_RdRp Bunyavirus RNA dependent RNA polymerase Bateman A anon Pfam-B_2559 (release 7.3) Family The bunyaviruses are enveloped viruses with a genome consisting of 3 ssRNA segments (called L, M and S). The nucleocapsid protein is encode on the small (S) genomic RNA. The L segment codes for an RNA polymerase. This family contains the RNA dependent RNA polymerase on the L segment. 
31.30 31.30 31.40 31.40 29.90 30.40 hmmbuild -o /dev/null HMM SEED 743 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.13 0.70 -6.52 12 767 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 282 0 0 649 0 428.70 23 35.15 CHANGED lshsRh.slhhu.p....hFh....ptshhhh.tps......s.hsh..shpphhp...hphLlslh.cK.phhtlhs.hR.Ylhhtshs.hSsh.chI.chF-c..psshshhhshhIKphlhshtp.sppshh..lplsshshp.pslts.tlhs.shh.tshhs.hcsLlsphYhshYhhsKshcschss.splhpchlEaEpchpppp..........phhth.............h+.hscuhhtphpppsthc..lc.ppsFppslsplushphsphshp.....p.pslcu..o.hscch.chps.ppht..p.........cthphphtccs........ahhupsls.....................hlpshhhsphsppp.shVlchhcpshphhchtp.........hp.hhhKtppptu...RthFltsl.p+lhhth.lEchscslh+hsspEhhopsts+c.hhlppthphth+atstcohhplupuDsu.................................hhsphhalotsho+hostsss..ahahhhlhhsslhhpKchhhslphlp.h...cp.hhhs-hhhphlcpthphpp..sh..ahstshs..hhtsctsahQG.LsasSSLhHush..ha+chht....cth.pl-..........shhchhppSDDuth.luh.t.spsttsphthl................phspthFphtcthhhhhsIh.hs.KpThso..s.hEFhSpF...FpGsshs..hh+hlhsulscstt.shhsDltphpsphsphlctGssspLs.ls.sspp.hshphYu.sssu.hss.tphlph.ps.....phPh.hhhhhstPhtththhGhphss 
..............................................................................................................................................................................................................................................................p....h.s..h..................th.....................................................h.....h...h...............tp.h.t..........t................................................................ht.pt...............h.t.h...................................................................h.p..t.......hl.p......t.h..................h.......h...p.t...+.hal..h...h...h...th..thh....pch.t....t..........h....t....h..t.........p.....................................hht.h..hst.h.hhs....s..hhhhhhhhh.uhhpp+hh.hslphlh.h........cp.h..pc.hh.hh.t..thp...ph..hhp.h.....p.....h.hptshhQG.hphsSShhtshh..hhpph.......phh..h...............hhhp.h.tSsss...h...................................................................................................................................................................................................................................... 0 0 0 0 +1413 PF03181 BURP BURP domain Mifsud W anon Pfam-B_1432 (release 6.5) Family The BURP domain is found at the C-terminus of several different plant proteins. It was named after the proteins in which it was first identified: the BNM2 clone-derived protein from Brassica napus Swiss:O65009; USPs and USP-like proteins Swiss:P21746 Swiss:P21747 Swiss:Q06765 Swiss:O24482; RD22 from Arabidopsis thaliana Swiss:Q08298; and PG1beta from Lycopersicon esculentum Swiss:Q40161. This domain is around 230 amino acid residues long. It possesses the following conserved features: two phenylalanine residues at its N-terminus; two cysteine residues; and four repeated cysteine-histidine motifs, arranged as: CH-X(10)-CH-X(25-27)-CH-X(25-26)-CH, where X can be any amino acid [1]. The function of this domain is unknown. 
20.70 20.70 21.00 21.00 20.30 20.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.91 0.70 -5.17 28 407 2009-09-10 21:17:04 2003-04-07 12:59:11 10 6 56 0 178 416 0 183.20 34 55.05 CHANGED ulFFhEccL+sGsphs....ltFssssst...s..sFLPRphAcslPFSopcls-ILphFultssSspActhpsTlpcCE.ssu..hcGEcK.....hCATSLESMVDFusSpLGsp.slpAhS.Tts.s......tsstppYsl..suV+tls.u..t...sssVuCHp.sYPYsVFYCHpstp.o+AYtVsLhus-Gs.........tscAVAVCHpDTSsWsPp.HsAFplLsl+PGslPVCHFlspscllWs.p ...........................................s.haFh.p.lh..Gphh........hst.t...............hLs+t.....uppl.P.Fstpphstlhth.hsh..so..u.phhppslppC-...tss.......htGE..pK.....hCsoSlEshl-FshuhL...G...p........plp.......sho..oph.t........................hppYtl..tsV.pt..l.....s..s...............sp.VsCHp.sa....P....Ys.VaYCHp.h.........tt..s+sY.Vs.lhus.cus..............tspulAlCHhDTSt.Wssp.HhuFphLtscPG..p.s.s.....VCHalspsp.hlWs........................... 0 15 102 151 +1414 PF03309 Pan_kinase Bvg_acc_factor; Type III pantothenate kinase Mifsud W anon Pfam-B_3452 (release 6.5) Family Type III pantothenate kinase catalyses the phosphorylation of pantothenate (Pan), the first step in the universal pathway of CoA biosynthesis. 
23.30 23.30 24.50 24.40 23.10 23.00 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.49 0.70 -4.81 194 2298 2012-10-02 23:34:14 2003-04-07 12:59:11 9 5 2191 43 659 1901 2016 197.40 29 75.38 CHANGED lLslDlGNTplhhulaptt..........thh.....ph+h..sosttps..........t-phshhltshhphtsh..............h..tllluSVVPshhtshtphhtch...ht......hpshh.......ls.sshph.GlshpY.c...sP..pplGuDRllsslAAhp.ha....s.s..hlllDh.GTAsThDsls..scG.p..alGGhIsPGlslthcALtppsApLP..plt...............htps.t..psl...GcsTssulpuGlhaGhsGhl.-tl...lp.......c ......................................................................LhlDlGNTphhhulhptt.................phht......ph+h.....ts.s.h.tpo...................t-ch..s.hh..l...p..phhp.h.tshp..........................tlhluSVV.Ps.h..h.t.s.h.p.p.hh.h.c.h.ht..........hpshh..............ls......s.shps...Gl.s.hta.c..s.P..p.p..lGuD.......RlsssluAhpha.........sts.hlVlDh.GTAsTh-h....ls.........tcG..p......alGGsIsPGltlphcALtppsAp..LP.p.lp...............lsp.s...t......ssl....GpsTssuhpuGlhaGhsGhl.-tllp.p......................................... 0 270 498 593 +1415 PF04681 Bys1 Blastomyces yeast-phase-specific protein Mifsud W anon Pfam-B_5640 (release 7.5) Family The molecular function of this protein is not known. Its expression is specific to the high temperature, unicellular yeast morphology (as opposed to the lower temperature, multicellular mycelium form) [1]. 
20.90 20.90 21.20 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.02 0.71 -4.50 2 111 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 60 0 75 103 0 144.40 33 78.72 CHANGED ESVlAKRADLGsAFVhlYCNFoVhLDlpAGtsSTR+hLsGRsaDYapE.YtsGSGDGVSLTLpHT-G.DSSNScTTFRYKLuDsNSTVtYSLGNSGGNPFtGHKlTLKsSsDtCPpIEWP-GIPTGVSSGSCGSutNLILThCP.tpshp-FEDE ................................................h..shs..........lGpAhVhN..pCshsVaL.a..Sluusl.us.tp.hl.s.stshphhpp.ha.+..s.s.s.u.GGluLKl.o.p.s.-.G.h..hs.u.ss.pThFsYsLsts......tspVaYDLSslFGs.P.F..sGpp.ls...l...p.......s.....o..ss......sC.....P.sIhW.s.sG.........l.........P.s.........G.s........p..s..p.sCt.ussclhLTLC............t...................................... 0 7 27 58 +1416 PF00170 bZIP_1 bZIP; bZIP transcription factor Sonnhammer ELL anon Prosite Family The Pfam entry includes the basic region and the leucine zipper region. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.15 0.72 -4.01 22 5966 2012-10-02 13:17:30 2003-04-07 12:59:11 16 81 523 288 3305 7374 40 61.00 27 17.17 CHANGED -cplK+p+R+.pNRpuA++sRtRKptchcpLpp+VcsLpsENttLtpclppLccpstpLptpsp .............................++p+R.hhp.N.R.........t....u..A.....p+sRpRK.c............p...h.......lp.......p.......L.......E...p.......c...l..........p..........p....L....p....p.......p.......N.....p....p....L...p..........p...p.h..p...t...L....pp.p.ht.h.....h............................................... 0 830 1626 2510 +1417 PF00386 C1q C1q domain Finn RD anon Prosite Domain C1q is a subunit of the C1 enzyme complex that activates the serum complement system. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.82 0.71 -4.13 46 2356 2012-10-01 20:41:10 2003-04-07 12:59:11 16 38 315 47 1129 1977 431 122.60 27 35.37 CHANGED AFSsshspthsss.t.........slhFcc....llhN.tssYsssTGpFsCslsGlYaFsaplp...shppsltht..Lh+N...spphhthhsptpssp....psuSsuslLpLptGDcVWlplhst...suhhsspt.sposFSGFLl ................................AFpsshsps...h...s........p.........slhF-.p....lh..h.N....h........s...s.....p..Y...s...s.po...G.hFs.ss..l.sGl..Ya....F....saplt..................hpsppl..t.ls.....L.h+N...............s.p.s..h....h...t.s..h.sp.tssp................phuS....s.....u.....s....l....L..p.LptGDpValpl.tt..........puh.h.s.s.t...........th...oo..Fo.GaLl................................................................ 0 266 375 622 +1418 PF01413 C4 C-terminal tandem repeated domain in type 4 procollagen Ponting CP, Schultz J, Bork P anon SMART Domain Duplicated domain in C-terminus of type 4 collagens. Mutations in alpha-5 collagen IV are associated with X-linked Alport syndrome. 19.70 19.70 32.60 20.30 16.50 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.79 0.71 -4.17 26 1172 2012-10-02 16:37:33 2003-04-07 12:59:11 14 49 94 40 532 965 0 108.20 49 15.42 CHANGED uhhlshHSQopslPpCPtGhspLWpGYShLhhsu.sscusGQsLuoPGSCLpcFpohPFl.Css.pssCpYhu.NchSaWLoTsp.......tPhs.s.hsG.plcsaISRCpVCpts .......................hllshHSQo..hp.l..P..p..CP.....t.....GhppLWhGY....S.hLhhpu..spcupGQsLu....osGSCLccFpohPFl.Css..pssCpYhu.NcYS.aWLoTs.......p...........................tP...hs....s.h....s....G.....plcshI.S..RCpVC.t.................................. 0 125 159 312 +1419 PF03595 SLAC1 C4dic_mal_tran; Voltage-dependent anion channel TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Family This family of transporters has ten alpha helical transmembrane segments [1]. 
The structure of a bacterial homologue of SLAC1 shows it to have a trimeric arrangement. The pore is composed of five helices with a conserved Phe residue involved in gating. One homologue, Mae1 from the yeast Schizosaccharomyces pombe, functions as a malate uptake transporter; another, Ssu1 from Saccharomyces cerevisiae and other fungi including Aspergillus fumigatus, is characterised as a sulfite efflux pump; and TehA from Escherichia coli is identified as a tellurite resistance protein by virtue of its association in the tehA/tehB operon. In plants, this family is found in the stomatal guard cells functioning as an anion-transporting pore [2]. Many homologues are incorrectly annotated as tellurite resistance or dicarboxylate transporter (TDT) proteins. 26.20 26.20 26.90 26.40 26.10 26.10 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.30 0.70 -5.79 159 2801 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 2089 12 789 2007 101 296.00 22 86.74 CHANGED pphsssaFuhsMGhuulu...hh........ht.................h......h.......shhthluthlhhlslhlall..hhhh.hhh...+hlha..p..thttp.........h....pcPlpusFhsshshuh.hhlsshhhhh.....................................hhhluhsLWhhuss.lplhhshhhhh..................thh..t..pt....hphpp..hsPuW...hlPhVushlsussu..shhs.....................hhhhuhhhauhGhhhhlhlhsll.hhRlhhpphhsps...........hhPohhIhluPhuhuhhuhhtls..............................................h....shsshhuhhLhshuhhhhhlhlhthhph..................hph............................sF.shuaWAhoFPlushs...hushph.........sthhs...hthhphluh...................hhhhhhshhhhhlhhtslt 
....................................h..hPhshhuhslGhsuhu.h...ht...............................................h............th...t.h.lut..hhhhl.uhllahh.........hlh......+hlha...c.....phhtc.............l....pcPlhushhsshshuh..hll.ss.hhh.h.....................................................h..luh.s.......lWhhuhl..lplshhhhhsh..............................thht..t..t...............h.phcp....hsPuW..hlshVuh.h.lsu.ssu...sshu.........................hh.huhhhhuhGhhhh.hhlhsll...htRL.h.........h..ps.......h.sps...........hpsshhIhhAPhulsssuhhtlst.................................................................s.sshhshh..Lhshuh..hhh.hhhlh.h.h...p.h......hpt............................sF...ssua..h..uaoFPhshhA.suhhph...........uphhp....................s.h.h..chLuh....................h.hlhss.hllhhlhhthl.h.............................. 0 203 426 638 +1420 PF01681 C6 C6 domain Hutter H, Bateman A anon Hutter H Family This domain of unknown function is found in the C. elegans protein Swiss:Q19522. It is presumed to be an extracellular domain. The C6 domain contains six conserved cysteine residues in most copies of the domain. However some copies of the domain are missing cysteine residues 1 and 3 suggesting that these form a disulphide bridge. 25.00 25.00 25.10 25.20 24.90 24.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.69 0.72 -3.93 35 172 2009-09-12 03:01:09 2003-04-07 12:59:11 12 10 7 0 169 145 0 94.00 21 48.65 CHANGED Csssshshuhusspsst.....shsshhssshsso..tsssoohploCsuhss..hpsshhlss........ssssspsus.s.....o.lslshsC..sssuhWhY............sss..pslsol.sC .........................CpsCs.h.hh....hsts........hsshhshstsss...ssCpsh.sloC.pusss......sps..shhhss..........ssssspsus..u......s.sshslsC...sssu.p.Whh.............hu..pslsslsC............... 
0 72 92 169 +1421 PF03596 Cad Cadmium resistance transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.00 20.00 20.50 20.00 19.70 19.50 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.28 0.71 -4.60 6 922 2012-10-03 02:02:08 2003-04-07 12:59:11 8 3 734 0 141 604 8 185.00 42 92.57 CHANGED YhuTulDhLlILhlhFuchKppKphhcIalGQYlGoshLlllSLlhAalls.hlPEcWllGLLGLIPIYLGIKlhIhG-..---EpplhctLpppKhspLhhTVshIThAS.GADNIGlalPYFsTLohspLllslllFlIhIhlLsalup+LAslPpluETlEKYuRWIlslVaIuLGIYIllENsThsslLoh ........................................YhuTulDhLlILhlhF..Ap.hp.p..............p..K...p.............h..h.c.IYhGQ..YLGo.sh.Llh.sS.Llh.AaV.ls.al.Pp.cWl...lGLLGLI.PIaLG....I.....+....h...h........l...........h....s-.............s-..s.E...c...c.....h....hc...p........L........p........t........p..t..........h.............s.............p....L...........l...........hs........VuhlTlAS.GuDNlGlalPYF.soLs.hsplllsLllFllhIhlhshhuphLuslPhl..uEsl.EKapRhlhslVaIuLGlaIlhENsTlphhh.h.......................................... 
1 36 78 122 +1422 PF00028 Cadherin cadherin; Cadherin domain Sonnhammer ELL anon Swissprot_feature_table Family \N 28.80 28.80 28.80 28.80 28.70 28.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.07 0.72 -3.73 57 41902 2012-10-03 16:25:20 2003-04-07 12:59:11 12 548 328 141 21785 37651 1579 93.10 24 44.86 CHANGED hphplsEst....ushlhplsupDtD...tsssspltYplhpss.....sphFplsspsGp........lpstps..LDcEph.........spYpLplhApDp.........h..shssssplplplt ....................................................hpl.E.s.s..s.......s..G...s.....h......l........h........p.......l........p....A..p...D....t.D..................tu..t..N......u........p........l....p..Y...p...l.....h......s...s.s....................................tshF..p....l..s.......s...p.o.Gt........................................lps.......t....p.......s......L.....D...p..Epp.......................................sp..a.p..l...p....l.p.A...pDt....................ut.s..s...h...s...u....s..s.p.lplpl................................................... 0 4217 6276 12981 +1423 PF01049 Cadherin_C Cadherin_C_term; Cadherin cytoplasmic region Finn RD, Bateman A anon Pfam-B_257 (release 3.0) Family Cadherins are vital in cell-cell adhesion during tissue differentiation. Cadherins are linked to the cytoskeleton by catenins. Catenins bind to the cytoplasmic tail of the cadherin. Cadherins cluster to form foci of homophilic binding units. A key determinant to the strength of the binding that it is mediated by cadherins is the juxtamembrane region of the cadherin. This region induces clustering and also binds to the protein p120ctn [1]. 
21.70 21.70 21.90 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.84 0.71 -4.17 64 1652 2009-01-15 18:05:59 2003-04-07 12:59:11 12 99 108 11 822 1256 0 136.60 35 14.31 CHANGED R++ppp....h...........t.-c...DlR.ENllpY-DEGGGE-...D...ppuaDlssLppshss....................ptshhP.hthhs.................................ssssltpFIpc+lccsDsDPs.uPPaDoLpsYs...YEGsG........SsAGSLSSLs.Sso.o-u-p..caDYL.ssWGPRF+KLA-hYGsp ...................................................................................................................................................p................h...c-.....DlR.-NllpYs-EGGGE-......D.........................pp.taDlu..tLpps.tht.................................................p.s.hP....h.............................................................pssph.tp..FIpp+.l.p.t.A.DpDss...sPPaDoLh......sYs...YEGsG.......................................................SsAuS.LSSLt..S..........so....ss........s.-p.........-.............a..-a.L.s-W.G.PRF+pLA-hYut.t.................................................................................... 
0 71 138 400 +1424 PF03507 CagA CagA exotoxin Bateman A anon Pfam-B_918 (release 7.0) Family \N 21.60 21.60 21.90 21.60 20.20 21.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.43 0.71 -4.44 4 2702 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 41 4 1 2704 1 133.20 62 54.67 CHANGED NFSDIKKELNtKL.GNFNNNNN.GLKNSsEPIYApVNKKKsGQsASPEEPIYsQVAKKVsAKIDRLNQIASGLGsVGQAAuF.LK+HcKVDDLSKVGLSASPEPIYATID..DLG.GPFPLKRHDKVDDLSKVGLSREQcLsQKIDNLNQAVSEAKAGaFsNLEQpIDKLKDSTKKNslNLaVEuAKKVPsSLSA .........................................................ph..tN...N.N.N.N.N.GLKN............EPIYApVNKKK.s.GQss..SPE.EPIYAQVAKKVsAKIDpLNph.sSu..lsthh......p.thsh.hptt..ctVsshSt..s.GpSs....S....P.EPIYATID..-hs....usF.PL+RpstVs.........D.L.S.KVG.LSR............p.Qc.L.s.p+.......lssLs.QA................................................................................................ 0 1 1 1 +1425 PF03524 CagX cagX; Conjugal transfer protein Bateman A anon PRINTS & Pfam-B_5812 (Release 7.5) Family This family includes type IV secretion system CagX conjugation protein. Other members of this family are involved in conjugal transfer to plant cells of T-DNA. 
25.20 25.20 25.50 26.70 24.80 25.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.32 0.70 -5.06 150 1580 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 797 15 335 1390 79 205.60 22 75.21 CHANGED Ds+l..........phhsYssspl.hplhsssGhsosIthpssEpl....ts...luhGD................ossWplss.............ptsplhlK..............................Ppp..ss.hsTNlslsTs+.....RoYthcLpup.spshh............................htVpFpY..Pp...............t......ttttttttsssshtss....hphss.................hNap.Yshp...Gsps..hpPhp.laDDGphTalpFsss.hp....hPslFhlss...pG.....p.cplVNh+lps........shllVcplhpp.hhLRhG....pp.sVsIhp..s ..........................................................................................................h.Ys.sthhplhst.shho.l.ht.sEp.l.............s.....lshGs.......sht..Wplts........................tsstlhlK..............................P..hp....s..hp.TN.lhl.hTs+...........RsYphpLtstptp..h.......................................htlpap...Y..Pp.............................t...tt.tt.t.t...t.p....t..sp..p..tt....h.p...tp....................................................hsap..Y.hp................ustt....htP..p.......saDD..G..phTahpFstt.ht.....hPslahlss......u............p.pphlN.h.phps...........shh.llcplhtp.hhL+hG.....pp.hltlhp.s............................................................................................................................... 
0 62 186 252 +1426 PF03185 CaKB Calcium-activated potassium channel, beta subunit Mifsud W anon Pfam-B_2176 (release 6.5) Family \N 25.00 25.00 25.10 26.30 23.70 24.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.46 0.71 -4.94 10 225 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 48 1 119 207 0 174.90 38 79.42 CHANGED tK.+lsou.psGEDRAlhLGLuMlssSVlMaFlLGhTlLpsYhpSlhs-EusCsVLpssIt--.hpCsaoCGs-CcssSpYPCLQVaVNLosSGppuLLaasEEslcpNsK......C.YlP+CpcDppchpsplpslp-pF+cp...QsFsCaasPspppssVLLpRhYsppsLhHsLhWPolhLsGGsLIVlhVKLTQaLSlLsEch ....................................+hs.t.ptGEsRAlhLG..lsMh.ss....ulhhhFll.GhTlLpsa..h..p....S..l...s.pEupCol.lpsplh-p.......hpCsasCGs-Cpthu........pYPCLQV...aV......N.l..o....p...SG......p...p..s..lLa....asE.-sh......p......h.N..c..................CS..YlP.t...........Ct..c.shpcsh...spV.slpp..accp...psFsCahsspt..p..csVlLp+hYst.slhHslhWPsh.hh.h..sGslIlshshhsp.Lulhstt....................................................................... 0 26 31 56 +1427 PF00214 Calc_CGRP_IAPP Calcitonin / CGRP / IAPP family Finn RD anon Prosite Family \N 21.60 21.60 21.60 21.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.80 0.71 -4.11 13 313 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 68 21 149 310 0 106.10 29 83.67 CHANGED MsllphSshLss.lhsL..............pphsthpAsPhRpsL-shss..ss....cst.h.thlh.p.hp.php..ptppppt.....ttsts.......ucKRuCshuTCssp+LAchLsphss.hpsshuPss.luspuaG.RRRRul ...................................................................................................................h.........hh...................................t..h...................t....t......h.thh..t.........p..t.........tt..t...........t.ppp............hpKRsCshuTCssp+LuphLt+....s.s...t...pshsPTs.lGspua.G.++Rp........................ 
0 9 21 58 +1428 PF04847 Calcipressin Calcipressin Mifsud W anon Pfam-B_4547 (release 7.6) Family Calcipressin is also known as calcineurin-binding protein, since it inhibits calcineurin-mediated transcriptional modulation by binding to calcineurin's catalytic domain [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.31 0.71 -4.71 9 405 2012-10-02 20:46:34 2003-04-07 12:59:11 7 4 237 1 233 376 0 174.90 37 76.73 CHANGED .pVFsssps+.........pphcsLapphscslpaphl+SF+RlhlsFss.psAtsA+hphc............t.phpG+p......l+haFu..Qs.ss..sssspaLtPPpssKpFLISPPuSPPsGW-..tp-ssP...ll...saDLhtALupLs....................................................tEchpl+sss.-ssPulll.....+sspst.pttt..............tscschspTpRPsh .....................................p..h.t.p.h.........pphcs..Lappa.s...c....h...oF.....phh.....KSF...+...R...lhlsFssspuAscARh....pL..+.................pp..ph.Gcc...............h+lYFu............Qs.s......................hs....p..pa......LtPP..p..s..sKQFLISPPuSPPlGWc.......tp-ssP....Vl...saDLlt.Al..u..+Lus.......................................................................................................uEp.hE....L+sss....-ssPollV........+sspsp..p.tpt.........................................htts.+..pl.h.pTtRP..h............................................................................................ 
0 60 98 163 +1429 PF02029 Caldesmon Caldesmon Mian N, Bateman A anon IPR000075 Family \N 50.00 50.00 50.80 50.80 48.70 49.70 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -12.95 0.70 -6.07 10 234 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 39 0 77 177 0 296.00 32 83.25 CHANGED DDDEEEAARERRRRARQERLRQKpEE-shGQVo-psEssuQNSVssE-sKsoo.po...t.tsDDEAALLERLARREERRQKRLQEALERQKEFDPTlTDuSlShsS.RRhtND.su-NppsEKEE+pEuRppRpElEETEsVoKScQ+NsacDsE-ccp--+.+EccEEEc.c.tohtpN...............................................................................................................................................................................................................................................................p.hc-phcK-KtsK.-hKphhDtK+Ghs-.KuQNG..EhhT.KLKpsENsFS.s..tt....st.scsA.psEAG++LEEL+RRRsEhEsEEFEKLKQKQQEAAlE..LEELKKKREERRKVLEEEEQ++KQEEA-RKsREEEEKRRhKEEIERRRAEAAEKRQKhPEDGlSE-KKPFKCFoPKGSSLKIEERAEFLNKSlQK.SGVKsoHpsAVVSKIDSRLEQYTsAIE.GTKuuKPsKPAASDLPVPAEGVRNIKSMWEKGNVFSoPuusGTPNKETAGLKVGVSSRINEWLTKTP-GsKSPAPKPS.DLRPGDVSuKRNLWEK 
.......................................................................................................................................................................................................................................t.t...ttp.............t......p.c.p...p..tt........p.tp.p..t.............................p...t......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...........t...................................................................ttt...t..........t..............................................so...h+l.-RsE.LN+ShpK....suh+.sp.s..lucIDphL-QYTpAht...ts+ts+..+.ss.-Lss...s..ltshKohaEtGps.....tsss.ss..K-............................................................................................ 0 3 9 28 +1430 PF05042 Caleosin Caleosin related protein Moxon SJ anon Pfam-B_5163 (release 7.7) Family This family contains plant proteins related to caleosin. Caleosins contain calcium-binding domains and have an oleosin-like association with lipid bodies. Caleosins are present at relatively low levels and are mainly bound to microsomal membrane fractions at the early stages of seed development. As the seeds mature, overall levels of caleosins increased dramatically and they were associated almost exclusively with storage lipid bodies [1]. This family is probably related to EF hands Pfam:PF00036. 
20.60 20.60 20.60 21.10 20.50 20.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.04 0.71 -4.71 9 275 2012-10-02 16:17:27 2003-04-07 12:59:11 8 3 97 0 153 262 3 155.30 43 68.07 CHANGED +-pcphSVLQQHVAFFDpscDGIlYPWETYpGhRulGhshlsShhhulhINluLSYsThPuhhPS.hFPIaI+NIH+AKHGSDSusYDsEGRFhPsNFEpIFSKaA+TtPDtLThtElapMhcuNRsshDhhGWluupsEWhLLYhLA+Dc-GhLpKEuVRtsFDGSLFEpltK ...........................tthosLQpHluFFDpspDGlIaPh-TapG.hRsl.......GhshhhShhsshhIphsh..Sh.T..h..s.......u.......h..h..............Ps.Ph.F..sIYlcNIH...+uKHGSDous.YD.sEGRFlPtpFEpIFuKa..A.+s.t.s.......-tLThtEl.hphhpu...pRp...shD.hG.W....h.....u...uhhEWthhYhL.s...pcc..-G.h..l..pK-slRthYDGSlF.hlt.................................... 0 38 83 123 +1431 PF00915 Calici_coat Calicivirus coat protein Bateman A anon Pfam-B_202 (release 3.0) Family \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.92 0.70 -5.50 11 9404 2012-10-04 01:49:40 2003-04-07 12:59:11 15 15 8145 108 0 7359 0 130.20 53 60.76 CHANGED hhASsD........AssssDGsuuss.lsPEsss.ssshsh-PssuutsAsAsuGpss.lD...sWhts.aVpsshu..TlsPp...GclLaslpLGPcLNPYLuHLSpMYsGWuGuh-VRlhlAGsuhhAGKllhuslPP..ul-slossphshaPHVlhDsRpLEPV.hslPDVRNshaH.ss.sssTh+LVhMlYsPLhsssuus..sshsluspV.T+PSsDFsFhhLhPP..plEppopP...holPplohpphuN.Rasu.IsuhhlsPs..hshQ.pNt+hshDGphhG...hSssplsslcusls 
.................................................................................................t................s.s.Du.s.uu.ss..L.V.PElN.........NEsMALEPVsGAulAAPlu....G.Qp....NlID....PWIpN..N.FVQAP..sG.E.FTV..SPRN......uPGEl.L.hsh.L.....GPc.....L.....N...........P.....Y....L.....u.....H.....L....up....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +1432 PF01067 Calpain_III Calpain large subunit, domain III Finn RD, Bateman A anon Pfam-B_852 (release 3.0) Domain The function of the domain III and I are currently unknown. Domain II is a cysteine protease and domain IV is a calcium binding domain. Calpains are believed to participate in intracellular signaling pathways mediated by calcium ions. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.03 0.71 -4.56 91 1670 2009-01-15 18:05:59 2003-04-07 12:59:11 17 64 274 9 868 1478 9 140.30 28 20.07 CHANGED pW...phshhpGpW.h.....pGs...TAGGCpNa.t.........TFhpNP.............Qatl...............plp-s--p.t..........................sslll..........uLhQ..............KscRpp+p...........t.c.hs........IGFslacl..........t.plsppahhpp...tshspopsahNtREVspR.hpLsPGc..YlllPoTFcPspcu-F.hLRlFocptsptp ............................................................W.p.thhpGpW.h..........pGs.....oAG..G....C.p.N.h...t.................................TF...hpN.P.................................Qa..hl...............plpcssct.........................................slll..........uLh.Q................+s.p.+.p..tpp............................tt.phhs................IG.Ftlacs...................................t.ph..p.pp.h.hhtp.................tshspo.t.s.a..h...s...t..R....-..Vsp..c..hpL.....s...P.Gp......YllVPST....a.....cPpp.pu.cF.h..LRlaoct.....t................................................................................... 0 247 356 615 +1433 PF00748 Calpain_inhib Calpain inhibitor Bateman A anon Pfam-B_543 (release 2.1) Family This region is found multiple times in calpain inhibitor proteins. 
20.80 20.80 21.00 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.84 0.71 -3.82 24 632 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 49 7 111 594 0 125.40 35 73.52 CHANGED lDuLSpDFosut...........st.ssuptcppcschssssu-slpts..ssssspouuPs.sp..psp....h.-DAL-sLusSLGpRpsDP-..-sKshtDcVKEKu.KcE+p-KLGE+--TIPP-YR.Lh-scDK-GKPhhPcs..cc .......................................................................................................................lDtLSpDFst........................t..sstthp..pp..t.....t...tshs.Esl.sts.......ssss.sp...ou.sP...sp..t..tsp................h..D.s.AL-sLusSL.G.p.+p.s.-.P-...-sps.ht-cVc..-Ku...KpEch-KLGE+--TIPP-YR.Lhssp.c..ccG+PhhP.....p................ 0 4 12 40 +1434 PF00402 Calponin calponin; Calponin family repeat Finn RD anon Prosite Repeat \N 21.10 21.10 21.30 21.20 20.70 21.00 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.67 0.72 -6.68 0.72 -4.59 5 1168 2009-01-15 18:05:59 2003-04-07 12:59:11 13 17 129 0 595 1070 0 25.20 59 20.32 CHANGED luLQMGTNKaASQpGMTuaGtsRall .....IuLQ..MGT..N..K.sASQuGMTu...aGssRpl......... 
0 145 202 400 +1435 PF00262 Calreticulin calreticulin; Calreticulin family Finn RD anon Prosite Family \N 19.60 19.60 20.60 20.10 18.70 19.00 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.66 0.70 -5.82 68 1064 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 414 14 509 1004 21 294.80 45 68.78 CHANGED saFhEpF.ts......thpp+Wl.Sptptpt.......t....hGcaplpsu.thhsshptD+GLhhppcA+aaAISs+..hspPhssc.s..................................KsLVlQYpVKhp.psl-CGGuYlKLhssshctp.....p..hs....scT..sYpIMFGPDhCG.ssK.lHhIh.........shhtcpalhptsh.p.....ssph.THLYTLIl+PcN.oaElhIDscpspsG.sLhcD.....ac..h.PPKcIcDPp-cKPcDW-DcthIsDP.cspKPEDWD......................c.PchIPDP-ApKP-DWD....--hDG-WcsPhIsNP.....................cYKGcWps.hIcNPsYKGhWtPpcIsNP-YhpD..p.h.h...s.lsulGh-LWpspushlFDNlll ..............................................hF.EpFpps........hp.scWl.Sptcpp.................Gpaphpts...h..ss..httD..........+..GL.hppcA+aaAlSuc..h..s.sassc..s..................................+sLVlQapV+ap..psl-CGGuYlKLhssshs.p......p..hp....scoP.YtIM.....F........GPDhCG.ssK..lHhIh...............shhtcpHh..h..p...shtt.....hsDph.THLYTL..IlpPDs.oaplhlDspthpsG..s.L.pD.....as...h..PP+...cIcDP...pspKP-..DWD.-.+tpIsDP.pshKP-...DWD.................................p..P..chIsDP-ApK.......P-DWD....--.DG-WcsPhIsNP......................pYK.GpWcs..IcNPsY+GhWh..cIsNP-ahtD...p.h..hh..pshsslGh-lWp......h...pushlFDNhlI......................................... 
0 167 255 377 +1436 PF01216 Calsequestrin Calsequestrin Finn RD, Bateman A anon Prosite Family \N 19.70 19.70 19.70 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.37 0.70 -5.69 3 208 2012-10-03 14:45:55 2003-04-07 12:59:11 12 5 67 13 84 623 10 299.80 55 93.70 CHANGED KsPaLhLAuLhLLlu...p.GsRGEEGLDFPEYDGcDRVIslotKNYKsVLKKYEVLALLYHEPluDDKASQRQFEMEELILELAAQVLEDKGVGFGLVDSEKDAAVAKKLGLDEEDSlYVFKGDEsIEYDGEFSADTLVEFLLDVLEDPVElI-Gc+ELQAFENIEDEIKLIGYFKSEDSEHYKAFEDAAEEFHPYIPFFATFDuKVAKKLTLKLNEIDFYEPFMDEPITIPDKPNSEEEIV-FV+EH+RPTLRKLRPESMYETWEDDLNGIHIVAFAEEuDPDGYEFLEILKpVAcDNTDNPDLSIIWIDPDDFPLLVPYWEKTFcIDLS+PQIGVVNVTDADSVWMEMDDEEDLPTAEELEDWIEDVLEGEINTEDDDDDDDD ......................................................................h...........hh.........s.u-c...GLp.....a..P...pYDG..hDRVhs.l.stKNaKp.sh.K.+.a.c.h.h.sl.hY.Hp.s.....ss.+s.Q+Qap......h.pEhhL.EL...........s.AQVh......E.....c.....+.....s..lG....Fsh..V....Ds..p..K.-..s..t..lAKK..LG.hpE..sSlYl..hK..t...-..c...h...I...E.aD.Gp.....h..uADsLVEFLhDl.l.E.D.P.V..El.I.p..sphElp.uF.-p..h..E..-..c...I...+..L.l.G.aF...K.sc-..S..-.a...Y..+.....A.Fc-A.AEc..FpPa...I...FF.A.TF.-.pt.VA....K.+L..sL...K..h.N.E............l.....D..F......Y......E.P......F....M.......-...E...P....l..s...I.P...s.K..P......o..E.c.El.V.pFl..ccHp.RsTL...R+L....c.s..-..s..MaEsW...ED.D...h.s.GhHI.V...A..FA.E.ctDP..DG.a.E.F.L.EhLKpVApDNTc...N...P.-.LSIlW.IDPDDFPL..........LlsYWEKTFpIDL..t.PQIGVVNVoD.ADSlWh..-hs.....s.......--.DLP......os-ELE-WlEDVL.pGclss-D......DDpcp..p................................................................................................................................................................................................. 
1 20 26 47 +1437 PF03160 Calx-beta Calx-beta domain Bateman A anon Bateman A Domain \N 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.19 0.72 -4.05 32 5074 2009-01-15 18:05:59 2003-04-07 12:59:11 9 429 918 46 2189 4984 1811 97.30 27 17.36 CHANGED sslslh...D.sDts.hlsF-pspapl...EssGt.splpVsphu.GslppsVhVsapTtD.GT....Apu..GsDY..psspupLsFsss.pst.pplpls.l....lDD-lhEps.EpFhltLs .........................................................................................t...p-.......s....hh..........a....p.t.....s...p........h.....p..s........tE.....s......s......G.....s.....l....p....l.s.V....h....R.......ps....G..........s....................s....s....s..........l..........h....V..s..a..........p...T....t......s......G..o......................A..p.s...........G.........s.........D..Y...............ps.s.........p...u..........s....l........s.....F...t.s......G....co................p.....p.......plpls...I.....................l-.D...s...h...h...Ets..Es.FhlpL...................................... 
1 821 1173 1655 +1438 PF01213 CAP_N CAP; Adenylate cyclase associated (CAP) N terminal Finn RD, Bateman A anon Prosite Family \N 27.00 27.00 27.10 27.80 25.90 26.90 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.97 0.70 -5.14 6 489 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 282 4 291 479 4 263.70 35 60.01 CHANGED hppLlcRLEpATuRLEuVs.......sshhRss..............GssssPou.....slAu........................sssslpAFcshhuphlutalclS+cluG-VtppuEhVcpAFpsp+slLpsAupsp+Psps..cLt-LLKPIsEpItcltshREcNRuSKhFNaLSAlSEuIshLGWVuVsPpPssaVpEMcDuApFYTNRlLKEaKcsD.cpV-WV+uYLslhs-LpAYIK-aHTTGLoWsKcG.susstoAhsussouusssP..........PPP....PPPPPP.ssshhppssEuspu.s.psuhuAlFApLNpGEuITpGLKKVocD.KTHKNPsLRspss.sso...PKs.hpsPsP ..........................................................................tllpRLEtsssRLEshs.................t....................................................s.........................ts...............................s...lp.sF.D.p.h..l.st.ls.palph...Spplu.u...V.......tp......p...........uphltpua.p.p+thLhhus.ptppPs.s.....thtplLp..Plsctlppltsh+.Ep.s.Rs..St.h.FN.H....LouluEulsuLuWls.......h...s......s....p......PssaVpEh.suApFYsNR..........VLK-aKc..p..D......pHV..-..Ws+uahpl.h.p.pLpsY....lK..paass.GlsWst.p.G...........h........t.........h...........s..............t...........s..s...s..s.....s...s..P.......................PPP.............Ps.P.PPP...s.ss...t..................t......s.t....t.........tsshu..AlFupl.N.p.G.ps.l.T.p.u.L++Vscs.hTH.KNP.sLRstss.s.stt....st................................................................ 0 91 146 225 +1439 PF04451 Capsid_NCLDV Capsid_Iridovir; Large eukaryotic DNA virus major capsid protein Kerrison ND, Coggill P anon DOMO:DM04206; Iyer L Family This family includes the major capsid protein of iridoviruses, chlorella virus and Spodoptera ascovirus, which are all dsDNA viruses with no RNA stage. 
This is the most abundant structural protein and can account for up to 45% of virion protein [1]. In Chlorella virus PBCV-1 the major capsid protein is a glycoprotein [2]. The four families of large eukaryotic DNA viruses, Poxviridae, Asfarviridae, Iridoviridae, and Phycodnaviridae, are referred to collectively as nucleocytoplasmic large DNA viruses or NCLDV. The virions of different NCLDV have dramatically different structures. The major capsid proteins of iridoviruses and phycodnaviruses, both of which have icosahedral capsids surrounding an inner lipid membrane, showed a high level of sequence conservation. A more limited, but statistically significant sequence similarity was observed between these proteins and the major capsid protein (p72) of ASFV, which also has an icosahedral capsid. It was surprising, however, to find that all of these proteins shared a conserved domain with the poxvirus protein D13L, which is an integral virion component thought to form a scaffold for the formation of viral crescents and immature virion [3]. 
25.00 25.00 26.50 25.50 19.20 23.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.29 0.70 -5.33 37 809 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 193 9 0 474 1501 166.00 33 55.42 CHANGED ssshsssp..las-YlaLDstERpthupss+-YLIEQlQhtsppsh.....sssssspplcls...FNHPsKhLhashp....................................s..stsshsshhss....................s.ss.ssshh............................................................................................................sslpsupLhhNupcRFsppsup.YFshVQP.apphsps.Pss.......................GlahYSFuL.sssshpPsGohNaSRlcsspLplshps......................tsshssspshp.lhlaAhNaNlLRltsGhuGl .........................................................................................s..............................................................................................................................................................s...........................................u.Pshssshsshs.h...................................................................................................................shs.l.pslolhhcsh..s..h.lsp.h.ssc..ahoth.P.aaa..ussh.....ps......................sGhhhhoaAL..shpc.hpPSGphNhuRh..pp..hh..lshcss................................hs..shpp.hc.LlV.AsshNhh..................................................................................................... 0 0 0 0 +1440 PF05159 Capsule_synth Capsule polysaccharide biosynthesis protein Bateman A anon COG3562 Family This family includes export proteins involved in capsule polysaccharide biosynthesis, such as KpsS Swiss:P42218 and LipB Swiss:P57038. 
20.00 20.00 20.00 20.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.64 0.70 -4.92 23 1766 2012-10-03 16:42:29 2003-04-07 12:59:11 9 20 670 0 306 1577 451 256.20 23 64.77 CHANGED -sllhaGsctshcthtth..........pppshshhplE-GFlR...ths.th..PholslDchGlhaDsspPsch-plLpp.................shhststhspspphhshlhppploKYs..................................hpht...p...sps+chlLlssQVhsDtulp.huss...shpshtplLpsstccsP.suplllKsHPtshutp+h............uhhsph.t.tctsplhscDsshhsLlcpsctVhTloSpsGhEALLpsKsVhs...hGtsaYusaGLTpDtphp.......ppptph........slt.Lhstshlpa...............shY .......................................................................................................................................hhha.s..p...t...h............sp.t.h...th...hh.hhE-GalR......................P.....hsl....sh-...chG.l....hsus..h.....P...p....ch.c...hlpp.............................................h.st..t...h.t...h...shth..h.p.h.l..hp..tp.h.s+Yp..............................................................................................t...........t........th.ppchl.Lls..hQ........l......sDssl...p..htss...............thpth...hchl..p..s.....h.t...p...c..s..P....puh..............l.l.hK...H..P..t.s..h..ut.pch.................shh.s..p..........t...hptt...h.h.h..s.c.s..s..sh....s.L.lcp.sctVhTls.S.p.sGh-A.LlpG+sVhs...hGhsaYs..thG..LT...p....ct...h........t.......tpp.ph.....................ht..Lhtss..l.hshY...................................................................... 
0 70 165 239 +1441 PF00194 Carb_anhydrase carb_anhydrase; Eukaryotic-type carbonic anhydrase Finn RD anon Prosite Domain \N 20.10 20.10 20.10 20.20 19.70 19.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.50 0.70 -5.41 14 3112 2009-01-15 18:05:59 2003-04-07 12:59:11 16 24 954 506 1486 2849 68 215.10 26 65.40 CHANGED WGYscpsuspp........WsphaPlAs.....GpRQSPIsIpopcspaDPSLKPLslSYssuoup..plhNsGHohpVpF-Dops+oVlpGGPLs.us.YRLhQFHFHWG..uscppGSEHTVDGh+YsuELHLVHWN..sKYssasEAhspsDGLAVlGlFlKlG..stpstlQ+llDsLssI+pKGppsshssFDPssLLPs...shDYWTY.GSLTTPPLpEsVTWlVhKEPIslSspQltpFRsLLhsscs-pth......hhsNaRPsQPL+sRsV+ASF ...............................................................................................................................................W.........t.............s.........s.t.......G.pp...QSPI...s...l......p...p.p..t...h.............h.......s.....s.........p........h.................t..s..l...p..h.....p..h.............t....t.....pl..h..N.s..G.+..o......l........p......l...p...h.................p......s.................................................h............l......pG..s........h......t..p.....a..............p.............htp.....hH.a.Hh................................sS.EHpl.....s.G....p...p..a..shE...h..H...l....V..Hh................s........................................................p...s...s....s....l......u...V.lu.l......h...h......p......h.u..........p....p.....s.....s...................h....p....t..l...h....p..t....l......t....t....l......t................t.............s.............p......p...s.............h.....t....s.......h.....s....h...p.......p.L..l..Pp.........ptp..Y.....ap.YpGSLTTP.......P...Cs..E.s.VtWh...lh.p.p.s.lp......l.op...........p................Q..............l........t..t.h................p.p......hh.........................................................t.N.Rs...Q..httR.l................................................................................................ 
1 360 612 1041 +1442 PF02977 CarbpepA_inh Carboxypeptidase A inhibitor Griffiths-Jones SR anon Structural domain Domain \N 21.50 21.50 43.10 42.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.80 0.72 -4.38 7 15 2012-10-01 19:46:11 2003-04-07 12:59:11 10 1 5 4 0 18 0 45.30 45 51.05 CHANGED spshlssCNc.CsopuDC.GhThC.........ta...C.+hppossGhshpthulhs ...oNsLGTCNcYCsTNuDChGlTLC.........sW...C.Kh+KosuGhshupCulhP... 0 0 0 0 +1443 PF00619 CARD Caspase recruitment domain Ponting C, Schultz J, Bork P anon SMART Domain Motif contained in proteins involved in apoptotic signaling. Predicted to possess a DEATH (Pfam:PF00531) domain-like fold . 23.90 23.90 23.90 23.90 23.70 23.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.56 0.72 -4.15 60 1660 2012-10-01 21:41:45 2003-04-07 12:59:11 16 127 129 33 789 1599 1 84.60 21 14.74 CHANGED pphlcppRhtLlpplt...lst...lLDtLhpcpVLspc-h-plpp...pso...pts+.sRp..Llch.l.p+GsputphFlpsLpc..tpstLsphlptp ........................................phlcppRhtLlpplp.......lpt...ll..D.tLhp.pp......lls.pp-h-plps.....pso....ptp+.scp........Ll.-h.l..p.+G...p....p.u....hphF.lpsLpc....tpstLhp.l...t.................. 0 218 269 437 +1444 PF01623 Carla_C4 Carlavirus putative nucleic acid binding protein Bashton M, Bateman A anon Pfam-B_808 (release 4.1) Family This family of carlavirus nucleic acid binding proteins includes a motif for a potential C-4 type zinc finger this has four highly conserved cysteine residues and is a conserved feature of the carlaviruses 3' terminal ORF [1]. These proteins may function as viral transcriptional regulators. The carlavirus family includes garlic latent virus and potato virus S and M, these viruses are positive strand, ssRNA with no DNA stage. 
20.70 20.70 21.00 20.80 20.50 20.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.42 0.72 -4.24 10 194 2012-10-01 20:03:16 2003-04-07 12:59:11 12 2 43 0 0 207 0 89.70 45 82.40 CHANGED h+scslhplhcslacp.usshsh-lshsIlshsss+.lut..GRSpYARRRRA+SIGRChRCYRVaPP.hsFoo+CDN+TCsPGISaNh+VssFI ..........................................hhhhllhthh.ch.sshhshclC.VsIhp.hsucsVut...GRSoYAR+RRA.tsIGRChRC.YRVaPP..hpso+CDN+TChPGIShNh+VtsaI......................... 0 0 0 0 +1446 PF00997 Casein_kappa casein_kappa; Kappa casein Bateman A anon Pfam-B_1298 (release 3.0) Family Kappa-casein is a mammalian milk protein involved in a number of important physiological processes. In the gut, the ingested protein is split into an insoluble peptide (para kappa-casein) and a soluble hydrophilic glycopeptide (caseinomacropeptide). Caseinomacropeptide is responsible for increased efficiency of digestion, prevention of neonate hypersensitivity to ingested proteins, and inhibition of gastric pathogens. 25.00 25.00 32.80 32.70 19.00 17.90 hmmbuild --amino -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.93 0.71 -4.53 9 256 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 142 0 12 240 0 137.40 73 97.48 CHANGED ElQNQEQssCpEp-ERlFcpcpVhYlPlhaVLNpaPpYtssYYQpRsuls.hNN.ahsaPYYs+PlllRspAQIPpWQshPNh..........sTssR+PtPHPSFlAIPPKKhQDKTsIPsINTIAslEPTP..lPTs......EPsVNoVssPEASSE.I..STPETTTVsVTSsss .........................h.p+hs.YlPh.YVLspYPpYGLNYYQQRPVAL.INNQFLPYPYYAKP.lAVRSPAQhLQWQVLPNTVPAKSCQsQPTTMARHPHPHLSFMAIPPKKsQDKTEIPsINTIASsEPTs..TPTT......EAlVNT..Vss.....EASSEsI.tSsPETNTsQVTSTtV..... 
0 1 1 1 +1447 PF00363 Casein caseins; Casein Finn RD anon Prosite Family \N 21.80 21.80 22.60 23.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.74 0.72 -3.35 15 200 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 83 0 28 249 0 76.40 34 45.23 CHANGED QlspLpphpp.phtls.spt....tp....hp...to..h..cpshholsQ...pphlphhpplhpa.p....hshhlpslhQYpcshh.Phsphts ...............................................................Q..pLsshps.pLsLs....hhps.......hcQ.....psh..o..hhs.pshholsQ.....+hLsh.ppls.a.Q....tsh.lQsh..h......................... 0 1 1 1 +1448 PF00302 CAT Chloramphenicol acetyltransferase Finn RD anon Prosite Domain \N 23.50 23.50 24.20 24.30 22.90 23.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.40 0.71 -4.70 12 768 2012-10-02 12:01:53 2003-04-07 12:59:11 13 6 590 61 135 572 473 191.50 33 93.28 CHANGED MsFshIDlssWsRKEaF-HYhs.spCTYShTsplDITshhtplKcpuhKhYPshIYhluplVNpapEFRhuhsss.-LuhWDphpPsYTlFHc-TETFSulWo.acsDFspFhpsY.sDltpYucshphFPKsshPENtFslSulPWloFouFNLNltssssYLhPIFThGKYhpcssKlllPlulQVHHAVCDGaHsuRFlNElQ .......................phlDhpsWpR+caFpaahp..ps.tauhTsplDlTt...hh...pt...hKpp.....shpFasshla.hls+shNp........htEFRhth.psp.plshaDplpP..sa.T.lh.p.pc.s.c..oFSslas...p.aps..DappF...hp...th.p...p-h.p.p.......htp.s...p...s.h......h.s.c..t...h....p.....N....h.h..l...SslPWlsFouhs...h.sh.s.s..sss..hh.h.Pl.hThGKah.p.c.s.s+lh.hPlulQlHHulsDGaHlupFhpclQ........................ 1 52 92 122 +1449 PF03123 CAT_RBD CAT RNA binding domain Bateman A, Declerck N anon P39805/1-60 Psi-blast Domain This RNA binding domain is found at the amino terminus of transcriptional antitermination proteins such as BglG, SacY and LicT. These proteins control the expression of sugar metabolising operons in Gram+ and Gram- bacteria. This domain has been called the CAT (Co-AntiTerminator) domain. 
It binds as a dimer [1] to short Ribonucleotidic Anti-Terminator (RAT) hairpin, each monomer interacting symmetrically with both strands of the RAT hairpin [4]. In the full-length protein, CAT is followed by two phosphorylatable PTS regulation domains (Pfam:PF00874) that modulate the RNA binding activity of CAT. Upon activation, the dimeric proteins bind to RAT targets in the nascent mRNA, thereby preventing abortive dissociation of the RNA polymerase from the DNA template [2]. 25.00 25.00 25.60 25.30 24.10 24.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.90 0.72 -4.28 15 2777 2009-09-11 00:54:21 2003-04-07 12:59:11 10 4 1579 7 291 1469 8 57.80 36 21.19 CHANGED M+IcKlLNNNslllp.pcpppEtllhGpGIuFpKKtGDhlss.stIEKhFhLcscc.cpp+a ............hhIpKlLNNNVVl.sp....s..........cp.spE.h.llhG+..GIuF..p..KKhG-h.l..sp...p..p..I..EK.hFh.lcscp..tt...................... 0 79 152 209 +1450 PF00199 Catalase catalase; Catalase Finn RD anon Prosite Domain \N 19.60 19.60 19.80 19.60 19.20 19.40 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -11.94 0.70 -5.78 121 5600 2009-01-15 18:05:59 2003-04-07 12:59:11 14 27 3265 302 1509 4794 143 346.50 44 69.89 CHANGED TsstGssls.sspsohss....Gs...p.G.PhLLpDhpLl-cls+FsR.ERIPERsVHAKGsGAaGpFpso.s-.....hophTpAshhppsGp....cTPlhsRFSTVuGppGSsDss..RDsRGFAlKFYTc-GN.aDlVGNNhPV.FFlRDshKFPchl..HutK.pPposh....pcssth........................aDFhutpPEuh.Htlthlhoc.+GhP..toaRphsGaGsHTaphlN.spGctt...aVKaHacsppGhcslsscEAtp.htupssDatpcDLapsI.cpGsaPpWplhlQlhs.p-spph....p.as.hDsTKlWPcpchPl.......hplGphsLN.+NPsN.aFu-sEQsAFsPusl.VP.......GIphSsD.hLQuR.hFuYsDop+aRLG.sNa.plPlNt.P.......hs...h.s......pDG.hth............t.......ss..tssY.Psshssst 
..............................................................................................................................................................................TpstGh.ls..s.s..psohps..........G....cG..PhLlpDhhhhEc...........ls+F.......s+..E+IPE..R..hVH.A+G.u.G.A.a..GhFpsp..ps.........l..o.chTp.....Aphhpp..Gc..............pTPlhlRF.S.TV...............sGpp.....G.....os..Dsh...RDs.RGFA.......lKF..............Y...........T............-..E....G...........N...a...........DLVGNNh.................P.l.FFl+Dsh...KF................P...Dhl..................Hu....K..pPcspl......pststh..............................WD.....F...ho..h..p...PEuh....Ht.....lhh...lhS...D....RGlP........toa....R.p.M.pGa.G..s..HT..aphl.....N.....s........p................G.cth......aV.KF.Ha............+..................s.......p.p........G....l..........c...s.....L...s..............-..........EAt.........c..........l.............t...........u......p.D...-.....atp+DLapAI....c...p.....G.s.aP.c.WplhlQlhs..c..-t..p..ph......p..as..s.hD..hT..K..lWP....cc...phP.l....................h.VGchs.....L......N....RN....P....c....N...aFu....E....s....E....Q....s....AF.......sP...upl...VP.......G.lp.hSsD....hL.......Q.uR.lFuYsDspt.hRLG..sNa..plPlNp.P.....................tss...hts.p...+DG.hph.......t........ts.....tssY.Psphs...t............................................................................................................................................................. 0 417 840 1230 +1451 PF00666 Cathelicidins Cathelicidin Bateman A anon Pfam-B_276 (release 2.1) Family A novel protein family, showing a conserved proregion and a variable carboxyl-terminal antimicrobial domain. This region shows similarity to cystatins. 
20.70 20.70 20.80 20.70 20.60 20.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.15 0.72 -3.82 8 193 2012-10-01 19:28:07 2003-04-07 12:59:11 12 2 56 5 87 189 1 65.00 50 41.10 CHANGED sLSYREAVLRAVDphNEpSSEANLYRLLELDPs.PpsDpDPsTPKPVSFRVKETVCPRTopQsPEQCD ............LSYcEAVlR.....AVDphNp+Ss-.sNLYRLL..-L.D.......P...P.............ptD.tcssos.KPVSFTVK....ETVCP+o...s.p..ps..sE..pCD................ 0 1 1 18 +1452 PF04731 Caudal_act Caudal like protein activation region Kerrison ND anon DOMO:DM04892; Family This family consists of the amino termini of proteins belonging to the caudal-related homeobox protein family. This region is thought to mediate transcription activation. The level of activation caused by mouse Cdx2 (Swiss:P43241) is affected by phosphorylation at serine 60 via the mitogen-activated protein kinase pathway [1]. Caudal family proteins are involved in the transcriptional regulation of multiple genes expressed in the intestinal epithelium, and are important in differentiation and maintenance of the intestinal epithelial lining. Caudal proteins always have a homeobox DNA binding domain (Pfam:PF00046). 22.40 22.40 22.40 25.60 21.60 22.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.21 0.71 -3.88 14 144 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 47 0 59 120 0 127.30 37 51.59 CHANGED MY.PuslRps.GlN.............LssQNF.....Vuu.PQYsDYsuYHH..VPsh....Dsps.pPsuuWussYusP.REDW.ssYususussush.s.......SP.uphua.sss-Ysshps..............susGlL.........ssssuustthSPuup...R+ssYEWMRKolts....susup .......................MY.Pus...lRps.uls.....................Lss.QN.a......sus.PQYsDasGYHH..hssh......-stt...tss....suWsusYusP.....REDW.ssYu.usssss.s..............os..u.hua.sss-asshts...............susGl.L..........ss.ssss.tt.SPss.p...R+s.apWMR+sstss.ss.t......................................................................... 
0 3 8 22 +1453 PF00689 Cation_ATPase_C Na_K_ATPase_C; Cation transporting ATPase, C-terminus Bateman A, Griffiths-Jones SR anon Pfam-B_137 (release 2.1) Family Members of this families are involved in Na+/K+, H+/K+, Ca++ and Mg++ transport. This family represents 5 transmembrane helices. 25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.20 0.71 -4.75 141 8688 2009-09-12 06:26:15 2003-04-07 12:59:11 16 115 3188 71 3253 7683 303 180.00 22 19.21 CHANGED shPlsslQlLalNLlsDshs.uluLuh-ss.-...pslMp+..Pc.t.ppslhstthhhpl.hhhGhhhuhhslh...sahhshh.............................................h.......shs...........................................................upThsFssllhsplhp.shssRo.....tt..............shht.hshh..pN..hh.....lhhuhhhsl.hltlhlhah..P.........lpph.F.pss....sl.......................shtpW.........hhs.....lshuhhhhhh..sElh....Khl .............................................................................................................PLtslQlLal........NLlhDs.hsulALuh.....-..s...s.....-.................pc.lM..p..+....P....R...t.p.p.......s....l.......h......st.t....h.....htph...lhh..G..hh..u......hhs.hh...........s.a.h...hhhh......................................................................................................................................hh............shs...............................................................................................................spThhFss...h.lhs.Q...hhp..sh.ssRo..........pp...........................................shh.........hshh..............pN...hh..............lhhulh.hs.....h..hlt............h.hlhah.....P..........................hs.ph..F..pht.....sL............................s.h.tpW.........hhs..lshuhhhhhl...splhKh........................................................................................................................................................... 
0 1051 1898 2639 +1454 PF03310 Cauli_DNA-bind Caulimovirus DNA-binding protein Mifsud W anon Pfam-B_3746 (release 6.5) Family \N 21.00 21.00 21.00 22.90 20.90 20.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.59 0.71 -4.26 3 30 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 18 8 1 30 0 102.50 43 89.24 CHANGED LAsItSEIcEllosQKohcu-IKAILE+lGSssP.pouLEoVAAKIIsDLTKcIc-CcCNKEIlEhLspp...DcQIIPoPcEc..tKtLuLsKYSYPNasVGNEELGSSGNPNALKWP.hctPps ..........hsph.KElu.cllos.Kshcs-IKAI...L-hluSps.s......p-sLEslAAKIlpD...lschIscC.CsKcll-tLtst......cppl...p.pct.......tpthshtK....YSaPNasVGNtpLGSStsPpALpWP.......s..................... 0 0 1 1 +1455 PF03233 Cauli_AT Caulimo_AT; Aphid transmission protein Bateman A anon Pfam-B_3118 (release 6.5) Family This protein is found in various caulimoviruses. It codes for an 18 kDa protein (PII), which is dispensable for infection but which is required for aphid transmission of the virus [2]. This protein interacts with the PIII protein [1]. 21.10 21.10 21.20 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.14 0.71 -4.53 6 35 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 22 0 3 37 2 136.50 49 73.75 CHANGED MS.shos.PHIYKK.-pllRLKPLs.lsSNsRpYhFSS..pKusIpuIhsHCNNLNpIVuRsaLtlsKl.SYFGLpKDsSEthSKsKsP..shFsshppIF+cGGsspcppsp.lcoLpEhpN......RItclpppsKcLs-p.l.scsLhKc.VKDhcEpLscIc-ulKsIIG .......................MS..lTupPHlYKK.-pIl+LKPLs.lsSNsRpYhFuS..sKusIpsIlsHhNNLNpIlGRshLtlhKlsSYFGLp........KDsSE............SKSKsP..SVF.ssucsIFKsGGsDh.ssphc.plcsLhEhpN........+IcpL-pthppLspc.I.scs.hcp.lK-hccplppIc-tlKsIIG............ 1 0 0 2 +1457 PF01693 Cauli_VI Caulimo_VI; Caulimovirus viroplasmin Bashton M, Bateman A anon Pfam-B_1373 (release 4.1) Family This family consists of various caulimovirus viroplasmin proteins. The viroplasmin protein is encoded by gene VI and is the main component of viral inclusion bodies or viroplasms [2]. 
Inclusions are the site of viral assembly, DNA synthesis and accumulation [2]. Two domains exist within gene VI corresponding approximately to the 5' third and middle third of gene VI, these influence systemic infection in a light-dependent manner [1]. 19.90 19.90 19.90 20.00 19.70 19.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -7.98 0.72 -3.93 212 1222 2009-01-15 18:05:59 2003-04-07 12:59:11 11 36 808 7 520 1128 69 43.80 36 16.86 CHANGED Ka..YuVtpG+p..s.........GlY.soW...s-scpp......VsGasuAp..aKuF.sohpEApta .....................a..YuVt..pG+p..s............Gla..soW....s-Cppp........VpGasuAp......aKsF.sohpEApta...................... 0 188 333 458 +1458 PF04771 CAV_VP3 Chicken anaemia virus VP-3 protein Kerrison ND anon Pfam-B_2147 (release 7.6) Family This protein is found in the nucleus of infected cells and may act as a transcriptional regulator. It induces apoptosis, and is also known as apoptin [SwissProt annotation for Swiss:P54094]. 25.00 25.00 25.10 41.20 24.70 18.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.63 0.71 -4.14 7 93 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 0 0 63 0 108.30 94 99.84 CHANGED MNALQEDTPPGPSTVFRPPTSSRPLETPHCREIRIGIAGITITLSLCGCANARAPTLRSATADNSESTGFKNVPDLRTDQPKPPSKKRSCDPSEYRVSELKESLITTTPSRPRTARRpIRL .............MNALQEDTPPGPSTVFRPPTSSR.PLETPHCREIRIGIAGITITLSLCGCANARAPTLRSATADNSESTGFKNVPDLRTDQPKPP.SKKRSCDPSEYRVSELKESLITTTPSRPRTARRCIRL............. 0 0 0 0 +1459 PF01146 Caveolin Caveolin Finn RD, Bateman A anon Prosite Family All three known Caveolin forms have the FEDVIAEP caveolin 'signature motif' within their hydrophilic N-terminal domain. Caveolin 2 (Cav-2) is co-localised and co-expressed with Cav-1/VIP21, forms heterodimers with it and needs Cav-1 for proper membrane localisation. Cav-3 has greater protein sequence similarity to Cav-1 than to Cav-2. 
Cellular processes caveolins are involved in include vesicular transport, cholesterol homeostasis, signal transduction, and tumour suppression [1]. 25.00 25.00 25.60 25.60 21.60 21.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.88 0.71 -4.62 21 305 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 100 0 153 266 0 143.40 45 85.68 CHANGED sc-ph...........................sctps...h..........p-.cptthshssRDP+plNsc.lKlsFEDVIAEP.suoHSFDtVWhsSassFploKYhhYRlLosLLulPlAhlhGllFAlLSslHIWhVsPhl+ohLh.lsslpplWs.shcshhsPlFpuhG+hhSulplplpcp ...................................................s......p.pt...h.hDscpt.thshssRDP+plNsc.lKlsFEDVIAEP.puTHSFDslWpsSassFploKYahYRlLoslhuIPhAllaGIhFAhLSFlHIW......hVsPslKohLI.t...lpslpplashhlcsh..ssPLFpulG+hFSslplphp+.-...... 0 40 53 98 +1460 PF02275 CBAH Linear amide C-N hydrolases, choloylglycine hydrolase family Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_5806 (release 5.2) Domain This family includes several hydrolases which cleave carbon-nitrogen bonds, other than peptide bonds, in linear amides. These include choloylglycine hydrolase (conjugated bile acid hydrolase, CBAH) EC:3.5.1.24, penicillin acylase EC:3.5.1.11 and acid ceramidase EC:3.5.1.23. This domain forms the alpha-subunit for members from vertebral species, see family NAAA-beta, Pfam:PF15508. 
20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.94 0.70 -5.00 8 2070 2012-10-03 21:14:07 2003-04-07 12:59:11 13 9 1418 42 386 1606 83 285.70 24 86.92 CHANGED CTulshcspcsphlaGRNMDas......hshsppVI...lhPRsaslsh.cp.ss.hss+h..............AhlG.MGshhtshP.lasDulNE+GLuhAGLYa..ssaspap+ssccspssIssh.lhpaVLsNsoSV-EVKctLpphslVs.....sl..slh.hs.sLHahlsDtSGEulVI.EssK-.uLcVa-s.phGVhTNsPsa.cWal..TNLspYpulp.p..pshhhschclssaupGhGtlGLPGDhTPusRFlRsuahKtsh.cspsEstulsshFcILuossh.+GsVls.psp.chTlYToshssspGpYYachY-s.plptlsL.ccsL..............Dss-.hoa ..................................................................................CTuls.h..s..t.......p..s....p.hhhGRohD.at...........h.s..h.ss..p.lh.........lh.P..+...s.....h...p....h....st........p....t......s......s......p..........h......p..t..ph..........................uh.lG...h..u..h..s...........h...t.........s.h.......h.........h....s..D.G....hNEcGLu..h..ut..hah...........ss..h..up.a..t..p...t............p...t.....s.....p......s.....ls.....shph.l....palLsphsoVpEsc.pt.l..p.....p...h..p..l..ls...............................th.......st..h......s..........h..s...s.l.....H.ahlsD.tsGcolll.E..h..ps....t..........l.p.l........a............-s...........ht..Vh..TN...sPs.a..s.hph..tNL..p.pYh.hp.........................t..s.........h......s.s...........h......p.........h.................s.....t..t...Gh.h...s....L.P.G.s.hosssRFlRss.......a.h............p.......ts.............h...s.............p.............s................s............s............p............p............pu.l....sphapllpsssh...P.......h.......G....h......s.........h.........s.....t...........p........s..........p.......................p.......h.......Thaposhshppth.h.Yhcshts.p.l.htlphpp..........................h.......................................................................................................... 
0 137 230 310 +1461 PF03914 CBF CBF/Mak21 family Wood V, Griffiths-Jones SR anon Pfam-B_3822 (release 7.2) Family \N 20.50 20.50 23.40 22.80 19.50 19.30 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.16 0.71 -4.79 111 959 2009-01-15 18:05:59 2003-04-07 12:59:11 12 15 309 0 670 933 15 173.80 25 23.17 CHANGED hsl.uL.hlh.t.lhpp....p.....slc........h.......scFYppLYphL..ssp...........................lhpss.+.pp.................................................hlpLLpphL..pssphsh..pplsAFlKRLhpl.ul.ptsssshsuhlhhltpLhpp...aP.shpsll.pspp.................t..................t...........t......................................................................................sYss......cc.....c-Pphs.s.A.po...sLWElphL.ppHaHPsVsphAp .......................................................................................sl.uL.hlh.....lhpp....p...................sl-h.......scFYppLYphL..ssp...................................hhpsp.+.tp........................................................................................hhlplLpthL.............pssp....hsh.........tRlsAFlKRLhpl.uL.......pt....s...s.ss.......hhu.hLhhlppLhpp......aP...phpsl..l..cppp.pt..........t......................................................t................................................................................................................................sY..ss.................ppc-Pphs...s..A.po.......sLWElp........hL.p....p.H.aHPsVsphA...................................................................................................................................................................... 0 233 374 558 +1462 PF02312 CBF_beta Core binding factor beta subunit Mian N, Bateman A anon Pfam-B_12381 (release 5.2) Family Core binding factor (CBF) is a heterodimeric transcription factor essential for genetic regulation of hematopoiesis and osteogenesis. 
The beta subunit enhances DNA-binding ability of the alpha subunit in vitro, and has been show to have a structure related to the OB fold [1]. 25.00 25.00 27.50 27.40 24.10 23.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.99 0.71 -4.64 8 133 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 83 10 77 145 0 147.10 52 84.21 CHANGED MPRVVPDQ+SKFEoDELFR+LSR-SEl+YTGYRDRutEERpsRFpNuCR-G+uEloFVAoGTNLpLpF.ssps.htp..............cc.lDFD+EhGKVHlKS.FIhNGVCVpa+GWlDLcRLDGhGslEaDEcRAppEDulh+csl-ptppRhpEFE-cpRta+csppsphpt......u.sl .............................MPRVVPDQ+uKFEs-EhFR+LSREsEl+YTGaR-RshEER.phR.F.NsCR-...Gcs...El.uFVAo..GTNLpL.F.ssp..htp..............cc.sDF-+EtG.KVaL+ushIhNGVCVha+GWlDLpRLDGhGCLEaDEcRAtpEDAhh..ppth...-p.hppRhREFE-ppRsa+pp.ps..pt....t.sss............................... 0 20 25 56 +1463 PF02045 CBFB_NFYA CCAAT-binding transcription factor (CBF-B/NF-YA) subunit B Mian N, Bateman A anon IPR001289 Family \N 20.10 20.10 20.10 20.40 18.70 19.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.13 0.72 -3.43 25 558 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 271 0 319 542 1 56.50 67 18.95 CHANGED --PlYVNAKQY+uIlRRRpuRAKhEt...pp+...................l.s+sRKPYLHESRHpHAh+RsRGsGGRF .......EpPlYVNAKQ........YHtIL+RRpuRAKLEt...ptK.............................................l..s.KsRK......PYLHESRHpHAM+RsRGsGGRF..................................... 1 91 179 258 +1464 PF00808 CBFD_NFYB_HMF Archael_histone; Arch_histone; Histone-like transcription factor (CBF/NF-Y) and archaeal histone Bateman A anon Pfam-B_1351 (Rel 2.1) & Pfam-B_3673 (Rel 7.5) & Pfam-B_2078 (Rel 8.0) Domain This family includes archaebacterial histones and histone like transcription factors from eukaryotes. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.82 0.72 -3.91 28 2710 2012-10-10 12:36:46 2003-04-07 12:59:11 18 35 501 23 1800 5141 81 64.60 30 31.69 CHANGED tpLPlAslp+IhKps..sst+..lup-ApchlscslpEFlphlsscAs.-hspcppRKTlps-DlthAl ................................h..LPhAp.lp+....I....h....K.....p.....s..........s....ssp............l..up....-Ah.h...l...............s.c.....ss..p.FIphlo...pc.........At....p...hs..p......p..p..p....R....K.......T.l..pspDlhhAh..................... 0 538 1012 1477 +1465 PF01656 CbiA CBIA; CobQ/CobB/MinD/ParA nucleotide binding domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_782 (release 4.1) Domain This family consists of various cobyrinic acid a,c-diamide synthases. These include CbiA Swiss:P29946 and CbiP Swiss:Q05597 from S.typhimurium [4], and CobQ Swiss:Q52686 from R. capsulatus [3]. These amidases catalyse amidations to various side chains of hydrogenobyrinic acid or cobyrinic acid a,c-diamide in the biosynthesis of cobalamin (vitamin B12) from uroporphyrinogen III. Vitamin B12 is an important cofactor and an essential nutrient for many plants and animals and is primarily produced by bacteria [4]. The family also contains dethiobiotin synthetases as well as the plasmid partitioning proteins of the MinD/ParA family [6]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null --hand HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.18 0.71 -4.90 79 20629 2012-10-05 12:31:08 2003-04-07 12:59:11 18 135 4514 62 5745 39422 11955 208.60 17 65.21 CHANGED lslsutKGGsGKTThussLsthLup..tGh+VhshDh.Dspssssthhh...............................t.............h...htpshtsh.....................................thhsh.hhhph...tthpt..h......................................hpthhpthhptt....aDhlllDsssulsph................hspslsssshlllshpsph..hu..ltuhtphhphhpph..............lhGlllN+ht.s..pthtp.hh.thh.h.hhh....hh.........thhscshtlsp ..............................................................................................................................................lslsstKGGV..GKTT....s.......s..........s.s....L....u.......t.......s.L...........u..............p........p............G..........h...........+............V........l...l.......l........D.....h.....D....s.....p....s....s..s...s...s..h.ht.h............................................................hh...................................................................................................................................................................................h.h.thp...................................................................h...l.p.p...h...l..p..p...hhptt.............aD..h....l..l..lD....s...s....s...u....l.s.th......................................................................................s.h.s.u....l...t.....s.....u......c..........h..........l.......l........l...s...h...p....sph.........hu............lp.u....h.....t..p....l.....h.....p....h...l...p...p.ht..................................h.h.h.s..l..l..h..s.h...h..t....t........pth..t..h......t.......h.......................................................................................................................................................................................................................................
..................................................... 0 1917 3836 4923 +1466 PF02570 CbiC Precorrin-8X methylmutase Bashton M, Bateman A anon COGs Family This is a family Precorrin-8X methylmutases also known as Precorrin isomerase, CbiC/CobH, EC:5.4.1.2. This enzyme catalyses the reaction: Precorrin-8X <=> hydrogenobyrinate. This enzyme is part of the Cobalamin (vitamin B12) biosynthetic pathway and catalyses a methyl rearrangement [1,2]. 21.30 21.30 23.90 29.00 20.60 20.20 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.14 0.71 -4.89 155 1640 2009-09-11 02:55:36 2003-04-07 12:59:11 10 16 1546 15 459 1277 315 196.50 41 85.28 CHANGED sIh.pcSFthIcpEhs.....p..thsstp..tpllpRhIHuouDh-hsc..hlpFos....s..............ulpuuhsAL.psGu..sIlsDspMVtsGIspttL.t......ssplhChls-sclsphApptGtTRosAulchhtp..c.........hsuullsIGNAPTALhcLl-hl..cp..sts+PALlIGhPVGFVuAsESKctLtt........slPhIslcGR+GGSslAuAslNALlphsp ...................tI.ccSFtlIcsEhs........p.....th..s.sp.p..ptllhRhIHssuDh-.hs..c..pltFos....s..............slpsutpAL.....ps.Gu..........sI.lsDspMltsGIs+ptLs.t.......s.....sclhChlsDscVschAc.......p.tGhTRSsAAl.-lhtp...c...........hssu.lh.sIGNAPTALa+Ll-hl..c..p....s..s.s..c.PAhllGhPVGFVGAAESKct.Ltps.................slPalsspGR+GGSslAAAhlNALhh..................... 0 144 309 396 +1467 PF01888 CbiD CbiD Enright A, Ouzounis C, Bateman A anon Enright A Family CbiD is essential for cobalamin biosynthesis in both S. typhimurium and B. megaterium, no functional role has been ascribed to the protein. The CbiD protein has a putative S-AdoMet binding site. It is possible that CbiD might have the same role as CobF in undertaking the C-1 methylation and deacylation reactions required during the ring contraction process [1]. 
25.00 25.00 35.50 27.20 19.40 21.50 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.51 0.70 -5.42 6 1191 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 1139 1 338 1049 276 250.80 40 67.94 CHANGED scph+pGYTTGosAoAAAhAAlhsLh.ucphcpVplpsPs.GhplplslEpsch.puppAhAslhK-uGs.DhDlTpGh.IhuEVphpsGp.-lhIpGGEGVGhVT+.Gl.V.hGEAAINssPR+hIccslpchls-...scGs.VsISlPcGEclApKThNs+LGIlGGISILGTTGIVpPhSscuacsSLs.plslAhApsac+llhssGspGp+aARchh.slsp-thlphuNFhGahl-cAtcpGsccIlLlGasGKLIKlAuG ..................................s...LRpGaTTGoCAsAAu+AA...hhhL....l....p.......t...p...............h..........c...p..Vp.ls..hP......p...G...........p.......lphs...l...........pp...........hph......p........s...............s..........t..........A......t...........A..ulhKDuGD.D.PDlT+GhhI..hu.....pV............p...............h...............................s.............s.........t..................s...........t.............l..............h................lpu..GpGVGpVT+...GLsls.l....G.csAINPsPRcMIp...p...s...l...p...c...h...h...s..t..........................stu....hpl..pIslPpGEclAp+ThNsRLGIlGGISILGTTGIVpP.h.Spp....Aa.hpSlpht...l..c..l..t.....p.A.p.G..h.....p.p..llhssGstuEchsp....p...........h.........h......s....l...........sp.psllphusFlGhhLc...t...st......c........h......s............hp......c......lhlsG.th.GKlsKlAuG............................. 0 118 232 295 +1468 PF01890 CbiG_C CbiG; Cobalamin synthesis G C-terminus Enright A, Ouzounis C, Bateman A anon Enright A Domain Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. 
In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process [1]. Within the cobalamin synthesis pathway CbiG catalyses the both the opening of the lactone ring and the extrusion of the two-carbon fragment of cobalt-precorrin-5A from C-20 and its associated methyl group (deacylation) to give cobalt-precorrin-5B [2]. This family is the C-terminal region, and the mid- and N-termival parts are conserved independently in other families. 21.20 21.20 21.20 23.40 20.90 20.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.28 0.71 -3.86 204 1400 2009-01-15 18:05:59 2003-04-07 12:59:11 11 24 1369 5 399 1196 300 120.40 30 34.00 CHANGED slslGlGCc+ss..shppltpslppsLtp.......tslshpslsslAol-hKt-Es...ulhphApphs..hPlphassppLpt.t....hsss.SphVhpph.Gss..uVAEu.AALhuu............ssup.........LlltKpthss..............sTlAlAp ............................................................t.lslGlGC++s.s..shppltphlppsLpp............t.sl...s.tu.lps...luolclKtsEs...ullplApphs..........lP.hphass-pLpphp..........................h.ssSsh.Vt.cpl.Gss..uV.uEsAAlhss.............................s.sup..............................................LlspKhttss...............sThAlu................................................................ 0 127 262 337 +1469 PF02571 CbiJ Precorrin-6x reductase CbiJ/CobK Bashton M, Bateman A anon COGs Family This family consists of Precorrin-6x reductase EC:1.3.1.54. This enzyme catalyses the reaction: precorrin-6Y + NADP(+) <=> precorrin-6X + NADPH. CbiJ and CobK both catalyse the reduction of macocycle in the colbalmin biosynthesis pathway [1,2]. 
28.60 28.60 29.00 31.70 27.50 28.50 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.62 0.70 -5.05 90 1417 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 1368 0 341 1152 320 240.60 29 86.82 CHANGED h+lLlLGGTsEAptLAptLssts..........lssshShAGRstp..Ps..sh.slRlGGFG.....GssGLssaL...cpptlshllDATHPFAuplSpNAspAspptGlPhltlcRPsWp.ts.......sDp.WhpVsshspAsphL..........h...s.pR.VFLslGRppLstF......tthsppphllRsl-ssps.....slsh.ssspllhsRGPFshp........sEpuLhcpapI-slVoKNSGGs.ustsKlpAARpLGlsVlhlcRP.slP.sstt....hsssscslsalt.ph ..........................plLlLGGTs-uptluptLstts.............lsssoshuuchtt.........st.....ss...tl.+sGsh.......shcsltpal.....cc.p...pl..c..hllDATHPaAsplSpsA.h........pA.................s...c...........ps.....s...........lPhlRhcRs...sh..t....t.............................scs..hhhVsshppAsphh..................ph.......s.pp...lhLTsGppsLsta.........thsppphlsRslssscs......plsh..stt..cllsh+GPF..oh-..............hptslhcp..h....phc...........slVoKsSGus.uhptKlpAAtchGl.sVlhlpRP...sls......s..t.ph.....hpshsphhphl..t.................... 0 105 223 288 +1470 PF01891 CbiM Cobalt uptake substrate-specific transmembrane region Enright A, Ouzounis C, Bateman A anon Enright A Family This family of proteins forms part of the cobalt-transport complex in prokaryotes, CbiMNQO. CbiMNQO and NikMNQO are the most widespread groups of microbial transporters for cobalt and nickel ions and are unusual uptake systems as they consist of eg two transmembrane components (CbiM and CbiQ), a small membrane-bound component (CbiN) and an ATP-binding protein (CbiO) but no extracytoplasmic solute-binding protein. Similar components constitute the nickel transporters with some variability in the small membrane-bound component, either NikN or NikL, which are not similar to CbiN at the sequence level. CbiM is the substrate-specific component of the complex and is a seven-transmembrane protein [2]. 
The CbiMNQO and NikMNQO systems form part of the coenzyme B12 biosynthesis pathway [3]. The NikM protein is Pfam:PF10670. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.60 0.71 -4.72 154 1775 2012-10-03 02:46:00 2003-04-07 12:59:11 11 5 1495 0 550 1442 303 202.70 27 84.38 CHANGED HIsDGhL.ssshssshhslusshlshuh.....++lcpp......t...lPh.........................lul.huAhhFssphlslPl..su..oosHhlGsuLhulllG.sasuhlshslsLllQAllFucGGlssLGsNshsMulsus.hsua.hhhp..................l.......hph.........................thhluuhl....u...uhlulhhuulhsultLuls.s..............................................shthshshhh.......hsalsl.ull.EGllTshllshlt..+hcPch ......................................HI.-GhLss.hshshh..s..h.s..hshlsh.ul.......h+lppp....hp....ppp.........hsh.........................hul......s..uA......hh...Flh..p..h...l..pl...Pss..sG.....ossHh....h..G....s..uLsull....hG..shssslh.hs...l..l.Llh.QA....L....L.h..u..c.G....G..l..s...s...LGs.Nshs..Mul.h.ushlua..hla+..h......lpch....................thphhlusFl......u....uhl..us.h.h.s.h.h....s....s....u...l...p.L.ulu..hs...t.........................................hshhthhsh.hh....hsplsl..uls.EGllTshlhshlt.pht................................................................... 0 185 372 476 +1471 PF02553 CbiN Cobalt transport protein component CbiN Bashton M, Bateman A anon COGs Family CbiN is part of the active cobalt transport system involved in uptake of cobalt in to the cell involved with cobalamin biosynthesis (vitamin B12). It has been suggested that CbiN may function as the periplasmic binding protein component of the active cobalt transport system [1]. 
25.00 25.00 32.70 32.30 21.50 20.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.61 0.72 -3.98 12 603 2009-09-12 00:29:08 2003-04-07 12:59:11 10 1 593 0 137 328 12 71.60 44 67.62 CHANGED hpplllLlh.hhlhllsLll....h.hpcts.hG.uDspAEcsIpc.lsstYcPWFpPlaEPPSGElESLLFuLQAA ........h..phllllh..sl.h.lhllsLll.......h.....hh....s...c.sut..aGGoDspAEptIpp.l.s.spYcP...WFpP...l...a......E.....P.....s.....SGEIESLLFsLQu.. 0 47 99 122 +1472 PF02361 CbiQ Cobalt transport protein Bashton M, Bateman A anon Pfam-B_673 (release 5.2) Family This family consists of various cobalt transport proteins, most of which are found in Cobalamin (Vitamin B12) biosynthesis operons. In Salmonella the cbiN, cbiQ (product CbiQ in this family) and cbiO are likely to form an active cobalt transport system [1]. 21.40 21.40 21.50 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.53 0.70 -4.79 21 6688 2009-09-20 21:39:57 2003-04-07 12:59:11 11 12 2806 0 1371 4611 488 211.40 20 79.22 CHANGED tsshlHclsschKLlhhhhhllhssls.shhsh.llhsllhhshhlsths......phhtphhhh.hlhhlhshlhlhh.................................th.sthlhssss.......................................................h.lhp.G..............................hhtulhlshRhhshlhshhh....LshTTsh.ElssulcclthPh.........lshhhhLuhRal.hlhcEhpplhpAppsRshph...p.shtpthpplshLlsshhlp.uhc+uEclshAMpuRGYss...tpsph 
...............................................................................................t....hpplss.t.sK.l.h.h.h..h..h..h..h..h..h...h..h..h....s..........s...h...........h..h...h..h..h..h..h...h..h.h..h..h...h..h..h.h..s.th....................h..h.h...h..h.h..h..h....h...h..h...h..h.l..h..h..hh.hhhh...............................................................h....t..t..t...t..h..l...h..t..h.hh...........................................................................................................h....l..t.t.t.u........................................lhh.u......h..h.l...h.h.....R.h.h.s..h.....lh.hs.hl................hsh...TTss.sp.l.h.s....u.......l.p.p.l..h..hPh.............................................................cluhhlslslRalPhlhc-hppIt.pApcsR...Gh..ph..........t...t......s....h......h..........p....p......h.....+.......s...h...h..........l....l....h.PL...h.hp....uhc+u-plutAM-sRG..aps.........h................................................................................ 2 510 954 1206 +1473 PF01903 CbiX CbiX Enright A, Ouzounis C, Bateman A anon Enright A Family The function of CbiX is uncertain, however it is found in cobalamin biosynthesis operons and so may have a related function. Some CbiX proteins contain a striking histidine-rich region at their C-terminus, which suggests that it might be involved in metal chelation [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.11 0.72 -3.92 76 2846 2012-10-01 23:23:09 2003-04-07 12:59:11 12 22 1311 18 915 2680 601 103.40 22 64.24 CHANGED HGS.+c.scuspshtphs.phltcths..h.V.phuFl-hsp.Pslppul....p.plhtt.....Gsccl.......lllPhhL.hsG.hHscpcI......sp.......lp.....phptphs........................thp..l..hhupsLGscstlhplltp .................................................+Go.pc....sp....us...t..th....p....ph....s..p....tlt....p..........p.........h..........s.....h...................V..........c....h..........uF...l...........p........hsp....P...s...lppsl................p.ph.htp...............Gsp.c.l.......................lll..Ph.h.....L.hsG.hHh..c..c..cI.........st.......lp.........php.t.phs.............................................................h.p.h.thu...s.sL.....Gs..cstlhphl..t............................................................... 0 286 665 834 +1474 PF02262 Cbl_N CBL proto-oncogene N-terminal domain 1 Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3949 (release 5.2) Domain Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. Cbl_N is comprised of 3 structural domains of which this is the first - a four helix bundle. 
25.00 25.00 25.10 25.80 20.10 24.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.68 0.71 -4.43 3 228 2009-09-10 23:26:51 2003-04-07 12:59:11 11 12 88 35 117 222 0 122.10 60 17.48 CHANGED .suosDKKhLEKsWKLMDKVVKLCQsPKLNLKNSPPFILDILPDTYQHLRLIaS+NEDpMclLp-NEYF+VFlENLM+KCKQsIKLFKEGKE+MY-ENSc.RRNLTKLSLlFSHMLuELKAIFPsGlFtG ..............ssD++sl-KsWKLMDKVV+LCQsPKLsLKNSPPYILDlLPDTYQHLRhI......h.S.+Y........-.....s......+....h.....t.....t.....L....u....-.....NEYF+lal-NLh+KoKpsIpLF.....KEGK.E+MY..-EpSp.RRNLTKLSLIFSHMLAELKAIFPs..GhFpG.................... 0 24 34 66 +1475 PF02761 Cbl_N2 CBL proto-oncogene N-terminus, EF hand-like domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3949 (release 5.2) Domain Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. The so called N-terminal domain is actually 3 structural domains, of which this is the central EF hand domain. 20.00 20.00 20.30 20.80 19.70 19.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.00 0.72 -3.91 5 227 2009-01-15 18:05:59 2003-04-07 12:59:11 9 15 92 35 122 226 0 81.50 68 11.39 CHANGED pFRlTKA-AcpFWRcpFGsRslVPWupFcotLspsHPlosG.hEAhALKoTIDLTCNDaISlFEFDVFTRLFQPWsTLLRNWQlLA ......................FRITKADAA-FWRctFG...c+....TI.VPWKhFRQsL+cVHsISSG...LEA.MALKSTIDLTCNDYISlFEFDIFTRLFQPWuoLLRNWphLA.............. 0 28 39 74 +1476 PF02762 Cbl_N3 CBL proto-oncogene N-terminus, SH2-like domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3949 (release 5.2) Domain Cbl is an adaptor protein that binds EGF receptors (or other tyrosine kinases) and SH3 domains, functioning as a negative regulator of many signaling pathways. 
The N-terminal domain is evolutionarily conserved, and is known to bind to phosphorylated tyrosine residues. The so called N-terminal domain is actually 3 structural domains, of which this is the C-terminal SH2 domain. 25.00 25.00 39.70 38.00 20.10 18.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.87 0.72 -4.35 4 215 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 85 35 115 217 0 84.00 81 11.55 CHANGED sHPGYhAFLTYDEVKtRLQ+hhcKPGSYIFRLSCTRLGQWAIGYVouDGpILQTIPpNKsLhQALl-Ga+EGFYLYPDG+spNPDL ........THPGYMAFLTYDEVKARLQK.a.hpKP..GSYIFRLSCTRLGQWAIGYVTuDGsILQTIPHNKPLFQALIDG.REGFYLaPDGRshNPDL.......... 0 24 34 64 +1477 PF00734 CBM_1 CBD_fungal; CBD_1; Fungal cellulose binding domain Bateman A anon Pfam-B_444 (release 2.1) Domain \N 20.60 20.60 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.57 0.72 -4.28 133 1334 2010-01-08 14:00:54 2003-04-07 12:59:11 13 104 175 6 936 1332 9 28.80 51 7.32 CHANGED tau.QCGG...G..a.oGsTs..CsuGhsC..pthNsaY ......haG.QCGG..tG.......a..oGsTs...Cs.....u..G..h....o..C..ph.Ns.aY......... 0 387 592 830 +1478 PF02013 CBM_10 CBD_5; Cellulose or protein binding domain Bateman A anon PSI-BLAST P10476/668-713 Domain This domain is found in two distinct sets of proteins with different functions. Those found in aerobic bacteria bind cellulose (or other carbohydrates); but in anaerobic fungi they are protein binding domains, referred to as dockerin domains or docking domains. They are believed to be responsible for the assembly of a multiprotein cellulase/hemicellulase complex, similar to the cellulosome found in certain anaerobic bacteria. 
21.00 21.00 22.00 23.20 19.50 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.07 0.72 -3.69 92 276 2010-01-08 15:14:06 2003-04-07 12:59:11 11 56 49 10 58 278 0 34.80 41 10.69 CHANGED s..Chspthu..YsCCss...s.tlhasDssGsWGlENs.pWCul .........C.sttpu..YPCCss...s..s.hsDssGsWGh..ENs..p.Csh.. 0 16 54 58 +1479 PF03425 CBM_11 Carbohydrate binding domain (family 11) Bateman A anon CAZY Family \N 20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.16 0.71 -4.58 15 149 2012-10-03 19:46:52 2003-04-07 12:59:11 8 40 89 1 61 228 302 177.60 16 25.80 CHANGED suhshhl-DFEss.s.s..hh.hWto.ssssspsuoplss.....stsscuhplphs..sstuuashpVsasl-cu.......Das......................pauGlsF.h.......K...Guu+plc....lEIsDss......cs-lalsslssspo.WpplpIsFsshsp......sGhstcssh...DLccltu.lsFpspussu....ssapIDslcLh....stspspssps ...........................................................................................s......lDc...F-.....sh.........s............s........h..h....ahs...s.s.pssthphp.ts................stssps.....hp.hphs....sstsshh...ssh...ths..hs.tt................................DWS..................................................shs.ul.pF.hl.........+ss..Guuppls.........l.....pl..p.sss..................t..c.hah......t..p..hss....sts..Wp.plpIPFsshst.....s..ss...st...st...s...h.....clspltt...hsh...h.hssstt...........sph..h..lDslphh..............s..................................................................................... 
0 38 47 54 +1480 PF03426 CBM_15 Carbohydrate binding domain (family 15) Bateman A anon CAZY Domain \N 20.30 20.30 20.30 292.50 20.20 17.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.03 0.71 -4.87 2 3 2012-10-03 19:46:52 2003-04-07 12:59:11 9 1 3 3 1 7 0 160.30 69 26.24 CHANGED lplDMssGWRGNuoG...pSGlThsuDGVoFsA.GDslGAVhDhh+PhpLEDAlIsMVVNVSuEFKAStAsLQ.hsQlKts...GEWsChAusp.hTAspDhTloCTlsEsDcKFNQTthDVQVGlQAKGTPsGslTIKSVTlTLA.tA.............YSAN IEVDMANGWRGNASGSTSHSGITYSADGVTFAALGDGVGAVFDIARPTTLEDAVIAMVVNVSAEFKASEANLQIFAQLKEDWSKGEWDCLAASSELTADTDLTLTCTIDEDDDKFNQTARDVQVGIQAKGTPAGTITIKSVTITLAQEAYSAN.............VDHLRD 0 1 1 1 +1481 PF03424 CBM_17_28 CBM_28; Carbohydrate binding domain (family 17/28) Bateman A anon CAZY Domain \N 25.00 25.00 37.00 31.20 22.00 21.60 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.32 0.71 -4.67 7 48 2012-10-03 19:46:52 2003-04-07 12:59:11 9 6 27 11 12 57 0 188.40 35 39.07 CHANGED VWu.EELSlSGEYVRARIKGhpYpP......I-RT...caocslWD..FNDGTpQGFslNuDSP.hpslslENsN..sAL+IoGLNs..SNDlo..EGNaWANVR..lSADt...WupshsIhGApcLTMDVIscpPsTVuIAAIPQSsstsWANPsRulpVp..ssFhppcDtpYKAhLTITstDuPslpsIApcscssshsNIILFVGo-s...uDVI.LDNIpVo ................Ws.ppLohSGpYsRuRIhG..Yts............p......hsphlhs..FpDGT+QGashsu-Ss.spss..lTIcssN...u.phhuhcs..spshs...sshWAs...A.....l.t-h....pups..lhhs.hLs...-hhscushslslAh.P.o.huh..WApsscshpls..schsp...tp..c..sp.chhhhhsh.D.splpshth.ssDohLpNlllhlussp...ustlalDNl+h............................................................ 
0 5 11 11 +1482 PF03427 CBM_19 Carbohydrate binding domain (family 19) Bateman A anon CAZY Domain \N 20.10 20.10 20.10 20.10 19.60 19.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.14 0.72 -4.19 2 39 2012-10-01 20:20:38 2003-04-07 12:59:11 8 5 31 0 26 42 0 63.00 34 12.39 CHANGED hTTTuhsTAoSA.hsh.hAspSCSoQsphuCTusGpYslCsaGKWVsusCPsGslClsosp .........................................t.......................................hsGp..ossopGphACou.sGp...aAlCs.aG.sW.VhtpCsu.GTsCh....s... 0 8 15 24 +1483 PF00553 CBM_2 CBD_1; CBD_2; Cellulose binding domain Bateman A anon SCOP Domain Two tryptophan residues are involved in cellulose binding. Cellulose binding domain found in bacteria. 21.10 21.10 21.10 21.10 21.00 20.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.07 0.72 -10.63 0.72 -3.96 17 1472 2012-10-01 21:34:18 2003-04-07 12:59:11 14 210 411 18 667 1442 50 96.90 30 17.38 CHANGED spssYslsspWssGFsAslslpNsuosslssWolsash.s.GpplTpuWNAslosoGsshososhuWNuolss.....GuossFGFpGotsGus..sss.slsGss.C .........................pssasssssW....s..u.....G......a.s.u....p....lslsNsGs.s.sl....s.u.W.slsash...sss..pp....lo..ss.....W......s..u..s.....h.......o.....p....o.......G.....s...p.h...o.ss..s.....s..u..a.....Nu....s.lus.....G.u.o.s...o..h...GFp.u.s.t..s.u..s.........ss............................................. 
1 261 536 651 +1484 PF00686 CBM_20 CBD_2; CBD_4; Starch binding domain Bateman A anon Pfam-B_111 (release 2.1) Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.41 0.72 -4.38 101 1301 2012-10-02 20:10:03 2003-04-07 12:59:11 14 99 591 106 739 1353 50 93.70 23 15.09 CHANGED plsV.sFps.ps.....sTshGpslallGslspLGsWss.spAlsLshst....ssshPhWps..sl.sLPs.Gs....shEYKal+.....t.ssss...sV.tWEu..u.sNRshosPsssssssss ........................l.pFpl..ph.....psthG......p.plhl.....sGs..hspLG....s...W.s..s.......p.p.AltLpht..................t..s.hWps.....sl...sl......ss...sp.................slc..YKalh.................h..sss.s......sh...hW.Es...s..sN...R.hthst.......sh.................................................. 0 308 491 631 +1485 PF03370 CBM_21 PRS; Putative phosphatase regulatory subunit Mifsud W anon Pfam-B_2433 (release 6.6) Family This family consists of several eukaryotic proteins that are thought to be involved in the regulation of glycogen metabolism. For instance, the mouse PTG protein Swiss:O08541 has been shown to interact with glycogen synthase, phosphorylase kinase, phosphorylase a: these three enzymes have key roles in the regulation of glycogen metabolism. PTG also binds the catalytic subunit of protein phosphatase 1 (PP1C) and localises it to glycogen. Subsets of similar interactions have been observed with several other members of this family, such as the yeast PIG1, PIG2, GAC1 and GIP2 proteins. While the precise function of these proteins is not known, they may serve a scaffold function, bringing together the key enzymes in glycogen metabolism. This family is a carbohydrate binding domain. 
21.10 21.10 21.20 21.30 20.50 20.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.67 0.71 -4.10 75 714 2010-01-08 15:58:43 2003-04-07 12:59:11 8 10 243 8 460 671 6 108.30 31 22.53 CHANGED phppp.VpL...Eshtls.......ppslhGsltVpNluF-KpVtlRaT..hDsWcohp-lsupYhs...shtthsh....D..........................pFpFplsLsshht..........................................tpplpFClpY..pss.u....ppaWDNN.supNYplp ................................................t..ttp.VpLEph.hp.........ppslhGslpVpN.luF-K....pVtlRhT..h.DsWcohp.-ls..u.pYhp............sss..shD...........................................pFsFplslssh.h...............................................................ttplcFslpY.......ps.s..u........ppaWDNNsGpNYpl............................... 0 122 198 331 +1486 PF03423 CBM_25 Carbohydrate binding domain (family 25) Bateman A anon CAZY Domain \N 26.80 26.80 27.00 27.20 26.70 26.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.00 0.72 -3.90 30 440 2010-01-08 16:02:45 2003-04-07 12:59:11 8 47 133 10 176 429 14 84.90 26 18.88 CHANGED usslclaYNhssssLstpsclah+GuaNsWsps.uhs.chsc...t......usahpssl.plPppAhhlDaVFssG....sssYDNNsspDaphsls ...........................................sslslaYs......t....p..ps.h..s.t.psp...l..........ah+s.u...a...s....s...W...s....ps.......u..hs.p....ttp.................ssahpss.l.plPtp.Ah.t.l-alFssG.....ussa...DN...Ns...sp...Daphsl.s............. 0 61 121 159 +1487 PF02839 CBM_5_12 CBD_7; CBM_5; Carbohydrate binding domain Bateman A anon Bateman A Family This short domain is found in many different glycosyl hydrolase enzymes and is presumed to have a carbohydrate binding function. The domain has six aromatic groups that may be important for binding. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -7.97 0.72 -4.37 135 2927 2012-10-03 03:17:01 2003-04-07 12:59:11 9 197 1085 40 615 2508 403 42.20 30 7.84 CHANGED hssWsss.ps..Yss.G....-hVs.asG..phYpA.phasp.st...tsssst..........sW ...................ssWsus.ps...Yss.G......DpVs..a..p..G..psYpA..paWTp..us.........pPstss..........sW............................ 0 203 349 497 +1488 PF04942 CC CC domain Bateman A anon Pfam-B_4563 (release 7.5) Domain This short domain contains four conserved cysteines that probably form two disulphide bonds. The domain is named after the characteristic CC motif. 20.60 20.60 20.70 20.60 20.20 20.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.04 0.72 -4.07 29 101 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 6 0 95 112 0 35.30 31 20.03 CHANGED ssssshsCc..uss.....sPshs.GtCPsGhsllpushCCssc ..........tshsC+.......uss.....sPuls.GhCPsGhslltus.t.CCsp.t..... 1 41 43 95 +1489 PF01845 CcdB CcdB protein Bateman A anon [1] Domain \N 23.40 23.40 23.70 26.10 23.30 23.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.22 0.72 -4.09 12 537 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 413 26 75 272 31 98.20 43 98.60 CHANGED hQFpVYpshucut...YPhllDVQSDlIsplsoRlVIPLhsscph.t.+ssp+LpPllp.l--psallhTppMASlPsslL.Gp.VsDLospRppI+sAlDFlhsGI ............MQFpVYps+tcsp...a.hhlDVQSDIIcshspRhVIPLsshchhsp...+ss..ccLhPllp.lsscsahlMTp-hAoVPlssl..Gcc..VsDlS.p.+cspIKsAlDhhhpGI.............. 0 14 34 51 +1490 PF04995 CcmD Heme exporter protein D (CcmD) Bateman A anon COG3114 Family The CcmD protein is part of a C-type cytochrome biogenesis operon [1]. The exact function of this protein is uncertain. It has been proposed that CcmC, CcmD and CcmE interact directly with each other, establishing a cytoplasm to periplasm haem delivery pathway for cytochrome c maturation [2]. 
This protein is found fused to CcmE in Swiss:P52224. These proteins contain a predicted transmembrane helix. 23.90 23.90 24.10 24.60 23.50 23.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.06 0.72 -4.23 96 1201 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1174 0 214 605 209 48.40 37 73.35 CHANGED GsYuhYVWsuYulohlslshLlhholhpp+plhpclpctptRpt....ch .....GGYAhaVWhAhuhTllsLslLllpolhp+RtlLptltp.ppAREs.R.h.................. 0 45 106 160 +1491 PF03100 CcmE CcmE Mifsud W anon Pfam-B_2583 (release 6.4) Family CcmE is the product of one of a cluster of Ccm genes that are necessary for cytochrome c biosynthesis in eubacteria. Expression of these proteins is induced when the organisms are grown under anaerobic conditions with nitrate or nitrite as the final electron acceptor. 19.90 19.90 21.00 22.00 19.00 19.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.50 0.71 -4.58 135 1616 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 1542 4 402 1031 1646 131.90 46 83.52 CHANGED h..st.+++RLhllshslsuluhAsuLlLhAhppslsaFaoPo-lhpscs........s.sspplRlGGhVppGSlpRs..suhpVp.FpV.TDs...s.........ss.lsVpYpGILPDLFREGQGlVApG................ph.tssG....sFtAsEVLAKHDEsYMPsE ...............................................................RR+pR..Lh.llhullsululssuLlLaAL.p.p.NIsh..F...YTPuEl.l.tGct..........t.sGpRlR.l.GG.M..Vh.....G.S......VpRss........soLc..Vs.Fsl.......t....Ds....t.........................s.s..V..sVsYcG..I..L.P.D.LFRE....GQGVVspG........................pL.tpss.....phhAcEVLAKHDE.sYhPPE.............. 0 113 247 324 +1492 PF03918 CcmH Cytochrome C biogenesis protein Finn RD anon DOMO_DM01577 Family Members of this family include NrfF, CcmH, CycL, Ccl2. 
25.00 25.00 25.10 27.10 22.30 24.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.83 0.71 -5.21 177 2053 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1387 2 373 1270 1631 139.80 38 68.57 CHANGED hlhhl..lhl..hh...................sh...sss.sDph.h...ss.....s.EpRhppLscpLRCsh.CQNpsIsDSsAslApDLRtpVh-hltpGcScppll-ahVsRYG-FVlacPshpstThlLWhuPhllLlh.GhhhlhhhhR++pttt..........................sLosp-p.....tcLppl.Lp ........................................................hhh...hh.lhL.hhs..................sut....tss.lDshpF..tss......tQcpphppLsppLRCPp..CQ.N..p..slsDS...N...A.l.AsD...hRppVYc.h.l.p.......EGKSc..pEIlcaMlsRYG.cFVpYsPP.....ls...st.TllLWhhPl......lhlllGuhll.h.tht.++pht...........................................t........................................................................... 0 93 206 291 +1493 PF03597 CcoS Cytochrome oxidase maturation protein cbb3-type TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 23.00 23.00 23.40 23.40 22.50 22.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.99 0.72 -4.50 127 978 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 954 0 277 677 88 44.60 39 69.38 CHANGED sllhlLIPlullluhlulhsFhWul+oGQaDDl-ssutRILhD.D- ................phlhhLIPlulllshlulssFlWul+oGQFDDl-t.uppI.LhD.D-....... 0 74 175 227 +1494 PF03150 CCP_MauG Di-haem cytochrome c peroxidase Mifsud W anon Pfam-B_3135 (release 6.5) Family This is a family of distinct cytochrome c peroxidases (CCPs) that contain two haem groups. Similar to other cytochrome c peroxidases, they reduce hydrogen peroxide to water using c-type haem as an oxidisable substrate. However, since they possess two, instead of one, haem prosthetic groups, bacterial CCPs reduce hydrogen peroxide without the need to generate semi-stable free radicals. The two haem groups have significantly different redox potentials. 
The high potential (+320 mV) haem feeds electrons from electron shuttle proteins to the low potential (-330 mV) haem, where peroxide is reduced (indeed, the low potential site is known as the peroxidatic site) [1]. The CCP protein itself is structured into two domains, each containing one c-type haem group, with a calcium-binding site at the domain interface. This family also includes MauG proteins, whose similarity to di-haem CCP was previously recognised [2]. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.45 0.71 -3.93 173 2309 2012-10-03 10:02:11 2003-04-07 12:59:11 9 22 1444 47 631 1895 669 159.60 36 39.95 CHANGED sttp.spLG+tLFaDspLSts.......sslSCAoCHs.ss..........t..uhsD..stsh.......u.hG.....hs.s.................ph.......ssR..NuPolhN.suap...............................................h.FW.DGRs...............................s..s..L.cpQA......htPl..tsshEM......ut...s........................hppll.p+Lps.............st......YtphFppsFs....................................sps........lshpplspAlAsF.p+ol...hossStFDpal.....pG.-tsAl ...................................s..p+stLG+tLaaD...sR.LSts.......ss.lSCuoCHs..hs.............t..uhsD......shps........................o..hG.......hs.s........................................................................................ph........GshNuPTlhN.usash.......................................................................s.FW.DG.RA...............................................................s.sL.p-Q.A......tGPl..hsPhEM.............us.......s................................................hctll.t+lpp................................st.......YtptFppsas..................................................ps....hshc.s.l.scAIApFE+TL...lossS.FDcaL.cG.-ppAh.......................................................................................................................... 
0 223 417 541 +1495 PF04505 Dispanin CD225; Interferon-induced transmembrane protein Mifsud W anon Pfam-B_2070 (release 7.5) Family This family includes the human leukocyte antigen CD225, which is an interferon inducible transmembrane protein, and is associated with interferon induced cell growth suppression [1]. 27.10 27.10 27.30 27.40 26.70 27.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.79 0.72 -4.44 68 837 2009-09-11 14:57:37 2003-04-07 12:59:11 7 11 210 0 406 837 11 81.30 26 49.85 CHANGED ss.usssh...............spsaLshuIhsol....hCsh....P...lGllAllaSspspsthttGDhsGAppsuppA+thslhuhlh....Gllhh..llh.l ...............................................................spD.a.ls..hulhssl...........hChh.......P.......lGllAhhaS.h..........cspsthhtGDhpuA.pphuppA+hhs....lhulhl.Glhhhlhh.h.......................... 0 97 146 220 +1496 PF01130 CD36 CD36 family Finn RD, Bateman A anon Pfam-B_1229 (release 3.0) Family The CD36 family is thought to be a novel class of scavenger receptors. There is also evidence suggesting a possible role in signal transduction. CD36 is involved in cell adhesion. 
19.80 19.80 20.30 20.00 19.50 19.60 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.55 0.70 -6.19 52 1012 2009-01-15 18:05:59 2003-04-07 12:59:11 16 15 151 0 630 1015 58 355.90 22 82.78 CHANGED hhsluhllhlhuhlhhhh.asshhpphlccplsLps...sop.......sac.Wtps........Phslhh.p.........lYlFNhTNs--hhs..ut+PplpElGPYsY+.EhtpKhslpassss...Tloa..ppp+sahFpP-...hSsG.....stsDhlsssNlshlussthhpp..........hs.hhp...............................................hhlshhlpt.....stphF.hspolt-hLa..........GacDsl..lshhpp.........h...............spFGhhhs..cNso...sshaslhTGtc.shpphGh..........lppasGpsph..sh...............Wps.........C.s.....plpGo.-GohFsP..h.lpp.pcslhh..Fts-l.CRslp...ltappsss.hpGIpsa+atss.spshsssptpspstCa................C.tstt...........................Ch.sGlhslosCh....G..uP....lhhShPHFa..tAD.phhpslp.Glp.Pst-cHp.halslcPhoGhPlp.sptRlQlNlhlppsps..h.shhps.hs.shlhPlhWhcpss.tlscphhshl.ptlhhh.plhthhthshlslGllhlhhhlhhhht .................................................................................................................................................h...............................h.p.th.lt...ss.............hp..W..p.............sh.hhh..p..............hahashpNspchht....s..tp..lpphGPYsap....chh.+.sl.p......t..tp...s......plsa.....p.h.F..p..........hS.s.......p-.lhh.Nh...hhsh..h..htp.......................hhp..................................................................................................................................................................................hhhs.h.hp..........ttt.h..hp.....hss...t-hha..............Gaps..h......hphh........................................................th.u.hh.......hNso.......ss.hphhsGhp..sh....t.ph..s...........................................l..pa.p.s.p..ph......sh..............................ats................C.s................l.pG.o..suph.a.P........h...hp.....pp...l.h..a...........-h..C.....R...th......h.atpp........hp.G.l.s...h+ahhs..p.hs.s.s..t...........
.st.sa................................................................C.......................................C...tGhhsls.C...........s......tP..................lhhShPHFh....us..........h..........h..p..t.l..p......G..........h....p....P.s.................cpHp...hhh.lpP...............h.....sGhshp.sth+hQhNhhlp..tt....................h....h.t....p.....h............t................hh...hPhhah.p...................th....h.s.tthhp.h.......h..h.......h......h.h...h...hh.h....hh.uhhhhhh...h....h........................................................................................................................ 0 248 319 521 +1497 PF04549 CD47 CD47 transmembrane region Mifsud W, Bateman A anon Pfam-B_2739 (release 7.5) Family This family represents the transmembrane region of CD47 leukocyte antigen [1-2]. 25.00 25.00 26.70 27.80 21.20 23.00 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.25 0.71 -4.22 13 120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 61 0 25 104 0 151.00 44 53.06 CHANGED hlIllhhhlhllLhWupls..hlohKh..sphc+hIhlhhssulllTlIslVGtalh.lssthshhphpGlsLI.lshhh.IhLphhla.htltho.phhIsllllQlluYlLollslsLslhsC.pslaG.LLI.sLhlIslhELhuLlhhhh...hsssp+shh ............hLlhIFshlsllLhWhphu..hhol+h.Ssshspph.hhLhVhGhllolIhlsGAhLF.hP.u.aol+phhGLhhhslsohhLIllphhsFhh..hhtho.shlhhIlhhQllGYILsVlGLuLslptC...VcGsLLlSGLuhIsluELhuLlahhh...hsSsQ+sh............. 0 1 1 4 +1498 PF03234 CDC37_N Cdc37;CDC37; Cdc37 N terminal kinase binding Bateman A, Wood V, Mistry J anon Pfam-B_3345 (release 6.5) Domain Cdc37 is a molecular chaperone required for the activity of numerous eukaryotic protein kinases. This domain corresponds to the N terminal domain which binds predominantly to protein kinases [2] and is found N terminal to the Hsp (Heat shocked protein) 90-binding domain Pfam:PF08565. Expression of a construct consisting of only the N-terminal domain of Saccharomyces pombe Cdc37 results in cellular viability. 
This indicates that interactions with the cochaperone Hsp90 may not be essential for Cdc37 function [2]. 26.10 26.10 26.60 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.52 0.71 -3.95 21 361 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 238 0 221 333 1 125.20 28 34.41 CHANGED slDYSKWD+IElSDDSDlEVHPNlDppSFIRW+QpsIHE+RppR+p-hcsLchphphsscLhpRlc+hLspLppp.t..s...........l.....t................................p........ppMh.sLh..l.....p.t.t..........sshhpplppHppKlcshpc-hppKLcELpKccpppIsS-Dl..HsGFspS ..............lDYSKWDtlElSDDpD.-sHPN...lDptShhRh+ppthhcchtp..hppchcplchpht...pthhtchpph.lpphp.p....t.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 68 108 175 +1499 PF02724 CDC45 CDC45-like protein Mian N, Bateman A anon Pfam-B_1919 (release 5.5) Family CDC45 is an essential gene required for initiation of DNA replication in S. cerevisiae (Swiss:Q08032), forming a complex with MCM5/CDC46. Homologues of CDC45 have been identified in human [1], mouse and smut fungus (Swiss:Q99107) among others. 
34.00 34.00 36.20 34.90 33.60 31.70 hmmbuild -o /dev/null HMM SEED 622 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -12.88 0.70 -6.49 33 405 2009-09-10 22:41:12 2003-04-07 12:59:11 9 4 282 0 267 409 17 488.50 28 92.86 CHANGED .VlllVuh.-lDALCAs+ILssLhKpDhl.apllPVtGYs-L.............cpthpch..spshpp...........llhlsCGuslDLtshLphs............................tshplaVhDuHRPasLsNlau....................................ssplhlhsDssh--phpt....cc...hapth........................cs-s-s-..s-s-s-sssp......pc.........t.ts.ppttcpc.hphcRp.p...................................................++pp++.hccpcphlpthYppushaupSsuhhlYsLA.plu+s..st.-hLWhAIlGlosh.l...................tthstppYsph.hthLpcEVpRLssps.....................tssspos............................sshpIsh.p.-h+LhLhRHWSLY-ShhaSsYluu.+LcLW.o-pGcK+L+cLLA.+MGlsLsps+QsataMDhplK+pLtphlc+.uspaGLpclsht...............uFs+saGa+.splSAuDsVhulsALL........Ess........................................................................................t.....tt.tppcpcphhssFapAhDALs................ppsh-lLppGlphAptLppAIhpsssoll.-p.c.lpshpsF+hsllp-.u...PDlclFspPhsLscLupaLlcuhspp...pcppttt..............hPLVlAu.LcpspsshlVl.Gl..........................................................................s.tpptp...................tNpFuhAFppsuppo.sA+lphDsF-sulIEl+p-DLssFL-sLohtss 
............................................lhlh.ss..-sDulCAs+lL.p.tLhpp-.l...aplhPltuht-l.............pp.hh.ph...........p.hp..............hlhlssGu.lDl...p...h...L.t..t...................................tth.halhDs+RP.h.slsNlas......................................pttlhlh.c.tp.c.p.......cp...ha.t......................................................ttpptpp..pptt..t..t.tt...................t..............................................t.pc..................................................................................................................................................................................................ppp....hp.hpt...ct.l...Yp.hp.aups.u.hh..........ap.L..u..hs+p.......p..-...h...LWhullGlos..l............................thh.t.th.t...h..lppcVp...phs..p.....................................tp.hs.............................pp.p.lth..p.-..................phhL.hpHWoLa-Shh.osahss.phphW..s........p.................G...........tc+Lpphl....A.cMG.lsLtp..spQpat......Mshpl+cplhphh.c..h..ash..pphh.................sF.hpaGap.tph.AtDhshshhulh........Es...................................................................................................................ttttthhtpFhtAhDuLs.......................s.p.L.tul..hAp...pul..ppstshl..pp.p...lhp.t.....ahhs.l.....-..u............s-.ht.hFsp.P.hsL.hLu.alhcshh.p........tttt..................................hPllhus..h.......s..p.pshhhll.Gh..............................................................................................t........................hN.Fu.AFppsstps.ss...p.hhhs.F-.shlplptp-hs.FhptL.....h................................................................................................................................................................................................................................................................................................................................... 
0 106 156 224 +1500 PF02933 CDC48_2 cdc48_2; Cell division protein 48 (CDC48), domain 2 Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_799 (release 5.2) Domain This domain has a double psi-beta barrel fold and includes VCP-like ATPase and N-ethylmaleimide sensitive fusion protein N-terminal domains. Both the VAT and NSF N-terminal functional domains consist of two structural domains of which this is at the C-terminus. The VAT-N domain found in AAA ATPases Pfam:PF00004 is a substrate 185-residue recognition domain [1]. 22.40 22.40 22.40 22.60 22.30 22.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.88 0.72 -4.33 103 1049 2012-10-01 20:15:13 2003-04-07 12:59:11 12 16 512 49 687 1072 104 68.70 25 9.07 CHANGED thsss.ht.hltpphts..pslstGphl..............hh...sh......h..................sptlphtVhsspPss.............sl.hlscsTplpl.t......pcssph ..............................................hsss.hshhlp..h.Fhp...+PlppG-hh..............................hl....ph.t...........................hpsl.pFp.....VspscPss..................hs.hls..sT.lphp.....tcs...t...................................... 0 197 383 563 +1501 PF02359 CDC48_N VAT-Nn; cdc48_N; Cell division protein 48 (CDC48), N-terminal domain Bashton M, Bateman A, Griffiths-Jones SR anon Pfam-B_799 (release 5.2) Domain This domain has a double psi-beta barrel fold and includes VCP-like ATPase and N-ethylmaleimide sensitive fusion protein N-terminal domains. Both the VAT and NSF N-terminal functional domains consist of two structural domains of which this is at the N-terminus. The VAT-N domain found in AAA ATPases Pfam:PF00004 is a substrate 185-residue recognition domain [1]. 
25.10 25.10 25.10 25.10 24.90 25.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.64 0.72 -4.04 120 1252 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 680 48 754 1224 114 82.90 27 11.30 CHANGED pLpVscAh.pp....csspulsplsspshppLslptGDhlplpG.c+p.Ts.shVhsstsp....sp....tshIRhsshhRpNsslulGDpVpVcpsp.s ................LhVscs....p.......spul..ltlsssshppL.....t.l........h...pGD.s....l.hlpG..c+.......p..Ts.shVhsscsp.........st.....uplphst.....s.hRp.NhtV.p.lG..DhVsVp.ss..h............... 0 221 435 635 +1502 PF03381 CDC50 DUF284; LEM3 (ligand-effect modulator 3) family / CDC50 family Mifsud W, Kerrison ND anon Pfam-B_2846 (release 6.6) Family Members of this family have been predicted to contain transmembrane helices. The family member LEM3 (Swiss:P42838) is a ligand-effect modulator, mutation of which increases glucocorticoid receptor activity in response to dexamethasone and also confers increased activity on other intracellular receptors including the progesterone, oestrogen and mineralocorticoid receptors. LEM3 is thought to affect a downstream step in the glucocorticoid receptor pathway. Factors that modulate ligand responsiveness are likely to contribute to the context-specific actions of the glucocorticoid receptor in mammalian cells [1]. The products of genes YNR048w (Swiss:P53740), YNL323w (Swiss:P42838) and YCR094w (Swiss:P25656) (CDC50) show redundancy of function and are involved in regulation of transcription via CDC39 [2]. CDC39 (also known as NOT1) is normally a negative regulator of transcription either by affecting the general RNA polymerase II machinery or by altering chromatin structure [4]. One function of CDC39 is to block activation of the mating response pathway in the absence of pheromone, and mutation causes arrest in G1 by activation of the pathway [3]. It may be that the cold-sensitive arrest in G1 noticed in CDC50 mutants [2] may be due to inactivation of CDC39. 
The effects of LEM3 on glucocorticoid receptor activity may also be due to effects on transcription via CDC39. 26.70 26.70 30.50 28.00 18.90 26.60 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.00 0.70 -5.23 76 747 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 312 0 511 720 9 276.90 32 77.38 CHANGED l.....hhsupplp-lpl.cYsssstsss..................................................................sCplsFsls....pphcu.PlahYYpLsNFYQNHRRYVpShsppQLpGct.hs.sp.......tCcPhph........................pssc.hh...hPCGLIAsShFNDTash.....................sss..sssh....................shs.....ccG..IuWpoD..+pp+a+p..sph.p.t...........s.P.sW.t................................shp.ss.hPssp........pE-FhVWMRoAALPsFpKLat+h.......sssL.sGp.YplpIp.NY.....PVppFs.Gp..KpllloTsShhGG+N.FLGlsYlllGulshllulhh.llhahh.ps+.....phu-ts ...........................................................................h.hhsuppl.El.h.cYspsp..ss..........................p..............................................hCplpapl.s....p.ph.p....s.slahYYpLsN..FYQNHRR.Y..VpS.hs.spQLpGps..hshps..............psCpPhphs..................................................tssc...slhPCGhIAsShFN..D..Tash..................sssspth............................................shs.......ppG.IuW.o.D.+.t+Fpp.....sth.t.t...................................ths..Ps..sW..p.......................................................................................shp..ss..P.s.p............sE-ahVWMR...TAALPsF+KLYt+lt...................pssL.sGp.Ypls.I.p....s.a.............sVtpFs.Gp..KpllloTt.o.hh.GG..+N..FL...GluYlllGulshlhulhh.hlhhhh.h.p......p............................................... 0 183 288 415 +1503 PF02611 CDH CDP-diacylglycerol pyrophosphatase Bashton M, Bateman A anon COG2134 Family This is a family of CDP-diacylglycerol pyrophosphatases, EC:3.6.1.26. This enzyme catalyses the reaction CDP-diacylglycerol + H2O <=> CMP + phosphatidate. 
25.00 25.00 26.70 26.60 24.00 23.90 hmmbuild --amino -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.54 0.70 -5.10 24 718 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 658 2 75 359 4 208.40 62 86.80 CHANGED ssDALW+IVpppClst.ppssssuPCspV..c.....ptGhVlhKDtsGshQYLLhPTt+loGlEuPtLhpsssPNaahtAWpARsahupchGpslP-ssluLAINSphGRoQsQLHIHIuClpPslpptLspts....sshsspWp.LPh.L.....pGHpYhA+pVsss-hspt..sPFphLsccl..tupscMuchululsshssss....FlLLsophchhshs.uSAEElQDHsC.plhc ..............................................................................sDsLRKIVhEpCLPs.Qpps.QNPuPCAEV.....KP....sAGYVVhK...D.hpG.P.LQYLLMPThRIsGhESPLLh-PuTPNFFaLAWQARsaMScKYG..pP......lPDpAVSLAINSRp..GRTQNHhHIHISCIRPDVRcQLDssL.....ssIoo..RWhPLPGGL...........tG..H-YL..ARRVTESELsQR..SPFhMLAEEVP.-AR-+MGpYGLAhVRQSDsS.....FVLLATpRNLL.T..LN........RASAEEIQDHpC.pIL.p.................... 0 9 33 55 +1504 PF03598 CdhC CO dehydrogenase/acetyl-CoA synthase complex beta subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.60 20.60 20.60 22.80 18.10 16.30 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.37 0.70 -6.01 22 260 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 146 27 85 256 20 281.80 45 67.47 CHANGED tl-IPVshuPsaEGERIR+sDMaVEhGGs+.o...uhELVpshph-E.lEDGKlpVlGPDIc-h-.EGs.phPluIll-luG+chpcDaEsVLERRlHcahNYspGlMHlsQRDhsWlRlSK-AhsKGhp.LcHlGclLhshh+pEF.sll-+lpVTlhTDtpcV......pchh.ppA+thYcpRDcRh+sLoDEsVDsFYuCsLCQSFAPoHVCllTP-RsuLCGAlsWhDu+AuhclsPsGPp.PItKGcslDsptGpapuVN-hltcpSpGsl-+lsLYSlhppPhTSCGCFEsIshhlPEssGlhlVsR-asGhTPsGhsFSTLAG.sGGGtQsPGFhGluhpYhpSpKFlpADGGhpRlVWMPKpLK-plp-pls.........c-hhDKIAsEpsuoo.EElhtFLccpsHPslsh.......-sh. 
...........................h.h.hush.tGE.lRt.ph.hEhsG.p.....shEhl.h.....tp....htDtplplhG.-lt..p.h.......u.........s........hu..hhhpl..G.cth.p.D...hEs....lh..ERphH.hhshh...pGhh.+..h..sQ..R.....hhRlsKtshttGht.hpphuphl..hhhtch.shl-phplhhhTp..tth.......p.h...s...at.RDt+htt..h.--ts-.aYsC.hCQsFuPsplChlsP-R.uhCGuhsaLDAKAstclcPsGPsQslsKtpslDEphGtapsVNEhVpchSpGslEcVsLYSlhcpP.TsCGChEslhhh.P..sGhhhspRta..s.sP.Gh.FsthAu.huGG.p..GahGhsh..h.p.+hh.u-.GG..RhlWhst.hKp.l...h............phhthlssEp.s.t.ttl..al........................................................... 0 37 65 75 +1505 PF03599 CdhD CO dehydrogenase/acetyl-CoA synthase delta subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.24 0.70 -5.97 9 289 2012-10-03 05:58:16 2003-04-07 12:59:11 11 5 140 16 156 518 200 303.70 26 74.61 CHANGED EVslGs.......pcpsVhlGG-csLaR....a-hsh.N.sslshDV.D.h.st..Ksltcchpclhpp...at+hpVtc.hshDhlsI+thSsDP.......cchscslEcVhpulshPlllsu......DP-VLctAhEVscsc+sLLhuAsh-.sacchschAhcYspsVls.au.tDlsthKsLs+plhp.sGl..ccIVhDPsT.ts.G.GlchohsshhpIRhsAlcG.Dc-lsaPI.uhsssAahsc............u.hVs.............................................pP.ts-ssh...Gss-.cuPla.hTs.............................GlshulAGspl.......................hhhl.PshsAsL+thhEshTGhpshVGstDsutlhshl 
......................................................ltlGs.......tppshplGG-s..s..L..ap...a..-tph....s.shluhcl.Dh.hs.p...t......pshhpth...pc....l..p..........plt..p..hts..-hl.sl.+..h.....h...u..s..c.s....................cchspslc.p.V.h.p.s.s.c..hPlllhus.....Dsplhctsh....csst...sc..p..sLl..h..u....Astp.....N....a..ct..hst.h.A.ht.as...t..s.....lhs.............u.................h......-..........l.......s....h..........h....c..........p..........L..........s..h..........h...........l...........t......p....hGl.............cclVhDs...ss.ts.......u......s....l..c.sh....s.sh..p.lRhu.A.lpt.Dc.lshP.h.h.s.h.s.h.p.sh....s.p..................................................................................................................................................................................................................................h................................................................................................... 0 73 128 142 +1506 PF02234 CDI Cyclin-dependent kinase inhibitor Bateman A, Mian N anon Pfam-B_1698 (release 5.2) & Pfam-B_5787 (Release 8.0) Family Cell cycle progression is negatively controlled by cyclin-dependent kinases inhibitors (CDIs). CDIs are involved in cell cycle arrest at the G1 phase. 20.70 20.70 21.20 22.00 20.00 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.62 0.72 -4.17 43 388 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 133 2 195 364 1 50.30 34 25.13 CHANGED Rshhh..Ps.cpElcc.aptt.cc.pcchpc...KaNFDFhs-pP.L..s..GR...Y-W.c..ls ......................thFs...Ps.p..pElp.c.hptthp..p....h..pcptpc...KWNFDFhs-pP.L....p..G..+....acWpcl.............. 
0 39 84 138 +1507 PF03261 CDK5_activator Cyclin-dependent kinase 5 activator protein Mifsud W anon Pfam-B_4160 (release 6.5) Family \N 25.00 25.00 42.40 27.30 21.40 21.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.29 0.70 -5.11 5 198 2012-10-03 00:42:12 2003-04-07 12:59:11 10 3 77 10 122 170 0 210.00 37 88.33 CHANGED MGTVLS........LSPuS.....RpusLa--.tcs...GussLusYTusssuKuuKu..EKsL.......KRHShlIsALoWKRLVAutSsKKKsSKKuosN...............sSSuYpspltpLN+ENlcKS...........hPhs..h.LsssNhuoacp............................sPupssAPs....supLuuKss.s...............lus.csAPs.t.susGtoP+RVIVQASTSELLRCLG-FLCRRCYRLKcLSPuDsVhWLRSVDRSLLLQGWQDQAFITPANVVFVYLLCRDVlsG.E...luo-cELQAslLTCLYLSYSYMGNEISYPLKPFLVEssKEsFWDRCLslIsphSsKML+INADPHFFTQVFoDLKNEGsp- ....................................................................................................................................................................................................................................................................................................h.......K+.....h.s.hsh+.hs.....tt...t..t............................................................................................................................................................................................................................................................t+hll.QASTsELL+sLG.Flpp..............+C.........tl.pp.....hpssc.lhWhRsVDRuLLLQGWQD.uFls....PANlVFlYhLsR-.ltt.p....................tp.t-Lpu.hLTCLYluYSYMGNEISYPLKPFLl..-ss+-tFWpRCl.llp.hSspMLplNspstaFTplFt-LKt.....t.................................... 0 27 46 88 +1508 PF05174 CDRN Cysteine-rich D. radiodurans N terminus Yeats C anon Yeats C Domain This domain is found individually and at the N terminus of a few multi-domain proteins. 
25.00 25.00 102.00 101.10 19.60 19.40 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.58 0.72 -4.50 5 6 2009-09-10 20:04:58 2003-04-07 12:59:11 7 5 1 0 6 7 0 55.30 73 18.45 CHANGED phastsLEQFSELRVRRNSTATRSIL.......RPAhCFALAPLA......KKLCHLFVKCSRLs ....astsLEQFSELRVRRNSTATRSIL.......RPAhCFsLAPLA......KKLCHLFVKCS+Ls. 0 6 6 6 +1509 PF03498 CDtoxinA Cytolethal distending toxin A/C family Griffiths-Jones SR, Bateman A anon PRINTS Family \N 21.60 21.60 21.70 21.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.12 0.71 -4.83 10 430 2012-10-02 19:42:32 2003-04-07 12:59:11 9 1 170 6 23 258 3 145.30 32 69.60 CHANGED hlSlhu.toGulLosathss+sal...Wshs.lcos-auch+s.phhsh.shGhlpF+Nsssus.Clss...htsGhhtst..Csss....shpplFsLlPosoGAVQIKS..lusGpClpsshssph....hhhslplscC..shup...psshsphWhIsPP...spsspP .........................................loIhu..oGhsLosathsstshl...Wshp.lsup.shtc.tRs.plh....s.saVphpNs.+sss.CLss......hts.Gh....hth...........Cpps......shtphF.....plhP.h.os..GAl....QI+s.....lsss..pClps...hsssh.....h.hpht.lscC.....s.th...ptshsp.ahlosP....htA..................... 1 3 15 19 +1511 PF00272 Cecropin cecropin; Cecropin family Finn RD, Bateman A anon Prosite Family \N 29.40 29.40 30.00 29.70 28.90 29.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -7.00 0.72 -4.39 28 182 2009-09-11 13:43:56 2003-04-07 12:59:11 14 1 57 10 56 210 0 30.50 49 49.92 CHANGED chhKKlE+sGpplRDAlIp....AuPAlsVlupA .KlhKKIE+lGp+lRDAhIp....Ap.AlsVlusA... 
0 15 26 47 +1512 PF02927 CelD_N celD_N; N-terminal ig-like domain of cellulase Griffiths-Jones SR anon Structural domain Domain \N 21.30 21.30 21.30 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.04 0.72 -3.73 42 561 2009-01-15 18:05:59 2003-04-07 12:59:11 9 36 362 16 169 519 19 88.60 25 12.13 CHANGED sstsstsss...tl+VNQlGYhPsusKhAslsssuss.....s.taplhsusGss.VhsGpspst.uspss.....................SGppV+hlDFSshp.ssGs.YpLpl..sGt..pSt .................hs...........lhlNQlGYhspu.s.KhAslhss.psp......................s.pap..l......h...s...s.sspp...Vh.p.Gphp.t...ss.st............................st.pp..ht.h.lDFSshp.ssGp.Yhlph..ssh.....p..................... 0 100 148 161 +1513 PF03500 Cellsynth_D Cellulose synthase subunit D Griffiths-Jones SR anon PRINTS Family \N 21.30 21.30 21.70 21.80 21.00 20.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.94 0.71 -4.51 2 86 2009-09-10 23:58:10 2003-04-07 12:59:11 8 1 83 16 36 84 2 124.90 30 81.81 CHANGED FoLFLQsLSWEIDDQsGIEVRN-LLREVGRGMusRl.PP.CpTlcpLQIELNALLuhIsWGhVpLELLuE-QuhRIVHEsLPQVGSAGEPSGTWLAPVLEGLYGRWlTSQsGAFGDYVVTRDVDAEDLNuVPpQTIIhYMRsRS ..................hth.....................pshL+phGpplAppaPLPsucTlu-LEpslNtlLschsWGaVplc..sp-suLpltHpAhPh...s..stucst.t..pW...h...s...A.lLEGlYupWLpuQuG.u...............................thsh.t.t................ 1 1 9 23 +1514 PF03552 Cellulose_synt Cellulose synthase Bateman A anon Pfam-B_1346 (release 7.0) Family Cellulose, an aggregate of unbranched polymers of beta-1,4-linked glucose residues, is the major component of wood and thus paper, and is synthesised by plants, most algae, some bacteria and fungi, and even some animals. The genes that synthesise cellulose in higher plants differ greatly from the well-characterised genes found in Acetobacter and Agrobacterium sp. 
More correctly designated as 'cellulose synthase catalytic subunits', plant cellulose synthase (CesA) proteins are integral membrane proteins, approximately 1,000 amino acids in length. There are a number of highly conserved residues, including several motifs shown to be necessary for processive glycosyltransferase activity [1]. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 722 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.18 0.70 -6.24 15 1407 2012-10-03 05:28:31 2003-04-07 12:59:11 9 33 149 0 613 2401 43 403.60 33 73.03 CHANGED VDlFVSTVDPLKEPPLVTANTVLSILAVDYPV-KVSCYVSDDGuAMLTFEuLuETA-FA++WVPFCKKasIEPRAPEaYFutKIDYLKDKVpssFVKERRAMKREYEEFKVRINALVAKApK.....................................lP-EGWsMQDGTsWPG........NNsRDHPGMIQVaLG.sGucDl-GNE...............LPRLVYVSREKRPGYsHHKKAGAMNALVRVSAVLTNuPFILNLDCDHYlNNSKAlREuMCFMMDPslG++lCYVQFPQRFDGIDhsDRYANRNTVFFDINM+GLDGIQGPVYVGTGClF+RpALYGY-PPpspch.c.s.........ssCChGp+Kpspstppsp........................................p..pcp-pptshashp-l--s....h..s-.E+t.lhoQpslEK+FGpSslFlsSThhpp.....................GGlPc..sssPAsLlKEAIHVISCGYEDKTEWGKEIGWIYGSVTEDILTGFKMHsRGWRSlYChPKRsAFKGSAPINLSDRLHQVLRWALGSVEIFFSRHCPlWYGat.tRLKaLpRlAYlNohlYPFTSIPLlsYChLPAlCLlTGKFIlPoLoNhAulaFLsLFlSIhsTulLElRWSGVoIE-WWRNEQFWVIGGsSAHLFAVFQGLLKVLAGlDTNFTVTSKuu.s...tD--FuELYlFKWTTLLIPPTTllIlNlVGlVAGlScAINsGYtuWGPLFGKlFFuFWVIlHLYPFLKGLMGRQNRTPTIVlVWSlLLASIFSLLWVRIsPFlscssusshp.p 
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hp.......t.......t.......G.......W.Shhh...........................................h....G.s.s.sh.t.....h.Q.hRW..G.h...............ph.hh.s.p.........s.....sh.....hh............s.......................t.h................th...a.......................s..h.........hhY.h.h.s......h...h........h.......t...........h................................................................................................................................................................................h....h.............................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................... 0 59 399 517 +1515 PF03040 CemA CemA family Bateman A anon Pfam-B_1775 (release 6.4) Family Members of this family are probable integral membrane proteins. Their molecular function is unknown. CemA proteins are found in the inner envelope membrane of chloroplasts but not in the thylakoid membrane [1]. A cyanobacterial member of this family has been implicated in CO2 transport, but is probably not a CO2 transporter itself [1]. They are predicted to be haem-binding however this has not been proven experimentally [2]. 20.80 20.80 21.00 21.00 20.50 20.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -4.75 53 734 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 614 0 83 652 29 213.40 50 80.88 CHANGED hu+h.pshsSlpYLhhL...lhlPhhlshhh+phhlpPhlspaW..NspQsplFLNs.QEcpALc+hpchEEhlhhDphl.p.....tsphp.pclphpI+ccslpLschasp-ulpsIhplhoslluhshhsshhlhG+cc.LtlLpSalpEhlYuLSDThKAFhIlLhTDlhlGFHSPHGWEllIsulhcHaGhscNcp..hIshFVuTFPVILDTlFKYWIFRYLNRlSPShVsoYHsMNE ..................................................t+hKshsslhYLss..l...VhLPWhlshs..hpKs.LEsWl.s.N...WW.....NTtp...Sc...hh...lstlQEc...s...hLc+F.hcl..EELhlL-cMl.c................-....h.s..pTH...........Q...c..l..c..ItI..HKETIQLlchaNE-p.lchIhHh.TNlIsFshlSuah..............IL...Gpcc.LsILNSWlQEFhYsLSDThKAF.ILLlTDLhIGFHSsHGWELhIsulhcc.aG..hs..cN-p..IISsLVSTFPVILDTIhKYWIFRaLNRlSPSLVVIYHSMN-..................................................................... 0 16 53 72 +1516 PF03879 Cgr1 Cgr1 family Wood V, Bateman A anon Wood V Family Members of this family are coiled-coil proteins that are involved in pre-rRNA processing [1]. 
22.50 22.50 22.60 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.22 0.72 -10.74 0.72 -3.88 18 165 2009-09-10 18:08:07 2003-04-07 12:59:11 9 5 161 0 121 151 0 105.60 41 66.03 CHANGED pst..scGhRlNGKsW+spKcsFRssu.......phTSaEpRtpcRhpppthKt+EKELK-EKEspRp....p+IptlK-RRttKEEKERYE+MAtKMHtK+VERh+RREKRNKhL+p ...................s..scGhphsGKsW+s.s.Kp..s.FRsps.....................t.hTSaccRhpcRtptpthKp+pKElK-EKEs-R.p.....p+Ipt.l+-RRttK.EE.+....E.R.Y.E+hAt.K......M.H.t.K+...VERhKR+EKRNKhLpp.......................... 0 35 66 103 +1517 PF04752 ChaC ChaC-like protein Mifsud W anon Pfam-B_3722 (release 7.5) Family The ChaC protein is thought to be associated with the putative ChaA Ca2+/H+ cation transport protein in Escherichia coli. Its function is not known. This family also includes homologues regions from several other bacterial and eukaryotic proteins. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.26 0.71 -4.32 15 1371 2012-10-02 16:39:48 2003-04-07 12:59:11 7 12 1051 0 511 1064 243 169.70 37 77.62 CHANGED lWVFGYGSLlW+PuFsas-phhual+Ga+RpFh.ssscHRGTscpPGRVsTLhcs.t.........................utshGsAY+lsutph.ttslpaLp.REh..sG.ptppl.hh...................psssspssh.psllaluos...pNptYhGss.sl-chAp..............pIssAsGsSGsNt-YLFpLtctLcpls............................hcDcaLhcLtptVcct 
.................................................lWlFGYGSLh........W...p.....Psh.....t.......as..E......ph........s.up.ltGa+RsFh.h.............t....sp....s.tRG..Ts..cpP..GRsluLcc..u...............................................................GpshGl..AYRlst..p..p.h..cp..t.lphLhcREh...hs.....t.phlshah.............................................................p..ss.st..h......pA......lsalhs................cpspYt...ust...sh...pslAt..............hIusAsGs.GsNt-YLhpLtptLpphG..............................hpDctLppLhttVt..h.................................................................................................................................................. 0 128 251 387 +1518 PF00195 Chal_sti_synt_N Chal_stil_synt; Chalcone and stilbene synthases, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The C-terminal domain of Chalcone synthase is reported to be structurally similar to domains in thiolase and beta-ketoacyl synthase.\ The differences in activity are accounted for by differences in this N-terminal domain. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.48 0.70 -5.11 21 3069 2012-10-02 12:25:54 2003-04-07 12:59:11 14 18 1289 101 485 4736 327 174.40 49 54.42 CHANGED sslEph++AQRA-GsATlLAIGTAsPsNsVsQusYPDYYFRlTpSEHhs-LK-KFcRhC-+StIKKRYhaLTEElL+cNPslCsahuP.SLcsRQ-IslsEVP+LGKEAAtKAIKEWGQPKS+ITHLVFCTTSGVDMPGADYQLo+LLGLpPSVKRlMhYQQGCaAGGTsLRLAKDLAENN+GARVLVVCSEhTslsFRGPS-s+...hDsLVGQALFGDGAAAlIlGoDP ................................................................................................................................................................................................................t.....h....h..........p........h.h...t..t..............s...lppRh..hh.....h..................................................t.......................h...............................................s...s....h.....p.Rp.c...h...h..h.....c...s..s..c.L.up.-.AAhc.Al...c.....E.....W......G.....t.....s..t.....o....cI.TH.l.l.h......s.T..o..o..G...l.s.......h....P..G..s..D.h..p.Ls.p.hLG..L.+..s.oVc..Rlhh..a..p....G..CaAG.us..........s..L.R.h.A.K.D...L....A....E....N....N....t....G....A..R.V.LV..VC...SE....l.T....u..l..o.....F...R......u.........P....s...-...s...+..................L.-u.L....V.......G..pA.....L...FGDG.A.A.A.l.I.V.GuDP............................................................. 0 109 314 416 +1519 PF02797 Chal_sti_synt_C Chal_stil_syntC; Chalcone and stilbene synthases, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain of chalcone synthase is reported to be structurally similar to domains in thiolase and beta-ketoacyl synthase. The differences in activity are accounted for by differences in the N-terminal domain. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.90 0.71 -4.29 21 3003 2012-10-02 12:25:54 2003-04-07 12:59:11 10 16 1211 100 498 3983 257 132.70 50 39.80 CHANGED ELVSAuQTlLP-ScGuIsG+LpEhGLTFHLh+DVPtlIScNIEcsLhcAFsPlGIs......DWNSlFWlsHPGGPAILDQVEtKLsLcPEKLcAoR+VLSEYGNMSSACVlFlLDEMRKpShccGtsTTGEGL-WGVLFGFGPGLTlETlVL+SVs .........................................................................phhtuuQTllPDS-...uA.IsG+.L.R..E.s..G.....L..T.F.H.L..h.+.-.VPsLISc...NI.c...c..s..L.......sc.A..F....p......s...l......G..I.o.................D....W...N...s..l....F..W..I.u.HPGGPAILD...pV..Et+L.......sL..c..c.....K.lcuoRcV...L.uEYGNMS.....SAs..V.LF.l.L.D.-.h..R.c.p..........s..........h.....p....p.......t.....................t.......p..........................h.....c....hG............l...hhu.aGPGhshEhh...................................... 0 132 331 436 +1520 PF02431 Chalcone Chalcone-flavanone isomerase Mian N, Bateman A anon Pfam-B_2073 (release 5.4) Domain Chalcone-flavanone isomerase is a plant enzyme responsible for the isomerisation of chalcone to naringenin, a key step in the biosynthesis of flavonoids. 
23.00 23.00 23.10 23.10 22.80 22.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.18 0.71 -4.92 60 585 2009-09-11 05:35:35 2003-04-07 12:59:11 10 8 320 25 257 589 20 182.60 24 69.08 CHANGED FPss.l.p.ssss.........................pshhLlGt...................GlRslshh...tlKhhulGlYlpss.........slspltsphpucssp....................................-hpcu.phacsllsu.sh..cphhRlshl+s.lshpphp-tlscslhuphpths....................hs......-ppspAlccFpph...F..psc.shPtGsslhhph..sssG.s......Ls.luasppsp...............tpthuslcsch........lucslhppYl.GcpslSPss+cSluppls.sl ............................................................................................pth.LhGh........................................G..hRshph.....lphhulG.............lYlptp...............sl.t..h..tt..hhtp.st....................................-.tps.thapslls....s....s...h...cphh+lshl.hs.lshpphp-thtcshhsphpphsh............................p...-sttpul...cc..Ft.ph.....F....psc.shs.Gsslhhph.sssG..s..............Lp..lshs.t..................tp.thuslpsph...........lscslhp.al.Gcp.ssS.ts+pslspth.t......................................................................................................................... 
0 78 167 221 +1521 PF03502 Channel_Tsx Nucleoside-specific channel-forming protein, Tsx Griffiths-Jones SR anon PRINTS Family \N 20.50 20.50 21.40 20.80 20.10 19.30 hmmbuild --amino -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.70 0.70 -4.57 44 1502 2012-10-03 17:14:36 2003-04-07 12:59:11 8 2 831 6 162 633 25 237.30 33 88.51 CHANGED tsshpWhphshhhu.t.phs.....pps-....csYLEhEaGG+suhhDLYGYlDl.slt..sspsscp...ttt.sphFhchtPRhSlDthoGcsLuaGPhpElYh.......us.hsast.............................s.ss....hhhGlGsDl.sshhsphshNlaupYshpsh..............pasGaphp.ssWhtPahhhtssshloapuah-apFutcpthts.........psssGhshhhulaW+.o-ca.......ssGhsh+hacshh.htst...............huhcooGhuaYhslsYcF ..........................................................................................................................h.p.h.apShslhss.p.chu.......phss......psY.LEhEsuuphsWh-hYGahDh.shhs...tpspst..........s.oshahchcPRhSIctlsssp.hsFGPap-hYh.......As.s...h.as..............................cpss....ahhGlGhDl.oGhshhhphNlYt+Yp.psY...............pasG...aphp..ltahsshs.hhhsupphohpuas-a-asp...Dtu.sssp...........tspsul.s.uuhh..Lths..hs+a.......phulshRYacsththt..s.............hsh.cs.hG.uhhhslsYpF.............................. 1 24 50 109 +1522 PF03924 CHASE CHASE domain Yeats C anon [1] Domain This domain is found in the extracellular portion of receptor-like proteins - such as serine/threonine kinases and adenylyl cyclases [1,2]. Predicted to be a ligand binding domain [1]. 
20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.08 0.71 -5.08 191 1214 2009-01-15 18:05:59 2003-04-07 12:59:11 8 223 668 14 443 1200 120 181.20 18 21.82 CHANGED sstsl.spscacsasps...lhpphsuhpuhs..at.plstpc..h..statpphppp......thst..apl...........t..................sps.hhslhalpPhs.tNppAlGhDhtopst++tAhpc....Atpoups..slouslp..LlQss......ptGhl....lhhPVa.tt.........................t.....pphhGhlhushchsslhpshhtpp.........tplslplhD.................spssssplh.ass ........................................................................................tphtth...htt...h.tt...hssh..u.ht....h......p..lt...tp....h..tthtt...phttp...........h.......hpl........................................tts.hhslta..l...........Phs....tNppsl..GhDh..ts......p..sp..p..p..ps.ltc....Atpssps..slouP.......lp...L......lQss............t.G.hl..lhhPVa...p.t.....................................tp.................pphhG..........h.lsushchsslhpshhh.tt............plslplhD..................ts.tt...hh.............................................................................. 0 131 283 371 +1523 PF03173 CHB_HEX Putative carbohydrate binding domain Bateman A anon Pfam-B_8666 (release 6.5) Domain This domain represents the N terminal domain in chitobiases and beta-hexosaminidases EC:3.2.1.52. It is composed of a beta sandwich structure that is similar in structure to the cellulose binding domain of cellulase from Cellulomonas fimi [1]. This suggests that this may be a carbohydrate binding domain. 
25.00 25.00 26.20 25.30 20.30 19.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.97 0.71 -4.79 6 353 2012-10-01 21:34:18 2003-04-07 12:59:11 8 7 306 4 75 309 10 155.80 34 18.70 CHANGED sNLclphpllcshsuppshsCssht......s.huuCh+lphohsspspsscuc.DasIYFpSl+hshps-u-pFtIsHlsGDLHKLpPTstFuGhsuGcohslplsuphWQlhcoDhhPphal...ousstcPcsltsTs.......TE.........-lstFlsshs...ssQhKpTscDpsshtsuss .........................................................LtlphplhsNpuuptGhsCttht.........A-hAuCsps........phsLs........NpG.pss......uc.DWsIYFp.S.I.Rh.l..........L........p.l.s.s.s.......p..............F...........p.ls+loGDLa+lpPTcpFsGhusG-slplPhluEYWplhpoDhhPphal........sussAcP.clltshs.......T-..........DhstaVpshp.......sp...ph..+R..o.....s...Dps.hhps..t..................................... 0 18 32 55 +1524 PF03174 CHB_HEX_C Chitobiase/beta-hexosaminidase C-terminal domain Bateman A anon Pfam-B_8666 (release 6.5) Domain This short domain represents the C terminal domain in chitobiases and beta-hexosaminidases EC:3.2.1.52. It is composed of a beta sandwich structure [1]. The function of this domain is unknown. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.92 0.72 -4.14 264 1239 2012-10-03 16:25:20 2003-04-07 12:59:11 8 244 639 4 431 1707 370 74.60 23 9.83 CHANGED ssshhssssG.shts...s.pls.....lss.tpsss.....sIaYThD.GopP........ots..S.phY..................s.s..P.lsl..ss.s...........l+shuh..sss.tpsSpltotp ..............................h....h.s.sG...h.t.....stpls.......l..ss..ttsss......pIhY.Th..D.G.o.p.P..............................s.t........S..th..Y.................................s..s..P..l.slspss.s............................l+shuh.....sss.tp.Sphhp..t............................................................................................... 
0 199 339 396 +1525 PF01339 CheB_methylest CheB methylesterase Finn RD, Bateman A anon Sarah Teichmann Domain \N 20.40 20.40 20.60 20.60 19.90 18.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.97 0.71 -4.85 180 3655 2009-01-15 18:05:59 2003-04-07 12:59:11 12 120 1959 4 1223 3093 363 180.30 37 42.15 CHANGED llsIG.uSsG.GspALppllsp...LPs.shs..h.sllllQHhsss.assthschLsptsslsVpp.Ap-Gp.hl..ps...GplYlAPsst.clt...........l..........p...ssp..........htlpht.....sthstt....+..Pol.DshFpSlApthusps.lullLoGhGsDGupGlttl+ptGGh.sl..uQct.pouhhhGMPcuAlp.sG.ssDtllshpcluptl.hph ..........lluIGuSTG.GspA..LpplLps.LPs..shP....sl.l.l.....s.Q.H.MPss..FT.p..uhA...c+L.s.ph..s.p.l.s.V+E.A.p-.G-..hl...ts...GpsY..lAP.G.sp.Hhtl.........tp....sup...................................lplptt.......ssssta....+..PSVDlLFcSsA.c..t..h.G.p.ps.lGVlLTGMGsDGAtGhhtl+.p.s.G.u.h..s.l..AQ...cE..so..s..l..VaGMP+t.Alp.h..G.ssspllsLsplupplh..h................................................................................ 0 399 770 1013 +1526 PF04509 CheC CheC-like family Waterfield DI, Finn RD anon COG1406 Domain The restoration of pre-stimulus levels of the chemotactic response regulator, CheY-P, is important for allowing bacteria to respond to new environmental stimuli. The members of this family, CheC, CheX, CheA and FliY are CheY-P phosphatase [1,2]. CheC appears to be primarily involved in restoring normal CheY-P levels, whereas FliY seems to act on CheY-P constitutively. CheD enhances the activity of CheC 5-fold, which is normally relatively low [1,2]. In some cases, the region represented by this entry is present as multiple copies. 
20.20 20.20 20.20 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.22 0.72 -4.22 33 1993 2012-10-01 19:50:22 2003-04-07 12:59:11 7 14 662 13 597 2015 46 37.70 36 24.19 CHANGED httuAlpEluNIhsGsssouLuphht.tpl-hosPslth ......phsulpEluNIhhGsAuTuLSphls.pplslosPpl..h.............. 0 261 447 522 +1527 PF03975 CheD CheD chemotactic sensory transduction Bateman A anon COG1871 Family This chemotaxis protein stimulates methylation of MCP proteins [1]. The chemotaxis machinery of Bacillus subtilis is similar to that of the well characterised system of Escherichia coli. However, B. subtilis contains several chemotaxis genes not found in the E. coli genome, such as CheC and CheD, indicating that the B. subtilis chemotactic system is more complex. CheD plays an important role in chemotactic sensory transduction for many organisms. CheD deamidates other B. subtilis chemoreceptors including McpB and McpC. Deamidation by CheD is required for B. subtilis chemoreceptors to effectively transduce signals to the CheA kinase [2]. The structure of a complex between the signal-terminating phosphatase, CheC, and the receptor-modifying deamidase, CheD, reveals how CheC mimics receptor substrates to inhibit CheD and how CheD stimulates CheC phosphatase activity. CheD resembles other cysteine deamidases from bacterial pathogens that inactivate host Rho-GTPases. Phospho-CheY, the intracellular signal and CheC target, stabilises the CheC-CheD complex and reduces availability of CheD [3]. A model is proposed whereby CheC acts as a CheY-P-induced regulator of CheD; CheY-P would cause CheC to sequester CheD from the chemoreceptors, inducing adaptation of the chemotaxis system [4]. 
25.00 25.00 34.30 27.90 21.80 21.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.33 0.71 -4.31 150 1074 2009-11-17 14:42:28 2003-04-07 12:59:11 8 8 935 2 412 920 66 114.90 30 61.85 CHANGED MLPsssttptt.........ss.s+YushAh-hLlscll+hG.Ap+ppLpAKlhGGAphhs........shhsIGpRNschs+chLpppsIsllucDlGGstuRplhF.s.poGclhl+pl..........tpt.p.....h ................MLPpsstspst..........sss+YushAhchLlschl.c.h.G..A.p.....+..p..cLpAKlhGGAphhs..................shhslGpRNschscchLpppsIsllAcDlGG.s.tuRplhF.s.poGclhl+pl........t......t........................... 0 151 288 351 +1528 PF01739 CheR CheR methyltransferase, SAM binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_694 (release 4.2) Domain CheR proteins are part of the chemotaxis signaling mechanism in bacteria. CheR methylates the chemotaxis receptor at specific glutamate residues. CheR is an S-adenosylmethionine- dependent methyltransferase - the C-terminal domain (this one) binds SAM. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.07 0.71 -5.02 22 3912 2012-10-10 17:06:42 2003-04-07 12:59:11 13 179 2109 2 1263 3557 543 191.20 32 48.37 CHANGED NhTpFFR-stpFchLpppll.s.l.phptt...pp.l+lWSAuCSoGcEPYSlAhsltc...hh.sshs..thpIhATDIshpsLppA+tGhYspptl.psls.thhp+YF.ctsstsap....lpscl+phVpFchhNLhs.....shhsphDlIFCRNVlIYFcppsppcllp+htstLpssGhLhlGpSEsls.thsshFphhpsshthh 
.......................................................NhTtFFR-sppFphLpcpll...h..t.ptt..................tp..lRlWSAuCSoGEEPYS.lAhhLt-............hh...p.h....h...s.......phpIhATDIss..ps..L..pc..A..+....pGlY...s.......tpp.......l........c........s......l.s.................p...h........h........p.........+........a...F..........c.......t.....p........s......s...p...ap.................lppp.l.+p..hV..p.F..pphNLlp..............h....s.......h......h....s.........p....a.....D...lI.h.C....R.N.V.h...I..YF.c....p.p...s....p.pc...l.lppFt.t.t.Lp.ss.Gh..La..l...Gp...oEsls...th..sphap.h.......h................................... 0 424 817 1056 +1529 PF03705 CheR_N CheR methyltransferase, all-alpha domain Bateman A, Griffiths-Jones SR anon Pfam-B_694 (release 4.2) Domain CheR proteins are part of the chemotaxis signaling mechanism in bacteria. CheR methylates the chemotaxis receptor at specific glutamate residues. CheR is an S-adenosylmethionine- dependent methyltransferase. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.39 0.72 -4.60 77 3379 2009-01-15 18:05:59 2003-04-07 12:59:11 10 136 2012 2 1043 2654 197 56.40 24 14.23 CHANGED scpchptltphlhppsGlslss.hKpshlppRLt.pRhcthslpshscYhphL...pssppp ..................ptcappltphlhppsGIsLss..tKcshlhpRLs.c+.l..c.t...h..s..l..s..sascYhphL...pps..................... 0 348 678 867 +1530 PF01584 CheW CheW-like domain Bateman A anon Pfam-B_579 (release 4.1) Domain CheW proteins are part of the chemotaxis signaling mechanism in bacteria. CheW interacts with the methyl accepting chemotaxis proteins (MCPs) and relays signals to CheY, which affects flageller rotation. This family includes CheW and other related proteins that are involved in chemotaxis. The CheW-like regulatory domain in CheA [1] binds to CheW, suggesting that these domains can interact with each other. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.71 247 10977 2009-01-15 18:05:59 2003-04-07 12:59:11 14 74 2190 12 3275 8780 671 135.50 21 33.57 CHANGED phlhhpluscp........aulsltpVpEllph..sp.hsplspss.shh.......Gl.lslRG....pllPllcLpphhshsstp................tppshllllc..hs.................sphhGllVDplhshhplshcslpss......hs...thsshlpGsshh.......cu......p...............llhlLDlppllpt.t ............................................hlhhp.lu.scp.........aulslt.pVpE..llph......sp..hs...pls...p...ss...shh..................Gl.l.sl..RG.........pllPll..c.L.p..p.h.h.s...hsssp....................................................tppshll.llp....hs.................pphh.Gll.VD.plhs......h.h.p..l....sh.c.s..lpss.................ht...t..ss.h..ls.Ghshh.............cs.........p.....................lhhllDlppllt...h............................................................................................. 0 1032 2073 2737 +1531 PF01111 CKS Cyclin-dependent kinase regulatory subunit Bateman A anon Sarah Teichmann Domain \N 20.90 20.90 21.90 22.20 18.10 20.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.52 0.72 -4.14 33 488 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 312 20 323 442 1 71.50 58 58.47 CHANGED pIhYS-KY.DDp.aEYRHVlLPK-lsKhlP+s..........+LhoEsE.WRsLG.......lQQS.GWhHYhlHcPEPHILLFRRPhs .........................IaYSs.KY.D-p.aEYRH.....VhLPKcl...sKhlPKs.........................+LhoEpE.WR.s.LG.......lQQ........ShGWhHYh.lH..cP.EPHILLFRRPl............. 0 119 178 263 +1532 PF04344 CheZ Chemotaxis phosphatase, CheZ Mifsud W anon COG3143 Family This family represents the bacterial chemotaxis phosphatase, CheZ. This protein forms a dimer characterised by a long four-helix bundle, composed of two helices from each monomer. 
CheZ dephosphorylates CheY in a reaction that is essential to maintain a continuous chemotactic response to environmental changes. It is thought that CheZ's conserved residue Gln 147 orientates a water molecule for nucleophilic attack at the CheY active site. 26.70 26.70 26.90 26.70 26.60 26.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.47 0.70 -4.68 60 1206 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1062 4 279 728 51 184.90 44 83.43 CHANGED ssplapclGpLTRpLH-uLpshtlDp+l.......ctAsscIPDA+-RLsYVlchTEpAAs+ThsAVEtuhPls-pLpspspplpspWpchhppplphs-....F+pLscchcpaLpp.sppsosplpspLsEIlMAQDFQDLTGQVI+RVlsLVp-lEppLlplLh.hu.scpp.tht......................pGP.lss-p+sDVVsuQD-VDDLLuSLGF .............................................s..clhtcIGpLTR.L+-SL+-L...u..lDptl.......tpAs-sIPDARDRLtYVlpMTtQAApRsLsuV.......EtupPhp-phcpp...Ap.tLp..tcWsc......hht..s......I...-ls-................h...R...pLspcsc.paLtp.Vstc.ouh...ssupLhEIhM..AQDFQDLTGQVIKRhhcllpElEcpLlhl.....Llpsh...sp...ppt.cstc..ps................................psL.hNG.PQl....s..s.....p..+..s..s....V..V.....u..SQDpVDDLLsSLGF.................................................................................... 0 63 151 211 +1533 PF00187 Chitin_bind_1 chitin_binding; Chitin recognition protein Finn RD anon Prosite Domain \N 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -8.90 0.72 -3.97 18 1066 2010-01-08 13:37:10 2003-04-07 12:59:11 14 39 234 154 390 1128 1 38.80 51 11.02 CHANGED ApQCGpQuGGthCPssLCCSpaGaCGoTs-YCGs..GCQ.SpC .................ppCG.........s....s....t....hC....s...s......s......l...CCSpaGaCGoTs-Y..C......Gs...........GCQ.u.C................... 0 83 225 335 +1534 PF01644 Chitin_synth_1 Chitin_synth; Chitin synthase Bashton M, Bateman A anon Pfam-B_892 (release 4.1) Family This region is found commonly in chitin synthases classes I, II and III. 
Chitin a linear homopolymer of GlcNAc residues, it is an important component of the cell wall of fungi and is synthesised on the cytoplasmic surface of the cell membrane by membrane bound chitin synthases [2]. 25.00 25.00 25.10 29.30 24.60 24.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.82 0.71 -4.46 14 884 2012-10-03 05:28:31 2003-04-07 12:59:11 12 12 402 0 349 874 2 143.70 53 23.85 CHANGED MYNED-lLFsRThHGVMKNIuHLCoRp+SpsW..GtDuWKKVVVCIVSDGRsKlp.RsLshLAulGVYQ-GlAKs.VssK.VpAHlYEYTTQlSIDsshpacGsc....+GlsPVQllFCLKEcNpKKINSHRWFFpAFu.lLpPNlslLLDVGT+PuspSIYpLWKuF ................................................hYNEsch..hhsRThtulhpNIt.hsp.....h....p......p......SphW...........G...t......s..uWpKIVVClVuDGR..sKls.RThslLAuh..GVYQ.-.........G.lA..Kp.......p.VN..GK.-V.sA.HIY..E..............YTTQ..lu..l.s.....p.tp..h....h.t.hp.............pshsPVQhlFCL.KEKNpKK.INSHRWhFpAFuphL.......pPp.........lClL...............lDsGT+Pu.polYpLWcuF................................. 0 111 202 304 +1535 PF03142 Chitin_synth_2 Chitin synthase Mifsud W anon Pfam-B_1787 (release 6.5) Family Members of this family are fungal chitin synthase EC:2.4.1.16 enzymes. They catalyse chitin synthesis as follows: UDP-N-acetyl-D-glucosamine + {(1,4)-(N-acetyl-beta-D-glucosaminyl)}(N) <=> UDP + {(1,4)-(N-acetyl-beta-D-glucosaminyl)}(N+1). 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 527 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.68 0.70 -6.12 10 1072 2012-10-03 05:28:31 2003-04-07 12:59:11 10 35 361 0 640 3978 47 331.20 28 34.17 CHANGED ssshlacslssQPhs-apsaGasLtHTIClVTCYSEuEEGLRsTLDSlAsTDYPsSHKLLlVICDGlIpGuGND+oTP-IVLsMMcDhlsPs--lEPhSYVAVAsGsKRHNMAKVYAGaY-Yss.......pls.ppQQRVPhlVVVKCGTPsEsottKPGNRGKRDSQlILMsFLpKVhFDERMTsLEaEha+sIWplTGlsP-FYEhVLMVDADTKVaPDSLT+MVAsMl+DPpIMGLCGETKIANKcsSWVThIQVFEYYISHHhoKAFESlFGGVTCLPGCFoMYRIKAsKGspshWVPILssPDIVE+YSENVVDTLH+KNLLLLGEDRYLTTLMLKTFPKRKphFVPpAtCKTlsPDoFpVLLSQRRRWINSTVHNLhELVLl+DLCGTFCFSMQFVVFIELlGTlVLPAAIsFTlYlIlhulls...pPsPhIsLlLLAhILGLPAILIllTsR+WsYluWMllYLLALPlWNFVLPuYAaW+FDDFSWGsTRpVpGEct..KcstscsEGcFDsSpIsMKRWcEaERE ........................................................................................................................................................s...................................................................................................................................................................................................................................................................................................................................hllhlDuDs.h...p....u....lphhlt.....h...........p..s.....p..l......huhCG.....p.........h.....t......l.............t..........s.........t.............t......p.......u....h........h.......s.......h....h...............Q....a.....EYh.lua..hh.t..K.uhEu.hh.G.s.VhCh.P.G.CFshaRhps..................t..............................................................t...t.l.h..........p...p...Y...s..p.................s.....-......s.....h..a....h............p.h.........h.........................h..G.....E...DRa..L....o.o....L....hl..p....t..........t....hp....hp.......a....sspAhshThsPc....sap..............hhsQRRRW....h.s........S...T....l....p..N.....h.....h......-........L.......h....h........h........t...p........h.......p.....h.......h....
....h....p....h.s..h..............h...l...l........h..h.....p....l...h....u.....s....h...lh.......P...s......s...h..h......h.......h...h.....h......h........................h..........h........h.ht.............t.............h..............h.......h........s...h..........h.........h....l...s..h..h.......h..s.....l......h....h.......h..h....h....h.......h...................t...p.h.....h...h...h.....h......h..h......h....h.h................................................................................................................................................................................................................................................................................................... 0 272 404 566 +1536 PF03503 Chlam_OMP3 Chlamydia cysteine-rich outer membrane protein 3 PRINTS, Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 62.80 62.80 17.80 16.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.44 0.72 -3.75 3 38 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 37 0 4 13 0 53.30 81 60.81 CHANGED CGVVSLSSCCRIVDCCFEDPCAPpsCNPCEs.+KKD+ssGCNuCGoYVPSCSKPCG .CuVVSLSSCCRIVDCCFEDPCA.PhpCsPCEu.+KKDVssGCNSCsuYVPuC.KPCG 0 1 1 3 +1537 PF03504 Chlam_OMP6 Chlamydia cysteine-rich outer membrane protein 6 PRINTS, Griffiths-Jones SR anon PRINTS Family \N 27.20 27.20 27.50 31.00 26.80 27.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.32 0.72 -3.73 2 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 44 0 5 50 0 91.30 82 20.13 CHANGED CaG+MYsV+VNDDpNVEIoQAVPEYATVGSPYPIEIhAsGK+DCVsVlITQQLPCEsEFVpSDPATTPTuDuKLlWpIDpLGQGEKsKITVWVKP .........CaG+MYoV+VNDDpNVEIoQuVPEYATVGSPYPIEIhAsGK+DCVsVlITQQLPCEAEFVpSDPATTPTuDGKLlWKIDRLGQGEKsKITVWVKP. 
0 2 2 4 +1538 PF01308 Chlam_OMP Chlamydia_OMP; Chlamydia major outer membrane protein Finn RD, Bateman A anon Pfam-B_1429 (release 3.0) Family The major outer membrane protein of Chlamydia contains four symmetrically spaced variable domains (VDs I to IV). This protein is believed to be an integral part to the pathogenesis, possibly adhesion. Along with the lipopolysaccharide, the major out membrane protein (MOMP) makes up the surface of the elementary body cell. The MOMP is the protein used to determine the different serotypes. 19.60 19.60 19.70 20.10 18.50 19.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.33 0.70 -5.86 4 1235 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 64 0 7 951 0 269.70 66 99.06 CHANGED MKKLLKSALL.AA.huSsuSLQALPVGNPAEPSLLIDGTlWEGhuGDPCDPCuTWCDAISlRsGaYGDYVFDRVLKsDVsKpFp.MGAsPTusssA....s.oTss-RPNPAYGKHhpDAEWFTNAualALNIWDRFDVFCTLGAosGYhKGNSuuFNLVGLhGlKGoSl...sAsplPNsulopGVVELYTDTTFSWSVGARGALWECGCATLGAEFQYAQSKPKVEELNVlsNsAQFoVpKP+GYhGssFPLPloAGT-sATu..TKsATIsYHEWQVGhALSYRLNMLVPYIGVpWSRATFDADsIRIAQPKLAsAlLNLTTWNPTLLGpuTslsoo.NcFADhhQIVSlQINKhKSRKACGVuVGATLlDADKWulTuEsRLINERAAHlsAQFRF ....................................................................................RhGaYGDaVFDRVLcs.D.VsppFp.MGtt.P..o..s.s..s..sus...s.oo..h..ssR.NPAYG+HMQDAEMFTNAAhMALNIWDRFDVFCTLGAooGYLK.GN.SASFNLVGLFG..sst..p..s.s.....tss....plPNhulsQ...uVVELYTDToFuWSVGARAALWECGCATLGApFQYAQSKPKVEELNVLsNsApFTIpKPKGYVG..t..p..FPLsloAGT-s..ATs.....TKsAoIcYHEWQsuLALSYRLNMhsPYIGVpWSRASFDADTIRIAQPK.Ap..s............lhshTThNPTlhGt..us.s..h..s......s.....s.....s.hsD........................................................................... 
0 4 4 6 +1539 PF00504 Chloroa_b-bind chloroa_b-bind; Chlorophyll A-B binding protein Finn RD, Bateman A anon Pfam-B_54 (release 1.0) & Pfam-B_5772 (Release 7.5) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.63 0.71 -3.97 157 2672 2009-01-15 18:05:59 2003-04-07 12:59:11 16 22 422 33 911 2617 772 131.40 25 66.18 CHANGED hPstL.sG....ph...sG-hG........FDPLGLu...t.-s.......................................................thchhRpuElhHGRhAMLuslGhlssEhh.t.........sh.....t.Ps....ahp.ss..............................................h.s......h...hh...lhhhuhhEhhp.tshhs..t.t.........................hhPG..s...............h.FDPl....G.hs.........ts.pphtph+hpElpNGRLAMlAhhGhhsQth ......................................................................................t.......aDsh.t.h.....t...................................................................thth...ptsElhpuR...hAMLGh.h.GhlhsEhhst................s.........t.............hht...s.................................................h...hh......hh.hhuh.h..E..h..h..t..t.h...................................................hPG..s..........................hssh........s.hs..........p.....t....tht.hphtElpNGRLA.Mhuhhuhhhp..h.................................................... 
0 418 719 840 +1540 PF02962 CHMI 5-carboxymethyl-2-hydroxymuconate isomerase Griffiths-Jones SR anon Structural domain Domain \N 21.30 21.30 21.40 21.40 20.70 21.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.57 0.71 -4.00 3 831 2012-10-01 20:38:22 2003-04-07 12:59:11 10 4 769 15 152 460 70 119.90 39 95.22 CHANGED PHFhlECTDNIREpuDLP-LFuKVNssLAAo.GIFPlGGIRSRAHWLDTWQMADGQH.DYAFVHMTLKIGAGRSLESRQ-sGDMLFsLIKsHFAALMESRhLALSFEI-ELHPTLNaKQNNVHALF .........................................PHhlhEhosNlc-ps.cls.s.Lh.pplspsLh......s......o.....G........lFP.l.u.G..IRSRAhh.h-..sap..h...A..DG.........pp....-hAFlHhoL+IGuGRShEs+pplu-t.LFsllps.+.h.As.l.htp+.h.lALShEltEhc...s.h.sa..Kp.NslH...................................... 0 18 59 108 +1541 PF04428 Choline_kin_N Choline kinase N terminus Kerrison ND anon DOMO:DM04048; Family Found N terminal to choline/ethanolamine kinase regions (Pfam:PF01633) in some plant and fungal choline kinase enzymes (EC:2.7.1.32). This region is only found in some members of the choline kinase family, and is therefore unlikely to contribute to catalysis. 20.40 20.40 20.40 21.00 20.10 20.30 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.53 0.72 -4.50 21 146 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 123 0 97 147 1 62.40 31 9.87 CHANGED sshtsut.h..s...sp..LD.shs..h............................h+p-lhpllpoLplssWtcls.-tss ................t....ss.ph.lPs..scshLDNShshsY.................................FKp-IlcLh+oLclpuW++ls.p...t.... 0 16 47 78 +1542 PF01633 Choline_kinase Choline/ethanolamine kinase Bateman A anon Pfam-B_1165 (release 4.1) Family Choline kinase catalyses the committed step in the synthesis of phosphatidylcholine by the CDP-choline pathway [1]. This alignment covers the protein kinase portion of the protein. 
The divergence of this family makes it very difficult to create a model that specifically predicts choline/ethanolamine kinases only. However if [add Pfam ID here for Choline_kinase_C] is also present then it is definitely a member of this family. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.44 0.70 -4.76 38 1798 2012-10-02 22:05:25 2003-04-07 12:59:11 15 27 951 30 795 4525 1654 199.50 21 51.55 CHANGED pspplLlRlYGps..s.................chhhsRppEhhshphlucpulGPcLhGhFssGRlE..cFl.sucsLssp-lpssclpttIApphtchHslsh.........................................................hpcpstlapphcca.......................h..pphspppthpthshppLpcElstlcchl.p...............thcps...lVFCHNDLt.uNllh......................................................................psspplhlIDaEYuuaN.tuaDlANHFsEhshshp..t...ahh..chshaPspcp .............................................................................................................................................................................................t..pthllR.h..h.Gtt....s......................................phh...l.sR..p.p.....Ehh.sh...thl.t.p........hs...h...s......s.........p..h...h..........h..........h....................p........u...t......hp...........ca.....l...p............u............p...s....L..s..s.......p....p...l.......p....p.............t......h....h........p.....t.....l...A....p.t....h......tph.H....s....h.th..........................................................................................................h.p.t...t......t.h.a..p..p......h...p..c..a...............................................................t..t......t.....t...p...t...t..h..........h......h.......p......h....p...p.....l....p.....p...p.l......t.....p...l...c..p....hl...p......................................p..h.t..t.s............hs...hC...H..N..D..........lh....t.Nllh....................................................................................
...............................................p..p..p.....s.....p....l....h......l...I...D.a...EY..uu..hN.......saD..l..u...s..a........h...h...Eh.t...hs...t..............h................................................................................................................................. 1 255 439 638 +1543 PF04345 Chor_lyase Chorismate lyase Mifsud W anon COG3161 Family Chorismate lyase catalyses the first step in ubiquinone synthesis, i.e. the removal of pyruvate from chorismate, to yield 4-hydroxybenzoate. 21.00 21.00 21.30 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.93 0.71 -4.82 3 1188 2012-10-01 19:33:20 2003-04-07 12:59:11 8 2 1158 11 198 690 245 159.80 37 92.75 CHANGED hLsNATWQpuDDlp.luPulpsWLh-pGSLTRRLssts-.clsV-lLuEuWh...TLpsDEsQtLsscpusssWlREVIL+GcDpPWVFARTLIPRSSLcsQsFDLsQLGsRuLGEhLFSsSshcRssLEVs+scss.......htuLaARRSRhShGAcsMLVAELFL.TPpIao+ssl .............................................hhh.......................h........lt.s.t...l..h.-WLh..psShT+Rhcpp.s.p...pl.oVp.......hlpEsas...............p......sp..s.Ep.h.L...hs.p.....ps..............h..W.l.REllLpu..D.u..p.P..Wl..huRTll.P.hooL...............pG.s.t...h.s.L.p...cLGppPLGc.h.LF......os.s..s..ls...R...Dhl..-..l...u+ss...............................tL.W.u.RRS+h.ch.s....G....c....P...LL.lTElFL.tssla.................................................. 0 34 86 143 +1544 PF01723 Chorion_1 Chorion; Chorion protein Bashton M, Bateman A anon Pfam-B_1914 (release 4.1) Family This family consists of the chorion superfamily proteins classes A, B, CA, CB and high-cysteine HCB from silk, gypsy and polyphemus moths. The chorion proteins make up the moths egg shell a complex extracellular structure [2]. 
20.90 20.90 20.90 21.80 20.80 20.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.54 0.71 -4.39 14 91 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 6 0 62 96 0 136.40 30 89.51 CHANGED huphlLhlsspALhsQoshSQChuR....luu.t.....Gh.GhsuLG.YpGhGh.sh....GhG.Ghts.h.usus...LsAshGGsLsVso.ousAPTGLul..sSENsY....EGsVuVsGpLPFLGTssVAGphPTuGhGulpYu...CGDGAVuIouEsuh......uhs.s.shs.s.h.us.s.uhu......hGYtshs.GCGC.........Gs .................................................................s.......h.h.shhhp.hhuph.........h..........................s..s...s.......hs.......Ghuh............uhu...................................htus.G..s........l.s..sshuPsGlsh..tups.th.................-GsluVsGpLPhhGossltGphPs.GhGsVpau...CusGsVuIsscss...............shs..h.......................................................h............................. 1 39 62 62 +1545 PF03964 Chorion_2 Chorion family 2 Finn RD anon DOMO_DM03001 Family The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary[1]. 21.90 21.90 26.90 26.90 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.80 0.72 -3.46 14 51 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 21 0 18 55 1 106.00 35 72.01 CHANGED GYGusshG...s.u...psh.p................Ytt...........................s+G.ttuYGpt........................t.hhu.sssspAtutAt.u.AussuGs.p.hthPs.cltsu.hGPph.Ga.........G......hGtst .................GYGusshG....GYuh.spsshp.hsh................uYGs...........................upG...suYGps..........................t.htu.sssupAhAhAphAhAAsssGs.h.at.Pshclpsu..hGPp.ua........h.G...h....t.... 
0 4 4 13 +1546 PF00425 Chorismate_bind chorismate_bind; chorismate binding enzyme Finn RD, Kerrison ND anon Pfam-B_164 (release 1.0) Family This family includes the catalytic regions of the chorismate binding enzymes anthranilate synthase, isochorismate synthase, aminodeoxychorismate synthase and para-aminobenzoate synthase. 20.50 20.50 20.50 20.60 19.70 19.60 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.53 0.70 -5.06 163 11419 2009-01-15 18:05:59 2003-04-07 12:59:11 13 52 4438 63 2457 8400 4662 249.70 32 53.38 CHANGED spppatptlppshpt.I..psG-...hhpls...Lopphphp..t..ths......s.........hslhppLpp......p.ssssashhh.............ph...................................h..luuSPEhhl.................p..hc................plpopPluGT...t.....R....uts.t.........................p......DtthtppLhpspK-psEphhlVDhlRscLuplt....s....so...........lplsphhplpp....hsslhHLsoplpu.pL.pts.hsh..h-llpulaPsuulsGsPKtpAhph....IpclEs.h.......sRGh.YsGslGals..su.....s......s-hslsIRoshh....ps...........hplhuGuGIVtcSssppEapEsttK .......................................................................s.pppatptlpps.p.ch.I..p..p..G.-........hapVl.lop..phphp....h........hs....................s...........................................hsh.a.pp.....Lpp......t..Nsu.sYhaah............phss...............................................htl..lGuSPEhhl................................c....hc...st...............plpopPlAGT...t...sR...........u.t.s..s..p.............................p.....Dppht.pcL..hsstK...............-puEphMlVDLhRNDl.u+ls.......suo................lcV..s..chhplcp.....a.sp....VhH.......LsS.plpG....p...............L......pt....s....hss..................hc.hl.p.u.ha.PsG...oloGAPK.hcA.h.......pl.I.p.c.l.Es...t............pRGh..YuGulGalsh.....sG.......................s..............h-hslsIRohhl...cs....................................spshltAG.u.GIVhD.SsPpsEapEottK............................. 
0 741 1558 2097 +1547 PF01817 CM_2 Chorismate_mut; Chorismate mutase type II Bateman A, Griffiths-Jones SR anon Bateman A Domain Chorismate mutase EC:5.4.99.5 catalyses the conversion of chorismate to prephenate in the pathway of tyrosine and phenylalanine biosynthesis. This enzyme is negatively regulated by tyrosine, tryptophan and phenylalanine [2,3]. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.53 0.72 -3.89 101 5974 2009-01-15 18:05:59 2003-04-07 12:59:11 16 36 4103 55 1304 3816 1745 78.70 27 28.68 CHANGED RppIDplDcpllpLLscRhplstpluchKppp.....shs.lhcs.pREppllcplhptsc....tshs..sphlpplappIhctuhthppsh ........RppIDplDppllpLLscRhclspcVuchKtpp............shs..lh.cs...pREpplL..pcltptuc........ttsls...schlppl...applhctShthpp..h............................. 0 373 798 1089 +1548 PF01264 Chorismate_synt Chorismate_synth; Chorismate synthase Finn RD, Bateman A anon Prosite Family \N 21.00 21.00 24.20 24.00 20.70 20.40 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.01 0.70 -5.96 158 4715 2009-01-15 18:05:59 2003-04-07 12:59:11 16 5 4374 29 1174 3448 3602 336.60 44 93.20 CHANGED hRhoTaGESHG.ulGsllDGhPAGltlspccIpt-LsRR+..PGt...u+.hso.RpEsDpVcILSGVh.cG......hTsGoPIullIpN....pDh+stcY..........................schtst.RPGHADhshthKY.G..h+DhRGG.GRuSARETAuRVAAGAlA+plLp...phG.....IclhualhplGsh........................phshp.h.......cp......ss..lhssDspstpchtphIcp.s+cpGDSlGGll-llspG......lPsGLGpPl.aD.KLDAcLApAlMSIsAlKGVEIG..sGFpsuphhGSpspDph......hhpst.............hh.pp....oNcuGGl.GGISsGpsIllRsAhKPssolhp.stpTVs.pstc..psplphtGRHDsCllPRAssVsEAMlAllLADthLc 
..................................hRhoThGESHG.uLssll-GsPsGltl.stt-..Ipp-.LcRR+sG..t......uR...h.so.RtEs..DpVcILSGVh..cG..........h.T....T.GoPIulllpN...pDp+s...t-Y..................................................................................sp.h..h.st..R.PGHADhshthKY.G..h.R..DhRGu..GRSSARETAhRVAAGAlAKphLt...p......h.G.............lcltualsplGsl.....php........................................................hhs.hp.pl......cp......ss.hhss...D.....ssttpphcphlcp.h+.+.pGDSlGuhlpllspG......lPsG.LGpsV..aD..+LDAclApAlMSINAlKGVElG..sGFpss.phpGSpspDEh.................hh.s.st...............................htoN+uGGl.GGlS............sGp......sIls+hAhKPhsolhp..P.hpols.pssE......pspl..hpGRaDPCls.RAsslsEAMlAlVLhDtlLp...................................... 0 394 771 1014 +1549 PF02417 Chromate_transp Chromate transporter Bateman A anon Pfam-B_1872 (release 5.4) Family Members of this family probably act as chromate transporters [1,2]. Members of this family are found in both bacteria and archaebacteria. The proteins are composed of one or two copies of this region. The alignment contains two conserved motifs, FGG and PGP. 
28.60 28.60 28.70 28.60 28.50 28.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.96 0.71 -4.66 103 4595 2009-09-10 17:38:28 2003-04-07 12:59:11 10 9 1631 0 1528 4268 786 171.40 25 85.64 CHANGED shhpLFh..hFh.plGhhuFGGuhuhlshhccphV.cppcWlopcpFhchlAluQhhPGPhslphushlGa+...h......sGhhG........ulsAsluh...hlPuhllllhluhhhtp..hpp..tshhpshhpGlpssslullhssu.....hph.sps.............hhtsh..hshhlh.shs..hlhlhhhp....h.....sslh.lllhushhGhh ...................h.hhplFh.hFh.+lGhhsFGGuhuhlshhp..p..phV....p..............p............h...p..........W....lo.......p...ppFh.chlAluQhhPGPh..sh...phu...sa....lGat.......h................sGhh.G.........................................................ullAsluh...........hLPuhllllhl.uhhhpp.........hpp......ps..h.....l.p.uhhpGlpsu.llullhsus......................hp.h.spp................................shtsh....hsh.s....ls....shs.....h..l.h...l......h.h..h.p..........l..........s.hh...lllhuulhGh............................................................................. 0 510 999 1289 +1550 PF00385 Chromo chromo; Chromo (CHRromatin Organisation MOdifier) domain Finn RD anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.93 0.72 -4.29 173 5794 2012-10-02 16:56:36 2003-04-07 12:59:11 19 330 405 83 3614 5547 121 53.30 24 6.52 CHANGED htl-cIl..cp+..hpcptt..................paLV+Wpu.hs....pcs.TWEs..tc.slppp.....p...lptatpp ..........................................l-cll...spc........htctpt.......................................pYhVKWcu..as.............pcs..TWEs...cc..plptt.......ph...lpta.......................... 1 1222 1720 2571 +1551 PF01393 Chromo_shadow Chromo shadow domain Bateman A anon Medline:95396576 Domain This domain is distantly related to Pfam:PF00385. This domain is always found in association with a chromo domain. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.74 0.72 -4.13 8 489 2012-10-02 16:56:36 2003-04-07 12:59:11 14 9 177 24 286 474 0 56.80 49 28.14 CHANGED RGLEPE+IIGATDSs.....G-LMFLMKWKsoDEADLV.AKEANhKCPQlVIuFYEERLTWHS .......................RGL-PE+IlG.A.T.Do.s..............G-..L..h..FLhKW.KssD....c...AD..LVsA+pANhKCPQl.VIpFYEE.RLoW+........ 0 69 99 194 +1552 PF00878 CIMR CIMR_repeat; Cation-independent mannose-6-phosphate receptor repeat Bateman A anon Pfam-B_764 (release 3.0) Family The cation-independent mannose-6-phosphate receptor contains 15 copies of a repeat. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.17 0.71 -4.43 12 1464 2012-10-02 14:19:21 2003-04-07 12:59:11 13 33 108 57 767 1357 0 136.00 23 77.56 CHANGED shpcsosIpFhCNcssh.s...sPVFhtEs......sCoYaFEWcTthACs...hpslpCplpDstu.pcYDLSuLo+tsc...sWcAVDsots.t.++pFaINVC+....hstspuCP.ssuusChVsc.spShNLGhhtpuPpht.sGslpLpYssGDhCs ....................................th...hsohIpFhCsps..s....u.............pP.h.h.h...p..ps..........sCpahFpWpTshA.C...............s...................p..........sh..............s................Cp...l.........p.......c....s.p.......t....p.p.aDLo...s.Ls...ptpt.............s.a.s..h.....ss....t......tt........................t...ahlNl.Ct...............................s......t.....s.....t.....s....C.....s....ts...u.....usC........h......p..........t.......s..........p..........s..........h..........s......l......G.h.....h...p.....p....t...............ph.....ss............l....l.Y..sGp.C........................................................................................ 
0 238 267 463 +1553 PF02464 CinA Competence-damaged protein Mian N, Bateman A anon Pfam-B_2197 (release 5.4) Family CinA is the first gene in the competence-inducible (cin) operon, and is thought to be specifically required at some stage in the process of transformation [1]. This Pfam family consists of putative competence-damaged proteins from the cin operon. 20.90 20.90 20.90 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.55 0.71 -4.77 102 4443 2009-09-13 17:09:44 2003-04-07 12:59:11 12 14 3786 2 995 3093 2194 153.50 36 58.87 CHANGED tpsLsptlsphL.tppsholusAESCTGGhluutLTslsGuSshFpGGhVsYoscsKpplLGVspphLppaGAVScpsAptMApGs.pptht.....uDhulu.lTGlAGPs.GuotpKPVGpValuluttst.......stsp.chpFsG.sRpp..lRppusptALphLhchL ................h.pLsptlsphL..ppcshslusAESCTGGhlAusloslsGu.SshFcsGhVT.Y.Ss.cs.KtphLuVs...t...p...sLp...p....a...G..A..VSc.tsAtpMAp.GA..h.pt.s.p....................uD..hulu.lTGl..AG..P...s......G.............u............s....c...t+.......PV......G.T.Vah.uhu.....tpss.................shs....p..p.h..........p..a.......s..........G.....s......Rpp..lRppusthALphLhp................................................... 1 323 652 857 +1554 PF04162 Gyro_capsid Circo_coat; Gyrovirus capsid protein (VP1) Bateman A anon Pfam-B_1772 (release 7.3) Family Gyroviruses are small circular single stranded viruses. This family includes the VP1 protein from the chicken anaemia virus which is the viral capsid protein. 
25.00 25.00 95.40 95.30 21.30 17.40 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.74 0.70 -6.24 22 315 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 11 0 0 241 0 348.00 94 99.88 CHANGED MARRARRPRGRFYAFRRGRWHHLKRLRRRYKFRHRRRQRYRRRAFRKAFHNPRPGTYSVRLPNPQSTMTIRFQGVIFLTEGLILPKNSTAGGYADHMYGARVAKISVNLKEFLLASMNLTYVSKlGGPIAGELIADGSKSQAAENWPNCWLPLDNNVPSATPSAWWRWALMMMQPTDSCRFFNHPKQMTLQDMGRMFGGWHLFRHIETRFQLLATKNEGSFSPVASLLSQGEYLTRRDDVKYSSDHQNRWRKGpQPMTGGIAYATGKMRPDEQQYPAMPPDPPIITuTTAQGTQVRCMNSTQAWWSWDTYMSFATLTALGAQWSFPPGQRSVSRRSFNHHKARGAGDPKGQRWHTLVPLGTETITDSYMuAPASELDTNFFTLYVAQGTNKSQQYKFGTATYALKEPVMKSDAWAVVRVQSVWQLGNRQRPYPWDVNWANSTMYWGoQP .MAR.RARRPRGRFYAFRRGRWHHLKRLRRR.Y....KFRHRRRQRYRR.....RAFRKAFHNPRPGTYSVRLPNPQSTMTIRFQGlIFLTEGLILPKNSTAGGYADHhYGARVAKISVNLKEFLLASMNLTYVSKIGGPIAGELIADGSp...SQAAp....NWPNCWLPLDNNVPSATPSAWWRWALMMMQPTDSCRFFNHPKQMTLQDMGRMFGGWHLFRHIETRFQLLATKNEGSFSPV..ASLLSQGEYLTRRDDV.KYSSDHQNRWRKGcQPMTGGIAYATGKMRPDEQQYPAMPPDPPIITsTTsQGTQV.....R.........CMNSTQAWWSWDTYMSFATLTALGAQWSFPPGQRSVSRRSFNHHKARGAGDPKGQRWHTLVPLGTETITDSYMsAPASElDTNFFTLYVAQGTNKSQQYKFGTATYALKEPVMKSDuWAVVRVQSVWQLGNRQRP................................ 0 0 0 0 +1555 PF02443 Circo_capsid Circo_ORF2; Circovirus capsid protein Bateman A anon Pfam-B_1890 (release 5.4) Family Circoviruses are small circular single stranded viruses. This family is the capsid protein from viruses such as porcine circovirus [1] and beak and feather disease virus Swiss:Q9YUC8. These proteins are about 220 amino acids long. 
25.00 25.00 25.00 30.20 24.40 24.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -4.99 12 1556 2012-10-04 01:49:40 2003-04-07 12:59:11 10 1 51 1 0 1258 0 196.30 62 95.87 CHANGED phphRRRhtRs......hhRRRahRt......RhR.RRh.hpRRtapsNRlYshRlsRpaphplp.ptoss.ushpWssDhlshsLsDFL.ss........................sssshpLPFEcYRI+LAKhEh+Pths.hs.p.cGhGpTslIpDu+lpcFpppsshs.DPLAsaDGA+pW..p+GF.KRLh+PKPQl..oIsDhssu.NpoAALWLsss+shWIPl.....p.hs.suts+VcHYGlAaSa.pP.tss.....hsYpsplTlYVpFRQFsh ..............................R.R.RhtR...........hhRRR.hhh......RY..R.hR..R+.................N.tIashRLp.RTFGYTlK...pTTVpTPSWAVDMMRFNIsDFLPPG.......................GGoNPhSlPFEYYRIRKVKVEFWPCSPITQ.....GDRGVGS.oAV..ILDDNFV..TKATAL....TYDPYVNYSSRHTIsQPF.S.YHSRYFTPKPV.LDpTIDYFQPN.NKRNQL.WLRLQTs........................sNVDHVG.LGT....AFENS..hc......pYshclThYVQFRpFs............................................... 0 0 0 0 +1558 PF04487 CITED CITED Finn RD anon Pfam-B_3987 (release 7.5) Family CITED, CBP/p300-interacting transactivator with ED-rich tail, are characterised by a conserved 32-amino acid sequence at the C-terminus. CITED proteins do not bind DNA directly and are thought to function as transcriptional co-activators [1]. 
21.10 21.10 24.90 21.80 18.30 18.30 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.69 0.70 -4.11 11 168 2009-09-11 10:42:53 2003-04-07 12:59:11 7 2 46 2 72 157 0 163.30 35 97.93 CHANGED MA-H.MhAhsauhhssshtt......hphsMs.....u.stps.+s......hps....uslhHYuuu...uhcuuhtsRsu...........sG.shuhupPsus..s.hhas........................ss.p.p.Fh.....ssuspLhASMpLQKLNopY.G+............tsts......sGtPhssus.Q...aR........sGsu.ssuhpp..ss..hs.ulhDsDhIDEEVLhSLVlELGLDRlpELPELaLGQNEFDFhoDFsst.tsupVSC ........................................Msph.h.shsht.hsts.tt......hp.t.Ms...th.s.sp.ht.pt........hps....sthhHYsus...shpu.s.t.p.ts...................u.s.sh.s....P.t.t.............................................s......a......t.s.pL.ASMpLQKLNspY.sh................................................hts.....sup.htsss.p.................t................................................u.......s.sssh.t...................shhDsD.lDEEVLhSLVlEhGLDR....hpELPELaLGQNEF.DFhsDhss............................ 0 6 14 28 +1559 PF04223 CitF Citrate lyase, alpha subunit (CitF) Mifsud W anon COG3051 Family In citrate-utilising prokaryotes, citrate lyase EC:4.1.3.6 cleaves intracellular citrate into acetate and oxaloacetate, and is organised as a functional complex consisting of alpha, beta, and gamma subunits. The gamma subunit serves as an acyl carrier protein (ACP), and has a 2'-(5''-phosphoribosyl)-3'-dephospho-CoA prosthetic group. The citrate lyase is active only if this prosthetic group is acetylated; this acetylation is catalysed by an acetate:SH-citrate lyase ligase. The alpha subunit substitutes citryl for the acetyl group to form citryl-S-ACP. The beta subunit completes the reaction by cleaving the citryl to yield oxaloacetate and (regenerated) acetyl-S-ACP. This family represents the alpha subunit EC:2.8.3.10. 
19.50 19.50 19.50 19.60 19.40 19.40 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.52 0.70 -5.82 5 1131 2012-10-04 00:26:15 2003-04-07 12:59:11 7 4 925 4 116 1121 28 448.90 64 91.35 CHANGED DRKLsuSLEEAIc+oGLKDGMTISFHHAFRGGDalVNMVM-lIA-MGFKNLTLASSSLossH.sPLVEHIKNGVVT+IYSSGLRGsLA-sIS+GLLcEPVpIHSHGGRVHLIpSGEL+IDVAFLGVPsCDEFGNANGacGKAsCGSLGYAhVDA-YADpVVhLTEsLVsYPNsPASIsQDQVDhVVpVDAVGDPsKIGuGATRMTTNPRELLIA+pAA-VIsNSGYFK-GFShQTGTGGASLAVTRFL+EKMl+cNI+AoFALGGITAoMVDLHEcGLI-KLLDVQsFDSsAApSLARNPNHlEISANpYANPuSKGAuVDRLDVVILSALEIDTpFNVNVlTGSDGVIRGASGGHCDTAuAApLSIIVAPLVRGRIPTVV-cVsTVITPGoSVDVLVTDpGIAVNPsRPDLl-+Lpcss.lPVaoIEpLpERActlTGKPcPIEFTDKVVAlV+YRDGSlIDVV+QV ............................................................................................................................................................................................+KlsuSLEEAI++SGLcDGMTlSFHHAFRuGDhslNhVM-.hIA.cMGaKNLTLASSSLsssH.sP.LlEHI+sGVVopIYTSGLR.G.LuEpIS..c..G.LLt.pPVpIHSHGGRV+LlpSGELpIDVAFLGVPoCDEFGN..AN..G......h....s....G....K....u....sCG.SLGYAhVDAc.AcpVVhLTEpLlPYPp.sP.ASIsQDQVDhlVpVDc.VGD..ssKIuuGATRhToNPRELLIA+pAA-VIspSGYFK-GFShQTGTGGASLAVTRFLc-KM...cp+sIpAsFALGGITuoMVDLHEcGLI.......+.........KL...L.D..V......Qs.........FDp.sAAp..SLAcN.P.s...H...l...E...I..SAN.....p.Y........A........N.........uSKGAuV-..+LD.V.V.lL.S.A.L.EIDT..pF..NV.N..V..l.TGSDGV.lR.GASGGHsDTAhu.ut.....L.....S.......I....I.l..A..P..L..V..R..G.R.IPTlV-...pV...h...T.slT.PGuSVDlLVTD+GI.....A....VNPs...R...s...-LhEpLp.p.AG..l.lho..IE.L+-RAphLT.G.cPp..P..IEF.T.D...R...VVAVV+YRDGSVIDVV+QV.............................................................. 2 32 62 87 +1560 PF01874 CitG ATP:dephospho-CoA triphosphoribosyl transferase Bott M, Enright A, Ouzounis C, Bateman A anon Enright A Family The citG gene is found in a gene cluster with citrate lyase subunits [1]. The function of the CitG protein was elucidated as ATP:dephospho-CoA triphosphoribosyl transferase [2-3]. 
20.80 20.80 22.20 21.10 20.40 20.70 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.47 0.70 -5.20 135 1741 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 1304 3 316 1196 72 248.10 35 85.49 CHANGED pl....uphAhpAhhhElsh.PKPGLVchtssGuH.DMshtpFhp.Suhultshhtphspsuht...t..........ltplGhpAEpsMhtsTsGlNTH+GulahlGlLssAsut................................h.tttsstsstthhtslsthssshh.....t..............t........tpshohGpththpauhsGsptEstsGFstlhphulPthppsh..t................hs.ppAhlpshltlhuph.sDTsll+RuGhpsl.phlpppAppl....Lstu.............t.tt..hpplpph-pthh..pcslSPGGoADLLusolFlst ..............................................................................h....huhpAhlhElploPKPGLVDthssGAH.p.DMslt.sFhp.Suhult.sah.phhphGhptsth.........................hhttlR.lG..htsEtsMhpATsGlN..T...H+GuIFulGLLsuAhut..............................................hht..t.p.shs...pplspth.sp.hs.pslst................................h.tp.....ttphTtGpphhpphGls.....Gs....RGEApuGashVhptALPhhpphh..pp...............................ths.chAllpsLLh.LMuh..sDTNlhpR.u.G...t..uL..pa.lQp.pAppl....LtpG.................Gh.hs.ss.....hptLpphDcthl..c+s..lSPGGoADLLhhThFls.p............................................ 
0 62 165 244 +1561 PF03600 CitMHS Citrate transporter TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Family \N 29.60 29.60 29.60 29.60 29.50 29.50 hmmbuild -o /dev/null --hand HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.34 0.70 -5.87 32 7280 2012-10-02 15:12:49 2003-04-07 12:59:11 11 35 3249 0 1836 11155 2469 359.10 15 84.92 CHANGED Luhhhslshhh..llhhpthuslhuhhllsllhsLh..............................hs.hh.phlshls..slhLLFuhha.hslM.-oGlFDslsphlhchspGc.hclhhsssllsullShsGsusTsshlss.shhl.Lacphsls.hhlssslsluuslhs.hsPhGsPss........ps.hsslsh...sshhsshlPuhlsulhhhh...hhhhhhth+cppthtthph.htt..............................................................................................................................................................................tpshtptpsptttht+shlhhhshhLshslhshl.............................lhuh............h.s.lhhhhuhslsllhsa.slcpphp...th.ttstsshsslhhhhAhhlFsullstsuhhctluptlhsl............lstslu.hlsllsuls.ShPhshhhu.ps....hhslhhpsusthGls.h.hshsuslGs 
.......................................................................................................................h..h.h.h.......h..h.......................h.....s....h....h...h...h..h....h..h..h..s..h.hh.shh..........................................................h...h..t..p....h...h....s...t...h....s...............s.............l..........h...h..l........h...u..h..h....h...st.h...h.p.p...o.Gh...hp...hls....p...h...l...l....p....h..s.....t......s......p......................p.........l...h..h...h...l......h......l.........h...s...u..l..l..o.s.h.h......s...s.s...s.....s...s...h........l.....h....l.....P..l...s............l.......s....l...........t.............p..............p...........h...............p...........h.............s..........s.................t..............h...............h...h..h....s....l....s.....h......u..u........s..h....u.u.h.....h....T.......l...G...s...P..s.....s...h..h.l........................................ts...hts.h.s...h.................s...s....h.....h......t...h.....h....h....s...h........h...l...h...s...h..hh...hh.........hhh.h.hh.h.pp.tt.t...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...h.t.h.h..h....h..........h...h..l...h....h...s.h..h.hhh.....................................................hht.h.........................................h.....h...h....h....h...u.....h...h....h.....h.......h........h............h............t........h.................t......
..h.......p....p...ht.............................................t.....ht.h............hl...h...h.h...h....u.h.....h.h.h...s..t....s....l...t.......t.....h...s........h..h.t.....h..h...s.p.....h.h..th......................h.......h...h..h.................h..h....s..t....h...h..........s.......s..hh.h..................................................................................................................................................................................................................................................................................................................................................................................................... 0 639 1190 1535 +1562 PF00285 Citrate_synt citrate_synt; Citrate synthase Finn RD anon Prosite Domain \N 20.10 20.10 20.30 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.20 0.70 -5.72 121 9392 2009-01-15 18:05:59 2003-04-07 12:59:11 16 26 5081 103 2244 7278 4274 289.30 36 82.11 CHANGED GL.....psshsspopIohlDG...........ppGh..LhYRGYsIp-Ls.p.pss.............FEEss.aLLlhGc..L.PspppLppFppplttpp.t..........lsp...plh...phlps.hPp.sscPMshLpsulusLus.hp.t..........tstph...p..hcpuhcL.lAphPslsAhh..a+hp......pGtphltPcss..Lua...utNFLhMl..pspt............s.s.....t.hscsh-hhLlLaADH.thNuSTFssRlluSThuDhYuulsAuluuLpGPLHGGANptVhchlpcI....ss...........scpscpalcctlsp.pc..+...lMGFGHRVY+.shDPRApll+chscplstph.........sp....sphhclupplEchshp...h.p+.t...lhPNVDFYSullapth.GlPt..-hFTslFAluRssGWhAHhhEp....h.t.....ss+l.hRP 
.......................................................................h.sh.sh.otls.lst...........pts.....LhaRG.hsl.pLs.p..tsp..............................a.-sh.aLLh....G.c...........h.P.s.t.t.p.h...tt.hpttltt....t..............l....t...t.h..........thhtt...h......t....HPMs.h.h.sshsh.lssh......................................................t...tt.....shcl.h....up..hsslh..shh...a.+ht.........................s....h..h......s...p..ts......hshstp.F.lh..hh....h..stt................s..h.cshchhhh.La....A-H..t..............N.s............S.T.T..s...Rl...............s...u.S...............o.t.u.s..a.u.slsuuluu.L...hGPh.HG.G.A.N.Essl.c.MLpcl............ss.....................scpl.ca....l.t.+.t.h.cc....p-........+...................lM..GFGH.RV.....Y...+...shDP..RApl..h+chsc.clhpch..........ts....s..s..lhc.lAhc..lEclshp.......hhc+.+.......LaPN.........VD..........FaSGl....h....h.............ct.h..........G..............h.Pp....p........h..a..............T..............s..............l.FshuRhsGahAphhE.....ht.....t.tl..RP..................................................................................................... 0 694 1359 1866 +1563 PF03802 CitX Apo-citrate lyase phosphoribosyl-dephospho-CoA transferase Bateman A anon Bateman A Family \N 25.00 25.00 26.40 25.60 19.40 18.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.94 0.71 -4.82 29 1055 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 904 0 101 514 3 165.80 43 81.99 CHANGED pslsL.plLsuREtRsshQpphLpca.pt..sLlSlTlshPGslKsssthcplFstulpslpphhtptthhhhtp....sttTGPEuhhslsts.AtplKphhltlEcsaPLGRLaDlDVl...sspsptlSRpsluh..ssR+CLlCscsA+hCuRuR+HulpElhsplpchlpph..spc ...............................p.sVolsElLsuR-cRtARQ+th....L..p+a..ss...........PLlShTlssPGslKsStlh++lFspulsslpsLstcps.W.phpt.p...............th....lspsoG...PEuhl.uls.uP..Aps..LKtthlpLEcsHPLGRLWDIDVl.............ss-G...chLSRc....-huL..P.sR+CLlCpp.s.AtlCARu+pHsLs-Lls+hctllpshpt.s.......................... 
0 23 50 75 +1564 PF01214 CK_II_beta Casein kinase II regulatory subunit Finn RD, Bateman A anon Prosite Domain \N 20.10 20.10 22.20 21.10 20.00 17.30 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.35 0.71 -4.78 66 783 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 323 22 504 729 15 179.10 47 69.13 CHANGED sWIshFssh.hspcaFscV-c-aIpDpFNLhGLppp..lspappALchILDhpstp..................................pthppsp....tptl-psAptLYGLIHARaIlTspGlpt.....Mh-KYpp.ucFGpCPRlhCpsptlLP..lGlSDtsspssVKlYCPpCpDlYpP..ssctsslDGAaFGsoFPphFlhsa.sc..hhspt........................................................................ppahP....+laGF+ ..............................................pWIsWFCuh.cG.sEaFCE..VD--YIpDcF.NLs..........GLspp...V.s.h.Yc.............pAL-hILDlps..pp.......................................................t..s.....p.p.......pphl.EpuAchLYGLIHARYIl.T.s+Gltt..M.........h-KYpp.u-.....FG.pCPRVaCp....spshLP........lG...hS.DhP.....sps.sVKlYCP+C.....pDlYhP..po.s..+.....p...tp...l............DGAaFGToFPphhhhsaPp....hhspp.........................................................................................stppahP....+laGF+........................................... 1 212 307 426 +1566 PF03805 CLAG Cytoadherence-linked asexual protein Lawson D, Bateman A anon Lawson D Family Clag (cytoadherence linked asexual gene) is a malaria surface protein which has been shown to be involved in the binding of Plasmodium falciparum infected erythrocytes to host endothelial cells, a process termed cytoadherence. The cytoadherence phenomenon is associated with the sequestration of infected erythrocytes in the blood vessels of the brain, cerebral malaria. Clag is a multi-gene family in Plasmodium falciparum with at least 9 members identified to date. Orthologous proteins in the rodent malaria species Plasmodium chabaudi (Lawson D Unpubl. obs.) 
suggest that the gene family is found in other malaria species and may play a more generic role in cytoadherence. 18.30 18.30 19.90 19.80 16.80 16.70 hmmbuild -o /dev/null HMM SEED 1282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.23 0.70 -14.21 0.70 -7.22 10 213 2009-09-11 00:22:49 2003-04-07 12:59:11 8 2 15 0 39 216 0 436.10 45 84.97 CHANGED DNIpELKphI-N-ELYcNLspLE+hlLpoLEpDKLKlPlhps-s+.cYlDhSpFKll...cstssucscsYIlPThcoohcDIIKYEHlLKpQlhpcYss-ISDhIK+KlLlVRTLKllKlMLlPhNsYKcssDlKpALpELNclFpsc..ppppcccsslphpschFcpllpsl+tlKcppph.pth......DsKp-lh-hsDhFFTTssNIEFM-sLDpIoNpYGIGlaNhlGsHhIALGHFlsLKLALKpYcpYFEtGslKFaoWQKILpFshSDRFKlLDLMCDcsusYcsppKRRpoYLKscpouoopECNILEFLIHaFNKYQlELlppspcp-FslahhhEHKclK-cFFsFMCs..csKECsIYcospFKpEps--ssFs-pps.sFs........hhuPaNlYsNYaYFhKpYs-FsscpllYlHlLNLsGlLNs-scAYVoSLYLPGYYNAIQLSFp--ppLs-LapNLlKClcKCau.p++spo..p+.pslpsptcp-sSKCslCKGsFhYIN.Ks--ssSMLQKFYsYlTKlLclpplSoLlcshsIY--YsNFLoHDLNWYTFLFLFRlTSYK-IsppoVuEAMYLNLKDEDsh++ThVTsaWYPSslKKaYTLYlRK+lusNLL-ELEpLl+pcTIEKMKKSIpFllHVNSFLQLDFFHtLNEsPlG..RsaPLShlLEs+FK-Wh.shssGFaFlNYDcPptRtcLacKhcsp+FlsPKhscWschLK+lIcsAY-hYFpQRHVKNLaKaHssaNINNKIMLMRDSYELYhKNacDllFhADIhllRKYLouTPKl++lpDRhaYalHslhGNulNFYKYGlIYGFplNKchLKEVV-ELaoIYphNpcIFoDlSFLQTVYLLFRKIEpSatSHRRNDclSlNNIFFhNVusNYSKLsKEER.pEIHsSMASRFauKThFosFQhMFsopIS+clD+LDKhYGKAshlplsssEcAaLpFAYlY.GSIhDSlTNsLhPhYAKKPIsQLKYGKTFIhANYFhLuSplauhLNLNNLShLCEYQAIsSANaYShKKluQFls+KFlPllsYahhhRlts.hspsp...ttaphhpt.us.sp........................hhhpsshYhohYhusNLah-sstaFPssLssELp-QT-alptpp.ccKPslHuhspshlhullpuhshsFslaslspaYAFF-NllFalhsshRFhDRaaslhspYhssah+phh++hTsDllLKhhp+sYpshKKcGYYcEsIsARLssKpplpphltt-pspslhsslp.h-hcshp+.shshYsD-phhF-DlscNEpFLN-R 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................shhpFAYhYsGSIMD.SlTNsLLPPYAKKPITQLKYG+TFVFSNYFMLASphY-MLNYKNLSLLCEYQAVsSANaYSuKKlsQFlGRKahPlTThaLhhRIptohsahp......thphhs..ss............................................hts-sshaLaFaFFhNLYhDuhKaFPGGFusuL+EQTcH.Vppps.....a.+KP.VHths........................................................................................................................................................................................................................ 
2 14 17 38 +1567 PF01217 Clat_adaptor_s Clathrin_adapt_s; Clathrin adaptor complex small chain Finn RD, Bateman A anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.74 0.71 -4.47 7 3187 2012-10-04 00:47:01 2003-04-07 12:59:11 15 38 357 27 2033 2739 18 135.90 25 49.99 CHANGED hl+hlLlhs+pGchRLsKaY.sshssscc.pKhlcclhtslosRpsch................sNhlEhps..h+llY+pYAsLYFshsl-.s.-sELhhL-lIppFVEsLDphFs.NVCELDllFNFpclahll-Ehl.G.G.lhEoshppllcplt.hspht ..................................................................................hphlhl.h.s.ppGc.h.....hls+hY....t................s...............h...............s......p.....p..........p..........p......p....p....h...h....p...c..h.h....p...h.h.....h......s.+....t..t..p..h.......................................ss.h...l.......p......h......p......s........hph..lY..+.+...............a.s.......sL.a..hsss....s.......s.....ps.s.N.t.L.h...h..........l..-...hlp............p.h.l.-s..l...sc.a..F.........t........s.....l.s.Eh...s.l.h...N.a....h........la...l...........LDEh..l...s...G..h..h................p.Ts.p.lht.h......t.................................................. 0 669 1072 1604 +1568 PF00637 Clathrin Clathrin_repeat; Region in Clathrin and VPS Bateman A anon Prosite Family Each region is about 140 amino acids long. The regions are composed of multiple alpha helical repeats. They occur in the arm region of the Clathrin heavy chain. 
23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.47 0.71 -4.59 150 4251 2012-10-11 20:00:59 2003-04-07 12:59:11 15 79 397 51 2715 4128 33 136.90 16 33.12 CHANGED l.php..shhshspllchhpc...tshhp.lhtaLpphh......s.pt..ppsslpstLhphhhct......sp.pplcphlp..phsp.....aDhtpluphs..ccpc.Lhptthh...lYpc..hsphpchhp...l.hpp.pthppsh..chstctss.....clhppllphhlsptphph ..............................................................h........phppllp...hh.p.......ts..h...h..p.t....Lh..taLpthh....................t...p...........pp.tslpstLhthhhct....................................pp.pplccalp................t.sp.........ach..tp......l.t.phs.......pc....tp...h....hptthh....lapc..........hsphpphlp.......h..hpp......p.....phppsh........ch.ht.c..h...ss....................p...hh.plhth.hp......h............................................. 0 886 1499 2201 +1569 PF01086 Clathrin_lg_ch Clathrin light chain Finn RD, Bateman A anon Prosite Family \N 20.40 20.40 20.50 20.60 20.20 20.10 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.84 0.70 -4.43 28 493 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 297 6 289 500 3 200.20 27 86.99 CHANGED MA..DcFPsl-s.sss........s.h..ssptsssu-FLuREcshLGD...pFto-pD.......hs.s-sD.................p.ssFppsFPslsststt.....sussushsGshht.s.......suhsphsphpsE..PEsl+cWRE++ctcIpc+-ctsccp+pEhhcKApccIDDFYpsYNpKp-KppupsR..................t-tE....pFLucR--h.opsGTpWERlAcLlDhssKus+...su+D+oRFRclLlsLKtptpAPGu 
............................................................................................................................................................s.........tstFL.tp..ppt..lus..................h.ps..................tss...............................................................t....st..ts.t........................t..ts.s...sut....t..s..........................t.....su..h.....s.t.h...s....p..ppE.....sEslR...cWREcpptcL..p...c..+....Dtppc...........cpctEhhccApccl--.aYpphscph-+s+t.sNR....................................tpE-......tFlppp-c....t.sGo...p..WERlAcLs.....D.....h.ss+ss+............................pspDhSRhRplLlpLK.....psPh................................................... 1 94 157 232 +1570 PF01394 Clathrin_propel Clathrin propeller repeat Bateman A anon [1] Repeat Clathrin is the scaffold protein of the basket-like coat that surrounds coated vesicles. The soluble assembly unit, a triskelion, contains three heavy chains and three light chains in an extended three-legged structure. Each leg contains one heavy and one light chain. The N-terminus of the heavy chain is known as the globular domain, and is composed of seven repeats which form a beta propeller [1]. 21.30 21.30 21.90 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.35 0.73 -7.82 0.73 -3.30 71 581 2012-10-11 20:00:59 2003-04-07 12:59:11 15 30 177 39 344 507 4 38.40 30 4.84 CHANGED sshspphsa.ssh..shhhlhslp.........stuplhlhslc .....tsst.IsacssscpphhhlhGls.........ststGphplhsV-.. 
0 90 164 246 +1571 PF03505 Clenterotox Clostridium enterotoxin Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 32.40 35.10 21.90 16.80 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.18 0.71 -4.92 4 72 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 20 25 4 63 0 172.20 44 58.07 CHANGED D+DLIGTLLIEusoSGSIIQPRLRNTTcPlFsoSNsTtFSQpYTEtRLcDAFNIpLFNTSTsLFKFVEEAPsNKNIshKsYNTYEKYELIcYpNGsIsscApYYLPSLGasEVosAPSstu.VVch.lspsuFIQsGPpEcIVlGVIsPSENIp.EIsTsIu-sYTYNIPshlssNPhYlLFsVNoTshYKIoscsNL ....................pllGs.lIpsuooG.lhp.pLRsospslhhs.ssspFspEYhcsplpssF.Nhp.hNp.shhFcFscpAPSNKNlhhhl..TY.+YElIch.pssIs-+A.hYlPSLGYscshphsS.tt......lsps............................................................................................................................... 0 2 4 4 +1572 PF03515 Cloacin Colicin-like bacteriocin tRNase domain Griffiths-Jones SR anon PRINTS Family The C-terminal region of colicin-like bacteriocins is either a pore-forming or an endonuclease-like domain. Cloacin and Pyocins have similar structures and activities to the colicins from E coli and the klebicins from Klebsiella spp. Colicins E5 and D cleave the anticodon loops of distinct tRNAs of Escherichia coli both in vivo and in vitro [1]. The full-length molecule has an N-terminal translocation domain and a middle, double alpha-helical region which is receptor-binding [2]. 
25.00 25.00 37.00 31.60 20.90 20.40 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.86 0.70 -5.19 8 84 2012-10-01 19:36:59 2003-04-07 12:59:11 9 10 43 12 4 97 0 272.40 34 49.83 CHANGED Mu......................................psG.sausp.ssa.shtt.s.............GGsspsG..uuoshu.s...................s.hPtshAh.......GlPuhhsPG.......sG........shulSlsus....slsAAlu-lhtsL+G.....stP.....................aKhshsGlulhulhPupIAcDc.sh.................hshlVooLPsDslT-sPlSsLPhspssVsVshRlsDlVcDspQplAlVsu..hPhsVPVVcA+P........TcpssVaoAul.PG.hPslplSVspssssspuhscuhssc......cstsspPAGaThGuso+..-AlIcFPccSGpcP.lYlSVoclLosspl...KQRQ-EEp+tppcWc ................................................G..........s..spG.sausp.ssasshtt.s.............GGsspsG..GuSuhussh...................hsstshAh........h.ulhhPG............tG........GhuVolpus....tlSAAhssl.suL+G...........s.P..........................huVuhhtl.Puphsc.pc.hh........................lhso..h-plT-hPsS..pLPhstssVsVphRlsDhspstpQhhullsG....hP.M.oVPVVDAhP........TphssVaoAsl.Ps.hPsL.lSVsptsssspsss.h.pps.......pstshh.AGaThuuNs+..DsllRFPcsSGhsP.lYlSss-lLssspl...pQRQ-tENptppca........................................................... 0 2 2 4 +1573 PF03513 Cloacin_immun Cloacin immunity protein Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 27.90 59.60 20.90 19.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.67 0.72 -3.94 6 22 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 17 7 6 26 1 80.50 54 72.32 CHANGED GLKLcLsWFDKKTE-FhGcEYSKDFGDDGSVIESLGlPLKDNINNGsFDVccsWVPlLQPaFKNpI-hDKahYpISFDYRDu ..GLKL+LpWFDK+TE-FpGcEYScDhGDDGSVIEpLGhPlcDN.INNGsFDVpppWlslLQPaFpppI-hs+apY.lSFDYRDt... 0 0 2 5 +1574 PF01785 Closter_coat Closterovirus coat protein Bashton M, Bateman A anon Pfam-B_1309 (release 4.2) & Pfam-B_6985 (release 8.0) Family This family consist of coat proteins from closteroviruses a member of the closteroviridae. 
The viral coat protein encapsulates and protects the viral genome. Both the large cp1 and smaller cp2 coat protein originate from the same primary transcript [1]. Members of the closteroviridae include Sugar beet yellow virus and Grapevine leafroll-associated virus, closteroviruses have a positive strand ssRNA genome with no DNA stage during replication. 21.30 21.30 22.20 21.60 19.90 21.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.18 0.71 -5.25 11 1326 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 50 0 0 1130 0 171.40 35 75.75 CHANGED hshpshs.sssspLssc-h+plhtphppplpsch.........sss--chhhtluhhLhphushSTSsKlsh..psstoh...shs.scchslpcu.lhshlpuph.htspsNsLRsauRoaEctYlshspthhpt.sptptsuK+GlPuthpYLuADFlsGosstho-c-pAshLhApcsALc+p.usts-pplssLppLG+h ......................................................t.....hh.s.s.lshs-h+.l.sp..thlspch.........shss+-ht..ha.luhhlhplsspSoShpscc.sssshoa....spt..Gh..c..hcls-chhhshlhss.hsh.s..s.psNsLRhauRop-shalshs+ps+sl.s.sttshctGlPst.aaYLsADFLsGu..slo..-.h-pu.sh...l.A+cphLc++tu..cps.lhNlpph................................................ 1 0 0 0 +1575 PF00574 CLP_protease Clp protease Bateman A anon Prosite Domain The Clp protease has an active site catalytic triad. In E. coli Clp protease, ser-111, his-136 and asp-185 form the catalytic triad. Swiss:P48254 has lost all of these active site residues and is therefore inactive. Swiss:P42379 contains two large insertions, Swiss:P42380 contains one large insertion. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.82 0.71 -4.62 86 9058 2012-10-02 13:07:06 2003-04-07 12:59:11 18 23 5278 397 2138 7738 4927 176.10 39 75.01 CHANGED spuE...cshD.lYuR.LL+-RIIFLustl.sDphAstllA.QLLaL-u-sssK-IhlYINSPGGsVoAGhAIYDTMpal.+scVsTlCh.GhAASMGuhLLuuGspGK..RhuLP.suRlMIHQP...hGGspG..QAoDIpIpAcElh+l+cplsclhuc+.T.GpshEcIpc......Dh-RDhaMoA..pEAh-YGLlDpVlppp .................................................................t........hD.l.as.R.L.hc.-Rl..l.hL......s....s.......t...l..pD..p..h..A...s.......l.lA.Q.LL..a.L......-....u.p...ssp......KD.Ih.lY...I.N.S....P.......G.............G.........s.........V..........oA..............G.........h..............AI...Y....D...T...M..........p.....a....l.......+......s.............c......V..s....T....l.....s..h.....G....h....A....A..SM...G............uh...LL.s............u.GscG..K..Rhs...LP.....su........c......l......MI...H...Q...........P................h...G...G..h..pG.........QA....oD.......l....p........I..p..A.c....c.l...h.+........h+...c........p.......l.......s.....c.....l.h.uc+...T....G..p.......s.......h...E.......p.......Ipc...........................Dh...-........R.D.....p....a..h.o..A........pEAh-YG...llDplhp..p............................................................................................................... 
1 751 1443 1838 +1576 PF01093 Clusterin Clusterin Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 20.30 20.30 19.90 18.70 hmmbuild --amino -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.51 0.70 -5.68 14 180 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 48 0 59 144 0 339.10 37 92.73 CHANGED ppLpphSptGp+Yl-cElcNAlpGVKpMKolME+opEEHppLhpsLEcsKccKE-ALKhsp-sEp+LcEcpclCNETMhuLWEECKPCL+pTCh+FYo+sC+SGuGLVG+QlEEFLNpoSPFhhhhNG-clcsL..h-psppQshplsplE-sFoplsssV-pLFp-Sh.hatphp..FspshpssFh.....hpPhhhsthptthttsc.....pth..p..Ph..hp.hsh.shFQ.Fh-huhphhpuhssthpp.ht.........t.u.phs.sp.hh....sDRtlC+ElR+NSoGCL+h+-cC-KCp-IL.ul.DCotppPspspL+pElp-uLpLAEchoppYsplLp.hQp+M.NTouLL-phscQFGWVSpLANhTps.scshFploTVso+ss...sspuPuD.TpV.sVplFsSsshTlslPt-lSh-sPcFh-hVApcALQ+aKpch+pt ..................................................pLpphS.ttGpphlscElppAl.GlKphKphh-+spc-+ppLhpsLccs.+ccKp-...Alp.hp-spt+Lpc..plCptohh.s...W-EC+sCLcpsCh+aYs.sCpsu.u.VtpplEp.Fhpp.s..shhh.........hp...t-phcsL.........pp.pppst.ls..thp-tFs.phs.s.hspLFpcp..........aspt..shah.......s.....h..hp.....h..t........p.............t.hs..h..shFp.hh.ph.hph.pshs...hhpp....................s....p..h.s...p..h...t....psRhlC+ElR+NootCh+h+ppCpKCpphL.th..DC....Ps.stL+pchp-ulplsph.sppYsplLp.hQt+h.sTs.LLcphpcQFuWVSp.LAN.Tts...sp.hhpltpVh...s+...p....s..s...........s.ts.T.V.slplhsSssholplP.-.shpsspFhphVstcALppa+pp.................................................................................................... 0 4 8 23 +1577 PF03026 CM1 Influenza C virus M1 protein Griffiths-Jones SR anon Pfam-B_1290 (release 6.4) Family This family represents the matrix 1 protein of influenza C virus. The protein is the product of a spliced mRNA. Small quantities of the unspliced mRNA are found in the cell additionally encoding the M2 protein (see Pfam:PF03021). 
20.20 20.20 22.60 276.40 18.30 18.20 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.48 0.70 -5.26 2 132 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 89 0 0 34 0 218.90 100 86.56 CHANGED MAHEILIAETEAFLKNVAPETRTsIISAITGGKSACKSAAKLIKNEHLPLMSGEATTMHIVMRCLYPEIKPWKKASDMLNKATSSLKKSEGRDIRKQMKAAGDFLGVESMMKMRAFRDDQIMEMVEEVYDHPsDYTPDIRIGTITAWLRCKNKKSERYRSNVSESGRTALKIHEVRKASTAhNEIAGITGLGEEALSLQRQTESLAILCNHTFGSNIMRPHLEKAIKGVEGRVGE MAHEILIAETEAFLKNVAPETRTAIISAITGGKSACKSAAKLIKNEHLPLMSGEATTMHIVMRCLYPEIKPWKKASDMLNKATSSLKKSEGRDIRKQMKAAGDFLGVESMMKMRAFRDDQIMEMVEEVYDHPDDYTPDIRIGTITAWLRCKNKKSERYRSNVSESGRTALKIHEVRKASTAMNEIAGITGLGEEALSLQRQTESLAILCNHTFGSNIMRPHLEKAIKGVEGRVGE 2 0 0 0 +1578 PF03021 CM2 Influenza C virus M2 protein Griffiths-Jones SR anon Pfam-B_1092 (release 6.4) Family Influenza C virus M1 protein is encoded by a spliced mRNA. The unspliced mRNA is also found in small quantities and can encode the protein represented by this family. 20.90 20.90 22.80 308.20 20.00 18.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.77 0.71 -4.63 2 111 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 87 0 0 38 0 139.00 98 70.10 CHANGED MGRMAMKWLVVhIYFSIASpPASACNLKTCLpLFNNTDAVTVHCFNENQGYMLTLASLGLGIITMLYLLVKIIIELVNGFVLGRWERWCGDIKTTIMPEIDSMEKDIALSRERLDLGEDAPDETDNSPIPFSNDGVFEI MGRMAMKWLVVIIYFSITSQPASACNLKTCLNLFNNTDAVTVHCFNENQGYMLTLASLGLGIITMLYLLVKIIIELVNGFVLGRWERWCGDIKTTIMPEIDSMEKDIALSRERLDLGEDAPDETDNSPIPFSNDGIFEI 0 0 0 0 +1579 PF02543 CmcH_NodU Carbamoyltransferase Bashton M, Bateman A anon Pfam-B_1740 (release 5.4) Family This family consists of NodU from Rhizobium and CmcH from Nocardia lactamdurans. NodU a Rhizobium nodulation protein involved in the synthesis of nodulation factors has 6-O-carbamoyltransferase-like activity [1]. 
CmcH is involved in cephamycin (antibiotic) biosynthesis and has 3-hydroxymethylcephem carbamoyltransferase activity [2], EC:2.1.3.7 catalysing the reaction: Carbamoyl phosphate + 3-hydroxymethylceph-3-EM-4-carboxylate <=> phosphate + 3-carbamoyloxymethylcephem. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.06 0.70 -5.33 11 743 2012-10-02 23:34:14 2003-04-07 12:59:11 10 5 526 10 274 1274 3956 344.30 32 61.74 CHANGED sSAashSsFtpu..lslslDusGDhhsphhhluptsthth..hspFPtp.....t.......lGhha..hphsphhGashh.stGKlMuLuuYG.sssphhsslpphhchpttss........Ycthlsthpsslhcshchh....ct....h..ppcpcluAohphhlE+llhchhphhhtchs...hsp.LslAGGsuhNlphNutLhppGhFsclaV.PtssDuGsAlGAAlhshsphst......hPhchs..VY.GPshss-.......p.csssWcuphp.......hhuplAshLAsGclVuahQGRhEhGPRALGNRSILA-PpsscsKD+lNthl+.REtFRPaAPosLEEcss-hF-.spsc...PaMhhshpsREsh....................tcplPAlsHlDGoARlQ ...........................................................................................................uAahsSsFpcs....sllslD.G....h...G.....-.......h.....s........o.s.........hh...h.tc.s.s.p....h....p....h.....h...t...ph.thPpS.............................................LG.hhY.........tthTtalGF....p....h................p.......sEh....K.lM.G.LA.sYG......t...s.......t...h.....h..h..p...h...h.....pp.hh..p...h.p.s.ss........................................ah.t.....h..s.h....h....p....hh.....t....t..h..h.t....hh...................t........t......t.....ptt....hDl........A...suhQt.hh.Ec.ll...l....ch...hcth...hppts......pp..LshAGGVALN...s....sh...Ns....+...l....h....p....p......s......F.c.........c..............l..a..V...p..P...A..u...u...DuGs.AlGAAhhsht.phts......................................t.ht.p......sYL..GP.sas..sp...............tht.h.th.hts.....t.h.t.h............p.....p..tlhp.p.sAphL.Ap.u.p....lVuWaQGR..hEFGPRALGsRSILusPpssphpcplNthlK.REpFRPFAPslLtEcuscaF..c..h.s..p..su.....................Pa......MhhshpVp.tp.h..........................
........+scl.PAVsH.l.DGTARlQ.................................................... 0 93 169 217 +1580 PF04989 CmcI Cephalosporin hydroxylase Bateman A anon COG3510 Family Members of this family are about 220 amino acids long. The CmcI protein Swiss:O85726 is presumed to represent the cephalosporin-7--hydroxylase [1]. However this has not been experimentally verified. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.08 0.70 -5.01 31 285 2012-10-10 17:06:42 2003-04-07 12:59:11 7 8 264 36 73 634 456 186.70 38 79.54 CHANGED tpasasapWhGhPIlphPpDlhshQEllacl+PDlIIEoGlA+GGShlahAuhLpll...............tsct+VluIDI-lRtpsptul-s.+sh.pRIphlpGSSscscllppl+phtpt.hcpllVlLDSsHopcHVLsELch.YusLlosGsYhlVhDTll-chsts....h.s.......psWs.GsNPhsAlppaLppps............c...FplDp...thps+hh...lTsuPsGaL+Rs ......................................h....hphpahG..hshhp.stDhh..h.QE.l.l..ac.l..+...P.cL...IIEhGstaG.G.Ss.lahAs.hLchl.................................sps.+Vls..l..DI........s...h..p..s......s........t..h..p..t......................s..R.......Ip.h.l...p......u.SS.ss..s..p.h....lpplpp.h.tp..sshl..llILDSsHop.pHVhtELph.Ytsll.os.GsYhlVEDT...s...l..s.s...hPh.....................sa..G.s.G..PhpAlcpaLtpps............p...FhlDp....hcpKhh...hohsPpGaL++...................................................................................................... 0 24 52 57 +1581 PF02627 CMD Carboxymuconolactone decarboxylase family Mian N, Bateman A anon COG0599 Family Carboxymuconolactone decarboxylase (CMD) EC:4.1.1.44 is involved in protocatechuate catabolism. In some bacteria a gene fusion event leads to expression of CMD with a hydrolase involved in the same pathway [1]. In these bifunctional proteins (e.g. Swiss:O67982) CMD represents the C-terminal domain, Pfam:PF00561 represents the N-terminal domain. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.77 0.72 -4.14 215 9489 2012-10-01 19:19:04 2003-04-07 12:59:11 15 45 3396 115 3044 7828 1802 82.90 18 51.31 CHANGED Pphhphhtphttshhh...putLsh+p+cLlslusshsps.sshsh...shHhpt..Ah.psGsop...ccltcshth.tshhs....uhstth..suhthspp ...............................h.phh.thhhs.hh.....pu.....Lshpp+cLlsl.ss...u.t.ss.....s...s.shsl........phH...spt....uh...p....t...G....hop............cc..l..t.plh.th...tsh.ht.......th...stth..tuhthh........................................... 0 764 1817 2496 +1582 PF00795 CN_hydrolase Nitrilase; Carbon-nitrogen hydrolase Bateman A anon Pfam-B_1042 (release 2.1) & Pfam-B_5155 (Release 7.5) Family This family contains hydrolases that break carbon-nitrogen bonds [1]. The family includes: Nitrilase EC:3.5.5.1 Swiss:Q42965, Aliphatic amidase EC:3.5.1.4 Swiss:Q01360, Biotidinase EC:3.5.1.12 Swiss:P43251, Beta-ureidopropionase EC:3.5.1.6 Swiss:Q03248. Nitrilase-related proteins generally have a conserved E-K-C catalytic triad, and are multimeric alpha-beta-beta-alpha sandwich proteins [2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.21 0.71 -4.94 58 15146 2009-09-13 06:29:50 2003-04-07 12:59:11 17 49 4520 101 4901 12501 4878 178.50 18 46.69 CHANGED psAssQ........hs....shhshpss.htphhphl..............................tcAsct..usp.llshPE...hhlsua................ht.thtphhthuh................sh......u.thptlt...........................................................................phApppslhllsG.hspp......csst.....hYNshlhlsss.....Gphl..........s.pYRKh+........h.tah.....EpphhstG.........stsh.ssass......s.h....u+lGh..hI.........CaE..hhaPthtph.hst.............pGspllsssss ........................................................................................................................................................................................................................................................................................................thhQ........h...........hh.s....h.t...t....s....h.....tphh.p....hh....................................................ppA.spt.........ssp...lllhP.E........hh.h..s.sa..............................................................................p...h.....t...h.s..t................................................sh....................t.h.h..pt.l..p...................................................................................................................................................................................................................................................................p..h..A.....p...p....h...s.....h...h...l.l...s....G....hs.p...........................pssp..............................ha....N.sh.h..l...ls.ss..............G.plh.......................................s...pYc....K.h.HL....................h.....tah....................Ep...t.h.hs.sG....................stth....s.hpt...............................................h...................h...+....l.uh......hI.................
..............................CaD...........hh.a..P.p.h......h..p..t..h.s.h................tG..spllhh.t..................................................................................................... 0 1561 3031 4091 +1584 PF01110 CNTF Ciliary neurotrophic factor Bateman A anon Sarah Teichmann Domain \N 19.30 19.30 19.80 19.30 18.70 18.20 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.37 0.71 -5.02 2 48 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 34 4 26 265 0 175.60 49 95.14 CHANGED MAhs-.oP.sshR+.DLCSRuIhLARKhRSDlTsLh-.YVc+QGLstsIslsusDGhPsAus-pWuE.TtspRL.-NLtAYRsF+sLLAphLE-Qp.hhssT-u-ht.AltshLLQVuAFsYplEELh.L.phthP.sEtsu.P.ssschuLFEpKLhGL+VLpELuQWsVRSl+DLR.lSpH..G.PupGuthhhsppp ....................................................MAhs-..s.ssp++.-...LCS....RSIhLARKhRSDlTsLh-sYscpQG..........L.....sp.s...l.sl..s..u....hD..GVP..sA...o..s..-..p..WSEhT-u.ERLp-NLpAYRsFcshLspl......LE-Qcsph.sPs-u-a+pAI+...sllLQVuAFAYplEELMh.L.L..ph.plP....s.Ess..uhPh...s.s...G..c..t..GLFEKKLaGL+VLpELuQWTVRSl+DLRllSpp.t.G..hsu.tst.......................... 
2 1 3 9 +1585 PF03450 CO_deh_flav_C CO dehydrogenase flavoprotein C-terminal domain Griffiths-Jones SR anon SCOP Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.07 0.72 -4.08 168 4331 2009-09-11 23:20:01 2003-04-07 12:59:11 12 53 1952 108 1331 3679 1203 98.20 24 19.57 CHANGED pua...Khu+Rts.DluhVsuAhtlplp...s..........st...plpss+luhGGhuss.sh+A.ppsEphLhGpshsp.pslptAhpslt.p-hsPhsc.hcuospYRpplutsLhhRhhhps ............................................................t.ta.KhucRps...shu.lVssAht..lphc.....s.....................................s......hlp.ps.RlAhG.......GsAs.............p..sh.Rs...ppsE...pt..L...........h...........G...........p...........s....h..........s............t....p....s..........l....p...p........A.sp.........t.hh...ps..........h.........t....sh...ss.......t..u..ps...t.......Y.Rhtlstshhh+hh.t.h...................................... 0 410 752 1046 +1586 PF00473 CRF Corticotropin-releasing factor family Finn RD anon Prosite Family \N 22.10 22.10 22.20 26.60 21.50 21.70 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.80 0.72 -3.87 7 187 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 111 8 64 192 0 39.40 47 25.30 CHANGED tPSlSIshshclLRphL.hchscpphpphp.....sNRphLspl ....PPlSlDLTFHLLRphLEhuRAcp..tp.QAc.............pNRplh-.l... 0 11 19 37 +1587 PF02552 CO_dh CO dehydrogenase beta subunit/acetyl-CoA synthase epsilon subunit Bashton M, Bateman A anon COGs Family This family consists of Carbon monoxide dehydrogenase I/II beta subunit EC:1.2.99.2 and acetyl-CoA synthase epsilon subunit. Carbon monoxide beta subunit catalyses the reaction: CO + H2O + acceptor <=> CO2 + reduced acceptor. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.87 0.71 -4.70 6 97 2012-10-03 09:55:27 2003-04-07 12:59:11 11 5 65 5 55 244 219 163.80 27 75.46 CHANGED tphphhP......p.ssl.sscpAshhps.cssspMI++AKRPlLllGPplhpsE.hEtpsKthhc+clshltTu.....h..tsl...cchp..shh-hophlpssp.phh.hpGpaDLslFlGshhYasupsLstLKpFosplpslsIscaaHPsAchSFsNlsc--..ahchLpEhl ................................................sh.....P........tshhtschsphhps.chuuphlppAKpPhlhsGshll..h..s......h.ps.h......t.....p.......tht.h.t..plshstsu.......ht.hsh...c....cs...+Y....shhp.ps.lsssc.phh.hcsp..hDhslFlGlphaaus.sLphl+sao..sshphshhschhH.sAph..Shsshs.t-.c..hht.hltp..h.................................................................................... 2 14 34 44 +1588 PF01121 CoaE UPF0038; Dephospho-CoA kinase Finn RD, Bateman A anon Prosite Family This family catalyses the phosphorylation of the 3'-hydroxyl group of dephosphocoenzyme A to form Coenzyme A EC:2.7.1.24. This enzyme uses ATP in its reaction. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.99 0.71 -4.88 13 4826 2012-10-05 12:31:08 2003-04-07 12:59:11 15 23 4560 31 1266 3845 2571 176.30 33 80.94 CHANGED hllGLTGGIGSGKSTlushFsp.hGlsllDADllARpVVpPGssuLspIsccFG...ssllhs-..GpLcRpALtcclFucsEc+phLNullHPhIppchhcpluphps...saslhslPLLhEs.phpslsspVlVVcss.cpQlcRshpRc.ulocpcspuhlsAQhSccERluhADsVl-Npushs- ...................................h.llGLTGGIuSGKSTV....u....p....h....h........p................p....h.....G...h.sl............l...D.AD..h..l........A..........+.p..l....l....p............s.Gs...........s.hp.t...l....h....p.t..F.G..................s.p.l...l........t.......t......c..........G..........p.............L........sR.....t...t.L.up.h......lF....s....s....s....p..........p....+....p....h...L..........ssl.........l.........H......P..h....l....p.p....c....h...t........c...p.........h...............p.....p....h.......p.p.t..................sh.l..lh.-.l.....PL.L..h.E...........s.....s..........h...p..............p.....h............s.....c...c..V...l.V..Vt.s.s...c.h.............QlpRlh............pR..............c......sh..o...........c....-.cspthls......uQ.hs......hcp.+.h..th....A.....D....hVI-Nsut...p..................................................................................................................... 0 433 796 1062 +1589 PF02035 Coagulin Coagulin Mian N, Bateman A anon IPR000275 Domain \N 25.00 25.00 25.40 341.10 24.90 24.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.33 0.71 -4.59 2 4 2012-10-02 16:54:34 2003-04-07 12:59:11 10 1 4 2 0 10 0 173.00 78 93.51 CHANGED sNsPhCLC-EPslLGRp.IVopEhKDKIEcAVpAlspcstlSGRGFSIFutHPsF+ECGKYECRTVpsEcSRCYNF.PFpHF..ECPVSspsCEPsFGYTsusEhRlIVQAP+AGFRQCVWQHKCR.aGSN.CthsGRCTQQRSVVRLVTYsLEKssFhCEshRTCCGCPCRS. 
TNAPlCLCDEPGlLGRpplVosEsK-KIEKAVEAVAcEuGVSGRGFSlFSHHPVFRECGKYECRTVRPEHSRCYNFPPFhHFpSECPVSTRDCEPVFGYTsAGEFRVIVQAPRAGFRQCVWQHKCR.YGSNsCGasGRCTQQRSVVRLVTYNLEKsGFLCEoFRTCCGCPCRS.. 0 0 0 0 +1590 PF04733 Coatomer_E Coatomer epsilon subunit Mifsud W anon Pfam-B_3343 (release 7.5) Family This family represents the epsilon subunit of the coatomer complex, which is involved in the regulation of intracellular protein trafficking between the endoplasmic reticulum and the Golgi complex [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.62 0.70 -5.22 7 430 2012-10-11 20:00:59 2003-04-07 12:59:11 9 22 320 7 259 625 124 243.90 29 86.34 CHANGED tsDcLFslRN.FYlGuYQssINpup.lsshst-ss.E+DshlaRuYlAlGphtlVlsEIctus.ussLQAV+hlA.ahssstp+-thlupLpEhlucpshssNsshpLlAullah+-psas-AL+th+tu.ssLEhhALsVQIhlKhcRh-hAcppL+hMQpIDEDtTLTQLAsAWlslAsGG.pKlp-AahIFp-hu-Kas.TshlLNGpAVsshthtpa-EAEolLhEALsKDspDsETLhNLlVsshclGKs...ssR.lSQLKhsHPpHshVcchsppEspFDRhspphs 
.................................................................t....Lhsl+staahGtYptsl.php...h...t.........s...s...p.t......t....hptphhhhRuhlA...t...p.....ht....ll.....tpl.....tt..ps.....s...st.......Lt.......Al....p.hh..A........p..a...h...t.....s................p....p...........ct.h......l.t....p....lp.................p...h....sp...p.........................ts.......h....h.......lhsu...l...h.h..t..ptp.p...tA...Lph..l.......p.........p.............t.......s.............s............l.....E.....h..h..A.........h.....h..l.....Q.....l...h.L...p....hsRh......Dh.A....p........+...p......l...pt...h.....p....p..h......s......-................D......s..h......Lsp.....LApuW..l....slthGu.....-p..........h.....p.pAaa.....lap.E.....h.....u.....p...p..h.....s.........s......o....................h.l.....Ls..upAsspht..schp-A...c...s...h...Lp.......p......A.....L.....p....p....c............s...p....p......s.-......s......l..h.N.h.l...lhs..h.h.GKs...ss..ch..h.s................p...L.p..t.....s.p.p....Hs....hlpc...ht.ttpt.F-phs.ph.t......................................................................... 0 98 151 219 +1591 PF04053 Coatomer_WDAD Coatomer WD associated region Wood V, Finn RD anon Pfam-B_1269 (release 7.3); Family This region is composed of WD40 repeats. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.47 0.70 -5.69 60 828 2012-10-05 17:30:42 2003-04-07 12:59:11 9 37 318 6 577 840 14 397.60 33 41.01 CHANGED psplpphshpsp.........ssphhslshKchGssphh......PpoluasP...ss+hlhV......susGcYhlYo.............ulshpspshGpuhshlWsu...pNc..aAlh-p.spslcl..KNh.cpps.hslplshss-plFs.....GsLLslpssu..lshYDhpptpllpclc.ls...s...KhVhWSss.Gp.hVAll...............o..........ccohhlls.hsh-..............................shsslpE...s.plKSutWss.s..VhlYTTts...+lpY..hlsG-sshIppl-pshYllthhs...s+lYhlD.+-hplhuhplsss.lcFppullcpch-p............................t.spll............tpplspaLpppGa.ElALphspDscpRF-LAlphGsLclAh-hAcphs....spp..........pWcpLGctALppGshplAcpsap+sp..caspLhhLYhsoGstppLp+luphApppuchsstFpsthhh.GclcpplclLhpouphs.AhhhApoau. .......................................................................................................................................................................................................l.phshpt............tp.h...lsh.+chG..s..s...h......Ppolpa...sP......stchVlV..............sssGpY.lYo..........................uhshps.pp.h.Gpu.p.hlasu.....psc.....aAlh-p...s.p............p...........lpl...KNh..ppph.......s....l.p..s..s..h.....s.s.-.t.Iah..............G..LLhhp.s.ss...........l..shaDhpp.t.p.............h..ltclc..ls........K.VhWS..ss..up.hVulh....................s........cc.shhlhp.hph-....................................................th.psl..pE...s.pl+oGhWs.......s....s....sFlYoots...+lpY..hss.G........-hshltpL.D....p......s......hY.l....h........thhs.............s...plYhhD.+-hpshsh.s.ls.o.hca.phulhcpch-..p....................................t.spll..............tpplhpaLppp..Ga..EhALphs.p...............D....c.pRF-LALphGpLc..lAhc......hA.pphs...........spp...............................................pWcpLuchALtpsshplsc.sap+sc..saspL.hLahsoGs.t..ptLp+hs.ch.Actpsc...h.................sstF.shhhl.Gcl....cppl.clLhpss..phs.AhhhApo
at....................................................... 0 228 341 487 +1592 PF02572 CobA_CobO_BtuR ATP:corrinoid adenosyltransferase BtuR/CobO/CobP Bashton M, Bateman A anon COGs Family This family consists of the BtuR, CobO, CobP proteins all of which are Cob(I)alamin adenosyltransferase, EC:2.5.1.17, involved in cobalamin (vitamin B12) biosynthesis. These enzymes catalyse the adenosylation reaction: ATP + cob(I)alamin + H2O <=> phosphate + diphosphate + adenosylcobalamin. 23.40 23.40 23.70 26.20 23.20 22.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.93 0.71 -4.63 173 2528 2012-10-05 12:31:08 2003-04-07 12:59:11 10 9 2106 4 629 1797 875 170.90 42 88.83 CHANGED pc+GlllVaTGsGKGKSTAAhGhshRAlG+Gh.......+VuVlQFlKG.s.hps....GEcphh.cph.......ss..........lpacthGc.GFTWc..opccp.pD..htsAppuWptAcphlt.................sspaclVlLDElshsLca......salsl-cVlssLp.sRPttpcVllTGRsA.sptLl-hADhVTEMp.lKHsFcs.Gl+AQpGlEa ........................c+GlllVaTGsG..KGKoT.AA.FGhuhRAlG....+Gh.......+VuVlQFlKG....phps......GEcshL.c.th..u......................Vca.p.s..h.Gp...GF..oWc..sps.cp...pD..............tssupps...W.pc.u..+.chLt..........................s.s..phDhVlLDElshslpa......sa..Lsl-EVlp.s.L.p.....p.R.Pt..p.....pcVllTGRss..ppl.l-hADhVoEhc..lKHsFcs..Gl+..AQtGI-a........................ 
0 192 403 529 +1593 PF01122 Cobalamin_bind Eukaryotic cobalamin-binding protein Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 26.80 26.50 24.60 23.70 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.04 0.70 -5.82 11 162 2012-10-03 02:33:51 2003-04-07 12:59:11 14 7 67 16 90 185 0 281.40 30 67.51 CHANGED hhhLssLLhhuusssthtp.CplspspppLlcsL.ppLLpshsppuu.s.NPSlhluLRLuuhpshstEphhlppLKhphppp.huSsou........p.ohGpLALhlLALpuuCcshu........cs.cLlopLccphEsEpctht.sHputPtTsYYQhSLulLALClcpscsspsVss+Lhpslp+s.hhhsuphSVDTtAMAsLALTClcpphhs.......shcsplshAlcslhEKIlppppssGhhGNlYSTsLAlQtL.sosp..sc.thsCtKshsslLsplppGsFpNPhuhuQlLPuLptKTYLDl.p.ssCsuscss..hh.....Psssosspsp ..............................................................................hhh...hh........p.C.......lst.pt.hlp.l...hh......p.....sPSlhluhpLus....s.ph...pp....h..h.ppL...........hts..s..........................t.shGpL..ALhlLALpusCcsst........hs.cLlupLpcphppttpth....sppstPhosaYphuLulLALChpstcspt.phls+Lhpshpts....tsp...hS..V.DTtAMAsLALsClppsthp........t.tpplstslcplhccIhppp.ps.pGhhG..NlaSTuLAhQ.AL.sss.....t...........sc.thsCtpshssl.Lp.p.l.p.pGtFpssh..huQlLPsL.tKoYlDl.t..pC.s.p.....h......P................................................ 0 14 19 39 +1594 PF03186 CobD_Cbib CobD/Cbib protein Mifsud W, Bateman A anon Pfam-B_2468 (release 6.5) Family This family includes CobD proteins from a number of bacteria, in Salmonella this protein is called Cbib. Salmonella CobD is a different protein [1]. This protein is involved in cobalamin biosynthesis and is probably an enzyme responsible for the conversion of adenosylcobyric acid to adenosylcobinamide or adenosylcobinamide phosphate [1]. 
28.00 28.00 28.00 28.40 27.70 27.80 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.72 0.70 -5.34 139 2167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1937 0 647 1878 476 282.10 30 91.11 CHANGED hhhlhlAllLDhllG-.Ptth......HPVshhG+llshl.-pthppt..........tp+htGhl..hhhhllsssshluh.hlhths.................hlshll.ps...hllhssluh+oLtccuttV.....tpsL.pps.sLstARctluh.l...VuRDTspLscstls+AslESlAENhsDGllAPLFahh........l.....h........GhP......G..uhsY+slNTLDSMlGY+s........c+YppFGasuARhDDllNalPARLTullhs.lsuhhh............tu........h....phhh...+Dutpa.SPNuGas.AAhAGALulpLuGsshY..G.h...cshlG..suhpsssst.cIppAlpLhttss .........................................h..hhhhuhllDh.llG-..P..th.............HP...V.h...h..hGp.hhshl.-...phhpp..........................ttpht.G.s..l.hh.llh.l.s.sshhlsh.hlhhls.................h.t.hhs.hhhpshhlahsLuh+uLtcpsptl......................tpsL.........p.....p.......s.....Dlst........A.....R.ptluh.l...VuR.DTspL.st.stlsRAslE.olAENssDullAPLFahh..............l.u........................G.sP..............u....s..hhY+sl.....NTL........DuMl......GY.+.s...........p.+Yp.....tFG........h..suA......+.lDDlhNalP.ARLou.lhhs.lsu...h.h..............................pu...................................hchhh.....+-st.....pasSP.Nu.G.h.sE..AAhAGALulpLGGsshY..h.....G.ph....+P.hlG..ss.....p.t...s.st...cltpulpLhhh..s................................................... 0 190 410 540 +1595 PF02654 CobS Cobalamin-5-phosphate synthase Bashton M, Bateman A anon COG0368 Family This is family of Colbalmin-5-phosphate synthases, CobS, from bacteria. The CobS enzyme catalyses the synthesis of AdoCbl-5'-p from AdoCbi-GDP and alpha-ribazole-5'-P [1]. This enzyme is involved in the cobalamin (vitamin B12) biosynthesis pathway in particular the nucleotide loop assembly stage in conjunction with CobC, CobU and CobT [1]. 
21.70 21.70 21.80 21.70 21.20 21.00 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.34 0.70 -4.78 8 2349 2009-09-13 21:11:09 2003-04-07 12:59:11 10 4 2268 0 598 1783 441 235.20 29 94.17 CHANGED l+uhluFhTpLPlhu......DhEphpctlhtaPllGhllGslsulsthltshhhs..s.LsuhLslhlhlhhsGhhHlDGLADhuDGlhusss+cRhltsM+DhplGsuGlsslllVhLlphhuLtpls..h.hhhlhluplsAKhshlLshtht+P...ttpGhGuaFhcphcp.pplslGhllhlllslhhu...sh..h.slhAlhsulhsulhluphuc+pFGGloGDVlGAusElocssoLLuL .........................h...slsFhT..R..l..PlPt...........ths.h.....p......p...h...s....+..u......h...h....h......a.P.l.l.Gl.ll...G.s...l...s.....u.hl..h...h.l....h.t..h...h..h.s...............s..l....u.u.l..l.s.l.h.s.h.h....llTGuhHlDGLADosDGl.h.u......s.t........s......+.............-R......h.....LcIM..+DSR....lGsa....Gsl....A...L...l.h...s...l...L.....h......+....h......h....s....l...s......p....L......s..............t.............s...........t.................h...........h...........h..............h.........h......l..........u...s...s.......s....u.....p....h....s...s......h....h...s....h...h..h..h.h.......................s+tpG.h.G.....s...h...h...h.....s.p........h.s...t.......p...p....h.......h...h....s..h....h......h...s...h....h...hs...h...h.hh........................s........h..t.s....l.h.u....h...l......s.....s.....h......l..........s...h..h....h...h....s....p....h....h.t+....cl....G.G.hTGDslGA...s...h...clsElshLls................................................. 1 189 397 511 +1596 PF02283 CobU COBU; Cobinamide kinase / cobinamide phosphate guanyltransferase Mian N, Bateman A anon Pfam-B_7022 (release 5.2) Domain This family is composed of a group of bifunctional cobalamin biosynthesis enzymes which display cobinamide kinase and cobinamide phosphate guanyltransferase activity. The crystal structure of the enzyme reveals the molecule to be a trimer with a propeller-like shape [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.98 0.71 -4.84 192 2330 2012-10-05 12:31:08 2003-04-07 12:59:11 11 13 2134 6 530 1759 319 165.20 37 82.22 CHANGED hLVoGGARSGKSpaAEpLs.....hphs....tps.hYlATupsh....Ds....EMppRItpH+pcR...sspWpTlEp....s..h...cLsp.sL..tp.tst......pslLlDCLTlWloNllht.p.........................p...shptphppLlssl...pp.hssslllVoNEVGhGlVPpssluRtFRDhhGplNQplAstAccVhLlluGlPl....pl .................................................hLlhGGARSGKSpaAEplh.........................tp............tpl..hYlATuphh........Ds......EM..ttRIt.pHppp....R.........ssp...Wp...Tl...Ep......h...................cLsp.hl............pt....ttt.............tpslLlDClThaloNllht.s...............................tp..slp.tcl.pp.L...lsul......pp.....ps.u...p.l.llVoNEVGh.GlVP.tsp.lu.R.pFRDhtGclNQtlAstA-cVhlVVuGlslp.................................. 1 172 358 453 +1597 PF00241 Cofilin_ADF cofilin_ADF; Cofilin/tropomyosin-type actin-binding protein Chris Ponting, Joerg Schultz, Peer Bork, Finn RD anon Prosite; SMART; Domain Severs actin filaments and binds to actin monomers. 
21.10 21.10 21.20 21.10 21.00 20.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.70 0.71 -4.25 16 2431 2012-10-01 21:06:05 2003-04-07 12:59:11 15 42 403 61 1450 2235 7 121.70 20 53.52 CHANGED ssspssap-l+tscp...hphllapIscs..ppplhVc......p........ss...tssssa--........Fhppls.....tp-scYulaDhchp.........suphsKlhFltWs.PDs..A.l+pKMhYAS.oKssl+ptLsu...lphplQuo..Dhs-ls.c-tltp+hp .................................................................t.......h.pht...t.t.....hphl...lhplptp......pppl..hlp............p.............h.........ssshc.c.......................h.h.p.p.Ls............psp.s...p....Y......sl.a.c..h.ch.pt....................ssp.h.s.c.h.l.h.l.h.Ws..P.-.s...........u.....s...l.+pK...MlYAo..s+....ss...l...p.c..t...l.ps.........hph..pl.p.us......s.p-ls....p.l.t...t................................ 1 449 733 1114 +1598 PF00963 Cohesin Cohesin domain Bateman A anon Sarah Teichmann Domain Cohesin domains interact with a complementary domain, termed the dockerin domain. The cohesin-dockerin interaction is the crucial interaction for complex formation in the cellulosome [1]. 
21.40 21.40 21.50 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.32 40 1118 2012-10-01 21:34:18 2003-04-07 12:59:11 13 97 144 45 199 1131 188 151.00 25 40.86 CHANGED lplslssssu.psGssVsVPVslssl.ss...lushshslsYDsslLphssspsGs..............llh..sssssFsss.tsssuhlshh..Fhcso.husphIspD.GlhAslsFKVpssssss.............sslshssshsasssshsplsssh.ssGslsV ..........................lplslucssA.csG..sphsV..sVolucl...Pss....Glpsh.sFslpYDs.slLpl..sslp......sGs.h................................................hh.s.ss...u........sh........a.p...s...s.....p.s..s..cG.h..lshh....aosss.tsuu.h..hl.ps-..Gl..FsTIsh....pVsusss.sGs.........................s.lp.h...................................................h................................................................................................ 0 105 188 196 +1599 PF01410 COLFI Fibrillar collagen C-terminal domain Ponting CP, Schultz J, Bork P anon Pfam-B_464 (release 3.0) Family Found at C-termini of fibrillar collagens: Ephydatia muelleri procollagen EMF1 alpha, vertebrate collagens alpha(1)III, alpha(1)II, alpha(2)V etc. 
26.80 26.80 26.80 27.10 26.70 26.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.73 0.70 -4.92 36 1510 2012-10-01 23:56:02 2003-04-07 12:59:11 13 94 127 0 533 1087 7 167.00 40 15.57 CHANGED l+sPpG.o+csPARTC+DLphsHP-....hpsGpYWIDPNpGsst.DAI+VaC..shps..GETClhPs...spl.ppsWappps....ttsWFupphpGs.................pFsYsss..........slQlsFLRLLSspApQNlTYHC+NSlAahDpts.sshc+AlhLtGuNDhElps-sssphpYpVl.pDGCpp+supauKTVlEa..cTp+spRLPIlDlAPhDlGsscQcFGl-lGPVCF .........................................................ppP.G.opcsPARoCcD.Lphs..a..P....c....h..s....GpYW.lDPN.p.G.Cs..Du.h+VaC..Nhps....G-....T.Clh.sp......tt.h...tta...tt...............ahu........h....t..t......................hpYss.......s....hlQh.sFL+LLSspApQslTYpCpsohu.a..h..-.....t..........ts....s.s.h.c...pu.lhh.G..u........N..-..................E...h.p.h..c..........sp.h.......h.......p........s.h.....DuCp.......p...p..Gt....t.cTlh-h..pT........psppLPllDlt.....Dh.G.ts.s..p.c..aG..h-lGPVCF................................................................. 0 127 171 312 +1600 PF01024 Colicin Colicin pore forming domain Bateman A anon Bateman A Domain \N 23.40 23.40 23.40 29.80 21.20 23.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.98 0.71 -4.71 14 176 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 93 7 11 176 0 170.20 39 38.52 CHANGED pcppcpsEpphlpcAs-hhsuhhcclsEphGcKacplA+-lAsshK...GKpIRSh--AhuShpKhtsN.shKhstsDR-AIsNAhculshp-hAppLsplSKuFKhsshshpstclhpchhcuhcTGNWtPLhlclEohslushAouVshhlFShslG..........sslGllGI.llsulluuhID-shlsplNphl ...............ptppptE.phlpDAlchsssFhcplsEcaGtKhpplAcElAspu+...GKpI+ss..-.....-ALpua-Ka+s.slsK...KhstpDRtAIspAL-ulphpch....upplspFuKuhthsu+shphhDlhs-hhcuhcTsNW+PlFlclEshssusuA....oslsuhh.FSlhhG...........oslGIlGh.ullhusluAhID-phl-plNchl............. 
0 1 3 8 +1601 PF03857 Colicin_im Colicin immunity protein Finn RD anon DOMO:DM07441; Family Colicin immunity proteins are plasmid-encoded proteins necessary for protecting the cell against colicins. Colicins are toxins released by bacteria during times of stress [1]. 25.00 25.00 27.20 29.20 22.20 22.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.73 0.71 -4.43 4 38 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 0 3 37 0 130.20 56 79.78 CHANGED cshlLcpIhspsp.LPSITShaNPlMTclMslYsKTAPhhAllLFIhTaKpRcLIpposRptVL+SChhuslhYAshlYlhhFpNhELTTAG+sh+LhspN.ssTLhlhYhulYhhIhahoYhsLhsPlhshKhhKtRQ ....aSpVLYhLY.NKlu.LPSITSLasPlMTpLMSsYsKTAPVhGILlFLCTYKTREIIKPl....o.RKL...VlQSCF..WGPVFYA.ILI..YITLFYNLELTTAGu.FFKLlS+N.slTLFILYsoIYaoVL.TMTYAl....LLhPlLVhKhFKsRQ.............. 0 0 1 2 +1602 PF01320 Colicin_Pyocin Colicin immunity protein / pyocin immunity protein Finn RD, Bateman A anon Sarah Teichmann Domain \N 20.90 20.90 21.10 21.80 20.50 20.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.82 0.72 -4.12 29 341 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 189 47 44 217 1 84.90 43 95.53 CHANGED Mc.hKp..plpDYTEsEFLpFlpclhps...p..hss--....hspLlcHFpclTEHPsGoDLIaYPps...st-DSPEGllcplKcWRAuNG+PGFKp ...............hKpplpDYTEpEFlEhlpclhss..p.........ps--........hhDsLlcHFp.+.l.TpHPssSDLIaYPpp...pt--pPEsIlchlKEWRtppGhPhFKp.................... 1 3 11 21 +1603 PF02674 Colicin_V Colicin V production protein Mian N, Bateman A anon COG1286 Family Colicin V production protein is required in E. Coli for colicin V production from plasmid pColV-K30 [1]. This protein is coded for in the purF operon. 
25.80 25.80 26.00 25.80 25.60 25.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.05 0.71 -4.35 164 3878 2012-10-03 02:02:08 2003-04-07 12:59:11 11 7 3737 0 790 2303 1930 149.10 22 74.52 CHANGED lDllllhllhhuslhGhhRGhlpplhulhuhlsuhhlAhhats.lushlst.......................................stthtthlua.hl...lFlhshll.stlluhhlsp.....hl.ph..ssluh.hD+lhGslhGhl+uhlllh.lllhlhshhshst.............................hhppShlhs.hlt......shss ........................................................lDhlllhllshu.slhGhhRG....hltp...........h...........luLl..uhls...........uhhlAt.t...a.h.s..l.us.h.l.st...............................................................................t.s...phht.s...sluh...hl...lFl...hs...h...ll....stllshhlsp...........ll..pt.....ssLut....hDR....l....lG.s....l....h.G....slcuh....lllh.llh.h...lh...s.h.h.s.hsp.............p.................hppShlh..hh.....s........................................................... 0 259 520 663 +1604 PF01114 Colipase Colipase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports duplication of common fold with Colipase C-terminal domain. 19.40 19.40 19.60 24.90 19.10 17.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.14 0.72 -4.47 9 50 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 36 7 28 47 0 39.20 63 37.13 CHANGED PRGllINLEsGELClNSAQCKSpCCQHsosLuLARCs.KA ........PRGlIINL-sGELClNSAQCKSp.CCp+souLuLARCssKA.... 0 3 3 7 +1605 PF00325 Crp crp; Bacterial regulatory proteins, crp family Finn RD anon Prosite Domain \N 20.50 14.00 20.50 14.20 20.40 13.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -7.03 0.72 -4.47 12 2310 2012-10-04 14:01:12 2003-04-07 12:59:11 15 6 1382 67 409 4399 727 31.70 52 13.95 CHANGED Lsho..Rp-IAcaLGhThETVSRhls+LpcpuLI ..........lphT..Rp-I.GphlG.h.o.hETVuRlLpphpcpsll............. 
0 79 197 303 +1606 PF02740 Colipase_C Colipase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports duplication of common fold with Colipase N-terminal domain. 23.70 23.70 24.60 23.80 23.60 23.60 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.38 0.72 -4.21 5 53 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 32 7 26 45 0 44.20 69 42.92 CHANGED ENSECSPpTLYGIYYKCPCERGLTCEGDKTIVGSITNTNFGIChD ........ENSECSsc.TLYGlYYKCPCERGLTCEuDK.oIVGoITNTNaGlChD................ 0 2 2 4 +1607 PF03047 ComC COMC family Mifsud W anon Pfam-B_2107 (release 6.4) Family This family consists exclusively of streptococcal competence stimulating peptide precursors, which are generally up to 50 amino acid residues long. In all the members of this family, the leader sequence is cleaved after two conserved glycine residues; thus the leader sequence is of the double- glycine type [2]. Competence stimulating peptides (CSP) are small (less than 25 amino acid residues) cationic peptides. The N-terminal amino acid residue is negatively charged, either glutamate or aspartate. The C-terminal end is positively charged. The third residue is also positively charged: a highly conserved arginine [2]. A few COMC proteins and their precursors (not included in this family) do not fully follow the above description. In particular: the leader sequence in the CSP precursor from Streptococcus sanguis NCTC 7863 Swiss:O33758 is not of the double-glycine type; the CSP from Streptococcus gordonii NCTC 3165 Swiss:O33645 does not have a negatively charged N-terminus residue and has a lysine instead of arginine at the third position. Functionally, CSP act as pheromones, stimulating competence for genetic transformation in streptococci. 
In streptococci, the (CSP mediated) competence response requires exponential cell growth at a critical density, a relatively simple requirement when compared to the stationary-phase requirement of Haemophilus, or the late-logarithmic- phase of Bacillus [1]. All bacteria induced to competence by a particular CSP are said to belong to the same pherotype, because each CSP is recognised by a specific receptor (the signalling domain of a histidine kinase ComD). Pherotypes are not necessarily species-specific. In addition, an organism may change pherotype. There are two possible mechanisms for pherotype switching: horizontal gene transfer, and accumulation of point mutations. The biological significance of pherotypes and pherotype switching is not definitively determined. Pherotype switching occurs frequently enough in naturally competent streptococci to suggest that it may be an important contributor to genetic exchange between different bacterial species [2]. The family Antibacterial16, streptolysins from group A streptococci, has been merged into this family. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -7.19 0.72 -4.51 20 666 2012-10-02 23:56:30 2003-04-07 12:59:11 9 4 281 3 31 212 0 29.20 45 61.52 CHANGED MKp.........ppLppFppLs-c-LppIpGGshhtphh ............Mcp........h.pL.ppFppLoscELQcIpGGthh....h...... 0 6 10 15 +1608 PF02247 Como_LCP Large coat protein Bateman A, Mian N anon Pfam-B_2294 (release 5.2) Domain This family contains the large coat protein (LCP) [1] of the comoviridae viral family. 
27.10 27.10 27.70 33.70 26.10 27.00 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.04 0.70 -6.06 10 129 2012-10-04 01:49:40 2003-04-07 12:59:11 11 4 22 5 0 137 0 314.50 38 47.63 CHANGED pE.sLhpLSLDDTSSl+GohLpTKlApo+llLsKsMlGGTlLpsshLsshLscushRAolsLhRTHVIpGKI+slAolNls-NTGCuLAlsaNSGlcGt.huTDIYThsSQDuhlWNPACcKss-aoFNPNPCuDuWshtFLp+T+sH..hsVpCVoGWTsoPhTDltlslsWaIssphCVP+phslusspssFslNRWMGKLoFPQGsspVl+RMPLuIGGGAGsKsAILMNMPNAhlSLaRYF+GDllFElTKMSSPYIKATloFFIAFGslo-chsN...LESFPHKLVQFuElQc+T.TlsFoQpEFLTAWSTQVhssssspuDGCPaLYAllH.DSsoSTIEG-FslGVKLlsI+sasuhGpNPGhpGoRLLGuhu ..............................h...p.uo.h.phlhpshh.lshshssGshlhsthLs..L.spts.htsh.hphhphh.uplhshhshtls.ssGhuLhhsaspG.c.ut.hso..slhphhu.pphhWNPAhp..hph.hpP.sCsDhWshpaLtpspht..hsl.slotWhssPhsDhphohshahps....p.slPc.....ph.sshp.tsshhhp+.hGpLsF.QG.ppshhph.lshGtstsstptlh.shssAhhuh.pYhpuslhh-lhhhSSPhItuThuhhlshG.sh.cphsN...h-uh...PHh.hpFuchpcps.slpFsp-.Fhshhohphhs.sshptDssshhaslhp.DusuSsl.G-hshtlthp..tshphhGhssGh.sshhhh.................................. 0 0 0 0 +1609 PF02248 Como_SCP Small coat protein Bateman A, Mian N anon Pfam-B_2294 (release 5.2) Domain This family contains the small coat protein (SCP) [1] of the comoviridae viral family. 21.90 21.90 23.00 21.90 19.10 18.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.93 10 94 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 21 5 0 104 0 162.30 36 20.83 CHANGED ssps.sVYsshhhlcTPlss.osppsFuuFThDLlsusISsDuoG.NWshslhsSPIuNLL+TAAWK+GTIHhQLhhpG.AuVKRSDWuuosplsLppuhuscuhsARoWhIocP+uu-lpFslEIsGPNNGFEMhsSsWANQTTWaLEhlIsNP+QhslFElsh+lspNFEVAGNsLhPPlsLS ..................p............................................u.s.shcl.pSPhspLLpssAWh+GTL+aplVhcG.uuschusapspsQlslosspss.pohsup.pa...s...ho..pPtShELpFsh-lsGPssGFc.Mh.s.h.uspp.ahLphtlsNs+p.ssh.l.uthspDhchAGp.........t.......................................... 
1 0 0 0 +1610 PF01257 2Fe-2S_thioredx complex1_24kD; Complex1_24kDa; Thioredoxin-like [2Fe-2S] ferredoxin Finn RD, Bateman A anon Prosite Family \N 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.58 0.71 -4.60 178 4533 2012-10-03 14:45:55 2003-04-07 12:59:11 14 41 2679 24 1588 3500 2480 131.40 29 53.12 CHANGED lcphls+Ys....pppuAllslLphsQcp.......h..G.alstsslphlAchLs..lshhcVhpVATFY.ohFp.....hpPsG+.ahlpVCssosChlp..Gu-clhcshcccLGl..ph.G....cTT....sDGpFoLppVcCLGAC.spAPshhl....sc..........-..hapcL.Ts-plppllcphp ...........................................................h..t.htpa.........t.tushl.hLphhQpt.............G.als.tshthlAchL.t..hs..cl.tVsTFY.s.h...........hp.P.sG.....+.....ahlplC.....su......s........s........C.tl..p...G.u.pp.lh.pt...l.....c...c....c......L.s.l.........p.....s..................p.so................D....G.p....hol.p.ps.p.C.lG.sC.....s....p......u......P..s.hhl......ss......................c....has..p...l.o...s...-.pl.t.cllcph...................................... 
0 643 1141 1390 +1611 PF00346 Complex1_49kDa complex1_49Kd; Respiratory-chain NADH dehydrogenase, 49 Kd subunit Finn RD anon Prosite Family \N 20.40 20.40 22.50 20.50 20.10 20.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.74 0.70 -5.40 7 7353 2009-09-12 08:12:15 2003-04-07 12:59:11 14 22 3508 15 1751 5117 3078 185.50 28 55.80 CHANGED DlGAhTPhhahFcEREclh-haEtsoGhRhHsA.ahhhhGVtpDLPhGhhDclh-asc.F..tlD-h-chlTpNpIahpRlpGlGhlott-AlsaGhoGsMLRuSGltWDlRKspPY-sY-ph-a-lsh.sttGDCasRYLsRltEMRpSl+IlpQslpphPsGP...pl-th+hp.s.+schppphEslIt..hhho.saplPsG-sYstlEuPKGEhGlYLsuDso.hPaRh+lRsPuFsHLphlsth.+GhhLADlhAllGolDlVhG-VDR ..........................................................................................................................................oshh.ha.p...Rp..h.......phhEhhsG.Rhh.s.h.h.GGl..t.....D.l..............................t....................................h.t.h...h.p...h..t.t.l.t.p.h...p.hh....ts..h..tR........sluhhs.p.A.hthu..sGshhRu.o.....G.h...t.hDhR..t.....p...s.a.....hY...........t.t.........h.......h......p......l...........h......t.........tt..................sDshsRhhl+htEhhpShphlp.s...lp.......h.........t......s..................................................................................................................................................................................................................................hEss+G.....h.a..h.s.t..s.s.......h.a.Rh+hRsssa..p.h....h...h....t....luDh.hhhuo.s.hhsthD..................................................................................... 
0 581 1119 1471 +1612 PF01512 Complex1_51K Respiratory-chain NADH dehydrogenase 51 Kd subunit Bateman A anon Pfam-B_780 (release 4.0) Family \N 20.50 20.50 20.80 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.86 0.71 -4.48 123 5590 2012-10-02 20:27:15 2003-04-07 12:59:11 12 89 3181 15 1614 4537 2950 161.60 39 31.64 CHANGED lccuGlsGhGGA..GFPTtlKhssssc...........chlllNusECEPhlpsDctLhpcpscp..llcGhtlhtphl.uupcshIulcsphscAlpslppA......lp....ptpt.................lclthhsstYPsGsEptLlpslpG+..........tlPps....t..hP...hchGl..........lVpNVpThhslhp ....................................l+puGL+GRGG....A....GFPTGlKWphhsc.p..............................cYllsNADEsEPGshpD+tlM..cp.p..P..ap..................llEGhhIuua.Al.t.Ap.puaI..al+.uEa...p..Ahp.tLppA............ltcu..pp.shlt..........................................hthplpl+hG.s.G.tYlCGEEoALlpSLEG+.........tuhP+s..........+..PP.....ushGlh...............tPTllsNVETlssl.......................................................... 0 615 1120 1387 +1613 PF00668 Condensation DUF4; Condensation domain Bateman A anon Pfam-B_130 (release 2.1) Family This domain is found in many multi-domain enzymes which synthesise peptide antibiotics. This domain catalyses a condensation reaction to form peptide bonds in non- ribosomal peptide biosynthesis. It is usually found to the carboxy side of a phosphopantetheine binding domain (Pfam:PF00550). It has been shown that mutations in the HHXXXDG motif abolish activity suggesting this is part of the active site [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.70 0.70 -5.57 42 20609 2012-10-02 12:01:53 2003-04-07 12:59:11 15 1841 2351 9 7271 22090 414 273.20 20 26.78 CHANGED hpshhPloshQcthhahpphpssss.....uashsshlchps..sl-hpplcpAhppllp+H-uLRhhahpppt......hQhlhpptphplhhhphhs.........pph.chhhpc..hppsacLppu.PLh+stlhphtcs.pthlhhs...hHHllhDGhShsIlhc-ltphYpshp.......Lsshs............spapsau.hhpphtppt.hpcptsYWtphhpph.s......h.lstchspsstpshpst....plphslstt...htptLpphspppssolhslLlusaslhLpcasupscl.llGs.hsGRsp.....sslpphlGhFlshlPlclchp .......................................................................................................h....hslo.h.Qp.t.h.h....h....h...t...p...h..t...........tss.....................ta.s.....h........h...h..h.....c......l.....p.................u......t..........l...........c......h.s........t....Lp....p....A..h....p....t.l.l..p..+..H..p..h..L..R..s...t.ah...tpss.....................t..h..Q....h................l....h......t.........t....h.......................h.....t.......h............h..h..s..h..t.t....................................tt.h...p.h..h.t...p.p.......ht..p...s...h..........c..l......t...........p.......s..........s..........l.......h....+....h......t...l......h............p............h...............s..............s................t.......p.............t..............h............lhls..................hHHl........lhD....G...h.S.h....t.l.lh..p.-....l....t....t..h...Y...psht....................hs.s...s......................................h.p.a...t..s...a....s......t...h..............p.....t............h............h..........p........s....................................h.......t.....p......p......h.........s.....a.....W...p....p.t.....L....t....s....h.s.s.....................................l...s.....h....s....h....s.......t....s...s....h....t...s..h...t...st...................ph..p.h.p....l.stp......httt.l..p..p...h....s....p.......p.....t.......t
........s.....o.....h......t..l..l...h.u.u.h.....u......h......h.........L...............t..........c............h.............s.........s............p.............p..........-............l.....s........lG............hs.h......s....u.Rtt.......................s...s...h..p.p...h...lGh..Fhss.lsl+hp..t........................................................................................................................................................................ 2 1552 3788 5729 +1614 PF00029 Connexin connexin; Connexin Sonnhammer ELL anon Prosite Family \N 20.60 20.60 20.60 20.70 19.20 19.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.33 0.72 -3.98 65 1256 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 120 6 656 1028 0 102.60 49 33.51 CHANGED sWuhLtpLL-.pVppHSTslGKlWLoVLFIFRIllluluuEsVWuDEQScFsCNTpQPGCcNVCYDphFPISHlRaWlLQlIhVSTPo..........Llals....Hshaphc..+cc+tcp+ ........................sWshLtplLp.t.Vpp...HSTslG....+.lW..LoVlFIFR..llllssAuEsVWu.....DE.QucFs.C.N.TpQPG.....Cp.NVC.YDphFPIS.HlRaWsLQlIhVSoPo..........Llahs..Hshaphpppc+....t............................... 
0 61 128 329 +1615 PF03508 Connexin43 Gap junction alpha-1 protein (Cx43) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 37.00 37.00 23.60 22.80 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.38 0.72 -6.56 0.72 -4.69 3 90 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 65 1 24 85 0 20.00 92 5.88 CHANGED RsTuSCRNYNKQASEQNWAN ..RNNSSCRNYNKQASEQNWAN 0 1 3 8 +1616 PF03509 Connexin50 Gap junction alpha-8 protein (Cx50) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 48.60 46.60 17.30 16.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.30 0.72 -3.67 4 47 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 33 0 25 40 0 66.60 70 15.69 CHANGED IQKAKGY+LLEEEK..slSHaFPLTEVG.hEsu.Lsu.sFptFEEK...uhuPhcDhS+sYDETLPSYAQs ......IQKAKGYQLLEEEK..IVSHYFPLTEVGhVETSPLsA.PFspFEEKl...uTGPLu...DlSRuYpETLPSYAQV. 0 1 4 10 +1617 PF03601 Cons_hypoth698 Conserved hypothetical protein 698 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 24.50 24.50 24.60 25.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.00 0.70 -5.78 13 3130 2012-10-02 17:06:44 2003-04-07 12:59:11 9 5 2815 0 627 2177 1234 301.90 31 87.76 CHANGED sGLLLshllullAhh.......Luph....hhstluAhslAIllGhlluN..ahphppphpuGltFupphLLRhGIlLhGhpLolspluslGhpullhsslslhuohllshalG.+hltLD+phuhLluuGoSICGAuAlhAspPVl+Acscc...VuhAIuslslFGTluhhlYPslhshhuhs......sctFGlahGuolH-VApVsAAGtthuspsss......sAllsKhhRlhhLuPhllhLuh..hhs+ppptutspst...+hs.....lPaFlluFlllullsohhhls........ssllshlsslsoahllhAMAAlGLssslstlt+sGhKPLlLu 
.....................................................Glhl.shlluhhuhh....................lup.........h...hsh..luuhs..lAIllGhlls....s.....hh..t.........h..........p...................p..h....p.s..G....l.p....Fu....s....cpLL+huIlLhGhpL........shsp.......l......hs..lGh......t......u..l......l......h.......s.ll...s.....lh.....soh.....l......l.....sh.a......lu.+....hh....tl..D..c.c..h.uhLlu..sGouICGuu..AlhAsuPll...+...A...c...s...cc...........suhA.lus...lslaGoluhhlaPhl......h.s..h..h.shs........spsaGlasGsol.H-....lup.VlAAutsh...u...s..put.s...................hA.s....lsKl.h.RVh..h.....L...sP..lsl.ll.uh..........hh.p.....p...p..pp....t..s.psptt..................+hs.........................lPaFllh.Fl...ls..ul.l.so..hh.....h...ls............................ps....lh.s..h...l.....t.....p........l...s.phhlshAMuAlGLssplpsl.t.c..s..G.sKsllh................................................ 0 209 414 530 +1618 PF03602 Cons_hypoth95 Conserved hypothetical protein 95 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.12 0.71 -4.89 27 4548 2012-10-10 17:06:42 2003-04-07 12:59:11 10 7 4379 14 1006 14823 4683 175.00 31 90.57 CHANGED hRIluGph+G.......RpLts.su.pshRPTsD+VREulFNhltsh......lststhLDLFAGSGuLGlEAlSRGAppslhlEpstcuht.hlccNlptLth........thlhpssthth.phstps..FDlVFlDPPYtps....ppslphlsppshLp.....suhlhhEptpc.ttl.p.ssshphh+c+thGpsplphat 
.............................................................hRIIuG.p.a+GRp.L....s......s....p..u....t..s...h...RP..T.o.D+..V.+.E.slF.N...h.Lssh................hp.s.u.p.s.L..D..L..FA...G.S..G....u..L..G...l......E...A...l.....S.....R.....G...A....s.......p.....s...........s.....h......l..Epst..pu..hp.....hlp....c..N......l....p....t....l.c.hps.......................hpll.p....t...s......s.....t............p.........s.........l............h......t......h......................................t.......s.................t.....t........F...D......ll...F.l..D...P.....P........Y...pp.......s........h.......h..............p.p........s.......l.p........h........l...t....p........p........s........h.L.s.......................ps..u..l..l.h..s..E.p..s...p..p..........t...h...s.......p............s....s....s....h....p....h.h.+.c.+thGpsthphh................................................................................... 0 343 672 859 +1619 PF04234 CopC CopC domain Kerrison ND, Finn RD, Bateman A anon COG2372 Domain CopC is a bacterial blue copper protein that binds 1 atom of copper per protein molecule. Along with CopA, CopC mediates copper resistance by sequestration of copper in the periplasm [1]. 22.60 22.60 22.60 22.90 22.50 22.50 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.20 0.72 -3.36 162 1651 2012-10-03 16:25:20 2003-04-07 12:59:11 7 10 1283 12 405 1175 336 96.80 35 42.58 CHANGED HupLhuosPAssuslssuPsplpLsFsEslp.......thuslpls.sssG.pshsssp.......spsss........pshslslss..Lss....GsYpVpW+slS.sDGHshpGsasF.sVp ..................HApLppssPAss.up..l..s..sA.PptlsLsF.....oEslp.s....................sFou..spls......ssps....c..s..lpsts..........................s.phst.....tstpplhlsLsps....Lts................Gs.YsVsW+lVS.s.DGH.spGpaoFoV.................................... 
0 101 242 333 +1620 PF00127 Copper-bind copper-bind; Copper binding proteins, plastocyanin/azurin family Sonnhammer ELL anon Prosite Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.54 0.72 -3.77 31 1467 2012-10-02 17:41:00 2003-04-07 12:59:11 15 49 912 431 532 1917 1988 103.80 25 50.66 CHANGED spstlsscsus.hsFsssslslssG-plhalN.........sshsHNlVhsc.........DtlsuGs-sstlphsc.....cshl.usGEshSVThs...tsGs..YsaaCo.P.HtuhhMhGpVsVp ........................................................................................h.....thttss..htFp.P...s....t...lplps.G..s..s..l..p..a..h..s..................ss.hs..H...Nhshsp......................................sth..s...t.s.h.pt....hh.p..t.....................................t.h.......t......s...G..c..s....h..s..l..T..Fs.....psGs...Ypa.....h.....Cs..P....Hh...s.h....s.MhGplhV................................... 0 126 321 455 +1621 PF00649 Copper-fist Copper fist DNA binding domain Bateman A anon Prosite Domain \N 25.00 25.00 39.10 37.80 24.80 24.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.38 0.72 -4.76 23 240 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 128 1 176 247 0 39.50 57 8.31 CHANGED M.llIsGpKaACtsCIRGHR...uSoCpHs.-R.Lh....cl+pKGRPs ................M.lIsGtKaAC...EsCIRGHR...sSsCpHs.D...RsLh....pl++KGRPs...... 
0 46 97 155 +1622 PF01218 Coprogen_oxidas Coproporphyrinogen III oxidase Finn RD, Bateman A anon Prosite Family \N 20.00 20.00 22.60 22.50 18.90 18.40 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.90 0.70 -5.86 93 2155 2009-09-11 07:50:01 2003-04-07 12:59:11 13 8 1940 24 700 1719 2513 276.40 51 93.10 CHANGED pplcsalhsLQccIspuLE....th.D...G.....ts..pFttDsWpR...t..pG.....GGGto+VLpsGpVFEKuGVNFSpVhGsthPsuAost..Rs-l.....uGt................sFpAhGVSLVlHP+NPalPTsHhNlRaF..lu...ptp...spsssWWFGGGhDLTPaYs.....a-EDshHaHpss+sACssa.GsshYP+aKcWCD-YFaLKHRsEsRGlGGlFFDDLs...p....hs...F-psFuFhpulGcuFLsAYlPIVc+R+stsau-cERpFQlaRRGRYVEFNLVaDRGThFGLQoG.GRsESILMSLPPhscWcYs.apPpsGS.EAcLhc.alts+-Wl .........................p.tscpalhpLQDpICptLptl..D.......G........................upFhcDsWpR...p.s.G.................G.GG.po.....RV.........lp.............s..............G..............s...............VFEpuGVNaSpV.aGp.hhPs.uAsst...R.sch...........uGt..................................sF.AhGlSLVlHP+NPalPTsHhNsRaF....hs.................p...t....................st.ss........sWWFGGGhDLTPa.Ys.....a-....E.....D...shHaHpst+s.......hC.p..........s.a.......u....t....s.............h....YP+..a...KcWCD-YFaL+HR.s...EtRGlGGlFFDDL....s...s.............................s.....a-.p.sF.uF.hpuVGcuahsAYlPIVc+R..+..s..h.....sasEcERpaQLhRRGRYVEFNLVaDRGTlFGLQ.....TG..GRsESILMShPP.hs..+Wc..Ys.............a...........p...P..pss........S.EutL.p.hh...s+-Wl........................................... 1 214 409 570 +1623 PF03232 COQ7 Ubiquinone biosynthesis protein COQ7 Bateman A anon Pfam-B_3545 (release 6.5) Family Members of this family contain two repeats of about 90 amino acids, that contains two conserved motifs. One of these DXEXXH may be part of an enzyme active site. 
22.00 22.00 22.00 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.23 0.71 -4.79 47 840 2012-10-01 21:25:29 2003-04-07 12:59:11 8 7 777 0 370 769 1628 164.50 33 78.42 CHANGED thl-chIRVDpAGEhGAspIYtGQhtVLu....csphushlpcMh-QEptHhppFscLltc++VRPTlLtPlWcluGFuLGuuTALlGccAAMACTsAVEssIscHYssQlcpLt......ps-tc......................LtspIpcFRD-ElEH+Dhulpp.sAcpAssY.lLopsI+hGCRsAIhluc+l .........................................t..httlhRVsHsGElsAptl....YpGQh..h.........shp..........pspl+thhp.chhcpEtcHLshhpchlp.ch..ps..R.P.olLsPlWh.s.uuau..l...G.......ss.su.lhGccsuhuhssusEcplspHhsspLcpLs............tp-tc..........................psllcphRpDEhcHt.cpA.......lct.u.u..t..p..h................h.th....t....................................................... 0 111 206 296 +1624 PF04803 Cor1 Cor1/Xlr/Xmr conserved region Waterfield DI, Finn RD anon Pfam-B_6320 (release 7.5) Family Cor1 is a component of the chromosome core in the meiotic prophase chromosomes [1]. Xlr is a lymphoid cell specific protein [2]. Xlm is abundantly transcribed in testis in a tissue-specific and developmentally regulated manner.\ The protein is located in the nuclei of spermatocytes, early in the prophase of the first meiotic division, and later becomes concentrated in the XY nuclear subregion where it is in particular associated with the axes of sex chromosomes [3]. 22.10 22.10 22.40 23.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.66 0.71 -4.39 12 157 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 50 0 76 193 0 115.40 34 52.16 CHANGED sKsLpsKRKRlEshoKsShKuSppKlEplW+TQpspRQKLsp-aSQQhhslhQQW-hDspKhcEQcEKLsNhF+QQQKlhQQuRlVQsQ+lKsl+pLaEQFlKshE-lEKs+-shhpusQpEL+KEMAhL .......................sL.tK+++hph.spsohps.pp+l...cphh+spp.ppRQKlspcaSpphhslhppa-hDhQKhcE...p...c....EK....ls..s..happQQK..hhQQschlQpQ+Lcsh+plh-palKshpslEpsppphh.shpsEhcc.hs.............................. 
0 12 14 22 +1625 PF01544 CorA CorA-like Mg2+ transporter protein Bateman A anon Pfam-B_944 (release 4.0) & Pfam-B_3206 (release 7.5) Family The CorA transport system is the primary Mg2+ influx system of Salmonella typhimurium and Escherichia coli. CorA is virtually ubiquitous in the Bacteria and Archaea. There are also eukaryotic relatives of this protein. The family includes the MRS2 protein Swiss:Q01926 from yeast that is thought to be an RNA splicing protein [3]. However its membership of this family suggests that its effect on splicing is due to altered magnesium levels in the cell. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.91 0.70 -5.38 79 8370 2009-01-15 18:05:59 2003-04-07 12:59:11 13 47 4035 34 2466 5606 407 264.00 18 74.31 CHANGED tpshhWlclptsspp..phphLtpt.......hslst.h.hp........hhstppps+h-.......hpsshhlhlpshphspsspt...h.......lshhl.ssshllTlppps...hphh.......pplhppht........ttht.pssttllhtllptlscphhphlcplppclcplEcpl............pppspphhpclht..l+...+p.lspl....+chlhsppphl........phhp......pttthhsppp......lpclhsclpp....lhpthphhp-hlptlt-thtshlspp.......hNchh+hL...Tlloslh.lPhTl..lsGhaGMNht......hP......thphta......hhhhhlhlhhlhshlhhhhh......++ 
...............................................................................................................................................................t....hWlpl.t.sptp...pht....ltpt.......ht.l...h..hpt...........hh.p....p.p.ps..+hp..................t.s.h......hh.l..h....h...t...h...h..p.t.ppp..................................l.thhl....t........p.....p........hllo...hp...pp.......hh...............................pthhpph...........................tthh.hss....t..h.lht.l.h.......p.ths......cth....hth.l.cp.lppp...h..p.pl-.ppl.............................ttptp...p..p........h........t..plhp.....lc.........cp....lhhh........................................ppslt.....s.......p.p.hl..................................phhp.................t..h....h...hs..p.p.p.....t..th...........hc.-.l.hp.chpp...................hhph.hch...h..t...ph...ls...t...h...h.......p.......sh...h...u...h...l....stp.............................N.p.......h.......h...c.......h...l..............olh.......oslh...lP....Th...ls..u...h..aGM.N..hp................hP................thp.h.ta..........hh.h.h.s........lh.hh.l.hhsh...hhhh.hh.++........................................... 0 668 1466 2076 +1626 PF03311 Cornichon Cornichon protein Mifsud W anon Pfam-B_3813 (release 6.5) Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.70 0.71 -4.00 31 647 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 286 0 397 527 2 110.50 35 84.83 CHANGED Mu.phh.h.alhulllssshlFhtlaalIhhuDLEsDYlNPI-hCs+LN.hVlPEhhlHuhLslLFLlsGaWhsFL........LNlPllsYNshphhp+.....spl..lDsT.EIF+p..Lstcp+cshlKLuFYLlhFFhYLYp ...............................h.h.ahhullhsss...lhh.laal.....Ih..hs-LcsDYhN.Ph-.Cs......p..........LN.................hllPEhh..l..Huhhsl.ha..L...hstpWhhhh........LN.l.Pl.lhaphh+.hhpp.......tl..hDsTpIhpt.......Lshpp+-uhhKLuFaLl.FFhYLY..................................... 
2 101 172 283 +1627 PF04694 Corona_3 Coronavirus ORF3 protein Mifsud W anon Pfam-B_5763 (release 7.5) Family \N 23.60 23.60 25.00 54.80 23.20 23.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.21 0.72 -4.09 4 89 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 40 0 0 78 0 59.00 82 84.35 CHANGED MDIVKSIshSVDAVLDELDsAYFAVTLKVEFpoG+hLVCIsFGDTh.tA..phhu.Lthc MDhVKSIshSVDAVLDELDshhFAVTLKV.FpoGKLLVCIGFGDTh.EAcpKAYAKLtL....... 0 0 0 0 +1628 PF03262 Corona_6B_7B Coronavirus 6B/7B protein Mifsud W anon Pfam-B_4476 (release 6.5) Family \N 25.00 25.00 208.10 119.30 22.20 21.60 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.46 0.70 -5.08 5 357 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 39 0 0 270 0 194.70 87 100.00 CHANGED M.IVllLVClhLuNuhGIKtssQEcDL...HEHPThTWELL-+FVGNTLYITTsQILSLPLGAcV+C-sVEGFsCoWPGFcssAHDHIDFYFDLSNPFYSFVDoFYIul...G-tspKIsLRlVGATPK-KRLNlGCHTSFu.VcLPFGTQIYHD+DMphhV-GRHLECTHRVYFVKYC.PaasHGYCFcDKLKVYDL+Rl+SpKsF-KlNQac+oEL ....................KATTVQsDL...HEHPVLTWELLQHFVGHTLYITTHQlLALPLGSRVECESVEGFNCTWPGFQNPAHDHIDFaFDLSNPFYSFVDNFYI.l...GEGNQRINLRLVGAVPKQKRLNVGCHTSFA.VDLPFGTQIYHDRDFQHPVNGRHLECTHRVYFVKYC.PYNLHGYCFNEKLKVYNLsQLRSKKVFDKINQHHKTEL.......... 0 0 0 0 +1629 PF02398 Corona_7 Coronavirus protein 7 Mian N, Bateman A anon Pfam-B_1574 (release 5.4) Family This is a family of proteins from coronavirus which may function in viral assembly. 22.30 22.30 23.10 47.60 19.00 22.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.39 0.72 -3.53 2 105 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 47 0 0 81 0 77.30 75 100.47 CHANGED MLVhhpAVhlTVLhLLLIGRlQLLERLLLsH.hNLpTVs.......................DFNILa+sLAETRLLhVlLRlIFLVLLGFsCYpLLshLh ..MLVFLHAVhlTVLILLLIGRlQLLERLLLsH.LNLpTVsNVLGVsDssL+VphhQLLKPDCLDFNILa+sLAETRLLhVVLRVIFLVLLGFsCYpLLssLh...... 
0 0 0 0 +1630 PF03187 Corona_I Corona nucleocapsid I protein Mifsud W anon Pfam-B_2926 (release 6.5) Family \N 25.00 25.00 75.50 75.20 24.60 16.30 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.43 0.70 -5.17 3 82 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 48 0 0 55 0 183.00 55 98.69 CHANGED MESSRRPLGLTKPSADcIhKIEAEGISPSRLQLL.NPIPGVWFPITLGFLALPNSRRERSLSLQhDKECLLPMESQLLSKRDIGIDTTDVLLKHLMASRSNYCPDGIFTILEQuPMLcsSMATsLTESSGSQlsRhhspPLLTLLKGTQVAMRLFLLGLRPVRYCLRVoMLKAQEGLHLLVDLVRGH..NPVGQIIALEAlPTSASLPLL ....tS.pt.l...p.o..ph.h.tsEp.NPspLhLL.NHpEshh..hI.GSLtL.sFK+.coLNhQhsK.h.LhpES.LLKpRDIGhDTTsVLLKQLMus+SsCspDGIFTIhtQs+MPtpsMssshptSSGSLlT+Lh.lPh.hFppGhQlsMRLFhLGhR.sRasL+shhLKAQEGLhLlsDLl+uH..pPlsQ.hshEsh.s.ts.PLl........ 0 0 0 0 +1631 PF01635 Corona_M Coronavirus M matrix/glycoprotein Bashton M, Bateman A anon Pfam-B_845 (release 4.1) Family This family consists of various coronavirus matrix proteins which are transmembrane glycoproteins. The M protein or E1 glycoprotein is The coronavirus M protein is implicated in virus assembly [1]. The E1 viral membrane protein is required for formation of the viral envelope and is transported via the Golgi complex [2]. 19.10 19.10 19.30 19.30 18.60 18.60 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.48 0.70 -5.39 14 976 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 276 4 0 775 0 199.50 48 97.13 CHANGED h.SN...solshp-llphl+sWNFshsl.ILlhlhllLQaGYss+S+hlYllKMhlLWLLWPLslAlolFsAl..aslN.hshhuhSIlhAslohlhWlhYFlsSlRLapRTcSaWSFNPEoNsllslsl.hGpphshPlhpsssulThsllsGpLhh-Ghclup.sssssLPphlTVApPsshhhYch.u+..s.shsssoGaAhYl+hKt.Gsaptssshpsshs-sppLhp ........................sh...tphh.....hhtpaNh.hsh.hh.hh..hhhlLQaGhsphShhlYhlKMllhWlhWPlslAlslhssh...Ytls.h.VhhuhSIhhAllsh.hhWlhYFVpSIpLa+RT+SWWSFNPETNAlLslsh.hGpphshPl-usPpslThTllsGsLYsEGhKlAsGhsl-cLPKaVhVApPoRphlYph.VGK..p.puussoGaAhY...V+uKt.G.sYs.spshpsshopttpLh.................... 
1 0 0 0 +1632 PF04753 Corona_NS2 Coronavirus non-structural protein NS2 Mifsud W anon Pfam-B_3747 (release 7.5) Family \N 25.00 25.00 26.60 179.60 24.00 17.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.68 0.72 -4.11 3 81 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 63 0 0 43 0 108.40 73 99.55 CHANGED MEIWRPShKYLRhTR-FGVT-LEDhCFKFNYCQP+VGYCRVPL+AWCRNQGKFAApFTL+S+-KSa+ppFGVITSFTAYGNTVcEAVSKLVEpAuDFIsWRAppLN+YG ..MDIW+PEhKYLRYTNGFNVSELEDsCFKFNYpFPKVGYCRVPs+AWCRNQGpFCAThTLYGKSK+.YDKYFGlITGFTAFuNTVEEAVNKLVFLAVDFITWRpQpLNVYG. 0 0 0 0 +1633 PF05213 Corona_NS2A Coronavirus NS2A protein Moxon SJ, Bateman A anon Pfam-B_6568 (release 7.7) Family This family contains a number of corona virus non-structural proteins of unknown function. The family also includes a polymerase protein fragment from Berne virus and does not seem to be related to the Pfam:PF04753 Coronavirus NS2 family. This family is part of the 2H phosphoesterase superfamily [1]. 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.74 0.70 -5.06 4 104 2012-10-03 21:31:47 2003-04-07 12:59:11 7 4 66 0 0 95 1 202.80 52 38.20 CHANGED AYADcPsHFlshPlsp.psFltsahcLQ.....EGhssKhQsAPHISLTMLclpsEDhcpVE....-IlD-Mshspu..lshsNPHMhG+phVhDVcGl-pLHD-lVslLRc+GhssDQoRhWhPHhTIuplpDsuh.sKh.......hpFshpppl..........tch-hVKLGAsKtsuhYEhIso...........sWsupR.LCapsss.p.Sc.htYhsLss...EhptG.l..Ncsss...hShcYpsppahh++V+DpSpa..hRTu ............................AaADKPNHFINFPLspFpGFhhpYhtLQ.Qll-..G..lDCK....lQpAPHlSls...hL.D..Ip..s-pY+sV-....hAIQEllDDhthh..EG..pIpF-NPHh.L.G.R......ClVL.......D.V.+G.VEELH-...DlVNhlRc+GCsADQSRpWIsHCTlAQhs-tslpIKt.........hQF.a.Kh.sh.....N..osAclElVKlGupKh.DGFYpo.hS...........hWhG.R.hpYpPPTsKhu.IhGYCCl-hlRt-LE.GDLP.sD--AWhcLSYHYppNoaFFRaVacpS.Y..FRp.............................................. 
0 0 0 0 +1634 PF03053 Corona_NS3b ORF3b coronavirus protein Mifsud W anon Pfam-B_2130 (release 6.4) Family Members of this family are non-structural proteins, approximately 250 amino acid residues long. They are found in transmissible gastroenteritis coronavirus (TGEV) and porcine respiratory coronavirus (PRCV) isolates. These proteins are found on the same mRNA as another product, designated ORF3a. While ORF3a/b has been implicated in TGEV and PRCV pathogenesis, its precise role remains unclear (see [2,3]). 25.00 25.00 25.60 25.30 23.20 23.00 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.42 0.70 -4.81 4 228 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 53 0 0 204 0 211.70 53 99.20 CHANGED MhLGLFphTls.......osVpposppsplSt-sshtlppsVVshRpssplsuFhlsSlFVhFFALFKAoSa+pshhllhh+lLslhlYsPlLhasGAYlDAhllsssLhuRhhalsaasWhYKsacFIlaNoTTLhFlpG+Asaacs+u....aVhL.GGspYlhlGsphVsFVSs.sLYlAIRGpt-uDLpLlRsVELLsGchlYlFSpc.lVGlsNuuFpp..L..........DchhsIS ...MIGGLFLsTLu.......hI.V.s.s.p.sh.l.s.Nhsp.s..slVQQ+pVV.Sup..hpShahEFSIAlLFVhFLALYRSTNFKsCVulLMFKIlSMTLlGPMLIsaGYYIDGIVTT.TVLuLRFlYL.uYFWYlNSRFEFILYNTTTLMFVHsRAAPFhRSSHuSIYVTLYGGINYMaVNDLTLHFV.ssh.l......hlAIRGhscADLpllRsVELLN.....GchIYlFSQEsVVGlhNAAFsph.l..........sc........................ 0 0 0 0 +1635 PF03905 Corona_NS4 Coronavirus_NS4; Coronavirus non-structural protein NS4 Finn RD anon DOMO:DM04795; Family \N 25.00 25.00 42.70 42.70 18.80 16.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.22 0.72 -4.16 3 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 45 0 0 37 0 47.40 70 65.50 CHANGED MPMATTIDsTDYTNIMPoTVSTTVYLGuSIGIDTSTTGFssFSa.............Y MPMATTI-GsDYTNIMPhTVhTTVYLGsSIGIDTSTTG.pshsh....................... 
0 0 0 0 +1636 PF00937 Corona_nucleoca Coronavirus nucleocapsid protein Bateman A anon Pfam-B_267 (release 3.0) Family \N 24.60 24.60 29.90 25.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.18 0.70 -5.50 21 1034 2009-09-11 06:29:42 2003-04-07 12:59:11 13 4 296 36 0 939 0 294.10 37 87.79 CHANGED Mu........................plsatsp...stppsppttt......hPh.......................SaFssLptpscpthh.hhsGsGVPhutG.stspphGYWpRQpR..a+.scGppppLss+WaFYYhGTGPaA-hcatc.......cp-GVhWVApcGAcssssu.lGoRsssp...pslss+Fssu..lPpshhlps......pspSRssSRus......SR...upS..RusSts..........uRssS...psRpsss...............lhsslhttLtslshsppp.........................tsppssploppsst-stp....K.caKRossKu..psVspsFGtRussp...NFGsschlctGscsspaPtlAELlPosuAhhFuSclssccps..Dsl.........plsashshpls+csPshppah....pplsAY...........scPpcp.cK.pppp ...............................................................thsh.......................SaFpslp.pphst...hhtspuVP.s.s.ptspQhGYWpRpsR..a+.scGtpK.lsstWYFYYhGTGPtAchpats........p.-GlhWVAtcGApspsps...GsRsssp...-thsh+Fssu..hPps.hh.s..........pGcSttsot..us......S..t....upS....Rs.sS...............RpsSpsRpssu........................................t.sl.hsthtplh.spp.p..............................ps.pphs.p..tpss.-h.p......+.hhKRT.s.t...pVspsFG.Rs.sp......NFGDsch.ppGhpstphsthhphsPostAhhFGSpls.c.ts.sth...........plpaphshhlscscsphpphl....pplsuh................tcPpp.t.t....t............................................... 0 0 0 0 +1637 PF01600 Corona_S1 Coronavirus S1 glycoprotein Bateman A anon Bateman A Family The coronavirus spike glycoprotein forms the characteristic 'corona' after which the group is named. The Spike glycoprotein is translated as a large polypeptide that is subsequently cleaved to S1 and S2 Pfam:PF01601 [1]. 
25.30 25.30 25.40 26.50 23.40 25.20 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.94 0.70 -6.20 8 2634 2009-09-11 07:53:46 2003-04-07 12:59:11 11 4 115 4 0 2424 0 374.30 50 73.17 CHANGED hhG.us.VahhpSuh+.PcGaphpshhhhssssos.susssSsQ....sClhul.taupsssssShshTAss..sGhShsss....cFshs.......csNhsctslFlTasapo.hshuCs.ouh......l.puah+IshhKsts...salFYNlTls.......luphPphhp..slsphsSVYlNGahhFTostscsVshusspacoGG.....hThtlhphVcALs.hsNsohpcVIhCD.SPhuhLtCQ.sTuNhsDGFYPhoso....phhlVhppsSVsTh.shpsaTFsNlosu.........PssGGlpohslYtopsspouhhNFNhohlouFsh+sSpFhhhsa......h.pspFpspshNsGhWhNSlslu..hs.hslQussc.ssFu...TsCauhShsGsusshuVausphsp.apshhhlYVoho-GShIpTuspsPlhshpsasNlTLDcCs-YNIYGRsGpGhITNsTsohlut.........Ih.TSsShDlhshpsshusshYpVsPC-ss.pQhVVssGclVGllTShNpTu............p.htN.aYlplhNhT+hhpR ...............................................ts.VahhQSuhR.spGWHlpGGAYAVVNsos.ssNAG....o....up.............p...CT....s..Gs........I.......p.....s..h.......s........s.......Au....Sl...A..MTAP...........pGMuWSps..................QFCoA.........HC..NF.o...-...h....o...VFVTHCaps...suCPlTGh.......l.psaIR.ISAM.+..s..........upL....F..Y..NLTVo..............VuKYPpFKShQCVNNhTSVYLNGDL..V.FTSN.p.TpDVsuAGV.a.aKu..G..G....PlsY.plM+phcsLsaFsNGospcVlhCD..oPhshLt.Cp......shshsDGFYshss.......hhhs.h..pShsTh...hpsaoF.N.osu.........ss...u...s.h.p..s.h...hhts..t.hps...sh.h.sFs.s.h..s..Fsh...ptsp.ahh..................s.F...h....ps..s......Ghh...sslsst.....s..h.shpt.hsp..sFp......phC....h.......uhs......ss......hphsl..hth...t..........phh.lahphptGp.h.ots........h...h..h.hs.CstYslYsh.G.GhIh..s....p...sh.........lh.so.uhphhhhps.....hapl.PCt...tQhsh.ttthhs........................................................................ 0 0 0 0 +1638 PF01601 Corona_S2 Coronavirus S2 glycoprotein Bateman A anon Bateman A Family The coronavirus spike glycoprotein forms the characteristic 'corona' after which the group is named. 
The Spike glycoprotein is translated as a large polypeptide that is subsequently cleaved to S1 Pfam:PF01600 and S2 [1]. 30.00 30.00 30.20 30.10 29.90 29.00 hmmbuild -o /dev/null HMM SEED 610 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.17 0.70 -6.42 22 1416 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 312 44 0 1305 0 413.10 39 54.14 CHANGED sCs.Ph.....lsYushulppsGulht.lshp.......s.stlsslhs.....tslpIPoNFTlolpsEYlQhpsp.lolDCupYVCssss+CppLLpQYuohCpsIpsuLptsupl-shplhshhoss..puhpls....shpsFsus..................aNhosl...Lsss..........up.t.RSsIEDLLFsKVhouGlGtVDttYccCos...G..tsltDLlCAQhYNGIhVLPsllssphhuhYTuSLlGuhshG....Gl.TuAAuIPFuhslQuRLNalulppsVLpcNQKllAsuFNpAlusIpp..............GhsosupALsKlQ-VVNppupALspLssQLssNFtAISSSIp-IYsRLDtlpA-AQVDRLITGRLsALNuaVoQpLschsclcsSRpLAtpKlNECVKSQSpRhGFCG.sGsHlhSlsQsAPpGlhFlHhshlPopatsVpAssGlClss.....hhhhhsPthulFh......psso..........ahlTsRshapPchhotu-hVplpoCsVsasslsps.lsshl.P-..hhDhscpLscahpsh..spshPshs..hshaNhTh..LNLosEI..............pcLpplIcsLNsohlDLc.LsphEpYlKWPWYVWLsIshullhhlhlLlahhhsTGCCG.hhuChu.....spCtppCp...thpp...h..hcphHsp .......................................................................................................h.lP.thshs...Ehh.h....h.lss..alss....C...h.pYh.hC.pl.t.l...s...s.....hh.........................................................................................................................................thYTsuhhuuhshu....uh.ouAsul.PFuhplQhRlNhlulppslL.cNQchlAsuFNpAluphp...............uhpohs.ALtplQsVVNpputhLsphhtpLppNFtAISSslt-IhppL-tlpApsQlDRLIsGRLsuLsshsotp..phhclptptpLAhpKlsECV+SQS.RhsFCG.pG.HlhohspsAP.GhhhhHhsh.P.t..ph.s.suhChts................hh...t.shh.........t......................ahho.p.ha.Pp..p.tshl.htsCtssahphs.s.h...h..p....shpcplpchhpph..p..hs-...hs......hN.Th..LslptEI..............pclpthhpslNpohlsLp.lshhc.........................hhhh.Tsps..........ssh........ht....................................... 
0 0 0 0 +1639 PF00115 COX1 Cytochrome C and Quinol oxidase polypeptide I Finn RD anon Pfam-B_23 (release 1.0) and Prosite Family \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.90 0.70 -5.86 94 254351 2009-09-13 13:30:05 2003-04-07 12:59:11 15 23 108179 109 2006 206187 5121 227.90 58 95.26 CHANGED pcp.luhhYllhuhhhhllGslhullhRhpL..shP...................sh...lssp....saNplhohHushMlahhshshhhG.hu.allPhhluscplshP+Ls.shuaWlhshusllshhuh......hhths...............stsWh.Ys.Phs....................s..hhhhulhlstluullsulNhlsT..lhphRs.uhsh..h.slh.sWuhhssullhlhuh.Pslshs.hhh.h...h...hh..stsussllapalFWaauHPtVYllllP.uhGllu.llsphs.t+.lauaphhhhuhh.uluhlu.hhlauHHhas.sGhshhhpshhsstohhlu..........lPsult...........lasalsT......hhtu....p....hphs.sshhasluhlhhF.hhGGloGlhhuhsslshhhHsTaalVAHaHhslhGuhshshhuulaa....hhPphtGp.....hhsppluphpFalhhlGhslhFhshph.hGlh.GhsRRh....hsa......sst.......htsa.....hlpolG..uhlh ........................................................................................................h.lG.suh.ShlIRhEL....upP...........................suh....lssDQ........lY.Nsl.VT.A.HAFl.MIF.FM.VM....P.l..MIGGFGNWLl...PL..M.lG...A..P............DM....A....F..P...R....M.N....NM.SF.W..LLP.PS.lhL.Lls.So.....h.V.....EsGs.......................................GTGW.T....V.YP...PLu.usluHs............GuSVDLuI.FSLH..LA.GlS.SI.L.G.AI..NFI.T.T.........l..I...........N......M........+..............s..........s...h..o....h...c........p......h.......PL.......F....V..........W.........u........V.........h.IT.Al.L.L.....L.L.SL..P..V..LA.G....A.I..TM.LL.T.D..RN......l.NToFFD...............PuGGGDPl.La.phh.......................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................ 0 538 1218 1661 +1640 PF05051 COX17 Cytochrome C oxidase copper chaperone (COX17) Moxon SJ, Mistry J, Wood V anon Pfam-B_5838 (release 7.7) Family Cox17 is essential for the assembly of functional cytochrome c oxidase (CCO) and for delivery of copper ions to the mitochondrion for insertion into the enzyme in yeast [1]. The structure of Cox17 [2] shows the protein to have an unstructured N-terminal region followed by two helices and several unstructured C-terminal residues. The Cu(I) binding site has been modelled as two-coordinate with ligation by conserved residues Cys23 and Cys26. 21.80 21.80 21.90 22.20 21.60 20.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -8.95 0.72 -3.94 43 302 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 243 7 199 288 2 48.60 48 61.89 CHANGED ppKP...KsCCsC.-pKcsRD-Cll...pGp-p.....CpchIEtaKpCM+uhGFpl ..........p.pKP...hKPCCsCP-TKcsRDp.Cll.....pGp-s........CtphIEsHKpCM+uhGFpl............. 
0 65 105 163 +1641 PF00431 CUB CUB domain Bateman A, Finn RD anon Pfam-B_136 (release 1.0) Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.53 0.72 -3.98 29 12961 2012-10-02 11:50:15 2003-04-07 12:59:11 15 883 190 46 7617 11572 76 107.50 26 32.62 CHANGED CGsp....lppssGslpS.PsaPp.sY.ssppClWpIpss.uhp..lpLpFps.F-lEtpt.......ChYDalElhDG.tss.t.hlG+aCG.....pthPssltSsusphhlpFho.DsshsppGFphsa ......................................................................Cut......hp..t.....s....G...h...lp...S...P....s.......a....P.......p........s...........Y......s........s......s.....h...........p.....C..h.......Wt..........I......p....s......s..............s.....p.....p...............l...p...L......p.....F..p......p...F...p.l...E.......................................C....t.......h....D....a......l.......p........l.....h.....-...............G......s............s...............t...............p........s.......................h...........l.............u.......p............a.............C....G.......................s.p..h..P...t....s.....l......h...S......s....u.....s.......p....l..hlpF...p......o......D.....t....s.....h.....s.....t...p..GFphpa.............................................................................................. 
1 2339 2836 5004 +1642 PF00116 COX2 Cytochrome C oxidase subunit II, periplasmic domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.47 0.71 -4.28 32 31269 2012-10-02 17:41:00 2003-04-07 12:59:11 15 29 18166 144 1320 28095 2586 106.00 48 47.64 CHANGED lTlKulGHQWYWSYEYsDa.s.....lpFDSYMlPpppLp.sphR....LL-VDNRhllPhssplRhllTu....sDVlHSWslPShGlKhDAsPGRLNQsohhh...sRsGlaaGQCSEICGsNHSaMPIslEul ................................................................lTlKslG.HQWYWS...YE..Y...o..D..a....s..........................l..p.F..D.S...Y.......M....l.........s..........p......p........-.........h..............................s.p........F.......R.............................LL..-.....VD...N.....R.l.....l..lP..hsspIRlllTu......s.D....V....l...HSWs...l....P.....u........L..G....l....K........h........D..A..s..P.....G.....R........L.N...Q...s...s...ahh....................sR...P......G......l...a.a......G.QCS.ElCG......ss.Hu.h.Ms.hhh........................................ 0 349 789 1076 +1643 PF02790 COX2_TM Cytochrome C oxidase subunit II, transmembrane domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family The N-terminal domain of cytochrome C oxidase contains two transmembrane alpha-helices. 21.00 18.00 21.10 19.70 20.80 17.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.03 32 25391 2009-01-15 18:05:59 2003-04-07 12:59:11 10 24 16352 78 555 22832 1763 78.90 47 36.87 CHANGED MsT.hshs...hQDuuSPhMEplhhFHDashhlLhhIhhhVsalhhshlhs......phsp+ahlcGQh.IEhIWTllPAllLlhIAlPSL+L ............................Msp..phs....h..Qs...usSPlMEp.Lla.F.H.DHs..l..h..I....l...lh...I.o..h.l...V....s..Y.lh..h..h.l.h.hN................................K.hs.s.+...h...ll.-u.Qh...IElI.....WTIlPAllLlhIAlPSLRL................................................ 
0 162 342 451 +1644 PF00510 COX3 Cytochrome c oxidase subunit III Finn RD anon Pfam-B_78 (release 1.0) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.69 0.70 -4.94 21 13171 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 8530 60 1326 11262 3530 206.40 41 93.84 CHANGED HPFHlVssSPWPlssShuhhshsluhlhahHsap..hhllhlullsllhsMhhWaRDllREuTapGpHThhVpcGL+hGhlLFIlSElhFFhuhFWAFFHSuLuPolELGusWPPhGI..pslNPh-lPLLNThILLoSGsTlTaAHHullpG.pRppul.uLhlTllLulhFThhQhhEYhpAsFTIuDuVYGSsFahuTGFHGlHVllGTlFLhVshhRhhpaHhTspHHhGFEsAhaYWHFVDVVWLFLYlolYWWGu .........................................................................................................................................................................................................................................................................................................t.t..............G....Hs..............l.............u..h.+..h..........G...hh.LF.Ih.S.E.l.hF.....Fhu.a.F....WA..........a......a...................p.........u.................u....L........u..........P..............s.....................................p........l.................G.................s............h....................W..P...........P..............t.......G.......l.................p..s.......h...s....P....h...p...l..........P......L....L....N..Thl....LLuS......G...l.o....lT...........a.........A...HH............u...........l...........h.......p.............s.............p.............c................p......p........s..............h.t.u..........L.........h.l.T..l..l...L.GhhFsh..l...Q...s...h...E.........Y........h..........c..............s...........s.........F................o.........l.......u.......D..............u...........l...........Y.........G......S.......sF...F....h....s..TG...FH.GlHV...ll..G.oh.aL...h.......l..sh....h...R..........h...........h.........h......c.........a........o...........s..........p...........p..........+.....
.....h..........u..........h............c...s.....su.........h.......YWHFVDl.VWl.h.lah.lYh................................................... 1 358 800 1075 +1645 PF02284 COX5A Cytochrome c oxidase subunit Va Mian N, Bateman A anon Pfam-B_7466 (release 5.2) Domain Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit Va. 21.30 21.30 22.00 25.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.43 0.72 -4.10 3 318 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 272 51 176 300 0 103.20 52 69.73 CHANGED uHGoEETsEEFDARYEKYFs+EuIDuWElRKGMNsLlGYDLVPuPKIIEAALRAuRRVNDlAoAIRlLEulKDKsGscKt.lYPYlL-EL+PTLQELGIPThEELGhDK ......................H..pp.E.T.EEFsA.R......a.phFs.sshDsaElp+uhNs.hhuYDLVPpPcllpAAL...RAs...RRlNDaAoAVRlhEulKsKst.s+.c.....YshhlpEL+PshpELGIsh.EELh.-.............. 0 51 86 139 +1646 PF01215 COX5B Cytochrome c oxidase subunit Vb Finn RD, Bateman A anon Prosite Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.69 0.71 -4.50 4 403 2012-10-03 19:45:42 2003-04-07 12:59:11 14 3 288 52 248 395 7 118.00 33 69.68 CHANGED S+ll+t.pslsp.suQsLt.s+tPs..oLltshuptGtVPTDh-QtTGLcR..hLuthpGpDsFshcsLcuo.pGThcDPllVpSlsshRhVGCpGususSHsllWhpltcscspRC.-CGoVYKLp.hus.scphtH ................................................................................................u....tls.....s-..-.p....ATGL.E...RhEl..lu.p.h..p.G.h.D.s......a.c....h.c.s..cus..hG...T.h-s...PhlV.s.Sh..hscRlVGCss....t....t.s.s.o..slhWhhl.c.c.s.c.s.p.RCspCGp.haKL............................... 
0 84 137 203 +1647 PF02046 COX6A Cytochrome c oxidase subunit VIa Mian N, Bateman A anon IPR001349 Family \N 25.00 25.00 28.10 27.90 24.60 24.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.92 0.71 -4.17 31 404 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 264 51 247 396 1 102.90 34 78.21 CHANGED Mht.............t..t..........sh+RhhSo......................sstt.......tpta.pphtth.tHutu...soclW++loh.....hlAlPu.lslsulNsa.l..cscHt-HhtH.....-p-phspYsa.NIRoKsF.WGDGsKTLFWNscVNths ........................................thhhts.................................................................t...shtt..c.sts...su.....ch..W+p.loh.....h....lul.Pu.lslshlNsa.l.pcHtcH.....................tp.EhstYsahp..IRoK.sFPWG.DG.s..+.o........LF........aNsclN.h.s.............. 0 67 115 186 +1648 PF02297 COX6B Cytochrome oxidase c subunit VIb Mian N, Bateman A anon Pfam-B_9188 (release 5.2) Domain Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of the potentially heme-binding subunit IVb of the oxidase. 24.50 24.50 24.80 25.40 24.40 24.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -9.88 0.72 -3.96 58 675 2012-10-02 15:44:21 2003-04-07 12:59:11 12 9 292 51 456 646 0 66.70 32 59.76 CHANGED schPs.........pspp+pCapshscaacClcp...pu........................spp................Cphhppsacs.CssuWlchac-p.....pcpshhs ....................RFPs..psQp+pCap.......sas-aa+Clct...psp......................................shss...............Cc.tatcsacohCPts......W....l..ccac-p......tt....t................. 0 125 227 364 +1649 PF02238 COX7a Cytochrome c oxidase subunit VIIa Bateman A, Mian N, Finn RD anon Pfam-B_3023 (release 5.2) Family Cytochrome c oxidase, a 13 sub-unit complex, is the terminal oxidase in the mitochondrial electron transport chain. 
This family is composed of the heart and liver isoforms of cytochrome c oxidase subunit VIIa. 22.20 22.20 22.50 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.73 0.72 -4.24 8 306 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 111 51 138 301 0 53.40 42 59.85 CHANGED +NKVhEKQKLFQpssch.sYLKGGh.DslLaRlTMsLslGGTuYslhuLGhAuhP+s ......pN+Vs-tQKhFQcsss..lPlaLKGGhsDs.lL..Y..RsT.Ms....L.....s...l...u........Gohhslh.thhhhs.s+t.......................... 0 21 42 77 +1650 PF02285 COX8 Cytochrome oxidase c subunit VIII Mian N, Bateman A anon Pfam-B_6423 (release 5.2) Family Cytochrome c oxidase, a 13 sub-unit complex, EC:1.9.3.1 is the terminal oxidase in the mitochondrial electron transport chain. This family is composed of cytochrome c oxidase subunit VIII. 20.50 20.50 20.60 20.60 20.00 20.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.98 0.72 -4.43 11 112 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 71 51 45 114 0 42.40 43 59.90 CHANGED VpSKPs+p.hushEpAlGloshFVoFLlPAGWlLSHLEsYKKpu .....lpSpPscp.plushEpAl.GLoshFlshLlPuGWlLuHL-sYK+..... 0 5 7 16 +1651 PF02672 CP12 CP12 domain Bateman A anon Bateman A Family The function of this domain is unknown, it does contain three conserved cysteines and a histidine, that suggests this may be a zinc binding domain (Bateman A pers. observation). This domain is found associated with CBS domains in some proteins Pfam:PF00571. 21.20 21.20 21.20 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.40 0.72 -3.51 39 291 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 151 16 114 305 584 62.40 34 39.64 CHANGED ls-pIpcshpcAcpsCups..sSucCssAWDtVEELpAttuHpcpcpp....ppssLEpYC--NP-AsECR.lYDD ...................................tpplppthppActhsstt..ssspsttAW-t.l-ELpsttsHt.tpt......stssLE.....paCcsNP-ssEC+.lY-p................... 
0 24 78 103 +1652 PF01383 CpcD CpcD/allophycocyanin linker domain Bateman A, Griffiths-Jones SR anon Pfam-B_887 (release 3.0) Domain \N 21.10 21.10 21.30 23.20 19.30 20.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.36 0.72 -3.79 86 448 2009-01-15 18:05:59 2003-04-07 12:59:11 16 9 103 2 124 392 105 55.10 34 24.05 CHANGED u..RhFplcVsu.........tt...h.R+Sspshl.VPaschspphQcIp+hGG+IlSIsss .........RhF+lcVsu.........tppp.pl.R+ospsal.VPYsphspphQRIpRhGG+IlSIpsh.... 0 14 77 114 +1653 PF00166 Cpn10 cpn10; Chaperonin 10 Kd subunit Sonnhammer ELL, Finn RD anon Prosite Domain This family contains GroES and Gp31-like chaperonins. Gp31 is a functional co-chaperonin that is required for the folding and assembly of Gp23, a major capsid protein, during phage morphogenesis [1]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.12 0.72 -4.05 48 6057 2012-10-01 22:45:51 2003-04-07 12:59:11 16 6 4946 114 1577 3685 2838 90.70 44 92.00 CHANGED plcPLtDRVllct..h.pt.-p...potuGIllP..-supc..Ks..ppGpVlAVGsGhh..ppGp.hhshslcs..GDpVlascau.............Gs.clch..-.sccalllcppDIlAll ....................l+PLtDRVll+t....h..Es.Ep...cTs.uGIl.lP.....soApE....Ks...ppGc.VlAVGs.G...............p.h...................c..........s............G..........p..h..........hs.h.s.VKs...GDpVlas.cau....................Go.-lch........-.sc-aL.....lhp.EsDILAll................................ 0 540 1020 1322 +1654 PF05205 COMPASS-Shg1 Cps15; COMPASS (Complex proteins associated with Set1p) component shg1 Wood V, Coggill P anon Wood V Family The Shg1 subunit is one of the eight subunits of the COMPASS complex, complex associated with SET1, conserved in yeasts and in other eukaryotes up to humans. It is associated with the region of the Set1 protein that is N-terminal to the C-terminus, ie Set1-560-900. 
The function of Shg1 seems to be to slightly inhibit histone 3 lysine 4 (H3K4) di- and tri-methylation, and it is a pioneer protein. The COMPASS complex functions to methylate the fourth lysine of Histone 3 and for silencing of genes close to the telomeres of chromosomes [3]. 22.10 22.10 22.10 23.40 21.50 21.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.43 0.72 -3.75 22 260 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 195 0 177 248 0 106.20 31 17.20 CHANGED cpLsctaKKcGtFDphR+clLsch.cpu...........-tcpplpp+ltpllcsclpc-s.plL.+s+G+susLIcGtls+s............................shhpt..........s-pslss..ll-pclpch..tslcphh+ptltc- ................llpphKpcGhFDphR.+-sLu-h.csp..............suhpsLpp+lcshVpscls.cpp......shN+..sphpshl.ctplhcS............................hl..ps............s-ch....lsp....llcs+lsch..hsplcphh+p.l...t..................................................................................... 1 43 76 128 +1655 PF00289 CPSase_L_chain CPSase; Carbamoyl-phosphate synthase L chain, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines [2]. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00988. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. 
25.50 24.50 25.50 24.50 25.40 24.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.17 0.72 -3.96 157 23989 2009-01-15 18:05:59 2003-04-07 12:59:11 17 163 7729 193 5850 19831 8081 108.40 33 18.02 CHANGED hhc+lLlANRGEI...........AlRlhRss+ch.............Gl.coVslaoss..Dpsuh...asphADcshhls..s...s.........tuYLsh-pllpsA..cps........G.....spA.lHPGYGFLSEsscFActst..p.tGl..hFlGPsscsl ...................................................................hc+lLlhs.pG.I...............................ulpsh+uh+-h..................Gh.col.hV.ss.ss.............-..o..sus.............chph......AD.c.....s..Yh.s...............................lsh.-tlhplhchp.........................u...s.s...ha........sG....sh.Ls.s.hph..tp...h..........c..tGl....hhlGssscsI...................................... 0 1856 3641 4921 +1656 PF02786 CPSase_L_D2 CPSase; Carbamoyl-phosphate synthase L chain, ATP binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain Carbamoyl-phosphate synthase catalyses the ATP-dependent synthesis of carbamyl-phosphate from glutamine or ammonia and bicarbonate. This important enzyme initiates both the urea cycle and the biosynthesis of arginine and/or pyrimidines [2]. The carbamoyl-phosphate synthase (CPS) enzyme in prokaryotes is a heterodimer of a small and large chain. The small chain promotes the hydrolysis of glutamine to ammonia, which is used by the large chain to synthesise carbamoyl phosphate. See Pfam:PF00988. The small chain has a GATase domain in the carboxyl terminus. See Pfam:PF00117. The ATP binding domain (this one) has an ATP-grasp fold. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.13 0.70 -5.05 16 23031 2012-10-10 13:17:03 2003-04-07 12:59:11 12 179 6767 194 5911 27500 13103 197.30 32 31.79 CHANGED D+tthpsthschshPssPusss...coh--AlthAcclGYPVII+uAauhGGpGhtlspsc-Eltclhspuhspu......splLlEK.lcs.KcIEhpVlpDupsNsIplsshEs.s.h..+stcsl.hAPSpTLo-cphphl+psAlpls+clGhhGus.slpahlss..stcahhIEhNsRlphppslupcsTGhsLshhthKlAhGhsLspl .............................................................................................................D+tthpp.h.h..pc..h..s..ls.......h........s.............ut....t...s........................p......s..........h.......-......-...........A............h.............t........h............A.......c.........c..........l.............G....Y...P...l..l......l.......+.......s.u.......h.......s........h........G.....Gp.........G.......h....p........l.........s.........p.......s....c.......c...........-.........L....p................p.h..h.................p.......p.................u.......h.................p.................t.................u...................................................................................s.......s.......t.................l...........h.......l..-.+.h.l....t....s........s....+......c....l......E...l........p......l.......l........s....D.......s.......p........s.........N.........s.........l........h.......l.......s.......p......h..........-......s............pt................+s..t........c.....s.............l......p.............................u.......P.......u.............................s.............L......o........s.................c................h.....................p.........t......l....t....p..s....u..........h....p....l.....s...........+t....l.....s.....h....h...G....s..s.....s...l...p..F.......h.......l.........s...............................s...........s..................c...........h...Y..hl..E........hN.s....R....l...p.......h....p...p....s...l...s.
..p..p...s.........T......G.........h.....s.....lschth+.lA.hG.sLs..h.................................................................................................................................. 0 1878 3684 4980 +1657 PF00650 CRAL_TRIO CRAL/TRIO domain Bateman A anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.70 0.71 -4.74 110 4026 2012-10-02 01:12:42 2003-04-07 12:59:11 15 76 365 19 2809 4545 94 152.30 20 37.53 CHANGED th.hptththlhGh.DppGRPlhhhcht.thssp....sts..t......................phhchhlhhhEpsh..................hhtt........thsshsllhDhpshs.....htphs....hshh.+hllphhp.spYP-pLtplhllssPhhhsshapllp..sal.sspsppKlpht.pstp...........Lppals...cplsp.......phG.Gp ....................................................................................................t......h..h.h...th..D...p.p.Gcslh..hhphs..ph.ssp................phs...p.........................................ch..h+.h..h.hhhhE..thht.....................................................h............................thp...shshlh.Dh..p...uhs..............h.p.p.h...t.............hphh...p.......p....h....hph..h.......p....stYP...p...plt..p..h...hll...........N.......s....P...h......h.a...p.s.ha.p..l...........l+................sa.l..s.........t.ps...t..pK...lhh...h...ts..t.t..t..........tLhp..h..l.s.........p.LPp.......phGGp............................................................. 0 1013 1571 2350 +1658 PF03765 CRAL_TRIO_N CRAL/TRIO, N-terminal domain Bateman A, Griffiths-Jones SR anon Prosite Domain This all-alpha domain is found to the N-terminus of Pfam:PF00650. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.54 0.72 -3.83 148 2100 2009-01-15 18:05:59 2003-04-07 12:59:11 10 27 310 19 1402 1996 17 56.30 25 13.53 CHANGED pptlpp....lcphltp............................thhptph..............sD..thlLRFLRARcaclpcAhpMlpps ...............................................................................................................h.p...hcphltp...................................t.hhttph...................................................sD...thLLRFLRARcFclpc.Ahphltp.............. 0 364 715 1092 +1659 PF02537 CRCB CrcB-like protein Mian N, Bateman A anon COGs Family CRCB is a putative integral membrane protein possibly involved in chromosome condensation. Over expression in E. coli also leads to camphor resistance [1]. 22.70 22.70 23.10 22.90 22.40 22.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.59 0.71 -4.21 106 5746 2012-10-02 19:55:49 2003-04-07 12:59:11 10 8 3698 0 1390 3815 1350 112.50 30 83.00 CHANGED hlhlulGGslGAhhRahlsthh.p.........................uTl.hlNllGsallGhhhs.hh..............htth...hphhlssGFhGuhTTFSoash-shplhpps.phh.tuhhahhholhhGlhsshhGhh ...................................lhlhlGGulGulhRahluhhhst...hhssh.................................PhGTL.hlNllGuFllGhhhshhh......................................tthssthphh.....lsTGFhGGhTTFSTFshEsl...p.L....h.pp.........s.....p.........h........h.....huh...h.al...hhollhul.hhshlGh.h.......................................... 0 430 866 1175 +1660 PF01321 Creatinase_N Creatinase/Prolidase N-terminal domain Finn RD, Bateman A anon Bateman A Domain This family includes the N-terminal non-catalytic domains from creatinase and prolidase. The exact function of this domain is uncertain. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.87 0.71 -3.66 170 7541 2012-10-02 11:23:57 2003-04-07 12:59:11 13 18 4001 40 1991 5872 3626 132.50 18 31.80 CHANGED Rlpclpphhpcps..lDuhllsss............tslhYloGa.........ssstshhl.l......st.cs..t.h...................llss.hchtpttppp...........hcl.h.hpp..................hpslhphltph.........h.t..............ppl.G...hEtsh.....hshst...hptlppth.....t...phhshss....hlpplRhl .....................................+lpplpphhp..p..p..p.......lD..uh..llsss................................................tshtYlo..GF...................sssss.hs.l...l................st...-p...s...h.......................................................lhs..c...hch...httsppp.......................shpl..hthps........................................................hpslt..p.h.ltph..........................................................h.t................ppl..u..........h-..s.ph..............loh..st.....h..p..p.Lppth..........st.......ph.h.shs.......h.t.hR................................................................................................... 0 667 1256 1650 +1661 PF00030 Crystall crystall; Beta/Gamma crystallin Sonnhammer ELL anon Swissprot_feature_table Domain The alignment comprises two Greek key motifs since the similarity between them is very low. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.58 0.72 -4.09 138 3340 2012-10-01 23:14:22 2003-04-07 12:59:11 14 52 213 141 1708 2532 18 81.10 31 53.48 CHANGED +lplaEcpsFp..GcphEhs.sDs..sslpphhh.sc............lpSh+V.s.GsW.lhYEpssapG..cQalLc....pGEYtsapp.W.G.....ssp...........ltShRhl ....................................plhlaEcp.sFp....G.c.phEhs....s....Ds........sslps.h.....h...a.sp...............spSl+.V.s..G...sW....l..hYEpssap....G......pQa.lLc........p.G.-.Y.....p..a..pp...W..G........t.ss.p.........................ltShR.l......................................... 0 133 378 969 +1662 PF02633 Creatininase Creatinine amidohydrolase Mian N, Bateman A anon COG1402 Family Creatinine amidohydrolase (EC:3.5.2.10), or creatininase, catalyses the hydrolysis of creatinine to creatine [1]. 24.40 24.40 24.60 24.50 23.90 24.30 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.52 0.70 -5.28 163 1362 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 981 81 528 1293 626 234.00 26 90.37 CHANGED hsphoh..s-l.pph.....tpt..sssllPlGu.sEQHGPHLPluTDshlupslupcsspphs................shlhPslsh......Gh..SscHh.s.....FP...GTlolsspThhsl.lp-lscSlt.ppGh++llllNuHGG..Nhshlphsscclptc............shhlhshsaaph..s..t........hhsthttptshHAGthETSlhLtl....tP....-hVchs+.............................ts.thhsht..thhhshthpc...hsssGshG...-sst..AoAE....+Gctlhcthscthsphlp 
........................sh.-htth......tps.....sssllPlGu.sEQHGPHLPluTDshlupslu...pp..lspphs................shlhPslsh....................Gh.....u...c.Hh.s........................aP.....G.T.lolss.pshh..sh.lt-lscult.p.pGh...++...........llhlN................uHGG..........Nhss....l.pts...spclptc................................shh.h.h.h..h...s.a.h.ph....s.........................h.t..t.t..t.ts...hHAGthETSlhLtl....tP.-h.Vchsc............................h.t.t.hhp.h.t....sh.hhs..hthpp.....hsss.....G.shG...csst...Aos-......cGctlhpthspthsphl...................................................................................... 0 165 359 459 +1663 PF03858 Crust_neuro_H Crustacean neurohormone H Finn RD anon DOMO:DM02710; Family These proteins are referred to as precursor-related peptides as they are typically co-transcribed and translated with the CHH neurohormone (Pfam:PF01147). However, in some species this neuropeptide is synthesised as a separate protein. Furthermore, neurohormone H can undergo proteolysis to give rise to 5 different neuropeptides [1]. 21.00 21.00 21.10 21.00 20.90 20.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.00 0.72 -3.37 8 51 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 23 0 0 56 0 38.60 54 29.06 CHANGED RSA-GaGRM-RLLASL+Gsu-o.sPLu-LpGA.E.uuuHPLE RSApGaGRM-RLLASL+..u-s.sPlusL..s.E..ussHPLE.............. 0 0 0 0 +1664 PF01147 Crust_neurohorm Crustacean CHH/MIH/GIH neurohormone family Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 26.20 25.00 23.70 24.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.08 0.72 -4.67 45 264 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 93 1 38 277 0 69.90 40 61.43 CHANGED plaDpsC+Gla.sRslFp+L-RVC-DCYNLaRcsplsotCRpsCFsNphFttClcsLhh.c.phccapphlphl.s ........................haD.sC+Gla.DRslFp+L-RVC-DCYNL..a..R..p..stlsstCRpsCF.........sNphFttClcsLhh..-..phcphtthlphl................ 
0 12 15 35 +1665 PF00525 Crystallin crystallin; Alpha crystallin A chain, N terminal Finn RD anon Pfam-B_97 (release 1.0) Family \N 23.20 23.20 23.20 23.40 23.10 23.10 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.29 0.72 -3.76 11 395 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 264 24 81 297 0 51.50 65 39.75 CHANGED MDIsIQHPWhRRPhas..aaPSRlFDQhFGEHl.-uDLFPsh........sslSPaYh+...P.hhRhPS ..........................R.....u...LGP....hhPSRLFDQFFGEGLhEYDLLPhh.............SSTISPYYRQS.....LFR...oV............... 0 5 12 29 +1666 PF03783 CsgG Curli production assembly/transport component CsgG Finn RD anon COG1462 Family CsgG is an outer membrane-located lipoprotein that is highly resistant to protease digestion. During curli assembly, an adhesive surface fibre, CsgG is required to maintain the stability of CsgA and CsgB [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.25 0.70 -5.25 61 1246 2012-10-01 20:48:06 2003-04-07 12:59:11 9 33 1090 0 274 845 586 190.90 30 65.37 CHANGED tshshsth+hsluVhpF..cspou........at.s..........SshsssluptusshLlstLppo...stFsllERpsLpslhpEppl...............ssphssLpuAshllpGulspaspss.t.tsG........hphhGlhu......psphppshspVsLRlVslpTucVlhSspssuchtspshpsu..............................hhthhsu.suhhsscslslAlppAlpp..hVptl.s.th.tuhWp..sp .......................................................................s..h.hsss+h.luVhph..pscou..............a..h.h........Sshsstlsppu.psh.....Llo.t.L..p..p..S....p.h..F.hsL.E..R..p..s..L.pslh.p.Epplhps.s..p............s.pph.l........psLpu.AshhlpGuI........h..s.......ap.spst..s.st.lG..............tphaGIhu...........................................ss.phpls.pht..VsL+lVsVsTu.Ell.S........sp.sutphh..phpss..............................h.th.htu....uhhsscsl.lslhpAlpphV.hl.s.Gh.p......s................................................................................ 
1 75 168 224 +1667 PF02599 CsrA Global regulator protein family Bashton M, Bateman A anon COG1551 Family This is a family of global regulator proteins. This protein is a RNA-binding protein and a global regulator of carbohydrate metabolism genes facilitating mRNA decay [1]. In E. coli CsrA binds the CsrB RNA molecule to form the Csr regulatory system which has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis [1]. In other bacteria such as Erwinia caratovara RmsA has been shown to regulate the production of virulence determinants, such extracellular enzymes [2]. RmsA binds to RmsB regulatory RNA. 20.70 20.70 20.70 21.60 20.60 19.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.60 0.72 -4.24 137 1995 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1669 7 426 998 255 53.20 54 75.78 CHANGED MLlLoRKhGEsIhI...G.D.-IpIsVlplc..Gsp....V+lGIcAP+slslaRcEl......YpcIppc .............MLILTR+lGEolhI...............G.D...-l.sVT.VL..u.V+...GsQ....VRIGlsAPK-VuVHREEI..Yp+IptE................... 1 175 279 361 +1668 PF02554 CstA Carbon starvation protein CstA Bashton M, Bateman A anon COGs Family This family consists of Carbon starvation protein CstA a predicted membrane protein.\ It has been suggested that CstA is involved in peptide utilisation [1]. 
25.00 25.00 28.10 27.70 24.80 24.70 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.31 0.70 -5.81 97 3680 2012-10-03 01:44:59 2003-04-07 12:59:11 9 5 2335 0 644 2537 205 319.50 39 60.88 CHANGED h.sulhlllsulssahluYRaYupa.lup+lht.lDssRtTPAcphpDGhDYVPTs+h.VLFGHHFAuIAGAGPlVGPllA.AtaGaLPuhLWIllGslhuGAVpDahsLahShR+cG+SlGpls+cplGphushhhhlhshhlhllllAlhuhlVspsh..............sp...........oPh.....................ushslhhsIPlAlhhGha...hah+suplhtsSllGllLlhhulhhGhh.ls........................................hh.hstpphsh..llhsYualAolLPVWlLLtPRDYLooahhluslshLslGlll.....stPplphP.AhTp......F.h...sGssPhhsG...slFPaLFITIACGAlSGFHuLluSGTTsK.lspEscs+hlGYGuMLhEuhlAlhAllAAsslt....................uhYauhp......................................................................Ghs.....sstlsphup....slG ............................pshhhllsulshhhluYhhYuha.l..tpplh..l.-..ss.R.hTPAhh.sDGhDYVPss+h.llFGHHFuu..IAGA.GPllGPlLA.A.hGalPuhlWllhGslhAGAVpDahsLhlShR+sGtSluphhpcphG.hsthlhhhhshhlhlllhAVhu.....h....llspsL................s.t............sPh..................................................uhhs.lh.hlslAhhhGhh...hhhts...sp..hh.hohlGhhhh.hhslh.G...l.........................................h.hptsthsh..hlhsYsFlAulLPVWhlLuPRDYLsoahhlssllul.sl.Glll...................htPpl..p...hP...uhop.....a.h................ssssP..h..hsu............slFPhLFITIACGAlSGFHAhluSGTTsKhlt.............pEppuphlGYGuMlhEuhVAlhAhluAs.h.l.........su.hh.th...............................................................ht.s..hh............................................................................. 
0 219 401 536 +1669 PF00859 CTF_NFI CTF/NF-I family transcription modulation region Bateman A anon Pfam-B_362 (release 3.0) Family \N 19.50 19.50 20.10 19.70 19.10 18.60 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.12 0.70 -4.65 4 440 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 51 0 132 328 0 227.10 51 56.35 CHANGED QDSFVTSGVFSVoELVRVSpTPIssGTGPNFSLuDLpSpsYY.shsPGu..hRRoLPSTSSSuS.KRhK..SME--hD.SPGt-saYo....SPuSsopSS.sWHE.hEsuhsSP.phpc.-Ks.FsssSPpppSshhSuFsQ+H+Psl.....os.tsSPHso.SsLHFPTSsIl.QpPuoYFsHsAIRY....pPQ-sLK-aVpLsCssuuQQuGQ.....PNGSuQ..GKV.s.FLsTPMLsPPPP..PshARPVsLshPDTKPsTTSTEGGusSPTSPoYSsPuTSPANR.FVulGPRDPuFl...pQsQSWYLG ...............................................pDsFVpSGVFsVoELVRVSpTPlssGo.....GP.NFSlu-.L...pSp.sY.Y..s....hss......u.....s.....h+RSLsSssSo............ss.K........R...K.........Sl-.-.pM-...SPst-..........FYs....SPu...ususu.S.t.sWp-..h-.............s.................s.............h......su.P....ssh....KKs.tK...FsS.sosppsSshh.sFspHt.hPs..l.....ss.....ssSP+so..sSsLHFPoosIl..QpsusYFoH...PsIRY...hpP.QDsLK-aVphsCs..s.u..u.tQ.s.u.p.........................suusQ...+h..s.h....hLsPs........P.......h.h............................t........................................................................................................................... 2 9 23 64 +1670 PF01467 CTP_transf_2 Cytidylyltransf; Cytidylyltransferase Bateman A anon Bateman A Domain This family includes: Cholinephosphate cytidylyltransferase Swiss:P49585. Glycerol-3-phosphate cytidylyltransferase Swiss:P27623. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.99 0.71 -4.17 65 14199 2012-10-02 18:00:56 2003-04-07 12:59:11 21 74 4889 196 4168 10545 6936 142.60 19 60.12 CHANGED lhsGoFDPlHhGHlpllcputphhs......llhlssspsspp..p....shhstpcRhchlctshtsst.....................................................h.hlhstc.....................hpthpchltchphshhs+s.s..thpt.....................t.....hhhhhhh.htp.......lSootl+pt .......................................................................................................lhsGoFDPlH..h.........GHl.shlppA...t......p.....h......h.......c.........h..........................llh..ls....s.s..t....s..s..p.+...p............................shh.s.h.ccR....hth....l....p....t......s....h......t.....s........................h...........h.......p.......................................................................................................................................................................t.......h..hl..h.ttc.........h...........................th..p..th....p..c..h..h...t......p...h......p.....h...s...h..hs..+.s...t...th.pt.h..........................................................................................h..h.........h.................lSoo.l+p.h........................................................................................................................................................................................................................................ 
0 1392 2560 3483 +1671 PF02348 CTP_transf_3 Cytidylyl_trans; Cytidylyltransferase Bashton M, Bateman A anon Pfam-B_886 (release 5.2) Family This family consists of two main Cytidylyltransferase activities: 1) 3-deoxy-manno-octulosonate cytidylyltransferase, [3], EC:2.7.7.38 catalysing the reaction:- CTP + 3-deoxy-D-manno-octulosonate <=> diphosphate + CMP-3-deoxy-D-manno-octulosonate, 2) acylneuraminate cytidylyltransferase EC:2.7.7.43, [1,2], catalysing the reaction:- CTP + N-acylneuraminate <=> diphosphate + CMP-N-acylneuraminate. NeuAc cytydilyltransferase of Mannheimia haemolytica has been characterised describing kinetics and regulation by substrate charge, energetic charge and amino-sugar demand[4]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.47 0.70 -4.71 20 4289 2012-10-03 05:28:31 2003-04-07 12:59:11 14 33 2780 54 1074 4236 6196 213.60 26 80.94 CHANGED lsIIPARhuSpRL.sKsLtpltGcPllt+slcsAhpSthh-+....llVATDscpltchsppaG..spshhpssshsssscRshcslcchhss........hllslpGDpPhLpspsltphhppltps.t...................hsshstsls.sppshpssslcsshcpp......shthahpcushsahpcp.s.................hhtchulYsaRpt....hhhcaststsos...........hc.hcpl.........cQh+.....hhhtu................pcI 
.......................slIPA.Rh...uSoR..l...P.....s....Ks...Lt....c...l......s...G.....K.....P..hIhash.-...p....A.....t...p.u....s...t....h.c.c.........l.l..V...A...T.....D....c...c...c....l....t....p....s....s......p....t.hG..........scl.h..h.s....p...s.c..p.t....o....u....o.....c.......R....h....s...-..s....l..p.c...hshs.....tp....................llls..l...Q.G....D..p....Phlssshlpp....sh..p...t..l...t..ps...s...........................................................th.s.o...h...s...h.........l....t........t....t....p........c...h...h..s........s...s.s......l.K...s....l....h..ctp..................s.u...h...a...a.o+ss..lPa....+....p.t...t.....................................................hh.t+..l.u...lYsactt...........hl.pas.t.....s....................hE.hE.l...................-ph+.....hh..................................................................................................................................................................... 0 335 680 894 +1672 PF04808 CTV_P23 Citrus tristeza virus (CTV) P23 protein Kerrison ND anon Pfam-B_2595 (release 7.6) Family This family consists of protein P23 from the citrus tristeza virus, which is a member of the Closteroviridae.\ CTV viruses produce more positive than negative RNA strands, and P23 controls this asymmetrical RNA accumulation. Amino acids 42-180 are essential for function and are thought to contain RNA-binding and zinc finger domains [1]. 
20.70 20.70 21.00 20.70 20.20 19.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.44 0.70 -5.01 3 175 2012-10-01 20:03:16 2003-04-07 12:59:11 7 1 1 0 0 178 0 204.80 91 100.00 CHANGED MDNTSGQTFVSVNLSDESNTAST-VEsVSSEADRLEFLRKMNPlIIDALIRKNSYQGARFRARIIGVCVDCGRKHDKALKTERKCKVNNTQSQNEVAHMLMHDPVKYLNKRKARAFSNAEMFAIDLVMYTKERQLAVDLAAEREKTRLARRHPMRSPEETPEHYKFGMTAKAMLP-INAVDVGDNEDTSSEYPVSLSVSGGVLREHHFI ......MDsTSGQTFlSVNLSDESNTASTcV+sVSSEADRLEFLRKMN.PFIIDALlRKTsYQGARFRARIIGVCVDCGRKHDKu.+TERKCKVNNTQSQNEVAHMLMHDPVKYLNKRKARAFSNAEMFAIDLVMYTKERQLAVDLAAEREKTRLARR.HPMRSPEETPEHYKFGMTAKAMLPDINAVDVGDNEDTSSEYPVSLSVSGGVLREHHFI...... 0 0 0 0 +1673 PF01179 Cu_amine_oxid Copper amine oxidase, enzyme domain Bateman A, Finn RD anon Prosite Domain Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). This family corresponds to the catalytic domain of the enzyme. 
19.70 19.70 20.10 21.40 19.50 18.30 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.16 0.70 -5.75 75 1272 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 545 187 699 1240 205 371.60 33 57.94 CHANGED Plpl....sQP-GsSFpl.c.....GptlpWppWpF+lGFssREGlsLaslpapscs.........lhYRlSlsEMhVPYGDPpssahpKpsFDsG-aGhGhhussLplG......CDChGt.IpYhDuhhssssGpPhplcNulClHEpDsG.lLaKHoshcs..s.ps.tspRsRpLVlphIsTluNY-YhahahFhpDGsIchEl+ATGIlsoss........hss.....spp.....ss...aG.shVs.sslhushHQHlFshRlDhslDG...t.p....Nolhth-shshsh.......sstN..Phusuapsccphlcsttpu..thchssppsRhaclhNssppN.h.GpPsuY+lhs......ssssh...llspssShhtpRusFAp+plWVTtYc-s.....EhauuGpassQssus......sGlssaltp...scs.......lcspDlVlWaThGhpHhP+sEDaP..VMPs-hhuhhL+PhsFFspNPuLDlPsssp ...........................................PhplhpPcGssapl..p......Gphlp.WppWpF+lu..hs.RtGhhl.slpapsps...........lhYchSlsEhhVPYu..sPp....sahh+thhDhG-aGhGhhsssLthG.................pDC..u.t.h.....tahDs.hhss.t...sGtPht.h...su.....lClaE...p....s.sG..ha+...Hppht...............t...s...hstcp+pLVlphlsTluNY-Y........h...........asahFtp...sGsI......phcspuTGllssts.......................hss..t..ts.....s....aG.ohls...slhushHpHha......shRlDhsl...D....G...........t...p....N.o.lh.t..-.s...hs...h.s....................sst..s..s.h.....s...ssh...p...sp...pph....lpsE.tpu..thch...sstp.sRhhhlsNss.ppNth.Gps.........suY+lhs......ss.st.h....hhhs.ss.s.h.hh.+R.ssF.sc..................pp....laVTpY..ccs......E.....hasuGtassps.ts..............suls.paht..p......scs..............................l.....c...s..p...Dl......VlW...hT..................hGhpHhP+s..EDaP..lM..P..s..p..hh..sh.hL+PhsFFspsPslss........................................................................ 1 162 358 544 +1674 PF02727 Cu_amine_oxidN2 Copper amine oxidase, N2 domain Bateman A, Finn RD anon Prosite Domain This domain is the first or second structural domain in copper amine oxidases, it is known as the N2 domain. Its function is uncertain. 
The catalytic domain can be found in Pfam:PF01179. Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). 20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.76 0.72 -3.90 14 944 2012-10-02 11:54:05 2003-04-07 12:59:11 11 27 479 119 492 882 81 87.20 27 12.37 CHANGED HPL-sLostEhs+spsIlpsps.hsts.shphp.lsLc-PcKphVhpa.....-ctssh..ssRcApslhhhuups..a.hllDLssuplsusphh ................................HPLssLostEI..pp....sst.llp........sut.................hts........s.....h........pFttlsLtEPsKttlh...sa................stssts........ssR..pA......l....l..h..h...s.....t.p........hcslV-..L..ssstl.p.p..h............................................................ 0 103 248 379 +1675 PF02728 Cu_amine_oxidN3 Copper amine oxidase, N3 domain Bateman A, Finn RD anon Prosite Domain This domain is the second or third structural domain in copper amine oxidases, it is known as the N3 domain. Its function is uncertain. The catalytic domain can be found in Pfam:PF01179. Copper amine oxidases are a ubiquitous and novel group of quinoenzymes that catalyse the oxidative deamination of primary amines to the corresponding aldehydes, with concomitant reduction of molecular oxygen to hydrogen peroxide. The enzymes are dimers of identical 70-90 kDa subunits, each of which contains a single copper ion and a covalently bound cofactor formed by the post-translational modification of a tyrosine side chain to 2,4,5-trihydroxyphenylalanine quinone (TPQ). 
20.60 20.60 20.60 20.70 20.30 20.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.13 0.72 -3.96 17 943 2012-10-02 11:54:05 2003-04-07 12:59:11 11 20 497 180 473 884 97 100.70 23 14.32 CHANGED sslhhcEhspsppllh..p.Ptatcslpp+G..tp.hspVhstPhosGaautts....ucRlh+shsahppsss....shas+Pl-slplllDhcshcVlchp-pt.hhsls ....................................lhh--atplppllp..ss.spatts.l..ccp..G...ls..s..hp.pVhssPh.s............s..........G..........h....hstps......spRlh.pslhahcssss........Nhau+Pl-.lsslV..D.hpptcllcl.c.t..hhsh................. 0 93 229 354 +1676 PF02298 Cu_bind_like Plastocyanin-like domain Mian N, Bateman A anon Pfam-B_398 (release 5.2) Domain This family represents a domain found in flowering plants related to the copper binding protein plastocyanin. Some members of this family (eg Swiss:P93328) may not bind copper due to the lack of key residues. 20.80 20.80 20.90 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.14 0.72 -4.19 26 1166 2012-10-02 17:41:00 2003-04-07 12:59:11 12 16 93 17 688 1154 2 82.90 31 43.81 CHANGED Wshsh......YspWAsu+pFplGDsLlFpYssphHsVhcVs.ctsYcsCpssp.shpsas.............sGssplsLsp.GhpYFICuhs..GHCp..tGh ..................................sYssWu.ps..p.p.F.p..lG.DsL............l.F.pY.......s......s.....s...........a....s..Vh.pVs....cssYcsCssss.....sl..t.s.h.s............................sG..s..s..t.l....s...L....s..p...s.G.tt.Y.F.I.suhs.....uHCptG............................ 0 76 388 550 +1677 PF03263 Cucumo_2B Cucumovirus protein 2B Mifsud W anon Pfam-B_4373 (release 6.5) Family This protein may be a viral movement protein. 
20.90 20.90 41.70 40.80 20.60 18.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.55 0.72 -3.74 7 122 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 9 6 0 124 0 104.60 70 96.29 CHANGED tuuhosl-LpLA+lVEtK+pRRRSH+pNRRtRGaKSPSERARSpLR....LFphLPFatsDss-.hhshh+c.shs-LspsEss.....s.p.tshDDTDWFAGNEW.sEGSF ......t.GAhTNVELQLARMVEsKRQRRRSHKpNRRERGHKSPSERARSNLR...........LFRFLPFYQVDGSE.Lh...-hh+HssVsELsESEAsp.h..ut-DHDFDDTDWFAGNEW.AEGuF... 0 0 0 0 +1678 PF00760 Cucumo_coat Cucumovirus coat protein Bateman A anon Pfam-B_867 (release 2.1) Domain \N 25.00 25.00 49.30 43.20 18.60 16.70 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.13 0.70 -5.38 4 531 2012-10-04 01:49:40 2003-04-07 12:59:11 13 2 37 6 0 462 0 188.50 85 95.29 CHANGED sstpRRPRRGpRS....sssutDtsLRALTQQlsRLsphhAuutPTLsHPTFVuSc+C+sGYTaTSlsl+Ps+hEKsp.FGpRL.LPssVoEYsKKhVSplQlRlNP.PKFDSTVWVTlRKlPtoosLostul.thFsDGhSsVLlYQassoGlQsNNKllaDLSshtA-IGDMpKYAllVYSKDDsLEsDElVlHVDlEHQRIPouphL .............sRRRRPRRGSRS....AsSSADAsFRVLSQQLSRLNKTLAAGRPTINHPTFVGSERCKPGYTFTSITLKPP...KID+GSYYG...KRLLLPDSVTEaDKKLVS.RIQIRVNPLPKFDSTVWVTVRKVPASSDLSVAAISAMFADGASPVLVYQYAASGVQANNKLLYDLSuMRADIGDMRKYAVLVYSKDDALETDELVLHVDIEHQRIPTSGV.L..... 0 0 0 0 +1679 PF02376 CUT CUT domain Mian N, Bateman A anon Pfam-B_770 (release 5.2) Domain The CUT domain is a DNA-binding motif which can bind independently or in cooperation with the homeodomain, often found downstream of the CUT domain. Multiple copies of the CUT domain can exist in one protein (eg Swiss:P10180). 
22.50 22.50 22.50 22.80 22.20 22.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.78 0.72 -4.18 26 1069 2009-01-15 18:05:59 2003-04-07 12:59:11 10 15 90 11 655 873 0 84.90 35 19.88 CHANGED sphssspplsTtcIscclcpcL+...................ctsIuQslFAchlLs...+SQGoLS-..LLp............cP.......KPWsphp.pG+psap+MppaLs.s..psp+thhhh...ppp .................t......pplsTtcIspcl+.ppLp...................+tsI..sQtlFuchlL..s...hS.Q.......GolS-..lLp.........................cP..............KPWs+Lp..pG+EsFh+MppaLptsps.+..hh....ph........................................... 0 96 146 428 +1680 PF00888 Cullin Cullin family Bateman A anon Pfam-B_1149 (release 3.0) Family \N 23.00 23.00 23.00 23.20 22.80 22.90 hmmbuild -o /dev/null HMM SEED 588 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.84 0.70 -5.98 47 2251 2009-09-12 05:56:55 2003-04-07 12:59:11 17 46 329 30 1484 2160 44 484.10 20 65.66 CHANGED Wstlp.ss.....lp..plh..............t......h..laptlhsh............................................p.hut...lYpthtp....hh.........ppaltt.h.tph.t.....................t...hl..h...t.W..a...h.hhtt.hh.a..hs..p.a.ltpp.........................h.h..apt.l.ht...htt.hh..hh..h.....R..p.....p.....htthhp.hh.h........................t..hY.p.Ft..hlptottaYp..s.thl.p..sh.pYh..s..thppEpppst.........alp.ps.t...lhthhpp.hltth.t.l......tth..hhttpp.pcl..ha.lhphh.tslp....hhthhpphlp..ptuhthhtp.........................................................................................t.ps.....hlpthlthht....hhppsF.t......c.....hhtslppsht.hls.t.....................spssEhluhahD.hL+put....pthsptp...................lcpplsplhhlhcalpsKDlFpcaYpphLA+RLlptpSss..-sEpphlstLKp.............................ttG.paTp+lptMhpDlplSc-lsppFc.phhtpp.t..........................................................shshslplLssshWPh.ssp...............hplPp.....clppthctFppaYppp+ss........R+LpWhhpLupsclphph.ttt...........................................h.hploshQhslLhhFNp...............pcp
lohc-ltpttplstc..tLpc.tL.pslhpsphhh............p..pspphssssthplNppas .......................................................................................................................................................................................................................................................................................................................................................................h............h....h.................................................................................s......lat..h.............h..........................th.h..t................................................................hl..h.......Wt..pa....................h.h..t......hh.h...h....s...p......h...h.tp.....................................................................................................................................thhh..apt..h..h................h.....t......h.t.............h......h..............lt...p..................R....s........t............p............h..th..h.t.h...................................................................................ha.....ac..hlt.stt.hat..s..................h.t........h.....ah...........s..th.l..pE...hh.................h.h.........t...................l..t.h.p.hl..............t..h.........................tth.....h...h.....t.........t.........t.......p...........t...............p......l...t....h...a......h...h...t........t.............h.............t.......t........h............h..h.t.ht.....thlh..p.u.thh.t...................................................................................................................................................................h...ltt...h..lp...hh.p.p.h....t.h.....h..t........sFtt...........................................p.......h....t.shp.p...s....h.t...h.ls.t............................................................spssEh..l..........u..ha.......h.D...hl+ts....................tp..p.....................................................................
...................................................hcp.h....h.pp...........hhh.l...h..p.hl.....p.s.....KDlF.paYpp.hL.ucR.L....l......................t.....p..........o.....h................s...............-..............t........Etphl..p.h..L+t..................................t.h.G.....p.h...s.p.p.hptM.hp.....Dh.p.h.S..p.ch..t.p.app....h.htpt.........................................................................................................................thphph.lLsts.......h..W..Ph............................................htlP..............phtph.hp.ap.pa..Y..t...ppss...........................R.+L..pW......phupsplp..hp.h...t.t.t........................................................................................................hph..p.loshQ....hhlLh.h....FNp..................................tp.p..ho.hppltp.....t..s.....t..l.....t.....Ltp.tl...t.l...h...p.tthh.........................t....................................................................................................................... 0 530 800 1185 +1681 PF03091 CutA1 CutA1 divalent ion tolerance protein Mifsud W anon Pfam-B_2307 (release 6.4) Family Several gene loci with a possible involvement in cellular tolerance to copper have been identified [1]. One such locus in eubacteria and archaebacteria, cutA, is thought to be involved in cellular tolerance to a wide variety of divalent cations other than copper. The cutA locus consists of two operons, of one and two genes. The CutA1 protein is a cytoplasmic protein, encoded by the single-gene operon and has been linked to divalent cation tolerance. It has no recognised structural motifs [2]. This family also contains putative proteins from eukaryotes (human and Drosophila). 
22.10 22.10 22.20 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -4.35 13 1667 2012-10-01 21:59:08 2003-04-07 12:59:11 10 8 1551 85 550 1191 568 99.10 39 84.17 CHANGED hhllhlTsPs.-pAcclA+pllEc+LAACVNllspIcShYhWEGcIpcDsEhhlIlKTpspphspLpccl+phHPYsVPEIluLPlppGspcYLpWlcpslc ..............h.hlVhsTsPsc.ts.ApclAttllp..c+LAACsslls.shpSl...YhW.....c.....G.....+....l.....cp-.t.ElthllKTss..sphpsLhcpl..+phHPYpsPEllslPlsp..GsssYLsWlstsh.p....................... 0 154 313 430 +1682 PF03932 CutC CutC family Bateman A anon COG3142 Family Copper transport in Escherichia coli is mediated by the products of at least six genes, cutA, cutB, cutC, cutD, cutE, and cutF. A mutation in one or more of these genes results in an increased copper sensitivity. Members of this family are between 200 and 300 amino acids in length are found in both eukaryotes and bacteria. 21.20 21.20 21.30 21.60 20.80 21.10 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.36 0.71 -5.06 8 2302 2012-10-03 05:58:16 2003-04-07 12:59:11 9 10 2140 20 426 1510 277 197.20 39 85.23 CHANGED hllElCl-slsulhsApsuGA-RVELCuuLulGGlTPShGllcsAsc...pusIPlasMIRPRGGDFlYs-pElcsMtpDlcss+chGssGlVlGsLsscGslDhcthcpLltAAp..GLuVTFHRAFDhssD..PtcALEpLIcLGs-RVLTSGtt..ssAl-Gh-pLtsLVsQAusRIpIMuGAGlsApNltcLsptTGlsElHuSuts 
.....................................................h.hlElCsts......hpsshpA.t...psGAcRlELC..s..s..h.s...G.GlTPSh...G....llcp.s...hp.......................ph.sl...P.lhshIR.PR.....u...........G..............D......Fs.......Y..s-tEhth.MhcDlphs+c..h...........G............s........s......GlVhGsLss...-.G.............p...........l........Dh.....s.th.cclh.s.......s.u..t..................sh..s....l.....TF.HR.....A.FDh..s......s..s................h...p..A.....lc...p.......L.h....p....h.G...l.s..RlLTS..........Gtt....sss.....p..u..l.sh....l.....pc.L.....l......t...p.....u....s.........u.........p.............I.............Ihs...........Gu.GlpscNlpphh.c.......sG.lpplHsot.s............................................ 1 136 238 337 +1683 PF01083 Cutinase Cutinase Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.17 0.71 -4.54 23 1537 2012-10-03 11:45:05 2003-04-07 12:59:11 17 11 503 75 597 1612 26 189.00 24 66.18 CHANGED ssCscltllFARGosEsush...ussGsshsssLputhGusslul.uVt..YsAshsp........tsosssGssshtshlspssspCPsTplVluGYSQGuplhssAlst..............lsuustspltuVllFGsPps........spsls.......................slssh.suK..shshCssuDslC..usuhshss....Hh..sYss-.h.sspAssFlts+ls 
........................................................s..C.sh.hhhA.RGot..-...........s.....s..........s.......h..........s.........h.s.......s....h.h...s.sl......p......p...p.h.....s...........p..s.l...ssh..s..Vs........Ys..Asht.........................t..os.stGs...p.s.h........sstl.....pp.h......s......s..p.....C.P.s..T.....clVLuGY.....SQGAslhstshss...................................................sh.s.s.s..s...s..s..+.l...su..l....sLaGs.Ppp..........sts.hs.................................................................shs..s...h..s..s.+...s....hplCs.s....u..D..sl.C.......ss...................s.............H.....Y...........u..hh................................................................................................................. 0 135 324 513 +1684 PF01473 CW_binding_1 Putative cell wall binding repeat Bateman A, Mistry J, Russell R anon Bateman A Repeat These repeats are characterised by conserved aromatic residues and glycines are found in multiple tandem copies in a number of proteins. The CW repeat is 20 amino acid residues long. The exact domain boundaries may not be correct. It has been suggested that these repeats in Swiss:P15057 might be responsible for the specific recognition of choline-containing cell walls [1]. Similar but longer repeats are found in the glucosyltransferases and glucan-binding proteins of oral streptococci and shown to be involved in glucan binding [2] as well as in the related dextransucrases of Leuconostoc mesenteroides. Repeats also occur in toxins of Clostridium difficile and other clostridia, though the ligands are not always known. 20.50 9.80 20.50 9.80 20.40 9.70 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.74 0.73 -6.64 0.73 -3.57 213 28133 2009-09-16 13:19:13 2003-04-07 12:59:11 15 543 547 186 1455 18171 72 18.80 40 20.20 CHANGED suWhp.h..s..ssW.YYh.sssGsM ............sGWhp.......s...upW.YYh..sssGtM........ 
0 419 1058 1118 +1685 PF04122 CW_binding_2 Putative cell wall binding repeat 2 Kerrison ND, Finn, RD anon COG2247 Repeat This repeat is found in multiple tandem copies in proteins including amidase enhancers [1] and adhesins [2]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.12 0.72 -3.78 99 3897 2009-01-15 18:05:59 2003-04-07 12:59:11 7 210 151 0 926 3881 25 92.20 26 35.86 CHANGED pRl.sGss...RY-Tuhp....lupp..........h..........h..sssslhl..s....sG....ps..asD.ALuuuslAup............pssPllLss....ssh...psstshlpsh...........t.pplhll..GGpssls...pp....lhpp ..............................................Rl.uG..ps...Ra-Tuhp....lucp.....................h.t............................t.....sssp.lh.l..s.............sG...........ps......hsD..ALuuuslA.sp..............................t.s.u..PIlLss.......tssls....ssstshl.pph.........................tspplhll..GG..p.sslsppl...t........................................... 0 554 857 897 +1686 PF03638 TCR CXC; Tesmin/TSO1-like CXC domain, cysteine-rich domain Bateman A anon Pfam-B_1144 (release 7.0) Domain This family includes proteins that have two copies of a cysteine rich motif as follows: C-X-C-X4-C-X3-YC-X-C-X6-C-X3-C-X-C-X2-C. The family includes Tesmin Swiss:Q9Y4I5 [1] and TSO1 Swiss:Q9LE32 [2]. This family is called a CXC domain in [2]. 20.30 20.30 21.20 20.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.62 0.72 -4.10 90 900 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 139 0 617 883 19 40.70 48 14.71 CHANGED ppp+sCsCKKSpCLKhYCECFtsGthCsp..CpC.psCpNptpp ...............pp+sC.sCKKStC...LKhYCECFt...........uu..hhCss...CpC..psCpNp........ 0 297 406 520 +1687 PF03128 CXCXC CXCXC repeat Bateman A anon Pfam-B_252 (release 6.5) Repeat This repeat contains the conserved pattern CXCXC where X can be any amino acid. 
The repeat is found in up to five copies in Vascular endothelial growth factor C [2]. In the salivary glands of the dipteran Chironomus tentans, a specific messenger ribonucleoprotein (mRNP) particle, the Balbiani ring (BR) granule, can be visualised during its assembly on the gene and during its nucleocytoplasmic transport. This repeat is found over 70 copies in the balbiani ring protein 3 Swiss:Q03376. It is also found in some silk proteins [1]. 13.20 13.20 13.20 13.20 13.10 13.10 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.42 0.74 -6.50 0.74 -3.42 116 305 2009-01-15 18:05:59 2003-04-07 12:59:11 9 19 71 0 133 258 0 13.90 43 10.39 CHANGED sphWscpoCpCtCs ....sppaDcsoCpCsC.. 13 46 52 85 +1688 PF02560 Cyanate_lyase Cyanate lyase C-terminal domain Mian N, Bateman A anon COGs Domain Cyanate lyase (also known as cyanase) EC:4.2.1.104 is responsible for the hydrolysis of cyanate, allowing organisms that possess the enzyme to overcome the toxicity of environmental cyanate. This enzyme is composed of two domains, an N-terminal helix-turn-helix and this structurally unique C-terminal domain [2]. 25.00 25.00 25.80 32.60 24.80 20.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.45 0.72 -4.44 25 725 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 688 80 229 524 63 73.00 59 47.15 CHANGED shp.slPoDPhIYRhYElVhVYGsolKplIpEcFGDGIMSAIDFsh-l-+.psPc.GD.RVVlThsGKFLPY+pa ........sh...stlPTDPslYRFYEhlQVYGsTlKALlHE+FGDGIhSAIsFclDVcKlsDPc.G-.RsVITLcGKaLPhKs...... 0 59 131 184 +1689 PF04199 Cyclase Putative cyclase Bateman A anon Pfam-B_1440 (release 7.3) Family Proteins in this family are thought to be cyclase enzymes. They are found in proteins involved in antibiotic synthesis. However they are also found in organisms that do not make antibiotics pointing to a wider role for these proteins. The proteins contain a conserved motif HXGTHXDXPXH that is likely to form part of the active site. 
20.80 20.80 20.80 20.90 20.70 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.41 0.71 -4.57 76 2705 2009-09-13 14:05:17 2003-04-07 12:59:11 8 22 1704 5 1053 2458 695 168.90 21 65.07 CHANGED lhDLohslp..............pt.h........shh............h....................tt.hsh...pt....shsspt.lp.h.ssHsGTHlDu.sH........h................s.................sl-plPlp....hhs.pulllDlsp.................hsst-lptshpt.th...plptu-hVll+T......................Gh.................tphh..sstp.ahsp..........PGlsh-uucaLh.cpGl...pslGhDshuh...........-ts. .........................................................................hDlo.shp..................t..h........shas...........htht.h............................................tt........tshtspp..lp.h..ssp..s.G..THhDAPhH.........................................................h.tsst.........................................sl-p.l.slct.............hhu.......ulll..Dhsp.....................................ssthl.shpclt..tthtt.th.........pl.pt.u.c.hVll+T..........................................uh.......................................pp...hh...............s...s..tp...hhps................hs.uh....s.......-........ss..caLh....-..p..sl......pslGhDshuh-...s.............................................. 0 330 674 904 +1690 PF02984 Cyclin_C cyclin_C; Cyclin, C-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Cyclins regulate cyclin dependent kinases (CDKs). Swiss:P22674 is a Uracil-DNA glycosylase that is related to other cyclins [4]. Cyclins contain two domains of similar all-alpha fold, of which this family corresponds with the C-terminal domain. 
21.20 21.20 21.20 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.72 0.71 -4.17 240 3032 2012-10-03 00:42:12 2003-04-07 12:59:11 14 43 449 158 1706 2961 42 116.40 20 28.12 CHANGED PTshsFLcR....h.t+...........s...sph..................s....p........hcplupalhE......Lolh-ap............hlpahPShlAAuA..lhlupph......Lt.....t.............WspsLptao.uY...stspLtsssphlh....phhh...pss.....tsph.p..ulhcKYsppchtp.........lu.hhss. ..............................................................................ssshpFLpp........h..h+................h.........sph...........................s..p..........hcphupahhE........................lol..h-hp..................hlp.a.PS.hlAAuu..lhhAtph.......lp....t...................................Ws.p..p..l..t..t.ho..sh.................s.p...p...l..t......s....s.hp...hlh..phh............p.....................thhp+ht...............h....................................................................... 1 471 819 1258 +1691 PF03784 Cyclotide Cyclotide family Bateman A anon [1] Domain This family contains a set of cyclic peptides with a variety of activities. The structure consists of a distorted triple-stranded beta-sheet and a cysteine-knot arrangement of the disulfide bonds [2]. Cyclotides can be separated into two subfamilies, namely bracelet and moebius. The bracelet cyclotide subfamily tends to contain a larger number of positively charged residues and has a bracelet-like circularisation of the backbone [2]. The moebius cyclotide subfamily contains a backbone twist due to a cis-Pro peptide bond and may conceptually be regarded as a molecular Moebius strip [2]. 
20.30 20.30 20.50 20.50 17.20 19.80 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.22 0.72 -4.07 30 259 2009-09-11 23:07:15 2003-04-07 12:59:11 8 4 30 33 0 274 0 29.80 57 44.52 CHANGED G..lP.CGESCsalP.C...tssG.CSCcs+...VCYhN .......GlP.CGEoCVhhP.C...sshG.CSCpsp...VCY+N.. 0 0 0 0 +1692 PF00548 Peptidase_C3 Cys-protease-3C; 3C cysteine protease (picornain 3C) Bateman A anon SCOP Family Picornaviral proteins are expressed as a single polyprotein which is cleaved by the viral 3C cysteine protease. 21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.45 20 4118 2012-10-02 13:45:52 2003-04-07 12:59:11 15 54 450 68 1 3087 3 135.60 48 11.78 CHANGED GPsh-F....uhuhh+pNlhslsT...........spGcFTtL..GlaDplhV.....lPpH..upsscsIhlsGppsplhD....uhpL.lcppGssLElTllpL..cR...NEKF.RDIRpals...pphpcts-ssLslNosphsphhlsVGpVsthGhl.sLuGssTpRsLhYsaPT+sGpCGGVlhs....sGKllGlHlG.GNG ...........................................................................................................................................................P..-h..thhht.Nh.h.sph...........tp.s...h.h.hh...G..lhsphhl.....lPpH........ut.h...plhh.....p..t...t...hph.-.........thtl...stpsh......h.....-lsllpL..cp...s..p+F.RDIptals............pp.s.t..h..sss..shhl.......s.s....pc.hss..hh....l..s.l.us.....ls......G...h...l......s...hs......G.......p............o..t.+...h...hhYp..a..s.T+....A.GQCGGVl.h.u...........s.GKllGIH.lG.GNG........... 0 0 0 1 +1693 PF00007 Cys_knot Cystine-knot domain Sonnhammer ELL anon Published_alignment enriched with PDOC00234 members. Domain The family comprises glycoprotein hormones and the C-terminal domain of various extracellular proteins. It is believed to be involved in disulfide-linked dimerisation. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.85 0.72 -11.43 0.72 -3.95 24 1416 2012-10-02 16:54:34 2003-04-07 12:59:11 17 22 417 7 416 1343 0 92.30 29 44.70 CHANGED t.hCp.hN...hTlplE+csCshClslpTTICsGaChTh-...sh.ssh..h.QplCsac-hpYcohclPsCPsGssPhhoYPVAhSCcCs..tCstssoDCsh.thpsshC .......................h...C..hp.......hsht.h..ch...p..t...Cs.s..h.shp.sshC.GhChp.........................s....s.....t................p..sC..shtchph.psshl.......sCP...s.....G......s.s...s...h...h..o...ash...sh...oCpCs...tCpts.ss.h........................... 0 44 77 214 +1694 PF01053 Cys_Met_Meta_PP Cys/Met metabolism PLP-dependent enzyme Finn RD, Bateman A anon Pfam-B_366 (release 3.0) Domain This family includes enzymes involved in cysteine and methionine metabolism. The following are members: Cystathionine gamma-lyase, Cystathionine gamma-synthase, Cystathionine beta-lyase, Methionine gamma-lyase, OAH/OAS sulfhydrylase, O-succinylhomoserine sulfhydrylase All of these members participate is slightly different reactions. All these enzymes use PLP (pyridoxal-5'-phosphate) as a cofactor. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.10 0.70 -6.18 30 12248 2012-10-02 18:26:03 2003-04-07 12:59:11 15 41 4308 161 3561 25261 14126 368.30 34 93.53 CHANGED sThtlHuGpp.c...spuAlssPIYtoooatas........tsscpsG.a.YoRpGNPTpssLEcplAtLEuu..stulAhuSGhAAl.sslhsLlcs.GDc.....llsssslYGG.....Taphhcpshs+.hGlpssaVDs....schsslcpAlps..sTKhlalETPoNPhhclsDIttluclA+cp..s..llllVDNTFsoP.hlppPLcLG..ADlVlHSATKYlsGHuDVluGlllsps.tclspchthht........................................sshGusluPaDuaLlhRGl+TLsLRhcppspNAhplAcaLcpc....P....pVcpVhYPGLsoHPpH-lA++QhpGhuu..hlSFclcus............tupchlcsl+LhohAtSLGusESLlptPAsMTHuslstEpRtstGls-sLlRlSVGlE-h-DLlsDLcQALc .............................................................................................................................Thhl+uG..p.c.............tt.su.hssPI..a.o................ooa.h.....as.........................................................................h..........h..........t..........p........t..............t.......G.....h...............Y.o...R...h...s.......N................P...T....p...p...s...l....E.....p....t....l..A.t.......L..........E..........G.....G..............t.....s...........u..........h.........s....h...u........S........G.................h.....A.......A....l.............h......s......l.....h.....s.......l.........h.......p....s.....G..D....c.......................l...l........s........s........s........s.....l.........Y......G...G............................T.h...p...l.......h........s.......p.......h......h........t.......+.......h......G...........l......p.........s.....s.......a...l.....D..s.......................s...s......h......p.......s......l........p.......t.....s.....l....p........s............p......T.............K.............h.....l............a.....l....E.....o....P.......u....N....P.....h....h............p....l...s....D...l...t...t....l....u....p.....l.....A.......+...pt..........u...........h.....
...h.........l........l...........V.........D............N.......T............a.........s..........o...............P......h.......h............p..........p.........P.......l......c........h...........G..........A......D.....l.......V..l..H..S..u....T..K..........Y..l........s.G.....H..u..D.s....l.......u..G....s.l.l...s....s........s...................p...ht....p...p..h..t..h..h..t....................................................................p.s.h.G....s....s....l..u...P...h...s..u...a....L......l..l..+.G...L....c....T....L...s....l...R....hc...p.....p...s....p.N..A.....h.p.l.A....c..a.L...ps..+.....................P.........................tV.p...p..V....hY...P........u......L.........................s.......p..............t.....a......p.....l...t..p............+.......................h...........p..............G................t.........u.......u...........l..lSFplcss.....................ttstphl....ssL....c....l.h.s....hutS.lGsscS.L.l....h..a.....P.....u...s.....h....T..H......s....p....l.....s...t..c.....t.....p.....t...t..t..G.........l..ss.sL..lRlSlG.lEc.s-.DLlsDLcpAL.t......................................................................................................................................................................................................... 0 1060 2191 2996 +1695 PF00839 Cys_rich_FGFR cys_rich_FGFR; Cysteine rich repeat Bateman A anon Pfam-B_297 (release 3.0) Family This cysteine rich repeat contains four cysteines. It is found in multiple copies in a protein that binds to fibroblast growth factors [1]. The repeat is also found in MG160 and E-selectin ligand (ESL-1). 
20.60 20.60 20.60 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.16 0.72 -4.04 85 1785 2009-01-15 18:05:59 2003-04-07 12:59:11 12 15 137 0 1097 1610 20 58.80 24 71.91 CHANGED pcCc.ptlhph..pp..spDh+.....lsstLhpsCcp-l...pc..aC........sp............ttspupllpCLppphpp.tp ...........................cCc.ptlhph...pt....spD.h+..............lsstLhpuCc....s-l...pc.....hC...........ss..................................ttspGpllpCLhpphtp..t...................... 0 339 467 794 +1696 PF00031 Cystatin cystatin; Cystatin domain Bateman A, Sonnhammer ELL anon Prosite Domain Very diverse family. Attempts to define separate sub-families failed. Typically, either the N-terminal or C-terminal end is very divergent. But splitting into two domains would make very short families. All members except Swiss:Q03196 and Swiss:Q10993 are found. Pfam:PF00666 are related to this family but have not been included. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.92 0.72 -3.86 43 2054 2012-10-01 19:28:07 2003-04-07 12:59:11 16 20 367 81 813 2132 5 88.60 17 54.95 CHANGED Gulpssss..sssclpcslchAlscaNpps..tsthhchhclhcsppQVVuGh..pYhlchplscssC........ptpshpsCshh....ppphthsshpshptst .....................................................hps...sssplpph...sphul..p.p....hN.........p.....ps........................s........h..........h....hp.h.h.c...l...h...c......u......p...p.....Q..l..V......u...Gh......pYh..lp..lclt.cs.p.s............ptp.hp....s.p.h...............pth.................................................. 0 158 262 454 +1697 PF01578 Cytochrom_C_asm CytC_asm; Cytochrome C assembly protein Bashton M, Bateman A anon Pfam-B_114 (release 4.1) Pfam-B_8014 (Release 8.0) Family This family consists of various proteins involved in cytochrome c assembly from mitochondria and bacteria; CycK from Rhizobium[3], CcmC from E. 
coli and Paracoccus denitrificans [2,1] and orf240 from wheat mitochondria [4]. The members of this family are probably integral membrane proteins with six predicted transmembrane helices. It has been proposed that members of this family comprise a membrane component of an ABC (ATP binding cassette) transporter complex. It is also proposed that this transporter is necessary for transport of some component needed for cytochrome c assembly. One member CycK contains a putative heme-binding motif [3], orf240 also contains a putative heme-binding motif and is a proposed ABC transporter with c-type heme as its proposed substrate [4]. However it seems unlikely that all members of this family transport heme nor c-type apocytochromes because CcmC in the putative CcmABC transporter transports neither [1]. 21.10 21.10 21.10 21.20 20.80 20.90 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.60 0.70 -4.79 39 8917 2012-10-03 10:28:09 2003-04-07 12:59:11 15 16 4477 0 1718 6398 4666 196.50 21 51.54 CHANGED huslhtulhhhshshshhhhlh.t...p....thhuslshsssh...........................lhhuhhhh.......................l.....tssslssslps.hLhhHlshhhhoYushhlushhulhhL...........................................................................ht.htthcphshphhhlGashLTlullsGuhWA..p.sWGsaWsWDP+pshuLlsWllasuhL+sth..pc.shtuctsulluhlGFhllhlshauVphh..slc 
............................................................................................................................t..sh.h.h.hs.h.h.h..shh.....hhh......................................p.........t..........................th...........h.s.t..h.h....h.h...sh........................................................................................................................h.s.h.uhhhh.................................................................hh.............h..t..s..t.s.l..s.s..h..L.p....s......h..l.h.hHl..sh..hh..hu.Y...s...s...hh...ls.h.h.h.ulhhL..........................................................................................................................................................................................................t...h..........h..tt..ht...p..h..sh.t.h...s...hG...hh.h..ho...l.u.l.lhG.u...h..Wu........p.sW....G...s..a..W.h..W..D.s+.ts.hu..hl....s.ah..las.u..h..la.hhh........pp...sh..t..t..p..h..s...s....h..h..s..l..h.u.h.h.h.hh.l.shahVp......................................................................................................................... 0 525 1095 1442 +1698 PF02224 Cytidylate_kin Cytidylate kinase Bateman A anon Pfam-B_1582 (release 5.2) Family Cytidylate kinase EC:2.7.4.14 catalyses the phosphorylation of cytidine 5'-monophosphate (dCMP) to cytidine 5'-diphosphate (dCDP) in the presence of ATP or GTP. 
20.40 20.40 20.40 20.40 20.00 20.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.69 0.71 -4.79 14 4159 2012-10-05 12:31:08 2003-04-07 12:59:11 13 26 4102 23 894 2951 2326 155.70 40 63.80 CHANGED hphhhphplphh.p.st....shhsGpDlosp.IpopEVuptsStlushPtVRshhschQ+phuc..ssslVhEGRDlGTVVhPcAplKIFLsASsEhRApRRhpphstpu..s.sh-pLlt-ltcRDphDppRssuPLhhAtDAlhlDTSsLolp-Vl-cllp ........................................hs....phplph.....p..t..s..t..p....Vhl....s.G..c..DV.op.t..IRsp-VustsS.tVA.u.hPtVRctLlptQRp....h.....u............p..........t.........s..........G..........l.....V.hDGRDIGTVVh.P-.........A...p....lK....IFLsASsEpRAcRRhpph..t........p.....+.........G......h......s...s.........s...h.........-....p..l...hp-.......I.pcRD..pp.D..p.sR.....p.....lu.....P.L.....c........As.....D......A......l.....h.lDoTsl....oI-pVlpplh.t.................................................... 0 314 596 761 +1699 PF01265 Cyto_heme_lyase Cytochrome c/c1 heme lyase Finn RD, Bateman A anon Prosite Family \N 20.40 20.40 22.40 21.30 20.30 19.20 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.16 0.70 -4.80 46 463 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 263 0 351 449 4 246.90 36 87.58 CHANGED M.................s..t...............................s...spCPlscpspp......................................................stCPspt...............................................tphss..sshhss.spp.ssspshsLsscREhSSIPRs........................................t.usus........WlYPStpQFaNAMlRKGa..t.pp.....tsshcslVslHNtlNEpAWpcIlcWE..tha..................................scpCs.sP+LhpFtGcsc-...hoP+Aphpph.hG........hphPFDRHDWhVsRC.G..........+-VcYVIDaYs...Gssps..........p.....s......haLDVRPA.Lsohpu...sh-Rhh+h 
.......................................................................s.....................................................tCPhptt......................................................................stC.Phtt..............................................p.p.l..sP......ssh.s.s.sp..p....s.ssQs..hs.LsscRphSoIP+u.........................................................s..ssss.......W.YPS.Q.Fa..NAMlRKGa.t.t...tphs....tcshcslltlHNtlNEpAWpEIlcWE..t..ha...........................................................sppss..sP+LhpFtG+sp-...hSP+ARhpph..hG.....................................hphPFDRHDWhVsRC..G...............................ccV.RYVIDaYsGssst.........................p.....s.......shhLDVRPA.lsohpush-Rhhh.h.................................... 1 120 196 293 +1700 PF02322 Cyto_ox_2 Cytochrome oxidase subunit II Bashton M, Bateman A anon Pfam-B_997 (release 5.2) Family This Family consists of cytochrome bd type terminal oxidases that catalyses Quinol dependent, Na+ independent oxygen uptake [2]. Members of this family are integral membrane proteins andi contain a protohaem IX centre B558. One member of the family Swiss:O05192 is implicated in having an important role in micro-aerobic nitrogen fixation in the enteric bacterium Klebsiella pneumoniae [1]. 
28.10 28.10 28.70 28.50 27.50 28.00 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.19 0.70 -5.27 134 4601 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 3012 0 866 2836 242 331.80 32 94.78 CHANGED sLtllWahllulhlhhYslhDGFDhGlGhLh.sh....hsc...........-p-RchhlNolGPhWDGNEsWLlhuGGulFAAFPhsYAslhouhYlslhlhLhuLIhRuVuFEaRtK.st.s.ph+.....phWDhshhhGShlsshhhGlslGs.llpG...lshs.............................................ttas....Gs..............h.hsh...lsP.........FullsGlsslshhshhGusaLhhKTcG....p.lpp+Apphuthhshsh.hshhhhss.............................hhshh.tsthhtpah.shshhhhh.sslsllsslhshhhh...thp+............pth..uFhsoulsh.hhshhslshuhaP....hllPus...........sLTlasAuuuthoLphhhhsshlhlPllLsYssasYhlF+ ......................LphlWalllulllhsahlhDGFDhGlGhLh....h.....lu+..........................s-p..E...R...+...l...hl..N.....ol.....uPhWD.....GNpVWLlhuGGAlFAAFPhsYAs.hFSuhYlshhl.lLhuLhh.RsVuF-aRsK..h...ssc.a+.....phWDhuhh.lGShl.ss....hlhGl...AhGs.llpG....lshs............................h.....................thtasGs......................h.hpL..........LsP...............Fu.l..l.sGls...s...lshh...hhhG....usa..Lt..hKT..pG.....t.lp.pR..u....pphuphhullh....hlh.h.h.lsu.....................................hhssh.....sss.h.hp.p.a.........h..p.h.P......h.h.h.hh...ss.l..ul.ls..hlhs.hhhh....thp+.....................ssh......uFlhsslth.hhshhshuluhaP....hlhPus................sLTlas......AsSot.hTLplMhhss.ll.h.l.PllLsYshWsYahh........................................... 0 238 510 701 +1701 PF00283 Cytochrom_B559 cytochr_b559; Cytochrome b559, alpha (gene psbE) and beta (gene psbF)subunits Finn RD anon Prosite Family \N 29.50 29.50 30.70 30.30 29.40 29.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -6.92 0.72 -4.66 8 2001 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 993 42 112 639 147 28.50 55 47.05 CHANGED sscshoYPIFTVRWLAIHulAIPolFFlG ..........hphshs..IholRahslHulslPolFhhG... 
0 28 71 99 +1702 PF00284 Cytochrom_B559a cytochr_b559a; Lumenal portion of Cytochrome b559, alpha (gene psbE) subunit Finn RD anon Prosite Family This family is the lumenal portion of cytochrome b559 alpha chain, matches to this family should be accompanied by a match to the Pfam:PF00283 family also. The Prosite pattern pattern matches the transmembrane region of the cytochrome b559 alpha and beta subunits. 25.00 25.00 28.20 27.70 22.80 22.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.93 0.72 -4.44 25 1065 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 972 19 62 465 127 39.70 82 49.73 CHANGED LAYDVFGTPRPNEYFTpsRQclPlloDRFsAhpQl-phsc ...........LAYDVFGSPRPNEYF.TESRQGIPLITGRFDuLEQLDEFS.p... 0 14 39 56 +1703 PF02335 Cytochrom_C552 cytochr_c552; Cytochrome c552 Mian N, Bateman A anon Pfam-B_19175 (release 5.2) Family Cytochrome c552 (cytochrome c nitrite reductase) is a crucial enzyme in the nitrogen cycle catalysing the reduction of nitrite to ammonia. The crystal structure of cytochrome c552 reveals it to be a dimer, with with 10 close-packed type c haem groups. 
23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.24 0.70 -12.83 0.70 -5.79 38 1136 2012-10-01 23:37:15 2003-04-07 12:59:11 10 12 966 82 171 735 44 413.10 48 88.71 CHANGED s.tpau........+taPpQYcoW+cTsE..ss.........s................htDtLcccPtLslLWuGYuFu+DYscPRGHhYslpD.hpohRssss...........stsuuChoCKoPssPcLhcchG-........sYFsspascstscIs.......ps.............luCsDCH-sp.shp.......L+loRP.tlt+AlcslGh.......................c.hc...to+p-hRohVCAQCHVEYYFptc.............................................sptVpFPWspGh......olEsh.pYYDphs..........FtDWTHulSts.MLKAQHP-aEhas.pGlHupsGVoCsDCHMPhsp.tsupKhoDH+.lt.SPL.sshp...coCtsCHp.poc-tL+spVtshps+shphth+AtptlspAth-h................ctAh.cAGAspcchc.....puhphhR+AQWRhDashAppusuFHAPpEulRlLusul-cAscAcspLtplLAptGlps........PshsshphAphshshshc+h.hct..pp.lcs..hsp................................t+ .........................................................ts..Epau.paPcQYtSWpsTu-..ps...........................................thctLt..ccPpLlILWAG.YsFS+DYNcPRGHhaAlsDlccTLRTGu.Phss.pcG...................................s.P.hACWoCKSPDVs.RL....I.pc.G-................................cuY.ap..u.KWuchG..sE.Is........Ns.......................................lGCADCHsss.o..c.......LploRP.hshcAhculG+............................s..hc....cAs..R.-.pohVCuQCHVEYYFcuc...........................................................sKsV.pF.PWD..cGh.......................cVEshE...p.YYDclu..................FuDWTp.....sLSKsPML.KAQHP..EYEh.W...o....sG.......l..H..G..c....N....s...VoClDCHMPpVp....tpG.K....hY......TDH.+..Is......sP..h....cs...hs......pTC..t..sCH......s....Q..s..c.st...Lp..p.hVt-+ppplp-hph+s...EcpLV+AHhEA..................................................+tAh.....D.A.G.....A.T.....-.tEMK......slhptIR+AQWRWDhuhASHGht.....hHAPcEuhRhLusAhDcAtcARsKLscl..LApp.Glsc....tl.h..P...D.I.STtpKApthlGl.sh.pp.hpAtK..ppFlcsl.lPp.W.cpAp..ss.............................................. 
0 57 111 146 +1704 PF03188 Cytochrom_B561 Cytochrome_B561; Eukaryotic cytochrome b561 Mifsud W anon Pfam-B_2927 (release 6.5) & Pfam-B_7165 (Release 8.0) Family Cytochrome b561 is a secretory vesicle-specific electron transport protein. It is an integral membrane protein, that binds two heme groups non-covalently. This is a eukaryotic family. Members of the 'prokaryotic cytochrome b561' family can be found in Pfam: PF01292. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.96 0.71 -4.34 48 1285 2012-10-03 10:28:09 2003-04-07 12:59:11 11 38 290 0 837 1341 24 131.30 21 41.09 CHANGED hHPllMslGalhLhG..........EAlLsa+.s...hh.hs+pspKhlHhsLphlAhhhullGlhuhap.+stp..........t...h.sphhSLHSWlGlsslhLaslQhlsGhssahh....PthstthRshhhPh..HshhGlssalLulsoshhGh.pphhFt ...................................................Hshhhhhuh.h.hhh..s...............................u.l.lh...h+.....h......................h..................s....c.........h....hh......h.......lH..hhlp.h..l.uh..l..l.ul..s..G..hsh.s.h...t.h.c.s.t.............t.....h...sp..h.h..........o....h......HuhlGlssh.h.L.h.h.l....Qhl......h.............G.hh...h..h....hh.................P...t...h...s...t...p.......h.....R...t..h..h..h..h..........HhhhGh..shhl.luls.shhhGh.......t............................................... 
0 230 447 659 +1705 PF00032 Cytochrom_B_C cytochrome_b_C; Cytochrome b(C-terminal)/b6/petD Sonnhammer ELL anon Prosite Domain \N 20.70 18.90 20.70 19.30 20.60 18.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.54 0.72 -3.79 114 70433 2009-09-12 14:19:01 2003-04-07 12:59:11 12 20 28713 95 599 68332 2102 89.80 73 27.89 CHANGED .ANPhhTPsHItPEWYFL.hYAILRS..................IP.....sKLsGVluhhhSlllLhhlPh....hp....puphpohp.ap.shh.phhaah.....FlssallLsalGupPs-......Pah..h.luplhol.h...YF ........................PANPL.s.TP.PHI.KPEWYFLFAYAILRS...............................................IP...............NK...L...G.G.VLAL....l....hS.I...L...l.Lh.llPh...LH.............T.S.K...Q.R.....u.h.....t.....F...R....Plo.....QhL.....FWh.....Ll.Ash.hlLT.WlGu..PV.Ep...........Pa.l.hIGQlASh.h...YF............................................ 0 158 359 481 +1706 PF00033 Cytochrom_B_N cytochrome_b_N; Cytochrome b(N-terminal)/b6/petB Sonnhammer ELL anon Prosite Domain \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.39 0.71 -4.94 92 8451 2012-10-03 10:28:09 2003-04-07 12:59:11 14 32 3033 2 1429 91159 3309 164.10 19 83.40 CHANGED httashhh+...........lh.HW..hhulhhlhhh.h...sGhhhh................................................................hhh..hshtthhhtlHhhhGhhhhhlhlh+lhhthhp........................................h.t.t.ssttttshhtphshhhlhhh.hhhhsloGhhh....................................h..hh..stshhphh.....thlH.thhshllh.shlhlHlhhshhtphh....shlptMhsG 
.................................................hash..phhHW......lhul.shl.h.hh...h......oGhhhh....................................................................ths.t...h.s.h..h.h..h...h.p.......l..HhssG.hh....h....h...l.s..hh..h+l.hhthht..................................................................................................................................................h..h......h.p.h.ht..p...h.h.a.h.h.hhhh..hhhh...l.o.Ghhh................................................h...........h...h..................h..h.....thh.............t.h.H...h.h.s.h..h.hh.hhl.hhHhh..h.Al.hhph.........sslttMh.......................................................................................... 0 342 765 1112 +1707 PF00034 Cytochrom_C cytochrome_c; Cytochrome c Bateman A, Sonnhammer ELL anon Prosite Domain The Pfam entry does not include all Prosite members. The cytochrome 556 and cytochrome c' families are not included. All these are now in a new clan together. The C-terminus of DUF989, Pfam:PF06181, has now been merged into this family. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.47 0.72 -3.15 237 12531 2012-10-03 10:02:11 2003-04-07 12:59:11 16 182 3088 283 4329 18590 7463 94.00 18 36.97 CHANGED hspG...pplh.p.t.......pCssCH.utssps...........................................Pslsshstthhhpthtthhttt..h...........thh.tth...shh....................................loc.p-htslssal.php .........................................pG.ttlh..t..t...............sCsu.CH...us...s.s..t.u.st.h................................................................................hPsL..t...s..h...s.....s....p.....h......h...h.....t.....t......h......t..t.........h...h..s...s...s.....t..t.......h..........................................thh.....t..t.....h..............................................................................loc.p-..hpsls....uYlts..t........................................................................................................ 
0 1149 2550 3504 +1708 PF02167 Cytochrom_C1 Cytochrome_C1; Cytochrome C1 family Mian N, Bateman A anon IPR002326 Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.64 0.70 -4.69 124 1691 2012-10-03 10:02:11 2003-04-07 12:59:11 10 8 1421 83 579 1376 2156 190.90 30 70.78 CHANGED a.....sasG.hspaDpsuLQRGhpVYppVCuuCHShphltaRpLsc.............luho.......-sp..........l+shAtp...p.h.s....sGp........Rsup.uDhhssshsscpAtthhssGAhPPDLSLlu+ARt....................sGs-YlaoLLpGY........hc.s........................Ps......................tsstahNshF.......................................su..luMs.sL.h-t....l.....................................................pasD.Go.........ssoh-QhucDVssFLhWsAEPphppRKphGhhVllFL.slhsslhYhhp+ph...Wpsl ...........................................shhDps.ulpRGhplahph.C.uCHSh.phhtapplhs.............lshs..tpp..................................h+t.hh................................pup...........phuDhh.sshssp..pAsphh..GAhPPDLSlls+uR..........................GsDalashL..pua.........p.s......................................ss................t.hh..h.N.hF.......................................ss.....luMstsL...p.....s..........................................................phts..Gs.............stphsph..scD.lssFLtasuEP...tt.tpR.+phGhhlhhaL..s.l.hhh..ls..hhh..K+phWpp........................................... 
2 173 330 465 +1709 PF01322 Cytochrom_C_2 Cytochrome_C_2; Cytochrome C' Finn RD, Bateman A anon Sarah Teichmann Domain \N 21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.82 0.71 -3.57 95 629 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 492 53 234 648 813 114.10 23 73.21 CHANGED pc...s.lchRQuhh.ph.hutshu.sluuM.s+GchsaDsststtsAsslssluphshst..F.stGocsus.....TcApPpIW.pchscFppphsphppsuspLsssA.tsG..Dh.sul+sAhsslGts.CKuCH-saR ............................................................................t...lp.Rpshhp..ht.ph.t.thssh.hc.G..p..sa-stt..htttut..tls..th.u.ph........ph..F...tsspts...........scAhstlW..p..c..hscFpt.ttpphpsssspLss.uA....psu...ch...s....slpsuhsp.huts.CcuCHcsaR............... 0 54 144 186 +1710 PF02085 Cytochrom_CIII Cytochrome_CIII; Class III cytochrome C family Mian N, Bateman A anon IPR002322 Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.92 0.72 -11.89 0.72 -3.92 24 410 2012-10-01 23:37:15 2003-04-07 12:59:11 11 26 147 90 206 698 264 94.70 21 46.57 CHANGED P.....hshcsPssst.........pptsVsFsHpsHt.ps.....sCpsCHHt........tsttphssCss..CHss.t........scps.tsh...hpAhHspps..........oClGCHpphttpt.t.............ps.suC.stCH ................................hht........................tsl.FsH..t..hHs.ph..t..........sCpsCH..+s.......................sstssh.....t..s.....Css........CHsth...........................tptt..h.th.........ht..u..h..Ht..............................pCh.s.CHtp.t................................C....CH................................ 
0 92 172 198 +1711 PF03264 Cytochrom_NNT Cytochrome_NNT; NapC/NirT cytochrome c family, N-terminal region Mifsud W anon Pfam-B_1404 (release 6.5) Family Within the NapC/NirT family of cytochrome c proteins, some members, such as NapC Swiss:P33932 and NirT Swiss:P24038, bind four haem groups, while others, such as TorC Swiss:P33226, bind five haems. This family aligns the common N-terminal region that contains four haem-binding C-X(2)-CH motifs. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.29 0.71 -12.03 0.71 -4.83 111 2644 2012-10-01 23:37:15 2003-04-07 12:59:11 9 24 1304 8 392 1397 89 166.30 41 59.71 CHANGED ps...stphslshll...llGhlsGllhauuhpsshchTsopcFC.lSCHp.Mpss.apEappolHapNpsG.VRAs.CsDCHVP+..chh.sKhhpKhp.Au+-latch.hGplsT.EKF-.t+RhphApctWtch+sssStpCRsCHshp.tMch..spQ....ptpApptHpps..................h....tpsp..TCIDCHKG.....I.AH.....phP.ch ................................h...+htlhsll..llG..h.l..sG..l..h..h..h.ssh..p....su.hc.h.T.....sopc.FC.lS.CH...p..Mps.....s.....YpEY.p.......p.......o.l.....Ha......p...........N.t.......pG....lRAp...CsDCH..l.........P..+............-hs...shhh.pKlp....ASK-lYtph..hs.pIDT........P-K..FE......s.+Rhp.....hAcp.pW....p.chK....sN.sS...t...pCR..s..CHsa-....tMDh...spQ............pspA....s.+..........aptA.......................................h...........ccsp.....oCID..CH.KG..IAHpLP-......................................... 0 120 230 327 +1712 PF01801 Cytomega_gL Cytomegalo_gL; Cytomegalovirus glycoprotein L Bashton M, Bateman A anon Pfam-B_1420 (release 4.2) Family Glycoprotein L from cytomegalovirus serves a chaperone for the correct folding and surface expression of glycoprotein H (gH) [2]. Glycoprotein L is a member of the heterotrimeric gCIII complex of glycoprotein which also includes gH and gO and has an essential role in viral fusion [1]. 
25.00 25.00 144.60 144.50 20.70 19.80 hmmbuild --amino -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.38 0.70 -4.85 7 80 2009-09-12 02:32:10 2003-04-07 12:59:11 11 1 37 0 0 58 0 204.20 59 82.04 CHANGED hpscChcshhpChpu......p..ShhpPlhs....cusl......hSpLIRapp...........ptphhss..lhls--FL-plhLLaNNssQLRsLLTLl+.S-sussWhsahpGYspC.tsssslaTCV--lCppYsLp+LpYspslFsEsVlGFElssP........shulLlhlcNptT+ss+lVRlssso......loLFDulYNhl+pFhhch.ulshsLlpcLcpYpspLPs.a+puc..slhtRsh .ss.sECsELTRRCLhGEla.pusch-SWL+PLVN.VTsR....DGPL.......SQLIRYRPl..........TsE.AAso..VLLDDuFLDTLALLYNNPDQLRALLTLLu.SDTAPRWMTlMRGYSECGDGSPAVYTCVDDLCRGYDLTRLoYspslFTEpVLGh-lsPP........hhsVlVhlRNptT+sp+sVRlPs..oo......loLF.ulYNhl+phhh+h.pLDssLlppLcpYhstlPs.h+poc..pl.tpp.h........ 0 0 0 0 +1713 PF02239 Cytochrom_D1 D1_heme; Cytochrome D1 heme domain Bateman A, Mian N anon Pfam-B_3322 (release 5.2) Domain Cytochrome cd1 (nitrite reductase) catalyses the conversion of nitrite to nitric oxide in the nitrogen cycle. This family represents the d1 heme binding domain of cytochrome cd1, in which His/Tyr side chains ligate the d1 heme iron of the active site in the oxidised state [1]. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 369 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.20 0.70 -6.16 8 7650 2012-10-05 17:30:42 2003-04-07 12:59:11 11 55 672 42 338 8166 222 221.80 52 91.82 CHANGED phshtNLFsVs.RssGplsllDGDspc.luhl-otYAl.HhShh.us.GRYlYVhGRDGtlohlDLaspc..hVAEV+hGhsuRulslS.....-s+allsGsYhPsphslhDucTLE.lpllsTpGhssss....spsRVuAllsu.h+spFVVslK-TGclhllsYosh.cslpTppIsuA+aLaDuuassstRYalsuh.ups...KlslhcsccGthssLlDs.G+ss+sh.sushsH.phG.hWssuphtshshshIGpcshs....hhctpsWKhlppIsstGsG.lFl+THPcSpalWVDshhsP-...scuVtVhDpcsL.........lshsltshsGhs.......VlpsEFs+cGDpVWlSVWsucs...ALVVaDs+TLcLh+sls...hhoPoGKFNVaNs .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................AuIlu..S....pP-.alV.Nl.K...E..T.G.pI.L.L..V...s........YpD............l.....c......N....................L....p....................s......T...............s..I............s....A..A...+......F.L..H.......D..G...Ga.....D....u....o.....+....R.....Y...F...Ls.A.....A.....N.t.Ss..............K.lA..VVD..o.K.....-..p.......K...L..s.......A.....L....l....Ds...sc..h.P.H..P.G.....R...G.AN.a.s....cPc.a.......GP.V..W.....sT..u.....t.........L..G..s-..s....l.o...h...IG...T....D..Ptp..........H..p.p...A...WKVVp..slc..G..p...GG..G..SLFl.KT.HPp.Sp....pla..s..Ds.s..hs.P-.......thup.o.l.s.Vach..tsh......................h.h.....l...t......st.......................................................hhp...cas.tG.p....hhh........................................................................................
............................................................................................................................................ 0 91 229 292 +1714 PF02109 DAD DAD family Bateman A anon Bateman A Family Members of this family are thought to be integral membrane proteins. Some members of this family have been shown to cause apoptosis if mutated [1], these proteins are known as DAD for defender against death. The family also includes the epsilon subunit of the oligosaccharyltransferase that is involved in N-linked glycosylation [2]. 20.20 20.20 20.30 21.10 20.10 20.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.48 0.71 -4.02 30 370 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 306 0 230 340 0 104.90 45 83.17 CHANGED sus....h.s.llsphhppYhspTP..p+LKllDAalhalllsGllQFhYCsLl.GsFPFNSFLSGFIosVGpFVLslsLRlQlN.......tNcspF.hulSs.ERAFADFlhAsllLHhlslNFIG ....................ss...h.sllpphhppYhssTP..p+LKllDsalhalllsGhlQ...FlYshLl.GsFPFNuFLSGFlSsVGpFVLslsLRlQlN......................pNttpF..tslSP.ERAFADFlhusllLHhllhNFls....................................... 3 77 127 189 +1715 PF00130 C1_1 DAG_PE-bind; C1; Phorbol esters/diacylglycerol binding domain (C1 domain) Bateman A anon Prosite Domain This domain is also known as the Protein kinase C conserved region 1 (C1) domain. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.82 0.72 -4.23 46 6123 2012-10-02 13:15:50 2003-04-07 12:59:11 17 344 330 24 3317 5447 1 52.60 31 8.26 CHANGED HpFttp......sa.tpssaCstCsphl...........h.htppGhpCp....hCphpsHc+..CtptVsstCsss ...................HpFhhp............sa..pp..Ps....hCshCpphl.........................hG..hhp.QGh.pCp..................tCp.h.s...sHc+..........Ctp.plsspC...t.................. 
0 793 1126 2107 +1716 PF03982 DAGAT Diacylglycerol acyltransferase Finn RD, Wood V anon Pfam-B_11378 (release 7.2) Family The terminal step of triacylglycerol (TAG) formation is catalysed by the enzyme diacylglycerol acyltransferase (DAGAT) [1,2]. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.82 0.70 -5.72 6 1100 2012-10-02 00:16:30 2003-04-07 12:59:11 8 11 361 0 723 1216 48 230.60 28 64.76 CHANGED ahLFhTspWhllsLYulWhhYDhsoP++GuYpssWsRphtla+aFAsYFPlpL.hKTu-.Lsss+NYlhGYHPHGIlulGAassFuTsuTGlhcpFPGIR.plsTLs..GpFhhPaRREhhlhhGhI-sSRESI-alLs+stp.G+AlVlVlGGApEAL-AHPGpcsLTLtsRKGFV+hALcpGApLVPsYuFGENDlYcQh-NPcGSpLRphQchhK+hhGhoPPlFaGRGlFs.hshGLLPaRKPlsTVVGtPIpVsKs.cPTpEpIDclHuhYh-tLt-LFEEHKsKaGls.sspLllp ..............................................................................................................................h.............................................................................................h......p...................h.hph..h.h.t..Y..F..s.h.pl....h....p......s.....t....h.........s.p..p....p..........Ylh.uh.aPH.Gl.hs..h..u.h.....hhshs.......p.......................t....................h............t..............h....hs.t.h.t..........l.....h..sls.....F...h.....h.P.h...h.R-hh.h......h.....G.hs.ss.scpshth...h...L...........s..........p...........t...........t...........t...............lhll.sGGst..E.u.L.....p.s......p..s....s..p............p..l....hL.p.p.R.+GFl+l.AlppG........s......s.......LVPshsFG..E.......s.......-.......l.......ap...h................t........h....h..............h...t.......h.h....t....p.h........h.t.....h...s.....s..h.h.......h..u.....h........h.........................h............P.h..p...............t.s....l..h........V...l..G..p....Pl..............l..............p................p.P...s........p.......lpt.h.+.t.....ah.t.tL.plapph+..h.s...............h................................................................................
.................. 0 239 386 554 +1717 PF01219 DAGK_prokar Prokaryotic diacylglycerol kinase Finn RD, Bateman A anon Prosite Family \N 21.90 21.90 22.40 25.20 21.50 21.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.08 0.72 -4.44 186 3222 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 3038 3 605 1802 403 102.60 36 78.24 CHANGED htYuhpGl.ttshppEtuFR.clhhullllsluhhl......slothEhhlllhslhlVLhsEllNoAlEslVDhlu..-aH.LuttAKDhuuuAVhluhlhusllhhhllh .............h.tauhpGl.tsAhp.pEtsFRpchhhsllslshu.hh.l......slstlEhllLlhulhLVlhlEllNoAIEslVD.hlu....s-aH..Lu+pAKDhuuuAVLlshlhA..s..lshhllhh....................................... 0 181 367 508 +1718 PF00609 DAGK_acc DAGKa; Diacylglycerol kinase accessory domain Ponting C, Schultz J, Bork P anon SMART Family Diacylglycerol (DAG) is a second messenger that acts as a protein kinase C activator. This domain is assumed to be an accessory domain: its function is unknown. 21.30 21.30 21.40 21.90 21.20 21.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.18 0.71 -4.09 42 1202 2009-01-15 18:05:59 2003-04-07 12:59:11 14 102 195 0 702 1147 5 153.50 36 20.57 CHANGED sMsNYFSIGhDAcluhsFHptRppp..PthapuphtNKh.hYst.hGspc.hh.ppshtsh..cplplc..................s-sppl....plP..ssculllLNIsSauuG.sshWssspppt...................at.tshsDGhLEllulpushclutlpssltps.hRluQu.spl+lphpp............phshQlDGEP 
..................................................................................................hNNYFulGh................DAp..luhcFHptR...E.cp......PpK.....Fs...S..Rh.pNK..............h......hYst.hGspc......hh..ttot.+s...L..ppplp..lp................................................................sDGptl....pls...slpu...Is.hLNIPS..asGG.sshW..Gsspcpp...................................................ats.phsD.thLEVVGlp..ss.....hphu.t..lp.......s...s.h......t....ps...hRl....AQs.pplplphpp................slPhQ..lDGEP..................................................... 0 211 323 501 +1719 PF00781 DAGK_cat DAGKc; Diacylglycerol kinase catalytic domain SMART, Coggill PC anon Alignment kindly provided by SMART Family Diacylglycerol (DAG) is a second messenger that acts as a protein kinase C activator. The catalytic domain is assumed from the finding of bacterial homologues. YegS is the Escherichia coli protein in this family whose crystal structure reveals an active site in the inter-domain cleft formed by four conserved sequence motifs, revealing a novel metal-binding site. The residues of this site are conserved across the family [5]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.93 0.71 -4.71 125 7254 2012-10-02 15:20:27 2003-04-07 12:59:11 19 132 3378 25 2390 5666 830 128.70 23 30.86 CHANGED phhlllNPpu..Gpppsp.......plhtt.h..ppt..h..p......h..pl.hps..p..p..tsss..pt......hth.hts.hsph...............tllss.GGDGTlspVlsul...........h...p......p...sh...................s...lullPsGTuNshA+slth.stshtt............................................tthhthht.pht..sp...ssthsph ............................................................................................hhllhN.P..p.S..Gp.ppst......h.......tplht.h..h...ppt...t....hp....................h...pl...hho.......pp.......tssu...pp....................hst...hts..hssh...................................shllss.GGD....GT....lsc......V.lsul.................................................h...p......p.sh.................................s..lull...P..h..G.T.u.N.DhA+.sL.s.l..s.ts.tt............................................th..hhht.th....hsh......t.......................................................................... 0 772 1353 1927 +1720 PF00793 DAHP_synth_1 DAHP_synthetase; DAHP synthetase I family Bateman A, Griffiths-Jones SR anon Pfam-B_1032 (release 2.1) Domain Members of this family catalyse the first step in aromatic amino acid biosynthesis from chorismate. E-coli has three related synthetases, which are inhibited by different aromatic amino acids. This family also includes KDSA which has very similar catalytic activity but is involved in the first step of liposaccharide biosynthesis. The enzyme is also part of the shikimate pathway, EC:2.5.1.54. 
19.80 19.80 20.50 20.10 19.30 19.60 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.66 0.70 -5.71 28 8958 2012-10-03 05:58:16 2003-04-07 12:59:11 15 18 4112 276 1994 5818 4302 285.30 28 87.41 CHANGED chsls.t.............slhhGpspphlllsGPCSlEs.-tsh-hAp+Lpplus+ht..lhllh+uah-KP.Rooshua+Ghh.-stLphhtclpcsh..............GlslsTEhlDstssphlA-hsslhpIGARsscspshlctAuthspPVslK+Gps....sulpthtsAuchhhhhGs..........ssslhhC-RGlRsGc.tsNppshDlpulshhpcts.hh.......PlhlDsSHusup+s...........hh.p.hVsshspuulAsG...hsGlMlEsH.sPupAhsD.......usptLsht...shtphhh-hsphlhp ...............................................................................................tthhpht...................plhhGp...D...p..chlllsGPCSlc..s.-tsh-hAc+.Lt.s.l...s..p..c..h..t....l.lVh+saa-KP..posh.ua+GL....ht......-......st.......l..c.hhhclpcsh.................................GlP.s.s.T.Ehh-s.ss....p.h.l.u.D.l.l........s........hhtIGARs.scs....pshh....c....hA.....u......t....h.......u..t......P....VshKpGps..............luhsshtsAs..p...h..th.h.G........................spss...hschhl...R...s......G......p..t......sN....h.t.s.hD..l..tsss.....h.h.hpts....tl................sl.llD...soHusup+s...........htp..p....hVs.p.............s......p..s......t...lusG.................lsGlMlEo..H.s..s...uptp.s.-........us..p..tlsh...u.p.hs.hshhhp.sp.h.t.................................................... 0 632 1204 1652 +1721 PF01474 DAHP_synth_2 Class-II DAHP synthetase family Bateman A anon Prodom_1974 (release 99.1) Family Members of this family are aldolase enzymes that catalyse the first step of the shikimate pathway. 
19.40 19.40 22.40 19.50 17.40 19.20 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.44 0.70 -6.12 111 1570 2012-10-03 05:58:16 2003-04-07 12:59:11 11 4 1229 18 510 1307 2300 383.10 49 93.61 CHANGED pWsssS...WRspPhtQpP.sYPDtstLpplpppLpphPPLV.FAuEscpL+ppLApVApGcAFLLQGGDCAEoFs-hsuspIRcph+llLQMAlVLTauuuhPVVKVGRhAGQaAKPRSushEsh.....-G....lsLPSYRGDhlNuh-FotpuRhPDPpRhlpAYppSAuTLNLlRAaspGGhAc.Lc.plcp.......WshsFl.tpushup+Yppl..........AscIscuLpFMp....AsGhs.t......pl..............................ppschaTSHEALLLsYEpALTR......................h.............D..opo.Gp.........................a..YssSAHhLWIG-RTRQlDuAHVEFhRGlpNPIGlKlGPohss--llcLl-tLNPc....sEsGRLTLIsRhGAc+lpctLPtLlcsVcppG+p..VlWsCDPMHGNThpus..sGaKTRpF-cIlsEVpsFFclHcup....GoasGGlHlEhTGp......sVTEClGG.upsls-.pDLssRYcTtCDPRLNupQuLELAFLlA ..........................................puWcsh.s.h.hQtP.pYsDt..t..tltpV.tpLpphPPLVh.AuEscp....LcppLApVupGcAFLLQGGDCAEoFsphss....spI+s.ph....+...llLQMA...lVLTauuph.PVVK.lGRh...AGQaAKPRSsshEth..........s.GlpLPSYRGDhlN.Ghphstpu.RhPDPpRhlpAYtpSuuTLNLlRAhspGGhAsLc..plcp........Ws..hsFl..tp..o.......t.t+Yptl..........ApcIscuLt.FMp....AsGls..........t.tL.........................................pps-haT.S....HEA....LLLsYEpAhTR.................................................................h.................D.....sts...sp.................................h..YssSuHhlWIG-.R....TRQlDsAHV-FhpsltNPlGlKlGPshss--llpLh-h.L..sPp.............s.-sGRLTlIsRMGuc+ltstLPtllcsVptp.G..p..p..VlW.sDPMHGNThpus..........sG.aKTRtF-pl..hsEVpuFFclHpu.....GoasGGlHlEhTGc......sVTEChGG......u......pslop....psLssRYcTtCDPRLNspQuLELAFllA..................... 0 158 324 433 +1722 PF02733 Dak1 Dak1 domain LOAD anon LOAD Family This is the kinase domain of the dihydroxyacetone kinase family EC:2.7.1.29. 
25.00 25.00 26.10 25.40 23.90 23.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.91 0.70 -5.87 9 2939 2012-10-02 12:41:15 2003-04-07 12:59:11 12 12 2046 34 705 2165 413 309.40 40 77.08 CHANGED LtGLspuNPs.Lsllpcs+Vlhpsss.....s.sKVullSGGGSGHEPsHAGFVG.cGhLsuAssGpIFASPSoKQIaoul+AV.puspGsLlIVKNYTGDILHFGLAAE+A+AtGhpsEllsVuDDVSVG+p+suhVGRRGLAGTlLVHKIsGAAAtpG...LsLcpVsplAcplssNhsTIGuSLsHCTlPGpchpsp............LscsEhElGMGIHNEPGht+tuPlPolD-LVuc.hLchLLspssc-Rsa.................VphsssD-VVLLlNNLGGsS.hELhsltppls-pL.pcasIpPsRshsGsasTSLsGsGFSITLlpsocsstchhcthscssshsssh ...............................tGhshu.asp...l..p.h.h....t..s..s..p...l.l.h.+tch.......tpsKVullSGG....GSGHEPsHuGaVG.cGMLsAAssGplFoSPos-plh.p.A.h.....+.................s.s.......c.......s...............G..t...........G...V.LhllK........NYoGDlhNFchA.....s.Eh..A.c.t.......-....G..............l....c.VcpVll..sDDlAlp..s..s..h...............hs...ssRRGVAGTlhl......aKlsGAA.A-pG........tsL-p.lt.p...luc........+ls..s.....p.....s+..olGlALssCTlPusG.+ss..F.p...........L.s...-.sE..hEhGlGIHGEPGhc+pph..ps...ucp.lspp.hhs.......pllpp.....hs........................................h..t..t..u.......c.cVhlLVNGlGuTPl..ELallhscl.t.ph.L..p.p..pGlp.lt+sh.l.G..s..ahTSLDMsGhSlTLl..c.l..D.-..-.h.h.t.hh.ps.st.......t....................... 0 221 420 588 +1723 PF02734 Dak2 DAK2 domain LOAD anon LOAD Family This domain is the predicted phosphatase domain of the dihydroxyacetone kinase family. 
23.70 23.70 23.80 23.70 23.60 23.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.98 0.71 -4.60 175 4573 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 2828 8 1004 3192 390 167.00 29 41.01 CHANGED slGDGDpGtsMspGhpushctlpsh.......ttsshuplhpslupshhpssGGsSGslauphhtuhupsl.....ps.ppp........h..ssps.hupulpsuh.pulpph.u......tAcsG-+ThlDsltPsscshpp.sht.......tsh.phhptuspuA.cpuscsTtshh.sphGRAthluppsh.G......hsDsGAhuhshllcuhtp ...........................................luDGDpGsNMstuhpustcplpst.....................spslu.....plhpshutslltsstGsSGslh..........uphFtuhupsl..............ps..ppp.................l...sspp.lup.u..hpsul.suhpuh.........scss-tThlsVhtss....scuhpp..utp................tts..h.t.............h....h...ptshcsAcpuh.ppT..shl..Ath..GRA..........G.......hlDsGupuhhhlhpuhh.p..................................................................... 1 338 633 857 +1724 PF03045 DAN DAN domain Bateman A anon Pfam-B_1968 (release 6.4) Domain This domain contains 9 conserved cysteines and is extracellular. Therefore the cysteines may form disulphide bridges. This family of proteins has been termed the DAN family [1] after the first member to be reported. This family includes DAN, Cerberus and Gremlin. The gremlin protein is an antagonist of bone morphogenetic protein signaling. It is postulated that all members of this family antagonise different TGF beta Pfam:PF00019 ligands [1]. Recent work shows that the DAN protein is not an efficient antagonist of BMP-2/4 class signals, we found that DAN was able to interact with GDF-5 in a frog embryo assay, suggesting that DAN may regulate signaling by the GDF-5/6/7 class of BMPs in vivo [2]. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.34 0.71 -4.70 20 362 2012-10-02 16:54:34 2003-04-07 12:59:11 10 7 107 0 209 428 0 113.40 30 56.02 CHANGED phhpctlhctsttu.....hshppp.h+pshC+shPhpQpl.sc-GCcshsltNphCaGQCsSaalPpp...t...........hpsCupChPs+hpthplsLpCsups.s......hKcVhhVccCpCpssppc ...............................h.....................h.thh.h+pshC+spPlp.Qsl...pc..p...G...Cpu.p.s.l.N+hChGpCs..Sa....h..l...Ppp......ptts...............................hppCsp.CpPtchphhhVsLp.CPshps.s.p...........hK.pl....tpl..c..pCpC.sh..t........................ 0 42 63 121 +1725 PF01266 DAO FAD dependent oxidoreductase Finn RD, Bateman A anon Prosite Domain This family includes various FAD dependent oxidoreductases: Glycerol-3-phosphate dehydrogenase EC:1.1.99.5, Sarcosine oxidase beta subunit EC:1.5.3.1, D-alanine oxidase EC:1.4.99.1, D-aspartate oxidase EC:1.4.3.1. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.02 0.70 -5.55 530 27418 2012-10-10 17:06:42 2003-04-07 12:59:11 19 195 4663 179 8594 47340 28561 332.00 16 72.99 CHANGED 
-llllG.uGlsGhusAhpLuc.........pGh...pVsllEpp.phut..................................usS...s.ts..uGh..lpsth.....................................................pts..hh.hp.hs.hpuhph....hpp..............................................................................................h.....hpph...................shs....h.....t.h.ptGtlhls.h.......sppp..hp..th.............tt.hp..hhpp.tsh......thc.....hlstp.phtph............................s.hs.....s.........th........hu...............u........lh.hsssu.hl..cstphstult...p....................t..s.t..p.t......G..sp......lh...ppsp.....V............psl......pt..psst.............................................ht.lp...............................s.....t.pu........p.hps.............ctllhAs........Gs....as....sp.l.ht.h.sh............................................l...........hshcu....h..lhhp.......s................htt...h.....................h......h.........................................................t.hhh...........hh............................h.h.............h..........................................................s.tp..sp.......h...............hs............sshsth.............................................t................................................................t.......................tl.......................................................................h..ptht.......phh...Ptl.............................................tpspl.htt......a.sG.........................................................................................hc....sh..s.......................................................................sDth.............................................................hhtt....ht.slhhs..tGhss................Ghth....usssGc.hluph 
.....................................................................................................................................................................................................cllllG....uGlsG..h..u..sAhtLsp..........................tGh......pV.s..l..l-..t...p...t...h.ut.................................................................s..s....o.........s...t...s.........s.u..h..l.tst.......................................................................................................................................ttt.....h....h...h..p.....h......s.........t.....u...h....p..h......h.t.t........................................................................................................................................................................................................................................................................................h................ht.p.h......................................s...h..t.........h..............t........h.....................u.....h......l....h..l....t...h................p.t..t.p.........ht.....h........................................t....h.......t.........h.....t.t.......hsh..................th.p.............................h..l...s.....t...t......p.....h....t..t..h........................................................................s.ht......t.....................h..................h.s..............................u..............h.h.........p..s...s......h.....p.......s........t.....p........h.....s...h.....s....h.h....p...........................................................................t...s..t..p..t......................G....sp....................................l..h....t.t..s...p........V................p.s..l..........tt....p...s..st.............................................hh....l..p.........................................................................................................................s......t.p.s..........................p..lpA........................
..ct.l..l...........A..s...................G....s............h..s..........tp...l...h.t...h...t.h...............................................................l...........hs.....h....c..u.........h...h....h.ht.............................................t.t...h...................................h...............................................................................................................t..........h................h..h.h..........................................h.h.........t....h..h..hu.......................................................t...tt......h...........h.s.............s.s.........................................................................t..................................................................................................................................t.th.......................................................................................................................................................................................................................................................h....p..t..h.......p.h...h........P..t.h.....................................................tt.h...p.h......tt........h..s.G..............................................................................................h.h...........sh....s.......................................................................s-th.....................................................s.h..s..t.............................h............s.....h........h...hs.......u..hs..t...........................G..h....t..h....u.hhup.hhs.............................................................................................................................................................................................................................................................................................................................................................................................................................................
...... 0 2308 4853 6950 +1726 PF01678 DAP_epimerase Diaminopimelate epimerase Bateman A, Griffiths-Jones SR anon Pfam-B_2089 (release 4.1) Domain Diaminopimelate epimerase contains two domains of the same alpha/beta fold, both contained in this family. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.47 0.71 -4.20 28 7321 2012-10-03 03:02:41 2003-04-07 12:59:11 14 6 3504 42 1855 5624 4219 119.90 25 83.62 CHANGED hshspGsspshlhls-hcp...tshhp.......hsttlps+pthssusslhalp......sssctclphR..........lapssuuEsptCGsG.stshuthlhptth......tppslpVcs.uG.lhlplpsssp.....hh.hGssphl .......................................................................sh.hGsspshhh..l....D..s..h..sp...........ts.h.s...........................hu..t.l..p..s..+...p.....h......h.....s........s.t.s..s..lhhlp.........................hssc.s..c..h..chR................................lap.p.s.u.u..EsptCGsG...st...s.s..u...th...l..t...p...t...t..h..................tpp..p..l..p.Vps.....u..G.p....l....t..l..p.h..p..s..sst.............hh.hG.s........................................................ 0 612 1251 1586 +1727 PF05173 DapB_C Dihydrodipicolinate reductase, C-terminus Studholme, DJ anon Prosite Domain Dihydrodipicolinate reductase (DapB) reduces the alpha,beta-unsaturated cyclic imine, dihydro-dipicolinate. This reaction is the second committed step in the biosynthesis of L-lysine and its precursor meso-diaminopimelate, which are critical for both protein and cell wall biosynthesis. The C-terminal domain of DapB has been proposed to be the substrate- binding domain. 
20.80 20.80 21.00 21.10 20.20 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.70 0.71 -4.38 189 4343 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 4160 36 1018 3039 2348 134.50 37 51.96 CHANGED GlslhhplscpAAchhss....aDlEIlEhHHppKhDAPSGTAlplu-slspst......................t....pt.thhs.p............sp.IslpulRsGsllGcHpVhFsutsEplplpHcAhsRp..sFAtGAltAAcal....h...s.p.....G...lYshpDlL ..........................................Glslhhclhc....pAA.+.h..hss.....hDlEIlEhHHccKlDAPSGTAlphuEsIAcsh................................spph....tt..thhsccth............ttts.ssIshpolR.u..G..slV.Gc..HpV.......hFus.tGE..p..l..p..IpHcAt.s.Rt..sFAsGAlpAApalt.........s....+..........sG......lYshpclL.................................. 0 327 676 865 +1728 PF03344 Daxx Daxx Family Finn RD anon Pfam-B_3933 (release 6.5) Family The Daxx protein (also known as the Fas-binding protein) is thought to play a role in apoptosis, but precise role played by Daxx remains to be determined. Daxx forms a complex with Axin. 
26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 713 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.35 0.70 -6.04 7 169 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 84 2 75 167 0 470.80 32 71.69 CHANGED hAhsDsIIlLD.DDDtEEtsspPusS.sss..sts......ph..PKlpQPhsscIstusssou++csasLpsENpKLFpEFV-aCpspTpDpPEVlsFL+s+puKASPpFLuSVEF+NhLuRCLoRAQu+RoKsaVYINELCTVLKtHSsK++...lplpPsuups.....tp.t..PT-sh..s.......................Sup.pspc-p....tt-tppp+uSRRQItYLENLL+hYscEI+RLQEKELSLpEL--EDSoYIQEu+LKRK............................Lh+IapKLCELKGCSSLTGRVIEQRIsYpGTRYPElNRR....................IERFINtPEupps.PDYsDlLptlp+ANERHuLsLoRKQLpthAQDAFRElGsRLQERRHLDLVYNFGSHLTDsYKPusDPAL.DPoLAR+LRsNRplAlS+L-pVISKYA.hQD-TEEtERpK.RpE+ctpsspup.....tp.shtsssuspuP.hhtup-s.sppps-p-p-----.pp..p........psptt.--.-EE-pE.psssc.-p-h-sus-upusspEccpstp................................ttp..+..ohsspSPpspPtps.sh-.p..sEppcpphltp-pslSsHhphstluhs..............ppphsPsslshsLppsusshhopopsussoscsspppss.pt+pRpc+..........c.htut.hhsusl......h.csGpchp.LPhshsslsS.ushpspSopsDoPopt.sTso..........pTP..sPh+s...KlsVATQCDP-ElIVLSDS- 
.....................................................................................................................................................................................................................................s...altShtFpshlspshschptp.tp...haValpplss.Lpt...ct....++p..........p...h..sssptt............ss.....shp.............................................................................................t..pp.............ttpt.pppt.pc...c...pIthLpph..LthhstcIp+LpctElshsp.pc.D....SsYlp.t+hK++............................hhplat+lC-Lpspss.ssRhlcp..I..a..puTcYPclN+p.........................lpphlNp................ps.............hPDYtDlLphlp+........sstccsLsLsc..pphp.h..Ap..-AFpclG.hLQcRR+.DhhYshu..sHlTss.....sh..DPAh.Ds.Lhp+LcpNppht.s+lp-llpKYu.hQ-.p...ct...ptph.p..tc......tt...................pt..tt.......tt.......t....t...tt.t....t................................................tt...ptt........t..........t.....................t.................t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 23 28 50 +1729 PF02277 DBI_PRT Phosphoribosyltransferase Mian N, Bateman A, Moxon SJ anon Pfam-B_5739 (release 5.2) Domain This family of proteins represent the nicotinate-nucleotide- dimethylbenzimidazole phosphoribosyltransferase (NN:DBI PRT) enzymes involved in dimethylbenzimidazole synthesis. This function is essential to de novo cobalamin (vitamin B12) production in bacteria. 
Nicotinate mononucleotide (NaMN):5,6-dimethylbenzimidazole (DMB) phosphoribosyltransferase (CobT) from Salmonella enterica plays a central role in the synthesis of alpha-ribazole-5'-phosphate, an intermediate for the lower ligand of cobalamin [2]. 22.60 22.60 23.00 22.90 21.50 22.50 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -12.10 0.70 -5.34 26 2551 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 2248 35 647 2021 520 317.80 35 85.49 CHANGED llsplsssDstshstspp+.spLTKP.GuLG+LEpluhpluuh....pGps.P........sls+s.tlhVhAusHGVstpGVoshPtpVTsphlpNhht......GGAuIshlsppsGssl..c....VlDlulct................csoushsptsAM..occcsttulthGhchhtp.hstGs-LlshGE.hGI.GNTTsAAAlhuuLhGssspphVG.G.oGlsspsh..........p+KhslVccAlt.hppssh...sDPl.-lLsplGGh-lAAhsGhhLuAAhp+hPVllDGalssuAAllutp.lsPtshcahlhuHhSsE.GHthhLptLuhcPlLsL....shRLGEGoGAsLAhslVcuAsthhptMuTatpAsV ......................................................................................................................h....l.s.D.tt.hh.ttspp+......ctLsK.P.G....SLGpLEpluhp.lAuh......tup.t..s.....................pltc...tlllhuuDH.G..Vh.t......c.....G..Vos..h.P....ppV.T.t...t...hstN..h.h.t......Gsu..ulssluppsGs..p..l..p....VlDl.Glsss....................................................hph+ltpG.o.sshs....ptsAM....o..p..cp...s..ttulttGhphs.pp..h.h..........p.p.G.s.s.l.......lsh.GEhGIGNTTsA..uA.lhuslsG....h...............ss..p....p.....s....V.....G...h....G....s.Gls.s.st.l.............t+KhsllccAlp.....hp...p....s.ps.................pD....sl......-......lLu.plGGh-lu....uhsGshLuAA.p.p..p..hP...VllDG.Flss.uA..A.L..s.A..s..p....l....s....P............ss......psahlsuH.hS.sE......Gt....ph.hL..ptL...........u...........h............c...P...h...L.sh.....shRLGEGoGAs..LAh..sllcuAsthh.s.pMushttutl........................................................................ 
0 206 432 555 +1730 PF03880 DbpA YxiN_DEAD; DbpA RNA binding domain Bateman A anon Kossen K Domain This RNA binding domain is found at the C-terminus of a number of DEAD helicase proteins [1]. It is sufficient to confer specificity for hairpin 92 of 23S rRNA, which is part of the ribosomal A-site. However, several members of this family lack specificity for 23S rRNA. These can proteins can generally be distinguished by a basic region that extends beyond this domain [Karl Kossen, unpublished data]. 23.10 23.10 23.20 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.86 0.72 -4.31 173 3567 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 2412 2 811 2655 411 73.50 30 13.56 CHANGED sphplslG+ccplcPtslluslsscu.slss.ppIGpIclh-paoaV-lsps.hscphhpplp..psplp.G+plplchh .........phpIssG+c.cp.lcstcIlGAlss-u..s...lsu...ppIGpIclhs..s..aohVplspu.hspcslpplp..ps.+Ip..G+phplph.h........................ 0 257 512 682 +1731 PF04290 DctQ Tripartite ATP-independent periplasmic transporters, DctQ component Mifsud W anon COG3090 Family The function of the members of this family is unknown, but DctQ homologues are invariably found in the tripartite ATP-independent periplasmic transporters [1]. 
27.10 27.10 27.20 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.66 0.71 -4.39 186 5361 2009-09-13 06:40:50 2003-04-07 12:59:11 7 9 1674 0 1467 4565 5259 134.30 19 65.70 CHANGED hhhlhhhpVlhR..hh....h.....t.shs.h.spE.lsthhhlhhsh.luhuhshpcspHlplcll...hptl.ssphpthhp.hlssl.lshs...hss....hlhatuh...................................hhhhhtttptssshs...lPhhhhh.....hs...lsluh..slhsl...thl.hplhph.hps .................................................shllhhpVlhR....ah......h..............................spshs..h..s....-E.lst.hlhshhshlG.uuhsh.t..p.st.H.l..pl-hl..........hp.tl...ssp.h...pp..h...lp....l...lspl...lhlh....hsh..................hlhh.huh..........................................................hh.s.h.ht.h.s.p...h..o.ss.hs............ls.h.h.h.hh.............hs...lsluh....slhh.l....htl.tplhp.h..t......................................... 0 440 995 1263 +1732 PF03605 DcuA_DcuB Dcu; Anaerobic c4-dicarboxylate membrane transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.70 20.70 20.70 21.30 20.10 20.60 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.29 0.70 -5.62 10 2671 2012-10-02 15:12:49 2003-04-07 12:59:11 9 3 1383 0 245 1239 66 350.30 44 83.13 CHANGED ElllVLlslFlGARhGGIGLGlsGGlGllILsFshtlsPus..hPlDVhLIIhAVluAuAsMQAAGGLDahlQIAEKlLR+NPKplTlLAPlVTYhhTlhuGTGHsVYSlLPVIu-VAhcsGIRPERPhSsAlVuSQlAITASPISAAVVhhlAhLss......pulshLoLLtVsIPoTllGsllhulhsh.hpGK-LccDP-YQcRLscshh+phshspp......clssoAKlSVaIFLsullslVlhAsssslpsuhh..s....psluhshsIphFMLssuuLIllhTKlDsssIspusVF+SGMhAslslaGlAWLuDTahssHlspIKsshupllpsYPWhaAlsLFllShllsSQAATstsLlPlul.ALGlsP 
..........................................................pllllLlslalG.ARh.GGIulGhhGGlGlllL.s.h.s....ht.lpP..Gp.......PhDVhLlIhAVluAhushQsAGGLDhhlplAE+lLR+NPKaITllAPhVTahhTlhsGTGHlsaohLPlIs-VAhcpsIRPpR.PhuhusluuQhuIsASPlSAA.VVhh.s.u..h.Lts.........tsl....s....al..plLulsIPoTllulhlhuhh.s.h......hhGp.-.....LscDs..YQcRLpcs.h.cthshtpp...............cl.p...sA+hSlhlFLhu.llsVV........lhushss................ps...........uhs..............p................slshstsI.hhMLssusLIhllsKss.sspIs.ssslF+uGMsAslsl.hGlAWhu-Tahs..u..phs.IKsshupllp....saPWhhAllLFhsStll.SQAATstAlhPl.uL.ALGlsP.................................................. 1 41 118 193 +1733 PF03606 DcuC C4-dicarboxylate anaerobic carrier TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.46 0.70 -6.26 13 3902 2012-10-02 15:12:49 2003-04-07 12:59:11 10 5 2105 0 339 3372 265 444.50 24 97.14 CHANGED hPthhsllhhlhlhhhIlohahPsGsatpts..................................hl.hhhsshhspspslssshtssttthhs..hhphshh.hpscAsslshFIhhlGGhhulls+oGAh-sslttls....KhhptpcphhIhhlhshFshsssshGhsEto...lshhslLhPlhlslGasthsususlhhuossGhhuuThNPhslVlAupsAGlslt-GhhhRllshslhslluluasaaYsp+lpccP......hpp.cEppphhphcpup.sss....ssthhhhLhhhshlhhlhu.........h.h.hls.....................paahsplsslalhhulllsalth...hsshpsp-hhpuFhcGhuchh.sAhlVuLA+Gltlhhpss.....hhlsTIlshhssuhoshsu..lhhllhhllhhhhuhllsSGSG.AhhoMslhsPLAchlGlstshlllshQhusslsphluPTSGlVhAsLuhu+luhtpWlKhsus.lshhlllshlslllthh 
...............................................................................................................................................................................................hhsllhh.l.hlhhhlhphhh..PsGhh.h.t.........................................................................................h.l..h..h.h..s..s...hh...t.t.......t....sl..s.....s.......t......s..t.......t....hh...s..........................th............h...........hhs...puss...lh.h.h.IhhlG.G...h..h..ul.hp.+oG...A.-sslt.tl.s............+......h...p...........s...h...c....h...h..I......h.l..h..h.h.h.sh..s.s......h.s.h.uhs...p..ts......l.s.h.h.s.h.L.h.P....l....h.ls.lGhcs..........h.su...sus.l..h............hu....s..t..h.u....h....h..u............s....s....h............s.....................s.......s.......s....l.......A..........u...........p.......h......A.......G..........l........s.....h....h....s....s....h.......h.......h........+.......h....l...h...h.......s...l....s...s....l..l..s...l..h...h...s....h...h...a...s....p..+....h....p.cc...............................p.........t...c....p....t.......p...t....h..t..h.....p...t....s....p.........p...p.......p......hp.........t...p...t......h..h..h....l.....l...h..h..s..h..l...h.....h.l..h.u.......................hhhp................................................................................th.ah...s..p.h....s....s....h.....h..h...h..h..u.l...l..l.t.h.lt.........s..h.p...p.shhps.F...h...c.Ghu...s..h...............s.......Ah.l.V..u...l...s.h..u..h..t..l..h..h...p...sh.................hh.h.s...o...I...h...s...h....h...s..t...s...h...s...s...h..s.....s....h......l...hh..l...h..h..h..l..h..h.......h..l..huh.hh.sSuou.uhhhhs.l.hs.P..L..A....c.h..s.......G.....ls...t....t........h..h...l.h...s....hphusshsph.lsPs...o..............u....l....l..hushu....h..u.....+.........l.....s....h.h...p.hl.K..h...s..u..s..h..l....h..l.h..hl.h.sh.lhhhh...h.......................................................................................... 
0 106 193 281 +1734 PF03184 DDE_1 CENP-B; DDE; DDE superfamily endonuclease Mifsud W, Bateman A anon Pfam-B_2254 (release 6.5) Family This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. Interestingly this family also includes the CENP-B protein. This domain in that protein appears to have lost the metal binding residues and is unlikely to have endonuclease activity. Centromere Protein B (CENP-B) is a DNA-binding protein localised to the centromere. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.35 0.70 -5.20 20 3031 2012-10-03 01:22:09 2003-04-07 12:59:11 14 109 208 0 2383 3243 38 158.50 17 38.69 CHANGED hsscslaNhDETGlhhshhsstt....s..ttpth.stpt..pcchTshlsssusuuc+L.PhlIhKups.cshph..................htpshshpacssppuWhosplht-WL.chFssphp........s.sc+hlLlLDuasuH..ssph......hhpNltllalPspsopllQPhDhGlhsshKtpYRpphhphhltstsshps...........................t.shhchlchhtpAWp.ulspcsItsua 
..................................................................................................hhsh.c.p.....h.....t.................................................................................p......p...p.h..h.hhh...s....ts..s..s...hh.........h.h.lh...t...t....t........................................................................h......h..t......p.t...pu.a........h.s..p.......t..hh..t...ah......p...h.....h.....ht......................tp....hhL.lhDs.....h.ts..H....s.th........................t..p..l.....h.h.h.....h........P........s.........p.s..op......h.............h............Q............P.......h.....D......h.....s.....hht.sh.K...th..h.t.t.h.......t.h......................................................................................................................................................................................................................................... 0 825 1401 2161 +1735 PF02862 DDHD DDHD domain Bateman A anon Bateman A Family The DDHD domain is 180 residues long and contains four conserved residues that may form a metal binding site. The domain is named after these four residues. This pattern of conservation of metal binding residues is often seen in phosphoesterase domains. This domain is found in retinal degeneration B proteins, as well as a family of probable phospholipases. It has been shown that this domain is found in a longer C terminal region that binds to PYK2 tyrosine kinase. These proteins have been called N-terminal domain-interacting receptor (Nir1, Nir2 and Nir3) [1]. This suggests that this region is involved in functionally important interactions in other members of this family. 
23.30 23.30 23.70 23.30 23.00 23.20 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.45 0.70 -4.56 51 906 2009-01-15 18:05:59 2003-04-07 12:59:11 12 34 250 0 612 855 0 183.70 25 23.63 CHANGED LcFcscshFhhGSPlGhFhhl+ttph.sp.............................................s.ssphspPsCcplYNIFHPsDPlAYRlEPLlssphup.lcPh.lPptppt.hhtht........................................ctlsslusphptsht.......................ht...h.pt.s.stpspp.tpsss.pph...............................................................................p.pppp.ppp....tttphphttLNsst......RlDYsLp.tu..sl-..spalsslsuHsSYWpspDsstFllpplhp ................................................................................................................................................................hpFcspthFhhGSPluhhLhl.+th.t.......................................................s.......hs...tCp.phaNlaHshDPl.AhRlEPhl..pht.......l............tPh..lPh..hpt.h.h.hp.ht.................................................................................................................................h.pthttt....h..............................................................................................................................................................................................................................................................................................................................................................................................................................h..h.thh...t..s..t...............RlDa.L....t....lp......ph.h.sh....HhsYWpstDsshhllpplh.......................................................................................................... 0 170 259 468 +1736 PF03345 DDOST_48kD Oligosaccharyltransferase 48 kDa subunit beta Mifsud W anon Pfam-B_3520 (release 6.5) Family Members of this family are involved in asparagine-linked protein glycosylation. 
In particular, dolichyl-diphosphooligosaccharide-protein glycosyltransferase (DDOST), also known as oligosaccharyltransferase EC:2.4.1.119, transfers the high-mannose sugar GlcNAc(2)-Man(9)-Glc(3) from a dolichol-linked donor to an asparagine acceptor in a consensus Asn-X-Ser/Thr motif. In most eukaryotes, the DDOST complex is composed of three subunits, which in humans are described as a 48kD subunit, ribophorin I, and ribophorin II. However, the yeast DDOST appears to consist of six subunits (alpha, beta, gamma, delta, epsilon, zeta). The yeast beta subunit is a 45kD polypeptide, previously discovered as the Wbp1 protein, with known sequence similarity to the human 48kD subunit and the other orthologues. This family includes the 48kD-like subunits from several eukaryotes; it also includes the yeast DDOST beta subunit Wbp1. 36.00 36.00 36.20 46.60 35.90 35.40 hmmbuild --amino -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.42 0.70 -5.95 29 360 2009-09-11 12:44:55 2003-04-07 12:59:11 9 6 276 0 243 382 1 379.40 35 87.94 CHANGED sLV........llDstsp+tp.........aShFapsLcs....R.GacLsacssc..s..ssLsLachGEhhYDpLllFssp.s+shGss..lo.scsllcFlssGGNlLlssSup........hsculRplhpEh.slch.s-csshllDHFNaDst.sss.pHslll.....h.tsshlpssspshht......tslha.+Gsuthlss..ssLlhPILpAspTuY.......SYs..scpchpsh..........sausGpQshLVuuhQuRNNARhsasGSh-hhpDcaFsutspt..............posNp-FAcplopWsFpEpGVl+lsplpHa+ts-st............sP.phYpIK-plhYoIplsEa.....sps+WlPFp....s-DlQLEFsMlDPahRhsLp...........tpp..tsushYos.pFplPDpHGlFsFclsY+RsGhTplpcpspVoV..RphsHsEYsR..hIsuuasYhuuhauslsGalhFshlaLappss...........KKpp 
......................................................hLVll-s...s.+pp........aShFapsLps.........................RGa..pl.sacsscs......spLsLhchGphhYDpL.l.lhssp....sc.p...........hGss..ls...spslhpFlcs.G.GN.....lLlA.soss.........hscslRplhsEh.Glch.s-csshVlDHasYcst..........s.s.pHshll................h.sps.h...l.p..ssshst.tt........sslha..+GsGhshss.tNsLlhslLpAs.sTuY............................Shtscpthpph...............shssGp.phhLluuhQ.....AR.NNARlshsGSl-hhoDpaFsutlpt.........ttp..pouN..tphupplopWsFpEpGVL+ls.slpHa+sscst.........................sP.phYplpDpl..................pYoltlpch.....ssscWhP.ap..........sDDlQLEFshl-PahRh..sLp..........................psuhYps.pFplPDhaGVFpF+VcYp.R.GhTpl.ppppVoVR..htH.spY.R..hIsuAaPYhuusaShhhGhhlFshlaLa.p...............p......................................... 0 87 137 203 +1737 PF04625 DEC-1_N DEC-1 protein, N-terminal region Kerrison ND anon DOMO:DM04594; Family The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing [1]. Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). 
27.90 27.90 27.90 58.00 24.90 27.80 hmmbuild -o /dev/null HMM SEED 407 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.41 0.70 -5.79 3 34 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 13 0 12 48 0 338.70 58 37.51 CHANGED MMFPALGSLLRWGSLFPAYSILGAIPDNLQPTAAASKVVLVLADDATAKTRVARQNPPPNPLGQLMNWPALPQDFQLPSMDLGPQVGSFLAQL...PAMPo...MPGLLGAAAPVPAPAPAPAAAPPsAPAPAADoPAAPlPDAsQPAILGQAALQNAFTFLNPSNFDASSLLGQSVPTFAPPNLDFVAQMQRQFFPGMTPAQPAPAGTDAQASDISEVRVRPEDPYSQEAQMKIKSALEMEQERQQQVQVKDQEQVPLLWFRMPTTQNcDATEEKTLEDLRVEAKLRAFERQVIAELKMLQKIELMAKQMRSSAAAQNGDSPYRISYPLSRTPIHKITRADIEQALRDDYVRRLVNKEAQRKARNSGINTQKANALKRQAKSQDQTLSKEDIVQIMAYAYRMANEQMESEKG .....................MMFPALGSLL+WGSLFPA.SlLGAlPDs.L.....QPtAAASKVVLVLADDAs.uKoRVsRQs........sP..PsP...h.Gph...MNhPAL......PQsF.....Q....L...so....M.sL.s...PpVGp.hL...u..QL.Ps.MPs.....hsul..LGtAsPV.Pss..........APAsss...sPs.AssPAs-sPs..sssP..ss..s.Ps....ushtsuhsFhsPuNFDuS.sLLGQu...lss.....................APP.shDFhuth.RQFaPGhsPA.pPusu.soDAQASDISEVRVRPE........s.............s...a...uQ.pAQ...M...KhKSALphEQE+Q........cspEpVPLLWFRMPo.......opsp..-ts-pKolE......DLRVEAKL+AFERQVIuEL+MLQpIEhMAKpMRoSA...suQss..soPY+lsYPLSRTPlHKITRADIEpALRDDYVRRLlpKEAQRKu....tsp......tsuhKRQstpQ.-QshSKE-IVplMAYAYRMAsEQh.Ep................... 0 3 3 7 +1738 PF04624 Dec-1 DEC-1_REPEAT; Dec-1 repeat Kerrison ND anon DOMO:DM04594; Repeat The defective chorion-1 gene (dec-1) in Drosophila encodes follicle cell proteins necessary for proper eggshell assembly. Multiple products of the dec-1 gene are formed by alternative RNA splicing and proteolytic processing [1]. Cleavage products include S80 (80 kDa) which is incorporated into the eggshell, and further proteolysis of S80 gives S60 (60 kDa). This repeat is usually found in 12 copies in the central region of the protein. Its function is unknown. Length polymorphisms of Dec-1 have been observed in wild-type strains, and are caused by changes in the numbers of the first five repeats [2]. 
20.90 20.90 21.20 20.90 20.30 20.70 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.71 0.72 -4.31 7 107 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 8 0 43 192 0 26.40 68 12.46 CHANGED QNsMMM.p....RQWoE-QAKhQQp..QQMhQQ .QNPMMMQQ...RQWoE-QAKlQQs....Q.Q.hQQ........ 0 24 24 37 +1739 PF02352 Decorin_bind Decorin binding protein Bashton M, Bateman A anon Pfam-B_800 (release 5.2) Family This family consists of decorin binding proteins from Borrelia. The decorin binding protein of Borrelia burgdorferi the lyme disease spirochetes adheres to the proteoglycan decorin found on collagen fibres [2]. 24.60 24.60 95.30 94.90 21.60 20.90 hmmbuild --amino -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.68 0.71 -4.17 12 159 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 30 0 2 132 0 139.20 56 78.15 CHANGED KhtLESSuK-IpDEIsKI+K-AtscGVNF-AFpsspTGSKVupss.hIhcAKl+shslstKFlpsIEEEAhpLKEsGu.Sup......FpuMYDLML-lutsLEclGlppMppoVoptAcpsPsTTA-GIltIspthcsKLppV+pKQ ..KltLESSuKDIpDEIsKIKK-AsspGVsFcAFT-scTGSKVo......ps......s...hIh-AKlRAhslsEKFlpAIEEEApKLK-oGu..SGc......FSAMYDLMLEVScsLE-lGlpcMppTVo-AAcpsPsTTA-GlLtIAptMcsKLppV+pKp........ 0 2 2 2 +1740 PF01335 DED Death effector domain Bateman A, Finn RD anon Bateman A Domain \N 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.56 0.72 -3.86 37 947 2012-10-01 21:41:45 2003-04-07 12:59:11 16 55 86 20 419 826 0 81.10 24 27.28 CHANGED aphhlhp.lscsLspc-lcsLhFLsp...c.lspsc..hpp.pshh-lhhp.Lccpshls.ssl....shLtclLtplsRhDLLptl.phcppsh .........................ahthLhp.lu-pLspc-lp.sL.p..FLhp......-.h..lsppc.............hpp...pssh-lhhp.Lccps.hlsp..ssl....shLpclL...ppl..p..RhDLlppl.phcpt................. 
0 158 186 257 +1742 PF00711 Defensin_beta Beta defensin Bateman A anon Pfam-B_675 (release 2.1) Domain The beta defensins are antimicrobial peptides implicated in the resistance of epithelial surfaces to microbial colonisation [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.44 0.72 -4.02 54 464 2012-10-01 20:50:19 2003-04-07 12:59:11 14 4 80 69 141 484 1 34.50 32 54.72 CHANGED s.hpChppsGhCh.tpCssthp.pIGo..Ctssth+CC+ ......hpChtptGh.C.t.hhCs.hhhp.plGs..Ct.sshhp.CCh.... 0 4 4 18 +1743 PF00879 Defensin_propep Defensin propeptide Bateman A anon Pfam-B_517 (release 3.0) Family \N 21.00 21.00 21.20 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.53 0.72 -4.01 23 253 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 31 0 76 259 0 49.60 48 52.36 CHANGED MRTLsLLuA.lLLlALQsQA-slQtps-Es.ssQ-QPGtEDQDlulSFssscuo ...M+TLsLLuA.lLLlALQsQA-...sl.......Q.s..ps-.......Es.ssp-QPGt--QslulSFstscs............. 0 9 11 16 +1744 PF00323 Defensin_1 defensins; Mammalian defensin Finn RD anon Prosite Domain \N 21.40 21.40 21.60 21.50 20.10 21.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.88 0.72 -3.58 11 136 2012-10-01 20:50:19 2003-04-07 12:59:11 14 3 22 60 56 185 0 29.00 47 32.72 CHANGED ChCRpt.tCttpERthGsCplpGhhashCC .CaCRth.sCtttE+hsGoCp.ht.Ghhaph.CC 0 5 5 8 +1745 PF01041 DegT_DnrJ_EryC1 DegT_DnrJ_EryC1_fam; DegT/DnrJ/EryC1/StrS aminotransferase family Finn RD, Bateman A, Parkhill J anon Pfam-B_239 (release 3.0) Domain The members of this family are probably all pyridoxal-phosphate-dependent aminotransferase enzymes with a variety of molecular functions. The family includes StsA Swiss:P72454, StsC Swiss:P77952 and StsS [1]. 
The aminotransferase activity was demonstrated for purified StsC protein as the L-glutamine:scyllo-inosose aminotransferase EC:2.6.1.50, which catalyses the first amino transfer in the biosynthesis of the streptidine subunit of streptomycin [1]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.04 0.70 -5.42 38 8204 2012-10-02 18:26:03 2003-04-07 12:59:11 12 29 3491 78 2089 27677 18092 342.00 27 92.75 CHANGED sthsp-.httlppslcus.hhhs......GshlppFEccFAsahG..staululsSGTsALhLALtA...hulu......GDEVIsPuhTFsATssslhtsGApPVFVDlD......ssTaslDPstlEsAITs+T.....KAIlsVHhhGpss.DhssltclAccauLhllEDsAcAhG..upapG...................+hlGshG..chusFSFassKsl.osGEGGhllTsDscLscphc........th+saGhs.........ppahpph............hGaNh+hs-lpAAlGlsQLc+l-phlppR+chsphapptLtslshh..hshsspss............tuaahaslhhptp....s.RcpLhptLppp..slsoplha......s.Ph+tpsh.........appht.tss.thPsu-plspcslsLPhassls...s-clccVssslp ..................................................................................................................p.hthltps.lpos...hl.s.s.............G.s..h.s..p..p...h..E......p....t......h..s....p...a........h........G..........s........p.................a.........s.........l........s.........s........s.........S.........u....T....s....A..L...p.l....A...Lhu....................ls...lt.......................G...D....E..V...I....s......s......u......h......T.....F......s........A........T.....s......s.....s.....l......h........h......h........G........A.....p..........P.....V......F.........l......D.....l.....-.............................c.....T.....h.....s.....l....D....s.....p.....t....l............E...t....s....I.......T........s.......+...T.........................+...A........I......l.......s.......V.......H.......h.......h........G.......t........s.......s.......-.......h.......-.......t.....I..............h.....t.....l...............u....c.....c......a................s.................l..............
h.................l.l..E.D......u...A.....p....A....h.G.....u.....p..a.....c..G...........................................+.t..s..G......s..h..G.......c....h.u.s.a..S......F..a...s......s...K.sl....ssG..E.....G....G.h.l......h......o..s......c......p......p......l.h...c..c..hc...............................................hl.p..p.p...Ghs.................p.p...h.htph....................................................hG..hN...h..+...hs..-l...p...AA..l.s....h....s.....Q....L...c.p.......l.........-..p.h.....h.p.pR.....p..p.l.sp...hY.....pp.t.L........t....s...h...s...........h............h..t.h..s..stsp.........................................psaa..ha.h.lh...l..............p....t.................t................................................p......Rs........t.....l.h.ptLppp.............sI...t.s.....t.h.aa................hsh.a.t..p.sh..............................a.p..p.....h.....t....................................p.....h.s........scp.ht.p.p.hlsL..P..hast.hs...ppp.hphlhpth.h.............................................................................................................................................................................................................................. 0 739 1465 1821 +1746 PF02286 Dehydratase_LU Dehydratase large subunit Mian N, Bateman A anon Pfam-B_7927 (release 5.2) Domain This family contains the large subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances. 
25.00 25.00 41.80 41.80 17.40 17.00 hmmbuild -o /dev/null HMM SEED 554 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.61 0.70 -6.26 11 398 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 338 20 44 238 19 512.60 77 98.83 CHANGED pRpKRFchLscRPVN.DGFltEWsEpGhlAMsSPNDPKPS.lKI-NGpVlEhDGKttpDFDhIDcFIAcYuIslspAEcsMshDSlclARMLVD.NVsR-EIlclTouhTPAKlsEVVupLNsVEMhMAhQKMRARRTPuNQAHVTNh+DNPVQIAADAAEAulRGFcE.ETTVuVsRYAPhNAlALLVGSQsGRsGVLTQCSlEEAsELcLGMRGhTuYAETISVYGTEpVFTDGDDTPWSKAFLASAYASRGLKMRFTSGuGSElhMGaAEGKSMLYLEARCIhlsKuuGVQGLQNGulSCIGlsGAVPuGIRtVLAENLIshhlDLEsASGNDQsFSHSDIRRTARhLhQhLsGTDaIhSGYSulPNYDNMFAGSNhDA-DaDDY.lLQRDLtVDGGL+PVsEE-lIulRpKAA+AlQAVFctLGLPsITDEEVEAATYAcGScDhPpRssVcDlKAApplhsRGlTulDVVKALucsGFsDlAEslLshh+QRloGDYLQTSAIhD+capllSAVNDsNDYtGPGTGYRL..otERW-cIKslstAlcsp-lc .....MRSKRFEsLAKRPVNQDGFVKEWlEEGhIAMESPNDPKPS.IKI.NGtVsELDGKshu-FDLID+FIA+YGINLsRAEEVMuMDSVKLANMLsDPNVpRs-IV.LTTAMTPAKIVEVVSpMNVVEMMMAMQKMRARRTPSpQAHVTNlKDNPVQIAADAAEuAhRGFDEQETTVAVARYAPFNAlALLVGSQVGRPGVLTQCSLEEATELc...LGMhGaTsYAETISVYGTEPVFTDGDDTPWSKGFLASuYASRGLKMRFTSGoGSEVQMGYAEGKSMLYLEARCIaITKAAGVQGLQNGSVSCIGVPuAVPSGIRAVLAENLIso.LDLECASuNDQTFoHSDhRRTARhLMQFLPGTDFISSGYSAVPNYDNMFAGSN.DAEDFDDYNVlQRDLKVDGGLRPVpEE-VIAlRNKAARAlQAVFsshGLPsITDEEVEAATYAHGSKDMP.ERNlVEDIKhApElls+shsGL-VVKALupGGFsDVApshLNh.Kt+loGDYLHTSAII.............stDhpVlSAVNDlNDYAGPuTGYRL..pGERWEEIKNIPsAlDPs-l.s........... 0 14 25 34 +1747 PF02288 Dehydratase_MU Dehydratase medium subunit Mian N, Bateman A anon Pfam-B_7081 (release 5.2) Domain This family contains the medium subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances. 
20.60 20.60 20.60 37.50 20.50 20.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.27 0.71 -4.33 62 717 2009-09-11 12:54:19 2003-04-07 12:59:11 10 2 336 26 77 386 10 113.30 44 63.61 CHANGED stPAhslhhppshss....tll+pllhGIEEEGlshcl.p.shcsuDlshhAhpAAphSs.ulGIGlpuc......ssllH.+sL.s.ssL.hhs...hhstcshRtlGpNAARhsKG...hPh. .............s.uPAhslttssshsu.....pll+EVlhGIEEEGI.scl.c.....phcuuDVshsAhpuu+.Ss.ulGIGlpu+......osVlH.+sLPs.usL.hhs..shhs.csaRplGpNAARhsKG.....P........... 1 27 44 59 +1748 PF02287 Dehydratase_SU Dehydratase small subunit Mian N, Bateman A anon Pfam-B_6588 (release 5.2) Domain This family contains the small subunit of the trimeric diol dehydratases and glycerol dehydratases. These enzymes are produced by some enterobacteria in response to growth substances. 25.00 25.00 38.30 38.30 21.10 20.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.44 0.71 -4.43 17 370 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 338 20 44 205 7 135.80 66 81.96 CHANGED os+p.hstsDYPLspK+P-hlKTsoGKsLcDITL-sVlsGcVsupDlRIoP-TLchQApIAcssGRsAlucNFpRAAELTslPD-RlL-lYNALRPaRSoKpELLsIAcELEspYpAslsAsalREAA-sYcpRcKL+ .......................ou.csspVsDYPLAsKHPEaVKTuTsKoLDDhTLENVLSscVoApDhRITPETL+hQApIA+DAGR-pLAhNFERAAELTuVPDDRILEIYNALRPYRS.TKpELLAIAD-LEs+YpApICAAFVREAAtLY.cRKKLK.......... 
0 14 25 34 +1749 PF00257 Dehydrin dehydrin; Dehydrin Finn RD, Bateman A anon Prosite & Pfam-B_3306 (Release 7.5) Family \N 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.50 0.71 -3.89 27 1293 2009-09-17 10:00:38 2003-04-07 12:59:11 14 12 182 0 114 1285 0 110.70 26 87.32 CHANGED sshhcchGsslthssthppttttttst.........................................................................s.httptts.....................................................................ttthtthhppst...SuSSuuSE--sp.........sGc+.................................................................KKGltEKIKEKLPGspps...............t....sttssshstt..........................................................tttEKKGhhDKIK.........EKLPu..............Gpc ...............................................................................................................................................................................................................tt...........................................................................................................................................................................................................................................................................................................................................................................................................................................tpp-ptt.............................tt.c.+.................................................................................................................KK.G..lh-K..IK.E..KL.P.Gsppc.............................................................s.t...t.h.t.t.....................................................................................tpp...shhtp.......................................................................................... 
0 23 58 87 +1750 PF02336 Denso_VP4 denso_VP4; Capsid protein VP4 Mian N, Bateman A anon Pfam-B_19701 (release 5.2) Domain Four different translation initiation sites of the densovirus capsid protein mRNA give rise to four viral proteins, VP1 to VP4. This family represents VP4. 25.00 25.00 42.50 40.10 18.80 17.90 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.45 0.70 -5.48 3 29 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 21 1 0 36 0 370.00 36 70.57 CHANGED DVTMAMSLPGTGSGTSSGGGNToGQEVYlIPRPFSNFGKKLSTYTKSHKFMIFGLANNVIGPTGTGTTAVNRLLTTCLAEIPWQKLPLYMNQSEFDLLPPGSRVVECNVKVIFRTNRIAFETSSTATKQATLNQISNLQTAVGLNKLGWGIDRSFTAFQSDQPMIPTAToAPKYEPVTGsTGYRGMIADYYGADSTNDsAFGNAGNYPHHQVGSFTFLQNYYCMYQQTNQGTGGWPCLAEHLQQFDSKTVNNQCLIDVTYKPKMGLIKPPLNYKIIGQPTsKGTISVGDNLVNMRGAVVoNPPEATQsVoESTHNLTRNFPAsLFNIYSDIEKSQlLHKGPWGHENPQIQPSVHIGIQAVPALTTGALLVNSSPLNSWTDSMGYIDVMSSCTVMESQPTHFPFST-ANTNPGNTIYRINLTPNSLTSAFNGLYGNGATLGNV .......................t.....hsLPGTG.u...spu..s..s.Gusspu.tp......lh.I.RPhs.s.h.s..pph.pTYpKsHKFhoFGlAsslls..s.u.T..ssss..s..s.....h.hlTTsLAEIPWcp.hhYMN.SEFcLL...P..s..GSplh-ssV+VlhRssRlAFETsSosTs.ATLNQ.p.lthAhGL..Nph.ua...GhsRpaTsFpu...spPMlPsustsspYtslsstp..tacshlt.-hYGssssss.shstssshP+HplG..hhLpsYashht..p....s.......s..p.ss........u......GWPsLsp+lppaDupsssspsll....shoYcPphu.lpsshp...h..h.hhs...P...st.us..h.....t..tshss.......hp.....t.....ssh..s.....hs.....t..t.p.s.ps..h.........s..p..........ap.....hhs.IEKuQhh.tG.h.t..psphQPSlHlGh.sV.tLTTsth.............................................................................................................................................. 0 0 0 0 +1751 PF01791 DeoC DeoC/LacD family aldolase Bateman A anon Bateman A & Pfam-B_6806 (release 14.0) Domain This family includes diverse aldolase enzymes. This family includes the enzyme deoxyribose-phosphate aldolase EC:4.1.2.4, which is involved in nucleotide metabolism. 
The family also includes a group of related bacterial proteins of unknown function, see examples Swiss:Q57843 and Swiss:P76143. The family also includes tagatose 1,6-diphosphate aldolase (EC:4.1.2.40) is part of the tagatose-6-phosphate pathway of galactose-6-phosphate degradation [2]. 22.50 22.50 22.50 22.50 22.30 22.40 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.50 0.70 -5.07 26 7750 2012-10-03 05:58:16 2003-04-07 12:59:11 4 17 3919 241 1426 4608 1164 223.40 24 84.08 CHANGED phhhhhDpsshpsshpst..............-lcsllpcshph.......tssuVslssshlthupcthsp......clshlluhstusshsssp...........shpsccAlc.GAstlchll.ht..tscp.ppphhpclptltctCp..shslhlllE.shhpspp............p..hltcAsclst-..hGADhlKssss...........t..htsp.pssphh+csstt...................lhsSGGl.........cslcpAh...cAGA..............hGsusGRslapp .....................................................................h..hhhlDpshhps..psstt.................clctlsppA..hp..............................tssuls.l.s.sph.l.h..h.s...t.c..th..s.t.............................pls......h....l..l...s.ast....ussh.ss.s.p..................shpsc.pA..lp...G..A..s...tlchll.ht.........tu...tp....tpphh...ppl....p....t....lhp..t..s..t.............shs...l...h..l..llE.....shl...pcpp......................t...l.hp.A.s..cl.u.tc.....hGADhlKTpss.................t...sssp.....ps....sp.h.hpchss.....................................................l..hsuG.Gl...............cshppAh..hpuGu.ph............hGs.sGhthht.t............................................. 1 479 902 1203 +1752 PF00455 DeoRC deoR; DeoR; DeoR C terminal sensor domain Finn RD, Anantharaman V anon Anantharaman V Domain The sensor domains of the DeoR are catalytically inactive versions of the ISOCOT fold, but retain the substrate binding site [1]. DeorC senses diverse sugar derivatives such as deoxyribose nucleoside (DeoR), tagatose phosphate (LacR), galactosamine (AgaR), myo-inositol (Bacillus IolR) and L-ascorbate (UlaR) [1], [2], [3]. 
31.80 31.80 31.80 31.90 31.70 31.40 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.56 0.71 -4.60 91 14356 2012-10-04 00:26:15 2003-04-07 12:59:11 17 16 3216 0 1875 7048 402 159.00 26 63.33 CHANGED tpphpcKppIActAAphlps.GsslhlDsGTTshtlActLssp........slsVlTNulslAptLtpps........shclllhGGplcppstuhsGshshphlppap.hDhuFlussGls..ctGlts.s.p-stlccthlppAccshllsDpSKau+puhhphsslsplstllTDps .................................................h.phppKptI.Ac.t.A.u.p.hl..p..s..G...p..s.lhlDsG.oTshtl.uctLs...pp...................slsllTN.ultlA.p.h.Ltc..p................php.l.l.lhGGplc.p.....p..st..u..hl..G....s.h.shp.h.l.....p.....ph..p........hDhsFlussGls...............h......p.t...G...l.s.shs...............pc..............stlppthlpp.u..p.c.s.lll..sDpSKa..s..c..p...u...h....hph...s.sls.pl.s.h.llTDp.t................................ 0 508 1030 1467 +1753 PF04511 DER1 Der1-like family Mifsud W, Bateman A anon Pfam-B_1901 (release 7.5) Family The endoplasmic reticulum (ER) of the yeast Saccharomyces cerevisiae contains of proteolytic system able to selectively degrade misfolded lumenal secretory proteins. For examination of the components involved in this degradation process, mutants were isolated. They could be divided into four complementation groups. The mutations led to stabilisation of two different substrates for this process. The mutant classes were called 'der' for 'degradation in the ER'. DER1 was cloned by complementation of the der1-2 mutation. The DER1 gene codes for a novel, hydrophobic protein, that is localised to the ER. Deletion of DER1 abolished degradation of the substrate proteins. The function of the Der1 protein seems to be specifically required for the degradation process associated with the ER [1]. Interestingly this family seems distantly related to the Rhomboid family of membrane peptidases. Suggesting that this family may also mediate degradation of misfolded proteins (Bateman A pers. obs.). 
20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.32 0.71 -4.81 11 816 2012-10-01 23:21:32 2003-04-07 12:59:11 10 13 329 0 563 905 24 176.70 28 69.86 CHANGED lPsVTRhahhusllholls+hpllsPhhLh.asasLlh+KhQlWRLhTshhaa...ushGapalhNhaFhapYsptLEpusFps+.........ssDalahLlFshlhhshhsh......................................................................ph...hhLspshshhllYlWuphNs.psplsFatlhphpApYLPalLhshshllts.ssl.s-lhGlhsGHlYa....Fhp.................hYstp.tGpcLlpTP. ...........................................hP.lTRhahss....slhh...oh...h...s.....p.h...t......l.l....s..s....h...p....lh...hs..................p.....h..lh....p......c.h...........Q...l..W.Rll....Tsh..h....a.a...................u.s.hu..h.sh.lhp..hhal....hpYs......phLE..p...s...t..F.psp...........................................uDa.h.ah..l.l.a..s.....h.hhh.hh.h.u..h..................................................................................................................th.......h..L..u..ps...hhhh.l..l.Y..l..W.....u+.p....Ns...phpls......aahl.h..sh..p.A..a.LP...a...s.hh.s.h.shl....h....ss.....s.....hh....sc.lhGlhsGHlYa.........Flp................phaP.........t..G.h..p.l.sP..................................................................................................... 0 203 316 457 +1754 PF01880 Desulfoferrodox Desulfoferrodoxin Enright A, Ouzounis C, Bateman A anon Enright A Domain Desulfoferrodoxins contains two types of iron: an Fe-S4 site very similar to that found in desulforedoxin from Desulfovibrio gigas and an octahedral coordinated high-spin ferrous site most probably with nitrogen/oxygen-containing ligands. Due to this rather unusual combination of active centres, this novel protein is named desulfoferrodoxin [1]. 
21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.63 0.72 -3.71 58 594 2012-10-01 19:22:21 2003-04-07 12:59:11 13 10 502 39 225 540 29 92.40 38 70.61 CHANGED osDstpEK.....HVPVIE...pspsG....VpVpVGp..lsHPMppcHaIpWI-Lhs.cpsth.......RtthpPss.............c..PcspFslph.t............................splhA.hpYCNlHGLWcsc ...........................s.sDsutEK.....HVPVlc.......htss.....lpV.p......VGp....ltHP.Mp.....pc......HaI.pW.IpLhs...s.p.s.sh..................+tpLpPsp..................c..PpApFtls.s..t...........................tsltAhpYCNlHGLWcs.p............ 0 114 181 211 +1755 PF04598 Gasdermin DFNA5; Gasdermin family Mifsud W anon Pfam-B_5153 (release 7.5) Family The precise function of this protein is unknown. A deletion/insertion mutation is associated with an autosomal dominant non-syndromic hearing impairment form [1]. In addition, this protein has also been found to contribute to acquired etoposide resistance in melanoma cells [2]. 
This family also includes the gasdermin protein [3] 30.00 30.00 30.20 31.00 26.90 29.80 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.42 0.70 -5.74 18 326 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 45 0 165 276 1 322.80 23 90.11 CHANGED hF-+so+sll+Els..pGDLlPVssLpsus+Fp.asLVp+K+cp..hFWp.s+YhslshoLtDlLEPss.......ssPsssposs..hpapsthcsplpGslclshs.hphpluGuu...opSstsolclQplslss.th-sh..cR+lhs.cs..................shLpphp....ppt..-sLaVVTEslpTsp-sslpcsspsctpsph.lst.shslpGpuptp......pspcpsloIPpGosLAY+lhQLhl..cpcspatlslh.ccKptTFtp....................................ph..p..h..................h.....sssptpsstptshpsLcpE...lcpphpsLscLscphppsLhssltplLpDcpsLpsLEphL-th..............tltsh-ssstslLppLht..........ssshh.pphtsslhYLLsALs.LS-sQttLLupuhEhplLstphcLVculh-pshp......hs.............LpsthLusLtscth.sls.tLhtpsGlclp..tssPcssh ...............................................hFttso+shl+pls...t..GcLhPVpsL.sus+hp.hsLVh+K.pp......hhat.scah...hs..hoLhDlLpsst..............s.....p.t..t...hph.s..tsphpup....hth.........ht..hpls..Gps....s.sp.stlphpplpls..hhc........p......+.....+h.p.c....................hlpphp....ppt..psL.hVVhEhlpshpps.lp..tttphpt...t.h.......h.h.h.......h..thpsp...........hspphhltlPt.tsslAathhpLhl....p..h...th..hh..t...p...t.sF..................................................................................................................h.t....h....t.h.....Ls...p..l.t.h.t..hh...pp..h..L......ht........................t.t......h.th.................................................................h...hh.hltA.l...hsc.t..hl....h..p.t...l....p.....h...t................................................................................................................................................ 
0 13 19 43 +1756 PF04127 DFP dfp; DNA / pantothenate metabolism flavoprotein Wood V, Finn RD anon Pfam-B_6559 (release 7.3); Family The DNA/pantothenate metabolism flavoprotein (EC:4.1.1.36) affects synthesis of DNA, and pantothenate metabolism. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.32 0.71 -4.67 702 5121 2012-10-10 17:06:42 2003-04-07 12:59:11 10 12 4486 16 1366 3993 2687 170.90 33 49.56 CHANGED LtG.++..l..llTAGPTpEsIDPVRaIoN+SSGKMGaAlApuAtptGAcVTL.l....u.GP.s.s...ls..s....P......s..l.p.hlp.VpoAp-MhpAlh.pt...h......sps......Dl.hIhuAAVADa..RP....tphus.pKI...K.K....t......sst.hs........l..pLhcsPDILt.....pl.u......ttpp...........tt.....h.lV..GFAAE..T.p........slhcpA...cpKLp.....+KshDhIlANslstts.......tG..Fus.Ds...Nplp.ll .............................................................tshpl.llTAGsTpEsl..D..P....V..R...aloNcSSGKhGhAlApsh.hp.t..G.A....p.VsLl.....s.Gs..s..s..............l...s...............P..........t....l...p..hlp...Vpo.u...t.-.M...h.....ps..l....pt..............h.............tpt......-l.hIhsA.AV.uDap..s....................t.p.h....s..s..p.......K..l..................KK.......p.................tspths................lplh...csPD...Ilttls...t.t.t....................tp..............h.llGFAAE....T..p..........sl...phA.ppKL.t.....+.KshDhll.ANs..lst..........s...Fss.spNtlhh....................................................................... 0 450 859 1151 +1757 PF05035 DGOK 2-keto-3-deoxy-galactonokinase Bateman A anon COG3734 Family 2-keto-3-deoxy-galactonokinase EC:2.7.1.58 catalyses the second step in D-galactonate degradation. 
25.00 25.00 25.70 25.60 24.80 24.60 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.83 0.70 -5.66 51 889 2009-09-12 20:56:58 2003-04-07 12:59:11 7 2 751 6 148 615 193 280.40 43 93.85 CHANGED DWGTosLRuWlhs.tsGp..llsp+puspGhtplssss............Fcss...LtsllusWhss..sshPVlhsGMlGSRpGWhEAPYlssPsshsslstthttls..s.ttsltIlPGls..ptssss.DVMRGEETQlhGsLs......psshssllClPGTHoKWVplpsGplssFpThMTGELFulLsp+SlLs+s....hssss.sss......AFtpGlppu....spssslsspLFulRuttLLttlssssutuhLSGLLIGtElAuspst..........hhspslsLlGussLsshYtpALsttGhssphls..u-pAshsGLtthsc ..........................................................................DWGoTsLRAWLh..ps.s.p...sL-pRp.u..psGlt+L....sspu.................sssLtclss......c..Wtp.......p...........s........s.P........VlhuGMlGSssGWp.APYLslPAthsulucpLssVs.......................csl..aIlPGls..........hpcs-s...sVMRGEETQLlGAht..................thussulhl.hPG.T.H.sKWVps.-......s...............t....pIscFRThMTGELapLLhp...HSllGsu..........ls.s...p...-...s...ss-........AFss.Gl.cpu....hsssul..LspLFplRAutll.G......sL......spp...psu-aLSGLLIGsElAuhpsh..............hstppsIsLlu.usuLs.uRYppAhpsh..Gh..s.s.stls..uDpAhpsGltplA.p............................ 0 23 71 108 +1759 PF00926 DHBP_synthase 3,4-dihydroxy-2-butanone 4-phosphate synthase Bateman A anon Pfam-B_1148 (release 3.0) Family 3,4-Dihydroxy-2-butanone 4-phosphate is biosynthesised from ribulose 5-phosphate and serves as the biosynthetic precursor for the xylene ring of riboflavin. Sometimes found as a bifunctional enzyme with Pfam:PF00925. 
20.90 20.90 20.90 22.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.17 0.71 -5.08 97 4699 2009-01-15 18:05:59 2003-04-07 12:59:11 14 20 3949 37 1250 3399 2646 192.70 49 56.26 CHANGED lccAlpsl+s.GchlllhDD-s....RENEGDLlhuAphsTs-plsFMhppupGllClslstppsccLsLs.Mss..p......Nssth.....................tTuFTlolD.tp...s...ssTGISAtDRAtTI+tLu...........sss..spsp-FhpPGHlaPLhAcpGGVLpRpGHTEAuVDLs+LAGl.pP.suVlCEll..sc.DG.sMt+hs-..lhpaAccasLhlloIp-Llpat .....................................l-cAlpsl+p.G+hVl.V.l.DDE.D....REN.E...GDLIhAAEthTsEpl....sFMh+au..........pGllClsloc-cscpLsLP.MVp......p......Nsssa.....................tTuFT.VSI-...stc...G...sTTGISAtDRupTlcshs.......................sss........upssDhsRPGHlFPLhApsGGVL.pR.sGHTEAuVDLs+LA..Gl.pP......A.GVlCElh............N-...D...........G...o...........MARts-..LhpFAccHsltllTIpDLlpYR................. 0 388 804 1055 +1760 PF00701 DHDPS Dihydrodipicolinate synthetase family Bateman A, Griffiths-Jones SR anon Pfam-B_557 (release 2.1) Domain This family has a TIM barrel structure. 
22.10 22.10 22.10 22.20 21.90 22.00 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.74 0.70 -5.81 13 10473 2012-10-03 05:58:16 2003-04-07 12:59:11 17 24 4601 330 2809 7818 5086 284.20 27 95.11 CHANGED thtGlloAllTPFsspGplDhpuhcpLlcapIspG.s-GLlluGTTGEuhsLSh-E+hpllchsVspssG+lsVIAGsGSNuTpEAlchsppupcsGhcusLtVsPYYsKsSpEGlhpHF+sIh.stsslPhIlYNlPuRTusclsPEslt+Lup.hsNllGlK-ssGsh....thppl+ptsscshslaoG...-DspthshhphGucGsISVouNlssthhpshhctspsGchttA.ttlpc+LhPLhchLFtp..sNPhslKsALphlGlssss.hRhPhsPlsp-tptclsshlcthG .......................................................................................................hpGlhsAhlTPF.....s..p..p.....G......p......l......D..h...p...uh.p...p.l.l.c.a...h...l......p.......p..G..s...s.G.l...h..ss..G.o...T.G..Ess..sLo..t...-......E..+.....t...pl...l....c...h...s...h.c...t...s........s.......u...........+..........l.....P.....l...I...A........G...s...G.......s..s...s...........T.p.c....u.l..p....h....s..c.p..A.p.c..h..G....s..D........ull.s.l...s.P...........a...Y....s.........+....s......o.......p......c..u....l...h....p.......H...a...c...s.......l......u.....p....u.....s.........s...l...P....ll.l..Y.....N..l...P.....u.....p...o..u..s.....s.l..s.s..-..s..l..t.c..Lu........c...h........s..s.l.l..ulK...........-....u....s....G....s...l.....................t.p...h...t....p....l.....h........p......t...........h...........s...........s........c..........h....h..l....h..sG..........sDs.h.h.h..s.h..h....t.....h...........G...u..p.......GsIos.....s.....uN..l.....h.P....c...t...h....t...p...h....h...c...t......h...t.....p.......G...c......h...p...p.....A......t.......pl...p..p...p...l.....h....s....l......h....p...h...l.....h.tt..............ss.s...h.......s........l...K.t...s......h.....p.....h.....h........G...h.......s.s..s....h...R.hP....h.s.s..h.sp..p.p.t.t.tltthht...h...........................................................................................................................
................ 0 799 1639 2295 +1761 PF01368 DHH DHH family Bateman A anon Pfam-B_1245 (release 3.0) Family It is predicted that this family of proteins all perform a phosphoesterase function. It included the single stranded DNA exonuclease RecJ. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null --hand HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.21 0.71 -4.55 150 10761 2009-09-12 04:54:29 2003-04-07 12:59:11 15 57 4504 39 2562 8005 1706 162.10 20 32.36 CHANGED tlppscpl.hlhsch..DsDulsSssslhphlpphsh.pthhhhsspthpt.h...hsttth..............................................................................................................................................................................................................................................t.ttppssLllhlDs.utsshtthpttpth........hpll.llDHH..spp.h................lpsthhs..........hhc.hsuuusuhhlsphl...............................hspphtsllhhGhl ................................................................h...p.ppl..llhGch....D..sDul.sS.s.hs.....h.tp..h..h.....p...p........h........s......t..............p.........s.........h.........h.....h..l..s...p.....p.................s....s......s...................h.sh.h..thtt........................................................................................................................................................................................................................................................................................................................................................................tsslllhVDs...u...t....s.s..p....t...s..s..p...h..t.p..hs...............h.pl.l..llDHH...s.sp.h.....................................ss.hs............hhp..hsuu..usuh.hlh.p....h.l.pt........................t.....p......h.tphhs.llhhGhh.....................................................................................................................................................................
........................... 0 921 1749 2205 +1762 PF02833 DHHA2 DHHA2 domain Bateman A anon Bateman A Family This domain is often found adjacent to the DHH domain Pfam:PF01368 and is called DHHA2 for DHH associated domain. This domain is diagnostic of DHH subfamily 2 members [1]. The domain is about 120 residues long and contains a conserved DXK motif at its amino terminus. 21.20 21.20 21.60 21.60 21.00 20.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.58 0.71 -3.94 126 2132 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 1956 28 508 1459 93 128.40 35 33.02 CHANGED u....h-hhcAtosl.sshosc-llptDhKpFsh.......su....ppluluQlpshshsplhscp..........sclhsthp.phtpcpshDhhllhlT......DIhppsohlLhhu..tptphhppAash.phps.sthhLp..............................GVhSRKKQllP.Lpcsh .....................s.hpMLKAG.o.sl...ss.+.osc-LlshDhKsFph................su.................ppltluQVsslDl.s-lhp+p...................s-lcsshp.th.s..t....p..puh.......ch.hlLhlT..........................DIls.ss....SclLslG.....tstch.....lpp.AFsh.....pl.....p.....s......sp....s.hLs..............................................................GllSRKKQl.VP.Lpcsh.......................................................................................... 
0 175 314 423 +1763 PF01180 DHO_dh DHOdehase; Dihydroorotate dehydrogenase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.95 0.70 -5.46 12 6787 2012-10-03 05:58:16 2003-04-07 12:59:11 16 50 4626 118 1697 6540 4232 282.50 29 80.42 CHANGED hLssplhGlcapNPlh.AuGh.t.GpphsphhsLu.hGhl.ltusThpsp.GNPpPRlhchstu..hlNuhGhsN.GhDhlhpclh.hhpc.....................................lsI.hshstss.....h-DYltsscplu..shschltlNlSsPNs..Ghhshtpss-lst.Lspslpchs..................................................psPlhVKluPsls-.shhslAs.ht.tpsLshls....Tlpt.hlDl+stpslhts...tsGGLSGsslKslAlchl+plhphss..IPIIGsGGIpoucDAlEhhhAGAShlQlhTuh.asGPhlss+IhccL.phL ............................................................................................................................................................................s..tsphhGlphpNPlhh.A..u.G..h.s.p.p.u...p.t.h...t...t....h.....h.......s..h.......u...hGtl...htol.T......c..s....p..........G.........N.......s....p..........P..R......h.......hc.......l........s........p.......s...................................h..l.......N..t.h......G..........h.....s......N.............G......h......-...t.....h..hp...p..l..h.t.h...h...t.p...............................................................................sls.l....l.....u....hstto...............h..-..-...Y..h..ts.h.c.p..lt..............sh..s...s....h...l...plN.l....S.s.P.....N.....s...........u....h.t.....s........l..t..........t...s.....s.....p....l......s....p.h.....l....s.p.h...h....p...p..t.s..................................................phPlhl....K..l..u............P........s..l.s..-.......................s.....h...h.........s......h.....A........s...........t.....h.........h....t..........p......s..........l.s..............s.......l...hh.N.o.T..l......s.t....................s.....l...p.......s....h....t.....s......h......t..................th
G.G.....l..S..G....t......s.....l.....p..s.....h.......u.......l.....c.......h.....l......p......p......l......t......p........p.....h..............p..............................t...........l......P.......I.....IGs..G.GIpos..cDA...h...E........h...l...h...............A.G.A.o...hV.Ql.......hT....u.h.h...a....p...G.P..t.....lspclhptL.th.......................................................................... 0 544 1054 1421 +1764 PF01761 DHQ_synthase 3-dehydroquinate synthase Bashton M, Bateman A anon Pfam-B_1327 (release 4.2) Domain The 3-dehydroquinate synthase EC:4.6.1.3 domain is present in isolation in various bacterial 3-dehydroquinate synthases and also present as a domain in the pentafunctional AROM polypeptide Swiss:P07547 [2]. 3-dehydroquinate (DHQ) synthase catalyses the formation of dehydroquinate (DHQ) and orthophosphate from 3-deoxy-D-arabino heptulosonic 7 phosphate [1]. This reaction is part of the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. 
24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.59 0.70 -5.47 95 4669 2012-10-02 14:41:14 2003-04-07 12:59:11 15 21 4283 45 1160 3985 3068 257.20 42 63.32 CHANGED hlslssGEptKo.hpsht.plhstLhpts...hsRsssllALGGGVlGDlsGFsAAsahRGlsalQlPTTLLApVDuSVGGKTulNpshGKNhIGuFaQPpsVlhDsshLpTLPtRE..htsGhAElIKauhI.t.D...sshaphLcpp.tt........................ptpsLp......phI.......t+SsplKA.plVspDE+E.sGlRtl.....LNhGHThGHAlEshtsY......phh.HGEAVAlGMlh.s.uclutthGhh.s.tthp............clpplLpphGL.P...s.shsph.................ss.-pllph...httDKKsp ...........................................thhlPsGEptKs.hp.s.h.p..p.lhst...L.l.pps..h.s.R.s.s..s.llALGGGVlGDlsGFsAAoahRGlc.FlQlPTT.L....L..A.p.VD...S.....S.VGGK.Tu.l...N..p......s....h.....G.............K..N...h...lG...A..F...a...Q..P...p..uVlhDhshL.c.TLPtRE.ltuGh.AE....V.I....K....a.....G.l....I...t.D.....ss.......h.ap.hLE..pph.p.t.h.ht..............................stpsht......phI.................t+.u...s...p.l..K..A........cl...V..s.......p...D...E+E........s.......G........l........R...tl................L........N..h.....G............H...T.........hG.H..AIEsth.sY.......tphhHGEAVAhGMlh.A.u.cl.u.pp...h..G..hh..sptpsp....................+lh..plL.p.p.h.G.L..P......s.s.hsp.....................shpphhp.tMtpDKKs.t.................................................................................................................... 0 401 765 1004 +1765 PF01487 DHquinase_I Type I 3-dehydroquinase Bashton M, Bateman A anon Pfam-B_2492 (release 4.0) Domain Type I 3-dehydroquinase, (3-dehydroquinate dehydratase or DHQase.) Catalyses the cis-dehydration of 3-dehydroquinate via a covalent imine intermediate giving dehydroshikimate. Dehydroquinase functions in the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. Type II 3-dehydroquinase catalyses the trans-dehydration of 3-dehydroshikimate see Pfam:PF01220. 
20.70 20.70 20.90 21.20 20.60 19.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.32 0.70 -4.70 150 2260 2012-10-03 05:58:16 2003-04-07 12:59:11 10 37 1982 52 564 1429 154 219.00 32 56.47 CHANGED slslsspshtchhtphpphtt..ss.DhlElRl..Dhlp..................p.ltptlptlppps...shPllhTh..R...................sp........tEGGph.p..hsc....cphhpllcpshchs.......s-alDlElp..hppt.hht......hhhtppsts+lIhSaH.sFppTPshpp...lhp....hhpcht.phGsDlsKlsshspshpDslp.lhphppphp.....thPlIshsMGphGplSRlhsshhGSshoau..thspssAPGQlslpclpphhp ..............................................................lslhspshtph.h.p.p.hpthpph....ss....DllEaRl..Dahtt..................................................shcplhphhtt.l+cth...sphPllaTh..R....................ot........pEGGch...s.....hos.......ctYl....pLh....ctsh....phs..............sD.hIDl..........Ehh...s....t....pc..htt................ltttptpsspllhSpH...sFpcTP.ptcc...............lht...........hlpchp..p.............h.....s.s.D........lsK.lAlMs.ps..p.pDVLs...Lh....photphpp...........sspPl..lshSMuchGhlSRlsuplaGSshTFu.........shpcsSAP.GQlslscl+phh.p............................................................. 0 165 339 475 +1766 PF01220 DHquinase_II Dehydroquinase class II Finn RD, Bateman A anon Prosite Domain \N 21.10 21.10 21.30 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.60 0.71 -4.86 191 2882 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 2604 343 840 2257 2156 139.20 48 92.52 CHANGED hpILllNGPNLNlLGpREPplYGs.pTLs-lppplpp..tApph..u..lplphhQSNpEGpLlDtIHpA.....tsp..scuIlINPuAaTHTSlAlRD....Altul...sl.P...hlEVHlSNlHuR.EpFRH......+SaluslA.tGlIsGhGspGYplALcshhp ..........................plLllNGPNLNhLGpREPplYGs.tTL.s-lppplpp..t..A..p..p..h......u....l.c.lchhQSNpEGpLlDtIHpA....................tsp..scsIlINPuAaTHTSl...AlRD....Altul...s..l..P...hlEVHlSNlHsR..EsFRH......+SalSshA.tGlI.sGh.GspGYpLALpshh.p................. 
1 251 526 715 +1767 PF04706 Dickkopf_N dickkopf_N; Dickkopf N-terminal cysteine-rich region Mifsud W anon Pfam-B_5838 (release 7.5) Family Dickkopf proteins are a class of Wnt antagonists. They possess two conserved cysteine-rich regions. This family represents the N-terminal one [1]. The C-terminal region has been found to share significant sequence similarity to the colipase fold, Pfam:PF01114, Pfam:PF02740 [2]. 21.20 21.20 21.20 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.92 0.72 -3.80 13 214 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 54 0 110 172 0 52.00 45 19.55 CHANGED .ChsD--CssspaCptscpp.......thClsCR+p++RCpRDuMCCsGshClNGlChs ....................Ch.Dc-CssscYCptsp.t...........tChsCRc...p+...c+CpRDu.CCsG...phCssGhCh...... 0 11 26 52 +1768 PF05086 Dicty_REP Dict_REP; Dictyostelium (Slime Mold) REP protein Moxon SJ anon Pfam-B_6278 (release 7.7) Family This family consists of REP proteins from Dictyostelium (Slime molds). REP protein is likely involved in transcription regulation and control of DNA replication, specifically amplification of plasmid at low copy numbers. The formation of homomultimers may be required for their regulatory activity [1]. 
20.70 20.70 114.70 114.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 911 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.66 0.70 -13.84 0.70 -6.79 5 10 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 5 0 2 10 0 732.20 36 98.23 CHANGED scLIPWcsFhhFalpILE-FpPsKRsp..h..ss-IMsahYSs+sLlhKElFRolls+pph.....scphtDsLDlcoIFc+TSLhsh....spsscsDLDDllpI+Ks.psGKIlVoDsDQAIFIIDHFSRluDsQVFsKKsIsGFRslEKhVScsGYKIcDaRphGlcWFsFLNKlRTuCspachF....h.puhaKalDFISMLoslHsItVDcQN+-EEcLSslYoLYPFl-LE..........pp.s.t.pulsocNtpRpshpPsspsNsTTTTTTTTTTTTspshT+KR+thsspsls.s.t.....ps.tsssts.......................tsphpcpsSRKSGuLK-VRlDsIS...................s..hssssspSlsPs-sIlSlSN+IKsCLlEllpSKGEIso-lVKuIFcsLQsKsYosDLlDSIFsQNKSEKVITlSS+lFsluuKlDaDEI+Fu-lS-DlF-LoKRLoFEKNTNILIPTp-tENpFGFLWlPIVNGssSTSIaLSPsN.ScVsas+I+SllKFIQLCIllu-INsFlolRSIoFDsFKSIosELLGMScRlLsLEsDV+KLKDllsKs...KKKspl-ss-hpusssscscFAsaVaEF.INNNhYIKLSKp-NuLKLs-sPsSoLsVEYDPsoI-HKVGFlFHCRSEISKFssstNhaSINsLlpSFTPNNIsslSQ-sENDLKRKYSLMSSDhScllKssssFlPSNFcRFlsITITNsuYNhN+VFuFcDISsGlSITNLRsIasccScp.cNRYcEYlGcTRlIRAFFhAPCLIQITNlsFso-pLpsD+slsRQIKSIKI+NLSYIPIDIKVGGslIDTIKuspTcsVpINSSEFoFSISCLcIoFSooLISKsKLsNLsTlls-.KYscpTslLpssDKhscLs+sFlscapchN.sLTcLEcaLlupF...tuhcD-ccsEcEcp--DEDpDEsEDE 
............pcLlsWppahhaahhlLcpatsscR.p.....ss-IhpthaSschllh+ElFRslhsph........t...DhhchpslFsthsh.........sh.hsLcchhphcKs.psuKlhVsDsDQuIhIlDHhSRlsDpplFsKctlsuaRshEKhlopt.YKlpDhRthGlcWashlscl+otCttpphh.......shac.sDhIuhLohlHslhltpQNc-EpplSthYoLaPFhsLE..............pp...t.pshsspptp+ts.pssp.sssTTpoTTTTTTTs.tshpp+pp.ht..p..............................................h..hsoRcoGuLccVRlssIo.........................sssspp.ssppsIhSlSs+IKshllEsh.sKGElshcsVKulFpsLQsKsYshsLl-slFppNKS-+VITlosphaplssplsa-EI+huclop-lhphu+pl.aEKsTNILlPTps.csthuFLWlPIVsshsSTSlaloP.s.opssasplcuhlKFlpLCIslucIstFls.RSIoa-tFKplsp-L.lsMSpRlLsLEtshcpL+phhppp...+p+ht.pps...p..s.ssp-pchhsalpEF.IssshYIKlS+ppN.sLpLs-sPsSoLslEYsPsoI-HKlGFhaHCRSEISKFsssts.hpIsslhpSFTPsNI+NlS.-pEs-LK+KYuLhsS-hopl.cshs.FhPpsFcRahsIohTNssYNhs+VFuFcDISpGhSITNL+sIatcsopp..ppY.-YhGsTRlI+AFFhAPCLIpIsNhtFsssplhsscslsRpIKSlcI+NhSYlsl-IKVssshlsolKuspTpsV.INSsEFsFSISCLchpFSsoLISKs+Lpplpohhsp.+Ysp.ps.h.phhsphscLhcsFlpphp.hN.plopl-phlhp.a....u.tc-cpsEspc.............-... 0 0 2 2 +1769 PF04562 Dicty_spore_N Dict_spore_N; Dictyostelium spore coat protein, N terminus Kerrison ND anon DOMO:DM04424; Family The Dictyostelium spore coat is a polarised extracellular matrix composed of glycoproteins and cellulose. Four of the major coat glycoproteins exist as a multi-protein complex within the prespore vesicles before secretion. Of these, SP96 and SP70 are members of this family. The presence of SP96 and SP70 in the complex is necessary for the cellulose binding activity of the complex, which is in turn necessary for normal spore coat assembly [1]. The function of this region of these proteins is not known. 
22.30 22.30 27.90 27.30 22.20 21.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.53 0.71 -3.82 9 43 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 4 0 43 47 0 117.80 32 20.62 CHANGED h-C-s.hsEspCcus.s.ChhLsasuCCGcpphhCscsstNsChssslSCh+sspospIhEhWSsC.hPpc......sFp.a..PsptoCss...hpCpspGhsCcaspsssChsTSCCPchspChstss .......h.sCEs..hspspCpsshs..Cpals..ahuCCG.cppthCls.ss..........p...s.....sCp......p..s..s..l...sChps..spsspIaEhWS.s...C.pP.pp......Gap.a..P.sstoCsp...LsCpspG.hsCpas.p........sssC...huTS.CC.sthstCss..s... 0 34 43 43 +1770 PF00186 DHFR_1 DiHfolate_red; Dihydrofolate reductase Finn RD, Griffiths-Jones SR anon Prosite Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.07 0.71 -4.65 89 5237 2012-10-03 00:23:32 2003-04-07 12:59:11 14 18 4170 438 1010 3895 1205 156.60 33 84.23 CHANGED hlslIsAhscNtlIGpssplPW+LPsDLpaFKp..hThG+sllMGRKTa-Sl.s....+..PLPsRpNIVlT.Rs.s..hp.....t.s...s...lhpSlppAl......t.ht....ppt......p-la.lIGGuplYpthls..hA-...clhlTclc..tph.p.GDsaFP.pls..spWphspppttt..spp...sshsasFhtapR .............................................lshI.s.A.h.....s.....p.....s.....t....lI....G......t......p......s......p......lP..W.....+......L.....P......s....D.....h...t....a.F...Kp...............h.T.............h.............s.................p...........s.......l.......lM.GR+TaE.....S.ls................+...PL.Ps.R...pNlVlo+sss................hps.................p..sst............lhp......S..l-.-...ul..............................t..ht......tp.......pcla.lI...........GGup.l.Yp.ph..l..s.........h....sc.............clal....Tc..Ic.............sph....-.....GD...s....a....F.........P....p...h......s............s......p.......ac...s...t...p...p....h............................tp.......sshsapa.hh.+...................................................... 
0 312 602 830 +1771 PF02966 DIM1 Mitosis protein DIM1 Griffiths-Jones SR anon Structural domain Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.64 20 463 2012-10-03 14:45:55 2003-04-07 12:59:11 11 4 318 9 303 648 68 123.00 59 88.41 CHANGED hLPHLpoGWcVDQAIloEc-RlVVlRFGR-pDstCMphDElLhpluE+l+NFAsIYLVDlD..cVPDFNpMYELh....DPsTlMFFaRNKHMMsDaGTGNNNKlNaslcDKQEhIDIlEslYRGApKGKGLVlSPhDY ......................................hLPHLpsuWpVDQAIlo.E-..-Rl.VV..............IRFG+...Dh.......D.ss........CM.phDElLhpl.....A-.+.....V.KN.F..AVIY..L..V..DIs..cVPD.FNpMYELY....DPsTVMFFaR.NK.HhM.lD.h.GTG..NN.NK.lNW.shcDKQEhIDIlE........TlYRGA.+KG+GLVlSPKDY..................................... 1 94 161 241 +1772 PF05163 DinB DinB family Bateman A anon COG2318 Family DNA damage-inducible (din) genes in Bacillus subtilis are coordinately regulated and together compose a global regulatory network that has been termed the SOS-like or SOB regulon. This family includes DinB from B. subtilis [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.82 0.71 -4.55 14 1291 2012-10-02 14:44:17 2003-04-07 12:59:11 7 1 629 11 408 2210 324 161.30 18 97.05 CHANGED Msppsh.........phhpa.thsssphhchltsLss-phs...tc.spshholtphLsHlhtschhWlsthph...........ttsph.ppsptcshphhcshppshspthhshhpphs-....ttcphh..s.tsstshphshsplLhHlhsHpsHHRGQlsshlRphGhsss.hshhhh...t................t.ch ..........................................................................................................thhpa..th.s.spp...h.h.p.h.hp.p..L....s..p..c.p..hp............pp.h..s.s.h.h.t.oltcshh...H...lh...s.s.....c.h.h...a..l.s.t.hps.....................................tt....s..h........p...t...h...........p.....p.....s...h....p....p.....l...c...p....h....h..p.....t...h......s.....p...p...hp.....p..a..h.s..p..h..sc.............t..t.h.hhh...........pt.s...s..h....t......s..h...p....h.......s.h...t..p.........h....l...t..H...l.h...s....Hps.HHRGQlss.h...lR.p.hG.h.s.s.s.s.hsh...............tt.................................................................................................... 
0 165 288 348 +1773 PF00775 Dioxygenase_C Dioxygenase; Dioxygenase Bateman A anon Pfam-B_1018 (release 2.1) Domain \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.26 0.71 -5.03 20 2905 2012-10-02 19:08:27 2003-04-07 12:59:11 16 16 1090 387 1080 2726 434 159.70 27 65.12 CHANGED l-GPhYlss...APptss.huchs.t..ss.GpslhlpGpVpDtsGcPlsGApVElWHAsscGhYShasss..ts.paNLR..GpllTDs-GpYchpTlhPusYshPspGsstthlp..hGRHshRPAHlHahVoAPGa.cpLsTQlahpGDsalt.sDhshu.....s+spLlhshpptsss...................................hpchpachsLssp ...............................uPha....t..........................................ts.Gp.l...hlp...GpVh...D......t.....s...G...p...P.l.ss....u..hl-lWpA...su.sG..t....Y.s...t........h...........s..............s........s..................................t.......s........c..................s..........h...c.....................G+h..h..T..D..s...s...G..p..a.p.F..p.....TlhPu..sY.....sh..s........................................t.pshRss....H..lHh...l............u...uh....ptLhT....p....hah..........s..-..hh....ss.h.s......hhttLl.......t...........................................h.........t.................................................................................. 0 252 577 855 +1774 PF04444 Dioxygenase_N Catechol dioxygenase N terminus Kerrison ND anon DOMO:DM04160; Family This family consists of the N termini of catechol, chlorocatechol or hydroxyquinol 1,2-dioxygenase proteins. This region is always found adjacent to the dioxygenase domain (Pfam:PF00775). 
20.90 20.90 20.90 21.60 20.80 19.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.31 0.72 -4.00 19 910 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 500 43 410 899 78 74.30 31 24.82 CHANGED ssRs+pIspcllpcLacsIc-hclT.-EahsulsaLschGp.....ptEhsLLussLGlEH..shthDstptps.uhpG..GT ........ssRh+plhppLlpcL+shlc-hclT.sEahtulpaLscsGp...................+pEhhLLuDsLGlpp.....hlDthpttt................................. 1 78 193 315 +1775 PF01866 Diphthamide_syn Putative diphthamide synthesis protein Enright A, Ouzounis C, Bateman A anon Enright A Family Swiss:Q16439 is a candidate tumour suppressor gene [1]. DPH2 from yeast Swiss:P32461 [2], which confers resistance to diphtheria toxin has been found to be involved in diphthamide synthesis. Diphtheria toxin inhibits eukaryotic protein synthesis by ADP-ribosylating diphthamide, a posttranslationally modified histidine residue present in EF2. The exact function of the members of this family is unknown. 
28.70 28.70 29.50 29.10 27.90 28.10 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.11 0.70 -5.18 57 985 2009-01-15 18:05:59 2003-04-07 12:59:11 12 19 467 4 687 991 178 247.50 26 64.82 CHANGED hPEGLhhhuhpluchL..cp....sst.....sll.GDssYGACsls-hpu.ptlssDhllHaGHosls......ht..h.slal.sph...plDsp.chlcplhpph.......pplullsTlQahptlpplpphLpp.......p...hhlshsps+..htsGpVLGCshssh...........pshlalGsGpFHh.uhhl.u....sph.shthDPa....shshc...p.-tschl+hRhttIp+At..sA..+paGlIluohssQtp.clhcplpphlcp...GhcshllhhsclsPscLtth.s...lDsaVpsACPRlulD.sttFp+Pl.LTPhEhplslstpp.......tpY.hD.h ........................................................hP-tlh..u..lsp.l.....pp..........tt.p.......................hhlhuDs..sY.G.uCClD-hs.A........p........ths.sDhllHa..G+uCL.s........................t.t.l.slalh.sph......t.lc.....ht...thh.p.p.h.pt....t.h......t.t................ppls.lhssh..pa.....t..t...l.pt.h.tt...Ltt...................................t......h.......s......................t..p....l...h...sp...p...............................t.hha.l..us....s..hp......h...............s........hhl.t.....sth.....hh......th....s...Ph...............hp.....h......p....................phtthhthRhth.ltp.........up............p.A..................phhGll.luoLuhps..p.h.hpplp.phl..pt........t...G..h..p............hhlhhucl.s..sKLs.......h.............pl....-saV.luCP..c..h..s....l..........s..............s.....t..F.....+..Pl..loPhEh.lslt......................................................................................................................................... 0 245 397 568 +1776 PF02763 Diphtheria_C Diphtheria toxin, C domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain N-terminal catalytic (C) domain - blocks protein synthesis by transfer of ADP-ribose from NAD to a diphthamide residue of EF-2. 
25.00 25.00 31.70 106.20 24.10 17.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.23 0.71 -4.85 3 23 2012-10-01 23:25:29 2003-04-07 12:59:11 9 1 10 13 1 35 0 169.70 98 32.48 CHANGED GADDVVDSSKSFVMENFSSYHGTKPGYVDSIQKGIQKPKSGTQGNYDDDWKGFYSTDNKYDAAGYSVDNENPLSGKAGGVVKVTYPGLTKVLALKVDNAETIKKELGLSLTEPLMEQVGTEEFIKRFGDGASRVVLSLPFAEGSSSVEYINNWEQAKALSVELEINFETRGKRGQDAMYEYMAQACA .GADDVVDSSKSFVMENFSSYHGTKPGYVDSIQKGIQKPKSGTQGNYDDDWKGFYSTDNKYDAAGYSVDNENPLSGKAGGVVKVTYPGLTKlLALKVDNAETIKKELGLSLTEPLMEQVGTEEFIKRFGDGASRVVLSLPFAEGSSSVEYINNWEQAKALSVELEINFETRGKRGQDAMYEYMAQuCA 0 0 1 1 +1777 PF01324 Diphtheria_R Diphtheria_tox; Diphtheria toxin, R domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain C-terminal receptor binding (R) domain - binds to cell surface receptor, permitting the toxin to enter the cell by receptor mediated endocytosis. 25.00 25.00 27.80 81.20 19.60 18.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.82 0.71 -4.66 3 23 2009-09-10 22:55:25 2003-04-07 12:59:11 14 1 10 12 1 20 0 138.70 95 26.57 CHANGED SPGHKTQPFLHDGYAVSWNTVEDSIIRTGFQGESGHDIKITAENTPLPIAGVLLPTIPGKLDVNKSKTHISVNGRKIRMRCRAIDGDVTFCRPKSPVYVGNGVHANLHVAFHRSSSEKIHSNEISSDSIGVLGYQKTVDHTKVNSKLSLFFEIK SPGHKTQPFLHDGYAVSWNTVEDSII+TGFQGESGHDIKITAENTPLPIAGVLLPTIPGKLDVNKSKTHISVNGRKIRMRCRAIDGsVTFCRPKSPVYVGNGVHANLHVAFHRSSSEKIHSNEIsSDSIGVLGYQKTVDHTKVNSKLSLFhElK 0 0 1 1 +1778 PF02764 Diphtheria_T Diphtheria toxin, T domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain Central domain of diphtheria toxin is the translocation (T) domain. pH induced conformational change in this domain triggers insertion into the endosomal membrane and facilitates the transfer of the catalytic domain into the cytoplasm. 
25.00 25.00 28.40 27.40 24.40 24.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -10.95 0.71 -4.50 3 24 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 11 12 1 32 0 178.30 91 28.97 CHANGED SCINLDWDVIRDKTKTKIESLKEHGPIKNKMSESPNKTVSEEKAKQYLEEFHQTALEHPELSELKTVTGTNPVFAGANYAAWAVNVAQVIDSETADNLEKTTAALSILPGIGSVMGIADGAVHHNTEEIVAQSIALSSLMVAQAIPLVGELVDIGFAAYNFVESIINLFQVVHNSYNRPA ....................................SCINLDWDsIRDKTKTKIESLKEHGPIKNKMSESPNKTVSEEKAKQYLEEFHQTALEHPELSELKTVTGTNsVFAGANYAAWAVNVAQVID.SETADNLEKTTAALSILPGIGSVMGIADGAVHHNTEEIVAQSIALSSLMVAQAIPLVGELVDIGFAAYNFVESIINLFQVVHNSYNRPA................. 0 0 1 1 +1779 PF00200 Disintegrin disintegrin; Disintegrin Finn RD anon Prosite Domain \N 19.60 19.60 19.80 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.80 0.72 -11.29 0.72 -3.76 154 2414 2009-09-12 09:20:59 2003-04-07 12:59:11 18 51 294 43 1025 2300 5 73.50 43 11.29 CHANGED EtGE-CDCGs.cpC..p...ssCC.........sstsC+Lp..sGupC....up.G.CCp..p.Cph..pssGslCRtsp.s-....CDlsEaCsGpSspCPsDhat .........................EtGE-CD.CG..s.....p.p...C..p........ssCC..................ss..sCp.L...c.........su......u.p......C..............upG.C.C..............c..p..C..p.a......ps.s..Gs.hCRtst...s-.....CDlsEaCsG.p..S..s..pCPssha.h......................... 0 211 280 525 +1780 PF05141 DIT1_PvcA Pyoverdine/dityrosine biosynthesis protein Bateman A, Eberhardt R anon COG3207 Family DIT1 is involved in synthesising dityrosine [1]. Dityrosine is a sporulation-specific component of the yeast ascospore wall that is essential for the resistance of the spores to adverse environmental conditions. Pyoverdine biosynthesis protein PvcA is involved in the biosynthesis of pyoverdine, a cyclized isocyano derivative of tyrosine [2,3]. It has a modified Rossmann fold [3]. 
23.00 23.00 23.70 23.90 22.60 22.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.79 0.70 -5.09 33 360 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 254 4 199 317 7 262.50 32 49.15 CHANGED plptFlppscPIchlLPAFPsKSP...Np.pKVlGsLPDhuEclALppLsshspcIpplYsPGAclhIsSDG+VFuDlluVsDcslssYspsL+pLhpplu..........hsp.lpahsLpDl........................tth.th.hssphsthRphLhppasssstsl...cpplps...scsththYRGhs+FLh-D....hhs.t.shS+pth+KsspphAhsMIpRupAausLlttpFPptlRLSIHspsssu.KhGIplh...................Pspcsh...hTPWHusssphs....suphhhhp+schtp..shcllphsG ..................................lptalsptp.lchlLPAFPsKSs...N..pKVLGphPDhuEcluLppLsphspcIp.plYs........PGs........p........lhIhSD.G+VFu........Dhl.ul.sDpcl.sYsctL+phhtpht..........hsp.lphhsLpch..........................cphst.h..phstph-thRchLhst.hss.ss....t..l....pppltt...spctthhYpGhp+Fh.pD......h....tt...s.h.S....ppth+.+pspplAhtMlhRu...............pAaushlp......ppFP.ctlRLSIHsp.........sstu.Khulplh..............................sppcsa.....hTPWHssll..s....cGp.hhhhp+pph.p..shp......h................................................................................... 0 23 103 165 +1781 PF04977 DivIC Septum formation initiator Bateman A anon COG2919 Family DivIC from B. subtilis is necessary for both vegetative and sporulation septum formation [1]. These proteins are mainly composed of an amino terminal coiled-coil. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.42 0.72 -4.49 70 4922 2012-10-02 13:28:50 2003-04-07 12:59:11 10 6 3891 0 944 2787 1568 78.40 20 63.49 CHANGED lhshhhhhhhhhttshhshhphcpclsphppphppLppcpppLcsclcpL.p...sp-hlcchARpcLshscssEhhaplspp ..................................hhhhhhhhhhh...h.p...sh.t...ph....h...p.hp....p....p...l...s...p..hpp....php...plp..p...cpppL.......p.......s-lpcL.s..........s.p-...hl.cc.hARpc.huh..s..p..s.s.Ethapl...p.................... 
0 323 617 794 +1782 PF05103 DivIVA DivIVA protein Bateman A anon COG3599 Family The Bacillus subtilis divIVA1 mutation causes misplacement of the septum during cell division, resulting in the formation of small, circular, anucleate mini-cells [1]. Inactivation of divIVA produces a mini-cell phenotype, whereas overproduction of DivIVA results in a filamentation phenotype [1]. These proteins appear to contain coiled-coils. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.60 0.71 -4.10 99 3249 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1865 6 551 1653 105 105.00 27 54.00 CHANGED lTPhDIppKcF+psh..R.GYcp-EVDcFLDplhcDYEslh...+EspcLccclppLcpclppapphcpslppsllhApphu-chKtsApccuphllppAptpAppllt...........pu.tph.pltpph-.pLK+pspla+ppap .................hosh-IhpKpF.p...p.th....R...G..YctcEVDpFLD.pl.......hp-a.......-phh.....................pchc....p.......Lc.........p....cl.....p...pLc.......p.......c.......l.............pp..........h.p.........p....................................t....................................................................................................hh.........s..t......t.t...h.....pup.......tstt........................................................................................................................... 0 199 374 482 +1783 PF00778 DIX DAX; DIX domain Marcu O, SMART anon Alignment kindly provided by SMART Family The DIX domain is present in Dishevelled and axin [2]. This domain is involved in homo- and hetero-oligomerisation. It is involved in the homo- oligomerisation of mouse axin Swiss:O35625 [3]. The axin DIX domain also interacts with the dishevelled DIX domain [4]. The DIX domain has also been called the DAX domain. 
21.40 21.40 21.80 21.60 21.20 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.79 0.72 -4.26 25 558 2009-09-11 10:36:07 2003-04-07 12:59:11 12 24 114 18 287 508 1 77.40 42 11.79 CHANGED sspTKllYa.lscEpsPYhlplstsst.lTLtcFKshl.s+p..sY+aaFKshDsDFu..sVKEElh-DsshL..PsapG+llua...lhss- .............................s.tphhVhYa..h.....stE.....hPYhspl.ht.s.....lTLucFKphL...s+p..ssY..+aaFKs.....hss-Fs....sVpEE.lh-Ds.shL..PsapG.+lluhl.p..t......... 0 70 98 185 +1784 PF01738 DLH Dienelactone hydrolase family Bateman A anon Pfam-B_757 (release 4.2) Domain \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.57 0.70 -5.06 17 4591 2012-10-03 11:45:05 2003-04-07 12:59:11 13 32 2059 10 1757 12590 4895 210.60 23 80.54 CHANGED hsualutPsss.....hPslllhp-laGls...s.h+thsppLAptG.ahulsPDlYhRpu........................psschspttphhtthhpph..tplhscl........sslsahcups.hpstclGllG.aChGGthuh.hAups..hlcusluaYG........h..sp..lthssplpsPlhhthGppDphlssps...hpplppshttsssshplchYssAuHuFtsstss...........sasstAupcuWp+hhpahpphh 
........................................................................................................................................................................sahs.hPttt......t..hP........u.l.l.....l........l........p............-.......h......a........G.lp..............sth.c...p.....h....u....c....c..l....A......t.......t....G....Y..h..s..l.......s........P..D....h...a.tpps.............................................................t.p..h.s.p.t..t...p..h...h...p...t..h...h..t......p...h.................s...p....h..hs.ch..............................s.s.l...s...h..l...t...p.........p.....s.........................s......s....t.+.....l.uhsG.........a.C...........a.G.........G........p....l.........s.........h.........h.........h............A..............s...............p..................s........s.............p............l..........p..............u..........u.........l.........s.........a....YG......................................................p..t...s...h...t.......s...s..p....l.....p....s........P....l....l.h..h..a.u...t..p...D...s...h...h..s...p.t.............h.t.t....h.c.......p............t..L......p.......t.......s....s.......s....p........h.....p...h..h.....h...Y...s...s....s...s.H......u.F.ss..s..tps.............................................pY.s..tt..u.......u..c...p...u.a.pch.ltahpph.h....................................................................................................................................... 0 494 1027 1453 +1785 PF04914 DltD_C DltD C-terminal region Kerrison ND anon Pfam-B_6216 (release 7.6) Family DltD is and integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. 
It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis [1]. This family consists of the C-terminal region of DltD. 23.80 23.80 23.80 24.10 23.60 23.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.60 0.71 -4.28 36 1136 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 1012 6 85 607 7 128.70 41 31.78 CHANGED pFGIcsp.aYcp+lKp.plp..+LKspp+phsYhp.SPEasDhQLlLsphscppscVlFlIsPVNuKWhcYTGlspchhpphscKI+hQLpsQGFppIsDhSccuscsYFMpDTIHlGW+GWlthDctlpsFhppp .....................paGIcst.aYpp.plKp.plp..+h.K...spp.c..p...s.Yhp.SPEYsDL.QLlLsph..pcspscslFlhsPVNs.+.WhDYsGlsc-hhptshcKI+.QlpspGa..pIsDhSccsh-tY...Fh...pDTI.HLGW+...GW.lthDctlp.Fhpp....................................... 0 18 38 63 +1786 PF04918 DltD_M DltD_central; DltD central region Kerrison ND anon Pfam-B_6216 (release 7.6) Family DltD is and integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis [1]. This family consists of the central region of DltD. 
20.70 20.70 20.90 20.90 20.60 20.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.91 0.71 -4.05 9 1116 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 994 6 79 595 1 162.20 33 39.87 CHANGED lGpsGSsSLsHahshsuhhspLKsKKhlaVlSPQWFstpGlspssFpthhSssQlhpahhpps.ssptcphhAKRLLpFp...scsshpphLcplAsspphsstshh.ltp.tp....hhc+p-ulaSphshs.....s.ppplp+hhKtLPcshSappLsplAscpGp+pTssN .....................LGpuGopSLsphhshpuh.hspLc..sKKsVallSPQWFo.p.pG.hssssFsthaSppQhhpaltpps.ssphc+hhApR...LLph........cs.t...hKph...Lp.plu...p...s....pphs........p...ts....t...thh....th.tp........hLp+p-....s..hhS.Fshc.......s..ptl.p...+h..h..pt....L...s.cphSapp.lcphApppuctpTssN............................................................................................ 0 17 35 57 +1787 PF04915 DltD_N DltD N-terminal region Kerrison ND anon Pfam-B_6216 (release 7.6) Family DltD is and integral membrane protein involved in the biosynthesis of D-alanyl-lipoteichoic acid. This is important in controlling the net ionic charge in lipoteichoic acid (LTA). This family is found in bacteria of the Bacillus/Clostridium group. DltD binds Dcp and ligates it with D-alanine. DltD does not ligate acyl carrier protein (ACP) with D-alanine. It also has thioesterase activity for mischarged D-alanyl-acyl carrier protein (ACP). DltD is thought to be responsible for discriminating between Dcp involved in the D-alanylation of LTA, and ACP involved in fatty acid biosynthesis [1]. This family consists of the N-terminal region of DltD. 21.20 21.20 23.00 24.50 20.50 21.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.99 0.72 -4.11 32 1047 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 949 6 72 531 0 62.30 44 15.28 CHANGED cplcpuAsolossshKGphlKpcAl..ppspYlPhaGSSEhsRhDshHPSVhAc+Ypc.sYpPFL .........shccuAsShotpshK..uphlps+Al..pss+Y..lPhaGSSEhtRhDshHPulhhcKasc.sa+PFL... 
0 12 28 50 +1788 PF03474 DMA DMRTA motif Ottolenghi C anon Ottolenghi C Motif This region is found to the C-terminus of the Pfam:PF00751 [1]. DM-domain proteins with this motif are known as DMRTA proteins. The function of this region is unknown. 20.30 20.30 20.30 25.00 20.00 20.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.66 0.72 -4.10 5 284 2012-10-01 23:03:33 2003-04-07 12:59:11 9 4 122 0 123 261 1 38.10 53 10.22 CHANGED sRoPIDlLt+lFPp+KRoVLELlL+sC+GDllpAIEshL ......RsPlDlLs+lFPsp+RosLEllLptCsGDlVpAIEplL..... 0 28 39 77 +1789 PF00885 DMRL_synthase 6,7-dimethyl-8-ribityllumazine synthase Bateman A anon Pfam-B_1503 (release 3.0) Domain This family includes the beta chain of 6,7-dimethyl-8- ribityllumazine synthase EC:2.5.1.9, an enzyme involved in riboflavin biosynthesis. The family also includes a subfamily of distant archaebacterial proteins that may also have the same function for example Swiss:O28856. 20.60 20.60 21.70 21.20 20.30 19.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.59 0.71 -4.53 104 4443 2009-09-10 17:03:43 2003-04-07 12:59:11 14 14 3970 450 1238 2833 2169 140.60 42 89.31 CHANGED sssh+lulVsucaNptIscpLlpGAlptlpptG.s..tplslhpVPGAaElPlsspplu.cs.s....c.......aDAllsLG.sVI+GsT.Ha-aVuspsspGlh..pl.....uLcpslPlshGVLTs.csh-QAlcRuuh..ptt...sKGtEAAtAulchlsl...hcpl .....................u..sh+luIVhuRF..Nph..IsspLLpGAl-sL.p.+.tG...l....p....p....c....sIslshVPGAaElPlsupplA..co..s..........c....................................aDAllsLG.sVIRGsTsHF-YVsspsupGls..pl.....u.l.c.....s.s.l.PV.h.F.G.VLTs.-ohE.Q.A.......lERAGs........KsG.....NKGsEAAhsAlEMhslhct.h.................................... 
0 390 794 1053 +1790 PF04976 DmsC DMSO reductase anchor subunit (DmsC) Bateman A anon COG3302 Family The terminal electron transfer enzyme Me2SO reductase of Escherichia coli is a heterotrimeric enzyme composed of a membrane extrinsic catalytic dimer (DmsAB) and a membrane intrinsic polytopic anchor subunit (DmsC) [1]. 19.60 19.60 19.60 19.60 19.30 19.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.70 0.70 -5.14 6 1995 2012-10-01 19:35:38 2003-04-07 12:59:11 7 11 916 0 169 1024 199 261.40 35 93.70 CHANGED Ms.GhaEhPLVhFTVhuQsslGual...lhullhltucspt..+ph..lltuMFllLsLlGlGFIAShhHLGpPlRAFNuLNRlGuShLSNEIAuGuIFFuluGlaWLlAlltKhssuLuplahllohllGllFsahMspVYp.IcoVPTWaouaTshuFaLThlhuGssLuhuLLpstpls.shshphLshLssLu.................sslsusVslhQGhsLpoIcoSsQpAssLVP-a.........AsLpshRhlLLslshshLlhshhhhc.sslslL.lluhlLlLAGEhIGR...........sLFYGL ....................................................hHEhPLllFTVhuQsslGuhl......V.h..u...l..u..h...h.........t.u..ph.....p.....s.p...................ppt.........lht....s.h.h.h.lh.l..L....h.u..l.....Ghl.AShhHLGpP.......hR..AaNuL..s....+.l.G.uS.hLSNEI....s....u.....u...ol......F......a.A....l.u....Gl....h......a..........L...l..s.........h...........l.......t.....+....h.......s.....s....u....l..t.....p...l.......h.h..l....l.......s.hl...lG...l.l....F...V.a....hMs...pVYp....l.s.TVPTWtss.aTs...hsFa.....lT.....s.h.....l..s...G..s..l...L.u...h.......h......L....l......t.h......u.t.l.......shs....h..p..h...h..s.h.l.s.s.LA..................................lls.ssh..h..........h...h.....p.u....h.p.....l...s...s.....l..c....o....u.l..p...t.u..s.t..L.h..s..s.a.........................................u.tl...sh+...l...l.....L.....l...s..s..uh..s....h...h....l....s..s.......h....t.......t...h...........p.......s....t.......s.s..hL........h...u..h....l.Lllu..GEhlGR...........hlFYs................................................................................................................................
. 2 45 97 139 +1791 PF03989 DNA_gyraseA_C DNA gyrase C-terminal domain, beta-propeller Yeats C anon Yeats C Repeat This repeat is found as 6 tandem copies at the C-termini of GyrA and ParC DNA gyrases. It is predicted to form 4 beta strands and to probably form a beta-propeller structure [1]. This region has been shown to bind DNA non-specifically and may stabilise the DNA-topoisomerase complex [2]. 20.40 4.10 20.40 4.70 20.30 -999999.99 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.96 0.72 -4.66 362 37895 2009-01-15 18:05:59 2003-04-07 12:59:11 8 34 4630 51 7547 27731 14770 47.70 22 27.63 CHANGED cp..llhhTppGhsh+hsh................................................schpt..hsRsspGlpsh........plpcs..-plhshhhss ...................................................................................llhhTppG.h.sh..R.h..sh..............................................................................................................schpt.....hs.R.s.sp.G...l.psh...............plp..cs.....-plhsh....t............................................................. 0 2464 4959 6375 +1792 PF00204 DNA_gyraseB DNA_topoisoII; DNA gyrase B Finn RD, Griffiths-Jones SR anon SCOP Domain This family represents the second domain of DNA gyrase B which has a ribosomal S5 domain 2-like fold. This family is structurally related to PF01119. 
20.70 20.70 20.70 20.70 20.30 20.60 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.15 0.71 -5.05 99 18153 2009-01-15 18:05:59 2003-04-07 12:59:11 20 41 10023 20 2024 14069 4290 160.20 42 30.69 CHANGED Glpcalchhsps.........................h.hpstt..............ps....hpl-lAhtas.s..s.s...c.plhSFs..NsIsThcGGTHlsGhcsuls+slspascpp.phh......ppt.t....plsscDl+cGLssll.sl....+lssP..pF-GQTKp+Lssspsp..shlpph..lp-pht.phlp..css....phscpllppshtttc....sRhs..s+ps+ch ................................................................................GlpsFVcaLNps....K...psl.............hsp..........lhhhps-p..................................cs........lsVElA.hQWN..-....uap.......E..slhsFs...NNIsp+-GGTHlsG.FR..sALT.......Rs.lNs.Yhc...cp...shh...............Kc...t.c......sloG.-DhRE.G.LsAVl.SV....K.ls-P......pFp.uQ.........TK.sKLssoEs+.................shVp.p.h.hs-...tls.paL.....cNP................p..A+hllpKhlpAAp...........AR.A....A++AR-........................................................ 0 674 1283 1691 +1793 PF00986 DNA_gyraseB_C DNA gyrase B subunit, carboxyl terminus Finn RD, Bateman A anon Pfam-B_332 (release 3.0) Family The amino terminus of eukaryotic and prokaryotic DNA topoisomerase II are similar, but they have a different carboxyl terminus. The amino-terminal portion of the DNA gyrase B protein is thought to catalyse the ATP-dependent super-coiling of DNA. See Pfam:PF00204. The carboxyl-terminal end supports the complexation with the DNA gyrase A protein and the ATP-independent relaxation. This family also contains Topoisomerase IV. This is a bacterial enzyme that is closely related to DNA gyrase, [1]. 
21.10 21.10 21.10 22.00 21.00 20.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.08 0.72 -4.22 72 9044 2009-01-15 18:05:59 2003-04-07 12:59:11 16 17 5005 38 1681 6230 3303 62.90 53 9.63 CHANGED ttppphpIQRaKGLGEMsscQLWETTMsPcsRpLlpVplc..D....ttp..scchhshLMGc.cspsR+paIp ................s..ppthslQRYKGLGEMNs-QLWETTM.......cPcsR..pLlpVs..l..-....D....................uhp........A-ph..hshLMG-.cVEsRRpFIp......... 0 548 1095 1410 +1794 PF03603 DNA_III_psi DNA polymerase III psi subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.20 20.20 20.30 20.80 19.50 19.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.70 0.71 -4.28 26 745 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 743 5 76 295 13 127.30 54 93.11 CHANGED M.s+RDhhLQphGIoQWpL++PphLp..G..phslsLsscsRLllVusphPptsps.LlpDlL+SlpLs.sQshplss-plshLstpph.phhWhlGscpsth.htu...........tpL........pSPtLspLpssspsKRsLWQQIssa ............MTSRRDW.LQQLGITQWsLRRP...u.ALQG.............EIAIul.PsHVRLlhVAs-LP.uLs-P..LlpDVLRALsloPD..QVLpLTPE+lAML..Pp..so+..CNSW.RLGsD....pPLs....LcG...........AQl..............sSPAL.s-LcuNssARtALWQQICsa........ 0 6 25 53 +1795 PF01653 DNA_ligase_aden DNA_ligase_N; NAD-dependent DNA ligase adenylation domain Bateman A anon Pfam-B_1334 (release 4.1) Domain DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor [1]. This domain is the catalytic adenylation domain. The NAD+ group is covalently attached to this domain at the lysine in the KXDG motif of this domain. This enzyme- adenylate intermediate is an important feature of the proposed catalytic mechanism [1]. 
19.00 19.00 19.00 19.00 18.90 18.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.85 0.70 -5.48 15 5286 2012-10-02 00:43:09 2003-04-07 12:59:11 13 57 4462 24 1089 4137 3767 299.60 38 46.15 CHANGED ppsppclpcLpchl+pasYcYYVhDsPhlsDu-YDpLhpcLppLEpcaPELlTPDSPTQ+VGuussputFpplpH.sPMLSLDNAFspc-LpsahcR....lpcthsppsp....aslE.KIDGluluLhYcsGlLs+AsTRGDGphGEDVTpNl+TI+sIPlplshs.......tplElRGEVahsKcsFtpLNpph.cpsccsFANPRNAAAGSLRQLDPclTA+RsLphhsYulu..ps.phshpTQhtsLphL+phGFs.Vspphphsc...sh--VlsahcpapccRcsLsa-IDGlVlKlsplshQccLGaTu+uPRWAlAYKFPAc ...........................................................................t...ppchpcLppplppash.tYY..s...D....s....P..plsDuEYD.pL.h.c.cLtpLEppa....P.......-....h................h.s.......s.D.....S............P....T.p...RVGh...s.....s.l...........p.u.........F.......pplpHthPML..SL..s..s.....s......F..........s..........c.............c.........-..l..p..sF.pR......................lp...c........t...l..........s......p......s....p.............ahsE.KlDGLAloL..hY....c.....s......G.......h.............Llpus.TRG.D.G.p.tGEDlT...p..N.l.........+T..I......c..s....I.Ph.pLptt................................PtplEVRGElahs+psFtpl.N..............tpt.t......p...............p......G............p..........p...............h.....F......A.NPRNAAAGSL..RQ.....L..............D...s......p........l.........o.........A..........p...........R.........s........LshFsYu.....l..............u..................................p...................s...........................................t........................s......o..............p...pt....Lp.hL..p.p.......hGF.........s.s....s....s.t....h....p....h....s.p......................s..h..--.....l..h.p.....ah....pch..............t........p......p...........R......s...........s..L.......sa-I..DGlVlK.VsclshQc.pLG..a........s........u.+.....u.PRWAlAaKFPAp..................................................................................... 
0 368 717 925 +1796 PF03120 DNA_ligase_OB NAD-dependent DNA ligase OB-fold domain Bateman A anon Pfam-B_1334 (release 4.1) Domain DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor [1]. This family is a small domain found after the adenylation domain Pfam:PF01653 in NAD dependent ligases [1]. OB-fold domains generally are involved in nucleic acid binding. 25.00 25.00 27.00 25.40 24.30 23.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.55 0.72 -4.32 23 5111 2012-10-03 20:18:02 2003-04-07 12:59:11 11 59 4425 11 1047 3928 2199 82.00 46 12.29 CHANGED s.TpLhDlplpVGRTGs.lTPlApL-PVpluGosVopATLHNtDhIcc+DIRIGDpVllcKAG-VIPcllpVlh..-pRssssps ............t.hTplpslchpV.GRTGs.lTPVApL.............c......P.......Vpl.....uGssV....opATLHNhDp.I...........c............c............h...........Dl.+..l......G.DpVl.lp+AGDlIPcllpVlh..pcRstpt............. 0 351 681 885 +1797 PF03119 DNA_ligase_ZBD NAD-dependent DNA ligase C4 zinc finger domain Bateman A anon Pfam-B_1334 (release 4.1) Domain DNA ligases catalyse the crucial step of joining the breaks in duplex DNA during DNA replication, repair and recombination, utilising either ATP or NAD(+) as a cofactor [1]. This family is a small zinc binding motif that is presumably DNA binding [1]. IT is found only in NAD dependent DNA ligases [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.44 0.72 -4.24 152 3952 2009-01-15 18:05:59 2003-04-07 12:59:11 11 37 3909 5 869 3009 904 27.80 47 4.05 CHANGED pCPsCsotlh.+.ps.....Es.shRCsNs..sCPA ...pCPsCGSclh.+hEG............Es.shRCsss..hCsA.. 
0 296 579 741 +1798 PF00145 DNA_methylase C-5 cytosine-specific DNA methylase Sonnhammer ELL anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.04 0.70 -5.32 31 7421 2012-10-10 17:06:42 2003-04-07 12:59:11 12 102 3000 52 1976 6841 2919 257.40 19 65.39 CHANGED hchl-LFAGlGGhcLGhcpsu...........hpslhusEhDcp.....AhcoYptNa......phshsDIsphshp....plP.....hDlLluGhPCQsFShAGp....ppGhsDsRG...sLFa-hhRllcppc....PKhFlhENV+sLlspcpt...pshpsllpphcchG..YpVp.......hplLsApDaG.lPQsRcRlallGhcpch.........................th.phphsp.....................h.sltDlhpp.........h.pschshssphhpshphht....t..tsas..........hhhpp........................................cphttsthhshshpppsh..................................h.tsshhtpsst.......................................pthRplTsREsA.RlQGFP-s.....Fphhssps........ptY+phGNuVsVsllptlucplhp ...................................................................................................................................................................................................................................hphFsG.h..G....uh.p.h....u....hpt.hs................................hph..h.h..h.-..h...p.................................sh.t.s...ht...h...ph...................................t..D......l...t...ht.t..........................................th.....t.....h-....l.l.h.u..G.PC.Qsa...S....h....h....s...............................t...t....s......h........t.......p....t..p.s.......................tL....h..h...p..h.h....c....h....l.....p...........h...p...................P.t......h....h.h.h.E..NV.....s......h...........h.............p...................................................h..t....h...h...t.....h...............t..........t..............h...s.........Y.t.h....................................h..l..p...s.t.....p..a.......t......hs...ptRpRh...h...hl...uh.....t...................................................................................
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................R...ho..hE...........hh.hh...s...h...t............................a..........h........................................h..hGsuhs..h...hh.....h........................................................................................................................................................................................................................... 0 634 1210 1611 +1799 PF01119 DNA_mis_repair DNA mismatch repair protein, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon SCOP Family This family represents the C-terminal domain of the mutL/hexB/PMS1 family. This domain has a ribosomal S5 domain 2-like fold. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.26 0.71 -4.64 151 4737 2012-10-03 01:04:38 2003-04-07 12:59:11 14 30 3934 16 1407 3880 762 118.40 30 17.88 CHANGED ltsl..hGpshspph.l.lp.tpsps........hplpGalupP.s.....hsRus.psh.hhalNGRhl+sp.....hlpcAlcpAYp.....shLsts+aPhshLhlclsPptVDVNVHPsKpEVRFpcpctlhchlhpulppsLt ..................................................................tslhGhth.h.pph..ltlc...hppts...................hplpGalu.p..P..p.............................hs.Rus....ps..hphhal..N........G...........Rh..l.+.s.p.............hls+....A....l.p...p....uYp.............................sh...L...s....t.s......p..........a..........Phhl..L.hlc......l.D..P.p.t.VDVNVHPs.KpEVRFpppc..tlhphlhpulppsL.t......................................................... 0 473 844 1161 +1800 PF02499 DNA_pack_C Probable DNA packing protein, C-terminus Mian N, Bateman A anon Pfam-B_1283 (release 5.4) Family This family includes proteins that are probably involved in DNA packing in herpesvirus. This domain is found at the C-terminus of the protein. 
19.60 19.60 19.90 19.60 19.40 19.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.31 0.70 -5.71 27 252 2012-10-05 12:31:08 2003-04-07 12:59:11 10 4 127 9 1 214 0 256.60 42 58.45 CHANGED LsQsssKIIFlSSoNoucpSTSFLaNL+sus-chLNVVoYVC--HhpchstpsssTuCsCYhLpKPsFIThDsslRpTAsLFLtsuFhpElhGs...tssst.sssptllo-pAhspF.lhRsSTssppsh..LspsLaVYlDPAaTsNscASGTGlAsluph...psphllhGlEHFFL+sLTGouutpIApCssphltulhsLHP.hhppV+lAVEGNSSQDSAVAIAshlpc.h.............shslhFaHstsp..sssltaPaYLLs+pKohAFEhFIttFNSGplhASQ-lVSpTl+LohDPV-YLlcQl+Nlpplshps....sstoYouK..p.pshSDDlLVAllMAhYlus...ssptsF+sl ......................................h.QtssKlIFlSSsNoucpoTSF..LhpL+sAs.p..c.hLNVV..oY..VC--Hhpchst..+ssu..suCsCYpLaKPsFIoh-ssl+cTANhF.l..tsuFhpEIhGs....sspt...s..........psp.llocsuhpcF............lhR.STsspp..sh..hutsLaVYlDPAaTsNt.pA.......SG....TGluhlsth........p.llhuh-ch.ltth.t.s....t.hh...h......a.....h.....h.h....p................................................................................................................................................................................................................................ 1 1 1 1 +1801 PF02500 DNA_pack_N Probable DNA packing protein, N-terminus Mian N, Bateman A anon Pfam-B_1179 (release 5.4) Family This family includes proteins that are probably involved in DNA packing in herpesvirus. This domain is normally found at the N-terminus of the protein. 
20.30 20.30 22.70 24.70 19.00 18.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.85 0.70 -4.87 26 149 2012-10-05 12:31:08 2003-04-07 12:59:11 10 3 92 0 1 142 0 267.60 39 43.48 CHANGED R.cphshPhlGslHusspaushhpshspphh.........t.t...s.........ptt....sspshlss........pLpsslppLpFhphs.tpstspcstYpushsoapuhhpsssFppLpsFltshuphLsssapst.................................pupLE.FQK.llMHshYFlsSlphs-psc+h.hshl+hhFslsphossslcpFKQ+AoVFLVPRRHGKTWhlVslIulLLuohcsI+IGYsAH.++sopsVFpEIhspLp+WFsscpl-hhKsp.TIsFsasstppSTlhFASsaNTN.SIRG .........................................................cRaQhshPtlGhlHsss-hs..htuhspchh............................s..tppp.hlts........pLppslppLpFt..s..psttp+ssYpsshsshpshhcsstFpQlssFlpcFsphLssuFpst............................tctc............................................................pspLE.FQK.lLhHsha.Fluu.spls-hA.p+l.hpaLptsFslshhS....pssl....c....pFKQ+ATVFLVPRRHGKTWFlVPlIuhlLss..h..pGIpIGYsAH.++sopsVFcElctpl++hFssphVscsKsp.sIohsasstu+SThlFASsaNTN.uIRG..................... 0 1 1 1 +1802 PF00712 DNA_pol3_beta DNA polymerase III beta subunit, N-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_631 (release 2.1) Domain A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.27 0.71 -10.21 0.71 -4.21 15 4817 2012-10-02 11:47:48 2003-04-07 12:59:11 14 8 4456 54 1006 4270 2846 120.10 28 32.57 CHANGED MKFsIpR-pLlcsLppVu+slssRsolPlLuslLLpsp-..spLolTuoDhElphpuplstpps.......ppsGslslsuRhlhDIsRsLPs.pplplphss..s+hhlp.uGp.......S+FsLsTLsu--aP..sLs ...................................M+Fsl.p.+stLlp...sLpplppsl...s.s..+....s.o..l....P.I.Lu.slhlp....s..p..s........sp.lsLouoD.h......-...l.p..l..p..s..p..l..s..h..p..ts........................psG...s.....h..s....l.sA+...hhh-...Il+...p..L....P.....c.......t..p...l.s..l...p..h..c..s........pp..h..h.lp..uG+....................Sc..FsL.ss.l.s..us-aP.pl............................................ 0 348 678 861 +1803 PF02767 DNA_pol3_beta_2 DNA polymerase III beta subunit, central domain Bateman A, Griffiths-Jones SR anon Pfam-B_631 (release 2.1) Domain A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.22 0.71 -4.06 15 4914 2012-10-02 11:47:48 2003-04-07 12:59:11 11 14 4454 54 1035 3844 2795 115.90 30 31.47 CHANGED slssssL+clIppTtFAhupp-sR.hLsGlplchpss....pLhhsATDuHRLAlpchphspstss..hsVIVPsKslhELs+llss...sp.lpltlu.......ssplthcssshhhTS+LlDGcaPD ............................lstphL+chI.p.p.T..t.FA...h..u...p.p...-..s.R......h.LsG.....lhh...c...l....p...ss..........pL.p.sVA..TD......u.....H....RL.A.........h........p.p............h.............p............l.............p.....p............s.........h......s.......................p...............h......slIl.P...t.KultE...lt..+.l.l..ss.......sp..p..s..l.p.l.t.lu..................s.s..p...l.h.h........p..........s...........s....s...........h....h....h.h.S.+.L..l.-.G.p.aPD.................................................................. 0 353 699 887 +1804 PF02768 DNA_pol3_beta_3 DNA polymerase III beta subunit, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_631 (release 2.1) Domain A dimer of the beta subunit of DNA polymerase beta forms a ring which encircles duplex DNA. Each monomer contains three domains of identical topology and DNA clamp fold. 
20.30 20.30 20.40 20.30 20.20 20.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.23 0.71 -4.54 16 4819 2012-10-02 11:47:48 2003-04-07 12:59:11 10 13 4457 54 1006 3167 2433 121.10 27 32.84 CHANGED ppllPpchsphlslssp.LtcAlpRsulLup.-+spsl+Lpls..sspLclsusssEhGpucE..tlcsshh..GcslpIuFNspYLLDsLsslcspclplphsssspshllpsssssssp.llhPh+ ......................................+llPp.s....s..p..plp.h.spppLhpAlcR.su.ll.....op....p...+..t......p..s...V...+L..p.l.s........p.s.pl.....p...l....ou.s....s....s.-.......u........c.u.pE.............pl...s.s......shs..........G.c.....s...l.c...I...u..F..Ns..pY..llD...sLps..l.....c...s.-...p.....l.plp.......h.......s.s.......s.........s......ps...s......l......l......p........s......s......s......s......p......s.....h...h...h..l..lhPhp...................................... 0 348 678 861 +1805 PF04364 DNA_pol3_chi DNA polymerase III chi subunit, HolC Kerrison ND anon COG2927 Family The DNA polymerase III holoenzyme (EC:2.7.7.7) is the polymerase responsible for the replication of the Escherichia coli chromosome. The holoenzyme is composed of the DNA polymerase III core, the sliding clamp, and the DnaX clamp loading complex. The DnaX complex contains either either the tau or gamma product of gene dnax, complexed to delta.delta' and to chi psi. Chi forms a 1:1 heterodimer with psi.\ The chi psi complex functions by increasing the affinity of tau and gamma for delta.delta' allowing a functional clamp-loading complex to form at physiological subunit concentrations. Psi is responsible for the interaction with DnaX (gamma/tau), but psi is insoluble unless it is in a complex with chi [1]. 
21.40 21.40 22.40 22.30 20.60 19.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.78 0.71 -4.34 151 1629 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1600 3 343 975 388 138.40 35 93.69 CHANGED M..sclhFYhLspssh..........tphhspLhpKuhppGh+.lhltspsppptptLDchLWsass-uFlPHsh..ssps...sst...p.PlhLsh.stt..sss..ts..lLlsL.sssh..Ps.hspF.pRll-lhsss-s.thptARp+a+ta+.st...Ghslph .........................MppssFYlLssssh.......................pphlCcLst...ctac.p..G.....p.....R.VlltspDc.ppAtpLD-tLWsh.ss-uFlPHsl.....sG..Eu........sp..ss................s....PV.luhspp....t...........sss.+s..........lLI..sL.pssh........ssh....s..ssF..scVl-hVsh--s.h+phA.R-RaKtYR.st....GasLsh............................. 0 79 183 259 +1807 PF00476 DNA_pol_A DNA polymerase family A Sonnhammer ELL, Griffiths-Jones SR anon Prosite Family \N 23.80 23.80 23.90 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.15 0.70 -5.84 12 6834 2009-01-15 18:05:59 2003-04-07 12:59:11 15 51 4984 160 1526 5654 5505 340.20 33 42.62 CHANGED lchchhpthspchshp.htthttphhchuGppFNlsSscQLphlLF-chGl..hthTcp..Gs.oTssssL-pLt...sc.hlphllpa....Rpls+LpsTalctL.phlsstsG+lHTpaspssTtTGRLSSssPNLQsIPh+s......c.GpcIRpuFlAp.G..hslluuDYSQlELRlhAclusDpthlpsFppGtDlHphsAuchaGlshcs....VsuppR+sAKshNFGllYGhSAhtLuppLs.....IopcEApchh-tYFtpaPG..................................lcpahcchhc...........cu+ppGYlpTLhsRRc......lPclsSp....stshcptAERh......AlNhPlQGoAADlhKhAhIplc.hLpc.t.ps......RhhlpVHDEllh-ssp--..httlsthlpplMcpshth........pVPLhs-hshG.tsWhsA 
.......................................................................................................................................................h.....h.t.ttpht.t..h....tth.ttp....h.h.t....h..s...G..p..t.F.N.l.s...SsK.Ql.t....l.....L..F..c..c.......h....t..l......................................h......t......p......p........p...................t....................o......T..s.....t....pl.....L..p......pL....t..........................t..........h.....s......l..........t.hlLpa........................Rtl.s.K...L..p.o....T............a....s....c..t....L.....................t......h......l....p........................p..............s.......u............R...l.H...o.p.....a..............Q............s...........h.........s.t.TGR.L.S.S...s.s....PN.....LQ..N..IP..h+s.................................p.G+.....c....I.....RpuF...l.....s........s.........pG...........hhl..l..u..ADYSQIELR.lhA....H.....l....op..D.p........t.......l.......l...p..........A.......F.......p........p............G...........t.......D...........l...H..p..tT..A.....uc.l..F..s....l.s.h..cp...............................ls..s...p.p....R+.p.A.Ks.lN..FGl.......l...Y..Gh..o..sa......uL......u..p......p..Ls...............................................ls.c..p.-.....A....p....p.....hhc....pYappaPu.......................................................................................lppa..h..cp.h.hp...........................................pA.c..p...p..G..Yl..p........T.....lh..s.RRt.....................................hl.....P.c..l..p.up......................sh..h+t..h..u.E..Rt................................................AlNsPlQ.G.oAA.DIl.......KhA.M...lp...l...p.p..h.l......p..p.....t...t....h..ps....................................+..hlh....Q...VHDELlhEl....p....pc....hpth.....tt..h.l...t..p...hM....p.p.....s....h.p.h.................................pVP..Lh...sc.h.t..G.tsW.p............................................................................................................................ 
0 539 976 1295 +1809 PF03175 DNA_pol_B_2 DNA polymerase type B, organellar and viral Mifsud W anon Pfam-B_236 (release 6.5) Family Like Pfam:PF00136, members of this family are also DNA polymerase type B proteins. Those included here are found in plant and fungal mitochondria, and in viruses. 22.90 22.90 22.90 22.90 22.80 22.70 hmmbuild -o /dev/null HMM SEED 459 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.55 0.70 -5.82 35 975 2012-10-02 01:06:00 2003-04-07 12:59:11 8 18 262 25 572 970 44 307.10 22 53.07 CHANGED chs.....cshhhhstplhh.DhsFhLsphppht..............hhphthhppshhpphchphphhphhltDoatlhh.sSLccsupuasl............hpKGhhPYctlsp.h....h.....ttshPhhcYhcc.phhhhpcplhp.hc...s.taclhccslcYsphDlhlht....plhtpapphlpcphsl.psshphap...luu.ohtIF+phh...............................................................phsstlhsPs.cchaphlRpulhGGRs.sph.shhp.....lahYDlsShYPuAhp.ashPhGpPhtstt.pththpch.htlpp....lphhspphpshl.....hlsh+sssphh.....hsshsshsu.....hhaosE-Lchtlspu......hhcshhlcsh..ccpsslFs-ah...ccahplphsucppuccpp......luKlL.N.uLYG+Fupp.....spchhlhs-ph........-ptphppltssphhlpppthhps.......pshssphh.tsstshshtp..htphspsttst.pphthtsttsh 
...........................................................................................................................................................................................................................................h.hp.h.hh....l..h...h...t.................................h.h........................................................................................................................................ta..ch.c....t.h.....YCt.DVhlht...............chh.p.a..tp..hh..pthp...l....th..h........lsS.u...t..lFcphh....................................................................................................................................................s.t.lht.s...pp.hp.....a...l....p..p...sl.GGR...s..........................h.....h.....p.......p.lh...aDh..s..uh...Ys..SA.ls...h..h....G..Phs.h........................t..........................hh.......................h.h................h...........................................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 439 448 571 +1810 PF04042 DNA_pol_E_B DNA polymerase alpha/epsilon subunit B Wood V, Finn RD, Bateman A anon Pfam-B_12632 (release 7.3) and Pfam-B_5821 (release 7.3) Family This family contains a number of DNA polymerase subunits.\ The B subunit of the DNA polymerase alpha plays an essential role at the initial stage of DNA replication in S. 
cerevisiae and is phosphorylated in a cell cycle-dependent manner. DNA polymerase epsilon is essential for cell viability and chromosomal DNA replication in budding yeast. In addition, DNA polymerase epsilon may be involved in DNA repair and cell-cycle checkpoint control. The enzyme consists of at least four subunits in mammalian cells as well as in yeast. The largest subunit of DNA polymerase epsilon is responsible for polymerase epsilon is responsible for polymerase activity. In mouse, the DNA polymerase epsilon subunit B is the second largest subunit of the DNA polymerase. A part of the N-terminal was found to be responsible for the interaction with SAP18. Experimental evidence suggests that this subunit may recruit histone deacetylase to the replication fork to modify the chromatin structure [1]. 20.50 20.50 20.70 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.41 0.70 -5.08 34 1130 2012-10-02 19:15:56 2003-04-07 12:59:11 11 22 407 8 787 1175 118 215.10 19 39.82 CHANGED llhsSGlhhs...tsshs....hchLpchlptht..........ss.sstlllhGshlstptt.ht.t.......................shstshhhltplpshlppls....tssplsllPuspDssst........hLPQtPh.pt.hhsc.pth............................hphsoNPsphpls..shplsssss-hltclt+tphtpss............................................................................hchlcplLpppplsPhsP..................................shhshsatpppshhl.ps..hPclhlsus...ttpFsp 
.................................................................................hhlhSslhhs......s.shs...............hphLt...chl.ptht........................ssts.s..h.lll.hGs.hlsts..t..h.h.tttth............................................sh.p...p...h....th....h....t......p....l.ts.h...h.tplh............tshp.llllP.G.s.pD....ssst..........shP...Q.s..l.pt....hh.s.ph.pph..................................................hphsoNPsphpls...................sh.pl...hhsp...schlpclt.+.t..p..h.t.t.s.t..................................................................................................................................p...tc.l.h.cplLpppphsPhhs...........................................................................sh.h.sh..sa...p....h...p...s..s...h...hl....s..........hP...clhhhss......F.............................................................. 0 269 448 662 +1811 PF04931 DNA_pol_phi DNA_pol_V; DNA polymerase phi Wood V, Bateman A anon Pfam-B_10566 (release 7.6) Family This family includes the fifth essential DNA polymerase in yeast EC:2.7.7.7. Pol5p is localised exclusively to the nucleolus and binds near or at the enhancer region of rRNA-encoding DNA repeating units. 
22.00 22.00 22.20 22.00 21.30 21.90 hmmbuild -o /dev/null HMM SEED 784 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.21 0.70 -6.76 5 422 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 249 0 305 433 5 450.40 15 60.45 CHANGED huYlLcRLI+GLuSsRpuARssauluLsplLstsp-IsssclL-tLc+csshsusp+uh-cssla.h...usLlsshsullhupll+cKsplsEslFppltlLhlhpuptsaLpusslplIsctLccsshpp.sh.sLhshlsKlLps.hcVs.oS-clLula.Lshs+VhsNccSp..sVthhsp-shcsLhclh.lsusolppcpscsclAusLlpLsKs..Klsusap.lspcthhKpsutcusshssphlslssslhssEphthlcpsspppptlp+sssS+spsshcFhcphppslspthpsEccsspRhhhphhshltsscppssshsshsphhplhppLsopshss.loss+cspplh.pssVcoltchsshppshlpssstplscuolss-ppp+htpsphlhllcpthppKptshlcph..FplapsactsKssTst..AlsstFS...EsssptlclsthsuLhhspSsthpQs.s.tp.GchtLspLtplsphpLp+pLscsstpup.csshscppppsasplcppL+pcsupsu-spspAFcpLlllVsLplhspss-S........s-VLpDlpsChcKuhscssp....tp.+EpsscEPssM-VLTElLLSLLuQsSsLhRclVchVFspFsp-lsp-uLpllL-VLKtc.shscpsth.tGEEE.EDsh..oDsDED-pcsl-DuEsESE-........DsEssEsDEp-DDuEusptlh.hc.ulcppLscsLp.sssl.................cGu-S-DEEsMDDEpMhcLDspLA...plF+E+Kc...+hQuscEcKppsQps+ppllpFKlRVLDLLElYl 
.................................................................................................................................................................................................................t........h..........l..hh...............h...............................................................h.......t.........h....t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..............................................................................................................................t.....t................hhh-hhlthh.p......t.....t.....h+
t....hh...h.......................ht.tth..hht........h.........................t......................................................t................................................t.........p...............t....t............................p.......t..t....t.....t.t.t............................t........t.t....t................t.....t....................................................................................................................t.t......p.............t.......h.s...-tt......h.t..h...s..l....thh..............................................p.+hhthh........................................................................................................................................................................................................................ 0 105 176 258 +1812 PF00336 DNA_pol_viral_C DNA polymerase (viral) C-terminal domain Finn RD anon Pfam-B_107 (release 1.0) Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.66 0.70 -5.11 3 8753 2009-09-12 08:13:51 2003-04-07 12:59:11 13 16 93 0 1 8052 1 158.20 90 27.33 CHANGED GSYGSLPQDHIlcKIucCFRKLPVNRPIDWKVCQRIVGLLGFVAPFTQCGYAALMPLYsCIQS+QAFTFSLlYKoFLpcQYhcLYPVARQRuGlCQVFADATPTGWGLVhGsQRMRGTFuuPLPIHTAELLAACFARCWSGA+LLGTDNSVVLSRKYTSFPWLLGCAANWILRGTSFVYVPSALNPADDPSRGpLGLLRPLPRLhFRPSTGRTSLYAsSPPVPsHRPsRVtFASPLQss-AWRPP .........GSWGTLPQEHIVQKIKpCFRKLPVNRPIDWKVCQRIVGLLGFAAPFTQCGYPALMPLYACIQuKQAFTFSPTYKAFLsKQYLNLYPV.A...RQ................................................................................................................................................................................................. 
0 0 0 1 +1813 PF00242 DNA_pol_viral_N DNA polymerase (viral) N-terminal domain Finn RD anon Pfam-B_107 (release 1.0) Family \N 29.90 29.90 30.00 30.80 29.00 29.80 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.30 0.70 -5.43 15 5612 2009-01-15 18:05:59 2003-04-07 12:59:11 12 21 92 0 0 5444 0 301.90 78 41.87 CHANGED MPL..............SYpHFR+LLL.....L-E..Eu.....GPLE-tLPRLADEsLN+RV.........AEsLNLth.sVsIPWTHKVGsFTGLYSopsssFNPcWpTPsFPcIHL+pDlIs+CppasGPLTlNEKRRLKLlhPARFaPpuTKYhPL-KGIKPaYP-aVlsHYFpTpHYLHTLWcAGILYKREoo+SASFpGoPYSWEQc..LQHGs..h.pt..t.t.cshsuQSutILuRu..........................ssGsshQuphppSRLGhpup...pGplApuppGpShulRuRs+uospRshusEsSuSspssppA.ScSuSshpQSusRcsAaophSTocRpSSS....upul-hpslPssospupspuslh.ChWhpFRsS.+PCSsYCLsHlVsLL-DWGPC .......................MPLSYQHFR+LLL.....LD-EA.....GPLEEELPRLADEsLNRRV.........AEDLNLG.NLNV.SIPWTHKVGNFTGLYSSTVPlFNPcWpTPSFPpIHLppDIIs+CpQaVGPLTVNEKRRLKLIMPARFYPshTKYLPLDKGIKPYYPEpsVNHYFpTRHYLHTLWKAGILYKRETTRSASFCGSPYSWEQE...LQHGp..h.psspR+G-ESFssQSSGILSRu.................................sVGss...lpSph+pSRLGLQsp...QGpLAptppGRSGSIRARlH..PosRRsFGV.EPSGSGHhsNpA.SsSSSCLHQSAV.RKsAYS.pl.ST.SK+p..S..SS....GHAVE.LHslPPsSARSQSpGPVhSCWWLQFRNS.KPCS-YCLoHIVNLLEDWGPC................................ 0 0 0 0 +1814 PF04104 DNA_primase_lrg Eukaryotic and archaeal DNA primase, large subunit Kerrison ND, Finn RD anon COG2219 Family DNA primase is the polymerase that synthesises small RNA primers for the Okazaki fragments made during discontinuous DNA replication. DNA primase is a heterodimer of two subunits, the small subunit Pri1 (48 kDa in yeast), and the large subunit Pri2 (58 kDa in the yeast S. cerevisiae) [1]. The large subunit of DNA primase forms interactions with the small subunit and the structure implicates that it is not directly involved in catalysis, but plays roles in correctly positioning the primase/DNA complex, and in the transfer of RNA to DNA polymerase [4]. 
21.10 21.10 21.60 22.60 21.00 20.80 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.94 0.70 -5.12 48 540 2012-10-01 20:20:07 2003-04-07 12:59:11 9 10 444 11 383 545 79 237.20 27 54.76 CHANGED pha+lsapshh.cLsp..p+........phhlc.pGhsalstpphhsllhppappplpc..slthhtpshs..cl....pcct+..lt.hhppls................................ttht.tp..h.splstppFP.CM+plhppL...ppsp+LcHtuRhplshFL+.......................slGlsh......--slphacpth..........s.scFsc.php..YsI+HthG...........-Gppss.....YsPhs.....Cpplh.............csaG...CPh...........s.-pL..................................................................................sppttl.pHP.pYa ..........................................................................acl.satch...cLsp....pR................phhlppG..hsals....t...pp....hh.....sllhppappplpcslthstpths....pl....ppcp+..ltsl..lppls...h...t.............................tstl.shpp....l..spl.s.pp..FP.CM+plhptL...cpspHL+HtuRhQhslFLK............................uIGlsl..............--sltaa+pt.att.............hs.-cFs+...pap..YslR.HtaG.............cGpcss.....Y.sPho.....Ctplh.....t....s.t..........ctHG...CPa+p..........s.ctL.t.l..............................h.t.................lh....t.p...phuC..hh...p........................s....l...HP..aa........................................................... 0 138 224 321 +1815 PF01896 DNA_primase_S Eukaryotic and archaeal DNA primase small subunit Enright A, Ouzounis C, Bateman A anon Enright A Family DNA primase synthesises the RNA primers for the Okazaki fragments in lagging strand DNA synthesis. DNA primase is a heterodimer of large and small subunits. This family also includes baculovirus late expression factor 1 or LEF-1 proteins. Baculovirus LEF-1 is a DNA primase enzyme [1]. Bacterial DNA primase adopts a different fold to archaeal and eukaryotic primases. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.98 0.71 -4.39 73 1657 2012-10-02 15:26:12 2003-04-07 12:59:11 14 27 1225 21 676 1488 174 154.80 19 29.20 CHANGED FDlDhschsshcs...............h.hhh.hhhhshthh.tsltc-hu.hpp..............hhhhhSG.........p+GhHlal.....s.ppshphsshpph....h.................................................tpthsptlhpppshhhst.................................................................................h.h.hlDhplspphpp......hl+sPhsh.......cstssplsss............................lsh ..............................................................DlD.........t.............................................h...t.s.t..h.hptl..h...c-hGhps...............h.h..sSG..........s+GhHlal.............hhppth.sh....spspch.............................................................................................................................spthsppltp.pt..s...hhspt.ht.........................................................................................................................ph.h.h..lDht.ssphpshlps.....sh........ps...ss.t.lss.lt.............................................................................................. 0 213 410 566 +1816 PF03604 DNA_RNApol_7kD DNA directed RNA polymerase, 7 kDa subunit Griffiths-Jones SR anon PRODOM Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.91 0.72 -4.48 22 397 2012-10-03 10:42:43 2003-04-07 12:59:11 8 6 344 96 272 368 26 32.10 48 44.18 CHANGED YlCu...-Cuscsp.lpts-slRC+pCGaRILYKcR .....YlCu...-Ct...pcsp...lc.t......t.-.s....I..RC+cCGaRIlYKcR... 
0 81 151 225 +1817 PF00521 DNA_topoisoIV DNA gyrase/topoisomerase IV, subunit A Finn RD anon Pfam-B_55 (release 1.0) Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.44 0.70 -5.63 110 11276 2009-01-15 18:05:59 2003-04-07 12:59:11 15 57 5442 61 2091 8937 6799 383.20 48 53.48 CHANGED RAlPcltDGLKPVQRRILauM.hc.h.hp.sppapKsAplVGpV..hupYHPHGDsSla-ulVpMAQ.sash..phsLl-spGNFGSh.sGDss.AAhRYhEs+Loclupp.lht-l-cpslcahssaDspptEPphLPshlPsLLlNGusGIAsGhATsIPPHNhtEll-uhhthl.c.s.ph...pl....hp.h..h.sPDFPouuhlh......st..pthhptYpsG...p..Gplp..lRuchph...........t...tpllITElPapsspssllc.pIscll....psp.....plp.lt..-lcDp.occ..sl+lllc..lcp...shp...sphlhst..LachTpLpsohsss.hslhcs.p...P+hh.slp-lLcpalcaRhcllp+RppapLpchpc+hc........llpthhhhh..........spl-tllplIcss.s.....tpps+p.pLhpc..lsc.....................................................................psctlL.shpLtpLTph-hp+lppEhpclppclpplcphlss...h.phhhc-Lc ...........................................RALPDlRD..GLKPVpRRlLYuM.pc.h.G................s........s.......+.....sa+KSARlV....G-V.....hGKY.HPHG............DoulY-uhV...RMAQ.sFoh..........RY.LV..DG..pGNFG..........Sh.DGDsu.AAM.RYTEuRh.ocl.u.tp.l.L............p.D..lsc....p..........TVDahsNa..Ds..oppEPtVLPu+hPNLLlNGu.o.GI.AVG..M..ATsIPPHNLsEllsushhhlc....ssphs......................hppLhp......h.....l.GPDFPT.uuhIh..........uppsIpcuYcTG.+....Gplh...hRuchchE................................ttt.ppllloElPa..p.................V...s...p...s...p.l......lc...pI.schh....ppK...............................+l......t..hI.s.......clRDE..S.....c..........+.....puhR.lVIc...l+c.....css.............sp...hl.h..Np...Lap......Tp..Lpt..sa.sh.Nh.l..........s..........l.ps.p....................Pphh..sLtphlptalpaphcllhRRptacLpKsppRhH............Ilc..GLhhAh...........tplDplI.tlIRtu.........s.........tts.......+ttLhtp..........thsc.....................................................................................Q..ApAIL.php
L.t+Lst.h.-t.cl..tEhpc.L.tpltpLttlLss..thh.plhtp-........................................................................ 1 693 1331 1755 +1818 PF01556 CTDII DnaJ_C; DnaJ C terminal domain Bashton M, Bateman A anon Pfam-B_342 (release 4.0) Domain This family consists of the C terminal region form the DnaJ protein. It is always found associated with Pfam:PF00226 and Pfam:PF00684. DnaJ is a chaperone associated with the Hsp70 heat-shock system involved in protein folding and renaturation after stress. The two C-terminal domains CTDI and this, CTDII, are necessary for maintaining the J-domains in their specific relative positions [2]. 30.50 30.50 30.50 30.50 30.40 30.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.66 0.72 -4.08 113 11443 2009-09-12 04:15:19 2003-04-07 12:59:11 13 45 5229 30 3594 8830 3406 77.70 31 24.15 CHANGED DLhh.pl.slohhpAllGsp.lpl..slcG..plplp....lssssps.Gphl......plpscGh...tps.........tpGDLhlplp.lp..hP..pp.l...s..s.pp+ph .......................................slhh.pl.slsh....s.p..AsL..G.........s......p....lp......l......s.....T.....l..............c.........G...............p........lplp...............lP..s......G.......o..ps..Gpph...................Rl+G+.Ghsthpss........................tpG....DLhlplp..Vt...hP..p..p..l...s..t.ppp................... 0 1220 2251 3021 +1819 PF00684 DnaJ_CXXCXGXG DnaJ central domain Yamout M, Bateman A anon Pfam-B_89 (release 2.1) Domain The central cysteine-rich (CR) domain of DnaJ proteins contains four repeats of the motif CXXCXGXG where X is any amino acid. The isolated cysteine rich domain folds in zinc dependent fashion. Each set of two repeats binds one unit of zinc. Although this domain has been implicated in substrate binding, no evidence of specific interaction between the isolated DNAJ cysteine rich domain and various hydrophobic peptides has been found [1]. 
32.70 32.70 32.70 32.70 32.60 32.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -11.02 0.72 -3.92 63 7168 2012-10-05 09:33:39 2003-04-07 12:59:11 14 35 5271 3 2059 5364 2289 63.40 43 16.70 CHANGED CspCpGsGs.....ssssht.pCssCsGpGhhhptpp..h.hhhhphptsCspCpGpGph..h..pc.CppCpGpG ...................CcsCpGoGAc...........sGop...sp.....TCspCpGsGplphp......p...............Ghhphpps......CspC.pGpGph....I........c-PCppC+GpG................ 0 693 1292 1727 +1820 PF03265 DNase_II Deoxyribonuclease II Mifsud W anon Pfam-B_4508 (release 6.5) Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild --amino -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.03 0.70 -5.43 37 585 2009-01-15 18:05:59 2003-04-07 12:59:11 10 37 128 0 441 567 4 252.60 21 48.86 CHANGED hoChsppu.................psVDWFhlYKLPp.............GhpahYhDs....ss.ssWphustslsss.p.sslspTlpphh....ssspsshshlhYNDpPP.....ptps...ssphuHsKGVlhhcpp...pGFWllHSlPc.FPshs....hu.YsaPssspphGQphlClohshpphpp.IuptlhhppPhlYsssl........Psths..sph..spLpplspspph...........psshpppstlpohpG....tphptFuKss.passDlYs....shluptLpsslhlppWtpsss...hLsosCs.ssa.h.shshp.lsl..sp.....aspppDHSKWsVs............ssssasClG-hNRptppt+RGGGslChp............stplapsFpphst..php.sC 
..................................................................................................................................................h.phh....hhhKh.st.....................................t.thhhhps..................ts..ttht......t..............h..st....t...tshtt.oht.h.......................tpt.......phhhhhYNstsP...........................t...............tt....h.....upsK..............G........l....lhh....spt...........tu.h.Wl.lHohPt.FPs......................s..a.........h.P.....s......t.....................................................Gp.hlChoh.t...t.p.....hpt....ls........t........lhh.tts.la..ph.................................s..t............t.h.......h.p...lhpt....................s..........hto...ts.......h..auK.t...t............................-ha.......................thl....h...h.t.t.s.h.s.........W.........tttt........s.Cp.................h..h...t..hth...t.t..............................ht..ttDp.up..ahls.....................tshhChss.sh.......tp..psuhhhCh................lht.hp..h.................................................................................................................................................................... 0 336 365 405 +1821 PF01712 dNK Deoxynucleoside kinase Bashton M, Bateman A anon Pfam-B_1744 (release 4.1) Family This family consists of various deoxynucleoside kinases cytidine EC:2.7.1.74, guanosine EC:2.7.1.113, adenosine EC:2.7.1.76 and thymidine kinase EC:2.7.1.21 (which also phosphorylates deoxyuridine and deoxycytosine.) These enzymes catalyse the production of deoxynucleotide 5'-monophosphate from a deoxynucleoside. Using ATP and yielding ADP in the process. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.82 0.71 -4.29 14 2493 2012-10-05 12:31:08 2003-04-07 12:59:11 14 11 1392 149 635 1837 992 145.40 25 62.65 CHANGED hQhhhLspRh+.hhs.........pppshlh-RSlassphlFspshhcpGphspp-aphYppha-thh.phsp........cllIYLcss.-sslcRIc+RuRs.E..........sIsh-......Yh-pLc......thatthhpphshsshhhlcsschDh.ts.pc..phhpplh .........................................................hQhaFLspRacphpc..................ttts.hlh.-R..S..la.pD.th......lF.t.p.t...h.h.cp.....G..ph......s..c...p-.aphYpcl.a...ch.h.h.pph.h.................................PD..llIYLc..s.sh-s.h.h.cR.....Ipc....R..GR..sh.E....................p.l.s...c..........................Yhc.p.Lp...............ptYcp.a...h........p....p......a....s....t....s....s......h...l...h..l...ss...sp..h.Dhhps.psht.......h.................................................. 1 202 359 503 +1823 PF00404 Dockerin_1 celCC; Dockerin type I repeat Finn RD anon Prosite Domain The dockerin repeat is the binding partner of the cohesin domain Pfam:PF00963. The cohesin-dockerin interaction is the crucial interaction for complex formation in the cellulosome [1]. The dockerin repeats, each bearing homology to the EF-hand calcium-binding loop bind calcium [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.28 0.73 -6.42 0.73 -4.01 130 1647 2012-10-02 16:17:27 2003-04-07 12:59:11 13 223 55 24 504 1211 47 21.00 49 5.22 CHANGED DlNsDGpVsuhDhhhl++alh ..DlNuDGplNShDhslLK+YlL...... 0 294 503 504 +1824 PF04118 Dopey_N Dopey, N-terminal Wood V, Finn RD anon Pfam-B_17466 (release 7.3); Family DopA is the founding member of the Dopey family and is required for correct cell morphology and spatiotemporal organisation of multicellular structures in the filamentous fungus Aspergillus nidulans. DopA homologues are found in mammals. S. 
cerevisiae DOP1 is essential for viability and, affects cellular morphogenesis [1]. 25.70 25.70 25.70 25.70 25.50 25.50 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.97 0.70 -5.65 37 385 2009-09-17 06:44:01 2003-04-07 12:59:11 9 9 257 0 258 391 0 285.80 37 15.96 CHANGED pDt+h++asssl-+sLppF-o.lpEWADhIuhLu+L.KuLQ......ttsshshlPp+hhVu+RLuQCLsPsLPSGVHpKALElYshIFphlup-s...Lsc-LslahsGLhPhhuaASloV+shhlplhEpYhl.sLsp.sLcsshpullhuLLPGLE-EsSEhappshpLl-plp..ptlsps..............hFapslalsllsssphRLuuLsalsp+hspht........................................tpptp.hlss-ssLll+ulssuLsDc.......slLVpRshLDLLlpphPLsS....sl........l....stpDpphLlhushtlhh++DM.SLNRRlasWlLGspspspsst........pu ...............................................Dt+aRpYsusl-+ALppF-t..spEWADhIShLu+L.KsLQ...........................ps.ph..t.hlPc...+hhluKRLAQCLpPuLPuGVH.KALEsYphIFphlG..sct.............Lu+DL.lY...s...GLhPlLuhAuh..sV+............PhlLsLhEpahl.sLsp.sL+PuLpuhlhulLPGLE..E.sSEha-..+o.pL.l.pplp..ts.lspp...................................hFapsLWhull...sssuhRlsulsalhp+ls+hh...........................................................................................................hp..ptthh.....lus-...tLhlculssulpDp.......slLVQRshLDlllhthPhps....sh................hppsDh.hllpuuhtsll..RRDM..SLNRRLasWLLG.c.pt.....st.t..................................................... 0 92 133 203 +1825 PF04556 DpnII DpmII; DpnII restriction endonuclease Kerrison ND anon DOMO:DM04410; Family Members of this family are type II restriction enzymes (EC:3.1.21.4). They recognise the double-stranded unmethylated sequence GATC and cleave before G-1 [1]. 
http://rebase.neb.com/rebase/enz/DpnII.html 20.90 20.90 21.30 20.90 20.70 19.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.67 0.70 -5.34 19 239 2012-10-11 20:44:43 2003-04-07 12:59:11 7 3 216 0 35 188 11 259.50 38 88.94 CHANGED cpac.FlpohpsTstshsYaVsapKVpcNlpp..hc...lcLNhLNhLIGcc..slcp-hhpLhpchPpllpslPhLlAlR-pphchhh..............stptsh.........shhphsFpp......hs..pl..-phhcFhcpTGLh.clhppptl+sLsDYVhGVEVGLDoNuRKNRoGchMEtllcphlpchs....shp.ahpQhphstlcch...auh-ls.....shtp++FDFsl..cspcKlYlIEsNaYsuuGSKlppsAppYpplspclp.tpsshcFlWITDGpG.W.pouKspLpEuFcplshlaNLphlcpuhLcclh ..................................t..Fc.alpshp.sostshsaasDapKlhcNlpplc..lpLNhLN.L..lG..pc..slppchhpLhpphPchhpslPlLlAhRppph.hh..........stptsh..............phsF..pp......hs.....h..-phh.FhccoGLh.plhps+hlpsLlDYVhGlEsGLDoNuRKNRoGchMEsllpphl.ppts....shs.........ahcphph...p...c...lcch...ash-ls.............sps.K+FDFVl.....cspphlYhIEsNFYuu.uGSKLNEsAcuYpplupc.hp.shsshcFlWlTDG.p.G.W.hsA+ssLpEsFc.....hl.hlYNlspl.cpshlppl.h.......................... 0 15 27 33 +1826 PF04244 DPRP DUF426; Deoxyribodipyrimidine photo-lyase-related protein Mifsud W anon COG3046 Family This family appears to be related to Pfam:PF00875. 
21.00 21.00 21.20 24.00 20.70 20.40 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.55 0.70 -4.82 107 598 2009-09-14 12:31:16 2003-04-07 12:59:11 8 3 588 3 197 601 2407 216.30 32 43.34 CHANGED LhLlLGDQLststuslpt....stsps.hllhsEstpEss.....YVtHHhQKlshhFuAMR+FAppLcppGapVtYhpL-.-....ststt..slsstLtchhppp.thpplthpcPsEaRLtpplcp.htt..thslsl.phhsss+Flssts-hpsahp..s.+KphpMEhFYRcMR++hslLMtsss.pPhGGcWNFDs-NR+phst.sh...phPpshpFpsD...thspcllphlppchs..sph......Gslcs...Fha ....................lhLlLGDQLs...ptuhhpp.....spspshhlhsElppEss.....YVhHHhQKlshhFuAMRpFAptL.ppp...GapVpYlplDs...stsht....shsstltph...hpph.t.spplphpcPsEaRLhpplpp.h.t..................hsl............sl...phhssc+FLsshs-hspahp.......s..p.....KplhM.EhFYRcMR..++hslLM....s.....ss...pP...hGGpWNaDs-NRcthsps..........phP.pshpass.....D.....spplhphlcpphh..sphGplps...h.a................................................................. 1 60 128 166 +1827 PF05219 DREV DREV methyltransferase Moxon SJ, Bateman A anon Pfam-B_6662 (release 7.7) Family This family contains DREV protein homologues from several eukaryotes. The function of this protein is unknown [1]. However, these proteins appear to be related to other methyltransferases (Bateman A pers obs). 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.74 0.70 -5.52 3 166 2012-10-10 17:06:42 2003-04-07 12:59:11 7 4 121 0 102 1665 951 221.30 40 80.85 CHANGED WYssN+E.Ls-shQulFVsShPDusTQpFL-NSpclSuNlFhQLaHuLusSILShFMS+TDINGhLGRGSMFlFSE-QFpKLLsIst.p.sSs.cs++LLDLGAGDGEITc+MuPaFEE........VYATELSpTMRsRLpKKsYNVLotlE.WQpTshpaDLIoCLNLLDRCasPhKLLcDI+s.ALcP.NGRVIVALVLPapHYVEsNs.G+ap+PcslLElsG..poaEEcVs+hh.EVF+sAGFcVEAWTRLPYLCEGDMaNSaYaLsDAIFVLKP ...................................................................................................................Y..p.p..ls..phtshFlt.....DptTp.FLppohphSs.hhhpha.+shspslh..p.hFhopTslNGh...L....tR.GuMFlhS.pQ.apcLL..p.h....s.................................s................p.s.......p..........p.....L.L......DlGAG.DG....p..VT.t.h.h....u...s...h...F...p...c.........................l....a....s....T.......E.....h..S.........................s.........M.......h.........h..............p........L.........p.........c..........+..........t........a................p............V.......l........s.....h........p.....-........a.......p.........p........p.........s.......h........p..........a.....D........lI....o...C....L....N....l...L.D.....R..C..-......p...P...h..s..L..L...ccl+p.sLpP...pG.h.l.l..L...A....l....V...L...P....a.p.s..Y..V.E.s....s....s.........p....p..pP..p..c.h......L....p..h......pG........tsaE.....-..p..l..s..p....h...p.V.h.t..sGF..l.ta...o...+lPY..LCEGDhhpsaYhLsDslhVLp.......................................................................................................................... 0 41 52 81 +1828 PF02635 DrsE DsrE/DsrF-like family Mian N, Bateman A anon COG1553 Family DsrE is a small soluble protein involved in intracellular sulfur reduction [1]. This family also includes DsrF. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.48 0.71 -4.06 254 4389 2012-10-01 20:53:36 2003-04-07 12:59:11 10 5 1987 31 1183 3383 682 116.50 19 88.08 CHANGED phlhhlsp.......sst.sphshthst...ss..tths.............hclt.lhhhusulthhhp............................h.....tp..thptLtp.t.....lphhlCtsuhpppuls.pp.............l..........phthsuhs.plsph.pp.u...phlph ..........................................................................................hhhlhpts.sh.ust.tstpuhph.s.....uh...tt.s..............tclt..lFhhs-.GVh..thhp....................................................s.tthsh...................hp....thptLst.ts......lplhlCs.ss.h....p....t.R...Glsttp................sL....h.......sh.ph.sshs...t.L.sphh.hp.s-.pllph................................................ 0 373 740 996 +1829 PF01916 DS Deoxyhypusine synthase Enright A, Ouzounis C, Bateman A anon Enright A Domain Eukaryotic initiation factor 5A (eIF-5A) contains an unusual amino acid, hypusine [N epsilon-(4-aminobutyl-2-hydroxy)lysine]. The first step in the post-translational formation of hypusine is catalysed by the enzyme deoxyhypusine synthase (DS) EC:1.1.1.249. The modified version of eIF-5A, and DS, are required for eukaryotic cell proliferation [1]. 
21.00 21.00 21.60 23.00 20.50 20.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.65 0.70 -5.29 7 956 2012-10-03 09:55:27 2003-04-07 12:59:11 12 6 712 6 524 948 1511 277.10 35 85.61 CHANGED hptLlchhtphGFQAo.plGcAscIhcpMhcp+..................p..pchslFhGYTSNlISSGlRchltaLVpcphlsslVTTAGGlEEDhIKsLus.thlG-atlcsppLRccGlNRIGNlhVPN-sYptFE-ahhsIh-phlp.pp....p.s..............hTsSchIaclG....Kcls...cEcSllYWAaKNpIPlFCPuLTDGSlGDMLaFaphcs.stpLplDlspDlp+lNshuhpu+csGhIILGGGlsKHaIhNAsLhRsGsDYAVYIsTup.aDGS.SGA.P-EAVSWGKI+scAchVcVauDAThlFPllVAtshs .......................................h...lhpth...p.h.G.FpA...s.plucAhch.h.pp.Mhp.c.........................................t.spl.FLuhoushlouGl.R.c.h.lt.LlpcphV-slVoTuu.sl.E.E.Dhh.....cslu........p......a......h......G.......s.......a.......s.......h.......s.............s.t.p.LRppGl.NR...IsslhlP.p.-.s.a.p.t.h.E....c.al.h.s.lh-p..hhtcpp...................................................................aospchltc.lG.........ccIs.........sc...s....S.lhhhAhc.........p.......s.lPlasPulsDuSlGs...h.l..a....h...a....s.....h.....c....s...........s....t................l...........h.....l..D.h........l....p............D.h......pplss........h..............s...h...p........u.....p................c........s.......G....hlh....lGGGlsKca.........h........h.......s........s........s........l......h...........+.......s.........G...........t.......-........Y.....u...l........IssupphDGS..SGApPcEAlSWGKl...c...h...s...u...p....s...p............VauDATlshPLlsu.hh....................... 0 196 337 452 +1830 PF01323 DSBA DSBA-like thioredoxin domain Bateman A, Mifsud W anon Bateman A & Pfam-B_2082 (release 6.4) & Pfam-B_5982 (Release 7.5) Domain This family contains a diverse set of proteins with a thioredoxin-like structure Pfam:PF00085. This family also includes 2-hydroxychromene-2-carboxylate (HCCA) isomerase enzymes catalyse one step in prokaryotic polyaromatic hydrocarbon (PAH) catabolic pathways [2,3,4]. 
This family also contains members with functions other than HCCA isomerisation, such as Kappa family GSTs (e.g. Swiss:P24473), whose similarity to HCCA isomerases was not previously recognised. The sequence Swiss:O07298 has been annotated as a dioxygenase but is almost certainly an HCCA isomerase enzyme. Similarly, the sequence Swiss:Q9ZI67 has been annotated as a dehydrogenase, but is most probably also an HCCA isomerase enzyme. In addition, the Rhizobium leguminosarum Swiss:Q52782 protein has been annotated as a putative glycerol-3-phosphate transfer protein, but is also most likely to be an HCCA isomerase enzyme (see [5]). 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.94 0.71 -4.75 30 5572 2012-10-03 14:45:55 2003-04-07 12:59:11 15 31 2655 78 1612 6482 3933 172.90 17 79.27 CHANGED plchaFDahCPaCahuppplpphtpth................s.lclph+shslhss.....s.s..............p......th.......th.h.............sopputplhhhsttps....tphspthapshaspstshscps...ltplspcsGl.-ssp...hpphhsu...ssspstlccspptAtphultGlPshhl...ss......c.....hhhGssph-hltptlt 
.................................................................................................l..ah-hhCPa.C.a.h.h.p.t....h....l.p.t..h.hpth........................................s..h..p..l..p..h..p...s.h....t...lhst...................................................................................................................................................................................................h..sshp...u.h...p.h..h...h.h.s.t...tps........................p..h..h.ts.l...a....c....u....h.....a.....p.....p........s......p......s....l....s.....s.......s......p.s...............l....t....p....l....h....t.....p....s.......G.........l...s.....t...t..p.................h.p...t...s.h...s...s.............s...h..p...s.t....l.....p.p...p....p....p.......t.......A..........t.......p...........h.........s....l..........p..........G.l..Ps.hll.........ss..........c...........hh.......G.t....t.h.....h........................................................................................................................ 0 421 907 1308 +1831 PF02600 DsbB Disulfide bond formation protein DsbB Bashton M, Bateman A anon COG1495 Family This family consists of disulfide bond formation protein DsbB from bacteria.\ The DsbB protein oxidises the periplasmic protein DsbA which in turn oxidises cysteines in other periplasmic proteins in order to make disulfide bonds [1]. DsbB acts as a redox potential transducer across the cytoplasmic membrane and is an integral membrane protein [2]. DsbB posses six cysteines four of which are necessary for it proper function in vivo [1]. 
22.10 22.10 22.90 22.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.26 0.71 -4.43 184 2959 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 2169 13 514 1795 1819 155.50 23 77.26 CHANGED M...............pt+thhhlhhlsuhshhhs.....AL.ahpal....hshtPCsLClhQRhshhslsllslluhht.........hchh.hhhhhlh.uhhGhhluhhHshlptt....t....sCs.........h.t.hshtphh...lhts.sss.Csphsap.h..lG................lohstWsllhFhhlsllhlhhhh .................tht....t+hh.hhlhh.h.ss.hslhhs.....A.h.aaQal....hthtPCsLClhpRhshhshshsu.lluhlt...............hshchh...thh.hhl.h.uhhG....l....t....luh...p....H......sh......lphh..............Ps.s..h.s.....sCs.....hh.t..h...phh...shtphh...h..hhs...sss...C.sth...ht..h..hs.....................................lphsthth.hhFhhhhllhhh.h.h.................................................................. 1 116 272 395 +1832 PF02683 DsbD Cytochrome C biogenesis protein transmembrane region Bashton M, Bateman A, Eberhardt R anon COG0785 Family This family consists of the transmembrane (i.e. non-catalytic) region of Cytochrome C biogenesis proteins also known as disulphide interchange proteins. These proteins posses a protein disulphide isomerase like domain that is not found within the aligned region of this family. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.65 0.70 -4.73 10 5377 2012-10-02 18:22:22 2003-04-07 12:59:11 10 29 3528 0 1358 4319 2426 206.50 25 49.26 CHANGED uFLAGlLShLSPClLPllPlalSaIoshshssh+pt.........hhpulhFlLGhollFluLGhusuhlsslht.tatshlphluulllIlhGLphluhh.................hLh+hphs..............................h...tpsps.hltuhLhGhsFAluWoPChuPlLuullshuusps..hhushhhslYsLGLulPhllsulFhs.hh....hh+tlp+ap+hlchsoGlLlllhGllhlh ...........................................................hluGll...h.h.o..P.C...l.LP.l.l...Ph...h...h.u.h.l...h....u........t.........p.......t....t...t.........h......p..tt.............................shh.t.s...h...h.a.l...h...G...h...u...l...s.......a.......s.h...L..........G.......h.........s...s.........u.......h......h.......G........t.......h......h.......t.............h.......t.....t..........h.......h.....h......h.....l....h...u..h........l.....h...l....l...h.......u.......L.....t...h...h.....u.h.hp......................................h..h.l...t.p....pph...............................................................p.p..p..t.s.s..t.h.h......u...s.a.lhGhhh.ul....s....h....oPC....s....u....P...s...L..u.....ul....L....s.h........s....u......s....s............u....s.......h..h..tG.s..h.h....h.h....s....a...u...L..G.....hu..l..P.h..l....l....l..u....l....h.s...s....h...hh..................h.h.....t..h...t.........t.........a........h........t..h....l.....c......t.......h...hG.h.l.lls.hulhh..h.......................................................... 0 404 850 1135 +1833 PF01984 dsDNA_bind DUF122; Double-stranded DNA-binding domain Enright A, Ouzounis C, Bateman A anon Enright A Family This domain is believed to bind double-stranded DNA [1] of 20 bases length. 
26.00 26.00 26.60 29.70 25.60 25.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.38 0.72 -4.05 62 498 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 448 6 336 484 53 104.50 33 78.93 CHANGED IR+p+h..tELQpp.............t.tttpppppppppt..........ctp+pulLpplLss-AR-RLspl+LV+P-hApsVEs.LIpLAQoGplpp+lo-cpLhplLpplspppcpp.....cIphsR ...................lRtt+htpLptp.........................ttttpptppp.ppppt...............................ppt+pu...lL...pQlLss-ARpRLuplpLVKP-+ActVEshLIphAppGpl.p..tKlo-cpLhplLpplspppcpp.....tplphpR................... 0 107 189 274 +1834 PF04077 DsrH DsrH like protein Kerrison ND, Finn RD anon COG2168 Family DsrH is involved in oxidation of intracellular sulphur in the phototrophic sulphur bacterium Chromatium vinosum D [1]. 24.40 24.40 24.80 24.60 23.80 24.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.01 0.72 -4.32 66 948 2012-10-01 20:53:36 2003-04-07 12:59:11 7 1 944 13 166 473 50 86.00 41 91.11 CHANGED +SPapstslpssL+hl....sssDslLLlpDuVh.Ashsssphhptlpt...pshplasLpsDlpARGl..tplsssl.........ph.lDYss..FVcLshc.Hpps. ........................SPa.s.-hsulLRLL....s-GDs..LLLlQDGVh.AAl......cu.sc......aLcsLps.............uslplYALpEDl..hARG...L....s...u.pl..S..s..s..l.........hh..lD..Yo-..FVcLTl+Hspp................ 0 38 74 123 +1835 PF05160 DSS1_SEM1 DSS1/SEM1 family Wood V, Mistry J anon Pfam-B_22209 (release 7.7) Family This family contains the breast cancer tumour suppressor BRCA2-interacting protein DSS1 and its homologue SEM1, both of which are short acidic proteins. DSS1 has been shown to be a conserved component of the Rae1 mediated mRNA export pathway in Schizosaccharomyces pombe [1]. 
20.80 20.80 22.60 23.20 20.20 20.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.48 0.72 -4.19 28 295 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 233 7 210 264 3 62.30 45 64.59 CHANGED ptspp...chssLE..EDDEFE-FPh--..Wssccsppsss........plWEEsWDDDDs.sDDFSpQL+pELcps ....................................t..tpt.chshLE..EDDEFE-FPs--..Wssp-...p-pss..................plWE-s.....W.....DDD.-l.pDDFSpQL+tELcc................... 1 67 115 171 +1836 PF00908 dTDP_sugar_isom dTDP-4-dehydrorhamnose 3,5-epimerase Bateman A anon Pfam-B_540 (release 3.0) Domain This family catalyse the isomerisation of dTDP-4-dehydro-6-deoxy -D-glucose with dTDP-4-dehydro-6-deoxy-L-mannose. The EC number of this enzyme is 5.1.3.13. 20.30 20.30 20.40 20.80 20.20 20.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.20 0.71 -4.95 19 3552 2012-10-10 13:59:34 2003-04-07 12:59:11 12 9 2817 55 902 2953 1971 172.40 40 87.60 CHANGED hhchslssshllEPclFsD-RGaFhEoFspptFpctsstt.shVQ-NcShSup.GVLRGLHaQh.s.sQuKLVRslcGpVaDVAVDlRpsSPTaGcWsuVpLSs-N+ppLalPtGFAHGFhsLu-c.schhYKsoshYssppEpulpWsDPslGIcWPhtss.........lStKDtsus.hs-h ...........................................h..pstlsslhll-P.p.lFuDpRGaFhEsap..pc..p.....F.p......c......t....s.........h..............p.........t....s..........F.....V....Q.-.N.pS.hSpc..GVLRGLHaQht.............sQ...uKL.Vpll.p.G..p..VhD..V.sV..DlRcuSPTFGpWhu.shL....osc....N..........p....+plalPcGFA.HG.FhsL.o.-....s..sp......h.h..Y...Ks..s...s...a..Y.s...P.p...t...-...t...u...lt..asDPslu.IcW.P.h.t....s....t....................hlS-KDtttshLtcs..................................... 0 286 589 765 +1837 PF03942 DTW DTW domain Bateman A anon COG3148 Domain This presumed domain is found in bacterial and eukaryotic proteins. Its function is unknown. The domain contains multiple conserved motifs including a DTXW motif that this domain has been named after. 
19.70 19.70 19.80 20.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.34 0.71 -5.11 13 1690 2009-01-15 18:05:59 2003-04-07 12:59:11 10 14 1061 0 506 1210 69 187.30 32 78.80 CHANGED RthtCtcCtlshptCLCshl..PslpspsplhllpHssEst+shsTuhll...shsltcs.hhth.ch.t.s.p...........................h..hhttPshpshllFPucputshpcl..................................................hshssschhphIllDuTWscA+cha+psshL............csLP.pVsLtsptsSpYR.lR+usscspLsThEsuthhLshhpsp............cu.tpsLlphhcsht.paphspt ..........................................................................................h.tC...p.Ch.hs...p....h....ClCstl.........sshp....s....p...s..........phhllhassEs.h.+.s..o.N.TGRLl................ut.hl..s..c..s...t.s..a.....a..sRs..psstp...................................................................................................................................L.ht..hl....p..s.s..s.....h...p.shllF.P....u.p.h....u...tp..h..p..p....l.........................................................................................s....s..ss.+......sh....hI.h.L..DGTWpcA+KMa+.+.S.PaL...............................................................psLP...hls..l.s........s......p..h....S......tY..+....lRcsp..s...........psphsTsEsuht....lLphhscp.........................................ps.spsLh.phFpthhppa.h..pt............................................................... 0 139 241 393 +1838 PF01950 FBPase_3 DUF100; Fructose-1,6-bisphosphatase Enright A, Ouzounis C, Bateman A, Mistry J anon Enright A Family This is a family of bacterial and archaeal fructose-1,6-bisphosphatases (FBPases). FBPase catalyses the hydrolysis of D-fructose-1,6-bisphosphate (FBP) to D-fructose-6-phosphate (F6P) and orthophosphate and is an essential regulatory enzyme in the glyconeogenic pathway. 
25.00 25.00 41.40 32.60 22.20 18.10 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.12 0.70 -5.46 33 184 2009-09-12 01:16:20 2003-04-07 12:59:11 11 2 177 8 111 177 25 352.60 55 96.70 CHANGED hKlTlSlIKADVGulsGHopsHscLl-tscchLpcA.cpslLIDaaVopsGDDlpLlMTHp+GsDsc-lH.cLAWcsFhpuT.cVAK-LKLYGAGQDLLsDuFSGNV+GMGPGVAEMEFp.E.....RsSEPlllFtuDKTEPGAFNLPLY+MFADPFNTAGLVIDPsMHpGFpFEVhDVh.....-sKplhLssPEEhYDlLALIGssuRYVIK+Vap+.tsGc....l.....AAVsST-RLshIAG+YVGKDDPVsIVRsQSGFPAlGElLEPFA.hPHLVuGWMRGSHsGPLMPVshccAps..........TRFDGPPRVhALGFQls.cG+LlGP.sDhFcDPAFDhsRcpAtclA-ahRRpGPFpPHRLP.-EMEYT.TLPpVlcKLcsRFpsh ........KlTlSVIKADlGuhsGHshsHPcllctApchLp-A.ppslllDaaVopsGDDlpLIMTHp+GhDsp-lH.tLAWcAFppuT.cVAKcLtLYGAGQDLLpDuFSGNl+GhGPGVAEMEFpE......RsSEPlllFhADKTEPGAFNLPLY+hFADPFNTAGLVIDPsMHpGFpFEVhDlh.....EpKtlhLssPEEhYDlLALIGssuRYlI++Vap+.t.....sscl......AAVsST-RLshIAG+YVGKDDPVsIVRuQuGhPAlGElLEsFu.hPHLVuGWMRGSHpGPLMPVshcsAp..s..........TRFDGPPRVhALGFQlp.cG+LlG.....P.sDlFcDPAFDtsRppAhchA-hhR+pGPFpPHRLP.-EMEYT.TLPpVlcKLcsRFt.h............................ 0 41 74 94 +1841 PF01954 DUF104 Protein of unknown function DUF104 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes short archaebacterial proteins of unknown function.\ Archaeoglobus fulgidus has twelve copies of this protein, with several being clustered together in the genome. 21.60 21.60 21.80 21.70 21.10 21.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.93 0.72 -3.94 41 145 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 53 2 106 172 12 61.20 27 89.62 CHANGED MsclI-AlYEsGVhKPLcKlcLcEGc+lplhlppt.........-lhc+htshlttpp.......t..pchlcE ....M.phIcAlYEsGVhKPL..cpl.c..LtEGpc...VplhIppt...........c.hcchhthht.t........................................................................................ 
0 46 70 92 +1842 PF01955 CbiZ DUF105; Adenosylcobinamide amidohydrolase Enright A, Ouzounis C, Bateman A anon Enright A Family This prokaryotic protein family includes CbiZ which converts adenosylcobinamide (AdoCbi) to adenosylcobyric acid (AdoCby), an intermediate of the de novo coenzyme B12 biosynthetic route [1]. 21.00 21.00 21.00 21.20 20.70 20.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.35 0.71 -4.51 69 333 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 295 0 160 338 10 191.00 28 66.29 CHANGED llchtpshpslSoushsGGhtp.sctlhNppVsp....sass...........sspphh.pchhpph..uh...psssuhhTAssh.cphshs....cshpVpshsTAGl.sNssphu..................................GTINlhlhsstsLscuAhspulhTATEAKstALh-hsl.........tuTGTuTDulsVss........tt..stthpauGstTclGphlucu....VhcAl ......................................h..phstshpslSouhh.sGGhtp.hptlhNtp.Vsp...shspt...............sstphh.pphhpph..uh..................psssuhhTAssh.cphshs..........c..s..h..tVtshsTAGl.sNuscsut..........................................................GTINlllhls.....ssLssuAhspAlhTsTEAKstAL.-hsh....t..................hATGTuTD.ulslss.........stt....stthpauGstTplGphlu+u....Vhpul........................ 0 62 111 139 +1843 PF01956 DUF106 DUF106; Methyltrn_RNA_3; Integral membrane protein DUF106 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. Members are predicted to be integral membrane proteins. 
25.60 25.60 26.00 25.70 25.50 24.80 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.72 63 665 2012-10-01 21:53:17 2003-04-07 12:59:11 11 4 418 0 446 621 77 171.30 25 75.18 CHANGED slllss.lp......hhPhhllllllullpshhupllpthh..pp..sph......phpppphtpctcphpp.........................sss.thp.cphppccpthhpsppph.................................hhphhKs.hhhhlsphslhhWlshhFu....................Ghll.......t+lP.FPlhtphpthhptsl..tshssthso.lsWYFLshh.ulpplhphl....LGhss ............................................................................................sh..llhlhlullpthls.hl..p.........+p...sth.............phpc.pp.h....hcuctLcp...............................ssp.hht.tpp...h.ppccphhts.sppsh..............................................hhs.h.h.Kuthsh....hls.hhllh.ual..Nhh.Fs...........................GhVl.........sKlP...F.P..l..hh..thpshh.p.....pGl............shD...s....shs.......S.......lsWYF.Lssh.ulpphhphl.LG.s.......................................... 0 142 240 363 +1844 PF01957 NfeD DUF107; NfeD-like C-terminal, partner-binding Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family NfeD-like proteins are widely distributed throughout prokaryotes and are frequently associated with genes encoding stomatin-like proteins (slipins). There appear to be three major groups: an ancestral group with only an N-terminal serine protease domain and this C-terminal beta sheet-rich domain which is structurally very similar to the OB-fold domain, associated with its neighbouring slipin cluster; a second major group with an additional middle, membrane-spanning domain, associated in some species with eoslipin and in others with yqfA; a final 'artificial' group which unites truncated forms lacking the protease region and associated with their ancestral gene partner, either yqfA or eoslipin. This NefD, C-terminal, domain appears to be the major one for relating to the associated protein. 
NfeD homologues are clearly reliant on their conserved gene neighbour which is assumed to be necessary for function, either through direct physical interaction or by functioning in the same pathway, possibly involve with lipid-rafts [1]. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.79 0.71 -4.05 29 3477 2012-10-02 01:11:51 2003-04-07 12:59:11 13 7 2863 3 986 2665 624 137.70 20 67.86 CHANGED hhlllGhlLlshEhh..hsshuhluhuGlsuhllGslhlhs...................hshhhshtlhhhhhhshlthhlh+t....+pppss.psh.....csllGcpupl...ts...hhsspGhlplcGphWpsput-....lttGcpVcVluhcGhsLhVpcps ............................................................hWlhlu.hlL...l..hhEh..........hssh...h..h..h.....h.h.s..G..l...u..u...h..l..s....u.h.lh.h.hh.........................................hs.hh.h.h....h..t..s.l.h.....h.h...h..l...s....h.l........t.....h............h.....h.....h.+...p......h..h...t...t..p.t.p.p.pt..t.................................ppl...l...G...p..pu.hl.......ps....................hs..s..s....p......G......p....l......c......l.....s.......u.......p.....t......W...s.....s...p....utss...........lts.G..s..c.V....pVlt.l...-...Gh.pLhVp...h............................ 0 335 648 832 +1845 PF01958 DUF108 Domain of unknown function DUF108 Enright A, Ouzounis C, Bateman A anon Enright A Family This family has no known function. It is found to compose the complete protein in archaebacteria and a single domain in a large C. elegans protein Swiss:Q19527. 
21.00 21.00 33.40 32.40 20.70 19.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.94 0.72 -3.76 65 431 2009-09-10 18:08:58 2003-04-07 12:59:11 13 7 393 4 169 403 114 91.40 37 34.30 CHANGED pslspss..slFcGsApEAsptFPpNsNVAAsluLAu..hG............h-pspVcllADPss.stNhH-IplcG......shGphphplcshP.ssNP+TShlsAhSllp .......s..hsctp.llFcGoA+E.....AsphaPpNsNVAAsluLAu..lG............h-pTpVclhADPsh.spNsHclpscG......shGphplplps.hPhss.NP+TSslsuhSsl................. 0 50 101 138 +1846 PF01959 DHQS DUF109; 3-dehydroquinate synthase (EC 4.6.1.3) Enright A, Ouzounis C, Bateman A anon Enright A Family 3-Dehydroquinate synthase is an enzyme in the common pathway of aromatic amino acid biosynthesis that catalyses the conversion of 3-deoxy-D-arabino-heptulosonic acid 7-phosphate (DAHP) into 3-dehydroquinic acid [1]. This synthesis of aromatic amino acids is an essential metabolic function for most prokaryotic as well as lower eukaryotic cells, including plants. The pathway is absent in humans; therefore, DHQS represents a potential target for the development of novel and selective antimicrobial agents. Owing to the threat posed by the spread of pathogenic bacteria resistant to many currently used antimicrobial drugs, there is clearly a need to develop new anti-infective drugs acting at novel targets. A further potential use for DHQS inhibitors is as herbicides [2]. 
21.20 21.20 33.70 32.40 20.40 20.00 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.01 0.70 -5.76 26 211 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 183 0 126 218 104 339.40 38 94.02 CHANGED KplWl..s.t..ssW-tpKchlTsuLEuGsDsVlVsp.tclc+V+cLGplplhu..................................s...h.........ht.h.......GpssusYlcIpsc-pEphAtchuc...s-alIltupDWplIPLENLIAtl.tpcscllAsVpss-EAcsAhpsLE+GsDGVLLcsc..Dss-l+chsphhcphspEp..lcLp.ApVTclcslGhGDRVCVDTCSlMpcGEGMLVGShSpGhFLVHuEThEsPYVAuRPFRVNAGAVHAYlhsPss+T+YLuELpuGDcVhlVDpcGpsRpAlVGRlKIE+RPLhLlEA..Es.cG.cclpslLQNAETI+LVss-GpslSVs-L+sGDcVLlhh-p...suRHFGhtl.-EoIIEK .............................................................................................al...........p..+thlptulEsGh.ss.llhtt...tp.h.....tt.h.p.pl....up.h..plhs.............................................................................................h.......tt.spchushlp.lps.ppp......chAt.phup...sshsll.s.p...DWp.....lIPLENllAt..h..t......p..ps............plluhs...p...sscEAclshps..........LEpGsDGVllpsc......shs-l+clhp.......hh...........cp............s..p........pp...........lpLs.AsVTplc..l.GhGDRVCVDTsSlhc.GEGhL..VGShSpGhFLVHuEohcss.YluoRPFRVNAGuVHuYlhsPss+TpYLSELcuGccVllVc.pcGpsRpuhVGRVKIE.pRPLlLl-....Acs......sG.....cphsslLQNAETIRLl.......s.s......s......G.....p...slu....VspLKsGDcVLshhpp...suRHhGhtl.pEhIlE+............... 0 43 92 117 +1847 PF01345 DUF11 Domain of unknown function DUF11 Bateman A anon Pfam-B_1553 (release 3.0) Domain A domain of unknown function found in multiple copies in several archaebacterial proteins. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.51 0.72 -3.93 35 6559 2012-10-03 16:25:20 2003-04-07 12:59:11 13 229 570 0 1139 6240 824 72.90 27 22.92 CHANGED sssssssssssp..ssssshssssssshtlspssssssspsG-slsaTloVoNsGsss.uss..VsVpDhl....PsGhsassso .............................................................ssss..................................s.........h....s....p.....u.......s...l..s....s...s...K......o.....s.......s.......p...s....h..........s...s.........l......G.......-..s.l..........TY.Tlsl..s..N..s....G.....s.....s......s.....A.....s...s........VhhsDsl........Ps.G..soFlsso........................ 0 455 877 1040 +1849 PF01969 DUF111 Protein of unknown function DUF111 Enright A, Ouzounis C, Bateman A anon Enright A Family This prokaryotic family has no known function. 19.90 19.90 26.20 23.10 19.40 19.00 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.28 0.70 -5.66 103 749 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 627 1 290 733 394 342.40 33 95.11 CHANGED LahDs.huGluGDMhLuALlDLGl..shch.lpptLppLsl.....st...apLphpcsp+pGlpup+lcVph.............................................................................ptpp..................c................Rp.......hs-I...tplIp.su.....sLsspl+ptuhplFppLAcAEA+VHGh.sl-cVHFHEVGAlDuIlDIVGsshul-hLs....l-c.......lhsoPlsh...GuGhVcsAHGhlPVPAPAslElL..cslP.lhs..ssh.p.....sELsTPTGAAllssluspasshPs.hplpplGYG..uGs+-hs..hPNlLRlhluc.............................tps.thpp-plhllETslDDhssEhlualhEpLhps.GAlDVhhsPlhMKKuRPGhhlsVls..pscptppltpllhpEToTlGlRhpth.pRhhLpRchtplpTshG....plpVKhuh.hsut....hhphpPEa-DspclApcp..slPlpcVhp 
...............................................................................lahDs.hsGluGDMhluALlDh.......Gs.......s................p...h....l...p...tlppLsl.....tt..............hplphpch.+..p..GltutphpV.ht.................................................................................................................................................................p...ppt..........................c...............Rpht-I.hplIp.pu........s.Lsptl+phuhtlFphl.Ap.AEuplHGh.s.h-pVHFHEVGAh.D.uIlDIlGsslsl-.Ls....hcp.......lhsoslsh...GsGh.lpsuHGhhPVPsPAshclh.............psls..lht..t.sh...p.........sELsTPTGAAlltsls......s.p......a..s..........ss.hplpplGhG..s..Gp+-h.....t..hsNlLRshlhc........................................................................................tt...t.ttcplhhlEsslDDhssEhhuh.hh-tLhpt.GAhDVahpPlhMKKsRPuhhlslls.....p.pphpthtpllhpcToolGl.Rhh.h.pRhhhp.Rpht..plps.hG.....................................plplKhsh..h.....tst.................hhphpPEa--htplApptshshpplh.t.............................................. 0 134 237 275 +1850 PF01970 TctA DUF112; Tripartite tricarboxylate transporter TctA family Enright A, Ouzounis C, Bateman A anon Enright A Family This family, formerly known as DUF112, is a family of bacterial and archaeal tripartite tricarboxylate transporters of the extracytoplasmic solute binding receptor-dependent transporter group of families, distinct from the ABC and TRAP-T families [1]. TctA is part of the tripartite TctABC system which, as characterised in S. typhimurium [2], is a secondary carrier that depends for activity on the extracytoplasmic tricarboxylate-binding receptor TctC as well as two integral membrane proteins, TctA and TctB. complete three-component systems are found only in bacteria. TctA is a large transmembrane protein with up to 12 predicted membrane spanning regions in bacteria and up to 11 such in archaea, with the N-terminal within the cytoplasm. 
TctA is thought to be a permease, and in most other bacteria functions without TctB and TctC molecules [1]. 20.40 20.40 21.50 21.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.26 0.70 -6.07 199 2024 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 1089 0 682 1865 3145 407.50 36 83.48 CHANGED sLhhhhhG.shlGhllGslPGLusshulAlLlPlTas..h.....s........PhsulhhL.sulYhGuhaGGulsuILLslPGssuussTslDGasMAppGcuupALulusluShlGGhlu....slhLhhhuPhluplALp.FGssEaFsLhlhuLs.hlusluus.s.hh.............KulhushlGLhluhlGhDshsG.h.Ra.TFG..........pLh.sGlshlslhlGLFuluElltt....h..................t..ptph..tph....tt.h.h.s.....hp-htcthhshl.RuohlGshlGhLPGsGuslAualuYs........................ht++.ho..+psc..p..F..Gc.Gsh-GlsAsEuANNA...ssuuuhlPhLsLGIPGsussAlllGuhhlpG...lpPGPhlhpppsp..llaullsuhhluNlhhlllslhh.ht.h.as+.llplPtphLhPhIllhshlGsYuls....ssh.hDlhlhlshGllGahhc+hshs.........huPll........L ..............................LhhsllGshlGsllGsLPGLGsssul..A..lLLPlTau..h.............sPtu..ullhL.uulYhGuhaGGuhouILlNhPGssuulsTshDGasMA.pp.G.+.AGtALuhusluSFhGGhlu.......sl.hl.hhh...A...P..............h.LAph.......A.Lp..F..Gs...sEYFuLhlhuls.sluuh.su...p..s..hl..............Kulhu.s.h..lGLhlu.....sl..GhDsho....G.s.Ra..TF....s..s.........pLh.sGls.hlslhlG.LFul......uEllhhh...............tppp.p.t.ph.....h.t.ph..........sp...h...hhs...............hp-htp.h.h.h.shl..RuollGhh.lGlLP.G.uGAsl.......A.ualuYs...............................hpK+..hu.......+..p.sc....p..F...Gc..GslcG.lu.AP.EuANNA...u.s.s.G.uh.lPhL..TLGlPGsuss....A.....l....h.lG...Alh.laG......lpPGP...hlFs.....p.....ps.-...............l..saullsu..hhluNlhLllls.lsh....ls.las+......lLplPh.................th..LhP.....hIl...h.h...s..hlGsYu.ls..........ssh...hDlhlhlshGllGahhc+hsaP.........huPllL................................................................................................................. 
0 185 421 574 +1852 PF01972 SDH_sah DUF114; Serine dehydrogenase proteinase Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaebacterial proteins, formerly known as DUF114, has been found to be a serine dehydrogenase proteinase distantly related to ClpP proteinases that belong to the serine proteinase superfamily. The family has a catalytic triad of Ser, Asp, His residues, which shows an altered residue ordering compared with the ClpP proteinases but similar to that of the carboxypeptidase clan [1]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.71 0.70 -5.63 4 404 2012-10-02 13:07:06 2003-04-07 12:59:11 11 4 342 0 216 2161 936 193.70 28 59.54 CHANGED h.hhDPhou.l.uLhWaLLFhaLlhuPphphptLlhARh+slRclppKRsSpVITMIHRQEuIGFLGIPIY+FITIEDSEclLRAIRhTPcDhPIDLIIHTPGGLsLAATQIApAL+cH.AcTpVIVPHYAMSGGTLIALAADEIIMDcNAVLGPVDPQlGpYPAsSILKslE+KsscclDDpTLIhADIucKAIpQhp-hVasLL.KDKhs-EKAKElAKhLTpG+WTHDYPLTVEcLKpLGLcVsTNVPcEVYELMELY.QPMtp+ss.VpalPsPhK...pEpsAK 
...................................................................................hhh......................................................................................................................h.....c.tl.p......h.........s...h...s...I...s...L.l...l...c...TPGG....h....V.....AA....t....pI....u....p....t...l....p....p....p..s.....s.....c.....s....s............l....h...V....P...c...h.....Ah....SuGT.lIA.L.uAD...c..IlMs.sus..........lG.....P.....l.....D.....P.......p........l........t........t...........................................................................................................................................................................................................................................................................................................................................................................................tsh................................................................................................................................... 1 96 153 185 +1853 PF01973 MAF_flag10 DUF115; Protein of unknown function DUF115 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaebacterial proteins has no known function. 
33.30 33.30 33.30 33.30 33.10 33.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.09 0.71 -4.75 151 1412 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 624 0 345 1270 190 143.60 25 34.65 CHANGED pphlpNlsphhp......hppLhs....pt....cs..slllusGPSLccp.lshl.+php....pchlIlusssuhphLhcpGIpPDhlls.lDt....hshc....h..hpph.p........slhllhsptss.chlpthpspph.hhhhsp..........hshh..t..t..........hhsGhoVuphuhplA.hthGtcpIlLlG.Dhuasps .............................................................................hht.................hhlhusGPSLt.pt..l.hl..pt............tp..hhlhss.ss.uh.hL.hptsI.tPD.....hlh...l-t.....hs..c.............h.hpp..tp........................slhhlh.ssh.sp......ps.l...p..hh...ptpph...hhhhtps....................................hthh...p..th.................htsGh.....oVu.phuhtLA.ht.lshc...sIlhlG.Dhuas........................................................ 0 131 239 288 +1854 PF01976 DUF116 Protein of unknown function DUF116 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein has no known function. The protein contains seven conserved cysteines and may also be an integral membrane protein. 
25.00 25.00 26.20 25.40 24.60 24.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.23 0.71 -4.74 48 272 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 245 0 133 254 20 161.60 30 58.26 CHANGED hhshhchlh......................................phhuhcc..phlpphhIplpNphhtspht...................ch.scchllllPHCLppsc...Cst+lTs.cs.pC.pcCG.+Csluclhclu-chGhc.lhllsGuohs+............+llcct+Pc....ullulAC.p-LppGhpc.........hpt...lPs..hGVl.p+ss..C........hsTpV-hpplhchlp .............................................................h...hhhhls.....phhshpc..phlpp.hlcl...pNphh.hppht...................c.h.scchllLlPHClp.sp......C.h+lTp...ch.sC.....+cCG..+Csluslhclucch.G.hc...lhlsTGGTlA+............+hlpct+Pc....ulluVACc+DLtpGhpc.........sps...lPshGVlsp+sstsC...............hsTpVshpcl.phl.t................................................... 0 54 101 121 +1855 PF01978 TrmB DUF118; Sugar-specific transcriptional regulator TrmB Enright A, Ouzounis C, Bateman A, Studholme DJ anon Enright A Family One member of this family, TrmB, has been shown to be a sugar-specific transcriptional regulator of the trehalose/maltose ABC transporter [2] in Thermococcus litoralis. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.86 0.72 -4.27 51 1668 2012-10-04 14:01:12 2003-04-07 12:59:11 14 47 710 5 702 5171 791 67.60 24 27.24 CHANGED Lp.plGlochEucsYhsLl...ppusssup-lucpsslP+s+lYclLpsLhc+GhVphtp.up......Pt.hYpslssc ......................................thGhsph.Esc..s..YhsL.....l.......pp.u....s......h......o.....u.......p.-.l....ucp.s....u.....l....s..+......u....p.l.Y....csLppL....h....ccGh.V.ttpp..sp.....................sh.hY.tshs.................................................. 
0 181 429 610 +1856 PF01982 CTP-dep_RFKase DUF120; Domain of unknown function DUF120 Enright A, Ouzounis C, Bateman A, Coggill P anon Enright A Family This domain is a CTP-dependent riboflavin kinase (RFK), found in archaea, that catalyses the phosphorylation of riboflavin to form flavin mononucleotide in riboflavin biosynthesis EC:2.7.1.26. Its structure resembles a RIFT barrel, structurally similar to but topologically distinct from bacterial and eukaryotic examples. The N-terminal is a winged helix-turn-helix DNA-binding domain, and the C-terminal half is most similar in sequence to a group of cradle-loop barrels. Swiss:O28174 has this domain attached to Pfam:PF00325. 25.00 25.00 66.80 66.10 22.80 20.10 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.50 0.71 -4.49 44 155 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 152 8 103 158 96 120.50 38 62.91 CHANGED VsSGLGEGpaYlol.hYpcpFcctLGFpPaPGTLNl+lssp.t.h.htttlcphpslhI.Gap.pssRsaGuV+sassplss.....lpuA......llhPpRTpHspcllElIAPhpLRcpLsLcDGDcVplpl ..VlSGlGEGpaYlSl.hYpcpFcchLGFcPaPGTLNlclppp...hphhttl.cshpslhI.uap.pssRsaGsV+sa.splss......lpuA......llhPpR..TpHs.psllElIAPhpLR-pLsLcDGDpVplpl.......... 0 26 61 84 +1857 PF01983 CofC DUF121; Guanylyl transferase CofC like Enright A, Ouzounis C, Bateman A anon Enright A Family Coenzyme F420 is a hydride carrier cofactor that functions during methanogenesis. This family of proteins represents CofC, a nucleotidyl transferase that is involved in coenzyme F420 biosynthesis. CofC has been shown to catalyse the formation of lactyl-2-diphospho-5'-guanosine from 2-phospho-L-lactate and GTP [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.46 0.70 -5.38 3 277 2012-10-03 05:28:31 2003-04-07 12:59:11 11 3 269 2 130 665 682 192.50 28 86.28 CHANGED M+lIIPVSshNpsKTRLSshLSsEERKsLL+sMLhDVIcALcs.lD.llllScDE-VL-aAhssLGlEllcEc..+DLNsAlcQA..Fpt.E-ccVIIIPSDIPLIuKcclcclL-puuphDVVIAPuRGGGTNhLlLR.Kcuhcl+YcssSFFKHLEEARKRGL+spIYDSFYlSVDINTsEDLGEIhlHGsGT+o+EYLRKLGFoVcPc+oSchRhcVpRp .........................................................................................hh..lllPVKpl...stAKoRLu.ss.....h.s.......s..pp....Rps..ls...h....A....M....L....t......D.....s......l.......s....A...s...t.....s......l.....s.....l....s...V..l........o....s...D...t.......s.......s....s.s...h.u..t................t..........h.....G...u.....p..l.l....sD..............s........t..........p.....s...........L....N.......s...Al....stu..............h.h...t........t....s....s....s.......s....ll.l..l....uDLPhlpsp-Ls...p.h..l..s..s..u...........t....p.....h....c.....s..h..lu.s...s..p.G.s..GTs...sL...h.h.t....s..s.t....h.....p.........s.......p..a.s.s..s..S...h..t.H...t...........................................Dls...ch.................................................................................tttth................................................................................ 2 39 96 119 +1858 PF01986 DUF123 Domain of unknown function DUF123 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial domain has no known function. It is attached to an endonuclease domain in Swiss:Q58030. The domain contains several conserved cysteines and histidines. This suggests that the domain may be a zinc binding nucleic acid interaction domain (Bateman A unpubl.). 
25.00 25.00 27.50 25.60 21.20 24.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.76 0.72 -3.89 69 232 2012-10-01 19:55:08 2003-04-07 12:59:11 11 4 224 0 149 235 33 93.40 35 50.11 CHANGED phphptGhYsYlGSAht.su.....htpRltRHhp..tst.+..............................h+WHIDYLlt..psplstl.h.hhsppc....hEstluptLsphst....l.tGFGuSDsp..stSHLahhs ..............t.hphptGhYhYlGSAh..su.....ltpRlpRHhpps+...............................ph+WHIDYLhs...psphhts..h..s.stc....hEsplAptlsphht.....ltuFGuSDCp..CtSHLahh.p......................... 0 51 97 124 +1859 PF01987 AIM24 DUF124; Mitochondrial biogenesis AIM24 Enright A, Ouzounis C, Bateman A, Eberhardt R anon Enright A Family In eukaryotes, this domain is involved in mitochondrial biogenesis [1]. Its function in prokaryotes in unknown. 20.80 20.80 21.00 20.80 19.90 20.40 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.54 0.70 -4.88 153 1844 2009-01-15 18:05:59 2003-04-07 12:59:11 12 16 1144 7 748 1709 223 204.60 25 77.57 CHANGED aplh.tsshphlclpL.t.sucslhscsGuMlhhs....uslphps..th............th....lhtulp........chlsGEuhFhs.hp...tspG.pGclhlAss.hsGp.lhhlcL.ss.........p.tlhlppsuaLAs.sssl....phchpht......thttu..........hhu...........Gp..Ghhhh+lp...G.pGhlhlpu...hGslhphplt..scslhVDsuplVAassslpa.......plpps.s................................................thhuth.hu...GEGl..hh.php...Gs.G.pVhlQ.ohs 
......................................................................................................t.shphlplpLt..ssp..s.lh.....sc.....s.GuMhhhp...................sslphps.h.t.............................utlhtthp...........phlo.GEuhhhs.hh........tspG..pGplhhAsshs.........sp..l..h.s.lcL..ss....................tpl.hl.p..p..ssaLAh.ss.s.l.....phshpht.....th.tu......................hhu......................Gp...Ghhh.h.p...lp...........G.....pG...h...lhlpu....tGslhph.p.l...t...sc.plhlDssplVAa...ss..s...l..ph..........slphs.s.....................................................................................shhsth...hu.........GEG.l....hh..ph..p....G.s..G...pVh.lQoh................................................................................................... 1 239 508 668 +1860 PF01988 VIT1 DUF125; VIT family Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the vacuolar Fe2+/Mn2+ uptake transporter Swiss:P47818, Ccc1 [1] and the vacuolar iron transporter VIT1 Swiss:Q9ZUA5. 
28.10 28.10 28.10 28.10 27.90 28.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.37 0.70 -4.85 143 2783 2009-09-13 10:13:07 2003-04-07 12:59:11 14 11 1699 0 1169 2391 416 165.10 25 72.77 CHANGED hlRssV.hGusDGllosh.ullsGluuss...s.......s.st..slllsGlusl.lAuuh.SMuhGpYlSspupp-htptphpc.p..th.t..ptpht.phhthhtpp.Glstphspphspth.t...................t...............................................................ht.hh..thsh................pPhtuAlsosluahlGullPllsahlh....................shhholhhshlsLhllGhhtuths.s........tshhpuslchlhhGhlussloahlGtl ......................................................................................................................hpshlhGh.DGllssh.ullhGluuus.........s.....lhlsGhushluuuh.SMuhG-alSspsp............t-..tt.ht........h.........t...................h...hh.t...s...t.h......hs..h................................t..........................................................................h....tht...............s...............pPhpuAhsohhuahlG.....ulhPhlshhhh................................hthhhshh.hs.h..lsLhhhGhhtuhhs.t................................tshhpuhhc.lhhGhhshshshhlG.......................................................................................... 0 357 761 1000 +1861 PF01989 DUF126 Protein of unknown function DUF126 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. 19.30 19.30 26.90 32.90 18.50 17.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.51 0.72 -4.31 60 282 2012-10-01 19:37:30 2003-04-07 12:59:11 11 2 253 1 142 284 192 84.60 37 34.22 CHANGED slSFhGGVDPpTGhllDhtHslhGpSlsG+lLlhPsu+GSosGShVlh.pLtpsGpAPtAllhp.csEsIlshGAllAs......lPl...lst ...................lSFhGGVDPpoGhll-p.pHsLpGpslsG+lLlhPsu+GSos.GSh.Vlh.pLhpsGpuPsAl.lhp.c.s.-sIlshGAllAp..........lPll.t............ 
1 42 83 110 +1862 PF01994 Trm56 DUF127; tRNA ribose 2'-O-methyltransferase, aTrm56 Enright A, Ouzounis C, Bateman A anon Enright A Family This family is an aTrm56 that catalyses the 2'-O-methylation of the cytidine residue in archaeal tRNA, using S-adenosyl-L-methionine. Biochemical assays showed that aTrm56 forms a dimer and prefers the L-shaped tRNA to the lambda form as its substrate [1] [2]. aTrm56 consists of the SPOUT domain, which contains the characteristic deep trefoil knot for AdoMet binding, and a unique C-terminal beta-hairpin [3]. 25.00 25.00 79.40 95.80 22.90 22.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.59 0.71 -4.37 31 143 2012-10-01 22:53:19 2003-04-07 12:59:11 11 2 140 4 88 146 75 121.00 48 66.03 CHANGED WGGs..FpVchsssa+phl+ca+ptuGhVVHLTMYGhsls-lhscI+...................pscc...lLllVGAEKVPt-lY-hADYNVuVGNQPHSEVAALAlFLDRLhcGcpLpp-FtsA+lpllPpc+GKcVlct ......WGGs..Fp.lchsssa+phl+cW+t.ts.....GhVVHLTMYGhsls-.lhscI+.........................p..p..p.cs....lLlVVGAEKVPt-lY-hADaNVuVGNQPHSEVAALAlFLDRLhcG+pLpp-FpsAcl+llPpppGK+Vlph.. 0 21 53 73 +1863 PF01995 DUF128 Domain of unknown function DUF128 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. The domain is found duplicated in Swiss:O27611. Many of these are attached to an N-terminal winged helix domain suggesting these are transcriptional regulators and that this domain has a ligand binding function. 
25.00 25.00 63.90 27.70 22.10 23.00 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.26 0.70 -5.43 36 122 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 69 2 87 121 4 228.30 32 79.15 CHANGED lsFlhS+lpchhapssaD.tptpGcVllNhohlp..............ccch-pslcllccshcsG..huloshlplhccs.t.......tsplsItTlCSlThDGlLL+sGIPspPtYGGllclcstpPhRFp-lIsYcuTSlDPlclFhspshTsVhphhcsGpGtlLANlRplPhsAc-chcpllccl.t.uhsGll..plGcsspslhGlsVppsplGlshlGGlNPlsshpEtGlslchpshpslh-apphpch ............lshlhS+l.phhapssaD.pptpGpVllNhohlt..............cpphccslclhccshcsG..huloshlplhcc............psplsltTlCSlTlDGlLL+sGIPspPpaGGllclcs..tpPh+Ft-lIsYcuTSlDPlclFhsp.....shTsVhthhpsGpGplLANhRplPhsuc-chpcllccL.t.uhsGll..plGc..sspsl.hGlsVs.psphG.lshh.GGl..NPlushpEpGlslchpshpslhchpphpp.h......... 0 22 58 75 +1864 PF01996 F420_ligase DUF129; F420-0:Gamma-glutamyl ligase Enright A, Ouzounis C, Bateman A, MorningStar A, Mistry J anon Enright A Family F420-0:Gamma-glutamyl ligase (EC:6.3.2.-) is an enzyme involved in F420 biosynthesis pathway. It catalyses the GTP-dependent successive addition of multiple gamma-linked L-glutamates to the L-lactyl phosphodiester of 7,8-didemethyl-8-hydroxy-5-deazariboflavin (F420-0). This reaction produces polyglutamated F420 derivatives. 
GTP + F420-0 + n L-glutamate -> GDP + phosphate + F420-n 25.00 25.00 25.70 25.30 21.70 22.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.48 0.70 -5.29 91 759 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 642 4 296 699 526 231.60 28 69.99 CHANGED slcsh..lcs..GDD..Lspllhpuht..........lp-GDllVlupplVS+AEGRhlshsslpPu....................................Ah.lu..ht........tssph.........hplllcE...............................usclltt........tsshhlsps+tGhl.....hssAGlDp.........................SNs........sss....llLLPcDPcsSAcpl+ptlpphhGh....pVuVlI......sDohGRsaRhG....tsulAlGsuG......ltslt..-htGp.pDh.hGctLp.....................hT..uluDplAuuAsLlhGcusptsPlsllRGh .........................sltsh..lp.GDD..lsplltpss.........................lp-sDllslopplVu+spGphsshspht.s.........................................h..hs............hs.th.......................hpllhpE............................................................................................................sscllht..........tstshlspsppGhh........hssAGlDt....................................................................................................................................SNs.........ssst....lhLhP.p-..P..cssApplpptlpphhu.h....pluVl.l......sDoh..G+.a+hG....tsshulGhsG................ltslt..shtu...hD..hsp.lt...............................sT..slsD.luuhusLshGphst.hPlsllpG............................................. 0 110 228 272 +1866 PF02343 TRA-1_regulated DUF130; R03H10.4; TRA-1 regulated protein R03H10.4 Bashton M, Bateman A anon Pfam-B_814 (release 5.2) Family This family of proteins represents the protein product of the gene R03H10.4 which is located near a sequence that matches the TRA-1 binding consensus. TRA-1 is a transcription factor which controls sexual differentiation in C.elegans. R03H10.4 shows male-enriched reporter gene expression and acts as a direct target of TRA-1 regulation [1]. 
19.40 19.40 19.40 19.70 19.20 19.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.97 0.71 -4.64 23 100 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 4 0 99 115 0 124.20 26 71.88 CHANGED clC.s.psspCPDLtshh............sshpl..s-tDGCss.loCs.suphPhhhupassSEIsss.ss..ssshshFtlhsPhohsphsu..........slhcaaGllCE....sspWphTKYPpGIthh..ss...hhGsDGShsGKKotlttlsC .....................................t........tspCsclhs..hh......................st..h.....p-tsGCsh.loCs...ss.t.h.shlhh..tasp.SEIs.h..P..ss...s.ss....t.hh...thh...s.s.s.tt.hs..sh...............................slhsaFGllC.E....sspWhsTKYPhGlt..............Yhs......ss......hhu....ss.uphsGK..Koplt.hth....................... 0 42 42 99 +1867 PF01998 DUF131 Protein of unknown function DUF131 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein family has no known function. The proteins are predicted to contain two transmembrane helices. 20.80 20.80 21.60 25.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.16 0.72 -4.61 35 123 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 85 0 87 115 3 65.00 29 72.33 CHANGED hlGshltsht......................................pscscschGGllhIGPIPIlFGos.....pphshhuhlLAlllhllhllhhh ....................................h.....h.........................................pppcspscsGGVlhIGPIPIlFGou......pph..shhslllAlllhllhllhh....... 0 26 46 65 +1868 PF02001 DUF134 Protein of unknown function DUF134 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaeal proteins has no known function. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.56 0.72 -4.14 4 568 2012-10-04 14:01:12 2003-04-07 12:59:11 11 5 485 0 227 751 43 94.30 39 66.00 CHANGED M.t....+sRsR+hRhIhhp..PpVRtFhPch..stTG..K..Vhlol-EhEAlRLVDYc-hoQ--AuchMGIS+sTlWRhLTuARKKlApALlEGR..hIlhcGGEhhpc ...........................................................................RPpphR.p..lpth...Pt.h..p..hFt.Ptu.......shpp.............l.h.....L....sh.....-.Eh........E....A.l.R.LhD....h.cG.LsQp-uAppMtVSRpThtpllp..sARp.KlAcuL..lpG+..hlhlpGG.h...ht....................... 0 87 154 191 +1871 PF02006 DUF137 Protein of unknown function DUF137 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of archaeal proteins has no known function. 20.40 20.40 20.40 33.80 20.30 19.80 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.08 0.71 -4.89 37 143 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 140 0 96 140 121 174.20 52 69.07 CHANGED LLhAcpPVISVNGNsAALsPcElVcLActls..AclEVNLFaRT-ERhcpIs-hL+.........cpGAp.cVLGhtsDs....pIPsLpppRupVspcGIasADVVLVPLEDGDRsEALhcMGKpVIsIDLNPLSRTucsAolTIVDNllRAlPplschsc-h+phscpcLppllppaDNcpsLpculctI ..LLLAcpPVISVNGNsAALsPcElVcLActss..AclEVNLFYRTcERhctIschLc.........c..p.GAp..cVLGltsDu....pIPsLpppRu+VspcGIapADVVLVPLEDGDRsEALs+MGKpVIsIDLNPLSRTA+sAolTIVDNllRAlPplschs+-h+p....hs+c-.LppIlpsaDNccsLpculctI............................ 0 23 56 78 +1872 PF02363 C_tripleX DUF139; Cysteine rich repeat Bashton M, Bateman A, Yeats C anon Pfam-B_602 (release 5.2) Repeat This Cysteine repeat C-X3-C-X3-C is repeated in sequences of this family, 34 times in Swiss:O17970. The function of these repeats is unknown as is the function of the proteins in which they occur. Most of the sequences in this family are from C. elegans. 
22.90 1.00 23.60 2.50 22.70 -999999.99 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.68 0.74 -7.70 0.74 -3.51 166 2125 2009-01-15 18:05:59 2003-04-07 12:59:11 14 31 38 0 1234 2152 0 17.40 42 36.77 CHANGED spCtstCps.sCps.pCsp ...................pCtPhCps..uCpN.GhCst............... 0 331 447 1058 +1873 PF02405 Permease DUF140; Permease Bashton M, Bateman A anon Pfam-B_1126 (release 5.2) Family This domain functions as a permease. In Swiss:Q7DD59 it is involved in L-glutamate import into the cell [1]. In Swiss:Q8L4R0 it is involved in lipid transfer within the cell [2]. 20.20 20.20 20.20 20.40 18.80 20.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.06 0.70 -4.85 267 4359 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 2463 0 1190 3158 944 214.40 33 75.29 CHANGED p.phlp.phhplGhtol..........sllslhuhhhGhllulQshhtLpp.aGApsh....lushluluhlRElGPlloAlllAGRsGSAhsA-lGsM+lsEpIDAlcshGlsPlphLVsPRllAshlshPlLshlsshhGlhGGhllush..hhs...lssusahpphpp...hlsh.tDlhhu............llKuslFGhllulluCapGhp......s.puGspGVGpuoTpuVVtuhlhlllhDhl...lohhh ............................................h..hlp.phhtlGstol.........sIlsls.ulhlGh.V.lulQuhhhLsp...aGApsh.....lGhhl.uluhlRELuP....llsAllhAGRuGS.A.h.TA-lGsM+hoEplDAhcsMulcPlphLlsPRlhAullshPlLshlsshl.Gl..hGGhllush..hhG.........lssGsah.s.thps.....hl....s....h.tD.lhhu............l.lKuslFuhhlshlusapGap.......s..p.s.....ss.p.GlGp..AoT..poV..VtuhlsllslDhllohl................................ 0 329 763 1012 +1874 PF02408 CUB_2 DUF141; CUB-like domain Bashton M, Bateman A anon Pfam-B_1716 (release 5.4) Domain This is a family of hypothetical C. elegans proteins. The aligned region has no known function nor do any of the proteins which possess it. However, this domain is related to the CUB domain. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.59 0.71 -4.56 24 264 2012-10-02 11:50:15 2003-04-07 12:59:11 15 9 5 0 250 312 0 113.70 19 25.57 CHANGED ssshsC.sssl..hstP.ssupPh.haPssasts.sssthssspsCsaplslPpGaaAplhlpsphss....psslpshDshuph...tthtssptpsa..aFssPphplslssssss.....sFuFplpW.shs .........................s....tC...s.th..hs.s.......s.tsh...aPts.tts....s.hsths.ss.....sCsapl..s..lP....p.....G.....h..as..plp..lps.phps.......sshlplhD..sssph..................hh.h.s...s.s....t....p...sa.....ahsss.p...h..p..lpl..p...ssssss....pFthplpa.ph............................................ 0 50 67 250 +1875 PF02410 Oligomerisation DUF143; Oligomerisation domain Bashton M, Bateman A, Eberhardt R anon Pfam-B_1798 (release 5.4) Family In yeasts, this domain is required for the oligomerisation of ATP synthase subunit 9 into a ring structure [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.35 0.72 -3.97 165 4456 2009-01-15 18:05:59 2003-04-07 12:59:11 10 15 4365 5 1086 2727 2020 97.70 36 72.41 CHANGED hhphlspsl--cKA.pDIhllDlpp......ho...slsDahlIsoGsSs+plpAl......u-pltcph..+t...t......s.hpshphE.Gh..............................psu...cWlLlDh.G.DllVHlhpt-sRpaYsLEcLW .....................................................................h..phlhcsl--pK.u.pDI.lslDlps...................ho.....sls..Dah..lIsoG.sS..sRp..VpAl...A-pltcph....+p..........t..............G...hpshp..h...E.Gt...............................................................s..su...cWl....LlDh.....G...D.....llVHlh.p.p-pRpaYsLE+LW................ 0 374 701 924 +1876 PF02413 Caudo_TAP DUF144; Caudovirales tail fibre assembly protein Bateman A, Moxon SJ anon Pfam-B_1800 (release 5.4) Family This family contains bacterial and phage tail fibre assembly proteins [1]. 
E.coli contains several members of this family although the function of these proteins is uncertain. 26.50 26.50 26.60 26.60 26.40 26.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.69 0.71 -3.98 73 2529 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 606 0 168 1625 6 118.40 33 75.29 CHANGED csppsapspshp......h.ps.s.hsp......................ssstls.....t......ssGpahhhss..thh.thhs........................ppphhppA-pp+ppLlppAsptIssLp..sslcLs.hho--EpspLptWpcYtshLs+.lDsosAsD......IpWPp.Pp .......................................................................................................................a..tsht.hh.pt.h.hs..........................ssh.ls................s.u.ta..hhss...ph.h.t.hp..........................ttthh.ptA...Etp+..ppL..lptAsp..tIss..lQ.......stlc.L....s...hhT-EE.......p..spL.......pAWpcYtshLsp..VD.TS.s..APD......lpWPp.Pt............................................. 0 15 60 110 +1877 PF02415 Chlam_PMP DUF145; Chlamydia_PMP; Chlamydia polymorphic membrane protein (Chlamydia_PMP) repeat Bateman A, Yeats C anon Yeats C Repeat This family contains several Chlamydia polymorphic membrane proteins. Chlamydia pneumoniae is an obligate intracellular bacterium and a common human pathogen causing infection of the upper and lower respiratory tract. Common for the Pmps are the tetrapeptide GGA(I/V/L) motif repeated several times in the N-terminal part. The C-terminal half is characterised by conserved tryptophans and a carboxy-terminal phenylalanine. A signal peptide leader sequence is predicted in 20 C. pneumoniae Pmps, which indicates an outer membrane localisation. Pmp10 and Pmp11 contain a signal peptidase II cleavage site suggesting lipid modification. The C. pneumoniae pmp genes represent 17.5% of the chlamydia-specific coding capacity and they are all transcribed during chlamydial growth but the function of Pmps remains unknown [1]. 
This family shows some similarity to Pfam:PF05594 and hence is likely to also form a beta-helical structure (personal obs:C Yeats). 20.60 12.50 20.60 12.50 20.50 12.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.18 0.73 -7.79 0.73 -3.36 528 2166 2012-10-02 14:50:22 2003-04-07 12:59:11 12 61 306 0 261 1592 95 25.80 43 6.13 CHANGED slhFssNpu.....................................................tspGGAIhsp ........................................hhFssNsA........................................................................sspGGAIYs.................... 0 105 137 231 +1879 PF02457 DisA_N DUF147; DisA bacterial checkpoint controller nucleotide-binding Bateman A, Coggill P anon Pfam-B_1846 (release 5.4) Family The DisA protein is a bacterial checkpoint protein that dimerises into an octameric complex. The protein consists of three distinct domains. This domain is the first and is a globular, nucleotide-binding region; the next 146-289 residues constitute the DisA-linker family, Pfam:PF10635, that consists of an elongated bundle of three alpha helices (alpha-6, alpha-10, and alpha-11), one side of which carries an additional three helices (alpha7-9), which thus forms a spine like-linker between domains 1 and 3. The C-terminal residues, of domain 3, are represented by family HHH, Pfam:PF00633, the specific DNA-binding domain. The octameric complex thus has structurally linked nucleotide-binding and DNA-binding HhH domains and the nucleotide-binding domains are bound to a cyclic di-adenosine phosphate such that DisA is a specific di-adenylate cyclase. The di-adenylate cyclase activity is strongly suppressed by binding to branched DNA, but not to duplex or single-stranded DNA, suggesting a role for DisA as a monitor of the presence of stalled replication forks or recombination intermediates via DNA structure-modulated c-di-AMP synthesis [1]. 
20.60 20.60 20.80 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.46 0.71 -4.98 190 2847 2009-01-15 18:05:59 2003-04-07 12:59:11 11 19 2386 11 670 1835 550 121.00 38 40.57 CHANGED lhpuhpthucp+hGALIVl...pcppslp..chhp.sGhh............lcuplosplLhslF...t..ssLHDGAlllps.scltuAushLPLops..ss...lspchGTRHRAAhGloEpo.DAlsllVSEEsGsISlshs..Gplh ...................................................hculphhucp+hGALIll..pc....sp...s.Lp....-hlp..oGh.......................lcu..closp.LLhslFh..s..oPLHDGAlIlps..s+It..s....Aush......LPLops..........st.......lo+-hGTRHRAAlG.l..SE.h...o....D.A.l.sllVS.EETGsISlshsGph.h.................. 0 281 502 603 +1880 PF02520 DUF148 Domain of unknown function DUF148 Bashton M, Bateman A anon Pfam-B_1103 (release 5.4) Family This domain has no known function nor do any of the proteins that possess it. In one member of this family Swiss:Q23614 the aligned region is repeated twice. 23.90 23.90 24.00 24.20 23.80 23.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.33 0.71 -4.29 45 259 2009-09-11 00:48:36 2003-04-07 12:59:11 12 5 28 0 213 215 1 107.00 20 48.61 CHANGED pcsppcahsIlpN.psLThsph-splppWApp.u..lsspappFppphpspppchcpstsplIspLo....slpspLssIhsscs.TtppppptIpsLppphsp-.hsslhaltp....htpttt ...................t.pspppahtlhps..tslohsphppplptaspp.s...............lpsthppappph.psttpphppshspllspLs....ss.spLpsIhs...sps.Thppppptlpplh.pphs.c.h..l..lht.h....t.......................... 
0 69 84 213 +1881 PF02576 DUF150 Uncharacterised BCR, YhbC family COG0779 Mian N, Bateman A anon COG0779 Family \N 20.20 20.20 21.00 20.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.69 0.71 -4.60 40 4207 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 4149 1 904 2603 1054 139.30 32 84.80 CHANGED llpPllcshGhELhclchtpputthhLclhlD......p.-sG.lsl-DCpclS+tlSshLDs...pDPI..sp.tYhLEVSSPGl-RPLpptccFt+ahGchVclphp.shcsc+papGplhps-s......-slsl..phtsp....................plplshssls+ApL ...................................lpshlcshu.aELVclE..ahp...t....G....p....p.........hLRlaID..................................p..-..sG...lsl-DCsplScplSslLDs......................pDPI.......sp..tY.hLEVS.........SPGl.-RPLpptccat..chl..G..c.Vplp.L.....h..................s.........l..........p..........s...........c...+.......pa.pG..h..l..t..uh-s.................-plsl.........plcs..c...........................phplshspItKApl................................................................. 0 295 598 771 +1882 PF02577 DNase-RNase DUF151; Bifunctional nuclease Mian N, Bateman A, Eberhardt R anon COG1259 Family This family is a bifunctional nuclease, with both DNase and RNase activity [1]. It forms a wedge-shaped dimer, with each monomer being triangular in shape. A large groove at the thick end of the wedge contains a possible active site [2]. 
20.50 20.50 22.70 22.40 19.60 20.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.61 0.71 -4.95 112 1079 2009-09-11 06:46:39 2003-04-07 12:59:11 9 6 977 4 436 880 532 135.00 31 73.52 CHANGED hp..ltGlshstss.s.....sslllLp-.p..sc.....+hLPIaIGthEApuIshsl...ps.hpssRPhTHDLhtsllpshshplccVhIsslc-ss.FaAplhl...p.psp.............t.h.................................plDARPSDAIALAlRss..sPIast-cVlp.puul.hp................t-c ....................................................h.plhslthp.sp.s...tssllLc-.s......uc.........RhLPI.aIGt.E.Apu.I.uhth.......ps....hp..s..s.RPLT.HDLht...s...l...lps......h......s......tp.l.pcVh..Iscl....p...-......us..FaAcLhh........p..pst..............................................cl.DAR..PSDAlALAlRss..sPIasp-plls.pu.ul.h..........pttt.................... 0 176 335 404 +1883 PF02578 Cu-oxidase_4 DUF152; Cu_oxidase_4; Multi-copper polyphenol oxidoreductase laccase Mian N, Bateman A anon COG1496 Family Laccases are multi-copper oxidoreductases able to oxidise a wide variety of phenolic and non-phenolic compounds and are widely distributed among both prokaryotes and eukaryotes. There are two main active catalytic sites with conserved histidines that are capable of binding four copper atoms [1]. 
20.40 20.40 20.40 20.80 19.70 20.20 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.59 0.70 -5.29 132 3547 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 3384 10 808 2592 2090 224.90 32 89.31 CHANGED aoTR.........Gho...s.h.......tuhNluhp...s.sDs...ppVtpN....Rphlspt.....hshs............hhhhpQlHusplhhlpt..................................tssh.puDullTsp.ssl........slslhsADClPlLlh..D.psp..hluusHAGW+Gss....ssIstpsl.pth.pphss.......pspcl....huhlGPuIusssYEVu....p-Vh.ptF......tpthstttthhht............................s+hhhDLhthsptpLppsGlt....tpIth.......ssh........CThsp.s-.hFaSaRR.......psps.....GR.hhuhIhl ......................................oTR.....G..GlS........ssa............suhNluhp.....s...sDs...s..ttVtpN.......RppLhpt.........................hshs..........phlahp..QlHuscVhp.lst.............................................................stsh.puDAhhTsp..s.sl.........sls.lhoADClPVLhs....s.tpss.....h.lAA.sHAGWRGhh..............sGllcps.lp..h...t...p....ss...........................sspcl.............hAhlGPuIusp..sa..E..Vu....s-lh.ptF..................hs..t......s..p..s...t..s..h..h.h........................................s+ahhDLhths..ctp..Ltp.hGVp......pIhs.........ssh........................................CThs.......c.....tc......h.........FF..SYRR........ctpo........GR...hsuhIh.................................. 0 252 508 672 +1884 PF02579 Nitro_FeMo-Co DUF153; Dinitrogenase iron-molybdenum cofactor Moxon SJ anon COG1433 Family This family contains several NIF (B, Y and X) proteins which are iron-molybdenum cofactors (FeMo-co) in the dinitrogenase enzyme which catalyses the reduction of dinitrogen to ammonium. Dinitrogenase is a hetero-tetrameric (alpha(2)beta(2)) enzyme which contains the iron-molybdenum cofactor (FeMo-co) at its active site [1]. 
21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -10.08 0.72 -3.91 179 2027 2009-01-15 18:05:59 2003-04-07 12:59:11 12 31 977 14 943 1863 98 94.20 19 46.31 CHANGED tsstls...tHFG...+uptFhlh...-ss....st.shc...llcsp.................ssstt......tsstphsp.hltp...pss..ssllsspl..GssAhttLppt.Glclhp..s....sssslc..-s.lpphhp .............................................tttlstHFG+uptFhla............-lp...................st...php.......hlppp..............................hs.sss......................ssp...sp...hsp.hlpt.........ps....s..sslls.us..l..G.tsshptLtpt..GIclht...s....stsslc..cslpth..t.................................... 0 381 689 831 +1885 PF02582 DUF155 Uncharacterised ACR, YagE family COG1723 Mian N, Bateman A anon COG1723 Family \N 21.10 21.10 21.30 22.40 21.00 20.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.83 0.71 -4.36 92 875 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 488 0 565 818 18 175.80 27 42.95 CHANGED plalF..p.aGslVhWshs................cp....ptpphL...phlp...................................phttp.hsppp....p.E-hpahhss.ph.....................................................................pspl...ts....................DhIh.Lpst...............................................................shhtchshSpuLupSs+LshhEpplsphl....-phpplsppLsp.sG+lsh.sc+......clhchhGclhth+hplslpspll.Dp.P-hhWc...cspL-tlYptlpchh-.......lspRlplLNc 
..............................................................lFlF.p.aGslVhWshs................cp......pp.pphL...ptlp.................................................th..thp.l...sptphp..sEphpahhspph....................................................................................................................................pspl..hs..........D.hIh.L..p.st...............................................................shhhKlulSpuLAQSsKLuhaEptlsphl....pp..spslPppLu...p.sGc..l.sh..s+c............................plhpphGcLathRhplNL.puslL.Ds...P-haWc.......csp.LcslYptlppaL-lspRlplLN.............................. 0 179 331 483 +1886 PF02585 PIG-L DUF158; GlcNAc-PI de-N-acetylase Mian N, Bateman A anon COG2120 Family Members of this family are related to PIG-L an N-acetylglucosaminylphosphatidylinositol de-N-acetylase (EC:3.5.1.89) that catalyses the second step in GPI biosynthesis [1]. 22.00 22.00 22.00 22.40 21.90 21.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -11.03 0.71 -3.63 137 4157 2009-09-13 21:13:25 2003-04-07 12:59:11 12 36 2321 19 1451 3457 1554 136.10 25 45.05 CHANGED LllssHPDD-s.husGusltphtp.....p..Gtpltllsl.........os....G...............................phs........................phsth..Rpp......EttpA.s.phL.Gl........pphhhLs..........hsDsthpt........................................................hpphhpt.....ltpll...pph...p...P.........................cl.......lhsht....stss....................HsDH.psst.....t..hshp...uh 
....................................LslsAHPD..D-s.hus.u.u.o.l....u....p.hsp..............p......G.h..pV..t..l.l.sh............Tp.........G...................................................................................................ph.u.p................................................phsth....Rcp.............EhppA.s..chL..Gl..........................pphthLs.....................h..D.sthtt...................................................................................................hpph.tpt............ltpll........cch........p...P......................................................sl.........l.h..o.ht..stss....................HsDH.htstpsshtA.h...................................................... 0 554 1008 1293 +1887 PF02586 DUF159 Uncharacterised ACR, COG2135 Mian N, Bateman A anon COG2135 Family \N 27.10 27.10 27.20 27.10 27.00 26.80 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.88 0.70 -4.80 164 2412 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 1845 9 843 2126 1620 208.00 26 88.83 CHANGED MCG..R...asht......ts...ppltphhth................................................ttphtspaNls....Psptssllht................................................................tpt..............................h.phhpWGl.....h....P...tasc...........................ththhNARsE...............o.l..tp..+s..sF+....psh...pppR.CllPusGa....YE......Wp......................tttsp.......K......pPah.....l...........php...s......t..ps...............................................................................................................................................hhhAGlap..tapss.tt............................................................htohsllTssu.......sst.....lstl.Hc..................R.MPllL..s.t-phcpWL.........pstssttpt....h......................lsttsspstpsss 
...............................................................................................................MCGR.....as.t......ts......pphht.hh.t.................................................ssp.....sp.aNlu..Psp.s.llhpp..............................................................................ptth..................h..p.hpWGh.h......P.........sWhc..............................t..hhN..ARsE...........................Th....tp......p...hF+.....tsh....ppp....RCllPssGaaE...Wp..........................ttssp.......K..........pP.aa....l........................pht......-.............s...p..................................................................................................................................................................h.hhAGlap......phts...tst.......................................................................htshsllTssA....stt.......................lstl.Hc..................R....hPllL.......s...-..t.h.cpWL.........ssp..hstttt.....h....................................................................................................... 0 251 500 686 +1888 PF01519 DUF16 Protein of unknown function DUF16 Bateman A anon Pfam-B_764 (release 4.0) Family The function of this protein is unknown. It appears to only occur in Mycoplasma pneumoniae. The crystal structure revealed that this domain is composed of two separated homotrimeric coiled-coils [2]. 
25.90 25.90 26.00 31.60 25.80 25.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.55 0.72 -3.80 18 91 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 5 3 30 66 0 93.90 42 69.63 CHANGED sFsGshsc..Kt.p......ocYVT+KQFs....EFK.D.........uscp+LpKlEscls...........tQGEQIpp.................................................QGEQIccLp.p.c....................................t.scsLplllpoLpphs.......cRLD+lEu ...........hsGshsc..+hp.......scYVTpKphs.......EaK.s..........uspQcLhKlEspls..................sQGEQIsplhphVpt..............QGEQI+pLphc.K....................................sQGcoL.p.IhpsLtthscRLDph-...................... 0 30 30 30 +1889 PF02589 DUF162 Uncharacterised ACR, YkgG family COG1556 Mian N, Bateman A anon COG1556 Domain \N 20.40 20.40 20.40 21.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.04 0.71 -4.78 209 4142 2012-10-04 00:26:15 2003-04-07 12:59:11 10 12 1994 1 981 2994 341 198.50 23 58.26 CHANGED lpp.htcplppts.splhhsps...........hp-shphltclltpt...sh.................................sh.shphsl.tchhl.............hshlctsptphhchhstph......................tpthtcphhpuDhu..........lousshulA-oGslslhsspust.chhshhPpthlsls.....sh.s+lV.sshpcAhptlph.....ts.t.........th.sshsshh.................sGP.ucsu-lphhh.hshp...GPp.clpllll ......................................................................................................h..phtpphpp.s.spVhhsps.........................tp-.ssph.l.h.p.lh.pcp...ss..................................................sh.sh-scL....sEhhl......................hssl.c...p.s...p..t.p.l.tchhppph...........................................cp.phc.cp.hh.pA..-.l.G.........lousshulAEoGolsl.ssspGs.......u..RhhshlPcsplslh.....sh.p+ll.sshp-shthlph......suhu...........p.phsshhshl..................oGP..pp.s.u.Dl-..................G.Pp.chpllll.................................................... 
0 354 721 864 +1890 PF02590 SPOUT_MTase DUF163; Predicted SPOUT methyltransferase Mian N, Bateman A anon COG1576 Family This family of proteins are predicted to be SPOUT methyltransferases [1]. 20.90 20.90 20.90 21.20 20.60 20.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.79 0.71 -4.50 30 3767 2012-10-01 22:53:19 2003-04-07 12:59:11 12 4 3656 18 737 2319 968 152.30 38 97.90 CHANGED M+IpllsVG+hppcalcpuhsEYtKRlspasp......lcllEltsp+t....pspshpphhccEupplhtpl..tssshllsL-hcGKthoSEphAchlcchthpGppclsFlIGGutGLssslhpcAshphShSchThPHpLhRllLsEQlYRAasIhpscPYH .................................................M+lpllsVG+h.t..tcalppuhsEYhK.Rhsch.hp......h-llElsstKt......spstchp.p..lh.c+E..G..pp.lLutl..............ss..p..s.h.l.l......sL-lp.GK.thsS.phAp.pLppht.h........p.G.p...clsFlIGGu...G.Lu.s...slpp...p...A.s.........p...lShSphThPH.p........LhRllLsEQlYRAasIhpscPYH............. 0 246 481 623 +1891 PF02591 DUF164 Putative zinc ribbon domain Mian N, Bateman A, Eberhardt R anon COG1579 Domain Structural modelling suggests this domain may bind nucleic acids [1]. 23.70 23.70 23.80 25.90 23.40 23.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.02 0.72 -4.14 93 1089 2009-09-12 00:25:18 2003-04-07 12:59:11 10 5 1080 0 327 812 391 56.40 33 22.61 CHANGED llptY-+lRpp..pGhulstlpps....sCtGCphplssphhscl.pps..cc..llhCspCsRIL .......llphY-+lRcp..pu..huls.lpp.p.......sCsGChhplsspphtcl.ppu......cc...llpC.pCGRIL........ 
0 138 262 312 +1892 PF02592 DUF165 Uncharacterized ACR, YhhQ family COG1738 Mian N, Bateman A anon COG1738 Family \N 22.00 22.00 22.70 22.40 21.90 21.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.71 0.71 -4.02 151 2443 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 2235 0 513 1577 1908 145.40 33 64.21 CHANGED shGslhaPhsFlhoDllsEhYGtptA+cslhhGahsslhh.hlhh...hhhh......hhs.ss.................................tshttlhs.....ssRlslAShh.AalluQhlDlhlashl.....+phtt......tp............tlWh..RshsSThluphlDThlFhs.lA..Fh......u............h...........hs.............hsthhslhhssalhK .......................................................................ThGshsFPhhFLsTDlhsclaGtphAR+llahshhssllh..hll...lhh......hhs..h....h.............................tultth.hh.......lsRIAlAShh.AYllGQhlDlhVFs+l+pt..........+............paWl.tshuSTlhGshlDTllFhs.IA...Fh..t......................s............h..........................sppahplslssYhhK.................................................... 0 152 307 420 +1893 PF02593 dTMP_synthase DUF166; Thymidylate synthase Mian N, Bateman A, Eberhardt R anon COG1810 Family This family catalyses the synthesis of thymidine monophosphate (dTMP) from deoxyuridine monophosphate (dUMP). The physiological co-substrate has not yet been identified [1]. 
24.40 24.40 25.20 24.40 22.00 21.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.46 0.70 -5.00 24 85 2012-10-10 17:06:42 2003-04-07 12:59:11 9 3 45 0 68 91 5 224.40 30 91.77 CHANGED sllhp..GpaGcRhhpslhscsphp...........................lhlh-hs-..pls-hI-pscphL.p......l.-sDlllshsLHPDlshtLschhtp.sshtulIlsutssc....h...pl+cph-phshphhsPc..hCsLc...........sspshlccFsch..FGpPclclplp...sscl..pcVcVlRuAPCGuTaalAccltGhsls-hthps........uhthpp...YPCtAuhth..p.p-shlHpAGhltpcAlpcAlth ......................................llhc..GpaGcRhhpsltppsshs.........................lhhhchsc....tls-hI-p.P.c-h.Lsc......l.cuDlllshslHPDlshtLschh.....pc.ssscul.Ilsutpsc........pl+cphcphshphhsPc..hCsLcc...........stpshlccFlch..FG+Pcl-lplc.....ss..pl.....pcVcVlRuuPCGoThalA+clhGh.....plp-ht.ps........uhthpp...YPChAuhth..p.s..-shhHpAGhlt+cAlccAltp........ 0 16 41 54 +1894 PF02594 DUF167 Uncharacterised ACR, YggU family COG1872 Mian N, Bateman A anon COG1872 Domain \N 22.60 22.60 22.60 25.40 22.50 22.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.22 0.72 -4.05 29 1679 2009-12-08 16:16:35 2003-04-07 12:59:11 11 3 1585 3 582 1119 107 76.30 39 73.13 CHANGED pppslhlplcVpPpupcsplsulpsp.....pLclplpuPPhcGKANpcLlchLu+hhpls+SslplhpGppSRpKhlhlps ................s.sslhLplhlpP+...Au..+..ss..I.s.Glcs-.........plKVslsAPPlDGpANscLl+aLuK.thcVsKSp.Vslt+GchuRc.KplcI..s............ 
0 177 345 468 +1895 PF02596 DUF169 Uncharacterised ArCR, COG2043 Mian N, Bateman A anon COG2043 Family \N 20.20 20.20 20.30 21.30 18.20 17.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.27 0.70 -4.95 76 379 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 254 0 211 370 56 210.60 21 82.60 CHANGED sppLtchLcLctpPVAV+hh.t...............................spc.hst....shp..ttthp..............aCphlth.AR.pG.........cshhh.ss-phsCssGusshGhtch.st...........lpsG........................phhhp.hthhps.csuc+hhcp.hPpl...ttphhullhuPLpc.ssh.....p.P..DllllhssPtQhhplspuhhYtpGs.phpsshsuhpusCu-ssshsh....hpspss.hslGCsGsRthut.hp....ccElshulPhptlpclhcsL .......................................................s...l.phlpLptpPlulphh.........................ppp..st.....th.......tt.hp...............hC..ph..hth..Ap....p.G.........pshhh..st-s...h...s..C.huGthshGhtph.sp.............................hlpsG...................................thhhp....thhpo.-tucchhpp..lshl...tsp.......h..tslhh.u..P.Lpc..hph.....-.P....Dl..llhhssPtphhtLsps...hhappss..hpsshssht.usCusssshsh.....ppsp.s..hslGshuspthst..hp....s--hshulPhpphpchhpt......... 0 81 158 191 +1896 PF02598 Methyltrn_RNA_3 DUF171; Putative RNA methyltransferase Mian N, Bateman A, Eberhardt R anon COG2106 Family This family has a TIM barrel-like fold with a deep C-terminal trefoil knot. The arrangement of its hydrophilic and hydrophobic surfaces are opposite to that of the classic TIM barrel proteins. It is likely to bind RNA [1], and may function as a methyltransferase [2,3]. 
18.60 18.60 24.50 20.90 17.90 17.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.85 0.70 -5.06 23 418 2012-10-01 22:53:19 2003-04-07 12:59:11 12 7 347 2 286 396 27 273.20 32 79.92 CHANGED slSlslPsSlls..sspshc.tTahsupIARAsslFsVsEIlla--.....................................................tp..tstpptps.......shhlsplLpYhtTP.YLRKplF...shpspL+aAGlLPPLssspHh..ppscptcaREGlslchspps................................phlslGhsc.ltlcp....tls.ssRVTVchps................tplVsPscsps.....thYWGYpVR.hspshuclFppssh.tGaDhslhsScpupslspsphphh...........................shtplLlVFGth...........ptttps.ppsc......tpspthFDt.l.NssPsQsspslRsEEAlhlsLuhLp ..............................plSlslPuSlls.......ss..ps.....c.+ThhsGpIARAsslFpV-Ellla--....................................................................................t..t....tt..p..tps..............sh.hlsplLpYh-sP.YLRK.......tlF...........Ph.+p.sL+huGlLsP...Lc.sPaHh.......pps....c.......scaREGlslpt.ssptt.t..................................shVshGh.p..c....lplsp...................tl..ss.hRVTVchsp.......................................spl..Vssppscp...thshYWGY......pVR.hs..s..sL.splhsc.ssh......t..G...YDhsIuTS.c+Gp.sl.s.ps..t.ht................................php+hLlVFGs...................................pshtts.pt.p...............st.psp.hFD..hhl....Nss.P.sQ..GocolRTEEAlhlsLshL........................................ 1 82 155 230 +1897 PF02604 PhdYeFM_antitox DUF172; PhdYeFM; Antitoxin Phd_YefM, type II toxin-antitoxin system Mian N, Bateman A, Eberhardt R anon COG2161 and [1] Domain Members of this family act as antitoxins in type II toxin-antitoxin systems [1]. When bound to their toxin partners, they can bind DNA via the N-terminus and repress the expression of operons containing genes encoding the toxin and the antitoxin [2]. This domain complexes with Txe toxins containing Pfam:PF06769, Fic/DOC toxins containing Pfam:PF02661 and YafO toxins containing Pfam:PF13957. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.24 0.72 -4.39 156 5612 2012-10-03 00:18:00 2003-04-07 12:59:11 14 16 2448 47 1340 3995 616 70.50 18 79.79 CHANGED hptlshs-h.+sphupllcpsppspp..llITccGcs.ssVllsh.....ccacphpc...phthhpt.ttspphtp........tthp..ts .....................hhsho-h..+pphspllcpspps.p.....p....l.hI..o..p..p..s..c....s...s...sll.lsh.....................cpapp.h.p.......t..h.t....tt.............t....................................................... 0 421 912 1136 +1898 PF02616 ScpA_ScpB DUF173; ScpA/B protein Mian N, Bateman A anon COG1354 Family ScpA and ScpB participate in chromosomal partition during cell division. It may act via the formation of a condensin-like complex containing smc that pull DNA away from mid-cell into both cell halves. These proteins are part of the Kleisin superfamily. 23.70 23.70 23.70 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.71 0.70 -4.79 11 3012 2012-10-01 19:44:35 2003-04-07 12:59:11 9 6 2925 0 778 2298 2016 209.20 27 81.05 CHANGED Llpctclssa-VslsclT-pYlphlcph.ppL-Lch.......sschllhAuhLlchKucsLLs....tpp.-p..-.t....phc-thhtcL.....h+tttc.hcchchpth...tphs+c.sshcchlctlcc...th.cl-c......shht.htth.hh.c.thtsthcchp.........p.hclslE-phpELlt.............p.h.tl...s.hhsa.pLh............hshhclVssFlALLhLhpsptVplpQc-.au-lhlphh 
..................................................................................................................................LIp+pclDIhDIslsplT-QYl.....s.....Y.......l......c......p......h.....p.........p..........h.........cL..-l...............................AuEYLVMAApLltIKS+hLLP...........p..t.p....t...p...p.......t....-.......t............................-s.....R.p.-.....L..l...p......c.L.h..-.Yc..p...a....K.th..u.....p.t.L.pp.h..t....ttt..........has.+.t......................p..p..h..h..p..t....t.............t..l.p...........p..h..c...L.h..h..A..h..tp.l...hp.+.t......p.h.pph.......................tl...s...l.cp.p.h.p....p..lh..t.....................................h........t.....t....p......hhpFp.p.L.h.p.p................................tps.t.t.t..l.l..sp..FLAlLELhKpthlpltQ.p.c..sassIhl...t....................................................................................... 0 264 526 667 +1899 PF02617 ClpS DUF174; ATP-dependent Clp protease adaptor protein ClpS Mian N, Bateman A, Moxon SJ anon COG2127 Family In the bacterial cytosol, ATP-dependent protein degradation is performed by several different chaperone-protease pairs, including ClpAP. ClpS directly influences the ClpAP machine by binding to the N-terminal domain of the chaperone ClpA. The degradation of ClpAP substrates, both SsrA-tagged proteins and ClpA itself, is specifically inhibited by ClpS. ClpS modifies ClpA substrate specificity, potentially redirecting degradation by ClpAP toward aggregated proteins [1]. 
20.00 20.00 20.10 20.10 19.90 19.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.71 0.72 -4.26 194 2895 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 2572 47 893 1937 1064 79.80 36 28.96 CHANGED hcps..shY+...VlLhNDDaosM-FVlplLpphF.phsp-pApplMLpVHpcGp.uls.ulhsh-lAEs+spplpphu......pttth.........PLpssh ...........h.p.PshY+VlLhNDDaTsM-FVlpVLpchF...s....hs.........h-cAsplMLpVHppG+..uls.G.l.a.o.t.E.lA.EoKstp.Vpphu........+ttta.........PLhssh....................... 1 268 550 744 +1900 PF02618 YceG DUF175; ADC_lyase; YceG-like family Mian N, Bateman A, Moxon SJ anon COG1559 Family This family of proteins is found in bacteria. Proteins in this family are typically between 332 and 389 amino acids in length. This family was previously incorrectly annotated and names as aminodeoxychorismate lyase. The structure of Swiss:P28306 was solved by X-ray crystallography. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.01 0.70 -5.39 195 4001 2009-09-21 13:22:03 2003-04-07 12:59:11 11 7 3759 2 892 2962 1446 281.50 30 77.07 CHANGED htlplspGsohppluppLpcpullpss........hh....F.phhs+hp..stssp.l+uGpYplpss.hostpllptL..spG........................................................csht......h.plTlsEGhshpp............lhptlsp.........pshh..tp.t......t.tp........htphst.................EGhLaP-TYpa.stss.......ospp.llpphhpphp...phl..tpthppcs...................ttl...sh.osh-hlhlASIlEKEsu.hssERshlAuVFhNRLc.p.......GM.....pLQoDPTVlYu..l...tt.......tt............plppp...DL.c.ts....oPYNTYthpGLPPsPIusPGpsulpAAlpP.spsc.....aLYFVA...c.sc..........Gs..HhFopThpEHppsV...pcY 
.............................................h..lpl.tGsuhpplupp.Lpcpsl.lpss....th.....F...phh.h...+.h......p.......s......h...sp....l+uGp.Yp.l....pss.hosp.cllphL.....pp.G........................................................c.psp...........h..plslsEGh.sh.pp..........................hhptl.tp..........................tshl.......ppp..............hpp....th.h...t.t...h..tt.ph.s.ph..............................EGhhaPsTYp.h..st.....ss............o..s.p..llcphhpphp.....ptl........pph.h.t.tp.t.............t.sh.....sh.s..phlhlASllEKEs....u....h..s.p.-.RshlAuVFhNRL.p.t.......sM...........hLQoDsTVlYu..h........sc.........p............t...plppp.............Dl...c..hsoPYNT.Yh.h....sGLPPsPIssPuhsulcAuhp.....P....spo.....s.......aLY..FVA...c..ss........Gs..phFup.shp-HppsVpc................................................................................................................................................ 0 308 608 762 +1902 PF02620 DUF177 Uncharacterized ACR, COG1399 Mian N, Bateman A anon COG1399 Family \N 21.30 21.30 21.50 22.20 21.00 20.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.80 0.71 -4.06 181 3846 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 3794 0 853 2473 744 115.20 24 64.04 CHANGED lpuplpu..slphsCsRCLpshphslphp.hphh.h..h...sppt..tt...............................--..thhh.............sstp............lDLtphlp-pllLslPhtshp.....p..sCps..hsststs..tt.........................tt.........s....P.....a.usLpsL+ ....................shplpsslsl.CsRCLcP..hp.ht.lphs.hs.h..h...ht...sppp....tpt...............................................-s.psl.l............cpsp.............lDLtshlcDpllLslPhtslpp........-.....cCps.....h.s.s.s..ss..p...hsct...................................p...pp.ps.................s......P.....a.AsLtsL........................................ 
0 270 558 726 +1903 PF02621 VitK2_biosynth DUF178; Menaquinone biosynthesis Mian N, Bateman A, Eberhardt R anon COG1427 Domain This family includes two enzymes which are involved in menaquinone biosynthesis. One which catalyses the conversion of cyclic de-hypoxanthine futalosine to 1,4-dihydroxy-6-naphthoate, and one which may be involved in the conversion of chorismate to futalosine [1]. These enzymes comprise two domains with alpha/beta structures, a large domain and a small domain. A pocket between the two domains may form the active site, a conserved histidine located within this pocket could be the catalytic base [2]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.28 0.70 -5.18 131 1044 2012-10-03 15:33:52 2003-04-07 12:59:11 9 3 556 15 368 1003 282 248.00 24 91.99 CHANGED lplGtsshsNsh.haauL.........................ths.hchhhssspsLNphhh.puclDl..uhlSshtasphtccYtlLs..usuuhGpshuslllu..................p....t..tt.....plAlsupssTushLh+lhh.pt........phh.hs....cl..thhpst......DA.........ullIt-stl..................satppth....hhlhDLGchWpchT.G..LPhshuhhsh++s.hs.....phhtplpc.slppShphuh..pphpphhph...hhcpsthh.......pp.hlphYls.phohslupctppulcphhphutc .......................................................hplup.shhNsh.hahul..............hpht.......ths.hc.h...h.h.ss...s...pp.LNchhh.....p..s...c..l.Dluhl....S....h....h....t..h..sp..h....h....c....c...Y.t.l.L.......ss.s..u....hG..c..sh..usl.lls..................pp....h..t...hp...........................plAls.u.....p.ssT.ushL.h+..lhhtct...................phh.hs......pl....h.hpst......DA............................ull...It...-..ptL.....................pa.t.p.p..h........phhhDLuphWp...-..h.T...u.....LP.......h.s..hu..shsh++s..h......................shhtplp.ctlppuhph.u....pp.p...hlhph....hh.cppt.hp...........pphlph.Yh..s...phsh...sl..u...p..pphtAlcphhchsh.t................................................... 
0 156 292 345 +1904 PF02622 DUF179 Uncharacterized ACR, COG1678 Mian N, Bateman A anon COG1678 Family \N 20.60 20.60 20.60 20.70 19.80 20.50 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.22 0.71 -4.61 181 2362 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 2224 7 628 1535 1045 160.00 35 76.37 CHANGED hP.s..hsDspFp+oVlalC....cHsp.c.GAhGlllN+..Ph..s....lsls-llppls.........h................................ttph..............sVahG....GPV.ptc+....GF...lLHss..t............t....ts.............ol..pls...s.....slhlTsohDlLps...lupu.tu.....PpchllsLGYAGWusGQLEpEl.tpNuWLsssAssc..........llFss.s.pp...+WptAhpplG ......................................................................................................................................hPshpDshFpRSVlYlC....-Hsp..p.G.A....h.GlllN+..Ph..s......l.sl..psl..Lpplph.s..........................................thcp.....................................sVhhG......GP..l..pp-R.......GF...lLHss.tt......................h......su...................................................ol...pls.......s..sl.s.h...T...s..o.tD.lLcs....luss....tt............PpchllsLGYAuWst..GQLEpEl.tc....NuWLsss.A.c...s..........llFs....s....shtc.......+WppAhphlG..................................... 0 200 399 525 +1905 PF02623 FliW DUF180; FliW protein Lima T, Mian N, Bateman A anon COG1699 Family The protein BSU35380 from Bacillus subtilis (renamed FliW) was characterised as being a flagellar assembly factor. Experimental characterisation was also carried out in Treponema pallidum (TP0658). In Campylobacter jejuni, Cj1075 has been shown to be involved in motility and flagellin biosynthesis. The two paralogues in Helicobacter pylori (HP1154 and HP1377) were found to be able to bind to flagellin. FliW proteins are involved in flagellar assembly [4]. 
FliW is part of a three-part feedback loop: in Bacillus subtilis FliW inhibits CsrA (an RNA-binding protein) which inhibits FliC translation; hence FliW is required for FliC (flagellin) production [5]. 25.00 25.00 31.20 29.20 24.60 23.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.40 0.71 -4.55 83 725 2009-09-10 21:46:53 2003-04-07 12:59:11 10 3 652 2 212 534 71 119.40 31 84.43 CHANGED lpF.pGl.GF-.ph+cFhll.....ppsssFhhLQSl-ssclu..FlllsPathh.-YchclscpphptLplpstp......-lhlhsllol.....tss...hcchTsNLtAPlllNhcsphutQllL.psscYsh+a.lh .......thppsIhGFE.ch+cahlh.........ptcpsFhhLpSl-....st...s.lu..FlllsPahhh...-Y..ch...cl...ssthhphLpl.ps..tp................c.lhlhsIlsl........sps........hccsTl..NlhAPlllNhcsphutQllL.ssspYshpa...h................................................... 0 108 182 198 +1906 PF02624 YcaO DUF181; YcaO-like family Mian N, Bateman A anon COG1944 Family \N 25.00 25.00 25.40 25.00 24.10 24.50 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.89 0.70 -4.95 127 1709 2009-01-15 18:05:59 2003-04-07 12:59:11 11 25 1462 0 432 1316 83 324.50 28 58.87 CHANGED ssGKGtottp.ApsSAlhEulERauuth.hst.................hthtpshtph......ttts..lsspshhhhsppph.....................hs.st.lp........Wh................shsltss.....c.plhl..Ptsh...httsh..................hhh..sS...NGhAuGsohcEAllpulhEllERDuhslh.....hhsp...hshspl..........shss....thstlhpthpp...tG..hclhlhDho.......-h.slPshsuhh....................hpssssthhhGhGu+hsschAltRAloE..hsQsthhhh.ttstpp.....................................p.tphtphhshsphttt..hhh...t........sphphsshs.............s.tsslptl.lstlpptGh-.lhsl.Dho......h.clG.lsVV+lllPGhp. 
...............................................................................................sGKGsopcs..Ahs.SALuEhhERhusshhhs-...........................hhhtpslsst...............................shs.pass-pah..t...................................thac.P.-scls.................hs................................shhc....s...cs......p..slal..Phsl....lsshas..........................S....NGhuA....G.NohpEAhlpGL.E.lhERtshsth..................................hhpp......lslPcls.........s.shs.chss.l.hctlpph.p.t.pG..aslhshDso......h..ss..th.P.Vl.ssll....................hsssssssh...suhGAH.sDhtlALcRslTE...l...h....QuRshps...h.sshssssh....................................stcchsc..ht.shpp..p..hhcs.stlh.....t........httt.....schsas-hs............hss.ss.p.c-hssL.hsh.hppt.sh-..lhls.Dhp.........cl...G...lhssRllVPGhp........................................................................................ 0 120 256 346 +1907 PF02636 Methyltransf_28 DUF185; Putative S-adenosyl-L-methionine-dependent methyltransferase Mian N, Bateman A, Moxon SJ, Eberhardt R anon COG1565 Family This family is a putative S-adenosyl-L-methionine (SAM)-dependent methyltransferase [1,2]. In eukaryotes it plays a role in mitochondrial complex I activity [2]. 
23.20 23.20 23.50 24.00 23.00 22.70 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.82 0.70 -4.98 157 1692 2012-10-10 17:06:42 2003-04-07 12:59:11 12 15 1433 3 694 1556 1906 243.10 24 62.15 CHANGED hluhhhhphapph....s...ts.......hpllEhGsGpGpLhsDlLpshp........ph.P.ph.....htt..hphhllEhS.............stL+phQpppL.................tt...................................hsh.....W..hc.pl..t..ph.s...................hsslllANEhhDAlPl+phh.................................hpsss..a.pEthVshs.......................................ss................th...ths..h.ts....................htshltphthth...............................tGhhsElsssstshhppl...upplsp....................................................GssLhlDYGh.stpp...........hssTL..puh................ppHpht.........s.sh...tpP.GptDlTAHVDFssL....tpsu.pt.G...hpshuhs .............................................luhhhhp.hpth......t........t............hplhElGsGpGpLhtDlLptlp.........ph.s..th.....htt...hphhllEhS..........................spL.pphQpp.p..L..........ps........................................................htt.l.t.W....hp..pl..ph......................hsshlluNElhDAhPlchht......................................................................psptt.......h.hEhhVshs............................................................................ts............ph.ths.....h.tsh.t...........................tth.l..pht.th..............................................tsth....h..Els..tttthhppluptltp.....................................................GhhlhlDYGh..tst.............................psoL........puh..............................hp.Hchh......................s.sh.tts....G...ptDlTucVcFstltphs.pt.s..ht.....h............................................................................................................ 
0 221 439 583 +1908 PF02638 DUF187 Glycosyl hydrolase like GH101 Mian N, Bateman A anon COG1649 Family \N 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.14 0.70 -5.57 4 1510 2012-10-03 05:44:19 2003-04-07 12:59:11 10 34 1000 0 288 1415 225 300.00 32 62.31 CHANGED clRGVWlssVsslca..............spsQppphIshLcclp...hNTlaspVhssGpsLYPSthu..Wpshhssp.ttD.GhDsLu.hI-cAH+RshcVhsWF...chuhpss.....shschlcpHPthhh.p+pDhsts.....GsphahsPhhPEVQsaITpllh-lVp+YDlDGVQhDDaFh.spphG.-..s.uhYcQ.st.sshus.cD..................WRpsplsphltplstpIKAsKPslphulSPsssa.N..............huYsshhtDhpcWlcpGllD.lssQlYhs.hut.suthphhuhh.ocplhPssVtlhhGlsshpls.s..p.-ssWss ..............................................................hRuhWlsoV.....p..h..-.aP.................hpt.p....pp...t....h.l....c...h.L...c...c...lpt.....thNsVhhQV.+.P.s.usAl.asS.p.......h.........h......P.......W.............S...........p..h....h....T.......G..........p........G...p.....s..P......G......aD.P..LtFhl-EAHK.R....G.....hc.lH...A...Wh...............hch...s....h..s.sp............p.h.s..p....h..h...s...p...p..Ps......p....h.h......p.....+.....-..h.......l.....h.s..........h...................................................u..s.......p...h...h.ls...P....GlPE.V.pcalsslltEl..Vp+.Y....s....lDGl..p..hD........D......Y...F...Y..............h......p............s.....s......G.........t......p.....h...s.....D......p.....p........s...a.p.p...Y..s.......t.s..h...s.s..h..s.D....................................................W...R...RsNs.pp...l...lpclppsl.K....s...h...K....P....t.........VcFG..l.S..P.h.Gla..s.tpc...............................sptGhtsY-.p..Y..AD..sppWl....p.p....Gh...lDYlsPQlY.W....s.........h.sh.....s........u......t........ashlhpW...W..s.p..s.....t..s....p.....h......tLahG.sh.hph.................................................................................................... 
0 98 206 262 +1909 PF01579 DUF19 Domain of unknown function (DUF19) Vogel B, Bashton M, Bateman A anon Pfam-B_402 (release 4.1) Domain This presumed domain has no known function. It is found in one or two copies in several Caenorhabditis elegans proteins. It is roughly 130 amino acids long. The domain contains 12 conserved cysteines which suggests that the domain is an extracellular domain and that these cysteines form six intradomain disulphide bridges. The GO annotation for this protein indicates that it has a function in nematode larval development and has a positive regulation of growth rate. 22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.95 0.71 -12.01 0.71 -4.25 47 383 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 8 0 378 354 0 150.60 15 67.46 CHANGED sCsststh.........pshpCh....h..htphtpphpthshp....ptsphpphppsCsshhpChpshpC.......tt....hp.h.thCphh.ah.stpa.pChsKlts.......ptspChpsat...........ttptpphtpcsCpthhupc.sCh...........cctl.ppsCGp...pphptatsphh...........thsshhhp.pCshp ..............................................................................pCh.....h..htph.t...tht.t......hs..t.........pthpphpphCp.....p..h.h.pChp.sh..p..C....................t..p.hp.htt...h..C.p.hh.ah.sp.p.a..t.pChp....Klts.........p...pp..sCh..psa.....................................pptt...pppptC.p.thhspp.sCh...........cptl.pphCup...pthptatpph.................................h................................................... 0 97 120 378 +1911 PF02643 DUF192 Uncharacterized ACR, COG1430 Mian N, Bateman A anon COG1430 Family Two structures have been solved for members of this large (>500 members) family of bacterial proteins present mostly in environmental bacteria and metagenomes (distant homologues are also present in several Plasmodium species). 
TOPSAN analysis for pdb:3pjy shows that there is much similarity with the other solved structure, pdb:3m7a, solved for UniProt:Q2GA55 (Saro_0823), a homologue of Thermotoga maritima TM1668, UniProt:Q9X1Z6., The homologue in Caulobacter crescentus (CC1388), UniProt:Q9A8G6, is associated with CspD, a cold shock protein (CC1387), UniProt:Q9A8G7. However, the genomic context of UniProt:Q2GA55 is most conserved with a putative xylose isomerase, suggesting a possible role in extracellular sugar processing. Saro_0821, UniProt:Q2GA57, is annotated as an AMP-dependent synthetase and ligase. PDB:3m7a structure corresponds to the C-terminal (27-165) fragment of the YP_496102 (Saro_0823) protein and it is structurally unique, as the best hits from Dali have a Z-score of 3.8 (1nt0, 2j1t, 3kq4) and it is thus a likely candidate for a new fold. Interestingly, many of the top Dali hits are involved in sugar metabolism. There are no obvious active site-like cavities on the protein surface of 3m7a (http://www.topsan.org/Proteins/JCSG/). 20.30 20.30 20.70 20.50 19.30 18.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.23 0.72 -4.36 180 1078 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 966 4 459 1048 876 105.70 30 67.60 CHANGED shplElAcTstpRtpGLMhRpsl..........ss...sp.GMLFsa..s.......psph.tsFWM+NThlPLDllFlspsGplhslppts......Pts..tss...h.ssh...........s.sph....VLElsuGhhpphulpsGcplph.. ..................h.hplElAso.tpRtpGLM..aRpsl..............st...sp.GMLFla......s...........psph..tsaWM+NT.lPLDllFlcscGp.....lhslpc.t..........Phs.....ps.......h.sst.............................ts...spa......sLElsuGhhtchGlpsGsclp...h............................ 1 129 301 397 +1912 PF02645 DegV DUF194; Uncharacterised protein, DegV family COG1307 Mian N, Bateman A anon COG1307 Family The structure of this protein revealed a bound fatty-acid molecule in a pocket between the two protein domains. 
The structure indicates that this family has the molecular function of fatty-acid binding and may play a role in the cellular functions of fatty acid transport or metabolism [1]. 20.30 20.30 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.61 0.70 -5.33 45 5275 2012-10-02 12:41:15 2003-04-07 12:59:11 11 9 1863 22 831 3515 181 269.70 26 95.33 CHANGED clsIlTDSousL.s..t-hhcchslpllPLslhhss.csYpDsh..-l.s.cphhcchtpptphPp.TSQPssschtchacc.hhppua-tllsltlSStLSGTapsAthusphh.....shc..........lpllDSphsuhu.GhhVhcAuchhcpGt.shc-Ilpplpphppcp..psahsVcsLchLh+GG...........RlupstuhlGsLLslKPllphcc..GplpshsKsRupK...KAlcclhc.....l..ttptsstthhclslhaus..st.-puppltcplps..phst..ch.hsthuslIusHsGsGululshhhc .....................................................ltllTDSos.s..l.s....t..c.h.h..c.........c....h....s....l..p....l....l....PLs.lh.....ls....s.....p...s...Y...h...Dsh..................sl..s.c...ca...h.p..p..h.....p..s...p..p.h.P.p..T.....SQPshupahchacp...htp..s.h..s....p..lls.lpl..SusLSGoaps...Ap......u.s.p.h...............shp...................lpll.DS.phs.uhuhuhhlh.pA.s...c..h...h.pp..Gt....shc..-.l..l.ppl.p.p..h.pp..p.s..phah..hlss.LcpL.....h+GG...........Rlop......ssuhl...G..s...L...LsIKP..l..l.p..h..p..c...GpltshpKs....Ru.....p..K.....+..u..h..pplhc.................................hh..........p.pht.......s...t......t.....h..p...l..h......l..s.aus.....s....-tA...p....p....l....pp..p.lpp...........th.st.........pl....l..t.....h.usllusHsG.Gslulhhh..t..................................................................................................................................... 0 343 598 735 +1913 PF02646 RmuC DUF195; RmuC family Mian N, Bateman A, Moxon SJ anon COG1322 Family This family contains several bacterial RmuC DNA recombination proteins. 
The function of the RMUC protein is unknown but it is suspected that it is either a structural protein that protects DNA against nuclease action, or is itself involved in DNA cleavage at the regions of DNA secondary structures [1] 23.50 23.50 23.50 23.80 23.40 23.40 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -11.80 0.70 -5.58 23 2678 2012-10-11 20:44:43 2003-04-07 12:59:11 11 9 2619 0 607 1935 2922 298.80 31 66.14 CHANGED pppL-thhpslcEpLp.shppplcsoacphucchtpLpcpLt.l.thp...pplup-sssLppsLp..ssKopGsWGElpLEplLEssGL.cshpYppQssl.......tsuphRPDhhl+LPssp........hLlIDuKhsLpsYpchhsAp-sspp...ptshpphhpul+tHl+slupK.Y..........lhsscT.DaslMFlPsEuhascslcpsstlhphutcppVllsoPoTLhshLpolshha+spphpcpApcItchsucLhcchsphsschpcltppLspuspshsphhsphsptptphhpphpthpttusctptphsst ..................................................................ppLpphhpslc...Ep...L...c....sh....ccp....l....p.poap....p....pu....c....p....h.psL.......pc.pl............tt............lt....p....hp..............p.......plu....p-sssLscsLp..ssKspG.sWGEl.LpplLEsu.G.hh.cshc..Yp.pQssh..................st...........sup...hpPDhll+LP.ss+........................pllIDuKhsLsuY...p+hhs......A.....p.....-.s.t.p+....................cpshppahtul+pH....l....+sLupK.tY.t........l..s..c.o.hDaslhFlPsEshattAlcps.....s..pLh..p..uhc.pp..IhlsoPoTLhshLpolssha+ptp.p...csApcIucpsupLhcchstFs-ph.s....c.luppLspAsps...acpshsphsps..t...t...s...h..lp..phEthct.usp.......t................................... 0 190 388 502 +1915 PF02649 GCHY-1 DUF198; Type I GTP cyclohydrolase folE2 Mian N, Bateman A anon COG1469 Family This is a family of prokaryotic proteins with type I GTP cyclohydrolase activity. GTP cyclohydrolase I is the first enzyme of the de novo tetrahydrofolate biosynthetic pathway present in bacteria, fungi, and plants, and encoded in Escherichia coli by the folE gene; it is also the first enzyme of the biopterin (BH4) pathway in Homo sapiens[1]. 
The invariate, highly conserved glutamate residue at position 216 in Swiss:Q5F9K6 is likely to be the substrate ligand and the metal ligand is likely to be the cysteine at position 147. The enzyme is Zinc 2+ dependent [2]. 25.00 25.00 25.80 25.80 24.40 24.20 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.78 0.70 -5.15 121 985 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 926 5 311 797 284 258.70 36 90.12 CHANGED hPDVQ....sptsstplsls+VGlpslchPlplpsp.....t...pshAphshhVsL...P.sp..hKGhHMSRhhclLst.hp..p.t..lsstsl....cplLpchhpph......upsAclchpFsahlc+pu.lSs.hpuhtsYslthpuphp.ts.....t.hphplplpVshoSsCPCStplScpts...................................HsQRShsplplch....sst....l.lp-LIchlEpuhS.s.lhsllKRsDEpthsctuapNPhFVEDssRclstpLppp....sp.hssaplcscshESIHsHsAhA .........hsDlQ....ostcs.hthsIpcVGl+slchP.lplppt...................t..posuphshsVsL...s...tcpKGhHMSRhlchl-th.p....p..th.....lshs....sl....pplLpshhpph.......pupsAplcl.shsaFhc+tu.....Plos...lpuhhsY-.Vshsuphctts..........t.hchplplplsloolCPCSKcIScauA....................................HNQRuhlolpspl........sp..p......lh...lpcll-hh...EssuSs.laslLKRsDEKhVTEcAYcNP+FVEDhlRplAtpLhpp......................stlssaslcscN.ESIHsHsAaA...................... 0 100 210 268 +1916 PF02650 HTH_WhiA DUF199; WhiA C-terminal HTH domain Mian N, Morningstar A, Mistry J, Bateman A anon COG1481 Domain This domain is found at the C-terminus of the sporulation regulator WhiA. It is predicted to form a DNA-binding helix-turn-helix structure [2]. The WhiA protein also contains two N-terminal domains that are distant homologues of LAGLIDADG homing endonucleases [2]. 
21.10 21.10 21.20 22.80 21.00 20.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.53 0.72 -3.59 23 1969 2012-10-04 14:01:12 2003-04-07 12:59:11 9 6 1950 1 338 1114 114 85.00 46 27.44 CHANGED lNRlsNh-sANlpKospAuh+plcpIphIhcplG.h-tLPppLcclApLRlpas-tSLpELGchlcs....sloKSGlNHRlRKlppIAc .....................................sNRLsNsETANLs+TlsAAh+plp.sIphIpcplG..l-sLP...-cLp....-lApLRlp+.-hSLcELG-hlss....Pl..oKSGVNHRlRKlpclAp.............. 0 134 244 300 +1918 PF02655 ATP-grasp_3 DUF201; ATP-grasp domain Bashton M, Bateman A anon COG1821 Family No functional information or experimental verification of function is known in this family. This family appears to be an ATP-grasp domain (Pers. obs. A Bateman). 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.91 0.71 -4.37 18 304 2012-10-10 13:17:03 2003-04-07 12:59:11 9 4 195 1 172 13200 4990 163.10 22 44.85 CHANGED suDKhcsh+tLcst..hssP........hp.tchtt..........scphllKPtsGsGupusphscstpp....................llQ-aIEGcshSVSllussccshsLslN+QhIsh...t..............htYsGshsPh.pph..ppchhphApcllcsl....GLhGhsGVDll.......ls-..ttPYllEVNPRhTso 
.............................................................................................................sKhhh.hp.hL.....pph....sl..s..h.P...................................h..............t........t...............................tt..th.llK.....P.h.......s.....G...s........G.....G.....h.......s...l...t...h....h...s...s...t.t...t......h....t...........t.............................................ll....Qca..l....c.....G.....p.....s............h....S..l....s....h....l...........s...........s...........s.......p......p.....s....h....h.....l...u....h...s..c........Q...h....l..s..h........t...........................................................ht.a....s..G......s.....h.......s......s......h......p......h......sh...........pp....p....l.....h....p.....h....s.....p....p.........l........s.......p......t.....l...........GL....t.......G....h....s...G......l...D.......h.l.......................l..s..s.........tt...a..l.l..ElN..P.Rhsu.............................................................................................................................. 0 52 118 149 +1919 PF02656 DUF202 Domain of unknown function (DUF202) Bashton M, Bateman A anon COG2149 Family This family consists of hypothetical proteins some of which are putative membrane proteins. No functional information or experimental verification of function is known. This domain is around 100 amino acids long. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.60 0.72 -3.76 197 2355 2009-01-15 18:05:59 2003-04-07 12:59:11 10 32 1062 0 920 1576 39 77.60 28 34.40 CHANGED +stlAsERThLAWlRTululhuhuhslhp..hh..h.t............................................hsh.........hhuhshh..............hlulhhhhhu.......hhp..ahptt...pt ....................+stLAsERT...a.L.AWlRTuLuhhuhuls.lh.phs..p..s.........................................................................................................hlth.........lhuhlhs..............lluhhhh.hau.....................hhR.aht....s.......................................................................................................................................................................... 0 298 582 809 +1920 PF02659 DUF204 Domain of unknown function DUF Bashton M, Bateman A anon COG1971 Family This family consists of hypothetical transmembrane proteins non of which have any known function, the aligned region is 180 amino acids long. 21.70 21.70 21.80 21.70 21.40 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.29 0.72 -3.85 162 3883 2012-10-03 02:02:08 2003-04-07 12:59:11 10 3 1582 0 728 2452 78 66.50 31 65.64 CHANGED hS..hDAhAVuluhuhh.............php....hhhsulhhGlhpslhshlGhhlGphhu..p..hlsph..........uchlGullLlhlG ..................hShDAhAVuluh.uhh.............psp......hlhsulhhGhhphlhsh.lGh.hlG.ph.h.u..p...hl.s.ph...............sc.hluullLlhlG...................... 0 276 519 622 +1921 PF02660 G3P_acyltransf DUF205; Glycerol-3-phosphate acyltransferase Bashton M, Bateman A, Eberhardt R anon COG0344 Family This family of enzymes catalyses the transfer of an acyl group from acyl-ACP to glycerol-3-phosphate to form lysophosphatidic acid [1]]. 
21.00 21.00 25.80 25.70 20.70 20.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.15 0.71 -4.71 127 3881 2009-09-13 07:44:17 2003-04-07 12:59:11 10 9 3433 0 833 2498 1770 180.00 37 84.40 CHANGED lluY.LlGSlshuhllu+hhthhDlRphG.SGNsGATNshRsh.......GtphuhlshlhDhhKGhlsl.hl....uphhhhs.hhh.....hh...................suluull.GHhaPlahpF+.....GGKGVATshGl..llslsshhsl.....lshh.lahllhhlo+.hs..........SLuSlh...uslshslhshh..........................................thsh.hhlhsh...hluh..lllh+H+pNIpRLl ...................................lluY.LlGS.Issulllu+l.h..t.h..h..DlRphG.S...........GNsGATNshRlh.......GK.t.uuh.hsllhDhhKGhlsl.hl..........s.hh.hs.h..sshhh..............hh.......................................suluA.lL.GHhaPl..Fht.FK................GGKGVATuhGs.......lls.l......s............hh....s.l...........h.hhs....sall.slhls+.hs...............SLuull.......uul.hsslhshh..............................................h.p..h...h..hh.hsh.....hluh..lllhRH+sNIpRl..................................................................................................................................................................................... 0 284 542 701 +1922 PF02675 AdoMet_dc DUF206; AdoMetDC; S-adenosylmethionine decarboxylase Mian N, Bateman A, Moxon SJ anon COG1586 Family This family contains several S-adenosylmethionine decarboxylase proteins from bacterial and archaebacterial species. S-adenosylmethionine decarboxylase (AdoMetDC), a key enzyme in the biosynthesis of spermidine and spermine, is first synthesised as a proenzyme, which is cleaved post translationally to form alpha and beta subunits. The alpha subunit contains a covalently bound pyruvoyl group derived from serine that is essential for activity [1,2]. 
25.00 25.00 27.60 27.30 22.50 22.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.43 0.72 -4.15 134 1798 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 1523 19 460 1159 1636 119.90 39 60.28 CHANGED Hllh-hass..s.chLsDtchlcphltcAsctuGATllsh.thacF...pP..........pGVoulslluE....................................SHlolHTWPEhs........aAslDVaTCGp..ssPhcA...hphlhcshpscphphpp.hpRG ......................................................................-pLhssppLpclLs-s.sphhGA.s.l..Lsl..uppca.....pP..........QGsS.s.slLlSE................................................SHIslHTaPEpp..........hAsIDV.TCGs...lsPhcA.....lsYLhcpL.cuc.hsshch.hsRG........................ 0 183 309 389 +1923 PF02676 TYW3 DUF207; Methyltransferase TYW3 Mian N, Bateman A, Wood V, Mistry J anon COG1590 Family The methyltransferase TYW3 (tRNA-yW- synthesising protein 3) has been identified in yeast to be involved in wybutosine (yW) biosynthesis [1]. yW is a complexly modified guanosine residue that contains a tricyclic base and is found at the 3' position adjacent the anticodon of phenylalanine tRNA. TYW3 is an N-4 methylase that methylates yW-86 to yield yW-72 in an Ado-Met-dependent manner [1]. 
20.50 20.50 21.00 20.70 19.50 19.60 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.47 0.71 -5.20 42 374 2009-01-15 18:05:59 2003-04-07 12:59:11 9 24 292 11 258 380 11 199.60 29 62.09 CHANGED cppKpphLpclt..........-tp.cGplDcsIhsllchIN...uhpshhTTSSCSGRIoVh.p....................tp.hp+.......................tsupWLahsHcs...................hphpplhcslptshsst.....................lhh+hcP.ILHVhs+sLcpAptlhshAhssGF+cSGItshp...................pphlVtlRss.h+l-sPluhps...............chlV.sc-YLphLlcluNc+hpcspc+lpRLppslcp..hhp .................................tKtphLppl...........DhS.+GplDtslhsllphlN...sh.tshhTTSS....C.uGRlslhtps.............................t...tc.............................................................................tsupWLhhsH.c.............................................................hp.pphhphhph..ttttt..........................................................lhh+hEPh........ILHVhspshppAphLhps.A.h.s.uGF+p....SGltshp................................................tphhVulRss...htL-sPluhps.........................phhV..sc-Ylph....Llp....l...uNp+hpcNpc+hp+hhptlpt...t........................................................ 
1 79 129 198 +1924 PF02677 DUF208 Uncharacterized BCR, COG1636 Mian N, Bateman A anon COG1636 Family \N 21.10 21.10 21.10 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.35 0.71 -4.49 75 1642 2009-09-12 22:47:58 2003-04-07 12:59:11 9 4 1534 0 279 1063 255 165.00 42 73.24 CHANGED lLLHsCCAPCSsaslctLppp..shclosaFYNPNIHPhpEYhhRhp-sc+hspch......slphltu-..Y-...hcpahctsc..GhEp-sEtGt.RCthCachRL-poAphApEtGFchFoooLhhS.aKshpplpchGcchupca.slpahhpDaRKtsGhpcslc.......luKchslYRQpYCGClaShc-p ..............................lLlHsCCAPCSs.shEhLpp....h-hslaFaNPNIHPhpEYhhRtpppt+Fscch...................sl..t..hl-.u-..Y-....c.pahcts+.....Gh.E..pEsE.t....G.hRCphCF-hRh-psAphAh.......E.......tG.......achFoosLslS.hKshp.pINphGhcssph........Y......s..............lpYhs.sa+KssGhpRtlE.......hs+c.phY+QpYCGClauhpp.p............................ 0 102 190 241 +1925 PF02678 Pirin DUF209; Pirin Mian N, Bateman A, Moxon SJ, Yeats C anon COG1741 Family This family consists of Pirin proteins from both eukaryotes and prokaryotes. The function of Pirin is unknown but the gene coding for this protein is known to be expressed in all tissues in the human body although it is expressed most strongly in the liver and heart. Pirin is known to be a nuclear protein, exclusively localised within the nucleoplasma and predominantly concentrated within dot-like subnuclear structures [1]. A tomato homologue of human Pirin has been found to be induced during programmed cell death [2]. Human Pirin interacts with Bcl-3 and NFI [3] and hence is probably involved in the regulation of DNA transcription and replication. It appears to be an Fe(II)-containing member of the Cupin superfamily. 
26.10 26.10 26.50 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.44 0.72 -4.17 52 4965 2012-10-10 13:59:34 2003-04-07 12:59:11 11 18 2388 5 1562 3896 1851 109.40 35 40.71 CHANGED shclpcshst.........shhphhsPFlhhDphsPspht.u............shuh.ssHPHpGhETVTYlh...cG.clpHcDShGscuhlpPG-VphMTAGpGIhHSEhs.st...........stsh+GhQlWl .............................................................s....tp.hut.....shhps.hpsF.Fh..-a.a..sPp..ths.us............................-.h....stGF.ssHPH+shEh.l..Tall........c...G.pl.p...Hc..DS.h...Gs.......p.s..............h.............l...p.sG.-......lQhMoAG....s....GIh....H.SEhsssp........................spslchhQlWl........................................... 0 450 941 1293 +1926 PF01595 DUF21 Domain of unknown function DUF21 Bashton M, Bateman A anon Pfam-B_618 (release 4.1) Family This transmembrane region has no known function. Many of the sequences in this family are annotated as hemolysins, however this is due to a similarity to Swiss:Q54318 that does not contain this domain. This domain is found in the N-terminus of the proteins adjacent to two intracellular CBS domains Pfam:PF00571. 
25.10 25.10 25.20 25.10 24.90 24.70 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.93 0.71 -4.99 126 9166 2009-01-15 18:05:59 2003-04-07 12:59:11 15 26 4552 0 2462 6747 2342 186.30 24 42.42 CHANGED hhllhlhlL.lhluuhFuusEhAlhulspsclcpht....cp..ss.....ptApt.lhplhpp......sphLsslll...Gsslsslhhu.slushh...hhphhs...................huh.hluhllhThlhl..lhGEllPKsluh.p..tsppluhhhuhhlthhhhlhh.Plshllshhs.phlhphhshp.........t.hhop.cElctllp.up.ppG....slc..pcE ..................................h..llllhlLlhlsu.aFu.uuEhAlhulp+s+lc.p.hs.........cp.....Gs.....................ppApt....lh...cl...h...p.p..........sphLosl..l................Gh.slsslhhu..hlupsh.......htp..hhts..................................huh.hluh.sl.lThlh.l........lhuE......lh..PK.slAh..p..ts-pluh...hhuh..slthh.h...hl..ht..P..l...lal.ls.ths.sh.l....h+.h.hshp..............ptth..oc...-E....l.c.tllptut.ptG.lpt.......................................... 0 785 1593 2105 +1927 PF02679 ComA DUF210; (2R)-phospho-3-sulfolactate synthase (ComA) Mian N, Bateman A anon COG1809 Family In methanobacteria (2R)-phospho-3-sulfolactate synthase (ComA) catalyses the first step of the biosynthesis of coenzyme M from phosphoenolpyruvate (P-enolpyruvate). This novel enzyme catalyses the stereospecific Michael addition of sulfite to P-enolpyruvate, forming L-2-phospho-3-sulfolactate (PSL). It is suggested that the ComA-catalysed reaction is analogous to those reactions catalysed by beta-elimination enzymes that proceed through an enolate intermediate [1]. 
25.00 25.00 30.60 39.00 23.60 23.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.55 0.70 -5.36 30 234 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 207 2 133 248 145 238.90 32 79.40 CHANGED hthspRsp..K.PRppGlThllDKG...luhpthcDhLcsuupYIDhlKhGaGTuslhspchl+-Klclh+caslhsasGGTLhElhhh..psph.-cYlcps+clGFsslElSsGolclsp-c+tch....IcpspctGhpVhoEVG.....................pKcsppptp.psschlchhpp-L-AGAphVIlEuRESGps.Glacss...Gpl+sshlpcllpp.ls..hc+llFEAPpK.....ppQthhIpchGssVNLuNIs.p-llsLEsLRtGLRGD .............t..stRstK.PRppGlT.lhD.u.........................huhphhcDl.lc........s......sGpalDhlKhuhGotslhspp.l+-tlclh+paslhlhsGGhhhElhht....ps.p...h...ccYlcts+clGFcslElSsGsl.slsp-cphch....Icphp.ptGhp.shsElG......................pKs.ptths.pstchlchhcppL.-.A.GAphlhlEuc......Glhcss...uphRp-llppll.pp.ls...hc+lhFEAspp........ptptaaIccaGssVNLsNls.scllsLEsLRhGLhus...................... 0 56 91 118 +1928 PF02680 DUF211 Uncharacterized ArCR, COG1888 Mian N, Bateman A anon COG1888 Family \N 20.90 20.90 20.90 50.20 20.70 20.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.03 0.72 -4.18 22 122 2009-09-13 14:06:05 2003-04-07 12:59:11 9 1 121 29 80 118 3 93.30 43 97.60 CHANGED M...s..lRRlVLDVLKP.+pPsll-lAtpLuclcGV-GVNIoVhElDpcTpslplTIEGsslDa-pIcchIEphGusIHSIDEVsuGcpllEp.cssQ- ........slRRlVLDVLKP.+pPsll-lAppLuclcGV-GVNIoVhElDh-TpslplsIEGssl-a-clpcsIEchGusIHSIDEVsuGc+llEt.................. 1 19 41 62 +1929 PF02681 DUF212 Divergent PAP2 family Mian N, Bateman A, Yeats C anon COG1963 Family This family is related to the Pfam:PF01569 family (personal obs: C Yeats). 
21.70 21.70 21.80 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.64 0.71 -4.46 40 730 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 571 0 233 576 175 132.70 41 83.60 CHANGED lhsNtsLhsAllAhhlAQhlKlhlphhhp...++hch.......phlhsTGGMPSSHSAhVoALuTulGlppGasSshFAlAslFAlIVMYDAuGVRRuAGhQAclLNp.Llp-h.p.............t..tpcpLKELLGHTPlEVhsGulLGlhlu ...........................hpN.sLhuulluhhhAQhlKhhhp.h....htp.....p.+.h.ch.......phhhuoGGMPSSHSAsVoALuoulGl....ppGhsSshFAlAslFAhIVMYDAsGVRptuGcQAplLNp..l...h..pph.pth............................tpcpL+El.lGHoPhpVhsGullGlll........................................... 0 86 168 208 +1932 PF02697 DUF217 Uncharacterized ACR, COG1753 Mian N, Bateman A, Eberhardt R anon COG1753 Family Structural modelling suggests this domain may bind nucleic acids [1]. 25.10 25.10 25.20 25.10 24.80 25.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.55 0.72 -3.73 16 123 2009-09-10 16:57:48 2003-04-07 12:59:11 9 1 63 0 61 129 9 67.10 27 87.20 CHANGED sKTIoIsD-VYccLlchK..tscSFS-VIpcLlc.......spcctLhchaGhls---h.cEhc+clpEs.tphcc+h ..........KTIslo--sYccLpchK.pssESFS-lltRLlp.......tpt...cplhch...h..Gh....h...s-c..-h...cchtcphcc............................................ 0 9 40 54 +1933 PF02698 DUF218 DUF218 domain Mian N, Bateman A anon COG1434 Family This large family of proteins contains several highly conserved charged amino acids, suggesting this may be an enzymatic domain (Bateman A pers. obs). The family includes SanA Swiss:P33017 that is involved in Vancomycin resistance [1]. This protein may be involved in murein synthesis [2]. 
24.30 24.30 24.30 24.30 24.10 24.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.02 0.71 -4.73 148 7246 2009-01-15 18:05:59 2003-04-07 12:59:11 12 18 3217 2 1345 4642 908 154.40 20 61.35 CHANGED pscs.llVLG.........th..stt..sh..stp.Rl.ptulcLhc....t.shssh......lllS.....Gu.s.t......................hsEAphh......pchhhpt.G..ls.....tpplhh....-sp.ups...ThcNA..thst.plhp.ppsh.........t...ph.l.lVTsshHhhR.......Ah...hhhcptuh.pshshsssh.ssht................................................phhh+E.hhu....h ....................................................................................ph.llVLG...............th...s.t......s.h........htt.Rl.stA.....h....plac.............p..s..s....sh...............lllS.....GGputs...........................shsEA...psh.............tchh.hp..t..G.......ls......................tp.p.Ilh.....-.sp.ut.s....Th-Nhhhuc..plhp...pp...................................ph..l..lVTssaHh.R........Ah..........hhh....p......p..h..Gl..p...s.......s..hs..s.s..........................................................h.................................................................. 0 387 838 1133 +1934 PF01629 DUF22 Domain of unknown function DUF22 Bateman A anon Pfam-B_1137 (release 4.1) Family This domain is found in 1 to 3 copies in archaebacterial proteins. The function of the domain is unknown. This family appears to be expanded in Archaeoglobus fulgidus. 25.00 25.00 73.10 70.50 19.40 18.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.04 0.71 -4.58 23 49 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 29 \N 41 51 0 107.70 28 79.39 CHANGED hp.c....hphshchsGclh+cc..lchcsauYp.pup.lupWEslIAsEcl-VccGEshhl+I+clclPssTllhPhslhRHAhGsllDVspps.PtpVE-c+plscAlFlsscDG ................ht.............htscltchc..lctp.hsap..htp..huphcsllAsEcl-l+pG-hc.I+I+cIplPspollhPsshhpHslGpllslscct.Ph.lEp-RplcpAhFlsstDG.. 
0 20 30 37 +1935 PF02713 DUF220 Domain of unknown function DUF220 Basthon M, Bateman A anon Pfam-B_1412 (release 5.5) Family This is family consists of a region in several Arabidopsis thaliana hypothetical proteins none of which have any known function. The aligned region contains two cysteine residues. 29.40 29.40 30.70 44.10 28.10 27.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.70 0.72 -4.30 17 83 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 13 0 48 82 0 69.60 52 24.65 CHANGED hSGslPIpLllcEN+Ksho..uKYKKcKMMFMKlFEGsWKVEPLYVDs-RLCKppcPKShEEY++CSGGpG+IuSK ........................SGslPlplll-EN+Ksho............sKYKp.pKhhFMKsFEGsWKVEPLYVDpERLC+.....shcPKShEEY+pCSGGpG+IuSK............. 0 21 27 30 +1936 PF02714 DUF221 Domain of unknown function DUF221 Bashton M, Bateman A anon Pfam-B_1596 (release 5.5) Family This family consists of hypothetical transmembrane proteins none of which have any function, the aligned region is at 538 residues at maximum length. 
28.70 28.70 28.70 29.20 28.30 28.30 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.88 0.70 -5.57 82 1418 2012-10-02 00:51:22 2003-04-07 12:59:11 10 55 296 0 1031 1387 24 296.20 22 36.12 CHANGED uFlpFcophsAphssQ....................shtppps...hph..hsthuspPcDllWpNl.slshhp+hh+phhsshhlhhlllhaslPVuhl.uhlsslstLsphhPhLphl...t...hs.hhh.ullsGlLPslhLslhhhllPhlhchhuphpGhhopuphEhpshsp......hFhFhllplFLlsol.uuoshsh...lpplls.............pssslsphLApsL..P......ptusFalsYlllpulshsuhpLLplssLlhhhlhtthhs......pTPRchhpth.s.sshsaGshaPhhhhlhsIslsYo...lluP..lILsFuhlhFhlsahsa+YplhYlas.pph.-ot...Gtha..PhshtplhhGlhlhplhhhGlh ...............................................................uFVpFpsthtAthshp..........................................shtp..p.s......hph....hthAspPpDlhW......pNl....tls.hh..p...h.hh...R....phh..h.......h..h.l.hh.l.h.....h...h....a..s.....hPs.sh.l..ss......l.....s........p........l......s......hl...........pp..h......h......s...h....lp.l...........................hhh...s...h...lpuhLPsl.h.L.hl.h...h..l.l.P......hl..h.hh....hu.t.h.p.G.h.h..otothchtshpp......hah..Fh......l..h.p....lhll.s.sl.sussh.sh.......htthhp.........................................ts.p.p.h......p.hl.u.t.sl....P.........ptusF.a.ls.....Yl..lhpuhhs..h.uh..p.Ll.....pl..s.s....Llhhh...h.....h..t..h..hht...........posc.....c.....h..h.......th.......h.............p.....t....hp....au......t......hhs.h....hhhh...........h.............h.....l................s.....l..sYu...hlsP..lll..sF....shlhhhltahsh+aplha..........sh...tph..-st..........G.t.ha.....shhh..t..ph..hhulhlhplhhhGh.............................................................................................................................. 0 345 611 869 +1937 PF02720 DUF222 Domain of unknown function (DUF222) Mian N, Bateman A anon Pfam-B_1711 (release 5.5) Family This family is often found associated to the N-terminus of the HNH endonuclease domain Pfam:PF01844. 
The function of this domain is uncertain. This family has been called the 13E12 repeat family [1]. 24.40 24.40 24.60 24.40 24.30 24.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.74 0.70 -5.29 29 2037 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 207 0 557 1830 116 259.90 21 64.47 CHANGED thutlt-hpsttcpLsuhtthlhscl.....spcsh.tttGsculushlAsphplStupAspplphAtpLtpR...........LPtsAsAhtpGclshcpVpsItttscplscshslssh-tApsshlppusp..hpscpLtthscphhthh...-...............................PDush.....sct-ttc.......cRtlslus..ts-GMoplpGhLsscsuAsh-shLsphAus.hCss..................DsRossQRptDAlsAll....RhshssGphsp.sGh..........sslllpsshpchpssust............slsGhGslLPhp-lhcLApcAp..Lt.lh.t.hupPls.hs+spRhsSss.RhhLhsRD ............................................................................................................................h...........h.tth.sh...........h.....h.ph.....................ttts.......t....s.h......p.......h..s.t..l.u..t..h.t.h.o...........s.tAt..p...hh.....t......A.........t...s..L.h......................................................hst.stts.......htt...Gtl......s.t.phthlhth.......h.......t...........l...............t............s..............h...........t.....h..........h.......p..t....t...........s....t..........h.....t.t.h..t.......hps..tp...l.....t.thh.p.thhthh.....c...............................................................................................................s..c..u...............pttc.ttp................pRtl.ph.....t..t...t.....-.Gh.stlp........u.h.Ls.st...tt.sthp..shl..sthu......ts...hsss......................................................................................................D.sR.o.s.Q.p.....p.t.D.uhhthh...................................phhht...s.s...t....s....p...t.uh...........................................st.lh..l..p..h.s.....h.p.p..hthtss.................................................hhhs...hs...lsht..h.h.p....h........s.t....p........ut..........ht.........
.....................s......s.....h......h.......c.....thh.s.t.t.hl.hpD............................................................................................................ 1 124 377 513 +1938 PF02721 DUF223 Domain of unknown function DUF223 Mian N, Bateman A anon Pfam-B_1714 (release 5.5) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -10.03 0.72 -3.90 15 441 2012-10-03 20:18:02 2003-04-07 12:59:11 9 27 17 0 209 461 0 88.80 23 26.26 CHANGED sLlLsDcpG......splcATIsp+hushYt.........-plp..EspWcsIooFsVp.sssulR...sTsHca+IhFhcp..........ThVspusshpssh.ahshTPFDhIl--osspslL .......................hlLhDcpG.......spIpAsl.p.p.phh.sp.ap.........shlp..EsphhplpsF.p.Vstss.sth..R...sosHca+ltFhts..........Th.lpt...s...psh.s..s........hh..p..hs.Fs.lhtt..................................... 0 3 13 25 +1940 PF02890 DUF226 Borrelia family of unknown function DUF226 Bateman A anon Pfam-B_1255 (release 6.0) Family This family of proteins are found in Borrelia. The proteins are about 190 amino acids long and have no known function. 25.00 25.00 25.20 25.20 24.50 24.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.84 0.71 -4.41 21 456 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 31 0 21 314 0 133.60 48 76.35 CHANGED lapFt.spsKcp+hhlhF+plaNpp+.hpthpLFPl+E..sDKFLGIaYGY+K.h.Kshhl+Yp...stspKsY.shsKsYYIEFRFKKGSVFCYl+ultpLL+.K-KhsTpY.ptLlcplhcLE+cVYcFYsKKLss..tGlIhKWIpK.NQ ............................hhpFt.sppKcpcFhlohRsLF.N...tc+.hpthpLas.l.KE..sDK.FlGIaYGa+K.h.Ks..h...hlKYp...sssp..KsY.sls.KsYYhEFRFKpGSVFCYl+uLhpLL+.Kc+tsscY.psLhshhpcLEppVYcFYsKKhsc..tG..hlhKWIhK.N............................. 
0 20 20 20 +1941 PF02989 DUF228 Lyme disease proteins of unknown function Griffiths-Jones SR anon Pfam-B_1298 (release 6.4) Family \N 25.00 25.00 41.60 41.40 20.40 19.10 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.23 0.71 -4.72 7 322 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 26 0 26 250 0 188.30 51 91.81 CHANGED Mu-hsplccpa...cKhtclpthh...Kssppssu.ltNSl......cF+DcNl.a.spGGspoSptDKlENa...PhpuasYKRGVKLs...spsspl....plEsGGGsDLYGlClDlDEFS+TATVlPITNNFEGYLlsK...ssolKscDKL.hNpcG.LEKssGu....tssINAlALScAhpls...................pDlall+VtlFGN+ulpc .............................................u-psplccpa...cKlsElcslh...Kssppssu.llpNSl......cF+DKNlhasspuGspoSptDKIENY...PspuaPYKRGVKLl.....sppspl.......pVEs..GGssDLYGIClDlDEFSpTATVlPITN.NFEGYLlsK...ssolKsGDKLshNppGsLEKssGu....tssINAhALScuh.pls....................p-l.ll+VulFGN+ulc................. 0 17 17 17 +1942 PF03003 DUF230 Poxvirus proteins of unknown function Griffiths-Jones SR anon Pfam-B_1300 (release 6.4) Family \N 20.40 20.40 22.30 21.10 18.90 17.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.29 0.71 -4.60 41 207 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 61 0 0 168 0 126.70 31 43.33 CHANGED IhsaCpppps...sppChClaPs...pshlphucchhtP+hCWhc.C...p+sspaLlpspcpphupCplssCsIslssLpl..supsclpssCt....sstshhsshspschlppphphsh......lhs.hhhhl....slhllhhl ...lhsaCsppps...sppCtCshPs...pshlppu.+hh..t..P+hCWhccCs..s+sspaLhtspcpshupCplssCs.....IslssLslt.supsclpssCs....psptsssss.p.tsc.h.lppphp.hsh......hhh..hlhhl....slhllahh................................................ 
1 0 0 0 +1943 PF03008 DUF234 Archaea bacterial proteins of unknown function Griffiths-Jones SR anon Pfam-B_1430 (release 6.4) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.59 0.72 -3.88 91 702 2012-10-11 20:44:43 2003-04-07 12:59:11 9 11 463 0 243 706 46 99.90 25 24.52 CHANGED WFRFlhPshsh.lEhGphc.hh.cplp..pph.spYhGhhFEcls+..-hLh.ch.t.t.h.....hsclG.+WW........c.....+sp..EIDllAlscppt....hlhsEsKWps.............ttcs+pl ..............................WFpFlhPst..sh.l-hsp.hc.lh...phl..c....pph.sp.a.huhsFEpls+..-hlh.ch.t..........hsp.lG.pW..W.......................c..........+pt....EIDlluh..scp...p..........hlhuEsKaps............p.................................................. 0 80 141 188 +1944 PF03057 DUF236 Protein of unknown function Griffiths-Jones SR anon Pfam-B_488 (release 6.4) Family This family represents the C-terminal region of a number of C. elegans proteins of unknown function. 21.20 21.20 21.50 21.80 20.10 20.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.83 0.71 -4.22 8 173 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 9 0 167 116 0 106.80 37 73.74 CHANGED K...............................................suhAuTHDPNYQTLAGLssNVFpcKsG....ssuAGGuuPsAPA.tsuKPGMAATHDPNYQTLAGLsN.slFc..KKD.Gut.......sAuGsuuPtsP.s-psuKAAT+DPNYQTLAGlsNDlF.............................................................s .......................................................................................................t....s.t.DPNYQThsulss.s....hh..ttt.........ss....ss........s...s......Pt..s...ss...............ss.t.......hAuTpDP.....NYQTLAGlss..s....lFt.....cK.s..ttt.......................ssuu.ss.sPtsP..ststhAuTpDP.NYQTLAulss.shF..............................................................t................................ 
0 64 91 167 +1945 PF03072 DUF237 MG032/MG096/MG288 family 1 Mifsud W anon Pfam-B_2298 (release 6.4) Family This family consists entirely of mycoplasmal proteins. Their function is unknown. Another related family, Pfam:PF03086, also consists entirely of mycoplasmal proteins of the MG032/MG096/MG288 family. Some proteins, such as Swiss:P75072, are included in both families, but of course differ in the aligned residues. 25.00 25.00 149.50 147.70 21.90 18.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.75 0.71 -4.27 14 35 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 4 0 14 29 0 135.50 36 26.32 CHANGED LRtpLsphushpLs.lp..plplpsp.sss..........slpWNcphsshchppspPYcFpFElshcYpGsYslphathhhs...huuIPupWpGchplpahlDGclspahss+.DYPuohFpFs-..scLLFs.HlhQcIpVps LRtpLsppuslpLs.lp..plplpsp.sss....h.....sIpWNcchsshchppspPYpFpFElshcYpGsYslphathhhs...hGuIPupW+GchplpahlDGclssWhssK.DYPGohFpFs-..scLLFs.HlhQ+Isspt.. 0 11 11 11 +1947 PF01638 HxlR DUF24; HxlR-like helix-turn-helix Bateman A anon Pfam-B_1509 (release 4.1) Family HxlR, a member of this family, is a DNA-binding protein that acts as a positive regulator of the formaldehyde-inducible hxlAB operon in Bacillus subtilis. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.94 0.72 -4.35 14 8192 2012-10-04 14:01:12 2003-04-07 12:59:11 12 15 2896 25 2341 7312 707 89.20 34 69.10 CHANGED llGuKWphLILhpLhp.Gs+RFsElc+tlPsIop+hLoppLRELEp-GllpRpVYsplPP+VEYSLT-hGcsLpPlltthpcWGppahpt.. ......................................lus+W.p.h.l...Il.h..p..L..............h........p.....G............s........p...R.......F.....s..-..L.p.+.p.l.......s......u.......l.....o.............p+.hLop.....p.....L+cLEpcG..l.l..p..R....p..s....a...s..p....s..P....P..+..V....E..Y.s.L.T.c.h.G.c.s...L.t.s....l.....l.p....slt.p.Wucpah...hh................................. 
0 770 1605 2009 +1948 PF03086 DUF240 MG032/MG096/MG288 family 2 Mifsud W anon Pfam-B_2385 (release 6.4) Family This family consists entirely of mycoplasmal proteins. Their function is unknown. Another related family, Pfam:PF03072, also consists entirely of mycoplasmal proteins of the MG032/MG096/MG288 family. Some proteins, such as Swiss:P75072, are included in both families, but of course differ in the aligned residues. 25.00 25.00 135.40 135.40 22.70 22.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.59 0.71 -4.47 13 35 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 4 0 14 32 0 120.10 40 22.97 CHANGED hsFpssYhsssssl.sLsFsLphpTsNFusLp-Lp-oFsp.sGssLssQLFaKssVsKLsh.ssNDLTplApTslu-shhshplsLscSIl....phsLppscpcF-ccllpPFhpcRpcAKtta- .hsFcssYhsssspl.sLsFuLphpTsNFosLp-Lp-SFsp.sGssLssQLFaKssVsKLsh.ssNDLTplApTAlG-slhshplsLscSIl..cssLpp.scppF-pchlsPFhpcRpcAKtta-. 0 11 11 11 +1949 PF03112 DUF244 Uncharacterized protein family (ORF7) DUF Mifsud W anon Pfam-B_2667 (release 6.5) Family Several members of this family are Borrelia burgdorferi plasmid proteins of uncharacterized function. 20.70 20.70 21.30 20.70 20.10 19.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.91 0.71 -4.69 7 158 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 30 0 10 164 1 148.00 70 39.00 CHANGED hEl.plpppIhps.thphh...NhD.pshhphlc-hV.pSDFYpsGlEFDWhsEFVEYV-ClDLEI+s-psAhNLEpsLhEIpsLpsELNKIQ....NEN+K+....EKPIKDlLKh+IscIhpcasLIsplNY+FccFVFshDPpKRAIoDRFKuLhPhSu+l.a.ss ......................hElhNL+KDIaSNYR--YLMAHNFNpDTFIKLV...E...DLVE..RSDFYSSGVEFDWAREFlEYVDCsDLEIKDsQSAENLAhDLMEIDSLpKELN+IQ....NENKKR......EKPIKDhLKMhIaNITNpYPLIE..plNYKFtEFVFTLDPKKRAISDRLKGLLPTSGsVFFPSN...... 0 8 8 8 +1950 PF03136 Pup_ligase DUF245; Proteosome_20S; Pup-ligase protein Mifsud W, Bateman A anon Pfam-B_3042 (release 6.5) Family Pupylation is a novel protein modification system found in some bacteria [1]. 
This family of proteins are the enzyme that can conjugate proteins of the Pup family to lysine residues in target proteins marking them for degradation. The archetypal protein in this family is PafA (proteasome accessory factor) from Mycobacterium tuberculosis [2]. It has been suggested that these proteins are related to gamma-glutamyl-cysteine synthetases [1]. 25.00 25.00 45.70 34.20 18.90 18.50 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.53 0.70 -5.88 31 731 2012-10-02 17:21:26 2003-04-07 12:59:11 10 3 352 0 214 642 154 443.10 44 90.47 CHANGED +RlhGhETEYGIsssspsssssl..o.p.l.......................hh..at.EsPhcDARs..hclpth.stssup...s................................NlhLsNGuRlYlD....tuH..........PEYSoPEsssst-hVhaD+AG-clhppAspcApph.ut...........sslhLaKNNsDucG.sSYGsHENY.....................................................................................................................LhsRsssF.splsctLhPFhVoRQllsGAGRVG........hs.suppsu...FQlSQRADalcptVuhpT.ThsRPIINTRD..............EPHADs-+YRRLHVIlGDuNMSEhoshLKlGoTuLVLphIE....sGh.....hs-Lsl..........csPVpul+plSHDh.oLptplpLtsG+phTAL-lQppYh-pstpalppc.s..........sspspcVLshWpcsLstlcsssh...........psusclDWlsKhpLl-pa+pR........puLshsc...P+LthlDLpYpDlcsp+GLaptLhp+GphcRLls-pclppAsspPPpsTRAhhRGchlcphs...pcl.......hsAsWsplhl 
............................................pRIhGlETEYGloss.......s...stpsl..............................................................sP..csARt..hhhphs..uts.us....................................................................NlhLsNGARLYlD....tuH..........PEYuoPEssssh-hVhaD+AGEplhcshstcAp.pp...hut...........sslhLaKNNsDu.................t.G.sSYGsHENY.....................................................................................................................Lhs.Rps..........s....F.splscsLlPFLVTRQllsGAG+Vs...................................ps...u...c..tss..........FplSQRADal.ptVu.tT.ThsRPIINTRD..............EPHADu-+YRRLHVIlGDuNhuEsoThLKlGoTsLVLchIE...........sGss............hpDlsL...........ssPlpAl+plS+Dh.o...h.ct.l.pL....s...........s....G........R.p.....h..o..ALplQccYhp+s.tcalpp+t..........................sspsppVl.chWs.csL-sl-.sssh.............phusclDWshKhcLlctappR................pshshss......P+lttl..D...LpYpDl+.s.+G..Las.....tL.t+Gt..hcRl.ss..-ppl.tcAsspPPpsTRAhhRGchlppht....scs.......hsssWsplhh.................................... 0 66 156 198 +1952 PF03158 DUF249 Multigene family 530 protein Mifsud W anon Pfam-B_2304 (release 6.5) Family Members of this family are multigene family 530 proteins from African swine fever viruses. These proteins may be involved in promoting survival of infected macrophages [1]. 
23.10 23.10 23.20 24.90 22.90 23.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.42 0.71 -4.77 9 88 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 11 0 3 75 0 189.60 43 38.18 CHANGED GALEucpYDL...IpKYasQIpDhHcILPLIQDPchFE.....KCH-Lsp.Csh.CLlpHAlKasMLsILQKa+cpLstc...hhsQhLFEhACcpp+a-llpWI..utsLplYp.pslFsIAhs++DloLaoLGYpLlhs+h.sp......p.shs.LLo.pHLcpAutKGLLcFhLETLKYGGsls...hlLopAlpYsHRKILsYFl+p .....................GALcucpYDL...IpKYasQItDhHpILPLIQDPchFE.....KCH-Lsh.CshpCLlpHAlKaNMLsILpKa+-pLp.tp...hhsQhLFElACcpp+h-llpWI..upsLtlhc.cslFsIAhsp+DloL..aoLGYpllhsph.sp......p.shhsLLs.pHLchAutKGLL.FhLETLKaGGslc...hlLotAlpYNHRKILsaFl+p.............................................. 0 3 3 3 +1953 PF03151 TPT DUF250; Triose-phosphate Transporter family Mifsud W, Bateman A anon Pfam-B_3234 (release 6.5) Family This family includes transporters with a specificity for triose phosphate [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.80 0.71 -4.57 62 2906 2012-10-02 19:55:49 2003-04-07 12:59:11 11 51 371 0 1969 5343 564 144.50 19 38.47 CHANGED Ghlhshh.ushshulp.shsphlhpc.............................................tthsshphhhhhu.hushhhlss.hhhh-thp..h.........................h.hhhhhllhsulhhahhshusahlltpsSslohsVsushKpsllllhullhFp...sp.lohhshlGhslulhGhhhYsh 
..............................................................................................................................................................................................................................Ghhhshh.us.hhtuhp.lhs.p.hhh.p.p..........................................................................................................................................hp.h.ssh.s.h..h..h...h....h.u....s.h...s.....h.h.....h.h.........h.sh.....h.....h..h......h...-..t..h..t....hhh.th...........................................................................th..hhhl..h.hs.u.l.h.u..a......h.h.s.h....s.s.a..h....h....l..t...t.s.....S...s.l...Th.s.l...sGsh...Kp..hh............s.lh...hu.hlhat...............s...ho..h.h....s....h..lGhsl.s.lhG.hhhYs............................................. 0 634 1165 1620 +1954 PF03159 XRN_N DUF251; XRN 5'-3' exonuclease N-terminus Mifsud W, Moxon SJ anon Pfam-B_2349 (release 6.5) Family This family aligns residues towards the N-terminus of several proteins with multiple functions. The members of this family all appear to possess 5'-3' exonuclease activity EC:3.1.11.-. Thus, the aligned region may be necessary for 5' to 3' exonuclease function. The family also contains several Xrn1 and Xrn2 proteins. The 5'-3' exoribonucleases Xrn1p and Xrn2p/Rat1p function in the degradation and processing of several classes of RNA in Saccharomyces cerevisiae. Xrn1p is the main enzyme catalysing cytoplasmic mRNA degradation in multiple decay pathways, whereas Xrn2p/Rat1p functions in the processing of rRNAs and small nucleolar RNAs (snoRNAs) in the nucleus [3]. 
22.90 22.90 24.00 23.10 22.80 21.50 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.75 0.70 -4.98 17 896 2012-10-03 20:43:45 2003-04-07 12:59:11 13 24 311 10 593 928 242 207.90 41 22.33 CHANGED MGVPtFFRWLSc+YPpllpslhEcp.......................EFDNLYLDMNGIlHsCoHPpDc..shshsE-E.....hahtlFcYlD+lashlRPRKLLahAlDGVAPRAKMNQQRuRRFRuA+-Apctptctpchtcph.pp.........................................cpFDSNsITPGT.FMspLupsL+YaIpp............................KlssDstWp.slclIlSsssVPGEGEHKIM-aIRpp+u.pPsaDPNT+HClYGLDADLIMLGLuTHEs....HFslLRE-l .......................................................................MGlPthaRWlsp...+YPt..h.........h.....c...pp...............................................................................................-hDN...LYLDMNGIlHs.C......s.Hspsp...............h..h.s--c......................h..h.ht.lFp..Yl-cl.h.p.h.l.+.P+....+l..ha...........hA...........l.................D..GV...............APR.A.KMNQ.QR.uRR..F.R.uup-s.t..pt......t.pt...p..t..p....h....t........................................................................................................................................ptaDSNs....ITP............GT.FMs..pLsptLca.a.lt...............................+.l....s.p......D..s.t......W.p....tl.p.ll.lSstps.........PGEGEHKI...Mc......aI......R....p....p+s......p...........ss.........a....s................P.Ns...cH..........slaGh............D...ADL.....IMLuL.s.....o.....H.-s.....pFplLREp................................................................ 0 247 365 514 +1955 PF03162 Y_phosphatase2 DUF252; Tyrosine phosphatase family Mifsud W, Yeats C anon Pfam-B_3756 (release 6.5) Family This family is closely related to the Pfam:PF00102 and Pfam:PF00782 families. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.93 0.71 -4.88 8 714 2012-10-02 20:12:17 2003-04-07 12:59:11 8 10 280 4 416 1012 121 149.50 27 62.75 CHANGED llPPhNFSsVts...slYRSuaPpstNFsFLcs.L+L+oIlhLssEshsp-sLp..Fh-sppIchaalthsus+c.............hlslhscplpcsLcllLspcNaPlLlHCscG+HRTGlVIGCLR.KLppWslsuIhsEYppFous.ttchh-ppFIEhFssslhhcpss.tsthshtp ..............................................................................................................sP.NFuhVps...slYRS.u.h...P...p.....h..s...a....s...F..Lpp...Lp...L+o.llhLs....s......c...................s...p......p..............hp......Fhp.p.ps....I.p...h...h....p...h....t........h.p.s...p...pp........................................................................l.sp..p.t...l..h...psL.c.l.l...l....s......p.s..a...P..lLlHC.ppGp.cR.T.G.sl.l.u..s.h....R.+.l.Q.tWs....h.sul.hpE....Yp..p...asts.......p....s.palc.ap.p...............hh.................................................... 
0 143 269 372 +1956 PF03190 Thioredox_DsbH DUF255; Protein of unknown function, DUF255 Mifsud W anon Pfam-B_2331 (release 6.5) Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.18 0.71 -4.52 98 1287 2012-10-03 14:45:55 2003-04-07 12:59:11 10 21 1106 1 594 2317 1224 153.40 47 23.88 CHANGED sNRLtpppSPYLLQHAcNPV-WaPWGcEAhpcA+ccs+PIhLSIGYusCHWCHVMt+ESFEDt-lApllNcpFVsIKVDREERPDlDplYMsssQhho.GpG......GWPLolFLTP.DtcPFauGTYFP.....c.....s+h...Gh.................PG......FhplLpplschWcp.c.+pplhpsuppl...hptLp .............................N+LhpppSPYL.hQHA.pNP.........V......cWaPW.u..p.E.....A.h.pcA.+.p.c.s.+.P.I.hL.S.....l.G..Yu.sCH..WCH...VMt+.............E.S.F-Dt...-...l.A.t..l..h.N..c.p.F....V..s.l..K..V..........D......R....E....E.....R.............P....D....l.........D..............p.....l.Y...........M....s.........h.....s...............p.....s..........h....o......G..p....G..................GW.Ph.olFL...TP...-...t.c.......P....Fa..u.G..T..Y..aP.p.........sph........sh.................................su.........F.hpl.L...p.tltphWpp..c..+pclhppupplhptl.p............................................................................................... 
0 217 426 530 +1958 PF03192 DUF257 Pyrococcus protein of unknown function, DUF257 Mifsud W anon Pfam-B_2788 (release 6.5) Family \N 25.00 25.00 25.90 29.10 23.40 22.10 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.13 0.70 -5.03 28 91 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 14 6 77 103 0 204.40 25 94.90 CHANGED lp-hl.cph+hGEol...LlEYsS..shhchlhatl.lphh+-c...shsllIsDlhDoL.lhh........pplclhGlcsslh..csspVIKlG.GphshGNVlt+l..t.-.shah...ppYtptlpclht...pcs..hIsIlLGlE+lh.hh.pshh-hhhllptltpa.lsscc+..puhYFlNpsllcphsss.hl.hLEEluosV..lclspc...t....hplhKuhp.th.Gh...plpl ........h..chl.pph+hGEsV...LlEYsS.hshsclhhhtl.lphhpcc...shsllIsDlhDohhlhh........ppLchhGlcssh...cslpVIKlG..GphphGsl...lt+l.h.t-.shah...ppYpphlpclhp...pcp...hlslllGl-+lh.hht.psht-h.hhhlps.ltph..lGsccR..huhYFlNpsllpph..p.hlshLEEluosV..lclppc......sp.hhhplhKu.p.p..s..ph..h....................... 0 4 5 43 +1959 PF03193 DUF258 Protein of unknown function, DUF258 Mifsud W anon Pfam-B_2832 (release 6.5) Family \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.98 0.71 -5.05 74 4240 2012-10-05 12:31:08 2003-04-07 12:59:11 11 13 3686 8 935 12978 6178 158.30 36 49.24 CHANGED hcphhpthcslGYp.lhhhssp..sspulcpLpshLps.+hslhsGpSGVGKSoLlNtLhs...ph........................................phcTuplSpp.s+G+HTTspscLhtl....ssGGhllDTPGhpphslh..hstcplspsFsEhpphh....tpC+F+sCpH...tsEP..uCAVcpAl-pGplst.RYpsYhcllc 
......................................................h....ht.Ypp..lGYp...lh..h..susp.........ptpu....l....c....t....L..........p....p....h...L....s.......s......+.......l....o......l...h..s.Gp..SG...VGKSo.L...l....NtLhs.....p..h...........................................................................................................................................p..h..p...T..u....-..l....S.....c......s.........u...+......G......+......H........T......T....s......p....s...c...L..h...p..l.....................p..G....G......h.....l....l...DTP..G..h....p..ph...s...lh..c...l.p...s.......-.p....l.spsFs..E..h...pphh..........tp.C+...F..R...s.CpH...t.p.E..P.......u..CAl....+..p..A.l.-....p.........G....p.....I...s...p...p...RacsYhplh.p........................................................................................................................................................................ 0 336 638 814 +1960 PF03196 DUF261 Protein of unknown function, DUF261 Mifsud W anon Pfam-B_2687 (release 6.5) Family \N 26.10 26.10 26.30 26.40 22.40 26.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.68 0.71 -4.19 8 189 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 32 0 15 139 0 137.30 62 92.16 CHANGED M.l.KhpppNt.....Ip+aGCYFLClhahh.lhKphc......Fssh-IslsYp+FluLGYI+.sNCaIlNPCtILuhaGIcocVRaESh....sYlssp.sEFEIoElKlcssshhHFlssss..pcVLYDS.LsLK.pGppaplsS+RlF+hp ..........MhIsKIKQsN+sLh.EIQKWGCYFLCLHYYsSlFKphE......FsAaEINsAYhRFlGLGYIK..SNCFIlNPCMILNYYGIRSSVRY...ESh.....sYLuAA.....NEFEISEVK...IccVN.GYHFIATKN..KEILYDS.LDLKs+GKlFKVTSKRIF+l................. 
0 11 11 11 +1961 PF03235 DUF262 Protein of unknown function DUF262 Bateman A anon Pfam-B_3462 (release 6.5) Family \N 29.30 29.30 29.40 29.40 29.20 29.20 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.80 0.70 -4.38 194 2983 2012-10-01 20:12:50 2003-04-07 12:59:11 9 17 1749 0 669 2580 315 195.70 18 37.50 CHANGED lppl..hsphp............................h...hlP..p............aQRsYs.W....ppcp.....hppLlcsl.....................................psah........lGsllh............................................ttt..tsphtllDGQQRLTolhh..............hhts........................................................................................................................................................t..hhtphhpttt..............................................pphppphhpshphhpp..................................................................................hh.h.h..................ps...shplFp...plNspGh....tLssh-..lh+shhh ...................................................................h.....htt......................h..hlP.t............aQR.sYs.W.......spcp.................hppLlcsl..............................................h.psa....lGsllh.............................................ppp......psph.llDGQQ....RLTolhh...........hlts....................................................................................................................................................................................................h...t.h....h.p...t.t..................................................................................................................p.t.tpphhp.h.phhtp..........................................................................................................................h.h...h.h...........................ts...s.....plFp....phN.stGh.Ls..-..lhpshh.h................................................................................................................................................................
............................................................................................. 0 249 483 593 +1962 PF03237 Terminase_6 DUF264; Terminase-like family Bateman A anon Pfam-B_3575 (release 6.5) Family This family represents a group of terminase proteins. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.95 0.70 -5.70 41 3063 2012-10-05 12:31:08 2003-04-07 12:59:11 10 17 2033 7 507 3449 4695 362.10 15 76.55 CHANGED hhhtuppsGKTahhuhphhtpshtss......................psplhlu.sspsphph..............htthhhphspp.hhphphp........tp.hhh...........ssGup.ltahuhptt....stpuhpG.......t..hlahDEhhhhsc.thhpthtpshssptchh.........hhossssst+.hhshhssth.sp.................................................pth.hshp..................hshtDshptu.s.hh....................cplttphsspt..hpphhhupass.............ssuulFphtphphthstt..............s.tttphh........................huhDsu............tssDssuhhlh..............thssspahhlttpptpshshsthsstlpphhp...phssphlsl-sss.....sGpulhphlppth.................hshphs...........csKtshshthpsll...csu+lphsps..........phtshhpslcphhssss.........p..tt+sDhhsAhthAlhpt 
.................................................................................................................................................................................................................................tuht.G.tT.h....h.h....s.....h........h....h....h....h....h..tpt.............................pthhhs..s.s..tt.p.ht...........................................hh...h.phh.....t...p............h..p.h.thp......................tp.hhh............................sGs.....p...l....hh.h.us...p....t.t...........shp.s.h...p..G.........................................hl...h.h.DE....h...hh.h............p.............t...............p......t......h....t....p...h....h...h...t....h......t...t..hh................................hho.s.s.s....s..............t....p......h...h............h...h.p..t...t.h.sp.........................................................................................h..hp...........................................h.sh.ts.s..h.....t..h.s.tp..hh......................................pp..l..t..t..p..h.s.spt........hpp...h..h..s..p..hhp............................stu..s..l...a.s..h...t..t..h..p...t..t...h..h.tt..............................................................s..s...t...t.hh.........................................huhD.u..................tss.D.ts.s.hhlh........................................h......ss......p..h.....h..h..l....t..t..h.........t........h.......p.........t......h......s.............t....t....t......p.....hl..t..p..hht..................ph.t.s..t..h.l.h.........h..-sps......................hu.tt.hh..p..lpp..h.............................h.h.h.s.................................ps+...t...h..h......t....htshh..........pt..tp.l..hst.............................h.t.h..t..lt.t..h...h.s.t.........................tp....s...Dhh.uhhhsh..........................................................................................................................................................................................................
. 0 161 327 421 +1963 PF03266 NTPase_1 DUF265; NTPase Mifsud W anon Pfam-B_4081 (release 6.5) Family This domain is found across all species from bacteria to human, and the function was determined first in a hyperthermophilic bacterium to be an NTPase [1]. The structure of one member-sequence represents a variation of the RecA fold, and implies that the function might be that of a DNA/RNA modifying enzyme [2]. The sequence carries both a Walker A and Walker B motif which together are characteristic of ATPases or GTPases. The protein exhibits an increased expression profile in human liver cholangiocarcinoma when compared to normal tissue [3]. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.03 0.71 -4.54 38 349 2012-10-05 12:31:08 2003-04-07 12:59:11 10 9 303 2 181 414 99 157.30 29 79.34 CHANGED +lhlTG.PGlGKTTLlpKlhctLppp.shplsGFhTtEVR..csGpRlGFcllslsoGccuhLA+ss.........tsts..+VG+YsVslps...h-plulsslcpt...ppsD...lllIDEIGsMELpopsFhpslcplLs.usps...lluslHcp.......llccl+pp.....sclap....lTpcN.................Rs...tlhpcll .....................................lhlTG.s.G.lGKTTll.p+.....lh.ptLp..tt...sh..t..........ls....GFh.T..p..E....lR........pt..G..pR..h.GFcll..s...l.....s.Gc..c.s..Luphp..................tt.pl.GpY.s.V.p.lps...........a-pl.ulshLpp.h.........pssc.................llllDEIGtMEl..h..upt......Fhp...sl.....pph....Lp....ssps......llusl.ht..........hlppl+p.p.s...splhp....l...s.pN...Rs.l..pl......................................................................................................................... 0 69 108 142 +1965 PF03270 DUF269 Protein of unknown function, DUF269 Mifsud W anon Pfam-B_4172 (release 6.5) Family Members of this family may be involved in nitrogen fixation, since they are found within nitrogen fixation operons. 
25.00 25.00 28.20 30.50 24.10 20.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.36 0.71 -4.11 41 182 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 157 3 85 173 7 121.40 47 78.61 CHANGED a-shoDpplLsPaIloKEpRRpIPlhGDPDPsTlhRlchFYpAlAhsIEccTGhhssshhc..lsHEGFGRsllhsGRLVVls+sLR.DlHRFGF-SlpKLA-cGpKhlssulchIccaP-V.Ach ......W-scoDtc.LL.s.PaIloKEpRRp...IPllGDPDPps.lhRlchFYsAVulsIE+pTGlhssPhhc..hsHEGFGRhllhsGRLlVlsKpLR.DVHRFGF-oLpKLA-cGsKhVssulchIcpaP-VAp............ 0 21 54 68 +1966 PF03189 Otopetrin DUF270; Otopetrin Mifsud W anon Pfam-B_2323 (release 6.5) Family \N 22.80 22.80 22.90 22.90 22.70 22.40 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.45 0.70 -5.48 12 606 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 83 0 443 578 1 212.80 17 64.99 CHANGED YLRlGAlsFGlGshVY.GlEh...F.h.hsssCpslhluls.hhthlFshhQMpFIFhN..uclshtRa+hlARFGLMHhVAsNLshWhphlltEs...phEIhphtp.t...stpsshhpssptsstthsts.tttc........................................hh.th................................................................................................................................................................................hoppsss.......................................................th.hpstRhphhsshltssusaLasshlEYSLIsAslhahhWKplt.................h....tsshs+ps+phplDCsssppGlFhGIllllholIsllhaalLhpcst.phhAs..lsls-hlhaslshhA...llhuhhp..MRsLca......ppptcu.sLDsILLllu.sG.hlYushullushhshhpsp..sshlsllstlhpllQsslQolFILpuu+p+stusppsRspPG+pIlTFLLlsNluhahhpThEpt+u..uFpshhh-aYGhhsWollh+lohPLsIFYRF 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 124 164 306 +1967 PF03407 Nucleotid_trans DUF271; Nucleotide-diphospho-sugar transferase Bateman A anon Pfam-B_4460 (release 6.6) Family Proteins in this family have been been predicted to be nucleotide-diphospho-sugar transferases. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.26 0.70 -4.56 49 713 2012-10-03 05:28:31 2003-04-07 12:59:11 11 28 132 0 514 739 97 195.10 17 45.37 CHANGED thlpphlVlAhDppshpts.pphtss.hhhh.h...........shs.tsphhtstsahphhhh+hplhpplLchGhshlhsDsDshWhcsPhshh.................ssDlhhss.Dthssp.spthpphh....................NsGhhal+uospohphhcphhpphtp..ss..........DQslhshhhpp.hh...........................hshphphLstshF.........shFhp...pp.pthps..............hhlHsshph...shp.....sKhpch ...................................................................................................................t...pphllhshDttshphh....t......th.t........h.hhh....................................tt.hhhst..sahp.hh..............h....h+hp.hl.t.plL...p..........h...G...a...s....hlh.sDsDlh..a..h..c...s..P.hs.hh...............................ssDl..hhss..Dth.t.s..t.......t.tphh.............................................................NsGhhah+s.ss...tsh..p.hhcpWh...p....th..tp.........st........................DQss.hsthht.t.........................................................................................hthphthl.sh.....hh................hh.t...........h.t.....................hp.sh.......t.......tKh........................................................................................................................................................... 
0 210 362 473 +1968 PF03314 DUF273 Protein of unknown function, DUF273 Mifsud W anon Pfam-B_3636 (release 6.5) Family \N 30.00 30.00 30.20 30.20 29.20 29.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.72 0.70 -5.04 12 87 2012-10-03 05:28:31 2003-04-07 12:59:11 9 7 10 0 85 68 5 176.00 35 55.45 CHANGED l+CYsKhHsYchllshDs-....ap..Csp.KDphFRRHCllAKlLssaD..slLFLDADhGVVNPpR+IEEal..ccslDIsFasRFhNWEItuGsYlsRNTpaAlshLpcFAsYE.+LPpSaHGoDNGAlHhaluE+lhPpso.clchC+.Khacp....SpsapDLaTYEuCI+slhGssscFs.KlRIL+K...............GT.GWARDGWLTs.hWpsch.DFMlHGWKsspLh.hPp..lts.ph .......................................hpCY.t..tY.h.hh..ttt.................ht...C..p....p..........................hF+RHChh....uthL......p........p....hlhhlDuDhullNPpp..................hlE-al..............p.th.DlhhYsR..a..........h..........NaE....lhuGSYls+NT.auhphLpc.aAs.a.E...pLPp.u.hH.GoDNGAlH...............hhlhchhhPp...p......h......p..C..phapt....otshtshhsapsCh+.hhGttp.as..cl+IhpK...............Gp..uWsRD.WLTsshWs.p..DFMhHuhKppp.................................................................. 0 32 41 85 +1970 PF03434 DUF276 DUF276 Finn RD anon Pfam-B_4450 (release 6.6) Family This family is specific to Borrelia burgdorferi. The protein is encoded on extra-chromosomal DNA.\ This domain has no known function. 
25.00 25.00 30.20 30.20 20.50 19.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.74 0.70 -5.44 3 157 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 28 0 12 138 0 260.50 73 99.62 CHANGED MSIVFDSDFGILKRTI+DIVRoKREYLRVNYGINIDDNpSSIYNIIASSLALIEEEIINELNLFFSKMKPGGSYWAAIEEHISS.KSTTYSAVRNALLNL-GIEHsNIKSuAGKANIYLILKEDLLsTsKoNINsPEFKAKlWETLYLTTPSGTLLEGDIEIDGLNSTGQ+KSYKISLGKRKYVYMKVKYKLDLKNYLYLNIDSQIRDIYSRIISNNYhDMGISFEYQDFFAPVNEVKGIKFMEIuICIKDTDTESIoKISDSDFppNQDISIsDDTILLFNTT-RLLIDhD ....MSIVFDSDFGILKRTIKDIVRsKREYLRVNYGINIDDNpSSIYNIIASSLALIEEEIIsELNLFFSKMKPGGTYWAAIEEHISS.KSTTYSAVRsALLNL-GlEasNIKSuAGKANIYLILKEsLLDssKoNINssEFKAKLWETLYLTTPSGTLLEGDIEIDGLNSTGQ+KSYKISLGKRKYVYMKVKYKLDLKNYLYLNIDSQIRDIYSRIISNNYSDMGISFEYQDFFAPVNEVKGIKFMEIusCIKDTDTESIoKIsDSDFppNpDIsIsDDThLLFN.TTDRLLIDh................ 0 8 8 8 +1971 PF03353 Lin-8 DUF278; Lin-8_Ec; Lin-8_Ce; Ras-mediated vulval-induction antagonist Finn RD, Pollington J anon Pfam-B_3924 (release 6.5) Family LIN-8 is a nuclear protein, present at the sites of transcriptional repressor complexes, which interacts with LIN-35 Rb.Lin35 Rb is a product of the class B synMuv gene lin-35 which silences genes required for vulval specification through chromatin modification and remodelling [1]. The biological role of the interaction has not yet been determined however predictions have been made. The interaction shows that class A synMuv genes control vulval induction through the transcriptional regulation of gene expression. LIN-8 normally functions as part of a protein complex however when the complex is absent, other family members can partially replace LIN-8 activity [1]. 
25.00 25.00 25.30 25.30 23.00 24.00 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.15 0.70 -5.14 14 108 2009-09-11 05:18:49 2003-04-07 12:59:11 10 17 4 0 106 110 0 233.90 14 66.63 CHANGED lohp-Yhphpppphhphsst...cstlKKVlLshlEcpPphWp..pssphttccWptlG.................................sEVacRTGpl........................lpssplpphappuKssL+p+L+psIhpK+hs+tssEtcLhcWEaYsah+YYRcsLtpaEApLRsc........hptptp.spscD-Ihh-shhp.......p.ppthEpsssss-h.s.Es.ht..sph.........ppsshpcsptsppshs..thsts.p.s............................sps.sushppsSpppps...........htsspp-ss...................sppIs.QspRLhpphPE+s+LlRcsLFcTllAh--t..-apssu-lFpDL.....At.psh++ppR .........................................................................................................................................p..hth.....hlht.lpphsthht..pt.p.h..p..at.lu.................................hphatRTG.h..........................................hpht.lpphappuKptLpp+lp.hl..p.c.p.hs......tphEppL.hp.W.hY..h+aaRp....h.tphEtthptp.......................................................................................................................................................................................................................................................................................................................................................................................pp........................................................................................................................................................................ 0 19 19 106 +1972 PF01709 Transcrip_reg DUF28; Transcriptional regulator Bashton M, Bateman A, Eberhardt R anon Pfam-B_1741 (release 4.1) Family This is a family of transcriptional regulators. In mammals, it activates the transcription of mitochondrially-encoded COX1 [2]. In bacteria, it negatively regulates the quorum-sensing response regulator by binding to its promoter region [3]. 
27.00 27.00 27.80 30.60 26.90 26.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.54 0.70 -5.23 257 5704 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 4795 3 1221 3365 2823 228.30 42 95.40 CHANGED SKWsNIK++KutpDu+RuKlFoKlu+EIhlAAKt.G.G.sDPssNspLRsAlp+AKssNhPpDsI-RAIcKu.sG...t.....sussa-ElpY.....EGYGPuG.lAllV-slTDN+NRTsu-VRpsFoKs.GGsLGpsGSVuahFc++Ghlhhs.............tshsEDplh....-..ssl-....A.GA-Dl.................p.........s..-....-..............st...h..plhosssshtsVppuLcp.t....Gh...php.su-lshlP.psplpl...ss.-.......sspph.+LlDtLED.DDVQsVYsNh- .................................pKWsNIcp+Kss......pDup+uKlasK.................hs+EIhlAAK..........t.G..G..sDPcsNspLRhsIc+AKpss.hP+-sI-RAIc+u.sGs........sspsacplpY.....EGaGPu.G....sA.llV-sLTDNpNRTsu.-VRssFsK..............s..G..G..N...lGss.GSVu.a.h.F-+pG.lIshp......................utDt..D.plh....E.tslE....A..Gs.-.DV..............p.........p.-....-............................ss.....h.pl.ho..ss.p-ht...pVpcALc..s...s........Gl...ch..p..suElphl..P..ps.p.spl..........ss.E...stpph.+LlDtLE.DpD.D.VQpVYpNh..................... 0 405 775 1031 +1974 PF03436 DUF281 Domain of unknown function (DUF281) Finn RD, Bateman A anon Pfam-B_4313 (release 6.6) Family This family of worm domain has no known function. The boundaries of the presumed domain are rather uncertain. 25.00 25.00 25.80 25.20 24.90 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.38 0.72 -3.91 10 42 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 4 0 39 43 0 58.60 30 28.03 CHANGED sToDGCophtVpCshNsuh.Cs.............sstLhA-psuGshsssGossushAEuolTCQcDssW.DS ..........tsG.CsphtVpCphssuh..Cs.............sstLhhptsuGt.shsGss.su.p.u.psolTCssDuha...s... 
0 20 20 39 +1975 PF03383 Serpentine_r_xa DUF286; Caenorhabditis serpentine receptor-like protein, class xa Mifsud W, Fenech M anon Pfam-B_2888 (release 6.6) Family This family contains various Caenorhabditis proteins, some of which are annotated as being serpentine receptors, mainly of the xa class. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.46 7 69 2012-10-03 04:04:29 2003-04-07 12:59:11 10 2 5 0 69 113 0 139.20 32 47.97 CHANGED phPhlYIhsMslsGllsKls..hhlDhh..s.lh.PstsYtpYRphIGppl.TllsThsYhhPhaLshLMThpRh.IlhpPhcp..hFosp+lhlYshhlhIlshllLLIPahSpCslNFNAps..apsACAPc+HPlThhQNpaLIhlPhsshllNs ..................................hPhsYIhhMhhssh.l.phh....hh..hshh.................h.hh.hs..p......ttY................t..............t...............a.h.......p..h..hGp.h.TlhsohsYhhshhlslLMolpRlh.llhpPhsp..hFosp+lah.YC.h..h.ls...............l.....hsh.h......Ll.IPahSsC.lsFshhphsahosCuP.p+H..............P...lT.hapNpYh.....IhlPhsshhlN............................................... 0 16 20 69 +1976 PF03384 DUF287 Drosophila protein of unknown function, DUF287 Mifsud W anon Pfam-B_2926 (release 6.6) Family \N 19.30 19.30 21.30 21.30 19.10 17.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.64 0.72 -3.78 13 34 2009-09-11 13:33:43 2003-04-07 12:59:11 9 3 3 0 2 34 0 49.60 42 8.73 CHANGED IsSIlpPs.-EKhLLccIh-spsscDDlshhDllVDuWccRLltEcKpIaacsla ..............IpSILtPsh-E+.hLpclh-c.pVDDsls.lD.llDSWccRLlsEcKpIaacsla.......... 
0 0 0 0 +1977 PF03385 DUF288 Protein of unknown function, DUF288 Mifsud W anon Pfam-B_3134 (release 6.6) Family \N 19.90 19.90 23.10 20.80 19.80 19.00 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.20 0.70 -6.00 8 118 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 38 0 100 113 35 184.50 29 31.56 CHANGED RTTDIWRSFISQKILHLSGLTVSFVPTNAVQFRNAHsYLKDFKDEKQVYEDSGKMIEFLHNWpCospNSo.lEsCIppLlNDLVKVKLWGc-DApLMEMFLsDLKsMGFEFPpLls.sNah-PYuPSpNEToRDVNCRRMHLEF-Ll-P+Kp..sEsl++ApQKLNYFGDIlsWCNETGYSslospFPSPcQLA+pH-cSYVhQKchNSVLIVVNNYPWKYGMGLIQRLYQPYFATlIFCGSWYPEpFoslDNFTSTlaPINYIHMNPAEIcKGaFAYHCVTLVKELtLsNVpGYFLMuDDsVFNIWQRIDYSRVHHLsG.ShNhpNsWastspaGhpAAK+IlchVKsSTDsKlt-TWpKFDsGLpKauYhN.T-suEspMpSshGKSlSDF .......................sDlhRuahuQ+lLa...G.hhlsFhP.ss.sh.phRssHs.hhhsFp-Ecpla.psGcll......cFL.pWp....st......l........cplhcLshshscpshWsppDhphhthaLpDLhslGap.Pplht........................................................................................................................................................................................................................................................................................................................................................................................................................................ 1 48 63 94 +1978 PF01062 Bestrophin Worm_family_8;DUF289; Bestrophin, RFP-TM, chloride channel Bateman A, Moxon SJ anon [1] Family Bestrophin is a 68-kDa basolateral plasma membrane protein expressed in retinal pigment epithelial cells (RPE). It is encoded by the VMD2 gene, which is mutated in Best macular dystrophy, a disease characterised by a depressed light peak in the electrooculogram [1]. VMD2 encodes a 585-amino acid protein with an approximate mass of 68 kDa which has been designated bestrophin. 
Bestrophin shares homology with the Caenorhabditis elegans RFP gene family, named for the presence of a conserved arginine (R), phenylalanine (F), proline (P), amino acid sequence motif. Bestrophin is a plasma membrane protein, localised to the basolateral surface of RPE cells consistent with a role for bestrophin in the generation or regulation of the EOG light peak. Bestrophin and other RFP family members represent a new class of chloride channels, indicating a direct role for bestrophin in generating the light peak [1]. The VMD2 gene underlying Best disease was shown to represent the first human member of the RFP-TM protein family. More than 97% of the disease-causing mutations are located in the N-terminal RFP-TM domain implying important functional properties [2]. The bestrophins are four-pass transmembrane chloride-channel proteins [3], and the RFP-TM or bestrophin domain extends from the N-terminus through approximately 350 amino acids and contains all of the TM domains as well as nearly all reported disease causing mutations [4]. Interestingly, the RFP motif is not conserved evolutionarily back beyond Metazoa, neither is it in plant members. 
24.20 23.40 24.60 23.60 22.80 23.30 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.74 0.70 -5.47 123 2221 2009-11-19 14:00:48 2003-04-07 12:59:11 16 18 1111 0 1116 1855 146 255.60 24 74.64 CHANGED Mh..........Vpsp................shhclLhph+GSlhptlh........hcllhhhhhshllsh....hh..........................hahhhh..hhphshs.s..lls.sLulhLGFpssssYsRWWEuRphhuthlshsRslsppltsh...........lps.....ssp.............ctchl+ctlu...............asthhptt.LRshsstsc.....hpphl.....ppptp.tlp...st...pts.....shhlhhhhuppl.tps.tcpGp.hssh..........hhttltppLsphpsshuss-RlctTPlPhsYohllphslahahlhlshuh.p.........................slshhs.P.lhoslhsahahGhtplucpL.sPFGp-s.sDlsLstl...I-psltp ...................................................................................................................h.h.hh.h.p...u.o.lh......h..lh..........plhh.hhh.....hhlhh.h...................................................h...hh...hh.p.h..shh....s..........l.ls.....hluh....hL.u..F+ss..ss....as.R............ah-uRphhs.hh.tscslhp.h....hsh..............l.s...ptt..............................................htchh.+h.ls...............as.hhhph...h....L.....R.p....s.t........................................................ht.phl...............tpp.t...h.........tt.....tp.........s.....hlhhhh.t.t........l.....ttt....h...p......p..Gp...hs...sh.........................hh.h.pl..p.........plsphpshhush-+lt.sPlPhsYsh..............hlphslahahhhhPhuhh.............................................slt.hh..s...P.hhosl.hsaha....hu..htt...lupp.ltsPF.G.p........c.s..s..D...l.hstl...I-hsl......................................................................... 0 469 658 954 +1979 PF01724 DUF29 Domain of unknown function DUF29 Bashton M, Bateman A anon Pfam-B_2003 (release 4.1) Family This family consists of various hypothetical proteins from cyanobacteria, none of which are functionally described. 
The aligned region is approximately 120-140 amino acids long corresponding to almost the entire length of the proteins in the family. Swiss:Q2RPE2, PDB:3fcn, is a small protein that has a novel all-alpha fold. The N-terminal helical hairpin is likely to function as a dimerisation module. This protein is a member of PFam family PF01724. The function of this protein is unknown. One protein sequence contains a fusion of this protein and a DnaB domain, suggesting a possible role in DNA helicase activity (hypothetical). Dali hits have low Z and high rmsd, suggesting probably only topological similarities (not functional relevance) (details derived from TOPSAN). The family has several highly conserved sequence motifs, including YD/ExD, DxxNVxEEIE, and CPY/F/W, as well as conserved tryptophans. 21.60 21.60 22.20 22.10 21.50 21.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.81 0.71 -4.34 89 805 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 164 1 289 1041 50 128.00 32 89.81 CHANGED LY-pDahhWhpppsphL+p.....tch.spLDhpNLlEElEslG+p-+.cplpSpLtlLlhHLLKapaQ...s..p+pspSWpsoIpppRpcIpchLccsP.SLcsaLspt..hp....csYpcAhc.AtpEotlsh.........ptFPtpCPa..sh-plLspsahP ................hY-pDa.hWhppphphL+p........tph.splDhc.sLlEElEshG+p-+...cplpShLthLlhHLLKhpa...........pp.pspuWptsIpptRpplpch.Lpc.oP......S.L+.s.h..ltph.....hp.....csYpc.Ahp.utpcotls................t.FP...pp...sPa..sh.-.plLstpah............................. 0 57 198 269 +1980 PF03442 CBM_X2 DUF291; Carbohydrate binding domain X2 Bateman A, Eberhardt R anon Bateman A Domain This domain binds to cellulose and to bacterial cell walls. It is found in glycosyl hydrolases and in scaffolding proteins of cellulosomes (multiprotein glycosyl hydrolase complexes). In the cellulosome it may aid cellulose degradation by anchoring the cellulosome to the bacterial cell wall and by binding it to its substrate [1]. This domain has an Ig-like fold [2]. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.94 0.72 -4.15 41 261 2009-09-13 17:41:48 2003-04-07 12:59:11 9 72 123 1 115 262 11 84.50 29 12.79 CHANGED oIsPoshsFcKsss..sDhslshshN.GNTLsu..lps.ssssLspGoDYol..SG.sslTlppuYLusl......ss...ussoLsFsFusGs...sssLsl ........................hsssshsFc+....ps..s...........s.....Dlslsl..shN..GN.o.lsu............lp...s..sspsLspGsD....YTl.....uG..ss....lTlppuYLusl........ss.....usssLohpFssGs...s.phpl............................ 0 69 95 105 +1981 PF03398 Ist1 DUF292; Regulator of Vps4 activity in the MVB pathway Mifsud W, Coggill P anon Pfam-B_3833 (release 6.6) Family ESCRT-I, -II, and -III are endosomal sorting complexes required for transporting proteins and carry out cargo sorting and vesicle formation in the multivesicular bodies, MVBs, pathway. These complexes are transiently recruited from the cytoplasm to the endosomal membrane where they bind transmembrane proteins previously marked for degradation by mono-ubiquitination. Assembly of ESCRT-III, a complex composed of at least four subunits (Vps2, Vps24, Vps20, Snf7), is intimately linked with MVB vesicle formation, its disassembly being an essential step in the MVB vesicle formation, a reaction that is carried out by Vps4, an AAA-type ATPase. The family Ist1 is a regulator of Vps4 activity; by interacting with Did2 and Vps4, Ist1 appears to regulate the recruitment and oligomerisation of Vps4. Together Ist1, Did2, and Vta1 form a network of interconnected regulatory proteins that modulate Vps4 activity, thereby regulating the flow of cargo through the MVB pathway [1]. 
32.50 32.50 33.40 33.00 32.30 32.00 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.90 0.71 -4.84 44 593 2009-09-13 16:20:19 2003-04-07 12:59:11 9 15 281 8 399 584 6 152.10 33 39.47 CHANGED Kot.L+hulsRL+hlppK+pshs+ptR+-lApLLptG+p.................pp.A+lRVEplI+--hhlEshEllElYCELLlsRlsllpp.................ppsssslcEAlsollaAusRh.sEl.ELppl+shhspKa.G+-Fsttshs.t...sssVsp+llcKLs.spsPspchh.thLpEIAcpaslsa .......................................+s.L+hslsRl+hl.ppK+...........p.....thspptR+-lAph.LpsG..+p............................................................ppA+lR....V.........EplI+--.hlpshEllElaC-LllsRhsllpp.................................................cphssslpE...Aluo..llaAusRh.s-ls...ELptl+...shhstKY.G+-Fstt.....shp.t.....tstVspc.lhcKLu.s........psPstthh.phLtEIAcpasl.a.................................... 0 110 222 321 +1982 PF03444 HrcA_DNA-bdg DUF293; Winged helix-turn-helix transcription repressor, HrcA DNA-binding Bateman A anon Bateman A Family This domain is always found with a pair of CBS domains Pfam:PF00571. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.48 0.72 -4.54 13 1056 2012-10-04 14:01:12 2003-04-07 12:59:11 10 4 1032 0 280 1296 281 76.20 43 22.68 CHANGED pLTslQ+-ILpsLIsLYccp.spslKGcEIA-hlsRNPGTlRNQMQuLKuLGLV-GVPGP+GGYhPTscAY-sLslps.s .................................lTpRQppILphII-h.Y..t.po...tcPV.G.S+sL..t..c..p..l..s......S..s....A....TIRN-MucLEchG.L.l.cp......ps.SuG.....RhPo.tuh........hsssh............................ 0 77 176 236 +1983 PF03445 DUF294 Putative nucleotidyltransferase DUF294 Bateman A anon Bateman A Family This domain is found associated with Pfam:PF00571. This region is uncharacterised, however it seems to be similar to Pfam:PF01909, conserving the DXD motif.\ \ This strongly suggests that members of this family are also nucleotidyltransferases (Bateman A pers. obs.). 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.58 0.71 -4.56 86 1219 2012-10-02 22:47:23 2003-04-07 12:59:11 8 171 696 0 561 1623 276 130.00 27 18.27 CHANGED pplsplhtslhtpG....spsptlspllsplsDtlhcRllpLshtpht......s..sssasalshGSpGRtEQhltTDQDNuLlhsD...........tsttptaatphupclspsLspsGashCsGslMAsNPpWptslspWcpphppWlppPs...sc ...................................p....thh.thhtps.h.p.sch..lsp.h...lutl...sc...t...hh...p+ll.c..L..s....tphus...............s..PssashlshGSpGRpEQhlhoDQDNAlllp-.......................tsptpttaFtpluchl...spsLtph.....GashCsGphMssNspWphshptWpphhtpWlttsp......................................... 0 301 440 518 +1984 PF03479 DUF296 Domain of unknown function (DUF296) Bateman A, Dlakic M anon Pfam-B_796 (release 7.0) & Dlakic M Domain This putative domain is found in proteins that contain AT-hook motifs Pfam:PF02178, which strongly suggests a DNA-binding function for the proteins as a whole. There are three highly conserved histidine residues, eg at 117, 119 and 133 in Swiss:Q46QL5, which should be a structurally conserved metal-binding unit, based on structural comparison with known metal-binding structures. The proteins should work as trimers. 
25.00 25.00 25.80 25.00 24.60 24.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.39 0.71 -4.49 113 1687 2009-11-26 14:38:53 2003-04-07 12:59:11 10 15 1088 11 617 1176 262 116.50 29 57.50 CHANGED hcsallclssGp.................Dlhpslpp...aupccshtsuhl.ouhG.ulsssslph..........s.......tp..h..sshsh.cuphEIlSLsGolh..........s.t.tsshsHLHlslus.spGplhGGplh.Gsl.hssu-lhlhthshtthpp ..................................h.p.aslcltsGp.................Dlhpplts....Fup...p...p.p.l.p.s.shl.uusG.ulssl..sL+h...............s............................sp...tsshpl....pG.p.aEllSLsGolt.....................s.st.....hH.LHlsl.....u...s...s.p.........G...p......s..lGGHl...h.......G..sl..tsssElhlhshs.h....h........................... 0 140 390 516 +1985 PF03537 Glyco_hydro_114 DUF297; Glyco_hydro_114; GHL7; Glycoside-hydrolase family GH114 Griffiths-Jones SR anon PRINTS Family This family is recognised as a glycosyl-hydrolase family, number 114. It is endo-alpha-1,4-polygalactosaminidase, a rare enzyme. It is proposed to be TIM-barrel, the most common structure amongst the catalytic domains of glycosyl-hydrolases [1]. 27.00 27.00 27.10 27.00 26.30 26.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.46 0.72 -4.19 128 403 2012-10-03 05:44:19 2003-04-07 12:59:11 8 12 334 6 208 401 21 78.40 24 25.17 CHANGED ssshhpP..sss......ssWpaQLs.........us..........h.sss...........sss......slhslDhh.c.............ss.....tttltt.L..+sp...G....+tlICYhSuGshEs......aR....s.D.t.....s....p....hsts .................................t.........t.s......tsath.Ls..........st..........h.pss.................ssh...clhslDhh.p........................hs.....tppltt...L..+sp..G..+hVlsYlssGshEs....aRs..h....pp.a...s................ 
0 87 139 182 +1986 PF03618 Kinase-PPPase DUF299; Kinase/pyrophosphorylase Finn RD, Eberhardt R anon Pfam-B_3403 (release 7.0) Family This family of regulatory proteins has ADP-dependent kinase and inorganic phosphate-dependent pyrophosphorylase activity [1-3]. 22.10 22.10 22.40 23.20 21.00 19.90 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.63 0.70 -4.89 121 2713 2009-09-10 22:59:03 2003-04-07 12:59:11 9 2 2465 0 532 1496 1541 253.70 38 92.63 CHANGED lallSDuTGtTA-slu+AsluQF.......shp..h.phpphPalcspcplpcllpplpp...psu......llhaTlVssclpphlpppspthth.tlDlhsshlsslpphLGh.........ps.p....tpsGthHplscs....YacRI-AlpFulpaDDG.psscslpcADllLlGVSRouKTPsSlYLAh.pGlKsANlPLls...-.......splPcpLhph...ppKlhGLTIsP-RLtpIRppRhhsht...................upYushcplppElphAcplac+..uhshI-sTp+SIEEsAstIl .....................lahlSDuoG.TAEhlu+AshuQF................................s..sp.....h..phhph..Palcsppchcp.llpp.lpp........ppu........lVhaTlVps-l+phlt.p.u.p.t.h....shDlhpslls.lppphth........................cP.p.....ps.u.tsH..t..L..sss....YapRIsAI-FultaDDGt.s.s.+.s.ltcADllLlGVSRouKTPhSlYLAh.hGl+sANhPlls...-.......lslPspLh..th.......pcK.lhGLTIsP-+LspIRc.ERhps..................................opYAshcphppEls.s-plhc+..shshIssTspSlEEhAspIl................ 0 164 320 430 +1989 PF03625 DUF302 Domain of unknown function DUF302 Yeats C anon Yeats C Domain Domain is found in an undescribed set of proteins. Normally occurs uniquely within a sequence, but is found as a tandem repeat (Swiss:Q9X8B8). Shows interesting phylogenetic distribution with majority of examples in bacteria and archaea, but also in in D.melanogaster (e.g. Swiss:Q9VA18). 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.08 0.72 -4.40 170 1152 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 862 4 381 987 109 63.90 27 39.52 CHANGED clDhptshcptshp...........hpshpllhhsNPphupphhpts..plGhhLPp.+llVhc......psGpshlshhcP ..............................lDhttshpptshp.....................htshpllhhsNPp.......hus.hhp..tc..sp..lul.LPh.+lllhct...........ssGpshlshhp.................... 0 118 258 338 +1990 PF03629 DUF303 Domain of unknown function (DUF303) Finn RD anon Pfam-B_3622 (release 7.0) Family Distribution of this domain seems limited to prokaryotes and viruses. 21.20 5.70 21.30 5.80 21.10 5.40 hmmbuild -o /dev/null --hand HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -12.04 0.70 -5.16 136 2378 2012-10-02 11:02:24 2003-04-07 12:59:11 13 60 784 11 391 2310 644 279.90 27 56.17 CHANGED G-VWLsoGQSNMp.....h.ht............shts.sp..............pphtps..sp.PplRhh...ph........t..s.p............s.tp.............spWp.sostss....t.t.....h......................oAsuaaFu+pLtpp.l..slPlGLIs.suhGGoslE.....sWhstp..sh.tt..th....t.htt............t.....................................................................................................................................................................................................ssLYNuMlpPlt.sh................sl+GslWYQGEuNs.....t.t..................pYtphhsslIpsWRppa........s.......ps..-hP..FhhlQLu...........sahtt.............................st.ths.lR-sQ.tpshtpl.....sNsuhs...............sshD.....h...up.psI...............HPpsKpplGcRLAhhAhph 
...............................................................................................................................................................................................................................................................................................................VhlhuGQSNh...........................uhup.uh..................s-hhcu......sp..PpI+.l..................pp.shp.....................s...............tt..........spapshhPtst....s.c..................h...........................su..uh.a.hA+...cLhshl.............sss.lhLls.sshGGSuhp.....shspss.........th..ts..s..s......t.................t....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................psLY..ps...hlsts+..uh.................slhull.WhQGE.Dh....sss........................pas...t...h...Fss...hlppaRt.-.h........................................s..........ts.......slP.ahhs.phs............ahppp....................................h...........................................................................................................................................................p.................................................................................... 0 191 307 348 +1991 PF03733 DUF307 Domain of unknown function (DUF307) Yeats C anon Yeats C Domain Domain occurs as one or more copies in a small family of putative membrane proteins. 
21.20 21.20 22.60 21.60 20.70 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.56 0.72 -3.53 122 2967 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1534 0 655 1690 93 52.80 40 43.34 CHANGED hlhNllWhl.huG.halulualluullhslTIlG.......IPaGhtsa+lushsLhPFG+p ....hlhNIlWhl.hu.G.aahsLual.lu.ul.....ltsloIIG........IPh....Ghusa+lu.tluLhPaGpp......... 0 171 375 544 +1992 PF03729 DUF308 RUF1; Short repeat of unknown function (DUF308) Yeats C anon Yeats C Repeat Family of short repeats that occurs in a limited number of membrane proteins. It may divide further in short repeats of around 7-10 residues of the pattern G-#-X(2)-#(2)-X (#=hydrophobic). 25.00 15.00 25.00 15.00 24.90 14.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.70 0.72 -3.98 354 6171 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 1945 0 1223 3772 186 64.10 19 81.07 CHANGED lhGllhlllGllslhhPhhsshsl....s.hllGhhhllsGlhpl.h.tshp.....pph.t....shhhhllhGllhllhG.lhllh ............................ullhllhGll.hl.h.t..Phh..uhh...sl........s.hl....lu...h...h...hllsGlhpl..h..tshp.......pph.................hh.hh.uhh.hhhuhhh..h.................................. 0 391 835 1043 +1993 PF03745 DUF309 Domain of unknown function (DUF309) Bateman A anon COG1547 Family This domain is found in eubacterial and archaebacterial proteins of unknown function. The proteins contain a motif HXXXEXX(W/Y) where X can be any amino acid. This motif is likely to be functionally important and may be involved in metal binding. 21.20 21.20 21.50 23.40 19.00 21.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.08 0.72 -4.52 100 762 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 716 5 231 570 211 62.40 35 37.77 CHANGED sthptulphasptcaacuHEsLEthWh....pssss..c+phhQGllQlAVuhhH.hpcGNhpG...AhtLh .........h..lphhhphas.ctcYFpCH-lLE-.hW+....ppssh.....ccshlhGLIphAsuhaH.hcRsNhpGAh+h................ 
1 68 158 208 +1994 PF01732 DUF31 Putative peptidase (DUF31) Bashton M, Bateman A anon Pfam-B_2152 (release 4.1) Domain This domain has no known function. It is found in various hypothetical proteins and putative lipoproteins from mycoplasmas. It appears to be related to the superfamily of trypsin peptidases and so may have a peptidase function. 21.30 21.30 21.50 21.40 21.10 20.70 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.49 0.70 -5.13 40 325 2012-10-02 13:45:52 2003-04-07 12:59:11 11 4 69 0 65 293 0 366.40 20 54.21 CHANGED s-hY+phhcpTFulpasstpp...............................................................p...............................................pttGTuWllDYptsp.........................sspss......hphYlATNlHVhstLhst.s.......................................................................................................................shpspspshsLs+hspssshpsphspsp..........ppsptphshhsts.........................................ssKplasAsDFlpp..ssp.h.t..........................................pphpphp.shstasDFAVlElchph.p.........................sspphhchlsss...................atthpsptpp......hphhs......pphhpp..htph..........................................hahlGY.....Psspss..............................................shahststth.tp.t.........sththsphptss..hs.p.thpsh.spsulscshlst............................t.....ppYtpaGhshhhpph.shtuGuSGShlhspssp....llGI....aau 
..............................................................................-hYcphh.poaulphhs.ts..........................................................................................................................tt................................................tt..ts.GTsWIlDap..h...sp..........................s.s.c.s......h+aYhATNhHVhs.h......................................................................................................................................shpspstshsls+hpsss.p.hppphphss................ppph.phhhhp.p.....................................................p.......sh+tla..suh.D..F.lpp..pstphht....................................................ppppphp...shstasDFAVlElcaph...........................................ssppltchlsss.......Yt..sppppp......hpahs...........pphhpp...htph................................................hahhGY.....Psspps.............................t....................tpp...........Slahsts..ph.pp.h.............sth..tphptss...hp.t.thpsh..spsulhcshlst...........................................................h...hthGhphh..pph..s.tuGuSGS....lh..........sppsp....llGIaa................................................................... 0 47 61 63 +1995 PF03750 DUF310 Protein of unknown function (DUF310) Bateman A anon COG1421 Family This family contains a number of archaeal proteins that are completely uncharacterised. The proteins are between 130 and 160 amino acids long. Their C-terminus contains several conserved residues. 
24.60 24.60 27.40 28.50 23.90 23.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.56 0.71 -3.91 10 220 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 215 0 73 182 9 122.60 25 86.48 CHANGED hlsshsc.ssptLlcpAE+lG..chLuc.ssLsToQIRclaspV+s.....I-pcscshpp.......lppcLhhLKPKLAYpsGRhp........tulcsLhElLccslcpl.....scchc+..FcpFhcFFEAIVAYHRaYGG+p ....................................hptt................t..h......h.....t..................p..........l.oToQlRplhsh...l.sp.............lt...schppptp.t................pltpc...l...phL+s+hsYpsGRcp................sl+sF.h...c..h..l..p..csl..-.....tl................tcs.tcp......h.tcFscahEAllAY+Kahs............ 0 32 46 62 +1997 PF03759 PRONE DUF315; PRONE (Plant-specific Rop nucleotide exchanger) Finn RD, Wood V, Mistry J anon Pfam-B_3610 (release 7.0) Family This is a functional guanine exchange factor (GEF) of plant Rho GTPase [3]. 19.00 19.00 21.10 20.00 18.90 18.60 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.27 0.70 -5.91 16 263 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 22 6 163 260 0 316.50 52 68.73 CHANGED tt.u-hEhMKERFuKLLLGEDMSGuGKGVsoALAlSNAITNLuAolFGpph+LEPhssE+KshW+REh-WLLSVsDaIVEFsPotQph.sG.sphEVMsoRsRoDLahNLPAL+KLDsMLl-hLDuFc-.TEFWYl-ps.............ttuu.cp..spRp-EKWWLPsP+VPPsGLSEpuR+pLppp+-ssNQlLKAAMAINuplLuEM-IP-SYlEoLPKsG+uuLGDsIYRhl.Ts-pFoP-plLssLDLSSEHcsL-lsNRlEAShalWRRKhpt+..........ssKS..oWustVp.h......h-KpclhtERAEoLLhpLKQRaPGLsQooLDhoKIQYNKDVGpAILESYSRVLESLAaslhuRI-DVLasDchs+pps 
..........................s-hEhMKE+FuKLLLGEDMSGuGKGVsoALAlSNAITNLs................AolFGp..h+LEPhss-+KshW+REh-WLLSVs....DaIVEhsP.ohQph.sG.sphElMss+.RsDlhhNlPAL+KLDsMLl-hLDuFpc..sEFWYlcps........................tsss.ct....s.pRp--KWWLPsPpV.PssGLS-tsR+.Lppp+-sssQlLKAAMAINuplLsEM-lP-sYh-oLPK.........s....G...........+usLGD.lY+hl...Ts-pFsP-plLss.....l-...........LSoEHpsL...-lt...NRlEAuhhlW+++hp.pp....................ps+o.....oW.shVp............h-..Kp.ph.htcR...AEslLhhLKpRFPGLsQosLDhsKIQaN......+.............DVGpuILESYSRVLESLAaslhuRI-DlLhsDphspp.s.............................. 0 21 94 128 +1998 PF03773 DUF318 Predicted permease Bateman A anon COG0701 Family This family of integral membrane proteins are predicted to be permeases of unknown specificity. 24.50 24.50 24.60 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.98 0.70 -5.44 12 3218 2009-09-14 00:13:01 2003-04-07 12:59:11 8 17 2210 0 803 2344 347 274.70 22 88.25 CHANGED shlshh.slhh.hhlcsl.hLlhuhhluuhIpshlscphlhchLsc..s+hhuhhluulhGhlhshCsCuhlPlhtthhcpGsshssAhsFLhuuPhlN.lslhhshhshG..hplshhRllsuhlhulllGllhphhhptp........................hlhshp...............................h.hls.sph.hsshh.+ht.shh.....cshs.hhphh..hLllGshIAuslpsal.Ppshlhs.hGt.....sslhushlhhlluhlh.hsohs-sslstuLhpt.huhGsslAhLhhGshlsl.shhhlhphhtt+hlhhhlshlsl.shlsuhlhsh 
.....................................................................................t...t...thh.h....hhlc.h.h..hll.l.uh....hlu..uhlpshls.......c...h..lh+h.L.sp.....sp...h..h...u..h.hh.us...h....hGhhhPhCpCuslPlhtuhhp.ptsshs.s.sh.s.FhhsuP..hl..N.P.h.slhhh.hh.shG......hp..hshh+....h..lsullhullhG..h...l.lthhhtpt............................................................................................................................h.............................................................................................................................t....h.....t..t...p...h.....t....t..h...t..h.hhh..........psh.p..hh.p.h.h...alll....Gsh...l.uu..........h..........h.......psa..l....Ppphhts...hut.........................ss.h.h....ul.hlh....h..lluhhh..l.sutush.ls....t....u.h..h...tt...hu.h.ushlAhlhhssh.lsl.plh.hh.pphht.h....+..hlhhhh.s.hl..hh.s.shl.huhlh..h........................................................................................................................................ 0 295 561 699 +2001 PF03778 DUF321 Protein of unknown function (DUF321) Finn RD anon Pfam-B_876 (release 7.0) Repeat This family may be related to the FARP (FMRFamide) family, Pfam:PF01581. Currently this repeat was only detectable in Arabidopsis thaliana. 20.60 20.60 24.40 21.10 18.00 20.00 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.13 0.72 -6.26 0.72 -4.48 32 105 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 1 0 2 105 0 19.10 66 55.12 CHANGED LRFWRENHGFoFLAtK.pla LRFWRENaGFTFLAGK.pVY. 1 2 2 2 +2002 PF03780 Asp23 DUF322; Asp23 family Bateman A anon COG1302 Family The alkaline shock protein Asp23 was identified as an alkaline shock protein [1] that was expressed in a sigmaB-dependent manner in Staphylococcus aureus. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -3.78 173 4865 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1784 0 734 2174 33 105.20 28 72.18 CHANGED phGplpIuscVltpIsGlAup-l.GVhuhsus...hhsslschl.sp...cshs+GVpV.chsp.ctplslDlhlllcYGssIsclupplpcpVcpslcpMTGLcVspVNlpVpslp ..........................................................p..GplplsscVltp..I..sGhAsp.cl.Glhuhsut........hhsslt.c..hl..sp..........psh..s...+GV...p.V....ch..........s....cpplsl.DlhlllpYG.s.plsclupplpcpVppslcphT.ulp...l.s.pVNlpVpsl.................. 0 308 520 657 +2003 PF03781 FGE-sulfatase DUF323; Sulfatase-modifying factor enzyme 1 Bateman A, Wood V, Mistry J anon COG1262 Domain This domain is found in eukaryotic proteins [1] required for post-translational sulfatase modification (SUMF1). These proteins are associated with the rare disorder multiple sulfatase deficiency (MSD) [2]. The protein product of the SUMF1 gene is FGE, formylglycine (FGly),-generating enzyme, which is a sulfatase. Sulfatases are enzymes essential for degradation and remodelling of sulfate esters, and formylglycine (FGly), the key catalytic in the active site, is unique to sulfatases [3]. FGE is localised to the endoplasmic reticulum (ER) and interacts with and modifies the unfolded form of newly synthesised sulfatases. FGE is a single-domain monomer with a surprising paucity of secondary structure that adopts a unique fold which is stabilised by two Ca2+ ions. The effect of all mutations found in MSD patients is explained by the FGE structure, providing a molecular basis for MSD. A redox-active disulfide bond is present in the active site of FGE. An oxidised cysteine residue, possibly cysteine sulfenic acid, has been detected that may allow formulation of a structure-based mechanism for FGly formation from cysteine residues in all sulfatases [4]. 
In Mycobacteria and Treponema denticola this enzyme functions as an iron(II)-dependent oxidoreductase [5,6]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.91 0.70 -5.03 22 4674 2009-01-15 18:05:59 2003-04-07 12:59:11 11 147 2003 22 1863 4765 2666 226.90 22 55.13 CHANGED pssshVhlsGGsFth...Gs.....................tspEsPh+phpl.psFhIsch.VTstpatpFlpt...ssh................................................h....ht.hppspt......ttsspPVspVoa.DAhAYApWhuc.......................................RLPTEsEWEhAARuG....tuphasaGsphts.....................t.stttsstsstpt.ssPVssass..NshGLhDMhGNVWEWTsshap...........................t.s....sssh+Vl+GGuahst.....sthhRsuhR.s.t..tssstssplGFRssp .........................................................................................................h.....h.l...s...G...h.h....G..........................................t....-....ts......c..p..lpl..ps...F..hhsph.V..T..stpa...t...tahpt........st............................................................................................................................................................................................................h.......p..........................................................................t............t...........pt..P......V......h..pVoa.-.At.s.....a.spWh.u..p.............................................................................................................R.....LPTEsE......W....E....h....A...A......+.u..s.....................t..t........h.....h..s.s.p.............................................................................................t..................t..t...........h.....s.....t....s...V...s....t.h.s.................s.sh..G.....l.a.-.hhGNVaEWst.s..at............................................................t................................t....s..t........t..l....h.+..G...Gu..ahs..........sth...hRss...hR....t................t.......t...........t....h.
GFRhs........................................................................ 0 719 1285 1627 +2004 PF03787 RAMPs DUF324; RAMP superfamily Bateman A anon COG1332, COG1567, COG1367 Family The molecular function of these proteins is not yet known. However, they have been identified and called the RAMP (Repair Associated Mysterious Proteins) superfamily. The members of this family have no known function they are around 300 amino acids in length and have several conserved motifs. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.47 0.70 -4.49 154 1749 2012-10-01 21:23:39 2003-04-07 12:59:11 10 11 493 0 736 1663 29 234.30 14 68.48 CHANGED plpsloP.la......hGsu........................................................................................................................tssstshl.uool+GslR.....hhhc..................thht.........................t.tt.tttt..t.t..............................................................................................................................................................................................h.splF.....Gss........................................shtupltlpss.................................................................................................................................h..hh.hththptphthspt.t..................ap..hchh.tt.......................................................................tt..ppttphhtphLphh..........................thhslG....uppp.......hG.hGt..hp 
............................................................................................................................................................................hhos..lhlGsu...................................................................................................................................................................................h....hp.t.h.t..s.......h.P...hIPG...SSlKG.tlR................shhp.............................................................................................................phht....................................................................................t......................................................................................................................................................................................................................................................................................................................................................................hhpplF.............Gsp..........................................s...htu...plhhp....D.s..h.h....................................................................................................................................................................................................................................................................................................................h..t...h..h.t..hp.h..p..h..t.lsp...pttst..........................................hp.....hchl.tt........................................................................................................................................................t....t............p....t.........h...t....h.hht.hhphh......................................t....htlG..utts.hGhGhh................................................................................................................................................................................................................................................................
............................................................................................................................................................................ 1 388 536 643 +2005 PF03804 DUF325 Viral domain of unknown function Griffiths-Jones SR anon PRODOM Family \N 21.50 21.50 21.90 127.20 20.10 19.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.31 0.72 -4.26 6 14 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 14 0 0 14 0 72.40 66 48.77 CHANGED l+FSLRLTpEaKENIVAHlcHLsRLRALIDGKVTpADVRRFGFsDRNALVuACMsVNVQsYs.PDuTI..RhQP lKauLRLTpEYKENIlsHhDHLoRLRsLIDGhlpspDVpRFshhsRNsLlSACMplNVpsYh.PsuTIDMRhQP 0 0 0 0 +2006 PF03885 DUF327 Protein of unknown function (DUF327) Bateman A anon COG1728 Family The proteins in this family are around 140-170 residues in length. The proteins contain many conserved residues. with the most conserved motifs found in the central and C-terminal region. The function of these proteins is unknown. 25.80 25.80 26.50 34.10 24.90 25.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.80 0.71 -4.37 51 443 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 442 2 118 290 4 147.00 26 98.51 CHANGED M+Ip............t.ppht.tshtpsptcpst.ss..psFtphh.sptcpcttp...........ccLppllscIcctGp+LscshshpsltpYKchlKcFLcpslppshplcpppuh....s.t..ps+haplVcplDc+LpcLsp..pllspc+cslsllspIsEIpGLllDlah .......................................................t......t..tt..t.ct..sp.sp.....tsFspph..sptcccph.................-plpphlcpIsctGc+LtppholcslhpYKphVKpFLp.hlcsshphccppuh................p.th..psphhslV+pl-pcLp-Lsc..pllsscpsplclLspIsEIpGLllslh... 0 54 92 102 +2007 PF03883 DUF328 Protein of unknown function (DUF328) Bateman A anon COG3022 Family Members of this family are functionally uncharacterised. They are about 250 amino acids in length. 
24.60 24.60 24.70 24.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.57 0.70 -5.08 127 2603 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2522 0 474 1849 1083 236.40 36 94.32 CHANGED M.LlllSPAKsL.chp..ss..ss...tpho....pPphhcpuppLlchh.+plos...pclupLM.plS-cLAtLNhtRapsa....p...t..ps....u+...AlhAFs.GDVYpGL-ApoLspp.shsaAQp+LRILSGLYGlLRPLDhhpPYRLEMGT+Ls..s.pGpsLYpFWGsploptLspthtt........splllNLASsEYaKulc..ptl.pspllossFp-.K..sGp..hKlISaaAK+ARGhMsRallcpplsss-p..L+sF....shsGYtastphS ...................................MhIllSPAKshshp............ss....sh............tph.o..........Pph.h...sp...opp.L..l.cth....cpl.os.............splppL.h.plS......-.clAshNhtRapsap.................t.ps.....up.....AlhuFp.GslYp..uLps.cs......h..ocs...-h......casQpHLRlLSGLYGlLRP.L.Dh.hpPYRLE..MGs+..Lt..........sp....G....+............s...LYpFWssh..l...o.ptL.sctht..................cplllNLASsEYh+slcs......tpl.......ps..........c........llss.h.F...h.-pK.......sGc.....hKllS........aaAK+ARGhMsRall..c..N..p..l..p..p..s-p.Lp.sF.......shtGYhastp.S........................... 0 130 290 396 +2008 PF03884 DUF329 Domain of unknown function (DUF329) Bateman A anon COG3024 Domain The function of this short domain is unknown it contains four conserved cysteines and may therefore be involved in zinc binding. 19.80 19.80 19.80 19.90 19.70 19.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.34 0.72 -4.41 121 1426 2012-10-03 05:12:49 2003-04-07 12:59:11 9 3 1418 1 310 792 215 59.70 48 90.21 CHANGED h..s........VpCPpCt+tl.Wt.ps.ta.RPFCScRCphIDLGcWAsEcapIPup........ss-t.sp-p ......sl....VsCPsCGKsV......sW......s..p.s..sa..RPFCScRCp.LIDLGcWAuEEppIPuss-.....-sDt........................... 0 71 164 240 +2009 PF03886 DUF330 Protein of unknown function (DUF330) Bateman A, Sammut SJ anon COG3009 Family The proteins in this family are uncharacterised. The proteins are 170-190 amino residues in length. 
21.00 21.00 21.10 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -4.85 95 1859 2012-10-01 20:48:06 2003-04-07 12:59:11 8 5 1526 8 438 1301 80 157.30 22 77.37 CHANGED YpL.......ssssssssssssss.....lhlssVslsshLspsplVhpp..ssspl.phscpspWAssLsptlppsLspsLspphssttlss.s.........ssssshplpls..lpcFcut....ssspshlsup..Wplhs...tp.......sp..........shss+s.hphptshsus.u....hsulVsAhspul.spLuppIA ............................................Y.L........stssstsssstst........hhLhlt.pVsls.c.hLs.ss..s..lV.hps......sss.p.h..shh.p..sshWAssLsptlpsp..LsssLspp....hsshslssts............hsssphpLpls....l..pp....F....pup................su..p....s......h...l...s..up....ahLhp.......p....................ut................lhpRs.Fp..l.pts.hs.ps..s........hss..hV.pu.hspuh.sphuspIs................................................................. 0 105 240 342 +2010 PF03889 DUF331 Domain of unknown function Bateman A anon COG3036 Domain Members of this family are uncharacterised proteins from a number of bacterial species. The proteins range in size from 50-70 residues. 25.00 25.00 25.90 25.20 19.10 18.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.80 0.72 -4.67 20 635 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 633 0 89 304 9 39.20 62 53.07 CHANGED GsI+.DNALcALVTSpLF+pRVEKsKKGKGSYpRKs......K+pG ...........GpIKDNAlcALl+DPLFRpRVEKsKKGKGSYpRKu......KH................ 0 9 28 58 +2012 PF03891 DUF333 Domain of unknown function (DUF333) Bateman A anon COG3042 Domain This small domain of about 70 residues is found in a number of bacterial proteins. It is found at the N-terminus the of Swiss:O28332 protein. The proteins containing this domain are uncharacterised. 
25.00 25.00 26.20 25.40 23.10 24.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.61 0.72 -4.16 57 1338 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 848 0 141 468 11 48.40 44 56.10 CHANGED luhA.NPA.......usaChptGGplpltppspG...phGhCpLPsGcthEEWsLaRppp ....................thsNPA.......us.CtphGGoLtsspph.p.G...shuhCtLP.sGcph-EWuLhRtsp......... 0 22 53 100 +2013 PF03904 DUF334 Domain of unknown function (DUF334) Finn RD anon DOMO:DM04800; Family Staphylococcus aureus plasmid proteins with no characterised function. 21.40 21.40 21.70 22.20 21.20 21.30 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.64 0.70 -5.03 2 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 34 0 1 40 1 157.90 31 83.28 CHANGED .hthQ.h.S.cs.KpNcps.NhQhpSK.TGT.SpphQpoLpNEELpELK+QNKLllKYluEhpEpQclREKE.ptIpSpLKcsTcDFps+uhKl+NDFVclLQcpLp+VssE-lc..lt+slYtVREE.cpMLpEVKpSHEHYQpRQK.LFTGIGAMLLVFMLFALIMTIGpDFMuFLHVDsLQNAIAuKlKASEGFhohlWYIAYGLPYlLAIGLFIhLYEWIRA+FHD .........................................................................................................................p...pp.pth....phppTtccFh.p..s.phppDFlchlpcpLpcl..ss-..php.t..hpp-.lhc.p...pE.cphLp-.V+p...uhEchpppppplasuhsuhLhVFhlFsllMhhG.Dhh.............................................................hh.................................... 0 0 1 1 +2015 PF03928 DUF336 Domain of unknown function (DUF336) Bateman A, Moxon SJ anon COG3193 Family This family contains uncharacterised sequences, including several GlcG proteins. The alignment contains many conserved motifs that are suggestive of cofactor binding and enzymatic activity. 
20.70 20.70 21.20 21.30 20.30 20.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.48 0.71 -4.35 36 2508 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 1635 13 741 1913 457 128.80 27 74.31 CHANGED sslshpsAh....phspAAhttApphuht.lslAV.VDt.sGphls.hhRhsGAshtosclAtcKAaTAsshtt.....sTtphsptlps.ssshhtshsth.....sphshhGGGlPlh.hcG.tl.lGulGVSGs.osppDcthAptulpAl .............................................................................................lohptAp..plht....suhpp.....Ap.c.........s...h...lslul..lDs..sG..p.hls..h.h..R..h....-s.u..s.....h.s..sclutcKA.hTAst.hcp...................................sottht..p..h....s..ps....sts.hhshtsth....................sthsshuG.GlPlh...hcG....p.........l....lG.ulGVSGs..sscpDtplApsult..h...................................... 0 202 445 612 +2016 PF03929 PepSY_TM DUF337; PepSY-associated TM helix Bateman A, Yeats C anon Yeats C Family This alignment represents a conserved TM helix found in family of bacterial proteins. The previous DUF337 alignment covered the whole (or most) of the protein. Analysis with dotter (E Sonnhammer) indicated that the same region was represented multiple times within the old alignment. 21.50 19.00 21.50 19.00 21.40 18.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.17 0.74 -7.28 0.74 -3.18 216 157 2012-10-01 23:59:14 2003-04-07 12:59:11 11 13 141 0 57 868 51 35.30 47 13.52 CHANGED hhplHp.................hhshhsshhhllhsloGlhhh .................sDLHKGRss........GssWtWhlDlhAlhhllFslTGlhLL....... 0 14 29 44 +2017 PF03937 Sdh5 DUF339; TPR_div1; Flavinator of succinate dehydrogenase Bateman A, Yeats C, McNeil M, Eberhardt R anon COG2938 Domain This family includes the highly conserved mitochondrial and bacterial proteins Sdh5/SDHAF2/SdhE. 
Both yeast and human Sdh5/SDHAF2 interact with the catalytic subunit of the succinate dehydrogenase (SDH) complex, a component of both the electron transport chain and the tricarboxylic acid cycle.\ Sdh5 is required for SDH-dependent respiration and for Sdh1 flavination (incorporation of the flavin adenine dinucleotide cofactor). Mutational inactivation of Sdh5 confers tumor susceptibility in humans [1]. Bacterial homologues of Sdh5, termed SdhE, are functionally conserved being required for the flavinylation of SdhA and succinate dehydrogenase activity. Like Sdh5, SdhE interacts with SdhA. Furthermore, SdhE was characterised as a FAD co-factor chaperone that directly binds FAD to facilitate the flavinylation of SdhA. Phylogenetic analysis demonstrates that SdhE/Sdh5 proteins evolved only once in an ancestral alpha-proteobacteria prior to the evolution of the mitochondria and now remain in subsequent descendants including eukaryotic mitochondria and the alpha, beta and gamma proteobacteria [2]. This family was previously annotated in Pfam as being a divergent TPR repeat but structural evidence has indicated this is not true. 20.90 20.90 22.30 21.70 20.70 19.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.31 0.72 -4.29 149 1961 2012-09-24 13:32:29 2003-04-07 12:59:11 11 9 1889 6 589 1180 1046 73.20 35 68.49 CHANGED cppRLhacu.RRGhhEhDllLssFhcpph..ssls-pchptac.pLLsts..D.DLapWlhsp........shPp....chph......tllpplp .........p+tRlpaps.RRGMhElDlllhsFh-c..p..h..ssLo-s-hphF..cLL.-..ps.........D..sDLasWlhs+........spPs....chcht...thlchI............................ 0 158 312 458 +2018 PF01784 NIF3 DUF34; NIF3 (NGG1p interacting factor 3) Bashton M, Bateman A anon Pfam-B_1006 (release 4.2) Family This family contains several NIF3 (NGG1p interacting factor 3) protein homologues. 
NIF3 interacts with the yeast transcriptional coactivator NGG1p which is part of the ADA complex, the exact function of this interaction is unknown [1,2]. 23.40 23.40 23.70 23.70 22.10 23.30 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.86 0.70 -4.80 122 4441 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 4063 30 1020 3147 628 264.00 27 92.17 CHANGED clhphlcphhPhphu..p.sa.Dss.............................GL....ls............pp..........................plp+lhlslDso.pllccAlp......ts.sshllsHHP...l.ha................pshcplssp......htphlhpLlc.s...........sIslauhHTshDss.s.GhNshLA.chL.....slp.........shpsht......................................................................................................................thuhGhl.Gplsp.s....hohpphhthlpppl.th.........tt......lphs.........shspplp.....+VA..lsuGuGs..phlt..pAhptss..D..lalTG-l...........paHsshpA..ppt.....G......ls..llssGHatoEphhhpt.....ltchLppp.ht.......................l .................................................................................lhphlpp.hhs.phtp..sa..D..s..s..................................GL.....plt.......thcp..........................plp.+....lhsul-soptll-cAlc......tt..sD.hllsH........Hs.......h.ha..............................+s.h.p...s.lt.ss........ptphlppLlp.p.....................................cIslauhHTs..l.......D..........s.......tst..GsNshLA.chL.............Glp..........shp.lp.........................................................................................................................................t.ulGhl.Gplsp...s.........hohtchspplcptL..sh.............ps.......................lchs....................s..sssp...lp.......+VA....lss...GuGp........shlp..pAh.pt.Gs.....D....salTG-l...........................pa+sspsA....hpp........G...............ls......hlssGHatoEph.hhpt.....lschLppt...tt..................................................................................................... 
0 349 670 874 +2019 PF03956 DUF340 Membrane protein of unknown function (DUF340) Bateman A anon COG2431 Family Members of this family contain a conserved core of four predicted transmembrane segments. Some members have an additional pair of N-terminal transmembrane helices. The functions of the proteins in this family are unknown. 23.80 23.80 24.00 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.34 0.71 -5.01 43 1369 2012-10-02 17:06:44 2003-04-07 12:59:11 8 2 1098 0 227 767 126 173.70 40 69.66 CHANGED llsslllGhllGhhhhss.....hth..sphsphhLhhLlFhlGlpLtpsshsl+plh.ls++slhlslhslluollGGllsuhlL.slslppuLAluuGhGWYSLSGlllschhus........hhGulAFlsslhREllulhhlPllh+..hsp.suluhuGAToMDsTLPlIppsuGhcssslAlspGhlLollVPlLlshhhu ..................................l.husVllGhhlGhhthsh........lphs...spsSpas.L.hlLLFllGIpLtssu.hoL.+plh.LN++uh..l....lulls....lluSLlGGllsuhlL..s.........ls.lppuLAhASGFGWYSLSGIL.lo-t.aGP........hlGohAFhs-LhREllAlhhIPhLh+...tttss..AluhsGAToMDhTLPllp+.s.GGh.-.hVPhAIsaGFlLollVPlLlshFs.s..................... 0 68 135 189 +2020 PF03959 FSH1 DUF341; Serine hydrolase (FSH1) Finn RD, Bateman A, Mistry J, Wood V anon DOMO:DM04042; Family This is a family of serine hydrolases [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.56 0.70 -4.89 52 906 2012-10-03 11:45:05 2003-04-07 12:59:11 8 40 298 2 698 2926 1726 210.90 22 69.79 CHANGED Ms.......t.h+lLhLHGatQouphFptKsuul+KhLpc................sh-hsalsuPh.l..s.schs..........................sttsss.huWahpppt..................tsphhsh--ulchlpchlpc....................................pGP...FDGllGFSQGAsluuhlsshhpphh....................................ppP.h....KFslhhSGFp......ts................................app.........ha...ps.l...phPoLHlhGptDtllstp......+SptLhcthps........splltHsGGHalPsppthhpth ...............................................................................................................................................................h.......+lLsLHGhtps....u...p...hFp.....t...p..h...t..s...l..p..p..tl.pp...........................t.h.chsahsu.P...h.h....s......s.s..h................................................ttsshhs...W..at.tp.tt......................................................tthtsh....p.p.u....l....p.h....l..t...p..h....lp.c............................................................................................p.G.P......a......D....G.......l.......l.......GFSQ.GA.s..l.u..u..h..l.....h...t.h..p.p.h.h...............................................................................tts...h..........+.Fu.l...h.....h...u.G.h.h.s.........hs..t...............................................................................................hpt............hh........pt.l...........ph.P.ol.H.l..h..G...p.........p...D...hls............................t...u.tt....L..h.ph.hps...........stlh.p.H....s.u..G.H.h..l.Ptptt.h...h.......................................................................................................................... 
0 185 395 577 +2021 PF03961 DUF342 Protein of unknown function (DUF342) Bateman A anon COG1315 Family This family of bacterial proteins has no known function. The proteins are in the region of 500-600 amino acid residues in length. 32.40 32.40 32.40 32.40 31.80 32.30 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.29 0.70 -6.07 100 994 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 688 0 301 827 74 432.60 19 78.32 CHANGED lplplusDcMpAtl..........plpss......sGpsl.......................shcclhptLpppsl.shGlpcptlpp...hltp.t.h.........hphllAcGp.PhsGpsuplphhhp.tppph.p.ttccs......plDh+sLsplhs..VptGphLh.c+lPsstGpsGhsVhGchl...sspsG+-htlt..h.G.....pustls.cDsphl.l.AshsGtshht.ps.t.....lpVs...sslplc.sVDlso.GNIp...........................FpG.sVhlpG....sVpsGhpVcAsGcIplt...GhV.-uAplpA.tGsIhlptGllGptp....................uplpA.pGslpspal.ptuplpAtsslhlpctlhpsplpussplhl....ptt+..GpllGGp.hputpplpst..lGu...................ptuspTplpl.uhs.sphpcph..ppLcpplpph...ppphpclppt.......lpth.p.pt.ttp......pthpchtpph....pphppplpplcpchp.plppplpph......tps.plpspc.plasuVplpls...stthphpcp.hsssphhh..psspl 
.............................................................................................................h.ht.pth.A.h.........................lh.s......tth.h...........................hhpplhp.hltpttl..hhGl.p....c.....p...tlpc........hhtp.ttt..................ph.lApGhtPlpupsspl.h....hhc...............p..p.t.................ppt..........plD...h.+................p.h.s.hhhs...........Vc.....cs-hlh.ch.h...s.p..p.G.psGhslpGchl........s.hsu...p...-....s.....lh...h.G.......css....ths.......p..-.s...spl..h.As..hsGhshhp..pst........hsVp....shlpls......sV.shpT.GsIc...................................a.p.G.sl.hl.pG......sVtsGhplpAstslplt....Ghl...ppu.plp.A..pusIplppGlhuttt.......................................................................................upltA..ttslh.s....phh.psup.lpAtp....lhlp.phh....h.psplhs.tpplhl................p..stl.u.sp..hpstp.plhsh....lGs.....................th.ss..t.o.p.lps.shp.......phppph....pplpp.p...hpph.......ppth.plppt.........................ltph..p.t............th.p.hpchhpph....tphptphpphptph....plppphpph................................l.h.p.thh.sspl.ht....t.................................................................................................................................................... 0 134 239 268 +2022 PF03966 Trm112p DUF343; Trm112p-like protein Bateman A anon COG2835 Domain The function of this family is uncertain. The bacterial members are about 60-70 amino acids in length and the eukaryotic examples are about 120 amino acids in length. The C terminus contains the strongest conservation. Trm112p is required for tRNA methylation in S. cerevisiae and is found in complexes with 2 tRNA methylases (TRM9 and TRM11) also with putative methyltransferase YDR140W [1]. The zinc-finger protein Ynr046w is plurifunctional and a component of the eRF1 methyltransferase in yeast [2]. The crystal structure of Ynr046w has been determined to 1.7 A resolution. 
It comprises a zinc-binding domain built from both the N- and C-terminal sequences and an inserted domain, absent from bacterial and archaeal orthologs of the protein, composed of three alpha-helices [2]. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.61 0.72 -3.61 69 2406 2012-10-03 10:42:43 2003-04-07 12:59:11 11 10 2242 11 804 1479 422 52.00 41 65.19 CHANGED chpLLclLsCPhs+G........sL............................................................................p....hchtc.....spLlCpss..phsYPIc-G ....................................ch+LL-ILsC..P..ls+G.............L.................................................................................................................................t....astcp..........pELlC..c..t...s..pLAaPl+DG.............. 0 241 477 650 +2023 PF03976 PPK2 DUF344; Polyphosphate kinase 2 (PPK2) Bateman A, Albrecht M anon COG2326 Domain Inorganic polyphosphate (polyP) plays a role in metabolism and regulation and has been proposed to serve as a energy source in a pre-ATP world. In prokaryotes, the synthesis and utilisation of polyP are catalysed by PPK1, PPK2 and polyphosphatases. Proteins with a single PPK2 domain catalyse polyP-dependent phosphorylation of ADP to ATP, whereas proteins containing 2 fused PPK2 domains phosphorylate AMP to ADP.\ The structure of PPK2 from Pseudomonas aeruginosa has revealed a a 3-layer alpha/beta/alpha sandwich fold with an alpha-helical lid similar to the structures of microbial thymidylate kinases [3]. 
20.40 20.40 20.40 20.80 20.30 20.10 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.38 0.70 -5.07 10 2990 2012-10-05 12:31:08 2003-04-07 12:59:11 9 6 1855 12 886 2487 1001 226.10 37 80.64 CHANGED pls+ccYcccLtcLQhELlKLQpWltpcGc+.lVllFEGRDAAGKGGsIKRlT-pLNPRssRlsALsAPT-cEpuQWYaQRYlpHLPAuGElVlFDRSWYNRAGVERVMGFCssppapcFhRclP-FE+MLscsGIhllKaWhuIScEEQhcRFpsRpcsPlKpWKlSPhDlcuRp+W-sYo+A+--MhsRTcTshAPWhVVcADDKK+ARLNsI++lLpulcYpchccs .................................t..p+ppYcpp.Lp.pLQhc.Lhcl......Q.p..........h.l.......t.....p......p......s......p.....+.....llllFEGhDAAGKGGsIK+lh.p.tLsPRsh+Vsuls............t...P....o-c...Ep.........s............ph.........Y..h....Rah.p+LPs.........sG.....cI..slF..DRSWYsRsh..V..........E.........RV...............h...............G...........a...............s...........o.............p.....p..p.............a.............c....c..........h....hc............plspFEc.hL.s..c.sG.hhl...lKaahplSc-EQpcRFpp..Rhpc..P..h..KpW.K.....l.S.s.hDlcs.pc+W-cYp...pAhccMhtpTsTs.t....APWhllpusDK+.tARlsshchlLpp.l.ch.....h................................................. 0 253 565 757 +2024 PF03978 Borrelia_REV DUF345; Borrelia burgdorferi REV protein Finn RD, Moxon SJ anon Pfam-B_26177 (release 7.2) Family This family consists of several REV proteins from Borrelia burgdorferi (Lyme disease spirochete). The function of REV is unknown although it known that gene is induced during the ingesting of host blood suggesting a role in the metabolic activation of borreliae to adapt to physiological stimuli [1]. 
21.60 21.30 21.60 22.00 21.50 21.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.88 0.71 -4.51 6 40 2009-12-11 17:11:53 2003-04-07 12:59:11 8 1 21 0 4 39 4 144.40 56 92.53 CHANGED MKNKNIhKLFFluML..FVhuCKsY.............VcEKKcIDSLhpuVSsLNNcsDcspFKsYK-KINcLKEsLKDluNA.ELcEKlLsLpsLFQDKLAAKLAALKAAKscIpuh...sDcDps.sKsKIWocAKLVGVTlhh.GSsToGpGs.......cMSK-AVEQ....IDcIIKFLEEGTN .........................M+pKNIhKLFFsSML..FlMACKAY..............................................VEEKKpIDSLhpslssLpNctst.....p..pFpsYKpcINcLKEsLKDlGNA...ELcEKLLsLppLFQDKLAAKLAALKA..AKpcIpsh..............TDpDpshsKpcIWuEAKLVGVTlph.GSsosGpGs.......cMSK-AVEQ....I-KIIKFLEEGTN........................................... 2 4 4 4 +2025 PF03984 DUF346 Repeat of unknown function (DUF346) Yeats C anon Yeats C Repeat This repeat was found as seven tandem copies in one protein. It is predicted to be composed of beta-strands. Thus it is likely that it forms a beta-propeller structure. It is found in association with BNR repeats, which also form a beta-propeller. 21.20 21.20 21.20 21.40 21.00 21.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.06 0.72 -4.50 10 31 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 15 0 18 32 1 39.70 39 21.81 CHANGED sQQHVauRss-GsLtHWaWs.sssulpppsW..sGsluGsPsA ........VFuRGoDspLtH+WWp.sssuh.s.tsWt.lGGslsSsPsh.. 0 8 11 12 +2026 PF03988 DUF347 Repeat of Unknown Function (DUF347) Yeats C anon Yeats C Repeat This repeat is found as four tandem repeats in a family of bacterial membrane proteins. Each repeat contains two transmembrane regions and a conserved tryptophan. 
21.50 21.50 21.70 22.00 21.30 20.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.46 0.72 -4.08 170 1475 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 248 0 562 1404 64 55.80 28 80.30 CHANGED hhaWlshlhopsLGsshGDhLs.c..s...LGlGhssuoslhsullssslshhhtppt...ts .......hhaWlshlhopslGsshuDhlsc.....s...................LG..l.Gh.s..suohlhsullss.hlshh.hhspt...s......... 1 143 313 454 +2027 PF01796 DUF35 DUF35 OB-fold domain Bashton M, Bateman A, Krishna SS anon Pfam-B_1390 (release 4.2) Domain This domain has no known function and is found in conserved hypothetical archaeal and bacterial proteins. The domain is approximately 70 amino acids long. The domain is duplicated in Swiss:O53566. The structure of a DUF35 representative reveals two long N-terminal helices followed by a rubredoxin-like zinc ribbon domain and a C-terminal OB fold domain represented in this entry. OB-folds are frequently found to bind nucleic acids suggesting this domain might bind to DNA or RNA. 24.20 24.20 24.30 24.20 24.00 24.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.43 0.72 -3.75 157 1861 2012-10-03 20:18:02 2003-04-07 12:59:11 12 19 530 0 752 1722 492 65.50 26 35.94 CHANGED hhthu.GpGslhoaTssct..sstshh...........PaslulVcL.-..t.........Gs........clhupls........s.s.......ssp......t......lGttVcss.hp ........................hthu.spGslhoaolspt..s.ss.h............Pasl.AlVcL.-...-................Gs............plhupls.........sh.....................ssp.....t.l.t.........lGhpVcssa........................... 0 153 462 641 +2028 PF03994 DUF350 Domain of Unknown Function (DUF350) Yeats C anon Yeats C Domain This domain occurs in a small set of of bacterial proteins. It has two transmembrane regions, and often occurs as tandem repeats. The are no conserved catalytic residues. 
20.60 20.60 20.80 20.60 20.30 20.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.36 0.72 -4.23 173 2291 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1116 0 490 1389 176 53.60 26 71.47 CHANGED asllulllhhlshhlhch.lsP.hchtptIpp...s.NhAuulhhuuhhlulullluuul ................ahhlullh.hllh....hhlhph..ls....P...h..shhphItp..........s..NsAAulh....huushluhulhluush........... 0 138 280 375 +2029 PF03995 Inhibitor_I36 DUF351; Peptidase inhibitor family I36 Yeats C, Bateman A anon Yeats C, Bateman A Domain This domain is currently only found in a small set of S. coelicolor secreted proteins. There are four conserved cysteines that probably form two disulphide bonds. Proteins 2SCK31.15C (Swiss:Q9ADK5) and SCO3675 (Swiss:Q9X8V7) also have probable beta-propellers at their C-termini. This family includes Swiss:P01077 a known peptidase inhibitor of known structure. This protein has a crystallin like fold Pfam:PF00030 and is distantly related by sequence. It is not known whether other members of this family are peptidase inhibitors. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.52 0.72 -3.62 7 93 2012-10-01 23:14:22 2003-04-07 12:59:11 8 5 47 1 33 113 0 89.90 18 54.95 CHANGED uhtcCPsGthChasussusGphhhs..sssstshGs.hssphtShhNptstssCsat.tsYss.....shhhsthshsRGt...h.sshsuslsuhphlpspptC .......sCssGphClassss..asGshhpht....sss..............h.s.....s...h............s.s..........h....ss.p.sp..Shh.Np.osttsshYpptsats...........................................h.h............................................... 0 10 25 33 +2030 PF04001 Vhr1 DUF352; Transcription factor Vhr1 Finn RD anon DOMO_DM06473 Family Vhr1 is a transcription factor which regulates the biotin-dependent expression of transporters VHT1 and BIO5 [1]. 
25.00 25.00 47.90 46.90 22.50 20.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.08 0.72 -4.07 6 53 2009-09-10 17:29:09 2003-04-07 12:59:11 8 1 38 0 31 51 0 94.00 66 16.65 CHANGED spchothGsTH+IREpLNFsDEKKWKpFSSRRLELIDpFsLSp+KASEQD-NI+QIAshLRsEFsYPspsop-FEKLVTAAVQSVRRNRKRSpKpp ....t.p.su.GsTH+IR-pLNFpDE+KWKpFSSRRLELIDpFsLSp+KASEQDpNI+QIAsILRsEFsYPsohosEFEKLVTAAVQSVRRNRKRSpK+.h. 0 5 19 30 +2031 PF04007 DUF354 Protein of unknown function (DUF354) Bateman A anon COG1817 Family Members of this family are around 350 amino acids in length. They are found in archaebacteria and have no known function. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.11 0.70 -5.57 4 167 2012-10-03 16:42:29 2003-04-07 12:59:11 7 3 126 0 117 350 205 320.30 24 90.78 CHANGED h+VWIDIsNuPHl+FF+slIpchc+cGh-lllTsRchGslscLlchhGFphpsIGKHG.sTLpcKLltsucRshtLochIspp+PcVulhKpshELPRlAFGLpIPSIhllDNEpAlstNKLhhP........LscpllhPcshDt.clhchGuD.N.Ip.apGhsElsslps.....a..-splLccLs..hccpphIVMRPEP.tuoYhsu..+cSILscIl-hLpchss..IllhPRsccQ+EIacta-.lhIPccslDsLoLhahuchhlGAGGTMNREAAlLGsPslSsYPGK.LLuVDKaLI-pGhhaHSsDscEIlp....hVhsNhthR+tl+sh....-c.hchIl-hVpshhc 
..............................................................................................................................................lhlDlspss.al+hF+thlpcLcc.c...G...a.-..lh..l..TsR...c...h...s...pshc...LLc..ha..sl..sa...h...s..lG.p..pu....ss...hh.......tK...lht....h..p....R.....t.h.p.Lh.ch...h.p..p..............h.c...P....Dlhl.u.h....s....u.sp....s...s....+.l....u.hs...ls..h.....Psl.....hh.......hD.........s.........-...tu.....h..h................s+L.shP.............hssh.ll....sPtsh................htp..h...Gtc...p.lh..asG...hp..Ehsa.l...p.s............F.p.P.c....t...s.llc..c.LG.....lc...s.p.t..a.l..l....l...R..p.s.h...p.A.s.Y.sss.........pp..sl...h.........c....l..l...ct....Lp...c.h..sp...l.V..h......l....s...cp......p....p......p.....t..p........h......h........c.........p.......h........p....h......h........l..s.......c..s...l........c...h...h...s...Ll...ha...uclhlusGu.TM.spEAAl..LGsPsl...p...h...h.s.........h..pp.h..h.hp...t.h.h.hh..p......t...h..................................................................h............................................................................ 1 28 75 96 +2032 PF04009 DUF356 Protein of unknown function (DUF356) Bateman A anon COG1844 Family Members of this family are around 120 amino acids in length and are found in some archaebacteria. The function of this family is unknown. However it contains a conserved motif IHPPAH that may be involved in its function. 29.50 29.50 30.40 115.80 25.00 29.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.37 0.72 -3.97 10 49 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 49 0 39 47 1 106.90 43 82.18 CHANGED lllIRADshcKlpsALuDL-RauulpItGKPRIlsPshADcllppllGp.lR++s+sAsLVcl-pssscAIs+lR+IHPPAHllVlSsRa.-sYp-Lhcpas+h.cL.+ llLIRuDsa-KlpsALADlcRautlpItGKP+IIsPphADpllcpIlGp.l+KpsKsAslspl-psAs+AIsRlRKIHPPAHIlVlSsca.-sapcLhcpasphP.LK.. 
0 7 15 28 +2033 PF04010 DUF357 Protein of unknown function (DUF357) Bateman A anon COG1849 Family Members of this family are short (less than 100 amino acid) proteins found in archaebacteria. The function of these proteins is unknown. 24.00 24.00 24.00 35.20 23.80 23.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.36 0.72 -4.28 41 162 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 151 2 105 158 72 71.80 33 50.27 CHANGED c+Yhchlc-ALpplc........ts.httssc-hlchAcsYhcDu+aalc.pGDhlsALuslsYuaGhLDAutclGlhs .............................+Yhchhc-ALpplc........t..htsssp-...h...lphAcsYhcDu+aahc..cGD..hlsAluslsYAaGhLDAstplGlh...... 0 25 64 86 +2034 PF04019 DUF359 Protein of unknown function (DUF359) Bateman A anon COG1909 Family This family of archaebacterial proteins are about 170 amino acids in length. They have no known function. The most conserved portion of the protein contains the sequence GEEDL that may be important for its function. 25.00 25.00 52.30 51.60 22.80 18.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.41 0.71 -4.73 31 147 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 144 0 99 150 82 122.20 34 69.31 CHANGED VTp....sllcsGltPtlsllDsKT+Rp.hpppht......htphlpVcNPsGsIo.-llcslccu...htp.scsshIhVcGEEDLhslPsllhAP.s.ohVlYGQPs.pGlVllpVspchKpcspcll.cchptp ......sTtpllcsGlhPslullDtKT+Rp.spppht...........hhtphlcVpNPsGsIot-lhc.slcpu....htp...spsshlhVcGEEDLhslPsllhAPtG.sh..VlYGQPs.cGlVllpVspchKpcspcll.cphp..h............... 0 24 58 80 +2035 PF04021 Class_IIIsignal DUF361; Type_III_signal; Class III signal peptide Bateman A, Szabo, Z anon COG1991 Motif This family of archaeal proteins contains. an amino terminal motif QXSXEXXXL that has been suggested to be part of a class III signal sequence. With the Q being the +1 residue of the signal peptidase cleavage site [1]. Two members of this family are cleaved by a type IV pilin-like signal peptidase. 
23.50 23.50 23.60 23.60 23.30 23.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.85 0.72 -7.04 0.72 -4.10 49 226 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 58 0 144 226 10 27.60 32 22.13 CHANGED p+GQlSlEahlLlhulllsshlsshhhs ..p+GQlSlEahlLlhullllshlsshhh......... 0 19 44 103 +2036 PF04015 DUF362 Domain of unknown function (DUF362) Kerrison ND, Finn RD anon COG2006 Domain Domain that is sometimes present in iron-sulphur proteins. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild --amino -o /dev/null --hand HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.45 0.70 -4.85 115 993 2012-10-02 00:55:42 2003-04-07 12:59:11 7 22 485 0 449 1069 99 221.70 23 62.70 CHANGED +VllKPNllt....sps.Pp........ussTcPpllc.ullchl.t.ctGu..p..lhluDusuhs.......ssppshcpsGhtphsc....chGs..pllshspst.hhthtt............sthhpphtlspslh-.u..DhlIslPpLKsHshst.hThulKNhh.Ghlss................pKst...hH............................................................................t.pphsphls-lsph.h.....pPsL..sllDulhu....hpusGP.s.sGpsh....ph.sh.llAusDsl.AlDss.uspll ........................................................................................................................VhlK.phsp..........ts....................tshhcPthlcsll.chl..p..ph...G..u...p...hlssssshhs..................pthps..ht.tsGhp..hst..............s.s......tl.l......hc..s...phhp.h.h..................tsphhp.php..l.upslh-.....u.......DhlIsls+hK.s.Ht.h...ss....hsuulKNlh.Gssstt..................sKtt......hH........................................................................................ttpth.t...ctls-hsps.l...............+sph.........s.l..h.....ssl.hs.......h.s..uP...s...ssp.st..............hh..sl.lhAS.tD.sV.AlDtsssch........................................................................................................ 
1 226 385 422 +2037 PF04016 DUF364 Domain of unknown function (DUF364) Kerrison ND, Finn RD, Eberhardt R anon COG2014 Family This domain of unknown function has a PLP-dependent transferase-like fold. Its genomic context suggests that it may have a role in anaerobic vitamin B12 biosynthesis. This domain is often found at the C-terminus of proteins containing DUF4213, Pfam:PF13938. 23.00 23.00 23.40 24.20 22.50 22.80 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.52 0.71 -4.88 60 351 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 267 6 183 356 38 129.70 21 50.59 CHANGED ss.h-h.hthttsccVslVGt.......a.P..hlctlc.p...pstplhllEhsst....tth.h....ssttsptlLspsDslllTGoollN.pTl-pLLphsp..sstllllGPosshhPtshhshGlshluGsh..lhDs-tlhcslscGuustthpttsththhh ...............................tsp+VsllGh..........h.P..hlcplt..t.......tstplhlh-hss...........t.tphsh.................ssptsptllsp..sDhlllTGoollN.sTlsplLphs.c.....s..t.h.h..llhGPos.shhs.hhhthGhphlsu........p...hh.hh...................hh.......................... 0 77 130 159 +2038 PF04033 DUF365 Domain of unknown function (DUF365) Kerrison ND, Finn RD anon COG2028 Family Archaeal domain of unknown function. 22.50 22.50 22.80 89.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.33 0.72 -4.00 5 11 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 11 1 9 15 0 94.70 52 69.01 CHANGED MKlIFYASREDQGFaGEAEIERVEha-sPhcIlEKYGDcLFLTcEELK+Y.E+upcRWsu.....cu+RKRPWMVIcL+NIRKYc+VVKPKRFVPVuGRYV+E .MKllFYASREsQGahGEAElEpVchapss.cIlcKYtDcLFLT.EElRcY.p+s.pcRWss.....tu+R++PWMVlhL+NIRKYs+VVKPKRFlsVuGRYl+-. 0 4 5 5 +2039 PF04017 DUF366 Domain of unknown function (DUF366) Kerrison ND, Finn RD anon COG2029 Family Archaeal domain of unknown function. 
25.30 25.30 25.80 170.30 23.40 25.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.95 0.71 -5.01 15 67 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 66 6 53 69 18 186.10 43 97.22 CHANGED M....hhlll--cl-YDGSQIpShWAapsFGlpGDSIVsFRGsC-Vch-cMlDlEDl+pcp.....IpScDMlHFIlEHFDp...DLchuhtRQRLLlull+EhLpch......GlcspRcGDDLYlss+KLSVSIATsSsVShKIHhGINVpocGsP..VsslGLpD....LGlcshp...plhccluppYscEh-cIc+DhpKsRs ............hlhhccplcYDGSpIpshWAacsFGlpsDSIVsFRGsscVcl-cMlDlcDl+tpp......IpSsDMlHFIlEHFDp...DLchshhRQRLhlsll+ElLpch......Glc.hpRcGDDLYhs....s....+KLSVSIAosSsVStKIHhGINVpscGsP..VpslGLp-.........lGlpshp.....clhccluptYscEhccIc+DhpKsR... 0 14 28 42 +2040 PF04034 DUF367 Domain of unknown function (DUF367) Kerrison ND, Finn RD anon COG2042 Family \N 23.80 23.80 24.20 29.50 22.90 23.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.59 0.71 -4.53 10 466 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 417 0 327 458 69 125.70 45 45.79 CHANGED lVLoPhuctslSPcD+-ll-+pGlsVlDCSWs+l-ch.Fscl+s...cptRtLPaLVAANPVNYG+PtcLSolEAlAAsLYIlGhh-cAptLLS+FpWGcsFL-LNcELL-tYupscsS.EIhclppcaLp ..........lVLoPh.uppslSP.s.D.+pl....lppt.GluVlDCSWs+l..--...s..PFs+lpu.......p.p...R..LLPa.LVAANP....VNYG+Ph+LsCVEAlAAshhIs..G.ap-hApp.lLspFpWGcsFL-lNc-LL-tYutC.p.sspElhpsppcaL.............. 0 111 190 275 +2041 PF04018 DUF368 Domain of unknown function (DUF368) Kerrison ND, Finn RD anon COG2035 Family Predicted transmembrane domain of unknown function. Family members have between 6 and 9 predicted transmembrane segments. 
24.90 24.90 25.50 27.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.75 0.70 -5.19 110 1391 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 1306 \N 249 911 878 250.90 32 87.16 CHANGED M..GuADlVPGVSGGTIAhIsGIY-cLlsuIssls......phhphhhp...........tthtthhpthsh.FLlsLhhGlhhulholupll..saLLppaP...lhsauFFhGLIluSlhhlh+.p......................lpt..hshtp...l....lhhllGh.sluhh........ls...............................h........hsss.p...sssshhh..........................lFhuGhlAlsAMILPGISGSFlLLlLGhYssll.sAlpsh..........................slshlhlFuhGsllGLlsFo+lLpalLc+a+shTluhLsGhhlGSLttlW.....Pap.............t..hshhp...tslhshsa ............................hGhusllPGVSGGTlAhllGIY-chIsuluplh..pthc.hhp............................sh.FLlslhlGhllulhhhSpll.......paLLps........a....l.shhhFhGlIluolPhll+p.................................tpp.....hshtc........l..hhhl..h..uhll.s.hh.hs...................................................................................................h.................hssh.t...t.hssshlh.............................................................................................hhlAGhlAusAMlLPGlSGShlLLllGlYsshl.sulpsh..........................tLshlhsh....uhGslsGllshSKllsahLp.+a+shshthIlGlllGSlhhlh.....Phh..........................sss..h.hh.h.hh.......................... 0 76 167 220 +2042 PF04126 Cyclophil_like DUF369; Cyclophilin-like Kerrison ND, Finn RD, Eberhardt R anon COG2164 Family This domain has a cyclophilin-like fold, consisting of an eight-stranded beta-barrel with an alpha helix located between the beta-2 and beta-3 strands and a 310 helix located between the beta-7 and beta-8 strands. The catalytic site found in human cyclophilin is not conserved in this domain, suggesting a different function for this domain [1,2]. 
20.30 20.30 20.30 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.60 0.71 -4.51 14 235 2012-10-02 15:38:38 2003-04-07 12:59:11 8 5 223 5 81 217 103 113.10 22 69.69 CHANGED h+l+Ihhs.....ptcs.sELs-s...pTsctlhcuLPlcupsspWG-ElYFosslpss.-Esup-sV-hGDluYWsPG+AlClFFGsTPhS.ssc.pPASsVNlIGRlsssh.chlppVpcGspVtlc .......................plclphs.....spphpupLt-s...tsucphhptL...Plsl.p.h.p.p..a..G..s..E...h.h.h..ph...Pt.p.l.s.hp.ss........s..t.s.s.s.p.sGDlsYasstss.....lslFY.tpt..............t.h..hGch.ts.........h................................................ 0 35 56 71 +2043 PF01809 Haemolytic DUF37; Haemolytic domain Bashton M, Bateman A, Eberhardt R anon Pfam-B_1485 (release 4.2) Family This domain has haemolytic activity [1]. It is found in short (73-103 amino acid) proteins and contains three conserved cysteine residues. 21.10 21.10 21.80 21.10 20.30 21.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.35 0.72 -4.36 219 3575 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 3453 \N 886 2340 1979 65.50 47 72.84 CHANGED hppllltlI+hY....QhhISPl.h.s.ss.CRFhPTCSpYul-Alp+aGsh+GsaLulpRllRCHPa....ssGG.aDPV ....................pplhlthl+hYQth..I..SPl....h......s.....ss..C...RF.....h.....P.....TCSpYul-Alp++GshKGsaLultRILRC.HPa....stG..G..hDPV.............. 1 320 605 769 +2044 PF04025 DUF370 Domain of unknown function (DUF370) Kerrison ND, Finn RD anon COG2052 Family Bacterial domain of unknown function. 21.20 21.20 21.70 21.50 20.80 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.12 0.72 -3.93 19 534 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 505 0 164 344 8 72.70 64 81.98 CHANGED LlNIGFGNhVsusRllAIVsPcSAPlKRlhp-A+-pupLlDATpGR+TRAVIlhDSsHVILSAlQPETlupRl .......LlNIGFGNIVSAsRllAIV...SP....E....SA....P....IKR....llQEAR-+GhLIDATYGR+TRAVIlhDSsH..VI.LSAlQPETlApRl........... 
1 85 138 153 +2045 PF04027 DUF371 Domain of unknown function (DUF371) Kerrison ND, Finn RD anon COG2090 Family Archaeal domain of unknown function. 25.00 25.00 44.10 28.20 22.60 22.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.60 0.71 -4.49 19 131 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 127 1 86 127 18 128.60 38 81.94 CHANGED l+A+GHcNVpApH+SThElT+DcaLTPpGDCIIGlsAD+uhsDlsccFKptl+cscth.lplhlcs.....sshpDplhGpGpscLoLscssshVhR+SsYl.ssRTlhIpAsKAAtDlsR-llctL+.cGsplhlplhV ..........lpu+GH.NVpApH+oTlElT+-saLTspGDCIlGlsAD+uhtDhspchKptl.psspt..lhlhlcs............ts.hh-hlhGpGsscLsh.spspclVhR+Ssal.ssRTlhlpAsKAApDlsR-llptL+.sssphplpl..................... 0 23 51 72 +2046 PF04036 DUF372 Domain of unknown function (DUF372) Kerrison ND, Finn RD anon COG2098 Family Domain of unknown function. 20.40 20.40 21.00 40.50 19.90 20.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.65 0.72 -4.49 15 84 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 82 14 60 90 6 38.00 56 31.52 CHANGED ERAhFEAGIKLGALYHQFsGTPVu.coAsSLEpAIE-S ..EcAsFEAGIphGALaHQFsGTPVS...coscSLEpAIE-u.. 0 11 41 52 +2047 PF04123 DUF373 Domain of unknown function (DUF373) Kerrison ND, Finn RD anon COG2237 Family Archaeal domain of unknown function. Predicted to be an integral membrane protein with six transmembrane regions. 
22.10 22.10 22.20 22.20 21.50 21.90 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.98 0.70 -5.42 35 156 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 130 0 101 157 110 341.20 29 93.44 CHANGED M.chLVlsVDRDsDlGcKsGlpTPVlGR-sslcAAlcLulsDPEDSDsNslFuul+lY--Lc...sc.Gc..-VEVAsloGspcs.ulpuDhplucpLDpVLpphsss...ssllVoDGu-DEpllPlIpS+l.lsuVcRVVV+QupslEoTYYll....KcslsDschpphhL.lP.GllLLlaulhtlhsh..............sshslsslshllGhahLh+Ghul-chlpphhcphhtsh..t.clohlohlluhhllllGlltGhhsl.phtshsshhhhs..........alhshl.ahshuslh.shlG+ll-phlpschphhphlst.hhlhuhthllashssah.hs............hhhsslphhhhslssslllulhs .....phLVlsVDhDDDlGcKsGlpoPllGR-sshcAAscLulsDPEDSDsNslasul+lYD-Lcpc...Gc..-VElAsloGspcs.ulpushplucplDplltphsss...tsllVoDGs-DEpllPlIpSRl.lsuVcRVVV+QucslEoTYYll....KchlsD.chpphhL.lP.GllLLlaulhtl.hsh..............sshshsslshllGhahl.h+Ghul-chlpphhcph.......h..G.plohlohlluhhllllGhltuhhtltshtsttshhhhs.................alhshlsahsh.uhlh.h.hG+ll-phlpp..ch.phhphlst.hhl...lulhhllhs.hsthhhsh.....................h.hssh.phhhhhlsshlllul..h....................................... 0 23 61 85 +2048 PF04028 DUF374 Domain of unknown function (DUF374) Kerrison ND, Finn RD anon COG2121 Family Bacterial domain of unknown function. 26.00 26.00 26.80 26.20 24.30 25.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.39 0.72 -4.74 79 649 2012-10-02 00:16:30 2003-04-07 12:59:11 8 4 572 0 167 448 385 78.70 34 33.22 CHANGED hhshha..tp..........tphtsllSppcDG-llupslcthGhpslRGS..........os..+GGspAltphlctlc.pGtsl..ulTPDGP+GPtc .......................hshha...tp...........tchhshlScpcDGEllApllcph.GhpslRGS............os+GGspALpphhctLc..pG..p.sl..slTsDGP+GPh...... 0 71 123 147 +2050 PF04041 DUF377 Domain of unknown function (DUF377) Kerrison ND, Finn RD, Mistry J anon COG2152 Family This family contains many hypothetical proteins, some of which are predicted to be glycosyl hydrolases. 
This family was noted to belong to the Beta fructosidase superfamily in [1]. 20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.06 0.70 -5.71 8 965 2012-10-02 00:26:57 2003-04-07 12:59:11 8 6 499 13 330 923 112 286.00 27 80.25 CHANGED hh+KhPs.PIlc.Po.hpGaDs+h................sFNPuVl.hcpch.....hVhlYRsps.cshsohRIGLuhSpDG......lpastcPEslhhPchphEhhG...lEDPRlsKlsccYhMTYTGhss+hsRlsluso+shloat+hs....shhs.Fshp....phho+sGslhPpKl.....sGcYlMaa+........sushaLAhS..sDtlpW-shhcs....lh..pscpsha-s...lKlGsGsPPlcsp-G..LVLhH...sss...s-phlYRlGsALhDt-sP.+...llhRocp.ILEPEt.aEhaGcVsNVVF.ssu.hV-ts.t+lllYYGuADptlGLAphs..lcclhchscc ................................................................................................................hh......................................................hhNsu...sh..httt..........................hhhhhRs............p....s........t.....t......h........h.................................hl....thu...S...pDG........................l.p.a.p.h......p....c...P....l...h..h....P...p....s.....p.............c....t.....h.....s......................h.DsRl.s..........t..h.....-....-.....t.....Y.....ah....sa.s...........u...........h...........s........t...........p...........s...........s..p..........h....u............l.u.........t.........o....p...D.h...t.....s...apphs..................................hh..s....s....................................+..ss....s..LF..P.c.........+l..........sG.....+.Yh.hhpR..............................tslalu.h.S..........sD........h.h.t.......W......s......t...p.pp.........................lh....ps.p..........t...h.a..cs.............h+l....GsGssPIcT.........p....c.......G.....WLhlYH.G......s....................sss...h.....t..Y...shGsh..L.L..DlccPs+.....lltRspp.llt..Pp....t......a...E..........p.......G...........V.s.N...VVFssus..l.........h.......c........t.......c.....p.lh.lYYGsADoh..hulAhhp..lpcllp....h.............................................
.................... 0 164 266 307 +2051 PF04070 DUF378 Domain of unknown function (DUF378) Kerrison ND, Finn RD anon COG2155 Family Predicted transmembrane domain of unknown function. The majority of the family have two predicted transmembrane regions. 25.00 25.00 32.20 32.10 24.30 22.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.00 0.72 -4.35 26 729 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 606 0 136 385 24 60.70 43 82.79 CHANGED M+slshluLlLlIlGuLNWGLlGl..FpaDLVAulFG.Gps.....oshoRllYlLVGluulaplh.hhp .........hphlphluLlLsIlGulNWGLlGh.....F..pFDLVutlF...Gss.......ohluRIIYhlVGluulYslhhh..t.... 0 54 99 112 +2052 PF04038 DUF381 Domain of unknown function (DUF381) Kerrison ND, Finn RD anon COG2098 Family Archaeal domain of unknown function. Strongly conserved YPLM motif. 25.00 25.00 32.00 31.60 16.90 16.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -8.89 0.72 -3.93 21 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 85 14 63 93 5 60.00 33 50.59 CHANGED lpslsVcIcpshlcsphs..au.YoELsGcMLcVclplcYsusplpsphca..Eh-YPLMhlcc ..............scclpVcIcp-hlcppht..hu.YopLsGcML-Vclplch..psspstsplca..EhsYPLMhlc...... 
0 13 44 55 +2053 PF04063 DUF383 Domain of unknown function (DUF383) Finn RD, Kerrison ND anon DOMO_DM06609; Family \N 23.50 23.50 23.50 23.60 22.10 23.40 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.26 0.71 -4.76 31 308 2009-09-11 22:59:18 2003-04-07 12:59:11 9 8 266 0 219 301 3 182.50 32 50.38 CHANGED hsplsc.pps....ADhh...shLLuNLu+s-shsp.llshppp...............................................pth.tsppsl-pLhDsFsKGhp........................................sshN+..cAsaDYLuhlFAslS+hccGRpaFl...........................scQch-t.l.PlsKLlsFTE+t.SplRRcGVAoTIKNssFDtstH....hLLs.....--t.................lslLPalLLPlA.GsE..EhsE.E-hhsLPs-LQ..hLPsDKcR-sDssIhph ...........................................................................................h...lhp.pp....uc.hshlLuNLs+.t-shtp..llpht.pp..........................................................ps....hslspLhcsFsp......................................................tshNc..push.cYLu.lhuslSphtpuRpahl...........................sppchs...........lp+LlsaTcpt...SplRRt.GVsuTl+NssF-tppH....thLLs.......pp................................lslLPalLLPlu.GsE.................-hs-.E-h..tphPs-LQ..hL.P.-K..pR-sDssIhh................................................................. 0 72 119 184 +2054 PF04064 DUF384 Domain of unknown function (DUF384) Finn RD, Kerrison ND anon DOMO_DM06609; Family \N 20.40 20.40 20.50 21.00 20.00 19.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.90 0.72 -4.40 29 294 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 252 0 203 282 2 57.90 36 15.57 CHANGED oLhLLssT+pGRchhRs+sVYsIlREhHph....p....p-.cclt-sC-+......lVplLhRcEsptsh-p ......slhLLssT.+tGRchlRstslY.IlREhHph.....E...................p-...ppVtpuC-+...............lVplLhp-.E.tts................................ 
0 72 116 170 +2055 PF04074 DUF386 Domain of unknown function (DUF386) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family consists of conserved hypothetical proteins, typically about 150 amino acids in length, with no known function. 20.10 20.10 20.50 20.50 19.70 19.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.79 0.71 -4.70 13 3287 2012-10-10 13:59:34 2003-04-07 12:59:11 7 3 1492 8 258 1385 24 148.30 31 97.21 CHANGED M.Iluclsphshhhuls.tlpcsl-aLpspshssLssG+a-IcGDplahsVhphpTpsssscphEsH++YlDlQlllpGpEpItaussht..shps.psas-pcDltahtsscscptlpLpsGpFslFaPt-sH+Ptshsss.sppl+KlVlKVtls ....................................................................M.Ih.splp.p..h...p....h....t...l..sst.lpcu..l....-..a..l.ps..........p......t..hs....sht.s...G...+..h-..l.....c...........G...c.plFhpl...p...p..Tp.s.st..p..pp.hEhHc+YlDIQllL.p.G.p.E.t.hsaussss...st...ps..ps.a.pc...-cD.l.t...a.h............s........s..p...s.....E...p..hl..t.L..p.....s..GpFs.lFaPt-sH+Ptsh.s..s...p.....s.......t.......t.......l..+KlVlKVth.t......................... 0 59 125 184 +2056 PF04079 DUF387 Putative transcriptional regulators (Ypuh-like) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family of conserved bacterial proteins are thought to possibly be helix-turn-helix type transcriptional regulators. 
27.40 27.40 27.90 27.80 27.00 27.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.71 0.71 -4.86 139 3001 2012-10-04 14:01:12 2003-04-07 12:59:11 11 3 2904 3 769 2172 2160 158.50 36 73.66 CHANGED shlEAlLFsuuc...P.lolppLsplh...........s...tsplpphlpcLppcY..................ttps+.Glplhch.ussaphtT+schuphlpchhpp..t..tcLSpAuLETLAlIAY+Q.PlTRu-I-plR.GVs.usshlppLhc+sLlcptG+pcs.sGRPhL.YsTTcpFLchFGLps..Lc-LP....sl ...........................t.hlEAlLFs...uu-...s.lshcpLscllt....hs...............splpphLpcLtpcY.......................................p.s.p..GlpLhcs.us..saphtT+schuphlpch.hpp......t...ptpLSpAALETLAIIAY+Q.PlTRu-I-p...IR.GVs...ossslppLht+sLIcts....G.....+p.-.s....sG..RPhL...YsTTctFL-hFGLps.Lc-LPs................... 0 268 528 663 +2057 PF04076 BOF DUF388; Bacterial OB fold (BOF) protein TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Proteins in this family form an OB-fold. Analysis of the predicted binding site of BOF family proteins implies that they lack nucleic acid-binding properties [1]. They contain an predicted N-terminal signal peptide which indicates that they localise in the periplasm where they may function to bind proteins, small molecules, or other typical OB-fold ligands [1]. As hypothesised for the distantly related OB-fold containing bacterial enterotoxins, the loss of nucleotide-binding function and the rapid evolution of the BOF ligand-binding site may be associated with the presence of BOF proteins in mobile genetic elements and their potential role in bacterial pathogenicity [1]. 
23.60 23.60 23.70 23.90 23.50 23.50 hmmbuild --amino -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.10 0.72 -4.20 7 1522 2012-10-03 20:18:02 2003-04-07 12:59:11 8 1 890 1 148 625 10 102.00 40 81.34 CHANGED uhs...........tGGFpGss.sstsoVctAhphtDDs.VsLcGsIlcplscDhY.FRDsoG.pIpl-IccphasG.plpscshlpIpGclD.+chsps-lDVctlpK ...............................ttts.......s.s.ps.tsG..ap..G..ss.....u.ph..h..T...Vcp...A...K..s...h...+D-sh.....VoL+GNllc...+l..u...-DpYlF+DsoG.pIsV-IDcc.ha...s...G.h.s.VsPc.spVcIpGclD...Kc.h..s.s.s.cl-VcplpK................... 0 16 45 102 +2058 PF01837 DUF39 Domain of unknown function DUF39 Bateman A anon Pfam-B_7373 (release 4.0) Domain This presumed domain is about is about 360 residues long. The function of this domain is unknown. It is found in some proteins that have two C-terminal CBS Pfam:PF00571 domains. There are also proteins that contain two inserted Fe4S domains near the C-terminal end of the domain. The protein Swiss:O26943 has been misannotated as an inosine monophosphate dehydrogenase based on the similarity to the CBS domains. 
25.00 25.00 42.10 41.80 19.00 18.70 hmmbuild -o /dev/null --hand HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.94 0.70 -5.79 75 219 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 176 0 141 228 13 356.80 43 84.63 CHANGED AlVlTAEEhhshVc-.GhppssccVDVVTTGTFusMsSSGAhlNhGa.o-PsI+h..p+saLNGVsAYuG......luAVDsYlGATphs-pss................pYGGGHVIcDLluGKplcLcApuhsTDCYPR+pl-splTl--lspAhLhNPRNuYQNYssulNsu..-+sl.aTYhG..sLhPphGNssYSsuGpLuPLhND......PthcsIGlGTRIFLGGutGYlshpGTQHss.........Phssh.uTLhlhGDhKpMsscalRGshhpuYGsoLhlGlGlPIPlLsE-lhptsulpDcDIhsPllDau..hsppsRs.slutVoYu-L....................................................................+SGpI....plpG+cV....p.TuslSShthAcclApcLKchIppGcFhL .AVVlTAEEhhphVc-.GhppuAc-VDVVTTGTFusMsSSGAhlNhGH.o-Psl+h..p+saLNsV.AYuG......luAVDhYlGATphs-sss................cYGGGHVI-DLl.uGKplcLcApuhuTDCYPR+pl-spIol--lNpAhLhNPRNsYQNYssulNsu..c+sl.aTYhG..sLpPphGNssYSssGpLSPLhND......PthcoIGlGTRIFLGGu.GYlhhpGTQHsPt...........ssh.uTLhlh.GDhKpMsscalRGshhpuYGsoLhlGlGlPIPlLNE-lhptsulpDcDIhsPllDau..hPpps+s.sluhVoYu-L..........................................................................+SGpI.........plpG+cV......TuslSShhhAcclApcLKpWIppGcFhL.................. 0 48 104 129 +2059 PF04094 DUF390 Protein of unknown function (DUF390) Bateman A anon Pfam-B_1698 (release 7.3) Family This is a family of long proteins currently only found in the rice genome. They have no known function. However they may be some kind of transposable element. 
19.60 19.60 19.60 19.60 19.10 19.40 hmmbuild -o /dev/null HMM SEED 828 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.48 0.70 -13.45 0.70 -6.49 2 813 2009-01-15 18:05:59 2003-04-07 12:59:11 9 20 6 0 486 813 0 267.70 29 75.06 CHANGED MRTaQGhpWDWsPEDF+hlVQRVLNLsSVEASLIPQtlLPLCsDP-RAsILTIM.tVGAStERA.+GHDGAGGSRRGEQSTPGGGRASGsRDtGPGuSRPADARGKRKQtGTPsPSPPRGGGAVRASSRRPEGAsPTSQPEGERKKKRhpKMGtsp.utGs..p..thph...................................h.shSEIPSRPSRHSKSGpSEAE-sAsAEARRREuDRREAADRLREAEEAAQ-AsRsRQAEEsAREEA.RhRpAtEusRE.cAu.RtcpAh............sp.uPsPossttsTTStAstDEAAGs.LGPsPSGDA.DpPusGshP-SGTSIGGPSRAAsoPRRL..hPShAPLsAEPLLQALAAsNTTVLDGLSAQhEsLQAERAELDAAWARVEEGRRSVEAMVEhGRKAHRRHsSELEsR++sLAEIA+EVEEERtsALIATTVhNEspDsLRLQYGShtAELcKKLDAApGVLDAAAAREpRAAEsEAASRpRE.ALEARAhALEERApshERDhADREAAVsIREATLAAHEAACAEEE.ALRLREDsLTERtRuLEtAEuAAQpLADSL.LREAApEEQARRNLEGARAcRAALsQRsAELEARAKELDARA+SGGAAsG-uDLAARLAAAEHoIA-LQsALDSSAGcVEALRLAGEVGPGMLhDAVSRLDpAGRQsGLWtGRssKYAANQGGLAQRLScMAGsLQRLPEELEcTIKSSSRDLApGAVELVLASYQARDPsFSPWhAL-EFPPGTEDusRAQVRDAAspIVcSF-GoAPRLsFA.sSDEEGsssGAsDuDDEAGDPGAS 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 1 1 1 +2061 PF04134 DUF393 Protein of unknown function, DUF393 Mifsud W anon COG3011 Family Members of this family have two highly conserved cysteine residues near their N-terminus. The function of these proteins is unknown. 
25.60 25.60 25.60 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -11.11 0.71 -2.97 156 1577 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 1266 0 502 1346 1808 108.70 23 68.72 CHANGED llaDGsCsLCstts..phltct.....Dp.t........t...plpassl....psststphh...........................t..s.ls..hpphpsh........lh....tsp...........................hh.ps....scAhhpl.hptlsh...hthlthhhh..lPth..ltchhY.phlAcpRaphhup ...............lhaDGsCshCsshsphlhpp......Dt.t.........t..phpassl..........psptstp..h...............................................................tt.s.ls......hpphpsh..........lhh...pssp.............................haht....ssA.hhpl..hp.....t..lst.......hphl.s.....hh.hh....h.....Phh....ltchsY.chhApsRhch.............................. 0 152 315 430 +2063 PF04143 Sulf_transp DUF395; Sulphur transport Bateman A anon Pfam-B_2577 (release 7.3) Family This is an integral membrane protein. It is predicted to have a function in the transport of sulphur-containing molecules [1]. It contains several conserved glycines and an invariant cysteine that is probably an important functional residue. 22.00 22.00 22.10 22.00 21.60 21.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.05 0.72 -3.93 106 5952 2009-09-11 12:44:05 2003-04-07 12:59:11 9 11 2209 0 1539 4246 1254 43.90 30 23.66 CHANGED hhthllG.GhlhGhGhtluuuCssushhssssshsh.uhlshhuh .............tsllG.GllhGhGhsLAGGCssGshhsussshsh..uhlshls....................... 0 487 974 1295 +2064 PF04148 Erv26 DUF396; Transmembrane adaptor Erv26 Wood V, Finn RD anon Pfam-B_22900 (release 7.3); Family Erv26 is an integral membrane protein that is packed into COPII vesicles and cycles between the ER and Golgi compartments. It directs pro-alkaline phosphatase into endoplasmic reticulum-derived COPII transport vesicles [1]. 
22.20 22.20 22.80 22.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.64 0.70 -4.96 19 254 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 225 0 176 237 4 186.10 38 87.32 CHANGED M.lL.LLuYlGsllGFsFLTLuIASGLYYlSELVEEHo..s+RhLpRhIYslIslhlLLhlhDpFPapLolhSIsoahVYhpNL.+cFPhlpLouPhFllSClLVllNHaLWFcaFsp..........s...s.php.....Y.....t....hsoFsEVsSaFulCVWlVPFALFVSLSAuDNVLPotspptsstt.............................s..t...+p+spGLhKsllsslRchl .....................M.hl.llualuhhlthhFlTLul..................A......uGLYYLuELlEEaTshs+RllphhIahhhsl.hlhLhlh-pFPhh.hsh.hulhopllYht.L..ppFPalp.L..ouP..FlhSClLV.llNHaLhFpa.Fsp...............................................h.sFsE.lhuaFslClWllPFAlFVSLSAs..-NVLPohspt.s.................................................+t+t.Ghh.slhs.h+t..h.......................................................... 0 59 97 141 +2065 PF04149 DUF397 Domain of unknown function (DUF397) Bateman A anon Pfam-B_3066 (release 7.3) Domain The function of this family is unknown. It has been suggested that some members of this family are regulators of transcription. In particular, it is thought that this may regulator of antibiotic production in Streptomyces coelicolor [1]. 19.70 19.70 19.80 20.00 18.90 19.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.82 0.72 -4.39 107 1426 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 87 0 637 1558 2 53.40 37 77.67 CHANGED pspW+KSShS...........................sssusCVEV.A.......................ssss......s.lu.VRDSK.......c.P.sGPsLsassstWsuFlsus+s ...........................................t.htWpKSSaS...........ssuusCVEl.A.................t.hss.............s.lslRDSK.......s.s...sGP....sLhhssstWsuFlsusp................ 0 143 532 615 +2066 PF04187 DUF399 Protein of unknown function, DUF399 Mifsud W anon COG3016 Family No function is known for any member of this family. 
27.70 27.70 27.90 27.70 27.40 27.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.51 0.70 -4.84 84 689 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 634 1 222 600 105 207.30 26 61.87 CHANGED slshpplhtpltsscllllGEhH-ss..scHtlQhpllpsL......tp.....ps.phuluhEMhspcpQshl-pahsu..........phsppplhpthta.p.sWs...ashYpPllphAtppplPllAuNls+shs+plsctG...httlstt.ccthlss.ts..h.s...ssshcphh....tphh.....suH.tt..............................................................hsps...ht.................thhpsQ..thhDpsMApslhphh.........sttllllsGshHs...cpshGlPtpltc ................t.hohpplhtclt.s..AcVlllG..EhH..sps..tcHthQ.htllps...Lt........pp...............ph..phsluhEMhspspQshlD....ph.hps............plspp.pLhpth..h.p..s.....Ws........aptYpPllph.Ahppph..slluuN.ls+phl.pplhp.G....htth.........ppp...h...l.ss.hs......h.s...ssshcphh.t.hh.....tu.Hpts......................................................................................stp....hp.............phhtsQ..hhtDcsMAcsllpth..................tphllllAGshHs...ppshGlshplt........................................................................................................ 0 76 146 189 +2069 PF04165 DUF401 Protein of unknown function (DUF401) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Members if this family are predicted to have 10 transmembrane regions. 
24.00 24.00 24.00 24.10 23.70 23.90 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.41 0.70 -5.45 4 109 2012-10-02 15:12:49 2003-04-07 12:59:11 7 2 97 0 54 238 36 372.40 24 97.56 CHANGED slolLLh.FsVllVLIhl.KVNIGluIFlGuhhLAhLotLGlsGhLhoLh..us..pWsplhlIlIIuFlhGhopha..hGhLc+hppuhhclFs..KaShhhlPALIGLMPMPuGALlSAPMlcslspcaplsPEhtTllNYWFRHlWEh.WPhY.AlVlsSAllGlshtclSlphFPlsllhhhlGalFFstshccshpssRN....Lh..lhslYPIllllllSVlltLDhLhG.hluhLSl.l.NhhRlp.L+ElLcRsFp.+.IlhLLhAVMhaKsVIEsSGls-ulsthhlSaplPshllLhlhPhllGlhTGIohAYVuhshPLLpsFFou....hcplsLsYhuGYhGllhSPVHLClVhSAEYatsEltKlY++hLLPullhhllGhlhlhlI ................................................................h...hhhhh.hl.llhhh....h.h....+.+.ltl..u...h.uhh....hu.u.h..ll..shhh......h....s.h....pthh..h.shh....ss....phthlt.hl..l...h...l...hh.l..hshh.ph....Ghlcphhp..ulptlhsp...+hshs..h..hP..AhIGLLP.PGGAlhSAP.hVcp.ssp.ch.tlssc...cpshlNYWFRHlaEhhhPlYsulllsusls.s..lsltph.lhth.hPhs........lhhhlhG..a.l...hh.hp..........h...h.......pp........t.......t.....pt..ps...................................hh..hl..hs.hhPllssl....h........l.....s............l........h...h.............t...........l....s............h.h.........h...u..h.h...h.s.l...h....s....h..h.lht......hh.php.l.h.phl...ppuhp...h+....ll...hhl....h.hlhhFpphl.p....h...oG..sh....ptlsthhht.hs.lPh....hll..hhhls....FlhGlhTG....h....s.....uhl.ulshPl..lhsh..h..s.u.......hshh..s...lsahhuahGhhl.SPsH.l.CLllos-YFcschhplh+...h..l.h.hs.h.llhhh..hshh.....hhh........................................ 0 28 44 48 +2070 PF04167 DUF402 Protein of unknown function (DUF402) Kerrison ND, Finn RD anon COG2306 Family Family member FomD is a predicted protein from a fosfomycin biosynthesis gene cluster in Streptomyces wedmorensis [1]. Its function is unknown. 
21.50 21.50 22.10 21.50 21.30 19.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.26 0.72 -4.15 76 1865 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1388 4 316 897 10 69.90 40 36.02 CHANGED hcttchhlphh..tsphaslthhh.cs.psphhshYlNlssssthppp....slchlDL.LDlh....l..hsss.....phpllDtDEh ..................................sc....csslsaFa..pc..haasl.....hsMh..cc.......s....slpY..YsNluo...Phhh.-cc............sl+alDa-LDlp.............V..hPsG.......chcLL.DtDEa....... 0 95 196 271 +2071 PF04168 Alpha-E DUF403; A predicted alpha-helical domain with a conserved ER motif. Kerrison ND, Finn RD, Iyer LM, Abhiman S, Burroughs AM, Aravind L anon COG2307 Family An uncharacterized alpha helical domain containing a highly conserved ER motif and typically found as a tandem duplication. Contextual analysis suggests that it functions in a distinct peptide synthesis/modification system comprising of a transglutaminase, a peptidase of the NTN-hydrolase superfamily, an active and inactive circularly permuted ATP-grasp domains and a transglutaminase fused N-terminal to a circularly permuted COOH-NH2 ligase domain [1]. 
19.00 19.00 19.30 19.30 18.90 18.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.91 0.70 -5.05 180 1295 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 831 0 486 1208 743 297.00 26 60.07 CHANGED MLSRsA-sLaWhuRYlERAEssARllc.ssh......phshst.....ssss...ppht.slltssGstssattp...........t.thstpslhpalhh..DpssPuS..lhuslpsARpNA+tlRstlos-hWcslNphahplpphttpth..........................sppshh..phhchl.pppsthhpGhhpsoMhRs-uacFhplGphlERADpTuRlL-l+atsh.......................ss.h.-thpWpslL+usuuhpsYRphY.psshpstsls-hLlL-sphPRSltaslpplpppLppLt....tt..st...spspchhsp.lpspLphs.slcclh...................ttuLcpaLsphhpplsplustIsppYh ....................................hhSRsA-sLaWhuRYhERAEsh.ARllcssh.....phthts..........tsss.........ptht....sll....ts.hG..h...sshtpt...............h.......t...hs..tp.s..l...lphlhh......DtssPuSlhuslpsARpNA+slRstlos-hWcslN.s...hh..h..t.....l.pp.h.t.p.p.s.............................................ttt.shh....phhphl.tpps.thhpGhhtsohhRs-uapFhplGphlERu-t...ssRlLssphthh.........................................t..hcthtapslLcsssuhtsY+phY.p.s.t.hp...stsls-hLlLctphPRSlhaslpplpppLpp.L................t.thsstspchhsplt....s....pLphh..shpplh.........................ttsLpphLsph.tphtplustlsppah.................................................................. 1 137 300 400 +2074 PF04175 DUF406 Protein of unknown function (DUF406) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Members of this family appear to be found only in gamma proteobacteria. The function of this protein family is undetermined. Solution of the structures of the two members of this family investigated bear some resemblance to that of the single domain enzyme pterin-4a-carbinolamine dehydratase, PDC. 
Although the residues of PCDs involved in binding of metabolite are not conserved in the two structures under study, they do correspond to a surface-region structurally aligned with residues that are highly conserved, eg Glu 89, suggesting that this region is also involved in binding of a ligand, thereby possibly constituting a catalytic site of a yet uncharacterised enzyme specific for gamma proteobacteria. 25.00 25.00 25.60 32.60 22.50 22.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.10 0.72 -3.52 51 1127 2009-11-21 14:42:31 2003-04-07 12:59:11 7 1 756 3 94 348 5 92.60 50 97.42 CHANGED p.p..pps......-pCssCt..sssDlGslIDssDsshplshshsspttAcsthsphsp+A+ssp.....splpsplsss.-suhpLphsFsFpCpAEphIFQLphR .............Mp...sss...DcsssC...CshDlGTlhDNsDCTupaSRhFAoRtEAEptLstLs.E+A+uVp..oEsspls.phs-..p.sGVcLDhcFTFuCpAEhlIFpLuLR..... 0 8 27 63 +2075 PF04174 CP_ATPgrasp_1 DUF407; A circularly permuted ATPgrasp Kerrison ND, Finn RD,Iyer LM, Abhiman S, Burroughs AM, Aravind L anon COG2308 Family An ATP-grasp family that is present both as catalytically active and inactive versions. Contextual analysis suggests that it functions in a distinct peptide synthesis/modification system that additionally contains a transglutaminase, an NTN-hydrolase, the Alpha-E domain, and a transglutaminase fused N-terminal to a circularly permuted COOH-NH2 ligase. The inactive forms are often fused N-terminal to the Alpha-E domain [1]. 
100.00 100.00 101.20 100.50 99.90 99.40 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.93 0.70 -5.82 28 1353 2012-10-10 13:17:03 2003-04-07 12:59:11 8 8 856 1 522 1302 873 327.40 42 53.87 CHANGED PtlIsusEWstlEpGlhQRscsLsthLsDlYucpcllps....GllPsplVhusssatcthhGlpsPtshalHlhGhDlsRss-GpahVLEDNscsPSGluYsLENRphhpRhhPclapph.....plpslssahptLhcsLpshusss...sPp.lVlLTPGsaNusYF.......EHuaLA+phGl.LVEGcDLhlcDs+Vah+Th.pGhp.pVDVlYRRlDD.DaLDPLthpsDShLGVsGLhpAhRtGsVsls.NAhGoGls-DKulhsal.Pphhchh....LGEc.hLss..VsTahCsps.schcalLspLccLVlKsstssG.....GhuhllGsphop.tptphhpchhtts..salAQ ....................................PRlIsusEWppl-pGltQRlcALNtaLsDlY.s.....c.Q...........cIl+s....GllPscllhusst...a...h....t.....hh..Gl...p.....s...P........s.s..l.......a.....hHlsGhDLlR.s.s....-....G....p....ahVLEDNhRsPSGlS..YhLENRchhtR.haP-L...Fpph.........plps.l.s.s.Y....sptL........h.psL......ps..........h...............uP.................s..................s.....................ts...........Pp..lVlLTPG.haNS.A.YF.......EHuaLAcp.h......G......lpLVEGpDLhV.c.D.s.p.VahR.....Ts.pG..h.c..pVDVlYRRlDD.s..FLDPLsFR.s..DS..hL..GV...s...GLlpAhRuGsVslu.NAhGoGluDDKulYsal..PchlcaY......LG.....E.c....sl....LsN.....VsTahCtcs..sphpaVLspLs....c....LVlKssps..uG.........Ga..Gh..l.lGPp......soptphtpht.t.+.l.hspPt.saIAQ...................................... 0 150 328 435 +2076 PF04181 RPAP2_Rtr1 DUF408; Rtr1/RPAP2 family Wood V, Finn RD, Bateman A anon Pfam-B_22202 (release 7.3); Family This family includes the human RPAP2 (RNAP II associated polypeptide) protein and the yeast Rtr1 protein [1]. It has been suggested that this family of proteins are regulators of core RNA polymerase II function [1]. 
20.20 20.20 20.70 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.14 0.72 -3.89 45 383 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 326 0 228 335 3 76.20 28 20.32 CHANGED hsphlo.spYc-llpER...sls.phCGYs.lCspshtph......pppaplssp............spplaphp..c.t..........................................paCSptChpt.StahpsQLsp ..........h...hhp.spYp.-llpER....sl..phCGYs.LCscshpp.....................+tpa+lstt..................ppplhshs..c..t.....................................................................................................................paCSptChcp.upahthQl....................................................................... 0 65 117 185 +2077 PF04188 Mannosyl_trans2 DUF409; Mannosyltransferase (PIG-V)) Wood V, Finn RD, Mistry J anon Pfam-B_9248 (release 7.3); Family This is a family of eukaryotic ER membrane proteins that are involved in the synthesis of glycosylphosphatidylinositol (GPI), a glycolipid that anchors many proteins to the eukaryotic cell surface. Proteins in this family are involved in transferring the second mannose in the biosynthetic pathway of GPI [1] [2]. 
22.40 22.40 22.50 22.50 22.00 22.30 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.93 0.70 -5.57 4 631 2012-10-03 03:08:05 2003-04-07 12:59:11 8 10 512 0 342 606 30 277.80 20 72.07 CHANGED Rpt..lhhatlhsRhlhLhLshLa..hhhshsputshssss......s.sthlsshhp+hLhs.hlpWDulaFlc..hucsG...aEppaAF.sLaPhhlplhs.phhsslhsLLultushh.shh.ls.hlhahlAshhLaplsp.lhpspchohhsullFChoPAulFhouhYSEuLaAhFoFsGlhph.pup........shsushhFuhush.hRSNGlhsssahshsthttha..uLhpLphshhhhphhsuhhLpshhlhlPFhh.QYYu.YppFC.st.........................s.sWCptplPL........lYsaIQchYWs...VGFLKYaphpplPNFLhAsPslIlllauhhhahp.........G.phsphppp.t.......................................................alVhsuhhlhhusFhMHVQVLsRhhS.uhPlhYWahAchlh.s.........Kpp.hpshu..............hthhhhWhs.............hYhlLtslLausFLP.s ........................................................hhh....................................................................................................................h.t.hhpWDs.aahp............lA..p....p.............G....Y..........................h...........t............t...........p........h...........A...F.......hP.hhP...hh.lp.......hh.......s..............................................h..........h......t.........................h.........h...s..h..h..l.s.....hh.........hhu..s...hhLapl...s.t...h......h.......................t.......t.........s......h.....h.............s.....sh.La.hh....sP.u.s.l.Fh.s.ssYoEu..............h......Fshhshhuhhhh..tsp................................................h.h.h.us.l...h....h....u...l..Ash..hR..ssGl...h.h...s...h...hhh.......................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 123 207 294 +2078 PF04190 DUF410 Protein of unknown function (DUF410) Wood V, Finn RD anon Pfam-B_12495 (release 7.3); Family This family of proteins is from Caenorhabditis elegans and has no known function. The protein has some GO references indicating that the protein has a positive regulation of growth rate and is involved in nematode larval development. 19.50 19.50 20.00 19.80 19.20 19.40 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.73 0.70 -4.84 5 337 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 275 7 236 317 2 239.10 26 79.21 CHANGED sppK+a--AlELlasGAppFFcccQpuSAADLshhlLEsLEK..........AElssuspplssLAcllstLsPuEPE......RcshVsRslcWS.....osucGKaGcPsLHpllAppLlctcplppA++HFLLusDsSupAphhLlEYpps+spcuEs...DhFlucAVLQaLsLcNhsoAhsoFTpYTc+ahc..tPphEp.....hchsaPacpPLLNFLalLlhsl-sKcpusFpsLsppYpspLKRD.uapuYLs+IGpLYFGI+PspspSs..uLGGLhSuLLu 
...........................................................................................................tpppaspAh-llhpGAhhhhpt.s..Q.....t..s....SuuDLu...h.hll.-shpp....................................sph..ps..s..pt.....h.....t.cLhp..lhphh.....s...sppsp............................+ppalppslpWS........................ph.u.p..h..ph.Gc..P..cLHphluphh.......h............c..............-...........p..............p.............h...........h...c........Ac..hHhlhu.........scsps...hs........hl.............ht.p.s...t.s....-s...............shahucAVL.aLhltN...hpsAptshptat..p....p.......t..t..ht..........................t......s...P...L...L....NFl.hLLh.s.lpp.....t....p.......h.........s...h....Fp..LpppYtsp...l...p..c..s.s..app.hLptIGphaFs..h.....sp.p.t..s.....hhshhushh................................................................................................................... 1 77 131 196 +2079 PF04214 DUF411 Protein of unknown function, DUF Mifsud W anon COG3019 Family The function of the members of this bacterial protein family is unknown. Some members may be involved in conferring cation resistance. 21.40 21.40 21.40 24.40 20.30 19.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.50 0.72 -4.47 109 858 2009-09-11 13:47:38 2003-04-07 12:59:11 8 4 685 0 253 667 461 70.00 47 46.02 CHANGED pVpspsss..-hssl...KpchGlP.splsSCHTAll..s..GYslEGHVPAssIp+LLp-+.Pp.shGLAVPGMPhGS..PGM .............t.Vpshpss..shssl...KpchGls.spLtSCHTAll.s...GYllEGHVPAssIc+L.Lpp+.....Pp..shGLAVPGMPhGS..PGM...... 0 45 144 209 +2080 PF04217 DUF412 Protein of unknown function, DUF412 Mifsud W anon COG3092 Family This family consists of bacterial uncharacterised proteins. 
20.70 20.70 20.90 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.85 0.71 -4.38 21 797 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 793 0 92 296 13 139.50 64 94.36 CHANGED shh.phlpcGQpYMKoWPhcKpLushFPEpRVl+AT+aAl+hMPslAllolshQhhh...shphhP.pAlshALFhlSLPlQGLaWLG+RupTPLPsoLhsWap-lppKLtptGhshpslpu+PsYp-LAplLKpAFcpLDcsah-- .....saF.olF+RGQHY.KTWPh...EKRLAPVFsENRVIKhTRaAIRFMPPlAVFTLsWQIAL.............GGQLGP...AVATALFALSLPMQGLWWLGKRSlTPLPPulLsWFYE....VRuKLpEuGQs..........L.A..PVEGK.PcYQuLADsLKRAFKQLDKTFLD.D.......................... 1 8 29 62 +2081 PF04219 DUF413 Protein of unknown function, DUF Mifsud W anon COG3085 Family \N 25.00 25.00 32.30 32.10 24.00 22.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.09 0.72 -4.24 23 807 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 791 0 100 247 13 91.50 65 80.88 CHANGED +FaDspNFPRGFuRSGDFTlpEAplLEpaGpshpuLtsGphpPpsc-EppFltsspGppssso.hE+sWhKYhchspp+p+FaoLsGst+..ss ........RaFDNKHYPRGFSRHGDFTIKEAQLLERHGaAFN-LDLGKREPVTEEEKhFVAVCRGEREPVT-tERVWSKYhTRI+RPKRFHTLSGGKP...Qs-............... 0 12 37 70 +2082 PF04220 YihI DUF414; Der GTPase activator (YihI) Mifsud W anon COG3078 Family YihI activates the GTPase activity of Der, a 50S ribosomal subunit stability factor [1]. The stimulation is specific to Der as YihI does not stimulate the GTPase activity of Era or ObgE. The interaction of YihI with Der requires only the C-terminal 78 amino acids of YihI [1]. A yihI deletion mutant is viable and shows a shorter lag period, but the same post-lag growth rate as a wild-type strain. yihI is expressed during the lag period. Overexpression of yihI inhibits cell growth and biogenesis of the 50S ribosomal subunit [1]. YihI is an unusual, highly hydrophilic protein with an uneven distribution of charged residues, resulting in an N-terminal region with high pI and a C-terminal region with low pI [1]. 
25.00 25.00 51.50 34.20 20.10 19.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -11.16 0.71 -4.92 30 794 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 788 0 94 348 14 164.80 55 94.86 CHANGED RKusp.sssp...hssps++psRtpl-spuRpcK+cKK++GhcoGSRpstsptp..pp.psssppKDPRlGSKKPVsLhVpttstsp.........Php.....pps.............+LosEQELtpLENDtpLNpLLDpL-sGcsLussDQpaVDcpLDRI-pLMpcLGIp----s............ptps-DDLLcpFE .....................tss.......tsK...utuKsR.RKTR-EL-tEAR-RK..R..pKK++GpAsGSRsuuGsss..usucs.Qst.KD..PRIGSKpPIPLs..VsE.psstp+.p.......PKs.......cKP.......................hLSPptEL-hLEsDERLDALL-RLEAGETLSAE-QuWVDsKLDRIDELMpcLGLohDDDE........E--..................E-EKQEDhhRL..h................... 0 10 31 64 +2083 PF04222 DUF416 Protein of unknown function (DUF416) Mifsud W anon COG3068 Family This is a bacterial protein family of unknown function. Proteins in this family adopt an alpha helical structure. Genome context analysis has suggested a high probability of a functional association with histidine kinases, which implicates proteins in this family to play a role in signalling (information from TOPSAN 2Q9R). 22.30 22.30 22.60 22.40 20.20 22.20 hmmbuild --amino -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.98 0.71 -5.16 48 828 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 821 2 104 339 19 190.50 67 96.99 CHANGED sshapRLcpLcsWQplsFhsuLCERMaPNYpLFs-hophu-.sphh+slLsLlWEhLssK.su.KlNF-pQLEKL.EphlPsss-.aDhYG...VYPAhDAChALusLLp........uhlstcsh.-cslplSplShsTVAsalE...spsscEls.--p.......lcppthhppEh-lQhplhchL..p-sppR.ch-lIcsL+p-lpps.GlSNIGIsl .............................NPIHLRLE+LESWQHlTFMACLCERMYPNYAhFCpQTtFGD..upIYRRILDLIWETLTVK.DA.KVNFDSQLEK.h..EEAIPuADD.aDLYG...VYPAIDACVALSELlH........SRLSGETL..EHAl-VSKsSIoTVAMLEM..........TQAG..R.EMoDEE........LK-NPAVEpEWDIQ.WEIFRLL..AE.CEER.DIELIKGLRuDLREA.G.SNIGIt.h................ 
0 16 40 74 +2084 PF04224 DUF417 Protein of unknown function, DUF417 Mifsud W anon COG3059 Family This family of uncharacterised proteins appears to be restricted to proteobacteria. 22.10 22.10 22.40 24.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.14 0.71 -4.90 6 870 2012-10-02 13:32:46 2003-04-07 12:59:11 7 2 767 0 99 439 11 168.40 46 92.16 CHANGED h+hhchlshtschslsllRLulhIlFhWlGshKassaEA-uIpPhVuNSPahSahYch.p......................................ssYssS.hLGllEsIlulhlLlGhhpstsGllGGllshshslVTLSFLhTTP-sahst............P.LuGssphVlKDlLhlAuulhlhthstpchL ....................................hclluptsclGlsLlRl.uIsIVFhWIGhLKFssYEA-uIsPFVANSPhMSFhYca.t...p.h.pc..puE............................c.thtWpptNsTYuhSsuLGllElIlulLlLss...h..s.h..lGLlGGlhAhshslVTLSFLITTPE..sWVssLGss.............paGFPaL.S.G.A.GRLVLKDhlhLAGAlhlhu-uA+cl............... 0 23 57 78 +2085 PF04235 DUF418 Protein of unknown function (DUF418) Kerrison ND, Finn RD anon COG2311 Family Probable integral membrane protein. 20.80 20.80 21.00 21.00 20.60 20.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.94 0.71 -4.57 138 1711 2012-10-02 17:00:17 2003-04-07 12:59:11 7 6 1135 0 378 1373 248 158.90 28 41.74 CHANGED hs+pshhp.tsppptphhpphhhhs..lslulshtlhhs.....................hhtshhhhhhthhuuhh.huhsYluhlhhlhpptth.ht.....h....hpslsssGRMALTNYlhQSllsshlF.huaGLGLhsplshh....thhhlslslahlQlhhSphWL+pF+hGPlEal....WRplTatp ........................................................................hh+pshhp..sp.p..hp...hh+..+hth..lh...lsluls.lsh.th..................h.h...s.ht.hss.hh.h....ph.......h.....p...luus...h.......huluY.suhh..hhhh.phpt..hp..............l.....ltsls.slGRMALTNYlhQ.........ollss..hl.F...................a.t.hu..L...hh...p.......h....s....hh....th.lhhslsl....ah.h.pllaSslWL.+.h.a.+.pGPlEWL....WRplThh.t...................................... 
2 131 275 337 +2086 PF04237 YjbR DUF419; YjbR Anantharaman V, Kerrison ND, Finn RD anon COG2315 Family YjbR has a CyaY-like fold [1] 25.20 25.20 25.40 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.31 0.72 -3.64 196 3031 2012-10-09 12:12:44 2003-04-07 12:59:11 8 8 2138 7 527 1873 137 96.50 28 68.23 CHANGED shuhPsspcs..hsa........st....hsa+V...............u......KhFAh....hsttt............................lslK..sssppt.thLhpp..sshhsu..+hs+..pp.Wlsl.h...l......t.slspsplpchlccSapLl ...................................................shth..tsp.s...h.a.......s...ph...ssh+l..........ss..KhFAhltphptp.........................................hlsLK..s..s..P..-..hs..thLppp......ss.l.h.P.u.a..Hh.NK......pH.WloVh..........l........sssls.c.splhcLlscSaplh.......................... 0 174 335 445 +2087 PF04238 DUF420 Protein of unknown function (DUF420) Kerrison ND, Finn RD anon COG2322 Family Predicted membrane protein with four transmembrane helices. 28.50 28.50 28.60 29.00 28.40 28.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.73 0.71 -4.21 60 636 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 597 0 165 381 289 133.90 42 79.22 CHANGED lshLPhlsAslNuloslhLlhGhhhI..+ptphphH+phMlsAhshoslFLlhYlshphh.sssos......FG..............Gtu.........hl+s.lYahlLIoHIlLuslslPLsLholhhuhpp.................phspH+KluRhThPlWlYVulTGVlVYlMlh .........l.sILPplsssh.sloslhlshGhhhI...........h.++plptH+shML..uAhshuLhFhlhYh..o.hp....hh......hssT.s.........FG...............Gsu..........I+h.hYh.hhLhh.HIhLAslsssLuLhsllhuaps..................phshHRKlu.ashsIWhhsulTGVh.VYLhl................ 
0 65 126 155 +2088 PF04239 DUF421 Protein of unknown function (DUF421) Kerrison ND, Finn RD anon COG2323 Family YDFR family 27.40 27.40 28.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.02 0.72 -4.48 196 3034 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 1816 4 627 2119 40 94.30 26 48.77 CHANGED ccllpGcPhlllcsGclhpcsl..++p+lohs-LhttLR.pp.ul.hsls-VchAlLEssGplSVlh+s.p..tsht.....................................t.t.h.t.h.....................hpcl ..............chlcGcPsllIcsGclhhcpl..pptphots..-lhhpLR.pp..G.l...h.......pl....ppVchA.lLEsNGploVhhtt.c......h...............................................................................hhh.................................................... 0 243 460 537 +2089 PF04240 DUF422 Protein of unknown function (DUF422) Kerrison ND, Finn RD anon COG2324 Family Predicted to be an integral membrane protein. 25.00 25.00 27.00 27.00 20.20 20.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.57 0.70 -4.93 13 245 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 225 0 128 250 252 208.50 23 75.40 CHANGED uFs.hhsLhGlhsMthshlAuhl...phl.stRuhuhllshsuhuhulELLGspTGaPaGcatYsssLGPhluGhVPhslPluaFsLslsuYLLs..hshLu.csppphl+hhts....uhhlsshDlVLDPuhs..ulsFWsW.ssGsFa.GsPhQNaAGWlLoGs.luhslhsluashsulptchp....ssshhLsshVuhhhhhsslsLhhGhhlPstlulhLGluhl 
...............................................................h..........hhhhhhhhhsshh....hph.G..h..tp...s.h.hh.hs..hsh....s.luhs...sEhlGl..p.TG..a........PFG.pYpY.s...s...s.L...G....pl.h.G.VPlhlsluW...hhlshsuahlu.............thhl....t............................t...t......t....h.h.p.hhhs...................uhhhsshDlhlDP.shs......sh.s.aW......h......W....p..s..............s.............G........s............aa....G...lPlpNahGWhlsuh....lh..h.hl..h.p.h..h....ht.h...t....t..h..................h..h................hh....hh.h............................................hh................................................................ 0 43 103 124 +2090 PF04242 DUF424 Protein of unknown function (DUF424) Kerrison ND, Finn RD anon COG2412 Family This is a family of uncharacterised proteins. 25.00 25.00 27.10 34.40 24.90 18.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.59 0.72 -3.94 44 146 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 142 4 72 141 21 91.50 26 84.16 CHANGED apsss..hhluhs-cchhGppapctp..hhlplscuFassphs...t.ppslptLpcs.........slsNlhGpcslshult.Ghhc.ssVhhlsuss+splspl .......pspsp.hlluhCDc-llGcpaccsp..lhlplsct..FYsschs...p.-pshptLpcA.........slsNllGpcsVthAlchGhlc.ssVlhlsuss+AQlhph... 0 19 46 61 +2091 PF04248 DUF427 Domain of unknown function (DUF427) Kerrison ND, Finn RD anon COG2343 Family \N 20.70 20.70 22.80 20.70 20.10 20.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.09 0.72 -4.32 99 1160 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 605 5 492 1041 207 92.70 32 63.77 CHANGED lpVhhsGtllA-opcslhlhE...sshsPsaYlP.pDlph.s....hLp..o.sppohCPaKGpAs.Yas.l.....ssts..tpsAAWsYsp.Phss.sstIpsalAF.as.stl ...............................hplhhsGtllA-..o..p..p...s.lhlhE...sshss.....h.......aY.lP.sD.lp.h.p..........h.Lp.t.o..sp.p..ohCPaKGpAs.Yas.l..hs...sspt..................hpsAAWsY.p.P.hst.....s.....tt.....I....tsalAFass........................................... 
0 132 295 403 +2092 PF04250 DUF429 Protein of unknown function (DUF429) Kerrison ND anon COG2410 Family \N 20.20 20.20 20.30 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.54 0.70 -5.07 41 405 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 389 0 180 381 182 209.30 20 83.95 CHANGED GlDhs....+suhlAlh..........ptsphhphthh..s.scllshhtst.....tllulDhPluL...............tss............tshRssDpthR+h..ht.....plF..Psp.shhtp.................................lotpuhplh..............tphthclhEsHPpsshptl.....ssstt.........t...tsh..RhthLtt.h....................th..shtpcDllDAhssAloAth.h.hpGps......hplsst.st...hspts .......................................................GlDhu.....p.shlush..............tssph.h...thth...ht...s......tpllshhpsh...........shlulDuPlsl............st.................................................sutRss-pthpch......ht.........t.ssa.....sspt..shhsp..........................................................h.s.puhp.lt.........h.t....h...t........h...........t..ptstplh..EsaPc.su.h.h.sl.............ttt.t...............ctp....tps.h...t...t....hh....p...hLhphh................................h.....t..th.tp......htts-D....h.lDAhlsAh.sAhh...h.....s.h........h..............t................................................................................................ 0 61 121 150 +2093 PF01861 DUF43 Protein of unknown function DUF43 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes archaebacterial proteins of unknown function. All the members are 350-400 amino acids long. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.51 0.70 -5.39 6 163 2012-10-10 17:06:42 2003-04-07 12:59:11 11 3 152 1 80 301 87 239.00 33 61.06 CHANGED -lLc+FpEIAK-RPcslppYDQGaVTPEoTluRVtLhasRGDLcGK-llVlG.DDDLTulAhuLTshPK+lsVlDIDERLIcFIc+sAcchGls.lEshsaDLRpPLPEchh++FDsFlTDPPETlhul+sFlGRGIusLKGtGsAGYFGlT++EuSlcKWtEIQRhLl.-hGsVITDlIcsFNhY.NWsYhppTRAhphlPlK+......cPEc.WYpSshaRIEsLc....s..+th-EElscsEclYpD-Euos .........................................hhcchpplscsRP..pshtphD.QuhsTsETsltR.sh..l..h..tp+G.DLpGKcll.sl..G..DD.DLsS.......lA.l..u.......L...........o..........s.......h........s.....p........c....lsV..lDID.-RllcaI.pchAc...c...hsl.....s...lcsh..pa...DlRp.sLP.c...chh..sp..FDsFh...TDPP..........Tl....p...Gl.cLF..lu..R.G.l..p.sL.+.s.p...G...s......s.u...Yh....u.hoc.cc..s...o.hpcah..plQ.+hLl.chGl...llp-Il.sFNpY....shs...h...l.tp......hh.......h..h.p...................p..ahh....hhphh........................................................................................................................................ 0 26 46 66 +2094 PF04254 DUF432 Protein of unknown function (DUF432) Kerrison ND, Finn RD anon COG2430 Family Archaeal protein of unknown function. 20.70 20.70 20.80 20.70 20.40 20.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.52 0.71 -4.20 17 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 63 0 53 71 5 120.30 27 51.26 CHANGED hPlstst.thsphlhl+hcpPlllsPtsshphalchPl-luVhhsst..t..thlDhhshs..+tKYsLYGsspsGllsRYhcoplhsc.Pps......pulh+lhlpNpssphspls+lVFshhshphYY ...........Ph.h.t..hsphlhlchpcPlhlsPtsphphalphPl-luVhl.sst..................hlDhh..sls..+.KYsLYGs.s.....s.....sGsls..RYac..Sphhs.....p.Pcs....h...pulhcltlpNssschhpls+llhshhshplYY..... 
0 19 33 41 +2095 PF04256 DUF434 Protein of unknown function (DUF434) Kerrison ND anon COG2454 Family \N 20.40 20.40 22.00 21.10 20.30 19.00 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.72 0.72 -4.59 19 102 2009-09-10 18:22:19 2003-04-07 12:59:11 7 1 102 0 64 100 2 57.20 35 25.61 CHANGED tpLpcAhcDlpaLLNRGYs+csuLchVus+YpLstcpRhhLtRslho-c....cltt.h+pKh .....hLpcAhpDlpaLLsRGYsc+suLphVuN+YpLspcpRhhLhRslhScp....plpt.h+pK........ 0 27 40 53 +2096 PF04258 Peptidase_A22B DUF435; Signal peptide peptidase Bateman A, Studholme DJ anon Bateman A Family The members of this family are membrane proteins. In some proteins this region is found associated with Pfam:PF02225. This family corresponds with Merops subfamily A22B, the type example of which is signal peptide peptidase. There is a sequence-similarity relationship with Pfam:PF01080. 20.60 20.60 20.90 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.10 0.70 -4.97 14 885 2012-10-02 13:41:03 2003-04-07 12:59:11 8 15 291 2 585 836 37 274.40 29 63.39 CHANGED pspppstshssptAhhaslsuoshLlhLahhachht..............................................................llhshFslsush.....................hahlhsshhph.p...............phsttphph.....................hphphshtplhshhhslslslh.alh+p..+..WlhpsllGluhslsslphl+LsshKsuslLLssLFhYDIFWVF........uosVMVsVApu.h-s........................PhhLhhPph............ssssaShLGlGDIllPGlhlAhshRaDh.t..................ptppps.YFhsohluYslGLllThluhplhc.pAQPALLYLVPssLhshlllAhh+s-L+phWs 
.......................................................................................................................................................................................................................................................................t......phs..pA.hh.hslhuushLhh.L.ahh.hp.h............................................................................................................................................................................l.lhh.ha.hlhuhh................................u.hh.h..s.hh..........................................................................................t...tph.h...........................................h.php..h..sh..hpll..shhh.sh.hh..sl...h.....h....l...hpp...p.........W.l.hpshl....Gluhs..l..sh.........l....p.h..l+.L.......s.sh+s....us.lLLss..LFl.Y.D...l..Fa.VF.............sssV.MVpVApu..hcs...........................P..h..hL..h.hPp..............................sstsa.uh....LGlGDIl..l..P..GlhluhshR.a.Dh.h.................................................................................................tt.p.tps..YF..hssh.............lu..YhlGLlhThhs...h..t..lh...p.tu.Q...PALLYLVPssL.hs.hhhAhh...+s-lpthat.................................................................. 0 194 304 452 +2097 PF04260 DUF436 Protein of unknown function (DUF436) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of bacterial proteins with undetermined function. 
25.00 25.00 30.10 30.00 21.20 20.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.83 0.71 -4.77 34 978 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 969 3 107 502 4 171.00 54 92.89 CHANGED pphpphlpEllctusLcpGplhVlGCSTSEVhGt+IGpsuSh-lupslhpslhphhpcpGlaLAsQuCEHLNRALVVERpsAcphshEhVoVlPsh.+AGGuhustAacphp-PV.VEpI..pAcAGlDIGDThIGMHlKhVtVPlRsuh+pIGpAHVTshpoRPKLIGGsRAhY ....c.phppllc-lh-..pusLppG..slFVlGsSoSEVlGt+IGpsuShEluEhIhpslhplhc......ppGIpLAhQGCEHlNRALVVERplAp.p.hsh..E..lVoVlPsl.HAGGShtstAFctMpDPV.VEaI..pApA..GlDIGDThIGMHlKHVpVPlRssl+plGpAHVThhsSRPKLIGGsRAcY...... 0 37 64 88 +2098 PF04266 ASCH DUF437; ASCH domain Kerrison ND, Finn RD anon COG2411 Domain The ASCH domain adopts a beta-barrel fold similar to the Pfam:PF01472 domain [1]. It is thought to function as an RNA-binding domain during coactivation, RNA-processing and possibly during prokaryotic translation regulation [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.51 0.72 -3.65 85 2885 2012-10-02 17:37:24 2003-04-07 12:59:11 9 30 1955 15 561 1820 99 108.40 21 70.36 CHANGED hp.........htsp.............htshllpGpKssp......hRhtspsh.....h.uphhll..hp................................spsphhshlclpsV...phh....papcl........spptuh.....-s..............sh..hhp.h.ph.....att............phhlss.p..Fchlt .....................................thhpc.............htshll.sG.pKThT.............h.+..ps..-s..h.p.......h..Gph.hl.l..hp...........................................................................tp.sp.hs...hlclp.slphh.....phspl............stp...hAh.....tEs....................oLt...hp.ph.htch........a.t.................p..hhh.cat................................................................................................................ 
0 151 287 429 +2099 PF04282 DUF438 Family of unknown function (DUF438) Kerrison ND anon COG2461 Family \N 25.00 25.00 25.10 36.30 23.60 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.47 0.72 -4.18 27 563 2009-09-11 06:07:37 2003-04-07 12:59:11 8 11 557 0 100 390 1 72.00 48 16.66 CHANGED hLKcllhcLHpGtss--lKccFppllsslsstEIshhEQpLlp.-G.lsscElp+LCDlHAslF+sulpphtp .........lL+-ILhcLHsG..u.osEoVp-cFstpFsGVSAlEIShhE+ELMs...sG.lshEDVhcLCDVHAsLFKsAIcslc.s....................... 0 38 61 76 +2100 PF04283 CheF-arch DUF439; Chemotaxis signal transduction system protein F from archaea Kerrison ND anon COG2469 Family This is a family of proteins that are archaea-specific components of the bacterial-like chemotaxis signal transduction system of archaea. In H. salinarum, the CheF proteins interact with the chemotaxis proteins CheY, CheD and CheC2 as well as the flagella-accessory proteins FlaCE and FlaD, and are essential for any tactic response. CheF probably functions at the interface between the bacterial-like chemotaxis signal transduction system and the archaeal flagellar apparatus. 
25.00 25.00 56.20 55.60 24.60 24.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.18 0.70 -5.14 14 80 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 50 0 56 83 0 221.30 22 74.64 CHANGED ptltph.uphh.hspsshc..pspWpcucllLopcRlVlus.tptph....slslsplpDlssp.s.t........hsthpshsulphppc.....shllsssss.......t.ppFtphlFpsllstptVhlpcsAhhGGsV.p-upWE+Gplplsccslphshsstp..pIsl..............sslusl-tcp+plsGcp+.VLplcHscc.spslsoalh.ssp+p.hplLcthlp.pht..........-......phptth- .................hlsch.Gphh..hsptshp..tscWpss+llLopcRlllss.pst+t....slslsplpDlsschs.pt......hhs.hsshsslphtps.....shllsstss........hppFtphlapslLs..tptlhlpaPAlhGGsV.p-spWEcGplplscpslph......shsssphhslsl..............sslssl-hpc+pl....sG.cc+sVLclc..Hhcs..sp..oV....soalt.sspcp.lplLpphlc.phh................c.............t............................... 0 7 33 47 +2101 PF04269 DUF440 Protein of unknown function, DUF440 Mifsud W anon COG3099 Family This family consists of uncharacterised bacterial proteins. 21.50 21.50 23.30 58.30 20.80 17.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.30 0.72 -4.00 9 735 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 728 1 72 234 7 105.90 69 97.44 CHANGED M...p.ho.D-sl-hAYDIFLEhAs-NL-PsDIhLFsLQFE-RGusEhs-suDcWpccVGhpl-s-taAEVhlGLss.Ep-Eh-DlFARhLlSRch-c+hsHllWKc .MDLN.NRLTEDETLEQAYDIFLELAuDNLDPADllLFNLQFEERGGAELFDPAEDWQEHVDFDLN..PDFFAEVVIGLAD..oEcsEINDVFARlLLCREKDHKLCHIlW+E...... 0 4 20 46 +2102 PF04284 DUF441 Protein of unknown function (DUF441) Kerrison ND anon COG2707 Family Predicted to be an integral membrane protein. 
22.20 22.20 26.70 26.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.79 0.71 -4.30 35 1132 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1100 0 140 468 0 136.80 52 91.76 CHANGED LhLllLlllGllu+NpSlsIAssVLLll+hhs.lsph.hPhlpp+GlshGlhllTlulLsPIAoGcIshcsLhpuhhShtuhlAlssGllVAhLuucGVsLlssp..PplssuLllGTIlGVuhhpGlsVGPLIAAGIshlll ...............LlLLsLhhLGhlu+NsolslulhVLlll+lTP.Ls..........sa............FPalE+pGlslGIlILTIGVhsPIASGpls.ssLlcSFhs..aKullAIulGlhVuWLuG+GVsLMusQ..PpllsGLLlGTlLGVAL..F+GVPVGPLIAAGlluLll...................... 0 41 78 108 +2103 PF04273 DUF442 Putative phosphatase (DUF442) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain Although this domain is uncharacterised it seems likely that it performs a phosphatase function. 24.60 24.60 24.60 24.90 24.40 24.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.29 0.72 -4.20 5 718 2012-10-02 20:12:17 2003-04-07 12:59:11 8 8 547 8 211 636 313 108.40 33 46.58 CHANGED -hRclo-cLSVSPQlss-DlAshAcpGF+olINNRPDGEEPuQPuNAAlpAAAcAAGLuYsalPVhsGsITs-sVcuFpcAlAsA-GPVLAaCRSGTRulsLYALuQAlc .....................................hplscphslu.s.Ql.sssDl.t.p.l.u.p.tGa+olIsN.R.PDsE.t.s.s.QP.s.h.tp.l.pps.AcptGl.s.a.h.alP.V.s..u..s.p.l.o.p.p.sV.......ppF.t...ph.ls..p...h..t..t...P.VLA.aCRoGs.Ru...ss...Lasltp...t....................... 0 62 127 168 +2104 PF04276 DUF443 Protein of unknown function (DUF443) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of uncharacterised proteins. 
26.20 26.20 26.40 26.40 26.10 25.80 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.36 0.71 -4.88 22 1105 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 227 0 32 406 0 179.30 47 93.80 CHANGED Ns+YRIlchssEYhhlDl.ssoahshhFPhlsWllP+chhKIopcEh.EpLphs+ss.........Kspsh.hsshGu...ulLlushlRthhphhslphpphlshhlshlshlhllhhalhls++h+hplas.s....ppscpKlhlhP.ohKphhhhlFsYlhhhuholhslhhhl..pspNlIhalshhhhh.hhhhhlNhhoIss.ppspVhh+ .............................................NPKYRlI+YssEYLMlDl..lS.oWlshFhPhINWhIPK+YsKIS..ccEa..EsL...Nl...VKPs.........KspsF.....WPlsGu......ol.LhulhhR....K..Yhhlh...slpL.-Kp.lVIhlChlshlG...ll..hFalhLN+K.LpLplas.s....+spppKl..l..LlP..ohKsh..shh.lFsY.l..hhGuhShhhl.hLlohs.QNIIlalsW.lhhh.hhF.FhlNhssIhs.Kpl+Vlh................................. 0 13 24 31 +2105 PF04285 DUF444 Protein of unknown function (DUF444) Kerrison ND anon COG2718 Family Bacterial protein of unknown function. One family member (Swiss:Q97LI1) is predicted to contain a von Willebrand factor (vWF) type A domain (Smart:VWA). 
20.30 20.30 20.40 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.35 0.70 -5.84 8 1564 2012-10-10 16:07:06 2003-04-07 12:59:11 7 3 1281 0 389 1028 179 338.70 43 98.30 CHANGED MuhFlDRclNs..+cKohssRQRah++h+.EpIKculsDtVsc.cSIsshsutEslpIPtRulcEsph+.Gc.sGcpc+VtsGsc.....c.psGDhIsRssuGsG.tGsGcGpuuuDsEG.......EDtFphplSp-Ehh-lLFEDLpLPNLp++phsplsp.h+sc+AG.hpssGlsuNIshsRTlpsuluRctuhs+upp..hcu..tpl...t...s.tlhct.......tht.hcs+hcRlP.lcs.DLRa+paccpPcPpSpAVhhClMDVSGSMspscK-lA+RFFhlLphFLpp+YEsVElVFIpHHTpA+EVsEc-FFappEoGGTIlSSAL+hhpElIcERYssAcWNIYuhpASDGDNas-DosRClclLppclhthsphauYsEIs....hpsHps.........hhcYcthpsshDs.FthppI+stsDlaPVh+plFp+Eps ................................................................................t.hIDRR.Ns..+sKShsNRQRFl+Rh+.tpIKpuls-tlsc.cSl....hsh....p....s....s..-p.lsIPh..cslsEP...h...F...+p...Gp.sGhp.c.pV.tP..GNs................cahpsDpltR..PtuGu...............u....GsGpGpu.u.t-.GEG.......pD...pF.FplSp-Eah-hhFEDLtLPNLpppp..pp.lsp.h+.s.cRuG.hpssGs..Pu..NIslhRolpsuluRRhAhsts..t.hpth..........tt.l......h.......t....................t.....t................h..t..l..t..l.ct+htp...lP.aIDshDLRa+sapcpPp.Ps.SpAVMFClMDVSGSMsptpK-hAKRFahLLYlFLpR..pYc.s.V-lVaIRHH.TpA.KEV.s...E.c-.FFau.....pEoGGTlVSSAL+LhpEllc...........pR.YssspW.NIYuAQASDGDNW.s.s.Dosh.CtclLtpcllshspaauYlEls.........ptHps...........WhpYpplp.t.t.h.ss..FuhpplcptpDIaPlFR-LFp+p..u.......................................... 0 133 240 315 +2106 PF04286 DUF445 Protein of unknown function (DUF445) Kerrison ND anon COG2733 Family Predicted to be a membrane protein. 
33.80 33.80 33.90 33.80 33.60 33.70 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.28 0.70 -4.93 106 2067 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1694 0 414 1545 89 326.60 24 87.11 CHANGED sEAAhVGulADWFAVsALFR+P.....lG.l.IP.....HTulIP+p+-clucsLuphVtsphLsscs...ltp+lpphshsptlspaLt..........ps.....spsptl........................sptss.phltthlctlpcpclpphlcptht...............................................pplpph.hushhuplLp.hht-scpptlhDhll......pphtphlpsscsp..ltphlpchhpph.sthh.....................stplsptlhpth.phlcclpt-sp+.hRpphsptltphlscLtps.th..pch-plKpphlsc.thpphhps...lhpplcphlhpthps.s........hlcpplsphhtthsp.........................................pLtp-..splpppls........................chlpptsttllsp.ttplspllp-slcpa-scclpctIEhtlG+DLQaIRlNGollGGllGlll.aslshll ....................................................h.uuhlGulssWhAlphLF..R...P..........h.h....h.lP........p.s..........ulIP+p+-clucsluphVpcchLssps...lhtt....l........cpt..p....sthltphhp............................p...ptsppl.................................................................stphh.phhpt..h..lph.h...t..s...t..p.....lpp.hlpp..t.hp......................................................................................................................................ptlpp..h.hs.t.h.shh.......ppphp..hh-h...lh..............tphh.t.h.ltp.pp.sp..ltp.l.ph.h.pp..sh......................................tthsphh.pt.......phhs.cl.....pt..c........t..+..h.c.....p.t.hs.p.h.h....hl.pp.Lh.p..............tchc.thKphhh.p.c.....t..h.tphhtp....................lhtp.l..pphl.h.s.thsp.p.................hhpp..ltp.hht.h.hp...........................................................................................................................plhtc......stlttplp....................................................................thlpptstplh.p.........h...l..sphlp-pl..psa.Dscchpc.Ip..hs+-LphIch.GsllGuhIGlh.hhls.h..................................................................
................................................................................................................................................................................................................................................................................ 0 158 296 367 +2107 PF04287 DUF446 tRNA pseudouridine synthase C Mifsud W anon COG3098 Family This family is suggested to be the catalytic domain of tRNA pseudouridine synthase C by association. The structure has been solved for one member, as PDB:2HGK, which by inference is designated in this way. 25.00 25.00 25.80 25.70 19.70 19.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.97 0.72 -10.29 0.72 -4.20 56 892 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 888 1 128 441 33 100.00 49 91.89 CHANGED cp.p.ltptLtpLEttL+phsLWpspsPsscAhsSspPFulDThshppWLQalFlPRMptLl-sstPL.PpphulsPhhEcshp..pp...sphptLlshLpplDpLl ..........c.spV+.pLpsLEshLRcpphW..ps...st..Ppsct....FsSspPF.hhDTMcPhEWLQWVhIPRM+sLL-sspPL.PsuFAlAPYaEh..ALs....-+...Pppth..l..L..A.LpcLDsLh...................... 0 20 49 93 +2108 PF04289 DUF447 Protein of unknown function (DUF447) Kerrison ND, Finn RD anon COG2457 Family Archaeal protein of unknown function. 
24.80 24.80 25.20 24.80 24.50 24.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.99 0.71 -4.71 45 190 2012-10-02 11:35:36 2003-04-07 12:59:11 7 1 185 12 120 206 54 176.30 24 88.66 CHANGED hEsllTTtssp....NhAPlGllh...cucs....hhl+lFcsS+ThcNltpsshhslslssDshlaspsshsth..p.tht.sh......h....hLcsuhsahthclpph..........pssspthhhphpslctthtp.....sh...NRupsullEusVhsTRl...ph.....hptcclhpclphhttllcKsGGspE..pcAhchlpc ............................................EsllTThs.p.....phAPlGlhh...................pssp.........lhlphacsopThcNltp..pshsslshs.sDshlFstushs....tthp..h......th...........hLpsuhuahph...clpph..........psssp.h.hhphcslctthpp.....htsa...NRApsAVlEusVhsoRLph........hst-clhpcltahtts.lcKsuGtpE..pc.Ahphl..t............................................. 0 30 74 101 +2109 PF04296 DUF448 Protein of unknown function (DUF448) Kerrison ND anon COG2740 Family \N 21.60 21.60 21.60 22.70 21.20 20.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.83 0.72 -4.35 155 2403 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 2395 1 477 1345 326 77.80 31 69.80 CHANGED .RpClsspc.hts+ppLlRlVhss-....u....p.lshD...pt+hsGRGAYl.ssstp..slcpAh.....++..+thsRuh+..ss.............ls..ss..lhct...lpphl .............RpClsspc.h......tsK..+-LlRlV..t..s.p-...G.......................p..lhhD...soGKtsGRGAYl.shc.....p.slppAt.......++.......+shs+uhchp............lscp....hhcpL.t..h.............................. 0 172 342 420 +2110 PF01863 DUF45 Protein of unknown function DUF45 Enright A, Ouzounis C, Bateman A anon Enright A Family This protein has no known function. Members are found in some archaebacteria, as well as Helicobacter pylori. The proteins are 190-240 amino acids long, with the C terminus being the most conserved region, containing three conserved histidines. This motif is similar to that found in Zinc proteases, suggesting that this family may also be proteases. 
20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.57 0.71 -4.53 82 3481 2012-10-03 04:41:15 2003-04-07 12:59:11 12 7 2755 0 820 2754 698 165.60 22 80.80 CHANGED +plplplps.ssl.plssPhthsppplpphlpc.....+tsWltpphtchpp............tthhsspth.h..hGcpapLphhtsp.....................hhh.thp.pthpptlpcah+cpspt.hlppt....ltthspthsl...ph.tphpl+sh+oR.WGSC..ss...psplplshcLlhhP.pll-YVllHELsHLh.chNHutcFWpllsphhPs.acct+phLcpt ..................................................................................................................................................................................................................................................................................................................................................t.h......th.h..t.h.t......h..h......h.....h...............h....t...h...................ht.....l.p...p...pp.p..WGoC....pu......c....s.....pIpl...s..h.t.l..h..t....s.P...hl-Y.....ll...lHELsHLt...c..h....s.........HsctFaplspphh.P.p..a+p.hct.Lp...................................... 1 265 553 699 +2111 PF04313 HSDR_N DUF450; Type I restriction enzyme R protein N terminus (HSDR_N) Kerrison ND, Finn RD, Yeats C anon COG2810 Family This family consists of a number of N terminal regions found in type I restriction enzyme R (HSDR) proteins. Restriction and modification (R/M) systems are found in a wide variety of prokaryotes and are thought to protect the host bacterium from the uptake of foreign DNA [1]. Type I restriction and modification systems are encoded by three genes: hsdR, hsdM, and hsdS. The three polypeptides, HsdR, HsdM, and HsdS, often assemble to give an enzyme (R2M2S1) that modifies hemimethylated DNA and restricts unmethylated DNA [2]. 
21.10 20.50 21.10 20.50 21.00 20.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.44 0.71 -4.56 77 3819 2012-10-11 20:44:43 2003-04-07 12:59:11 9 43 2584 8 831 3899 642 175.90 19 18.43 CHANGED thsEpthcp...hl..hl...hGaph.................ht................................................................................................................................................................................+sDlslhlN.......................GlPlsllEhKp..................................t.pt......hppLapYs...........................................thhlhoNGpphthhsthsptpp .......................................................................................................................................E..h.t...h...h.....t......s.at................................................tt....h..t....h.h..t.h.t.p................................................tt.......................t......t..h..h....t....h..h.......t.....h.......................t.......h...h.............hs.h...p..p......t..p................................................................................................Np....h....p..l..spphphp................................sp.ppp+.hDlll.h.l..N.......................G.l.Pls...hlElKp..................s...sl.ppAh.p.......Q..hp.p.Yp.pp...................hppl.a.pah....................................................tlhhh.os.shp.shhhst.t....p.............................................................. 
2 293 576 721 +2113 PF04301 DUF452 Protein of unknown function (DUF452) Kerrison ND anon COG2830 Family \N 19.50 19.50 23.70 23.10 18.50 18.10 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.33 0.70 -5.04 2 358 2012-10-03 11:45:05 2003-04-07 12:59:11 8 2 355 0 43 228 1 204.30 37 95.06 CHANGED McTpahspQGspLIlYFAGWGTPPssVpHLILPENaDLhlCYDYpDLphDhDFSAYpHIRlVAWSMGVWsAERshQGh.LhSATAlNGTGLPCDDpaGIPpslFtGTLpsLsEssRhKFERRhCGsKs.hcDYQpashRP.htEIHtELhALashltQDRRTDLIpWopAlVGStDKIF.stNQ+tYWpsRCslpEIsstHhLFs+FTHWpsh .................................p.p.p..u..p.c.LIlhFuGWuoss.shhs...HL..h.......s.....p......s.....+..-l.l.l.s.YDYcsLs.h.......c....F....D........h..s.....u...a...p..c.IpLlAaSMGV.....as.A....sR..l.L..........p..p....l.....p.......h...p..p..t..h....A.....INGTshP.hD.cphGIssuIFctTLcs.hstp...sht+Fc+pht.t-.+p..h.pca.p.ph.s.t.+s.hc-l+pELptL...a.t.h...ht.p..c.....p..p...p..s......h.l.h..W..spsh........luppDcIFPsss..+pha.p..p...h...h......l.....l..-.t.sHahF.+FppWp............................ 0 13 26 37 +2114 PF04303 PrpF DUF453; PrpF protein Kerrison ND, Bateman A anon COG2828 Family PrpF is a protein found in the 2-methylcitrate pathway. It is structurally similar to DAP epimerase and proline racemase. This protein is likely to acts to isomerise trans-aconitate to cis-aconitate [1]. 
20.20 20.20 20.80 20.80 19.30 20.10 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.34 0.70 -5.72 6 1338 2012-10-03 03:02:41 2003-04-07 12:59:11 8 10 1077 9 378 1097 393 341.90 45 96.08 CHANGED .l+IPsThMRGGTSKGVFF+hp-LPtss....stRDtlLLcVhGSPDPh..QIDGhGGGsSsTSKVsIlutSSpPstDVDYLFGQVuI-cthVDaoGNCGNlSAAVGPFAIcAGLV.sA+hP...lstVRIapsNhuKhIlAcVPlssGpVp.sG-hplDGVshsuA.ltLsFhDsAssss..GtlFPTGN.lDsL-sst.G.lpsThIssuhPhlhV-A-ulGhsGTELtEElNuD.phLA+hEpLRshuAh+MGhluclc-ts.p.tTPKlAhVusPppYhsSuGthhtus-IDlhVRhhSMt+hH+AhhsTuAVAIuoAsAl.GTlsshhAGust.hssVphuHPSGsLcVtscscp.pt..sh.pAthsRoARhLMEGaVhlP ..................................h+IPsshMRGGTS+GsFhhhpD...L...Pp.st....stRDtlLhtlh...GS..P.......D..sh......QIDGhGGuss.TSKssIlS............+.....................S.s...............c........s..c.....tDV.............DYLFuQVslc.c.sh.VDhosNCGN.h.uu..VGsFAIcsGLl...sup.P....................lspV....RIhpsNh.GphI.AcV..h.........s.....s...G....t..........V....p.......p.....G-.h...clDGVsh.s.AA.VtLpF.lss...Ausps.......Gp......hFPTGN...hl.D.l.......-......s......................................l..psThIssuhPslhlsAps..L......G..h......o.........G.h........E...L.........s...p.ls..uD.tt.hLu....+....hEsIR..htuuhtM......G.lsc......l......s.p.h.s.........hPKhshlus..s...p.....p.uG.........sls..VRh...h....tphH+AhhhTu..AlAIuo.........A....ssl..GT.ls.p.......h...................s.................s.........u.......s...............s.........h..............s....s..................lphtHPSG..sLcVt..hp...s..c...t..tsu........ssh+A..sh.RoARhlhcG.VhlP........................................... 0 82 190 304 +2115 PF04304 DUF454 Protein of unknown function (DUF454) Kerrison ND anon COG2832 Family Predicted membrane protein. 
21.60 21.60 21.80 21.80 21.40 21.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.41 0.72 -3.95 112 2157 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 2047 0 323 1155 310 69.50 27 54.83 CHANGED hapWLlsH.haGshlcsWpppculsh+uKhhAlhhhhhshshu.hhhssh.hasphhlhshhh.hlthalhph ...FasWLltpphaGs.al...csapc.p.+.uhshpsKhpullhhhlshulS.....la...hs...sh....hhl.+l...hLhllhh..hlhhahap............... 1 79 164 253 +2116 PF04305 DUF455 Protein of unknown function (DUF455) Kerrison ND anon COG2833 Family \N 25.60 25.60 25.70 27.00 25.00 24.70 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.50 0.70 -5.11 81 772 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 692 0 260 668 146 239.50 38 83.29 CHANGED LtssDspcKsthspphtpta..tp.tph........shh..sth.........s......tPuRPtcPpLlsPpc..l.s+R.phso.cGRhslLHAlAH..IEhsAI-LAhDhlhRF....................................tsh.....Pps.......FapDWlcVAs-EAcHFsLLpp+L.pplGtpYGDhPAHsGLWcsuccTspDlhuRhAlVPhsLEARGLDsoPhhhp+lppsGDp....pusplL-lIhcDEIsH...................VuhGs+WF+alCp..pcsh-.........Phps..ap.pLl..ppahtstl+sPFNtcARtpAGhsps.ht. ...............................................................................pss.ppKsthspphht.thtstp.hth................t......th..........t....hPuRP..t..p..sp.Llt.Ppp...l....+R.phso.cup.ssllHAlAH..IEhNA..IsL.ALDsshRF.........................pshPtp.......FatDWlcVAs-Eu+HFpLLps+..L..ppLG............h..cYGDhP...........uH..su......L...........Wph...sp..pTtpDlhsRhAlVPpsLEARGLDssPhlhtKlppsGDp.....tssslL.-...lIhpDElsH...................VuhGs+W.a+alCp........pp.th.-...........................Ph..th....FppLl...pth.h..h..shh..+s...Ph..NhpARtpAGFspp.hp.s........................................................................ 0 81 174 222 +2117 PF04306 DUF456 Protein of unknown function (DUF456) Kerrison ND anon COG2839 Family This family is a putative membrane protein that contains glycine zipper motifs [1]. 
27.60 27.60 27.60 31.00 27.30 27.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.90 0.71 -4.15 85 842 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 833 0 221 624 147 138.90 30 85.95 CHANGED hllPlLPGs.lllasGhllasas.suhs....huhhhlslhsllsl.lshlhDalssshGs++hGuo+huhh....GuslGsllGhFlhs...Ph.......Gll....lGPhlGAhluEl.hppc.........s...h......ppAh+suhGuhlGhlsussh+hslslhhlshFlhsl.h ..............lhPllPGs.hllasGhllatas.....hsht....huhshhlshsllsl.lhhssDalusthts++hGuSKhu.h....uuhlGsllGhFhhP....Ph............Gll....lsPFlus....als....EL...lptp..........sh......ppAh+sulGoll.GhlsuolschhlthhhlhhFhhsh.h..... 0 75 163 208 +2118 PF04307 DUF457 Predicted membrane-bound metal-dependent hydrolase (DUF457) Waterfield DI, Finn RD, Bateman A anon COG1988 Domain Family of predicted membrane-bound metal-dependent hydrolases, based on Swiss:Q97LP7. May act as phospholipases. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.30 0.71 -4.89 181 3256 2012-10-01 21:01:47 2003-04-07 12:59:11 9 8 1920 0 845 2084 420 171.40 23 77.02 CHANGED shoH.h.....hhGhsluhsh.ht................................htshhhhhhusluuhl...PD.lDhh.........thh.sptht.h............................................................................................HR.uh.THSllhhhlhuh......................................................lhhhhhthh........................hhhhhhhhhlGh...hsHllh.Dh..hTs.....h.Gst...lhaPh...................pptphths...hhhhhss........hhth...............................................................................................hhhhhhlshthhhtthh 
............................................................................................................................oH.h.....hhuls.huhhh.h.t..h..................................ht.hthhh.sul.lsu.hL.......PD..lDph...............shh..spththl..t...................................................................................................................................H.R...G.....h.TH..S........l.l..hs.hll..sh.....................................................................................lhhhhhhth...................................h...hph.h.h.h.h.h.luh....loHllh.Dh.....hTs........h..G...lt........hLaPh..................................phph.ths.........hhh..h.hss...........hth...........................................................................................hh...............hhhhh....................................................................................................................................................... 0 248 566 724 +2119 PF04308 DUF458 Protein of unknown function (DUF458) Waterfield DI, Finn RD anon COG1978 Family Family of uncharacterised eubacterial proteins. 25.00 25.00 26.50 67.80 22.10 18.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.64 0.71 -4.47 19 224 2012-10-03 01:22:09 2003-04-07 12:59:11 7 1 219 0 67 155 174 148.90 44 90.53 CHANGED ccshpcIhpaltpsspssY+lhlGTDSQst.spTpFVTAIllHR....hGKGA.taaappphp++lpS....LRQ+IahETshSlElA........scltchltttsh...tshslElHlDIGt..pGcT+-LIpElVGhlhG..GapscIKP-SYuASslAD+aTK ...............p..VhpcIpsFlcc.D.PcshY..+LsIGTDSQs+..pc..sTcFlTAIhIHR....lGKGA.thha+pphpc+.to....LREKIahETphS.ElA.........pplh-lLt.hst....sshhhEIHLDIGs.....cGhTK-hIp-hsuhIpu....MG.hpAKIKPDuYAA.ShANRaTK.. 0 42 56 61 +2120 PF04311 DUF459 Protein of unknown function (DUF459) Kerrison ND anon COG2845 Family Putative periplasmic protein. 
20.20 20.20 22.20 21.30 18.50 19.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.94 0.70 -5.59 2 172 2012-10-02 11:02:24 2003-04-07 12:59:11 8 2 171 0 55 291 5 282.60 32 67.88 CHANGED Psl.Ip+tps..h.ht.h.Phh.st.QhsIsphhP.hR.shl..sG.h.tstht-s..hs.thu.pPAstlstpssus..hlp-shhs.P.tlt.s...sE+.sssss.hhupsshp.hhsGDsp.p.hotshhp.htpp....thtItpspVs..hhshs.ha+hP+hh.shLshps....hAA..huhthsDh.s.FhDtsGuhssstssh.tpshc.+hcsslNl.ht.ph.hsahhthP.h+.htLst.hshh.tst.t...V.A.hssttshhhs..h..llsssthshuotlhssts.hhts.thsG.+hsIEGt...h.ph.PhhhsDhphs .............................................................t.t.tspsssss............................................t..t.sss...+hhll...hGDhhustlu-G...L..ptsaspsPs...lhl..s.p..ssuso....GhVR...cDhh...sWsttl.......hhtt.....pp.ss.....lll....hlG...uN...D.RQ..shhs...s..s.s..p.......hpsp.ospWpp.p.Yp+Rltths.chl...s...cp...+...hP...l....lWVGhP...sF+sp..th.op..D..h.Lsh.N.pl.YRsuA..p.+s.Gu.pasDlWD.GF....VD..EpG.p..F.s.po.GsDhsGQpsRLRusDGls.hTps....G+RK..LA..aYsE+Pl....pchL.u.h.....s...tp.ht....l..sssp..............ph.s...spsh.hs....ss...L..ts.stc..u..s...Lh.G....s.s.st.........t..h..ps......ctp...........hhhtDh........................................................................................ 0 13 27 35 +2121 PF01864 DUF46 Putative integral membrane protein DUF46 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein has no known function. It contains several predicted transmembrane regions, suggesting it is an integral membrane protein. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.22 0.71 -4.39 4 235 2012-10-02 14:06:56 2003-04-07 12:59:11 12 3 221 0 122 719 285 165.20 30 94.67 CHANGED hhsll..LtslWalLPAYhANsSullhGGGTPlDhGKsahDGRRllGDGhTWRGhhuGlhsGsllGllQhhL............Golh.tlhLuFLLuhGAlhGDhsGSFIKRRLsh-RGpPAslLDQLsFlluALhhu..YP..ltslPh-hIlllhlITshlHhuuNIIAY+LGhK-V.W ..................................................hh.............hhPshhsNsss.h.l....h.......u........u.......tpPlDh.G+ph..........h.DG+..R..l....h...Gs...uKTa+Ghhs....ul....hh.Gs.lhu..h....l...s....h...l.....................................................................s...h.h...........l.hhu..hh..luhu.AhlG......DlhuSFlKRRlsl...p........c....G....t..................s................h...l....DQl.D.h.l.l.u.u....l..lhh......hh...........h..h.s..l..s....h.t...........h..lh.l..ll.ls..shl..HhssNhluYhltlKp..a.......................................... 1 34 75 100 +2122 PF04312 DUF460 Protein of unknown function (DUF460) Kerrison ND, Finn RD anon COG2433 Family Archaeal protein of unknown function. 24.30 24.30 24.50 89.00 23.30 24.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.45 0.71 -4.65 30 115 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 115 0 80 116 4 136.10 41 22.10 CHANGED +sh+utDVpVclcshh+c+lcFhsh.......st+.cchlIVGlDPGhoTGlAllsL-GclltlhSpRshspu-llchlhphG+PllVATDVsPsP-sVcKlupshsAsLYsPpccLol-EKtcLscchu.......phcssHERDALAAA ................p..h+utDVpVclcsh.ccplcFhsh.........tt+.+chlIVGlDPGhTsGlAllDLcGclltlhSpRshspu-llchI.chG+PllVAoDVsPs.PcsVcKlupsFsAhlasPccsLsl-EKpclscchu........hcssHpRDALAAA. 0 21 50 65 +2123 PF04314 DUF461 Protein of unknown function (DUF461) Kerrison ND anon COG2847 Family Putative membrane or periplasmic protein. 
22.20 22.20 23.30 22.30 22.00 22.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.19 0.72 -4.40 175 1433 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1205 6 437 1186 538 110.60 30 62.80 CHANGED lpcuasRs...............sss..ss.............................p......suAua.hslpNpuspss.pLluss.os.sAppsElHpphh.c...sGhMpMpplp.slslPAssslpL.....pPG....G.hHlMLhsLpps.l.ptG-plslTLpF..ccu......sp....lplp ..................................................................lpssasRs.....ss.s..st..............................................t......suuua..hslp..N.pu.........s.pss.pLluss..o.......s..susp..sElHphhh..p..........susM.+Mppls...slslPAss.slpL.....cPG.......G..hHlMLhsLcps..l.ptG-plslTL..pF..csu....tplpl....................... 0 102 259 351 +2124 PF04315 DUF462 Protein of unknown function, DUF462 Mifsud W anon COG3101 Family This family consists of bacterial proteins of uncharacterised function. 21.00 21.00 22.20 21.60 20.90 20.00 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.10 0.71 -4.87 44 863 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 858 0 104 403 45 164.50 66 89.42 CHANGED lFNpsFtppaNTpLVpGs-EPlYlPA.......ssp.......sap+IlFAHGaauSALHEIAHWClAGpcRRhL.DaGYWYpPDGRstppQspFEpVElKPQAlEWlhuhAAGhpFpVSsDNLsGs.psD....ppsFpppVtpQlhpal..p....................pG.lPtRAttFhcALppaYpss..ls.ppF.ht .............IFNsCFu-.-FNTRLlKGDDEPIYLPA.......Dsc......VPYpRIVF.AHGFYASAlHEISHWCIAGcsRRcLVDFGYWYCPDGRDAQTQopFEcVEVKPQAl-WLFCVAAGaPFNVSCDNL-GD.hEPD...........RlsFQR+VHAQVhsYL....p....................pG..IPcRPA+FI+ALQsaYcTP.pLsAEpFs.hs........................... 0 22 47 79 +2125 PF04317 DUF463 YcjX-like family, DUF463 Mifsud W anon COG3106 Family These proteins possess a P-loop motif. 
25.00 25.00 26.90 26.90 20.10 19.90 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.57 0.70 -5.91 51 1031 2012-10-05 12:31:08 2003-04-07 12:59:11 7 2 996 0 159 638 61 429.50 59 95.04 CHANGED lRLuVTGLSRuGKTsFITuLVspLLcs............u+LPlapusppGRlhuschtPQPD.sVPRFsYEstlsuLhsss..PpWPsSTRslSELRLul+Ypspsultphhss.uTLaLDIVDYPGEWLLDLPLLcpsatpWSppphshhps.tRtplApsaLsthpslD.sutsD..EtphpplAcsaTsYLpss+tpp.GhphlpPGRFLLPG-.LcGuPsLsFhPLsh.s........ppsscsShhshhc+RY-tY+ppVVKPFa+-HFuRhDRQlVLVDsLsALNtG.pAhpDhcpALsplhpuF+hG+s................ohLscLF.uPRIDKlLFAATKADHlpc-QHspLhuLlppL...lpcutppApFpGscscshAlAulRATppuhVppsGcplsslpGp.hts..........scpsslaPG-lPpch................sssshWppts.FpFhpFpPstl......ssstslPHIRLD+sLpFLLGD+L .......LRLAVTGLSRSGKTAFITuhVNQLLshps.........s.......uRLPLhu...As...REpRLLG.....VKRlPQpDhulPRFsYDEuLspLh.....u.....sP..PsWPs..PTRGVSEIRLALRa+os.cu.LLRHh+.-.TuTLYL-IVDYPGEWLLDLPhLs.QDYhoWScQhtuLLpG.p.RuEhuspWhthsc..s....LD..PhAs..AD..EspLAcIAsuaTDYL+pCK...pp.GLHaIQPGRF..VLPGD.hAG.APALQFFPhPclss.t.....thhupAcKposhuML+cRapaYppKVVKuFYKsHFhRFDRQIVLVDCLQPLNuGPpAFsDMRhALoQLMpSF+YGp.R................oLh+RLF..SPhIDKLLFAATKADHVThDQ..HsNhVSLLQQL...lQ..-AWQpAAFEGIsMDCluLASVpATpoGhlc.hs.Gc+lPAl+GsRLsD..........GtslTlYPGEVPuRL.........................Pu..tsFWp..p.Q.G.FpF-uFRPpsh......clDcPLPHIRLDuALEFLIGDKL................................................................ 
0 25 67 110 +2126 PF04327 DUF464 Protein of unknown function (DUF464) Kerrison ND anon COG2868 Family \N 20.40 20.40 22.10 22.10 18.80 18.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.51 0.72 -3.78 58 1433 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1398 14 197 712 9 103.00 33 94.18 CHANGED MI+lphp+sp.splhuFploGHAs...........hu......chGpDI.....VCAuVSulshsslNul-plt.phcsphph....psGaLplcl.s.....ppppsQllLcshhluLpsltppYs.ca.....lpl ...................MIpsshp..csc...tG....p....ltuhphsGHAs.....................hu...-a..Gp.Dl.....VCAuVSslshssl.Nulppls...sh...c...s...p...lcht....cuGalpl..cl..sss...................pp......cps...QlllcshhluL.ps.lp..c..p..Ys.-alp................. 0 90 142 168 +2127 PF04325 DUF465 Protein of unknown function (DUF465) Kerrison ND anon COG2841 Family Family members are found in small bacterial proteins, and also in the heavy chains of eukaryotic myosin and kinesin, C terminal of the motor domain (Myosin Pfam:PF00063, Kinesin Pfam:PF00225). Members of this family may form coiled coil structures. 21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.43 0.72 -4.26 175 1757 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1278 1 425 1015 380 49.40 33 66.23 CHANGED hscLtpcHpp.LDppI..pphppp...s..s.scshplpcLKKcKLpLK.........Dclhplhpp .......htpLhccHsc.LDccI.pphEss...s...s..ss..sh..cl..pc.LKKcKLpLK.........Dclhplhp........................... 0 96 245 335 +2128 PF04328 DUF466 Protein of unknown function (DUF466) Kerrison ND, Eberhardt R anon COG2879 Family Small bacterial protein of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 
25.00 25.00 30.10 25.70 20.80 23.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.34 0.72 -4.14 41 1692 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 1243 0 200 511 13 64.60 52 94.47 CHANGED hhpclpphh+...hhtpsh+hhVGlP-Y-sYVpHM+ppHP-c.PlMohcEFFR-RQcARYuusuu...RC.C ................MFssLupstKYLGQAA+hhlGlPDYDNYV....E.HM+psH..PDp....s......sMoYEEFFRERQ-.ARYGGcGus...RC.C......... 0 32 93 150 +2129 PF04326 AAA_4 DUF467; AAA_div; Divergent AAA domain Kerrison ND anon COG2865 Domain This family is related to the Pfam:PF00004 family, and presumably has the same function (ATP-binding). 25.30 25.30 25.40 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.83 0.71 -3.96 163 2671 2012-10-05 12:31:08 2003-04-07 12:59:11 9 56 1473 5 706 2349 272 120.30 21 27.02 CHANGED Esppl.EaKpshttstp....................lhcslsAFAN..s....t.....G.GhlllGlp..D...........stp...lhGl.......................tpttpttphhpp.ltptlpPt...........lph.php.......hthpsp...............................................................pllhlp......l.puspts.......................http....sphYhRhuspspt.hs ....................................Esppl.EaKpshpp....p.....................lhcslsA..FAN....s.........p.........G..Ghlll.Glc....D.........ssp...........lhGl............................p.p....p.p.th.p.t.h.t....p........h.t.p..php..s...............lt.h....phph...........h..p..h..p.sp...............................................................pl.l.hlp...l.tutpts............................ht.p.............sthahRhssts....t................................................................................................................................................................... 0 253 490 579 +2130 PF04318 DUF468 Protein of unknown function (DUF468) Finn RD anon DOMO_DM06450 Family These conserved ORFs probably are probably not translated into protein [Personal communication, Val Wood]. 
25.00 25.00 83.00 82.90 20.10 17.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.11 0.72 -3.42 4 21 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 3 0 12 21 0 79.70 78 74.39 CHANGED hHGTCLSGLYPVPFTHpsHcYPHFsIYISFuGPKYCITALNshlIPLLpHI............LTsphIaTYhNIspKSP.KpPKHKNILlFN.sp .MHGTCLSGLYPVPFTHpuHDYPHFNIYISFGGPKYCITALNTYVIPLLH+I............LTTQFIaTYsNITpKSPlKSPKHKNIL.FNpNT... 0 12 12 12 +2131 PF04320 DUF469 Protein with unknown function (DUF469) Finn RD anon DOMO_DM08606 Family Family of bacteria protein with no known function. 21.90 21.90 23.50 23.00 21.40 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.35 0.72 -3.23 37 852 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 825 0 116 317 11 100.30 61 90.89 CHANGED RRLRKKL+lsEFQELGFplshpaccshst.-phDshlDpFI-.hIEspsLsasG.uG...phtaEGhlssp.chG..psTEEcRtsVctWLcu+s.lpslclo-LhDhWa ..............RRLRKKMHIDEFQELGFSVuW+FsEGTS-.EQIDcTVD-FIsEVIEPNcLAFDG..SG....YLsWEGLIChQ...cIG..KCTEEHpAlV+KWLEtRp.Lc-VcsSELFDlWW...................... 0 12 33 77 +2132 PF01865 PhoU_div DUF47; Protein of unknown function DUF47 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes prokaryotic proteins of unknown function, as well as a protein annotated as the pit accessory protein from Sinorhizobium meliloti Swiss:O30498. However, the function of this protein is also unknown (Pit stands for Phosphate transport). It is probably distantly related to Pfam:PF01895 (personal obs:Yeats C). 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.29 0.70 -5.17 9 1815 2012-10-02 11:27:25 2003-04-07 12:59:11 11 7 1654 7 546 1275 457 205.00 24 94.91 CHANGED hhhlFtpss.+plhcHhchlspslhthtchhcuhhcGshcpsEcltc-lsphEccADpl++-lclpltpuhFLPssRsDllcllc..DclhDshEcsAhhlhltc...thPc-hc--hhthhppolcshchltcslctl-p.l-suhp.........llpclcplEccsDtlptclhctlas...psh.sshchhhhhpllcplusluDpuEDsu-clpllhhc ................................................................................h...........t.h.phhpths.p.sh.pth.hp....hh...p.t.h.h...p...t...s......p..c...h...p....ph..t....pplpphEccuDplp+clhp.cLsp......s......F....l...TP.l-...R.....-D....Ih..p.L.ssph......Dclh...Dth....c.c.s...u...t....h.h......h...hp......l.........p....t.h...pp..t....hh..phsp.hlh....c.us.pp.h......p.p.s..l..p.t.L..sp.h.h...p.ss...h.p....................phsh...c..lcplEscsDplhcphh.p.c.LFs..........tp..t..h....-...s..l..pl...l...th.+.cIh.-tlEcls...DpspcVAstlEsllh........................................................................................................ 1 177 348 453 +2136 PF04322 DUF473 Protein of unknown function (DUF473) Waterfield DI, Finn RD anon COG1935 Family Family of uncharacterised Archaeal proteins. 25.00 25.00 103.10 102.80 20.80 18.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.19 0.71 -4.46 17 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 54 0 43 55 1 117.30 33 92.22 CHANGED MchhuLTGIu.psIp-LhpstlRTlEl+SspNlhslpph.psG..DhlFlTssshcDlhsGTpGllApVhphplshp+.h..tps.hhEE+EhhsuRlQLchlGhu+lh.clhppchhpshhV..- .MchluLTGIucpsls-Lh+splRTlEl+SspNlhslppl.psG..DhlFlTssshpDlssGTpGlIuclhplplspp+.h..tps..h-E+EhhsuRlQLchlGhu+lh.cVpppphhpsshV......... 
1 8 22 33 +2138 PF04536 TPM DUF477; Repair_PSII; Phosphatase; TLP18.3, Psb32 and MOLO-1 founding proteins of phosphatase Waterfield DI, Finn RD, Bateman A, Eberhardt R anon COG1512 & Pfam-B_18715 (release 10.0) Domain This family has a Rossmann-like fold. It has phosphatase activity [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.21 0.71 -10.28 0.71 -4.24 162 3085 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 1821 5 879 2585 289 124.00 22 42.17 CHANGED hLo..sspppplpptl........................pph.Epposs................plhllhh.sh..s............................................................................................................pshcphAhchapp.ht...ls..pptpss.....GlLlhl......uht..-+..p....hpl..su..........tGlpsh.lscshhpp.llpsh...hsth+pspaspulhsulptlsphl ................................................................................Lssppppplpp.tl.........................p.ph...cpp.s..ss................plhVlh..l.ss.h..ts...........................................................................................pshcpaApclapp.ht......lG......ppp..pss...............GlLlll..........uhs......-R.p................lpIp.s.G..............hGlcsh..l.sD......s...hhsp.I.lpph......hsth.+.....p.s..c...astGlhtulpsltt.......................................................... 1 272 591 767 +2139 PF04334 DUF478 Protein of unknown function (DUF478) Finn RD anon DOMO:DM06402; Family This family contains uncharacterised protein encoded on Trypanosoma kinetoplast minicircles. 
25.00 25.00 99.60 96.90 19.10 18.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.32 0.72 -4.12 2 2 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1 0 0 4 0 60.50 64 97.58 CHANGED MGVQh.sYTNPVLFWGlFEVRGTSKGVGVILTRFF...............l.Ih.lhlhhGF.pts.a MGVQh.sYTNPVLFWGlFEVRGTSKGVGVILTRFF...............l.Ih.lhlhhGF.pts.a 0 0 0 0 +2140 PF04336 DUF479 Protein of unknown function, DUF479 Mifsud W anon COG3124 Family This family includes several bacterial proteins of uncharacterised function. 21.40 21.40 21.40 23.10 20.80 19.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.51 0.72 -3.93 55 920 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 913 0 158 494 113 106.50 46 55.17 CHANGED aDHaLApcWppap.sp.P.....LspFsppsYptLpst.....tshLPt+htplhsthhppcWLsuYpchsslppsLpphup.Rhs+ss..Lssuht-lpp.pYppL-psFhsFYPpLhsa ..............................WDHFLuRHWsplos-..P.......LppFlsaAppplhsh.....lPcpPsRFlsL.NsYLWuEpWLs+Yc-h-a.I.ppVLsGMA....s.RRP....R....Lcu....L.ps........SahDL-s.HYssLEscFhpFYPchMs.p.................... 1 33 79 125 +2141 PF01867 Cas_Cas1 DUF48; CRISPR associated protein Cas1 Enright A, Ouzounis C, Bateman A anon Enright A Family Clustered regularly interspaced short palindromic repeats (CRISPRs) are a family of DNA direct repeats found in many prokaryotic genomes. This family of proteins corresponds to Cas1, a CRISPR-associated protein. Cas1 may be involved in linking DNA segments to CRISPR [2]. 
20.20 20.20 20.30 20.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.77 0.70 -5.33 140 3143 2009-09-11 00:29:59 2003-04-07 12:59:11 11 9 1958 26 807 2515 107 209.90 20 81.31 CHANGED slalpspGshLphcssslhlpp....................ppc.........................lPlppl..cplhlhGp.lslosthlphhscpsIsltahstp.....G.pahuphhs...........thstsshhph.tQhpthhspptpltlA+phltuKltNttphL.....+htppp...............hpphhpplp.t......h..hpph............p.slsplhGlEGpuuphYapshs.pll................................................p....sapFp....tR......s+RP...P..pD.lNAhLSaGYulLhspshsslhtsGL-PhlGaLH.p...sp.s+.oLuLDlhE.FRPhllDchlhpLl...scphlp.tpcF..t.t...............sshLscpu++phlptapc+ ...................................................h........t.lphtttthhh.p..................................ptt...............hhtlPltpl..t.lhl.st.stlotthhthhsp.sh.lhah.st.t.....s..h..hhth...............ttps.h.hhhp.hph..s.t.t...phtlspphht.ph...............................................................................phtplhsh.Eu.t.hsp.hathh...........................................................tht.ap....t+........p.............ts...h..Nthlshu.s.hlhs.h.ttlhhhGhsshlGhhH.......p.t..uhs.Dlh-.h+s.hss.hs.hthh....t.......................................................h............................................................. 0 307 576 708 +2142 PF04337 DUF480 Protein of unknown function, DUF480 Mifsud W anon COG3132 Family This family consists of several proteins of uncharacterised function. 
25.00 25.00 30.30 27.40 24.00 21.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.80 0.71 -4.35 53 923 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 913 1 173 537 52 150.80 62 67.31 CHANGED LoshEuRVlGsLlEKphTTPDpYPLSLNuLssACNQKSsR-PVMsLoEu-VppuLDpLpp+pLlpp..sshGuRVsKYcH+Fsps....LpLsstphAllslLLLRGPQTsGELRoRopRhapFsDlspVEssLppLtp+p....ssLVscLPRpPGc ...LTAhEARVIGCLLEKQVTTPEQYPLSlNullTACNQKTNREPVMNLSEuEVQ-pLDsLl+R+.hlRs..SGFGsRVoKYEpRFCNoEFGDLKLSuAEVALlosLLLRGAQTPGELRoRuuRM.a-...FuDhuEVEusLEpLAsRE.....sGPhVVRLsREPGK...................... 0 32 81 128 +2143 PF04338 DUF481 Protein of unknown function, DUF481 Mifsud W anon COG3137 Family This family includes several proteins of uncharacterised function. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -10.91 0.70 -4.61 121 1242 2012-10-03 17:14:36 2003-04-07 12:59:11 7 4 1038 0 283 927 368 208.30 31 76.69 CHANGED schGht.hsoGNTcosshsuphp.hphcts.pachph.phph.h..............p.............pss.sp....s........oscpahhshphcaph..scphahaupspacpDcFs.................shch.ctshusGhGaphhs.scpt...pLslcsGsuaphp.........chpss.............pspsph....hspsshsa.paplscshphppphph...........h....sssshphps-suLpspl..sss..luhcluhphcasops.ss.ut....cpsDophshsLsYsF 
.......................................................................................................h.phGah.upoGNTc...os.S.l..suc....ss..hsah.st...pp...tasl..husupp.............s..................sss.sc.........c.............ou-+assuscscapl.......s-hs.......Yl.a..G.pusahs...D+as.....................................................uYc....p.+.ss.l.s.uGhGhQhls..sshp.......shch..E...hGPGhRas........cascs.................................sscsps......luhuussY..ta...p.l...o...-..s.scF..s..pslol..................................h.........u.sp.cT....slsSE.....su....LsssI....scc....huLKluasl.sasopP.Pp..us.........c+oD.ppoolsLsYs......................................................... 0 92 164 226 +2144 PF04339 DUF482 Protein of unknown function, DUF482 Mifsud W anon COG3146 Family This family contains several proteins of uncharacterised function. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.14 0.70 -5.59 99 853 2012-10-02 22:59:21 2003-04-07 12:59:11 7 7 784 0 330 1004 1212 354.00 38 92.18 CHANGED ulspIssspWDul....s...............s...................s.......tsPFlpasFLsALEpSGssss..cTGWtPpHLslp.cs.......s.....pl....................lussPhYlKsHShGEYVFDauWA-AapRsGhcYYPKLlsulPFTPssG.RlLht...st.cpsth.tpsLhpultphspp....pslSShHlhFss..ts.-tth...........hp..ptGh...............hpRhspQFHWpN......pGYpsFDDFLusLsS+KRKsIR+ERcps.tppGlplchLpGs-l..spppW.ctFapFYpsThs++..WG.pPYLo+pFFphlscp.hs-pllLlhA....c............+.sGc..lAuALshh.......u.....u-sLYGRYWGshEc..hshLHFEsCYYQuI-aAItpGLpphEuGAQGEHKluRGahPssTaStHaltcsuhcpAlscaLppE+ttlpthhctls.pth....PF+c 
....................................................................................t.lsplstspWsul....ss........................................t.sPFlp+sFLpALEp..SG..ss....ss..poGWtPpHlslhcs.....s........pL....................................................lussP....h...YlKsH.ShG..EYV.FDauWA-A.apRt.G..h..pY..YP...K.Lhs...ulPF...T.Ps..s..Gs.R.lLst.......t...p...t...t.th...tttLh.tulhp...hspp.....ps.l..SSh...H...l.h.....Fss.....t...s.....-..t...th..........h..p.....p.....t..Gh..................................ht.Rhsh...Q...F.HWp.........N........................pG.......Y.....tsFDDFL.s.s.L..su+.KRK.slR+ER+ps...t............t....t.Gl...p.hchl..p.G.scl...sp.t..pW.chFaphYtsTh..tc+....hu...pP..Y...L......s..c.......pFFphlu..p.p...h.....s.....-.p..llLlhA...c........................c..s..G...c...lAuAl.shh.....u........................sssL....Y....GRYW...G......s...h...c..c......h...s...t.....L..H..F...E...s.C..Y.Y...Q......u.........I-aA.ItcGLpp.h.E....u.G.......A....Q....G....E..H.....KlsR..G..ahPsh.T.aShH..als....csuhppAltcaLppEpttl.pthhptl.p.pt..Pa+.................................................................................. 0 81 191 269 +2145 PF04467 DUF483 Protein of unknown function (DUF483) Waterfield DI, Finn RD anon COG1790 Family Family of uncharacterised prokaryotic proteins. 25.00 25.00 39.40 33.20 24.30 24.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.48 0.71 -4.07 4 23 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 23 0 16 23 3 120.50 47 61.63 CHANGED h-hQI.lsc+hpshlRs..tl-shIs...sELGlhR.h.stp.G+LLsYP-CCl+SasEspR.hsh-tcaLtEsuEhs...h.....hGhhhIhhP....SsFIPCSLcCp-AlccshIuhhh+-EFcchh-Lcc.Lh ..........................LchQIEIVcKYpscVRP..AIDPhVS...oELGIYRRLD.DhElG+LLsYP-CCl+SFsEssR..huIDp-HLKElEchchc.........................s..hYAIlLP...............SGFIPCSLcCccAlcptLIuhlsccpac+lLcLEcEL............. 
0 4 8 12 +2146 PF04340 DUF484 Protein of unknown function, DUF484 Mifsud W anon COG3159 Family This family consists of several proteins of uncharacterised function. 24.20 24.20 25.30 24.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.30 0.70 -4.88 6 1222 2012-10-02 14:34:25 2003-04-07 12:59:11 7 4 1147 2 271 774 583 214.30 37 92.58 CHANGED ts.pssLsspsVs-YLppHP-FFhcascLltcLslP+psussVSLschQLsRtRp+hccLccclstLhs.AtsN-plFhchhtLphsLhcApSLsDslppl-phs+chhhtshspLlLhsDsth..u.u......lupcshp.sthspLsscpshhGhLphs-thhLas-.-ApplGSsAll.Lu....pt.hGllAFuSpDspHFpsuhGT.FL+alAplLschLc..RWss .....................................ttLss.csVs-YLhcpP-F...Fh+pschlcsl....c...l...P...H....s...s...p...u...sV.SLVEhphtRhRp+lc..hLE-phshLMcpAtsN-sLFh+lhpLptpLh.sA..s.S...Lp-hl........hp.h...pchsR-.lhhs.s...s..sL...+.L...a....s...D......p........hp......h........u...........u.......h...........p.....h...s...l..u....c.........p..s..h...c....s.........l.p....h.....p........+........L......u.....t........p............p........t.Y.....LG......s...L.s....s............s...E...h....h....h...l..........h.........s.........p...A......p....tl................G............SlAhshLs................ssu.s...l..GlllhuScDspHapsu.GT.hLpplAh.hLsclLp..Ra..p............................................................................................................................. 0 52 138 206 +2147 PF04341 DUF485 Protein of unknown function, DUF485 Mifsud W anon COG3162 Family This family includes several putative integral membrane proteins. 
28.00 28.00 28.20 28.00 27.80 27.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.91 0.72 -4.27 132 1454 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1255 0 378 871 80 90.20 38 81.72 CHANGED lpssPcFpcLhpcRppFuhsLohhhLlhYauallLlAassshLup.ls.G.slolGlslGluhhlhsallTulYVp+ANppaDtlspplhcc .............lpssscFp-Llc+Rp+FuhhLollhLshYhuFlLL.lAauPshLu.sPlt.G.slThGl.s.lGlGlI.lhoFlLTulYlh...+ANscFDclsppllc................. 0 96 224 312 +2148 PF04342 DUF486 Protein of unknown function, DUF486 Mifsud W anon COG3169 Family This family contains several proteins of uncharacterised function. 25.00 25.00 41.40 41.30 20.00 19.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.55 0.72 -3.99 4 583 2012-10-02 19:55:49 2003-04-07 12:59:11 7 1 574 0 179 409 45 109.10 55 92.52 CHANGED LhosLLLhsSNVFMTFAWYGHLKphss+PhllAsLlSWGIAhFEYLLQVPANRIGassLoVuQLKhhQEVITLhlFVPFSVaYLppPL+LsYLWAuLCllGAVYFhFR ...h.hsllLLlhSNlFMTFAWYGHLK.....h..h.........s......s......p......P.....lh.....hslLlSWGIAhFEYhLQVPANR.I.G.ap....s....a....osu..QLKlhQEVITLsVFssFo.V.a.Yh.p.E.sL+hsaLhAhlhl.luA.VaFlF+........ 0 56 129 152 +2149 PF04343 DUF488 Protein of unknown function, DUF488 Mifsud W anon COG3189 Family This family includes several proteins of uncharacterised function. 
22.20 22.20 22.20 22.60 21.50 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.63 0.71 -3.74 219 2338 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 2035 0 577 1579 120 111.60 33 83.11 CHANGED plcchhc...hhpssGh+......lLVD.RhaPR.....G.hsK-slphsl..............Whpc.lusspclRchat.tt....p...hsphhcpYtpclps...........pslpcLtp.hs.ppt....lsLlhut+-.p+.......sHthlLt-h.L .............................................p.hcRlY-...htps-GhR......lLVD.RlWP..R..........G...lpK.pshth..D.....................WhK-.luPSs-L.....Rc.hap..p-s...c.................aspFtcpYttELtp.p...t....................pthccLts.ls...p...pp.......slsLLauu.+.c..ppp.......NHAhVLt-hL................................... 0 172 365 488 +2150 PF04356 DUF489 Protein of unknown function (DUF489) Kerrison ND anon COG2915 Family Protein of unknown function, cotranscribed with purB in Escherichia coli, but with function unrelated to purine biosynthesis [1]. 22.90 22.90 24.60 38.60 22.80 22.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.09 0.71 -4.71 56 1032 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1015 2 152 528 101 191.80 55 92.10 CHANGED -pslALAGlhQAupLVpplA+pGps.-pss.....hpsslpSllppsPs.....sshsVYGspp....sL+hGlcsLhstLs.......tspspss-lsR.....YhlullsLE+KLs+ssshhspLupRIsplpcQhpHF..sh............tc-slluslAulYsDsISsLusRIQVpGsPphLQpststp+IRALLLAGIRuAVLWRQlGGpRhpLlFuR+pllpp .......DITLALAGICQuA+LVQQLA+pGcC.Ds.DA.....LcsSLsSllshNPu......STLuVFGGs-...tNL+lGLETLlusLs..........ssppuhsuELTR.........YsLSLMsLERKLous+sAlssLGsRIstLpRQL-HF....DL............ps-slhSuhAuIYlDVISPLGPRIQVTGoPulLQsPpVQuKVRAsLLAGIRuAVLW+QVGGuRhQLhFSRp+Lhs.p.... 
0 30 66 113 +2151 PF04357 DUF490 Family of unknown function (DUF490) Kerrison ND anon COG2911 Family \N 20.40 20.40 20.60 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.40 0.70 -5.36 151 2461 2012-10-03 05:41:17 2003-04-07 12:59:11 8 21 2009 0 647 2270 560 348.80 22 27.68 CHANGED pp..htst....sssGplplsG.....slsh..................tthp......hsLsl.ph.pp..htlhpssthp.......sp.l.sus...lplsG..sh......shplsGp..l.plspuplpl.phs..ssts.s...........................shhhhttt...............................t..h.h.h...thslplph.....ss..........plhl...............cu.u...l..csphs.G.sLpl..........sts.....ss.hthsGplplh.c.Gp.hph.....h.upphpl.p.pGplsas.G.sh....pPhLs.lpAhpps...........................ss...........pls.lplsG...ssspPp........lph.....................p.S...pP.................s.h......................sps-lluhLl.hGp....shsshu.............ttsthssuhuslhts...sshtshh.sp.........ltpt.......hGl...s.slsl.......sstt.........t.......................ttshslplGphl.ssclhlsht..hsh.ss.t...........tphp......lcYpls...pphplcups..s................tt......tlslhaphca 
...................................................................................................hht........stpGpl.pl.sG.shsh........................................tthpshp.....hp.l.sh...pu...sp...........hp..l...hs...sshhp..............hp..l..sss...........ls.l.pu......ssp......thslsG.p....l...sl...stup....l.s.l.phs...ssssss..........................-l.shlsps......................................................................................................pt.......ts.hsl...phslp.lp.l....ss..............slpl...........................suhG............l..cupLs..G..s....Lpl..............sts...........ps...hthsGplsl....c..Gp..ach..................h.Gpclhl..c...cG...p..lh.....Fs...G.s.............pPhLs.lpAhpps....................................sl........................hss.l.plsG..........ss..pPp.........hpl................................................................h..S..pP..........................s.h........................spppsLohLl.hGp..........shssss..............p.ssu.hsshh.h.u....s...sphh.sp.....lsps...........hGl...s..slsl.................so.ps......................................sssspls.lut.hl.tspl.l......phs..huhhss.s..............................phs..........lcYpL.....plhlpuhs..s.....................sp....ulsl.hYphca................................................................................................................................................................... 0 213 426 555 +2154 PF04361 DUF494 Protein of unknown function (DUF494) Kerrison ND anon COG2922 Family Members of this family of uncharacterised proteins are often named Smg. 
20.60 20.60 20.60 22.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.91 0.71 -4.55 48 950 2009-09-11 06:38:14 2003-04-07 12:59:11 8 1 940 0 158 376 87 154.50 57 99.23 CHANGED MFDVLhYLFEsYhcs...-.-hpsDp-pLsc-LtcAGFcppEItcALsWL-sLushp......................tshhtsssssoh.RIYospEhp+LsscsRGFlhFLEphsVLsspoREhVIDRlMuL-ssplsL--LKWllLMVLFNhPGpEsAathhE-Lla-pps...thlH ..............MFDVLMYLFETYIHs..........EAELpVD..QDcLpp-Lo-AGF-+EDIYNALhW.LEcLA-hQc..................s.scPht..hso...D.PhSh..RIYTsEEs-RLDssCRGFLLFLEQIpVLshETRE.MVI-RVhAL...Ds...sEF...-L-DLKWVILMVLFNl....PGsENAYpQMEELLF-ss-.GhLH............................... 2 38 88 123 +2155 PF04362 Iron_traffic DUF495; Bacterial Fe(2+) trafficking Kerrison ND, Mistry J anon COG2924 Family This is a family of bacterial Fe(2+) trafficking proteins. 20.90 20.90 24.90 24.90 19.20 18.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.95 0.72 -4.55 65 1349 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 1339 3 260 617 383 87.20 59 96.06 CHANGED Mu..RsVhCtKLp+EuEGLDhsPaPG-LGc+Ia-sVSKcAWppWhc+QTMLINEp+LshhDscsRcaLpppM-+FhFsps.sspspGYsPP ...........................MoRTlFCshLp+EAEG.DFQ.YPGE.LGKRIaspISK-AWupWp++QTMLINEK+LNMMssEcRKhLEpEMssFLFEGc.-..s+lEGYsP................ 0 61 135 203 +2156 PF04363 DUF496 Protein of unknown function (DUF496) Kerrison ND anon COG2926 Family \N 21.50 21.50 22.30 23.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.10 0.72 -4.17 13 780 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 775 0 75 202 1 94.80 77 85.78 CHANGED hpsVLEhVRhhRRKNKLpREIsDN-+KIRDNpKRVhLL-NLh-YI+ssMShEEIpsIIcNM+sDYEDRVDDYIIKsAELSKERRElS+KlKphKc ...................FQDVLEFVRLFRRKNKLQREIpDsEKKIRDNQKRVLL.LDNLSDYIKP..GMSVEAIQGIIASMKuDYEDRVDDYIIKNA.ELSKERRDISKKLKAM..GE.. 
0 6 23 50 +2157 PF04365 DUF497 Protein of unknown function (DUF497) Kerrison ND anon COG2929 Family \N 23.70 23.70 23.70 23.90 23.60 23.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.80 0.72 -4.06 96 916 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 614 1 261 806 130 76.80 33 83.44 CHANGED WDps.KsptNhpKH.GlsFt.-AttlF..cshtlhh.................Dtccs..sEpRa.hslGh...ss+llhVlaThR....ts......tIRIISAR+As++Ec ....................................WDts.KsppNht..K....H.GlsFp.-Attl.F..ss..tlph................pDtpp....sEtRa.hslG.....hh.........st.p..llhV.saT.R........sp........hlR.lISs...R+Asc+E........................................ 0 68 179 219 +2158 PF04430 DUF498 Protein of unknown function (DUF498/DUF598) Waterfield DI, Finn RD anon COG1504 Domain This is a large family of uncharacterised proteins found in all domains of life. The structure shows a novel fold with three beta sheets. A dimeric form is found in the crystal structure. It was suggested that the cleft in between the two monomers might bing nucleic acid [1]. 20.90 20.90 20.90 22.80 20.70 20.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.42 0.72 -4.51 101 971 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 868 12 443 853 223 109.00 28 70.39 CHANGED lpuY..usGthpl.s....shp..........apuulllhPsuh..hsW..s..............stphppls.......hcchshlhsh...ps-llllGTGtc..hthlssplhptl.pptG..lslEsMsTssAsRTYNlLhuEs..RpVAAALl .................................................................lpuYussthpl.s.....shp..............hps..s..lll.hs...suh..hsW..p.................................................sps.ppls........................psh.shlhth......p..s....-llllGT.Gtp...hth..l.s.sp....lhthl....ppt..G..IulEsM.......sTssAs+TYNlLhuEs..RpVuAALl.......................... 0 132 244 338 +2159 PF00674 DUP DUP family Bateman A anon Pfam-B_99 (release 2.1) Family This family consists of several yeast proteins of unknown functions. 
Swiss-prot annotates these as belonging to the DUP family. Several members of this family contain an internal duplication of this region. 21.10 21.10 22.30 22.00 20.20 20.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.64 0.72 -4.09 32 469 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 27 0 183 427 0 103.60 29 52.60 CHANGED hlhhts.....hhhss.h.hhllhshhhhhhhhhsF.sht....hhsp-achphLhElIsc+Pu.tuc-WcpIAhpMNpYLF-cclWpT.YaFassp-CppaFpphltthhstKps ..........................................................hh............h.h.hhhhhh.hhhhs...h..h.h...hh..lhsh.php.......hsp.ph...hphlp-llpppPu..sspsW-tIA...tphNpYLa.-p.+hWpo.t.h.aFasutcCpphFpphlhp.hs.p..p................ 1 36 47 123 +2160 PF04465 DUF499 Protein of unknown function (DUF499) Waterfield DI, Finn RD anon COG1483 Family Family of uncharacterised hypothetical prokaryotic proteins. 21.10 21.10 25.60 24.10 19.70 20.90 hmmbuild -o /dev/null HMM SEED 1036 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.71 0.70 -13.89 0.70 -7.00 5 220 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 188 0 107 235 48 429.10 21 44.17 CHANGED 
PcALcDcEV....L+sYcs-KREplcElu-hIcsl..ulclVlVYGKss..lPpPs+PL-hsst+lKTlWGYIAapLG+YElVEcsD+NLTsPshElL+-LlpGc+VLlLIDEIuDYlDslppSusEE-RsYucNVusFLD+LApALos...SpSlMVITIPMEcEGts..lKspcEYcR..-lVRulh-AVoRVuGtclYoPV+h.GRcsELlEVLKKRIFK+lDcsE+c+sLp+L......REshus.+EIFG-sSpFhcEIccTYPFHPEYIpVLRsIIERls.LQRTRDLIRI...TRIVlRc.LlcuhE....sPsLIMPaHIDLs--cI..+GlLFucpocYuDYtoIa-s-lIs-EKl...Kchu+PELu+IILoYIFL+TYPaDS.....................Ppshs-FPTsccIARuVYEPEhFcpppWlPsDIKDslEEI+uSl+FlaLs...++Dus..aWFWRlANVochVcSKsEELIEos+G-VhspLVKp.lc+Ll+EuKSlcpuRu+tsps......caVsFFcspsVIVoK..-PQElhDos-YKLhVlVpDDV-EDhLc+lIFhhsousRTY+NTVVVlYlusc.uM..........................-oLlEhpAplhAC-cVM-cI+EhY.........uKYGKDVlcIQ+ohVc-I+c+ALED.LENQIlpsFR+VAYPctDG....l-pspAsASSKSVlENVYSALsS+..GKIV.-EFDFEpLtshLc-.Vslcl.+sEGYsVSELhNlIpS..NPcLPMIcpccLh-AIK-AVccL-IG.IERcG+IaFK+IY..KEIPch.--E+GcsPusVR..........cDVILPt.....-EALpRQlpELLc-EK--It-+sGp+Ycl+h..............WYE..IYLPppNhplPL+slV...scEscV+DE..ptlhaGaIVE++EEcpIl+GEF-..LpVucupIcGKPG-sV-lcV+lpPlGDtchsVELsosaG-LsoccVpL.EGpsl+lpW.phplsc.+c.shIcuKS.-s+ttphElllIPKlEp-IlEVcElcEpHKGslLlSI+..Slcs....lDoL-pIsc.aEGs........lsGSLEl.....EcPlWcVpF..cssDhcVFcaIluElc-aLGSpsplc..V-lchSEElhINDLlhEKL+PLsG+VKF+lKKG-.pc 
.........................................hhh.shs.......................................................................................h...pT.lWG-lAhQL.......G...........t.............ap...h....l...tp.t.Dt.p.t.hs...P.G.tphlpc.Lh.....p....t.....p.P..sLILlDElssYhc......phh.tst..s.h.s..sss.htpph.sF.lp....s.L..o.c.A.sps.....s.p.s.h.lV.l.o.ls.........s...s.................p...t..t..............................tt.....t...h....t....p.........................................thl.ppL....pp....hhs.Rl.t....t..h.p...Ps.........stp.p..hacIlR+R...LF...p..p.......l.......s.t....p....t......t.......p...p....sh....pph...................tphh.p.p............p.............h....s.....p.........p...s.....p....a.........tcc.lpp.......uYPhHPc.lh-hL.h.c..h...ssh....s.FQRTRGlLRl...hutll+p.La.pp.tp...............ts.LIhstp....lsl.ss....pl..p.......h.h..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................ttttt.................................................................................................................................................................................................................................................... 0 43 78 94 +2161 PF04366 DUF500 Family of unknown function (DUF500) Kerrison ND anon COG2930 Family Proteins in this family often also contain an SH3 domain (Pfam:PF00018), or a FYVE zinc finger (Pfam:PF01363). 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.40 0.71 -4.69 74 1081 2009-01-15 18:05:59 2003-04-07 12:59:11 7 22 629 0 603 1018 204 114.40 29 34.00 CHANGED hsG.uuhGh.QlGs-hoDhVhllpspculcshhp.uphoLGussSlAAGPlGRsupussssth....u....................................................................slaoYS+o+...............GlFAGloL-Guslhpcp-s..NpcaYG..........plsspcILtGps.s..........s....ss..uptL...hpsLpph ..........................h.susuhGh.QhGsp.tchVhlhpspcAl..c..sF...pp...ssho.lGuss..........o..........lA..s.......G.s.lG.t..sup.s.ss..s.sst.....u...............................................................................slh.s.a.s.p.o.+...............GLauGlol-Gohlh.p..tt...s.thYt...........h.....l.h...t...............................................h................................................................................ 0 172 359 505 +2162 PF04417 DUF501 Protein of unknown function (DUF501) Waterfield DI, Finn RD anon COG1507 Family Family of uncharacterised bacterial proteins. 
22.60 22.60 23.20 23.10 21.70 22.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.73 0.71 -4.33 43 550 2009-09-11 10:23:03 2003-04-07 12:59:11 7 4 523 0 173 424 260 123.50 47 54.26 CHANGED VutRCssGhPsV.lpotP.LsDG........................pPFPTlaaLTsPplsttlS+LEupGhh+chpccLspDt-LtstatpAHcpYltcRpplh........................h.tlssh.usGGhss......+....VKCLHshlActLAs..ssNPlGchslsh......l ..................................................luhRCs.sGpPsV.ltTtP+LssG......................................................oPFP.ThYYLTsPthsuthS+LE..us.G.lM+cMscRL.....up..D....t..E....LAstYcpAHEsYlAcRstlt......................................................................shh.s.sh.SuGGM..Ps...............R..........VKCLHsLlAHu.......LAt...GPGlNPhGDEsls..l......................................... 0 68 127 162 +2163 PF04367 DUF502 Protein of unknown function (DUF502) Kerrison ND anon COG2928 Family Predicted to be an integral membrane protein. 20.80 20.80 21.20 21.00 20.20 20.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.52 0.72 -4.28 140 1039 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 869 0 414 929 1403 106.80 31 47.23 CHANGED lllsllh...l.hllGhlspshlG+tllphhEp.llpRlPll+slYsul....Kplh-slhssppps.Fpp....sVLlcaP..pp.GhasluFlTspstsclttth............chluValPToPNPToG ..................llslhh.l.hllGhhs.p...s.h..l..G..ppll.phh-t.llpR.l.PlV+sl.Ysul....K.Q.........lscslhsspsss...F+c.................sVLlc.a.P......cp.GhasluFlTupss.sphttthtp..........-hluValPToPNPToG........... 0 128 287 356 +2164 PF04456 DUF503 Protein of unknown function (DUF503) Waterfield DI, Finn RD anon COG1550 Family Family of hypothetical bacterial proteins. 
21.20 21.20 23.70 26.50 20.80 20.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.49 0.72 -4.39 79 654 2009-09-13 11:37:40 2003-04-07 12:59:11 7 2 651 1 239 472 24 88.80 34 91.95 CHANGED hlus.lplclhl.ssspSLKEKRtll+slls+l+p+FNVSlAElsppDhapcutlGlusVusspspscphlppl.ch...l.-pps.-h-lhshchE ......hluhlphclhl.s.-s+SLKEKRull+slls+l+p+F.N.VSlAElspp.DlapRstIGlusVSuspspscchlsplhca...l...-uts.EhElhpsch................ 0 109 193 227 +2165 PF04457 DUF504 Protein of unknown function (DUF504) Waterfield DI, Finn RD anon COG1531 Family Family of uncharacterised proteins. 22.20 22.20 22.20 22.50 21.90 21.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.09 0.72 -3.81 36 171 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 169 0 131 171 3 53.70 33 12.31 CHANGED hlsYhcR..s.s......sh.pplshspltphpsthlhh........-shIPhHRIlcI..hpcGcllWcR ................lsY.DR...ht..........sh.cclshsphcptpsshh...............-phIPhHRIlhlh..+p.....cG..cllWcR.... 0 40 72 106 +2166 PF04458 DUF505 Protein of unknown function (DUF505) Waterfield DI, Finn RD anon COG1542 Family Family of uncharacterised prokaryotic proteins. 
28.50 28.50 28.90 101.90 25.40 28.40 hmmbuild -o /dev/null HMM SEED 591 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.88 0.70 -6.39 5 33 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 29 0 22 36 0 543.50 33 98.34 CHANGED MlLpKRHAElL+clKEscppsEhctKlsE-Fcpcsl.....hELEltGLs+LcGsscacLT-AG+phsphLtEhlDKhsLEc.P-pW.tp.RWlGSEIIpMl+hscLs.G+VPEcWtctLcERGLA-E.........ssLTcaGKslLElY+EoHP+LYlTsElAuaIR+MPcGPGshcphlpYKsTlshGDNIlpALEAMRLLlISPPs-sG+sYALTuLGptVcpALchsPsss+sLILcclhEDFhsh.....pcLc-uEcsscLEcuGasscGsLTchGE+lh-sYKhhGKs-ahslPsasLE.DEl+lLKsI-+LhKKaEcNP-llPThKEIcR.h.....................................................EahRsDlD.lsssL+sLES+-LIc+cls+cG....KssYhLTEaGE+VL-s.....lusVoA-GVKAIThocu.F-uPssEWlccA+-EGLV+sGAlT-KGRhYu+LS+cIKRKPaLT+aEhplLtKlP+KuhIcR-cLlEhlpDcVGuEE+cIlcALucLEAKGFV-EL.NssVpLTEAG-clKTAIcsupp.Ellcl+aPVTPssaRlLKAIYDptcpas+tcKl+tpPKsaKt.....lcKcLuLosEphKKsLsLLRplGFlGppuLTEAGlsLLcAa- .MllpKcHh..hLpclhppcp.tchptthhEchpp.csl......ELplhGLlch...tss.tphpLTauG+thh.hLt-.....................hhcch.................lp....p.P-ta.ttaRaluSElIuMlchutcs.uc.l..st..h.hc.tLcERGhAc-..........ssLophucslh-lY+.ps+P.clhlss-lApalRcMP.GPup.......pshh.shssp.hthLEAMRLlsaSlP..pu-sauhTsLGptlppAlphh.shstc.sll.c.chh...tDh.t.......ptlp..ch..cpph.hphhhh.stpGplp.hGcthh-sY+lhpcts.hsh.saslp.pEhclLcsIcclh.c.+.h-psP-.hPT.cp.Ic+.h.....................................................pahcpsh-.lptsLasLEuFsLlcpc.pcp......+.sYhlTpaGccVl-t.t..thpslousuVKulThscp.hssPshEWhppAccEsllusutsTppG+hYtcLuhphcRhPhlT+hEhplLppl.Pp+t.h.ts.hhc...shsht-Eccl.....ALscLEA+GhlchL.sshlhhTEhGchhKcAluus.p....shthPlsPhhhRlLpAltchGshas+tpKlRh.PcshKc.....hh.Kh.uLs.-phccsl.lhR.stalGcpulsEAGlhLLcAh.............. 0 11 13 18 +2167 PF04368 DUF507 Protein of unknown function (DUF507) Kerrison ND anon COG2952 Family Bacterial protein of unknown function. 
23.00 23.00 25.60 25.40 22.60 21.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.17 0.71 -4.65 20 269 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 241 0 73 139 10 164.00 43 99.34 CHANGED MRL+ht+lPYIApKIshDLhssualcl.spsl-sltpphpcILcEslpcEctL-E+s+ElLEcpp-E..h-hhplDc+phFahlK++LAs-hshhLs...........hEDRasploHpIL-tLhcpchIc.asVs-NplKNlIasuIcsalKta-cIEctVhcKIcsYK++llsGo-Ea-llacKLYEEEL+KRGhl ..........MRlKLsHIsaIupKlhhDlhpSshlEl.KsphEpLp.pIhplLEc-lhpEctLDEcs+ELLEpppDE..hE..h..MphDc+phFWhlK++LA.-hslhLs............EDRtNcLuHpIL-cLl-pDhIp.Fh.......VSENpl+..NlIapSI-sYlKh.YEclEsEVaEKIpsYKpK.lsGS-EY-LlFEKLYpEELRKKGh..... 0 28 58 71 +2168 PF04370 DUF508 Domain of unknown function (DUF508) Finn RD anon DOMO_DM06371 Family This is a family of uncharacterised proteins from C. elegans. 25.00 25.00 33.70 33.70 19.40 19.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.95 0.71 -4.60 2 11 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 5 0 11 9 0 141.30 44 57.13 CHANGED pPSthoTcsspps.spssTSuoShssuSpppSlPo....Ta.ssapphst+hVTMVpVKFlLlHpDshpRRsQShFTDEF.SDCRLE-VllNFpQLCsRQLhDtphpPRLSYCIGElshpsSKPV.usDhuKTLsQLAsopslhQhulIVD....Nhc. .....................................psssSss..S..ouS.s...Ssh..hSlPph....Ta..u.s.ph..p.p..ou..KhVhMVaVKFlLLHhDlh+RpsQSsFsDEFsSDChLpDVlhNFpQLCsRQL+s..p..chpPRluYCIGEls.hNSKPVhppDLtKoLAQLAsopolhQFuLIsD........st.. 0 1 4 11 +2170 PF01871 AMMECR1 DUF51; AMMECR1 Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family This family consists of several AMMECR1 as well as several uncharacterised proteins. The contiguous gene deletion syndrome AMME is characterised by Alport syndrome, midface hypoplasia, mental retardation and elliptocytosis and is caused by a deletion in Xq22.3, comprising several genes including COL4A5, FACL4 and AMMECR1 [1]. 
This family contains sequences from several eukaryotic species as well as archaebacteria and it has been suggested that the AMMECR1 protein may have a basic cellular function, potentially in either the transcription, replication, repair or translation machinery [2]. 25.00 25.00 28.30 28.10 24.60 24.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.00 0.71 -5.13 76 937 2009-09-11 09:06:12 2003-04-07 12:59:11 12 8 800 7 555 903 86 170.30 30 62.01 CHANGED llplARpslpptLpstp...............hssthpp.ptusFVTl.cp..........ps...pLRGCIGthps..........tsLhcsltcsAlsAAhcDPRFs..Plptc.ELsp..lplEVolLos.c.ls.......h..cplc..lG+aGlllct......G.hhpGlhLPQVssEpsW-tppFLsphsh.KAGls...sssW.ppssclhpFpupl..ap....E .......................................h.hthshpsl.thl.pttp................t.h.s.t.th......pp...ptulFVTl.cp...................ctpLRGCIGohps..........tsLt.psltchAls.u...........Ah.cDs......RFs..Plptc.E.Lsp..LphsVol.Losh.E.hs..............................................sh.tchc.....lGpaGlhlch...............................s.tpsus.hLPpV.hEp.sas.p.p.p.h.l.s.plhp.KAGht.......s....p.pshclh+apsphh............................................. 0 209 357 469 +2173 PF04459 DUF512 Protein of unknown function (DUF512) Waterfield DI, Finn RD anon COG1625 Family Family of uncharacterised prokaryotic proteins. 
24.60 24.60 24.70 25.60 24.30 24.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.17 0.71 -5.25 60 441 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 416 0 174 431 193 194.80 30 44.54 CHANGED VPVGlT+aRt..sh.tLpPlstcpAtcsIsplcphQcchppchGstFsahuDEaYllAGpslPsht.YpshPQl-NGVGhl+.Fhcchtcthpp.ls.......pppcholVTGphstphlpplhccL.sthpGlplplhsltspaaGpsloVsGLLTGpDllptLpt.....p-lG..-tlllPslML+p.......spthFLDDholc-ltpp...Lssslhllcussc .....................................VPlGlT+aRp......tlhtLpshspcp.A..tcslpplcphQ..........cchhp.ch..GppahahuDEaY.......llA.....s.....p.lPs.phY-s.asQlENGVGhlRhhhpp.hppshpp..l.......................httppphollTGphuhthl.pphhcpl...p.....th...sl...p.lplhslpNcaFG.pslTVuGLlTGpDllppLcs..............................pplG....-tlllPssML+p.................scp..lFLDDhol.c-lppp...L.t.hplhlsp....t...................... 0 71 129 159 +2174 PF04375 HemX DUF513; HemX Kerrison ND, Moxon SJ anon COG2959 Family This family consists of several bacterial HemX proteins. The hemX gene is not essential for haem synthesis in B. subtilis. HemX is a polytopic membrane protein which by an unknown mechanism down-regulates the level of HemA [1]. 
29.30 29.30 29.50 29.30 29.20 29.20 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.09 0.70 -5.87 5 1205 2012-10-02 17:03:51 2003-04-07 12:59:11 9 3 1165 0 207 918 162 356.10 38 87.20 CHANGED uusPp-sAcpssssSoPsAscsss+susussLAulALLlA.LGLGAGGaha............uLQQlschps+s-ALApclpQlssAp-s........spcpplpclp+ssp-thcQLc+pLushs+plsplQppVuslpuu......sssDWLLAEADaLL+LAuRKLhL-pDVsTAsuLLcoADspLAshNDPulpAlR+AlAsDIssL+AlPpVDpsGlhlRLssLpNQIDsLsLlssshc............tuspsusupAlo-ulu..............-WppNLp+Sl+uFhssFIsIRpRDsoshPLLAPsQchYLRENI+L+LpsAthAVhscQ-ElYRQAL-sAuoWlcuYFDT-sssspuFhtplDpLAcQslpV-lPspLQApssLpcllscRlpslph...ulutsAscu ........................................................................ppstpt.s..psss.p.s....s..s.s......p.t...t.......p....p....p.....s....s...u.....h.....h....L....u.....s...l.Al.s....l..A....LA.h.G...s....G..h.ah.a..............................................s.p.Q.Q.s..s.p.......s....s.......p...s.sA...L...ss...QL.s..sh.p..pupp................................sppscL..c.s.h...h...p...p....p...s..s...p...l.c...p....s..pp....p............p....s...h...s...cplscl...Q...pp.l.uslsup..................sscsWLL..ApADaLl+LAuRKLa.-pDVsTAsuLLcoADupLAch..NDPuLlslR+AIssDIusLpul..s..plDhsGllL+LspLpsQlDs..Lt...L..u.s.s.s..s.s.........................................................................tu.s...h..s..s.c....u.pp..l.S.uu..lu..........................-W...p.........N...L...p+...Sh...p...s.......F...hcsF...IsIRRR...Ds.s.s...h.PLLA.P....sQchYLRENIR.RLLsAtpAV.++QpEsY+puL-ssusWl+uYaDsc.cs.sT+uFLccl-..pL.upQsI..s.h...-.l..P.-...pL.p.S.p.shLcclhppRhpph...............stst....................................................... 0 34 94 156 +2176 PF04415 DUF515 Protein of unknown function (DUF515) Waterfield DI, Finn RD anon COG1627 Family Family of hypothetical Archaeal proteins. 
25.30 25.30 25.30 49.70 23.90 25.20 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.27 0.70 -6.23 6 64 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 39 0 46 66 5 243.40 20 93.40 CHANGED ccpsP..................................KspPhKpPpRRss...................Rluo.+p+......................EcRpRlIIGA.VlslIIlhhshulYhahpspthpE...LENsKssKIuElNphFtu.chuNDsp.......KhthlsKIpuApslEElppIsV.t.hpp...........h-ptchhhplpphKspsIppIKstaG.lhphpl.ssELcpKI.....suplpsLhppVssscshcEllphp.-phLhsh.KhhY.............lchstcchhhoKD-AKcllsphs.ls-LhpapVpsV-hVplslVlsRpphsGulhpsGDpVplYsKss..ushh.tllspuYV.L........upIusSESpSpSoossspSSoooppS.pop.Ssu.tsph.sopp.p.p..soQotspp.SSShSYol.NLspIL+AhAAGKlst.scl+spLpsYGh+L.-lEcsTpl.thsEsTphLlIlcVPs-hVPcllphcsulllshsss ...................................................................................................................hh...lh.hhhhh...h.ha.........p.......hp..+..thttlptha....h.sp..............h..h..pl..u.s.pph.tl..............................thtth..p...h.................................................................................................................................................................................................................................................................................................................................................................s......... 0 8 18 34 +2177 PF04414 tRNA_deacylase DUF516; D-aminoacyl-tRNA deacylase Waterfield DI, Finn RD, Mistry J, Wood V anon COG1650 Family Several aminoacyl-tRNA synthetases have the ability to transfer the D-isomer of their amino acid onto their cognate tRNA. D-aminoacyl-tRNA deacylases hydrolyse the ester bond between the polynucleotide and the D-amino acid, thereby preventing the accumulation of such mis-acylated and metabolically inactive tRNA molecules. 
21.30 21.30 22.80 22.70 16.80 16.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.46 0.70 -5.21 34 183 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 168 4 114 186 85 216.70 29 69.66 CHANGED lptDhh-......hhtcs-hllFlSRHpussshPsLTVHssGNhs.....s.sthGGpPtpluhusPthhssllcslpchs.........s....a-lohEATHHGPothshPshFVElGSoEp-WpcpcAscslAcullcslsp.tp..................pshlGhGGsHYAP+hoclslcschslGHllsKa.hs...........p.ls.........ppllppslp+s..ssch.shlDhKulputp+pplhphhcc.hulcllpp ............................h......t......thtss-hllFhS+Hsup.s.shPsLTsHssGNhs.........p.s.hGGcstpluhusPphhpshLptlpchss........p....s.................a-lshEATH.....H.G.......Po..........t..........hs.sP..........shF..lEIGSoEcpWp..spcAucslAc....ulhcshsttt.ttt................psllGhGGsHYu.s+aoclsh....c....s.p..hs..lG..Hllscatls....................t.ls....................tclltpuhpps...........ssph..shlDhcuhputp+ptlhphlcchslplh.h..................................................... 0 27 64 89 +2180 PF04378 RsmJ DUF519; Ribosomal RNA small subunit methyltransferase D, RsmJ Kerrison ND anon COG2961 Domain RsmJ is the tenth to be found of the ten methyltransferases required for full methylation of 16S ribosomal RNA (rRNA). It specifically methylates m(2)G1516. A strain of E.coli lacking RsmJ due to deletion of the rsmJ(yhiQ) gene is missing the methyl group at G1516 and exhibits a cold-sensitive phenotype. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.62 0.70 -5.38 8 1570 2012-10-10 17:06:42 2003-04-07 12:59:11 8 2 1530 1 321 1377 328 241.30 46 87.23 CHANGED -KPFhhLDTHuGsGRYDLuu-cAc+TGEacpGIuRLhpp...sslPt.LpsYLcsI+phNhuss........l+aYPGSPhlA+pLLRsQDRhlhsELHPsDhslL+spFssDtpVpVcpsDGattl+ApLPPpE.....+RGLlLIDPPaEhpc.EYpclVpAlccuhpRassulhAIWYPIKsRpplccFh+uLcsosl+KlLpIELtV+PsospcsMsuSGMllINPPWsLEsphpslLPaLpppLu.s.sGphpl-Wl.s .....................................................................-KPFhYlDTHAGAGtYpLs....u....-cA....p+TGEahpGIu..Rl...W.pp...........sc.L.P......s...t...LpsYl.ss.l+.p.h.....N.p..usp.................LR.aYPGSP.hl.A.c.p.L.....L......R.c......Q..D.......+.......lpLs...ELHPo.......Dasl.L.......+...s...p...F...............p.......c...............D............p......+...s........p...Vp...+...t..DGapt...L...+...A...hLP...P.hp........RRGLlLIDPPYEhK.s...D.Y.pt.VVpultcuaKRF.uTGs.YAlWYPl.....l......R................p.....p...........l....c....+.....h........h.ccL.......c........s.......s.....G.I.......+.....+...lLplEL......sV.....p.......P.........c....o.........-...............p......p.........GM.s...............uSGMlVlNPPWpL.......-pph.pslLPaLpptL..s.s....sG..phplpWl................................................................................. 1 70 169 245 +2181 PF04461 DUF520 Protein of unknown function (DUF520) Waterfield DI, Finn RD anon COG1666 Family Family of uncharacterised proteins. 
22.00 22.00 23.10 22.20 20.70 19.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.09 0.71 -4.37 108 2052 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 2015 2 466 1129 1083 158.50 49 98.52 CHANGED MPSFDIVS-lDhpElcNAV-pup+ElssRaDFKGoc.usl-lpcc..plpltu-s-apLcplhDlLpsKLsKRsls.h+sl-hs.pspptoGpps+Qplpl+pGI-p-hAKKIsKhIKDsKlK.VQAuIQGDplRVTGKKRDDLQtVhsLl+.pp..-ls.....hPLQFsNaR .......MPSFDIVSElDhpElcNAV-pAs+ElsoRFDF+Gsc.A.slELs-c.....slplhu-uDFplpQlhDILpuKLlKRsls.spuL-hs.c...p...htSGcshp.pscLKpGI-s-sAKKIsKhIKDoKlK.VQApIQGDplRVTGKpRDDLQuVhAhlR...ts.....DLs........PlQFpNFR........... 0 133 293 392 +2182 PF04412 DUF521 Protein of unknown function (DUF521) Waterfield DI, Finn RD anon COG1679 Family Family of hypothetical proteins. 27.80 27.80 27.80 35.20 26.90 27.70 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.03 0.70 -5.92 42 289 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 256 0 143 298 337 382.20 33 88.27 CHANGED hhLocp-cshLsG-tG.puhppAMcIllthuchhGAc+LlslspAHls....Gs..........Yp..GsuuL....cFlcchhp.huu+VpV........PTTlNshuh..Dh.p.hcphs...lstphscttt...clhcAahch.......Gs..csohTCsPYh..lts.hPph.....G-plAWuESsAVlaANSVLGARTN+cus..sLsuAlsG+sPhhGlHL-ENRpuslllcVcs..t.....csu.ashLGalsGchsss..plPllpGLps........sspDcLKALGAAhAooGusuhaHltGlTPE..A.shtssh.t...............-plslsts-ltpshppl....sssspplDlVslGsPHhShpEhcp....luphLcu+c...sshslhlssuRtlhppspc.GhlptlEphGspllsDT..ChllsPl.....hstttps.lhTNSGKhApYsPuhsGhslthGolpcClcu 
.........h.Locc-ct..hLsGchG.cuhphAMcIllthuchhGAp+Lls....lsp.AHls.........Gs...............Yp...GpuuL....cFhcc.l.hp...hGu+Vp....V.................PTTlNs.huh..Dh.p.hctht.....lspp....htc....ttp.......clhcAahch..........Gs..psoaTCsPYh..ht.s.hPph.....G-plAWuESsAVsaANSVlGARTN+pus.hsLsuAlsG+sPhhGhHl-ENRpuplllclps..t...........sss.ashLGahlGchsss..plPhlpGltt........ssp-pLKshGAAhAooGusshaHl.GlTPEA....shtssht........h...........cplplstp-lppshcpl.....tssspplDhlslGsPHhSlpEh....pp.....luphlc......u+p.........psslhlssuptlhttsc.p..GhhptlcptGspll..sDT..C..hls.sPl............thps..lhTNSuKhApYhPuhh.GhpshhushtpClc..................... 0 42 84 111 +2184 PF04463 DUF523 Protein of unknown function (DUF523) Waterfield DI, Finn RD anon COG1683 Family Family of uncharacterised bacterial proteins. 22.70 22.70 22.70 23.20 22.40 22.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.99 0.71 -4.42 164 1833 2009-09-12 22:11:40 2003-04-07 12:59:11 7 10 1453 0 463 1420 90 141.80 33 63.83 CHANGED lhlSuCLlGppVRYc..Guppt..sph.....lpphlpcth.hls..lCPEltsGLssPR.shclh..s............ssph.+lhs.ps.u.pDlTcphhphupptlpthp....slsshIlppcSPSCGhppl..Y........psshs...stthtG.pGl.huthlhcpt.hl.sE--h ..........lhlSuCLhGt..s...sRYD......Gspph.........ssh...........lpphl.p.p...t..h.......hls...lCPEl.t.s.GLssPR.sshcllt.....................t.sps.+l.hs..ps..u..pDVT......cphhphupptLp..phpt.......plss.....h.I.lp..s+SPSCGhppl...Y.............cu.phs......s.t.ptsGsGlhsthl.hcph.hl.sE--............................................................. 0 157 291 373 +2185 PF04411 DUF524 Protein of unknown function (DUF524) Waterfield DI, Finn RD anon COG1700 Domain This domain has been identified as a member of the PD-(D/E)XK nuclease superfamily through transitive meta profile searches [1]. 
The domain has two additional beta-strands inserted to the core fold after the first core alpha-helix.\ It has been speculated that it could function as s methylation-dependent restriction [1]. 22.10 22.10 22.40 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.17 0.71 -4.39 28 222 2012-10-11 20:44:43 2003-04-07 12:59:11 7 5 207 0 64 224 13 157.30 25 23.37 CHANGED pLYEhWCalplhplLpph.....Ghphtppslhpspspsh.thslppsps....hhh..p..sshplpLtYpspht.t...........t..shsp+PDhsLplshtstt......hhhlhDAKYRl-sspct............................p-slspMHpYRD...Al..............ppsshuAalLaPGtps ..................................................pLYEhWsalplhplLpph.....................thph.hpp..s.l.h.p...hp.p.psh....hsslp.cups......hhhh.....p...psstplpLhaptph....t...............................t..sssp+PDhhLplpppspph................hphlaDAKY.Rlphs.tpt...............s.....................pcslppMH....p..YRD....Alh......................tt..hp+sshuualLaPhp.c............................................... 0 28 46 57 +2186 PF04379 DUF525 Protein of unknown function (DUF525) Kerrison ND anon COG2967 Family Members of this family include the bacterial protein ApaG and the C termini of some F-box proteins (Pfam:PF00646). F-box proteins contain a carboxyl-terminal domain that interacts with protein substrates [1], so this family may be involved in protein-protein interaction. The function of ApaG proteins is unknown, but mutations in the Salmonella typhimurium ApaG homologue corD gives a phenotype of low-level cobalt resistance and decreased magnesium efflux by effects on the CorA magnesium transport system [2]. 
19.30 19.30 19.60 23.30 18.80 18.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -9.91 0.72 -4.02 147 1678 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 1515 9 519 1070 541 89.30 48 52.06 CHANGED l........s-pSsPpps+..asFuYslsIp........Npupp.s.....sQLl........oR+WhIT.DusG.chpEVcGpGVVGcQPhlpPG-.sapYoSGssLsTPsGsMpGpYpMhs...cs ..........................................................l..tQSsP-.p.p+..YsFAYslTIc..............NhGpt...s...........VQLl........uRaWlIT.....sus.........G....+.p..EV......p...G.cGV.VGpQPlls.........PG-..p....apYTSGss....lp........T.......P.......h.G.s.MpGpYpMlsc.s.................... 0 150 289 406 +2187 PF04380 BMFP DUF526; Membrane fusogenic activity Kerrison ND anon COG2960 Family BMFP consists of two structural domains, a coiled-coil C-terminal domain via which the protein self-associates as a trimer, and an N-terminal domain disordered at neutral pH but adopting an amphipathic alpha-helical structure in the presence of phospholipid vesicles, high ionic strength, acidic pH or SDS. BMFP interacts with phospholipid vesicles though the predicted amphipathic alpha-helix induced in the N-terminal half of the protein and promotes aggregation and fusion of vesicles in vitro. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.77 0.72 -3.84 131 1484 2009-10-26 17:57:44 2003-04-07 12:59:11 8 3 1473 0 318 838 803 78.10 44 82.37 CHANGED M.ss.chl-.......-lucphspshsss...puhtc-...lEpph+shlppths+lDLVoREEFDsQpplLt+sRp..........+lcsLEsRlutLEsp ....................Mhss.+hl-.......plA+plp-uhstu...hpphucD...lEcplR.p.hLQutLs+..LDLVoREEFDVQspVLlRTRp.......KLst.LEpRlucLEst.............. 0 79 171 241 +2188 PF04384 Fe-S_assembly DUF528; Iron-sulphur cluster assembly Kerrison ND, Eberhardt R anon COG2975 Family This family of proteins is likely to be involved in the assembly of iron-sulphur clusters. 
It may function as an adaptor protein. In Escherichia coli Swiss:P0C0L9 forms part of the isc operon, which encodes genes involved in iron-sulphur cluster assembly. Its structure is entirely alpha helical, and it contains a modified wing-helix structure, usually found in DNA-binding proteins. It binds to Fe2+ and Fe3+ ions and to the cysteine desulfurase IscS, the same surface of the protein is involved in both binding to iron and to IscS [1,2]. 25.00 25.00 26.00 25.40 20.60 18.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.90 0.72 -3.61 61 1135 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1120 2 184 434 221 63.60 63 95.87 CHANGED hKWsDsp-IAhtLh-paP-.lDPts...lRFTDL+pWVhsL...ssF-DDPp+ssEKILEAIQhsWl-Eh- ................lKWTDoR-IuEsLYDtaPD..lDPKT...VRFTDLHpWIs-L...-DFDDDPptS.NEKILEAIl.hsWlDEuE.................. 0 40 86 134 +2189 PF04385 FAINT DUF529; Domain of unknown function, DUF529 Mifsud W, Yeats C anon Yeats C Family This family represents a repeated region found in several Theileria parva proteins. The repeat is normally about 70 residues long and contains a conserved aromatic residue in the middle. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.31 0.72 -4.13 64 1072 2009-09-11 15:51:37 2003-04-07 12:59:11 10 26 6 0 961 1070 0 76.20 15 28.74 CHANGED hlcl.....tshppsphhhhp............hhhhhhthp.pshphpplhh...........psphlWptpsspphhhhhhhhhptt......hhlhlhhsptt ....................................................pp...ht.............ttthhpaph+.suhphpKlhh.......................ssp.hlWcp.sc...s.sp.hs.hh.h.h.ht..........hhhh.......h............................. 1 2 558 558 +2190 PF04409 DUF530 Protein of unknown function (DUF530) Waterfield DI, Finn RD anon COG1771 Family Family of hypothetical archaeal proteins. 
20.10 20.10 20.30 104.10 19.00 19.80 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.58 0.70 -6.40 7 60 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 39 0 47 64 5 306.90 25 99.99 CHANGED .ooptLIspsN+hLDsIs.c..hschhcDhDshh............lhhTLcsNLc.Lp-LcEchEhR.G.tuPh.sltch+utts................HhpalhRKtp.+phhLERl+SAIsuHKIALuhLpt.h.hph.pst.p.p..p.p......................chch..p..slGRlEIlPaLshuG-hhclhuphs..shcuaKcIhshh+pEGhsthcShplplchhEp..G+..hK+h+l-.s.DsDhEt.LRK+aGpphRhcllphh+o+ssLINs+YshcsLALAYhshs......................................................................ccGtcL..........................luhDlF+YYhhTS.c-Rpp.ss.FPul+sslDspa....ol.sls.+chhDhh.s.hp...........hl.hKh-lEcpLsKh+hcls.h...hlGGlhLYhhusas.pcss-lhtl-lEElhc..hhhhlpslhpllhsc..-hpKhEKa.hP+o-KAKpFLpLLpG ............................................................ppp.Lltp.pchLptlt...........t.hp.p.................................l...Lppplp.Lpphcpphph..t...P...h.phttt..................a.phhhhhhp.+h..L-Rl+.AlsuH+hAlu.ltt..................................................t.hphpl...LshsGshh.hh.p.s..hhpta+.hhthhp.pt..................hs.hpthph.l.hhpt..s+..h++.pl-.....shEt.l+ppaGpth.hp.hph.hphs.llsspYshp.LuluY..h............................................................................................................h..DlhhaYhhpo.pcRtt..s.aP.lh.s.p.t.......h.th...t..th.....................Kh.hEp.h.thp..hp......hGuh.l......hp..p..th.th..ppl.c....................htphpph.h.ppt+s+pFLthl..t...... 1 6 11 31 +2191 PF04407 DUF531 Protein of unknown function (DUF531) Waterfield DI, Finn RD anon COG1772 Family Family of hypothetical archaeal proteins. 
25.00 25.00 80.50 79.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.87 0.71 -4.38 9 38 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 38 0 33 41 9 170.90 53 89.30 CHANGED MLTlGLYNTYDpp+lHEAHhRAIARAAPlsYAFsF+LALhsFPac...h.sscElsEhVscpTTIG-uG+YLhtLt-pN+halhDhPp+GFPsQFGplVuTTSKP-ccKslsPh-lAchhL+s+ShhllIGLGR+GLPKElachucYHLDIT.s+tlSLETCTAIGuIsAhIhslhcsh ..MlTLuLYNTYD+pKlHEAHhRAIARAAPICYAFsFpLALhsFPhc.........t.p..-ls-hls.pcTTIG-uGcYLtcLh-pN+FallDhP.hGFPsQF.GpsVATTSKP-ccKtIoPh-lAchhL+t+ShhlLlGLGRHGLPK-lhKhu+YHLDIT.GKslSLETCTAIGuIPApItTlhc.h.... 0 6 12 24 +2192 PF04391 DUF533 Protein of unknown function (DUF533) Kerrison ND anon COG2979 Family Some family members may be secreted or integral membrane proteins. 29.60 29.60 29.80 30.40 29.20 28.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.09 0.71 -4.92 61 974 2012-10-03 21:00:09 2003-04-07 12:59:11 7 5 942 0 139 548 37 188.60 44 85.52 CHANGED uuhlss........husG............uLs.....GGlhGhLlGs+pup....phuusuhthGGh.AulGsLAYpAYpsa..psp...ps...........sssssspshstsss...................tt-pputhlL+AMIuAAKADG+IDppE+ptI.tplschshDsctppalpppLppPlDsstlApts.ps.phAsElYhAShlsl-.-shtE+uYLspLAptLsLssslspplEppls ...................................................................................................ts.sth......LssG.......AL..GGLAGlLlusKouR..........KhGssA..LhlG.Gu.As...h.Gslhap........tYpch.......p...s.................................stQspPQhsspsss.....................................................h-p+oppLlhAhlh............AAKSDGHIDscERA...tI-...ppLcEu..Gl...-...cpucshlEptlc....pPLDPp.cL.Apul.psEEpAh.......ElYhhS.phAID.....D.........pFM...ERuY....LstLucAL+lspslp-tIEpsl.p...................... 
0 31 61 102 +2193 PF04392 ABC_sub_bind DUF534; ABC transporter substrate binding protein Kerrison ND, Mistry J anon COG2984 Family This family contains many hypothetical proteins and some ABC transporter substrate binding proteins. 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.49 0.70 -5.46 9 2266 2012-10-02 13:57:41 2003-04-07 12:59:11 7 34 1502 3 510 1839 143 277.50 32 82.31 CHANGED cIulhthlpHsuLstthpGhp-uLp-hGhpp.p..hhphhNApss.spstphu+pLhscpsclllsIuTPsAQtlhuthcs..lPlVhuAVosPlutpLs.shcpsttNlTGVs..Dt.slppplphh+tlhPss+slslhYsPuEssshshhccl+thAcptGlpVlphslssos-lspshpshsp+ssulahshsshlssuhsu..llpsupptKIPlhsussuhVccGuh.AAhulshpphGhpsuphlhclLcGpcst-lssphssp....phhlNcpshppLGIplscshhspusphc .......................................................................................................................................................................................................plulhphlpHsu..LDth.hcGh.....-sL.tct...Gapp...c..plc.l.......c....h....p..N.A..p.....G.....-....p......u......p....htp.hu.....p.p..h..s.s.p....p..s.D..ll.l.u.I...A..TP..u...A.Q......u..l.........s.........s...s........o.......p........s.............l....P.l..lhsA.V.T.D.P.....lu....A......tL...V.p...s...h.....c.p.P....G...s....N.lTG..sS.......D..t..s..P...lp.pp.lcL...l.+c.lh..P.s.s...K.....p...lGl..l...Y..s...s.u..E......s..N..S...h..s..pl..c.p.h.+..p...h...s...p..c..t..G..l..p...l..h.......p....h........s........l.......s..........s..........o.........s.......-.......l....t........p........s.......s.........p........s............h....s...s...c........l.....D.....s...l....a.....l.....P......s...D.N....s.........l...s...S.....u.....hps.........l..sp...s..u.......p...p.....s..Kl.......Pl.h.s......u..s.s.s.h.....V...c...p.G...u...l...A.u..h..u.l..s..h.h.p.l.G.hp...T.Gc.h.ss.c.l.....L.c.G.c.c.P.u.s.h.sl.phh.pp.....chhlNpch.AppLG..lslspthhtt.....h...........................................................
......... 0 178 346 426 +2194 PF04393 DUF535 Protein of unknown function (DUF535) Kerrison ND anon COG2990 Family Family member Shigella flexneri VirK (Swiss:Q99QA5) is a virulence protein required for the expression, or correct membrane localisation of IcsA (VirG) on the bacterial cell surface [1], [2]. This family also includes Pasteurella haemolytica lapB (Swiss:P32181), which is thought to be membrane-associated. 19.10 19.10 19.10 21.40 18.80 18.70 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.84 0.70 -5.50 34 1261 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 911 0 96 664 6 271.70 36 90.85 CHANGED hhstps.....hhpphpp+hKahhRsllhhtphpphhphlsp...shhpslhpppPphst+lh+PYlspshstppRlpsltpHaphhppthstphhp.lhppt.slpLsplp...tppaslhLshssths+EG-hsL.Lpspp.tphlhploFolh.........sppsLhIGulQGsss....-psp-hl+phTKshHGLRPKpLllEshphLApthslcplluVupcsHlapph+a......ppclhsDYDphWpEhGGp..spsaap.LPlphtRKsl--IsSKKRuhYR+RYthLDplppslpptL ..............................................................t........thhpctpaRh+FhlRsll.hsthsh.phhptLsp...PphpsLls.p.p.Pplsh+lpRPYls.........tshs.tpp+lculh.Hathlpp............h............hs...........t..c..h............p.....hls..p...slpLuc.........lp.sK.......ss-pa..sl..pLs.h.hshc.+EG-holhlp..ssc....tsLuploFol..s...h.p......sppshhIGGLQGsph......chspphIppATKsCHGLhPKcLlhEAhphhApthpl-pll.AV.opcsHlapphRY.h.pK..cpph.hAsYssFWpslGG.....p...p......t.p...aac..LPhp.lsRKslt-IsSKKRupYR+RYthLDslptphst...................... 0 11 32 65 +2195 PF04394 DUF536 Protein of unknown function, DUF536 Mifsud W anon Pfam-B_2107 (release 7.3) Family This family aligns the C-terminal region from several bacterial proteins of unknown function that may be involved in a theta-type replication mechanism. 
20.70 20.70 21.60 21.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.53 0.72 -4.22 29 746 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 588 0 44 358 4 47.70 44 25.93 CHANGED hccQlppcc...........pplpch.......ppLLDQQQpLsLpspchlEc........hKs ..............................hccQlchKD...........pQIsEt..............sK.LDQQQpLsLpuhKchEpLc.pL-p.t+....................... 0 6 14 32 +2196 PF04398 DUF538 Protein of unknown function, DUF538 Mifsud W anon Pfam-B_2637 (release 7.3) Family This family consists of several plant proteins of unknown function. 25.00 25.00 25.00 25.10 24.70 24.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.22 0.72 -3.80 58 600 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 46 1 341 552 1 111.40 29 68.11 CHANGED osh-lLcpashPhGLLPpsl.p.sYshspsoGphp.lhhs......s.........s..Cphphps....hp..lpY.........ssplo...........................GhlspG................+lsslpGlpsK.......lhlWlslsclth....susplhFps..Gshs....cshshssFpps .........................h-lLpchGLPtGLL.Phss..p.pashscsoGhhh.lhhp.....p..........s.Cphpacs....ht..lpY.........ssplo...........................GhlppG................+lpplpGlcsKt......lhlW.lslspltl......susplhFps..usls....csashssFp......................................... 0 30 189 274 +2197 PF04400 DUF539 Protein of unknown function (DUF539) Kerrison ND anon COG2991 Family Putative periplasmic protein. 20.80 20.80 21.00 20.80 19.30 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.62 0.72 -4.47 37 588 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 495 0 111 320 178 45.40 45 59.21 CHANGED hIhp+KsIpGSCGGluulGl-KtCD.C...cpsCDpppc+.hpttt..tp ...aIhp+KslpGSCGGLu.u.l....G.l.-K.sCs..C.............scPC.Dscp++hspttt...p......................... 
0 25 53 87 +2198 PF01877 RNA_binding DUF54; RNA binding Enright A, Ouzounis C, Bateman A, Eberhardt R anon Enright A Family PH1010 Swiss:O58738 is composed of five alpha-helices (1-5) and eight beta-strands (1-8) with the following topology: beta-1, alpha-1, beta-2, beta-3, alpha-2, alpha-3, beta-4, beta-5, alpha-4, beta-6, alpha-5, beta-7, beta-8. The first six beta-strands (1-6) form a slightly twisted antiparallel beta-sheet and face five alpha-helices on one side. The last two beta-strands form an antiparallel beta-sheet in the C-terminus. PH1010 forms a characteristic homodimer structure in the crystal.\ Dimerisation of the molecule is crucial for function. The structure resembles that of some ribosomal proteins such as the 50S ribosomal protein L5 [1]. Although the structure resembles that of the RRM-type RNA-binding domain of the ribosomal L5 protein, the residues involved in RNA-binding in the L5 protein are not conserved in this family [2]. Despite this, these proteins bind to double-stranded RNA in a non-sequence specific manner [3]. 22.90 22.90 23.60 27.50 22.20 21.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.43 0.71 -4.10 52 277 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 140 17 189 274 118 115.90 25 77.86 CHANGED pplplpshlasTEDp-KVhcAlpslh............chphphpptcGaaup.....p.hlptt.sspcsLcplpchlcppcl..cth+s.tlpcplp..ssphhh+lsKQsAa.hGplphsp.ss.....ssIplplchc ..........plplpshlasTED.cKVhcAlpslh............shchp.h.ppppGaas.......h.lptt..pspcslcpltchlcppcl..cthps..lpcplp..ssplhh+lcKQsAa.hGplshsc.ss......ssIplplch............................ 0 47 104 149 +2200 PF04402 SIMPL DUF541; Protein of unknown function (DUF541) Kerrison ND anon COG2968 Family Members of this family have so far been found in bacteria and mouse SwissProt or TrEMBL entries. However possible family members have also been identified in translated rat (Genbank:AW144450) and human (Genbank:AI478629) ESTs. 
A mouse family member has been named SIMPL (signalling molecule that associates with mouse pelle-like kinase). SIMPL appears to facilitate and/or regulate complex formation between IRAK/mPLK (IL-1 receptor-associated kinase) and IKK (inhibitor of kappa-B kinase) containing complexes, and thus regulate NF-kappa-B activity [1]. Separate experiments demonstrate that a mouse family member (named LaXp180) binds the Listeria monocytogenes surface protein ActA, which is a virulence factor that induces actin polymerisation. It may also bind stathmin, a protein involved in signal transduction and in the regulation of microtubule dynamics [2]. In bacteria its function is unknown, but it is thought to be located in the periplasm or outer membrane. 24.90 24.90 25.30 25.30 24.30 23.70 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.12 0.70 -11.52 0.70 -4.47 156 2985 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 2441 0 788 2172 443 200.80 20 84.19 CHANGED lsVsGpuphpstPDhstlslslps..pspssspA.......hpp.s..spphppllstl......p...phu.......lt...p...ps.lp.....s......sshsl.p.............Ppa......p...........................................thtG.Y..........psspp.lp..lphp.cl....sphup......llsthhs...tsh................s..pl....sslpaplsctpphp..pcshppAlpsA+p+ApthApsh....G.hplsplhpl...............p.sstss.................hhthth.ttss..........sshpsuphplsssV.....sssapl 
....................................................................................lslsGpuphc...sssDhsplslplps...........pupsss.su.............tpp.s.....spphsphlshL....c......................ptG....................................ls......c...c-........lp.........s...ushsh..p................................PpY..........pa.p.....................................tpptlpG..Y.......................................pusps..lp..lphc...sl.......splsp..............ll...st...hhp........tGh..............................................s...pl......psl..p..a..s..l..sp.....sthc..scshppAlpsApppApplApuh.....s..tpLG.t.l.h.pl.........phustss...............................ht.h..th.ht......................sh.p.sphphpsplsssat............................................................. 1 250 495 659 +2201 PF04418 DUF543 Domain of unknown function (DUF543) Bateman A, Wood V anon Wood V Domain This family of short eukaryotic proteins has no known function. Most of the members of this family are only 80 amino acid residues long. However the Arabidopsis homologue is over 300 residues long. The presumed domain contains a conserved amino terminal cysteine and a conserved motif GXGXGXG in the carboxy terminal half that may be functionally important. 21.30 21.30 23.20 22.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.67 0.72 -4.02 35 301 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 235 0 198 280 0 72.70 36 72.16 CHANGED Moppsp.......................................plscKWDpslussllKsuhGhusGllsSlLhF+R...RsaPlhhGsGhGlGhuYs-scthFp .......................................................................scs...........................lscKWD+ClushllKsuhGhuhGllhSllhF+R......................RsaPl....hlGsGhGlGhAYspCptpFp........ 0 66 107 168 +2202 PF04424 DUF544 Protein of unknown function (DUF544) Kerrison ND anon DOMO:DM04041; Family Eukaryotic protein of unknown function. 
20.80 20.80 23.00 21.90 20.40 19.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.61 0.71 -4.30 8 377 2012-10-02 14:22:40 2003-04-07 12:59:11 8 9 242 0 265 379 5 124.40 30 22.46 CHANGED sNlLlLpssls........LssppphlTt--LhphLsDhhlp.ts...........shppNhs-shslLs+LtsGLsVNl+Fs...............ulpsFEaTPEhuIFcLlslsLYHGWllDPp.s-.hsculushSYspLhctlsss .............................................................................hNhLhLp.php........................L.s....p...tp..lohppLhphlu.....-hl..lphpst...................................sh.p..tsls-hhslLspLpTGL.sVNs+Fs............................GhtsF......E...h....T..s........E...htl.FcLhsIPLhHGWllDPps..sp....hh....pt.l..s..p..h..SYsph.phl................................. 0 79 132 200 +2203 PF04440 Dysbindin DUF546; Dysbindin (Dystrobrevin binding protein 1) Finn RD, Moxon SJ anon Pfam-B_3919 (release 7.5) Family Dysbindin is an evolutionary conserved 40-kDa coiled-coil-containing protein that binds to alpha- and beta-dystrobrevin in muscle and brain. Dystrophin and alpha-dystrobrevin are co-immunoprecipitated with dysbindin, indicating that dysbindin is DPC-associated in muscle. Dysbindin co-localises with alpha-dystrobrevin at the sarcolemma and is up-regulated in dystrophin-deficient muscle. In the brain, dysbindin is found primarily in axon bundles and especially in certain axon terminals, notably mossy fibre synaptic terminals in the cerebellum and hippocampus. Dysbindin may have implications for the molecular pathology of Duchenne muscular dystrophy and may provide an alternative route for anchoring dystrobrevin and the DPC to the muscle membrane [1]. Genetic variation in the human dysbindin gene is also thought to be associated with Schizophrenia [2]. 
25.00 25.00 30.50 25.70 20.90 17.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.03 0.71 -4.45 7 190 2009-09-10 17:06:14 2003-04-07 12:59:11 11 2 41 0 87 168 0 142.30 37 66.55 CHANGED tths....csscsPpstl..ssplplptcpth..ts.ptEtEhsls...suhLplsE.+RpPluSlSShEVphDLLD.sELhDMSDQEhA-VFhsS--E.stspSPs................s.p...h.+husLcSsShsp.opsppspEcps.sDs-......ss..t.....l- ................................s...ctpcs.chth....sQQh+L+ERQKaFE-shQt-hE.hls...suaLQls-.RR..p.............PlGShSS..MEVNVDhLEQh-LhDhSDQEuhDVFLsSs..sEps.ss.SPs........................s.t.....hs.p....s..s.pSssh.s...psops..p.tttp.p..........................t............................................................... 0 3 10 37 +2204 PF04445 SAM_MT DUF548; Putative SAM-dependent methyltransferase Kerrison ND anon DOMO:DM04150; Family This is a family of putative SAM-dependent methyltransferases. 26.00 26.00 26.10 26.00 25.80 25.50 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.55 0.70 -5.21 7 1454 2012-10-10 17:06:42 2003-04-07 12:59:11 8 3 1428 5 251 988 353 223.20 46 88.57 CHANGED tclhtltstasLtpsp-sshALlhscp.....pLELRplDEsp.GulhVDFVuGAhAHRRKFGGG+GEslAKAVGlpsuhhPsVlDATAGLGRDAFVLASlGCcVphlERHPlVAsLLpDGLpRAhtDs-IGthlpp+lpLlhssuhstLp......shspPDVVYLDPMYPH...+pKSALVKKEMRlFppLVGsDhDAcsLLcsAhpLApKRVVVKRPcYA.hLusppPsauhpTKspRFDIYhst 
......................................................................tt...........l...t..st.p.s...h.sLl.hs.p.....pLpLpp..h...s-s.c..G.slhVDFlsG.uhuH.R.R.K..F..G.G.G+.G..-.slAKA.lGlct.s...h..h....P..sVlDATAGLGRDAFV..L..A.S...l....G..........C....+..Vph..l..ERs.PVVAAL.L-DGLp...........R.........u....h.....t....D.......s......E.I.......s.......s....h.....l.....t.....cRl.p.Llau.sSlsh.Ls..................sh.s.sp.P-VVYL.DPM..F..PH.......+...p.K.S.AL...VK...KEMR...lFpsL.......V............G................s........D..h..D...A...D...s..LLps.A.h....tl.A..p..K......RVVVK.RP.chAs.L...u....s.h.......ts.s.sl..s..K.spR..FDlYh.p....................................................... 0 81 135 196 +2205 PF04446 Thg1 DUF549; tRNAHis guanylyltransferase Anantharaman V anon Manual Domain The Thg1 protein from Saccharomyces cerevisiae is responsible for adding a GMP residue to the 5' end of tRNA His [1]. The catalytic domain Thg1 contains a RRM (ferredoxin) fold palm domain, just like the viral RNA-dependent RNA polymerases, reverse transcriptases, family A and B DNA polymerases, adenylyl cyclases, diguanylate cyclases (GGDEF domain) and the predicted polymerase of the CRISPR system [2]. Thg1 possesses an active site with three acidic residues that chelate Mg++ cations [2]. Thg1 catalyzes polymerization similar to the 5'-3' polymerases [2][3]. 
26.60 26.60 32.90 26.60 24.40 23.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -11.01 0.71 -4.54 100 440 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 356 8 291 450 19 126.10 39 45.52 CHANGED +hchl.+paEppsp...llPssalVlRlDG+uF++Focp.apFpKP...pDt+slplMspsApsl.hpphs.clsluYupSDEhShlhcc..sssh..............asRRtpKlsShlsShhoupashhatphh.p........h................................s..PsFDuRslhhP ......................................h.hcYV.+p.FEtts.p...llspsalVlRlDG+sF+.+.....Focp.apFpKP...N.Dp+uLpLM.spsApsl.hpc...........ht....-....lslAYGpSDEa...SFla+c...pssh.............................................apRR...s..sKlhohlsShFousYshhWtpaF.p............p..L.h................................................s..PsFDuRsVhYP................................................ 0 100 174 238 +2206 PF01878 EVE DUF55; EVE domain Enright A, Ouzounis C, Bateman A anon Enright A Domain This domain was formerly known as DUF55. Crystal structures have shown that this domain is part of the PUA superfamily. This domain has been named EVE and is thought to be RNA-binding [3]. 
21.10 20.60 21.10 20.60 20.80 20.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.06 0.71 -4.17 140 1582 2012-10-02 17:37:24 2003-04-07 12:59:11 13 19 1431 13 568 1316 1573 137.80 30 64.70 CHANGED sY...WLhK......oEPp..........auhpph...ppptp..shWcGVRNapARN..hh+.pMchGDhshFYHSs.....tpt...ulsGlscV.s..cpsasDsoth.......s.t..........ps+WhhV-lchh.pphspslsLppl.....+........tp..t............Lpch.sll+pu..R............LSlhPVspccaphIh.ph ..............................................taWLhKoEPtt..............auhpp.....ttptt..t.WsGlR..N...ap..ARN.......hh+.pMchGDtshFYHSs........................ct.......ul.lGlsc..V.s..ppsasDsst..........stt...............tspahh..V-Vpah..cph.s..p.slsL.spl+.....................tp..sp.............................L.psh..llpps..R...........LSltPVop.spaphIhp..................................................... 0 170 336 459 +2207 PF04447 DUF550 Protein of unknown function (DUF550) Kerrison ND anon DOMO:DM04177; Family This family is found in a range of Proteobacteria and a few P-22 dsDNA virus particles. The function is currently not known. 21.40 21.40 21.40 21.40 20.90 21.00 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.38 0.72 -4.40 10 150 2012-10-01 21:36:44 2003-04-07 12:59:11 7 3 138 0 8 97 13 98.30 67 44.14 CHANGED phhc+HA-WS-+TFG.slpssGsLcHLuKEhlEltAsPsDlsEWADlhhLhaDutpRuGlosppIscAlhsKhslNcuRpWP-.+su-PchtI+cpsssss .....................................IR+EHAcWSDuTFG...sVGPIGPLKHLSKEALEAAAE..P...DDL....S....E..WADMQFLLWDAQRRAGIS.DupITsAMc-KLcINhcRQWPE.PKDGEPRLHIKE.sss.PV......................... 1 3 5 7 +2208 PF04448 DUF551 Protein of unknown function (DUF551) Kerrison ND anon DOMO:DM04177; Family This family represents the carboxy terminus of a protein of unknown function, found in dsDNA viruses with no RNA stage, including bacteriophages lambda and P22, and also in some Escherichia coli prophages. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.78 0.72 -3.22 26 815 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 445 0 22 522 9 66.40 34 34.52 CHANGED GWISCoERhP-..psp....VLlh..........t...hssthtsthhshhhssspa...............................thppsTHWMPLPcPPp ....................................................................................................................................................GWISCSERMP-.........psp.......tVLlh.................tt...hshhhss....t.h......h.s.ss.t..ss.s.ph...............................sth.ppsTHWM...PLPEPPQ............................................ 0 2 7 11 +2209 PF04472 DUF552 Protein of unknown function (DUF552) Waterfield DI, Finn RD anon COG1799 Family Family of uncharacterised proteins. 21.10 21.10 23.20 22.60 20.60 18.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.02 0.72 -4.06 120 2218 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 2038 2 449 1368 357 73.10 35 43.08 CHANGED lhlhcPpsa.s-spp.ls...cplcst.psVllNlpphcpcp...ApR.llDFluGssaulsGclp+l...upp.lalloPssVcls .................lslhcP+sY..s...-App.Iu...-p.L+ss.csV.llNl.p.phccsp.........A+R.llDFluGssaulsGslp+V......usp..lFLhoPsNVcl............ 0 149 312 401 +2210 PF04473 DUF553 Transglutaminase-like domain Waterfield DI, Finn RD, Bateman A anon COG1800 Domain This family of uncharacterised archaeal proteins are related to Transglutaminase-like domains. This family has previously been called DUF553 and UPF0252. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.11 0.71 -4.95 14 69 2012-10-10 12:56:15 2003-04-07 12:59:11 7 5 50 0 45 689 23 149.60 21 35.45 CHANGED pclpsluppL....pGsspt-ohWNlLcW.ccNlcYDasKuphh....................................................................................h..Ip.............................................oPpEhlth+cGlCsDYAlLTuulLLs.NhsshYllphphpps..h.....HAAsAlplsGshalLDQ+.P.hchssYl........phhc.cshhIpslshY .............................................................................................................................................................................h...t..t..s...pthhplh.pW.p..pslp..Y...shp+sp............................................................................................................................................................................................................................................hp...........................................................sP.h-sl.phppGlCpDYAlLTuul..Llp.................sh..s.......s..hY..l..........l...t...h...p.....h...p....s.p....t..........Hsssul..p.l.s..u....phal.lD..pp..s.h.....htpah....................................................................... 0 12 22 33 +2211 PF04474 DUF554 Protein of unknown function (DUF554) Waterfield DI, Finn RD anon COG1811 Family Family of uncharacterised prokaryotic proteins. Multiple predicted transmembrane regions suggest that the region is membrane associated. 
21.00 21.00 22.20 22.10 20.30 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.24 0.70 -5.20 64 1327 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1217 0 230 848 239 223.00 39 97.03 CHANGED hhGollNshullhGullGhhltpplP-+h+pslhpulGLsslhlGlphshps............cshlllllSLllGullGEhlplEctlsplGphlc...............................................................pst.psphscGFVouoLLFClGuMuIlGulpsGLsGDhslLhsKolLDGhoullhAuohGlGVhhSAlPlhlhQGulsLhAshlpshlsssh..........ls-hoAsGGlLIluIGlslL....tl..+cl+VuNhLPALllshhlshlh ..............h.lGshlNusAlllGullGsLlsp+lP........ERh+sohhphhGLsslslGI.hslcs...................tNh.shlluhllGulIGEhhtLEctlsphssthp........p.......+..............................................................................tp.p.s..tcpsFhpuaVshhlLFCsuuhGIhGAhppGhoGDsoILhsKShLDhFoAhIFAsoLGluVsh.ulP.lllhQhsl.s.hhAshI.h.s.lhos.sh..........hs-hoAlGGlLllAsGLp.lh......uI.......K.hh.VsNhLPALllshslshh................... 0 97 162 198 +2212 PF04475 DUF555 Protein of unknown function (DUF555) Waterfield DI anon COG1885 Family Family of uncharacterised, hypothetical archaeal proteins. 25.00 25.00 41.50 41.20 22.30 21.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.27 0.72 -4.01 14 106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 80 0 72 99 1 102.40 53 80.82 CHANGED ssYhVsLEAAWlV+DVcolDDAIulAISEAGK+LN..sL-aV-l-lGhphCPtCGE.h-ushlVAsTALVGLlLpMKVFNAEStEHAtRIAKuhIG+AL+cVP ....sYhVslEAAalV+DV-osDDAIslAlScsGKtLN..cL-YVEl-lGhotCPtCGE.h-sAFlsAcTALVGLhL-M+VFNAESpEHApRIAKupIGpAL+clP. 0 10 44 62 +2213 PF04476 DUF556 Protein of unknown function (DUF556) Waterfield DI, Finn RD anon COG1891 Family Family of uncharacterised, hypothetical prokaryotic proteins. 
21.20 21.20 21.20 22.00 21.00 21.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.55 0.70 -5.14 4 172 2012-10-03 05:58:16 2003-04-07 12:59:11 8 5 145 0 104 180 67 193.90 35 90.49 CHANGED MlLLlSPhsVEEAlEAIcGG..ADIlDVKNPsEGSLGANFPWVIRcIREhsPcD.h.VSATlGDVPYKPGTlSLAAlGAsVSGADYIKVGLYGs+Nh-pAlElM+sVVRAVKDhcsstlVVAAGYADAaRlGuVpPLtlPclAAcuGADlAMLDTAIKDGpoLFDahshEhltpFVchA+spGLhsALAGSlsppHltsL+ElGsDIVGVRGAsCcGGDRNsGRIcR-LVtcL+Ehhc ................................................LLhSstshpEAh.shtuu..sDIlDlKpPtcGuLG....A.............s.sl.+.tl....ht...h...s..st......p.....h....lSAslGD.h.s.hc..P..ss.hs..Ashs........ss.s..s.G.....scYlKlGlh..shts.tts..hphh..tth..t.h..........hh.l........lsshauD.h..........s.h.l.........thhtpuG.h...sssMlDTuhKc.G.ps....Lh-ahs.ptLtpFlp.s+phGLhsuLAGSlpht-lshLt..t..lssDhlGhRGAlCtstcRst.tlp.phVtthht.......................... 0 29 70 91 +2215 PF04452 Methyltrans_RNA DUF558; RNA methyltransferase Kerrison ND, Basturea G, Mistry J anon DOMO:DM04256; Family RNA methyltransferases modify nucleotides during ribosomal RNA maturation in a site-specific manner. The Escherichia coli member is specific for U1498 methylation [1][2]. 
21.00 21.00 21.10 23.80 20.90 20.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.30 0.70 -5.23 96 4583 2012-10-01 22:53:19 2003-04-07 12:59:11 9 6 4226 22 1026 3326 2801 217.90 29 90.03 CHANGED ltlss.cps+HlhpVhRlp.G-plhlhs.sputtahupltphsccph..hplh........cthphssc.s.......lsls.ulsKs.c+h-hhlpKusELGssplhPhhscRo..p....pphtp+hpRhpplshpAsEQutRshlPplp.sh.shpphl.pphspt.h........llhapctt.t.tth.thttth................plhlllGPEGGaospElphhpp.tGhpslsLGsR.ILRsETAulsAluhlph ....................................................h..lss.-pspHlhpVhRhpsG.-plhLh........s.....s............s......s.......t........ha.h..up...l..t..p.h..s....c..c....p...l.....lc.lh...........................................cth..p.ts.tc...Php..................lplspuls+u.-+hEhhlQKusEL.Glsplhsl.ucRsh....s....+h......pt....c......+............htK+hp+.ap+I.shtAsEQ.st....Rsh.lPpl..p......hh...shp...p...hh....tp....hs.pt.t......................llha.t.c.s.t.p..p...t.p.hst..hp.....................................................plhlllGPEGGh.o.s.pEl.phh.........p.......p....t.......s.....h.pslsL.G.PR.lL.RoETAslsAluslp......................................... 
0 362 687 881 +2216 PF04480 DUF559 Protein of unknown function (DUF559) Kerrison ND anon DOMO:DM04310; Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.45 0.72 -4.40 8 1749 2012-10-11 20:44:43 2003-04-07 12:59:11 7 59 1173 3 463 1520 202 98.00 26 32.61 CHANGED pchptpARcLRpstThsE+sLWphLRs+RLsGaKFRRQtPlGsYIVDFsCtpsKLIVELDGuQHstp..csYDApRTpaLputGasVLRFhN-EVhps.-sVL-pILptLp ......................................................................................t...................p..hh....h.....tt.....................h.php..p..ph..l........s.....t........a.....h...l.......D...h..h.....h............p....t....+......l....l..l....ElD.....G..s.....p....H...t...ps.............ht.....h...D.....t.....pR.......ss.hL....p..s..t..G..a.p.Vl.Rh.hs.pcl..h..pp..c.tVlptlht...h....................... 0 130 318 395 +2217 PF04575 DUF560 Protein of unknown function (DUF560) Waterfield DI, Finn RD anon Pfam-B_4010 (release 7.5) Family Family of hypothetical bacterial proteins. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.80 0.70 -5.18 22 575 2012-10-03 17:14:36 2003-04-07 12:59:11 8 16 290 0 79 541 30 264.50 25 62.17 CHANGED WpassuhsYhp-cNlNsuscptph..........ttsh..stPpsApGlsaphuhpKcasltssaahphphsstG+hYWcN+c..Ys-hosRhthGhuYpsu+pplslhPFhEppaa...........utcpYspst......GlplphshhlospWphusshEhtpppY..hcc...pchsGshphhSsoLhah.ssppaaahGhDahcpps.ps......cspSapppulRhGWuQ-Wsh..GlooplshuhupRpYcshshhh.............shpR+DcpassslolW+RshaahGlTP+LsapapKscSNhs.hasYsc..pplhlphsKsF .......................................................................................................Wphphuhshh.ppNlN.pustttph......................h.t...p..st..pu.pulsYphshp+p....hsl..t..sp....ahhththsstGp.hYhss+c........as-....h..s.h.phth..G.h.tapss.....p...pp...h.ulhPa.hp..pph..h......................usctastsh.................Gh..phph.shh..ls..p.Wphsssh.......p.htcp.cY...pcp...............pphsupt..h.h.u.s..o.l.h..a.h.h..s.s..p..t.h....hh.s..G.hs..a....+cps.....p-.......p.s.p..uh.pp..tu.l+h.G...hsp...past......G.lss.ph..p..h..u....a..scRp..Ycs...shh..................s.tR+Dcchphsh....u......l.h..+.c...p...l....p.a.h.GlsPpLsap.ap+s..pSNhs.hYpacp..pphhlphp+pF..................................................................... 0 27 48 65 +2218 PF04481 DUF561 Protein of unknown function (DUF561) Kerrison ND anon DOMO:DM04313; Family Protein of unknown function found in a cyanobacterium, and the chloroplasts of algae. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.43 0.70 -5.42 5 102 2012-10-03 05:58:16 2003-04-07 12:59:11 7 3 94 0 40 445 219 236.40 57 91.19 CHANGED MsN...K..lcsslcp+KAIKVISGLNNFNsspVlpIA+AuptusATYlDIAADPcLVcpVK.ulssIPICVSAl-schLhcuVtAGADLlEIGNFDSFYsQGphlsss-IluLVKpTRpLLP+TsLoVTIPHILsLcEQlpLApcLEuLGlDlIQTEGphouIS.....+scplschIcpSss....TLASTYsIS+sVsLPVICASGLSslTlPlAFuhGASGIGIGSuVo+Ls-ptcMVshlsElpsulSuspsh .....................................................Lppuhpp+puLKVISGLpNFDtssVstls+AAptGGATaVDIAsDspLVc.hsp.plo.sLPlCVSuV-PctFssAVpAGAshlEIGNaDoFYspGRhFputEVLpLT+pTRpLL.PplsLSVTV....PHhLsLDcQlpL..A.pLVptGAD...II...QT.E....GGT.S.SpP.......hpsGs..LGLIEKAuP.......TLAAAauIS.....R.....A.....V.....s........lP.....VlCASGL.S.s.V.T.s.PM.A.I.u.A.G..AuG..VGVG..S..A..l....N......+....LNDplAMlAsVRuLsEALt....sh............................ 0 6 26 37 +2219 PF04763 DUF562 Protein of unknown function (DUF562) Waterfield DI, Finn RD anon Pfam-B_6057 (release 7.5) Family Family of uncharacterised proteins. 25.00 25.00 36.60 58.60 19.20 17.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.89 0.71 -4.96 6 19 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 2 0 13 19 0 138.60 41 24.59 CHANGED phpcFL.hVltDEpEKcVlVVsscup.thcuLssculshLhcELpppGYSYLNIhSscscu.spVpERplLssc.pGRSFTVIls-LPlGssDIRsLQLASERIlVScph-AADAhASGC......KlLpa-c.pssWs....pcatsatcpV-cpt ...hpcFhhhlhhscpE+sVVVVsscu..sh.sLsspuhspLlcELppcGYSYLNIhuhcssu.hpVppRplLsss.pG+uFTllhs-.s.upsDhRsLQLASEthhsu+EhpAsDsYASGC......cllth-c.ppsWs....pcHttatccVccp.. 0 0 0 13 +2220 PF04577 DUF563 Protein of unknown function (DUF563) Waterfield DI, Finn RD anon Pfam-B_4026 (release 7.5) Family Family of uncharacterised proteins. 
22.10 22.10 22.10 22.60 22.00 22.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.58 0.70 -4.42 219 1565 2009-09-11 01:27:59 2003-04-07 12:59:11 9 58 566 0 846 1505 506 202.60 16 44.06 CHANGED tha....t.Hahh-h.lspl.hhhppthhpps..............hh...hst..thh...sahp......chl.ph.......hul..ttppl.ht....p...p.hhhp.......................plhhssssh.........ththhshhh...........thtphltpphthpt............................................ts.........+hhalsR........tst......pRplhN..pp..cl...h....phhp...............phu.......ap...hlp....sp.phs....hp-Qlp.las.suchlluspGuultNh.....lFh..ps..t....spllpl..s.....pthsht....ahtlsthhshphhhlhsps .......................................................................................................................................................................................................................................................................................................h.....................t..........h.............t............t.hhh.........................................thhh..............................................................................thtp.hlhpthshsp................................................................tts.........plhhlsR...........sp..........pRtl...hN...ps......-l....h.............phhp..............................................phs................hp..........llp......hc...phs....................h.tptlp...lhs....su.chllusHGAuLssh....lFh...ss...........s.....usllcl...........hthp.t................ahthuph...shph......th....................................... 0 442 611 734 +2222 PF04483 DUF565 Protein of unknown function (DUF565) Kerrison ND anon DOMO:DM04315; Family Predicted transmembrane protein found in plants, chloroplasts and cyanobacteria. This family is also known as YCF20. 
21.10 21.10 21.20 22.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.81 0.72 -3.81 19 173 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 110 0 85 173 161 59.50 33 43.19 CHANGED Qpu-WDllsAulllshsEhlutlhYpph.........ppp.th......hlphlNhhKlGlhYGLFlDAFKLGS ................u.hDllsAsllVhhhEhlstl...hYptsh................................htp.th......hlphlNhaKhGlhYGLFl-AFKLGS........ 0 16 55 75 +2223 PF04525 Tub_2 DUF567; Tubby C 2 Waterfield DI, Finn RD, Eberhardt R anon Pfam-B_4998 (release 7.5) Family The structure of this family has been solved. It comprises a 12-stranded beta barrel with a central C-terminal alpha helix. This helix is thought to be a transmembrane helix. It is structurally similar to the C-terminal domain of the Tubby protein [1]. In plants it plays a role in defense against pathogens [2]. 24.50 24.50 24.50 24.70 24.20 24.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.36 0.71 -4.90 16 987 2012-10-02 20:44:47 2003-04-07 12:59:11 7 6 608 2 363 768 7 155.80 24 85.77 CHANGED thhlss.ahsstthsLslhh+shhhsssuaslhDspGpllF+V-s.....hphscchhLhDssGpsLlol+c.K.....hhoLtspWplapusssp.......p.lFsl++s.....l......psahh..ssh...tppspsapIcGshhpcuhplhc..psspllApl+...s..pshhhGcDsasl.hVpsphDhuhlhuLlll ...........................................ht...................hhl.pp..h.hh.t.........s.c.p.Fsl....p......D.s.p.G......p.....lsapV-G..........ph....h..p.......hs.....c..phtl.......hD.s.s.G.c.......l...h.pI.p+..c...................hh.o.h...h.s.capl.h....ts.sp.p.....................h.as..l+Kp................................sFh....t....................s...h...................t..p....sh.shcl..cG.s.h..h...c....h..s.....aplh................p......s......p...s.....p.....llA.clp.....................p.......p...h.......h.th.......s.....s....s....asl...sV.h....s....h........ss...Lllulslh................................................................. 
0 59 216 293 +2224 PF04601 DUF569 Protein of unknown function (DUF569) Waterfield DI, Finn RD anon Pfam-B_4902 (release 7.5) Family Family of hypothetical proteins. Some family members contain a two copies of the region. 21.20 21.20 21.20 21.90 21.00 21.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.78 0.71 -4.76 7 184 2012-10-02 19:42:32 2003-04-07 12:59:11 8 6 19 0 119 175 8 140.00 37 47.43 CHANGED MElFpcAcuVRLRS..pHDKYLhAD-DEEoVpQsRsGou+pA+WTVE.V..sp.clIRLKSCaGpYLTASNc.FLLGhTG+KVlQotshR..hDpss.-WEPlREGupVKL+oR.G.paLRuNGGlPPWRNSVTHDh.PHhSsTpc.llW ...............MEhFpcuphVRLRS...tp.sp..YLhAD-....Dt.....psV...sp...s...+......s.u..u.....u..h..s....Ah.WsVE..h..l..................s......s....s......s............h.........lh...L...+.S.s.Y.G+YLsA....o...st.......h....h...h..G.h.s..Gp......+....VlQs............t...p..................h-.s.sh......W....c..s.l..........+......c....G...s.......p......l...pL...+........s.t.....G.......pa..LR......ANG.......s.h....sW.csu.VTh.D...st.t.......................................................... 0 12 63 92 +2225 PF01881 Cas_Cas6 DUF57; CRISPR associated protein Cas6 Enright A, Ouzounis C, Bateman A anon Enright A Family This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats [1]. It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. 
After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation [2]. Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers [3]. 23.60 23.60 23.70 23.70 23.40 23.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.12 0.71 -3.99 46 464 2012-10-01 21:23:39 2003-04-07 12:59:11 11 3 348 9 200 474 12 148.10 20 61.04 CHANGED hsuLh....ppsphpls...s....tpaplccl..chhcp.ch.ppphpFpTL..SPlllpshh.........sphpphaltPp-pc.....FhchlppNLhcKYpth.....hucp.p...pchphch.....h+t.....+hlphK.......pth..l+uhhhh.Fclpus..cllchuY-sGhGpKNuh.GFGhlcll ...........................................................................................h.........t...h.ht....t.....ph.hl.ppl....phhp...p.........h..ppph..hhpsl.....S..Plllppt..............ptpshahpsp-..pc..........FtchlppNlhcKa..pth..............hspp.hp....pphphps........h...th+....h.phhph+....................shh.....lpuhh.sh.acl.....pus....c...ll.....phsa...ps...GlGp+sup.GFGhlch.................................. 0 73 128 169 +2226 PF04489 DUF570 Protein of unknown function (DUF570) Kerrison ND anon DOMO:DM04330; Family Protein of unknown function, found in herpesvirus and cytomegalovirus. 
25.00 25.00 32.00 31.60 16.80 16.20 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.45 0.70 -6.07 4 48 2012-10-03 01:18:03 2003-04-07 12:59:11 8 1 25 0 0 44 0 393.40 47 68.12 CHANGED ss.u..hpsshp.pssaG+uI..Aohpp+.ppsh..R+HLshYpuhLhsIlcQYpp.hlPsptphpYcpGtIclushlls.spQlsutp......lYsWoolshPKhhuhtELaFLlsSscppslshpPhITKGGhhSu.hoY...Phs.SsshslshlpsslhMlPFlPa+hPhhhs.hhs...........sh.hLppttst.taGplp.lKp+shhhLullcsLTWppphshs.psphhphhtA.FhGohss..stlP.-shAhRhhsNspaphtsaEFoINl-Nlsls+spcKllGTLATssCcphsspLosENhPphLllpFELloshpcs.cLhFSsNPpLaFoGDlLNss..L.psPNhaELpVaAPYDlHFhsuppHhVpLslRYhphsDRp.hLVSsh.sEshFcTthslWtsssPL+lTLhSao.NLlLPQGT.lAsLhhl ..............ohhEuhR-s..spssYG+sl....p.cs+.l+..Rot...pHLosYcshLhtlscQYNp.sssspt+ApYhpGsIahuhsVIhsstpssshp.......YsWosllsP+s.shsELaFLLCSsptsusVhQPhITKGGhpouhhsa...s...tps....pss....phshl+spLshlPFVPassPcauV.FhT..tcsuh.....................h.hlp.....tAs....FGpho.VpRpGushlshhcpLoWhs+plhs.sppchTpYlA.FcGTh-s..uhhsu.scsWhs.+NVpYEhhsh.FolNV-SlsVsspp.RpLlGTluouhCcps.os.ploscNMPp..shpF.Ll.sotpRt.slhFSpNPoLFFoGDuLN.s..L.ppPssasLTVHAPYDIpFt.psppsVplDlRYsphsDR.CFLVuslPpE..stFaTGLoVWRsspPL+lTLhShT+ollIPQGTPIAsLY.l.. 0 0 0 0 +2228 PF04672 Methyltransf_19 DUF574; S-adenosyl methyltransferase Waterfield DI, Finn RD anon Pfam-B_4601 (release 7.5) Family This family contains a SAM (S-adenosyl methyltransferase) domain, with a central beta sheet with 3 alpha-helices on both sides. Crystal packing analysis of the structure PDB:3giw from Swiss:Q82L35 suggests that a monomer is the solution state oligomeric form. An unidentified ligand (UNL, cyan) was found at the putative active site surrounded by the residues His57, His170, Phe171, Tyr216 and Met22 . The UNL is likely to be a phenylalanine or phenylalanine-like molecule. (details derived from TOPSAN). 
20.00 20.00 20.00 20.10 19.80 19.90 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.57 0.70 -5.13 12 720 2012-10-10 17:06:42 2003-04-07 12:59:11 7 2 101 4 349 788 2 255.20 39 95.90 CHANGED stttsspIDTo+PHsARlYDahLGGKDNYPVDcpAu-shhtshPthhtsAptNRsFhpRAVphLAt-.GIRQFLDIGTGlPTtsNlHQlAQplAP-oRVVYVDNDPlVLsHucALLsusP-GhTsalcADl+cPcpILp..cstcsLDFsRPVALhLlAlhHal...sDs-DshulVcpLh-sLPsGShLsLScsos-hsPthh...ctstshaspu..utshphRohp-lttFFc..GL-LlcPGlVssscWRP-ssts...........................sstthshauGVARKs ............................................s.....tlDsshPpsARlYDaaLG.GKDNassDRpsucphht....hhPph..h..t..hAptNRsFLpRAVRaLAs.c.GlRQFLDlGoG.lP.T.t.................s....N..........lHE.VA.Qpl......s...P..-...uRVVYVD.NDPlVLuHu+ALL.su.s...s.p......Tsh...lpADlR-PcpILst....pspchLDh.sc.PVALh.llulLHa.l.......sD....p.-......c.s.t.slV.pplh-sLssGSaLs.lo..H......s....o.....s......-....h.s.....s....p.t..h..........pt...s....t...p.h....h...p..ps........ss..s.h.....t...hR..opp-ltpaFc...GL-Ll......-......P.G......lV.s......sp....W...RP-s.sss.............................p.s.tt.h.hauG.VuRK............................................... 1 125 277 340 +2229 PF04746 DUF575 Protein of unknown function (DUF575) Waterfield DI, Finn RD anon Pfam-B_6048 (release 7.5) Family Family of uncharacterised proteins. Contains several chlamydial members. 
25.00 25.00 80.70 79.60 19.60 18.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.26 0.72 -3.98 6 20 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 2 0 15 20 0 97.00 55 18.45 CHANGED AFDFoRPlCSRITNFALGVIKAIPIlGHlVhGl-WLlSph.tphls+PsFsSDVssIlKlEKstG+DHluRVEphL+p.RlslssED.sKVHG+hPpsPFs AFDFo+PhCSRITpFALGVIKuIPllGHllhGl-WLlS+hhcphVo+PsFsSDVssIlKlEKlsGRDHIuRlEshLKp.RlsIssED.DKVHG+hPpsPF.u 0 0 0 15 +2230 PF04507 DUF576 Protein of unknown function, DUF576 Mifsud W anon Pfam-B_2120 (release 7.5) Family This family contains several uncharacterised staphylococcal proteins. 20.00 20.00 20.40 20.30 19.90 19.70 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.62 0.70 -5.46 29 2553 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 198 3 28 1021 1 231.00 55 98.31 CHANGED MpahK+lsLhISlllLhlhI.uGCGh.s..........K--SKEspIKKSFsKTLsMYPIKNLEDLYDKEGYRDsEFcKsDKGTWlIpS-MshpsKGcshco+GMVLhlNRNTRTuKGaYhlcclp--pcthspsp-KKYPVKMcNNKIIPhcpIcDcKLK+EIENFKFFsQYGsFK-LcsYKsG-ISYNPNVPsYSAcYpLsNsDhNVKQLRKRYsIPTpKAPKLLLKGsGDLKGSSVGaKclEFTFlcsKcENIaFoDSlpFpPSc ............Mhh.p+lhLhIsh.ll.Lhlhl..sGCsh.p..............ptsuKEtQIKK.SFsKTL.sMYPIKNLEDLYD..KEG.Y..RDs..EFcKsDKGTWhI.Schshps.Kscphcs+GMVLalNR.NTRTsKGaYhVpphhc-pct.h..........p.s.pcKcYPVKMhsNKII.hcplcD-KLKKcIENFKF.FsQY.usFK-lpsYcsGDlSaNspVPSYsAcYpLsNcDhNVKQLR+RYsIPTpKAPcLhLKGsGsLKGS..SVGaKclEFsFscpKc-slhasDSlpapPo.................................................... 0 11 11 27 +2231 PF04510 DUF577 Family of unknown function (DUF577) Finn RD anon Pfam-B_3938 (release 7.5) Family Family of Arabidopsis thaliana proteins. Many of these members contain a repeated region. 
20.90 20.90 20.90 20.90 20.40 19.60 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.18 0.71 -4.57 17 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 9 0 49 54 0 158.90 33 38.91 CHANGED l+EIpPLLIoCLp.pphp.-SphKlLtclVSpVAaplh.hpcssW.ELpDhIhShu.ppE.hKAhalF.sLs.sl.hc-Flh+hhcs.ll.chpphLhsPpc..scspsWsLAhpsshphuIpLl-ssh+.schl+clhph....hlcSV+cLVp+GhE.thlhRuhc-lEphVp+phphYphoc ....................hhplpslLIsCLp.pthp.po.hKlLtplVspVsh.pl..h.hpctsW.-Lt....-hIho.u..ps-...+AhhlF.sLs..l...c-Flh.hhcp.Lh.ch.phL.sPpp...cspsWsLAhpuuhphsIpll-sstp.schlcclhpp....hlcSl+-LVp+thE.thlh+uhcchEphlpcphphYphs.t................. 0 25 29 30 +2233 PF04669 Polysacc_synt_4 DUF579; Polysaccharide biosynthesis Waterfield DI, Finn RD, Eberhardt R anon Pfam-B_4574 (release 7.5) Family This family of proteins plays a role in xylan biosynthesis in plant cell walls. Its precise role in xylan biosynthesis is unknown [1,2]. Its function in other organisms is unknown. 20.40 20.40 20.70 21.70 18.50 20.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.34 0.71 -4.93 29 437 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 248 1 286 418 7 142.80 30 66.25 CHANGED hEhphuh+slc+tps.hNlLshs.ssp.....pLhhstlscp.............hhpphccpaPslc.......scl.h-tccLhust.....................................tcpcW.hhhhss.cshht-........ashGslhRscsstsho..sslFVpcl..........pFhshEhtpsptGhsphhah .............Ehphus+slp+tps..hslLshs....ssp.....pLhho.phs..cp............................hhpph.......c.....cpFP...p..h..c...........sph..hct-cLhutt..................................................hctcW.h.hhpt.c...shhp-...........ashGsLhRscsstths..ss.lFlhch..........pFhshEht.psptGh..hh......................... 0 68 151 223 +2234 PF01882 DUF58 Protein of unknown function DUF58 Enright A, Ouzounis C, Bateman A anon Enright A Family This family of prokaryotic proteins have no known function. 
Swiss:P71138 a protein of unknown function in the family has been misannotated as alpha-dextrin 6-glucanohydrolase. 21.60 21.60 21.60 21.60 21.50 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.81 0.72 -3.84 161 3991 2012-10-10 16:07:06 2003-04-07 12:59:11 13 9 2067 0 1413 3655 957 85.30 25 23.95 CHANGED st-ats.lRcY.psGDsh+pIcW+soAR........t.sp...Lhl+ca.ptpp.p.....tplhlhlDtsts.......................................tphEttlphsss.....lshthhp.....pspthul ................-atp.lRpYps.G.DclRpI-W+so.AR...............p..sc........hhl+..pa.cp-p.p.......ts.lhlhlDhsssh..h.stt.......................................................tch-hul.phsus......lshh.shp.....pssphs............................................................................................................................................. 0 508 1002 1244 +2235 PF04515 Choline_transpo DUF580; Plasma-membrane choline transporter Mifsud W, Pollington J anon Pfam-B_2258 (release 7.5) Family This family represents a high-affinity plasma-membrane choline transporter in C.elegans which is thought to be rate-limiting for ACh synthesis in cholinergic nerve terminals [1]. 
22.10 22.10 22.30 22.10 21.70 22.00 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.83 0.70 -5.70 73 1097 2009-12-17 12:57:17 2003-04-07 12:59:11 7 15 292 0 722 1057 28 295.60 23 51.13 CHANGED hhhllhshh.llhllhh..........................hhhppRI.hussllcpuscshp.ph.plhhhsllshllhhsahshashshlslhsptps....shps.s.....................................................................................tlhhhhhahlFuhhWhsphltslhphslA.GshusWYastpp....shPptsshsuhpRuhp.....YphGSlshGSLllullphlRhllchlppphpttt.....s.hhthlhpshpChhtsl-phlcahN+.AYlhlAlaGcuahpuAKcsapLlpp.sslcslhssslsshhLhluplhluhhsulluahhhph..........psshsashsshhhshlhuhhls.plhholhpsul-TlFlCaspD.ph.ppss ..................................................................................................................................h..hlhhhlh.thlhhh.hh..........................hh....hppR..l...h..sh..tllp.u.s.cslt...ph.thh..h..........h..sh.hsh.hhhhhh...hs..h.Wh.h..sh.l.h..l.hss.s..ps......p..t...............................................................................................tt...............hhhhhh.hahlhs.hhWh.sphlhuhtphslA.Gshusa....Yashpp.............shst..............h........s..l..h.s.u......ht...+sl.p...............aphGSl...shGS.Lll.sllp....h.hRh...........l.l..........p.hh.pp....phpttt.....................s.h.phhh.psht..C...hhhhl-..............p.hlcahNc..AY.........hhhAlhGpsFspuA+................psa.llh.........p..shh.cs..h......h.....hsp.........l......sshll....hh..u..pl.hls..h.hsu...hhuh.....h.hhp..................................t.th.phhh...h.sh.h..hs..h.h................h....uahls.phhhs..l...ht.sl-olalCas.D...h........................................................................... 0 287 424 583 +2236 PF04570 DUF581 Protein of unknown function (DUF581) Waterfield DI, Finn RD anon Pfam-B_4765 (release 7.5) Family Family of uncharacterised proteins. 
23.40 23.40 24.00 23.40 23.20 23.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.43 0.72 -4.68 42 427 2012-10-03 05:12:49 2003-04-07 12:59:11 9 3 27 0 245 398 0 55.50 39 30.63 CHANGED stsstsstsstss.ssFLpsChLC++pLs..scDIaMYRG-pAFCSpECRppQh.hDEtp- ..............hs..........tsspFLpsChLCp+.pLs..scD.....IaM.YR..........G-pu...FCSpECRppQh.h.DEt............. 0 20 132 192 +2237 PF04518 Effector_1 DUF582; Effector from type III secretion system Mifsud W, Eberhardt R anon Pfam-B_2447 (release 7.5) Family This is a family of effector proteins which are secreted by the type III secretion system [1,2]. The precise function of this family is unknown. 21.00 21.00 21.50 22.40 20.80 20.90 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.21 0.70 -5.47 20 164 2009-09-14 12:27:57 2003-04-07 12:59:11 7 1 35 0 22 81 0 380.30 32 51.67 CHANGED plpsphsshstspspFss..sshsshpshplasphssspstls.shhtssspthlsshpps..s.uppspphhppapstAssplpplpppIsphppphtchsstKAohsss.......hphupsshlpopPLtSAauSllLD+YlPpQpphLpsLtpclsaSNhAushhNsllchlssFsouslYYsLuSYlsQ.....ppGtstFssuhtpApstLscE+ppsppDlppspcApshlspllsplpsDs...clTsuQ+pclh-slssYptplsslhssLssLpshLssLpl.........hsssspscssFclps.......-sWhtpLpsLEuhllsG...hssusssGGhhslhsplpuDQQsYsshuQNQQLsLQhchouhQQEWTlVusSLpLLNQIaspLsRp ......................................s.psphpphs.tpppaps..ssts.hpthplh....sphpsspshls.hhhtshspthlsshppsh.ptuppspphhscapstupschpclppplpphppphpplsstpsshssp.......ht.ApsshlcshPLtSAauplLLD+YlPpQpphLpsLtpclpaSNhAushhNsllchlspFssuslYYNLuSYlsQ.........ptupshFssshppspspLscE+pphctDlpp.......sppApphlppllppVpsss...plTspQ+sclp-tlssYtppLsslhspLssLpshLssLsh..........sssspscssFclhu......tcpWhhpLpsLEstllsG...hssusssGGhhshhs.lpoDQQsYsshuQspQLtLQhphouhQQEWTlVusSLplLNQIatpLspp..... 
0 5 5 18 +2238 PF04519 Bactofilin DUF583; Polymer-forming cytoskeletal Mifsud W anon Pfam-B_2455 (release 7.5) Family This is a family of bactofilins, a functionally diverse class of cytoskeletal, polymer-forming, proteins that is widely conserved among bacteria. In the example species C. crescentus, two bactofilins assemble into a membrane-associated laminar structure that shows cell-cycle-dependent polar localisation and acts as a platform for the recruitment of a cell wall biosynthetic enzyme involved in polar morphogenesis. Bactofilins display distinct subcellular distributions and dynamics in different bacterial species, suggesting that they are versatile structural elements that have adopted a range of different cellular functions. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -10.41 0.72 -4.00 160 2175 2010-01-08 16:00:08 2003-04-07 12:59:11 8 16 1383 0 671 1765 1447 96.10 23 54.70 CHANGED tsolIupssplpGslp.hsu.slpl-Gp..lpGslps.ps....plhlupsGplcGp.lpup.plhlsGp..lpGslpupchl.pltssuplpGclpssp.lplcpGuhhpG.phph ............................h..shlstshp.hpG.p...lp.hps....tlpl..c..Gp...l..p..Gs.lps..ps......plhlupsup..l.c...Gs..l.p..s...p....p.l.hl...s...G.p........l..p..G.....s.....lp.u...p.p..l.pltssuplp.G..s..l...p..s...pp.....lplptGuhhpGph..h....................................... 0 268 476 575 +2239 PF04520 Senescence_reg DUF584; Senescence regulator Mifsud W anon Pfam-B_2571 (release 7.5) Family This protein regulates the expression of proteins associated with leaf senescence in plants [1,2]. 
21.50 21.50 21.50 23.40 21.40 20.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.18 0.71 -3.72 22 277 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 33 0 170 265 0 149.00 31 84.22 CHANGED sth....ELsEsDla.ss...................................................pssusssshpsu......sss+th.th..............ssssssthtpSAPVsVPshuth..........pptcs.pptcs--p--t-ss.hlPPHEhlAR......Rcsuuh..SVhEGsGRTLKGRDLR+VRNAlhc+TGFhD ........................................................................................................t...-h.Et-lh.........................................................................................t.t....t....t........t.sthh..t..............................tstsssthhtpSAPVsl..Ps.hsphht...........................tttptt.t..tt.c.s.s.......c....pc..c.....s.......s.........ssp...hlPPHEhlA.R...................pptsuh........SVhE.....Gs...G.RTLK.GRDLp+VRNAVhcp.TGFl-....................... 0 18 95 134 +2240 PF04522 DUF585 Protein of unknown function (DUF585) Kerrison ND anon DOMO:DM04368; Family This region represents the N terminus of bromovirus 2a protein, and is always found N terminal to a predicted RNA-dependent RNA polymerase region (Pfam:PF00978). 
25.00 25.00 233.10 232.40 18.60 17.60 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.67 0.70 -4.98 5 16 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 6 0 0 16 0 246.00 51 30.16 CHANGED M.SKFlssD.EYpVPSFQWLIDpSLEc..s.-sElAshVsc..s.....-PcsElTADGSLASFlLAVKPLVIG.Gpa-PPFDQARWGsCCcsVssls-uFTc++LIPhAEMARMLYLDI-GSFVDESEsDDWhPEDTSDGYsEYhuuDEsch.t...us-LppsLucEssshEh-EscEsoD.....SsPS...EhTLGDRYlsTsEEFtol-.SDYslTLNLhNPVEpRVullEDTascs-sD-.hspuPpYpERVSLcuLEAAGH ...MSKhhs--.sYpVPSFQWllDQoLEs..s.cs-sAthlsp..s.....-susElssDGoLASahhAVKPL.IG.Glh-PPFDQARWGpsCcsVhslspthsshtLIPhAEhARMLYLDI-GSFVDESEsDDWhP.DTSDGas-shusstsch.t...pochpp..hLup-usphE.cpsccToD.............sPs.....EhTLGDRYhshcEEFtol-.oDYDIoLsLhsPlEp.RVuhl.DTasHs-ssD.hsThPhYh-RlSLppLEAAGH 0 0 0 0 +2242 PF04532 DUF587 Protein of unknown function (DUF587) Kerrison ND anon DOMO:DM04375; Family This family consists of the N termini of some human herpesvirus U58 proteins, and some cytomegalovirus UL87 proteins. This region is always found N terminal to the Pfam family UL87 (Pfam:PF03043), which has no known function. 
19.30 19.30 22.20 199.50 17.20 16.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.32 0.70 -5.40 8 45 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 27 0 0 40 0 215.80 55 24.92 CHANGED hp.psup-A..LIVcSsss..p+shsVPVaVNSaNLTpElSssEDuRLspssP..VDsE+lculFcsLapAuPsplcs-pDRsKllLsRLLLGPVAVPCYC-.EW-ss...............-YLscsuhtCpGPlLYVHRtRC+Cs..sstsshpFoVMcsahuTHVFRGLLSLpEWNs+LPslFCsC.....ssspsDRYshslLP+chSlaL-hYPYhLspLsRaLoVsEIDDhsNslI ......ush...phss-ALIVhusoA...hRVhsVPVaVsohNLTpELSssEDARhspstP..VssE+VcuVFtuLYpAsPsal.............cTEp-RsKlVLsRLLLGPVAVPCaCD.EW-sc...............-aLscssphCpGPLLYVHR.RCpCG....ssGpuLsaoVLcsHhATHVFRGLLSLoEWNpcLPslFCsC....ssuspp-RYsMAsLP+-hSlaL-.YPYhhVcluRhLoVsElDDhVsuh.o 0 0 0 0 +2244 PF04569 DUF591 Protein of unknown function Mifsud W anon Pfam-B_2799 (release 7.5) Family This family represents a conserved region in a number of uncharacterised plant proteins. 21.00 21.00 24.30 24.30 18.80 18.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.21 0.72 -3.94 49 201 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 3 0 185 199 0 81.40 47 29.53 CHANGED uGpR.........c...tpssupsspRs..ssspsGuppssp.....uctc+...p.GhhttsRGs-PoARIRppthDG..G..shRRRpPAApcG....Gssctso.R..................s+F.ustASst ................................................G.R........c..hpcss.utsGRRs..sssssGup+usp........Gcuc+....TGhhttsRGsEPsARIRpRtlDG....GthRRRQPAAtct....GNGDEsT..R..................GRFsusRASst................... 0 0 0 0 +2245 PF04574 DUF592 Protein of unknown function (DUF592) Kerrison ND anon DOMO:DM04429; Family This region is found in some SIR2 family proteins (Pfam:PF02146). 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.87 0.71 -4.66 14 90 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 54 2 54 87 0 117.70 34 26.38 CHANGED phhP..ptP.ppsl.ls+s..ssKalhstho+--olNARhaLKhaG.tcFL-sYLPp-lNSLalYaLIKLLGFplKDppLhssl.p.hp..hp.s.t..ht.....................t.pDPL-KKphl+LIKDLQ+AhsKVLuTRlRLsNFhTl-+Fls+L+sAKKILVLT .............................h......................hhh.ths+cpshpsRhaLK..shhpFL-haLP.phso..lhhhIthLGFt.+-.thh.................................................pDsLtccphhcLIK.Lp+AlsKVLspRlRLssF.TlDchlptL+sA++ILVLT............................... 0 8 25 46 +2246 PF04578 DUF594 Protein of unknown function, DUF594 Mifsud W anon Pfam-B_2859 (release 7.5) Family \N 21.00 21.00 21.70 21.30 20.10 19.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.99 0.72 -4.52 66 634 2009-01-15 18:05:59 2003-04-07 12:59:11 8 22 16 \N 390 558 0 57.80 34 8.75 CHANGED pLh.......htsppphWclluclWsEhLlYsA....s...pssupsHucpLupGG..EhlThlWhLhsHsGl .......................hh.........ptpptWcllsclWsEhLhYsA..s......ps.ss.ptHAcpL.upGG..EhlThlWhLhtHhGl........ 0 6 138 264 +2248 PF04591 DUF596 Protein of unknown function, DUF596 Mifsud W anon Pfam-B_5061 (release 7.5) Family This family contains several uncharacterised proteins. 25.00 25.00 26.10 37.00 22.60 24.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.53 0.72 -4.07 19 144 2009-09-11 22:03:20 2003-04-07 12:59:11 7 1 49 2 24 99 0 70.10 52 56.83 CHANGED LD-G+LKLu.+KG-aI.pGoT-ELV-MFRpuFPs..SDEEh-.........tGlWFhs-p.CPhsAVWlaKGtt.....ENGEDYY-Ws .LDEG+LKLu..++Gc..Fl.sGTo-EhVEMFRpuF.Ps...SDEEh-.............tGhWFhs-...pCPuGAVWVhKGt.....tENGEDYY-Ws.... 
0 1 12 21 +2249 PF04640 PLATZ DUF597; PLATZ transcription factor Mifsud W, Riaño-Pachón D, Mistry J anon Pfam-B_5458 (release 7.5) Family Plant AT-rich sequence and zinc-binding proteins (PLATZ) are zinc dependant DNA binding proteins. They bind to AT rich sequences and functions in transcriptional repression [1]. 25.00 25.00 25.10 25.10 21.20 24.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.53 0.72 -3.77 12 304 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 36 0 210 282 0 71.90 52 29.08 CHANGED RR.sYHDVlRVs-lpKllDhSsVQTYlINSAKVlFLNcRPQsRssKG..........ssNsCpsCsRuLh-s.F+FCSLuCKl ................RRtSYHDVlRVs-lpK.l.lDlSsVQTYlINSA+VVFLN..cR..P.....Qs...R.s..sKG................................ss.stCpsCpRuL..h...D.s...a+FCSLuCKl............................. 0 34 136 177 +2251 PF04654 DUF599 Protein of unknown function, DUF599 Mifsud W anon Pfam-B_5550 (release 7.5) Family This family includes several uncharacterised proteins. 21.10 21.10 21.50 26.70 18.20 20.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.25 0.70 -5.19 46 411 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 270 0 210 414 55 205.50 27 85.43 CHANGED hlulshhlhsWhGYshahpppstpps..olsshhsphRctWhpphl...sR-s+.lhDspllusLppusuFFASoslllluGlhslluss.......-phhslhucls..hsstsschhhplKllsLlhlFlhAFFpFsWShR.asasulLlGusP.....tp.........ststtshAppsupl.shAuppaNhGLRuaYFuluhlsWhhuPlshhhsolhlVhlLapR-FpSpshpsl ......................................lhlshhlhshhuYp..hah.hhpstpps...olhsh.sthRctWhpphh....pccpp..lhssphltsh.tusoFhASos.lllhuGlh.slluss................pph..tphhssls....h..ss.p...sphh.h...plKhhsLhhhFlhAFhpFshuhRhasasulLluuss......p..................t.ttt.hhsptsuchhppAuptashGlRuaYauhshlhWh.huPhhhhhsohhhlhlL.ap.h-hhSpsh...h................ 
1 47 122 162 +2252 PF00892 EamA DUF6; EamA-like transporter family Bateman A anon Pfam-B_177 (release 3.0) Family This family includes many hypothetical membrane proteins of unknown function.\ Many of the proteins contain two copies of the aligned region. The family used to be known as DUF6. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.39 0.71 -4.09 98 66028 2012-10-02 19:55:49 2003-04-07 12:59:11 15 83 4851 0 17444 56174 36728 125.00 15 75.68 CHANGED hhauhshlhs+hhhpp.....hsshthsshphlhus..hlhhhhhhhttt.............hhshpthhhllhlulhssslshhhhhhu...lphssuspuuhlht.hhPlhshlhu.hlhLtEplshhphlGhllslhGlhllhh .................................................................................huh.h.h.h..h..h..p..h..h..h.tt...........h.s...s....h.....h....h......s...h....h....p....h...h..h..u......s...........h....h..hh..h...h.h.h.h.tt................................h.h.h.s.h.p...t...h......h....h.....h.........h.....h.....h.........u...l...h...s....h....s.....l..s...h.....h.......h...h....h..hu..........l.p...t......h....s....s..........s...t..s..u..h....l..hh....h...t...P....l....h.....s........s........l........h..........u....h..........l..........h........h........t.........E............p........h.........s..........h.......h......p........h......l......u......h......h.l..s.l.hGlhlh..h..................................... 1 4905 10337 14119 +2253 PF04634 DUF600 Protein of unknown function, DUF600 Mifsud W anon Pfam-B_5411 (release 7.5) Family This conserved region is found in several uncharacterised proteins from Gram positive bacteria. 
21.70 21.70 21.70 21.70 21.20 21.60 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.76 0.71 -4.18 15 1266 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 296 4 43 633 0 133.70 58 88.36 CHANGED LsplYpEIAppIsshIPsEWEKVYhhApls-cuuEVaFaYspPs.S-EhhYspsIsccaNlsccpFhcphh-LachFc-LRctFK-sstEPWTShphshs+sGKhslcFDYpDhh.po-asphtRphhapYK+hGllPEs..E.-hEhlcc ................................LSphYNEIANcISuMIPV.EW..E..KVYThAY..ls..D..p..G.GE..V..hFNYTcPu...S-EL.YYosI.......s+....cYNlScplF....Dhh.......h-LYchFccLRslFK..EEs....hE..PWTSCEFDF.T.c-.G.K.LpVSFDYIDWl.soEFs.hG.RpNYYhYKKFGllPEh..EYEhpclKc............................. 1 15 23 33 +2254 PF04645 DUF603 Protein of unknown function, DUF603 Mifsud W anon Pfam-B_5498 (release 7.5) Family This family includes several uncharacterised proteins from Borrelia species. 22.10 22.10 22.20 33.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.99 0.71 -4.34 3 189 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 29 0 14 166 1 164.70 61 95.50 CHANGED MKRAKRSFDDYVAYF+EGSLSDsEIAK+LGVSRVNVWRMRQKWESGEsS...VNEDSRVTISEDTFEHLLuQTF+SEVpARKVKSELDLERSNLELGFINAFKQYSSlELsSMhoKIEsLRuKIDSLNKpsNKKNtpsVNEE...INSLKlELDELIKECpIREMELYYECMKKLAsAHEVDSKSNY ........MK+sKRSFDDYlsYFpEGsLsDhEIAp+LGVS+VNVWRMRQKWESGEss............VNpDSRVTISEDTFEHLLuQTF+SEVpA+KV+SELDLERuNLEL..GFIpAFKQYSSlELsSM+o.KIEsLRsEIDuLNKuusKKNKpsVNt-...INSLKSELsEhIKECSIREMELYYECMKKLssApEsESKSNY.............. 0 8 8 8 +2255 PF04646 DUF604 Protein of unknown function, DUF604 Mifsud W anon Pfam-B_5503 (release 7.5) Family This family includes a conserved region found in several uncharacterised plant proteins. 
20.80 20.80 20.80 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.72 0.70 -5.16 17 290 2012-10-03 05:28:31 2003-04-07 12:59:11 7 9 78 0 208 320 1 203.90 35 42.72 CHANGED QNlhFSYuMAaGGGGaAISaPLAstLu+h.DsCIcRYstLYGSDcRlpAChuELGVPLT+EsGFHQhDlhGslhGLLuAHPlAPLVSLHHLDhV-PlFP.sh..sRhsAl++Lhp.sscLDsupllQQSlCYD...........+..pppWTlSVSWGYsVQlhpuh..............lssRElEhPtRTFhsWh+pus.hssasFNTRPl.scs....sC.p+PhlFahssspts....sps...tThopYp+phhtt...spC.pashssP.sclcpIhVhp.+PsPphWp..+uPRRpCC ......................p.hauas.M.AaGGGGhAlShPLAptLs..c..h.DpC.....l...........pR.Y...s.....tl.....a.....G..S...DsRlpuCh.u......E.L.........G.Vs....LT..c....-.............GFHQ........h...............DlhGch...hGlLsuH.PlsPllSLHHh.-...h...h..pP.la....P.....th.....sp.t..u.l...phhh...t...shp.hDs.thhQpsh.CYs..............t..t.th...ohuluhGasl.lh...................h....ph.hs.pTF.sW.t..t.....a.hsT+......p......C.tpPhhaahpps................t.......hhs.Y.h............h....................lp.l.V.t................................................................................................................................................................ 0 37 125 168 +2256 PF04657 DUF606 Protein of unknown function, DUF606 Mifsud W anon Pfam-B_5554 (release 7.5) Family This family includes several uncharacterised bacterial proteins. 
21.80 21.80 21.80 22.00 21.60 21.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.66 0.71 -4.09 147 2941 2012-10-02 19:55:49 2003-04-07 12:59:11 8 4 1497 0 499 1852 371 136.10 27 86.54 CHANGED lhhl.lulluGhhlslQsulNupLupt.lG.tshhAuhluahlGhlsl....hllhlhhp...t................shtt..ht......ss....PhWhal.GGllGshalhsshhhh.PclGsshshshllsGQllsulllDpFGhhGhsppslsht+llGlllllsGlhl ..........................hhlhslhuGhhlslQ.sslNupLupt.....hu....s............sh....hus....hlsahlGhlhh..hhlh...hhhp..s...............phtt..ht.........................sh.......sh.ahhl.GG.lLGshhlhsshhhh..sp..lGsuhshsh...hlsGQllsulllDpFGhhGs....tpshshh..+.llGslhlllGlhh............................... 1 148 281 409 +2257 PF01886 DUF61 Protein of unknown function DUF61 Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Protein found in Archaebacteria. These proteins have no known function. 20.40 20.40 20.80 21.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.59 0.71 -4.31 17 84 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 82 0 58 78 1 126.40 28 91.20 CHANGED h-+hlphtltplNpclPtcRKoLpELLpE-+PhlhlpsGscHhhc+cELEhLpphlsp-.hthl+lPIllEhpSohtpGshhlpGct.ElKsIp+lLshchs...tpshlhlh+..l.plRRcLPTsTphhFlh .............-+hlpttlpplNpphPtcR+oLp-LLpE-+Pplhlps.GppHhh++cELEhLpphls...tp.hphl+lPIllchsss.htpGthhlpGct.cs+slsplLGtcht......pshlhl.+..l.plR+tLsTsTphhFh................................................ 1 14 29 46 +2258 PF04748 Polysacc_deac_2 DUF610; div_psaccdeacet; Divergent polysaccharide deacetylase Waterfield DI, Finn RD, Yeats C anon Pfam-B_5949 (release 7.5) Family This family is divergently related to Pfam:PF01522 (personal obs:Yeats C). 
20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.16 0.70 -5.32 134 1399 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1342 3 291 924 146 203.80 39 64.31 CHANGED IlIDDlGh....phtsscslhsLPhslThAlhPhsststphuptA...+ppG+ElllHhPMpPhs.......t.p.sstsLhsshsspclppplptshsplPtshGlNNHMGSthTpsppsMphlh......ptLppp.sLhFlDStTsspShAtphApphGlsshpRclFLD...sp.ps.ssIpppLpphhphA+p.pGtAlu.IGHPtsp....Tlps.LpphlspLppcslpLVslS..p.Llp ....................................................................IVIDDhGh...p.pspppl..lul..P.s...lolAllPssPp...uc-hAppA...+ppG+EVllHlPMtPls........t..p..slt.ssLpsphSs-EIp+hlcpAlspVP.aAl..G..lNNH...M....G......SthTushhuMpplh......psLc.ch..s.LaFLDShThuso.Ah.........+hApt.........h........GV.s.lc.R...c.V.FLD...cs.p........stusIcpQhpcAlclA++..pGssIA.IG..HP+.Ps....Tl.c.s.Lpphl.pL.sc..lsLVhsSsLl.s.................. 1 98 175 231 +2260 PF04764 DUF613 Protein of unknown function (DUF613) Waterfield DI, Finn RD anon Pfam-B_6084 (release 7.5) Family Family of chloroplast proteins of unknown function. Some members have two copies of the conserved region. 25.00 25.00 91.90 91.90 20.20 19.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.72 0.71 -4.19 10 10 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1 \N 0 12 0 125.70 30 66.12 CHANGED cYlYRchKccsYWppa.h..lslYVNsK..........+VG.............hscsIo-lEYhLLDlFLaGPLchclo-.YcplpphlpcRsp+Ycchhcc.cplsG.....INIlFs...phsapchLpscYphcEhlssspILplYlls ...........pYhYRc.+ccsYWppa.h..lslYVNsK.....+VG...................hscsIo-lEYhLLDlFLaGPLchclo-.YcplpphlpcRsp+Ycchhcc.cplsG.....INIlFs...phsapchLpscYphcEhlssspILplYll.. 0 0 0 0 +2261 PF04751 DUF615 Protein of unknown function (DUF615) Kerrison ND anon DOMO:DM04973; Family This family of bacterial proteins has no known function. 
25.00 25.00 39.40 39.00 22.50 21.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.90 0.71 -4.58 90 1331 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1309 1 252 777 138 159.20 49 85.34 CHANGED --...-....hsSKSphKR-hcsLQcLGpcLlsLspspLs+l....PLs-p.LhcAltpAp+l..pppEA+RRQlQaIGKLMRpt....Dh-sIppuL-php..................................spppppstthHplEphRD+Lls.....pss..............sulsphlppaPps.DRQpLRpLlRpApKEttp..........sKs....s..........+uhRcLFphL+c ..............p--.E.haVSKSElKR..DuctLpcLGtELV-LucssLcKl.....PL---..LtsAIchAQRI...cpEuRRRQLQhIG.KhhRpp....DlE...PI...cpALDKL+............................................................N++...s..Qps...shh.....H+LEplRDRLls.....pGD...................sAls-l..lshaPcA..D.....R........QpLRsLIRNAcKE+pt..........................................NKP......P....................KuhRpIFQaL+-............................................................... 0 54 129 196 +2262 PF04765 DUF616 Protein of unknown function (DUF616) Waterfield DI, Finn RD anon Pfam-B_6152 (release 7.5) Family Family of uncharacterised proteins. 
21.40 21.40 21.60 21.40 20.00 20.50 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.98 0.70 -5.27 13 242 2009-01-15 18:05:59 2003-04-07 12:59:11 8 14 63 0 143 231 87 269.50 35 55.71 CHANGED N..........LsYlpt-p..s......t.spFGGp.ShpcRcpSFphppshs.VHCGFh+......psGF-ls-p..................D+thMcpC+.VVVuSuIFusaDpl+pPps..ISchoccsVCFhMFVDEpTLstLcpcssh...hDssthVGlWRlVlV.......+NLPYsDsR+NGKVPKhLsHRLFPsu+YSIWlDuKhcLslDPhLIL-pFLWRpsusaAISpHhcRasVa-EA.AsKRhpKYsc.suIDhQhchYpp-GLpPa..SssKhPlsS...................DVPEGulIlRcHsPhSNLFSCLWFNEV-RFTsRDQLSFAYlh.Kl+s.....saplNMFpsCER+slsh.apH ..........................................................................................t.....................s.FsG..ohtpRppoa..........htts.p.l.CGFhp........tsGac.hsc....................Dh..hppCc...llVuoslFu............s.D.lppP.hs.......hsphoh.p...plC.Fhh..FhD-.o.thhpp.sph.........sttthl.GlW+llll.......+sl.P.a..s.D.R+sGK.....................lPK.hLsHRLFPss+YSIWlDuKlpL..h.s.DP.h.h.lLE.thLW..+..ps..sshAISpHhpRpsla-Euttstph.........pKasp.stl-......QhphYp.p-Gls.a...tsphs.h.s...................tlPEu.ulIl.R..cHs..s..ho..NL.F.sC.L...WFNEVs+.F.T.s..RDQLSFsal..h.+lp.......hth.MF.sCthpthh..hhH....................... 0 25 94 119 +2263 PF04768 DUF619 Protein of unknown function (DUF619) Kerrison ND anon DOMO:DM04985; Family This region of unknown function is found at the C-terminus of Neurospora crassa acetylglutamate synthase (amino-acid acetyltransferase, EC: 2.3.1.1) (Swiss:Q12643). It is also found C-terminal to the amino acid kinase region (Pfam:PF00696) in some fungal acetylglutamate kinase enzymes. 
20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.18 0.71 -4.84 33 416 2012-10-02 22:59:21 2003-04-07 12:59:11 8 16 240 39 279 450 57 166.10 28 26.44 CHANGED sLpKELFTcoGuG.TLlRRGh+lhptsohcph.....................shc+L+sllpcsh....ps+tslspYlcpl.cpp.hpsYss-s..h-ulAIV.....cpss...........................................phshLDKFslocsuhhs.sVuDslFssl.p+-FPp.LhWps+cssss..pWaFs+ScGohp.............csshhlFWYG................lpchsclpphl.......pphsst .............................................................l.hclFotpuuu.Tll++G.....plhhhsshpph..........................Dhs+Lppllpcsh........tt+hslpcYl.ppl.ppp...hphh....h....sts.........hcusAIlh.....s....t.s............................................hsaLDKFslhcsu.ts.uluD.lassh...+.....ccaPp.L.......hWp...u+p...s.s........sp....tWaFc+usGohp.................psthhhFWhG...................htshpp..phht.h...p.............................. 0 85 157 226 +2264 PF01887 SAM_adeno_trans DUF62; S-adenosyl-l-methionine hydroxide adenosyltransferase Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family This is a family of proteins, previously known as DUF62, found in archaebacteria and bacteria. The structure of proteins in this family is similar to that of a bacterial fluorinating enzyme [1]. S-adenosyl-l-methionine hydroxide adenosyltransferases utilises a rigorously conserved amino acid side chain triad (Asp-Arg-His) which may have a role in activating water to hydroxide ion [2]. This family used to be known as DUF62. 
25.00 25.00 26.30 26.20 22.70 23.50 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.59 0.70 -5.33 121 1173 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 1120 112 316 783 493 260.10 40 93.36 CHANGED hIsLhTDFGhpDsaVushKGVIhsl....s.PsspllDlTHplsshslhpuAahLtpshtaFPpsTValuVVDPGVGopR+ulsl....css..s.taaVuPDNGlLoh....lh...........................pth..............tcshclsppp..............................thSsTFHGRDlFAPsA.AtL..ApG.hshpplGptl..............sslsclsh.ps..phps.....s......tlpGpl.lalD+.FGNllTNltsph..........hthuc....phplph..........tttt.............................................hhthscoau...............................clt.Gchl.shhsStGhlElAlsp......GsAuphhulphG..cp.lpl ..................................h.llLQoDFGLpDGAVuAMhGVhhph........s....s...s.l.+IhcLTH-IsPasIacuuYRLhQ.T.ssYW..........Pp..G.T.lFV.SVVDP..G..V.........Go.c.R........+S..llscTt...pspYhVoP..DNGoLoh....lt.c.................phGI..........pplhEIsEhts....h..........................p............cpSaTFHGRDVaAYsG..A+L.....AuG..IsFE..plG..spl..............s.spl.lcLs.h.ps..thpcs.................tlpGsI..Ilss.+.FGslWTsIspc.h........phthphG.c.phpVsIhpts.........................................hatsplsascSFu...............................DV.p...G.pPl..lYlNS.L.h....plulAlNp.......Gu....FuctaplusG..sp...hh............................... 0 118 221 276 +2265 PF04788 DUF620 Protein of unknown function (DUF620) Waterfield DI, Finn RD anon Pfam-B_6213 (release 7.5) Family Family of uncharacterised proteins. 
20.40 20.40 24.20 25.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.56 0.70 -5.21 20 186 2009-09-10 16:40:51 2003-04-07 12:59:11 7 5 27 0 112 174 1 218.50 52 58.66 CHANGED MYAhG+V+MsssEhps....ustthsp..........sssus-tGuFVLWQhsP-hWhlELVVu.GsKVsAGSDGKlsWRpTPWls.sHAu+.GPsRPLRRhLQ...............................GLDP+oTAslFusAt....ClGEKtlss-DCFlLKlpss.usLpupSs..ussElI+HslaGYFSQRoGLLlplEDSpLhRlpo..ss..stulaW.........ETohpShlpDYRsVD...Gl.lAHuG+ossolhRF...........G-sstst.spTpMEEtWpI--VsFNV.GLShDsFlPP .....................hYAhG+V+M....hssEhpt...ustsstt.................sttssE.GsFVLWQhsP-hWhlELsVu.G..s.KlpAGssG+lsW..RpTPWhs.uHAu+.GPsRPLRRhLQ.......................................................GLDPho.TAslFus.Ah....ClGEKplssEDCFlLKLps-stsLcuRSp..ussElIRHslaGYFSQRTGLLVplEDS+LhRIps..su.......s-slaW.........ETThpShlpDYRsVD.....Gl.IAHuG+osVoLhRF.............G-sshsp....s+T+MEEsWsIEEVsFNV.GLShDsFlPP............................... 0 10 64 89 +2266 PF04822 Takusan DUF622; Takusan Mifsud W, Eberhardt R anon Pfam-B_3835 (release 7.6) Family This domain is named takusan, which is a Japanese word meaning 'many'. Members of this family regulate synaptic activity [1]. 25.00 25.00 25.30 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.71 0.72 -4.22 13 539 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 34 0 235 892 0 82.20 37 22.31 CHANGED sR+TSSpssplocpptc.cclEcLph-l+hIspERsELpchLshahpsshshR......lNspaphL+pp+cph...Mhshcchp.pIh-AhE ............tpsSo.ss..l.ocpppphpclE..cLKhcl+plop-p-EhpsILshah.....cslNhR..............hNsEaphlKpQH-cs...MhDhp+MpppIhpuhp....... 0 1 4 20 +2267 PF04844 Ovate DUF623; Transcriptional repressor, ovate Mifsud W, Eberhardt R anon Pfam-B_4487 (release 7.6) Family This is a family of transcriptional repressors. In plants, these proteins are important regulators of growth and development [1,2]. 
25.00 25.00 39.40 38.80 23.70 22.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -8.97 0.72 -4.17 44 411 2009-09-11 05:52:17 2003-04-07 12:59:11 8 6 28 0 276 364 0 60.00 42 21.11 CHANGED SsDPhpDFRcSMhEMlspptlt.........shscLc-LLtpYLsLNupcpHshIlpAFs-...lhhs..Ltus .........SpDPhtDFRcSMhEMlspptlt............shc-Lc-LLtsYLsLNup.caHshIlpAFs-...lhhslh.s............. 0 25 150 222 +2268 PF04854 DUF624 Protein of unknown function, DUF624 Mifsud W anon Pfam-B_4640 (release 7.6) Family This family includes several uncharacterised bacterial proteins. 21.50 21.50 21.60 22.00 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.55 0.72 -4.12 65 1340 2009-09-11 11:23:42 2003-04-07 12:59:11 9 2 753 0 194 770 11 76.80 25 35.89 CHANGED lshLNlLWlhhs.........................LhGlslhGlhPAosAhaslh++ahpsc....css.lh+sFapsaKcpFhcushlullhhhhshllhlsh ...........................................hhhNlLal..................................................lsulsl..lsl..GsApsulhpshhc..htpsp...........c.....s..........sh+pa....ap.sa....KpNFh.pu.h.lhhhhlslshllhhs........... 0 75 133 155 +2269 PF04776 DUF626 Protein of unknown function (DUF626) Kerrison ND anon Pfam-B_2357 (release 7.6) Family Protein of unknown function, currently only identified in Brassicaceae. 25.00 25.00 34.10 33.00 19.00 21.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.85 0.71 -3.76 16 91 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 2 0 85 109 0 106.20 33 38.53 CHANGED slPca.s-ssh...psp.caY..lccS-lpps.-WlpLYhElslhsp.csp.hppt..lssLcIlpVslcT.ccshpss.c..LpApsAlhYIsa+shsps+.......hGpth.-RtAIlRRhhDtp.uphsL .................hPcW.s-ssh.....pcp.+aY..lpcSElpps..-WlpLYhElulhsp.cth...hpt......ht.LcIhKVsl-Tpt..pshcsspc..LcApsAlhYIoa+sh.sts..........s.th.-cpAIVRRshDtpsGphpL......... 
0 23 23 23 +2270 PF04781 DUF627 Protein of unknown function (DUF627) Kerrison ND anon Pfam-B_2475 (release 7.6) Family This family represents the N-terminal region of several plant proteins of unknown function. 20.40 20.40 20.40 20.80 20.30 20.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.12 0.71 -4.45 17 70 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 12 0 49 65 2 113.40 33 10.00 CHANGED chAcchaspGsaIKALcllEDhIph+tccps.h.hlHhhQGplFhcLApcTE.NsDlKhsYLLuSVpCaScshtLsshs.........AhSLapLuppluSshaYKKslpKAKpuLSlpts.cs .......cAhshhtcGs+hKALcllcDhlscHt.......csp.....hlHphQGslahclAtps-...ssssKhtaLhuul-shocsstLus.u.........ApuLapLApcht.spaYc+slpcu+cuLSlpss..s..... 0 18 27 31 +2271 PF04780 DUF629 Protein of unknown function (DUF629) Kerrison ND anon Pfam-B_2475 (release 7.6) Family This family represents a region of several plant proteins of unknown function. A C2H2 zinc finger is predicted in this region in some family members, but the spacing between the cysteine residues is not conserved throughout the family. 
21.50 21.50 21.70 22.30 21.40 21.40 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.77 0.70 -5.97 14 139 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 16 0 82 125 0 300.70 25 30.08 CHANGED cpL+oaWhGLDschKRsFhKVslsKLhoaVpsh....asccsp-sLcpsLs.A+pst+WpFWhC+s.CSc+F.ssE-CKpHl-ppHsucacPspppchsppIscsWActI.SsGsW-PVDssAAlchIKsRltcVKtFs...............YpNGW.................................sc-WPLAsDE..ERuKLLKEI+.lLVthh..-pKlLSCSlRDWlhpaslcaL.tpLcVScpsL.s-spLlcTPQSICFL-sc-LspILcaL+plK.....scRDDGssLVs+uV-uhhcsopVKE+IDhDsphS.hLLLD+RLLpscph......ph.DDEGolssa-ssshYscs.spGDcIlSWLh.DhsplDcp.....FPpsl.....+tHNh-IWlAVLRAVphTsRpLto+YsKKhphlsY-suLstsEslChpEDcRRcsh.c-QhspYASlLs-cCEEhl.c......lssKhFLssV+DVLctAspPsF-hhs.c-C....hshI+phco....lsDD.hVlKSlthL+pllppKV .........t..h+paWsu.h.s.-p+c.s.FLpVslscLpuahpsh.......tp..pp......stcslscsl..sas+c..sppWpFWhCsh....C.....s......p.+.FhsscphhpHlppcHhs.p......h.......phpp...h.sp..plspt.hchl.....WpPh........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 20 35 52 +2272 PF01889 DUF63 Membrane protein of unknown function DUF63 Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Proteins found in Archaebacteria of unknown function. These proteins are probably transmembrane proteins. 
25.00 25.00 37.80 34.50 22.40 22.10 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.79 0.70 -4.32 13 123 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 87 0 88 133 119 269.70 27 93.11 CHANGED tIp-Fla+YYI-PIht.cpGYNhVNTlTYAlILulslahlY+.hL++L+IclD-+FlhuslPallhGuolRVlpD..uGllpsPhoYlhlTPhIaFLlFslshhsLhluhpl.ptt...t.Yh+laushGlshsllsLhhLl...hppp..lhcsallshll.sluoshssllYhlh+h.h.hshhpc+LshhllhuHhlDASoThlGI-h..l.GYhEpHVVPsaLI-hhGoAhsMaPLKlllllsllYIL-p.htc-....pl+phlhlslllLGLAPGlRsshRMlhGl ..................................h...t..GYs.lsplsYullLhlulhhlhp..llc+......L.cIplD.c.chhhAlhPahlhGuslRslpD.........s........G...........l..........l..PhshLhlTPsIYhslhhls...hssllluhtl.ppt.....ta.p.....h..hhshGslhh.hhsl...h..hLh......hptp....h..hp.h..sh..h..hll..sl..u..sh.hss.lh.....a.hlh+h.........hph...h..p..sh.h.shhllauHhlDusuTslGl-h.......h..G....Y.....hE....pH............sl............sphl............l.........-............h.........h............G............s............ua...s.........hhslKlhlhlsllal..hsc.lp--....phtpllhlslhslGLuPGhRshl+hhhtl...................... 0 13 48 72 +2273 PF04816 DUF633 Family of unknown function (DUF633) Finn RD anon Pfam-B_5077 (release 7.6) Family This family of proteins are uncharacterised have no known function. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.37 0.71 -4.98 13 1587 2012-10-10 17:06:42 2003-04-07 12:59:11 7 4 1567 12 231 1139 75 199.30 36 86.45 CHANGED lsDIGSDHAYLPhaLlpsshsptulAGEVscGPapuAhppVpcssLsc+IpVRluDGLuslcpt-.l.DslsIAGMGGtLItcILEpGpc+Lttlc+LILQPNspt.pLRpWLpppsapIhsEpIlpEcsKhYEIlVsE+upp.......h.osp-lhhGPhLhc-+sslFhpKWppElp+lphlhtpls.....scpsctchptlppcI.phlc- ........................................................................lADIGSDHAYLPlhL.lppshhpp.AlAGEVsp...G...Pap...u...A......hpsV.........p.........t..........p......s.....L.......p..-........c..........IpV...R.Lu....s..GLs....s....l.c.p.........-......l...csIsI..A...GM....GG....p....L....IscI...L-..p.G..t....s........+.L..........s.......s....h..p...+...L..IL..QP..N...h...pp....ppLRpaL.pps.........apIlsEpIlcE.su+hYEIlVs......c..........h..upt..........................ho.pt.-l....hF...G.Ph....Lh.p...p.p.s..s..l..Fh..pKWp+E...lpphpphhpplt.........ppp.......p.....c.ht..tltpcl.t.l........................................................................................ 2 89 145 191 +2274 PF04827 Plant_tran DUF635; Plant transposon protein Kerrison ND anon Pfam-B_2859 (release 7.6) Family This family contains plant transposases which are putative members of the PIF / Ping-Pong family [1][2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.30 0.71 -4.94 5 275 2012-10-03 01:22:09 2003-04-07 12:59:11 9 11 33 0 143 891 1 132.70 41 47.04 CHANGED scYL.RuPsAsslcRLLplGc.RGFPtMlGSlDCMHWcWKNCPTAWcGQYTRGcp+.PTIlLEAVASaDLWIWHAFFGsP.GSNNDINVL-pSPlFsDIlpGsAPplpYhVNG+QYNhuYYLADGIYPcWATFVKSIRpPQsEK+KLFApcQEusRKDlERAFGVLQARF+IVtsPu+lWspscLusIMpACIILHNMIVEDERDhs ...........................................................................................................t.................................................................................................................................................tt.Nthp...hlt...s..l..h...............p...G..p...s...s........h..p...a......lN.....tp.........Y..p...h......s.Y.Y.L.s...D..u.I.Y...P..pa..s...s....h......h.....p........o...I.........................P..........s.....t......+......t.p.h.auptQ.Eu....sRKDlEpAFGV.......LQsRatIl.pt....P....s....p.h....a.p.t...tl.t..l...hh...uClIhHNM.IlEDEtt..................................... 0 20 63 102 +2275 PF04828 GFA DUF636; Glutathione-dependent formaldehyde-activating enzyme Kerrison ND anon Pfam-B_2779 (release 7.6) Family \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.87 0.72 -4.04 59 3736 2009-01-15 18:05:59 2003-04-07 12:59:11 9 25 1139 16 1716 3480 2625 89.40 22 58.89 CHANGED hhCHCppCp+toG.usass.shhtpp.slp....hhpG...thppap......susshp+.hFCspCGoslah......tsttss.....hhhlthGsl-ps.......pthpPp.hcl.aspp ............................................................................h.hCpCshC........p........+.........t.sG.s....h..hh.shhs......ts....s.lp..................h..h..p.G...t.....plptap.........................................ssptsp....+.....hFCspCGs...lah...........ptpsss..............hht...l.s..h..u.s..l-ss..........p.h.......t......t.............................................. 
0 364 828 1304 +2276 PF04830 DUF637 Possible hemagglutinin (DUF637) Kerrison ND anon Pfam-B_2732 (release 7.6) Family This family represents a conserved region found in a bacterial protein which may be a hemagglutinin or hemolysin. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.49 0.71 -4.62 30 397 2009-01-15 18:05:59 2003-04-07 12:59:11 8 83 154 0 54 380 8 148.40 36 13.38 CHANGED ussAu.............l......uuhuosAusuhlsspGshttshK.lspusslKuhssuulTAGlssGl......................s.s......s.ssshs.thstuo.sslspthss.sspushsAulsTAI.pGGSht-...sLtsuh....ssshuAtuAstIGs..................hshssstss+lhsHAhlGCAuutAsGu.........cCtsGAlGAus .............................................................htsAA.huSLsopAuVuhINNpG.s.......l.upsLK-LupusslKphssushTAGhhsth................................................hh.sshst.o.tth.....s.splssslssus.uuhls..TAl.sG...GS...LcD......NLtsshlusllsshpu-uAspItp............................hspshls+phAHAlsGC....suutsstu..........cCpsGAIGAuV....... 0 18 31 43 +2277 PF04829 PT-VENN DUF638; Pre-toxin domain with VENN motif Kerrison ND, Zhang D, Iyer LM, Aravind L anon Pfam-B_2732 (release 7.6) Family This family represents a conserved region found in many bacterial porlymorphic toxins which is located before the C-terminal toxin modules [1][2]. 20.30 20.30 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.69 0.72 -4.42 32 725 2009-01-15 18:05:59 2003-04-07 12:59:11 8 157 252 0 136 722 0 52.90 36 4.05 CHANGED sssp...LoEcpKQplosLupLAAGlAuGlsGss........s.sAssGApuucsAVENN.tLu.shps .............sspLo-ppKppls.uh.up.LsAGlsu....ul.sG.s.s...............tsssuApuucsAVENN.tLs...t..... 0 16 63 98 +2278 PF04842 DUF639 Plant protein of unknown function (DUF639) Kerrison ND anon Pfam-B_6010 (release 7.6) Family Plant protein of unknown function. 
24.30 24.30 24.70 24.50 23.70 24.20 hmmbuild -o /dev/null HMM SEED 683 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.16 0.70 -6.37 4 126 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 22 0 82 122 0 514.10 35 90.47 CHANGED pcht-h....s.ps.sshp.hPc..LS.lANsVVpRCS+hLslosc-LpcpF-sph..ulpp..TauRpFlEaCsF+sLSp.s.phhspLpDtpFpRLhFDMMLAWpsPssssppuh.pt.sp.p.......................h.h.V-cctoVGpEAFsRIAPslPhlADhIss+NLFcsLTouTGp+.Lpa.lYDtYl+pLcKhhKth+oppssphhsh.hucsEhlLch-Gs.sstPVL+Hls.ouWPG+LTLTspALYFEuhslhuh-sshRhDLocD.KpslKPthTGPLGspLFDKAlsYpShsh.EslVhEFsEhpGpsRRDaWLsIlpEllhlHpFlR+a..........plpG.l..t+pEhlu+AlLGIhRlpAlpEhhplsssphKsLL.FsLh-plPtGDhVLEsLA.ph......sphtspRssp..uusthh+.....s.ss.chlspl.G..............s.pstsphtspphlVG.....-hsVu-lssLEpAlcpSRpchchlEtAQATlstlchcGIsoNlAVhKELhLPh..hh..h.plhhWp-PhhossFhhhsohhIaRsWlsallsssLlalAhhMhh....pRph.s+sK..ttlpVps.PspNshEpllulQsulppLEphlQcVNVsLLKlRulhhShhPQtospsAluhlVlAohhAlVPhKYllshshV-hFTRpss.hR+tSs-+hpRRlREWW.plPAAPVhllpsps ..................................st....................h....LS.hAs.slpphu.ph.shsspcL..ppF-.th..t.....s.u..RphlEaCshphL.t....ssp.htsh..LtDttFpRLhashMLAWEsPss..psp..............................................p.hVu.-AFs+luPshshhsDhhssc.hFcsLossost.+.Lpa.hacpYlptlp.+.shcthpt...t..s.t.h......h...p.s.EhlLpltus.sppPVlpa.s.osWPG....+LhLTspALYFEuhtl.hshppshth-Lsp.-..pp.lc.thsGPhGspLFDpAl.hpS.shs-shhhEFs-htuphRRDhWhuhlpEllhha+Flpca............pl.s..h..t+tcshspAh.uIhRhpAlpchhph.ss..cthL.FshhpplPtGDhlLpsLs.p...........t...ptst.......tss...t........s.....t..h...h.h................t.....pttt..h..t.....ph.h.uths.lptultpshpp.chlphspATl-tspl-GlssNlslhpELlhPh..hh..h..lhtW-cPhhohshhhh..hhlh+thl.ahhs..hhh.shhMhh....h+...t.tp..ht.l...l...s.ss.s....shppllslppuhpplEt.......hlQss.NlhLLKhRslhluh.s...........p..........................topphhhhhlshAhhhhhlPh+hllhhhhhp.aTpp....Rp..sc.ph.phh+EhW.pIPssPV.l.....p................. 
0 13 46 59 +2280 PF04852 DUF640 Protein of unknown function (DUF640) Kerrison ND anon Pfam-B_6053 (release 7.6) Family This family represents a conserved region found in plant proteins including Resistance protein-like protein (Swiss:O49468). 25.00 25.00 32.40 27.30 24.40 24.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.60 0.71 -3.97 15 226 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 24 0 141 225 0 123.60 63 54.52 CHANGED ssspsutsuspssss...sh.SRYESQKRRDWNTFsQYL+Nc+PPlsLsp..CuusHVL-FL+Y..........LDQFGKTK.V....HtpuCsaFGpPsPPuPCsCPLRQAWGSLDALIGRLRAAaEE.s.G..GpPE.....sNPFuu+AVRlYLREVR-o ..............................................sssss............ss...s..SRYEuQKRRDWNTFsQYL+NHRPPLsLup..CSGAHVLEFLRY..........LDQFGKTK.V....HstuCsFFGp......Ps......PP.........A........PCsCPLRQAWGSLDALIGRLRAAaEE.s.G..GpPE.......................sNPFuARA..VRlYLREVR-.h................... 0 23 87 111 +2281 PF04862 DUF642 Protein of unknown function (DUF642) Mifsud W anon Pfam-B_4723 (release 7.6) Domain This family represents a duplicated conserved region found in a number of uncharacterised plant proteins, potentially in the stem. There is a conserved CGP sequence motif. 
27.00 20.00 27.20 20.50 26.70 19.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.21 0.71 -4.58 56 419 2012-10-03 19:46:52 2003-04-07 12:59:11 7 25 70 0 246 418 17 157.70 32 51.47 CHANGED GLL.NGsFEpuPptuph.puot...lh.GppuIPsWclsGh.VE.YIsuGppp.GsMhLsVPcGsaAVRL...GN-AS..IpQpls.VptGthYolTFuAA..........RTCAQt....EpLsVSVs..s.....p...suslPlQTlYossGWDoYuWuFpA.pp.sslplsh.HNPGhp.-DP......uCGPlIDuVA.IK .........................sll.NGsFEpuPh.....ssoh..........l..sp.sslPsW...hl...p.u.....h...Vc...YIsusphp...................VPp...GspAVcL..........Gp..Euu......IsQplp..ss.GphYsLoFusu........................csC.sts....p..t.l...s.Vplhs.............p.....stplsh.p...oh.h...s.....s.s..Gacshu.hsFpA..ss..splthh....ps.shp..-Ds......hCGPllDsVtl................................................... 0 35 131 191 +2282 PF04867 DUF643 Protein of unknown function (DUF643) Kerrison ND anon Pfam-B_6086 (release 7.6) Family Protein of unknown function found in Borrelia burgdorferi, the Lyme disease spirochete. 25.00 25.00 44.00 67.60 22.90 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.68 0.71 -3.68 3 80 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 22 0 5 64 0 111.60 69 64.24 CHANGED INEISDFYDNLScclKKE....INKLYtTcQlTLKQK+QhYSuacuhQEYsIKTGKSl-EIls.IIDPsKcFIKDVLKcKaLIKKYKNFpNhKVDaSYKKGMLEKCLEKLGEccShtF .......INEISDFYDsLS.sTKKE....IsKLYGscQLTLcQK+chYpuahuIQEYKRKTGKSI-EIls.IlD.PAKpFIKDVLKDK....aIIEKYKNFQNhKhDhSYKKGMLEKCLEKhGEchSscF........ 0 5 5 5 +2283 PF04870 Moulting_cycle DUF644; Moulting cycle Mifsud W, Eberhardt R anon Pfam-B_4889 (release 7.6) Family This family of proteins plays a role in the moulting cycle of nematodes, which involves the synthesis of a new collagen-rich cuticle underneath the existing cuticle and the subsequent removal of the old cuticle [1]. 
22.00 22.00 22.10 22.40 21.30 21.90 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.08 0.70 -5.36 9 114 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 9 0 113 89 0 257.80 25 45.59 CHANGED PhuhIsKpLpphV+shKpK..-..pKWp-llp+Ipccspcl+p+Kptcph.Rp.hth...t.............ht.ph........hps..h-phl-D--ltphhtp........ho-c-chhhhPhclIRpAsKLGhslsG..pNsosF-pKpl+lhSPRFMSllP-cptt..sslsLLSPSlFSLHc-Go-.EpphSlsplL.tuhhsssDppsal-hllEAoGVsEsV--Ac+chhptphccpct.hhs.cGp.h.Fo+pNhocta.sscspKl-lhEpLcKoYStpQhc-MNpTGYolhsscQMphhYGctSPapNsc..hLcsY+Nho+u-hcculassI+slAccch...KFc .............................................................................................................................................................................p.p......p.W.phh.ph.p..t.h..ppp.pp..p....................................................................ht...h-....ht......p.........................................tt.p.h..P....thlcp...............uh.ch.h..h..s..G.....ppssshs...hphlSPRhhulhP-p.......................s.phslLSPSlhuLac...c.sst.-phhsLspll.....phhp...pp-ppshlshlhEhoGlscsl-ch.ph.h......p..p........s........h.p.pp..thh...ptpphchhctLppoho.c...............Q...hpchpppGashhs.pQhphlYGptu.hpssp...................lcphpphoptp..hpptl.psI+tlAptp................................ 0 39 53 113 +2284 PF04875 DUF645 Protein of unknown function, DUF645 Mifsud W, Eberhardt R, Haft D anon Pfam-B_4997 (release 7.6) Family This family includes several uncharacterised proteins from Vibrio cholerae. There is some doubt regarding the existence of these proteins, they are encoded by open reading frames contained within a repeated region in the Vibrio superintegron. 
19.80 19.80 20.30 20.20 17.40 19.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.94 0.72 -4.28 12 607 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 63 0 14 247 1 51.60 66 95.40 CHANGED lLDspHsphuFhKGCIIusIhlSLSRTLNpGQLNLDRFcFWQPTSQLLsLDVCLhDAFA ...........h.sh.ptphuFhKusIIsshhlSLSRTLNRGQLsL-RF-FWQPTSQLLsLDVCLhDAFA........................ 0 13 13 14 +2285 PF04883 HK97-gp10_like DUF646; Bacteriophage HK97-gp10, putative tail-component Kerrison ND anon Pfam-B_6160 (release 7.6) Family This family of proteins is found in the caudovirales. It may be a tail component. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.80 0.72 -3.05 172 1665 2012-10-01 19:49:39 2003-04-07 12:59:11 7 3 1229 0 221 1184 106 85.30 19 61.94 CHANGED ccltcplpp.htp..........hpcth.ccslc.psuppl....tpphpppsP......................p............s.Gp......lpcShph...................................................ttps..st.hplpltsssp.....................Y...AphlEaG ........................................................tl.pplpp.hsp......p.hp.chh.ccslp..psuphl......hpphpppsP................................................hc........o.Gp..........Lccslph.................................t.......................ptps.....st..hs..spl...ts.sst...................................................Y....spalEhG................................................................... 0 74 152 185 +2286 PF04890 DUF648 Family of unknown function (DUF648) Finn RD anon Pfam-B_5530 (release 7.6) Family Family of hypothetical Chlamydia proteins. This family may well comprise of two domains, as some members only match the N-terminus. 
26.60 26.60 27.80 26.80 26.20 26.20 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.04 0.70 -5.26 8 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 15 0 21 58 0 213.60 23 66.45 CHANGED Mpl...YoFSP.slpsSapc+lhAsLDuYFFLGG+RhKVlolsspsshphhuppcshslSTsEKlLKILSa.LLlPIVIIALLlRhhLHppa+tph.sh.hL-st............spsl.................ph-l.tpstcsushhsh.s...........htppPpslctshhhsctpF.sshppcahhchho.....shltYllu.ppphpshsl...............cs..hp.oscshppph-hsh+scp+sl.......pcLcchLshsu..Khop.schlpptlhculpp...........sPpFh.-hscchhspLssss.Ishc.............................................tG.pssphpthtshhlhhp.....phth ....................hthSs...psoh.c+hhutLDsaFahGGcp.spllshss.tthh.hhpp...pshtl...SphEKllKI.lSa..lllPlsl...lALllRhhLH....thhphph..................................................................................................................................................................................................................................................................................................................................................................h......................................................................................................................................................................... 0 1 1 19 +2287 PF04894 DUF650 Archaeal protein of unknown function (DUF650) Kerrison ND anon Pfam-B_6199 (release 7.6) Family This family represents the amino terminal region of an archaeal protein of unknown function. 
25.00 25.00 54.00 31.10 16.90 15.20 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.65 0.70 -4.92 36 192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 147 0 121 190 40 262.30 35 66.52 CHANGED hLCGhshCPILp+hcshpthhpphppt.....l.GuSPPSlFVGchGYP+VplsPhsPP.lpGDsu.h.a-sPppWhs..hslc-llphRsuLlputtphpVc...............pts.sthlpclQplAlSs+PV-sEhthp+h.Pphc...lhhDthhsPhGPuushcphclspNP+lP+plE+lhhD.Dh+Ac-AlhpLYcpGhslhpIp+lLSsGhLG..ccR+LVPTRWSITAVDchluchLhccl+passIsphcVahppahGNpahllLhPupasaEhlEhWhPsSlWs .....................LCGhshCPIlt+hcshh.....ph.....h.ph...httt.....p...l.GSoPPSlFVGchGYP+VplGPhhPP..htGDsu..h..h-sPppWhs..hslc-IlphRhsLlputpp.h.pVc....................ph..sthlpclpplulSs+PVDsEltlp+h.Pp.st...lhh.sthhsPhGPpu.lcchclspNPplP+tl-+lhtD.Dl+Ap-AlhpLYpp.GhDlhpIp+hLSsGhLG..cpR+LVPTRWSITAVDchluctLhc.cl+paspls.chcVahtpahGNhahllLhPs.papaEhlEhWhstohWs................... 0 32 71 99 +2288 PF04895 DUF651 Archaeal protein of unknown function (DUF651) Kerrison ND anon Pfam-B_6199 (release 7.6) Family This family represents the carboxy terminal region of an archaeal protein of unknown function. 20.50 20.50 20.90 21.10 19.90 19.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.37 0.72 -4.43 41 188 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 147 0 119 186 25 106.70 36 26.66 CHANGED EshcG+psY.spluGuYYAARLuVhEaLpcp+RQAssllhREIpssYasPVGVW.VREsVRcAhcscP.tpF-oLc-Alphlspp..lchshppahtpSpllpth...pQpoLssF ...........EshpG+psY..spluGuYYAARLuVLEaLpph+RQAsslllRElpssYhsPlGVWplREsVRpAhc..........s...........ps.tpa-.slcpAlphlspp..Lphs..hppahppSpllpth....Qppltpa............................. 1 31 69 98 +2289 PF04910 Tcf25 DUF654; Transcriptional repressor TCF25 Mifsud W, Eberhardt R anon Pfam-B_6652 (release 7.6) Family Members of this family are transcriptional repressors. They may act by increasing histone deacetylase activity at promoter regions [1]. 
20.10 20.10 20.10 20.30 20.00 18.60 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.07 0.70 -5.51 44 359 2012-10-11 20:00:59 2003-04-07 12:59:11 9 8 293 0 251 376 8 304.20 28 48.74 CHANGED a..FpFp..asppYppspppFhhslphh.Dspslhpll.pphPYHlsoLLQluplhchpu.Dpshus-LlERALash-psh+ssF.....shss.ups+Lsapp.pNRtFaLsLa+alttLspRGsacTAhEasKLLLSLsPsp.DPhuhhhhIDahAL+ucpapaL....lchhp..........p.hh......pp....................................................................................................................................................................................ht..............hP...........................shuaShALAhht.............Lpcp........................................................................ppucthLppAhttaPhslhtLhc..................pl.................shs.hs...thtspss.tsh.......phhsclYltRsptlWcsspshpaLcpsht......................................ph..pt...tpshtt.....ppsht..pshshslhRallLSc......pphhshlP......pplh..tt.h.t.DP..LPP 
.........................................ata...asptYppsptpFhhs.l..pth.Dsptlh....tl...L..pptPYHlsoLLQluclhchpt.-tthus-LlERAL..ashppuhps.F............phsp.GpsRLsapp.p......NR..thalsla+aht.Ltp+GshRTAhEasKLlL.SL-Pp..........DPhshhhhIDahAL+u.cpa...p.al....lchhp.............................phth...........pp.....................................................................................................................................................................................................ht....................................hP........................................Nhsa...ShsLAhhh.............lppp.............................................................................................................ppu.pthLppAhhhaPhlhh.Lhp...................ph........................................sht.hs....h.hsp.s.s....sh..........phls.plYltRst.lW.pp.sphhsaLppsh..................................................................tp.t.....t.hsh.slhRalhLo-.....hpph.h.shlP.....phh...t.....D.lPP.......................................................... 0 90 142 209 +2290 PF04919 DUF655 Protein of unknown function (DUF655) Mifsud W, Bateman A anon Pfam-B_6697 (release 7.6) Family This family includes several uncharacterised archaeal proteins. This protein appears to contain two HHH motifs. 
22.20 22.20 22.20 22.30 22.10 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.05 0.71 -4.57 8 157 2012-10-03 02:11:09 2003-04-07 12:59:11 7 2 154 1 102 214 123 176.80 41 87.79 CHANGED hE-YAaVLDYLPpGaPD-cp..a+ccPlsQulGEcpFpLLEloP+..ssDlh.....................................................................................lh-+VaIGKGp..RcKIsplsR.+lpY-DLTssA+sELPYVlE-IV+ppE-RFVcFFNcApPIosRLHsLELLPGIGKKhMWsILEERc+K.PFESFEDlcpRVculpcPs+lIscRIlcEl......csc.ppKYhlFVt .........................................................................................................................h.EcaAhVLDaLs.....hGh.....s..s-p....+........a..p..cc.P..lsQulG-ctFpLlElssc...ssslp.....................................................................................lt-+lalGpsp..Rc+l.plt+.......+lpY--LTssA+sELshllccIVcppE.c+FVcFFNcAt..PIT...h...RLHsLELLPGIGKKhhhpIL-ERc++.PFcSFcDlccRlt....ulp.cPschlscRIlcEl..........psp..pKYhLFs.h................................. 0 27 61 83 +2291 PF04920 DUF656 Family of unknown function (DUF656) Finn RD anon Pfam-B_5777 (release 7.6) Family A family of hypothetical proteins from Beet necrotic yellow vein virus. 25.00 25.00 27.00 31.60 19.90 17.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.60 0.71 -4.43 2 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 4 0 0 37 0 123.70 89 45.88 CHANGED MADsEICRCQsop.PLlphpsYDhTuRhh.cRI-IGPLGVLhNlshLFHMShlR+hDlaPaLNpIhSIsVSlDVPs.uslsssplhVhlahsp.........Wphl..C.....CasusclhSD.L ......MADGEICRCQVTDPPLIRHEDYDCTARMVQKRIEIGPLGVLLNLNMLFHMSRVRHhDVYPYLNNIMSISVSLDVPVSSGVGVGRVRVLIFTTSRERVGIFHGWQVVPGCFLNAPCYSGVDVLSDEL 0 0 0 0 +2293 PF04936 DUF658 Protein of unknown function (DUF658) Moxon SJ anon Pfam-B_5062 (release 7.6) Family Protein of unknown function found in Lactococcus lactis bacteriophages. 
22.30 22.30 22.30 23.90 22.20 22.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.28 0.71 -4.44 2 26 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 17 0 4 20 4 142.50 57 91.62 CHANGED KlaDsYhpG.KhuTGThcEluchh.lS.pSluhWlKNGhssp.tNsthKhAllNc...KthME.......KhP.........G.h.hsGotpcISp....................................................Ehp-+-RRKHETKEERRLRRNIRAQMAIEsuRKE-.sh ....................KlaDsYhpG.KpuTGThcEluchatlShsSlulWlKNG+ssp+AsstYKHAI.LNcEpoKElhE.pKpEt+K.LPtuVYshYccGphlhTGTAcEISQhhpItppsVa.YlpsGKhsachhKphK+AlhNpsETRKRFPhhSh.pEcE...lEpp-K-RRKHETKEERRLRRNIRAQMAIEsuRK--.sh..... 0 4 4 4 +2294 PF04937 DUF659 Protein of unknown function (DUF 659) Moxon SJ anon Pfam-B_5061 (release 7.6) Family Transposase-like protein with no known function. 22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.77 0.71 -4.73 15 467 2009-01-15 18:05:59 2003-04-07 12:59:11 10 41 39 0 216 514 3 121.70 28 21.63 CHANGED PohcsL+spLLpcthp-lpphl+-hKssWptTGCSIluDsWpDscu+sllsFlVsCPcGshFLKSlDsSs..hhpssphLhcLlsplV-EVGscNVVQVITcssssYtuA.GKhLhp+a..solFWoPCAu+ClclhLE-lu..K.........lcals-slccAppIT ......................................................................................h...h.t.h.....hp.....t.h....G.so...lh.sDsW..ss.....pt+.sl.h.N.hhs..s...t.u.h.h..F.hpu.l.-sos...........css...phlhphhcp.hl....c..c....l....G......p...N..VVQl...l...T...DNsss.h...htu.sph..l.tch...pl.aW..ssCssHslsLhlc-hs..p.................h..htph.ltpu.............................................. 0 24 135 186 +2296 PF04939 RRS1 DUF660; Ribosome biogenesis regulatory protein (RRS1) Finn RD, Moxon SJ anon Pfam-B_6906 (release 7.6) Family This family consists of several eukaryotic ribosome biogenesis regulatory (RRS1) proteins. RRS1 is a nuclear protein that is essential for the maturation of 25 S rRNA and the 60 S ribosomal subunit assembly in Saccharomyces cerevisiae [1]. 
25.00 25.00 30.50 26.30 24.40 24.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.16 0.71 -4.76 33 347 2009-09-10 22:34:59 2003-04-07 12:59:11 7 6 299 0 237 343 3 161.40 38 57.87 CHANGED hshpaDLGpLhuhDsNsl........ssssppEptLpshsRDs...sQlLlNplhs...L.Ptcss.s-u...........sllpLPpPsohL.PREKPlP+PK.s.TKWEpFAccKGI..............pK+++s+hVaDEsos-Wss+WGYKttNc.ct-c.pWllEV............sstctstDshttpctc+Kc+lpKNchpch+Nht ...............................................hphDLGpLhuh...DsNsl...........t.tsshEp.LpshsRDs...sQhLlNplhp...L.Phpps..p-u................hlhpLPt....P....sT..........h..L...PREKPlP.cPK...s.TKWEpFAppKGI..............pKc++s+hVaDEtstcWhs+WGY..Ktt......s...ctcc..pWllEV............................tss..c....h...pD.h..scp..cpp+Kc+ltKNEhpph+Nh.h................................ 0 88 139 200 +2298 PF04978 DUF664 Protein of unknown function (DUF664) Moxon SJ anon Pfam-B_5281 (release 7.6) Family This family is commonly found in Streptomyces coelicolor and is of unknown function. These proteins contain several conserved histidines at their N-terminus that may form a metal binding site. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.85 0.71 -3.97 48 958 2012-10-02 14:44:17 2003-04-07 12:59:11 7 6 365 1 344 2013 90 150.30 24 84.39 CHANGED sLtuaLctpRpslhhplcGLo-cph+ppssssus..oLhGL...........l+Hlu.pVE+sWhppshssp.sss.h..............ssssscat..hsss-ohssllutacpssspucthlu.thsLDsshsh.....tshht.....sol.RalllHhlcEhARHsGHADllREtlDG ...................................................................LhshLsttRt.sl..htpscGLoscphcp....t......s....ss..u.....s....ol.....suL..............l+.Hl...A...t...l...........E...........p....s...........W..h...t.t...s...h...t...sc....ts.h..t..h...ht........................tthst.shp.......h.s...p....s...c.....s..h...s......p...l.....l...s...t.a.pps.s...p.c.s..p..p.hl.t......t..h..s...h.-.ths.ph..s........hshtss........sol.c.hll.l.H..l.l.c.-tupH.sGp....A.D.llREhl........................................................................................................ 0 134 276 327 +2300 PF05006 DUF666 Protein of unknown function (DUF666) Moxon SJ anon Pfam-B_5319 (release 7.6) Family This family contains several uncharacterised viral proteins. 22.20 22.20 22.20 27.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.19 0.71 -4.22 25 69 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 65 0 0 67 0 150.20 43 75.16 CHANGED FpR.sslVDC.s.po+LPCVTDpQChDNChss.hh...sshpCpp.GFCsspss.lsu.ps.s.....CDtshGLlpVasA.u-FVVsphClSsYRDllDDsGplRPYlC.......ssGsLclcLpsp.FoscDChCssGYT+hhFsQsAhuRslPVCIPNphAsLYsRlY ......FpR.sshVDCsps..+LPCVoDpQChDNCshtshh...sthsCps..GFCsspsu.hsutts...sth-CDsshGLlcVasA.u-..F.V.VsphClSTYRDll...DDsGp.RPYlC.......ssGsl....s.lsLtsp.Fo.sscCpCssGasKhlFpQTAhARolPVCIPNphuslYs+lY...................................................... 
1 0 0 0 +2301 PF05018 DUF667 Protein of unknown function (DUF667) Bateman A anon Bateman A Family This family of proteins are highly conserved in eukaryotes. Some proteins in the family are annotated as transcription factors. However, there is currently no support for this in the literature. 19.70 19.70 19.80 20.20 19.20 19.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.14 0.71 -5.07 9 406 2009-01-15 18:05:59 2003-04-07 12:59:11 8 22 162 0 285 362 9 167.50 41 37.28 CHANGED MFRssaQsG.FLolLhSsGSKPLpIWchpsKNGalKRlTDc-I+ShVLEIhGsNVuToaIssPssP.ppoLuIKLPFLVh.llKNh+KYFoFElQIlDD+ph+RRFRsSNaQSsTRVcsFhssMPhpLssG.WNQIQFNLuDFTRRAYGTNYlETlR.lQIHANsRIRRlYFsD+LYs-DElPs-a+LasPh+sp .............................................ptaQts.Flslhhu.hG......p..........p..PlphWcp...psc.s..G.cl+...........+lhDp-lp.S..VlEI.G....sssuosaIp..sP.....t....cs.....pp.........o.........LGI+hPFLlh...llK..s..................h..p.KY...Fo..F.Elplh..DDcsl+RRFphS..Na.....p..pp....s............+l.......p......P...h.hsp.......h................P......h..p...............l........s...............p.............G....WNplQhN....Ls.DaT.......c.c..u.Yu.....o....s..Y...h..coL+....l.plpANCRlRRlYFuDplYs.pp-lP.ch.+h.......p........................................................................... 0 118 162 232 +2302 PF05003 DUF668 Protein of unknown function (DUF668) Moxon SJ anon Pfam-B_4700 (release 7.6) Family Uncharacterised plant protein. 20.90 20.90 21.00 20.90 20.70 20.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.14 0.72 -3.77 18 204 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 22 0 131 194 0 88.20 43 17.08 CHANGED TLGuAuLALHYANlIlhIE+Ll..........ss..sptlstcuRD-LYpMLPsolRsuLRu+L+shs+s............DtsLAspW+sshp+ILcWLuPhA+NhlR .......pLGuAuLALHYANlIl.I-pll...................sp...sphls.ssRDsLYphLPsol+u.uLRu+L+shshs..................-thhssph+sshp+.hLpWLsPlApNTh+.. 
1 11 80 109 +2303 PF05037 DUF669 Protein of unknown function (DUF669) Moxon SJ, Sammut SJ anon Pfam-B_5014 (release 7.7) Family Members of this family are found in various phage proteins. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.85 0.71 -4.28 22 204 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 181 0 29 185 46 130.70 19 77.97 CHANGED hFThDasps....psapsl.sGpYcVhl..psphpsscs.Gsphlplchclhss..........papsphla.c.hh....ppcsschs.ttLpsls+AhG...h.pspphpslcsahpphhs+sl+Vslchc..sE.NGppY...plhth..scs..Ptst ...................s.pps.....pthtsl.sGpY-l..hlp..psphpsocs......G.sph...lslphplcss............ca.p.spplahphhh............pppstchshttl.thstsht...........h..ptpphpshpph.......hp.th....s+.hplhlp.p...p.pup.h...p.................t..................................................................................................... 1 15 25 28 +2305 PF05050 Methyltransf_21 DUF672; Methyltransferase FkbM domain Moxon SJ anon Pfam-B_5811 (release 7.7) Family This family has members from bacteria to human, and appears to be a methyltransferase. 
18.30 18.10 18.30 18.10 18.20 18.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.82 0.71 -4.31 459 2587 2012-10-10 17:06:42 2003-04-07 12:59:11 7 106 1140 1 1157 2771 5093 161.90 17 45.41 CHANGED DlGAsh.....G...................hhh.h......h..sshl...hs.......hEPtst...................thpthttt..........................hu.....ls...............................................t..t.hphhhhsh............t.ht.thhtst......t........ttttt....................t...............................ttphplpshsL................D..sh.hpph..............ths.....h....lKlDlEGtE...hp..lL.......s...spphl.pp...htshh...............lE.....h..................................thpplhp......hl.....pph........GYph ...........................................................................................................................................................DlGAph.......G.............................................hh...........h...tspl....hu.......................hEPtst....................hthlptp...........................hs...........ls...t.............................................................................................t..t....hphh...h..sh.t..............tth.h.....h.htt....t..shtt...........................................................ttspthplpshsL......................................................D.......sh..hpph............tpls........h...lKlDlEG....tE.....hp....lL..........pG......spphl..pp.....hpshh............h.hE......hp.......................................tt.hht...hl..........th..sa................................................................................................. 2 476 817 1040 +2306 PF05054 DUF673 Protein of unknown function (DUF673) Moxon SJ anon Pfam-B_5918 (release 7.7) Family Family of uncharacterised viral proteins. 
25.00 25.00 37.30 36.30 17.40 16.60 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.30 0.70 -5.88 23 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 0 58 0 355.70 43 99.39 CHANGED MsCPhsI+VhISDpFlhFPYchVtsQsshus.....t.lpsLsVYVPTpEDlphls+pph..sp.FcsVhVh+Hc...sph-scsstcsssuslVYWNPIhPIsclG.lGpTpVFSVlLTssLa.Csohhlcpsssh...CPlQh....p.......................h.....phschsslsGEss..Ltphsplhcss.ssFlICFs+ETPphlKI.LNlKRlLhlhu.hRps.A+Ysl.LscpElssIapcLsWEpsRRLhKGs.hssp...C.shlNRsSLpYl+pA.-lLsIsssu.polhshlchFpsLIlsYplVP-lhlcLNslspp...........K+..VRLYC+sDShAITstGsVP.NhPssNsh.sFcasslssssh.pphppclhpcs...s.lhlpuscYNYFh ........................................................MsCP.hsI+VhISD+FhsFPYphVpPQsDlGs.....t.l.psLlVYVPT...--DlpaV-Kpth......sp..FpoVhVh+HE..ssph-ocsscKsssuTlVYWNPIlPIsElG.sGcTcVFSVLLTssLFhCpThllcppsPh.....CPIphp..........................p....hphpchhPIsGEhP..Ltchpclh-ss.ssFLIC..FshETPphlKI....LNlKRlLslhp.aRcsPA+YsIpLscpElsslYpcLsWEpsRRLh.K.......GD..hssp...C.shlNRsSLpYl+pAppLLtIscsS.polVcFVphFpsLIhPYplVP-lllKLNolcpp...........++..VRLYCKNDShAITshGsVPsNhP-pNPh..sFDaoDlsssptlpphtpclhpcs.hss.lhVpAs+YNYFh......................... 0 0 0 0 +2307 PF05056 DUF674 Protein of unknown function (DUF674) Moxon SJ anon Pfam-B_5937 (release 7.7) Family This family is found in Arabidopsis thaliana and contains several uncharacterised proteins. 
23.20 23.20 23.20 23.30 21.50 23.10 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.82 0.70 -5.86 8 313 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 19 0 190 294 0 209.90 21 83.65 CHANGED MAcSoccPKlSLRLlIDEEKNKVVLAEAG+DFVDlLFShLTLPMGTIVRLLEcH.+KSpslsIGCFsNLY+SVV-MulDsFcT-ACKQMLLYP+Sl+cppsRsLKLNIDDTEusKhFhCPtF..pttCpchYSNFsTS+CcCGsh.........MNcEIph.cptts.uphts..DssGVFsps..+oSFlITDDLKVplsSscllLNsLKDLGhuDs.sKLsEhll-VGlcE............................VhTLLpClFTS-sPLTDTFL+K+S..o.sspRhaKt.SPslpcpt-EussDpslTLcsaVRKpD.hcILYVECGEDFVDLLFTFLAlPLESlWpISG....suIslGCIGNLhRSFKDLS...sstsppus..KCslPaYYpCQKQLL..slhTppPPsYhtah...ths...patLopp........pc+lpphhhhDPKocsphpStstp.....GFVK+sTKFhVoDDLIITPhNSsSTlslLK.chQlchDDlElQsIoIuKsE ...............................................................h...........hphhhppp..tpplhhhEuttchl-hlhs.hh.hPhuthhp..hh...................................................................................................................................................................................................................................................................................................................................................................................................................ahl.ssh..h......s........h.......th.t....h.p..h.hs...tc............................hh...h...L............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 15 106 142 +2308 PF05055 DUF677 Protein of unknown function (DUF677) Moxon SJ anon Pfam-B_5920 (release 7.7) Family This family consists of AT14A like proteins from Arabidopsis thaliana. At14a has a small domain that has sequence similarities to integrins from fungi, insects and humans. Transcripts of At14a are found in all Arabidopsis tissues and localises partly to the plasma membrane [1]. 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.87 0.70 -5.77 7 135 2012-10-01 23:20:42 2003-04-07 12:59:11 7 4 17 0 109 163 0 266.10 27 77.09 CHANGED L+SFDosLppRTsplIsSLsspsc.......spSls...hDuLhElsppLlEhNQsVs+lIl-.c-.DlWcNp-LhpLVpsYFcoopKTLDhCpslEpClc+Achup.LlIphAltQFEpEsh-ps.G...cp++YcKTLEEL+pFKshGsPFsG-........Fhs.FcplacpQhhhLEcLc+p+cKLDK+L+Nl.......+shRhlSNllFssAFlsVhlhSlVssuhusssssu.lAuuhssPltuhGhWss.........phWcK.EculK+p+plhpoM-tGh.ss.cshcsIphhVcpLcscIpShLcss-FAl-+cp...us+luhppIcK+l-thTcplcElG-psuppS+hIthuRhllLp+I ...........................................................................................................................................................................................................pl.tLl.pYFp.o.ps.phCttl.pslppsc.p....l..hlt.....h.....tp.................................t.h.t.hhpt.L...tpF..htsP.Fst.................ht.Fptlh.ppp.t.hlc+Lptp+++lc++l+.l.......+thp+s...Sslh..hssshs.s...lhl...sullhAs..h.s.hsslhs..hsuhhu.h.P.htsh.upahs.................phh.p.p.hpp..slp.t.t..p......phls.sh...t...tGTalhhcDh-.....oIphLVs+Lcscl-...shhphschulcp....pc...........s......lc....s.....l.c....cl+K..p.ps....Fhcpl--Lt-psthC.tsIp+ARtlVlpcI.................... 0 17 65 82 +2309 PF05077 DUF678 Protein of unknown function (DUF678) Moxon SJ anon Pfam-B_6127 (release 7.7) Family This family contains several poxvirus proteins of unknown function. 
21.50 21.50 21.70 56.20 21.40 21.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.90 0.72 -3.99 10 55 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 35 0 75.10 63 95.94 CHANGED hDsu...GGtKR.RKRKP+T..oVc-p.......DDCsTCSSCpSKLVplSDlTKlSLspaplsGK...usTLoCuACGSpLphLsDFs ......DST..tuGhKp.RKKKPKT..TVhD-.......DDCMTCSuCQSKLVKISDITKVSLDhlssh.+....GNTLuCuACGSSL+LLNDFA.. 0 0 0 0 +2310 PF05078 DUF679 Protein of unknown function (DUF679) Moxon SJ anon Pfam-B_6129 (release 7.7) Family This family contains several uncharacterised plant proteins. 25.00 25.00 33.00 27.10 21.20 17.60 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.20 0.71 -4.60 24 212 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 23 0 125 191 0 154.50 40 74.00 CHANGED hsusAsLupLLPTGTVLAFQhLuPsFTNp.GpCss..sN+hLTusLlulCuhSChhhSFTDShhs..t.cG.+laYGlAT.......h+G.lhlhs...............................................................uusssspp...........chp+YRLRhlDaVHAhhSslVFhuVAhh.DtslssCaa.Pssupss.....+clLpslPlulGhluShlFhlFPosR+GIGa .......................pusupLspLLPTGTlLsFphL..sP.hT..Np.GpCts...ss+hlousLlslCuhuChhh..oFTDSap.s.....t..cG.plaYGlAT.......h.cG.lhshs................................................................t..th...................phscY+L+hhDFlHAhhSllVFhulAhh.DtslssCaa.Ps..s.tpt.....cpllpslPlslGhlsShlFhlFPopR+GIGa............... 0 12 64 94 +2311 PF05079 DUF680 Protein of unknown function (DUF680) Moxon SJ anon Pfam-B_6131 (release 7.7) Family This family contains several uncharacterised proteins which seem to be found exclusively in Rhizobium loti. 
21.70 21.70 23.10 22.70 20.80 20.10 hmmbuild --amino -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.92 0.72 -3.48 10 44 2009-09-11 14:48:27 2003-04-07 12:59:11 7 1 5 0 29 44 0 52.80 42 70.12 CHANGED GSDsaGSsssNQPAAo.......stVDsotTASlcK....h.t...ossssps...psuQGs.........c.slaGp ......GSDpYGSsssNQPAAo........tlDsotTuSlcK...........ossssps............tssQGs.........chslaGp.. 0 0 10 10 +2312 PF05080 DUF681 Protein of unknown function (DUF681) Moxon SJ anon Pfam-B_6137 (release 7.7) Family This family contains several uncharacterised beak and feather disease virus proteins. 25.00 25.00 72.90 71.80 18.00 17.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.47 0.72 -3.57 2 31 2009-09-11 14:24:40 2003-04-07 12:59:11 7 1 1 0 0 31 0 88.90 84 58.80 CHANGED MGGAChs.+YWLVPs+Vh.Rh+.T.hTu.RGVARsDGPMsSLs.NhIKCAVsGGMDMhGKhSSShTT.MGG.LIARCSA.hTVTHI+CQLRA.LWSL.ARG ..MGGACIsSRYWLsPAHVhSRh+.TSSTsHRGVARVDGPMsSLGLNIIKCAVNGGMDMMGKhSShWTTFMGGYLIARCSASATVTHIKCQLRAhLWSLPARG 0 0 0 0 +2313 PF05081 DUF682 Protein of unknown function (DUF682) Moxon SJ anon Pfam-B_6152 (release 7.7) Family This family consists if several uncharacterised baculovirus proteins. 25.00 25.00 29.80 29.40 19.80 18.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.97 0.71 -4.67 16 57 2009-09-11 12:02:38 2003-04-07 12:59:11 7 1 55 0 0 51 0 147.80 46 94.26 CHANGED p+TI.LYLsc..Pssls....sDKssDcsVlYF-GlIEslsDcSCDKhohFuELKKEcALhMKKshpDLlp+ssGNYsKNHVLlDALlMYKTYVELl..D-SAFGtslLphCh-alTthF+LFpLpS+IlVllPspssacpDNLSsLLKHLhphslIpl ..........................+TlhLYLsc..P.s.slp....sDpssDc..shlYFEuIlEClsDcoCDKhohFuELK+EpALhMKKhhpDLlp+ssGsYsKsHVLlDsLlMYKTYV-Ls..D..-SAFGpsllph.C.palTtlFcLFpLpS+IlVllP.pls.acpDNLSsLLKHLhphslIpI.. 
0 0 0 0 +2314 PF05082 Rop-like DUF683; Rop-like Moxon SJ, Eberhardt R anon Pfam-B_6161 (release 7.7) Family This family contains several uncharacterised bacterial proteins. These proteins are found in nitrogen fixation operons so are likely to play some role in this process. They consist of two alpha helices which are joined by a four residue linker. The helices form an antiparallel bundle and cross towards their termini. They are likely to form a rod-like dimer [1]. They have structural similarity to the regulatory protein Rop, Pfam:PF01815. 21.10 21.10 21.50 27.10 19.80 19.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.25 0.72 -4.50 32 142 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 135 4 75 130 7 64.10 40 88.76 CHANGED hs-.lcpLKtclKKLsu+AsphKMDLHDLuE-.LPssWppIh-lAp+Ta-sascLsph+ppLtshEsu .......p-lcpLKtcl+KLsu+AsphKM-LHDLuE-.LPssWppIhplApcsa-AatpLspt+ppLtthct.t............... 1 17 47 60 +2315 PF05075 DUF684 Protein of unknown function (DUF684) Moxon SJ, Pollington J anon Pfam-B_6081 (release 7.7) Family This family contains several uncharacterised proteins from Caenorhabditis elegans. The GO annotation suggests that the protein is involved in nematode larval development and has a positive regulation on growth rate. 
25.00 25.00 26.20 26.00 23.20 24.30 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.22 0.70 -5.84 5 141 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 6 0 141 123 0 248.80 21 75.10 CHANGED LcKLtELcKcIc+LS-sMsucF-DLKAFIlspEFY...................sslAsTAsTLMKaMQDTsssPsKco+cs....F+-sssKsSPLcLAcpLhphL-sEsTNPLKMAMsADsL+sccTFEKWRcIIDAVlTQhLlLEoFtNGha+D+spYcPc+LsEchcEacEphccW+E-Yps.-oSYW-ctVcshVHcIQDNNEsp..SNEEKAslI+-sL-KIMTNDtFYVlVaD.hsh....supcapasltts.c+paIuSaNRGsCNVhVaRS.....t+s+ohsppsc-L++KaQHsLss+app.....Nspahlpspu+shps+lss.CGaVtlVRsscclA...Vcsoss-.tp+GPGthh-usFctssh...hSs.csFpl ............................h.........th..ltpph..thtphc.hhst..ph.........................phh..h..lhphh.ssht..............s.p.sht....................Ftt.ht..p..Phph...............sh................h.thlp..p.oNPlhhsh.t....p....pttshppWpthhtthhsphhhlEshhtGh...tts..t.p.l.p..t..th.p.htphppta...........................................hW....pthcphlpp..h.pptt.h...sptp+AphlpptLpp.h..hoscs.F..alhVhs............t...h..................ppp.hl.s..h....p.s.t....hshhlYRS.................t.p..t....t.......t.h.tp..ht...................................................t......h......p.....t.hhhhh.t............hp.st..........tPG...............................hhh...................................................................................... 0 20 31 141 +2316 PF05085 DUF685 Protein of unknown function (DUF685) Moxon SJ anon Pfam-B_6261 (release 7.7) Family This family consists of several uncharacterised proteins from Borrelia burgdorferi (Lyme disease spirochete). There is some evidence to suggest that the proteins may be outer surface proteins. 
25.00 25.00 28.80 28.80 23.30 23.30 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.87 0.70 -5.34 4 176 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 29 0 9 140 0 217.00 43 96.09 CHANGED KLLIDEEETVQIKDLNKVTTVNNsDLLLLDDGVASSNAITFKNFL-ToKDKTFKGEGLuYFKEIIKSTIAEELAADc-FVEKIYsKIhDKLINNDSTNLSNLFSKIKSRLTDSISSATLSRSDcLLIM.SSuTIQKTPVPKQLLGlPSsashstphTcuTTlYPSDYcspuIhIsM-sauDVsLlF.KSsDDpPIYLDIDIclKHpcNc..sKuLhlpYuDETphshVYhhpGusuluhRIPhYKGWYlQKRsph.GcPVPhLlKL ......................................................................KLLIDEpETVQIKDLN+..VopVNsoDLLLLDDGsuSSsAITaKsFLcsspcchF.KGEtLsYFKpIIK.....STIApELsusp-FlcpIYsplhsKL.Ip....N....-.S....sp..lusLFpKI+SpLpsuIsp.s.TLo.cs-pLLhh..pssI.QKTslPcplhGlPssath.....t..hs..........sspla..p...Yc.........sKthhIsh......c..p....sssollh.Kp.Dsp.sIYLDI-lc..lch..p..p...st...pK.lhLpYsDEo...ppph.lYt.h.p.us..p.t....shphPhYpGWY.lQ++.t......Gt.hP.h.Kl.................... 0 5 5 5 +2317 PF05092 PIF DUF686; Per os infectivity Moxon SJ anon Pfam-B_6313 (release 7.7) Family This is a family of dsDNA Baculovirus proteins. It is required for the infectivity of the OBs or occlusion bodies. It is a structural protein of the ODV envelope required only in the first steps of per os larva infection, as viruses being produced in cells expressing the gene for this protein but not containing it in their genomes are able to produce successful infections. Baculoviruses are large DNA viruses that infect arthropods, mainly members of the order Lepidoptera. In their life cycle, they produce two kinds of particles, a budded, non-occluded virus (BV), which buds out of the infected cell and is responsible for the cell-to-cell transmission of the virus, and an occluded form, the occlusion body (OB), which is responsible for protecting the virus between encounters with larvae. 
A variable number of virions are included in the para-crystalline structure of the OB, mainly constituted by the virus-encoded polyhedrin protein; these virions are called occlusion body-derived virions or ODVs. 25.00 25.00 37.30 35.50 18.20 17.50 hmmbuild -o /dev/null HMM SEED 522 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.15 0.70 -6.14 26 72 2012-10-02 15:38:38 2003-04-07 12:59:11 7 1 65 0 0 71 0 489.90 37 94.42 CHANGED lLLlll.....lhllhphlsllphtpp..h..htslphFDNosVP........lIcPPs.EIlIEuNshpCH+pL.TPCoocuDCslCREGLAsCQhFcEpsllpls-.....ppphsIcPGESYCLALDccRARSCNPsTGsWlLscocs.GaoLLCoClpPGLVTQLNlY-DCslPVGCpPaGpIssINpsPl+CsC-sGYVuDh.ssTpTPaCRPpplRDVlh-ssFFPRsPCssGalpl-HPuLcshYRpphth..slCVhDPCSlDPIoGpRps..G+Lhhhh.sspch..hshCsC....shpcsLasVa....sssoM....lp......ps.......stplsNACI...pPFslphpplpph-hKhFWGRss.h.pSDsDlVhpVs...spl.cs+Y+thLa.hlpsH.Pphs..pss......hhllKFoluhs.....hhtpsth...tpslaphYhphstpp..sss.CF..hPGh.GcC........Isss.ssCIpR..tsh.V.osEshssshCahoRpsptl.................plWssss..hYspsphPlshhlssh...hh..hh.sspsh...pslhhlsutssssp.tph.......ssLtplLsTYPpYS .......................................llLllllhllhpalsllpht.cp.h..h.slthFDNspVP........lIcPPs.EIlIE...uNshpCH.+pL.TPCs..THtDCsl..CRE.GLANCQhF-Epshlphp-....tppphhIp....s....GES....YCLALDRcRARSCNP.sTGsWlLs................cocs......GauLLCoChpPGLVTQLN.hY-DCsVPVGCtPaGpIssI....Np..s..PlRCsC-sG....YVuDasssTpTPaCRPpplRDVhhDssFFPR.....sPCssGalpl-HPuLsshYppphph..slCVlDPCSlDPIoGpRps..GpLhhh..ss.ct..hshCsC....shtssLasVa...ssssuM..........lpps......stplsNAClpPFssph.tlpph-YKhFWu+s..........-.h.huDsDlVhpls...spl...ptRYchhLashlt.sHPphs..t........hhlhKFSluas.....hh.t..........tslaphahthpttp..sts.CF..hPGh.GcC.........Ists.s.CIp+..t...V.suEshtsphChhoRcsptl.................phWssss..hYspsphPsshhlpsh...hh.....sspph...pslhhltstthh.t..tph........ssLtplLtTYspYS................................................. 
0 0 0 0 +2318 PF05095 DUF687 Protein of unknown function (DUF687) Moxon SJ anon Pfam-B_6321 (release 7.7) Family This family contains several uncharacterised Chlamydia proteins. 25.00 25.00 32.00 30.90 23.20 20.60 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.62 0.70 -6.14 11 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 28 0 17 48 0 453.50 30 80.94 CHANGED csuphssss.ph..s.stslh-s..lChh.ssssppstshpVslsYlNGStpTptEA.tEuhalS-.LRGcPVRslYNsG....puhupuhhhspRsShotppPlCpAlLcsWcpFFS+stNssppallIFaGDGGsaVQpALc...pohausRIhllGIuPohYVpGpspsHaYRVsGDlsohLDppGaspup....VoTLPYSuGu-GlFhPulpsPoFpaALRht..pl.................ltp.sthsussspDpp......uusthulhlshupsssuFoRlpchLsps-osuphEhNshP.sphhDllLoulaslhRloslhQtahlhs.lshps-s.lshlhhsuYssssLphhhLhhTNppstRcpaRslRllApuhpshhhLsslh-h.....lNhhphhppss..shlpAlahsuoshosollhh-...IhchshssLRsRlQphshRhhssss.c.....pccls................RsscuuhttslshlssssuGlhhulhhGlhNthulplPcs.....lscs.ssss............NsTslas.s.h+ss.........puWpoGDshulupTlshllshllllhslhshVp.VRpNt+RR ...........s.................slhps..ls...sspsp.sts..VtVsYlNuS.pshhcu.tEshaLSp.ltGc.VhhlYNpG....phhutshh..tp...pt..pPlspAllcsWctFFup....sppFlhhaaGsuuhaVppAlp...ps.asspIhllGIsPohalp.p..s..psaaYRV.G..shhohLD.tGahtup....VspLPYSsuu.Glhh..shtsssap.Alhht..ph....................hschSs.tsssspst...........upp..ul.ls.spssssFsRl.phLs.spTs.phE.N..hP.pph.shlhpshhslhRloshhp.ahhhs.lsht.ss.l.hh.hh.ashsslp.hhLhhTs..phRcthRshRllhhshpsh..hsslh-h.....ls.hhhhtps....hhpslahhhphhshsllhh-...lhchth.sLRstl.thshRhhts.s.c......ppphp.....................pp.hsshhhhhphlpssshulhhshhhGhhs.hulplsp......hspsss...............sposhhs.................sa.sscshulupshphhhshlhhhh.lh.hlt.lptp............ 
0 3 3 17 +2319 PF05093 CIAPIN1 DUF689; Cytokine-induced anti-apoptosis inhibitor 1, Fe-S biogenesis Moxon SJ, Bateman A, anon Pfam-B_6320 (release 7.7), Wood V Family Anamorsin, subsequently named CIAPIN1 for cytokine-induced anti-apoptosis inhibitor 1, in humans is the homologue of yeast Dre2, a conserved soluble eukaryotic Fe-S cluster protein, that functions in cytosolic Fe-S protein biogenesis. It is found in both the cytoplasm and in the mitochondrial intermembrane space (IMS) [1]. CIAPIN1 is found to be up-regulated in hepatocellular cancer, is considered to be a downstream effector of the receptor tyrosine kinase-Ras signalling pathway, and is essential in mouse definitive haematopoiesis [2]. Dre2 has been found to interact with the yeast reductase Tah18, forming a tight cytosolic complex implicated in the response to high levels of oxidative stress [3]. 21.70 21.70 25.50 22.90 21.60 18.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.47 0.72 -10.97 0.72 -3.92 25 368 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 289 0 251 368 1 85.80 45 31.98 CHANGED sDhcps..hs.tt..Cs.s......tK++KACKsCTCGLAEtpEpEppsppspppp........................................................t.........phps.t..SSCGsChLGDAFRCuuCPYLG...LPAFK..PGEcVpL ...................................................................................................................t...t..Ctss.....tp+++A..CKsCTCGLAEp.ctcppttptttt...................................................................p..uuCGsChLGDAFRCuuCPYl.G...hPAFK..PGEclth............ 0 93 143 211 +2320 PF05148 Methyltransf_8 DUF691; Methyltransf_hyp; Hypothetical methyltransferase Moxon SJ anon Pfam-B_6432 (release 7.7) Family This family consists of several uncharacterised eukaryotic proteins which are related to methyltransferases Pfam:PF01209. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.42 0.70 -4.71 4 358 2012-10-10 17:06:42 2003-04-07 12:59:11 10 15 292 2 259 3274 773 201.90 38 54.17 CHANGED sLpt+hpcRLDGu+FRaLNEpLYospuscAhchFpEsPphFcLYHcGFp.QVcKWP.pPlDhIIccL+........p+.sshVlADlGCGEA+IA...thpc..pV+SFDLsulsc+VhsCDM.uplPh-DpSVDlAVaCLSLMGTNltsFl+EApRlLKsGGlLhlAEVpSRFs..SlchFt+tlsKLGF-hpphclpNshFhLF-FpKss..hhu.Kc.h.slpLpPClYK+R ...........................................................................................................................s.L.tphtt+LtuuRF.RalNEpLYTpsSpp.Ah.phFppsP.p............h.F.p......Y....Hp.Ga..pp...Q..V..p.....p....W.P.....N.P..V....D.h..h....lp...l+................................................................p.p...s..t...p...h...s..l..A.Dh..GC...G............-.......A....p....L.A....................ps.h......p........p...........p..............................V...............a.......S.......F........D..............L..............h..............u................s...............s.................s............h..............l.............s...............s...............s..............D..............h.......u...............p...............l..............P.....L.............t..........s.......pS.......lD....l.....s.....l....F.....C.....L...S......L.....M.......G.......T.....N....a....h.......s....F......l.c...EAh.R...lL.+..st..G...........L..h.....l........A........E.................l...p....S......R.Fs...................s.s.p...t.....F....h...p..h..l..p......h....G....F...h.............t...........p.hFhh...h.h...+................................................................................................................................................................................. 0 97 153 220 +2321 PF05114 DUF692 Protein of unknown function (DUF692) Moxon SJ anon Pfam-B_6476 (release 7.7) Family This family consists of several uncharacterised bacterial proteins. 
25.00 25.00 26.90 26.90 22.80 22.50 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.63 0.70 -5.49 7 833 2012-10-03 05:58:16 2003-04-07 12:59:11 8 2 668 1 283 808 137 271.00 36 91.05 CHANGED hhpuAGLGhRpslhcshlsh.pssslpFlEssPENahshGGhhhtph-pltER.hPlshHGlSLSLGGtsslDhshLcslKthhcpacshhhSEHLuaspscG.pLa........DLLPlPaT--Alc+lstplR.sQDhLtp.lhlENsShYlp.s.pphsEl-FLpAlspcssCtLLLDVNNlaVNAsN+Gh.DPhtaLsslPscRlshlHlAGHspp..u......................spLhIDTHGtsVtssVWsLLttAhuRhus..PTLLERD.NhPPhuELhAEl-pItth.p+uttAtt ....................................................................................h...tsGlGhRtphhpthh.p.......................t...s.........sl.s....ahEltsEN..a......h......s...................G..G...h.......h..t.....ptL..cpl.t-..c..aPlshHG.l...u.L.Sl..Gu....s..s...PLD....tsa....L.p.pl+phhcphpsshhSEH..Lua...s...p....s.....s.....G...a.ha..................DL......LPl...Pa.Tc.Eul.cplsp.+lcpVQ-hL..t.c...lhlENsS.......t..Ylp......h.....s......t....u..phsEs.....-Flstlsccss..CtLLLDVNNlYVsuhN.H..........Gh.....D.........s.....t.........s....a....L....ssl....P.hc+Vs.lHlAGHs...pp......t........................ss....l.lID.THup..sV..ss..sV...WsLhpt...........sht..p.h............G.....s.....hP.TL.l..EhD.tslPs.hstL...hsEh...pphpph.t......tt.................................. 0 73 162 234 +2322 PF05113 DUF693 Protein of unknown function (DUF693) Moxon SJ anon Pfam-B_6473 (release 7.7) Family This family consists of several uncharacterised proteins from Borrelia burgdorferi (Lyme disease spirochete). 
25.00 25.00 29.80 25.00 18.00 17.60 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.08 0.70 -5.50 2 142 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 27 0 14 124 0 282.70 62 98.41 CHANGED hlLLpYDFKIEFYcscpS..KcoosG.sh.EETPKIIINTQHGIHlDIoISN.aSNhphlpSKpsKIVLWNLPLDFsscIchGDIVKIaYKKFAHEKpFDFIMAGhLGTPMSTDYPGGDFSVELDVRLhspSNFFNRKL...EsKsFKGKTVQEAIESVFPNRNIIpMDccDRLKlI-KsIYATTPKEFlDKIKGlYlHsVIADlGss.hss-CNhIFTN.+phtus.pYcALEDYGLEFIPQQEIsltspaKIphlaWNApTFYTHKLplGDKVSFIDGLGKMIKTTIKETSApLSNsGECSLILKLcDDSscpc ...............MLLLQYDFKIEFYss..c..pS..Kco.sG.shsEEhPKIIINTQcGI.H.lDIoISNha.S.shNhlpSKpuKlVLWNLPLDFTccIchGDIVKIYYKKFAHEKs..FDFIMAGhLGsPMSTDYPGGDFSVELDVRLhopSNFFsRKL.........EsK...NFKGKTVQEAIESVFPNRNIlNMDEcD+LKII-KsIYAoTPKEFI-K.I.K.GhYlHsVIsDlG....s........s........shslECpaIFTNhtphtts..pYcALEDYGLEFIPQQEIsltsphK.h.phlaWNApsFaTHKL+lGDKVSFIDGLGKhIKsTIKETSARLSNsGECSLILKLhDDSs........... 0 7 7 7 +2323 PF05107 DUF694 Family of unknown function (DUF694) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of hypothetical bacterial proteins. 
25.10 25.10 25.90 33.40 24.10 24.30 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.76 0.70 -5.17 68 568 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 510 0 201 554 13 266.70 31 92.36 CHANGED lpNRh-FlhlaDVpsuNPNGDP.sG.NhPRh.D.ETGpGlVoDVsLKRKlRsalpt..........p.........................G....p......pIaVpcpshhp.............................................ppptphhsth...........................................ph......pt..t.t.............................tttscphtphhCppahDVRsFGAVhu..........................thssuplRGPVQhs.aApSlcPl..l.hphsIT+hssop.......t......-uc...cpus.....................................tpMG.cKahVP.YuLYthaGhlsspL.A..cc....TGFo--......DlcllhcALhshF-pDp.SuuR..GpMpsRtLlhacH........sst....lGshsuttl.................acp....lplsh..............pp.psscsap..Dh.pl ..........lpp+h-Flhla-VpsuNPNGDPssu.NhPRh.Ds-pGhGhloDVsLKRKlRshlp..t.........................G....h....pIa..lpppthhs.....................................................................pp..p.phhpth...............................................................phttt.h.t......................................ttptcchtphh..sp...p...ahDlRsFGAVhs.......................s.....thssupl+GPVQls.hupS..l.cPl..h..phsIT+ssssp...............p-sc..cpps............................................pMG.pKahVs.YulYthpGhlssph...A..cc..........TGFo--......DlphlhcALhphF.-pDp...SuAR..Gphps+tllhacH...........sst............LGshsuthl......................act.........lplph...................pt.p..p.ht-.............................................................. 0 87 152 181 +2324 PF05117 DUF695 Family of unknown function (DUF695) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of uncharacterised bacterial proteins. 
22.40 22.40 22.40 22.70 22.30 22.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.53 0.71 -4.25 13 259 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 238 0 50 209 9 131.20 24 51.99 CHANGED hppsWpsYhoplsD+.AhhohNlulhcphss..phhspllplplsappssEsGLPss-EatcLtch.s+l.splsAhs.slasG+lhssGptchaFYspps....sslh-sLsph.sashshshQ-DscWDhYFcFLh ..............................................................................ppWt.a.sh..l.s.sp.us.hplNlsh.hph.t.h..pphsphlplplt.apsssEsGhPot-EhppLtplE-plhp.pl....p.shs...slhsGplossGp.tcha.FYsccs....th.phltcsL.....p.p.h....c.h.th..p..h.thpcD.pW...chYhpaLh............................ 0 16 33 43 +2325 PF05128 DUF697 Domain of unknown function (DUF697) TIGRFAMs, Finn RD, Bateman A anon TIGRFAMs (release 2.0); Domain Family of bacterial hypothetical proteins that is sometimes associated with GTPase domains. 20.20 20.20 20.30 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.61 0.71 -4.75 81 1824 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 1554 0 346 1241 405 156.70 29 46.24 CHANGED hpspclhphhcphlhp.p-ppAcpllp+huhtsAsllAloPlsllDllhsshtNhpMlpcluclYGhchuhhuthcLh+plhtsluhhGssclussl....ltshLuh........slsuhluu+hsQGlssGhLTttlGhushchhRshs.......assts...lspllpclhpthcc ........................................stclht.htp.l.....hctpAcp.lp+hAttuuh....h...lA....lSPlsllDhshlshtshclIpcluplYGl-huh..h.u..t..h..c.......Lh.+.h.l..l..h.s.hA..h.sG.....u...s.c..l..sc..cl..........shshh.u......................slsu+l..o...spsuQGluAG...hl...Tt...tlG...h.tu.h....c.hhcshs............a.ttp...tltph.hpphht....p............................................................................. 1 76 196 287 +2326 PF01901 DUF70 Protein of unknown function DUF70 Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Archaebacterial proteins of unknown function. Members of this family may be transmembrane proteins. 
24.20 24.20 25.00 25.00 23.60 24.10 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.18 0.70 -5.39 16 72 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 43 0 46 76 4 303.90 22 86.25 CHANGED .hhhllllhlalsluhl................phhhshllhhhuhhash....shlsuphlht.ht..........h.....................chphptpcphshhlllIGllhhhhsllhlGuIPLhcshlRtpLsshhhhhsaLhhlususllhu...phpchthhlhsh.............hLlsLhGYRTsVlsllluhhlhhYYsp+ls..t+tlllshlslhl.lLuluhhphhs.thts.s......lhhRhthThsVL-hIl...shsGshpGclphssl............GPRslIuphlh.shu..VoITsTlFGshhLDFGlhulhl.hhlLGlhhGlha+lspph.+ulYhhhYSlLluYsllGIETGhLDlslhlhYlhuhIlthh ............................................hhhhhlhhhalhh..h..................................hh.hhhlhhhhshhhhh......hhh.sphhht.h................................................hphhhppphhhhhhhlGll.hhh.lhhhtulPLhs.slRhtlss.....shlhhl.uhuhllhu............phpppthhlh.h..............hllsLhGY.Rosll..sllluhllhhYhp.p+lp......h+..llhhh....hhhhl.hlulshhthhh.t.tt..........lh.RhthTh.lhchll.......sh.uhhhGplhhss................sPRthI...........uphlh.hhs..hslTsTlhGshhhDFGhhGl.l.hhllGhhhthhhchtp.h..ptlYhhhhullhuhhhlulcoGhLsl.lhhhahhuhl........................................... 0 9 15 31 +2327 PF05142 DUF702 Domain of unknown function (DUF702) Finn RD, Sammut SJ anon manual Family Members of this family are found in various putative zinc finger proteins. 
20.50 20.50 20.80 21.40 20.30 20.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.08 0.71 -4.05 16 166 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 27 0 104 156 0 135.60 46 49.80 CHANGED utuGs.hsCQDCGNQAKKDCsHhRCRTCCKSRGFcCsTHVKSTWVPAA+RRER.Qpthuu.ttt.stt.........ssussc+.Rp.........sttoohssophssusss.thsspp....................aPsEVSSsAVFRCVRVoulDD.u-sE.aAYQTsVsIGGHlFKGILYDQGs-pp ...............s..sshsCQDCGN.QAKKDCsahRCRTCCKSRG....Fc....CsTHVKST.WVPAu+RRER.Qt.hsshttt...................ttsss++.+...............t.t.t...sshsp...sphsss...p.....t.h.th.t.p.............................hPtpVpu.AlF+CVRVoul--..u......-sE..hAYQssVpIuGHlFKGhLYDpGsc..t........................................................................................... 0 16 70 85 +2329 PF05152 DUF705 Protein of unknown function (DUF705) Moxon SJ anon Pfam-B_6448 (release 7.7) Family This family contains several uncharacterised Baculovirus proteins. 20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.96 0.70 -5.44 27 75 2012-10-03 04:19:28 2003-04-07 12:59:11 7 1 73 0 1 97 1 256.70 34 92.37 CHANGED hpWhsL+p+pu.hl+cHlLhlscasDLphl....uacplchhEaVlFthssp...........pphssppYthplhps.pDsMp-lRpplKpsaKsshLGHsaVlpc+.shYsh..LcEWhV...shh-V.p.ht.pshhachPH...VlVFDLDSTLIT-EcpVpIRDptVY-SLp-L+ptsCVLlLWSYGsREHVscSLpcscLs.sYFDlIlstGppssphsss............hhsDpphchhahspsFahD......hsstps.....LPKSPRVVLaYLpcpGlNahKoITLVDDLhsNsauYDhFVpVp+CP....PlsDWphYH-pIlcNIpcY-shap 
.........................................................................................................................WhsLc.pts.hh+tHlLhlsphtDlhhl....thpthphhEallhths.p.............hs..pYhhphhps.tDsMp-hRhplKhsa+hshLGHhhllt.pc.shYsh..LpEWhl...sh.pl.p..ht.pphhaphPH...VlVFDLDsTLIT-E.p.pVpIRDstVY-SLp-L+ppGC...lLlLWSYGs+-HVscShccs..c..L..s....sa.FDllls..tGpphtp.ttt.............hhD..p..chhahppsFhhD..........hp.tpt........lPKSP+lVlhYLpchslNahKolTLVDDL.sNsasYD.aVplp+C.....PhpDWphaHp.Ilp.l.pY-t................................................................ 0 1 1 1 +2330 PF05153 DUF706 Family of unknown function (DUF706) Finn RD anon Pfam-B_2804 (release 7.7) Family Family of uncharacterised eukaryotic function. Some members have a described putative function, but a common theme is not evident. 20.60 20.60 20.70 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.86 0.70 -5.05 21 371 2012-10-01 20:28:14 2003-04-07 12:59:11 10 7 259 4 239 388 200 228.40 49 78.66 CHANGED Yssu..p-RV+pFY+ppHp+QTVsashptRtcat..phs+tcMolWEAlEhLNsLVDESDPDs-lsQIpHhLQTAEAIR+D.sc...-WhpLTGLIHDLGKlLhh..F...u-sQWsVVGDTFPVGCpFs-phla.s-oFcsNPDhssPhYsT+hGlYp.sCGL-NVhMSWGHDEYhYpVhK.spSTLPccuLhMIRYHSFYPWH+cGAYpHLhN-cDcchLchV+tFN.YDLYSKScchsDlEpLKPYY.pLI-KYFPs...hlcW .........................................................p-pVcphY+..HppQTlsashp.t+tpat........p.h.s..+.tcMolaEAh-..hLss..Ll..DE...SDPDs...-hspIpHhlQTAEuIR+D.sc...cWhpLsGLIH.....DLGKlLhh.........a.................up..........s..........QW....sVVGDTFPVGCta.sc.....plla...chF.........p.s.NPDhps..shY..sTchGlYp.sCGL-NVhhSWGHDEYhYpV...............hK..s.p.o.oLPppuLhhIRYHSFYPWHp....tG....s.YpcLhs-cDh.ch...LchVptFN..a...DLYSKs.s.p.h....s.....cl..-pL+.PYY.pLI-ca.hPt...hlpW................................................. 
1 72 137 192 +2331 PF05212 DUF707 Protein of unknown function (DUF707) Moxon SJ anon Pfam-B_6598 (release 7.7) Family This family consists of several uncharacterised proteins from Arabidopsis thaliana. 24.10 24.10 24.70 24.70 23.40 24.00 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.10 0.70 -5.30 9 266 2009-01-15 18:05:59 2003-04-07 12:59:11 7 11 48 0 153 263 5 249.30 42 72.25 CHANGED hassopP+GAEhLPPGIVss-SDhaLRRLWGpPp..EDlspps+YLlshTVGasQKsNlsusV+KF.S-sFsIlLFHYDGRsotW.--hEWS+pAIHlSs+KQTKWWaAKRFLHPDIVA.Y-YIFlWDEDLGV-pFsAEEYl+llKKaGLEISQPGL-sspG..lsaphTtRRsct.EVH+....................shFVElMAPVFSR-AWRCVWHMI...............QNDLVHGWGLDFsLt+CV.......-sscEKIGVVDSQallHpulPSLGs...................QGpucputushpuVRsRsptEhphFpsRhusAEKph ........................................................................h.............LP.GIl..posh.h+..L.as.sp..............pph........t..p.LlshsVGhpQ....+t.lsthV....pKF...s...............p......sFslhLFHYDGpsstW.pph-WSpp...AI.Hlustp..QTKW.....WaA.KRFL..HP..DlVu.Y-YIFlWDEDLsV..-pFssc....c.....Ylcll+ccGLEISQPuL-sspu...lpaphT..hRp...t.......s........t..clH+....................tt.....t....th.s.t.t....sphPPCsu..........aVEh.MAPV.FSRsAW.+ClWH.hI...............Q.NDLlHGWGLDatLthCs......................tstpc+lGVVDupallHp.s.l..PoLGs..............................ps..ps.t.....p.s...hs.....h....tlRtpshhEhthFppRhtpA.t........................................... 0 28 91 123 +2332 PF05166 YcgL DUF709; YcgL domain Finn RD, Bateman A anon COG3100 Domain This family of proteins formerly called DUF709 includes the E. coli gene ycgL. Homologues of YcgL are found in gammaproteobacteria. The structure of this protein shows a novel alpha/beta/alpha sandwich structure [1]. 
25.00 25.00 26.10 25.40 22.00 18.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.32 0.72 -3.85 61 977 2009-09-11 13:36:28 2003-04-07 12:59:11 8 2 970 1 147 486 68 71.60 58 75.14 CHANGED CulYKSsKKsshYLYVt+..........+DsFscVPEsLhphFGpPphVMhlsLssc+pLApsDsccVhpslpppGFYLQhPPs ......CsIY+SsK+-pTYLYVcK...........+DDFS+VPEtLhcuFGp.PQhuMh.L.sL-GRK+LssADlEKVKpALsE..QGaYLQLPPP.......... 0 27 60 108 +2333 PF01902 ATP_bind_4 DUF71; ATP_bind4; ATP-binding region Enright A, Ouzounis C, Bateman A, Yeats C anon Enright A Family This family of proteins probably binds ATP. This domain is about 200 amino acids long with a strongly conserved motif SGGKD at the N terminus.In some members of this family e.g. Swiss:Q12429, this domain is associated with Pfam:PF01042. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.19 0.70 -4.98 6 870 2012-10-02 18:00:56 2003-04-07 12:59:11 12 20 725 8 474 928 144 206.10 27 57.01 CHANGED hKsAsLaSGGKDSshALYhAhcc.hEVcaLVslho-Nc-.SaMaHssNlclscl.AEulGIPllchhspG.cEcEV--LtshlcpLc...V-ullsGulhScYQKpRI-pVCRElGlKshsPLWc+sP.chh.EhlctGFcshllAVsAhGLscsWLGRclc+chlD-LpcLsE+YGIc.AGEGGEFETlVLDuPlF+tRlElcph-thWctphGahtIc+tcl 
............................................................................hh..hSGGKDS...shs.h......hp..h.hpt.....sh.p...l...h.....h...L.........h.s..........h.......h........s............c.....t................c......Shhap.....shs.h.p..hlph.A....c.uh.s....l.....P..L............h...........t....h............t...........h..........p............s...............................p..............p..................c....................h..c...s..l....h.p...h....L....p....p...lp........................lc..u...l...shGsl.....h.s.....p.h.....p..+s.hh.-pl.................spc..l..G.Lpsh.hPLW...........p................c...............s......p...........p...p..........Llp...-h......l...........s..........t...............Ghcs.hl..lpVs.u..h..G.L......s.t.p.a......L..........G+......p..l...s..............p.....h.....h.t.p..L.hp..h........................s....l......c........ss........GEsGEacTh..VlDu..P..lF....c.p.p.l...ltt..p...h.....................................h...................................................... 1 171 291 398 +2334 PF05164 ZapA DUF710; Cell division protein ZapA Finn RD, Mistry J anon COG3027 Family ZapA is a cell division protein which interacts with FtsZ. FtsZ is part of a mid-cell cytokinetic structure termed the Z-ring that recruits a hierarchy of fission related proteins early in the bacterial cell cycle. The interaction of FtsZ with ZapA drives its polymerisation and promotes FtsZ filament bundling thereby contributing to the spatio-temporal tuning of the Z-ring [1][2]. 
20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.79 0.72 -4.02 162 2881 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 2807 8 629 1565 1385 96.40 23 86.52 CHANGED plp....l.pIhG+pYplsssssp-ctLppsAphlcp+hp.clppp..ss.thssp+lhlh..uALsl.......uc-hh.......p....tcppt....tphpp....................................................................phppc......lcpLt............................ppl- ...........lslpIhGcsaplsh.s.s.-.........p...cct.L..ppsAchlsp+lp..-l+pp......st...shs...spc.l.....sll..sALNl.......spEhh...........p.......hcpct......pphtp..................................................................phppp.lp.ltt..................................................................................................................................................................................................................................................................................................................................... 0 210 399 517 +2335 PF05167 DUF711 Uncharacterised ACR (DUF711) Bateman A anon COG2848 Family The proteins in this family are functionally uncharacterised. The proteins are around 450 amino acids long. It is likely that this family represents a group of glycerol-3-phosphate dehydrogenases. 
25.00 25.00 28.30 28.20 23.80 22.70 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.23 0.70 -5.76 53 1207 2012-10-01 23:28:04 2003-04-07 12:59:11 7 3 1155 2 247 890 31 418.00 60 95.97 CHANGED M..p.ppIhETlpMlppcpLDIRTlThGIoLhDChssshp.htpplapKIs.phApsLVcsuccl....ppcaGIPIlNKRIoVTPIullut.uhtt.psh......lphAcsLDcAAcclGVsaIG.GaSALVpKGhopuDctLIpSlP-ALssTc+VCuSVNluoT+sGINMDAVpphGclIKchAphost..uhusAKLVVFsNusp.....DNPFMAGAFHGlGEs-sVINVGVSGPGVVcpAlcch..c.ut....s.............hptlu-.pIK+TAFKlTRhGpLVGppsuc+LsV...sFGlVDLSLAPTPshGDSVAcILEphGLppsGs.GTTAALALLNDAVKKGGhMAsopVGGLSGAFIPVSEDssMlcAlppGsLsl-KLEAMTuVCSVGLDMIAlP.GDTsscTIuulIADEsAIGhlNsKTTAVRlIPssG+csGD.V-FGGLLGpu.slh.lsphsuttFlp .........................................................................hphpplhETlpMIccpphDlRTITMGISLLDClc....sDhpcssc+I.YpKIT.opApsLVtsG-cI....tsEh..GIPIVNKRlSVTPIulluu.As.ps...sDa...................VslA+sLD+AAK...clGVsFIG.GaSALVpKGh.ppuDchLIpS.IPcALupT.-hVC.S.S.VNlGST+.....uG.I.NMsA.VppMGcl...IKcsAchos.................hGsAKLVVFs..N.A.V.E.....DNPF.MAGAF........H....G....VGE..uDsl...INVGVSGPGVVKcALEcl.....+Gp...s................a-hlAE..Tl.K.K.T.AFKI.TR.hGQLVGp...AScRLGV......sFGIVDLSLAPT..PAlGD..SVAclLEEMG.L..E......plGoHGTTAALALLNDtVKKGGlMAsspVGGLSGAFIPVSEDpGMIs....AVpsGsLsLEKLEAMTulCSVGLDMIAIP.GDTsAc..........TIuuhIADEAAIGhINpKTTAVRlIPs.GpcsGDhlEFGGLLGhA.PlM.VsthSs.sFl.s....................................... 
0 100 167 212 +2336 PF05168 HEPN DUF712; HEPN domain Bateman A anon COG2250 Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.33 0.71 -4.21 92 1339 2012-10-01 22:14:54 2003-04-07 12:59:11 9 14 572 11 648 1274 106 115.30 18 27.05 CHANGED hcthlcpAccsLcsAch.hc.ps..hYshusFtupQAsEhslKAlLhp..hstp..s+sHsltpLlp..hlpphh........spphhcth.......phLpphhhtuRYsshh...........scc-AccslptAcp....llch.h......cth .......................................................hhppAppplptAc..h.....h....p...ps........ha.p......h..u.s..ap.sppu....sEpslKA...h...l...ht.............hsh....p........s........p.....s....H.s.l.t..t.Lhp......hlpph..................pph.tp.th..............phLpph..h..h...t...s..R..Ysshh......................spppsp.phlphspp....lhph.h...h.................................................................................................................................................................................................................................... 0 263 417 547 +2337 PF05206 TRM13 DUF715; Methyltransferase TRM13 Wood V, Bateman A anon Pfam-B_10143 (release 7.7) Family This is a family of eukaryotic proteins which are responsible for 2'-O-methylation of tRNA at position 4 [1]. TRM13 shows no sequence similarity to other known methyltransferases. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.98 0.70 -5.14 29 265 2012-10-10 17:06:42 2003-04-07 12:59:11 9 15 192 0 183 340 36 237.10 27 56.06 CHANGED hQQuSllGphcchph...L........ssss.....................salEFGAG+GcLSpalsp..sh.pp..............................stspalLlDRsspRh..KhDs+lppcp...................hlpRl+IDIcDLsLstl.p.................t...ptpth.......luluKHLCGsATD..LoL+Cllssshtpt....................................tsp..htGllIAhCCHHhCsacpass.........+ca.ltphGhs.pp.-Fphlp+hsSWAVsGpc.............................................t..tp..ttlshp-RcplGhhsKRlIDtGRlhalcpp.....Ga.pspLhhYsppslohENVsLluh ..........................................................................................QpuS.llt.hpp.thl........t.tp.........................salEFGAG+G.t...Lothls..sh.t....................................................................ttsphlL.l..-R.ts..Rh...K.hDtp....hp.ppt.............................................hpRlphDIpc..Ls.Lstl..........................................................................................t....tt..h..................lsluKHLCGsATD..hsL+Clhps.h.........................................................................................htulslAhCCH...HhC..pappass..........ppa..h..p..t.huhs.....tt.pFthhpphsS.WAssu.t.........................................................................................t....hs......tc+.......ptlGhhs+tllD.GRl.aLppp.............sh..pstLh.Ys..p..lo.ENhhlhs.h............................................................................................... 0 75 106 154 +2338 PF01904 DUF72 Protein of unknown function DUF72 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this family is unknown. 
20.80 20.80 21.30 21.10 20.50 20.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.87 0.70 -4.61 218 2485 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 2050 4 731 1925 168 236.10 27 85.81 CHANGED hYPpshsp.tp.pLp..hYupp..assVElNuoFYs.h.s.p........shtpW.tpp....sP..c.s.FpFslKss+hlT.H...t...ppLp...............................sspsh.ppF.....hp.slpsL...t..s+...LGslLhQhPP..s..hphsspphptlhphlsph...............................s........hphAlEhRHto.ah........s..th.hphLc.c....tslshlhsDsst......hs.hs.................hss............shsYlR..hHGpsth.................h.htYss......ptL...ptaupclt.t..htt...u......................cc.salaFsN.stp.s...pAstsAhpLtchl .............................t..tp.pLptYupp..FssVElNsoa.......Yu.h.p..c.........sht+W..tp.p....oP....-..s...FcFshKhspslT..H.......p.ttlp...............................................pspct.l...ppF.....hp....tltPL...........t......s+..lG.llhQh...Ps.......s.......hths....s.cpl.stLh.phhpphs.....................................s.....h.shuVEhRH.s.aa......p..pph.hphLc.p....ps..ls.tll....sDptt.....sh.s..ht...............................................................ss........................shshlR..hpGpspht....................h..tYsp................ptl.....ptasp.+lt.p....htp....ts........................ppsalhhps.sstu....pAspsAtpLhp................................................................ 1 225 443 602 +2339 PF01905 DevR DUF73; CRISPR-associated negative auto-regulator DevR/Csa2 Enright A, Ouzounis C, Bateman A anon Enright A Family This group of families is one of several protein families that are always found associated with prokaryotic CRISPRs, themselves a family of clustered regularly interspaced short palindromic repeats, DNA repeats found in nearly half of all bacterial and archaeal genomes. These DNA repeat regions have a remarkably regular structure: unique sequences of constant size, called spacers, sit between each pair of repeats [2]. 
It has been shown that the CRISPRs are virus-derived sequences acquired by the host to enable them to resist viral infection. The Cas proteins from the host use the CRISPRs to mediate an antiviral response. After transcription of the CRISPR, a complex of Cas proteins termed Cascade cleaves a CRISPR RNA precursor in each repeat and retains the cleavage products containing the virus-derived sequence. Assisted by the helicase Cas3, these mature CRISPR RNAs then serve as small guide RNAs that enable Cascade to interfere with virus proliferation [3]. Cas5 contains an endonuclease motif, whose inactivation leads to loss of resistance, even in the presence of phage-derived spacers [4]. This family used to be known as DUF73. DevR appears to be negative auto-regulator within the system [2]. 22.70 18.80 23.30 19.50 22.60 18.70 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.54 0.70 -5.55 28 278 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 233 4 129 276 8 245.00 21 77.26 CHANGED lsuRhhlpscuLNtsEusG.NhscpR+ssll.hppt.......aplthVPslSGpultHtattsLsclutpp...shslschscph...thhhphsstp.h..............t..thEpphlppsslpDltGFLh...........s.tsps.............l+RsSslphoahlPs.cthp.......h-sthHsRas................ttppppQslaphEtuSulYshohslDlstIGhsps.................................................................p.phslpsc-+.pcRlcsulcALh.hLsth.hGA+psRhhPlht..phVsuloc.tPhss.s..sth.scYl-psh 
................................................................................h..hph.hphpuhNh...s-.u.....h.....G.....Nhsph++l.................hpsp.hshlStpul+athhptlt.c.hh.t.......p........h....t...t........tthhpht.......................................................................................................ppsslpphhptDlaGaMp.......................................stpsts..................hpRsSsl+lo.hlulpsht..............h-hpF..tsphs......................................t......ttp..ss.slaphEhp.p.uhYphohslDLcplGh..s.pt.............................................................................t.htl..spc.E+..tcRlpthlculth.hst.s.htu.+.hspht..Phh....hlhsh.p.......................h........................................................................ 1 58 86 103 +2340 PF01906 YbjQ_1 DUF74; Putative heavy-metal-binding Enright A, Ouzounis C, Bateman A anon Enright A Family From comparative structural analysis, this family is likely to be a heavy-metal binding domain. The domain oligomerises as a pentamer. The domain is about 100 amino acids long and is found in prokaryotes. 22.40 22.40 22.40 22.50 22.30 22.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.20 0.72 -3.61 171 2580 2012-10-01 20:55:33 2003-04-07 12:59:11 12 6 1979 20 541 1427 512 98.20 39 85.64 CHANGED MlloTTssl.tG+cIhchhGlVtG..psltup...slh+DlhAul+sllG...GchpsYp.chLpcARcpAlpchtpcApplGAsAVlulch-hssl......sp..uhh..lhAhGTAVpl .............................................................MhhoTTssl.tGppIhchhGl.VtGpslhut......Nlh+DhhAu..l+.sl.lG............Gc...suY...p.c.Lp......cARc.AhpchtppActhGAsAlVGlchDhpsl...........up.....uM.l.....Vss.GTAVp.................... 0 198 364 467 +2342 PF01910 DUF77 Domain of unknown function DUF77 Enright A, Ouzounis C, Cerutti L anon Enright A Domain Domain of unknown function. 
The crystal structure of two of these members shows that this domain has a ferredoxin like fold and is likely to exists as at least homodimers. Sulphate ions are are located at the dimer interfaces, which are thought to confer additional stability. Although the function of this domain remains to be identified, its structure suggests a role in protein-protein interactions possibly regulated by the binding of small-molecule ligands [1]. 24.90 24.90 25.10 24.90 24.80 24.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.81 0.72 -4.31 104 1948 2012-10-01 20:40:01 2003-04-07 12:59:11 12 3 1567 27 505 1121 132 91.30 29 87.80 CHANGED sslsllPlG..s.....ssSlupaVApshcll....ccuG.lpaplssMuTsIEG-.hDclhpllccsa-tshptGusRVhoslKlDpRpD.pp.tsh...ccKlpsVc .....................hslsll..Phu...st.......sssVsphV.scslcll....p.p..uG....lp.a............plsshtTsl........EG-.hc-lhcllppspEhshp.t.tssRVhoslKl..chR.sp...hsh....ppKlpph............................ 0 171 325 444 +2343 PF01918 Alba DUF78; Alba Enright A, Ouzounis C, Bateman A anon Enright A Family Alba is a novel chromosomal protein that coats archaeal DNA without compacting it. 21.40 21.40 21.60 21.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.03 0.72 -4.38 126 745 2012-10-03 06:24:24 2003-04-07 12:59:11 16 6 348 31 496 820 75 66.30 25 38.49 CHANGED splhVss.tpshhshlptshphl....................................................pps...tcpVhlpuhG.pAIs+AlsluEhl+pch.......s.hp.lpplshss .............................................plhlss.tp.ltsalthshphh.........................................................pps...tccVhlpAhG.pAIs+sVslAEllKc+h..........shhp.hpplp...h......................... 
0 148 273 408 +2345 PF01923 Cob_adeno_trans DUF80; Cobalamin adenosyltransferase Moxon SJ anon Enright A Domain Cobalamin adenosyltransferase This family contains the gene products of PduO and EutT which are both cobalamin adenosyltransferases. PduO is a protein with ATP:cob(I)alamin adenosyltransferase activity. The main role of this protein is the conversion of inactive cobalamins to AdoCbl for 1,2-propanediol degradation [1].The EutT enzyme appears to be an adenosyl transferase, converting CNB12 to AdoB12 [2]. 21.30 21.30 21.30 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.92 0.71 -4.45 180 2946 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 2347 41 715 2068 1103 162.40 34 75.35 CHANGED IYT+TGDpGpT...uLhsGp...RVsKsc.RlcuhGslDELNohlGlsh..sth..............................pplpph..LpplQpcLFclGu-LAs..s.....t............hcls.pp.plptL......EppIDphstp..lssLcpFlLPGG.......stsuAtLHlARTlsRRAERthVsL.....tppps......l.ssts....lpYLNRLSDhLFlhARhhs .........................lYT+TGDpGpT...sL...h...sup...RVsKsc..Rlpu...hGslDEhNuhlGhsh.sth...............................tpphpth.......LtpIQppL.Fsl.u.u-Lus....st.................................htls..pp..plptL................................Eptl..D.chstp...hs..s...L..cp..FlLPGG........stsuAhLHlsRTls..........RRAERthspl.....................tppps.............l..pstl..............lpYlNRLSDhlFlhuRhh................................................................................. 1 266 472 598 +2346 PF01925 TauE DUF81; Sulfite exporter TauE/SafE Enright A, Ouzounis C, Bateman A anon Enright A & Pfam-B_3578 (Release 7.5) Family This is a family of integral membrane proteins where the alignment appears to contain two duplicated modules of three transmembrane helices. The proteins are involved in the transport of anions across the cytoplasmic membrane [1] during taurine metabolism as an exporter of sulfoacetate [2]. This family used to be known as DUF81. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.43 0.70 -4.92 912 12351 2012-10-03 02:02:08 2003-04-07 12:59:11 14 23 3983 0 3751 10142 8371 231.90 19 89.56 CHANGED h.h.h..s..uhh..sG.hlsulhGh..GGul...lhlP..hLh......h.h..........h......................ssth.Ah..usshh....shhh..sshs.u...shs..a....h+....p.t.................l....s....h.p.h...........h..h...h....h.h.h......s.ulh.Gu...hl.G........uhlht..hls......sp..h.............h....phhhuh....hl.lhhuhhh...hh...........ptt...............................................ttt...................hhhthhhhhhh............u.hh.hGhh.uGhhGsG........uGshhsshhh..hhh...s.hshpp..ssuos.......th.hh....hhh........slsu.hhs.....a.............hh...........hGhls.....h...hhh.....h...lhhu.....ul...l..Guhl....Gsp.l....spcl....s.s...t..h.....l+hh...hshll..l.hhu....lth ...................................................................................................................................................hh.hhhuhh...uG..hlsul..hGh...GG..Gh.....lhhP.hLhh..h.....h........................s.sth..Ah......uss..hh...........thhh...sshs.u......sht...a........h+...........p...t............................................................l.......s.........h...p...h.....................h....hh.h...h..h....s.shl.G.u.......hl..G...............uhlht.......hls........sp......h..............................................................l..phlhsl.....hl....lh.h.ulhh.........lh.....p.p.h.........................................................................................................................t..t..............hhhphhh.hh.hh....................s..hh.hGhh..sGh...h...Gs.G....sGshhsshhh........hhh......sh.shpp.....ssuss....th..hhhh.s......................slsu.hhs........a..............hh........................................tGt.ls...........h....hhh.........h.........hhhu.sh..l....Guhl....Gsp..l.......s.t..p.h.......s..s......p.h.....l+hh.hh.h.lhh.hhuh...
.............................................................................................................. 0 1230 2477 3207 +2347 PF01927 Mut7-C DUF82; Mut7-C RNAse domain Anantharaman V, Enright A, Ouzounis C, Bateman A anon Enright A Domain RNAse domain of the PIN fold [1] with an inserted Zinc Ribbon at the C terminus [2]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.07 0.71 -4.23 57 605 2012-10-03 20:43:45 2003-04-07 12:59:11 11 11 493 0 310 595 66 132.00 26 50.48 CHANGED .RFllDshL.GpLA+aLRhhGaDTh....ass.ch......................sDpplhplAtpEpRIlLTRD+tLhcRtthptt....hh..........................................l.pspcsppQLtElhp+hslph.phpP.hoRChpCNu......Lptls+pplhtp...............lP.tshphhpp...FhpCssCt+lYWcGoHacchpphl .................................................................+FlsDshL.G.p.L.A.+hLRhh.GaDsh.......ass..sh..........................sDtplh.ph.AtpE.pRll..L..T...R..Dc....t....L....hc...+pthhtt.......hh.............................................................................l...pstp..p.pQhhclhp.ch.tl.t..tht..hsRC..CNs........L.....h.t.t.t.t.h....t........................................................l..h.h.t..t......a..C..CtplaW.GsHhtph.t..h........................................................................... 0 106 212 273 +2348 PF01930 Cas_Cas4 DUF83; Domain of unknown function DUF83 Enright A, Ouzounis C, Bateman A anon Enright A Family This domain has no known function. The domain contains three conserved cysteines at its C terminus. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.20 0.71 -4.32 57 1113 2012-10-11 20:44:43 2003-04-07 12:59:11 12 15 868 0 454 1522 488 163.70 22 56.47 CHANGED sGshlpYYhlCcRclWLhs+.pIsh-pssp.tVph..G+hlHE......psYp+..pp...+-lpl..s....shplDhlc....ttphhl...........tElKKSp+h.......pcAs.chQLhaYLahLc.cpGlpsp.GhLpYPcp++ppc....VpLs-cpccclccslpcIcpllppp.pPP.spcpp.hC.+pCuYh-hCas ..................................................................................................................h..shlp.ahhC...Rphhlhth...t..t.h.t...p....p........pht.......G....phlHc.................................................p...p.....tt..............cpl.l..p...................tuhh.D.hlc.........tt.tphhs..........................lEhK......c....u..ctt..............t...p...s....t....chQLsh..shh......L....c...h...h....G..h...p..l.p............G...........h.l..h..Y........s.....p......p....+....+....p.h.p............V.t.h....s....p....p...h......c....p.p....l....h......p..h..l....p.......p.....l....c...p.l.h........p....p......t.......h.s........P............s....p....t.p......p.....hC...ppCuhh.chCh................................................................... 0 183 320 397 +2349 PF01931 NTPase_I-T DUF84; Protein of unknown function DUF84 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this prokaryotic protein family is unknown. 
20.40 20.40 20.60 21.10 20.20 20.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.59 0.71 -4.52 20 1061 2012-10-01 20:37:09 2003-04-07 12:59:11 13 5 1021 15 237 655 51 158.90 48 84.25 CHANGED uSTNPAKIpAVppAFspsFstt.shclpGVsVsSuVsDQPhus-ETppGAhNRs+pAhphhspA-ahVGlEAGI-....s...thsFuWhVl.stp....ppG.u+SAuh.LP.hllcclh.cGcELG-VMDElsGpcNl+pKtGAIGlLTsspLoRoulYcQALIhALlPFhssph ..............................AosNPAKIpAlhpA...Fpcl...........Fu...pt......sscIsuVuVp.S.GVs.-QPhGsEET+tGApNRV...t....NA+..c.............h...h..P.........p..........A.........D.....aaVulEAGI...-..........s.................shT.F.uW.sV.l-sss........pRG..EuR.SAoLsLPsslLpcl+..p.....G..ctLGsVMsc..hsGh..cpItp..KpGAIGlFTsG+LTRsSVYaQA.VILALsPFhNs..lh................................. 1 59 118 177 +2350 PF01934 DUF86 Protein of unknown function DUF86 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of members of this family is unknown. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.21 0.71 -4.46 147 1619 2012-10-01 22:14:54 2003-04-07 12:59:11 12 7 989 2 590 1431 169 105.20 20 82.04 CHANGED lhcslpplpphh........................................shs.hcpFh.p....sthhppAlh+plpllsEushclspc............................lhspp..thsa+phsuhRNhllHtYhs.lDhphlaphlpp.cl.......................ssLpphlppl .........................................................................h..h...........................................t.t..hppah..p......sth...h...h.Ah.p.lphlhEushc.lspt.......................................lh.sp.ps.t.s.h.pphhshRsh....l.sHcYh.s...lc.h...chlap.hlpp.pl.......................s.lpphht..h..................................... 0 206 423 515 +2351 PF01935 DUF87 Domain of unknown function DUF87 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this prokaryotic domain is unknown. 
It contains several conserved aspartates and histidines that could be metal ligands. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.62 0.70 -4.54 46 1158 2012-10-05 12:31:08 2003-04-07 12:59:11 12 19 801 0 504 4373 796 209.10 16 38.09 CHANGED lp..lGpLh..ssss.............ls.ltlDl.schls+HhAIlupTGuGKSpssulllpcll.........phss..slllhD.HG.EYspht..........................tspsplhsst........................lpIshhplshp-ltplh.h....t.ssss.ppphlppshpphp..................pphtphshpchlpthhppht......................................................thpphptpshttlhpplpphhpphtth.........tshhptl.....cssplsIl..-lut.....lsptttph.lsuhlh+pl .................................h..hGt........ttt...........h....sh..l.sl...pc.hl.......s.c....H.hu.l.h...GsTGoGKS.p.h.l.p...h..L..lc.phh..............................ttts......phll.lD.s..c.G....E..Yssht................................................................................................t..p..h..t...h..hp.t..................................................................................h.h.l....s....h...................h.......p................p......c.....h.............t....h...h..........................................................hh.....h.h....h............................................................................................................................................................................................................................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 154 317 410 +2352 PF01936 NYN DUF88; NYN domain Enright A, Ouzounis C, Bateman A anon Enright A Domain These domains are found in the eukaryotic proteins typified by the Nedd4-binding protein 1 and the bacterial YacP-like proteins (Nedd4-BP1, YacP nucleases; NYN domains). The NYN domain shares a common protein fold with two other previously characterized groups of nucleases, namely the PIN (PilT N-terminal) and FLAP/5' --> 3' exonuclease superfamilies. These proteins share a common set of 4 acidic conserved residues that are predicted to constitute their active site. Based on the conservation of the acidic residues and structural elements Aravind and colleagues suggest that PIN and NYN domains are likely to bind only a single metal ion, unlike the FLAP/5' --> 3' exonuclease superfamily, which binds two metal ions. Based on conserved gene neighborhoods Aravind and colleagues infer that the bacterial members are likely to be components of the processome/degradsome that process tRNAs or ribosomal RNAs. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.13 0.71 -3.91 149 2912 2012-10-03 20:43:45 2003-04-07 12:59:11 13 43 1840 1 1150 2564 397 149.80 21 49.81 CHANGED +lAlalDspNhhhs.p..............h.chcplhpplpp...........tGp.....lhhtpsYss...............tttt..............ppht...shL......pphG..........h.sl.......phhshhp..............sK.sshDltlslDhhchs..sph...DshlLlSGD..uD...Fs.llpplc.ppG.hpl.lhu................t.tssps......Lhsuss.pal........lp .................................................................................................................................................................lulhlDspNh.h..t.........................................h.s.h.ptlhpthtp..............................................hsp......lhhthhYss...................tttt.....................................pshh.....phL.............pphu........................................................h..tl.................ht.h.shht......................................tK.sssDh.tls..lD.hh..c.hhhp...pph................Dsh.lll..SuD.......uD.......Fs...ll.pplp..cp..G..hpV.hshu..h............t.hsspt......lhpsss.pah...t................................................................ 0 388 740 969 +2353 PF01937 DUF89 Protein of unknown function DUF89 Enright A, Ouzounis C, Bateman A anon Enright A Family This family has no known function. 
25.00 25.00 26.10 26.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.29 0.70 -5.66 86 886 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 573 7 557 885 43 317.00 21 73.89 CHANGED shtcphPsll.ppslsslch.........spss...............pptcpllpphtp..lhtchppspshssh..............................................htshlaRclhphhs..........phDsFpchKp.ps.ppuhphl.pltpp..hcphtsp......................hhpphlclulhGNhhDhulhs.........pp.......ttphppthcpsl...llsD.hsphhptL...................ppspplshlsDNuG.ElhhDhl.LhchLhc...huscVhhtsKshP.hlsDsThcDhp..................................................tttlpc....thscllhp...........u.ss...ssshshtch........ss-lhptlp..pu-LlIhKGphNYcpL.ps-tpht.p..................lhhLhssKs-slssh..sshscpl ..........................................................................................................pphsh.hh.pphhtt......................t....................t.hpthhtth.p...hh.p..h.t..p..t.....h........................................................t.shh..h.....cplhph.ht...........hDs.a.t.c.Kp.pp.......p.....t.uh.p..hh....pl.tpp.....hpphtpp.............................................hhphlchslhGNhh.Dhuhhs....................pp......................ttthtpthp.p.sh.................llsc..hsphhptL............................................tttppl....hlhDNuGhElhhDhl.l.......schLlp.......hu.......s.................cVhhts+shP.hlsDsT.hpD..ht......hh.t.....h.......................................................ttthpp.........t...t..s..p.lh.h.p.................................s..ss.....ssshshtph.....................................s.clh.pt.hp.....puc......LllhKGthNaRpLhssh........th..t......................................lhtLthhKsphlsth...t.............................................. 0 199 329 463 +2354 PF01939 DUF91 Protein of unknown function DUF91 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this prokaryotic protein is unknown. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.24 0.70 -5.20 4 795 2012-10-11 20:44:43 2003-04-07 12:59:11 11 12 704 2 245 677 167 188.00 36 75.29 CHANGED +ttlllllApCpV.YEGRA+ScLspG-RlllIKPDGoFhlHpD.+Kh-PVNWQPPsS..h.clptsph.lhSIRRKPcEcLcVpIpElatsosa.scDhc-LslsGsEu-MuchIhcpPpLIEEGF+PltREYtlspGhVDILGpDccGshVllELKRR+A-lpAVpQLKRYV-.h+c-hs.ttVRGlLVAPSlTpsAccLLc-EGLEF+cL-PP+ss....++shcstphchF ......................................................................h....hhlhupCpspY..G.Rh.pupLs..u.RllhhKsDsol.lHss.tuhcPlNW.sss.s..h.............t.......p......................t..................t......................................l.......h...h.t......t.......ss-.L.l.ht.h........cls......tssL.hs...G.lE..scLQcLLs-p....p.h.l.....u...p.G..hpll.c..R..E.......Y...........T.....s.....h.....G...s.....VDlLspD....t.s....G.s.h.Vs.lE..lK.R+...u....p.h......-uVc.QLsR...Y...l-..hL.sc........-...............h.......h...s.........Vp..GlhsA.pp.Ipsp.A.+h.L.h.p-+Glchlhl-..h........................................................................... 2 65 167 218 +2355 PF01940 DUF92 Integral membrane protein DUF92 Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family have several predicted transmembrane helices. The function of these prokaryotic proteins is unknown. 
20.40 20.40 21.70 31.90 19.90 19.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.42 0.70 -5.20 79 583 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 495 0 375 586 324 246.10 29 79.32 CHANGED ulhlsh.lLuhluh+tp..hLshuGhluAhllGshlhshsGhthahsllsFalhGShsT+hthppKpphul..s.ptcs.GpRshtpVhuNuhsuslhAlhhsh........................tstsh....hhhuahuuhAsstuDThASElG.thhspps.hLIT.Th.+.VssGTsGuVSl.GshAulhGuhlluhsuhhls....................hssth.....hhhl...sluGhlGohlDSllGAslQt..............................thshLsN-hVNhlsThhu...ullu .......................s..hhsh..hls.h..h..u..h.p..p+..sLshuGhhsuhllGhh..hh.hhs...u....hthhhhLlsFFls.........uohhTKh..p..tph..K......pphsh..............s....pcs....GpRsh............hp......VhuNussusl.hulhhhh...........................................................hhssh..hhhu.hl.u....uhAsssuDTauS.ElG.hl..t.p.pP.hLIT...Th..+............hVs.sGTsGGVolhGhhAuh.hGuhhluhsshhhh.......................................hsh........hhhl......sluGhlGollDSlLGAslQt.....................................................................................................ths.hLsNpsVNhlsshhsuh.................................................................................... 0 120 231 311 +2356 PF01941 AdoMet_Synthase DUF93; S-adenosylmethionine synthetase (AdoMet synthetase) Enright A, Ouzounis C, Bateman A, Moxon SJ anon Enright A Family This family consists of several archaebacterial S-adenosylmethionine synthetase C(AdoMet synthetase or MAT) (EC 2.5.1.6). S-Adenosylmethionine (AdoMet) occupies a central role in the metabolism of all cells. The biological roles of AdoMet include acting as the primary methyl group donor, as a precursor to the polyamines, and as a progenitor of a 5'-deoxyadenosyl radical. S-Adenosylmethionine synthetase catalyses the only known route of AdoMet biosynthesis. 
The synthetic process occurs in a unique reaction in which the complete triphosphate chain is displaced from ATP and a sulfonium ion formed. MATs from various organisms contain ~400-amino acid polypeptide chains [1]. 19.40 19.40 19.50 37.60 19.30 18.50 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.30 0.70 -5.97 48 243 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 220 0 139 229 20 380.20 42 98.33 CHANGED hRN...IsV-phpptsl-cpplElVERKGlGHPDoIsDuluEulSRuLs+tYl-+a.GslLHHNsDcs.lVuGputPcFGGGcVlcPIaIlluGRATpch......ts.p......lPlssIAlcAA+caL+cslthLDs-pc.lll-s+lGpGSsDLp-VFpcspt...lPhANDTSFGVGaAPLSpsEplVhpsE+hLNSp.hKpchPtlGEDlKVMGlRcsccIsLTlAsAhVDRalsshccYhphKppl+ctlt-hApph.....s..pccV.....cVtlNTAD...sh-csulYLTVTGTSAEtGDDGuVGRGNRsNGLITP.RPMShEAsuGKNPVNHVGKIYNlLuspIApclsccV-G.lcEVhVcLLSQIG+PIDpPhlAsspllsccG..hshsclcpclppIl-ctLsslsplocpllcGcl .......................tNIhVp.hptpslpcpplElVERKGlGHPDoIsDulAEtlStsLuphYlc+a.GsILHHNhDcs.lVuGputPcFG.GGcll.cPIhIllsGRATpch......tspp....lPltplslcAA+caL+cslp.LDs-pc.l.ll-s+.........l..u.p........G.......S.........s..........DLh-lFpcttp..........lPhANDTShGVGaAPLopsEclVhpsE+h...LNop.t...h.Kp.....chPtlGEDlKVMGlRpscclsLTlAhAhls+alsshpcYhphK-plpptlpchupch...........s..tppl...........pltlNTuD....t.cpsslYLTVTGTSAEtGDDGpVGRGNRsNGLITPhRPMShEAsAGKNPVsHVGKIYNlLuppIAcclhcpl.cs.l.c-shVhlLSQIG+PIscPplsslpllscpu......h.t...phppclptIscc.LsslsplschllpGc.h.......................... 0 34 81 112 +2357 PF01944 DUF95 Integral membrane protein DUF95 Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family have several predicted transmembrane regions. The function of this family is unknown. 
24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.33 0.71 -4.63 172 1288 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 1099 0 417 1025 82 170.70 20 68.88 CHANGED lshllalhuhlhGh..hhsh......t.sphhpth..lst.h.thh.........................................h...hhtlhhNNh.tlshhshhhG...............lhh...Glsslhll.hhNGhhlGhshu..hhhppsh..........hh.hhthl....lPHGllElsulhluuus..Gltlu.................ht.....lltsth.................h........pphpsht...ptht...ptstlh..l.sshhllhlAuhlE.u..alosh ........................................h..hhhhhhuhlhGhhhs..........t.sph.pt.....hst..htphhtt.....................................t.t....h....hhtlhhNNh.h....lshhhhhhG.....................hhh.G.ls.sl..hhL..hhsGhhlGhhhu...hhhppst...................hhhhhhhl......lPHGllElsulhluuuh...uhp.lu.......................ht.......llpsth.........................h...............tct.p.slt.......pthh.......phhthh....l.hhhsllhlAuhlE.ualos.h....................... 0 141 283 372 +2358 PF01947 DUF98 Protein of unknown function (DUF98) Kerrison ND anon DOMO:DM04314; Family This is a family of uncharacterised proteins. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.94 0.71 -4.46 20 217 2012-10-01 19:33:20 2003-04-07 12:59:11 11 6 206 6 95 274 193 150.60 30 71.93 CHANGED LLGDGSPTRHLpLL.TucpspV-lIsMts.s..sss..........spuPpcVppL.......ssPllRRQVWLps......uupsLAaApSWWNtpcs-caLps....+stPIWpsLspt+sELaRElcGltLspuc.hLEptFs.pcGPFWuRHYpFa+cG+sLTlIhEVFSP.tLEpaL ........................................lhuDGShT+hLpll..ss.c.pltVcllp.phhs..ss....................sth.t.hpth..............ss.llpRpVhL+s........................usphlsaAp..S.h..h.sh...s....cl....s....p.t....Lpp..........pshPIGchltpp.....+hE.....haR...El.t..p...lh.h...s.c.ss.....h...L.....t..pt....ap....p....c..t......h....h.u...RcYpl..h..ps.spPlh.lIpEhFs.............................................................. 0 26 65 87 +2359 PF01949 DUF99 Protein of unknown function DUF99 Enright A, Ouzounis C, Bateman A anon Enright A Family The function of this archaebacterial protein family is unknown. 22.80 22.80 22.80 22.80 21.50 22.70 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.20 0.71 -5.19 29 165 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 161 2 106 166 10 177.70 29 89.74 CHANGED KptlRllGlDDuhhtp.....cccshlsGlVh+ss.hl-GlhhsplslDGhDuT-sllshlps.phhtplpllhLsGlshuGFNllDlpclaccTtlPVIsVhc+tP-httlcpAL++hFscsp.c.Rhchlcph.Gphptl..............s.lahpshGls.ppAtcllpthohpu+lPEPLRlAHllApulhptp.+ .........th+llGlDDuhhtp................ptcshlsGslh+usthl.-Gl.shsplplDGhDAT-slhphlps...hth.plphlhLsGlshuGFNllDlppltccsshPVlsVhpctPchttlcpAL++.tFs.ssp....c..Rhphlcph.sphctl..............tslalpshGlshppAppllcthphpu.plPEPLRlA+llAsulh.....t............... 0 27 61 87 +2360 PF01207 Dus UPF0034; Dihydrouridine synthase (Dus) Finn RD, Bateman A, Kerrison ND anon Prosite Family Members of this family catalyse the reduction of the 5,6-double bond of a uridine residue on tRNA. 
Dihydrouridine modification of tRNA is widely observed in prokaryotes and eukaryotes, and also in some archae. Most dihydrouridines are found in the D loop of t-RNAs. The role of dihydrouridine in tRNA is currently unknown, but may increase conformational flexibility of the tRNA. It is likely that different family members have different substrate specificities, which may overlap. Dus 1 (Swiss:Q9HGN6) from Saccharomyces cerevisiae acts on pre-tRNA-Phe, while Dus 2 (Swiss:P53720) acts on pre-tRNA-Tyr and pre-tRNA-Leu. Dus 1 is active as a single subunit, requiring NADPH or NADH, and is stimulated by the presence of FAD [1]. Some family members may be targeted to the mitochondria and even have a role in mitochondria [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.04 0.70 -5.59 17 9141 2012-10-03 05:58:16 2003-04-07 12:59:11 12 33 4628 7 2600 10106 4606 294.60 27 86.25 CHANGED hlA.PMsGloDhhaRpLspchuss..........................LshoEMVsupshlpspphshthhsp.cps........hslQLsGs-PthhAcsAchst-.hGuptIDlNhGCPsc+VppstuGusLhpps-lltphlpshhcAVs...lPVTlKhRlGhD..-spppshchuctspssG.sphlslHGR.......T+tptaps..tAcaptItpl+psls...lPVluNG-Itsh...pcupptLptoGsDuVMlGRuAhspPhlht....tltpstht.pss.ht-h....hphhpp+hptlhphhu.tpulppht+HhtWhhpt...hssstth+pthspstshtch...httl....shhpsht 
........................................................................................................lA.PMtshoDhsaR..........h.l.h....p....p....h.....us.s..................................................................l..h..h..o.E......M......l.........s....s.....p.........s.h....l.....h.....s.........s.....c....p......s......h......t....h.....h...sh.t..ppt...........................................t...lsl.Qlh..........G..s.-..P.pt.hu....c.s....A.c...h.......stp.....h.......G................s.....s............h................I.......D............l........N.h..G..........C.......Ps..p+..V........s.....p....p.....t.............tGu.....s.....Lh...p...c...P....c..l...l...t...pllp.uh..h.p.s.ls.........................lP..V.T.l........K....h....R......l......G..h-...................p.p..t.......p.....h........h...........-....hs...........p.t.........s................p.............p............s............G....s........s...........t..l.........s...........l.......H.u....R........................T.+.p.p..h..ap.......s...........p...s.......c...a......-......t.......I.......t....p............l................+....p...........p.......l...............s..................l....Pl..l.uNG...-...Ihsh...................pc...A...p..p...h......l.....p........t....s........G..........s.......D.........u........V.Ml..GR....u...Ah..s..p.Pa...lh..p..............h....h...p..p....t..........h...h.........s....t...s....tch.............................hp.hl....h....t....p....h....p..h..h......p........h..h..........s......................p........t......t...........t...h..t.....t.....h....p..+..p..h......h..h..h.pt..........h.s...s...t...t...h...+p.ht.t...tphtt...................thh.............................................................................................................. 0 870 1575 2170 +2361 PF00692 dUTPase dUTPase Bateman A anon Pfam-B_127 (release 2.1) Domain dUTPase hydrolyses dUTP to dUMP and pyrophosphate. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.46 0.71 -4.68 29 7405 2012-10-03 01:18:03 2003-04-07 12:59:11 14 34 4788 263 1771 5446 3768 123.20 28 66.70 CHANGED ppssh.sctpstsAGaDLhuspshsl.s..tssthlsTslthslPt.sphuhIhsRSul.utKGlllhs...GlIDpcapG.plplhlhNhspp.shplppGpRIAQLllh.hhp.....hp.spsh.cpopRGstGFGSos .......................................t............suuhDl..h...s.......s................s.......h.......h......ltP.....s........phs....LlsTs.l..t..l..p..........l...........P.........c...sh....h........u.........h...........l....h.sR.Sul...u.p.p...t...h..l...l...h...s...Gh.I.DssY......c.....G...p...l..t..lt..l......h.N......h......u..........p...t...sh.......p...........lp......sG-RIAQh.lhh.h.p.............sth.p...h....p...ph.....s......t.....p...+GptGhs.......................................... 0 563 1093 1455 +2362 PF02670 DXP_reductoisom 1-deoxy-D-xylulose 5-phosphate reductoisomerase Bashton M, Bateman A anon COG0743 Family This is a family of 1-deoxy-D-xylulose 5-phosphate reductoisomerases. This enzyme catalyses the formation of 2-C-methyl-D-erythritol 4-phosphate from 1-deoxy-D-xylulose-5-phosphate in the presence of NADPH [1]. This reaction is part of the terpenoid biosynthesis pathway. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.66 0.71 -3.60 87 3428 2012-10-10 17:06:42 2003-04-07 12:59:11 11 6 3275 79 838 2723 1839 125.40 42 32.41 CHANGED lslLGSTGSIGspTL-V..lp...pp....P-.pFplsuLsA.spNlchlhpQs..pp..F..pPph..l.sls-ppt....hppLpptl........t.......tsplhhGpcult...clAsh....scsDhVlsAIVGsAGLhPTlAAlcuG.KpluLANKEoLVsuG ..................................................lsILGSTGSIGspTL-Vlp...pp...........P-....pFcVsALsA.GpNls.hlscQs..tcF..pPch..s..sls..-..p.ss......spp.L.c..ptL.............s..........................tsclh.sGppuls....-lA.sh..........s-sDhVhsAIVGu.AGLhPTLAAlc.AG.KplsLANKEo.LVsuG......................................... 0 295 570 722 +2363 PF00350 Dynamin_N dynamin; Dynamin family Finn RD anon Prosite Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.96 0.71 -4.43 79 6429 2012-10-05 12:31:08 2003-04-07 12:59:11 18 111 1940 32 2723 11019 2690 176.80 21 26.13 CHANGED lsVlGs.SuGKSSlLpuLlGpch..LP..puss...sTcpslh.lph.........................................scp.p.............sphp..ht-h.....tpphpshsplppthpppppphs.....th..................................................................slhhphhs.ts.sloLlDhPGlsp..ssstcp.................shsppal.ppsslILhl.s.sust...shups..-thplspplDspsp+.........o.lsVlsKs 
.......................................................................................................................................................................lsllGs..SuGKSo...llp.u...l....l....G.........p.......c.....h.........LP....puss.............s..T.p...p.s.h.....h..lph..................................................................................................................................................spptp...........................................p..htp........................tpthps..h....p....p.......l...t....p.t..h..p..t..p..h.p...t...h..t....................................................................................................................................................................................................h....h....h..h.s....h..h............s...l..s..l..lD..h.P......G.ls...p...ssstpp...................................................phhtp...h...l...p..p......s.......s...h..l...l...hl..h..........s.ss.p.......s.hs.....ps.....-.......h.............h.h...h....p......h.c....t.t..h..............h.hhlhst.................................................................................................................................................................................................... 0 828 1542 2205 +2364 PF04912 Dynamitin Dynamitin Finn RD anon Pfam-B_5757 (release 7.6) Family Dynamitin is a subunit of the microtubule-dependent motor complex and in implicated in cell adhesion by binding to macrophage-enriched myristoylated alanine-rice C kinase substrate (MacMARCKS) [1]. 
25.20 25.20 25.20 25.30 25.10 25.10 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.28 0.70 -5.66 8 360 2009-11-18 13:30:50 2003-04-07 12:59:11 9 7 237 0 243 360 0 273.70 22 85.27 CHANGED hIuhsQ.DlYETsDsPEs-ssph..pp-EssscsIEplplssscAapKFps+plsssslDFSDpIuK+p..+pGYphc.puEaEllGp..uppETPhQKhQRLQpEhtELh-Elpshpsssppsc.EEclSssAL.upslssLccpLcsL....+LppllGp.csl.sssp..t.s.sp+LloplEpa...........p.........pp.s.hutsssp............ptschspsu+lAsLE+RLtcLEsslGs.cs-h.s.Lssshp..sssll-u.....lpplpuKsuLLssspLDtlEpRLsslhtKhppIs.E++sus.pDssp-pKIs-LYElspKW-slspp....LPpllpRLpoLcuLHcpAspFuptlupL-ssQpslpspLssspsLLpcVpcs.....htpNLpslp-slsplEuRltsLp ..............................................................s...........................................................................................................................................................................................................................................................................................................................................................-.........s............p+htRl....Ehtph.t...ph.......t..t..........................................tt................t.............h....p.htt...hpp.L.t.......................t.p...........s.....................................tt.....tht....................................................................................................................................................p........t..........t......t....chsph-pRlt.lEthlt...t...................t...............tslh.s.................lp.lptp..lsh.L...p.........t........lpthptcht....l..tchp.......t......l.........t....p....tp..............t...............t......................................t..............t...............t................p..............p......p.............p..........K..............l.......p.t.lathh.phpshts................lP.llpRlhslptlHttAh.phs.......p.lptl-pppt.htpplpp.pphlptlppt............htpshthhtt.httlct+htt..t..
.......................................................................................................... 0 80 126 197 +2365 PF01221 Dynein_light Dynein light chain type 1 Finn RD, Bateman A anon Prosite Domain \N 24.90 24.90 25.00 25.10 24.70 24.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.12 0.72 -3.99 75 1105 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 322 46 723 974 9 86.60 39 73.19 CHANGED h.....s..ptps.hlKssDMs--..Mpp-AlchutpAl..-....+as...h-..+-.lAtaIK+EFDc+YGsoWHCIVG+sFGSaVTHEsppFIYFhls.ph..ulLLFKou .............................................................pshl+ssDMs--.....MQp-Al-.h....A......p..pAh.....-........+as..........h-.........+-..lA....taIK..+.-F.......D++Y.......s.......s.....o.......WH.C.I.V..G..+...s..FG...S.aVTH.....EsppF...l..Y.Fh.lu..pl.ulLLFKo............................. 0 331 455 616 +2366 PF00519 PPV_E1_C E1; Papillomavirus helicase Finn RD anon Pfam-B_48 (release 1.0) Family This protein is a DNA helicase that is required for initiation of viral DNA replication. This protein forms a complex with the E2 protein Pfam:PF00508. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.55 0.70 -5.87 66 1112 2012-10-05 12:31:08 2003-04-07 12:59:11 12 7 265 44 9 1576 23 256.40 49 68.38 CHANGED KFK-saGlSFs-LTR.FKSDKTsCpDWVlssa.GlpcslhEusKpLLppaCtYhalpp..stthGhhsLhLlpFKsuKsR-TVtKLlsslLsVs-pphLhEPPKlRSssuALaWYKpuhussshs..aGphP-WIs+QTllsHp..hpsspF-LSpMVQWAYDN-hs-ES-IAYpYApLA-pDuNAtAFLpSNsQAKaVKDCusMsRaYKRAEM+pMSMSpWIpcRspcl.-..ssGcW+sIVpFLRYQslpFIsFLsAhKpFL+G..hPKKNClllaGPPsTGKShFsMSLl+FLpGpVISasNS+SHFWLQPLsDAKluLLDDATpsCWsYhDsYLRNuLDGNslSlDtKH+uslQlKsPPLLITSNlsltp-c+apYL+SRlpsFpFss.FPhcssGsPlapLsDpsWKSFFpRhWspL-Ls-.pED-s.-..s....GssppsF+C ..............................................................................................................KFKEhYGlSFhELlRsFKSsKosCsDWslusF.Gl.ssolAEuhKsLlp.ashYhHlQsL.ss..shG.hllLhLlRaKCuKNR.TltKhlupLLslspppM.lI-PPKLRSsssALYWY+ouh..S..Nh..SpV..hG-TPEWIpRQTllpHu...h.p-.s.F-LSpMVQWAaDp........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
2 4 8 9 +2367 PF00122 E1-E2_ATPase E1-E2 ATPase Sonnhammer ELL, Bateman A anon Prosite Family \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.33 0.70 -5.32 126 30268 2009-09-12 10:17:11 2003-04-07 12:59:11 15 261 5547 105 10217 25831 1942 229.00 24 26.96 CHANGED hllhhlhlsshlphhpch+upcslppL...tphtsp.p.spVhcs...........................s...........phpp...............l.sspclssGDllhl.csG-plPuDuhll.........................................p...s..........shplDcShLTGEo.......hPlpKp..........................................psshl.....auGohlh...sG..ph......pshVstsGpsopluclsphl...ppspp.tc....................................sslpphls+lsphhshls.lslul.......................lsh...hhthh........................................................................t.shhpslhhu.lulllsusPpuLslssslslshuhtphu+pslll+.phsulEs ..........................................................................................................................lhhhlhls.t.h.l....p.h.h.t.....c...t..+.u.p...ps.l.p..t.L.........h..p...h.t..sp.....p.............s.p..l....l....c..s..............................................................................................G.......................p.h.t.p...................................l...sspclh...s.......G.Dll......hl......c............s..............G...........-.............p........l...P..sDuhll..............................................................................................................p.......u.................pstl..DE...S..h..LT......G..ES..............hPVpKp...................................................................................................................................................tssh..l......h.u..G..o.h.sh............sG..........ph............................hh..h..V..st........s.G.....t.cT..h..l.....u...p.........l........h.ph.l......p..pspp...pc................................................................................
................................ss.l..pp.hh.s....p...l.u.t.h..h..s.h..h..s....l....hlu.h.........................................l.sh...........hh..hhh..................................................................................................ttsh..h..t..sl...h.hu...l...sl....l....l...s...ss...P...puLs..ls....ssh.slhh............u....s...t.....ph.u.c.p.s.hl....l+phtulE........................................................................................................................................................................................... 0 3318 6138 8457 +2368 PF00676 E1_dh E1_dehydrog; Dehydrogenase E1 component Bateman A anon Pfam-B_117 (release 2.1) Family This family uses thiamine pyrophosphate as a cofactor. This family includes pyruvate dehydrogenase, 2-oxoglutarate dehydrogenase and 2-oxoisovalerate dehydrogenase. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.82 0.70 -5.70 16 9023 2012-10-02 16:07:47 2003-04-07 12:59:11 15 32 3995 112 2879 10789 7579 299.30 26 49.43 CHANGED tchhslpRht-ttthhapcpthhGFsthpsGQEAsslGhttAlspp.Dhlhss..YRspuhh.hh+Ghs..lpplhsphhG+t....pspGGuh+..hh..s+p...ahsusuhlusQ...slss..GlAhAtK..........hcpcppVshshaGDGA.osQGphaEuhNhAslachPl...IFlscNNpauhuT.ssc+uuussshhphutGhtIPGlpVDG.DhlAVhpAsKhAt-hsppGpGPhLlEhhTYRhsGHShS.DssooYRspcElpch+tpcDPIpth+ctllspGlso--Ehcshccclcpcl--AhcpAcsss.sssp 
.....................................................................................................................h.....htt.h..p...h.t.h.h.h.t.......t.t..t..........p.h..u..h............t..st...su........p............p.....Ah..p..huh..t...t....u..h...p.....t......s.....D..h..l..h..st..........a..Rs+.s..ph..l.s...p.....s.hs................hppl.....h..s.p.h.h.Gct..........................................psp.u..s..s...h..p.............h......h............................s.......p.........t...................p........h.h..s...s..s.........s........lusp................P....lsh.............Gh..AhAt..p................................................h..p....s....p.......p.....s....l....s.....l...s.h.a.GDuA.su........QGsh.aE....sh.N....h...A.....s.l.....h............p.....h.....s.s.....................l.a.l.l..............N.N...p..h...uh..o.........T.....s......s...........p.........c....s.p.......u......s.p..........h.......h.s........c........h..A........t.u.......h........s....hP.s..h...c.......V......s..G..s...Ds.......A.Vh.....t....s.s....c.hAh-..h....t....p....p...........h..........c...........t....ss.lI-h.h.s.Y..Rh.tG..H..s...pu...D.....c..P...s....t...h............p.....sp...........h.p.p.h...p.p.c.......s....Pl.th.apch..L.h.....p.........p...G.........l.........h..o.p...-...c..h...p...ph...t...p...p...h...c...p...t...lp.ps.hp.spt.......t............................................................................... 
0 974 1810 2435 +2369 PF00524 PPV_E1_N E1_N; E1 Protein, N terminal domain Finn RD anon Pfam-B_98 (release 1.0) Family \N 22.50 22.50 23.20 25.40 21.90 22.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -11.07 0.71 -3.92 76 514 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 242 0 0 513 1 123.50 39 22.24 CHANGED MA.....s.scGTc........ttssss.GWhhl..EA..ssc......spD-..Esh..h-p.....soDlsDhI.Dsssh.....pQssuhtLap.pQpspcsppplpsLKRKYht.....SPtss.........ppLSPRLpuls..lssp.pppuKRRLF.p.....DSGhup...ots- ...........Ms-.spGTss......tttGssGWFhV..EA..spc...s..o-D...-.tEp...t-s.....spDhs-FI.Dsss......pptsuptLap.tQpsppct..pslp.sLKRKahs.....SP.ss........sppplSPRLpuIs..ls...pp....pppuKRRL.Fp.p....DSGYGsops....................................... 0 0 0 0 +2370 PF00511 PPV_E2_C E2_C; E2 (early) protein, C terminal Finn RD anon Pfam-B_87 (release 1.0) Domain \N 25.00 25.00 25.30 25.30 21.00 19.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.69 0.72 -4.02 99 565 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 249 38 0 497 0 76.60 44 22.58 CHANGED PllhL+G-sNsLKChRYRlpp+apsL.FpphSoTWpWss....ssssp.psuhlhlsasoppQRppFLssV+lPpulphthGhhs ...Pllpl+G-uNsLKChRYRl.p.+appL.apthSSTW+Wsu.........sss.pp.puhlTlTasScpQRppFLssVKI.P.solphshGhMo............. 
0 0 0 0 +2371 PF00508 PPV_E2_N E2_N; E2 (early) protein, N terminal Finn RD anon Pfam-B_76 (release 1.0) Family \N 19.10 19.10 20.60 43.80 18.20 18.10 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.35 0.71 -5.14 82 467 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 250 13 0 494 0 197.90 44 51.73 CHANGED MEsLspRLsusQEp...lLslYEpsSpsLp-QIpaWphlRpEpsLhatARcpGlp+lGaQsVPsLsVScsKA+pAIchpLtLpoLppStYupEsWTLp-TShEha.hssPppCFKKtGpsVEVhaD.s-ppNsMpYThWshIYhpssss..WpKspGpVDapGlYY.ptpstK.pYYVpFpc-Ap+YGpoG......pWEV+..hsspslhsP.sosoS ....MEsLspRLsAsQ-c...lLslYEpDSscLpspIpaWphlRhEsslhYtARctGlp+lsa.QsVPsLsVScsKAppAIEhQltLcoLppSpYusEtWTLp-TShEha.hosPppCFKKpGhoV-VhaD.s-psNsMpYTsWstIYhpsps............s...............Wp+VpGpVDapGlYY..hppGh.+..sYYlpFp.c.-Ap+Yupos......hWEV+..hssplIhsPsos.............. 1 0 0 0 +2372 PF02319 E2F_TDP E2F/DP family winged-helix DNA-binding domain Mian N, Bateman A anon Pfam-B_8420 (release 5.2) Domain This family contains the transcription factor E2F and its dimerisation partners TDP1 and TDP2, which stimulate E2F-dependent transcription. E2F binds to DNA as a homodimer or as a heterodimer in association with TDP1/2, the heterodimer having increased binding efficiency. The crystal structure of an E2F4-DP2-DNA complex shows that the DNA-binding domains of the E2F and DP proteins both have a fold related to the winged-helix DNA-binding motif. Recognition of the central c/gGCGCg/c sequence of the consensus DNA-binding site is symmetric, and amino acids that contact these bases are conserved among all known E2F and DP proteins. 
21.20 21.20 21.30 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.61 0.72 -4.41 33 1416 2012-10-04 14:01:12 2003-04-07 12:59:11 15 13 169 2 833 1251 16 71.90 39 20.10 CHANGED RpcpSLslLop+Fltlhpp....spc.shlsLscsAcpL....................ss..p+RRlYDIsNVLculslIpK.......hpKsplcWhG ................................................RhppSLthhopKF.ltlhpp..................sps....usl...sLs..p.....sAcpL...............................................ss..p+RRlYDIsNVLpulslIpK...........tpKsplpWhG............................ 0 297 417 591 +2373 PF02817 E3_binding e3_binding; e3 binding domain Griffiths-Jones SR anon Homstrad Family This family represents a small domain of the E2 subunit of 2-oxo-acid dehydrogenases responsible for the binding of the E3 subunit. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.38 0.72 -4.32 58 9252 2009-01-15 18:05:59 2003-04-07 12:59:11 12 41 3883 49 2483 6857 3964 38.30 38 8.78 CHANGED scshAoPssR+LApEpulDLs...plp.GoGssGRIh+pDlps ...............hsoPssR+lApE.p.G...l...-....ls....pVp..G.oG.h.p.G.RIh+cDVp.............. 0 779 1517 2071 +2375 PF00518 E6 Early Protein (E6) Finn RD anon Pfam-B_57 (release 1.0) Family \N 20.70 20.70 20.70 21.80 20.30 20.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.34 0.72 -10.86 0.72 -3.72 26 915 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 239 6 1 645 5 108.30 48 74.15 CHANGED ClaCpchLspsElhsFsh+-LplVaR-s.hsausCstCLphhuplcph+aaphSlausslEphstpslhclhIRChhCt+.Ls..EK.cpl.ppcpF+clp.spW+GpChpC ........................................CVaCKppLpp..pEVacFAapDLplV..YR.Du...PaAlCctCLchhSK..lpch.RaYp..hSlYGsT.LEphhpKsLs-lhIRChtCQ+PLsPpEKp+HlspppRFHpIp.G.....cWpGpChpC........ 
0 0 0 1 +2376 PF00527 E7 E7 protein, Early protein Finn RD anon Pfam-B_95 (release 1.0) Family \N 21.50 21.50 22.10 22.10 20.00 19.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.56 0.72 -3.83 75 626 2009-09-14 14:24:34 2003-04-07 12:59:11 13 1 232 5 0 438 1 91.80 40 93.18 CHANGED G.........p.psTLpDIVL..ch.pP.........pslD.LhCpEpLs....sop..-.........-Esc.t......................psYpllssC.sp..Cppsl+LsVpuopss.lRsLppLLh.ssLslVCPtCu ...................................................Gp.psTLp-hlL..-L..pP..........pssD.LaCaE.QLs....cSs.-E........-E...chst..t..............p.tpssYtIlThC..sp..CcsslRLsVpSTps-.lRsLQpLLh.GolslVCPtCu.......... 1 0 0 0 +2377 PF00563 EAL DUF2; EAL domain SMART anon Alignment kindly provided by SMART Domain This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function [1]. The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site [1]. 
21.60 21.60 21.80 21.80 21.40 21.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.59 0.70 -4.88 137 26968 2009-09-15 16:29:44 2003-04-07 12:59:11 15 1049 2504 33 6836 21773 1845 227.50 27 36.53 CHANGED tpplppu...h..tpt..ph.thhaQ....P.lh...s.hpssp....lh...uhEslhRhpptptt.hl..sstp......ahshhcp........shhtpls.phllcpshp.phtph.........th.l..slNl....ss.tpltssp...hhptlt...thh.........th.ssp.lslElsEs..................pphp..phlp....p.L+p...hGhplulDcF..Gsshs.shph.ltplshc..hl.KlDpphl..tth.....stpspthlps........lhphucph........shpllAcGVEspp..phphltphGsc.....hhQ..Ghhh...upP .............................................................................................................................................t...ltpA.....l....ppp.....ph.hlhaQ..........P..hl...............s..hpssp..................lh....GhEu...LlR...a..pp..s..p....t...s....hl.....sPsp.....................Fl..s.hAEp.................................t....Ghh.htls..phllc.pshp....p...ht.p.h.....................................shpl.....................ulNl.....................Ss...tp....l....tpss............hhp.plt.....phl.........pph.tlss..p.p....l...t..lElTEs.........h..............hps.tp..h.h.p.hlp............p..L+p....hGhpl.u..l......D..DF...........GoGaS....oL.......sh...L....pp...h...s..h..D.......hl.K..I..D+uFl...............pslt......pst.......p....s.....pt.......l.l.p.u.......................ll..p.l..u.c.pL........s.h....p.l.l.AEGVEopp...php.h..L.p.p.h..G..s.s........hhQGahau+P...................................... 1 1860 4128 5580 +2378 PF04157 EAP30 EAP30/Vps36 family Wood V, Finn RD, Bateman A anon Pfam-B_8830 (release 7.3); Family This family includes EAP30 as well as the Vps36 protein. Vps36 is involved in Golgi to endosome trafficking. EAP30 is a subunit of the ELL complex. The ELL is an 80-kDa RNA polymerase II transcription factor. ELL interacts with three other proteins to form the complex known as ELL complex. 
The ELL complex is capable of increasing that catalytic rate of transcription elongation, but is unable to repress initiation of transcription by RNA polymerase II as is the case of ELL. EAP30 is thought to lead to the derepression of ELL's transcriptional inhibitory activity [1]. 25.00 25.00 25.10 25.00 24.70 24.60 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.28 0.70 -5.19 67 648 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 296 8 450 643 7 216.70 26 60.19 CHANGED sGluulpppptpppphs-hshphshp..clspLhpphpphhphhcphupptpschppssp...................phpphhsslGlss.....hsstc................h..uhspFa.ELuhplsEhsh........phhcpsG.......................G.....llsLp-lashhN+.....sR.tst......................................hlospDlhcAschh.cpLshs.hplhphs..............tuhhhlpsss.s-h.tspsplLphh............................................................................phtsslohtplspphs.......WohshupptLp..p.hhppGhlh..hD ...................................................sGluulpppt..pppp.sctsh..s.uhp..clspLh...........pphpphhshhcphApc..h..p..p.ch..p.p.s.sp.hcs.......................................pFpphstslGlss.........................sopc........................h...uhspaahELuhpl..sEhhh..............sshpc.sG......................................................G.........lls.Lp-lhshhs.+......uR...uhp......................................hlS......s..-DlhcAschL..csL.......shs..hplhphs.....................G.hhllp....shs............s-.........pstsplLphs...................................................................................................pht...uhlosppltppls...................WshthApphLp...t.h.ppGhlhhD............................................ 
0 156 251 373 +2379 PF01309 EAV_GS EAV_env_prot; Equine arteritis virus small envelope glycoprotein Finn RD, Bateman A anon Pfam-B_656 (release 3.0) Family Equine arteritis virus small envelope glycoprotein (Gs) is a class I transmembrane protein which adopts a number of different conformations. 25.00 25.00 440.70 440.50 18.00 17.50 hmmbuild --amino -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.41 0.71 -4.80 7 84 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 2 0 0 84 0 196.00 95 86.34 CHANGED WWRAVHEVRVTDLFKDLQCDNLRAKDAFPSLGYALSIGQSRLSYMLQDWLLAAHRKEVMPSNVMPMPGLTPDCFDHLESSSYAPFINAYRQAILSQYPQELLLEAINCKLLAVVAPALYHNYHLANLTGPATWVVPTVGQLHFYASSSIFASSlEVLAAIILLFACIPLVTRVYISFTRLMSPSRRTSSGTLPpRK WWRAVHEVRVTDLFKDLQCDNLRAKDAFPSLGYALSIGQSRLSYMLQDWLLAAHRKEVMPSNVMPMPGLTPDCFDHLESSSYAPFINAYRQAILSQYPQELLLEAINCKLLAVVAPALaHNYHLANLTGPAsWVVPTVGQLHFYASSSIFuSSVEVLAAIILLFACIPLVTRVYISFTRLMSPSRRTSSGhLPQRK 0 0 0 0 +2380 PF02905 EBV-NA1 EBNA1; Epstein Barr virus nuclear antigen-1, DNA-binding domain Griffiths-Jones SR anon Structural domain Domain This domain has a ferredoxin-like fold. 25.00 25.00 53.10 53.10 18.40 18.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.54 0.71 -4.72 5 148 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 9 4 0 87 0 90.90 90 66.67 CHANGED KGGWFG++RGQGGps.sKFEshA-uL+ALLuRCcuPRTosEGcWssGVFVYsGSKTSCYNLRRuLALCIPECRLTPLuRLPYGaAPGPGPQPGPLRESossYFLVFLQTplFAEClKDAI+DYIpT+PsPTssl+VTVCoFD.DuVML .....KGGWFGKHRGpGGSN.QKFENIAEGLRhLLARCHVERTT--GsWVAGVFVYGGSKTSLYNLRRGIALAIPQCRLTPLSRLPFGMA................................................................ 0 0 0 0 +2381 PF00378 ECH Enoyl-CoA hydratase/isomerase family Finn RD anon Prosite Family This family contains a diverse set of enzymes including: Enoyl-CoA hydratase (Swiss:Q13011). Napthoate synthase (Swiss:P27290). Carnitate racemase (Swiss:P31551). 3-hydoxybutyryl-CoA dehydratase (Swiss:P52046). 
Dodecanoyl-CoA delta-isomerase (Swiss:P42126). 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -5.25 13 26211 2012-10-02 13:07:06 2003-04-07 12:59:11 15 113 4081 502 8836 22457 13530 233.90 23 67.75 CHANGED phphpsulAhlplcpPt.slNulssphlsElspulpphcsDssl+.ulllouscpsFsuGuDlp-hsst.....tp.sshptsstclapclpssshPllAAlNGhAhGGGhplALssDhtlAucsAp..hGhsEsplGlhPuuGGo.hLsRhlGhstAh-hlhsGcphsApEAh+hGlVspVVs............p-p.lhcpAlphspclssp............s.hulshhKphhpt.....hcpslspstptstptasushssccspcuhpu...................ahEc ............................................................................................................h....tssls.hls.l..sR...Pp...t.....hN....A.l...s......t...h.h.p...p..............l.........t...p...s...l.........p....p....h............p............p...........-.............s......s.......l.........c....s....l....l........l............s.......u............s.............s............c............s............F......s.............A.............G............u....D....l.......p.......t.......h.tph.........................t.....t.....h.........t..........t.........h.........h............p............t...............h................t............p................h.........h............p.........t...........l...........p.............p.............h.............s............+....P....l......l.Au...l...s..............Gh.AhG.G.G.h.p.Ls...h.s...s...D....h.t.l.................A............u.c..s..Ap..............h...u..h.s..c..s.......p..l..G....l.....h......P.......s....s.G.....u.s.........................h...L....s....+...h..l.............G......h..........t......t........A...h..........c.h............h...........h............s.....G......c...t..h............s..A.pc..A.h.p..h..........G...L.l....s..p.....l...ls......................................................................tsp....l........p....t....s.........h....p..h...u......p......p..l....t..tt..................
.....................................................s.s..h..u..h..t...h...h..K..t......h..h..pt..................t...........t...t............t.......t.......t.....................h.......t.....p...t.................................................................................................................................................................................... 0 2394 5222 7277 +2382 PF04736 Eclosion Eclosion hormone Kerrison ND anon DOMO:DM04950; Family Eclosion hormone is an insect neuropeptide that triggers the performance of ecdysis behaviour, which causes shedding of the old cuticle at the end of a molt [1], [2]. 25.00 25.00 48.60 47.90 21.70 21.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.34 0.72 -4.53 6 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 31 0 29 44 0 60.10 58 68.49 CHANGED phDlhGGYDhluVCIsNCAQCK+MaGsFFEGpLCAEAClpFKGKhIPDCEDIuSIAPFLNKL ...........s.hsGhDhltVClpNCAQCK+MaGsaFpGphCA-uClKFKGKhIPDCEDluSIuPFLNtL... 0 8 11 26 +2383 PF02963 EcoRI Restriction endonuclease EcoRI Griffiths-Jones SR anon Structural domain Domain \N 19.90 19.90 20.30 20.70 18.60 19.40 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.51 0.70 -5.59 3 52 2012-10-11 20:44:43 2003-04-07 12:59:11 11 1 50 10 6 62 7 187.20 49 86.48 CHANGED SpGVlGIFG-tAKtHDLoVGEVS+tVlsKLoEDYPQLoFRYRsSIcKKEINEAL+KlDPcLGQTLFVENASI+PDGGIlEVKDDaGNWRVVLVuEAKHQGKDIENIRsGlLVGKsKDQDLMAAGNAIERSHKNISEIANFMLuESHFPYVLFLEGSNFLTEoIcVTRPDGRVVsLEYNSGMLNRLDRLTAANYGMPINoNLCcNhFV+HKDKoIMLQAASIYTQGDGusWsuucMFEIML-ISKTSLRILGcDLFcQ ..........................................................................................................h+tplphp.h..N.hLptlcschs.pohFh.sspIKPDGGll..cl..KD..Dpt.h+l.lLloEAKhQGps..pIpt..h.GKtpp....tsGNAIERuaKNlsElANhMLpEt+FPYllFL-GoNFlTpsh.l.RPDGRhl.l.YssGhLNRlD+LTuANY.GMPINpNLC.N+Flphpst.IMLQAhSIYTpGpGttWs.p.Mh.hh..................................... 
0 1 4 5 +2384 PF03974 Ecotin Ecotin Finn RD anon Pfam-B_54504 (release 7.2) Family Ecotin is a broad range serine protease inhibitor, which forms homodimers. The C-terminal region contains the dimerisation motif [2]. Interestingly, the binding sites show a fluidity of protein contacts binding sites show a fluidity of protein contacts derived from ecotin's innate flexibility in fitting itself to proteases while [4,5]. 22.20 22.20 24.00 24.20 21.70 21.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.76 0.71 -4.42 47 725 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 700 31 89 328 19 124.20 65 76.27 CHANGED tcphts.aPtspsGhpRpVIp....LPt.ss...Es...sa+VELlsG+shpl...D.CNpptLu.Gplpp...........colcGW.GYsYYplsths...ss.......oThMACs...spt+pppFVsl..ust..hlpYNS+LPlV.VYlPcssElRYRlWp......up ..............L.EKIAPYPQAEKGMKRQVIp....LsPQcD...ES..........sLKVELLIGQTLcV...D.CN.hH...RLG.GcLEo...........K.T.LEGW.GYDYYVFDcVouPV.........STMMACP...DGKKEpKFVTAaLG-su.MLRYNSKLPIV.VYTPcNVDVKYRlWKA............... 0 13 29 57 +2385 PF00736 EF1_GNE EF1BD; EF-1 guanine nucleotide exchange domain Bateman A anon Pfam-B_488 (release 2.1) Domain This family is the guanine nucleotide exchange domain of EF-1 beta and EF-1 delta chains. 20.80 20.80 21.40 21.00 20.50 18.10 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -10.04 0.72 -4.29 87 874 2009-01-15 18:05:59 2003-04-07 12:59:11 14 17 487 11 461 872 67 86.40 48 37.56 CHANGED hop....lllcVhP.s..sEsDhccLpcpl.cslptcul..s.s.ch.PluFGl+tLplhslltD.ccsusDpl.pEtl...sh.-t..........................VpSs-lsuhs+l ...................................KSs..llLDVK..PWD..DETDhpcLEcsV.Rul.p.h-..GLl......W.....G.uuKLVPVGaG.IKKLQItsVlED..DKV.usDtL.pEpI......pth--a..................................................VQSsDlsAhsKl............................................................................................. 
1 149 255 374 +2386 PF00647 EF1G EF1G_domain; Elongation factor 1 gamma, conserved domain Bateman A anon Prosite Domain \N 20.70 20.70 25.40 25.40 20.60 19.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.25 0.72 -4.34 40 588 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 372 1 292 576 1 100.60 47 27.49 CHANGED Ksc.cPh-..sLPp...uoFs.lD-aKRpYSNpDT..hss..AlPaFW-p.aD.sEsYSlWhscYK..Ys-EL..ph.sFMosNLluGah.QRL..-.phRKauFushslaGcs.ssssIsGlalhR.....Gp- .......................KsKcPh-tLP+.........uoFs.LD-aKRhYSNc-o...hsV..AlsaF.W...........-p....a..D...Es.a.SlWhs-Y+..Ys-EL..oh.sFMSsNL......lsGhF.QRL..-.thRKhuFushhlaGps......ss..ssIsGlalhRGp-................ 0 96 162 239 +2387 PF00889 EF_TS Elongation factor TS Bateman A anon Pfam-B_1408 (release 3.0) Family \N 21.00 21.00 22.50 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.43 0.70 -5.02 168 5433 2009-09-12 05:55:42 2003-04-07 12:59:11 14 21 4607 24 1441 3651 3185 191.80 35 74.06 CHANGED AuEGllsshl..psst........usllElNsETDFVA+s-pFpthspplsphsh.............t.....pt.sshc...........tlht..h.t.....tslpphhtthhuplGEphpl+R..h.thhps.....s.h.ssYlHst.............u+lGVllthpss.t.........plu+clAMHlAAhsP...phlstc-lss-hl-..+E+............................cl.............hptpstp.......................................p....GKP....................................................p...plh....-K.....llpG+lpKah.......pEhsLLcQsFV..p.DschTVpphlcptss.......pltpFsRaclGE 
.................................................................AAEGllt..shhpssh..........usllElNsETDFVA+st.....tF.thspplsphhh........................................t...tp...ssh-......................th.h.t...............phppthhthh..u.p.IGEplplRR.........h.thhpt..............th......hssY..Ht.........................s+.l.GVls...h..pusst............................phu+plAMHlAAh..pP......phls...c.-..lst-h.l...c.+Ep.............................................pl..........................................hhppshp........................................................p..........GKP..........................................................cpIh-K.......hlpG+hpK.ah.......pEh.sLhsQ..s..al.......h.-...s..p...h.TVtphlcptss........plhtFhRaclGE................................................................................................................................. 0 539 975 1245 +2388 PF01132 EFP Elongation factor P (EF-P) OB domain Finn RD, Bateman A anon Prosite Domain \N 20.90 20.90 20.90 21.10 20.50 20.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.58 0.72 -4.29 271 5656 2012-10-03 20:18:02 2003-04-07 12:59:11 15 10 4552 12 1140 2928 2197 55.20 32 29.59 CHANGED c+chpYLYpDG-.t.ahFMDs-.oYEQhpls...tctlu.-shpaLpE..shplplhh..as.spsl ........+shpYLYp.D..G-..t.ahFMDsE.oYEQhpls...tctlt...-.phpaLhE..sh.p..splhh..as.Gpsl.................... 1 382 741 957 +2390 PF04863 EGF_alliinase Alliinase EGF-like domain Mifsud W anon Pfam-B_4527 (release 7.6) Domain Allicin is a thiosulphinate that gives rise to dithiines, allyl sulphides and ajoenes, the three groups of active compounds in Allium species. Allicin is synthesised from sulfoxide cysteine derivatives by alliinase (EC:4.4.1.4), whose C-S lyase activity cleaves C(beta)-S(gamma) bonds. It is thought that this enzyme forms part of a primitive plant defence system. This family represents the N-terminal EGF-like domain [1]. 
20.00 20.00 21.40 21.00 19.00 18.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.98 0.72 -3.98 6 87 2012-10-03 09:47:55 2003-04-07 12:59:11 8 4 21 7 34 96 0 55.30 63 12.67 CHANGED hoWThKAAcEAEAVAAIsCSuHGRAaLDGlh.s-Gs.PhCECNsCYoGsDCSshlsNC ..........hoWoh+AAEEAEAVAsI.sCStHGRAFLD..Gll.s-Gs...PtCECNoCYsGPDCSp+lpsC. 0 6 24 30 +2391 PF01303 Egg_lysin Egg lysin (Sperm-lysin) Finn RD, Bateman A anon Pfam-B_1464 (release 3.0) Domain Egg lysin creates a hole in the envelope of the egg thereby allowing the sperm to pass through the envelope and fuse with the egg. 21.00 21.00 21.10 21.10 20.60 20.60 hmmbuild --amino -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.68 0.71 -4.11 24 77 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 40 11 2 85 0 88.00 52 83.19 CHANGED ls+thEsAlKlpIlsuaD+cLspWlp+HGptl....oshQ+KTLaFVNR.RaMQTaWpsYhhahs++IttLG.RsssssDYsplGAcIG+..Rlshchh.Ysall++N...hlP+apsYMtclhup+suDlPl .......ElAlKspIluGFD+pLspWLppHGptL....oslQ+KsLYFVNR.RYMQTah..a..h.......h........................................h..........h.................... 0 0 0 2 +2393 PF00971 EIAV_GP90 EIAV coat protein, gp90 Finn RD, Bateman A anon Pfam-B_210 (release 3.0) Family Equine infectious anaemia (EIAV). EIAV belongs to the family Retroviridae. EIAV gp90 is hypervariable in the carboxyl-end region and more stable in the amino-end region. This variability is a pathogenicity factor that allows the evasion of the host's immune response. 
20.60 20.60 20.60 20.70 19.90 20.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.45 0.70 -6.00 3 1082 2009-09-11 14:56:58 2003-04-07 12:59:11 13 5 9 0 0 800 0 234.90 62 80.26 CHANGED SKNSMAESKEARDQEMNLKEESKEEKRRNDWWKIGMFLLCLAGTTGGILWWYEGLPQQHYIGLVAIGGRLNGSGQSNAIECWGSFPGCRPFQNYFSYETNRSIHMNNNTATLLEAYHREITFIYKSSCTDSDHCQEYQCKKVN.......Nusph.sVsNTTEYWGFKWLECNQTENFKTILVPENEMVNINDoDTWIPKGCNETWARVKRCPIDILYGIHPIRLCVQPPFFLVQEKGIANTSRIGNCGPTIFLGVLEDNKGVVRGNYTACNVsRLEINRKDYTGIYQVPIFYTCNFTNITSCNNESIISVIMYETNQVQYLLCN.NNNSNNYNCVVQSFGVIGQAHLELPRPNKRIRNQSFNQYNCSINNKTELETWKLVKTSGITPLPIS .........................................................................................................sF.sh.phtsNRsh.hsNpTuTLL-AYpREIT.IY+oSCsDSDHCQEYQCppVs..................s..s...................h.p..ss..p.s......o....s........E.....YWGFKWLECNQTENhKTILVPENEMVNINsssTWIPKGCNETWARVK+CPhDlLYGIp.IRhCVQPPFFLhp..pp.t..s.s.s.u.RIuNCGPTIFLGVLEDNKuslps...ssCplpphp.IpR.DYoGhYQlPIFYhCshTslpS..................................................................................................................................................... 0 0 0 0 +2394 PF01176 eIF-1a Translation initiation factor 1A / IF-1 Finn RD, Bateman A, Mistry J, Wood V anon Prosite Domain This family includes both the eukaryotic translation factor eIF-1A and the bacterial translation initiation factor IF-1. 23.00 23.00 23.00 23.20 22.90 22.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.04 0.72 -4.55 63 5916 2012-10-03 20:18:02 2003-04-07 12:59:11 14 9 5270 9 1627 2668 2074 65.40 52 77.15 CHANGED cphph.GtVhchLuNuhacVphtsGpphLu+IsGK.h++plhIhtGDhVlVch.....psap..hsKucIsaR ..................IEhpGpVlEsLPN.uM..F+V.c...........L..........E....N..G..H.........h....l.L..AHIS.....GKhR....ppaI....R....ILs.G.D+V.pVEl........oPYD.....Lo+GRIsaR....................... 
0 532 1008 1345 +2395 PF05091 eIF-3_zeta Eukaryotic translation initiation factor 3 subunit 7 (eIF-3) Moxon SJ anon Pfam-B_6311 (release 7.7) Family This family is made up of eukaryotic translation initiation factor 3 subunit 7 (eIF-3 zeta/eIF3 p66/eIF3d). Eukaryotic initiation factor 3 is a multi-subunit complex that is required for binding of mRNA to 40 S ribosomal subunits, stabilisation of ternary complex binding to 40 S subunits, and dissociation of 40 and 60 S subunits. These functions and the complex nature of eIF3 suggest multiple interactions with many components of the translational machinery [1]. The gene coding for the protein has been implicated in cancer in mammals [2]. 17.50 17.50 18.80 17.50 17.20 17.20 hmmbuild -o /dev/null HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.83 0.70 -5.88 36 378 2012-10-11 20:44:43 2003-04-07 12:59:11 7 10 262 0 257 371 7 443.00 41 90.26 CHANGED hshhhssl..Nss.uWGPspssp.....th.........slPatPFSKuD+lG+luDW.......sshp............................pts.shu.ssp.ash................ts-DEsoFplVD.....ss+ss....tpt+sp.....thsptsttpc.....................tthhsppttputt+cttthtp..........hst+.stt.p................t...hR-uSVplps-WshlE..ElcFscLsKLs.l.ss.cs-Dl..sshGpl.aYD+saD+ls.sKs.E+.Lpph.c.Rs.hassTTo-DPlIpcLhpcspu.........................................................sVaATDsILusLMsssRSlYSWDIllp+hG.sKlFhDK..R-susl....DhlTVNEs.A..s-sPh-.......s....pssINospuLuhEAThINpNFspQVlhpspp..phpacpsp.PFhscspc....................uShuY+YR+asLssp................lpLlsRsElDulhp...s..sscspals..l+ALNEaDsKhps.....l-WRpK.L-oQRGAllAsEh+NNosKLA+WssQulLAGuD.hKLGaVS.......Rsss+DsppHsILuspsaKPc-hAsQhsLshsNuWGIlRsIlDhsh+..pp-G............KYlLlKDPsKsllRLYpl 
...........................................................................s.......l..s.s.uWGPsthsp.......th.................shPYtPFSKuD..+LG+..huDWos...ts.p..........................pp.s.th.u..ss...p.ash................tt.-DEsoFplVD........ss+sp.t......hh...tpt..ph..............t.ttp.p.t.tpttt...................t...h..tt.tpt.ttptt..thtp..............ttp..hs.ttt...........................t+...RcuSVplps-WphhE..Eh-FspL.Khp..h.........ps......s.....-spDl.................tphG......t.L.aYD.+saD+ls..s......+s.E+....Lp..p.h.p...Rh.hasV..TTo-DPlIpc.L...tcspu.........................................................sVFATDsILuhLMsssRSlYSWDIVl.+hG.sKlFhD..K..RDsuph..........Dh.lTVsEs.u..t-sP.-................ptsshNsspsLuhEAThINpNFupQslhtstp...+hph.psN.PFhpts.-.............................AShuY+YR+acLsts................lpLlsRsEhDuVhp.................ss...sspspals..l+ALNEaDs+hps.........sl-WRpK.L-oQR....GAVlAoEhKNNuhKLA+WTspAlLAsu-....hKLG.aVS.......Rhps+s.stpHlILus.taKPp-FAsQhNLshsNuWGIlRsllDhshc..........t..-G................KYlllKDPNKshlRlYp............................................. 2 108 152 212 +2396 PF01287 eIF-5a Eukaryotic elongation factor 5A hypusine, DNA-binding OB fold Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family eIF5A, previously thought to be an initiation factor, has been shown to be required for peptide chain elongation in yeast [1]. 21.50 21.50 21.60 21.50 21.40 21.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.03 0.72 -4.03 97 832 2012-10-03 20:18:02 2003-04-07 12:59:11 15 11 539 15 474 760 38 66.70 37 40.40 CHANGED hVcRp-aQ.LlsIpsDs.......alsLMs.-sG-T+-DlclP..........pt...plscclcsta....csG.c-h.VsllsA.MGcEp...lhshK .......VpRp-YQ.LlsIp.pDG.......aloLhs..-sG-o+-D..l+lP..............-s......pLspp.....Icsta.......spG..c-...lh.VsVhsA.MGcEthhshK...................... 
0 143 254 374 +2397 PF01873 eIF-5_eIF-2B eIF5_eIF2B; Domain found in IF2B/IF5 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the N terminus of eIF-5 Swiss:P55010, and the C terminus of eIF-2 beta Swiss:P20042. This region corresponds to the whole of the archaebacterial eIF-2 beta homologue. The region contains a putative zinc binding C4 finger. 21.60 21.60 21.80 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.74 0.71 -4.46 13 1012 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 491 15 654 962 101 123.90 36 40.09 CHANGED huh.....pNsctsssDcp..RachPthpstlEGp...tKTllsNhpDIAKsLsRsPp.alhKalhtELGosGslDup.sRhllpG+apspplpslLccaI+cYVlC+sCpsP-Tclh+c..sRhhhLcCcACGucss ........................hhp.....Nsp.sss-ph..RachP..p..s.hhEGp......hKTlhsNhs.-ls+s.................LpRssp......al............hcahhsELGsp.us.h.Dsp....sRhll....pG..pap.s.p.plpslLcpaIpc.YVhCtpCc.sP.-.TpLphc......sc...h..hhlpCcACGtp..s......................... 0 216 369 528 +2398 PF01912 eIF-6 eIF6; eIF-6 family Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes eukaryotic translation initiation factor 6 as well as presumed archaebacterial homologues. 
25.00 25.00 26.20 26.10 24.60 24.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.15 0.71 -5.23 65 564 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 471 8 362 545 94 190.30 51 82.10 CHANGED h+hshtsss.pIGVaspsosshsLl.shssscphhshhcppLs....lsl.lcToIuGopllGplssGNpsGlLVPshspDpElp....pL+pph..slpVphlcp.+hoAlG.NlIhsNDpsALlaP-lsc-stchIsDsLsV.EVh+toIAs.shVGShuVhoN+GsLVHPcsot-ElcpLsslhp...Vs.lssGTVNtGoshlGuGllsNshus .......................s.RspFE.sss-lGVFupLTNsYsLV..ulG.u.SEsFYSsFEuELst.hIP.l...l+soIu.GoRllGRhssG.....N+pGLLVPssTTDpELp....HlRNsLP...Ds................Vplp..Rl-..E.RLSALG.NlIsCN.DaVALV..HPDl-+ETEEl.luDVLsV.EVF.RpTlAspsLVGSYsshoNpGGLVHPcTohp-.-ELSsLLQ...VP.lsAGTVNRGSslluAGhVVNDash..................................................... 0 122 213 298 +2399 PF03608 EII-GUT PTS system enzyme II sorbitol-specific factor TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 26.90 26.70 22.50 22.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.95 0.71 -4.41 17 904 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 809 0 76 276 63 163.20 64 89.76 CHANGED alsphAEsFlsLFptGGcsFluhlTGIlPhLlhLLlAhNulIthlGEERlp+lAphuu+Nsl....hRYhlLPllusFhLsNPMshohGRFLPE+aKPuaYsuAspasHs.sGLFPHlNPGELFVaLGI........AsGlTp..LG......hssssLAlRYhLVGllhshl+GhVT-hhTsalt+p .....h.IT+uAEWFIGLFQcGGEsFsGh.l..........TGIlPLLIsLLVhMNALIsFIGpcRIERhAQ+sAsNPl....SRYLLLPhIGoFhhCNPMolSLGRFhPEKYKPSYYAAAu...CHohNGLFPHINPGE.LFVaLGI........AsGlTT..LG.......LP.h.GsLAlpYLLVGLVsNFhRGWVTDhTTAhhpK+............. 
0 22 38 61 +2400 PF03609 EII-Sor PTS system sorbose-specific iic component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.37 0.70 -5.09 72 5167 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1519 0 403 1910 44 235.10 33 88.43 CHANGED M...phshlQhlLlslluhlsshct....hh.sphthapPllsssllGLlhGDlpsGlllGuoLphhhLGh.sslGuAssPDsslAulluohlslt........ustsh.ssAl.ulAlPlAlhu.hLshls+....olsshhhHtuD+tAccGshpslphhphhuh.lhtulphulP.shl...slhhussslpshlssl......P...p..alhsGLsluGGhlsAVGaAhllphMss+..chhPFallGFllAua..h..pls...llulullG..hslAl.la ................................M...shlQhlLls.l.h.uh.l.u..s......h.ct......hh...sthth..ppPllsss....llGLll.....G....D....lpTGlllGuoLpLlhLGh.sshGu.A..h.sPDsshu...u.lluT.shult.............ss.h...ss....p....sAl...u.l...Al....PlA....s.h.sth....Lshlhp....ol.sshh.h.H.t.AD+...tAcpu.shpu.lp....h....hp.h.h.uh.hh...hu.l.h.h.ulP.shl...slhhG.sshVpshlssl......Pt....hltsGLslAGGhlPAlGaAhll.p.h.Mh.s.+...p.h.hPaFhlGFlhuAa...h.....pls...............llulullG..ss.hAll............................................................................................................. 
0 104 225 318 +2401 PF03612 EIIBC-GUT_N EIIBC-GUT; Sorbitol phosphotransferase enzyme II N-terminus TIGRFAMs, Griffiths-Jones SR, Yeats C anon Yeats C Family \N 20.90 20.90 21.50 21.30 20.60 20.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.17 0.71 -4.67 31 932 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 811 0 85 414 65 167.90 55 52.77 CHANGED pslcIsKGsGGWGGPLhltPstp+cKllYlT.GGscPslsc+IuELTGhEAVDGFKsulP.--ElusslIDCGGTLRCGlYPKK+IPTINlhsTGpSGPLApaIsEDIYVSuVpscsI.............................pls-u.susssspststptstsp.............asssKKlo-Qps......ullu....+lGhGhGpVlusFaQAGR-oID .....................p.pl+IpKGsGGWGGPLpl.sspst+KlVYIT.u.GsRPsIVDKlApLTGhpAlDGFKpu.P.-sEIusAlIDCGGTLRCGIYPK+pIPTINlhsTGKSGPLAQYIsEDIYVSGV+.-.sI..................................slssc..sss.p.ss.s.ss...........................pc..............YDTSKKIT.EQsc..........GllAKlGMGhGpsVAVhaQuGRDoID.................................................................................... 0 27 42 68 +2402 PF03611 EIIC-GAT PTS system sugar-specific permease component TIGRFAMs, Griffiths-Jones SR, Mifsud W, Bateman A anon TIGRFAMs & COG3037 Family This family includes bacterial transmembrane proteins with a putative sugar-specific permease function, including and analogous to the IIC component of the PTS system. It has been suggested that this permease may form part of an L-ascorbate utilisation pathway, with proposed specificity for 3-keto-L-gulonate (formed by hydrolysis of L-ascorbate)[1]. This family includes the IIC component of the galactitol specific GAT family PTS system. 
20.80 20.80 20.90 21.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.46 0.70 -5.70 89 5120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 1917 0 376 2362 24 393.50 26 88.73 CHANGED lpslLs..ssulllullshlGhllh++shscslpGslKshlGFlhlshGsu.llssslsshsphhpcsaulps......sl.ss.....sulsth.........auotsuhlhl.luhllNllls...+hTph+hlaLTGchhhahushlsslhhhhsh.ss...............hhlllhuulllulhhslhsshst.hhpclTs..ssshuluHhs......sluh....hlushlu+h......h...us.p..ch......ssEshplPKpLuhhp-shlsssllhhllallss..............hh.......................t...h..................sspshhh...hlhtulphAAulhll.tGVRhhluEllPuFcGIuc+llPs...............uhsALDsslsasa.uPsAlhlGFl.uhhstllslhlh.......h.........hslIl..P..GhlshFFsuusuulFuNuh...GGh+GslltuhlsGllhhhlshhh.....hshlushts.................................................................................tsshhushDa ................................ptlLs..ssslllsllhhlhhllh+.t.phscslcusl+shlGF.hhlss.shu.ll.s.s.s.lsPhs....p...th....sc....sa.sl.ph......slsDshhs..usls...............au.o....hu.hhhl.luh.l.l.Nllhl....hT...p...hppl.Lsschh.hhthhlss.hl.hh.hsh...ss................hhhhlhuullhulhhhhhushht.hhpclhs......ssuhuluHhp.......sluh....hls.t.hlsph............h.......ss.h..p....c............sh...-...s...hcl...s+plu.....hat...-shhsssllhhllhhlsshs..........................................h.................................stp..hh....hlphulsh.Asslhll.psVRhh......lstlsPshpGlsp+lhsp..................hh.ulDssllhu...sP.sslhhGhl.......llslhll.....................hslIl..P...Gshsh...Fs..shs.s.hsa.s.ssh...uuhRGslhtshltG.ll.hhhlslhh.....sshhushth....................................................................................................................................................................... 
0 98 214 302 +2403 PF03613 EIID-AGA PTS system mannose/fructose/sorbose family IID component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 30.80 30.70 23.30 23.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.44 0.70 -5.46 125 5271 2009-09-11 00:44:58 2003-04-07 12:59:11 9 5 1544 0 416 2104 46 264.50 36 94.84 CHANGED tpLTK+D....lppsahRuhhh.psuaNYERMQshGasauhlPsL+KLY....ts..c--htpAlcRH.lpFFNTpPhh.ushIhGlslAhEEccusss........hsstsIsulKsuLMGPLAGIGDolFasTltPIhuuluuulAhpG.sllGPllahllasll....thhh+ahthphGYchGsphlspl.tuuhl...pplocuAollGlhVlGuLlss.hVplsh...shp.hstsp...................................................................tshslQs.hLDplhPuLlPlhlshhsaaLLpKK...lsshhlllhlhllGllhshlGll ...............................cLTKcD.....lpp.lhhR..S.hhh..QsSa..NYERMQuhGasauhlPslKKlY.....ss....c...--......h...t.pAl++H.L.p.F.FNTpPal.sshlhGlslAMEEpcu....sss..............h.ttuIpulKsuLMGPLAGlGDslFW.hTlhPIh.....uu...luA..uh.A........h..........s.G....sl............lGP...l..l..FhllaNll....phhh+ahhhphGYchGsshl.pch....uuhl....pplocuASILGlhVlGuLlss.a.V.p.lsh.s.l..p...h..s.t.sp..........................................................................................p.s.hslQs.hLDplhPuLlPLhhThhhaaLL+KK...hsshhlIhshhllGIlsphlGlh....................................................................... 0 107 235 331 +2404 PF04873 EIN3 Ethylene insensitive 3 Mifsud W, Moxon S anon Pfam-B_4883 (release 7.6) Family Ethylene insensitive 3 (EIN3) proteins are a family of plant DNA-binding proteins that regulate transcription in response to the gaseous plant hormone ethylene, and are essential for ethylene-mediated responses including the triple response, cell growth inhibition, and accelerated senescence. 
21.40 21.40 22.00 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.44 0.70 -5.19 59 958 2009-01-15 18:05:59 2003-04-07 12:59:11 8 17 628 1 88 945 0 256.50 30 39.71 CHANGED EE.lc++...-Qh.l+...+p+php.hhts+Ess....ssupKss.............+spEQhp++phScA........h.thclsNh.GFh.s.sssssK..phssssLp...+Eclcpsh.Gshtlus........pscsLlhusscs..ps...cpospssolphlsDTshG.QtS.SLLpAth.tptpssPppphsLptultsP.............cELhas....suKDppssstsaKcP...........LpstlpHh.p.s.hchccpthcophLQs..KhSpRpSFAhhus.sp.Ec.ssplhsc..ot....shpc....pSPclslpsspc.-sp......................sccEschp.cspshcsss...........................................shshssp+c.csu-.sphs.p....tlsthCpsoQhpus........-ochhhss+pulsQs.h ............................................-Ehcc.t...pph...l+...+tpp.p...hh...pp....................htspKtp..............................psp-phpcphhscs.....................h..hplsph.uhh.s...ppsc....hsssshp...t-.t.p.ph.tshthsp.............p.pphh.....uttps.........ttu...tshpl..l.-Tsh..s......Slh.s.h.....t.pss.s..p....phs......hh....sP.............pE.h.s.....spptt...........shcp............ht..t.lpp...p.s..chcc..hpophLQs..+hot+pohshhu..tp.p..shphhst......s...........s.pp....tus.psshp.s.t.p.c..-sp......GpcEsp..p...spshps.h....................................................s.hshsp.p.c.p..p.ssc.....shhs.p..th..shhC.t...s.t.h..t.t.p.-hphhh..sp.uhspp................................................................................................................................................................................... 0 10 42 60 +2405 PF03317 ELF ELF protein Mifsud W anon Pfam-B_3282 (release 6.5) Family This is a family of hypothetical proteins from cereal crops. 
25.00 25.00 25.10 147.50 22.10 22.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.85 0.70 -5.11 2 19 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 18 0 3 15 0 252.30 77 96.73 CHANGED MT.shhR.........hhFSo.St....Fpshhhphp.+h.hI.....shIhF..lCl....Fhh...IhhFlsh..IlLPIlphFu....sSFLITLPPElQDPQALAHLtGLNFYLSLYEQDPtWVsFIQpELNHNTPLEDIPGRL+LFLMEE+hSshR.DlIQEFlALYtR.GshLPlEPYLl-tALRSYLDpI+ATDsFolLQAuYQDLR-pEtGShFFRDsVSHNRDhLEApSutRphlEVEpp.hapcI.+upApLERsEapHsL.lFp.EDh+RthE .MpNh..............VRWLFST.SR....FTsFYhahCIKFPhIY....shILFS.lCl....FhF...lsRFI.h..ILLPIhpLF.u.....uSFLITLPPEIQDPQALAHLAGLNFYLSLYEQDPGWVTFIQNELNHNTPLEDIPGRLKLFLMEEKLSSMRQDVIQEFVALYQRlGPYLPIEPYLVDEALRSYLDHIHATDSFTVLQASYQDLRENEGGSVFFRDAVSHNRDLLEAESSARRCLEVEQRIRWEEIPKSKASLERAEHEHALDLFKSEDLRRELE. 0 0 1 2 +2406 PF02323 ELH Egg-laying hormone precursor Bashton M, Bateman A anon Pfam-B_953 (release 5.2) Family This family consists of egg-laying hormone (ELH) precursor and atrial gland peptides form little and California sea hare. The family also includes ovulation prohormone precursor from great pond snail. This family thus represents a conserved gastropoda ovulation and egg production prohormone. Note that many of the proteins present are further cleaved to give individual peptides [2]. Neuropeptidergic bag cells of the marine mollusk Aplysia californica synthesise an egg-laying hormone (ELH) precursor protein which is cleaved to generate several bioactive peptides including ELH, bag cell peptides (BCP) and acidic peptide (AP) [1]. 
19.60 19.60 21.00 20.50 18.90 18.70 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.68 0.70 -5.23 4 25 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 7 0 1 41 0 116.80 44 91.91 CHANGED Mp+Ps.tssssh..llhCLhLSoLCsSSpSsuVHG+sFsspRAVK.pus.lV.SstD......stN.......-t.-csst.h.sss--..psEKpRLphtKRRlRFs+R..c.uth+.h.hpthshSADEN..FDLSN-DGA..QRchRsPRLRFYslRKRAAGs.EpSEspNPETESH...SRRKRSsLT.PSlpSLtpSLESGISKRISINQDLKAIsDMLLsEQhptRcRhLAsLRQRLL-hGKRuSs....VuLhsu-ht.-tRph .........p..............................................................................................................................................................ltK+...t.ptuEt.p.p.cS....Shp.RSh....s.PS......pu.E.th.K.ISIsQDlhs.sp..h.tpht.Rpp..tsLhphhhph............................................................................ 1 1 1 1 +2407 PF00964 Elicitin Elicitin Bateman A anon Sarah Teichmann Domain Elicitins form a novel class of plant necrotic proteins which are secreted by Phytophthora and Pythium fungi, parasites of many economically important crops. These proteins induce leaf necrosis in infected plants and elicit an incompatible hypersensitive-like reaction, leading to the development of a systemic acquired resistance against a range of fungal and bacterial plant pathogens [1]. 25.00 25.00 25.10 26.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.96 0.72 -4.04 87 406 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 40 16 173 325 0 89.40 32 49.01 CHANGED ssCossp.......lsslhsss.h...ssCu.....pss.s.hshhs.s..sossphtshCsuosChsllsslhshs..sD..C.....slshsu..hshpphlsshhstCs ........................sCosop....hsslsslLosssh...spCu.....sDS..G....Yshlss.s.s...h.PTssphphMCuSou.Cpshlsplhuhss..PD..C.....slsh.uGhhhNlhphssshtspCs............. 
0 59 101 173 +2408 PF03789 ELK ELK domain Finn RD anon Pfam-B_3136 (release 7.0) Domain This domain is required for the nuclear localisation of these proteins [1]. All of these proteins are members of the Tale/Knox homeodomain family, a subfamily within homeobox Pfam:PF00046. 21.30 21.30 21.40 22.10 20.50 20.50 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.33 0.73 -6.39 0.73 -4.00 28 1671 2012-10-04 14:01:12 2003-04-07 12:59:11 8 7 111 0 131 525 2 22.00 75 11.86 CHANGED ELKcpLh+KYuGaLusL+pEF. .ELKDpLLRKYSGYLSSLKQEFh.. 0 14 79 110 +2409 PF01151 ELO GNS1_SUR4; GNS1/SUR4 family Finn RD, Bateman A, Kerrison ND anon Prosite Family Members of this family are involved in long chain fatty acid elongation systems that produce the 26-carbon precursors for ceramide and sphingolipid synthesis [1]. Predicted to be integral membrane proteins, in eukaryotes they are probably located on the endoplasmic reticulum. Yeast ELO3 (Swiss:P40319) affects plasma membrane H+-ATPase activity, and may act on a glucose-signaling pathway that controls the expression of several genes that are transcriptionally regulated by glucose such as PMA1 [2]. 
21.00 21.00 21.50 21.10 20.90 20.70 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.99 0.70 -5.08 181 2001 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 384 0 1301 1987 48 227.10 27 79.46 CHANGED hhhss.h..shhlhshYlh...hV.....................hhGs.chMc....sR..c.......Ph............................pL+thhhlaNlhlslhShhhhhthh........t.hshh..h...........................................uhaphhC...............................................s.tthtts..hssaah..a.lSKhhEhlDTlFhlL+..........KK...ploFLHsYHHsshhhhsahshphh.suu..thhhh.hlNshVHslM.......YhYYhls..uhu...........h+.h..ha..KcalTphQllQFllsh.hhshhshhh....................................................................................................................sC........hh.shhhsh.......hhhhoalhLFhsFah.psY..h+pp....ptp...p .............................................................................ts.h.sh.hhhhhYlh...hl......................hhG..c.hMc..............sR.......p......Pa......................................pL+t.hhhl...aN...hhhslh....Shhhhhthh..................hhthh..............................................shaphhC.................................................s.t.tht.t..hhhh.h..h..h........ahhoK..hhEhhDTl..FhlLR.................KK......ploFLHsYHHsshh..h.h.s.W..........hs.h...p...hh...su.u............t...hhh...h.hlN..hVHslM..........YhYYhlu....uhu...........hp...hhW...Kcal..TthQllQFllsh..hhshhshhh.........................................................................................................................sC...........hhhhhhhhhhhshhhLF.hpFahpsY.hpttt...t........................................................ 1 477 666 1080 +2410 PF02488 EMA Merozoite Antigen Mian N, Bateman A anon Pfam-B_924 (release 5.4) Family This family represents the immunodominant surface antigen of Theileria parasites including equi merozoite antigen-1 (EMA-1) and equi merozoite antigen-2 (EMA-2) [1]. 
The protein shows variation at a putative glycosylation site, a potential mechanism for host immune response evasion [2]. 36.00 36.00 43.70 94.20 33.00 35.10 hmmbuild --amino -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.62 0.70 -4.90 10 417 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 56 0 2 413 0 240.60 65 91.16 CHANGED K-cKcDLslDVshoSh-NVTlsss...-AsslVhTA+-GaRFKTLKVGDKTLYsVDTSKaTPspsa+LKHs--hah+LsLcsApPlhFKKKGDKEWsEhcaAsYYD-VLFKGKpt.K-LDsSKFsDsuLFoossFGoGKKaTFpssFK.sSKVsF-cK-VGcscpAKaL-VhVYVGuDsKKVVRLDYFYsGDuRlKEVYFcLtD-KWs+lEQs-ANKsLHAMsooWshDYKPlVDKFSPLAVhuuVLIVuuuslYa .t.EEKKDLsL-VsATpsENhTVsso...suNcVVaTAp-GaRF.KTLKVGDKTLYTVDTSKFTPTsAaRLKHs--LaFKLsLppAKPLlFKKKoDK-WVpFsFupYLDEVLWK-KK-.K-LDASKFsDA.uLFsu-AFGTGKVYsFhGsFK.lpKVhFEcK-VGDssKAKYTuVKVYVGoD-KKVVRLDYFYTGDERFKEVYFKLVDsKWK+lEQSEANKDLHAMNsAWPhDYKPlVDKFSPLAV..................... 0 0 1 1 +2411 PF01105 EMP24_GP25L emp24/gp25L/p24 family/GOLD Finn RD, Bateman A, Wuster A anon Pfam-B_803 (release 3.0) Domain Members of this family are implicated in bringing cargo forward from the ER and binding to coat proteins by their cytoplasmic domains. This domain corresponds closely to the beta-strand rich GOLD domain described in [2].\ The GOLD domain is always found combined with lipid- or membrane-association domains [2]. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.10 0.71 -4.57 232 2288 2012-10-03 07:10:23 2003-04-07 12:59:11 19 33 357 5 1454 2124 14 176.60 21 79.83 CHANGED uhphplsss...ppc..Ca....h-pl..pp.sshlthpapl......sss...............................hslshp.........lp.....t.......ps....p.....hlhpp.ptp..pt..t...........pasFsu.p...................................psG.pYph.CFpsp.sphtt...............................ptlphclph.......s..tt..........................htp......hsp..ppclps..lpp............................plpp.......lppplpplpppppah+tREpph+sts-usspRlhhaSlhplhlllshuhhQlha..L+paFp ...........................................................................................................hhh.lss...t....ppc....Ch....hcpl.........tp..ss..hlh..h...p..apl...........pts....................................................t..hslshp..........................lp..................................ss....tp.....................hlhpp..ptp..st....s.........................pas..Fsu..p..............................................psG...p...a.ph.CFpsphsshst...............................ppVp.hclph................u...pt..p..................................................................htp........htp........ppclps..lpt.......................................................................p.l.pp............lpptlpp..lpcp...ppah+t.R..EtphRsts.......EssssR.Vhh....aSlhphhlllshuhhQlhhL+paF................................................................... 0 470 751 1133 +2412 PF04493 Endonuclease_5 Endonuc_V; Endonuclease V Bateman A, Wood V anon Wood V Family Endonuclease V is specific for single-stranded DNA or for duplex DNA that contains uracil or that is damaged by a variety of agents [1]. 
20.00 20.00 20.30 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.19 0.70 -4.98 14 1175 2012-10-02 11:25:59 2003-04-07 12:59:11 9 7 1072 10 386 811 48 194.60 45 85.13 CHANGED h.p-lpp+lshpsphpp..ph..chlAGVDlua....cpscuhushVlhshsshcllcptsh.hsclshPYlPGFLuFREh.hhlthlcpLtpc.c......................llllDGpGlhHPR+hGlAoHhGllLshPTIGVAKphLps....shhp.pst.........sshp.lh......suphhGtsht.ohpsspPlalSsGsphslcsslclspphhps.h+lPcssR.ADhho .........................................plt.pp.l.hp.s.ph.s.....t........ss..phluGsDVuFc........psG-lspA.A.hVl......L.c.....a.....P.....o............L.........-.l.V.EhplA...cl..sso..hP.YIPGFLSFREh...PuLLsAh...c....t....L....s....p...+...P.D......................LlhVDGp....GIu............H.P.R...R......h.GlAS..Hh..G..lLl..D..l..PTIGVAKp+LsG....capslssc..............................................Gu.hsP.L.hc...............cGEp.lu.....hV.hR.....o.......+.......s............p.........s..p.....P..........LFlusGHRV.Sl-............oAlshVp+Chps..YRLPEPTRh.ADth.u...................................................... 0 138 250 323 +2413 PF02945 Endonuclease_7 endonuclease_7; Recombination endonuclease VII Finn, RD anon Structural domain Domain \N 23.60 23.60 23.80 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.62 0.72 -4.27 16 283 2012-10-05 18:28:12 2003-04-07 12:59:11 10 5 233 14 41 269 148 84.50 31 34.34 CHANGED tp+htthYsl....ohccht.tlh-tQsG..pCtl..Cp....s..tpsh....slDHDH.....csGh.........VRGlLCssCNp.hlG+..h................tDs.chlpp..hhsYLcs ............................t...........pph...tlhptQ.........su...pCsl..Cpt.........ht...t..tp.ph..................slDHDH..........pTGh...........VRG.lL..CssCNt.sl.Gph...............tDs......phhpphhpYLp.t................................... 
0 8 21 40 +2414 PF04231 Endonuclease_1 Endonuclease_I; Endonuclease I Kerrison ND, Finn RD anon COG2356 Family Bacterial periplasmic or secreted endonuclease I (EC:3.1.21.1) E. coli endonuclease I (EndoI) is a sequence independent endonuclease located in the periplasm. It is inhibited by different RNA species. It is thought to normally generate double strand breaks in DNA, except in the presence of high salt concentrations and RNA, when it generates single strand breaks in DNA. Its biological role is unknown [1]. Other family members are known to be extracellular [2]. This family also includes a non-specific, Mg2+ activated ribonuclease precursor (Swiss:Q03091) [3]. 25.00 25.00 26.30 26.00 20.20 20.20 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.59 0.70 -4.51 55 1440 2012-10-05 18:28:12 2003-04-07 12:59:11 8 32 1199 11 288 962 273 210.20 40 70.17 CHANGED pohpphK.pLts...................psY..s...........................sss.hshYsGsshstp...........pu..sppsps..........schphEHV.......hPtpphupppp....................................pthpu.DlHpLhPssGpVNusRuNasFu.hs.................sss......phGpsshtssa.......cscphEP.cp.s+GslARshhYMstRY.s..............lclsctpp...............plhttWs+p.PVssaEppRNptIhp.hQGN+NPFl....spsp ........................................................t..t.hhhhh................................................................ps....css.....ss.hY..CGsphshpsp........thscht...sCGY.p.s.R.KspsR..........usRlEWEHV.......VPA.pFG+pppCWpp......G...............GR+sCtp....s..appM-o.DhHNLpPul.GE.....VNGDRuNa..au.phs.....................sGt......pYGp...Cs...hplcF...............Kp.+tsEPssc.s+GsIARsYFYMpspY..s.....................LpLSc.pps........................pLhpsWs+p.aPVos.WEp..cRs.....p........+Itc.......lQ.G.N.+.NPaVpcsp................ 
0 90 164 237 +2415 PF04667 Endosulfine endosulfine; cAMP-regulated phosphoprotein/endosulfine conserved region Waterfield DI, Finn RD anon Pfam-B_4454 (release 7.5) Family Conserved region found in both cAMP-regulated phosphoprotein 19 (ARPP-19) and Alpha/Beta endosulfine. No function has yet been assigned to ARPP-19. Endosulfine is the endogenous ligand for the ATP-dependent potassium (K ATP) channels which occupy a key position in the control of insulin release from the pancreatic beta cell by coupling cell polarity to metabolism. In both cases the region occupies the majority of the protein [1,2]. 29.80 29.80 31.20 30.00 29.70 29.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.95 0.72 -4.02 12 485 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 254 0 291 440 0 86.40 28 63.05 CHANGED slshuphp.pE..............+lhKhYGtLsspKsh........Lp+Khpc.RKYFDSGDYAhtKuts.pspt....s.tt.t..hssssthccshh++thsuSss .....................................................t.......p..-Et................+..lhthY...G.pL...ssKtsh..............................................Ltp+hpc...+KYFDSGDYshuKAtht...s.pphs..........................................ss.................................................... 0 71 139 223 +2416 PF00322 Endothelin endothelin; Endothelin family Finn RD anon Prosite Repeat \N 20.50 20.50 25.30 20.50 19.70 20.30 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.73 0.72 -4.71 8 186 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 53 9 79 193 0 29.50 68 19.48 CHANGED Rsc.hCSCsshpDKEClYFCHLDIIWhNTss .....Rs+.RCSCsohhDKECVYFCHLDIIWlNTPp.... 0 3 9 26 +2417 PF00555 Endotoxin_M endotoxin; delta endotoxin Bateman A, de Maagd R anon Arne Eloffson Domain This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. 
When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding. 25.00 25.00 28.80 27.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.47 0.71 -4.49 36 401 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 48 8 1 429 0 200.00 30 21.33 CHANGED TpspLTRElYTDPlstss................sshs....oFsslEsshlRsPHLh-aLsplplaTst.pt........hpaWuGpplphphots....shlppshaGsps..stsstsls....ssps....lYRshopsh..h........ssshtulptspFhhssspttthssshhp......sss..shDohspLPspssp.........s.hpsYSHpLSalphhttphsp.t........hPsauWTH+Ssc .........................TsoQLTRElYTsPlhtshst................psh......ohpslEsshl..RsPHLhDhLsp.lsIYT.st.pps............htaWuGHplthp.sss....sthshshaGshss..stssppls....hsps....lYRThSss..hh...th...........sp.ltslstscF..th....sss..thssshYt............tss....shDShsclP.Pp.s.ss.........Ps...h...puaSHRLSHlsh.hpts...tss.............sssF......uWTH+Ss-................................................... 0 0 0 0 +2418 PF03944 Endotoxin_C endotoxin_C; delta endotoxin Bateman A, de Maagd R anon Arne Eloffson Domain This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. 
This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding. 21.00 21.00 21.40 21.40 20.70 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.98 0.71 -4.07 80 512 2012-10-03 19:46:52 2003-04-07 12:59:11 9 13 62 9 5 538 2 134.80 32 15.34 CHANGED ITQIPsVKuhpl.t.s...susVlcGPG.aTGGDll.hpssssh......hphpls......sshsppY.+lRlRYAS.ssshphtlphssssh........shshssThss.....st.....hp..Yp.sFphhshss......shsh.sssp.tplpl......thpshsss..splhlD+IEFI..Pls ................ITQIPh.lKu.tl..s...s.....ssoVlpG.......PG.aTGGDllphpsssst............uplpls.....p.sshup+Y.RlRlR...YA.S...s.ss.....hphplshsspshs........phshssThs..s.....sss........lp....hs.s..Fphhshss........shsh..ssss....hsl.......shpshsus...splhID+IEFlPsp................................................................... 0 2 4 4 +2419 PF03945 Endotoxin_N endotoxin_N; delta endotoxin, N-terminal domain Bateman A, de Maagd R anon Arne Eloffson Domain This family contains insecticidal toxins produced by Bacillus species of bacteria. During spore formation the bacteria produce crystals of this protein. When an insect ingests these proteins they are activated by proteolytic cleavage. The N terminus is cleaved in all of the proteins and a C terminal extension is cleaved in some members. Once activated the endotoxin binds to the gut epithelium and causes cell lysis leading to death. This activated region of the delta endotoxin is composed of three structural domains. The N-terminal helical domain is involved in membrane insertion and pore formation. The second and third domains are involved in receptor binding. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.47 0.70 -4.96 38 632 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 108 9 79 668 0 213.70 28 25.79 CHANGED lpsulslsuhlLush.h..PhuGhlhuhht.LlshlWPstsss....WchFlpplEpLIc.Q+Ischs+spAlucLpGLsssac.....hYhpuLcpWcpsssssps......pptlcppFpsh-sthpsulPtFslp..sh....p...lsLLslYAQAANLHLhLLRDuslaGp.cWGhspssls.......paYsc.hchhpcYosHClpaYNsGLspL+.........sos...hpsWhcYNpFRREhTLsVLDlVALFPsYDsRt ..................................................................................................................h...shslsthlLu.h..h........P.h...sG.h.l...h.s..ht.llshlW.....st......tsss..........Wc.thhp.plEpl...Is....Q+Ipp.hsp....spshucL....p.G.Lpshhp....................Yt.pshcp.W..p....ss.ssss......................tptlpppapshpst.hhst.l...P....Fthp....sa.............p.........l.LL.slYsQAANLHL.lLRDs...laGp..cW...G...hs....sssls.............................shhs.c........hpphhtpY.osas..lp.hYssGLpplt...............................tss....tpWhpa........s...p...aR+phTLtVLDllul.F.s.YD...t.................................................................................... 
0 46 71 76 +2420 PF03272 Enhancin Viral enhancin protein Mifsud W anon Pfam-B_4236 (release 6.5) Family \N 19.50 19.50 19.60 19.60 19.40 19.40 hmmbuild -o /dev/null HMM SEED 775 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.10 0.70 -6.62 14 631 2012-10-03 04:41:15 2003-04-07 12:59:11 8 24 223 0 27 514 0 333.10 25 62.04 CHANGED hs.hsltlPslshPsWlpsss.shhulcHt+pPlshlltAushl+lRpsps........lTlclLNsscpTEcolss..sss.hphssptsSVsFVcpsh.sstssthc..VpaplsuphpsLPhYphGpsp..t.Fhsphs..spsssaual-hchlplLVP.sD+stlp...ttshslspLtsaYssIlpaYssLsGh........sptNh.s++YFsKADt.uGsGuAYYuptahApSssolp.haLpsossNWhsLHEIGHuY-htFs.spthht..EVWsNlhsDhaQYthhs.sE+.ppuhlYpsGp+spl-psIhshlsssh.sassWshhp+Lhhho.lh.ppuGcchhpphppphRph.pshshs.ppaplhshlssh.....sphDls.hhpLh.th..s.........................thp.tshhhtppshYPlppllssa-hht.....l+....hposasLVsssphht.....sslolphpIc-ss.......QIhGphhtlhcGschlhpsslssssphhhs.tlssGlYplhhPpGp.s+RYpls..............spYllVcsss.p.........hplpap.hstSslhsc.phplhGhsD.tlsAshhlshtpcplslplhsssPpstFsNphYaplsIcsssshph..hsh.t.sss..tpshhshch..ssshplhlacpcsspph.....ahssh.ss.psssahloppGlpptss.....ssttpplhs+IsphstaLsscsshLh.hpspl+DsIYLuhphl.sppp..ptLhppascalP....ph.ss.......pshshshhGhsphshlplphshshppsplhhtss.s.....t..hp.....Yhslph 
...................................................................tphs.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................phphpGhuD.hphApls.h.spppc.c.hplslp..s..s..pPHsYFss..pYuoIpVhsps..Gphlap+chpG..s.ppp.ts.p.pp.hshp....sshplp.....laH.....sEs.t....Rlph.......shtsph.ppp.....Kpt..h.ap..lTppG.Lcp.p........................................................................................................................................................tp............................................................................................. 0 0 8 14 +2421 PF03386 ENOD93 Early nodulin 93 ENOD93 protein Mifsud W anon Pfam-B_2931 (release 6.6) Family \N 25.00 25.00 33.50 26.60 19.90 19.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.65 0.72 -4.27 8 99 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 28 0 48 83 0 75.50 65 70.20 CHANGED QKhAhAKpCS+EushAGsKAAAVAsVAoAlPTLASVRMlPWAKANL.N.TAQALIISoAAGhAYFIsADKTILusARKpS .........Q+lAhAKcCS+EuslAGsKAAAVAolASAlPT.........LASVRMLPWAKANl.N.TuQALIIoTsAGhAYFIsADKpILuhAR+pS............ 
0 3 24 36 +2422 PF00113 Enolase_C enolase; Enolase, C-terminal TIM barrel domain Sonnhammer ELL anon Prosite Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.72 0.70 -5.45 11 7258 2012-10-02 01:07:48 2003-04-07 12:59:11 17 14 5866 108 1661 5284 3499 256.00 52 66.97 CHANGED shlLPVPhhNVlNGGSHAGNcLAhQEFMIhPoGAsSFsEAhRhGuEVYHsLKullKtKaG.sAsNVGDEGGFAPNIpoNcEAL-LIs-AIpKAGYpG..KVpIuMDVAuSEFY..c-sKYDLDFKs.psc.S+hlou-pLsDhYcElspcYPIVSIEDPFDpDDW-uWsphTtphGpclQIVGDDLTVTNPKRlpcAIEcKssNuLLLKVNQIGolTESlpAschApcAGWGVMVSHRSGETEDoFIADLsVGLssGQIKTGAPCRSERLAKYNQLLRIEEELGupAhYAGcsFp+sh ...................................................................................................................p.hhLPVP..hhNlING.GsHA.s.N.s.l.s.h.Q.E..FM.Ih.Pl..GA...p.oFpEAlRh..GuElaH.s.L.Kp.lL..........K..........p..............+G................h.........sT..u............VGD...E................GG................FAP................s.l.........t.........s.........s...............c.-...............A..L.phIh.c..AIct...AG..YpsG............pclhl.......uh....DsAu.S.E....F..Y.............c...s.....G.....p.....Y.......s.h.....p.......t....c...s.........................................................t..hh........o................upchscahpp.Ls.p....c.Y....P....I.lS.I.EDsh.......sE....sD..W-GWthhT......pp.l...G.......p...+...lQ................lVGDDLF....V.TN.s.c.hL....t..c..G.I.c..p...t.....hu.Nul...L.....lKlN..............Q...IGoLTETlpAlchA.+c..sG...a.sshlSH.R....SG..E..T..EDo..hIA..Dl.AVuhssG...QI..KT.Gu..uRo-RlAKYNQ.Ll...R.IE-t.L...u..p........A.........a...G.t....h.......................................................................................................................... 
0 546 1007 1367 +2423 PF03952 Enolase_N enolase_N; Enolase, N-terminal domain Sonnhammer ELL anon Prosite Domain \N 21.10 21.10 21.40 21.70 20.90 21.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.46 0.71 -4.22 190 6649 2012-10-02 11:54:41 2003-04-07 12:59:11 11 15 5507 108 1614 4840 2520 125.70 58 30.79 CHANGED pIpplpAREILDSRGNPTVEV-VhhpsG.hG......RAuVPSGASTGppEAlELRDuD.....p+ahGKGVhcA......VpNVNshIussL.l..G.hc....st-QptlDphhl.cLDGTp.....NK..u+LGANAILuVSlAsA+AAAsshs.lPLYcYl .............................................................IhclhuREllDSRGNPTVEs-Vhh...-....s.G....h..hG....................RAsVPSG..AS...T....G...p..........+EA.l...E..LRDGD....K..............sRYhGK.GVhKA..............VpNVNshI...Ass.l..l....G..hD...............sp-Q..ttID.phMI.c.LD...G..Tt........NK.....u+LGANAILGVS.L.....AlA+AAA.sttslPLYcYl....................................... 0 524 978 1328 +2424 PF03735 ENT ENT domain Bateman A, Hughes-Davies L anon Bateman A Family This presumed domain is named after Emsy N Terminus (ENT). Emsy is a protein that is amplified in breast cancer and interacts with BRCA2. The N terminus of this protein is found to be similar to other vertebrate and plant proteins of unknown function. This domain has a completely conserved histidine residue that may be functionally important. 20.70 20.70 20.70 22.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.12 0.72 -4.25 20 289 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 104 5 169 275 23 69.90 40 10.89 CHANGED hcsp..l+pLEp-AYuuVL+AF+AQus.lSh-Kcplls-LR..................KELplSs--HRphlp+lssD-plpplRctppusss ..........ct.l+pLEhEAYsuVlpAh+AQu-..L.ohEKcsLlsELR..................+pLpI.SsEcHRt.lp+sssD-hlppItc.hts...s........... 
1 42 89 127 +2425 PF01375 Enterotoxin_a Enterotoxin_A; Heat-labile enterotoxin alpha chain Bateman A anon SCOP Domain \N 19.60 19.60 19.90 20.10 19.50 19.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.63 0.70 -5.09 6 164 2012-10-01 23:25:29 2003-04-07 12:59:11 12 6 82 29 50 164 0 208.20 43 47.94 CHANGED hsplhhFFI...hlu....ANs..haRADSRoPDEl+cSGGLhPRGpsEsa-RGTslNINLYDHARGTsTG.sRasDGYVSTohoLRpAHLhGQshLuuascYYIYVlAsAPNhFDVNGVLGs.YSPaPsEsEsuALGGIPaSQIhGWYRVs....FGsl-st.hcRNR-YRc-hacsLssAPupDGYpLAGFPssaPAWcEtPWtpa.AP.uCssss+ppssssCsptsspLupttLt-app+lKRplshh..hS..ps-hhusssh+DEL .....................h.......................t...lYRu.D.s.RsP--I+puG..GhhPR.......Gpschh.p.cs.Tph...sh.......sLa-..HspGs..ps..........GhsR...h.s.D.GYVSTo......ho...lc.s.A..+..h.hu.p..s..hlu...s...hs......s......h..YIYsI...A.s.A..PN.MhsV............N...c....s...L......Gs..Y........SP..aPt.EpEhuALGGIPaoQIhGWYRVp...............................aG.sl-p.....t..hp+NctYcs..ch.a.pshs.h.Asuts...tLAG.FPs...pa..AWc.-cPWhpa..us.ss.t.p.t....c.s...sssCsttppphs.h.h.phpphh+.p.................................................................................................. 0 3 37 40 +2426 PF01376 Enterotoxin_b Enterotoxin_B; Heat-labile enterotoxin beta chain Bateman A anon SCOP Domain \N 25.00 25.00 80.60 80.50 21.30 19.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.57 0.72 -4.16 5 107 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 28 276 1 108 0 95.80 92 83.02 CHANGED TPQNITDLCAEYHNTQIYTLNDKILSYTESLAGKREMAIITFKNGATFQVEVPGSQHIDSQKKAIERMKDTLRIAYLTEAKVEKLCVWNNKTPHAIAAISMA .TPQNITDLCAEYHNTQIaTLNDKIFSYTESLAGKREMAIITFKNGATFQVEVPGSQHIDSQKKAIERMKDTLRIAYLTEAKVEKLCVWNNKTPpuIAAISMt. 0 1 1 1 +2427 PF02048 Enterotoxin_ST Enterotoxin_HS; Heat-stable enterotoxin ST Mian N, Bateman A anon IPR001489 Family This family consists of the heat stable enterotoxin ST from Escherichia coli. 
ST is a small peptide of 18 or 19 amino acid residues produced by enterotoxigenic E. coli and is one of the causes of acute diarrhoea in infants and travellers in developing countries. ST triggers a biological response by binding to a membrane-associated guanylyl cyclase C which is located on intestinal epithelial cell membranes [1]. 21.10 21.10 23.10 22.70 19.90 19.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.23 0.72 -3.71 6 34 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 13 3 0 35 0 45.90 57 76.04 CHANGED QETsShphuDu.SssIssEl.cKtCDs.psss....Es.sDW..CCElCCNPACAGC ..................................KtpIsh.s..ccCsh.Kpss....EshNsh..CCElCCNPAChGC 0 0 0 0 +2428 PF01417 ENTH ENTH domain Bateman A anon [1] Domain The ENTH (Epsin N-terminal homology) domain is found in proteins involved in endocytosis and cytoskeletal machinery. The function of the ENTH domain is unknown. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.49 0.71 -4.26 72 1127 2012-10-02 18:21:09 2003-04-07 12:59:11 15 25 307 14 691 1760 5 121.20 40 23.47 CHANGED hophphcVR-ATss-.sW.GP..osshhp-IuchTas..pph.clhshlh+Rl.........................................................s...cps+pWRplhK.........uLpLL-YLl+sGSE.....pslpph.+pplhhlpsL.ppFpa.h-p.pG+DpGhs............lRp+ucplhsLLpDs-pL ................................................................ophphKVR-ATs.sD.PW..GP.ou..s.l.M.s....EI.....A.....c.h...T........a......s.....h.....ts........a.t..E......I....Msh...la...+...RL............................................................p..-puKsWR+lYK..............uLpLL-YLl+sGSE........+V.s.p.ps...+c..s...l...a....h....I..psL....c..........sF....pa.lDc..pG+DQGhN............VRp+uKpllsLLpD--+L........................................................ 
0 203 350 539 +2429 PF00429 TLV_coat ENV_polyprotein; ENV polyprotein (coat polyprotein) Finn RD anon Pfam-B_145 (release 1.0) Family \N 22.50 22.50 22.60 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 561 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.24 0.70 -13.00 0.70 -6.06 16 3200 2009-01-15 18:05:59 2003-04-07 12:59:11 14 13 221 9 70 2514 0 268.00 25 83.52 CHANGED llLhlLhh.........ht.ssP+pshslh.plhs.................phhP.h......shCsLuut..sshshsspss.sss..s.spsuhhss...............p..Csps.ph......stttstshYlCPspspsh..stpphGh.pssYs..sthtCppsG.....psYWpss.ohsh.olppsto.....................ttspssptpstsssLhlpFopsGppu....pshsWuhR..lahS.GtsPhhhhslphhhppl........p.lltpp.tPPsp.pPht.Phss......................................t.ssssphlsLlpusa.sLNhTsPshsp-CWLCLstu..PPaatulus.sshsspTsss....C.ssspatLp.spsos.shhhu.......sl.....PhsatshsN............ps...tttss..hLssssGohasCssshT.ChpTsl.NhooshClLspLhPclohhss-....l.s..psssRh+Rt.slulTLs.LLsGL...GIuuuluTGsTuL.Vu.....sppappLpttlcsDlptLppoIssLccsLsSLuEVVLQNRRGLDLLFhcpG..GLCtALpEcCCFYAs+oGlVRDphtKLpE+LppRp+lhpupthW.pGhhshSPWhpsLlpohhGPLllLLLlLhFGPCILN....+LspFl+ 
...........................................................................................................................................................h......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ph.thtt...tpp...................................................................................................................................................................................................................................................................... 
0 14 16 49 +2430 PF00811 Ependymin Ependymin Bateman A anon Pfam-B_1391 (release 2.1) Family \N 25.00 25.00 26.20 25.30 24.20 24.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.67 0.71 -4.45 20 196 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 88 0 108 182 0 130.50 28 57.16 CHANGED lEcssphNpopph.DlLlhFcE..GVhY-IDt+NcoC+KpsLppph+Ph-lPssATa.sEhhlGussh.tpGLcVchWsGc..lP-p+........GpYohpoTphGClPVops.Yps-ps...s.LhhSFa-lpstlcDP.pVFsPPu.hC ................h.........stph.-hlhhac-..GlhYpIs.pscpCpKpsL..p.p..sa...cPhclPpsAoa.....sphhlGus....tpulhVppWssc...hsstp........utY...s.s.p..s.s..s....ClPVpps...ahs....sps...........shlh..h..p..Fhslph..GIpDP.sVFsPPs.hC................................... 0 61 68 92 +2431 PF01404 Ephrin_lbd EPH_lbd; Ephrin receptor ligand binding domain Bateman A anon [1] Domain The Eph receptors, which bind to ephrins Pfam:PF00812 are a large family of receptor tyrosine kinases. This family represents the amino terminal domain which binds the ephrin ligand [1]. 19.80 19.80 19.80 20.60 18.70 19.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.49 0.71 -4.48 33 1131 2012-10-03 19:46:52 2003-04-07 12:59:11 14 44 89 57 480 825 1 167.20 52 19.47 CHANGED EssLLDopsspu-L.GWhsaP.s.G.........W-EloshD.-ptpslRTYQVC..sV....hcssQNNWLRTsaIpR.psApRlaVEl+FTlRDCsShPusss..oCKETFNLYYhEoDp............sss...sshpcstatKlDTIAADEu.hsphc...........hssps.h+lNTElRslGP.....Lo+.+GFYLAFQDhGAClALlSVRVaYK+C ...............................................sLhDopt.t.u-L......uWhs.P.p...G...................WEElSthD.E.phssIRTYQVC..NV.......h-.ssQNNWLRT...saI.sR....cuA.p......R.......lalEl+FT...l.R.DCsS..lP..ss...h..G.....o....CKE..TFNLYYhEoDp...............s.ts....sthpEs.a...........hK..l....DTIAADES.FophD....................hGsRh.hKlN.TElRslGP................Lo+.+G..FYLAFQ.DhGAClAL.lSVRVaYKKC......................................... 
0 50 89 238 +2432 PF01370 Epimerase NAD dependent epimerase/dehydratase family Bateman A anon Pfam-B_93 (release 3.0) Family This family of proteins utilise NAD as a cofactor. The proteins in this family use nucleotide-sugar substrates for a variety of chemical reactions. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.13 0.70 -11.31 0.70 -4.93 96 38903 2012-10-10 17:06:42 2003-04-07 12:59:11 16 146 5462 329 11522 75709 43322 233.20 20 69.25 CHANGED lLlTGssGhlGutlschLhppuhc.........shshtppts..........................h..sDls.cpsslpchhppt...psDt....VhphAAtst...lttsh.ppstthhcsN....hhsshpll-uhp.........phsh.........h+hlhsoS.uplYGps..tt.....................h..sEsssh...tPhsP....YuhuKhhupthstsh......pcp..a.shpssshhhhNhhGPt........t.tthss+hlsthlpphh..........pup..................................lhhhG.......sGss.tRDalascDhscAhhhhlppsp.............spsaNlG ........................................................................................................................................lLlT.G.u..s..G...a.......l...G......u..p...l....s......p..t....L......h..p......p....G.hp......lh...................shs..t.s..t..p..t..t.......................................................................th.p.h..h.......s...D.....l......p......c.........t.......s.........t......l.......p.....p....h....h...p...pt........................D..s.......................V....h.......H..........h.........A........u......................s............h..........s...........t...........t..........s..........h........p..............p........s..........t.........t.........h.......h.....p.....s....N.......................l...h....u....o....h.....s........l.....l........c.....s....sp............................................................p.t.ss..............t+..h..l...a...s....S.......o.....s........s......l........Y...Gps.........tp.h....................s................................................................h.
....s.E...s.psh............t.P.h.ss.....................................Y..u...h.....o....K..........h....h....s..E.......p....h....s...p...s...h...............................t...c...p......h......s.........h.....p........h.......s....h....h....R....h....h...s...s....h....Gst.......................t......s...p...h...l.....s....h...h...h...p...t..h...h.......................................................t.s.p....................................................................hh.l.h.G............sG.....p....t.....h....R.....s..........a....l......a..l.pD...h....s...pu.h..h....h....h..h..p.p.t....................t.hsh......................................................................................................................................................................... 0 3673 7370 9810 +2433 PF02350 Epimerase_2 UDP-N-acetylglucosamine 2-epimerase Bashton M, Bateman A anon Pfam-B_888 (release 5.2) & Pfam-B_4862 (Release 7.5) Family This family consists of UDP-N-acetylglucosamine 2-epimerases EC:5.1.3.14 this enzyme catalyses the production of UDP-ManNAc from UDP-GlcNAc. Note that some of the enzymes is this family are bifunctional such as Swiss:O35826 and Swiss:Q9Z0P6 in this instance Pfam matches only the N-terminal half of the protein suggesting that the additional C-terminal part (when compared to mono-functional members of this family) is responsible for the UPD-N-acetylmannosamine kinase activity of these enzymes. This hypothesis is further supported by the assumption that the C-terminal part of Swiss:O35826 is the kinase domain [3]. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.11 0.70 -5.68 187 4483 2012-10-03 16:42:30 2003-04-07 12:59:11 14 13 3029 20 922 3343 2891 333.90 34 89.01 CHANGED ptlppp..t.....hchpllsTGQHh.pchspphhc............tapl..ps-hp....Ls..pss.tshup.huphlhthpcllpc...pP.DhVl...VhGDssoslAuAlAAhhhpIP.luHlEAG.Rs.........................-hp.s.hPEEhNRphss+luclHFssTppupppLlpEG....h..........ssp.....plassGsssl...Dsl......hhstppltt.........sl..p.............hppphlLlThHRtpshsts..hpplhpslpsls...pp..ph..llashH.sPpspchl.............phlpph.s.......plplhpPLsahcalpLhppuphl............loDSG.GlpcEAss..ls.....hPslsl..R..sER.EuhttGs...shl.l.u..s.pppIlpulpphlts.....tthpthsp....PYG.-GpA.uc+Ilchlt ...............................................................t..htpp..th-thlssTuQH..ccMh-p...lhp............hFpI....p......sD.hs.Ls.......pss..psLs.ch.......Tup....hlttlc.pllpc............cP.DhVL...VaGDTs..oohAsuLAAh.h.............ppIP..VuHlE...AGLRoh........................-hh..s.h...PEEhNRplsspluc...laFu...PTcpu+.pNLlp.E.uh.....................spp..........pIalT.GN.ss.l.DuL..............hhs.h.pp..h.....tt.....................plhtp.....................tsc+hlL.l..T...s....HRc..E.....N.h..s..p.s.....hc.p...lhpA.l.p.pls...pp....cht..llaPhH.....sP.p.s.pch.l.............pc.h.ltph..p.......plpLl-.P.l..sa.h.c.Fh...tLh.....p..cu.h.hl.............l...T.DS.....G..G..l.Q.E.EA..Ps..LG.....pPsLll.........R....s.s..TE.RPE....u......l...p...s..G.T......stL......lG....sc....tppIh.ptsppLlpc...........ppth.pp.hsp..stNPY...G..D.G..p.A.upRIlchl......................................................................................................... 
0 306 604 780 +2434 PF00758 EPO_TPO Erythropoietin/thrombopoietin Bateman A anon Pfam-B_990 (release 2.1) Domain \N 25.00 25.00 25.90 25.90 23.50 19.90 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.96 0.71 -4.61 6 114 2012-10-02 01:28:15 2003-04-07 12:59:11 13 1 54 9 46 138 0 150.00 39 69.31 CHANGED sRLICDSRVLERYlLEA+EAENsTMGCuEussLuENITVPDTKVNFatWK+.EspppAlEVWQGLALLSEAlLpuQAlLANSSQssEsLQLHsDKAlSGLRSLToLLRsLGsQpEthSsP-sss..sAPLRslssDslsKLFRVYuNFLRGKLpLYTGEsCRRGDR .....t.lCDsRVLp+alhEA+-AEsthhsCsEsssLspslsVPsTclsahtWKphphp...ppA.EVhpGLsLLtEAlhtupu.LusostssthhQLHsD.pu...lpsLtSLpuLL+sLss..ptt...hos.....scs.....ss..hhshp.phhhsshpcLhplhsshLpuKhtL.sst..sspptt................................. 0 2 4 11 +2435 PF00275 EPSP_synthase EPSP_syntase; EPSP synthase (3-phosphoshikimate 1-carboxyvinyltransferase) Finn RD anon Prosite Family \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.42 0.70 -6.05 23 10932 2012-10-02 15:27:11 2003-04-07 12:59:11 15 37 4830 176 2360 7857 8210 390.10 27 90.26 CHANGED lpsssplsGpV.plPGsKShosRslllAALusu.....oplpNlLcucDspphlpsL+pLG..hhphppppsshhspG.htphpss....t.hslhhGNuGTshR.Lsuhhuh.ts........lsLsG-splppRPlspllcuL+phGAcIphppstshsPltlps...hphtslclpssluSthlTuhLhhAshhAcu..sshlps..hsscPplshThphlpphGsplcs.sstp...hhlcGspp...hsu.pahV.uDtSuAuaFlsAAAlssG.pVtlpslshsslp.u...hlt.hLcchG....upls.......hs-stslshtt.........hcshsl..slpshsD.uhslAhhAtFAp.......usoplcshtplRhKEo-RlhuhusELp+lGucscEt............DGh.......psshh.....LpsuclpoatDHRhAMuhuLsu..sps.........tshIccspshs+oaPsah-pL 
......................................................................h..s.p.LpGpl..plsG.tp.ushsll.h.A.u.L..h..s.sp....spl.p.s.l...p..h........pDlt.shhph..lp...tLG..............s........p..l....................t...................................t..............s..............s...........s........h.............h.....h........s...........s...s..........t..........h.......t.....s.........h.p............ss...........ph.............s...h...p....h...tsS...hhshts.Lhuthut..s........................plsl.sG...ss.s........l.u.....p....RP.........lchh..lcuL.c.t.h..G.A.p..l.......p........h......p......p......s.........h...........h.ss.hp...........................tpL.p..usc........lh.h...s...h.....s...S........st..sTtslh.h...Au..hh..AcG..............pT.hlp.....s.....hspcPp..l.sc.htphLpp.hG....u..c....l.........p.....s.................s...s..sp.......................lp.l..........p....G......spp.............Lt.u.......t.p...a..pV.s..Dt.p....Auha..l..s.A...........A.........Al........s....s.............G...c..............lhl....p.s.s.......hh.......p.......php............hlh.hLcch.G............splp.......................h.t.c.........s..h..lt.hptt....................hc.h.t..s.l.......sl.p...s.....hs...cs.uhsh..shtu.hst................................hh.shhc.Gsu..lp.p.h.-.phhh.hss..ELp..+h..G.A.c..l.p...ps.................cs.hh...................................................hh.sst....................L..p..G.A..p.V....t..u.h..DhRhuhuh.slAuL...hu.p.G...........s.plpch.tpls+uYsshhccL............................................................................................................................................ 1 809 1563 2026 +2436 PF03736 EPTP EPTP domain Staub E, Mistry J anon Staub E Repeat Mutations in the LGI/Epitempin gene can result in a special form of epilepsy, autosomal dominant lateral temporal epilepsy. The Epitempin protein contains a large repeat in its C terminal section. 
The architecture and structural features of this repeat make it a likely member 7-bladed beta-propeller fold [2]. 20.60 20.60 20.80 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.87 0.72 -4.21 71 1712 2009-01-15 18:05:59 2003-04-07 12:59:11 12 64 48 0 784 1271 21 45.40 22 22.68 CHANGED ppFhphQsls....psshshcshsl.sschalllup.hu..top......lhcWss ............pFhpaQsls....hpsshshchapl..s......s......c.............alslus.hs.......top.............lacWs........................ 0 61 129 324 +2437 PF01133 ER Enhancer of rudimentary Finn RD, Bateman A anon Prosite Family Enhancer of rudimentary is a protein of unknown function that is highly conserved in plants and animals. This protein is found to be an enhancer of the rudimentary gene Swiss:P05990. 25.00 25.00 25.70 34.10 20.90 24.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.11 0.72 -4.32 13 234 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 144 7 154 192 0 97.70 53 65.02 CHANGED MSHTILLlQso.p+h-oRTasDYESlspshEGlC+lYE-+LKchNPssssITYDISQLFcFIDsLADLSsLVacpsTtoYhPasKpWIKp+lYthL+ppAt..p .................oHTILLlQPs.p+.-sRTYsDYESVs-CME.GVCKhYEE+LK+..h.....N...P......sssoITYDISQLF-FIDsLsDLSCLVYctsTp............o.YtPaNK-WIKEKIYhlL++pAt..s................... 
0 44 65 117 +2438 PF00810 ER_lumen_recept ER lumen protein retaining receptor Bateman A anon Pfam-B_1387 (release 2.1) Family \N 21.50 21.50 21.50 21.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.90 0.71 -3.71 61 726 2012-10-03 12:15:12 2003-04-07 12:59:11 13 9 338 0 498 703 22 138.40 43 59.98 CHANGED SCsGlSh......KoQhLYslVFssRYlDL..ap.sah...ShYN......slhK.....lhalsoohhslYlh..hhpa+t...TY-cph..DoFplp.aLlssshl...........................................LAllhs.....ppa......phh-lhWoFSlaLEuVAILPQLhhlp.+o.scsEslTu+YlhsLGhYRsLYlhNWIaRY ...........................SssG.lShKoQ.LaulVa.......s.s.RY.LDL...ap.sah..........ShYNo..hhK..................lhalssohh.slYhh...hhca+s.....TYDtsp.....DoF+.........l.........t.a.........LllPshl...........................................L.uhlhs.........pca..........................shhEl..lWoFSlaLEuVAILPQLhhlp..+T..u........c....sEslTuHYl.FsLGhYRsLYlhNWlaR...................................... 0 174 285 401 +2439 PF02732 ERCC4 ERCC4 domain LOAD anon LOAD Domain This domain is a family of nucleases. The family includes EME1 which is an essential component of a Holliday junction resolvase [2-3]. EME1 interacts with MUS81 to form a DNA structure-specific endonuclease. 
20.70 20.70 20.80 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -11.07 0.71 -4.43 159 1301 2012-10-11 20:44:43 2003-04-07 12:59:11 10 39 620 18 812 1250 227 143.70 22 21.21 CHANGED lDsRE..hp.st............l.phlpphu..h.phphpsLt..........lGDalh.......ts.......................................phhlERKs.hs..Dhss.....Slh...c..sR..hhpQh......pcLp..p............thpp....s.hlllEsptthhtt.....................hp.............pss.........................sl.....................psslsplplp.h.sltlhhops.hp-ouphltph ............................................................................lD.RE...hp.st..............l.phlpphs......l..phthp.p.L.s..............l.GDalh......ss.................................................................................................phhVERKs.ls.......DLhs.................Slp.........s......uR.hhpQh....................tclp....p.................thpp....................s.hlllE...tpt.st.h.p..............................................................p...................................................sl.......pssLsplplth..thtlhhops.pposphlh........................................................................................................... 1 273 458 660 +2440 PF04404 ERF ERF superfamily Aravind L anon Aravind L Family The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to ERF [1]. 
25.00 25.00 25.00 25.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.21 0.71 -4.70 43 704 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 504 0 89 596 339 157.60 21 65.36 CHANGED hhpshsplpppl.t.pKs......sphhph.....pYt..shc-I....lcsl+sllscpGl.hh.hsp.............................spthlplpshlhc.tsu.t-phps.h.tp.......s.sKs....ssQssGouloYA+RYsLsuhFuIsscpDcDs.....................ttptppppsppppssppppp..ttchhpppsp ......................................tthtplptp.l..s..t...pKs...............sph.hpY................pYt..sh..psI....lcslcsll..p..pps..Lhls.pt.sh............................tthhhplpss..hhs...ssG....pphss.hhsp.....................ptsKs....sssQss.GuuhoYu+RYsLsuhhs.Iss.-..c...DsDs....................ttpptpp.tspp.t.p........................................................................................................ 0 29 53 69 +2441 PF03463 eRF1_1 eRF1 domain 1 Bateman A anon Bateman A Domain The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known [1]. The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site [1]. This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification. 
21.20 21.20 21.30 22.40 21.00 21.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.36 0.71 -4.58 119 1096 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 518 26 713 1080 196 131.10 28 32.75 CHANGED h...hppshc.hchcthlchl.cppschhpLhsLlhssDp.hupsh+pl.pc...sssh.....puppsp.....................ppV.sslshtlp+lca....sstsuh....................................lhhsGtlhpt.....sphsphcolslEss..cslslh+.........phDshhhpchhcsh.p .........................................................pcslc..hch.ct.hlphl.psp.s.....sh..hphhsLlhPscp.h..up..sh+hl.pc................ssshpucssp...............ppVhs.slo.shp+l.ch..............sP.suh........................................................lhhs......Gpll.....sc..............c..s+.tp..hhol-hE..P.....+...s..lshph..................hhhDsthh.schhpth............................................ 1 236 416 593 +2442 PF03464 eRF1_2 eRF1 domain 2 Bateman A anon Bateman A Domain The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known [1]. The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site [1]. This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification. 
24.10 24.10 24.10 24.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.50 0.71 -3.83 35 1105 2012-10-02 16:33:16 2003-04-07 12:59:11 10 12 531 23 724 1082 171 129.50 31 32.44 CHANGED phuhlllDcstAplullpupshcllpchssslPtK+ttGuQSth+at..htcttcpah+cVuEtsspthh.......ppsplculllAGPGhhKsphhppthhctchtp........thlh.llDlShuuctGlpEslcp..uschLsc ............................huhllh-t.stAhhuh.l..pu..s......s.pcllp+hss.slP+K+t..pGG...pSsh+at.......htcthcpahcclu-thsph.h..................shsslpullLAGsu.hKs-hh..pt......hhctchps...........................phlh.lVcsSh..Gscp.Ghppslch..sschLp................................................ 0 236 422 602 +2443 PF03465 eRF1_3 eRF1 domain 3 Bateman A anon Bateman A Domain The release factor eRF1 terminates protein biosynthesis by recognising stop codons at the A site of the ribosome and stimulating peptidyl-tRNA bond hydrolysis at the peptidyl transferase centre. The crystal structure of human eRF1 is known [1]. The overall shape and dimensions of eRF1 resemble a tRNA molecule with domains 1, 2, and 3 of eRF1 corresponding to the anticodon loop, aminoacyl acceptor stem, and T stem of a tRNA molecule, respectively. The position of the essential GGQ motif at an exposed tip of domain 2 suggests that the Gln residue coordinates a water molecule to mediate the hydrolytic activity at the peptidyl transferase centre. A conserved groove on domain 1, 80 A from the GGQ motif, is proposed to form the codon recognition site [1]. This family also includes other proteins for which the precise molecular function is unknown. Many of them are from Archaebacteria. These proteins may also be involved in translation termination but this awaits experimental verification. 
21.20 21.20 21.20 22.20 21.10 20.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.31 0.71 -3.81 34 1128 2012-10-10 14:40:03 2003-04-07 12:59:11 10 12 530 29 741 1099 188 117.00 33 29.44 CHANGED hhpEpcllscFhcplup..DsshssYGhc-lhcAlEhGAl-sLLlh-sLhppc.h..+p............................................h-pLscpscphGuplhllSscpppGpQl.cuFGGIuulLRapl ..................................hpEp+llscaacclsp..Dsu.+ssYG..............hc-sh+AlEh..GAV-....s....Lllh-sL.psc.h.h+pp.............................................................................h-hLsc.......p.h+ph.G.u.plcll....osc..pppGpQh.cuhGGIuulLRYtl................................... 0 245 432 616 +2444 PF03734 YkuD ErfK_YbiS_YhnG; L,D-transpeptidase catalytic domain Bateman A anon COG1376 Domain This family of proteins are found in a range of bacteria. It has been shown that this domain can act as an L,D-transpeptidase that gives rise to an alternative pathway for peptidoglycan cross-linking [1]. This gives bacteria resistance to beta-lactam antibiotics that inhibit PBPs which usually carry out the cross-linking reaction. The conserved region contains a conserved histidine and cysteine, with the cysteine thought to be an active site residue. Several members of this family contain peptidoglycan binding domains. The molecular structure of YkuD protein shows this domain has a novel tertiary fold consisting of a beta-sandwich with two mixed sheets, one containing five strands and the other, six strands. The two beta-sheets form a cradle capped by an alpha-helix. This family was formerly called the ErfK/YbiS/YcfS/YnhG family, but is now named after the first protein of known structure. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.41 0.71 -3.93 173 11311 2012-10-02 23:30:06 2003-04-07 12:59:11 9 81 3160 8 2612 8151 1995 140.70 20 42.83 CHANGED pthlhls.....hsp.......phhhh..hc.ssp....hh.hp..h...........lssG......................p....ttTPhGtaplt............h..t....h..............tt...................................................................h.h.ttts....lhlH..ss......................................t.t..SpGClpl.t.............................pcspplhsh................lt.....................................hu.s..............................Vhl ...........................................................................................s..hlhls....hsp........ph.hhh....hp...ssp.................hl..hs..hs...........lu..hG...............................p.sptT...P...p.G.s....h.p.lp.............................p+t.h.sP.s........p...s..s...h...h..h..t..............h.........h..........................................................................hh.h.t.h.h.s.s.s.s......hhlHs..sss.s...h.................................ps.....SpGClRl..p..s............................pchp.t.L.ash................l..................G.s...........V.................................................................................................................................... 0 765 1615 2089 +2445 PF03694 Erg28 UPF0143; Erg28 like protein Bateman A, Kerrison ND anon SWISS-PROT Family This is a family of integral membrane proteins, which may contain four transmembrane helices. Members of this family are thought to be involved in sterol C-4 demethylation. In S. cerevisiae they may tether Erg26p (sterol dehydrogenase/decarboxylase) and Erg27p (3-ketoreductase) to the endoplasmic reticulum or may facilitate interaction between these proteins [1]. The family contains a conserved arginine and histidine that may be functionally important. 
22.20 22.20 24.60 27.20 21.90 21.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.45 0.72 -4.06 21 246 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 215 0 172 225 0 113.00 34 77.67 CHANGED hGhLshWLlhlSlluhhNolQsYhs..hphoc+lYs...............................scs....sps.osLpuRsFGsWThloullRhauAapl.pspslYpLshhoaslAhsHFhoEhLlF+TspLstshh.....uPLlVuos .................................shLstWLlhVSlluhhNohQsahs..hthsc+lYs.........................................sps...........sts..ssLpuRTFGsWTlloullRhhsAhpl.pspslYplshhoahlALsHFhoEhlla+T.s.phs...hshh.....uPlhVuo.h..................... 0 46 89 137 +2446 PF04622 ERG2_Sigma1R ERG2 and Sigma1 receptor like protein Kerrison ND anon DOMO:DM04578; Family This family consists of the fungal C-8 sterol isomerase and mammalian sigma1 receptor. C-8 sterol isomerase (delta-8--delta-7 sterol isomerase), catalyses a reaction in ergosterol biosynthesis, which results in unsaturation at C-7 in the B ring of sterols [1]. Sigma 1 receptor is a low molecular mass mammalian protein located in the endoplasmic reticulum [2], which interacts with endogenous steroid hormones, such as progesterone and testosterone [3]. It also binds the sigma ligands, which are are a set of chemically unrelated drugs including haloperidol, pentazocine, and ditolylguanidine [2]. Sigma1 effectors are not well understood, but sigma1 agonists have been observed to affect NMDA receptor function, the alpha-adrenergic system and opioid analgesia. 
19.50 19.50 20.00 19.80 19.00 19.10 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.32 0.70 -5.08 7 288 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 227 0 183 275 0 190.10 36 84.41 CHANGED thhtalslhlslluslh.slpphlhpo.YhFD.cplpcluppulu.as.....sscsllpclhDpLpsh.ss.hhh......ssppEWVFNNAGGAMGsMaIlHASloEYLIhFGTslGTEGHoGhHhADDYFsILpGpQhAassGsh-..uEVYssGssH+htpGps+QYpMP..tssaALEhApGWIPsML.FGhhDsLSSTlDh.TLahTshlTuR-MltsLhhsKF ...........................................................h.......hhhshh....l..h.....hl.......hh....p.....alFs.ppltpluppsh.u.........................stpthhpplhsc.Lpp.p.ass..hl.........................tpptpWlF.......sNA.....GG.h.M.GuMhlLHASloEYlllFGTulGTcG.HoGh.a.h....A-sahs.ILpGp.....ht..ah..sG..s..hc...sElahP.GsshhhtpGpspthphs..sssahlEYuRGh..IPshL.FuhsDoh.SThDh.TLahThhhhu+thhhpLh..h............................................ 0 58 101 148 +2447 PF01222 ERG4_ERG24 Ergosterol biosynthesis ERG4/ERG24 family Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.58 0.70 -5.94 5 987 2012-10-01 22:51:20 2003-04-07 12:59:11 12 14 331 0 568 1273 263 356.30 30 87.33 CHANGED +ptcaEFGG.stGAlGIphGhP....lhplahsspucsstsphhhPcohslssLhstI+s.sphlass...h....s+plWTVFhhaaslQAVhYlhLPG+hscGlPLS..sGc+LsY+lNAhao...hllTlAllslLpssplFcLpalhDpFuplhosAIlFuFALoIaLYltSLhssch.hs......c.LAsGGsSGNlIYDFFhG+ELNPRlG......LDlKMFhElRPGhlsWllINLusll+QYcpYGpVoPuLlhVsls..QhLYlsDulhsEEuVLTTMDITaDGFGFMLuFGDluhVPFTYSpQThYLssH.PspluWss.hsluIhllLhsGYYIF+oANuQKNsFR...........T.PtpPpLph..LKaIpTusGopLLsoGWWuhARHINYhGDWlQSLuWuLsTGFs................................olLPYFYslYFhlLLlHRstRD-+KCK+KYGcDWEEYC++VPY.+IIPYla 
.......................................................................................................................................................................................................................................................................php...s..h....h...hhh..hahhh...phh..h..h........h........h......l......P...........Gh...h...spGh.L..........sGp.pL.pYphN...u.h.u.............hhh.s...h...s...h......h.s................h..h....h...h...h.t.......h.h...........h.shlh-.pahtlhsss....hlhuh.h....luhhhYh..p.uhhh.t................................tt.oG..s.hl.YDaah.GtELNPR.l.s............hDhK...hF...h.....p....hR..u..h.h.hhh.l........l.....s......l.................uh..........hhc..p...........h...c..................h.....G............h..l...........o...............s.......s...h.................h.hh.hh...................phhY.lh.s.hhh.Ep...................hhlsoh.................D.lhh-taGFhLhahshshlPahYshpshYLs..............p......P....p....h.s.h......................................hhh.l........h.h.h.h.h..h.u.a.hla.c.s.uN.pQKsh...FR.........................t.P..p.....s..h..h..hh.........ph...h....t...o........t........p......G......s....p......LLs...s...GWW.......GhsR+.p.Y.hu..Dh.h......h..u...h.s......a.s.L..s.s...G...h..s............................................................................p....h...s..a...F.Y.......l..a...h...sh.l.L....h...H...R..t..h..R...D.p.t.+.C.t.pK.YG.p..s..WpcYpphV.a.hhlPhla............................. 0 174 294 458 +2448 PF00769 ERM Ezrin/radixin/moesin family Bateman A anon Pfam-B_851 (release 2.1) Family This family of proteins contain a band 4.1 domain (Pfam:PF00373), at their amino terminus. This family represents the rest of these proteins. 
30.00 30.00 30.00 30.00 29.80 29.70 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.00 0.70 -4.84 10 499 2009-09-10 21:10:39 2003-04-07 12:59:11 14 15 129 5 242 462 0 216.90 35 42.32 CHANGED EchE+cppELcc+hpphEEchpcupccLpcppc+stcL-pctpptccptcpLcpcppchpctpc+LccpstsptcE+ccLttElsEhstclpplcpupp++EpEsschpp-lppupEcc-cs+pchhthhsss.....................s....l.ts.....................sssptct.ss-sst-tSp-L-......s-sphccps...EEcRlTtscKNE+lQcpLpsL+oELussRDEoK.coshDhlHpEN.lRtGcDKYKTLRpIRpGNTKpRVDpFEuM ....................................................................................................EphE+p+pEL.cRLhphcEpsp.pA.pc.tL...tcppcpA..Ltpctphsp-.EA..c...h...........Ltp.ct.....tpA..cpthpclpppuhcp...tcppcp.LttcltEh....p.tpl..ttlp-tpc+....+ccEsp......ph.p.p....chppAp-s.c+s+pcLhthhsts.......................................................s.s............l.ts.........................................st.p..p...........s.....-....s...t....t...t...h....St-hp...............................s-s...hpchs......Ec-Rls.h-Ks.c.+lpc.QL............ptLpoEltth+.cp+...cTt.DhlHsEN...h+.t.G.c..sKYpTL..+........p........l+pGsTKpRls.FEth................................................................................................................ 0 56 75 146 +2449 PF04137 ERO1 Endoplasmic Reticulum Oxidoreductin 1 (ERO1) Wood V, Finn RD anon Pfam-B_4729 (release 7.3); Family Members of this family are required for the formation of disulphide bonds in the ER [1,2]. 
21.60 21.60 36.10 24.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.55 0.70 -5.44 39 459 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 292 6 304 440 6 345.10 38 75.19 CHANGED pplhshLppLsp.scaF+aaKVNLh.ppC.............PFWscs..shC.ss..ps.....................CsVpssscp-.lPt.............sa+h....phps.......ppspp..tt...............hppslss.......tt.pthtthc-...hsp..as..h-Dpss...pusYVDLltNPERaTGYsGts.utplWpuIYcENCF........................spss.p.................................................................s.......ClE++lFYRLISGlHuSIusHLstcY........l......ppcs............................WtsN................lphFhpRl......u.......paP-RlpNLYFsYtlllRAlsKlpsah...pph..ph....ou...sp..........tpDpcs.......+phlpsllsplps....hsth...FDE.....shhFpss..tuhpLKc-F+p+F+N.lSclMDCVsC-KCRLWGKLQTpGhGTALKILFp.ss..........ppt........hp...LpRsElVALlNshsRlSpSlptlc .........................................................h.tlhPhlppLhp.psaF+aa+....l.sLh..+.C.............PF..W....s-p.....uhC.s...ps................................................CuV....p.ssppc.lP.................................................sh+h....sphpt..........tppspp.....tt.............................lppslsp..........tt.hthtthD-...tpp..aC....-Dpps...spspYVsLlhNPERaT.GYpGss.AhplWpu..IYcENC.F..............p.ts...................................................................ts.C..lEKRsFY+LISGLHuSIshHLstcY..............L.pps......................p..........WssN................lp.FhpRh......s......ptPcR.lpNLYFh.Yhl.LRAlsKhts..ah......pp.......ph..h.oG..s.s.................tpDtcs........+phlhp.llpphps....................hs.h...FDE..............sthF..tss...................pu.pLK.c-.F+t+F.+N....lS+IMDCVGC-.KCRLWGKLQ.stGhGTALKILFp.ct...................................pp.........hpLpRpElVALhNs...hs.RlSpSlptl.............................. 
0 109 169 247 +2450 PF03238 ESAG1 ESAG protein Bateman A anon Pfam-B_3037 (release 6.5) Family Expression-site-associated gene (ESAG) proteins are thought to be involved in VSG activation. This family includes ESAG 117A Swiss:P04477 as well as ESAG IM Swiss: Q26705. 20.20 20.20 20.20 23.00 20.10 20.10 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.47 0.70 -5.06 10 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 8 0 13 53 1 220.90 55 71.32 CHANGED sM-scpcKLEKLISYGNpMGDLVAKsGGLFAEVNESVRAVRKEIPuALIKsNKYYTAIAEITRTVWDDVKAlth..s.ucscCp-QcFcGVtEhcspCGDpTCPLucs.VsEuALpKYKsGCLplsVhsGSVScChNLPRsNLY+SGAlsoSs-sLcW+-cp.t.uphFQLpL+VcsIFGPLIAsFAAGQPPSsLhEMMsNITSLpSRFNEVHuNFTSLLlssNlssNVssTcSTI .....MctpcDKLEKLI.SaGNpMGDLVAKsGGLFAEVNESVRuVRKElPsALI+sNKYYTAIAEIsRTVWDDVcult....s..sctcCpspchcuVtEhcspCGspTCPLtcu.VsEuuL.pKYKsGCLplsV...s..Gp.Vsc.ChNLPRssLY+sGAVpsSscsLcWcpst...sthFpLpl+VppIFuPLIusFuuGpsPSsLh-MhsNITSL.S+FNEVHsNFTSLLlssslpssVssTsSTI............... 1 0 13 13 +2451 PF03433 EspA ESPA; EspA-like secreted protein Finn RD anon Pfam-B_4100 (release 6.6) Family EspA is the prototypical member of this family. EspA, together with EspB, EspD and Tir are exported by a type III secretion system. These proteins are essential for attaching and effacing lesion formation. EspA is a structural protein and a major component of a large, transiently expressed, filamentous surface organelle which forms a direct link between the bacterium and the host cell [1,2]. 
22.20 22.20 22.80 45.30 21.80 22.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.20 0.71 -4.86 7 296 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 247 1 18 151 0 172.00 48 96.44 CHANGED hS....ssshtuusp.sssssSpshsss.hups-slp....husuhusLh.hhhhh...o-LupuKaspMpppuccu+suQcMANplDthIAclpcss-K.tp.cLPp-VlcYhpD..NGIpV....DGhp.hs....................................................................................ttLspG-LpsVKuAL-spuNpsoDhVsQuQLplQph.pohNsssohhsuhQoh.uch.pu ......................................................................pss..hs...ss..hutp-slp..N.hu.h.usLLhhhhhh...pslu.sKF.......h-hpcsuccupssQchuN.hDthIAcstpus..s.K.....sKtclPpDVIcYhpD..NsIhl....sGho..s.......................................................................................................GcLstGsLQsVKAAlsscANp.TslhspuQlpIQpMSppLNhlhothosl.Sh.hch.S.h.................... 0 6 8 13 +2452 PF04806 EspF EspF protein repeat Mifsud W anon Pfam-B_3518 (release 7.6) Repeat The enteropathogenic Escherichia coli EspF secreted protein induces host cell apoptosis. Its proline-rich structure suggests that it may act by binding to SH3 domains or EVH1 domains of host cell signalling proteins [1]. 20.20 20.20 21.30 20.20 17.50 20.10 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.40 0.72 -3.77 3 690 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 113 3 0 484 0 43.70 74 73.26 CHANGED IQPARSMAEHIPPAPNWPAPoPPVQNEQSRPLPDVAQRLVQHLAEHG ..............IQPARsMAEHIPPAPNWPAPsPPV..Q...NE...QSRPLPDVAQRLhQHLAEHG......... 0 0 0 0 +2453 PF00756 Esterase Putative esterase Bateman A anon Pfam-B_476 (release 2.1) & Pfam-B_4968 (Release 7.5) Family This family contains Esterase D Swiss:P10768. However it is not clear if all members of the family have the same function. This family is related to the Pfam:PF00135 family. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.57 0.70 -4.96 20 9472 2012-10-03 11:45:05 2003-04-07 12:59:11 15 87 3654 69 2036 9626 1891 233.50 17 73.54 CHANGED Sssh.spchpltlaLP.t......ssspphPllYhLDG.......ssa.pphsstphhpphstctth..sh.lshPhGsps.....t.h...........ssptstsppap..salppELsshlcusasspsps.......pulsGpShGGhuALhhALc..aPcpFspluuhSsh..............................ss.Wu..........tpst..psDshhph.t.ssstsslplhlssGsp-s.ht..ph.sph.........hphhstsppht..hphpthsthsc.a......usHshth..WpspLssthhph ......................................................................................................................h.tpphph..t.lhhPss..............tsspth...Pl.l...a....h.....L....c..G.....................................ssh..........p......p....h....t.....h....t.....t....h....h...p.....p.....h.....t.....t.......p....h...t......h.........h.....h...h....l...s......h............s.......t....s.....s.ps.......................h...................................................h.......t......t..........t......s........h.....s.....t..p....at...........sa..l..t...p...E...L....h....s...h...l...c....p...p...a...s....s..sspt..............psl.s.Gt.ShGG..h..s.A...L.h..h...u.lp......p..P....s....t....F.s.......p...l....s..u..h..S..s.h....................................................................................................................................s..Wh....................................t........h........p.......s.....s..h.............h........................................t.....t...........p....t....h............l......h....l...s..........G.....t.......t.....-.........................................................................................h.....t...t.....t..t.h.............h..t.....h................h.........p.....h..........................................a.tth.......h..........................................................................................................
................................................................................................................... 0 630 1289 1731 +2454 PF01684 ET ET module Hutter H, Bateman A anon Hutter H Family This domain has no known function. It is found in several C. elegans proteins. The domain contains 8-10 conserved cysteines that probably form 4-5 disulphide bridges. By inspection of the conservation of cysteines it looks like cysteines 1,2,3,4,9 and 10 are always present and that sometimes the pair 5 and 8 or the pair 6 and 7 are missing. This suggests that cysteines 5/8 and 6/7 make disulphide bridges. 21.80 21.80 28.50 29.40 21.70 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.89 0.72 -3.95 22 150 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 5 0 150 131 0 83.50 33 82.03 CHANGED CYsGlass.sss...hssuuhphCpG..pCuSloht.shNG..pssolYsCsPsslCpsLslss......sCssl.ss.........louCCCs.sssC.sssss CYsGlass..sss....hssuuhphCp.G..pCuSloh..shsG...pssolYsCsPsslCpsLslss......sCsslpss.........louCCCs.s-sChsss.s....... 0 42 61 150 +2455 PF00766 ETF_alpha Electron transfer flavoprotein FAD-binding domain Bateman A anon Pfam-B_853 (release 2.1) & Pfam-B_1321 (release 3.0) Domain This domain found at the C-terminus of electron transfer flavoprotein alpha chain and binds to FAD [1]. The fold consists of a five-stranded parallel beta sheet as the core of the domain, flanked by alternating helices. A small part of this domain is donated by the beta chain [1]. 
21.00 21.00 22.40 23.80 20.90 20.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -9.80 0.72 -4.36 39 4727 2012-10-03 09:55:27 2003-04-07 12:59:11 14 26 2782 14 1280 3485 2076 85.60 48 26.01 CHANGED ps-.lspAcllVuGGRGltu..tEsFc..llp-LAchL.GusVGuSRssV-..........sGWhss-+QVGQTGKoV+PcLYIAsGISGAIQHhuGMcsSc .............................p.s-LspAclVVSG..G.RGluu..........t-s.ap...llcpLActL......G.A..s.l......GuSRssV.D..........sGa.hsp-.hQVGQ.T...G+h.V.s.PcLYlAlGISGAIQHlAGhpsSc.................. 0 448 834 1086 +2456 PF01012 ETF ETF_beta; Electron transfer flavoprotein domain Bateman A anon Pfam-B_1321 (release 3.0) Domain This family includes the homologous domain shared between the alpha and beta subunits of the electron transfer flavoprotein [1]. 24.20 24.20 24.40 24.40 24.10 24.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.97 0.71 -4.53 174 9173 2012-10-02 18:00:56 2003-04-07 12:59:11 16 30 2828 46 2509 6583 4289 156.00 22 53.86 CHANGED sshlls-.p..s......ssplp.shshpslstAtplupt..........p..lssls.......hG.....s.spsspt.....hhs.hGsccllhlsssthsth...sstshutslsphlpp..........ss..lllhussshucs....lusplAstLshshlosss..tlph..pss.................hh..hpRshh...uGp.thsplphs.....tllTlcs..sshps ..........................................s.........t....phtls..hsh.pslptAh.pLtpt.............................p..ls.sl.s................hG.....s..tps......t.p..s.........shs......h............Gs-cslll..p......s..s..t..h.t.th...........s..shApslsshlcp.......................tt..hs....lllhG...ssuhscs................lustl.AthLs.....h..s..hssso....plph....s.s.sp..................................hs.hpRtl...uGh..thh..p..l...phs......sllTlp.sh.p................................................... 
0 863 1630 2135 +2457 PF05187 ETF_QO ETFD; Electron transfer flavoprotein-ubiquinone oxidoreductase Wood V, Bateman A anon Pfam-B_2305 (release 7.7) Family Electron-transfer flavoprotein-ubiquinone oxidoreductase (ETF-QO) in the inner mitochondrial membrane accepts electrons from electron-transfer flavoprotein which is located in the mitochondrial matrix and reduces ubiquinone in the mitochondrial membrane. The two redox centres in the protein, FAD and a [4Fe4S] cluster, are present in a 64-kDa monomer [1]. 20.40 20.40 20.40 20.40 20.10 19.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.44 0.72 -4.06 73 1342 2012-10-03 08:56:42 2003-04-07 12:59:11 8 18 1195 4 515 1183 1543 109.60 47 19.70 CHANGED GhhhGhshuul-p...lhp....Gps..PW.TL+.cppsDassLcsAscsp..IsYPKPD....GtLoFD+LSSValSsTNHEE-QPsHLpLc....DsslslshNLshYsuPppRYCPAGVYEhVcc- ................................................GhhhGhhhsGl-p......lht....Gph.Pa....T.....L+.c.p..........c..s............DttsLcs.As..p...s...p........I.....sY......P.K..P.......D....G.pLoFD+.LSSValSsTNHEE-QPsHLpLp..............Dss..lPls.h.NLs...hYs..uPppRY.CPAGVYEhVcp.t............................ 1 157 287 409 +2458 PF00178 Ets Ets-domain Finn RD anon Prosite Domain \N 21.50 21.50 21.50 21.50 20.90 20.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -9.92 0.72 -3.84 15 2375 2012-10-04 14:01:12 2003-04-07 12:59:11 17 22 255 41 1162 2070 0 80.40 50 21.09 CHANGED plpLWQFLLcLLsD..psppchIpWss.csGEFKls..DP-cVARLWGp+KN.KPsMNY-KLSRALRYYYc+sIl+KV..pGcRasY+Fsss .........................lpLWpFLL-LLpD....ps.stphIpWps.....c...........pGcFK..lh...Ds-c..............V.ARhW....Gt+Ks......+..P.s..M..N.Y.-KLSRAL......R............Y.Y.Yc....+s..Il..pK..l.....p.G...cRhsY+Fs..p..................... 
1 249 337 689 +2459 PF03318 ETX_MTX2 Clostridium epsilon toxin ETX/Bacillus mosquitocidal toxin MTX2 Mifsud W anon Pfam-B_3569 (release 6.5) Family This family appears to be distantly related to Pfam:PF01117. 22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.49 0.70 -4.80 15 164 2012-10-01 20:43:00 2003-04-07 12:59:11 8 8 81 12 52 305 3 208.90 14 66.88 CHANGED sshsshsshhspshs........................h..t.sphhshsshasussslsNsTsppQphpThSFscshTcTsSsosTpGhp....huspssuphslsh.....ssEsslpho..lo..YNa...ooosTpTsososphhsPSQsVsVPP+spspsshhlt+sshs.sshpLhssl.up....................shho.hshscsshl.shttstssshsphssshs.hsssspl.phpGoGhhc.hstGsphhl+hschshssssutphs .............................................................................................h...............................................p....shhhtpshhpN.s.ost.p.p.p.h.p..o...paspshs.po....s..ohosppGhp....huhpsshp..hslPh.....hsp..sshphs..hp....ash......spo...po...ps...so.pp.......p.ph.h..ss....sp...s...l..tVPspppspsphh.lpcsp...hp.ssh..p..h........sph.............................................................................................................................................................................t................................................................................................... 
0 27 34 46 +2460 PF01459 Porin_3 Euk_porin; Eukaryotic porin Bateman A anon Prodom_3211 (release 99.1) & Pfam-B__3211 (release 7.5) Family \N 26.90 26.90 26.90 27.30 26.50 26.80 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.59 0.70 -5.10 90 1317 2009-01-15 18:05:59 2003-04-07 12:59:11 17 7 369 3 757 1211 9 250.60 22 88.76 CHANGED sPssap-luKcs.+Dlhs+s.a.hssh+hclssh..ssu.stFpsou.............stshtss.......................hsstslpspat.......shshs................................................................hphss.ssslssplplpcthstu..l+sphssphsssp......psupl..chpYppsphshssslsh..t......sshhsushlhuhs.slslGs-ssacsspsphsp.shuluas.sp..................................cahsuhpl.sp......ssshsuSaap+ls..splpsGs-hshshsss.................pssho..........lGspYplcpss..tl+u+lsssGhlushhpccL..psslplsluuplDsh+t........stKlG .......................................................................................Pssap-ls+ps..+..Dlhsps....a....h.s..hh....+hslpp.....s.u..s.tFp.sst........................shphtst..........................................................hssh..slpspat................shshh.......................................................................................................................................................phss...s...sshssp..h.pl.p.pt.l.spu...L+.hph..ss....p...h......s...s.s.p..............................psspl.......chpYpt.pphshs.ssls...h...................ssh.hh..ss...hlh.u..hp...s..........hhhG.hphsa.cttp.sp..hsp........shs...luat..s....t....................................ca.hsh.pl.pp..............tsth.tuSh.a..p.....+.ls......pplpsuschsh..s.h.tsp.................p.s.p.hs...lu..spap..lctss..hh+....u..plsssuhluhhhpppl....t.s.l.pl..slu..u.lDthp...........shKhG.................................................................................... 
0 215 374 573 +2461 PF04346 EutH Ethanolamine utilisation protein, EutH Mifsud W anon COG3192 Family EutH is a bacterial membrane protein whose molecular function is unknown. It has been suggested that it may act as an ethanolamine transporter, responsible for carrying ethanolamine from the periplasm to the cytoplasm [1]. 25.00 25.00 28.10 28.10 23.30 23.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -11.99 0.70 -5.49 27 734 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 686 0 58 324 5 371.20 64 95.93 CHANGED M..s.INcIIlYIhhlFhVlGAlDpl..................lGs+h.GLGccF-EGlhuMGsLAluMsGIhulAPlLuplLpsllsPlaphlGADPuhFAsTlLAsDMGGY.LAppLAp......s.cuhlauGlIlGuMhGsTIVFoIPVuLGlIcKcD+cYhApGlLsGllTIPlGshluGl.ls..............G......hshhhllpNLlPlllhulLlAlGLhhhPstMI+GFhhFGKhlsslIslGLshullptlTG....hslls............GhsPI...........pculpllGpIuIhLAGAFPhVhllT+hhpKPLttlGchLGhsssuAAGhlAoLANsIsMFthhK-MssRGKllNlAFuVsAAFlhGDHLGFoAuhpsshIhPhllGKLlGGlsAlhlAhhls ..........................Mu.INEIIMYIMMhFMLIuAVD+.I..................hGppl.G.GuQFEEGFMAMGALuLAMVGhsALAPVLA+VLGPVIlPVYEhLGAsPSMFAGTLLAsDMGGFhLAcELAu......DsuAWLaSGLILGSMMGPTIVFSIPVALGIIE.sDR+YLALGVLAGIVTIPIGCIAGGL.lAhhsth............sh..lp......FoFuLILhNhIPVlIVAlLlALGLKFI.PEKMIsGFQIFuKalVALITlGLAAAllcaLLG....acLIP.........GlDPIhhs.....t-.hRAIEVIGSIusVLhGAYPMVh.LLTRaFcKPLMsVGKlLsM.NslAAAGMVATLANNIPMFGMMKp.MDsRGKVINsAFAVSAAFsLGDHLGFsAus.h....s.uM.IFPMIVGKLlGGVTAIuVAhhL.s.................. 0 29 41 51 +2462 PF03319 EutN_CcmL Ethanolamine utilisation protein EutN/carboxysome Mifsud W anon Pfam-B_3053 (release 6.5) Family The crystal structure of EutN contains a central five-stranded beta-barrel, with an alpha-helix at the open end of this barrel (PDB:2HD3). The structure also contains three additional beta-strands, which help the formation of a tight hexamer, with a hole in the center. 
this suggests that EutN forms a pore, with an opening of 26 Angstrom in diameter on one face and 14 Angstrom on the other face [2]. EutN is involved in the cobalamin-dependent degradation of ethanolamine [1]. 25.00 25.00 27.80 27.40 21.20 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.57 0.72 -3.60 51 1340 2012-10-03 20:18:02 2003-04-07 12:59:11 8 3 931 33 216 583 325 82.50 46 89.31 CHANGED Mhlu....+VlGslhuTpKsssLsGtKLLlVc.lc...........ptt.tuphhVAsDslGAGhG-hVLlsp.GSuARtshss.p.ssPl...DssIlGIlD ..........MpLAcVsGslVuTp+ppuLsspKLLlVchls...........................p.s.p..ssupstVAlD.slG.AGsGEhVLlss.GSoARpu.hps...p....stPl.....DhsllGIlD................... 0 95 156 191 +2463 PF02472 ExbD Biopolymer transport protein ExbD/TolR Mian N, Bateman A anon Pfam-B_2343 (release 5.4) Family This group of proteins are membrane bound transport proteins essential for ferric ion uptake in bacteria [1]. The Pfam family consists of ExbD, and TolR which are involved in TonB-dependent transport of various receptor bound substrates including colicins [2]. 
25.40 25.40 25.40 25.90 25.30 25.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.75 0.71 -4.15 80 6991 2009-09-11 05:16:59 2003-04-07 12:59:11 11 4 2435 5 1847 4807 3274 131.40 24 90.36 CHANGED cpp.tsplslsPhlDlhalLLlFFhl.oushsptt....h..plsL.Psssssp................hpppptlhlsls....tc.....sp......hh.......................................lssp...h...shppLtspLpphtpppsp..........................lllpuDpsssappllplhsthpps.Ghpplslssptp ..........................................p....hs-INlsPhlDVhLVLLlhFMl..Tushhsps.................l.....plsL...P.pus..sss.........................spspps.l..h.lsls............ss................sp....................lh............................................................................................................................................................lspp..........l............shpp.l.t.s.t.l..p...p...ht.pt..p.sps........................................................................hlhlp...uDcs.ssYpplhplhstlpp..u.Gh..p.plulhst..t.................................................................................... 0 593 1191 1567 +2464 PF01541 GIY-YIG Exci_endo_N; GIY-YIG catalytic domain Bashton M, Bateman A anon Pfam-B_489 (release 4.0) Domain This domain called GIY-YIG is found in the amino terminal region of excinuclease abc subunit c (uvrC), bacteriophage T4 endonucleases segA, segB, segC, segD and segE; it is also found in putative endonucleases encoded by group I introns of fungi and phage. The structure of I-TevI a GIY-YIG endonuclease, reveals a novel alpha/beta-fold with a central three-stranded antiparallel beta-sheet flanked by three helices [4]. The most conserved and putative catalytic residues are located on a shallow, concave surface and include a metal coordination site. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.17 0.72 -3.61 105 9798 2012-10-01 19:55:08 2003-04-07 12:59:11 19 83 4836 20 2329 7330 3586 77.80 26 19.92 CHANGED psslYhlh....st......ps...................phh....YlGpo.p..sl.ppRh.ppH..hps.......tptpht.................hpshphh.hlphhpspptt.........hphEpthlpthps.......thN ........................................................................................t.sulYhhp..........st........ss.....................................phl.....YlGpu...p........s...L...+p...Rl..psa.......hps..............p.s..s..p.t.ppt............................hpsh..phh....h.h..p..h...s..s......s....c.scA........................LhhEt.pll+hhps..................................................................................... 0 771 1529 1995 +2465 PF03081 Exo70 Exo70 exocyst complex subunit Mifsud W anon Pfam-B_2462 (release 6.4) Family The Exo70 protein forms one subunit of the exocyst complex. First discovered in S. cerevisiae [1], Exo70 and other exocyst proteins have been observed in several other eukaryotes, including humans. In S. cerevisiae, the exocyst complex is involved in the late stages of exocytosis, and is localised at the tip of the bud, the major site of exocytosis in yeast [1]. Exo70 interacts with the Rho3 GTPase [4]. This interaction mediates one of the three known functions of Rho3 in cell polarity: vesicle docking and fusion with the plasma membrane (the other two functions are regulation of actin polarity and transport of exocytic vesicles from the mother cell to the bud) [3]. In humans, the functions of Exo70 and the exocyst complex are less well characterised: Exo70 is expressed in several tissues and is thought to also be involved in exocytosis [2]. 
23.70 23.70 24.30 23.80 23.30 23.60 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.07 0.70 -5.67 58 882 2012-10-03 17:31:52 2003-04-07 12:59:11 10 11 263 7 540 852 0 323.30 22 57.29 CHANGED hppWlpshphshpslhtsEc.pLhspl................asst..s.......tppsFscls.ppslt.....pllphsctls...................pstpssp.tlhcll-hhpslpch...hsplpthhps.........t.hh...phpph..........hppLtcsspphht-hhstlpp.................psspphs.ssGulp.lTphlMsalphLs..-apssLspllts.ssss...................................sts.luphlspllssLhssL-t+....u+thp............................c.uL........ptlFLhNNhpalhpp..lcp............S...cLtslLGsphhpchp...pthcpahst...YhcssWspl.hshLps...........................................tths..........spppp.hK-+.............h+pFspuF--hhppQ.ppapl..sDspLRppL+psltphll...PsYppFhs+atstht.....t........al+as.s--..lcshlscL ..................................................................................................h.hh..shthlh.sEh.plhppl................................hst..t..............tttsFsphs..pssh.......tlhp.hscsls.......................hthpsst..plhpll.s.hhpslpph.................hsphpt.hhts......................................................htt...ph.th..................hppltpsst.thh..t...-h.ptlpp.................pstpshs.........s.GslH...lTp.s....hpalphLh..cappslttlh.tp.ttts........................................................................................................spp.luth....lhplltsLtts....L-sK.............u.+.h.Yc............................-.uL................pt.lFLhNNhpalhpp..lcp...............................................................................S......pLttl..l...u..p....p.hh.pphp....phh.cphhpt...Y.cs.Wtpl.hsh..Ltp........................................................t..t.........stpp...p..l.K.c+.................hctFNttF--hhptQ..ptWhl.....sD..pLRppl+.sltphlh...s.sYpt..Fhp+.atth........................t.t+al+.Ys.scplpphltp..................................... 
0 109 289 425 +2466 PF04257 Exonuc_V_gamma Exodeoxyribonuclease V, gamma subunit TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The Exodeoxyribonuclease V enzyme is a multi-subunit enzyme comprised of the proteins RecB, RecC (this family) and RecD. This enzyme plays an important role in homologous genetic recombination, repair of double strand DNA breaks resistance to UV irradiation and chemical DNA-damage. The enzyme (EC:3.1.11.5) catalyses ssDNA or dsDNA-dependent ATP hydrolysis, hydrolysis of ssDNA or dsDNA and unwinding of dsDNA [1]. This family consists of two AAA domains. 19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 805 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.30 0.70 -13.43 0.70 -6.56 84 2140 2012-10-05 12:31:08 2003-04-07 12:59:11 9 6 1971 4 352 2371 649 635.20 26 61.91 CHANGED MlplapSN+hEhLsphLsphlp...pPhsp.........Phps...EhllVpS.GhspWLphpLA............pptG..IuAslcF.shPusalhphhppl.lsp..ls...p....ps.s........FsppslsW+lhplL.....Pp......hh.........pp.st.atsLppYL.............sscsst...........h+haQLAp+lADlFDpYhlYR..............P-WlssWcpG.......................tthstspp.W...........QshLWRtL...hpchtttt.t...p..........hh.....hptlpp..tptt.ptLPc.RlhlFGlSsLsshhLclLpuLuph..t-VpLahhNPsppaWu..-lhs..p+phhp...................................t.tppthhttu......sPLLAuhG+.GR-ahphLtph.........................ttpch-hF...........ssppss...............LLpplQs-ILp..................tttptpht..hstsDpSlplHsCHSshREVElL+DpLLphh....spD....s.s......LpP+DIlVMsPDl-sYAPhIpAVF....u.................stptlPasl.uD...pshpppsslhtuhhpLLsL.spoRhsss-llsLLpsPsltp+Fslspp-lpplcpWlppuGlRWGlDtpp+pp...h..ths....s.pppoWphGLcRhLLGaAhs.................sssh..........hp....slhPa.splpG.puphlG+LhphlcpLpphhppL....pps.pshppWtphlppllsshF...sss.....scpphp...lptlpptlsph.tptsppus...............hpp..........lslsllpphLtppLsppsts..tFhsGsloFCslhPMRuIPF+VlCLLGhN-GsaPRpptssu.FDLhsp..p.....+.hGDRu+R--DRYLFLEALLSARcpLYlSYlGp.sh+DssphsPSlllsELl-alppsh..................stpshpph...h......
hp.HsLpPFs.phF...................tssthhSas..ppahtstpthpptt.................................................tshhst.....................stst.pslsLpp .............................................................................................................................................................................................................................................hhlh.upph-.L.thh...ht.......t..............sht....p...l.lV.u.uhtpaLp.tl..u.....................tt...G..lsAslt.h..hstthhhphh.th..h...........t................ps...............h...s..t.h.W.p..lht...ll............................t.........h.................................tp..........h..........lttaL.............................................t.....tt............................thaplutphAslFspY.hhR...............sphl.tWtts........................................h.....tt....W............Qs.LWp.tl...ht.h.t...t.......................................................h.....httl............t.t..thPt..p...lhlhuhsths...lphlttluph....hplhlhh.sPst..hWtsl.t........t.h.t...............................................................t........psL.LsthG+.sp-...hL.t.............................................tha.........................................................t...ts............hLtt..lQtslhp...................................................................................t.ptth...ls.tD.p.S.....lphasC..Hos.RElE.lLp-pLLthl....pps..............s..s........LpP+DllV..hs..sD...lssYsPhIpAlF..................u.....................spthlP..atl.uD...pp.stp..pp...Pl...lpshhsLLsL.......p......uRa.stp-llsLLcs...ss...l.t.t...+F...slsp.p....slphl......cpalptuGI...R........a..Ghsttphtt...................ths........t.ttpoWp.hu..l.....p...R..hlL....Ghuh.t....................tts...............h.t....s....hh.s........h...s.......t......tu.........hth.h.GpLsp.hl.....tLp........hhp...tl...........tp.......t.sh.t...p.W.h......h...h...p...p...h..l..p...thh.........s..............t.pttht..............ht.lpp....h...tth...htt.
..h.t.st.........................h..tt....................lsh.t.h.l.tp.....l.t..t..t..L......st.t.......thp....t..a.h..sG...lshsohhshRulPa+llslLGhs.-Gs.aP..c..p.................s.......s......h.......D......L....hst...t..................t....G.........D.R.........s.........pR...p...-...-R.hL............hL.-u......l..huApp...Lhlo..Yh.G..t..s..p..ss...p.....h......Pusl...lp...pL..h-.h...ltt.h...................................s..th..........h...Hsh.sFs.p..a.............................s.....sas..tphh.httt..tt............................................................hth...................................................................................................................................................................................................................................................................................... 0 96 194 283 +2467 PF02601 Exonuc_VII_L Exonuclease_VII; Exonuclease VII, large subunit Bashton M, Bateman A anon COG1570 Family This family consist of exonuclease VII, large subunit EC:3.1.11.6 This enzyme catalyses exonucleolytic cleavage in either 5'->3' or 3'->5' direction to yield 5'-phosphomononucleotides. This exonuclease VII enzyme is composed of one large subunit and 4 small ones [1]. 
22.30 22.30 22.40 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.94 0.70 -5.24 27 4810 2009-09-16 18:18:13 2003-04-07 12:59:11 10 9 4139 0 1058 3717 1818 268.20 31 69.15 CHANGED lFchs+KpslPthPp+IullTStouAuhpDhhcshpc..RhshsclhlhsshVQGcsAspplhpAlpph..s....th..hDslllsRGGGShpDLhsFNcpplsRslspss..lPVloGIGHEsDpTlsDhVADhRssTPTtA....Achllsctpcht...ppLpshppclppshppplcpppppLthhppphh..s........................................lpppppplpphtp+ltp......................shpphhpptppplpp...................................................................................................................................................LpppLcshsPpphLpRGaullh.pcG+llpsspcl.cpsctlplpht-Gphtspl .......................................................................LFs.phKpslP..hscplGllTS..o..GAul..+Dllpslpc....RhP..hp.lllaPshVQGppAs...........t..pIspulphh...............s....t.......h......t.....................p.......hDllIluRGGGSlEDLW.sFN-.EhlsRAlhtsp...hP.llSuVGHE.sDh.TlsDaVADhRAsTPouA.............AEhs.s....s.s.t.t.-hh....pplpphpt+l...tpshpph..lp.ppppplpplppph.htp.....................................................................P.hh.......hpt.p.tp.p.lcph.p.p..c.lpt....................................................................shp.p.hlp...ptppphptht....................................h.......................................................h......................ht..................................................................................................................................................................................t.ttpht.h.ptLpsl.s.P..th....lt..RGY..u...ls.p...................p.....p.s......p....h.lppsppl...p.sptlphphtDG.lps............................................................................................................................................................. 
0 371 715 913 +2468 PF02095 Extensin_1 Extensin; Extensin-like protein repeat Mian N, Bateman A anon IPR002966 Repeat \N 15.00 0.20 15.20 0.20 14.60 0.10 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.48 0.74 -5.49 0.74 -3.49 28 430 2009-09-16 13:20:14 2003-04-07 12:59:11 10 19 12 0 56 410 4 9.60 82 76.50 CHANGED PPVYKPPVEK .....PPVYKPPVEK.... 0 0 56 56 +2469 PF01267 F-actin_cap_A F-actin capping protein alpha subunit Finn RD, Bateman A anon Prosite Family \N 22.30 22.30 37.40 26.90 21.60 20.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.93 0.70 -5.34 39 514 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 296 35 304 465 1 246.50 40 88.89 CHANGED pptcl..ssphlpsAPPGEls-VhsDl+sl.hss..........t...lpss...ltsAhppYNpcphsslclss..ppsllop.ascl..........tss+YhDspspptFphDHlppcusshps......h....spph-.h.....p..psl.cpLp....tYspcaY.ssushsV.........................................................................................................................................ashpssst........................ltllIhup+asspNFasGcWRSpaphs......t...plpGplclplHYYEDGNVpLpssKshpp......sh...ss...t.ApsllptIpphEspappplscsaspls-ssF.KuLRRpLPlTRsKlsWs+.lssY+LGp- 
.....................................hcIsspFlhpAPPGEh...s-Vhs.......Dl+hL.ls.s..........-.sllcps...hs..pAFtpYN.hcQh..sslc.l...pG...ppp........V...lIocascL....................................................usu+ahDPcs.phsFpaDHlcpcAo.Dsps...........hp.....stth-th................R.pulppsLp....sYlp-HY..ssGs...s.s..V.......................................................................................................................................................................................a.spp.csp................................plhssIpuppapspNFWNGRWRSp.Wphsh......ssss....plsGhl+lp.VHYYED.GNVpLsopK-lpp................ols...sss......t..psApphl+hlctsEscYQs....uls-sapshS-ssF.Ku.LRRQLPVTRoKl-WsK.lhuY+lGp-......................................... 0 82 132 212 +2470 PF00469 F-protein Negative factor, (F-Protein) or Nef Finn RD anon Pfam-B_128 (release 1.0) Family Nef protein accelerates virulent progression of AIDS by its interaction with cellular proteins involved in signal transduction and host cell activation. Nef has been shown to bind specifically to a subset of the Src kinase family. 
20.60 15.00 20.70 15.00 20.30 14.90 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.64 0.70 -4.97 20 19129 2009-09-11 11:41:56 2003-04-07 12:59:11 15 3 100 35 0 15393 0 175.30 58 97.39 CHANGED GuuhSK+pspsh.slRERLh+s.ttssutp.....t....u.st.spttsS.shEtp.thhpucs................hpppNhD...l-up--.-cVG.hPVpPpVPLRsMTYKhAlDhSHFlKEKGGLEGlaYScRRpcILDLYl.+EpGIhPDWQNYTsGPGlRYPhsFGWhaKLVPV-spptsE....-sEspCLlHPuQppthDD..scGElLhW+FDspLAhcacAhphaPEpatc .................................................................GGKWS.K.p.p.h...h..G.Wss.lRERh+Rs......psu.t...........................G.V.G............AsSRDL.E+.+....GA.l.T.oS.N................Tst.sN.u..-..C...A.W.L...E.......A......Q...E..-....E...E...VG.FPVRPQVPLRPMTYKu..AhDLS.HFLKEKGG.L.EGLla.Sp.+RQ-ILDL.WlY+TQGaFPDWQNYTP..GPGh..........RaPLTF.GWCaKLVP.V.-spclEc...scGEss.sLL.H.Ph......s..H.G.h-D..s..ctEVLhW+FDSpLAh+.H..hA+E.hHPEaap................................ 1 0 0 0 +2471 PF03807 F420_oxidored NADP oxidoreductase coenzyme F420-dependent TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.98 0.72 -3.56 56 6885 2012-10-10 17:06:42 2003-04-07 12:59:11 12 28 4452 62 2074 23061 11556 93.30 25 34.58 CHANGED plull.GsGshGpulupthstsG.......pplhhusuRss-+httht.pphs............htsps.sst-Asp..tu-lllluV.hpth.plhppl........thhps+lllsssss 
..................................................pIuhI...G.s...Gs...M.u....p..A...l...h....p...G..l....l.....p....s..u.................s.ppl.....h......s........s..t......s..........p...p...h........p....t....l...t....p.p.hG......................................h.p..s..s..s...........s...s......t..c.....s......s.p........pu.....D.....l.....l..l.......L...A.......V....K....P..p.....h....h......t...p...l.l...ppl............tth.t...p..s.p....l.llSlsAG.................................................................................................................................... 0 639 1255 1704 +2472 PF01115 F_actin_cap_B F-actin capping protein, beta subunit Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.80 25.50 21.20 22.80 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.57 0.70 -5.17 31 384 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 276 28 239 354 4 220.90 55 84.34 CHANGED phDuAL..DLlRRLsPpplccNLsslhs.LsP............cLs-DLLSSVDpPL+lpp.sppos..+-YLtCDYNRDGDSYRSPWSNpYaPsls.................-ushPScpLRcLElpANcuFDhYR-LYYEG.Gl........SSVYLWDl-ct......................uFAGVVLlKKs............................................spphsGsWDSIHVhElpppspt...........sspY+LTSTVlLpLps............p..t.....suslsLSGsLTRQtEpshslsss.......ssHlsNlGphlE-hEsphRshLpplYFuKsKDIl ...................hDsALDLhRRLsPpplccNLssLlsL.sP.............sLsEDLLSSVDQPLclt+.DctsG........+DYLLCDYNRDGDSYRSPW.......SNcY-PPL-........................DGshPSt+LRKLElcANpAFD.YR-LY..aEG.GV.............SSVYLWDLDc.......................GFA.GVlLlKKs.........................................s.ss.up.p.pGsWDSIHVhE.lp..-+.up............su.c.YKLTSTVhL.hLpT.....................................sp..pu.....sGphs..LuGslTR.QhE.....pD.slsss...........................................ssHluNlG+hVEDMEsKhRshLp-lYFGKsKDlV.......................... 
0 89 133 198 +2473 PF01116 F_bP_aldolase Fructose-bisphosphate aldolase class-II Finn RD, Bateman A anon Prosite Domain \N 25.40 25.40 25.70 25.70 25.00 25.20 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.79 0.70 -5.31 188 7516 2012-10-03 05:58:16 2003-04-07 12:59:11 15 14 4376 71 1389 4343 1369 297.90 34 95.98 CHANGED slsshpclLpcApcstYAlsAFNlsNhEhlpAllcAApctpSPlIlQhSpGsh.pah.................G.hthlsthscshAcph.s.......VP.VsLHLDHups......h-slhpAlct........GF.oSl.......MhDuS.......chs.hEENlphT+cllchA+ths..ls...VEuElGpl..G.....Gp..EDults...ptt...........pthhTsP--AtcFsccTGlDu.....LAsulGssHG..hYps.........pPp........Lshp..h...................................LpcIpptl........................sl.......P.LVLHGu..........SGlsp-.....................plpculphGlsKlNlsT-hphAaspul....................+chltpp.st............aDPRphltsuppuhpchlpcphc..hh.Goss+s ....................................h..sshpphlppApc..psY.Al.s..AFN..s..s..N..h..E..hhp...AllcuApc.h.c.u.P.V.I.l.ph..S.s..Gut..p.ah..................G.ht.h.h...t...p.h..l...psh.u..cp.h..s........lPV.slHhDHu..tp................h-shhpulct..............GF.oSl......MhDuS..................chs.h....-E....Nlphs+c...l....V-h....sHt....hG..........loVEuELGsl........G.......Gp...EDslsspt...............pthh.TsP--s.t.ch.lpcT...GlDu......LAsulGssHG..sYp...............pPp..............Lshp..h...................................Lcclpchl.............................sl....P.LV...lHGu.....................SG..lspc......................c.Ip.c.uI.p.h...G.Vs..KlNlsT-.h.phAhsp.ul...................................+chhtpp...st............................aD.P.Rp.a.l.tsuppAhpphlppt.hp...hGstsp............................................................................................ 
0 471 906 1182 +2474 PF03405 FA_desaturase_2 Fatty acid desaturase Bateman A anon Bateman A Family \N 21.60 21.60 21.60 21.90 21.40 21.50 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.02 0.70 -5.67 11 926 2012-10-01 21:25:29 2003-04-07 12:59:11 9 3 348 37 263 879 84 289.00 38 89.32 CHANGED PPcKhEIF+SLEuWAccN.lLshLKPVEKsWQPpDFLPDPsS..DuF...p-QV+ELRcRs+ELPD-YFVVLVGDMITEEALPTYQTMlNsLDGl+DETGAS.oPWAlWTRAWTAEENRHGDLLNKYLYLSGRVDM+pIEKTIQYLIGSGMDPtTENNPYL.......GFlYTSFQERATFISHGNTARLAK-aGDhpLAQICGoIAuDEKRHETAYTKIVEKLFElDPDsTlLAlADMM+KKIoMPAHLMYDGcDssLFcHFSAVAQRlGVYTA+DYsD.ILEaLVsRWcV-KlT.GLSuEGR+AQD....aVCuLssRIRRLEERAptRA+ptt...slPFSWIFsRcV .............................................................phclhpp.L-..shs-pp.l.pa.Lp.scc.sWpPpDal.P.......spu.............csF........h..tsc-hc.ct...........ppls-shhlshVsshlTE-sLPo....Y....pp.lsp....h.....u.h...........s..G...u...............sWu.tW.sptWTAEENRHG.lLpc.YL.hl.ot.pVD.p.p.lEcsh.hllss...Ghc.s..t.p...p..s.sshh.................uhlYs.oFQEhAThlS.HtN.....Tu.....+h..u.............u.........D.hL.uplhupIAuDEpRHthhYppllcphh-l..sP.stshhAhschhpp.hpMP.u.t.hh.-.................................Fp.+huslhtchGVYssp.pa.h.-.llp.l.lpcWcl.p.ht..s....los-Gp....+Ap....-....alp....tLs...ph...p+h.....pE.ptp.t.hh.t...p...................................................................................................................................................... 0 59 187 239 +2475 PF04116 FA_hydroxylase Fatty_acid_hyrd; Fatty acid hydroxylase superfamily Bateman A, Wood V, Finn RD anon Pfam-B_7847 (release 7.3) & DOMO:DM04600 & Pfam-B_905 (release 4.1); Family This superfamily includes fatty acid and carotene hydroxylases and sterol desaturases. Beta-carotene hydroxylase is involved in zeaxanthin synthesis by hydroxylating beta-carotene, but the enzyme may be involved in other pathways [1]. This family includes C-5 sterol desaturase and C-4 sterol methyl oxidase. 
Members of this family are involved in cholesterol biosynthesis and biosynthesis a plant cuticular wax. These enzymes contain two copies of a HXHH motif. Members of this family are integral membrane proteins. 24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.55 0.71 -3.68 122 4549 2009-01-15 18:05:59 2003-04-07 12:59:11 8 43 1563 0 2248 4426 2057 115.30 21 37.33 CHANGED hllhhlhh-hh.hYhhHR.hhH........hth...la...th..HthH.H...ps.......pt.phh....ssht...hps.hphlhhshhhh.................hhh...hh................hhh.hhhhhhhhhhtshhthhsHssh....................hhhh.....hhhthhhhssph..HphH.Ht ...................................................h.hhhhlhh-.hh.hYahHR.hhH......................hsh........la...ph.....H.p...s.H..H........p..........................pt..tsh.........suhh.........hps...hp...hhhh.shhhh........................hhs.....hhh.....................hs..h..t..s...h..s...h..h.hhhhh.hh.hht..hhsHssh................................hhh..............t.h..h.t.h...h...h.....s...s..sph......HchHH......................................................... 0 659 1293 1875 +2476 PF02504 FA_synthesis Fatty acid synthesis protein Mian N, Bateman A anon Pfam-B_1671 (release 5.4) Family The plsX gene is part of the bacterial fab gene cluster which encodes several key fatty acid biosynthetic enzymes [1]. The exact function of the plsX protein in fatty acid synthesis is unknown. 
19.90 19.90 20.00 20.00 19.80 19.40 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.83 0.70 -5.56 11 3673 2012-10-02 21:08:39 2003-04-07 12:59:11 10 5 3469 4 781 2569 2334 315.60 40 93.65 CHANGED h+lulDsMGGDauPh...pllcGllpAhpshs.lchhLlGsccthpshlpc.....cpsp.lpllpApshlpMc-sPstAlR+Kc.SSMtlulshl+cGcADuhlSAGNTGAlhuluhh+lutlpulsRPAlsshlPThs.G.hsllLDlGANV-scPccLlpFAlMGpsYupplh.shcsP.+lGLLNIGsEEpKGs-.....hh+psachL+sh....shsFlGNlEupDlhsGhsDVlVsDG......FsGNlhLKosEGssphltpll+-chcpshhutLtull...l..slKphtp+hDaspYsGuslhG.lstsVIKsHGsusupulhsAIctAtphlpsslsp+ ....................................................................................plAlDsMGGDauPp...shl.ushp......A..lp..........p....h....s....p....l.c.llLhG..s.p....st.....l.....p...h..Lsp............................t....p.....+.....lp.....l.lps.s...-.h.Is.-.-..c..P..s.p.Al..RpK+.sS..S.......M...........hl...........AhchV...........K............c...........G....c....AcAsl..S..A..G..NTGALMuhuhhlltp.l.......c.G.......I.......-........R..P..A..LssslP.....o......h..............p....G....tslhLDlGAN..s-scsppLhQFAlMGuhaAcplh...slp.....pP..RVuLL...N...l...GpE.-sKGs-.....hh+cuhpLL...pp.t..........sl.NFlG.lEup-.lh.s..G....p...s.....D..VlVsDG......FsGNlsLKohEGsu....p.hl....h.p.hL.Kp.p....h....p.....s....u....h....h...u....K......l....u.u.ll........lp.ss.L..p...p...h...t...p.+hD.s.pYs..GAsL.L.G..LcGsVlKoHGuusscAhtsAI.c.pAhphlcppls..t....................................... 0 273 513 659 +2477 PF01557 FAA_hydrolase Fumarylacetoacetate (FAA) hydrolase family Bashton M, Bateman A anon Pfam-B_641 (release 4.0) & Pfam-B_1228 (release 4.1) Family This family consists of fumarylacetoacetate (FAA) hydrolase, or fumarylacetoacetate hydrolase (FAH) and it also includes HHDD isomerase/OPET decarboxylase from E. coli strain W. FAA is the last enzyme in the tyrosine catabolic pathway, it hydrolyses fumarylacetoacetate into fumarate and acetoacetate which then join the citric acid cycle [1]. 
Mutations in FAA cause type I tyrosinemia in humans this is an inherited disorder mainly affecting the liver leading to liver cirrhosis, hepatocellular carcinoma, renal tubular damages and neurologic crises amongst other symptoms [1]. The enzymatic defect causes the toxic accumulation of phenylalanine/tyrosine catabolites [3]. The E. coli W enzyme HHDD isomerase/OPET decarboxylase contains two copies of this domain and functions in fourth and fifth steps of the homoprotocatechuate pathway; here it decarboxylates OPET to HHDD and isomerises this to OHED. The final products of this pathway are pyruvic acid and succinic semialdehyde. This family also includes various hydratases and 4-oxalocrotonate decarboxylases which are involved in the bacterial meta-cleavage pathways for degradation of aromatic compounds. 2-hydroxypentadienoic acid hydratase encoded by mhpD in E. coli Swiss:P77608 is involved in the phenylpropionic acid pathway of E. coli and catalyses the conversion of 2-hydroxy pentadienoate to 4-hydroxy-2-keto-pentanoate and uses a Mn2+ co-factor [5]. OHED hydratase encoded by hpcG in E. coli Swiss:P42270 is involved in the homoprotocatechuic acid (HPC) catabolism [6]. XylI in P. putida Swiss:P49155 is a 4-Oxalocrotonate decarboxylase [7]. 
23.90 23.90 23.90 23.90 23.80 23.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.56 0.70 -4.89 95 10787 2012-10-02 17:33:27 2003-04-07 12:59:11 13 37 3508 112 3559 8643 4023 213.40 24 76.20 CHANGED p.lhshuhN........at.....p+sppht...........................................hshPh.....tsh....hah+sssulsu.....ss......................s.l..hhPtt.................tt.....lcaE.sELull....lG+..tspph.......ppA..hs.hlhGaslssDlo.s.......................Rch......phtth...........hhh.uKuh-shsslGPh.....lsshsphst....t.............................slplpspl.........................NGch..hpcu..ss.schlasstplluh.lSphh.................sLpsGDl....lhTGTs...t.s.......................................................................tsshhlpsGD...p.lpspl...............tt.........lGslp.s..pls .................................................................................................lhshuhsat........sHstphs..................................s.t..........tsh........hFh+.s..s.s.....s......l....s..s......st.s............................s..l.....hhPpt....................................stp....hcaE.sELull...................lG+............sspph........s.pcA....hc...tlh..Ga.............s..ls.Dlo...t.........................R....ch..........Q.tptt..............shht..uKuhss.sss.l.G.....Ph.......lhs.hs.p.lss.........p.......................................s.lslphpl...........................................NGch..........hppu.....ss.....ss....h....las......h........s....p....l...l....u....a..l....ophh..................sLpsGDl....IhTGTs.......t..G.............t.................................................hlps.GD......p..lplph..............ps......lGp.lps.h.h........................................... 
0 912 2048 2900 +2478 PF00667 FAD_binding_1 FAD_binding; FAD binding domain Bateman A anon Pfam-B_180 (release 2.1) Domain This domain is found in sulfite reductase, NADPH cytochrome P450 reductase, Nitric oxide synthase and methionine synthase reductase. 21.80 21.80 21.80 21.80 21.40 21.70 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.45 0.70 -4.90 15 3726 2012-10-03 00:38:56 2003-04-07 12:59:11 15 73 2009 39 1475 3268 224 195.00 27 27.25 CHANGED sPhctppsFhuslhss+cLpsssusRsshHlElDluso....ulpYpsGDHlGVaPsNspphVccllchlslss...cpslpLcsh-tp..........hc.Ph.sPsTlppALpaal-Iss.sPo+phLptLAsaAs-ts-+pcLchLuss...htpcapchthspshollclhccFPSschPhs.hLlsllPpLpPRYYSISSSschpPspVHlTssVVpacs....spG+h+pGVsSsa ..........................................................................................s.....ttsh.Aplhts.p.pl....s....s...........s...........u.....p...+..s.s.h..H.lEl....c...........l....s....ss..............slpYp.....sGD.t.luVhs.........pN.ss.....t.......hVppll...p..h..l..t...hss........cp.l..p......l..p.sts.............................................................................h..sltpsLphah-lss......s.stt...h.lpt.....hA......ph........s........s.....s...............p..p............................L..t..Lsss...............p.t...p..h.........p..p.a........h..............t...hs.h....l..........-.l.........l................p..............c...a..........s..........s.......s...............p........l............s.h..p...tllphL....Lp.P.RhYSIuS.............S....................t.........h.........t...........s.......s........c.............l.............c....lTVulVca.s........spup.....+..t...GssSsa......................................................... 0 471 808 1178 +2479 PF00890 FAD_binding_2 FAD binding domain Bateman A anon Pfam-B_255 (release 3.0) Family This family includes members that bind FAD. 
This family includes the flavoprotein subunits from succinate and fumarate dehydrogenase, aspartate oxidase and the alpha subunit of adenylylsulphate reductase. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.32 0.70 -5.63 63 15429 2012-10-10 17:06:42 2003-04-07 12:59:11 19 132 4737 137 4045 50736 24820 363.40 25 69.86 CHANGED DVlVlGuGhAGLsAAlphuc.pGh.plsllspstshtu..soshu.pGulshsh...t...t..DshchthpDshpus......sths.cpshsch.hsc.pusctlt.Lcph....Ghsasp........................t.......hhshpth.Gutptp...........................................cshh.sust.......sGpslltshhpps..hph.....lchh.chhstsLlh......p.......su....plpGshlt............psuphh....phtAp.uVllAsGGhut...................sht.ssss..sssGcGhshuhpsGst.lts..hchhtatPsulh....sssh.......hhpcshpu.G..........ulhls.spGcR..Fh.......................s-hsstchlscuhhtpchptsts..........sphaLhhsp.................................................lsscslcpplsthpcpstthhs.......hsshcc.............................P..............lhsss..hashGGltTDhpucs.............tttspslsGLaAsGpss...suuhcGssphuGsuL 
...............................................................................................................................................................................................................DllllGuGh.AG..h....t....AA......l.........p........h.........s.......p.......t.......G..........h....p......l..h..l...l.s..+.....s...................h..tu...............po........h......h.....u......p........G.............G..........h............s...........s.........s....h.................................................................................................t.......D..........s.......h....p..........h...h......t......D......s.l..t.u.u..........................................shh..s.......c....p...p...h.....l....c....h........h.....s......p........p.......u.......s....p.....t....l...p......L..tph..................Gh...asp...................................................................................s...........................h....p...h...G..uh.php..................................................................................................................................R.h...h..h.....s..ss..h.................................s..G...p.......t.......l.....h....p......s......l......h......p.p..s.......h.p.ht.....................lph.h...t..ch..h..s...h.c.Llh....p.................cs...................................ps.h.Gshsh....................................................psu..phh............t.h.p.A..c...ulllAoG.G.hut...........................................................................h...h..t....h....s..o.......s....s..............h......s......o.G.c.G.h.s..h.uh.....cs.Gs...t...ht...s................h....-..h......h...Q....h.....a....P....s...s....h.......................ttth.......................................l.h...s..c..s.h..p..s..p...G...................................................u...h.h.....h......s...t.....p.....G...p..R...ah........................................................t-..h.....s.s..+...c......h....l....u.p..s...
.h..h........p....h.pttts.............................h.h..hL.hsp........................................................................................................................hs......p...h...l...t...p....p....h.....s..t..h........p.h.s.h......t..h......h..s............................h...s.....hpp.......................................................................h.......................................................l..h.P..ss............+a.....s.h.........G.....G.....l........s.s...p.s.ps.....................................................s....l....GLaA....sG.Ess.......suh..H..G..s.s..c..huusul.................................................................................................................................................................................................................................................................................................................................................................. 1 1281 2555 3442 +2480 PF01494 FAD_binding_3 FAD binding domain Bashton M, Bateman A anon Pfam-B_549 (release 4.0) Family This domain is involved in FAD binding in a number of enzymes. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.95 0.70 -5.40 20 17997 2012-10-10 17:06:42 2003-04-07 12:59:11 14 174 3547 120 7349 32963 12148 284.60 17 67.85 CHANGED cssVL.IVGuGPsGLhhuthLuptGlc......shll-+hsssssts..RAtslppRTMElLcphGltcchtspusspphhutthtss.........................................................cttscl-....t.sussthsshsQsclE.lLhccApppG.splpauTE...lhuhppDtsGVsuhlp...sctsGpp....TlcucYllGsDGs+ShVRcslGlp.hcGptth..hshhslhhcu.sls.........hhhlhsscssGh.................hltPhps...t.hhhhsshs.phpst..ssphsp--stptlcshsussthss.chpthotashssphAccaRpG.RlFLAGDAAHhpPPsGGhGhNsulQDAaNLuWKLAtVLpGpAsssLLDoYssERpsluppllctu ..............................................................................................................................................................................................................plh.llGu.G...sGhh..........h.....A.............h.....h...L....t....p......t.........G...h.p...........................s.h.l......l......E............p..........t...................................................s................t........s..................s.........l...........t.................t.......s.....h..............p......h..........h..............p.......t...........h..........G.......l........h...p....t.....h............t........t...................h........h..h..................................................................................................................................................t..ht.............................................h....h.....h......h...................p.........t...............h.......t.......t.....h........L.....h................p.....t.....h............t...........s.......h..p...h..h....h..s......t..p.....................l......t....h................t........p........t.....p..........t...............t.........h.........h..........................t.t.........t..................................h...p........u.....
.c........h...l.l.........uA.D.....G.........h........p......S....h....l.....R.........p.......h........h............s...........h......t.........h..........t..............t.............................................h.....h.......h.....h......t...h....h......h....t............h..........................................................h......h............t.t....h.................................................................hh.h.s..h........t.....................................................h................h.........................................................t...t..............................t.....h.........................t.........t..........h....................t.................h........................t............h..........h..............t.......................................................h.........................h.........t...................................................h............t...............h............h..............................h...........t.........................h.............h.........h...........t..........p..........h...............h.............t...............s......p.......l.......h...L..........h.G....D.............AAH...............s......h......Ph........t....G....p..........G....h....s.......huh......p...D....s...h.....L...s........h.....l...........t.................................................................................................................................................................t......................................................................................................................................................................................................................................................................................................................... 
0 1974 4250 6144 +2481 PF00941 FAD_binding_5 dehydrog_molyb; FAD binding domain in molybdopterin dehydrogenase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1112 (release 3.0) Family \N 20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.84 0.71 -4.82 94 4901 2012-10-02 01:00:47 2003-04-07 12:59:11 16 65 2185 108 1498 4277 1822 176.00 27 36.83 CHANGED tphpahpPsolp-hhplht......ttssA+lluGuTslsshhchphhphshl.lslspl.s-Lptlpps..s....sslplGAssohsclhp..........tthsphhs.sLspthptlAu.QlRNhuTlGGNl.............................................ssusshuDhsssLhAlsAplpltssps..pR....plslp-.Fht.....shhpssL.pssEllhulplPhh .................................................................................................t...pah+Psolpcs....lplhs...............tts..pA..c..l.l..AGG..Tslh...h....h+...h.....p........h....h....p...........s....h....l....lsl.s..p.l....EL...p.t.lp....hs...s....................sulcIGAssshs..cltpt..................................................hpp.t..h.......s...sLs...c..t.h....p..t.h.Au.h.Ql.R.Nhu.....TlGGNl.......................................................ssu.s.s......u....DhssshhAh..sApl......p...l.t....s....sp.....u......pR...........pl.s.lp-..Fat......................s.hcs..sL..ts.s.E...llhulhlP.......................................................................................................... 1 466 867 1191 +2482 PF01687 Flavokinase FAD_Synth; Riboflavin kinase Bashton M, Bateman A, Mistry J, Eddy S anon Pfam-B_1221 (release 4.1) Domain This family represents the C-terminal region of the bifunctional riboflavin biosynthesis protein known as RibC in Bacillus subtilis. The RibC protein from Bacillus subtilis has both flavokinase and flavin adenine dinucleotide synthetase (FAD-synthetase) activities. RibC plays an essential role in the flavin metabolism [1]. This domain is thought to have kinase activity [2]. 
19.60 19.60 20.60 20.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.59 0.71 -4.24 139 4738 2009-01-15 18:05:59 2003-04-07 12:59:11 12 21 4531 30 1203 3417 2167 126.90 37 41.33 CHANGED hLG+saplpGpVl+Gpph.GRplGFPTANlph....pshllPtp...GVYsspspl...................................ssp..........................................................ha..ulsslGhpPT.hss....pph..plEsHlh................DF.st...-....lYGcplplpalphlRsEpKFs.ul-pLhpQIppDhptu+.phhs .....................................................................hLG+saplp.GpVl+Gpph.G.Rp.............l.........G.............FPT..............ANltl...............pc.p.hhP.....tp...GVYAsp.l.p.l...............................................s.s.p................................................................................................................................ha....ulssl....G.....h.pPT..h.ss......tch............pl........E.....salh........................................DF...st......D.....l.Y.Gc.p.lpVtahchlRs.Et.K.Fs.u.......l.-tLhpQlppDtppA+phh.s................................ 1 411 772 1021 +2483 PF04703 FaeA FaeA-like protein Mifsud W anon Pfam-B_5784 (release 7.5) Family This family represents a number of fimbrial protein transcription regulators found in Gram-negative bacteria. These proteins are thought to facilitate binding of the leucine-rich regulatory protein to regulatory elements, possibly by inhibiting deoxyadenosine methylation of these elements by deoxyadenosine methylase [1,2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.86 0.72 -3.81 5 310 2012-10-04 14:01:12 2003-04-07 12:59:11 7 14 259 2 35 393 13 62.30 35 44.62 CHANGED MK-cILoalscl.........+uPC+TsEIA-AhGlSAYQARYYLhsLEKEGKI+RSPlRRGAuTLWclpu ..................................scIlpahpp..................pts..s+Tp.-IA-Ahul.o.h.Y.QARtYLhpLEctG..hV..c+..s.ht+Gh..t.h.W....s................. 0 6 14 21 +2484 PF02106 Fanconi_C Fanconi; Fanconi anaemia group C protein Mian N, Bateman A anon IPR000686 Family \N 25.00 25.00 26.30 26.10 19.80 22.60 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.76 0.70 -6.07 5 88 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 36 0 35 71 0 385.40 44 98.24 CHANGED MAQDSssLsoNaQFWMQKLSsWsQASTLETQ+DTCLHLuQFQEFLRQMYEsLKEMDSsA.lIERFPTIGQLLAKuCWNPFILAYDESQKILIWCLCCLhsKEPQNSsESKLNSWIRuLLSHlLSu.RFDh..KEVuLFspuLGYsshDYYPuLLKNMVLSLVSELRENHLNGhNoQcRMSPERVpSLSclCVPLlTLPDh-PLVEALLpYHG+EPQElLsPEFFEAVNEAFLpKKISLPsSAVlSLWhRHLPSLEsAsLHLLEKLlSScpNsLRRl-CaIKDSLLPQ.AACHPAIFRlVDEIFRsALLETDGAPEVlAsIQVFTpCFVEALcKENKQL+FALKTYFPYsuPuLlhsLLQ+Pp-IPpGHWLQsLKHIS-LL+ElVEDps+G..SausPFESWFLFVHFGGWlDlAAEQLLhu..AAEPP-ALLWLLAFYYuPpsGsQQRuQTMVElKsVlu+LhMLaRSssLSApDLQAuA.pssuuDsRsPssQQLVRHLLLNFLLWoPGGHoIApElIT+MAcTsAlp+EIIGFLDpTLYRhsHLulEusRocKLARELLcELpsQV 
..........................php.WhpKh..Wtps...po.ps..Dss.HLs.hppFlpphhphlpphs......pph..htp..hLtp.C....hhu..tps.phlh...pL.sh.ptcP.sshp.ths.WhpslLsHllos.cht...tEss..hhpsLGYtshDYastLlcNhV.SLspELctsphsths.hp.pRhssp.phhslS.hClPLlTLs-htPLlEsLLhh..putpspEhLt.-Fh-tVs.......-Ahl.c+ls.....L.s.pulhsLWh+aLPSLEpAhLpLh-plhot.p.....pchpphlc-S.LPp.uAppsuIFhlVs-hF.R....sLhEo-Gs.plhshlQlFTpChlpthppps+Q.pasL+saFP.s..sLshsL.hppPp.tlspth..hhp.L.pIophL+phsE-tptt...............u.ts.aEtWFLhlphGtWschssp.Llh......tutss.tsLLWLLsFhatPpsttppRtpphsphp.hhs+Lhhh.t..p.shshtcLptsh......t.t.p.sh..tpLhh+Llhphhlas.tsh.hs.-hlshhh.ptth.pchh.hl.ph...h.h.p...p.t.cs.t...hhppL.tpL............................... 0 5 6 11 +2485 PF03511 Fanconi_A Fanconia; Fanconi anaemia group A protein Griffiths-Jones SR anon PRINTS Family \N 20.90 20.90 21.00 32.30 19.50 19.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.13 0.72 -4.19 2 47 2009-12-01 10:28:13 2003-04-07 12:59:11 9 1 30 0 22 44 0 61.90 56 5.08 CHANGED REELLlhLFFFSLMGLLSSaLTppsTs-h.KAhclCAtlLpCLE+RKlSWLsLFQLTEpDh+LG .....EcLLV.LFFFSLMGLLSS+Lssps...ssD..KAl.clCApILpCLE+RKI.SWLsLFQLTEsDh......... 0 1 2 7 +2486 PF01149 Fapy_DNA_glyco Formamidopyrimidine-DNA glycosylase N-terminal domain Finn RD, Bateman A anon Prosite Domain Formamidopyrimidine-DNA glycosylase (Fpg) is a DNA repair enzyme that excises oxidised purines from damaged DNA. This family is the N-terminal domain contains eight beta-strands, forming a beta-sandwich with two alpha-helices parallel to its edges [1]. 
24.40 24.40 24.60 24.50 24.30 24.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.78 0.71 -3.73 109 5183 2009-01-15 18:05:59 2003-04-07 12:59:11 19 13 3625 70 1224 3585 2246 118.40 30 42.40 CHANGED PELPEVEslpcsL...pp.tlhGppItp.lplt..........psp.lchs.stt......htptlpGpplp.slpRRGKaLlhph......................................................................................ss.........sh.....hLlh..HLuMoGphhh......................................................................................ttt...t+Hs+lh..hph........s.sst.......................pLtasDsR+FGphp ..........................................PELPEVEs....sp+sl......ps...h....lhG...pp..l.p.p.l..t.lp......................s.p..l+h.s.stt..........h.tpLsGpplp.slp...RRGKaLlhch...............................................................................................ps...................st.......slls..HLu..M.sGphpl...............................................................................hstp..s.sp+H.s+..lt...hth............s..ssp.....................................hLh.atD.RpFGth.h......................................................................................... 0 373 766 1025 +2487 PF04750 Far-17a_AIG1 FAR-17a/AIG1-like protein Mifsud W anon Pfam-B_3664 (release 7.5) Family This family includes the hamster androgen-induced FAR-17a protein (Swiss:Q60534) [1], and its human homologue, the AIG1 protein (Swiss:Q9NVV5) [2]. The function of these proteins is unknown. This family also includes homologous regions from a number of other metazoan proteins. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.34 0.71 -5.15 28 524 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 287 0 312 511 38 182.70 21 80.16 CHANGED hpllhHl............................................hsls.ashhhahhpp.sp.............taG..........G+apaLThlsL...................llpslaaslsllsDl...........pppcpLptl.....tDahhsshuFPlulhVslhFWsLah....hDccLlh.......Pchlsph................lP..hW.............LNHshHThshlhlll-hhhpp+paPp+ht...ulshhsshshuYhhW.lphlhthsG.hWlYPlhstluh.t+llhFshusllhhhhYlhuchLss .............................................................................................h..............................................................................................hu...........tpa.paLThlsh....................h.hphhhhhlshltph.......................t..t.hhth........c.hhhshs...hsl....thh.......ls.hh.....aWs.lhh.......hs.phlh.......schhc...h................hP..ha.............l.s.hshHshshlhh.ll-hh..h.........t..h.s.....pht..............uhshh....s...s..hshhYhha...lh....h...h..h.............h...s....u....h........asYPhl.p.....hh.s...h......t..p..hh...h..ashshhhhhhhhhhh..................................................... 0 107 159 243 +2488 PF01581 FARP FMRFamide related peptide family Bateman A anon Pfam-B_666 (release 4.1) Family The neuroactive peptide Phe-Met-Arg-Phe-NH2 (FMRF-amide) has a variety of effects on both mammalian and invertebrate tissues [1]. 
15.00 2.30 15.00 2.30 14.90 2.20 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.70 0.77 -6.28 0.77 -2.59 61 1303 2009-11-03 19:08:40 2003-04-07 12:59:11 11 24 73 0 591 1475 4 10.20 46 34.60 CHANGED ppcptahRFGR ......t..ptpahRFG+ 0 210 278 550 +2489 PF02469 Fasciclin Fasciclin domain Bateman A anon Pfam-B_562 (release 5.4) Family This extracellular domain is found repeated four times in grasshopper fasciclin I as well as in proteins from mammals, sea urchins, plants, yeast and bacteria [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.82 0.71 -4.13 122 4881 2009-09-10 22:24:17 2003-04-07 12:59:11 17 137 999 8 2602 4995 1077 128.40 22 47.97 CHANGED sp...hoshhshlp.....su..sLs.ppL.................ss.s.........shTlFA.PoscAF.........tpl.....stsphptLht....spppLpplLpaHllss.ph..tsssltsh......................tp...............lpo..lpGs......plplshp.........ss.........plplss.......Apls..psDl.tssN.....GllHsIDpVL.lP ..........................................................s..hsphh.th.lp.......ts....sLs....ptL....................................................ssts............................shTl.FA....Po............s.pA.F...............................pp.l........................st..t...t.h.p..tlht...............spptlpp.lL........p....a..H...l..l.....ss...th....hss.s..l.tss..................................................................tp...................................................................................lpT......hp..Gp.............plplshp...............ss..........................................plhlss..................upl.h......p.s..s.l.....s..sN...................G..l...l..Hhl.D.p.VL.hP...................................................................................................................... 
0 870 1506 2063 +2490 PF02259 FAT FAT domain Sonnhammer ELL anon (Keith and Schreiber, Science 270:50) Family The FAT domain is named after FRAP, ATM and TRRAP. 20.00 20.00 20.00 20.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.01 0.70 -5.47 55 1294 2012-10-11 20:00:59 2003-04-07 12:59:11 18 84 321 0 880 1335 63 334.40 17 12.77 CHANGED usLussAAWtlsp.W-p..hcphlsshp.........................pss.........cpp...FapAlLul..++sphc...cApphIcpuRpllssElou...LsscSYscuYshllplQ.........hLsELEEhlpap.phspt........spchctl....hpsWppRL.ss.pcsl-lWpcllphRsllls.............................................................sc-shphhl+FApLsRKssphslAp+sLppLht.t............................Pplhhs.h+h.Wtps.......pp...........pcAlppL.pchsp.phspsht.t.tp.ht.t...............................p..s.pp.htt.spLhA..+sahchGpaptpl..............p.ht.pshspllpuYttAsphcpphhK.AWcsWAhhNhcllph.tpt.t...................................................................hhpasssAlpsah+SluLups.....puhpchhRLLo 
..............................................................................................................hhhtsuWthsp..Wpt......hp.p..h.lp...h..........................................................................................................................pps..........................cht..hhpuhhul........+ppphp.............................phpph..l.ppu..p...phh......hp.......phss.......hs..tp...s..h..p.pth.s..hlh..phQ...............hLsELc....-hhp.h..h.t...................................................pphptl...............hp.tW.pp..R...L............sh..tsshphh.....ppll...thRphhhp.........................................................................................................................................................p..p..h..p.h...h.l...ch...upl...sRKp.s..ph.p...l.uhptLh.p....lhp.................................................................................................spl...ht.hph.at....t....p...................................................pcAh...phL...p......p...h.p..hhppt..............h.......................................................................................................................................................t.......t....ttphh.u....csa...h..hhupahtph.........................................................pp.pthhp.tap.....t..u..s.ph...p.p..hh.............+...u..........at....t.hu..hh....t.p.hhpt.p.........................................................................................................................................................hphhh..ulpsahculth..spp........tshp.h.RlL................................................................................................................................................................... 0 319 487 724 +2491 PF02260 FATC FATC domain Sonnhammer ELL, Wood V, Mistry J anon (Keith and Schreiber, Science 270:50) Family The FATC domain is named after FRAP, ATM, TRRAP C-terminal [1]. 
The solution structure of the FATC domain suggests it plays a role in redox-dependent structural and cellular stability [2]. 25.20 25.20 25.30 25.30 25.10 25.10 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.33 0.72 -4.51 141 1318 2009-01-15 18:05:59 2003-04-07 12:59:11 15 61 309 3 915 1361 13 32.60 39 1.32 CHANGED p.sL.olptQVppLIppATsspNLuphYh.GWsPaa .......h.sL.sVpsQ.VppLIppATs.cNL.....sphYh.GWssah.... 0 330 502 761 +2492 PF00316 FBPase Fructose-1-6-bisphosphatase Finn RD, Griffiths-Jones SR anon Prosite Family \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.09 0.70 -5.61 12 2715 2012-10-02 15:53:20 2003-04-07 12:59:11 15 11 2198 197 881 2125 479 306.40 43 94.95 CHANGED hTLsphhlppt......usu-.....lstllsulshus+hIuptlp+AuLsp.lGtuuusNspGDtQKtLDVluc-lhhsALcuS.sltshsSEEp--hlsh....stpupasVshDPLDGSSNIDsslSVGTIFuIa................sssssp..shhpsGp-.VAAGYslYGspThLVlohGs..GVptFsLD.s.GpFhLscpsh+lPtcuphauINpuNhpaas.slccaIcchhtstcssp.+sashRYlGShVADsHRhLl+G.GlFlYPust+...spGKLRlLYEstPhAFLhEQAGGtAosG+.cpl..LDlsPpplHpRsslhhGStscVt+hpchhtt. 
........................................................................t.TLspal.hp...pptph....ssu-...............Ls.tLl.sslthusKhIsptl...p...+...u..........uL.......s.....s..l..l....G.......t...............u........G...........s........p.....N.l...Q.G...E..s.QpKL..Dlhu...N-...hhhsuLc..sp..s..tlsulA........SEEp..........-..........ch..ls..h.............t..t.....s....p........p.......u...c.YlVhhDPLDGSSNIDl..Nl.S.V.GT.I.FSIa+t.s..........................ssps..spt....ch.L..Q.....PGp.pQVAA.GYslYGsuTh..LVhT..sGs............G....V...p.........s..F..T.....h..........D.....s..s....l.......G............pFh.L..scp.........s..h..+..........l...Pc.c..G..p.........hYulNpGNhhpa.s....s.s.l+cYlc.hpttp..............pust....+sYshRYlG....S.............hVADh.HRsLl+.G.GIahYPustp.........psp..G..KLRL.LYEsNPMAFlhEQAGGt..........Ao...s.....G....p.....pRI...L.Dl...P.p.plHQRsshalGSpppVcclppaht.t......................................................... 0 253 520 722 +2493 PF03320 FBPase_glpX Bacterial fructose-1,6-bisphosphatase, glpX-encoded Mifsud W anon Pfam-B_3515 (release 6.5) Family \N 20.50 20.50 20.60 20.90 19.50 20.30 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.81 0.70 -5.79 111 2306 2012-10-02 15:53:20 2003-04-07 12:59:11 8 3 1790 13 437 1381 1900 308.20 54 95.06 CHANGED -RsL...ulEllRVTEAAAlAuu+ahGRGDKpsADpAAV-AMRphlsplshcGsVV....IGEGEhDEAPM.LYIGEcVG.s..G..pGPp............lDIAVDPlEGTslsApGhsNAlAVlAhuc+GslLcAPDh.YM-KlAVGP........tucGhlD..lstslp-Nl+slAcAhs.+slp-lTVslLDRPRHpclIcclRpsGARl+LIsDGDVAuulssshsc.o..GlDlhhGIGGAPEGVluAAAl+ClGGphQuRLhs.............ps-pEc.pRspch..Gl...Dh..s+lhsh-DLVpupclhFuATGlTcGsLLcGV+a.....psst.spTcSlV.hRucoGTl.RhIcut..H 
.......................RpLAhEhsRVTEuAALAuh+WhGRGDKNsADGAAVsAMRphLNplshcGplV......IGEGE.....hDE..APM.LY.IGEcVGsG......pGs.t................VDIAVDPlEGTphsAhGps.NALAVlAhu-+.....G.....shLc.A.PDM..YM-KLsVGP........tAtG.s..ID........LshPls-NL+sVAcAhu..Ksls-LsVslLc+PRHstlIpEhpphGsRVhhIsDGDVAuuIhTshs-...o.slDlhhGIGGAPEGVl.oAAAl+sLGG-MQuRLls..............h.s-pEhtR.CcpM..Gl......-.ss+VLpl-Dhs+u-..sllFuATGlTsG-LLcGlph.....push.....upTcoLl..hR.u+opTlRhIcohH.......................... 0 146 291 369 +2494 PF02634 FdhD-NarQ FdhD/NarQ family Mian N, Bateman A, Iyer LM, Zhang D, Aravind, L anon COG1526 Family A pan-bacterial lineage of proteins. Nitrate assimilation protein, NarQ, [1] and FdhD (Swiss:P32177) are required for formate dehydrogenase activity. Structurally, they possess a deaminase fold with a characteristic binding pocket, suggesting that they might bind a nucleotide or related molecule allosterically to regulate the formate dehydrogenase catalytic subunit [2]. 25.00 25.00 27.50 25.80 22.70 24.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.49 0.70 -4.91 203 2755 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 2387 4 651 1935 1212 230.20 33 86.35 CHANGED spEtPlslhl.NG........pphsshMsoP......................sch--hAlGFLhsEGllp.ssp-l..pulpls.....................................pp............t.p..........................lt...lp.........spt...t..h...cc+h..h..sssoGC........Gh..Ght..h.ph...hpth.h.........s.thplsssplhphhpplpp.tpslappTGulHuAulh..s.....ss..p..llhhtEDVGRHNAlDKlhGthhh.........psh.................s................hs...st.....hllsoGRlSsEMV.KsAphGlPlllohSAsTshAlclAcchGlTLlGhsRs..............pc..hslYot.scRlh 
...................................................................................spEhPlslh.h....NG..........h.phsshMsoP......................pclE.phslGF.hoEGlIp.s.c-l...h.ulcls.......................................................ts.sss.........................lcls.........spt......thpt..........pc+th..sspouC...Gh..s..Ghpt..lpph.................hpshp.h................s.sh.phs.hs.p.l.hphhppL.pp....t.pt.lh.pp.TGulHuAAhhs.............ss..p....llsh+EDlGRHNALDKLlGtth.p.....p..s.h..............................s.......hpps..........hll.sSuRhShEhVtKuAhhGlslLhuhSAsTsLAlclAcchs.......lTLlGFs.Rs................sp..hslYotspRl.h......................................................... 0 188 402 541 +2495 PF04216 FdhE Protein involved in formate dehydrogenase formation Mifsud W anon COG3058 Family The function of these proteins is unknown. They may possibly be involved in the formation of formate dehydrogenase. 27.20 27.20 28.30 27.90 27.10 27.10 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.25 0.70 -4.69 49 958 2009-09-11 05:40:51 2003-04-07 12:59:11 7 2 907 2 192 561 21 287.00 48 95.45 CHANGED L....t.l........t..t.s.lhhss.tslatpR.....ApRLptLApp.pP.hu-YLpahAplscs.Qppl...lpph.shshs.st.h.t......tpuhPsLstpsh.p-stapt..hLptLlsplpst...s.ssshht..slctLcp...ssssplcthussLLs..........tphstsss..shshFlhAALplhasphAst..lstpshst.spp....shCPlCGohPluSllphssp.p.......Gh.RYLpCuLCpoEW+hlRscCssCspocslphhsl............-stps....sl+AEoCssCpuYlKhlh.p-ccsph-sh.ADDLAoLtLDlhh.p-pGapRsuhNPhLh 
.............................................................................................................chssscpps...ushIPPLLFPphKslYsRR.....AcR.LRcLA-s...N.........P..LuDYL+FAAhIAcA.Qcsl.....Lh-a...PLp.h.-.ls.s.cl.tcs............pspG.p...PPLDhcs.l.s.R.D..p+.W.pc....LLhuLlAEL+Pc.................hssssLA.......lIEsLEK..............ASspELEshAs.ALhA...........................u-FusVuS......-K.APFIWAAL.S.LYWA....Q..hAsh......IPu+.....A....+..s..........E..aG.....Ep.R....................p.aCPVCGShPVuShVplGss...Q.........G..L.RYLHCsLCETEW.HlV.R.l.K..CSNCEQo.tcLcYWS.L.....................-.sEpA........AlKAESCsDCs.TYLKILY...QE..K...-.....P....c..V.........E.uV.....ADDLASLsLDucM.EpEG.aARSSlNPFLF................................. 0 56 112 156 +2496 PF03147 FDX-ACB Ferredoxin-fold anticodon binding domain Bateman A anon [1] Domain This is the anticodon binding domain found in some phenylalanyl tRNA synthetases. The domain has a ferredoxin fold [1,2]. 21.00 21.00 21.20 21.90 20.60 20.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.85 0.72 -3.81 134 4662 2009-09-10 22:44:18 2003-04-07 12:59:11 9 19 4522 20 1190 3729 2172 94.00 33 12.29 CHANGED S+aPslpR.DlAlllsp..................sl.sss....plh..pslcpss......sp.hLc.slplFDlYp.G..........ppls.p..scKSlAhplhapst-+TLs-p-lsshhppllptLppchsupLR ........................S+FPuspR.DlAllVs.c........................................................s.l..sss......-lh........psl..cps.u.....................sp.....hLp.....slpLFDV..Yp..G...............cp.ls..p..........GcKSlAhsls.hp..s....s.-+TLs--E.....lstshpcllpsLpcchsApLR..................... 
0 404 755 1004 +2497 PF00465 Fe-ADH Iron-containing alcohol dehydrogenase Finn RD anon Prosite Family \N 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.24 0.70 -5.82 226 13485 2012-10-02 14:41:14 2003-04-07 12:59:11 14 23 3785 67 2682 11831 4421 355.90 25 79.39 CHANGED P..s.c..lhhG...tG.s.lpplsp..hltph..t........+sLlVo...st.sh..tpt......u.hhcplhp...L..ppsulph.thast.l.pspPshpplpcuhp..hh+......pp..ss.......DsllulGGGSslDsAKuluhhhs.......................................lP............llslPTTuGTGSEso........shuVls..st...ppp..hKh.slts..phhP.......phullDPpLshshPhplsusouhDALsHslEuahs.....................p..ss............s..................hoD...............shuhpulc...hlhptL.ps.hpss.ps.....hpA.Rpph..hhuuhh....A....Ghuh....s...s..uuluh.......sHuluaslu....u.ha.p..............lsHGhssAlh.LPtlh..pash......................tp..htplup...hh.......................sp..............phlptlp....ph..hppl.ulPs.p...Lp-...h.G...ls..p....pp....l....spluptuhps..t..h.s....sPtthstp-.l 
..................................................................................................................PtphhhG.tG.u...l.t.p.ls.p......htph....u..........+sllls...ct..slh..p..h...............G...hhsp.lhp....hL..........p..tts....l..p....h.....tla.....s.....t...l....p.s...s......P......o......h.....p.....s......l...p.....c......uh...p......h...h...p.......pt.......p.s...........D...h...l..l.u...lG...GGSs.hD.sAKu.............luhhhp......s.....t............th..............................hP.........................hls.lPT..T..u..G..T..GSEsT.....shuVIo..sp......................psp....hKh..slhs......ph..hP.......s.hullDs....pl.hh.shPtplsA....ssuh..........D..........A..........L..........s.......H.......s....l.......E.u...Y.so.......................ht......uss..............................h.o.D..................sh.u...h..p...ul.c........hl..h.c......t...l.......p....u........l.....p......ss........p....s..................ht..u.....R.....p.....p.....h.....hh.....u.......u.....h.h.......A........G.....h.u.F.....s.......s.......usl.uh................sHuh.u.H..tlu.............uha...c..................................................................ls..HG..hs.sAll....LP..tlh.paNt..............................................s......t+....htph.Ap....hh....................................th..........t.p.t.ttsp....................thlp.tlp............ph...h.p.pl....Gl...Pt..s........Lp-............h...G......lp....c........................pc....l....st....l.uptAhpc.ts....hhs....sPh...s.t............................................................................................................................................................................................................. 0 880 1660 2200 +2498 PF02742 Fe_dep_repr_C Iron dependent repressor, metal binding and dimerisation domain Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family includes the Diphtheria toxin repressor. 
20.70 20.70 21.00 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.08 0.72 -4.29 141 3389 2012-10-04 14:01:12 2003-04-07 12:59:11 10 16 2647 96 811 2165 650 67.10 34 34.92 CHANGED TcpGcchAhpllR+HRLhEpFLschLuh.sh-clH--AcplEHhlS-chhc+lsphL..shPppsPHGsPI.Pst .......TpcGcclApplhc+H+l.l.EpFLhphLGl.s.-ps+c-AEtlEHtlS-chlc+lpphl...p.....p.sP........t................ 0 307 589 730 +2499 PF01325 Fe_dep_repress Iron dependent repressor, N-terminal DNA binding domain Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family includes the Diphtheria toxin repressor. DNA binding is through a helix-turn-helix motif. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.40 0.72 -4.03 10 3362 2012-10-04 14:01:12 2003-04-07 12:59:11 14 14 2588 94 779 4553 836 59.00 31 32.07 CHANGED cLscstE-YLcsIYsLpE.spshspssclAEcLsVpPuoVocMlp+Lc+pGYVcYpsY+Gl ....................................shEDYLc.s...I.h..c.L........t.....p.....c....t......s....t....sp...s...s..clApcLsV.S.s.PoVoc.M...lp+L..pcpG..llphp.apu.................. 
0 286 561 700 +2500 PF02906 Fe_hyd_lg_C Iron only hydrogenase large subunit, C-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.14 0.70 -5.15 141 3059 2009-01-15 18:05:59 2003-04-07 12:59:11 9 160 948 10 841 2861 59 227.10 30 56.42 CHANGED +hVlsplAPAlRsulG-phuh..st...sG+hhuAL+c.LGFc.pVaDssauADlTlhEEusEhlc.Rl.........pp.st.......h..PhhTS.CCPuWlpasE+haP-.llsplSos+SP.thhGsllKs....hhsp......................th......s.spchhsVuIMPCsAKKhEut.R.-hps.......................cVDhVLTocELuphl+............pt.....sIc.hspLtc...pphDts.h.......s...uouuGtl............F.........GsoGGVhEAslRsshchh.........t..ppht.......hph..hRsh.t.....sh+.sphpl..............................................s........l.+lulspGl..t.ssccllcpl..+s.................G.....................c.....tp.......acFlElMACPGGClsGGG..QP .............................................................................................h..sst....ht..........................................t..hh...t....hh............h...........t...h..........G......ht.............h.h.-.sshst.phsh....tp.Ehhp.+h..................................................pp...tt.................shhoS.sCP...u..alphhEph...........a.P................c...hls.plSs..s+SP.t...................hhGslhKs......hh.s.p...........................................................................................t..h...............s.spclhsVslMPC..hAK.KhEsp...R.s.-.hts.....t....................................................cVDhVlT.o.c....EL.s.p.hlc.............................................pt..........sIs..ht...plts.....pp..h.Dt...h.........................s.......touu...u..hl....................................................................F.........................................u.soGGlhcushp.hht.h.........................................................................................................................
...........h..phshspGh....t.sht.phl.p..th....ct............................s.................................................................................c...........hpal.Eh.sC.....t.GCh.G................................................................... 0 435 630 751 +2501 PF02256 Fe_hyd_SSU Iron hydrogenase small subunit Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_3750 (release 5.2) Domain This family represents the small subunit of the Fe-only hydrogenases EC:1.18.99.1. The subunit is comprised of alternating random coil and alpha helical structures that encompasses the large subunit in a novel protein fold [1]. 21.70 21.70 22.00 23.20 21.50 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -8.88 0.72 -3.96 109 999 2009-09-11 10:48:27 2003-04-07 12:59:11 12 61 583 10 362 911 20 58.20 34 10.86 CHANGED tssp-h.hppRtpulYphDpph.s...lR+SaENPtlppLY.ccaLs....cPh.uc+uHcLLHTc.Yps+p .....................t....chhtpRhpuLYptD....pph.....lRpSa-NPtlppLY...c-aLs....cPh.uc+AHcLLHTp.Ypsp......... 0 185 271 316 +2502 PF01032 FecCD FecCD_family; FecCD transport family Finn RD, Bateman A anon Pfam-B_377 (release 3.0) Family This is a sub-family of bacterial binding protein-dependent transport systems family. This Pfam entry contains the inner components of this multicomponent transport system. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -11.87 0.70 -5.60 145 19091 2012-10-02 17:14:55 2003-04-07 12:59:11 13 11 3812 10 3276 15283 1729 306.10 28 92.97 CHANGED ll..ll....hhlhu...L....hhGsh...tlshtplhpslht..................sshtt.........llhplRlPRhlhAllsGuuLuluGslhQslhRNPLAsPsllGlsuGAulusslshhhh............shhsh............hhhshhAh..lGuhlu...shllh.hlu.......httthsshpllLsGlulushhsulsshlh.hh....s.p..............phpslhhWhhGSl..s.stsappl.hhhhshllhshhhhhh.hu+pLslLsLG-chApuLGlslpph+hhhllhssllsusuVuhsGsIuFlGLlsPHlsRh...lh....G.sshpthlshoslhGullllhuDhluRhlht.PhE.l...PlG..lloullGuPhFl....aLlh+ ........................................................................................................h.hhhhhhhu...l........hhGsh....tl...s...h.tp.l.h.p.h.h.ht.......................................ss..tph..................................l.lh.p.h...RlPRhlhullsGu......uLulu.Gs..lh....Q.s...l..h+NPLA.sPsllG..lsuGAu..hu.hsl..slhhh...............................shhsh.................h.h.h..s.....h...h..A.h.....l.G..uhls..............sh.l.l.h..hlu.................t.p.t..t.hs.s..h..p....l....l...L......s......G...l.u...l.u..s.h.h..s...uls..sh..lh...hh.......s...p.................................................php...p..l...h...h...W.h..h.Gol............s...ss......sa...p............p............l..hh...........h.h.....s.hl.l.l....s.lh.h.s....h..h.......h......u.....p.....t.......LslL.s.L.G.-..c..hApuLGls...l...pp...h+...hhhllh..s.slLs......u.....suVuhsG.s.I...u.F.l.G..L.l...s..P.H.l.s.R.h....l.s......G...s.....ct+...hhlP.h.............us..............Lh...G..ull.......Ll.h........u...D....hl...u.....R...h.....l..........h.................Ph.......E.......l....................P.l.G...l.l..s.u.l.l.Gu.P..h.Fl.aLlh+...................................................................... 
0 941 1977 2706 +2503 PF04773 FecR FecR protein Bateman A anon Pfam-B_3234 (release 7.5) Family FecR is involved in regulation of iron dicitrate transport. In the absence of citrate FecR inactivates FecI. FecR is probably a sensor that recognises iron dicitrate in the periplasm. 21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.33 0.72 -3.76 182 4810 2009-01-15 18:05:59 2003-04-07 12:59:11 8 61 1004 0 1571 4630 1229 97.30 25 26.17 CHANGED s................plpTs..s........sp.........ptplt...l.sDG.....op.lpLsssopl...............htttppp..lp....LhpGp....s.hhpssp.ssp.......t....Fplpo.s.sspltstG.T..pF..sl..........t..tpsspsp.....lsVhcGpVp ........................................................................t..h.os..h.Gppt.plp..L..s....D..G.......op..lhLsssSpl...................phshshs.pcp.h...lp.............lpG.E........s.hhc...V.....s+..ssp...................pP.......FhV.......po.........s..p.......s.......p.......l..pV...l.G...T.......pF...p..s..........h......tps.stsp........lslhcGpV............................................ 0 637 1127 1377 +2504 PF02388 FemAB FemAB family Bateman A, Mian N anon Pfam-B_1214 (release 5.2) Family The femAB operon codes for two nearly identical approximately 50-kDa proteins involved in the formation of the Staphylococcal pentaglycine interpeptide bridge in peptidoglycan [1]. These proteins are also considered as a factor influencing the level of methicillin resistance [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.23 0.70 -5.79 46 2929 2012-10-02 22:59:21 2003-04-07 12:59:11 11 8 1044 13 402 2189 375 322.00 26 95.28 CHANGED losp..............................EacpFsppps..ssahQosthtcl+ttpshpschlGlccspspVhuAs.....lLhpthshhuaphhYhsRGP....lhDasspcLlpaahppLpcasKp......ppuLalplDPhlhhphhstsGc.h......s.psptllcphpplGacapGhsp.uass.hQsRaphllcLcshs--plhp.shspps+pslK+upphGVcl+hl.st.-E.........LstFtclhppT.p-R+sap.Rs.sYYpchhcpa.s-p.uh..lhlAplshpchlppLppphpphpppltphppt.tpp.pp.............Kpps+hpphppphps.ppclpcspchhpptup..lsLAuuLalhpspEshYLauGossc.........apcahushhlQachhph.Ahc+GlshYNFhGIsGpF...spss....GVh+FKpsFNupl.EhlG-FhhPlpPhhYplh.phlc.......K ......................................................................................................s.pEaptaspp....sphh.Qs..t.hhp.h+.....p.s..h..p..s.ph..l.u.lt.p..p...t......t...........l..h.sus..................l.l...hp.........h.........h.....h........h.....hhY....scGP.......lh..Da...p....s.t...p...h.l.phhhp....t....Lp.p.a.h.+p......ppslhlhhc..P.h.....hp.....h......t................................s.pthh...p..hp.p.hG.a...pa..pGhpt...th.ss...Q....c.........a.hh.hh.sl.....p.....t.......h.....s.t.c........p........lhp.ph.s.t.p.t+ptl+....p....s....h....p.....h....G....l..plchh...ph....-p.............hp.hFhpl.....hp.....T...tc...+....c...t...at......+...s.....pY..apphhc.ha...t-p...s......hhlA.lshpph.tplp.p.t..ptphtth.tt.........................pt.pphtp....pp..phtt..ppchtp.......hpth......tp.st....lsluuul.hhh..s....pc.shY..h.h.uGossc.........appa......h..us.hhhpaphhph.u.h.p.p.s.hs......hYshh.G.lps.th..................st.p.s......GlhpFKpsa..s...s......l....c...hhG.cF..hshp...hYthh.phh..................................................................................... 
0 151 260 364 +2505 PF04023 FeoA FeoA domain Bateman A anon COG1918 Domain This family includes FeoA a small protein, probably involved in Fe2+ transport [1]. This presumed short domain is also found at the C-terminus of a variety of metal dependent transcriptional regulators. This suggests that this domain may be metal-binding. In most cases this is likely to be either iron or manganese. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.45 0.72 -4.01 284 5362 2012-10-01 19:11:18 2003-04-07 12:59:11 9 18 3214 52 1271 3443 369 72.70 22 50.92 CHANGED hsLsp..lp.G......ppspl......tp.lpt..........p................................tthh............p+..LhshGlhsGsplpllp.ps.h..ssslhlpl....................p.sp.......h..s.LppshAppIhVch ..............................................Ltp..hpsG...ppspl..pp..lpt......t.............................tstht...p+LhshG...lh.Gsplpllp.ts.........sh......ssPlhl.pl...................................c..st...p....l...s..LccptAptIhVp................................ 0 485 947 1153 +2506 PF02421 FeoB_N FeoB; Ferrous iron transport protein B Bateman A, Yeats C anon Yeats C Family Escherichia coli has an iron(II) transport system (feo) which may make an important contribution to the iron supply of the cell under anaerobic conditions [1]. FeoB has been identified as part of this transport system. FeoB is a large 700-800 amino acid integral membrane protein. The N terminus contains a P-loop motif suggesting that iron transport may be ATP dependent [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.71 0.71 -4.95 147 3662 2012-10-05 12:31:08 2003-04-07 12:59:11 13 24 2856 55 802 32060 12341 155.20 43 23.61 CHANGED hplALsGNPNsGKTTLFNsLTGup.Q+VGNWPGVTV..EKKpGphp...h......ps.pph.pllDLPGh.....YSL....os..hS.-EtluRcall.....pcc.sDlllNllDAoNLERNLYLThQLlEhs.hPhllALNMhD.AccpG.hpIDhppLpchLG.lPVlsssApcspGlccLhctl ...........................................................................h.pluLlGNPN..sGKToLFN....t....L...............T...........G........u...........p............Q.......+.............V........G.......N.......W............s......G........V........T..........V........E......+....K....p....G..p....h..p......h........................ts......p..p.......h.....p.l..l..D........L.......PG.s............Y...S....L..................ss....h...S..........-......E.....p......l......s.....p.....c.....a.....l.l...................p..p.....p......s...D............l.....l............l............N.........V.....l............D..............A............o.............N.............L.............E............R.............N........L................a..........L...........T........h.......Q..........L...........l...........E..............l............s............h............P......h......l.....l..........u......L...N.M...h.D...h.............A...........c...........+...........p............G........l......p.........I.........D...........h..........c...........t........L.......u.........p.....p......L............G...........s........P.........V.......l.......s....l...s.A.pc..s..c..G.lcpLhpt.h......................................................................................................................................................... 
0 308 571 698 +2507 PF00142 Fer4_NifH fer4_NifH; 4Fe-4S iron sulfur cluster binding proteins, NifH/frxC family Sonnhammer ELL anon Prosite Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.62 0.70 -5.21 14 20327 2012-10-05 12:31:08 2003-04-07 12:59:11 13 8 3168 72 446 26260 3397 132.50 67 98.47 CHANGED RpIAIYGKGGIGKSTToQNhsAALAchG.K+VhllGCDPKADSTRLlLtGKsQsTll-hht-cGt..E-lpl--VlhpGatslcCVESGGPEPGVGCAGRGVITuIshLEE.GAYs.D.lDaVhaDVLGDVVCGGFAMPIR-sKAQElYIVsSGEMMAlYAANNIsKGlhKYAcpGG.VRLGGlICNuRpsDpEpEll-paAcplGophIHFVPRDNlVQ+AEhp+pTVhEasPpss....QApEYRpLA++Ih-Nc..hlIPpPloMD-LEsllhcaGlhc ...........................................................................................................................................................................S..T...R...L..l..........L...p....u.......K....A....Q.....s......T.............l........L..............c....h........A.u.........E..............t....G..s..........V......E...............D............L........E..........L....E....D..V....h......+.......h........G......Y.........t.....s............l.......+......C.....V....E.....S...G...........G...P.....E.....P.....G.....V.G...C...A.......G......R....G....V....I...T....u...I....N......F.....L...E....E....p....G....A..Y....-.......-.........l..DaV....Y.D...V........L.G...D....V...V...C..G....G..F..A...M..PI.R...E....s......K...A....Q.E.I.YI.V.hS............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................... 0 138 291 373 +2508 PF01794 Ferric_reduct Ferric reductase like transmembrane component Bashton M, Bateman A, Finn RD anon Pfam-B_728 (release 4.2) Family This family includes a common region in the transmembrane proteins mammalian cytochrome B-245 heavy chain (gp91-phox), ferric reductase transmembrane component in yeast and respiratory burst oxidase from mouse-ear cress. This may be a family of flavocytochromes capable of moving electrons across the plasma membrane [1]. The Frp1 protein Swiss:Q04800 from S. pombe is a ferric reductase component and is required for cell surface ferric reductase activity, mutants in frp1 are deficient in ferric iron uptake [1]. Cytochrome B-245 heavy chain Swiss:P04839 is a FAD-dependent dehydrogenase it is also has electron transferase activity which reduces molecular oxygen to superoxide anion, a precursor in the production of microbicidal oxidants [2]. Mutations in the sequence of cytochrome B-245 heavy chain (gp91-phox) lead to the X-linked chronic granulomatous disease. The bacteriocidal ability of phagocytic cells is reduced and is characterised by the absence of a functional plasma membrane associated NADPH oxidase [3]. The chronic granulomatous disease gene codes for the beta chain of cytochrome B-245 and cytochrome B-245 is missing from patients with the disease [4]. 
24.30 24.30 24.30 24.30 24.10 24.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.97 0.71 -4.04 209 4658 2012-10-03 10:28:09 2003-04-07 12:59:11 14 103 2096 0 2249 3954 660 129.60 19 25.54 CHANGED tGhh.uhhhhshhhl........lu...h+....s.h....htthshpp....hhh.hH+hluhhshh.hshlH...........shhahhhhh.......................................tthhtthhtp.hhh.......................hGhluhh.......................................hhhhlhlsS......hthhR.+hs............YchahhhHhl.hhh.............................hhllsh .....................................................................................huhhhh..shshl................................ls...h+.........s.l..........h.tt.h.s.hpp...........hhh..hH+h.........lG......hhsh.h..hu....hlH............................sh.sah.h.h..h.hh...........................................................................................hhtth..h..p..p..s.hh.h............................................................hGh...l...u.hl...........................................................................lh.hh.lhh.sS...........hthh.p...+hs................................................achahhhHpl..hh.lhhlhh.h............................................................................................................. 
0 573 1182 1798 +2509 PF00762 Ferrochelatase Ferrochelatase Bateman A anon Pfam-B_879 (release 2.1) Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.91 0.70 -5.46 217 4061 2012-10-01 23:23:09 2003-04-07 12:59:11 14 15 3675 58 1013 3123 3342 305.40 29 88.92 CHANGED +hulLLlNlGsP-ss..psVc.aLpp...hh..sD.cllc...lP......hh..hLtthIh.hRst.cssctYppl..s..s....sSPLhthTcpQsptLpptLspt..........................p..h.VhhuMRY.spP.l.pcslpplpp.p..Gs.....ccllllPLYPQYSsoTouoshcpltcsh..tp..............ht..sp....lc.hlpsaascPsalpAhspp....lppthpp.h..............tp.........spLlhShHGlPpph.l.ptGD...............................P.....................................YtppsppTs....chl.tct........................................L.....shpp................aplsaQSR.hGs..tcWLpP.TsctlcpLspp.G.h..+plhlhs.uFsuDplETL.ElshEh.+-hhtctGs...ppaphlssLNssstalpsL...scllppph ..............................................hulLlhNhGoP-s....psVctaLpphh..pD.p.+llc......hs......h..hlt..hl......hRst.cl.............sctYppl.................s.....ssSP..Lh..........th.o..c...pQ.sp...tLpptLsp...........................................p.h..Vhhu..h.+Y.usP..l.ps...sl.pphp.t..p...Gl................cc...llll..sL..YPpYS.soTsuush-pltchh....tp...........................h...p.....lphl.csaaccs.taIpshupp....lppphtph..................................tc............shLlhShHulPpph.h..ctG.D...............................P................................................................Y.ppspcTs.......chltpt.............................................L...............shsp........hthsaQ.S+.hG....t.WLpP.sp-t..lcpL......scp.....G...h.............cplhlhs.s....uFsuDplETL.ElshEs..+....c.hh.h.....ch..Gs......paphhsslNssstaIchlsslltt..h................................................................ 
0 298 610 840 +2510 PF04060 FeS Putative Fe-S cluster Bateman A anon Bateman A Domain This family includes a domain with four conserved cysteines that probably form an Fe-S redox cluster. 20.50 20.50 20.50 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.70 0.72 -4.37 19 2487 2012-10-03 08:56:42 2003-04-07 12:59:11 8 151 1933 7 725 1862 213 34.40 47 12.10 CHANGED lLPpoNCGpCGassChsaAptllpscsclscChsh ........lLP.psp.CGpCGY.P.GCcsaAcAl.s.p.s..t.s.p.l.spCsPG................ 0 301 522 632 +2511 PF02941 FeThRed_A FeThRed; FeThRed_beta; Ferredoxin thioredoxin reductase variable alpha chain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 25.20 31.40 24.70 18.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.12 0.72 -4.04 17 112 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 88 8 52 119 22 69.00 49 52.62 CHANGED lGDRVRVpsSVlVYHHPpHRspsFDlp.GhEGEltsllspWpGRsISANLPhhVpFs...........KF+AHL+--E ...lGcRVRVpss.VhVYHhP...+stshDlp.GhEGcVtp.hl...spW+G+...lSANLPhhVpFph..........+Fh.AHL+EDE.... 0 12 35 45 +2512 PF02943 FeThRed_B FeThRed_alpha; Ferredoxin thioredoxin reductase catalytic beta chain Griffiths-Jones SR anon Structural domain Domain \N 19.30 19.30 19.50 23.60 17.80 16.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.48 0.72 -10.95 0.72 -4.34 20 255 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 215 8 133 258 35 99.20 36 71.12 CHANGED csLEsh+KFuEpYAK+oGTaFCsD.uVTulVlEGLA+HK-phGusLCPCRHY..EDKpuE.VtssaWNCPCVPMRERKE.....CHCMLFLT....P-N....FuGppQcIsp-plpcpp ..............h-hh+cFs-paAc.+sGhaFssDtslTssVl.cGLucpK-chGtshCPCRhh..-c+pt-...pt.h.CPCl.hREc..c..........CHChLFlo........-.........t..............t................................ 0 48 107 123 +2513 PF01846 FF FF domain Bedford MT, Bateman A anon [1] Family This domain has been predicted to be involved in protein-protein interaction [1]. 
This domain was recently shown to bind the hyperphosphorylated C-terminal repeat domain of RNA polymerase II, confirming its role in protein-protein interactions [2]. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.44 0.72 -4.01 149 2377 2009-01-15 18:05:59 2003-04-07 12:59:11 14 41 279 21 1493 2278 3 52.50 24 17.77 CHANGED cscpsFp.pLLc-tp....lsspo.........pWp...chhp..hl.ppDsRa..hsl.....pstpc+.cplFpca ...............tcpsFp.plLc-.tp............lsspo......................................pWp.....chhp......hl.t......p.....DsRa......psl......tstp-R..cplFpca..................... 0 421 703 1108 +2514 PF05013 FGase N-formylglutamate amidohydrolase Bateman A anon COG3741 Family Formylglutamate amidohydrolase (FGase) catalyses the terminal reaction in the five-step pathway for histidine utilisation in Pseudomonas putida. By this action, N-formyl-L-glutamate (FG) is hydrolysed to produce L-glutamate plus formate [1]. 
20.40 20.40 20.60 20.50 20.20 20.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.41 0.70 -4.46 188 1531 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 783 10 479 1356 624 215.20 26 74.23 CHANGED sss.lllsssHuGptlP..ssh...thshs.sthth+huh........Dhhscplhpths.th.....sAs..........lltuph....SRhllDhNRss.chsshh.........................stsshshlP....s...sp.lh.....thsts-tppRlpphapPYHsslsphlsphpsp.hshshllssHShsshh...sh.............shplGshas............s...ss....plspthhsthp......ssh.............s...lspNpPYsG.uhhspt...htt.spGhtslplElppsLhhs .........................................p..PlllsssHuGpplP..ssh.....thshs...stht.p.hst.................DhalcpLhpths.th.....sAs..........hlt.upa......uRhllDh.NRss.chs.h...........................stustshlP.....s...s.p.hh..........t.sss-ttpRlpphapPYHpslpthlschpsp.aGhslll-sH.Shssh..h....shh..................hssasl..Gsths..............st..ss....plspthhphhp......sts..............hs.sshNtP.apG..uhhscp........hststpshp..ulplElppshhh................................................................................................ 0 118 271 364 +2515 PF00167 FGF Fibroblast growth factor Bateman A, Sonnhammer ELL anon Prosite Domain Fibroblast growth factors are a family of proteins involved in growth and differentiation in a wide range of contexts. They are found in a wide range of organisms, from nematodes to humans [2]. Most share an internal core region of high similarity, conserved residues in which are involved in binding with their receptors. On binding, they cause dimerisation of their tyrosine kinase receptors leading to intracellular signalling. There are currently four known tyrosine kinase receptors for fibroblast growth factors. These receptors can each bind several different members of this family. Members of this family have a beta trefoil structure. Most have N-terminal signal peptides and are secreted. 
A few lack signal sequences but are secreted anyway; still others also lack the signal peptide but are found on the cell surface and within the extracellular matrix. A third group remain intracellular [2]. They have central roles in development, regulating cell proliferation, migration and differentiation. On the other hand, they are important in tissue repair following injury in adult organisms [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.52 0.71 -4.34 77 1638 2012-10-02 19:42:32 2003-04-07 12:59:11 13 13 202 252 751 1462 0 113.10 32 52.07 CHANGED RhtpLYsps..saaLplhs.....sG....pV.sGoppc..sstaulLplpus....sh.G..hVtI+usposhYLsMsccGcLY.......u...s..........pphs.s.-ChFpEph.-NtYssYtStha.....................tcshaluls+pG+s++Gp..+s...+ptppsspF.....L ................................................................hpLaspp.....uhaLpl..s...............sG...............pl.s..G...s...p..cc.............ts....a...s...h.....l.pl.sl....................sh..u...hVs..I.pGlp.o.shY.lsM.N.p.c.G.cLa.......u....o..............................pp.hs....s..-ChF..p.E...pl.....E..N.t..YssYtStha.....................................ptaaluLs+p.G.pshcGp...+s+.tp..spFh.................................................................... 0 98 159 373 +2516 PF00370 FGGY_N FGGY; FGGY family of carbohydrate kinases, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain adopts a ribonuclease H-like fold and is structurally related to the C-terminal domain. 
26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.34 0.70 -5.06 13 14797 2012-10-02 23:34:14 2003-04-07 12:59:11 16 35 4137 100 3453 10824 3157 231.80 24 48.40 CHANGED hhlGlDpGTousKAllhs.cpGcllustptphshhpPcs..GasEpDPc-lapsstpslppllppt....tho.ppIpuIulouQt+uslllDcps.cPLhsAIlWsDsRTsshscpLpc..phs.pplhphTG.shhshaolsKLpWlp-pcPchhcphc....pahhs+-YLpa+LTG.....pasTDhosASsohhaslpstcWDcplLphlGIs.s...hLPpllpouclhGplpsphAthhGh..slPVsuGuuDssuuslG ..................................................................................................hlulDhGTous+..s..l...l.....h.........s....p.......p.....G......p..............h....l.......u.......t......t.......p...t....t....h......p...............h......h......s......p.s................G.a...s....E.........p.......c........s..p..........p.........l...a...p...s.....h...t...p.s....l.p...p....s...hpps........................th.p....p...p.......l..t....uI...G..........l..........s.........s......p..........t.......c........s..........h..........l..l.........h.............D.........c...........p..............s....p...............s...................l..................h...........s...........s...........I...................s................W.p..D..p..R..s...s..p...h....s....p....p........l............p...............p..............t....h.................h.........t................p..............t...........l................h................p................h........o.........G................h...........................h.......t................s..........h......h................s......h...s......K.........l.h.....Wl....h.c...p......s.t...h..h....p.p.sp................hhh.h.h....s.sa.lh..a..p.L..T.G....................t.h.s..s..D....h.....o.s..A..u...p.....T..h..h........h..........s.....l.....p..........p......h..........p........W........s........p........p........l........L...p.....h..........h.......s.......l...s...pp..................hl......P.....
.p........l.......h......p........s...u.....p..h..h.G.....p.......h............p...t.t.....h.............t.............t....h....h.....u...................................................t..............h...P........l.s.s..s.s.sDptuuhhG............................................................................................... 0 1153 2092 2847 +2517 PF02782 FGGY_C FGGY family of carbohydrate kinases, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain This domain adopts a ribonuclease H-like fold and is structurally related to the N-terminal domain. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.30 0.71 -4.70 105 14710 2012-10-02 23:34:14 2003-04-07 12:59:11 11 34 4144 98 3368 11639 3173 190.40 23 38.30 CHANGED hthshGToshhhh..ts........spPh...tspuhhsshs...........................s..........shhhh-GspsssGslht.alhp........htthtpttt...................p.hst..t..ssss.s................sthhhh.....Psh.sGpts......Phhsspttusl..................hGhs.sps....huphhpuhlEuluhthRthlcth............tp.......................uts...hsslhhsGGts.cssllh.Ql..................hADshG..hPlhhsps.s-usslGuAhlu..uhAh 
...............................................................................................................................h.ohGTushhhh.....ss............pp.sh........s.pu..h..h.s..s.hs..................................................s.tas...h.p.G..s.h.h.s.u.Gs.s..l.p....Wlpct.h................th......h.tpttp.........................p.hs.................p.ts...ss.....................ss.l.h..hl..........Phh..sG.....ts...............P...h.h....s..s.ps+..G......s.h.................................hGl.o...h.s.ss..............ps.clhR...A...s.....lEu....l...u.a...p...h..p.s.....s.l.csh.........ppt........................................................................Ghp........hpplt.l..sGGuu...pssh.h.h.Q.h..................h.ADlhs.......hsl...h............sts...t......E...ssALGAAhhAulu............................................. 0 1123 2048 2789 +2518 PF00498 FHA FHA domain Finn RD, Durocher D anon Prosite Family The FHA (Forkhead-associated) domain is a phosphopeptide binding motif [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.42 0.72 -3.85 221 10726 2012-10-01 21:55:46 2003-04-07 12:59:11 21 374 2041 93 5327 10014 968 70.70 25 13.00 CHANGED hplGRs...........ts.s-ls...ls..............s...tt.lS+pHut...l...phpss.............................phhlpD.....ht....SsNGT.al.....Ns......pplpt.................hpLpsGDh.lplG ...............................................hlGRs.....................ts..s..c.lh.......ls.................................s......t..lS.+.p.Hup...l......phpss.................................................................phhlpD...............................hs..........SsN.G..T...a.l...........Ns.............pplpp............................h...Lp.sGDh.lplG............................................... 
1 1945 3258 4503 +2519 PF00771 FHIPEP FHIPEP family Bateman A anon Pfam-B_983 (release 2.1) Family \N 20.50 20.50 20.80 23.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 658 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.96 0.70 -6.20 180 3878 2009-01-15 18:05:59 2003-04-07 12:59:11 15 8 2246 8 680 2701 953 594.50 38 94.91 CHANGED sIlshhllPLPshlLDlhlshNIslSlllLhlulalp+PL-FSsFPolLLlsTLhRLuLNlASTRlILhpGc......AGcVIpuFGpFVlGG.....NhlVGlVlFlILsllpFlVITKGupRlAEVuARFTLDAMPGKQMAIDADLNAGlIspc-A+pRRpclppEucFYGAMDGASKFV+GDAIAGllIhhINllGGlhIGlhQ+shshucAhpsYolLTIGDGLVuQIPALllSsAAGllVTRsu.spp.....shupplhpQlhspP+sLhlsuullhhhullPGhPplsFlhluuhhuhhuahht+pp................................................ttttttsttpt.................tttttpshpshhps-...sltL-lGasLlsll-tsp..sut.LhpRlpslR+plup-hGhllPsl+l+Ds.hpLtssp....YpIplpGlplupuclhsschLAls..ssts..t................pl.sG..htsp.....-PsFGhsAhWIstsp+cpApttGa.............sVVDssoVluTHLscll+ppus-LlGhpEsppLl-pl........ppphPcLV-El.hPchlsluplpcVLppLLpEpVuIRcltsIlEsLs-hu.sps.cDsthLsEhVRtuLuRtIs....pphtss...p.spLsllsLssplEphltpul.pp.tsts.s.......hlsl-Pshsppl..lpplppth...pp.htt........tGtssV.LlsusslRhhlp+llcphh.s.plsVLSasElsssh.pl 
....................................llshhllPLPshllDhlhshNIslulhlLhluhahpcsL-FusFPolLLlsTLhRLuLNluoTRlILhpup.................AGcllpuFGpFVlG.G......NhslGlVlFhILhllpFlVITKGupRlAEVuARFoLDuMP..GKQMuIDA....DLsAGlIspppA+pRRpclppEupFYGuMDGAuKFV+GDAIAGllIhhINllGGlhlGhhQasMshupAhppYolLTIGDGLVuQIPuLllShAAGllVTRls..ssp.........shupplhpQLhs.pPp.slhlsuu.lhhlhul.lPGhP....p..hsFlhhus.hlhhh..u.ahh.ppct....p.......................................................t..t......t..p...t..t..t..h.p..t.............ttpttp.t..sh.ss.l.........h.-....sl.tlclGh.p.Llshlctt.p...ttp...L....h..p+..I...cslR+phup-hGhl.hP..l+l.........R...-s..hpLpssp....YtlhlpGlplup.uc.lhssphhsls.......su.ps.hs................pl..sG........sh..............-.P.s..a.G..h.....suhWIpt.s.p..ppAph.Ga.............sVl.-.ssoVlsTHLscllpppss-lhGhpEsppLl-pl........pp.phPcLl--l.h................p..hlslspltcVLppLLpEplsIRDhtoIhEs....Ls....-hu...sh..........p....pD..s.hLsthVRhuLtRtIs....p.ph.h...s.s..........t.spl...plltLssplEphlhp.ul..pt..t..t..tu.t.........hhsl-Pshspplhpphppth..pp....t................h.u.t....ssV.LL..s.s..slR.hlp+hlct..h..s..pl.VLSatElssphp............................................................................................. 
1 210 402 531 +2520 PF01269 Fibrillarin Fibrillarin Finn RD, Bateman A anon Prosite Domain \N 19.90 19.90 19.90 19.90 19.80 19.70 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.40 0.70 -5.44 42 616 2012-10-10 17:06:42 2003-04-07 12:59:11 12 7 468 19 394 756 165 215.80 57 78.68 CHANGED lc.H.+atGValsc.sctst..LsT+NLsPGppVYGEKhlphpsp...........EYRlWNPaRSKLAAAIlsGlcph.I+.GuKVLYLGAASGTTsSHVSDIVG.cGhVYuVEFS.RshR-Ll..shuccRsNllPIltDARpPpcYRhLVt.hVDllFsDVAQP-QAcIluhNAcaFLKsGGahlluIKApsIDsTtsPctVFtpElpcL+c...pshcsh-pl.sLEPa-+DHAhVlGha+h ...........................................................cPH.+atGVFls+.G+cDh..LsT+NhsPG.....cuVYGEKRls..l..pss................spKhEYRlWNPF..RS..K...LAAu.Ilu.G.........l-p.la..lcPGuKVLYLGAASGTTVSHVSDlVGP................pGhVYAVEFS+....R...s...G...R..DLl..s.h..A..c..+...R......s..Nl..lPI...l...E...DA.......R..+...P...t.+Y...R.M...L...V...........s..MV..Dl..I.F.u.D..VA..Q...P............DQA.RI.l.ul.N.A.chFLKsGGth.l.I.S....I.......KAsCIDS..Tss..s..E..s.....VF...A..........p.EV..pKL...pp...-phKPhEQl.oLEPaERDHAhVlGhYp..................................................................................................................... 
0 137 227 320 +2521 PF00147 Fibrinogen_C fibrinogen_C; Fibrinogen beta and gamma chains, C-terminal globular domain Sonnhammer ELL anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.82 0.70 -4.76 11 4781 2012-10-01 23:56:02 2003-04-07 12:59:11 13 214 1032 239 2185 3828 41 153.80 35 42.42 CHANGED .s+DCp-lhpp..GucoS...thYhIpPcssp.cshcVYCDMcT-sGGWTVhQpRpDGSlsFtRsWcsY+pGFGNls............sEaWLGNDpIppLTptG..shcLRl-LcDapG-pshAhYssFpVps-ss+YpLpVssYpG.sAGsAh........stscoMThHNGMhFSTaDRDND.....ssptsCupp.uGG...WWYspCHuANLNGhYYaGushshp........sGVsWhsWKGp.........YSh+pspMKlRPh .............................................................................s.......thh...l...........................................h.......shC.-........p............t...s.....G.....G....W.h.....l.h....Q....p...............R...............s......G....p........s.....F.......c.t.......Wt.......p.Y.p...G.F..G.p.t................................sEaW..l....G..ctlh....lT......t..t..t................t...........Lh.lp..h......t....s......h.............s....p............h..............h.A........Y.....t...........tF...........l.....t.....s.....tt.p..Y.t.L.t.l..t..t....a...p....G.............................................................p..ps..h.FST..h.D..p........DND............................t.tp.C.u................t.....G..u.....WW.a.......p....t.....C....h..........s....p.....G.................................................................................................................................................................................................................................................................................... 
0 849 987 1634 +2522 PF03516 Filaggrin Filaggrin Griffiths-Jones SR anon PRINTS Repeat \N 25.00 25.00 27.30 25.00 18.30 24.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.81 0.72 -3.51 10 278 2009-01-15 18:05:59 2003-04-07 12:59:11 8 19 11 0 38 231 0 53.10 57 24.32 CHANGED -uSRpSsu++HthSo+...ADSSRHSpsGQGQuuuu+o..SR+.pGSSsSQDSDSEGH .....-tuRpSsuc+Ht...Sp+....ADSSRHStsGpG..Q.uS.uuhs..St+.pGSSsSQsSDSEGH... 0 23 23 23 +2523 PF00038 Filament filament; Intermediate filament protein Sonnhammer ELL anon Prosite Family \N 40.00 40.00 40.00 40.00 39.90 39.90 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.07 0.70 -4.96 33 4874 2009-01-15 18:05:59 2003-04-07 12:59:11 16 38 268 31 1988 4185 5 271.60 33 60.33 CHANGED sEKcphQsLNDRhASYI-KVRtLEppNptLEs+lpphppp..p.....tspssphtshYppplp-LRppl-shst-+u+lpl-l-Nhptsl--h+pKaEcE............................hshRpssEs-lssLR..............+slD-toLs+lDLEt+lESLpEElsFLKphHEEElp-Lps....plp......splsVEhDs.s.phD....LsphLp-lRuQYEslAp+N+p-AEphYpsKhpcLppssspss-tlpsuKpElsEhRRpl...QuLphElpu....lpupppuLEppltEsEpRastplpshQstlspLEppLpph+p-MtRplpEYQcLLsVKhALDhEIATYRKLLE.GEEsR 
.................................................pEKtphptLNs+h.A.sa.l.-.K..................V..R...LEppNt....L-...schp.h.pp...........t..t...s..t.....sha...t.l...p........p..L...+..pp.............l...phtt-p......u...pl..h.plcs.h...p..th-...........c....h..+..t...+..a.Ep.E..................................................hsh.R.p.psEs-h..ssl+.................................................................................................+..slD.p....h.hhs+s.-..LE....tp....l-uLp......-.El...t.a.......L.+.p....a....-...c............El....p..plps.............plt.......spl.l...c..hD...s......s....p...D..............Lsp..hl.t..-...l.R.......s......Q.Y..E.p.l.s..pp.....s..+..t....-...s..E...p.h.a...p.........s.+..................h...............p...c.....lp......t....p.s....s.p......p....s....-.t.l.........p..ps.+.p...E.....l......p.......E...hp....+p..l.................pp.L..ph.El..ps...................................hp.s..................p...ts..L.Esplt-sEpphp...h.tl.pphps..h....lspLEt........pLpp.h+p-h..t.pphpE.Y....p.p.LhslKhtL-.hEIu.TY...R..+LL.E.GE-t.......................................................................................... 0 179 311 802 +2524 PF04732 Filament_head filament_head; Intermediate filament head (DNA binding) region Kerrison ND anon DOMO:DM04896; Family This family represents the N-terminal head region of intermediate filaments. Intermediate filament heads bind DNA [1]. Vimentin heads are able to alter nuclear architecture and chromatin distribution, and the liberation of heads by HIV-1 protease liberates may play an important role in HIV-1 associated cytopathogenesis and carcinogenesis [2]. Phosphorylation of the head region can affect filament stability [3]. The head has been shown to interaction with the rod domain of the same protein [4]. 
21.80 21.80 22.10 21.90 21.00 21.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.68 0.72 -3.28 45 462 2009-09-11 21:54:48 2003-04-07 12:59:11 9 4 64 0 185 404 0 84.80 31 16.72 CHANGED tssSSYRRhFGs.............sts.p.uhss...st..ssuSshp...spps.psosu...s......pShttps.sssh...h.........-s.lDFSh.usAlNs......EFKtsRT ...................................psSSYRRhFGs.............shs.phuhss.........usuS.uhp.........spph..psSsu....ss.h...u.Sh..ptpsssssht......h.s...............-s.LDF.S..usAlNs......EFKsoRT.. 0 6 27 78 +2525 PF00630 Filamin Filamin/ABP280 repeat Bateman A anon Prosite Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.48 0.72 -3.44 132 7961 2012-10-03 16:25:20 2003-04-07 12:59:11 14 217 181 75 3933 8351 79 92.40 27 39.51 CHANGED sscssc.sps.....G.G.......l..pts.....hss.pss..pFplpsp.....ss.Gt...t........t....lpl......................tsspst................................................................................................................................................................th.h.....t...lp-pp.....................cG..sap.lpY.pPpps..Gp.aplpVph....ss.......p.plssS.PFp 
......................................................................................................................................................................................................................................................s..sspclps............Gs.G...................L....pts..........hss..pss...pFslpsp...............sAs.s.s...................lt.......lpl...............................puP.sst................................................................................................................................................................................................hch....p.....lp.-.s..t...............................-.G.....o.a..s..VpY....h.P.p...p.s.....Gp..a..........pls....Vpa.....ss...................p..cl..P..s..S.PFp............................. 0 1135 1433 2563 +2526 PF01611 Filo_glycop Filovirus glycoprotein Bateman A anon Pfam-B_1023 (release 4.1) Family This family includes an extracellular region from the envelope glycoprotein of Ebola and Marburg viruses. This region is also produced as a separate transcript that gives rise to a non-structural, secreted glycoprotein, which is produced in large amounts and has an unknown function [1]. Processing of this protein may be involved in viral pathogenicity [2]. 
25.00 25.00 176.80 176.70 18.80 18.10 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.08 0.70 -5.66 6 92 2009-09-10 21:45:54 2003-04-07 12:59:11 11 1 25 6 0 98 0 322.40 54 65.93 CHANGED +KTSFFlWVIILFQ+shSlP.LGlloNSTLQso-lDphVC+D+LuSTsQL+SVGLNLEGsGVATDlPoATKRWGFRoGVPPKVVsYEAGEWAENCYNLpIKKPDGSECLPsPP-GlRsFPRCRYVHKlQGTGPCsGslAFHKpGAFFLYDRLASTlIYRGssFAEGVIAFLILsKs++cFhpSpPh+EssNhTpsooShYaToolpYphssFGspposhLFclsspTaVpL-ssaTPQFLspLN-TIppssphSNTTG+LlWTlsPslD........oshGEWAFWETKKssocphpucohLSh.hhps+T.......pNhScpuss.....+pohpPus..ssTst.....ssppsssh.plPhpshpsssoppphpss.p .............++TSFFlWVIILFQ+shShP.LGllpNSTLpso-lDphVC+D+LuSTsQL+SVGLNLEGsGVATDlPoATKRWGFRoGVPPKVVsYEAGEWAENCYNLpIKKPDGSECLPssP-GlRsFPRCRYVHKlpGTGPCsGshAFHKpGAFFLYDRLASTlIYRGssFuEGVlAFLILscs++cFhpS+Ph+EssN..hTpssoStYaToolpYphssFGsppophLFclsshTaVpL-spaTPQFLhQLN-TIh.tssphSNoTG+LlWplsPplD........sslGEWAFWETKKs..pphhspp.lsh...pstt.......ps.stt........php.pstp...sTs......s.p.p..h.p...t.hpt..pp........h........................... 
0 0 0 0 +2527 PF02097 Filo_VP35 Filoviridae VP35 Mian N, Bateman A anon IPR002953 Family \N 22.40 22.40 54.20 53.70 21.90 21.70 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.96 0.70 -5.23 4 107 2009-09-11 13:24:37 2003-04-07 12:59:11 10 2 30 27 1 61 0 190.00 70 95.46 CHANGED h.GPEhoGWlSEQLMTGKIPlo-lFsDl-NpPs.h.hphhspspsss+ss....sppoQTDshhs..hccVcpTLssLloslpRQssAIESLEsRlTT....LEsuL...KPV.DMuKTISSLNRuCAEMVAKYDLLVMTTGRATATAAAs-AYWsEHGQPPPGPuLYE-sAl+uKlcs.sshVPpuVp-AapNLsSTosLoEENFGKP.ISAKDL+pIhYDHLPGFGTAFHQLVQVICKI....GKDsNhLDhIHAEFQASLAEGDSPQCALIQITKRlPhFQDssPPlIHI+SRGDIPRACQKSLRPVPPSPKIDRGWVClFphQDGKTLGLKI ......................................................................................................................................................................................................................................................LALLLFTHLPGNNTPFHILAQVLSKIAYKSGKSGAFLDAFH....QILSEGENAQAALTRLSRTFDAFLGVVPPVIRVKNFQTVPRPCQKSLRAVPPNP.T.IDKG................. 
0 0 0 1 +2528 PF00419 Fimbrial Fimbrial protein Finn RD, Bateman A anon Pfam-B_196 (release 1.0) & Jackhmmer:B2PIN3 Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.27 0.71 -3.98 244 16321 2012-10-02 17:35:21 2003-04-07 12:59:11 15 9 786 91 829 6504 36 150.30 18 66.83 CHANGED lshpu.sl......tsssCsl....s..ssss...................VsFG.slsh..s..plsss.........tht.pshslsls.........Csss.................slpl.thh........us.....ssshs..ssslts....sss..................slGlpl.......................................pps..ss.....h...................t........thsphhshshs.............hpAs.hp..tsssshs.t....GpFsAsuolplsY.p ...................................................................................phpG.pl......hsssCs...l........s..ssst...............sV.sh...G...plss.......s....phpss...........tssst.ts..F...slplp...........tCsss...............................tss.pl...shs.................us.......sss.ss....ss.hlss......sssss.t....................................ulGlpl......................................hss.s..sss.............l.....................thssss.......................s.hshssssss..........hs......................apAphhs..............sssssss...s............GshsAssshslsYp........................................................................................................ 0 59 218 557 +2529 PF04449 Fimbrial_CS1 CS1 type fimbrial major subunit Kerrison ND anon DOMO:DM04212; Family Fimbriae, also known as pili, form filaments radiating from the surface of the bacterium to a length of 0.5-1.5 micrometres. They enable the cell to colonise host epithelia. This family constitutes the major subunits of CS1 like pili, including CS2 and CFA1 from Escherichia coli, and also the Cable type II pilin major subunit from Burkholderia cepacia [1]. The major subunit of CS1 pili is called CooA. Periplasmic CooA is mostly complexed with the assembly protein CooB. 
In addition, a small pool of CooA multimers, and CooA-CooD complexes exists, but the functional significance is unknown [1]. A member of this family has also been identified in Salmonella typhi and Salmonella enterica [2]. 20.80 20.80 21.30 20.90 20.50 20.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.91 0.71 -4.14 13 326 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 291 4 19 128 3 139.00 45 81.62 CHANGED VpKsITVTAsVDPTl-lLQADGSALPoulplsYhPu.pshpstplpT+IaTNDpoKslpl+LsssP.sLsNlhsPo.ppIPlsVohGGcsLoTous...olcuucL.FusuulsssSsshsLsIuts...Tsussss..AGsYQGlVSlllTQu ...................................................................VQKDITVTANlDusL-hhQsDs..o..uLP.p.sl.chpYhPG...pGL..sapL.T+l.a..SND..TK...cVphpL.luss.QL..l.p...sL.Dso.Khl..slsVThGG..cplps.suu...sh.pAspl.Fs..s..s..ths....suShs.tNLhhuQp....stusL..ps....G.YpGlVSlhlSQ.................................. 0 1 9 13 +2530 PF02432 Fimbrial_K88 Fibrimal; Fimbrial, major and minor subunit Mian N, Bateman A anon Pfam-B_2036 (release 5.4) Family Fimbriae (also know as pili) are polar filaments found on the bacterial surface, allowing colonisation of the host. This family consists of the minor and major fimbrial subunits. 
25.00 25.00 29.00 28.90 21.50 21.20 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.57 0.70 -4.94 17 376 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 101 12 10 223 0 219.50 30 89.32 CHANGED huG-lplsGhlTs....cshWsWplGsu.pshsssssch.spssplsIslstst..PlLhG+sh-sh.tG...GsGhsPploa.tuh-Ghsl.......shsssGpuphsLPVp.s-sssphGoLshslstuuslptspspp...................sha.GshhssshshlsGpotssssups.sht.........hhssphsshhp.....shpsuuh.s.shhocuslppltus....Yuuulstsps.chplstssss..hpWpAsLsVolsYp ........................................uGplplsGslTs...pts.WtWpVGsss.ps....h..s..schtcs............tssphslslstss...hPlLhG+sh.t.s..s..sG...tsGhsP.Isa..sspG.sh.......stsssuhhpholPVp..s-sss.ps..Gshshs.h.ptuusls.tshtsp................................a.Gs..h.hs..tsh...s...hl.s.s.p.usshstu.t.o.s.h..........hhssthsshhs.....ss.ss.u.sshs.uh..hs.D..upltplsus.....YAuslststs.-hphcts.sss...hpWpsuLsVoloYp.......................................................................... 0 0 0 3 +2531 PF05182 Fip1 Fip1 motif Wood V, Bateman A anon Pfam-B_4652 (release 7.7) Motif This short motif is about 40 amino acids in length. In the Fip1 protein that is a component of a yeast pre-mRNA polyadenylation factor that directly interacts with poly(A) polymerase [1]. This region of Fip1 is needed for the interaction with the Th1 subunit of the complex and for specific polyadenylation of the cleaved mRNA precursor [2]. 22.70 22.70 23.70 23.80 22.40 22.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.69 0.72 -4.69 36 388 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 282 0 249 374 4 44.70 53 8.76 CHANGED h-hD.l-.shc-..KPWRcPGADloDYFNYGFsEpTWctYCp+QcchRt ................-hDl-..sh..--..KPWR+P.........G...AD..lSDYFNYGFsEpTWcsYCcKQcclR....... 0 86 141 209 +2532 PF02433 FixO Cytochrome C oxidase, mono-heme subunit/FixO Mian N. 
Bateman A anon Pfam-B_2045 (release 5.4) Family The bacterial oxidase complex, fixNOPQ or cytochrome cbb3, is thought to be required for respiration in endosymbiosis. FixO is a membrane bound mono-heme constituent of the fixNOPQ complex. 24.20 24.20 24.30 24.70 23.80 23.20 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -4.98 56 1201 2012-10-03 10:02:11 2003-04-07 12:59:11 10 7 1028 0 361 915 226 199.50 49 74.82 CHANGED HphlE+NshlLhlhshlsloIGGlVEIsPLFal-sTI.EcV......................................................-GlRPYTPLELsGR-IYlREGCYsCHSQMIRPhRDEVERY....GHYSLAAEShYDHPFQWGSKRTGPDLARVGG.KYSDpWHlsHLhsPpuVVPcSIMPuYsaLtcst.LD...........hssltschpshptl...........GVPYo..-p...............IpsApsDhpuQAssps......s.hsshhp+Y.spAthp.......saDGssttloEMDALlAYLQhLGThVD ..............................................................................................HphlE+Nshllhlhh.ll.sl.ulGGL.V.E.I.l.PLFa...cs.s.hcsl..........................................................cGh+..P.YT..s....LpL.....tGRDIYIREGChsCHSQMI..R....P....F...R...u..E.s...E.R......Y.............G.......+.......Y......Sl.A..G.Ess........Y.........D..H.........PFlW.......GSKRTGPDLAR.VG.................u.+.Y...S....D....-....WHhsHLhsP..RsV..VP-ShMPuYsaLh...csp..lD.................scss.t.tc.hpsh+.p.l...........G.V.P.Yo..--................pIspAtpt.h.....cup.........................................................................................p........................................................................................... 0 111 233 301 +2533 PF01346 FKBP_N Domain amino terminal to FKBP-type peptidyl-prolyl isomerase Bateman A anon Pfam-B_402 (release 3.0) Family This family is only found at the amino terminus of Pfam:PF00254. This domain is of unknown function. 
21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.08 121 3385 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 1650 10 537 2180 296 111.50 31 49.55 CHANGED shuhshsssttssss...........plpopppKhSYulG.hslGpphppphhc........lDhctllpGlpDuh..tssp.tlo-E-hpps...Lpphppclptppptpht..p........ApcNtpcGctFLs-NtpK-GVhsTp.SGLQYcV .....................................................................hts..........................shps..cppsuYulG.hp.lGpph...ppphhct...............ls.-tl.htGlpDuh.....tu.p.ps....tl...st..pp..h..pps....L..pp.hp.pchpsttptp................uptst..scGctFLpcN.tc.+......c.GVpsot..SG.LQYcV.................................. 0 144 275 420 +2534 PF04620 FlaA Flagellar filament outer layer protein Flaa Kerrison ND anon DOMO:DM04570; Family Periplasmic flagella are the organelles of spirochete mobility, and are structurally different from the flagella of other motile bacteria. They reside inside the cell within the periplasmic space, and confer mobility in viscous gel-like media such connective tissue [1]. The flagella are composed of an outer sheath of FlaA proteins and a core filament of FlaB proteins. Each species usually has several FlaA protein species [2]. 
25.00 25.00 25.60 25.50 24.70 24.60 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.02 0.70 -4.93 10 195 2009-09-11 10:09:13 2003-04-07 12:59:11 7 10 102 0 60 164 0 204.90 22 55.90 CHANGED phshsshoslDa...ucsschuasutupth....hccuLshssW.Vthsuu...sp.....tsshhtts.scsputchuus+llGVRlpFsptGpNuhull....pPsatIPhhu.....................................slKoIuVWVaGtuY.asLplLlcDppGch+phhMGpLsFsGWKpLshs.NPsalsss+sR.lp.psshYPsussclshpGF+lccDssccsG-alsYFcDl...........+llhDhhsV-hsc ........................................................................................t....................t....t.tt......h.......pa...hs.s.............................sptt.stt.h..tspplLGV+spFsphu.sshs.hh.........tsshtIshhu..................................................hh.KplsVWVaGtsatasLplhlcDhpGpp....aph.h...G.pLsFpGW+pLp......hs..sPshls.......p.p.s.Ysh.ts.lphhuFtlppsstpthssahhYhc-l...........+llhDhh......p.............................................. 0 29 46 46 +2536 PF03646 FlaG FlaG protein Bateman A anon Pfam-B_2985 (release 7.0) Family Although important for flagella the exact function of this protein is unknown. 21.00 21.00 21.20 21.40 20.80 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.20 0.72 -3.91 139 905 2012-10-01 21:18:35 2003-04-07 12:59:11 10 1 864 1 295 725 111 111.20 22 87.37 CHANGED ststssssstsppsspsststs..................pptppptppp......tt...sppc.lp.cslpplschhps.hpps....LpFpl--cssc.hlV+VlDppos-VIRQIPsEchLclu....cplp-..........ht...........Gl.Lhcpc .........................................................tsts................t.tt.t.................pptt.pttptp..........pp.hspcclp.chl.c.clNchhps...hs..ps.....lpF.pl..c-.c..s..sp.hlV....pVhD.ps..Ts-lIRpIPsEEhLclh....pplp-...............hh.................Gl.lh-p....................... 
1 114 218 264 +2537 PF03614 Flag1_repress Repressor of phase-1 flagellin Griffiths-Jones SR anon PRODOM Family \N 24.00 24.00 25.50 65.20 23.80 23.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.88 0.71 -4.68 3 93 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 77 0 3 55 2 147.20 63 91.53 CHANGED DISYGREAElWPRDYSsLARRlQFLRFNDIPVRLVSsNGQlIIGYIuKFNs+ENhI.LASDcsKGsNRIEVKLEpLAoLE....ELsusDuhstoLVssDhFNlQ.hsPSRRDFFSICNKCaKQGVGIKVYMtDGRlLpGcTTGVNACQVGlppuNGNHMQVMFDWVSRI .DIoYGh.AEhWPR-YShlt+tl.FhRhspIPVRls.psuplhshYltth.spcNhI.LASDc.sKGspRIEVKLEpLAhLE....ELsusDs....hshoLVssD.FNlQ.hsPSRRDaFSICNKCaKQGVGIKlYMt.GplLpGcTTGVNACQVGlppuNGNHMQVMFDWVSRI 0 0 2 3 +2539 PF05149 Flagellar_rod Paraflagellar rod protein Moxon SJ anon Pfam-B_6464 (release 7.7) Family This family consists of several eukaryotic paraflagellar rod component proteins. The eukaryotic flagellum represents one of the most complex macromolecular structures found in any organism and contains more than 250 proteins [1]. In addition to its locomotive role, the flagellum is probably involved in nutrient uptake since receptors for host low-density lipoproteins are localised on the flagellar membrane as well as on the flagellar pocket membrane [2]. 
21.90 21.90 22.10 27.80 21.60 21.80 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.74 0.70 -5.21 13 124 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 28 0 18 115 0 271.00 42 47.55 CHANGED scc.lothl-s+cph+ptscpDLc+lc-phpphssscscppcpapsp+pc.-chlppNp-pQppsaccIp-LtccLpc.......LupcRpc.V-cRlcttccEcpRcssappFlphuspHpptLppolpNsctulcsssplpshVt-uCcplsuh.p-chppsLu-hhhtlpc-+LctFRshYlshGcLhaKKE+RLEpl-+pIRhsclppEhuh-oLDPNAKcaucscc-Lhtt+ppVppplshlcp+hcpu.csFcPTEcuLht....AGlcFVHPh-ElpctslpRps+lL-Y+phhs .........................ppssthh-s++ch+ppscpDLcplp-sIQcsDh-DucshKRausp.+c+S-.chlpcN.-pQ--sWp+Ip-LERpLQc.......Lus-Rh-EV+RRIEcp-REE+R+s-.appFLclsuQHK+hLcholhNCDhAhcssuhlE-hVsEuCsslcu+.aD+sppcLusLplpV+pEaLEtFRtLYhTLGpLlYKKE+RLEElDRpIRssHlQlEhslETFDPNAKcau-tKK-LhchRtpVEcELthL+-K.spAL-.FpsoEcuL.t....AGl-FsHPh-E.pctslsRRSKhlEY+s+l............... 1 7 13 18 +2540 PF00700 Flagellin_C Bacterial flagellin C-terminal helical region Bateman A anon Pfam-B_41 (release 2.1) Family Flagellins polymerise to form bacterial flagella.\ \ \ There is some similarity between this family and Pfam:PF00669, particularly the motif NRFXSXIXXL. It has been suggested that these two regions associate [2] and this is shown to be correct as structurally this family forms an extended helix that interacts with Pfam:PF00700. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.57 0.72 -3.82 33 7466 2009-01-15 18:05:59 2003-04-07 12:59:11 16 27 2537 11 1427 5887 1917 81.80 34 21.70 CHANGED lshlspA...lsplsshpupLGssQsRlcpssssLsspps.lssuhsclccVDhsctsochsphphhhQsuhulhAphspl..slLshL ...........................ttlDsA...lspls.shRusL...GAhQN.R.l.p.ps...l.s...NLsst.......spN.l....suu.p.ScIcDsDhApEsssho+..tplLpQAutuhLu.Q.AN.phs.p...slLpLL....................... 
0 485 932 1156 +2541 PF00669 Flagellin_N Bacterial flagellin N-terminal helical region Bateman A anon Pfam-B_37 (release 2.1) Family Flagellins polymerise to form bacterial flagella. This family includes flagellins and hook associated protein 3. Structurally this family forms an extended helix that interacts with Pfam:PF00700. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.36 0.71 -4.38 34 9093 2009-01-15 18:05:59 2003-04-07 12:59:11 15 29 2631 13 1625 7111 2324 128.50 32 35.14 CHANGED IsTshhu.ssppslspspsplsputpclooGh+hssut-.uushuhusshpsphssLsphscssshuhuhlpsspu..Alsphtsshpp.........lppLsstuussssssh..sppshpsplppLhspls.....TsaNGphlhuGsto .......................................................lsss..s...s.pt...l...s...psp...s...t...hsp...shp+L.S...S.GhRIss.AtDD.uu.uhulupphpsphpuLspu.tcNs.scuhuhhQosEu..u.L.s.p...h.s...s.h.LpR.........h+-..L......u...l...Q...u....u...N..u...o..t.oss......D+....su....l.p.pE....l....p....pLh.sc...l.......spl..uspTp.a.N.G.pplLsGs......................................................................... 1 534 1032 1312 +2542 PF01350 Flavi_NS4A Flavivirus non-structural protein NS4A Bateman A anon Pfam-B_211 (release 3.0) Family Flaviviruses encode a single polyprotein. This is cleaved into three structural and seven non-structural proteins. The NS4A protein is small and poorly conserved among the Flaviviruses. NS4A contains multiple hydrophobic potential membrane spanning regions [1]. NS4A has only been found in cells infected by Kunjin virus [2]. 
20.60 20.60 21.90 93.70 19.60 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.65 0.71 -4.64 32 3233 2009-09-11 15:45:06 2003-04-07 12:59:11 12 17 110 0 0 3283 0 144.70 60 4.30 CHANGED ssllcslutlPpahtp+stpAhDshYsLhsuEtGoRAa+tAhpElPEulpTllhlshLulhThGlhhhhht++ulu+hslGhlVlssussLhWhuslssspIAGshllhalLhlVLlPEPt+QRS.pDNpLAhhllslhsllGhV .....lsLIpElG+lPpHhsp+spsALDNLshLHTuEpGGRAYRHAlEELP-TlETLhLLuLlullTGGlhLFhhSGKGIGKholGhlClhsuShLLWMApVpspWIAuoIlLEFFLMVLLIPEPEKQRTPQDNQLAYVVIulLTllusV.. 0 0 0 0 +2543 PF01349 Flavi_NS4B Flavivirus non-structural protein NS4B Bateman A anon Pfam-B_211 (release 3.0) Family Flaviviruses encode a single polyprotein.\ This is cleaved into three structural and seven non-structural proteins. The NS4B protein is small and poorly conserved among the Flaviviruses. NS4B contains multiple hydrophobic potential membrane spanning regions [1]. NS4B may form membrane components of the viral replication complex and could be involved in membrane localisation of NS3 and Pfam:PF00972 [1]. 25.00 25.00 81.70 81.20 18.10 17.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.49 0.70 -5.45 35 3305 2009-01-15 18:05:59 2003-04-07 12:59:11 12 19 110 0 0 3287 0 244.40 64 7.42 CHANGED NEMGaLE+TKpDlttlFht.ppp...ptpssshp......slDL+PAouWuhYushsolhTPhlhHhlpophtshuhuuluupAssLhsLspGhPFhslchsVshLslushsphTssoLhsulsLsshHauhllPGhpAphs+pAp+pshuGlhKNssVDG.lssDlsch-stsshhEKKlu.llLlsLslsulllsRsshuhtEAusLsoAAlspLh-ssssshWshssAsGhsslh.RGsahuslsl...sWshhpssc ....NEMGhLEpTK+DLG..hGphtt....pp.s.p.u.....h..LDlDL+PASAWTLYAVATTllTPMLRHoIENoosNlSLTAIANQAslLMGLsKGWPlSKMDlGVPLLAlGCYSQVNPlTLTAAlLLLlsHYAIIGPGLQAKATREAQKRTAAGIMKNPTVDGIssIDL-Pls.YDsKFEKQLGQlMLLlLCssQlLlMRToWALCEAlTLATGPloTLWEGuPG+FWNTTIAVSMANIF.RGSYLAGAGLhFSlMp......p......... 
0 0 0 0 +2544 PF00972 Flavi_NS5 Flavivirus RNA-directed RNA polymerase Finn RD, Bateman A anon Pfam-B_200 (release 3.0) Family Flaviviruses produce a polyprotein from the ssRNA genome. This protein is also known as NS5. This RNA-directed RNA polymerase possesses a number of short regions and motifs homologous to other RNA-directed RNA polymerases [2]. 19.00 19.00 19.00 19.10 18.90 18.90 hmmbuild -o /dev/null HMM SEED 649 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -13.14 0.70 -6.64 7 5252 2012-10-02 12:54:00 2003-04-07 12:59:11 15 19 173 49 1 4283 0 449.30 66 21.01 CHANGED Th.sDlsLssGTRuVsttpsh..shptltcRlp+L+pEapsTWahDp-HPYRTWpYaGSY.scsoGSAuShVNGVVKLLohPWDsltpVTpMAMTDTTPFGQQRVFKEKVDT+s.-P.tGTRclh+ssspWLachLu.+cKpPRlCT+EEFIsKVRSsAAlGAhhpEpptWpoApEAVpDs+FW-LVDcERphHLpG+CcoClYNMMGKREKK.uEFG+AKGSRAIWYMWLGARFLEFEALGFLNEDHWhSRENShuGVEG.GLphLGYlL+-lup+pGGhhYADDTAGWDTRITcsDL-sEphlhphM..sscH+tLApAlhcLTYpNKVVKV.RPsscGt..slMDVISRRDQRGSGQVVTYuLNThTNhcVQLlRhhEuEGVIptpchpp.....hhtlptWLpcpGp-RLpRMAlSGDDCVV+PlD-RFusALpaLNsMuKsRKDIspWcPS+GWssWppVPFCSHHFHElhhKDGRslVVPCRsQDELIGRARlS.GsGWsl+ETACLuKAYAQMWsLhYFHRRDLRLhu.AIsSAVPscWVPTGRTTWSIHuptEWMTTEDMLcVWNRVWIp-NPaMpDKTslpuWcDlPYLsK+pDhhCGSLIGhppRATWAcsIhsulppVRplIGpEc.....YsDYhssMcRYpttt-.t 
..................................................hE.DVsLGuGTRtlsh.....ss.phItpRIppl+pEat..poWHhDpppPY+TWsYHGSY-s+.oGSASShlNGVV+LLoKPW.Dsls.VTphAMTDTTPFGQQRVFKEKVDT+s.cs..Gst.lhp.TspWLWt..Lu.+pKpPRhCo+EEFhpKVpoNAAhGAhF.-pNpWpoA+tAV-D.cFWchVccERphHh.GcCtoClYNMMGKREKK.GEFGKAKGSRAIWaMWLGARaLEFEALGFhNEDHWhuRcN.ShuGVEG.G..Lp....+L.GYILR-..luph....GGthYADDTAGWDT.RIT.tDLpNEth.lh.p.h..csEHt.LApuIhcL.TYppKVV+V.RPs.pG...TVMDlISRcDQRGSGQ.VsTYuLNT.F.TNhtsQLlR.MEuEGlht...ph.p......t.tl.pWL.p.G.ERLpRMAlSGDDC..VV...KPlDD.RFA.s.uLhhLNsMuKlRKDI..pWpPSpGW.sWppVPFCSpHFppLlMKDGR.lVVPCRsQDELlGR..ARlS.GA.GWsl+-TACLuKuYAQMW.LhYFHRRDLRLhusAICSAVPspWlPTuRTT.WSIHuttpWMTTE.DMLpVWNRVWIp-NPWM..EDKT.VpsWp-lPYlGKRED.WCGSLIGhpuRATWApNI.sAIpQVRtlIG.p.E.p.....YhDY.MsSh+Racpp.......................... 1 0 1 1 +2545 PF01570 Flavi_propep Flavivirus polyprotein propeptide Bashton M, Bateman A anon Pfam-B_304 (release 4.1) Family The flaviviruses are small enveloped animal viruses containing a single positive strand genomic RNA [1]. The genome encodes one large ORF a polyprotein which undergos proteolytic processing into mature viral peptide chains. This family consists of a propeptide region of approximately 90 amino acid length. 27.60 27.60 28.20 28.30 25.60 27.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.42 0.72 -4.26 9 5590 2009-09-11 10:42:09 2003-04-07 12:59:11 12 23 116 2 0 4419 0 80.40 58 3.84 CHANGED s+pG-shMllstp-+sculhh+sssG.NhCslhAhDlGchC-DTlTYcCPplsp.sEP-DlDCWCssss.saVpYGpCspsucpRRp+ .........o.RsGEPpMIVsppE+G+uLLFK.TusGlNhCTLhAMDLGEhC-DTlTYcCPhlsp.sEPEDlDCWCstos.saVpYGpCo.p..sucpRRp+........... 0 0 0 0 +2546 PF02525 Flavodoxin_2 NADHdh_2; Flavodoxin-like fold Bashton M, Bateman A anon Pfam-B_1456 (release 5.4) Domain This family consists of a domain with a flavodoxin-like fold. The family includes bacterial and eukaryotic NAD(P)H dehydrogenase (quinone) EC:1.6.99.2. 
These enzymes catalyse the NAD(P)H-dependent two-electron reductions of quinones and protect cells against damage by free radicals and reactive oxygen species [1].\ This enzyme uses a FAD co-factor. The equation for this reaction is:- NAD(P)H + acceptor <=> NAD(P)(+) + reduced acceptor. This enzyme is also involved in the bioactivation of prodrugs used in chemotherapy [1]. The family also includes acyl carrier protein phosphodiesterase EC:3.1.4.14. This enzyme converts holo-ACP to apo-ACP by hydrolytic cleavage of the phosphopantetheine residue from ACP [2]. This family is related to Pfam:PF03358 and Pfam:PF00258. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.50 0.71 -4.81 247 7847 2012-10-03 05:08:30 2003-04-07 12:59:11 12 15 2899 188 1533 11821 2314 183.60 23 92.92 CHANGED h.+lLll.ups.........ph.t.......uhspplspthhpthpptt......p....Vph.pDLhp......s.ht....................................................s.tp...t.hss....tt-p..pplhtADhllhthPha.auhPuhLKsalDpVhptGh.....sa.........t......tsLh.....GK.cshlhsop.Gus.hsht........t.sh-thh...hcshh.tahGhpsl....phhh...hpsht......s.pt.ppthppshp....phtp.l ........................................................................................................................plLllhups..........p.............p.s...s....t.ls....c.t...h....h....c..t....hpptt.............pp....................Vp.h...hD.....Lht...........................................................................t..p...........hsh.......tt-p....pp.lh....t.ADh..llhthPha.h...auhPshLK.t...alD..c...V....h...p..t..Gh.........sa..................t..u......s....t.sh.Lp.....GK...+.h.hl.h.hot....Gu.s...ptht.....................tt.sh..-.h.h..h....hc.t.h.h..t.a.hGhp.....l......tshh........hp.sht.......p...tpthtp.hhtth....h................................................................................................ 
0 410 843 1194 +2547 PF02441 Flavoprotein Flavoprotein Bateman A anon Pfam-B_1622 (release 5.4) Family This family contains diverse flavoprotein enzymes. This family includes epidermin biosynthesis protein, EpiD Swiss:P30197, which has been shown to be a flavoprotein that binds FMN [1]. This enzyme catalyses the removal of two reducing equivalents from the cysteine residue of the C-terminal meso-lanthionine of epidermin to form a --C==C-- double bond. This family also includes the B chain of dipicolinate synthase a small polar molecule that accumulates to high concentrations in bacterial endospores, and is thought to play a role in spore heat resistance, or the maintenance of heat resistance [2]. dipicolinate synthase catalyses the formation of dipicolinic acid from dihydroxydipicolinic acid. This family also includes phenyl-acrylic acid decarboxylase Swiss:P33751 (EC:4.1.1.-) [3]. 22.20 22.20 22.20 22.20 21.90 22.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.76 0.71 -4.47 143 8070 2009-01-15 18:05:59 2003-04-07 12:59:11 14 27 4696 49 2213 5827 2565 130.60 29 43.39 CHANGED t+lllulTGusushh.uhcllctLt...cp...............hc..lp.llhopsA.pphlp.ts...............................thhpp..............................................lhsp............................tpshhspls..up...sDhhllsPsossTluKlAsGluDs.Llsps....................shst.h..........p+s.l..l.lsPshssh.....sshs 
...................................................................................+llluloG.u.hu.Ahc..uscLlptLp...ct.........................................u......h-....V+.llhop...uA...pphlss.oh.................................tshstpt.....................................................................................................................................lhs-..............t..............................ssps...h..s..+.I.........s.......h...up..h.........sDhhllAP.sousolA+lApGhuDs..Llops..................................................shs..s..............................ppP..ll.lsPu.hsshhh.p.h................................... 0 675 1384 1855 +2548 PF00460 Flg_bb_rod flg_bb_rod; Flagella basal body rod protein Finn RD anon Prosite Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.77 0.72 -7.27 0.72 -4.19 16 12885 2009-01-15 18:05:59 2003-04-07 12:59:11 15 19 2264 1 2918 8471 1594 30.40 34 10.36 CHANGED lhsuhouLsspppplcllusNlANss.TsGaK ..............h.huhoGhs..up.ppphsllusNlANs....s.TsGap.......... 0 910 1827 2357 +2549 PF02120 Flg_hook Flagellar hook-length control protein FliK Mian N, Bateman A anon IPR001635 Domain This is the C terminal domain of FliK. FliK controls the length of the flagellar hook by directly measuring the hook length as a molecular ruler [1]. This family also includes YscP of the Yersinia type III secretion system, and equivalent proteins in other pathogenic bacterial type III secretion systems. 
29.80 29.80 29.80 29.90 29.70 29.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.28 0.72 -4.31 344 2733 2012-10-01 19:58:36 2003-04-07 12:59:11 11 7 2004 1 679 2315 298 85.60 22 18.28 CHANGED tpphthtt.spshpphplpLsPscLGplplpl...phpss..p....lplpltsppspstphLcpshspL+ptLpp.......tGlpls.p..hsl..sttsttpt.t ......................tt.......hppstppsplcLcPtcLGplplpl...phsss..p.....h.plphhuppppspptLcpshspL+ppLsp.....pGlpls..p....hsl....stps.t...pt............................. 0 209 424 550 +2551 PF03963 FlgD Flagellar hook capping protein - N-terminal region Bateman A anon COG1843 Family FlgD is known to be absolutely required for hook assembly, yet it has not been detected in the mature flagellum [1]. It appears to act as a hook-capping protein to enable assembly of hook protein subunits [1]. FlgD regulates the assembly of the hook cap structure to prevent leakage of hook monomers into the medium and hook monomer polymerisation and also plays a role in determination of the correct hook length, with the help of the FliK protein [2]. This family represents the N-terminal conserved region of FlgD. A recent crystal structure showed that this region was likely to be flexible and was cleaved off during crystallisation [3]. 21.20 21.20 27.30 27.10 21.10 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.82 0.72 -3.91 135 2310 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 2042 0 554 1527 340 81.20 32 36.84 CHANGED sssssss....ssssssstst.........................s.sssssss.......hspc..s.FLpLLlsQLpNQDPhsPhDsscasuQLAQFSslEthpphNsslpsl ................................................................................sshss..............tst.t.t.t................................stssssss...hsps..s.FLpLLlsQLpNQDPhs..P..h-s.......schhuQhAQhSsVp.thpphNsslps............ 
0 176 346 446 +2552 PF02107 FlgH Flagellar L-ring protein Mian N, Bateman A anon IPR000527 Family \N 20.40 20.40 26.50 21.40 19.60 19.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.87 0.71 -4.71 159 1866 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 1593 0 422 1225 436 186.10 37 80.12 CHANGED ts.stsss...GSla..psup..........ssLa..................pDpRAt.....plGDIlTVhlpEs.spAopsusosts+....sushsh....sh..sshh..................uhstts....................t..hshsu.sss...asGsGss..spsssLsuolossVscVLPNGNLhIpGcKplplNptschlploGlVRPcDIs......ssNolsSs+lA-ARIpYuGpGtls-spp.GWLp+hass.lhP ...........................ss....sssGSlapsup........psLF..................p.D.+Rsp.....plGDllTlllpEs..ss.ASKsussstuRsusssh....uh...ssls................shststs.....................s..hpssu.sss...Fs..GpGuu..spuNohsG..olTVsVspV.Ls.N.GNLhlpGEK.p.lt..lNpG..sEhI.RlSGlVcPcDIu.........ssNTVsSsplAD..ARIpYsGpGhl....s.-.u.Q.p.h.G.WLpRaF.s.l.P................... 0 107 239 330 +2553 PF02119 FlgI Flagellar P-ring protein Mian N, Bateman A anon IPR001782 Family \N 25.30 25.30 25.40 26.90 25.00 24.80 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.01 0.70 -5.80 159 1904 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1613 0 429 1324 595 332.20 48 91.94 CHANGED RIKDluslpGVRsNQLlGYGLVVGLsGTGDp....ps.sFTpQSlpsMLpphG..........l.sl...s.............ss............h.cs..KNVAAVhVTApLPsF.u+sGpplDVTVSSlG-AKSLcGGTLLhTPL+Gu...DGplYAlAQGslllGGhuup..Gts....uplphNhPTsGRIPsGAhVERpl......ss.sh.spt.sslpLsLcpsDFo..TApRlscuIN....pth................Gsss......ApulDusolpl..phPts.p.p..tVsFluplEsLpV.....ps.cssA+VVlNpRTGTlVhGpsV+ls.ssAVoHGsLTVpIs-s..tV...SQPs.sh..........u...............tGpTslsspoplslppp.....ssphhhl..ts.usoLs-lVcALNslGssPpDlluILpAlKsAGALpAEL.ll 
.............................................................RI+DlsslpGVRpNpLlGYGLVVGLs.GTGDp....spo...PFTpQolsNMLpphG..........I..sl..Ps.......tss..................h..phKNVAAVMVTAsLPPF.u+tGQpIDVsVS..S..h...Gs..A..KSLRGGTLLhTPL..+Gs................DGpV.....YAl...AQGsllVGGh..uAp..........usu...SplpsNp.suGRIssGAhlEREl......................Ps.sF..ups...sslsL..pL..p..c..sDFo..sApplscsIN....phh........................G.ss..........ApAlDupolpVpsPpsss....s.............pVpFLAplpslpV.....ss..tstsAKVllNuRTGoVVhsppVplp.ssAVup...GsLoVslscp...p.V...SQPs.PF.....u......................sGpTsVsPpopIs.lppp.....suplh.l...ps.usoLsslV+ALNslGAoPtDLhuILQuh+pAGALcAcLplI...................................... 0 119 250 339 +2554 PF04316 FlgM Anti-sigma-28 factor, FlgM Kerrison ND, Finn RD anon COG2747 Family FlgM binds and inhibits the activity of the transcription factor sigma 28. Inhibition of sigma 28 prevents the expression of genes from flagellar transcriptional class 3, which include genes for the filament and chemotaxis. Correctly assembled basal body-hook structures export FlgM, relieving inhibition of sigma 28 and allowing expression of class 3 genes. NMR studies show that free FlgM is mostly unfolded, which may facilitate its export. The C terminal half of FlgM adopts a tertiary structure when it binds to sigma 28. All mutations in FlgM that prevent sigma 28 inhibition affect the C-terminal domain and is the region thought to constitute the binding domain. A minimal binding domain has been identified between Glu 64 and Arg 88 in Salmonella typhimurium (Swiss:P26477). The N-terminal portion remains unstructured and may be necessary for recognition by the export machinery [1]. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.51 0.72 -4.03 135 1810 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1600 5 419 1076 250 64.00 28 67.14 CHANGED DsVplSspuppl.pphptt.......................h..sstsslcpp.KVpplKpAIssGsYplDscclAcpllpht ....................................................................t.tVplSsttt...p.............................................h...ssss.-lsh-.+V..ptlKpAIps.GphplD..s..p+lAcsllp..h............... 1 135 267 349 +2555 PF05130 FlgN FlgN protein Bateman A anon COG3418 Family This family includes the FlgN protein and export chaperone involved in flagellar synthesis [1]. 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.80 0.71 -3.94 154 1850 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1595 3 413 1145 136 134.40 23 91.77 CHANGED M..................ppLhphLp..pphphhpp....LhpllppEppsLtst..c.hp.tLpplspp...Kppllpplpphc..ppptphhtphsht....................h..........ppspltphhpp....lpphhp...chpphNphNspLlppphptspphlshlpsspst.......tsYsssGptps ...................................ptLhphlpp..shlpp....LtslhcpEpptLpts.....s.ss..pLptlscp...KspLlspLsth-....ppRp...ph.pphs.........................................................tpspltpthpp...............lpphhp....pl+phNppNu...hLlptphchspp.hlphlpstpps.......shYsspGp................................................ 0 137 261 336 +2556 PF02465 FliD_N Flagellar hook-associated protein 2 N-terminus Mian N, Bateman A, Yeats C anon Yeats C Family The flagellar hook-associated protein 2 (HAP2 or FliD) forms the distal end of the flagella, and plays a role in mucin specific adhesion of the bacteria [2]. This alignment covers the N-terminal region of this family of proteins. 
22.70 22.70 22.70 23.80 22.60 22.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.99 0.72 -3.63 191 2117 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 1839 0 481 1599 330 99.40 27 18.77 CHANGED uSGlD..hssllspLhs.uEptsh...splppppsphpsploAauplpotLsshp.......s.shssL......tp.......s..s..sa.........ps..............pss...osS..ss.......shlos........ousus..As..sGoYsl.pVppLApupp ......................SulDhssllspLhp..A-ctsh....s....lspppsshsschoAa.usLcosLsshp........s.uhssL......sp..............s.....s......sa...............ss..............pps.......ooS....ss...........ss.h.os........o..s.s..us..Ah..sG..s.Y..sl.sVsQLApupp..................................................................... 0 160 302 395 +2557 PF02049 FliE Flagellar hook-basal body complex protein FliE Mian N, Bateman A anon IPR001624 Family \N 22.40 22.40 23.10 22.90 22.00 21.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.97 0.72 -3.96 186 2295 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 2048 0 548 1400 351 98.30 29 92.64 CHANGED tsh.............t..tssssssssssssssssssFushLppulspVsptQp...pusphspshtpG.c.ssslp-VMlAhp+AsluhphslpVRNKllpAYpEIMpMtl ..............................thhpshh..........t.sstspsssspsssuFushLpsu...l...s...cl...sppQp...sAcsts.pphthGc..sssLp-VMlshpKAslohphslpVRNKlVpAYpElMpMpl.... 0 175 345 444 +2558 PF01706 FliG_C FliG-C; FliG C-terminal domain Bateman A anon [1] Domain FliG is a component of the flageller rotor, present in about 25 copies per flagellum. This domain functions specifically in motor rotation. 
23.00 23.00 23.30 23.80 22.20 22.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.03 0.72 -4.08 154 2393 2012-10-02 13:19:07 2003-04-07 12:59:11 11 8 2047 8 574 1470 494 109.70 39 32.55 CHANGED lpslc.ppDsclApcI+cpMFsF-cl.hclcscslpplL+pls.scs...LslALKGAspp........lc-plhsNMSpRuuchlc--hcthGsV.+ls-VEpAQppIlphlRcLs-pGcI.ls ..................................pslc.chDs-LApcIp-pMFlFEsL.lc...lDD+uIQclLcEVs.s-s...LhlALKGAspt........L+-KhhpNM..SpRAA-..hlc--l.p.s.h.G..P.V.Rls-VEsAQ+pIltllR+Lu-sGEIhl.u............. 0 199 374 466 +2559 PF02108 FliH Flagellar assembly protein FliH Mian N, Bateman A anon IPR000563 Family \N 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.18 0.71 -4.26 50 2270 2012-10-02 21:03:42 2003-04-07 12:59:11 11 6 1901 0 508 1633 260 133.70 22 54.20 CHANGED ppt.hpplps....lhpplppslpph.......-pp....lpppLlpLslplu+pllt.pclpssPphllshlccsL.tthPhsspplplplpPsDhshlcpths.pthpthta.....plhsDsslspGGCplpossuplD.......uslpsRhcpl .................................................................................................p....tphpt.lhsphppsh.pth...........ppp...........hppcLh.phulp.h....A....+pVlt..p....ph..p...s......s.s.p.t.l......l.p...h.l.p.psL...t...p..sh.....t....s......splpl+V...p....P....-.D....hp....hl...c...c....t...hs....t.....h.p...h...psa...................cl.hsD..ssL.....p........GuChlps-pGp.lD.......uslpsRhpp.......................... 
0 169 313 407 +2560 PF02050 FliJ Flagellar FliJ protein Mian N, Bateman A anon IPR000809 Family \N 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.52 0.71 -4.07 147 1890 2012-10-01 21:16:01 2003-04-07 12:59:11 11 4 1724 1 454 1152 113 121.90 22 82.41 CHANGED -cutppLsp...sppphpptppplptLpp.tppa..tpthpsph.......sphpphppalsplcpsItppppplpthcpplpptpppapptptchcphchLhp+ctpppppt.p+pEQ+hhDEhAhpta .......................................................-pAtptLup...hp..pthpp....t....p....pp....Lp..Lhp....hpp....-a....ppphs....s.......sh...t..G.....h.....ss.....sphhs..hpp..Flp.......sLc.psIsppcp....plpphppcl.......-pspppapctppchpshppLp-+pppptthtps+t-QKthDEhAtpt................. 0 149 289 371 +2561 PF03748 FliL Flagellar basal body-associated protein FliL Bateman A anon COG1580 Family This FliL protein controls the rotational direction of the flagella during chemotaxis [1]. FliL is a cytoplasmic membrane protein associated with the basal body [2]. 23.80 23.80 23.80 23.90 23.50 23.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.16 0.72 -4.00 175 2462 2009-09-10 21:49:05 2003-04-07 12:59:11 9 6 1859 0 634 1614 444 101.40 24 62.20 CHANGED shahsl....s...shslNlsss.....tt..+alp.lplslps....pspps.....hpplcp.....phP..h.lRssllthluspshp.....-lpss.pGpppL+pclhcpls.hl.............hpsp...............................lpsVhFosFllQ .....................................ahsL....c...shslNLsss.....ttp..+hlp.lslsLpl..........pspts.........tsplpp.................thP........lRspllhhhup.....ps.h.p.....pLps..pG+.ppL+pclppplsphL.............stsp............................................lpcVLaTsFll..................................... 
0 199 394 512 +2562 PF02154 FliM Flagellar motor switch protein FliM Mian N, Bateman A anon IPR001689 Family \N 20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.01 0.71 -4.64 7 1999 2012-10-01 19:50:22 2003-04-07 12:59:11 10 2 1927 3 493 1477 440 191.20 37 56.93 CHANGED YDFppss+hS+EplpoLphla-pFARhhoouLpshLRphlclplsuVcphsYtEFlpSlPsPTsLslhphcPLcGsullplsPolsFshlDpLhGGcGpshs...csR-hTcIEppllppllchlLtsh+EAWpslhslcschschEsNPpFspIVs.PsEhllllsLclclGchpGhhNlClPahslEPIhphL .............................................................YDhpp.cRls+-+.lpsL-hIpE+FARphphsLhshlRp.ss-lslsulc.hs.Y.p.EF.h.c......sLP.s.P.T.s.L...N....l....l..c......h.....c.....P.....L...+.....G...o.....u.....L.l.hhsPsLlFhhlDsLaG.G.cG.+......h........st.......-uRE..F..T.p.h.E....pc....l....lp....p....lLclsh....p....s....hp-AW....p.......s.l....h..s.....l...c.s....c...a.h..c.....s....E..h......s.s..p....a.....ss....I..so...Ps-.l.Vlh.ss.aclcl.G.s.h.s.GphsI.ClPashlEPlp-hL................................................................. 1 173 321 407 +2563 PF04347 FliO Flagellar biosynthesis protein, FliO Mifsud W anon COG3190 Family FliO is an essential component of the flagellum-specific protein export apparatus [1]. It is an integral membrane protein. Its precise molecular function is unknown. 
21.10 21.10 21.10 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.88 0.72 -3.94 108 1689 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1647 0 458 1182 140 88.50 26 54.79 CHANGED AWLl+Rh......t....s..sss.tssstl+llushslGs+E+lllVcVssp.....llLGVTspplshLcp....l......................Pssssts.st.s...............................FuptLpphhpp ....................sWll+.Rh...............t...sstt..ssspsL+lluutuLGs+c+lllV..c.Vscp.....pllLG.VTs.s.pIslLcp.....L....................P.ps.tt.t.st............................................Ftphhpphh.......................................................................... 0 155 289 373 +2564 PF00813 FliP FliP family Bateman A anon Pfam-B_1679 (release 2.1) Family \N 23.70 23.70 24.40 24.10 23.60 23.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.27 0.71 -4.60 141 3505 2009-01-15 18:05:59 2003-04-07 12:59:11 15 4 2195 0 652 2050 491 188.20 45 81.00 CHANGED llTlLoLhPulllhhTSFTRIllVLulLRpALGhQQsPPNplLlGLALFLThFlMsPVhpphhpsuh.........pPhhstpl.....................shppAhpputtPh+pFMhcpT....cpp....-LthF.....hclu..........ptt.....sp.....shc-ss......hhlLlP..............AFhlSEL+sAFpIGFllalPFLlIDLVVASlLMuMGMMMlsPshISLPFKLlLFVLlDGWsLlltuLl .................lThLollPslllhhTSFs+lllVhulLRsALGhQpsPPN.lLhGlALhLThFlMsPVhpclhppuh.....pP..h..h..ptpl.....................ohppAhc.c..utpPh+pFMl+pT....ccp.....-lthF.hclu..........................ptt...............p..s.csss...hhlLlP..............AFslSELKoAFpIGFhlalPFLlIDLVVuSlLMAhGMMMlsPshISLPFKLlLFVLlDGWsLlltuL.............................. 0 200 389 513 +2565 PF02561 FliS Flagellar protein FliS Mian N, Bateman A anon COGs Family FliS is coded for by the FliD operon and is transcribed in conjunction with FliD and FliT, however this protein has no known function. 
22.80 22.80 22.90 22.80 22.20 22.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.40 0.71 -4.26 6 2094 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1843 11 478 1234 391 123.60 32 91.09 CHANGED hps.shpAYpQs....pVpTAoPtcLllMLY-cuIppLppAtc.....shpspchc+tscpI.+Ap-IIo.ELpsoLDhEpGtclupNLhulYsahh+pLhpANlcp-ssclspVhshh+sLp-AW+clhps. ............................................hs....uhpsYt....ps.......pl....h....s....A....SPppLlhMLa-GslptltpA+h........thp.pps.....htt+sttlsKAhsIlp.tLpssL-h..E....p..G.....u.....Elu..psL...tuLYsahh.pc..L..h..p..A.N..l....c..s.D...s...p.tl-EV..sllpslt-AWcph..................................... 0 163 309 394 +2566 PF01698 FLO_LFY Floricaula / Leafy protein Bashton M, Bateman A anon Pfam-B_1633 (release 4.1) Family This family consists of various plant development proteins which are homologues of floricaula (FLO) and Leafy (LFY) proteins which are floral meristem identity proteins. Mutations in the sequences of these proteins affect flower and leaf development. 
20.20 20.20 20.20 20.70 19.80 20.10 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.53 0.70 -5.61 38 1844 2009-09-13 17:08:47 2003-04-07 12:59:11 11 5 1128 2 26 1110 0 134.20 55 97.00 CHANGED MDP-s.F.....oAu.......hFK.WD....P+ssh.........s.Ps.....spl..pths.s.......sP...............s.ssht..hR...................uLE-LFpuYGVRYhTsAKIuELGFTssTLlsM+-EELDDMMsoLuclFRWDLLVGERYGIKAAVRAERRRL--...................................-..s+R+t..............ssDsss..sLDALSQE....G.LSEEsstp...h..uuuS..........................GGsu.sshthhshs......pc.........c++t....+p+++ptcc............................t.p..t.tttstts..........uGssGt....ERQREHPFIVTEPGEVARGKKNGLDYLFHLYEQCRcFLlQVQsIAKERGEKCPTKVTNQVFRYA.KKsGASYINKPKMRHYVHCYALHCLDEEuSNsLRRuaK.ERGENVGAWRQACYpPLVslAucpGWDIDulFNuHPRLuIWYVPTKLRQLCHhERussss ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................FcLYEQCtcFL.pV.QplAKE+GEKCPTK.......VTNpVFRaA.KhsGA............................................................................................................................................................... 0 11 21 24 +2567 PF00624 Flocculin Flocculin repeat Bateman A anon Pfam-B_51 (release 2.1) Repeat This short repeat is rich in serine and threonine residues. 
20.80 20.80 21.60 20.80 20.60 20.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.32 0.72 -4.12 173 1092 2009-01-15 18:05:59 2003-04-07 12:59:11 13 66 28 0 906 1281 2 43.60 44 27.58 CHANGED tsso.Toho.sWos.shooTaSThhsThoss-G..pTTcTIYaVtTPh ...........h..to.Toho...sWTG.oh.ToThSTph.TThTGo-G....sT...sETlYhVcTPh..... 0 41 549 884 +2569 PF05202 Flp_C Recombinase Flp protein Studholme DJ, Finn RD anon DOMO:DM01865; Family \N 25.00 25.00 63.30 62.60 24.40 18.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.57 0.70 -5.16 8 18 2012-10-02 14:09:14 2003-04-07 12:59:11 7 2 15 12 3 31 0 194.40 52 58.85 CHANGED uspluccloKllcs-.csIWpllsplhsoI-ppoppsop+AtYpalLlsTFhNCCRtSDLKNsDPpTFEllpN+aLG+llRAhVsETKTRpsRaIYFFPlp.G+sDPLlALa-aLppspPl.K..oRTScpcoc.Q-aQLLRDoLlpsYDRFluKcuspulFuIhpGPKSHLGRHLMuSYLSpsphschsosaGNWSAuccphpSsVARu+YsHs.ppslPscLFAFLSsYYtcsspGch......cLhssp .h...s.th.+hh..p...hathh..hhp.hp..sh.sphps.hphhh.hohhNCsR.sDlKNhDPpoFcll.spaLGhhlpshVs-TKTph.RalYFassp.sthDPllhLcphhp.spPh.K..shossppsp.QcaQLL+-sLltsYs+hltKpss.ulFuIhpGPKSHlGRHLMsSaLSh+sLsELssllGNWS...DctuSuVARosYoHp.hsuIPDHhFAhlScYYshsP.uKphlsh..KDcs.P........ 0 2 2 2 +2570 PF04964 Flp_Fap Flp/Fap pilin component Bateman A anon COG3847 Family \N 23.90 23.90 23.90 23.90 23.60 23.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.16 0.72 -4.36 26 1017 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 521 0 401 913 41 46.60 34 72.55 CHANGED Fh+-E.sGATAIEYGLIAuhIAlsll..ssssslu..ssLpstFsplusul ...........FhcDE.pGsTAIEY.GL.IAul.lAlsl..l......susss.lG....ssl.s.shF.sslsst................ 
0 120 223 315 +2571 PF03930 Flp_N Flp; Recombinase Flp protein N-terminus Finn RD, Studholme, DJ anon DOMO:DM01865; Domain \N 25.00 25.00 25.20 42.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.67 0.72 -4.22 7 12 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 10 12 1 23 0 80.70 32 19.65 CHANGED hss.....cchstlKhoTFhKYpppIppolpaDhsspsVpFcYHLKcsp-LpcsLccshtPhpFpl...pupKKssshhplhuuhc.+hp ............cpsstlKhuTFhKYpphIupoLpaDhsspslpFcYHhpcspcLpcsLcphhtshpFsl...tsp++.ssh.phhuuhphp................ 0 1 1 1 +2572 PF02662 FlpD Methyl-viologen-reducing hydrogenase, delta subunit Bashton M, Bateman A anon COG1908 Family This family consist of methyl-viologen-reducing hydrogenase, delta subunit / heterodisulphide reductase. No specific functions have been assigned to this subunit. The aligned region corresponds to almost the entire delta chain sequence and contains 4 conserved cysteine residues. However, in two Archaeoglobus sequences this region corresponds to only the C-terminus of these proteins Swiss:O29030 and Swiss:029595. 22.30 22.30 22.90 23.90 22.20 22.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.23 0.71 -4.43 66 475 2009-09-12 00:14:48 2003-04-07 12:59:11 11 77 203 0 303 485 67 116.80 37 34.02 CHANGED +IluFsCsasuYuuA..DhAG......ssRhpYPsslRlI+VhCoG+lsstallcAhpcG.ADGVhVsGC+hGD...CHah.pGNhpAccRhphl+chLpplGl-s-RlchtalSuuEup+asphlp-hscclccLG .........................+IluFhCpasuYsu.A..DhAG......ssRhpYPss.lRlIRV.CoG+lsshallcAhpp..G.ADGVhlsGC+..G-...CHah.pGNh.........h...up+RhthlpphLpplGl-s-RlchpalSuuEut+asphlpchscplcpLG................................... 0 150 250 287 +2573 PF02947 Flt3_lig flt3_lig; flt3 ligand Griffiths-Jones SR anon Structural domain Domain The flt3 ligand is a short chain cytokine with a 4 helical bundle fold. 
25.00 25.00 36.60 29.60 17.80 16.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.84 0.71 -4.27 3 56 2012-10-02 01:28:15 2003-04-07 12:59:11 9 3 25 12 16 72 0 110.10 67 52.30 CHANGED TPDCYFSHSPISSNFKVKFRELTDHLLKDYPVTVAVNLQDEKHCKALWSLFLAQRWIEQLKTVAGSKMQTLLEDVNTEIHFVTSCTFQPLPECLRFVQTNISHLLKDTCTQLLALKPCIGKACQNFSRCLEVQCQ ....sC.F.aSPI.SSsFt.phtpL...............SDYLLQD.YPVTVAoNLQD-cLCGAhW+LVLAQRWMtRLKTVAGScMptLLEtVNTEIHFVTpCAFQ.s.PsCLRFVQsNIS+LLQ-TspQLhALKPhIs+.......pNFSpCLELQCQ................ 0 1 1 4 +2574 PF04772 Flu_B_M2 Influenza B matrix protein 2 (BM2) Kerrison ND anon Pfam-B_2165 (release 7.6) Family M2 is synthesised in the late phase of infection and incorporated into the virion. It may be phosphorylated in vivo. The function of BM2 is unknown [1]. 25.00 25.00 147.40 147.30 20.00 19.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.42 0.72 -4.00 4 501 2009-09-11 00:30:47 2003-04-07 12:59:11 7 1 487 16 0 121 0 108.60 95 99.99 CHANGED MLEPFQILSICSFILSALHFMAWTIGHLsQIKRGVNhKIRI+sPNKETINREVSILRHsYQKEIQAKETMKElLSDNMEVLSDHIVIEGLSAEEIIKMGETVLEVEELp MLEPFQILSICSFILSALHFMAWTIGHLNQIKRGVNMKIRIKGPNKETINREVSILRHSYQKEIQAKETMKEVLSDNMEVLSDHIlIEGLSAEEIIKMGETVLElEELH.. 0 0 0 0 +2575 PF02942 Flu_B_NS1 Influenza B non-structural protein (NS1) Bateman A anon Pfam-B_198 (Release 6.4) Family A specific region of the influenza B virus NS1 protein, which includes part of its effector domain, blocks the covalent linkage of ISG15 Swiss:Q64339 to its target proteins both in vitro and in infected cells. Of the several hundred proteins induced by interferon (IFN) alpha/beta, the ubiquitin-like ISG15 protein is one of the most predominant. Influenza A virus employs a different strategy: its NS1 protein does not bind the ISG15 protein, but little or no ISG15 protein is produced during infection [1]. 
25.00 25.00 246.10 245.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.67 0.70 -5.47 5 839 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 785 7 0 395 0 243.30 96 87.75 CHANGED MADNMTTTQIEVGPGATNATINFEAGILECYERLSWQRALDYPGQDRLNRLKRKLESRIKTHNKSEPEuKRMSLEERKAIGVKMMKVLLFMNPSAGIEGFEPYClKNPSNSNCPNCsWADYPPTPGKYLDDIEEEPENVDDPTEIVLRDMNNKDARQKIKEEVNTQKEGKFRLTIKRDIRNVLSLRVLVNGTFLKHPNGYKSLSTLHRLNAYDQSGRLVAKLVATDDLTVEDEEDGHRILNSLFERF ...MAsNMTTTQIEVGPGATNATINFEAGILECYERLSWQRALDYPGQDRLNRLKRKLESRIKTHNKSEPESKRMSLEERKAIGVKMMKVLLFMNPSAGIEGFEPYCMKSSSNSNCsKYNWTDYPSTPGRCLDDIEE.EPEDVDGPTEIVLRDMNNKDARQKIKEEVNTQKEGKFRLTIKRDMRNVLSLRVLVNGTFLKHPNGYKSLSTLHRLNAYDQSGRLVAKLVATDDLTVEDEEDGHRILNSLFERL........ 0 0 0 0 +2576 PF03506 Flu_C_NS1 Influenza C non-structural protein (NS1) Bateman A anon Pfam-B_980 (release 7.0) Family The influenza C virus genome consists of seven single-stranded RNA segments. The shortest RNA segment encodes a 286 amino acid non-structural protein NS1 [2]. This protein contains 6 conserved cysteines that may be functionally important, perhaps binding to a metal ion. 25.00 25.00 114.50 114.10 18.70 18.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.02 0.71 -4.50 2 111 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 80 0 0 66 0 146.10 98 64.31 CHANGED GELLFNGTILQAESsTMT.ASVEMKGKK.PIDFsPSNIAPIGQNPIYLSPCIPNFDGNVWEATMYHHRGATLTKTMNCNCFQRTIWCHPNPSRMRLSYAFVLYCRNTKKICGYLIA+QVAGIETGIRKCFRCIKSGFVMATDEISLhILpSIKSGAQLDPYW GELLFNGTILQAESPTMTPASVEMKGKKhPIDFAPSNIAPIGQNPIYLSPCIPNFDGNVWEATMYHHRGATLTKTMNCNCFQRTIWCHPNPSRMRLSYAFVLYCRNTKKICGYLIARQVAGIETGIRKCFRCIKSGFVMATDEISLTILRSIKSGAQLDPYW 0 0 0 0 +2577 PF03555 Flu_C_NS2 Influenza C non-structural protein (NS2) Bateman A anon Pfam-B_346 (release 7.0) Family The influenza C virus genome consists of seven single-stranded RNA segments. 
The shortest RNA segment encodes a 286 amino acid non-structural protein NS1 Pfam:PF03506 as well as the NS2 protein. The NS2 protein is only about 60 amino acids in length and of unknown function. 25.00 25.00 133.60 133.10 21.30 21.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.00 0.72 -4.05 2 203 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 80 0 0 112 0 56.80 99 27.82 CHANGED VKSTNLMAFVATKMLERQEDLDTCTEMQlEKMKsSTKARL+TESSFAPRTWEDAIKD VKSTNLMAFVATKMLERQEDLDTCTEMQVEKMKTSTKARLRTESSFAPRTWEDAIKD. 0 0 0 0 +2578 PF00598 Flu_M1 Influenza Matrix protein (M1) Bateman A anon Bateman A Domain This protein forms a continuous shell on the inner side of the lipid bilayer, but its function is unclear. 20.90 20.90 21.10 23.00 20.20 18.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.86 0.71 -4.83 2 22622 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 22201 27 0 3071 0 152.20 94 61.75 CHANGED SLhs-s.sYlLSlh.sG.hKAElAp+LcshFuGKphDL-uhhEWlKs+.hLoslpKullGhshshhhPp-p..p+RRFlppsLsG.Gssss.hcthlhh.RKh+RploFHtAhEIA.uapuuALh.ChhlhY.phGshohpVhLGhlCAhCEp.Asp ..........SLLTEVETYVLSIlPSGPLKAEIAQRLEDVFAGKNTDLEALMEWLKTRPILSPLTKGILGFVFTLTVPSERGLQRRRFVQNALNGNGDPNN.MDRAVKLYRKLKREITFHGAKEVALSYSoGALASCMGLIYNRMGTVTTEVAFGLVCATCEQIADS....... 1 0 0 0 +2579 PF00599 Flu_M2 Influenza Matrix protein (M2) Bateman A anon Bateman A Family This protein spans the viral membrane with an extracellular amino-terminus external and a cytoplasmic carboxy-terminus. 
25.00 25.00 25.50 25.50 24.90 24.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.24 0.72 -4.20 2 21616 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 21060 57 0 4083 0 92.50 89 100.00 CHANGED MSLLTEVETPh+stWECRCsDSSD.LVshASIhGILHLILWIhDRLFFKChYRRh+aGLKRGPuTuGlPESMREEYRQEQQSsVsVDsGHFVNIELE ....MSLLTEVET...PhRNEWECRCsDSSDPLVlAAsIIGILHLILWILDRLFFK...CIYRRFK..YGLKRGPSTEGVPESMREEYRQEQQsAVDsDDGHFVsIELE............. 0 0 0 0 +2580 PF00506 Flu_NP flu_virus_nuc; Influenza virus nucleoprotein Finn RD anon Pfam-B_10 (release 1.0) Family \N 25.00 25.00 27.70 26.80 16.80 16.10 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.60 0.70 -6.01 3 18503 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 18183 20 0 6541 0 474.40 89 99.75 CHANGED MAs.SsKR....S.EphpTuuE.......QpRQTPTEIRKSVupMVsclGEFYIQMCsElGLNsDhEtpLIQNuIAIERhlLuAsD-K+sEap+EKsAR-spcucushDscKTGGslYKRsRDGKsIpapslllLasKEEI+pIaK.oshGsD......AsuGLsHlMIaHSNLNDlsYQRoRALsRsGhDPcLhSLhQGSTLPRRSGAsGsAlKGlGTLVAEAIRhI......KRGlsDRshLR...ut+T+oAYERhhpsLKsKspsusQRALsDQVlcSRNPGsA-IEDLslLARSuLlLRPSVAsKssLPhClYhhA+lothDFpsEtYShVGh-AFcLaNhAp......sFSllRsNDDs....cDKSQLlaMACFGAAYEDLRVlSAlsGTclKPRupLKs+GF+VsusEpVETMsSuLLplRhpaWAshTRSGGNpssscuuuGQISsSPVFAVERsIsh-+QsVcchLohNlEGR-uDs+ssLlKMMc-uhu....pKoEssuFlG+uMF-LSDccKTNPI..sF+poscsFFFttDsAEDYDs 
.....................................................................MASQGTKR....SYEQMETGGE.........RQNATEIRASVGRMlGGIGRFYIQMCTELKLS.DYEGRLIQNSITIERMVLSAFDERRNKYLEEHPS.......AGKDPKKTGGPIYRR.hDGKWMRE...LILYDKEEIRRIWRQANNGED......ATA..G.....LTHlMIWHSNLNDATYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTMVMELIRMI......KRGINDRNFWRGENGRRTRlAYER.MCNILKGKFQTAAQR.AMMDQVRESRNPGNAEIEDLIFLARS..ALILRGSVAHKSCLPACVYGLAVASGYDFEREGYSLVGID..PF.+LL.QNSQ......VaSLIRPNENP....AHKS...QLVWMACHSAAFEDLRVSSFIRGT+VlPRGpLSTRGVQIASNENMEsMDSsTLELRSRYWAIRTRSGGNTNQQRASAGQISVQPTFSVQRNLPFERATlMAAFTGNTEGRTSDMRTEIIRMMEuA.......+PEDVSFQGRGVFELSDEKATNPIVPSFDMSNEGSYFFGDNAEEYDs..................................... 0 0 0 0 +2581 PF00600 Flu_NS1 Influenza non-structural protein (NS1) Bateman A anon Bateman A Family NS1 is a homodimeric RNA-binding protein that is required for viral replication. NS1 binds polyA tails of mRNA keeping them in the nucleus. NS1 inhibits pre-mRNA splicing by tightly binding to a specific stem-bulge of U6 snRNA. 25.00 25.00 30.00 30.00 22.90 20.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.19 0.70 -5.20 3 18530 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 18330 77 0 6565 0 214.70 83 95.82 CHANGED MDSNTVSSFQVDCFLWHIRKplADQ-MGDAPFLDRLRRDQKSLKGRGSTLGLDIEsATRsGKQIVERILKEESDEsLKhTIASuPASRYLTDMTIEEMSR-WYMLMP+QKlTGuLhIRMDQAIMDK+ITLKANFSVlFD+LETLlLLRAFT-DGAIVGEISPIPSLPGHTNEDVKNAIGILIGGLEWNDNTVRlSEsLQRFAWRsSDENGGPPLoPK ...MDSN.TVSSFQVDCFLWHlRKRFADQ-LGDAPFLDRLRRDQKSL+GRGsTLGLDIETATpsGKQIVE+ILKEESDEALKMTlASsPASRYLTDMTLEEMSRDWFMLMPKQKVt.GsLClRMDQAIM-KNIlLKANFSVIFDRLETLILLRAFTEEGAIVGEISPLPSLPGHT.EDVKNA.IGVLIGGLEWNDNTVRVSEslQRFAWRSssEsGRPPLPP................................ 0 0 0 0 +2582 PF00601 Flu_NS2 Influenza non-structural protein (NS2) Bateman A anon Bateman A Family NS2 may play a role in promoting normal replication of the genomic RNAs by preventing the replication of short-length RNA species [1]. 
25.90 25.90 26.00 26.70 24.60 25.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.97 0.72 -3.78 4 18368 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 18223 2 0 3232 0 92.00 84 77.49 CHANGED LNGMITQFEpLKLYRDSLGEuVMRhGDLHSLQsRNupWREpLuQKFEEIRWLItEsRphLphTENSFEQITFhQALQLLhEVEpEIRTFSFQLI ....LNGMITQFESLKlYRDSLGEAVMRMGDLH.LQsRNuKWREQLuQKFEEIRWLIEEVRHRLKtTENSFEQITFMQALQLLLEVEQEIRTFSFQLI.... 0 0 0 0 +2583 PF00603 Flu_PA Influenza RNA-dependent RNA polymerase subunit PA Bateman A anon Bateman A Family \N 23.00 23.00 23.00 26.40 22.30 22.90 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -13.03 0.70 -6.50 4 18248 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 17904 29 0 9139 0 661.40 91 97.25 CHANGED hsEhuEDhhhpss....+lhpIChHhpVChhhSDhcalscpGps...............pa-lIEGpcRshAh.l.pplCpppslEhP.+aLsDLaDhccppFlElGlT+chsD.Ya.pKhpKl.ussh+lhlFSasG.-hspss-hsLcEEp+tRIhohLsphtp.lspcNLapplhts-stEptI-..FclGpThpcL..RDpS...lP.sFpshEth+sYh-th..sPR.tlEsplupMpsplph..c.hchsphR.ItL.....-GP.sPapuhhLhtDuhhls.ls-PppcptuI.hh-.....phhasps-..hI+.p-Ks.susahh.....Wpplhuslpshtp......o.shp+sspscaAhG.shs.cKl.....ppsshsspshKQtEsclPEhpSlssWlpsEhshhpp.o-.stWl-LsEhsssss.lEtlApthpchahs.lsts+suphhhKhllhsTuLhspspsshu+hpllPIhsRshsccu........pphspLaGhslKGpSHL+pDTDsssllohEFShpDPcl-..Ka.KYoVFclGphh........VtG+phshaLYsRssuhSKIKhcWh.chRRCLlQohpphEsll.pESuhpppshsccsh.....Nc.pha.IGpppGtl.ttoltcslRslLspphhhslYsssQLEGFsAEpR+LLhslpAh+-p+p...PasFc.EGhh-tIEEClINNPhVlh.AphaNphlh.shc 
.................MKEYGEDPKIETN....KFAAICTHLEVCFMYSDFHFIDERGESIIVESGD.PNALLKHRFEIIEGRDRTMAWTVVNSICNTTGVEKP.KFLPDLYDYKENRFIEIGVTRREVH.IYYLEKANKIKSEKTH.IHIFSFTGEEMATKADYTLDEE.SRARIKTRLFTIRQEMASRGLWDSFRQSER.GEE.TIEE+FEITGTMRRL..ADQS...LPPNFSSLENFRAYVDGF..EPNGCIEGKLSQMSKEVNA+IEPFLKTTPRPLRL....P-GPP..CpQRSKF.LLMDALKLS.IEDPS.HEGEGIPLYDAIKCMKTFFGWKEPNIVKPHEKGINPNYLL....AWKQVLAELQDIENEEK.IP+TKNMKKTSQLKWALGENMAPEKV......DF-DCK...DVuDLKQYDSDEPE.RSLASWIQ....sEFNKACELTD..SSWIELDEIGEDVAPIEHIASMRRNYFTA.EVSHCRATEYIMKGVYINTALLNASC...AAMDDFQLIPMISKCRTKEG........RRKTNLYGFIIKGRSHLRNDTD.VVNFVSMEFSLTDPRLEPHKWEKYCVLEIGDMLLR....TAIGQVSRPMFLYVRTNGTSKIKMKWGMEMRRCLLQSLQQIESMIEAESSVKEKDMTKEFF....ENKSETWPIGESPKGVEEGSIGKVCRTLLAKSVFNSLYASPQLEGFSAESRKLLLIVQALRDNLE...PGTFDLGGLYEAIEECLINDPWVLLNASWFNSFLTHAL.K......................................... 0 0 0 0 +2584 PF00602 Flu_PB1 Influenza RNA-dependent RNA polymerase subunit PB1 Bateman A anon Bateman A Family Two GTP binding sites exist in this protein [1]. 20.70 20.70 20.80 21.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 740 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -13.07 0.70 -6.81 5 18462 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 18111 8 0 8861 0 698.20 94 98.09 CHANGED 
M-l.NPtLLFlc...VssQssISTTYPYTGPPPhSHGTuTtYTL-TVpRTacYSc.KuKpppscVsGss+l.cssNssL-EDNhsEPSGsAclDsVLtLhcph--ca.PGhFc+ustEAhEclhppcas+LTcGRQTaDWTssRNQPAATALpsTI-sh+pN.LsuscGsoMl-alcclhEsLD.+pElcFp..ssKshcR....+hcDppotthlp..cKphsKtcs+Is+cEYlpRALTLNTMsKDuERGKLcRRAIATPGM.lRGFVhlVEslA+sICEpLcpSGLPVGGNEKKAKLuosVcclhsp.ssuplosTlTGDNoKWNEChsP-saLAMlshIT+DuPpWh+-lhSIAPllFSNKhA+LGcGlhhpsKTp+pcslI+A-sluchc.ctFNEcp+stIcclEshl.p-GsspLosGMhMGM....FNMLSTVLGVSsLuhspcclsspthhWDGLQSSDDFlLFssA+Na-shppsl-cFh+lCKLlGINMSpKKS.YlstTGlFEFTSMFaRcGFVuNhAMELPSFs.suGlNESuDhulGholIKNNMINNsLuPuTAphAL+IFIp-YRaTY+sH.hDoclpsRRhKhLKchhcpppuKDGLLluDGGPslaNl+sLHIPElsLKa-..LMDE-Y+sRlhNPpNPFsu+spIE..cpsslFcAHGPl.+shEp-AVuoTHSacT+RNRolLNTcpRshlt-EQpYQKsCNlFEcsFsSuohRsPlG.tShhEAhtcRLc....cu+LctEuGRlpc-Ea-c ............................................MDV.NPTLL.FLK...VPAQNAISTTFPYTGDPPYSHGTGTGYTMDTVNRTHQYSE.KGKWTTNTETGAPQL.NPIDGPLPEDN..EPSGYAQTDCVLEAMAFLEESH.PGIFENSCLETMEVVQQTRVDKLTQGRQTYDWTLNRNQPAATALANTIEVFRSNGLTANESGRLIDFLKDVMESMD.KEEhEIT..THFQRKR.....RVRDNMTKKMVT...QR.T..I...GKKKQRLNKRuYLIR.....ALTLNTMTKDAERGKLKRRAIATPGMQIRGFVYFVETLARSICEK..LE..QSGLPVGGNEKKAKLANVVRKMMTNSQDTELSFTITGDNTKWNEN..QNPRMFLAMIT.....Y.............ITR........N..QPEWFRNlLSIAPIMFSNKMARLGKGYMFESKpMKLRTQIPAEMLAsIDLKYFNEST+KKIEKIR...PLL.IDGTASLSPGMMMGM....FNMLSTVLGVSILNLGQK+YTKTTYWWDGLQSSDDFALIVNAPNHEGIQAGVDRFYRTCKLVGINMSKKKS.YIN+TGTFEFTSFFYRYGFVANFSMELPSFG.VSGINESADMSIGVTVIKNNMINNDLGPATAQMALQLFIKDYRYTYRCHRGDTQIQTRRSFELKKLW-QT.pSKAGLLVSDGGPNLY...NIRNLHIPEVCLKWE.....LMDEDYpGRLCNPLNPFVSHKEIESVNNAVVMPAHGPA.KSMEYDAVATTHSWIPKRNRSILNTSQRGILED.E.QMYQKCCNLFEKFFPS....S....SYRRPVGISSMVEAMVSRAR...ID.A.RIDFESGRIKKEEFu....................... 0 0 0 0 +2585 PF00604 Flu_PB2 Influenza RNA-dependent RNA polymerase subunit PB2 Bateman A anon Bateman A Family PB2 can bind 5' end cap structure of RNA [1]. 
25.00 25.00 37.80 37.70 17.90 16.50 hmmbuild -o /dev/null HMM SEED 759 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -13.23 0.70 -6.68 6 18433 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 18053 21 0 9733 0 710.70 92 99.89 CHANGED MERIKELRsLMSQSRTRpILTpTTVDHMAIIKKYTSGRQEKNPuLRMKWMMAMKYPITADKRIhEhIPERNEQGQT.LWS+TsDAGS.DRVMVSPLAVTWWNRNGPsusThHYPK.VYKTYFEKVERLKcGTFGPVHFRNQlKIRRRVDlNPGHADLSuKEAQDVIMEVVFPNEVGAplLTSESQLpITKEKKEELQ-CKIuPLMVAYMLERELVRKTRFLPVAGGTSSVYIEVLHLTQGsCWEQhYsPGGEVRNDDlDQSLIIAARsIVRRAsVSs.DPLuSLLEMCHST..QIGGs..RMVDIL+QNPTEEQAVDICKAAMGL+ISSSFSFGGFTFKRTSGSSVKREEElLTGNLQTLKIclHEGYEEFThVGK+ATAILRKATRRLlQLIlSG+DEQSIAEAIIVAMVFSQ-DCMIKAVRGDLNFlN...RANQRLNPMHQLLRHFQK...DAKlLFQNWGIEpIDNlMGMhGILPDMTPSTEhSLRGVRlSKhGVDEYSSTERlVVSIDRFLRVRDQpGNVLLSPEEVSETQGTEKLTITYSSSMMWElNGsESlLVNTYQWII+NWET.....VKIQWSQDPThLYNKhEFEPFQSLlPKAtRGQYSGFVRTLFQQMRDVLGTFDTsQIIKLLPFAAAPPKQSRM..QFSSLTVNVRGSGMRIL.VRGNSPVFNYNKsTKRLTlLGKDAGsLscDPDEGTs.GlESAVLRGFLILGKED+RYGPALSIsELusLAKGEKANVLIGQGDVVLVMKRKRsSSILTDSQTATKRIRMAlN ....................MERIKELRsLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPALRMKWMMAMKYPITADKRIhEMIPERNEQGQT.LWSKTNDAGS.DRVMVSPLAVTWW..NRNGPTTSTVHYPK.VYKTYFEKVERLKHGTFGP..VHFRNQVKIRRRVDlNP.GHADLSAKEAQDVIMEVVFPNEVGARIL...TSESQLTITKEKKEELQDCKIA.PLMV..AYMLERELV.RKTRFLPVAGGTSSVYIEVLHLTQGTCWEQMYTPGGEVRNDDVDQSLIIAARNIVRRAsVSA.DPLASLLEMCHST..QIGGl..RMVDILRQNPTEEQAVDICKAAMGLRISSSFS.FGGFTFKRTSGSSVK+EEEVLTGNLQTLKIRVHEGYEEFTMVGRRATAILRKATRRLIQLIVSGRDEQSIAEAIIVAMVFSQEDCMIKAVRGDLNFVN...RANQRLNPMHQLLRHFQK...DAKVLFQNWGIEsIDNVMGMIGILPDMTPSTEMSLRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTEKLTITYSSSMMWEINGPESVLVNTYQWIIRNWEs.....VKIQWSQDPTMLYNKMEFEPFQSLVPKAsRuQYSGFVRTLFQQMRDVLGTFDTVQIIKLLPFAAAPPEQSRM..QFSSLTVNVRGSGMRIL.VRGNSPVFNYNKATKRLTV.LGKDAG..ALTEDPDEGTu.GVESAVLRGFLILG.KEDKRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN........................................................................... 
0 0 0 0 +2586 PF03069 FmdA_AmdA Acetamidase/Formamidase family Bateman A anon Pfam-B_2541 (release 6.4) Family This family includes amidohydrolases of formamide EC:3.5.1.49 and acetamide. Swiss:Q50228 forms a homotrimer suggesting all the members of this family also do. 19.50 19.50 20.10 19.50 18.90 19.40 hmmbuild -o /dev/null HMM SEED 369 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.13 0.70 -5.76 8 1881 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 808 40 759 1832 361 191.80 18 81.87 CHANGED sllslD+sK..hcpss.hHNRWHP-lsssApV+PGEpl+lEshDAhGGQIpss-u...usDlcslDLoplH.LoGPltVcG.....AcPGDlLhV-IhDh.slc.......sct...GasGhFs+tsGuGFLsD+..ascstKslW-acGhassScplPGVRaPGhsasGVIG.sAPSc-lLsphscRE.t.ltpss..s..hs..Ppsp.th...........tlAsEuhRTlPsR.-sGGNhDlKslo+GS+lahPVFVEGAhLShGDlHaoQGDGElshs.AIEMuGplsl+lclIKsG.lcphslcs........Pha..u.l.cPpap..calhhpGluVD-uh.+pthhssshAh+puhLNsIsahc+aGYsstQshlllSsssspu....hVs.ssusssstlP.stIFpps ............................................................................tt..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sG..D...h..Gs.hhh.l...su.h.hGs.+h..u.uc.s....sh-..........h.h..h..................................................................................................................................................................................................................................s.................................................
.................................................................. 0 226 478 623 +2587 PF01070 FMN_dh FMN-dependent dehydrogenase Finn RD, Bateman A anon Pfam-B_829 (release 3.0) Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.68 0.70 -5.61 180 7189 2012-10-03 05:58:16 2003-04-07 12:59:11 13 28 3457 189 2186 12550 8912 259.10 23 86.79 CHANGED A.+cpLPth..sasYlsuGAssEhThpp.NpsAFpchpltPRsL..psls...ph..chs..sslh.....GpphshPlhlAPsGhptLhas.c..GEl....shA.+AAsptGl..shslSohusssl.....E-l......up.s..............ss........ssh..WFQLYh...pDcshsppllcR.Ac..sAGhcuLllTVDsPshGpRc..p-hRssh....th....P.th.......hthh.phh.ts..............................tt................................................................tssth.tthht..h...p.....ss.lsW.cc.lphLRp..t.h..shPlllKGl...hss-DAttAhchG..sD.GIlVSNHGGRQ...........................LDuususl-sLPpl.sp..sl.s...............................sc.....htlhlDGGlRpGsDllKALALGAcuVhlGRshlaGLA.ssGptGVp+slclLpsElctsMsLhGssslscl...st.sh.lpt 
...............................................................................................................................................................................................................................................................................................................................ts.hp.ph.hh.phl........hs.h.p...p.h........s..h..p..sph....h.................s........p..h........t..hPhh..hssh.s.....h......t...t....h......ht...........p....hph...........thAp....s.st..t..h.....s..h....h......hh.u......o.s.....h..h.........p..h..........................................................................h.h...h...................t..................th.h.........tt...h.............t...........h........s.l...h..p.h.s...........................................................................................................................................................................................................................................hsh....pp..lt..ltp................h......t.....h.....P.hll..K...t.....l...................hs.......c...s........s.t............h.....h....p.......h....G.......s....p.....u.l......l.......S....s.....+....G.....G......p....p........................................................................h...D..h.....s.........s....s..............h......p...s........L..........t.l...hp....h...t..........................................................tp........h.l.hh.s.uG.lRpG.Dl.h+....sl.A.L.G.Ac....hshlu.Rshl....h.ul............t....h.t.............G................t............u..l.....p.h...l.p.h...h.t...t...-hchhMthhGspslt..plp......h................................................................................ 0 652 1304 1811 +2588 PF00743 FMO-like Flavin-binding monooxygenase-like Bateman A anon Pfam-B_437 (release 2.1) Family This family includes FMO proteins, cyclohexanone mono-oxygenase and a number of different mono-oxygenases. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 532 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.62 0.70 -6.31 5 2951 2012-10-10 17:06:42 2003-04-07 12:59:11 14 45 679 107 1914 7810 2768 277.00 19 77.52 CHANGED AKKVAVIGAGVSGLuSIKCCLEEGLEPTCFERS-DIGGLWRFoEssEEGRASIYKSVloNoSKEMSCFSDFPFPEDYPNFMHNSKlLEYl+hFAK+FDLLKYIQFKTTVCSVsK+PDFSoSGQWEVVTE+EGKpcSAVFDAVMVCTGHHlNPHLPLESFPGIc+FKGQYFHSR-YKcP-uFpGKRVLVIGlGNSGuDIAVELS+TAcQVFLSTRsGSWVlSRluDsGYPWDMllsTRFsoFL+NlLPoslSsWlhE+QlNcRFNHENYGLpPcc+shtKEPllNDELPuRILsGtVpVKssVKEFTETSAIFEDGTlEEsIDlVIFATGYTFuFPFLEESlVKlccNclSLYKtVFPPpLEKPTLAIIGLIQPLGSIlPTsELQARWAsRVFKGLC+LPSppcMMpEIsc+pEc+hKhFG.spocslQoDYIsYMDELAsaIGAKPNLhSLhLTDP+LAlcVFFGPCoPYQYRLsGPGKW-GARNAILTQWDRoLKPLKTRlVpcSssPsuuF.hLKlFulslLLlAlFLlht ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s................t.....h.........................h.......................................................t..............................................a.....................t.......................h......tY..h.t..h..s.....t.......t.....h.....t.....h......................h.......h....t....t.........l..............................h..................................................................................................................................................................h..............................l..................................................................t...............................................................................h................p
......h........l...........h..........h..s....s......G...........................s..............h........P........................................h..............s.....................t.....a....................s....t.........h..H.................u........................a........p...........t......s........t...................h..........t........s.....p..........p..........lh..ll.G....u..S.u.....-...l....s........p...h.......................s.....t.....t.....................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......h....................t.....t.......................................................D.....lhhsT.....Ga..................................................................................................................................................................................................
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................thh................................................................................................................................................................................................................................................... 0 525 1056 1588 +2589 PF00039 fn1 Fibronectin type I domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.70 20.70 20.80 21.30 20.50 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.42 0.72 -4.15 20 1772 2009-01-15 18:05:59 2003-04-07 12:59:11 13 57 52 70 393 1849 0 38.10 42 17.20 CHANGED ChD.posspYplG.-pWpR.tp..Gthhp.CpChGtGpGchpC ..ChD.s..GspY.pl.G.-pWc+.tp..G.hhhp.CTCh.Gs..G.pGcapC..... 0 20 53 138 +2590 PF00040 fn2 Fibronectin type II domain Sonnhammer ELL anon Prosite Domain \N 22.20 22.20 23.60 22.60 22.10 22.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.60 0.72 -3.95 111 1582 2009-01-15 18:05:59 2003-04-07 12:59:11 14 126 87 45 677 1460 22 41.30 49 6.08 CHANGED ChFPFhapGppYpsCTscGcs.tthW..CuTT.ssYDpDpcWua..C ...ChFPFhapGcpYpsC...Ts...cG.R......p.D.....uhhW..CuTT.tsYD...p...D...p...+...aGFC................. 
0 92 123 281 +2591 PF00041 fn3 Fibronectin type III domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.73 0.72 -3.85 106 58087 2012-10-03 16:25:20 2003-04-07 12:59:11 16 3078 1986 274 27544 50190 2367 84.30 20 23.88 CHANGED ssP.pslphpsh...sssslplsWpss.....suslssYplph....ptsssp...........hpphs...lsss....p...phslpsL.pPsspYphpVt.Ahsst.....t.S ....................................................P.tsl..p..h..p..sh......ss.s.....o...l..p..l......s...W.....p..s.........P................t..................s..........u...........t........l............p...........s...........Y..h.......l...p......h......ppt.sstt...........................................htp.h.s............h.s..s.s..................pp....p..h.p.....l.....s......s.....L...p.........s...s..........s.p.Y.....p....h.p..Vt..A.h.st.tG...u............................................. 0 7000 9544 17225 +2592 PF02986 Fn_bind Fibronectin binding repeat Griffiths-Jones SR, Schwarz-Linek U anon Pfam-B_2661 (release 6.4) Repeat The ability of bacteria to bind fibronectin is thought to enable the colonisation of wound tissue and blood clots. The fibronectin binding repeat is found in bacterial fibronectin binding proteins and serum opacity factor. Bacterial fibronectin binding proteins are surface proteins that covalently link to the bacterial cell wall, mediate adherence of the bacteria to host cells [2] and trigger the fibronectin/integrin-mediated uptake of bacteria by host cells [3]. Each fibronectin binding repeat is an array of short motifs that bind to fibronectin type I domains [4]. Fibronectin binding repeats are natively unfolded in the absence of fibronectin and are thought to adopt a well-defined conformation (tandem beta-zipper) upon binding [5]. 
21.20 21.20 21.40 21.40 19.50 21.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.85 0.72 -4.44 34 1694 2009-01-15 18:05:59 2003-04-07 12:59:11 9 50 190 4 23 1338 0 37.00 43 15.06 CHANGED pslDhsE..DT.....suhS..Gpssss...oh.EDT+...Pp..hh.hGGp ......N.VDh-.Do....lPplp...Gp..Ncss..pshEEDTpt..sc..h...GG........... 0 6 6 14 +2593 PF03274 Foamy_BEL Foamy virus BEL 1/2 protein Mifsud W anon Pfam-B_4337 (release 6.5) Family \N 21.50 21.50 21.70 54.70 20.20 21.40 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.79 0.70 -5.12 6 21 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 8 0 0 35 0 218.60 34 59.84 CHANGED hshsEhE.slu..ssEN....Ptc.+hhpp..pssst-spsVoYH.uYK-pEDpps.cI.KhcDWlPsP-cM.SKplCppLILssLYsupKAtEllp..................hsasVpW..EQScssPshFtl...pYpChhCpslha-PMPlha....DscsclWhKht.LRuslGSlVashc+Hhpp...Cpu.VcP.pp..pGps.......t+PRhRssPshRa.+hhtEasssR.+coK.hlsppspuHp......SsGDshAhts ...................shsEhE.slu..stpN....Ptcs+hhpp......tcspploaH.uY+EhE-pps.pl.KLpDWlPsP-cM.SKpls...........pp.lhshLhps.Kst-.lp..................hPhsssW..pQscssPshFtl....Ypshhspshha-Ph.hha....DPpschahthphLhsslGplshphaKphps...Cpu.lcP.ss..pups.........scPRs+ssPsLpa.+hhhctshsRp+cp+.hlh+hspuHc.ss...SsuDhhAhp........... 0 0 0 0 +2594 PF03408 Foamy_virus_ENV Foamy virus envelope protein Finn RD anon Pfam-B_4411 (release 6.6) Family Expression of the envelope (Env) glycoprotein is essential for viral particle egress. This feature is unique to the Spumavirinae, a subclass of the Retroviridae. 
20.10 20.10 20.40 24.00 20.00 20.00 hmmbuild -o /dev/null HMM SEED 981 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.94 0.70 -13.61 0.70 -7.02 7 89 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 14 0 0 86 1 583.90 52 99.33 CHANGED MTLpQWl.W+thppspphLpshssl..pphp.slh-hpt-EhlPh+h.RhKYhhYsCCATSTRlhsWlhhlClLL.lVhlSshlTlhRlQWppsIpV.GPVlsWNlopptshpshpst+luRphRs.H.t.p.VpVNhTSIPQGVaapPHPcPIIhKERVLGLSQVlMIsS-sIApstNLspEsKsLLs-MINEEhpsLusshlsFElPLGDP+sQ-QYIH++CaQcFAHCYlV+Ytss+sWPocplItDQCPLPsh.ass.tYphQsIWDYYl....htPpsWsscsh..YGp....ARlGuaYlPp..hppshoHVlFCSDQLYucWYshppo.pppEcLhhpKLhN..Lspts.upLKcRALPssWsspGputLFR-lNsLDhCs+PEAVhLLNpoYYsaSLWEGDCshpppsIoph.spC+sa.pp...pphHPYACRFWR....pp.tp-EhKChssEp..+CLYaPpaDosEtt.DFGaLAY.ssFPSPICIcppslp-.cYcV.SLYtEChppucpYGIcsVl.tLcshLs.sGhsls-hPsuRAFssLss.paPsoY.NhTpp.pppuC....s+RpRRSl..sNac+LpohGhuLssAlpTLSpISDlNDEpLtpGlaLLRDHlVTLMEAsLHDISlhEuMhulQHlHTHLspLKshLLtpRIDWoaIpSsWIQpQLQto--.MKlIRRTARSlsYhVpQTpsosTuTuWEIGIYYEllIPK+IYLsNWplhNlGHLl+sAGaLT+VplpHPYEIlNpECppppYLHLE-ChcQDYlIC-.V..VpPCGNsTs.SDCPVhActlKsPYlplpPLKNGSYLVLoSpTDCuIPsYlPslVTVN-TlcCFGhpFK+PLhuEp+s.sapPplPpLcLRLPHLlGIIAKlKulcIEVTST.EsIKDQIcRAKAELLRLDlHEGDaPsWlpQLusATcDVWPAAAsslpuIGNFLussApGIFGTsFSlLuYsKPlLIGlslILLllLlhKIlSWLss..c+K+p 
....MTLQQWLlW+..........KMs-AHsALpNsooLTEEQKpQlIlEIQ...pEEV.l..PT+MDRlKYLsYsCCATuTRVMsWlhLIClLLIIVhVSCFVTVuRIQWN+DIsVhGPVIDWNVT.pp.AsY.QpLpssRloRSLRspHPc.pYlplNMoSIPQGVhYsPHPEPIIlKERVLGlSQVLMINSENIANsANLoQEsKhLLs-MINEELpsLSspMIcFELPLGDPRDQpQYIH+KCYQEFAHCYLVKYKp.P.psW.o-slIsDQCPLPGh.HssshYcYQsIWDYYlphppIRPpsWTocoa..YGs....ARhGSFYlPp.hRpssloHVlFCSDQLYGKWYNlpNslpENEpLL+oKLhN..LTsh..SpLKsRALPppWsspGpucLFRshNsLDlCN+PEAVLLLNoTYaoaSLWEGDCsaTps.Ipph.sEC+p.s+h...chhHPYACRFWR...aKp.spEEVKChssEc.c+CLYYscYsSPEupaDFGFLuYLsAFPu.hCIEspslR-s-YEVYSLYhEChNuAcpaGIDoVLhuLKoFLNaTGsPVNEMssARAFlGLoDPKFPPsYPNlT+E..p+uC..ps.pR++RS...sNlcKL+SM............................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +2595 PF03623 Focal_AT Focal adhesion targeting region Bateman A anon [1] Domain Focal adhesion kinase (FAK) is a tyrosine kinase found in focal adhesions, intracellular signaling complexes that are formed following engagement of the extracellular matrix by integrins. The C-terminal 'focal adhesion targeting' (FAT) region is necessary and sufficient for localising FAK to focal adhesions. The crystal structure of FAT shows it forms a four-helix bundle that resembles those found in two other proteins involved in cell adhesion, alpha-catenin and vinculin [1]. The binding of FAT to the focal adhesion protein, paxillin, requires the integrity of the helical bundle, whereas binding to another focal adhesion protein, talin, does not. 
21.20 21.20 21.70 21.30 20.80 19.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.62 0.71 -4.69 7 286 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 84 30 140 236 0 126.30 54 14.58 CHANGED TtsLDRosDhVYpsshslVKuVhpLpstlpphssp-YlshVKsVGlsLRsLlsoVDclhssLPups+pEIEhspKlLsKDhuELIutMRLAQQ.uhToLcp-h++pMLoAAHsLAhDAKNLLDsVDpARl+hphhh..Pt ................TAsLDRosDpVYpsVsslVKAVlphssclp.us..PEp.YVs.hVKpVGLsLRsLluoVD-hlPhL.P....uSo.++E.I...........EMA..p..KLLN.pDLuELIsKM.+LAQ.Q...ash.TSLp.pE....YKKpMLoAAHsLAlDAKNLLDslDQARl+h.h.....t.................. 0 58 67 101 +2596 PF02980 FokI_C Restriction endonuclease FokI, catalytic domain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 104.30 103.10 19.40 19.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.71 0.71 -4.53 4 21 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 21 3 3 25 0 139.00 41 24.27 CHANGED upEc-lLhpAlLuYPPAspILoLLt-t.GpahTKF-LGcpLGFhGEpGFTShPpsIhlcsLAsup.st-K...pKIKosaEGoSDKYARhIusWLcplGLVpptsKpVhh.ThtpRKap..lupsa.ITu.GlpsLpcspGpoRas ....t.p-pclLpcAlLuYPPAsplLsLLsst.upthTKFpLGcpLGFhGEtGFTShsp-lhlpsLspAp.sp-K...pKI+SshEGTSDKYARhIsuWLhplGLVpppsKclsh.shssccap..shs.psYpITs+GlpAL+pspGpS+a.s....... 
0 1 1 3 +2597 PF02981 FokI_N Restriction endonuclease FokI, recognition domain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 29.00 27.50 22.90 19.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.70 0.71 -4.58 3 21 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 21 3 3 25 0 143.20 34 25.01 CHANGED lhloEsS+lR..TFGWVQDPS.DF+sLs+VVuIFDcsSKlHpELtsp+IPsLVcEpclRpELlullNQ+PLplTYK-LVGTuho.RScAcCNuIVQAsl..pGQ.sRsaIsDWuADNFVRWAHALGFL+YtppuDoFsITElGlAhuKutD ..............chR..TFGWlQssS.shppLKKVVslFsssSchappLhcshlt.pllp-tsh+pcLhscLsss..shphoYhcLsGss.......h.......p...........RocuhssuLlQAsl.....puQ..u.....+tahcDWsADuFLRWAVulsFlcaspcsDTFsITchGhphsp................ 0 1 1 3 +2598 PF01770 Folate_carrier Reduced folate carrier Bashton M, Bateman A anon Pfam-B_1123 (release 4.2) Family The reduced folate carrier (a transmembrane glycoprotein) transports reduced folate into mammalian cells via the carrier mediated mechanism (as opposed to the receptor mediated mechanism) it also transports cytotoxic folate analogues used in chemotherapy [1], such as methotrexate (MTX). Mammalian cells have an absolute requirement for exogenous folates which are needed for growth, and biosynthesis of macromolecules [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.32 0.70 -5.96 24 433 2012-10-03 03:33:39 2003-04-07 12:59:11 13 6 105 0 249 620 26 306.20 34 82.78 CHANGED psWhh.ohlLChaGFhpphRPuEPFlssYLhGs.+NlTp-QlssplaPVhTYSYLAhLlsVFLlTDaLRYKPVIlLpuluhlssahlLlaspulhshQhhEhFYGlspAuElAYaoYIYuhVs......ppYQ+lTuYoRuuhLlGhhhuulLuQlLVo....hsthsahpLNhISLushsluLhhuhhLPtsp+SlaFppp..............................ppscspphppsppps......hpt.t..shpt...psl....tthhpchtssYss.pllhWSLWWAhuouGahQllsYlQlLWcpl...sspssplYNGuV-AsuTLLGAlsuhusGal..phpashautLsLulhSslpuGhlhlhshop...sIWlsYsuYllF+uhYhFlITIAshpIAssLsh-paALVFGlNTFlALhLQTlLThlVlD.ppGL..sLslpsQFhV ....................................................................W.h.shhLCh.aGhhtth+P..u.Esalh.aL.h.t....p...NhTtpp..l..p...........plhPhhoYS..als.hL..h...lF..l...hTDhl.RYK........Plllhpu.hu..hlh.h.ahh..Llhs..ps..l...hthQh...............hphha................u..........hh....h.A....s.c.l....A.Y...u.Y.Ia.u....h..V.p........................th..Yp.+.h..s....u.a.s.R.u.s.h....Lhu.hhhuul....lu.Q.lhls.....ht....hs.hh...L..thlo.....l.s.....h....uh..h.h.u.h.h......L............sppo...h......aa...pt................................................................................................h....hh.........h.hhtc..h..ht.s.h.pp....lhhWSlWWsh.s..ssGa..lh.....................YhphLWp.h..........pt..t.haNG.ss-A.h.shh........uu.h..ss.hhsuh..l..php.h.sh.h.u.....h.hlshho....h..h..u.s..lhh.h.t.....st.......sI....ahsYss...al.lat...Y.h...h.l...sl........A......hp...lA...........sLs......-.huLlFGhNTFhAhhlpollThl.Vss.ttuh...sL.l..Qah............................................................................................................... 
0 72 94 174 +2599 PF03024 Folate_rec Folate receptor family Bateman A anon Pfam-B_1966 (release 6.4) Family This family includes the folate receptor which binds to folate and reduced folic acid derivatives and mediates delivery of 5-methyltetrahydrofolate to the interior of cells. These proteins are attached to the membrane by a GPI-anchor. The proteins contain 16 conserved cysteines that form eight disulphide bridges. 29.50 29.50 30.30 29.50 29.10 28.90 hmmbuild --amino -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.94 0.71 -12.36 0.71 -4.63 34 433 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 99 11 239 412 3 158.80 26 42.97 CHANGED hCh...........su+paKshPusEspLt.p..........Cssapcp............................uCCssspopphpps.s.hl.ph.hp+CGp...hospCccahhp.pChYcCSPplu.W.............................p...ppth.slP.LCp-.CcpWacsC+ssh...TCsssW.t...athspst.ppC..tst..Chsappha.ssss-LC...................cplaupuaphustt..................uspClp ...................................................Chstp..pp.stsc..tpLh............Cs.appp.................................uC....Cstspspphttp.t........hh..............h.......up...........hp.stCpcahhphtC.hcCSPphu.h....p.....................tp..pphhhslP.LCp....-hCppaap...sC+ssh.......ps....tssh....................pps...h...p.C........t.....C..h.p.hh.....sssshC........pph.hu.s.hthss.....................sthCh................................................................ 0 66 87 135 +2600 PF02152 FolB Dihydroneopterin aldolase Bateman A anon PSI-BLAST P31055 Domain This enzyme EC:4.1.2.25 catalyses the conversion of 7,8-dihydroneopterin to 6-hydroxymethyl-7,8-dihydropterin in the biosynthetic pathway of tetrahydrofolate. 
24.10 24.10 24.10 24.40 23.60 23.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.15 0.71 -3.91 129 4525 2012-10-01 20:59:24 2003-04-07 12:59:11 13 22 3833 60 1029 2777 2126 111.20 28 72.22 CHANGED lhlcsLchhuhlGlhstE+thsQphhlDlpl....thchp...tssts.Dclppol....sYsplsctl.tphsp.ppp...apLlEsLAcpluptlhpp...........................as....lpt......lp.lplpKPss..ls.tsss.......lulplpR ................lhlcsLchhshhGlhstEpphtQchllDlpl.........thD...hp.........pAupo...D-.ls.c.s.l........s.Y.upl.scpl.tphlp...s.pp...........hpL..lEplApclAchlhpc................................as.....lpt..........lclclpK.PpA..ls..thcs.VuVplpR................................................. 0 309 627 864 +2601 PF00250 Fork_head FKH; Fork head domain Finn RD anon Prosite Domain \N 20.70 20.70 20.70 21.00 20.50 20.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.24 0.72 -3.94 21 4445 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 545 35 2306 4173 4 92.50 42 20.59 CHANGED KPP.....................YSYhuLIsMAIpp.uPsKhLTLupIYpaIh-pFPYYRpNppuWQNSIRHNLSLNcCFlKVPR......ps-cPGKGuaWpLcPsutshF.s................Gsah+Rc+R ........................................KPP.......aSYss.LIsh.A...Ipp....o.sp+pLTLs.pIYpa..Ihcp..FP..Y..a.....R..........p......s.....p......t.....u......W............p..............NS.IRHNL..SLN.c.C.FlK..Vs..+................t.....s....p.....s...G..........K................G..s..a.Wsl..c...Pss.t....p.h...h.t.......t............................................................... 
1 603 883 1588 +2602 PF01226 Form_Nir_trans Formate/nitrite transporter Finn RD, Bateman A anon Prosite Family \N 28.20 28.20 28.60 28.50 28.00 28.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.70 0.70 -5.48 190 4680 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 2481 80 683 2429 127 247.40 28 87.19 CHANGED stphhpthhps.uhpK...hptshhphhltuhhAGhhIuhushhthhlsssh.................s...........uhspllsuhs.FslGLlllllsGu-LFTuNshhhshuhhpc+..loh.tpll+sWsllalGNhlGulhhuhlhshs...shht...........ts........thstthhplAp......tK..h......sh.s.............hhpsFh+GIhCNaLVsLuVWhsh.uu+sh..huKlhslhhslhsFlssGFEHsVANMhhlshu.......lhh.us..........................s.hhshhhpNllPVslGNllGGslhluhhhahha ..............................hthh.pt.u.tc...hp...ps.h.ph..hlhuhhAGhalul..uhlhhhshssth.................ss................uhsp.Lls.uhs.FslGLllllls.Gu-LFTusshhh....sluhhp..pp.......loh..sp..hh.......p.ahh.........shlGNllGulhhuhlhths..shht...............s.......................................thspt.hh.pl.up.tK.h................pp..s.....................................h.hphhhpGIhsNhhVslAlWhsh..t..s.c.ss..hsKhhshhhslhhFlhsGaEHs........lANhhhhshu...lhh..sp.................................s.hhsh..lhpNllsshlGNllGGulhhGLsYahh................................ 
0 205 396 569 +2603 PF02971 FTCD formiminotr; Formiminotransferase domain Griffiths-Jones SR anon Structural domain Domain \N 24.00 24.00 25.40 26.60 22.80 23.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.74 0.71 -4.56 4 394 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 349 6 124 337 180 124.30 43 33.96 CHANGED FLIAFNINLLuT+..E.AHRIALslREQGRGtDQPGpLKKVQuIGWYL-E+NlAQVSTNLLDFEsTsLHsVaEEsChEApcLuLPVVGSQLVGLVPLKALLDAAuFYhcKEpLFlLp-E++I+LVVsRLGLDSLsPFpP+ERIIEYL ...............LlAaNlNL...s.T.s..l-IAccIA.KtlRtpu.......................................................................GGh+alKAlGl.Lc.......-+.....slsQVShNlsDap+TslaRsFEpl+hEAc.RaGVsVlGSE....llGLlPhcALlDsAcYYL.p....h.E.s.h.......................................................................... 0 62 84 104 +2604 PF02911 Formyl_trans_C formyl_trans_C; Formyl transferase, C-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -3.92 130 5755 2009-01-15 18:05:59 2003-04-07 12:59:11 13 28 4563 24 1384 4263 2359 99.70 28 26.51 CHANGED +lspp-stI-W.spsAppltphlRuh...s.PaP.uAas...hh....ss.......p....pl..........+lhcs.......phh.....spttttt.......................PG...p.....llphspp.slh.lssu.sus......ltl.ppl..Qhtuc+hhsspshhp....Ghp ....................pls+--ucI.D.W...spsApplppplRuh........s..PaP..sAas....hh..............ss..........p...........pl...................................Klapu.....................phh...........ssss.stt.....................................................PG....p.......llp....s...s....cp....slh..lAsu..sGs...............Lpl..tpl..Q...sGK+.t.h.sstsahpGh.t......................................... 0 435 862 1141 +2605 PF00551 Formyl_trans_N formyl_transf; Formyl transferase Bateman A anon SCOP Domain This family includes the following members. 
Glycinamide ribonucleotide transformylase catalyses the third step in de novo purine biosynthesis, the transfer of a formyl group to 5'-phosphoribosylglycinamide. Formyltetrahydrofolate deformylase produces formate from formyl- tetrahydrofolate. Methionyl-tRNA formyltransferase transfers a formyl group onto the amino terminus of the acyl moiety of the methionyl aminoacyl-tRNA. Inclusion of the following members is supported by PSI-blast. HOXX_BRAJA (P31907) contains a related domain of unknown function. PRTH_PORGI (P46071) contains a related domain of unknown function. Y09P_MYCTU (Q50721) contains a related domain of unknown function. 20.80 20.80 20.90 20.80 20.60 20.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.94 0.71 -4.74 15 13471 2009-01-15 18:05:59 2003-04-07 12:59:11 14 85 4844 104 3486 9789 6453 174.10 26 55.92 CHANGED h+lsllhSGsu.oshpsl.lsth+psstp...spllhVlos+spstGhp+utpsslsptlhpcps...................apsc.tthcpcltctlcthpsDllllAuah+lLssshl.pthss+lLNlHPSLLPpa.....cG.ssslppAlhsGscc.....sGsTlaals.-plDsGsIlhQpps.lhs.s-ossslppRlp-tEt.pshscsl .........................................................................................................+lhhh...Gp...s....p.....h..t..sl....l.p...t.hh.ts.th.............ls..s..Vh..o..p....s.c...s...h..s....h...t.t......s..h.p.....h..s......s..t.h.h....h.....t.ps..............................................h.psp...t..h.c.p...p....h...h....p...t...l...........p.....p...........h.......p...s.....D.....l.l.VlA.u.Y.h.p..l..Ls.sp.ll....p.........t..h..............t.....t.............t......h..l.NlHs...S..LLP........pa...................+.G...usP.h......pp...A...l........s....G.s.c.............................sGsTlH.....h.l...........s...p...........t..lDsG.s.Ilt.Q.t........t....l....s...l........pt...sDost...s.Lh.p.+l.t.p.h.tt.pl.lsps.............................................................. 
0 1084 2180 2908 +2607 PF01491 Frataxin_Cyay Frataxin-like domain Gibson Tj, Bateman A anon Gibson TJ Domain This family contains proteins that have a domain related to the globular C-terminus of Frataxin the protein that is mutated in Friedreich's ataxia. This domain is found in a family of bacterial proteins.\ The function of this domain is currently unknown. It has been suggested that this family is involved in iron transport. 21.30 21.30 21.80 21.80 21.10 21.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.65 0.72 -4.26 96 1466 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1409 24 414 878 102 106.10 41 86.45 CHANGED hs-scapplu-phlpplE-tl-th..-ss.....................hDlDs-hs.uGVLTlpF.s.stuphlIN+QsPh+QlWLAo..SGGaHFcap................s.sp..W.lss+.sGp.phhshLscthsppsups.lpas ...................................Ms-oEFHcLADphh.sIE-plDph..-.uc.....................sD.l.DsEh.s....GG.VLTlsF.....-suS......pIlIN+QpPh+.QlWLA.o+.pGG...aHFchp.....................s..sc..Wl..........ssR...sGp.shaclLpptsoppuGcslph.p.......... 0 105 209 318 +2608 PF03197 FRD2 Bacteriophage FRD2 protein Mifsud W anon Pfam-B_2816 (release 6.5) Family \N 21.60 21.60 21.80 50.80 20.90 18.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.32 0.72 -3.47 6 36 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 32 0 0 28 0 99.60 58 79.79 CHANGED sMVclIc-NGGWFEVK-hpss.DGh.csVs+IpCANGcha.ssG.h-pYFEIpEDEF+CFREY...TsEcDElcccV..oGVTKIHCIVDENNVDEIIELLRKTFKK .sMVclIpENGGWFEVK-htsh.DGa.chVp+IpCANGcha.s..G.h-cYFElsE-EFaCFREYKE.TSEcD-lcDcV..SGVTKIHCIVDENNVDEIIELLRKTFKt.. 0 0 0 0 +2609 PF04422 FrhB_FdhB_N Coenzyme F420 hydrogenase/dehydrogenase, beta subunit N-term Kerrison ND anon DOMO:DM04087; Family Coenzyme F420 hydrogenase (EC:1.12.99.1) reduces the low-potential two-electron acceptor coenzyme F420. This family contains the N termini of F420 hydrogenase and dehydrogenase beta subunits [1], [2]. 
The N terminus of Methanobacterium formicicum formate dehydrogenase beta chain (EC:1.2.1.2, Swiss:P06130) is also a member of this family [3]. This region is often found in association with the 4Fe-4S binding domain, fer4 (Pfam:PF00037). 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.47 0.72 -4.12 116 542 2009-01-15 18:05:59 2003-04-07 12:59:11 8 32 327 0 298 541 181 79.70 26 20.16 CHANGED tchhtApusst..hppuQsGGhVTulhthhL-pGhlDuVlsst....psp...................c.acshPhlssss--lhpsu..Go+as.hsshhphlcc ...........t.thhhApsp.p...hp.tupsGGlVTslhthhL-p.G.hlDuVlssp....psp................................c.a.pshshlspss--llp........ss..Go+as.hsshhphlt................................................ 1 80 189 254 +2610 PF03881 Fructosamin_kin Fructosamine kinase Bateman A anon COG3001 Family This family includes eukaryotic fructosamine-3-kinase enzymes [1]. The family also includes bacterial members that have not been characterised but probably have a similar or identical function. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.81 0.70 -5.43 35 2015 2012-10-02 22:05:25 2003-04-07 12:59:11 9 10 1716 3 561 1603 432 272.80 31 95.06 CHANGED MWpuIup.lo-phGpsapIpc+pplsGG-Ispsahls........cuppcaFVKlNp..cshLshFcuEA-uLphLscopolpVPcslshGss.cspu............aLlLEaLshp.hsstcsthphGppLApLHp.......hssptpFGa-hDNhlGsTsQPNsWpccWusFFu-QRIGaQLpLsc-+Gh.p...husl-pllcpltphL................ssHpPpPSLLHGDLWsGNsuhsss.G..........PllaDPAsYaGDREsDLAMoELFGGFPssFYcGYpslaPLs...sGYpcRKslYpLYHlLNHhNLF...GGpYlspApphlcpl.Lpp ......................................................................................................................................................................................................h..ht.ls..G.Gp..lp.t.s...a.t.....lp...............h.stp....p.hFl..K..spp...............tph.....h.s.....h..F.p....u..Es..s.tL....p......h......l....t.....c......s........t.......s.........l..p..........l.Pcl..h......s..s....G.ts...p...s...t.u.....................................a...Ll.h...-........a.....L.........s.......................t.....................s...........s.........ps.........s..........hpL........G.pp.........lA.c...LHp.................................hps.pspF.....G..........a.....c.....hs.....s.....t.....h.G.s.........h.....s........Q....s.....N......s........W........p.....c......c.....W...s................s...FF....u...c...p.R.....l....s.h.........lc..h..s.t...c..+...Gh.h..............hsp.h.c...p....l..h....c....p...l..t.p..t..L...............................t..s...+..p.s.....p...P.SL...L.......HGDLWuGNshhsss..G...............................................Ph...laD....P.A.....s.a..a.GcR............EhD..L.....A......h....h..t..l.....F...........s...s....h.....s...s..p........h...a.c.uY...pp....h.....h.....P....Ls.....ts.a...pc.Rhsl.YpL.Yh...LL.s.+.ht.hF...G..u.t..ahs.tstp.hcplh..t.............................................................................
............................................. 1 143 323 454 +2611 PF04961 FTCD_C Formiminotransferase-cyclodeaminase Bateman A anon COG3404 Family Members of this family are thought to be Formiminotransferase- cyclodeaminase enzymes EC:4.3.1.4. This domain is found in the C-terminus of the bifunctional animal members of the family. 21.80 21.80 23.30 23.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.73 0.71 -4.87 75 717 2009-09-11 00:17:51 2003-04-07 12:59:11 7 8 621 6 232 659 206 179.80 33 69.85 CHANGED holp-FlctlAScsPsPGGGSsuAlsuAlGuALssMVupLTl.GKKpYtph-s....phpplhppscphppcLhshlDcDscAFsplhsAa+LPKpTcEE+ttRptslQcAl+pAspVPlplAcpshchlplhptluphGNtsslSDssVushhscAAlpuAhlNVpINLs....sl...cDppahpchppchpplh .............olpcFlctluScsPsPGG...GosuAlsuAlGsALssMVupLTl...GK..+p.Ytp.hcp...................phpplhpp.s....p.plpp....cL....lphlDcDscAFstlhsAaclPK...pT-....EEK.stRppslQ....p.uhcp.AspVPhplsctshcshpl.hpphsppGNpsulSDsuVushhhcuulpuAhlNVhINLs....ul...+D.cpaspphppchpplh...................... 
0 116 171 202 +2612 PF01268 FTHFS Formate--tetrahydrofolate ligase Finn RD, Bateman A anon Prosite Family \N 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.70 0.70 -6.53 141 4825 2012-10-05 12:31:08 2003-04-07 12:59:11 14 10 2767 19 740 4276 2739 447.70 51 94.46 CHANGED hhSDIEIAppsp..hcPIpclAp.cl.Gltp--..l-.YG+aKAKlshs.hlc..pl...ps+..............sGKLlLVTAIoPTPuGEGKTTTTlGLupAL.s+l......GKcshlsLREPSLGPsFGlKGGAAGGGYuQVlPME-lNLHFTGDhHAITuApNLLAAhIDN+lapsN.p......................................................LsIDscpIsW+RVlDhNDRuLRpIslGL...Gupt..sGhsREsGFDITVASElMAILCLusD......LpDL+cRlG+IlVuas....hcspPVTAcDLtssGAhssLLKDAI+PNLVQTLEsTPAhlHGGPFANIAHGsNSllAT+hALKLuD...............YVVTEAGFGADLGuEKFhsIKCRhuG.LpPcAlVlVATlRALKhHG....Gl.shss...............LspENl-ALccGh.sNLp+HIcNlpp.aGlPVVVAIN+FsoDT-uElphlcchs..p.ph.Gsp.sslspsWuc..GGcGuh-LActVlcs......pppsu.p.........FchLYssch.slc-KIpsIApclYGAssVphospAccplpphcchG.ascLPlChAKTQYSlScDPslhGsPpGFslslR-l+lusGAGFlVslsGclMTMPGLP+pPAA.sIDls.-sGpIsGLF 
...............................................................................................................................................................................................................................................................................thoGDhH...AIsuANNLLAAh.lDNHIaQGN..p.......................................................LsIDs+c...IhW+R............s.lDhNDRtLRplssGL......Guph..............sG..h.sRE................DGFDITVASElMAILCLuss......lpDLKcRLu+IlluYs......h......c.s......p....PVT..spDLpspGAhssLLKDAlKPNLVQTLE..sTPAhlHGGPFANIAHGCNSllAT+hAL+.Lu.D...............YsVTEAG.F.GADLGAEK.FhDI.KCRhuG.lp.PsAVV.......lVATlRAL...KhpG....Gl....s+sp.....................L.sp.ENl-Al.cpGh.sNLt+HlcNl.pp.aGlPsVVAIN.pFs.o....DT-AEl..phlc......chspph....G....sp....sslspsWucG..GcGuhcLAc.pVlchh..........p.p....s....s...................FphlY...c.....c..h....slc-KIctIspc.lYG..A..ssVphss...p......A.p.cpltp....hp....p........G..asphPlshuKT.h.................................................................................................................................. 
0 264 454 610 +2613 PF03239 FTR1 Iron permease FTR1 family Bateman A anon Pfam-B_3227 (release 6.5) Family \N 22.50 22.50 22.70 22.50 22.20 22.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.92 0.70 -5.47 13 2689 2012-10-03 03:33:39 2003-04-07 12:59:11 9 11 1826 0 661 2163 330 227.80 21 69.51 CHANGED hhuhhllshREuLEAulllollluhlKp........................................................................stccppt.................t.lahGhhhulhhululGsuhhthhtultt.......hpc-lhEuhhShlAslllohMhhaMt+...up+l+sclctplspslt..................t.paulhlhsF......lsVlREGlEsVLFluuhsttss........ssshlusssGhhsuslluhhlapsuh+lsLptFhhhsoslLhhluAGLhuhu....hhthhchspt.pGhchhthspussshshsh.ushthshhssthsu.....llhslhGahsp..........hhhlhlllhluhhhht..hshthh ............................................................................................................................................................................................................................................................................................................................................................................................................t..h...h....ht..hh..h...h.t......................ht...t..hpt.........th....h..................................................................................................................................................t....t..h......hu....l..h...h...h...s.F.........................lsl...hREGlEs.llF.lhulhttts.............thhhh.hGs.hh.G...lh...s...u....h....l....l...u...h.l..l.h....h....s....s.....h..c....l....s....h.t....t.....h....h..h.hsuh..hl....hhlussLhutu..........hhthhp.t..s....h.......h.....p..h.............t.s........h..h.....p..t..s.h......hh.......................................................................................hh........................................................................ 
1 181 389 559 +2614 PF02491 SHS2_FTSA FtsA; SHS2 domain inserted in FTSA Anantharaman V anon Manual Domain FtsA is essential for bacterial cell division, and co-localises to the septal ring with FtsZ. The SHS2 domain is inserted in to the RNAseH fold of FtsA [2], and is involved in protein-protein interaction [1]. 25.40 25.40 25.60 25.60 24.90 25.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.50 0.72 -4.09 50 3487 2012-10-02 11:08:51 2003-04-07 12:59:11 15 8 3448 5 697 1941 882 80.40 34 18.91 CHANGED uGsplpshsspGhls..hpscEVsppDlcRVlcsApshshss...-pcILHslPpcasl.DspcGI.+-PlGMsGsRLcscsHllTs ........................Gtplps.sspGhls........hpscElo.ppDlcpV.lcs.A.p.s.h.s.lss.......-ccll+.llPp-ahl.Ds.p..c.G.I..+-P..lGM.G.l.RLcscsallTs............ 0 222 434 570 +2615 PF01580 FtsK_SpoIIIE FtsK/SpoIIIE family Bashton M, Bateman A anon Pfam-B_458 (release 4.1) Family FtsK has extensive sequence similarity to wide variety of proteins from prokaryotes and plasmids [1], termed the FtsK/SpoIIIE family. This domain contains a putative ATP binding P-loop motif. It is found in the FtsK cell division protein from E. coli Swiss:P46889 and the stage III sporulation protein E SpoIIIE Swiss:P21458 which has roles in regulation of prespore specific gene expression in B. subtilis. A mutation in FtsK causes a temperature sensitive block in cell division and it is involved in peptidoglycan synthesis or modification [1]. The SpoIIIE protein is implicated in intercellular chromosomal DNA transfer [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.18 0.71 -4.85 23 9858 2012-10-05 12:31:08 2003-04-07 12:59:11 13 40 4316 14 1937 10212 3173 196.90 29 27.52 CHANGED pltphhsspthctstsphslshGtph.tspslhh-lschP........HhLlsGuoGSGKSsslpsllhSlhhptpPpcVclhhlDsKhs.cLsshpslsHlhs....slsscsccshpsLctllsEMccRh..plhpphslcslst.....hppphsp..ht...................................................hs.lllllDEhs-Lhhssspc........lpshlsRluph......uR.....usGlHLllAs....................QRsus .............................................................................h...............t...t.....l....lsl..Gpsh...t....spslhh.D..L.s..c.hP............................HlLl..AGs.T....G.SGKSs.h..l.p.s.llhS..l..l..h..p...t...p...P..........c...c.........l....+...h..h.h.....l.D......P..K............h........s....-........L.......s..s.......a.....p........s.......l........P.Hl...hs...................sl.s...s..-...s...c..c....u....s....p..s.Lp...h...h....s...s.E.M...c...c...Rh........clh....s.p..h......s..s.+.s.lst.............asp..t...h..t..p.............................................................................................................................................................................................................................hPhlllllD.E...h.u-.L...h........h...s...s...s...pc.........................lpp...h...l..t+....l.uph...........uR.......us...GlHl.llAT......QRPs........................................................................................................................................................................................................................................................................................... 0 624 1301 1668 +2616 PF04999 FtsL Cell division protein FtsL Bateman A anon COG3116 Family In Escherichia coli, nine gene products are known to be essential for assembly of the division septum. 
One of these, FtsL, is a bitopic membrane protein whose precise function is not understood. It has been proposed that FtsL interacts with the DivIC protein Pfam:PF04977 [3], however this interaction may be indirect [4]. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.04 0.72 -4.27 66 1462 2012-10-02 13:28:50 2003-04-07 12:59:11 8 3 1456 0 307 877 315 93.00 37 83.99 CHANGED Ls...thlhtsl.th.plhll....LhlhllhoAluVlassHpsRphhsp....hpphhpc+-pL-hEWtpLlLEpsshup+uRlEplApccLsM...phPssscplllp. .......................................h.........h...s+lslh....Lhlsll..lo..A.ls..VV..h...s..s..H....p..oR..hLh..s.p.......h-ph....h.hE+....-tL....-h.E.WcpL..lLEcsuLu.c..HS.R.VEplApcc.LpM...ppscsuppsllh.t..................... 0 85 172 247 +2617 PF03799 FtsQ Cell division protein FtsQ Finn RD anon Pfam-B_1605 (release 7.0) Family FtsQ is one of several cell division proteins. FtsQ interacts with other Fts proteins, reviewed in [1]. The precise function of FtsQ is unknown. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.26 0.71 -10.49 0.71 -3.91 180 3219 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 3193 6 616 2021 863 117.40 20 39.22 CHANGED lAhWpp..s.t..tllsppGplhts..................phs..t.s.sLPhlhG........ssspp.h.......hphhpph.th.ss.lthplsslphsspps....WplhL...s.....s.Ghp.lhLsps.......s..htp...............+lpphsplhtph............t.......pp.........lth..lDlR .........................................................lAhhps..p....thl.spsGpshss...................shs...phs..shP..h..l..h.G....................................pss.p.p.l...............hphh.pp..h.s....ph..sp....l....p.....hp..lpplshsspcs.............hpLhh.........s.............s...Gh...p..lhlsps................s....htc.................+.lthas.pl..h.pl..............t...tp.........lth.lDh.................................... 
1 173 372 497 +2618 PF01098 FTSW_RODA_SPOVE Cell cycle protein Finn RD, Bateman A anon Prosite Family This entry includes the following members; FtsW, RodA, SpoVE 20.10 20.10 20.40 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.25 0.70 -5.67 15 10114 2012-10-02 17:14:55 2003-04-07 12:59:11 14 11 4374 0 2079 6986 6687 345.20 28 90.11 CHANGED hpLlhhlhhLhs.hGllhlaSASshpuhshhsss.hhhhh+QllahllGhllhhlhhplshp..hhp+hthhhallsllLLhllhl....lGssssGA+RWlslG..slslQPSEhsKlulhlalAphlupt............t..hpsphcshhhsll.hhhlhsslllhQPDLGTulllshhhhsllahuGhsh.phhlsllh......lulsshlhh....lhhcsY....phpRltuaLsPapDPh.....GsGYQlhQSlhAluoGGlhGpGlGpupQ.KhtYLPEupTDFIFAlluEEhGhlGsllllhLahllhh+uhpIAh+...spcpFtpllAsGlslhlhlQshlNIGhssGllPlTGlsLPalSYGGSSllshhsshGlLhsluppppt ............................................................................................................h...hhhhhllh..lG.llh..l...h..S..u...o.....h................s..............t..........h....h.........t..........t................h......h......h...h....+...Q.hha.h...h....l..u..h.l...h.....h...h..l...h...h...h.l....s..hc................hh..p..p..h......s..h....h..h...h...h...........h.....s...l...l..l...L...l........h.......l...h.h..................hG...t.t.....s..p.....G.........A......p.......p............Wls..l.G......sh..s..h.QPSE.h.hKl.sll.l.h....l..A....t.h...lsc.....................................p......h..h..s....p....h.......p....s......h......h....h....h.....hh.........l........h.h....l....s..h.h..L..l.....h..h..Q...P.....D.....L..G..o...s..l.l....l....h...s...h.....hh........s....h....l......a.........l.....u.........G....h.....s.......h.....p........h....h..h.....s.....hhs..................hh.s..h.s..s..h..lhh..................................................hhhp...sY.......................p..hpRlh..s..a......l...s.....P....h....p...c..s............................usG.Y....Q.l.hp.S..hhA..lG.s....G.G..lhG..p...G....h.....s.....p.......ut.....
.....p.....p.....h.....t...a....L..P...E........s..cTDFIFulluEEhG.h.l.Gshhll.hLahhllhR.s.h...h...l...uhc............sps.......F..sp..h.lusGl.......shhh...hhpshlNlGhs..hGl..l.P.l.sGls.LPhlSYGGSSllshhhulGlllslstpt..t............................................................................ 1 736 1429 1800 +2619 PF03867 FTZ Fushi tarazu (FTZ), N-terminal region Finn RD anon DOMO:DM07486; Family This region contains the important motif (LXXLL) necessary for the interaction of FTZ with the nuclear receptor FTZ-F1. FTZ is thought to represents a category of LXXLL motif-dependent co-activators for nuclear receptors. 25.00 25.00 28.60 27.10 23.40 23.40 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.01 0.70 -5.15 3 27 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 17 1 8 31 0 259.90 70 61.39 CHANGED MATTNSQSHY.YAD..NMYNMYH...sHSLPPTYYDNSuSsSuYQsTuptWQ..PASYQSNYtAaYu.QESYSESCYYANspaQ.............PTssTVPQ.PsVPThPEPlltsS...PVKupKRKAEDsAAuIIAAVEERPSTLRALLTNPVKKLKYTPDYFYTTlEpVKKuP...PupoKuouSPAPSYEQEYVAVPTP...........SASEDVDYLDVYSPQSQsp+.KNGDFl..TPPPhATTPsossuI.EGISTPPQSPGEKSuoA..VSpEINHRIVTAANS..AuDFNWSHIEETLA ....................................................MAsTNSpSH.Y.Y..AD..NMYNMYH.....sHSL.PP....T..YYDNSuS..suhYQs....S.........uoYQ.SYtu.hYs.QESYSESCYYhNsQcQ............................p.stT.VP..Ps.Pss.......P.P...............KupKRKAE.....EsAAuIIAAVEERPSTLRALLTNPVKKLKYTPDYFYTTlEpVKKuP...sspoKsAuSPAPSYEQ...EYVsVPTP...........SASEDVDYLDVYSPQSQ......sp+....KNGDFs..TPPP..TTPs........ohssl...EGISTPPQSPGEKSuoA...VSpEINHRIVTAsNu...AuDFNWSHIEETLA........ 
0 2 2 5 +2620 PF02952 Fucose_iso_C fucose_iso_C; L-fucose isomerase, C-terminal domain Griffiths-Jones SR anon Pfam-B_9303 (Release 8.0) Domain \N 28.90 28.90 28.90 29.00 28.80 28.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.78 0.71 -4.68 45 1454 2012-10-02 11:40:13 2003-04-07 12:59:11 12 4 1208 15 185 864 123 155.10 38 29.63 CHANGED ppshlhLhpsGsss......t.................pphptshptsphhstsppahpGu....shuophtphsssslThhRLs..cs.G.l.hhlucGpsl-hspc..spsplshpss.phspsaht..hs.p.........plhsshhuNHsuhsaGchupsLhphs.phLtIshs.h+ .............................................t.pGhlcLhNSGussLsh......tttpttt.t........hW-lppp-spusLtsTc.assA.h+..cY.F.RGG...........GaS.o.pF..hT.pu...ssPhThsRlNhl+GlG...PV.LQIAEGho.l-..LPc.c..sact..L.s.p.R..Ts.s.sW.P..oTaFs.s.R.lsucssF..psVYsVMsNWGANHuslshGHlGADllTLA.uMLRIPVs.MH.................. 0 82 141 159 +2622 PF02300 Fumarate_red_C Fumarate reductase subunit C Mian N, Bateman A anon Pfam-B_11568 (release 5.2) Domain Fumarate reductase is a membrane-bound flavoenzyme consisting of four subunits, A-B. A and B comprise the membrane-extrinsic catalytic domain and C and D link the catalytic centres to the electron-transport chain. This family consists of the 15kD hydrophobic subunit C. 22.50 22.50 22.70 22.60 22.10 22.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.80 0.71 -4.27 23 798 2012-10-03 07:11:12 2003-04-07 12:59:11 12 2 794 18 71 276 17 127.10 62 97.67 CHANGED soKRKsYVRpMpssWWpKhsFY+hYMlREuTulsslWFslVLlaGlhuLsp....GssuassFlsFLQNPlVllLNlIuLhusLhHshTaFphsPKsh.sIhlKsc+lsspsIlpuLWAlTslVSllsLslshl ................hoKRKPYVRsMTuTWWKK.LPFYRFYMLREGTAVPAVWFSI.LIaGLFALKs...........GP...E...u...WtGFV.sFLQNPVlVIlNLITLAAALLHTKTWFELAPKAA.N.I.I.VKsEK...MGPEPIIKu.LWAVTsVsTlV.ILaVAL.h...................... 
0 8 27 50 +2623 PF02313 Fumarate_red_D Fumarate reductase subunit D Mian N, Bateman A anon Pfam-B_12414 (release 5.2) Domain Fumarate reductase is a membrane-bound flavoenzyme consisting of four subunits, A-B. A and B comprise the membrane-extrinsic catalytic domain and C and D link the catalytic centres to the electron-transport chain. This family consists of the 13kD hydrophobic subunit D. 25.00 25.00 28.90 28.90 23.80 23.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.46 0.71 -4.21 21 816 2012-10-03 07:11:12 2003-04-07 12:59:11 12 3 799 18 72 251 10 113.80 66 96.91 CHANGED hspsPKRSsEPlaWuLFGAGGMlsAhlsPVlILllGlLlPLGlls.tshsacplhuFApoalG+lhlLlllhLPhWpuhHRlHHGh.HDLKlHh.ssuphla.YGhAslholls..hhhVhs .....I.NsNPKRSDEPVFWGLFGAGGMWuAI...IA....PVhlLLV...GILLPLGL.h.P.uDAL.SYERVLAFAQSFIGR...lFLhLMIVLPLWCGLHRhHHAM.HD.LKIHV.P.A.GKWVF.YGLAAILTVVTlIGll.T................. 0 8 28 51 +2624 PF03630 Fumble Fumble Finn RD anon Pfam-B_3299 (release 7.0) Family Fumble is required for cell division in Drosophila. Mutants lacking fumble exhibit abnormalities in bipolar spindle organisation, chromosome segregation, and contractile ring formation. Analyses have demonstrated that encodes three protein isoforms, all of which contain a domain with high similarity to the pantothenate kinases of A. nidulans and mouse[1]. A role of fumble in membrane synthesis has been proposed[1]. 
21.20 21.20 21.20 21.60 20.90 21.10 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.90 0.70 -5.59 11 1080 2012-10-02 23:34:14 2003-04-07 12:59:11 9 12 701 15 460 840 178 273.20 33 69.95 CHANGED s+lulDIGGoLsKLVYassps.sspp.pp..t.h.p...............................tGRLpFlpFpTpcIsphlpah+p.............pshsp.phslpATGGGAaKFt-hh+cplsl..plp+.DEh-sLlpGlsFllp...sl.pEsFsap.sp-sE..h.....phs.tsshYPYLLVNIGSGVSIltVpGpspacRVuGoSLGGGTFhGLssLLTsspoF-EhL-hAp+GDsoslDhLVtDIYGsc..Yp+hGLsusslASSFG+sh.....pcsKp....................ppaspEDlu+SLLhsIoNNIGQIAhhpAhppslcRlaFsGsFlRspshoM+TLoYAlcFWSpGphpAhFL+HEGYhGAlGAFLpht .........................................................................................................................................................................................................................................................................................................ts.p..hp..Fhp.....o.p......ph.p.p.....hl.phhpp..............................................................hlphTGGsA...hh.h.....tp....................h...s..h......th.h.sEh-.shh..................pGl.hl.p................E..............................................................................................p..hps.....hh.salllNlG.oG..........sS..llhl..p..u..p..s..p...h...cRVuGo..ulGGGThhGLspL........L..o.t.h.p.....s.a--hlp...hA.p........cG......DpsplDhh..VtD.IYuss................c....s..Lsu.shsA..S..sFG+sh..............pp..t...p.t.......................................................................thoppDlstullhhl................s..psIuplAh.hhAtpp......pl.ccllahG...shh+sp....h.hhchl..s...sahsh...............tthpshFlc..ctua.GAlGAhh...t............................................................................................ 0 148 239 359 +2625 PF04930 FUN14 FUN14 family Wood V, Bateman A anon Pfam-B_8237 (release 7.5) Family This family of short proteins are found in eukaryotes and some archaea. 
Although the function of these proteins is not known they may contain transmembrane helices. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.22 0.72 -3.72 34 431 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 305 0 272 412 7 95.60 28 60.52 CHANGED QlulGulsGassGashpKluKlshhslGssllhLphhpppGhIplsWpplpptlppstppst...................shhpphhshl..ts........................shsuuFsuGh.hlGht ........QlshGulsGhssGahhpKluKlsAhslGsuhllL.QhhsppGal....plsWp+.lpc.p.h.p.p..s..pppht...........................................................h.phhp..ph.hphlt.tsh..........................hsuuFhuGhhlGh............................................. 0 82 127 205 +2626 PF01475 FUR Ferric uptake regulator family Bateman A anon Prodom_2003 (release 99.1) Family This family includes metal ion uptake regulator proteins, that bind to the operator DNA and controls transcription of metal ion-responsive genes. This family is also known as the FUR family. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.39 0.71 -4.18 24 9302 2012-10-04 14:01:12 2003-04-07 12:59:11 14 10 4091 22 2167 5940 4980 119.00 26 80.09 CHANGED csGlKlT.pRhpILpllppscp...HhoAE-lYctlh..ccsspluLuTVYRsLphhcctGllpchphs.sucshaElsp...tcHHcHllChcCGclhEFpsspIEphppclsccpGa+lhsHplplY .......................................t.tGl+h.T.pRhtl.L.c.l.l.hpp....pp.......H..hoA--laptlp...........p..p.........t...s..p..l...u....l....u............TVYRsLphh.t-tG.l.l.c.c...l..p..........hs.....s.......u.......p......s...h..a....-..........h....sp...............tc.p.......H......p.......H...........l.....l......Cpp....C.G......cl..h...-....h....p....s......s............l.....p....p.....h.t......p...p....l...t...t....p...h...uaplppppl.h........................................................... 
0 737 1498 1892 +2627 PF00757 Furin-like Furin-like cysteine rich region Bateman A anon Bork P Domain \N 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.75 0.71 -12.75 0.71 -4.55 40 888 2012-10-02 14:20:19 2003-04-07 12:59:11 15 44 120 49 334 892 2 154.30 37 12.91 CHANGED scpCs.........ssp.sp.......t.s..CWuss........cCQ..................plCsppCst..tCp.....ssscCCH.ppClGGC......ouPp.sosChAC+capp....sGpClspCPs................upYpF.GtpCV..cpCs............................ps.hlh.....csupClhpCPsuhpcst......ssstpCp.Cs.Gh.CPKlCp ...................................t.s...........t..t.Cp.ss...........tts..CWups........pCQ.........................tphCsp.pCst...pChh..............pss-C..CH.ppC.s.G.GC......oGPp...ss-C.l.AC+pF.c.......sG..sCh.....pp....CPs................u.+.Y..p....a...GspCV.....c.pCP...........................................pp..hlh.......csuuC...lpp.CPsshhcs...........sssttCp.sCs..G...C..P.KsC.............................. 0 65 91 210 +2629 PF04632 FUSC Fusaric acid resistance protein family Mifsud W, Bateman A anon Pfam-B_5345 (release 7.5) Family This family includes a conserved region found in two proteins associated with fusaric acid resistance, Swiss:P24128 from Burkholderia cepacia[1] and Swiss:Q48403 from Klebsiella oxytoca. These proteins are likely to be membrane transporter proteins. 
30.90 30.90 30.90 30.90 30.80 30.80 hmmbuild -o /dev/null HMM SEED 650 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.29 0.70 -12.94 0.70 -6.09 92 3210 2012-10-02 19:04:43 2003-04-07 12:59:11 7 8 1176 0 491 3624 82 530.20 21 92.95 CHANGED phhaulKshhAuhLALalAhtlsLspPhWAhhTValV.upPhs........GhshuKuhaRllGTllGsssulsllshhsppPhLhhhulAlWlulCshhus.h......Rs..cuYuFhLAGYTssllulsuls..sP...ps.....lFshAlsRspEIslGIlCuslVusllh.P...ppsstsltsplpphhtshtphs..sssLssptssst.....................tttplhuclssl-shtstssa-ssphptpsttlptLpschhsll...............shhpultphhptlp...stsss..........ltsh..lpplss.............................ttt.tttht..............................tLhtpl.tshpstht............shtthhhtphhphLtphhpshpshpslppshtt............................................................tpsht..hsh+pDhhhAhhsulRssluhhhsuhhWlsouWsuGusslhhsulssslhustssPs...hshphhhGshhusshuhlhhahllPplss.FshLslsL.usshhhsuhhhspPphus.huhuhslhhshhlu.psshs..hshssahNsulAhllGhhhuslshtll.hPsssphttcRLhpshhpcls.phsptphst...tpt.......tatuphhD+ltpLhsth......ttstspcphhp.slssLclGpsllpLRcthtp..........hssshpts.lcphLpsls.......t..tst.ttt.tsssthhptlppshtthssptsttt...............................tphhssLhtlptsLhs.spsh 
..............................................................................................................hhauh+hslAhhL.u....l....h..lu..h......h......h....p........l.....s......p..s......t........Wuhho.s...h..l...V..h.t..P..ts.....................Gslh..+uhhRlh..GTllGsh.s...u..l...l....l.....h...h...h......h.....h........p.....p....P....h........l............h.....l.l...h.h..u.h..h........h....u.h..C..s..a.l..u.t.....................pt....t.u......Ys..h...h.L..A....Gh.T.h.h...l.ls.hs.hhs...pP.....pt.................h.h..p..h.A.l..h.Rss-l..llGllsuhhhshl.h....P...........pps...t....pt...h...p....p...p...L....p.s...h...l.s....p...hh....p....hh.....p.hhpststpt.............................tthttl..htp....h....h...s...h..p.t....h.......t.s....s............h...-...s..........t....p.....h......p.......p...t.......s..........t.......t....l.......p......s......l......p...p....l...ph.h....................................ohh.s...p.h...h..h......p..h.........p....t.th.......................lpph..lppltp................................................stth.ht.p.ht............................................................................pl..hp.l.t..ttttt........................................hp...h..hh..t..p..h..h....hh.h...hh...tph.t.t..p.h....................................................................................................h...hs.thp....h..h.h...hh+shhshhhhhhh..hs.tW..s..h.shhhh.hslss....ths.s.sh....h..hhhshhh.h...huhlhhhhlhs.hpt.h........h.hh.l.s..h.h.h.h...hh.h..t.......p..s...hh.h.t.....hl..hh.h.....s.........hp.h...t..hhs.ps....hu.llGhhhuhhh.hhhl...hs......s.tht...hhtthhtth.....t.h.ph...................t.p.....th.sh.h.........h..h...........................t...h...................t..t....h.h.t...............................ht..h....hhtth............................................................................................................h...................................................................................................................
..................................................... 0 86 207 348 +2630 PF00523 Fusion_gly fusion_gly; Fusion glycoprotein F0 Finn RD anon Pfam-B_102 (release 1.0) Family \N 20.80 20.80 20.80 22.30 20.60 20.70 hmmbuild -o /dev/null HMM SEED 490 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.33 0.70 -6.43 12 7380 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 164 82 0 4285 0 200.40 38 89.01 CHANGED IshstLpplGllssps+phtlhoshpopalVlcLhPsls.......spssCspsplspYpphLscLLpPlt-sLst.pp..ts.......sspRp+RFhGsllGthA...LGVAT..AAQITAulALscAppsttsItplKsSlpsTNcAVpplppuspplshAVpslQDalNsplsPulsphsCchtshplGlpLs....ahoELsolFusplsssu..sslolQALp.Lhuusls.llpshhssphshhpllpothlpGpllsls.pth.hslplslPslsphssshlhch.slShshp...spEhhhplPshlhspus.ltsastss.ChhsspshhCspssu.slssphppC...lpGshosCshohlhushhs+FhhspGsllANCh.uhhCpCsssstsIsQs.spulohIshppCshltlsshphplsp.pspsshhtshslhsu.slslpPlD.lSspLupssppLpsupphlccSsphLsslsstpsotsshlhhlllsslslllhll ..........h.............................hhlcl.s.............th.pt.lt.hpphlppl....tssltt.ptshp....................sstRptRhlGhllGuh............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +2631 PF01621 Fusion_gly_K Cell fusion glycoprotein K Bateman A anon Pfam-B_1083 (release 4.1) Family This protein is probably an integral membrane bound glycoprotein that is involved in viral fusion with the host cell [1]. 
19.40 19.40 20.90 20.40 17.90 17.70 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.22 0.70 -5.40 11 89 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 40 0 1 73 0 275.70 43 99.05 CHANGED hhsspst+lullsllouYhlhllWYss.tsphts..sClYAstshs...sssshsW....................ttaNsoLlYl...shsst..h.sshpshsshCRpsllstsshshhstssslcc+lRhVhtsRNChAYLWpspl+hlsluhhlYhsFlslRppRRMFGshRsss-hlSPssYoLNYAupllSsllL+ssYTKlsRLLsElshpRpuLScsFctDPloFhap+ssshsLlshElhl+luutslshsTlslsasPCuhlaPhal+IhsWlFVuslsslELlull.tscPsptuut........sssssspss..GltulCs...sCCusllSGlhlKslYlshlsssVllhl+YEpclQhpLFG ....h.hhhph.hh.hlhhhosashhhhaYss.hsp..p...ClYAshs.....sst..shhW......................hNpohlah..........sst....sshtshs..Cht.hlttp.hsh.s...shpp+lh.VhtshsChthlW.sphphhhhuhhlYhhFlhh+p.RpMFGshpsttchlsPstYhhNYsspllusshLthsYTKhschLCELSlpRpsLsphFcsDPloFLa++PululllssElhlRhsAhsLlluTshls+ssCAlsYPLalpIhTWsFVuhlulsELhhll...cpssA.csut.........susssscsp.......GhpGVCu...pCCShlLSGIsl+lhYlAllshhVlhhh+YEpplQhtLhs.... 2 0 0 0 +2632 PF02663 FmdE FwdE; FmdE, Molybdenum formylmethanofuran dehydrogenase operon Bashton M, Bateman A anon COG2191 Domain This entry represents the FmdE protein that is encode by the molybdenum formylmethanofuran dehydrogenase operon. FmdE does not co-purify with the molybdenum isozyme that is formed by FmdC and FmdB [1]. The domain is typically found as a single copy, but is repeated in some sequence two to three times. It is also common place to find this domain co-occurs with a zinc-beta ribbon domain, suggesting that is may bind nucleic acid and be involved in transcription regulation. 
21.60 21.60 22.50 23.50 21.40 20.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.76 0.71 -4.25 188 431 2009-09-10 13:21:47 2003-04-07 12:59:11 9 12 244 4 258 430 25 131.40 25 54.33 CHANGED pFHGHhCPGlslGh+hsphAhcpL....sh...t......p.........s....cplhslsEss.....s...........ChsDAlQhlsGCThG+..s...sLhhh............ch....GKhAhoh...hscps..sculRl.......h......hp.pthpth.....htth..p........................cppppth..ptlhphs..cclaphpt ........tFHGHhCPGlslGh+huphAhcpL....sh....s........p.................s......cclhslsEss..............s................ChsDAlQhlsGCThG+....u....sLhhp.............ch......GKhuhoh.........hs..cps......sculRl............h.....hcs..pthpth........ph.th..p....................p..pphh.p.hhphs.pphh....h................................................... 0 119 200 228 +2633 PF04186 FxsA FxsA cytoplasmic membrane protein Mifsud W anon COG3030 Family This is a bacterial family of cytoplasmic membrane proteins. It includes two transmembrane regions. The molecular function of FxsA is unknown, but in Escherichia coli its over-expression has been shown to alleviate the exclusion of phage T7 in those cells with an F plasmid. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.35 0.71 -4.50 136 1830 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1809 0 376 1109 1286 120.20 33 73.64 CHANGED hl.lhlllP..llEIslllpVGshIGhhsTlhLlllTullGshLlRpQGhpsltphppphp...pG.chPuppll.....-.GhhlhluGlLLlhPGFlTDhlG.llLLlPssRphl.tth....ltp+hph...........ts...s.ht ...................................hhhhlhshlE...IslhItVu.p.....hlG.s.h.h...TlhLllhTullGh.Ll.R.s.QGhps..hhphppch.s......tG......c.............P.......u.t.phl............c.ul.hlhlAGl.LLllP.GFhTDhlG.lLL.L.l.Pss.pphl.phh........lh.pht.........tth.tthh...................... 
0 104 230 311 +2634 PF04799 Fzo_mitofusin fzo_mitofusin; fzo-like conserved region Waterfield DI, Finn RD anon Pfam-B_6217 (release 7.5) Family Family of putative transmembrane GTPase. The fzo protein is a mediator of mitochondrial fusion [1]. This conserved region is also found in the human mitofusin protein [2]. 23.70 23.70 23.80 25.60 23.50 22.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.02 0.71 -4.88 11 189 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 95 1 101 193 1 163.20 50 23.86 CHANGED hsshspsshopE-hhlshVsulASlTSRoShGlllVGGVlWKolGWRlIsluhulYGsLYlYERLoWTT+AKERuFKpQFVcaAocKLphIVShTSuNCSHQVQQELuoTFARLCpQVDsTppsLp-EltcLspcIppLEslQspuKlLRNKAshlpsELppFpcsa.Lpts ..................................................hs.sshoQEEhMl..ohlouL.ASL..T..SRTS..MGllVVGG...VlWKsVGWRLIulShulYGhLYlYERLTWTT+AKERAFKcQFVpaAoEKLQhIV.ShTuuNCSHQVQQELusTFA+LCQQVDlTpcpLEpEIsplsccI-hL-slQspuKlLR....NKAshL-sELp.Fscpa.Lp..s............................................... 0 28 36 66 +2635 PF01125 G10 G10 protein Finn RD, Bateman A anon Prosite Domain \N 23.80 23.80 25.10 25.10 19.90 21.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.99 0.71 -4.64 28 378 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 308 0 261 313 4 138.50 57 91.64 CHANGED MP+I+p.pp..+tPP-Ga-cIcsoLpcasp+M..................+-Apsps...tc..sc...............p+sEs.........hW.IaplpHpRSRYlY-haY+++...tIS+ELY-allcppYADssLIAKW+KpGYE+LCCL+CIQss-ssau.............................sTCICRVP+upLcps........phlpCspCGC+GCuS .................MP+l+pt.p..KsPP-Ga-hIEsTLpEhppKM.....................................R-AE......s-s...H-...GK...................+KsEs..........LWPIF+IpHQ+SRYIYDLaY++K...AIS+ELY-ahl+psYADtsLIAKWKK...pGYEp.LCCLRCIQT+-oNFu.............................osCIC.RVPKupLcts.........phl.pC....lpCGCRGCu............................... 
0 89 141 212 +2636 PF00503 G-alpha G-protein alpha subunit Finn RD, Anantharaman V anon Anantharaman V Domain G proteins couple receptors of extracellular signals to intracellular signaling pathways. The G protein alpha subunit binds guanyl nucleotide and is a weak GTPase. A set of residues that are unique to G-alpha as compared to its ancestor the Arf-like family form a ring of residues centered on the nucleotide binding site [3]. A Ggamma is found fused to an inactive Galpha in the Dictyostelium protein gbqA [3]. 48.80 48.80 48.80 48.90 48.70 48.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.31 0.70 -5.78 134 2902 2012-10-05 12:31:08 2003-04-07 12:59:11 15 23 511 117 1599 2617 11 305.60 39 89.11 CHANGED Mushh.......................................................................................ptpppttppps.p.ph.p.c.p.h.p.pc.ppptp.................pplKlLLLGsGESGKSTlhKQh+llatssas........................pp.EpppapsllhpNllpuhptllcuhpphphsht................................................................................................................................t.ppppptphlhphtsthtp.............................................................................................................................hstchspslppLWp.Dsslppsap..+ppchpL...................tDsspY.................Flsslc........Rlspss..YhPop..................pDlL+sR.l............................pTsGIhEhpFph......................pphpa+hhDVGGQ.RoER+K.WlphF-..................................sVsullFlsuLS-YDpsLhEDpps......................................NRhpESlpLFcsIsssphFp.......sssllLFLNKhDlhccKlpps...lpphFscap............................Gs..sshcp.stpalpppFhphsppp.p........................+plYsH............hTsAs..DTpslphl...hsslp-hI 
.............................................................................................................................................................................................................................................................................................t..st..h.p.p.l.p...tp..t...t..........................................................................pth..Kl.L.L.L...........G..s..........GE....S...GKST.......h..lK.........Q..M..+.........Il..H.t..s.G.as...............................pc.-....ttp.ap.hl........hpNhhp..u..h..hsllcAhpplth.ht.....................................................................................................................................p.tpt..psp..l.hth...s.t...t..............................................................................................................................................................................................................................................hs.chht..sl..p...pLW..p.Ds.Gl..p.tsap...R....ppEapL.............................................DsAtY......................................aL..ss..l-........Rl..s.t.s.s..YhP..op......................pDlLRsR..l............................hToGIh..Eh.pFph...........................................................................pplpa.+hhDVG.GQ..RS.....E...R+K..WI..HC.F.-..................................sV.Tu.I.lFhl.......AlS.p..Y.D..s.L..hE...D..p..pt.................................................................................................................................NRhp...EShtLFcoIh..N.s.caFt.......po.SlIL.......FL.NKhDlhpE..Klht..s.....lpth.F..P-Yp........................................Gs..pshpp...Atta.l.....hppF..phsppp.t........................+.lYsH......hTsAs..DTpslphV..FssVpDhI................................................................................................................................................ 
0 558 822 1230 +2637 PF00631 G-gamma G_protein_gamma; GGL domain Bateman A, Ananthraman V anon Ananthraman V Domain G-protein gamma like domains (GGL) are found in the gamma subunit of the heterotrimeric G protein complex and in regulators of G protein signaling (RGS) proteins [1]. It is also found fused to an inactive Galpha in the Dictyostelium protein gbqA [2]. G-gamma likely shares a common origin with the helical N-terminal unit of G-beta [2]. All organisms that posses a G-beta possess a G-gamma [2]. 22.30 22.30 22.60 22.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.37 0.72 -4.23 50 1212 2009-09-12 06:39:52 2003-04-07 12:59:11 17 17 266 24 681 1015 0 65.80 28 29.38 CHANGED sphphphtpEl.ppL+p-Lpc..pRl.plSc..AstsllsYsp..sscDshlsss......s.ttpNPatpp..............tss.Chlh .......................t.....thpcpl.ppL+hclsh..pRl.KlSc...........uut....s....LhsYs-p.....hsppDPllsss.......s.tt.NPahpc....................tts.Ch......................... 0 129 220 399 +2638 PF04466 Terminase_3 G2P; Phage terminase large subunit Waterfield DI, Finn RD anon COG1783 Family Initiation of packaging of double-stranded viral DNA involves the specific interaction of the prohead with viral DNA in a process mediated by a phage-encoded terminase protein. The terminase enzymes are usually hetero-oligomers composed of a small and a large subunit. This region is found on the large subunit and possess an endonuclease and ATPase activity that require Mg2+ and a neutral or slightly basic reaction. This region is also found in bacterial sequences [1,2]. 
22.40 22.40 22.40 22.40 22.10 22.30 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.22 0.70 -5.38 10 1027 2012-10-05 12:31:08 2003-04-07 12:59:11 8 7 859 2 119 1203 485 355.30 23 85.08 CHANGED s+papVhhGGRGSuKShslAhplVl+h....hpp...stssLl.lRcltNTlccSlasplp-slspLslschF+hs+Sslpl.phssGup..FhFpGh.DcPtKlKSlc....slsslWlEEAuphpp-s.acpLlsolR......c.thclahSaN.PhscpsWla+pFh..........Dspth........sDshlc+STYpDN...FLscshlcphE-hK+....pNschYRhhhLGEhsssGstlhs...lcshplpssp.tthphshth...........DhGapscssuhlphulch++phlalhs-ahp......sthlccpsu-hl+-.........tphp+cshpu-usEs+ultshcpp.......................thh+hhsu++upsohhpts+hlcpa+sllt..................pscspphIpEhpphpappDcpus.hs....c.hctD.....sHslDAlhYAlcc ..............................................................................................................t...hhlhhGG.RGSuKSh.s....h.uhhh..lhth......hp................s.h.t...hlh..h...RchtsolccSs.app.l.c..sl.p.p...h...u......l....p..c..........h..a..c.h.sp.o..h.c...h..hhss...Gsp.....h.l.FpG.....h....c.........s...s...p..+l..KShp.........sls..hhWh.EEA.....pp..h....pp.ps....aptlh.olR...................p.h....tp....l...a.h.oa...N.P.h.pp...ts..al.cpa.h.............................................................................psshl.hp...ssY....p..DN............a.lsc....hh.p.p.h.-.p.h.p.p................ps.sph.Y.+..h..hh.Gch.h.s.s..ss.h.s..h.................h..pt......h..pt..t........t..t..h..p....h..h.th......................................Dh....G.as....p...c.ss.uh.h..p..h.h...l................p...p.ptl...a..l.....p.-h.h.t...........................th......p...p..t.h..sp.hlhp...........................................................th..t.pt.......hh.uct..s..p......c.ht.thcp....................................hhph.hsspK.s..s..s.h..s.........p....h......l...p.t....h......chll................................ps.phhpEhpph.paphD.pt.s.p..h.........cp.p.........sHhhDAhhYsh...................................................................
............................................................................................................................................... 0 34 78 95 +2639 PF04309 G3P_antiterm Glycerol-3-phosphate responsive antiterminator Waterfield DI, Finn RD anon COG1954 Family Intracellular glycerol is usually converted to glycerol-3-phosphate in an ATP-requiring phosphorylation reaction catalysed by glycerol kinase (GlpK) glycerol-3-phosphate activates the antiterminator GlpP [1]. 21.20 21.20 21.20 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.11 0.71 -5.23 8 1079 2012-10-03 05:58:16 2003-04-07 12:59:11 7 1 949 12 137 527 4 171.60 41 93.62 CHANGED PllsAl+shcpl-chl-S-hphlhLLsuclssl+pll+hlKs+sKpsFlHlDLlpGLspc.EhuhcFIppthcsDGIISTKuplltpAKKpslhsIQRlFllDSpALcpuhcpltphcPDhlEVLPG.lhP+lIccIsc+oshslIAGGLlcTcEEVcpAL+uGAsAVS.............TSpc..cLWc ..............................................................sllsAl+s.c.sLct.hl.c..o..c.h...ph.lslL..spIsplpsllchl.KpsuKhsFlHlDLlcG.lusc.Ehulpalpp.hcsc.GIISTKushl+pA+phuhh.sIpRlFllDS.uhcpuhc......lpp..sp..PDhlElLPG.sh.P...+.l.lpp...lp..c.ch.p...hP.l..IAGGLIpsc--lppAlpuGAsAloTSppplW..................... 
0 64 97 119 +2640 PF00479 G6PD_N G6PD; Glucose-6-phosphate dehydrogenase, NAD binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.70 21.70 22.50 22.30 21.50 21.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.08 0.71 -4.05 117 4951 2012-10-10 17:06:42 2003-04-07 12:59:11 17 21 3884 29 1192 3729 762 172.30 37 36.68 CHANGED llFGAoGDLA++KLhPALapLhppGhLsps.hpllGhARpcho.c--a+phlcpslpp..........tpphsppthcpFhp+lpYhssshs.sspuappLpptlpch-p....t.ttssplFYLAlPPshFsslsppLppssls..ttp......hsRlllEKPFG+DLpSAcpLscpltphFcEcQIYRIDHYLGKEhVQN .........................................................llFGuoGDLA+RKLhPuLapLh.p.p.G.h.L.......s.p.p...htll..Gh..u..R..p.c..h.s.....c...-.p..app...h.l.cp.u...lpp....................p..t.h.sppthcpFhp+h..pY.hsh.-.h......s..c...s.p.s.....app.......Ltphlsph....pp..................t.......t................splFYLAhsP.shFsslsppLppss..Ls..........ptp.t.............hsRlllEKPFGpDLtSApcLNsplt.phF..cEcQl...........a.R.IDHYLGKEhVQN................................... 
0 344 698 973 +2641 PF02781 G6PD_C Glucose-6-phosphate dehydrogenase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.50 19.50 19.60 19.60 19.00 19.40 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.96 0.70 -5.56 11 5312 2009-09-10 19:48:29 2003-04-07 12:59:11 11 20 3976 29 1208 3992 928 265.50 41 59.39 CHANGED LsLRFuNplFsshWNRcsIssVpITF+EsFGTEGRGGYFDphGIIRDlhQNHLLQlLsLlAME+PsShss-sIRDEKVKVL+ulcslssc-V....VLGQYsuu.sGs....ptGYl-DsTVspsSpTsTFsAhhlcIcNERW-GVPFhlRsGKALsp+KuElRlQFKsVstslFcpp...htcNELVIRlQPsEAlYhKhhsKpPGls...hssppo-LDLTYucRa.pshhlP-AYERLILDshpGspspFVRsDELctAW+hFTPlLctl-.scps+PhsY.hGS+GPptuDchhpcsGasascs ................................slRF.AN.s.lFcslW.NppaIcpVQI.............ThuEplGl.E.sRuG.YYDp..s.Gsl..R.DMlQNHlL.................Q..lLs.llA..ME..Ps...s...h....s....s.-....s....lRsEKl.................K..........VL+u..lc....h....s.....t..p.s.l...........hV...RGQY....t..u...Gh............................tu................t......h.uYh-E.....s....l....s.......s.Ss.TE.TFsAh+h.I-NhRWs...GVPFYlRTGKRLsp+s.oclsl.FK.p...s....s....h....s...l.F....tps................hs.Nh....LsIc.....l.Q..P.sE....Glplp..h.....t.....s...K.......P.G..s..................hp..h..p...sp.L-.......h.....s.......h....s.pp................t..........p..h.....p.........s..-....AYE+LlhDshpGstshFs.+.......tDEl.ctuWchlsslhchWt....t.........s.........t....s........................s..Ytu.....Go..........h.GP.tu.thl.t+p.GhtW..h.............................. 0 350 709 987 +2642 PF01468 GA GA module Bateman A, Finn RD anon Pfam-B_895 (release 4.0) Domain The GA (protein G-related Albumin-binding) module is composed of three alpha helices [1]. This module is found in a range of bacterial cell surface proteins. The GA module from Swiss:Q51911 shows a strong affinity for albumin. 
24.10 24.10 24.10 24.10 23.90 24.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.86 0.72 -3.82 102 9090 2009-01-15 18:05:59 2003-04-07 12:59:11 12 216 256 15 122 9161 0 59.70 37 28.26 CHANGED lssucssLsGsppLtpAKpsAppplssLspLNssQ+sshpspIssAsolssVsslpspAp ................................VssuKsALN..G.s..p.p....LspA..K..psA.p.p.sls.sLspLNsAQKs.sLpsQIspAs..slssVsslppsAp.......................... 0 58 62 119 +2643 PF02938 GAD GAD domain Aravind L anon Aravind L Domain This domain is found in some members of the GatB and aspartyl tRNA synthetases. 21.10 21.10 21.10 21.70 21.00 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -10.04 0.72 -3.83 24 4662 2012-10-01 21:10:01 2003-04-07 12:59:11 9 15 4517 16 1094 3469 2015 96.00 34 16.26 CHANGED Fpsshtp..sGpV+AlplPs..sshoR+pIcthp...chuppaGApGLsal+lps.....tthpuslsKa...lsEpphpplh-pssApsG.DhllhsAsp.tphsspuL ............................Fttshps..tGpVtAlsl...s...Gu......s...phoR.Kp.lDp.hs......ca.s.p..t.a...G..AKGL..A..a.l...KV.s-............ssl...p..u.PlAKF.........l...s.p....-..t......h..ps......lhc......ps..s....A......ps........G..DllhF.sADp.tcllspsh....................... 0 359 705 913 +2644 PF02337 Gag_p10 Retroviral GAG p10 protein Bashton M, Bateman A anon Pfam-B_959 (release 5.2) Family This family consists of various retroviral GAG (core) polyproteins and encompasses the p10 region producing the p10 protein upon proteolytic cleavage of GAG by retroviral protease. The p10 or matrix protein (MA) is associated with the virus envelope glycoproteins in most mammalian retroviruses and may be involved in virus particle assembly, transport and budding [1]. Some of the GAG polyproteins have alternate cleavage sites leading to the production of alternative and longer cleavage products (e.g. 
p19 Swiss:P21411) the alignment of this family only covers the approximately N-terminal (GAG) 100 amino acid region of homology to p10. 19.60 19.60 19.60 21.50 18.50 16.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.96 0.72 -4.24 5 102 2012-10-01 20:35:47 2003-04-07 12:59:11 12 13 32 2 27 135 0 88.40 37 14.18 CHANGED MGSopSc.pLFVotLpcsLKpRGl+V+cscLlsFasFlc+VCPWFPpEGolslcsW+RVG-plccYYspHGPEcIPlsTauaWsLIRDlL- .................GpphSp...alphLphhLp...pRGlpVppssLlpFhphlccssPWFPcEG.T.lsLcsW++VGcpl+pahsh+Gs-pIPlpsashWsll+-hLp.................. 0 11 11 11 +2645 PF00540 Gag_p17 gag_p17; gag gene protein p17 (matrix protein) Bateman A anon SCOP Domain The matrix protein forms an icosahedral shell associated with the inner membrane of the mature immunodeficiency virus. 20.80 10.00 20.80 16.00 20.30 9.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.74 0.71 -4.41 3 37754 2012-10-01 20:35:47 2003-04-07 12:59:11 13 30 106 33 0 27496 2 123.20 74 36.45 CHANGED GARASVLoGGcLD+WEKIRLRPGGKKKYplKHLVWASRELERFAlcPGLLETcEGC+KILpQLpPuLpTGSEGLRSLYNTVAVLaCVHQ-IcVKDTcEALEpl+ccpscscKK..TtssssuuQQuAusGGTusSuGlSQNY ..........................GGc..LD.tWEKI...RL.RPGGK.K+Y+LKHlVWA.....S....R.ELERFAlNPG.L.LETo.EGC+QI...ltQ.L.QPuLQ.TGS..EE.L+SLaNTVATLY..C.V..HQ+I-VKD.TK.EALDKIEEEQ.N....KSKpK......................s...Q....Q...us...u..s............s..u...s..s...u...p...V.S.QNY...................................... 0 0 0 0 +2646 PF02228 Gag_p19 gag_p19; Major core protein p19 Bateman A, Mian N anon Pfam-B_1307 (release 5.2) Family p19 is a component of the inner protein layer of the viral nucleocapsid. 
20.80 20.80 26.00 23.50 18.50 16.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.19 0.72 -3.68 2 75 2012-10-01 20:35:47 2003-04-07 12:59:11 11 10 18 1 0 77 0 83.90 64 15.89 CHANGED MGphaSpsAsPIP+sP+GLAhHHWLNFLQAAYRLpPGPSpaDFHQL+KFLKlAlcTPshlsPINYSlLAuLlPKsYPGRVpEIltILIQpps ...MGphau.ssoPIP+sP+GLusHHWLNFLQAAYRLpPGPSsaDFHQL++FLKlALcTPlWlsPINYSlLASLlPKGYPGRVsEIlpILIQsQ.............................. 1 0 0 0 +2647 PF00607 Gag_p24 gag_p24; gag gene protein p24 (core nucleocapsid protein) Bateman A anon Bateman A Family p24 forms inner protein layer of the nucleocapsid. ELISA tests for p24 is the most commonly used method to demonstrate virus replication both in vivo and in vitro. 20.70 19.70 20.70 19.70 20.60 19.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.36 0.70 -4.78 16 45086 2012-10-02 13:14:50 2003-04-07 12:59:11 15 80 305 153 68 32172 2 179.20 82 55.09 CHANGED .+pLpthhpslcp.uhsustslshhpulupsh..sshDhpphLss..ss...sptus.hpll+st...lsEpstchcpsp..s..sshs........hss-.IhGsGs..spppptphths.....sltpphpphhltAhp+lsphtsss.shhsl+QGPcEPYpDFVsRLhpslcsp.sstcsKphhhcpLuhpNANs-Cpphl+slt....hpuoL.p-hlpACpslGustpKuplh ...................................................................................PRTLNAWVKVlEE.KAFSPEVIP.MFoA.L.SEGA...TPQDLNT.ML.NT..VG...GHQAA.MQMLK-T......INE..E......A.AE..WDR.lHPVH..AGPlAPGQ...MREPRGSD.IAGTTS....TLQEQIuWMTsN.....PPIPVG-IY.K.RWIILG...LNK...I..VRM.YSPs..SILDI+QGPKE.PFRD..YV.D.RFaKTLRA...E..Q.A....oQ-VK.N.W.M.T-TLLV.Q....N.A.NPDCKT.I....L+.A.LG....PuATL..EE.M.......................................................... 0 11 11 17 +2648 PF02093 Gag_p30 Gag P30 core shell protein Bateman A anon SwissProt Family According to Swiss-Prot annotation this protein is the viral core shell protein. P30 is essential for viral assembly [1]. 
20.40 20.40 20.50 21.50 20.30 20.20 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.58 0.70 -5.12 6 261 2012-10-02 13:14:50 2003-04-07 12:59:11 11 22 75 32 46 311 2 181.30 63 26.71 CHANGED QYWPFSuuDLYNWKopNPsFScDPsuLTuLIESlLhTHQPTWDDCQQLLQsLLToEERQRVLLEARKsV.GssGRPTQLPNEID-uFPLTRPcWDYsTAtGRE+L+lYRQhLLAGL+GAuRRPTNLAKV+pVsQGssEoPSAFLERLhEAYRhYTPaDP-sPGQpAsVuMSFIhQSAPDI+pKLQRLEGLQsaoLpDLVKEAEKlYNKRET .........QYWPFSSSDLYNWKsNsP.sFSEDPtcLT.........uLlESlhhTHQPTWDDCQQLLtTL.hTsEE+pRlLLEARK.....sV.GsD.GRP..T..QLs..NE...l...stuFPLp.RP.sWDYs.T.scG.R.ppLhlYRQhLlAGL.psAuRpPTN.LAKV+tlhQGssEsPSsFLERLhEAaRRaTPaDPps.uQcss..Vuh..uFIhQSA.DIt+KLpRLEsLpptpLtDLV+EAE+la.+RET........................................................ 0 4 4 12 +2649 PF03276 Gag_spuma Spumavirus gag protein Mifsud W anon Pfam-B_1878 (release 6.5) Family \N 19.60 19.60 19.90 19.90 19.10 18.60 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.99 0.70 -6.18 7 132 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 16 0 1 105 0 287.50 61 96.65 CHANGED MAt.....p.-LDs.tL.sla.s.Gl.ppP.Ht-lIulRhTuG.WG.u-RaphVplhLQDs.sGQPLQ.Pcac..s.RssNPtsp..hlluuPaspLRpAFpsl-lucGs.RaGPLusGpa.PGDtYSppFpPls.pEhAQhptpcLEp............lshlthtchEIRhLRp.h.chph...tGhssslPsAsts.PP..s..t.....................................................................P.p....PsA..........hPsPshsPhl..sssP.....PhPssp....IPIpHIRAVhGpsPsNPR-lPhWLGRsssAI-GVaPhsssshRsRllNALluup.GLsLps.-ssoWsuAlAsLahRTaGuhslHpLusVLpuIAspEGVssAapLGhMhospsasLVWGIlRshLPGQAlVsshQpRLDt.ssDtsRhtsF.phlpsVYplLGLNs+GQSlp....hsposptps.uputGRGppu.csp..........pppppGRppspssptQpsp.ps.ssQp..Qppsssps.ptp.GGYsLRPps.QPQRYGuGpGpt.Ns.......pPhRpsspspspppps.s...sRG.sQScssussttpGGRtspsRNppuussssop 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................AIDGVFPlTTPDLRCRIINAlLGGNLGLSLTPuDClTWDSAVuTLFlRTHGpaPhHQLGsVlpGIsNQEGVATAYTLGMMLSGQNYsLVSGIIRGaLPGQAVVTAhQQRLDQElDDQARAETFIpHLNAVYEIL.GLNARGQSIR.....ASVT.sQPRP.SRG.RGRGQstsRPSp.........GPAsSGRGRQRPAs.GQ.-RGSNsQNQs..QuNsuQ................................................................................................uGh................................. 0 0 1 1 +2650 PF00337 Gal-bind_lectin Galactoside-binding lectin Finn RD, Griffiths-Jones SR anon Prosite Domain This family contains galactoside binding lectins.\ \ The family also includes enzymes such as human eosinophil lysophospholipase (Swiss:Q05315, EC:3.1.1.5). 
22.10 22.10 22.20 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.79 0.71 -4.81 80 2093 2012-10-02 19:29:29 2003-04-07 12:59:11 17 35 209 254 1005 1892 5 132.10 26 57.70 CHANGED shhtsl.sssh.ps...GpplplpGhst..................sss...................spFplNltss............psslslHhNsRasp........sslVpNo..hhsus..Wu..p.EER...........................................................tshPFppGp..Fplplhsp.pcpaplhlNstphhpFtaR..hs...pplst.lplpG..Dlp.lpslph ............................................................l..su.l.ts...GpplhlpG.ps...................ss..............................................pp.Fplsltss..............ts-lAhHhNP.RFsp..................................stlVpNo...hhssp....WG..p..EER................................................................t.t.t.h.PF..p....p.Gps...F........c.......lp........lhs..p..pp.pap......l...hV.sG...pph...h...patHR.......hs.........pplstl.p....lpG...Dlp.lppl..h................................................................... 0 188 321 655 +2651 PF03902 Gal4_dimer Gal4-like dimerisation domain Finn RD anon DOMO:DM03802; Family \N 21.10 21.10 21.30 21.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.62 0.72 -4.03 4 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 49 6 40 99 0 49.00 39 7.09 CHANGED TRAHLTEVEsRlupLEQlL+.lFPchDlDplLpp+cp.pl+tlLp.htsosslst.u TRAHLTcVEsRLppLEplhtpLFP.s....t....-lDt....lL....p.....p.......................................... 1 6 23 36 +2652 PF01762 Galactosyl_T Galactosyltransferase Bashton M, Bateman A anon Pfam-B_885 (release 4.2) Family This family includes the galactosyltransferases UDP-galactose:2-acetamido-2-deoxy-D-glucose3beta-galactosyltransferas e Swiss:O43825 [1] and UDP-Gal:beta-GlcNAc beta 1,3-galactosyltranferase Swiss:O54904 [2]. 
Specific galactosyltransferases transfer galactose to GlcNAc terminal chains in the synthesis of the lacto-series oligosaccharides types 1 and 2 [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.29 0.71 -4.71 14 2182 2012-10-03 05:28:31 2003-04-07 12:59:11 16 34 186 0 1505 2219 31 177.90 24 47.89 CHANGED sRRsslRpTWhspp...h......sclptlFLlGhss.t-t+hcchlhpEu+.hYGDllhs-hpDsYpcLsaKoLshhhausopsPpschIsKlD-DVhh.sspLhshLtpt...hssp.psthYGhlh.psGhshtpcps+.............Wahs.psasssp..............assYsuG.hYllotcAsptlhpsoc+p.pahplEDshl.GlhApchslshhsh. .......................................................................................pRpslRpTWhttt....t.....................................htl...h..h.hFl..............l....G....t..s....s..............................s............t............p............h..p...p..................l..p....t.Esp.ta.s.D.l..l.....h..s....a...h.D..s.Y.tN.L...o.h...Ks....lh............h...h.............pas....s....p.......h..............s..........s............t.........s..c....a.lhK.s............DDDs.al.sh.spl...lp...hLtph................................p...p...p...........h..........h.....h......u.......th.....h.......p........t....t......s..........h..p..p........t...s.K..............................a.a..h..s...p..a.......s.h..............................................Y.P.sa.s.s.G.............s................uYl.hStsls.p.h.l.....h....t.....s..s...........t.....p........h.....p.h...........h...........h........-D.Vhh.Ghhhtt.h.t.l......t...................................................................... 
0 563 798 1152 +2653 PF02709 Glyco_transf_7C Galactosyl_T_2; Glyco_transf_2C; N-terminal domain of galactosyltransferase Bashton M, Bateman A anon Pfam-B_834 (release 5.5) Family This is the N-terminal domain of a family of galactosyltransferases from a wide range of Metazoa with three related galactosyltransferases activities, all three of which are possessed by one sequence in some cases. EC:2.4.1.90, N-acetyllactosamine synthase; EC:2.4.1.38, Beta-N-acetylglucosaminyl-glycopeptide beta-1,4- galactosyltransferase; and EC:2.4.1.22 Lactose synthase. Note that N-acetyllactosamine synthase is a component of Lactose synthase along with alpha-lactalbumin, in the absence of alpha-lactalbumin EC:2.4.1.90 is the catalysed reaction. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.64 0.72 -4.51 98 1328 2012-10-03 05:28:31 2003-04-07 12:59:11 9 46 442 59 697 3197 601 76.40 28 20.10 CHANGED pph..ssshsphphph.a.pphhGGlhuhppppahplNGas..NpaaGWGuE.DDDhhtRlhtpsh..p...............lpR....shshst.......a+hh....H.......p ...........................................h.h..tt..h.h.GGlhuls+pp.F..h..pl....N.Gas...spa.h...G.W.G.u.E.Ds-h.h.t...Rl.hh..s.G.h..p..........................lpR.....p.h.phst..........a+hh...H.tp.......................................................... 0 242 317 501 +2654 PF01296 Galanin Galanin Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.90 21.20 20.30 20.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -7.15 0.72 -4.11 4 94 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 41 0 35 88 0 24.10 73 25.87 CHANGED GWTLNSAGYLLGPHAlDsHRSFsDKHGLA ..GWTLNSAGYLLGP+AlDsHRSh..tDK.Ghs...... 
0 2 5 10 +2655 PF02052 Gallidermin Gallidermin Mian N, Bateman A anon IPR001049 Family \N 20.20 20.20 20.20 20.60 19.00 20.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.53 0.72 -3.84 3 129 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 89 1 5 51 1 45.90 56 90.42 CHANGED MEAVKEKNDLFsLDVKVNAKESNDSGAEPRIASKFLCTPGCAKTGSFNSYCC .........h-........clhDLDVpVKus..s.sNDSuuDp..R..ITS+SLCTPG.Ct..K..T..GSFNSaCC............ 0 1 2 2 +2656 PF02744 GalP_UDP_tr_C GalP_UDP_trans_C; Galactose-1-phosphate uridyl transferase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports fold duplication with N-terminal domain. Both involved in Zn and Fe binding. 29.20 29.20 29.30 29.40 29.10 28.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.15 0.71 -4.71 9 2558 2012-10-01 23:45:21 2003-04-07 12:59:11 12 9 2265 12 565 1810 220 176.00 32 43.44 CHANGED E.........cpa.cYhttp........GShLhhDYsp.......hEltct-RlVh.s-p.alsVVPaWAtWPapsLlLs.+c+lhpls-Lsst..........pppDLAu........hl+plsp+YDNLFE.........TSFPYSMGlHtushsspts...tth.hHsHaYPPLLRSATVRKFhV.GaElLupsQRDlssEQAAt+L+..........sLs.phHhchsh ...........................................................................th..c...tYFttp............GS.LhtDahQ...............................hE.hA....s....t..pRhlht.s.-...ahsVs.shhs.pWPhpslhLs..pp.c...hpcls-Ls-p.....................ppsslst................slp.lsp.+.h.D..s.h.F.p...............................sShsYshGh............................................................hH..sH..h..s...h.l.+.pt.s.lt..hh.V.G.h.t.h.L.s.tp....+.-.htE.us.hLp..................tls..thH.t...t.............................................................. 0 171 324 461 +2657 PF01087 GalP_UDP_transf Galactose-1-phosphate uridyl transferase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports fold duplication with C-terminal domain. 
Both involved in Zn and Fe binding. 22.10 22.10 22.10 22.30 22.00 21.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.35 0.71 -4.23 135 2821 2012-10-01 23:45:21 2003-04-07 12:59:11 17 13 2385 22 659 2041 370 183.20 28 46.17 CHANGED s.ths.p-p.......................................sphRhssLhuphll..............suscp.................tcR..satspppth.tsshsptD.......hshLsPssscl....ssp.hsspY..psshshsNcFhuLpp.csshhpp..........scs..................hapspsshG.phcVhh.tsc.p...............p-hcslsssh.tphtp.........p......................th.phl.saENcGhthus......o..HP.............................Hu.lasppalP ................................................................................s..h...Dp...sp.RhtsLh.Gphll...............................susct..........................tc+........sht.s...tp...tt....pp.h.s.sp-................shls........Ps......s.....s..........p.....l..............stp.....h..................ssY...........psshhhs...s......c............FhsLtpcs.s.ht.p....................................sps................................haps.pss.hG..phclhh.psc.p................pchcplsts...phsc.....h..p.....................................h.thl.saENcGhthG....s....o.......HP..................................Hu.lhsspalP.......................................... 0 214 398 548 +2658 PF00304 Gamma-thionin Gamma-thionin family Finn RD anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.49 0.72 -3.95 50 545 2012-10-01 23:31:40 2003-04-07 12:59:11 15 2 132 17 121 592 0 46.80 35 59.18 CHANGED +sCcstSppF.+GsChssss..CspsCpp..Esas...uGcCct...hpRR.ChCo+.C ................hCcs..Stpa...cG..s.C...h...s...sss..CsshCps........E...sas...uGpCph......pp+.CaCpt.C............ 
0 27 58 87 +2659 PF04410 Gar1 Gar1/Naf1 RNA binding region Kerrison ND anon DOMO:DM04007; Family Gar1 is a small nucleolar RNP that is required for pre-mRNA processing and pseudouridylation [1]. It is co-immunoprecipitated with the H/ACA families of snoRNAs. This family represents the conserved central region of Gar1. This region is necessary and sufficient for normal cell growth, and specifically binds two snoRNAs snR10 and snR30. This region is also necessary for nucleolar targeting, and it is thought that the protein is co-transported to the nucleolus as part of a nucleoprotein complex [2]. In humans, Gar1 is also component of telomerase in vivo [3]. Naf1 is an essentail protein that plays a role in ribosome biogenesis, modification of spliceosomal small nuclear RNAs and telomere synthesis, and is homologous to Gar1 [4]. 29.30 29.30 29.50 29.50 29.10 29.00 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.62 0.71 -4.77 57 687 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 360 15 496 691 25 146.10 25 42.52 CHANGED sp.hhcthhst...shshs..ssspl..lGph.pss-sp....lllcusss.......pV.phsuhlh.hEs+s.l.GplsElhGslpsshasV+.......sppthpst..shphGsplahsss....hph.hhspshtt....+Gscuusttccchs..tttt.pauccctptth+pttptp+ ...................................................................ttt....tth...h....t.h.p.s..sstp..l..h.lGph.p......hs.-.s.p....llscuhss..........pl..hsusla..h-...s.+.p...l.G+VsElhGslppsaaol+..................s-slpsp.....uh..p..hGsp.hYhssp....................hph.hhsp.hht.....................+Gsctut.ttstpss........tth.t.aus.sttt.tthtttht...t.................................................................................................. 0 175 287 416 +2660 PF01071 GARS_A GARS; Phosphoribosylglycinamide synthetase, ATP-grasp (A) domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_916 (release 3.0) Domain Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. 
The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the ATP-grasp domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF02786). 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.14 0.71 -4.74 19 4769 2012-10-10 13:17:03 2003-04-07 12:59:11 14 45 4503 19 1247 7505 5005 188.90 49 41.23 CHANGED uSKsFsKcFhp+asIPTA.pYcsFo..c.sccApualcctshss.....VVKAsGLAAGKGVlVs....psp-EAhpAlc-lhtpttFG.pAGcpVVlEEhL-G-ElSlhshsDGcolhshssAQDHKRlh-GDp...............................GsNTGGMGAYu....PsPhlopclhcplhcpIlpPTlcuhtcEGhsapGlLYuGlMlo.csG......PKVlEFNsRFGDPET ....................................................GSKsFuK-hM.t.+as..IP...TA...p..............Y....p..s..F...s........-......hc...p...A...h.....s..Y...l.c...p...t...G.sPl............VlK...A...D..G.L..A.A..G..K...........G...........V...l....VA...................o...h....-...E......A....t....p....A...l......c...c...h....h.......t.......s.......p.......t.......F.........G..........s.....A....G...p.........c.....V....V.I.EE.F.L....-...G...-..E..hS..h.hs..h..s...............D.........G........c.....p...h.l....P.h..s.s..u.....Q....DH.K..R..l..hD...GD.p.......................................................G.P..N.T.....G.G.M.GA.Y.o....Ps.P.h.l........o....p...........p..........l...........h.p..c.sh.cpIlpPTlcG.Mt.p..E.....G..p..sap.G.l.L.YAGLMls..psG..........PKVlEFNsRFGDPET................................................................................................................ 0 403 790 1057 +2662 PF02843 GARS_C Phosphoribosylglycinamide synthetase, C domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_916 (release 3.0) Domain Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. 
The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the C-terminal domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF02787). 21.80 21.80 22.80 21.90 21.40 20.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.90 0.72 -3.81 94 4668 2009-01-15 18:05:59 2003-04-07 12:59:11 11 41 4494 19 1219 3584 1998 93.10 39 20.27 CHANGED sulsVVlAutGY......Pts..p+.Gh.lsh....t....ttpsshlFaAGo...................p....................tp..ss...............pllosGGRVLslsuhusolpcApppuYpslppl..phs.shaYRpDIGt...+sh ......................................................ulsVVlAutGY......P..s....s....Yc....K...Gs.IsG......................ttstsstlFHAGT....................................p........................................hp...ss.................pl.lT..sG..GRVLslsu..hG.colp-AppcAYp.tl.s.p..I.pa......c....u......haaRpDIGh+A.h........ 0 384 769 1030 +2663 PF02844 GARS_N Phosphoribosylglycinamide synthetase, N domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_916 (release 3.0) Domain Phosphoribosylglycinamide synthetase catalyses the second step in the de novo biosynthesis of purine. The reaction catalysed by Phosphoribosylglycinamide synthetase is the ATP- dependent addition of 5-phosphoribosylamine to glycine to form 5'phosphoribosylglycinamide. This domain is related to the N-terminal domain of biotin carboxylase/carbamoyl phosphate synthetase (see Pfam:PF00289). 
24.30 24.30 24.30 28.30 24.00 24.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.21 0.72 -3.54 135 4637 2009-01-15 18:05:59 2003-04-07 12:59:11 10 33 4461 19 1204 3558 2108 100.50 48 21.94 CHANGED M+lLllGuGGREHAlAhpL.tpSsplsclasAPG..NsGh...uph..ups....hsl........shsDhssls..paApcpslDLslVGPEsPLssGlsDthpp....tG......ls..lFGPoptAApLE .......................................M+lLVlGuG.GREHALAaKl....spS...s...p........V.....c...p.lalAPG....Nu.Gh.......ut....u..p.s.........lsI.........shsDhsuLl.......sFAp..cp..pl-LslVGPEsPLl..t..GlVDsF..cs......s..G.....lp..lFGPopsAApLE................ 0 383 758 1019 +2664 PF03198 Glyco_hydro_72 GAS1; Glucanosyltransferase Mifsud W anon Pfam-B_2209 (release 6.5) Family This is a family of glycosylphosphatidylinositol-anchored beta(1-3)glucanosyltransferases. The active site residues in the Aspergillus fumigatus example Swiss:B0XT72 are the two glutamate residues at 160 and 261 [3]. 25.90 25.90 26.20 26.30 25.50 25.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.10 0.70 -5.44 12 708 2012-10-03 05:44:19 2003-04-07 12:59:11 9 14 163 3 511 679 10 297.50 41 61.20 CHANGED ssstutsss.sIpIhGN+FF.ops.......uppFaI+GVsYQPssSt...........SshsDPLADsc..sCpRDl.hhppLGlNTlRVYulssotsHDcCMphLpsAGIYlILDLssP..spSIsRtcP..oaNsshhpphhssIDsFpsYsNlLGFFAGNEVsNstssos..ussaVKAslRDhKpYIps+saRpIPVGYSAsDssssRlphAcYFsCGs.s...............s..+uDFaGhNhYEWC.Gt.SoapoSGYcs+sc-FcsasIPlFhSEaGCN....cVp.......sRsFsEVsslYSspMosVaSGGlVYpY.pEsNsYGLVplsssssp.hhsDFpsLKpphtplu 
..........................................................ss.....sslssIplp..Gs..cFa..ps................GppFal+G...lsYQ.ssu...............................sshhDPL..uD..s..p..sCpRDl..shhpcLG..sNsIRVYslDsst..sH-cCMph.Lsc...AG...IYllhDlssP.........stSI.sR............s...s...P..................sass.shhpphhsllDtFupYsNsLG......FFuGNE....V..hNst.ss....os.......uusa...V.KAssRDhKpYIp....s..+..s.....h..Rp.IPVG.YSu......sD....s..........s.........p..h.R...phup.Y........hsCusss..................p...psDFaGhN..s.YpW..C...G.s....S..o...a....p.....s..S......GYsphscpF..p..s..a..s..lPlFFSEYGC....N.......p.sp.............PR.FsEltA.lY.....u.....s.....p.MosVa....SGGlVYpYh...pE...s....N...s....Y.....GLVp...l...s.s...s...s..s.h.....hhtDaptLppphtp..t......................................................... 0 101 260 432 +2665 PF02187 GAS2 Growth-Arrest-Specific Protein 2 Domain SMART anon Alignment kindly provided by SMART Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.59 0.72 -4.21 4 597 2009-01-15 18:05:59 2003-04-07 12:59:11 12 116 140 1 314 529 5 69.70 52 2.76 CHANGED cpl-DtV+Rlltp..CpCsp+FpV.pluEGKYRhGDSphLhlVRlLRSpVMVRVGGGW.sL-caL.KpDPCRspp ................................plc-tVp+....ltp....C........pCsp....+.Fp....l.ppl..u..-..sK............YR..............h.........G...D.o..p..tLhll..Rl..L.R..o..p.VMVRVGGGW.sL-cFL.s.K...pDPCRsp.t...................... 0 92 125 199 +2666 PF00741 Gas_vesicle Gas vesicle protein Bateman A anon Pfam-B_545 (release 2.1) Family \N 25.00 25.00 26.40 30.10 23.30 22.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.58 0.72 -4.08 52 505 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 154 0 197 506 18 38.70 48 35.03 CHANGED uLsDlLDRlLDKGlVIsuDlplSlusl-LlslclRlllu ..uLsDlLDRlLDKGlVIsu.lplSlssIELLsIclRlllA. 
0 71 157 191 +2667 PF01304 Gas_vesicle_C Gas vesicles protein GVPc repeated domain Finn RD, Bateman A anon Prosite Repeat \N 20.50 20.50 21.90 21.90 19.90 18.20 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.90 0.72 -7.35 0.72 -4.32 19 227 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 48 0 5 187 0 32.40 49 53.18 CHANGED hppLpppTpcFLosTsppRhAQAcpQAppLppF ....ppLQppoppFLosTAptRhAQAccQApELhpF. 0 0 5 5 +2668 PF02704 GASA Gibberellin regulated protein Bashton M, Bateman A anon Pfam-B_1221 (release 5.5) Family This is the GASA gibberellin regulated cysteine rich protein family. The expression of these proteins is up-regulated by the plant hormone gibberellin, most of these proteins have some role in plant development. There are 12 cysteine residues conserved within the alignment giving the potential for these proteins to posses 6 disulphide bonds. 21.60 21.60 23.60 23.30 19.70 18.90 hmmbuild --amino -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -9.96 0.72 -4.00 41 346 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 59 0 171 350 0 59.70 52 54.40 CHANGED pCsspCstRCSposh.+chChphCspCCt+Ch.CVPPGTYGNKphC.PCYsshKT+pG...tsKCP .........pCsupCshRCStsuh...pc...hChchCspCCt+Ct.C...VP.PGTa.G.NK......c..tC.PCYsshpT.+pG...psKCP......... 
0 15 111 144 +2669 PF00918 Gastrin Gastrin/cholecystokinin family Bateman A anon Pfam-B_1542 (release 3.0) Family \N 22.20 22.20 22.20 23.80 22.10 22.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.78 0.71 -3.28 11 170 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 67 0 55 185 0 103.10 35 96.97 CHANGED tsslClslllsVLusushscssshsc......sthpp............ssptthpshpptp........csctshuulhp+hLtttphst.huchushps.htpp-stcthtD+DYhGWMDFGRRSuEEh-hsu ......................tlslslLlssLu.h.u.shupss..sp........thtp.......................t..s.tt.psht..t.....................csctshuuhLsRhlpt.ttpst.su.cho.hhps.h.p.-..sat.tp.pDYhGWMDFGRRSAEE.-................. 0 3 7 20 +2670 PF00310 GATase_2 Glutamine amidotransferases class-II Finn RD, Bateman A anon Prosite & Pfam-B_5381 (Release 7.5) & Pfam-B_455 (release 7.6) Domain \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.22 0.70 -5.75 57 7459 2012-10-03 21:14:07 2003-04-07 12:59:11 16 48 3746 79 2152 13504 10212 226.90 25 23.73 CHANGED CGl.Ghlsphpspso+.pllppulpuLpplpHRGuhss-.psGDGAGIhh..phPcphh.........th.h.......usu.hah...............t.hpt.h.t.s..lhuaR..lPht.t.hu..s...tt.P.h.Q.hlt..................................thc.pLahhR+.phpp......phttpt..........hYhsShs..sllYKG.hhs..l..a.a.DLps.phposhulsHpRaSTNThPsW.hApPhR.........hluHNGEINTlpGNpsahp..u+pthh..po..h.......t.h.Ph...t.............................................................................................................................sSDSsshDss.lEhhhtsG.hp...shhhhlP.sh..tp.ths....phpsFaca..hsslhEsWD..GPsulsh....o...D....G+...h..luAslDRNGL.R...PuRasl.T..............cDs...h.llhuSEs 
......................................................................................................................................CGh.uhl................pp...plh.p.hh.phL.phpaRGh..........DusGlhh.....stt.h.................................................................................................................................................................................................................................................................................................................................................................................................h...a....th.t.........t..h.....u.t.....h...sl.hH...Ra.sT.s...s.............s...p..h......A.pPhh.................................................hlsH.N..G.......pls.s..hp.....t...hp.h...h....t..p.t.h...h...ts...........................................................................................................................................................................................t.S..D.o.t.............h......p.h.h.t.h...h...th............................................................h............t..p.sh..aph.....t......h...h..p.....h..c.......G...sh...s.h.s.h......................Gp............h..h..s.....h.D...p..Gl...RPhhhhhp..............tpt.......hhhuSE.................................................................................................................................................................................................................................................................................................................................. 1 614 1255 1761 +2671 PF04572 Gb3_synth Alpha 1,4-glycosyltransferase conserved region Waterfield DI, Finn RD anon Pfam-B_4980 (release 7.5) Family The glycosphingolipids (GSL) form part of eukaryotic cell membranes. They consist of a hydrophilic carbohydrate moiety linked to a hydrophobic ceramide tail embedded within the lipid bilayer of the membrane. 
Lactosylceramide, Gal1,4Glc1Cer (LacCer), is the common synthetic precursor to the majority of GSL found in vertebrates. Alpha 1.4-glycosyltransferases utilise UDP donors and transfer the sugar to a beta-linked acceptor. This region appears to be confined to higher eukaryotes. No function has been yet assigned to this region [1]. 20.40 20.40 20.70 20.50 19.10 20.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.88 0.71 -4.47 17 301 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 96 0 207 294 11 127.30 29 35.50 CHANGED hsF-sp...HplsttClc-hspsaNGspWGpNGPtllTRVl.+hhCpspshtthpp...pCht.holhsPsAFYslsa.pacpaFcs.ptcpshphlp........pShslHLWNKho+..shhlc.GSpshhthLhppaCPpsapustphh ...............................................hFp.p...H.hhthshc-FsppY...sup.hWGp...pGPtLlTRVh.+phCshp..sh.t..t...........tChs...hshlsspsFYPlsap.cacc.aF.cs.psp..p..t.h....p..h...hp......................so.YulHlWN..+hop..tht.hch.s.Sp.lhtpLhppaCPtsht.h....h..................... 0 63 97 152 +2672 PF02263 GBP Guanylate-binding protein, N-terminal domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4308 (release 5.2) & Pfam-B_9065 (release 8.0) Domain Transcription of the anti-viral guanylate-binding protein (GBP) is induced by interferon-gamma during macrophage induction. This family contains GBP1 and GPB2, both GTPases capable of binding GTP, GDP and GMP. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.66 0.70 -5.33 13 1135 2012-10-05 12:31:08 2003-04-07 12:59:11 14 37 152 19 688 1289 39 218.70 30 40.25 CHANGED cppLtlNpEALcIL....psIoQPVVVVAIVGhYRTGKSYLMN+LAG...........................pppGFSLGuTVpS-TKGIWMWClP....HPspPchTLVLLDTEGLtDhEKuDs+sDuWIFALAlLLSSTFVYNShsTIsppAL-QL+hVTELTchI+u+ss...t.sclcsussFVuFFPsFlWTVRDFoLpLEh-GpslTsDEYLEpuL+LppGpstclQs.N.sRcCIRpFFPc+KCFlF-+PutctpL.splEplp---L-s-FppQlppFsSYIao.cuhsKTL ..............................................ph.ls..p.A..lphl.............ph..h..s..p.s...l...s.VV......ulsGhaRpGKSaLhs.h.h..h..t................................................................p..G.F..sh..t.s.......ss....p...s..p..Tp.G.Ih....h.W..s.s...........................ps.p...t.....p...h.s.......llLlDT..pGhhs....p.....p....u.s...........p....s.............s....s..hl..FAL.u...s..h..lSS..h...l.YN........t.s....I...pp.p..s....lp.p.L...........p..h............ho-.........................................................st..h..t......p...t............h.............p............F..........s.hhahl.RDa..s.h....h..c.h..s.h.t...............s..tp.....p.a.L......-p..t...L....p...h...................t.....p..p.......p....h..p...p.........R.tp.......l..pphF..s.phpCFh..hspPs.ph.th.........ph..ct......t..pplp.pFhp...pht.h...h.hlht..t........................................................................................................................................................................ 1 260 347 485 +2673 PF02841 GBP_C Guanylate-binding protein, C-terminal domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4308 (release 5.2) Domain Transcription of the anti-viral guanylate-binding protein (GBP) is induced by interferon-gamma during macrophage induction. This family contains GBP1 and GPB2, both GTPases capable of binding GTP, GDP and GMP. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.72 0.70 -5.23 23 678 2009-01-15 18:05:59 2003-04-07 12:59:11 9 20 96 9 366 663 7 220.50 29 37.66 CHANGED GGIhVoGs+LusLVpTYVsAIsSGslPClENAVlsLAQhENoAAVQKAlsHYpppMuQ+l.phPT-TLQELL-lHpssE+EAIpVFMcpSFKD..csQcFQKcLhspLcsK+c-FhcpNp-ASscpCpslLpcL.psL-cslppGsFStPGGapLalcc+cclcpcYpps...PcKGl+AcEVLQpF..LpS.......................+c.slpcoILQoDpuLTcpEKpIpsE+s+sEuAEtcpchLcpcpccppphMEsp-+SapEpl+QLpEKMEpERc.plhpEpc+hlppKLpEQcchLpEGFcpcucpLppEIpcLp ..................................GhpVsGp.tL.h.plhpsYlcslsu.G.s.l.Pshcssl.ulAphcNpuAVpcAhshYpppMpphh......p........hPh.p.......s....p.-.L.chH.pts...ccc.AlplFh...c...p...u.h..p..D.......s...p...p.......a..p.c.p..Ltpp.l.ccph..p..........s..F..............hp.pNpptu......thCpt...ht.........h.....h...............................h...................h.................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 107 154 219 +2674 PF02425 GBP_PSP Paralytic/GBP/PSP peptide Bateman A anon Pfam-B_2690 (release 5.4) Family This family includes insect peptides that are short (23 amino acids) and contain 1 disulphide bridge. The family includes growth-blocking peptide (GBP) of Pseudaletia separata and the paralytic peptides from Manduca sexta, Heliothis virescens, and Spodoptera exigua [1] as well as plasmatocyte-spreading peptide (PSP1) [2]. These peptides function to halt metamorphosis from larvae to pupae. 
25.00 25.00 31.00 42.20 20.00 17.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.43 0.72 -6.85 0.72 -4.42 3 25 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 15 11 3 34 0 22.90 81 27.38 CHANGED ENFAGGCsPGYhRTADGRCKPTF .ENFAGGCssGYhRTADGRCKPTF. 0 2 3 3 +2675 PF02526 GBP_repeat Glycophorin-binding protein Bashton M, Bateman A anon Pfam-B_1047 (release 5.4) Repeat This family contains glycophorin binding proteins from P. falciparum the malarial parasite [2]. Glycophorin is a cell surface protein of erythrocytes. The Glycophorin binding protein contains a tandem 38 residue repeat. In Swiss:P02895 the repeat occurs 11 times. 25.00 25.00 102.50 43.00 18.50 16.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.67 0.72 -4.56 9 59 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 4 0 24 67 0 37.70 83 63.70 CHANGED TSADPEGQIM+tYAADPEYRKHlpVhYpILTNTDPNDE .TSADPEGQIM+tYAADPEYRKHLpVhYpILTNTDPNDE. 0 24 24 24 +2676 PF04551 GcpE GcpE protein Bateman A anon Pfam-B_1482 (release 7.5) Family In a variety of organisms, including plants and several eubacteria, isoprenoids are synthesised by the mevalonate-independent 2-C-methyl-D-erythritol 4-phosphate (MEP) pathway. Although different enzymes of this pathway have been described, the terminal biosynthetic steps of the MEP pathway have not been fully elucidated. GcpE gene of Escherichia coli is involved in this pathway [2]. 
32.00 32.00 36.10 36.80 30.10 30.70 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.12 0.70 -5.65 45 3640 2009-09-10 23:01:59 2003-04-07 12:59:11 9 7 3355 8 908 2655 3339 344.20 45 91.10 CHANGED TRpVpV..Gs......VtIGGssPIsVQSMTs...TcTpDlcuTl....pQIpcLtcAGC-lVRluVsshcsAcAlppI+cp.....hs.lPLVADIHFs.a+lAltA...h-tul-KlRINPGNl.......................up.c-+hctlVctAK-pslsIRIGVNuGSL....pcclhp+Y.......tsTsc...uMVESALcalclhEchsF..pclllSlKuSclhhhlp.AYRhlupcs-.......YPLHLGVTEAGshhsGplKSulGlGsLLt-GIGDTIRVSLT...s-Psc....El............................................................................................................................................................................................................................................................................................................................................................................................................pluhcILpulsL.Rptus........clISCPoCGRs.thDLhplspclccplp..plc..........sl+lAVMGClVNGPGEuc-ADlGlu.uGpGp...uhlac+Gchl++s.sp-phl-cLlc.Iccht 
.............................................................................................................................................pplhVGs......VslG.u......s......uPlsVQSMTN.......Tc.TpD.lp...ATl....tQIp............pL.pcuGs-....IVRluVPs.-uAcAlttI+pp.....ss.lPLVADIHFs.h+.hALts........s-hGlcplRINPGNI....................................................sp.--+lctllcsA+-pshPIRIGVNuGSL............-.+.clh.p.KY......................stsTs-........AllESA...hcpsc.lh-clsF..cphhlSlKuSDV...hhhlp..uY.RhL.A.p.p.hD......................................aP.L....HL.Gl.T.E.A.G.ssp......p.G.......sl.KSAlulGhLLt-GIGD.T.lRV.....S...Lo.......u.-.P....s.E....E.l.............................................................................................................................................................................................................................................................................................................................................................+....V...uhcILcol......sl.....Rs.p.u...l..........ph..lu.CPoCGRp.ph.D.l.hphspplcptLc...clh..........tshcVulhGCVVNGPGEAcpADlGls.uGssc.....uhlahcGchhcpl.s.psphl-pL.t.lct..th........................................................................................................................................................... 
0 329 616 783 +2677 PF02155 GCR Glucocorticoid receptor Mian N, Bateman A anon IPR001409 Family \N 25.00 25.00 29.60 25.50 18.70 18.30 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.21 0.70 -5.72 5 165 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 79 0 32 162 0 305.40 50 50.18 CHANGED VhDFasshRGGs...olpsSsSssPTSluSQSsS+QQP.......lsuDhsNGLsNNssQPDLSKAVSLSMGLYMGEoEsKVMGNDluF..PQQsQsulSoGETDFuLLEESIAsLN+Sooss-sllGuss......Pp-htsLKTc.DsSsps+sclcuQsGSNGsStlKLslsDQ.....SThDILQDLELPssSP.GucoN.sPW..DhLhDEs...uLLSPLu.sDDoFLhEGN.hsEDsKPlsLsDoss..KlsDsucpsLLosuslslPQVKTEKE...DFIELCTPGVIKQENsGslYCQuulSGuslhGsKsSAISIHGVSTSGGQMYHYDhNTuTl.SQhQQDQKPIFNlhPPLPSuSEuWNRCQGSsD-shAolGslNhsuRosFSNGY.SSPuhRPD .............................................................................shSss.s.husss..sssh.p........h.hDhspG.sss.s............................-LopA.lo.hohGlhht-s-s.KshspDhth...Q..Q...s..Q.....hul.os...G.......Epsh..p...L..LEESIAsLsp.....o..o...oss.-sshuuss......P.ch.....KTc...chS...-....ppp..h...csps.u.osGss.hcL.sssp...............sThD....I....LQDLEhsss.P....Gpcos..ssa...DhlhD-............sLL......SP...Lu.t-Dsh..Lh.-us..........sscDs.KP.llls.csps...............pl.pcsu............c...............h.L..s.............s.......s....s.sl.....shP....p.......VKsEK-...DFIcLCTPGVIKQEK..h...G.s.sYCQ......uuhsu..usl.u....s..+hS...uIS...lp.GVSTSGGQhYHYchNou.......Sh..uQ.Q..Q.DQKP.lFNlhPPlPssSEsW...NRsQGsG-ss...hs...shG..s..h.s........h.s...upos...FSsGa..SSPu.Rs-............................ 0 1 3 11 +2678 PF04107 GCS2 Glutamate-cysteine ligase family 2(GCS2) Kerrison ND, Finn RD anon COG2170 Family Also known as gamma-glutamylcysteine synthetase and gamma-ECS (EC:6.3.2.2). This enzyme catalyses the first and rate limiting step in de novo glutathione biosynthesis. Members of this family are found in archaea, bacteria and plants. 
May and Leaver [1] discuss the possible evolutionary origins of glutamate-cysteine ligase enzymes in different organisms and suggest that it evolved independently in different eukaryotes, from an ancestral bacterial enzyme. They also state that Arabidopsis thaliana gamma-glutamylcysteine synthetase is structurally unrelated to mammalian, yeast and Escherichia coli homologues. In plants, there are separate cytosolic and chloroplast forms of the enzyme. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.67 0.70 -5.30 12 2189 2012-10-02 17:21:26 2003-04-07 12:59:11 8 10 1592 13 681 1830 1695 275.90 24 68.31 CHANGED lGlE.Ehhls-.hsh.ht....pshh.ttshhshs...tsshhp...tstlEluss.PhsslpcshpphtpshptltphssphGlthhulGhpP.hphtph.lhsssRYpthhphhsp....s.upphhhtstplplslcsus-t......sthhphhhshhPlhhALuAsSPahpG+soGatSsRtplapphss...u.lPhshsc...acpahchhhcsshhtshp...............chahchRsss.h.s....hh-lthptssh.....lh.cthhch+hhsutst.chsshsshhhsshhcs..h.pstacAs+.hh 
.......................................................................................GlE.Eh.lh.s....shths.............ps..hh......t...t.s..h...thp...............ss..p.h...hp-hstu.lElu.os..s...t....p....s..lp....pstsplsthhphltpsAs..ph.....s.....lt..l.husGscP....h..t.p....h....p....c...p...t..l...hs.p.t.....RY.......pt..h.h....ch.h................sh................................h..u...........p..............ph.h.......h...............h.......up..pV.......pVsl.ssu..............s-sh.................h.hhcshpthhPh...........hl.....ALu..AsSP.ah.pGp..s....oG.a...tS.....tR.p.la.p.......p.hss..................sGs..h.P.h..h.....h.....s......t...........t....ac....t......hhchhhp.....s.s.hh.shc..............................................ch.ah-h.R..sss..thu..........shE..lcsh.c......os........sh...c..thh.h...thhpu......h.s..h..h.h.l.s....t..h.s.h.h........h.t.....................a.......hh.stapus+.h..................................................................................................................................................... 0 201 465 597 +2679 PF01597 GCV_H Glycine cleavage H-protein Bateman A anon Pfam-B_988 (release 4.1) Domain This is a family of glycine cleavage H-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. A lipoyl group is attached to a completely conserved lysine residue. The H protein shuttles the methylamine group of glycine from the P protein to the T protein. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.40 0.71 -4.43 14 4486 2012-10-02 20:27:15 2003-04-07 12:59:11 14 10 3523 30 1292 3350 2660 118.10 40 88.84 CHANGED LhYspcHE....Wl+scps.ssTVGITcaApcpLGDlVal-LPEsGsclpps-shuslESVKssS-lYuPloGpVlElNccLp-sPsllNccPY.tcGWlhKlKs...ss.c-hcp.LhsucpYtchlcc. 
.........................................................................h+YocpHE..........Wl......+..........h....-.....u.....-................s.....sslGIT...............-aA..Q.-....tL.GD..lVFV-.L.......P..-......l..Gs......p.l.st.....G.-s.h..usl..E.S.V.K.usS..D...l.YuPl..o.GcVlpV...N.-s...L.p.-sP.......c.l.l.Np....-.P....Y....sp......GW..lhKl..ch.....s-.......s....p....l....c...s....Lhs.sptYpthh...t.......................................... 0 431 806 1082 +2680 PF01571 GCV_T Aminomethyltransferase folate-binding domain Bashton M, Bateman A anon Pfam-B_933 (release 4.0) Domain This is a family of glycine cleavage T-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. The T-protein is an aminomethyl transferase. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.15 0.70 -11.08 0.70 -4.96 131 7911 2012-10-01 23:12:28 2003-04-07 12:59:11 16 47 3499 47 2456 6628 15559 195.20 23 43.65 CHANGED lhDlSp..huplplp......Gs-..At.....paLpplhssclstlt...sGpst.hshhLsp.pGtllsDhhlh+hscs.t............ahllssuustppshtalp......chhh.........plplpshosp...huhlultGP.....putpll...ppls....................sth.hhphtth.......h.....h........hlsRsuasGE..GaElhh.ss.............cp....stplactLht.s...thpssGhtAh-sLRlEtGhshhGp..........-h.s.pph..sPhEuuLt.ah 
.....................................................hhDlSp.huhlplp...........G..s...-.....ut.....pa.L....p.tl...........hs.s.-..l...........s.p........ls..................sG.p...s...h...ashh..h.....st..p.....Gt...lhsDh..h..lhch...s-s................................ahl...ls.s...u...us..........t.............p..p...s.....h.s...alp...........................................phh.t...............cVplp....sh.ss.............huh.l.u..l..t....GP...........................put.t..hl.........tplh.........................................pt..h..h.h..tht.h.............h.s...h................................hlhp.s..u...........a.s.G.....E.....G....a..Elhh...st....................................pp......u.tlhptLht.....s..............................................l..p..s..s.Ghts..h..c.sL.R.lE....t...G...hshhup....................-h...s.tph........sPhpssht.......................................................................................................... 0 718 1448 1998 +2681 PF04295 GD_AH_C D-galactarate dehydratase / Altronate hydrolase, C terminus Kerrison ND, Finn RD anon COG2721 Family Family members include the C termini of D-galactarate dehydratase (EC:4.2.1.42) which is thought to catalyse the reaction D-galactarate = 5-keto-4-deoxy-D-glucarate + H2O, [1] and altronate hydrolase (altronic acid hydratase, EC:4.2.1.7), which catalyses D-altronate = 2-keto-2-deoxygluconate + H2O [2]. As purified, both enzymes are catalytically inactive in the absence of added Fe2+, Mn2+, and beta-mercaptoethanol. Synergistic activation of altronate hydrolase activity is seen in the presence of both iron and manganese ions, suggesting that the enzyme may have two ion binding sites. Mn2+ appears to be part of the enzyme active centre, but the function of the single bound Fe2+ ion is unknown. The hydratase has no Fe-S core [3]. 
19.40 19.40 19.40 20.40 18.90 19.30 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.23 0.70 -5.81 114 2289 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 1384 0 458 1521 2260 381.60 41 82.16 CHANGED p.tTFpGY+RsDGp.VGsRNhlhIlsoVsCsssluctlActhptp................................................slD..........Gllsl.sHshGCu.hs...p-hphhp+sLtGhuppPNhuG.VLllGLGCEssphsplhps.....hsttt....ctlc........hhslQ-s..tG.tcslppGlphlcchltp.AsptcRpssshScLslGlpCGGSDuaSGlTANPslGhsuDhLlptGGTslLoEsPElhGAEclLspRAtsp-Vuc+llchlpha.cpYht.ptsssh.ssNPSPGNKtGGLoTl.EKSLGustKuGos.slssVlcYuE...lp..p.pGLshhso...........PGtDssusTu.sAuGsslllFTTGRGoshGsh.sPslKluoNoshhc+hp-.hDlssGsllsGptol-chGcclachllcVASGct.TpuEth..GppEhs.aph.....Gssh ..................p.hsFpGYRR.ss.Gp.VGsRNhlhIlsoVtCssslschlsphh.cc.................................................h...th...slDGV..s.slsH.aGCu.ht......ssth.hsh+sLtshuppPNhGu.VlVl..GL....G.CEp..Qsptlhcs......................hschss..............cpsp.............hhshQcp...G.pstl-sulphhcch.hpt.hs...pppRcssshSELhlGhpCGG..SDuhSGlTANPslGhsSDhLlpsGu.TslhoEssEhhGAEplLtsRAh...sccltc+Llphhsh.......acp.Yht.....t.t......p.p.sh.....ssNPSPGNKtGGLoTl.EKSLGsht.Ku.Gpo.sls-VLphG....-....+.p........p....pG..Lhhhso............PusDh.Vs.sTs.sAuGsplhl.FTTGRGTPa.Gsh..........VPs.........lK.........luTNopLsp+h...t-hhD.lsAG....slhpGt.pol-pl....upclhchIlclAsG+p.TpuEph.......tap-.h.ulapsuss..................................... 
0 130 277 372 +2683 PF01150 GDA1_CD39 GDA1_CD39_NTPase; GDA1/CD39 (nucleoside phosphatase) family Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 21.20 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.52 0.70 -5.99 9 1515 2012-10-02 23:34:14 2003-04-07 12:59:11 12 8 341 38 858 1436 26 351.80 24 78.66 CHANGED .htpp-sh+YullhDAGSoGoRlaVY+a.sp..........c.shhtlsttlcc....hphlsPGLSoaupKs..................cpsuphLpPLL-hAcphIPppppsp..........TPVhltATAGhRLLtt-...tpptlLcsLpshlpshssa.spp..ptlpIIsGppEGhYuWIslNYLLG+hscc..............pppTsGslDlGGASsQIsFs.p..pt...stplcsst..hhpphchhspcY...slYspSaLsYGhspuhpphltpLlpssss..............hpsPCh..Ghppshshsslp................hthpususappCppplhcllp....st.Cs.p..pCtFNGVasPs....tppphtsSshaYhs.shhshsuph.....hs.pchscts+phCs..psWsplpsshspst.p.ls.....phChcssa.holLhsGFshshp..........cphpsspcIpsp-....huWsLGshl.hssshsh.....................pppLtss .....................................................t.......pY.ulhhDAG.......So..GoRlalapa.t......................................................................p.h.....h........h..p......................ht.h.h..t..P.....G.......l.....S..s......asp.ps..........................pt.s.u.p.t.l.p.L.l.p.h.....Ahp...h.....l...P......t.p..h.h..tp..............TPl...hlh.AT.A.GhRl.Ls.p.......ptptll...pt......l.....p...p..........h.h...p.............t...sa..h..................ptspllsG.p.p.......E..Gh.auWlshNaLh.......G.phtp...............................................................tppThGh..lDhGGu...SsQ.....ls..F.sp...............................p..h..........tt.........................h......h.......p...h.phh..s....ppa.................plYspSaLsaGh....tAtpt....h..h..t..thhp.....t.t...t.........................................................................h.........p..PCh.....s.h.....p.....t...p.....h..ph.....t..t.............................................h..t.....u..t.u.....s..h..t..C..ht.h.....hhp.......................s..............
...........s........h....s....u..la............s.............pt..hh...h....p..aa..h...t.....h.....t....h..ht..........................phtphtttspphCt.......h.t.............h....t...................t................................................................hChphsahhslLh.p...G..aths.t...................................................lp.hhpplts..p.....hsWsLGh.h.l.hht............t................................................................................................................................ 0 240 423 643 +2684 PF02347 GDC-P Glycine cleavage system P-protein Bashton M, Bateman A anon Pfam-B_840 (release 5.2) Family This family consists of Glycine cleavage system P-proteins EC:1.4.4.2 from bacterial, mammalian and plant sources. The P protein is part of the glycine decarboxylase multienzyme complex EC:2.1.2.10 (GDC) also annotated as glycine cleavage system or glycine synthase. GDC consists of four proteins P, H, L and T [2]. The reaction catalysed by this protein is:- Glycine + lipoylprotein <=> S-aminomethyldihydrolipoylprotein + CO2 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.34 0.70 -5.79 13 6351 2012-10-02 18:26:03 2003-04-07 12:59:11 11 14 3103 20 1832 6081 5305 362.70 31 75.92 CHANGED cRHIGsspp-ppcMLpslGhsoL--LhtpsVPssI+hpcshphsts............hoEpEhltcLpslAu..+Npl.a+SaIGMGYYsshlPsVIhRNlLENsuWYTpYTPYQsEISQGRLEuLLNaQTMlsDLTGLslANASLLDEGTAAAEAMthshphsKpK.tp+FlVspssHPQTlsVl+TRAcshslcllhsshp-hshos..........s-VsGVllQYPsT-GclpD...as-llcpAHpptshlssAsDLLALolL+PPGEhGADIsVGSuQRFGVPhGYGGPHAuFFAsppc.....htRpMPGRllGVo+DusGKpAhRLALQTREQHIRRDKATSNICTAQALLANhAAhYuVYHGspGL+cIAcRlHstTshLApuLcc..sshplpcshaFDTLplpsss.puspp.lLc+Ahtpt..lNLRhs-ss.slululDETsTccDl-sLlplF 
.......................................................................................................................................................................................................................................................t........lPt.t.l..........t....p.....................h...s..ts......................................toEt-hlchl+pLus..............+.N...h..s.....p...u...h...I...shG..ps.h.p.h.sss..lpt.......h..l..p.Pta.....h..s..s..a...s..P......a.Q.P......E......h......u....Q..G...t.L.ch.lh..p.h.Q...p.h..............l.s-L.......T.Gh-....hs....s...s.S.L...s....p..u.s.....A...t..u.....E.......A....h.......h....l.......s.......h......R.....t.......t......+.......p......c.........p.s.....p.........h.....h..ls.ss...s.H...sps........h.s..........s.......h..........s...p.......A....p.h......h......G......h......c......V......l.....s.....s...s..h..p..p..p.s.s.hD....................................tss.l.u.ul.hlp.Y.P.u.T.p.G.h....h...E.....-.....lp.cl.sp.h.l.H.......p..t.G............u....h.l.h.l....u...............A.s....h.....A.l..s....h..Ls.s.P.....G...c.h..........G.......A...D.ls.ls...pps.F..u...lPhG.....h.GGPtsG.hus+sc........Ltthl....P.G..+.h.l....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 604 1157 1560 +2685 PF00996 GDI GDP dissociation inhibitor Bateman A anon Pfam-B_1220 (release 3.0) Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.44 0.70 -6.17 9 1094 2012-10-10 17:06:42 2003-04-07 12:59:11 13 17 362 23 674 1189 269 337.00 30 82.45 CHANGED MDEE.....YDVIVLGTGLpECILSG.LLSVsGKKVLHlDRNcYYGGESASlo.LppLYp+F+st..........................tppP.pphG+uRDWNVDLlPKFLMANGpLV+lLI+TcVTRYLEFKsV-GSYVa+.........pGKIaKVPus-hEALsSsLMGlFEKRRh+pFl.aVssYcEsDspTaculD.p+toMp-lacKFsLspsT.DFlGHAlALapsDsYLppPAh.TlpRIpLYspSluRYGKS..PYLYPhYGLGELPQGFARLSAIYGGTYMLs+PlD-llatcs.GKslGV+S.ts-lA+sKpVlsDPSYhP...cKl+psG.+VlRsICIhsHPIPN...TssupSsQII.lPQsQlGRKSDIYlshlSasHNVssKGhYlAhlSTslETssPthElcPuLcLLGPlpcKFsplp-laEPhs-GscsplFlSpSYDAToHFETsspDVhsIacchsGpsLshsppptpp ........................................................................................................................paDlllhGTGL..Eslluu....h...hS.hp...G..pcVLHhDpss.a.YGu.p.....u..oh.s....lp....p...l...h.p...hh..p............................................................................................................t......t.h..s.p..s....+caslD.L..h..P.Khlhup...G.tLlphLlp.opV.s+Y.l-Fp.lts....a.lh.p...................................................ts...p..l......p.VP..s..s..c..-sht.o..h......h....u............hhEKRp.h.......h+Fh.a...l..................p....a............p.....t.......p.........p..s.........p.............p.......h..........p.....s........h.....p......t.........p......s....h....pp.h........h..p.p..a.......t...L.........t...t......sh........p.hh.h.a.ulAL..h....s...s.p...........h.p..t........s.......h................p......s...lp.c.hp.ha.....p.Slu+a......G..t.o.....P..alaPhY.G.h.G.ElPQuF.s.RhsA..la.G.GsYhLs...p...s...l...p...p...l..................h...........p..........p.....s....c.........h....h.u.......l...p.....s.......tt.......p...........h........h....p..s.....c.ll.....s......s.s..o....ah..........
............pp...s...p............h....s.....p.l..R....s.lhlh.p.p.sl...t..............st.p....s..t.....s...h....t...ll......hP.............t.....p..h......s..p...............p..t...................h.h.l..h.hu.sp.ss.spG.......hh.lhh.h.os........p...s.....t...p..........t.-.lt...s....h...t................................................................................................................................................t....................................................................................................................................................................... 0 223 348 528 +2686 PF02351 GDNF GDNF/GAS1 domain Bateman A anon Pfam-B_889 (release 5.2) Domain This cysteine rich domain is found in multiple copies in GNDF and GAS1 proteins. GDNF and neurturin (NTN) receptors are potent survival factors for sympathetic, sensory and central nervous system neurons [2]. GDNF and neurturin promote neuronal survival by signaling through similar multicomponent receptors that consist of a common receptor tyrosine kinase and a member of a GPI-linked family of receptors that determines ligand specificity [3]. 21.50 21.50 21.70 21.50 21.00 21.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.16 0.72 -11.45 0.72 -3.47 64 955 2009-09-11 15:54:25 2003-04-07 12:59:11 11 8 83 11 503 812 0 84.70 24 52.80 CHANGED ClpAt.ctCpt-.pCpsthpt......ahppCts.........................tsstpCs...................scChpuhpsh.hsslhs................CpCpts.ppp.ppCtphtpphh.tssC .............................CLcst.ctCpt-p..hC.p.p..thpt......ahppCts................................t.ssppCst.....................pcChpA...hpsh..h...ssshs....t...........CsCpts.........p.ptCtphhpphh..s.C............................... 
0 47 105 259 +2687 PF02212 GED Dynamin GTPase effector domain SMART anon Alignment kindly provided by SMART Family \N 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.88 0.72 -4.09 95 1604 2009-01-15 18:05:59 2003-04-07 12:59:11 13 34 368 21 867 1583 18 89.90 28 13.21 CHANGED ppphpschIpph...lpSYasIVp+pltDplPKsIhah.........................llppsccplpppllppLhcpct...hspLLpEsstlsp+RcphpchlctLppApphlscl ............................phphchIcph...lpSYhp.I...l.p+sl.pD....t....l....P.K.s.Iha.h......................................................................hl...p..ps..K....c..p....l...p...s.c.LlspL..hp..psp.......hspLL...pEssphsp+...RcchtchlctLpcAhphlsp............. 0 252 456 678 +2688 PF04807 Gemini_AC4_5 Geminivirus AC4/5 conserved region Mifsud W anon Pfam-B_3520 (release 7.6) Family \N 18.70 18.70 19.20 20.30 18.50 16.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.82 0.72 -7.28 0.72 -4.47 11 267 2009-09-10 17:27:41 2003-04-07 12:59:11 7 2 169 0 0 239 0 32.80 49 22.87 CHANGED pIVLHpsustLlVcHlKaLoKlhhsu.htoTVos ..MpVLHuspTGLllKHlKahoKILhhh.pRoolTs..... 0 0 0 0 +2689 PF00799 Gemini_AL1 Geminivirus_AL1; Geminivirus Rep catalytic domain Bateman A anon Pfam-B_286 (release 2.1) Domain The AL1 proteins encodes the replication initiator protein (Rep) of geminiviruses, which is a replicon-specific initiator enzyme and is an essential component of the replisome [1]. For geminivirus Rep protein, this N-terminal region is crucial for origin recognition and DNA cleavage and nucleotidyl transfer [1]. 
21.10 21.10 21.20 21.20 20.80 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.43 0.71 -4.23 11 3067 2012-10-02 18:54:05 2003-04-07 12:59:11 15 7 1057 2 10 2756 11 106.30 58 35.36 CHANGED FRlpuKNhFLTYP+ColstEcsLppLtsLppphshp.YItlsREhHpDGpPHLHsLlQhEu+hphossRaFDLspspp..s.FHPNIQuAKSoscV+sYIsKDG....sohEaGpFphcu ..................................FplpuKNaFLTYP.+C.S.Lo.K.E.E.ALsQLpsL.p..o....P...s..sKK...a.....I+...lCRELH.E......DG.p.PHLHVLI..QFE.G.Ka..pC........p...N.sR...FFDLsSPoR...Ssa.............FH..PNIQ................u.A.K...S.S.o....DVKsYl-KDG....Dhl-aGpFQlD.G................................................. 0 4 6 6 +2690 PF01440 Gemini_AL2 Geminivirus AL2 protein Bateman A anon Prodom_1117 (release 99.1) Family Geminiviruses are small, ssDNA-containing plant viruses. Geminiviruses contain three ORFs (designated AL1, AL2, and AL3) that overlap and are specified by multiple polycistronic mRNAs. The AL2 gene product transactivates expression of TGMV coat protein gene [1], and BR1 movement protein. 25.60 25.60 25.80 27.50 25.50 25.50 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.86 0.71 -4.07 116 1883 2009-09-13 07:36:57 2003-04-07 12:59:11 11 4 905 0 0 1605 0 131.50 56 96.44 CHANGED Mps..SSPSps+ST.ssIKspHRhAK+..+.slRRRRlDLsCGCShYlpIsCps..HGFTHRG.sHHCoSucEWRl....YL.GssKSPlFQDs.psptpslppppph.....ppssslQPQPpESsGsoQshspLPsLDslTsSDWu.Lps .............MpsSSPSpsHsTpVPIK.VQHR.hAK+.....+slRRRRVDLsCGCSYYl..pI.sCp........N..HGFTHRG.THHCoS...u+EWRl....YL.GssKSPlFQDp.psppps.lppc.pH....ppsssslQsQPpESsGs....oQhFS.pLPsLDDlT...sSDWuFLK........................ 0 0 0 0 +2691 PF01407 Gemini_AL3 Geminivirus AL3 protein Bateman A anon Pfam-B_1874 (release 3.0) Family Geminiviruses are small, ssDNA-containing plant viruses. Geminiviruses contain three ORFs (designated AL1, AL2, and AL3) that overlap and are specified by multiple polycistronic mRNAs. 
The AL3 protein comprises approximately 0.05% of the cellular proteins and is present in the soluble and organelle fractions [1]. AL3 may form oligomers [2]. Immunoprecipitation of AL3 in a baculovirus expression system extracts expressing both AL1 Pfam:PF00799 and AL3 showed that the two proteins also complex with each other [2]. The AL3 protein is involved in viral replication. 22.70 22.70 32.80 26.60 20.80 20.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.54 0.71 -4.65 97 1723 2009-09-13 21:06:47 2003-04-07 12:59:11 12 3 909 0 0 1578 0 119.30 59 89.59 CHANGED DSRTGE.IT...AsQApNGVaIW-ls......NPLYFKIhpHppcPhhp....ppcIlplQIpFNHNLRKALGlHKCFLsFpIWTshps...tTGp.FL+VF+.......pQVh+YLcsLGVISINNVIRAVsaVLassl.pp...sl ..................................DSRTGEhIT...AsQAc.N..GVaIWEls......NPLYFKIhpHspRP.Fhp....spDIIolQIpFNHNLRKALGl....HKCFL.sF+IWTTLpP...pTGp.FLRVF+.......tQVlKYLssLGVISINNVIRAVcHVLasVlppT.h.................................................... 0 0 0 0 +2692 PF00845 Gemini_BL1 Geminivirus BL1 movement protein Bateman A anon Pfam-B_1535 (release 2.1) Family Geminiviruses encode two movement proteins that are essential for systemic infection of their host but dispensable for replication and encapsidation. 
20.20 20.20 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.79 0.70 -5.03 17 508 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 260 0 0 471 0 250.50 54 95.03 CHANGED sYIcScRsEYpLopDhT-IsLQFPSohpQhsuRLptpCMKIDHsVIEYRpQVPlNAoGoVIVEIHDpRhoDs-ShQAuaTFPItCNlDLHYFSSSFFSlKDPhPW+lhYRVsDoNVhpusHFA+hKGKLKLSoAKHSsDI.F+uPTlcILSKpaopcclDFapVshuKhp..R+Llpssshs...phuh+tP.lpltPGEoWAo+S.sIG............................st..spppspsspaPYRpLp+LssutLDPG-SsS.s......us.shopsplp-llcsTVpcClposspsspsKsL ....sYI-ScRsEYpLopDhoEIhLQFPSshpQhou+LptpCMKIDHsVIEYRpQVPlNAsGoVlVEIHDpRho-s-ohQAsaTFPItCNlDLHYFSSSFFSlKDs.PW+lhYRVpDoNVhptsHFA+hKGKLKLSoAKHS.sDI.F+sPTlcILSKpaTtcslDF.pVshsc.p..R+hlpsssht...phuh+tP.IpltPGEoWAo+S.pIG.............................s...sp.ps.pss.aPY+pLp+LssssLDPG-SsS.s......ts.ohShtplp-llcsTlpcCl.ss.pss.sKt.................... 0 0 0 0 +2694 PF01492 Gemini_C4 Geminivirus C4 protein Bashton M, Bateman A anon Pfam-B_453 (release 4.0) Family This family consists of the N terminal region of geminivirus C4 or AC4 proteins. In Tomato yellow leaf curl geminivirus (TYLCV) the C4 protein is necessary for efficient spreading of the virus in tomato plants [1]. 29.10 29.10 29.70 29.70 28.80 29.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.71 0.72 -4.25 79 1778 2009-09-12 04:38:53 2003-04-07 12:59:11 12 4 816 0 0 1410 0 82.00 49 82.94 CHANGED MGsLIShssssSKsNosA+IsDSSThaPQsGQHISIpTFRELsss.hSsPT.p+TETshsG-sSRSh-DhLEEVuph.TTphPRp ..........MGsLIshsSSsSKuNosA+hsD.SSThaPQsGQHISIpTFRELN.ss.hSpPT.pRTETs.sG-s.RSh-DhhEtssp..hThhP+p.............. 0 0 0 0 +2695 PF00844 Gemini_coat Geminivirus coat protein/nuclear export factor BR1 family Bateman A anon Pfam-B_1430 (release 2.1) Family It has been shown that the 104 N-terminal amino acids of the maize streak virus coat protein bind DNA non- specifically [1]. 
This family also includes various geminivirus movement proteins that are nuclear export factors or shuttles. One member BR1 facilitates the export of both ds and ss DNA form the nucleus [3]. 23.50 23.50 23.80 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.35 0.70 -5.14 41 3840 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 1300 0 0 3087 0 202.70 48 94.35 CHANGED RRphshss.....st...hpcssstp+stsp..htRpsphh+hh+ss........sh+lpshp......tschsls....ssuhsphlossspGps..spRsssthplKpltlpGplhh-cshpphsts.......sshhhaLVhDR+Pptssss....sFsplFsh...hpspsuohslpssl+-RFhVh+paptslssst........ss.h..hssppph.s+RFhphlsh+sha.ppsssGpYsNlpcNALllYhshhpssssshuoh.........hhlhFcsshss .............................................................................Rthshsp...thsss...shstss.+tp.s....W.sp....R....P.M.....hRKP+.hYRhaRSPDVP+.......GCEGPC.K.VQSaE.........p+pDlt.HhGhl..hClSDVTRGsG..lTHRlG.KRF.ClKSl.YlhG...K.l.WMD-NIKppNHT........NsVhFaLVRD.RRPh.s.sP.....cFuplFNM...aDNEPSTA....TVKNshRDRaQVh++apusVsGGt.................h...us.+EQ.A..l.l+.+..Fh+.lNsaVsY..NpQEsu.KY.-NHoENAL.h.LYhshhp.ss.ssshssh...........h.Fh.sh............................................. 0 0 0 0 +2696 PF01708 Gemini_mov Geminivirus putative movement protein Bashton M, Bateman A anon Pfam-B_1771 (release 4.1) Family This family consists of putative movement proteins from Maize streak and wheat dwarf virus. 25.00 25.00 33.50 33.40 23.00 22.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.00 0.72 -4.44 10 288 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 88 0 0 221 0 81.80 47 87.79 CHANGED MDth.hss.h.spss....Pp......VPssAPuuuslPWSRVGElsIhohVAVlslYLLYsWVL+DLILVlKA++GRoTEElsFGssst+s....sussss ...........................pss.Y.pPR......VPo.uAPs.SuslsWS+VGEVuIhsFVAllshYLlYlWVL+DLILVLKA+pGRoTEElhFGspssctsss..h......... 
0 0 0 0 +2697 PF01524 Gemini_V1 Geminivirus V1 protein Bateman A anon Pfam-B_893 (release 4.0) Family Disruption of the V1 gene in Tomato yellow leaf curl virus (TYLCV) stopped its ability to systemically infect tomato plants, suggesting that the V1 gene product is required for successful infection of the host [1]. 21.20 21.20 22.30 23.80 21.10 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.48 0.72 -4.00 45 1677 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 796 0 0 1216 0 75.00 69 68.72 CHANGED MWDPLlN-FPE...TVHG.FRCML..AlKYLQt.lcpTYuPDTlGa-LlRDLIsVlRu+NYsEAopRYscFpuRl..puTspuELRQ ..............MWD..PLLNEFPE...oVHG.FRCML..AlKYLQh.lEpTYsPD.TLGaDLIRD.LIsVlRARsYVEAo..pRYsHFauRl..EGost..uELRQ............................ 0 0 0 0 +2698 PF02053 Gene66 Gene 66 (IR5) protein Mian N, Bateman A anon IPR000714 Family \N 20.10 20.10 21.30 20.50 18.80 20.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.50 0.70 -4.91 6 66 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 29 0 0 57 0 175.70 37 67.47 CHANGED Mpsha..........sDtphcosuthh..sutculYPhh.ssousHssSLPRSV+shApsVhssut-AhsAhRuGtPPPAclW.cVYchasssFpcaptS.....husFHsAsPlR+hVGchLhshssAP.ETHuELusRLLaCuYWCCLGHAusCShsphYE..csChRFF-pshGhGEsP.s-uEpYWpsLhshsuscsphh.+asAtsAahptRsR+hsls................u.sh .........................ttthhcpsssp.........................sssusLYPlp.spshAHhshLPtshRshhcslspsSp.stssl+s..s.PPssthh.hlhcphphsaspahRu.....ppphpsl.PhRpAshsalss....husts.pTH...cchc-hLhh....CAaWCCLuHAu..........T..........CShAtLYs...sp..Ch+LFss.FGCGs..................................................ss.ss.................. 
0 0 0 0 +2699 PF03323 GerA Bacillus/Clostridium GerA spore germination protein Mifsud W anon Pfam-B_3821 (release 6.5) Family \N 20.70 20.70 21.00 20.80 18.50 18.50 hmmbuild -o /dev/null HMM SEED 470 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.23 0.70 -6.36 152 1955 2012-10-02 18:57:54 2003-04-07 12:59:11 8 8 418 0 415 1730 13 442.60 33 90.93 CHANGED Nlphlc.phhs..pssDlhhRch....................tttshc.ssllalcGlsDpphlp..ppllcsL.....................ttttptphhpplh..pph...lshsplpphp.shccllpplLsGpsllhl.-Ghs..pulllsspsh...pRulpEPpsEpllRGs+-GFsEslpsNhuLlRRRl+sspLphcphplGcco+TclslhYIcsIss.cllccl+pRlppIclDsl.hssuhlEphIpDps.hosFPplt.TERPDtsuupLhEG+lsIllDGoPhslIsPsohhphhQssEDYap+ahhuoh.lRllRhhuhhlolhLPulYlAlhoaH.-llPspLhhslsusRpslPFPsllEsLlhElshElLREAGlRLPpslGpslGIVGullIGpAAVpAGllSshhlIlVAloAluSFshPsYphuhulRllRFhhllhuuhhGlhGlhluhhhlll+LssL+SF.GhPYLsPhsPh..phps.h+DsllRhPhhhh....p.pR.......Pphlp ......................................................................phlp.phht....pssDlhh+ph......................htshp.hslhalculsDpp.hlp..p.llpsLh........................................p..t..p......t.hp..p.hh..pph.ls.h.s...plpthp.shpclhptlLpGpsllhl.-.s.s.............p.s...h...lls.spsh...p..RulpEPpsEtslRGP+-GFsEslssNhuLlRR+l+sspLphcphp.lGcp.op.TclslhYlcslss.cll...pclcpRlp.pIchDs.l..hssuh.l.E.ph.I.p..Dps..hosFPphh.TERPDtssssLl-G+luIllDGoPhsLlsPsoahphhQosEDYhpphhh.uo.a.lRllRhh.uhhlulhhPulYlAlhsaH.pl..lPspLhhslhsuRpslPFPsllEs.LhhElshElLREAGlRLPpslG.pslGIVGGllIGpAAVpAGlsSshhlIlVAloAluSFshPs.aphuhuhRllRFshhlhAuhhGlh.....Glh.....lGhhhlhhHLspL+Sh.G.s.PYhtPhh...Ph...p...h...p...s...h+.Dhhl.RhPhhhhp.pRPt...p............................................ 
0 209 341 357 +2700 PF00196 GerE Bacterial regulatory proteins, luxR family Finn RD anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.43 0.72 -4.65 30 38873 2012-10-04 14:01:12 2003-04-07 12:59:11 14 273 4244 74 9895 30161 2583 57.10 31 20.74 CHANGED hssLopREtpVLchlspGhoNcpIAppLslSp+TVpsHhsslhpKLslpucsplsphA .......................h...Lot.RE.t..c..l..L.p.h...l..u.......p........G...h....o....s.p.c......IAppL.....t.......l......S.t.p.T.....VcsH.h...p.......s....l..h.p..K...L...s...lps.+sphshh......................... 0 3079 6345 8321 +2701 PF01353 GFP Green fluorescent protein Bateman A anon SCOP Domain \N 25.00 25.00 36.30 36.10 24.50 24.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.25 0.70 -4.97 27 306 2012-10-01 21:32:51 2003-04-07 12:59:11 17 7 114 621 18 1347 1 212.80 38 91.18 CHANGED hssIK-pM+hKlpMEGsVNGHtFplpGcGcGpPacGsQ..shplpVscGuPLPFAaDILosuFpYGNRsFsKYPccI..PDYFKQSFPE...GaoWERshsaEDGGlsssosDIol...cG-.....sFhacl+FcGlNFPssGPVMQKKTh.tWEP.STEthYs..pDGlLpGclshALh.LcGGuHYpCch+ToYKuKKs.lph..PsaHFlDHRlEhhpccc.DhspVc.aEpAlA+hs ...............phlpttMph+lcM-GsVNGHcFslpGcGcGpPaEGpQ..shcLpV..s..c..GGPLPFuaDILossFpYGs+sFs+YPccl..sD.YFK.pu.h..P-...GYoaERohpF.........E.........DGGlsssosc..loh...cGs........shh.acl+hpGsNFPssGPVMpKK.Th..sW-P.S.s.Ep.h.hs..p-ssLpG.csshsLh.Lc.G...G.u+...apscacosY.....+....s..c....Ks...........l..ph.....P.shHalc+clphtp.psp..chpphp.hEhu.Ap....................................... 0 17 17 17 +2702 PF05165 GGDN GGDN family Bateman A anon COG2429 Family I have named this protein family of unknown function GGDN after the most conserved motif. The proteins are 200-270 amino acids in length. 
20.60 20.60 20.60 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.25 0.70 -5.33 4 106 2012-10-01 23:51:22 2003-04-07 12:59:11 7 2 91 4 64 115 25 210.80 32 93.13 CHANGED M...shlQIDsYGPWTspPsPRREsDLQsLQucLYADlpc.huu+pGhVFhsRFDNMlulosGhsltsHcRlQ-uItNRYPlTVSMulusAcTPhDA.ctAspALpctGuAQsEpRpEsLsh.s.....ss-GYVpIAHIDlNslTtThTDhVushcThhpVpclhttLhchL..KhsAlh.FlGGDNhhAsssthp.t.lLclhsclc-phsl.....-LKAGIGhusTAccAusLAchALEcIR..Gths.sslsshcp ..................................h.sllplssYt.WT.o.sscREhtlQtlQucLatclpp.huthsuhsa.hRaDshlslsNGls.pphttl.pslppph.PlslshsluhucTPh-A.htAsttlpp.t.st.pt...p....................ss..ltluHhDlsshT.phTchhssa-oahplpphhhpLhchh.hp.hsulsh.....alGGDNhhs..h.st...hsppsh.phlpclpcphtl.............tl+sGlGhGpsAcs...AhhhAspuL.-tlR..tth..........t.................................. 1 16 34 51 +2703 PF01134 GIDA Glucose inhibited division protein A Finn RD, Bateman A anon Prosite & Pfam-B_4007 (Release 8.0) Family \N 20.00 19.10 20.00 19.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 392 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.29 0.70 -5.61 122 6402 2012-10-10 17:06:42 2003-04-07 12:59:11 17 49 4313 22 1570 23497 12080 363.60 41 65.94 CHANGED DVlVlGuGHAGsEAAhsuA+hGscshLl..............Thph-pluphsCNPolGG......AKGhLs+ElDALG.GhhucssDpsslph+hLNtp+GPAVtAhRsQsD+phYppthpctlcs...psNLplhpspVsclhspp................tth.G.lhht.s..h.sc........sVllTTGT.........aLtu.lhhG..th.sG...sp.hso.sLupsLc.c.hGhchtRhKTGTPPRlctcSIDasth.hpsu-.p.s.s....FSahsssh.......p...pcQ............hs.CalstTspps......HplI+cslc.pos.hasGp..IcuhGPRYCPSIEc...Kll+Fs...............-+spHplaLEPEuhsssp.hYhsGhSToLPhclQhchl+o.IPGLEsAclhR......GYulEYDalsPtpLh.s.oLEoKplpsLFhAGQINGT.oG..YEEAAA..QGLlAGlNAuh.phpscp 
...............................................................................................................DVlVlG.u.GHAG.sEA.A.h..............Au...A..R.h...........G...h..p...slLl...........................................................Thp..h..-.....p.l....u....t...h.....s.....C.....N..P...u...l.G.G...............................u.K.G......h....L....l......+.....E......l........D........A.......L.....G........G.......h........h........u.....c......s....s........D.....p....s....t..l......Q....h.......+..........h......L..............N......t.....t.........K......G..........P.......A....V.......p........A....h.....R......u.........Q..s...D...R.......t.......h........Y.........p.....p........t....h.........p.....p.....t...l...c...s.................p..s..N..L...p......l....h...p..p........t...l...s.....-...l.h....s.pt......................................................tth..h.G.....s...h.....h.....t...........s.................h..t...u.p............sVllTTGT.................................................h.LtG..lhlG........p.....h....u.G..................t......p............s.S..h....s...........L.uc..pLc..c...hG.h...c.h......h.RhK.TGTPPRlcsco.IDas...hht.QsuD...s.s...............FSah.s..s.s.h.........p.....pQ................................hs...Ca.lohT.spps..................Hpl....lcsshc.....cus......h...asGh.........I-.u.h.G...PRYCPSIEc........Kll+Fs...................-+sp..HplFLEPE....Gh....s....ss..............p....hYhsGhS....T.S.LP..h.........clQ..hp...ll+o.I..................G...L..E.N.....A.......c...hhRs.............................GYAlEYDa.hs.....P.p.p.Lh.........s...TLE.......o.......K.......t...l..ps......LFhAG...Q...l....N...G....T.oG......Y......E......E......AAA....QGLlAGlNAAhthtsc..................................................................................................................................................................................................................................................................................
............................... 0 550 1026 1334 +2704 PF02527 GidB rRNA small subunit methyltransferase G Bashton M, Bateman A anon Pfam-B_1265 (release 5.4) Family This is a family of bacterial glucose inhibited division proteins these are probably involved in the regulation of cell devision [1]. GidB has been shown to be a methyltransferase G specific to the rRNA small subunit [2, 3]. Previously identified as a glucose-inhibited division protein B that appears to be present and in a single copy in all complete eubacterial genomes so far sequenced. GidB specifically methylates the N7 position of a guanosine in 16S rRNA [4]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.85 0.71 -5.15 17 4375 2012-10-10 17:06:42 2003-04-07 12:59:11 10 8 4276 14 957 4510 3558 182.20 33 82.38 CHANGED phpphptYhphLhchNpphNLouhp..chpclhp+HllDSlslhphlcptc...t...lhDlGSGAGhPGlPLulhhPph+....lsLl-uhtK+ssFLcpltpcLsLsplpllpsRsEc.......ttpaDslsuRAluslsclsphshp.Ll+ssGhhlthKGtpst-Eltphcpthphpth.hhplsth.tt....pcplh .............................................................pph.tYhclL...hcW...N.cth..N...L....Tulp..-.p.-..h...hh+H...lhD..Sl.s..l..h.........h...l....p....tpp......................p....llDlGoGuG.h..PG.I.PL.u..I....h....h....P.c....h.c...........hT.LlD..S..l....s......KR..l..p..F...L.p...p..ltp.c....L.....s...L..p...s....l..p...s...l.p...u...R.s....Ep......................ht..c.tpaD.h....V.....s....u....R.....A....l......A.....s......L........s..s....l...s...p.h.....s....h......s...L...l......c..............s.......G...p........h......l........A....h.....K..G...t..p..s..p..-.E..lt...p.h.pp.....s....h.p..h..h..th.....t.............................................................................................................. 
0 336 646 821 +2705 PF03227 GILT Gamma interferon inducible lysosomal thiol reductase (GILT) Mifsud W anon Pfam-B_1477 (release 6.5) Family This family includes the two characterised human gamma-interferon-inducible lysosomal thiol reductase (GILT) sequences: Swiss:P13284 [1] and Swiss:Q9UL08 [2]. It also contains several other eukaryotic putative proteins with similarity to GILT [3]. The aligned region contains three conserved cysteine residues. In addition, the two GILT sequences possess a C-X(2)-C motif that is shared by some of the other sequences in the family. This motif is thought to be associated with disulphide bond reduction. 22.00 22.00 23.70 22.60 21.20 19.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.24 0.72 -10.81 0.72 -4.20 15 522 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 214 0 342 525 3 107.30 27 45.18 CHANGED lplslaYESLCPssppFlpppLh.hh..hptshtshh-LcLlPFG.pAcsscsto.....spCQHGspECcLNtLpACsIcshsspph..........hshlsClpps.pshppuh......Cspchthpp ................................lplslaYESLCPssp..pFlhppLhPhh.....p..p..ltsh..lsl..pLlP...aG..pA..p...hp.s.sss................hpCQHGspEChhNhlpsCslph.h.......p....p.h................hthlhCh.pt...t...p........t.t..........Chp.....h............................................................ 
0 138 199 293 +2706 PF03359 GKAP Guanylate-kinase-associated protein (GKAP) protein Mifsud W anon Pfam-B_1892 (release 6.6) Family \N 19.20 19.20 20.40 20.00 18.90 18.00 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.39 0.70 -4.83 20 511 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 91 0 233 441 0 312.50 34 40.76 CHANGED -s+tt....p.h..pIuhp...........h.p.pths.sp.tsc..t.hpSlGlQVc-chp.ptchp.................tosusousspsD.chps..t.s....ssspspsh..s..sshp+p.pp..........ss...pssthth.p+sVs.................pputhsh.............sstsscspsss.psuthsPsps....................................................t...sp+DGpaFl+LLpuEs-RLcuaCpphE+-.....tc-s-LsE.Ell..........GhIRoAVG................pApLLhoQKFpQFctLC-p........................................sh..........s..Psup.csTspDLsGFWDMLpLsIEDVshKF--LppLKsNsWp................ccssPshsKKPsKs................................+sshs+s+uh-uu........ttpRtcARcRLhAAKRAAphRQsp.......socpu-SlEIalPEAQTRL ........................................................................................................................................................................................................................tt............................................tt.....t.........hpS..lGl..Qsp..cp.h.p..............p.p.................................................s.ps.....h.osthp.s.-.php.t....t.........t.ppps.h..t.......tshtpp.p.....................st..................pslp...................................................................ps.hsh.....................................s..ps.s..s..s..h..psu.h.s.P..s.s..s..hps..p...................................................................................tssphuss...pRD.Gpa....F...l..KLL.puEs.-.RhEuWCppME+-........tcEss.L....sE...-l.L............G+IR.oA.VG................sAQLLMoQK...F.pQFptLCpp........................................sh...............s.....Ps.ut..P..cPTsQDLAGFWD.h.LQLsIEDlshKFc-LppLKsN.sWp.hp...................pp-c.....c.ssPsh.s...KK..
ss.+s....................................................psshs....+p+u.-ss..........p.R.pcA...RcR...L.hAAK+.A.sphRpss.......tpEpu-..ohphhhsp.............................................................................. 2 56 73 140 +2707 PF03275 GLF UDP-galactopyranose mutase Mifsud W anon Pfam-B_4203 (release 6.5) Family \N 25.40 25.40 25.50 25.40 25.30 24.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.56 0.71 -4.81 96 1461 2012-10-10 17:06:42 2003-04-07 12:59:11 8 6 1183 57 272 1189 350 188.00 47 53.74 CHANGED thh+uYTcKQWGpcP.p-Lsu...sllpRlPVRhsaDspYF.sDpYQGlP..hsGYTphhEp..MLt.pts.....IcVpLss..Dahchpc................thttcllaTGsIDpaF-Yp...hGcLtYRSLcFE...........pcsh.spssaQGsuVlNYs-.p-hPaTRIhEaKHFps..........ts+Tlls+EYsp.h.....pcu-.....PYYPlssscsppLap+YpcLAcppp...sVhFsGRLusY+Y ............................thlKuYTtKQWGpss.p-LPu..............lIp..........R.L.PVRh..sa..........DNpYF.sDpYQGlP..hsGYTthlEp........M.....L.t...tps........I-VcLss..Dahp.+c.....................th.s.h...s+llaTG.lDpa...........F.-.Yp...hGcLtYRoLcFE.............pEhl...c........tsa.QGsAV.l...NYs-......t-sPYTRIhEaKHFp.t........................p.psKTlI.s...+EYst.h....ccG.D....EPYYPlNspcs..ptLappYpphAp.p.ps..........p.VlFsGRLGpY+Y................... 
0 85 177 232 +2708 PF02812 ELFV_dehydrog_N E_L_F_V_dh; GLFV_dehydrog_N; Glu/Leu/Phe/Val dehydrogenase, dimerisation domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 25.30 25.30 25.40 25.50 25.20 25.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.44 0.71 -4.57 262 6065 2009-01-15 18:05:59 2003-04-07 12:59:11 13 15 3955 213 1526 4625 1317 125.30 45 30.51 CHANGED sc+......llhsph.h........ct.uphpshpuaRs.HssshG.PsKGGlRa.aP.t..............................ls.s....-lhtLuhhMTa..KsAlssLPhGGuKGul...hhD.......P+..ths.......ppE.hcclsRuasptltp...lusspslsAsDlGTssp-Mualh..cpappls ......................PcRhl.hclsh......h.Dp.Gplps.cGaRlQassAl...G....Ph....KG....G.lRFHP.s..............................Vshs.........lKhL....uht.oa..KNul......s........s......LP.hGGGKGG........chD..............P+..shS.......csE..l.RhspuahpELtc...alGs.spDVPAsDlGsuu+-huahh.spYc+l.p...................................................... 0 538 1003 1309 +2709 PF00120 Gln-synt_C gln-synt; Glutamine synthetase, catalytic domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 22.60 22.60 22.60 22.70 22.30 22.50 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.73 0.70 -5.24 95 12610 2012-10-02 17:21:26 2003-04-07 12:59:11 19 19 7069 255 2691 9179 7704 221.90 30 57.41 CHANGED stsPRs.lh++shpthpp.tGhs..shhGsE.EFaLhct....sc....ssts....sthss..........................hpsGhaslss..h-.pspclhp-hhpsh.tphGl.plEstH+EVusu.QhElshc.hssslcsADplhhhKhll+plAccaGhtATFMPKPhhsss..GSGMHsHhSlht...ttG.pshFhssts....sLSchuhpaluGll+HstulsAhssP......ssNSY+RLsP........uhpAPshluauspNRoAslRlPts......tsspupRlEhRhPDs........suNPYLuhAAlltAG 
.................................................................................................t..ssRt..hpc.h..t...h..t.t........uhs.................shhG......E..Eaalhcp........sc...........s............th..............................................................t...hsu..h..ashss...hs...hpsh.hp-hhthh...t.h..Gl...s..l-.shppEV...............u...............s...............u....QaElsht.h.......s.......sslpsADph.............hhh+hll+plAc......p......a...........G.htsoFhsK..Phhsts.......G..oGh.Hsph..Slh.........t.................s....shh...........................st............t................t.....hs.p...th..hl.u....G.h.h.p.ph.......t......h.......shhss.........sssua+RLss..........th..A.s..s..hsau.........h........p..N.R...ss.........lRlPhs..........................t..ttclE....R.s.ss....................sssPYlshushh........................................................................ 0 856 1657 2250 +2710 PF03951 Gln-synt_N gln-synt_N; Glutamine synthetase, beta-Grasp domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -10.05 0.72 -4.40 188 7416 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 5499 249 1602 5187 2852 76.40 36 19.43 CHANGED slcalchpFsDhhGthp.......plshs.......h..spl.........ccs.........hc.pGh.sFDGSSltGatslpcSDMhLhPD...sT..hhl-P..apt..t.....s.hhlhC-lhss ........................................................lphlchhFsDhhGh.p...........plphs...........pph..............pph.....................................hp...ps...h.hFDG.SSl..tG.a......s.t.l......p.cS...DMl.LhPD...po..hh.lDP...apt..t.......s..hhlhC-lhpP.................................. 0 504 986 1339 +2711 PF03710 GlnE Glutamate-ammonia ligase adenylyltransferase Yeats C anon Yeats C Domain Conserved repeated domain found in GlnE proteins. 
These proteins adenylate and deadenylate glutamine synthases: ATP + {L-Glutamate:ammonia ligase (ADP-forming)} = Diphosphate + Adenylyl-{L-Glutamate:Ammonia ligase (ADP-forming)}. The family is related to the Pfam:PF01909 domain. 20.50 20.50 20.50 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.51 0.70 -5.12 22 4102 2012-10-02 22:47:23 2003-04-07 12:59:11 10 13 2001 3 970 3413 962 243.30 33 50.29 CHANGED ltpLhplhutSshlucplt+p....PhlL.cpLhs...hspspshpthtspLpphLhpsss-.....E............................phhcsLRpF+ppphhRIAhsDlhuhhs............lpclsppLotLA-AlltAulphhhppsssch.shP.t.t.sps..shhllGhGKLGGhELsauSDlDLlFhassssts.....sut+sl-stpaasRluQ+llphLst.TssGhlYclDhRLRPpGsuG.LshohsAapcYapppupsWE+.Ahl+ARsls.Gstplptpa.ph .............................................................................................h..pLhplhutSshlsc.lt.p..p.........Pt..h.l..sp..Lhs...................p.s.....t..h..p..t...h.....t....s..p...L.p...p.h...Lh.....p...sssc..........p...............................................................phhcsLRph+ppphlR...IAhs.D.l....su..h..h.s......................................................................lt...plsppL..otLA-uhlsuul.ph..s..htph..st......c........a...G.......P...................t............p.....t........p......s......p...sh...................sllGMGKLGGtELsauSDlDLIFlacpssss..........................sutc.t.l..ss.t...paa...sR...lup+l....hpl....Ls.p...........T.......s.....s.Gh....l.acVDhRLRPpGsuGsLVhShsAhpcYh....p....p....p......u................h..sW..E+.Ahl+ARslu.G-..plttph..t.................................. 
0 261 591 797 +2712 PF03616 Glt_symporter Sodium/glutamate symporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.20 21.20 21.20 21.50 20.70 21.10 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.18 0.70 -5.96 8 1846 2012-10-02 17:06:44 2003-04-07 12:59:11 9 4 1484 0 231 1070 118 354.70 42 89.10 CHANGED lpLsshpTLllAsLlLLLGpall++lsFLp+asIPEPVlGGllVAlslhll+phtshclsFDtS.LQsshMLsFFoTIGLuAshspLhKGGKsLllaLhssssllllQNsVGluhAulLGlcPLlGLlAGSIoLoGGHGTuAAWussFt-p.aGlsuAsElAhACATFGLVhGGlIGGPVA+aLlp+pp...pspspsDsp-p.lspuaccsptp+pIsupsllETlsllslClslGpalusLlps...puLpLPoFVhsLFlGVIlRNhLohhhhap...V.-csVslLGsVuLSLFLAhALMSL+LWELtsLAlPlLlILsVQslsMlLaulFVTFRhMGKDYDAAVlsAGHCGFGLGATPTAlANMpuVTc+FGP ...................................................................plsshtTLshAs.llLLLG+hlspplshL++asIPpPVsGG.Ll.........s.........A.l....h..........l.......hl.....L............c......p........h....hs....h..................p.................l.....p....F....D....h..s............L...p..s.....s.......hML.sF.Fs..T...I.....G..L.........s..A.............s.h....s...p..L.....+.........t....G..G......+...sl...h.........l....F......l..h....ls.s.s.l..l.l.h.Q.N....s.l..G.....l......u.....h.....A..p.....l.......L......G..l.....c..P.L....h....G...L.....l..A..G.....S...I...oL..oG...GHGTuAAau.p.hat.-t..aG.h.....ssAh.pl..AhAsATFGL.VhGGLIG....G...P.V.A......+a.....Llc+tp......................hps..s..t.....t.......s.......-.........s.....p...........p.........................s...........p..........s...........a.......c..........p....s......p...t........s.....+....h........l.....T...u........s........h.........lc.........sl....s....l.........lslC....ls..lG.......p.......h.......l.u....p...h..l...ss.............ssh..........p....l....P..........s..........F....Vs..s..L..Fl.....GVIl....pNhl.s..h.h.t.h..hc..........l.............p..+.s........l..s..l.lGsVSLS.........LF.L..AMALMoLKLWcL.s.s..L..Al.Phll.IL.sVQ.slhMsL...a.Ah..F.V.T.a.RhM.....GKs..Y..DAAVlu
uGHCGFGLGATPTAlANMpulTc+aGs................................................... 0 69 132 190 +2713 PF01744 GLTT GLTT repeat (6 copies) Bateman A anon Pfam-B_681 (release 4.2) Repeat This short repeat of unknown function is found in multiple copies in several C. elegans proteins. The repeat is five residues long and consists of XGLTT where X can be any amino acid. 21.00 21.00 21.10 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.16 0.72 -4.49 35 266 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 125 0 137 344 218 28.60 43 14.72 CHANGED GLsThGLsThGLsThGLsThGLsohGLsT ........GLss.GLso.GLso.GLso.GLso.GLs............. 0 84 101 125 +2714 PF02686 Glu-tRNAGln Glu-tRNAGln amidotransferase C subunit Bashton M, Bateman A anon COG0721 Family This is a family of Glu-tRNAGln amidotransferase C subunits. The Glu-tRNA Gln amidotransferase enzyme itself is an important translational fidelity mechanism replacing incorrectly charged Glu-tRNAGln with the correct Gln-tRANGln via transmidation of the misacylated Glu-tRNAGln [1]. This activity supplements the lack of glutaminyl-tRNA synthetase activity in gram-positive eubacterteria, cyanobacteria, Archaea, and organelles [1]. 20.70 20.70 20.70 20.70 20.20 20.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.09 0.72 -4.18 191 3483 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 3411 33 945 2164 1701 72.20 30 68.71 CHANGED ls-..cEhpphsppLssILcah-pLsc..lDTs...sVcPhsps........h....s.hp.........sshR-Dts.....ppsh..s...p-phLsNAPpp.....ccs.hFtV .....................lo--EhpphsspLspIlsh.l.-.p.Lsp.l.D.Ts......uV..cPhsps........h............c..hp.................................ss.hR..-.D..hs................pp.uh...s.....p-chLp.NAP.cs.....css.hahV................................................................ 
0 327 621 803 +2715 PF05096 Glu_cyclase_2 Glutamine cyclotransferase Bateman A anon COG3823 Family This family of enzymes EC:2.3.2.5 catalyse the cyclization of free L-glutamine and N-terminal glutaminyl residues in proteins to pyroglutamate (5-oxoproline) and pyroglutamyl residues respectively [1]. This family includes plant and bacterial enzymes and seems unrelated to the mammalian enzymes. 20.50 20.50 21.20 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.63 0.70 -5.37 3 437 2012-10-05 17:30:42 2003-04-07 12:59:11 7 3 394 9 188 420 289 246.90 34 86.35 CHANGED M+....L.LpphuhLsLAA...........shu.AA.VhuA+uPVht.+Vl+oYPHDosAFTQGLpYLssGHILESTGphGcSclRVa-Lcsutlptcpslsssl...FGEGlTslcspVYtLTWpDGVAapYDtcTFcsLGcasYpGEGWGLT..+DsKsLhMSsGTAFLpaRDPKTFAtpcoVQVTDpGVPVopLNELEYVcGpLYANVWQTsRIARIcPsTGKVluWIDlSsLL+Elulcuo+.sp....DDVLNGIAa.s-psRLLlTGKLWPpLFEVKLst........tN .....................................................h..hh.........................................................................h.......t..h..cl..lpp.aPHDsp..uFTQGL....h..ts.........s............p...Lh.......E........STG...............h........h.G..p.....Spl..+.......c...h.......c....l...p...o..........G.p..........s..........h......p.......p.......hp......Lss.ph.....FGEGlTh.h......s...c......c......lapLTW+pthu.ahaDh...s..o...........h.p..............l.t.p...h...s.Y.....p.......u.......E...GWGLs.................p-...s.........p.p..LlhSDG.ospLh.......h.....hD..P..........p......s......ap.......hp.p.......l......p......V...s..t.....p......u..ps..lpp.LNELEa.l.........s..G......p.......laANlW........o.......spIhc..I-PpoGpVhuhlD...hs...sLh.t..th............t.....................sVLNGIAa.....s..p.........ps.........c............halTGKhWsplaElch....h......................................................................................... 
0 80 145 177 +2716 PF04262 Glu_cys_ligase glu_cys_ligase; Glutamate-cysteine ligase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Family of bacterial f glutamate-cysteine ligases (EC:6.3.2.2) that carry out the first step of the glutathione biosynthesis pathway. 20.10 20.10 20.20 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.30 0.70 -5.96 7 1652 2012-10-02 17:21:26 2003-04-07 12:59:11 9 13 1468 18 291 1175 573 336.30 42 63.50 CHANGED uctlthl..pppsphLppht+GlEREoLRVsssGpLuhTPHPtslGusLTH.hITTDFuEuLLEhITPsspcl-phLs.Lp-lHcaspspLts.EhlWPLSMPstlss-EsI.lAQYGsSp.G+hKplYRcGLthRYGchMQhISGIHYNFSLP.phashlhptps.ptstp-h.SuuYhplIRNahRauWll.YLFGASPAlsSuFLpsp.ppL..c+h........tpthhYhPYATSLRhSDlGYsNssptsLslsFNslppYlpulppAlcTPstpatplGlht-.GphhQLNsNlLQlEsEhYusIRPKRssppGEp.hpALhppGlpYlElRsLDlNPFsPlGIshpps+FLDlFhlaChhtDus.hDtp-htpspsNapcls .........................s....thl...tp.t.phlpthp+GlEREoLRVs.s.-.G.p.LApTs.HPc.uLGusLTH.aITTDFuEuLLEhITPls.ssl.pchLphLpslHchst.c...pl....s.c....EhhWPLSM...Pshl.s.p.p.p.-.I.lAp..YGoSN.GphKslYRcGLtpRYGthMQ.sISGlHYNFSls.shaps.hh...s.....t........p......s.....................t.......t....st....p-...hhSstYhclIRNahRasWllsYLFGASPul..ssuFlps..c.....p...p.L.......pph..............................tt..shha.hPa.ATSLRh.S.D.LGYs.N.c.s.Qs..s..Lt.l..s.aNsLppYlsuLcpAlcss...a.tlGh.......t..s..p...hQ.l.NsslLQIENEhYusIRPKphstsGEpPscALhctGlcYIElRsLDlNPFoPlGlspppl+FL-LFhlashLs.D.us.ph.ss.s.-.htphp.Nhppl............................................................ 0 62 142 226 +2717 PF01645 Glu_synthase Conserved region in glutamate synthase Bashton M, Bateman A anon Pfam-B_719 (release 4.1) Family This family represents a region of the glutamate synthase protein. This region is expressed as a separate subunit in the glutamate synthase alpha subunit from archaebacteria, or part of a large multidomain enzyme in other organisms. 
The aligned region of these proteins contains a putative FMN binding site and Fe-S cluster. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.02 0.70 -5.54 15 5264 2012-10-03 05:58:16 2003-04-07 12:59:11 12 61 3389 15 1586 7292 7907 342.40 41 29.94 CHANGED Hh.ssths+pLppAsps..........tshssacpYpchhscc.hlsslRshLchcsscss...........lslppVEss.plhpRFsouuMSaGALScEAHpuLAhAMNclG.upSNoGEGGEssp+hcsssss.......................tIKQlASGRFGVo.............scYLssActlEIKlAQGAKPGcGGpLPGcKVos.IAclRtssPGVsLISPPPHHDIYSIEDLAQLIaDL+plN..cA.lSVKLVuptGlGsIAuG....VAKAsADhIhIuGa-GGTGASPhoulKauGlPWElGLsEscQsLhpsuLRc+VpLpsDGGL+TGtDVshAshLGA-pashGTuuhIAhGClhsRhCHTNsCPlGlATQDPcLR..t+atGsPc+VlNahhhluEElR-lhAphGap .......................................................................................................................................p......h..ust.................p...a.ptatt.h..p..p..p..........h..t.lR..thh.thp.....t...t............l.s.lppV-.s.s.t.p.lh.p+.FssuAMSaG.u.lS...E.........A.HpsL....AhAh......Np....l.G......utSN......oGE..GGEs.st.+.h.t.s.t.t.ss.................................................................................tIKQ..lASGRFGV.s.......................stYL..s..s....u......c.tlpIKlAQGAKPGE.GG..p..LP.....Gp.....K..V.ssh..I.......A...ch....R..t...u..sP.....G.l...s..LI....SPPPH....H...D..............IY.....SI.....ED.LuQ.............L.Ia.DL.Kp.s.N....p..u...luV..K.....L...V..u......p......s......G......l.......G...s....l..A.s..G..........V.u....K....A....t......A...DhI...s....I.u..G..a.-.G.......G...T...........G........A..........o......P.......h.......s......u........l........c....a........s........G......h...P......a.........E.........l.......G...L........s......E.........s.........p.......Q........s...L....h...h....N....s.......L.........R..........s.......+...........l....p...lp...............s.D.G.tl....+T....G....t..............Dl....s....h.A....s...hLGA...-...p...a...G...huTu.
..s...h...l.u..h......G................C.....l.................h.................h.R..................h................C................H...h...s...s...CPsGVATQ........s.s..........p........LR................t..........c..........apG........psc+V.hN.......ahphlAcE...lRclhAplGh.t......................................................... 1 495 1024 1355 +2718 PF02364 Glucan_synthase 1,3-beta-glucan synthase component Bashton M, Bateman A anon Pfam-B_686 (release 5.2) Family This family consists of various 1,3-beta-glucan synthase components including Gls1, Gls2 and Gls3 from yeast. 1,3-beta-glucan synthase EC:2.4.1.34 also known as callose synthase catalyses the formation of a beta-1,3-glucan polymer that is a major component of the fungal cell wall [1]. The reaction catalysed is:- UDP-glucose + {(1,3)-beta-D-glucosyl}(N) <=> UDP + {(1,3)-beta-D-glucosyl}(N+1). 24.30 24.30 27.70 26.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 818 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.59 0.70 -6.67 9 769 2012-10-03 03:08:05 2003-04-07 12:59:11 10 21 230 0 498 771 22 530.20 35 43.68 CHANGED 
-FFPssSEAERRIoFFAQSLuTPlPEPlPVDsMPTFTVLlPHYSEKILLSL+EIIRE--.hS+VTLLEYLKQLHP.sEW-CFV+DTKlLu-Essh.psstp.pc-p.............................t.hppKhDDLPFYsIGFKsAsPEYTLRTRIWASLRo.....QTLYRTlSGFMNYS+AIKLLYRVENP-lsQhFuGNs-tLEh..EL-cMApRKF+hlVSMQRYuKFs...t-EhENsEFLLRAYP-LpIAYLDE-Ps.pputEsphYSALlDGasEl.-.NspR+PKaRI+LSGNPILGDGKSDNQNHulIFYRGEYIQlIDANQDNYLEECLKIRSlLuEFEEhphssssPYsssht...p.pspPVAIlGAREYIFSENIGlLGDlAAGKEQTFGTLFARTLApIG.GKLHYGHPDFLNuIFMTTRGGlSKAQKGLHLNEDIYAGMNAhhRGGRIKHsEYYQCGKGRDLGFGSILNFTTKIGoGMGEQMLSREYYYLGTQLPlDRFLSFYYAHPGFHlNNlFIMLSVQhFMl..lLlNLGuLp+pshsCpYDps...pc..hP......GCYNL.PllpWlpRsllSIFIVFaIoFlPLhVQELhERGha+AhhRhh+HhhShSPhFEVFsCQlYupulhssLsaGGARYIuTGRGFATsRlsFuhLYSRFAssSIYhGucshLhLLFuoho........lWhstLlaFWlolluLClAPFlFNPHQFuWsDFFlDYR-alRWLoR..GNS+.+tsSWlsasRloRoRlTGaKp+hlu..s-ctshDssRAphpNlhhuElhhPhhlhhhshlsYhFIsSpsGss................ppsossllRlhIlshhPIhhshslhhhhhhhusshGPhhs .....................................................t.-ApRRltFFspSL...h...hs.....s...lptM.s.a.ol....hhPaYtEphhhs...................p...................pl....................................p.............p......ptlohl.YLp.p..........la...............-WtsFhpch......t....................................................................................................................................................h.hh.h..u..c...............................t........t....u.h...............................................................................................................................thpt.sthKFphh.hshQ.ht..p.............pht.....sh..Lh.........t..t.lplual..-..............................................t....t.......................t...............ha.S.h.Llcs......................................................................haplpLsG.s......s.h.lG-GKs-NQNpullFhRG-hlQhIDhN............Q.....................DsY.hEEshKhRslLtE........a....................t................................................................ssIlGhREalFots.u.Lu.hhu.pEpoFsTlhtRhhu.......................h
..s+hHYGHPDhhsthahhTRG.GlSKAp+slplsEDIaA................GhsshhRtGplpHp-.YhQsGKGRDlGhspI..F.psKlusG.GEQhLSR-hahlup.hsh.Rhhohaas.p.GFa..hsshh.h.hsl.h..ahh..hhh.lss.lttt...................................................................h....hh.lhhh.hlPhhht.hhEpGhhpuh.phhh..hphushF.sF.hthhsp.h.psl.hGGAcYhuTGRGFsh.+..Fs...Yphautsphh.hu.c..h....h.h...l..l.hh.h...............................................a....hh..W...h....h....shhhu.PahaNPptFta.chh.Dappa.pWh.......s............tpo.....W.......taht.................................................................................................................................................................................................t.......................................................................................................................... 0 149 330 444 +2719 PF02685 Glucokinase Glucokinase Bashton M, Bateman A anon COG0837 Family This is a family of glucokinases or glucose kinases EC:2.7.1.2. These enzymes phosphorylate glucose using ATP as a donor to give glucose-6-phosphate and ADP. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.86 0.70 -5.65 41 1646 2012-10-02 23:34:14 2003-04-07 12:59:11 11 5 1468 6 393 2092 1039 301.90 40 91.50 CHANGED LsuDIGGTpsRhuLhsssssp.........phpsasss-asolpshlppaLtt....ptshpp.............st....uslAlAG.Plssspsph.TNhsWslShpphppsLGls..slpllNDFsAhAhAlsh.Lsppclhplsuup......sp.....ssushullGsGTGLGlutLlhsssphh.sLsuEGGHs-FuPpsscEhtlhpaLcpch..u+VSsER.lLSGsGLsslYculsphc......shpsth..............tpsusIoptAL......sussshucpsLclFsshhGshAGsLALphsAhGGVYluGGIsP+llphlppSsFtptFpsKGRhps.hlpsIPVhllhpspsGLlGAushh.pph ......................................................................................................................LluDlGGTNARhALtsh...sssp...........................pph..c.s....a....s....s....h...D....a..s.o.......L.........ps....s...l....c...h....YLpp.......php...s.p.s........................................usl.Al.....A...s...P...l........s.....G.........D....h...l...t..h.....T...N.....p...s.....W.....s.....h...S.....h.....t.....p.h.p.p....s.....L.....G...h...s.....clpll..ND.F.sA.hu.h.Al.s.h....L.p.....p.....c.....c.....l.....hp..l.G.G...u.p...............s.h..............ts.t.s.h....uVhG..s....G.TGLGVAtL..l..p.....s....s..c....p.......a..l.....sL..PuEGGHlD......F......A......P......p..o......-......c......E..h..h..l.h.p.h.L......+tch..........GH...V.........S.u.ER..lL.SG.s.G.....Ls.s.LY.c.A.lsptc....................sh.h.spt...........................hpPt-I....T.p....t....AL..............ss.sss...spcsLslFCshhGphuGsL..A.L.....s..hGs.....h.....G.G.VY.luGGIlP.....R..hl.-....h.h...c.....s...S...s....F.R.stF....c.c...K..G...R..ap.s.alpsI.P.Val.l...h.p.s.p...sGLlGuushhp..s........................................................................................................................................................................ 
0 133 258 329 +2720 PF01182 Glucosamine_iso Glucosamine-6-phosphate isomerases/6-phosphogluconolactonase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.70 21.60 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.56 0.71 -4.59 317 6449 2012-10-04 00:26:15 2003-04-07 12:59:11 15 20 4109 64 1532 4440 1070 196.50 27 73.82 CHANGED lspshAptlhpthppul.....tp...c.......up.hslsLoGGooPthlact..Lst...............................l..sW.....s+lplahuDERh.VP.scs-SNhths+ctLLs...ph....t.ls.......s.pla...h...............................................puAttYp.pplpphh....................................................FDllLLGhGsDGHTASLFPs..pst.h...t.....t.p..hlsslpts.Ppsss.......RlTLThsslss.A+plhhllsG.tsKtpslcp.shs .............................................................h....huphhsph.l....t..hh..t.p...p.............................sp..hsL.uLu.sGuoPh.shY.ct..Lsp...h....t................................tl..sa..pp.lpsathDE......h......s...l....s.t..c......c.......s.p.Sh.t.t.h.hpcp.l..hspl...............s..ltt.......p..plp..hhs....u..st............................................shcspsppYc..p.tlpp.hs.t............................................................................................lDlhl...L.GlGs..D.GHl.....u......h......N......Pu.......osh.s.................p.t.......t.p...........tsst.h....h....p......s.....s......s.........csPpp..............t.l.....ThG..l...s.s.l...hp.A+clhll.stG.p...pKApAlpphl.p................................................................................ 
0 481 916 1262 +2721 PF00462 Glutaredoxin glutaredoxin; Glutaredoxin Finn RD, Bateman A anon Prosite & Pfam-B_3081 (Release 8.0) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.03 0.72 -3.98 279 11246 2012-10-03 14:45:55 2003-04-07 12:59:11 19 92 4174 90 3561 10294 5112 62.30 25 45.51 CHANGED Vhlas+.....stCsaCppscplL....cphs..lt....a.phlclsps..t...phpptltphs............sh.TlPpla...l.sGc.hl .......................lhlas+..........ssC.s.a.C....t....ps....+phL.............pp..pG......lp...........a..p.l..s.lpps...s................phc.p.t.lt.phs..................sh..T..l..Pp..la...l..ssphl............................................................ 0 1009 2074 2907 +2722 PF04399 Glutaredoxin2_C Glutaredoxin 2, C terminal domain Kerrison ND anon COG2999 Domain Glutaredoxins are a multifunctional family of glutathione-dependent disulphide oxidoreductases. Unlike other glutaredoxins, glutaredoxin 2 (Grx2) cannot reduce ribonucleotide reductase. Grx2 has significantly higher catalytic activity in the reduction of mixed disulphides with glutathione (GSH) compared with other glutaredoxins. The active site residues (Cys9-Pro10-Tyr11-Cys12, in Escherichia coli Grx2, Swiss:P39811), which are found at the interface between the N- and C-terminal domains are identical to other glutaredoxins, but there is no other similarity between glutaredoxin 2 and other glutaredoxins. Grx2 is structurally similar to glutathione-S-transferases (GST), but there is no obvious sequence similarity. The inter-domain contacts are mainly hydrophobic, suggesting that the two domains are unlikely to be stable on their own. Both domains are needed for correct folding and activity of Grx2. It is thought that the primary function of Grx2 is to catalyse reversible glutathionylation of proteins with GSH in cellular redox regulation including the response to oxidative stress. 
25.40 25.40 27.40 26.30 25.30 25.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.31 0.71 -4.57 23 734 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 698 2 76 308 10 128.90 53 61.06 CHANGED scltsWl+cssshhspLlhPRasphsLsEFuTtsAppaFppKKEstlG.sFsptLscosphlpplsstLppLssLlt.ssptsNt.pLohDDIhLFPhLRsLTlV+GlpaPscVpsYlcphuptopVsLh.sshAl ..............PAIE-WLRKVsGYsNKLLLPRFAKu..u.FcE..FuT....PuARcYFlcKKE...ASsG.sFsshLA+Ss.uLIKpIucDL+tLD+LIl..pPNAVNG..ELSpDDIpLFPLLRNLTLVAGI.pWPo+VADYRDNMAKpTQINLLoohAl.......... 0 19 32 56 +2723 PF03157 Glutenin_hmw High molecular weight glutenin subunit Mifsud W anon Pfam-B_2180 (release 6.5) Family Members of this family include high molecular weight subunits of glutenin. This group of gluten proteins is thought to be largely responsible for the elastic properties of gluten, and hence, doughs. Indeed, glutenin high molecular weight subunits are classified as elastomeric proteins, because the glutenin network can withstand significant deformations without breaking, and return to the original conformation when the stress is removed. Elastomeric proteins differ considerably in amino acid sequence, but they are all polymers whose subunits consist of elastomeric domains, composed of repeated motifs, and non-elastic domains that mediate cross-linking between the subunits. The elastomeric domain motifs are all rich in glycine residues in addition to other hydrophobic residues. High molecular weight glutenin subunits have an extensive central elastomeric domain, flanked by two terminal non-elastic domains that form disulphide cross-links. The central elastomeric domain is characterised by the following three repeated motifs: PGQGQQ, GYYPTS[P/L]QQ, GQQ. It possesses overlapping beta-turns within and between the repeated motifs, and assumes a regular helical secondary structure with a diameter of approx. 1.9 nm and a pitch of approx. 1.5 nm [see 1, fig.2]. 
30.00 30.00 30.20 30.00 29.60 29.90 hmmbuild -o /dev/null HMM SEED 772 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.86 0.70 -15.05 0.70 -6.26 2 1571 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 61 0 1 1570 0 130.40 21 95.07 CHANGED S.VAtQYEQplVsPKGGSFYPGETTP.QQLQQtIFWGhs.thlptYYPSVTSPpQsSYYPGQASPQpPGQGQQPGphQpsGQGQQhYYPTS.QQPGQhQQ.tpGp.GYYPTS......LQQPuQGQQ.GQGQQG.........YYPTSsQhp.GQhQQPsQGQ...Q.GQGQ...QPtQGQQPGQhQQG..P...ppP......QQLGQGQQ...PsphQQSGQGQ.GaYPTSLQQ.GQGQpGaY.sS...................QQ........QPuQGQQGp.PuptQQPGQGQ.GaYPsS.QQsGQGQ.GaYPsS.....Q.QpPGQGQQGQ..........PuptQQPGQGQ.GaYPsS.LQp..GQ.GaY.TS.QQ.GQGQQ.GQ......GQK.QQPGQGQQsGQG...QQP...pQtQQPGQGQ.GYYPTS.QQsGQGQ.....QQ.GQGQ.GYYPTS..QPGQGQ.Ga.PsS.QQPGQGQ..........GQ.GQp..................................QQPGQGQpP...pQGQQPGQGQQG...............YYPTSPQ..................QPGQGQ.........QLGQGQQGYYPTS.QQPGQGQQPG......QGQpGahPhSPQ.o......GQGQp.GQh.QPGQGQQGYYPTS.QQsGQGQQ.GQh.QsGQGpQ...P......GQGQQSGQtpQGY.SsYHVSsEpQAAS.hVAKAQQ.AsQLPshCRhEGGDALSASQ 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 1 1 1 +2724 PF00745 GlutR_dimer GlutR; Glutamyl-tRNAGlu reductase, dimerisation domain Bateman A, Finn RD anon Pfam-B_544 (release 2.1) Domain \N 22.90 22.90 23.40 23.20 22.70 22.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.10 0.72 -4.07 164 2994 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 2895 1 730 2124 424 100.40 27 23.41 CHANGED tpAEtIIp...pcsppFhphhcsh.p....ssPsIpslRppucpl+cpElc+uhpp...ht....hsccs.ccslcphscplssKllHsPstpL+p.su.tpsc..pph.....lphlpplFs..l.....c ....................pAcpIls...pEsppFhpWlcsh.p........ssssI+slRppAcplppcplp+shpp..ht..........hup.cs....cpllpc.hscplsN+llHsPotpL+phu.pc...sc....sph......lphlpplasL..................... 0 231 487 639 +2725 PF05201 GlutR_N Glutamyl-tRNAGlu reductase, N-terminal domain Finn RD anon Manual Domain \N 21.50 21.50 22.70 22.50 20.40 21.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.89 0.71 -4.63 172 3200 2009-09-11 08:11:09 2003-04-07 12:59:11 10 10 3083 1 803 2306 768 149.10 37 35.09 CHANGED lGlsH+sAslclRE+luhss.pphtp..hht.pl.tt..slp....EsllLSTCNRsElY....h..ssp.................h............................phlsp.htt.ls...hp.p.lppthhhhp...spcAlcHLa+VAuGLD....ShVlGEsQILGQlKpAaphAp.....ctsssuthLpplFppAhpsuK+VRo-Ts .............................................lGlsH+TAsVslRE+lu.Fss...splpp........ulp.pL....h.......p......p.......t....lp......................Es.ll.LSTCNRoElY.........s.....ssc..p...................psttp...t.lh.....................................................caLuc.hpt..ls.............hc.-..lptp.l.a.h.hp.s.s-Al.cHLhRVAuGLD........ShVLG.....EsQILGQlKcAathAp..........c.sts..s.u....shLp+LFppuhssAKRVRoETt......................... 0 265 544 705 +2726 PF02595 Gly_kinase DUF168; Glycerate kinase family Mian N, Bateman A anon COG1929 Family This is family of Glycerate kinases. 
20.20 20.20 20.20 20.30 19.90 19.80 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.13 0.70 -5.76 6 4076 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 2802 4 574 2648 422 345.30 42 97.58 CHANGED MKIlIAPDSFKESLSAhEsApAIcpGFcplFP-AsYlslPlADGGEGTV-ullsATpGphhchcVsGPLG-p.VsAhaGhsGDG+TAlIEMAAASGLtLV..Ps-KRNPLlsTShGTGELIttAL-sGAcpIIlGIGGSATNDGGhGMlQALGsphLDusGptlGhGGusLuplAcI-lssLDsRLtpsplcVACDVsNPLsGspGAutVFGPQKGAossMVppLDpsLuHaAcllccssGhslpshsGAGAAGGMGuuLhshLssclKuGIpIVhctlpLtptlpDA-LVITGEGRIDuQSlpGKsPlGVApsAKpashPVIuIAGslscDhsVVapaGIDAlFSIlsthusLc-ALpputssLhpsApslAssLthuts .....................................................................................................................MKlVIAPDSFK-SLoA.psupAIcp.Gh..................p......p......l.h..............P......c....A..........p.................h.......l....p......lPlADGGEGTl-.u.hh....t..A.............s....t......G...p.....h.......h....p...h.p......V.suP....h.Gct....l..............p........A...h.......a..G..h..............................s...............c.....s...........p..........T.......A.....lIEMAsAuGLp..Ll.....s.........p...........p...R.....sP.....L.h....s...o..o.hGs....GELItp..AL.c.p....G.......s.c.c..............I.llGlGGSATNDGGsGMhpAL.Gs....c..h..h...........Dt.pG............p...........l..s..........s...............G.us.L.t.....p...lsp.ID...h.........s.t...h..........c...s.....+....L.......p.......p...s.......pl.pl..As..DVs..NPLsGtp.GA.o.tl...FG..P...QK.........G.........As........p..h.l.tp....LDps..Lppa.uc.ll....p.....p........t...................h..................t.................h................s.......l..............t.....p..........hs..G.uGAAGGhGuuLhu.a..h........s........A........plpsGI-lVhc....h....h....pLcptl...p.....s.......A......D......L..VITGEGR.lDpQohh.G..KsPlGV.A.p.hA..+..p..h.....p.....h..P.V..Iuls..Gs..l..s.c..s...h....t........h......lh..p...t.G........l.s.AsFul...l......t......h......sLpcs..h..p..p..u..t..p..pl....p..s.u.psl
uthh....t............................................. 0 175 336 474 +2727 PF01228 Gly_radical Glycine radical Finn RD, Bateman A anon Prosite Domain \N 22.20 22.20 22.20 22.80 22.00 22.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.25 0.72 -3.68 141 5415 2012-10-01 23:28:04 2003-04-07 12:59:11 16 8 2388 24 579 3783 69 105.20 42 16.10 CHANGED husGssPhpG.....tDtpGshAsl.sS..luKlsa.thtpsGhhhshp..hssssl...p.........p.cpthpslsshlcs........aa......p.......................sGhHl.phNVls+.......csLh-AtccPEc..Y.sLslRVuGYus.pFs ............................................................usGusPhpG.....tDppGsh.A.sL.sS...VuKL............sh..sh..sps..G..l....h.shp...lsPs.sLst..p...........s.-sphppL..ssll-u..........aF...c........................GGpHl.NlNVhsR...........EsLhDAhc.......H..PEc..YspLslRVSGYuVpFs.................................... 0 177 347 474 +2728 PF00232 Glyco_hydro_1 glycosyl_hydro1; Glycosyl hydrolase family 1 Finn RD anon Prosite Domain \N 20.10 20.10 20.10 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 455 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.66 0.70 -6.29 13 11445 2012-10-03 05:44:19 2003-04-07 12:59:11 13 39 2849 314 2628 8499 1126 399.30 33 95.89 CHANGED hptsFPccFlWGsATAAYQIEGAapcDG+G.SlWD.....sFs+p.Ps+shssps.GDlACDpYHRaccDlpLhpclGhpsYRFSluWsRIhPpG..sGtlNptGLcaYc+LlDtLhppGIpPhlTLaHWDLPQsLpD.tGGWts...RsslctFpcYAcssFccFGD+VKhWlTaNEPhssuhhuatsGhauPG..............hpshpssapsuHplLlAHAcul+ha+-hh....sGpIGlslssshthPhosus.-s.cAu-RthpatssWFh-Plh+.GcYPpphhchhtcp........uhhsshs-pDhcll...psshDFlGlNYYoophhpsps.....psh.s.spshthspts................oshuW.hh.P.GL+cLLphlpccYsp..hsIYITENGhuhpDch.p.....uplpDstRIsYlppHLspltcAI.pDGlsl+GYhsWSLhDNFEWspG.YscRFGllaVs.....hsoppRpsKcSuhWYppllpsNGh 
.............................................................................h...thPpsFlWGuAsu..A.Q.........hE..G....u..h..p......t...s...G...+..G.............s.h...h.D..................................h.h.h.......p...................t..............t.......h........h............t....t...........................................sc.................u........D...a.Y.........c..+..Y..c........-...................D.lt.Lh.t.......c..hG...hpsaRhSIu....W.........oRl.....h...P..........p.........G..............p...s.......p......s..............N..p...tG.lpFYcplh-c.....h...h.p.h..................s..Ic.PhlTL..............H.......a.....-.h.P.............................t.............L.......................p.....h.................G...........G.Wh.s...........+.c..h.l........c....hFhca.Ac..............hsFppat....c+.........V.........+aWhTaN.E..........s............h..........h.......s.......a.......h......h.......u.......h....h.......ss...........................................tt.s.....p..t.ha...p.s..h....H....p...hlApAhA.........schh...+ph............................ps.p...lGhhl.s.h..s..........h.......YP......h.s....s....p..s......t....-....h.............A....t..........p...................h....................p.......t......h.......h...........................h...........a....h......D..s..........h.........t..G.p.Y.P..t...h..hphhp.cp................................s.h.....p..h..p..t..tD....h.p.h.l................p.s...s...sDaluhsYYhst.hs.p.st.....................................t.............t...s....t........h.....t..h...............................s.s..h..h......t..o.....p....h......s.W.t.I.....P......G..l...h..hl..p...l.hcc.......Yt......hsl.alsE..N........G...................h...G.s......-..p....t.......................tstl.p.Ds.h.RIp..YlppHl.p.thtc......A................l.....t.....-G.ssl.hGYhsWu.h........h......D.......h................Sh.s.s..G..hp.c.R..YGhl..aVD................sshpRh..K.cSh....h....WYpplltspt.t...........................................................
...................................................................... 2 642 1346 1995 +2729 PF00331 Glyco_hydro_10 glycosyl_hydro3; Glycosyl hydrolase family 10 Finn RD, Griffiths-Jones SR anon Prosite Family \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.41 54 2356 2012-10-03 05:44:19 2003-04-07 12:59:11 15 132 632 164 770 2484 350 243.90 30 61.72 CHANGED oLcphhpsph..hhGsAlsts......hhst....ppttllp..pcFNplTsENpMKh-ulpsscG..........................pFsFssADplVsaAppNshtlRGHTLVWHs..QhP....sWhtp.....sss.ss..............LhptlcsHIpsVhs+YK.......plhuWDVVNEshs-ss..........thRp.......SsahplhG........p-alchAFphARcADP......sAKLahNDYNh-ps.....sK.spslhshV+chp.scGlP.......IDGlGhQsHlsssts...........splppulptauslG...h-ltITELDlps.t.................................tpApcYpplhphhhp........phsulThWGlsDstSWhss.............................stPLLFDssapsKPAY.ullssh .................................................................................................................................thh......t....a.s..hs..tNthK........hts..t..s............................h...s..a.t.......s...-..h..ht....hs.t......tp...s.......h........h.+...uH...sl.....h.....W.ts......p...s.............t..Wh.t............t..t...................................................hht.....hh.p.pal..ps.l.ht+.at................hhs...WDVV..N.Eslssss.....................shR.p.................................o.h..a.hp...hhG...............p-a...lt...hA..FchA+....psDP.......................sApL.......ah......N......D....Y.Nhpps.........sK...ppth..h...p.hl.c.p..lp.......p..p.G.ls....................I.D..G......lG.h..Q.....uH.h..........................t.h....t.t...hpt.h.s........t...........h......s...h.l.hTEh....Dlt........................................................st...htthh.t..hht....................h.slhhWsh.p..................hh.t...................................h...............................h................
............................................................................................................................................................ 0 311 563 699 +2730 PF00457 Glyco_hydro_11 glycosyl_hydro6; Glycosyl hydrolases family 11 Finn RD anon Prosite Domain \N 21.60 21.60 22.70 22.70 19.30 21.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.50 0.71 -4.95 12 815 2012-10-02 19:29:29 2003-04-07 12:59:11 12 49 338 104 256 868 0 165.60 49 64.32 CHANGED GssuGhaYphWsDsGussshhss.GGsaSssWs..NtGNhVsGKGWssGo........sshslsYsusas.PsGNSYLsVYGWTpsPLlEYYIV-saGoYcPsus.spthsoVsusGuTYDIapTTRhNpPSIpG.TsTFpQYWSVRpoKRosG........TlosssHFsAWtptGMshG.phhYplhAsEGYpSSGSAslsl ............................tsshaaphWp.D.ss..u...s...s.p.....h......s...........s...........uG..paospWs.....s.s..uNalsGKGWpsGs..................................tpslsY...s...u...s.ap......ssG...N....u....YLulYGW.......T.p......s........P........L.......lEYYIVEsa......Go.Yp....P......su......s....sp..............h.GoV....ooDG.uTY-IYposRhN.tPS..I..p..G...TsT.FpQYWSVRps...K.Rs..tG.............olohu...sHFsAWtp..hG........hp...lG....sh...a..hlss...E.G..Yp....S.SGsusls.............................................. 1 85 169 226 +2731 PF01109 GM_CSF Granulocyte-macrophage colony-stimulating factor Bateman A anon Sarah Teichmann Domain \N 20.10 20.10 20.10 21.20 20.00 19.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.63 0.71 -4.17 7 64 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 42 5 16 68 0 118.90 55 84.06 CHANGED APTRsPssVTRPWpHVDA.IpEALSLLNpopDssAsMNc.sV-VVS-.F-.QcPTClQTRLpLYKpGLRGsLT+LcGsLTMhAsHYcpHCPPTPETsCtTQhITFcsFK-sLKcFLFsIPFDCW ...............................APT+.PussTpP.WpHV-A.IpEAhpLLN.SpDssA.MNc.sVEVVSEhFD.QEPTCLQTRLcLYcpGL+GSLTcLcGsLTMMAoHY+.........pHCPPTPETSCtTQhITFcSFKEsLKcFLh.IPFDCW........ 
1 1 1 2 +2732 PF01670 Glyco_hydro_12 Glycosyl hydrolase family 12 Bateman A anon Pfam-B_1736 (release 4.1) Family \N 20.80 20.80 24.10 23.40 20.30 19.30 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.44 12 532 2012-10-02 19:29:29 2003-04-07 12:59:11 11 21 280 62 289 492 9 154.30 26 54.07 CHANGED .spsl.hhVuslpShsoslsauh.ssosl.sssAYDlahssss..pssusGchElMIWhsphGslpPhGspluTsol....sGp....oW-lWhG.....hsus.psauaVssoshsshphDV+cFhchhspspuh......stsshYlpshphGTE.ass.....ssshslssaSssVp .............................................htluslpo..hsosh..pa..sh....sss...slh.s.ssAYDlahs.....s.s.s..............t.....s....u..s.s.p....hElMlWlst...hG......s......h...t...P......l...........G......s.................l.....u......s.s.sl...................uGp..............sap..lapG..........s.s.u.sh.pVa.....o.a.l.........s......s..........s...s....h........s...sas.s.........D.....l.........ts.....F...ls........h..ts..s.p.sh............sss..YLs..........slphGsEs.asu.......ssshss..sp..aohtl............................................................................. 0 99 184 259 +2733 PF01373 Glyco_hydro_14 Glycosyl_hydr22; Glycosyl hydrolase family 14 Bateman A anon SCOP Domain This family are beta amylases. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.45 0.70 -5.73 5 548 2012-10-03 05:44:19 2003-04-07 12:59:11 12 11 191 66 172 752 23 308.80 44 78.16 CHANGED aVMGPLcKV...TDWN...oFKsQLpTLKNNGVYAITTDlWWGYVEsAGDNQFDWSYYKTYAsoVKpAGLKWVPIISTHpCGGNVGDDCNIPLPSWVWoKsSsD.-MQFKDESGpsNsEuLS......PlaSGls..KpYsELYuSFApNFuuYK.DlIsKIYLSGGPSGELRYPSYsPSsG.WoYPGRGKFQsYTEsAKSuFRsAMpsKYGSLsKlNuAWGTsLTShS..QIsPPTDGDsFaTNGsYNSsYGKDFLoWYQSVLENHLulIGuuAHssFDsVFGV+IGAKIAGlHWQYNNPTMPHuAEpsAGYYD......Ys+LlcpFKcucLDLTFTCLEMsDS..GouPpYS..hPpTLVcsVuolANsKGlcLNGENALspGsSu.tFp+ltEclTsauauG.....FTLLRlNNlVNsDGSsTuEMusF ...........................................................................................................................................................................................................lhSFHpC.GG.NVGD.s......ssIP.lPpW.Vh-h...s.....tp..........s.sDlhaTs.....+.....t.....Gp...RNhEhLohGs..............Dp.Plh.......p.GRTslphYsDaMtSFRps.h.pp..a.h...su......sIs-IpVGhG..PuGE..hRY..PSY.P..p......u.p....G...W...h....a.................P.G.....l..GE....F........C...YDKYhpush.+sAAt.....................Wth.....s.....s.....s..s..st......YNshP....c..c..T.t....F.F...p..p........s....G....s...a...oph...G..+....FFLsWYSshLlpHGD+lLstA.p..pl.......F............u.....s........t.................Vp..l.uh..Kl.......uGIH.WaYp....s.sHssEhsuGahs.......................Y..lh...thh..tp..........t.......h.h.hss..h..-...hp..s.................s.....PptLl.plh..s...t...h.h....tsEN..u.....h.....h.....s...th.......tph...................................................hsh.ph.................................................................................................................. 1 48 111 143 +2734 PF00723 Glyco_hydro_15 glycosyl_hydr10; Glycosyl hydrolases family 15 Bateman A anon Pfam-B_771 (release 2.1) Family In higher organisms this family is represented by phosphorylase kinase subunits. 
21.70 21.70 21.80 21.70 21.30 21.60 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.47 0.70 -5.93 26 2515 2012-10-03 02:33:51 2003-04-07 12:59:11 16 24 1317 16 1160 2459 281 390.30 19 61.49 CHANGED uhttpthhpslts.thhh.ss.oGhllASsopotP..........DYhasWsRDuuhshtulh...............ctht.thppsltphhcs.hlps.hphputsps.sphpss...sLttstapsstsshsGs..WG+.QhDusuhhhhslhph......htsG..........thpssl..l...lps.lpYlsptWsps..saslWE....pppupp.a..opsstt+AllcuhshActhG..s.................................................ssph..hppptsplhspl.......p....ua..........................Wssttsahthsss....................ppsLDush..............lLhshhsasssss...................s+hlushhhlhss.hpsthhlssu..ts........ltR.YspDsh....................................................................t.ht.sGsPa.................................................aLsohh......................................................................................................................................................................................................................................................................................................sp.hhstlhthsuptt.htlschuhshacchsss.................su...p....pt.h.sh..s.tthss.hhphht..ssssGhLuEphsh..........hpGp..huupshsaSasshlpsuh 
...................................................................................................................................................................................................h......h.p....hh..hh.h.hh....ts.sGsll.A..u.so....tshs....................cYph.sWlRDushshh..ulhthu............................tp..th.h....h.h..........tp....s.....h........h.........t....................p.....t............lp....hht..h.tsp..p.h...tp........................hh...thts.s...t.......s.s......h..h..G..s.......t......hsphQhDs.hu.hh..h.l.hth.................................hpts.......................hth.tp...h.th..........l.pt....h....l....ta....l.......t......p......t......a...p......t......s........D...GlWE.........pts.p.ph...Splhhh.hA...h.c....uhp..h.h.p.h.h.s.....ts........................................................................................................h.ph.....hpt..ht.pp.lhppl.............hpp..........ua...............................................................................................................................................................s.........h..h.a...h.t.h.ss.........................................ptlDush.......................................................................................lhhs....h..t...h.h.ss.s...c......................................................sphhs.sh.h.l.tp.....L.....h..t....t.t...h.................................................hhR..Y.....t.p.............................................................................................h.........t..ttsa.......................................................................................................................................................................................................hh.sshh....................................................................................................................................................................................................................................................
.....................................................................................................................................................................................................................................................................................................................................................................................................................h........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 366 666 954 +2735 PF00332 Glyco_hydro_17 glycosyl_hydro4; Glycosyl hydrolases family 17 Finn RD anon Prosite Domain \N 24.00 24.00 24.10 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.10 0.70 -5.39 16 2142 2012-10-03 05:44:19 2003-04-07 12:59:11 13 28 364 18 1056 2132 103 250.50 30 66.00 CHANGED IGVCYGhhuNNLPuss-VVpLY+opsIpcMRlYpscspsLpAL+GSsIpllLsls.NssLtsLAus.otAssWVpsNV+saass.Vph+YIAVGNEVssusspu.....llPAMcNlpsALsAAGLuspIKVoTulphsllsNoaPPSsGsFpsssh..hlsPllpaLssssuPLLsNlYPYFuausNPt.slpLsYAhFpssssss.Ds..uhsYpNLFDAhlDulYuAl-KsGus.slclVVSESGWPSsGu...huAoh-NApsYspNLIsHVt....tGTP++PG.slETYlFAMFsENpKssc.....lE+HFGLFpPscpPtYslsF ..............................................................................................................................................................................................................GhshG.......s..p....s.ttsht.h.h.pt.....t.h..t..th+la.t.ss....thlpuh.t..s.o..sltlh...lsls.s.t...l..t.hss.............t....At.tWl..p.p..s....l.....a...........................s........p......hp.......hls.V.G.NE....h.......t.s.tt................llsAhp..slpt.u.....l..t.....h...s.l..t......tlp.......loss..h.t.h.shh...t....t...s.......s....P...St.u..Fp.th....hhtshh..pa.Ltps..s..u.P...hhhN.h...YPahuh....h.......s.s.......h.s.l.saAhFp....s.....s......s..........s.....h.....Ds...................tht.YpNhhD.A.lDuhhsAh...p...t..............h.....Gh.........s....s............l.............p................lll................uEoGWPo...tGs...............su..s.......h.t...s......A......t.sasps..Ll..p.+.lt....................pG....T..Ph....p......P.s....sl.....p.....sYl.F.uhFsE....s..K.....s.....ss..........sE+p.aGlF...s.s..t.p..hY.l............................................. 
0 163 606 854 +2736 PF00182 Glyco_hydro_19 chitinase_1; Chitinase class I Finn RD anon Prosite Domain \N 20.40 20.40 20.50 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.61 0.70 -4.69 17 2104 2012-10-03 00:09:25 2003-04-07 12:59:11 14 48 920 28 497 2044 184 174.20 31 58.88 CHANGED IlopuhF-pMLpaRNssuCsuKGFYTYDAFlsAApuFPuFGso.GDsss+KREIAAFhAQTSHEToGthhhus-usauWGYCahpp...........sptpaPCusG+cYaGRGPIQLoaNYNYG.AGp..AIG.DLLssPDLVuoDsslSFKTAlWFWMTsQss.KPSsHsVIsupWpPSssDtuAsRlPGaGsITNIINGGlECspG.psspspsRIGFY+RYCshLGVssGsNLcC ............................................................................................................................................a...shtthh.tAh..t......a.......t......h.s.....t................s...........t......................t.....++pl....As.hhu.phspE.Ts....t...h........h.......h..t.....................s.......h..................t............................s..................hs....................................................h......................p....t....p.....h............s........s......s.......s.....G.....p.....p.....Y..a...GR.G......slQ.loapa.....N....Ys...ss.....Gc............u...l......u....h......D.........L........L........s.s.......P-......l......V..up.........D....sh......l......uhpoAl.WFW......hss..........................htsh......t...............................G.................a..G..hhphl.Nss.pCs......................t..........t....pl....tha..hht..................................................................................... 0 116 292 404 +2737 PF00703 Glyco_hydro_2 glycosyl_hydro7; Glycosyl hydrolases family 2 Bateman A, Griffiths-Jones SR anon Pfam-B_572 (release 2.1) Domain This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.59 0.72 -3.61 136 6352 2010-01-08 13:56:32 2003-04-07 12:59:11 16 146 2285 323 1491 5735 464 105.70 19 11.70 CHANGED spl.p...s........hhlpsphsp....ppuplplpl..............hpstsstshphplpsphhssstpphtt..sstthhhhttt.......................h.h.ps..pLWss....c.p..Pp.LY..slplpl.....tss.....phh-ph..ppphGhR .....................................................................................l..t...s............hh.l.t.s.p.hspt.......ppuplph..pl......................s........t...t...t.....s.....t...p.....h....p....l...p..h...p......l....h.....s.sp.s.....p.hs.ss.........sptths...................................................................ph.pl.p.sP.....cLWss.......-p.P.h.LY..pltlpl........tss...............phl..-th...spphGhR...................................... 0 531 1045 1312 +2738 PF00728 Glyco_hydro_20 glycosyl_hydr11; Glycosyl hydrolase family 20, catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_877 (release 2.1) Domain This domain has a TIM barrel fold. 
20.60 20.60 20.60 20.70 19.70 20.30 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.32 0.70 -5.29 56 4091 2012-10-03 05:44:19 2003-04-07 12:59:11 17 134 1587 80 1175 3510 693 323.90 22 49.19 CHANGED FsaRGhhlDsuR.pFhshcplc+hIDtMAhhKhNphHhHLoDs.......puWplpl.....psaPcLochGuhpspt.........................................................hYTpp-lp-llpYAttRtIcVIPEIDhPGHstusltuhsphttt.........................thphtssp.hlssspspoapFlcslhcElsphas......spalHlGGDEs...........................tspWppsst...hpthhtptt...ts..............htph.th........h.t+htphlpptGpp....hlsWs-hhpss..................................ssspshl.sWps............st.stphs.............ppGaplIhoshs...hhYlDhhtsttst..sh..............h.shpps...................hsasshhpt.s..........stptt...........tlhGspuslWsEhl.......ssspl-hhlaPRhhAlAEtsWoss ....................................................................................................................hRGhhlDsu.R.p.ah.sh.........ptlcc.hl.Dhhuh.hKhN.thHh..HlsDs...........................tu....a.t.hpl..........p.thPt.....hs.ph.....sshps.t............................................................................................................t.hYTpp-hp-llpY....A.pp+................sIpllPEI-hP............GH......hp......u......h..l..tu..h.sc..hhtt.......................................................................tht.h.ts......t.......t..hls.....s...p.p.p.sh.p.Flc.sll......s-l..h....p..h.Fs...................sphhHlGuDEs................................tss.a..tt.s.....t.....hpthht..p...t......s..........................tpl..s.h........a.h...p..cl......t..p.h....l......p....p.....p...Ghp.......hhsW..s-.hhhtt...............................................sps....s..h....l..hWps......................ttshphs....................................................................pp.G.a..ch.....l....ss.p........hhYh.shhtt...................................................................pth.............................
.................................................................ph.ps.h..t..t.......................p.....................................lhGs.ss.hWs-..hh................stp.......h...p....hhaP...p...h.uh.AEh.Wp..t......................................................................................................................... 1 455 732 999 +2739 PF02838 Glyco_hydro_20b glycosyl_hydr11; Glycosyl hydrolase family 20, domain 2 Bateman A, Griffiths-Jones SR anon Pfam-B_877 (release 2.1) Domain This domain has a zincin-like fold. 26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.78 0.71 -3.58 143 2321 2012-10-01 20:56:08 2003-04-07 12:59:11 10 128 932 72 547 2311 319 127.00 21 15.90 CHANGED ph.sllPtPppl..p.h..tsGs....asls..s.s.sslsh..ss....ts....pt..s.....s....p.h.Lpph.l.p.......ttG.h..phs........tss..sss.........p...ssl..............t.......ts...................thhspEuYtLs.lss....s.....slsltussssGhFaGhpTLtQll..ttt...........ttt........pl..s.ss.pIpDa .......................................................................................................tllPtPp..pl..p.h...tpGp.....hs.l...........s....p..s..splsh..ss...........t.......tps...............s......p.hL.tph..lp...........hhGh....pht...............ss.tss..............p.ssl.......................h..........s.............................................................sth.ssE...uYpLs...lss....p.........tlplpus.s.ss.GhFaulQTLhQLl..sst..............................ttt...........pl...P..ss..pIpD............................................. 
0 198 366 481 +2740 PF01183 Glyco_hydro_25 Glycosyl_hydr18; Glycosyl hydrolases family 25 Finn RD, Bateman A anon Prosite Family \N 27.00 27.00 27.00 27.70 26.30 26.70 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.41 0.71 -4.46 28 3628 2012-10-03 05:44:19 2003-04-07 12:59:11 15 150 2015 18 613 2804 137 177.30 24 50.67 CHANGED lDVSpaQusls...aspl.....ppsGl..phshlKATEGssahsshhspphssucssGlhhusYHFhp.s...ssAtppAcaFlssl...thssssh..sLDhE..............tphssstpssssthppFlcpl.cpptGh+sll.YosssFhpsp...hsph.spa..sLWlApYsspssss..........shsshshWQYTusG.hsGl ..............................................lDl.SpaQ....u....p.ls........appl.......................t.psG.h........pashl......+.....s......o......c.......G...........s.........s........h...........h...D....s...t...........app...php..p....upp....t......G...l...h.G.sYaasps.................ss......u.pppA.c..h...Flpps...............shs.ttsh..........s..lDlE.................................t.s.hst.sshpp.tl.p...s...ahspl...p.....p......t...s.....G.....h...p..s.h.l...Yos...t.s...ahpp...p.......................t........s...p.h........sph..............slW.lAp..Y.ssp....s.h...........................ts..hsh.WQaospGphsG.................................................................... 
0 217 409 506 +2741 PF02156 Glyco_hydro_26 Glycosyl hydrolase family 26 Mian N, Bateman A anon IPR000805 Family \N 20.20 20.20 20.20 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.19 0.70 -4.95 8 834 2012-10-03 05:44:19 2003-04-07 12:59:11 10 61 453 23 298 816 38 238.10 19 51.41 CHANGED VsPNAptss+sLhsaLsslhs...++lLSGph....Guhs.sshshp-hpplpssTG+hPAlhuhDahc..hosupltcshsssss.p-hIsaW+..pG.GIlslshHas.......sPup..pts.sFhTtsTo.phcsslsss.osEY+.hlhc.lDpIA-tLpcLQsps...VPVLFRPLHEssGcWFWWGscG........PcsYKpLW+hlY-hhsch+GLsNLIWVYosss..s.tssa....YPGDsYVDIlGhDsYts..Dstshos.asphlsLhsutthstluE....sGolP.sthIsshchpas...aF.sWss.........phshsssptlpclapssaVls+DE ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h....t...l.s...t.h....tp.l.p..s......s........lPlla.R.hHEh.........s........G....s............W..F...WW..G..t.p....................sptahp..ha+h...hhch.hp..pt....p.sl.....s.Nll.a....s.....a......s......s......s....s..............................t........s............s.....s.....a..........YPG..D.....sYVDllGhDsYst.............s.......s........t.....t......................a..tp....h..h...t..................t....................ht.................................................................................................................................................................................................................................. 
0 140 222 272 +2742 PF00295 Glyco_hydro_28 PG; Glycosyl hydrolases family 28 Finn RD anon Prosite Domain Glycosyl hydrolase family 28 includes polygalacturonase EC:3.2.1.15 as well as rhamnogalacturonase A(RGase A), EC:3.2.1.-. These enzymes is important in cell wall metabolism. 20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.09 0.70 -5.48 16 3587 2012-10-02 14:50:22 2003-04-07 12:59:11 12 63 822 30 1506 3608 74 283.30 21 67.90 CHANGED CushshsslhVPp..shtLc.sshs......uuholphpGTsphs..........hcp.ph.....hhhlsssplsshsssG.usIDGpGs........tWWcspsp.....ssKstPphlphppsc.suplsslslpNo..hthslptssslshsclsIssssss.............NTDGhclusSsslpIssssltstDDCluIsoGo......slphTsssCusGHG..ISIGSlGspss.ssVssVsVpssslssosNGVRIKT.hp..uuoGoVoslpapNIphpsls.hsIlI-QsYps.ssPssps.souVploslsacslsGTsso.tsslhlLCucu..Cpshshsslslou.G...psoupCpNls.......ssssC .................................................................................................................................................................................................................................................................................................................l.......h....t.h........p............h................h.........G.............u...h.....l.sG.....pGt..................................a..a.......p....................t...t............................t.................t.......P..........p..h..l.t.h........ps.p..s.hhl.p....s........l.pl..h.NuPh.a....p.h.p...h...h...t...s.p.slp.lp...slplps.s.t.ss..................................NTDGh......Dht...s.o...ps...lhIps.......................s..h.....l..p...s.........u................D..D..........Clulp...u....Gpt...........................slhlp..s..s..h..s....s.....s...G...H..G......l..ol.G...S.........sp..t...t........................................V...........p.s....l..hl.........p.....s...s....p......h....h........s..........o...........p.........s.............G..l.
...R........I....K.......o....h..................uts...G....h..l....ps.....lha...pslphp.........s.....l.........p..............s.......l...h.............l.......s..........t......t...........Y...........t...........s..........t.......t...................s.........t.........t...................................s..........t........h......................l.........p........s.......l.......h..........h....p....s..l...p.....u.........p.......s..t.......t..................s......h........h......s.....t.....t............h.ps.h.hpshtl...................................................sh........................................................................................................................................................................ 0 359 933 1245 +2743 PF02836 Glyco_hydro_2_C glycosyl_hydro7; Glycosyl hydrolases family 2, TIM barrel domain Bateman A, Griffiths-Jones SR anon Pfam-B_572 (release 2.1) Domain This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.88 0.70 -5.52 11 6947 2012-10-03 05:44:19 2003-04-07 12:59:11 12 151 2301 315 1472 6580 701 251.70 22 30.17 CHANGED lcl+c.shhhlNGK.lhh+GVNRHEpssh+G+uhshshhlpDlpLMK.phNhNAVRTSHYPsp.caYpLCDcYGlaVlDEsslEoHGhhphhG........................sssPpW.tAhl-RhpchlpRDKNHPSlIlWSlG.NES.....suGtshcshhcahKplDPoR...sVpYEus.......s.hssphsslhhsh.uphYpc.sh.tp....lpc.lps.....................p.pKPhI.CEYuHsMGNu.GshpcYpphapph.cYQGualW-atDpul.tptss.ss....chhtaGGDFs-p.sDhpFshN..GlhhsDRsP+PuhhphKchhp.hph ....................................................................................t..t.hhl..Nsp..h..h..hp.G..h.s....h.H..p..t...t...s......h.h..G......p.s...........h..........s.....................p..........t...........hh.p.D..l..p..l..h.K...phshNulR......s....u......H......Y......P........t...s.................p.h..h...c.h.CDch.......G.lh...V....h....sE.s........s....h...........s...h...s..h...h.......ht.................................s..p..s..s....p.h.....t.....p.....s..h..h.p.c..h....pc....h..lp.Rs......+NHPSllh.WS.....lu..NEs.....................................stt.t.........h.......p.......t....h......h.......p..h.......h.......+..p....h.......D......s...o....R..............l.p.......h..t.ts.......................................................h.....t..........................s....h.................t......h.....h......t...................t........................htt............................................................................p.....s...h...h......s...E..........as..t......s.....h.........h.....s...................t...................h.....t.................................h............................................................h..............t............t.......h........h..............................t.............................................................................................................................
..............................................................................................h............................................................................................................................................................................................. 2 553 1041 1294 +2744 PF02837 Glyco_hydro_2_N glycosyl_hydro7; Glycosyl hydrolases family 2, sugar binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_572 (release 2.1) Domain This family contains beta-galactosidase, beta-mannosidase and beta-glucuronidase activities and has a jelly-roll fold. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.09 0.71 -4.69 88 7272 2012-10-03 19:46:52 2003-04-07 12:59:11 13 195 2374 329 1801 6677 638 164.50 21 18.74 CHANGED ppphhsLNG.......tWpFphssss.........................t...t..h.................thss......tta.pplsVPssaphp.sh.........................................tt............sssuhYc+pFplssph.....p...scclhlpF-GVpsshhValNGphVGh.ppsuasshEaDlTshlptGc....NplsVpVh......phssusal................................s.DhaphuGIaRsVhLhspP 
...............................................................................................................................................t...h..Lss........W...pFth.tts..........................................................................................t...................................................pt.....th...p.p..l..s..V..P.s..s..aphp....h...............................................................................................................sss.s.h.....Y.p+...p.F....p....l.s.p..s..h.......................p........s.p..p....l..h..L.pF.-.G...V.p...p..t.s.p.V...al.....N......G......p......h......l...........G........h......p........p..........s...u.......a.....t.....s.....h.......c.....h...D.l..T....s....h....l...........p....t...........Gp.............N..hlsVp.V.....................p......s.s.phh...................................................s.h.a............h.....u......G..IaRsVhLhh............................................................................................ 
0 669 1268 1576 +2745 PF00933 Glyco_hydro_3 glycosyl_hydr14; Glycosyl hydrolase family 3 N terminal domain Bateman A anon Pfam-B_1151 (release 3.0) Family \N 20.70 20.70 21.30 20.70 20.50 20.20 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.73 0.70 -5.44 39 9251 2012-10-03 05:44:19 2003-04-07 12:59:11 16 142 3405 47 3184 8362 2951 295.80 24 46.55 CHANGED TL-EKls.hs....................................hhGuhhh.........................shtpssshsRlGlPtlhs.pDustGlptt.hs............TsFPsuhulu..uoastc....LhpchGthhGpEhpu......pGlslhhuPsls.lsRsPtsG.RsaEsFuEDPh.Lsuthusthl+GlQu....t........G....VhuosKHFsu....NppEspR...................pssssslsccsl+ElaLhsFptAV.cusssoVMsu...YNplNGs..............ustsphlLsplL+c-hG.....F..pGhVhSDWh...............sspssstulpAGlDhpMssshh................ppLhpulcsGp..lspppl-csspRIlph ..........................................................................................................................................................................................................................h.u.hh....................................................t.h...t.......p...t.h..h...t...............p....h...t...l.....P.h.h.hs..hD.....t.....t.......G.h.ht.thtt..................................................ThaP.....s....s..h.u........h...u.....u....o..a.....s...c.......................ls.p....p.h....G....p....h.....h.up....E...h.pu..........................hGl.sh.s.h......u.P.s.l.....s...l..........s......+.......s...s......p......h.........s....p...s................c.u.a.u....-.DPh.lsut.hut.s.hl..cGhQs.......t...............................u...l..h.us..s..K................H..F.suh........sss....-...s...p+....................................................................p.h..s...s..s..s.h...s..t..p....p..l..p...p...h........h........PF.cts.l..................c.....u....s.....s.........s.......u.........l..Msu..................a.....s......p.........l.....s..u.p..........................Pu.st.....s
ph.ll..pslLR..p.....c.....h..G..............F...cG.......h...l..h.S...Dhh...s....h..t...t..h.....h.................................htcs.s..h.t.ul..p.....A..G.s.Dh...h.ss...s.th............................................................ptlh...p..t..l........p...p..Gp..........l..s.....t...plspust+lh..h......................................................................................................................... 1 1038 2061 2768 +2746 PF00251 Glyco_hydro_32N glycosyl_hydro2; Glyco_hydro_32; Glycosyl hydrolases family 32 N-terminal domain Finn RD anon Prosite Domain This domain corresponds to the N-terminal domain of glycosyl hydrolase family 32 which forms a five bladed beta propeller structure [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.39 0.70 -5.06 138 4354 2012-10-02 00:26:57 2003-04-07 12:59:11 15 62 2280 59 842 3991 275 289.10 30 57.76 CHANGED HFpP.psWhNDPNG.hY..apGhYHLFYQaNP.huss..W..G...htWGHus.ScDLlpWpcls.hAlhPs.p.aDtpG..saSGSAslhss..p..............lhhlYTGs...sp...........p...tpsQslAhsh.c.u....tpahK.....NPllh....Pss...sspcFRDPpshWhp....-.upWhhllGuppp.....ppGhshlYcS..pD..hhpW.phhsphhpss..t.....stMWECPDha.lss.p........................s.t.t.....hhhVLthS...............pssttshY.hlGp.as..psspa.ss...........s...hh....chDaGp..FYAupoFhD.....ssp.RRllhGWhu.psDtp.tsphs......cGWsu..hholPRplhL.........-..ttspLlQhP 
........................................................................................................HhpP.t.s...h...hN....DPNG............hha...a.....pG.p.Y.Hl.F..Y..Q...........a..................P....h.....u.....sh....................a....G.............h...p.................W.....u..H.s....s.....S....c..D.....L..l..pW.......p..p..............h.....s.......h....A....l........h.......P...s.....p..........h.......D.....p...p.....G.............sa.SGS..Ah.........ss..p.......................................................................................lhh.hYTGpsp..............................................pt...pph...Q..s..l..Ahs....csu....................hpap.K.......................sPllht...........P.s.............tsp...cF........R......D...P........pl..............a...pt................s..sp..ah......h.l............l............G.....u.......p...........p......................p................p.........G............p..l....h....lY.p..S.....pD......l....h..p...W.p.h.h..u.t...h.h..t.sst............................uhMWE...C..PDha..lssp.........................................................................lLhhss....sht.........................pshht.s.hY...h..l..Gp....a..p.....pst..pa.sp...............................tp.ht.plD...hG.....h.-.....a.......Y.A......sp....oa.s................sss...Rpl.hhuWh....s...............-.............hs..........tpsW...t.u.sh.................olsRplpl.......ps.....spLhphP............................................................................................................................................... 
0 225 502 696 +2747 PF01301 Glyco_hydro_35 Glycosyl_hydr17; Glycosyl hydrolases family 35 Finn RD, Bateman A anon Prosite Family \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.19 0.70 -5.08 37 2284 2012-10-03 05:44:19 2003-04-07 12:59:11 14 47 1041 19 826 2224 59 273.10 34 43.63 CHANGED shll-Gp.hhllSGulHYhRhs.PphWtDhlpKsKshGhNslpTYV.WNhHEPppGp.asFpGt.hDlscFl+hs.pctGLaVlLRsGPYICAEWshGGhPsWLhph..shphRoss.s.ahptscpahstlhshlts..LhhspG.GPIIhsQlENEYG......hss-tsYht.thschhhchsss.shhhssssPhhhshps....shss...lhssssasss...t.........h........ts.pPpp....PhhhsEaWsGWFspWGp.......hpcsspc..lAtsltchltps......u.hNhYMaHGGTNFGhhsGushhs..........TSYDYDAPlsEtGphs.sKYttl+clhpth .............................................................................hhlsGp.hhlhSGtlHY.R...........hs....p.............W....c.....hl..ph.........K.shG......hNslpo..Y...l.........WN.......hHEP.p................Gp.....ap.F..........p..G...................hD......l.t..tFlph...A...pch.GL.a.l.l.l.R..............PGP..Y.........lC........A......EW....ph..G.G..h..P............s..WL.....h....p..............................s.......h.............p..........h.....R.................o........s...........s.................................s.............ah.....t....t...s....p...pa...........h...p........t........l........hs.h.hts..........h.h...s.........p.........G.....G.......s..ll.h.h.....Q.............lENE.YG.....................................s.-.....t..sYht....thtp.h.h.h.p.h.u...h......s..........s.....h.....hs..ss.....s....s..........hh.ps.............sh.t...........lhssssahst.....t.....................................................p......sp.t............Ph.........hh..........hE..has.G..........WastWut.................................sh....p.......p.s.s.pp........hs..s...ltch..lttt...................................s.hNhYM..................aaGG.T.N....F..G.hhsGsshh..............................TS......Y...............
...DYDAslsE.Gt.s.........Kahtl+phht..h........................................................................................................................................ 0 254 472 661 +2748 PF01074 Glyco_hydro_38 Glycosyl_hydr16; Glycosyl hydrolases family 38 N-terminal domain Finn RD, Bateman A anon Pfam-B_731 (release 3.0) Domain Glycosyl hydrolases are key enzymes of carbohydrate metabolism. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.56 0.70 -5.15 50 2799 2012-10-03 16:37:10 2003-04-07 12:59:11 17 32 1483 64 926 2418 153 271.20 24 29.16 CHANGED plasluHuHhDsuWLaslc-s.........pppspps..asol...lshhcc.s-hpF..htupsthaphhhcc.psphhpcl.+chVpcGphEhluGhWV.sDpshssh-ollcQhhhGp+ahccpFG..hpscsuWhsDsFGaSushPplhp.puGhctahhp+lsasspsph............ptpFhWcu.cu.........oclhoahhsh.............sh......ssph.............................cspsllpthpphtsptt......sspsLhshGhs.........Dsstsshcchlctlpthps.h.....hhsclphuTs.spYacslcp.........................ss-h.sas 
..................................................lahlsHsHhDh..t.Whhshccp.......................chplhph.....hssl........ls.hh......c........p..p......P......-...........a.p....F.......htu.........p......s......................t...hh..c...a.h....p...........pP........c.t..........hp..cl...+......chl....p...p....G........+...lph.h...........Gshal.sDss.lsuuEuhlRphh..hGp..phh..pp.....paG.........tps......phuahPDsFG.a..suphPplhp.tsGhc..th........h....htR..h........s.....h..s...pt..sph................................p...pp.Fh...Wp.u.DG.......................ocl....hs.thhst........................hssth...........................................................phpt.h..h..p.t....h.p.ph....t.p...t.th.................s.s.p.h.lh.shGts.............................pt.t.hs.s.pp.ph..hch.hphh.pp...h..........................stsphhh.u.s..pcahptlctt....................................................................................................................................................................................................................................................... 
0 336 545 754 +2749 PF01229 Glyco_hydro_39 Glycosyl_hydr19; Glycosyl hydrolases family 39 Finn RD, Bateman A anon Prosite Family \N 19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.63 0.70 -6.11 4 463 2012-10-03 05:44:19 2003-04-07 12:59:11 12 17 292 30 189 512 70 356.40 18 64.27 CHANGED hthscsPclshsDutRshR.l..FWR...aC.....stus+hsLuhcpphphshhtshschGhE.lRhHhLL..DlhssRc.ssGpGLh..YNFTalDuhlDhLhE.tlhP....GF..hu.sSGphTsFc.K......+ph.cWp-LVphLAR+aIuRYGlscVpKWsFEsWNEPsh+DF.psuhppp.ahphYcssucul+tssssL+lGGP..........shChul.tahtshpNFhTtEs.V.lDaIShHppsuspuhhh.cp..tsltpscphhsEFt-s..lhp-EA.P.lsatlsp.......Yss.ss.hss.HpsP..hNuuhhhR..lLSpsssaLuh....FS.hThTshFp.sssp.......RpPhhsuhGLlALh...tl.t.s.+uhplLDu..tp...hlhtspHhsh...uspsshpsslllas....+shstpslPsp.hhlslP...shc.Valp..hlDp.pusPausW.+MGpPsaPotEQhcchRtsp-P.htcsspPhsssGcLTLchcLhhsSllLlclssR ...........................................................................................hh....................................................................................................................................................................................asa.t.lDthhDhh..h.t...thhP.....ua...................t.........s....s.....t.................t...................a.....................p....................................p.p..h.t.tW...t.pllpthsp+h.h...tR.Y...G..h.t....p....V.p.....p..W........aEl..WN.E.................Psh.....t............a...h...........................u........p...t.....p......a...h.p.hY...pt.ssc.sl+.t.s.....s.....P....t....h.p..l..G..GP................................................sh.s...h...s................t......a..h....phh..pa...h....ttp....t...h...........h.Da..loh.H.h.s.................................t......................................................................................................................................h...........................................................................
..............................hs...........................tt...........ss....h.........ht...................t..............h.....h.h....t.ht........................h....ss.......thhs.................th.t.s...h..htt.....tt.............t.t..................................t........h..hhh.t...............................................................................h.......................................................h......ss.....a.t..............s........t...h..h.....t................................................................................................................................................................................................................................................................................... 1 89 140 163 +2750 PF01915 Glyco_hydro_3_C glycosyl_hydr14; Glycosyl hydrolase family 3 C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_1151 (release 3.0) Domain This domain is involved in catalysis and may be involved in binding beta-glucan [1]. This domain is found associated with Pfam:PF00933. 
21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null --hand HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.56 0.70 -4.83 107 5906 2009-12-10 17:01:25 2003-04-07 12:59:11 17 121 2112 34 2239 5621 1277 251.70 26 33.08 CHANGED hVLLKN-s.....tlLPLsppt..............+lAllGssAs......................................t....sh.t.......................................hsoslpulpptssssshhh...t..h............................................................................................................................................................................ssstthspAspsAppu....DssllhlG...........h...................................t....st.slpLs..tspppLlptl.ssss....tsslVVltsGsPlsh..hhp...............pssAllhuahPG........pcuGpAlA-lLaGc......sNPoG+LPhTaspsh.........sphPshh........................th...tt.sshhsau.cGL.hsh ................................................................................................................................................................................................................................................................hVLL.KN....cs.................thLP.Lptst............................plAVlG.s..As........................................................shh.h...su...ss.........ut..s.........st................................................hso..h.pu.lppt...h...s....p.s....s...h..h...h..h........h......h.............................................................................................................................................................................................................................................................................................sp.thhs..p...A..s..p...s.A.p.pu.........D..s..sll.slG..........tspt............................................................t..ts+...s.s..l...s.L..s.....t...s....Q..p...p..L..l.p.sl...t.sss............pss.........V...lV.l.h.sGpPls.l.t..h.tp................pssAlltsa.hsG...................p.p.u.G......pAl.A-l.
La.G.c.................................hN.P...S......G......+...LPhoaPpsh..................................sphPsh.hs...............................................h.hpta...........st...t......sha.sFG...aGLSYo.................................................................................................................... 0 741 1451 1968 +2751 PF02056 Glyco_hydro_4 Family 4 glycosyl hydrolase Mian N, Bateman A anon IPR001088 Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.86 0.71 -4.97 8 3002 2012-10-10 17:06:42 2003-04-07 12:59:11 11 5 1469 33 466 1859 496 179.70 40 41.67 CHANGED KIshIGGGSohhschllu.lp+h-cLsspcltLhDlDt..cRL-tltphscphl-csussl+hptossh+-ALpDADFVhsplpVG....hhssRtlDE+IPh+aGlhu..t-TsGPGGIh+GLRoIPslh-Is+chE-hsPcAWhLNYoNPhuhlTcAhhRhhPthKslGlCchshGhpctlAchLsl ...............................................................KlshIGGGSoaTPcl...l.Ghlt...c......h......cc..Ls..l..p...clhLhDl-s...p+L..-hltt....l..sp...+.........h..l..c.......p........s.....u.....s..s.......h......c....l.p..t....T....h...D...p+cALp.D..ADFVhs.plR...V.....G...........ths...sRt...hDE+IPL+aGllG..QETsG..sG.G.lh+uL..RTIPslh-Isc-hcc.l.s.P.s.A.Whl.NaoNPuuhVTEAhh+..h..t..s.....t..K.hlGlC...sh.P.lGhtptlAchLt.l............................... 0 162 299 370 +2752 PF02449 Glyco_hydro_42 Beta-galactosidase Mian N, Bateman A anon Pfam-B_2131 (release 5.4) Family This group of beta-galactosidase enzymes belong to the glycosyl hydrolase 42 family. The enzyme catalyses the hydrolysis of terminal, non-reducing terminal beta-D-galactosidase residues. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.27 0.70 -5.34 28 1345 2012-10-03 05:44:19 2003-04-07 12:59:11 10 34 760 3 435 3006 388 314.10 25 47.10 CHANGED DY.PEQWsc..-hap-Dh+hM+cAGlshVslGhFuWutlEPpEGpa-FuW.......LDcll-hLtctGlpllLuTP........TAusPtWlsccaPElL.l-tcGphptaGuRppaChoSPsY.RptstcIsctL.....AERYus+PullsWHlsNEYGs....tCYC-pCtpAF...............RpWLcp+Y.........ss.l-pLNcAWGTsFWSppYssa--I.sPp.ts.........phhsPuptLDapRFsoDplhsahct.ctchl+choPchPlTTNFMs........hpshDaachApclDhlSWDsYPs...........stcp......pssthuhtpDLhRul..tpGpPFhl.MEpsPS.sVNW.pshN.t+tPGhhcLhShpulAHGA-sVhaFpW..RpuptusEpaHuullsHsu.sso+sapEVsclGccLcpl ..................................................................................................................................................................Y.P-.ph...........t...phhtcDhthhp.ps.shshsp.lu.F..u....Wstl.E......P....p....c....G...p...a.....c.....F.s.a..............LD......c...h....l...c....h...h...t..........p..t..G..lp...l...h....L.......u....T...s......................o..u.s.............P.t.....Wh...h.......p........p.......a...P.........-...h...h.......h...s................p..........G.t.......h...............h....................h.........G.......s...R..........p....p........h........s....h....s.s..s.sa....+c...hs......t....pls.ptl......................A.c..+....Y.....s....p...c...P.....s...l.hhW.p............l......s.....NE..hus....................sas...p..t...s..t..ttF.....................................ppW..Lc..p+.Y.........ts....lc.tLNpA..W...s.......ss.....a.....W..u......p.......ph..psa...pp......l.........Pt.....hs.............................t.....s.h.......l...Da..pRF.spth.hphhp..phphl+....th.s.s......p.h.......lT....p.N.hh.......................sh-h.h..p.h.u..p.t..h..Dh....h..u..h..D..tY........................t...........................uh....tsh..hRsh............ttp.s..ahl.hE.......p.ps..u....t.h...t.........h........s............
s.G........h.h.h.shtt.hAtGup.l....aapa..+p......u.Ephatuhl..st...s.hht-h.thst.ht..h................................................................................................................................................................................................... 2 150 293 365 +2753 PF04616 Glyco_hydro_43 Glycosyl hydrolases family 43 Bateman A, Finn RD anon Pfam-B_5336 (release 7.5) Family The glycosyl hydrolase family 43 contains members that are arabinanase. Rabinanases hydrolyses the alpha-1,5-linked L-arabinofuranoside backbone of plant cell wall arabinans. The structure of arabinanase Arb43A from Cellvibrio japonicus reveals a five-bladed beta-propeller fold. A long V-shaped groove, partially enclosed at one end, forms a single extended substrate-binding surface across the face of the propeller [1]. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.00 0.70 -5.32 37 5170 2012-10-02 00:26:57 2003-04-07 12:59:11 9 188 1022 90 1859 5327 430 265.80 18 57.35 CHANGED hpNPllpGhhPDPolhRsscs...YYlssooFtha......PGl.la+S+DLssWp.lupslsptsphsh..tsss..........WAPslp....atsG+aalhao...............ps......sallsucssc...GPWo-shhl.........sstulDPolFhDss...GcpYLhhsths.t..............tIhhtchsssttphsups.phlhsus............shhhsEGP+la++sGa..YYLhsu....pGGTshs....ashsh...uRS+slhG.Paphpsssshlppt.....ssppslpssGHuullposcGc.aahsahstcs..............tshpshGRcThlp.lpWp.DGWP 
......................................................................................................h...s...DPt...lh...t...h.........sst......a..Y..h.h....s......o....s.............................................t..h..........lh..p..S...p..D..L.........h......s..W...p........h..s.................s...h............s.........h............t............s.....t............t....h......t.tt.t............................W.....A...P.p...l.h.......................h..t..s..G.....c.....a..a.laas.....................t....t.ttt...................................hh.l.h.....p...u.....s...s..sp..........G.s.a...p...p....hth.......................................sttslD......s...sla.h.......D.c.....s........G.......p.h..Y....h.h......a.u.s.......t...................................t.l...h.....h....s..c..l......s.....s..s...h....h...p....h.....t...s..th..........t...h.hhtst........................................h.h...E...u.P....t..l....h...+......p...s......G..h.......Y.Y..l..hh.u.....................ts.s.s.t.s...........athsh...............................spu..p..s......h.G......Pa...........p..............t....t......hh..................................t.tt..h.........s.s......u.H.s.s..h..h...p...............p...u.....p..........h.h.h.ahsht........................................t...R...h.h..lt....h.ap...ss................................................................................................................................... 
0 663 1303 1655 +2754 PF02015 Glyco_hydro_45 Glycosyl hydrolase family 45 Bateman A anon SwissProt Domain \N 20.30 20.30 21.70 20.80 19.50 19.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.30 0.71 -4.25 6 228 2012-10-01 21:39:58 2003-04-07 12:59:11 11 13 86 7 88 232 2 179.70 43 69.24 CHANGED sGpoTRYWDCCKPSCuWsu..KuPs.tPV.oCstsssslss....ssspSGCp.GGuuYhCosppPa..sVsssLAaGFAAsohsGusEsshCCACYtLTFTs....GsltGK+hlVQuTNTGGDLGsspFDhshPGGGVGIFs.GCssQaG...uhhGsQYGGlpStpECsphP-sLpsGCcWRF.-WhcNsssP.hSFcpVpCPupllucSGCp .......................................GhTTRYWDCCKsSCuWss..............K...A.ss....s.p...P...lpoCsts.s.ps.hss.............ssspSGCs...G...GsuYhCssppPW......uVN..ss....hu.YGFA...Ass...h.....s.....G.s...EsshCCsCapLTFTu..................uslsGKcMlVQsTNTGuD..L.....G.......s....N.....p.FDlthPGGG.VGhas.GCs..p...Q.a.u..s.s.st...aGpp..YG.Gl............s.o.........tspCsp......hP.....t..tLpsGCpaRF..sahtss....sNPshsFppVpCPtplsshosC................................................................. 1 41 60 80 +2755 PF01374 Glyco_hydro_46 Glycosyl_hydr23; Glycosyl hydrolase family 46 Bateman A anon SCOP Domain This family are chitosanase enzymes. 
20.80 20.80 25.50 24.80 19.70 19.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.33 0.70 -4.79 7 127 2012-10-03 00:09:25 2003-04-07 12:59:11 13 7 97 4 42 129 4 207.80 39 69.76 CHANGED hpLluhsENuohpWhppYsYsEclcDGRGYTsGIhGhsoG.TGDhL.llt.hpchsPsshLsKalPuh++stt.....hcppuslhshhsp-p.WpotspDh.hc.A.scstDcoaas.sAhppscssh....poALspushhDsslpHGsssss-SFtullKRstp+sp.......DEtpahpsFhssRhtsLhsstsc.spscssp..sRshhhsslhcpsNhsLK ......................................................................................hpLlSpsENSolcWctQ......YuYlEDls..DGRGY.TsGIhGFsoG....TGDhLplV.EhYspt...pP.sN.s.LupYLPALRclsu.......s-sp....sulssh.......hsts.....W.p..psA.p....D.tFctAQcctpD+lYas.PAhppucpsG....hssLuphsaYDshl.HGs..u..ss....s....s.....o..Ftulpc+uhpcAp..P.t........GsDEpsaLsuFLDsRh.sht...p..c..ts.cp.-sochcp..AphshlpphshphpssL.................................. 0 18 32 40 +2756 PF02011 Glyco_hydro_48 Glycosyl hydrolase family 48 Bateman A anon SwissProt Family Members of this family are endoglucanase EC:3.2.1.4 and exoglucanase EC:3.2.1.91 enzymes that cleave cellulose or related substrate. 
25.00 25.00 40.60 40.50 18.90 18.40 hmmbuild -o /dev/null HMM SEED 619 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.33 0.70 -13.25 0.70 -6.39 7 213 2012-10-03 02:33:51 2003-04-07 12:59:11 10 23 126 21 57 214 0 485.10 49 70.58 CHANGED YtpRFhphYsKI+DPuNGYFSsc.GIPYHuVETLhVEAPDYGH.TTSEAaSYYlWLEAhYG+lTGDaStFppAW-shEcYhIPsppDQPst.MSsYsPNKPATYAsEapcPShYPS.Lchs.ssVGpDPltsELhSsYGos.lYGMHWLhDVDNWYGFGtpsst.......sopsoaINTFQRGsQESsWETlPQPsh-EaKaGGpNGFLDLFTtDs.sYAKQaKYTNAPDADARAlQAsYWAs.WAKEQGK..slSuhVuKAuKMGDYLRYuhFDKYF+KlG..s..pspAu.TGYDuAHYLLSWYYAWGGulsu..sWuW+IGsSHsHFGYQNPhAAWsLusDu-hKPKSsNGtsDWApSLcRQlEFYpWLQSAEGuIAGGATNSWsGRYpphPAGTsTFYGMuYs.pPVYtDPGSNpWFGhQAWSMQRVAEYYYtoGDspAKpLLDKWVcWlhuplplssDG.TFpIPSsLcWoGQPDTW..suoYTGNsNLHVcVssYGsDLGlAuSLANALhYYAstotc......ppA+shAKc....LLDthWt..YpDs.KGlussEsRuDY+RF.-p......pVYlPuGWoGpMPNGDsIcsGlpFlDIRSKY+pDPsWsplpsAhpsGpsPshpYHRFWAQsDlAlA.GsYuhLF ..............................tcFht.atpl...ps....t....sGYas..p........G......lPYH..SlETLhl.EAPDaGH.TTSEAaSYhlWLEAhYG+lTG....D........W.o.....h.....hpsAWcshEpYhIP.....s.p.....p-....QPs...stY....ss.spPATYAsEh.ppPspYPu....lphs.sssGpDPltsELtSsYGost....hYtMHWLhDVDNhYGaGpt.st.......sotsoaINTaQRG.pESsWETlPpPoh-pF+aG....t.....N..GaLsLFst.-s...s..........u+QW+YTsAsDADARAlQAsYWA..WApppGp....plushhsKAuKMGDaLRYshaDKYFpplG...sts.p..s.s.su.sGh-SsHYLhuWYhuWGGuhss.....sWuW+IGsSHsH.GYQNPhAAaALus......s.shtP..KSssutpDWtpSLcRQlEhYpWLQSuEGAIAGGuTNSWpGpYtp.Pu..G..s..s..TFYGMhYs.tPVYtDPsSNpWFGhQsWshpRlhphY.....................p..G............s............tt......s..........t...........tlhc+Wh.Whht.h.ht....t.s...pa....hPupLpW.......sGt...P........................sss..lpsps.s.s.p.DlGhhuuhspsL.aauststt..........................................ttutthAptLLDthat...pst.hGlss.E...ttsY.Ra.p........tlYlP.tsas.Gp.spGs.l...........psu..................pF.slRshhppDs.a..l.thht......s......u..................P......hpaHRFWtps-hAhA.u.ht.h............................ 
0 22 43 49 +2757 PF03718 Glyco_hydro_49 Glycosyl hydrolase family 49 Finn RD anon Pfam-B_8840 Family Family of dextranase (EC 3.2.1.11) and isopullulanase (EC 3.2.1.57). Dextranase hydrolyses alpha-1,6-glycosidic bonds in dextran polymers. 21.30 21.30 21.90 22.60 19.80 20.50 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.97 0.70 -6.44 4 30 2012-10-02 14:50:22 2003-04-07 12:59:11 8 4 25 8 8 54 0 496.90 33 80.09 CHANGED sus.psAhpstpsssTsssupLpTWWHssGEhNspTPsQsGNVRpSuhYsVQVupAsssps.hYDSFsYhSIPRsGpG+I............s.o..DGhpaosuhsLoMuWSSFEYS+DVhVclphpsGpolsSsspVsIRPoShsFthphsDssoVhI+VPYSssGYRFSVEF-spLaThhsD..Gst......suGs.psltsEPcNuhhIFApPhL.ut.tpphlPp.sSsshphPpPG.VpNLsosocpllYFpPGsYaMsscp..........HhsLsuNVpWVYLAPGAYVKGAhcFh.sTpupaKsTG+GVLSGEQYVYpADsssuYh.hSsAspscsosl+MhpassusutQp.hslpGlTlutPPapShsh.GNp....hphpV-sYKQVGuaYaQTDGhElY.GohhpssFaHsNDDsLKhYaSsVTlcNsVIWKscNuPVIQaGWTPRNI-NVslsNssVIHsRhhhp-shhNosIhsuSsaY....tshuSs.oouDospTlpNhphpNhssEGhosshhRIhsLpNh-NIpIKNlsIsuasGLp...hsop.Sal.taoshsstK........sspGlslENaTVGGppI..TssN.psspLGRls.hDsp.WssWphh 
...........................................................................................................................................s.................hsssthpTWWHssu.hs.t.tshtsspVRpSphYpVpVt.As...t.pp.hacSFsY.SIPRsGpG+................s.o.tDGh.phpsphshsMuWopFpYspDl.lcl..psGpslsssspVsIRPsshsathp.ssstslh.I+VPhp.sGh+FSVEFpsplhohhss...Gpp....h.spGs..hlt.EPpNuhhIFApPhl.st....hlPp.pss...........s.s.hhpsG.lsshs.socsllYFtPGlYahs..pct..........+hpLssNspaVYlAPG.AYVKGAh...cah..tsppsapssG+GVLSGEpYVY..pAssspsY...h.........Aspscppsl+Mhhapsssu.sQp.hphtGsTlssPPapohslhu......spt........hpsplssYKQVGuaaaQTDGhplY.psS..h..l..+..csFaHsNDDsl.....KhY......a....S...s.........sslcsssIWKscNsP.l.I...QhG.....Ws...s..Rs.....Is.sVslcslslIHs....RhhhspshhsosIhsuSsaa.......uss.hpsDsppph.shphpNhssEGhss.sh..h+lhsLpN..hcthhlcNhth....s.sht......hsht.sh..h.thps...............shGl.lpNaolGsppl..shpNhtttphGphs.hcst.hspWph.t............................................................................................... 1 3 4 8 +2758 PF03512 Glyco_hydro_52 Glycosyl hydrolase family 52 Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 276.40 276.00 19.60 18.50 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.30 0.70 -5.88 4 35 2010-01-08 16:07:30 2003-04-07 12:59:11 8 1 33 0 14 36 16 418.20 50 59.32 CHANGED FTLGaPGpuGGLDLELG+PPcQNVYIGLpS.c-ttpYcsLPFatsu.pDEusRYDlEpsDss.......pQcPp.llhPFs+-EIhR-FsuuTDTWpAGDLTFpIYSPVcuVPDPpsAsEEEL+hALVPAVLsEhTIDNTcGppsR+AFFGapGsDPYSuMR+I-Dsss.plsGVGQGRlTuIsSsDculpsAlaFohEDlLspthcENhsFGLGpsuALlhDVPAGEK+TYpFAVCFYRGGlVTsGhDsoYaYTRaFpsIEEVGEYALtQFstlttputcu-pLlssutLS-DQpFMhAHAIRSYYGsTQLLEH-G+PLWVVNEGEYRMMNTFDLTVDQLFFELKhNPWTVKN.LDLaVcRYSYcDcVRFPG--TEYPGGISFTHDMGVANoFSRPsYSSYELaGIDsCFSHMTHEQLVNWlLCAoVYl 
..FTLGahGpsGGLsLELu+PscpNVYIGhco.cpsuhYchLPFacss.p-Euc.RYshEps-ss........p+ss.hlhsFucccIpREapsATDTWpAGDLT..FplhSPhcsVPDPts..As--EL+hALsPAVhsElTlDNTpGopsR+uFFGapGsDPYouhRhl.........s-.........ss....s.....pLpGlGQGRphuI.s.o.p.-.csVcsuhtFuhEclLssphpcNh.sFGLGpsGALlh-VPAGEK+TYpFAlCFYRuGhVTsGh-sSYYYTRaFcsIE-VupYAL-phsthtstutptschlc.pu.tLS-DQ+FMlAHAI+SYYGSTQL.L..-.....c.........-Gc.PlWVVNEGEYRMMNTFDLTVDQLFaEL+hNPWTV+NsLDhaVcRYSYcDpV+FPG-.-pp......YPGGISFTHDMGVANsFSRPsYSuYELsGls.uCFSaMTHEQLVNWlLCAsVYl. 0 4 8 10 +2759 PF01630 Glyco_hydro_56 Hyaluronidase Bateman A anon Pfam-B_1150 (release 4.1) Domain \N 18.70 18.70 19.80 19.40 18.00 17.90 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.04 0.70 -5.80 11 533 2012-10-03 05:44:19 2003-04-07 12:59:11 13 6 152 6 235 478 2 274.00 38 74.85 CHANGED hRuP.llsspsFlhsWNsPTphChtchshslDlSh.FullusPpcshpGQslTIFYs-cLGhYPalssps....pthpGGlPQpusLppHLs+spp-IpphlPs.DhsGLAVIDWEcWRPpWtpNWpsKDlY+ppSl-LVQspHPphshscsptcApppFEcAA+tFMp-TL+LGKhLRPptLWGaYhFPDCYN.hcapsss..YsGpC.ssphpRNDpLuWLWppSoALaPSlYLpppLpusp+s.t.h.VRtRVpEAlRVu+lscsspslPVhsYsR.lFpDpsspaLoc-DLlpTlGE.sALGuuGlllWGohuhspotpsC.tL+pYhpopLsPYllNVThAAp ....................................h.....h.tpPFhhhWNsPop.Ch..+ath.l....s........l.ph..Fslhssstpp.h.t.G.p........slTIFYtspLGhYPahs.ps.............hshpGGlPQpss.LptH.Lpphtpcltt.....hlss.p.tGLuVIDW.............Et....WRPhWtpN.Wt.s.KclY+ptS......htllpp..p.....psp....h.s.......p..p.ltt.AptpFEtuA+taM.pTLpLupthRPptLWGaYhaPcCa..N.ashh..p..s..s....YoG..pCsshthtcN.-pL.tWLWtpSsAL.aPSlYl..tltss...ps.....Vp.RlpEAhRluthh.p.t............hPVhsY..s+..h.....h...p..t..............s..p.......h....Lop...tDLhpTlGEosAhG..uuGlllWGs..p..sp...o.....p..pC.tlppYlpp.Ls.alhNVo.us........................ 
2 43 64 130 +2760 PF03065 Glyco_hydro_57 Glycosyl hydrolase family 57 Bateman A, Moxon SJ anon Pfam-B_2506 (release 6.4) Family This family includes alpha-amylase (EC:3.2.1.1), 4--glucanotransferase (EC:2.4.1.-) and amylopullulanase enzymes. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.98 0.70 -5.54 22 1145 2012-10-03 16:37:10 2003-04-07 12:59:11 10 25 737 10 560 1481 541 339.30 17 56.79 CHANGED sFhhHpp..hh.p................................uspsYl..........shhphlhch.p........h+hshsloGshlEQlpchtsph...............................................................................................................................................................lpha+p...LhcsGp..lEllsssaaHslhsllsc.....................p-hhpQlchtcchhcchFG...hcPpuhassEhsass................clhchltchGhchlhs-stth..............hhsspsst.hah.......tspt...lslhhRshpLScpIuFpa.......usccah.................pa.h....hh.hhstspspllhlhhDhEsa.....staphtppshh-hhhth..phppp.................shlphhtsp..Ehhpchtscshlhhs.tt.....................................hsWh...........ctshstWlus.tppthhptl 
...............................................................................................................................................................................................................................................................................................................h.hH.p...hh......................................t.........................ht.psYh................shhthh.ph.p..........hph.shshosslhp.lpp.h.t............................................................................................................................................................................................................................................................................................................................................................ls.h.h+p.........L.c....p..Gp...lElls...ssasH.s.....l.h..P.Lhsp.......................csh.ptpl.....phuhpt..h..cc..h..F..G....ppP.c.G.hWhs...E.tu.ass........................................................sl..hphl.sp.t.G...h..c..a.h..h...s-stth............................................hhs.ht...s..s.p...ha.t..................sspt............................ltlh.hRDhplS.p.plh..ph..........................shccah..................................................................ph.h..............hh..t..........t............s..th.h.....hh..hchc..th...........tht.....tp.hht...hh.....t.....t...ptt.....................................t.....l..t..h..hp.p.....ch.h..p.....t.h......................h.................................................................pWh.................................................h.......................................................................................................................................................................................................... 
1 229 411 492 +2761 PF02057 Glyco_hydro_59 Glycosyl hydrolase family 59 Mian N, Bateman A anon IPR001286 Family \N 27.00 27.00 27.20 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 669 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -13.24 0.70 -6.46 3 168 2012-10-03 05:44:19 2003-04-07 12:59:11 10 10 93 2 88 169 18 493.00 37 83.35 CHANGED MTAAAGSAGRAAVPLLLCALLVPGGAYVLDDSDGLGREFDGIGAVSGGGATSRLLVNYPEPYRSQILDYLFKPNFGASLHILKVEIGGDGQTTDGTEPSHMHYALDENYFRGYEWWLMKEAKKRNPNIILMGLPWSFPGWLGKGFsWPYVNLQLTAYYVVTWIVGAKHYHDLDIDYIGIWNERSFDANYIKlLRKMLNYQGLQRVKIIASDNLWEPISASMLLDuELaKVVDVIGAHYPGTHTVKDAKLTGKKLWSSEDFSTLNSDVGAGCWGRILNQNYINGYMTSTIAWNLVASYYEQLPYGRCGLMTAQEPWSGHYVVESPIWVSAHTTQFTQPGWYYLKTVGHLEKGGSYVALTDGLGNLTIIIETMSHKHStCIRPFLPYFNVS+QFATFVLKGSFSEIPELQVWYTKLGKPSERaLFKQLDSLWLLDSSGSFTLELcEDEIFTLTTLTTGRKGSYPLPPKScPFPosYKDDFNVDYPFFSEAPNFADQTGVFEYFTNIEDPGEHRFTLRQVLNQRPITWAADASNTISIIGDY+WTNLTV+CDVYIETP-oGGVFIAGRVNKGGILIRSARGIFFWIFANGSYRVTGDLAGWIIYALGRVDVTAKKWYTLTLsIKG+FASGMLNGKoLWKNIPVsFPKNGWAAIGTHSFEFAQFDNF+VEATR 
......................................................................................FcGhGslS.ususs.SRLLhsY........c..s.......hs...pILcaLF.tss.aGAulph...LKVEhG..u..........D....spoo.....s.....Go....EPSp......+h.t..-.t.......N.....h...R..GYp...ahLht-AKphNPsIplhsLsWuhPGW..l...s......p.....u...............s.........h................p.....hss...........Y.h.h.pW....l..............uu.tphasLslDYl.u..h.h.....NE+.s..a...s.h.p......a.l.K............h....LR.p.hL....................s..........tp......G..........hppl+IlAuD......sh...a...t...s...lu..ts.hh.DspLhpsl.sllGs..H..Y.....s.....s....s...........ss....t........p........u........s...t...KplW.sSE.....shS.......s..h.......s...........p......h......G.u..........us........h.u......Rh........L...N...ps......Yl.su.....hT....uhIsWs...........lluuaY.tlsaspp.uLhs..AppPWSGtY.lps.slWl...AHhT.QFspsGW.hY.L...p...s....s..Gth....pt...........s.GSa..Vu.Lss.s.h.s.......s.h.ohllE.....Th.s.....tp.t.s...h...........t...s..p.hp..h.l...psth..t.t......tlpVW.op...h...s.t....s..p.t........a.phtsh.......s.tG..taolp..lt.splhTlTTh..sst.t+..u.sh.s..ss.....st....hs...............Yp-s..Fs...t.........hs...t..........s..h.s.D.tGsFEhh.s........tt...thhsh+Qh...sptslsWts.......s..hshlGs.pW.s.htlp.Dhhh-............t...h.l.hths..ts......tt..........sh.....h.h.l...ts.Gtat..l...........t.t..........................h..t........h.....t..Whpltl..h.t.....s.......st..h......................G.hshhs..h...spaDsh.l....t...................................................................................................................................................................................................................................... 
0 35 48 63 +2762 PF01341 Glyco_hydro_6 Glycosyl_hydr21; Glycosyl hydrolases family 6 Finn RD, Bateman A anon Sarah Teichmann Domain \N 17.70 17.70 17.70 17.70 17.20 16.60 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.16 0.70 -4.98 74 628 2010-01-08 14:25:27 2003-04-07 12:59:11 12 33 332 44 282 614 35 289.60 31 63.55 CHANGED sNPhas.uphtstst..t.....ushssttpplA.phPouhWl.sphstsss...........ltstLsshhupttsusp...sshhVlYslPsRDCsAhASsG....ussuhspY+s.YIDsIsstl...................tassh+sllllEPDoLuNhlTs......ssspC....t.ss..pstahpslsYA.lppLtshs.NVthYlD........AGHuuWLG..ss...uAplhuphhpsAGss...plRGhuoNVuNYssh.............................hDEhsYtp.................slsstLsstGh.ssc...................................................FllDTuRNGhss.........................p.thGsWC..NssGs.GhGtpPosss...usshlDAalWlKssGESDGs.......ssssustaDt ............................................t.h....s...........t.....ss.ssthppl.u.shPsuhWl..sph...s.ss...........ltstlsshh.ststssst...hshlVlYslPsRDC...u.ht.S.s.G..........ussshspY+s..aIDslAshl.......................us..hpsllll.....EPDuLsph..................hspC...........t.ss...pst..hhp.tlpYA.lppL.t.shs.sstsYlDAGH.uuWL..............uAph.huphlpssGss........ps+GFusNVuNapsh.........................................-EhsYsp..................s.lsshl.st.th..ssp..........................................................allDTSR.NGhus.............................t..usW..C.........NssGtulGt.Posss....ssshlDAalWlKsPGESDGss....ssttu.h...t......................................................................... 1 93 195 260 +2763 PF03443 Glyco_hydro_61 Glycosyl hydrolase family 61 Bateman A, Eberhardt R anon CAZY Family Although weak endoglucanase activity has been demonstrated in several members of this family [1-3], they lack the clustered conserved catalytic acidic amino acids present in most glycoside hydrolases. 
Many members of this family lack measurable cellulase activity on their own, but enhance the activity of other cellulolytic enzymes. They are therefore unlikely to be true glycoside hydrolases [4]. 27.00 27.00 27.50 27.70 26.30 25.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.53 0.70 -4.52 55 827 2010-01-08 16:05:35 2003-04-07 12:59:11 9 12 88 14 728 841 1 202.70 30 69.52 CHANGED HuhVsslhl..sGspYsuapssth.hts.........s.st..ssssG.l..sshsSsDIh..................Cpp.......su..ssussps.sVsAGsplshp.Ws...............aspSH...+GPl......lsYLApC..sss.Csoss..pouhpaFKIspsG...hssuops..........Wus..........DpLI.sssssaslpIPsslusGpYlLRHElIALHuAu........sts........GAQ.YPpChplpVTGuGoss............Pu.G...VshsuhYpsTDPGIhh..sIYps.s......s.Ysl ........................................................................Hhhhtthhl..sG....tth.......................................ssusl........ps...h.s.os-lt......................Cst.................su.....ss..u..s...s..p.s.....sVtAG.spl.shphs...................................hs...s....H....G.Pl...................hsYhucs.....sss...ssshs....ssuhtWFKItptG..............hss..ss.t.................Wu.s....................t..h.....t.....s.....s..s...p...hshp........IPpslssGpY...LlR.tEh...IA.......LHsAt........p....s..........GAQaY...pCsQlpV.o.G.u.Gsss.............Pu...s................Vp.h.P.G........hYp.s.sD.P.G.lh..h......slYts.......sY....................... 0 249 456 651 +2764 PF03664 Glyco_hydro_62 Glycosyl hydrolase family 62 Finn RD anon CAZY Family Family of alpha -L-arabinofuranosidase (EC 3.2.1.55). This enzyme hydrolysed aryl alpha-L-arabinofuranosides and cleaves arabinosyl side chains from arabinoxylan and arabinan. 
20.20 20.20 20.90 20.40 20.10 19.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.99 0.70 -5.27 5 187 2012-10-02 00:26:57 2003-04-07 12:59:11 8 20 99 0 116 198 2 263.70 55 59.08 CHANGED CALPSoY+WTSTGALApPKu.GWAuLKDFTsVsaNGKHIVYASsaDTuosYGSMuFGPFoDWS-MASASQTAMSto....AVAPTLFYFAPKNIWVLAYQWGuooFSYRTSSDPTNPNGWSAsQsLFoGpI..SGSuTGAIDQTVIGDDTNMYLFFAGDNGKIYRSSMPIuNFPGSFGopYolIMSDoTsN....LFEAVQVYTVDG...QN+YLMIVEAhGSuG.RYFRSFTAoSLsGsWTPQAATESNPFAGKANSGATWTsDISHGDLVRNNPDQTMTVDPCN ...............CsLPSoY+WoS.T..GsLAp..PKs....G..W..suLKDFTsVsYNGpaLVYAost....s....u.u...s...aG.SMsFusFosWSphuSAsQs.....thspu.......sVAPoLFYFAPKslWlLsYQ.WG.s.s.sFuY+TSoDPoNPNGWSusQsLF.o...G...o...I.....ssu..ssG...s...ID.sl.IuDspNMYLFFAG..DN..G.+IYRuoMPluNFPGuF.Gossssl.hSD.o..p.sN....LFEAspV...Y+lpG...ps..p..YLMIVEAIG..u..sG.....RYFRSF.TAoSLsGsWTPpA..uoE..o.NPFAG.....KAN....S....G.....A....o.WTsDISHG-LlRs.ssDQThTlDPCN............................................................................... 0 30 74 107 +2765 PF03633 Glyco_hydro_65C Glyco_hydro_65c; Glycosyl hydrolase family 65, C-terminal domain Finn RD anon Pfam-B_3470 (release 7.0) Family This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. The C-terminal domain forms a two layered jelly roll motif. This domain is situated at the base of the catalytic domain, however its function remains unknown [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.48 0.72 -3.88 18 1485 2010-01-08 16:08:30 2003-04-07 12:59:11 10 16 1123 2 318 1157 33 52.00 27 6.46 CHANGED PhLPctast.luF+.lhaRGph.lplplsppplplplhpGp..sLslclhGcclsLcs ......PtLPcpWpp..luFp.lha+Gph.LpVpls..p..p.p..lslph...pu............sl.slpl.Gp.hpl..s..................... 0 86 198 275 +2766 PF03632 Glyco_hydro_65m Glycosyl hydrolase family 65 central catalytic domain Finn RD anon Pfam-B_3470 (release 7.0) Family This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. The central domain is the catalytic domain, which binds a phosphate ion that is proximal the the highly conserved Glu. The arrangement of the phosphate and the glutamate is thought to cause nucleophilic attack on the anomeric carbon atom [1]. The catalytic domain also forms the majority of the dimerisation interface. 
27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.27 0.70 -5.78 16 1977 2012-10-03 02:33:51 2003-04-07 12:59:11 10 29 1435 2 510 1594 109 362.80 34 46.01 CHANGED Rhsl......FaLhpshss.......p-....scLs...........lus+GLoG-u.....YtGtsFWDTEhaslPhhl.hstPclA+sLLpYRhppLsuA+..........csA+phGhp.GAhaPWpoup...GpEso..........s.cht..ss.ppphHlsusIAaA.lhpYhpsTG..DppahpcpGh-lll-sA+FWsu+schspp.stapIcsVsGPDEYc........sslssNsYTNhhAtasLphAhchhcphspt..........htlppp-hcpWc-l...ucphalPhccp.GV.......h.Qa-Gahcht.hshtt...h..t.h...h..chthspl.ptphlKQADllhhhYh............htttastcphtpN....h-aYts+Ts+-S.oLSsslaullAAchsch...spAh-aa.pcusc.................lDl.cs.sss.Tp-Gl.HlsuhuGshpsllpGFuGh...+hc..........sspL .......................................................RaslapLhtshss........p-.......tchs.............Iu..sKGL....T....GEu.......YtGHsFWDT.E.ha.hlPhal.hsp..P.p.lA+.s.LLpY..RappL.stA.p................cpA.......p.......p...............G.......h..........p..G..AhaPWpoh..p....G...p...Eso.............s.pht...hu..ttphHlsusIAaA.lhp.Yh.psTG..D....psal.t.c.p.....G.h.c.lLlEsA+FWss....t.s.p....h....s.....p....t..............s..c..apI.cs.VhGPDEYp...........ssVsNN.s.YTNhhAtasl...ph...Ah....phhpp..hspp.................thth.s..p.t......-htp..W.pch.........scphhlP.h...s......p......p........Gl........hsQpDuFhsht..h.s.hsp.....h.spph......l..c.h.ph..tpl.chphlKQADVlhhhYh..............hs.cpF..ot..p.p.ttpN....hcaYEs.hT..s..H..-..S...SLShslaullAA.cl.Gh.......pp.Ahpha.pcusp......................lDL....s....s..s.....p.s.....sp.....-.....G..l..H.h.....suhuGsWhulltGFuGh...csp.pst....................................... 
0 155 305 432 +2767 PF03636 Glyco_hydro_65N Glyco_hydro_65n; Glycosyl hydrolase family 65, N-terminal domain Finn RD anon Pfam-B_3470 (release 7.0) Family This family of glycosyl hydrolases contains vacuolar acid trehalase and maltose phosphorylase.Maltose phosphorylase (MP) is a dimeric enzyme that catalyses the conversion of maltose and inorganic phosphate into beta-D-glucose-1-phosphate and glucose. This domain is believed to be essential for catalytic activity [1] although its precise function remains unknown. 18.80 18.80 18.80 18.90 18.70 18.70 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.62 0.70 -5.07 14 1768 2012-10-02 23:57:29 2003-04-07 12:59:11 10 18 1343 2 416 1391 80 251.00 25 31.06 CHANGED plppsphshpp.p+h.pESlhulGNGYhGhR........Gs.....h-Eshss...............ph.GoY..luGla............phsp..........lu....Ghsp.hpcllN.hsNahtlcl.hlsGcs.hDlst...sclpsappsLDh+pGlLpRphphp..s.ps.lclpscRFlShspppLuulchplp......sl-sps.clpltshlDusVsNcs...p...thhphhsp........chpsss.shlhscT..hsss......htsshsspphs.tsspt.h...sphp.sppcphlspphpsplp.spphplp.KhVslso.ScDh ...................................................l.ppphs.cp...hph..tESLhulGNGYlGhR.................Gs......hEEs.h.s..t............................................................................ph.tGsY...luG...la...................p..t.tp...................hu.........Gh..sc.....hpc.llN...sPshhshcl...tl....s.......s...-.......hsLsp........splpsapppLDh+p..............GhLpRphhaps.s...s.....G............pt...lplsscRFlShsphplsslc.hplp.................slss...s.s....plplpotl.......D...u..s.pNps.......................................hhptlsp...................p.s...t.spp....hhlh..hpT....ppss..............h..s.lsh.....uss.p..........h......t...s.........s.p....................t..............h.p.t.p....p......p.tltpp....hp.spl.p.tGps.h.slp.KhVhlts..Sc-................................................................................................ 
2 125 264 361 +2768 PF02435 Glyco_hydro_68 Levansucrase/Invertase Mian N, Bateman A anon Pfam-B_2011 (release 5.4) Family This Pfam family consists of the glycosyl hydrolase 68 family, including several bacterial levansucrase enzymes, and invertase from zymomonas. 19.90 19.90 19.90 21.30 19.80 19.60 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.46 0.70 -5.77 11 358 2012-10-02 00:26:57 2003-04-07 12:59:11 11 10 223 29 68 411 121 335.70 36 80.16 CHANGED hps.s....sophTRADhLKlspp.p.ss...........Th.sIsushsl.ss.....-l.VWDoWPLp-hc.GsVssasGapllFuLsuD.p.....................Rpscs+IthaYp+sGc.........sWhhuG+lht-uss.........spopEWSGoshlss..cGslcLFYTs..h........sssu..pulsstshshhhsD-psVplcGhpps+sLF.p.uDGhhYQshtQs.h.................asFRDPHsh.D..pcG+pYhlFEuNsusEc.........spsshGts-hhsls..ssh.ss.....ssApassGuIGIsphpDspt..o.hclhsPLlTussVsDpsERPpVlh.sGKaYLFThS+tsphu........DGlsGsDsVhhhhusstLpGPYcPlN.uSGLVLsss...............sp.s..hpTYSHashPs...G.ssshhshhsspst...hctsuThAPoltlplpGscohlscshs.......G.IP .................................................................hohtshhpl.t....ps............s....ls.sh.s.h.sp.............pl.lWDohPLpphc...up.hsh....pG..apllhsLsus.p.....................ppscsplhhhYp+.hG.p.............sWh...uGp..lh.pssp......................................pspEWuGoshl............p..tsplpLaYTs.........................p.....thhst...hsh.h.sss.tl....hpshpp.+.lh.p.sD.....GhhYQs...tQ............................................as..hRDP..h.D.....sG.phYhlF.E..uNsus.p...............s.t.hsts.p.h.h.....th.p.....................ttA.htsuslGlhhhps.............p.hchhsPLloussVsDphERPphhh.s....GKaYLFT.S+t.phs........suls.u.s-sh..hhhhspt.lhusYhPhN.soGLVLsss.................t.s..htoYSahsh..Pt.......................u....s.shhs..............h.......s..t...............................................phtuT.APohhl.lpus.p.o.h............G.h........................................................ 
0 18 35 50 +2769 PF00840 Glyco_hydro_7 glycosyl_hydr13; Glycosyl hydrolase family 7 Bateman A anon Pfam-B_1478 (release 2.1) Domain \N 19.60 19.60 37.50 19.70 17.30 19.10 hmmbuild -o /dev/null HMM SEED 433 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.81 0.70 -6.11 18 5196 2012-10-02 19:29:29 2003-04-07 12:59:11 15 7 239 68 226 5216 4 183.50 64 97.72 CHANGED QsGT.TsEsHPpLTWp+CTsuGs..CsshsupVVlDANWRWhHpssGtT...NCYoGNpWsoolCPDspTCApNCsLDGADY..suTYGlTTSGsoLoLpFVTpss...tpNlGSRlYLMssDoc.YpMFpLLspEFTFDVDlSpLPCGLNGALYFssMDADGGhu+assN+AGAKYGTGYCDuQCPRDlKFINGpANlEGW.sSssssNsGhGshGoCCsEMDIWEANShusAaTPHPCsssu.QptCsGssCG.s.s....RauGhCDsDGCDFNsYRhGspsFYGsG..hTVDTo+pFTVVTQFlssss...GsLsEI+RaYVQNGpVIpNussslsGlsssssITcsFCsupKssFG-ps.Fsc+GGLspMGcALupGMVLVMSlWDDauuNMLWLDSsYPsssss.tPGstRGoCssTSGsPupVEussPsupVsFSNIKFGsIG.STa .................................................................................................................................................Y.T.G..N.s.Ws.sTlC...P..D..s..t..o.CA.pNCA.lD.GA...D...Y....sGT.YGI..T.T........S..G.N.u.Lo.L+FVT......pup......uoNl..GSRl..YLM..p..s....D.op...Yp....MFpL.l.N.p......EFTF...DVDVSpLPCGLNG..AL.YFV.pM...DuDGGhuK...a.s.sN......KA..GAKY..G...T......G...YCDuQ........CP+DlKFI....N....G......p..........A..N..lcGWss.Ss.s.DsNAGsGpaGoC.CsEMDIW.......................................................................................................................................................................................................................................................................................................................................................................................... 
0 86 145 203 +2770 PF02324 Glyco_hydro_70 Gluco_S_transf; Glycosyl hydrolase family 70 Bashton M, Bateman A anon Pfam-B_965 (release 5.2) Family Members of this family belong to glycosyl hydrolase family 70 [1] Glucosyltransferases or sucrose 6-glycosyl transferases (GTF-S) catalyse the transfer of D-glucopyramnosyl units from sucrose onto acceptor molecules [2], EC:2.4.1.5. This family roughly corresponds to the N-terminal catalytic domain of the enzyme. Members of this family also contain the Putative cell wall binding domain Pfam:PF01473, which corresponds with the C-terminal glucan-binding domain. 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null HMM SEED 809 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.63 0.70 -13.54 0.70 -6.77 12 260 2012-10-03 05:44:19 2003-04-07 12:59:11 11 22 109 32 37 910 3 705.80 50 54.62 CHANGED LLMsWWPDKpTQlsYLNYMsptshhs..ss..aospssQtpLstAApplQtpIEpKIupptsTpWLRpshssFVKTQPpWN.pSEs....stcDHLQsGALLasN.SshTsaANScYRLLNRTPTsQTGp.t.+hpt...s....GGaEhLLANDlDNSNPVVQAEQLNWLHYlMNaGoIsusDs-..ANFDGlRVDAVDNVsADLLQIAuDYhKAtYGVccs-tsAhpHLSILEAWScNDs....Ys+DpssspLsMDNhhRLuLlaoLs+.................s.shRushpshIs.............suLssRos-sppspshsNYsFlRAHDSEVQolIAcII+cpINPsssGhoh..ThD-lKpAFcIYNpDhtpuDKKYTpaNlPsAYAlhLoNKDolsRVYYGDhYTDDGQYMApKSPYYDAI-sLLKARIKYVAGGQsMps.h..sss.....s........lLTSVRYGKGA.pAoDs.GstpTRspGhuVlsuNpPsL+LssscplslNMGAAHKNQsYRPLLLoTpDGlssYhsDu-A..tthV+hTDspGpLsFsAs-ItGhpNsQVSGYLAVWVPVGAu-sQDsRssuSspsss-G.placSsAALDSQVIYEGFSNFQsFss..psspYTNtlIApNssLFKsWGlTSFEhAPQYVSSpDGoFLDSlIQNGYAFoDRYDluMSKsNKYGot-DLhcAlKALHupGIpsIADWVPDQIYNLPGcEVVTATRsssaGchpssupIcpoLYVupo+osGpDYQupYGGAFL-ELptpYPplFpppQISTGpshDsS.KIppWuAKYFNGoNI.GRGAtYVLpD.uospYasls...sstsFLPKpLssppups.GFspDGp 
...........................................................LLMsWWPsKpTQlsYLNYMsp.shhs.........ss....aospssQstLstAuptlQhpIEp+Iu.tp.tsTpWL+phh.......ssFVcTQspWNhpoEs..........sspD.HLQGGALlasN.ss.......h.......Ts.ANS.cYRLLNRTPTsQsGp...phh...sp....GGaEaLLANDlDNSNPlVQAEQLNWLaYlMNaGoIstsDss..ANFDGlRVDAVDNVsADLLQIsuDYF+utYtlsps-tpA.pHlSILEuWSpNDs...tYsc-pssspLshDsth+huLlauLs+.................s.s.R...u......s..lpshls.....................ss..Lss.Rs.p-.s.ppspshsNYsFlRAHDSE...VQslIup..I......Ipcpl..s.s.s.s.sGhoh.....Th-plppAFcIYNtD.ppsc..KcYTpYNlPuuYAlhLoNKDolsRVYYGDLYTDDGQYMtpKSPYaDAIssLL+uRlKYVuGGQsMps.....sp....p...............tlLTSVRYGpGshsAsDp....Gs.s.pT.RTpGhullhuNsPsLpLss.cplslsMGsAHtNQtYRslLLoTpsGlstY.sDpss.....h+hTDspG.LhF............s............ss.............-............l............tG........h..pNspVSGYLuVWVPVGAsssQDsRssu.Ss.pts.ssG.psacSsAALDSQlIYE.GFSNFQsass..psspYTNhhIApNs..s..LF+pWGlTSFEhAPQYsSSpDs.....o.FLDSlIpN..GYA....F..sD..RY..Dl...u.......h.......u.p......s.....sK..Y.G.oh--LhsAl+ALHtsGIpshADWVPDQlYsLPGcElVTATRsssaG..p....h..ts.......u.IpppL...Ysspo+usGp.YQupYGGtFL-cLptpYPplFpph..................t.......IS.............oG..p.hsss.KIppWSAKYFNGoNI.G+GuhYVLp.D...uos..pYasls...sst.h...LPptLhsp..s.s..GF..p............................................................. 0 6 14 28 +2771 PF03659 Glyco_hydro_71 Glycosyl hydrolase family 71 Finn RD anon CAZY Family Family of alpha-1,3-glucanases. 
25.30 25.30 25.30 25.30 25.00 25.20 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.56 0.70 -6.00 53 358 2010-01-08 16:12:28 2003-04-07 12:59:11 9 18 107 0 264 391 4 340.70 30 59.90 CHANGED hVFAHFM...lG.stsh.ossDWcsDhptAptusIDAFALNlu.....sDsaosppLshAapuAs.....pss..FKlFlSFDa....uhWst....spVhshlppYuspsAQhp.hs.GKshVSTFtG.tsuh.........DW.s...sl+susG........hFFlPsapshustshssh....hhDGhhsWtA.WPs...susshssss.........DpsYhpsL.............uKPYhh................PVSPWFaTphss.....asKN...WlapuDs.LWasRWpQlLpL..pP......c..aVEIloWNDYGESHYIG.....Plps........sht......scustp.aspsM...PHDGWRphhs.aIss..........YKsGtt...sshlsp...............-pllhWYRssP.usuC.sussTo.u....Nssstt..thpPs..-hhpDcl..FhsuLLpu.sAslpVo..uGsssphsas.........ussGl.phsVPh....s..sGp.phslsR.sGpslhsssG ............................................................................................VhsHah...lu....s.........sh...s.scatpDhphApt.ttIDuFALNh...u.......-.s.hp......s....ppls...Aa.puAt............phs......FKlFhSFDh.......s..hst...........splhphlppY.....sspsu.hp....h..s......u..+.shVSTF.t.G....t.su.........................................s.W.t.......sl+...p...t...s...u....................haFl.....Ps...apshs....s..t.....th.th...........hhDG....hasWt....u...WP......s.sp...s.....h.ssts................................................Dtta.hphL..............su+s..YMh................PlS.PWFa.Tp.hss.................as.KN...Wl..a..tu-s...LahpR..Wppllp.h..........pP........p..hVpIloWN.................DYGESHYIG.....Plts..th.............stus.p.as..t..s..h....s.........HsuWhphh..aIshYKsG........shlpp..................................................-tlh..hWYR.ps..s.ss.ss.ssos.s..............tPt..phhpDpl..ahsshLpp...suslp.lp...uuss.t..thp.........sssG.h.thplsh....s........sGt....hpl.R.sst.lhp..................................................................... 
0 88 143 218 +2772 PF03662 Glyco_hydro_79n Glycosyl hydrolase family 79, N-terminal domain Finn RD anon CAZY Domain Family of endo-beta-N-glucuronidase, or heparanase. Heparan sulfate proteoglycans (HSPGs) play a key role in the self- assembly, insolubility and barrier properties of basement membranes and extracellular matrices. Hence, cleavage of heparan sulfate (HS) affects the integrity and functional state of tissues and thereby fundamental normal and pathological phenomena involving cell migration and response to changes in the extracellular micro-environment. Heparanase degrades HS at specific intra-chain sites. The enzyme is synthesised as a latent approximately 65 kDa protein that is processed at the N-terminus into a highly active approximately 50 kDa form. Experimental evidence suggests that heparanase may facilitate both tumour cell invasion and neovascularization, both critical steps in cancer progression. The enzyme is also involved in cell migration associated with inflammation and autoimmunity [1]. 
20.50 20.50 20.70 20.70 19.70 20.40 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.93 0.70 -5.81 9 293 2012-10-03 05:44:19 2003-04-07 12:59:11 9 9 99 3 160 323 6 232.70 31 49.33 CHANGED sccuolslpGpstIApTDEsFlCATLDWWPP-KCsYspCsWGhASlLNLDLss.ILhNAIKAFsPL+lRlGGSLQDpllY-sGc.cpPC.ssFpKsoutLFGFopGCLshcRWDELNsFFppTG...................................AhlsFGLNALpGRsh...hsc.........u...shsGsWDaoNApshIcYTlSKGYs.IcuWELGNELSGSGVGA+VuAcpYApDsIsL+sllpplYpss.tspPlllAPGGFFDtpWaTEhLpposss.lcVlTHHIYNLGPGsDs+LIcKILDPSYLDt.upT.FpslpphlpptGohAsAWVGEuGGAYNSGtchVSsoFl.SF...WYLDQLG ..................................................................................................................................................s.......................................................................................................................................................h......hsh.....p.p..hD.L.sFhppoG...................................hpllFGLN.ALhtp..........................sstWssoNAtthlc..Ys.ssKtYs....I.uWEL.......G.N..E....s....u....p.t..h..s.........h..t..lsu....pQhucDhhpL+p.ll.p..p..h..a...pp....s........t....P.l..htP....t.s.......h......s...............p....hh........p.p....hlpt...uGt.....l-slT.aHh.Y..........l.ss....t....t...........th.h..pc.....hLsPp....h.LD.p...h..pp...hp....pl...phl.p.p...hss......t.t.....tsWlGEs.....uuAasuGt..lSssFssuF...haLDpLG........................................ 
0 43 82 120 +2773 PF01270 Glyco_hydro_8 Glycosyl_hydr20; Glycosyl hydrolases family 8 Finn RD, Bateman A anon Prosite Domain \N 17.50 17.50 18.70 17.50 17.00 16.00 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.11 0.70 -5.72 7 1235 2012-10-03 02:33:51 2003-04-07 12:59:11 12 23 1064 33 227 966 30 329.70 32 84.54 CHANGED hshshtsshshshhsshssstsutss....tW-paKucalpssG.......RlhcsusuNhsps......EGQGYGMLhAVhhs.....-pshFDsLapascspLpp..hsstLhuW+hssstps.hts...ssATDGDh.IAauLLhAtKpWt.st+hshhp-Ahshhht.hhshhs..sG...sLhPGshGhsps..hthNPSYahhPsht.tFhshTusstWtslh-cshpllspht....upsGLssDWlshs...sssu.t.th-.......as.phShsslRlsh.hhhhs.....spsshlssaht.....hhpshhcshp.shssluostsusas..hssGhhAspssshtp.lss.....thsph.suu......csYausolpLhshlhpst ....................................................................................t.s.tt.ts..s...........tWcpaKpcal.s..ppG..................RVlDs...u..s..u...pp...s.o..S........EGQuYGMhhAlhAs.....D+ssFDplhsW.Tps..p..Lup................hp...p.....p.....L.......AWha.......u..p..........c................p..s..s....phps...........sNoAoDGDlahAauLLcAu..+h.....Wp....p..t..p...Y..s.......hup.Alh.h..........ths.....t.....c.s.............s.......s........l.....s....hG...................s............h..........L.............LP...G..p......h....G........F.....s.....p....s.......s........t....h..c...............h.......N.....P..........SYhs....P....p....lt....p....a.a....s.....t....h.......u....s....s..........W.p.s.ltcssh.c...lLh.csu............s...pGhsP.D..WVthc......tstu.h..p.......t..sp..................hshhu.SYsAl.Rl....h....h.....ahu.....hh..s.....c.......s...s.........pp...u....c...h...ls.pFts..........hssh....s...t.....s..G.h..s.s.p.t...ss.....l...s......o...s..p...s.....t...s...hs..........P....s..G..h...u...A.s...h........s.....h....h....p...s.h.ss.p................ph.s..t...h.sts............csYYs.sL.sLhu.hh.............................................................................................
............................................................................................. 1 80 127 172 +2774 PF03639 Glyco_hydro_81 Glycosyl hydrolase family 81 Finn RD anon CAZY Family Family of eukaryotic beta-1,3-glucanases. Within the Aspergillus fumigatus protein Swiss:Q9UVV0 two perfectly conserved Glu residues (E550 or E554) have been proposed as putative nucleophiles of the active site of the Engl1 endoglucanase, while the proton donor would be D475. The endo-beta-1,3-glucanase activity is essential for efficient spore release [1]. 25.60 25.60 25.90 25.90 25.40 25.50 hmmbuild -o /dev/null HMM SEED 695 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.17 0.70 -6.52 5 477 2010-01-08 16:09:34 2003-04-07 12:59:11 8 36 230 0 313 469 76 526.10 26 69.41 CHANGED sPshsc+ShhPsPsphhusDpsssPLpTNKFYsNhhLsss-QPpasHPYSIp.sssSS.aGLAISHsosSQpsah..ssossspalFNPsGltshVFoA+pFsSusslsl-ppchpphpssLSsossoSsalchPLVpGMGFlTulY+.sLsFsls.SSIsFSTIhu.pSlShupuhsKYsIpLsNNpsWLlYAoSPs...spsFsLTlsusSoIpsSsuFoGLIlQIsVlPs-o..NsY.EslhDtuAGsYsssscLsuspsDupsscYcFsasstGYot.GssLMaALPHHhpSFos.-sQsphluptLsSTlcGlMsGYlTcSWsL.csplspplua-PVSlohsSpt.sYSc-uLpcItuAsspDVNsssSsAESslsS.YFhGKlIA+YAplsLlAcEIlaD-slTKpsLpplshAhshLsuNpQshPLlYDsKWsGlISou..GSpSSpADFGNoYYNDHHFHaGYalYAsAVIuhlDP........oWucc.NRcaVpoLlRDa.....ussucSDoYFPphRsFDWFsGHSWAuGLaEsGDGKNEESTSEDVNuhYAsKLWGLshGDo+LlspAsLhLolh+cAhpsYahhcsssoVpPc-FlG.N+VoGILFsNKlDauTYFGscEa..hhpGIHhlPlTPlSuhlRSs.oFVKQ-WNtKlsPII.-oVu-GWKGILauNpALYDPcsAYcpFus.ssFDsuNpLDNGhShTWaLAh 
.........................................................................................................................................................................................................................................................................................s..h....Tstah....thh......h......t..t...........h..Ph..h..................................................................h.h.................................................h.....................t..h....................h..........................................................................h...lhpG.saho.............................h....h......h...................................................................p.a.h..h...s...t......ahhY...........................................h.............t...........h...s.t.....h..G.h.hpls......h............t........t........................t......h..h.D...t.ss.h.a...s....htsp...h.........p.....h.p.................t............t.........hpa..p.......h.thtt.t.....G................t.....t.....h.l.h.hA.h.PHHhp.h....................s.........t...t.t...t...........h...s............h.sohcG.hhuh..h.......s....s.......p......h.h.p.................................h...........P.h.............................p...............ths.t.....t..t.h.t.....lht.h.h.p-.l....p...t.......tt..ss.s...o...Ya.GKhls+hA.lhhhs...p.............p....l....h...t...........s..p...s.....h.s....p...........h..h..........hh............p.t.h.hthhh.......s..........s......p......t......t.......s.....h.h.YDstWtGl.lop................................................s........t........hDFG.s.s.h.YNDHHFHYGYalhuuAllupl....Ds.......................sW.....h..pp..........p....s....h...lp...LlRDh...................u.N.s..s..p....s.DshFPhhRsFDhatG.HSWAp.G....l.....h.....t..s..D..........G+spESoSEshp........hhYu.htlaG........h......s.......h.......Gs........p......phpshGsh.h.......hsl.hpuhppYa................h.h..p..s...s.........s........p.....h..................P..tpa.ht...N+...V.....s.G.....llatsKhsas
T............a...Fu...s..t.......hltG........Ip.hlPl.hs.hothl..........ts.t....pa.Vppph..........p..........h......h....t....t...h....h....p......t.....h..t...s..s.....Wpul.lhh..uhh.-.tsu...hphhtt......h.t................h.......................................................... 0 91 195 285 +2775 PF00759 Glyco_hydro_9 glycosyl_hydr12; Glycosyl hydrolase family 9 Bateman A anon Pfam-B_843 (release 2.1) Domain \N 20.20 20.20 20.30 20.30 19.20 20.10 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.65 0.70 -5.59 37 1983 2012-10-03 02:33:51 2003-04-07 12:59:11 14 98 574 36 722 2026 39 381.80 25 67.64 CHANGED sYp-ALpKulhFaEsQRSG+L.PsspR..........hsWRsDSGLpDGss...............h.sl.......DLoGGaYDAGDpVKFshPMAaosThLuWushEatsth................tpusplsphhcsI+WuTDYhLKspsu.ss......hasQVGD.....GssD........Hph....WttsEsMshsRssa+lstssPGS-luuEsAAALAAASlVF+ss.DssYuppLLppA+plasF..AcpaRutY...Ssuh.ss.ussaYsShSGapDELhWuAAWLahATs-.....soYLshhps.stphtsts.................sasWDsphsGuplLLA+h.h......p....hppa+sp..s-palsthhss..s.sp...sphTPGGLhah.pWus..LpYssssuFLhhsYuch............s.hssssphpshAcpQlDYlLGsNPhphSYlVGaGsp.PppsHHRs.s.....s+.......suapshhssssPp.phLhGAlVGG..Pss.....................pD.sasDcRssYspsEsAsshNAuhlGsLA 
...............................................................................................hh..shha.a.hp+.....s..................G..............................a.tts.u...sh..Dtt.................................................................t.............DlsGGaaDA..G..D..hs..K.........ashs..huh...........o...........s.s.......h.......L....sash.hEhttth.........................tss..s...p..h......s.....c.....hh..ctl+WshDahl+hpst...t.......................hh.h.p.l.....us................ut....sc..............Hth..................Wt.ts..p.....s........h...........s..........h........t.........c......s....s...h......p.....h.....s........t.........p....................u..........o.......s......h...s.u.p.h...A.A....u....h.At..u........u..h......la....+........s..................h......-....s.....s...a..........u...p.p................hLptAc.........p.....hap.a......A...p.....p......h....t...s...h.....................ts..s..............s........s..........t..s.h..Y........ss....s....s...ht.....D.........E......h............hW........AA.s.La.h.u...T...t..p...................pp....Yhphhtp.....h...tt...ht.h.t.t.......................................hsW.sst...h..s..s..st..........h.h..........h...uph......h...................t.................hptht..pt..............s..p..t........h..........hs.thh.................................tt......h.......hs.............s...........t.......Gh.........h....h....h..........h.us....hphs...s...s..s...h.h..l..h..h.ha..s.............................................................................hhtspphh..p.h.A.p.p..p.......l.....sYlLGp........Ns..h...t.h...Sa...h..s...G...a..........G...t....p.........P.............p.p...H..HRh.......................................th.t.h....t......s................Gs....lsGG....P.s.t...............................................tc..ta......D.....t.....h...t.....sa....s..Essh.hN.Ashhhhh.................................................................................................... 
0 310 549 663 +2776 PF03808 Glyco_tran_WecB Glycosyl transferase WecB/TagA/CpsF family TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.40 25.40 25.40 32.20 25.30 25.30 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.77 0.71 -4.86 167 2497 2010-01-08 16:18:29 2003-04-07 12:59:11 8 24 2143 0 522 1664 434 170.50 35 65.78 CHANGED phlppAcllhsDGhullhuu+hh.u..tsl.........cRlsGsDlh.tLhp.tsspp.sh..plaLlGup.slhcpsspplpppaPslplsGhpsG...ha.........s.t-p.ppllp...pIpp.ups-llhVuhGsP+QEhaltchtpp...lsss.lhhulGusaDhhuGp.hcRAPpahp+huLEWlaRLhpEP...pRlh+c ..............................p.hlppA-hlhsDGhulVhus+hh...s.....psl.............pRVs.GhDLhppLhp..huspc...sh.........pVFLlGuc.s......-...........V......lp.psttpLppp...a.s.slsl....sGtpc.G..YF............p.cccpslhc....cIpt.uts.cllhVuhGs.PKQE.haltcpcpt........hsss...lhhGVGGoaDVhuGp..lKRA.PphhpphsLEWLYRLlppP...pRltRp........................ 0 167 342 432 +2777 PF01531 Glyco_transf_11 Glycosyl transferase family 11 Bateman A anon Pfam-B_935 (release 4.0) Family This family contains several fucosyl transferase enzymes. 
21.50 21.50 21.70 21.60 20.80 21.20 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.12 0.70 -5.43 5 654 2010-01-08 14:30:42 2003-04-07 12:59:11 11 14 365 0 223 638 1259 248.30 22 78.41 CHANGED sVSllh+spptlsLh.AsWAh......Pshss.s.p+hsu.hcGhaTlslNGRLGNQMGpYATLhALAp..hNGRhAFIPsuMHusLAPlFRIoLPVLpscsupRpPWpNaHLsDWMcE-YcclsGcal+hoGaPCSWTFYHH.LRQpIcpEFTLHDHLREEAQ..shLRsLpls.h.GsRPSTFVGVHVRRGDYVcVMPKsWKGVVuDpsYLppAlDcFRARasuslFVVTSDDM-WCKKNIcsSpGDVsFAGcG.puSPuKDFALLoQCNHTIlTlGTFGaWAAYLoGGDTlYLANaThPDSpFhslFKPEAAaLPEWlGIs .....................................................................................................................................................................h............................................................................................................................................................................................................................................................................................................................................................................................................h......................t......h...a...h....t......hhp.p....h.............h..php...s.....pl......p.......pt..hp.................th.h...p.t..h..p..ht.........h.....p..s.....p..s....h...V..u.lHlRRG.D..Y.lpsh.......t...........h..t..s.hss...h.s.......Y...h...p..pA....l...c.h....h.....p...................t.....+.......h....p....s......s........h....F...hlh...S....s...-...hp.W..........s....+....c..........s.....l.......s.....h.....t........p..........s.....p....s.......h....h.sss.......................tss..s.hp...Dhh.L.h...o.p..C...pasIh..s....s..STFuaWu.Aa.L.s......t..s..s.p...l...h.l...h.s..p.h.....t..p.................t.h..tW............................................... 
0 67 103 167 +2778 PF01793 Glyco_transf_15 Glycolipid 2-alpha-mannosyltransferase Bashton M, Bateman A anon Pfam-B_1324 (release 4.2) Family This is a family of alpha-1,2 mannosyl-transferases involved in N-linked and O-linked glycosylation of proteins. Some of the enzymes in this family have been shown to be involved in O- and N-linked glycan modifications in the Golgi [1]. 25.00 25.00 37.10 25.60 23.30 23.10 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.12 0.70 -5.49 107 719 2010-01-08 14:40:35 2003-04-07 12:59:11 11 10 156 6 528 697 49 307.20 39 74.37 CHANGED lhh.shhttttp.......................tttttts.tpt.thpth...tpstst.h..tttt...t.............................s+....NAThlsLsR.Np-...L.sllpol+slEcRFN++..apYsWlFLNDcPFo--FKctspshs.Suc...sc.........auhIPcEcWshPsaI...Dp-+hpcshcphtccpIhYGsspSYRpMCRapSGaFa+cslLpcY-aYWRVEPslchaCDl.sYDsF+aMc-ssKpYGFsIulh.E.h.pTIPTLWspsccFh..pppPp...altp.sNhhsalo-..s.....................................................................................supsYNhCHFWSNFEIusLsFaRSc...............s..YpcaF-aLD+sGGFaYERWGDAPVHSIAsuLhLc+ccIHaFcDIGYhHsPa 
...............................................................................................tt...............................................................................................+.sAshls.LsR..N....p..-....LtsllpSl+pl..............Ec+FNcp..apYsWVFL.....N..D.tsFs-.-FKctsp.shs....su..p....sp..................aGhI.....P..p.....-.c.Ws.hP.s...aI...Dpp.....p.hpcshp.pht...c..pt.........lhY.G..s...p.SY.+.pMCR.F.S....G................aF..a+H..s.lL.p..p..Y-aYWRV.......E..........Pslcaa.CDl...sYDsF.p.aMpcssKpYGFslulhE..h.pTlPo...LWpp.spcFh...cp..p..sp.......hl....t.p....ss..hhpalo-..c.............................................................................................tupsY..N...hCH....F.........WSN.FEIusLsaaR.Sc...............t..YpcaFpaLDc.sGGFaYER.........W................GDAPVHSIAsuLhLs+sc.I..HaFcDlGYhHss................................................................ 0 147 304 462 +2779 PF03076 GP3 Equine arteritis virus GP3 Griffiths-Jones SR anon Pfam-B_687 (release 6.4) Family This protein is encoded by ORF3 of equine arteritis virus. The function is unknown. 25.00 25.00 291.00 290.80 20.50 17.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.12 0.71 -4.40 3 141 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 2 0 0 140 0 161.00 84 98.16 CHANGED MGCTYSGPAAFLCFFLYFLFIsGSVGSNNTTICMHTTSDTSVHLFYAANVTFPSHFQRHFAAAQDFVVHTGYEYAGVTMLVHLFANLVLTFPSLVNCSHPVsVFANASCVQVVCo+lNSosGLGELSFSFIDEDLRLHIRPTLICWFALLLVHFLPMPRC ...MGpAYstPVAhLCFFhaFLFIsGSVGSNNsTICMHTTSDTSVHLFYAANVTFPSHFQRHFAAAQDFVVHTGYEYAGVTMLVHLFANLVLTFPSLVNCo+PVsVaANASCVQlVCs....p....s.N...So....s....G....LGElSFSFIDEDLRLHIRPTLICWFALLLVHFLPMPRC. 0 0 0 0 +2780 PF04724 Glyco_transf_17 Glyco_tranf_17; Glycosyltransferase family 17 Mifsud W anon Pfam-B_5914 (release 7.5) Family This family represents beta-1,4-mannosyl-glycoprotein beta-1,4-N-acetylglucosaminyltransferase (EC:2.4.1.144). 
This enzyme transfers the bisecting GlcNAc to the core mannose of complex N-glycans. The addition of this residue is regulated during development and has functional consequences for receptor signalling, cell adhesion, and tumour progression [1,2]. 21.70 21.70 23.20 22.00 21.10 21.60 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.45 0.70 -5.45 12 306 2010-01-08 16:27:10 2003-04-07 12:59:11 8 5 175 0 202 296 1469 291.40 27 74.31 CHANGED hhshS+h+sh.+th....hh.hhhhh.hslhslhh+tpplohhhRPlW-uPPp.FstIPHYatcNhoM.sLCpLHGWthR.-hPRRVaDAllFSsEhDlLplRa+ELhPYVspFVlLESNsTFTGhsKPLhFtp..p+sp.FcFlcs+lsYshlssh.hcpGc...sPFltEuYpRsAL..ctLlRluGlpsDDllIMuDsDEIPStcTIphL+WCDshPplhHLcL+pYLYSF.....................pa.sDspSWRAohHhapsG+TcYtH.RQosplLsD.........uGWHCSFCF+.lp-FhFKhpuYSHsD+sRascY...Ls.cRIQclICpGsDLFDMlP.EEYoFp-lhtKhGPlP.ohSAVHLPuYLLcNh-cY+aLLPGNChRcu .........................................................................hh.....................................................................................h.............h.....t.......s......R+....lhDshhh.s.s..E.lDlL.-lRhpELhshVDhFVl.lE.Ss..h...Tap.G.h...KsL.hFtp....pp...tp..F..p..h..hc...s.Kl....h..Y.h.......h....l..s.........th......................t..up..............ss..a.........t-.s.....a..p.....R...s.....tl................ptl.h....p.....h..t...s........h.....p....s...c.D..lll.huDs.DEIP.s.....cslthL...+h..tc.....shs....p....h....p.hp.h.+..hY...uF................................................a....h.....ss..p....h...p....t.......t......t..h...h..........s...h.p....p.h.....t...........p....p..s.s...t...l..lhp......................uGWHCSaCFp..p.....ph..h.h...Kht...u..h...sHs-.h....p.h.tta.............hs.p.hI.pphlppGt.haDh...p.h.....................P..h....................................................................................................................................................................................... 
0 73 120 165 +2781 PF03033 Glyco_transf_28 Glycosyltransferase family 28 N-terminal domain Griffiths-Jones SR, Bateman A anon Pfam-B_1105 (release 6.4) & Pfam-B_2764 (release 7.5) Family The glycosyltransferase family 28 includes monogalactosyldiacylglycerol synthase (Swiss:P93115, EC 2.4.1.46) and UDP-N-acetylglucosamine transferase (Swiss:P74657, EC 2.4.1.-). This N-terminal domain contains the acceptor binding site and likely membrane association site. This family also contains a large number of proteins that probably have quite distinct activities. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.42 0.71 -4.41 43 6160 2012-10-03 16:42:30 2003-04-07 12:59:11 15 40 4548 35 1693 4850 1858 138.00 25 33.17 CHANGED lllsssGTtGcl.PhlAluppLpcpGacVp...luspsshpphl..ppsGlshhs..lsss.....................................thht...hhpthtsshpt..th.thhphlt....phshthsshhussshhush.....htthshhltEp.ulPhhssphhshhssphhh ..............................hllsuGGT.u.G...HlhPsl....Alupp.L....p......p.................p......G.........a..c...lp..........hlGop.....s...s....hE..pp..l.....h.p..p.......G.....l...p.......h.ps........lsss..........................................................................................s.hpt.hh............h...p....t.h..t....s....s.hc.h......h......p..u..h....h.p.....u..t..p....l.......l.....p...c.......h.....p.......s...c....s.....l......h...u.........h..G........G.....a........s....u........s......P....s.h..lAAh...........hhtl.P.s.llHEpsuhsGhsN+hhsphspph..h........................................................................................................................................................................................................ 
0 541 1110 1453 +2782 PF04666 Glyco_transf_54 GnT_IV_N; Glyco_transf_55; N-Acetylglucosaminyltransferase-IV (GnT-IV) conserved region Waterfield DI, Finn RD anon Pfam-B_4541 (release 7.5) Family The complex-type of oligosaccharides are synthesised through elongation by glycosyltransferases after trimming of the precursor oligosaccharides transferred to proteins in the endoplasmic reticulum. N-Acetylglucosaminyltransferases (GnTs) take part in the formation of branches in the biosynthesis of complex-type sugar chains. In vertebrates, six GnTs, designated as GnT-I to -VI, which catalyse the transfer of GlcNAc to the core mannose residues of Asn-linked sugar chains, have been identified. GnT-IV (EC:2.4.1.145) catalyses the transfer of GlcNAc from UDP-GlcNAc to the GlcNAc1-2Man1-3 arm of core oligosaccharide [Gn2(22)core oligosaccharide] and forms GlcNAc1-4(GlcNAc1-2)Man1-3 structure on the core oligosaccharide (Gn3(2,4,2)core oligosaccharide). In some members the conserved region occupies all but the very for N-terminal, where there is a signal sequence on all members. For other members the conserved region does not occupy the entire protein but is still to the N-terminus of the protein [1]. 
19.90 19.90 19.90 20.00 19.30 19.60 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.72 0.70 -5.68 18 495 2010-01-08 16:26:41 2003-04-07 12:59:11 8 9 124 0 316 424 6 244.80 33 57.08 CHANGED tths..........hpthpsssppl.pplsp...........phhh.hhsH.....hhcctu......slpPslltGpsRssVu.lVlGlPTV+Rp+poYLh-TLpSLlsphot-Epc-sllVValA-oD..salpplspplppcFspclpSGhl-VIusstpaYPshssL+coasDsp-RV+WRoKQNLDYuFLMtYApscGpYYlQLEDDllsp+sFhsshKpFssppsu......pcWhhLEFSpLGFIGKlF+SpDLspLlcFhhMFYp-pPlDWLLsHahhl+sC..tt.s...C.t.hpphhIRa+PSLFQHlGtpSSLpG+.hQ+LKDc-F .........................................................................t.t...........................................................ph....th.hu...s..h..pt..hp...lslGlso....VpR.....p..t..t....s.YLhpTlpSLhppho.pEpp.....chh..ll..V..hlu-............sD....p........a........h.pths...tp...lp...p.Fspc.l.uGhl....lIpss..paY.P.s..h....s..p.l+....p....shsD..s.p.cRs..paR..o.KQ.N.lDYsaLh...a.s..p.s..p.u......t.YYl..L.EDD.lhsp.s.a..hsph....+phhhp...hps............................ppWhhLE.F..SpL.GaI..G.K.......h..a+u..DLshlspFhhh.F.Y.p-hPhDaLLschhh.lhs..............................tp.hlpacPSLFQHhGhhSShtup..p.hp................................................ 
1 73 104 181 +2783 PF03414 Glyco_transf_6 Glycosyltransferase family 6 Bateman A anon Pfam-B_4383 (release 6.6) Family \N 25.00 25.00 25.80 25.50 20.70 20.30 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.11 0.70 -5.98 6 613 2012-10-03 05:28:31 2003-04-07 12:59:11 8 8 118 121 171 620 297 246.90 53 92.10 CHANGED htPpVhssphthshWhsthhpsshp........s.phppsctc.ccppptscl.cplp.pshh.s.pRs-VLTlTPWhAPIVWEGTaNpAlL-phYthQplTlGLTVFAlGKYlc.aLccFLESA-+aFMVGH+VhaYlhsDDsuchPhVpLGPhRplpVhEl+scKRWQDISMhRMcTIu-HIht+hpHEVDaLFChDVD.VFpD+aGVETLGpLVApLpshaYtAs.psFTYERRc.SsAYIPhsEGDFYYtuAlFGGosscVhplTptCapuILtDKtNsIEAtWHDESHLNKYFLhpKPoKlLSPEYhWD.+l.GhPuslKpl+hoh.sKpashVRN ........................................................................................................................................................................pluhhhhAh.t.....+Ylt...FLc..hFLEo....AE+aFMVG..HRVp..YY.VFTDpsus.V.P..c..VsLGsGRpLsVlcV.pu.h.p.RWQDlSM+RMEhIu-ahccR.hhpEVDYLhChDVDhc.F.pD+lGV.ElL.......s...s......LhusLHPu.a....Ys..ss.RcsFTYERRPpSpAYIP..psEGDF..YYhGuhFGGoVp-V.+LT+sC+puhhhDpsNuIEAhWHDESHLNK.Yhlh.p.KPo..K.lLSPE.Y........hW.........D..ph.s..ss..l+hh+h.h...K................................... 0 7 24 62 +2784 PF01075 Glyco_transf_9 Heptosyltranf; Glycosyltransferase family 9 (heptosyltransferase) Finn RD, Bateman A anon Pfam-B_839 (release 3.0) Family Members of this family belong to glycosyltransferase family 9 [1]. Lipopolysaccharide is a major component of the outer leaflet of the outer membrane in Gram-negative bacteria. It is composed of three domains; lipid A, Core oligosaccharide and the O-antigen. All of these enzymes transfer heptose to the lipopolysaccharide core. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.29 0.70 -5.16 18 6767 2012-10-03 16:42:30 2003-04-07 12:59:11 12 124 2063 9 1547 5234 3651 225.80 19 64.03 CHANGED htpLtcsLppps.aDhllshpshlKSAhlsthhutsh+hGhctpot...husLhhs+phshsh.tthtVpRhttLhsps......hshstspspsphslshtptsttpsths............ttPhlshhPuuot....ssKpWPt-patcLsptLpcpG..hplhLhsuspc...-cppscplssuhc........hsslssKhsLppsstLlutAshlVusDoGLhHlAAALs+PllulYGsTsPthTs....Phucptssls..phtttsshpppshhsphp .........................................................................................................................................................................l.pt.tt.aD..hh.h.......s....h..............s...............h.......p..s....s......h....l.....s...h...h....h....t.............h........h...p..h....u.....h...t..h..t.t..............................h..........t....h...h.....h.....................................................h....l....p....p............h...th...hh...............................hs..h........................................h........h......s.....t......h..t......h.....t..hs........................................tt...h..l.sh......t...s...u.....uph........ssKp......W...s.....t.......p.p....a....s.p.l.h...p....t...l..t...p....p......s.........hp........l...l...L..h..u..uspc............tpp.h...h.p...t..l.s..p..t.h.t...............................shs.l.....s....s..c...h...s....Lt..p....h....s....sllsp.u....c....hhlusD...oG.hH...lAu...Al.s.p........P......s..lu.l.........a......G...s......o.....s.....s...t...h..hs...............sh....s....t..p........h.......h............................................h................................................. 
0 478 953 1279 +2785 PF00274 Glycolytic glycolytic_enzy; Fructose-bisphosphate aldolase class-I Finn RD anon Prosite Domain \N 19.20 19.20 19.30 19.20 18.10 19.00 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.02 0.70 -6.05 58 2226 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 1445 193 546 1888 2485 252.40 41 94.39 CHANGED ELhcsAptIsuPGKGILAA.D..ESsGThGKRhssIGVENTE-NR........RtYRpLLFoo.sslsphISGVILF-ETLYQpss-.GpsFV-lL.+cpGIlPGIKVDKGllsLsGoss.EosTQ........................GLDuLucRCApYhKsGA+FAKWRsVLcI.....s..sssPSpLulpENApsLARYAuICQpsGLVPIVEPEILsDG-HclcpstpVTEcVLAtlaKALsDH+VhLEGoL.LKPNMVTsGpsss.p..+.soPpplAhhTVpsLpRoVPsAVPGlsFLSGGQSEE-AolNLNAhNph.............sh...+PWtLoFSYGRALQsSsLKsWtGKsEN...lpAAQcthlpRA+ANutAslG+Yssss........sssuspoLaltsasY .........................ht..upGhlAh.D...pS..suoh..sK..thtth.s.....l...p.........ps.p.sp..............+thRphlho....s....s....h...............ht.ph....I.G....sILFcpTh....p.t.......p.....G..h...........hsphl.tppGllshlK....VDK........Gh.ss...h.s..ss......st...pshp........................GLDsLh.cR.s.s..p.ah.h..GspFuKWRsVl...pI.................t.PS.thuIt.-sAp......sL.ARYAslsQp..pGLVPIlEPElh...cu...p...H...chctpthlpppl.httl.ph.t-p.pVhLc....hol.hcsshh.tsh...............tp....................................Vst..........lshLSGG.S.....c-cAs..Lst.s.th............................hSauRALtts.lth..tsp....t................................................................................................. 
0 175 303 423 +2786 PF01102 Glycophorin_A Glycophorin A Finn RD, Bateman A anon Prosite Family \N 22.70 22.70 22.80 22.70 22.60 22.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.65 0.71 -4.35 5 139 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 43 6 29 143 1 95.20 40 58.86 CHANGED HT.SsSGSso.t.ISo.......oNDspspsStuTP........TtspEVosthssRsh.P.EpG..-slQLsHDFScsVITLIIFGVMAGlIGTILLISYsIRRLIKKSsuDVpP............hPsP..tDs-VPLSSVEIEsPE- ..................................................usss.t.los.......sssppppsohsss........stsp.cs.S.plo.sps.l.sspp..p....-p..s....Q....lsH.cFots.......I...hhIlhsVhAGl.IG..hI..L....lI.YsI....pRhhK.......................................................................................... 0 10 12 18 +2787 PF00606 Glycoprotein_B Herpesvirus Glycoprotein B Bateman A anon Bateman A Family This family of proteins contains a transmembrane region. 18.80 18.80 18.80 19.50 18.40 18.40 hmmbuild -o /dev/null HMM SEED 714 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.34 0.70 -13.22 0.70 -6.50 40 1279 2009-01-15 18:05:59 2003-04-07 12:59:11 13 17 240 20 36 932 2 309.30 28 74.43 CHANGED 
phsFRVCus.usGu-llRFptspsCPs.hspscsasEGIhllaKcNIsPYpF+VphYhK.lThsosasGh.shs..tlospasc+hPlPhhElst.IDppspChSusphsp.sshhhssac+Dsh.spsh.LhPschtosss+RahTss-hYss.Gsh.hhY+osToVNChVs-spARSsaPYcaFshuoGDsV-hSPFashp..ssp.hs....csss+Fp.lcsYphlDhtsc...hstsssspRsFLppsc.hTluW-htscpsssCshshWcp.hscslRsEts.sSaHFsupslTATFlosh.sphshsps...pthsClpccspctI-clatpcYNsTHlpsGsl.p...hYhTsGGhllsaQPlhspsLschhttphspsssssss.t.........................Rp+Rsssssstt......................................shpopssltaAQLQFsYDpL+salNchLuclAcAWCcpQpRpthlWpELoKINPoulhSAlaG+PVuA+hlGDVluVocClpVsQso.VplppSMR........lsusss....hCYSRPlVoFcahNsop................................hhpGQLGpcNEILLspphlEsCphssc+YFhsGsshhhYc-Ytas+plsls-Issl.sTFlsLNlohLENhDFpsLElYo+sEl+so.sVhDlEplhR-hNhasp+ltslcpslps...sspsshlpGlsshh.pGLG.slGculGsVluuluGAluShVoGlsoFlpNPFGuhslhLlllAsllslalhaR+hppltpsPlchLYPhss...p...shppps ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.hRph.t............................................p..psh..hu.....lQFsYsplpt.lNphhtpl..uWC..Q.+p..hh.th.pl......................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 0 12 36 +2788 PF00802 Glycoprotein_G Pneumovirus attachment glycoprotein G Bateman A anon Pfam-B_1049 (release 2.1) Family This family includes attachment proteins from respiratory synctial virus. Glycoprotein G has not been shown to have any neuraminidase or hemagglutinin activity (Swiss-Prot). The amino terminus is thought to be cytoplasmic, and the carboxyl terminus extracellular. The extracellular region contains four completely conserved cysteine residues. 20.70 20.70 20.80 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.95 0.70 -5.24 4 4325 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 47 1 0 2618 0 121.00 55 83.44 CHANGED hKTLc+sWcs.pahIVh.SCLYKLNLKSlsQhALSsLAMIh.TSLlIsAIIaIuouNpKspsTosss.phTpQhpNpTosahTpps.puspsSpQuTTos..pThssssT.Gs..phtHoTspTpsppT....T.stspKPshpsptspPPcp.pDc.cFplhsaVPCSICpsN.sChSlC+phspptPsKtsThpPpKpPpsKTT.KKsoKT........oTp+.Tp..ThhpsKsNhoTP..slLoosp..............................HsTs .............................................................................................................................................................................................................................K+D.Ks.spp.PK..c...sTTpP.T.cKPThp..TTcpshpT...TlLsosTotp.E+.................T.QppoLHSTo........................................ 1 0 0 0 +2789 PF02885 Glycos_trans_3N glycosyl_transf_3; Glycosyl transferase family, helical bundle domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Domain This family includes anthranilate phosphoribosyltransferase (TrpD), thymidine phosphorylase. All these proteins can transfer a phosphorylated ribose substrate. 
20.90 20.90 21.00 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.75 0.72 -4.38 135 7326 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 4261 62 1772 5253 2054 66.00 26 16.80 CHANGED phpphl.cclhpu....psLoppEspthhptlhsGphs...........cspluAhLhAlph+G...pos-ElsuhscAh..tppup .......................pllp+h....tcG.....psLop-Ehpthhst.lh.....s..G.p.ls.........................-hQl...uAhLhAlph+G....o.pEl.suhspAMhppu............. 0 546 1116 1479 +2790 PF00534 Glycos_transf_1 glycosyl_transf_1; Glycosyl transferases group 1 Bateman A anon MRC-LMB Genome group Family Mutations in this domain of Swiss:P37287 lead to disease (Paroxysmal Nocturnal haemoglobinuria). Members of this family transfer activated sugars to a variety of substrates, including glycogen, Fructose-6-phosphate and lipopolysaccharides. Members of this family transfer UDP, ADP, GDP or CMP linked sugars. The eukaryotic glycogen synthases may be distant members of this family. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.91 0.71 -4.84 63 44775 2012-10-03 16:42:30 2003-04-07 12:59:11 15 435 6658 103 12903 42366 19227 161.60 18 38.27 CHANGED sppp.ptphsh.psphhllhlGRlthp.KGhchllcAhttl.t.........tsshpLllsG..........tsp.ppphpphspphtlpsplhhhuhhs..pclhphhpt....uclhlhsSth....................EuFGhshlEAhusGhPllsos.ssGhs-llpcst........sGhllpss...sscslsctlppllpsp.......phppphsppupph ..............................................................................................ht.................p..t....h..l..h...h...l...u.....R....l....t...........p....K........s.......h....p....h....l......l.....c.......u...h.tp....l.....................................t..s....h........p....l...h....l....h....G................................................pss....t...t....p...h....p......p.....h.....h......t........p...........h...........t..........h................t..............s................p...........l.........p....h...........h........G......h...........h...........s.............................p..........l..t.....p....h......hpt.............u-..l....h.......l.....h...s.S.hh.........................E...s.....h......u.....l....s....h....l...E......A.h....u.t....G....h.....P........l..l....s....s.....s........s.......s..........u........h......t.....-........h..l...t...s.st..................................sG....h..........h....h..........t.........st..........s............................p..............s...........h....s....p....t....l....t....p....h...h....t...p..........................h........................................................................................................ 0 4440 8647 11076 +2791 PF00591 Glycos_transf_3 glycosyl_transf_3; Glycosyl transferase family, a/b domain Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Family This family includes anthranilate phosphoribosyltransferase (TrpD), thymidine phosphorylase. 
All these proteins can transfer a phosphorylated ribose substrate. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.42 0.70 -4.98 21 7137 2009-09-11 15:42:01 2003-04-07 12:59:11 16 28 4318 62 1734 5172 3200 239.20 30 60.81 CHANGED shshlDhsGTGGDGtsThNlSTsuAhVsAAs.Gs+lAKHGNRulSSKSGouDlLEul.Glsl.phss-psp+slc-sGluFLFAPtaHsuh+assssR+pLsh+TlFNlLGPLhNP.ApsphpllGVast-LspshAcslppl.shp+uhVVHG.sG.....hDElohtupThVsclc.su...clspaslsPpDhGlppsplpsLpusss.cENtchlc.slLpGpsssh.....t-hlshNAushlhluGhsso.......LcpGsptAt-slcoGpAh .......................................s..hlDhpuTGGs.G....ss.o....Shss...A..llAus....G.l......h...........VuKp..u..s.R.u.l.u......s.p..u.G...o..h..D..hL.Eul....G.....h...p..l..ph.ssc....phtc.........h..l........p..c..........s...G..l..u..h..l....h..u..s..s..h.h..ss....h.c.+.h.h..s.c+...-l....s.s........T...l..s..l..PLlss........................................................ulh.st...clst.s.h.stll...h....c...l......s.....s.....t......pu.h....h.h..........+.....u..............................hD.E.sshh..u........po..h..V...s....hp....sG...........................ph.p...p.h..h.l...o.s.......s...hsL..s..p.s...............h.t.........s...............s.l.c..s.................p...Es.h.p.hLp...shh.tG.t...t.s...................................t-h.lh.h.t.u.u..hh..l.h..h.....u......s......h.....s..p..s.......................lpcuh....thstpslpsGpA................................................................................................................................................................. 0 557 1116 1464 +2792 PF04413 Glycos_transf_N 3-Deoxy-D-manno-octulosonic-acid transferase (kdotransferase) Waterfield DI, Finn RD anon COG1519 Domain Members of this family transfer activated sugars to a variety of substrates, including glycogen, fructose-6-phosphate and lipopolysaccharides. Members of the family transfer UDP, ADP, GDP or CMP linked sugars. 
The Glycos_transf_N region is flanked at the N-terminus by a signal peptide and at the C-terminus by Glycos_transf_1 (Pfam:PF00534). The eukaryotic glycogen synthases may be distant members of this bacterial family [1]. 23.90 23.90 24.60 26.30 23.50 23.80 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -5.13 154 2390 2012-10-03 16:42:30 2003-04-07 12:59:11 11 13 2271 8 551 1796 2084 180.60 36 42.32 CHANGED htp....chtE...RhGhh............................hlWlH.AsSVGEshustPLlctLtp...phPshplllTosTsTGtphs..pph.....h....s....ps.pta.hP.hDhshslp+FLpphpP.....chhllhEoElWPNllttspppslPllLlNARlSc+Shptap+h.s.h..hhpthlppl..shlhsQsptDupRhhsLGss.pp...lplsGNlKaD.hs..s ...........................................................................hp+htERaGhhtt.h........................ssIWlH....usS.VGE.shAuhPLlctLcp.............c.hP.sh.slhlToh.TsTGt-ps...pph...h...........sp.....slp.phY..LP..aDh.stslp+FLsph..pP.....clsllhET.........ElWP.........Nlltth++cp.......l..PlllsNARLSs+Sh.tsYt+h..s...t...hh+pl.l.ppl..shlhAQscpDupRahsL.G..sp.........p...lpVsGslKFDl..h....................................... 
0 170 347 455 +2793 PF01153 Glypican Glypican Finn RD, Bateman A anon Prosite Family \N 25.20 25.20 25.60 25.50 25.10 25.10 hmmbuild -o /dev/null HMM SEED 558 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -12.90 0.70 -5.90 15 584 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 88 10 343 500 0 364.70 25 92.27 CHANGED hsl.Lhllhshsu.stu.ssscu+SCuEVRphYt.u+GaulsslPpstIuGEHL+IC.PQGaTCCTscMEE+huppS+h-Fcshlc-uospLpsllsspa+pFD-aFc-LLppuE+oLsphFspsYGcLYsQNuclFp-LFoEL+pYYhGus..l................NLEEhLs-FWu+LLERhF.+LlsPQYp....ho--YlECls+ts-....pL+PFGDsPRcL+lQlTRAFlAARsFlQGLslut-VVs+sspV...shospCsRAlMKhhYCPaCRGl.....sslKPCpsYClNVM+GCLANQAD.LDsEWpsaIDuLltlA-+.lpGsaslEsVltsIcV+ISEAIhshQENusplosKVFQsCGsP+.sssts....husp-t...p+ch+shs.Ec+PTsusss......L-pLVo-l+p+L+ph+pFWsoLPsslCscchsuussssc...CWNGps+u.RYhscVhGsGLsNQlNNP.EVcVDIo+PDhsIRQpIhpL+lhTs+L+sA.................................hsGsDlDFp.Dss.D-sSG.SGSGsusscc....hsssutchshssssspp.t.hsh.................sptssoushspssshhhh..lhsllshhhthh .........................................h............................................................................................................................hp...............phh..llp.upp.htthF.psat..hh.ps...hhtphasclp.ahh.t.us.....h................................................slpchltcFaspLh.hha..ph.hps.th..................s.ch.pC.ltt...hp....plpsF..GphPp.....lhhp.ht+uh.ssR...hahpuLthu.h-Vhpp.s.tpl..............s..tC.ptl.h+h.aCshCpGh................................pPC.saC.sVhpGChu................t..s.p.ls..Wpth..l..sh..ls.......pt.h.s..............s...h.......-.sh.sl.......hl...p-ul...hpps..t.....pl..s.t...p.......l.........p.h.Cu.s........t.................................t.......p.p...s....................................h.t..h..chhtplp........tha..tLs...lCtp.....p..h....t....t.........CWsG.th.....s...pY.......hh.t..s..tp......Eh....h.p............................p...h..p.h.tLp.h.t..h...h..........................................................................u...........
........t..................s......t.....................................................................................................h............................................................................................................ 0 64 95 210 +2794 PF05199 GMC_oxred_C GMC oxidoreductase Studholme DJ, Bateman, A anon Pfam-B_891 (release 2.1) Domain This domain found associated with Pfam:PF00732. 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.74 0.71 -3.90 85 8000 2009-09-10 15:44:39 2003-04-07 12:59:11 8 92 2304 146 3849 7618 4016 132.70 26 23.46 CHANGED PpSc.GplpLss..s-shs.P.....hl.chsahssst...........D.hpthh....pulc.hstclhpss....................................................tthhshs.......thpssp...........h.thhht...h..shhHss.GTspM.Gts.t.......uVVD.sph+VaGlpsLpVsDuSlhPsh.sssssphsshulA...c+s ...............................................................................................................oc.Gplplps..........tcs......h.t....P..............hl.p.sahs...p...t........................D...hph.hh......puhc.hspc..l.hpp....................................................................................................................................tth..st.........................t.h.tssp.............................h...th.h.tt............s....sshH.............ss.GTs+M...Gs...ssp......................uVVD...s.......p....h.+....Va.......G.............lp.......s.L+.V..sDuSlhPph..su.sNss..sssh..hluE+h................................................................................................... 0 1059 2161 3210 +2795 PF00732 GMC_oxred_N GMC_oxred; GMC oxidoreductase Bateman A, Studholme, DJ anon Pfam-B_891 (release 2.1) Domain This family of proteins bind FAD as a cofactor. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.74 0.70 -5.26 20 7804 2012-10-10 17:06:42 2003-04-07 12:59:11 14 78 2279 50 3835 7620 5057 264.90 27 47.78 CHANGED h-tLatuGGshuos..........................................................ssphhhl.sGpslGGuSsVNhusslRsstpshc-WsschGlphauspchhshhcplp........................tpluV..sspshppsstN..pslhcuuccLGas.t.lscNssus..+.sGhCthG...CtpGtKpuospsaLhsAh.p+.supllossps-+llh..............ttpst+AlGVtspsssss.....hpphhhss+pslluuGAltoPtLLtpSGl.pspc..............lG+NLplHPs ........................................................................................................................................................................................................................DhlllGuGsuGs.slAsR.L.......s.....................p............s...................s.....t.........h.....p..V.....ll.lE....u.G.....s...........................................h...t..h....h..h...........h...........t...........t.......h..........s......h.....t...........h................s...........t.........s......p........................t.................h.................s.......s.......p..........t...........h....................h...................p..G.........+....s...........l.G..G..u..Ss..l..N.u........hh........a...h.........R......s.....p..t.........t........D........a........-........t........W........t........p..................G........t........s........W..........s.......a......p......p....h.h........s...a...a...c.+hE.p.......................................................t..h..p..u...h....s.......G............l.....t....l.............p...................................p.........s....hh..........p............s....h..h..p.........u....s....t.......p....h...G.h....................................h.s.t.....-.....h..s.st.........pt.p...G....h....s.......h....h.....t................................t.....t...................p.............G..............t..
......R.......h.......o.......s.s..p.......s..a...L............t............s........s..............h......p..........+.................N..............L........p....l..h..s..puhV.p+l..lh..........................................c.....s....p.....+.....A...s....G.....V..p....h..hp.s..stt............................h.p..h...h....s....t....+....-..VlLuAG.u..........l..so..Pp.LL..h...........h........S.......G.l.G.......stt.L....t.......t..h...........s...........l................................h...........h..............p......hs..tVGpNhtDH..t.................................................................................. 0 1106 2163 3210 +2796 PF00446 GnRH Gonadotropin-releasing hormone Finn RD anon Prosite Family \N 18.50 18.50 18.50 18.50 18.40 18.30 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -4.93 0.73 -5.27 0.73 -3.70 25 259 2009-09-12 07:42:02 2003-04-07 12:59:11 12 2 122 0 37 236 0 10.00 83 11.90 CHANGED QHWSHsWpPG QHWSaGWhPG 0 2 5 16 +2797 PF03071 GNT-I GNT-I family Mifsud W anon Pfam-B_2207 (release 6.4) Family Alpha-1,3-mannosyl-glycoprotein beta-1,2-N-acetylglucosaminyltransferase (GNT-I, GLCNAC-T I) EC:2.4.1.101 transfers N-acetyl-D-glucosamine from UDP to high-mannose glycoprotein N-oligosaccharide. This is an essential step in the synthesis of complex or hybrid-type N-linked oligosaccharides. The enzyme is an integral membrane protein localised to the Golgi apparatus, and is probably distributed in all tissues. The catalytic domain is located at the C-terminus [1]. 
25.40 25.40 25.40 25.40 25.20 25.00 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.76 0.70 -5.94 4 372 2012-10-03 05:28:31 2003-04-07 12:59:11 10 10 147 7 215 355 35 315.40 31 71.55 CHANGED hhAshhhhaIthhLFhh.opYA-phssulcucNpssuphph.l.clu.pQsRlVtLEc...hhpp.s--lpplRuhlps..ht.cuhs+lshssthsVhsVlVhACsRAshlc+slcplL+Yp.PsApKaPlhlSQDsucpsV+ptshSY.splTYhpHL..Dhpslss..Puc..hpAYYKIARHYKWALsQlFhc+pFSpVIIlEDDhEIAPDFF-YFcAstsLL-pDcolhslSuWNDNGppQhVcs..P.sLYRSDFFPGLGWMLpppTWcELpPKWPKAaWDDWhRh.Ep++GRQhIRPElsRT..hsFGc+GuShGQFFsQaLc.IKLNDhhVcapphDLuYLhcsNYsKcFsshV.+pAh.lpssclshpshs...cG.-VRlpYcspl-FcchAcphGIh--aKsGVPRsAY+GIVsFphps.RRVaLVsPco.lpthssc. .................................................................................................................h............................................................................................................................................................................................................................s.hslhV.hAs.sRss.l.+hlcpL...lphp.....s.....s.......s........p.....h...lh.V........D.s........p....p...s.h...p..........h.lt...a............l...p...h.l.pp..........................p.s.l.t.h.....st.................t..............+..lu...p.HY+huLst..hF...p.....h..t..h.......p............t.....slllE.................-DL-l...AsDFFpYFpt.sh..LL...c....p.D.o.la............ClS............AWN....D..............s.G..............h.....pp.hsc...s....................s...p.....h.....L....Y...Rs-hhPGLG.WhLh....+pla.p........E...Lp...PK...W.P....p...s..........h...W..D...WhR.sppR+sRtCIh.P-lsRo...........hp.F.Gh....h............G...h.s.......G..aa....c.t.ahc..h.+h.N................p........l.t....h......p...t..s...ls..L.....c...................-tYc.thht.l..tu..l.p.....h.....pc......................................................................................................................................................................................................
................................. 0 102 128 177 +2798 PF02447 GntP_permease GntP family permease Bateman A anon Pfam-B_1928 (release 5.4) Family This is a family of integral membrane permeases that are involved in gluconate uptake. E. coli contains several members of this family including GntU Swiss:P46858 a low affinity transporter [1] and GntT Swiss:P39835 a high affinity transporter [2]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.44 0.70 -6.00 12 5095 2012-10-02 15:12:49 2003-04-07 12:59:11 11 8 2135 0 706 3333 363 403.30 34 98.00 CHANGED hsLlhlslu.lllLLlLll+hKlpsFlALllVuhhVGlstGMslscllpohpsGhGGTLGplAlllGLGsMLG+lls-oGuAp+IAhThlppFGcc+lpaAlllsuallGlslFaEVGhlLLlPllFslA+puplsllhlulPhsAuLsssHuhlPPHPGPhslAshhsADlGtslLYGlllu.lPssll...AGPLasK...ahpphh..t.p.....st.hstchhpcpchPuFGlSlhshllPllLMhhpTlsplhhscssshhs.....hhpFlGsPssAhhIAlLlAhaThGhtRGhshpplhchhspulsshAhlLLIIGuGGuFKQVLl-SGVGchluphhpshslSPllhAWllAAllRluhGSATVAslTsuGlls.....Pllsths...sss.tLlsLAsGuGSllhSHVNDuuFWlhKcYhslo.lpETh+TWTlhpTIlSlsGLlhsl.Lhull 
................................................................h..llhh.hlu...lhl...ll.l..Ll..h.+....hK......h.....p.s..Fl...u.L..l...l....s...uh.h...s............u............l............h...t............G........h.........s......h...........s.........c.............l............l.........p...........s.........h...p......s.......G....h....G..u...T...L.Gtl...u...l....l....lu.hGshlG+l...ltcoGuAppIApollpph....Gc.......+.......+.......st....h..A.......l.........s.....l..s....u.h..l...l.......G.l....s..........lF...........a..-....V.u.h.V..l....Lh..Pls.h.s....lA.+......p.......s.......t.......h................s.......h.l.hl..ulP.h.s.uu..l.s.s...s..H..s.h.l....PP.p.......Pu..P..h...s..h....A...s..h...........h.......t..A..s.....l.....G...h......s..l....l.....h..G....l......l..lu....lP...ss...l...l...........A...G....la.s+......................h..h..t.p......h.......................................t.....h.s....t....t....h.......h...s...p....p........p.h.............P....u.....h....u.........h.......o...l.......h...s.....h.l..l..P.l....l........L..h.h.......h.....p.s....l...u..p.....h.........h.....h.....s........t.......s.....p..s...h.ht............................hh..p..F.....lG....s.....P.....h..h.......A....h..h.......Ius..l.l.Ah.a.s.hG..h....t........p..s..h........s.h....p.p.....l.............c...............h....hsp.....u....l.t..s.h.u.h....l.l...L....l..l..G..uGGsFtplLh...-....SG...luphl....up....h...h.............p..............t.............h....s.............l..s....s....l...lhuallAs.....llRlA.GS....A....T....V.A...h....h.T.A.u...Gllu.............P.h.l...st..h.s........sls.s...t..lh..s..l.A..h.u....uGS.l.h.hSHV.N.D..u.G.F.W...l..h.p.....c.a..hsho.lt-TlKoW...ohhp....Tlluls.G.hlhsh.llsh.h..................................................................................................................................................................................................................... 
0 172 385 568 +2799 PF00392 GntR gntR; Bacterial regulatory proteins, gntR family Finn RD, Bateman A, Hoskisson PA anon Prosite & Pfam-B_6405 (Release 8.0) Family This family of regulatory proteins consists of the N-terminal HTH region of GntR-like bacterial transcription factors. At the C-terminus there is usually an effector-binding/oligomerisation domain. The GntR-like proteins include the following sub-families: MocR, YtrR, FadR, AraR, HutC and PlmA, DevA, DasR [1-2][4][5]. Many of these proteins have been shown experimentally to be autoregulatory, enabling the prediction of operator sites and the discovery of cis/trans relationships [3]. The DasR regulator has been shown to be a global regulator of primary metabolism and development in Streptomyces coelicolor [5]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.87 0.72 -4.53 26 49014 2012-10-04 14:01:12 2003-04-07 12:59:11 16 87 4089 55 10949 36460 2838 63.00 29 23.05 CHANGED hhpplhppLcppIhpGphpsGspLPsEccLuspauVoRsslREALppLtscGllptppspGshV ..........................h..tplhppl.cp.t.I....h....p..G....p....h....t..s.G...s....p...L....P...s.-cp...L..u.p...p...h......s.V.S...RsolRcAlppLt.pc.G..l..l.pt...p...p..s..p..GshV.................. 0 2975 6426 8851 +2800 PF02188 GoLoco GoLoco motif SMART anon Alignment kindly provided by SMART Motif \N 20.70 20.70 21.00 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.27 0.72 -6.70 0.72 -4.52 46 1105 2009-09-10 20:12:23 2003-04-07 12:59:11 12 99 91 10 542 983 0 22.80 44 8.24 CHANGED s-cFa-Lls+sQ.upRhDDQRspl ......-cFF-Llt+sQ.usRhDDQRss... 0 104 151 308 +2801 PF04178 Got1 Got1/Sft2-like family Wood V, Finn RD, Fenech M anon Pfam-B_7371 (release 7.3) & Pfam-B_8991 (release 14.0) Family Traffic through the yeast Golgi complex depends on a member of the syntaxin family of SNARE proteins, Sed5, present in early Golgi cisternae. 
Got1 is thought to facilitate Sed5-dependent fusion events [1]. This is a family of sequences derived from eukaryotic proteins. They are similar to a region of a SNARE-like protein required for traffic through the Golgi complex, SFT2 protein (Swiss:P38166) [2]. This is a conserved protein with four putative transmembrane helices, thought to be involved in vesicular transport in later Golgi compartments [1]. 21.80 21.80 24.00 23.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.57 0.71 -3.99 103 970 2009-01-15 18:05:59 2003-04-07 12:59:11 7 13 313 0 645 936 31 114.90 25 66.90 CHANGED thGlhhhhlu.hhhhhslhh...pshhFuhhaolGslhhlh............uhshLhGspp..thc.hhhppp.Rhhu..ThsahsulhloLh.....huhhhcshhLsllF...ulhp..........h...........ssllhahl..ShhP.hGt.....sslphhhsh ...................................................hGhhhhhh.u.h.hhhhshhh........hhsp.h.hhholGNllhls.......................uh.s.hlhGstpphp.hh.h...p..pp..+...lhu.....Thhahsulhl.sLh...............huh....h....l...c.....s.....hslsllF.........shh...........................hslhh.ahh..uh.P.hGp.....shlp.h...h.............................................................. 0 220 359 523 +2802 PF00516 GP120 Envelope glycoprotein GP120 Finn RD anon Pfam-B_44 (release 1.0) Family The entry of HIV requires interaction of viral GP120 with Swiss:P01730 and a chemokine receptor on the cell surface. 
19.90 18.00 19.90 18.00 19.80 17.90 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -13.00 0.70 -6.27 24 146453 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 446 117 0 119635 0 228.30 54 78.04 CHANGED LWVTVaYGVPVWK-AossLFCAo-s+.......slWATpsClPosPssQElsLssVTEsFshWc..NshVEQhpEDIhSLaDQSLKPCVKLTPLCVohNCschpt.................sssTsss.......................sttthtctEh+NCoFNhTs.hRDKpcphhshFYphDlVshssss............tophhlIpCNTSVITpACsKs.a-sI.l+YCAPAGFAlLKCNDccasGpuP.CoNVSsVpCT+tIcssVSTtLLLNGShAEpcshIh.scshs......sNsphIllphphslsIsCpRPsNpThh.thhhu...GpsFaohtplhs....lRQAaCphst.pWspslppVtppLtcp.p.p......psIsFs.psu........................GGDPElsha.FNCtGEFFYCNsotlhN........W.psp............ssps.......pphhlPCRI+QIINhWpcVGKshYAPPhcG...pIpCsSslTGLllsh..Dusssss.......pThhssuu-h+-.WRuELh+YKlVcIpPlGlAPT+sKR+slt...REKR .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................E..t..c..l..........h.I.R...Sc.N.h.o.........s..Ns.K.sI.IV..p..L..s..c..s..V..p...Is.C.T..R...P.s.N......N.TR...+......u...l....p.....lG....PG..p..u.F...Y......u....T......G......c...I....IG.....DIR.Q..A.H.C.N...l...St..sp.....W.N.p...T.L.p....pl.s.p..KL.p.cpFtp..............p..sI.....Fp...s.u.........................GGD.....El.h..h..h.....h..................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................... 1 0 0 0 +2804 PF03010 GP4 GP4 Griffiths-Jones SR anon Pfam-B_1094 (release 6.4) Family GP4 is a minor membrane-associated glycoproteins. This family contains envelope protein GP4 from equine arteritis virus. 20.10 20.10 22.70 38.10 17.90 16.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.23 0.71 -4.61 3 72 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 2 0 0 68 0 139.50 93 100.00 CHANGED MKTYGCIFGLLLFAGLPCCWCTFYPCHAAEARNFTYISHGLGHVHGHcGCRNFINVTHSAFLFLNPTTLTAPAITHCLLLVLAAKMEHPNATIWLQLQPFGYHVAGDVTVNLEENKRHPYFKLLRAPALPLGFVAIVYVLLRLVRWAQQCYL MKhYGCILGLLLFVGLPCCWCTFYPCHAAEARNFTYlSHGlGHVHGHcGCRNFINVTHSAFLFLNPToLTAPAITHCLLLVLAAKMEHPNATIWLQLQPFGYHVAGDVoVNLE.NKRHPYFKLLRAPAhPLGFVAIVYVLLRLVRWAQQCYL 2 0 0 0 +2805 PF00517 GP41 Retroviral envelope protein Finn RD, Bateman A anon Pfam-B_44 (release 1.0) Family This family includes envelope protein from a variety of retroviruses. It includes the GP41 subunit of the envelope protein complex from human and simian immunodeficiency viruses (HIV and SIV) which mediate membrane fusion during viral entry. The family also includes bovine immunodeficiency virus, feline immunodeficiency virus and Equine infectious anaemia (EIAV). The family also includes the Gp36 protein from mouse mammary tumour virus (MMTV) and human endogenous retroviruses (HERVs). 
25.10 25.10 25.10 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.54 0.71 -4.76 23 40495 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 166 141 21 37279 0 171.40 74 29.32 CHANGED huAsALshosQs+pllushhppppplhsslpt.pclLphplhslcp.l..LpsRVpulE+alcsptphs.hGCs.+p..hC+TshPh....hN.o.....Ws..NhThpp..Wpcplp.lppplhpllt-spppptpsh.shppLss..ht.hhs...WhshssWl............shIphslhlllhllsLpllhtllps.hh..phh.GYpsl.............ph.lhcspp.spst ................................MGAAS.lT.LTVQARp.LL.SG...I...VQQQ.sN.LLRAI.E.A.QQHLLQLTVWGIKQ....LQ.ARVLAV.ER.YLKDQQLLGIWGCSGKL.....ICT.Ts...VPW..NsS.....W....S.....NKSh.............spIW.s......NMTWMp......W-..+.EIsNYTshIYsL..lE-.SQ.s.QQ..E.KN.Ep..-.LLt.LDK..WAsLWN....WFs.Io.pWL............WYI+IF.IhI..V.uGllu.LRIlhh.llph.ls....+lR...p..GYpPl.................ph.................................................................................. 0 12 12 12 +2806 PF02925 gpD Bacteriophage scaffolding protein D Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 41.10 41.00 18.70 17.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.66 0.71 -4.40 2 78 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 54 10 0 51 2296 139.30 87 92.82 CHANGED .sh.psVpatTulAul+hlQASAVLDlTE-DFDFLTusKlWIATDRsRARRCVEACVYGTLDFVGYPRFPAPVEFIAAVIAYYVHPVNlQTACLlMEGAEFoENIINGVERPVpAAELFAaTLRl+AG.p-slhDAEENsR .....sEpuVpFQTAlASIKLIQASAVLDLTEDDFDFLTusKVWIATDRSRARRCVEACVYGTLDFVGYPRFPAPVEFIAAVIAYYVHPVNIQTACLIMEGAEFTENIINGVERPVKAAELFAFTLRVRAGNpDllscAEENlR 0 0 0 0 +2807 PF00044 Gp_dh_N gpdh; Glyceraldehyde 3-phosphate dehydrogenase, NAD binding domain Eddy SR, Griffiths-Jones SR anon Overington Domain GAPDH is a tetrameric NAD-binding enzyme involved in glycolysis and glyconeogenesis. N-terminal domain is a Rossmann NAD(P) binding fold. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.99 0.71 -4.25 112 14213 2012-10-10 17:06:42 2003-04-07 12:59:11 19 19 8352 412 2317 10425 3516 130.00 46 43.38 CHANGED l+luINGFGRIGRhVhRshh.....tp.scl-l.VAI...NDhs.sscthAaLhK.....YDSsHG+a.sspVp..hp........sst..lhl.sGc...p.Ip.lhsc+-.PtplPW.tphulD..lVlEuTGhF...psp-pup.t.....Hlp..u.GAKKVllSAP..u+st.......ssohVhGVN.ccpassp..pllSNASC ............................plulNGF.GRIG.R.shRth.............tt....tt..lc..l..VAl................N...D.....h..............s.......sp..h.h...Aa...h.l+.....YDo....sH.Gp.F.......p.......s.......p......Vp....hp......................psp......lhV...sG.c..........t.Ip....lht....c........+...s....P.t...p.....lsW...t......................p..hGs-..hVlEsTG.h.F........ssp....-...cAp.t...........Hlc........u...G..AK.....KVl.I...SAP....uts..........sshhVhGVN.c-.ph...c..s...s.....pllS.NASC.................................................................................................. 0 709 1371 1873 +2808 PF02800 Gp_dh_C gpdh_C; Glyceraldehyde 3-phosphate dehydrogenase, C-terminal domain Eddy SR, Griffiths-Jones SR anon Overington Domain GAPDH is a tetrameric NAD-binding enzyme involved in glycolysis and glyconeogenesis. C-terminal domain is a mixed alpha/antiparallel beta fold. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.81 0.71 -4.77 71 15479 2012-10-02 22:00:43 2003-04-07 12:59:11 15 25 9082 412 2355 11397 3548 142.80 53 50.30 CHANGED LAPlsKVlp.-pFGItcGhMTTlHuhTusQphlDss....+DhRcuRuAA...NIIPoSTGAAKAVuhVlPcLp.GKLsGhAhRVPTssVSllDLssplcKs.soh-ElssAlKcAuc.......s.hcGlLuYo--.lVSoDahusst..SSlaDuptohsls.....sphVKlluWY .............................................LAPlAK..V.l.p.Dp...F.G...I.hc...GL...M..TT.......l.H.....uh..T......u.s..Q..ps.lD...u...Pp......K...D.....h.Rt...........u...RuAu............tNI.IP...oSTG..AA...KAVG.c.VlPpL.s....G.........K.L.s.G..........hAh.R.......VPTssVSl.V....D.L..os.p.L.c......K.s........s....o........h....-.....-lpts.l.....K......pAup.................................s.h.+.G.lLuY.T....-....-...l.VS..sDa...usst....oSlh.Du..tt.shshs......tphlKlhuWY............................................. 0 719 1391 1898 +2809 PF05024 Gpi1 N-acetylglucosaminyl transferase component (Gpi1) Moxon SJ anon Pfam-B_4796 (release 7.6) Family Glycosylphosphatidylinositol (GPI) represents an important anchoring molecule for cell surface proteins.The first step in its synthesis is the transfer of N-acetylglucosamine (GlcNAc) from UDP-N-acetylglucosamine to phosphatidylinositol (PI). This chemically simple step is genetically complex because three or four genes are required in both yeast (GPI1, GPI2 and GPI3) and mammals (GPI1, PIG A, PIG H and PIG C), respectively [1]. 
20.80 20.80 22.40 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -4.63 7 347 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 276 0 237 333 2 167.60 35 30.43 CHANGED phhsplh.lhlDlshGlhlh.hLh...Nhphlsshhhphst.ashcpLpohlthLhssPhGlKLNsplsphluphhlahIclW.oahshhpshl.......hlhhhluh.u.hhGhohhluhlhDhlslhohHlhshYhhss+LhshplpslsuLapLFRGKKhNlLRpRlDoh.YshcQlhLGTlLFolLlFLhPTh ................................................t..hs.hh.lh.DlhlGhhlh.hlh............tpht.l..st...h...t..h..h.p......................hhhctLpphlpW.L.h..u.h.PA..GLKLNppLsthLGchFLahIp.h.W.t.s.h...l..t..h..hp..s.hl..................................hll.h.h....l.u.h.u.....hhG.hoh.l.ulh.Dllsl.lT.hHlasaY..hh..su...+laphplphltSLapLFR.GKKhNl..LR.p.R.lD.........o........p........s.........Ys.........lD.........QLhlGTlLFTlLhFLhPTs............................... 1 82 132 199 +2810 PF04113 Gpi16 Gpi16 subunit, GPI transamidase component Wood V, Finn RD anon Pfam-B_7012 (release 7.3); Family GPI (glycosyl phosphatidyl inositol) transamidase is a multi-protein complex. Gpi16, Gpi8 and Gaa1 for a sub-complex of the GPI transamidase. GPI transamidase that adds glycosylphosphatidylinositols (GPIs) to newly synthesised proteins. Gpi16 is an essential N-glycosylated transmembrane glycoprotein. Gpi16 is largely found on the lumenal side of the ER. It has a single C-terminal transmembrane domain and a small C-terminal, cytosolic extension with an ER retrieval motif [1]. 
20.20 20.20 21.40 20.80 17.60 18.40 hmmbuild -o /dev/null HMM SEED 564 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.93 0.70 -6.50 11 458 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 272 0 326 480 1 374.60 25 89.91 CHANGED L.Lhshlhlsssssu.s.st........sYcEsLhL+PLPpspLLASFpFcu............ssstssapppcachFPRuLuQILp+uss+ELHLRFopGRWDsEsWG.spPasGhppGGTGVElWAWl-usscpt....................Acc+WhsLTpuLSGLFCASLNFI.DSocTTcPshoFp.sustss.....tp..sh+LhaGsLPtEsVCTENLTPFLKLLPCKGKAGIuoLLD.GHKlFDusWQSMuIDVpslCsss.sp.Chlpl-QoVDhVlDl-RoKR.+.......s.sspph.C-pSKsYpsc.hCaPhtpssptsWSLs-lFGRslpGsCsLuc...tpssVsLpV........................P.phpVhsp.thht......pssstspsaslpsss......saDlhlPt....p.o....phssh-pPslpApRolsGaGQ-RGGlphhhsNPSsp.............sl-FIYhEpLPWFlRhYlHTlpspls.......tpttsssshlcclaY+PulDRc.+uTpLElphslPst.STlsLTY-FEKulLRYsEYPPDANRGFslssAVIolhs.s.........................pssshplRTouLLlsLPTPDFSMPYNVIIhTSTllALAFGulFNlLsRRalstEcutshptpshhs+Ltt+lht....+l+ ....................................................................................................................................................................................................................h..................................................................................................................................................................s.G.at...au.........................................................Gh.h.......s.h.................................tta..l.t.hsuhhssuht.h.............................h..........................................hhhu.LstE.hC.T.ENLTPhhcLLPst....stsGlssLhp.....pl.a.ps..apo.t.l..h...p.........t..................h...hp......h..h............................................................................h.h.that.....h......t.....s..s..........l.h.....................................................................................................................................h..h.................hph.h....................................s........h.stp...h.G.u
.p.pGth....h.s..t..................................h.hhhhp.lPWahp.ahpoh.ph...t...............................................h.h....s....u.p..+......tst....hch.h...lPst.ps...............h...ht......hphc+..hLphtEasPDss+Ga.l.s.ul....l..h...........................................................................h..hhopslLl.LssPDFSMPYNVIhhssThhulhaG.hhshhh.+.h......................................................htth............................................................. 0 116 207 282 +2811 PF02831 gpW gpW Bateman A anon [1] Family gpW is a 68 residue protein known to be present in phage particles. Extracts of phage-infected cells lacking gpW contain DNA-filled heads, and active tails, but no infectious virions. gpW is required for the addition of gpFII to the head, which is, in turn, required for the attachment of tails. Since gpFII and tails are known to be attached at the connector, gpW is also likely to assemble at this site. The addition of gpW to filled heads increases the DNase resistance of the packaged DNA, suggesting that gpW either forms a plug at the connector to prevent ejection of the DNA, or binds directly to the DNA. The large number of positively charged residues in gpW (its calculated pI is 10.8) is consistent with a role in DNA interaction [1]. 20.90 20.90 20.90 21.30 20.70 18.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.38 0.72 -4.42 5 521 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 332 3 23 146 0 67.40 62 95.68 CHANGED MTcpp...ELQpARsAhHDLhTGKRVVS...VQKDG.RRVEYTAoSluDL++YIs-LEuQLGhot.RRRuPlGVRl .............Msp.t....ELt....AhRtAhhDLhTG.KRV.so.....VQKDG.RRlEaTAsSls-Lp+hI...s-hEs.l..G...h....Tp..RRRtPhGhhl................... 0 3 10 18 +2812 PF04965 GPW_gp25 Gene 25-like lysozyme Bateman A anon COG3628 Domain This family includes the phage protein Gene 25 from T4 which is a structural component of the outer wedge of the baseplate that has acidic lysozyme activity [1]. 
The family also includes relatives from bacteria that are also presumably lysozymes. 25.30 25.30 25.40 25.30 25.10 25.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.18 0.72 -4.46 181 3115 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1505 1 550 1824 1365 100.80 22 74.20 CHANGED sshss...htp.slp.......pslptlLsTphGpph..............................h.saG..ls-lhstsh..ssssttplpptlppultpa.EPRlp.hhpV..plts........................tspl.phplpup..lh.p...t.t..ls .....................phhcplp.......pslpplLsT..usph..................................th.saG....ls.c.hh..sts.....stshttplpptlppAlh.......+........a...EPRlp.hhpVplp........tp.............sspl.shplpuhlh.p........hhh.............................. 0 111 282 426 +2813 PF05084 GRA6 Granule antigen protein (GRA6) Moxon SJ anon Pfam-B_6204 (release 7.7) Family This family contains the granule antigen protein GRA6 which is found in the parasitic protozoa Toxoplasma gondii and Neospora caninum. GRA6 protein plays an important role in the antigenicity and pathogenicity in these organisms [1]. 22.10 22.10 22.10 132.00 20.40 22.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.48 0.70 -4.81 2 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 4 0 3 51 0 183.70 91 94.90 CHANGED MApsthhhRp+RsFsPlTVshlAVshVsFMGV.lsShGusssAssstuVcts.pthsS.Gt...AVGToE-YVNSSthuGupscu.AEs-pputtsEsDVpP.sVh..spEttu.ups.s.pERhEEtsst.+.ssVpps.spssuKRpQtRHRlIGssVlAssVAhLhhhF.RRpu...........GusctG..............GcsEsGuE- .MAHGGIHLRQKRNFCPLTVSTVAVVFVVFMGVLVNSLGGVAVAADSGGVKQTPSETGSSGGQQEAVGTTEDYVNSSAMGGGQGDSLAEDDTT.SDAAEGDVDPFPVL..ANEGKSEARGPSLEERIEEQGTRRRYSSVQEPQAKVPSKRTQKRHRLIGAVVLAVSVAMLTAFFLRRTGRRSP.EPSGssGGNDAGNNAGNGGNEGRG.tGcs-......... 
0 2 2 3 +2814 PF00267 Porin_1 Gram-ve_porins; Gram-negative porin Finn RD anon Prosite Domain \N 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.16 0.70 -5.17 18 5868 2012-10-03 17:14:36 2003-04-07 12:59:11 16 2 765 89 237 5994 230 301.00 39 93.43 CHANGED KsGNK...........LDLYGKssGhHhaos.csus-.....GDpoYuRIGFKGETQIsDpLTGaGQWEYsspusssEup.ssp..ttTRLuFAGLKaG-aGShDYGRNYGVlYDlpuaTDhhPEFGGDoh...stsDsaMpsRusGlAT.YRNsDFFGLVDGLsFALQYQGKNtp...............scshh......+pNGD........GaGhShoY-hu..shuhuuAYssScRos-Qt.t.............p..utG-+A-sassGuKYDANNlYLAshYupTpNhT.hu.............shuhANKsQNhEVsAQYQF.DFG.LRPuluYlpSKGKDlsu.........ths-pDLVKYVsVGATYYFNKNMSsaVDYKINhLDcss.h..t.GlsoDDhsAVGLVYQF ........................................................................KsGsc...........hshh.....s.....ch.s.u......h..+....h.....h..os.....pp...u...p...c.........hs.p.h.h..s+.l.GFKGpp..p..l.s..s.t.........Lpu.h.................h....QhE.......p.......h.......p...........s......s............s.......s........-..........................................t...............s...........t..................ts.....R.u.........FsGLK..u-.aG.ohchGR......s...h......u......l.......l.......h......D.......s.....t......s....h..........s.......D............................P....................u...G.csh......................hts.s...s...a.....h.....p......t..............c....s.....p.....s..l...........u...........s.........Yc....s.....s......-......F...h...G....l....s.......s......u......l..p..a..A....h..p.......p....u.p.st...........................................................t...t..hh..............+.p.N..G.D......................u..a..t...s...u...h..s.Y...c.......s......G..................h...h....sh..u........u..u....Y...s..p..p..s...c...s...s.c...p................................................s....s....s.c..+..t.p.....s.a..p...h..s....h....t...Y.......D...s...N...s...l.....Y......l..A..s..t.......u..p...p...p..s.h..p...hs
...................................shs..s...K..s...p....s.......h.....E...l....s...A......p....h.......p....a........c.....F.......G....l......p....P.......p....l.......u......Y....h...p.....u.h...s...t.s......lps......................hs-.p..s..h.h...c.......l..s..V......G..A.pY...F...sK..p.....h..S.....s..h.....V.....s...h.......t.....h...............h.....s.......p...sp............................h.....s.....s...........s......h.....s.....u..l..GhhapF......................................................................................................................................................................................................................................................... 0 18 57 141 +2815 PF00746 Gram_pos_anchor Gram positive anchor Bateman A anon Pfam-B_457 (release 2.1) Family \N 20.50 17.00 20.50 17.10 20.40 16.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.04 0.72 -4.03 84 9438 2012-10-02 15:23:12 2003-04-07 12:59:11 16 692 1185 11 443 6888 16 38.70 27 4.73 CHANGED spssptppLPpTG..pssshhhshhGh.hhhssuullhht++ ...........t..tpptppLPpTG....ppss.s...hh..s..l..hGh..l.hhhhusllhh++............. 
0 110 196 316 +2816 PF01271 Granin Granin (chromogranin or secretogranin) Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 27.70 26.30 23.80 24.90 hmmbuild --amino -o /dev/null HMM SEED 586 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -13.02 0.70 -5.80 9 290 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 53 2 101 264 0 329.00 22 89.82 CHANGED ts+s-p.Vh+CllEVlSssLsKsSPsPlo.EChEsLcpsccclpspEpppN.ssh.tl+hLpD.A-stt....stpt.su.s.....ph.h.s.....tEsGu+sREpps.........s...tcph...h..c..tpp..ccp.c.hph...h.-ttpccph.ctssElsEsh.s.pspAohccshpElu+h-s.sp.p.RE+scccpKs.p-ss--sh+spshPtEs.ssspstss.............pEuEEspspE.s+c..+.psc+scShsscpp.uGphsh--E.............................s..cptphspcp+shSuh..........hhspG+pppptptccuccp.tcth-cps..p-ls.osshps..p-t.+tlc..sGcp.ttts-sstchps.tsh--cscs-.sHsph.cs...tc-th.s+tPps.......phLshG-ph...G.....th...p......c.ppEEsh..................h.LpppahD.tpWpppcpp++t.hs-phLE..tEEcs-hshsM+spFPEh..s.............uYtKR....sPt.sL+RtactsupEDuLctaht.ts...sct.uEEp+c...ts.pE-soAN+...ht-EDpELEsLuAl-tELpKlA+capshRRG 
...............................................................h.ph...ht.t..p..s.s...cshphlc..tpp....h.....p..p...t.s.............hp...p........t......t..t......................................................................................................................................................................................................t.............t.................................................t...t..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 4 11 33 +2817 PF00396 Granulin granulin; Granulin Finn RD anon Prosite Family \N 20.90 20.90 22.50 21.20 20.70 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.76 0.72 -3.86 94 1181 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 140 9 509 1067 2 43.50 48 31.13 CHANGED TCCphss.Gs....WuCCPhspAVCCsDthHCCPpGa....pCshpsspChp .......................TCCph.s..Gs......WGCCPhs.pAVCCsD+h.HCCPpGa....pCshpt.spCh..... 0 196 261 377 +2818 PF04495 GRASP55_65 GRASP55/65 PDZ-like domain Finn RD, Bateman A anon Pfam-B_3985 (release 7.5) Domain GRASP55 (Golgi re-assembly stacking protein of 55 kDa) and GRASP65 (a 65 kDa) protein are highly homologous. 
GRASP55 is a component of the Golgi stacking machinery. GRASP65, an N-ethylmaleimide- sensitive membrane protein required for the stacking of Golgi cisternae in a cell-free system [1]. This region appears to be related to the PDZ domain. 27.70 27.70 27.70 27.80 27.60 27.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.86 0.71 -4.19 37 638 2012-10-02 11:12:46 2003-04-07 12:59:11 9 5 271 6 397 620 5 118.60 38 48.11 CHANGED hpVassKstphRplpl.sP...............Sshass..................uLLGlolcasshp.ss-plWHVL-V..ssSPAthAG.LhPaoDYIlGss..........sllp..............................spcslhsLlEs+.s+.............................sLpLaVYNo-pDssREVslsPspsWGG-...GuLGCslGYGhLHRIPsh ......................................................................................................................................Ghthp.t....sspp.s..aHVLcV.p.ssSPAthAG.LcP.ahDaI...l...u.h.st......................shLp..............................-.s.-....s.L..hs...Ll..c.s.p..s+..............................Pl+LhV..Y..Ns...c..s...p.ss.R.E..V..plsPs.ptWGGp............G.LGs.l.................................................................................................. 0 138 198 303 +2819 PF04723 GRDA Glycine reductase complex selenoprotein A Kerrison ND anon DOMO:DM04874; Family Found in clostridia, this protein contains one active site selenocysteine and catalyses the reductive deamination of glycine, which is coupled to the esterification of orthophosphate resulting in the formation of ATP [1]. A member of this family may also exist in Treponema denticola [2]. 
21.90 21.90 22.30 23.10 21.50 21.80 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.72 0.71 -4.50 3 223 2012-10-10 17:06:42 2003-04-07 12:59:11 9 1 110 0 49 188 4 84.70 42 95.03 CHANGED SlFsGKKVIIIGDRDGIPGPAIEECLKuIssEVlFSSTECFVUTAAGAMDLENQp+VK-ATEpaGAENLVVLlGAAEAESAGLAAETVTAGDPTFAGPLAGVELGLRVYHAVEPEFKuEVDuAIYDDQlGMMEMVLDVDuIIEEMpSIRu ........................................................................................................................................................................................................... 0 22 40 44 +2820 PF01272 GreA_GreB Transcription elongation factor, GreA/GreB, C-term Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain This domain has an FKBP-like fold. 22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.43 0.72 -4.27 137 7631 2012-10-02 13:30:10 2003-04-07 12:59:11 14 15 4301 28 1688 4424 1946 77.30 33 45.59 CHANGED hss.....spVthGupVsl..ps...ssspppsaplVGs..........pEuch...pps..pIShpSPlupALlG+c.hGDplplpsPsG.php.hcIlplph ...................................t..p.spVhhGupVpl....ts.....ss...s...-...c..p..papI.VGs...................................sE.ucs.........pps.......tIShpSPlu+ALlGKc.hGD....p....l.p.l.ps.PsG..p....hp..hcllplp.................. 0 512 1024 1380 +2821 PF03449 GreA_GreB_N Transcription elongation factor, N-terminal Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain This domain adopts a long alpha-hairpin structure. 
21.80 21.80 21.80 22.00 21.70 21.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.29 0.72 -3.92 134 5904 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 4289 22 1237 3294 1838 74.30 42 42.85 CHANGED hp.....tp.h.lTtcGhc+LcpELcpLtpscRPclsctlspARspGDLuENAEYcAAKccQshl-uRIppLpppLsp.A.pl .....................p..h.hThcGhc+.LcpELcpLcpscRPclsptIstA.RuhG.DLSENAEYcAAK-cQuh..lEu.RIppL-ptLpsApl........ 0 399 789 1035 +2822 PF01184 Grp1_Fun34_YaaH GPR1/FUN34/yaaH family Finn RD, Bateman A, Wood V, Studholme DJ anon Prosite Family The Ady2 protein in (Swiss:P25613) is required for acetate in Saccharomyces cerevisiae, and is probably an acetate transporter. A homologue in Yarrowia lipolytica (GPR1) has a role in acetic acid sensitivity. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.36 0.70 -5.04 58 1687 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 1218 0 658 1121 22 193.00 37 86.80 CHANGED FG...Gshs....PG.........htP.sp+phANPAPLGLsuFulTTFlLuhaNspshGlssPNllVuhAhFYGGlsQhlAGlWEhthGN...TFGuTAhoSYGuFWlSauslhls..sFGIhsAYtst.t...........hssAlGhaLluWsIFThhhhlsTlKSTluFhhlFhhLslsFlLLAsuph.......osstsltpAGGhhGllsAhhAaYsAhAGlAsppNSa..hh..sh.lPhsp...pt ..............................hs.............................p+hA.N..PuPLGLhGF..uh..TTll..L.s.laN..s.....Gh........h..s..l.....s...u.......l..l..l........u...hu...l.FYGG.luQlhAGlhEa++G.N.......TFGhTAFoSY.....Gu...........FWLoh.s..s..l.l...l.....h....phGls...su..............................................s..ph.l.G..hY......Lhh....W.u.....lFTh..h.M.hh...u..T...L+.s............sh.sl...h.lFh.sLslhFhLL..Alush........................................s.u.s...t.s.l.hp...h.AG..ahG..llsuhsAh..YhAhutllN.pphup.........h......tt............................................ 
2 200 379 560 +2823 PF01025 GrpE GrpE Bateman A anon Pfam-B_817 (release 3.0) Family \N 25.80 25.80 26.60 26.00 25.40 25.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.97 0.71 -4.70 111 5353 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 4825 8 1413 3716 2850 180.10 28 89.19 CHANGED pp.............ttttpsphpshppp............lpp............................hcp.......chc.....ch..pcphh..........RthA-h-.Nh++Rh..p+-hp.phpcauhpphhpc...LLsllD...sl-+Alph...........pspphp.......slhcG..lchshcphhphL.pchGlptlss.h.GctFDPphH-..Altph........tssp.hts.ssllplhppGYplp.-R...llRsAhVhVup ..............................................................................................................t.tttptpppt........tttt.tt..ppc........................ltp........................................h.pt................ph.p...............-h....c-chL.....................R..hp..AE.h-..N.......h...+.......+Rs........pc-hp.......psp...c..a..u....hpch.s.p....-.......l.L.PllD...........sL-RA.lps...............tstphp..............................................sl.hcG...lch..shcpl.hpsl.cc..h.Glctlss.......t...G......p..t..FDPshHp...Altp..h...........ss..sc....hts....ssl.spVhQ+GY..p..Lp..-.R.....llRPAhVsVu................................. 
0 485 928 1199 +2824 PF02955 GSH-S_ATP Prokaryotic glutathione synthetase, ATP-grasp domain Griffiths-Jones SR, Bateman A anon Structural domain Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -5.13 37 1779 2012-10-10 13:17:03 2003-04-07 12:59:11 11 6 1678 4 426 2863 2533 171.60 52 54.80 CHANGED EKLa.stpFs....chh.PsTLVopchpcl+pFhpcpsD..lILKPLsGhGGpulFRlpps.DsNlssllEhhTptsp..p.lMsQpalP-lpcGDKRIlllsG-Pls......ulsRlPt.tGEhRuNlAsGGpucsppLoc+-hcIsppluPpL+c+GLhFVGlDVIGshLTEINVTSPTGlpEIcphts .....................................................EKLa.ss.Fs....-l.s.P.T.LV.T..R...s....t.s...p..l..+s....Fhp....c.....H......u..D.....lI...LKPL..D....G...M..G..G..uu..I..FRlcp..........s.....D................s.............N..........L....u..........s.....I........l...E...o....L...T.....p...t......G..p...............c..h..sM...AQpY......L....P....s..I...c..c.....G....DK....Rl..LlV...DGE.P..VP................YsLA.R.....I....P....p.....s..G..E...s...R......G.....N.L...A...A..G...GRG......E...s...p..s..Lo...-....p.Dhc....I.Ac.p.l..G.P...s..L.+.c+.G.LlFVG...LD..l..I.....G.D.....h.....L..T.EIN..VT.SPT..ClREI-t..t.................................................................................................................... 
0 103 240 337 +2825 PF02951 GSH-S_N GTS_N; Prokaryotic glutathione synthetase, N-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.30 21.30 21.60 21.70 20.80 21.00 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.54 0.71 -4.30 155 1672 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1622 4 395 1136 1521 119.20 46 37.80 CHANGED l+luhlMDPlpslslt+DSoaAhhhEAQpRGHplaahpsscLth.c.....sG..........cshApspsl.plppsts..........saa...phuptp.phsLuc.hDllLMRpDPPFDhpYlhATalLEps.....pp....tusLVlNcPpulR ..............lKlullMDPIusINI.cKDSoFAMhLEAQ+RGa-LaYh-.sDLhl..p......sG.................csp.Ac.s.+sl....pVc....pshs...........cWa.....ph.s..s....cp....clsL.u-.lDVILMRKDPPFDh...E......a.I..Y.A.T.YlL..E+.A....Ec.....pGsLlVN+PQSLR.................................... 0 89 217 308 +2826 PF03917 GSH_synth_ATP Eukaryotic glutathione synthase, ATP binding domain Mifsud W, Griffiths-Jones SR, Finn RD anon Pfam-B_2922 (release 6.5) Domain \N 25.00 25.00 25.50 25.20 24.00 24.00 hmmbuild -o /dev/null --hand HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.16 0.70 -5.87 10 535 2009-01-15 18:05:59 2003-04-07 12:59:11 12 14 352 15 323 538 43 395.70 31 92.73 CHANGED h-hcphDpchLpcLshDAlsWAhLpGLlhs-+osp+SGsVsuV.lsauPlsLLPoshPcutacQAsplpPlFNELVDRVS.DusFLppoLS+TpKs.....D-FTuRLl-IHpKh......Lcp..NKKp-lRLGlaRSDYMlD...-pTpu....LhQIEhNTISsSFuGlushlopLHpplLpphsc...thul-upplPsNsulpthA-ALAKAWscY...............ssPcAlllhVVQsEERNhaDQ+hlsupL+c+atlsshR+slAEl-ptuclpsD..........hsLhlsGQtVAVVYFRuGYoPsDYPSEpEWcARLLlEpSpAlKCPsIuhHLsGTKKIQQELA+PGVLERFL-sK.--lA+LRcsFAGLWSLDDo.....-lV+pAlEcPEhFVhKPQREGGGNNlYG--l+psLh+Lpc..oEEcAAYILMp+IhPpss.pshLlR.sGhhcpscsISELGIaGsYLRN..+DcVlhNppSGaLlRTKssoSsEGGVAAGFAVLDSlYL 
..........................................................................................................................................................hh.thh.hsh.pGhhhh......................................................sPh..oLh..Po.hPcthappuh.t.lp.haNpLh.s.t.lu......p..s...t.aL.tp.hl.......p.p...hh...ps......................D.p.Fsu.p.Lhplatph.................................hpp......sh.h..p.t..l...............LG..lhRSDYMhc.......tts...t.................................l+QlEhNTIusS.Fuu.lu.shsstlHph.lhp.....h..h................................p............t.......p.l.s.t..N..p.s.htt.l...utulstA..hp..ass.pss.hhhh....h.h.ppth.t.th......hhhhth......t.................................................................................th...t.thsshhatsshhstph.spttWpsRhhlEpStAlKCPsl.hpLsGoKKl.QQ..L........u......p.......s................s.............h....L..p..pF..l.......................s................t......p....p..h.tp.........lcps..F......ss.has.....L..-...............s.......................s................p......h..................h....t.....Ah......p..p..P..p...paVLKPQR.EGG..GNNlYtpp..l.phLp.p..ltp................pchsu...aILMphI.....P.....ts..hh.l.....R....s.....s..................h............................t.p.h........lSELGlaGs......hlhp................tt...p......l.......lhN.....p.p..sGaLlRTK.t.pssEGGVuuGhuslDo.hL................................................................................................... 
0 114 178 269 +2827 PF03199 GSH_synthase Eukaryotic glutathione synthase Mifsud W anon Pfam-B_2922 (release 6.5) Domain \N 20.90 20.90 23.50 21.80 18.30 17.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.42 0.72 -3.94 51 469 2012-10-01 21:12:38 2003-04-07 12:59:11 10 12 345 15 283 471 21 103.90 38 21.70 CHANGED spslVLhVVQssERNhaDQ+hlEhtLhcct.t.IpolRtThs-lpppsplt...............ss..t.....pL.....hlpu..t-VuVVYaRuGYsPsDY..ss.......-s..pWpARLhlEpSpAIKCPolhtpLu ..............................................s.pssllhlVQtsE.RNhaDQ+hl.....Eh...pLhc.......p........t....t.....lps..l..R......hohs-.lt.pp.s.pls.........................................................ss....p..............pL.....hl.s.s........tEVuVVYaRuGYsPsDY...so.........c.p..pWcARlhlEpSpAlKCPolthpLs............. 0 96 155 234 +2828 PF00255 GSHPx Glutathione peroxidase Finn RD anon Prosite Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.37 0.72 -4.68 12 4817 2012-10-03 14:45:55 2003-04-07 12:59:11 14 11 3035 33 1412 4342 2461 104.50 45 59.73 CHANGED hY-asshslsG.p.hshspa+GKVlLIVNVAShsGhTs.pYppLptL.c+htspGLsILGFPCNQFGcQEPupscE....lKhhpsst...atssFslFpKl-VNGpstcPlYpaLK ................................hashp.sps.hpG.pshsL..p.p.Y..c..G..K..V..lL..l..V..N.s.A........S.c.......C...G.h.T............s....QY..p...t...L..............p......pLaccY.....p...c....p...G.........h........h.l...L..G........F..P.......C..N...Q.F.s.........t...Q...E..P.G.....o.scE.....................Ipp..aCphs...........a.G.V.o..F.P.hF.sK...l.-VN...Gp....s.s..c....P..LapaL........................................................... 0 446 800 1119 +2829 PF03738 GSP_synth Glutathionylspermidine synthase preATP-grasp Bateman A anon COG0754 Family This region contains the Glutathionylspermidine synthase enzymatic activity EC:6.3.1.8. This is the C-terminal region in bienzymes such as Swiss:P43675. 
Glutathionylspermidine (GSP) synthetases of Trypanosomatidae and Escherichia coli couple hydrolysis of ATP (to ADP and Pi) with formation of an amide bond between spermidine and the glycine carboxylate of glutathione (gamma-Glu-Cys-Gly). In the pathogenic trypanosomatids, this reaction is the penultimate step in the biosynthesis of the antioxidant metabolite, trypanothione (N1,N8-bis-(glutathionyl)spermidine), and is a target for drug design [1]. This region, the pre-ATP grasp region, probably carries the substrate-binding site [2]. 22.40 22.40 22.50 22.40 21.40 22.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.42 0.72 -3.73 159 2307 2012-10-01 21:12:38 2003-04-07 12:59:11 9 6 1331 17 262 1223 27 99.70 35 22.57 CHANGED tl.aFssh....ssshEDttTs..taLpcsAppA.............Ghp.sthls.lc-.lshs.............t..pGth.hDh-s...p.lchlFK.LYPWEhhhc-.......................p...hustl..thtp......sthlEPsWphlhS ...............lahsss..c-shEDcsTs..pYL.p-sAppA.............Ghp...ocalhl--.lGhs....................t...tGph..s.......Dh..-s........plIpslFK.LYPWEahhc-..........................c.......hsshLhtp..t.............sthlEPhWpsIlS................................................ 0 56 147 206 +2830 PF00437 T2SE GSPII_E; Type II/IV secretion system protein Finn RD, Bateman A anon Prosite & Pfam-B_2215 (Release 8.0) Family This family contains both type II and type IV Swiss:P54907 pathway secretion proteins from bacteria. Swiss:P07169 VirB11 ATPase is a subunit of the Agrobacterium tumefaciens transfer DNA (T-DNA) transfer system, a type IV secretion pathway required for delivery of T-DNA and effector proteins to plant cells during infection [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.85 0.70 -5.50 23 13216 2012-10-05 12:31:08 2003-04-07 12:59:11 15 70 4434 45 3599 12796 4731 263.40 26 60.45 CHANGED tthhhcsl..ctssoDIhlps.tc.lhl..Rlsu...hlpplhp.stptsttllpRltshuph-IuE++...Ducls.......suhchclsh.Psshu..tp..lslR..cpsstth..sLpcLuhssshp.................................pthtchLcps...+s.ILVoGsTGSGKTThL...AhLstlssp..pcpIlTlEDssElpl.puhsplplps+.......sslThschLRuhLR.cPDhIhlGElRDtEshcl.lpAspTGH.s.loTLHsNSAhuAlpRLpphu........ls.h.lps.ltsl..ls.+LsRph .....................................................................................................................................................hh......h...p....ts...oDl.h..l.....p....s..............t...........p...hhl.....+hst...............h.h..t..h....................h......s.......p...........h..t....t....l.......h.t....p.....l....t.....h...h.....s.....t.....h..............p.....l.....s.....-......p.........p..................-..u.....p..ht..........................pshch......+.l.s..h....h.s....t.ths...........p............hs...l.R............ph..s...t.t.hh.....sl.p.p..L..s..h......s...............................................................................t.t..h...t.p..h...l...p..p...s..................p.....G.....h.l...L.l..o..GsTGSGKT.T.o.L.......t.u.h..l..s.....h...l.....sps...........tp.......+.I.....l....T....l....E.......D....P...l...E.....h.......h......p........t........h.....p.......p.....l...p..lppc........................ssh...o...a...s...s...s...L+..u.s....L.R....p.c....P..D..l.I....llGE.l..R.D.t.......E..T.h.ch.slp.A.u.p...T...G...H..h..l...h..o..TL...Hs.s...s.utp.s....l.s.R..L..h...s..hs.............ht..........l...p.p...t.l..t..t..s........hs...pl...h............................................................................................... 
0 1167 2366 3072 +2831 PF05157 T2SE_Nter GSPII_E_N; Type II secretion system (T2SS), protein E, N-terminal domain Yeats C, Desvaux M anon Yeats C Domain This domain is found at the N-terminus of members of the Type II secretion system protein E. Proteins in this subfamily are typically involved in Type 4 pilus biogenesis (eg Swiss:Q9X4G8), though some are involved in other processes; for instance aggregation in Myxococcus xanthus (Swiss:Q9RF11) [1]. The structure of this domain is now known [2,3]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.07 0.72 -4.01 110 3482 2009-01-15 18:05:59 2003-04-07 12:59:11 10 45 1888 2 1060 2838 803 107.90 18 19.45 CHANGED Llp........Ghhs..........................................p......pp.............................lhpsLucp.............................hslsh........lshpshphs.p..hhth.....lshshhpctthlPlp..hc.s......splhl..AhscP...hs.hphh-plphh.hth.plp..hhlsspsplp.phlpch....hspptst .............................................................................................................thls..........................................tpp..........................................lhphlut.............................................................hshsh..................hs.l.p..t..h...t..h..s..t....hhth.........hstphsp+apslP.lp.....tp..s........................sp..lhl...A...hsDP..hs.htsl.-.s.lpht.sst..plc........hslsscpplp.ptlpchht.....p......................................... 1 361 719 946 +2832 PF00482 T2SF GSPII_F; Type II secretion system (T2SS), protein F Finn RD, Yeats C, Desvaux M anon Prosite Domain The original family covered both the regions found by the current model. The splitting of the family has allowed the related FlaJ_arch (archaeal FlaJ family) to be merged with it. Proteins with this domain in form a platform for the machiney of the Type II secretion system, as well as the Type 4 pili and the archaeal flagella [1]. 
This domain seems to show some similarity to PF00664 but this may just be due to similarities in the TM helices (personal obs: C Yeats). 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.29 0.71 -4.21 340 14010 2009-09-11 10:25:30 2003-04-07 12:59:11 18 9 3954 8 3638 10916 2531 123.70 19 56.05 CHANGED hhcplushlp.uGlslhpulp..hlspptt..........pthhpptlppltpplptGt...sh..s...puhp................phs.....hh.sthhhthlpsu-puGsLsphLpphuphhcpphphp....hchhsshhtPh......hhlhluhhlhhhllshl.ls ..............................................hcpLuhhlp.uGl.slhpulp....hlsppht........................sthhpp.h.l.p.p..l..tp..p.l.....p.p.Gt.......sl.......s......pulp....................................phs......hF..sshh...ht....h.l.t.sG....E....p.....u.G.p.....L..s.....ph...Lp...pluphhcpphphp...........tc...hhts..h..h.Ph...........lllhl.uhhlhh.hllshllP.................................................. 0 1183 2343 3069 +2833 PF00263 Secretin Bac_GSPproteins; GSPII_III; Secretin; Secretin_C; Bacterial type II and III secretion system protein Finn RD anon Prosite Family \N 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.74 0.71 -4.86 155 5833 2009-01-15 18:05:59 2003-04-07 12:59:11 16 82 2114 0 1392 4693 1363 161.40 27 28.57 CHANGED ALppsupsclLusPplhshss......ppAplp.lGpplshh....sspss........ssss.........................hp..hp...........................ssGlpLp.lsP........plss.ssp...lpLplp..phoshss..t..............................................sssPs.....lsp.+plsT.plhlcsGpTlllGGllpcppppspspVP....hLu-IP....llGt.LF+spspppp+sELllhlTPcl...lp 
..........................................................................................ALpppupspllu.sPp.lhshss.......................ppA.lp.sG..p.p.lPh.h..........ps.............................s.ss.ss.............................lp....hp.................................................................................................................ps..GltLc.V.s.P................pltp...ssp............lpL.p.lp...phus.hsp..stp..........................................................................................stsss....lsp..+plsT.....pVt.l.p.sGp.o.lllGGlhpcp.ppp..shsp....l.P.....hL.GD.IP........llGt....LF+pp.sp..p.pp+ppLllhlpPpll....................................... 0 408 827 1132 +2834 PF01203 T2SN T2SP_N; GSPII_N; Type II secretion system (T2SS), protein N Finn RD, Bateman A, Desvaux M anon Prosite Family Members of the T2SN family are involved in the Type II protein secretion system. The precise function of these proteins is unknown. 26.00 26.00 26.10 26.60 25.80 25.80 hmmbuild --amino -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.42 0.70 -4.77 42 400 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 393 0 109 339 61 211.50 25 84.42 CHANGED hslsps.lslsulsGolWpGpspplphp...sh.pls.pVpWslsshuLltGpsphslphup....tt.slpG.pG.l.shuhu.uhtspslphshssshlhphh....hshPlps....sGplpl.....tlppht.....upshCppl.pGplhhp.suslpsshGsl.sLG.sltusluCp.s.uplhsphppsssplslshpsplp..sstpaphsuplc.sssshPsslppsLshlGps.cs...pGtashphpGRl ..................................t.lplssspGolWpGp.A..s.l.thp...................sh...h.....G..plpWchphh....sLlhGphphclchsp.......tlpu.tuhl....shuh.s..uhhspshthslPuu.hlhshh.........hPh.s.lph..pGplpl..........slpphp.......hups.hspph.pGplhhp..tspl....p....usl....u....s....l....sLG....sltusls...sp..s..uslslpl.....sp...p..s.st...lt.hphphslt....sss..paphpuhhp..s...tsp.s....t...s....Ltt.LshLGps.su...pGt.h.h.................................................................... 
0 28 57 83 +2835 PF03958 Secretin_N NolW-like; GSPII_III_N; Bacterial type II/III secretion system short domain Yeats C anon Yeats C Domain This is a short, often repeated, domain found in bacterial type II/III secretory system proteins.\ \ \ \ \ All previous NolW-like domains fall into this family. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.50 0.72 -3.80 231 6199 2009-01-15 18:05:59 2003-04-07 12:59:11 12 49 1721 11 1290 4691 1113 81.90 24 21.96 CHANGED spll.Lphssus-...............lsshLpph.h..................................................................................stph.pltssspsNslllp.ussptlpplpp.llpplDhsttp ..............................................................................................................pll.LpausAp-...................................ls.plLppl.h................................................................................................................................................ss.ps..plssD..pc.oNslllp.ssssshpplpp.lIppLDh..t..................................... 0 370 706 1018 +2836 PF02501 T2SI GSPII_IJ; Type II secretion system (T2SS), protein I Mian N, Bateman A, Desvaux M anon Pfam-B_2607 (release 5.4) Domain The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for the transport of proteins across the outer membrane first exported to the periplasm by the Sec or Tat translocon in Gram-negative (diderm) bacteria. As members of the T2SJ family, members of the T2SI family are pseudopilins containing prepilin signal sequences [1]. 
25.00 25.00 25.60 25.40 22.90 21.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.50 0.72 -4.01 100 934 2012-10-03 10:38:27 2003-04-07 12:59:11 12 4 817 9 179 576 128 81.20 28 65.20 CHANGED tLc-+slAtalA-NplsclpLp.pthPshu...tppupsphuGppWhhphp.ltsTsss.p.......hpplclpVt.........spp.pssslspLssalsp .................Lc-+slAsWlA-Nphspl....pLt...pth.pss.........pppGpsphuGppWaW+pp..shsT...sss.h........lptl-lpVs........ppctsssltphpuhh.................................. 0 34 83 132 +2837 PF03934 T2SK GspK; Type II secretion system (T2SS), protein K Bateman A, Desvaux M anon COG3156 Family Members of this family are involved in the Type II protein secretion system. The T2SK family includes proteins such as ExeK, PulK, OutX and XcpX. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.75 0.70 -5.24 106 1236 2012-10-03 02:11:09 2003-04-07 12:59:11 8 4 993 1 289 1639 405 262.20 26 82.09 CHANGED c.phphcpspshhttpQAhahuhuupshActlLppsh................hspLsph.WA...ts..h.hsl-.....................p.GplsuplpDhpupFNLNsL...............................stsst.................................hpshshptappLLpsLuls......t.tupplscslhDalDsD............................pt..t..GuE-s.Y..tutsssahs.usp.hssluELphl.Ghosphhp+LtPalssLP......s..pthlNlNTh....sA.lLsAlhss.....lshspAppllpp..........R..sts.sap..sls-ahst............lss.......t.....hpstls...lsSpaFtlpup....sphspsphphpollpps...............s.ssshs.l 
......................................................................................t.phtpsps..phtQAhahuhu..u..p.p.hAh.t.h.Lppshpppt.................hspLsp....WA............ts....t....h....ls...................................................................p..up..l..psplpDtpupF.N..LNsL......................................ht..s.....................................pshshphhptLl..t.p....l...s.ls...............thpuctl..spul.h-alDp.D..................................pps...t.hhG.s.EDs.Y.....ut.s.ssahs...usp............htsluE...L+hl.p..Ghs...s..........t.l....h..........p.......+.........L..tPhV....s........s........LP...........sphpl.NlNTh...............pu...lLtAlh.s......lo....s.pA..ptllpp...................R..........stp..sat....s.hsph.htt............................lss............htt............hpshls..l.p.Sp.aF..lpsp....hhh.sp.p.hp.hpoll.t.t.p.....hh............................................................................................................... 0 83 161 229 +2838 PF05134 T2SL GspL; Type II secretion system (T2SS), protein L Moxon SJ, Bateman A, Desvaux M anon Pfam-B_6494 (release 7.7) Family This family consists of Type II secretion system protein L sequences from several Gram-negative (diderm) bacteria. The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for extracellular secretion of a number of different proteins, including proteases and toxins. This pathway supports secretion of proteins across the cell envelope in two distinct steps, in which the second step, involving translocation through the outer membrane, is assisted by at least 13 different gene products. T2SL is predicted to contain a large cytoplasmic domain represented by this family and has been shown to interact with the autophosphorylating cytoplasmic membrane protein T2SE. 
It is thought that the tri-molecular complex of T2SL, T2SE (Pfam:PF00437) and T2SM (Pfam:PF04612) might be involved in regulating the opening and closing of the secretion pore and/or transducing energy to the site of outer membrane translocation [1]. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.33 0.70 -5.18 19 962 2012-10-02 23:34:14 2003-04-07 12:59:11 8 5 829 4 181 775 109 221.90 22 57.66 CHANGED Lhl+Lsssstpshph......hpstttphhtpG....phsstpshspls.hssts.shlllPusslhhpplsLPstst+phhpsLPahLE-plApDl-plHhslh.............stps-.ptpVssVc+phhptWLshhpptGlssppllPDshsLPh..sssuhsshphss..................................................pWLlRpstttGhus-tphhshhhtt...........ltshsssPt.s.s...........htsts.pshhhlhApshht...sshsLhpGtFs.p .....................................................................t...t..h...W...................tt....htpG.............hss..s.....p...p.L..tt....h.........t.....t...ts...lhl..l..lPuptlh.hhplpLP......s.......h....t...t.........+..........p...h...ph.Lsh.l.lE-plsp..D..l-p..l..H..h...slh.........................st..p.tp....ts....pV.s..s.l.c..p..phhcphlphLpts.Gl.sh.spl.........hP.........D....h........h....s.....l.Ph........t.tuhs..s.h.p.h.t.t...........................................................................phlh.Rts...tt...p......Gh...u..h......s..h.......s.hh.sh..h..ht.t...hspt..................ltshs..s.Pt..h.s....................................httt.s......tt...........h.......h.....hh.t....hht....sphsLhpG.ap...................................................................................................................................................................................................................... 
1 37 88 134 +2839 PF04612 T2SM GspM; Type II secretion system (T2SS), protein M Mifsud W, Desvaux M anon Pfam-B_5302 (release 7.5) Family This family of membrane proteins consists of Type II secretion system protein M sequences from several Gram-negative (diderm) bacteria. The precise function of these proteins is unknown, though in Vibrio cholerae, the T2SM (EpsM) protein interacts with the T2SL (EpsL) protein, and also forms homodimers [1]. 25.90 25.90 25.90 26.00 25.80 25.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.74 0.71 -4.44 63 1099 2012-10-02 17:03:51 2003-04-07 12:59:11 7 7 852 2 233 747 121 149.60 22 84.64 CHANGED hpplpthWpshssREppllsshushLhlslhYhslhpPhppttppspppltptppplshlpppusphpthp.ssssssstst.........uLpsllspoupptGl....slpRhpspGc.plpVtlcsssFssllsWLspLppppGlplpphclsct..............ssGhVslphhh..tts ........................h...lppaatshosRE+thlhssushlhsshhah....h..l....hpPh....ppphpp...t...ppplpphpp.hshlpspAs.plt...shp...tts..ss...pst...................tlspllsposp.tts.l............sl.t.p...lt....p.........u.........c................p........lplhlp.sssasslhpWLstl.p..t.p.h.u..l.plpphpls.tst.............tsG.lplpth.....s......................................................................... 0 63 127 185 +2840 PF00043 GST_C gluts; GST; Glutathione S-transferase, C-terminal domain Eddy SR, Griffiths-Jones SR anon Overington Domain GST conjugates reduced glutathione to a variety of targets including S-crystallin from squid, the eukaryotic elongation factor 1-gamma, the HSP26 family of stress-related proteins and auxin-regulated proteins in plants. Stringent starvation proteins in E. coli are also included in the alignment but are not known to have GST activity.\ \ \ \ The glutathione molecule binds in a cleft between N and C-terminal domains. The catalytically important residues are proposed to reside in the N-terminal domain [1]. 
In plants, GSTs are encoded by a large gene family (48 GST genes in Arabidopsis) and can be divided into the phi, tau, theta, zeta, and lambda classes [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.00 0.72 -4.00 58 13485 2012-10-03 01:14:49 2003-04-07 12:59:11 20 103 2532 701 4893 16003 4903 96.70 16 41.41 CHANGED hhshthphhhhshtts................................pppc.thppsppclhchlphhpphLp......sppah.sG-phohADl..hhhshlt...hhhthphshh.........spaPpLpsahp+lhsps ........................................................................................................................hh................................................................t.........h...t...t.....h..h..p....p.....h...p....c....h.....h..p.h....l....-pt.Lt.............ppsa..l....s.......G......-.....p...h.olA....Dl......s...h.....h...sh..lt............hh.....t...t..h.t..h.s.h....................sphP.p.lptahpclttps......................................................... 1 1323 2464 3783 +2841 PF02798 GST_N gluts; Glutathione S-transferase, N-terminal domain Eddy SR, Griffiths-Jones SR anon Overington Domain Function: conjugation of reduced glutathione to a variety of targets. Also included in the alignment, but are not GSTs: * S-crystallins from squid. Similarity to GST previously noted. * Eukaryotic elongation factors 1-gamma. Not known to have GST activity; similarity not previously recognised. * HSP26 family of stress-related proteins. including auxin-regulated proteins in plants and stringent starvation proteins in E. coli. Not known to have GST activity. Similarity not previously recognised. The glutathione molecule binds in a cleft between N and C-terminal domains - the catalytically important residues are proposed to reside in the N-terminal domain [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.47 0.72 -3.80 53 5748 2012-10-03 14:45:55 2003-04-07 12:59:11 15 61 1695 674 2420 16335 5075 72.80 24 29.98 CHANGED hslphashps..tspthRhhLt...htGlpaEt..phhsh....t.....spahphpst...............splPhl.ps......shplspShAIhcYluc ......................................................ha...h.s......ts.t.h.t..h..h.Lp......thG..l...p.....a...-..h.............h..h...l..........s..h..........t...t.....t....p....ht.......sc.a.h..p..h..p.Ph..........................upl..P......sl...p...cs................sht...lh...E.......S.s..A.IhpYLu..................................................................... 0 799 1279 1900 +2842 PF00735 Septin GTP_CDC; Septin Bateman A anon Pfam-B_440 (release 2.1) Family Members of this family include CDC3, CDC10, CDC11 and CDC12/Septin. Members of this family bind GTP. As regards the septins, these are polypeptides of 30-65kDa with three characteristic GTPase motifs (G-1, G-3 and G-4) that are similar to those of the Ras family. The G-4 motif is strictly conserved with a unique septin consensus of AKAD. Most septins are thought to have at least one coiled-coil region, which in some cases is necessary for intermolecular interactions that allow septins to polymerise to form rod-shaped complexes. In turn, these are arranged into tandem arrays to form filaments. They are multifunctional proteins, with roles in cytokinesis, sporulation, germ cell development, exocytosis and apoptosis [2]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.72 0.70 -5.39 14 2609 2012-10-05 12:31:08 2003-04-07 12:59:11 13 25 286 15 1492 3139 269 234.50 37 62.73 CHANGED +GhsFTLMVlGcSGLGKoThlNoLFtosLhss.....................pth.tst-.ctcpTlcIc.tpps.lEEc.Gl+LpLoVlDTPGFGDslsN.spsWcsllcYI-cQa-pYLcpEsplpR.pphhDsRVHsCLYFIsPsGHGL+PLDlthMKpLpp+VNllPVIAKADoLTscElpphKppIhp-IcppsIcIap..Pss-ps-..........-E-.hppscpL+pslPFAllGSsphlEt..cGcpVRGRpYPWGlVEVENssHsDFltLRshLlpTHlpDLp-sTpchhYEsYRocpLpshthtscs .......................................................................pGhpFslh.s.s...G.po.GlGKSTl....lN...o...L....F.t...o....p..h....t..t................................................................................tp.......php..p.....s.l......p.l.p..........t........p...h....p......l......p....E..p.....s..l...+.....l...p..LT.....ll.D..T.s.G....F....G...D...t...l.....s.s...p..............p.......s....a.p.....s....Ih......c.a..I.-....p....Q..a.....-.....p.....Y........Lp....c.........E...........p.....l.....p.....R.........h.....p.....h.....................D.s..Rl.........Hs..CLY........F.I.s....P..........s....G.......H.u.L+.s.LDl.t..h....M.Kp...L...pp..+.......V..N....l.....l.P...lIA.KADo...lo...p.Ehpph..Kp..p........I.h..p-l.t.......p.ps....l..p...........l..Yp......s..t.....p..p.-p..............................................t.....h......p.....stp.....h...p.t....h............P.FAV..lGSp..p....h..p...........h.........t.......s.....+.......h.....l....+....uRpY...s..W.G.h...l.p...V...EN.......p.HsDFhhLRp..h.Ll...p.s.php...DLp-.TpphaYE.taRt.ppLttht.....tst............................................................................................................................................. 
4 422 705 1111 +2843 PF00925 GTP_cyclohydro2 GTP cyclohydrolase II Bateman A anon Pfam-B_1147 (release 3.0) Family GTP cyclohydrolase II catalyses the first committed step in the biosynthesis of riboflavin. 23.20 23.20 23.60 23.20 23.00 23.10 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.07 0.71 -5.01 161 5362 2009-09-11 05:22:16 2003-04-07 12:59:11 15 28 3848 3 1428 3867 2940 160.90 39 47.23 CHANGED lcchspsplsTpa.....G...pFchhsac....sthssppH...lALlpG.......ch.s.sspssLVRlHupshsuDlhuoh...ps-sutpLcpAhctIuc..................................p.Gt.....GVllaL.t...pcucuhuLh...s+lpshthp.cp.uhcshcss..............DhRsY.uluAQILpcLGlp......cl.+LLoNsP.c.KhtuLpuaG..lcVsppl.sh ....................................................................................................phhspsplPTta.....G.....pFphhuac..................s...h..h..s.........s.....p...c.H....................lALlhG.................................................sh....s....s......p....ps..lLlRlHS.EClTGDlhtSh........RC.D...C...G.........QL....c....tAhp....t....I....sc...................................c..Gp....GlllYL.c....QEG..R.G..I..GLh...sKl+...A...Y...t....LQ...-p....GhD....T....l....-....A....N.htLG.h.....................ssDhR.-a.s.luA.pIL+.p.LGlp............cl.RLLT....N...N..P...c..Khpu..Lp..sa.G....lpls-cls........................................ 0 444 916 1213 +2844 PF01227 GTP_cyclohydroI GTP_cyclohydro_I; GTP cyclohydrolase I Finn RD, Bateman A anon Prosite Domain This family includes GTP cyclohydrolase enzymes and a family of related bacterial proteins including Swiss:Q46920. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.06 0.71 -4.99 104 4297 2012-10-01 20:59:24 2003-04-07 12:59:11 17 9 3743 160 1127 2999 2646 175.90 44 84.59 CHANGED hcpul+plL.pslGEDPsREGLh-.TPcRVA+ha.pElasG.hppss....phl.......sphFp.....................hs-h..Vll+cIphhShCEHHhlPFhG.pAHlAYl.P..s.s+VlGLSKlARlV-hau+RhQlQERLTtQIAsslpch.LpPcGVuVll-AcHhCMshR.GVc+ssupThToshpGhFcs-spsRpEFhsllp ........................................h.ptsh+plL.ptl.G..E...Ds..s...R-.GL.h-.T.PpRlA+hY...pElF..u..G.hptss........tplh..........ss.h.Fc..........t.................................h-EMVlV+DIs...haShCEHHhlPFhG......+AHVAYI.P....s..s+V......lG....L..SK..luR.....lV-haA+R.......Q...lQERLTpQIApAlt....ch.L..........p....sp.........G.......VuV.ll.EA.pHh.C.Mph..R..G.Vc.K.sso...tTsTosh..tGh.Fcp.stpsR.pEFLphl.t.......................................... 1 354 686 946 +2845 PF04670 Gtr1_RagA Gtr1/RagA G protein conserved region Waterfield DI, Finn RD anon Pfam-B_4577 (release 7.5) Family GTR1 was first identified in S. cerevisiae as a suppressor of a mutation in RCC1.\ \ \ Biochemical analysis revealed that Gtr1 is in fact a G protein of the Ras family. The RagA/B proteins are the human homologues of Gtr1. Included in this family is the human Rag C, a novel protein that has been shown to interact with RagA/B [1,2,3,4]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.53 0.70 -5.22 32 723 2012-10-05 12:31:08 2003-04-07 12:59:11 7 10 282 7 491 12357 922 216.90 39 64.20 CHANGED KlLLMGhptSGKoSh+pllFsNh.sp-ThpLusTl............................cl-pspl+.hutl..slslWDhsGQpsah-s...hhttpp-pIFpsVt......sLlaVh.Dsp........................p..-...htcsltpappslptlhphsPst+laVLlHKhDhlp...c-hRp-hacchtpclpcpsp.........sht.t.................................lshahTSIaDcSLYcAaSpIVppLI.PphsslEphLpphsptsss-clhLF-psohLhlupsstp.sc....t.............................paEphSshI ................................................................................................................+lLLMGhp.tSGKo.........S.......h........p.....p....l......l.............F.........p............p.....h..........s.....p........-....T........h......h.........L.....t...u........T..............................................................c..l...p....+...s.....c...l...p....................u.........l............sh..p.l.W..............Dh.s..G......Q...........s....F.....h..-s.............h............p......p..-...pI...F..p....s....l.s................s.LI..a.V..h..Dsp...................................................c......-.......h..c..s.l..p.p.h...p......p..s...l.....p..t......h.....h.....p......h.....s.......P........s....h.....p........l........l...h...l....H...KhD.....h.....lp..............-.D.....p....+......c..h.........c....-.......h....p...p...c.....l.p....cp...t........shs...........................................................................................ls.h.ahT..S.I.....a.D........c.S....l.a.c.A..a.......Spl...V...pp..L..I...P..plssLEshLp.hsp.....................ph-c...sh...............LF-hsohl..hIusss.p.s..c........hp..........................................paEhhSshI............................................................................................................................................. 
1 177 261 390 +2846 PF04138 GtrA GtrA-like protein Kerrison ND, Finn RD anon COG2246 Family Members of this family are predicted to be integral membrane proteins with three or four transmembrane spans. They are involved in the synthesis of cell surface polysaccharides. The GtrA family are a subset of this family. GtrA is predicted to be an integral membrane protein with 4 transmembrane spans. It is involved is in O antigen modification by Shigella flexneri bacteriophage X (SfX), but does not determine the specificity of glucosylation. Its function remains unknown, but it may play a role in translocation of undecaprenyl phosphate linked glucose (UndP-Glc) across the cytoplasmic membrane [1]. Another member of this family is a DTDP-glucose-4-keto-6-deoxy-D-glucose reductase, which catalyses the conversion of dTDP-4-keto-6-deoxy-D-glucose to dTDP-D-fucose, which is involved in the biosynthesis of the serotype-specific polysaccharide antigen of Actinobacillus actinomycetemcomitans Y4 (serotype b)[2]. This family also includes the teichoic acid glycosylation protein, GtcA, which is a serotype-specific protein in some Listeria innocua and monocytogenes strains. Its exact function is not known, but it is essential for decoration of cell wall teichoic acids with glucose and galactose [3]. 
21.30 21.30 21.40 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.75 0.71 -4.14 175 4541 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 2742 0 1077 3355 691 115.20 18 65.05 CHANGED +F.sllGshushls....hsl...hhlLh...h...............hthshhlAssluahsuhlhsahhNchaoFcspppt..........htphhpFhhs.shhuhhlshsh...hhlhh.phhth..........h..................................upll...uhhlsh.l...hsalhs+halF .............................ahhhGs.hss.lls...hsl....hhlhh...h...........................hth.p.h.h....lA..s....hl.uhl...luhlhsahhNc...tasF.cspspt.................hpph..h..p..Fhhh..thh....sh.hl.s....hh.l........hh.hhh...ph...h.sh..............hh......................................................................uhlh....u.hlsh.h....hsalhp+hhlF....................................................... 1 357 710 932 +2847 PF00211 Guanylate_cyc guanylate_cyc; Adenylate and Guanylate cyclase catalytic domain Finn RD anon Prosite Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.96 0.71 -4.93 21 9831 2012-10-01 23:51:22 2003-04-07 12:59:11 15 564 1435 89 5200 9161 3221 176.30 23 25.87 CHANGED lhsppacsVolhFuDIsGFTshsspps....shpllphLN-laspFDcLhsppt...lhKlKTIGDsYMssuGls....................cst.tHstphschALshhcth.pshshpp................................pslplRlGlHsGsVluGVlG..h+pscYslaGsTVNlASRMESsGhsscIpl..oppshphLp.......taths.c.uchpl+........G....+............uphpTaalhu 
...............................................................................................................................................................h......ppl..sl..h.F.u.............D.l..h.u.....F...T.p..h.sp..phs........................st...p.ll..p.h.L...sc.h.h.......s........t.h.s...p..l.l.p.php............................shh....l......K..h...l................G..D......u......h..h..ss..h..Ghs....................................................................p...p..s...t..p...ss...p.........h..u...l....s....h....h..ct...h....p...p...h.p...hpp.....................................................................................................................s..l....p..l....R....l....G..l..p......s............G.........................l.l...s.....G..s.l......G................p..........p...........p.....a..s....l.h.Gs.s.V......N..........hAu.....R.h....-..u..h.....u..........t...............s.......s................p......l..h..l.......o...p....p.s.h.p.h.lp..................th..t....h..p...h....s....h...l+...................G....................t..........t..................................................................... 
0 1996 2930 4185 +2848 PF00625 Guanylate_kin Guanylate kinase Bateman A anon Bateman A Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.98 0.71 -4.84 12 8050 2012-10-05 12:31:08 2003-04-07 12:59:11 16 166 4712 73 2497 6019 2540 171.00 31 44.42 CHANGED pt+slllhGPSGsG...+splhpsLlsphscpF.u.hsVs+TTRs.R.sEhpGccYaFl.o+-phppsIppstFlEaup.asushYGTphpslcplhcpu+p...slLDl-.hQusppl+ps.phsPhhlFItsPShpslpch.cthsppstcpltcchsthcpphpph.....FDhllsNssl-cshpcLcchlpst ........................................h...h.lll....s...GPS.Gs.G..............Ksolh.ptL..hp.p.....s.......p........h....t...hSl.S........t................T.TRt........R....u...Eh....c..G......h...c..Y.....a.Fl..oc.-p.F.cp..hl......p......p.s.p..h....l..E.......a...........u....p....h.......h..............u....N...............h........Y..G..T...s.h.p...tl..c..p.h.h.p...p...G..ps...................llL-.l-...hp.........G....s...p.p..l.....+.....p..............p......h...................p........s......h......h....lFl..t..P............P......S.......h............c..............p...........L........c...............p..........R..............l.............t............t............R...............u.............p......-...........s...t....c.....h....l........t...p...........R........h......t......p..A...p.....p..E..h.....p...t..h.....p.........aD..h.l.l....l.N..-..s...l.c....p.A....hpclppllt.......................................................................................................................... 
1 729 1238 1842 +2849 PF02058 Guanylin Guanylin precursor Mian N, Bateman A anon IPR000879 Family \N 25.00 25.00 68.30 68.00 18.60 18.60 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.42 0.72 -3.91 15 87 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 40 5 38 70 0 89.50 45 80.48 CHANGED puVpVp-ssFoFsLESVKKLK-Lp-......spoPRLtpps..........ssslC.spPsLPp-LpPlCpppsAusIlpRLcsIus..DsCEICs.sACTGC .........uVhlQ.tsFphpLESVKKLc-Lp-t.....h.sPRltsps...........hPslC.spPsLPp-LpPlCppppAupIhppLcsIAp..csCElCs.sACTGC............ 0 2 2 7 +2850 PF05120 GvpG Gas vesicle protein G Yeats C anon Yeats C Domain These proteins are involved in the formation of gas vesicles ([1]). 27.30 27.30 27.50 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.57 0.72 -4.17 11 122 2009-09-12 20:43:27 2003-04-07 12:59:11 7 2 104 0 50 138 7 76.20 33 85.04 CHANGED LLplsh.uPlpGllWIu-plpEcA.-pphpD.psLpppLssLphpL-hG-IsEEpF-pcE-ELL.+Lcthtp...htttstttss .......lhhlsh.uPlcGlhWlu-plp-pA.-cEhh.Dsssl+pcLtpLptth-tGEIoEEEa-ppE-cLLpRLpthtt............st............. 0 19 40 49 +2851 PF05121 GvpK Gas vesicle protein K Yeats C anon Yeats C Domain These proteins are involved in the formation of gas vesicles ([1]). 25.00 25.00 36.70 31.80 21.30 19.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.75 0.72 -4.16 19 150 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 127 0 62 157 6 87.20 46 75.27 CHANGED pht......l-s.sscslptGLssLVLTVVELLRQLMEpQAlRRMEpGsLS-splERlGpsL.tLE-pltcLpppaslssp..DLNlDLG.lGsLLsp .....s.plsh-s-slcpsLspLVLTlVELLRQLMERQAlRRh-sGsLo--QlERlGp.oLMtL-cchp-Lp-paGlpsc..DLNlDLGPLGsLL.................. 0 22 49 60 +2852 PF02213 GYF GYF domain SMART anon Alignment kindly provided by SMART Domain The GYF domain is named because of the presence of Gly-Tyr-Phe residues. 
The GYF domain is a proline-binding domain in CD2-binding protein Swiss:O95400. 20.50 20.50 20.60 20.50 20.40 20.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.04 0.72 -4.38 79 823 2009-01-15 18:05:59 2003-04-07 12:59:11 11 22 290 12 564 801 13 55.40 30 6.25 CHANGED hhWhYh....DspuplQGPFsutpMppWhppGYFsssl.l++h........ttsp..pshh.....hpphhtp ...............hWhY+.........DspG.plQ........G.....PFsstpMppWhpsG.YF..s.......s.Lhl++s........pptph.pshh....ph...t................................ 0 187 316 466 +2853 PF02895 H-kinase_dim Signal transducing histidine kinase, homodimeric domain Griffiths-Jones SR anon Structural domain Domain This helical bundle domain is the homodimer interface of the signal transducing histidine kinase family. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.16 0.72 -3.64 148 3074 2009-01-15 18:05:59 2003-04-07 12:59:11 9 46 2055 3 891 2655 208 69.70 34 8.88 CHANGED spol.RVcsp+LDpLhNLVGELVIscspLtp..hspphp..................................................................pp..........lppshppls....+...........l.sp-LQ-slhph ....................pol.RVsl-+lDpLhNLVGELVIsputLsp..hup.p.hs........................................................................................................................ppc......................Ltp.shspLp...........c.................................................lsp-LQ-uVMph........................................................................................................................ 0 298 603 761 +2854 PF03030 H_PPase Inorganic H+ pyrophosphatase Griffiths-Jones SR anon Pfam-B_1050 (release 6.4) Family The H+ pyrophosphatase is an transmembrane proton pump involved in establishing the H+ electrochemical potential difference between the vacuole lumen and the cell cytosol. 
Vacuolar-type H(+)-translocating inorganic pyrophosphatases have long been considered to be restricted to plants and to a few species of photo-trophic bacteria. However, in recent investigations, these pyrophosphatases have been found in organisms as disparate as thermophilic Archaea and parasitic protists [1]. 22.20 22.20 22.40 22.40 22.10 22.10 hmmbuild -o /dev/null HMM SEED 682 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.86 0.70 -6.27 139 1540 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 1217 2 611 1460 5022 626.60 43 93.38 CHANGED ulluLlauhhhhttlhptssGs......................................................................................................................................pcMp-IussIp-GAtAaLp+QY+slulhslll..slll....hhh............................................................uhhsuluFllGAlhSuhAGalGMplus+ANlRTApAA................pptu.............hspALplAF+uGuVhGhhVlGLuLLGluhhahlh..t.......................tthhpslsGauhGAShlAlFARlGGGIaTKAADVGADLVGKVEAGIPEDDPRNPAVIADNVGDNVGDsAGMuADLF.ESYssollAshlL...ushhhsst...................hlhaPLl.luulGIlsSllGsahV+sppss....................................................................................................shhpALppGhhloullshlshhhlsthhh........................................t.st...........h.....tlahsslhGLlsuhlIshlTEYYTussapPV+cIAcu.SpTGsATNIIsGLulGhcSTshPlllIssuIhsuahlu........................GLYGlAlAAsGMLossGhslAlDAYGPloDNAGGIAEMu.tLspcVRphTDtL....................DAVGNTTtAlsKGaAIGSAuLsALsLFuuYsppl............................................................................................................sh..hsl..sltsPhVllGLllGuhlPaLFuuhsMpAVG+AAtphVpEVRRQF+-hPGIh..........................-Gp..s+PDYs+sV-IsTpuAl+EMlhPulLsllsPlllGhl..lG............s.............puLuGhLhGslloGlhhAlhMuNuGGAWDNAKKYIE.........sG..ph.G.G....KGS-A.....HKAAVlGDTVGDPaKDTuGPulN.LIKlhslluLlhs 
..................................................................................................................llul.hauhh.hht.lht...s......Gs..................................................................................................................................tcMp-IuttIp-GA.uaLtppYphlslhhlllhlll....hhh.h......................................................shhssl.uFllGAhhSshuGalGMplushANsRTAtAA..................................p..u.............hstuhplAF+uGulhGhhlsuluLLslshhhhlh.......................................t..h.hpslhGauhG.uS.lAlFuRlGGGIaTKuADVGADLVGKV........E.tGIPE..DDP......RNP..AsIADNVGDN..VG....DsAGMuADLF.EoY..s..sohl...........Ashsl...ush..hhh..st..s.........................hhhaPlllsu.h.ulls.Sl.l.Ghh.h.lp.s.t.t.s.s....................................................................................................sh.tuLppuhhlo...ullshlshhhhshhhh.....................................sth.............tlhhsslhGlls.uhlIshlTE.....YYTusshp.............PVpplApu.u.p.T.GsuTslI.tGLulGhpSshhPslhIssuIhsu.ahhu................................GlaG.lAlAA....hGMLuhsuhhlAlDAYGPlsDNAGGIAEM.u......t..l.s.......p.........c...........V.RchTD.tL....................DAlGNT..TtAlsKGaAIGSAuLsAL.sLFuuYhppl..........................................................................................................................................th.....hsl.........slhsPhVllGLh.lGu.hlPalFuuhsMp.AVG+AAhphVpEVRRQF+-hPGIh..........................c..G...p...s..+PD.Ys.+sVcIsTcuAl.+EMlh..Pu.lL.........sllsP...lll.Gh.h..hG.................................stuluGhLhGsllo.G.........lh.hAI.h..uNuGGAWDNAKKhlE..........t.G......th..s...u............................KGS-sH.KAuVsGDTVGDPaKDTuGPulN.LIKlhslluLlh..................................................................................................................................................................................... 
0 268 465 553 +2855 PF01725 Ham1p_like Ham1 family Bashton M, Bateman A anon Pfam-B_2030 (release 4.1) Domain This family consists of the HAM1 protein Swiss:P47119 and hypothetical archaeal bacterial and C. elegans proteins. HAM1 controls 6-N-hydroxylaminopurine (HAP) sensitivity and mutagenesis in S. cerevisiae Swiss:P47119 [1]. The HAM1 protein protects the cell from HAP, either on the level of deoxynucleoside triphosphate or the DNA level by a yet unidentified set of reactions [1]. 19.60 19.60 19.90 19.70 19.40 19.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.10 0.71 -4.93 110 5049 2009-01-15 18:05:59 2003-04-07 12:59:11 11 23 4640 36 1328 3692 2890 187.30 39 82.95 CHANGED lllATsNptKlcEhpplLsshs....hth............tclt....th.h-l.Es.usoat-NAhhKActhsphh............sts..................sluDDSGLpl-ALsG...hPGlYSARa................u.......................cpt..s.pt....LLch..lpsh.....cs........RsApFhssluhsp..sssp...........hhh................hcGpsc..GpIspps+..Gps.GFGY............DPlFhs.p...sh....spTaAEh...s.cpKN........plSHRu+Alpp.....lhph...L .................................lllATsNtGKlcE..h..pplLsshs.............hcl...hs.............s-hs.......s....-s.tET.........GtTF..tENAhlKActsu.c.h.s............Ghs...................slADDSGLpVD..AL..s.G.....tP..........G.lYSARa.......................................u.................Gpts..........sDpt...N.p+.........LLpp..Lpsh................cc................R.pApF.hsslshsp......ss.sp.............shl..............................................sc.G.ph...p..Gp.......Ih...p...p...s.+...........Gps...GF.........GY............D...Pl...Fhs..s..th..............scThAEL...st-.E..K.s...............tlSHRupAlctLhp..h........................................................................ 0 453 851 1131 +2856 PF04388 Hamartin Hamartin protein Bateman A anon Wood V Family This family includes the hamartin protein which is thought to function as a tumour suppressor. 
The hamartin protein interacts with the tuberin protein Pfam:PF03542. Tuberous sclerosis complex (TSC) is an autosomal dominant disorder and is characterised by the presence of hamartomas in many organs, such as brain, skin, heart, lung, and kidney. It is caused by mutation either TSC1 or TSC2 tumour suppressor gene. TSC1 encodes a protein, hamartin, containing two coiled-coil regions, which have been shown to mediate binding to tuberin. The TSC2 gene codes for tuberin Pfam:PF03542. These two proteins function within the same pathway(s) regulating cell cycle, cell growth, adhesion, and vesicular trafficking [1]. 25.90 25.90 26.00 26.00 22.00 25.80 hmmbuild -o /dev/null HMM SEED 668 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.39 0.70 -5.85 6 205 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 137 0 142 206 0 536.20 25 62.51 CHANGED pppssls-LlshL-SssLcpl--V+shlpEpLSs-+GshLVNsLVDYaLpTsSppslclLoolpEPHDKaLLDKhNEshs+sssRLsoLoLLGHVVR+QPsWlHKIu+hPLLsSLLKCLKpDoDVVVLhoulLVLITLLPMIPQosKQaLhDaFDIFGRLu...............SWsl..............+sPG+ss-VaLl.......HL+AuVYS.LFHRLYGMYPCNFlSYLR..paYSMKENh-T....FpEVVK...............................PMLp+VRlHPpLVTGTpDaEL.DPoRWKchEsHDlVhECA+lS...............LDsp..EuSsE-Gh.ohsc...........................SsusPhsusp..h.s.stShP..oshphoh+SspTph.spsphD.st..ssc-slWSPSslCGhuTPPSSh.shsP...hplshpsuh.suchhussGuGpspsus.STPuTos.....P...Pshuss..hhph..ssspsps.ppcc.psp.uhshhsRppp.........................lpsl-+psc.................tpthcsulspElhpl...sptp.c..ssh.Ghc.sh.h..cpLhssptpctp...............LsSo.D+....................Gsssspuu.shp..W.....FTPh-ssh+cp..s......c.pt...s..s.usCptsst..........s.PY-th...F-lALPK...susLFlp+KTtEslc+sttpc.p.......phcccslssoS....PhEVLD.clIppGp-AHsc..cR.hs.so.o.c.spht...P...-ELcslRuQLLL....LHsQLLYERaKRpQHAlR..........NRRL 
.............................................................................................................................t....................................................................................ppP..p.hhsplsp.hht.p...s...t...pRl.sLpLL.s.phlphpPsah.apl...ptsLh.slL+sL........phDssssslhsullsLlhhLP....hl....P.p....uh..tp....aL...chFslauRLs.........................sWph....................................................................................................................pps.s..ph..sc.hhl..............................HL....ph...u..l.h........t...LFp...hLYGhYPsNFlsaLR........pha....p.h.c..p.....s..h.ps...........hpcs.lc.........................................PhhpphplHPp.l.l...T..s...o..h......-.pEl..cspRW.......pchE..s.......cDlVhECtp.l.s.............................................Lssh.....pss..ppt......t.......................................................................................................................................................s.s.P.h..ts...................p..s...........t.h..p......t.t..................................................................t....h..os...p...............th...ps..s.........s......s.........................s....p........s............p......h...s..t.s..s........p........shsths...................................t............................t..................th..h...tttp..............................................................................................h.......p..................................tt....s.p..tp..th........h............................................t......................................................................................................................................................................................tt......................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 47 66 110 +2857 PF01567 Hanta_G1 Hantavirus glycoprotein G1 Bashton M, Bateman A anon Pfam-B_399 (release 4.0) Family The medium (M) genome segment of hantaviruses (family Bunyaviridae) encodes the two virion glycoproteins. G1 and G2, as a precursor protein in the complementary sense RNA. 25.00 25.00 46.60 46.00 17.00 16.40 hmmbuild --amino -o /dev/null HMM SEED 525 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.86 0.70 -6.25 15 433 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 136 1 0 365 0 303.60 52 49.41 CHANGED +slYEhKlECPHTVsLGQGhVsGpVELs.lPLsclssLhlESSCNFDl..HsossshQpaTpVsWcKKushs-ossAupsoFEspSpEVsLKGTCslsschhEssaKs...RKTVlCYDLSCNQTaCpPTVaLIuPlpTChoh+SChluLuspRIQVlYEKTYCVoGQLlEGpCFsPs+TlshoQPsHTYDlsTlPlhCFhlsKK...GsshKIhsplEp.lhsKssCT-NslQGYYlChlGupSEPlaVPuh-DaRSuElhS+MlhsP+GEDHDhstsupushRIuGPlpuKVPpTpoo-ThpGlA.FuGlPhYSSLSsLlppsDPcYlFSPGllP-tNHSsCDKKTlPLTWoGalslsGphE+lTsCTVFCTLuGPGASCEAYSEsGIFNISSPTCLVNKsQRFRGoEQplNFVCQRVD.DlVVYCNGQKKVILTKTLVIGQCIYTFTSLFSLlPGVAHSLAVELCVPGlHGWATsALLlTFCFGWlLIPulTLllLKlL+llsa.CS+YosESKFKhILEKVKhEYQKTMGS 
.....+sl.-h+hpCPHols..hGps.l.G.sEls.h.lttstphh.ESSCshD...Hpohss.pphTpl.WctKu...pptpuspsoFps.ptplsh+GhChl..phh-psh+.s...++olhCYDLsCNpThCpPTlahIsPl.sC...c.Ch...........................................................................................................................................................................................................................................................................................sl.haCNG.+KhIhT+TLlIGQCIYohTSlFSlhPuVAHSlAlELCVPGhHGWAThhLlhTFCFGWlLIPshThhlLhhLhhhs..hpp.s.-pph+.llp.............. 0 0 0 0 +2858 PF01561 Hanta_G2 Hantavirus glycoprotein G2 Bateman A anon Pfam-B_401 (release 4.0) Family The medium (M) genome segment of hantaviruses (family Bunyaviridae) encodes the two virion glycoproteins. G1 and G2, as a precursor protein in the complementary sense RNA. 19.30 19.30 20.30 19.30 17.70 19.20 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.75 0.70 -6.09 6 1286 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 278 0 1 760 0 186.40 42 58.72 CHANGED shsLpPsWoDsAHGVGllPM+TDLELDFSLPSSSpYTYRRcLpNPsNEppplsFHlpI-cQsItA-IQpLGHWhDupaNLKTuFHCYGACpKYtYPWpTAKCahEKDYEYETuWGCNPsDCPGVGTGCTACGlYLDKLKSVGpsaKIISl+YoR+VClQLGsEppCKpIDsNDCLlTsuVKVClIGTlSKFpsGDTLLFLGPLEtGGlIFKpWCToTCpFGDPGDIMSTpsG.hpCPEasGuFRKKCsFATTPlCEYDGNTlSGYpRhlAT+DSFQSFNsT-PHlTss+LEWhDPDuoLRDHINllls+DlsFpsLuENPC+VsLpsuSIDGAWGSGVGFoLsCpVSLTECsoFLTSIKACDpAMCYGAoossLsRGQNTV+IsGKGGHSGSpFpCCHsp-CSppGLpAuAPHLDRVsGhsplDs-KVaDDGAPECGlpCWFpKSGEWlhGILsGNWlVlsVLlVlLILSIhLhSlhCPsR.p+KKu 
........................................................................h.............................ChGtC.c.th...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 1 1 1 +2859 PF00846 Hanta_nucleocap Hantavirus nucleocapsid protein Bateman A anon Pfam-B_310 (release 3.0) Family \N 21.40 21.40 21.60 54.70 21.00 21.30 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.39 0.70 -5.78 4 1583 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 297 6 0 1051 1 233.20 54 99.96 CHANGED MSsLcElQ-pITtHEQQLVhARQKLKDAEKsVEVDPDDVNKSTLQSRRuAVSuLEsKLuELKRQLADhlusQKhsuKPVDPTGlEPDDHLKE+SuLpYGNVLDVNuIDlEEPSGQTADWhuIGsYIluFslPIlLKALYMLSTRGRQTVKENKGTRIRFKDDSSaE-VNGIRKPKHLYVShPTAQSTMKA-EITPGRFRTIsCGLFPAQIKARNIISPVMGVIGFuaFVKDWs-+I-sFLspcCPFL.....Pcstusutt.hhuT.RAYhhsRQstlspocl.DIssLhptApAuusTLhsDIpoPHSsWVFACAPDRCPPTsLYlAGlsELGAFFuILQDMRNTIMASKoVGTAEEKLKKKSAFYQSYLRRTQSMGIQLDQRIIlhYMlpWGKEsVNHFHLGDDMDPELRQLAQuLIDsKVKEISNQEPLKL 
..............................................................................................................................................Ih.Ylhshs.sIlLKuLYhLoTRGRQT.K-NKGhRIRhKDDo....lNGIR+PKHLalShPsAQSoMKA-ElTPGRFRThlCGLaPsQIpsRNhhSPVMuVlGF.hhsKDWspRIccahtt.C.Fh..........sts..s.h..t....p.p.h..hN+sYhhpRQtsLst.cst-hptlhpaAtsusso..-pIcSPpu.WlFusAPDRCPPTs.......................................................................................................................................... 0 0 0 0 +2860 PF03866 HAP Hydrophobic abundant protein (HAP) Finn RD anon DOMO:DM07503; Family Expression of HAP is thought to be developmentally regulated and possibly involved in spherule cell wall formation [1]. 22.30 22.30 24.60 24.00 22.20 22.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.16 0.71 -4.45 2 5 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 3 0 0 5 0 146.80 39 58.77 CHANGED MMKYlFlALChhAlV.LAoEs-.tcusp+..tLsVtG...LlsClVAllApIVuGLLRVIIGLVlTLSGVLpIVlGsVlhLVsslAuLALDlVtuoVsGIhsshLshshlhphlEEsL+s..uQ.LLsGLVpAlhALPLulLVALssLT-ulApuuCshGhStsGhh ...........................MMKYlFlALChhAlV.LAoEs-.tcusp+..tLsVtG...LlcCIVAllAsIVS.GLLRVVlGLVI.TLS.GVLQIVlGlVlpLVDllAuLALDlVsuTVTGIL.NcLLcFDhlhthlEEsL+s..uQ.LLsGLVpAlhALPLulLVALssLT-ulApuuCshGhStsGhh.................... 
0 0 0 0 +2861 PF01543 HCV_capsid Hepatitis C virus capsid protein Bateman A anon Swiss-Prot Family \N 21.60 21.60 21.70 21.70 21.20 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.72 0.71 -4.12 5 9598 2009-01-15 18:05:59 2003-04-07 12:59:11 12 23 146 9 0 6731 0 106.30 87 18.88 CHANGED STNPKPQRKTKRNTNRRPQDVKFPGGGQIVGGVYLLPRRGPRLGVRATRKTSERSQPRGRRQPIPKARsPEGRoWLSPGTLGPShuhRAlsGpDG.C.....GhAGh..sPtsuRs.Ws.ssPhhR ...............................................................STsPKPQRK.T.K..RN..TN..RRPQDVKFPG..GG..QI..V..G..G...V.YLLPR....RGPR...LG.V..RATR..K.......T...SER.........S..QPR..G..R..R.....QPI.....PKA....RRPEGRoWAQPGYPWP......LYGNEG.h......GWAGWLLSPRGSRPSWGPoDPRRR................ 0 0 0 0 +2862 PF01542 HCV_core Hepatitis C virus core protein Bateman A anon Swiss-Prot Family The viral core protein forms the internal viral coat that encapsidates the genomic RNA and is enveloped in a host cell-derived lipid membrane. The core protein has been shown, by yeast two-hybrid assay to interact with cellular DEAD box helicases [1]. The N terminus of the core protein is involved in transcriptional repression [2]. 21.60 21.60 21.80 21.80 21.10 21.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.38 0.72 -3.59 6 7406 2009-01-15 18:05:59 2003-04-07 12:59:11 13 24 134 0 0 5245 0 59.90 89 8.38 CHANGED SRNLG+VIDTLTCGFADLMGYIPLVGuPVGG.VARALAHGVRlLEDGINYATGNLPGCSFSIFlLALLSCLTsPAS ...SRNLGKVIDTLTCGFADLMGYIPLVGAPLGG.sARALAHGVRsLEDGVNYATGNL.P.GCSFSIFLLALLSCLTlPAS.............. 
0 0 0 0 +2863 PF01539 HCV_env Hepatitis C virus envelope glycoprotein E1 Bateman A anon Swiss-Prot Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.44 0.71 -4.83 4 42235 2009-01-15 18:05:59 2003-04-07 12:59:11 12 27 140 1 0 24488 0 72.70 67 29.80 CHANGED EVRNlSuhYaVTNDCoNsSIsaphsDAlLHsPGCVPC.+-uNsSRCWlsVTPsVAs+psuh.TpsLRpHlDhlVhuATLCSALYVGDLCGuVhLluQhFhhSPp+H.hsQDCNCSIYPGHITGHRMAWDMMMNWSPTsshllA.hhRlPpsllDhIuGAHWGVlhGLAYFSM.GsWAKVlllLLLhAGVD .....................................................................................................................................................................................................................PssuhVluplLRlPQs..lhD.hl.AGAHW...G..VLAGlAYaSMVGNWAKVLlVhLLFAGVD............................................................. 0 0 0 0 +2864 PF01560 HCV_NS1 Hepatitis C virus non-structural protein E2/NS1 Bateman A anon Swiss-Prot Family The hypervariable region of the E2/NS1 region of hepatitis C virus varies greatly between viral isolates. E2 is thought to encode a structurally unconstrained envelope protein [2]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.19 0.70 -5.99 10 44241 2009-01-15 18:05:59 2003-04-07 12:59:11 12 24 122 3 0 25163 0 104.70 61 44.06 CHANGED +VTGGssA+TTpulsShFosGu+QcIQLINTNGSWHINRTALNCNDSLpTGFLAuLFYsH+FNSSGCPERhASCRsIDcFcQGWGPIoYss..ssspDpRPYCWHYPPRPCGIVPApoVCGPVYCFTPSPVVVGTTDRpGAPTYoWGENETDVFLLNNTRPPpGNWFGCTWMNSTGFTKTCGAPPCsIGGsGNsT..LlCPTDCFRKHP-ATYoKCGSGPWLTPRChVDYPYRLWHYPCTVNFTIFKVRMYVGGVEHRLsAACNWTRGERCDLEDRDRSELSPLLLSTTEWQlLPCSFTsLPALSTGLIHLHQNIVDVQYLYGlGSAlsSasIKWEYVlLLFLLLADA .........................................sTGGusA+sstG.l....s.u....L....F....o....s....GspQsIQLINTNGSWHINRT..ALNCNsSLpT.Ga..lAuL.hYh.p...+...F.NuSGCPER.hASC+slspFsQGW.GPIsasp.....st.s.......-pRP.................................................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +2865 PF01538 HCV_NS2 Hepatitis C virus non-structural protein NS2 Bateman A anon Swiss-Prot Family The viral genome is translated into a single polyprotein of about 3000 amino acids. Generation of the mature non-structural proteins relies on the activity of viral proteases. Cleavage at the NS2/NS3 junction is accomplished by a metal-dependent autoprotease encoded within NS2 and the N-terminus of NS3 [1,2]. 
25.00 25.00 25.10 25.00 24.80 23.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.19 0.71 -4.87 29 2278 2009-09-12 04:23:25 2003-04-07 12:59:11 13 38 114 14 0 2355 0 183.60 72 9.16 CHANGED DsppuuslGssVlshlslhTLoPsYKthLs+hlWWhQYhlsRsEAhlplWlPPLpsRGGRDu.lIlLsslha...PplsF-lTKhLLAlLGPL....YlLpsuLl+VPYFVRApsLLRsChLlRplAGGKYsQhsLLclGthTGTYlYDHLuPloDWAusGLRDLAVAVEPVlFSPMEpKlITWGADTAACGDIlpGLPVSAR ........................................DpEhAASCGGsVLVGLhhLTLSPaYKtaluhhlWWLQYFlTRsEApLpVWVPPLNVRGGRDA.lILLhCslH...PsLlFDITKLLLAlhGPL....alLQ...AuLh+VPYFVRsQGLlR.hChLsRKhAGGHYVQMAllKLGALTGTYVYsHL.TP.L+DWAHsGLRDLAVAVEPVVFSpMETKlITWGADTAACGDIIsGLPVSAR..... 0 0 0 0 +2866 PF02907 Peptidase_S29 HCV_NS3; Hepatitis C virus NS3 protease Griffiths-Jones SR, Knutson S anon Structural domain Domain Hepatitis C virus NS3 protein is a serine protease which has a trypsin-like fold. The non-structural (NS) protein NS3 is one of the NS proteins involved in replication of the HCV genome. NS2-3 proteinase, a zinc-dependent enzyme, performs a single proteolytic cut to release the N-terminus of NS3. The action of NS3 proteinase (NS3P), which resides in the N-terminal one-third of the NS3 protein, then yields all remaining non-structural proteins. The C-terminal two-thirds of the NS3 protein contain a helicase. The functional relationship between the proteinase and helicase domains is unknown. NS3 has a structural zinc-binding site and requires cofactor NS4A. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.80 0.71 -4.68 9 8309 2012-10-02 13:45:52 2003-04-07 12:59:11 10 46 119 110 1 6342 8 144.80 87 19.64 CHANGED pGpl.VLuTuTpoahGTClNGVhaTsaHGAuu+TLAGPpGPlsphasssspDlssaPsPsGApSLpPCoCGusclYLlTRcusll.uR++GD.pssLhuPpPlSshKGSSGuPlLCspGHsVGhFpAAlsotGsspulcFl.PhEslsss ......................................EGEVQlVSTATQoFLATClNGVCWTVYHGAGo+TlAuPKGPlhQMYTNVDQDLVGWPAPs.GARSLT.PC.TCG.S..S...D..LYL..V.......T....R...H..A.....DV..IPVR.RRG....D...S.RG.....SL.....L...S..P.....RPlSYL.....KGSSGGPLLCPuGHAVGIFRA..AVCTR..GV..AKAVDFI.PVEshETT................... 0 0 1 1 +2867 PF01006 HCV_NS4a Hepatitis C virus non-structural protein NS4a Finn RD, Bateman A anon Pfam-B_315 (release 3.0) Family NS4a forms an integral part of the NS3 serine protease, as it is required in a number of cases as a cofactor of cleavage [1,3]. It has also been reported that NS4a interacts with NS4b and NS3 to form a multi-subunit replicase complex [3]. 21.00 21.00 21.10 22.60 20.90 20.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.64 0.72 -4.43 15 2766 2009-01-15 18:05:59 2003-04-07 12:59:11 15 42 118 67 1 2514 0 54.10 82 3.25 CHANGED SsWVLVGGlLAAlAAYChosGSlVIlG+lslsGpP..................sllPD+EshappaDEMEEC .......STWVLVGGVLAALAAYCLoTGsVVIVGRIlLSGKP..................AlIPDREVLYppFDEMEEC............................... 0 1 1 1 +2868 PF01001 HCV_NS4b Hepatitis C virus non-structural protein NS4b Finn RD, Bateman A anon Pfam-B_315 (release 3.0) Family No precise function has been assigned to NS4b. However, it is known that NS4b interacts with NS4a and NS3 to form a large replicase complex to direct the viral RNA replication [1]. 
20.30 20.30 20.40 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.12 0.71 -4.91 11 2035 2009-01-15 18:05:59 2003-04-07 12:59:11 14 36 116 2 0 2032 1 181.50 82 8.46 CHANGED hp.pshshlssAstpApshpshlpo.....h.s+hpphWsphhhNhhShIthhsuh.shstNPslushhuFhhulooshssphpLhhslLhGhsuotLusPssuhAhssAhhAGuulso.hLu.lllslLuGatuuVsuA.lsFclhuGchs.s-DhhsLlsslhSPGAuVsGVshuslLhphhs..tGuspWhNRLls ..........................FKQKALGLLQTAo+QAEshsPsVpo.....sWp+LEsFWAKHMWNFISGIQYLAGLSTLPGNPA.IASLMAFTAulTSPLTTppTLLFNILGGWVAAQLAsPu.AAoAFVGAGlAGAAlGSlGLGKVLVDILAGYGAGVAGALVAFKIMSGEhPSTEDLVNLLPAILSPGALVVGVVCAAILRRHVGPGEGAVQWMNRLIA....... 1 0 0 0 +2869 PF01506 HCV_NS5a Hepatitis C virus non-structural 5a protein membrane anchor Paterson M, Bateman A anon Bateman A Family The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. The N-terminal region of the NS5a protein has been used in the construction of the alignment for this family. The C-terminal region has not been included because it is too heterogeneous. 20.50 20.50 20.60 20.70 20.30 20.00 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.48 0.72 -6.96 0.72 -4.20 19 5540 2009-01-15 18:05:59 2003-04-07 12:59:11 14 38 115 5 0 5082 0 22.40 82 2.09 CHANGED usWLhDlWDWVsplLschpshL+ ...GSWLRDlWDWICpVLoDFKTWLp.. 0 0 0 0 +2870 PF04618 HD-ZIP_N HD-ZIP protein N terminus Kerrison ND anon DOMO:DM04570; Domain This family consists of the N termini of plant homeobox-leucine zipper proteins. Its function is unknown. 
21.70 21.70 22.10 22.90 20.30 19.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.77 0.71 -3.54 11 75 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 25 0 39 87 0 93.00 39 35.13 CHANGED Mt.ccD......cLGL..SLSLuhstppts............................tpss.phshtPs.................sop.pptsuppthhsspss-tpoFLRGIDVNRhPuss............-h.EEE.sG...VSSPNSTlSS.lSG.KRSppctp ..................................................................................................................huh..uhtLs...........................................t......................................t......t..t.....tspphp.....hsh.lRGIDVNRhPsss.............ch.-E-..sG...VS.SPNSTlSS.hSG..KRuppp.s................................. 0 5 21 27 +2871 PF02329 HDC Histidine carboxylase PI chain Mian N, Bateman A anon Pfam-B_19599 (release 5.2) Domain Histidine carboxylase catalyses the formation of histamine from histidine. Cleavage of the proenzyme PI chain yields two subunits, alpha and beta, which arrange as a hexamer (alpha beta)6. 
25.00 25.00 28.30 28.00 15.80 15.10 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.96 0.70 -5.78 4 71 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 62 34 9 58 3 262.60 46 94.92 CHANGED csKlpthchD.....ppAISP.+caCpGYhpPGsh.GNGYVoslKlSsGsVDhoD..hLDu...............IVSYDRsEpNDAYlGQINMhTASSFsGlpGtlWGaDlAhp-sltpcK.hPLYpppQ.-Gss.....lPVYsh+PLL-As.cLFGptc..cRRFsshPGuaVlCANKusTA.cPp..tshK.G...hVWSsIuLulhcDRs+susLFlEDAGla..css.....sc--lhtaLEsph+tsTpSls.CGpDpHlha-chaIGatYshhcPGQlGsALoCAPYVoLApsAlPus..hpPuc....LspholSpW.-chshcsL..p..pK ......................................................s...............+hAluPacpascGYh.PGsh.GpGYVosLKVssGsscKT...DD.lLDu...............IVSYDRAEtpsAYlGQINMhTASSFsGhtGplhGaDlhtpspl..cc.pPLap.pQaDGoc.....L.lYDu+PL.-uhlEhFGTpc..pRRa.shPGAalhCANKulTA.RPpp..spshp.GpuYGVWShIAlShupDtscsushFlEDAGlW.tcss.....sE--lhtaLcs+R+uhshSlspCGcDp.....p.....lhacpoaIGFAashMcPGpIGsAlTsuPYlshshsulPut..hhPs.....hpphohspWL-cMsapsL.......t............... 
0 4 7 8 +2872 PF02924 HDPD Bacteriophage lambda head decoration protein D Griffiths-Jones SR anon Structural domain Domain \N 29.00 29.00 29.00 29.00 28.40 28.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.43 0.71 -4.31 74 818 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 475 18 82 442 27 110.10 37 93.51 CHANGED shspshp.hu-hLhhEussph.SR-ssslsuG....sslssGoVLG.....tlsA...o................................................................................................Gc...asshsss.us...DGu.p.sAsulLhtslsAoss.cspsss....lsttu.lspssLsassuhss.sp+ssAhspLssh.uI ..........................................................................p....s.p.thp.hh.usss.s..tTusGshuts.....sssPAhTPLM......LDsu......o......................................................................................................GK...LVsW...........DGp...t....pAVGILs.l.sl-s.T......psslTa...YKSGo..Fsh-slhWP-uss-.pKKtsAFAGoA.....th................................. 0 28 48 67 +2873 PF01517 HDV_ag Hepatitis delta virus delta antigen Bateman A anon Pfam-B_808 (release 4.0) Family The hepatitis delta virus (HDV) encodes a single protein, the hepatitis delta antigen (HDAg). The central region of this protein has been shown to bind RNA [1]. Several interactions are also mediated by a coiled-coil region at the N terminus of the protein [2]. 
25.00 25.00 26.00 25.90 21.90 21.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.26 0.71 -4.62 4 1108 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 61 5 0 1072 0 117.60 80 89.58 CHANGED MSQo-sR+sR+G.REEhLEpWlsuRK+tEELE+DLRKspKpIKKLEE-NPWLGNlhGIIRK.cKDGEGAPPAKRsRpDQMEVDSGPtKRP+tuGFTDpERcDHRRRKALENKKKQLSuGGKpLS+EEEEELRRLT-EDEcRERRlAGPRVGsVNPh-GGPRGAPGGGFVPSMpGVPESPFoRTGEGLDIRGoQtFP ...........................................................................................................................KQLuuGGKsLS+EEEEEL+RLTEEDERRERRVAGPpVGGVNPLEGGsRGAPGGGFVPSMQGVPESPFoRTGEGLDlRGsQGFP....................... 0 0 0 0 +2874 PF02985 HEAT HEAT repeat Griffiths-Jones SR anon Reference [2] Repeat The HEAT repeat family is related to armadillo/beta-catenin-like repeats (see Pfam:PF00514). 23.50 17.60 23.50 17.60 23.40 17.50 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.53 0.74 -7.59 0.74 -3.39 590 1878 2012-10-11 20:00:59 2003-04-07 12:59:11 17 196 359 47 1138 5988 309 30.50 27 3.66 CHANGED llshlh..phlp...Ds..s.pVRtsuspslsplsphh ...........hhshlh....phhp....Ds..sspVRpuAspsLsphsp..h..... 0 419 626 897 +2875 PF03130 HEAT_PBS PBS lyase HEAT-like repeat Mifsud W, Bateman A anon Pfam-B_172 (release 6.5) Repeat This family contains a short bi-helical repeat that is related to Pfam:PF02985. Cyanobacteria and red algae harvest light energy using macromolecular complexes known as phycobilisomes (PBS), peripherally attached to the photosynthetic membrane. The major components of PBS are the phycobiliproteins. These heterodimeric proteins are covalently attached to phycobilins: open-chain tetrapyrrole chromophores, which function as the photosynthetic light-harvesting pigments. Phycobiliproteins differ in sequence and in the nature and number of attached phycobilins to each of their subunits. 
This family includes the lyase enzymes that specifically attach particular phycobilins to apophycobiliprotein subunits. The most comprehensively studied of these is the CpcE/F lyase Swiss:P31967 Swiss:P31968, which attaches phycocyanobilin (PCB) to the alpha subunit of apophycocyanin [1]. Similarly, MpeU/V attaches phycoerythrobilin to phycoerythrin II, while CpeY/Z is thought to be involved in phycoerythrobilin (PEB) attachment to phycoerythrin (PE) I (PEs I and II differ in sequence and in the number of attached molecules of PEB: PE I has five, PE II has six) [2]. All the reactions of the above lyases involve an apoprotein cysteine SH addition to a terminal delta 3,3'-double bond. Such a reaction is not possible in the case of phycoviolobilin (PVB), the phycobilin of alpha-phycoerythrocyanin (alpha-PEC). It is thought that in this case, PCB, not PVB, is first added to apo-alpha-PEC, and is then isomerised to PVB. The addition reaction has been shown to occur in the presence of either of the components of alpha-PEC-PVB lyase PecE or PecF (or both). The isomerisation reaction occurs only when both PecE and PecF components are present, i.e. the PecE/F phycobiliprotein lyase is also a phycobilin isomerase [3]. Another member of this family is the NblB protein Swiss:Q9Z3G5, whose similarity to the phycobiliprotein lyases was previously noted [4]. This constitutively expressed protein is not known to have any lyase activity. It is thought to be involved in the coordination of PBS degradation with environmental nutrient limitation. It has been suggested that the similarity of NblB to the phycobiliprotein lyases is due to the ability to bind tetrapyrrole phycobilins via the common repeated motif [4]. 
20.90 13.00 20.90 13.00 20.80 12.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.44 0.74 -7.55 0.74 -2.93 221 340 2012-10-11 20:00:59 2003-04-07 12:59:11 11 54 166 1 174 4509 751 27.50 31 10.24 CHANGED lRttAstuLuplss..............pulshLhpsLps ....hRhtAAhuLGplts..........................pAl.ssLlpsLp........................... 0 65 142 163 +2876 PF00632 HECT HECT-domain (ubiquitin-transferase) Bateman A anon Prosite Domain The name HECT comes from Homologous to the E6-AP Carboxyl Terminus. 20.40 20.40 20.40 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.28 133 4368 2012-10-03 14:42:41 2003-04-07 12:59:11 20 240 341 23 2797 4212 88 293.50 28 21.16 CHANGED phl.....spchh.s.sp.........shF...............................................................................h.tsspt....hhh.s....................................................................a...phlGtlluhulhs.......sh.h.....lslpFs.hha+............................................hlh.....................ttt......ls.hpDlt.p.....................lDsphhps.lptlh..t.p.t......................................h...slsFs.h...t......................................................................................................................................................................................................................................................................pL..h..s..G...psl.VTp..pNtpcYlphh....hca.hlpptlppphps........hpcGFt..pll.s......tphlp.hFsspELphllsG.....ps.p.......lshpsLcptsp..ap.tsas.....tss......................ttlphFa.cl..............................lpp.hsp-pppphLpFlTGss+lPh..sGhp.th..........php.....Ipphss..............................................................................................ss..t....LPoApTCa.shLcLPp..........Ysop-hhcc+LhhAlpps.p..s.Fshs 
...................................................................................................................................................................................................................................................................................h..l.pphh...s..s.......h...shF...................................................................................t.p.pt........hh....s..............................................................................pphph................................................F...ca.l..G.p.l.lG.h..........Alh.c.........sp..h................l-.h...FshshaK.................................................................................................................tlL...................................................stt.......hs..h.pDlp..p.......................................................................l.D...s..phh...pu.Lhh...lh......pts..t...................................................................................tl......sLs..Fs..ls.pt.....................s.h.......................................................................................................................................................................................................................................................p..........................h-.L....h.ss..G..................psl....V...T...p.............cN.+.p-.Yl.c.hh.............spa..ph....p....p....t....l..p.p...Q.hpA......................Fh.....c..G.Ft...pl..l.s........................p..h.....l..p.....hF.....s.........pE..L...c..h...l.l.s.G..................s..c.............................lDl..p..c....hcpp........opY..........c...tsas.............ts..p..............................hl....p...h.F.W....cs........................................................l....pp....h....s....p....-.......c....+.....t....ch...L..p..F.lT....GosR.....lPh........tG......Ft..tL..................................phs.......I..pphs.t............................................
.............................................................................................................................................ts...p.....LPp.u.p...T...............C..a..N.....h.....LcLP..............Y...p.o.....h......c.....hLcc+LhhAlp.ps.t.u.Ft............................................................................. 0 1023 1454 2163 +2877 PF03451 HELP HELP motif Bateman A, Suprenant KA anon [1] Family The founding member of the EMAP protein family is the 75 kDa Echinoderm Microtubule-Associated Protein, so-named for its abundance in sea urchin, sand dollar and starfish eggs. The Hydrophobic EMAP-Like Protein (HELP) motif was identified initially in the human EMAP-Like Protein 2 (EML2) and subsequently in the entire EMAP Protein family. The HELP motif is approximately 60-70 amino acids in length and is conserved amongst metazoans. Although the HELP motif is hydrophobic, there is no evidence that EMAP-Like Proteins are membrane-associated. All members of the EMAP-Like Protein family, identified to-date, are constructed with an amino terminal HELP motif followed by a WD domain [1]. In C. elegans, EMAP-Like Protein-1 (ELP-1) is required for touch sensation indicating that ELP-1 may play a role in mechanosensation [2]. The localization of ELP-1 to microtubules and adhesion sites implies that ELP-1 may transmit forces between the body surface and the touch receptor neurons. 21.90 21.90 22.20 22.10 21.80 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.51 0.72 -4.46 7 705 2009-01-15 18:05:59 2003-04-07 12:59:11 9 111 107 0 408 642 23 69.00 42 8.56 CHANGED cpG.l+halRG+slshhhPsc.h.shc.sscp-hPsp+LcL-WVYGYRG+DsRsNlalLPTGEllYFhAuVsVLash- ............................................................Ppt........p..tt.c.p.chP.....s..p.cL+L-.a.......V.....YG...YRG.+DC.R.s.NL..a.hL.s.............s......G.......E........l.VYalAuVsVlYNh.......................... 
0 124 162 260 +2878 PF03996 Hema_esterase Hemagglutinin esterase Finn RD, Marshall M anon Pfam-B_505 (release 5.5) Domain \N 25.00 25.00 50.60 50.60 20.70 19.60 hmmbuild -o /dev/null --hand HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.92 0.70 -5.16 6 382 2012-10-02 11:02:24 2003-04-07 12:59:11 10 4 208 19 0 295 0 323.80 45 74.88 CHANGED hspplLlssShuhshGF...............scPhsssSHLN.sDWhhFGDSRSDC...sNsu.Pp.shhshpss.phSuK.Suphh.SlFts.HhoDah..YpGpGpphVFYEGVNFSPatuacChspG.sphWhs.KspFYsplYchhup.RslSlVNl.hshss.shssulCKssspshspsshlhpppshssphhpsctshshsshs.............tsCphhhsshplaNspthusph.shhspastslsshs.hssssTtspssuhDhsCcYLtLpPGsYpuhSsthhLshPoKuhChcphc.hsPVQsVpShWspsRpSDshsu.ACp.sPYChFhNpossYsG..stDsHHGDtchRplLSGLhYsusCISQQGshshsssSoph.hhP..YG+CPpAAclts ....................................................................................................................................s.PpsssShLN.usWhhFGDSRoDp...sNss..p.u.hshpsA.phpuh.uuphh.ShFts.thsDah..YpGpGpphlFYEGVN..aoPatAhcChpps....Whp.KhtF.pplYc.hu..+shohVNs..hh.s.t.stuhstsss.sh.psshlhpptshs...htsctshh..s.................s..hh.shphastchhsshh.h..sp.hhshsshs.hs.s.Tts.sss.shpCchl.L.sGpY.shSs.hhLhhPp+uhChshpc.hsPVpsVpShWspsRpSD.hss.AC..sPhChhhppppsYlG...hD.pHGDtthpplLSGL.Ypu.ChSQpGhhp.sss.o...LhP..aGRCPhAAc.ps............................................................. 
0 0 0 0 +2879 PF02710 Hema_HEFG Hemagglutinin domain of haemagglutinin-esterase-fusion glycoprotein Finn RD, Bashton M, Bateman A anon Pfam-B_505 (release 5.5) Family \N 25.00 25.00 36.90 36.90 19.30 18.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.17 0.71 -3.92 12 364 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 203 19 0 284 0 131.10 47 29.16 CHANGED pFYppLYphhuh.Rslphlsl...p.shshost.psshCp....ssphh.sNPpalshpsshss........spFsLsu......CscaLVPhChh...suthhssst.....hhps.thaYs.Do.hhYGasssss......stthDhsCcYL.lcPGsYpAhossh ..F.pplYchhu..+slohVNs...shshpsost.ssu.Cp......s.hLpNPAhhspEsp.s-.hht.E..A.FsLss......Cc.alVs.ChF...stcshsppt.....hhs..QhhYspsstVlhGLsspho......sss.shpCchl.LpPGpYpshSs..t.. 0 0 0 0 +2880 PF00509 Hemagglutinin Haemagglutinin Finn RD anon Pfam-B_26 (release 1.0) Family Hemagglutinin from influenza virus causes membrane fusion of the viral membrane with the host membrane. Fusion occurs after the host cell internalises the virus by endocytosis. The drop of pH causes release of a hydrophobic fusion peptide and a large conformational change leading to membrane fusion. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 550 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.87 0.70 -6.37 15 50634 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 45867 505 5 30865 0 432.00 51 96.33 CHANGED IClGYHusNSocpVcTlhEpsVsVTpupELlEos+ssclCshp.....stpPlcLtcColtGhlLGNPpCDhhLs.psWSYIlERssu.suhCYPGsl.shEELRplluSstpap+lchhscs.sWssshsss....GsosACppsuss.uFaRslsWLsKp.....psssYPshssoYsNTcsp-hLhlWGIHHPsossEQssLYpsussh.loVuTpohspsasPpIGsRPtVs..G...QuGRhcaYWoll+PG-olsFpoNGNLIAPcauaplsspu.u.sIhposlslssCsocCQTstGuINospPFQN.lsphsIGcCPKYVKssSL+LATGhRNVPplp....pRGLFGAIAGFIEGGWpGhIDGWYGF+HpNupGoGhAADpcSTQcAIDpITsKlNslI-KMNppF-sls+EFsclEcRIpsLsc+l-DshhDlWoYNAELLVLLENp+TLDhHDSNVpNLaE+VRRQLR-NAc-tGNGCFElaHKCDspCMESIRNGTYDHscYcEEu+lNRpcIsGVKLcSss.sYcILsIYSslASSLlLsshlhGhlhWsCpNGshRCpICI .................lClGaHAsssss...hVcTlh-cplpVTpuspLlpsppsG+lCs.......shtsLchtsColhshlLGsPpC-sh.s.ppWshlVE+usu..pusCYPhDhsDYppLRplluSs...tphEhhscs.sW.stssps....GsS.uAC.+susp.SFappLhWLs+......psssYPsLshoasNscpp-hLhlWGlHHPs.o..ss-QpsLYtpusuh.loVuTp+hppphhPpIusRP+VR...s.....QsGRIshYWTllcPGDtlhhpusGNLIAPRhsFtlpps..........tSuIhpSDsPlspCso.pC.TPpG...uIsss...hPFQN.lp.lThGtCP+YVKpspL+LAT.GhRNlP.php...........oR.G.......lFGAIAGFIEsGWpGhlsGW.aGapppstpG.u.AAD.................................................................................................................................................................................................................................................. 
0 2 4 4 +2881 PF01126 Heme_oxygenase Heme_oxygnease; Heme oxygenase Finn RD, Bateman A anon Prosite Domain \N 20.20 20.20 20.30 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.32 0.71 -4.76 18 1215 2012-10-02 21:56:19 2003-04-07 12:59:11 15 7 902 120 452 1089 282 182.30 20 75.77 CHANGED ssLuppLRcuT+csHshAENssFsKsFLpG.lscpsY+pllusLYalYpAlEcEhccp+spshhuslhFP.ELpRpsuLccDLsaaa.G.ssWcptlpsosAsptYVpRl+clusp...pPcLLlAHuYTRYLGDLSGGQlLK+................................................IAppuhsLsss.pGhuFYcFssls...spptFKppYRptLssl.ls-pppppllcEAspAFthNhplFp- ..............................................................................h..hsttl+ttTpt.Hpth.-........p..hhtt...h.....................hs.h.ptahphltt.ah.......hap....s.......lE.t....th................t........................................................................ht.h.h....s...pL...h...R.pt.t.LtpDlt.hh............h..s..............p.h...........t.........t........................h..........t.......s...............................s.......h..t......t...h......s..t..c..l..c....p...............l......spp...........pst.....hhluahYsh.h.u.sLu.GGp.ll.++................................................................................hhtpt..hs..l...s.t.....p..............t..........u..hp..........ahp...a..s...s...................sttth.hptac...pt...l...s...p..l..s...h...s.t.p.p.p...p.phlppAppuFth.hthht.................................................................................................................. 0 109 241 359 +2882 PF00372 Hemocyanin_M hemocyanin; Hemocyanin, copper containing domain Finn RD, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes arthropod hemocyanins and insect larval storage proteins. 
20.80 20.80 21.20 21.10 19.60 20.10 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.72 0.70 -5.11 14 743 2012-10-01 19:27:11 2003-04-07 12:59:11 14 15 199 48 195 807 0 247.20 31 41.35 CHANGED s.aElaPphFssu-sIp+Aaph............chtp.....................................................ph.-cs............hlhcsphosslh..spEp+luYFpEDIGlNsaaaaaHhsYPFaasschaGthK-R+GElaaYhaQQlhARYchERLSNGhscl..FpahcslcpGYhP.hhplssGh.au.RPsshplcshcshc.lp.h-caccpIh-uIppGalps.cGppIsLccscul-hLGsllpu....ss-sssh.Y.atoLcshu+hhluthscPps+aphsPulh-papTSLRDPlFYpha+hIsNlFpc .....................................................................................ElhP.hFhs.sphl.pu.......................................................................................................................................t....................hhl...h..s..ao...s...s...h.....p.Ep.+luYFpE..DlGlNsaa.......aaaHhs.Y.........P....h....h...h..p.....p....h...........s....h.p.K.D......R..+GELF.aY.hHpQllAR...YphERLo.N....sLsclp.h...s.a.h.c..s.l.p..G.YhP..hh.p.h.h.s...G.ht.as..sR......ssh...ph......p......c.....h.............p.......p......h.....p..................l......p....chppacpRlh-AIp.pGa....l....hs.t....sG.....p.p..lsL.p......p..................c........u....I-hLGs.hlcu...........s....Ss.s.pa..Y.....G.s.lashu+hhl.uh.p-.sp..tc...a..................suVhtch....p.TuhRDPh....F.Yphaphlssha........................................................................ 0 61 85 176 +2883 PF03723 Hemocyanin_C hemocyanin_C; Hemocyanin, ig-like domain Finn RD, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes arthropod hemocyanins and insect larval storage proteins. 
26.40 26.40 26.40 26.70 22.10 26.30 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.60 0.70 -5.24 163 687 2009-09-11 10:37:18 2003-04-07 12:59:11 9 13 179 48 209 753 0 235.80 31 36.26 CHANGED ssYTc-......-LpFsGVplssVpV.p..........LhTaa-ph-h-losul.shs.........................schslhs+hp+LNHcPFsY..pIpVps......spssst..sVRIFluPKaDph..Gp.hshs-pRhhhlElD+FhhcLpsGpNsI...sRpSp-SslTlscphoaccLhpph.ts.sttt.h...........php.stCGaPp+.hLLPKGpt.pGhsapLFVhlosaptspl.tpt........s.sshshCGht.sppYsDp+PhGaPFDR.l.........spht.a.h....NMthpDVpIhac ...................................................sYoppcLtasG..VplpslpV.sp.............................LhTaacp.-hDlssulshs..............................phslhu+hp+LsHcPFsaplsVp.s......spstpt.......sVRlFluPKhDph..G.p.....h.s...h.p-pRhh.hlElDcFhhp.LpsG..p..N..sI...pRpSp-ssholspchoap.pLhpph.tsh.p.t...h...........ph.ph...stCG....aPp+.hL..LP+G.p..p.Gh...hpLalhlosaptsps..tp......................p.ssh.h.CGht..sphasDp+shGaPFDR.l..........ppht.h.h...sNhhhp-lhIha.................................... 0 59 86 184 +2884 PF03722 Hemocyanin_N hemocyanin_N; Hemocyanin, all-alpha domain Finn RD, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes arthropod hemocyanins and insect larval storage proteins. 21.70 21.70 22.60 21.70 21.50 21.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.50 0.71 -3.97 111 668 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 164 48 193 736 0 113.70 27 17.52 CHANGED shhpKQpplhpLhp+lpps...h.t.chhplupsap.......tp.t.a...pcsch...spp.hhp.h+pshhLs+schFSlappcphcEAhtLaclLatA..KDa-sFh+sAsasRpplNEGhFlYAlslAllHRsDscGl .............................................................t..lh.lh.ph.t............phhthspph.........t....t.h....tp.ph......hpp..hh..hphs.hLs+sphFSla.tcHtcpAh.tLhclhhtu....pDa-sFhpsAsa..uRp+lNpshFlYAlolAllHRsDscsl................. 
0 49 75 166 +2885 PF00045 Hemopexin hemopexin; Hemopexin Ponting C, Bateman A, Sonnhammer ELL anon SMART Repeat Hemopexin is a heme-binding protein that transports heme to the liver. Hemopexin-like repeats occur in vitronectin and some matrix metallopeptidases family (matrixins). The HX repeats of some matrixins bind tissue inhibitor of metallopeptidases (TIMPs). 21.30 21.30 21.50 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.30 0.72 -4.22 76 5321 2009-01-15 18:05:59 2003-04-07 12:59:11 14 87 208 96 2483 4600 17 44.70 26 26.74 CHANGED lDAshphp.......schaFF+GspaWchssp......php.shPp.lssh..ulss .................hDAshphpt..........uchaFF..K..G.....s..p..aW.+...assp..........php...s...u...aP...+.lsph...uls......................... 0 348 533 1140 +2886 PF05171 HemS Haemin-degrading HemS.ChuX domain Finn RD, Bateman A anon COG3720 Domain The Yersinia enterocolitica O:8 periplasmic binding-protein- dependent transport system consisted of four proteins: the periplasmic haemin-binding protein HemT, the haemin permease protein HemU, the ATP-binding hydrophilic protein HemV and the haemin-degrading protein HemS (this family). The structure for HemS has been solved and consists of a tandem repeat of this domain. 
20.40 20.40 20.50 20.40 20.00 20.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.37 0.71 -4.33 108 1015 2012-10-01 19:49:10 2003-04-07 12:59:11 7 7 499 8 260 1007 60 130.00 33 72.42 CHANGED sloctphltstsu......p...hsppls..ss.........hpplLpthtphG.tlMshstNcuslp.+sGsh..pslp.t.....G........hh.lLss...s..hsL+LhhsphspuasVc+sT......pc.G..hhpSlphFDspGsslhplaspcp....s-hs..uWctLlp..pL .......................................loctphhtshss.........c......h.Ah+lp...ss..........l.tplLps.hppsG...clMshstNc.usVp.psGsh..p.p..l....p...t.......s.....t.......hh....l.lss...p....hsL..+..Lh.sphAps.aslccsT.................sc..G...hhpSlphFDtpGs..t..ltpl..aupcp..........s-hssWcplluph....................... 0 53 127 187 +2887 PF00906 Hepatitis_core Hepatitis core antigen Bateman A anon Pfam-B_8 (release 3.0) Domain The core antigen of hepatitis viruses possesses a carboxyl terminus rich in arginine. On this basis it was predicted that the core antigen would bind DNA [1]. There is some experimental evidence to support this [2]. 22.60 17.00 22.70 22.70 22.50 16.80 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.32 0.71 -4.63 12 7177 2009-01-15 18:05:59 2003-04-07 12:59:11 15 10 91 27 0 5167 0 125.70 89 88.87 CHANGED DIDPYKEFGAoVELLSFLPsDFFPSlRDLl.DTAsAL..Yp-ALpSsEHC..SsHHTAL....RQAlhCWtElhsLuoWVGsNLpDPsuR-LVVsY.........................................VNsshGLKhRQLLWFHlSCLhFGR-TVlEYLVSFGVWIRTPssYRP.NAPILSTLPETsVVRp..Ru.....RoPRRRTPSPR.........RRRSQSPRRR.................RSQS.pSpC .............DIDPYKEFGAoVELLSF.LPSDFFPSlRDLL.DTASAL..YREALESPEHC..SPHHTAL....RQAl.lCWG-LMsLAoWVGsNLpD.huR-hVVsY.........................................VNsphGLKhRQlLWFHlSCLTFG+psVlEYLVShtsh..................................................................................................................... 
0 0 0 0 +2888 PF01771 Herpes_alk_exo Herpesvirus alkaline exonuclease Bashton M, Bateman A anon Pfam-B_822 (release 4.2) Family This family includes various alkaline exonucleases from members of the herpesviridae. Alkaline exonuclease appears to have an important role in the replication of herpes simplex virus [1]. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.50 0.70 -6.03 14 226 2012-10-11 20:44:43 2003-04-07 12:59:11 12 6 154 6 18 307 11 373.60 23 77.26 CHANGED s.hsplsshshsp.FLcss.htphhu......pptsthstlRasYhahlhh......phsshhusst............................ssshhtcthttshsst...............................plsplhtts....................pphsscttpplhhhlEttTRGQu-NsLWclLRcGllouSKhhausp.t.phs.+hFps.shpsspasuu......sltFGh+sEpsl+sLlsphhs.......sc.................................tsspshGhLhsPpsGlhGsSLDhhss..s.tpuptshl.hpsssplaEIKCRaKYLFsKs-..DPhhptYtpLhppPsttsLtcFlhSIs+PuVEahssG+lPots-aLlTpDtsWc.ts.p++tshsst+ph...htcplthNptspSpVhlhs......DPstspupIsh...................................................cuthplslFsNPRHsYFaQlLlQphVl.ppYlphpssP....pLso.tshlsouhFRc.......R.tssshsCplssc...tl..sscIPllLIlTPVhlstp.shpchlpputshWp.psspcpasph.sWsssus 
...............................................................................................................................................................................................................................................................................................................................................................................................................................hs.t.h.t...pth..hh....hlEptTRuQucs.....tLWplLRhshhTASphp.sst...t............s.....s..t..hhp.........h.p...s....t...hp.ss.............ulhFGhppEs..........hs.+.......s......l.ltphhht................................................tsshsCGhhhsspsGhhGAShDh.hhs........p.s..s...h..t..h.h.aElKCRhKYhhp.....p.h.t...tlhtt.s...hhthl.sh..Pslpah..tt.Pt.t-hLhs.p..h........h.............thh..Nt...S.lhlhs.................t...ttt.h.................................................................................h..shFhNs+H..ahQhhlQ.hlh.t.a.t...............s.hhp....t........R.................................t.tlPhhhllTPl.hs...hh..h.tts...ht.t.ht..............h.................................................................................. 0 14 14 18 +2889 PF04793 Herpes_BBRF1 BRRF1-like protein Waterfield DI, Finn RD anon Pfam-B_6247 (release 7.5) Family Family of herpesvirus proteins including Epstein-barr virus protein BBRF1. 
25.00 25.00 86.70 86.40 19.90 19.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.65 0.70 -5.64 10 28 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 21 0 0 22 0 273.10 29 93.11 CHANGED aHYP-lssllpElcslsl.ssshsppppPssp.LERulaLsKlhQlLhQHRppEppIlPplpsNlhYaLspLpchsssctpphlpslLscl..sssDhtLsscLsphlsclLpl+YPsthsl.....sppssshspWCLpahlGlspph+phlsssLsstc.sShPSlpshspLu+pLFhspuhhpEsasDssFNh.hNQlVaWTsslchappClapchls-oIsshpsLLKpEl+uFhpWhcppp......phpshthlcahcahlcplTsssocsclsElhphLpcsK...+sLhs aHYPslspllpphsslsl..ssh.s.tppPhhs.lcRuLaLs+lhQlLhQH+ppEphIlPplpspltYhLptLpchsssch.pphI...tplLshl..sssshsLppcLhphlsclLthcYPpthsl........ssshtpWCLpahl.Glotph+phlpshlsotp.sshPShpshspls+pLFhspsh..EshtcpsFN..hN.slFWTsslphappCIapphlhcpIssh.sLLKtEl+pFhsWhc.pc......phtshshlpahcalhcplTsssoph.hsElhshL+HsK...+sL...... 0 0 0 0 +2891 PF05109 Herpes_BLLF1 Herpes virus major outer envelope glycoprotein (BLLF1) Moxon SJ anon Pfam-B_6348 (release 7.7) Family This family consists of the BLLF1 viral late glycoprotein, also termed gp350/220. It is the most abundantly expressed glycoprotein in the viral envelope of the Herpesviruses and is the major antigen responsible for stimulating the production of neutralising antibodies in vivo [1]. 
25.00 25.00 409.60 409.50 21.30 20.90 hmmbuild -o /dev/null HMM SEED 830 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.69 0.70 -13.61 0.70 -6.58 3 270 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 6 1 0 74 0 437.20 62 100.00 CHANGED MEAALLLCQYTlQSVlpLsuDDPGFFNVElLpFPFYP+CcVCTADVNlTIlFcVGscs++L-LsFGs.LTPpTKuIYQPlHAoGGpENATsLFlLELLGAGpMALTMRSpphPINlSc.-EppVSlEAlhVYFpDsFDlhWCH+VpMp-PVYLIPcplPPll..WNcCNSTNITAVVRAQGMDlTVPISLPTpPp-SsFSl+hEMsGNGIDhpCchEss.ISPVLPGsNsFsITCSGDKPHFASGGILTPsSPssTPsPhsGYsYSLpLsPRPVPRFLGNsSpLYlaYohsGPcu-GGDaCIposIsFSDcLPTsQDMPssTQsloYTGsNATYpLsMAsSEs.NTAPNVTVTAFWAWuNsTpoDFKCKWsLsTssQpPsGCER.........huGtFsSNRTFElTVusssssuKTLIIoRsATNsTsssaKVlFoKAPDoTpos.hl.ThsastPppTTulPoosplP.....PsoouPThpstDPTSsTPTGhTS.................SphP-sTSPTSsoTosTPNATSPTsssT..TPNATSPTosKTSs.......TPNsTSPTslVoGsTosATSPPoGsTSs........PNATSPpVuEpSP.sNTs.........TPsVTpsPSVlTssshsuphGsoSSPTSuhouhPpo.....ssTPRsNSTSsTP.LTSsHPTGGcNITEsTPuuPST....sHVSTuSPuPsPGTTSQsuGPGNSSTSscPGts+VTcGhPspNATSPSAPSuQcTsVPTsTssGGKANsTT..KcToGSshtuSTpPsTstGusuTTs...YNATT..PPosSSpLRPRWT.TuPPsTTsQATVPVPPTp+P-aSNLSMLVLQWASLuVLTLLLLLVluDCAFRRsSSplHTYTa..PPYDDu.ETtV 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +2892 PF04633 Herpes_BMRF2 Herpesvirus BMRF2 protein Mifsud W anon Pfam-B_5353 (release 7.5) Family \N 25.00 25.00 190.20 190.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.20 0.70 -5.48 10 24 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 21 0 0 22 0 348.00 30 98.13 CHANGED pschsLshsuFssGsLAAoPFlWCFIFtoLaohshassWpotlah.WuhsssplshlhaCllphsp..hsRtlchlsslslhsshhsFhhpshshuhhshlPsLFllNhlhLslWlhlsh-sVYLCPsIhp+YaELGFLsAhslY.YlllppplaloslFhhPFhlFlshGlhuLpsl+++shYcpGlpRR+uIF.spsuKYhshShppshchssh-LlllssLlsuhssuhluLslaTclhhGlspYlaLFhsGshsCGGlhl.sSphhshVashluslshhLlalhsstlshhsppshlhslhlhsahpAlGCplphIRpKL++slNuPpllLslCsLsNlllslshhllsK ......schsLshtuFshGhLAAoPFlWCFIFtoLaohslaoshpshlah.WuhsssplhhlhaCllshpp..hsRtlchhhhhslhsshhsFhhpshshuhhhshPsLFllNhhhLhlWshlsh-lVYlCPsIhpRYaELGFLsAhhla.YhlltptlalosVFhhPFhlFlshGhhuLtsh+c+slapsGlpRR+uIF.htps+Yhshohppshchssh-lhslhhLlhshssuhlultlhoclhhGlspYhhLFhsGhhsssGlhl.pSphhshVhhhluslhhhllalhtsthsshhppshlhslhlh.hhpAluC.lphlRpKLp+slNuPphhLtlChlsNhllslshhslsK.. 
0 0 0 0 +2893 PF04682 Herpes_BTRF1 Herpesvirus BTRF1 protein conserved region Waterfield DI, Finn RD anon Pfam-B_4518 (release 7.5) Family Herpesvirus protein. 25.00 25.00 125.00 124.80 19.60 19.40 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.59 0.70 -5.05 12 30 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 25 0 0 27 0 245.40 37 66.63 CHANGED hshhsshK+VpINEutNpthha-spp.l.phPh...................ssh.s+sIlKp......stsspst.....hhAplFaAhut.hP.plGpsPspsasTlhIMsRSpNSLphlP.shplsslQ+LFLKHVLLtchGLENshpsF.tlasp.lsslospQh.phFcpllppsKp+lEDhVFsLNSIspt.FptsVpssssss.thphAhEKYFLMFPPtD+.psAlpFuusllclICpGsshsclltFLp+YlsIpccsstsNhlKlYALLol .........................hhpshK+VplsEusNphhhh..pp.l.ph.....................pht.s+sIlK+...s.ss..p.tsh.t.t.hAplFaslup.hP.plGpuPsssasolhIMsRSpNSLphlP.shclsslQHLFLKHVLLpphGLENhlpsFpslYss.hsslospph.ctFEpllppsKpplEDhVFCLNoIssppFptsVpssssss.slhpAhEKYFlMFPPps+.psAlpFuAsll-lICpGsslsclltaLsKYhsIp+csstssLlKlYALLol. 0 0 0 0 +2894 PF04929 Herpes_DNAp_acc Herpes DNA replication accessory factor Finn RD anon Pfam-B_5837 (release 7.6) Family Replicative DNA polymerases are capable of polymerising tens of thousands of nucleotides without dissociating from their DNA templates. The high processivity of these polymerases is dependent upon accessory proteins that bind to the catalytic subunit of the polymerase or to the substrate. The Epstein-Barr virus (EBV) BMRF1 protein is an essential component of the viral DNA polymerase and is absolutely required for lytic virus replication [2]. BMRF1 is also a transactivator [2]. This family is predicted to have a UL42 like structure [1]. 
25.00 25.00 254.70 254.30 21.80 21.80 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.33 0.70 -5.49 11 24 2009-09-11 06:58:28 2003-04-07 12:59:11 7 1 21 10 0 25 0 391.10 34 98.89 CHANGED shuhplcsssLsptsKlY-HlKuclK.sGlIQ......lsG.sssPsLullSslGssGlLsFclp.sAluphpspp.......phscssolSFRN.puhGsTFlaoRELFGssVcsAsltFY+R....tssupP..pFV+splpYsDslTpTpHTSsl.pstl.Phpcplpsspshu+VlLosKTsshLQKWLRQpKo+t..psV+VslNETLuVll.loVG-soKTlDaKsss..t.sstshsssKstDsGsVpsDssspVul-uLhsALulCKIPGlhlPsh+FauusILEVsGs.lKpup.ssssLSVhLLsssspscssst...sh..........sulsspsppsssssssuPP..............................tsspspopp.sssotsPssshs+s.t...ohpR..KRpsc.........pcc+sKKhKhsFNPLI ..phshplcsssLuphuKlY-HlKup.lK.sGlIQ......lsGss.ssPsLulluslGsuGlLshclp.sAluphhspp.......ptspssuhSFRN.puhGsTFlaoRELFGssVcsssltFY+R....psuupP..pFV+splpYsDsso.pTpHTSsl.pshh.Php-+lcssthhu+VlLssKTushLQKWLRQp+op...psV+VolNEsLuVhs.hTlG-soKTlDaKPhs....ss.tttshsssKstDsGsVpsDssspVul-SLhAALulCKIPGshlPsl+FauusILEVsGs.lpsus.sslcLSVlLhpssspspssus...s.........................sulpppspp..ssssssssPP................................s.tts.psposs.u.sopsPssshp+s..ss..p.p+..KRpucp......pppcc+sKKsKhsFNPLI. 0 0 0 0 +2895 PF01673 Herpes_env Herpesvirus putative major envelope glycoprotein Bashton M, Bateman A anon Pfam-B_1084 (release 4.1) Family This family consists of probable major envelope glycoproteins from members of the herpesviridae including herpes simplex virus, human cytomegalovirus and varicella-zoster virus. Members of the herpesviridae have a dsDNA genome and do not have a RNA stage during there replication. 
25.00 25.00 54.20 54.10 21.50 17.90 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -13.16 0.70 -5.94 32 143 2009-09-11 13:51:45 2003-04-07 12:59:11 13 2 85 0 0 123 0 494.60 31 93.89 CHANGED lh...sEllhtuHhhpl.........shhssshtsssss................................................................................ph..h..s.........hsssLslDpsCplCpllshhh+c.s.s.pWhtDYAhLChKCltAPhsssuohlsAhEFlalhcpHF.stptss...hFspphlolhDlphHFFlppCF+...............................tpssshlphuNhoaltpuhhRuhLhsptssshshhphh...........................................s.t.shtthccpscsssCs..............................tp......................................th.tt................thsassLlhhlhuGossh.tts.....................h..lhts+tptlcphhpppt..................................ht.ssps-hshGPlLloslhthpp+spTso.ClLCpllss+sp.hhsL+pL+pcllsaspNNlpLhDpIp.lLsshts......................................hs.hpDtsthhtll+tsGspulaKHhFCDPhCAhNttpTsPpVL..Fspsss...ccLphhKAplAstNpatspl...CptLasLshhFKuaQlhphp...tTslusFl+-hstlL++HslsLls.taTlshYV .........................................hsELLhtuH.hpl....sh.p..t.s.sss..ss...s........................................................................t....-hs.h.ts.........hsssLulDptChlCphlplhh+p....shs.pWltDYuhLChKChtAPhsAhushlsAhEFlalhcpHF.phptss...hFtppsLTlhDlphHFFlppCFc.....................................ppss..stVphsNhohht.ussRAhlhs.sssshsttpth............................................s.tthltsac-pup.hc................................................................................................................sassLlhhhhuGTssh.pss.....................hpths.ts+tttlcthhpspp...............................t.hh..ssps-hshGPlhhoslhthps+spTss.CLLCpLlhspha.hlsLRcL+pcllsYspNNsplhDpIt.Vlssh.st.....................................ts.htssshhhtll+tsGspuIaKHhFCDPhCAhs.hpssPtVLFscsss.....cclphhKAtlAssNhat.uRl...CttlhhLhahFKsaQlhh.p....TtlusFlR-hstlLcpHslsLls.paTlspYV................................................ 
0 0 0 0 +2896 PF02480 Herpes_gE Alphaherpesvirus glycoprotein E Mian N, Bateman A anon Pfam-B_849 (release 5.4) Family Glycoprotein E (gE) of Alphaherpesvirus forms a complex with glycoprotein I (gI) (Pfam:PF01688), functioning as an immunoglobulin G (IgG) Fc binding protein. gE is involved in virus spread but is not essential for propagation [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.67 0.70 -6.22 19 334 2012-10-03 02:52:13 2003-04-07 12:59:11 11 15 88 4 23 302 1 298.90 33 76.66 CHANGED Phsssshhscs.................WshL.ssss......t.t.lClp.s.ChtDllls...spC...shchsls.......hAphhh..tshppsssshhh..ssslhsphsssp.....ssLpltsAstssuGlYsLhsp...sssstsptss..hhVtVtstttt....................................hsstPh.....h.hsP..pp+Gsphplpsa+uhlassGDoFplslplpsthaD..ssFshplcWhahcs.sspCs.h+IYEsClaHPptPpCLpPsc.ssCuFuSshhupplus+hYtsCstt....hsscC..sssphctsstlpht...sssssLhhpsAssssoGLYV..........hVlhhNGHlcAWsYsllSTssp.ahNslp-hshPth................ssssssssssstss.stshst.hhhhlsulhusAsllhlsslsshsshtppppRchhp........thsshussYsuLPss-.......h.p.sp.Dspss--phs.tppt..................................sspppGSGap 
............................................................................................................................................................................................................................................................................................................................................................ss..th+tsph+h.s.cu.LFuPG-TFsh.splhu.htDp.tsashslsWhhhcs.sspCh.hhlYEsClYHPphPECLpPsD.ssCuhoS..hsphlAhRsYusCS.h......stC....h-AahE.l.................pssssptuGLYl..........hVh...............t.t......tsh.p..hP.h..................s...tst.st.........st.h.hhhhhh.s.hhhhh.hhhh.hhhhhhphtpct..t................................t.hY.h..tl....s..c...................................................................................................................................... 0 15 17 21 +2898 PF01688 Herpes_gI Alphaherpesvirus glycoprotein I Bashton M, Bateman A anon Pfam-B_1222 (release 4.1) Domain This family consists of glycoprotein I form various members of the alphaherpesvirinae these include herpesvirus, varicella-zoster virus and pseudorabies virus. Glycoprotein I (gI) is important during natural infection, mutants lacking gI produce smaller lesions at the site of infection and show reduced neuronal spread [1]. gI forms a heterodimeric complex with gE; this complex displays Fc receptor activity (binds to the Fc region of immunoglobulin) [1]. Glycoproteins are also important in the production of virus-neutralising antibodies and cell mediated immunity [2]. The alphaherpesvirinae have a dsDNA gnome and have no RNA stage during viral replication. 
20.30 20.30 20.30 21.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.86 0.71 -5.11 13 161 2012-10-03 02:52:13 2003-04-07 12:59:11 12 1 49 0 0 151 0 161.00 44 44.88 CHANGED splLhsllLhulhls...upulVaRGspVSLhssossss.ulhP...s-sslslhGcLlFlscQhPsss.YsGTlELl+as.s.spCapllpshpYsuCPRlpssAFpuC+ppcshp.s.stsplpsslcstsLLpIspPpssDuGlYhLRVplss.sssuDlFslslhlh ........p.LtuLllluLals.....usuLVhRGsoVSLsosS.lsA.AlsP.............hl--DLhlhGcLhFlGsQlP+oshYsGsVELhHYshu.p+C.RVVaVlohouC............PRpsAsAFshCRsscstH.oPAYssLphslsppsLLRlppsshc.AGlYVLRVhVss.AssAulFsLuhsl.h.... 0 0 0 0 +2899 PF01528 Herpes_glycop Herpesvirus glycoprotein M Bateman A anon Pfam-B_929 (release 4.0) Family The herpesvirus glycoprotein M (gM) is an integral membrane protein predicted to contain 8 transmembrane segments [2]. Glycoprotein M is not essential for viral replication [1]. 25.00 25.00 31.70 31.50 23.20 22.50 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.21 0.70 -5.76 30 163 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 92 0 0 141 0 329.60 26 92.25 CHANGED MpsS+sDphth+hWhhplssaslhalssslssluAsFPsl.GFPCaasslVDYushNho......stNttptlTP.......sLFL-ssphhsYlhhohllhhssslYYlluultlhpt++.tssshsshps...lshlGossslahulLphWTlQlFlhsLSaKhlhLAAFsYslHFhh.shastshlophss.hthtpthpsl.c.sshLcpllhph+slhsNlhhshlulphllhuLohthulGNSFalhlucslhuulslFhlLsllahllsEhhLs+Yl+h.hGaalGsllusshLhhPll+Y-shF.......suplpsslslslullsllslshhllRllRhhhtp+pcpscYhslspsst......phc..hp.................................hp.s.csshhpp-ss 
..........spsDphsh+hWshplshhslshlshsshhlhAsh.th.GaPCaassllsYtthNho.......shshhthhsP........LaL-s.phhsahhhshllhhhhhhYhllusltlhhp+c.pshphpthtp...h.hhus.solhhuhlphWhhQlhlhhLua+h..LuuhsYhlHFhhhshassthsoph.s...h.pthhtl.c.pstLcthl..h+slhsNlh.hhluhphhhhulshthhhuNsFhhtssphlhsshshFhllslhhhllhEhlls+Yl+h.hGhalGshhuhshlhhsshcYtshh.......ssshppsltsslullsllslhhhllRhlRhahh++p.+pscahshspsst.....ppht.h.t............................................ppps..s......................... 1 0 0 0 +2900 PF01537 Herpes_glycop_D Herpesvirus glycoprotein D/GG/GX domain Bashton M, Bateman A anon Pfam-B_603 (release 4.0) Domain This domain is found in several Herpes viruses glycoproteins. This is a family includes glycoprotein-D (gD or gIV) which is common to herpes simplex virus types 1 and 2, as well as equine herpes, bovine herpes and Marek's disease virus. Glycoprotein-D has been found on the viral envelope and the plasma membrane of infected cells. and gD immunisation can produce an immune response to bovine herpes virus (BHV-1). This response is stronger than that of the other major glycoproteins gB (gI) and gC (gIII) in BHV-1. Glycoprotein G (gG)is one of the seven external glycoproteins of HSV1 and HSV2. This family also contains the glycoprotein GX, (gX), initially identified in Pseudorabies virus. 19.90 19.90 21.60 26.50 18.60 18.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.91 0.71 -4.17 22 286 2012-10-03 02:52:13 2003-04-07 12:59:11 12 2 62 13 0 265 0 111.50 34 26.10 CHANGED lthst.s.cuCplslLts.sshs.thppt.........asAoluWahhhss.......CthPlhhR-YhsCssst..u..sCsthShshhps..shssauhlsts....LlhsPuhhsuGpYhh.lhls.sphhsuclhl ......h........tsCthllLtsssps..hh.sus.............YsApVsWa+lspu.......CtpPIhlhpYssCpsscs.u..sCs.thThshhss..s.spauhlsss....Lhhsss..suGpahh.ltls.s.hhhsphhl.. 
0 0 0 0 +2901 PF02489 Herpes_glycop_H Herpesvirus glycoprotein H Mian N, Bateman A anon Pfam-B_1142 (release 5.4) Family Herpesvirus glycoprotein H (gH) is a virion associated envelope glycoprotein [2]. Complex formation between gH and gL has been demonstrated in both virions and infected cells [1]. 20.00 20.00 20.30 24.80 17.60 19.50 hmmbuild -o /dev/null HMM SEED 657 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.97 0.70 -6.52 30 314 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 106 20 0 262 0 524.70 23 85.33 CHANGED hssphhcpLaspsslsEsLtshhp+hpphapss....sshphsst.s.ssssssspsspsls.spshphssh...h.s.h..lps.......pplhp.ha..pp.shhhps....pslhashcstshthslth....sspsplsstlTpsFhhlolp........sp.pslshlFGpspclsslKu.hshp.shthspsspasLlllsshps.hspt.h.hhp.shtchhhphsphshtthh.pplpshhhth..hpstCp.ts.hs.pphhphhFphslshFhhscsh..ttsphlslspllcphspLphlpchhppCassh...................htshphs.........s..lpphusstltshshpph...tphshtppphhlpthhhs..spph..lssptlptltpllpslYppashsh......L..ssssRchLahhhtllp..................spohpssphthhalltTSMCostElup........hsphhupscshslhchFSPChhSLRhDhop-+Ltp.hspss..................s..phspsssshhshLcshcts.shphhstl.pCls.....scshhl..lPlsshT.YVISsch.lspGhsYsVosshlssshhlosl...pssCphs.ssttssppIPslhNlo...pppC.saCs.SsllpYDEssGlpshhYIsspplpppLhsstsshh.ssN.+sH.....YLlLhsNGTVlElpuhhppc..shshlhlllhhluhhlulalLY+lh 
.......................................................................h..............h...h.................................................................t....h..........thht.hh....t..hhh.pt.............hshhashth.sh.h.hhhh......tpthhhhhhhs.sFhhlslp........sp...h.hhhs.....h.....hps.h..t....s.h..ts.paplhlls.h.p...ph..hhh..shht.h.p....t..h.h....t.hphhhh.....h....tt.st.........hhphhhthshshFhhspt....tst.hslsphlph.ttlh.ltp.hh.hp.shtth....................h.lh..................p....h.ph.th..hht.s.ht..........h..h.thhhhhhh......tp.......hst.hhp.......htphh.thapphhh.h......L.........ss..RptLhhh.thlh.......................t.tpthpthhlh.ouhCs..chst............hhph.tp.p...l.chaoPCh.uhRhDhs.phhh....h.tts.......................hstts.thhshlp...t...h....p.p...hhthh..sth...pC...........sp.hhh..lsls.p.hs.Yllopph.h.pGhsY.lsss.ltpslhlohl...pssCp..t.p..hp.shplshh..Nhs.....tC..hCt.ushhpYspstsl.shhhlps.p....h....h....ss......hsp...hh.s.sscsH........hLhLh.NGTVlclhsh.hct..shphhhlshhhlushlslhhlath........ 0 0 0 0 +2902 PF02689 Herpes_Helicase Helicase Bashton M, Bateman A anon Pfam-B_607 (release 5.5) Family This family consists of Helicases from the Herpes viruses. Helicases are responsible for the unwinding of DNA and are essential for replication and completion of the viral life cycle. 
19.00 19.00 19.00 19.00 18.90 18.90 hmmbuild -o /dev/null HMM SEED 819 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.46 0.70 -6.80 15 968 2012-10-05 12:31:08 2003-04-07 12:59:11 9 38 270 0 294 964 161 251.50 33 46.34 CHANGED schs...........................sphhLNhTStsclc.IVc+I+pLSp...tsspsPp.hsWFcshh-sEsshsh.h.........................................LPFusYLITGTAGAGKSTSIQsLspsLDClITGATsVAAQNLSspLstsh..salsTIFpsFGF+SpHlshp..........Rh.hstsss.soIcclQ+pDLshYWsVlsDIsc+hhpt.................cphtthsshohsuhushscsuhPsLscSNlIVIDEAGlLu+HILTAVVFhaWFaNAhhcTPhYcsGtlPCIVCVGSPTQTDAlpSsFsHppQ+pcV+pu-NlLohLIsNcsLpcYsclscNWAlFINNKRCsDsEFGcLLKsLEYGLPlo-EhhcYVDRFVVPcuhIpNPsphsGWTRLFlSHpEVKuYhupLHspLcls...........tssphslFThPlhshVpscsF--Y...KctlspPuLol-cWLppNhsRLoNYSQFtDQDhusspsEh..........s-pu.....................................hllThclpYV+NSploVTuKsKKslhGFoGTacpFtplLcsDuFl-spuc-psEYAYsFLSsLLauGMYsFYsahhssu.st....phhpcLsplslP.ht.....pshsusss.....................................s-h.D-s...............shLspps...DhFYppYphsPssoohshsElhuhYpslKslFlsRaslhpchFGspFspuPFsTas.sNlsh+sstpFsSs.c.hsGLLuaAsss-oYTLpGYTassVhhhsp-tpp......hhphhhppchPplVl+DppGFlSlLcsNlo+hsEol-s.pplchsossDYGISSKLAMTIsKSQGLSL-KVAlCFuss.pNL+huplYVAhSRsssucaLhMNhNPLRpphE...csshlSpHIlpAL+sssThLVY 
......................................................................................................................................hht............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lt.p....h..AMTIsKSQG.SLc+.Vulshsps...shphuphYVAhS..RsTS.p........c.hL.+...h............................................................................................................................................................. 0 106 204 254 +2903 PF03324 Herpes_HEPA Herpesvirus DNA helicase/primase complex associated protein Mifsud W, Fenech M anon Pfam-B_3676 (release 6.5) & Pfam-B_4951 (release 14.0) Family This family includes HSV UL8, EHV-1 54, VZV 52 AND HCMV 102. 
21.10 21.10 36.20 34.40 20.30 17.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.96 0.72 -3.95 32 154 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 84 0 0 151 0 93.30 30 12.85 CHANGED hslpDlFpsp-hsltlts........t.lpl+llsPh.sFchlhssspsh.shuLhshaptlaspla.u....shpulhPlasYlsPphh.pGss..a..sl.....hFPGFPhl ............s.huDlFplp-sslh.ps.............t.hslRllhPt.hachhlosspsh..hsLhuhapphassla.u......shsulsPlasYLGP-h..pGssh.a.......sl......hFPGaPsl.. 0 0 0 0 +2904 PF03585 Herpes_ICP4_C Herpesvirus ICP4-like protein C-terminal region Bateman A anon Pfam-B_1422 (release 7.0) Family The immediate-early protein ICP4 (infected-cell polypeptide 4) is required for efficient transcription of early and late viral genes and is thus essential for productive infection. ICP4 is a large phosphoprotein that binds DNA in a sequence specific manner as a homodimer. ICP4 represses transcription from LAT, ICP4 and ORF-P that have high-affinity a ICP4 binding site that spans the transcription initiation site. ICP4 proteins have two highly conserved regions, this family contains the C-terminal region that probably acts as an enhancer for the N-terminal region [1]. 
25.00 25.00 34.00 29.80 19.40 19.00 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.49 0.70 -5.76 13 161 2009-09-11 08:11:53 2003-04-07 12:59:11 9 3 37 0 0 135 0 321.00 49 27.44 CHANGED GGFRRhPsGssHTP.........sPucushpAYCsPcslucLsDaPLFPpsWRsALsFDPpALApIAARssussst.....................spthGshsussPLRRRsAWM+.QIsDPEDV+VVVLYsPLPGEcLsssssss...s..ttPpWsspR.GGLShLLAALuNR.LCsP-opAWAGNWoGsPDlSuLsAQGVLLLSTcDLAFAGAVEYLshchuuA+RRLIVlDTVssE-WPpDGPA.lSphHlYlRsslpPsuQCsVRWP...sspcLuRAVLsSuclFGPusFARl.EAAFARLYPsssPLRLCRuuNVRYsVcTRhGscTsVPlsPR-YRptVLPuhDGppchstQstuhshGsPDFl-GpAaSHRAAsRWGLGAPLRPVYLupGR+uutu..uPct..lPtslRsFCtpALLEPDs-ApPlVLpsspuss.............hsssPultWssuhGptsT.l .......uuaRp.P.G..hs..........hPsttshtAYC.sthhhtLh-..hhP..Wp.sL.asPtAhAplAthtss...................................sst.LRthsAWMp.Q..sP-DVpllllYsPLstEcl.s....s..........asst+.GGLShlLAALusR.lChPsotAWAGsWTu...PDlusLst...pGVLlLSTpDLuaAGAVEaLt.hh.usucR+LlVlDsVs.pcW.PtDGPA.lSp...h...HlYlRsshpPsuQss..VRWP...sscsLtRuVhsSucsFGPusFARl.EsAaApLYPst.PLpLCRGGNVtYpVpTRhs..o.VPhsPhtYhthVh.s...ptt............tsDF.tttuhSHRAst+WGLsA.LRPlal..u+.shh...tPt...h...hp.hCttslL.PDstA.PlVl.....st..s......................stl.W..t....................................... 0 0 0 0 +2905 PF03584 Herpes_ICP4_N Herpesvirus ICP4-like protein N-terminal region Bateman A anon Pfam-B_1422 (release 7.0) Family The immediate-early protein ICP4 (infected-cell polypeptide 4) is required for efficient transcription of early and late viral genes and is thus essential for productive infection. ICP4 is a large phosphoprotein that binds DNA in a sequence specific manner as a homodimer. ICP4 represses transcription from LAT, ICP4 and ORF-P that have high-affinity a ICP4 binding site that spans the transcription initiation site. ICP4 proteins have two highly conserved regions, this family contains the N-terminal region that contains sites for DNA binding and homodimerisation [1]. 
25.00 25.00 102.60 102.30 22.20 21.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.13 0.71 -4.76 14 132 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 38 0 0 132 0 174.20 51 12.23 CHANGED sssGEsWPsusPPPtGRVhaGusG-pRpGLWDsP-VcpAttRapAusuPsPVaVPEhGDssKQYpALVchlas..sp-s..huWLQssKLousDptLschC......p+hhsus+upGo.......hlTGoVssslPHlGcAMAsscsLWALPHsAAuVAMSRRYD+sQKsFlLpSLRRAYAsMAaPtsus .hssGEPWPGusPPPsGRVhaGssG-oRpGLW-sPplcpAttRacAustPssValPEhGDsucQYctLlchlYs.Pst-s..huWLQNPKLossDtsLsphC.......+l..hssuRupGo.......hlTGSVApslPHlGcAMAsGcshWALPHAAAAVAMSRRYDRsQKsFlLpSLRRAaAshsaPcsus...... 0 0 0 0 +2906 PF03361 Herpes_IE2_3 Herpes virus intermediate/early protein 2/3 Mifsud W, Fenech M anon Pfam-B_2178 (release 6.6) Family These viral sequences are similar to UL117 protein of human and chimpanzee cytomegalovirus, and to intermediate/early proteins 2 and 3 of certain herpes viruses. UL117 is thought to be a glycoprotein that is expressed at early and late times after infection [1]. This region is close to the C-terminus of the protein and may be a transmembrane region [1]. 25.00 25.00 48.90 44.20 18.90 18.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.74 0.71 -4.91 14 91 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 27 0 0 96 0 159.10 35 28.34 CHANGED Phpshs.+plhpcshclC+phpspspsIhhhaTRoppVtpslcsh+pcLhphsNlolSsPFphpHT.PhhHs.cssppsschpppGsppsW-hpcsp.s....HshssRpSDh+ohhIpAATPhDFlsAs+lClphupcaPKplslRlsol.psuht...LPIYssssc. .....Phphssh+plh-Esh+hC+shpspspsIhhlaTRspplcstlcsl+scLhphsNlslSssFhhpHT.shsHsPcsuppTtchpppusptsWshcc.p.s.....+phssRpSDh+sMhlpAATPhDLlsAlpLClslspKaP+plslRlhsh.psuhhh..LPIYpphsc.h................. 
0 0 0 0 +2908 PF03363 Herpes_LP Herpesvirus leader protein Mifsud W anon Pfam-B_1664 (release 6.6) Family \N 25.00 25.00 29.70 29.60 21.40 20.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.91 0.71 -11.96 0.71 -4.69 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 7 0 0 46 0 111.30 67 134.18 CHANGED RGD.SEuPGPoRPGPPGlGPEGPhGQLLRR+RssSPTtGs.E.P+RVRRRVhlppE-tsVSGsPotPRGD.SEuPGPoRPGPPGlGPEGPhGQLLRR+RssSPTtGs.E.P+RVRRRVhlppE-tsVSGsPsuPptPh.Q................PsApShREWLuRsspRssspPVsTh.RRRVYIEEEEE. .....hGD.SEssGPsRPGPPGlGPEGPhGQLLRR+RssSPTtGs.E.P+RVRRRVLVQQEEEVVSGSPS.PRGDpSEuPGPoRPGPPGlGPEGPhGQLLRR+RssSPTpGGQE...P+RVRRRVLlppE-.psVSGsPs................................................... 0 0 0 0 +2909 PF03122 Herpes_MCP Herpes virus major capsid protein Mifsud W anon Pfam-B_600 (release 6.5) Family This family represents the major capsid protein (MCP) of herpes viruses. The capsid shell consists of 150 MCP hexamers and 12 MCP pentamers. One pentamer is found at each of the 12 apices of the icosahedral shell, and the hexamers form the edges and 20 faces [1]. 
25.00 25.00 40.20 40.10 14.90 14.40 hmmbuild -o /dev/null HMM SEED 1354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.16 0.69 -14.22 0.69 -7.25 32 190 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 98 2 0 182 0 1168.50 38 98.62 CHANGED stls.ssuslLspI+.outctLFcsFchhhG-DspcYslpF-sLLGsYCNslpaV+FLETuLulAClss.cF.DLphMs-G+lQFclphPpIA+uss+sss+.sphhlsKtscK+slossFplustsLplLptshps.T.l-phhplpAlppls+slphslDAhERGhlDphLpVLL+KAPPhhlLpsL.pcshhspRshs+lsRuslluphKp+lhpshFFLs+sp......+phlhphLs-hlsus.ppSVhpstsTaos.ssGc.lsGVllTTssVhppLlshlsp.lsctssssPssYGpaVlsGpNLVTAlshG+sh+sF-phsp+llshtps....tsp.p.........psp.spsplsssllplG-+hVhLEuLc.RVYpsTpssaPLtppl-lTFaFPlGLahP...s+YoT.uuplps.....tssscp..PspsaFhNKDphlhplsapsALtTlCHPphhssssslpuLptt.h....ssstch..............t.phauhthpp..sthslhphhhpFac...s+h.sshsshshcsphos-pFh+PsN..hLthELHPhFDFaht.tssshs............shtAohRlhsGNIP.PLuPssF+-uRGtQlts..shppl..ssuTlphlpsThpDssYPhlhYllEAhIHGsEctFshhhpLlupCIpoYWpsstpLAFlNSFtMltaIssaLGsGtlsc-sauhYRclhuclpsLcpsltchsh.s-slss.pshtthssuLhDssLLPPhlac..hs.ll.csp......Rssplpsssp.hsss...sc.ttlshh.+hpshhsctsshhss.ssss..h.hh.t.........-hhlLpKIaYYsllPAhopG+sCuhGVcacplhhsL...pss......................hss-....hsss-cs.hs............pLhssslpslhpsucls.sssshl.tpLttshhpssppTpslclpsshDsupppussp.shpshpssLaNGhhhhuhscpspthh.psaFYPlPhpthausP.....tlssshssplpshlpchPt.........lPshLuuEYhpahRsPhspYu....spstss.solo.sLhuhahKLSPluhhtQh+ptlHPGFAhTVVRpDcFhsEplLaupRASpShFlGpPpVs++E.+ssulsFcloQshuslDhGLGYoushssAplpslsTDMGspsQsLFtshsstsatss-lssalRptluspp..s.....sshshhshGthpsssssGltHGQhAsCEhIlTPVouDlsYFppPsNPRGRuus..lhusDs.sppsAcphhYDHopsDsAasaRuTsNPWASQ+sSLGDlLYNupa+pp..sssshYSPCtpFFoss-lhppN+sLh+LlsEYss..ussssousT-lQahsssGTcphlEcPCthhQEAaPhLsAScpALLcph..................hsspssp..........uEsHhupYLIc-suPlpthlh 
..........hhsssuslLopIchuutcplF-.FphhhuD-sphYslpF-sLLGsYCNplphV+FLchuLulAshss.+F.-LshhspG+l.Fclp.PhIApssshsssp.hp.hhsKhhc++slshsFplustslshLptph.s.T.lsthhplcAlpplhRslpsshDuhERGhhcphLplLLcKAPPh.hlL.sh.pc.h.st+hhs+ltRuslluphKpphhps.FhLs+st......+phl.thLschssus.p.Slhhsh.Tass.spGc.lsGVlVTTsslhppLlphlhp.lpcppsssPsoYGphVlsupNhVTAlshGpshtsh-phsp+llshppp....p.p.......t...tst.ppspltscllplG-+hVhLEuLc.RlYpsTps.hPL.tph-LTFhhPlGLa.P...cpYushsuclt......psssct..PpplaFhNKDphl.plshpsAltTlCHPshhsspssltshppt......ch..................t.p.hthhhtp...s.hshhphh.pFhp...tch.hspsp.hhcsphoscpFhpssN..pLhhELHPhFDFhht.tssph.............th.sT.RlhsGNlP.sLsPssF+-sRGhplt...thpth..t.uTlthlpsohpD.sYPtlhYll-uhIHGsccsFthhhpLlspCIpsYWpspphlAFlNsathlthIssaLGsGtlPppshshYRclluhlphltphltphsl.stplss.ps.tthsphlhD.tLlPPhlac......h.+st......csstl.sssps.ssht..hp.tths...Rhsshhhct..hhss..sst.................-.slhpKIaYYshlPAhops+sCshGlchcplhssl...psh......................lsP-...hhss--s.hs.......................................pLlsshlpshhpsucls.sssshh.hpL.hhhhphsEpTpsl.spus.Dsutppussp.thphhptsLasGhhhhu..ch......tpaFhPlPhpthauss.....shssshsstl+phhpphP..........lPshlutpYashhRpPhspas....tps.ss.pul...sLhuhaaKlSPluhhtQh+stlHPGFAhTVVRpDpF.s-plLaup+ASpuhhlGp.pVs+c-.pshussFplTQshusVDhGlGYoussssAtlppshTDMGstsQsLahshss.shhsspsstalRptssstp.......ssshsh.sFGshpstp.suh.HGQtAsCEhIhTPVosDlsYF+pPsNPRGRAus..hhusD..pptsspthhYDHspsD.uhsatuTsNPWASQchShGDlLYNsta+.p..tssshYSPChpFFsss-lhsps+sL.+llsEhss..uhsshsuso-hQahps.GopphlEpPCthhQEAaPhhsuos.ALLcsh....................hpspstt..........sEsHhupYlIt-suPlpthh....................... 0 0 0 0 +2910 PF04797 Herpes_ORF11 Herpesvirus dUTPase protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_6280 (release 7.5) Family This family of proteins are found in Herpesvirus proteins. This family includes proteins called ORF10 and ORF11 amongst others. However, these proteins seem to be related to other dUTPases Pfam:PF00692 suggesting that these proteins are also dUTPases (Bateman A pers. obs.). 
20.10 20.10 20.20 21.30 19.90 20.00 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.85 0.70 -6.01 23 85 2012-10-03 01:18:03 2003-04-07 12:59:11 8 1 32 0 1 104 1 307.40 23 75.33 CHANGED hshshWsssltsshlpl.oNppslpltsspsp......LPhsssh.thLtcphsuFuhssolhshp.hspsshhlhus....pssplclhPtslhcsppslslplpsstps.lstGsLphhllslshlphstlhhhhhsspss..h..sssssptusthsspp.splplsGpsspossps.h.hhhsppshsFhc..........................................................................shs+lpstpstsshlpshphucshh+lslp..p.ptsttlphph.lpsshohh.ps.l..........sFpas..hthshptsp...l..lYsssshhlsPspshpVchsspYtsstssttss..hhlsspsssspahlps...s.WhPtsshplslpNsostPlplpsustluhAlFlh.....ptssssshschhhpptosslphsusl ...........................................................................................Wphpl..shhhh..Nh..h.h..tp.t......LPhs.s....lhpthssashs.sh.t......ht.shhhhhst.....ss.h.lhP.hl.p.pt.l.lhlps.....h..tthhhhlh.lp.h..pth.hhh..............sth.p.hp.hss...s.h.ltG..h.s.....p.tp...hh.s.h...hhp............................................................................sphp.......p.h..........hstthh+htht.....t.s..h.h...htsh........l..........sh..s.....s...st....hhslYscpslsIsPtcotpVhasstYhp.usstphsh..hllstpssps..c..hhlcP...slWhPussstlsVhNsSspslsIsssTplApAlFh...........tt...p.hht..sttl.hst..................... 0 1 1 1 +2911 PF02399 Herpes_ori_bp Origin of replication binding protein Mian N, Bateman A anon Pfam-B_1518 (release 5.4) Family This Pfam family represents the herpesvirus origin of replication binding protein, probably involved in DNA replication. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 824 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.50 0.70 -6.59 13 207 2012-10-05 12:31:08 2003-04-07 12:59:11 10 9 101 0 8 215 11 389.30 22 60.09 CHANGED sYsSSsSLARhLYGsDLh-hlh+s..+P.uVolEpQscuPVsFPsPhssssRsVpVVRAPMGSGKTTALlcWLppsLt.usDtSVLVVSCRRSFTpTLtcRhscuGLs....................................GFsTYhsussYlMsst.a+RLlVQlESLHRVspsLLss..YDlLlLDEVMSTluQLYSPTMp.+LscVDulLhpLLRpCP+IlAMDATlNuQLVDhLutLRG-cslHVlls-YAosGFSpRpChlLcsLGs-sLtuslp.....tt.stpst......tsst...stt....psss.....FFucLptRLtuGcNlClFSSTloFSElsARFCtpF..TcuVLlLNSpcshs.DVssWupYRVVIYTTVVTVGLSFDssHFHSMFAYVKPhptGPDMVSVYQSLGRVRoLhcNElhlYhDuSGARu-PlFTPMLLNHVlussusWPspFsplTNhLCpsF+t+Cssua....ttucshhLFsRFKYKHhFERCTLssluDSlNILHsLLssNplpVph-Gsp...slss...csFssFLpsL+tDAhsup+cl+pLR................tsssshshssplh-s-...sVusFhcKY..LRssVss..p-lhpLlpsLus.PlsRtpFlNlslLc...AC.........htlPsAlcSpcVFp+lYs+YAoGslP.........llu..sGtlEhssLssshNssspW.-La+LCsphAcsLslsstpssss.s........lssssllpshsschsp.htphlLElh+CslT-uphhsccsVptstshLuGttstpht..shSptcH...AlulF+lhWcplFGs+lsKSspTFPGssRVKNL+KpEItuLLDuhslDRous+THRpLYsLLMpp+ppFpp.sRY+LRsPsWuchlp.p..........-h.....LEsuLuclsspsWPpspG 
..........................................................................................................h....hh+ushusGKT..h.ht.hhtt...................ph...hhl.......shR.sh.tph.tph.................thh..h.p.........pt......ht.........tplhhplpSl.h+h......t.......................h..-..llllDEl.p.hhtp.h.h.o...hh...p...p.....ht..s.....t.hhh.hltts.pllhhDAshst.hhphh................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 2 5 7 +2912 PF03325 Herpes_PAP Herpesvirus polymerase accessory protein Mifsud W anon Pfam-B_3097 (release 6.5) Family The same proteins are also known as polymerase processivity factors. 
25.00 25.00 331.80 74.80 18.80 18.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.82 0.70 -4.86 6 52 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 27 2 0 48 0 243.10 52 71.12 CHANGED MsAsDasMEFSSsCVHGQ-lVRESGcSAlRVDLDaSVVuELLKWIuPpTRsKRssK+ssssssTsQIlVHuNPPoIKF.LsssSELEFTAosRVuFH-VKNMRlsVQhKNLaQALsNCAVTKLuCoLRllo-H-shLaluSKNuhFoVENFLTEEPF.RuDstF-Rthtppps.............slsNust.pss...........s-.h............s+Kt-R......ssRp.stt.........-H..ttG.upcsKYE.QHKITSYhVsKsuuuuG........-R..osaFNDsKEESDSEDSVoFEasPNoKKQKCu .....MsAPDaNMEFSSsCVHsQDl.lREotsSAs+VDLDauVVu-Ll+WIuPphRsKRNsKKtsssouTVQIhlHusPPslKF.Ls...ssSELEFTAsNRVuFHtVKNhRlsVQhKNhaQsLhNCAVTKLsCTLRlhT-H-ThLaVuS+Nu.FslENFLoEEPF.RuDs.....F-+..htt.................s.sNusshpss..ssG.D.thhs-.h................s+KH-R......ssRKhsttGs..........tDH..ttG.upcpKY-.QHKITSahsoKGusuuG........-R...usYFNDsKEESDS-DSVoFEas..PNoKKQKCs.... 0 0 0 0 +2913 PF04846 Herpes_pp38 Herpesvirus pp38 phosphoprotein Mifsud W anon Pfam-B_4545 (release 7.6) Family This protein represents a conserved region found in most herpesvirus pp38 phosphoproteins. 21.40 21.40 21.50 21.60 21.20 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.88 0.72 -4.66 5 34 2009-09-11 14:05:08 2003-04-07 12:59:11 8 1 9 0 0 24 0 61.50 59 25.88 CHANGED ENsshRSthllsLlhSAKoLVlGSCMuFhAGsLIG+usclcoospssssLhhAFCAGhlssGl .....ENATMRShMllTLIsSAKSLlLGSCMSFFAGhLVGRouc.V+TPlWDTVsLLMAFCAGllVGGV....... 1 0 0 0 +2914 PF04637 Herpes_pp85 Herpesvirus phosphoprotein 85 (HHV6-7 U14/HCMV UL25) Mifsud W anon Pfam-B_5418 (release 7.5) Family This family includes UL25 proteins from HCMV, as well as U14 proteins from HHV 6 and HHV7. These 85 kD phosphoproteins appear to act as structural antigens, but their precise function is otherwise unknown. 
18.40 18.40 19.60 20.60 18.00 17.50 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.56 0.70 -6.20 13 77 2009-09-11 05:49:28 2003-04-07 12:59:11 7 1 26 0 1 71 0 464.20 29 71.87 CHANGED lD-pLNFhPslloscch+alpcsLspu.ssstVuhlNuslPMPsYsLEuLlcshl+pphs+s+slhcslI+lslhlN+YhsupchLcchpsthcshhsssp.hcRLcpuLppLh.pstpssssPhslhhtLscpsloputappsLcpLpclhcshshc.sschcs.pYppLpsaNhLapsPhaTopEAl-lYucNLpcLTpppscs..hcLloshpp..stsh-clLNDlhFLLSltphlhpapcpLptLRsaIhhplssLs-shYhsYsQlP-hRpsahsLsctltshhspsp..s-t.....FpshltsLhphl+plpcAsValsPsYl+ttlhth..hh.p.pps....ts.Dps.-tpp.....................h-.-ssushs.shhhspNshsss-lFRs.Pcss.phhtc..hhcpphopplhhsspssploscphplpphsplhhcuuuppsplsPcphhchh.........t......pst.t..t..husVphpsh.................p..pstp.s....uspssshssspPhusstp.p......tst ................................................................ls-pLsFpsslluspchcalttsLppu.ssstVshlNpslPMPsasL-uLl-shl+pthh+s+slhc.lIphslllNaYhsupchL+chpsthcshhsssp.hcRltptLppl..pstpspsss.slhhhlucts.lTtGtappsLcplcplhcshshptusctps.hYppLpsaNlLFpsPhaToptAlpLYtcNLp..cloppppcs..lcLLoshch..p.s.-csLNDhhFLLolpphlhpapcsLchLRsalhpphpsLs-hlYhsYsQsP.p.hRpsahpLsctlththspsp...D......hpshhtslhpFl+plpptslalCPsYlphslhtl.................h.Dps..........................ht.-.ss..u.s.hhstp.hsssslF+s.Pctspp.htc..hhpp..sh.hhhshpptphsspph.lpphhphhhpusph.stlsPcph.chh..p.hs.s.....................l..hsh.................ps.....ss....tt..t......sshP.p.h......ps................................................................ 2 0 0 1 +2915 PF03326 Herpes_TAF50 Herpesvirus transcription activation factor (transactivator) Mifsud W anon Pfam-B_3658 (release 6.5) Family This family includes EBV BRLF1 and similar ORF 50 proteins from other herpesviruses. 
19.70 19.70 19.80 20.10 18.10 19.50 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.51 0.70 -5.55 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 25 0 1 68 0 284.80 34 89.77 CHANGED LFRN.Ks+pRupsuss.ssCAphuppll+alhERllhsTD+hFlsAsCSGVslP.sLApslaclhp-sRsKChuuWRpLussRRslMslucpllssYN.sLcssGhlSsch+AFhKLsFPshsLpplhpPlhthspGthsspshhshpG..up++RsspsshaSthsstspaslP-sLltshs-....sGLlssstsDlSsLLpNPppILp....hs.lupFpssssps..slPpsho.lssssoussPtp...sFtssasuso.........pssptpsostshustuspS.....sossptsplssS.ssts.sppshtuplshsosstPu......phsh.t..pssthP...........Pst.hsttp.p.........hssstlppshtsussppP..................tspPsusupps.sssush.pshss.ssossshpsssphsPshsp.ptpsssshps..............hsusshsspssp...........hsPuttp...........tDsslhshlppshssph-huuPss....hhhsshsp-l.p.sssPshcsspsstpt.-phsslssP ........................................................................................................................................................................................................................................................................pShLsauss-lShlL.pcsush.c...hsctsuPt..supThpP..ulPpspSchcPSPousssustp.spspstspuT.........ssspscssssPpuPpusop.....sossQcsp.sCS..s.ssusspssh..haspQsusshsu......plstsssssshshP...............................Pspspshppsshs................................................................................................................................................................................................................................................................................................ 
0 0 0 1 +2916 PF04843 Herpes_teg_N Herpesvirus tegument protein, N-terminal conserved region Mifsud W anon Pfam-B_3992 (release 7.6) Family \N 25.00 25.00 25.10 26.60 22.80 21.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.11 0.71 -4.84 23 211 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 85 0 16 203 0 173.50 32 6.75 CHANGED Ss.sQuc.s+aGstAGsQClSNClhaLpssYhsG....ssllspcsLDslLcpGuclDthhcpssh......hs.spastls-lPshlpos...............hGpsshhhSp.h.Ghlpsps.s.pt.hlt.lchlhpsphpphsp.YhlhhssshutAlllp.ssphalFDPHspspt...ssAAVlsspsss-llsYl ....psQhs.schGPsu.GspClpsSlsFLphsahtG...hss.sLos-slDusLpEGuphspthpptsp.........sshsths-LPshlph................aGcssaa.stPhtG.hhp.Tpsss.pt........Fhttsatt+t.s.Y..sll..Tlssh....uhulhpp....scthalFDPHupsph.......stAhhspsphs-lh.al.......................... 0 8 8 16 +2917 PF04523 Herpes_U30 Herpes virus tegument protein U30 Kerrison ND anon DOMO:DM04370; Family This family is named after the human herpesvirus protein, but has been characterised in cytomegalovirus as UL47. Cytomegalovirus UL47 is a component of the tegument, which is a protein layer surrounding the viral capsid. UL47 co-precipitates with UL48 and UL69 tegument proteins, and the major capsid protein UL86. A UL47-containing complex is thought to be involved in the release of viral DNA from the disassembling virus particle [1]. 
25.00 25.00 30.40 70.20 24.40 22.70 hmmbuild -o /dev/null HMM SEED 887 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.43 0.70 -7.09 18 68 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 48 0 0 67 0 893.00 26 92.53 CHANGED LpclhppLpssoss.hspL+hLhclElsslslpsl.ossplppFLNpLsphsG.tahsFIppahVFYLL+tuThsspphsht..stcplhphLpph............pphsssstsssssshLsNpplLsplppalsphp.....................................................................shpshppssssspsltshRsVEEllcthapshaphhhtshhh.pht.sssohL-pWLhhhatpshh.........tsshsslpshAppLlsp.............ctc.lFsshsssstshlshPhAcpputtIaplFsps..spsssstssPlLuhssspLs.plssphhFhYcallEuLhpspsY..........ssspstlEpFl..............................................scshshhssluspl.pssop...ppphohsslpphpttLhphGLoccsCpsatphlhhp....s....tssssshcthsphhphlspLshhuahFatCLtpaSPTulhtpch+.ILcpspuhtssppt.....................phsh.Ws..hhslhphFhspsPppphpphspulo.sthh+shhahshpcpWshhhhshp...........ttssssshsstsssopp-lp+aCctlph.......G-s..sY.-s..sllppPhFsptFlcphllPplptILssphpctRuhhplRWLllauus.cs..PpLhpl++sLshlYhtlh-Ihctst.............................................uuhtslhDhhp-lhstlpphs.........ssh..ohsssLLppLahtpassshpphlss.lpcalscspsllpshhshsplusslspsphphsspstpVplshts....ppshpl..............slssFpsslpslpppsp-hhstlsphspplpshhhcLh.sllpclpplppH.l.....shs..p.shpplpcthhcshp+hpsltsplssosshph.s+pFstLapssLlslthlpclLshs..cppssspshl-uhs.shsptssss.s........LsccslptLpchhsphh....psspsss...sslc+paosshchsthsls 
..........hpclhcpL+tpsss.tttLphl.plEluulsspsl.oustlppFLstLsp.sG.haFsFl+ppsVFYLLppuTlpssppsL...sAcplhppLpch............tpttssssstsp..shLsNtp..sLsthpphlpplppl...................................................................shpplppssstspshsNCRsVsELl-hhYpphhtahhp....h.a.hthppssDopLDplLhhtahashh...........pussssLts.Fpphlcp.....................ptc.hhsshssoshsssptPsucphpshSa+lFstsltsp-ssshhaPllusshohLs..hluPcphFFasGllptLLpppsh..............t.hcph..L...............................................cthschhs.plspplhcsss....pp.hsltclhphhptLhphGLsccoCssYtpMlhhp.............s.......sspps.thls-htpplhpllassahFFhCL.hYSPTF.LFhp+++hILEppcuhlhus+p.....................phptlWsplshNlpphFssphsEc-Fpthscuso..phtRpaLY+clppKWGshhFshp...........ttsssssshsshcslTppDlh+hCthlpl.......u-s..sYsolhshspaPtFsshFhphhllPplppIhshs.sphpuhss.RLLhLhths.clLhPpphpLh+sLl.lYshhh.lhchDt.......................................................usF+slhDhlh-lhstlppls..........tssh..o.sscLLspLhspuhspsltsplsshlpchhpssts.lppalcasplChuLscspsphsp..c...shsVhl.lps.........phshpV..............shspFhpplcplhcpscpltpslptlspcltslhhRlt.plhp-spplspa.........stsp.shpphscshp+shp..+lpslpsplstshpp...sp+.NthhhsulhphhtlpclLspctlcppuhp.sls-Ahuhhppppuhsshs.s......hsp-shph..L+chhcshh.....pss.pss.....shhpphosst-pss.p............................................... 0 0 0 0 +2918 PF04541 Herpes_U34 Herpesvirus virion protein U34 Kerrison ND anon DOMO:DM04381; Family The virion proteins in this family include membrane phosphoprotein-like proteins such as UL34, Epstein-Barr and R50, from dsDNA viruses, no RNA stage, Herpesvirales. The family Herpes_BFRF1, Pfam:PF05900, has been merged in. 
25.00 25.00 31.60 30.90 21.20 21.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.31 0.70 -5.42 29 132 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 81 0 0 115 0 202.70 29 69.62 CHANGED shtcphhschs......sllpRlhhlss.usLcsscssss...hspuss.shCshphp....sspsaslEYVh+h...WA.csps.....sPhhhlpNTGluVhlpsFhstPtss....hu.slsstcsNVhLpsscosuluLsDlpchKt.h..uhDhcshpuhlhhpsalp...spspLpFhhhGPcsssRspplLcpshttt....tsctptp.cpstsshpspsttsssts.sss ...........t........hh....slspRIhhlss.usL+ss-sp.....hspsss.uhCsh.hp....sspsaslEYlLph...Wt.cssp.....sPhhhhpNTGluVhlpsFhptPhss.....s.shs..stcpNV.LpsscolsloLsDl-clKt.h..shchsshpsplhhpsasp...sphpLpFhhhGP-ssu+hppLLcclhttt.....hsptptt..hppt.s.tsp..........hst...................... 0 0 0 0 +2919 PF04533 Herpes_U44 Herpes virus U44 protein Kerrison ND anon DOMO:DM04376; Family This is a family of proteins from dsDNA beta-herpesvirinae and gamma-herpesvirinae viruses. The function is not known, and the proteins are named variously as U44, BSRF1, UL71, and M71. The family BSRF1 has been merged into this. 25.00 25.00 28.90 28.90 23.60 22.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.38 0.70 -5.07 11 59 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 0 0 49 0 205.90 33 77.15 CHANGED Mu..h.tWh.CCGl..h.FG+ssp.pYcpLs-s...tpss-Rh+tEI-hGLPP.GVplGDllpsppsppoL+QsYLLAV.....QoNpIT-aLcRFDusclPpuCpslVpsQIsKL+ulpslIWNsMlSlAlGslolD-uulpsLLsKpAs-ohuLhEMEKlATAlchD..sossWApEIssllsstssssts......sslhspspsphshs.h..s..................cstph..sst ...............h...................utshs.cYhhLpss...-sp.c-lpshl-.sass.GlssuDLhphs+-s-.slc..aLLtLh....QspphssaLc+ahusp..C.sHs+sssclEspK.pplhpsl-lhhLKLsVGEhsh.s.-.-uLchLL-KFusDQsoLsEhpKlhsLlcMD..ppossah.......................................................hptsstts....................................................... 
0 0 0 0 +2920 PF04529 Herpes_U59 Herpesvirus U59 protein Kerrison ND anon DOMO:DM04374; Family The proteins in this family have no known function. Cytomegalovirus UL88 is also a member of this family. 21.10 21.10 71.40 69.30 20.30 19.60 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.39 0.70 -5.62 8 35 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 27 0 0 27 0 390.30 40 96.13 CHANGED PsssWpDuALlhsDGTVtEHchhNusLucLlRRplcs..-s--t....sVauSELulYloGRssRpuSsFSIYWpsHS-llYsLTGlTHCsKIsVECGph.........................................................................usDup..............plY-hP+laLlRspD.ussuPpcVsWstTsshWsp-VsIcsVp..p+sssARshsslh...................P-ltthhhPLLphtsc................................................s-.slFcE+VussYpRL...t.chssss+oppclLp+ClcLAutK+LLLlDGst.LENFFLspVCLYpLGEDs....luEEhlGhLtsRP-c.....uuuuFhLH+cshpsAssLAllLNslac+Q-tLPsl-p........RlDpsD.lssAl+cYYcpalsVphpsLuhApclLstFopphsstsuhshl..sttlslDusVSRpDllpVLRu ........................sstWpDuALlM.sDGTlhEHtFhNssLupLlRR.............hl.s..-t--p..........sVauSELuhasSGRhsRRuSsFSIYWpsHSDLlYALTGITHCsKIsVECGph.........................................................................ssDup..............chY-hPtlYLlRsp..D.ussuPpcVhWstT.sshWspDVsIpsVp..p+hsuARshsssh...................P-lttchhPhlp.hsc................................................s-oshFcp+VuusYpRL.....chsshs+sspclLp+CVpLAuuK+LLLlDssh.L-NaFLspVCLacLsEDp....huEEhlGhLts+s-s....tuuusFhLH+cshpsussLAllLNslacapctLPsl-p........Rl-psD.lssshRcYYcuassVpspslusApclLssao-phsshcuhshl..chhVshDusVoRccLltlLph.. 0 0 0 0 +2921 PF03580 Herpes_UL14 Herpesvirus UL14-like protein Bateman A anon Pfam-B_2982 (release 7.0) Family This is a family of Herpesvirus proteins including UL14. UL14 protein is a minor component of the virion tegument [2] and is expressed late in infection. 
UL14 protein can influence the intracellular localisation patterns of a number of proteins belonging to the capsid or the DNA encapsidation machinery [1]. 25.00 25.00 68.00 67.80 19.60 19.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.73 0.71 -4.41 12 75 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 36 0 0 47 0 142.40 53 74.68 CHANGED pRRcRhhLAEs....+hRtplYKpRTL-LlptGVsspDPtFltAFTuA+pAct-hctpl+SstRlpslcp+sptIpt+V-pQssh+tlLsspRRaLsscFhcpLDpsEDslh-pE-pLp-A....p..huss.st.ct....hsp---uLLspWhLpps ....p.tthRRRLAEs....HlRAclY+-pTLpLhREGVoTQDPcFVuAFMAAKAAHh-LEA+L+S+ARLEhhRQ+AssV+lRVEEQAARR-hLsuHRRYLcPuLuERLDAs-D+LADQEEpLEEA..sssh...st....t.......ht.tppsLLh.W.Ltt.......... 0 0 0 0 +2922 PF04559 Herpes_UL17 Herpesvirus UL17 protein Kerrison ND anon DOMO:DM04385; Family UL17 protein is required for DNA cleavage and packaging in herpes viruses. It has been shown to associate with immature B-type capsids [1], and is required for the the localisation of capsids and capsid proteins to the intranuclear sites where viral DNA is cleaved and packaged [2]. In the virion, UL17 is a component of the tegument, which is a protein layer surrounding the viral capsid [3]. 
25.00 25.00 25.80 25.40 22.10 22.80 hmmbuild -o /dev/null HMM SEED 540 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.81 0.70 -5.92 36 169 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 90 0 0 150 0 507.20 22 95.08 CHANGED M-sHltN-phh........sspsthllHl.llscssLpttslsh.......................spshhspspochpsssts.o..............................Wppl.authh.spsslsshl...........................hGLhlSlPlhs.t.p.....assFshlhLRlsh.....................sst......phh-lhFhYt-Llssttch.tpstph.....................................................................ssss.sssstpsplp..sshp-Ihphhppsst....shst................................................tsshsuhhpssh..................t..t.tpststptsuLEsPuplRGtt..................................t..psts..phhssshsthss.ssshsstWpt..pslslshhpHhhosp.phhVlsYppShshstptph......sslsp.ht.......................tphshhss.h.............shpsEhthlhphtsthhssltpshppp........th..pltQplshpl...........................psppthpplscph.cuhtslhstlsp.......sSullcAsluphstpstt......hhs......................shhLa-htssshGcslc.hspsslshththphh......................ssstthhpphhsGcshta.....lClha.spsLpshlVLPGGFulpuphsh 
...................................M-sHlhs-hth........sspsthhlHl.llsppsLtttthsh..............................................ht.hhhps.sphpssstp.s..............................Wpps.Fsthh..s.tltphL..s..........t.h.........p....hGLhhSLslhh...p.....asshsshtLRlsh.........................sst..........phhsl.Fhht-Ll..thch.tcspph...t.t.hth.........ths.t...................................................t.st.ssss.tstlt..p...Elhp..hhppsst............................................................ttshsuhhpsth..................t...tttptsth....hssLEPPuphRhht..................................t.p.ps..chhsts.tt.ts.tst.sp.Wpt..tslplh.hpchtstt.phhVl.YppShhhstchsh......psl.thls........................t.....................shtschthhhphttsshpslhtslppt.........th..pltQhhthpl...........................tpp.hhttlhchh.thhtshhpthst.......poshhpAhhupLstttst.......hs........................shhhh-hhpss.G-sl+.hsps.lshshphpht.....................sststhhhpphhsGc.hta.....lClha.s.pcLpshlVLPGGFuhphpls.................................. 0 0 0 0 +2923 PF04544 Herpes_UL20 Herpesvirus egress protein UL20 Kerrison ND anon DOMO:DM04384; Family UL20 is predicted to be a transmembrane protein with multiple membrane spans. It is involved in the trans-cellular transport of enveloped virions, and is therefore important for viral egress. However, UL20 operates in different cellular compartments and different stages of egress in pseudorabies virus and herpes simplex virus. This is thought to be due to differences in egress pathways between these two viruses [1]. 
25.00 25.00 43.10 32.70 22.40 22.10 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.04 0.71 -4.45 16 54 2009-09-11 15:08:44 2003-04-07 12:59:11 7 2 36 0 0 45 0 165.10 32 77.71 CHANGED GssDahloSuhsph.s.tpPsFo+pVllahhSAllL+PlCCllFhhYYhhossthhhlsuhshTshaYh.pLslp.hhhlYtNl+pDcLPLsssQphlluhlssupslsFhssuhptlFtssplFhhlhssptps.......h................shusshlshhushlYusDulsDuluFhLPRhWsRull+ ........GssDhhlSSAhsphss.s.pPsFoppslhahhSsllL+PlCClhhhhYYhhTtphhhhhsshshshsaYh.phhlp.shhlYhNl+pDhLPLusstphhluhhssutslhhhhsAhpthFtsstlFhhlhssptp..h.....h................shussshshhAhhlYusDslsDshsFhLPRhWsRslLp.. 0 0 0 0 +2924 PF01646 Herpes_UL24 Herpes virus protein UL24 Bashton M, Bateman A anon Pfam-B_946 (release 4.1) Family This family consists of various herpes virus proteins; the gene 20 product, U49 protein, UL24 protein and BXRF1. The UL24 gene (product of the 24th ORF) is not essential for virus replication, mutants with lesions in UL24 show a reduced ability to replicate in tissue culture and have reduced thymidine kinase activity as the UL24 gene overlaps with thymidine kinase [1]. 18.90 18.90 22.20 22.20 16.70 16.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.01 0.71 -4.82 16 158 2012-10-11 20:44:43 2003-04-07 12:59:11 11 1 93 0 0 143 0 174.40 32 65.99 CHANGED LphLPttRt+AGpRsHhRhY+pLhphh...shsplttaLs.........h.tPsspp..sclpLaFEVsLGpRlsDClhllpss.....tphhCallELKTChhss....shposo+psQRtpGLpQLpDoschLpphsP..sGspthplsPlLlFhuQRuL+slhlcp..hsspplpsssstLtshltsht-hssptpl ............................l.ptRhpsGhRsHhchY+tlhp.h...shsthsthLs.............h..h..shhpt.s+hpLhaEVsLGpRhPDClslhp.s...........ps...........ttshChllElKTChhsu......shpsso+ptQhspGh+QL+-ohphlpphsP....sGsp.hhhlsPhLlFhsQ+uL+s.hspp..hhspplpsshsslhshLtphp-hsl.h..h....... 0 0 0 0 +2925 PF02760 HIN HIN-200/IF120x domain Bateman A anon Bateman A Family This domain has no know function. 
It is found in one or two copies per protein, and is found associated with the PAAD/DAPIN domain Pfam:PF02758. 22.70 22.70 22.90 38.00 20.50 22.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.99 0.71 -4.89 10 158 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 24 18 53 201 0 163.60 54 42.91 CHANGED pccPlpVMVLKATcPFpYEosEpth+pMFHATVATcTpFF+VKVFNhsLKEKFltp+lIsIScYacpsGlLEINEASoVSEAsssQshEVPssII+cApcTPKIspLpcQsSGslVYGlFhlpKKpVppKshhYEIpDcTGsM-VVGsGchaNIsCEcGDKLRLFCF+L+ ..ppsPhpVMVLpATcPFpYEs.Eptp+pMFHATVATcopFF+VKVFNhsLK-KFhsK+IIsISsYhppsGhLElpc.sSoVS-ssssQphEVPpslIcpAspTPKIspLppQ.spGohV.GlFhlpKKsV.p.p..c.sshYEIpD..sTG.pM-VVspGc.hpsIpCEEGDKL+LhCFcL..... 0 5 5 10 +2926 PF03369 Herpes_UL3 Herpesvirus UL3 protein Mifsud W anon Pfam-B_2492 (release 6.6) Family \N 25.00 25.00 64.70 64.60 17.40 16.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.20 15 263 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 36 0 0 58 0 62.50 88 73.24 CHANGED VsFDTLFMVSSIDELGRRQLTDTIRKDLRhSLAKFoIACTKTSSFSus.sspp+tRtt...sp+sspSNKSLQMFlLC+RAHAt+VR-QLpuVIpuRKPRKYYTRSoDG+o+PsVPVFVaEFsAs-PVhLHRDNVlts .................................................................................................RKPRKYYTRSSDGRLCPAVPVFVHEFVSSEPMRLHRDNVMLA. 0 0 0 0 +2927 PF02718 Herpes_UL31 Herpesvirus UL31-like protein Mian N, Bateman A anon Pfam-B_1786 (release 5.5) Family This is a family of Herpesvirus proteins including UL31 (Swiss:P10215), UL53 (Swiss:P16794), and the product of ORF 69 in some strains (e.g. Swiss:O36420). The proteins in this family have no known function. 
25.00 25.00 56.40 56.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.67 0.70 -5.70 30 120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 80 0 0 108 0 262.80 32 85.44 CHANGED pup............stss+psppppthh........hshctaFshluppP-hElchLRpMssPIssocslsLPasLsp.hsscsCLsLSshGap.shGusCssCpssupsphsp.................-hsulhLAFlpQlsslhpa+sFahSlls..................tu.chl+pslsQPpLFasYalL+ssshcshslha......tsssshltMYllF.pspslHlspchlcpLhsss.ssYplssDlhpssalLsl....phcp.s...........ssslsssslhcKls-LshssElttEap+hhshhsch ......................................ttstttsts........ths.ptp..p.ttht............+.ph+thashhtppPs.Elphl+.hphPIstppslsLPFshpp.psscsCLsLSshG.p.s.tusCssCtsssp.t.st.................p.sshhLAFlpQhsslhcaRsFhhSlht......................sp-lLctshsQPpLFahYalL+sustc.s.lha.......ssputhhMallF.pspslHl.pchIcphLsAs.ssYclshclhpspaVLsVppp.t.ps.o.............ssplsssslhpKls-lshss-lh.cap+hhshhp-............... 0 0 0 0 +2928 PF03581 Herpes_UL33 Herpesvirus UL33-like protein Bateman A anon Pfam-B_1115 (release 7.0) Family This is a family of Herpesvirus proteins including UL33 Swiss:P10217 ,UL51 Swiss:P16792. The proteins in this family are involved in packaging viral DNA. 25.00 25.00 45.60 45.30 20.60 18.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.48 0.72 -4.08 29 92 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 74 0 0 76 0 77.30 38 58.70 CHANGED tlhFEsll.Pp-h-llhPTsDA+LNaLsasp+Luuhlpatpst....................tssCsHuplLppKp-thssllsKhLDlcpILcs ...lhFEshl.Pc.h-llhPohDA+LNalshsp+LAuhlcaspst....................sssCsH.uplLppKpchhsullsKhlDlctILc..... 0 0 0 0 +2930 PF03586 Herpes_UL36 Herpesvirus UL36 tegument protein Bateman A anon Pfam-B_3425 (release 7.0) Family The UL36 open reading frame (ORF) encodes the largest herpes simplex virus type 1 (HSV-1) protein, a 270-kDa polypeptide designated VP1/2, which is also a component of the virion tegument. 
A null mutation in the UL36 gene of herpes simplex virus type 1 results in accumulation of unenveloped DNA-filled capsids in the cytoplasm of infected cells [1]. This family only covers a small central part of this large protein. 20.50 20.50 20.60 21.10 19.80 20.40 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.59 0.70 -5.29 11 176 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 33 0 1 127 0 213.20 54 10.60 CHANGED tpc-tWtuslcAsLccsEs+ucFDAsElsRLc-hAsstGYcst...ch+ppAEpslsApApsspsAl-uVhuFNPYuspNpt......pshhPPlusL+sloWhDuFssAAPhYspLF.Glss-sLhpLh+IutulLctA.sAssGp..lDYapsVspluuDLttlPpLsKYVDFYp+GascF.shhu+LsphRu-shpAsGshshElutAhEplstlR.sPpsA++sL-tGVplhlPStsslhshsstLcc.DhspFcsTAYtEhh ..............................h...ptWhtsl.AsL.thEstthFsusELsRLRDhAAsuGaDh+...slhspAcQVVAAsts..TusoALDTVF+aNPYTPENss........lsPPLAhL+ulTWhDsFulsAPsaTsMF.GVslEGLhhLhRIpuslLhoA-solsGh..ssYhthlhchutsLhtlPtLttaVsFa.puatpa.t..stlpth+tchh.h.sth.h-hthA.Ephth.+.s.tsAt.hlctG..l..su..hlhth.t.hpp.c.p.h.tTAYtc............... 0 1 1 1 +2931 PF03277 Herpes_UL4 Herpesvirus UL4 family Mifsud W anon Pfam-B_4461 (release 6.5) Family \N 25.00 25.00 50.20 50.10 18.70 17.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.06 0.71 -5.06 15 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 35 0 0 68 0 184.80 44 88.16 CHANGED sThIAYoLpsl+sssshslPchEQllCuh-uGoRulsVGscsRCDpLPsGsllIQHsPlGTLlsVDsts-FCSYthhh....ttppstshpshssshhVhPFsoWsssupspplpSsouGlLTl.hhsssolaITlTlYGps...st.sssh.h.spssspssssshst..ts...............................................tDlLspsl+Euclps ..pThIAYSLpps+uShs.sLPDstQVVpsFEhGTRuIhV+GcpRpD+LPpGsVVIQHTPlGhLlllDCpsEFCuYtFhs....pcpppph.puh-uphaAhPFsSWVuSuRscssRSsouGlLTV.lWsscoIYIThTIYGssspt.sssssps.stssss.spsssss..sup.....h.hQss.........................................tDLLsElLREhpLps............................. 
0 0 0 0 +2932 PF05072 Herpes_UL43 Herpesvirus UL43 protein Moxon SJ anon Pfam-B_5928 (release 7.7) Family UL43 genes are expressed with true-late (gamma2) kinetics and have been identified as a virion tegument component [1]. 24.90 24.90 38.40 34.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.16 0.70 -5.71 9 65 2009-09-11 05:23:58 2003-04-07 12:59:11 8 2 29 0 0 61 0 353.40 33 91.23 CHANGED GsstssptChtChhssspulhthuhpsulhsuulhll.hhppshAhsshhhssIsshhlsh.hRhshphh-hlshlG+shQhlsshsuulsWslu........stsh.ts.phslssuhhshslhus.lt.apaVshAsusshpa+suhLshssGsllGloshhhslps.shhuhshslls...hssspD....sussLpsTCaY+hsRttslps.scLG+....uhhssss-ssttcEcs.sutsshcsphsh...lhlshVhhlssPhlhslp+hh..ttusphpsstsshhsshsGalluhulp.Lslh.s.pcsLhpsllhhashhtshulsLsshGh.hGsslhLAuusuhuhhsslslRppspshp.....+LAAuplsKslhsslh.....sshhlC ........................Gpsssspsp..lsCshshspuhhtlhlpuAslshshsll.h.ptshA.ssssshhslushhLuh.hRss.shscshsplhthlphhuuhsAhlhWslu.............hshs.ts.phulssuhhshlVhAs.lashahl....ussshFhsshhhlsuGhhlGsSAthh.ls.s.uuhuluhulls...hssspD....As.shccsChhhtsctss.h+sspD.tR....u..ssss..s..ss.........sp+ppsssusplh.sthst...lhlslVshlussslhshsthh..upusshsshshhstshluGHlssuhsp.Ls.shs.shcLocsllhlHsslplhslsLshts.tl.ulhhhLuuAshluLspslslR+Rh+ttc.....+LAAo.hsRuLahslY.....luhhlC............. 
0 0 0 0 +2933 PF03387 Herpes_UL46 Herpesvirus UL46 protein Mifsud W anon Pfam-B_2545 (release 6.6) Family \N 25.00 25.00 101.70 101.50 15.50 15.20 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.31 0.70 -6.27 11 70 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 33 0 0 71 0 437.90 44 65.51 CHANGED suhpshS.pslhpcRhctGCL.........LPsPpslhsuAVtALc-ts-slhPssLhuspRpssLhsh+sNpVPESlIlsshusDsps-Yh+pYsushppsLscttLotsslhRslhspYW+YLp..tooGs-lsssstsssss....p.osllLhhsshs.K.Lu+pPFKpcsssusYtsshstL+-AhctlQ+YMYYMRPsDPhssSsDTslRLpElLAYssThYpWhlWhhDslDupVlRpht.h.phstGP.RsshsP-slFtRHLcsGPulsoGo.ussh.............hLssssuslLusLl+luslWppspW+usspG.sssAIVAAVELlollHHHhQYLlNhshsGYssWlcGGlpssaLpuALRuQpRFp+hhG.............pLhPTMospSWushEpuspsWFchAlA+Sllsa...GsPTtaYpslLpsl.........ssP.psphsspsss............stssts.hpstputPPsssu ..................s....t.s.tGlsERRlhsGCL.........LPTPpslLuAAVuAL+p+oD-hpPuhLpss-RustLuuppH.NsVPESLIVcslAuDs+hEYlR+YuuAAppsLu-scLouutlpRulLspYWKYLp..ssSGl-VP-cssscscs......SltlLLpPTlusKhLuRsPFKstussApYsAslAsLRDAl+tlQpYMaFMRPsDPopPSsDTulRLpELLAYVusLY+WA.Whl.TsDt+VC++Ls.ssR+hhsh.tuspuPs-hFuRHL-pGPosooGS..hpsh.............sLpAulucVLupLpRLusLWpsuchpuGTaG.sscslVusVEllSlVHHHsQYIINhTLsGYssWussuLsNpYLRAAlcuQcRFs+hsu.............sLFPTMousSWAcMEhSl+uWFstALAtsLLpp....GsPoh..HYcslLchl........uSp.sphptussPss....................................st............................................. 
0 0 0 0 +2934 PF03362 Herpes_UL47 Herpesvirus UL47 protein Mifsud W anon Pfam-B_2182 (release 6.6) Family \N 25.00 25.00 163.40 101.80 18.80 18.30 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.28 0.70 -6.16 13 117 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 34 0 0 101 0 414.00 36 60.55 CHANGED cshcEcphhctpss.hs...hstW.u..sssastGsMYsusPs..csthsa+RshpQutALhh+lstsuLpsulssp.thostcAlhFLlDAslRlAtNsahsupp..................hhtthphhoslp.lPtusus.lLpssshpsP..............tpGPpAAlhRuuhGuLsYWPELRssLsc.schhlRYAtsth.hAEsaLLARhp.utpsuhsspEhc....hLushlTLhsllActslpaLhsusuthLp.scssctshptsptststptlPlsS.tLhsAEstsLushsusph.htssuLutshssuYhAlR....oAhTsLhh-au.t.....sptptcsscthssAhLussllLQRLLGHhNhlLspLstAAhhGGpsl.sVhptThtcYphLhpsssPLYp.sols-Fhc-R--AMcpLcLcs......ssu.sPhsuhchl....lp.tht.ssL-slh....shs.ssshshLGshVsls-hltca+chlhuc ........h.h..Ep.hh......ht....stW.s..t...s.GtMahtt.s..s.hsh.hpsltQupAlhaphhhsshhsthspp..ho.spuhuFLhD.AslRluhNshhhupp.................pshthhp.hhh.stLtslsttuss.lLpssthshP..............ppGPpsAlhRu.hGuLhYWPtlRhhlscssp.hsRYAstth.lA-hhLhuRhp.shpsphsspEtt....hLuphhslhsshutthlpWlphssuhhLt.sh.s+sAahsVstp..ahhlPlsSshLssAEstlLGclsssss.hts.ALssshhsuY.AlR....TAhoshhlcaA.......cstcpspschhspAhLussLlLQRlLGHANhlLshLstAAhhGGhsh..hlhpso.ctYspLhhAssPLYsppThscFW+DhcsAhcplslcP......sos.sP+sshRhl....Ic.shhh.sL-sh.....Pt..........Pp.Vcls-.h.paRp.lhG........................ 
0 0 0 0 +2935 PF04823 Herpes_UL49_2 Herpes_UL49; Herpesvirus UL49 tegument protein Mifsud W anon Pfam-B_3850 (release 7.6) Family \N 25.00 25.00 27.60 112.50 19.50 18.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.11 0.72 -4.02 13 97 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 34 0 0 91 0 96.10 58 33.54 CHANGED sRAsPGsp...slstuK.luFSssPsosousWpusThuaN++lFCtAVutVAttHAptAAtuLWDhssPRoNE-L-chLptAsI+ITVsEGhsLlptAN ..........s.........tuhu++LpFSTAPsoPoAPWsspssuFNKRVFCAAVGRlAAhHARhAAlpLWDMspP+TDEDLs-LLshssIRlTVCEGpNLLQcAN.. 0 0 0 0 +2936 PF04540 Herpes_UL51 Herpesvirus UL51 protein Kerrison ND anon DOMO:DM04380; Family UL51 protein is a virion protein. In pseudorabies virus, UL51 (Swiss:Q85227) was identified as a component of the capsid [1]. In herpes simplex virus type 1 there is evidence for post-translational modification of UL51 [2]. 25.00 25.00 33.60 33.50 21.10 19.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.66 16 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 33 0 0 50 0 160.80 50 66.65 CHANGED lCGhtstscc..pYE.lpsss..sss.s.hRLpEAlssVNsLLPAPlTlEDsltSADssRRLV+ApuLARTYpAC.......RNLECLu+HpsutsssuL-AVVpsHhtsspRlADTChAuLhphYhSVGAs-tsTDshV-QAIRhsAEo-lVMuDVAllE+ALGlsupssssu .........lCGhttpspc...pYE.lpsus..ssstu.hRLpEALssVNuLLPAPlTLEDsltSADsTRRLV+ApuLARTYpAC.......+NLECLuRHpsut-sPsLDAVVtsHtpss+RLADTChAulhphYhSVGAsDtoTDshV-QAIRhsAEo-VVMsDVAllERALGLsst.t...sh... 0 0 0 0 +2937 PF04537 Herpes_UL55 Herpesvirus UL55 protein Kerrison ND anon DOMO:DM04378; Family In infected cells, UL55 is associated with the nuclear matrix, and found adjacent to compartments containing the capsid protein ICP35. UL55 was not detected in assembled virions. It is thought that UL55 may play a role in virion assembly or maturation [1]. 
25.00 25.00 26.40 25.40 17.80 17.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.48 0.71 -4.86 7 43 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 31 0 0 35 0 164.90 38 89.39 CHANGED ttsh.sshp.stlsslhpVlsPLsl-soapSsp.spcpPsssshhlssRoYhlRAsCppssclHAFFhGLupcss.shs..hsslpshsplhNpp.hhpchtshp...phCcuPFStATlhDsl-ss.....shsIpGlsaHCHCcs.FSh-CWtuA.tAh-+lsshs+shpshst ........htss..sshhhphPslopVssPhsLssoWpupp.s.......hcssps.......ssuslssRoYllRAsCsossslHsFFaulhc-tstphs..hs-LcsFscllNp..llpELtsc+.stthCssPFSsuTIhDssssut..........phsIsGlsYHCHC+sPFSh-CWpuAsuAhp+ltSlupuhtusp.t........ 0 0 0 0 +2938 PF04534 Herpes_UL56 Herpesvirus UL56 protein Kerrison ND anon DOMO:DM04377; Family In herpes simplex virus type 2, UL56 is thought to be a tail-anchored type II membrane protein involved in vesicular trafficking. The C terminal hydrophobic region is required for association with the cytoplasmic membrane, and the N terminal proline-rich region is important for the translocation of UL56 to the Golgi apparatus and cytoplasmic vesicles [1]. 25.00 25.00 32.70 29.00 20.80 19.60 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.34 0.71 -4.60 3 23 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 0 0 22 0 193.30 71 84.13 CHANGED MASEAAQPDAGLWSAGcAFADPPPPYDSLSGRNEGpFVVIDlDTPTDoPPPYSAGsoLlGPlsPsSSG-GEssERGRSRpAA.RAARRARRRAERRARRRSFGPGGL.luTPLFLPEThluAPPDVsuDLlSGLPTYAEAsS........DHPPTYATVsAA........RoTEQPuGuluPsDQPRoQsSGsWRPPpVNSRELYRAQ..............RAARsu ...............MASEAAQPDAGLWSAGNAFADPPPPYDSLSGRNEGPFVVIDLDTPTDPPPPYSAGPLsSVPIPPTSSGEGEASERGRSRQAAQRAARRARRRAERRAQRRSFGPGGL.LATPLFLPE.TRLVAPPDITRDLLSGLPTYAEAMS........DHPPTYATVVAV........RSTEQPSGALAPDDQRR.T.QNSGAWRPPRVNSRELYRAQ..............RAARGS.... 
0 0 0 0 +2939 PF01763 Herpes_UL6 Herpesvirus UL6 like Bashton M, Bateman A anon Pfam-B_878 (release 4.2) Family This family consists of various proteins from the herpesviridae that are similar to herpes simplex virus type I UL6 virion protein. UL6 is essential for cleavage and packaging of the viral genome [1]. 22.00 22.00 22.20 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.70 0.70 -6.33 30 194 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 91 0 1 167 0 449.10 31 82.83 CHANGED htahEILpGchuYscGQslasulRsssshhRQl.sslh+ssLsussY--ltsDWppHhss.......lspRastppptps.thtcpsFpoWppTL+hoLhchlpshls.hlastssss....Ys+YlDWlsslGlVPll+p.....s.......tt.hhpphppthppstppp.ssp.+lhssllppstphlpplhpslsultIs-asclpIhashpppphhsh..hssc+hcshVlhpPlhts.spllFDSPlQRLatElhpCasLpEHAKlCQLLNTuPlKlLlGp+scs......ssp+ll-+l...-ppspsusAtpcLl+Lllslps.+plusITDsV-saLp-sossllDpsplhssststh..........................spssppul+cpVssslh+sLEs.INp.FcTIcsL+ptNcshhp+lpphEspLp+hppc............spsssss..................sspl.s.sshpulptl...hppslh.sosshs-sphVANSFhSQYlPsac-phccLopLWEpElhRsFKLs+lssNQGpElulsYSssoIolLLuPahaslLcltpls.LlscphshhS.pElssslacpSRLpsYlsDlut+a.....s ..............hhFhclLpGphGYspGQslapslRsspshh+Ql.hslhpthLsusshcclhs-Wppahp........h..+ht.ttt.t..thtpphapsWttoL+pollshltsllt..has.st.s....as+YlDWlsslGlVPlhch..................ht.h...h.h..tt......thssphhtpsh.hlhplspshpustlhsascsplahphpptph.sh..hpspchchhVhh.Plhh.....t.ttllFsoPlt+lh.ElhtpptLpcHt+lCpLlNThPlKslhsp+pp.......thtchlphh...pppsptssAtppll+hllNhps.+phhslpDoVcualp-hsspllD.s...hssp.s............................ts.thppsh+s.hhppl.thLEt.lpp.hppIppL+phNtth.pplpphcttLp+httt............t.t..t........................ph.p.sh.tshphh....t.tl..hsh.hs-sthlsNSF.upalPshtp..ccLopLWEpEhhRsF+ltphhssQGtE.ul.YSs.slthhlhPah..llph.pht..lstt.h.hu.tElhtslacpo+hphYlp.lt.h.................................. 
0 1 1 1 +2940 PF01677 Herpes_UL7 Herpesvirus UL7 like Bashton M, Bateman A anon Pfam-B_1086 (release 4.1) Family This family consists of various functionally undefined proteins from the herpesviridae and UL7 from bovine herpes virus [1,2]. UL7 is not essential for virus replication in cell culture, and is found localised in the cytoplasm of infected cells accumulated around the nucleus but could not be detected in purified virions [1]. Members of the herpesviridae have a dsDNA genome and do not have a RNA stage during there replication. 25.00 25.00 58.00 57.70 18.10 16.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.38 0.70 -5.01 33 122 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 82 0 0 109 0 220.00 27 77.39 CHANGED hshEV+cs.sshslthsssslsshsV....tttlhh.h.t...sshhshcsYhppphspssFtGFshsslsssEDtVpslslsPhllpcRhsla+PpshhcFpLCsll.hLENh..tssosshhhplhshLchlts+.ss.sphsphLhpusphLlsTlhhhath..tshcsphllsphshh+h..Lhp.tpssshsllpsla....t...hpshpLstsspps........hhhcstsuh..hNthass ......hshEV+ps.sshtsthsusslsshsV....stplhhhhps...sphlsscpYhppshspsuFtGFshsslsssEDtVpslslsPhlLpaRhsla+Pcshh-FpLCsLlhhLENh..tpsosshhhplhsaLphsts+.ss.pphpphLhpusphLlsThhahhth...shcsphllsph.hhch..Lht..tsss.sllpsla....t.s.tsshpLstsstps...............tthhps.suh..hs.hh..s............... 0 0 0 0 +2941 PF03554 Herpes_UL73 gpUL73; UL73 viral envelope glycoprotein Finn RD anon Pfam-B_3001 (release 7.0) Family This family groups together the viral proteins BLRF1, U46, 53, and UL73. The UL73-like envelope glycoproteins, which associates in a high molecular mass complex with its counterpart, gM, induce neutralising antibody responses in the host. These glycoprotein are highly polymorphic, particularly in the N-terminal region [1]. 
19.60 19.60 20.20 20.00 17.90 19.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.95 0.72 -4.16 16 253 2012-10-01 21:33:21 2003-04-07 12:59:11 8 2 54 0 0 165 2 89.80 51 77.14 CHANGED hhshsstshsstssspspssssssspspssFYshsCsADTYt.oLsSFSSIWsllNslllhsAsslaLpYhCFp+Flsshs+ ...........................................hso.hooT.STTSTKsoSTTHDPN.VM+tH.spsDFYcAHCTSHMYELSLSSFAAWWTMLNALILMGAFCIhLRahCFpsFsspTh....... 0 0 0 0 +2942 PF01802 Herpes_V23 Herpesvirus VP23 like capsid protein Bashton M, Bateman A anon Pfam-B_1435 (release 4.2) Family This family consist of various capsid proteins from members of the herpesviridae. The capsid protein VP23 in herpes simplex virus forms a triplex together with VP19C these fit between and link together adjacent capsomers as formed by VP5 and VP26 [1]. VP3 along with the scaffolding proteins helps to form normal capsids by defining the curvature of the shell and size of the particle [1]. 25.00 25.00 26.60 26.60 23.70 23.60 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.63 0.70 -5.38 32 501 2009-09-11 06:10:29 2003-04-07 12:59:11 12 1 91 0 0 208 0 163.90 62 98.31 CHANGED tplhlslss+Loss-lupLQcphGpllslssh++hhslpsluLpshh.tssssDalplhshh+cphhAllpcVpsspllhshlshG....psht.l+NT.uP.FphssGDtlsllPPlFst.pssl.pLpSssh-LlFPhsVPpsLApEllt+llshslhuhststp..tsshsch...hpslpYpG+pasLs.shpptss.s.ssl+sLslshshhss.ustllhsllss.Lshpspc.hlsthhplhsspp.................hphhc.sshsh.pDhsR...............lsuahohhppLuslhshpshhpVssas.ssspssss......h ........................................................................................s.....sht.lpNs.uP.hphppss.lsllPPhFt....t........sp.......h.hL-SNGFDLVFPMVVPQQLGHAILQQLLVYHIYSKISAGAPsDVNMAELDLYTTNVSFMGRpapls........................................................................................................................................................... 
0 0 0 0 +2943 PF03327 Herpes_VP19C Herpesvirus capsid shell protein VP19C Mifsud W anon Pfam-B_3451 (release 6.5) Family \N 25.00 25.00 30.10 53.50 18.50 18.10 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.55 0.70 -5.52 31 121 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 83 0 0 106 0 283.70 27 73.81 CHANGED shsp..shhschhhttttlsptp.shus.hh.hhpsss...suhcslllSlshlshs....h.st..ssusul+shltshYssp...upLtphssslpthlpsp.a.HchlpslGsllpslspshlsplTsVh+Gsshstpsspht.....slh.......lPs-hFlDlDt.h.....................ttspsuhphlYlshlYspph.pcttspla.hhpSttscpslhshLchhaushRtpph.......................................................slpspsshsphhaGAhs+LGhhssssshp.....psphphpussLPVVplpshhsc.hGsWp.h ..............................tpp.stchh.shtpltptpshus.....tpsss...susRshllSlsFLshs....h.sp..suusAlRstltu.Ysss.htscLschstsLpshlcs+sa.HchlphhGsLlphlopstlsplTsVspGsp.uscsspss....sslh.......lPushalDLDtph.....................thsssustalYLlhs.Y.ppct.scptsplY.lhpSp.hs.psltssLcthFuchRhspshp.....................................................clpu.psstpsssauAhscLGhhsps.sshp....+sphh.tusslPVVhl-shshc.sG.sWpth........................... 0 0 0 0 +2944 PF01521 Fe-S_biosyn HesB-like;HesB; Iron-sulphur cluster biosynthesis Bashton M, Bateman A, Wood V, Mistry J, Eberhardt R anon Pfam-B_518 (release 4.0) Family This family is involved in iron-sulphur cluster biosynthesis [3]. Its members include proteins that are involved in nitrogen fixation such as the HesB and HesB-like proteins [1] [2]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.35 0.71 -4.13 98 8207 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 3441 17 2003 4486 4099 102.60 30 78.59 CHANGED Mplsl..TcsAtpplpphhss.ppt.........lhLshcsGs........th.Cuh.uspaplhhls..cs......ssh-hhlc.ssshslhlcs..hstsaltpshslDas.thtshsLp.ssss.hlssssslsc ........................................lpl..T-.uA.sp..+lppllsp.psps...........................tlR.lhVpsuG............Cu..GhsYshshs.c...cs...................................spsD.h..s....h.......E.....p.....p..........G......l...p...l......h..lDt..hSh.aL...s........G..s.p.lDa.s.c.s.h.h.sp.t.Fp......hpN.P.Nu.p.s..p.CGCG........................................ 0 581 1176 1615 +2945 PF02444 HEV_ORF1 HEV_ORF2; Hepatitis E virus ORF-2 (Putative capsid protein) Mian N, Bateman A anon Pfam-B_1896 (release 5.4) Family The Hepatitis E virus (HEV) genome is a single-stranded, positive-sense RNA molecule of approximately 7.5 kb [2]. Three open reading frames (ORF) were identified within the HEV genome: ORF1 encodes non-structural proteins, ORF2 encodes the putative structural protein(s) [1], and ORF3 encodes a protein of unknown function. ORF2 contains a consensus signal peptide sequence at its amino terminus and a capsid-like region with a high content of basic amino acids similar to that seen with other virus capsid proteins [1]. 19.70 19.70 21.60 21.60 18.30 18.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.68 0.71 -3.88 3 266 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 12 0 0 229 0 81.00 74 96.04 CHANGED MGS.PCALGLFCCCSSCFCLCCPRHRPVSRLAAVVGGAAAVPAVVSGVTGLILSPSQSPIFIQPTPSPPMSPLRPGLDLAFANpPuHLAPLGVTRPSAPPLPPVVDLPQLGLRR .Mt..PCALGLFChCSSCFCLCCPRHRPsSRLAsssGGAAAVPAVVSGVTGLILSPS.SPIFIQPTPS..h...pPGLELALsspPs..AP.G................................. 
3 0 0 0 +2946 PF02455 Hex_IIIa Hexon-associated protein (IIIa) Mian N, Bateman A anon Pfam-B_2076 (release 5.4) Family The major capsid protein of the adenovirus strain is also known as a hexon. This is a family of hexon-associated proteins (protein IIIa). 25.00 25.00 47.90 47.90 19.00 19.00 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.58 0.70 -5.81 13 134 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 100 1 0 130 0 481.10 62 87.16 CHANGED KVLsIVNuLl-spAIRsDEuGtlYsALLpRVu+YNSsNVQoNLD+LlsDV+pulupp...RhhcsssLGShsALNuFLuoLPusVpRGQENYsuFluALRLhVsEsPp.oEVYpSGPsaalQsuRpG.lpTVNLopAFcNLpsLWGVpssstc.psslSSLLTPNTRLLLLLlAPFT-usolS+DSYLGaLlTLYREslusu.clDEcThpEIspVuRALGs-.DsusLpuTLNFLLTNRpp+l.PssaoLosEEEpILRaVQpuluLhlhp-sts.sosALDpsutsh-PSFYuuNRsFINRLhDYhcRAAAhsPsYFpphlhNP+WlPPsGFaTGsa-hP-...-u..FhWDcscs................phhcccss-cpscstss....sssssutPuS.hsphs........................................uphspshhsGss-.....lhtPtps+N............h.Nssl-pLlDthuR....W+Thtp-.t...............++.+.tt......sc-ssucpsshhchtGsG...........tNPFAHL+P..+u...pha ....KVLAIVNALsENKAIRPDEAGLVYNALLERVuRYNSoNVQoNLDRLVTDVREAVAQRE..RF.+sssLGSLVALNAFLuTQPANVPRGQ-DYTNFlSALRLMVoEVPQ.SEVYQSGPDYFFQTSRQG.LQTVNLoQAFKNLpGLWGVpAPlGD.RuTVSSLLTPNSRLLLLLlAPFTDSGSlsRsSYLGaLLTLYREAIGQA.pVDEQTaQEITsVSRALGQp.DTsSLcATLNFLLTNRpQKI.PsQYuLoAEEERILRYVQQSVuLaLMpEGAT.PouALDMTARNMEPShYAuN.RPFIN+LMDYLHRAAAhNs-YFTNAILNPHWLPPPGFYTGEaDhP-s.NDG....FLWDDlDs................thh.ptph..tcc..tsts......sstts..stsus....hPSLst.hs.stSst......................................................GRloRPRL.GEpEYL..NDsLLpP.RtKN............hsNNGIESLVDKhsR....WKTYAQ-pR-..ts.............t.pp++pppth........---DSADDSSVLDLGGoG...................sNPFAH.LpP+h.h...h........................................................................................................... 
0 0 0 0 +2947 PF00349 Hexokinase_1 hexokinase; Hexokinase Sonnhammer ELL, Finn RD, Griffiths-Jones SR anon Prosite Domain Hexokinase (EC:2.7.1.1) contains two structurally similar domains represented by this family and Pfam:PF03727. Some members of the family have two copies of each of these domains. 20.20 20.20 20.50 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.36 0.71 -4.99 14 1806 2012-10-02 23:34:14 2003-04-07 12:59:11 16 20 455 60 944 1749 11 195.20 35 42.73 CHANGED pshhcplcchhptFplosEpLpclsc+FhpEhcKGLs..+csss........l.MlPsaVtshPsGsEpGDFLALDLGGT..NhRVlhVclsGsp..ph-hppppYplP.......cclhpupu...cpLFDaIA-ClpcFh-cht........pspsLPLGFTFSaPspQsulspuhLlpWTKGFch.....sssEG+DVVsLLpcAIp+Rshs.lcVVAllNDTVGThhussYs- ...........................................................t......lpph.t.h..p..ls..pp..p....Lhplhpphhp.-h...ppGLp......pcs....ps..................................lpM.lPoa...Vp...s...h...Ps..........G....s..........E.p.......G..p.a.LA.LDLG..GT..NFRVhhVpl..pspp..................sh.c.h.p...p......p......h...a.s...lP..........................................pcl.h.p.Gsu.................ppL..F.-aIApslsc..Flcp.p.t.h.............................p.....t..p...pls..L.GFTFSFPspQss....l..s.p..Gh.Ll.pWTKGFph.....................s.s.s.....G..c..D..V.Vt...h..L...p......c..A.....l......c+......+......s....................................................l.......cl.s.A.l.lNDTVGTh.hussYp.................................................................. 1 201 397 678 +2948 PF03727 Hexokinase_2 hexokinase2; Hexokinase Sonnhammer ELL, Finn RD, Griffiths-Jones SR anon Prosite Domain Hexokinase (EC:2.7.1.1) contains two structurally similar domains represented by this family and Pfam:PF00349. Some members of the family have two copies of each of these domains. 
21.30 21.30 21.40 21.40 19.60 21.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.45 0.70 -5.17 15 1869 2012-10-02 23:34:14 2003-04-07 12:59:11 11 20 457 61 998 1816 6 217.50 34 49.64 CHANGED psclGlIlGTGoNuCYhEchppIptlcG.........spupMsINsEWGuF.DstpLs.sRTcaDlslDcp.SsNPGpQtFEKMISGhYLGEllRhlLlcLscpGlLFc.....Gptss+Lpsshhh-TphlScIEsD.pcsLccscslLpp.LGlpsTss-phll++lCclVupRAApLsusGlAAllp+....RGhcthpsslGsDGSVYcpaPpFpcth..tpsl+-Lhsc.....s-..lshl.uEDGSGtGAAlluAVAt+ ........................................................stlG...lIl.........G..T.GoN.As.Y.hE.ch..p..s..l..t..h.hps............................sps..p..M.slN.hEW...GuF....Ds...t......h..t......tTcaDpt.....lDp............t.....S.................h....N........P..........G................p....Q..............h...a...........EKMlS..GhYLGEl.lRh.l.Llc.h.h..p..........p....s...h...l..Ft.......................s.p...h...s.......p.....p...L....t.p........h.h.....hp..............Tph............l.S.t.....l.....c.........p.....D.........................p...................s............l...p....p........s.p.p.lL.......p.......p.............l...........s.......l......p......s........o...t........p.......-.......t.h...hl.pplsphlupRuA.p.Lsus.............ul................u.ul...................l...........p.......+.....................................................................p............t........................t.....................................p..........ss..lusDGola....ch...a....PpFpphh..pp..s..l.p...p...l...hs................pp.......lph..h.u..cDGS..GhGAAl.lsA.hs......................................................................... 1 215 418 718 +2949 PF03559 Hexose_dehydrat NDP-hexose 2,3-dehydratase Bateman A anon Pfam-B_1070 (release 7.0) Family This family includes a range of proteins from antibiotic production pathways. 
The family includes gra-ORF27 Swiss:Q9ZA32 product that probably functions at an early step, most likely as a dTDP-4-keto-6- deoxyglucose-2,3-dehydratase [1]. Its homologues include dnmT from the daunorubicin biosynthetic gene cluster in S. peucetius [2], a similar gene from the daunomycin biosynthetic cluster in Streptomyces sp. strain C5 Swiss:Q53880 [3] , eryBVI from the erythromycin cluster in S. erythraea and snoH from the nogalamycin cluster in S. nogalater. The proteins in this family are composed of two copies of a 200 amino acid long unit that may be a structural domain. 25.00 25.00 49.50 35.60 20.40 15.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.01 0.71 -5.30 79 286 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 121 0 55 284 181 203.50 38 87.16 CHANGED psss-lhuWlspp+s.ppchpscRlPLscl..su...Wphssss..lsHcoGRFFsV.GlpVps.....ss.pcV..ssWsQPllp.sph..GllulLs+chcGVLHhLhQA+hEPGshsslpLuPTVQsT.uNYsplppusps.aL-hhhsss...s...RVhhDslQSEcGuhFh+tcNR.hlVEs..s--.........lshsss..FpWlTluQlppLL.ppsphVNhpuRTlLuCL ................sclhuWlspp+s.tpphpsc+lPLscl.....suWphssss..lsHcsGRFFsVhGlpVps..........ss.ptV......spWsQPlIptsph..GllulLs+chsGVlHhLhQA+hEPGsh.s.s.lpLuPTVQsT.uNYsphttu.....tps..aL-hhhss....s...t..s..............cVhhDslpSEcGupFh+pcNRphlVEs...s--.......................ls...s..ssFpWlTluQlppLl.p.psshVNh-uRolLuCL........ 0 14 41 49 +2950 PF04209 HgmA homogentisate 1,2-dioxygenase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Homogentisate dioxygenase cleaves the aromatic ring during the metabolic degradation of Phe and Tyr. Homogentisate dioxygenase deficiency causes alkaptonuria. The structure of homogentisate dioxygenase shows that the enzyme forms a hexamer arrangement comprised of a dimer of trimers. The active site iron ion is coordinated near the interface between the trimers [1]. 
19.30 19.30 19.30 19.30 19.20 19.10 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.53 0.70 -6.08 14 1185 2012-10-10 13:59:34 2003-04-07 12:59:11 8 6 998 2 527 1251 479 378.40 34 93.62 CHANGED pYhoGFGNcapo..EulsGuLP.GpNSPQpssaGLYAEQLSG.oAFTuPRppNpRoWLYRIpPSssHtshh.hc....tphhsssast.ts..sPNpLRWpPh.lP...ppssDFV-GLholsGuGcs..hsppGhAlHhYtsNpSMtscsFa.NADG-hLIVPQpGtLplpTEhGclpVpPuEIsVIPRGh+FpVplh.ts..sRGYlsEsaGspapLP.....DLGPIGANGLANPRDFpsPV......AtaEDpcs....hpllsKapGpLassc.sHSPhDVVAWHGNYsPY...KYDLppFsslsoVuFDHsDPSIFTVLTuPSspsGsAssDFVI..FPPRWhVAE.cTFRPP..aYHRN.....sMSE.........FMG..LIpGtY-AKptG.FhPGGuSLHshMosHGPDhpsFEtAopA...-L+Pp+..ls-.ohAFMFEophshtlocaA.hctpplppsYhpC.WpsLcpcF ..............................................................................................................t...................................P.p.s..h.s.LYtEphsG.osFh.ss.......pohhY+.h.h.s...ss.p....h..........................t....h.......t......s...h....t...t...........tph..R.h...s.h.hs..........t....hsalpGhhs.hsususs.....htpp.shtltlat..ss.p.uM..............s..c.h.Fh..NA.D.GD.Ll.hsppGp..hcl.....tTEh.....G.p..............lp....lpss-hslIP.R.Gh..pa+.............l....ph......s.s...................sRsa.lh.Esh.s....u....thpLP......-h..G.l.G.s..pu...l...h...ssRDhpsP.s...............A.t.a...p....-..c.cs.............saplhsK..hp......G........p.......l.......a..p..s.....p..h..sa..sPh.DVVuW.HGshsPa....................+aslccFpslsohta.H.sPSlasVh............p.s....Ghs.lssFV.....PR.h.s.u-..ps..h+..sP..aaHpN.....lh.S.E...hhG...h.l.pG.sa-.A.p.......t.....p....G...FhPGGhS.LHssh.sHG.P..cstsa..-.t..A.p.u.......p.l..t.....pc....hs-...h...AhMh-Tp.............hslpho.chA.h...p...s.....t....hpt..pYh.t.s.Wt........................................................................... 0 150 295 428 +2951 PF01085 HH_signal Hedgehog amino-terminal signalling domain Finn RD, Bateman A anon Pfam-B_1424 (release 3.0) Domain For the carboxyl Hint module, see Pfam:PF01079. 
Hedgehog is a family of secreted signal molecules required for embryonic cell differentiation. 20.00 20.00 20.70 20.20 19.60 18.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.16 0.71 -4.68 4 383 2012-10-02 01:02:30 2003-04-07 12:59:11 13 11 169 34 136 371 1 126.60 72 36.71 CHANGED CGPGR..shGRRR.sRKLsPLsYKQhhPNVuEpThGASGhhEG+IpRsSERF+-LsPNYNsDIIFKDEEpTGADRhMTpRCKDKLNuLAISVMN.WPGV+LRVTEGWDEDGaHucESLHYEGRAVDITTSDRDRsKYGMLARLAVEAGFDWVYYESKuHIHCS ..................................................................p.hsh..hKQh.PslsEpshGASG..EG+lsRsst+F+.-.L.ss.NY.NsDIlFKDEEso.GADR.lM.TpRCK-+LNsLAISVMNpW..P....G..VKLRVTEGWD..E......D.G.H..............H...uc-SLHYEGRAVD...IT.T....SDR....DRs...KY...GhLARLAV.EAGFDWVYYES+.sHlHCS............................... 0 33 44 85 +2952 PF00730 HhH-GPD Endonuclease_3; HhH-GPD superfamily base excision DNA repair protein Bateman A anon Pfam-B_854 (release 2.1) Domain This family contains a diverse range of structurally related DNA repair proteins. The superfamily is called the HhH-GPD family after its hallmark Helix-hairpin-helix and Gly/Pro rich loop followed by a conserved aspartate [2]. This includes endonuclease III, EC:4.2.99.18 and MutY an A/G-specific adenine glycosylase, both have a C terminal 4Fe-4S cluster. The family also includes 8-oxoguanine DNA glycosylases such as Swiss:P53397. The methyl-CPG binding protein MBD4 Swiss:Q9Z2D7 also contains a related domain [1] that is a thymine DNA glycosylase. The family also includes DNA-3-methyladenine glycosylase II EC:3.2.2.21 and other members of the AlkA family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null --hand HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.54 0.72 -3.76 72 13897 2012-10-03 02:11:09 2003-04-07 12:59:11 20 76 4824 121 3791 10642 6191 138.90 24 47.70 CHANGED lssllspQssspsspphhtclhpth...............h.sscsltphs..pclpplh.................hGhhppKAchlpphuchlhcphtuphspshpphtt............................l.GlGthoushhhhhuhsh.p..hhs..lDstlpRhhpRlhhhpt....s.cphppph.chht. ..............................................................................luhllspQsss.tsl...t.thh.tchhpth..........................................................................PT...sps..l.u.s.....s....s....-clh.p.h.hps...................lG.h.a.s+A..+....s.lp..psuphlhcpasuphPpshppltt......................................................................................................................................................................................................................................hsGlGhtoussshhhuhuh.....hhs..VDspl.t.R.l.h.s.Rhh..hh.pt............s.tp..hcpth.t.......................................................................................................................................... 0 1240 2398 3196 +2953 PF03753 HHV6-IE Human herpesvirus 6 immediate early protein Finn RD anon Pfam-B_1006 (release 7.0) Family The proteins in this family are poorly characterised, but an investigation [1]has indicated that the immediate early protein is required the down-regulation of MHC class I expression in dendritic cells. Human herpesvirus 6 immediate early protein is also referred to as U90. 
25.00 25.00 27.00 26.80 18.30 17.90 hmmbuild -o /dev/null HMM SEED 993 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.99 0.70 -13.75 0.70 -7.07 3 44 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 6 0 0 41 0 565.20 63 91.78 CHANGED YSoNVEEGASs-FKhllAQSlGNCIQSIGASVKAAMKQEQSDMEDsLINsAGLLTpcRSMLpcLuLEQLSQLININLLSSASSpFVSsYuKMLSGKpLDFFNWCEPRFIVFACDKFDGLVKKVASESR-LLhDLRANMNN-FIKAlKcIFSKAsVsLDspKLNpsATMLLMMAHNKEMSNP-ISNc-FCcKlNpLKQ-LLEuKNEIIEsNuKNMQhhQ-FAIKQMNQIFMDsCDKTFLKIHlNCKNLIoAAKNLGsAVLQSIVICSNEFSWQ+LKspR+pFKITMMsMITcACEpIEolYDDTGLIKPLsSlsIMEGYIshNKNRpSSICDuNlDPSDShlLELtDFDDHGKYSEESS...IESIHEDDDN.............................lsh.ph.-.ppspsssh.sPphsscp.phphhppIcppsluKMYPsTPSPDVPGKSKEs.......cTFlEsSRQoGcEQTSPNCVCT.......ASVTDLGGPDNlKSITGLpSu.......KchLlK+LLDTQsDSVVs.........pTsStpp-hhshS..phppscEhhQ-KsS..............psKpT-s.sG.........................TFoposp.spS.uuI..phs..sK.sp-hEth.pLhshsDGopDNPLISEMLoFGYETDHSAPYESESDNNDEIDYIAssDSusRTNNIHMNNTNENTPFSKSlpSP..PEVTPSKcsaKs-KhsslSpppKsKKRTA......................................KRKsVuhKosKSKKIKoDpLPcsTNVIVIS......SESEDEEDGsNIIcKShLcKsIKSEscSESSS.............ESDDCTSEDNpLHLSDYD............KVINNG.cCpSKGFPSPVFTIPIRSMpG.THGIRsKFVPKKNWLWFMRKTHKVDNCPIHSScKsNsK-DSDsTEAsHCFhNHFVPIKTDDEEY-KENVSYIYsKIQ-SKIDlEsITPTK+LIT-MlMDNFMDLTDIIKpGIsKHCQDLssKYsVlT.TsCEKsLNVsNSQslsTstTQlFDPsVTGNNSsILNIINDTTsQNDENRCTEGTSNsNEKCTs+SDCNSDpTEVFKLDGYPSDYDPFlENAQIY 
......SsNVEEGupt-hKsLsAQSsGsCIQSIGA................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 0 0 0 +2954 PF03486 HI0933_like HI0933-like protein Griffiths-Jones SR anon PRODOM Family \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.17 0.70 -6.00 49 4052 2012-10-10 17:06:42 2003-04-07 12:59:11 9 30 3055 3 923 14035 5432 360.40 33 90.89 CHANGED hDVllIGuGsAGlMuAhsA..uptGpcVhLl-+spphG+KlhlSGGGRCNlTN..phs.spalsp...Ns+FlpSA.lupFsspDhIshhpphGlth+pccpG+lFsss..pApsllchLlpchcc.tGVpl+hpspVpsl........ttssss.atlps......ssp.plpssslllAoGGhShPphGSoGhGYplAcphGhslhshpsuLVPhshp.tshh.hppLuGlulcslhh.h.............spsthsaptshLFTHhGlSGPAlLplSuah........psstplplDLlPshs..tltphLppp+ttpspptlpss..LsphLPc+lhthlhct..h.t.st..htploppplpplsptl+saslplsGopuhcpA.VTtGGVshcElss+TMpu+tlPGLaFsGEVLDlsGasGGYNhQhAauoGasAGps ........................................................................h.clllIGuGsAGh.hsAhtA..................up..t......G...t..p..V..lll.-..+.....s..........p...c........h........G..............+......K.........l....h........h.......o......G.....G...........G.......R......C.........N........h.......T...N..................t..........h........s.......................s.........p.........h.......l..s......p......................................N.....s.....+......F.....h............h.....S........s........h.....s............p.........a.............s....s...........D............h........l.........s..h.....h...p.p.....h.....G..l.......t...........h..........+............p.......c........s..........t...........G.............p..............l..............F..............s.s.........s...........................p..........A........p.......s.......l.........l...............c...............h.....L...h...p........c.h.c.p......h.....s..Vp.....l.p.h..p..s.p.l..h...sl.......................................tt...p...p....s...t............a......t....l....ps...............................ss...t......p....h....p...s...c.p...lllAoGG...............h....S.........h.....P...p...h.........G...u...os......G.a
.c.lAc.phG..hslh..hpsu..sPh.........ph...........p......h............................ptLtGl.u.lp.sl.h.h....................................sptt...h....sh....ptshLF..T....HhG.lSGPulL...phSuah.........................................................p.............s.....................................l..p....l.s....l.h.P...s....h.s.......................l........tphL...p..pt..................p.t.......t...p.s....p.p....t.lps..h..................L...t...................t.....h.........L..Pc+lh.hhh.ph....t.......................l.........................p............tt.....htp.lstp..ph..p....t....L...sptlpphpl..pssGspshcpA.......V.....ThGGVsh....cE........ls..s+...TM..p..u...+h......lsG........L.aFhG..............EllDlsGhhGGYNhphAauoGasAup.s........................................................................................................................................................................................................ 0 297 604 782 +2955 PF04588 HIG_1_N Hypoxia induced protein conserved region Waterfield DI, Finn RD anon Pfam-B_4868 (release 7.5) Family This family is found in proteins thought to be involved in the response to hypoxia. Family members mostly come from diverse eukaryotic organisms however eubacterial members have been identified. This region is found at the N-terminus of the member proteins which are predicted to be transmembrane [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.40 0.72 -4.22 59 931 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 520 2 533 819 723 53.40 27 41.59 CHANGED s+cpPlVP....lGshussuslshuhhsh+.pG..spptSp+hhRhRlhAQuhTlsAlhsG ..............................t.hhs....................lu.hhussss.lhh.u..hhsh........t..pG............stphS....p+lh+hRVhAQuhslssllh.................. 
0 133 262 402 +2956 PF01355 HIPIP High potential iron-sulfur protein Bateman A anon SCOP Domain \N 21.50 21.50 22.50 22.50 21.10 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.70 0.72 -3.85 43 231 2009-09-11 11:04:45 2003-04-07 12:59:11 12 2 165 32 75 216 47 65.40 40 62.29 CHANGED EsDspApALsYhpDAocs-..ps+a........sGQpCuNCthapup...sssshGsCsl..Fs...GKtVsusGWCsuas ........................EsDspAhALGYptDAocsD...ps+asp......tsGppCuNC.t.hapGp...tusshGsCsl..Fs...GKpVuucGWCsAas.......... 0 17 38 55 +2957 PF00713 Hirudin Hirudin Bateman A anon Pfam-B_707 (release 2.1) Domain \N 25.00 25.00 34.70 34.70 20.30 17.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.43 0.72 -3.87 8 20 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 3 141 0 31 0 57.20 81 94.47 CHANGED loYTDCTESGQNLCLCEGSNVCGcGNKCILGSsGccNQCVTGEGTPKPQSHNsGDFEEIPEEYL VsYTDCTESGQNLCLCEGSNVCGpGNKCILGSsGccNQCVTGEGTPKPQSHN-GDFEEIPEEYL. 
0 0 0 0 +2958 PF02098 His_binding Tick histamine binding protein Mian N, Bateman A anon IPR002970 Domain \N 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.15 0.71 -4.39 13 345 2012-10-03 08:47:39 2003-04-07 12:59:11 11 3 20 17 31 355 0 139.90 15 70.32 CHANGED hsshQDAW+slp.ssscsaaLhhRTacs-.s.hGpshpCVsspspchscsp+shp...........sshtYpN..sss.sphpshstplpshcptsa.shc....Nshphppt.....sspshsh.lhao...Dtst.Csl...........................lpsspsstu.ct..................................CELWh.psphspsh........................................................PssCptsFpths ..........................................................................................h...............tt.hhhhh..p..sh........t.p.......................t..hpCh.hp.h.h.p.h..ppp..tp...h.......................h..h.t.ah.......pp...t....tph.....ph.......ph.th...p.h..h.p.....p.t....sh...sh.............shh.phptt..............ts.shphplh.as....D.h.pp.Chl...........................lp.h...t..t..p.p....s....t.t....................................................CpL..a.htp.p.t..l..p..p..t................................................................s.p.p...Cp.hatt.C........................................................... 0 31 31 31 +2959 PF00977 His_biosynth Histidine biosynthesis protein Copley RD, Finn RD, Bateman A anon Pfam-B_1089 (release 3.0) Family Proteins involved in steps 4 and 6 of the histidine biosynthesis pathway are contained in this family. Histidine is formed by several complex and distinct biochemical reactions catalysed by eight enzymes. The enzymes in this Pfam entry are called His6 and His7 in eukaryotes and HisA and HisF in prokaryotes. The structure of HisA is known to be a TIM barrel fold. In some archaeal HisA proteins the TIM barrel is composed of two tandem repeats of a half barrel e.g. Swiss:P05325 [3]. This family belong to the common phosphate binding site TIM barrel family [4]. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.34 0.70 -5.09 146 7930 2012-10-03 05:58:16 2003-04-07 12:59:11 16 27 3687 37 2167 8472 6948 229.60 33 89.54 CHANGED cIIPslDl....ccG.+.sV...cGs..........pshp.ssDPl-...hApta..p..ppGA-cLthlDlsuut...ps+shph..cllc...c...l..scplt...lPlpVGGG....IRoh-slcpl..Lp.tG....s-....+Vslsos....Al.cs.Pcl..lpchucca.GspplVVulD..u+p...........G..............cVhhpGhpcs....oshcsh-hscchpchG..sucIlhTsls+DGThpGhs...l.ch.hcplsptl.s.lP.VIASGGsushc-lhplh.....ptGh..suslsupsh..apGphs .....................................................................................................................hlIPslDl....c..cG......p......lV.........c.Gs..........................hps.h.p.s...s..DPlp...........hA..ptY....s....ppGA-..c.Lp..h..l...D....l..s..u..up.............ps...+..s....h....ph.....shlc.............p.........l....sppl............lPlpVG.G..G............IR..o............h...........--............l...........p...........pl...........L.p..s.G...........Ac.................+..V..slsos..................Al....p...........s.....P...p.h.............l.pc...h...sc......c....F......G.....s.....p..slVlulDs+t................G..................................hpV.ss.p.Ghpcs......oshcs.h......-..hspch......p..p..h..G.....s..u..c...l.l.l...T..s..h....sp...D.Gs.h..........s..G...h.s..........l...ch..hptl..............sp..t.......s.....p..........l.............PlIASGGsushpc.lhphh.........ptG...s..cuslsupsh..atuph.......................................................................................................... 
0 713 1441 1865 +2960 PF00815 Histidinol_dh Histidinol dehydrogenase Bateman A anon Pfam-B_1358 (release 2.1) Family \N 19.90 19.90 21.40 20.80 19.00 18.80 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.24 0.70 -5.84 16 3998 2012-10-02 17:28:28 2003-04-07 12:59:11 15 14 3598 8 1109 3262 5057 401.20 42 91.68 CHANGED hcphltRsh.cssc.lhptVpsIl-sV+ppGDcAlhEaTp+FD..GVp.l-s...hVst-chpcAhctlsscl+pAl-hAhcNIcpFHtsQh..ptshslEsp.GVhsuphspPl-pVGlYlPGGpAshPSTsLMlulPAplAGsccIVlsSPPs.p.sGphsPplLhsAphsGlccIatsGGAQAlAAhAYG.TEo....lsKVDKIhGPGNhaVTAAKhhVps....tlsIDMPAGPSEVLVIADEsAsPcaVAuDLLSQAEHsssSpslLlTsS..cphAcclpptlpcQlppLs..Rt-.hlppuLs..aSsIllscslpEAl-hSNpYAPEHLhlpscsscp.llsplcsAGSlFlGsaSPESsGDYuSGTNHVLPThGaARpaSGLultoFhK+hTlQplTc-GhcslupsVhsLAcsEsLpAHtpAV+hRhc ...............................................t.....t......s.ssp..lppsVpsIlpsV+pcGDp.ALh-Yop+F.D....p...s..p.....h.........s...s...l..........cV..o..t..p-l......psAh...p...p.............l...s...sc...h+pAlphAhcpIcpaHpt.Qp.....ps...............h...p.............h...c............s....t......s.....G.....lh.hsphhpPlp.......pVGlYVPG...G...p...A...shPSoVLMsul...P..A+lAGV..cclVhsoP.........P......................h...................G.........................ss....tl....L.sAA.p.l.s.GV.sc......laplGGAQAIAALAY......G......T.....E.o.......................ls+VDKIsGPGNtaVstAK.RtVhu..............tV...uI.D.M..AGPSElLVlA...D...p.s..A..s..................Pcal.AuDLLSQAEHss.t.u.ps..l.LlT.ss.........tplAcpVp.ptlpc.Q.L..t.p.Ls............R.t.-..lsp.puls....put.lllscs.l.s.pulpluNphAPEH.Lplps.....p...ss...p..p...hl.sp.lcsAGulFlGpaoPEulGDY.s..u.GsNHVL........P.........TsGsARhsSuLultDFhK+hol...p..h..o.c.pu.....h.pp.l.u.psltp..LApsEsLs.AHtpulplRh..................................... 
0 350 717 947 +2961 PF00125 Histone histone; Core histone H2A/H2B/H3/H4 Bateman A, Sonnhammer ELL anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.06 0.72 -3.86 64 18172 2012-10-10 12:36:46 2003-04-07 12:59:11 19 47 9743 705 4298 10642 208 64.10 53 50.27 CHANGED psphthhphsltRlh+plppp..........h+lsupAhhhlppslEshhtclhpcAs.hhupcs+RhT..lts+-lphAh+hp .......................................STELLIRKLPF.Q.R..L.V.R...E....IAQDFKoD.................LRFQSoAl..h.A.L.......Q..........E....u......sE...A...YL..V.u...L.FE..D.....o...............s..L.......s...A....h..H....A..K....R................................................... 0 1432 2046 3176 +2962 PF01230 HIT HIT domain Finn RD, Bateman A anon Prosite & Pfam-B_8474 (Release 8.0) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.27 0.72 -3.46 26 8213 2012-10-01 23:45:21 2003-04-07 12:59:11 18 43 4728 105 2350 6615 3709 96.10 27 65.13 CHANGED ptEhsupllh-s-hshAFhDhpPpsssHhLVlP+p.p..lsplpshs........ttluplhhhspcluptlph.....................ttpuhphshpsGtpuGQsVhHlHlHllstcph ................................................t..phss.hl.a..c..s...-..h.s.h...A....F...h...D..l...p.....P..h....s..s......G........H....h...L.....l.....lP+p..c......h.s.s.l.t.-.ls...........................tpt...h...s.c....l....h..t.h.s.pc.l.u.c.t.l...t.t............................................................................t....p.G.h.p...l...h..hN....s..s..t..t..u..G.Q..s..V..h..H..lH..hHllP+h..h........................................................................ 
0 773 1489 1980 +2963 PF00816 Histone_HNS H-NS histone family Bateman A anon Pfam-B_1651 (release 2.1) Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.19 0.72 -3.48 151 2937 2009-09-12 23:38:08 2003-04-07 12:59:11 16 8 1220 14 554 1580 133 100.10 31 80.49 CHANGED pcLpphhpclp.........pphppt..cppc.pppsltpl+phhpp..hG..lo...hp-L......................hststt..............sppp+s.sss..KY+pPps.Gp..TWoGRGRpPpWltshh..........spGcpl ..........................................................t.pL.chlccLc...........cptptt..c..hpE..cpc....tlpphp...c...hltt..G..Is...spEL............................................hsssts........tstt+R.sspP.A....K.Yc...s-s.Gc..TWT.G..pGRsPphItpshtptt...................................... 0 67 192 380 +2964 PF01870 Hjc DUF50; Archaeal holliday junction resolvase (hjc) Enright A, Ouzounis C, Bateman A, Dlakic M anon Enright A Family This family of archaebacterial proteins are holliday junction resolvases (hjc gene) [1]. The Holliday junction is an essential intermediate of homologous recombination. This protein is the archaeal equivalent of RuvC but is not sequence similar. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.87 0.72 -4.38 16 183 2012-10-11 20:44:43 2003-04-07 12:59:11 13 2 153 24 103 289 78 92.00 30 64.17 CHANGED shERELlchLpccG........................FAVlRusuSsuu.......DllAspssh.hLsIElKsopct..+lYl........cp-clcpLlcFuc+F...GupPhlAlKh....scsW+Fhssps .....................................p.hERELlphLc.c.pG.........................Fu...V..l......R...us...u.Ssus..........................hsDllA..scssh..hlsIE...sKosppp...plh.l..........pp-plcpLhpFucph...........Gu....pshlA.lKh...ttptWhhh....p................................................... 
0 26 59 85 +2965 PF02110 HK Hydroxyethylthiazole kinase family Mian N, Bateman A anon IPR000417 Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.57 0.70 -5.20 9 2596 2012-10-03 06:25:16 2003-04-07 12:59:11 10 9 2292 46 490 2628 261 239.80 38 87.31 CHANGED Lpcl+pppPLVHsITNhVstNFoANsLLALGASPlMuhsh-Ehp-hA+IusA.LlINIGTLss..hcuhhtAscsAp-hspPllLDPVGsGATphRpcssh-LL.phthusI+GNsuEIhuLsGhs.tts+GVDospuuss..s.ltsspplApchsslVlhTGchDhVoDGpp..shslpsGsp..................Lhs+lTuoGChLuuVsAuFhAl...pssLhss.hsAsshYplAu...ppAstcspus.....GSFhsphlDtLhpLstE ................................................................................................................phl+ppsPLlpshTN.Vs.tsh.........sANs.......LLAlGASP.sM.u.p.s.scEs..p..-h.s.p.l..A..s...A..L.l..IN..lG.............T.L.os..p..p.h...puh....ht.As.c.t.A...p.p..s.s..h.P..h.VL.DPV.......u............s.......G.......A.....s..........s...a...R.......p.......c.......h.s........p...cL..L.....s.....h...+.....s...s...........lI.R...G.N....A.SE...Ih....u.L............s...........G..............h..............s......s...........t............u........+...........G.V.........D.....u......s......s....s....s........t..s...................s...l........t.............h....App....h...u..p....c............h.......s.........s.......l....l...l....l..T.G........p........h......D.......h......l....s.........s...u.............pc............s.h..s..l..p.s.Gss....................h.h.s..+...l..T.......GTGChLuAllAuF..h.uht...................ps....s........h......ss......ss.A..s..s.hh.sl...Au......................Eh.A.....s...t...c...s....p.Gs..............GoFpsthLDtLapls......................................................... 
0 163 312 412 +2966 PF03865 ShlB HlyB; Haemolysin secretion/activation protein ShlB/FhaC/HecB Finn RD, Henderson I, Moxon SJ anon DOMO:DM07489; Family This family represents a group of sequences that are related to ShlB from Serratia marcescens. ShlB is an outer membrane protein pore involved in the Type Vb or Two-partner secretion system where it is functions to secrete and activate the haemolysin ShlA. The activation of ShlA occurs during secretion when ShlB imposes a conformational change in the inactive haemolysin to form the active protein [1]. 19.90 19.90 19.90 19.90 19.80 19.70 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.24 0.70 -5.74 2 1287 2012-10-03 17:14:36 2003-04-07 12:59:11 8 9 664 2 267 1255 50 340.50 24 65.32 CHANGED GDRhVNopLLFPtlcGpPLKLspLDQGLDQANRLQSNpsplDILPGpplGGSVI+LpNQ+tpPWhlshuoDNYGQKsoGRWLhRssAoLDSPhGLSDFVSLNAN.Th-NPspRaNRAYTLLYSlPYGuFTFSuFuSaSpYp.HQpL.ppsVpLaGpTpQhGlRuDYsF.RspcQIDoLshQlTaKRIcNYFpplRL-lSSPpLThhELuhsHLQIlPNGVhSsNLSVEpuhsWhGAtcpP..sp....D.pFTKsKLFsNh.QRhpLhcuTa.hNshFhGQYS+DsLPGVEWLSLTD+sAlRGFspST.SGDNGhYLpNTLShsapLstholTPRlGsDlGplh.+ts.pGWpuuhGlSoGhslpYQpA.lDLEVu+G.lL...oNpspscDPsQlLs+FSYhF 
............................................................h.ht..........h..P.hh.t....G.cl....Lsl+-lEQGl-p.hp.Rls...........s.....p.sp.hpl....hP.....u....p.....p...s.....G....t.S......lhl.p.h.p.p..s.c...h....ph..s..huhDssG..p..cs..T..Gchpss..ss..l..sh-N......shuL..s.D.....h.......l..s.........h.....s.......p...s.......h........p......t.....t....t........p....p.....h.....s........p..........s........h.....s.ht.Y..S....lP..a..G..h..a..p.hs..h...h.s..h..sp..a.p....h...t..h...t...s.....t.....s..h.....p..h..p..G..p.o...p..p..h..shp..h..s..+...ll..h...R.st..p.p..+t.....shth....pl..p..+..ps..........ps.......a..l....s....s...s...c...l.p.h..pp....p.p..h.s..s...hph..G.l....s...a...p.p...h..h...s..t...u....h..h.......s..h....slu..a.p....pG.h.s...h...h..G..A....p...t...s.....s....t........t.....t...............................t........s.....p.....h.....p..t...h.ph..s...h...s...ht..h...s.......a...t...l...h...p...p..............ht.a..ss..t..ht...uQa..o..t..s..s..L.hs.t-p.holGuchoVR.GF...c.c.p.s.l.s...u-p.Gha....h+.N....-..L.........s.....h.......h...............s................t.......h.............h.......h.......u.h....DhGt..l................................................................................................................................................................ss.hh.h.hh.h.h............................................................................................................................ 0 31 115 195 +2967 PF02794 HlyC RTX toxin acyltransferase family Bateman A anon Pfam-B_1230 (Pfam 6.0) Family Members of this family are enzymes EC:2.3.1.-. involved in fatty acylation of the protoxins (HlyA) at lysine residues, thereby converting them to the active toxin. Acyl-acyl carrier protein (ACP) is the essential acyl donor. This family show a number of conserved residues that are possible candidates for participation in acyl transfer. 
Site-directed mutagenesis of the single conserved histidine residue in Swiss:P06736 resulted in complete inactivation of the enzyme [1]. 25.00 25.00 25.40 25.10 21.20 24.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.69 0.71 -4.56 29 386 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 293 0 46 249 70 134.70 34 84.55 CHANGED phLGtlsWLhhpSPhHRcaslu.lttpllPAlphsQatlhp..cs........................shPlAFsoWAhLo.-sEt+alpssppLhsp.DWsSG-+hWlIDalAP..FGc.....sptlh+pl+cp.Fs...sphh+slRhchsuppttplh.ht.......................lp ...........hlGtlshLhhpSPhH+p.asls..hthpllPAIptsQaslhp..cc........................Gh.PlAasoWAhLs.-sEs+aLps......sp......p......Lhsp.DWpSG-.RhWllDaIAP.F..Ga.....sptl.hchhRp.c.Fs.....tph.hRulRhc.sspp.tplhphph...hsh................................... 0 7 23 37 +2968 PF00529 HlyD HlyD family secretion protein Bateman A anon MRC-LMB Genome group Family \N 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.58 0.70 -5.11 44 15364 2012-10-02 20:27:15 2003-04-07 12:59:11 15 41 2231 47 3183 22645 3068 262.60 18 71.90 CHANGED tsplpspluGhltclhsc-GstVctGplLhplDssshpush......ptpuphttspt...psphhttthpphpshhtspth...............tppptssthtthppshhpsp.....hptsplppuptshpttpsshphs...............sthppstsphtstlss.......tspshhshsplpphhhphppsptt..t.htssspu.hhtt..t.ptlphsptht.....sstslhshl.hpphtlctphtpsphctht.s.pspltsshhsts..hhhpGsssss..h..hhshsssspthts..plhssshsphspthsVclths....ththsspshhsGppshl 
....................................................................hpltspV.uG.h.....l..h..p.l.......h...p...-..sp..h.V.....ct......G.p..h.Lh.......p.....l........D.......s.......t.......s...h..pst..ht..........................................pA..p..u.....p....l....t....p....s....p.....t............................t....h.....p.............t....t...........p.....h.....p....p...hp...t.....h....h..t....t....tth..........................................................tpp.t...h..p..p..t.......t...............p.....p.....s.......h...p.t..................................h...s.....p..t.......p..l....p.......p.......u...c.......h.......s...h....t...h.....s....p...s...t..h..hs................................................................sth...s..t......t.t...s...t...h...t...s.h..lps...............................ps..p.h.h.......s.......s.......h...s........p....l......p............h..h....l....s.......l....p....p..s...p...................................................h..p.....u....s.t....s..G..............h.....h..........t......t..........................h.....s...l.......ph.sthss...................ssts..h...h...s....h....l..........h....p...s...h....h.......l...s...s..p.h.......t..p....s.......p......l.......pt......h......h...........s.......s.............p........s...p........l..h....s.s...h.hsp............hh..h.p..u.th...s...........h.........hs.t..t.....s..h.........p...........h..s.................s.........s....h....s....h.......p.....h..s..p.t..h.s.l...p.h.t....ht.......................h..s..................................................................................................................................................................................................................... 
0 714 1599 2379 +2969 PF03201 HMD H2-forming N5,N10-methylene-tetrahydromethanopterin dehydrogenase Mifsud W anon Pfam-B_2929 (release 6.5) Family \N 20.30 20.30 22.00 72.70 19.80 16.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.20 0.72 -4.07 13 59 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 31 6 38 60 0 97.90 47 28.43 CHANGED A-LlusVsDMuSsVTAlshAGlLsYhsssTpIlGAPtchspp.sh.oLpplAuLh-ssGlcsh.cuLsPcsLlsoAcSMphssps..-.LssuLclLEch ..AcllusVsDMGShVTAlshAGlLsYhsssTpIlsAPtchsph.sh.uLpplsuLhcspGlcsM.csLsPcsLlsoAcSMphssht..-.Ls.suLclLcc....... 0 10 18 28 +2970 PF01101 HMG14_17 HMG14 and HMG17 Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 21.60 21.40 20.20 20.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.73 0.72 -3.29 18 482 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 149 0 153 417 0 80.60 54 86.60 CHANGED PKRK.......stssstssKpEPpRRSARLSAKPAPPKPEPKPKKsusKcK............ssKscK.uAKGKc..-ptpptsK-ss...suENG-sKs-E..s.ts-ustspc .....................t+.........t.sspthsKpE..Pp..RRSARLSA............K.PAPPKPEPKPKKAusKcp.................cK...ssKGKK...tc.ustu..p.ctss...PAENG-sKo-p...s.ts-t...................... 
0 9 13 36 +2971 PF00505 HMG_box HMG_box; MaoC_dehydrat_N; HMG (high mobility group) box Finn RD anon Pfam-B_8 (release 1.0) Domain \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild --amino -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.03 0.72 -3.71 31 8695 2012-10-02 14:16:02 2003-04-07 12:59:11 14 175 1353 52 3772 8773 263 66.20 29 17.82 CHANGED PKRPhoAahlatp-tRtpl+p-sPshc..sspluKtlGctW+sLstc-Kt.Y.ptApct+pcaccphspYc .......................................KRP.h..sA.ah.la....t....p...c..p.......R........p....p............l.......t........t........p.........p.........P............p............h.........p.........su.........-..............l......o.....K.hL..G....p..p...........W..+..........t..L..s.cp........-.K............p.....ah.c.c.Ap.......c...h+t..pahcphssYp................................... 0 1189 1695 2729 +2972 PF01154 HMG_CoA_synt_N HMG_CoA_synt; Hydroxymethylglutaryl-coenzyme A synthase N terminal Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.06 0.71 -4.71 4 1680 2012-10-02 12:25:54 2003-04-07 12:59:11 12 15 1444 28 478 1426 215 165.20 41 40.45 CHANGED WPcDVGIlulElYFPoQYVDQuELEKaDsVssGKYTlGLGQs+MGFCoDREDINSLCLTVVpKLMERsslsassIGRLEVGTETIIDKSKSVKoVLMQLFpESGNTDlEGIDThNACYGGTAALFNAlNWIESSuWDGRYAlVVsGDIAIYspGsuRPTGGAGAVAMLIGPsAP ...............................................h....lGIctlthahPs.....YV.c.....hs.....-.L.....A.........c....s....R......slD....ssK.a..p...h...Gl...G...Qpchulss..sEDIlo.hussA.upsl.l.....s.c..p........-.t.p.pIshllV...uTESulDpSKu...s..us.h....l....p....pL........Lul...............p...s.......s....c.uh...E...hKpACYGuTAALphAhsalp..sp.s...........s......sc...c....sLVl..A....oD....I..........A............+.Y...........u...h..s.........s...........u.........E.P.T.....Q.....GAGAVAMLIuts..s.................................. 
0 147 269 383 +2973 PF00682 HMGL-like HMGL-like Bateman A anon Pfam-B_71 (release 2.1) Family This family contains a diverse set of enzymes. These include various aldolases and a region of pyruvate carboxylase. 21.90 21.90 21.90 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.11 0.70 -4.71 20 12580 2012-10-03 05:58:16 2003-04-07 12:59:11 14 57 4555 111 3825 10439 6963 236.80 26 43.97 CHANGED RDGEQuhtss.holpcKlpIActLschGlc.IElG.............hsssuts-hctlcslsctlts...uclsslsR.............sstc-lctuhc.h.ssGsspl+lhlssS-hhhphplppsht.shcpspphlchA+phs.cV..........plusEDuuRsshsalhclsctshsA......GspplslsDTVGhhsPpphtchlptlpppls....s.lulHsHsDhGhAlANolsAlpAGAspl-solsGlG......ERAGNsuL.Eplshulcstt ...................................................................................................................................................RDGpQu........hss..h....s.....scc+lpl..........s....p..........t..........L..s..c......h.....G.....l.p......l...Esu...................hshs.u........t....s.........s.....h.....c.t..l.....p.....p......l...t.....c.....t....h..hp...............spl....p...s...L...h...p.............................................s.s...p....p.........l....c....p....s.....h....c..........h.........................s....s........h....s....h....l...........+....l.a...s...ussp...............................h.........t.........h.......s...................t...t......l...c.t...h......t....p....s.l.p..h........s......+.p...h.....s...h..ps................................ph.u...s....p......s...t....s......t....t...s......h...p.....h...h.h..c.....l..s......ct.h.h.ph...............G.s..p.h.l..slsDT.......s.......G.......h.......h.......p........P.......p.......p..............ht.......c.......l.......l.p...t.l.p..p..p..ls.........................h.lul.Hs.H....s..sh........Gh..A...........lA.s..s..l..s....A....l....c....u..G..............sc....t.....l...-..sslsGhG.......................tp..sGNssl.EsllhsLph..t
......................................................... 0 1234 2475 3280 +2974 PF00423 HN Haemagglutinin-neuraminidase Finn RD anon Pfam-B_171 (release 1.0) Family \N 20.40 20.40 20.50 21.80 19.70 20.30 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -12.63 0.70 -6.22 21 2456 2012-10-02 00:45:24 2003-04-07 12:59:11 14 2 198 58 1 2086 0 439.80 33 94.13 CHANGED lhshlshlLullsllhhhshplpphshsssphpphhps.hulspsIcptsccltt.....lpPhhphIsspVuhplPtplsphtp.l.......................psp............................lsctC....................shspsptshhsh.psphhpsl..shpshhhsssssh.sshs.psphp.h.ssohl..ssssshsu..Clp.PshulupslauYoaslhpssCpct......spshphhplGhlpspusthPshpsssthshspsssh+sCSlsssshthhtLCohssssphp-huoss.pslhlshLsltGphpspchpss.hshc.....sastLYsosGsGlhhssplhFhsaGsls..............pstpspupChtstCpspstphCNpu.hsshhus+hhspGllplslsh..stpsplpltolssshhhhGupu+Lh.....s.hhhYpposuWhohs.hstlsls..sshs....lphsspshs..oRPGssp..CthsspCPt.CloGlYsDsa.Lss........shphlsushlsSpps+tsPhhshssspshshhh.lp.sspspsshooosCF..sapp+laChphsElsssshsshpsh.hhspl.hsC .................................................hhlhlhhl.lhhl..hh.hs.hhhphsh.....p...........pp..thsp.lpct..pl.........slhp.Ihcp...l.......slp.P.th.ph.p.l........................pt.......................................hs..h....................tsss..pt.hss.h....hss.....h....ltsh..ph....h.s.hp..h.sshh.........t.shl..sh.hotsh..ssp..sh.hopshYshTa.l..sshpsc......sp.hphhtlGhl+psu.shshhphhp.h..s.spshpsC.luhs.LthshLCpt.sshp....uSst.sshhhspLGh.sphpppphpssh.ht......shsthY.ustpGhhhDspshasV.sshp..............p.ppt.psC.pp..psh..thspss.h.shhss+hsp.ulLolplsh..olt...phhls.s.sslhhhGuthclh........h..ppss.sWho.s.hhshsls.h.ssh...........h.sP.hhs.......+...u..u.s..Cthssh.....lsuhhsssh.l...h......spshphVhuTh..sp.t+hssh.sh.s.hshshhh.hp.s.pshs..hp..pCF..shspchaChphh.htss..sthh.h..hsthhh................................ 
0 0 1 1 +2975 PF04814 HNF-1_N Hepatocyte nuclear factor 1 (HNF-1), N terminus Kerrison ND anon Pfam-B_2624 (release 7.6) Family This family consists of the N terminus of homeobox-containing transcription factor HNF-1. This region contains a dimerisation sequence [1] and an acidic region that may be involved in transcription activation. Mutations and the common Ala/Val 98 polymorphism in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) [2]. 28.40 28.40 29.10 28.60 27.70 28.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.29 0.71 -4.18 9 283 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 52 22 104 212 0 171.40 47 37.26 CHANGED MsScLotLQpELLtALLpSGloKEsLlpuLs-ht......................Pthphs-p.hth..hshstutsp........th..uhGcs..................................chotDEs....S-DG--..sPPIhKELEsLusEEAAcQ+......ulV-pLLpEDPW+sAKhlKSYMQQHNIPQREVVDsTGLNQSHLSQHLNKGTPMKsQKRAALYsWYVRKQREltpQ ................................................................................Q.-LLttLhpoGhoKc.llpALtpht................................s.....st.tsh...ht.ut............................p.s......................................................+hSt-.cs...........sps.up..ca.s.....P..Pl....hp....c..h....p...s.h....s.....s.E.EAu.cpc........scV..Ec..LL..pcDsacluchIKuYhQpHNIPQREVV-sT..GLNQSHLSQH..L.NKGTPMKsQKRuALYsWYl+KQpEl........ 0 17 29 54 +2976 PF04813 HNF-1A_C Hepatocyte nuclear factor 1 (HNF-1), alpha isoform C terminus Kerrison ND anon Pfam-B_2624 (release 7.6) Family This family consists of an alternative C terminus of homeobox-containing transcription factor HNF-1, found in the HNF-1A isoform. Different isoforms of HNF-1 are generated by the differential use of polyadenylation sites and by alternative splicing.\ \ The C-terminal region of HNF-1 is responsible for the activation of transcription, and HNF-1A, which has this C-terminal extension, transactivates less well than the B and C isoforms [1]. 
Mutations and polymorphisms in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) [2]. 27.30 27.30 28.70 35.50 27.20 27.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.96 0.72 -3.71 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 34 0 20 47 0 81.80 65 14.36 CHANGED llToDPEuHTDSuIcEPSS......l.sQD.osILHLQSu.RLSPsPsVSSuSLlLYpsSsSoEoH.SHL.LSSoHusI-oFISTQMASSo ......................VFTSDoEAsSESGLHsPuSQAoTlHlPSQD.suuIQHLQPuHRLS.sS..........PT..VSSSSLVLYQSSDSoNGH.SHL.LPSNHSVIETFISTQMASSS. 0 1 2 5 +2977 PF04812 HNF-1B_C Hepatocyte nuclear factor 1 (HNF-1), beta isoform C terminus Kerrison ND anon Pfam-B_2624 (release 7.6) Family This family consists of a region found within the alpha isoform and at the C terminus of the beta isoform of the homeobox-containing transcription factor of HNF-1. Different isoforms of HNF-1 are generated by the differential use of polyadenylation sites and by alternative splicing. The C-terminal region of HNF-1 is responsible for the activation of transcription [1]. Mutations and polymorphisms in HNF-1 cause the type 3 form of maturity-onset diabetes of the young (MODY3) [2]. 
25.00 25.00 54.90 52.20 20.40 21.20 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.83 0.70 -5.10 11 156 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 43 2 50 130 0 219.60 56 42.10 CHANGED AMDsapus..uss.s.hLsp.s......oPpsu...s.th..u.........................lRYSQpussEssoSos.ups.spu.....osLpQVSPsuL-PSHoLLso-sKh.IusSGGsLPPVSTLTslHSLstss....Ht.tQQsQNLIMssLPuVMuI....................s.uLsooQuQSVPVINSVGuSLTTLQPlQF.SQ......QLHsshQQPLhQQsQ.SHMu..QsPFMAThAQL.ssH.MYS.KsEssQYsHoShhsQsMVITDoosLuTLTSLouoKQ ...................................................AMDsYsus..s..s.sPhLsttSs....t.Pssu....sP...s.Khp..G.........................VRYuQ.ussEssusso.Stt...stshV.T.sposLpQVSPsuL-Pu..HsLLSs-uKh..lSsoGGsLPPVSTLTslHSLp........t.sQQsQNLIMssLsGVMAI.......................s.uLsooQAQSVPVINShuuSLssLQPVQF.SQ......QLHssaQQPLMQps..SHhu..QpPFMAshsQLQssH...hYuHK.EssQYoHouhhPpsMllTDTsslSsLsshosoKQ............................. 0 2 6 18 +2978 PF01844 HNH HNH endonuclease Bateman A anon [1] Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.73 0.72 -4.09 92 7400 2012-10-05 18:28:12 2003-04-07 12:59:11 18 79 3428 3 1824 6263 3802 47.60 26 18.89 CHANGED Cp..hCspph...........thplcHIlPhp....p..uGpps.....hsNLhslCpp.Cppp+psc .....................................Ct..hC..s.t..h........................thplc.Hlh.Phs...............p........GGtss....................hsN..lhhlC....tt...C.Hppcpt.................. 0 538 1239 1594 +2979 PF01848 HOK_GEF Hok/gef family Bateman A anon Swiss-Prot Family \N 21.60 21.60 22.80 22.50 20.90 20.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.01 0.72 -4.63 41 1852 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 385 0 46 440 2 42.80 47 78.31 CHANGED +thlhsLlllClTlLhFshlsRcoLCEl+l+pG.spEluAhLAh .......KhtLlulIVlClTlLsFsLll+cSLCEl+l+pt.shEhsAhLAY.......... 
0 6 9 26 +2980 PF05102 Holin_BlyA holin_BlyA; holin, BlyA family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family BlyA, a small holin found in Borrelia circular plasmids that is encoded by a prophage. BlyA contains two largely hydrophobic helices and a highly charged C-terminus and is membrane associated [1]. 21.10 21.10 22.00 21.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.02 0.72 -4.25 3 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 33 0 3 53 0 59.30 54 93.80 CHANGED MDTIpIs...-FLlsLsNIKLItLhIFIollILullLllKPllKDhLoILIuKIhKNsNcKEKc ..........MDTIKlT...EhLINLNE...IKLIuVMIFlTVllL.GsLILLKPLLKDILoIl..IGKlFKNuNsNsp................. 1 2 3 3 +2981 PF05204 Hom_end Homing endonuclease Studholme DJ anon SCOP Domain Homing endonucleases are encoded by mobile DNA elements that are found inserted within host genes in all domains of life. 21.10 21.10 21.10 21.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.42 0.72 -4.11 24 211 2012-10-03 01:41:40 2003-04-07 12:59:11 9 23 116 23 55 250 50 102.50 30 13.28 CHANGED slPpaLto-slplREsFLAGLlDS-GhVccptt.....ssolpTh.polhpGlVplARSLGlpsoVss...cptphctpsVppp.s........YslslusussLpuVLutCusscppts.........tthh ..............IPphhhppshplR.uaLAGLIDSDGhsscptt.....phslpohpsolhcsllplARSL.Glssssss...c.th.h.hptsph............Ysh.h...t.t...t...s....h.t.s...............s.hhh.................................. 1 5 22 47 +2982 PF05203 Hom_end_hint Hom_end-associated Hint Studholme, DJ anon SCOP b.86.1.2 Domain Homing endonucleases are encoded by mobile DNA elements that are found inserted within host genes in all domains of life. The crystal structure of the homing nuclease PI-Sce [1] revealed two domains: an endonucleolytic centre resembling the C-terminal domain of Drosophila melanogaster Hedgehog protein, and a a second domain containing the protein-splicing active site. 
This Domain corresponds to the latter protein-splicing domain. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null --hand HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.37 0.70 -4.60 37 308 2012-10-03 10:25:13 2003-04-07 12:59:11 11 40 197 12 58 405 104 284.60 23 42.25 CHANGED CauKGTpVlMuDGopKslEclplG-tVMGpDGpPR..cVhsl.s+Gp-pMYclppps.p..................shhsasCsusHhLVL+ss.th....p..th.....................t.R....lsp.hptsa..p...u..tt.thhpph.ts.sscphhpaslpA+Dhs.tLssplRpso...pthhsPlhhtpt.hsphltstt....p.s...s..huYlLGLWlGsG.htcpsphshsopD.tplh..pplpphup..hhslp.phtptp.shh.t..........................................................................h......t..ht..............................................................................................................................................tshspsN.hhptlh.phGhhp........phsKpl..Pphhhs-shphREtFLAGLlDocGhl..pt......psslpsh.pplhpslltlARSLGlpsslspcptp...hthtt..............athp.lssssshhssLshsttspp.htstspth.ct...htFphhE.hpps-aYGlTl..-sD+paLLushhVlHN 
........................................................................................ChstGT.lhhuDGs..cslcslphG-hlhG.D.....G....p.....s+......cV....hsl....s....p...Gh-..p..hY..plp.ts...........................................................th.shsssssH.Lshh.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 13 33 53 +2983 PF00046 Homeobox homeobox; Homeobox domain Eddy SR anon Unknown Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.63 0.72 -4.35 182 25115 2012-10-04 14:01:12 2003-04-07 12:59:11 24 290 1497 161 11025 24665 26 54.40 35 16.80 CHANGED ++pRTsaospQlptLEptFp...p..spYsstpcRpcLAppL.....sLs-ppVplWFQNRRtKhK+ ........................................++.Rs.s.a..o.p.t.Q......l..t..t.L.E...c...tFp.................p...s...pY.......st....t..c...........R.t.clA.ppl........................sLo.c.....p..p..V...........p.........l......W...........F..Q.N.RRhK.++...................... 1 2391 3737 7103 +2984 PF00742 Homoserine_dh Homoserine dehydrogenase Bateman A, Griffiths-Jones SR anon Pfam-B_459 (release 2.1) Domain \N 22.90 22.90 22.90 23.30 22.50 22.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.90 0.71 -4.60 147 5558 2009-01-15 18:05:59 2003-04-07 12:59:11 14 32 4234 19 1325 3990 2377 185.10 37 33.91 CHANGED Pllcslcps.LsGsclpplpGIlNGTsNaILopMp....p..GhsFpcsLc-AQchGaAEs.DPstDlsGhDsApKlsILAphshGhpl..s.hs-lthcGIsslsht.......................clphA.pchGhsl+Ll..................uts..ch............ttth.pspVpPphlspscsLAp.lpGs.Nulhlps-hh......uslhlhG.GAGstsTAuuVluDll .............................................................PllcsLpph..huuDcI.....ppltG.IlNGTsNaIhschs..................pt...GhsFs-slppApp...hGauEs.DPpsDl....sGhDsA+Kl....sILAph..........h..G..h........p.l.......p....hs.-l..t..l.E..G.ltslshp................................clp.hA.cc.h...G..h.sl+hl........................uhhc........................tshpscVpsshlsps.+PLAs.Vpss.NA..........lhlpuchh......ssl.hh.....hG.GAGstsTAuuVhuDll.................................. 
1 417 834 1118 +2985 PF00103 Hormone_1 hormone; Somatotropin hormone family Sonnhammer ELL anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.21 0.70 -4.99 17 1608 2012-10-02 01:28:15 2003-04-07 12:59:11 15 2 560 24 164 1561 2 156.40 32 94.64 CHANGED LlhSshLLsp.....suSsPsss..........LpcLFppAsphucphapLusphas-F-ppahptpt.......hhpts.......shCHTsolpsPps+-pspphstpcLL+hslhLL.SWppPLhhLss-hpsh.ts..stsllo+Ap-lpcp.cpL.pslcplhp+l...stpphp.h..tasphu...tscc-p+hhshYsLhhCh++Ds+Kl-hal+lLpCRhhhp....C .............................................................................................................tpLhs.sh.hsphla.Lu.tchhp-F.....-c..p..hhsppp...................h.p.............shChopol.s.PpsK.ccs.Qp.h.o......hc.LLchuhhLlpS.W.tPlphLs....t......ts..s......h.t..p...cl..tht.L.......cuh..h...th.....................................h.h..ths.............pptlht.at..Llt..Ch++D.cKh-saLplhpCR.......C....................................... 0 6 15 61 +2986 PF00123 Hormone_2 hormone2; Peptide hormone Sonnhammer ELL anon Prosite Family This family contains glucagon, GIP, secretin and VIP. 21.20 21.20 22.50 21.20 20.80 20.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.58 0.72 -6.64 0.72 -4.09 56 842 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 138 23 275 773 0 27.80 39 34.80 CHANGED HuDGoFTs-ho+hL-phuA+cFlphLhs .......HuDGhFTssYS+hLcphus+calphLl..... 0 10 29 91 +2987 PF00159 Hormone_3 hormone3; Pancreatic hormone peptide Sonnhammer ELL anon Prosite Family \N 22.40 22.40 23.10 22.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.54 0.72 -3.94 21 274 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 120 32 90 268 0 35.70 61 41.61 CHANGED hPscPc..tPG-sAssE-..LupYhssLpcYlNllTRpRY .........YPsKP-..sP.G-DAssE-..hA+YYuuLRHYINLlTRQRY. 
0 8 14 32 +2988 PF00220 Hormone_4 hormone4; Neurohypophysial hormones, N-terminal Domain Finn RD anon Prosite Family C-terminal is in hormone5 17.00 17.00 17.00 17.10 16.90 16.90 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.55 0.74 -5.72 0.74 -3.80 8 197 2009-09-11 03:36:00 2003-04-07 12:59:11 12 2 104 0 51 205 0 9.00 83 7.31 CHANGED CYIpNCPhG CYIQNCPhG. 0 3 5 25 +2989 PF00184 Hormone_5 hormone5; Neurohypophysial hormones, C-terminal Domain Finn RD anon Prosite Family N-terminal Domain is in hormone5 25.00 25.00 25.00 31.40 21.80 24.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.37 0.72 -10.76 0.72 -3.44 36 207 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 109 29 60 232 0 77.00 63 55.90 CHANGED RpChsCGPGs+G+.....CFGPsICCG-pl......GCalGTsEoh+CtEENYL..soPCps.GG+sCGs.t....uG+CAAsGlCCssEoCshDssC ........RpChPCGPGs+GR.....CFGPsICCG-pL......GCalGTsE..shRCpEENYL..PSPCpu.......Gt+sCGS.............GGRCAA...sGlCCss.-.uCshDssC................ 0 6 8 30 +2990 PF00236 Hormone_6 hormone6; Glycoprotein hormone Finn RD anon Prosite Domain \N 25.00 25.00 25.60 25.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.49 0.72 -4.17 10 201 2012-10-02 16:54:34 2003-04-07 12:59:11 13 2 150 10 30 185 0 91.40 69 81.48 CHANGED aPcs-hss.GCsEC+L+ENphFS+.GAP.IYQChGCCFSRAYPTPLRSKKTMLVPKNITSEATCCVAKphhRVTVhsslKlENHT-CHCSTCYYHKS ...................aPss-h....GC.EC+L+ENphFS+...GuP...lYQCMGCCFSRAYPTPlRSKKTMLVPKNITSEATCCVAKphp..+...spV..h...s.....l+.lcNHT.......-CHCSTCYYHKs................. 0 1 3 11 +2991 PF04617 Hox9_act Hox9 activation region Kerrison ND anon DOMO:DM04544; Family This family constitutes the N termini of the paralogous homeobox proteins HoxA9, HoxB9, HoxC9 and HoxD9. The N terminal region is found to act as a transcription activation region. 
Btg1 and Btg2 - the B-cell translocation gene products - may function as cofactors for Hoxb9-mediated transcription. The Btg proteins modulate Hoxb9 transcriptional activity by recruiting a multiprotein Ccr4-like complex [1]. 20.60 20.60 25.40 22.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.56 0.71 -4.11 29 299 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 74 0 96 259 0 167.00 38 65.14 CHANGED MSoSGTloNYYVDSlIspEs..EDhh.usRF.ssushssss.......R.su.s.s-pu.....-asSCoFtsKssVFuuS.WSsl..ps.pssss.sulYH.....PYh.....t.Qstl..suuDu...RY..lRSW.L-Phsssl.shsGh..ssspp.Y...ulKPEsl.u+ts.-s.shcspshhh...s-atssusssspcp...tppttstspssspsp......pE-K....pplDP ..................MSsoGsloNYYVDSllsHEs..E-lh.u.......uRF.ssGs.h.stss.......R.su..l..s-.ps......-FsSCSFtPK..ssVFuu.S.WusV..ps.pssss.s.uVYH........PYs.......p.Qs.l....susD.u.RY..hRSW..LEPhsusl....uhs..............Gh...s......su.....+p.Y...u...lKP-......sh...upps.-s......ts.ps.sh.....s-Yhhsssss.cpp....st.ts....scs.stsct.....pc-K...thD.................................................................................................... 0 4 13 43 +2992 PF01856 HP_OMP Helicobacter outer membrane protein Bateman A anon Pfam-B_395 (release 4.2) Family This family seems confined to Helicobacter. It is predicted to be an outer membrane protein based on its pattern of alternating hydrophobic amino acids similar to porins [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.15 0.71 -4.41 33 2354 2012-10-03 17:14:36 2003-04-07 12:59:11 12 2 57 0 129 2284 1 164.40 30 37.09 CHANGED ulplGYKpFFs..p....p+hhGhRYYGFaDYsauthspppht.s...............................shhTYGsGsDlLaN..h.hspppt.......................................shGhFuGlplAGsoWhsstt.thth..............................................psphpsotF........QhLhshGlRhshs...............t+pulElGlKl.Phltppah........................sshshps..pa+R.auhYlsYsasF .........................................hplGYcpFFs.........p...........p..+..hhGhRYYG.Fa-Y..s.......auhht...pp.th..t...............................................................................shhoYGs.GhDh..LhN.....h..hssp.pt..................................................shG..lFuG....ltluGs....o.ahssptsphts..........................................................................t.phpsstF........QhhhshGlRhpht...........................tcpshElGlKI.Phl.pp.phh......................................sshsh.ph...p.a.cRhashYhsYsasF.................................................................................. 0 39 93 129 +2993 PF02521 HP_OMP_2 Putative outer membrane protein Bashton M, Bateman A anon Pfam-B_1230 (release 5.4) Domain This family consists of putative outer membrane proteins from Helicobacter pylori (campylobacter pylori). 
25.00 25.00 29.60 28.90 20.30 16.40 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.48 0.70 -5.91 9 419 2012-10-03 17:14:36 2003-04-07 12:59:11 9 1 55 0 36 429 0 455.30 38 98.14 CHANGED hppphhhhlhhhhhsusLpAF-Y....+lsGts-SFSKlGFNpppINsp+GIYPTpoFsTlsutLpls.ssLLsKthpt..HsLcsslGGhlGulsYDSTKh..........................h.sp.........st.hhsulsa.alGhatGahssp......................s.hst..p.st............+sRsYll.NAaLcYsYKD...hFthKuGRY....puph.-ahSGasQGaEhhh.....phpph+hhWFSSaGRAhAaspWlh-aYus+s.h.pst..hN.............hGhHshslhYp.hKslplpPFhYasPthhsAPGhpIsaDoNPsFpuhGFRupTshhsha.............Plas..h....hsshp......hss.hGcsGtoLhl+QRF-aNpaNFGhuhYpsFGNANuhIGhaGNP.....................lGh-hWsNolYss...ulsshhsAsAhThasasGGsa..+pFhWplhGRhTpus........RAsEtSlulsLuYphsc.plpssl+LpYYsshh+pGYp.hGhhh.....sPh...................ssshpuhhpDRSalMoslsapF ...................................................................h.....hhhh.hhhh.t.u....LpAF-a.......clsGtspsFSKlGFNpp.....INspKGIYPTpoFsolsuhlpls.ssLLsKth.ps...HtLpsslGGhlGulsYDoTKh...................................................hhsp...........s...ht.u.hhapahGhatGahsst.............................................s.tss...p.phst............psRsYllhsAaLp.YsYKD.........hFthKuGRY....posh..sahSGasQGF-hhh..........php.ch+hhWFSSa.GRuhAhs.pWlhsa.Yushs..h...pt...t..hN.............aGhHshslhYp..pK......tlplpPFhYFuPpsatAPGhpIsaDoN.PsFp.uhGaRspTphhsha....................................Plahshh..........hsshh...............hsshhGpsGtoLhl+QRFc.a.NpapauhuhYpsaGNANu.lGhaGsP.....................hGh-hasNolYss...shsshhsAsuhThashsGGha.......++FhWtlhuRhTtus........RAsEtululsLuYphs+.plphsl+LpYYsshh+pGYp...sGhh..........sPh...................tsshtushpDRSalMoslshpF....................................................................................................................................... 
0 9 26 36 +2994 PF03328 HpcH_HpaI HpcH/HpaI aldolase/citrate lyase family Mifsud W, Bateman A anon Pfam-B_3076 (release 6.5) & Pfam-B_2811 (release 14.0) Family This family includes 2,4-dihydroxyhept-2-ene-1,7-dioic acid aldolase and 4-hydroxy-2-oxovalerate aldolase. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.16 0.70 -5.46 22 6388 2012-10-10 15:06:27 2003-04-07 12:59:11 9 23 2567 40 1697 6456 3815 211.30 23 76.53 CHANGED phhhalsusssthhchsshhGhDhlhlDlEcus...........hhltpsLpplph.tss...pslVRVssh-oshhpp......hlchGspslhlPhV-oup-stph.phhphs.tthtt.........................tsspsplhstIESupGlhNscEIA..usc........tltulhlGstDhssshGttpssstsElh.....hApsplltAu+AuGl.hhsslhsshssspsahpputhhhslGhDs+.hhps .........................................................................................................................................................................................p.hhahsh.sss.h.hh...ph.h...t...h..h...u......h...D...hl...ll.DhE.c.us...................................................p..tl...t....p....h.l..p.......p.h...........t......h...........t....s......h.......................ps..l..VR.l.s..s..h......s...s.......h......h.....t.....p..................................hl....c...h.............G.....s....p...........s....l......h...lPt...l..co...s...pc.s...pph....st....th.p..h..................t..th..................................................................................................................................................s...tp..h.t.l...l..s.......I.E.o.s...p...G...l.......t......s.........h...s...p....IA.....us..p......................t.ls...ul...h...lG...s....s...Dh.....s..........t..........s.........h.....G......h....t.....t........s.....s.....p....t...s..-..l.....................................hAh....pp....l....h....t.A...u....+.A..s......G......l.....s..h...s..h.l..h.........s...........s.............p...s.......t...p.......t........h...h...
p...t.u.t.p...h.h.t..lGhc.st.hhp......................................................................................... 1 422 997 1374 +2995 PF04982 HPP HPP family Bateman A anon Bateman A Family These proteins are integral membrane proteins with four transmembrane spanning helices. The most conserved region of the alignment is a motif HPP. The function of these proteins is uncertain but they may be transporters. 21.10 21.10 21.10 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.55 0.71 -4.27 127 972 2012-10-02 19:04:43 2003-04-07 12:59:11 8 10 814 0 435 951 937 121.40 34 44.35 CHANGED hllAshGASAVLlFusPsSPLAQPhslluGplloAllG....ls...shphh.............ss.sh..........hus...uLAVulAIshMhhs+slHPPuGAsALlslluus.......GatallhPV.hluullLlhlAllhNs.ls..t.R.pY..Pt ..........................hllAshGASAV.LlF.u.sss.S.P..LA.QPhs..llsGplluAllG....ls...shphh....................ss..sh...........hss....ulAluhuIshMhhh+slHPPuGAsALls..l..luss..................uat.a.l.l.h.PV..hluu.llllshA.llhss.l....t...+..pYP................................. 
0 128 267 361 +2996 PF01288 HPPK 7,8-dihydro-6-hydroxymethylpterin-pyrophosphokinase (HPPK) Finn RD, Bateman A anon Prosite Domain \N 21.50 21.50 22.70 22.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.69 0.71 -4.40 115 4673 2009-01-15 18:05:59 2003-04-07 12:59:11 15 34 4045 60 1118 3316 2284 125.90 37 60.63 CHANGED aluLGSN....l.GcptpplppAlptLpph.thplhphSslYcotPh.G....hssQs..t.FlNsVlplcT.sLsPtpL.LphlppIE.pch.GRtR.pt....+a..u......PRslDlDlLha...ssh..............h...l.....ps.......s.p............LplPHPchtcRsFVLhPLs-l ......................................................aluLGSN...........lus.....h...pplppAlp..........tL.......s.p.............h....................t...........h.p.....l.....lthSslYcTtPh...G................hp.s.Qs......s.....FlNss..l..t..l.........c.........T.......s..........L.......s....Pt.p.......L.Lp.t.....hppIE.pp.h.GR.....hR...pt.......+W...G.................PR.T.LDLDIlha.....ssp.......................h.....l.........ps....................s.c............................................LplPHs.c.hppRsFVLhPLh-l............................................ 0 368 712 952 +2997 PF02603 Hpr_kinase_N Hpr_kinase; HPr Serine kinase N terminus Bashton M, Bateman A, Moxon SJ anon COGs Family This family represents the N-terminal region of Hpr Serine/threonine kinase PtsK. This kinase is the sensor in a multicomponent phospho-relay system in control of carbon catabolic repression in bacteria [1]. This kinase in unusual in that it recognises the tertiary structure of its target and is a member of a novel family unrelated to any previously described protein phosphorylating enzymes [1]. X-ray analysis of the full-length crystalline enzyme from Staphylococcus xylosus at a resolution of 1.95 A shows the enzyme to consist of two clearly separated domains that are assembled in a hexameric structure resembling a three-bladed propeller. 
The blades are formed by two N-terminal domains each, and the compact central hub assembles the C-terminal kinase domains [2]. 24.00 24.00 24.00 24.00 23.90 23.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.16 0.71 -4.45 84 1842 2012-10-03 03:17:47 2003-04-07 12:59:11 11 6 1787 8 328 1032 93 127.40 32 40.73 CHANGED tp......lplpcLlcc....h..pLclls..Gccslppp.IssuDIsRPGLpLsGaFsaassc...RlQl.lGpsEhoalp.phspc.p+tphhcchhs.hcsPslIlo+sL..psPp-llcsAcctslPlLpoph.sToclhuplopaLc ........................................tlplccLl-p.......l..pL-.lls...Gc.c.s.lpct..Is..s.u..Dl..s..R..P..G..LEhsGYFsaass-...RlQl.lGpsEhoahp......p.hssc..pRh...phhcchhp..-sPslIloRuL............psPcEllcu.Acc...pslPlLpoph..sTop.LhucLosYLc............ 0 115 218 275 +2998 PF01627 Hpt Hpt domain Bateman A anon Pfam-B_971 (release 4.1) Family The histidine-containing phosphotransfer (HPt) domain is a novel protein module with an active histidine residue that mediates phosphotransfer reactions in the two-component signaling systems. A multistep phosphorelay involving the HPt domain has been suggested for these signaling pathways. The crystal structure of the HPt domain of the anaerobic sensor kinase ArcB has been determined [1]. The domain consists of six alpha helices containing a four-helix bundle-folding. The pattern of sequence similarity of the HPt domains of ArcB and components in other signaling systems can be interpreted in light of the three-dimensional structure and supports the conclusion that the HPt domains have a common structural motif both in prokaryotes and eukaryotes. In S. cerevisiae ypd1p this domain has been shown to contain a binding surface for Ssk1p (response regulator receiver domain containing protein Pfam:PF00072) [2]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.91 0.72 -3.98 241 11727 2009-01-15 18:05:59 2003-04-07 12:59:11 18 665 2711 46 3296 9899 750 93.10 19 13.09 CHANGED pllphFhpc.stchlpplppsl...p...............ttshpplhctsHpLKGuuuhhG.hpplsphspplEphhcptp..ptp.............h.thhptlpphlpplpst ............................................................................hlphFhpp..st.ch....l.ppl.pp.tl....p...........................tpshp..s.lhctsHplKGu.uu.hl.......G.hp.p.ltpls....ppl.E..p.h.h...pptp...psp..............................t.l.pthstlpp.lt.....t..................................................................... 0 1088 2075 2778 +2999 PF01628 HrcA HrcA protein C terminal domain Bateman A anon Pfam-B_1133 (release 4.1) Family HrcA is found to negatively regulate the transcription of heat shock genes [1,2]. HrcA contains an amino terminal helix-turn-helix domain, however this corresponds to the carboxy terminal domain. 
19.90 19.90 20.00 19.90 19.30 19.80 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.04 0.70 -4.77 165 2791 2009-09-11 06:48:32 2003-04-07 12:59:11 16 9 2743 3 636 1950 812 218.70 24 63.95 CHANGED l-cllppuuclLSpLTphsullhsPp.hppspl+plclVsls.sp.plLsllVscsGpVcspllpls.....................p..sl.sssc....LpphsshlNspLt......Ghslsclppplhp.t.lttthpp..htphhcphlpthtps.h..ttt.......tp..pclhlsGpsNllpts..Ea.ps..........hpcl+pllphl...Epppt..lhpLl..........tps.p.psp...s.....................lplpIGsEs..........hpshpssSllousYphs..s.....p...........................sl.GslullGPTRMsYs.+sl ..........................h-cllppuuplLSplTphsu.l.lhsPp...p.psplcplcll.Ls..sp...pslslllsssGpVcsphlpls....................p.sh..sppp....Lpch.sshlNp+lt......Ghs..l..t..-lpp.pltp...p....l...........s.p.h.h.pp..................ht..p..hhps.llshh...tph.h.t......p.........pplhluGpsNLlphs...ph..ps..........hp.plpplhphl...Ep.ppt..lhpLl...........pph..t...psp.......s............................lpltIGsEs........t...tsl..p..shS.llousYths...t...p.........................................................................hh.GslullGP.T.R.MsYsphl....................................................................... 0 229 443 550 +3000 PF04877 Hairpins HrpZ; HrpZ Kerrison ND anon Pfam-B_6141 (release 7.6) Family HrpZ from the plant pathogen Pseudomonas syringae binds to lipid bilayers and forms a cation-conducting pore in vivo. This pore-forming activity may allow nutrient release or delivery of virulence factors during bacterial colonisation of host plants [1]. The family of hairpinN proteins, Harpin, has been merged into this family. HrpN is a virulence determinant which elicits lesion formation in Arabidopsis and tobacco and triggers systemic resistance in Arabidopsis [2]. 
20.50 20.50 31.60 20.50 19.80 20.10 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.01 0.70 -5.01 11 233 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 84 0 19 217 1 221.70 27 93.65 CHANGED MQh.............L...uhssu.LtoshtshsLstsps.s.u.uoSocpLppVIsQLAptLT...................tsGphssoSPLG.....................................KhLsKuhusD........................................................GctGGhhcsVpsALcpLIpEKLGDNFGAuus..............ss.GhtGh.tsuuuuuQpDLhspVLsGLuK....usLsDLLT.ppusGToF.ScDDMPMLccIAQFMDDsPupFspP............DuGSWupEL.K.EDNhLsGsETAQFRuALDlIGptLupptsttssh.....sGGLG.................................G....DsGp.h.....hGhpu..uGtGhGoss.s..p............phspLltGL..pGLtusLpssGtsGssLpsSAApsushllshhL...pNpusA .............................................................................................................................................................................................................................................................................................s..ss..psl.ssLptlhtptLup..ut.s....................................susss..pLhstl..h.ulup....p.Lssl.s.p.....p....sss.ppF.s.....ppDhshhpclupFMDp.Pt.FspP............DstS.....WhptL.c.-Dsh.....hs.tphttFppAh.shItpthu......................................................................................................................................................................................st...................................... 0 0 5 11 +3001 PF02218 HS1_rep Repeat in HS1/Cortactin Bateman A anon Pfam-B_5631 (Release 5.2) Repeat The function of this repeat is unknown. Seven copies are found in cortactin Swiss:Q14247 and four copies are found in HS1 Swiss:P14317. The repeats are always found amino terminal to an SH3 domain Pfam:PF00018. 
21.00 21.00 21.50 22.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.57 0.72 -4.07 18 1059 2009-01-15 18:05:59 2003-04-07 12:59:11 10 23 81 0 511 857 0 34.90 59 36.89 CHANGED GFGGKFGVQpDRhDKSAVGaDYpuKl-KHsSQ+DYu+ .........GFGGKaGVQpDRhDKS...Al.Ga-YptchpKHpS.Qp...Dhs................. 0 125 164 293 +3002 PF00447 HSF_DNA-bind HSF-type DNA-binding Finn RD anon Prosite Family \N 21.00 21.00 21.00 21.00 20.90 20.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.76 0.72 -3.86 114 1989 2012-10-04 14:01:12 2003-04-07 12:59:11 12 26 337 17 1305 1973 19 99.60 36 21.34 CHANGED sFlpKlaphl-..........Ds............shsplIpWs..ps.G...soFllhs.spcFupplL..Pp...aFKHsNauSF........VRQLNhYGF+Klpsspt......................................................aEFpppt..F.p+Gpc.........cLLpp..I+R+pssp ........................................................FlpKh..a..p.hlp............................-s.....................p.h...s..pll.pWs...ps..G.............soFl...V.hc........p.pF.sc..........p...l.L.........P.c.............a.F......K............H......s....N....auSF...................lR.......QLNh....Y....G.F+Klstsptt.........................................................hEFtp.t...F.h+G.p...........pLLpp..I+R+t..s........................................................................................................ 
1 519 792 1058 +3003 PF00011 HSP20 Hsp20/alpha crystallin family Sonnhammer ELL anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.33 0.72 -4.16 37 9881 2012-10-02 21:54:05 2003-04-07 12:59:11 16 34 4016 178 3585 8031 3065 97.10 23 59.55 CHANGED -h+cscstathphDlsGhp.t-.-lcVplc-sp..lllpupcc......Eccsschhh.tht.tpFhR+apLP-..ss-h-plpush.psGVLslolPKhtst.......ps+sIplp ..........................................................................................pscspat.l.p.h-l..s...G..h...p..p...........-..-.lc..l..p...l.....p..s......s..h........Ls....l..p...up+p..............................................tp.p...p..p......p...p...h..h..h..p...t...h..............t..p..F.p.....Rp..a.p.L.....s.t.................s.....s.....-..t...p..p..l.p.Ash...ps...G..l.Lplsl..s+ttsp........psppItl............................................ 0 1108 2207 2972 +3004 PF01430 HSP33 Hsp33 protein Bateman A anon [1] Family Hsp33 is a molecular chaperone, distinguished from all other known chaperones by its mode of functional regulation. Its activity is redox regulated. Hsp33 is a cytoplasmically localised protein with highly reactive cysteines that respond quickly to changes in the redox environment. Oxidising conditions like H2O2 cause disulfide bonds to form in Hsp33, a process that leads to the activation of its chaperone function [1]. 
21.40 21.40 22.00 21.90 19.70 19.50 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.46 0.70 -5.28 181 3184 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 3110 8 630 2039 673 275.00 33 92.77 CHANGED pDtlh+.hhhpsts.....lRuhhlphs.pshpcshpp.Hs.......hsssssthLGcslsussLlu.usLK..hc.splolplps...sGPlshlls-s......sssuplRGhs.......t.spsph.s............sltthl...Gp..GhLsl.oh.D......uh....tp.YpGhVsL...tus......plu-slptYatpSEQlPotlhLush....t.........httAuGhllQhLPss.............................cc.-shsclpphhp.....shhsptLl..shsscpl.Lp+La..p-t........lclh-s....pslpFpCsCS..+ERhts.sLtslupcElpchlpEp.Gpl-lsCcFCsppYpF..st ....................................................................................................................Dhlh+hlhpsts.....lRuhhlpso.colppshp.p.Hs..............hs..sss..pssLGchLsAssLLu.As.LK...hc.sclTlp.lpG..........sGPlshllssu.........s.sptplRGhs........splsh...s.t.................sltshl......Gs...Ghl.sl....sh..D..........u........t-.YpGhVsL..us.........plu-slphYatpSEQlPoplhLssh......pps.........................htsAGGhllQlhPus.............................pc-shs+lpphhp........shho.p..Ll.......shs..s-pl.LhcLat..--p....................lpl.h-t.......................pslpFp..CsCS....+E....Rhts.ALh.oL.scc...Elpshl...pE.....-.tt..h-hpCcFCsscYpFs..................................................................................................................................................................... 0 197 392 522 +3005 PF00012 HSP70 Hsp70 protein Bateman A, Sonnhammer ELL anon Prosite Family Hsp70 chaperones help to fold many proteins. Hsp70 assisted folding involves repeated cycles of substrate binding and release. Hsp70 activity is ATP dependent. Hsp70 proteins are made up of two regions: the amino terminus is the ATPase domain and the carboxyl terminus is the substrate binding region. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 602 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.87 0.70 -6.61 33 23002 2012-10-02 23:34:14 2003-04-07 12:59:11 15 120 7604 156 6301 21235 9750 370.00 30 85.27 CHANGED llGIDLGTTNSCVAVh-uGcsclIsNsEGsRTTPSlVAFscs.ERLVGpsAKpQAlsNPcNTlausKRlIGRcas-..lpp-hp.hPa+lsps.sscshlpsp...Gc..paoPpcISAhlLpKhK-s.AEuYLGcs..VscAVITVPAYFNDuQRQATKDAGpIAGLpVLRIINEPTAAALAYGLDKps.......-cpIlVaDLGGGTFDVSlL-ls..cG..VFEVhuTsGDTHLGG-DFDp+llcallpEFK+c.pGl.DLppDphALQRL+-AAEKAKh-LSS.tpTplsLPalohsstG..shclshsloRA+FEcLssDLlcRThpPscpAL+DAtls.ts-ID-VlLVGGSTRlPtVQchVcchFG.K-PsKuVNPDEAVAhGAAlQuGVLoG......-V+DlLLLDVoPLSLGIEThGGV.hT+LIpRNTTIPTKKS.QlFSTAsDNQs.uVpIpVaQGERphspDN....+hLGpFcLsGIPPAPRG.lPQIEV..TFDIDANGIlsVoAcDK.uTGKcppIoIpssuG.Loc-EI-+..................................MVc-AEpaApcDcpp+EplEs+NpuEshsass-pplc-h.....t-KlssscK....pplcsslptLcpsh....ttt-......h-chcschccLpphs.tlspphYp ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....p.l........s....h..l....h......t.t............s......p...t.................h............t......t...................l.........t...............t.........s..........V.........l.........sh.P.......s.............a........s.......s........t.......p.......+.....p....t........h......h.......p.........A.......u.......t.....h.......u..........G.........h............p......s....................p.........l........l........p...E.....P....s.........A..A............u...h...........s.....a......t....hp...p.t................................t.t...l...h..l..hDh.................G..G...G...Th.D.....h...o.....l...l...........p.....h...............t...............t.......s........................h.......p.........l....h......s.......s.....s.........t...s......t.....................l..G.......G.............p..........s.h....D..................h...h.....h.p..............h.h....h......t........t...........h............................................................................................................t..............................................p...........................................t......p..........l....h......p.............t..s......c.....t...s...K..............h..................t...........L.....S....s..............................t........................t......h.....................h..................................h.............................................................................................................................h.......................t.................l...s..............c..............t..........p................h............p.....t...h.......................................h.....................h...............t.....t..............................h.............h.......p............s.......l......t.....p..........u............t...h.....p...............t..............t.........l..................t.......t...l....hh.s.GG...s.phPh....l....p..l..t.......t........h.................t.......................................................t
.......................p....s.............t....s...h..s...G...s..s........h...............s..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
2 2234 3809 5213 +3006 PF00183 HSP90 Hsp90 protein Finn RD anon Prosite Family \N 24.40 24.40 24.50 24.50 24.10 24.30 hmmbuild -o /dev/null HMM SEED 531 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.95 0.70 -6.11 11 5703 2009-01-15 18:05:59 2003-04-07 12:59:11 13 30 3641 350 1566 4977 1328 358.50 32 66.61 CHANGED EYLEEc+lKElVKKHSEFIuYPIpLhVpKEhEKEVs--Et-ppc-Etc-c-..............t.chEEs--EcEc-pKK.KTKKVKEsspEhE.LNKpKPIWpRsPc-lTpEEYuuFYKSLoNDWED...LAVKHFSVEGQLEF+AlLFVP+RAPFDLFEsp..KKKNNIKLYVRRVFIhD-s--LIPEaLuFlKGVVDSEDLPLNlSREhLQQNKILKVIRKNLVKKsLEhFpElA......................EcKEsacKFYcpFSKNlKLGIHEDusNRs+LAcLLRapSo+SsD-hsSLcDYVoRMK-pQKsIYYITGESKcpVEsSPFLEpL+cKsaEVlahTDPlDEYshQQLKEFEGKKLsslTKEGLcL-EsE-EKcpcEchKccaEsLsKhhKc.lLsDKVEKVVVSsRLssSPCslVTSpaGWSANMERIMKAQALR...DoShsuYMuuKKshEINP+HPIIcEL+c+.scsDpsDKoVKDLshLLaETALLoSGFsL--PpsaAsRIaRhl+LGLsIDED-ps..p-..spsthpstssscpssssScMEEVD ...........................................................................................................................................................................................................................................................................................................................p.......c......p.............lNptpsl......Wt.Rs.p-..lopE..-Y.tpFY.................+p.l.s.p..-..a.....p-........L.h.hh.H..p..sE...G..p..hcapulLalPp.p.s.P.a..D..h...a.p...p............t..p..p.s....lK.L...Y..V......p.....RV.FIhDs.s..c.p.hhPpa.LpFl+.GllDSpDLPLNlSREhL.Q.......p......s......p......hl.c......hl+ps....ls.K..+s.l.p.hh...p...c.l..s..............................................................................c..c.t..-.p.....Y.p..pFa.ppFuh...........slK...G..h...h...p....D........t.....s.....+...p.....p.ltc.LLp..at.......o.................o.................p.................p..............t.......t..........p..................h..............s..........o........Lp..-.......Y..l...................p..R.....................M...........p........................
..-............s..Q.....p......p............IY..Y.l.s..........u..p.sh......tt...............h.ppSPalEhh+p+..G..hE..Vlhhs-...l.D.Eahh.p...l...p..-.a.........-...........s....K.......t.....h...s......l.......s........c........t....t.......h....p......l.......t......p........t.......-........c.......p.....p......p........t...................c.......p......................p..........t...........t............h...........p..........s.l.h....p.h.h...Kp....h...L..t..c...c.........V.p...c....V........h..l........o...p.R..L.........s..s.....o...P..shl.s.s..s..t....t..h.sh.p.Mt+.hht..t...............................................h................p.....hhElNsp...Hsl..l.p.t...........h...............................................................t.......t..........t............h...........t...p......h....s..l....lhp.u.hl...p.t.............t..p.s..t.....a.tth.p.h................................................................................................................................................................................................................................................................................................................................ 0 594 973 1277 +3007 PF04119 HSP9_HSP12 Heat shock protein 9/12 Wood V, Finn RD anon Pfam-B_14318 (release 7.3); Family These heat shock proteins (Hsp9 and Hsp12) are strongly expressed, an increase of 100 fold, upon entry into stationary phase in yeast [1,2]. 26.30 26.30 26.30 26.70 26.10 26.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.95 0.72 -3.89 24 130 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 97 2 87 129 0 58.00 47 59.56 CHANGED MSDsGRKsh..............o-KspEtlTPDSpKSThEKsKEsVTstsD+hAussps-spKShsQpssDp ....MSDsGRKsF..............osKApEplpPDSpKSshE+sKEplTDssD+lAushQP-spKSssQpstDp....... 
0 23 49 74 +3008 PF04213 HtaA Htaa; Htaa Yeats C anon Yeats C Family This domain is found in HtaA, a secreted protein implicated in iron acquisition and transport [1]. 21.80 21.80 21.80 21.80 21.40 21.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.04 0.71 -4.01 25 599 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 181 0 91 421 21 153.20 31 45.43 CHANGED sslssGshs..WGlKpSFpsYlpGs.lApGsaphss...Gus...tssspFsF.shssushD...ssspssslpasGsV+as..GHp.............hhLDlslussclshsGssGpLhs-lsSpphp.sstsshut......lslAslshsuhsh...ssssshss.ssstLTtpGupsFusaYsAGpsLDPlohshsh ...................................t..ltpGohsWGl+pSFpsYlpus......s.....pG.p..hphsu................Gus........hssstFsF..shs...su.sh..D....ssps.sslpasG.s.V+as.Ghp....................shLDlolu-Ppls.lp..s.u..s..uplhhs.lp..Sps.hs...u.p.hshuc..................lshushssusht........ssssshsu.ssspLTupGscs.F..u....G.....FY..psG.p..s..h..Dslohslt.h............................................. 0 28 66 91 +3009 PF00126 HTH_1 Bacterial regulatory helix-turn-helix protein, lysR family Sonnhammer ELL anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.67 0.72 -4.23 1536 86700 2012-10-04 14:01:12 2003-04-07 12:59:11 22 49 4604 63 20714 62728 5685 59.70 33 19.98 CHANGED lppLchFhslscps.ShopAAc.pLsl...opsulSpplppLEp.pLGsp....Lhp..R....tt...p...t..l.pLTttGp ........................ppLchFhs.l.sc.pt....ohopA...A..c...pL..tl..........op.s...ulSpplpp.L....Ep....pL....G.s..p........Lhp......R...............ss...+....p......l.pLTptGp........................ 
0 3926 9597 15357 +3010 PF04967 HTH_10 HTH DNA binding domain Bateman A anon COG3413 Domain \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.14 0.72 -4.38 30 808 2012-10-04 14:01:12 2003-04-07 12:59:11 7 73 81 0 433 919 135 52.70 37 16.19 CHANGED LT-+phplLptAachGYF-hPRcsslp-LAcplGIScsohpp+LR+AppKlhp ................LTc+QpcsLptAhctGYa-hP.Rc.sohp-lAcpLslSpsohsp+LR+App+ll.t....... 0 47 296 420 +3011 PF01022 HTH_5 HTH_ArsR_family; Bacterial regulatory protein, arsR family Bateman A anon Pfam-B_139 (release 3.0) Domain Members of this family contains a DNA binding 'helix-turn-helix' motif. This family includes other proteins which are not included in the Prosite definition. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -7.91 0.72 -4.29 42 9132 2012-10-04 14:01:12 2003-04-07 12:59:11 15 65 3608 33 2609 16970 1782 47.00 32 34.44 CHANGED cssRlcIlhhLtc...sp.hsls-lsptlphopuslScHLchLcctGlVpp ..........................................c.sRlp.I.l.t.hLtp.........s-...h...s....V.......s.......-.....L.....s......p......t......l......s......h......o.....p.....s..s.l.SpH.Lph.L.+psuLlp............... 0 903 1744 2237 +3012 PF01418 HTH_6 Helix-turn-helix domain, rpiR family Bateman A anon Pfam-B_3373 (release 2.1) Domain This domain contains a helix-turn-helix motif [1]. The best characterised member of this family is Swiss:P39266. RpiR is a regulator of the expression of rpiB gene. 
20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.14 0.72 -4.25 10 9722 2012-10-04 14:01:12 2003-04-07 12:59:11 12 9 2886 5 1519 5638 383 75.30 25 26.84 CHANGED hslLppIpsthpcLscoE+KlA-aILssscpshphSlsplAptuuVS-uollRFs+pLGapGFs-hKlpLup-lusp ................................h..plpp..t..h.p.pL..o..ps.E..cc.....lA.c....a.ll.p.ss.p.p...s....h.pho..lp...cLApps.s....l.S..puolsR.Fs+.+.l..Gap.Gap-....h.K..htLtpth...t..................... 1 359 791 1163 +3013 PF02796 HTH_7 Helix-turn-helix domain of resolvase Finn RD, Griffiths-Jones SR anon Prosite Domain \N 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.01 0.72 -4.10 14 3816 2012-10-04 14:01:12 2003-04-07 12:59:11 10 39 1793 24 542 3351 302 44.90 28 23.91 CHANGED GR.pphscp..p......hsplhpLhppG.hshpplActhsloRsTlYRhhsst ...........................GR......p..hspp.....p...........hpp.lh...c.h..h....p....s...G...h.....s..hp..plAcphsl.oR..s.TlYRhhtt.t........ 0 153 335 445 +3015 PF00165 HTH_AraC HTH_2; Bacterial regulatory helix-turn-helix proteins, AraC family Sonnhammer ELL, Griffiths-Jones SR, Studholme DJ, Schleif R anon Prosite Domain In the absence of arabinose, the N-terminal arm of AraC binds to the DNA binding domain (Pfam:PF00165) and helps to hold the two DNA binding domains in a relative orientation that favours DNA looping. In the presence of arabinose, the arms bind over the arabinose on the dimerisation domain, thus freeing the DNA-binding domains. The freed DNA-binding domains are then able to assume a conformation suitable for binding to the adjacent DNA sites that are utilised when AraC activates transcription, and hence AraC ceases looping the DNA when arabinose is added [1-2]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.91 0.72 -4.00 75 482 2012-10-04 14:01:12 2003-04-07 12:59:11 18 29 353 4 136 54828 2557 37.30 26 22.38 CHANGED lp...psp.hsltclApphuh.Stshhp+hF+pthGhoPppahp ...............................plpplAppsGh..S..t.pa.F....p+hF.Kchh.GhoPtpah.................. 0 48 84 114 +3016 PF04204 HTS Homoserine O-succinyltransferase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.84 0.70 -5.69 77 2128 2012-10-03 00:28:14 2003-04-07 12:59:11 11 3 2054 3 324 1318 572 290.40 50 95.62 CHANGED PI+IPcsLPAhclLppENIFVMs-sRAtpQDIRPL+IhILNLMPpK..lpTEsQlLRLLuNT..PLQl-lsLl+hsoHpuKNTst-HLppFYcsF--l..+sc+FDGhIITGAPlEplsFE-VsYWcELpcIh-WocspVpSTLalCWGAQAuLYaaYGlsKh.LspKhaGVFpHp..shp.ps.LhRGFDDpFhsPHSRaT-lpcc-l...pptssLplLucS--sGshlltscct.RplFl..hGHsEYDspTLppEYpRDhtpGh.s.....hplPtNY...........................aPsDDPsppPhspWRSHApLLFuNWLNYhVYQpTPY .....................................................PI+lsccLPAhphLcpENlFV.MsppR.Atp....Q....-IRPL+llILNLMPpK..lpTEsQhLRL..LuNo....PLQV-lphL+hco+pS+NTssEHLpsFYpsF.....--l..ccppFD.....G.hIlTGA.PlEtlp.FE...-VsYWp.Elpplh-WuKsH..Vp..S..TLalCWuAQAuLhhhYG.l...sKhshscKl.GVYp...H..c...h..l..c..s...p...s...h..L......h......RGF.D....D......s..Fhu.P...HSR.as-hst.ppI...pp.hs-.L.cILuco-.-.u.GshLhso+Dt..RplFl..hGHsEYDspTLspE....YhRDlptGl..s......sc..l..PhNY............................FspsDP...pp.pP.pssWRSHusLLFsNWLNYhVYQ.TPY................................................... 0 87 185 247 +3018 PF04955 HupE_UreJ HupE / UreJ protein Bateman A, Eberhardt R anon COG2370 Family This family of proteins are hydrogenase / urease accessory proteins. The alignment contains many conserved histidines that are likely to be involved in nickel binding. 
The members usually have five membrane-spanning regions. 22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.75 0.71 -11.52 0.71 -4.98 80 494 2012-10-02 18:22:22 2003-04-07 12:59:11 7 4 398 0 191 653 600 177.30 34 89.46 CHANGED lsh..h...hhsssAhAHsupst...su...GhhuGhsHPlhGhDHLLAMlAVGlWu.uhhus.....+uh.........ahl....PhuFlusMll.GusLulsGls.lPhVEssIusSVlllG..LllA......hst+lPh....ssuhullulFAlFHGaAHGsElsususshh......YshGFslATuhLHhsGlulGhhltpttt........lhRhsGuslAssG.hhL ...................................h..lhhhsssAhAHsuts.........ts.......Ghhu....GhhH..Pl.hGhDHLLA..MlAVGlhu..uthtt.....ptt................hhl....Ph..s..F....l...s...s...M..ll...G...u...h...l...G...h...t...G...l...s...l.....P.....h...sEssI.u.sSl.l.llG....l.h.lu...................hs..h....c..h.sh.................hh.u...h....sls.....ul.FA..lFHGaAHG...s...E....h.....ss....susshh............YssGFshuou.h.L.as.s.Ghu.l.uhhl..tphts.................hhRhsGu.shAhhGhh................................................................. 1 43 101 146 +3019 PF01455 HupF_HypC HupF/HypC family Bateman A anon Prodom_3112 (release 99.1) Family \N 19.60 19.60 20.50 19.90 19.50 19.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.26 0.72 -3.98 170 2243 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 1433 6 492 1128 72 69.50 44 76.37 CHANGED MCLulPucll....pl........p..sphAh...l........-hs.G..lp+clslsLls.-......splGDa.VLlHlG..aAl.pclDccEApco.Lphhp ............................MClGlPupll....sl.........-..tphAp.....V........-.ls..G....lpR..-VslsLlsp.......stlGpW.VLVHVG..FAh.uhIDEpEAccTLcsL........... 1 163 329 421 +3020 PF04809 HupH_C HupH hydrogenase expression protein, C-terminal conserved region Mifsud W anon Pfam-B_3701 (release 7.6) Family This family represents a C-terminal conserved region found in these bacterial proteins necessary for hydrogenase synthesis. Their precise function is unknown [1]. 
21.20 21.20 29.80 21.30 20.00 21.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.44 0.71 -4.35 36 1273 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 541 2 95 481 8 110.40 37 72.84 CHANGED GlhNAPslLsEIt-+sts...htsustsHVINLTLLPhostDhsaLsphLGpGslsl.LSRGYGsCRlsuTuhhpVWpVpaaNShDslIL-TlElsslPcVAhAAsEDlpDStcRLpEllc.slp ...........................................hsLhpELhtplpp...........ss...scslsls.LPlstsD...RtFLspLLGcGpls..l...pp........t..s..hu....ESc...IppThhsGlW+VRphss.h..cp.hLhD.plEluslP.p.sh........A.u.tD.................h................... 0 30 56 72 +3022 PF01750 HycI Hydrogenase maturation protease Bashton M, Bateman A anon Pfam-B_548 (release 4.2) Domain The family consists of hydrogenase maturation proteases. In E. coli HypI the hydrogenase maturation protease is involved in processing of HypE the large subunit of hydrogenases 3, by cleavage of its C-terminal [1]. 20.60 20.60 20.80 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.51 0.71 -4.60 27 2632 2012-10-01 20:58:29 2003-04-07 12:59:11 13 8 1344 10 545 1351 63 127.40 28 73.29 CHANGED l+llEtLpctht..t......pVpllDuGTtuh.Ll.hlpp....hc+llllDAlch.GhcPGplchlcs--lsp..hthc.hShHphshtpsLphhp.htph.tclllhthpshph--...hshsLoptVcpslspAlchllpt ........................................chsctL..ttpht.st..........tlcllDGGTtu.h.p.L.l.s.hlpp......sc+LlIlDAl....ch..G...h..sP.Gsl.......hh.......h.c.......s....-..........c..l.....ss.....hh.s....t......t......h.......S.....s...HphshsclLs..hh..c..h..p.....G..p.h..P.tcl..hl.lGlp..P.p............l..c............thsL.....ot.lcst.ltsshpth...t........................... 0 173 357 462 +3023 PF01968 Hydantoinase_A Hydantoinase; Hydantoinase/oxoprolinase Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the enzymes hydantoinase and oxoprolinase EC:3.5.2.9. 
Both reactions involve the hydrolysis of 5-membered rings via hydrolysis of their internal imide bonds [1]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.70 0.70 -5.31 61 2437 2012-10-02 23:34:14 2003-04-07 12:59:11 13 36 1379 6 1233 2598 1387 281.50 29 34.74 CHANGED RssTsllNAhLtPlhppalptlpstlcptsht........................................splhhhpusGGlhshcpupppPlcslhSGPAuGllGAuh.....hsGhc........shIshDhGGToTDluhltpGpschsppstl............uGhhsph.hlclpolusGGGSll.hhtpstth+.lGPpSuGspPus...............................................................hshshGGp.hTlTDAslhLGcls.p.hh.....tt......phs.thh.t.hut.h............................................pplApthlplsstphscul+hl.................................................................................s.ppshsspshslsshGGuu.shasstlA..ctluhppllh..hs.uVhsAlGhhlAclptch .............................................................................................................................................................RssTsllsAhltPhhppa...lpph....p...p...t...lp...p....t..t.h.t..............................................sp.lh..hhp.us.GG.lhshpp....hp.t.h..cslhS.GPA....u..Gl.h.Gushh...............thsu...h.p.........................................ss.lshDhGG.......ToTDl.u.h..l....s.......tG......p............c....h.s...ps.th..............uGhthp..hsh.lcl.polusGGGSll........h.......p...........s......u.....t........hp..VGPcSAGupPGP..................................................................................................................ssat.....t...G.G.............h.Tl..T..D.As......l...h.L..G.t.ltsp....hs.........ttt.....ls.ths.t..t....th....tt.lut...lsht........................................spps..At.uhlcluspphspsl+tl...............................................................................................................s.t.pG..hcs.p...p..hs.Lh.sh.G.GuG.....s.ass.tlA......ctlG.....hp.....p.l.....ll..hs....ul.huAhGhulAclt
...h...................................................................................................................................................... 0 388 797 1050 +3024 PF02538 Hydantoinase_B Hydantoinase B/oxoprolinase Mian N, Bateman A anon COGs Family This family includes N-methylhydaintoinase B which converts hydantoin to N-carbamyl-amino acids, and 5-oxoprolinase (Swiss:P97608) EC:3.5.2.9 which catalyses the formation of L-glutamate from 5-oxo-L-proline. These enzymes are part of the oxoprolinase family and are related to Pfam:PF01968. 21.40 21.40 21.90 22.50 21.00 21.30 hmmbuild -o /dev/null HMM SEED 527 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.60 0.70 -6.28 6 1619 2009-01-15 18:05:59 2003-04-07 12:59:11 9 25 1011 0 830 1658 1539 505.30 33 58.32 CHANGED hDPIhLE.lFpshatsIAEpMGhhLcpTAhSsNIKERLDFSCAlFDusGsLVAsAsHIPVHLGSMupuVKthl+.thcs..plcsGDVhlsNDPYhGGTHLPDITllsPlFpsst..llFaVAuRuHHADlGGlTPGSMPssSpplapEGhlIss.phLscsGchp-phhc.plhlcss..a.....sRsPcsslu.DL+AQIAANpcGIcpltchI-caGhcsVttahs+lps.uEcul++tIsclss..Gphca.......chDsstpIslplplchcptphhlDFoGTSsQscsshNAspulssuulhYsh+sLlssDIPlNcGCh+PLpIllPcGollsP..cpPAuVsuGNVETS......QtIsDslauAhs......V.AsSpGoMNNlTaGsts............asYYETIuGGoGAu......sGhsGssAVaopMTNohhTDhEllEpcYPlLlpEaulRpsSGGtGKaRGGsG..lVRphcFhpshololLusRR+luPaGltGGpsGusGcN.hlh+ssGsclclsupsolalp....PGDR..............................llIcTPGGGGYG....hp....hcD 
....................................DPlhLp.lhp.pthhuIAEpM.uhsLp+TuhSssI+EchDaSsAla...s.s......c.G...p...l..l.A.p.A..s.p.l..PlHl.G.........o.M...s...t...sl.c..t.hlc...th.t.s..........slc.sGDVhlsNcPa..t...G...G.....o....HLPDlolls..PlF.............t.........p.........s....p..............l...............l...........h....alA.u+uHauD.................lGGh..sPG.Shs.spuppla.pEGlh.lss.h+Ls...c....pG...p.h..p..-p.hlp.......hl....hpss...........................R....sspp.shu.DLpAQ.lAA..s..pp....Gh..ppltcllpcaGh-.sV.psh......hptl............pcpuEpssR.ph.l....p..p..l....s....c...............G.p....h....p.h...s.....................s.th.D..s..s.....s.....lp..l...pl..s..l......s.......h......p.....s.............s....p...........hhhDFsGT...us.p.s...s..shN.....u....s......h......u......s.....s..h......u....u.....l...h......a.....s.....h..+......sll.....s........s......-......lP..h.....N.p.............G.sh.pPl.plhl.P..c..Gol.....L...s.P........phPA...A..V..su...u.....s....s...s..S........p...t..ls..cslh.tAhs....................................s..A.u..u...p.....G..s...h.......N....s.....h...s...h.G..stp.............................a..t.h....a...E..o.....l...u...G.G...s....GAs..............sshcGt..s...u...l...p..s...p..h..o.....N.o...p...h..T.s.sElhEt.caP.ll..l.cp.ap.lR.t.s..S.G.GsGcaRGGs..G....sh.R....p..l....c..h..h....p..s....h....p..h..s.l..l..u....p..R...p..h..h..s.....P..a..GltG.....Gts....G.......t.......h.......Gts....h.....l..h.....c.......t..............s..................s..........p.............h.......................p....l..........s...u..p...s..s...h...t..l.p........................s.GDh..............................................l.hl.T.P.GGGGaG.....t.........t.................................................................................. 
0 241 519 705 +3025 PF01185 Hydrophobin Fungal hydrophobin Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 21.50 21.40 21.20 21.30 hmmbuild --amino -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -11.33 0.72 -3.53 69 519 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 203 0 178 496 0 81.40 33 62.51 CHANGED C.ss..uslpCCNpspsuss.........hhsslLu.LLGhsht........shsuhlGh.sCoPlsl...l.ul.uss.......u.Cssps..VCCpss...shs.GLls.lG...Cs...Pls .......................................C..ssuplpCCNpsppuss........s.......hsshLs.L.l.G.h...hs........shpu.h.lGh..pCoslsl......l.sh.uhs....................spC...ppp...sCCpss.....s.s...G.Lls.lG....C.sl............................. 0 98 127 156 +3026 PF01155 HypA Hydrogenase expression/synthesis hypA family Finn RD, Bateman A, Yeats C anon Prosite Family Four conserved cysteines lie either side of the least conserved region. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.63 0.71 -4.39 169 2158 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 1453 7 454 1106 89 112.50 38 96.54 CHANGED MHEhSlspullchlpc.pApppt.....sp.+VppVpLclGpLosVps-sLcFsF-shscsT.................................lscsAcLpIpphsspshChsCupphplpphh...................................CPpCGuhpl.pl.hsGcEhplcplEl- .........................MHEloLspshlEllppp.Appp.s.......sp..+VssVaLcIGshSsVEssulpFsF-.ls....s.c..G.T.................................lAc.Gs.cLclp.tsApsaChsCsphspltppp....tp................................CPpCpu.p.p.l.pl.ssG.-pLpl+pIEl........................ 0 154 306 388 +3028 PF01924 HypD Hydrogenase formation hypA family Enright A, Ouzounis C, Bateman A anon Enright A Family HypD is involved in hydrogenase formation. It contains many possible metal binding residues, which may bind to nickel. Transposon Tn5 insertions into hypD resulted in R. leguminosarum mutants that lacked any hydrogenase activity in symbiosis with peas [1]. 
25.00 25.00 27.80 27.80 22.60 22.30 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.09 0.70 -5.71 129 1488 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1407 2 399 1039 96 349.00 49 96.11 CHANGED Dsph..spt.LhcpI.pphsp........c.shplMEVCGGHTHoIh+aGlcpLLP....pslchlHGPGCPVCVhPhtcIDpAlpLA.....tpssVIlsTaGDMhRVPGSpt...............................S.LhpA+A....cGA.DVRhVYSPhDAlclAccNP-+pVVFFAlGFETTsPsoAhslhpAt...........ttslcNFslhssHlllPPAlcslLp..ss.......p.splcGFluPGHVSslhGscsYc.lsccaphPlVVuGFEPlDlLpulhhllcQlppG.....cscl-N.pYsRsVpt-GNhtApphlscVF-lp-p.cWRGlGhIPpSGLpL+ccaupaDAcp+Fs.....ls.....thpss-spu...CpCGclL+Ght+Pp-CtlFGpsCTPpsPlGuCMVSoEGACAAYYpYs..ph ......................p....shpLhcclccpss...s.....+PlpIMEVCGGHTHuI.hKaGLcpLLP....c.s.l-hIHGPGCPVCVhPhuRIDsslclA.....pps-VIhsTFGDhhRVP.Gppu..............................................SLlpA+A......cGA.DVRlVYSPhDALclApcNPs+...cVVFFulGFETThPsTAlsLppAc...........tcslpNFhhaspHlpl.PslculLc....ps..........s..stIDuFLuPGHVShllGscsYphlAp.............ca....pp...........PlVVuGFEPLDlLpullMLlcQhhtu......cscVEN.QYpRlVscpGNhhAQphls-VFpl.sss.EWRGLGhItsSGlcLps-YppFDAEt+Fc......ss...............sp.p.ss-.s.tCcCG-VLpGhsKPppCsLFGpsCsPpsPhGuhMVSSEG.ACAAaYpYppt.................................... 0 137 272 345 +3029 PF02494 HYR HYR domain Bateman A anon [1] Domain This domain is known as the HYR (Hyalin Repeat) domain, after the protein hyalin that is composed exclusively of this repeat. This domain probably corresponds to a new superfamily in the immunoglobulin fold. The function of this domain is uncertain it may be involved in cell adhesion [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.74 0.72 -3.97 19 1816 2012-10-03 16:25:20 2003-04-07 12:59:11 11 329 171 0 1519 1518 549 80.40 30 16.53 CHANGED VDTsPPsV.sCsss.lhpsVElGsssspVhasEPTAsDsuG..pssLloRospPGD.FPVG.posVTYsapDsuGNpA.sCsFTlsV .........................................DspsPsl..sC.....P......ss...hphs..s......s........u.....p...s...s.....s.....s....V....s...a...s...t...P.....s....A...o.DNss..............s.s.s..l...s....s.........s....t..s....s.........G...s...........h..Fs.....l....G....s......s.....s...V..s..You.s.D.s.uG.Nsu..sCs..FpVsV.................. 0 1239 1292 1437 +3031 PF01608 I_LWEQ I/LWEQ domain McCann R, Bateman A anon [1] Family I/LWEQ domains bind to actin. It has been shown that the I/LWEQ domains from mouse talin Swiss:P26039 and yeast Sla2p Swiss:P33338 interact with F-actin [1]. I/LWEQ domains can be placed into four major groups based on sequence similarity: (1) Metazoan talin; (2) Dictyostelium TalA/TalB Swiss:P54633 and SLA110; (3) metazoan Hip1p Swiss:O00291; and (4) yeast Sla2p Swiss:P33338. The domain has four conserved blocks, the name of the domain is derived from the initial conserved amino acid of each of the four blocks [1]. 
36.30 36.30 36.70 36.30 35.80 36.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.93 0.71 -4.27 22 586 2009-01-15 18:05:59 2003-04-07 12:59:11 12 37 239 11 325 542 0 147.60 43 10.97 CHANGED FY+KNsRWTEGLISAAKAVAtATshLlcsAssllp....ups....o.E.pLIVASpEVAASTAQLVAASRVKAshsSpspppLppAS+sVopAscsLVppspshhpptcp.....pcslDhspLo.tphKstEMEQQVcIL+LEppLptAR++LuplRKpp.Ypps ............................Ytcsu..pWoEGLISAAKuVAtusssLl...EuAss.llp....Gcu..............s.E.cLIsuu+pVAASTAQLVsAo+....VKA..sh.sS.c..s.h...c+Lp...............t............A...........u+uVs.........pAscsLVp.uspsutpp..ppc.................p-s..hc..hs.p...hshhph+spEM-sQs.......c....lLclEppLpptRp+LuplR+p.p.Yp..h............................ 0 97 141 228 +3032 PF04568 IATP Mitochondrial ATPase inhibitor, IATP Kerrison ND anon DOMO:DM04419; Family ATP synthase inhibitor prevents the enzyme from switching to ATP hydrolysis during collapse of the electrochemical gradient, for example during oxygen deprivation [1] ATP synthase inhibitor forms a one to one complex with the F1 ATPase, possibly by binding at the alpha-beta interface. It is thought to inhibit ATP synthesis by preventing the release of ATP [2]. The minimum inhibitory region for bovine inhibitor (Swiss:P01096) is from residues 39 to 72 [2]. The inhibitor has two oligomeric states, dimer (the active state) and tetramer. At low pH , the inhibitor forms a dimer via antiparallel coiled coil interactions between the C terminal regions of two monomers.\ At high pH, the inhibitor forms tetramers and higher oligomers by coiled coil interactions involving the N terminus and inhibitory region, thus preventing the inhibitory activity [1]. 
21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.46 0.72 -3.98 34 329 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 226 8 221 316 0 83.00 28 76.51 CHANGED M......hphsh.........hp.....h.h.h.hthth.t.stGss..GAs.+uGG......tuGsuFsKREtApE-hYh+p+E+EQLtpLKc+lpc....pccclccL-ccIc ......................................................................h.....t..Gph.......Gus.+....ssG...................t.uG......s.uFuK+EtAp.....E-hYh+p+-+E.pLpp.L...Kc+hcc.......pppplcphpptlp............................... 0 69 114 179 +3033 PF01749 IBB Importin beta binding domain Bashton M, Bateman A anon Pfam-B_544 (release 4.2) Family This family consists of the importin alpha (karyopherin alpha), importin beta (karyopherin beta) binding domain. The domain mediates formation of the importin alpha beta complex; required for classical NLS import of proteins into the nucleus, through the nuclear pore complex and across the nuclear envelope. Also in the alignment is the NLS of importin alpha which overlaps with the IBB domain [4]. 23.80 23.80 23.80 24.00 23.70 23.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.55 0.72 -3.82 18 914 2012-10-11 20:00:59 2003-04-07 12:59:11 15 16 301 41 568 813 4 92.30 32 18.60 CHANGED pssscsphRhppYKspu.hcs-EhRRRRcEstVElRKsKREEpLhK+Rplsh.tppt.p...psphsss.t...ssplppsh.h..pulhScD.ppQ.Lpusst ............................t....hsphRhppaKspu.hcs.........-EhRRR.RpE.sV...ELRK.sKR-E.pLhKRRNl..s......h.....p..p.......ss.s.p............tsp.pssh.t............pph...s....p.........h..............psl..S.ss.t.Qltusp.t....................................................... 0 157 244 397 +3034 PF03617 IBV_3A IBV 3A protein Finn RD anon Pfam-B_3183 (release 7.0) Family The gene product of gene 3 from Avian infectious bronchitis virus. Currently, the function of this protein remains unknown. 
25.00 25.00 89.10 89.00 18.60 17.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.99 0.72 -4.22 6 81 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 15 0 0 79 0 56.80 82 99.74 CHANGED MIQoPTSFLIlLILLWhKLVLSCF+ECllALQQLIQVLLQIlNsNLQSRLlLWHSLD MIQoPTSFLIVLILLWCKLVLSCF+ECVlALpQLIQVLLQIINSNLQSRLLLWHSLD.... 0 0 0 0 +3035 PF03622 IBV_3B IBV 3B protein Finn RD anon Pfam-B_3190 (release 7.0) Family Product of ORF 3B from Avian infectious bronchitis virus (IBV). Currently, the function of this protein remains unknown [1]. 25.00 25.00 30.60 29.90 21.20 18.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.21 0.72 -3.99 7 88 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 17 0 0 82 0 63.20 73 99.30 CHANGED MLsFEtlIETGEpVlQQISFsLQHISSVLsTplFDPFEsCYYRGGsaWElESA--hSGDDEahE ..............MLDFEsIIETG-QlIQQISFsLQHISSVLsTElFDPFEsCYYRGGsaWElES...AD-hSGDDEalE....... 0 0 0 0 +3036 PF03620 IBV_3C IBV 3C protein Finn RD anon Pfam-B_3232 (release 7.0) Family Product of ORF 3C from Avian infectious bronchitis virus (IBV). Currently, the function of this protein remains unknown. 25.00 25.00 25.50 58.60 20.40 20.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.05 0.72 -4.09 3 126 2009-09-10 22:14:47 2003-04-07 12:59:11 8 1 17 0 0 128 0 92.10 87 87.33 CHANGED MhNlLuKSLEENGSFLTuVYVFlGFVALYLLGRALQAFVQAADACpLFWYTWVlLPGAKGTAFVYNaTYGK+LNNPELEoVIVNEFPRNGWNN ...hhNlLNKSLEENGSFLTAlYlFVGFlALYLLGRALQAFVQAADACCLFW.YTWVVVPGAKGTAFVYKHTYG+.K.LNNPELEuVIVNEFPKNGWNN... 0 0 0 0 +3037 PF04629 ICA69 Islet cell autoantigen ICA69, C-terminal domain Mifsud W anon Pfam-B_5314 (release 7.5) Domain This family includes a 69 kD protein which has been identified as an islet cell autoantigen in type I diabetes mellitus [1]. Its precise function is unknown. 
31.40 31.40 31.50 31.50 31.00 31.30 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.73 0.70 -4.40 6 145 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 51 0 70 148 0 173.60 37 43.48 CHANGED PYEFTTLKSLQDPscKLspc...cK-cpppsspsusp.scpLISLE-t+..cEouo..s-cthu.lSuh-ch.hcsos.GAhD-LLDhKs-E.hhht..lPtDPLEPEsuDKDEhpLLN-ILSuSSL-suEhopEWAAlFGDP..pLtsPcP.ShGss-.-.+ssoSSGFLPSQLLDQsM+DLtuSlpsWsupptop.....sps.Psss.......QpPAK.......uupDLoAWasLFADLDPLSNPDAVGKTDKEHELLNA .............................................................................................................................................................................................p.....t...p..t.......p....................t.hl.............t...........................................................................h......hss..c..-sh-K.-h.hLsplhss.uu...tpu-aopEhtssFG..ss........tp.t.s...shu.t..................tp.sotFLPSQLh......D..s..hp..htuu.h............................................................................p.pPtp..................u.spDhoAWFsLFADLDPLSNPDAl.G+o.Dc..ELLNA...................... 0 10 16 34 +3038 PF03921 ICAM_N ICAM_N-terminal; Intercellular adhesion molecule (ICAM), N-terminal domain Finn RD anon DOMO:DM01682; Family ICAMs normally functions to promote intercellular adhesion and signalling. However, The N-terminal domain of the receptor binds to the rhinovirus 'canyon' surrounding the icosahedral 5-fold axes, during the viral attachment process [1]. This family is a family that is part of the Ig superfamily and is therefore related to the family ig (Pfam:PF00047). 
20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.24 0.72 -4.51 5 218 2012-10-03 02:52:13 2003-04-07 12:59:11 9 28 36 23 89 253 0 89.10 36 19.09 CHANGED pcsFpVpVcPcKslLssGGSlpVNCSToCspP-hlGLETsLsKchL-p.Gpu.WKtFpLSNVocDSshhCahsCuGsQpSuSoNITVYpsPEp .........h.lplpPp.p.shV.h.GuSlhlNCS.T.s.Csp.P....p....h...h....G.LETp.LpK...p....ltp...Gsp...WttapL.s..slp..c..-.o.p.h.h.Cahs....C.u.s.p..Qpps.sssITVYphP-p......... 0 5 9 17 +3039 PF00818 Ice_nucleation Ice nucleation protein repeat Bateman A anon Pfam-B_2 (release 3.0) Repeat \N 20.80 20.80 31.80 20.80 15.40 20.70 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.76 0.72 -5.71 0.72 -4.50 110 1266 2009-01-15 18:05:59 2003-04-07 12:59:11 12 33 34 0 97 1330 0 15.80 74 44.84 CHANGED GYGSTQTAGhcSsLTA GYGSTQTA.utsSsLTA.. 0 35 35 35 +3041 PF00656 Peptidase_C14 ICE_p20; Caspase domain Bateman A anon Bateman A & Pfam-B_2524 (Release 8.0) Domain \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.80 0.70 -4.68 115 4033 2012-10-03 02:24:44 2003-04-07 12:59:11 17 280 992 455 1901 4284 706 230.40 15 47.36 CHANGED thALlIssspaspt..........sphpGstsDscslpphL.ppl.G.....apVps..........h.ps..................................................................................hotpphppt.lpphstptc.....ssD...shlhhhhuHG..tp........s..hsh.............Dsph..l...sl-t................lpshhpshps.sLtsKs+.lhllpACRusphspthttttstt............................................................................................................................................................................................................................tsshtphsstsDhlhhhuosssthShcs.................sspuShahpuLsphl......t.......hpltslhspVppplt................t.......................ttcQhPph.suhhpcphhh 
..................................................................................................................................................................................hslll.s..p.ta..tt.....................tth......p...u..s.....t.D.s.p....tht..p...h..h...p..p..h....s...........a..psps..........h..ps.........................................................................................................ho.t.p.p.l.......hp......t...l..p..p...hhp.psp..............ssD...............shhhh..............h..........h..uH.G........tp................t.........t.h..................-sth......l..........sh-t...........................................lts......h.ps......h........s...t..........l....s..p...s....+...l............hh.l..pu....C...+.....u..s.s..h.....ph..h...h...t........................................................................................................................................................................................................................................................................................t....ht.t.h.....s.....s....s.h...l..h..h.h...u...s......h..s..s.....t...h.u.hcs.............................ttts...uh.ahptlhphl.....................................h..phh....ht.tth..............................................................h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 653 1049 1463 +3042 PF00463 ICL Isocitrate lyase family Finn RD anon Prosite Domain \N 19.50 19.50 19.50 19.50 19.40 19.30 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.93 0.70 -6.29 7 4174 2012-10-10 15:06:27 2003-04-07 12:59:11 16 13 1846 69 1148 4424 4281 225.10 28 93.61 CHANGED EVtElcKWWossRW+tTKRsYoAEDIss+RGsl+.lpYsSs.tAcKLa+lLcc+ptptssShTaGsLDPstVoQMAKYLDolYVSGWQsSSTASoSsEPuPDLADYPhsTVPNKVEHLahAQLFHDRKQpEERhphscpc..Rtph.s.lDYLRPIlADADTGHGGlTAlhKLTKhFIERGAAGIHIEDQusuTKKCGHMAGKVLVPlQEHINRLVAIRhpADIhGo-LLslARTDuEAATLITSoIDhRDH.FIlGsTNPsh..tsLsslMstA.htGtpGspLtslEs-WhtKAsLKhFpEAVlDtIptus...hsNKpthltca.ppsp.hpphst.EA+tlAcclhGp-IaFsW-hsRsREGaYRapGGspCulsRuhAaAPYADLIWMESphPDatQA+EFA-GV+utaPcQhLAYNLSPSFNWppAMssD-.QcTaIpRLucLGYsWQFITLAGLHTsALhscsFA+sYup.pGM+AYuppVQpP...EhcsGV-VVpHQKWSGAsYlDtlL+hlpGGVoSTAAMGtGVTEDQF ..........................................................................................t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................... 0 314 663 947 +3043 PF03517 Voldacs ICln_channel; Regulator of volume decrease after cellular swelling Griffiths-Jones SR, Coggill P anon PRINTS Family ICln is a ubiquitously expressed multi-functional protein that plays a critical role in regulating volume decrease in cells after cellular swelling. In plants, ICln induces Cl- currents [1,4,5], thus regulating Cl- homoeostasis in eukaryotes [2,3]. Structurally, the fold resembles a pleckstrin homology fold, on of whose roles is to recruit and tether their host protein to the cell membrane; and although the surface charges of the ICln fold are not equivalent to those of the PH domain, ICln can be phosphorylated in vitro and the PH-nature of the domain may be the part involving it in the transposition from cytosol to cell membrane during cytotonic swelling [1]. 
22.10 22.10 22.20 23.30 21.90 20.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -11.14 0.71 -4.32 115 359 2012-10-04 00:02:25 2003-04-07 12:59:11 8 8 280 1 243 349 2 133.60 25 52.92 CHANGED GsLalssp.plhWh.s.............t...............................spGhulsYsuIoLHAlp+.........................shsp.slYhQhp.....................................................................ttssspsssppsppsssElpllPsss................................st......lcslapAhspCus.LpPDss.......t-pptp...t..tptptp.shhhps ....................................ssLalspp....plhWhs.........................................................sshGhul.YssIoLHAls+..t......................shsp.slYh.ls...............................................................................................................t........ttptpppssc-ptpthsEhpllPsst....................................sp...........lcshapAhspCps.LHPDPp...............sp-s.-cc....t...............ttth....................................................................................... 0 73 122 187 +3044 PF04140 ICMT Isoprenylcysteine carboxyl methyltransferase (ICMT) family Wood V, Finn RD, Bateman A anon Pfam-B_15304 (release 7.3) & Pfam-B_5114 (Release 8.0) Family The isoprenylcysteine o-methyltransferase (EC:2.1.1.100) family carry out carboxyl methylation of cleaved eukaryotic proteins that terminate in a CaaX motif. In Saccharomyces cerevisiae this methylation is carried out by Ste14p, an integral endoplasmic reticulum membrane protein. Ste14p is the founding member of the isoprenylcysteine carboxyl methyltransferase (ICMT) family, whose members share significant sequence homology [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.26 0.72 -3.79 12 1448 2012-10-01 22:51:20 2003-04-07 12:59:11 9 8 1272 1 481 2287 1176 94.30 31 44.00 CHANGED hlsh..hlhuphhRhhshhohGphasp+lhh..hssHplVpsGlYpalRHPsYhs.hhhplus.sLlsss..huhlhhshsshhha..RIcpEEpsLhp .....................................hsh..hl..hlhu....h.hhh.h..sh..p.p.L..G.p..h..W..o...h.+........l..hl..........hss..H...p.lV..s...pGlY+..hh+HPsYh..h...............l.....l.........p....lh...s....h.h.....L...L...s..p...u.....h.......h........s....s.....l...l....h.....h.s.....h.....h....s..h...h.La..hR..I..c.p.EEphL..t....................................................... 1 166 278 396 +3045 PF03971 IDH Monomeric isocitrate dehydrogenase Bateman A anon COG2838 Family NADP(+)-dependent isocitrate dehydrogenase (ICD) is an important enzyme of the intermediary metabolism, as it controls the carbon flux within the citric acid cycle and supplies the cell with 2-oxoglutarate EC:1.1.1.42 and NADPH for biosynthetic purposes [2]. 
25.00 25.00 27.90 27.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -13.26 0.70 -6.17 5 1265 2012-10-02 21:08:39 2003-04-07 12:59:11 9 3 1189 11 297 1040 630 682.90 61 98.77 CHANGED TIIYTlTDEAPALATYSLLPIV+AFAcsAGIcVETSDISLAGRILAsFPDaLTE-QRVsDsLAELGELspoPDANIIKLPNISASVPQLKAAIKELQ-KGYAVPDYP-DPpTDEEKAVKtRYDRIKGSAVNPVLREGNSDRRAPtAVKNYARKaPHSMGAWSpsSKSHVAaMcuGDFFuSE+ShTlucAssV+IEFluK-GsVcVLKcpVALp-GEIIDosVMSKKALtsFaEcQlpDAKupGVLFSLHlKATMMKVSDPIIFGHAV+VFYK-VF-KaG-LF-pLGVNVNNGlGDLYAKIcSLPAuQRsEIcADIpAVYA+pP-LAMVDSDKGITNLHVPSDVIVDASMPAMIRsSGQMWGsDGKtKDTKAVIPDSoYAGVYQslIDFCKpHGAFDPTTMGSVPNVGLMAQKAEEYGSHDKTFElsADGVVRVl.DAsGcVLhpHsVEAGDIWRMCQTKDAPI+DWVKLAVTRARlSsTPAVFWLDPsRAHDsELIKKVcpYLKDHDTsGLDI+IMSPlEAh+aSLsRL++GpDTISVTGNVLRDYLTDLFPILELGTSAKMLSIVPLMAGGGMFETGAGGSAPKHVQQllEENHLRWDSLGEFLALAASLEHLGlKTGNsKAKVLAKALDsATGKLLDNNKSPSRKlGELDNRGSHFYLAhYWAQALAAQs-DA-LpA+FAPLAKALTEcEAsIVAELNuVQGKPuDIGGYYAP-s-+sAsVMRPSsTFNAAL-u ......pIlYThTDEAPuLATYShLPIlcuFspsAGIsVcTpDISLAGRILAsFP-hLsE.....-Q+lsDsLAELGELsppP-ANIIKLPNISASlPQL+AAIKELQspGYslP-YP-.-PpoD....EE+pI+ARYsKlhGSAVNPVLREGNSDRRAPtuVKsYA+KpPHp.M.G....t.W.ot.s.SKoHVApMp.pGDFausEKShsl......s..p..usslcIEh..l....sp....c....G.s...p.pVLKpslsLpsGEllDuo.hMStcALppFhtcpltcAKpp.sVLFS.LHlKATMMKVSDPIlFGHAV+lFa+-lFsKauchhcplGlssNNGlu-lau+lp.s.L.Pssp.psEIpusl....pusa.s..p..pPsLAMVsSDKGITNLHVPSDVIVDASMPAMIRsuG+MWss-G+.pDTpAVIPDpoYAslYQssI-.C+ppG.AFDPsTMGoVPNVGLMAQKAEEYGSHDKTFpl.stsGslcll......s......t......s....GpVL..hpppVEtGDIWRhCQsKDAPIpDWVKLAVsRARhossP.......AlFWLDtpRAHDsplIpKVppYLt-....H.....D....TpGL...-IpIhuPh-AhpaoLcRl+cGpDTISVTGNVLRDY.LTDLFPILELGTSAKMLSIVPLMsGGGhFETGAGGSAPKHVpQhlcENHLRWDSLGEFLALusSLE....alu..pp..s......s..NsKAplLAcsLDpAsuchL-ssKSPuRKlGE.lDNRGSHFYLAhYWAptLAtQscDs-LtspFuslAptLspsEppIlsELs.ssQGpssDlG..GYYtsDst+sstsMRPSsThNshlp.s............ 
0 77 185 259 +3046 PF01231 IDO Indoleamine 2,3-dioxygenase Finn RD, Bateman A anon Prosite Family \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.40 0.70 -6.05 39 516 2012-10-01 19:57:26 2003-04-07 12:59:11 13 11 214 4 368 501 538 358.10 28 79.96 CHANGED s.h.sLp-YtlS.cpGFLPtp.PLccLP.-.YYpsWEpls...psLPsL.............lts.+plRphV-.plPlLsssp.L....psct.EhRRAYhlLuFlsHuYlWus..spPpc.....................................hLP.sluhPhlclSccLtLPPlhTYAulsLWNa+......htss......shhsh-NL..sslpoFTGohDE.pWFaLVSVhlEtcuu.slphhlsulpAsp.ts-p..ttlspsLpplupslcclsplLpRMaEps-PplFYaplRPaLuG.KsMsshGLP.pGlhY.................-....uspsp.+p...YuGGSNAQSSLIQhhDllLGlcHpssu...............................................................pssFlpEMRpYMPssHRcFLpclpp...................................ssslRpYVhsppsst...tLptAYssClttLpsFRDpHIQlVoRYIlhsu+cst.tt.s.....................................ppcsssht.+GTGGTsLlPFLKQsRD-Ttcssh ..................................................................................httatls.ppGFls.................P..l..p..pLP.......apsWpplh....ppl..sth...............................l.s...tplRt...h.l.c....ph.....P.hL..s..s..pt...L...........pspt...thptA.ahh.....L.......ualspuY..l....at...t........ppstp..............................................................hLPtsluhPhhplup.............p.............Lt.l.P.P.hh...s.Y.A.s.hs.....L.hNap......h...hsss.....................t.h.phcNl.........phl.h..sFps....s.....sE...phFhLlslhhEttuushlphhhp..slp.......u.......hp....psst...............ttlhpsLppltpslpc.ls.th...hppM.........hc...t.......s.cPp..haYtplR.s.altG.......p..s....t...h.....P...pGlhY...............................-.......s.s.p...t..p.hp.....apGsSsAQSSllp..hhDhhLslp.c.ss.....................................................................................ttalp-hRpYMPssH+pFLptl.pp..........................................ss...lRp..hs....t..........ptsp..................tlt.sYstslttltthRstHhph
spcYIlh.upp..........................................................................................sout.s....hl...h........h........................................................................................................ 1 115 201 288 +3047 PF02479 Herpes_IE68 IE68; Herpesvirus immediate early protein Mian N, Bateman A anon Pfam-B_2276 (release 5.4) Family This regulatory protein is expressed from an immediate early gene in the cell cycle of herpesvirus. The protein is known by various names including IE-68, US1, ICP22 and IR4. 18.70 18.70 18.70 20.30 17.90 18.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.59 0.71 -4.95 13 78 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 36 0 0 68 0 125.40 39 38.87 CHANGED +sEYGssssstphRs....s+usGuFCussWpPDlsRLspDlNplFRsIspuuhsssssscsLRRsLhDhYlhGhsptR.s.ssWEpLLQLoPsQ.otsLRsTLR-lspRsststc.lpsssplstphaGsECDVS .............................Pp.usssssthlR.....sRtstthsussWpPDlshlppslNpLFR.lhpsuhcspssuppLRRhlhDhYLMGYs+tRLs.psWppLLQlostp.uhpLRsTlR-lpuR.ststc.l..pt.ssl.sphaGsECDls....... 0 0 0 0 +3048 PF01008 IF-2B Initiation factor 2 subunit family Bateman A anon Pfam-B_1302 (release 3.0) Family This family includes initiation factor 2B alpha, beta and delta subunits from eukaryotes, initiation factor 2B subunits 1 and 2 from archaebacteria and some proteins of unknown function from prokaryotes. Initiation factor 2 binds to Met-tRNA, GTP and the small ribosomal subunit. Members of this family have also been characterised as 5-methylthioribose- 1-phosphate isomerases, an enzyme of the methionine salvage pathway. The crystal structure of Ypr118w, a non-essential, low-copy number gene product from Saccharomyces cerevisiae, reveals a dimeric protein with two domains and a putative active site cleft [2]. 
22.40 22.40 22.80 22.70 21.80 22.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.63 0.70 -5.42 18 3047 2012-10-04 00:26:15 2003-04-07 12:59:11 12 22 1567 43 1662 2957 1606 282.00 28 77.40 CHANGED pl+GusslulsuhtsLhhhhpctp...hspsp-h.........hpplppthshLtsoRPouVsltsulchlhp...........tpshppscps.......................................llcpupphlp-.shpssptIuphutchI.........cDu-sILTas.sSus........sluVlpp.Ahppu.................+ch+VlssEoRP.thQG.+lsshpLhptGIsVTLlsDoulualMpc....lDpVlVGA-sIhpNGs.lhNKIGThplAllAKppslPFaVsA.phKFs.chthspplhhEtcsPpElhhhsssphh...................................................................slplhNPsFDlTPs-hIohlITEhGslsP ........................................................................................................................................lcGusshulsush.u....lh....h........h.....h....pp...h.t.............p.....t.t..h..................................hptlp...pth.p........hL...t..........s.........s......R......P.....o...u...l..s.....l.h......u.l.cp.hhp............................................t..t.....t...s....h.p..p..h...t...p.t.................................................................................................................................................................lh.ppup.p.h....h....p-....shts.scpI...u.p.h.u.h..p.h.l..................................p.s...s......t..s..lLTas..sous...................sh.u.s.lht..A...t..p.p.s....................................p...t..h...c.V.h.......s......s............E.....o......R.P....h................h.........Q.......G.....+...........h........o.A.................h.c..L.............t..........p..........t................G.................I..s..............s.TlI..s.D.s.s.s.uh..l.Mpp.................l....stV........lVG.ADpl.s.s.NGs.lsN......K......lGTht..lA.l..hA.........+...t........a.......s..............lPFhVsu..ss.phsh.p..h......s...s....p........p....l............l.......E.....+.s...s.p.E.l..h....p...h....t....s....
t....p..h...s.s.t...................................................................................................................................................................s.hp.s.h..N.P....s.FDlTPschlo.u.llTEhGlh.............................................................. 0 560 982 1379 +3049 PF00707 IF3_C IF3; Translation initiation factor IF-3, C-terminal domain Bateman A, Finn RD anon Pfam-B_629 (release 2.1) Domain \N 20.90 20.90 21.10 21.90 20.80 20.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.86 0.72 -4.24 150 4680 2009-01-15 18:05:59 2003-04-07 12:59:11 17 5 4531 3 1156 2821 2168 87.80 44 49.92 CHANGED hlplKEl+hpPpI-cHDhpsKl+psp+FLcc.GcKVKlolpFR...GREhs+t-.lGtclLp+htpcl..p-luplEptP.+hEGR...pMhMhLu.P+p .......h.lplKEl+h..pPsIDcpDapsKl+ssh+FL.-c.......GD...K.....VKlol...R...F.R....GR.....E.h..s....Hp-.......lGhclLp.Rltc-l....p-.lA...h...V.EptP..Kh..E.G.R...pMhM.lLA..P+................................................ 
2 397 751 978 +3050 PF01652 IF4E Eukaryotic initiation factor 4E Bateman A anon Pfam-B_1315 (release 4.1) Domain \N 21.00 21.00 21.50 21.20 20.70 20.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.19 0.71 -4.71 135 1579 2009-01-15 18:05:59 2003-04-07 12:59:11 13 11 412 47 834 1511 155 157.30 32 65.03 CHANGED PLpppWohWap........stssp.......................tpsapssL+.l..............ssFsoVEcFWulYspl..P.ocLshts-a...............alFKcGI.+P.hWEDstNppGG+Wh..hphp..........................+p..........................ph-chWhchlLuhIGEph..pts...................--lsGlV....ls.lR..t.ptpp...............lulW.s+ss.....................ssppshhpIscpl+..chL..plssp..........h....papsH ..........................................................PLpppWshWa..............tt..s.p....................................................spsap..ss.l.+...l...............................tsFs.o.VEpF...W....s.lYs.pl.....hs.....o..pL...sh....t...s..Da........................a.lFKc..G..I.c...P..hWE..DstNtp.GG+Wh..lphp............................................Kt.................................................thDphWhch....lLuhI.GEpF..pps...............................--l.CGsV.......l..s..l.R.....t....ptc...p........................lulW.s+ss.....................ps.pt....s.h.....h.pItcph+.chL..ph..p......h................................................................... 0 326 467 676 +3051 PF00932 LTD IF_C_term; IF_tail; Lamin Tail Domain Finn RD, Bateman A, Anantharaman V anon Anantharaman V Domain The lamin-tail domain (LTD), which has an immunoglobulin (Ig) fold, is found in Nuclear Lamins, Chlo1887 from Chloroflexus, and several bacterial proteins where it occurs with membrane associated hydrolases of the metallo-beta-lactamase,synaptojanin, and calcineurin-like phosphoesterase superfamilies [1]. 
25.70 25.70 25.70 25.80 25.60 25.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.43 0.71 -4.01 51 1619 2009-09-10 16:30:14 2003-04-07 12:59:11 14 164 831 14 675 1585 960 119.30 18 18.47 CHANGED sssssssssssslhls-hstsusss..........calElhNsus.pslDLsGaplpcp.......ssphthhssshh........sGpslhlhtsss..........tt...thhhttt...ssasst...............s-slsLhsss..Gphlsthshsstss .......................................hs........stsplhIsE..h.......s.u.....t....................calclh..Nsu....s...psl....sL..u..G....ap..lppp....................sssh..p.hs.sshhhh..................suph.l.slhssss...........................tts.sphhhptp.......hshsss...................................ssth.h.Lhssp......G.p..l.s.ht......t.............................................................................................................. 0 248 401 556 +3052 PF00714 IFN-gamma Interferon gamma Bateman A anon Pfam-B_615 (release 2.1) Domain \N 20.30 20.30 20.60 26.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.75 0.71 -4.34 9 229 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 125 13 27 233 0 123.10 50 85.86 CHANGED lLusSGsYCQusah+ElEsLKpYFNASssDVu-GGsLFLDIL+NWKEESD+KIlQSQIVSFYFKLFEsLKDN.QsIQ+SM-sIKpDhhsKFFNuSpsKhDDFp+LhphsVsDLpVQRKAlsELI+VMs-LSPcSsLRKR ...........................h.c-h-pLKtaF.Nuups...DVucu.GsLFl-ILKNW+...E.-uD.+KIIQSQIVSFYhKlF-.N.h.K..Ds...QhIQ+ShcpIK...EDh...h...s..+....F...h.N.uu.ppKhcDF.p+LhplsVsDLplQRKAlsELhpVhp.cLS..P.tuphtK........................... 2 1 2 7 +3053 PF00047 ig Immunoglobulin domain Bateman A, Sonnhammer ELL anon Bateman A Domain Members of the immunoglobulin superfamily are found in hundreds of proteins of different functions. Examples include antibodies, the giant muscle kinase titin and receptor tyrosine kinases. Immunoglobulin-like domains may be involved in protein-protein and protein-ligand interactions. 
The Pfam alignments do not include the first and last strand of the immunoglobulin-like domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.98 0.72 -3.85 52 1594 2012-10-03 02:52:13 2003-04-07 12:59:11 20 231 155 48 526 42115 2 63.00 21 11.35 CHANGED GsssslsCps.....tss.tsphpWhppspthtpttpsth..tt.............................lplsssp.p.cuGsYtChs ..........................................................pslpLsCps...................t.s.....s....t....h.p......W....p......p.......p......s.....p.......s.....h....t......t......t......t....h....t...t....t....p...h....t..t.................................................................................Lh.l.s.s..s......s..............p..cuG.s.YpCh......................................................................................... 1 144 166 265 +3054 PF02395 Peptidase_S6 IGA1; Immunoglobulin A1 protease Bashton M, Bateman A anon Pfam-B_540 (release 5.2) Family This family consists of immunoglobulin A1 protease proteins. The immunoglobulin A1 protease cleaves immunoglobulin IgA and is found in pathogenic bacteria such as Neisseria gonorrhoeae [3]. Not all of the members of this family are IgA proteases Swiss:O32555 from E. coli O157:H7 cleaves human coagulation factor V [2] and Swiss:O88093 is a hemoglobin protease from E. coli EB1 [1]. 
19.70 19.70 19.70 19.70 19.60 19.50 hmmbuild -o /dev/null HMM SEED 769 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.74 0.70 -13.53 0.70 -6.41 9 1016 2012-10-02 13:45:52 2003-04-07 12:59:11 11 15 370 10 37 880 2 454.70 25 57.43 CHANGED huhssh..uhAupsp.s.lsYQhaRDFAENKGhFpsGATNIplhsKpGphlG..hhstsPMhDFSsVsss.Gl.....ATLlssQYlVoVpHNu.GhsslsFGs.Gp.................spYphVc+NNhts..................DaphsRLsKaVTEstPsssosshssussYpsppRYsshhRlGuGpQaltp+s.............Gptsphsu.uYpahsuGostt..............uhtNuphhhus.ssstshp.hs........sLsshushGDSGSPLFsYDspcpKWVllGslpshssa.tts.t..slhpppFhsph.pcDpsu.lpt.s...sslshp....ssssGsGslT......pGSp......shchtsppss......................hNtGKsLhFpGt...GslsLpsslsQGAGGLhFcsshphpssss...shTWpGAGIslscGsoVsWpVpsspsDpLpKIGcGTLpVpGsG.NpGuLKsGDGpVILsQQADstsphQAFSpVsIsSGRuTVhLNsspQlsPs.slhaGFR..GG+LDLNGpsLTFc+IpssDpGAhIsNp.os+pSslTlss....ss.IstP............s.s...........h...t.h...hp....pp.phhhLctus.s......s...pss.h.h.G......pppshs+hp.t+htsas..haFstp.ssshsssl.hhhpGps..hhhh.sussNhcGslopppGsLhhoG+Ps.HA....sht.t...........pspslsppDW.NRsF+hcslplcsushhlS..psuslpusIpAo.NuslslG..ssp..hspscuc..TG.hshshctslosscslss.sh..hsGslsLsppus....pthstGltuhsus.plo..spuhashsss 
...................................................................................................................................................................................................................................................................................................................................................................................................................t.pa....hp.....t.p...h...t.......................................................s..........................................tt....................t...........h......GDSGSshahast..tpW.h.u......................................................................................................................................................................................................tpsh.h.......t........t..l....h.pth...p.G..u..l.Ft...t...t..h....t...t..h.GuGl.httt..l.Wth........................t....................tD.LtKhG.GsL.lp.h.......t..p.stlphGpG.VlLttp........sFspl.hs.uGpshl.lst....tpth....................t.t........hh.t..GGhL-hsG.s..Fp..l.....s.ts.l.st..............h..............................................................................................................................................................................................................................................................................................p.-W....F.ht..h.h...t..............................................................................................................................................................................................s................................................................. 
0 13 18 27 +3055 PF00219 IGFBP Insulin-like growth factor binding protein Finn RD anon Prosite Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.00 0.72 -3.42 54 1051 2009-09-11 00:39:06 2003-04-07 12:59:11 13 24 116 8 458 832 0 54.50 44 18.34 CHANGED Cs.....sC..p....C.P..............s.sPsCs.sVp...sGCGCChs..CA+p.......hG-sC......shpshC..spGLpC ......................Cs...sC..p.......C..P...................ss..s.sCs...sVp......-GCGC.Chs..CApp........G-s...C..........s...s.h....s...s....h.C..upGLpC.......... 0 48 94 234 +3056 PF01378 IgG_binding_B B domain Bateman A anon SCOP Domain This domain is found as a tandem repeat in Streptococcal cell surface proteins, such as the IgG binding protein G. 21.20 21.20 21.20 21.20 20.80 20.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.53 0.72 -4.16 4 50 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 18 74 4 119 0 53.60 73 27.86 CHANGED TYKLllpGpThpGETTTKAVDAATAEp.sF+QYANsNuVsG-WsYDsATKTFTVTE .......TYKLVlpGpT..hpGETTTKAVDAATAEK.sFKQYAN-NGV.DG-WoYDDATKTFTVTE... 
0 0 0 2 +3057 PF00475 IGPD Imidazoleglycerol-phosphate dehydratase Finn RD anon Prosite Family \N 20.30 20.30 20.60 24.10 20.20 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.79 0.71 -4.34 146 3695 2012-10-03 01:04:38 2003-04-07 12:59:11 13 12 3613 24 1011 2611 2275 144.30 51 57.73 CHANGED TGlsFaDHMLsthu+HuthDLplpupGD...lclDs..HHTVEDsGI..sLGpAlpcALGDKpGIpRa.................GsuhlPMDEuLs............psulDlSGRPalsac..sph.p.tp.lGs....hsoEhlccFFcuhu.sutlTLH.lphhhGc......NsHHhhEAhFKAhu+AL+ .........................................TGVGFaDHMLctlApHGthcLplpscGD..........Lc...ID-.....HHTVE.DsGIsL...Gp.AlppA.LG....D...K.....+...GIpRa.................Gs.hh...lPMDEsLu..........................psslDlS.....GRPaLsac............spas.....p....p+...l.Gs..............hsTEhlccFFculuhsutl.TLH.l.c...sh..Gc......NsHHhlEu..lFKAhuRALR................. 0 330 666 867 +3058 PF00218 IGPS Indole-3-glycerol phosphate synthase Finn RD anon Prosite Domain \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.44 0.70 -5.33 20 5164 2012-10-03 05:58:16 2003-04-07 12:59:11 16 18 3907 23 1085 4373 2572 228.50 40 76.40 CHANGED Lp+Ihsp++tcVsst+pp.s...uclpt.h..t.sssspuFh-sLpp....tpsulIAElK+ASPSKGhIc.-hssschAcsYppuGAusISVLT-scaFpGuh-hLtplRpslslPsLp........KDFllDpYQIhEARhhGADslLLIlusLs-pp..hpcLhphApuLGM-sLVEVpstpElcRAL.tlGA+llGVNNRNL+oFcVDLssTpcLsshls.......t-.slLluESGItotpDlcphpcpGscuhLVGEuLM+ssDhcttl+cL 
..........................................................................................................................LtpIhttpt..ltth...p.t...s.....t.h.t.h................................h...s......h..h..tsLpt..................tt...h...shIsEhK+AS...PS..c.....G........l....p...............p.....h....s....s......thAt.....Ypp.h....t........AsslSVLTDpcaFp...............GSh-........Lptl.p.....p.....t.....s.....s......h.....Pl.Lp......................KDF.IIDsaQIh.ARhhGADslLLhl..u..sL...........s...D.........c...p...........hp...cLhshA.c.s.LsM..s.VLlE.VpsppE...l-...R......Al...pL.......s.........s...c..........l....IG...I.N.....N...R....s....L.........+..T..h..p...s.....D........l..s.....p......T....h.....cL...t.....s.....hlP..........................cs...t...lllSESGIhotsplctl.t.th....ssuhLlGpulMpt.ts..ttthtt............................................................................ 1 350 707 929 +3059 PF05049 IIGP Interferon-inducible GTPase (IIGP) Moxon SJ anon Pfam-B_5519 (release 7.7) Family Interferon-inducible GTPase (IIGP) is thought to play a role in in intracellular defence. IIGP is predominantly associated with the Golgi apparatus and also localises to the endoplasmic reticulum and exerts a distinct role in IFN-induced intracellular membrane trafficking or processing [1]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.15 0.70 -5.65 6 355 2012-10-05 12:31:08 2003-04-07 12:59:11 8 11 60 8 235 584 54 294.00 31 78.44 CHANGED SsEllptIcpAlcEGpLpcllShIc-slpslspshlcIAVTG-SGsGhSSFINALRsIGHEE-sSAPTGVVcTThc+TsYpp.s+aPpVhlWDLPGlGuTs.os-oYLEEMpFupYDhFIIIuSppFSpNclcLApsIppMtK+FYhVhTKlDpDLosp......oFsc.clLQsIpcNhpssLQcstVpEPPlFLlSshcsspaDFPcLc-TLhKDLPshKpHshhhoL.sls-psIspKtpsLKp+IhL.EuL+uuhls........tssDhsNhccshKsYRphFGlDDtSLpplApchsh.ltchcsph+u.phashpc--pllcphhphhlptFhtlht......hh.sssshh+chhapphhFL-hVApDAKslLcKI ...................................................ht.thtp.tp....s.h.shlp..p.h..l......t.t..h..p...p......s.......l.pluVsG-oGsGKSShINA.l....R.G....l....s.p.-...-...c..s..u.....A..T..G......V......l.......E....T....T.........h...........c...........s.....s..Y..............a....P..............p.........h...P......s...V..tlW.DL..P.......G.........h................G...........o.........s.....s..........h..............s........c..........p........Y.......L.p..p..h..p..F........tp......Y.....D......h.........F...I..l...l..u...u...p.R..F.p....t.s..-...s.pLA.....ctI......p.....p...h....t...K..p..F...YaV...RoK.............lD....p..D.l...ts..........p.t.....t......p...........p.s.a..s......c.....p....p..h...Lp..p....IR....pp...shp.......pLp......p...t....tl..pps.lFLlSs.h....p....tta.DF..L.psl.p-Lsth+pph.h.....h.slsshs.thlppKtt.hppplhh..uhhssh.t...............c.t.h.t.hp.Yp..FGlDc.Slt.hupphth...tp.hp..hc.............................................................................................................................................................................................................................................................. 1 49 83 136 +3060 PF00340 IL1 interleukin-1; Interleukin-1 / 18 Finn RD anon Prosite Domain This family includes interleukin-1 and interleukin-18. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.59 0.71 -4.65 16 589 2012-10-02 19:42:32 2003-04-07 12:59:11 14 3 132 42 189 614 0 110.80 26 54.99 CHANGED hpspshpppshFsMshhpsp.psstlPVs..LuIcsppL.aLSC.pc.spssLphEts-.P+hhsspcp-hhFhFp+ststspsp...FESutaPsaFluTpp-pp..h.Lsph.ttps.lTDFph ...........................t........t.hh.h.....h.....p.........t..t....t..s...p..t.....h.PVs..L.ul..p...sp...p..h..hLSC...t..p..p..t...s..p.....P.sLpLc..cs....c...h.....c.........h.....s..p...p.cp..s.....t.......h..FhFh.+pp..suspsp....FESutaPuWFluTsp..ps..p........l.lsp..t...t...............t................................ 0 9 13 38 +3061 PF00726 IL10 Interleukin 10 Bateman A anon Pfam-B_885 (release 2.1) Domain \N 24.20 24.20 24.50 24.50 23.80 24.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.00 0.71 -4.96 3 341 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 126 14 83 316 0 146.60 36 93.10 CHANGED ALLpCLVLLAGVtsSRstYstsESuCTHhPsShPHMLRELRAAFS+VKTFFQpKDQLDNLLLT-SLL-DFKGYLGCQALSEMIQFYLEEVMPQAENHGPEIKEHVNSLGEKLKTLRLRLRRCHRFLPCENKSKAVEQVKNsFNKLQEKGVYKAMSEFDIFINYIEAYMTI ................................................................................................................................................hs...hLp-LRssFpcl+s.h.hQh..cD.p.hp.s..l..LL..pps.l..l.pphKuhh..G..CpshschlpFYL-c.V.h.Ppu....pp....p....s............s....c..l+..c+lsS...lupphpo.L+h..c.LR..pC...+...p....ahs.C...c...s...+..s...c...u...lp...p...l.+...p...sa.pcL.p.p....p.G.l....hKAhuEhDlhlsalE...................................... 0 4 8 25 +3062 PF03039 IL12 Interleukin-12 alpha subunit Mifsud W anon Pfam-B_2071 (release 6.4) Domain Interleukin 12 (IL-12) is a disulphide-bonded heterodimer consisting of a 35kDa alpha subunit (e.g. Swiss:P29459) and a 40kDa beta subunit (e.g. Swiss:P29460). 
It is involved in the stimulation and maintenance of Th1 cellular immune responses, including the normal host defence against various intracellular pathogens, such as Leishmania, Toxoplasma, measles virus and HIV. IL-12 also has an important role in pathological Th1 responses, such as in inflammatory bowel disease and multiple sclerosis. Suppression of IL-12 activity in such diseases may have therapeutic benefit. On the other hand, administration of recombinant IL-12 may have therapeutic benefit in conditions associated with pathological Th2 responses [1,2]. 25.00 25.00 25.60 27.60 23.60 24.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.44 0.70 -5.23 3 97 2012-10-02 01:28:15 2003-04-07 12:59:11 9 3 54 2 30 88 0 172.50 47 88.05 CHANGED MCP.uRsLLLLATLVLLN...HLSLARsLPsSTsGPuh.t+CLNpSQNLLKTVDssLQsA+EpLEaYSCTAEEIDHEDITKD+TSTVKACLPLELApNESCLAoRETShIT+GSCLTSGKTSaMTTLCLSSIYEDLKMYQlEFQAINAKLLMDPKpQIaLDKuMLsAIDELMQALNaNuETVPQKPS..lsEuDhYRTKhKLCILLHAFRIRAVTINRVMSYLNSS .....................................................................................lhlshLsLL........Ls.shsLPss...s.....h.sLphSpsLLpsssphL.pKA.+QtLchYsCTsEElDHEDIT+sposTlcACLP.ELhhNEs....C.Lsopch.S...hp...pGpCLsot..+.oSahhsLCLpSI.YEDLKhYpsEFpshstt.LhhpscpQlhL..DpshLssIp-LMQu..LNh.s..u...c..sl.sp.psu........hp.c.c.Y+s+hKLCllL+AFplRsVTIsRhhuYLsu........... 0 1 2 12 +3063 PF02372 IL15 Interleukin 15 Bateman A anon Pfam-B_2545 (release 5.4) Family Interleukin-15 (IL-15) is a cytokine that possesses a variety of biological functions, including stimulation and maintenance of cellular immune responses [1]. 
28.50 28.50 28.60 30.10 28.40 28.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.04 0.71 -4.37 14 168 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 53 22 54 152 0 126.90 31 84.55 CHANGED VhIhuClSAulPpTEAs...Wp........sVIpDLcpIEplhpSlHlDsoLYT.poDs+.pCphpshpCFLLElpVIL+Ep..csssIccplcNllhhusssLso..ptssTtstCK.CEEhEcKNhsEFLQsFs+llQhFlp ...........................................hh.Ch.shh.tTpAp.....hp..............plIpcL..cplcshl..psh.ch-.........hLYT..so.Ds........c....p.CphouhpCFhhEl.Vllc.Es...ssspl..pcplpsllh.hspsL..s..s........t..tp.hs...p.s..sCp.pCEphE.cKshpEFLpphhpllQh................... 0 2 4 16 +3064 PF02394 IL1_propep Interleukin-1 propeptide Bateman A anon Pfam-B_1500 (release 5.2) Family The Interleukin-1 cytokines are translated as precursor proteins. The N terminal approx. 115 amino acids form a propeptide that is cleaved off to release the active interleukin-1. 21.30 21.30 21.50 22.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.67 0.72 -3.65 23 206 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 87 0 40 195 0 96.10 34 38.57 CHANGED MApVP-Lhp-hhshYS-.pp-hhaEhDt.shhppsF.DhshssLt....DpslpLphScpphsp..oF+psVsllVAs....-cL+Kh.lshsQsFpDDDLculhsslFEEEsI .........................MAhVP-h.p-.hsh....hS.......-.pp-.h...t.Dt..p..p.psh.D...hshssh.................cpslpLplScpptsp..oF+.....psVslVVAh....cKL+..+......hshspsFpD-DLpu.l.hsp.lFEEE.l............... 
1 2 2 8 +3065 PF00715 IL2 Interleukin 2 Bateman A anon Pfam-B_709 (release 2.1) Domain \N 25.00 25.00 25.60 25.60 24.60 20.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.02 0.71 -4.47 7 151 2012-10-02 01:28:15 2003-04-07 12:59:11 12 2 76 47 19 164 0 124.30 65 94.53 CHANGED LSCIALTLsLlsNSAPTSSS..............s+pTQppLEpLLLDLQhLLptlpN.cNsKLoRMLTFKFYhPKp.ATELKHLQCL.EELKPLE-VLsLsQSKNhp.p...hK-hhSNIpVTVLcLKGSETpFpCEYDDETsTlVEFLN+WITFCQSIhS .......LSCIALoLsLlsNuAP.T.SSS...................TppTppplcpLLLDLQhLLptlpN.cN.KLoRMLTFKFYhPKp.ATELKHLQCL.EELKsLEEVLsLApSKNhphpc.......h+-.hsNIplhVLcLKGSETpFpCEYDD-TsshVEFLN+WITFCQSIhS.. 0 1 1 2 +3066 PF02059 IL3 Interleukin-3 Mian N, Bateman A anon IPR002183 Family \N 25.00 25.00 25.80 30.50 19.40 18.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.58 0.71 -4.11 5 41 2012-10-02 01:28:15 2003-04-07 12:59:11 10 1 25 2 11 36 0 113.60 47 79.56 CHANGED Au.oposol+soRTs.CSslhcEIls+Ls...lssps.ssLNuDDpshLpcsoLRRsNL-sFhppAs-ohssD.osIcSsLKcLpsCLPsATssSspcPIaIKDtDhsDFR+KL+FYLlpL .........As.sQshsl+Tohsp.CSshlcEIlscLpp.slP..s.ssLNs--psILhcssLRRPNLEAF..pAscohp..Nu.SuIcS....LKpL.PCLPhAT.ssPspcPIpI+DsDasDFR+KLcFYLcsL. 0 1 1 1 +3067 PF00727 IL4 Interleukin 4 Bateman A anon Pfam-B_833 (release 2.1) Domain \N 20.50 20.50 20.70 20.50 19.50 20.40 hmmbuild --amino -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.70 0.71 -4.23 8 198 2012-10-02 01:28:15 2003-04-07 12:59:11 13 2 92 24 28 186 0 84.40 48 78.13 CHANGED pch-hsL+EII+TLN.lTs+.cssChELsVsDVLuAsKNTTEKEhhCRAopVLRphYp+H...............pposlhphLptLDRNLsuLAst.oCoVNEuKpoT..LKDFLERLKoIM+cKYS ......phcIsLpEIIKTLNhLTsc....Ks...sChEL..sV.sDhhsss.K.ssscpEhhC+AuthLpph...p+....................tt..h...h.lttL.psh.uhsth..sCssptsp.......hpphL.tL............................... 
0 1 2 3 +3068 PF02025 IL5 Interleukin 5 Bateman A anon PSI-BLAST P05113 Domain \N 21.90 21.90 22.20 25.50 20.50 21.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.47 0.71 -4.05 6 60 2012-10-02 01:28:15 2003-04-07 12:59:11 10 1 46 8 18 50 0 105.40 63 84.89 CHANGED AlEuPMstLVtETLsLLSTHRTLLIGstsLhIPsPpHKNHQLCIEElFQGIDTLKNQTsQG-AVc+LFpNLSLIKcYIDhQK+KCGtERhRVKQFLDYLQEFLGVINTEWThE ..........................hE.shstLVtETLsLLSoHRTLLIustsLRIPsPsHKNHQLCIEElFQGI-TLKNQTspGssV-+LFQNLSLIKcYI..Dt...Q...K+.KCGtER+RV+QFLDYLQpFLGVhNTEWhhE...... 0 1 1 2 +3069 PF00489 IL6 IL-6; Interleukin-6/G-CSF/MGF family Finn RD anon Prosite Domain \N 21.40 21.40 22.20 21.60 21.20 20.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.86 0.71 -4.63 10 256 2012-10-02 01:28:15 2003-04-07 12:59:11 12 2 99 27 74 227 0 135.50 31 71.80 CHANGED I++IlucISAL+KE...hCssaphCcssc-sLAENsLsLPKhtppDGCaQoGFNQ-sCLs+IpoGLhEYQsaLphLpsphcusp.sslcsLQhsspsLlphlpQchcs.t..ssssPsspssLtshhpupspah+csshhLIL+sLccFLphSLRAlR ..................................................h.tt............Ctt.t.C.sp..hlhtppL.slPph.p.tDu.C....ppuh..sp..psCLp+lpsG..Lh.aphhL..c..hlps.p.hts..tt....tp..l..csl.phsspsLhphlpp..chc....p......hh.sPs.hp.sshhsphps..ps.tW.+phsh.hlILpsLpsF...LphuhRsl+.......................... 0 3 6 22 +3070 PF01415 IL7 Interleukin 7/9 family Ponting CP, Schultz J, Bork P anon SMART Family IL-7 is a cytokine that acts as a growth factor for early lymphoid cells of both B- and T-cell lineages. IL-9 is a multi-functional cytokine that, although originally described as a T-cell growth factor, its function in T-cell response remains unclear. 
21.00 21.00 21.00 21.30 20.40 20.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.74 0.71 -4.49 7 77 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 28 3 23 73 0 111.60 42 85.17 CHANGED sltG...+cspshtulL.ls..I-cL.cp..sscssCssN.sshhphpl.sDDspcssF....uc+LpQhhphs.pscFslhhp+VppuspsLhNsTs..................hpppKssppppttNsLsFLKpLLpphppshs+lL+G ................pltGpss.phtu.l.h.ls..IspL.pp..t.tssC..sN..shhph.l.sDsspcshF.....tctLpQhhphs.ptcasLhh.+Vppss.hLhNspst.......................hppppshppppphNshsFLKpLLpphppph.+hhht............ 0 2 2 2 +3071 PF00048 IL8 il8; Small cytokines (intecrine/chemokine), interleukin-8 like Eddy SR anon Overington enriched Domain Includes a number of secreted growth factors and interferons involved in mitogenic, chemotactic, and inflammatory activity. Structure contains two highly conserved disulfide bonds. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.30 0.72 -4.00 181 2117 2009-01-15 18:05:59 2003-04-07 12:59:11 15 8 161 279 731 2035 0 63.80 25 56.48 CHANGED hus......C.Chph..psphls.hphl...p.saphhssst.Csp.sulIhp.h+......p.s+plCssP....pppWVpch..l..chL .........................tt.........C.Chph....tpp.ls...hphl.......p..shp.hhssu...tCsp..tlIhp..hK.................p..scplChsP....ptpaVpchl.pt............................ 0 45 90 187 +3072 PF01787 Ilar_coat Ilarvirus coat protein Bashton M, Bateman A anon Pfam-B_1131 (release 4.2) Family This family consists of various coat proteins from the ilarviruses part of the Bromoviridae, members include apple mosaic virus and prune dwarf virus. The ilarvirus coat protein is required to initiate replication of the viral genome in host plants [1]. Members of the Bromoviridae have a positive stand ssRNA genome with no DNA stage in there replication. 
20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.92 0.71 -4.74 20 499 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 29 2 0 481 0 189.10 34 91.67 CHANGED s....sstpsststssuRppphsspRAsuhRsp.t....ssstsshPl......slssspps.tsphplphPssts......h..sspssophsupssssh.sshshcThlpsI.pl.cssTKlYsllhGFlu.pSDGhhGhl..-uhsssssssPsslsRhtFKKspYpu+phshssGpsls-l.sshslVWphDss....tpsspplplscaWluhSc.PslhPPpsFLVs-- ..........................u..stpsttspssst...htttptsspsss.......tss+hsssh........Pss.spss.popWpl+GPNs.s+.....t.a.sspsp.pElos.s.ssspa.hsIsFpohhtplh...spshplaslllphsu.sSsGhhGhV..-sacss...sssuPNsls...R+...GFpKcp.RGhQacsP....sshshsshscshtlVhcacsp....h.suscV...hhpshal.hSt.s.l..PpshLhsp............ 0 0 0 0 +3073 PF01450 IlvC Acetohydroxy acid isomeroreductase, catalytic domain Bateman A, Griffiths-Jones SR anon Prodom_2380 (release 99.1) Family Acetohydroxy acid isomeroreductase catalyses the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine. 
29.10 29.10 29.10 30.20 28.10 29.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.67 0.71 -4.27 19 5123 2012-10-02 19:36:47 2003-04-07 12:59:11 14 7 3940 36 1202 3289 2644 141.90 39 46.51 CHANGED Fc-EscoDLFGEQsVLCGGlpt......................LlcsG.......................................................................................................................FETLlEAGYp.PEhAYFEslHE.lKLIVDLIactGlttMphslSsTAEaGshspusRlhsptoKttM+clLc-IQsGsFu+chlhpspsup.p..hpthpcp.cpc.IEplGccLRthhshhp ..........................................................................................................FppEscoDLaGEQs...VL.C.G.ulpu.llpsu..................................FE.sLV.Eu.Gat.PEhAYaEsL..H..E...hcLIs-LlhcGGls.p.Mph.sIS.sTA..Ea.G.sYlhu.sclh..tts+.hh...+....phhs-IQsG...p...Fucs.h..l..t..-..t.......p..s..s.psp.....hps.h.Rc.tttpctIEplGtcLRthMs.h................................. 0 361 757 1011 +3074 PF00920 ILVD_EDD Dehydratase family Bateman A anon Pfam-B_1309 (release 3.0) Family \N 18.60 18.60 18.60 21.70 18.50 18.30 hmmbuild -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.64 0.70 -6.08 89 6864 2009-09-11 11:08:28 2003-04-07 12:59:11 16 13 3924 2 1852 5418 6322 522.70 40 91.02 CHANGED 
+PhIGIssoas...........-hsPsphHLpcluctlKpGlppuGGhP.hpasohulsDGIshGptGMchSLhSR-lIAsslEhhlpups.hDuhlhluuCDKssPGhLMuAsRl.slPulhlsGGshhsGph.....tG...........ppls...hpssa-uhGt.ht....tGclscc-............lpplEpsusPus.GuCushhTANTMsslsEALGhuLPGuushPA......sssc+tphucpuGcplhch..l.....c.c.......slp.Pc-IlTccuhcNAlssshAhGGSTNslLHLhAIA+c...AGlp...lsl.-DF-cluccsPhlsslpPu.G.pah.hpDhptAGGlsslh+pLhct.s..h..LctDshTVo......G+TluEplpps...................................hstc..................................VI+PhssPhps..pG.GltlL+GNLAPcGAVlKhuus........chhta...........cGsAhVF-uE--shpAl.tssclc.Gc........VlVlRapGP+GGPGMsE..MLssTuslh.utGlupcVALlTDGRFSGu.o+GhslGHVuPEAus.GGPIAllcsGDhIpl..Dlt..sppLslh.ls-cE.........l....tp.R.+.......ttat.s.......s......sphp......pG.hLttYsphVssAspGA ................................................+PhIulssoas-hlPu..........H.......hH.Lcc..........hu....chl+culcp.A.Guls.hpF.............s...T.lu................l...sDGIu..h.GptGMhaSLsSR-lIAsSlEhhlsuph.hDuhlhlusCDKlsPGhLMAAhRh.sl...............PslFVsGGPMtsGts..........p.h.............................p.t.ht.....hhshh.c.u.h..t.t.h.s....s.G.c..l..o..c..pc.............l.h..ph..E..ps....uCPos.GoCuGM....aTANoMsslsEuLGl.u.LPGsuol.A....................sp.s..p..Rcplh.tpAGcpllchs........c...c.......................sl.h...Pp...cI......lTccAa-NAhslshAhGGSTNslLHLlAhA...pc.........A...G...l.c......h.sh..cDhsc..lS......cc..VPh..LscltPu....G....p.ah..hpDla....p.A.G...GlsullpcLhc..s..G..........L.L.+t.DshTVs........Gp.TL.s-tlpph............c.........................hs...s.tp.....................................................................lI+s..h-..pP..hpp........cG..GLtl.L.p...........GNLA.p.G.uVlKsuuV.s............ph.h..pa......................pGsAhVF-Sp--.shpAI.....s...s....c..l..p..t..G-.........VVVlRYpGPKG..G..PGM..E..MLtsTu..hLh..utGlGcclA.LlTDGRFSGu.opGhulGHloPEA.A.s.G.....G...............s....IAllc-GDhIplDhs....sppl....pl......l....s..-.tE....................Lsp.R.+.................tt.hp..s..................p............p.ht
........pshLttYsphsouAspGA..................................................................... 0 509 1129 1530 +3075 PF05046 Img2 Mitochondrial large subunit ribosomal protein (Img2) Wood V anon Pfam-B_17929 (release 7.6) Domain This family of proteins have been identified as part of the mitochondrial large ribosomal subunit in yeast [1]. 27.30 27.30 28.30 27.90 26.90 27.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.91 0.72 -3.75 22 279 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 228 0 205 270 3 82.20 32 50.39 CHANGED shsYaVpR..opsspLPVYhch+ttGs+hhTt.........IRKlpGDl.sLcpDLpphLs...t........shpVs............................chsGplhlKGthhpclccaL.ppsF .................hsYaVcR......opsp.plPVYhch+.......p.......tGs.+phTh.........lRKl-GDl....htLppDLpphLt...t............tspls..................................phsupltlK.G...h.hpplcpaLhpttF......................................... 0 81 116 170 +3076 PF04156 IncA IncA protein Bateman A anon Pfam-B_2718 (release 7.3) Family Chlamydia trachomatis is an obligate intracellular bacterium that develops within a parasitophorous vacuole termed an inclusion. The inclusion is non-fusogenic with lysosomes but intercepts lipids from a host cell exocytic pathway. Initiation of chlamydial development is concurrent with modification of the inclusion membrane by a set of C. trachomatis-encoded proteins collectively designated Incs. One of these Incs, IncA, is functionally associated with the homotypic fusion of inclusions [1]. This family probably includes members of the wider Inc family rather than just IncA. 
50.00 50.00 50.20 50.10 49.90 49.90 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.36 0.71 -4.70 54 286 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 42 0 56 201 0 185.00 17 49.97 CHANGED shptplhslshlllull.llssGlss.Lshhh...hushhsshluhshlAlu..hlLlshulhhhhtpp.ht.hpttp...........................................................chtphppplsplp...cclpp..hppphtphppplpphppshpttp...........p.hpphppclpshppshpphtpchpclh...................t.ht.....htt......t..tthtphhtp.pplhpp...........h ............s.ht.hlhslhhhlhull.llssGlss.Lshhh...husslshhlslshlAls..slllusulhhlhpppphthhpstth...................................................plpphpptlspLp...p-hsp..hppphtphppplpthppsh.ttp...........p.hpshtpplcshppphpphtpchpclh............................tthpphhpphtth..........t.ptlt..htp..t.hp..h........................................................................................................................................ 0 5 6 55 +3077 PF02387 IncFII_repA IncFII RepA protein family Bateman A, Mian N anon Pfam-B_1209 (release 5.2) Family This protein is plasmid encoded and found to be essential for plasmid replication [1]. 22.90 22.90 22.90 23.10 22.50 22.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.85 0.70 -5.24 22 635 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 342 0 25 477 1 239.80 41 90.66 CHANGED M...........s+ppYVpNspPhFs.P+psK+pssFI.phhMcpA....uchDlA+p.hphphhshs.hTh..h.................RpRphNtHRApAhcAll.AMhaHashhoshVpsSIcpLuDECGLoThSpuGNpSITRASRhlspFhEshGhIpsc+haD.hlusYIPKhIhlTPhFF.LhslSptpltsApppplpW.Npphpcctht.lohsEhhhpuhc..hpphhphptphh.h.hp+t.A++hhph....sEcsh+pcIlptLl+pYotsELsphG.puLK+cls.cY..L++ltpp. 
...........................................................tph..............P.a..stp.t....th....h...thhht+s.....schDhsh..p.....hahp...h....s.........................hR+Rh.sshRpRAlDALl.uLsaahD..shus.....pVptSlopLAhE.CG..Lu..T..cS...u............uG.......p......l......S..I......T..R.Ao..RuL.pal..t..chGl..I...........o....h.pT..p...aD...hush..lPpcIhhTPAhFthL..sVopstltst..p....c..pRlchp.....sppp....pcpth......ss.Lth.D.E......lh....A+s..hR......ahhpRhpshppch+u+.uh+.....R.tR.ARR.sph....pcpchlphltctLp+clstschht.st-slKRpl.hc...........cpphhht....................................................... 0 6 11 18 +3078 PF02974 Inh Protease inhibitor Inh Griffiths-Jones SR anon Structural domain Domain The Inh inhibitor is secreted into the periplasm where its presumed physiological function is to protect periplasmic proteins against the action of secreted proteases [1]. A range of proteases including A, B and C from E. chrysanthemi, alkaline protease from Pseudomonas aeruginosa and the 50 kDa protease from Serratia marcescens are inhibited. 25.00 25.00 25.10 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.29 0.72 -4.42 27 259 2012-10-01 18:54:33 2003-04-07 12:59:11 9 4 230 3 85 217 6 98.80 31 59.58 CHANGED AoSlhl.ssupluGpWpls...............tsspsCcltLspsph.ts...thuGshsCssph.LuphsssWpsoPcslsLhstsGsslshhs+pscspapuphssG.tslsLpR ..........ssoh...ssusluGpWplu...............tuspsCcltLspsphspG.........hhuGshsCssp...Lsphsu.Wt.s.s.s..ct.lsLh-ssGsslupLhpsupupac..up.ps.uG.pslsLpR.................... 0 13 34 54 +3079 PF00876 Innexin Ogre; Innexin Bateman A anon Pfam-B_779 (release 3.0) Family This family includes the drosophila proteins Ogre and shaking-B, and the C. elegans proteins Unc-7 and Unc-9. Members of this family are integral membrane proteins which are involved in the formation of gap junctions [1]. This family has been named the Innexins [2]. 
21.80 21.80 21.90 22.10 21.40 21.70 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.50 0.70 -5.34 99 863 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 123 0 560 840 1 283.30 22 76.49 CHANGED Dshs.RLpaphTshlLlshul...llos+QY.h....GcP...IpCh...ssphs....ppahssYC..alpsTahl......................................................s...hspph.....................s........sthp.c...thchhsYY...............QW.....VshlLhlQAlhFYlPphlW+.....shpGhplctlspth...................ssshhsp.........-ppcpp...................hchlspah.pptlptpp..th.....................................................shhYhhsclL.llNllsQlahhstF.Lut..............................sat....haGhpllt.h........hps....pphppsh..hFP+lThCsap.....phG.........s.hpp.ass.CVLslNhhNEK.......IalFLWFWhhhlulloshsllahlhplhhs.....phhhphLch.......psphts.....................................................................hpchtt..chLphsshFlLphlspNhssllhp-l...lppLhppapp.p ....................................................................................s.h.+lphhhTshl.......lhhhsh...........llssppa.h...................G.pP......IpCh......s.sphs.............tpahpsYC......WhpsT.ahl................................................................t..pth...........s.......................t.t..........t.htYY...................QW.....lsa...hLhhpAhhhYlPphhW+..........hp.uhp.lphlhpth...........................ppsh..t.................ptpppp........................hphlhphh..p..h.p.pp.h.......................................................................................................................shhYhhhchL.hhs......hlhphhhhsha.ls..t..............................................t.......au....thh.......h...........................h..........hFP+.hsh.Cph......hu..........s..p..hsh.Cll.hNhhsEK..............................lahhla....hWhhhl.hhshh.hh.hhh....h..........hh..hh...........................................................................................................................th.....
t.ht.sshhhh..ht.p.s...h.ph...h.th.......t............................................................................................................................................................................... 0 230 281 485 +3080 PF01658 Inos-1-P_synth Myo-inositol-1-phosphate synthase Bashton M, Bateman A anon Pfam-B_959 (release 4.1) Family This is a family of myo-inositol-1-phosphate synthases. Inositol-1-phosphate catalyses the conversion of glucose-6- phosphate to inositol-1-phosphate, which is then dephosphorylated to inositol [1]. Inositol phosphates play an important role in signal transduction. 20.00 20.00 20.00 24.60 19.30 18.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.15 0.71 -4.24 60 1141 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 916 36 533 1086 280 110.30 40 26.16 CHANGED GtThh+ssLuphhtsRGl+lpshhphNhhGNsDuhsLss.pphcSKchSKosslsshls.......h................................................pts.HhsslcYlPhlGDpKhAasclcsptFhGsshplplphpspDS ............................................GtThh+pVLschhhsRGl+lsphhphNhhGNpDhhNhtp.pphcS.Kc.ISKopsVs..shls....tp..Lh...............................................tpcssHls..s.c.YVPals.DpKhAhschpuchFhG..s..slph+hpspDS............. 
1 186 341 459 +3081 PF00459 Inositol_P inositol_P; Inositol monophosphatase family Finn RD, Griffiths-Jones SR anon Prosite Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.84 0.70 -5.10 59 10135 2012-10-02 15:53:20 2003-04-07 12:59:11 20 42 4250 79 3140 7691 7848 256.80 24 93.87 CHANGED phpplhphs.......hphsp.tuuplltptht......pphshphcs.........sssD...h..VTtsDptsEphlhpt....lppth....Pscsh..luEE.shstss..........................h........sss.hsWllDPIDGTpsFlcGh..............shaulslulhh......ptpPlhGllhsP.......h................hs...p...hapuhpG........p.Gu..hh...su...............l..plpsss........hspsh..hhsththp.....pttphsp...h..htth.thhstt...............h..R.th...Guush..clshlApGpsD..sahch..s......hp.WDhAAGthllcEAG..............Ghls.chsGs........hsh..hs.t..thlsus...........tth...hhpthp.phht .........................................................................................................................h........hhphsp.pAuphlh.phhp...........pthphphcp....................................ss.s...........VTp.sDp.ts..cphlhpt.....Lppt...h..............P..s...t..s..l........lu.EE..s..stt.ts..................................................................................tpp.th.hW..llDPlD..GTpsF.lc.sh.............................................sp.a..ulsIAlhp.................cG.....p.....s...h.h....G....l....lasP......h...................................................ts....c.......ha.h...A..t.cG..............t..G...A....h..h....ss...........................p...l...psppsp...................................hp.ts.h....l...h.s.......h..s..h..p...............t..p.t..t...h.t.p............h.....h.t..th..h..t.thptt...................................................h..R..th...G.uu..ul...ch.s.h.V......A......p.........Gp.........h.D....s.a..hch.s........................hp.WD...h...A...AGthl.lpc........AG..................G..hl.s...s.h.pGp.....................sh.h.......tp....t.....th
hhss.........................hh.......................................................................................................................................................... 0 961 1869 2579 +3082 PF03488 Ins_beta Ins_beta_nem; Nematode insulin-related peptide beta type Griffiths-Jones SR anon PRODOM Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.97 0.72 -4.16 11 116 2012-10-01 19:54:34 2003-04-07 12:59:11 9 2 6 1 114 148 0 47.10 30 45.29 CHANGED hRuCGR+LhphVhslC.G-.CsspsshDIuspCCpsp..CTc-aI+psCCP ........+tCG.p+lhp...hlhplC..ut.....C.s.....s...psshcl..u.stCCspt....socp.lpptCCP....... 0 32 37 114 +3083 PF03811 Zn_Tnp_IS1 Ins_element1; HTH_Tnp_IS1; InsA N-terminal domain Griffiths-Jones SR, Bateman A anon PRODOM Domain This appears to be a short zinc binding domain found in IS1 InsA family protein. It is found at the N-terminus of the protein and may be a DNA-binding domain. 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild --amino -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.90 0.72 -4.46 4 1346 2012-10-03 10:42:43 2003-04-07 12:59:11 8 14 477 0 86 743 12 33.10 58 28.45 CHANGED MAoVsl+CPpCpus-.VhRHGpussGHpRaRCppC+ ........MAolslpCPpC.s...........usc.VVRNGKSTAGH.QRYL.CScCR..... 0 15 37 72 +3084 PF00049 Insulin ins; Insulin/IGF/Relaxin family Eddy SR anon Overington enriched Domain Superfamily includes insulins; relaxins; insulin-like growth factor; and bombyxin. All are secreted regulatory hormones. Disulfide rich, all-alpha fold. Alignment includes B chain, linker (which is processed out of the final product), and A chain. 
21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.68 0.72 -3.19 28 1447 2012-10-01 19:54:34 2003-04-07 12:59:11 13 6 340 923 373 1711 0 62.30 31 56.65 CHANGED ppLCGscLV-sLh.hVC...G-......R.GFaY..P...hsh.......ht....................................................................................h....ptt+......tKRG..IV-pCChpsColtpLpsYC .......................................................................................plCGtcLscsl..hlC...st...............h...th..................................................................................................................................................................................ppG.....l..scpCC.hp.sCsht..L.t..hC....................................................................................................................... 0 102 130 226 +3085 PF00552 IN_DBD_C integrase; Integrase; Integrase DNA binding domain Bateman A anon SCOP Domain Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain. The central domain is the catalytic domain Pfam:PF00665. This domain is the carboxyl terminal domain that is a non-specific DNA binding domain [1]. 20.70 20.70 20.70 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.65 0.72 -4.52 37 14492 2009-01-15 18:05:59 2003-04-07 12:59:11 16 61 191 15 23 12714 0 49.90 85 8.72 CHANGED hppphsYa+...cspstpW+GPsplLhcGcGAlllpspppt.hhhlPc......RcsKhl.s ..........I.QNFRVYYR...DSRDPlWKGPAKLLWKGEGAVVIQ.DNSD..IKVVPR......RKAKIIRD........ 
0 9 11 16 +3086 PF02920 Integrase_DNA integrase_DNA; DNA binding domain of tn916 integrase Griffiths-Jones SR anon Structural domain Domain \N 20.80 20.80 20.80 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.20 0.72 -4.56 4 459 2012-10-03 08:51:45 2003-04-07 12:59:11 10 6 296 5 41 279 13 64.40 54 14.30 CHANGED EKc+cs+tchl+suEsptK-hRYhhKYl-phtcspFsYSWKLlsTctssAtKp-sluLR.K.tEIpK ....................EKRRDsKsRlL+sGES..QR.p.DGRYLYKYlDsaGcspaVYSWKLss.TDcsPAGKR-slSLREK.sclp+.................... 1 8 15 21 +3087 PF00357 Integrin_alpha integrin_A; Integrin alpha cytoplasmic region Bateman A, Finn RD anon Prosite Family This family contains the short intracellular region of integrin alpha chains. 20.20 10.00 20.20 10.10 20.10 9.90 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.96 0.74 -6.32 0.74 -3.75 27 407 2009-01-15 18:05:59 2003-04-07 12:59:11 15 17 77 12 158 412 0 14.90 53 1.49 CHANGED KhGFFKRph.th.pt KsGFFKRsh.sh.-..... 0 16 23 59 +3088 PF00362 Integrin_beta integrin_B; Integrin, beta chain Finn RD anon Prosite Family Integrins have been found in animals and their homologues have also been found in cyanobacteria, probably due to horizontal gene transfer [1]. The sequences repeats have been trimmed due to an overlap with EGF. 
25.70 25.70 26.20 28.00 25.40 25.60 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.47 0.70 -5.59 48 945 2009-09-12 08:02:03 2003-04-07 12:59:11 13 57 153 59 425 786 1 347.30 38 48.53 CHANGED oCp-CIpsuP...pCAWCsp....................................sF.t....sssRCDshp...pL....hppG....CstppIpsPp..uphpl..tcspsLosttt...........................................t.ph.sQlpPQclpLpLRPGp...................................stpFplph+pscsYPVDLYYLMDLShSMcDDLpsl+sLGspLucchp.plTsNaRlGFGSFVDKslhPalsTsPpc.lpsPC....t.t.sCtssFGa+plLoLTccsspFsppVpcQplSGNlDuPEGGFDAlMQsAVC............................pccIGWR..spup+LLVasTDusaHhAGDGK..LuGIlpPNDGpCHLcsss.........hYotSsth....DYPSluQlscKLp-ssIpsIFA......................................VTpphhs.........lY............cpLsphI.s..soslGhLusDSuNVlpLIpcAYpclsScVplp.pshP-s.lslsasupCts.spth.s.....ppCsslplG-p..................VsFsVplsspc.......Cspctt...pshpI+P.....lGFs-p...LpVplphhCc.C ............................................oCtpCl...ts...tCuWCtp.......................................sa............s.R.Cs.....t..pL..h..tpG........C..p.l....P.t..u.p.hph......psps..lspt............................................................................t.sQlpPQpl..tl.....pLR.s...G..p......................................tphplph+....p...sccYPlDLYYLMDLSh.SMpDDLpplcpL....Gs..p.L...hpthp.pl...........T......p....s...hR.lGFGo.FVDKsl.P....ah.pht...Ptc.lpsPC.......t......pC.ssFu....a+plLsLT..s.psppFpptVt.p..p...p.lSu.N....lDsPEGGh.DAlhQssVC........................................................t..pp..IGW..R.....p...sp+LLVFsTDs.shHhA..hD..G+..Lu....GIl.....P.NDGp.CHLpss.................Y.sh.S..p.h..........DYPSluplsppLsp.pNI..IFA........................................V.Tp..p....h.....................................hY.............pphpphl...P......to.s..lG...L...p.p.cS..sN....llp...L...lh.pAYp..plpS.......pV.l.p............p.......scs.....lp...l.pa.p..u.h..C.s....s....h..t......tpCpslplGpp..................lpFplplp..h
..p.p.........C...t.p..t......pphhl+s.....hGh.p-t...lpltl..p..h.....CpC.............................................................. 0 104 134 265 +3089 PF00143 Interferon interferon; Interferon alpha/beta domain Sonnhammer ELL anon Prosite Domain \N 25.00 25.00 25.80 25.10 23.90 23.60 hmmbuild --amino -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.95 0.71 -4.37 15 1000 2012-10-02 01:28:15 2003-04-07 12:59:11 14 1 135 18 321 1009 0 150.70 39 85.89 CHANGED LPpsasLss+csLpLLtQMsRlSsh.CLcDRpDFsFPpEhhcupQhQKAQAhsVL+EhLQpIFslFpo-pSS.AuWNpTLL-pLpssLaQQLscLcsCLtppht.pEsshsppssh...Lsl++YFpRIplYLcEKKYSsCAWEVVRsEIhRulShS.sNLptRLRp ................................................ppthhspcsLp..LLs.pMp+.l...s..s..hpC..L.p.-..RpD..F.sFP.p...Ehh.p.u.s...Q.h.Q..K..s.Q.A...l.sl.lHEhlQQhFpLF....s..o.ctSu..AuWspoL...L-phhssLp.QQLpcL-.sClhpphs..pc.s.s..l..hpp.ssh...............Lsl++YF.p+I.s.h.Y.LpEKcYSsCAWElVRsElhRsh.....spL.tthp............... 1 17 20 70 +3090 PF03487 IL13 Interleukin_13; Interleukin-13 Griffiths-Jones SR anon PRODOM Family \N 25.00 25.00 46.40 46.40 23.60 19.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.00 0.72 -4.25 4 47 2012-10-02 01:28:15 2003-04-07 12:59:11 8 1 36 11 14 53 0 42.00 77 32.90 CHANGED MALhLTsVIALsCLGGLASPuPVPsSs....sLKELIEELVNITQNQ ..MALhLTsVIALTCLGGLASPuPVPs.So....ALKELIEELVNITQNQ. 0 1 1 1 +3091 PF01348 Intron_maturas2 Type II intron maturase Bateman A anon Pfam-B_105 (release 3.0) Family Group II introns use intron-encoded reverse transcriptase, maturase and DNA endonuclease activities for site-specific insertion into DNA [2]. Although this type of intron is self splicing in vitro they require a maturase protein for splicing in vivo. It has been shown that a specific region of the aI2 intron is needed for the maturase function [1]. This region was found to be conserved in group II introns and called domain X [3]. 
20.80 11.50 20.80 12.00 20.70 11.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.98 0.71 -4.23 25 36524 2012-10-02 14:46:49 2003-04-07 12:59:11 16 20 25025 0 102 35521 17 102.30 63 26.36 CHANGED ppclphhsPlpplltpLscpsahstp........G+PhuhsphssLsDpsIls+aspIhRulhsYYShusN+ppLh.hlpYILRhSCs+TLApKaKs.Tlpphhp+aG................ssLh.pphhsppcph.............shthpphphshpp.......hhschhhlshhp ...........................................h.KKhDTlVPIIPLIGS.L...u.....K.AK..FCNlh..........GHPIS..K..P.l.W.s.D.L.SDS..DII.DRFsRICRNLSHYa.S..G.SS.K.KpsLY...+lKYIl....................................................................................................................................................................................................................................... 1 28 68 90 +3093 PF03519 Invas_SpaK Invasion protein B family Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 26.90 26.40 24.20 17.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.67 0.72 -4.14 7 181 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 173 8 14 65 0 77.60 50 58.83 CHANGED GCsssllusLDsHSTIsLcL.p-hPsIpIuhpsDD...lhlWusls-hppshlptpu.plLt.lhpss..sathGt.h.ltcpss .........GCsPSLIGslDSHSTIsL-L.aulPoIsISlcDDD...VWIWApL....GAsShVlLQQpAYEILhsIME.uC..pFupGsQlLLtcps....................... 0 3 7 9 +3096 PF04741 InvH InvH outer membrane lipoprotein Mifsud W anon Pfam-B_3503 (release 7.5) Family This family represents the Salmonella outer membrane lipoprotein InvH. The molecular function of this protein is unknown, but it is required for the localisation to outer membrane of InvG, which is involved in a type III secretion apparatus mediating host cell invasion [1,2]. 
25.00 25.00 25.40 25.20 22.80 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.02 0.71 -4.35 3 211 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 181 0 2 80 0 116.60 64 95.63 CHANGED MKKFYSCLPVFLLIGCAQVPsPSSGSKPVQQP-AQKEQQANAcSIDECMSLPYVPSDLAKNKTLSNQNADNSASKNNTISSSIFCEKYKQTKEQAFTFFQEHPQYMRSKEDEEQLMTEFKKVLLESGSKNLSIYQTLLoAHKRLQAL ...........................MpKhYSCLPhFhLlGCAQss...ShSKPVQQssAQpEQ.AsAsSIDEC.SLPYVPsDLAKNKoLSNpsADNSASKNssISS..SlFsEKY+QTKEQAhsFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLsAH-RLQAL..... 0 0 0 1 +3097 PF00904 Involucrin Involucrin repeat Bateman A anon Pfam-B_1158 (release 3.0) Repeat \N 30.00 0.10 30.90 0.30 22.00 -0.10 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.55 0.75 -5.76 0.75 -2.71 129 691 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 12 0 40 721 0 10.00 64 70.08 CHANGED lEQQEGQLct .LEQQEGQLch. 0 40 40 40 +3098 PF02121 IP_trans Phosphatidylinositol transfer protein Mian N, Bateman A anon IPR001666 Family Along with the structurally unrelated Sec14p family (found in Pfam:PF00650), this family can bind/exchange one molecule of phosphatidylinositol (PI) or phosphatidylcholine (PC) and thus aids their transfer between different membrane compartments. There are three sub-families - all share an N-terminal PITP-like domain, whose sequence is highly conserved. It is described as consisting of three regions. The N-terminal region is thought to bind the lipid and contains two helices and an eight-stranded, mostly antiparallel beta-sheet. An intervening loop region, which is thought to play a role in protein-protein interactions, separates this from the C-terminal region, which exhibits the greatest sequence variation and may be involved in membrane binding. PITP alpha (Swiss:Q00169) has a 16-fold greater affinity for PI than PC. Together with PITP beta (Swiss:P48739), it is expressed ubiquitously in all tissues [1]. 
21.30 21.30 21.60 22.40 18.60 19.20 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.57 0.70 -5.20 4 692 2012-10-02 19:24:03 2003-04-07 12:59:11 13 19 134 7 422 566 0 221.10 45 46.39 CHANGED hLIKEYRllLPhSV-EYQVuQLY.lActS+pEouG.GpGVElLhNEPYccs.GtpGQYTHKIYHltS+lPualRhLhPcuALslcEcAWNAYPYsRThhTs.ah.EcF.IcIETaahPDhGpQ-NVapLssssh+ph.lshIDIs.RsQVhsuDYKAEEDPthF+SlKTGRGPLussWtcEh...spstcpP......hMCAYKLspVcF+aWGhQsKlEpFIHc.t.RRlhhphHRQhaCW.DcWh-LTM-DIRchE-ETp+tL ...........................................................................h+Ea.Rl..hP.ho.....V.-E..YpluQLY.lu.ctS+pp............o.........s.........G.............G.pG.....V......E.....llp...N.cPa........p..........c.....s.......................s..............t..............p....GQYTcKlYHlt......S+lPuah+hlh....PcsuLhlcEcAWNAYPYs+..T......h.............h............T.........s......s.ah..-cF.IcIET.hap...s.D..h.G..pp..-N.V......a.sLss.....pp.hpph.pl..shIDIs..pct.l...s.t-YK...t...-..EDPphFp.S........KT.........sRGPL.s..s.Whcph..............pP............................hMCuYKLlplcF+.aWG.h.Q.s+lEpFIp.c..t.........+clhhphHR.....QhasWh.......Dc......W......h.......sLTM-DIRchEccsp.................................................. 0 135 169 310 +3099 PF03278 IpaB_EvcA IpaB/EvcA family Mifsud W, Bateman A anon Pfam-B_4003 (release 6.5) Family This family includes IpaB, which is an invasion plasmid antigen from Shigella [1], as well as EvcA from E. coli Swiss:Q9ZNF1. Members of this family seem to be involved in pathogenicity of some enterobacteria. However the exact function of this component is not clear. 
29.90 29.90 35.50 35.00 29.80 29.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.89 0.71 -4.99 9 303 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 125 10 3 134 0 135.40 47 82.57 CHANGED Muppo.lhlshsKlssplLpsVu+Qo.spDlpsWhppE+psYsSRslNpsIDcaChpNNssIosEs+p+lFphVEpca.plsLDhpuAQSSIsHh....ltuNuhFsKKlDp.C.cGhshts+NsTpsplhNhlucpha-Kphs.....scI-lhp.psp ......................p..hhhshsKhssplLpsVu+Qo...stDhppWhppEphsY.SRslNpp....ID....sYChp..........p...Nu...hI...........SpEp+p+lFphVp.....p.....t......h...p.sLDhp...uAQSSIsHh....l.uNt.FsKKhDt..C...cGhs.....s+tsTpsplhs.lucchap+phs.....s-Ichlp.+sp................ 0 0 0 3 +3100 PF04979 IPP-2 Protein phosphatase inhibitor 2 (IPP-2) Moxon SJ anon Pfam-B_5306 (release 7.6) Family Protein phosphotase inhibitor 2 (IPP-2) is a phosphoprotein conserved among all eukaryotes, and it appears in both the nucleus and cytoplasm of tissue culture cells[1]. 20.90 20.90 20.90 20.90 20.50 20.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.02 0.71 -3.90 20 478 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 281 4 294 440 0 115.20 26 55.30 CHANGED +KS.pKWDEhNI..LATaHPu-KDYGhMKlDEPpTPYp......pspsshs-s-sscsls....s-sLucKL..sAucupssshthc.-ppssp..p--p-l..o.Eccp++cpFEp+RKhHYsEhhslKLARpLI..scEhps-s---pp ..........................................................+WDEhNl..h.o.c...tp..ct..shMKI-EP..pTPYp.......h...........tsts.......t.....................t......p.........pc.s..pp.th.s..........................sptl.s.pph..........ts.ps...t..p..............p.h..........t.....t...ppppt..........tpppph.......p.pp.p....t+ptpFc.tRKhHY.s.Eh....h.p.........p.ph....tp...........tt................................................. 1 89 146 221 +3101 PF01715 IPPT IPP transferase Bashton M, Bateman A anon Pfam-B_1875 (release 4.1) Family This is a family of IPP transferases EC:2.5.1.8 also known as tRNA delta(2)-isopentenylpyrophosphate transferase. 
These enzymes modify both cytoplasmic and mitochondrial tRNAs at A(37) to give isopentenyl A(37) [2]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.37 12 5290 2012-10-05 12:31:08 2003-04-07 12:59:11 12 20 4605 26 1405 4112 3280 236.80 32 78.09 CHANGED hplYKshDIGTAKPohp-hpslPH+LlDllDsscsaosupapcpAhptl....p-IptcG+lPllVGGThLYhpuLlcGls.hPts..-sslRtclcpphtppG.shLatpLsphDPttAstIpPsDspRlhRALEVahhTG+shophtpp............tps..ashl.luls.hc+c.LapRIspRhcpMlcsGhlpEV+tLhsps.......hppshsuh+ulGYpphhsaLpG....chs.LsEhhpphhpsTRphAKRQhTWhRp..t.lpWlDst ...............................QlY+s.h.DIGTAKs....o..........c........E...h........t...s.....lP..H+LlD.....lh.......-.s.....s..p................s..........a..Ssu..cFppcAht...tl...............p.c.I...t...s.c.....G...+.l...P.l.l.V.GGTsLYhpuL...l................ps.....h.........s.............h....s.ts............s..........p...l..........R.........t............p............h..........c.....p...................p...........h......t............p............G......................p..t.......L...app....L...t....p...l...D..P....s.A.t....c.I..c...s...s......s.......pRlhRA..L..El...h.h.h.....o...G..p.s..hophhpp......................................t...t....h.......a....p....s...h....h...h.uls....hs..R.phLa.pR...IspRhc.tMl..p.......p...G..h.lc..E....l...ctL.hpps.........................t....s.....h.s..u..h.....pul.GY+phh.s..Yl..pG............................chs....h--......h....h....p...ph....hpsTRphAKRQ...hTWhR.p....t....hpah...t..................................................................... 0 467 901 1191 +3102 PF01745 IPT Isopentenyl transferase Bashton M, Bateman A anon Pfam-B_2229 (release 4.1) Family Isopentenyl transferase / dimethylallyl transferase synthesises isopentenyladensosine 5'-monophosphate, a cytokinin that induces shoot formation on host plants infected with the Ti plasmid [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.55 0.70 -5.19 7 88 2012-10-05 12:31:08 2003-04-07 12:59:11 11 5 64 7 18 3011 1753 163.70 35 80.48 CHANGED hplaLIaGsTsTGKTuhultLAppTGhPVlsLDRlQCh.plusGSGRPhssELpuTcRlYLs.sRslscGlIsAccApcpLhtcVppa.s.csulILEGGSISLlppMups.aWpssF.WplpRhcLssc-sFhspAKpRlppMLpspsstsSlLsELsphWtpsst+shLEsIDGYRhhlcaApppplsh-pLhslst..chhppLlpuIApEYhpHAhhQEp-FPth.....thttu. ..........lhlIhGPTsoGKTshAlt..L.....A....c.....t.....h.......u.....h.....s....l..IulDplQ.sh..plssG..Su.R.Pp.s.s.E.L.p.u.s.p......R........aLt....pp.l........p....G......h.ss..t..p..h.....t...h..hh.....l............t....tt.................h......llEGGS....h...SLh.th.........p.................................h.h.....h...........h.....t.....t..a........h.t...Rhtphh.......t..uhhtEl..hh...t.h...httl....sh.......p..h.aht..th..t.........t...................................t................................................................................................................................... 0 5 11 16 +3104 PF00605 IRF Interferon regulatory factor transcription factor Bateman A anon [1] Domain This family of transcription factors are important in the regulation of interferons in response to infection by virus and in the regulation of interferon-inducible genes. Three of the five conserved tryptophan residues bind to DNA. 
21.00 21.00 21.10 21.80 20.90 20.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.71 0.72 -4.28 26 872 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 101 29 310 642 0 105.30 43 28.65 CHANGED RhRL+PWLltplsSGpasGLpWlsp-+phFpIPWKHAuR+shs...-cDusIFKAWAltpG+aptG.-cs.............DPssWKsshRCALNpos-Fp.lhDpop.csspP.aKVYclh...s..t .........................+h+sWLltQ.lsSspa.PGL.tWls.c.-.+phFpIPWKHAu+pshs...-cDuslFKAWAltp.G.Kapt..G...D.cs...........DPssWKsphRCAL.Np.....s..-Fc.lh.Dpop....c..sspP..aKVYchlst..................................... 0 43 68 158 +3105 PF04120 Iron_permease iron_permease; Low affinity iron permease Wood V, Finn RD anon Pfam-B_71435 (release 7.3); Family \N 25.00 25.00 25.60 25.30 22.40 24.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.54 0.71 -4.57 19 374 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 264 0 195 387 17 117.30 31 64.84 CHANGED Fs+hAstsuphsGpPhsFllAlhlVllWuloGPlFcaSDTWQLVINTGTTII.TFLMVFLIQNoQNRDss......AlQlKLDELItusptApNsh..........lslEcLsEcELcplcptapchuctsppt.thsttpptpsttstp ................................................Fp+huphhsphsGpshsFlhulhlllsWssoG.P.hapaS-TWQLlINTuTTIl.TFLMVFLlQssQNR...Dst..............AlphKLcELltshp....tApsth..........ls...lEphp.tplpthtt.h.t.t.....................ptt............................................. 
0 38 103 150 +3106 PF02060 ISK_Channel Slow voltage-gated potassium channel Mian N, Bateman A anon IPR000369 Family \N 20.60 20.60 20.60 20.60 20.20 20.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.79 0.71 -4.56 3 180 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 45 1 100 159 0 96.40 30 75.87 CHANGED MILPNoTAVhPFLT+LWQETAcQGGNsSG.LARRSPLuDDGKLEALYILMVLGFFGFFTLGIMLSYIRSKKLEHSHDPFNVYIESDAWQEKDKAYFQARVLESaRuCYVlENQLAVEQPsTHLPELKPSs .....................................................................................................ttst..p...h.lYlLhVhuhFuhhlluIhLuYh+SK+hE+ps....D.PaplYI.cp-...Wtpt.......................................h.......................... 0 4 10 31 +3107 PF00180 Iso_dh isodh; Isocitrate/isopropylmalate dehydrogenase Finn RD anon Prosite Domain \N 20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.21 0.70 -5.50 23 12354 2012-10-02 21:08:39 2003-04-07 12:59:11 15 30 5707 258 3358 9844 7894 320.30 29 93.82 CHANGED pIslLPGDGIGPElhpsAl+VLculsppts...hchpacpthlGGsAIDttG..pPLP-ETlcss+cu.-AlLhGAlGGPcW......t.ssphRPEp.GLLsLRKphs.LaANLRPspha..tLtstSPlKp-hscs.lDhllVRELTGGlYFGp.pttps....................st....phuhsTphYocpE.l-RIsRhAFchAtp+.....+t+VsSlDKANVLcsSc....lWRchsp-ltp..............................EaP-lpLpHhllDssuMpLl+sP.ppFD..VllTsNlFGDILSDtAShlsGS.LGhLPSASLus.......pshulaEPlHGSAPDIAGKsh......ANPlAsILSAAMMLRauhshpcpA......stlEsAVppsLpp......GhhTsDLusts........t.huTs-hs-tl 
................................................................................................................lshl.GDGIGsE...lh..ts.hc.llcshh..h...................hpl..php......p.h...h.G....t...t...s..h.c...t.p.u...........p.ls..t.....-s.hpshcc.h..ss...sl.........h..Guls.s.Pp...........................s.th+...s....p....l...lt..lRc..........p.h....s.....hasN...l..........RPs......p..ha.............t.s......ssl+..............s.....t.......t...hD.hll.s.REsot.s..Yt..Ghphthss.....................................................................t........tt.suhsh..phh.o.c.pt...hcRls+hA...FchAhp+.....................+.p.......p.ls.hlc.K.uN.lhchsc....sha.+chsp..-.l.uc......................................................................ch..s......c......l..h..hc.chll....Dshsh...ph.l..p.p.P..tpaD..Vlss.Nl...GD.........llSD.....suhh.s......G.u....lG.hhP...uusls.....................pth..uhaE..s.........s...HG.............o...A..P...c..h.....s.Gps..h..........................sNPhu..Ih....ohshh...L.....c......a.............h.......s.......h...t.......p...t.....A..................ptlpp.u...lt.t.s.ltp......t.h.h.Th.Dhtt.h................tspphhp........................................................ 
1 1065 2017 2773 +3109 PF04279 IspA Intracellular septation protein A TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 24.00 24.00 24.00 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.17 0.71 -4.30 129 1696 2009-09-11 22:50:41 2003-04-07 12:59:11 10 2 1533 0 344 990 1626 176.80 39 93.65 CHANGED M.Khll-hhPlllFFlsYp................hts.............................IhhATushlsAollplshhahhtp.+lspMpllohsllllFGuLTlhhpD-pFIKhKPTllYhlFAssLhsuhhh...t+sllcthh..........sptlp.L....scpsWp+LshtWshFFhhhullNhh......................................V........sh....hop-...........hWVsFKlFGhhulsllFhlsQuhhlh+ah.cp .......................................................MK.hlDalPLllFFsh.YK........................h.h.s.............................IasATushIlATslt.l.h.h.sa.l...p.a.+..Kl-+MtllohllVlVFGGLTLhhHs-pFIKWKsTll..YsLFAssLLlSphh...p..K..sLIp...p...hL..................G.c..pls....L.........P........p...t.......l...W..............s+LN..huWulFFlhhul...h...Nla..........................................l.......ua..h.....h.sps..............hWVs.F.K.lF.GlhulTll.Fsllpulalh+Hh.p.c.................................................... 0 85 177 257 +3110 PF01128 IspD UPF0007; 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase Finn RD, Bateman A, Eberhardt R anon Prosite Family Members of this family are enzymes which catalyse the formation of 4-diphosphocytidyl-2-C-methyl-D-erythritol from cytidine triphosphate and 2-C-methyl-D-erythritol 4-phosphate (MEP) [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.35 0.70 -4.83 6 4492 2012-10-03 05:28:31 2003-04-07 12:59:11 14 13 3924 49 957 7221 4670 220.50 32 84.34 CHANGED lhsllPAAGpGcRMtsGpPKtalsLtGpslLp+TVcuhLup.tlc+IllslsssDhsthppLLss.....plpLVsGGssRs-SVhsGLpAlssss..paVLVHDuARPhltpsslscllAtt-p.psGuIhAlPVpDTIKRs-.sG..hlscT.DRsGLWtAQTPQuFshsLLhcuascu...ppGuphTDDASllEps.GhpVplVsGcssNlKlTpP.DLAlAEhILpp ...................................................sll.AAGhG..p.R.........M.......s............s.................s.........h......P..............KQ...aL...p.l.......s..s........+.....s...ll...h.+...ol.c...s...h..........h....t....p..........s......t.....l.......p..c......l..l....l..s...l...s.....s....s....p...........s....h......h....p....p.....l..h.tp........................ptp.l..p...l...l......p........G.....G....s....p....R.....t.......-......S....V.h...s...u...L..........p..........t.........l.............s........s........p..........t.............s...........h.............V....L.l...H.D.....u.ARPhls...tch...lp.c..........l.l.p.....t....s.......p.........p.............................t..s.....u....s..l.............s........u..l..P............l...t..D.............T.......l....K......p...s......p.............t.s.................hl.......s........p.......T.s.....s........R........s.......p.......L........a......t.......u....Q.......T.....P........Q...........s....F..p...h..p.....h...............Lpc...u...a....p....ps.....................p..p....t....h........p.......h...T...D..-...u...u.....h...........l.E......hh.....G...h....p.......l..t..l..V..p..G..c..h..p..N..l.K....lTpPpDLt.lAchhlt.t.................................................................................................................................................................................. 
0 339 640 817 +3111 PF01695 IstB_IS21 IstB; IstB-like ATP binding protein Bateman A anon Pfam-B_982 (release 4.1) Family This protein contains an ATP/GTP binding P-loop motif. It is found associated with IS21 family insertion sequences [1]. The function of this protein is unknown, but it may perform a transposase function [2]. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.91 0.71 -4.86 17 7213 2012-10-05 12:31:08 2003-04-07 12:59:11 12 34 2932 5 1265 9058 3056 156.90 22 63.86 CHANGED p+plpppl+hA+LP.httslEshDasts.ulDcchltpL.tshsalcptpNlllhGPsGsGKTaLAsAlGhpAsc.tGapVhahpsscLlppLptA+t-Gphtptlppls+s.cLLIlD-hGhhPhsppsuphlFcllscRYE+pShllToNhshupWsclhu.DsslAsAlLDRLlHpu.chl ...........................................................................................................................................................................................h...............p......p........t.......p.......s........l..l.h...hG.s.s.GsG....K.o.a.L..A.s.A....l......u..............t................p........h..............h..........p......p..............u....h.......p...........s.......h.....h...h.....p....h......s...............c....l....h...p.........p...........l.......p.......p...........s.......h.......p......p.......s......p.......h...........p.....p.......h......l......p............p.....l......t...p....s....c....l.....L..l.lD...-..l.........G........h.......................h.......s....p................t.........t..........p........h.....l....h......p......l.....l..s...p.....R..h.......p.......p...t.....s.....h.l.....h.T.oN.hs....hp....p......h...t..p..h...hs.......p............................................................................................................ 
0 465 857 1042 +3112 PF02189 ITAM Immunoreceptor tyrosine-based activation motif SMART anon Alignment kindly provided by SMART Motif \N 21.10 21.10 21.10 21.20 20.30 21.00 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.63 0.74 -6.93 0.74 -3.24 12 339 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 49 15 123 429 0 21.10 35 14.82 CHANGED -tlYpsLspcpcsp.YspLspp ..-tlYpsLppspcsp.YssLst... 0 9 10 16 +3113 PF01156 IU_nuc_hydro Inosine-uridine preferring nucleoside hydrolase Finn RD, Bateman A anon Prosite Domain \N 27.80 27.80 27.80 27.90 27.70 27.70 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.17 0.70 -5.21 118 5444 2009-09-11 13:52:43 2003-04-07 12:59:11 14 17 2525 62 1248 3860 1059 302.10 28 95.00 CHANGED hpplIlDsD....sG.......h...........DD.AhAlhhuh...t.ps.p...................l...clhulTsl............G..Ns.slcpsspNuhpllphh..............h....ts...lPVht.....Gss.........tP............................t.hhs....tphH....GpsGlss.........................................................................................................h..........sttt...tptp........As............phlhctl.p..ptssc......lsllslGPLTNlAhslptt.Pc.........lhpplcc.....lllMGGuht.................hGN.......................h.ssh...A................EaNha.sDPcAAchVh.s........shs...lshl.sL-..lop....p.....shh.............s.tpthppl....tt.....................ss.shuphltp.hhp.hhhph....................tshshaDslssuhh...h..........Pph...............................................................................................p.spp.hhlsV-ss........s.hshGt............o.ls-.......................t.ssssplsh..plDs.......ptFhphhhp.hlt....ths 
.....................................................................................plllDsD..........PG..........h........................D.DAlAlhhAl.....t.pP..c........................................l....-lh...ulos.ss.............G...Ns.....sl.-......p.....s..hpNuhp.l..lphh...........................................t.......pp.........l.P..Vht...Gus...pP..Lh..c.....................................hh.hs........spl.H.....................GpsG.lss..........................................................................................................hs...........................sptth......t.stp......As.......................phlhctl...p......p.s.s...tp..........lTllshGPLTNlAhh.lppp..Pc.................lhpplcc.....lVlMGGuht............................hGN.........................................h..oP..s..A.................EaNla....sD.PEAAph..VFpu..........uhs......................lshs..uL.D...lTp....p.......sh.l.......................................o..sc.h.h.p.phtp...............................................ttt.....su.ph.ltp..lhs...aahphph...........p.t........hsGh.lH.D.sh.s.luhl......l.......p..................P.pl..................h................................................................................p....h..pp..hhlp.V-sp...........u.hstGt...T..lsD....................................t.hsp.sssplhh....sl.Dh...ptFhphhhc.hh...hh............................................................................................................................................................. 0 384 731 1025 +3114 PF04183 IucA_IucC IucA / IucC family Bateman A anon Pfam-B_1982 (release 7.3) Family IucA and IucC catalyse discrete steps in biosynthesis of the siderophore aerobactin from N epsilon-acetyl-N epsilon-hydroxylysine and citrate [1]. This family represents the N-terminal region. The C-terminal region appears to be related to iron transporter proteins. 
25.00 25.00 26.30 25.60 24.50 23.80 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.57 0.70 -5.13 168 2438 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 938 17 394 1532 11 241.30 25 40.66 CHANGED sttsahph....Ep.ulhtGHPhHPssKuR.Gastp-httYuPEhsssh...pLpWlAlccshhphts......................tth...........htpthsspthtt..h.t............................shtsssah.llPlHPWQhppllt...sthts.lspspl.......l.LG...tust.ahsspSlRTlh.....shs....ps.....hhlKhulslp.Tss..hRslssppltsustlsphLpplhtp.-.hh.....pp..shpllpEsAuhshptst..........................th.tctluslhR-sshth..httt.......ppshs..hAuLhpss ...........................................................s.tsaht.EQ..slhpGHPhHPssKu+hGhs.t..p.-.httYuPE..htp.sh...pL.palAlc+shhttps........................tsh....pph.............lpptlssphhpt.htphh.........................................................t.shp...scahhlPVHPWQhcpllt....s.att.lspthl.........l.LG....tstp.ahsspShRTlh.............................sh.s............ps..........halKlslslp.Tus..hRslsspphtsusthschLpplhpp..-.hh.....tp.....thplhtEsuuhshtspp....................................phhppLuslhR-s.hth........tsp......ppshs..hAuLhtp.t..................................................................... 0 100 225 321 +3115 PF01419 Jacalin Jacalin-like lectin domain Bateman A anon Bateman A Domain Proteins containing this domain are lectins. It is found in 1 to 6 copies in these proteins. The domain is also found in the animal prostatic spermine-binding protein (Swiss:P15501). 
21.30 21.30 21.40 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.81 0.71 -4.26 52 1166 2009-01-15 18:05:59 2003-04-07 12:59:11 12 54 224 165 665 1119 2 122.50 23 46.35 CHANGED Gs.WDDG...sa-uVcKlhVutupsuIs.........hlcF-YsKsst.h.spp+Gtps......htscpFplshs.sEalsuVcGhYcp.........tssllsuLpF+TNKppou.hhG.......sGpcFsLp..pGpKIVGFHGpuu.p...hlpulGsYhss ..........................................................................t.h.hp....hh.h..s.....hlt..........lp.h.pY....p.t...s.......t.........s..ttt...Gstss...............thpp.............hpl.s.hs..sEal.....splpGpast..................................t.shl.puLp.....Fp.....T.....sps......p.....h.....s..s....hhs..........psGs..tF.s..h...ss.........p.s.t.pl.....lGF..a.G+uu..t........hlculGsah............................. 0 227 342 470 +3116 PF02375 JmjN jmjN; jmjN domain Bateman A anon [1] Family \N 26.00 26.00 26.40 27.40 25.30 25.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.36 0.72 -4.42 65 1103 2009-01-15 18:05:59 2003-04-07 12:59:11 12 65 271 51 660 1080 7 34.20 45 3.06 CHANGED PlFcPThEEFpD.hpYlpp..Icp....hut..chGlsKllP .PlFcPThEEFc.DshtY....Isp..Ics.....ut..+sGIsKllP..... 
0 166 312 494 +3117 PF03957 Jun JNK; Jun-like transcription factor Finn RD anon DOMO:DM01956; Family \N 20.90 20.90 24.10 22.40 19.10 19.10 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -12.06 0.70 -4.64 8 302 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 169 1 108 298 0 171.70 38 70.40 CHANGED METPFYtDDuLSuh....tuuua............s.........u.ss.....shlhK.pohsLNLo-s.uuuLK.Pstt......tstu..ststsuGL.LsSPDlGh.LKLASPELERLIIQS.NGLlTTTPT.PuQFLYPK..........s..lTsEQpsFAEGFVKAL-DLHKQNQ.Lsus..ssuutuASu..hsuP...hus.Au.uslhusuhtsE.PPVYANLSoasPss..s...usuasusohuauA.Ph.........sP.h....ss..PR.......hsALK-EPQTVP-sPS.G-SPP ...............................................................................................................................................................................................................................................ss-.....h.t..hplsos-LE+h..I........l.....s...ss.h.s....oP.s.ss....p....hhhs+....................s..lTcEQEGFA-GFV+ALs-LH..p..pNp...hsss......ss.u..ssss...........h.s......sss...Au.....s...ssh..su..s..hts-..sPV.Yss.....Lssassss......o..........sss.h.s.ss..s.hsas.....................s.h......................ssthP+..................hpuLK.EE.PQTVP-h...st............................................................................. 0 22 33 66 +3118 PF01486 K-box K-box region Bashton M, Bateman A anon Pfam-B_25 (release 4.0) Family The K-box region is commonly found associated with SRF-type transcription factors see Pfam:PF00319. The K-box is a possible coiled-coil structure [2]. Possible role in multimer formation [1]. 
23.50 23.50 23.50 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.16 0.72 -4.21 68 4553 2009-01-15 18:05:59 2003-04-07 12:59:11 12 15 1081 0 451 4266 0 93.00 28 47.14 CHANGED asptssss.....htpsphpphppEhsKL+pplctLQ.....ps.RphhGEsLssLshKELppLEppL-pulppIRu+Kscllhsplcphp+KEcplpppNptLppKlt- .......................................................................ptp..pph...ppE.ht...+L+p.p.cplp.......pp.....R.p.h.h...G..E....-..L..s..s..L.s.h.cELp.pLEppL-p....uLpplRs+K............tp....lh.h.pplpphp+...K.....-.....pp.l.pctNp.Lppph....................................... 0 56 254 350 +3119 PF02960 K1 K1 glycoprotein Bateman A anon Pfam-B_345 (release 6.4) Family \N 21.20 21.20 21.50 23.30 20.60 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.83 0.71 -4.10 4 847 2012-10-03 02:52:13 2003-04-07 12:59:11 9 4 3 0 0 751 0 99.90 76 46.82 CHANGED GLSSRLSNRICFWApCANITPETsTVSVSSTTGFK.............h.pTNtLlpIIPATTHAlVVVEEVKSppPaIpV.FLVFMTLVALIGTMCGILGTIIFAHCQKQSDSNKTV.QQLRDYYSLHDFpTEDYTQPVDWY ...GLSSRLSNRICFWApChNITPETaTVSVSSTTGF+TFSTNuLlp..II.ATTHsVVVVcEsKSTNsHIpVPFLVFMTLVALIGTMCGI.L................................................... 0 0 0 0 +3120 PF02149 KA1 Kinase associated domain 1 Mian N, Bateman A anon IPR001772 Domain \N 20.80 20.80 21.10 21.40 20.50 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -8.02 0.72 -4.51 26 802 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 272 3 433 777 1 46.20 40 6.17 CHANGED tsssl+FElplsKl......shhGlch+RluGssahapclssplLppL+L ......s.tshlpaEhEVCKLP.p..........huLsGl+FKRl.u.GsuhsaKslsscIhs-L+L........ 0 138 206 321 +3121 PF02524 KID KID_repeat; KID repeat Bashton M, Bateman A anon Pfam-B_1382 (release 5.4) Repeat This is family contains the KID repeat as found in Borrelia spirochete RepA / Rep+ proteins. The function of these proteins is unknown. 
RepA and related Borrelia proteins have been suggested to play an important genus-wide role in the biology of the Borrelia [1]. 20.80 15.10 21.20 15.10 20.10 15.00 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.75 0.77 -5.93 0.77 -2.78 28 617 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 25 0 37 464 0 11.00 84 15.55 CHANGED KIDsVEpsLpt KIDsVEKNLpp. 0 37 37 37 +3122 PF00050 Kazal_1 kazal; Kazal-type serine protease inhibitor domain Eddy SR anon Prosite Domain Usually indicative of serine protease inhibitors. However, kazal-like domains are also seen in the extracellular part of agrins, which are not known to be protease inhibitors. Kazal domains often occur in tandem arrays. Small alpha+beta fold containing three disulphides. Alignment also includes a single domain from transporters in the OATP/PGT family Swiss:P46721. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.05 0.72 -3.94 51 1792 2012-10-02 00:52:43 2003-04-07 12:59:11 16 207 356 88 938 4387 283 52.00 31 19.67 CHANGED Csph.............Cs...tphpPVCGoDGhTYsN-Ctl....C.....ppppslplp+pGtC .......................................................................................Cs.......tp.h.pP...VC....G..o.......D...u....p.....T...Y...sN.c.Cth...................C......................pp....t....t..p...l..p...l..t..p..G.C.................................... 0 276 378 621 +3123 PF03522 KCl_Cotrans_1 K-Cl Co-transporter type 1 (KCC1) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 28.00 26.80 24.50 23.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.14 0.72 -4.44 3 128 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 40 0 48 150 0 29.80 66 2.91 CHANGED VKDRNShLRLTSlGSDEDEETEAusEKVHM .VKDRpShL.RLpSlhSDE-.-Eo.tsht...-KlpM.... 
0 2 5 18 +3124 PF03520 KCNQ_channel KCNQ1_channel; KCNQ voltage-gated potassium channel Griffiths-Jones SR anon PRINTS Family This family matches to the C-terminal tail of KCNQ type potassium channels. 25.00 25.00 25.20 25.20 24.50 24.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.43 0.71 -5.00 9 568 2009-01-15 18:05:59 2003-04-07 12:59:11 9 14 136 9 208 521 1 163.00 42 29.81 CHANGED R+Ssusslss....hsuSPoKs.p....ShuFscRo+hh.shh...........ss.usuhD.lt-EcthpC.hphppLpsshKssIRslRhhKaaVAKRKFKEsL+PYDVKDVIEQYSAGHLDhLsRIKpLQtRlDQIlGKsshhsccp.+sK............shtphShhuRVsKVE+QV...tslEpKLDlLlsh...........appphttssssshshsssphP ....................................................................................................p.s......ttSPsKs.K....uh.uhssRsR.h.p.uhthps.t......................ttt.h.s..-...h...-.-..c.sht..s.-...h..hp...-lhs...s...lK.ssIRul.R..hh+FhVuKRKFKEs...LRPYDV+DVIEQYSAGHLDMLsRIKpLQs.R.............lDQIlG+s......ss+.c.+.p.K.........................................c.p.ShhuRlsKVE+.QV...tsh-pKLD..hLlsh............h.phht..s........................................................ 
0 34 52 115 +3125 PF03812 KdgT 2-keto-3-deoxygluconate permease TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 25.30 25.10 19.80 22.00 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.09 0.70 -5.27 3 900 2012-10-02 17:06:44 2003-04-07 12:59:11 8 5 674 0 114 494 12 292.30 53 95.17 CHANGED MKIKpoIEKIPGGMMLVPLFLGALCNTFoPGAGKYFGSFTNGLITGTlPILAVWFFCMGASI-F+ATGTlLRKSGTLVVTKIAsAWVVAlIAupFIP-DGIQsGFFAGLSVLALVAAMDMTNGGLYAALMNQYGoKEEAGAFVLMSLESGPLMTMVILGsoGIAoFEPclFVGAVLPFLlGFALGNLDPDLR-LFSKuVQTLIPFFAFALGNTINLoVIlQTGLLGIFLGVuVIIVTGIPLILADKFIGGGDGTAGVAASSSAGAAVATPlLIAEMsPuFAPVApuATALVATSVIVTSlLVPIlTulWuKKlK ....................MpIK+slE+lPGGMMlVPLhlGALhpTFuPtA...hc......hG..uFTs.uh.h..s..G..s..s..P.IL.ulahhCMGAsIplpAs.spsL+.KuG.oLs.loKlulAhlluhlsu+lhstcG.l......hGLSsLAllAAMs.oNGGLYAulhtpYGsc...............c-u...GAhslhSLpsGPhhTMl...hL...GsA.GlA..s..h..hshVusllPhllGhhLGNLDs-hR-Fhscus.sLIPFFAFALGssIsLshlhp.sGLhGIL.LGlhslhlsGh..h.IhAD+.L...l....u...G.....G.s..G.sA..Gh..AA..SSoAGsAVATPshIAphsP.u.F.p..s.hAsuAT..ulVAsuVIlTuILsPlLTuhhu++................ 
0 29 52 90 +3126 PF03814 KdpA Potassium-transporting ATPase A subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.30 20.30 22.30 22.20 20.20 20.20 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.82 0.70 -6.50 14 2239 2012-10-03 11:11:44 2003-04-07 12:59:11 10 6 1897 0 441 1655 66 488.70 47 98.06 CHANGED IhlhlsllsslshsLGtYlt+Vatsp+.............shhs.lhsslEphlY+lhGlsPcp-MsW+pYhhAlLshNlhhhlllahllhhQusLPhNPsshsuhs.cLAFNTAlSFlTNTNhQsYuGEsohSYhSQMhulTh..FsSAATGlAVuhAhlRGlstppsstLGNFasDLlRshhRlLLPluhllAllLlhtGlPQTLtu..hsssTLpGuhQpIshGPVAS.EAIK.LGTNGGGFFsANSAHPFENPsshoNhlchluhhllPsAhhasFGchltsp.....+QuhslhsshhllaluhsslshhuEhtGN..PhlpsLGlp........suNMEGKEsRFGhutSuLaAlsTTusosGuVNuMHDShTPlGGhlsLhtMhLspl.hGGhGsGlhshlhallLsVFluGLMVGRTPEaLGKKIEu+ElKhssLslLlpPlhlLl.oAlAhslPsuhuuhsNPG.HGhSElLYtaoSAuANNGSuFuGLssNT.aaNlohGlsMLlGRalsIlshLAIAGSLAsKpssPtosGTlsTDssLFsuLLlusllIlGuLTFhPsLALGPIu...EtLs ...................................................................hhhlhllhlluhPLGtalt+.lhpsp...................h...h.h.s.lEphla....+lhGlp....s....t....tcMsW+pYhhulLshNhhshhhlahlhhhQthLPL.N.Ppt................h.s.u.h.s...cLAhNTAlSFlTNTNhQtYoGE..sslSYhoQMhGlsh.pFlSAAoGlAVhhAllRu.h..............s...................t.......p.............p................h........p.......................s...................lGNaWsDlsRhslhlLlPluhllAlhhltQGs.Qshp..................s.h.t.................l.......p.........T.....l............p.................G............s..........p...Q.........h.....l.....shGPVAS.EAIK.LGTNGGGFFsuNSuHPFENPoshoNhlphhulhLIPsALsasFGchstcc.............................RQ....GhhlhhuMhhlFlhsls.lsh.huE.hpGN..Phl.t.sh.G.htt.............................usNM.EGKEsRFGlh.............h..............SuLFu.ssTTusSsGuVNuMHDShTsLGGhlshh.M.lspV.FGGVGsGLhshllaslLuVFIuGLMlGRTPEYLGKKI-s+EMKhs.sLslLlpP.hllLhhoAlA....h.........h....h......s..........u.......u........t...s.....u............h............hN........P.....G...............HGhSElL
YthoSAAsNNGSuFuGL.s......s........N......T................s................F.............aNhhhuhsMhlGRFhsIlshlAlAGSL..........ssK.....+..h........s.t.o..s..GTlsTcsshFsslLlsslllluALTFhPsLuLGPlAEaL............................ 0 126 260 357 +3127 PF02669 KdpC K+-transporting ATPase, c chain Bashton M, Bateman A anon COG2156 Family This family consists of K+-transporting ATPase, c chain, KdpC. KdpC forms strong interactions with the KdpA subunit, serving to assemble and stabilise the Kdp complex [1]. It has been suggested that KdpC could be one of the connecting links between the energy providing subunit KdpB and the K+-transporting subunit KdpA [1]. The K+ transport system actively transports K+ ions via ATP hydrolysis. 19.90 19.90 20.10 20.10 19.50 19.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.13 0.71 -4.60 3 2067 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 1913 0 393 1342 41 180.70 42 95.45 CHANGED hRsLlPALoohllLhlITGlVYPLlVTVlGQLaFPaQANGSLIc-u.GQVlGSALIGQsFTusGYFHuRPSAsu-u.....YssuASGGSNLAPSNP-LLutIAARVsAhRtEN..sAuspVPVDLVTuSGSGLDPsISPsAAphQhPRVAKARNISlcQLppLIsKHT-uRhLGalGEPuVNVLcLNLALD+L .......................................h...lpsulthhlhh....hl.l.s....Ghl..YPLlhTuluQ.h.h.FstQ.ANGS..Ll.p......ps.....s.......p..............l..l..GStLIG.....Qs.....F.........T...............s......s.......t.........YFaGR.....P.....S.....A.ss.t.................Ysst.......u...S.GGS.Nhus.......o.......N.............P..............cLtpplp........t.+.......l.s...t.h.............pt.......tN............s.s.....s.....s.....V.....P..sDL.VTuSuSGLDPcIoPpuAthQlsRVAc..A......R........s..........l.sh...pp...lppL...l...scpopt..h.huh...lG.ps.hVNVLcLNlALDp............................................................... 
1 117 235 323 +3128 PF02702 KdpD Osmosensitive K+ channel His kinase sensor domain Bashton M, Bateman A anon COGs Family This is a family of KdpD sensor kinase proteins that regulate the kdpFABC operon responsible for potassium transport [1]. The aligned region corresponds to the N-terminal cytoplasmic part of the protein which may be the sensor domain responsible for sensing turgor pressure [2]. 26.10 26.10 26.20 28.40 26.00 25.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.12 0.70 -5.20 21 2010 2009-09-10 15:55:13 2003-04-07 12:59:11 12 29 1860 2 394 1489 34 208.40 55 27.21 CHANGED scRG+L+IalGhAsGVGKTYsMLp-A+chhcpGhDVVlGhVETHGRs-TtshlcGLthlPh+plpY+GhtLpEhDlDAlL.tR+PpllLVDELAHTNssGSR+cKRaQDlEELLsAGIDVaTTlNlQHlESLNDlVppITGlpVRETVPDpllcpAD-lhllDlsP--LhcRLppGKlYts-pscpAlsNFFphsNLsALRELALRcsADcV .......pRG+LKlahGAusGVGKTaAMLscApcht.tpGlDlVl.GlVETHGRt-..TsAhl..-G...LthlPh+pl...pY+...G+plpEhDLDAsL....tR.........+.........P.s.llLlDELAHo.....Ns.....PG...S......R...H............KRWQDlEELLcAGIDVaTTVNlQHLESLNDlVp..tlTGlpV+..ET.VPD.hhcp.AD-l.LVDlsP--LhpRLpcGKVYhs.c........p.s.-pA.l.......psFFphuNLhALRELALRcsAD+V.............. 0 120 245 325 +3129 PF04962 KduI KduI/IolB family Bateman A anon COG3717 & Pfam-B_11840 (release 10.0) Domain This family includes the 5-keto 4-deoxyuronate isomerase enzyme EC:5.3.1.17 that is involved in pectin degradation. This family aldo includes bacterial Myo-inositol catabolism (IolB) proteins. The Bacillus subtilis inositol operon (iolABCDEFGHIJ) is involved in myo-inositol catabolism. Glucose repression of the iol operon induced by inositol is exerted through catabolite repression mediated by CcpA and the iol induction system mediated by IolR [2]. The exact function of IolB is unknown. Members of this family possess a Cupin like structure. 
20.10 20.10 20.10 20.60 20.00 19.80 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.73 0.70 -5.31 72 2006 2012-10-10 13:59:34 2003-04-07 12:59:11 7 5 1578 16 396 1311 510 252.20 30 91.15 CHANGED LLs+s.t...psGp...lhploPcsA.....sWpalGFplhcLtsGpshphp.....ssspEhslVll...sGpssVss....ts...patplGsR.toVF-.......tsPtulYlPsspphplsA.tss...sclAl.ssAPupssh..ss+.hlsPsslshptRGpGssp....RhVpsIhspsps....AcsLLVsEVh.TPuGsWSSYPPHKHDp-s.s....tEohLEEsYYa+lsPsp..........GFuhQRVYo....-.....DcslD-shsVpstDVVhVP+GYH.PsssssGY-hY.YLNVMAGPp..RtWthps-PsHpWl ................................................................hh.hhh..........t.....hhph.sp.h.....shthh.s.h.p...h...h.h.h..t.sGcshp.h.......hpcRElsllsl.......uGsuslps.........-G...p.hhclGpR................-ulYls....pus.c.........s....lhu....tss.........A+...hh.l.ssAPA....+..p.sa......Ps..+.....hl.s.......ss...........-............s....s........s..................p.....hG..c..s...tss..............Rplpphh.....ss......s.......sh......tsspL.hu.sh.sPGuhWsohPsHpH-cc..s......t.p..ppsa.aat.hpPpp..................hh..................t...sc..ph..s..lpNcp.sVls.P.ph.....sltuusGhcsY.alhsMsG.s...........p.h...hsD..cH.h.................. 0 122 254 318 +3130 PF02422 Keratin Keratin Bateman A anon Pfam-B_1920 (release 5.4) Family This family represents avian keratin proteins [1], found in feathers, scale and claw. 
25.00 25.00 25.10 25.10 24.60 24.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.58 0.72 -4.03 4 299 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 20 0 238 304 0 100.70 56 90.20 CHANGED SChs..L.CtP..Cs...PTPLAsSCNEPCVRQC.DSoVVIQPssVVVThPGPILSSFPQsosVG.SouA.AV..GShLutp.GhshuSGGa.GLuGaGGpYsG.hst.s .......................................SC.s........CtP...Cs....PsPLAsS.CNEPCV.RQCQDSpVVIQPsPVVVTLPGPILSSFPQNTuVG.uST.SA...AV......GSh...L..Ssp....Gl........P..I.o..SG....G.F.....sl...u..shus..t.hs.G.hs............................................................................. 0 0 0 98 +3131 PF01500 Keratin_B2 Keratin, high sulfur B2 protein Bateman A anon Pfam-B_706 (release 4.0) Family High sulfur proteins are cysteine-rich proteins synthesised during the differentiation of hair matrix cells, and form hair fibres in association with hair keratin intermediate filaments [1]. This family has been divided up into four regions, with the second region containing 8 copies of a short repeat [1]. This family is also known as B2 or KAP1. 27.60 27.60 28.00 27.60 27.40 27.50 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.61 0.71 -12.53 0.71 -4.47 8 307 2012-10-03 03:07:01 2003-04-07 12:59:11 12 11 41 0 85 433 0 97.90 45 90.76 CHANGED M..ACCuTSFCGFPoCSTuGTCGushCQPsC.p....................................oSCCQPoCspTSCCQPhs..oSCCpPo......................CpP.shQTSCCQPTChQTSuCpTGCGIGGuhGYGQ.GSSGAVSoRhRWCRPDCRVEGTCLPPCCVVSCTsPoCCQLaaAQASCCRPSYCGQSCCRPsC.......CC.............CsEPoC ............sCt......s.hSo.u.CG........p..............soCCpsssspTosCp.P................................CppP.sC.hpsssht..p.us................................................................................................................................... 
0 17 17 18 +3132 PF04579 Keratin_matx Keratin, high-sulphur matrix protein Waterfield DI, Finn RD anon Pfam-B_4676 (release 7.5) Family Family of Keratin, high-sulfur matrix proteins. The keratin products of mammalian epidermal derivatives such as wool and hair consist of microfibrils embedded in a rigid matrix of other proteins. The matrix proteins include the high-sulphur and high-tyrosine keratins, having molecular weights of 6-20 kDa, whereas microfibrils contain the larger, low-sulphur keratins (40-56 kDa) [1]. 25.00 25.00 32.40 94.70 22.70 22.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.43 0.72 -10.87 0.72 -3.71 4 69 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 27 0 36 73 0 96.40 74 96.39 CHANGED ACCAhhCCSVPTGPATTICSSDK.CRCGVCLPSTCPHsI.LLQPTCC.DNsPPPCphPpshVPTCFLLNSsHPTPGLpuINLTTYlQPGCEpPC.PRC ..........CCss+...sCSVPTGPATTICSSDKsCRCGVCLPSTCPHpl.LLpPTCC.DsCPPPCplPp.....shVPTCaLLNSspPTPGLpsINLTTasQP.sCE.....PC.PpC..... 0 3 3 5 +3133 PF03882 KicB KicB killing factor Bateman A anon COG3006 Family The kicA and kicB genes are found upstream of mukB. It has been suggested that the kicB gene encodes a killing factor and the kicA gene codes for a protein that suppresses the killing function of the kicB gene product [1]. It was also demonstrated that KicA and KicB can function as a post-segregational killing system, when the genes are transferred from the E. coli chromosome onto a plasmid [1]. 
20.00 20.00 20.30 21.20 18.00 19.90 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.54 0.70 -5.86 3 769 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 745 10 73 342 4 428.70 78 99.75 CHANGED MSEFSQT.....IPELVoWAKcpDFSLNLPTERLuFLLAIAlLNNERLDGEMuEGELVDAFRHVocAFEQSoEAIAsRANNAINDLVKQRLLNRFTSEhTEGsAIYRLTPLGIGITDYYIRQREFSsLRLSlQLSIVADElpRAuDSAEEGG-....EaHWRRNVFAPLKYSVAEIFDSIDLoQRlMDEQQQSVKD-IA-LLNKDWRAAISSCEtLLSETSGTLRELQDTLpAAGDKLQAQLLRIQDCVhG+DDLaFIDcLloDLQSKLDRIISWGQQSIDLWIGYDRHVHKFIRTAIDMDKNRVFSQRLRQSlpcYFDcPWaLTYAsAERLlDLRDEEMsLRDEEVTGELPEELEYEELs-l+DQLAp+Ip-hLtVYKEpssPIDLuLVLREYLusYPRoRHFDVARIVVDQAVRLGhApuDaoGI.PDWQAINDaGAKVQAHVIDKY ......................................................MSEFSQT.....VPELVAWARKNDFSISLPsDRLuFLLAlATLNGERLDGEMSEGELVDAFRHVS-AFEQTSETIuVRANNAINDMVRQRLLNRFTSE.AEGNAIYRLTPLGIGITDYYIRQREFSTL.RLSMQLSIVAuELKR.AADAAEEG.GD....EFHWHRNVYAPLKYSVAEIFDSIDLTQRlMDEQQQQVKDDIApLLNKDWRAAISSCElLLSETSGTLRELQDTLEAAGDKLQANLLRIQDATMs.+.D.DL.aFVD+LVFDLQSKLDRIISWGQQSIDLWIGYDRHVHKFIRTAIDMDK.N.RVFAQRLRQSVQsYFDcPWALTYANADRLLDMRDEEM.uLRDEEVTGELP.DLEYEEF.NEIR.E.QLAAlIE-pLAlYKs+QsPLDLGLVlREYLuQYPR..ARHFDVARIVlDQAVRLGVApADFo.GL.PAcWQsINDYGAKVQAHVIDKY.................................................................................. 0 8 23 49 +3134 PF04383 KilA-N KilA-N domain Aravind L, Iyer LM, Bateman A anon Iyer LM Domain The amino-terminal module of the D6R/N1R proteins defines a novel, conserved DNA-binding domain (the KilA-N domain) that is found in a wide range of proteins of large bacterial and eukaryotic DNA viruses. The KilA-N domain family also includes the previously defined APSES domain. The KilA-N and APSES domains may also share a common fold with the nucleic acid-binding modules of the LAGLIDADG nucleases and the amino-terminal domains of the tRNA endonuclease [1]. 
21.00 19.70 21.00 19.70 20.90 19.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.46 0.72 -4.44 139 2605 2009-01-15 18:05:59 2003-04-07 12:59:11 8 39 812 3 1531 2312 29 103.00 27 37.07 CHANGED hphsphplll.pp.p.ss...alNhTcltchs.....t...Kc......hppWh+pcpopcllpphppt.s...........................................h...............................h...tttpttp.........hp.G...sY.lH.clshplA.pW.ISs.paslhl.chlpphhpt .................................................h.l..cp.p..ss...hhphpslhpst......t.pp.+p.................pcWhcsppTpEllpphppth.ht.......................................................................................................................h.h.s.ptthsst.........hp.G...hY.lHc.Ll.shA.hW..hSP.cathhlhchhDplpp.................................................................. 2 1330 1420 1504 +3135 PF02172 KIX KIX domain Bateman A anon Pfam-B_4149 (Release 4.2) Domain CBP and P300 bind to the CREB via a domain known as KIX [1]. The KIX domain of CBP also binds to transactivation domains of other nuclear factors including Myb and Jun. 21.20 21.20 23.40 23.10 21.10 20.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.58 0.72 -3.93 6 242 2009-09-11 11:24:58 2003-04-07 12:59:11 11 28 90 6 142 201 0 78.30 70 4.11 CHANGED shcKsW+cplTpDLRsHLVcKLVpAIFPsPDssAhpDpRMcsLluYA+KVEt-MaEpApsR-EYYHLLAEKIYKIQKELcE ............G.h+KsWHEclTQDLRNHLVHK....L........VQAIF.PT...P..DPAAL.K..D+RMENL..VAYARKVEGDMYEoANoR....sEYYHLLAEKIYKI.QKELEE.......... 1 45 57 101 +3136 PF03037 KMP11 Kinetoplastid membrane protein 11 Griffiths-Jones SR anon Pfam-B_1062 (release 6.4) Family Kinetoplastid membrane protein 11 is a major cell surface glycoprotein of the parasite Leishmania donovani. 
25.40 25.40 25.40 139.80 24.70 25.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.14 0.72 -3.40 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 19 0 4 44 0 88.60 90 97.89 CHANGED MATTYEEFuAKLDRLDsEFsKKMpEQNtKFFADKPDESTLSPEMKEHYEKFEKMIQEHTDKFNKKM+EHSEHFKtKFAELLEQQKNAQaP MATTYEEFuAKLDRLD-EFNKKMQEQNAKFFADKPDESTLSPEMKEHYEKFERMI+EHT-KFNKKMHEHSEHFKpKFAELLEQQKAAQaP.. 0 1 2 4 +3137 PF03790 KNOX1 KNOX1 domain Finn RD anon Pfam-B_533 (release 7.0) Family The MEINOX region is comprised of two domains, KNOX1 and KNOX2. KNOX1 plays a role in suppressing target gene expression. KNOX2, essential for function, is thought to be necessary for homo-dimerisation [1]. 20.50 20.50 20.80 20.60 19.30 19.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.17 0.72 -4.63 30 1800 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 158 0 141 642 2 43.00 58 16.60 CHANGED sslKAKIhuHPpYspLLsAYlsCpKVGAPPElsshL-tlppctcs ......ulKuKIluHPpYPpLLuAYlDCQK.....lGAPPE.VVucLDtlopEhp.................... 0 14 85 119 +3138 PF03791 KNOX2 KNOX2 domain Finn RD anon Pfam-B_533 (release 7.0) Family The MEINOX region is comprised of two domains, KNOX1 and KNOX2. KNOX1 plays a role in suppressing target gene expression. KNOX2, essential for function, is thought to be necessary for homo-dimerisation [1]. 20.80 20.80 21.00 21.10 20.40 19.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.67 0.72 -4.69 20 1804 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 164 0 135 669 0 34.80 65 13.37 CHANGED sssslGsDPELDpFMEsYCclLsKY+-ELo+Pac....EAhsFLpcIEsQLssLst ......th...sIGhDPE.LDQFM..EAYC...EMLhKY+ppL.............................. 1 13 84 115 +3139 PF00051 Kringle kringle; Kringle domain Sonnhammer ELL anon Swissprot_feature_table Domain Kringle domains have been found in plasminogen, hepatocyte growth factors, prothrombin, and apolipoprotein A. 
Structure is disulfide-rich, nearly all-beta. 26.30 26.30 26.70 26.30 25.60 26.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.28 0.72 -3.88 24 2697 2009-01-15 18:05:59 2003-04-07 12:59:11 13 237 179 118 1376 2545 55 76.90 39 21.44 CHANGED ChpGsGpsYRGshupTtSGhsCQtWsuphs+phsh.sspp.sstsLtt...NYCRNPDG-tc.PWCYT.pssphpaEaC.slspC ......................................................Ch.p.s.s.G..psYRGsh.s..h....T....t...oG....h.....p..CQtW.s..u....p.h..P..H....p.......a......p...h........p....s...p..p...h..s..........s..tsL.pc.................NY..C............R....N....P.....D....u.......c........t....p....P..WCYT....s.....s...s...p......h......p....a...-a.C.slstC..................................... 0 528 604 870 +3140 PF00197 Kunitz_legume Trypsin and protease inhibitor Finn RD anon Prosite Domain \N 20.70 20.70 21.30 21.10 20.10 20.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.58 0.71 -4.72 17 766 2012-10-02 19:42:32 2003-04-07 12:59:11 13 4 124 55 109 793 0 164.40 31 83.53 CHANGED sVlDs-Gc.lpsGGsYYllsthhutGGG....hphstsup..CPLsVVposs-ls...pGhPlphss.h..tssh.ssh...ohlplpFsshs.phsss.....hWpVscpptt.t..hVphuthcs...sp..s.hFKlc+sst.........YKLlaCstt...........ptCpslGlphD.tcshpRLllopp.sPlslhFpK ........................VlDspGct.lp..s.G..s.pYh.Ihs..shhs..t..uGs..........lth.s.s..ss..t.hC.P....s..Vlppst...p..s...pGh.P..VpFos......hpspsslIp.so.lslp.F.s..ss..o.s...ph.sss..............hW+.ltpp.............s...........s..s...........t.....sh...hVsTGGstu..............tshFKIpKhss...s.............YpLs.aCP.s...........ps.C.hslGhh.hp.....t.phLs.hsp....sh.hhF...................................... 
0 7 91 97 +3142 PF02442 L1R_F9L L1L_F9_C19; Lipid membrane protein of large eukaryotic DNA viruses Mian N, Bateman A, Coggill P anon Pfam-B_1868 (release 5.4), Iyer L Family The four families of large eukaryotic DNA viruses, Poxviridae, Asfarviridae, Iridoviridae, and Phycodnaviridae, referred to collectively as nucleocytoplasmic large DNA viruses or NCLDV, have all been shown to have a lipid membrane, in spite of the major differences in virion structure. The paralogous genes L1R and F9L encode membrane proteins that have a conserved domain architecture, with a single, C-terminal transmembrane helix, and an N-terminal, multiple-disulfide-bonded domain. The conservation of the myristoylated, disulfide-bonded protein L1R/F9L in most of the NCLDV correlates with the conservation of the thiol-disulfide oxidoreductase E10R which, in vaccinia virus, is required for the formation of disulfide bonds in L1R and F9L [2]. 21.90 21.90 22.80 65.90 21.10 21.80 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.42 0.71 -4.84 32 163 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 82 6 0 124 2 198.30 32 74.57 CHANGED Musus..slp........slhsthsp+hhppLspp.sssssssC..slcIGplpsch.psCslpltNhC.sssphp...hshllpuhp-.shssLspcp+ttl............Atplshsl.ssssp..hpsplcppC.pupuslsspIsl...pslplspC.usssph.hphphlNoGoutuNCuhpulhsshsppss.....stltpp.pstspsh...hhlhhsllllhlhhlhhh ...........Musst..shp........ThhNhh.-+hhpcLtQp.sssssssC..sIcIGplhhch.+sCslplpNhC.usushp...hshllpuhpE.shssLspcp+shl............AtpIthsl.ssssp...h.splcpsC.supAslsNhlcI...Qsl.ls-CtussGph.hplphlNoGoupuNCulpslhpshsKtss.......Itpp.phshtsh...hhllhlllllllhslhhh...... 0 0 0 0 +3143 PF05047 L51_S25_CI-B8 Mitochondrial ribosomal protein L51 / S25 / CI-B8 domain Wood V, Bateman A, Finn RD anon Pfam-B_9461 (release 7.6) Domain The proteins in this family are located in the mitochondrion. The family includes ribosomal protein L51, and S25. 
This family also includes mitochondrial NADH-ubiquinone oxidoreductase B8 subunit (CI-B8) EC:1.6.5.3. It is not known whether all members of this family form part of the NADH-ubiquinone oxidoreductase and whether they are also all ribosomal proteins. 21.00 21.00 21.20 21.20 20.90 20.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.29 0.72 -4.23 67 788 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 307 1 534 748 8 54.90 25 38.29 CHANGED sRpFl.ppphsshp.tpNPpl.lhlpc...pss.p....PhlhucYtsG.............................pccplslcshss ............sRpFl.cpplsp....h....p.ppNPpl.lhlpc.....psshp........PtlhucY.t.pG........................................ppctlslcshp................................................................................ 0 164 279 432 +3144 PF04604 L_biotic_typeA Type-A lantibiotic Waterfield DI, Finn RD anon Pfam-B_4608 (release 7.5) Family Lantibiotics are antibiotic peptides distinguished by the presence of the rare thioether amino acids lanthionine and/or methyl-lanthionine. They are produced by Gram-positive bacteria as gene-encoded precursor peptides and undergo post-translational modification to generate the mature peptide. Based on their structural and functional features lantibiotics are currently divided into two major groups: the flexible amphiphilic type-A and the rather rigid and globular type-B. Type-A lantibiotics act primarily by pore formation in the bacterial membrane by a mechanism involving the interaction with specific docking molecules such as the membrane precursor lipid II [1]. 21.70 21.70 22.00 21.80 21.40 21.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.52 0.72 -4.32 15 102 2012-10-02 23:56:30 2003-04-07 12:59:11 8 1 70 0 6 64 0 48.00 40 91.69 CHANGED Mcppp.......-shsuLpEVo.cELDplLGG...GsGVlpTISHEC+h.NSaQalFTCC .........................Mcppp.......thhsulpEVS-cEL-pllGGt..GsGhhtTIoc-C........lhsCC.......... 
0 0 1 3 +3145 PF02502 LacAB_rpiB Ribose/Galactose Isomerase Main N, Bateman A anon Pfam-B_1105 (release 5.4) Family This family of proteins contains the sugar isomerase enzymes ribose 5-phosphate isomerase B (rpiB), galactose isomerase subunit A (LacA) and galactose isomerase subunit B (LacB). 24.40 24.40 24.40 24.70 24.00 24.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.55 0.71 -4.53 170 5002 2009-09-12 00:30:27 2003-04-07 12:59:11 13 13 3211 88 900 2738 1726 140.00 34 86.39 CHANGED +I.uluuDHuGhcLKptlhpaLppp...s..h.-VhDhGstst.-...ssDYP-huhtlAptVs......ss.c....s-..........h.GIllCGTGlGhuIuANKltGlRAAlspDshoAchuRpHNsANlLsl........GuRllG.tlAppllcs.......aLsscFc....GGRHtp..Rlspl....s ...............................................+IslGs.DcuuhphK....phlhpaLcpp......G..a..-Vh.Dh.Gst.st.p..............pscYPphuhtlAptVs.........ss.p.........s-..............h..GIllCGTGlGhshuANKl.GlRuAlsp..D...hoAhhs+pcNsuNllsh........Gu+ll..G..t.t...lAppIlcs.........alss.cac.....ssRppp+lstl..................................... 0 348 631 790 +3146 PF00356 LacI lacI; Bacterial regulatory proteins, lacI family Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.07 0.72 -4.34 26 29289 2012-10-04 14:01:12 2003-04-07 12:59:11 16 36 3489 72 5798 19321 1208 45.90 41 13.66 CHANGED Tl+DVA+hAGVShuTVSRVlNss...t.tVSppT+c+VhpAhcELsYtPN ...............TlpDlAchAGVShuTVSR.VlNsp............s..pV..o..p..p.T+c+VhpshccLsYpPN........................ 0 1667 3424 4636 +3147 PF02450 LCAT LACT; Lecithin:cholesterol acyltransferase Mian N, Bateman A anon Pfam-B_2099 (release 5.4) Family Lecithin:cholesterol acyltransferase (LCAT) is involved in extracellular metabolism of plasma lipoproteins, including cholesterol. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.45 0.70 -5.50 7 1122 2012-10-03 11:45:05 2003-04-07 12:59:11 10 21 550 0 521 1745 75 256.00 21 58.91 CHANGED hhplWLsls........hFh.hshssWh..hphVhspsst..hh..tPtsplcs....sGFs..ts.uhEhLD.shlsGhh...appllpsLsshGYctsphltAu.YDWRluss...cpscYappLKthIEphhphp.sppVhLluHSMGs.lhhYFLh...tph..........hspaIcuFIsluuPhhGSs+sltslhSG.......spshshl.s.sht........p.pRhhsspsa.....hhPps......hsh...--cl..ssssh................Nhshtsh.pahtslsht.h..........hh.app........................hssLpssLs.sPthclYClYGsshPT.csYhah..............sthhshts.ss..........hhhs-GDsTVshhuhs...........hCppWhstps...........hphsH....hhsl+s.......spHlsllhps .......................................................................................h...............................................hp..t..............ts.th.............t.t........sh.....h...s......h............................................ht.......hlpt.L.h.t.......G.Y.......s...p....t..h...h.us....YD..W.Rhs.t......................t..p....h.h....p...p....L..t.th.l.............E.........p....h.....h........t.......h..........s.........p..................lh..lluHShGsh.h......h..h.......h.a.h...p..................................h...p...ph...l....t...th..l.lu.s...s.....h.h.G.s....t..s.....h........h.h........G.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................t........................................................................................................................................................................................................................................ 1 192 317 431 +3148 PF04369 Lactococcin Lactococcin-like family Finn RD anon DOMO_DM04110 Family Family of bacteriocins from lactic acid bacteria. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.80 0.72 -4.05 3 23 2012-10-02 23:56:30 2003-04-07 12:59:11 8 1 16 0 4 19 0 51.00 27 76.43 CHANGED MKNQLNFNIVSDEELSElNGGpLpal.SsGshsWYpsTsTuKTlsQQTpssauAshsshs .............p.pap.lS-pELuplsGGshshhQs.sushshh.pt.sthhh.tQs.ssh.hth.s......................... 0 0 0 1 +3149 PF01306 LacY_symp LacY proton/sugar symporter Finn RD, Bateman A anon Prosite Family This family is closely related to the sugar transporter family. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.52 0.70 -5.92 4 1275 2012-10-03 03:33:39 2003-04-07 12:59:11 14 7 574 10 131 2345 309 380.70 39 96.84 CHANGED hhsh+NpsFahFuhFFFFYFFI.MusaFPFaPlWLp-VstLoKT-TGIlFSClSLFuIlFQPlaGllSDKLGL+KpLlWsIohlLVhFAPFFIYVFuPLLQhNIhsGullGGlalGhlasAGuGAlEAYIE+sSRsspFEYG+ARhaGClGWALCAohAGIhFoIsPplsFWluSGhAllLhlLLhhuKs-sspoAhVhDAlGANppsFSh+hshpLF+..phWhFllYlVGVussYDVFDQQ.FAsFFpuFFsosptGT+lFGalTThGElLNAlIMFhsPhIlNRIGuKNALLlAGsIMolRIlGsuausoshEVlILKhLHhhElPFLlVGsFKYIousF-sRLSATlaLIGFphuKQLuhllLSshsG+hYDphGFQssYhlLGhIslohTlISlFTLSust..hlh.sst ............................................................h........h....h.ul.h...a..F..h.Y.FFh....h...u.h.a...h...P...F...a....s.....l...W..L.....+.....s.......h..G...L..o..s...s....s.....h.....G....lla....us.h.tL...hu.lh.h...t.s..l.h..G..lluD....+......L...G.h....+..K..h.....L.....l.....h......h...l......s....s......l...h..l..L...h.u......P.....F.....h....l...a....l.....a.....u.....P.....L.........L...p....h....N....h....h......h...G...s..l..l..G...u...l....a...h...u...h.....s...F..s.u..G.....s...s...h.h...-....A....a..h......-.+...h....u......R...p........s.......F....E......Y....G..+..s....R...h.....a...G.....S.l.G....a..A.l..s......u.....s.........l.....s..G.......h.........h.......a......o.....l........s...s.........p.........h.........s..F..W.....l...s..S...s.....................h...u...l...l.h.h..l...L...h..h...h...h.....+...s.....s..................p..s.........h...h.....p...s.......G....A.s....+...p....s..........h....o....h...t.....s.h......t.L.h..+.......p...h....W....h...h..l.....l...a...Vl....G...l..p.......s...s....Y.s.l....aDQQ...F...s.s.F.a.s.sa....F..t.....o...s....p.......G.....s...p...s.a....G.....Y....l..s....oh..G.l.h.E.uh.l.M.h.h....u.P..hl...ls.....R..l.G.....u.+....s..sLL.l..u.uh..l.Ms.....lR..I...h..G...s.u.h....s...T...s...s...h......l...l...l.l...K...h....L..Hs..h.....E.h....P.l..h.
.ll.u...s....F..K.YI..s....u.p.F.......-...s...+........l..S.u........T....l......a.h....Vs....a...s.h..u.p...p.l...uh...h...l...hSshA.....G......h...h....Y...........-.......p.........l.....G....ap..s...s....a...hl..h....u....h.l..s.L.h....h..hl.h.uhFh...Lpt.t........s................................................................................................................................................................................................. 0 19 50 90 +3150 PF00961 LAGLIDADG_1 Intron_maturase; LAGLIDADG endonuclease Bateman A anon Sarah Teichmann Domain \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.38 0.72 -3.46 91 1228 2012-10-03 01:41:40 2003-04-07 12:59:11 14 21 340 88 258 1340 92 96.80 21 50.58 CHANGED lsGFhDu-GsFplshpppp...................ht.....................h.hpFplshc......c-htlLptIpphhs....................hGplphppss...shpaplss...p.hphllshFspYs..lhopKhhcahpapchh ....................lsGFh-....u...-GsFtlshpppp...................................................................................................phpI...s.p..............pchtlLphlpphhu....................................hGpl..th.....p....pspp...........hhpapl...ps...ppththlls..hh.....s...p.h......hho.p.K.h.h.pahhahp............................. 0 77 194 231 +3151 PF02264 LamB LamB porin Bateman A, Mian N anon Pfam-B_4810 (release 5.2) Domain Maltoporin (LamB protein) forms a trimeric structure which facilitates the diffusion of maltodextrins across the outer membrane of Gram-negative bacteria. The membrane channel is formed by an antiparallel beta-barrel [1]. 
25.10 25.10 25.10 25.70 24.10 24.70 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.45 0.70 -5.38 6 1601 2012-10-03 17:14:36 2003-04-07 12:59:11 10 3 781 33 201 906 30 388.90 35 86.13 CHANGED FHGYhRSGlGhousGutppshps.stt.....uhGRLGNEs-TYsElpLupElap-ssKoahh-oMVAhus..stpNDWE.......................sus.shRQhN...VQuKsLlsahPtuTlWAGKRaYQR.+DlHhlDFYahslSGs.GuGIENlclGsGKlSlAhlRp...stshT.ssss......phssslhDlRLAslthhssusLEluscYupAN....p+Dstshp..tsuKDGhMhouchTQulhsG.FNcaVlQYuT.uthutshGphhG............stoph.h.stthcssussaRllsaGshslG-sWpluashhYptupDhh.......usps........h-hhoVsVRPMYKWsshhpThhEsGYpsscspssu..t.-csst..YKlTLAQsWpsGsShWu..RPEIRlaATYhchDcc.sphtshs................stscDsEasFGsQhEsWW ....................................................................................FpGYhRSGh.G..h...s.u........s.......G.ut.pps...h....t...s.......t...............Shh.RLGNE.s-..sYhEL....pLs.Qc..la...p...-..s...s...+.o.h.h..hss...ls...hsl........p.t.p.ssWp...........................uss..s...h..+...p.hs...V...p...sKsh.....l...t.a.....h...P.tu.slWAGKRa.hp+...aDlHMlD.ahh.h.s.h...u.Gs....GuG.lENhc.l.G...G..Klul...Ahs.Rp......................st.s........s.....t.spp...............................phsss.shD....l...Rhssh.....t..h..........s.s....u......sL..p.lusc..Y.up..uN...........pp.D...s........p...s........h.s.t...K...D........u..a..hh...os.p..h.s....Q.s...h....h.....p...G..a.NcFsl....Qh..As....s....hh...u...ps..hGp.....tG...........................stst..h.s..t..........hshpssG..p.hhR...llsp...Gt....h..l.u.Dp.aphh.shh...Yp.p......Dh...........s.s.p.u........................h..c.hosslRPhY.tWs.shhpThhElG.Ypp.hcsp..p......s..s.....................scss...........YKhTLAppa....p..s..G..s.S.hhu..RPtIRh.a..AT...Yhchs-p...s..t.t..h..shs......................csss....D.pa..shG......sQhEhWW.......................................................................................................................... 
0 19 67 132 +3152 PF03746 LamB_YcsF LamB/YcsF family Bateman A anon COG1540 Family This family includes LamB. The lam locus of Aspergillus nidulans consists of two divergently transcribed genes, lamA and lamB, involved in the utilisation of lactams such as 2-pyrrolidinone. Both genes are under the control of the positive regulatory gene amdR and are subject to carbon and nitrogen metabolite repression [1]. The exact molecular function of the proteins in this family is unknown. 22.00 22.00 22.20 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.48 0.70 -5.27 15 2484 2012-10-03 16:37:10 2003-04-07 12:59:11 11 7 2180 4 565 1717 1361 236.60 45 95.10 CHANGED plDLNuDlGEuFGsaphGs..D-pllplloSANlACGFHAGDPssMccsVcLAtc+GVtIGAHPuYPDLhGFGRRshslospElhshllYQlGALpuhscupGsplpaVKPHGALYNphs+DcslAcAlscAVtshDssLhLhuLu...GSthlctAcchGLpshpEsFADRsYpsDGpLVPRupsuAllcDs-pslpQVLpMl+cGpVpulDGptlslpADolClHGDsPpALthsp+IRpsLcpt .............plDLNuDLGE.....uaG.a...p...hGs.........DpplLslVoSANlACGFHAGD..spsMppTVch.AhcpG.........VuIGAHPuaPDltGFG...RRs.h.s.l.s.sc-lhs.slYQlGALpuhs...+upG..splpHVKPHGALYNtuAcDtplAcA...l.scA.l.t.s..........h.........D.........s....s.......LhLhGLu.......sS...t.l.lctAcp.h..G.L.tshpEsFADRuYpsD...G....o.....LV.sRppsGA.llcDp-pslppslpMlpcGp.Vp.ols.Gp.hl..s.l............pAc....o....lClHGDs.pAlthAcclRpsLtt.p.................... 0 150 310 461 +3153 PF02061 Lambda_CIII Lambda Phage CIII Mian N, Bateman A anon IPR000278 Family The CIII protein from bacteriophage lambda is an inhibitor of the FtsH peptidase [1]. 25.70 25.70 25.80 26.90 25.30 25.60 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.02 0.72 -3.98 3 144 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 136 0 0 39 1 42.50 77 81.18 CHANGED MpaAIAGuAVMGlupLsESLLERITRKLRAGWKRLlDILNQPGVP ....MpaAIAGuAVMGhspLsESLLERITRKLRsGWKRLh-ILNQPGVP..... 
0 0 0 0 +3154 PF00052 Laminin_B laminin_B; Laminin B (Domain IV) Sonnhammer ELL anon Swissprot_feature_table Family \N 22.40 22.40 23.00 22.50 22.30 22.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.70 0.71 -4.56 9 950 2010-07-04 11:48:03 2003-04-07 12:59:11 13 274 91 0 537 928 1 133.20 26 8.37 CHANGED YWthPppFLGcplhuYGGpLcaolpast....ss.sp.upsDVll+Gsslplss.hhspstsh..s-shhchhhplhEsh...aphpst..lo+cchhplLuNlstlLIRAoY.upchsthpLssVoL-hA..+.sst......sAs.VE .........................................YWthP.t.paLGs.p.l.s.SYGG.pL.pasl..phps....................ps...p....pt......s.....D..V.lL...............p.G..s...............s..............hp.l...h.h.............p..p.......s.ts.......t...p....h.........p.......ph.....p.l..p.l.p.E.s.....t.h..........h.................p....ts................st....................loRp-hhtlLusLpslhI.....RAo.....Y...sst..........t.p..s..pLssVsL-sA...p.s.ss.s.......A..VE............................... 0 124 168 337 +3155 PF00053 Laminin_EGF Laminin EGF-like (Domains III and V) Sonnhammer ELL anon Swissprot_feature_table Family This family is like Pfam:PF00008 but has 8 conserved cysteines instead of six. 21.00 13.40 21.00 13.40 20.90 13.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.90 0.72 -4.03 72 16492 2012-10-03 09:47:55 2003-04-07 12:59:11 19 919 252 15 9516 15120 36 49.50 30 19.94 CHANGED CsCsspus.t.....spCc.............oGpC..Cp.sstGpcC-pCtsGaas.........sss.sC ..............................................CpC.s.st.u................tpCs.....................................................sG....p..C......h..C.........p.....t.....s..........s.t........G..t.....p.C.....-.....p.....C.t...s..G..aas............................................. 
0 2235 2973 5910 +3156 PF00054 Laminin_G_1 laminin_G; Laminin_G; Laminin G domain Sonnhammer ELL, Finn RD anon Swissprot_feature_table Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.66 0.71 -4.14 21 1504 2012-10-02 19:29:29 2003-04-07 12:59:11 18 398 102 34 720 7364 51 130.20 24 13.02 CHANGED F+TpsssGllh..Ys.Gptsct.........DahultLhsG+lphphchGss....tssltsssp.lsDGpWHplplpRsp+pssLpVs.spp........................tspsshusss....Lshss..LalGG.hP......thtttthshssu.hpGCl+plhlssp.lp ...................................................................hpT.sss.Gllh........Yt...uppspt........................Da.h..u.l.t...L..h..........s....G....+.....l...ph..p..........a..c.........h.............G..ss.................ssh.l..t.....s.......s.......s.......h.....l......s......s..G..p..W.....H.....p..l......p......l........p........R.................p........p......p...............s.........s........l....p..V..s..s..tss.............................................................pt.ps..s.h.ssph...........ls.h..p.s.......la.l..G.G...hP....................h..h...t..t...t...h.....t......s....h..........s.........s.......s.....a.p.GClcpl..lstp................................................................................................. 
1 123 175 377 +3157 PF00055 Laminin_N laminin_Nterm; Laminin N-terminal (Domain VI) Sonnhammer ELL anon Swissprot_feature_table Family \N 19.50 19.50 20.00 20.00 19.00 18.80 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.71 0.70 -5.37 9 1311 2012-10-03 19:46:52 2003-04-07 12:59:11 12 174 102 7 725 1124 0 213.80 31 15.25 CHANGED CaPAhsNLhhstp..lououTCGh+tPE.aClloclpt.....cKpCalCDuppsh.t....ppHhhphlscsps.tp..sWWQSp...NGl.....VTlpLDLcttFchTalIlpF+.T.RP.ushllERStDhG.sWtsYpYauhs...Ccsoastsspss.+p....s-llCTScYS-IpP.pcGEVhFpsL-..Puh...Ds.SPclQphlphTslRlpLsRL+TLGDsLhs.........c.clpc+YYYAIpDhsVtG ....................................................P.hhshh.st....l....s...s...sTCG............................s............p.......aCh..h...s...t.......................................pC....hCDu.p..p..................................sH.s.p..h.h..D.h.p.....s............saWQS......................p..s...h...................................pVsl......sLs..L..t.p.....tFcls.a.lhlp..Fp....osR....P.sshll-.+...........S.hD...hG....poWp.PaQYauts......Ctptas.......h.sp.s.s..h..p.......p...........p-sl...C......o.....s..p.a...S....c..h...P.h...p...s.G.....p........l....h...........aph.......l....s............t............P..u.h......p.........h.......-.......s.......SstLp...........-a..hpsTslRlph...h..R..h.p..s.......h...s...s.p.h...hs..........................................p...t.s.h.p..pY.aY.ult-h.VtG..................................................................... 
0 151 209 445 +3158 PF01299 Lamp Lysosome-associated membrane glycoprotein (Lamp) Finn RD, Bateman A anon Prosite Family \N 31.40 31.40 31.80 31.40 31.00 31.20 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.12 0.70 -5.28 25 515 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 100 0 288 524 0 247.70 23 80.35 CHANGED YslpphohsYNhoDsshFPsuospsstTssstss..lpuslssta+Csusoslphs.sVs.ph.ssplpAahpssshSppcshCppD...pss.Pss..............s.sssPuPo...hP..sPs.lGpYsVssu..NsT.CLLApMGLQLNlTYppt.ssppssplhNIsPNs.TsssGoCssp.pusLcL....sspshphLsFpFshssp...scaaLptVslshsl.....ssuss..hFsssNsoLshhpAslGpSYpCspEQslplosshp.......lNsFslpVQAFplsssp.FusspECphD-s.shLlPIs.....VGuALuGLlllVLlAYlIGR+Ro+u.GYQol ..............................................................................................................................................................................................................................s.s............................s.Ps.......s....pst...hsp..Y..plpss....st..s..Clhhph.ul.plplp...ap....p......s....t...............h...t.th..hs..ls.s...st.....sps..sG..sC...........t.....s........p....p....s.pLpL............s.t...p.h...h......h..L.....sh....tFs..........h.....s..s.....t................ppa...hL..pp.lplshsh..............ssstt....h...h.ps.t......s......p.s.....h.....p.....h....h...p.....ss.l....G....pSYtCst.....p..p..s...lpl......s.ssh.p..................lphhslp.lQ..s....F.....pl...p...ss....p..F.u.s.........s...p...c...C..s....hD......t.........t....s.............l...lP.lh.lGhsL.usllll...l.ll...u..Y....hl....tp......+pt.....Y.................................................................................................... 
0 98 117 186 +3159 PF05147 LANC_like Lanthionine synthetase C-like protein Moxon SJ, Finn RD, Fenech M anon Pfam-B_6095 (release 7.7) Family Lanthionines are thioether bridges that are putatively generated by dehydration of Ser and Thr residues followed by addition of cysteine residues within the peptide. This family contains the lanthionine synthetase C-like proteins 1 and 2 which are related to the bacterial lanthionine synthetase components C (LanC). LANCL1 (P40 seven-transmembrane-domain protein) and LANCL2 (testes-specific adriamycin sensitivity protein) are thought to be peptide-modifying enzyme components in eukaryotic cells. Both proteins are produced in large quantities in the brain and testes and may have role in the immune surveillance of these organs [1]. Lanthionines are found in lantibiotics, which are peptide-derived, post-translationally modified antimicrobials produced by several bacterial strains [2]. This region contains seven internal repeats. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.52 0.70 -5.89 47 1539 2009-01-15 18:05:59 2003-04-07 12:59:11 8 21 836 8 490 1232 30 282.40 16 54.42 CHANGED s.hshsLYsGhsGlulhhhphh..chhss.tp...hhph.spphlpphtpthppt............shohasGtuGlhhshthhsphhsp.pth.tphlpphhphh.pphtttt...............DlltGt..uGhlhhllhl.....p.h.tppph.....chlhphspplhpstpp..............p.hh.p......tsph...hGhAHGhuGlhhsLh.phhhp......hhtp...pphhchlcpslphhhphp..pp.ss...a.sthtpp...............stWCaGssGlhhsh.........hphtpsh.pcpphhcthtpshph.shp..ps....hhpshslCHGsuGsh.hhhhhtchhsppp..h.phtpph...hp.lhshtpp.h...........hstpss.spshuLhpGhuGhh..hhlhchhsspps.......sshhl 
.........................................................................................................................t......h..G..Ghhhhh..h............t...........................h.th....h.t.h.....l.....pt.h.....ht............................thuhh......G..s..G..hh.h.h.hh..........h.h.........p.h......t.........p..........h....phhp....t.h....h....p..h...t...h..t......................................................-h.h.Gh...uGh...hhh...l..lhh............p.h...tptph............ph.lh.p..hh.pt...lh.t.th.t...............................................................t......hGhuHG..h......sGhhhhLh...h........................hht.........thh....ph.....lpthh...t....h...phh.......ttt..........h........t..................................................tWChGssGhhhhh.....................hht......h.....h...t...p................p....h..p..h...p....h.hp...hhp........tt...........hht.slCHG.sG....h..h.h...........h..h..p...........ht.t........h.....hh..t.h.................ht..h..h..t............t............................................................shh..G.sGhh...h.h.t.............................................................................................................. 0 162 275 398 +3160 PF04738 Lant_dehyd_C Lantibiotic dehydratase, C terminus Kerrison ND anon DOMO:DM04916; Family Lantibiotics are ribosomally synthesised antimicrobial agents derived from ribosomally synthesised peptides [1]. They are produced by bacteria of the Firmicutes phylum, and include mutacin, subtilin, and nisin. Lantibiotic peptides contain thioether bridges termed lanthionines that are thought to be generated by dehydration of serine and threonine residues followed by addition of cysteine residues [2]. This family constitutes the C-terminus of the enzyme proposed to catalyse the dehydration step [2],[3]. 
19.80 19.80 20.10 19.80 19.70 19.60 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.54 0.70 -6.26 37 446 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 278 0 138 401 8 415.20 17 49.92 CHANGED l....phscLlpplspphsshssptl..cphLppLlppphLlosLcsshssscsLpallstL................tshsttsp.hhptLpplpphltpasp..tshupttphhpplppphpplh........ss.....ps.LtsDhhhssp........lplspsltpphtpthphlhplssths...spt.thppa+ptFhc+Y..G.........sp.Vslh-hlssstGlGhP.sh...........shspttpcp.hthhtphhptslpsp.pclsLs-p..tlppltssss............h.sos-lhhplps...ptlppGphplhlsshh..tuh.susshGRFt...........tppppltpphtphhpsh.........sh.splshhstpt+stNVhps.phh....sphlsl.sptss.sp...ppltlsDlhVs....ssspplaLhshs.p...pphlh.hssshhsht...phPslhRFLh-lu....t...ht......hshtt.hpthsah....PRIpatchlls.t+Wplsst-lss.....thppatthht..pa+pphplPcplalt............psD.................phlhlDlcsshplplLtpplc.+psphpl ...................................................................................................................h..l..t.......t.h...thltpL...lppthlh....p...lp.s........pshphlhphl....................tttt...hhttlt.pltthht.....thtt...tt.stt..thhtpl..pthptlh...............................................................................pt.l.hDhhhsht....................lst.phh.p.p.l.t.t..hth..lh..hs.........t.t..hpthppt.F.h.c.+a..u.................tt.V..slhphhtt...hu..hshs..............................................t...t......h..ht....ph....h..t..s..ht.tt...tp.....l.lspt........lpthh.p.................h..ph-lhhthht................tt.....t.......p.hlhls.hh..hs....h..uthh.uR.Ft.....................................tph..t..t....h...tp..tt.............s..spl...s.h.s......p.ttNl.htp.....h.....th.lsh..tt.ss...t...........ttl.lsDlhls....sp..ttpl.hlhs.p..h....sp..lh.h.....sh..hs............s.hh+.hLhpls..........................................h......h......tths....ah............P....Rlph.t.p.lllp.tpWplstt..tlsh..............p.t.......htth.....h.t...pappph..p..lP....phlhlh.................
.ttc..............................p.lhlshppshphthlhpthp.p...................................................................................................... 1 60 101 119 +3161 PF04737 Lant_dehyd_N Lantibiotic dehydratase, N terminus Kerrison ND anon DOMO:DM04916; Family Lantibiotics are ribosomally synthesised antimicrobial agents derived from ribosomally synthesised peptides [1]. They are produced by bacteria of the Firmicutes phylum, and include mutacin, subtilin, and nisin. Lantibiotic peptides contain thioether bridges termed lanthionines that are thought to be generated by dehydration of serine and threonine residues followed by addition of cysteine residues [2]. This family constitutes the N-terminus of the enzyme proposed to catalyse the dehydration step [2],[3]. 22.80 22.80 23.20 23.70 22.60 22.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.20 0.72 -3.97 38 378 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 260 0 115 326 3 93.90 23 10.55 CHANGED hp-AlhhuSssLhppltp.......htss....hps+chRphtpulhpYlhRhssRsTPFGhFuulshGpa......sppsss.phsssp..+ppsphDhtaltplspplcp .......................pEulhhuossLhpslpp.................htts......hps+c..h+.p..hppolhcYhhRhssRsTPFGhFuuluhuph............ssps...sh........ph..s..ssh......ppts..ph-ttalhplsphlt.t............................... 
0 47 82 100 +3162 PF00500 Late_protein_L1 late_protein_L1; L1 (late) protein Finn RD anon Pfam-B_69 (release 1.0) Domain \N 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.57 0.70 -5.96 19 3438 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 304 77 0 2486 0 179.90 45 98.21 CHANGED lWhPupsKVYLPPs.PVo+VlsTD-YVsRTsIaYHAuSsRLLTVGHPYaslppss.....p+hsVPKVSu.QYRVFRl+LPDPN+FuLPDpslaNP-pERLVWAstGlElGRGQPLGVGloGHPhaNKh-DsENssthtssstp......DsRpsluhDsKQTQLhIlGCsPslGEHWspupsCssstsp..G-.CPPlELhNosIpDGDMsDhGFGAMsFpsLQusKSDVPLDIssolCKYPDYL+Mus-sYGDSMFFahRREQhasRHaasRAG..ssG-slPsshYlcu..........t.....psshuoslYhsTPSGSlVoSDuQLFN+PYWLQRAQGpNNGICWsNQLFVTVVDsTRuTNhoIsssspss.....ssYssocF+cYlRHsEEY-LpFIhQLCKlsLTsEshuYlHsMssoILEsWphGlsPPPsssLEDpYRalpS..AhpC.cssPPpcpcDPYpch+FW-VDLpE+hSh-LDQFPLGRKFLhQsGlpppsphpspphp..sssssssppssKR+R .......................................................................................................................................................................................................................................................................................................................................................................................................................................hDsTRSTNh.olssthpst..........ssYpss...paKcYhRHs.EEa-LpFl.FQLCplsLss-lhshlpsMssslL-sWphGh...s..s..ssssl.DpYRalpS..AhtC................................................................................................................................ 
1 0 0 0 +3163 PF00513 Late_protein_L2 late_protein_L2; Late Protein L2 Finn RD anon Pfam-B_39 (release 1.0) Family \N 20.40 20.40 20.50 21.80 20.20 20.00 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.66 0.70 -5.63 29 499 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 258 0 0 495 0 460.80 41 97.13 CHANGED R...s+RpKRASATpLYpTCKtuGTCPPDVIsKVEtoTlAD+ILpaGSlGVFFGGLGIGTGuGoGGRTGYlPLG.....tsshssssssssRPPlllE...........sVuPoDPSIVSLlE-ou.....................hIpuGAshsshssss...GFslToSus...sTPAlLDlo....ssspoh.losTpapNPsFs-Pohlpsst.suEsoG+lllS....ssTluscs............hEEIPhcTFss.ss..ss.sshSSTPlPssppsst.t...LYoRshQQ.VpVsDPsFLopPpc....LlTaDNPAFEs.p..sTLhFpps...hptsPDsDFhDIltLHRPAlTo.RcGtVRhSRLGp+uThpTRSGppIGARVHaYpDlSsIsst.......-tIELpsLsttussss.............psuLaDlYs-.....sss.s..........hhpss.h.....pshsss.ss.ss.sspssNsTlPhspshcssh.sGPDlsh......Psss..sssshhPhsPshPshsIhlpGs....DFYL+PShahh...+RRKRlsh ....................upRpKRASATpLYpT..CKtuG.TCPPDVIsKVEtsTlADpILpaGShGVFFGGLGIG.TGu.G...T.GG.R..T........G...Y...l.PLGs........pssp.s..sss..........s.......s...ss......R.P....P...l..s.l-...................................sVuPs..DP..SIVo..LlE-.ou.......................................hI-uG.A.s.ssshssss.........GFs.lo...o....oss.........sTPAlLDls..........................sss..p..s......h...oso.s....apN...PsF...s...-.P...ol...l...p...sst....su-s.u...G.clhlo...................ssol.uscs.......................aEEIPh....cT.....F..h..s...........ps........ss............sspSST.P...l...Ps.s...c..t.s....s...pht.............LYuR.....s.h.Q.........Q.............VpV........sD.P.sFLop.Psp....llT..a..D..NPAaEs....sTLhFpps....h.p..APDPDF.h..DI..ltL.HRP.A...lTo..RcGs.VRhSRlG.p+.uT.l+TRSGppIGA+VHaYpDlSsIsss................Ep..IELQsLss.....s.ssss.........h.................pssl..aD...lYu.-s....s.......s.p...............t..h......s.ssst......shs......s.sp.p...s.s..ss...T...l.Phs.s..s.h.s...ss.l...h.s..G..PDlsh..........................ssss.....sssshh...P
h.s...P.....h.....s...Pth....s...l..h.lpuu........DFaLHPShhhh...++RKRh................................................................................................................................................... 1 0 0 0 +3164 PF02354 Latrophilin Latrophilin Cytoplasmic C-terminal region Bashton M, Bateman A anon Pfam-B_874 (release 5.2) Family This family consists of the cytoplasmic C-terminal region in latrophilin. Latrophilin is a synaptic Ca2+ independent alpha- latrotoxin (LTX) receptor and is a novel member of the secretin family of G-protein coupled receptors that are involved in secretion [1]. Latrophilin mRNA is present only in neuronal tissue [1]. Lactrophillin interacts with G-alpha O [1]. 19.30 19.30 69.20 20.30 18.70 18.70 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.07 0.70 -5.74 3 436 2009-01-15 18:05:59 2003-04-07 12:59:11 11 23 39 0 132 329 0 235.30 47 23.71 CHANGED RHhHCCShhPpG..RSol.ESopcoGupSGSGsPhpauh.oQSRIRRMWNDTVRKQSESSFIoGDINSTuTLNRGsMGNHLLTNPLLRPHGTNNPYNTLLAEoVVCNsPSsPVFNSPGuhhp..HsL.NuRDTSuMDTLPLNGNFNNSYSLRSGDY..uDGVQllDRGhNLND.TAFEKMIISELVHN......NLRGtSucs+ss.hchPVsPVsG..uSEDDAIVsDuAo...HtDssGLELlHKELEAPLLPPRsaSl....LYQss.....p....DcSESasupLTAEucEcLQSPsRDSLYTSMPNLRDSPoYP-SSP-s.sEsLsPPPPAcsGssE.IYYKSM.PNLGARN...QLQsYYQluRGSSDGYIlPPNKEGssPEG..D...GQMQLVTSL 
..................................................+oaCC.....t...+uo..tSsht.ostp...sot...................................oQSRIRRMWNDTVRKQoESSF.hsGDIN...........SosoLNp.........t..............................................................................ss..R-sssMDTL...PLNGNas.NSYSlttspa........tss...s..t..s......h....s.ht-.sshEKhIlpELspN.......N.+........t.t.......................................................h.....s.......................st...cpp..............s..hh.st.........................tt........tlEhh.....h.....p.t..ptPLl....R.....s..s.....................................................ttsto.hs..s..t...............t........t...............................ss....tRDSLYsShs.Lts.s.........................t.........-........................p..Yh.uh.PtLsst.........hYph......t.pG.......-.....s-t.........t...phlTSL.......................... 1 4 16 60 +3165 PF01273 LBP_BPI_CETP Lipid_binding_gp; LBP / BPI / CETP family, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family The N and C terminal domains of the LBP/BPI/CETP family are structurally similar. 28.30 28.30 28.80 28.30 28.10 28.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.85 0.71 -4.90 36 774 2009-01-15 18:05:59 2003-04-07 12:59:11 20 9 110 3 412 720 0 166.10 17 38.69 CHANGED uLphssp.uh..h.t.Lpplsls-hhssht.t..u....................th.hshhslcIpshplsp.hplchhssssh.hhsth......ssslplps..ph.hh......t.shlcl.........shslslpsplpltp-sp.GcsplshusCssshsslplpl.ss.....hshlhshhpshlppsltpllpsp....lCsll.....psh...Lpsl .................................................................l..h..t.sh..htpt.l.p.p.hplsshhsp..s....h...........................ph.hslpsl..plpphplsp..splph.hsstth.hh.h...............ssslslps....phtht.............t.thlch.................hhslslssplpltpssp.Gpspls.h.s...sCsspls.......p...lplphtss.....hshlhph.h..p.s....h...lpp...sl....pp....hlpsp........lC.hl........sth...h................................................. 
0 64 88 182 +3166 PF02886 LBP_BPI_CETP_C Lipid_binding_gp; LBP / BPI / CETP family, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C terminal domains of the LBP/BPI/CETP family are structurally similar. 20.60 20.60 20.60 20.60 20.20 20.50 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.42 0.70 -5.14 10 709 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 111 3 395 669 0 211.60 20 46.01 CHANGED ssLDlhhK......G....EFFshsc+sPsshsssshslP..pscsRMVYhulSDahFNoAshsYacuGaLpholssDhlPs-sslR...Lso.....psFushlPpLActaPshslcLpssssssPhlshpss.slslpsshslpsallh..Psu.s+pslaplshssssosslohpsc+lsGpLchc+lplcLpcSclG.hssEhlpul.LNhhlsshlhPtlN-+Lt+GFPLPLP.ctlpL..aclt...LpsHcsFLhlGADlpa ................................................................................................................................tpppMlhhhlS-ahhs...ohhhhh.p..s..Ghlphsl...s.s.p..ls..p............hh..........so.............................pshu.hlsp.l....s.p..t.a.P.s....t.s.h..lpl...p.s..s.p.s......P.h..ls.......hpss...sss..l..p..ht..ss.l.p.hhsh.......s......ss...s............tsl...............hplshs..s.s......h..ss.p....h.s.h..p....s.....p.......+....L..h.s...p....l.p.l..p.p..h.p..l...p.......h..tp.S..........pl.s...h..s....h.t....l.p.sh...lp.hlphsh...hP..hlN..t..h..L.....t.....tGhs.lPh.....ts....lph....hs......l...hhpshlhlt.................................................. 
0 92 125 231 +3167 PF03815 LCCL LCCL domain TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Domain \N 22.60 22.60 22.60 23.40 22.40 22.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.47 0.72 -4.00 68 852 2012-10-01 19:31:27 2003-04-07 12:59:11 14 48 198 1 528 762 11 100.40 30 19.17 CHANGED lsCpsphtpht..........h....hssphhhpCPssC..tpst...splhGot......................lYsssSSICpAAlHuGll.................sspGGplslphhsshppYhuo..p....pNGlpS...tsh..t....tpuF ........................................................pC.sph.p.t.................ssp.hthpCPusC..tpsp...............splh..Got..................................................hYp.s..sSolCpAAlHuGVl.................sspG.G.tlsVt.h..h...s.s..p..p.p..Yh.uS......h....pNGlpS....sh............................... 0 140 228 365 +3168 PF04072 LCM Leucine carboxyl methyltransferase Wood V, Finn RD anon Pfam-B_5898 (release 7.3); Family Family of leucine carboxyl methyltransferases EC:2.1.1.- . This family may need divides a the full alignment contains a significantly shorter mouse sequence. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.09 0.71 -4.72 211 3171 2012-10-10 17:06:42 2003-04-07 12:59:11 9 48 1290 36 972 2545 132 173.20 26 52.57 CHANGED AlhsshsRAhcspt.s...ssllpDshAttllpsh.......................phshtt..............h.thttt...hs.sRs+ahDchhtpth..tsG...htQlVlLuuGLDoRuaRls..t.........ssplaElDhPpllphKpphLspt.sp.....................s...hh.h..lssDlp..psWhssLttt..Gap.ssp..........P...shalsEG.llhYLst-shppLlppls .......................................................................................hhshhtRAhtspt.s.............psllp..DshAt.hlpth............................................................th..tt....................ht.t.h.t.tt......hs..sR...sphhDp....hhtphh..........tss.....................htQlVhLuuGLDo......R......sa.RL.s.............................tsp.ha.E.....lDh..P.p.ll.phKpphltp...t.tt.p.........................................................................................t..thc..h.........lssDl....p...t....p......s....W.....s..s....L.ptt......shc..sst.............................P......shhluEGl.lh.YLstp.t.ppLlptl.......................................................................... 0 293 586 821 +3169 PF04792 LcrV V antigen (LcrV) protein Waterfield DI, Finn RD anon Pfam-B_6155 (release 7.5) Family Yersinia pestis, the aetiologic agent of plague, secretes a set of environmentally regulated, plasmid pCD1-encoded virulence proteins termed Yops and V antigen (LcrV) by a type III secretion mechanism. LcrV is a multifunctional protein that has been shown to act at the level of secretion control by binding the Ysc inner-gate protein LcrG and to modulate the host immune response by altering cytokine production. LcrV is also necessary for full induction of low-calcium response (LCR) stimulon virulence gene transcription. Family members are not confined to Yersinia pestis [1,2]. 
25.00 25.00 61.00 41.50 19.40 19.80 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.84 0.70 -5.40 2 127 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 82 1 10 81 0 260.30 49 80.26 CHANGED .lRshpts.phFl--L....ltt.su.uSu..EELltLl+scpIsluhthpPhp-upV....................L.t.AhLhuup.ss..tpGlchl+EhLpu..pPssQW-LRtFhsshaFSLpu.RlD-DlltVhhDshpppsstRptLh-ELttLTAELKlYSVIQupINttLSuttsIpIcstuIsLhD.sLYGYs.s-.haKsSsEYtlLpph........-T.pthlSIKsFLpup.KpoGtLtsLpDpYsapKDNN.lupFATTsSD+SRPLNDhVspKTT.LsDhoSRaNSAlEALNRFIQKYDSVhpclLssh ................................................................................................l.shpts.thFlt.L....ltt.su.tss..pELltLl+scpIslu.ht.pPhp-.ups........................L.t.Ahl.u....up.ss..ttG....lthl+phLps.....pPs.spWpl..RtFhslhh.SLpuDRlD-Dlltshs-shsp+sstRppL+-ELt-LTAELKIYSVIQucINptLSs...sus...hphcspuhN....LhDhsLYGYs.s-thF..Ks.....SsEYKlLpch.............t.........spppl.lolKsFLtupsKpo...Gsluslcs.pYpapKDN.NcLupFuTosSD+SRPLND.VspKTTpLsDloSRaNSAlEALNRFIQKYDSlhpclLssh............................. 0 1 3 4 +3170 PF00056 Ldh_1_N ldh; lactate/malate dehydrogenase, NAD binding domain Bateman A, Eddy SR, Griffiths-Jones SR anon Overington enriched Family L-lactate dehydrogenases are metabolic enzymes which catalyse the conversion of L-lactate to pyruvate, the last step in anaerobic glycolysis. L-2-hydroxyisocaproate dehydrogenases are also members of the family. Malate dehydrogenases catalyse the interconversion of malate to oxaloacetate. The enzyme participates in the citric acid cycle. L-lactate dehydrogenase is also found as a lens crystallin in bird and crocodile eyes. N-terminus (this family) is a Rossmann NAD-binding fold. C-terminus is an unusual alpha+beta fold. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.60 0.71 -4.36 31 12983 2012-10-10 17:06:42 2003-04-07 12:59:11 18 15 8382 421 2210 8108 2565 125.70 35 47.64 CHANGED hKVullGA.GtVGpuhAhtlhtp.slucE..........LsLlDlspp..tscGsAhDLpHusshs.ssshlss...sshsshcsuDlVllsAGsspKPG.o.....RhDLlptNspIh+slssslscsu..ssuhllVloNPVD....IhshlhhKhushspp+lhG .............................................pl.lhGs.G.lG...........sh.............h......h..h.t........s.t...................l.l....h-.....h.............hh.G.s....h.-.....ltc.....h...........................h..................h.................p..............s........h...................s...........s..........t..........s......sh......ssh.c.sADl.Vl...l...o..A.....G....s....s......R....K......P......G..M..s....................Rt.........D.L..h..s....h....NstI...h....Kslsppls.p.s...s........ssu...h.ll.l.l.oN.P...Vs....lhs.l.h.p.Kh.u...s..hspp+lhG................................................................ 0 689 1286 1790 +3171 PF02615 Ldh_2 ldh_2; Malate/L-lactate dehydrogenase Bashton M, Bateman A anon COG2055 Family This family consists of bacterial and archaeal Malate/L-lactate dehydrogenase. L-lactate dehydrogenase, EC:1.1.1.27, catalyses the reaction (S)-lactate + NAD(+) <=> pyruvate + NADH. Malate dehydrogenase, EC:1.1.1.37 and EC:1.1.1.82, catalyses the reactions: (S)-malate + NAD(+) <=> oxaloacetate + NADH, and (S)-malate + NADP(+) <=> oxaloacetate + NADPH respectively. 
21.50 21.50 21.90 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.82 0.70 -5.72 8 2631 2009-09-11 11:09:40 2003-04-07 12:59:11 9 4 1423 44 609 1711 3069 322.80 31 94.97 CHANGED M+lshEpL+phIpcVLpphGlsEEcAchVADshlcADL+GhpSHGlsRhPpYlppLppGsIsscsch+hlcpususullDGDcuhGpVsA++uM-hAI-pA+ppGVGlVul+NuNHaGtuGYYuppAAcpGhIGIshTNoc..PhVsPaGG+EshlGTNPlAlAhPupc..hhhlDMATSshAaGKlhsARc+GcpIP-ssulDc-GssTTDPshlhcs.t..lLPhG.......GaKGYGLSlhlDlLuGlLuGushuspVopssss.--tsspsplaIAIsP-hFhss-pFcp+lsphh-ElKuSp.usthpplhlPG-hpshphccpp+pGIslDcslappLpsl ..................................lshppL+p.hhpplLpp.tGhspppApt..lA-hLltAshpGh..pSH.Gl...s...Rl.st..Yl.pplppGt.lp..s.s.p...s.plh.p.p.s.su..h..shlDucpuhGplsuctuMstAI-....h....A.cppG.luhVul+...s.u.s.HhGthuhaspp.sAc.tG.hl.........u..l.shs....s..os....sh...lsP.aGGp-shhGTNPlshu..hP.........s................p......s.........t.....shllDhATSshAhGKlpssp..ppG..c..p.lPs.shulD.p.p.G..p.s.......T...sDP....t.....s....h..................p..s..h.............h...L.P.h.G.........................taK.G.GLuh....hl-lLuGsL.s.Gu.s..h...s..t.......p.......l........s.....p...........h...s.....s.......t.ps...t..h.....s..phhI.s.IcPshF....s..s.....t..t..h.p..tc..hp.p.hhchlpsst....ts..s..p.tlhhPG.c.p.htt.t.p.c.s.t.p.p.G.I.slssshathl...t...................................................... 0 183 341 493 +3172 PF02866 Ldh_1_C ldh_C; lactate/malate dehydrogenase, alpha/beta C-terminal domain Bateman A, Eddy SR, Griffiths-Jones SR anon Overington enriched Domain L-lactate dehydrogenases are metabolic enzymes which catalyse the conversion of L-lactate to pyruvate, the last step in anaerobic glycolysis. L-2-hydroxyisocaproate dehydrogenases are also members of the family. Malate dehydrogenases catalyse the interconversion of malate to oxaloacetate. The enzyme participates in the citric acid cycle. L-lactate dehydrogenase is also found as a lens crystallin in bird and crocodile eyes. 
22.60 22.60 22.70 22.70 22.40 22.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -4.53 19 12166 2012-10-02 13:39:36 2003-04-07 12:59:11 13 14 7571 421 2231 7218 2555 135.60 32 49.53 CHANGED TpLDpsRupsh...........lAc...ttsl.sPpsl...pVhllGsHus..........lshlPl.p.sp..sphs.cpplpplhpplppuGscllctKt..GSsshShAhAhs+....hscullpsh......pshh.huVhpsu..husps....................haauhPlhlstsG.....lpcl.l-.hs.Ls-hEccthp.putspLcpplcpuhphh ................................TsLDssRhcsh.................lA-...............hhsh..ss.ps.l............cs.llGsHu...........s...................................V........s.......h........l.......P.......L....h....s........l....p...............................t.........s.....t.....t.....h.....t...tl....h.t.....p..h..tp..t.u...hpl....lph.+.................sus........hu..h..u.uhhp.................hsp...sh..h.ts.................................pt........hus.h....p.......u........ht.......t...........................hhhu.Pshls...p.G.......h.ph....hp.......h.....l..s.t..p..tt..ht...t.h...t.l.t.........h....................................................... 0 697 1291 1800 +3173 PF00058 Ldl_recept_b ldl_recept_b; Low-density lipoprotein receptor repeat class B Bateman A, Sonnhammer ELL anon Swiss-Prot Repeat This domain is also known as the YWTD motif after the most conserved region of the repeat. The YWTD repeat is found in multiple tandem repeats and has been predicted to form a beta-propeller structure [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.77 0.72 -3.63 28 8618 2012-10-05 17:30:42 2003-04-07 12:59:11 12 734 140 112 4691 7279 7 41.70 31 14.93 CHANGED splYWoDhs.p.....sluhsshsGss..pcsllspc........lppPpuIslDh .............tplYWo..D.hs.pp.............pIc.pu...s.h...c....Gsp........Rps.l..l..sss............lsp.Pp..ulslD.................... 
0 963 1312 2717 +3174 PF03760 LEA_1 LEA-group1; Late embryogenesis abundant (LEA) group 1 Finn RD anon Pfam-B_1549 (release 7.0) Family Family members are conserved along the entire coding region, especially within the hydrophobic internal 20 amino acid motif, which may be repeated. 25.00 25.00 26.10 26.10 22.70 21.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.55 0.72 -3.83 8 153 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 36 0 41 142 0 71.70 43 51.71 CHANGED MQSuKEKhoNhASoAKEph-lsKAKs-EKAEKuTARoctE+clAHp+cKAKEApAcM-hHpAKAcHAA-K.ps .........hpohKEpspNhuuuAKtth-hsKAplpEKAEKhTARs.hcKclAcc++cAKhspAch-h+pA+tcpsAtK.t.t........ 0 3 22 34 +3175 PF03168 LEA_2 Late embryogenesis abundant protein Mifsud W, Griffiths-Jones SR anon Pfam-B_3080 (release 6.5) Family Different types of LEA proteins are expressed at different stages of late embryogenesis in higher plant seed embryos and under conditions of dehydration stress. The function of these proteins is unknown. This family represents a group of LEA proteins that appear to be distinct from those in Pfam:PF02987. The family DUF1511, Pfam:PF07427, has now been merged into this family. 23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.19 0.72 -3.63 144 1601 2009-09-13 08:28:33 2003-04-07 12:59:11 8 14 422 3 862 1521 41 100.30 16 47.34 CHANGED plplpN.PNs.hs.lsh.ssls...hslt..h.....su....ptlu..t.us.spshplsspups....hlsl..s.....lplshts.h.........hphhtshh...........hstplshplcuplp.h.h.h.hthsl.shspps ....................lplpN..PNp..hs.lhh..sshs......hplh..h.........ps............ptlu.........ssthssht.h.stp.s.ss..............slsl......s..............lsls..sht.h..................spth..t..s..hh....tt..................hshplphplcs....chp......hh..h..hphph..ht.................................................. 
0 152 498 703 +3176 PF03242 LEA_3 Late embryogenesis abundant protein Bateman A anon Pfam-B_3170 (release 6.5) Family Members of this family are similar to late embryogenesis abundant proteins.\ Members of the family have been isolated in a number of different screens. However, the molecular function of these proteins remains obscure. 21.00 21.00 21.40 21.00 20.40 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.54 0.72 -3.74 13 167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 52 0 57 159 0 86.70 32 82.53 CHANGED MARSlo.suKhlSAhlsDslSsh.hpRRGYAAAust..hsu.....hRuGustsss.hts+suEs.....sstssuWsPDPVTGYYRPEspusEIDsAELRphLLs ...................................................................................t.h................ppRuYuA....uAst...httu.........ts.us.s.tphs.......tt..scssts..............spcpssWhPDPsTGaYtPEs.phsE.lD.sAELRttlLp......... 0 7 30 43 +3177 PF00059 Lectin_C lectin_c; Lectin C-type domain Sonnhammer ELL, Griffiths-Jones SR, Eberhardt R anon Swissprot_feature_table Domain This family includes both long and short form C-type 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.85 0.72 -3.39 95 12227 2012-10-02 16:37:33 2003-04-07 12:59:11 16 665 521 504 7014 11645 360 107.50 20 30.77 CHANGED pthsapcApthCp.p...hsupLsshps....tpchchlpphh...........sspphWlGl.......ttpptsapW.sssshp...........................spsspsppCshl............sssphss......psCsp.tphhlCcp 
.......................................................................................t..hsappA..p.p.hCp..p............ts....u..p.L.s.s..lps.....................pp.E..p..p...a....l.tp.hht................................tppthW.....l.GL..................pptppps...p......a.p.W.s..D.u...s..s.h..s............................................h.tW..........................sst.ssspsCshh.................................ttsspW.ps..............................ts..C......p....p....p......h.........a..lCp........................................................................................................... 0 2294 3027 5110 +3178 PF03041 Baculo_LEF-2 lef-2; lef-2 Bateman A anon Pfam-B_1773 (release 6.4) Family The lef-2 gene (for late expression factor 2) from baculovirus is required for expression of late genes. This gene has been shown to be specifically required for expression from the vp39 and polh promoters [1]. LEF-1 is a DNA primase and there is some evidence to suggest that LEF-2 may bind to both DNA and LEF-1 [3]. 25.00 25.00 60.10 58.10 20.00 19.50 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.22 0.71 -4.57 11 69 2012-10-01 20:20:07 2003-04-07 12:59:11 9 1 62 0 0 68 0 164.60 35 79.78 CHANGED ph.lhapP...tpsIDcstpYlVchpsas..lslosYTsFppsGLhVhlsGhpLhpLlcsssstppssssp.........tpp+sp+NVCFpssss.s+psllshLpsplp...hPsChpphhpclpsp..PRssRaRKRFlFNsYlsNsloCs+Csp.pClhcAhphhYptDpKCVpElpp .........h..hWsP..ht..t..sslDK.sts.YhlchpDa...lsloPYTsFhpsG.hhlclsGhR.Lhh...Llpssss.ppp.psst..................hhc+Sp+NVCFpsstp.t+psllchlppplp....hPsC....hpph.hssLp..hp..PRGsRa+KRFlFNsYIuNlloCsKCcp.pClhcAltthYppDsKCVpElp... 0 0 0 0 +3179 PF03388 Lectin_leg-like Legume-like lectin family Mifsud W anon Pfam-B_2789 (release 6.6) Family Lectins are structurally diverse proteins that bind to specific carbohydrates. This family includes the VIP36 Swiss:P49256 and ERGIC-53 Swiss:P49257 lectins. 
These two proteins were the first recognised members of a family of animal lectins similar (19-24%) to the leguminous plant lectins [1]. The alignment for this family aligns residues lying towards the N-terminus, where the similarity of VIP36 and ERGIC-53 is greatest. However, while Fiedler and Simons [1] identified these proteins as a new family of animal lectins, our alignment also includes yeast sequences. ERGIC-53 is a 53kD protein, localised to the intermediate region between the endoplasmic reticulum and the Golgi apparatus (ER-Golgi-Intermediate Compartment, ERGIC). It was identified as a calcium-dependent, mannose-specific lectin [2]. Its dysfunction has been associated with combined factors V and VIII deficiency OMIM:227300 OMIM:601567, suggesting an important and substrate-specific role for ERGIC-53 in the glycoprotein- secreting pathway [2,3]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.52 0.70 -5.24 8 783 2012-10-02 19:29:29 2003-04-07 12:59:11 8 10 280 35 470 811 29 200.80 27 52.53 CHANGED cpacpcaSLhtPahspupsslP.hWcatGsshlo.SstIRLTsc...pSppGulWs+psshh.csWElclsF+VsGptt.pltuDGlAlWYTp-psh..sGPVFGusDpasGLuIhlDoasNcsss.p+spPalsuhlNDGohpYDHscDGspspLAu....Cpt.cFRNp-asoplpl+YspshLolhh-.s.lcsps-achChplssVpLPsGhY.FGlSAsTGsLuDsHDlhShhhhplps 
........................................h......hoh..P...h....s.......s.....tt.h....s.hWph.tG..s..s...h...h........p.....s....p......h...l.RLTssh.........ps.p.p.G........s.l.Ws..c.sh.................h...p......sWc...l...............clp..F+l.........p.........Gp............t....ph...t..u...D..G.hAlW...h.sp......ppht.............G......s......l....a...G.s...s.....pa.pG.....l....ulhh......Do....a...Np............t....................................t.....hP..h...lps....hhN....s......G..s..h...p..Y-p...ppD...G....p..t..p...t....l..uu.......Cph.........t....h.....R........s...t...s.....h...s.........o....phpl..pY.h..p..t.....h...p..l.................h.....s.............h....p......sp...tp......a.p..........Chp.s....p.....s.....l...t...l.....P......p.....s......h......a.....hGhS..At.TG.......s.L......u.D........sHDllphhhht............................................................................................................. 0 158 234 362 +3180 PF00139 Lectin_legB lectin_legB; Legume lectin domain Sonnhammer ELL, Bateman A anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.90 0.70 -5.00 47 1651 2012-10-02 19:29:29 2003-04-07 12:59:11 14 73 320 724 782 1765 72 204.20 27 39.67 CHANGED ssspFsa........ssF......st......ssLhLpGsApl........ssGhLp.LT.......sssp.......pshG+AhYs.pPlpl...hssssup....h.......sSF.sToFsFsI..h....ssssucGlsFhluPss....ssu..sutaLGLhNtpsss......sspllAVEFDTht...php-hDs....NHlGIDlNSlpSsto...sh................tshsLsu...GpshplaIsYcussppLsVslu.s..p.............cPpp.sllStsl.DLss.lls..-......psaVGFSAuT.G........hsptHhlLuWSFs 
..................................................................................t...a.h........t.a......tt.........ts.l....h...Gs..A..h.........ss..s.h.lp..LT.....pst.....................tthG+shas.tP.lph............hp.........s..s..t..........h......................tuF.so...pFsFs.l....................................t........s............suc...........G....huFhls..sst.......sh.sts......suth...L....G...l..hst......p....stt.....................psphlAV...EF...DT.hh................t.....t..c.hss...............sH.l.G..l...D..l.......N......u....l..h.S....h..t..s...........................................hsh.s.lt.s......up.......hpshlsYsu.s.s.....p.......hLsVsls.....t...........................pspp.....h..lo.h....l.....sL...ps...h.l.s....-.............shlGF....SuuT..G.......thh.pt..p.lhuWsF............................................. 1 114 409 591 +3181 PF03954 Lectin_N lectin_N; Hepatic lectin, N-terminal domain Finn RD anon DOMO_DM01961 Family \N 25.80 25.80 26.50 25.90 25.20 25.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.54 0.71 -4.77 5 128 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 27 0 61 131 1 121.20 43 48.84 CHANGED pchpHLpNs.p-c+shcKGPPPoQPLLQRLCSssRLLLLSLuLSILLLVVVCVIGSQN.....SQLQEELRoLRETFSNFTSSTcsEVpALST........................pGGSVGcKlTSLESpLEKQQQDLKsDHSoLLhHVKQFssDLRoLoCQMAsL ......................................s...ptp....tG.....sPs.ps.h.hpR.LpShspL.LLuLuhslLLLV.llCVlGSQs............upLpc-LtsL+psFSNFous...ThsE.lpuLso........................pG.uu..lscKlpSLEupLEKppQ-Lps......cp..SplhhHlpphs.DL+.LsCQhs.h............................ 0 4 4 10 +3182 PF05098 LEF-4 Late expression factor 4 (LEF-4) Moxon SJ anon Pfam-B_6330 (release 7.7) Family Late expression factor 4 (LEF-4) is one of the Baculovirus late expression factor proteins. LEF-4 carries out all the enzymatic functions related to mRNA capping [1]. 
20.20 20.20 20.60 22.10 17.80 20.00 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.78 0.70 -5.73 25 64 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 60 0 0 63 0 434.20 40 97.95 CHANGED EpEISYTINaSQDLLYlIhcoYIsK+h.pht-cYsDllDpNsVRTRlhs................sphsSV+Kpslshp+hVah..sssslVPhVsRcshEpsssssspp......l+RllcspVY+...tpsslEIKFEHlYappNhsDphDuLhAsKQIsLaNLLpspsp..slspNSHLGSDEILAslRLEhEYc..sssstssLpphscllsch-.slstppNIoPhLPYTTLhNpIhYRKFpcE+.hl.....hs.t......t.ssssVh+WAlKLDGlRG+Gahs+s.hhll.hDDMQhFSGpl.......................................ss.FslNNlVuFQCEll-..pslYlTDlLpVFKYpYNNRsQYEsSh.psYsl-shsAlpslNhh........sssssphslssh.tst..hpl+FQpFacPPlphst....YsolPsDGFVVLDsphpYVKYKphKTlElEY-spsstFpslpGslpspshhss......Lp+ssIYEsllsc...ssIpVlKpRPDRLVPN ....................................................EpEISYoINhSQDL...LYhIhcoYIs.cph..phtpcYsDlhDpNslRTRl.s................sph.sSVpKpshphc+hVah..spsslVPhVpRcshEpshspsshp......l++llcspVY+......psplEIKFEHlYhppshh..Dp..hDuLhAsKQls....LhNLLpsssp.....sl.hpNSpLGSDEILAslRLEhEY-t...sssst.....shLpt..........hs....plltph-.slsptp.NIs..PhlsaTTl.NpIhYRKFtcEphl..hs.t...........t.sssslh+WAlKLDGlRG+Ghhsps....h.............h.............ll.hDDMQhFuupl.......................................ss..F.slNNl.VuFQCEllst.pphYlTDlLpVFKYpYNNRTQYEsSl.ssYslsshsAlpslNhh.........ppss.pplslpsh....s.thpl+FQpFacsPlp.st...........YsolPsDGaVVLssphpYVKYKhhKThElEYsstsshFpslsGslpshplhss......Lp+ssIYEsllsc...ssl.pVlKpRsDRlVPN.... 0 0 0 0 +3183 PF04941 LEF-8 Late expression factor 8 (LEF-8) Moxon SJ anon Pfam-B_5130 (release 7.6) Family Late expression factor 8 (LEF-8) is one of the primary components of RNA polymerase produced by polyhedrosis viruses. LEF-8 shows homology to the second largest subunit of prokaryotic DNA-directed RNA polymerase[1]. 
17.60 17.60 57.60 20.40 15.10 14.80 hmmbuild -o /dev/null HMM SEED 748 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.32 0.70 -6.86 7 329 2012-10-01 19:23:01 2003-04-07 12:59:11 7 2 143 0 0 243 2 320.40 47 92.17 CHANGED VlpDFscLYcplcsp.asLcahLsCss.puss.sol+hLQERKSYFCCAlc.sht+CVLHKCVlVVFGThLDtpFRss-......................sspsslpGTFMlDGRaLSFPNIMMNNNlLlHNFYDKLYu..KsCKRMFLYGNlD-EKpINRAIQLVYDctpDlLFARDVYApDYVVT--LNplLEhYLpsSGKWcPLsFLFcasptps.pLV-pIKhIMpt-INYSIDSLuNKIIYKHsYLlpLlY.cslLptYpt.htps.s...............s.sstsK++Ks.QolhasKEsKKIVDoIVNG+LIYsVSKTFSKQKKsF.N.QDNSSNNNIEIs.PsLKYRlGNEVlRITNDoMRQDMLKQchDFVKFlDSFFHGEMTVAGKKFFLCRsVRLPsVDYphVAc+FppLlppsLlhhss...........cp..........s-stDs.LLIAFNsRPTshpCcRsclspIhYthKRNhsPlElKlsssILFVNHHEGMlCIKKpV+lss....lpIssLLTPYEYHNppSllpshs...spl.EpDcVssLMSKLlQYYYpsahplFsTlPVPKLIVSLTNLKNAMPVhpYss.......t..lssLPlGNSVsVuPclhhNNKMFpLWTLVRDs+LMTAEDPYIPchsLPI+LYNNKlNKLKGKLshupp.psPhlKFh.pS.spsNhVslpsGpVLhhsGVlVSNsKIsWsaDGKRYKIETCpNKsaaVYKIYlYaRplcsQ+lE+lcuphsstsDsValKlslVTSTssLcGlKICGIHGQKGVhNsuEDLTEWMAEDG 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................VAGKKFFLCRsspLPsVDYphVApKFp.LlppsLlhhss...........s......................................p.sp..cs.tllIAFNsRPThhpCp+.ssl.hIhYphKRNhsPlELKl..sspILFVNHHEGMlCIK+plpIps.....pspIssLLTPYEYHNppSllps.ssh....spl.E...p....D...cVpsLMSKLlQYYY+sahplFsThPVPKLIVSLTNLKNAMPVhpYpp..........p.hlssLPsGpSVsV.s.spIhhNsKMF+LWTLVRDs+LhT.............................................................................................................................................................................................. 2 0 0 0 +3184 PF05094 LEF-9 Late expression factor 9 (LEF-9) Moxon SJ anon Pfam-B_6326 (release 7.7) Family Late expression factor 9 (LEF-9) is one of the primary components of RNA polymerase produced by baculoviruses. LEF-9 is homologous to the largest beta-subunit of prokaryotic DNA-directed RNA polymerase [1]. 
19.50 19.50 22.60 22.50 17.20 15.60 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.64 0.70 -6.11 7 276 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 131 0 0 138 0 180.10 76 98.01 CHANGED hlpFhsKsPocF-LLhDPscl.ssshFhshccF+hFLKslIsDLK+..KhNaFNSLl-QLIsVYp-ss.t+NEHTchLuRIlhATslVVT-LPSNVFLKKLKhNKFTDsIsYLILPNFILWDHNFllFLNKsFNSKH-suLVDISGslQKIKLTHGVIKDQlQsKNGYAGQaLYSTFLNTASFYANVQChNGsNEIlPP+sSlpRYYGRDVsNlRAWTTRHPNISQLSTQlScVhts....-s.sDWNVKVGLGhFsGANpDCDGDKcVITaLPpPNSLIDLECLLYGDPRasFICFDKNRLuFVSQQIYYLaKNlc+lEpLhcohPllhsLWppa+...stpFupRLEhLLRDssLlhSSNsSaLLappLsplIcsEEMVCuDcElhsLsGpFsslIcSGAKGStsLlcSTcpY+pTcssDlDTVupRAlTuLNSaIoSHNRVKlsGGDIYHNTsVLQNlYLKsshICYKsDshsluslCsLPSEFLFPEHLLDhF ....................................................................................................................................................................................................QaLYSTFLNTASFYANVQCLNGsNEIlPP+uSl+RYYGR.DVs...N..VRAWTTRHPNISQLSTQlScV+ps.......-s..TDWNVKVGLGhFsGANT....................................................................................................................................................................................................................................................................... 1 0 0 0 +3185 PF05150 Legionella_OMP Legionella pneumophila major outer membrane protein precursor Moxon SJ anon Pfam-B_6492 (release 7.7) Family This family consists of major outer membrane protein precursors from Legionella pneumophila. 
20.90 20.90 21.20 22.60 19.80 19.70 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.87 0.70 -5.30 2 182 2012-10-03 17:14:37 2003-04-07 12:59:11 7 2 18 0 16 92 0 199.50 58 94.10 CHANGED MhsLKKTssAVhALGSSAlFAGTMGPVCTPGNVTVPCERTAWDIGITALYLQPhYDADWGYNGFTpVGGWRpWHDVDhEWDWGFKLEGSYHFNTGNDINVNWYHhDssoDHWA.hsphHsYs.pWDAVNAELGQFVDFSANKKMRFHGGVQYAhIcsDVNRahNsFhhssFNSKFNGFGPRTGLDMNYVFGNGFGlYAKuAsAILVGTScFhDs...hsF.hGSKNAIVPElEhKLGADYTYAMAQGDlTLDVGYMWFNYFNAhHNTush.....suhETDFuASGPYIGLKYVGNV ..................................................................................................................................................YHa...D...sD.....o....D........+W.....s..........s...hu..............s.........h.............H...........s...........Y..s....N....+WDAVNAELG.QFVDFSANKKMRFHGGVQYARIEA..D......V.....N.....RY.........F.........N...N.....F......A....F.N.....G....F......N..S.....KFNGFGPRTGLDMNYVF.GN...GFGVYAKGA...AAILVGTSD.....F...Y.........D..G......................I........s......F......I......sGS..K..N....A..IVPELEAKLGADYTYA.M......A.QG.DL.T..LDVGYMWFNY...FNAM...H....N.T...uVh.......................................................................................................... 2 10 10 15 +3186 PF03020 LEM LEM domain Bateman A anon [1] Domain The LEM domain is 50 residues long and is composed of two parallel alpha helices. This domain is found in inner nuclear membrane proteins. It is called the LEM domain after LAP2 Swiss:Q62733, Emerin Swiss:P50402 and Man1. 20.50 20.50 20.50 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -7.94 0.72 -4.51 8 494 2012-10-03 03:04:30 2003-04-07 12:59:11 10 23 94 5 269 440 0 41.30 39 7.99 CHANGED sDlspLSDsELpspLppYGlssGPIluoTR+LYEKKLhKLccp ...................ppLosp-LpppLhch.Gls.s.GPIs..soTRplYEKKL.h..ch................ 
0 66 87 174 +3187 PF04011 LemA LemA family Bateman A anon COG1704 Family The members of this family are related to the LemA protein Swiss:P71452 [1]. LemA contains an amino terminal predicted transmembrane helix. It has been predicted that the small amino terminus is extracellular [1]. The exact molecular function of this protein is uncertain. 21.00 21.00 21.00 22.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.03 0.71 -5.18 10 2668 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 2218 1 681 1981 523 183.70 35 94.86 CHANGED hhsslhlhllllllslhsssuYNsllst-cslcsAWSplpsphQRRsDLIPNLVETVKGYAuaE+cTLccVsEARA+ss........thpcspshp+hppApsELouuLuRLlllsEsYPsLKANpsFhpLpspLcGTENRIAluRpcYNcuVpcYNspl+pFPollsA+haG..F+stshFpss..EutpssPKVcF ...........................................................................hh....lllh.l..l.l.l.l..l.s.h..h.s..h....s.s.Y..NsL...lp...hcppscpuWup.....l-sQh.pRRsDLlP..NL........Vp..TVKGYAp.......a..E.p....p.....sL.p..p.....VscA..Rspss...................................s..s...p..s...hpp..ht....p....A..p.s.p.L.....osu......L...u..p..L.h....slsE....s.YP.-.LKAsps..FhpLQ...ppLp.sTEN+IuhuRphYNssVppYNspl.c.p..FP.s.s.l.l.A.t.h.hu.......a.p.t.t.s.hapss....tt.t...p.p..s.PpVpF............................................. 0 226 462 587 +3188 PF02998 Lentiviral_Tat Lentiviral Tat protein Bateman A anon Pfam-B_1519 (release 6.4) Family This family contains retroviral transactivating (Tat) proteins [1,2], from a variety of Lentiviruses. 19.80 19.80 19.90 21.70 18.20 19.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.07 0.72 -4.16 7 50 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 15 0 0 41 0 58.70 81 96.23 CHANGED .EEhPpRR.stscEhs.h..a.cEh-sWpasS.RVPGEhLQRWLAMLpsuR.R++VlREMQKWMW+aPKAPVIRsCGCRLCNPGWGo .....Ep.s.t..ht..th..h..h.c-h-sWphhS.RsstphLQhWLAMLp.tp.RtpVhpEhQhhhWh..tA.lhRsCGCRLCNPGWGT. 
1 0 0 0 +3189 PF02024 Leptin Leptin Bateman A anon PSI-blast P41159 Domain \N 25.00 25.00 25.40 25.00 24.90 23.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.02 0.71 -4.53 7 218 2012-10-02 01:28:15 2003-04-07 12:59:11 10 1 124 1 24 189 0 120.30 70 91.48 CHANGED VPIpKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPlLSLSKMDQTLAlYQQILTSLPSRNVlQISNDLENLRDLLHLLAhSKSCsLPpspGLEohESLGGVLEASLYSTEVVALSRLQGSLQDMLpQLDLSPGC ...................................lpsDoKsLlK....TIlsRIsDIS+hQSVSSKQRVTG...LD.F...IP...GL...H...PlLSLSKMDQTLAlYQQILTSLPS.RNVlQISNDLENLRDLLHLLAsSKSCsLPpsp..uLc..oL-SLssVLE...ASLYSTE.VVALSRLQuuLQ-hL.pLDhuPtC.... 0 1 3 6 +3190 PF03588 Leu_Phe_trans Leucyl/phenylalanyl-tRNA protein transferase TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.40 22.40 23.00 22.60 21.90 22.20 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.97 0.71 -5.13 9 2004 2012-10-02 22:59:21 2003-04-07 12:59:11 9 3 1956 17 506 1355 651 170.90 46 73.57 CHANGED ssspLltAYppGlFPhhpp..spslhWasP-sRullhs-.plHls+slp+sl+pshapVphstsFtsVI-uCAsst...tpsTWIspthpcsYhpLHphG..aAHSlEsWpscc.....LVGGlYGlulGplFaGESMFSptssASKlAhlpLschLcstuatLlDsQh.spHLcphGApclsR ..........................................s.stRLLtAYppGIFPWass........spP..ll...WWS.....P-.....PRuVL.....h..Pc.....ph...H.lS....+Sh++....h....h....+........p.........s..............a....clolspsFspVIcu.CAss....+..................p.........puTWIscplhcA..YpcL....HchG............aAHSlElW..p.s....s..c.......................LVGGlYGVul..G..p...lFhGESMFS....c...t....p....s...A...SK..sALhh.Ls.p.c.htpp..G...h..pLIDCQ.h.h.s.sHL....tS.LGAp-IsR..................... 0 155 324 426 +3191 PF01819 Levi_coat Levivirus coat protein Bateman A anon PSI-BLAST 2ms2 Domain The Levivirus coat protein forms the bacteriophage coat that encapsidates the viral RNA. 180 copies of this protein form the virion shell. 
The MS2 bacteriophage coat protein controls two distinct processes: sequence-specific RNA encapsidation and repression of replicase translation-by binding to an RNA stem-loop structure of 19 nucleotides containing the initiation codon of the replicase gene. The binding of a coat protein dimer to this hairpin shuts off synthesis of the viral replicase, switching the viral replication cycle to virion assembly rather than continued replication [2]. 19.40 19.40 21.40 21.00 19.20 15.80 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.25 0.71 -4.56 5 151 2009-09-11 00:12:26 2003-04-07 12:59:11 12 1 26 164 0 161 0 102.30 51 72.02 CHANGED ApLpslVLsspGuTGNsTlsls...VNssNGVAEllpusuVPutEpRVTlSVRQoSssR+KYslKlcVPslsoQTVNGlssPuVsRpuYssV-LoastaSTscER.slIsppLAALLKDshlIcusIssNsGa ..uphpphsLst.GtsGp.Tls.....ss.sNGVuth.ps.ss.u.th+VThShttsutpp+paplplcl.pssspTssGsp.PssshpuYhsh-LTIPIaATssDs.tLIsKuhtGLLKDGsPIsusIusNSGh... 0 0 0 0 +3192 PF01726 LexA_DNA_bind LexA DNA binding domain Bashton M, Bateman A anon Pfam-B_1975 (release 4.1) Domain This is the DNA binding domain of the LexA SOS regulon repressor which prevents expression of DNA repair proteins. The aligned region contains a variant form of the helix-turn-helix DNA binding motif [1]. This domain is found associated with Pfam:PF00717 the auto-proteolytic domain of LexA EC:3.4.21.88. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.96 0.72 -4.41 9 3433 2012-10-04 14:01:12 2003-04-07 12:59:11 11 9 3000 10 740 2157 1670 64.30 40 31.34 CHANGED MpsLTtRQp-lLchI+splpppGaPPohtEIuptlGhpSssuspcHLcALp+KGhI-hsPGpsRu .............................tLTt.RQpcll-hI+ppl.p.ppGhP..P.ohpEIuptlGh.....p.SssusccHLpsLp.+K...GhIchssspsRu............ 
0 239 478 621 +3193 PF01790 LGT Prolipoprotein diacylglyceryl transferase Bateman A anon Prosite Family \N 24.90 24.90 24.90 24.90 24.70 24.80 hmmbuild -o /dev/null --hand HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.67 0.70 -5.44 67 5002 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 4357 0 1133 3460 3019 251.80 31 88.04 CHANGED hsPlhhplG.P..l.......sl+W.....YGlhhhhGhllAhhl.ut+psppt...........shst-pltDhlhauhhusllGuRlhYV......l...aph......shY.....hpp.........P.hplh...plWcG........GhuhHGGllGshluhhla............................s+ppp..ls.....hhphsDhlAPslslG.uhGRlGNFhN.tElaG+.....s....ss........athl.P............................................t....................th.......................h.hh.hHP.......o.LYEuhhp.lllFllLhhhh....++..hthh.GtlhuhallhYulhR.hhlEhhRp..sshh............hhhh...............lphuQlLSl.hllhGlhhhlhtp ................................................................................................sPlhhpl.G..P....l..........sl+W.....YGlhhlsG.hlh.Ahhl...up..+..c..hp+t..................................sh.s..p...-...p.l..-...ll.h...h...u.h.lu.s...ll.GuRlhYV......l......aph............................shY...............hps..................P....hp.lh..........p.lW.pG............................Gh.....uh.H.GGll.G...shls..hhl.a............................................................s.+..+.p.p.....hs.......hh.p.hhDhl.APslslu.uhGRh.GNF...hN...tE.haGc............ss......shs...................huhlhs.........................................................................................................thshh..hHP....o..LYEuhhp.llhFll..L.h...h.ht....................++.................h............t......G....tlhu....lalhhYuhh...R.hhl.E.hh.Rp.ss.h..........hhhh.............................lphuQl..LS.ls..h..l.lh.Glhhhlh..t...................................................... 
0 398 772 979 +3194 PF00556 LHC Antenna complex alpha/beta subunit Bateman A anon SCOP Domain \N 20.50 20.50 20.80 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.65 0.72 -4.11 28 509 2009-09-11 14:13:18 2003-04-07 12:59:11 15 1 110 48 157 438 71 38.60 27 65.17 CHANGED ptchhclac.hhsh...hhhhsllAllsHhllhs...tpsWls .................chhplas.hhsh...hshhhslAllsHhllhs.....hpsWl......... 1 40 76 92 +3195 PF04991 LicD LicD family Moxon SJ, Bateman A anon Pfam-B_5278 (release 7.6) Domain The LICD family of proteins show high sequence similarity and are involved in phosphorylcholine metabolism. There is evidence to show that LicD2 mutants have a reduced ability to take up choline, have decreased ability to adhere to host cells and are less virulent [1]. These proteins are part of the nucleotidyltransferase superfamily [2]. 22.60 22.60 22.90 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.53 0.71 -4.22 142 2479 2012-10-02 22:47:23 2003-04-07 12:59:11 8 25 1053 0 634 1816 209 187.40 23 55.89 CHANGED sccpslphalstGoLLGhh+csshlPWDsDlDl.tMshc-hp+.Ltp.hhsp.......h.............................................phlh-ssohhhpptptstp.............csGla.IDIhsl....s.hs................................................................................................ht.ttpphht...................ppc.hhshspl.Plpps.FEGh..hhlPsshcphLpptYG 
..........................................sccpslp.aalstGTLLGAlR.c.p.G...h.IPWDDDlDl..sh.R.c.D..Y...p....+...hhp..hhpp.....h.t.................................h.h.t.............h.................................tplhc.t.ss.hhhpp..t..ht.h........................................pulh..lDlhsh........Dhhsp........................h....................................................................................................................................................h..................h.....p.th.p..tp..............................hshtp.s.phhs.h......................pp..hh.sh.p...ht......hh...h.FEsh....h.hPtpacphLpphYG.................................................................................................................................................................................................... 0 221 392 560 +3196 PF01291 LIF_OSM LIF / OSM family Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.50 20.60 20.30 20.30 hmmbuild --amino -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.17 0.71 -4.31 5 88 2012-10-02 01:28:15 2003-04-07 12:59:11 12 1 43 8 39 131 0 150.60 44 76.61 CHANGED sLLsQLQNQssLhssou.oLL-PYI+lQGLssP.sLcctCsp+ssDFP.SE-sLpcLoRhsFLpTlsATLGslLppLocLQQcLscsAch...........KLssAccNlRGLtNNVaCMApLLp+Su...hsEPTpss.G...PsPsTossDsFQRKltGCpFLtGYHRFMuoVGQVF ........................................................LhsQlppQhs.lp.sou.sLh..Yhpu.QGpsh.......P.....s...L...c.....c...h.C.s...s...sss.FP.....c...s.s...hsphs+..h..st..Lh...p...l.hshLuss...L...sslTc....QchLssssh............LhpKLpsstsslRGLhsNlhC.hlsph.apsu....phsss..........sPsT.sspDsFQ+KhhGCplLtpY+phhuslupsF................ 0 2 2 8 +3197 PF00549 Ligase_CoA ligase-CoA; CoA-ligase Bateman A anon SCOP Domain This family includes the CoA ligases Succinyl-CoA synthetase alpha and beta chains, malate CoA ligase and ATP-citrate lyase. Some members of the family utilise ATP others use GTP. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.80 0.71 -4.62 20 8836 2012-10-02 00:59:22 2003-04-07 12:59:11 14 29 3569 68 2607 5875 4883 125.00 32 32.56 CHANGED lhsGGTLAhEshtllpts....................supstshlDlGsDsas.........ssphtctlphhssDs.cVpsILlslhlGhGss-t.AuullpAhccsp.......tplPlVuplsGTssD...t.h..pptphLpcuGltlhsusspAstsstsl ...........................hspuusLshpshc.lpth...............................G.hs....ushl..slGGssh....................spphh-slchhhs..Ds...ps..c.....uIlh.h.hG..............sps-t..As.u.h.l.p....A.hppht..........................thPlVshl..t...G..ssAs..............tttt..hl.h....s...u...G.h....s.hsssc.hs.tstt..h.................................. 0 824 1565 2169 +3198 PF00412 LIM LIM domain Finn RD, Griffiths-Jones SR anon Prosite Domain This family represents two copies of the LIM structural domain. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.69 0.72 -3.90 38 16362 2009-01-15 18:05:59 2003-04-07 12:59:11 17 199 437 88 8615 14824 48 57.20 26 24.59 CHANGED CuuCsctIh....sp.hs..uhscsaH.pCFpCspCpp.Lssss....ha.p-u..c....lYC+p.cahcth ................................CstCsp.Ih...................stt.h.....h.................u..........h.......s..........p.....t...a..H.....p..CF......p..............C.........s......p...C.........p..p..t....L....s..sts..........ah....p.....cs....p...................haCpp..sa.t..h.................................... 0 2081 3129 5636 +3199 PF01803 LIM_bind LIM-domain binding protein Bashton M, Bateman A anon Pfam-B_1352 (release 4.2) Family The LIM-domain binding protein, binds to the LIM domain Pfam:PF00412 of LIM homeodomain proteins which are transcriptional regulators of development. 
Nuclear LIM interactor (NLI) / LIM domain-binding protein 1 (LDB1) Swiss:P70662 is located in the nuclei of neuronal cells during development, it is co-expressed with Isl1 in early motor neuron differentiation and has a suggested role in the Isl1 dependent development of motor neurons [4]. It is suggested that these proteins act synergistically to enhance transcriptional efficiency by acting as co-factors for LIM homeodomain and Otx class transcription factors both of which have essential roles in development [2]. The Drosophila protein Chip Swiss:O18353 is required for segmentation and activity of a remote wing margin enhancer [1]. Chip is a ubiquitous chromosomal factor required for normal expression of diverse genes at many stages of development [1]. It is suggested that Chip cooperates with different LIM domain proteins and other factors to structurally support remote enhancer-promoter interactions [1]. 19.80 19.80 19.80 20.60 19.60 19.30 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.25 0.70 -5.26 42 464 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 238 0 283 431 0 206.00 32 39.09 CHANGED hhRLhpasccL.....ssh.......................sppssltaWppFVpcFFs.........ssushRhslpttss.............................................K.pFElshshlPRaa.pohFsoGlpchphhl.ptsp-pshssushhl-ss+sshhhhatsss..........................plsscGpL+shF.................s.phKIchh-hsspsHpEhl.sRshlt.hht...................t.....t.chspspphp...tt............l.......pslsp.hGlspsshphLclu-llspMpsLhsapppps.luPh-uL+p 
...................................................t.hRlhph.c+l.....pph........................sppss..aWctFss-FFp.........ssAhhphshshtsss..............................................K.pa...pIstsllPRaF.pohF...........cuGlp-hhhhl....thp.c..p.s....hts.......s.....tlhl-sspsshlo.ascsh..........................pVs..s-GpLhlpF.........................cs.h+IcsWcFs.............h+pa...cEhl.PRshlt.tsp..................................-.phhsp.ht............................................................................pslsc.hGloptslp.hL.............cls.llpsMp-Lhshp+phs...luPh-sL+.................................... 0 75 140 221 +3200 PF00538 Linker_histone linker_histone; linker histone H1 and H5 family Bateman A anon Arne Eloffson Domain Linker histone H1 is an essential component of chromatin structure. H1 links nucleosomes into higher order structures Histone H1 is replaced by histone H5 in some cell types. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.35 0.72 -3.88 20 1947 2009-01-15 18:05:59 2003-04-07 12:59:11 14 46 377 9 991 1945 10 71.20 32 23.85 CHANGED uHPsht-MIssAIpuLKERpGSSh.AIpKYIps.sY.chslss..hshhl+puLK+sVpsGpLhpsK......GusGSF+Luc .......................Pshtphlh....pAI.p...s.....L..c..E...+...s.G..o...ShtAIpKalps..pa...p....p..h...s.s.............h.t...ph.l+tsLKph.VspGpLhpsK..........G...sGoa+Ls.......................... 0 258 429 691 +3201 PF04454 Linocin_M18 Encapsulating protein for peroxidase Waterfield DI, Finn RD anon COG1659 Family The Linocin_M18 is found in eubacteria and archaea [1,2]. These proteins, referred to as encapsulins, form nanocompartments within the bacterium which contain ferritin-like proteins or peroxidases, enzymes involved in oxidative-stress response. These enzymes are targeted to the interior of encapsulins via unique C-terminal extensions [3]. 
24.30 24.30 24.30 24.40 23.60 24.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.20 0.70 -5.48 59 323 2012-10-03 06:22:39 2003-04-07 12:59:11 7 3 313 13 123 298 12 246.00 34 89.18 CHANGED Ms..LtRc.APlostsWppI..-cpstpsh+ppLsuRRhl-ltGPhGhshsulshsclpthps.ttt............lpsthRpshPlsplphsFpLstp-l-sscRsutshDhssltcAAcplAhtEDchIFpGhstsultGlhsssupttlpls..ssstshhpslscAlspLpps.....GhsG.PYsLlluschYstLtchtsp.G.hs.hc+lccll.susllhuPslcs..ullloscsush-LtlGpDlulGYhupsspshphhlhEohs .............................................................................Ms.LhR-LAPlopsuWtpI..-pcsscoh++plu......GRRlVDVssPh..G.shuuVssG+ltplpsssps............Vtsp.hRpstPLlcL+VsFsLsRp-lDsl-RGupD.sDhpslc-AAcclAh.s.EDcsIF.........cGa....s.........sAu.....I....p.....Glpsu....su.s.st....l........s......L........s............pc..s..pshscslupAlspL+..h..u........GlsG...PYullLus-s..Ysplsc.s...s...-p....G.YPlhc+lp+ll.s.....GcIlhuPulcG..AhllosRGGDa-LplGpDluIGYhuHcsp.sVpLalpEohT.............................. 0 44 82 104 +3202 PF03583 LIP Secretory lipase Finn RD anon Pfam-B_3085 (release 7.0) Family These lipases are expressed and secreted during the infection cycle of these pathogens. In particular, C. albicans has a large number of different lipases, possibly reflecting broad lipolytic activity, which may contribute to the persistence and virulence of C. albicans in human tissue [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.75 0.70 -5.21 12 1102 2012-10-03 11:45:05 2003-04-07 12:59:11 9 8 480 0 414 1391 144 260.10 24 62.70 CHANGED hQhuusho..Tl.TQh-hhhlsshLppGaaVVlPDYEGPKo.....TFsVGRQSG.pAsLDSIRAsL+otphoGlssDu+VulWGYSGGulAouWAAtLQPpYAPELpps.LlGAAlGuhssNlTuhscusDGolFuGlls.uLsGlANEYP-h+phlhpclsctup.shcphsptClusulhcashpphhTG.p+sFppGhslLcs..sls+hlp-NtL.hhspphlPplPlhlYHGshDpIlPItsscthhpsWCshGhsslEFuEDhhsG.......HhsEshsGAPAAloWlpsRFsGcsslpG .........................................................................................................................h...................p..h..l...sth.L..s.pGasVs.ssD.....Y....p...G......ss..............sah.s.....s.....p.....t...t.u..tu.....l..LDu.lR.A....uhp...h...t..t............s.l......s........s........s......l...slh..GaStG.G...h.A.u....h....h.A...A....p....h.t.....s.....s.......Y...........A....P....ELs........lh...G.s...s.h...Gu.s..s.........s....s.....l.....t..........t.....h.......h........p..........t..........h........s.........s..........s............h..............h..........s......G.......h..............h..............s..........h.......s.....l...h........G.l........t....p..t...a...P.....p...l...p..........l...p...p.hl.ss.p..u....p...t..h...h....p...p....h.p...p....t..C...h...s..t...h...h..h.t...h..........s.................h...t...p...h.......................................h.........t..s........p.h..h...t....p..............htph.h...p.p..p.....th........................p....h...hP.....s......h....Plhl....hp................u................h.t................D....t....l.....l.s....h.t.s.s.pt.h.h.p...p.ass.......t.........G...............ss.l.p........a..t..t.............h..s..t..........Hh....h........s....h...s..........s...h...t.altthhtt...................................................................................................................................................................
............... 1 83 269 368 +3203 PF03279 Lip_A_acyltrans Bacterial lipid A biosynthesis acyltransferase Mifsud W anon Pfam-B_1803 (release 6.5) Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.61 0.70 -5.54 13 5234 2012-10-02 00:16:30 2003-04-07 12:59:11 8 10 2565 0 1047 3702 3078 280.50 24 90.64 CHANGED phpFphphhhPpYahs.WlGluhlhllshhPhshhctlsstlGthht..hhhpctpcpA+pNLshsFP.-ho-sE+-pIlcpshpohuhslhEhuplshhs...ccclpcphc..............htGLEplcphhppscullLhssHthsh-luuhhlsppts..shushtppptN.hlsalhsphRpphstclls+pss........lcshlcuL+pGc.slhhlsDpDhssc.ulhVsFFus.sssssssuhLuh+os..AsllPlhshhpscs...ttashplpPshchp.pt....DspplspthNchlEphItspPEQYhWlp+paKo ...............................................................................................................h...................h.h.h....s.h..h.h....h...l.s.hl.Ph..hh..hh.l.u...t.tl.Gthsh......hh..h..p..p..pt...c.h.s..p..t...N.L.p.h.s...F......P......p.......h.o..p........t........-........+......ctll.t.c....a.psh.u.h.s.h.h....Eh.s.h.h.h.h.hs........s.p.c.l.p.p...h...h.c...................hp.G..h..-...t...l....c..p..h....t..t..p..s..c..u..ll.l.l.s.sHhhs..hE....l....s....u...h...h....h....u....t.....p....t........th...u...s...h...h...............p......p........N...shh..-....h..l.h.....p......p....s..R..t....+......h.....s....t.....p....h.....l..s...+......p..s.....................l+...s.hl.....p...s....L.....+.....p....G....p...hlh.h..h..s.D........p...........D...h.....u..........c.......t....u...l.a....l...s...FF..u....h.......s...s...s...s.s.s...s.s...h.l.h.p...ths....As.l.l.s.h.h.......s.....h.....+..p..scs....................ta...p......l......h....l.....t.....P...s.....h....c....s......s..t.t...........Dt..p....p....s....s...t...h....h...N....c...hl...Ep...h.ltttP-QY.h.Wha+RaKp................................................................................. 
1 274 606 844 +3204 PF00151 Lipase lipase; Lipase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.18 0.70 -5.22 16 1975 2012-10-03 11:45:05 2003-04-07 12:59:11 14 31 255 16 1058 2017 14 262.60 27 71.62 CHANGED ppVshtplss.upp.shss.shpcsh+.h..psPpphc..s+FLLapscs....shQhh..ucssTlcsspFsss+hTphIIHGasscG......................pEsWlschscshhph..cssNsIsVDWtsuupshYs.AstNl+lVGtElAthlshLpsphsas.psVHlIGaSLGAHVAGtAG+phsG....plGRITGLDPAcPhFpsssp.sRLsPuDApFVDsIHTsst.....LGhGhoQpVGHlDFFPNGGpp.hPGCppsl...........th...hhtCsHhRSh+Yas-Slh..Nsc.sFsuasCuShppFppscCasC.tt...pCspMGah.hscashppttlptcaaLpTsspSsF .....................................................................................................................................................................................................phhlh...t....tt........t......................h......................p.....s...h..t..t..s..t...a.....s.p.p.shhllHGatsss.................................pp.hh..p.h.h.p...s.hhp...t.......t.s..h.N.lls..lDW......t.....t......h.u.........p........t.........h.........Y..................u..s.t.s..s.p..h..l.Gt..pluph..lphL......t......p..h...s........h.s...h.....p..p.......lHlIG.aSL.GAHl.uG..huGph.hpt..........pl..s..R...IT..............G..L.....D...PAt........P.h....F.p..........t......s........s.........t..............+.....Ls...s.D.A.pFVD....lIHTss...............h.....sh...G.h....pshG+hDF.YP.N....G....G.p...QP.GC...t.........................................................h...CsH.Ruhphah-Sl........p.p..sh..hu..h...C..s..s.h.p.t.......h....t..s....C..hs......................tps...hG......h.....t......t..............................................hhh.stt..sa.................................................................................. 
1 307 390 782 +3205 PF01674 Lipase_2 Lipase (class 2) Bashton M, Bateman A anon Pfam-B_968 (release 4.1) Family This family consists of hypothetical C. elegans proteins and lipases. Lipases or triacylglycerol acylhydrolases hydrolyse ester bonds in triacylglycerol giving diacylglycerol, monoacylglycerol, glycerol and free fatty acids [1]. Swiss:P37957 is a extracellular lipase from B. subtilis 168 [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.38 0.70 -5.03 15 461 2012-10-03 11:45:05 2003-04-07 12:59:11 13 7 242 42 222 809 70 188.40 27 62.77 CHANGED psPllhlHGsu........s.tussapphhphFhccG.YssuplYATTaGcssp.sshppsp....pCpal.pplRchl.AVttYTGs.+VDIlAaShGsPIARKAIlGG..pCs.DssssLGssLsppVcTFlulu..............GsNtGhssC.u............hh..hC.NhpsGLh....C....................tScFLpDINsps+h-.uptlaoIaSpsD-llt....tphsCG+pouhIPsucshp.YcthsHppshppTsthQhp ...............................................................pPV.VhlHGsu...................s.sh.hs.a.st.h.tsh...Lts.....p.G...ast..........l..auh.s..a...s..st..st...........s.....hpsst.................h.....s....t...............p........l.s.pF...l-....p.V...L...t..t...TG....A.pKVD...lluHS.G.u.s.l.sRhY..l..c...hh.......sus...c....p....h.t....tl.s..s.......h.s.p.h.t.s.......h.l.u.l.t.........................................u...s...s.h.sh..s.h..................h..hs..s....th....................................................................................Stal.......ppl..N....s....s...s.....h.....u..thhslh.ophDphlh..................................................................................................................................................................................................................................... 
0 88 142 214 +3206 PF03280 Lipase_chap Proteobacterial lipase chaperone protein Mifsud W anon Pfam-B_4313 (release 6.5) Family \N 21.10 21.10 21.30 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.31 0.71 -4.78 34 328 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 282 2 63 274 15 184.30 29 59.61 CHANGED sLhppYlsY+pALssLp..............tshtsphsl.sslpphhpphtsLpppaFuts.tpuhFGsEpphpphslc+lpItpsssLospp+tptlttLpspLPsslppu.ppptpt.pcltpttpth.tpG.ssspplhtht.tphlGs-uApRLtplcppctsWcp+hssYhppRspI...ssslspsp+pttlspLRpptF.sssEth ..........................sLappYltY+psLupLp...................sttssthsh.sthpphhpphtsLpt+hFu.sttpshFupEpthpphsLc+h.....c.Ihpst.sLsst.p+tptlp.t.Lh.....tp..h.Ppthppu.....p.........tp.sph.tpLpp....tttt......h..ttt........uss..pphhttR...sthl.G...s-.uApRLtpL-ppcssapp+hssYhtp...RspI.......ps..tLS....sp....-+ptpIpp.LRpppF.sspph.................................................... 0 9 23 45 +3207 PF00657 Lipase_GDSL GDSL-like Lipase/Acylhydrolase Bateman A, Mistry J, Molgaard A anon Prosite & Pfam-B_543 (Release 7.5) Family \N 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.79 0.70 -4.54 65 4517 2012-10-02 11:02:24 2003-04-07 12:59:11 17 78 1181 20 2407 11569 2075 269.30 15 70.51 CHANGED lhshGDSloss........................stsh.tshtshl.t........................shshshhstuhuGps...........................................................................................t.hht.h.ttsthsssllhlhlGsNDh..........................................tts.ptshsphhsplpphlptlp..ptssp.....shhhhsts....................................h.t.tthpphsttasphlpcls.......................tthphshhDhaphh.ph.............................................................................hhhDsh........Hsospupphhuctl 
............................................................................................................................................................................................................................................................................................hhsFGDSlsDs...............................................................................................s......t..p.....h...............................................................s......h.s...h...s...h.......s...h....s.........s..G.th..s.ssthh...........................................................................................................................................................................................................hht.....h...h...t......t........t....s...t...t........h....h.......s.....p....s..l........h.....h..l......h.G.s.N..Dhh..................................................................................................t.....p...t....h..h.....s...t....h....h....s....p....h....p........p....h.......l....p....p............L....h.....ph.....G..u+..........................p..h......h.l.h.shs...shs...........................................................................h......................................t.....t...t.......t....s.....h.p....t....h...s....p......h....s.......t...h..a.....N..p...t...l...p.phl...tt........................................ht.t..h..............sh.p.h...h..h....h.....D...h..a....s...h.h......p.h.......h.tp.stthsh......sh...............................................................................................................................h.s.ppah...h.h.....D.s.h.................H.opt.spphlup..h............................................................................................................................................................................................................................................... 
0 460 1376 1961 +3208 PF00061 Lipocalin lipocalin; Lipocalin / cytosolic fatty-acid binding protein family Eddy SR anon Prosite and HMM_iterative_training Domain Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The family also encompasses the enzyme prostaglandin D synthase (EC:5.3.99.2). Alignment subsumes both the lipocalin and fatty acid binding protein signatures from PROSITE. This is supported on structural and functional grounds. The structure is an eight-stranded beta barrel. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.71 0.71 -4.21 156 2221 2012-10-03 08:47:39 2003-04-07 12:59:11 18 19 289 489 974 3261 80 128.60 17 76.93 CHANGED sG.pWhhhuhts.....hcphhpthssshtphp...sssshthpthp...h........pss.p..sppts.shcps..ppshphshp.......tssp...phpsl.ph-hpshhlhhtptp....tt.pshhtplhscs.clstph....h.th......shspss.hhph.pps .................................................................................................G.pWh.hhs...ts......hpchh.p.t.h...s....l.s.h.t.thp.....h...sssshss.hhht.......h......................................pss.p........hpph...s......s.h..c..ss......ph..s..h...c..hshph.t.........hsssp.........chp...sl...sh.-h.t...s..hhl.h.hpptp......u..p..p..sh..htpl................s....cs...p.h....ph..................shtt..hh..................................................................... 0 111 187 385 +3209 PF00820 Lipoprotein_1 Borrelia lipoprotein Bateman A anon Pfam-B_1321 (release 2.1) Family This family of lipoproteins is found in Borrelia spirochetes. The function of these proteins is uncertain. 
25.00 25.00 40.00 25.00 18.30 20.40 hmmbuild --amino -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.71 0.70 -5.30 7 599 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 62 32 2 611 0 175.20 69 94.88 CHANGED sSShDEKsusphDLPuthc..V........hVSKEKsKDGKYsLcAhVDplELKGTSDKNNGSG.sLEG.KsDKSKsKLoIu-DLsphThEhacpss.ThVS+KVspKDtS.TEEp..h.cpGcLSpKplTRsNGTpLEYo-Mps-.susKAhEsLKN.lphEGsLsus.KTTLplpEGTVTLo+EIsKsGclplhLsDo.ootuoKKTusWsspTsTLTISsNSKKTKpLVFpp-sTITVQpYDSAG.TsLEGoAsEIKsL-cLKsALK ........................................tsp.DLPGtMc..V........LVSKEKsKDGKYsLhATVDKlELKGTSDKNNGSG.sLEGsKsDKSKVKLTIoDDLupTThElFKEDGpTLVS+KVs.KDKSS...TEEp..FNcKGc.lSEKhlsRuNGT+LEYTphpss.ssuK.ApEsLKs.hhL..EGsl.sst.csp.l..l.p.puTVsh........................................................................................................ 0 2 2 2 +3210 PF03202 Lipoprotein_10 Putative mycoplasma lipoprotein, C-terminal region Mifsud W anon Pfam-B_2205 (release 6.5) Family \N 22.40 22.40 22.40 22.70 21.90 22.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.96 0.71 -4.14 12 80 2009-09-11 23:29:08 2003-04-07 12:59:11 8 3 32 0 36 80 0 128.20 30 19.30 CHANGED sITQGPNlIGIHANEKENtETpKFVNWFLNsp.oWcspp.sptpppppp....pTsApaFAESASYILPLKEhFcps.....ptctspsp...........................t...NoascKAL-lFpplucsplluYSDPSDFRSGKFRDuIGusFN.AsVsSKs .........................sQGPsLIGIHu.N-KE.-ptTpcFVpWh.........l.sp...ppsapt.p..t.......p.....p.pp.............tTsupahscsuSYlhPhKphhsps.....p....ttp.............................tpNhhhcps.h.chhp.p.h..p..p..s..p..hs.a.p.-P.ushpSupFRcslsosassh.s........................................................................ 
0 23 34 34 +3211 PF03260 Lipoprotein_11 Lepidopteran low molecular weight (30 kD) lipoprotein Mifsud W anon Pfam-B_4108 (release 6.5) Family \N 25.00 25.00 28.90 45.30 19.80 19.10 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.77 0.70 -5.38 6 84 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 4 2 52 54 0 234.40 40 85.01 CHANGED VhusCVlAuSAuls-hsss.....sc.LE-cLYNSllsGDYDoAVpKShEhpcQuKGsIIpssVNpLIcDupRNTMEYAYpLWltsG+-IVKcYFPlpFRlIhuEssVKLI.KRDNLALKLGsssssss-RIAYGDucDKoS-pVSWKFIsLWENNRVYFKIhNTccsQYLKLu..sssssssD+hlYGssoADTaRcQWYLQPAKY-NDVLFFIYNREYNcALcLuRsV-usGDRpAaGHsGcVtG.P-lFuWhIssF .........................................p........sp.lp-pLYNsllsuDYDpAVppohpl.ppspup.lIpplVscLIcstcpNsh-aAYKLW..h.ssup-IV+chFPhpFRLIhspp.hlKlI.+p.shALcLussss.ts-RhAaGDupDK..TS.cVSWKhlslW.ENN+VYFKIhNschspYLKLu..ssssssGD+hsaGussu-opRcpWaLpPsc..a-sclLFaIhNREYsp.uLKLupsV-ssGDRhsaGpsGpVsGpP-hauWhIps...... 0 52 52 52 +3212 PF03330 DPBB_1 Lipoprotein_13; Rare lipoprotein A (RlpA)-like double-psi beta-barrel Mifsud W, Studholme DJ anon Pfam-B_3255 (release 6.5) Domain Rare lipoprotein A (RlpA) contains a conserved region that has the double-psi beta-barrel (DPBB) fold [3,4]. The function of RlpA is not well understood, but it has been shown to act as a prc mutant suppressor in Escherichia coli [1]. The DPBB fold is often an enzymatic domain. The members of this family are quite diverse, and if catalytic this family may contain several different functions. Another example of this domain is found in the N terminus of pollen allergen. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.87 0.72 -3.98 127 5162 2012-10-01 21:39:58 2003-04-07 12:59:11 13 43 2566 5 1785 4300 468 87.00 26 33.33 CHANGED ssssssuha......ssu.....susGpsaphps..sspts....................sps........pulsVplsDhsP................ssppthDLStsAatpluhh.........ps.Gl.ls.Vpa .......................................................................su.uuhY.......sst............susGp..s..a.p..hp.s.hsuAcps..............................................hsp..s+s........s....t+ollV..plsD+sPh...........................ss.s.c.h.lDLSt.sAhp.p.l.uhh.........ps.Ghs..Vp............................. 1 492 1115 1484 +3214 PF03640 Lipoprotein_15 Secreted repeat of unknown function Yeats C anon Yeats C Repeat This family occurs as tandem repeats in a set of lipoproteins. The alignment contains a Y-X4-D motif. 21.10 21.10 23.90 21.30 18.80 21.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.49 0.72 -4.52 22 995 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 347 0 428 953 61 45.30 32 56.64 CHANGED hshlppcD...Gsh.hshcGhsLYpFs+Dpt.G..suss.hss...sWssht .................htptc.....Gp.h.hs.hpGhsLYpFs+D.s.t.sGthpus...s.sss...sW.sh..................... 1 119 258 343 +3215 PF04791 LMBR1 LMBR1-like membrane protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_6189 (release 7.5) Family Members of this family are integral membrane proteins that are around 500 residues in length. LMBR1 is not involved in preaxial polydactyly, as originally thought [1]. Vertebrate members of this family may play a role in limb development [3]. 
A member of this family has been shown to be a lipocalin membrane receptor [2] 26.60 26.60 27.00 27.10 26.30 26.50 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.54 0.70 -5.91 9 832 2009-01-15 18:05:59 2003-04-07 12:59:11 11 11 260 0 554 801 5 364.40 17 72.65 CHANGED lhhlLhhllalhshhlls+ap+hsct............................tplshhlsshsLuluhsulhLLPhslhu.............sphhh.hspsh...........htalssSlltuLWphlalhSsl.lallhPFshhFhESpsFssppKt..lpuplhEshshhhLhulllLsllhVhusl..........................h.................uhhshhphahshlhSssohhGllLLllhsshGlsch.sshhphhl+....................................spllpDspcph.p.shtpupls+clpp......t.....thtt...t...u.tt.hh..pt.hts.tph..t......................................................................................................................................................ssthphshshshshhsLLhhsshplLhVstshlplllshstlshhs.......ttsslthsohohhGhhGsslphllIhYLhhoohsGhaph.h....hphht.chccTshsphlhNsuhlLl.sSsLPlh.phlGlspaDhhssau...................slphlG.hhhlhlaplhFshloshsL 
........................................................................................h.....hhhh.hh..hs....h....h..hlh.ha..tp...t.pt...................................hh.shhh.shhshhhshh.hlh.llPhslh........................................................................................................tt.......................................h.th....h..st.....t....h..h..h.......h.......W...........h.......h.......ahhs.hhhh.......hl..lPhh..ah...........-.......u.......ts.....s.s....t+...........lh...tt...l..h.t..s..h......hh......h..h..hh..h..h..l....hh..h.hh.h..hh.hh...............................................................................................................h...........h.t....h..h.sh..hh..u...h..h..shhGhhl.....hl....h.hhuhG.lst.l...Phshh..p..h..t.........................................................................................................................................p...........h..........................................t............h.............................................................................................................................................................................................................................................................................................................................................................................................h........h.h.h.hh......h...h....phhh..t.h...h..............................h...........................................h........h..h......h.h..h....h..ah........h.ssh.s.........h.t..h.....h..h...h.ttp.......hh.......h.p...sh...phh...th................................................................................................................................................................................................................................................................................................................................................................... 
0 202 313 445 +3216 PF03923 Lipoprotein_16 Uncharacterized lipoprotein Bateman A anon COG3056 Family The function of this presumed lipoprotein is unknown. The family includes E. coli YajG Swiss:P36671. 20.90 20.90 21.10 22.90 20.50 20.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.58 0.71 -4.88 25 800 2012-10-01 20:48:06 2003-04-07 12:59:11 8 2 789 0 100 348 15 156.60 50 82.07 CHANGED Pphs...ssp.ssspshslulsotDpRssphluplscssp.lphlssopslchhlppsLppphsupGapl..sssussplplplpchhssVpcushpachsoclplplhupsspG.chsKpYsussshpGs.hoAssscIcpslNplLspllscIhsDsELssalp .....................Ppls.LPQQDPoLhGVTVSI.s.G.A.DQRsDQ..ALAKV..s.R..c..s..Q..l.Vs..LTASRDLRFLLQEVLEKQMTARGYMl......GsNGs...VsLQIlVspLYADVoQGslRY.N..IsT.KADIuIlATAtNG.sKh.oKNYRAoYsl..E..GA.FpASNcsIscAV.NoVLoDsIADMuQDTSlppFIK................................. 1 13 37 71 +3217 PF04200 Lipoprotein_17 Lipoprotein associated domain Bateman A anon Pfam-B_3382 (release 7.3) Domain This presumed domain is about 100 amino acids in length. It is found in lipoprotein of unknown function and is greatly expanded in Mycoplasma pulmonis. The domain is found in up to five copies in some proteins. This family also includes the Mycoplasma arthritidis MAA2 variable surface protein. MAA2 is implicated in in cytoadherence and virulence and has been shown to exhibit both size and phase variability [1]. 
22.40 10.00 22.60 10.10 22.10 9.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -10.47 0.72 -3.84 272 508 2009-01-15 18:05:59 2003-04-07 12:59:11 7 17 40 5 287 476 0 92.90 16 31.67 CHANGED pls..pl....p..shphpssst...............tstlPS.plp.spsl.............................h.sshsapl..........phhp...........................ssDppGsL.plp..l.......ph.....phss.................pt...shp.lsGFps .............................................................t.........t.php..tt...........pshhsS..pls..ppslp.....................................ts.sss.shpl................phhp.....................ssDppG.sL.plp.h..................pl...............phss..s..........................pph......php..lsGFp......................................... 0 162 287 287 +3218 PF00921 Lipoprotein_2 Borrelia lipoprotein Bateman A anon Pfam-B_1509 (release 3.0) Family This family of lipoproteins is found in Borrelia spirochetes. The function of these proteins is uncertain. 21.20 21.20 21.80 21.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.48 0.71 -4.24 16 1748 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 30 4 24 1776 0 138.10 61 67.07 CHANGED lKpIV-sshKs..........tsss..sptsussspKssuKlhussusss..usus....susKAAAtluuVoGt-ILpuIspupct....tt.stsh-tsssssuhuhu.t....ptphstsuspKsAslAuGIALRuMAKsGKFAu.ts..ss..ssAVpuAuuoAVs...KlLusLphhIRKTVcusLcpl+EAlc .................................IKEIVE.AAGGS...............EKL.Ks.......sA..A....u..t.G......E.s.NKsAG.KLF.G.K.A.G..A..u..A...p.G.DSEA................ASKAAGAVSAVSGEQI.....LSAIVpAAsA..........A........-Q-Gc.KP.t.-AKN.PIAAAIGct....-sGA..-...Fs....p....-..t..M...KK....D..D...QIAA.AIALRGMAKDGKFAVK..ss.....EK.....tt..........................................................s............................. 
1 1 1 1 +3219 PF00938 Lipoprotein_3 Lipoprotein; Lipoprotein Finn RD, Bateman A anon Pfam-B_1076 (release 3.0) Family This family of lipoproteins is Mycoplasma specific. 21.20 21.20 21.90 30.70 21.00 20.10 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.78 0.72 -4.21 16 55 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 4 0 21 46 0 81.00 39 33.93 CHANGED sushpsllppsospchshu+shcLopsK+NLIssLKcuYEssPcpTsplLLsAWKhoL.-tcILpcphs.s.+F.psFGss.scpslpP ...ushcsllppooupchshu+shchspsKKNlIsuLKcSYEsNPccTsclLLsAWK.oh.-sclh.pphs....F.ps........................ 1 19 19 19 +3220 PF01298 Lipoprotein_5 Transferrin binding protein-like solute binding protein Finn RD, Bateman A anon Pfam-B_893 (release 3.0) Family This family of proteins are distantly related to other families of solute binding proteins. 29.40 29.40 29.90 29.60 28.90 28.90 hmmbuild --amino -o /dev/null HMM SEED 570 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.99 0.70 -5.56 37 819 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 180 13 33 777 8 399.10 24 87.70 CHANGED 
GGSFcl-s.....................stsssspptpP+Yp...Dssopp....pctphschppPuhGhuh+lhtpNhh..............tcc.s.lsppDh..h...................................tphptl.pthp...psspp............................................ps.p..t...ttps.pYVhSGhhYhtshs..p.tpp......hhtG..G..alaYpGppsuppLPs.....sspspYKGsWcahTs.sct...spc....ht.hst..tttssschuAhS.tp.........ctp.tsstpschuhsSEasV-FusKplsGpL..htNtphp.t.ptpstcppphYs...l-AcltGNRFpGpshs.spcsppp.....aPFso-...............spLcGGFaGPpuEELuu+FLssDpclhsVhuAKppsc......................ttpptthpshhDAhp.....................shsshsp+plss......FGcustLllsuh.IsLhs.............................................tssshtsppththtpcphpV.sCCsNLsYlKaGh...................................lppcsspt....................s........................................hFLpGpRTs.....pcplP..ppGs.scYpGoWhGal.ts....Tuaussuspppsts..tA-......FsVsFusKploGpLpspsspp..ssFsI.susIcu..NGFpGoAposc....shslDspsops.phh.h.supVsGGFYGPsAsELGGhFsassst...................................sts.supusVVFGAK+Q .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hhst....................................................p.ttht.pp.hss........hs.phstLllsG..lsLhs...........................................................................t..p..ph.p
.h..ss...psp.hpY..lpa.Gh......................................................hp.tstt..........................t..............................................................................ha..lpGp..s.............tptl.P......tup.shYp.Gp.hhthh..tt......................h....sst......tt....s...tAc.............VsFusKplsG.......p.l..ps...t.st.t............shph...pu.tIpu........NuF.pGshpsss.................................sspVpGtFYG......spupEluG.h..t.............................................t.tt.thhsV..FuuK+p.................................... 0 10 15 29 +3221 PF01441 Lipoprotein_6 Lipoprotein Bateman A anon Prodom_1149 (release 99.1) Family Members of this family are lipoproteins that are probably involved in evasion of the host immune system by pathogens. 24.60 24.60 31.60 31.50 24.30 24.10 hmmbuild --amino -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.18 0.71 -4.45 42 763 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 58 21 5 741 0 168.60 61 91.18 CHANGED -.tSspsAcpsups.sLscISKKIpDosAFshuVKEVETLlpSIDELA.KAIGKKIpsss...sLsss...usp..NsoLlAGAYsIushIspKLssLpsspt.....LKpKlpcuKpsScuFssKLKspHu-LGtss...soD-sAKpAILKoNss.psKGAcELccLspuV-uLhKAAptAlssulKELT.uPVhA ..................s..sSsNsADESsKGPNLTEISKKITDSNAhVLAVKEVEsLLuSIDELA.KAIGKKIcpNs..uLss-..usp...NuSLLAGAYsISo.LITpKLssLcsppt.....LKcKIpcAKKCSEsFTsKLKssHAc.LGhps...sTD-sAKcAILKTsus..KDKGAcELccL.cSVEuLuKAAp-h.LsNSVKELT.sP.............. 0 1 1 1 +3222 PF01540 Lipoprotein_7 Adhesin lipoprotein Bashton M, Bateman A anon Pfam-B_615 (release 4.0) Family This family consists of the p50 and variable adherence-associated antigen (Vaa) adhesins from Mycoplasma hominis. M. hominis is a mycoplasma associated with human urogenital diseases, pneumonia, and septic arthritis [1]. An adhesin is a cell surface molecule that mediates adhesion to other cells or to the surrounding surface or substrate. 
The Vaa antigen is a 50-kDa surface lipoprotein that has four tandem repetitive DNA sequences encoding a periodic peptide structure, and is highly immunogenic in the human host [1]. p50 is also a 50-kDa lipoprotein, having three repeats A,B and C, that may be a tetramer of 191-kDa in its native environment [2]. 24.10 24.10 24.10 24.80 24.00 24.00 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.31 0.70 -5.38 3 40 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 3 0 2 41 1 200.70 50 99.44 CHANGED MKKSKKIFITLCGIAATAILPVATISCNDDKLAEKNGKEKADAALKQANALAEELKKNPDYSKILETLNKEIAEATKSFKEAGSYGDYPAIISKLSAAVENAKNEKKAIDDKNAQIA......................................................................................................................KELAERNAKIQSNIEELKKINNEAFELSKTVNKTIAEVEKKFKI-ccFKEQLENFADDLLDKSRQIDEFTTVTSTQEGFTLAELESFKEITTTWFNGMKSEWARVLDAWKNELTEIN...SIIKGVEELKKLSHEISEFSNSVKKTISELEKKFKIDDKTNK-EAKpFKNELENFADQLLNKSHEIDKFVTVTSARcDFSLSELESFKSFNTTWFNEMKuEWARVQEAWKDQLKEISTK ............................................................................................................................................................A-ENtKIppGhcELhKLScchpshucTIshTIsKLE.KKFpIDcsFKcQLhSTI-.LNKKSsElcTFsTVsohKc-FlLuELESFKEhNToWhp...cIhSEWtcVpcAWpcELsEIp.............................................................................................................................s............ 0 2 2 2 +3223 PF02030 Lipoprotein_8 Hypothetical lipoprotein (MG045 family) Mian N, Bateman A anon IPR000044 Family This family includes hypothetical lipoproteins, the amino terminal part of this protein is related to Pfam:PF01547, a family of solute binding proteins. This suggests this family also has a solute binding function. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 493 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.79 0.70 -6.00 3 61 2012-10-03 15:33:52 2003-04-07 12:59:11 10 4 56 0 17 843 30 438.40 27 76.45 CHANGED MKKQLKYhFhhhGlT....hSsILTACS.SopFVlANFESYlSP.LLLERAccK+P..LTFLTYPoNEKLINGFANNTYoVAVASoYAVSELtcpGLLpPIDWAKFNLKKosNuSsp..lpNtEDAK-LFTKpIt-ISpth........KDuKNsELLcWuVPYFLQDLVFVYRGEKIsELE..pcDVoWSDVIKAIV+...HKDRF......................NcNRLlhIDDARTIFSLANIVphE..sKNNolDVN.............................................PKEsslNYFsNVYESFupLGLK+sNLsolFVN.........SDSNIVINELAsGRRQGGIVYNGDAVYAALGGDLRDElsE...NplPsGDNFHIVQPKcSPVALD.FLIINpQQopFccAAHElIa-LAL-GAD..................QTKEpLlKTDEEpGTDD........ED.aYLYGAMQNFSYVNYVSPLKsIS....DEoTGIVuKcsppADhKp.hKQpS.o-Qpo.poEKEc..............sc.aDaYTcTLKuLLp...KsDShE...........LN-psKKLV-TIKKsYpIcKs-uIp.........................WsNLlEKPIoPLQRSNLoLSWLDFKp+a ..........................................h..........hh...h.h..u..hs............h...s...h..l...o.....u.s.....t......p......s.........c.......L....V..lu...Na..ssYlsP.plh...t.E...p.tpp.h..p.s...............plsYphassNEhl...h...s....tl....+....s...s...t.....YDltlsSsYhVs+LtppshlpKIsa....SK...h.s.....l..h..tpt...s.p..t...p..........s.pthpp.hs.cphhtl.tt.......................................ps.s..plL-YhlPYahpDL.lhsap..s..ppl..p.Lp..pppl.a..phhpth..p...p.pph.............................sp.s..K.hh.l-stpp.hsl....uphhp.t......psp.hphh..............................................ht..ph.h.hss.ppht.lt..........t.hpph..ss..hhhN..........ssSs.lls.ls.tph..ts.ul.hYsGDhhaAs......sG-.....p.E.hsp.............h.stp..s.h+.lc.pso..hhD.hhl...p.....s...tp......ptAYphlpplhh.tuhp............................p...............p.................t...h......s....h.NFsaltYsss...........................................................................................................................................................................................................................
...................................................................................................................................................... 0 10 14 15 +3224 PF03305 Lipoprotein_X Mycoplasma MG185/MG260 protein Mifsud W anon Pfam-B_4433 (release 6.5) Family Most of the aligned regions in this family are found towards the middle of the member proteins. 20.60 20.60 27.40 27.40 20.50 18.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.72 0.70 -4.99 17 77 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 34 0 32 75 0 232.50 27 33.10 CHANGED LlDFuoRlAKSFsc.lps.p.sKKus-lQGVLGlDSssNsLaousFAuGsGsYsNFFapl....psupsDFsNFhNK.so.oYpNLpclaNcaKsLIspNGLalN+G.GoYoSNapKFHQLAFuloSTSGahauFAspsu.K.RLpFssps.......hpaPphT.....ppl..+sPspspp.tsh..tspstspsNLLGshslpssph.......pt.t.t.....................t....s.pspu......KsIplYKspIsssKpts.sAlL............Ipsp .........................LlDFusRltKSFsp..p..p.sppssshpsVLGlDssssslasssFAs..usGshssF..hhpl............p...s....s..pscass.Fhs+.so.uYpNLpclasphpphlpppula....lspu..GsYoSs.atphHQhuFuIuSTuGYhasFsspsu..K..plpFspss.......hpaspho........ppl..pssspspp.tsh...t.pp..pt.psshlhphshpsspt...............p.............................t.tp...t..t.ptpt..............ch...hct....h.pttp.t........................................................................................................ 
0 20 30 30 +3225 PF00305 Lipoxygenase lipoxygenase; Lipoxygenase Finn RD anon Prosite Domain \N 19.40 19.40 20.70 20.00 19.00 19.30 hmmbuild -o /dev/null HMM SEED 667 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.13 0.70 -13.09 0.70 -6.54 16 1473 2009-09-11 06:47:26 2003-04-07 12:59:11 14 34 251 68 681 1491 13 407.70 28 73.57 CHANGED PusLhKYREEELcsLRG...-GTGchcta-RIYDYslYNDLGsPDpstp.sRPlLGGotpaPYPRRsRTGRtPTcpDPsoEpc.....s..hYlPRDEpFGHlKpsDFLshulKulsQsllPthpus...hp.hssEFcoFcDVcpLaEGGlpLP....hphlsph.Pl.hlc-lh+TDGpt...hL+assPpVlphs+ouWhTDEEFAREhLAGlNPslIcplpEFPsKSpLDsthYGspsSsITtEcLE.pLcG.hTV-EAlpsp+LFlLDaHDhhhPYlp+INs.sssKsYAoRTlLFLp-DGTL+PlAIEL.ShP+PsGc..u.sSpVahPu.-Gspu.lWLLAKAaVhVNDushHQLlSHWLsTHAlhEPFlIATNRpLSslHPIaKLLhPHaRsT.MNINuhARpsLlNusGIlEpohhsG+Yu.hEMSuslYKs.WsFs-QALPsDLlKRGlAlcDsouPaGlRLhIEDYPYAsDGLEIWsAIKoWVp-YVslYYtoD-slppDsELQAWWKElsEhGHGDtKDcPWWPKhQTp--LlcssTllIWsASALHAAVNFGQYsYuGal.NRPThoRphhPp.sTsE...a--lhps.pKsaL+TlssphQollslollElLSRHuoDElYLGpR-s.p.Wss-tcshtAFc+FGpKLp-IEccIstRNsD.sLp.NR.GssphPYTLLhPS 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tW.pD..FuhphlsGhNP.h.lp.....h.....h.....p.....................t..h..P..........................................s....t...l...............h....tt.......h..........t..............t..h........p......t.tpla..hh...D......a.....t.......h.........h.................h..............t.....h...............t.........t.........t..........................t......h......hhu..shsl.hh.....h.....p..................t......s......t.....L..hP.lAIp.l.....p.....P..............s..............................l..h.....h......P......s.............p.....s...................s........W.lA..Khas.ss-.thHp..hhsH..h...........lpTHh..h..Es..hhlAs....R..pL......s.hH.Pla+..L.L..PHhc.T.htIN..shARt.Ll......s..t.....t..........G.....h...h....-.........s.............h..................s.................u..h.p.h..th.h.h.tp...h..t..at....t....s..hPtsl.h.tRG...h.....................................l...sY.Yt.Dul.lWtsl.....pp..................alpthlt..h.aY...............s....s.....t......lttD..E..........LQsWh.p.-h........h...p...........G......h.........t...........h..............p.....t........t...s..h........p..h.po.........tpLhphhThhlahsou.HuAlNh......uQ.h.s.hhu.a.h..P..NhPs..p...P..t.............t.......t...........h.......................hts.hs..s..ts..h..h.....hh...hLu.p.............s.................l........Gp....h....t..................h.........p.......................t......httFttp.l..t.l..tt.l.thNt..t.......h..............................s.Y.hh...
............................................................................................................ 0 146 331 477 +3226 PF04778 LMP LMP repeated region Kerrison ND anon Pfam-B_2380 (release 7.6) Family This family consists of a repeated sequence element found in the LMP group of surface-located membrane proteins of Mycoplasma hominis. The the number of repeats in the protein affects the tendency of cells to spontaneously aggregate. Agglutination may be an important factor in colonisation. Non-agglutinating microorganisms might easily be distributed whereas aggregation might provide a better chance to avoid an antibody response since some of the epitopes may be buried [1]. 23.60 23.60 23.70 23.70 23.10 23.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.13 0.71 -4.47 6 52 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 10 0 11 48 0 142.60 60 48.35 CHANGED sANpLLspLsDsDscItcAKopL-pEIppANQAlsSNNsASMQSAKSSLDAKVsEITKKLETFNKDK-AKFpELEQTRKsI-EFIsss.KNNP....NYusLlppLTsK+DuKNSVTsSSNKSDIpuANsELKQALscApssKsQlDshsKSlKEQLsspIs ..............sANpL.spLoDpDspIQpAKo-L-pElpKAsQAltSNNTASMQSAKSSLDAKVsEITKKLETFNKDK-AKFpELcQTRpQIQEFINTN.KNNP.....NYS..ELISpLTSKRDSKNSVT-SSNKSDIEoANTELKQALsc.....ApscKsQsDNhsK....ShKEQLssols...... 
0 11 11 11 +3227 PF01451 LMWPc Low molecular weight phosphotyrosine protein phosphatase Bateman A anon Prodom_2132 (release 99.1) Domain \N 23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild --amino -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.85 0.71 -3.85 27 7884 2009-09-12 21:07:05 2003-04-07 12:59:11 16 44 4157 78 2043 5778 2186 133.10 26 83.15 CHANGED lLFVChGNhCRS.hAEulh+phh.t..hstth.plpSAGsp...supssc.pulplhccpGlslst+hu+plspp....hpphDlllsh.spsthtphsshhPt...hp................phplsDPh...tps....ssFcplhstIcptspphh .......................lLFVC..h.GNhCRSs.hAEulh+phh.........................................t............t.............p.............h....plp......SAGs..........t.....t...................u....p......s....scs.t......uh.p.s........h..p............c............t......G.............l................s............h.......s...s...........p..p..u.+...p...l...s..p..p............hp...p...h...DhllsM...sp.p...ph...t..p.......l....p.t...hhPs..............hp.ph...hhtp...................................................phslsD..Pa.............h.ts...........tsF.c.....p...shc.lpptspth................................................................................................. 
0 663 1320 1746 +3228 PF03548 LolA Outer membrane lipoprotein carrier protein LolA TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.00 22.00 22.00 22.00 21.80 21.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.63 39 2649 2012-10-01 20:44:21 2003-04-07 12:59:11 10 3 2149 7 649 1876 1519 159.10 24 75.94 CHANGED splpohsusFsQpshssptt...pspGphhlpRPshhRWphssPpcphlluDGcslhha-splcQs...h....htpslspoPhhLLh..psptphtppasls......tthtshhLpP+.ttssshpphplshsppsh.lpphplhDp.Gpcoplthpshp.ssslssshFpFs ..........................................................................t.thpohpusFs.Qp...s.........p........s....s..........s....h............t................................p.upGp...hhl.....p......R.....P....s........h........hpWch...s.p..P........t...p..........ph...l.l.uDGcpl..h..h...a...s....s.......p.....l.......c.....Qs.....shp.........hpp..s....h...s.....s.....o....Ph.......h...llh............tsp.ssh...p...p.a..........s...lp......................................................p......s.........t....t....p.......h...hL..s..P.....+....s......s....s..s.s......hp......p...h...pls...h........s........p......s.......s.....h...lpph...............p......hh-...p..p.s.p.p...o.s.hp..h.pshp..s.s.sl.ssshFpa.......................................................................... 
0 183 390 523 +3229 PF03550 LolB Outer membrane lipoprotein LolB TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.00 20.00 21.20 21.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.87 0.71 -4.64 24 1327 2012-10-01 20:44:21 2003-04-07 12:59:11 9 2 1272 3 243 817 151 154.60 36 75.40 CHANGED plppaphpGpluhh.....usppphuupF.Wppp.sppacLtLossLGpTtlplps..psssspLpsscGphhsussuctLlp.clhGhslPlspLt.Wl...........pGhPs...ssschplDsptpltplpp....psWplsY.pY.....pppsts.LPpplpLp.....pss........hpl+LhlspW ..........................p.lspapscGthAhh...........usp.pps.ApF.h.WQQs..tcpacLh..Lo.s.P.L.GsTtlpLss...psusspLsss.cGppYsAs-AEch.lt.cloGh..slP..lssL+pWl...........................hGl.Pu....sssch..pLD...s.p..h......+.L.spls................psWpVsYtsY......................sspsp.ss.hPpplcLs.....s..s.s........tcIKLhhcpW................................................ 1 45 119 188 +3230 PF04728 LPP Lipoprotein leucine-zipper Kerrison ND, Coggill P anon DOMO:DM04880; Domain This is leucine-zipper is found in the enterobacterial outer membrane lipoprotein LPP. It is likely that this domain oligomerises and is involved in protein-protein interactions. As such it is a bundle of alpha-helical coiled-coils, which are known to play key roles in mediating specific protein-protein interactions for in molecular recognition and the assembly of multi-protein complexes. 28.40 28.40 28.40 28.40 27.70 28.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.87 0.72 -4.17 19 1023 2009-09-10 20:56:13 2003-04-07 12:59:11 8 1 668 20 75 206 2 55.30 64 68.34 CHANGED SNAKIDQLSSDVQTLNAKVDQLSNDVNAhRoDVQAAKDDAARANQRLDNps.psY+K ................sA.KlDQLSS-VQTLNAKVspLosDVsAhR.......u.......sl.......p.......AAK-.......-AARANpRLDN.u.ppYpK..... 
0 7 21 47 +3231 PF02169 LPP20 LPP20 lipoprotein Mian N, Bateman A anon IPR002217 Family This family contains the LPP20 lipoprotein, which is a non-essential class of lipoprotein [1]. 22.10 22.10 22.10 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.99 0.72 -3.89 81 734 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 501 0 158 508 331 101.10 22 44.89 CHANGED ss........shssspph...lhAh..............+uuphsAh+pLup..plhshplsupss...................spshhhpsspl..pstVsuhl.........+...Gscllps.h.......ssspah..s.lplc ...................................................s.spth.....hhAhG..u..................+AAph-AhcpLAp....plhGhplsupss...................lpsthhp..s..p..pl....pspVsu.h.I............+.........suc.llcp.hh.......spsh...ah.splcL...................................... 0 55 103 136 +3232 PF04348 LppC LppC putative lipoprotein Mifsud W anon COG3107 Family This family includes several bacterial outer membrane antigens, whose molecular function is unknown. 
28.70 28.70 28.70 28.70 28.60 28.60 hmmbuild -o /dev/null HMM SEED 536 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.71 0.70 -5.95 6 1623 2012-10-02 13:57:41 2003-04-07 12:59:11 8 4 1023 1 224 1425 378 366.20 30 91.36 CHANGED pscsNAou-hYlp+ssQoQs.p-ppsa+LhAA+lhlpENclsQApALLtcLs..sLsspQplctuLlcAcluAs+ppspsA.pQLph....sLspLSsoQpsRYacstAplhEsctDslsAsKtRIphDphLosspc+ppNtD+hWuLLpshssuVlps..ssstssssLuGWLsLsphhss.hspPsQLppolpsWpst.PpHsAAphhPstLpsLhshpQsshoplALLLPLoGshthlupsI+sGFssA+......spsssslplFD.Tssp..ol-slhsQAppsGIchVVGPLlKpNV-hLhspsQ..hQslslLALNtosNs.cu.hupLCYaGLSPEDEAcuAAs+hWsDGhRpPlVlsPQN-lGcRsssAFs.RWQphuGoc.Aslcaas.PuDlshplp..........................sspppssDuVYllAssspLspIKshLss.....stshtlYAoS+ussu..NssP-pct.LsGlpFSDIPahhsssssphp..cls.+hspuchuhhRLYAMGsDAWhLhsphsEL+tV.PGasIDGLTGpLShsssCNVER-hoWhpapsG ................................................................................................................................................................................................................................................................................................................................................................................................................................h..........................................................hh.....h..............................................t..a....h..t..............................h...........t..s.........................................................................................................................................p....hulhLP....s...G......h..t..........up....hlppGh..uAh.......................s...p..........s......s......t..l..p....l..h......D...o..sut......sh.s..p..l...ht..Q...s.p.tp.s.s...shVlGPL...+...s....p...V....p..t..L.h..t............p..tt.............................................slss.L.A...LN........t..s.......p.....s......s..........t...........h.............h....uL....u.s..pp..-...s...t..h...A.ph..h...h....t.p..t..h.p..........sh.h
h.h.s.p.st...up.R.h..tsF..t.....att..s....tt........................h.....t...................p...h...t..........lt........................................................................h...s..h.hah.h.s.t...t.p.h...lhs.lt............hhss....Sp...........t.t.t........hpsl........hs.p..h.Phh..h.......t..........................................h....................................................R.h.....A.h.GhDuh...l................h.t.......h...........t..............t..th..G.oG.Lth...t...t..t....l.Rt..h............................................................................................................................................................................................................................................... 0 40 100 165 +3233 PF02684 LpxB Lipid-A-disaccharide synthetase Bashton M, Bateman A anon COG0763 Family This is a family of lipid-A-disaccharide synthetases, EC:2.4.2.128. These enzymes catalyse the reaction: UDP-2,3-bis(3-hydroxytetradecanoyl) glucosamine + 2,3-bis(3-hydroxytetradecanoyl)-beta-D-glucosaminyl 1-phosphate <=> UDP + 2,3-bis(3-hydroxytetradecanoyl)-D-glucosaminyl-1,6 -beta-D-2,3-bis(3-hydroxytetradecanoyl)-beta-D-glucosaminyl 1-phosphate. These enzymes catalyse the fist disaccharide step in the synthesis of lipid-A-disaccharide. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.20 0.70 -5.83 8 2446 2012-10-03 16:42:30 2003-04-07 12:59:11 10 6 2354 0 585 1902 2953 358.50 35 93.79 CHANGED IalSAGEsSGDhlGupLlpsL+pcY.....sh+FhGluG.pMpt.pGhcsLhshcElulhGhhElLs+Lh+Lh+lhccll+phlpppsDslIsIDuPsFNlpLtK+LRKpGh+h.IIHYVuPSVWAW+spR..spplt+hsDhLLAILPFEpsaacKhs.LcspYlGHPLsDpIphpss.cspt+-hl.lspsc.hlslhPGSRcuEIp+h.hslhspAtplppphsslphlVslssscact.h.phhtt.shplshlhhsstshcuhhsuDhALhpSGTssLEsuLstoPhVVsYRl+PloaaLAKhLlKlpY..lSLsNIlhsctlhPEhIQtcschphhuhtthhhLtsspp.tcpp+stpcchpphhphtpsptccphsphh .........................................................................................................................lhllAGEsSGDlL.Gu.sLl+AL+....t......ch.P................s.sc....FhGluGs.p..MpA...p........Gh...cuh.a.chc-LuVMGl.....l.....EVL.....t+L.cll+h..tp.clhc.ph.h.p.....pP.....DlhlsIDuPDF.Nlpltt.pL.....K.........p.....p.....G.l......c....................s......l.....+YVS.PSVWAW.R..p..p.R...............lhcIt+ssDh.lLulLPFEtsaY.c....+.......h.......s...l.s..........spalGHs.h.s..D..............t...............h..............s......h............p.........s........c.......c...........t.............s...s...+.............ph...L....ul..............s..............t............s..........t.........t.....h..lA.lLPGS.Rt.uE.lc.......hLhssFlp.....s.....u....p....hL.......p......p.......p.......h.....Ps....l.c.....h.l.l...P..h...s...s....s.....pp...c....p....h.......p.p.....h.....h........t.......p.......h.......s...s...............p.....l........s...............l...h.........l..........l.......c.........s........p..........s......p.....p......s........h.t........Au..D...suLluSGTAsL.EshLh+sPMVVuY+..h..p..s..h.o..a..h.................l...u..+..+...l..l...K.....h..p.a................lo...L.PNll.....u.....s.....c.......p....l......Vs....ELlQ.-csp....sppLut..tlh...lLt.s.s.....t....thht.h...............................................................
.................................................................. 1 184 368 488 +3234 PF03331 LpxC UDP-3-O-acyl N-acetylglycosamine deacetylase Mifsud W anon Pfam-B_3666 (release 6.5) Family The enzymes in this family catalyse the second step in the biosynthetic pathway for lipid A. 20.60 20.60 21.00 20.90 19.30 19.30 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.68 0.70 -5.45 10 2676 2012-10-03 01:04:38 2003-04-07 12:59:11 8 8 2359 35 672 1850 2808 253.70 43 87.84 CHANGED hc..Q+TLK+.VphoGVGLHoGcpspLTLcPAtsNTGIlFhRoDLss..shlPAchss..Vt-TthSTsLupc.sspIuTVEHLhAALtuhuIDNlhIclsusEIPIhDGSAtsFlhLIppAGIpEp..puscchh+IpcsVpVpcpDpaltshPssshclsaTIDFsassls+.sQthohs..hsp-uFtcpIApARTFGFhp-IEaLpspGLstGGSL-NAlVlD-.s+lLNtsGLRFtDE.VRHKlLDLIGDLhLlGpsllucahuaKoGHuLNspLl+pllsspc ....................p.Q+TLpp.lphsGlGLHoGcc.V.plTL...cP..As....s...s...oGlla.p.R.s..D..L......s............s....s.........sphsAs..sps..V..t.....-.....T.h..h..sT.sL.......ss....p....s.....s..........+luTlEHLhuALsuhGI.DNhllE.l.s.u.........s.........ElPIM.DGSAtsFlhh.ltpAGlpE.............sss.......K.........+.....al...+.I..pc..s..V..c...V........c..-....G.....-....K..a..s..c...hp...P.....h...s...........u.....apls..asID....Fs.c..P...s.Isp..Qp.hshs......hs...s.....p......s.FhcpIucARTFG......F..h.+......-...............l.....E......hLpu....tGLshGG.....S.h.-NAIV..........lD-.....cl......L.N......p..-G.............LRa.....c.D...EF.VRHK.hLDAIGDLhhh.....G.t..s.....l.lGtapuaKuGHsLNspLlctllsp............................. 0 209 429 564 +3235 PF02606 LpxK Tetraacyldisaccharide-1-P 4'-kinase Bashton M, Bateman A anon COG1663 Family This family consists of tetraacyldisaccharide-1-P 4'-kinase also known as Lipid-A 4'-kinase or Lipid A biosynthesis protein LpxK, EC:2.7.1.130. 
This enzyme catalyses the reaction: ATP + 2,3-bis(3-hydroxytetradecanoyl)-D -glucosaminyl-(beta-D-1,6)-2,3-bis(3-hydroxytetradecanoyl)-D-glucosam inyl beta-phosphate <=> ADP + 2,3,2',3'-tetrakis(3-hydroxytetradecanoyl)-D- glucosaminyl-1,6-beta-D-glucosamine 1,4'-bisphosphate. This enzyme is involved in the synthesis of lipid A portion of the bacterial lipopolysaccharide layer (LPS) [1]. The family contains a P-loop motif at the N terminus. 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.22 0.70 -5.48 173 2466 2012-10-05 12:31:08 2003-04-07 12:59:11 9 11 2274 0 558 1911 2496 293.20 33 91.66 CHANGED hthLLhP....lohLYuhlst..hRptha.....t.shh.pshch.slP.VlsVGNlosGGsGKTPhllhLsctLppp..Gh+sullSRGYGucsp...s..shhV.ssss............ssspsGDEPlLlAp..pss..ssVhVutc.Rspusptllptts...s-......................lIlhDDGhQHhtLt........RDl-llVlDu.....tRshGNGhlLPAGPLREPhs..pLpcsDh.l..lh.......sssps..ttt.......h...............................................thpLt..P......................h.....t...sspt.s...l.psh.h.........hAhAGIGpPp+FFsoLcp.hGhplhtspsFsDHasastp-lptltpt..p.........ll.hTEKDAVKht................................p.......hhhlslcspl.....ssshtphlhptl....p 
..........................h..hlLhPlShLYuhlsth+phha.............t.hh....psh.......ch..slP....V.lsVGNlosGGsGKTPhllhLsctL.ppp........G....h...+.sull.SR..GYGu+sp................s....shll.ssps........................sstpsG...DEPlLlhp.........+ss.....ssVsVuss...Rspu.s....ctllttts.....sp............................lIlhDDGhQHh.t.L.t........R.Dl-IlllDu........hR.h.hGN.s....hhLPuGPhREshs..pLcp.s.Dh.llh.............sus.s..p..t.s..t...h..............................................................hpLh.st.h..................................................hth.t....ssptps.........h.tt.h.ph...........lAhAGIGpPtRFFso.Lct....h....G.......h.phh.............t.......s.....hsasDHpsastt-..lptlhp...t...........ll.hTEKDAVKhp.s......hst.t.................p............hahLslcspl.......ssph..thl.....t................................................................... 0 177 353 466 +3236 PF03788 LrgA LrgA family Bateman A anon COG1380 Family This family is uncharacterised. It contains the protein LrgA that has been hypothesised to export murein hydrolases [1]. 28.00 28.00 28.30 28.60 27.80 27.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.10 0.72 -4.29 148 3245 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 2309 0 485 1676 121 95.60 31 72.62 CHANGED h.hlGchlsphht..lPlPGsllGhlLLhhhLh.ts.hlp..............chlctsushLLpphsLhFVPuuVGlhsahsl.ltpphh.lllslllSTllslhsouhlhp ......................hhhGphlsphhs..lPlPGSllGhllLalLLt..hp.ll.h.............caVcsuu.....shLlp.hsLhFVPsuV.ulhpahsl.lp.tphh.lllslllSTlllhlssuhss.t................... 0 117 247 369 +3237 PF04172 LrgB LrgB-like family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The two products of the lrgAB operon are potential membrane proteins, and LrgA and LrgB are both thought to control of murein hydrolase activity and penicillin tolerance [1]. 
25.00 25.00 26.90 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.33 0.70 -5.19 36 3458 2009-01-15 18:05:59 2003-04-07 12:59:11 11 6 2403 0 586 1883 148 210.90 35 88.87 CHANGED Tlhsahluphlac+h+t.hh.sPlLluhllllslLhhhs.........IsYcsYhtuuphlshlLuPA.TVALAlPLYcphchl++aahsIhsulllGulsuhloshhlA+hhuhsptlhhSlhPKSlTTPIAhtlopplGGlsslTAlhVllTGllGullGshll+hhpl+cs.lA+GluhGsuuHAlGTA+AhEhGpppGAhuuLuhslsGllosllsPllhtll ..............................................................................Tlhsahhuphlhp+h.p.....h.....hL....sPLll.uhllllsh.Lhlhs............................IsYpsYh.p...G.........up.hl.......shLLtPA.sVALAlPLYcph.c.hl+cp.a.hs....Ilsul.hlG.o.l.lu.hhosh...hlA.thhGh..s....p.pl.hhSlhP+SlTTPIAhslup.........p...........lGG.......h.s..sloAlhVlhsGllGul.l.G.thlLch...h.+I..........c................s....s......hA......+........GluhGsuuHulGTA+uh.Ehu..p.-GuhuSLuhslsG..llos..ll..sPhlh.l.h................................... 0 145 310 457 +3238 PF01462 LRRNT Leucine rich repeat N-terminal domain Bateman A anon SMART Family Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the N-terminus of tandem leucine rich repeats. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.58 0.72 -4.39 26 4081 2009-01-15 18:05:59 2003-04-07 12:59:11 13 511 131 66 1403 3750 2 29.00 41 7.50 CHANGED tCP.htCpCs......spsVpCsstsLp..plPh.lP ............CP.s..tCsCs..........................sssV.pCss+...sLs...slPssIP...... 0 249 354 721 +3239 PF01816 LRV Leucine rich repeat variant Bateman A anon PSI-BLAST Repeat The function of this repeat is unknown. It has an unusual structure of two helices. One is an alpha helix, the other is the much rarer 3-10 helix. 
21.30 21.30 21.30 21.30 20.60 21.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.84 0.73 -7.10 0.73 -3.41 56 206 2012-10-11 20:00:59 2003-04-07 12:59:11 12 15 101 4 120 200 32 25.00 40 14.00 CHANGED hpVRttVAp+..hPsctLttLsp..Ds- ........tVRtsVApp.thPsphLtpLup..Dss.. 0 37 87 114 +3240 PF05083 LST1 LST-1 protein Moxon SJ anon Pfam-B_6166 (release 7.7) Family B144/LST1 is a gene encoded in the human major histocompatibility complex that produces multiple forms of alternatively spliced mRNA and encodes peptides fewer than 100 amino acids in length. B144/LST1 is strongly expressed in dendritic cells. Transfection of B144/LST1 into a variety of cells induces morphologic changes including the production of long, thin filopodia [1]. 22.30 22.30 22.40 24.60 22.00 22.20 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.81 0.72 -3.98 3 46 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 14 0 6 49 0 61.30 43 73.44 CHANGED LGGLLLLsVlLLoI...................CL.......CuhSpRVKRLERutpsuG.cQEPHYASLQQLPVSSSDITDM........KEDLSTDYACIARSTPT ...........................LGGLLLLhVllL.h...................CL.......ChhppRh+tLEpsh.........t...QE.hht.L..LPs.sp.-h.th..........pED.psDYACIs.s................................ 0 1 1 1 +3241 PF00677 Lum_binding Lumazine binding domain Bateman A anon Pfam-B_291 (release 2.1) Domain This domain binds to derivatives of lumazine in some proteins. Some proteins have lost the residues involved in binding lumazine. 
21.30 21.30 21.30 21.80 20.40 21.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.58 0.72 -4.09 20 8363 2012-10-03 00:38:56 2003-04-07 12:59:11 12 15 3923 28 2044 5873 3972 84.80 32 79.71 CHANGED oGhlcusupItpIpcp......ssshphslphstptLpchhls.soIulsGlSLTVscls....ssthplslh.cTlp.pTshsphKlGscVNlEhs ................................oGhl-shGplh...p.l...p.p.p...................ss...s....h....p....h..pl........p..s....s...........................t....................l..t......c.......h..h..ls..uSIAlsGlsLT.V.....s.....c.....l.s.....................ss....p......Fs..l..s..l..h..scTlp..pTsL......u...p......h.....p.s.G...s.p.VNlEts.............. 0 668 1327 1740 +3242 PF00894 Luteo_coat Luteovirus coat protein Bateman A anon Pfam-B_123 (release 3.0) Family \N 20.30 20.30 22.60 27.60 19.90 17.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.71 0.71 -4.40 13 1368 2012-10-04 01:49:40 2003-04-07 12:59:11 13 2 47 0 0 1080 0 131.00 58 53.22 CHANGED ETFVFSKDsLsGNSsGoIsFGPSLS-CPAFSsGILKAYHEYKITplhLpFlSEASSTouGSIAYELDPH.CKhSoLuShINcFoIoKuGpKoFouctINGtpW+DSoEDQFtILYKGNGsSS.lAGSFRITI+VthQNPK ..EsFlFShDsLpuNSoGsIpFGPSLSpCPAhSsGILKAYHEYKITulplpFhScASuTouGuIuhELDsp.CK.SuLuShINpFsIoKsupKsFsAphINGpEap-o.........otDQFhlLYKGNGsoo.hAGpFhIphps.h.s................ 0 0 0 0 +3243 PF02122 Peptidase_S39 Luteo_ORF2; Peptidase S39 Mian N, Bateman A anon IPR000382 Family This family contains polyprotein processing endopeptidases from RNA viruses. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.03 0.71 -5.01 15 486 2012-10-02 13:45:52 2003-04-07 12:59:11 10 4 34 1 0 526 2 164.30 35 39.72 CHANGED VcGapoaslPpsPPKsuVlplp+ssGoHsGYAoC...lpLasGpsuLlTutHslsc........sttstSh+sG..sKIPL..s-F+slhcssptDlslLpGP.PNWEuhLuCKusphsTsspLu+usAohashccst.WhussAc....IsGs....p..sp.assVLSNTcsGaSGoPYFsGK.sllGVHpGt...sscscNaNh.MuPIPslsGLTSPsalaEoTuPpGcl .........................pGhh.aphspp.scasV.thh.PsDp.ht.Aos.....s.h.ssEcAhhsu.ts.sp.........s.t..s.c.ph.+ss....phPL...sE.p.h...L.....s..s..hhs.ll.phs.sphtshlttKhh+.t.spp.....p..pushp..hh.....cp.....h..psuc.....s.ts..p..sh..ssVh..S.p..s.pts.hussPhash..p.htsHpG....uppsppap..huPlssh.....Pp....ss..p.................................... 1 0 0 0 +3244 PF04662 Luteo_PO Luteovirus P0 protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_4444 (release 7.5) & Pfam-B_3579 (release 10.0) Family This family of proteins may be involved in suppression of PTGS a plant defence mechanism [1]. 26.60 26.60 27.00 45.30 24.60 26.50 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.09 0.70 -4.87 8 137 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 22 0 0 137 0 208.60 35 86.25 CHANGED llsp.sRpLhsp-RshhsutFLhphshhlsh..............t....cspthlRSlLatLPLLL.....sG-hhassuthph.h.....LsRaultsGhsPsso.........psslcLplPuocsshRhhLtRssoouLuE+lpRap-sLt.pGhcpFp+FLssahpshERpLscsshcsshssphhl-LssLGstLtchVhspplhppuhhoclAhthp+laGEssuls ........................lhlppsRhLhhcpcshlhuhhLlsItphlph...........h..t.st.p.l-hhlRSLLatLPLLl........Gct..lasssthph.h.....hscFuLhsGhtPsso.........sttlcLclPsTc.suhRphLtRssuSshuE+hpRhsEshh.puhEtFpRhLusahcptERplhpustc..hGscphl.LusLGphLtchVsspphpppshhuclAhthp+laGEstuh................. 
0 0 0 0 +3245 PF01659 Luteo_Vpg Luteovirus putative VPg genome linked protein Bateman A anon Pfam-B_970 (release 4.1) Family This family consists of several putative genome linked proteins. The genomic RNA of luteoviruses are linked to virally encoded genome proteins (VPg). Open reading frame 4 is thought to encode the VPg in Soybean dwarf luteovirus [1]. Luteoviruses have isometric capsids that contain a positive stand ssRNA genome, they have no DNA stage during their replication. 25.00 25.00 26.80 49.70 24.10 18.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.09 0.72 -4.34 8 687 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 39 0 0 510 0 103.60 47 67.26 CHANGED l.h.E-QAphRHSaSQRTsS+sTPpEVSsSGRlYQsApHSpMEYSRPTMsIRSQsShaSSSsRPLP..sVPSLhshT.sApspP.ssp.hsStStpsspcsSphp .......hh-ppAshRaSaSQpTsS+sT..tpSsSuRlYpsAQ+SphEYS+PTMsIRSQVShaS.StpP.P.pps.SLhshT.pAssp.asspLIsSsSt+hs.psSp..t...... 0 0 0 0 +3246 PF04443 LuxE Acyl-protein synthetase, LuxE Kerrison ND anon DOMO:DM04138; Family LuxE is an acyl-protein synthetase found in bioluminescent bacteria. LuxE catalyses the formation of an acyl-protein thioester from a fatty acid and a protein.\ This is the second step in the bioluminescent fatty acid reduction system, which converts tetradecanoic acid to the aldehyde substrate of the luciferase-catalysed bioluminescence reaction [1] A conserved cysteine found at position 364 in Photobacterium phosphoreum LuxE (Swiss:Q52100) is thought to be acylated during the transfer of the acyl group from the synthetase subunit to the reductase. The carboxyl terminal of the synthetase is though to act as a flexible arm to transfer acyl groups between the sites of activation and reduction [2]. This family also includes Vibrio cholerae RBFN protein (Swiss:Q06961), which is involved in the biosynthesis of the O-antigen component 3-deoxy-L-glycero-tetronic acid. 
20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.34 0.70 -5.77 7 387 2012-10-02 15:58:18 2003-04-07 12:59:11 7 5 327 0 124 413 321 321.20 24 83.12 CHANGED Ms....lcppplhuSoElDDlIF.upP.phoh-EQcplpp+llhpua.aHYppN-pYRpaCpsptVs..-sl...ps..IsDIPVFPTSlFK.....a..t+LhTss--cIEsWaTSSGTSGlKSplAR...DRlSIERLLGSVsaGMcYVGsaa-HQhEL.lNlGPD.....RFNusNIWFK....YVMSLVELL...YPTs..FTV....spDc....IDF.EpThtpLhpIppptKslCLIGsPaFlYLLsp..a..hK-psIp.FpuGpchaIIT.GGGWKppps-u.....LcRt-FNphLh-sFpL.s.sQIRDTF...NQsELNTCFFED...php++....HVPPW..VaARALDPcTLcPlsc.Gp.GLhSYMDASuTSYPuF....IV.TDDIG.Ilcc..pps-..asGspVEIlRRlpTRttKGCulSMspshp ...................................................................................h....................................t....htthhhp.htap...appst.Y.+paspt....t.hs.....tpl.....ps...lp-...I..PhlP...lph...FK........t...tpl.h.oh.s......p.p..p.l.p.t.h.h.p.SS.G.T.....o..G......h...t...S..p...h.hh......D.p......s.ht.t.....t..s..hth..hh..p......h...h..t.....p..t....h.t.h..ls...h..t..P.s..........p.t..s.s..h.h.h.p......h.h.h....hs.....hh.......hs..sp.....ahl.......ptp.t..........h.c.h..pthhttl...p..tt..pspslhlhG.sh.hh..ah.h.h....h...hccp..shp...h.pl...s...t.s.shlhc.sGGaKthppc......ls+pphpphltpshGl.....pphhsha...s.sE..h...s....hhcs...........sttth............p..s..P..sa....l..h..hR.....D.s.p.s.h...p.s.hs.....G..c.h.........Gllphhshhsh.s.suh....ll.T.-DlG.hltt....ss.....s.h........FcllGRh.ptu-h+GCu..........h.................................................................................................... 1 47 90 108 +3247 PF02664 LuxS S-Ribosylhomocysteinase (LuxS) Bashton M, Bateman A, Adamkewicz J anon COG1854 Family This family consists of the LuxS protein involved in autoinducer AI2 synthesis and its hypothetical relatives. 
S-ribosylhomocysteinase (LuxS) catalyses the cleavage of the thioether bond in S-ribosylhomocysteine (SRH) to produce homocysteine and 4,5-dihydroxy-2,3-pentanedione (DPD), the precursor of type II bacterial quorum sensing molecule. 20.20 20.20 21.00 21.00 19.60 18.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.67 5 2731 2012-10-02 15:41:56 2003-04-07 12:59:11 10 2 2619 23 302 1250 26 152.40 46 95.62 CHANGED Ppl-SFslDHTKlsAPhVR.lAc+KsTscGDlITsFDlRFCtPNKE.lMcscuIHTLEHLhAshlRNHtN.hssh-IIDISPMGCRTGFYLollGcPspp-llDllcsoMpDVLclp-..pIPuuN-KQCGsYppHoLEsA+clARsaLs+..tI...KsEpLcls ..........................l-SFplDHT+hpAPhVR.lApphpsP.+G..DsIosFDlRFstPN+-.hhsptGlHTLEHLhAshhRsHls.u...clIDlSPMGCRTGFYhslh.Gp...s..sppclscshcsuhcD.lL.p..s..p...s...clPthN..hQCGsYp.HSLptApph.A+plLpp...tl...ps-.....s............. 1 74 166 239 +3248 PF00206 Lyase_1 lyase_1; Lyase Finn RD anon Prosite Domain \N 20.60 20.60 20.60 21.20 20.50 20.40 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.71 0.70 -5.24 18 16439 2009-01-15 18:05:59 2003-04-07 12:59:11 15 45 5117 204 4142 12313 9223 297.30 26 66.01 CHANGED GchssssDthhuhhopcsh.sh+lst.s.p.......uhshhp+AtApssh.h....pctssuIhpuh-clhc.schscpFslsshppsssTssphplsElIu.......t.........csss+VppupSoNDshsTuh+lhhtpslpp.LlstLppLhpslsc+AcpatDllhhGRTHLQcApPlTlGpchpuasttlpc-hpRlppsh.chhhtshsGGsusGTulNsc..ch..hlsccLu...hou....sssNph-Alus+DtllEhhuslshlsspLtKh.ApDlplhuSG..sGhsElshstsp...GSSlMPtKhNPsssEh...lpthuspVhG 
.............................................................................................................ht...ssthhth.o..t..ps..h.....p..h.....h.....lt......ph.............s.hhh..t.p.u..huth.sh..............................tp.sp.tlh...p.u.h..c.p...l..h..p.....h.p......h..t.........t....t....h....l...p.h.h.p..p.s.......s...t....p.....s..s...p...h...p......l.s.E.h..lu........................................................p.s..sc.clphup.So.........NDh.....h..s.Tuhp...lhhtpsl.chlhst....l.p.p.L.h.psltppAp....pa......p..c..hl.....hhGRT..H.h....Q..c...A.pPhThGpchtuas..t.l.p+...s.hp....+.lppsh....p.h.............h.h....t.h..ht.G....u.s...u......s.........G.....T......u........h.............s.......h........s.....................t.....................t...........h.s..s..c....pLu...................h..s.s.....................s.t.s.........h..h.........p...s..l....s....s+.D.h.h......s..c.....h....h..u.....s.lshlu....s..pLp+lupDlhh.hsos...huhs.El.s......h.st.s.p...G.SSl.MP.t.......K.hNPs.hsEh.....lpthshplhG.............................................. 0 1294 2586 3471 +3249 PF02278 Lyase_8 Polysaccharide lyase family 8, super-sandwich domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_4840 (release 5.2) Family This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen. 
20.00 20.00 23.30 20.40 19.40 18.90 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.84 0.70 -5.03 5 1067 2012-10-02 23:57:29 2003-04-07 12:59:11 13 57 784 47 126 780 15 260.60 32 28.72 CHANGED scP..stL+tFNsMDRlsaaNt-huFuFGLSMsSKRTt+YEuhNcENhRGWYTGDGMhYLYN..oDssHYoDtFWPTV-hYKlPGTTspDs+psDs.Tc-.............tpsss.oscauGo.KlDDpaAuuuMDFcNhscTLTA+KSWFhLcDKIVFLGoGIpsTsssssssTTIDsRK.tsssuYssYsscKphohscup.....-spSVFLESa-s++NIGYaFhcsusIolp+csQTGsWp-IN.sSpocccVos-ahTloQcHusc..sssYGYhllPGlsRssFs ............................s.....p.hpha.spMD+hshhp...t.....s..auhuLShhSpR..ltsYE......t.h..N.s.E.Nh+GWaTusGMhY.LYs.....sD..h.s.c...Y.....p-....s..a....W..s..T...sD.h+lPGTTs....h....s...s...p...hs..cs....sss........................................t.t..hps...p...s..huGu....ph....s.s.......p..h.us.h...u.....M.......c..a......s...............s............................s...........p..............o................L....s..A.....+......KSWFhLsDcI.l.h.LGosI...p.........s..s.s.......s........p...s..s...............t.To.l.-.p....R+................s.ssh.pl..h.s..s.s....t.p.h..s...s..p..pp...............................psp...l.h..h....p....s......t..s...s..p...p..s..l.G...Y.a..F.h.p..p.....s.....s.......l.sh.p.pppp.......pGpWp..-I..N.....p..u...p....s....s....c......p....h......pspahplt.sHsps.......sspYuYhllPshspt........................................................................................................ 2 69 106 120 +3250 PF02884 Lyase_8_C Polysaccharide lyase family 8, C-terminal beta-sandwich domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_4840 (release 5.2) Domain This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen. 
20.20 20.20 20.20 20.20 19.90 19.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.06 0.72 -3.96 29 875 2009-01-15 18:05:59 2003-04-07 12:59:11 12 50 698 45 82 585 8 76.70 25 8.46 CHANGED lplLpNssphQuVpcsp........slhussFa.ssu......psst...lssspsuslhl+cpsss.hplslu-Pspsssshp...............plh .........................hpllcNscslQuVccsc........slhuhsha..ssust........pssp.....l.pl.p....ptu..hhhl.+......cp..ssp..hcluhh.sPppppss.s...hh........................... 0 46 68 79 +3251 PF00062 Lys lys; C-type lysozyme/alpha-lactalbumin family Eddy SR anon Overington and HMM_iterative_training Domain Alpha-lactalbumin is the regulatory subunit of lactose synthase, changing the substrate specificity of galactosyltransferase from N-acetylglucosamine to glucose. C-type lysozymes are secreted bacteriolytic enzymes that cleave the peptidoglycan of bacterial cell walls. Structure is a multi-domain, mixed alpha and beta fold, containing four conserved disulfide bonds. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.91 0.71 -4.14 21 1035 2012-10-03 00:09:25 2003-04-07 12:59:11 15 9 315 768 356 1346 9 110.30 37 77.66 CHANGED KhasRCELu+pLKt.GhDGYpGlSLusWlChspaESuYsTpAhspN..ssSTDYGIFQINs+aWCscucTPtupNhCsIsCspLL-DDITcslpCAK+Il.DspGlssWlAW+shCp.cDLspa.tsC ...................................................................hhpRCELActLp.t...shs.att.pLssW.l.Cls.aESuasTpshs.......tss...sG.S.s-Y...GlFQINs+aWCp...s...sp..h.......s.......t.............shCpl.sCs...t.L...Lsc-lscsltC.A+p.Ih.p.....t.......p.......G.......h.......puW..........sWpt.hCp...pplsph.ttC.................................................................. 
0 50 75 177 +3252 PF01810 LysE LysE type translocator Bashton M, Bateman A anon Pfam-B_1537 (release 4.2) & Pfam-B_7916 (Release 8.0) Family This family consists of various hypothetical proteins and an l-lysine exporter LysE Swiss:P94633 from Corynebacterium glutamicum which is proposed to be the first of a novel family of translocators [1]. LysE exports l-lysine from the cell into the surrounding medium and is predicted to span the membrane six times [1]. The physiological function of the exporter is to excrete excess l-Lysine as a result of natural flux imbalances or peptide hydrolysis; and also after artificial deregulation of l-Lysine biosynthesis as used by the biotechnology. industry for the production of l-lysine [1]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.14 0.71 -5.02 19 13176 2012-10-03 02:02:08 2003-04-07 12:59:11 13 5 2968 0 3080 9500 5031 188.60 20 91.83 CHANGED lshhhsPGPsshhlhppulp+uhttulhsshGshluDhlhhhlshhGluhllt.sshhhsllthhGusaLhahGhtshRsshpsps.pt..........................tssp.........thhpsahpGlhloLsNPpslLaahulhushls....tht.hthhhhhsuhhlusl.lWhhhluhhsuhhthhhpt.th...hlshlsuslh...................hhFulhLlhpu 
...............................h..hhlsPGPss.h.hlhpp....u.l......p.....p.G.....h...p.tuh....hs....shG.h.sh.G.....h...l...h...h...h.hu...s...h...G...l..us...l..lt...t...s.s.h......l.h.p...l...l..ph....s.G...u....s..YL....l....a..lG....h...p.h....h.....+....u........s.......h...p.....t.....p...t...t...t.t..................................................................................t.tst...................shhp..h..a.hp..uh...hss...l.h.N.....PK.s..h...l.a.h...lu.l...h...s..p..h..lss.....................t..s..h...h....t...h....h....h......h.....s..h...s......h.....h.....h.......s.....s....h......h....a....h.....h......h......h....u.......h....h....u...s...h....h......t....t....h....h...p....p....t....p.hh......phl.s...t.l..h.G.s.lh...................h..h.huhhlh...t........................................................................ 0 649 1498 2340 +3253 PF03641 Lysine_decarbox Possible lysine decarboxylase Bateman A anon Pfam-B_741 (release 7.0) Family The members of this family share a highly conserved motif PGGXGTXXE that is probably functionally important. This family includes proteins annotated as lysine decarboxylases, although the evidence for this is not clear. 
21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.77 0.71 -4.34 32 5042 2012-10-01 21:16:48 2003-04-07 12:59:11 9 29 3532 56 1524 3863 1410 131.70 28 49.24 CHANGED Msslscush..cuGup........slGlhsphlhs.pchs........................sptls-l..lhhpshapRKthhschu-AFlshPGGhGThEElhEllThhQlGhc...p+.PllLhs.su...aacsLhpalc.phlppGhlsssstp.hhhhsssscplhptl ...........................................................Mtusscush.......cu.u.u.p...........slGl..h....shlls...p.chs.......................................................ss...h..l...s...c..l.......lh..h..p..s..h..p.p..RKt.hhs........c........h........u........c.u.h.....lsh.P.........GG.hGT.......l-El.hEhloh..hQh.shp.....................pp...P..l...l.Lhs.st..........aaps.L.h.pa..l...p..thh..t.p..u...h...lstpshp..hhhls-ssp-lhp.................................................. 0 441 972 1290 +3254 PF02402 Lysis_col Lysis protein Mian N, Bateman A anon Pfam-B_1555 (release 5.4) Family These small bacterial proteins are required for colicin release and partial cell lysis. This family contains lysis proteins for several different forms of colicin. Swiss:Q02112 has been included in this family, the similarity is not highly significant, however it is also a short protein, that is involved in secretion of other proteins (Bateman A pers. obs.). This family includes a signal peptide motif and a lipid attachment site. 25.00 25.00 57.10 57.00 22.10 21.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.32 0.72 -4.38 9 52 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 28 0 2 37 0 45.10 67 96.50 CHANGED MK..Klhhhhlllls.hhLuACQANYIRDVQGGTVAPSSSucLTGlulQ ..MK..KlhhlhlLllu..hhlLuACQANYIRDVQGGTVuPSSSuELTGlusQ. 0 0 0 2 +3255 PF04971 Lysis_S Lysis protein S Finn RD anon Pfam-B_7633 (release 7.0) Family The lysis S protein is a cytotoxic protein forming holes in membranes causing cell lysis. 
The action of Lysis S is independent of the proportion of acidic phospholipids in the membrane [1]. 20.70 20.70 20.80 21.00 20.60 20.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.17 0.72 -4.29 7 1200 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 353 0 16 366 0 66.40 84 94.47 CHANGED M-KITTGVuYsTSAsusGYWFLQhLD+VSPSQWAAIGVLGSLlFGLLTYLTNLYFKI+EDRRKAARGE ....................................MEKITTG...VSY.sTSAsGTGYWhLQLLDKVS.PSQWuAIGVLGSLlFGLLTYLTNLYFKI+ED+RKAARGE.................. 1 2 5 9 +3256 PF01186 Lysyl_oxidase Lysyl oxidase Finn RD, Bateman A anon Prosite Family \N 20.30 20.30 20.30 21.50 19.50 17.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.49 0.71 -4.70 4 515 2009-01-15 18:05:59 2003-04-07 12:59:11 12 16 109 0 231 428 15 188.90 54 36.67 CHANGED PDLV.DsthVQtosYlpctsLa.LpCAAEENCLASSAhRssshsYspRhLLRFspRl+N.GpADFhPptsRaSWpWHpCHpHYHSMD.FoHYDLLsssus.KVAEGHKASFCLEDTpCs.GhhKRYACssa.pQGlSsGCaDTYptDIDCQWIDITDV+PGNYILpVpVNPsa.VsESDFoNNlhRCsh+YsG++laspsC+Iusu ...................................................................PDLV.cs.hlQto....sYlpchsha.LpCAt.EENCLu...u.....SAh..p....s....p...........h.s.....Y..s....hRhLLRFsppl+NtGpuD.F...h...P..p.tsR..asW.WHpC..H..p...HYH.SM-.FoHYDLLshssp.....KVAEGHKASFCLEDT.p.....C...........c.....Gh...p+..........R.............Y.........s..........C.s.s.....a..............p...Q.G.......losGCaDTYptDIDCQWIDITDVp.P.G.s.Y.ILp...............VslN..P....papVsESDaoNNsh+Cph...+YsGp....h..h..h..h.sCphs............................... 0 40 63 139 +3257 PF02401 LYTB LytB protein Mian N, Bateman A anon Pfam-B_1515 (release 5.4) Family The mevalonate-independent 2-C-methyl-D-erythritol 4-phosphate (MEP) pathway for isoprenoid biosynthesis is essential in many eubacteria, plants, and the malaria parasite. The LytB gene is involved in the trunk line of the MEP pathway. 
18.70 18.70 19.10 19.20 18.50 18.50 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.86 0.70 -5.49 13 3664 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 3349 24 911 2662 2891 277.30 42 82.28 CHANGED IlLApPRGFCuGVcRAIphsEpA.Lppttu.PlYlpppIVHNppVVspL.RpcGshFlEsl........p-lP..........cGshVIaoAHGVuspVcptAccRsLpllDATCPhVTKVHptspphu+cGaclILIG+csHsEVhGshG.ss.........................spshlVpshc-ltpLshps.p+luhloQTTloh--st-IlstLccRascltt.........spssIChATpsRQcAl+sLutcsD.lllVVGupNSSNSsRLhElApcpGs.suaLl-ss-.................-lpt-Whpssp.slGlTAGASTP-hllpsVlp+Lpph ..............................................llLAsPRGFCAGV-RAIp....l.......V..Ep.....A........L.........c.........h.....a.............s.............s....PlYV++ElVHN+a.....VV-sL.c.p.+.G.sl..F..l...-..cl..........s-lP.......................................cs.s..h.VIFSAHGVo.sVppcAc.p......R....s.L..p..l..h..DATCPLV.......T....KVHhcl....t....+............h......sc...........c......G...........h....clllIGH.t.GHPEVcGThGphs........................................sth.h...L....V.......-.......s...s.....-.....D.......l.........t.........p.........L.......p.........l....p....s.........p..c..lshloQTTLSlD-Ttc.llctL.+p..+..a.P.p.lhs...............................Ph.pc-ICYATpNR.QcAV.......+.p..l....A.....p..p.......s......D.....lllVVG.upNS.SNSsRLhElA.pc................h.......Gt....su.aLl-ssp.................................-l....p....p..Wl.....p.....s.....s...p......sl..GlTA.....GASsP-hLVppVlsplpt.h............................... 1 319 613 776 +3258 PF04397 LytTR LytTr DNA-binding domain Yeats C anon [1] Domain This domain is found in a variety of bacterial transcriptional regulators. The domain binds to a specific DNA sequence pattern (see [1]). 
21.50 21.50 21.50 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.83 0.72 -3.92 114 8937 2009-09-11 13:50:36 2003-04-07 12:59:11 10 26 2937 3 1808 6179 571 96.10 22 42.29 CHANGED sphhhlshp-Ihal..ps.pschlplhst.....spphhh.ptoLpplcpcls..stFhRsHRShlVNlppIpclp.......t..ph..plhhps.....spplslSRphhcpl+phlt ............................................p..hhhlshp-Ih.ah........ps...p.....s...c..hl...h....l...hop............sp....p....h....h.h....p..h.....s.L.p.....p.....l.c...p.....p.....L....s........pp.......F.h.RsH+Sa.lVNhp.plpplc..........................ph...pl....h..lps.............sppl..l..SRphhcpl+phh.t........................... 0 766 1373 1637 +3259 PF02370 M M protein repeat Bateman A anon Pfam-B_208 (release 5.2) Repeat This short repeat is found in multiple copies in bacterial M proteins. The M proteins bind to IgA and are closely associated with virulence. The M protein has been postulated to be a major group A Streptococcal (GAS) virulence factor because of its contribution to the bacterial resistance to opsonophagocytosis [1]. 20.80 15.60 20.80 15.60 20.70 15.50 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -7.31 0.75 -7.27 0.75 -2.86 59 653 2009-09-11 13:49:06 2003-04-07 12:59:11 11 24 24 1 3 651 0 21.10 53 14.87 CHANGED ptK+plEuchpcLppcpptlc ..EAKKclEA-htcLpp-pQh.c.............. 0 0 0 0 +3260 PF03855 M-factor M-factor Finn RD anon DOMO:DM08003; Family The M-factor is a pheromone produce upon nitrogen starvation. The production of M-factor is increased by the pheromone signal. The protein undergoes post-translational modification, to remove the C-terminal signal peptide, the carboxy-terminal cysteine residue is carboxy-methylated and S-alkylated, with a farnesyl residue [1]. 
25.00 25.00 28.40 83.80 22.30 16.60 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.19 0.72 -3.97 2 3 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1 0 3 3 0 42.30 72 100.00 CHANGED MDShAssspSSSlVNAhNp.Ps-slp.pslKNYTPKVPYMCVIA MDSMANoVpSSSVVNAGNK.PoETLN.KTVKNYTPKVPYMCVIA 0 3 3 3 +3261 PF05034 MAAL_N MAAL; Methylaspartate ammonia-lyase N-terminus Bateman A, Moxon SJ anon COG3799 Family Methylaspartate ammonia-lyase EC:4.3.1.2 catalyses the second step of fermentation of glutamate. It is a homodimer. This family represents the N-terminal region of Methylaspartate ammonia-lyase. This domain is structurally related to Pfam:PF03952 [2]. This domain is associated with the catalytic domain Pfam:PF07476. 21.00 21.00 21.40 36.90 20.00 18.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.79 0.71 -4.46 4 186 2012-10-02 11:54:41 2003-04-07 12:59:11 8 4 173 12 42 142 2 155.20 62 38.70 CHANGED M+IccVLhTPGhuuFaFDDQtAI+sGAspDGFsYsGsPsTsGFpplRpsGEulSVtLVLpDGpVshGDCAAVQYSGAGGRDPLFhApcahsllpctltPtLlGRDlssahssAthh-ch.lstptLHTAlRYGlSQALLcAsAtsptsThsEVlsDEash ..MKIKpslhTsGhouFYFDDQpAIKsGAsHDGFhY.sGcPVTtGFsuVRQAGEslSVtLILEsGuVAhGDCuAVQYSGAGGRDPLFLA-cFIPhlpccI+PhLlGRDlcuFhssAchFDcL...+l.........D...Gp.hLHTAlRYGlSQALLDAsAhAs.s+hhsEVVsDEap................ 0 16 27 36 +3262 PF03281 Mab-21 Mab-21 protein Mifsud W anon Pfam-B_4530 (release 6.5) Family This family contains Mab-21 and Mab-21 like proteins. In C. elegans these proteins are required for several aspects of embryonic development [2-3]. 
20.10 20.10 21.30 20.50 19.10 19.80 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.92 0.70 -5.28 33 746 2012-10-02 22:47:23 2003-04-07 12:59:11 9 9 110 0 456 647 1 241.40 20 57.39 CHANGED uhh-slcltssscach.........hh.lpt.shhp...................hppptphsshshl+l.............................hps.phhpcals........................tsshlssp+lhstFt.sllppu...............................lsphphps............h.phttpssslpltlppt.........thslDllsulch.shWPp.u.pa.hh.............phWPs.thhpplts..pGhaLls+.........................................tshsshpWRlSFopuEptLhpph............sss++cClpllKtlpcphh...........hsslsoYHLKTllhapC-+hPpt..pWpp.sslupplhsllccLhpCL....ps+plPHYFlP.....phNLhpsh..spstlpphspplhclhp ......................................................................................................................................................................................................................................................................................................................................phh................................................................................................t.hhsst...phhphh..ph..lt.s..................................................h.p..t.t.................t...ts....hpl.ht.............h.hplhPshph...........t................................................sh.Llst.......................................t..t.t.pss.tWhluFstsEptllp.h............sus+.hpsLpl....h..+s..l.tcpth.............................thtsls.sYHL.+ollh..ahspch...st....p....Wpp......ptlupph.tll.tLhpsL....................ppcp..hspaahs..................phshhpth....tth..hh........t............................................... 0 90 123 236 +3263 PF01823 MACPF MAC/Perforin domain SMART anon SMART Domain The membrane-attack complex (MAC) of the complement system forms transmembrane channels. These channels disrupt the phospholipid bilayer of target cells, leading to cell lysis and death. 
A number of proteins participate in the assembly of the MAC. Freshly activated C5b binds to C6 to form a C5b-6 complex, then to C7 forming the C5b-7 complex. The C5b-7 complex binds to C8, which is composed of three chains (alpha, beta, and gamma), thus forming the C5b-8 complex. C5b-8 subsequently binds to C9 and acts as a catalyst in the polymerisation of C9. Active MAC has a subunit composition of C5b-C6-C7-C8-C9{n}. Perforin is a protein found in cytolytic T-cell and killer cells. In the presence of calcium, perforin polymerises into transmembrane tubules and is capable of lysing, non-specifically, a variety of target cells. There are a number of regions of similarity in the sequences of complement components C6, C7, C8-alpha, C8-beta, C9 and perforin. The X-ray crystal structure of a MACPF domain reveals that it shares a common fold with bacterial cholesterol dependent cytolysins (Pfam:PF01289) such as perfringolysin O. Three key pieces of evidence suggests that MACPF domains and CDCs are homologous: Functional similarity (pore formation), conservation of three glycine residues at a hinge in both families and conservation of a complex core fold [1]. 
20.70 20.70 20.80 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.30 0.70 -4.64 69 1344 2012-10-01 20:08:01 2003-04-07 12:59:11 14 63 273 9 771 1199 5 197.40 16 29.28 CHANGED hshsssspa.pph........phpppppphhhtpspsphhphph...ts.ph........LsppF.....hptlppLPsp.as.sp.............................ahphhcpaGTHhlspsphGGphthhhph.......spsplpptphpttc............hssthth.h..................thphptstpspppppppp..s......ppthppthshlhGGss................tsspsappWtpolpp.pP.ssIphp.....................................lhPl....ppLlpp...............tppslppAl...ppY .........................................................................................t.......th.pt..........t.tppp.p..h....h....h...th...h...p....hh.p.hph........p.pph..................LstpF.....hptlpp.L....sp.p...h.s.st...........................................h..phl.ppYGTHal.ss.ss.h..........G.......Gphphhhhh........................sppp...hp.p.pthptt.p...................hpht.hth.h..............................ht.p.hstp....tt....t..p......tt....t......................................pt....p.h....hh...GGp.................................thppW...hpolt........t..tP...hlp..h.p....................................................h.Plhpll.....................ht..hptAh........................................................................................... 1 245 380 547 +3264 PF03523 Macscav_rec Macrophage scavenger receptor Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 45.90 44.70 23.80 18.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.30 0.72 -4.21 4 55 2009-09-11 06:31:35 2003-04-07 12:59:11 8 6 27 0 17 49 0 48.90 75 13.96 CHANGED MEcRIQ.ISDocANLlDoE+FQNFShTTDQRhNDVLLQLNSLlsolQtH ....MEKRIQaISDhEANLIDoEHFQNFSMsTDQRFNDlLLQLSTLhSSVQGH.. 
0 1 1 2 +3265 PF03817 MadL Malonate transporter MadL subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 27.90 27.70 24.60 23.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.67 0.71 -4.27 4 182 2009-09-11 09:12:09 2003-04-07 12:59:11 8 1 176 0 51 151 39 121.60 57 91.51 CHANGED MIIYGVALLulCpLsGlhlGshLGshLGVKANVGGVGIAMlLLIhspphLtK+GtLsphop.GVtFWuAMYIPIVVAMAApQNVluALsGG.lALLAuluAllVshhhIslls+.u..puph.sAlE .MIIYGsALLAlCpLAGlhlGDlLGshlGVKuNVGGVGIAMlLLIhh+haLp++Gh.hspcoEtGVuFWuAMYIPlVVAMAApQNVVuALpGGPlAlLAuluuVslCsssIslLuRhu..+tps.....t................ 0 9 20 35 +3266 PF03818 MadM Malonate/sodium symporter MadM subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 59.60 59.60 20.50 19.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.77 0.72 -4.40 4 185 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 183 0 51 137 41 59.80 57 23.68 CHANGED a-.lsssLspNGLITuFAllGlhMaVSahLSthLT+G+lHGSAIAIhlGLVLAYVGGssT ......-hlpKsls+NGLlouFAlVGllMalSshLS++LThGRVHGSAIAIlIGLlLAYhGGshT... 0 9 20 35 +3267 PF02545 Maf Maf-like protein Mian N, Bateman A anon COGs Domain Maf is a putative inhibitor of septum formation [1] in eukaryotes, bacteria, and archaea. 
19.40 19.40 19.60 19.70 19.10 19.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.13 0.71 -5.19 12 5338 2012-10-01 20:37:09 2003-04-07 12:59:11 9 24 3690 6 1449 4011 3017 188.60 33 91.91 CHANGED h.llLASsSPtRpplLcpht..lshpshsuph-Ep.....tp..PtthshcLAppKAhu.luphtss...........sllluADTllhls.GclhsKPpst-cAhphL+phuGps...tpVhTulslhpptp.hpsh.....s.TcVpFpp.lscccIpsYlpos-slcpAGuaslpGhGuhhlcplcG.sh.sVhGLPl.tlhphLpphsh.........hts.h ..............................................................h.pllLASsSPtRp..pL.L.p.p.l......G.........l.s...........F.p.h....h.........s.........s........c.......l........D.Es................h.s...-.......t.........................P......p........p...h...........V...h.....+.....LApt.K.A.p..u..........l.s..p.h..hss......................................................tlllGuD.ol........l.s..............l.................s.....G..........c.......l....lG.KP......pstc...cAtp.....hLp......t......h.......S.G..pp...........apl..hT..u..l....s..l...h.....s...s....p.....p..t....htph......................shocVpF+p....Lo...-p...-...I...p...s....Ylt..s..s...-..P..h..c.pA..Guasl..p.G.h.G.s.t.h.l.cc.l.p..........G..shpsllGLPLhphh.ph.Lpp.s........t.................................................. 2 469 903 1204 +3268 PF02792 Mago_nashi Mago nashi protein Bateman A anon Bateman A Family This family was originally identified in Drosophila and called mago nashi, it is a strict maternal effect, grandchildless-like, gene [3]. The human homologue has been shown to interact with an RNA binding protein Swiss:Q9Y5S9 [1]. An RNAi knockout of the C. elegans homologue causes masculinization of the germ line (Mog phenotype) hermaphrodites, suggesting it is involved in hermaphrodite germ-line sex determination [2]. Mago nashi has been found to be part of the exon-exon junction complex that binds 20 nucleotides upstream of exon-exon junctions [4]. 
20.10 20.10 22.60 22.10 18.60 18.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.64 0.71 -4.50 3 383 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 275 17 226 307 1 132.20 69 83.18 CHANGED DFYVRYYVGHKGKFGHEFLEFEFRsDGpLRYANNSNYKNDTlIRKEsFVSEuVLKElKRIV-DSEIlKEDD-NWPEPDKlGKQELEIlhsNEHISFsTuKIGSLADVQNScDPEGLRlFYYLVQDLKCLVFSLISLHFKIKPI ...........................................................pFYLRYY...lGH+.............GKFG.HEFL...EFEFR.....s..DG.....+...LRYAN..N....SNYKNDshIR...KEs..aV.ppuVhcE.lKRII.c-....SE........I..hK.......EDDshWP.PD+..l.GRQELEIllGsE......HISFTTo.KIGSLlDVspScDPEGLRlFYYLVQ.DLKCLVFSLIuLHFKIKPI....................... 0 80 120 175 +3269 PF03082 MAGSP Male accessory gland secretory protein Bateman A anon Pfam-B_256 (release 6.4) Family The accessory gland of male insects is a genital tissue that secretes many components of the ejaculatory fluid, some of which affect the female's receptivity to courtship and her rate of oviposition. This protein is expressed exclusively in the male accessory glands of adult Drosophila melanogaster. The proteins are transferred to the female fly during copulation and are rapidly altered in the female genital tract [1]. 
25.00 25.00 115.50 47.80 21.00 20.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.82 0.70 -5.00 2 35 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 7 0 1 123 0 235.40 77 99.83 CHANGED MNQILLCS.ILLLhFTVAsCDuEpQLDSuhcL.....KSAsLKNVAPKNstTQAcIsKDDVALKsuKKGDYlMDI-lSDhPLDDYPINpSKShKsSSh..ssI.....LsD....chNQGSNQ.ALKALppRLlhEQNNsLhLRNHSl.LMpEIEARKTDIIpsRQLNlDLELELpolpR+L.EhN..lQNsRKSTKsCKKRsSKD.APP.sNQhQEs.V+NTYRNKYLTLLppLuQKIN.EIApV...ATDVPTtpsPSpGN.sTh ...................LhFsVANCDGEHQLDSSl.D..L..K.S.DppKSAVLKNVAsKNDATQAEIAKDsVALKSGKKGDYVMDI-.V.SDMPLDDYPINNSKSRKNSSTLPSP.I.....LTD....KLNQGSNQIALKALKHRLVMEQNNNLFLRNHSVSLMNEIEARKTDIIQARQLNIDLELELEALKRKLSEMN..VQNARKSTKSC.KKRPSKDIAPP.sNQLQEVIVKNTYRNKYLTLLTQLAQKINYEIANVNNPATDVPTGKSPSEGNPSTT................... 0 1 1 1 +3270 PF04112 Mak10 Mak10 subunit, NatC N(alpha)-terminal acetyltransferase Wood V, Finn RD anon Pfam-B_9176 (release 7.3); Family NatC N(alpha)-terminal acetyltransferases contains Mak10p, Mak31p and Mak3p subunits. All three subunits are associated with each other to form the active complex [1]. 20.70 20.70 20.80 22.90 20.30 20.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -5.03 18 303 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 263 0 218 301 3 149.50 30 21.95 CHANGED +sGcLV+-spFsLF-uhuAlEIh-sKMDoGhlt.s.pt....tlcpuhssu.........pslshtclluIhDpLhss.huWhpGa.oLuQTVhoClYlcpl......t...t.............................ssplhcpVLpuaslulhphhshltshlpsutlaEE..EDhsspshshshhsphs.htchhshL ........................GcLl+st.h.FsLFEuhuAlE........lMDPKMDuGhlsst.p.......th-p.shcss....................csLs.tElluIhDplhsp.hsWhpGa.sLuQTlaTslYl..cp..............................................................sp.l.p.sL+....uashulh+hsshspphlspsthaE..E..E.DF.s.shshp..hhsphs...h....h........................................................... 
0 77 120 182 +3271 PF04874 Mak16 Mak16 protein C-terminal region Mifsud W anon Pfam-B_4960 (release 7.6) Family The precise function of this eukaryotic protein family is unknown. The yeast orthologues have been implicated in cell cycle progression and biogenesis of 60S ribosomal subunits. The Schistosoma mansoni Mak16 has been shown to target protein transport to the nucleolus [1]. 20.30 20.30 20.30 21.30 20.00 19.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.49 0.72 -3.93 9 337 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 299 0 235 325 3 98.70 42 33.08 CHANGED lspKlc+REppREcKALlAA+L-+sIE+ELl-RLKpGsYGD..aNhsppsacKhL-tcc.cppsEtE.-....p-Ep--uchEaVuDp..-..t...-lpDhED .....................h.spKlcRREppREcKALsAA+lEpuIEKELLERL+p.Gs.YG...Dh..........h.....Nh.....spphacKsL........c..............p.......p.-...........................c............p............p...........s.........c......p.......-....t........-...............c..........t......-.......-........-....-.........E..c..-....................s..ch.....Ea..V..p.D...t-.ptp..-hp.D.hE............................................................. 
1 82 130 195 +3272 PF01274 Malate_synthase Malate synthase Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family \N 20.00 20.00 20.00 20.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.84 0.70 -6.50 16 2722 2012-10-10 15:06:27 2003-04-07 12:59:11 17 11 2102 17 690 2600 3654 522.80 38 94.48 CHANGED hslhsshs.ptscILTpcALsFlApL.pRcFsspp+pLLppRpchQtclDpGp.hs..........cFL.PETsalR.-ssWps.us.......................................................................................................................................lsssLtDR+VEITGPsD.RKMllNALNouAhsaMADFEDSsuP...........oWpNllpGQlNLpDAlcspIsapst.tsKpYpLs.......cphuhLhVRPRGWHLsE+Hlh.lDG.....-slsGu...lhDF...GLaFFHNt+ph.......lupGtGPYFYLPKMEuahEA+lWN-lFshApDhlGlPRGTI+ATVLIETlsAuapM-EIlYpLR-HuuGLNCGRWDYlFShIKshpsp.sshlLPDRstVTMssP....FMpAYschLlcTCH+RusaAhGGMAApIPI+cD.tANstAh-pV+tDKlREspsGaDGoWVAHPuLsslshpsFpphh.spPNQhphh+pc...plo....tsDLLs.thPsu.......phTpcGlcsNlplGlpYltAWL.pGlGC..VPlhp...LMEDAATAEISRsQlWQWl+HGVshc-....Gcplohphhtpll.cEphstlt.t.u....tsscap.Atphh.t.sh.uscasDFLTh.uYscls 
.............................................................................................................h..hht..s..tptl.s.sphhphlspl.hpchsPcppp.L..LstRtphQtplDthp..hs...................sFL..pE..hs.lh..-.......ssh..hh..ts....................................................................................................................................................l....h-Lp.cp......s...ch..sG..s..s.-.t.........t.hlhs...s....l.sts.shshht.DhED....SlAs....................sW.slhp.Gplshp.....suhsGph....hp....s......s.s+.Yp.ht...............tshulLhlRshGaHhscpplh...hcG.........ptlsuu...lhDh......uLhh.h..Hshpsh.................u+..u.uusYhYlPK.........h........cu.pEAthWs-l......FshsE......chlGls..........p.GT.IKsslLlEpl.Ashphc......EllatlR-+lstlNs.GhhDhhhs..I+ohhpt.ss.hl...Rps...shhs.pP....alpAYpch....lpo..............................hh..huGhu.uh.h......ht..ps....................s...sphhs..chhtsKhtcspsGt......sssWVsp.PshAsh.ththapph......spssQhpl........tppp.....h.hs..........hcp.....LLs.....s..u...............pchp.ptlcsNlpshl.........tYl.tWl.pGs.GC..VP.hs...........LM.EDtAThcISppplhpWl+H.t...........h.s...hp..s...............h..........sohphht....phl.spp.t...................................spsta..p.h...........usph.s.lsh.uhp.l................................................................................................................................. 1 193 413 579 +3273 PF02330 MAM33 Mitochondrial glycoprotein Mian N, Bateman A anon Pfam-B_17905 (release 5.2) Domain This mitochondrial matrix protein family contains members of the MAM33 family which bind to the globular 'heads' of C1Q. It is thought to be involved in mitochondrial oxidative phosphorylation and in nucleus-mitochondrion interactions [1]. 
20.90 20.90 21.40 21.50 20.50 19.70 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.39 0.71 -4.56 7 515 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 299 14 330 513 1 177.80 22 68.50 CHANGED LssEIc.E+c.th........pcsLs.hsh..shc.pssssEscLh+Ks.uuEcIpVsFNlspsls.sass-p-.spht...-pp.p.sssssh.s.ssppD.s.h.shsl-hphPtsp.lsacspA.....psDh..Fslc-h.htsou-s........EW..c...as.shcsLD.sLhD.thhcaLt-RGlDsshuc.LVphusshE+pEYIsaLEclKpFl .........................................................................................................................................................Eh.......t.............................h...t............th.....p.sttth.Lh+ph.....tsEplpl.h...p.....hs...p..................................t...t.....t........t.......t...................................t..........................................................h...........h....s..l.sh..p..c.s..s..ps....lph.ps.sh............................ss........htIcpls..h.h.tp.sp..................t-h.......tt.h.....Yh...GPsassLD-pLpc.tlhcaLc.cRGlssphusal.cahphKEppEYlpWLc....slcpFl................................................. 0 97 176 265 +3274 PF02157 Man-6-P_recep Mannose-6-phosphate receptor Mian N, Bateman A anon IPR000296 Family This family includes both Cation-dependent and cation independent mannose-6-phosphate receptors. 
20.40 20.40 20.50 20.40 19.50 19.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.98 0.70 -5.39 2 195 2012-10-02 14:19:21 2003-04-07 12:59:11 10 3 147 24 129 261 1 170.90 29 61.70 CHANGED M.PhpusWRTtLLLLLLhuVAVRESWQhEEKoCDLVGEKsKEScpElALLcRLpPLFNKSFESTVGQusDhYSYlFRVCREAuNHSSGAGLVQINKSNsKETVVGRhNETpIFNGSNWIMLIYKGGDEYDNHCG+EQRRAVVMISCNRHTLAsNFNPVSEERGKVQDCFYLFEMDSSLACSPElSHLSVGSILLVhhASLVAVYIIGGFLYQRLVVGAKGMEQFPHLAFWQDLGNLVADGCDFVCRSKPRNVPAAYRGVGDDQLGEESEERDDHLLPM ......................................................................................................................................................................................................................................................................................................................................................+p...t...h...h...Cs....p.p.......t........h...hs..p....p.p.ChY..hFEh.c......S........thAC.........s...s.......t..t....s.....p.....l..u.....s..G....u..l...h.h.llh.hhh.ls..VYl.l....GGhhYpRhVhtt+GhcQhPphuhW.tslhshh..t..D.hh.h..h...................................................................................... 
0 39 63 100 +3275 PF01232 Mannitol_dh Mannitol_dh_N; Mannitol dehydrogenase Rossmann domain Bateman A anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.80 21.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.86 0.71 -4.27 16 4952 2012-10-10 17:06:42 2003-04-07 12:59:11 18 14 2308 4 770 3058 403 147.30 28 33.49 CHANGED .phlHhGsGNhtRu....alspLlsp.ssh-hshs-ls.pssst......LptQcphhslltp.......sscpsclluslssh.shpts..psllcthsc..sclVoholst.............sshPpIttslspst............Lp+RpstshsPhslluC-Nh ......................+hlHhGhGshtRua.s.ah..sc.L.Ls.......p.....s....s............h..............c..huhscV.s...hssst.........tL...p.t.Q....c...p.lh.s..h.lht..s.........tps...p...pscll...u.u..Vpt..s....l............s...s......t....p....-........t....t....ll....p.th.sp.stsclVohslTEtG.......ht...ttt..h.....s.s.t....P.h..Istsl.tps.......................................hlhth.lp.+R.p.....t.s.....s.h..p.s.hslluC-Nh.......................................... 2 193 431 596 +3276 PF01050 MannoseP_isomer Mannose-6-phosphate isomerase Finn RD, Bateman A anon Pfam-B_899 (release 3.0) Family All of the members of this Pfam entry belong to family 2 of the mannose-6-phosphate isomerases. The type II phosphomannose isomerases are bifunctional enzymes. This Pfam entry covers the isomerase domain. The guanosine diphospho-D-mannose pyrophosphorylase domain is in another Pfam entry, see Pfam:PF00483. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.96 0.71 -4.60 17 2703 2012-10-10 13:59:34 2003-04-07 12:59:11 13 9 1834 0 687 2749 2876 136.20 49 31.58 CHANGED lGlpsLVVVpTcDAlLVuc+ccsQDVKclVcpLKtpsRsE..tphHpcVaRPWGpYcsl-pGcRapVKRITVKPGp+LSlQhH+HRAEHWlVVuGTAcVThspcshlLsENESTYIPhGshHpLENPGKIPLELIEVQSGuYLGEDDIVRhED ..................................................lGlcDlllVpTtDAlLluc+spsQ-VKp........lVp.p.LKtp.s+..p.-......hp....H..pp..la.R..PWG.p.Yc...s.....l.....-.....t.....G.....p.....R.....ap...V..K.....R...I..s.....V.+PG.t.p..L..S.l.Q..h.Ha.H..R..u..E.H...W.l.V.V.s.GT.Ac..VT.h.....s.....s......c............h.l........l.s......c.....N-......S.........h.YIPh..G..shHp.L...cN........P...G.c...l..P.L...c...lI.E....V....Q.oGsY..LsEDDIVRhpD............................................. 0 212 432 567 +3277 PF05007 Mannosyl_trans Mannosyltransferase (PIG-M) Moxon SJ anon Pfam-B_5638 (release 7.6) Family PIG-M has a DXD motif. The DXD motif is found in many glycosyltransferases that utilise nucleotide sugars. It is thought that the motif is involved in the binding of a manganese ion that is required for association of the enzymes with nucleotide sugar substrates [1]. 
22.00 22.00 22.20 22.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.89 0.70 -4.74 6 328 2012-10-03 03:08:05 2003-04-07 12:59:11 8 8 285 0 238 382 29 248.70 35 61.37 CHANGED ISTRGNu-ulluhlllhsLaLlpKpp.........hhtAullaG.hulHhKIYPllYslslhL.l.pshtcQ..........oshsphp....pLlsh..t.lhhl..lsoLhoFsusshhhYhhYG.pFL-csYLYHlhRcDhRHNFSsaahLLYLspAsphh..Sphl....tlhAFlPQhlLlhhh.uhpa......hcsLsashFlpTFsFVTaNKVCTSQYFlWYLshLPLlhsp..............h+h.SW++uLsllhLWhhsQuLWLhsAYhLEFpGhNsFh..laLuusLFFlsNsalLtQll ..........................................................................................ISTRGsu-ullssh.lhhhL..a.hl.h.ppc................................hhhAulhhG.luVHhKIYPlIYuhslhh.h.l...tp.p...t.tt.................................sh.hthhh.............ph.hsh..ptlh.hs......hsohh.oFhsls.h..hhYh.hY.G..hpFLpc.sY.LY.H.ls.Rh.DtRHNFSs.Y..h.hl.YL.s.....ss............pt...................u.......l..............................t.hhu...F..l....PQ..ll.L.h.h.h.....l...s.h.th.........................tccLs.h....shF.......h.....QT.hs...FVsF...NK..........VCTSQYFlWYlshLPlhlsp...................................p.h.......s..h.t...p....u..l...h.hl...h..lWhhu...QuhWLh.u.Yh........LEF.Gh.....s.......sF.h...lahuulhFhhsNshlLh.h....................................................................................................... 0 88 140 202 +3278 PF01575 MaoC_dehydratas MaoC_like; MaoC like domain Bashton M, Bateman A anon Pfam-B_297 (release 4.0) Family The maoC gene is part of a operon with maoA which is involved in the synthesis of monoamine oxidase [1]. The MaoC protein is found to share similarity with a wide variety of enzymes; estradiol 17 beta-dehydrogenase 4, peroxisomal hydratase-dehydrogenase-epimerase, fatty acid synthase beta subunit. Several bacterial proteins that are composed solely of this domain have (R)-specific enoyl-CoA hydratase activity [2]. This domain is also present in the NodN nodulation protein N. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.33 0.71 -4.74 32 6997 2012-10-02 20:54:35 2003-04-07 12:59:11 14 82 2642 72 2491 6367 2455 119.40 20 29.77 CHANGED phst.....hspshptphshslo...............sphthaAhhSGDhNPlHlDst.hAphutFs.ssIsHGhhohuhstthlhpthss........sthtthps+FhssVhsGsplpsphtpsu..hpttt.phhphpsshhsssss ...........................................ht..............t.hslo...................ppc..l.hhaA.......t..h..oGD.h..pP..lH...h...Dsp...hA.p....p.....s.....s.....F......s....p........h..I.s....H....G....h...h....o........h....u...h......h.......s...t..h...h...s..p..t.hs.s...............hsth....s....h...........p..l.+F..h...p..P.V.h..s....G.-.....s.l.p....s.p...h.plhs.....hp.tt...t..............st.t................................................... 0 656 1497 2090 +3279 PF03642 MAP MAP domain Bateman A anon Pfam-B_1396 (release 7.0) Family This presumed 110 amino acid residue domain is found in multiple copies in MAP (MHC class II analogue protein) Swiss:Q9Z4J2 [1]. The protein has been found in a wide range of extracellular matrix proteins [1]. 24.30 24.30 25.40 24.50 23.80 23.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.89 0.72 -3.76 10 1325 2012-10-01 21:38:54 2003-04-07 12:59:11 8 6 160 5 10 373 0 87.10 51 73.35 CHANGED VPYTIuVNGsSssltScLsFsscpploYpDLssKVKSVLcsDRGIo-c-L+hAKpApYTVaFKNGsKKVVDLKScIaTtNLFsupDIK .........VPYTIsVNGTSssIhSpLsFspspploYpDLssKVKSVLts-RGIo-hDL+hAKpApYTVaFKNGsKKVlcLKusha.TuNLlsssDIK....... 0 8 8 10 +3280 PF02991 Atg8 MAP1_LC3; Autophagy protein Atg8 ubiquitin like Griffiths-Jones SR anon Pfam-B_1384 (release 6.4) Domain Light chain 3 is proposed to function primarily as a subunit of microtubule associated proteins 1A and 1B and that its expression may regulate microtubule binding activity [1]. 
Autophagy is generally known as a process involved in the degradation of bulk cytoplasmic components that are non-specifically sequestered into an autophagosome, where they are sequestered into double-membrane vesicles and delivered to the degradative organelle, the lysosome/vacuole, for breakdown and eventual recycling of the resulting macromolecules. The yeast proteins are involved in the autophagosome, and Atg8 binds Atg19, via its N-terminus and the C-terminus of Atg19. 21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.31 0.72 -4.05 9 1011 2012-10-03 10:59:06 2003-04-07 12:59:11 11 11 362 37 573 990 106 100.50 47 79.62 CHANGED +RptEuc+IRpKYPDRIPVIlEKsspoc.lPslDK+KYLVPuDlTVGQFhalIRKRIpLsP-cAlFlFVss.olPsTushMuslYpcc+DEDGFLYhsYSuEsTFG ....................................................pRptEup+.IR....pKaP-Rl.......P.................VIlE.....+sp...............+..uc....ls.s..l.D...K.....+K.......a.L.V.....P....s..DlT....V...uQF....hal.I..RKRl.........p.............Ls.........s.............E.cAlF.l........F.V.........s..s.....s........l.P....s.........o.u..u........hMuplY-c.cK.D.E.DGFLYlsYSuEssFG...................... 0 190 288 420 +3281 PF00414 MAP1B_neuraxin Neuraxin and MAP1B repeat Finn RD anon Prosite Repeat \N 20.60 20.60 22.30 20.60 18.00 20.50 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.94 0.73 -6.21 0.73 -4.33 11 173 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 24 0 75 166 0 16.90 54 3.54 CHANGED TT+oP-susYsYEptEK ..TT+TP-sSuYSYEpoE+. 0 5 5 10 +3282 PF01124 MAPEG FLAP; MAPEG family Finn RD, Bateman A, Brock T anon [1] Family This family is has been called MAPEG (Membrane Associated Proteins in Eicosanoid and Glutathione metabolism). It includes proteins such as Prostaglandin E synthase. This enzyme catalyses the synthesis of PGE2 from PGH2 (produced by cyclooxygenase from arachidonic acid). 
Because of structural similarities in the active sites of FLAP, LTC4 synthase and PGE synthase, substrates for each enzyme can compete with one another and modulate synthetic activity. 20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.87 0.71 -4.53 319 2722 2009-01-15 18:05:59 2003-04-07 12:59:11 13 15 1609 34 1052 2147 1247 123.10 21 86.01 CHANGED hh..hhs....slhshhhhhlshhssthRhttth.......................s.....................ts....hhpR.....................................stRAatNthEthshFhshhhlhtl.t......................shhsshhuhhalsuRllas.hsa...hts......hsh..hRs.hs....ahluhlshhslhlhslh .........................................................................................................................h.hhhsslhslhh.hhhsh.ls.p.h.R.h.p.hp..s..h.......st.............s.................................hs.....thpp...............................shR..sa.tNsh.EhhPlal..s.h.h.lh.hth..s......ss..............s.......shhsth..hu.......hlalsuRlhah.hua..........hts.......hhh.......hRt..hs....hh..hshhshlhhhlhsh.h............................................................................. 0 315 546 790 +3283 PF01914 MarC UPF0056; MarC family integral membrane protein Enright A, Ouzounis C, Bateman A, Dlakic M anon Enright A Family Integral membrane protein family that includes the antibiotic resistance protein MarC. These proteins may be transporters. 
23.60 23.60 23.70 23.80 23.50 23.30 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.15 0.71 -5.20 12 4024 2012-10-03 02:02:08 2003-04-07 12:59:11 12 4 2220 0 892 2286 718 199.30 30 95.29 CHANGED hphhhtshlsLhhlhsPlGslPlFloLhpphstcc+pplhh+sslhuhllLhlFhhhGchIhp.hFGIslsuh+lAGGIlLFhIuhcMlpup.pt.....ptpppEtpt...h-plullPLAhPLlAGPGuIToshlhhsptss.........hhhshlulhLshhhshlhhh.ushlhRhlGctGlsslsRlhGllLsslulQhllsGlpshh ............................hhpshlsLhslhsPlG..slPl.Fluls.p.ths.s....tpR....p....c......h.....s.....hp.....uslhsh..lll.h.l.hhahGphlLp.hFGIolsuh+I.AGGlllhhIA.h.c..Ml.h.s.ptst.................t..p.s..p..p.....p....t......t..................t......................t....ps..l....ulVPLAhP..lhAGPGs.I.uslllh...u.s.p.h.t.s..........h...t..h..h.s..h....h.....l.u.lh.l.s.h.l....hs.a.h...l...h.t.h.us.hlhRl....L..Gp...s....G.......l....sllsRlMGllLsslulphllsGlpth...................... 0 241 508 711 +3284 PF02063 MARCKS MARCKS family Mian N, Bateman A anon IPR002101 Family \N 19.50 19.50 21.30 20.30 19.10 19.30 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.43 0.70 -4.56 4 119 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 35 1 60 106 0 175.90 36 96.19 CHANGED GAQFSKTAAKGEAA.AE+PGE.AVAuSPSKANGQENGHVKVNGDASPAAAEuG.KEElQANGSAPA...EEsuKEEAAou...............tAAsccEAuAu...........sAEuEsAEPuSP...AEGEuA.....opTp..cAtssATPSsSsETPKKKKKRFSFKKSFKLSGFSFKKNKKEuGEGAEuE.GAsAt..tEtAK--AAAAAsEAsst..EpAtAsuEEAuAAus.pttttcEs..AusupPp..EsKs-EA..APEK.PsuEEspsAEE...pKsEEKsAEEAs...AsuAA...sEAPuu.......EpEAAsAEEP.........AAs..sQEAsSEsSPt..AssAE 
............................ss+t-ss.s-...t..s..ss..sKsNGQ....ENGH.s+.NGshost.........................................................................spu-.sh-sss..........................u...ts.s...sspETPKKKK.+FSFKK.sFKLSGhSFK+s+KEsutsupst..uss.t...t-ttttt.t......ts.t......t....-ttt.ss..ps....t...........t.t...tttst..t..E..t..stts...t..pt...st...t..................................................................................................................................... 0 3 11 27 +3285 PF02124 Marek_A Marek's disease glycoprotein A Mian N, Bateman A anon IPR001654 Family \N 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.45 0.70 -4.86 15 236 2012-10-03 02:52:13 2003-04-07 12:59:11 10 1 52 0 0 276 0 201.10 41 44.49 CHANGED YYPspSsclpWFpsGp.sVc.spalcsshphts-.G.lhopsSslol.........ssssstsh.PPolRCp......lsWa+cuVuppRh.......suussPsVYhtPplol.cFtsGcAVCsApCVPcs..VplpWhlsDt....sssssshtsGsCsp+PGLVNlRSshslSttcushcYTCpLsGYPpslPsFpcotsaDASPpssutshllsll...uVlsGlsslullsllsslClhp ........................................................YYP..psVchpWFpcst.tVt.ssplcsthptpss...G..FopVSsVT..........t..ssssps..PsslpCp......hsWaRD..uVs.pRh.......sAsuhPtVhs.hPslol.....tF....t.s.uasVCTAtCVPcG..VshsWhlsDss....sAspsshpsG.Csc+PGLsslRSshPl..S..t.....t..u...-YoC..+L..s..GYPsslPs..h......ptptoa..p.s..s..Ptsso...V.hphl...ulhhhlhshGllslhshlhlh....................................................... 0 0 0 0 +3286 PF01047 MarR MarR family Finn RD, Bateman A anon Pfam-B_269 (release 3.0) Family The Mar proteins are involved in the multiple antibiotic resistance, a non-specific resistance system. The expression of the mar operon is controlled by a repressor, MarR. A large number of compounds induce transcription of the mar operon. This is thought to be due to the compound binding to MarR, and the resulting complex stops MarR binding to the DNA. 
With the MarR repression lost, transcription of the operon proceeds [1]. The structure of MarR is known [2] and shows MarR as a dimer with each subunit containing a winged-helix DNA binding motif. 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.37 0.72 -4.26 38 19890 2012-10-04 14:01:12 2003-04-07 12:59:11 17 44 3773 111 4329 25839 3062 58.30 23 35.57 CHANGED lohspatlLthltppss.hshs....cLApphplspuslTphlc+LEcpGhlpRppsspD+Rpl .................................lo.sph.h.l...L...h....h....l......t.....p.....p.........s........s.....h.....s.hp............cL....uc.tl..t.....l....sp........u.o.lo..p...hlccL..Ec..c..G..hl..pR.p.p..s.....p..D+Rt.......................... 0 1317 2694 3560 +3287 PF02064 MAS20 MAS20 protein import receptor Mian N, Bateman A anon IPR002056 Family \N 23.40 23.40 23.60 30.10 23.10 22.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.45 0.71 -4.20 33 336 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 234 22 220 310 0 124.60 35 71.87 CHANGED lsAGl.u..ushFlGYClYFD+KRRSDP-aK+KL+ERR...+cptpp..........ttsssstl.P-h.pD....pcslpcaFLp..ElphGEpLlspGch-pGlcHLs.NAlhVCuQPs....pLLplLQpoLPsplFphLlp+Ltt ....................................................sus.s.ush.hlGYs.lYFDhKRRsDPpF++.pL..+..c..+R....+ppttt...................................t.ttthttlssh...p-..................s-thppaFhpplphGE............pL...hu...p...G..c........h.....p..........c....ust.+.hhpAl.tVhsQPp....pLLplhppTlP.sVaphLlp.h..h......................... 0 56 93 163 +3288 PF04769 MAT_Alpha1 Mating-type protein MAT alpha 1 Kerrison ND anon DOMO:DM04990; Family This family includes Saccharomyces cerevisiae mating type protein alpha 1 (Swiss:P01365). 
Mat alpha 1 is a transcription activator which activates mating-type alpha-specific genes.\ MAT alpha 1 and MCM 1 bind cooperatively to PQ elements upstream of alpha-specific genes [1].\ Alpha 1 interacts in vivo with STE12, linking expression of alpha-specific genes to the alpha-pheromone (Pfam:PF04648) response pathway [1]. 22.10 22.10 22.10 22.80 21.90 22.00 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.22 0.71 -4.99 13 507 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 358 0 51 428 0 156.70 31 59.93 CHANGED chlss.....hhpshtphccsAuhtAhouphhhss.......hst.ssssppAKKslNuFhuFRoYYh..shFsphPQKchSshlolLWptDPp+shWslMspsaSsIRDpluK......ppssLspFlshtsPhhtl.s.stYhplhGWsLopsppGshslp+sssschpphspshs.shsLshpslhshlp.hhhsssashhsphsssoaphhphss ............................................................................................tth..t.t.s.p.t.s.+.+.sLNuFhuFRsY................Yh..hhFs.s.hp.QKphSshlshLWps.D.Ph.+.scWulhAKsYShlRDphsc......tpssLstFhslhsP..hhsh..st.Ylt.hGWt.....h....t.s.pu...................h.p..h....t...............h...slht.h....h.......................................................................................... 
0 8 25 44 +3289 PF01554 MatE UPF0013; MatE Bateman A anon Pfam-B_163 (release 4.0) Family The MatE domain 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.61 0.71 -4.62 58 32381 2012-10-02 21:24:20 2003-04-07 12:59:11 13 52 4365 8 7913 28954 6802 160.40 16 68.74 CHANGED Phhls.hlps..........hhthlsshhlupl..GstslAuhuluhslhshhhh.........hshGlusuhsslsupthGApphcphthshtpuhhlshhlulhhsllhhhhsp.lhphhssstp......lhphutpalhhhhhshshh.sh.hshsuhLputGcschshhlshhuh.llslslsal ...................................................................Phhlt.hh...........................hh.th.l.s......s..h.h....l....u.........p......h.......G...........s.......t..s............l........A.....u......h.....u....l..........s.....h..s..l.h.t.h.h.hh.........................hh.h.u.l.u.....hu..s.....s........s........l.........l....u......p.........t..........h.......G...........u........p........c............h............c............p........s................p......p......h........h....t...t....u....h..............h....h.u..hh.h.u.l.l.h.....sl.l...h....h...h..h.s.....p.....l....l..t....l....h.....s....s.....s..s.p............................l.h.p......h..u......t...p.a....l..h.....l.h....h....h......s....h............h....h.h.....s....h....t...h......s.....h.....s.............u.................h...hp...u.h....u...p....s...p......h...s...hh.hsh...h...s..h.....llsl.shs.......................................................... 0 2610 5153 6676 +3290 PF01824 MatK_N MatK/TrnK amino terminal region Bateman A anon Pfam-B_30 (release 4.2) Family The function of this region is unknown. 
21.70 21.70 21.70 21.70 21.10 19.80 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.12 0.70 -5.34 15 38881 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 26823 0 17 37918 0 249.20 57 67.98 CHANGED MEEappYLELD+SQQHsFLYPLIFQEYIYALAHD+GLNR.....SILLENsGYDN.K.ShLIVKRLIT....RMYQQNHhllSsNDSNQNhFh....GaNKNhYS..Q..MISEGFAlIVEIPFSLRLlSSL...Et.KclVKSaN..LRSIHSIFPFLEDpFuHLNYVLDILIPaPIHLEILVQTLRYWVKDASSLHLLRFFLaEYpNWNoLIT..sKKu.asF.SKRNpR..hFLFLYNSaVCEYESIFlFLRNQSSHLRSsSSGshLERIaFYGKIE+hl.EVFs+DFpssLWLFKDPFhHYVRYQGKSILASKGTsLLMNKWKYYLVNFWQCaFYlWsQPGRIaINQLSNHSLDFLG ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......p....l.....clhIPaP.lH.hEIL..VQhLp..hWl.pDssSL..HLL.....R.....F.F.L..a...E..........Y...p.....N.........a......N......S......L...I...T................sK....K.........u.........h.................h..........F.....S........K....c.............N....p..R.........h.F........h....F....L....Y....N.....S.aV.h.EY..ESl.F.l.F.L.R.p...Q.S.S..a.L...R...S.TS...G....s..h.L..ER.la..FY..s.....K.....I.........E.......+.......l............l................V.....a........s......p........s........F........p.......t....h...L......W..h.......F.....K..D..P..F......h...H.YV...R...Y..Q.GK.u.ILA..S..K..G.......T..........L...L...M..p.K..WKaYLVNhWQ.ha..Fhh..W..s.Q..P.....t...R..I..a..I...N..Q..L...S..p..a...S...hpFLG......................................................
...................... 0 2 8 14 +3291 PF00661 Matrix Viral matrix protein Bateman A anon Pfam-B_128 (release 2.1) Family Found in Morbillivirus and paramyxovirus, pneumovirus. 19.80 19.80 25.80 23.60 17.30 15.60 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -11.86 0.70 -6.13 13 784 2009-01-15 18:05:59 2003-04-07 12:59:11 16 3 105 0 0 688 0 266.80 48 96.27 CHANGED pshhshspsuhsssusLpPhPlphss....cG+Ll.QlRltclspssc+cpshsal.sYGFlcsscppsshlGt.p...ps.ttthhTuuhLPLGsG.shusspclLcthsphcIpVR+TAuspEplVFplsshsplLpsaphshpphhlhsAsKhs+uPsplssshshpaplsFlSlThhsssttaplP+slLchRSppshulpLpVhl+lpscs-SshhKshhsccptcs..AohhlHlsNlp+pcsc..hYss-YhppKlcpMpLshuLuslhGsoltl+usG+lsKThtshhuh+thsCaPLhDlsPsLs+hLWusuCEIsclpAILQsSs.p-hhhasDlIhcclphhhc ..............o..hhth..ss...pspl.sh....h..h.t......ctplh.QhRl.c.s.hscpc-s.hahhhhGhl.p.ss...tsslG...............hs.u.LsLGss.ssuc..-Lh+tshphslss++oAs.sEchVF..sps.plLpshhhshsphs.hsAsphspAsphlPhsss.ca+VsahSlThlscpshYplPp.thLchpusshhslsLpVTlcl-scscSshlKolsc...u-sshh....AslalHIG.hspscp+h+phohDhhcpKIR+hsLsVuLuDVhGsSlhl+upGthoKhLts.hu.ptshCYPlhshs.pls+lLWpppsplhplpsllQsus.pthtlhsDh.lsssph.h.t............. 0 0 0 0 +3292 PF03819 MazG MazG nucleotide pyrophosphohydrolase domain TIGRFAMs, Griffiths-Jones SR, Bateman A anon TIGRFAMs Domain This domain is about 100 amino acid residues in length. It is found in the MazG protein from E. coli. It contains four conserved negatively charged residues that probably form an active site or metal binding site. This domain is found in isolation in some proteins as well as associated with Pfam:PF00590. This domain is clearly related to Pfam:PF01503 another pyrophosphohydrolase involved in histidine biosynthesis. This family may be structurally related to the NUDIX domain Pfam:PF00293 (Bateman A pers. obs.). 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.32 0.72 -4.03 54 7328 2012-10-01 21:36:44 2003-04-07 12:59:11 12 20 3581 41 1765 5952 4148 73.70 28 47.30 CHANGED Qohpolh.allEEshEls-AI.cc..........p-.s...........plc-ELGDlLhpVlhhuplupc..pstFslp-VhpplscKhhcR+P+lF ..........................................................shh.hlhEEstE.l.h.-...s..l....pp.........................................p.-.tp..................................cl.p.-ELG....DlLhtl..l.hhAph...............hthc.hccs..hpph...scKh.cRh.............................. 1 620 1185 1510 +3293 PF04837 MbeB_N MbeB-like, N-term conserved region Mifsud W anon Pfam-B_3854 (release 7.6) Family This family represents an N-terminal conserved region of MbeB/MobB proteins. These proteins are essential for specific plasmid transfer. 21.30 21.30 21.70 21.70 21.00 20.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.31 0.72 -3.87 9 115 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 74 0 3 93 2 51.20 41 30.92 CHANGED MSplLsLApsFEp+SKppupSTpchlpssFccHEpslpptLppupp+Is-AI .....MSplLsLAp-F...EpKSKpctpSTpphLpssFpchEpulpptLppupp+IpsAI..... 0 1 1 1 +3294 PF04899 MbeD_MobD MbeD/MobD like Finn RD anon Pfam-B_5673 (release 7.6) Family The MbeD and MobD proteins are plasmid encoded, and are involved in the plasmids mobilisation and transfer in the presence of conjugative plasmids [1]. 21.40 21.40 21.40 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.37 0.72 -4.07 6 132 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 105 0 14 86 0 74.90 43 78.49 CHANGED MTELEppLLSALEQLQQDYpQRLpEWESAFs-hQ+MhuLppRENA.......ALSEpVTsLSQQVpcLScQlcRLSp ............................MpELEhpLLsAhppLQpsappphppWpSuaspLQphhphoppcpu.......sLpt+sptLspplppLstphp................................................... 
0 5 8 10 +3295 PF03621 MbtH MbtH-like protein Yeats C, Eberhardt R anon Yeats C Domain This domain is found in the MbtH protein Swiss:O05821 as well as at the N terminus of the antibiotic synthesis protein NIKP1. MbtH and its homologues were first noted in gene clusters involved in non-ribosomal peptides and other secondary metabolites by Quadri et al [1]. This domain is about 70 amino acids long and contains 3 fully conserved tryptophan residues [2]. The structure of the PA2412 protein shows it adopts a beta-beta-beta-alpha-alpha topology with the short C-terminal helix forming the tip of an overall arrowhead shape [3]. MbtH proteins have been shown to be required for the synthesis of antibiotics, siderophores and glycopeptidolipids [3-6]. 20.80 20.80 21.10 21.10 20.70 20.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.81 0.72 -4.60 87 1247 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 966 4 274 742 10 53.60 46 64.74 CHANGED M....sssF-csss....pahVLlNcEtQaSLWPshtslPsGWpssh...G.su.......sRpsCLcal-p ...............M..oNPFDcspG.....sFhVLhNs-tQaSLWPsh..sslPAGWclVht.st.......o+tuClpalE........ 0 50 144 224 +3296 PF02289 MCH Cyclohydrolase (MCH) Mian N, Bateman A anon Pfam-B_6511 (release 5.2) Domain Methenyl tetrahydromethanopterin cyclohydrolase EC:3.5.4.27 is involved in methanogenesis in bacteria and archaea, producing methane from carbon monoxide or carbon dioxide. 
25.00 25.00 102.30 102.20 23.50 22.50 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.00 0.70 -5.39 6 211 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 188 1 101 219 69 285.00 43 96.87 CHANGED SVNchAtplVEcMl-tuE-L+l-VtKLENGATVlDCGVNssGSa-AGhhaoclCLGGLAcV..slsshphsulshPsVplpTDaPAIAsLGuQKAGWplp..VGc..YFAMGSGPARALALKPKETYEEIGYEDDADlAVLsLEuscLPsEcVsEalAcECuV-PENVYlLVAPTASlVGSlQISuRVVEsGlaKhhEVh.FDls+lK.usGhAPIAPlhsDDlpAMGpTNDsllYGGpsahaVcuD-..s-lcplscplPSssScDYGKPFh-lFKcAsYDFYKIDsGhFAPAcllVNDLpTGKoapuGclNsElLhpSFG .........................SlNthuh.ll.cphlp.u....ccLpltltphpsGspllDsGlp.s.GuhcAGhhhsclChGGLupV...sht....t..th...t.sh.h.shlpVpospPslACLuSQhAGWpls....htc.....aFAlGSGPARALAtc.........c........cla.c-l.s..Yc...D.pu-.t..u....lL.sLEusplPsppVs-clAcpCGV.sPpslhlllAPTsSlAGoVQlsuRVlEsAlHKhppLt.F-lpcIlsuhGsAPluPst..sDtlpAMGRTNDAllYGGpshhhVc..ssp..tthpplspplPSss.Sc-YG+PFh-lFcphshDFYclDstl.FuPApVhlsslpoGcsapsGpls.plLtpSF............. 0 27 71 90 +3297 PF00493 MCM MCM2/3/5 family Bateman A, Finn RD anon Prosite Family \N 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -12.00 0.70 -5.64 62 3858 2012-10-05 12:31:08 2003-04-07 12:59:11 18 63 938 2 2067 6744 833 253.70 39 40.85 CHANGED -hcplpcl.uccs..slhpplspSlAPsIaG..p--.lK+AlhLtLhGGsp.K..phsc.thc..lRGDINlLllGDPG.sAKSQlLKal.p.pluPRuVYToG+GSSAsGL.TAuVh+Dstot-...asLEuGALVLADpGlCCIDEFDKMs-pDRsulHEAMEQQTISIuKAG.Ihso.LNARsSlLAAANPh..hGRYstp+ohspNlslssslLSRFDLl.FllhD.ps-cppDpplAc+llshHhtt...........................ppphptp.................................lshchL++YItYA.R.phpP..t....Loppu.pctlsphYlphRp.s........tt...ttthslTsRQLESlIRLoEAhA+h+LuphVstcDVccAl+LhppShh 
.........................................................................................................pplpph..t..p.p.....plach..lspSlAPp.Ia..........G..........p..............pc...l.......K+Al.lLhL.......h.G........Gs.........p.............K...................p..h.....s.....s...........s..h..+.................lR...GD......I.....Nlh.lhG...DPusuK....S...Q.h..........L.p....a....h..............p....h....s.......s...........h..........u........l..........h...........o....o...........G.....p............u...u..........S.......u...s............G.......L........T......A............t..........l.....h.....+........D.........................o..t..p......................hh....l.......E..u.......G.A........h.......V............L.u..D.t....G.....l.............sh....IDEF.D.K..Mp.t.....s...psslHE.......sMEQ..Q..o.l.......ol........u.....K....AG......l..h.sp..L..p..uR.s.ul...lA..Au...N.Ph..............u.................p....a..................s................................h.........s................................t.........N.........l............t............h.........sl..lS.R....F..D..l..............h.........h........l........l.........h........D.............s...............................D....................l....up.....a..l.ht..a..t...................................................................................................................................................................................h.s...t..............h....p...p..Y....l.......h.s...+..................t....h..t............P.......h.....................l..s.........s....t..p......h.l......p....Y....t.hRp.t..................................t.......tt......h..os.RpLts....hlRlup....uhA+hphp...t................s.t.t.cst..Ahplh.....h........................................................................................................................................................... 
0 763 1189 1734 +3298 PF00015 MCPsignal Methyl-accepting chemotaxis protein (MCP) signalling domain Sonnhammer ELL anon Blast MCP1_ECOLI/361-421 Family This domain is thought to transduce the signal to CheA since it is highly conserved in very diverse MCPs. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.51 0.70 -4.79 9 30898 2011-09-19 18:58:56 2003-04-07 12:59:11 16 309 2287 12 9245 27179 1335 198.30 30 36.30 CHANGED ppAo-hAppAsppA..............ppsspsVcpslpshcplsspSppIscIlsVIspIA.QTNlLALNAAlEAARAG..EpGRGFAVVAsEVRsLAp+SApAA+EIcsLIppsspp..............l-sGsshlcpTucshpcIVsulsclsshls-IAuuosEQSpulppls....pulschsplTpps..............sAhsEEuuAAutsLcppApcLpphlstF+lp .......................................................................ttt.........................s..............................h...p...p....h....s...p.......t....h.......p....p.......s.......s....p.......s...h.......p....p.......l....s...p....p....u.......p.......p.......I.s...........p...l...l.......s.l.......I...ps.......I...A...p...QTNL......LALNA.A.I....E.A.....A..R...A....G............Ep....G......R..G.........F.AVVAsE.......V.......RpLAp+.....o.......t.......p.......u.......s.......p.......E.......I.......p...........p.......h..........I....p.......p.......h..............p.......p.......p.............................t.....................................................................................h...............t...........................................................................s........p..........p........u.......s...............p........h..................s..............p.......p.......s.......s...........p.............s..............h..............p..................p..............I..............s..................p..............s.......l..............p....p..............l.......s.......s...........h..............h..............p..............p..............I.......u..............s..............
u...........s..............p.......c......Q.......s.......p........s.......h....p....p.....ls................ps...l....s.......p....l...s....p....h...s.pp.s..........................................ss.h....spp.s...s.......t....s.......u.......p.......p.......L.......p.p.....u.......ppL.pphlt.Fp.................................................................................................................. 0 2775 5451 7351 +3299 PF02993 MCPVI Minor capsid protein VI Griffiths-Jones SR anon Pfam-B_1634 (release 6.4) Family This minor capsid protein may act as a link between the external capsid and the internal DNA-protein core.\ The C-terminal 11 residues may function as a protease cofactor leading to enzyme activation [1]. 25.00 25.00 27.70 27.70 23.70 23.20 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.55 0.70 -4.21 9 139 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 101 1 0 157 0 227.40 52 97.63 CHANGED FusLAPRpGspPhhu..psIGsS-h+GGtFsWGSLhS....Gl+shGSslpNhup+hhNSpThp.l+ptlpDoslhcpVsphhupulsulVDIuR.clppclpphh-+h.s.....-p.hsptcsh..s...h.tstpstscP.scpshlstsstP.Psh.psl..hs......hsspcPhhs.shssss...psPsplshPP..tsp....ssss...s.....ssppsp..............+.hRsusWQusLssllGhGVpsspRRpCY ...........FuSLAPRHGoRPaMGTWs-IGTSQLNGGAFNWuSlWS....GLKNFGSTl+oYGsKAWNSSTGQhLR-KLKDQNFQQKVVDGlASGINGVVDlANQAVQ+pIsSRLD..P..hPs..................pt.h...-th.................s+G-KRPRP....DtEEsh.lhpscEP.PSYEEAlK.G............hPsTRP.IAshAs.uVh.......pPsTLDLPP....sPs....sssss........ss.s.s.....s....ss.ssstR..................ssttpt.stNWQSTLsSIVGLGVpSlKRRRCY................. 0 0 0 0 +3300 PF02249 MCR_alpha Methyl-coenzyme M reductase alpha subunit, C-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2706 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. 
It is a hexamer composed of 2 alpha (this family), 2 beta (Pfam:PF02241), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The C-terminal domain is comprised of an all-alpha multi-helical bundle. 25.00 25.00 32.70 26.70 21.80 21.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.78 0.71 -4.12 22 7244 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 318 31 49 7541 13 118.60 61 67.10 CHANGED DQIWLGSYMSGGVGFTQYATAAYTDNILDDasYY....GhDYlp-KYG...s........scu.s.oh-sVpDlATEVTLYuLEQYEcYPThLEDHFGGSQRAsVhAAAuGsosAhATGNupAGLuuWYLS.hLHKEuaGRL ....................................GVGF.TQYATAAY..TDNI.LDDas.YY....GhD.YlcDKYG.......s........h.s..+sKs..ThDlVpDl.ATEVTlYGhE...QYEpaPThLEsHFGG....SQRAuVlAAAuG.hosulATGNuNAGL..suWYLS.MhLHKEuauRL.............................................................................. 0 10 32 41 +3301 PF02745 MCR_alpha_N Methyl-coenzyme M reductase alpha subunit, N-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2706 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (this family), 2 beta (Pfam:PF02241), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The N-terminal domain has a ferredoxin-like fold. 
20.50 20.50 21.00 20.50 19.40 20.00 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.81 0.70 -5.19 5 1424 2009-09-10 16:32:42 2003-04-07 12:59:11 10 3 189 31 50 1557 13 56.70 70 20.77 CHANGED EKKLFl-AL+cKFc.E-PcEKpTKFYsFGGWKQSERK+EFVEaAKKlAEKRG.IPFYNPDI..GVPLGQRKLMPYpVSGTDsaVEGDDLHFVNNAAMQQMWDDIRRTVIVGMDTAHcVLEKRLGKEVTPETINEYMETlNHALPGGAVVQEHMVElHPGLVsDCYAKIFTGDDELADEIDK+FLIDINKpFPEEQAEQLKKAIG+RTYQVsRlPTIVuRsCDGATsSRWSAMQIGMSFISAYKlCAGEAAlADFSFAAKHA-VIpMGohLP .....................................................................................................................................................................................................................................................................AMQIGMSFIuAY+MCA.GEAAVADLAaAAKHAuVlpMushLP............ 1 11 33 42 +3302 PF02241 MCR_beta Methyl-coenzyme M reductase beta subunit, C-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2692 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (this family), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The C-terminal domain of MCR beta has an all-alpha fold with buried central helix. 
25.00 25.00 325.40 325.00 24.30 18.90 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.52 0.70 -5.17 6 82 2009-11-23 11:17:05 2003-04-07 12:59:11 13 1 65 31 49 86 12 252.00 65 57.60 CHANGED GYALRNIMsNHlVAsT+KNThNAlALuoILEQTAMFEMGDAVGuFERhHLLGLAYQGLNADNLVaDLVKtNGKcGTVGTVVASlVERALEDGVI+VcKpLsSGFKlYcPsDhAhWNAYAAAGLVAAshVNsGAARAAQGVASTlLYYNDILEYETGLPGVDFGRAEGTAVGFSFFSHSIYGGGGPGIFpGNHlVTRHSKGFAIPsVAAAMCLDAGTQMFSPE+TSuLlGoVaStIDEFREPLKYVsEGAsElKc+ GYuLRNIhsNHhVAhT+KNshpAsALoSIhEQsAhFEMGsAlGsFERhHLLGLAYQGLNANNlVaDLVKpNGcsGTVGoVVtSlVERAlEDGVIpscKphsSG.aphYcssDsshWNAYAAAGhlAAshVNCGAuRAAQuVuSTlLYaNDlLEaETGLPGlDaGRspGTAVGFSFFSHSIYGGGGPGlFNGNHVVTRHSKGFAIPCVsAAMsLDAGTQMFSPEpTSuLlGsVautI-EFREPlKhVAcuA......... 0 10 32 41 +3303 PF02783 MCR_beta_N Methyl-coenzyme M reductase beta subunit, N-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_2692 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (this family), and 2 gamma (Pfam:PF02240) subunits with two identical nickel porphinoid active sites [1]. The N-terminal domain has an alpha/beta ferredoxin-like fold. 20.90 20.90 23.90 32.00 20.00 17.60 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.92 0.71 -4.83 7 83 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 66 31 49 89 12 178.50 51 41.17 CHANGED DplDLYD-+GpLlEEsVPLEAlSPL+NPsIpslVpDlKRoVAVNLAGIEsALpsuplGGKushI.GREl-LsIVtNAEuIA-plK+hlQVsEDDDTsVcLlssGKphLlQlPSpRlcVAA-YosusLlsutAlspAIIctF-VDMaDAshVKuAVhGpYPQolDahGuNlAolLusP.pLEG ...D+IDLYDD+GpLLcpsVPLEAISPLpNsuIp+llpssKRTVAVNLAGIEsuL+TGplGGKuppI.GRELDlsIVsNA-uIA-clcchlpVp-sDDTsVcllsGGKphLVQlPotRlcsAA-YoluhhssAuAlspAII-pFclsMaDAshV+AAVhGpYPQohDhtGGNluolLslPpp.EG......... 
0 10 32 41 +3304 PF02505 MCR_D Methyl-coenzyme M reductase operon protein D Mian N, Bateman A anon Pfam-B_2115 (release 5.4) Family Methyl coenzyme M reductase (MCR) catalyses the final step in methanogenesis. MCR is composed of three subunits, alpha (Pfam:PF02249), beta (Pfam:PF02241) and gamma (Pfam:PF02240) [1]. Genes encoding the beta (mcrB) and gamma (mcrG) subunits are separated by two open reading frames coding for two proteins C and D [2]. The function of proteins C and D (this family) is unknown. 25.00 25.00 108.30 108.10 18.90 17.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.76 0.71 -4.65 10 75 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 59 0 51 72 4 148.00 41 92.72 CHANGED I-VcIFPHRlLpA-TTEKlLNclY-.l-GltRVllHGpsLPcpVsaGPA+GhPVNHs-R+lIpVKGp-lEL+VpVGRIhVplcD.c..-ppl-cI-EICcElhPFuYclpsG+Fh+ccPTVTDYlKYGcc....lcclDsRLlGhVDPpuRhpssVslIK ..h-lEIFPpRhLps-TTEKlLNclhs.l.cGlpRlllaG.sLPcpVsYGPA+GtPlsHs-R+hIpVpGpslELpVpVGRlhlpl-s.....-shl-cIcclCc-.hh...P...FuYclp.....GpFh+scsTVoDYhKYG.p.....sphDccllGhsDP+u+hppplthl.......... 0 11 32 42 +3305 PF02240 MCR_gamma Methyl-coenzyme M reductase gamma subunit Mian N, Bateman A anon Pfam-B_2713 (release 5.2) Domain Methyl-coenzyme M reductase (MCR) is the enzyme responsible for microbial formation of methane. It is a hexamer composed of 2 alpha (Pfam:PF02249), 2 beta (Pfam:PF02241), and 2 gamma (this family) subunits with two identical nickel porphinoid active sites [1]. 
25.00 25.00 320.70 320.50 18.60 18.00 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.56 0.70 -5.24 7 85 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 67 31 49 88 13 246.80 61 94.67 CHANGED sQaY.PGpTplApNRRKaMNP-hELcKLREIsDEDlV+lLGHRsPGE-Y.SVHPPLEEMD.PEDslR-hVEPhpGAKtGcRVRYIQFsDSMY.APAQPY.RuRsYhWRaRGlDsGTLSGRQlIEhREpDLEpluK..LlETEhFDPApsGlRGATVHGHSLRLDENGlMFDhLpRhlasE-TG+VhhVKDQVGc.LDEPVDlGcPLsE-pLtchTTIYRlDslshR-DcEhlElVpRIHphRThuGFtP ...PQaYPGsTpVApNRRKaMNPshcLEKLR-IsDEDlV+lLGHRuPGE-YpolHPPL-EM-EP-sslR-lVEPhsGAKAGDRlRYIQFsDSMY.APuQPY.RuhshhhRaRGlDsGTLSGRQlIEsRERDLEcloK..Ll-TEhFDPApsGlRGATVHGHSLRLDEsGhMFDhLpRhlhsccoGpVhYVKDQVGpPLDc.VslGcPlsE-cL+c+TTIYR.hDsluh........R-DtEsl-lVp+IHptRThuGFt.... 0 10 32 41 +3306 PF02315 MDH Methanol dehydrogenase beta subunit Mian N, Bateman A anon Pfam-B_12628 (release 5.2) Domain Methanol dehydrogenase (MDH) is a bacterial periplasmic quinoprotein that oxidises methanol to formaldehyde. MDH is a tetramer of two alpha and two beta subunits. This family contains the small beta subunit. 20.60 20.60 21.10 20.70 19.70 16.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.06 0.72 -4.05 4 44 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 44 23 21 47 0 90.00 58 93.97 CHANGED MKplLshhslAushu..hsssALAYDGTpCKAPGNCWEPKPGaPEKIAGSKYDPKHDPtELsKQsESlKsM-ARNtpRltNhKKTGKF.YDVKKI ..............hhhhshhsushs...hussAhAYDGTpCKAPGsCWEPKPGa...P-KlAGSKYDPKHDPpELsKQs.pSIcuMEpRNtKRltpFKKTG+FhYDVpc........ 0 6 16 18 +3308 PF04349 MdoG Periplasmic glucan biosynthesis protein, MdoG Mifsud W anon COG3131 Family This family represents MdoG, a protein that is necessary for the synthesis of periplasmic glucans. The function of MdoG remains unknown. It has been suggested that it may catalyse the addition of branches to a linear glucan backbone. 
25.00 25.00 28.80 28.60 17.70 17.30 hmmbuild -o /dev/null HMM SEED 484 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.60 0.70 -6.23 4 1625 2012-10-02 23:57:29 2003-04-07 12:59:11 7 2 926 2 268 967 64 475.30 48 91.60 CHANGED FshDsVscpA+sLAucsYpu.KssLPoshpchpas-YQpIRFppD+AhWss.csPFplpFaHpGhaFcoPV+IsEVsustspclpYDPshFsaG.slhpscpssKDLGaAGFRVhYPlNotccpD.ElssFLGASYFRslGpGpsYGLSARGLAIcTA.PuGEEFPcF+tFWlE+PpssscplVlYALLDSPpsoGAYRFslpPGcsslMDVputla.R-cluKLGlAPhTSMahaGsNp.p.spsaRPElHDSsGL.lhsGNGEWlWRPLsNPpaLpVSsF.spNPpGFGLLQRsRDFu+YpDL-s+Y-hRPShWVEPKGsWGcGoVpLVEIPT.DETpDNIVAFWsPcp.PcsGpEhpFuY+LpWu.spsclp.PpsuaVppTtpuhGDVtppshhR...hohtFVVDFpGsthKtLssstPVospsslcsNutllEs.olR.NPsh+GaRLshclcst-s.pp.sEhRAtLsspsscPLoETW.YQhs...h ..................................Fshs.ltphApsLAtpsap..u.s..p.s.LPs.s.lt..shsassY.ppIQass-+uhWps....l.c.p...aclpFaH..G.MhFcp.V+l.pV..s........s.......s....t....s+c.......I+.......asP-hFsa.s.......-s...t..h....D..p...-..s...s...p.....D..L...G...FAGFRVh..h.....s.......h...s..p........p.......t..pc....-lVSFLGASYFRslGsst.YGLSARGLAIDT.uh.s.o.s..EEFPcFptFWlEpP......K....P.s.s.pplTlYALLDSPssTGAY+Fs...I..p..s..G.+.s....s.l.h.DVpu+lYhR............cc............lpcLGlAPhTSMFhaGsN......p.......p................ss...saRPplHDS-GLuha.s.G....N....G.........E.........WIWRPLNNP....p.+.L.p.houa.ssc.N.P+GFGLLQRsRDFS+YpDltshY-pRPShWVE.P...........+....u.........c...........W.GKGsVpLlEIPTssET.DNIVAaWpP-p.hs..csG.......cEhsFpY+LhWS...s...............p.s.......l+..ss.h....AhVht.....T......RpuhG...s.h...........p......ss........hh.....chshtFsVDFsGu.-h.K..........h.sts.s.......P.h.....sh.pss.I.s.s.s.u..thl...........cp............p.lt..Y....s..s..s..h..c..G.aRlhhchh.......s.p..........ss....pc..........ss-MRhhLts...t.s....ps..lSETW.YQh..h................................................................... 
0 60 133 191 +3309 PF02975 Me-amine-dh_L Me-amine-deh_L; Methylamine dehydrogenase, L chain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 25.50 25.40 17.80 16.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -11.05 0.71 -4.14 2 110 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 90 102 36 118 21 113.60 54 68.56 CHANGED .sRttapPQDpD.puCDYWRHCSIDGNlCDCsGGSLTsCPPGTcLusuSWVASCaNPsDGQoYLIAYRDCCGhpssGRCsClNspGELPVYRPEFsNDIlWCFGA-sDAMTYHCTlSPIVGKA .........ts..........t.t.Ds.puCDYWRYCAIDG.LCo.CCGGohoSCPPGTpsSPsoWlGoChNPtDGpsYlIuYpDCCGKso.C.G...cChC.ps.Et.EpPlY+P...phsNDIsWChGsts....YpCosusllGh..... 0 7 19 26 +3310 PF04934 Med6 MED6; MED6 mediator sub complex component Wood V, Bateman A anon Pfam-B_4045 (release 7.6) Family Component of RNA polymerase II holoenzyme and mediator sub complex. 20.20 20.20 20.20 20.60 19.30 20.10 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.89 0.71 -4.63 32 317 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 279 3 225 313 2 139.90 37 51.89 CHANGED lpW+sPpalp....................h..sLp....ssNVL-.YFup..SPFYD+oSN...Npll+hQtphsp.................................................hppcLppMsGlEa.llsps..pEP...................................slaVI+KQpRps.............sspspslssYYlluuslYpAPslhsllsoRl....................hsslhplppu....hsphs.phspasP ................lpWpsstal.............................hLp.....ssslL-.YFup...sPFYD+TsN...Npsl+hQphh.p.......................................................................................cLppM.s..GlEY...llhps..pEP....................................lalIRKQp.Rps.................ssplhsLusYYIluu..s..lYQAPsltsVls..SRl........................loslpslppuhcpst.shspapP......................................... 
0 76 120 183 +3311 PF03525 Meiotic_rec114 Meiotic recombination protein rec114 Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 33.70 30.80 24.30 19.70 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.03 0.70 -5.94 4 26 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 25 0 15 25 0 261.40 35 69.83 CHANGED YS+YTIPSaAPsGFsShLEPPpIDKWQHLSsNCTLQF+VLLhDStQlhlpVlLNNSThLEpIRLPLGsNpDhIQFSCKsPIISCKYISEEFGP+hL+RFQlNLPNDl-FNRslVSLKNLsFVl+TAKTSIApsThssQsps.NNupKlsFsEss.NsSoYppsNsQFQ............................TQNMlhDFSQphQEKstREssNpSNITLPpDs..hsIsQQpaPss-hNVVpoSQDLNTPpATQTlhupPEsL.VpshElSpshssooss.PshcNppppsthsSDhhS+Kthh.pp.sshhpshcLPKERpppEpplpuhh..hpsssThlhS.tppNpl+pNs ..................YSpYs....APpGFpohhpPs.pl-KWpH..l..S.ssssL.pFplLLh....cspp.lhlpV..lhN.........N.uslLEpIplP.l.....us.s.......p-lIQFSsKuPhISCKYlscch.usp...h.lRRFQhsLss-s-Fs+ssssLpsLsFVl+sA+...o..ohupsshps...phps.ps.spphsh..ps...phss..t..p.ph........................................................................s.s..h...sp.................................................................................................................................................................................................................................................................................................................................................... 0 2 7 13 +3312 PF03243 MerB Alkylmercury lyase Bateman A anon Pfam-B_3505 (release 6.5) Family Alkylmercury lyase (EC:4.99.1.2) cleaves the carbon-mercury bond of organomercurials such as phenylmercuric acetate. 
21.00 21.00 24.00 29.60 18.90 20.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.70 0.71 -4.30 22 236 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 189 13 65 184 11 118.90 31 52.93 CHANGED LolpPTsH+hpls..GcpLYsWCAlDsLhass.hlspssplpSpsssTGpslcloVcs.st...lsslpP.sssVVShl.ssss..s...sl+suhCs.hlpFFuo.psApsWhspHP...supllslt-Aa.pLGptlttplh ................hohh.TsHphpls.....u+.pLYuhCAlDoLhhsu.ll.u.p.s..sclpStCssTGpslplolps.s.p......lhslpP..sshsVuhlsssct.s..........shpsuhCs.plp.FFsS.psApsWhsp+s....th..tllslppAa.tlup.h.t..h............................ 0 26 44 57 +3313 PF02065 Melibiase Melibiase Mian N, Bateman A, Eberhardt R anon IPR002287 Family Glycoside hydrolase families GH27, GH31 and GH36 form the glycoside hydrolase clan GH-D. Glycoside hydrolase family 36 can be split into 11 families, GH36A to GH36K [1]. This family includes enzymes from GH36A-B and GH36D-K and from GH27. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.45 0.70 -5.78 13 2958 2012-10-03 05:44:19 2003-04-07 12:59:11 13 57 1465 71 1044 2993 252 265.70 24 45.35 CHANGED SFpTPEllhsYospGLNuhSQpaHsLhRp....+lh+statsc.RPlllNsWEAsYFDaNcsplhslA-cA.....tcLGlEhFVLDDGWFGs...RsDDsuuLGDWhlspcKaPsG...Lssluccl........+spGhcFGLWhEPEMVs.s-ScLa+pHPDWllp.sssRstopuRNQhVLDlups-VhDaIhpphspLLspssIDYlKWDhNRplTEssSsths.......ssHpYhLGlYclh-pLsspaPcllaEuCuuGGGRFDhGhLhYhPQhWsSDsTDAl-RlsIQaGTSL............lYPsushGAHVSAsPNcQstRtT.ShphRutVAhh.GshGhELDlspLsccEcptltp.lAhaKchRpllt.....hGshYRLpsPt..su.NpsAhhhVSsDpspAlltahplhussshs..........shl+LtGLDsc 
...................................................................................................................................................................t..p..t...P..h.h....h.Ns.........W.p..s....h.........h....h.........c..........h..........s..........c....pt..........l..........h.p....h..A....c..th................tc..h.....G...h.......-.....h..h....l..l..D....DG...Wh.s..t.................R.p..s...c................t....................h.......G...........c........h......h......s........s..........t........p.......+a..P.....s.....G..........lpsLsct.l...........+..sh.G.h.c.F.Gla...h....p....s...................s...........t.........o......t.........h........h...t..t....a....P.....................................................................................................................................................................................................................tt.hh...t.......hta.hKh..D...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................s............................................................................................................................................................................................................... 1 383 695 892 +3314 PF01372 Melittin Melittin Bateman A anon SCOP Family \N 20.60 20.60 21.10 25.90 19.70 20.50 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.39 0.72 -7.01 0.72 -4.59 3 18 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 16 4 1 33 0 24.20 71 40.82 CHANGED GIGAILKVLoTGLPALISWIKRKRQQ .GIGAlLKVLuoGLPsLISWIKp..... 
1 1 1 1 +3315 PF02964 MeMO_Hyd_G Methane monooxygenase, hydrolase gamma chain Griffiths-Jones SR anon Structural domain Domain \N 25.00 25.00 118.90 118.40 20.90 19.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.92 0.71 -4.33 3 19 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 18 54 4 20 0 160.40 53 94.31 CHANGED IH-NsTRsAWhuKIApLNTLEKAscFIpDFRscHoSPFRsSY-LDlDYLWIEtKIEEKLAVLKs+cFN-sDLlsKsTsGEDApuVtsssVAKMcAAsDKYEAERIHIsFRQAYKPPVMPVNVFLDTDRQLGT+LMELRNTDYYATPLEuLRKcRGVKVVHL ......IH-NssRsEWhuKIApLsolcpAstFlpDFRhpaoSPFRpoYcLDlDa.aIEtKIEE+LuVLKscc.h.ssu-LlTKsosGEDAttVpsshlAKhcAscs+Y.EAERIHIpFRphYKPPVhPVNVFLcTDptLGT+LMELRNTDYYusPLEsLRKcRGVKVlpL....... 0 1 4 4 +3316 PF03203 MerC MerC mercury resistance protein Mifsud W anon Pfam-B_2720 (release 6.5) Family \N 23.00 23.00 23.00 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.74 0.71 -3.72 42 387 2009-09-11 08:49:01 2003-04-07 12:59:11 9 5 300 0 102 277 593 114.30 31 81.69 CHANGED hD+hulshSsLCslHC...lshPhlhshLsshG.hhh.....hhsphhHphhlslslslullAlhhGhhpH+.phthhsluhhGlslhshuhhh.cshh.....ct...............................................slTllGssllshuHlhNhRhs ..................hD+hGshuSllsAhtC.hshPslsuhuuulG.uhh...............hcs............hah....th..lLslhssluhlA.slGa...hpH+.phh..hhh.L.uhlG.sllhsus..h.....hhhshhh.....ps.................................................................slhhlGhsLhlusplhshh..s...................... 0 45 71 89 +3317 PF05052 MerE MerE protein Moxon SJ anon Pfam-B_5840 (release 7.7) Family The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2]. 
20.70 20.70 22.10 21.70 20.30 19.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.66 0.72 -4.00 6 198 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 160 0 22 56 10 73.70 75 93.45 CHANGED MNSP-+lPsET+..pPlTGYLWGsLAVLTCPCHLPILsh..VLAGTTAG.AFluEaWGIAALsLTGLFlLSloRhLRAF...p ............MNuP-+LPsET+..pPloGYLWGALAVLTCPCHLPILAs....VLAGTTAG.AFlGE..H..WGlAALsLTGLFVLSVTRLLRAF.c........ 1 5 9 16 +3318 PF00376 MerR merR; MerR family regulatory protein Finn RD, Bateman A anon Prosite & Pfam-B_3021 (Release 7.5) Family \N 28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -7.41 0.72 -4.39 347 3063 2012-10-04 14:01:12 2003-04-07 12:59:11 18 35 1779 31 910 13735 1815 37.70 36 21.31 CHANGED sIu-l.....Ach.s..GloscolRaY-ct.G.Ll...ssp+spsGhRhY ..............sIGcl.....Act.s.....GVsspsl...RaYEpp..G.Ll.....ss.tR..s..s..u..u.hRpY................. 0 354 658 814 +3319 PF02411 MerT MerT mercuric transport protein Bashton M, Bateman A anon Pfam-B_1796 (release 5.4) Family MerT is an mercuric transport integral membrane protein and is responsible for transport of the Hg2+ iron from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE). 29.30 29.30 29.30 29.40 29.20 29.20 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.55 0.71 -4.28 3 490 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 366 0 109 326 70 112.50 47 88.93 CHANGED MSEPpNsRGALFAGGLAAILASsCCLGPLVLlsLGlSGAWIGNLTlLEPYRPIFIuAALVALFFAWRRIYRPVcACcPGsVCAlPQVR+TYKlIFWlVAlLlLVAluFPYVlsaFY ......................psu+u...u...Lh..u.G....s..L.A.AlhASsCClGPLVLlsL.GhS.GA.WI.G....N......LT.s.LE.PYR.PlF.IGs...A.LlAL..h..FAWp+l...Y......R..P..s..p..A...Cc.....P.G.c.lC...A...l...Pp...l.R...t.........s....h...K.llF.W..l.VusLVLluLuFPYlhPaFY......................................... 
0 32 75 97 +3320 PF02475 Met_10 Met-10+ like-protein Mian N, Bateman A anon Pfam-B_2239 (release 5.4) Family The methionine-10 mutant allele of N. crassa codes for a protein of unknown function, Swiss:O27901. However, homologous proteins have been found in yeast (Swiss:P38793) suggesting this protein may be involved in methionine biosynthesis, transport and/or utilisation [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.28 0.71 -4.72 11 1002 2012-10-10 17:06:42 2003-04-07 12:59:11 11 35 540 11 661 9354 2050 197.00 24 47.12 CHANGED psaslIGDllllplpsEhpp.acphIuEslhchp.slKsVhp+supl..cG.aRshchEllAG.ps.opTlH+ENGspa+lDluKVaaSs+htsERtRlsph.sc-GElVlDMFAGlGPFuIslA+htKschVaAl-lNPcuh+aLpENI+LN..KVcshl.sIhuDsc-Vh...cshAD..............................................RllMshPtpsccFLcp...Ahptl+-G............GVlHY .......................................................................................................p..athhGc....l..h..h..l..p....h..........t....p..................t....t.....l.....u...p............h...l...h.p....t............t...l...c.s...l...h....p.c....h.s..........h......t..s..t..h...R................p....h.....c....l....l..h.....G....c...p.....................h..............s......h.pE.....p....s..h....p.....a...p...h..........D.....h......s....+.....l.....a....a.....s......s....p............t...s.............E......+........t.......R...l.......s........p................h.......h.........p.........t........s.............-.....s............V..hDh.a..AGl...G..s.....F......u.l....s...h...u.....p...............+....s....t....h.........V...aAs-......lNPp...u...h....c.......hL..p.c........N..h...p.....l..N.......+..l...........p.....s.........h.......l........s.....h.....p...h.....D.....s.....p...p....hh............hh..p.............................................................................................................................................................
...........................+...h..l....h....s..h............................s..........p......h....h.......h.................................................................................................................................................................................... 1 209 377 540 +3321 PF02965 Met_synt_B12 Vitamin B12 dependent methionine synthase, activation domain Griffiths-Jones SR, Eberhardt R anon Structural domain Domain \N 20.80 20.80 21.50 21.30 19.70 20.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.16 0.70 -11.32 0.70 -5.14 11 2573 2009-09-10 23:08:37 2003-04-07 12:59:11 12 20 2384 10 722 2189 810 252.90 35 25.51 CHANGED sslpElhsalc.+shap.......................s.t..stcsp+lh.supshLp.........ucslsshLsu.........................................................................u-hhuhFAsTuGhthEchtcshppptD.hpuhllcAlGstlAEAhA-plcccl+pc....stscslsho.........hR.uPGYsuh.-hoEpttlapLlpsEp.hGl+LT-ShhMsPtpSVSGlhhhsPp.u+h.hhuphspsphpDhhtRKth .............................................................................................................................................................................................................................................h..ltplh.aIDWtsFF..sW..........ph.hG.a....................Ptl..h..pc....G....tA.ppla.t-u....pt..hL.....c.cl.tpphl..........ps+uVhGha.PAspsGDDl.lhtscsppphhth.h.......................................................................................................................pp.thsshsluDalssh.oGhs..DalGh.FAV..TuGhth.-thscta.c.sp..p.DDYstIh.l+ALADR..LAEA......hAEhlHc+VR+c..hW....GYus.sEsLss--Llp.....................ppYpGIRPA.PG.Y.P.A........CP-Ho-Ktsla.cLL..c..........s..-.....c..h.......G......hpL.T.E.S.a.AMhPsuSVSGhYau.H.P.c.....u.....+...Y......F.....u.l..u....p...l.tpD.Q.l.p.c.aAhR+t..................................... 
0 250 478 616 +3322 PF03724 META DUF306; META domain Yeats C anon Yeats C Domain Small domain family found in proteins of of unknown function. Some are secreted (e.g. Swiss:O25998) and implicated in motility in bacteria. Also occurs in Leishmania spp. as an essential gene. Over-expression in L.amazonensis increases virulence (Swiss:O43987; [1]). A pair of cysteine residues show correlated conservation, suggesting that they form a disulphide bond. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.50 0.72 -4.18 122 2207 2012-10-03 08:47:39 2003-04-07 12:59:11 11 29 1689 2 524 1564 142 100.60 23 59.33 CHANGED sltsspWpl......h..put.shhs...tpps...pls.........h.tss.................s...plsGp...uG..CNpahGsapl.pss..........p.....lshs....sluuTchuC..s.s.hhphEpthhpsLps.spshpl.p..ss............pLsLps ..........................................................................ttpa.l..........h....p.ut...hs.t.............psts....pls.........h..s..pp...................t....plsGp.....su..CNp..a.....h..Gp...h.p.......l...ps.s..........p.......lphp....sl.u.s.T.c...Mh.C......ss.....p..hhphEpphh.p.hLpp..stphsl.s......ss.............pLhL..t............................... 0 156 334 437 +3323 PF01676 Metalloenzyme Metalloenzyme superfamily Bateman A anon Pfam-B_1926 (release 4.1) Family This family includes phosphopentomutase Swiss:P07651 and 2,3-bisphosphoglycerate-independent phosphoglycerate mutase, Swiss:P37689. This family is also related to Pfam:PF00245 [1]. The alignment contains the most conserved residues that are probably involved in metal binding and catalysis. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null --hand HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.65 0.70 -4.99 27 6157 2012-10-03 20:55:17 2003-04-07 12:59:11 13 22 3518 49 1368 5458 2001 425.30 23 96.34 CHANGED pphlLllLDGhG.tsspc...tssl..psApT.........PshDplhc.........sspGhtlslP..uphGsS-luHhsl..............huhpstchasspshhtshsc....tt...........Glco+.sclthhsphst.c......hhllhDtR..cl.sppspshhcpltp.pltph........l....s..h...........h.tpuacshhltps....ts.ssshlcs...........lsPphl....stshh.hpss-sl.hhsacsc+stplhptlhppsach.hpptth..t+lh...hh.atuph.ushhh..sstshsph.hchhsppsl+phtluETtphs+lshhas..............h.l.sspGsThslpschpuhthscphlcsl.tsca.D.hlhlNhsssD.hsGHsGchcuplculEthDp.plscll-tltpssshlllTADHGNs.phhs..................TtHTtcPVPlllhut....sh+.......hsphh.tsphAsluuTlhplhGhchsp.hpsh .....................................................................................................................................................................................................................................................................................................................................................................................................hhLlllD..u...hG...h...stttc................sssh..........tsA.ss.......................s.shsclhpt...s................t.t.p..G...h..s...l.s...LP....sphG.uc.l.u.+hshussphshpshschstthppsphhpsstht.hhthhhssstshhhhshhssttsppp.pthhthhphstttsh.t.h.....................................................................................................................................................................hh.t.httsthhhhpsssshhhhhtpcshthtphhhhhpphttphpttthhhhthhhhhhhssp.sshhth.s.p.s.hss...p..s.p.h.h..s...h...p..t...l..p.p.....h..t...h....u-s.....phu..+.ls..h.asG...............................................................................................s.p.s..h...T...h..c..lp..s..c....h....u..s....h....cl...h..-...th.................l.................c.t.l............p.....
.............p......s......c...............h....s......hl.h.sNhs..shD.hhGH....p....t.............s...h.....p....u...h...h..c..A..l.E..shDp...pl.s.c...l...h....c....t.....l................p.......p......s......s......h......l....l..I..T.A.DHG...N..s.s.phts........................................................................................T.s.H....T....p....p................V.....P.....llh.h..us.............th.p........................s.s.p.l..t.p....t..s...p...h....u..D..l.usTlhphhuhp.sp.hpu.h.................................................................................................................................................................................................... 0 517 943 1184 +3324 PF02066 Metallothio_11 Metallothionein family 11 Mian N, Bateman A anon IPR000869 Family \N 21.40 21.40 57.30 56.80 18.50 16.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.18 0.72 -4.09 2 7 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 1 0 7 7 0 54.10 63 100.00 CHANGED MEFToAhFGsSLl.op.pTppKHNLVssCCCSpsTpcso...SCsCoKCuCcoCKC MEFToAhFGASLIpop....pTppKHNLVNsCCCSpsspcso...SCsCoKCuCcoCKC. 0 7 7 7 +3325 PF01439 Metallothio_2 Metallothionein Bateman A anon Prodom_1611 (release 99.1) Family Members of this family are metallothioneins. These proteins are cysteine rich proteins that bind to heavy metals. Members of this family appear to be closest to Class II metallothioneins, seed Pfam:PF00131. 23.00 23.00 23.30 23.30 22.90 22.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -11.06 0.72 -3.43 23 407 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 153 0 53 379 0 72.40 50 90.61 CHANGED M.SC.CGu.sCGC..GSu.CKCGssCs.CpMYPDls.pt.soTo..pTllhGVAPpKs...puuEhuss...ussGCKCGusCpCsPCsC ..........MSC..CGGsCGC...GSu.Cp.CGss......ssKMYPDls.pt...soTo...pTllhGVAPpKs..phE...u.s.....Ehusst.....sssG.....C.K.CGs.sCp.C.s.PCsC................................................ 
0 2 22 40 +3326 PF02067 Metallothio_5 Metallothionein family 5 Mian N, Bateman A anon IPR000966 Family \N 25.00 25.00 25.60 25.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.01 0.72 -3.91 2 57 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 16 0 28 47 0 40.30 57 94.64 CHANGED MsC..CGosCpCuuQtstssCsCspDCpCsscptststCsp MsCKuCGTNCpCoupK.CGsNCuCspDCpCsCKN.GPK-pCCp.... 0 4 5 19 +3327 PF02068 Metallothio_PEC Plant PEC family metallothionein Mian N, Bateman A anon IPR000316 Family \N 26.80 26.80 26.80 29.50 25.60 26.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -10.77 0.72 -3.61 4 37 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 20 3 16 36 0 72.90 53 73.21 CHANGED GCDD+CGCssPCPGGsuCRCsSu...puuuGstEHpsCsCGEHCGCNPCsCs+spspsSG+u..+ApCoCGsuCsCASCAS ..........hCs-+CGCslPCPGGs......sCRCsus.....tuuu.Gs.s-HsTCsCGEH.CGCNPCsCsKs..tsoGpG.....CpCGsuCsCAoCu.... 0 2 11 13 +3328 PF02069 Metallothio_Pro Prokaryotic metallothionein Mian N, Bateman A anon IPR000518 Family \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.47 0.72 -4.14 13 100 2012-10-05 18:33:37 2003-04-07 12:59:11 11 1 96 1 39 94 51 47.90 40 73.21 CHANGED oTsTphKCACssChCsVohssAlp+-GKsYCSEuCAsGH.hsspGC....GHsGCsC ................pCACssCsChVs..scAlp+-G+hYCscuCAsGH..psps.C....tpssCtC......... 
0 8 21 32 +3329 PF00131 Metallothio metalthio; Metallothionein Sonnhammer ELL anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.81 0.72 -11.32 0.72 -4.09 21 581 2012-10-05 18:33:37 2003-04-07 12:59:11 15 4 201 21 109 522 0 58.90 56 96.13 CHANGED .DP......CsCtc.susCsCus.uCp.CpsC+CssCcKss.ssCssuGC...sKCu..psCsCtt......t..pCoCCs ......MDP......CsCsp....uG...oC..sCuu..S...CK.C.c.sC+C.....TS.C.KK..S.C..C.uCCPsGC....sKCA...pGC..l..CKG......tspcCSCC.................... 0 14 17 30 +3330 PF01717 Meth_synt_2 Methionine_synt; Cobalamin-independent synthase, Catalytic domain Bashton M, Bateman A anon Pfam-B_1909 (release 4.1) Domain This is a family of vitamin-B12 independent methionine synthases or 5-methyltetrahydropteroyltriglutamate--homocysteine methyltransferases, EC:2.1.1.14 from bacteria and plants. Plants are the only higher eukaryotes that have the required enzymes for methionine synthesis [1]. This enzyme catalyses the last step in the production of methionine by transferring a methyl group from 5-methyltetrahydrofolate to homocysteine [1]. The aligned region makes up the carboxy region of the approximately 750 amino acid protein except in some hypothetical archaeal proteins present in the family, where this region corresponds to the entire length. This domain contains the catalytic residues of the enzyme [2]. 
20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.06 0.70 -5.46 14 4790 2012-10-01 21:20:02 2003-04-07 12:59:11 13 9 3434 28 1233 5086 1353 308.90 33 54.56 CHANGED hPTTT.IGSaPQTsclRcsRtphppuplu..tpYpsth+ttIcpslchQEclGlDVLVHGE.ERsDMVEYFupplsGh.shTtsuWVpSYGSRsh+Pshhhuclptspsholc.thhupshsscs..VKGhLTGPlTILsWSFsR...pD.s+cshshQlAlAL+cElt.cL-sAGhtlIQlDEP...ALREGlPLpp..tc.stYhpWultuFRlssssstscsQlpoHhCYSpassIh...stIsshDsDVloIEtu+sc.phlsslpc.htas+plG.GVaDlHSP.+VPSsEEltshlpcshch.........h.t-pLWVNPDCGLKTRshpEshsuL+NMVcAAct .....................................................................................................................................................hPTTT..lGSF..Ppop..p.....l.......+..p.....h.......R..........t..t........a.......p...............p..G..p......lst...ppapphhp...........ppIpphl......phQEclGL.D.VllcG....Ehc..R....s....-..............h.Vp.a...F.u...E.p....L.........sGa....h..............h.......s..............p....p...............u.............a..........V...............Q......u....a.............G................s................+................s.............s....+.....s.............P...l..........l....h.....G...-.l........s....p...s.....p..s.....h..........s.......V...c......h.....s......p.a.......u.p.....u.h....T.s+s...............h..Ku.....h...L......T..G......P......s......T.....l....l.....s....h.....o.a.....sR................pt......s...pc....p.....hst..........p.l...A...h.....A.....l...+-El.t......cL.c.s..A.......G.l............p.l.......I.QlDE...s........................Al..+..E.t.....l.....Ph+p..............tc...h....p..t...a.....L............p......h........u............l........t.......u........a...........p............l.......s....p..........u......s....s.........t..........s..........c.....s......p..........I..p...T..........H.h..C...........Y..................u..........p...........F...........s..........shh.................st.l..s.....s..h....
.c..s....Dl..l...o.....l..E..s...u....c....S....c.............h.............p.............h...l..........c...sh.....c......p.....h........s..h....s..p..c...l..G.....GVh..D....l.+....S....P.....c....l...P....s.....h.-..c..lt..s.h.l.c.cu..hph..........................lsh..c..pl...alsPD............CG.......L........p......o......p.......t......h.............ps......h.....tt.Ltthltssp.t....................................................................................................................... 1 360 749 1029 +3332 PF01035 DNA_binding_1 Methlytrans; Methyltrans; Methyltransf_1; 6-O-methylguanine DNA methyltransferase, DNA binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1191 (release 3.0) Domain This domain is a 3 helical bundle. 21.30 21.30 21.50 21.40 20.70 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.97 0.72 -4.24 93 7975 2009-09-12 05:33:12 2003-04-07 12:59:11 15 49 4422 25 1944 5717 2191 84.40 39 44.75 CHANGED osFpppVhpt...ltp.IPhGplsoYuplA....ctlG.p.....spuuRsVGpAh...spNP....hshhlPCHRVlsusGp.......lusa....th........uhth..Kp.tLLptEuhth..p ................................osFpppV.Wps.Lpp..IP.hGpshoYuplA................ptl.G..p................P.p..A..sRAVG...s...As........upNP.................lul.ll..PCHRVlsssGp.........L.s.Ga..........ss......................Gl.pc..Kp..tLLphEuh...t........................................... 
0 624 1221 1628 +3333 PF02870 Methyltransf_1N Methlytrans; Methyltrans; 6-O-methylguanine DNA methyltransferase, ribonuclease-like domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1191 (release 3.0) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.44 0.72 -3.38 11 2628 2009-09-14 13:10:39 2003-04-07 12:59:11 10 12 1908 11 442 1666 131 76.10 23 35.10 CHANGED tlhaohhcoPlGplhlsssEcs.lptlthtspshs......h.t..phhhts-......hsshthhppplcpYhAuptp.....thslPlch ....................................t.hhhthh.soPlGtlhl.s.s...s-....p.....u..Ls.ulha..tspptp.............h.ph..ph..h...t..sc......................ts...h....t..th...p...p.....pLcc...YFuGppp......hslPls.............................................. 0 118 236 347 +3334 PF00891 Methyltransf_2 Methyltransf; O-methyltransferase Bateman A anon Pfam-B_152 (release 3.0) Family This family includes a range of O-methyltransferases. These enzymes utilise S-adenosyl methionine. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.42 0.70 -5.18 30 3396 2012-10-10 17:06:42 2003-04-07 12:59:11 13 42 1164 67 1586 4681 447 220.60 22 62.75 CHANGED tps.........phYshsssuchLhsspss...sshsshhhhhtspshhpsWtpLp-..ulh-Gt.....ssFpcsaGhs..ha-ahus.Ds...chsphFspuMssssslhs.hchhpshpsFpslsolVDVGGGsGsssptIsptaPpl+uhsFDLPcVltsAss.......tssVcaVuGDhF.-slPp..ADAllLKalLHDWuDpcClclL+pChcAls...tpG+VlllEsllspssssshps...........hhhDlhMlshss.G+ERoccEacpLhp .........................................................................................................................tttt......................h..................................................................h.............h.............h...tp.......h........p..st...................s..h.t...h..h.h...s....s....ha.ph...h.t....p...p.s..........phtp....h.......F..t......p..u.M.......t.........s..............s........t.........h...........h........h...............................h.......h.................t....h.........h.....s.....h.........p...........s......h........p................p.............l..l.DV.....G...G.G.p...G....t.h...h...t..t.l..h....p.....t....a..........P....p.......l.......c......s....h.....l..h...D.....L.....P...p.....V.....l...t.t..utt.................................ts.p.l...p...h..h...u...u...D...h.....F.....p.........s.......l......P.........t......u.......D.........s.....h......h...h.p.....h.............l....L.............H..c...........W.....s....D.....c....p.......s...h.p...l...L.ppst.p.A...hs........t.........u...+...lllh..-h.l...h..sp...t.....t...t.....t..............t..t......................................h.h.c.h.h..M..hshhs...Gp.....pRs.tcattlh................................................................................. 0 329 890 1294 +3335 PF01596 Methyltransf_3 O-methyltransferase Bateman A anon Pfam-B_749 (release 4.1) Domain Members of this family are O-methyltransferases. 
The family includes catechol o-methyltransferase Swiss:P21964, caffeoyl-CoA O-methyltransferase Swiss:Q43095 and a family of bacterial O-methyltransferases that may be involved in antibiotic production [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.17 0.71 -5.27 8 3858 2012-10-10 17:06:42 2003-04-07 12:59:11 12 23 2673 61 1074 4599 1860 191.10 25 85.30 CHANGED h-oSsh..Es-hL+ELR-tTs+pP..hstMplss-EGQFLuhLl+LhsAK+TLEIGVFTGYShLssALALP-DG+IsAhDlscEshplGhsaIQKAGVscKIshhlGDAhpsL.-pLhp-cp..spFDFsFlDADKsuYspYaE+hLcLVKsGGLIAhDNTLWtGpVs-s.-sssP-shp.....lhchNchLusD.RV-IuhLslGDGITLsRRl .................................................................................t...................................................h...........l.s...p..p.u....p..h.L.p.h..L...l.....c...h..h...p....s...+....p.....l......L...E.l.G.....T.hs.G...YSu....l...h.....h...A...p.......u..............l...........s........p........s............u.........c.....l......s...Tl....-...h.........s..t.........c........h....h...p..........h......A........+....p......h.p....c......u....s...........h......p......p...........c.................l...p...l..h............G.......s..A.....h..-..s......L......s.....p.............L...h..........t...........t............................s...a...........Dh.l......F............l.D............u........s..K............s.......p........Y...h......p......a.....h........p.......t..h..l..........p..........L.........l.c....s..G.G......l....l......l.s......D......Nl...............l....h......t......G.....t.....l....hp......t..........t........p..t.t......................lpph....c...h...l....h...p....p....p...hp...s.s.hl.sls.....DGlh.h.h..t................................................................................................ 
0 347 681 901 +3336 PF02390 Methyltransf_4 Putative methyltransferase Bashton M, Bateman A anon Pfam-B_1023 (release 5.2) Family This is a family of putative methyltransferases. The aligned region contains the GXGXG S-AdoMet binding site suggesting a putative methyltransferase activity. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.20 0.71 -5.26 19 4759 2012-10-10 17:06:42 2003-04-07 12:59:11 12 26 4414 24 1194 8689 3879 194.50 28 77.49 CHANGED hP........ph..........Datphasp.s...........................lhlEIGsGhGshllshApppP-h.alGlElchsslttshp+htph........thpNltllptsAhphhpphhs.spLp+lhl.FPDPW.KpRH+KRRllpsphlpthtphLt.GGhlhhtTDspshhc.hhcthpp.............................shhphh................pss-h..sPhs........s.htssTchEp+shptstslaphhFp+h ........................................................................hs.htt............hchp.p.h.F.s.p..ptP...........................................lhlEIGhGhG....phl....s.......t.h..........A.....p........t....p...P..-.........h.........s..a.l.GlEl.....p.ps..sl...s.....t.....s..L..pp.h.tct............................................................sl.s.N.......l..+..l..l....p...t...D......A....h.....c.....l......L...p.....p...h.....h...s......p......s....p.....l.s....p..l..a...l.....h...F........PD..P..W.....K....p....R.Hp.............KR........R...........llp.....sFlphhtph..L.p.........s..............G.....hl....+.ht...T...D..csh.h.cahL.cshsp........................................................................t..hp.t.h..................................................s...c..h.........s...s.h.............t...p.shTcaEp....+hpp.t.Gpslaclhh...h...................................................................................................................................................................... 
0 408 771 1016 +3337 PF03737 Methyltransf_6 Demethylmenaquinone methyltransferase Bateman A anon COG0684 Family Members of this family are demethylmenaquinone methyltransferases that convert dimethylmenaquinone (DMK) to menaquinone (MK) in the final step of menaquinone biosynthesis. This region is also found at the C-terminus of the DlpA protein Swiss:Q48806. 20.70 20.70 22.30 21.90 19.30 20.00 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.54 0.71 -4.35 11 3287 2009-01-15 18:05:59 2003-04-07 12:59:11 10 22 1989 41 1060 2581 1112 153.90 30 75.23 CHANGED pphsTssLsDshst.sssspPhhss...hstpsphsGpssTV+shp..............DNsLl+pslppsutGcVLVlDGtGsh..........cpAlhGshluthAtppGhpGlVlaGslRDlsslcpLDlslhAhussPst..usppuhGp..lsVslshuGVT..lpPGDhlhuDssGllV ............................................................................................sstl.D......h................p.....h.....t....h..h.p.......s...hps.......hss..p...s.p...h.sGphhTV+sht..........................DNs..h....l....p..c..hl.....-..p.....s.s........t........G...c..VLV...l...D..u....s.....G.s.h.....................cp.A..l.hGs.hl.A.p.h.A.h.p.p.Gh....t.GlVlp.G.u..VRDlspl.p.p..h.....s..ls..l..h.A.h..u..s.h.Phs.................s.s.p..c...u..h.G-.............hsls..l..........s..................h..........G...................G.......ls................lhPGD.hlhAD..s.sGll.............................................. 0 248 604 862 +3338 PF03492 Methyltransf_7 Methytransf_6; SAM dependent carboxyl methyltransferase Bateman A anon Pfam-B_1148 (release 7.0) Family This family of plant methyltransferases contains enzymes that act on a variety of substrates including salicylic acid, jasmonic acid and 7-Methylxanthine. Caffeine is synthesised through sequential three-step methylation of xanthine derivatives at positions 7-N, 3-N, and 1-N. The protein 7-methylxanthine methyltransferase (designated as CaMXMT) catalyses the second step to produce theobromine [1]. 
20.20 20.20 20.30 20.90 19.80 19.30 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.15 0.70 -5.68 23 707 2012-10-10 17:06:42 2003-04-07 12:59:11 10 4 157 8 380 728 190 269.90 29 87.55 CHANGED lppslpph....h.ts...hssslslADLGCSSGPNTFhslssIlcslcpphppps.....tssPEaplahNDLPsNDFNTlF+hLP.h..................tphFsuGVPGSFYGRLFP+pSLHhsaSSaoLHWLSpVPctlp-p.s..h.....NKuplahs.........pVh+AYhsQFpcDhshFLcsRuEElVsGGhMlLshhGRps.......D.ssptss..hhh-LlupuLsDlspEGll---KlDoFNlPhYsPsspEl+thI-cpGSFpI-chEhhp.....h..p....................tsucthusshRAlhEshLssHFGpslh-clFc+atp+lspph.hpp...p.hhsllluLpRp ..........................................................................h...................sp.hhlADLGCSsGsNohhhlppllphlpphhpp.t....................chphhhNDLssNDFNslF.p....t.L......sth.t....t........................shahsu...l.PG.SFY.s.R.LFPppSlchhaSuhu............Lp.W.L.....Sp........l.............P..ptl..s................................Ncsplahs..sss.....tlhpuYhpQappD.h.p.FLphRupElhs.GGthllsh..h.u+ps.................p....p.t.t.ts............hhp....hl...ttulpph.........stc....G........hl....pc-ch.-sFslPh....Ytso.pE.l...ct....hlpp.....p....u...p......F...p...l.p...p.hc..hhp..............h...................t.........................u.thu.th.hR.uhhpsh.l.......t...p.....h.....G........t.......t........l....h.c.tlFtphtphhtt...................................................................................................................................... 
1 54 213 296 +3339 PF02086 MethyltransfD12 D12 class N6 adenine-specific DNA methyltransferase Mian N, Bateman A anon IPR002294 Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.83 0.70 -4.93 27 3733 2012-10-10 17:06:42 2003-04-07 12:59:11 10 25 2224 21 654 3159 1250 233.30 24 80.29 CHANGED ahGuKppLlspIpchlPp......ptppah-PFsGuGuVhhph......phtpslhNDlps.lhslap.llcs.........scpllpphp.h....hh..p..ssp-hahph+............pchN....pshp.h.c......sh.hhhhsthshsslhphsppupasssaGtatp......hhstpplpthsptlps.....sphhstsh-tshhhh.phts.hlYhDPPYtshp.tssapsapp.shspppphthtshhcpht....tphthphhlsss.......ssp.hcclaps...hslhphpthpsl ...................................................................................................ahGuKhtLhsp.l.......hp...h.lP....................ptpph.lEPFsGuG..uV.hh.ph...............php.p..h..l.l..s..Dlssc..Llsl...a.p.....hlpp.............................pspp.h.l...pthcth....................h..............s...p.pc.h.ah..ph+...............................................................................................pphs............p.th..p...h.c..........................uhhhh.a..l.s...+.h..s.....a.......sG.l..h......c....h....s........h.......p....u......p.....h........s..s....s.....h.....u.....p.apps....................hhs....p...p....p..l...h.t...h...s...p..p..hpp......................sthhst..s.a......p..........p.....s...h......t............t......h..............t.......t......s.....s.......l..lY.h.DPPY......h.s..........................ts............s....a..p.......t.....a....p..t.t....s.a.s.........p.......p....p..h..p.L....t..p....h...h.pph................t.th...h...hlSsp....................ss..h+phapt......th........................................................................................................................................................................................................ 
0 232 454 559 +3340 PF01340 MetJ Met Apo-repressor, MetJ Finn RD, Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 31.60 31.50 20.80 19.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.33 0.72 -4.28 3 798 2012-10-02 18:44:02 2003-04-07 12:59:11 15 1 794 30 96 197 8 102.70 87 97.11 CHANGED AEWSGEYISPYAEHGKKSEQVKKITVSIPLKVLKILTDERTRRQVNNLRHATNSELLCEAFLHAFTGQPLPDDADLRKERSDEIPEAAKEIMRELGIDPETWEY .....AEWsGEYISPYAEHGKKSEQVKKITVSIPLKVLKILTDERTRRQVNNLRHATNSELLCEAFLHAFTGQPLPsDsDLRKERSDEIPEAAKcIMREMGIsPETWEY.............. 0 12 33 66 +3341 PF04648 MF_alpha Yeast mating factor alpha hormone Kerrison ND anon DOMO:DM04511; Family The hormone is excreted into the culture medium by haploid cells of the alpha mating type and acts on cells of the opposite mating type (type A). It inhibits DNA synthesis in type A cells synchronising them with type alpha, and so mediates the conjugation process. 19.00 19.00 24.60 19.20 17.90 16.20 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.97 0.73 -6.12 0.73 -3.43 3 152 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 27 0 59 116 0 13.00 78 26.85 CHANGED WHWLSl+PGQPMY WHWLpL+PGQPMY. 0 6 30 45 +3342 PF04202 Mfp-3 Foot protein 3 Bateman A anon Pfam-B_1860 (release 7.3) Family Mytilus foot protein-3 (Mfp-3) is a highly polymorphic protein family located in the byssal adhesive plaques of blue mussels. 20.70 20.70 21.60 23.10 20.00 19.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.69 0.72 -3.80 2 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 5 0 1 49 0 71.80 63 96.17 CHANGED MNNoSVSVLVsLVLIGSFAVpSDAAsYYGPNYGPPRRYGG..YNtYNRYuRtYGG.+GWNpGWpRGhRG+h ..MNNhSVuVL.lALVLIG.FAVQSDA..u.......hhYsPsYssPh.YssGh..Y..NGYNtY.tt...RYGhNKGWNsG.W................... 
0 0 1 1 +3343 PF01078 Mg_chelatase Magnesium chelatase, subunit ChlI Finn RD, Bateman A anon Pfam-B_616 (release 3.0) Family Magnesium-chelatase is a three-component enzyme that catalyses the insertion of Mg2+ into protoporphyrin IX. This is the first unique step in the synthesis of (bacterio)chlorophyll. Due to this, it is thought that Mg-chelatase has an important role in channelling inter- mediates into the (bacterio)chlorophyll branch in response to conditions suitable for photosynthetic growth. ChlI and BchD have molecular weight between 38-42 kDa. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.14 0.70 -5.16 56 4229 2012-10-05 12:31:08 2003-04-07 12:59:11 16 28 3288 9 1225 9364 2476 172.30 42 36.05 CHANGED DhsDV+GQppA+RALElAAAGGHNLLhlGPPGoGKoMLApRLPulLPPLotpEuLEsstItSluG..thptshhppRPFRuPHHouStsALlGG..Gs.hs+PGElSLAHpGVLFLDElPEFsRplLEsLRpPLEsGclsIoRAptpl.saPAcFQLVAAMNPCPCGahucssptCpCo..spphpRYhu+lSGPLLDRIDlplcV.stlshtpL .............................................................................................Dht-lhGQ....t....p...u....+....R....A....l....c...l....A...A.................A.............G......t.......H.............s...l..l..hhG..sPGsGKoM.LApR.l......s....s......l......L....P.................s..............h...s.....................p............E............h..l............E...........s..s.......t...........l....h.........S.......ls.......s..................................t.....t.......h...t...p.t..............P..F...Rs....P.........H..........H.....os.........S.............s.A..l........l...G.......G..................G...........s...hspP..G.....E.l.o...L....A....H.p..G....V.LFLDEl...sEF...pcplL.-s.L.R..pPl.Es..G..p.l.p.lu....R.s.t....t........p....l........p........a....PA+F.LluA.hN.....PsPsG....h..t...t.....t......t.t..C...s.........stph.cY.s+..lS..GPhlDRhDlplph.......h................................................................................... 
0 414 845 1059 +3344 PF05043 Mga Mga helix-turn-helix domain Moxon SJ, Bateman A anon Pfam-B_5126 (release 7.7) Domain M regulator protein trans-acting positive regulator (Mga) is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions [1]. This domain is found in the centre of the Mga proteins. This family also contains a number of bacterial RofA transcriptional regulators that seem to be largely restricted to streptococci. These proteins have been shown to regulate the expression of important bacterial adhesins [2]. This is presumably a DNA-binding domain. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.76 0.72 -3.70 33 5627 2012-10-04 14:01:12 2003-04-07 12:59:11 8 55 1351 2 389 3825 25 88.20 19 15.90 CHANGED tshshpplhphhhcc...............ShphplLphlh..tpcphslpphupchalSpoolhRhlcclsphLp.paslpl.....pssplh.GcEhpIRhFahhLa .........................................................................t..............pp..................phph.h...l.l..phl....h......tppth..shp..p.lspchhlScuTl.h+clppl.p.p.h.Lp...pa..s..L..pl.........ttps...h..p..l..h..GsE.hpl.Rhhhhph.................................... 0 126 235 307 +3345 PF05220 MgpC MgpC protein precursor Moxon SJ anon Pfam-B_6685 (release 7.7) Family This family contains several Mycoplasma MgpC like-proteins. 
18.30 18.30 21.50 24.60 17.90 16.50 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.49 0.70 -5.23 3 49 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 7 0 9 57 1 203.40 69 29.95 CHANGED MGSstVPSLWYWVV...uE-ssSGKuTWWA+TELNWGTDKQKQFVENQLGFKETSuTDSHN..FKupuLTQPAYLISGLDVVADHLVFAAFKAGAVGYD...MTTDSSASTYNQALAWSTTAGLDScGGYKALVENTAGLNGPINGLFTLLDTFAYVTPVSGMKGGSpNTEsVQTTYPVKSDQKATAKIASLINASPLNSYGD-GlsVFDALGLNaNFKhN-ERLPSRTDQl .....MGsshlPSLWYWVV...upcssSsKsTWaApTpLsWGpDKQKQFVENQLGaK..-souosSHN..F+SpuhTQPAYLISGlDsVsDpLlFuuFKAGuVGYD......So.SSuS...TKDQALAWSTTsuLDScsGY+sLVpNssGLNGPING.Fol.DTFuaVsPhSG.+sss.ts...o.tslpTsYPVKsspKuTsKIsSLINAoPLNSYGD-G...lsVFDALGLNaNFK.NpE+Lspt................................ 0 8 8 8 +3346 PF02308 MgtC MgtC family Bateman A anon Bateman A Family The MgtC protein is found in an operon with the Mg2+ transporter protein MgtB. The function of MgtC and its homologues is not known. 23.00 23.00 23.20 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.71 0.71 -4.32 11 3455 2009-09-11 06:02:09 2003-04-07 12:59:11 11 7 2432 0 775 2251 137 132.60 37 56.32 CHANGED LllAhllGulIGLERphRt...+hAGLRTpsLlAhGuslhsllSlpsthshs.........PsRlsAQllSGlGFLGuGsIL+cu........sslpGLTTAAolWsoAulGllsGuGhaphulhuolhlLh..lhLpslsphlp+p.hp .........................lhlAhllGulIGhER..p.h.....+p..................+s.AGlRT.ahLVu.lGuslh...h......l.....l.u.......hh....s....h....t.s....h..........................DPsRlu...A....QVV....SG.l.G.F...L.G.A..G..s.I..l..h.ps...............................ps.l.pG...L.TTAAslWsoAulGls.h..G.s.G..h......ahhA.lhusl.h..ll.h.s.h.hh..lp...ltp.......hht................................... 0 254 503 630 +3347 PF03448 MgtE_N MgtE intracellular N domain Bateman A anon Bateman A Domain This domain is found at the N-terminus of eubacterial magnesium transporters of the MgtE family Pfam:PF01769. 
This domain is an intracellular domain that has an alpha-helical structure. The crystal structure of the MgtE transporter [1] shows two of 5 magnesium ions are in the interface between the N domain and the CBS domains. In the absence of magnesium there is a large shift between the N and CBS domains. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.29 0.72 -3.70 169 3637 2012-10-02 13:19:07 2003-04-07 12:59:11 12 24 2860 12 968 2783 1850 101.10 22 23.58 CHANGED hcssDlAcllp......pl...s..ccphtlhphL.stcptu-lhppls.tc.hptpllpph..sspphsp.llpphssDDtsDl...lpcLspct.tpplL...sth.stcc+pplppLLsYs-c .....................................................pssDlAphlc......pl......s..cp....ph..hlh.ph.L....ss-p...tu-ll.p..pl...s...pc...hp...tpl...l...p...p...h...sspchu.c...llp...phss..D..-ss-l...lpcL...s...p.sh....hp...pl...L...s.th...s.cctpplpp.lLp.Ys-p........................................... 0 358 675 840 +3348 PF03165 MH1 MH1 domain Bateman A anon Pfam-B_519 (release 3.0) Domain The MH1 (MAD homology 1) domain is found at the amino terminus of MAD related proteins such as Smads. This domain is separated from the MH2 domain by a non-conserved linker region. The crystal structure of the MH1 domain shows that a highly conserved 11 residue beta hairpin is used to bind the DNA consensus sequence GNCN in the major groove, shown to be vital for the transcriptional activation of target genes. Not all examples of MH1 can bind to DNA however. Smad2 cannot bind DNA and has a large insertion within the hairpin that presumably abolishes DNA binding. A basic helix (H2) in MH1 with the nuclear localisation signal KKLKK has been shown to be essential for Smad3 nuclear import. Smads also use the MH1 domain to interact with transcription factors such as Jun, TFE3, Sp1, and Runx [1,3]. 
25.00 25.00 25.20 27.80 24.20 24.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.70 0.72 -4.03 19 1320 2012-10-05 18:28:12 2003-04-07 12:59:11 11 19 119 10 608 1055 0 101.20 42 22.56 CHANGED hc+sscuLlKKLKc+.....tL-sLhpAlpspGsss..................otClsls..............................+ph-uRLt............st+KuhPHllhC.....RlaRWPDLpptpELKslssCpts.hp.cpcp......lClNPYHYpRlp ................................................................t.pKhspsLl+KL+Kch...tthE-L.pAlo.....s....ps......................stCls.ls....................................................................................................p..h.ph.ts....................scpKGhP+lIhC..............+l..WRWPD..Lp..t..tELK...s...lt.h.C.phs..hphppst.............lC....lNPYHapRl...................... 0 141 193 390 +3349 PF03166 MH2 MH2 domain Bateman A anon Pfam-B_519 (release 3.0) Family This is the MH2 (MAD homology 2) domain found at the carboxy terminus of MAD related proteins such as Smads. This domain is separated from the MH1 domain by a non-conserved linker region. The MH2 domain mediates interaction with a wide variety of proteins and provides specificity and selectivity to Smad function and also is critical for mediating interactions in Smad oligomers. Unlike MH1, MH2 does not bind DNA. The well-studied MH2 domain of Smad4 is composed of five alpha helices and three loops enclosing a beta sandwich. Smads are involved in the propagation of TGF-beta signals by direct association with the TGF-beta receptor kinase which phosphorylates the last two Ser of a conserved 'SSXS' motif located at the C-terminus of MH2 [1-3]. 
25.00 25.00 25.10 25.00 22.20 24.60 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.32 0.71 -4.46 11 1087 2012-10-01 21:55:46 2003-04-07 12:59:11 9 17 139 31 549 937 0 167.40 50 39.60 CHANGED hpcsphWsoIuYaEhssRVGEha+lsss...slhlDGas....Dss..sssRhCLGtLoNsNR..ssslppsRt+IGcGVpLshc..sGp..ValpspScpsIFVpSsshshptG.hc.s.pVpKlhPs.solKlF-hphhtpllppssppu.tu..................................stpLp+.hColRlSFVKGWGtcY.pRQslpuoPCWlEl+Lsts ...............................................................h.-PtaWCSIuYaE....hssRVGEsF+ssps.......ol.hlDG...as................DPS.....susRF..CLGhL...S.........N..V.s...R..............spslEp.sR.pHIG.+............G...VpLhhh..sG...-........VascC.l...S.............-.puIFVQ......S............sh.s..p....t.......G.....h...........+Ps.......s..........Vp...........K.......I.....................PG.........s.............s....l...K...............lFs.pc...htthh..tpss....s.p.u...ts...........................................................................V.pLp+.hC.ol.RhSF.VK.GW.G.s....-Y..pRQslouTPCWlElpLpt.s................................................................ 
0 143 186 378 +3350 PF00129 MHC_I Class I Histocompatibility antigen, domains alpha 1 and 2 Sonnhammer ELL anon Prosite Domain \N 28.30 28.30 28.40 28.30 28.10 28.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.17 0.71 -4.93 25 25261 2012-10-03 22:02:01 2003-04-07 12:59:11 13 14 308 732 732 17686 0 155.50 62 74.51 CHANGED GSHSLRYFhTAVSRPGhGEPRFIuVGYVDDTQFVRFDSDAtsPRhEPRAPWhEQEG.PEYW-RpTphuKuptQsFRVsLRTLhuYYNQSEuGSHTlQhMaGCDlGsDGRLLRGYpQaAYDG+DYIALNEDL+oWTAADsAAQIT+RKWEtAshAEp.tRAYLEGpCVEWLRRYLEpGKETL ......................................................................SHShRY..FhTu..h.S..RP..G..R..G..EP...R..FIu.V.G.YVDDTQF.VRF....D...S...D.A.s.S..........R.h.E..P.R...A.PW...l.....E....Q.....E.G......PE..Y.W...........D...c.p.T..ph.h.K...s.p.sQ...s..R.s.L.p.s....L..p.t.Y..Y.N..Q..S............E............u.......G............S............H......Tl.Qh.MYG.....CD..l...G....s...D...G....R...h.LRGapQ...AYDG+D..YIAL..N.E.D..L..+.SW.....T....A.....A....D.....h..A.....A....Q....I....T.......p...R.........K...W....E.......A..A...c.....A..EQ....h.......RAY...L.E.GpCVE.......WL....RR...YLENGKETL...................................................................................... 0 92 131 319 +3351 PF03707 MHYT SPNTR; Bacterial signalling protein N terminal repeat Yeats C anon Yeats C Repeat Found as an N terminal triplet tandem repeat in bacterial signalling proteins. Family includes CoxC (Swiss:Q9KX27) and CoxH (Swiss:Q9KX23) from P.carboxydovorans. Each repeat contains two transmembrane helices. Domain is also described as the MHYT domain [1]. 
21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.17 0.72 -3.86 27 2404 2009-01-15 18:05:59 2003-04-07 12:59:11 11 83 683 0 856 2299 124 61.50 27 23.87 CHANGED GhGIhuMHasGMhAhph.........s..h.ap.hhhh.hSll....lultssssul.hhshttthhhthhhsusll ...............GhGIhuMHasGM.hAhphs...........................st..ltas...hh.......h.......s....h...l...Sll.............lA.l.ss.u.h.hAL....hhs..h..t....t.h............h....................................... 0 190 449 678 +3352 PF02426 MIase Muconolactone delta-isomerase Bateman A anon Pfam-B_2784 (release 5.4) Domain This small enzyme forms a homodecameric complex, that catalyses the third step in the catabolism of catechol to succinate- and acetyl-coa in the beta-ketoadipate pathway EC:5.3.3.4. The protein has a ferredoxin-like fold according to SCOP. 20.10 20.10 20.40 20.20 19.80 19.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.95 0.72 -4.17 7 493 2012-10-02 00:20:33 2003-04-07 12:59:11 11 4 392 0 136 363 26 90.60 44 92.14 CHANGED MLFpVcMsVplPssMss-tAsplKucEKAhSQcLQ+pGKW.HlWR...lsGcYuNlSlFDV-SssELHplLtsLPLFPYMsIEVpsLsRHPSul .............................................MLFhVcMsVpl.Psshssptsscl+AcEKshuQcLQcpG+Wh+LWR...ls.G..pYuNl.SlFDVcsssELH.s.l.Lp.uLPLa....P..a.M....sI.cV.ssLspHPSsl.................. 1 29 70 109 +3353 PF03526 Microcin Colicin E1 (microcin) immunity protein Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 25.00 26.40 23.70 24.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.50 0.72 -4.37 3 96 2009-09-10 20:23:55 2003-04-07 12:59:11 8 1 70 0 2 41 0 48.70 45 43.97 CHANGED YYFLASDKMLYAIVISTILCPYSKYAIEHIAFKFIKK-FFc+RKNLNssPsAKls ..hhhh.hhhh.................lLaPhuKahIEchAlKFT+.-FWpps..FFssssGKhs....... 
0 1 1 1 +3354 PF04687 Microvir_H Microvirus H protein (pilot protein) Kerrison ND anon DOMO:DM04728; Family A single molecule of H protein is found on each of the 12 spikes on the microvirus shell. H is involved in the ejection of the phage DNA, and at least one copy is injected into the host's periplasmic space along with the ssDNA viral genome [1]. Part of H is thought to lie outside the shell, where it recognises lipopolysaccharide from virus-sensitive strains [2]. Part of H may lie within the capsid, since mutations in H can influence the DNA ejection mechanism by affecting the DNA-protein interactions [3]. H may span the capsid through the hydrophilic channels formed by G proteins [1]. 21.30 21.30 36.80 61.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.93 0.70 -5.38 3 108 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 1 127 1 300.50 78 94.51 CHANGED MGKLFGGGQSAsSsGIQGsVLAoDNNsVGMs.DAGIKSAIQGSNVPNSsEAAPStlSGlMA.......cAGKuLL-GTlQAGostVusKLlDtVGLGGKSAuDKGKDTRDYLAAAFPELNPWERAGAGASSuGlpDAGFpNQKELTKMQLDNQKEIAKMQNETQKEIAGIQSATSRQNTKDoVYAQNEMLAYNQKESpARVASILENTsLTKQQQTSEIMRQMLTQAQTAGQYFTNDQIKELTRKVuAEIDtVH..............Q-TQNSRYGSSQVGATAKDVSNsITDAASGlVDaF+GhDptVADsWNNFFKDGKScGIuSNpR+ ............MSKLFGGGQpAASuGIQGsVLATDNNTVGMs.DAGIKSAIQGSNVPNscEAAPshlSGsMA.......cAGKuLL-GslQAGostVo-KLlDhVGLGGKSAADKGKDTRDYLAAAFPELNAWERAGAsASSAGMVDAGFENQKELTKMQLDNQKEIAEMQNETQKEIAGIQSATSRQNTKDQVYAQNEMLAYQQKESTARVASIMENTNLSKQQQVSEIMRQMLTQAQTAGQYFTNDQIKEhTRKVSAEVDLVH..............QQTQNQRYGSSpIGATAKDISNVVTDAASGVVDhF+GIDKAVADTWNNFWKDGKADGIGSNLSR.... 0 0 1 1 +3355 PF04726 Microvir_J Microvirus J protein Kerrison ND anon DOMO:DM04878; Family This small protein is involved in DNA packaging, interacting with DNA via its hydrophobic carboxyl terminus. In bacteriophage phi-X174, J is present in 60 copies, and forms an S-shaped polypeptide chain without any secondary structure. 
It is thought to interact with DNA through simple charge interactions [1]. 19.00 19.00 21.40 46.90 17.80 16.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.72 0.72 -6.88 0.72 -3.99 3 63 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 53 4 0 16 2 30.40 78 97.41 CHANGED pKutRRSG.............GKRKGARLWYVGGoQF ...pKut+RSG.............GK+KGARLWYVGGpQF 0 0 0 0 +3356 PF04517 Microvir_lysis Microvirus lysis protein (E), C terminus Kerrison ND anon DOMO:DM04358; Family E protein causes host cell lysis by inhibiting MraY, a peptidoglycan biosynthesis enzyme. This leads to cell wall failure at septation [1]. The N terminal transmembrane region matches the signal peptide model and must be omitted from the family. 25.00 25.00 50.50 49.90 17.30 15.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.82 0.72 -4.22 3 70 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 48 0 0 58 2297 42.00 67 46.41 CHANGED pPVSSWKALSLsKTLSMASSARLKPLNCSRoPCVaAQETKpL ..RPVSSWKALSLRKTLLMASSVRLKPLNCSRLPCVaA.EThph. 0 0 0 0 +3357 PF04478 Mid2 Mid2 like cell wall stress sensor Kerrison ND anon DOMO:DM04295; Family This family represents a region near the C terminus of Mid2, which contains a transmembrane region. The remainder of the protein sequence is serine-rich and of low complexity, and is therefore impossible to align accurately. Mid2 is thought to act as a mechanosensor of cell wall stress. The C-terminal cytoplasmic region of Mid2 is known to interact with Rom2, a guanine nucleotide exchange factor (GEF) for Rho1, which is part of the cell wall integrity signalling pathway []1. 
23.10 23.10 23.10 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.05 0.71 -4.81 6 122 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 88 0 81 123 0 108.60 28 28.33 CHANGED sos.ssTI.TSlIcGpTILSstYTTlTYTPouTu..usss+ptpp.GLSKKN+NIVIGCVVGIGVPLllllLsLlYhFCIpssRTDFIsSDGKVlTAY+sN+hoKWWhsLLGKcl..o-cYpS-SPlGu.ssshps.tltpscDlhpsspshohctsss-up ..................................ss.....sh..o.oh...p.G..p..Th....h.s.sth..o...T...l....Th...o.sosos..........ss.s.sp.s...pso...G....L....Spps+.sIl.IGlVVG.lGs..sll.l.s.hlsll.ahhh..........................................................................................th........................................ 0 14 46 75 +3358 PF01187 MIF Macrophage migration inhibitory factor (MIF) Finn RD, Bateman A anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.10 0.71 -4.00 6 669 2012-10-01 20:38:22 2003-04-07 12:59:11 13 13 403 152 326 668 118 108.50 27 78.19 CHANGED PhhplcTNlPpspVPsuL.p+LostlApuhGKPtphluVclsPGttMsaGGSo-PCAlhSlpSIGsV.uucpN+SaoptlhchLscELplspDRlhIpFaDlpuuslGaNGoThu ...........................Phh.lpTN..l..s..h...s..p.h.s...p...s..lh..............p.c.h.sptl.uph.......h....GK....Pcsalh....lt.lp...sshshh..F...u..G.o.p.-.P.sAhs.pl..p..Sl.G.s..l...ssppN....cphottlsp.h.lppcLu..ls.t.s.R.halpahshpst..hGaNGssh.................................... 0 106 186 263 +3359 PF03775 MinC_C MinC; Septum formation inhibitor MinC, C-terminal domain Bateman A, Finn RD anon COG0850 Domain In Escherichia coli Swiss:P06138 assembles into a Z ring at midcell while assembly at polar sites is prevented by the min system. MinC Swiss:P18196 a component of this system, is an inhibitor of FtsZ assembly that is positioned within the cell by interaction with MinDE. MinC is an oligomer, probably a dimer [1]. The C terminal half of MinC is the most conserved and interacts with MinD. 
The N terminal half is thought interact with FtsZ. 29.70 29.70 30.10 30.20 29.30 29.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.20 0.72 -4.40 15 2118 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 2070 4 484 1401 159 105.10 38 45.31 CHANGED ThllcpsVRSGQpIYuc.GDLlllGsVssGAEllA-GNIHVYGsLRGRAhAGspGss.sApIaspplpuELluIuupahhs-c....lssphhscsspltlcs-tlhlpsL .........hllcpsVRSGQplaut...uDLllhusVssGAElIA.cGNIHVYGhlRG.RA.lAGss......Gsp..pApIhsppL.pAE.LluIAGhYhh.u-p.......lssp.....h.h.s..p..s..s.p...l.h.L.p..p.s.t.lhlp..h........................................... 0 152 303 397 +3360 PF05209 MinC_N Septum formation inhibitor MinC, N-terminal domain Finn RD anon COG0850 Domain In Escherichia coli Swiss:P06138 assembles into a Z ring at midcell while assembly at polar sites is prevented by the min system. MinC Swiss:P18196 a component of this system, is an inhibitor of FtsZ assembly that is positioned within the cell by interaction with MinDE. MinC is an oligomer, probably a dimer [1]. The C terminal half of MinC is the most conserved and interacts with MinD. The N terminal half is thought to interact with FtsZ. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.14 0.72 -4.24 8 1242 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 1231 5 255 716 30 102.50 34 43.57 CHANGED hppssl.-LKuophslslLpLcss...slsclhptLucKIspuPsFFpcsPlllslucl....-slshpALcpshpppGlpslGl.....thtppc...l..hph.G.LslLocucsp .........................hppssl.ELKGSo.FTLoVl+..L+cs...-..cs.lcptLp-...Kl.u...QAP.u...F.h.c.pAPVVls.lu..sL.........cs...s..l..s..a.sslpp...s...l...s...ssGL+llGVss....tstphcsp......h....tch...G.LPlLstupt..................................... 0 66 134 198 +3361 PF03776 MinE Septum formation topological specificity factor MinE Bateman A anon COG0851 Family The E. 
coli minicell locus was shown to code for three gene products (MinC, MinD, and MinE) whose coordinate action is required for proper placement of the division septum. The minE gene codes for a topological specificity factor that, in wild-type cells, prevents the division inhibitor from acting at internal division sites while permitting it to block septation at polar sites [1]. 25.00 25.00 29.00 25.50 23.00 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.37 0.72 -4.47 104 1852 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 1808 14 433 931 152 69.60 41 75.76 CHANGED SAslAKERLQlllAH-Rsthps...-....hLspL+cEIlpVIsKYVpl-p.-plplpl-.+s.psh......ssL-lNIslsc ......oAslAKERL.....QlIlA.c-Rsssts....sc.....YLspL+c-ILcVIsK..YV....p.IDs...-tlpVp.l-.+s....scl........olLElNlsLPc................... 0 129 267 356 +3362 PF00230 MIP Major intrinsic protein Finn RD, Delamarche C anon Prosite Family MIP (Major Intrinsic Protein) family proteins exhibit essentially two distinct types of channel properties: (1) specific water transport by the aquaporins, and (2) small neutral solutes transport, such as glycerol by the glycerol facilitators [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.68 0.70 -4.89 14 9641 2009-01-15 18:05:59 2003-04-07 12:59:11 15 26 3806 68 2727 7120 1558 211.20 28 85.40 CHANGED Ehpphsh..hRAslAEFluThlFlFhuhG..ouluhsttst.........................s...........sllslAlAaGhulhshV.ssup..ISGuHlNPAVThuhhlupplollRAlhYhlAQhLGAlsusulL+hhpsu....................pssthss.lusGhssspuhshEllhTFhLVhslaussDch+ssp.up....APLuIGhhVshstLsuhPhTGsuMNPARSFGPAllttp..............assHWlaWlGPhlGAhluullY ................................................................................h..........tt.hhuEhlGT.....h..lh..l...hh.GsG........ssh....h...th.t..h.s..................................t.........................shh.hls..h........u.a..G.....h.u.....lh.....h..h..l.a....s...sut.....l...S...G..u..Hl..N.....P...A..........VTl................uh.......h........l.......t.........u.............p...........h.........s............h.............t......c...s......h.......Y......l...l...A...Q.hl.GAh.....h.....u.......u.s.l.l.h.h.hh.h..s.h...............................................................t.t..hs..h...h.....s....p..h....s..s......s......t......h....s...h..h..p....u......hh........s.Eh..lhT...hh...Ll.h.s...l.....h.........u.......h........s........s.....p....t............p..s.....s.....s.....................................h..s.s.l..s.....l...........G...h.h..l....hh...l....t.....h....s.....h.......s....s....hT......G......h.u.h.NPAR.s...h...G...P.t.lh.t..t.h........................................th..t......h.W.....l.....h...l.uP.l..l....G.u.hluuhha..................................................... 0 763 1487 2164 +3363 PF03094 Mlo Mlo family Mifsud W anon Pfam-B_2483 (release 6.4) Family A family of plant integral membrane proteins, first discovered in barley. 
Mutants lacking wild-type Mlo proteins show broad spectrum resistance to the powdery mildew fungus, and dysregulated cell death control, with spontaneous cell death in response to developmental or abiotic stimuli. Thus wild-type Mlo proteins are thought to be inhibitors of cell death whose deficiency lowers the threshold required to trigger the cascade of events that result in plant cell death. Mlo proteins are localised in the plasma membrane and possess seven transmembrane regions; thus the Mlo family is the only major higher plant family to possess 7 transmembrane domains. It has been suggested that Mlo proteins function as G-protein coupled receptors in plants [1]; however the molecular and biological functions of Mlo proteins remain to be fully determined. 18.50 18.50 18.50 18.50 18.10 18.30 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.66 0.70 -6.38 10 512 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 76 0 283 513 6 337.20 33 80.51 CHANGED sstpoL-pTPTWAVAsVCsVllhISlllE+hLH+lG+ahK+++KKALaEALEKlKsELMLLGFISLLLTluQsh.IucICl.spclupsMhPCs........s.tcEcsshupph...........ssR+LLt..............shAssusspCucKG+VPLlShpuLHQLHIFIFVLAVFHVlYsllThsLG+hKI+sWK+WEcEstsh..-a-husDspRFRasH.......-TSFlRcHh.shWo+.oshhhWVtCFFRQFasSVsKoDYLTLRpGFIssHhss..ss+FNF+KYIpRSLEDDFKsVVGIS.hLWshsVLFLLlNlsGWcoYFWloFIPLlllLsVGTKLEtIIocLALEIpE++sVlcGsPVVpPSD-hFWFu+PcllLaLIHFlLFQNAFpIAaFFWIhasFGl+SCaHcphshllsRLllGVhlQlLCSYhTLPLYALVTQMGSpMK+ulF-EpsucAL+sW++ssKc+pchtt.......ppGusssssucssss......spuoSssplLppspspsss 
............................................t.........sW.ls.hh.hhl.hshhh-h.lphhtp...........hh.pp.pp.p.h.tsl....+hpt.......ELMllGhlSLlLsh.hpt..l..plCl.s...t.h.......h.h.C.....................................................................................................................................C.......t......G......sh.hS.tulcpLH..hFlFhLAlh....HlhashhT..hhLuhh+.....h.pt.W+.WEtph.t............p.......................t........h.....h.p................p....pFhpt+h......t..h.p........hh........ah....hsFh+QFh.tSV.+sDYhshR.uFl.h......Hh...........pasFppYhhRuhEp-FchlV..Gl.........S........l..Whh.slh...hh...........hhs..hp.G.h............hhW.hsh......lPhhll.LhlGsKLphllhphu...........c..............ht...t.......t...t.....hh...........p............t...............hl....ps........pcphFWFt+PphlLhL.IphhLF..................Q.NuFphshFhW.h........h..p..hu..h...p....oChht.p..h...........h........h.+lh.hG.hhhQhlCSY.TLPLYAlVo.QMG.ophK..tlht.pphtpslhtWttt.s..+pp..t..............................................................tt....................................................... 1 53 194 244 +3364 PF03304 Mlp Lipoprotein_12; Mlp lipoprotein family Mifsud W, Bateman A anon Pfam-B_1663 (release 6.5) Family The Mlp (for Multicopy Lipoprotein) family of lipoproteins is found in Borrelia species [1]. This family were previously known as 2.9 lipoprotein genes [2]. These surface expressed genes may represent new candidate vaccinogens for Lyme disease [1]. Members of this family generally are downstream of four ORFs called A,B,C and D that are involved in hemolytic activity. 
21.00 21.00 21.40 21.00 20.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.21 0.71 -4.21 13 220 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 33 0 23 188 4 128.90 36 65.43 CHANGED sQQTKSRpKRDLoQcEhsQQ.K.p.p..EEhLhppLs..................cspKo........ahDWLp........Ehhsuhuch.cFLEsc...cuKhKoh.saIKutlDspsup...............spsNcttsshpsslppaF+GsshDhF....uNptlhpCh ..................................pQsKSRtKRDLo...QcE........sQp.K.p.p..EEhlhppLs.......................cspKs..................ahDWL+............-hlsuhsch.pFLcpc..........cuKhKshhsaIKutlDpsss....................ppssp.tss.h.pp.lpthh.pGs..h.D.F....sspt..t................................................................................ 0 9 9 9 +3365 PF03562 MltA MltA specific insert domain Bateman A anon Pfam-B_1127 (release 7.0) Domain This beta barrel domain is found inserted in the MltA a murein degrading transglycosylase enzyme [1]. This domain may be involved in peptidoglycan binding. 25.00 25.00 28.10 27.70 21.90 20.60 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.85 0.71 -4.50 47 1797 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 1393 13 305 1164 129 152.30 46 39.97 CHANGED llcuStspsspapaPlYphPssh....................................................................tp.thhsa.sRtpIp......tsult.puhpluascs.lDhFhlplQGSGplphsD....Gphlp.luYAupNGaPYpuIG+h..............Ll-cG..................clstcphShpuI+pWhptp.Pp.clpclLppNsSaVFF+ ...............................................VlpuccpRpupapaPlYthPsc..........................................................................t.tht.h.o.Rsp.I.........uG.A..L.s...cu.hlLuYups.lDsFhMclQGSGhlch..s..D....G..p..h..l..p.huYAuKNGHsYpSIG+h..............LlD+G...................l...c....tp....p...hSMQu..I+pWhcp..p....Pp.cl....pE....LLppNPSaVFF+................................ 
0 69 167 238 +3366 PF01642 MM_CoA_mutase Methylmalonyl-CoA mutase Bateman A, Griffiths-Jones SR anon Pfam-B_1611 (release 4.1) Family The enzyme methylmalonyl-CoA mutase is a member of a class of enzymes that uses coenzyme B12 (adenosylcobalamin) as a cofactor. The enzyme induces the formation of an adenosyl radical from the cofactor. This radical then initiates a free-radical rearrangement of its substrate, succinyl-CoA, to methylmalonyl-CoA [1]. 19.00 19.00 19.20 19.00 18.00 18.90 hmmbuild -o /dev/null HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.56 0.70 -6.36 11 3025 2009-01-15 18:05:59 2003-04-07 12:59:11 17 11 1507 35 1053 2608 2315 449.20 38 72.72 CHANGED apTsEGIslcPlYt..ccchp-hpt..htshPGhhPFsRGsYsTMYshpPWTIRQYAGFuTsc-oNthY+hslAsGQpGLSVAFDLATppGYDSDsPRlhG-VGpAGVAIDol.DMchLFcGIsLsphS....VSMThNGsllPlLuhYlssuEcQGVp.ppLsGTlQNDILKEahsRsTYIaPPcPShRlIuDIhtasuccMP+aNoISISGYHhpEAGAoAs.ElAaTLADGlpYl+sulsuGLslDsFAPRLSFFauIGhNaFMEIAKhRAARhLWA+llcp.Fu..sKssKShhLRsHuQTSGWSLTtQDPaNNllRTslEAhAAslGGTQSLHTNuFDEAL..uLP...T-FSARIARNTQlllpEEStls+VlDPhGGSYYlEpLTcclsccAhphIpcI-EhG................GMs+AlcsGlsphcIpEuAA+cQscIDpGcpslVGVNKYpl.cEttl-..lL..cl-sssVctcQlc+LpchRusRDsctlctAL-tlscsAt......tst...NLLsLulcAuRspsTlGEho-Ahccsasca 
.....................................................................................................................................................pt..ht.ha..........t..t.........t....t...h.....................h.sh..sG..PahRG..sh...s..s.h.h..h....sp...sW...s.....hRQaAGauoA.c.-.oN.tha..+.h.t..l.s..t..G...p.p.....G..L......S..s......A.FD..h.sTh.hG.hD.sD.p.s.p...l....h....G...-V....G.t...s.GV...ul.s.ol....DMc..hLFc....sI.sL..s..p..h..o....lS..M..Th.N...u.s...sh...s....l..l.A.hal.ss....u.........c..-..Q...............G.............s..s................................p......................p......................lpG..Tl..Q..s..D..I..LK................E....ahs+...sT.aI.a.....P.P..p..s......S.h..+.l..l.....u..D.lh.t.a.s.u.p.p..h..P..+a...s..o..ISI.SGYHhpEAGAssspElAaTLAsGhpYlcshl........p..t....G....l....s..l....D....p....F...A...s....+L..S.F..F.auhuhs.h.FhplAKh.RAARhLWuclhct.as.............sps......s.+..............u.h....h..LR.....hH..sQTSGhoLTtQ-PhsNllRoslpAhuAsh...G...s...spSLH.T.Nu.hDEA.l........u.LP.......o-hSsRIAhsTQllltpE.otlsc..shDPhuGSaalEpLTppltcpAhthhpcl-p.h...G.................................GhstAl...csGh.phpIp-uutcpptpl-p..Gc....p....sllGVN.ca...................t...p.....-.....p.........h....-...............l.l...........pl..-.........s.............s.....h............p......t..p.......Q..l.....t..........pLp..p.h..+.....t.....p...R.....c.....p.....t.....t.............sp.............t...sLt....tl....p..ps....At.................................tt........Nlh.thhhpAs+s..h.solGEhstshptshst.................................................................................. 0 382 755 937 +3367 PF03972 MmgE_PrpD MmgE/PrpD family Bateman A anon COG2079 Family This family includes 2-methylcitrate dehydratase EC:4.2.1.79 (PrpD) that is required for propionate catabolism. It catalyses the third step of the 2-methylcitric acid cycle. 
28.20 28.20 28.70 28.80 26.30 28.10 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.25 0.70 -6.31 16 2262 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 1523 6 768 1930 3171 437.30 28 94.04 CHANGED ppshcclAcaltshphosh....psh-ps+htllDslGsuhsuhpp.sspthhs.hs.sshsps.ssplhsTptphssstAAhsNGshs+hLDasDsahuAchttsSsslPulLAsA-tluts...........h..c-lLpAhlhuYEItsplAtptuhpchs...hspshalslAuAAusucllsL-p-plhpAluhAhspusulRphcputhsup+t..usu.AstsGlhuAhhAtpGhsG..Psslh-sphGFhcshhss.shphp.................atlpsshhK..aPsphHupoAs-AAhpL+cchp.....hpcIcplplcTapsAh+llscss.......sPpshcsscaSl.YhlAlsLlhGphslppacsch.hpDscl.sLtc+lphp.sscasptY....Pppps....splhhccsssht-ttl-hPhGc.hcpshuhsplh-KFcplhtthhs..pppp.hhhttshcptpltshhss ...............................................................t...h.plsc.al.hshp.hss.h......shcpA+htllDsl..Gsul..t.u.h...p..h..s.......s.s.p.phhs....h......s................u.....t.....s.......s......s..............s.......u............sp.V....G..........s..........s...........h.....p.l.....sP..s...tAAas.Gshh+hLD.......a......sD.T..........a.h....u...A...c........ht.t..su.........s..........sls......u....lLA..sA.-h.l.ups...................h...+clLpAhltuaE..lpsp..l.....u.........h....t.........s......u..hs..+hs.............hs.p....s.h..h.s...p.....l.....u.....usA.ssuch......LG......Ls..p-p......l.hsAluh....A..h.s..p...u...t...u.....L......+...p...h...p...c..us....s.sup+t..hssGpAsppulph.......Ahh.A.p.p.G...h.G.....P..s..slh.t...s.t......h...G....Fh.cs...h.h...p...s...p...s.h.cht...pshu...................................pas.h....p..s..l..h.a..Kh.aP........upaaupsAl-AA.h...pL..tpp..httt............ss-I.cclsl...c......T...pc..s.sh.cl...l..s....c.ts............s.ssst.-..t.ca.sltYhlA..lsL.l...h...G.c...l..s..h...........s.c.apc...........sh...h.....p...c....s........clps.LtpKlps.h.p.D...s..p..hots.Y..........Ppc.psh................ltl.th.p....c..G.s.....p.h...p..hhlch..s.hG....p...hccs......uhsplh.pKFpt.ths...th.hs....ptp...hhh....t
..................................................................................... 0 185 425 615 +3368 PF02406 MmoB_DmpM MmoB/DmpM family Bashton M, Bateman A anon Pfam-B_1148 (release 5.2) Domain This family consists of monooxygenase components such as MmoB methane monooxygenase (EC:1.14.13.25) regulatory protein B. When MmoB is present at low concentration it converts methane monooxygenase from an oxidase to a hydroxylase and stabilises intermediates required for the activation of dioxygen [1]. Also found in this family is DmpM or Phenol hydroxylase (EC:1.14.13.7) protein component P2, this protein lacks redox co-factors and is required for optimal turnover of Phenol hydroxylase [3]. 25.00 25.00 25.90 43.80 24.10 19.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.48 0.72 -4.00 40 224 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 168 27 81 207 3 87.60 31 82.55 CHANGED sspVslslp..ss--scsllEAlttD......NPs..stVp-psuhl+IcucGcLplc+polpEtL.GRsaclp.-lplslsohuG+lpps.D-phslpa ..............stVslslp..ss-pucsllEultp-......NPs...ssVpcpsuhl+I-up.ucLhlctpslpEtL.G+s...achp.plclshsohsG+lspp.DDphhlh........... 0 29 57 73 +3369 PF03176 MMPL MMPL family Mifsud W anon Pfam-B_357 (release 6.5) Family Members of this family are putative integral membrane proteins from bacteria. Several of the members are mycobacterial proteins. Many of the proteins contain two copies of this aligned region. The function of these proteins is not known, although it has been suggested that they may be involved in lipid transport [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.92 0.70 -5.73 34 10072 2012-10-02 18:57:54 2003-04-07 12:59:11 10 47 2346 0 3112 23929 8914 311.20 16 68.96 CHANGED shsspsusuhtshcphsphF.p..ssssshhhlhh.tsstsLss.sspsshpphlsplppDsstlspl.D.......................hs.......................ss.....stthhhSsDG+Ashh.lshpGs.usstu.pslsslcshscpss...s.pGhpshlsGsAuhhtDhpcusstshtllthsslsllhllLlllaRSllssllhlhTVslSlssuhGl.lhlhtthhslslsshslsl.hshlhlAsGoDYslhLluRa+EthttG.cptt....uhhpAhtuoGpVlsuuGLslAhshhu...LshucLshhsp.lGsslulGlllssLsshTlh.PAlhsl...........hGRh...............s.t....................thW..phuthls+tPtsh ............................................................................................................................................................................................................................................................................................t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..s.............-.......t.....p.....h....h.............h..h..........l...........h............p...............t..............t......s.......t.....t.......s........p.......t.......h..........p.........t....l.....p....p.....h...h.p.ph...........................ts.h.....p..s...h...l....s....G.........s..s...h...t.....t....-......h..t...p.........t...h...p.......p.......s...h..h....h.h......h......h..l...s...l.s.l......l...h....l...l....L..h......l..s......a.....R....S......l....h.....s.......s.....l....l....s....l......l...s.....s.........h......l....u.....l...
......h..........s.........s........h.........Gl.......h.....h.........h..........h............t.........h.........h.............h..........h...........h...............s...............l..........s.........s..........h.........s...................s.........l......s........h....s......l....h..l....u....l.....u....h......D..Y..sl......a....l.....l.....s........R....h.........+...E.......p..............h.....t........p..........s.......t..........s......t..........p........................A....l...h.....p......u....h....t......s......s....G......p....s....l....h........h...u........u....l....s...h....s....s.u....h...h..u...............h......h.......h............s...............s...........h.........s......h.......l..............p...p......hG..h...........s.h...u....l.......u.l....l....h....s..h....h....s.s..h.s....ll...P....A....l..h.t..l..................................h..uph...............................................................................................................................................hh........................................................................................................................... 2 1002 2163 2770 +3370 PF01926 MMR_HSR1 50S ribosome-binding GTPase Enright A, Ouzounis C, Bateman A anon Enright A Family The full-length GTPase protein is required for the complete activity of the protein of interacting with the 50S ribosome and binding of both adenine and guanine nucleotides, with a preference for guanine nucleotide. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.14 0.71 -4.13 703 46032 2012-10-05 12:31:08 2003-04-07 12:59:11 18 181 5181 76 12638 71407 21684 130.90 24 37.52 CHANGED plullGhPNsGKSoLlN..tL...............s..............t...t.....p..........hsh..................................lush..s...us..Tpch.hh.......thh.h..........................................tt.................h....h.llDosG..................................lh............pt...tp.h.........................t.........hhpt........h..hptl.pps...........-l...llh.......ll.......D.sp................h.......t................t...pl......................h.....................ppl............................................................tt.shlllhNK .....................................................................lullG..h.PNVGKST..LlN..tl..........................................s.......................s....p.c..............hsh............................................................................................................................................s..u.....s..h.......s.......t..s....T...pc.......h..................th..h.ph.................................................................................................................................ss.t.p....................h...........h.llD..T..s....G.............................................................lh................cstt.ps.t...................................t....hhp..p........................h....hp....t...l....pps.................................cl...........llh........................ll.....-...hpp...............t.............................tt.....l.....h.....t.h...........................................ths.h.hhhhsK....................................................................................................................................................................................................................................
...................... 0 4467 8137 10741 +3371 PF01054 MMTV_SAg Mouse mammary tumour virus superantigen Finn RD, Bateman A anon Pfam-B_518 (release 3.0) Family The mouse mammary tumour virus (MMTV) is a milk-transmitted type B retrovirus. The superantigen (SAg) is encoded by the long terminal repeat. The SAgs are also called PR73. 21.60 21.60 27.40 27.40 21.40 19.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -11.90 0.70 -5.53 5 88 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 18 0 0 93 0 231.20 73 95.85 CHANGED MPRLQQKWLNSRECPTLRGEAAKGLFPTKDDPSAHTRMSPSDKDILILCCKLGIALLCLGLLGEVAVRARRALTLDSFNSSSVQDYNLNNSENSTFLLGQGPQPTSSYKPHRlCPSEIEIRMLAKNYIFTNKTNPIGRLLITMLRNESLPFSTIFTQIQRLEMGIENRKRRSTSVEEQVQGLRASGLEVKRGKRSALVKIGDRWWQPGTYRGPYIYRPTDAPLPYTGRYDLNFDRWVTVNGYKVLYRSLPFRERLARARPPWCVLTQEEKDDMKQQVHDYIYLGTGls.lWt.hFaYT+EGAlA+lLEshKAssh ................................................................GLFPTpDDPSAppRMSPSDKDIhILCCKLGIALLCLGLLGEVAVRARRALTlDShN.sSSVQDYNLNsSENSTFLLtQGPQPTSSYKPHRhsPSEIEIRMLAKNYIFTNcTNPIGRLLlhMLRNESLsFSTIFTQIQ+LEMGIENRKRRS.TuVcEQVQtL.AoGLEVKcGK+SshVKIGDRWWQP..GTYR...GPYIYRPTDAPLPYTGRYDLNFDRWVTVNGYKVLYRSLPFRERLARARPPWCVLoQEEKDDMKQQVHDYIYLGTGM..ahthFa.o+EtAht+ll-phpt............................... 0 0 0 0 +3372 PF05067 Mn_catalase Manganese containing catalase Bateman A anon COG3546 Family Catalases are important antioxidant metalloenzymes that catalyse disproportionation of hydrogen peroxide, forming dioxygen and water. Two families of catalases are known, one having a heme cofactor, and this family that is a structurally distinct family containing non-heme manganese [2]. 
20.80 20.80 20.80 20.80 20.20 20.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.71 0.70 -5.65 6 1280 2012-10-01 21:25:29 2003-04-07 12:59:11 7 5 890 24 300 1030 18 231.50 36 96.59 CHANGED MF.hHsKcLQY.sKPs+PDPshAKtLQElLGGpFGElSsAhpYLaQGaNpRscsKh+...DLLhDluTEEluHVEMlATMIu+Ll-sAssctpEcAsc.s.hhtulhuGhNspH..uIloGhGuhstsSsGsPWoAsYIluSGNLlADhRtNlsAEupuRlphtRLa-MTDDPGl+DMLoFLlsR-thHQ.pahtAlcpLctp-.s.hlPss........as+thEcQEhu+phhNhScG-.sopupWhpGcu.-thtthphlhtshshuthPcL...............+sAP.h.+sT.......lsspthh .........................Ma.hap.K.c.L..p.a..s.V.+.l.spPsPthA..phL.EQhG.GspGELuAAhpYhsQu.hsh..p...s.t...t..t......+...-lLhD...IuTEEluHlEhluohlthL..hcs..u...ss.p........h.........c....s....s.......t............................................................................................sp...H...........hl.......tutush.ssusGsP...WoAsYlp..s..pG-.huDLhpNlAAEt+A+hhYppLhph.o.DD.ssl+-sLpFLhsREhsHpppFtcALpplpsph...............................................................................................................................................................................................ttt......................................................................................... 1 112 214 248 +3373 PF03962 Mnd1 Mnd1 family Wood V, Bateman A anon Wood V Family This family of proteins includes MND1 from S. cerevisiae. The mnd1 protein forms a complex with hop2 to promote homologous chromosome pairing and meiotic double-strand break repair [1]. 
29.90 29.90 29.90 30.10 29.80 29.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.63 36 274 2012-10-04 14:01:12 2003-04-07 12:59:11 10 2 237 0 195 289 2 181.60 31 82.42 CHANGED lhchFhpopshasLK-LEKhsPK.p.GIsshpVK-llQsLlD.-slVpsEKIGouNaYWsFPupshp..........................ptcsphpcLppclpchcpchpplppplppt.ctsRcps.............p-Rpp.lhpclppLpcchcpLpsclp..phppsDPptlcch+ppsphhtpusspWTDNIas..lpsahpp..phshpppplc...pphuIs.p-h-Y ..........................................hhphFhpopshapLK-LEKhsPK.p.GIs.u.hsVK-llQuLVD...D...shVcsEKIGooNaYWuFPucthp....................................................................tpcpphcpLppplpchppchtpLppplppt.....ct....sR.p-o................cERpp..hhpclppLcp...chppLc...s-lp.......phpc..s......D.P.p.hlcphcp.thpht+cuss+W.TDNIas..l.pp..ahpp..p..h.shppptlc...cthtls.t-hc................................................. 0 82 114 162 +3374 PF04039 MnhB Domain related to MnhB subunit of Na+/H+ antiporter Kerrison ND, Finn RD anon COG2111 Family Possible subunit of Na+/H+ antiporter [1], [2]. Predicted integral membrane protein, usually four transmembrane regions in this domain. Often found in bacterial NADH dehydrogenase subunit. 
21.80 21.80 22.10 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.69 0.71 -4.06 25 1811 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 1346 0 552 1386 262 127.40 30 27.60 CHANGED shIlcsss+hlsshllhauhhlhltG..H.sPGGGF.uGlhhAsuhlLhhluhuhp....phhphphttlhslG..lhhshlssls.hhhGhshhs.......h.hshhu...........pshlhsshhhslGlhlssstsshhhh ...............hllpsss+llhsllllhuhalhhpG..HssPGGGFluGLlhusA...hlLhhlA.h.shc..h............h...............pp........h...h.................h......s...............ht.h.lh.ulG.......ll..h......ush.o...ul.s.u.hh...h.G..t....s.FLop.............hhhplPhlu....................phcl.soshhFDlGVhlsVlGsshhh................................................................................... 0 158 340 458 +3375 PF01899 MNHE DUF68; Na+/H+ ion antiporter subunit Enright A, Ouzounis C, Bateman A, Cerutti L anon Enright A Family Subunit of a Na+/H+ Prokaryotic antiporter complex ([1],[2]). 21.80 21.80 21.80 21.90 21.60 21.50 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.97 0.71 -4.59 164 1796 2009-09-11 05:06:25 2003-04-07 12:59:11 11 2 1345 0 544 1267 256 146.80 25 89.96 CHANGED hslhLhllWlhLs........s...........sp.........llhGhllullls.hhh.....pt.h.h...h.....................h........tllth..lhhh.lh-llhuslpVAthl.Lp.spht....p.Puhlplslclcsch.slslLAshIoLTPGTlsl-l...........sp-p.....ph..LhlHslc......hsst.tt....hhppl..ppphEchlh............cl.a ..........................slhlhhhWlhls.........s...p.hs..hss.........hlhGh.llulhlh....hhh.......pt.h.hst..p................................hhhphh.....thlph.lh.hhlh-l....lpuslp.Vsph.l.......lp..sphp...hc..P......u.hlt.h...h.c.lc.s-h.slslL.ushI.TLTPGTlslcl.................................sp..-..p.....ph.lhlHsl-...hsst...cp............htpl...ppphEchlhcl....................................... 
0 161 337 456 +3376 PF03404 Mo-co_dimer Mo-co oxidoreductase dimerisation domain Bateman A anon Bateman A Domain This domain is found in molybdopterin cofactor (Mo-co) oxidoreductases. It is involved in dimer formation, and has an Ig-fold structure [1]. 20.90 20.90 20.90 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.91 0.71 -4.52 25 2673 2012-10-03 16:25:20 2003-04-07 12:59:11 11 47 1355 31 742 2393 488 104.90 33 28.91 CHANGED cauIh-Lsl..NSsIspPpHsEhlslss.......tsYsl+GYAYuGGGR+IsRVEVoLDcGcoWpLAslcatEc..+.....tc.............tpaCWsaWsL-lsls-...Lhssc-IhlRAhD-uhslQPc..chhWslhGMMNNsWaRVsIphp ............................................phsh..pShls.P.t..t..t.p..l..hs.....................t.hplpGh....Aa..sGs..u..t.lt+V-...lo...hD....t....Gt.sWp.u...p...l..t.....t.............................................asWphW..php..l...p.l.s...................ust.....c...l..hsRAhDcshssQPc....p.hWNhh......s.............t........................................................ 0 208 443 625 +3378 PF01967 MoaC MoaC family Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family are involved in molybdenum cofactor biosynthesis. However their molecular function is not known. 
20.70 20.70 24.50 24.30 20.40 19.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.79 0.71 -4.50 19 3524 2009-01-15 18:05:59 2003-04-07 12:59:11 16 27 3330 47 977 2349 1242 135.10 51 71.35 CHANGED MVDlopKssopRpAhApuhlphpscslchIppsplt..KGDVlusApIAGIhAAK+Tu-LIPLCHP.LsLouVcV-........hchp.ct..lclpusV+spG+TGVEMEALTuVSVAsLTlYDMsKAl-+s.......hhIpsl+LlEKoGG ...........................MVDVSsKstThRpAhApuhlph.p.s.c.s.l.p.h..I.........t...s............u.....p...hp...................KG.DVlusARIAGIhAAK+Ts-LIP.L.CHP.LhLopV-Vs...................................hp..h..p....s....p...p.......t....lcIpups..+ss..G.+.T..GVEMEALTAsSV................AALTIYDMsKA.V....-..Ks.................MhIssl+LlpKoGG..................... 0 325 623 829 +3379 PF02391 MoaE MoeA; MoeE; MoaE protein Bashton M, Bateman A anon Pfam-B_1056 (release 5.2) Family This family contains the MoaE protein that is involved in biosynthesis of molybdopterin [1]. Molybdopterin, the universal component of the pterin molybdenum cofactors, contains a dithiolene group serving to bind Mo. Addition of the dithiolene sulfurs to a molybdopterin precursor requires the activity of the converting factor. Converting factor contains the MoaE and MoaD proteins. 
21.40 21.40 21.70 21.50 21.30 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.42 0.71 -4.29 18 3376 2009-09-10 22:57:28 2003-04-07 12:59:11 12 13 3008 21 906 2269 1201 115.20 35 72.11 CHANGED cltlsspP..lsssthhphlsssps..GAIVsFsGhVR-hstG+.pVppLpYEuYs.MAtcpLppIspEsc.........p+asshc.ltlhHRlGhLplGEssllluVuusHRp-AacAscahlDplKpcl ........................................................t...lt.ps..hs..h..s.p.....h.p.h..l..t..p...ppp......G.A.llsF....sG..pV.R..s.t.........s............t.....G............c......pl...ps......L......p.....h..E.t.Ys.s.M..s..c+t..LtcI..sp-sp............................p..+...W..s...l.tp....lsl.hH.RlG.pLtsG-thVhluVoosHRpsA.FcAspalhDhlKsp.s............................. 0 273 561 756 +3380 PF01076 Mob_Pre Plasmid recombination enzyme Finn RD, Bateman A anon Pfam-B_717 (release 3.0) Family With some plasmids, recombination can occur in a site specific manner that is independent of RecA. In such cases, the recombination event requires another protein called Pre. Pre is a plasmid recombination enzyme. This protein is also known as Mob (conjugative mobilisation). 
23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.25 0.71 -4.67 18 1003 2012-10-02 18:54:05 2003-04-07 12:59:11 14 11 568 0 72 798 47 164.00 24 46.75 CHANGED Mu.aulhRhpKhK.ssslsGhppHspRpppsp...pNcDIDh-+othNh-Lhss.pshsappcIcphlpcph.tp.RtlRpDAVlhsEhllTuss-Fa.csho.E-p+caF-puhcahpccYGc.pNllhAslHhDEs...........TPHMHhG.......lVPhs-ct.....+LSAKclhst+cpLpphQs....chschhpppGapLcRGptt...oc+KHhssspYKp ......................................................................h...Khp........tsht.ht.H....pR.....pp.............sl..pctt.N...hc.h..h.....p............t.....p......h..p............p..t..l.p.phlpp....h.....................pth+.pcs.h.hh..........chll.o...s..s..c....hh.......p........t..........h........s..................p..c..............t.........cpah....p...p........shp...hh....p.c...+..h.....Gp..pNlh.AslHhDEp....................TPHhHhs.......................hVPh..spst.........................+ls.u+plhs......c......p.....p......Lt......thps...................ph.phht..p....h..lp.R.G.......ott..cp...h..ah............................................................................................ 0 20 46 62 +3381 PF03389 MobA_MobL MobA/MobL family Mifsud W anon Pfam-B_3424 (release 6.6) Family This family includes of the MobA protein from the E. coli plasmid RSF1010, and the MobL protein from the Thiobacillus ferrooxidans plasmid PTF1. These sequences are mobilisation proteins, which are essential for specific plasmid transfer. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.53 0.70 -4.93 9 1275 2012-10-02 18:54:05 2003-04-07 12:59:11 10 14 701 1 159 1186 123 200.30 31 35.78 CHANGED RSAssuAAYppss+h..E+ps+shDYsp+p.lhapEhlL.PspuPp.h....sDR......pshWNcVEshE+RssupLs+-lplALPhELTs-pphtLlc-FlcchhsscGMlADhslHt.........................Dss.tNPHsHlMhThR.lstDGhhtpK.....hhlstsGp.h...........hsttGc.......lhhp.W.sspshhsphRcsWt-phNptLpttGlshRIDtRSacpQGI-hhPTlHlGssu+thE++ ..................................................................SslusuAY..huspplhs........-....h...u...........hh..c.as..p+p....lhh.pphhL..Pt..p...sP..t...h................t-R..........................ppL..WNt.....V.....E..t.h...E...+.........p....psu..pl..ARch.lAL.P..p..E..L.......s.pp.p...........hpLlpcaspc.p...h..s..s..p..GMhsDh.AlHp........................................................sss....t....NP..........HsHlhhohRs...l..s....cGh.h.ttc.............h..tptst.....................................................................ttt.............................psW....sspp...thp.taRcpWushsNphLtpt.s..h.......pt.....R.........lDcRSh.......pp.Q..............u.......h....c........PphH.G.tsttht...................................................................................................................... 0 30 90 120 +3382 PF03205 MobB Molybdopterin guanine dinucleotide synthesis protein B Mifsud W anon Pfam-B_2446 (release 6.5) Family This protein contains a P-loop. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.92 0.71 -4.47 28 2671 2012-10-05 12:31:08 2003-04-07 12:59:11 9 40 2297 9 887 3759 1045 131.50 27 46.35 CHANGED shlhlVGspDoGKoTLhctLlshhhppGh+sshhhchch.Gp.slshPGs..............luhs.hcc...h.h.pth..ppphs...............llhh.Gaps.sspsc...lslsp...phsphhppph..........................pt.shllso ...........llsls.G..h.s.soGKTTLlcpL.l...s..t......L.......p.........t...c.........G....h.....+....l...u...h....l..KH.sc.H......p.....h...-...........l..D...h.......s..Gp....Do..h....ch...tpA.G...A...t.t..s.l.l.s..upp...............ht.h..h....p.p.....h..............p...p...p.s.....L..t....lhtths..................hD..llLl..E..GaKp.t...shsK.....l.lh.R......t...........................................hhh.hhh.t.............................................................................................................. 0 285 523 732 +3383 PF04698 Rab_eff_C MOBP; MOBP_C-Myrip; Rab effector MyRIP/melanophilin C-terminus Waterfield DO, Finn RD, Eberhardt R anon Pfam-B_4174 (release 7.5) Domain This domain is found at the C-terminus of the Rab effector proteins MyRIP and melanophilin. 
20.80 20.80 22.10 20.80 20.20 20.70 hmmbuild -o /dev/null HMM SEED 714 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -13.27 0.70 -6.42 7 193 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 47 0 70 165 0 315.90 37 56.40 CHANGED ChDlltGuhhEsslENEGSIsGSDSTFYRQSE.GHSMMDTLAVALRVAEEAlEEAISKAEuau.DSLDKQNEAsYLR-HKEELhEELATTIlQKIIRKQKsKoEQt..Et-s-WPpsp....ssSspsuDpuhhshPGucRusssLWRSQSAFSlsuEDs......htoussEuhh+p.+spsp+.+-c..SALPSWKSVDpLs-oshsPVLpSsDGNWVALpsso.hPPs..RhLAKPKSpsFpALEstSpVsSAYDEhGS-SEEDaDWu.ALscLp.pspth......tps..pst.s.us-ph..ssSPSsuh.sNsEshh.SDSETSShsSSpEu+.tpu+..WlpR+sspN.sptEKh+lpGEL..DVNFNPQusuhEhSDSSEsEEs.ash-++uRRW+Rs+stsEE.s.t.spscuph+sLpTpps.t...DLSETDlSsEspcp+o.sDshEEKL+oRLaELAhKMS-KETSSGE-QESEsRTEs-NQKpuLSSE-supsVQEELKKKYSAVSLCNISTEVLKVINATEELIAESoGPW-hPssstD+tcGoFPlGTD.lRLDEQLToLEENVYLsAGTVYGLEGQLoELEDAARpIpSsTsEoELA-LEDQVATAAAQVHHAELQISDIESRISALTlAGLNlAPCV+LTR+R-QKQpsQVQTIDTSRQQRRKLPAPPV..KuEchEuSsVTslKTFNRNFlLQGShTpRsK.ERKSosKDLMEPsltSAlMY ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................st.Ts-pELu-LEDpVAssAupVppu
E.plSDIESRIuALphAGLslts..sc.p+..+pp.................................................................................................................................................................................................................... 1 2 8 33 +3384 PF05161 MOFRL MOFRL family Bateman A anon Guo J Family MOFRL(multi-organism fragment with rich Leucine) family exists in bacteria and eukaryotes. The function of this domain is not clear, although it exists in some putative enzymes such as reductases and kinases. 20.20 20.20 20.20 21.40 19.80 19.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.38 0.72 -3.92 14 869 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 746 3 399 837 348 107.80 45 25.09 CHANGED PssLlsGGETTVslpG..p.G+GGRNpEhsLuhshtlc....st.sh...hhLuusTDGIDG....soDAAGullsssshtp.hpstGlD..tphLpssDSYshapthus...LlhTG.TGTNVND ..............................PsslLuGGETTVT.l..p..G................p....G.......+GGRNpEhh..LuhA....hslp.......................Gh.s...s.l...........hsLuusTDGlDG.........spDsAGAh.ssssols+...hp..t...tG...........l....c.....stshLssNDuashFpsl.us............Llh.T.GPTtTNVND................................ 0 127 238 325 +3385 PF04603 Mog1 Ran-interacting Mog1 protein Waterfield DI, Finn RD anon Pfam-B_4771 (release 7.5) Domain Segregation of nuclear and cytoplasmic processes facilitates regulation of many eukaryotic cellular functions such as gene expression and cell cycle progression. Trafficking through the nuclear pore requires a number of highly conserved soluble factors that escort macromolecular substrates into and out of the nucleus. The Mog1 protein has been shown to interact with RanGTP which stimulates guanine nucleotide release, suggesting Mog1 regulates the nuclear transport functions of Ran. The human homologue of Mog1 is thought to be alternatively spliced [1,2,3,4]. 
19.80 19.80 20.80 21.90 18.00 19.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.86 0.71 -4.24 3 266 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 230 2 185 261 3 130.10 31 62.57 CHANGED ssPLFGGALSAILPPthlDVS-LRpIPDNQEVFsHsSTDQ.SlIVELLEhQE+VpDuuAARYHFEDVAusNDAcu.scVhSVcPLuL-sL.uLRu-CssAWlLoGpQpV...AKcNpEuAssVsIHhALlRLPQaQTDLLlTFNsPs .................................p.h.LaGGAls.s.slPt....sahDlS....sl.RpVPDsQEVFs............s...s............s...........s.............c...........p...S..........lIlEl...................LEhh......s...................p.......s..........ps..t....p..A....hpaahpD...lu.t.p...s......s..ts..........hpl.....p....t................h..............t.p.h............t.............................................t................s........h..h.h...u.tp..p.h......................s+....t...p...p...........t..p......l.hl.hh..s..llR.L..p.....hpT.......Dlllohs.P.h........................................................................................................................................ 2 59 103 150 +3386 PF04879 Molybdop_Fe4S4 Molybdopterin oxidoreductase Fe4S4 domain Bateman A anon Bateman A Domain This domain is found in formate dehydrogenase H for which the structure is known. The first domain (residues 1 to 60, 448 to 476, and 499 to 540), comprising two small antiparallel sheets and four helices, coordinates the Fe4S4 cluster just below the protein surface [1]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.96 0.72 -4.15 65 10650 2009-01-15 18:05:59 2003-04-07 12:59:11 11 141 2936 51 2519 7754 1735 56.60 28 7.53 CHANGED hchsposCs.aCGsGCulplts.......pssclh............plpustppPsNp.....GplCsKGttshph.lts ................phshosCs..hC..u.....s..G..Cu.lphts.................cssc..lh.........................pl..cu...c..s...s....s..s....s..sp............................GthCsKG.tthhchl.................................... 0 756 1595 2106 +3387 PF00384 Molybdopterin molybdopterin; Molybdopterin oxidoreductase Finn RD, Griffiths-Jones SR, Bateman A anon Prosite & Pfam-B_2803 (Release 7.5) Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.38 0.70 -5.74 32 25621 2009-09-12 07:55:41 2003-04-07 12:59:11 17 170 4023 138 4565 20492 6386 349.30 16 53.56 CHANGED RLppPhlR.....................................p.pGcahtloWc-ALshlsptlpp..tphsp....h...sssushsssEshhshpchhpthsut.hth.sthsch.............sss.ptshhhssslt...............sl-suDh..llLhGsNstpptsll......ss+hhpthhpst.....hclsslusphs......hshshcaluh..ssso.hsl.hshtpshhpphpt..sc.....................................................................................................................................................................................................................pshlllGt..Ghhpp.....tcGsshhttltslsthhs.ht....sasshs........................................................................................hlpstAspsush....................................................clsh.ss.thh...t..t....phhalhG...........................sDtsph..t......ph....chhllhpsph.spsAphADllLPussasE....+puhasNsEGpsQt....tppsl.ssG-A+pDWcIl+sLuch 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................Rlt.Ph..hp.....................................................t....s...t....h....h.sWppAhp.h......ttt......h.........t..........................................t...............................s...................t........h..................................h............................h.........................................t............h............h...t...................h......h..t.....h..h.h......t................................h........t........s..p..........t.............................................................s..........................................p.h....t.....p.s..ph.....h.....hhh.G....t.......N......h..t...s..t..............................h..h..h.......t.hh.tt............................hphlsl.s...s.hhpt............................ht....t.s.........a....h..........p.s.s.Dh.sh...h........u..h......h............h.h..........h..........p..........p.................h.................st..........................................................................h.........t.............h...................h...........................................................................................................................................................................................................................................................ht.......t..t..h...........t........h........t....h....s........t............h....................t...........................t...............................................h.........h.........s.h..............u..pp..............
................u.........................h......h...h....h.......hG..p...h........s..........G...s.......s.....................................................................................................................................................................h....t.......t.............h..h..............................h..............................................................................................................................................................t..........................................................h.......h..............................................................................h..t...h...........h......h.h.t.................................................................s...............t.............p....t.h.h........t.h.t.........ph........c...h.h...l.s....-........h.......h...s......t............s.............s..................h............u........D...............l..........l............L.P.s.s.....hE...................p..........t............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 1319 2784 3767 +3388 PF01568 Molydop_binding Molydopterin dinucleotide binding domain Bashton M, Bateman A anon Pfam-B_129 (release 4.0) Domain This domain is found in various molybdopterin - containing oxidoreductases and tungsten formylmethanofuran dehydrogenase subunit d (FwdD) and molybdenum formylmethanofuran dehydrogenase subunit (FmdD); where the domain constitutes almost the entire subunit. 
The formylmethanofuran dehydrogenase catalyses the first step in methane formation from CO2 in methanogenic archaea and has a molybdopterin dinucleotide cofactor [1]. This domain corresponds to the C-terminal domain IV in dimethyl sulfoxide (DMSO)reductase which interacts with the 2-amino pyrimidone ring of both molybdopterin guanine dinucleotide molecules [2]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.25 0.72 -4.24 87 16734 2012-10-02 17:45:13 2003-04-07 12:59:11 16 106 3319 124 3570 12374 2211 112.40 22 13.29 CHANGED lhL.hosR.shtphpoth.tstps.thtphts......hl.lsPpDApphG.lpcGDhVclpoppG....phhstA..tlocp.lt.Gslhhshtaht.................sssNhLT..ssthcPhuttPth.+ss.s .......................................h.l.hos.+..ht.p.h..H..oth..t......s.hp.h..h.p.h.tp.hts.................hlhls.....Pp..DApp..h..G.........lp.....s...........G...D...h....V.............cl....h.....s.......p..p.....G...............pl..t..s.tA........hl..o.p......c.....l........h........s............G........s........l........h........h.....hshhh...................................tsthNsLT...t.thsshsthst.p............................................................................... 0 995 2156 2944 +3390 PF04744 Monooxygenase_B Monooxygenase subunit B protein Waterfield DI, Finn RD anon Pfam-B_6020 (release 7.5) Family Family of membrane associated monooxygenases (EC 1.13.12.-) which utilise O(2) to oxidise their substrate. Family members include both ammonia and methane monooxygenases involved in the oxidation of their respective substrates. These enzymes are multi-subunit complexes. This family represents the B subunit of the enzyme; the A subunit is thought to contain the active site. [1,2]. 
22.70 22.70 23.10 24.50 19.20 22.60 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.30 0.70 -5.76 5 143 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 76 12 16 144 19 231.40 40 82.99 CHANGED GEKSQpAFLRMRTIpWYDlKWSK-olKVNEossIoGKFHVFEsWPcAVupPcsSFLNVGpPGPVFVRLooaINGphsPRSlsLEIG+DY-FEVsLKARRPGcWHVHTMlNVcGGGPIIGPGpWIsITGSMuDFcNPVTLLTGpTVDLETaNhuNsIFWHlhWhulGlAWIGYWstRPMFLPRhhhlpAGcDD-LlsspDKKVuhlVLluTLLlVlhGY+sTEoKaPhTIPLQAGppKslpPLPVcsNs......VSlKVpcANY+VPGRALRlTlcVTN+GDpPl+lGEFTTAGlRFlNusVhKc.DssYP-ELLAscGLShDssuPIAPGET+sV-lcApDAtWEVQRLuDLlYDPDSRFGGLLMFaDsoGNRplssIuGPVIPsFs ...............................................................................Gtcs.tsh.Rhpoh.aaD.h.at.............p.htls-.hshoGKhhlhtsWPp.slshPthuFhNhu.PuPshhRhtphlst....p..h..Shsl.hGts..YpaclplKARhPGpaHlHshlNVcsuGPllGPGtalslsGshssFpsslphLsGpTl.shEsashsphhhW.................................................................................................................................................................................................................................................................................................. 0 7 12 15 +3391 PF03473 MOSC MOSC domain Aravind L, Anantharaman V anon Aravind L, Anantharaman V Domain The MOSC (MOCO sulfurase C-terminal) domain is a superfamily of beta-strand-rich domains identified in the molybdenum cofactor sulfurase and several other proteins from both prokaryotes and eukaryotes. These MOSC domains contain an absolutely conserved cysteine and occur either as stand-alone forms such as Swiss:P32157, or fused to other domains such as NifS-like catalytic domain in Molybdenum cofactor sulfurase. The MOSC domain is predicted to be a sulfur-carrier domain that receives sulfur abstracted by the pyridoxal phosphate-dependent NifS-like enzymes, on its conserved cysteine, and delivers it for the formation of diverse sulfur-metal clusters. 
21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.44 0.71 -4.68 30 4691 2009-01-15 18:05:59 2003-04-07 12:59:11 12 41 2815 8 1479 3587 1209 130.30 22 44.61 CHANGED pGhss-.........sshphptus.............cpslhlhspcslstlpppls.pt...............ssstFttNlsl.....sGhs................Esphh....cphplGs..shlcVspspp.Chhhshch.........psuph............................hthhhtshh.GhhhpllpsGplpsGDslpl ..............................................t...............th.hphus...............spsl..h..lhsp...ps...h...t...t.....h..pp..c.h.s...tp....................................................shstF..t.tNlsl........................s.G..h.s.................................................Essh.h.............cth...+.l.....Gc..................sll..pVs....p.s..pp...Ch.th.sh.p.h.............ppsph.............................................................................................................................................h..t.h...h...h.s.......h......G..h.h..h....p.....l..l...p..s...G...h....l.psGDtlp................................................... 0 425 840 1217 +3392 PF02722 MOSP_C Major Outer Sheath Protein C-terminal region Bashton m, Bateman A anon Pfam-B_653 (release 5.5) Family This is a family of spirochete major outer sheath protein C-terminal regions. These proteins are present on the bacterial cell surface. In T. denticola the major outer sheath protein (Msp) binds immobilised laminin and fibronectin supporting the hypothesis that Msp mediates the extracellular matrix binding activity of T. denticola [1]. 
25.00 25.00 25.00 26.90 24.90 24.20 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.45 0.71 -4.76 8 373 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 20 0 10 327 0 169.80 53 36.12 CHANGED DALL........ohQW+WlSsGsYhATAsuNVFGpplhspshosahDsAuFl+lETK.uGDPhT+LlsGLsuGV-sRlYIPhTatlYhsssu....t................th.sspIsLPVsGKsWsSY+IsLu-auWlKPasslYGsTNRhshss.u...........pphppthshtYcVGlohSPlEKVElcspWEQGpLucsPYhs........Ipcslos...cpa.GTFVCGlKlsW ...................DALLThtYRWhSuGuYFAotuoNVFtsshL.s.p..s.h.s.p.h.DhAAalKLETK..uuDP.TphLpGLDhGV-sRsYhPlpathhh......................................................ssssIphPVhGKsWsSYRhshG-YGWVKsYAslYGuTN+tsss.s.sus..................pphptEYCuhYcsGlshSPhEKh.h..h............................................................................. 0 10 10 10 +3393 PF02707 MOSP_N MOSP; Major Outer Sheath Protein N-terminal region Bashton m, Bateman A anon Pfam-B_653 (release 5.5) Family This is a family of spirochete major outer sheath protein N-terminal regions. These proteins are present on the bacterial cell surface. In T. denticola the major outer sheath protein (Msp) binds immobilised laminin and fibronectin supporting the hypothesis that Msp mediates the extracellular matrix binding activity of T. denticola [1]. 
25.00 25.00 36.30 35.50 18.20 20.10 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.22 0.71 -4.74 7 405 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 20 0 10 364 0 167.70 49 36.84 CHANGED laAElplKsLpVshpostsu..........t..as.spppsolEATLHCYGAYhTlGpsPsFhsNFApLWcPalsss.Ycpc..cspYAPGFsGhGGKlGY+ApsluuSGlslDluhLSFuSNGsW-utsos...................HSKYGFGuDhsLsYshtt....pchlplElAuNATLpptYppuspp.ssst..ppsplLWslGuRlTLpPhssF+hshAhDsGs ..............................spl.htsh.hth......................ht.ht.thp.hpATLHhYGAYhTlGpsPshhssFA.LWcPahspt.Yppc..ts.YtPGFtG.GG.KLGY+tpDIuGoGlohDIuF.pFASNssW-upsss.s..s..u.s.................................................................HSKYGhGuDlhhuatRsR....QEhl+VELsGNuTLu..sGYspu..sts..........s.......phstlLWsVGu+lohp.hhGhph.hA......................................... 0 10 10 10 +3394 PF01618 MotA_ExbB MotA/TolQ/ExbB proton channel family Bateman A anon Pfam-B_1099 (release 4.1) Family This family groups together integral membrane proteins that appear to be involved translocation of proteins across a membrane. These proteins are probably proton channels. MotA is an essential component of the flageller motor that uses a proton gradient to generate rotational motion in the flageller [1]. ExbB is part of the TonB-dependent transduction complex. The TonB complex uses the proton gradient across the inner bacterial membrane to transport large molecules across the outer bacterial membrane. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.57 0.71 -4.55 28 9835 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 2969 0 2514 6816 4219 133.80 23 52.20 CHANGED hcthsshtth.thhshpphhthths.....htptthhctlphthppsh.sp.ttpLppshslLuTluusAPalGLlGTVhGIhpsFhslutsup...sshsslusGIupALlATAhGLhVAIPul.lhYNhlsppspthhtchcthtp .......................................................................t........................................tt.h....h....c..t..l.p.t...t.h.c...p.th....pp....tt.p....ph..pp..shsh...L.uslu.u.hu...Ph....lGLhGTVhGl...hpu...hts......l...u...t............tt............ss...h.s.......h....l.......us.......GIupALl....uTshG.lh..sA.l.s.ul..hhashlpp.pspphht.hp....t................................................. 1 796 1613 2101 +3395 PF04006 Mpp10 Mpp10 protein Wood V, Bateman A anon Pfam-B_12513 (release 7.3) Family This family includes proteins related to Mpp10 (M phase phosphoprotein 10). The U3 small nucleolar ribonucleoprotein (snoRNP) is required for three cleavage events that generate the mature 18S rRNA from the pre-rRNA. In Saccharomyces cerevisiae, depletion of Mpp10, a U3 snoRNP-specific protein, halts 18S rRNA production and impairs cleavage at the three U3 snoRNP-dependent sites [3]. 
21.40 21.40 23.30 21.40 20.90 20.30 hmmbuild -o /dev/null HMM SEED 600 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -13.28 0.70 -6.15 6 429 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 298 0 297 439 2 435.40 22 88.04 CHANGED plttlpussstFlhspst.usshcslsph.sh.sphp.cstscpsPLsplsh-uhDsEQIWpQLclpsc.lLssl.p.tst..p.hhspccIoshsc.p.p.pp-Dsp-..-.st.ppuDD-p-.c-.Ep......h......s..s.tEsst--.pt..sEss-.pc.-...........-.tt....p...tp.ppKch-spsl-DpFFcL-EhpcaLpptEcc..Ecut.s--cctc-h..D..pchpsD..................Fu.tc.ppsccptslpYcDFFs...............c.hp..pct-.p.pttsscch.cccu-pcp.cpsctD--.-ppps..p..p.......sh.sthc........--.s..tscpsuscphSSaE+cpt+lpp+IcpLEcEsLucKsWpLpGEVsAppRPpNSLLEccL-FD+sA+PsPVITEEsTcoLEDlIKpRIpDpsaDDV.Rps+lsssttch+cphpLscpKSKpSLAElYEpEYh+tsspph........sthspc.sctHpEIpphhssLhhKLDALSsFHFsPKPstsElcIVoNsPAlsMEEVuPlAsSDAthLAPEEIacssKstc...........hocsEhsppDKpRcRRpKKpKpp.................................Kthphstptpphts-tssp.shtps.shhsKht-pssss ............................................................................................................t......................................................................................................la......................................................................................................................................................................................................................t..................................................................................................................................................................................................hpD....tF...hphpphp....t....h...-tt...........t......t.....................t..t.tt...p................c.................................................................................................t...p...t......t..t.........h....h.tDh..ht.......................................................................................................................................t....................t...........t.
.............t.................................................................................................p....t.......t.......t.....t......t.....t........h...S.s.aE..+pp.c...............h.tpp..I..pplEtt.ltp+.WphpGEsputpRP.NSLL.E....p....s....Lc...F-c.s........s+..................sP.sl..............Tp...E.ho.............p...........lEp........hI+pRIhpp.........taD-.l..+p....................................................t....c.t............h...p.....lsppKup.uLu-lYEp-ah...p.tt....................tpp.p......t+tE..lp..phhppl..cLDuLSshH...ahPKss.......p.h.p.....l..h...s.....s...h.s..s.lshE.-stP.....h...s...h......u....s...s....s......h....lA.PpElht..t.t.t............................hs..ptEhotp..-.+t+.R...pt....tK....p.p..............................................................................t.......t................................................t................................................................ 1 106 169 251 +3396 PF05172 Nup35_RRM MPPN; Nup53/35/40-type RNA recognition motif Guo JH, Coggill P anon Guo JH Domain Members of this family belong to the nucleor pore complex, NPC, the only gateway between the nucleus and the cytoplasm. The NPC consists of several subcomplexes each one of which is made up of multiple copies of several individual Nup, Nic or Sec protein subunits. In yeast, this Nup or nucleoporin subunit is numbered Nup53, Nup40 in Schizo. pombe and in vertebrates as Nup35. This subunit forms part of the inner ring within the membrane and interacts directly with Nup-Ndc1, considered to be an anchor for the NPC in the pore membrane [1]. This region of the Nup is the RNA-recognition region [2]. 
20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.46 0.72 -4.26 7 252 2012-10-02 20:46:34 2003-04-07 12:59:11 8 5 191 5 164 260 2 95.70 32 25.66 CHANGED ssspsusp......VhVFGFP.u.ss.lltcFupaGpIlcch..................................................hspsuNWh+lpYps..pAp+ALpcNGhlhssslhlGV..s..tsKplhstp ..................................................s....scpW.....VTVFG..FP..u.sushlLppFupaG..sI..lcch.....................................................hs.ssuNWh+lpYpSchpAp+ALp+.NGpl.hs..sslh.lGVp.s...h.cphht..p................ 0 60 91 135 +3397 PF01188 MR_MLE Mandelate racemase / muconate lactonizing enzyme, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain C-terminal domain is TIM barrel fold, dehydratase-like domain. Manganese is associated with this domain. 22.40 22.40 22.40 22.50 22.30 22.30 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.73 0.72 -3.44 1272 5681 2012-10-02 01:07:48 2003-04-07 12:59:11 16 31 2337 582 1586 7550 3093 69.00 28 18.12 CHANGED tl...pulRcs....hus.th...........t.lhlD.....s...N.........tu..............h..............p.pAlph...hctL.......p.p..........h.....h.alE.pPlss......tsh.......pshtp.l...p....p.ps....lP...l.sss .............................h.ltsl+ct.....hP..-h........p..ltlD.......sN................tu................W.sh..........ppAhph...sctL..........s.s...................l....talE.-Psss........tph.........cshtp.h....p...........c.ts..........slP..lAs............................. 0 468 964 1332 +3398 PF02746 MR_MLE_N Mandelate racemase / muconate lactonizing enzyme, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain SCOP reports fold similarity with enolase N-terminal domain. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.34 0.71 -4.10 9 7573 2012-10-02 11:54:41 2003-04-07 12:59:11 11 23 2340 767 2008 5956 2681 111.20 25 29.06 CHANGED tlpshlsssshs...Ph.huhtohtpt.hVllclp.s-GlsGlGEusshG.......ps.sltshlcsaLuPhLlGpDssplpshhphhh+ushs.....shoAtAAlDhALaDlpu+shshPlscLlG .........................................................................................hhhh..........................................................t....h.....l......llc..l.p...s...-...s....G.......l.....s.....G...h...GE....s....s...s....hs..............................ts.h...t...s...t...l...p...c...........l..t....s.h......l.....l......G......p.......-....s....t...p....l...p.....p..........l.....h.....p.....t...h....h..p...t..h.hh.......................hshsA..h....uAl...D.......hALWDlh........uK.thsh.Plap.LLG....................... 0 558 1207 1629 +3399 PF04152 Mre11_DNA_bind Mer11_DNA_bind; Mre11 DNA-binding presumed domain Wood V, Finn RD anon Pfam-B_3909 (release 7.3); Domain The Mre11 complex is a multi-subunit nuclease that is composed of Mre11, Rad50 and Nbs1/Xrs2, and is involved in checkpoint signalling and DNA replication [1]. Mre11 has an intrinsic DNA-binding activity that is stimulated by Rad50 on its own or in combination with Nbs1 [2]. 
25.00 25.00 27.40 25.20 23.20 24.80 hmmbuild --amino -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.04 0.71 -4.41 56 403 2009-09-10 20:52:09 2003-04-07 12:59:11 9 9 297 12 239 389 1 164.80 32 24.61 CHANGED aphpsIPL+TVRPFlhc-llLpcps.........pssscpclp.paLhp............p.V-phIccAppph.................t....tttcss..LPLlRLRV-Yous.................aps.NPpRFup+.FVG+VANss......DllpFa...+.+Kptpp................pptp.tptthht.pphsplclcsLVp-aL....ssppLslLscsshucAVppF ....................aphp.IPLcTVRPFhhc-ll.Lsccs................................psps.pppl..p..p.alhp............t.V-.phI-cAppph...........................................s.p.pps...lPLlRLRV-Yous.................apshNspRFup+..Fls.+VANsp......DllpFh..+++cpppt..............t............tttt.tpt.h....tt.pp.h.sshcVEsL...Vpcah....ps.pLplLspp.shscAlppF.................................................. 0 81 132 200 +3400 PF04085 MreC rod shape-determining protein MreC TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family MreC (murein formation C) is involved in the rod shape determination in E. coli, and more generally in cell shape determination of bacteria whether or not they are rod-shaped. 
28.00 28.00 31.50 30.70 27.50 27.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.64 0.71 -4.79 20 3591 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 3553 5 739 2380 2076 151.70 29 50.93 CHANGED pVlsh.ssssasppllIs..pGppcGl.....pps.sVls....spG.LlGpV..spVsptoS+VhLLoDssp...plslplp..+ss.....................................................hp.GIlsGpsspp........tLplphls..sss-IchGDhlVTSGLG......GhaPsGl.VGpVsplctcsttht..t.htlcPssclpcLcaVhllh.s ..............................................................Vlsp.sss..s.appplsIc..+G..sps..G......l........tps.sVls..........spG.....lVGpV..s.pVsphoS..pVh.Ll.s..c.ssp......tlslplt...+ss........................................................................hp..ullp....G..p...s..s..ps...................Lplppls..........ssscl..p..h..G.......Dh..lV.T.S.G.L....G.........uh..aPpGlsVGpVspV.pp...c.s.tthh.....pplp.lcPsA.shpcl.chlhllh..s.......................... 0 258 496 635 +3401 PF04093 MreD rod shape-determining protein MreD Finn RD anon manual; Family MreD (murein formation D) is involved in the rod shape determination in E. coli, and more generally in cell shape determination of bacteria whether or not they are rod-shaped. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.02 0.71 -4.38 14 2682 2012-10-03 02:46:00 2003-04-07 12:59:11 7 2 2669 0 496 1464 599 156.00 23 93.18 CHANGED pRhhhtallhls.hllullhth.hsh.ss....hhhhtPpaLhLhLlahslthsppsulhhuhlhGllaDlhhsullGlpshhhsllsallshhhthl+h......hshhhtshl.....l.h...hshllthllths.h.hlh.shap..Pph..lhshlluhlLh.hlhhLhptl ..................................h....h.hll.hl....hl...l.s..hl.lp......h...h......s..h..hs......hhhht.P...pa...l......ll.h...l.l...a.h......s.l.......t.......h...s...c...p...s..s....l......h.hua..lhGllhD.l.h..........h..u..u..........h..........l.G.lpslshsllsall.s.h.h...h....p.h.h..hp...........ls...l..h..h...ts..ll........lhl.......hs.hh...l...t...h..l.....l.h..h..s..p...h...........h.h...h.....l.......s.......h...p.........P.p.h....lhs.h..ll.t.slL.ashlhhhh...h...................................................................... 0 156 313 416 +3402 PF03919 mRNA_cap_C mRNA capping enzyme, C-terminal domain Finn RD, Bateman A anon Sarah Teichmann Domain \N 26.50 26.50 27.70 27.30 26.40 26.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.68 0.72 -3.71 11 377 2012-10-03 20:18:02 2003-04-07 12:59:11 10 15 304 18 250 369 89 107.30 30 20.63 CHANGED phNoVDFhLchsct....t.shL....pphGhlpah...........tt.s.....................+hssp.pphss+IlECpa..-p..psp...WhahRhRsDKopPNshsTscsVlpoIpssVTcEhLlc ..................................................................NolDFplclph....................ps..l.........p.s....hh......ah.......................................................tthp.......h........................t..phpp...p...hs...s+..IlECph...........-p....psp...................Wp.......hh.......RhRsDKspsNphsTsppVhpSIp-sVoc-tLl....... 
0 96 147 211 +3403 PF01331 mRNA_cap_enzyme mRNA capping enzyme, catalytic domain Finn RD, Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family represents the ATP binding catalytic domain of the mRNA capping enzyme. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.36 0.71 -4.61 12 521 2012-10-02 00:43:09 2003-04-07 12:59:11 14 25 354 18 336 652 333 173.70 27 35.15 CHANGED pPlShs+.cthchLpppsYhVs.KsDGhRhhhhlsps........sahlDRc.phahlpthpaPhph..........hhphTLLDGEhllDhhtt.....tphRYLlaDhlshsGps.lsppshs..pRhphlp+clhpPpstth..psthhphc..PFtlphKshh.h.tsp+.h..htphhtplsHcsDGLIFpsscsPYssG.pspslLKWK ...................................................................pPVShs....p....pphp..L...pp..sYhVs..K....sDGhRhhhh..lstp...............tsahlD.R...p...ps..hhh.l.p.....hpaPh...........p......................t.h..hp...sTLlDG....E.h..l.h...Dph.s........................ttp.+.....aL.laDhlhh........s...............ups....l...hp...pshp.............pRlthlpcplhp..Pht....ph.....h...............p.....p................h.....hp.........tt.......................p....s.....F...........p......l.ph.Kshh.hh...thpp.......h..............ht.phh...............pl....H..t.sDGLIFp...s..h..p....t...sYh.G...psp.p.....lLKWK......................................................................................... 0 145 207 288 +3404 PF02940 mRNA_triPase mRNA capping enzyme, beta chain Griffiths-Jones SR anon Structural domain Domain The beta chain of mRNA capping enzyme has triphosphatase activity. 
The function of the capping enzyme also depends on the guanylyltransferase activity conferred by the alpha chain (see Pfam:PF01331) 20.40 20.40 21.00 21.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.62 0.70 -4.60 24 259 2012-10-01 23:11:28 2003-04-07 12:59:11 10 13 212 14 161 256 64 210.30 23 42.20 CHANGED .cshs+sls-alht................lpspshpplElEhKlGhlhctp....sspRhphsl.opslhspp.........pspFpsslscspapphpcaLpphs..............................................................t.t..hh.hshhcspppDphYpht...................tp.splRloh...Dsp...sschhtt...IcKcRlssl.lasPpssa..........................................DhRlSlslEhshs.s.......stsssps.thpRpKcRhSYh.+ss..h+hDLT+V..........s..tpscspppaElElEl ...............................................................................h...ltphhh..h..............h..t.....lElEh+hGhlhstp.........ptpRh..th..s...h.........s.tslhppt................thtFpsshs.....tttapthpphLpphs.....................................................................................................................h.hshh.cppppDphaphs......................tctsplRlop....Dtp........ssphhtt....ItKp+ls..slpl.a.Pp.s.s..h.........................................................DhRlSl..slEhshsts..................thtpp..s.t..hpRpKcRhSYp.pts..........hplDlTpV......................t...ttstptphpaElElEl........................................... 0 60 97 142 +3406 PF02349 MSG Major surface glycoprotein Bashton M, Bateman A anon Pfam-B_864 (release 5.2) Family This is a novel repeat in Pneumocystis carinii Major surface glycoprotein (MSG) some members of the alignment have up to nine repeats of this family, the repeats containing several conserved cysteines. The MSG of P. carinii is an important protein in host-pathogen interactions [2]. 
Surface glycoprotein A Swiss:O59920 from Pneumocystis carinii is a main target for the host immune system, this protein is implicated in the attachment of Pneumocystis carinii to the host alveolar epithelial cells, alveolar macrophages, host surfactant and possibly accounts in part for the hypoxia seen in Pneumocystis carinii pneumonia (PCP) [1]. 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -11.10 0.72 -4.01 205 505 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 3 0 0 514 0 90.30 24 46.08 CHANGED pphht-ppChp.htptC....Lpps.........Cp.php......................Cpp....l+cpC...p.phppctp..................................................hpphhhpph+s.....plpsc....pcCpctL.pc.C.thpp..sp ........................................p....h-ccCtc.htpcChh.Lpps..st..phptp..............CppL+ppCtphthpph.....................................................................hpchLhcthcs......shpsc......pcCpctL.pchCspLpc.tp.t.................. 0 0 0 0 +3407 PF04066 MrpF_PhaF Multiple resistance and pH regulation protein F (MrpF / PhaF) Kerrison ND, Finn RD anon COG2212 Family Members of the PhaF / MrpF family are predicted to be an integral membrane proteins with three transmembrane regions, involved in regulation of pH. PhaF is part of a potassium efflux system involved in pH regulation.\ It is also involved in symbiosis in Rhizobium meliloti [1]. MrpF is part of a Na+/H+ antiporter complex, also involved in pH homeostasis. MrpF is thought to be an efflux system for Na+ and cholate [2]. The Mrp system in Bacilli may also have primary energisation capacities [3]. 
21.30 21.30 21.30 21.30 21.00 20.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.36 0.72 -3.69 16 1647 2009-09-11 16:03:59 2003-04-07 12:59:11 8 2 1250 0 488 1048 205 54.60 31 57.36 CHANGED hsDRVlALDslsstllullsllulhhpphhhlDlAlllulLuFluTlAhA+altt ........................sDRllALDslsh.shullslhulh....h.s.s.s.hal-shlllAlluFluTluhu+Fl......... 2 147 300 407 +3408 PF04471 Mrr_cat Restriction endonuclease Waterfield DI, Finn RD anon COG1715 Family Prokaryotic family found in type II restriction enzymes containing the hallmark (D/E)-(D/E)XK active site. Presence of catalytic residues implicates this region in the enzymatic cleavage of DNA [1,2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.69 0.71 -4.28 227 2560 2012-10-11 20:44:43 2003-04-07 12:59:11 7 96 1770 1 790 2681 416 113.70 20 32.61 CHANGED l.p......p...hs.stpFEclltp....lhpp.h..Gap.......s..p......h.st.t.us.Ds...G..lDl...l......h...p.p.s....u......p.............p..h..hlQsK+a.....p........s..p...luh.pt.lpp.h.hu.s.h.p..t......p.p....us......p......GlhlT.o.us...Fo...psA....p...ph.A....p..p.....p....p........lpLl..DupcLhchl ...............................................................h...hss.tFEphltp....lhp.....p...h...Gap...................s.p.............h.st...t..ss...D.t.....G....l...Dh..l................h...pp...s.t....t.......p..................................ph...hlQ.sKca........p..................s..p..........lu.t....tt....l.p..p...h.hu...s.h.t..t................h.t.......ss................p..........................ulhl..T..s.us.....Fo....p...sA....p....ph.A....pp.......p....t..............lhLl..stppLhph............................................................ 
1 266 525 668 +3409 PF00924 MS_channel UPF0003; Mechanosensitive ion channel Bateman A, Martinac B anon Pfam-B_1136 (release 3.0) Family Two members of this protein family: Swiss:Q57634 and Swiss:Q58543 of M. jannaschii have been functionally characterised. Both proteins form mechanosensitive (MS) ion channels upon reconstitution into liposomes and functional examination by the patch-clamp technique. Therefore this family are likely to also be MS channel proteins. 20.70 20.70 20.70 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -10.89 0.70 -5.00 37 13351 2009-09-13 17:39:23 2003-04-07 12:59:11 13 28 4579 14 3447 9838 4024 200.60 21 41.76 CHANGED tplhphllhslshlhsLshlGhs...hsullushGhhGluluhuhQshlsslluGlhllh-+.shclGDhlpls...............shpGtVpclslpsTpl+sh-sphlhlPNsplhsppltNaop...pstpRlphslslsasos...hcplhchltchhtppstlhp................................................tshlhhsphussulshplpsasps.......tchhslppplthc.........lppthccpsIph 
......................................................t..lhph.h.l.h.h.l.s...l...l..h..h..l.....s..h..l...G.ls........hs..s........l...l........s....s....h....G........s....h.u..l....u....l....G....h....u.h...Q..shlss.hl........uG....l....h.l.l.h.p.c..shclGDh.lp.ls..............................................sh.p..G..s....V..p..cI.....s.l.p.........s.........T.p.........l............p.........s...........h...........D.........s.....p.....h.....l..h.lPNstlh.s.t..s.l..t..Naot....................shtc....h..t..h.s..l...s.l.s......h.....s..s.s...............h.p........p..h.h...p.h........l.....h.....p........h....h...t...p...p....s..t..l.hp...............................................................................p....t..........l..h....h...t...t...h...s...t..s...s...h...s...h...t...lt..h..a..sps..................t.p..h...h.t.....h..ttplhtp.........lhphh.pptslp.................................................................................................................................... 1 1007 2127 2858 +3410 PF00985 MSA_2 Merozoite Surface Antigen 2 (MSA-2) family Finn RD, Bateman A anon Pfam-B_1052 (release 3.0) Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.00 0.71 -4.12 5 652 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 19 0 1 531 0 103.60 55 57.25 CHANGED TTTTTTTNDAEASTSTSSEN.NHpNAETNPKGcGEVQpPNQANKETQNNSNVQQDSQTKSNVPcTQDADTKSPTAQPEQAENSAPTAEQTESPELQSAPENKGTGQHGHMHGSRNNHPQNTSDSQKECTDGNKENCGAATSLLNNSSNIASINKFVVLISATLVLSFAIFI .........................osoToTTNsuEupToTso..............sA-Ts......spscu.ps.p..Ps.....sspEopssuN..s.pcopsKup..s..pQst.scSspttPc...............APpt.QTtpsE..usP.-N.................................................................................... 
0 1 1 1 +3411 PF01741 MscL Large-conductance mechanosensitive channel, MscL Bateman A anon [1] Domain \N 22.40 22.40 22.60 22.60 22.30 22.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.71 0.71 -3.99 8 3477 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 3349 6 744 2091 328 128.40 42 92.61 CHANGED hlKEFKEFAhRGNVVDLAVGVlIGuAFGKIVoSLVsDIIMPllGhLlGulDFu.sluh.h....G...........hsusslsYGlFIQslINFlIIAFAI.FlsIKsIN+L....++KcEs....ps.A.s-....sppsLLTEIRDLLKpp ...........................hlKEFK-Fh.hRGNVlDLAVGV..IIGuAFupIVoSLV...s...DIIMP.l.l.Gh.l..l....G..u..l....D.Fs...sht.h.h.........st...............................h.s...s..s.slpY...GsFIps.l.....lsF....lIl.A....Fsl...F.h..h...l.....K....h....l....N..+...l.................p..+..+...c.c..tt......t...t.s.....s....ss...........pp..LLsEIRDLLcp.............................................. 0 218 475 634 +3412 PF01716 MSP Manganese-stabilising protein / photosystem II polypeptide Bashton M, Bateman A anon Pfam-B_1814 (release 4.1) Family This family consists of the 33 KDa photosystem II polypeptide from the oxygen evolving complex (OEC) of plants and cyanobacteria. The protein is also known as the manganese-stabilising protein as it is associated with the manganese complex of the OEC and may provide the ligands for the complex [1]. 
25.00 25.00 37.30 36.80 20.40 19.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.58 0.70 -5.31 8 265 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 197 17 67 285 155 210.30 51 79.74 CHANGED -hQsLTY--I+uTGLANpCPsLs-ssRGolsl-uGppYtlscLCLEPosFhVKEEPsNKRQcAEFVssKLlTRhTooLDQIpGclpVsuDGSlTFpEKDGIDFQAlTVQLPGGERVPFLFTIKNLVApTpPshoSlsoShDFcG-FpVPSYRGAsFLDPKGRGluTGYDN..AVALPusuDcE-Ls+pNVKpsshuKGpISLpVuKVDusTGEIAGVFES.QPSDTDLGAKEP+DVKlpGlFYApl- .....................pshTY.plKGTGhANpCPslssu...ts.sh.slcsGp.YphpchClEPToFtVKt....E.s.hs..K.pt...t....s..-..F.pTKLhT.R.hTYTLDp........lpGshpVssDGslpFp.E.cDG..IDaAslTVQLPGGERVPFLFTlKpLsApupsss...................FuGpF...hVPSYRGusFLDPKGRGssTGYDpA...VA.L.P..A.....t..u....D.....p-....-L.+EN.K..ph..ts..spGplshpls+scspTGElhGlFpS.QPSDTDhGuK..Ph-VKlpGlaYupl............... 0 21 48 60 +3413 PF03429 MSP1b Major surface protein 1B Finn RD anon Pfam-B_4414 (release 6.6) Family The major surface protein (MSP1) of the cattle pathogen Anaplasma is a heterodimer comprised of MSP1a and MSP1b. This family is the MSP1b chain. There MSP1 proteins are putative adhesins for bovine erythrocytes. 
25.00 25.00 231.30 62.20 20.90 22.80 hmmbuild -o /dev/null HMM SEED 726 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.50 0.70 -13.29 0.70 -6.64 5 35 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 4 0 10 61 0 481.30 64 97.78 CHANGED MTEDDKQQQQNQSNVVQAISAVFQRKSAELQRLNDFIKGADGTLKNVHPHMKSLEALSKQLSEKIAAEAAAKADAKYESVGLRAKAAAALGNLGRLVARGKLKSSDAPKDLDQSIDALPFMDEAPDTGERVVVPAGEEQEFGKAAAWGLAGFKRTVDESLEMLGRGMNMLAEGQAQISQGIADKSTALVREGLETSRLGAGLCRNGLVEASYGVGYANETMGKYAGKGLEKCKNKLGDACYKWSKALEEIENLRTAIDAKAEQQVEGEAWSPEGVSANTFYRGLHKIGAAIAVAAQATWEGLAMTGKFMGAVAKLAGAVSMCVAAYTAAIVGMAAATPATLLLTAMDNQS.VNNAVVKVSEYLHSNVEQATKDLMASEFAMMTFGGIMTCAKLMKGSFAAINQKFEEINATLTREATDIAQGIKETYKSIGDAF..................KSANDGIAKWT.....AALAGYASVEQLEEAKEADRAQAEQRAEAQAMTERVAGERAATVAAGTETIKTIVS..................DMCNELAQIGGLSQAERDALVQSFTPKPPARTTKEIVSQMCNSVKSAFGSISHITNVIRQAGKDAQKIDPQVEVAEISPETIYAMSEALYALNMQESAsINNALLAAVNDSSKDDQAIVTDLINATIEVCTEQTNTLAGHTAEVQAGLEAAGIKLDDAQGLQEATPEA.KGVEGINPEELEQAAEGLATAVNEASADGKIQSLNQQETQIAQGGQHAAQQQSSGWSR ...............................VhQthpAtht+.NthlcthDsh.K.h+.pMpsL-ALpppLppKhA..tEsssKhsthhtSsuLRAKAsA................................M-ps..PsshE.h.s.hGEE.pEFG+AsAaGLsGhK+TlDEulEMLsRGMpMluEGQAplupGltsKsstlV+tGLEhStLGsuhChpuLs-hoaGlthspc.sMGKhAGKGL-KC+pKLtsAs.KW.pAhpEl-sLRTAI.....-t......tAtppsEGEAWSPcGVpuNsFY+uLppIGttIAsAAQATWEGLAMTGKFMGAVAKLAGAVSMCVAAYTAAIlshh...hPAsl.LshMsspS.lsphVs+sucYLHSNVEQATKDLMASEFAMMTFGGIMTCAKLMKsSFAAlNQKFE.ElNATLs.RcuTDIsQGlKEsYpSIGDAF..................KSsNsGIAKWT.....AAlAGYASVEQLEEAKtADRsQA-QpAEtQAMscpVAttRAATVAAGTtTIKTIVS..................DMCNELAQIsuhSQAthsA...........................................................................tEuthlpsALLtAVNDoSKDDQAIVTsLINAsIEVC..TcQTNTLAGHTAEVQttLEAAGl+h-DAp....ss.pt.KGhEGIN.EELtQAA................................................................................................................................................................................ 
0 0 0 10 +3414 PF04421 Mss4 Mss4 protein Bateman A, Wood V anon Wood V Domain \N 21.20 21.20 27.90 21.50 19.50 19.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.38 0.72 -4.15 5 159 2012-10-01 21:11:27 2003-04-07 12:59:11 8 4 136 5 109 153 0 95.40 32 61.01 CHANGED sscsllNpcc.shV-hPLhsp+pcR.......t...........Dssss-.lc-FaLVKDM.....FsFENVGFS+slc.shKaLVCADCE+GPlGaaDhsscpsa.luLERVsHp .........................................................................................................h...httt...............t..................sssss-h....lptaWhVp.DM.....asFENlGFo+s.................Vs.....s.....h..............KaLlCADCEhGPIGapsh.s...s+psa.lAhcRV........................ 0 38 53 90 +3415 PF03940 MSSP Male specific sperm protein Finn RD anon DOMO_DM01786 Family This family of drosophila proteins are typified by the repetitive motif C-G-P. 21.20 21.20 21.50 21.80 19.90 21.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.52 0.72 -10.77 0.72 -4.57 3 8 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 6 0 4 8 1 45.20 74 47.76 CHANGED CGGPC..CGPCGG..CG....PCG.G.CGPC..CGPCGPC...CGPCGPCGPCCGsscsaCGC .ssGPCCGPCGG..CGPCGG.CGPCCGGCGPpCGPCGuCGPCCGsspsaCGC........ 0 3 3 4 +3416 PF05063 MT-A70 MT-A70 Finn RD anon Pfam-B_3025 (release 7.7) Family MT-A70 is the S-adenosylmethionine-binding subunit of human mRNA:m6A methyl-transferase (MTase), an enzyme that sequence-specifically methylates adenines in pre-mRNAs. 
21.50 21.50 21.80 21.80 21.40 20.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.38 0.71 -4.71 10 995 2012-10-10 17:06:42 2003-04-07 12:59:11 9 19 544 0 535 970 231 158.10 28 43.04 CHANGED FslIlhDPPWc.+hhtt.t.....plsYsshsp--lpsLslscLtsc.psllFLWsTspthhps...+psLcpWGachlsc.lsWlKsNphscslt..shpssphhp+spEHCLlGlKGsspp.hst..hptth.....spslIlu...hctpS+KPsElatllE+ls.....stc.+LELFuRs..l+PGWholGsp ......................................................aslIhhDPPWp.........t.............................t..Y.ss....h.....s...........p-.....lt.pL..s...l...p.l..s.....sp....sshlalWss..s..tt..h..ts...........hchl..p....t....W..G.....a....ch.h....sp..hhWlKhs.....p.t..t................p....hh..................................th...........st......h...hp..pspEphLhu.h.+.......Gss.p....t.....t..........................t..................pppllhu................http.....S........+..K............Pst.....hhph..l-phh......................ssht..+lELF.u..Rp.........hpsGWh.shGsp............................................. 1 194 302 434 +3418 PF01993 MTD methylene-5,6,7,8-tetrahydromethanopterin dehydrogenase Enright A, Ouzounis C, Bateman A anon Enright A Family This enzyme family is involved in formation of methane from carbon dioxide EC:1.5.99.9. The enzyme requires coenzyme F420 [1]. 
25.00 25.00 142.50 142.40 19.50 19.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.46 0.70 -5.36 4 55 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 54 54 41 57 4 271.60 56 99.53 CHANGED VVKlGlIKCGNIGTS.llDhLLDERADRcDI-VRVVGSGAKMsPEplE...csshchlcEh-PDFlIaluPNPAAPGPpKAREhLutushPAlIIGDAPGL+VKDEhEEQGLGYIllKsDsMIGARREFLDPsEMAlFNADVlKVLAuTGAaRlVQEAIDchI-clKtGK..psELPplVIsppKAVEAtcFoNPYAKAKAMAAapIAEKVADlDV+GCFhppDsE+YIPIVASAHEMhRhAAcLADEARElEKuNDsVhRsPHu.-GKhLSK+pLMtKPE .VVKIGIlKhGNIGhSsllDLlLDERADRpDIsVRVlGSGAKMsPEplE...cssschlp-h..cPDFlIhISPNsusPGPppARElLtttslPsllIuD.uPuhK.s.K..D..t...hccpGhGYIIlKsDPMIGARREFLDPsEMAhFNuDllKVLAsTGAlRlVQptlDcsIcslctGK...-lcLP+IVlos-KAVEtupFsNPYAKAKAhAAaphAEKVAslDVKGCFMsK-hEcYIPlVASAHEhhRhAAcLsDEARElEKusDuVhRpPHupcGclLsKscLhpKPE.... 0 9 28 35 +3419 PF02536 mTERF mTERF Bashton M, Bateman A anon Pfam-B_1422 (release 5.4) Family This family contains one sequence of known function Human mitochondrial transcription termination factor (mTERF) the rest of the family consists of hypothetical proteins none of which have any functional information. mTERF is a multizipper protein possessing three putative leucine zippers one of which is bipartite. The protein binds DNA as a monomer [1]. The leucine zippers are not implicated in a dimerisation role as in other leucine zippers [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.04 0.70 -5.86 15 1485 2009-01-15 18:05:59 2003-04-07 12:59:11 9 25 144 8 968 1433 16 187.20 13 64.71 CHANGED LsLh+ShtFosSpISoI......IpsYPplLlscscpoLssKLpaLpS+GASpS-lTclVSsVPcILupcth+olushaDal+cllhs..DpuSphE+hspsLspuspp.sh.pNlulLR-LGlsp+lLhsLLlSchpPVsG+......cph-tplccllEhGhDPsss+hVhuhp..llhphsDKslctpVshh+sLGFslsDVhslhp+tPphLshSpcp...h....................K........t.p.llsoIEphlu..........LGFSR-EhthMV+RaPtslshSsEp.VK+KhEFlVKcMshPl+slVphPpVhuYSLEKRhhPRssll+sLhSKG.......cLsslSpsLssTDppFLp ..........................................................................................................................................................................................h..............................................................................................................................................................................................................................................................................................................................h.............h..h.h...t......h.t..........t.......h........h...........h.................hh..............................................................................................................................h.........tht.hh..p.....................................hG.hs.tppl.h...t...h..lhp.h.P..ll...t.h.............s.cp..lp.php.ah........ht...........h......th.............t.........l....h....t.........P..hh..ht...h.......................................................................................................................................................... 
0 188 543 757 +3420 PF02219 MTHFR Methylenetetrahydrofolate reductase Bateman A, Mian N anon Pfam-B_2407 (release 5.2) Domain This family includes the 5,10-methylenetetrahydrofolate reductase EC:1.7.99.5 from bacteria and methylenetetrahydrofolate reductase EC: 1.5.1.20 from eukaryotes. The structure for this domain is known [1] to be a TIM barrel. 19.90 19.90 20.00 19.90 19.70 19.80 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.62 0.70 -5.32 12 4458 2012-10-01 19:29:00 2003-04-07 12:59:11 12 9 3842 40 1248 3279 1590 271.20 29 75.03 CHANGED plpphhp..puchhhSaEFFPPKTspG.pNLhsphcRhptht.PhFlsVTaGAuG.....spt.chohslspthppppsl-shhHLTCsshshttlcptLcphhphGlRNILALRGDsPtstc..atp.ptt..hpYAhDLV+hI+p....caGDhFsIuVAuYP..EsHP......psts...hptDlhaLKcKl-AGADFlITQhFa-s-sal+FhspspstGh....shPIlPGIMPIpsacphp+hsphs.pspIPpplhsplp.sl+sD-psl+plGlchth-hspcLlspG...V.slHFYTLNhEcushtIlcpLG ..............................................................................................................t..........tp.slShEhaPP+......s......t....p...h..t......p..pl...h.p.s.l.c.p..Lpt....hp...P..cFl.oVT.auus.u..........................s.p.c.pp.Thphs...p.tlp...p........c...h....ul........p..shsHLT.Ch.s.t.s.t.....p..clpphlts.h.t.p.hG.l.c.pIlA......LRG..D....s.t...s......t...........................s...t....s.....s..............h.p..a.A..s-.L......l...p.h.l+.......................phu..c..............F.......s.......I.u....l...A...u.....Y..P.........Eh.HP....................................cu.p.s......hps..Dl...t..p..L+c.K....l....-....A......G.....A....s.....h.....hI.TQhF.FD.......s....-....p....ah....cF...h...-........c....s....tt.t.Gl.........s.l.P.I...lsG.....I..h...P.l.s.......s.h..p.p.ht..+..h.s..p.hs..ss.c..lP..ph..hhphh-....t................h.........c....s......D....s......p.........s....hc...t.h...G.h..p..h...u...h-...h...h...p...pL......h....p...p.G............l.......shHhY.T.hNpsphshtlhp....................................................
.............................. 0 432 804 1058 +3421 PF05068 MtlR Mannitol repressor Bateman A anon COG3722 Family The mannitol operon of Escherichia coli, encoding the mannitol-specific enzyme II of the phosphotransferase system (MtlA) and mannitol phosphate dehydrogenase (MtlD) contains an additional downstream open reading frame which encodes the mannitol repressor (MtlR). 21.50 21.50 21.50 21.90 21.00 21.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.03 0.71 -4.82 4 1088 2009-09-11 14:05:50 2003-04-07 12:59:11 7 2 679 8 70 326 4 169.90 49 95.31 CHANGED Msshs.......................EsclLE+LNtscolRuFhhTuVslLsEAlctLl.plF.RKD-aAVK.sV-PLLssoGPLsDLoVRLKLlaGLGlIs+plapDIpHhhpl+cpLNcDspEYsFsDs.Ils.IppLsslschuhL...shct.-ssD.SlYphphtRhpphl+SsLoLAlTplhppLsh ....................................................hp...EscVLEpLsAscslpSFhhTAsplLspulphLl.plF.+cD-aAVcY.AVcPLLs..tsG.PLuDloVRLKLIYuLGlls+t.YpDhphhhtl+ctLNc-ss-huFsDDtIls.hup..L.p..sl..s..s.s.........s...t.h.c..ADhulauh..tRapphV+oshsLulTpllpclo.h.................................... 0 6 21 46 +3422 PF03083 MtN3_slv Sugar efflux transporter for intercellular exchange Bateman A anon Pfam-B_623 (release 6.4) Family This family includes proteins such as drosophila saliva [1], MtN3 involved in root nodule development [3] and a protein involved in activation and expression of recombination activation genes (RAGs) [2]. Although the molecular function of these proteins is unknown, they are almost certainly transmembrane proteins. This family contains a region of two transmembrane helices that is found in two copies in most members of the family. This family also contains specific sugar efflux transporters that are essential for the maintenance of animal blood glucose levels, plant nectar production, and plant seed and pollen development. 
In many organisims it meditaes gluose transport; in Arabidopsis it is necessary for pollen viability; and two of the rice homologues are specifically exploited by bacterial pathogens for virulence by means of direct binding of a bacterial effector to the SWEET promoter [4]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.83 0.72 -4.11 25 2182 2012-10-03 12:15:12 2003-04-07 12:59:11 11 19 651 0 1265 2047 467 83.20 24 68.44 CHANGED .hlGhlsssholshFhuPLush.tpll+pKSs-shshh.hlsshlsushWhhYGlhhpDh..hlhhsNslGshltsl.llLalhYs.ccp ...........................h.hlGhlushhulh.hahu..s...........l..........sh..hhp..lhp.....s.+...........os.p.t.hs.h.h.....lsshlsshlW...hhYG...l.....h.....p.....p...Dh....hlhhsN.s..h.......Gh.l.huh..lhhhhahha.....t........................... 0 350 759 1105 +3423 PF03821 Mtp Golgi 4-transmembrane spanning transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.40 22.40 22.60 22.80 21.40 22.00 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.65 0.70 -4.97 3 234 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 71 0 98 196 0 190.60 37 71.58 CHANGED HlVMSVLLFIEHoVEVAHGKuSC+h.pMsYLRhADLlSSFLLIssLFIISlSLLIGVVKNREKYLlPFLSLQIMDaLLCLLTLLGSYIELPAYLKLA.RsRsuuSKlPLMTLQLLDFCLSILTLCSSYMEVPTYLNFKSMNHMNYLPSQEDlPHsQFIsMMlIFSVAFITVLIFKVYMFKCVWsCYKaIKsMNSsEEcssSKMhp..KVVLPSYEEALSLPsKTPEG-PAPPPYSE ..................................................................................................................................................hhs..shshh.hslshLhhhhsthhsYGshp..........p......p.suallP.FFChQLFDFsLosLsAhSsls...YlPslpcalsp..........h.Pa+-clhphss.....sLhl...Illlhahhllhh....K..uYhIsCVWsCY+Yl..ps+..N.ss-.........h..s.....sth.t.........phl...L.Ps..Y-.Ah....p..ttp.ssPP.hs....................................................... 
1 15 21 45 +3424 PF04208 MtrA Tetrahydromethanopterin S-methyltransferase, subunit A TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 25.00 25.00 27.10 25.70 24.80 23.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -4.82 6 110 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 65 0 76 111 15 167.80 46 71.55 CHANGED K+cPAEGWPlVpGDYlVGDPESsVuVVTLGSHlp.csslcAG....AAIuGPCKTENLGIEKVlANlISNPNIRFlllCGuEVpGHITGQohcALHpNGVD.-st.IIGApGAIPYlENlscEAVERFp.pQVElVDLIDlEDh-pIsptl+EClpKDPGAh-E-PhllclpE..GtcEEEEss ...........KKtPAtGWPllpG-Yhl..GsPcSsVAVlTLGS...........Hh.....ps.slc....A....G.......................AAIsGsC+TENLGIEKllANlISNPNIRFlllsGsEVp...GHloGQslhALacNGl...sc...-G...+...IlGApGAIPFlENlsp-ulcRFQ.ppV.EllDlI-sEDhutIpstIc-shuKDP.GAhtt.-shllclpt..tttt......st....................... 0 17 47 62 +3425 PF04211 MtrC Tetrahydromethanopterin S-methyltransferase, subunit C TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 
25.50 25.50 25.60 89.50 23.40 25.40 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.61 0.70 -5.34 4 55 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 53 0 40 55 5 258.60 39 98.46 CHANGED MSs.uuGGcAtt.....uhP.pplMAlGIlsGLlGhYhush.ss..ltslhuuLuAlsAsVWGADAVRRVAuYGLGTGVPSIGhhuLGhGhlAAlhGlulsus.slPhhAAPIlullluAllGsllGsLsp+lltMKIPIME+shsEISsAGsLulLGhosAlAGSashpuVlshVlAsGhIALlFIlsuMuILHPFNACLGPsEsQcRTLhLAspsGhIshhlA........GLhshsls.....shLlGhlhWhlsFhKFhphoh+DAsuVlaoGhlPKpE ....................huh.usuu.Att.....hhPpsplhslGlluuLlGIYlu..thhss.....lh....shlGGLuAlsAhVhGAsslR+VAuYGLGTGVPSIGMluLGhGlluulhGltlush.......................s....h........s........uP......Il.......ulllAhllGhllGhLus..pslsMKIPlM.puhscLuhAGALulLGhosshu.Guash..s...............sll...............s...............sslssGhIAlhFIhuuhAILHPFNACLGPsEspcRTLtLAltsGhluhllh.......................ulhshshl..........sllluhlhWhlsatpalphohcDAssVhhss.lPct............. 0 9 26 34 +3426 PF04207 MtrD Tetrahydromethanopterin S-methyltransferase, subunit D TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 
25.00 25.00 58.50 58.50 21.90 21.60 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.59 0.70 -4.96 4 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 53 0 40 55 5 213.10 54 91.73 CHANGED MDhll...lhhhhITlGGllIssuVHFVPVGGAPAAMATATGVGTGTsQLAAGAGLTGLloAAsMs......spshhlIhhuGAVGuMlMlulTMLVGslIYVYGVGsVPsSAKVclDPITthcQ-hYVTPGTEGHGlPTVsFVSGIIGGuLGGIGGuLlYaALhclhhs.u...huss..........VAuIhAlGhFFlNAVlASYNIGGTIEGFHDPKFK+hP+uVVuSLVASI .................................llslhtIsIGGslIuhuVHFVPVGGAPAAMApuTGlGTGTs.LAAGAGhTGLluAAshst..............................s.shhllhhoGAVGuMlMhulTMllGshIYVaGlGlsPAuuKsphDPITtDpQc.YloPGTpGHGlPTVsFVSGlIGuhLGGlGGuLlYhuLhp.lsh.s.....s...........s...........l..AulhAlGhFFlNAVlASYNIGGTIEGFHDPKF.K.+hPpulluuhlASl.................. 0 9 26 34 +3427 PF04206 MtrE Tetrahydromethanopterin S-methyltransferase, subunit E TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 
20.20 20.20 20.30 131.00 19.50 19.60 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.92 0.70 -5.38 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 55 0 38 57 5 270.60 54 91.35 CHANGED LGlLALhGAuATIAGAuEDLESDVGSQSNPNSQVQLAPQMGNlHRaFNKAISGEPVSYuLaCuIAGoVAaVlMpphsLPslhALAlGAuIAAhVHssYAlTAaLGRluS.uupFsQPlYLDMlhSHLGPIAGHGFIsTFCIVulAYLMTllLs...HPFPLPLLAhIWGITIGAIGSSTGDVHYGAEREYQ+YPFGGGlPVAsHGDITRKAEhGlRNuMDsuaFCAKaGGPLTGLAFGLIVFLusWssllhs.....ttGulIs......hhGLlIVhlLIIhN .....lGslALhGAhATlAGsAEDLESDlGSQSNPNSQVQLAPQMGplHRhFNKAISGEPluYGLasuluGulAasLh.......ths....hs..sllAIslGuslAAhVHGsYusoAalGRhsu.ppcFsQPlYhDllpoHlssIhuHuFIAsFshlshuYLhs...ss..Lu......pPF..PLPLlAlIWGITlGAIGSSTGDVHYGAEREYQph.FGuGlPlAspGsIsphAEhGhRNulDsuaFCuKaGGPlTGlsFGLIVFL-hWRollFs.....thG.u.lls..............hGlllVllhhlhN............. 0 8 24 32 +3428 PF04210 MtrG Tetrahydromethanopterin S-methyltransferase, subunit G TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 21.50 21.50 21.60 58.60 21.40 20.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.32 0.72 -4.30 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 54 0 38 48 10 69.20 41 66.95 CHANGED --K..lPpslssss-apclpcRLD-IEcKlEFssuElhQ+hGK+lGRDIGILYGlVIGLlL.hIhsllshhF ........s.....lPtslsssp-apcl.c+LDcIEcKVEFssuElhQRhG+KlGRDlGILYGlllGll.lhll.h..h....h......... 0 8 24 32 +3429 PF02007 MtrH Tetrahydromethanopterin S-methyltransferase MtrH subunit Enright A, Ouzounis C, Bateman A anon Enright A Family The enzyme tetrahydromethanopterin S-methyltransferase EC:2.1.1.86 is composed of eight subunits [1]. 
The enzyme is a membrane- associated enzyme complex which catalyses an energy-conserving, sodium-ion-translocating step in methanogenesis from hydrogen and carbon dioxide [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.68 0.70 -5.42 6 101 2012-10-03 05:58:16 2003-04-07 12:59:11 13 2 81 0 68 133 38 294.50 41 94.20 CHANGED KEQcVh-IuGVKIGGQPGEhPTVLAGTIFYu+HKIVcDEc+GIFD+-AAEsLIppQE-huDtTGNPhllplhGpTPEAIl+YIDFVA-loD.uPFLIDSopG-lRhAAAcaAoElGLADRsIYNSINsSl--uEI-ALspSDlsAuIVLuFNsMDsoVcG+lplLEsGu.ust-KGhLplA-cCGI.Kh.LIDsAlTPlGsGAGsAl+sohslKuKaGhPlGSGhHNsPSAWDWLRca+KcptEttt.........ssDlGoNllQthAuuDFlLYGPIENA.hlFPAsAMlDhhIuEAs+.-lGlEss .............................................+cQpVh-luGsKlGGQPGEaPTlLsGoIFYspHcIV...pDtc+GhFD+ptAEsLlscptEhSDhTGNPthlpl..hu..pos.....E....Ahp+Yl-Fls...-ls-.sPFLlDSosu-sRhuusc.h.s.sElGls-RsIYNSINhuhs..c..p..E.l...c...ALp..c..Sclc.uuIlLuF...ss..h..D..s.o.lpG+hplLpsGu.ts.ccGhLplAc.csGIpp...LlDsAlhPlGsGuGh.uh...RushslKu+aG.hP...sGuGhHNssSuW.cWL+ch+Kp.ht.phht................................ssDlGuNllth.htGuDFlLYGPI-NAthsFPAsAhsDhhluEusc.-hGht..h.............................................................. 0 20 49 58 +3430 PF05175 MTS Methyltransferase small domain Yeats C anon Yeats C Domain This domain is found in ribosomal RNA small subunit methyltransferase C (eg Swiss:P44453) as well as other methyltransferases (eg Swiss:Q53742). 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.93 0.71 -4.88 34 8728 2012-10-10 17:06:42 2003-04-07 12:59:11 9 24 4157 22 1810 48732 14113 160.50 23 53.65 CHANGED phphpohsGVFStsclDhGoplLlpslst...hsup....llDlGCGhGlluhhhuctsPchp.lshsDhsttAlpuu+tshpsNtlc....sclhhuDlhssht..tcaDhIloNPP.............FHsGt.sht.tlspchltsAtptLpsuGcLhlVuNcpLsYpsh...lcchFG..pschlscsstaplhp ............................................................................t................................................................h.....h...t.........h.................ttp....................lL.DlG...s...G..s.G.........s....l.....u..l....s....l...........A...........p..........t..........t........P............p.............h.............p...........l........s........h.......s...........D......l........s........t......p....A.........l..........p.......h......A.........c.......p....N.....h.............p........p................p.............t.........l.........p..........................l..............p...............h..............h.........p............u.............c.............h..............h...............s..............s.................l..............t....................t............p.............a...........D..........h.......I......l..o......N....P......P................................................h....t......s........up.....p...............s..............t...........h......h........p.......p........l........l....t.......p.......A....h.........p...h.L.p..s.s......G.......L.....h.......l...........h.................................................................................................................................................................................... 0 525 1050 1457 +3431 PF02416 MttA_Hcf106 mttA/Hcf106 family Bateman A anon Pfam-B_1826 (release 5.4) Family Members of this protein family are involved in a sec independent translocation mechanism. 
This pathway has been called the DeltapH pathway in chloroplasts [2]. Members of this family in E.coli are involved in export of redox proteins with a "twin arginine" leader motif [1]. 20.50 18.00 20.50 18.30 20.40 17.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.51 0.72 -4.85 12 5974 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 3284 1 1554 3673 2404 70.10 27 71.89 CHANGED luhschllIhlVslllFGsKKLPEluRuLG+oLRtFKpAs+phpst..............................pcpptpc .......luhhcll.l..l..h.l.l.s.l..L..l.F..G.s....c.K.LP....p....lups....l.G......p.......u...l.......+.......tFK......c....uh.p.-..ptt.....................................pttttttt..t............................................................................................................ 0 510 1045 1336 +3432 PF02316 HTH_Tnp_Mu_1 Mu_DNA_bind; Mu DNA-binding domain Mian N, Bateman A anon Pfam-B_12856 (release 5.2) Family This family consists of MuA-transposase and repressor protein CI. These proteins contain homologous DNA-binding domains at their N-termini which compete for the same DNA site within the Mu bacteriophage genome. 21.60 21.60 21.60 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.77 0.71 -4.48 4 448 2012-10-04 14:01:12 2003-04-07 12:59:11 11 15 279 6 72 410 4 108.40 21 26.71 CHANGED pchsGhPGlPphssGVphhAptpGWpKpp+pGsKG.hAhEYclsSLPpEsRttllt.........uAuhsEhsspo.hhcshsppllhshppLssDppptslchl....l....Phl.stDchhsthGhoppp.tTLhhh.Alssp ......................h.GlsGlPpospGlp....h....t....A+.+.....-.uWt....p........R.....+..R..........pGh.....pG...uhEYplsSLP.psptt.l...h.................................................................................................................................................................................................................................................... 
0 16 39 58 +3433 PF02914 DDE_2 Mu_transposase; Bacteriophage Mu transposase Griffiths-Jones SR anon Structural domain Domain \N 20.30 20.30 20.60 20.50 20.00 19.30 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.41 0.70 -4.83 2 134 2012-10-03 01:22:09 2003-04-07 12:59:11 10 8 111 3 20 153 2 197.90 45 33.31 CHANGED VthLpAMpWINGDGY.HNVaVRa.sG-lhRPKTWhWQDV+TRKlLuhRsDhSENhDoIRLShhDVloRYGlPc..HlTIDNTRuAANKhhTGGs.NRYRapVpEs-spGlh.hhGhchHWTSl.hGKGhGQAKPlERAFuhGGLt-YVDKH.hLtGAYsGsNs.tKPD....Nhu-psVDhthFLhsLtpGlt.aNshssR.TEhCuGK.Sas-sFER-aA ..............................pLcAhphlNGDGapasVFV..+..W....s.G..c....l..l..RPKsah...WQDltoRKILuaRsDhSE.Nt-.olR.LuhhDl.......l..pp......Y..G.IPc..+lpIDNsRu.sAsKhhTG.Gs.............NRaR..........F...K.V+...-.-...-spGlh..hl.G.h...c.hHWTo..l....h...hs.cGhGQAKPlERAF.u..hsu..Lt-hlDK.+.PthsGAYTGsss.uKP-.....Nhu.c..+ulsh-hFlts..ltptlthaNs+ssRpoEhCtG..h.SacpsFptpY.............................................. 0 8 14 18 +3434 PF03888 MucB_RseB MucB_ResB; MucB/RseB family Bateman A, Finn RD anon COG3026 Family Members of this family are regulators of the anti-sigma E protein RseD. 
29.00 29.00 29.10 29.30 28.40 28.80 hmmbuild --amino -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.68 0.70 -5.38 8 1077 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 1056 12 197 621 90 281.60 46 88.58 CHANGED Lp+MscAsppLSYEloFVhp+suSh-ohRYRH.scp.DG+shA+LlpLDGstpEVlpRDsclSYhpPulpsFTlc.ss.lssshP.lhpsDhc+LSsaYDFlpVG+uRVAGRhssVlRllPKDsaRYuYllalDcEopLsL+SDLLDR-GpLLEQFRslsLsluptlststctLpssshPt...llpsspsstpsshuWpssWLPpGFshhs.thpp.slo-c..l-SthYSDGLFsFSVaVpsscusslpDpps+pGspTlhSchl....GspElTVVG-lPhuTAcRIApSI+Fs .................................LppMs.ASQsLNYElSF.lp.l.s.c.p.u..lESLRYRH..Ach..D.sc.sl.ApLlphDGPpREVlpRG..s.E.lSYFE..P...G...l-PFT.........ls....Gc..hIsDo....lP...ul..lh...............s..D...............hc.+L...u.....s..aYDF.lsl.G.R.sRlAsRhCpVIRlVs+Du.pR.Y.SYllWhDp-o+LPhRsDLL....DR.DG.E.sLEQ..FRl..lu.hs..V...s.....p.....c...l..u........s....s..h........p....s.L.s........K...s....sL.Ps..................lL...sl..P....s.....u......p..............p....s..c.........h.......s....Wps.sWLPpGFsplupsRR...............s............L........s...s...........h......-s...........h.....lE..SpLYSDGLFSFSV.V...s..t......s..s.......s...s...s...s...-...Q.h..lR.p....GRRT..lhopV+........sss..E..ITl..VGElPPpTAKRIAcsIcF.t...................................................... 0 31 91 149 +3435 PF03108 DBD_Tnp_Mut MuDR; MuDR family transposase Bateman A anon Pfam-B_271 (release 6.5) Family This region is found in plant proteins that are presumed to be the transposases for Mutator transposable elements [1,2]. These transposons contain two ORFs. The molecular function of this region is unknown. 
21.80 21.80 21.80 21.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.18 0.72 -4.40 89 954 2012-10-02 23:28:20 2003-04-07 12:59:11 10 78 48 0 424 946 0 64.40 24 8.07 CHANGED ssthtlGphFpst..pph+pulppaulppphphchh+oc.pp+htscCh....scs...CsW+lhAuptpc.sptapl .....................hhhGthFssh..cch+pAl...pp.au.lpp.c.hp.ach.h+os..pp.+hps..cCh......tcs.....................Cs..W+l..pApphpt.pphatl.................... 0 13 155 219 +3436 PF04310 MukB MukB N-terminal Mifsud W anon COG3096 Family This family represents the N-terminal region of MukB, one of a group of bacterial proteins essential for the movement of nucleoids from mid-cell towards the cell quarters (i.e. chromosome partitioning). The structure of the N-terminal domain consists of an antiparallel six-stranded beta sheet surrounded by one helix on one side and by five helices on the other side [1]. It contains an exposed Walker A loop in an unexpected helix-loop-helix motif (in other proteins, Walker A motifs generally adopt a P loop conformation as part of a strand-loop-helix motif embedded in a conserved topology of alternating helices and (parallel) beta strands)[1]. 23.00 23.00 23.10 23.10 22.50 22.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.58 0.70 -5.02 2 745 2012-10-05 12:31:08 2003-04-07 12:59:11 7 2 733 6 73 491 11 224.10 84 15.54 CHANGED MIERGKapSLThINWNGFFARTFDlDpLVTTLSGGNGAGKSTTMAAFlTALIPD.oLLHFRNTTEAGuopuSRDKGLaGKLpAGsCYuhLDslNSRpQRllhuV+LQQVAGRD+KVDIKPFhIQGLP.pVpPTpllsETls-+pARVh.lNElKDtltthEGspFKpFsSIsDYHu.MF-hGlIs++LRsuSDRSKFYRLIEASLYGGISSAITRSLRDYLLPpNuG .......MIERGKFRSLTLINWNGFFA.RTFDL..DE...........L..............VT........TLSGGNGAGKSTTMAAFVTALIPDLTLLHFRNTTEAGATS.GSRDKGLHGKL.....KA.G.VCYShLDTINSRHQRVVVGVRLQQVAGRDRKVDIKPFAIQGLPhSVQPTQLlTE.TLNERQARVLsL.NELKDK.LEsMEGVQFKQFNSITDYHSLMFDLGIIARRLRSA.SDRSKFYRLIEASLYGGISSAITRSLR.DYLLPENSG........................ 
0 7 23 49 +3437 PF04288 MukE MukE-like family Mifsud W anon COG3095 Family Bacterial protein involved in chromosome partitioning, MukE 25.00 25.00 26.80 26.60 21.50 17.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.52 0.70 -5.56 3 756 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 742 12 72 290 5 226.30 80 96.59 CHANGED MPsKLApAIANPLFPALDSLLRSGRHISoD-LDNHAFLMDFQs-L-tFYpRYNVELIRAPEGFFYLRPRSTTLIsRSVLSELDMLVGKVLCYLYLSPERLApEGIFTsQELYDELLTLADEuKLLKLVNNRSSGSDLDRQKLtEKVRoSLsRLRRLGMVhTlG-psSGKFRITESVFRFGADVRuGDDPREAQtRLIRDGEAs..TP-shslEsQtQLhENDTsEtDE.DoEa.GE-E .........MPVKLAQALANPLFPALDStLRSGRHIGLDELDNHAFLMDFQEaLEEFYARYNVELIRAPEGFFYLRPRSTTLIPRSVLSELDMMVGKILCYLYLSPE.RLANEGIFTQQE.LYDELLTLADEuKLLKLVNNRST.G..S..D..lDRQKLQEKVRo....SLNRLRRLGMVWFMG.pDSSKFRITESVFRFGADVR...uGDDPREAQtR...LIRDGEA..........MslEN.c..LQL.......N..DEoE..E.s.Q...sDS...GEEE.............................................. 0 7 22 48 +3438 PF00893 Multi_Drug_Res DUF7; SMR; Small Multidrug Resistance protein Bateman A anon Pfam-B_1082 (release 3.0) Family This family is the Small Multidrug Resistance (SMR) family. Several members have been shown to export a range of toxins, including ethidium bromide ([1] and quaternary ammonium compounds [2], through coupling with proton influx [3]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.02 0.72 -3.52 17 6822 2012-10-02 19:55:49 2003-04-07 12:59:11 14 6 2594 2 1186 4014 2383 92.70 32 83.38 CHANGED hsal...hLhlAhlhEllusshLK........ocGFo+hhPolhsll...uauluFhhLohuhcp.lPlulAYAlWoGlGhlhsslsullhFtEplshhpllulsLl ................................................................tWl...hLhlAhl.hElh...ushs...L...Kh...........................o.p..G..F..s..+....h...h.....s...s..l...l..s...ls...........sh..sl.S.F...h...h...L..u.h..Ah.+p....l..P..l....G...lAY...AlWs.Gl.Gh...l.ss.slh...uh...l.las-slshhpllulsLl......................... 
0 303 684 942 +3439 PF01225 Mur_ligase FPGS; Mur ligase family, catalytic domain Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family contains a number of related ligase enzymes which have EC numbers 6.3.2.*. This family includes: MurC (Swiss:P17952), MurD (Swiss:P14900), MurE (Swiss:P22188), MurF (Swiss:P11880), Mpl (Swiss:P37773) and FolC (Swiss:P08192). MurC, MurD, Mure and MurF catalyse consecutive steps in the synthesis of peptidoglycan. Peptidoglycan consists of a sheet of two sugar derivatives, with one of these N-acetylmuramic acid attaching to a small pentapeptide. The pentapeptide is is made of L-alanine, D-glutamic acid, Meso-diaminopimelic acid and D-alanyl alanine. The peptide moiety is synthesised by successively adding these amino acids to UDP-N-acetylmuramic acid. MurC transfers the L-alanine, MurD transfers the D-glutamate, MurE transfers the diaminopimelic acid, and MurF transfers the D-alanyl alanine. This family also includes Folylpolyglutamate synthase that transfers glutamate to folylpolyglutamate. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.62 0.72 -3.91 236 13732 2012-10-10 17:06:42 2003-04-07 12:59:11 20 28 4352 28 3015 10183 5325 86.30 23 18.21 CHANGED plp.hlulsupshs....shu.hhlshtGhpl...............sGpDhhpp......hhptG.Atshlsc....pst.............................t.th..tspt.tl.l..sttphLuplspthh .....................lphlslcup..t.hs........shs..la.l.s..h.h.G..t.ps...............sGpD...hhsp........................hhp.pG....ss....shl.sp......cst...........................................................................th...t....s..p..h..s..t.l..V....hs.t.tphLuplst.h.h............................................................................. 
1 1033 2013 2574 +3440 PF02875 Mur_ligase_C FPGS; Mur ligase family, glutamate ligase domain Bateman A, Finn RD, Griffiths-Jones SR anon Bateman A Domain This family contains a number of related ligase enzymes which have EC numbers 6.3.2.*. This family includes: MurC (Swiss:P17952), MurD (Swiss:P14900), MurE (Swiss:P22188), MurF (Swiss:P11880), Mpl (Swiss:P37773) and FolC (Swiss:P08192). MurC, MurD, Mure and MurF catalyse consecutive steps in the synthesis of peptidoglycan. Peptidoglycan consists of a sheet of two sugar derivatives, with one of these N-acetylmuramic acid attaching to a small pentapeptide. The pentapeptide is is made of L-alanine, D-glutamic acid, Meso-diaminopimelic acid and D-alanyl alanine. The peptide moiety is synthesised by successively adding these amino acids to UDP-N-acetylmuramic acid. MurC transfers the L-alanine, MurD transfers the D-glutamate, MurE transfers the diaminopimelic acid, and MurF transfers the D-alanyl alanine. This family also includes Folylpolyglutamate synthase that transfers glutamate to folylpolyglutamate. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.13 0.72 -3.95 50 21443 2009-09-14 13:20:23 2003-04-07 12:59:11 16 65 4466 65 4592 15708 6742 85.40 20 18.16 CHANGED lt.tRh-hlsp..........pshhllsD.YAHsPsuhpsslpshtth...............t+llhlhGsht-...Rstshps.hspltsthssh.lllhs....t.tp.ststh ..........................tRhp..h..l.s.p.............................p.s...s..p.l..lsD..aAHsPsuhps.s.l.puh..p..t.h..s.....................................pt+.ll.hlhG.shhc...................+....s...t.s....h....t.t...h.s.p..h.h.s...t.....ss....llh..h..t..................h............................................................. 
0 1552 3028 3916 +3441 PF02873 MurB_C UDP-N-acetylenolpyruvoylglucosamine reductase, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_1092 (release 5.2) Domain Members of this family are UDP-N-acetylenolpyruvoylglucosamine reductase enzymes EC:1.1.1.158. This enzyme is involved in the biosynthesis of peptidoglycan. 21.00 21.00 21.30 21.20 20.80 20.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.28 0.72 -4.28 13 4494 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 4271 10 953 3207 1942 113.70 37 35.04 CHANGED cpplhppspphpppR.h.ppPhchPsu..GShF+NP.sp..............................tAGpLIEcsGL.KGapIGGAp...lSchHANFllNpGsAoucDVlsLIchV+ppVt-caGlhLc.EV+hIG ...........................t..plhpthpclppp+....h...p..P..h....p..h..s..os..GShF.K.NPhsp...........................................................hAGhLI-p.u.GL.KG....h....p.....l.......GG....At.................VSp..+..HAhhllN...t..u......s.......A......Tup........D.......lhsLhctVpppVt-+FGlpLcsEV+hlG.................. 0 325 632 810 +3442 PF02976 MutH DNA mismatch repair enzyme MutH Griffiths-Jones SR anon Structural domain Domain \N 21.90 21.90 23.00 22.30 20.70 21.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.34 0.72 -4.15 50 953 2012-10-11 20:44:43 2003-04-07 12:59:11 10 5 911 6 127 531 32 101.00 60 39.83 CHANGED h.LGAsAGS+PcQDF..scLGlELKTIPIstpGc.............PLETTaVslAPLhshsGl.sWcs.SpVpcKLp+VLWlPlpG-RpIPl......u-RpIGsslLWpPss.p..ppphL+pD .................hLGAoAGSKPEQDF.......AtLGVELKTIP.lcuhG+.....................PLETTFVCV.APL...TGNoGV.TWEo.SHVRHK..Lp..R..VLWlPVEGER...sIPL............AcR...RV...GoPLLWSPsc...E....E-cQLRpD................................... 
0 19 53 92 +3443 PF01624 MutS_I MutS;MutS_N; MutS domain I Bashton M, Bateman A, Studholme, DJ anon Pfam-B_800 (release 4.1) Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF05188, Pfam:PF05192 and Pfam:PF05190. The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds with globular domain I, which is involved in DNA binding, in Thermus aquaticus MutS as characterised in [4]. 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.30 0.71 -4.04 60 5408 2009-09-12 04:05:03 2003-04-07 12:59:11 15 62 4333 49 1586 4546 1040 110.20 40 13.01 CHANGED TPhhpQYhclKpp.a.DslLhF+hGDFYEhFh-DAhhuu+hLslsLTt+t........tstsp..hPMsGlPh+uhcpYlp+Llp.p.Ga+VAlsEQhEsstts.................+.....s...llcRcVs+llTPGTlh-ss ...................TPhMpQY.hclK....tp.....a.s..........D..slLhaR..h...G....D...FY.E...........hF..........a.....-......D........A.......h......p........A.uplL-IoLTpRs................tsssp....lPMsGlP.a..Hu.......h.-sYlsc..Llp.p..Gh+V.AI..sEQ.h.p-.Ptps......................................K......G......VcRcVscllTPGTlh-t.s........................................... 0 529 978 1330 +3444 PF05188 MutS_II MutS domain II Studholme, DJ anon Pfam-B_800 (release 4.1) Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF01624, Pfam:PF05192 and Pfam:PF05190. 
The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. This domain corresponds to domain II in Thermus aquaticus MutS as characterised in [4], and has similarity resembles RNAse-H-like domains (see Pfam:PF00075). 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.77 0.71 -4.11 47 4968 2009-09-11 06:26:47 2003-04-07 12:59:11 12 45 3989 50 1597 4177 853 129.10 23 14.77 CHANGED shlsu...lht.........cssp.hGluhlDlooGchhlscht.-....hpcLhscltp...lsPpElllspsh.p....................hphh.thphh.....s....ht.hphpp....s..ppplp..pta......sspslcshs........t..thsls.AhusL.....lp......Ylcpsppp.......tlsplp .................................................NhLsu....lhp................pppt..hGlAhlDl.oo.Gca..h....ls..p..hs.s............hppl.ts-ltp..........lsPpEllh.sc...s.hsp...............................................h..h..p...h...h.....t.......t....h..p.h.h.................t................hp...h..p..h...ps.................s......p..pplp......ppa.....................ts..p..slps.hsh.....................................pt.hthsls...As.usL.....Lp..............Ylcc.TQpp........pls+l............................................................................................... 1 504 952 1325 +3445 PF05192 MutS_III MutS domain III Studholme, DJ anon Prosite Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF00488, Pfam:PF05188, Pfam:PF01624 and Pfam:PF05190. 
The MutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds with domain III, which is central to the structure of Thermus aquaticus MutS as characterised in [4]. 27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild --amino -o /dev/null --hand HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.54 0.71 -4.54 306 6533 2009-09-15 09:54:17 2003-04-07 12:59:11 13 68 4115 50 2168 5633 1219 291.10 24 33.92 CHANGED ssThpsLElh.......ps................tstpps..........oLhsll.cc.shTshGpRLL+p..a.ltpPLpc.hspIppRhcsVpthh....p............t............t..ppl......pptL....c.pl..s.Dl-Rll....sR.........................l..............................th..........sps.........s...........s+-lht.....ltpul....pth.p.l..............................................................tph.......htt............................................ltphhp..pls................plh.......phlppslsp..p............sh.h.................................................pcG.......t........................hl....tsGhsscLDph+phtppscphltphppc.ppptshsslchthspshsh.....................................................hhpsppsttpphstpahpppshtss.Rahos-Lpchppclhpsppcthth-pplhppLhpp.h.hsp..hstl.ppsspslApLDsLhuhAc 
.......................................................................ohpsLElh................psh...................tstpps................oLh..tlL...Dc..otT.s.h.G...............s........RhL......+p.......W..lp.............pPl...h..s...hp.p.I.......pp..R..ps....lp..thh................p...............t...hp........ppl..........................p.p..hL.........................c..pl....h...Dl.ERll..........uR......................................................................l..................................................................sh........................sps.........................s....................................s+DLh.p...........lp.pul..............ptlsp.l..............................................................................................pph.............t.hps......................................................hlpplhp.........pls...s...............htclt.................phl..ppu.lsc....p..................sshhh..................................................................................p-Gs............lI..tsuhstpLDch+phtppspphltphptpccppsulpsl+ltaspshua.........................................................................................................................................................................................................hhphppsttphsstpah++pThtsspRahssELpch-sclhpucpcththEtpl.a.pplhppl.h.....tp......h.....pt.....l...pphup...sl.ApL.DsLhuhA....................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 758 1352 1838 +3446 PF05190 MutS_IV MutS family domain IV Studholme, DJ anon Members of PF01624 Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF01624, Pfam:PF05188, Pfam:PF05192 and Pfam:PF00488. The mutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds in part with globular domain IV, which is involved in DNA binding, in Thermus aquaticus MutS as characterised in [4]. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.87 0.72 -3.81 51 5085 2009-01-15 18:05:59 2003-04-07 12:59:11 13 40 4000 48 1596 4267 904 92.30 34 10.61 CHANGED uasscLDch+phhcpspphltchppct+c..chGl...psLK...lshsphhGYalclocsp....tppl....Pt....pahc..ppT..hKsutR...atTscLpphppclhpscpc .....................GastcLDchRphtcsu.ppa..l..t...c...lct+ERc.........coGl................poLK.....l..s.a.N...p.....V...a..G..YaI-..l..ocup..............tppl......Pt.......cahR.....+QT...Lp..NuER.................ahT..sELKchEcclLsAcp.t.................................... 1 515 946 1314 +3447 PF00488 MutS_V mutS;MutS_C; MutS domain V Finn RD, Studholme, DJ anon Prosite Domain This domain is found in proteins of the MutS family (DNA mismatch repair proteins) and is found associated with Pfam:PF01624, Pfam:PF05188, Pfam:PF05192 and Pfam:PF05190. The mutS family of proteins is named after the Salmonella typhimurium MutS protein involved in mismatch repair; other members of the family included the eukaryotic MSH 1,2,3, 4,5 and 6 proteins. These have various roles in DNA repair and recombination. 
Human MSH has been implicated in non-polyposis colorectal carcinoma (HNPCC) and is a mismatch binding protein [2]. The aligned region corresponds with domain V of Thermus aquaticus MutS as characterised in [4], which contains a Walker A motif, and is structurally similar to the ATPase domain of ABC transporters. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.56 0.70 -4.96 35 9697 2012-10-05 12:31:08 2003-04-07 12:59:11 16 85 4461 50 2841 8224 1959 222.30 35 28.75 CHANGED YsRPphscp.............slpIctGRHPVVEphh...sttsFlsNDspLspsp........phhlITGPNMuGKSTYlRQsALIslhAQlGSFVPAcpAcIullDpIFTRlGAuDcLspGpSTFMVEMhETAsIL+sATccSLVllDElGRGTSTaDGlulAaAlsEaLt..cch+uhTLFATHY+ELTpLspp.....hss.........Vp..NhHhssh..........Eps.pslsFLapl.ppGsus.cSYGlpVAcLAGl.PpsVlpcA+phLppLEpp ................................................................................spPp.h...s...p..pt..............tl.pl.p..puR..HP..l..l....-p..h......................................h.....l....s.N..s....l.p....l...s...p..p.p...................................phh.l...IT...........G.....P.......Nhu...G.K...S.Ta........hR.................p...............luL..................lslh........A.Q...h.....G..s.a..V.P..A....c............p......u.......p..l.....s.l.h.........D.p..I.F...o....R..l............G..s...........s....D.........s......l....t.......p....G.....tSTF..........h.............sE.......Mp..c........hs............p.I...L........p..........p.........A...............o..........p................p.............S............L...l..L..hDE.......lG........+.GT.soh..DG.h....ul.Ah.A.ll-h.lt................p.......p...h.......p....s....h.s.lh.u.T.HY.h..E..L...s..p.......h...s....p...p......h..t...t.................l.t.....N...h.p.h..p...h...........................................p..s....ppl...h.....a.....h....a...cl......h.......G....s.s.s......c.S..a.....ul.plAp.hh..G..l......Pppllp.....cApphht.....t.............................
.............................................. 0 1046 1858 2452 +3448 PF03023 MVIN MviN-like protein Griffiths-Jones SR, Studholme DJ anon Pfam-B_1348 (release 6.4) Family Deletion of the mviN virulence gene in Salmonella enterica serovar. Typhimurium greatly reduces virulence in a mouse model of typhoid-like disease [1]. Open reading frames encoding homologues of MviN have since been identified in a variety of bacteria [2], including pathogens and non-pathogens and plant-symbionts. In the nitrogen-fixing symbiont Rhizobium tropici, mviN is required for motility. The MviM protein is predicted to be membrane-associated. 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.45 0.70 -6.07 15 3361 2012-10-02 21:24:20 2003-04-07 12:59:11 9 10 2911 0 898 5047 4017 445.50 29 80.32 CHANGED lAshhGAushuDuFhlAF+lPNhlR+lhu..EGuhssAFlPlasctp.........stccuptFspplhslltsshlllollh.llsu.hlltlhusGh.........st-shpLssthhplhhPalhhluLsulhsulLNstc+FhssuhoPllhNlshIhslllhtsphs...hh..........u...LulGlllGGlhQhLhplsslt+tGht.................hc.hhshp-psl+cllphhhssllusulsQlsLhlsptlASh......lpsGuhuhltYusRlapLPlGlFululuTllLPplS+shtsp-ts......phtphhspul+lshhlhlPsohulhlLutsIlslLap+GsFstp-sttsuplLtsaulGLlsauLhplLtpsFYApc-s+oPhplullshllNhshs.....llhhsshussGlAlAsuluuhhshshLahhlt+phh........hh.ttththhltp..hlsshulhusllhhlpphhp..asssphhhhhht 
............................................................................................................................................................................hAthhGA..G..h..s.sDAF.h.lAh+lPNhl.R.c.l..hA..EG..A..F.u.p.A...FV.P..l....hschp..p...........................pspc.t..spt...a....s...sp..l...h...s..h.l.s...h..h.l.h.l....l.T...l.l.u...h.l...huP...hl.....l.h....l...h.u.s.Ga..........................sspp..h..s.Ls....st....hh+.I.h.h.P.a....l.h..hl.u.Ls.u..lh.u...u....l.....L....N....s.h....s...+....F....u.h..s..A...a...u..P....l.....l.....h.N.....l.....s.....h....I.....s....s.....h...l......h.....h...s....s....h....h....s...........s.hh.........................u.........L.A.h...u.l...h...l.G...G...l.....l....Q....h...l...h...p...l.......s....h.L...p.+..h...G.hh.............................................ht.s...c...h...s....h....p......c...s........s......l....p....+........l........h.....p....h.....h....h.....P.....u....l........l...u.....s.......u..l....s......Q.....l..........s.....l........l........l....s.....o.......h........l......A.S.h................................L..s.s.....G....u.....l....S..........h..........h.......h..........Y......A.....s......R..........L..........h........p.....h..........P........h........G..........l........l....G..l....A....lu.T....l..l....L.....P.p.L.S+.t...h.ss.s.s.ps......................p.hpph.hshul+hshlls.lPuulul....h...lLu.........tPlst.sL.....F..p......h.............G..p.....F...s...s...t....D...s.....t..h.s.u...t.u...L.h.s..Y..u.l....G...L...l.u...h.h....l.h.+...l....L.s...uF...Y......A...+.p.D...s.+..T....P.l.t....l....u..l...l...s...l.l.l.sh..l.hs.......................lhh..h......h.........h..........t.......h......s....G.......L....u.....l...u..t....u....l....u....u....h....l.s...s..s...l....L....h.h...h..L..p..+.p..hh....................h.t..s..t......t..h...h.h..h.hhp........l.hlu.s...h.l.h..u....s.s....l....h.h.h.......h.....................hh..........................................................
.............................................................................. 0 310 608 772 +3449 PF02344 Myc-LZ Myc leucine zipper domain Bashton M, Bateman A anon Pfam-B_829 (release 5.2) Family This family consists of the leucine zipper dimerisation domain found in both cellular c-Myc proto-oncogenes and viral v-Myc oncogenes. Dimerisation via the leucine zipper motif with other basic helix-loop-helix-leucine zipper (b/HLH/lz) proteins such as Max Swiss:P25912 is required for efficient DNA binding. The Myc-Max dimer is a transactivating complex activating expression of growth related genes promoting cell proliferation. The dimerisation is facilitated via interdigitating leucine residues every 7th position of the alpha helix. Like charge repulsion of adjacent residues in this region perturbs the formation of homodimers with heterodimers being promoted by opposing charge attractions. 20.50 20.50 21.30 21.20 20.40 20.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.22 0.72 -7.47 0.72 -4.32 2 981 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 838 4 26 560 0 27.60 84 13.20 CHANGED u-Ep+LIuEK-.LR+RREQLKHKLEQLRNSpt .SDEHRLIAEKEQLRRRREQLKpKLpQL....... 0 2 5 9 +3450 PF01056 Myc_N Myc_N_term; Myc amino-terminal region Finn RD, Bateman A anon Pfam-B_387 (release 3.0) Family The myc family belongs to the basic helix-loop-helix leucine zipper class of transcription factors, see Pfam:PF00010. Myc forms a heterodimer with Max, and this complex regulates cell growth through direct activation of genes involved in cell replication [2]. Mutations in the C-terminal 20 residues of this domain cause unique changes in the induction of apoptosis, transformation, and G2 arrest [3]. 
22.20 22.20 22.40 22.30 22.00 22.10 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.43 0.70 -5.27 9 1946 2009-12-11 14:44:54 2003-04-07 12:59:11 13 10 1164 2 129 1419 1 143.50 44 69.28 CHANGED MPlsuShssKNYDYDYDSlQPYFYhDp-D.sFYa..pQps.phQPPAPSEDIWKKFELLPTPPLSPSRRsSLu........................TA-QLEMVoEhLG........sDsVNQSFICD.ushupoFlKSIIIQDCMWSGFSAAAKLEKsVSE+LAShpAuRKEsshusss......................uussRhsusYLQDLusuASECIDPSVVFPYPLs-su........pss............sAsP..............psh.shcssPs....uSSSSGsDo..........-pp---EEEp-EEEEIDVVTVEK..ppp+pcsssSto..............ta.SPLVLKRCHVshH.QHNYAA..PST+.cDtPusKRl+LEup..spslpth.s.....pRKCsSP .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...p.pp......t....s..........................s+s.tSPL..V.LKRC..H.Vs.hH..Q.HNYAAP.......P.S.T+.....h-....Y...P..u....uKRh+L-u.............uRVLKQISs.........sRKCsSP.......................................................................................... 
1 13 28 56 +3451 PF01669 Myelin_MBP Myelin basic protein Bateman A anon Pfam-B_1868 (release 4.1) Family \N 22.00 22.00 33.60 26.40 20.00 19.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.84 0.71 -3.87 5 142 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 42 9 32 174 0 115.40 55 72.19 CHANGED MASASTSDHARHGhGs..RHRDSGLLDSLGRFFG..GDR+VPRKGpGKs...shtttl..hPp.+tttttst...s-ssVVHFF+shhoP...ss.ptthpthu..ho.................h.p.pppuh+u+K....-GpGs.....chG......tptpuSP.SRR ................hAoASThDHARHGahs..RHRDoGlLDSlGRFFG..GDRs...sP+RGSGKss............sRssHhGSLPQ.+s...pH.uRst...D-NPVVHFFKNI.VoP+...........sPPspu+utth.......t........................................................................................................................................................................................................................ 0 1 2 10 +3452 PF01275 Myelin_PLP Myelin proteolipid protein (PLP or lipophilin) Finn RD, Bateman A anon Prosite Family \N 20.40 20.40 20.40 23.30 20.00 20.20 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.79 0.70 -5.03 7 395 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 126 1 135 334 0 203.20 47 86.95 CHANGED GCaECCI+CLGGVPYASLlATlLCFsGVALFCGCGHEALoGTEpllEpYFS..pNhpDahhLhsVIphFQYVIYGlASFFFLYGILLLAEGFYTToAVKplaGEFK..........TTsCG....................RClS......shFlhlTYlLslsWLuVFAFSAlP...VaIYaNhWoTCQsls.ssps.ssshtplCsDsRQYGllPWNA.PGKlCG...sLtsICpTsEFphTaHLFIsAhAGAuATllALlpYhhusoaNaAVLKhhuRtst.p+h 
.................................Ghh-CCh+CL...sul...PaASLlATlLha.GVALFCGC.GH.ALouT.tllc.paFp......psh.t.Dh....lhph.Iphhp.YV..IYG.lAuhFFl.YG.llLL.sEGFaT..T.u.Al..+.c.l.a.G-.aK..........sThC.G................................R.Clo.......................................u.hFlhlTYlhhlsWLhV.huFoulP...Va.haa.NhWo.hCpshp.spts.............plChDh...............RQa...G..ll..P..h...s....s......s....h..K...............l.C.ss...........sh...phCpo..sE.h..ho....acLFIsAhsGA.u..hsl.luh...............lp...ahhhhohNaAhl+.hs+.pt............................... 2 31 42 79 +3453 PF00063 Myosin_head myosin_head; Myosin head (motor domain) Sonnhammer ELL anon Blastp MYSA_HUMAN/1-840 Domain \N 19.10 19.10 19.20 19.10 19.00 19.00 hmmbuild -o /dev/null HMM SEED 689 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.23 0.70 -6.49 24 7667 2012-10-05 12:31:08 2003-04-07 12:59:11 16 484 1348 171 3682 6883 268 481.40 32 44.72 CHANGED s-DhstLohlsEsulLcNL+pRYt.sshIYTYsGhhLlulNPa+pls.lYopchlptYcG+p............+tEhsPHlaAlA-pAYRsMhs-pcsQsllloGESGAGKTEsoKhlhpYlAuluussst..................................................p.tplcpplLpuNPlLEAFGNAKTsRNNNSSRFGKalcIpFstsGph....sGupIpsYLLEKSRV...lhQspuERNaHIFYQLLuGsssp..+ccLpLs...............sspsYtaLspsu....shsl.........................sGlDDsp-FptsppAhcllGhocp-ppsla+llAulL+lGNlp.Ftpst.....ppptu.............csppt.hpthutLhGlsstshtpuLhp.plpsG........pphlsps.shppAhhu+DALAKulYuRLFsWlVs+lNpsLstpp......................................ppsthIGlLDIhGFEIF..-hNSFEQlCINasNEKLQQhFscphhphEQEEYh+EGIpWshI-a.hDspssl-.LIEp...PhGIlulLDEpshhP+u....oDpoFhpKLhppa.spps...pa.p+s.c..........tspspFtlpHYAGcVpYsspual-KN+DsLpssllslhpsSps.sllsplFpp.p..............................................................tttpttpspptph.pTsutpa+pslspLMpsLpss...sPHalRCI+PNcpKtsspa-sphVhpQlpssGlLE..slRlpRtGFssRhhascFhpRYplLsspsh............tsscpusctlLppl......sh-...tppaphGpTKlFh+ 
......................................................................................................................................p...t.........h.....l...Ra.....................................h.....a................o...............u...........h.h.ls.hNP....h..t...h.......................t..........t.............h.......t.......................................................t............P...Hh..ashs..p.s.a.............h..................h................................................t.....................s.....Q.....s..h.l................h................o......G.......E.......SGuGK..T..........s...........s.K.......h..l...h.p.....a..h.....ut...l.u...s.......tt......................................................................................................................................................................................................................tplc.p..p..l...........l....tu............NP..lL..E.....A.FG...............N.AKTh+N.s...N.............S.............SR...F..............G....K.a.l.c.........l...p..F.....s.......p.G.t.l...................................uu..p.l..p...............p...........Y...............L.L...E........K.S.........Rl...................shQ............t.s.E......Rs.aH........lFYp..l.h..s.s....t....p.................p..............hp..h.h.l.t............................................ss..p.a..tal.sp....s......................ph.p.l............................................................................................................s...h...s.....D....t....p......p.....h............t.......h......................p.......A....h....p.hl.G.hs.........p...c................................sl.hplhu...u...l.....h.+...h..GN...hp....Fpttt..................t.t...p..ts..............................................................sp...p...........hp.....hs.....u..........Lh..sl.....s...........t...p.....l....ps.....l....hp..ph.hst..............................................t-h.l...p...s..s...h.t.p.......s............
......sh..s...uh...uKslY.t..p.hF..a...............h........l.......t...lN..t....h....................................................................................................lu..l..L....Dh....h.G..F.E.h.....................t.N...............s...h...E..Q..h..h..lN..hs.........s.E..p..............LQ........t......a..p...................h............hh...-........p.........p...................................Y.....t....E..........t.....l................h.........................................l......p..a...............s..................t...............h.h.................p.........l.......l.............tt.............................................................u..............l...h..hl..p-ts...h...t.s...............sp...t...hh.t....p.h...t.........t...tt...................h.................................................tF.l...H......a..u.s..........V.......Y.p....h..........t....h...h.....+...N...-.....h...........p.h....th..h...............t.........u......t.......................hh.......h.h...................................................................................................................................................................................................................................................................................sh..s....h...p.........p.h....L..h.t..l...tt.................ts.al+C..............l..............h......s...................N...........................................................................................................................h............p.............hhtQlp...t.hhp....hp..h..t...........u.a................s......p.......h.......hh..............a...........h...................................................................................................................................................................................................................................................................................................... 
0 1142 1634 2706 +3454 PF01576 Myosin_tail_1 Myosin_tail; Myosin tail Bashton M, Bateman A anon Pfam-B_356 (release 4.1) Family The myosin molecule is a multi-subunit complex made up of two heavy chains and four light chains it is a fundamental contractile protein found in all eukaryote cell types [1]. This family consists of the coiled-coil myosin heavy chain tail region. The coiled-coil is composed of the tail from two molecules of myosin. These can then assemble into the macromolecular thick filament [1]. The coiled-coil region provides the structural backbone the thick filament [1]. 51.00 51.00 51.20 51.00 50.90 50.80 hmmbuild -o /dev/null HMM SEED 859 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.14 0.70 -13.91 0.70 -6.82 15 2444 2009-01-15 18:05:59 2003-04-07 12:59:11 14 63 325 2 1048 2212 11 548.10 35 44.88 CHANGED -hEpp+c-lEpsLp+KEuElstlss+lE-EQshltpLQ+pl+-LpuRIcELEE-LEsERsARsKuE+pRuDLucELEELuERL-EuuGATuAQhELNKKREAELsKLRRDLEEuslpaEsslusLRKKHsDAlsElu-Ql-QLQKsKuKhEK-KuphptEl--ltupl-phsKuKtssEKhsKphEsQlsELpsKhDEhsRpls-lsopKuRLssENu-LsRQlEEtEsplssLo+hKupLsuQLE-A+RsLEEEuRpRssLpupl+slppDhDsLREplEEEsEAKu-LpRQLSKANuElpQW+oKaEsEuht+sEElEEhK+Khpt+lsEhE-plEutpsKsssLEKsKsRLpuElEDLpl-lE+usutsupLEKKQ+sFDKlluEh+pKs--lpsEL-sAQ+-uRshSoELa+LKsph-EstDplEuL+RENKsLuDEl+DLs-pLuEGGRslHELEKs+RRLEhE+-ELQuAL-EAEuALEpEEsKshRuQlElsQlRsEhERRLpEKEEEhEspRKNppRsl-SlpAoL.EuEsKuKuEshRlKKKLEuDINELElsLDtAN+usA-tpKslK+hQpQl+-LQpplE-pQRp+--scEphthAERRsssLpuElEELRssLEpuERuRKtAEsElsEAs-RlsELsuQssoLsupKRKLEu-lsslQuDLDEshsEh+sA-ERu+KAhsDAs+LA-ELRpEQ-pop+lE+hRKsLEpplKELQsRL-EAEusAlKGGKKhIpKLEsRVRELEsELDuEpRRptEspKshRKt-R+lKELphQsEED+KNh-+hQDLlDKLQtKlKsaKRQlEEAEElAshNhsKaRKhQ+ELE-AEERADpAEpsls+lR...uKuRs 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...................................h.....h.............ph...p.p.p.............t....t........++...p......phtt...t.t.t.......th..c+.t........p.h..-hpsh............hp...t...t.......h.pp.p....p...hp..p...........tp.p..........t..p.......h...pt.....tp.p.h....sch.t....httth....p.p............th.t..t.......hp.......p.........t.h.t.......t.......cht..p...h........................tt...tt.tt.p..h.tphpt.hpthp........ph......c.p.t.......hc-..ts.......thp.............c.......tt..............h.Rhphp.h.....t.th+tphpc.ch.t...t+-.-p.-t....h..+.p.t..t....h..cp.hp.tL..-..t..Ehc.......+..t.h.t.+...cchc.plt...............p.h..p.....thp.tts..p.............t.........-...........+p....h....c..............p................p.............h...p....................-....h...............hth...--t........t........pchht.p..t..pcc....t.hts-h.p.............hp...htts.+t.++.h.......c.-h...............-..httph.......t.........tt.......t....t....s....s.......h.........p...tt....pp....h-tcl.plpt-h--..pp.p.h.-+hc+s...p..h.t........h.t.....p-Ltt.Ep.......p..tttc..E.p.+pphEtp..............K-.Lp.+Lp-..hEt.s..hp...t........h.+.t....lttLEs.+.................l....tpLE..tpl-tEp....+..............ct....ts.+thR+h-++...lKEl..h.Q.s........-..........-..-++p..p.h.p-.h-Khp.+lK.....thK.RQ.h-Eu................Ep.sp...s..ph.R+hQ+
EL--tpEts.-...hppplpth+..................................................... 0 216 306 674 +3455 PF00819 Myotoxins Myotoxin Bateman A anon Pfam-B_1337 (release 2.1) Family \N 25.00 25.00 80.40 80.30 21.20 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.44 0.72 -3.90 2 29 2009-09-10 20:59:54 2003-04-07 12:59:11 12 1 8 2 0 31 0 41.80 88 77.46 CHANGED YKRCHhKtGHCFPKphIClPPSSDFGKMDC.W+hKCCKKGSsp YKpCHKKGGHCFPKEK.IClPPSSDFGKMDCRW+WKCCKKGSsp.. 0 0 0 0 +3456 PF02384 N6_Mtase N-6 DNA Methylase Bashton M, Bateman A, Mian N anon Pfam-B_508 (release 5.2) Family Restriction-modification (R-M) systems protect a bacterial cell against invasion of foreign DNA by endonucleolytic cleavage of DNA that lacks a site specific modification. The R-M system is a complex containing three polypeptides: M (this family), S (Pfam:PF01420), and R [4]. This family consists of N-6 adenine-specific DNA methylase EC:2.1.1.72 from Type I and Type IC restriction systems. These methylases have the same sequence specificity as their corresponding restriction enzymes. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.90 0.70 -5.35 15 7778 2012-10-10 17:06:42 2003-04-07 12:59:11 11 73 3514 13 1474 8902 2092 276.20 22 51.07 CHANGED pstDlhGDsYEYlLupFApspGKpuGEFaTPpsVocLlsclL....-spssc.IYDPAsGSGuhLlpsp+altt........sspspslsla....GQEhstoTapLA+MNhhlHsIchstht...lphuDTLtsspap........-tphDhVlANPPast+.W........ssssltsDs..Rap.t.....hsPpssA-aA..al.HhlhpLussG+AAlVlssGsLac.GusEucIR+tll-pshl-slIuLPspLFas.TuIPsslllLsKsKs.........pcscVLFIDAsp.atpcs..+ppstLos-cIpKIs-sapph.....................c-ls.....tFu+sAol-EIpcNDYNLslsRYVsstcpc-- ........................................................................................................................................p...p.huph...Y.E..h..h.lt.............p.............h................................t....t.............s.............p..........p..s..Gpa..a..T.P.p.t.l...s.c..h.h.s....pll.........................................p.s...p.......t...t.....p....l.....h.....DPs.sGoG..u..h....L....h.p...s....h....p....h...l............................................................t.p.........t...p...h.....p.h.a..................G......E...h.....s...............t.......s...h.....t.........l.....u.......t............h.....N........h........h.......l........+.....s......l........p...........t...............t....................lh......s...s.....o......L..p...p..s.hh....................................................tph..D...hl..luN.P..P.a....u..h..p..................................tt..p..p..h.......t..p......s....................h.................................hs.s.p...s.....p...s....p....h..h..............F....l...t.....p.....h.....l.....p...h..........L..............p..........s...............s..........G.........p......h......u...l.l...l.s.p....u..h.L.hp....s..........s......t......p......t....p...............l....R..p...h.l.l..........c........p.......s.........................l.c..s.....l.............l...t....
..L.........P...........s.......s...l...F.......h...s.....s..s......l...s...s..s..l..h...h..h.p....K.p.p.........................................tpp..p.l..h..h.......h..c..h...t....t............h....................................tt........t.p....h.........t...............................t..............p.........h.............h.........t.h..h.t...........................................................................................................................................t........................................................................................................................................................................ 0 495 1021 1282 +3457 PF01555 N6_N4_Mtase DNA methylase Bateman A anon Pfam-B_164 (release 4.0) Family Members of this family are DNA methylases. The family contains both N-4 cytosine-specific DNA methylases and N-6 Adenine-specific DNA methylases. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.58 0.70 -4.87 49 7047 2012-10-10 17:06:42 2003-04-07 12:59:11 13 54 2966 14 1466 6738 4026 224.00 20 60.70 CHANGED lcllhssPPYshspp........................................thsptps..........hppahpa.....hhphlppstcl.........LKssGslhl.ss.phht............................................................................hhthhhp.hs..ahhhshIhWpKs........t.shsp.pt..............phstspEhllh................asKscph....................................................hhshphhch.hpptpthphtt........................................h...........W......................................................................................................ht..ppppt..............H......ssp+PhtLhc+lI.phsop.........sDlVLDPF....hGS.......GTTshsAtpLsRp.......................aIG.hEhppcahc..hutc 
.....................................................................................................................................................................................................................lchlhhDPPY..ht.p.................................................................................................t.t.......p...............................p..ta....h.p..h............h.......h..ht..hhcl.............L.p...s.G.s.h.hl....hs...pth.........................................................................................hhhh.h...p.............s................h..h....s.......l.h..Wppt............................................hsp....t........................................................................hh.t.....t...t..E..l...hh.................................................................................h.....Ks.tt..............................................................................................................................................................................................................................................................................h..........................W.....................................................................................................................................................................................h......t..............................................a..........s..s...KP.t...L....lppl.I....t..h....s..o..p................................................s..s...l...V...L.DsF....sGS.......GTTsh......s...uh..p.h.s..R.p................................................hlu.h-.hptthhp.....t........................................................................................................................................................................................................... 
0 537 1019 1270 +3458 PF04245 NA37 37-kD nucleoid-associated bacterial protein Mifsud W anon COG3081 Family \N 24.60 24.60 25.60 24.90 23.10 23.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.20 0.70 -5.10 105 1727 2010-01-13 16:22:14 2003-04-07 12:59:11 8 1 1574 0 252 1017 43 316.50 30 96.20 CHANGED llHplsppsp........................phhhpsp.h.h..ssphp....t..hltchhpth..spc.sttsauph.........................t..thsttlpphhp...t........ppsFhphSpphuppLh.pphpc.p...hs....uGpllhspa...........p..t....sp.........hlhlhhlcpcpuhhhspp............lchp.p.ptl.........sls+lphuAhIslsph..............................phhlshlcscsscp......sstaatc.FLuspp.thsspppocshlpslccaspst.......lspppp.p.....ph+pplhsahpc.......s.-..plslpcluspl..................t.p.....th..sFt....pahppp..th.plp......ps..Fssspsslcph.t+h..sspstulslphctphhs....p.....phh..s.pss-.s....l..pIps..h.s.hpcplpc ..........................................................lHQl.pccp..........clhLp-phL.s...spplp.....p.hltcltchh....ssc.p.psauha........................ps...hsph.L.p.hhp....s...............pcDFlth.SpshsppLp.-.............th..sc.s........hs....uG.llFspa........................phhu..s-................................aLhlhhls.ppohphsps........l-ls.s.phL....................sls+hDhsAcIsLoph..............................spalsalcsR.ls+c.......ls.Fh..hc..FLuss..slssKtps+s.Lhpssc-...astpt.....ph-ctpp..p.......pl+pp...lasahpE.......u.-....El...pl...c.pLup-Lh..............ss..........ps...sFh.....-aspEp........th..tlp....Es..hs.s.D....+..p.s.lcph..pKa.....sutusGlslsh.ss..lhs-........chh...s..ssD.T..........l......hIKs...h...h................................................... 
0 62 131 201 +3459 PF01235 Na_Ala_symp Sodium:alanine symporter family Finn RD, Bateman A anon Prosite Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.50 0.70 -5.90 6 5556 2012-10-03 01:44:59 2003-04-07 12:59:11 12 8 2931 0 865 4003 1762 405.60 38 87.29 CHANGED hhchhp....hhu++scuG..sloSFQALsTuLAuRVGoGNluGVAhAIuhGGPGAVFWMWVsAFlGMATuFsEsTLAphYKh+Dp-GshpGGPtYYlp+GLuhRWLullFAlh.llloFGhlhsulQsNuIAcAhssuFshsslVTGIlLsllsuLlIhGGlKRIAtlophlVPhMAllYllsALsIlhhNI-plPsVIttIhcSAFuhptAsGGhlGusl..AhhhGlpRGLFSNEAGhGSAPpAAAAAcsp..HPspQGlVQhlGlFlDThIlCTsTAllILLoG.h.......................................hss.pLcG..............................................................hplTQtAhppthG.uaGupFluluLlLFAFSollu.YYYuEsNlhaLh.sph+ulh..haRlshlAhVhaGoltshsllWphADlshulMAlhNLIAIlLLpplsachhKDYhcQhKpGhpP.Fcspch ................................................h...............t..t..p..p..p..p.u...slSsF.Q.ALssuLAupVGoGNIAGVAsAI.......s........hGGPGAlFWMWlsAhlGhAotasEuoLA.hY......+.............p...+..-..t...s......G..p.ap......GGPhYYl........p.........+....G.L.......s.........t...........+..............W........h.......u.......l....l.......F.....A.....lh....l.l......l........u........h........G......h....h........h.....s....sl.....Q.......u........N.......o.......l..................u..................p..............u...............h................p...............s................u..............a..............s..................h.............s..............s...............h..............l..............s.................G.............l........l..L.s.......l...ls..u...l.l.Ih.G.G.l.+..p..Iu.plsphlV..PhMAlhYllsulhllhhNhsplP....sshthIhpu.A.F...s..h..p....u......u.s.G.G.h.hG...s...s...l..........................h.GlpRGlF..SNEAGhGSAP.AAA..uA.psp..............H....P..scQGllphhGlFlDTlllCThTAhlIL..l....o...G....h..................................................................................................
.................t....s...s..h...p...G......................................................................................................................s...t....l.Tpt.Ah...s..shh......u.......s......h.......G..s.....hh.......l.s....lu....lh....hFA..Foollu.hYYuEp.s...........lt.a......L.......h....................s.........p...p.....h.....l..h..............laR.......l...l...h....l............s.....h.l..hhG...u.......h.........h...........s.......l...s....h.........l...WslADlh......hGlMAlsN...LIAlll...L...u....thshphh+DY......p.p..h..c....t.................t................................................ 0 285 542 723 +3460 PF03390 2HCT Na_citrate; 2-hydroxycarboxylate transporter family Mifsud W anon Pfam-B_3683 (release 6.6) Family The 2-hydroxycarboxylate transporter family is a family of secondary transporters found exclusively in the bacterial kingdom. They function in the metabolism of the di- and tricarboxylates malate and citrate, mostly in fermentative pathways involving decarboxylation of malate or oxaloacetate [1]. 
25.00 25.00 25.00 27.90 22.10 24.10 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.36 0.70 -6.10 36 838 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 694 0 149 549 5 398.60 39 93.58 CHANGED +IsslPLPlallhuslllluhhh...........spLPssMlGuhulhhlhGhlhGpIGpRlPIl+s.lGGuAIlslFlsohlVaasllspsslcssoshM.Ks.......u.sFL.hYIAsLlsGSILGMsRclLl+uhl+hhssllsGslsAhhsGhlVGhlhGh..........shpcshhaIllPIMuGGhGtGAlPLS..hYuplhG.hsppphhSpllPAlhlGNlhAIlsAulLsplGc+pPcLoGNGp.Ll+spp.....htsp.pcpptplshpthGsGlllAsohFhlGtllschl.......sl.uhshMIlhsAllKhhsllPpphcpGAhphapFhusshTasLlsGlGluassLsplluAl.ohsalllshslVluhlhsuhhlG+lhshYPlEuAIsuG.CtushGGTGDVAlLSAusRMpLMPFAQISoRlGGAhhlllAslL ...........KIsulPLPlYlhh..hh.llhlsthh..............spLP.s.shlGuhAlhhhhGhlhGpIGp+lPIhpp.lGGsulhshhlsuhhVaasll................sp..s..lcusst.hM.cp.......................u..NFL.haIAsLlsGSILuMNR+lLlpuhh+hlssllsGhlsAhhsGhlVGhlhGh..................shpcshhalllPIMuGGhGtGhlPLS..hYuslhG...tsppphhuplIPhhhlGNlhAIlhAulLsplGc+hPp....LoG.pGp...Ll+pss.........sp..pcppptplshpthusGhllAsohFlhGhllp+hl...........sl..s.s.h.Ml.llsshlKhhslhPschcpGAppl.cFhSpslTasLMsGlGlshhsLp-llssl.ohs..lllshslVluhlssuhhl.G+hhshYPlEuAIsuu.CpushGGTGDV.AlLSAuNRMsLhsFAQIuoRLGGAIsllluoll.................... 0 28 64 100 +3461 PF03553 Na_H_antiporter Na+/H+ antiporter family Bateman A anon Pfam-B_620 (release 7.0) Family This family includes integral membrane proteins, some of which are NA+/H+ antiporters [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.93 0.70 -5.39 38 7404 2012-10-02 15:12:49 2003-04-07 12:59:11 9 9 2159 0 1156 6794 758 256.90 17 76.09 CHANGED sssFGs.ttShlu.ssshusthhtstlhsphp...........h.hhhpshshtlhuhhhhhhluhlhh......hhhslsshhpp..tsh.....................phshhthtt.............Phhslhhhslhth.hhsshsh.shhs.shshthh...hpssshhtshhhuuhsuhhhthhsth.......htphsshlscG.hpsMhhslhlllhAhuhuullpcsG.hssllpslspth.ps.........hthhshlhhlluhhlshusGsuauolsIhsslhtshstphshsh..........hshsusltsuushu-shsPhSsoslhsssshts...plsplp.sshshhhhhs.h.hhlhhlhhGh ......................................................................................................................................................................................hhhs....s.h....h..h.h.....h.........h.h.ttht................hh.h..........h.h.shh..hh.h.h..h............................th..........h.t......................................................................................ht...h.....................................s..hh.h.h..h..h..h.......h.h......t.h.....h.hh......s........h.........h...........h.................h..........h.........s........h........h.h.thh.................hp................s....................................................h........h...............s.......h.........h.h..u....hh....h...h..hh.h.th.............................h.tp..h.......p...h...h.....s.....c.....G....h.....p..s..M.....h.........h....h.hl.h.....lh.uh.u..h..u.ul..h.p.....p......h......G.....h.ls...s...ll.....p.....t.....l....h....ph.h...ps....................................t.t.h....h.h..h..s.h.h.l.l.u.hh..l..s.h..u..h.G.s.u...as....o.ls.l.h....u.s.l.h.h.sls.p.p.h..s.ls...........................hhh.s.u.slt...su...us.hs.s.th..Phu..s...s.s......l.....h......s........sh....h..........................t..........h.......s...........h............h........h..s..h..h.....h.h..............l..hhh......................................
......................... 0 401 740 997 +3462 PF00287 Na_K-ATPase Sodium / potassium ATPase beta chain Finn RD anon Prosite Family \N 23.30 23.30 23.30 23.70 23.10 23.20 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.96 0.70 -5.63 11 608 2009-09-11 13:38:36 2003-04-07 12:59:11 13 6 122 10 324 552 0 246.50 31 90.97 CHANGED MA+.cpccs........p.supWKcFlWNPcppEFhGRTuoSWshILLFYllFYGhLAGlFshTlhVMLpTls-apPKYQDR....lAsPGLhhpPcs..sp...hEIsFssu.sspSappaVpsLppFLcsYssopQsp....hpsCs..PscYh-pss.s...pspKKuCpFptphLG.sCSGlsD..sFGYs-GKPCVllKhN......RIlGFpPcs...............s.......ssh.lpCsuKcsE..tpplsslpYaP.s.....GshsLhYaPYYGKphpssYlQPLVAVpFsNlo..tssEltlECKla.usNlphs-.+D+F.GRlsFK .......................................................................tth.ppah.a...ssp..pt...ph.hGRTstuW.........hh..Ihl....FYlhFYs.hLsuhFshsh.h.s.h....h...T.l.s..s.h...h..P....p...a...p..-p...................hs...s..P.G......l....hhtP.s............tt.......h...l..a..phs..c..spoaptaspplppFL....p.......Ypss.ptp..........sCs.........tthh.p.t............tpsCpFph.p........L.....t....sC....Suhp..D.....aGYpp...G.p.PClhlKhN......................Rl.lsahPp..........................................................t..............ssh...ls.Cp.s....p......p........t....t......c.........t..p.......p.......l.......sp.l.......pYaP.s....................sh..s......h.h..Y..a...P...Yh...s...............p....s..sY.....h.p...P....LVAVp..Fh..shs............hshtlplcC+hh.upNl..s.p.tcc.h.Gphth............................. 0 79 107 205 +3463 PF00939 Na_sulph_symp Sodium:sulfate symporter transmembrane region Finn RD, Bateman A anon Pfam-B_1100 (release 3.0) Family There are also some members in this family that do not match the Prosite motif, and belong to the subfamily SODIT1. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.60 0.70 -5.62 11 6208 2012-10-02 15:12:49 2003-04-07 12:59:11 14 27 2554 0 1377 7719 2167 426.10 24 91.05 CHANGED tsphhpLlhhlslhllIahlPsPsGlsspAWphFAIFhusIlGlIhcPlPhuslAlhululh.llstsLs..................lstuLSGFussssWLlFsAFhlutGah+TGLG+RIAhhhlphhGpoo..LsLGYullhs-llLAPuhPSsTARuGGIlaPllpuLss..........uhGSsspcs.................................o.+plGuaLMhssh.uosITSuMFLTAMAsNsLshslhsp...hsGhploWhpWhlA....AlssGllhLllsPLllYhlaPPclcps.............-shphAcpcLcpMGPhstpE+tllslhlLsLlLWlFGs...........pls......lsAoTsAllslulhllhp.....................llsWc.DllpppsAWsTllWaGuLIsh..AshLspsGFlpWlusshsshlu..uhssthuhhlllhla.ahsHYhFASsoAaluAMhPlFluVupul.G.ss.hhhuLsLsFusulhGhlT.YGoGPuPlhaGuGYlsspcaW+hGhlluIlslllaLslGshWWphLuhh ..........................................................................h............h.h..h...h.h..h.h...h...h..s...h.....s...................s.........h.....s..................s....h..h.........h.........hu..............l..hl..............hh...hl..h...h...l.h.c...s...l.Ph.s..s.s.u..l...l.s...h..s.lh.....lhs.hh.......................................................................................p...s.h.u.u.a.us.s..slaL.hhuu..Fhluhu..hpc.ps....L....s...+R.l.A..h..h.l..l..p...h..h....G.p...p.s.................l.h..l..u...a...h..h..s..h..s.....l...h..h.....s..s..su...s..oA..h.h..h..s..l..h..h...s.l.l...p.s.ls.........................................h.t..s.p...p.p............................................................................................................................................................................................................................t.p...h.u.th...l.h..h.s.h.hhu.s...s...lsu.hhh.hsu....s.s...s.Nh.l.hh.t..hhpt....................t..h..p....l..s..ah..p..W..hhh.....uh.P....s..l...l....h....l.....h.l...h....h.......l.h....h.h....h.a.........s...p....h....p..p...h...........................
.........................................t..s....t...h..h.....p..p...c.h...p.p...h...G.............h..s..h.....t...E......h......h...h....l.......s.......l...h..l....l..s..l..h......h..........W......h......h..t.s..........................................ht..................l...s...s..s...s..l..u...l...h..s....h.s..l..h....h..h.hs.....................................................................l..l...s....W...c........h.....h.....p.....p..p.....h.s......W.....s.....s.l.....lh.....h.uuh..lsl..............uss..L..p.p..oG.h..hp.al............uptl..ts.h..hs...............h..t..............s.........h...s.h.h.h....l..............l..h...l...h...h.h.h..h..h......F.s..S.s..o.A...ts....s....h.h.h....P.l...h....hs..l..........u..........h...u...h.......................u.......h..........s......s......h......h....h...s...l.....h....h...........s....h....u....s..u....h...s......hh....h...Phus.......s..P.....s..s.....l.......h..........a........G........s........G......h........l.....p..............h.....p.......c......h..h...+....h......G.....h....h....h......s....l...l.s...h........l...l...h.h...h.h...s.............hh......h........................................................................... 0 425 767 1127 +3464 PF04902 Nab1 Conserved region in Nab1 Kerrison ND anon Pfam-B_6188 (release 7.6) Family Nab1 and Nab2 are co-repressors that specifically interact with and repress transcription mediated by the three members of the NGFI-A (Egr-1, Krox24, zif/268) family of transcription factors [1]. This C-terminal region is found only in the Nab1 subfamily. 
25.00 25.00 26.50 44.50 17.60 18.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.04 0.71 -4.76 2 61 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 38 0 27 45 0 154.70 68 34.82 CHANGED GERDELSPKRIKVEDGFPDFQ-sVQTLFQQARAKSEELAALSSQQsEKsMAKQMEhLCsQAuYERLQp.ERRLoAGLYRQSStEHSPsGLsSDsSDGQGERPLNLRMPNlQNRQPHHFVsDGELSRLYsSEAKSHSSEsLGILKDYPHSAFTLEKKVIKTEPEDSR ..GERDELSPKRIKlEDGFPDFQ-oVQTLa.......QQ..........A..+AKSEEhsALuSQ.........Q.sEKV.MA..KQMEFLCsQAGYERLQpsERRL.SAGL.YRQu..SEEHSPN.GLsS.D.sS.DGQ.GERPLNLRMP..N..LQ.NR.....Q.P..HHF.....VlD..........G..........E..........LSR.L.Y.s...S...E.AKS+SS..E.SLGILKDYPH........S.AFT.L.EKKVIKTEPEDSR.......... 0 1 4 10 +3465 PF01849 NAC NAC domain Bateman A anon [1] Family \N 21.30 21.30 21.40 21.40 20.50 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.73 0.72 -4.44 91 1327 2009-01-15 18:05:59 2003-04-07 12:59:11 13 15 510 10 771 1213 39 57.30 36 25.22 CHANGED pKKhpphhc+lGl..cplsslpcVslpp.sctphlhhspPcVp..pu.s.ssoahl..hGcscpcs ...............-KKhpp.sl.pKLGl.....psls.Glpc..VshhK.sc..shlhhhsp.P.cV..............tS.s..usTahl.hGcuchc................... 0 231 389 577 +3466 PF00175 NAD_binding_1 oxidored_fad; NAD_binding; Oxidoreductase NAD-binding domain Sonnhammer ELL anon Prosite Domain Xanthine dehydrogenases, that also bind FAD/NAD, have essentially no similarity. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.61 0.72 -3.49 72 19747 2012-10-02 19:13:12 2003-04-07 12:59:11 16 239 4712 191 6047 15837 2129 105.10 20 24.44 CHANGED hlu.GGoGluPhhshlpphhtt..p...........tspshl.haGs+spc.cll..hc-Elcphttph.t....hhh.h..........ppssss.tst+GaVpctl.c..ch.............ttth.lahCG.P.sMhcsspp ..............................hlu.uGsGlsPhhu.hl...pp...h..h..t.....p....................................................ptsp...h.h...l...h..aus...R..........stp....-.......h..h.........a....t.c....-.l.p...p......h..t.pph................h...h.hhh.............................spsp..s.....s..............................p.....G...h...l.s....p.....t..l....h...p..phht.................................t.s..s.p.l.a.l.CG.sssMhcsst.t...................................................................................... 0 1691 3423 4899 +3467 PF03446 NAD_binding_2 NAD binding domain of 6-phosphogluconate dehydrogenase Griffiths-Jones SR anon Prosite Domain The NAD binding domain of 6-phosphogluconate dehydrogenase adopts a Rossmann fold. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.76 0.71 -4.40 163 12732 2012-10-10 17:06:42 2003-04-07 12:59:11 10 80 4225 91 3550 30059 15093 161.10 28 46.30 CHANGED htpIGhIGLGlMGpshAhNl...h.ctGasVslas.Ro..t.....spscphhtp...u..........hhsstohp.-hspsh......chllhhVtsussVcsll.s........lh..stl........ptG..sllIDsussphpsspchscp.lpppG..ltalsusVSGGptGAtpGs.olMsG..Gspp.uacplpP.llpshus.........sls.......h.Gs ..........................................................................plGhl...G.L.G.h.M.......G....t...s....h....A..t..Nl.................h...c....t....G........a......s.......V......s....l.....a..s.....p.s......t...........p...t...s...c.....p...h...h..t.p....u.......................................st..s.s..t....o...s.p....-...h..s..p.sh.........................chl...l...h.........M......l..........s.........s..........u..........s...........s..........l.......c.........s..........l........l.......s.......................l..h.....s.t..h..................................................p..t...G.......s....l..l......l...D.......t...u.......s.....s.s........p..s.......s.p.c.h.s....p.......t.......l....p.........t......p..G.............h.p...............a.....l...-.........u....s...........V....S..G........G..p...t..........G...........A...t.p......G.........s.............olM..s...G..........G...s....c...p......s....a..c..p..l...p.P...lh.p.s.h.us...........slh.h.G.t........................................................................................................................................... 0 986 2066 2904 +3468 PF03447 NAD_binding_3 Homoserine dehydrogenase, NAD binding domain Griffiths-Jones SR anon Pfam-B_459 (release 2.1) Domain This domain adopts a Rossmann NAD binding fold. The C-terminal domain of homoserine dehydrogenase contributes a single helix to this structural domain, which is not included in the Pfam model. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.33 0.71 -10.36 0.71 -3.70 30 6031 2012-10-10 17:06:42 2003-04-07 12:59:11 11 30 4281 27 1498 6276 2360 124.60 26 23.63 CHANGED GhGslGsslhchLpcstt........slclsulsscc...hptt..........htshhsstshsssl-clls........c.sDllVEs.uu.pslcphshphLcpGhcllssshuAlu...shhtcLhphAcpsssclhh. .........................................................................GhGsV..Gu.u..l.l..c...l..l...pcppptltt...............phshplp.lss..l.ss..pc......h.ptp........................................ht.t.t.h..s..s..t..t...h....s....p..s...h..p.....p.llp...............................p.hsl.lVE......l...u......u......s......p.....s.....s.....t.....s.....h..h..h.c..sL.p...p..GhHV.....V..T.A.NKs..hhA.........sahppLtph.A.cpss.hphhap................................. 0 464 940 1256 +3469 PF01210 NAD_Gly3P_dh_N NAD_Gly3P_dh; NAD-dependent glycerol-3-phosphate dehydrogenase N-terminus Finn RD, Bateman A, Moxon SJ anon Prosite Family NAD-dependent glycerol-3-phosphate dehydrogenase (GPDH) catalyses the interconversion of dihydroxyacetone phosphate and L-glycerol-3-phosphate. This family represents the N-terminal NAD-binding domain [2]. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.72 0.71 -4.50 44 5998 2012-10-10 17:06:42 2003-04-07 12:59:11 18 26 4763 27 1531 8856 4915 150.50 28 44.49 CHANGED pluVlGuGuWGTALAtlLucsu........ppVplWspcs.........phhcplspp+p.NscYLPs.lhlPs.slpsssDLtcAlpsu-hlllsVPopshcplhpplpshl.....ppsshllt........ssKGl...Et.........sohphlScllpEhlstp....luVLSGPshAcEVAtth.osssluu...pstphu ....................................................................lsVl.G.u.Gu.a......GT.A.l.A.h..hl..ucsG............ppVh.lW.s..p..p............................................chh.p.p..l......p......p.......p.......+......p.......N..................p..........a.......L..........s.....s.......h............t.......h.......s........p.......s..........l...........p.....s.....s......s........D....l....t.........p...A.....l.............p......s......u...............-......h.....l......l.h.s..l..........P..o...........p...........s........h..c...p.........s..h..p.p....l..t..s..hl..............pss...s...h...l..l.t...........................ss..K..Gl................E.............................soh.ph...h...s..c.l...l...p..-.......l.stp.................luV.lSG.....P..saA.pE.lutt...oshsluu.ps.p..h....................................................................................................... 0 513 967 1299 +3470 PF01513 NAD_kinase DUF15; ATP-NAD kinase Bateman A, Wood V anon Pfam-B_797 (release 4.0) Family Members of this family include ATP-NAD kinases EC:2.7.1.23, which catalyses the phosphorylation of NAD to NADP utilising ATP and other nucleoside triphosphates as well as inorganic polyphosphate as a source of phosphorus. Also includes NADH kinases EC:2.7.1.86. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.62 0.70 -5.63 33 6066 2012-10-02 15:20:27 2003-04-07 12:59:11 16 26 4752 59 1890 6136 3391 258.10 24 81.92 CHANGED plullssss....................ptpstthspcltchLhsp........shhhh.ttthtt..........................................................phhhhshphhcpss-hllslGGDGThlsssphhsp....slPllGlss.GshsFhsph..p.cshtphlsphlp.schplpct.....hl-...........................shlppspp...............hh..thslN-hslhtutssphhp..hclhlssshhsshts-Gl.............................lluTPsGSTuashuuGGsllpPsltsh.l....s.....hhhttcslVlssppplph.......ppsshlshDGppplpht.sshlplphu.pt.hhhlch ........................................................................................................................................h.......................................................................................................................................................................................................................s..p...p..s...D.lllslGGDGThLpAs.+t..h..tp...............s..lP..l....l.G.l....s........t......G.....p...L...G..F.....L....s.-.h...............psp..p.h....p..ph.l....p.....p.l..........h..........p...s..............p......h......p...l......p....p..+.......hLc.............................................................................................................................................................sp.l...p.ppsp.....................................................................hh.ph..h.A.l..N...E.l....sl....pp..u.s..t.sp.hhp...........h-l....hl...c.s.....p....h..h....p...s....h..puDGl....................................llSTPTG...STAY..shS...u.G..G...sl.....l.p.P.s.lp.u.l.hlsPlss.........+s.ht.sR...Pl...V.....ls.s.p.splplph...............tppshh....ls...hD.u..p...t..h..t..h..p.s...ppl..lphst...hphh..................................................................................................................................
................................................................................. 0 612 1178 1585 +3471 PF02540 NAD_synthase NAD synthase Mian N, Bateman A anon COGs Domain NAD synthase (EC:6.3.5.1) is involved in the de novo synthesis of NAD and is induced by stress factors such as heat shock and glucose limitation. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.63 0.70 -5.47 11 9645 2012-10-02 18:00:56 2003-04-07 12:59:11 12 26 4887 87 2593 10500 6866 175.90 22 39.30 CHANGED llpclpsFl+phVpcsGs.pGVVlGLSGGlDSulVAhLshcAl.........Gp-psluLlMP.s..ssccDhpcAhslscpLuhphpplsIcshhpuasp....thppst....pchscGNlKARlRMshLYsaAsphshLVlGTuN+oEhhlGYFTKaGDGusDlsPIusLhKopVacLA+tls.....lPccllcKsPoAsL...asGQTDEcELGlsYc.LDplLp....hhp.t...p-..tthslstphscpltphlpKoEHKRc.P ........................................................................p.......hlt.p..h.h....p....p.....s....Gt.....c....p....l..l.......L.G..l..SGGlD...S..o....l...s....u....t....L....s...p....c.A..l.............................................u..p...p...p.....l.h....s..l.....h...l............s...........s..............h..................t..........p..........p............p..............p..............h..p..t......s......h......t.......h..h....p.t...h.....t.....h......p.....h.....h.....h......l.....s....h.....p........h..h...........t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................ 1 860 1651 2190 +3472 PF00146 NADHdh NADH dehydrogenase Sonnhammer ELL anon Prosite Family \N 20.00 15.00 20.00 15.00 19.90 14.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -11.82 0.70 -5.37 23 21511 2009-01-15 18:05:59 2003-04-07 12:59:11 16 18 13672 0 1222 18971 2989 236.20 39 95.74 CHANGED hhllshLhlIlsllluVAFLoLhERKlLuhhQhRKGPNhVGshGLLQPlADGlKLhhKEslhPusushhlFhluPslulhLullhWsslPhshshl..shNlGlLFhLAlSSLuVYulLhuGWuSNSKYuhLGuLRAsAQoISYEVoLuLILLol.llhsGSashsslspsQp.....hhWhlh.phPlhlhaaISsLAETNRsPFDLsEuE.SELVuGaNVEYuussFALFFLuEYuNllhMshLs.slLFLGu.h.............................s.hhhhhKshhlhhlFlWlRuohPRaRYDQLMpLsWKsaLPLoLuhllhhsulhl ...............................................................................h...lh.h..h.l....l....L...l.u....l....A....a...h....o..l......h...E.......RK.lLuah.QhR+GP........N...........h..........V...........G.......................h..........G......l......L......QPhuDulKLh.hK.E......l.h.Ptt....u.s........h....l..a.h.hs.Phl.u...l..h..l........u..lh.h..a.h..........l.......P.........h....................s..............h................s............h..............h........................................................s...........h......s....l...u..l.L..........a..........h..l........s..h.o.Sl..u..VY.sl......l..h........uG.Wu.S..N...SKYuh....lGul..Ru..sA.Q..sISY.E..ls.h.s.l.h.l.l...s........h......l.h..h......s........u.............s..........h...s...h.......p..h..........s.Qt..................................h.W......h......h......h...................h...s....h...h...h..h..a.....h....h..ss...l....A.......Es....N.R...tP.FDh...s...E...u..........E...pE....L..s...s.G..a......h...EY...u.uh.Fuh.h....h.h.u....E.Y.h.....
t.h.h....hh.shh.....s.h.ha..h..s.s..........................................................................................h.hhhc.h.....h...h.....h.h...h.h...h.c.shsRh...Rhs...h.....hh..W.h....h......................h.................................................................. 1 394 766 1015 +3473 PF02477 Nairo_nucleo Nairo_nucleocap; Nucleocapsid N protein Mian N, Bateman A anon Pfam-B_2892 (release 5.4) Family The nucleoprotein of the ssRNA negative-strand Nairovirus is an internal part of the virus particle. 25.00 25.00 41.20 28.40 17.60 17.10 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.50 0.70 -5.79 3 472 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 16 1 3 283 0 180.00 74 96.38 CHANGED MENKIcANNK-EFNcWFKpFuEKppLsssaTNSASFC-cVPsL-pacaKMALATDDsEKDSIYSSALVEATRFCAPIYECAWsSSTGlVKKGLEWFDKN..uDTIKlWDAsYh-LKsElPcsEQLluYQQAALKWRKDVGFcINpaTuuLoHsVlAEYKVPGEllhslKEMLSDMIRRRNlILNGGGD-APKRGPVSREHV-WCREFAuGKFlsAFNPPWGDINKuGKSGIPLlATGLAKLAELEGKcVhD-AKpoltsLcGWV--NKDpVDcuKA--LlpslpKHlAKAlELuKpSNALRAQGAQIDTsFSSYYWsWKAGVTPETFPTVSQFLFELGKsPRGsKKMpKALlSTPLKWGK+LIELFADDDFppNRIYMHPAVLTuGRMSEMGlCFGsIPVASPDDAAQGSGHTKuILNaKTcTEVsNPCAsTIVpLFEIQKsGa ..............................................................h.hpMA.ATDDupKDSIYASALVEATKFCAPIYECAWsSSTGIVKKGLEWFEKN....uGTIK...SWDEsYsELKV-VPKIEQLANYQQAALKWRKDIGFRV.NANTAALSNKVLAEYKVPGEIVMSVKEMLSDMIRRRN.lLN+sG.--ss............................................................................................................................................................................................................................................................................... 0 3 3 3 +3474 PF04131 NanE nanE; Putative N-acetylmannosamine-6-phosphate epimerase Mifsud W anon COG3010 Family This family represents a putative ManNAc-6-P-to-GlcNAc-6P epimerase in the N-acetylmannosamine (ManNAc) utilisation pathway found mainly in pathogenic bacteria. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.22 0.71 -5.33 7 2374 2012-10-03 05:58:16 2003-04-07 12:59:11 9 5 1884 8 180 1739 546 188.00 47 81.67 CHANGED hstMAhAut.uGAsGlRhpGVpslpthpshVslPIIGIlKRDhPssslhITshhp-lDpLAssGs-lIAhDuTsRsRP..lsl-shlcpIKcph..pLhMADCSohEEGlhspphGh-hlGoTLSGYTssp.s...s-PDapLl+sLspAGhh..VhAEGRhsTPE.Apcsh-hGssuVsVGuAlTR.ccIsphFspAlpp ...................................................................................................hutMAhAAppuGAVuI.......Rh........p...ul...p...s.lps.l.+p.h.V..s.lPIIGIlK......R.D.h.s...s....s..s.V..a.....IT.s.s...h.c....-VDtLsp.sGs.-lIAlDu..Tt.RpRP.......hslp.phlp...pI...+.c.+h..........hL.h..M......A.D.s.S.o....h...E..-....u..l....s....A...tc...h...G.hDh....l.........G...T..T..L......S..G.....Y.....T..s..sss.........ppP..D.....h.....p....L.....l....+...p....L....s....c....s......s..s......VIAEG.+hpTPppAtc.s.h.c.h.GAauV..VV..G.u..A..I.T...R...Pc...c..IsphFssAlc....................................................... 0 49 100 147 +3475 PF04660 Nanovirus_coat Nanovirus coat protein Waterfield DI, Finn RD anon Pfam-B_4486 (release 7.5) Family Family of conserved Nanoviral coat proteins [1]. 25.00 25.00 143.60 143.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.99 0.71 -4.56 3 85 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 11 0 0 65 0 166.00 74 99.27 CHANGED suRWshpGhK+RRosRRKYGRhAYKP..PoS+VVSHluSlLsK--VVGs.EIKP.DuDluRYpMhKVMLlsTL+MsPGELVNYlIVKSSSPIANWSouFosPuLhVKESsQDhVoIVuuGKLESuGsAG.oDVTKSFRKFl+LGuGISQTQHLYLlhYoSsAlKIsLEsRlYI-V ....huRas+puIKKRRVGRRKYGS.KA.....ATSHDYSSLGSILVPENTVKVFRIEPTDKTLPRYFIWKMFMLLVCKVKPGRILHWAMIKSSWEINQPTTCLEAPGLFIKPEHSHLVKLVCSGELEAGVATGTSDVECLLRKTTVLRKNVTEVDYLYLAFYCSSGVSINYQNRIhhcV. 
0 0 0 0 +3476 PF00956 NAP NAP_family; Nucleosome assembly protein (NAP) Finn RD, Bateman A anon Pfam-B_1009 (release 3.0) Family NAP proteins are involved in moving histones into the nucleus, nucleosome assembly and chromatin fluidity. They affect the transcription of many genes. 26.90 26.90 27.30 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.60 0.70 -5.25 97 1619 2009-01-15 18:05:59 2003-04-07 12:59:11 13 20 399 31 836 1514 9 214.90 28 67.14 CHANGED +..p+l.psLcplQtchppl-tcapcchhcL.Ep+YtphhpPlap+...RpcIlsG.h...tsp....................................................................................s.lP.sFWLssh.cNpshlu..phIs.-cDcphL.caLpDlchphhcs...............uFpl.FpFss...NsaFpNp..lLoKpYhhps.....p........................pspuspIcW+.pu+slThphhp+....................KpcpKtpt..th+.lpct..tc...........SFFNFFs......s.......p...........................t.t..ptpchp..t.ht.DaclGphl+-cllPcAlpaasGcs ................................................................................................................................tlttLctlQ..chsplptph.cchhp..........l.EpKatph.hp.Ph...ap+...Rp.pllps...........................................................................................................................................................................................................................................................................................................................................IP.sF.W.hss.......h..tN..ps.........lu...............thlp...-pDEth.L..caLpc.lclp..hct..........................uap.l.pFhFpt......N.sY.Fp.Np.............lL...s..K.pah..hpp.....psp............................................................tspuspIcW+..pGcs.lsh.ph.pp......................................................tp.ppptt................p..............................................SF.Fs.aFs.....................................................................................................
...t..t..p.t.................s.cluphl+-clhPpsl.aah............................................................................................... 0 233 366 559 +3477 PF03892 NapB Nitrate reductase cytochrome c-type subunit (NapB) Bateman A anon COG3043 Family The napB gene encodes a dihaem cytochrome c, the small subunit of a heterodimeric periplasmic nitrate reductase [1]. 21.10 21.10 21.20 21.20 20.80 20.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.10 0.71 -4.48 75 1044 2012-10-01 23:37:15 2003-04-07 12:59:11 9 3 988 11 158 429 23 131.50 46 86.09 CHANGED hhshsssussssssp.......................ltsLRsss.lssp.ssssshpph.....spcstphpRsYspQPPlIPHsl-u...YplshssN+CLoCHuhppspcstAshlSsTHahD..R-GphL.uslSPRRYFCsQCHVPQsDu.pPLVpNsFpsh ..............................hhs.hsshstssts.................................................t.s....sst.-..shh+h.......P+Epp+hshsYVNQPPhIPHSl-G...YQ.VTpNsN+CL.pCHuh.....-....s....h+s...o.GAs+ISsTHF.h.D....p..................DGK..Vs...upVuPRRYFCLQCHVPQuDs.tPlVsNoFpP.s.................... 0 28 80 125 +3478 PF03927 NapD NapD protein Bateman A anon COG3062 Family Uncharacterized protein involved in formation of periplasmic nitrate reductase. 28.00 28.00 28.20 28.20 26.40 27.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.32 0.72 -4.15 59 1029 2012-10-02 00:20:33 2003-04-07 12:59:11 8 1 992 2 164 424 22 78.70 39 82.85 CHANGED pch.HIsSLlVpspPcclspVpssltslsssEIauhssp.GKlVVllEusspttlhcplspIpslsGVLssuLVYHp...h-st ......p.phplsSLVVpA.Ks.-.+.l.sslpspLsshPus.Eltss....-s..Gp.....LIV.VlEu-sp....-sllpoIEu.lcslcGVLuluLVYHQQ-p............. 0 34 85 130 +3479 PF03059 NAS Nicotianamine synthase protein Mifsud W anon Pfam-B_2173 (release 6.4) Family Nicotianamine synthase EC:2.5.1.43 catalyses the trimerisation of S-adenosylmethionine to yield one molecule of nicotianamine. Nicotianamine has an important role in plant iron uptake mechanisms. 
Plants adopt two strategies (termed I and II) of iron acquisition. Strategy I is adopted by all higher plants except graminaceous plants, which adopt strategy II [1,2]. In strategy I plants, the role of nicotianamine is not fully determined: possible roles include the formation of more stable complexes with ferrous than with ferric ion, which might serve as a sensor of the physiological status of iron within a plant, or which might be involved in the transport of iron [1]. In strategy II (graminaceous) plants, nicotianamine is the key intermediate (and nicotianamine synthase the key enzyme) in the synthesis of the mugineic family (the only known family in plants) of phytosiderophores. Phytosiderophores are iron chelators whose secretion by the roots is greatly increased in instances of iron deficiency [2]. The 3D structures of five example NAS from Methanothermobacter thermautotrophicus reveal the monomer to consist of a five-helical bundle N-terminal domain on top of a classic Rossmann fold C-terminal domain. The N-terminal domain is unique to the NAS family, whereas the C-terminal domain is homologous to the class I family of SAM-dependent methyltransferases. An active site is created at the interface of the two domains, at the rim of a large cavity that corresponds to the nucleotide binding site such as is found in other proteins adopting a Rossmann fold [3]. 
23.80 23.80 23.90 23.80 23.70 23.50 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.86 0.70 -5.24 13 220 2012-10-10 17:06:42 2003-04-07 12:59:11 11 2 133 12 98 214 3 240.50 29 85.46 CHANGED spN...ssslVcKIssLYstIS+LsSLs....PSscVssLFTcLVssClPssP.lDVoK..Ls..clQchRspLI+LCupAEGhLEuHaSshLuua-.NPLsHLshFPYasNYl+Lo+LEaslLupasst.sPs.+lAFIGSGPLPLTSlVLA.....opHLs.....................sTsFcNaDhsusANspAppLls.uDssL.usRMsFHTuDlsclss-LssYDVVFLAALVGMspE-KsKllsHLu++MAsGAsLllR.SAHGA.....RuFLYPllDPp.Dlp..GF-VLulaHPs.D-VlNSVIlARKhh.ss ...............................................................................................tt........hltplhth.ttlt..pL.s.t......p...p......pth...Ft...pL...hs.....p....ch.p.....Lt...phpthhtplhplps.ApuhLEtahu.p....l...s...uh..p......s........P....p..p.......l.p...F...sYassY.pLsphEhphhsh..thst..........st..+VhFlGSGPLPhoullLs.....tph.hs....................................sshh.shDh-ssA....tphAp.pLlp...s...s......L........p.pphp.Fhsss...h.t.p............h..s..p..s.lt.taDhlhlA.uLVu.......pKtplltpLtphhs.......s.....uu..hLlhR...supGh.....Ru.hL.Y..s...lc.t...pht...saphhs.h.P...........h.so.hh........s...................................... 0 27 56 80 +3480 PF04159 NB NB glycoprotein Bateman A anon Pfam-B_1501 (release 7.3) Family The NB glycoprotein is found in Influenza type B virus. Its function is unknown. 24.40 24.40 24.70 80.70 21.60 24.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.43 0.72 -4.27 24 1614 2009-09-10 20:28:57 2003-04-07 12:59:11 8 2 1536 0 0 543 0 95.00 93 99.71 CHANGED MNNATFNYTNVNPISHIRGSVIITICVSFTVILTVFGYIAKIFTNKNNCTNNsIGL+ERIKCSGCEPFCNKRDDISSPRTGVDIPSFILPGLNLSESTPN ....ATFNYTNVNPISHIRGSlIITICVSFIVILTIFGYIAKIhTNRNNCTNNAIGLCKRIKCSGCEPFCNKRGDTS.SPRTGVDIPuFILPGLNLSESTPN............. 
0 0 0 0 +3481 PF00931 NB-ARC NB-ARC domain Bateman A anon [1] Domain \N 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.75 0.70 -5.55 12 16257 2012-10-05 12:31:08 2003-04-07 12:59:11 17 1033 906 35 4750 16655 176 210.80 21 35.74 CHANGED hcs.l-plhp+Lhp.p...pphtllulhGMGGlGKTTLAppla.pc.p.lpppF.DhhhWlsVScphst....hclhcs..lhpcLt.ss..h..........................spcs.sp.httclpchLppK..RaLllLDDVWcc...pa-plt.shsstp.ts+lllTTRspsVstthss..sphhplp.hLp.-cuWpLFppc.....shcpp..........tpsplE-lu+plspcCpGLPLulpslGuhhtpKp.pl.pEWcchhcp.hppphht..p......splhs.hlthSYcsLst.pLKpCFlhhu.......hFPcshphttcplhchWhspshl.sps. .....................................................................................................................................................................................................u..G..l..GK.TT.......l....A...........p.....t....l........h....p.......p.........................h.......p.........p.....p.........F........p.....h.....h.....h......a.....l............s.....s.....s....p....p..h.sh.................hpl...pp.......l...h...p...p..l.....t..h.p.......................................................................................t...s.....pp.....hh....ph....l....p......p.....h.....L.......p.......p....+...........+.............h...L......l....l....L....D............D.........l...............p.p...................ph.........p...........t...............l...........t......t..................h.....................................h........s..............................G...........o............+.........l...l..l...T......T..................R.........p.........p.........p.........l..............h......t...........h.........h........t......................................t........t..............h.......h.....p....l........p............L................s............p....p...........c.....u....h....p....L.....Fp..pp.......................................s.h..tpp..............
...................p.h............p.....l...u.......p.......p...l...s..p..h.......s...t...G.......l..P.L.Al...................h....s.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 726 2813 3845 +3482 PF04485 NblA nblA; Phycobilisome degradation protein nblA Kerrison ND anon DOMO:DM04316; Family In the cyanobacterium Synechococcus PCC 7942 (Swiss:P35087) , nblA triggers degradation of light-harvesting phycobiliproteins in response to deprivation nutrients including nitrogen, phosphorus and sulphur. The mechanism of nblA function is not known, but it has been hypothesised that nblA may act by disrupting phycobilisome structure, activating a protease or tagging phycobiliproteins for proteolysis. Members of this family have also been identified in the chloroplasts of some red algae. 25.00 25.00 27.10 27.00 23.20 20.70 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.41 0.72 -4.10 14 86 2009-09-10 15:48:25 2003-04-07 12:59:11 7 1 55 22 26 74 5 52.30 38 84.84 CHANGED s.sLoLEQcFpLphappplpplshEQspchLl-hh+QhMl+-Nll+tllKpuh ....pLSLEQpFpLpshpppVpphSpEQAp-aLlchhcQhMl+-Nhh+pLlKpp..... 0 3 18 26 +3483 PF03801 Ndc80_HEC HEC/Ndc80p family Bateman A anon [1] Family Members of this family are components of the mitotic spindle. It has been shown that Ndc80/HEC from yeast is part of a complex called the Ndc80p complex [1]. This complex is thought to bind to the microtubules of the spindle. 
20.50 20.50 20.70 20.50 19.80 19.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.88 0.71 -4.80 37 358 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 283 5 242 368 4 153.10 29 25.72 CHANGED stpc+oshht........sRsShs....shtspps............+DPRPL....+D+saQsphhpplhcaLsppsa....spslo.+sLpsPopK.-FhtlFpaLYpp.lDPsaca...tp.ph-pEl.plLKpLpYPa..sIoKSplsAlGusp.WPphLuhLpWLhpLsphhsphhsp.......Dpshtspth ............................................................................s..................R.S.h.......s...t.tp...................pDsRPl.........pD+saptphhpplhcaLspp.sa............ttsl.o.....p..sl.ps.....PotK.-F.hlFpaLapp...l-.Psaph.......................pp.ph.--E.ls..lLK.pLpYPa..sloKSpltAsGusptWPphLuhLpWLhclsp.hhpthhppp.............ss.t...................... 0 82 134 203 +3484 PF00334 NDK Nucleoside diphosphate kinase Finn RD anon Prosite Domain \N 21.60 21.60 21.80 21.80 21.40 21.50 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.50 0.71 -4.37 171 5820 2009-09-13 01:46:53 2003-04-07 12:59:11 14 37 4319 479 1940 4004 2188 131.60 43 80.92 CHANGED E..RThsllKPDuVpRs...LlGcIlsRh.Ecc.Gh+lluhKh.hplocchAcp..aY.....scHps.+P....FFssLlpahoS.....G.P....llshVlEG....c.sulptsRplh....GsT.sPtc.A...tPGTIR...u.DFuhsh........sc...NslHGSDSsE..oAp+EIshaFs...tpEl .....................................................................E+ThslIKPD..u.V..p..+.s.............l.l.G.c.I.l.s.Rh.Epp..Ghcll.u.h..K..............h.h..............p.l..o.c-.p...Ac...p...aY................................u-.....H....ps...+P................F..Fs..s..L.l..c.F.MoS.................G..P.......l.l.s..h.............Vl...EG..............cs..Al.pp...hRplh.......G....uT..sPtc.A......................tsG.....TI..R...u..Dauhsh................sc........NslHG..S.DS..E..o..Ap+EIshaFst............................................................................. 
0 667 1125 1540 +3485 PF05031 NEAT Iron Transport-associated domain Yeats C anon [1] Domain This domain is involved in the transport of iron, possibly as a siderophore. 28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.50 0.71 -4.11 53 2305 2009-01-15 18:05:59 2003-04-07 12:59:11 7 72 393 47 143 1431 4 121.10 25 39.58 CHANGED ssspltcGpYslsaplhKcss....s-tShhssYhpcPuplplcsGKphlplTlss..o.....sahpshplp....sGp...........hhcscll...Scsp........tps.s+s.lcFc.Vsslspclssphplhl.....h...sYctpaplphthDsss .....................s..pt.tssphslsapl.hKs...ps.....spp.S.hh..s.sah.p.cPuplhh.p.s..GK.hh.lp.h.slps...u......................sahpcaplp...tsst.............................thss.cll........ScDp...................pps...s+s..lpFt..Vs....s......hsppls.shlcl.l.....sp...h....sYcapYslph.hp................................................... 0 46 94 107 +3486 PF00880 Nebulin Nebulin_repeat; Nebulin repeat Bateman A anon Pfam-B_1603 (release 3.0) Repeat \N 20.10 20.10 20.20 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.86 0.73 -7.23 0.73 -4.11 143 8841 2009-01-15 18:05:59 2003-04-07 12:59:11 13 120 100 0 4092 7213 2 28.70 29 38.71 CHANGED DsPphhpA+pssphhS-hpYKcsaccp+s .....DsPchhpA+pstchhS....-..hpYKcsa-ct+........ 0 255 486 1390 +3487 PF04299 FMN_bind_2 DUF449; Neg_reg; Putative FMN-binding domain Kerrison ND, Bateman A anon COG2808 Family In Bacillus subtilis, family member Swiss:P21341 (PAI 2/ORF-2) was found to be essential for growth [1]. The SUPERFAMILY database finds that this domain is related to FMN-binding domains, suggesting this protein is also FMN-binding. 
20.70 20.70 20.70 21.20 20.50 20.30 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.14 0.71 -4.76 129 884 2012-10-02 11:35:36 2003-04-07 12:59:11 7 4 757 2 337 872 133 165.30 34 77.90 CHANGED MYlPstFp..ts-hstltphlcppshuhLl..............o.t....ssss...l.AoHlPhlLc...tt..........sspshLh..uHlARuNPph...ppl................ptss.p..lLllFpGP.cuYlSPsWY.....ps.tcsVPTWNYtuVHsaGphplhp...Dsc......hltphlscLospaEssh.........tpPWp...hs-sssc..alcthl+uIVGhclpIscl ............................MYlPttFt...sc.ptltph.lpppshusLl.....................o.p...ssss.......Ao.HlP..hhLs.....t.........................tsspshLpuHlARsNPpappl................................ptsp..c....VLllFpG....s....cuY...ISPsWY...............ps.....tc.....t.....VP.TWNY.huVHsaGp.hplhp.Dpp............hltshlscLoppaEssh......................tsPWphs-ssts..alcthh+uIVGhcIplscl............................. 1 89 193 279 +3488 PF00960 Neocarzinostat Neocarzinostatin family Bateman A anon Sarah Teichmann Domain \N 25.00 25.00 27.70 27.20 22.60 23.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.04 0.72 -10.55 0.72 -4.13 4 25 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 19 25 6 38 1 109.60 34 72.85 CHANGED usoVoPAoGLSDGpoVoVSuoGhssGTsYpluQCAhVssGhhACNsAshoshTsDAsGpuosSlsVR+SFpGhlhs.GT.hGoVDCsTs..uCplGlGssuG-hup.sAIoFu ...........sloVoPuoGLoDGpoVoVouoGhssG.oshtluQC..As..ls..s..GthACssAsssssT.sDAsGss.osolsVR+SFsu.shs.Gsth.uoVDCs.Ts..uCtlusus...susssut.ssloF..................................... 0 4 6 6 +3489 PF03391 Nepo_coat Nepovirus coat protein, central domain Mifsud W anon Pfam-B_3589 (release 6.6) Domain The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. 
In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure [1]. 20.30 20.30 20.50 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.72 0.71 -4.99 11 339 2012-10-04 01:49:40 2003-04-07 12:59:11 10 7 28 141 0 391 1 168.10 69 25.28 CHANGED PslsaPhps.tchphhhlhhPPhphsluussuhposshhhupshhsusppsYsasssLlSaaLGhGGol+GcV+psuosFhossLhVs.pWtGsosshppLhphPtshlct.DGphplcIpSPaacTss.hh-ututh.lss......luGPlAPpspsuphtahlpIcpIst..s ........................................PTLVFDPGV.FsGKFQFLTCPPIFFDLTAVTAL+SAGLTLG.Q..VPMVG.TTKVYNLNSTLVSCVLGMGGTIRG+VHICAPIFYSIVLW..VVSEW.N..GT.T..MD.WNELFKYPGVYVE...E...DGSFEVKIRSPYHRTPA..R...LLAGQSQRDMSSLN.FYAIAGPIAPSGETARLPIVVQIDEIVR..P................................. 0 0 0 0 +3490 PF03688 Nepo_coat_C Nepovirus coat protein, C-terminal domain Mifsud W anon Pfam-B_3589 (release 6.6) Domain The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure [1]. 
21.10 21.10 21.90 23.80 20.40 21.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.79 0.71 -4.61 21 412 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 28 141 0 397 0 149.10 66 26.03 CHANGED hshsphF.sWhsl...sshpsss....hphpIPuRltDls........spsssVshtsNPhuhhhussGhHtGp.lpL+hpWsh.psphuptpGslthhphhtchupshsspstl......ssh.ssuhph.lphGsFuGssssu..shshhsc..altlphspucplppLpVslclhsGFpFYGRo ..SF..EDDYF.VWVDF...SEFTLDK....EEIEIGSRFFDFT........SsTCRVSMGENPFAAMIACHGLHSGl.LDLKhQWSL.NTEFGKSSGSVTITKLVGDKAhGLDGPSQl......FAlQ+LEGss-LLlGNFAGANPNo..HhSLYSR..WMAIKLDQAKSIKlLRVLCKPRPGFSFYGRT........ 0 0 0 0 +3491 PF03689 Nepo_coat_N Nepovirus coat protein, N-terminal domain Mifsud W anon Pfam-B_3589 (release 6.6) Domain The members of this family are derived from nepoviruses. Together with comoviruses and picornaviruses, nepoviruses are classified in the picornavirus superfamily of plus strand single-stranded RNA viruses. This family aligns several nepovirus coat protein sequences. In several cases, this is found at the C-terminus of the RNA2-encoded viral polyprotein. The coat protein consists of three trapezoid-shaped beta-barrel domains, and forms a pseudo T = 3 icosahedral capsid structure [1]. 27.30 27.30 27.60 28.80 27.10 27.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.56 0.72 -4.26 21 379 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 32 141 0 386 0 87.90 68 12.85 CHANGED hsaspslplPp-sptGslLuplslhsthpshsuhsatcWlppG..hlpsplclhh+lssssFsGlolhhsaDAasRl.s.sshssshshshshsl ...LAGRGVlYIPKDCQANRYLGTLNIRDMISDFKGVQYEKWITAG..LVMPpFKIVlRLPANAFTGLTWVMSFDAYNRI.T.SRITsSADPlYTLS.V......... 0 0 0 0 +3492 PF00064 Neur neur; Neuraminidase Eddy SR anon Overington and HMM_iterative_training Family Neuraminidases cleave sialic acid residues from glycoproteins. Belong to the sialidase family - but this alignment does not generalise to the other sialidases. Structure is a 6-sheet beta propeller. 
19.60 19.60 19.80 19.60 18.90 19.30 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.77 0.70 -6.40 7 29236 2012-10-02 00:45:24 2003-04-07 12:59:11 13 4 28530 235 0 16025 0 436.40 57 99.38 CHANGED MNPNQKIlsluulululsllslLlpluslhlsls.hh+ttt....pps.ssps..p..Npslptpshsp.sshshhhs....ppspahs.sculCslpGashhuKDNuIRlGpp...uplhVhREPaVSCsPsEC+pFhLoQGshlssKHSNGTl+DRosaRsLhSs.LGpsPslhpo+FcsluWSuouCHDG+tWhsIsloGsDssAsAslhYsth.T-sIpSWu+sILRTQESpCVClsGsChhVMTDGsAsspAph+IhhhccG+Il+pp.lousupHlEECSC.YsppspVpCVCRDNWpGuNRPllpls.p.hsapstYlCoGlhsDTPRspDsshossCs.Ns.ssps....GVKGFuacpGss....sWhGRTISpsSRSGaEhlhl.sGhopssS.p.hp+QslVsspsWSGYSGuFh.hst.st.pChsPCFaVEhIRG+Pc.EppshWTSsS.lshCGsssphssWSWsDGAplsa ....................................MNPNQKII.TIGSlshsIuhlslhlQIu.llohhs...H.hQht....spsp......p..p..s........s........s...h....p...p..sh...s...p.......s...Y..ls...ss..hph.hs....hhu..hh.shsps.CsloGaA.aSKDNSIRlGup...GDlaVhREPalSCush-C+pFhLoQGshLNsKHSNGTl+DRoPYRTLhSs.lGpsPsshNo+hcslAWSuSuCHDGhuWLpIsloGPDssAsAslhYNGhlTDoIpSWpps.ILRTQESECsClNGoChsVMTDGsu.sGpAs.h.K..Ih+I.EcGKIVKospL.s.usshHhEECSC.YPc.tsplpCVCRDNW+GSNRPh.VshN.s.hphphuYlCSGlhGDsPRPNDuouSsssh.sPssppG..u.GVKGauF+.GNs....VWhGRThSpsSRsGaEhhhs.sGW.os.ssS.s.sh+QsIVshssWSGYSGuF..asp.st.sCIpPCFaVELIRGRP.c.E..pslWTSsS...lsFCGssushsshSWPDGA-lsh............... 1 0 0 0 +3493 PF02932 Neur_chan_memb Neurotransmitter-gated ion-channel transmembrane region Bateman A, Sonnhammer ELL anon Prosite Family This family includes the four transmembrane helices that form the ion channel. 
23.10 23.10 23.20 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.56 0.70 -4.48 50 6256 2009-12-16 13:54:54 2003-04-07 12:59:11 11 81 288 162 3294 5277 71 169.90 20 43.13 CHANGED lllPClLIohLohLsFaLPsDuG.pKsoLuIosLLohTsFhlllscplP.poShslPLlspYllhhhhlssh.lhhsllllNsphRsP.psHphsphl+phhl.phlPphh.....hhppsstshssspstptttstsh.h.................................................................................................................................................................sststtstsssssss.hphssplcpulpus...............paIupp....................................................................................h+pcsp..p.tpsWhhsutslDRlthhlFslshllsolsha .................................................................................................................................................hhlPshhlsh..l..o....hl.s........Fa....l........s............c..u...s.................t+h.s...L...............u...............lo..s.l......Loho.s.....h...hh........hp..p.tlP......t..s......S......h..s...........h........s....l...........................h..h...............h.h..........h..........h..h..hh...l..hs......h.......l..................h.s..h.l.s.....h.............h....p..h...p....t.............t........h............h..............t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 922 1195 2303 +3494 PF02158 Neuregulin Neuregulin family Mian N, Bateman A anon IPR002154 Family \N 20.30 20.30 24.60 22.80 19.40 19.00 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.50 0.70 -5.69 2 267 2009-09-11 01:37:10 2003-04-07 12:59:11 10 8 42 4 77 244 0 313.30 50 58.48 CHANGED AEELYQKRVLTITGICIsLLVVG.MCVV.AYCKTKKQRpKLpDRLRQSLRpcppNlsNhsNtPHpP.NPPP.cNVQLVNQYVSKNVISSEHllEREsETSFSTSHYTSTsHHSTTVTQTPSHSWSNGhoEShIS.cS.SVIhhSSVENSRHoSPsG.PRGRLpGlGG..c.sSaLRHAR-TPDSYRDSPHSERYVSAMTTPARMSPV-F+TP.SPKSPs.EhSPP.SShsVShPSVAVSPFlEEERPLLLVoPPRLREK+YD+........pQhNSaHHNPuHpSoSLPPsPLRIVEDEEYETTQEYEss.EPsKKlsNSRRtKRTKPNGHIuNRLEhDSsoSS.SSsSESETEDERlGE-TPFLuIQNPLAASLEsAshaRhA-SRTNPsuRFSTQEELQARLSS 
.......................AEELYQKRVLTITGIClALLVVGIhCVV.AYCKT....KKQ.RK...phHs+LRQs..hpsc+.pN.hhslANG..P..pHP.tsPs.E..plQhs.sp.YlSKNl...uo-Hllc+EsET..o..FSsSH.sSsu.HHso.TsTpT.s.....S...........H.......o.......WS.t+oESlhS.-SpS.slhhSSVtsS+psSPss..sRuR...hs...u...hsu.......c....c....ps..hh....th+-o.DShRDSP.HS..ERYVSA...hTTPARhS.......P.V-Fchs.....sspsPs.phosP.uuh.shS.hPshs.hs.h.h.t-pp..PLl..h..................................................ts.......................................................s.S.Pso.Ph+l..--..ph-sh......pph.ss.................p..........t......p...t+sp.sG.hu.c........sp.t..u.s.s.............o.........s-t-.-t...........uEsTPFLuhps............sh........tsRT........................................................................................................................................................................ 0 2 9 27 +3495 PF03823 Neurokinin_B Neurokinin B TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 28.00 35.90 19.50 18.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.91 0.72 -4.22 3 39 2009-09-10 23:24:23 2003-04-07 12:59:11 9 2 26 0 16 39 0 54.90 63 37.49 CHANGED MRSsLLFAAILALSLApSFGAVCEEPQEQVVPGGGRSKKDSDLYQLPPSLLRRLYDSRs ......MR.hLLFsAILAhSLApSFGAVCcEsQEpssPGGG+SK.+DsDLYQLs..sl..LRRLacS+S.................. 
0 1 1 4 +3496 PF00243 NGF Nerve growth factor family Finn RD anon Prosite Domain \N 21.30 21.30 21.30 21.70 21.00 20.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.57 0.71 -4.36 7 3765 2012-10-02 16:54:34 2003-04-07 12:59:11 13 4 2066 24 107 2860 0 90.40 67 44.04 CHANGED psht+RGEhSVCDSlSlWVT..-KpoAsDl+G+pVTVLs-Vshssus.hKQYFFET+C+ss.......sssssGCRGIDp+HWNSpCpsopoaVRALTh-spp.VuWRaIRIDTACVCsLopKoGp .............................p-..su+RGEhSVCDShSc..WVT...-Kp.TAVDhpG.tpVTVLtcV...sls.put.lKQYFYETKCpsh..........................t....s.K.pG.CR.GIDt+a.WN.S.CpTopo....aV+AL..T.-tp............................................................. 0 12 21 45 +3497 PF02979 NHase_alpha Nitrile hydratase, alpha chain Griffiths-Jones SR anon Structural domain Domain \N 27.20 27.20 27.20 27.20 25.90 27.10 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.23 0.71 -5.02 37 386 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 295 57 122 382 231 157.00 41 86.98 CHANGED sp.o-hthRspALEplLhEKGllssusl-thl-haEschGPpNGA+VVA+AWsDPsF+ttLLpDuoAAhtEl....GasGtp..G.........EahlsVENTsplHNllVCTLCSCYPWPlLGLPPsWYKossYRuRsV+-PRsVLt.EFGlsLPsssclRVWDSoAEhRYlVlPhRPsGTEGhoE-pLAsLVTRDoMIGsuls ...................p.....t.h.hAL.phL.tKGllsst.lcthhphh-pp.u.PttGAclVA+AWsDPsF+thLLtDusuAh.tp.h.....Ghts.p.G......................phhhslEsTsph+NllVCoLCSChsWslLG.LPPsWYKs.tYRuRhV+-PRtVLs.E.hGhplsscsclRVaDooA-hRYhVlP.RPsG.T-.shsp-tLttlVo+-shIGsu......................... 0 38 69 88 +3498 PF02211 NHase_beta Nitrile hydratase beta subunit Bateman A anon Pfam-B_5347 (release 5.2) Domain Nitrile hydratases EC:4.2.1.84 are unusual metalloenzymes that catalyse the hydration of nitriles to their corresponding amides. They are used as biocatalysts in acrylamide production, one of the few commercial scale bioprocesses, as well as in environmental remediation for the removal of nitriles from waste streams. 
Nitrile hydratases are composed of two subunits, alpha and beta, and they contain one iron atom per alpha beta unit [1]. 21.30 21.30 21.70 21.70 19.80 21.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.69 0.70 -4.64 33 440 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 230 80 156 463 350 165.30 25 93.48 CHANGED MsGsHDlGGhpGh.GPV..s.-....s.--PlF+t-WE+RAhulphAhsu..hGt.....................WslDpsRaAtEphsPs-Y...lpsoYYc+WltulppLhl-pGhlop-ELst.......uphhpsstshsp.slsA...-tVsshLtcGsPspR.sssssspFtlGDpVRs+.shsss.....uHTRLPpYlRG+sGplt.thaGsaVaPDosA.cGtGEs.PpaLYsVtFsupELWGss.ssssssVslDlWEsYL-sA ..................................................l....p........t..tts..sFpt.WEtpsaulhhshtt..hGh.....................ash....--......h......R.......h..........u....h.......E....p.....ht.....s.....t.pa.......h.t.hsYYc+WltulppllsccGllotpELtth...........t.....st.....................................................................p.l.h..............a.Rh..ah.th.G.l................................h.l...................................................................................... 0 31 80 108 +3499 PF01292 Ni_hydr_CYTB Prokaryotic cytochrome b561 Finn RD, Bateman A anon Prosite Family This family includes cytochrome b561 and related proteins, in addition to the nickel-dependent hydrogenases b-type cytochrome subunit. Cytochrome b561 is a secretory vesicle-specific electron transport protein. It is an integral membrane protein, that binds two heme groups non-covalently. This is a prokaryotic family. Members of the 'eukaryotic cytochrome b561' family can be found in Pfam: PF03188. 
25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.42 0.71 -4.89 84 1155 2012-10-03 10:28:09 2003-04-07 12:59:11 15 11 975 0 237 5046 1484 182.50 26 73.89 CHANGED ta..shhhR...........hhHWhhAlhhlhhhh...oGhhhhhh..hhhhh...................................................................hph....Hhh..hGhshhslh.lh+lhht.....................................hptphPthhttsshtph...................hhhhlhhhhhhhsloGhhh.............................................stshtphhthlH..hhuhhlh.shlhlH.lhtslhpphh.t...slppMhsG .............................................................................h.ashhsRlhHWssAlhhllLlh....oG.lh...h....s.h.h...s.h.s.h.st.h....................................................................hsl..........Hth...hGahLluhh..hhhll.thh..............................t.hhth..s.h.......t...t...h.t...p..h...h....h.h...h......h...t..l..h..p.....s..c..........t.....s...h....s....u...t..s.sph.....................hhh.l.h.as.L.l.sllllTGllhhh......................s.....h....................................hs.uh..t..h..h..hth.....H..hhlAhlhl..halhsH.l.h..hs.h.hs..ph.t...p.......hhcuMlsG........................................... 0 56 139 181 +3500 PF04097 Nic96 NIC; Nup93/Nic96 Wood V, Finn RD anon Pfam-B_5541 (release 7.3); Family Nup93/Nic96 is a component of the nuclear pore complex. It is required for the correct assembly of the nuclear pore complex [1]. In Saccharomyces cerevisiae, Nic96 has been shown to be involved in the distribution and cellular concentration of the GTPase Gsp1 [3]. The structure of Nic96 has revealed a mostly alpha helical structure [4]. 
19.10 19.10 19.70 19.50 18.90 18.70 hmmbuild -o /dev/null HMM SEED 613 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -13.15 0.70 -6.39 40 382 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 274 4 256 382 2 536.10 25 67.14 CHANGED s.plh-tapths..............................sshcscpls-hWphlpphh.....................shpusshptpllpsu+paLEppahpalpshlt+s.pcAthtGhPsshs+lpualch+ltpts......sLt...l...supPlWsllaYhLRsGhhppAhchl....tpthpphcpsFhsahpsaspstsppLsschps+lptEasppl+ss.t............DPYKhAlYplI.....G+CDl....spp...shs.plspolEDaLWl+Lsll+pss.t.sps.......h-pasLs-lQshlh.paGpppFss......stps.hYaplLlLoG.FEtAlpaLhc.....hspsDAVHhAIuLs.huLLpsust.p.........pplLshssps......lNauRLlspYs+.pFchsDsptAlpYhhhlshhps...............ppth.shhpcslp-L.....lL-oR..-FshLLG.clpp.cGp+..hsGll-c.........lhthps.cp.hpplsppsApc..s-ccGphp-AlhLYpLup..............paDpVlsllN+hLupslspsst......................................stospsslhthApplhchYpsssth..p..lpspscpT.hplLlplhphhshappspappALptlppLs.llP.............ss.sp..lcctuppFps..lssplh+slPslLlhsMssltphhpplpssthss.tp................ppplspL+ppA+slh..haAuhlpY+hP 
.......................................................................................................t...hhthhtth...............................tthp.ppp..hschWhhhpth.......................................shpshphphphlptuppaLEppahpahpshlhts.ppAthGG......hPsshp.......hl+ualpl+h.t.t..........tht....s....................pspslWsllaYhlRsG.hptAhphl...........pth..pp.....tpF..hshhppa.........hp...........s..................sppl...s..php..pp.lp.capptl+pss.........................DPa+hAlYpll.........GpCDh........sppp....tls.pshEDal....Whp...L....s.lp.ssts.p.s....................p.hsLtplQp.lh.ph.Gt..phFst.........ttp.hhah.lLhLsu.FEtAltaLhp.....h..scAVHhAlsLt.htLLthsst.p.................tthlshpstp.........................lN..hspllh.Ysp.pF.c..ssspt...AlpYhhhlp..ts...............................pt..phhhpslp-l.....llc..o+...pashllG..chp...cGp+.........h.Ghl-p......hts...t.hpplh.psAp...spppGh..hp-AlhLYpLAt.................................................ph.-pslplhsphL.upslst..t......................................s.sttpp.h.hAtphhphaptps...h........................hpt..pps...hhlLlplhphhs.hpst..p....h.tpA..l...c.lppLp.llP.............hs.tp....lcptsttaps..hsstltpslspllhhshphl.p.hp.phttst.ts..t.................pt.hppl+p.upslh...auu.l.a+h............................................................................................. 0 88 139 216 +3501 PF03824 NicO High-affinity nickel-transport protein Griffiths-Jones SR, Kerrison ND, Finn RD, Eberhardt R anon Manual Family High affinity nickel transporters involved in the incorporation of nickel into H2-uptake hydrogenase [2] and urease [3] enzymes. Essential for the expression of catalytically active hydrogenase and urease. Ion uptake is dependent on proton motive force. HoxN in Alcaligenes eutrophus is thought to be an integral membrane protein with seven transmembrane helices [4]. The family also includes a cobalt transporter. 
25.00 25.00 25.00 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.72 0.70 -5.09 20 2471 2012-10-02 18:22:22 2003-04-07 12:59:11 11 9 1720 0 612 1814 250 244.40 22 78.85 CHANGED hhuhshLuashGlhHAh-.ssHhtAl...lpshhtsscssltsGhhhohupSslhslhulhlshushhltp.....hsthpphsuslth.lsusFlllluhhshhlhpslhchatt..t....stt.tsthpthhtt.....................................t.hs+hht.......h+.h.tpphHhhssG...u.G..susphthhu.sst...th.shshhshhsasslFuuGhp.ssuu.sllhhuhhshsahs..hhthhasholsGsulsluhhsshlhshphlAp+hshpss...hstlstls...hphlGhhllhlhulshlsuhlh ............................................................h.hhhhhhuahh....G....lhHAl.s.s.sHhtsl................ht.phh......h...........s..t...........c.....s.s.l...t..........uhh....hu....lutohhh..shhs.lhl..sh..sshhltp.................thp.thsu.t.h.hht...l......s.shll...llu.h..s..h.h.h.h.......ps.hh.p..hth.htt...............t.t...h..p.th.h.................................................................................................t.h.........................t.....h...p...c..ph+..h...h......h...u...............t...........p....s.sph.th...h.s.s.t........th............sh.s...h.....h....h.h...h....hhhssGh..h.sssh.sllh.h.sh...h...h......shsh....hht...hhhshsl.hs...hh.l..shhhshh...h.p........hs.t..+.h..s.h.ts....hhp..hhshhs.............hhGhhhlhhhh.hshhh....h................................................................................................... 
0 143 336 484 +3502 PF00374 NiFeSe_Hases Nickel-dependent hydrogenase Finn RD anon Prosite Domain \N 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.84 0.70 -5.73 12 4820 2009-09-12 07:59:36 2003-04-07 12:59:11 14 9 1571 65 1336 3444 238 268.90 27 63.43 CHANGED RGLEhILKGRDPRDA.tFspRICGVCThsHALASVRAV-sAlGIclPpNAphIRNLhhtuhhlHDHlVHFYHLpALDWVslssALpADPttsucLspslS.shsh.oss.h..csVQs+lKcFVESGQLG.FtNuYas..p.AYhLPPEssLhAsuHYLEAL-hQ+chschhuIFGGKNPHs.shlVGGss.sh.slDs..........cRL.s.hpuhhcclp-FlcpVYlPDllsluuhYKc..hhhtGGhs.pNhLu....aG-aPps.....hstsohhhPsGslhsushscVhsl....D.t..c...EaVpHSWYc.ss-.huhHPa-GhT-Ppashhs.................D-s.+YSWlKAPRa+G+shEVGPLAphlluhup....uc......pthcc.l-hhltths......lssstlaST..............LGRTsARulEsthssp.ht.hhccLhsNlpsGc.sss.sccW-.sphPppu+GVGhs-APRGuLuHWl+IKDGKI-NYQsVVPoTWNuuPRsspGplGshEtuLlsT.hscPcpPlEILRTlHSFDPClACusH ......................................................................................................................................................................................................................................................................................................RGhEh.lhpsR.....s.h.s.s.h.hh.spRlCGlCshsHshu.shuhE.....s.....A..h.....s......l...p......l.....P.......p......A.phlRslh...t.s.hhpsHhl........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 448 910 1161 +3503 PF04891 NifQ NifQ Kerrison ND anon Pfam-B_6173 (release 7.6) Family NifQ is involved in early stages of the biosynthesis of the iron-molybdenum cofactor (FeMo-co) [1], which is an integral part of the active site of dinitrogenase [2]. The conserved C-terminal cysteine residues may be involved in metal binding [1]. 
19.40 19.40 19.50 19.50 19.10 18.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.37 0.71 -4.09 39 166 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 159 0 76 167 16 160.20 35 80.39 CHANGED cshAsl..Luhuhtptt.tt.usls.hh.GLsssshssLls+aFP........s..thtsh.....sshsss.spE.spLpsLLhsc.....pusssstuthlAtllAptshtssHLWpDLGLtsRsELStLlt+aFPsLAutNspsM+WKKFFYRpLC.-p-GhhhC.puPsCstCsDastCFG .....................................................h....thLuthhttht.t.stssl.s..t.h.GLsssphttLlsph.FP............................t..sthh..h..........sth.ssstspEhtpLtsLLhtp..............tuss..sststhlAtllActshtssHLWpDLGLtsRs-LotLht+aFPsLAspNsp.sM+WKKFhY+plC.cpcGhhlCpuPsCstCsDashCFG.............. 1 20 43 57 +3504 PF01106 NifU NifU; NifU-like; NifU-like domain Finn RD, Bateman A anon Pfam-B_1206 (release 3.0) Family This is an alignment of the carboxy-terminal domain. This is the only common region between the NifU protein from nitrogen-fixing bacteria and rhodobacterial species. The biochemical function of NifU is unknown [1]. 20.90 20.90 21.00 21.60 20.20 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.17 0.72 -4.20 20 3818 2012-10-01 19:25:19 2003-04-07 12:59:11 12 19 2999 6 1182 2450 2182 69.10 37 38.36 CHANGED lpcVl-c.lRPhLhtDGGDlELlcV-Gs....pVpVcLpGACuuCspSshTLpsulEp+Lp-pls.pslcVlsl ..................lctlL-p..lRPhLt.t.DGGc..l..pl..h..c..l..p..cs.......hV.h.l.ph.t.Gu.Cs.GCsu.ushT..L..KpGIEptLhpplP..p..lh.tV......................... 0 376 769 1014 +3505 PF01592 NifU_N NifU-like N terminal domain Bateman A, Wood V anon Pfam-B_772 (release 4.1) Family This domain is found in NifU in combination with Pfam:PF01106. This domain is found on isolated in several bacterial species such as Swiss:O53156. The nif genes are responsible for nitrogen fixation. 
However this domain is found in bacteria that do not fix nitrogen, so it may have a broader significance in the cell than nitrogen fixation. These proteins appear to be scaffold proteins for iron-sulfur clusters [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.44 0.71 -4.19 16 4872 2012-10-01 20:52:23 2003-04-07 12:59:11 11 21 4284 25 1274 2971 2268 125.30 35 78.48 CHANGED tYo-KVhDHahNPRNsGsl-csss.......hsGpVGu.uCGDsh+LplKVctpsshItDA+FpTFGCGSAIASS.ShhTEhlpG+sl--A.hploNp-IActLut......LPs.KhHCSshup-AlcAAltsYcu+phps ......................................................YpphlhDHacN.P......+N..h..G.ph.-ssst........................suh..h.s..s..P..sCG..D.s.hcLpl.+....l.........s..c..............p.............s......h.....I.....pD......s..+Fc..s.hGCu.uhASo.ShhT-hV..c..GK.o....l-EA....t............l........p............p...s...p.l...h...pp.l.t........................ls....s...s..c....h.c...s...s.hLu-s..s.lh.sulsca.t+ht.s................................................................ 0 441 833 1074 +3506 PF03206 NifW Nitrogen fixation protein NifW Bateman A, Mifsud W anon Pfam-B_2891 (release 6.5) Family Nitrogenase is a complex metalloenzyme composed of two proteins designated the Fe-protein and the MoFe-protein. Apart from these two proteins, a number of accessory proteins are essential for the maturation and assembly of nitrogenase. Even though experimental evidence suggests that these accessory proteins are required for nitrogenase activity, the exact roles played by many of these proteins in the functions of nitrogenase are unclear [1].\ Using yeast two-hybrid screening it has been shown that NifW can interact with itself as well as NifZ [1]. 
20.60 20.60 20.80 20.70 20.40 19.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.43 0.72 -3.93 39 208 2009-09-11 15:38:48 2003-04-07 12:59:11 9 1 203 0 87 189 12 100.60 30 92.76 CHANGED Mo.......hlcph.....ccLouAE-FFpahslsYDscVVsVsRLHIL++FspYlspt...sh..ssh.s-sthhsph+shLp+AYpDFlsS.oPhpc+lFKVap-tstp..........hVsLsslps ..................hhpcl.....ppLooAE-FhpaFslsYDscllsVsRLHILKRFspYLttt......c......th.spppthtthRphLppAYpc.FhpS.sshpp+lFKVhppt.............hVslstl..t............... 0 19 52 70 +3507 PF04319 NifZ NifZ domain Bateman A anon Pfam-B_6057 (release 7.3) Domain This short protein is found in the nif (nitrogen fixation) operon. Its function is unknown but is probably involved in nitrogen fixation or regulating some component of this process. This 75 residue region is presumed to be a domain. It is found in isolation in some members and in the amino terminal half of the longer NifZ proteins. 20.30 20.30 20.50 21.10 19.80 18.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.47 0.72 -4.60 41 240 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 176 0 121 231 7 74.10 42 68.32 CHANGED t.PtFphGp+V+sp+slRNDGTaPG..pchG-lLVc+GshGYVhsIGoaLQphh.IYuVcFl-pGhlVGCRccELpss ....t..PtFphGp+V+up+sl+ND.....G..TaPG..pphG-lLl.++G-hGaVhslGoa..L..Qp.h..IYsVcF...h-....p......uh...l......V....Gh+ccEL.s.h............... 0 31 76 98 +3509 PF01077 NIR_SIR Nitrite and sulphite reductase 4Fe-4S domain Finn RD, Bateman A anon Pfam-B_1092 (release 3.0) Family Sulphite and nitrite reductases are vital in the biosynthetic assimilation of sulphur and nitrogen, respectfully. They are also both important for the dissimilation of oxidised anions for energy transduction. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null --hand HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.87 0.71 -4.87 91 15116 2009-01-15 18:05:59 2003-04-07 12:59:11 17 120 3409 86 2437 13876 917 143.60 20 41.54 CHANGED susslRsspsssssshsptthhDs.........................ptlstplp......................tphpththPtKFKlulsG.........ssssCststsp.Dlullustcsth.................................................................................GaslhlGGthutp.ttsthhh......sah.t-.....p.lhphlctllphap........cps+t+p...Rlt.hlcch..Gh-cFtppl..tp.hsp.h ..................................................................htthps.hss..G...s..h.Cchs.shDs..........................................pshsttlt...........................t..h......s..p.h..p..p..h..t..h..P..t......K...h.+..h.....sluu..........Cs.N.s.C.ssst.sp.Dlu..l....lG....h....t...c...p....s.............................................................................................................................shplh.luut.h.s.......................................p...........................h....hh...t....h...h...................................................................................................................................................................................................................................................................................... 0 719 1561 2066 +3510 PF03460 NIR_SIR_ferr Nitrite/Sulfite reductase ferredoxin-like half domain Bateman A, Dlakic M anon Bateman A Repeat Sulfite and Nitrite reductases are key to both biosynthetic assimilation of sulfur and nitrogen and dissimilation of oxidised anions for energy transduction [1]. Two copies of this repeat are found in Nitrite and Sulfite reductases and form a single structural domain. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.90 0.72 -4.35 174 13756 2009-01-15 18:05:59 2003-04-07 12:59:11 12 109 3391 98 2671 12133 911 66.90 22 18.80 CHANGED htpps.t.h..hlplpls.s.....Gpl.ssp....phptluclu.....c..cau.t..upl.+lTspQs.l.ltslptpplsslhptLtph ..........................th......t....atlplphs..u.........Gpl.osc..................tlptls-lA.................c....cas..t......Ghh.+h.Tspps.l.hh..sspppplttlhptLt..s..................... 0 772 1683 2259 +3511 PF02665 Nitrate_red_gam Nitrate reductase gamma subunit Bashton M, Bateman A anon COG2181 Family This family is the gamma subunit of the nitrate reductase enzyme, the gamma subunit is a b-type cytochrome that receives electrons from the quinone pool [1].\ It then transfers these via the iron-sulfur clusters of the beta subunit to the molybdenum cofactor found in the alpha subunit [1]. The nitrate reductase enzyme, EC:1.7.99.4 catalyses the conversion of nitrite to nitrate via the reduction of an acceptor. The nitrate reductase enzyme is composed of three subunits [1]. Nitrate is the most widely used alternative electron acceptor after oxygen [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.44 0.70 -5.00 66 2225 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 1577 10 386 1081 183 214.50 41 82.12 CHANGED ls.phLaslaPYlslslhllGohhRacp-QaoWpocSSQlLcp+.tL+hGSsLFHhGILslhhGHlhGLLhPpshhcslGlopthhph.hAhhsGuhsGlhsllGhslLlhRRlsssRlRtsoohsDhhlLllLhlplhlGLhohhhuttph.......-hshhhplusWhpulhshpPs..uphlsslshhaKlHlhhGhslFslaPFTRLVHlaS..sPltYLtRs.Y.lhRpRp ...............................................................................................N.FhasIaP..YlshslFllGshhR............Y...cYsQaoWput...S....S.....QhL..-.++.thh.lu.S..sL.F..HlG.......I.L....s..l...FhG.H..h..h......G.hL...sP.....c...h...h...h...p....s.h........l....s............p.....s...h.....p....h.......h.A..h....hhG..u......h..s......G..l..ls.L....lG....h.s.hLlhRR.l.h..s..t..R....V...............R.s....s..o....o....ssD..ll..lhhlL..llps..hLGLhohshospph............Dhs...h....hp.lss...W.......h.p.S..lh...o...F.p.s....s......uph.h.s.sV.s.h.l.F.+l....HllLGhT.lFhlaPFTRLVH...l...aS...sPl..p..YlsRp..Y.lhRpR.............................. 
1 127 252 327 +3512 PF02087 Nitrophorin Nitrophorin Mian N, Bateman A anon IPR002351 Domain \N 20.90 20.90 21.10 21.00 20.70 20.80 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.31 0.71 -4.49 3 15 2012-10-03 08:47:39 2003-04-07 12:59:11 10 1 3 66 0 101 0 166.40 42 85.60 CHANGED CSTNISPKpGLDKAKYFSG.sWYVTHYLDKDPQ.VTDpYCSSFTP+ESuGTVKEALYHYNuKKKTSFYNIGEGKLESuGlQYTAKYKTVDKK+sVlc-ADsKNSYTLTVLEADDSSALVHICLREGsKDLGDLYTVLoHQKDuEPSAKVKuAVTQAGLKLSDFVuTKDLuCpYDD.QFTSL .........................CopNlpsKpshDKsKYFoG.sWYVTH.Y.L..D.p-P..p..V.T.D.cYCuuF.s.s.+.p...u...sG.p...V..K...E.ALY.HY.Nsc.scs...o.F.Ys...l...u..E..u..p..l....pS....sG.....KY.T..A..K..ap...pVDKct..c.clc.cs.s...t..s.pYT..h..T..lh-sD.D..S.p.AlV..HhC...h....p...cus...c...s....l..hsLY..s....VLsRs..c..s..sp..s.s.s.KVKsAl.s.psuLKLscFhssKs....sCpYDs.phh............................................................................. 0 0 0 0 +3513 PF00881 Nitroreductase Nitroreductase family Bateman A anon Pfam-B_481 (release 3.0) Domain The nitroreductase family comprises a group of FMN- or FAD-dependent and NAD(P)H-dependent enzymes able to metabolize nitrosubstituted compounds. 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.94 0.71 -4.42 141 16517 2012-10-02 14:48:17 2003-04-07 12:59:11 19 110 4534 206 3919 12487 3614 164.20 18 68.07 CHANGED lppR+Sh..R.pFpsps......lsp-p...lppllpsA.................ph......uPoutsh....psh....chhll.p.......stchp.......pcltphhht.h..................................................................thtpthhtsAshhlhh..........................................ssptttthshh...ssuhsspshhL..sApshGlusshhusht.......ttt............lpchl.sls............tplh..hhlsl..Gh ...............................................................................................................................................................................pR+Sh..+..p....a.p.sp.......lsp-.p...l.p.p.l.l.p.su............................................ph............uP.S.u.t.sh............Qsa.............+hlll...p..............stct+...................................pclt.ph..h......t...............................................................................................................................................................................................thttthh.t.p.A....s..h....hl..hl..........................................................................h....h.s.t...p....p.......h.......h.s.h.h.......ss.uhsst..s..h....hl..uA.p.s.h..G..lus...shhsuhp...................................tpt..........lpc...hl....s....ls.......pp........hp.h...hhlsl..Gh........................................................................... 0 1261 2568 3341 +3514 PF05211 NLBH Neuraminyllactose-binding hemagglutinin precursor (NLBH) Moxon SJ anon Pfam-B_6567 (release 7.7) Family This family is comprised of several flagellar sheath adhesin proteins also called neuraminyllactose-binding hemagglutinin precursor (NLBH) or N-acetylneuraminyllactose-binding fibrillar hemagglutinin receptor-binding subunits. 
NLBH is found exclusively in Helicobacter which are gut colonising bacteria and bind to sialic acid rich macromolecules present on the gastric epithelium [1]. 24.40 24.40 24.50 24.60 24.00 24.30 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.56 0.70 -5.14 3 170 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 54 3 13 172 0 241.00 39 94.79 CHANGED KKshLAluLuSLLlGCAhasulEsthPsctppppsspohE....osEhsh.LcFNYPl+ucQsPpNcHlVlLLcP+IQlSDNIsKsYpcKFccuLhLQlpoILE++GYpVlpF..pDcc-lshspKKKuaLVLcMcGpVuILEDlKhslc-ss........uLsc-clVDhSSGaVplNFlEPcSs-llHSFulDVSclcAlpcplcoT+ouSGGF.lsKThVHc.K-T.N+-DAI+KILN+hYAsVMp+hsKELTK+NlE+YcKsucEMKs+K ..................................................................................pKs.luhsluulLlusu.p...hht................................t.sh.hphsY.hpucp..s.tss+h..llLLcP.plQhSD.N.IsKsYpsKFcsuLhlQlppILc++GYp..V..lph...pDcs-lshsp..K+cuaLsLchsGplsl.LpDhKhs..l.pcss..........uhsctcsV.hSuGalplsFlEPpSs-slcSFslDlSclchhpc.....hlchp+o.suuGh....ls......s.......oh.....V.........Kts....NpsDAI+plLN+hYsplMpcls+cLTpKNl-pYcKssc-hKsp+............. 0 3 7 13 +3515 PF04170 NlpE NlpE N-terminal domain Mifsud W, Bateman A anon COG3015 Domain This family represents a bacterial outer membrane lipoprotein that is necessary for signalling by the Cpx pathway [1]. This pathway responds to cell envelope disturbances and increases the expression of periplasmic protein folding and degradation factors. While the molecular function of the NlpE protein is unknown, it may be involved in detecting bacterial adhesion to abiotic surfaces. In Escherichia coli and Salmonella typhi, NlpE is also known to confer copper tolerance in copper-sensitive strains of Escherichia coli, and may be involved in copper efflux and delivery of copper to copper-dependent enzymes [2]. 
21.40 21.40 21.40 21.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.10 0.72 -3.59 17 993 2012-10-03 08:47:39 2003-04-07 12:59:11 7 4 912 6 119 541 17 87.20 46 43.00 CHANGED YpGlLPCADCsGI-TsLhLppDGTahLpppY.s+csts.phsuphsWscsuspls.L.sstu-..tspahss-ptL.MLDppGphlpGpL .acGlLPC.......A.......D....CcGI-...ToLhL.......c.......c.....D......G.......T..alhsEcY.LG......s.....+.-............c..P...s..o.F.suh.GT....W........s...c........s........u.......c.p......l.l..Ls.c..u...p..G.-....cs.....YY.+..s......p..s.s..s.LpML..Dp.-.Gpslpup..................................................... 0 27 68 98 +3516 PF04973 NMN_transporter Nicotinamide mononucleotide transporter Bateman A anon COG3201 Family Members of this family are integral membrane proteins that are involved in transport of nicotinamide mononucleotide [1,2]. 23.00 23.00 23.00 23.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.33 0.71 -4.82 105 2756 2009-09-11 15:40:44 2003-04-07 12:59:11 7 3 2145 0 399 1643 528 184.10 26 82.74 CHANGED shlEhlAslhGlhslhLsh+pslhsa.hGllushlYshlhapspLau-hhLplaYhh.hs...lYGWahWp.ptp...ptp......pth.lpphshpphhhhlhhhhlsshhh.uhlhp...pas.........push......Pal..Duhsoshullupahhu++hlEsWlhWlllDllulsLaht+Gl...............hhouhlaslahlhulhGah..pWp+th .......................................hlphluslhGllslh.hsuct.+h.hsahhG....llsss....l.ahhlh........a........p........h.........p.........L.Yuphlhplaahh.hs.....laGaa..tWp..ptp......................tpp................tplph..+h.h...s.hpp.h.......hh...h...ls..l.s...ll.shhlh...shhhs...h.s............................ssh....................Pah....Duhhh.slSllA.hLh.s++.a.h...EpWlhWllhsllu.lslah.hpuh...............hshsh.Yhlahh.ulhGhh...Whp.t.................. 
0 104 221 319 +3517 PF01233 NMT Myristoyl-CoA:protein N-myristoyltransferase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of NMT are structurally similar, each adopting an acyl-CoA N-acyltransferase-like fold. 25.00 25.00 25.60 25.90 22.60 24.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.00 0.71 -4.62 6 493 2012-10-02 22:59:21 2003-04-07 12:59:11 14 9 326 58 293 473 38 152.50 55 33.97 CHANGED -GPIDK.hs.-cVppEPhsLsuGFEWsolDlsDctQLcElYsLLs-NYVEDcDAMFRFsYStEFLpWAL+sPGW+.-WHlGVRVppopKLVAFIuAlPssl+Vp..sKslssVEINFLCVHKKLRSKRLAPVLIKEITRRVNlssIaQAlYTAGllLPsPVoTC .........................................shc..plcpEPasL.Pp.uFpWsolDls..s..pp.LcElYsLLs-NYVED..DDsMFRFsYS.-FLpW...........ALp..PPGWh.pWHsGVRl...........p.....p.....o.pK.LVuFISAIPssl+lh.....................cp......h......h.............+.hsEINFLCVHKK.LRSKRlAP.VLI+.....EITRRV...NLpuIaQAlYTAGlVLPpPVuTC................... 0 93 151 228 +3518 PF02799 NMT_C Myristoyl-CoA:protein N-myristoyltransferase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain The N and C-terminal domains of NMT are structurally similar, each adopting an acyl-CoA N-acyltransferase-like fold. 
20.60 20.60 23.80 20.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.23 0.71 -4.92 39 510 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 331 51 298 486 34 186.00 47 42.34 CHANGED EVGFStLsss.T.....hs+tl+hYpLPcpspTpG.LR.McpcDlspVpcLLpcYLp+F.cLs.hF.ocEElpHWhLs........pcsVlhoYVVEsss..t+ITDFhSFYsLPSTVlpss.pHcsLpAAY.FY.soss.................ppLppLhpDALIlAKp.hsFDVFNALslhDNphFLccLKFG.GDGpLpYYLaNa+sss.hss.............oplGlVL ...............................................-VtFSpLstshT.....hpR.ph+hY+LP..c.................s...s.............p..TsG..LR.Mc.+DlstVpcLLppYL.p.p.F.cLsPsh.scEElpH...WhlP.......................p-pll.oaVVE..............ss..............s....tclTDFhSFYoLPSolhpps...pHpsL+AAY.aY.sssp.............................................................................ssLhpLhsDAL.IlAKp.psFDVFNALslM-Np..pF.LccLKFG.hGDG.p.LpYYLYNa+sss.hts............................pplGlVh....................................... 0 95 157 236 +3519 PF02070 NMU Neuromedin U Mian N, Bateman A anon IPR001942 Family \N 18.40 18.40 18.90 22.80 18.10 16.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.68 0.72 -6.91 0.72 -4.42 8 56 2009-09-11 14:39:29 2003-04-07 12:59:11 10 1 29 0 18 49 0 23.70 66 18.09 CHANGED aKlDEEaQGPhuuQSRGYFLFRPRN .....hcEEhQuPhssQSRGYFLFRPRN 1 1 2 5 +3520 PF03980 Nnf1 Nnf1 Finn RD, Wood V, Mistry J anon Wood V Family NNF1 is an essential yeast gene that is necessary for chromosome segregation. It is associated with the spindle poles [1] and forms part of a kinetochore subcomplex called MIND [2]. 
22.40 22.40 22.50 22.70 22.10 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.15 0.72 -4.06 26 221 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 191 0 149 223 3 104.50 26 47.01 CHANGED spsLcssppQhsphhpppspcEFspIhcERslpp+LsELDcllpcAccRpct.t.............................sshhcsLsPpcllcupLhsthppphspLspplpplpt-NppLtpp..lpphccp .....................t.hpplhcphhppLppshpcEhpcIhcEpslpttLNcLDcllpEA+p.Rcp..t.....................................................................ss.hpsh.sPcphlpuplh....shh....hpttspLptplpphptpNtpLtpplttt+p................................... 0 35 71 115 +3521 PF02898 NO_synthase Nitric oxide synthase, oxygenase domain Griffiths-Jones SR anon Structural domain Domain \N 21.00 21.00 21.90 21.70 20.30 19.50 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.17 0.70 -5.79 8 836 2009-01-15 18:05:59 2003-04-07 12:59:11 10 23 543 489 206 694 26 326.40 50 49.70 CHANGED sssRssE-LLspA+DFlsQYYoSI+RtpScAHhuRLpEVpKEl-sTGTYcLTpoELlFGAKpAWRNAPRCIGRIQWuKLQVFDARcsoTApsMFEtlCNHIKYATN+GNlRSAITIFPQRTDGKHDFRlWNsQLIpYAGYKpsDGollGDPAsVEFTElCIcLGWKP+pGpFDVLPLVLpAsGcDPEaF-.lPPELVLEVPlcHPcYcWFpELGL+WYALPAVSNMLL-sGGLEFoAsPFNGWYMGTEIGsRNhCDspRYNlLEcVAp+MGLDT+sssSLWKD+ALVEINlAVLHSFQpsNVTIVDHHoAoESFMKHh-NEh+hRGGCPADWlWIVPPMSGSlTPVFHQEMlNYhLpPuFhYQ.s-sWKTHhWK 
.........................................................Lh.pAptFIpphYpp.....h+h.....t.p.hppRlc....-lphEIcpTGTYph...Ttp.ELlaGAKhAWRNus...RCIGRl.WspLpVhDAR.......cssstpthhptl..spHIphATN.pGpl.+..ssIT.IF.s..+t..sG.t..s..h+I.aNsQLIRYAGYc.p.....G.hh.GDPAsh-hTcls..c.LGWp.s...p...t..s.pFDVLPLlhQh.s.s.c.s.Pcha-.lPscLlhEVPIcHP....c..a.....h.hpcLs....LKWYAlPhlSNMhL-IGGlpa...suuPFNGWYMsTEIGsRN.......hsDstRYN...lLE....cVActhsL.....DTp.+.s..sS...................L..........WK.......D+ALVElNhAVLHSFp............pp.sVoIVDHHoAucpF.pa.cNEtp.ttt.s..ss..ucWsWllPP...lS.solTslaH..p.t.h..s.N.hh..hpPsFhYp......c..p........................... 0 54 88 134 +3522 PF02474 NodA Nodulation protein A (NodA) Mian N, Bateman A anon Pfam-B_2183 (release 5.4) Family Rhizobia nodulation (nod) genes control the biosynthesis of Nod factors required for infection and nodulation of their legume hosts. Nodulation protein A (NodA) is a N-acetyltransferase involved in production of Nod factors that stimulate mitosis in various plant protoplasts. 33.80 33.80 38.20 34.30 27.00 23.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.39 0.71 -5.02 30 1084 2012-10-02 22:59:21 2003-04-07 12:59:11 10 1 856 0 23 753 0 170.20 67 98.03 CHANGED MRSpV+W+LCWENELpLuDHlELu-FFRKTYGPTGAFNAKPFEGuRSWAGARPElRsIuYDu+GVAAHhGlLRRFIKVGplDLLVAELGLYGVRPDLEGLGIoH..ShRVMYPVLQcLGVPFuFGTVRpALcpHlpRhs.RpGLATIloGlRVRSTLsDVahDLPP.TRl.EDlLVlVhPIGRshSEWPsGTlIDRNGPEL .......................................-H.ELu-FFRKoYGPTGsFNAKPFEGuRSWAGARPElRAIuYDucG.lAAHhGlLRRFIK..VGp......sDLLVAELGLYuVRPDLEGLGIuH..S..l.R.sMYPlLp-LGVPFuFGTVRpALcpHlpRhs.R.pGlATllsGlRVRSThs-la.DLP.P.TRl.ED.l.LV.l.VhPlu.RshoEWPsGshI-RNGs.............. 0 2 9 12 +3523 PF01798 Nop Putative snoRNA binding domain Bashton M, Bateman A anon Pfam-B_1362 (release 4.2) Family This family consists of various Pre RNA processing ribonucleoproteins. 
The function of the aligned region is unknown however it may be a common RNA or snoRNA or Nop1p binding domain. Nop5p (Nop58p) Swiss:Q12499 from yeast is the protein component of a ribonucleoprotein protein required for pre-18s rRNA processing and is suggested to function with Nop1p in a snoRNA complex [1]. Nop56p Swiss:O00567 and Nop5p interact with Nop1p and are required for ribosome biogenesis [2]. Prp31p Swiss:p49704 is required for pre-mRNA splicing in S. cerevisiae [3]. 25.00 25.00 27.70 26.40 23.90 22.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.53 0.71 -4.91 16 1302 2009-01-15 18:05:59 2003-04-07 12:59:11 13 19 497 30 857 1287 98 145.10 41 29.75 CHANGED plpchscplhpLt-hRppLhcYlcs+MsplAPNLosLlGphVGARLIuHAGSLppLAKhPASTlQlLGAEKALF+uL+.....o+ssoPKaGlIapushIppussps+GKluRhLAAKsulAuRlDshucp.ss....hG..h+pclEpRlcplcpt ..................................................................ltphscp.llpLschRp....pLhpYLps+MptlAPNLosLlGp..h.....lGARLIu.+A.GSLtsLAKhPAS.T..lQlLG......AE..K....A.L....F...R...ALK........T+.s...s...TPK....aG..l.IYHush..........ls..p.u..........s....s.cpKG+..huRhLAuKsulAuRlDs....h....u..-......sss........s..............hG.ph+p.plEpRLchhp..s.......................................... 0 299 489 709 +3524 PF02451 Nodulin Nodulin Mian N, Bateman A anon Pfam-B_2163 (release 5.4) Family Nodulin is a plant protein of unknown function. It is induced during nodulation in legume roots after rhizobium infection. 
25.00 25.00 168.50 32.00 19.20 19.20 hmmbuild --amino -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.91 0.71 -4.85 6 33 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 2 0 5 25 0 114.60 33 83.64 CHANGED sEAYESP+FKKFVTcCoSHVuETCSssss.pspEAl+..psshGLuaCLFDSMEp..CLs-HpAulhs.....................................psplcshsshPsolp.....................................................................................................s...........tP............................................lLIpTlpFRTVL+TCS+VSARoCLTAPNVATSsLsACLtPShNQCVYPsu....s.sssssPPI ..............h..ss+hp+hlT.souHVsppCS.......ptt.h...s...huhphhspM.t..Ch..atsth...............................................................................................................................................................h..hhphpsshthCo..oAp.ChssssVssSsL.th.hs.hppClY.................................. 0 0 5 5 +3525 PF01189 Nol1_Nop2_Fmu Nol1_Nop2_Sun; NOL1/NOP2/sun family Finn RD, Bateman A anon Prosite Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.53 0.70 -5.03 10 8954 2012-10-10 17:06:42 2003-04-07 12:59:11 12 43 4410 23 2832 7258 1507 238.20 24 57.46 CHANGED llpsssspsshslRsNspKtsRcshtpsL-ppulshpsluphsps..hshcpssulssLPtapsGashlQstSu.hsshhLsPpsc-t.............ILDhCAAPGGKToalsplhhp.pupllAsDhsppRlpplhsNlpRLGspshhlp...sDssc.tptssss......FDRlLLDAPCSusGsIc+cPcl+hhRp-pDItplspL..Q+cLLputh-hl.................KsGGhLVYuTCSltsEENpt..........sIchaLpc+scs-Lsspshscsht.ht.........hlphshphhP+ppstDGFFhAcLp 
....................................................................................................................................................h..t..............t......t..............................h..............................................................................................................................................h......tt.....h...s......t...a....t.p......G......h...h...hl.Q.-tuo....h..s....s....h....h.....L....s.......s....p....s....s..pt.......................................................l.LD..h..C.......A......A......P.........GG.......K..............T............s........p.l.........s..........p...................h..............h............t...............t......p.................G..........t...........l..........h....A...........-h..s...t.p.R....l...p..h..l......t........p........s........h.....p........R......h......G.....h......p.......s...h......h.....ht............t-..u..t.......p....h............t..h..h...tt.......................................FD+...l...Ll..D.A..P.CS....G.........G....s........l.......R.......+..........c..........P............-..........h...............t.........h.......h..h...........p.............p......p.........h.......t.....p...l.sph...........Qtpl.Lptsh..phl.............................................+..s...G.G..h..llYuTCo....h..pEN..pt........................................................lp.h...h.....L....p.....p...t..........t.....h.....p.......................h...................................................................................................hp.h..P....t..........................-.uhhhs................................................................................................ 1 951 1651 2323 +3526 PF04135 Nop10p Nucleolar RNA-binding protein, Nop10p family Kerrison ND, Finn RD anon COG2260 Family Nop10p is a nucleolar protein that is specifically associated with H/ACA snoRNAs. 
It is essential for normal 18S rRNA production and rRNA pseudouridylation by the ribonucleoprotein particles containing H/ACA snoRNAs (H/ACA snoRNPs). Nop10p is probably necessary for the stability of these RNPs [1]. 22.40 22.40 23.40 30.30 22.10 22.30 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.75 0.72 -4.23 44 426 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 393 22 298 416 50 51.40 49 80.10 CHANGED hchhl.csstpc......lYTL+..pss.sGp.TtsuHPARFSP-DKYu+YRltlK+Rhs ..............hhahl.sssGpR......lYTLK........Kss.sGp......TpSAHPARFSPDDKYS+aRlslKKRa...... 0 95 170 244 +3527 PF05048 NosD Periplasmic copper-binding protein (NosD) Moxon SJ, Bateman A anon Pfam-B_5499 (release 7.7) Domain NosD is a periplasmic protein which is thought to insert copper into the exported reductase apoenzyme (NosZ) [1]. This region forms a parallel beta helix domain. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.36 0.70 -5.21 120 1281 2012-10-02 14:50:22 2003-04-07 12:59:11 8 160 695 0 642 3494 729 202.70 19 40.37 CHANGED hsNshsusshhh............sGlhl.ts............ssshl.....psssltpspsul...hhttupssplpsN....thpss.....thGlphhtuss...........shlpsNhhpssp...........tGlhlhsop....tt.slp.....sNplpsspt.......GI.hLptuss.......sp...lpsNpl...................ssspGlhlhsu..........ssNsls.sNplssN............shGlh...................hssS.......ps.....NplasNhF.sNs.slphssst...........sspW................ssst.................GNaW.uch.sG.........tD.tsuDG..IuD...s....sY....phs..ssh....DhlPLhtst 
...............................................................................................................t.....................................................................................h..............t..h............t.sul............hh..t.s..p..p..sh.l..pss...........thpps..............phG.lp...h.h...sss..................shlps.N.h.h.ps.st................sG..lh....lhtss..............ps..plp..................sNthtssps..............Gl..h.l....p...t..s..s.s...............sh......lps.N.p.l...........................................t..ss.....s.....p.....G..l.h.lh.su..................spsplp....sN.p.hp.ss.......................t.h.Glt......................................hs.s.u..........ps..............Ntl.h.t...N.t...h...s.N.....t....h...........................................................................................................................................................................................t................................................................................................... 0 208 429 517 +3528 PF04054 Not1 CCR4-Not complex component, Not1 Wood V, Finn RD anon Pfam-B_13503 (release 7.3); Family The Ccr4-Not complex is a global regulator of transcription that affects genes positively and negatively and is thought to regulate transcription factor TFIID [1]. 
22.50 22.50 32.10 23.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.29 0.70 -5.60 30 447 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 288 0 299 459 11 319.90 38 17.68 CHANGED sLLh-hs........ph.pshpspph....tphhpsFuphhptLpPhthPGFsFAWlpLISHRhFLP+lLp.hspppGWshaspLLhsLl+Fls.al+ssp.lscslpllYKGsLRllLlLhHDaPEFLsphHaphCssIPssClQLRNlILSAaP.+sh+LPDPFsssLK.l-hLs-hppuPplht..s.stsLps..tlKKslDsaL+t.tsssshlpplhst..lhhsppctt......sssphNl.......pLlNAlVLalGhpAlsphpppspssshssps..sthsllppLhpcLcsEsRYhhlsAIANQLRYPNsHTHaFspllLplFs.p.....sssppsIQEQITRVLLERlIVN+PHPWGLlITFhELlKNs.pYpFachsFl+ss.PEIcplFcsls+s .................................................................................................ht..................t.t.......phh.sFspthphLpPhthPGFsauWLpLlSHRh..FhsphLt.............h......s.p...p.p............GW.shatpLLhsLhcals...saL+...ss...........p.....lspshphlYK..GsLRl.LLlLhHDFPEFLs-aHathCssIPssClQLRNlILS..Aa.P.cs..h..+L..PDP.....F..sssL.K.l..........-hLs..Eh.p...sP.plhs..shsssl.......thKp.lDpaLps.....tssssh...l.pp..lpptl.hs.pt.....................sssphNh.................LlNALVLYlGhpAl........tphpp.................pst....s.....s..............s.hs......s.....s...................sths.lhppLh...hpLcsEuRYhhlsAlANQLRYPNuHT+aFStslLhLFu................psppptIpEQIsRVLLERLIVpR..PHPWGLllTFhELl......+Ns.tapFaph.Fl+su.P.ElpplFpslhp.s............................................. 0 116 177 255 +3529 PF00066 Notch notch; LNR domain Sonnhammer ELL, Bateman A anon Swissprot_feature_table Domain The LNR (Lin-12/Notch repeat) domain is found in three tandem copies in Notch related proteins. The structure of the domain has been determined by NMR [1] and was shown to contain three disulphide bonds and coordinate a calcium ion. Three repeats are also found in the PAPP-A peptidase [2]. 
21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.82 0.72 -3.69 53 1456 2009-01-15 18:05:59 2003-04-07 12:59:11 12 302 120 25 783 1259 39 37.40 38 5.70 CHANGED p.hppCs..........ttCtpph.usGhCDpp..CNshsCtaDGsDCs .........................t....pCt..........ttC.tsph..uDGhCDpt..CNsttCtaDGsDCp. 0 251 317 518 +3530 PF03000 NPH3 NPH3 family Bateman A anon Pfam-B_1584 (release 6.4) Family Phototropism of Arabidopsis thaliana seedlings in response to a blue light source is initiated by nonphototropic hypocotyl 1 (NPH1), a light-activated serine-threonine protein kinase. Mutations in NPH3 disrupt early signaling occurring downstream of the NPH1 photoreceptor. The NPH3 gene encodes a NPH1-interacting protein. NPH3 is a member of a large protein family, apparently specific to higher plants, and may function as an adapter or scaffold protein to bring together the enzymatic components of a NPH1-activated phosphorelay [1]. 
20.60 20.60 21.40 20.80 20.20 19.60 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.58 0.70 -4.87 51 658 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 36 0 421 661 0 228.20 33 41.97 CHANGED pD.WWhEDl.stLsl-hacR.llsuh+u+..slpsclIupuLhtYAp+hLssh.............................t.sss.sppcp+tllEoIlsLLPs.c+ss....l...ospFLhtLL+sAhhLsuut...ss+pcLE+RIGhQL-pAol-DLLIPu.........hssp..........slYDVDhVpRIlcpFlpp..................t.........................................t.tt....ts.stsshhpVuKLlDuYLAElA.sDs.NLplsKFhuLAE...slP-tAR.hcDGLYRAIDlYLKsHPtLo-sE+c+lC+lhDCpKLShEAC ...........................................................sWWh-Dl.s.Lplshap+.llhuhcsp...sh.......p.ph..lutslhhYAp+h.L.sh..................................................................t..ss....tptcp+hllEslls...lLPs..c+ss...................ssspFLhtL.L+sA.hhlpsu....ss.+tpL.E+RluhQL-pAolsDLLlPs............ssp................shaDl-hVp.Rll...ptFltt................................................................t...ts..sstsshhpV...u+LlDsYLuEl.A..D..s.sLphsKFhsLAp...slPc.AR....hcDGLYRAlDhYLK.....s....H..P..tlsctE+cclCp.lh-spKLS.-As...................................... 0 54 267 350 +3531 PF03116 NQR2_RnfD_RnfE NQR2, RnfD, RnfE family Mifsud W anon Pfam-B_2882 (release 6.5) Family This family of bacterial proteins includes a sodium-translocating NADH-ubiquinone oxidoreductase (i.e. a respiration linked sodium pump). In Vibrio cholerae, it negatively regulates the expression of virulence factors through inhibiting (by an unknown mechanism) the transcription of the transcriptional activator ToxT [1]. The family also includes proteins involved in nitrogen fixation, RnfD and RnfE. The similarity of these proteins to NADH-ubiquinone oxidoreductases was previously noted [2]. 
22.90 22.90 23.30 23.60 22.80 22.80 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.05 0.70 -5.24 169 2456 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 1700 0 519 1837 989 327.80 35 93.22 CHANGED shhspsuPHl+ssh.shp+lMhhVllAhlPuhhhu.hahaGhts..................................................................LhhhlluhlsulhhEslhtplRp+pl...h.DtoslVTulLlAlslPsshPhWhlslGssFAllluKplFGGlGpNhhNPAlsGRshLhh.uaPspM.os....Ws...................s.............s........Duh....osATPLshhpss.......................h........................p..hs.......hhshFhG.h.........GulG..EsSsLulLlGGlhLlhp+lhsW+Isluhlsuhhlhuhlhthh...s.............s.ht.hsshaHLlsGGhhhGAhFhATDPVouuhTs+G+hlaGhhlGllshlIRsau.uYPEGlhaAILlhNhhsPLIDa..astspshu++ ......................................................................h.hhtsusas.+.sth.ssp+lMhhVllAhl.Puhhht..ha..a.G.hts......................................................Lh..hhl.uhlsulhhEslh.h.............pl.R.++.....pl..............h....-...tus.l.......lTulLlAlslPP.......hh..PhWhsslGssFu....llluKpl.a.....GGlGpN.hNPAh.sGhs..h.Lhh....u...aPspM...ou.......Wh....................................s................................hDGh....otATPLsthpsu................................................htt........hs........ptht.....hh.shahG...ls.......GuhG...Eh..ssL.AhLlGG.l.hLlap+.lhpW+...Issuh.l..lsh..hlhuhl.hthhu........................s.hh..hsshhH.LlsG.u.....hhlGAhFhATDPVouuh.Ts+G+hlaGhLlGlhshlIR...shu...uYP..-..G.....lhaAlLluNlhsPLIDaash.+shs+................................................ 0 174 339 445 +3532 PF01566 Nramp Natural resistance-associated macrophage protein Bashton M, Bateman A anon Pfam-B_624 (release 4.0) Family The natural resistance-associated macrophage protein (NRAMP) family consists of Nramp1, Nramp2, and yeast proteins Smf1 and Smf2. The NRAMP family is a novel family of functional related proteins defined by a conserved hydrophobic core of ten transmembrane domains [5]. 
This family of membrane proteins are divalent cation transporters. Nramp1 is an integral membrane protein expressed exclusively in cells of the immune system and is recruited to the membrane of a phagosome upon phagocytosis [1]. By controlling divalent cation concentrations Nramp1 may regulate the interphagosomal replication of bacteria [1]. Mutations in Nramp1 may genetically predispose an individual to susceptibility to diseases including leprosy and tuberculosis conversely this might however provide protection form rheumatoid arthritis [1]. Nramp2 is a multiple divalent cation transporter for Fe2+, Mn2+ and Zn2+ amongst others it is expressed at high levels in the intestine; and is major transferrin-independent iron uptake system in mammals [1]. The yeast proteins Smf1 and Smf2 may also transport divalent cations [3]. 29.80 29.80 29.80 29.80 29.70 29.70 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.11 0.70 -5.66 126 5115 2012-10-03 01:44:59 2003-04-07 12:59:11 13 22 3156 0 1386 3862 534 343.50 27 78.31 CHANGED hATslpAGupaGYpLLWlllhuslhshllQthuA+LGlVTGc....sLuphh+cca......s.phhshhhhhhstlushAsslAEllGs.AluLplLh..s.......lPh.........hhusllsuhsshhlLh...ptu.a+hlEtllhsLlsllslsFlhplhlspPsh......splhtGhl.Pshss..........sslhls.....lullGuTVhPaslaLHSullp.s+t..h..............................pt............l+hsph.Dsh.lul.sluhllshulllhAAusha.....tstt..tl........................sslpsAtpsLpP........lhG..hA...shlFuluLluuGluSshsushAGthlhpGhl.phchsh.h+h.hsp.hthhsshhslhhhussht............hLlhSQ...VlhulhLPhsllsLlhhsss+p.....lM.G.pasNsh 
............................................hoshtuGu.paGY.t.............L.La.l.ll.lu.slhuhllQ..............hhuu+.L...Gls.T.G.t............sLuch.h.p..c....ph.............................s.p.h.h.....sh.....hh.h.....l.....hstl.u..h.....h...........Ao..s.l......u..E..................l.l.Gu....A....l...u..l....p.l..Lh...u.............l.sl......................hh.Gs....ll.Ts.ls.sh...l.l...L.hh..................p..........t..........G......h+hl.Etl...lh.sLl.h.h.....lh.....l..s..a..h..h..t..l..h...h....u.....p.....P..s...h.........s.pl.h.....p..Gh.l...Pp.h.h.............................tslhhs.........................lul.l..G.....AT.l.MP....H...s..l...a.LaSulsp.s+t.hst.....................................................................cp........ptlphsp....h..D.ss.lul....hl.u....h.h.l.N..h....slll....huAushh.......ssts....sl...........................................................................................ssh.tp..sh..ps..L....ps...............................hh....G.....hu......shlF...u.lu....LlAuGhsSol.su.........o..h......uG..p...hl......hp......G.hl......p....h......c.h...s.....h......h..............h............R....p.........h.l............T..........h....hl.....P..s....h..........l.l..h.h.h.hs.ssst................................lL.l.h.u.Q......Vlh.u...h...t.L..Ph.sl.ls...L.lhh..o.s.pcp..lMG.phhs..h............................................................................................................................ 0 404 819 1134 +3533 PF03813 Nrap Nrap protein Bateman A, Wood V anon Wood V Family Members of this family are nucleolar RNA-associated proteins (Nrap) which are highly conserved from yeast (Saccharomyces cerevisiae) to human. In the mouse, Nrap is ubiquitously expressed and is specifically localised in the nucleolus [1]. Nrap is a large nucleolar protein (of more than 1000 amino acids). Nrap appears to be associated with ribosome biogenesis by interacting with pre-rRNA primary transcript [1]. 
22.90 22.90 22.90 23.50 21.70 22.60 hmmbuild -o /dev/null HMM SEED 972 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.00 0.70 -13.73 0.70 -7.02 26 425 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 280 0 303 441 12 679.90 22 81.89 CHANGED SauL+Tulp.p.psh..slDlslTMPcplFpcKDYLNaRaaaKRAaYLAhlAttL.....ccsts.thplpasahssDsLpPlLhlp...Ppsppssc...hco+....hpIpllsuhPpslFs.p+LLPs+NsIRsspssp.............PTPaYNuollo-sshstYhKhLapst+....ps-uF+DAClLGRlWLpQRGFuSuhpp....GGFGsFEauhLhuhLLpG....G......G.pGpplLhpuaSSYQLFKuslpaLAspDL.tppslhhsuss..............httshpsPslaDssptlNlLhKMoshSYphL+pcAphTLphLN-stpDpFstlFltKhs..shh+aDtllcl...............sphssspplphhphps.......h..plaplLc+AL.....G-RlphIplp..tpsppsaslpp+hsspttp.......slplGLllNP.ccsp+lVD+GPuu....EcccEutpFRpFWG-KA-LRRFKDGSIpEolVW...Ss...ssptsIlppIlpYlLcpHlphs..scclph....hucpFcphL..s..............shsssstsuFtsltcAFssLpKslpsl-.tLPLsl+pltPsussLRYoulp.PhPh................tsssh..-VllQFEoSu+WPD-LsAlp+TKsAFLLKIu-pLp.sss..hpshlsh-pp...hshthstshLsllpspGasFRLRlhp-REpsLhcRtlt........ssssptK.tpst.shhtac+pahtsP+HTpslpsLsp+ashaSPTlRLhK+WhsuHLL..ssHl.s-EllELlsl+sFlpPhPassPuSspsGFLRsLpFLucWDWpp-PLllDls.............................scLstpphctIpppFpshRp..DPsh..sp...sshFVAoc.D..ssGlhWo.....ptsshslAsRlTuLAKuAh.pllc....ppGh.............phslspLFpsuLpsYDFlIpL+sp.slpspps................ssspFKNL..pp..stsp....hsp.....tsshc.h.taVc-LptpYu....ssllFFps.........spsssVIuGLWs.Pphhps+sa+lshshsspPhs......................s.cpVpl..N+-AllsEIupLGs-LlpsI-s....p 
.....................................................................................h-h.l.hPtthht.cDhhs.ph..KRshalthlht.l............................................th....tht.....ttp.h.s.l.l....................t..............................................lplh.s.........t....h...t..........+h..s.t.......t...ssl........+.................................t.............................................sTPhYNt.lh.p......h......phh.th............t...thtpuhhLh+hWhpp+t.......h..t................................tshs..th...h..sh.....hh.hLhpt...................................l...hs....shphh+.hh......hlu..t..p.h.....t.h.ht...............................................shh.c..t.hNlh.phs...ht............l.p..t..A.t..sh.h.h.pt............s..t.....F....t.hhh......................taD.hhpl...................................................................................l.thltpuL.......spRhphl.h........................................h.h..ttt................................l.lul.h.ps.tth.ph..lphGP.s......................pt......tuttF+pFWG.t.+.upLRRFpDGsIhcs.................hlW....................p......tt..l.t......plhp.ahL..thH..h........tptl...............t.hp..h.........................................s.t.....hhpsapplt.................p.l.t...........hp.......lPLplttl.shts.hRhsp...s.s........................................................................................phh..hph-tSspWPpph.ulphh+hAh.l.pltc.Lt.t......................................................p...t.....hplh.h....p......uhhF+h.lhhth-....hh.t...............................................hp..h...s.htthlpsht.....t.pa...hussh+lhKpWhtsp.hh........hh..tp.h-Llsh.ha...........h.............p............................s..................Pt....................o..suFh+hL.hlupasWp..pPl..ll..shs.............................t.php...tp..tltpth.t.Rt...........................hhlso..D...................ts.....hao.................t..s...lhtphh.lA.ttu.h.thlt........t.................................ph...t.....lF...h...ssht.a.D
h..ll.lp.t.....t..............................................................................h................................................hp.......hht...Lp.....at........t.hhh..t...........ttt.lshhh..............h.t..................................................................................t.......................................................................................................................................................................... 0 118 180 266 +3534 PF03916 NrfD Polysulphide reductase, NrfD Finn RD anon DOMO:DM04466; Family NrfD is an integral transmembrane protein with loops in both the periplasm and the cytoplasm. NrfD is thought to participate in the transfer of electrons, from the quinone pool into the terminal components of the Nrf pathway [1]. 20.30 20.30 20.30 20.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.25 0.70 -5.02 13 2377 2012-10-01 19:35:38 2003-04-07 12:59:11 9 13 1174 0 635 1611 579 296.60 25 81.36 CHANGED ssstphssl...sWuhslAh.YhFll..GluAG..ulhlAhhh....chhptp..ttt..s.s...tl.lpsusll.uslsll.h.ullhllhDLs.RP..+Fhthh.h...asF..o...Shhs..h.Gl..hhht..sahslll...laLhhhhct..pltsht......h.s...........ht.hlt+....lhsl...u...........tthsp..sltllh.hlhAlhluuhTGaLlSslhuhPhhpsslLPs.hFlAuuhuSG...lAs.hllshll.hs+hps.csp.ssthlpphthhl.hht..phhlLs......h....hhV.G..hth.......ssstp..hpuhps...A..........Lpu.hh...s......hhFhlGlshlGlhlPllLt.h..hlsc.lptshshl.lhuulhsLsGshhhRahllhsG.hh.lu 
..........................................s....h.tsh..sWs.h..IAh.alalh..Gl.u..uG..uh.h.l.Ahhl..................hh...apht.......tht....s.t.......hl..h+.s.s.hl......h...u.h.h.u.lh..l..Ghlhl...hhc....l............s....R..........Pa....ph....h.......h...h.....h....h.........h..h.......s.......a........s..................Sshs...h..ss....hhhs........lY.h..s...l.hs.....la.hhh..hh..t....cl.h.tht............................................................lp+........h....hth.....................................................thhh....hlts.ls....hlL.u.s....hl..tuh..sG..hL....l..o..u.h..h.s..h.....P......h......W..p.....s........s..h.....hPs..LF.l..hou..h.uG...h.uh...slh......h.h....h..........l.....t.........t...........p.hp...........s.....p..........s.....-....t............p...h......l...t..th......hsh....lhs....tl.hl.lh.....................s.......h...a...l...s......lth.............uss.t..h......hp.th.sh.....s...............................................h.s.u..s..h..a.........s.................hhF..W.l.t.l.l..h.h.s..h.l..hP..l...l...l......h...........hh.....s+...h.....p.p.s..h.t....h....l......h...hsu.h....h...sLlGs.h...h.h.R.h.llh.s.............................................................................................................. 1 251 451 567 +3535 PF02723 NS3_envE Non-structural protein NS3/Small envelope protein E Mian N, Moxon SJ, Bateman A anon Pfam-B_1913 (release 5.5) & Pfam-B_7381 (release 8.0) Family This is a family of small non-structural proteins, well conserved among Coronavirus strains. This protein is also found in murine hepatitis virus as small envelope protein E (e.g. Swiss:O72008). 
25.00 25.00 27.80 27.60 19.90 19.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.91 0.72 -4.06 12 303 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 234 0 0 157 0 78.20 35 97.76 CHANGED M.h..slhhh-ssshVlshllhhllhllhLllslAhLshI+LChpCsshsNshlhtPshalYs....hYpsa..clpP.ss..s.l ......Mh..shsht-TsshVlshlhlhLlhllllllslAlLssl+LChhCCslsNshllpPolYlYs+s+phYpsa..phps................. 0 0 0 0 +3536 PF02071 NSF Aromatic-di-Alanine (AdAR) repeat Casavant T, Bruckert F, Bateman A, Mian N anon IPR000744 Repeat This repeat is found in NSF attachment proteins. Its structure is similar to that found in TPR repeats Pfam:PF00515. 27.00 1.00 27.10 3.10 26.90 -999999.99 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.98 0.77 -6.20 0.77 -2.28 51 10 2012-10-11 20:00:59 2003-04-07 12:59:11 15 2 2 0 5 284 6 13.00 28 22.38 CHANGED AuptappAupha .....AuppYpcAAcha...... 0 5 5 5 +3537 PF05064 Nsp1_C Nsp1-like C-terminal region Wood V anon Pfam-B_3555 (release 7.7) Family This family probably forms a coiled-coil [2]. This important region of Nsp1 is involved in binding Nup82 [2]. 22.50 22.50 22.50 22.50 22.30 22.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.49 0.71 -4.45 30 353 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 282 2 233 349 0 113.90 30 21.20 CHANGED ssspssp.sssss........LcsKsL--lIsKWop-LscppKpFppQAspVspWD+hLl-NG-cIspLassshcsEpsQscl-QpLpalcsQQcELEphLssYEpchcplh.sph..sps.......stc ........................hs.....t...ssss.s.........hphppL..--lIs+Wsh-Lpcpp+cFhpQAspVssWD+hLlcNGc+IppLap.cstcsctsQpcl-ppLshltsQQcEL-shLsshEcplcp...sp........................................ 0 80 128 189 +3538 PF03146 NtA Agrin NtA domain Bateman A anon [1] Domain Agrin is a multidomain heparan sulphate proteoglycan, that is a key organiser for the induction of postsynaptic specialisations at the neuromuscular junction. 
Binding of agrin to basement membranes requires the amino terminal (NtA) domain [2]. This region mediates high affinity interaction with the coiled-coil domain of laminins. The binding of agrin to laminins via the NtA domain is subject to tissue-specific regulation. The NtA domain-containing form of agrin is expressed in non-neuronal cells or in neurons that project to non-neuronal cell such as motor neurons. The structure of this domain is an OB-fold [1]. 20.80 20.80 20.80 22.80 20.00 19.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.79 0.71 -4.49 2 48 2009-01-15 18:05:59 2003-04-07 12:59:11 10 18 29 4 20 43 0 116.40 73 11.37 CHANGED TCPERALERREEEANVVLTGTVEEILNVDPVQHTYSCKVRVWRYLKGKDlVApESLLDGGNKVVIuGFGDPLICDNQVSTGDTRIFFVNPAPPYhWPAHKNELMLNSSLMRITLRNLEEVEFCVEDKP .....CP.E+sLE..+REEEANVVLTGTVEEIlNlDPVp..pTYSCKVRVWRYLKGK-l......Vsp....E.sL.LD...GGNK...VVIGGFGDPLICDNQVSTGDTRIFF.VNPAP.YhWPAHKNELMLNSSLMRITLRNLEEVEaCVE-K................................ 0 7 8 13 +3539 PF02136 NTF2 Nuclear transport factor 2 (NTF2) domain Mian N, Bateman A, Griffiths-Jones SR anon IPR002075 Domain This family includes the NTF2-like Delta-5-3-ketosteroid isomerase proteins. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.45 0.71 -3.72 43 1695 2012-10-03 02:27:23 2003-04-07 12:59:11 15 45 499 189 981 2094 108 119.60 22 36.72 CHANGED luptFlppYYpthss.....c.ptlsphah..t-sShhshsGp................thpGhpsIscphtsLshsp......h.ptplsslDsQ.......ss.ttslllhVsGplpsssp..h...ppFsQsFhLsspp....tsaaVhsDhaRh ................................uptFlp.pY.Yphhsp...........sh....p.t.LtphY.........tcs..o.h..h..sh.tsp......................................................shp.G..tp.sIt....c.t.h...p...s.L..s..hsp.............h..phpl.p.s..lDsp...............................sshpssl.ll.V.sGtl..............p....s..sp..................hpFsQ..sFlLssps..........ssahlhsDhhp................................................................................... 0 254 446 711 +3540 PF00483 NTP_transferase ADP_glu_Plase; Nucleotidyl transferase Finn RD, Bateman A anon Bateman A Family This family includes a wide range of enzymes which transfer nucleotides onto phosphosugars. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.68 0.70 -5.08 64 22811 2012-10-03 05:28:31 2003-04-07 12:59:11 18 112 5108 164 5670 20398 12452 242.50 22 69.14 CHANGED tulILuGGsGTRLaPLT+shsKphlshhsp...shlphslsslh...uulpc.lllsspppphhlpctltcssphu.........lplphshpstsc.........GoAsAltlut.hltppps......hLlLuuDtlhp...satphlppttspss....ssohhhhsspssotaGllchs......sst......plhpFhEKPptsp.....................sshsshGlYhassslh.hh.p..lpp.tctcspls-hhtthlcpsphshshhhps...................................WpDlGohcula-usttltpt .........................................................................................ulIh.Au.G...h..G..T.R..h......h.......P....h...T........c....s..........h.......s..........K......t...h.l......s......l........s...........s.......+....................s...........hIp............a.sl....p...p...h.......h.......t............u..............G.....l.....c....c.........l.....l.l....s.....s...t.........t....p.........t.........p...t.....l..p...c....h...h....s...p..s.......p..ht......................................h..p.l.t.h..h...h...p..s...p.st.........................G..h.u...p..A....l...h...h..u...t..t..h..l.ssps...................hllL.s....s....D....h...l..h.s.......................sh.p...p....h...l........p.......t.........t..........t....p...p...s.....t.....................h.....s..s.............h..h..h......h........h....p...p....s...p....t...a...G....l..l...phs......................................pps..........pl...h...p...h...h...E....K.....P..p....t..s.p........................................ssh..s..s....s..G..hY....h..h.s..s..p..l.....h...........h..h...........p............................p.................t.........t.....t.......s......c........................h....p.......-......h.....h........h...h......h...p....p.....t...p....h........h...t....h...hts........................................................................................................h.D.h
G.s....htshhph.......th........................................................................................................................................................................... 2 1857 3653 4776 +3541 PF05014 Nuc_deoxyrib_tr Nucleoside 2-deoxyribosyltransferase Bateman A anon COG3613 Domain Nucleoside 2-deoxyribosyltransferase EC:2.4.2.6 catalyses the cleavage of the glycosidic bonds of 2`-deoxyribonucleosides [1]. 27.80 27.80 27.80 27.80 27.70 27.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.36 0.71 -4.20 107 1106 2012-10-02 19:28:18 2003-04-07 12:59:11 10 7 886 35 291 866 351 125.70 22 73.25 CHANGED lYLAuPh.Fs..t..sphphspchpphLpphuh....psh.hPh.............ptph......s......s.thuptlaptslptlcpuDsllAs.ls.shc......sDsGTuaElGaAhAhsK......Plhshps-hc.........................................p...thshNLM .....................................lYhAush..Fs....t...tp....hphh.p.p.lhphlcptsh..........phh..hPh..................ct.ph........tt.........tshttsptlhpsDh.pt.lppuD.lllAh.ls..s..........DsGouhElGhA.hA.h.....s.K......Pl.l.s.hhpstp...................................................................................................................... 
0 83 165 228 +3542 PF03825 Nuc_H_symport Nucleoside H+ symporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.27 0.70 -5.71 4 1944 2012-10-03 03:33:39 2003-04-07 12:59:11 11 11 949 0 284 2015 437 383.60 38 93.67 CHANGED Mshsh+LplM.FLQahIWGuWhssLhsYMhhoh.FsuuplGhVYSuhGIAAllhPhlhG.lAD+ahSAc+shAlsHhlGAlsLahAApsTs.tshFhlhLl.ShsYMPTluLoNSIuausLt....DssssFPsIRlhGTlGaIhu.......hhlluhuslE.oph.LhIuAusShlLusaALTLP+hP.scppAspulsshLGLDAFhLh+s.phhlFFlhohlhus.Lphh.lFuNsFLpshst.......hhsphuollhSluQhuElhFhLslPFFLpRaGIKsVMllulVuaslRFuhFAYG.DssshshsLLlLuhllaGsuaDFF.lou.lFV-pcsssslRuSAQGLFhhhspGhGsllGuhluG.hhlchaohsG.....shDWpTlWLlhAuhulhlAslFhhhF+.scs ..........................................................................................................................MthphpLphh....Flp.ahl.W...Gu...W...h..s...s....L.u.....a...h.h.s......o..h..Fsuup.....IGh.l.Yus.h.uIAA..ll.h.P.h.L...l.G...h...l...u...D...+...a...h..s...Ap...+....l....h....u....l..h....p.....h...s...u.A......l....h..........h.......a.........h.......A................A.......p...........................T......s............s...s...h....F...h...l....h...L..h.........u...h...s...Y...M...P..T....l...u..L...s...N..o..lu.a..s..plt............D.h.s.p.D.FP......I.R..la....G.TI......G.F..I.h..u...........................h........h...h....l...........u...........h..........s....s...h...p.....o...........p...h........L....h..I..s...u...........u....s....S..h..l..L........u..l.......a........s..............h...s.....L.....P......c....h......P.........s........c..p.........p..........u.............p....p.....s....h..p.....s....h........L.G....L.......D..A....h..s......L...h.+...................s...+.p...h.....h.l.F....F.l..h.....u.....h.L..l.u..s...s...L..t.h..h......h...Fu..Ns..a...Lp.s.hu............................h..s.p.p.s...o..h...h..h.......o...l.uQ..h....S....El.h..F.hL...s
..l....PF.F....h+....R.a....G..I....K..pV..h.L.l.u...h..lu..h.s.l.Ra.u..hF.u...aG.s..ss..........h......sh.s.L..Ll...L...uh....l.....laGs.u.FD.F.a...lou......laV.-pcs.s.sp.h.Ruu..AQGl.h...h...h...h...sp...GhGshlG.sh....luG....hh.h...c.h.a....s......s.t..........................................hh.s....W.ps.h.W.h.hhA..sh..hl.l.A.hhF..h..hh.F+.pc.h................................................................................................ 2 107 174 238 +3543 PF02321 OEP Outer membrane efflux protein Bateman A anon Bateman A Family The OEP family (Outer membrane efflux protein) form trimeric channels that allow export of a variety of substrates in Gram negative bacteria.\ Each member of this family is composed of two repeats. The trimeric channel is composed of a 12 stranded all beta sheet barrel that spans the outer membrane, and a long all helical barrel that spans the periplasm. 30.90 30.90 30.90 30.90 30.80 30.80 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.95 0.71 -4.70 113 30969 2012-10-02 20:27:15 2003-04-07 12:59:11 13 38 2653 50 9040 27392 6704 187.10 16 75.76 CHANGED hpphhptshpp....ssplptt...ptphpts.ptphphtpst.h.hPplshssshshptststtht.................................................hslsl......shslas.....spttsphctuptphptsptphppsppplthpltpsahphht.spppl.phtpptlptscpthphtpppaptGthsh....s.lhpsptp..hhpsctphhpsptphptshtpLtthhu 
.........................................................................................................h.t.slpp....p..ss...lp..tA.....ptp..lptA...ptplp..hucu.......s...h..h......P...s...l....s...l.....s..u....s...h......s...h..s...p..s...s..h.s..s..h..h.t.t.t...t.................................................................................................................hu.l..s.l.......................................sh...s.l.h.ph..............Gphps...plc.t.A..ptphp..t..u..pt..p..h...p....p....s....thsl...h..t...pl.s.p...sa.hs.htt...tpppl..p....h.t...p........p.......t.l.p.s.t....p.ps.hc..h..s..p..p......p.a..p......s...G..h..ssh..h..........-..lhpAptp.....hhps.p..tp.h..hpsptphttsthpLtthhG................................................................ 0 2657 5583 7554 +3544 PF02265 S1-P1_nuclease Nuclease; S1/P1 Nuclease Bateman A, Mian N anon Pfam-B_2480 (release 5.2) Domain This family contains both S1 and P1 nucleases (EC:3.1.30.1) which cleave RNA and single stranded DNA with no base specificity. 
21.90 21.90 21.90 22.00 21.80 21.70 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.12 0.70 -4.75 81 645 2012-10-01 21:01:47 2003-04-07 12:59:11 11 7 360 1 375 699 285 243.10 24 82.02 CHANGED WGppGHhhlupIApphL.sspstptlpplL.ss............ssLuplusWADcl+p......h.................chsus....WHalsh.s.t..........t...aspsh....t.s.ppss.lluuIpphsstLpctpts..ppp+........spALtaLlHFlGDlHQPLH.suh......ttDpGGNslpVpah......scp.....s........NLHplWDothl..psh.t.............shsphsssL.pphs....tpphpsht........h.s...s....................htWApEShphup..phsh.....ssspssps...Lus..p.YhtsthsllcppltpuGlRLAshLNplhs ............................................................................................................WuttGHhhlutlApph...L...sspstttlp.plLst...............................ssl.sp...husWs.D...pl+t.....t.....h.....................................................ph..sus...hHal.shst.................................................spsh..........tp....s..psts..lhsulp..phtp.tL.t...stpts........ttpp..........................................s.uLta...lsHhlGDlH.......QPLH.s.uh.......................tDtGGNplp.lph...........spp...........s.............................NLHplWDst...hh.....pphht...........................................shpthh..ptl..pphs..............tpphpth....................................s.s....................................tt.asp.-ohp.hsp..p..hsh............................th.tstp.............lst....p.Y..ht..p..ths..lsppplt.uGhRLAthLstlh.t........................................................................................................... 0 171 268 338 +3545 PF03066 Nucleoplasmin Nucleoplasmin Griffiths-Jones SR anon Pfam-B_2930 (release 6.4) Family Nucleoplasmins are also known as chromatin decondensation proteins. They bind to core histones and transfer DNA to them in a reaction that requires ATP. This is thought to play a role in the assembly of regular nucleosomal arrays. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -4.69 23 444 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 114 62 203 360 1 149.80 36 64.51 CHANGED pspsaLaGCELspccpphoFcs.....t--c....cspHpLsL+olCLGspAK-.EhNlVElpu...hsh-upp.lplslAsLKsSshPhVolsGhElsPPVTFRL+sGSGPValSGpH.lshtpt.t.t................................................---EE-p--E-D-ED----pppE-.sPsKps+ .........................................................................pshhaGCELpucc.shpFcs...........-p-.........cs-....HpL.sL+.p...lsLGssAKD...Eh..p.lV.Espu...hsh..-up...lcVslAoL..Kh..S.s.pP.t......VoLs..s..Fp.l.s.P.PVs....h..+L.+.........sG....S......GP..V..aloGp........H.h..ls..hp.p.-t..p....pc...........................................................................pc--...-tt....---p-..-..-..cp......t.................t........................................................................... 0 29 44 89 +3546 PF03177 Nucleoporin_C Nucleoporin; Non-repetitive/WGA-negative nucleoporin C-terminal Mifsud W anon Pfam-B_1212 (release 6.5) Domain This is the C-termainl half of a family of nucleoporin proteins. Nucleoporins are the main components of the nuclear pore complex in eukaryotic cells, and mediate bidirectional nucleocytoplasmic transport, especially of mRNA and proteins. Two nucleoporin classes are known: one is characterised by the FG repeat Pfam:PF03093; the other is represented by this family, and lacks any repeats. RNA undergoing nuclear export first encounters the basket of the nuclear pore and many nucleoporins are accessible on the basket side of the pore [2]. 
25.90 25.90 26.70 25.90 25.50 25.60 hmmbuild -o /dev/null HMM SEED 587 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.99 0.70 -6.00 63 645 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 273 7 432 629 4 485.40 17 42.05 CHANGED hsssh.pppLchhhpslpsLhcFlppsts............................................................hstppchtlhtctctlsuhhtLhpphpcthshhp.hlhp.............................p............hsshhhhlsp.sphpplhshtacplht.....sp.p.tp.htp.Llsshlspslhpsu..th-.phusth...p.ps...........ssh.t.........................phhpup.pssh.shcp.hchhlp...............hpth.tph.......shpshpptssphhshphhttshtl.hhphsptt.spsppshphhpc.tt.pps....+p..ht.htp.hshac.shplhcphcshsshsp.h................................t...ppttphppch.phhtphs-.sFthtlYcahlpp...sth.phh.phppsalhpaLpc..................................thucLhWlahhppp.cahpAApsLhpL....sps..................hpcRl-hhhptluthsst.......s.................htphlpclpchl-lsplQcplhp.hlps.tph.....................ttp..h....pthhpt.....................Lhsphshshphh.................................lpLthhphss......stthlptlWpchltp..s...p....pttttt..........................hpthphthsslhpplpts.hshshsthh.h 
................................................................................t......pplp.h.pthptLhpFlpp.t.h...........................................................................thttphth.tctptlt.....uhhtLh...ph.t...ph.th..ht..hh.p................................................................................t.............hp.h.hhhtp.pt.pplh...thhhc.p.....lh................p...hp.....hth.l...hs.l..hthhpsu.............hc.thsphh....p.tp..............................s.h.p.....................................hpup.ph....t.h.p.....hc.hhp.....................................ht.....h..................................htth......s.tph.hp..hp.h...t.hhh.h......hh........hst.t.....p....t.t.s.p.......pp...t.....tp........p..........pt.....h.p.httap.hhp...hh...p.ph....shs.ths..h......................................ttsp.hpph.ph...hpp.t-....Fp..hlapWh.l.....pp...........shh..ph.....h..p............p.pshltpaLpp...........................................h.cLh..Wha.hppp..pat.pAA.psLhpL.u...spp.....................................hpc+hthhhtt.lsthsstt.......................................tphlcclppphc.lhplQpplhp..hlpt.ttt...........................................................th.t...................httLhsphs.shp..h....................................................hph.h.p.tt.t...........p.thhpthWpphlpp...p..........ptt.................................h...thh.lhphh.......................................................................................... 0 144 236 357 +3548 PF01773 Nucleos_tra2_N Nucleoside_tra2; Na+ dependent nucleoside transporter N-terminus Bashton M, Bateman A, Yeats C anon Yeats C Family This family consists of nucleoside transport proteins. Swiss:Q62773 is a purine-specific Na+-nucleoside cotransporter localised to the bile canalicular membrane [1]. Swiss:Q62674 is a a Na+-dependent nucleoside transporter selective for pyrimidine nucleosides and adenosine it also transports the anti-viral nucleoside analogues AZT and ddC [2]. 
This alignment covers the N terminus of this family 21.50 21.50 22.10 22.10 20.40 20.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.15 0.72 -3.64 166 4054 2009-09-11 09:51:31 2003-04-07 12:59:11 15 8 1905 1 661 2186 512 74.60 34 17.73 CHANGED lGlllllhlAaLhSpsR+.sIphRsVh...hulslQllluhhlLpsshGpshlpslusslspllsauptGssFl..FGsLs .......lGllVllslAaLhSs....sRK..pIphRs..Vh...hhLllQlhluhhhLt..........ss..hG..t.thlpshup...shppllsaus.s.GssFlFGul........... 0 162 320 492 +3549 PF03800 Nuf2 Nuf2 family Bateman A anon [1] Family Members of this family are components of the mitotic spindle. It has been shown that Nuf2 from yeast is part of a complex called the Ndc80p complex [1]. This complex is thought to bind to the microtubules of the spindle. An arabidopsis protein has been included in this family that has previously not been identified as a member of this family, Swiss:Q9C953. The match is not strong, but in common with other members of this family contains coiled-coil to the C terminus of this region. 20.90 20.90 21.50 21.50 20.60 20.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.68 0.71 -4.23 31 294 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 249 4 211 290 3 143.80 25 32.91 CHANGED hsp......t.sFPhLsscEIlssLps...hsls.hotcsLp+......PssphlpplYpphlphhhGh.sh-pl.t..........hhtsstpslp.sp.................hhp-slshhshh+hhpchh.ssGls.DFshpDlh+PcscRhp+hLSullNas+FREp+h...thhcctlpchcsth .................................................................................hp......p.sa.h.LsspEIstplpp............h..sl...s....hstpslt+.............Pp..s..ph..lphlap.thlphhhsh..sh.-sh.......................hhhsstpslp..sp......................................hhpss.hsl..hshatph.pchh.ts.....sls......DFshsDlhcP..p..s..cRht+...hLSull....NFh+FREpph...thhpchhtphcp.t........................ 
0 69 116 172 +3551 PF04121 Nup84_Nup100 Nuclear pore protein 84 / 107 Wood V, Finn RD anon Pfam-B_13117 (release 7.3); Family Nup84p forms a complex with five proteins, of which Nup120p, Nup85p, Sec13p, and a Sec13p homologues. This Nup84p complex in conjunction with Sec13-type proteins is required for correct nuclear pore biogenesis [1]. 18.50 18.50 18.60 18.60 18.00 18.40 hmmbuild -o /dev/null HMM SEED 697 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.03 0.70 -6.85 16 402 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 252 7 259 410 4 519.90 20 76.82 CHANGED pphp.hcLEtchW+LlptLaphR...htpppccptphc.........shsSctsh.cplhppNsplhEhpLllsWLcssh.t..h-tssslsh...os+WpNTlhslt.pht..shs.tt......shVpslDsDAPlR-pps.LcstDcpcDppla+hlFpLlhsGpl-EAhchCcpoGNhhhuhILpG...hcpa.hDPslDhphss.........................ps.pGsp++sLW++ssaplupssplDcYERAIYuhLu.Gslss.lslsts.sWEchLhsYlsphhshplEs.lhspshssp........h.lPpphht....olpplhspL.stpssclh-puccPlRll.stlILsslssllcshschL.sshss.....t..tpsalLRllTHLslFl+hlsh.tp.p.sDc..llssYlphLtttt.h-LIPhYloaL.spp.tl-tYShFLpsls-sptRc+QlElupphuLs....................lssIh+pTVppl.hp-o-pthh.sspssh.....p.clochDp+lIsulEWL.h-sp.hs-ulppuhAlhR+FLhsGKlcuh+phhpthshcslhp.Yph....t..cslssc--......sshcEllpYcshlpulctap-Wpch..........................pppp.sscpphsphpp+lpthopsshcLlhshLlt................s-sp....pctphh.plRsLYlPhllhtLHp..lhtpsp.....hhpculpLushVAsEscplYh.lF.ssG+LpEaLpplucsShl 
....................................................................t.....hp.Eh.hWpLl..lh............c.................t......................................ott..h.pt..h...h.t.tst.hh.p...h.lhpWLpt.h.............................t.......................................thhW..........Th.tl...............................................hlpplDsDAshR..pt.....h..........Lp..Dp..-.thhhhhaphlRtG...h.p.c.h.chspptsp.W+Ah.lt...G............h...........ss.t.p...t...........................................................s.........p.tlW+hssh..........t....h.up..........p............t.........t...........h.......s......a..................-+Alau..hls...G.pht..t......h.....h...ls..p.....sWc-hlashhp.h.l.p..hc....t.l.t.............................................h.st.....h...........................s..h.....p...p.hh.pt..l.......t.t..p.th.t..tspp..h.phlpt........llh..t...........ph..s.t.......h.h..p...th.........l...t.........t............................................................................................................................................................................h................lRhhsHl....h..lhhp...l.t...............................................t...............p......................................t.........................l...l.......t..tYl.p.h.L........t......t.....p..hlshYss.L.st..th.......t.hu.hL.t.l..........p....p.......+............l...phh.....h...tls.............................................h..thhp.......h....p.....h....htt................................................................hp.....-...h....hpsl-Wh...h.s............t.h.c...........hh..s.hhh+ha.h......h.........tp.h...tshp.hh..h........t...h.................................................................................p........h....t.h.....p....hhth..p.h..ph.p.............................................................t.......t..t......h.......h.....hp.......t.....h..h...h.........thl...........................................................................................
.......h..lRthhhP.hh.h.h.t........h..tt.....................................h.........tshths..lu..p...hh.................h........tt..t..p...h..hh..h............................................................................................................................................................................................................................................ 1 84 143 213 +3552 PF01029 NusB NusB family Finn RD, Bateman A anon Bateman A Domain The NusB protein is involved in the regulation of rRNA biosynthesis by transcriptional antitermination. 21.30 21.30 21.40 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.47 0.71 -4.18 114 7994 2009-09-17 00:13:35 2003-04-07 12:59:11 13 8 4433 32 1709 5198 2636 125.80 25 44.18 CHANGED pp...s....RphAhpsLhph............................htpthhpthhs...............hspp.spsahppLlhGshcphtplDthlsp.hh.shslc+hshhs+slLRlulaElha..h..clPspsslsEuV-luKph...ssppsst..FlNuVLcplt+ ....................................................................................................................psRphAhpsLhph.............................ptthhs.hhs.........................ttt.h.sst....Dtsh.hp..cLl...hGshpppspL.Dthlst..hh....s..hs.l..pc..l.s..t.h..+.sl.LRluhYplha.........h.....clP......s..+....s....sl.sEuVElAKph..........uspp....s.tp...alNGVLcphh............................. 
1 580 1130 1455 +3553 PF02357 NusG Transcription termination factor nusG Bashton M, Bateman A, Finn RD anon Pfam-B_697 (release 5.2) Family \N 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.45 0.72 -3.75 170 5994 2012-10-02 20:41:53 2003-04-07 12:59:11 14 4 4395 19 1234 3385 2659 97.80 33 51.97 CHANGED tpWYllpstsspEp...c..ltp...pL.p....p..t..lps..................hlPhtph....ch...ps........+hp...ppshaPGYlFlchshs....s......p.....h..htl........pss..uVss...hl.......G.......sp.....Pss.......lscp .................................................................pWYllp..sa..S.GhEp....+..Vpp................sL...c.........p....p....lps.shtc...............................hhplhlPpEcl.....hcl+...sG........+pp..hc+phFPGYVLVchshs.............D..........c..........s..Wpl......l+sT....PsV.s.G..Fl.........u...........s+......PsPl.p.t........................................... 0 418 810 1052 +3554 PF04277 OAD_gamma Oxaloacetate decarboxylase, gamma chain TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 21.20 21.20 21.20 21.20 21.10 20.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.65 0.72 -3.61 96 1171 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 853 0 225 777 96 73.50 28 65.31 CHANGED ltpultlM.lhGMuhVFlFLhlLlhhlplhupl....hpch.............ssppssssstss.sspststt................s............sscllAsIsAAlppa+pp ..............puhtlh.hlGMuhVFhh...Lh...lLlhslphhutl...........lsch........................hsc.s..s...s.s.tsts..tssss.................................................s...................................sclhAsIs.AAlppap..t..................... 0 83 144 191 +3555 PF03977 OAD_beta OadB_MmdB; Na+-transporting oxaloacetate decarboxylase beta subunit Bateman A anon COG1883 Family Members of this family are integral membrane proteins. 
The decarboxylation reactions they catalyse are coupled to the vectorial transport of Na+ across the cytoplasmic membrane, thereby creating a sodium ion motive force that is used for ATP synthesis [2]. 30.00 30.00 30.40 30.50 29.90 29.90 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.12 0.70 -5.71 9 1424 2012-10-02 17:06:44 2003-04-07 12:59:11 8 5 969 0 273 1037 261 354.20 54 94.72 CHANGED plG....................pllMllVGlhLlYLuItKcaEPLLLlPIGFGslLsNlPtuGls..shp......................................................GhhhhhaphuIsstlhPLLIFhGlGAhTDFGPLlANPKTlLLGAAAQhGIFhslhsAlhL....G..Folt-AASIGIIGGADGPTuIYloopLAP-LLusIAVAAYSYMALVPIIQPPlh+hLTTpcERKI+Mp.QLRpVS+hEKIlFPIllhllsuLLlPsAsPLlGMlhhGNLhREsGVV-RLucsAQptLlNIVTIhLGLuVGSphpADpFLsspTLhIlsLGllAFssuTAGGVlhAKlMNhFppcKINPhIGAAGVSAVPMuARVspKluhEEDPpNFlLMHAMGPNVAGVIGSAlAAGVhLthlu ........................................s..GphlMllluhlLlaLAItKpaEPLLLlPIGFGslLsN.....l.P..........u.....Ghh...................................................................................................................................................................................................sGhLhhhaphuIp.o.ulhPhlIFhGlGAMTDFGPLlANP+s.lL.LGAA.AQFGIFsollsAlhL...............u......F.oht.pAAuIGIIGGADGPTuIaluu+L....A............P.c...L.LGuIAVAAYSYMALVPlIQPPIMKhLTTccERpI+M..t....ph.....R..p.......V..S.........KpE.......KIlFPlllhllsuLllPsusPLlGMhhhGNLhREuG.V.l-.RLScTAQNtLlNIVTIhLGLoVGuphpA-pFLp.pTLtIlsLGllAFshuTAuGVLhAKlhNh....h.o..+....p....KINPLIGuAGVSAVPMAARVspKlGh-tsPpNFLLMHAMGPNVAGVIGSAlAAGlhLshl.h................................................. 0 113 184 234 +3556 PF04225 OapA Opacity-associated protein A LysM-like domain Mifsud W, Bateman A anon COG3061 Domain This family includes the Haemophilus influenzae opacity-associated protein. 
This protein is required for efficient nasopharyngeal mucosal colonisation, and its expression is associated with a distinctive transparent colony phenotype. OapA is thought to be a secreted protein, and its expression exhibits high-frequency phase variation [1,2]. This is a LysM-like domain. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.78 0.72 -4.09 12 1645 2012-10-01 23:00:54 2003-04-07 12:59:11 7 7 935 1 206 836 122 83.80 37 23.87 CHANGED sW+papVtpGsTLhQlFRcNsLsloDlsAhs+lEGusKPLSplKsGQhl+hplsspGplstLplEsssp.tlhFhRtuDGoatRsK ........+pYhVpsG-TLuplF.ppa.GlshsDlhtlAps.p..s....s.K.sLsN.L+sGQplph.ph.s.A.s.G.p..L.ptL.o..h-.s.upppph.h.asRp.ss.G.ua....h.................... 0 28 71 142 +3558 PF03373 Octapeptide Octapeptide repeat Bateman A anon Bateman A Repeat This octapeptide repeat is found in several bacterial proteins. The function of this repeat is unknown. 17.50 3.90 17.90 3.90 17.10 3.80 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.78 -5.53 0.78 -5.51 0.78 -2.61 3 3282 2009-01-15 18:05:59 2003-04-07 12:59:11 9 83 143 0 12 2907 0 8.00 83 23.91 CHANGED PGKEDNNK PGKEDsNK.. 0 12 12 12 +3559 PF02317 Octopine_DH NAD/NADP octopine/nopaline dehydrogenase, alpha-helical domain Mian N, Bateman A, Griffiths-Jones SR anon Pfam-B_9653 (release 5.2) Family This group of enzymes act on the CH-NH substrate bond using NAD(+) or NADP(+) as an acceptor. The Pfam family consists mainly of octopine and nopaline dehydrogenases from Ti plasmids. 
23.30 23.30 23.50 24.20 22.80 23.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.55 47 615 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 543 5 140 433 40 146.60 33 43.51 CHANGED lphtpshLphsLsNhsslhHPshhlhshuphc......................thp.......sch.hahpGhs...stsucllcslDpERhslApAluhph..hshtphhph..tY.......thcsss..lhchhpssp.uasslssPss.hp..........pRYlhEDlPhGLlshtuluchhGVssPhhcullphssshhGc ......................................h.shtpsllpssLpNsNs.lHPs.hlhNhGpl-.......................as............uEaslat-Glo...cpssclLcul-tERlsluctL.Ghch.........shp-s..hh......ptYh.......................tttp.-s.ps..L...pch..hpTss.sastl..s.sPsp.hc.....................sRYlsEDlsaGLshhsuluchhs.VsTPsh-ullhlusshht.p......... 0 53 95 127 +3561 PF02101 Ocular_alb Ocular albinism type 1 protein Mian N, Bateman A anon IPR001414 Family \N 25.00 25.00 32.20 31.50 22.90 22.60 hmmbuild -o /dev/null HMM SEED 405 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.42 0.70 -5.87 2 77 2012-10-03 04:04:29 2003-04-07 12:59:11 10 2 52 0 48 82 0 326.40 45 95.20 CHANGED MASPRLGhFCCPThDAATQLVLSFQPRsFHALCLGSGsLRLsLGLLQLLsGRRssG.tuPATSPsASV+ILRAAsACDLLGCLGhVIRSTVWluaPpFl-slSshNtT-IWPAsFCVGSAMWIQLLYSACFWWLFCYAVDsYLVIRRSAGhSTILLYHIMAWGLAsLLCVEGAsMLYYPSVSRCERGLDHAIPHYVThYLPLLLVLVANPILFpKTVTuVASLLKGRpGlYTENERhMGAVIKhRFFKIMLVLIhCWLSNIINESLLFYLEMQsDIpGGSLK.lpsAA+TTWFIMGILNPAQGhLLSLAFYGWTGCSLshpsP+h.IQWEohTsSAAEGsa.oPl...hPHpNPt..KVspVGGpTSDEsLShLSEsSDASTlEIHTAotSCNhpEsDsh..spG-L 
.................................................MAsPpl..hCC.....h...s.s.t..hh.tFp..hatshslsSushthhhslhQlh.............h..t...........t....................................t...p...IlhhhshsDhLushGllhRSsl..Wlu.h.Pshlp.shSshst.o.claP........ss.F...CV.u.S.u...hWIQ.L.hYSAsFW.WhFCY.AVDsa.LVl..+c.SAG.hSs.ll.LYHhhsWGLAsLLslEGhshLYa...P.....S.....h..u.....p.C...E.pGLp......HA....I....PHYlTTYhPLLLVLluNPILFp+TlsuVsSLLKGR.pGIYTENERRhus.IKlRFFKIMLVFhlC..WlsNIINEoLL.FYL......EhQsDl........psssL+.l...+.s..AAhhTWaIMGILNPhQGFL.oLAFaGWT.Ghp.lth.....p.h.h.W-phssS.h.s.tt......................p...t.........p.....tpp.s-slshLSE................................................................................................ 0 14 18 32 +3562 PF02100 ODC_AZ Ornithine decarboxylase antizyme Mian N, Bateman A, Moxon SJ, Wood V anon IPR002993 & Pfam-B_34796 (release 7.7) Family This family consists of ornithine decarboxylase antizyme proteins. The polyamine biosynthetic enzyme ornithine decarboxylase (ODC) is degraded by the 26 S proteasome via a ubiquitin-independent pathway. Its degradation is greatly accelerated by association with the polyamine-induced regulatory protein antizyme 1 (AZ1) [1]. 23.90 23.90 24.00 25.70 23.40 23.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.58 0.72 -4.57 28 301 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 183 1 182 274 0 105.80 32 48.95 CHANGED hsssphspW......pslls....................ppsL..aV.lPp.sshspu.........................................KculhuLLEaA--p.LpssclhlshpKscs-...pssLlRohpalGFcllsss.p.............ss.......sschhaMsasl ..............................................................................s......pW.......tlhs.........................spsL...alp.lPt..ssLspG........................................sK-ulhuLLEaAEEp.Lp.....sspVhlChc+sRsD.....RusLl+sFs.alGFEllpPs.c............................................shsP........pschhFMsY.h.......................... 
0 47 74 125 +3563 PF02423 OCD_Mu_crystall ODC_Mu_crystall; Ornithine cyclodeaminase/mu-crystallin family Bateman A anon Pfam-B_1960 (release 5.4) Family This family contains the bacterial Ornithine cyclodeaminase enzyme EC:4.3.1.12, which catalyses the deamination of ornithine to proline [1]. This family also contains mu-Crystallin the major component of the eye lens in several Australian marsupials, mRNA for this protein has also been found in human retina [2]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.69 0.70 -5.66 11 2697 2012-10-10 17:06:42 2003-04-07 12:59:11 10 13 1722 12 903 3070 1011 299.80 23 91.90 CHANGED scshhhhttcVthcshhs.chhshlEssFRpaups...thcpsP+lssas+c....GslclMPsh.us...hhGhKaVsuaPcNscp.GLPTlsAhhVLs-ssoG.hPlhLh-sshLTAlRTAAsuAluuKaLA.psupshAlIGsGsQuhhQscAhptlhs.lpcl+laDlcscusc+hupplpt...thplsAssssppAVcGADllsTsTsscp......sllpssaVpsGsHlNAlGuDssGKsELcs-lLtcADlaV.-a.sQs+tpGElpp.................lssccshspLu-VhsGphsGRsssccITlFDSsGhAlcDhushchla-pspsts 
.......................................................................................h......................thhph...lpp.s.h.t.ths...pt........th...ts......sp......h.......h..h.........h.....p............ts..hh.h...h.MPu........h....h..........s.......s.........................h...............hGh.K.......hl.s......s...........h..s......p.....Ns....p..........p....u....h........s.........s..h.........p....u..h.h.l..L..hD.s..pT.G.hs.h.u.l.h.-ushlTshRT.A..A...s..o....u.l..A.......s....c..h....L.........A.....p....p.......s.....u.........c.........s........l........s.....llG.s.GhQA.p.h.p.l.p....u..h...t....t......l.......h.......s....l..cc...l.....t......l......a.............s........p...s..............p....p..s............p....p....h.............s........p............p....h.........p...........t.........h..........t..............h...........p.......h......h.............s.........s..........p.............s........s.........p.........c.............A.........l...........p....s....A.......D......l.......l.s....o.sTsupp...............P..l..l..p..s..........p.....h...l.............p.....s....G........s............a....l....s...s..l.G.u....p.s...s.h..p.E.l...s........s....c..l....l.....t.....p.......u..........p............l....hV....-.....h.......s.......t...h.....p.......c.....t.G.-.lh........................................................hs.t..p....t...h....h..s..-....L..u......p.l.........l........s.....G....p.....t......s.....s...........R........p.....s.........s.......c..........c.............l........o..l...........F.cssGhulpDlssAph.lhpph....s.................................................................................................................................................................................. 
0 225 509 735 +3564 PF02159 Oest_recep Oestrogen receptor Mian N, Bateman A anon IPR001292 Family \N 20.60 20.60 21.80 21.20 20.50 20.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.98 0.71 -4.05 11 220 2009-09-12 00:57:07 2003-04-07 12:59:11 10 10 133 0 28 206 0 120.80 46 25.14 CHANGED MYs-ps+s.....ushNY.-GA.YD.....Fssss....suAPs.apsuolu....Yhsus..ssaGssu.uthpoLsssssSPLhFlp.ouPQLSPaL.H.sG.....pQVsYYL..........-ousoshYRSSsssppQuu....cEhhouus-............+Guhuh-SsKE ....................hYs-ps+s......ushsY.EGs.Y-.....assss......ssuPl.Yuposls....Yhsus...suaGssu.Guh.sLs.osssSPLhhl..ssP....Q.LSPFl.H.tu.....pQVsYYL...........EsussshYRsssssppQuu....cEp.huoos-..............+GuhuhESsKE................... 1 2 4 11 +3565 PF04664 OGFr_N Opioid growth factor receptor (OGFr) conserved region Waterfield DI, Finn RD anon Pfam-B_4529 (release 7.5) Family Opioid peptides act as growth factors in neural and non-neural cells and tissues, in addition to serving in neurotransmission/neuromodulation in the nervous system. The Opioid growth factor receptor is an integral membrane protein associated with the nucleus. The conserved region is situated at the N-terminus of the member proteins with a series of imperfect repeats lying immediately to its C-terminus [1]. 
19.80 19.80 19.80 21.00 18.70 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.26 0.70 -5.01 13 234 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 123 0 109 217 96 177.60 34 53.33 CHANGED hh-.c.s.-h..........hNL+FYpNEIshp...PsGhaI--lLppWpss.....Y-hLEcNHoYIQWLFPlcEpG.lNhpApsLThcEIctF+psc-l+++hlcuYclMLcFaGIcLs-c+sGpVpRApNappRFpNLNcasHNpLRITRILKsLGpL.GhEpapsPLV+FFL...pEoLVptpLssl+pSuLD.YFhFsl+s+ppRRELltaAataacPptcFlWGP.chhphp ..............................................ht.....................lpFYpsch.........spGhhI-plhppW.ts...........YchLEcsHsYIQWL.FPlpE.u..hN..hh...A..LTtpElc...tF+psp...ch....hc+hlp.uYch.MLpFaGlcLt.............s.p.sG...p.....l...t...+.u...t..s.............a...p...........cR.a.p.p.....L..s.pp.pHN.............LRITRILKSLupL.GhcpapssLs+ahl...pctlhppph..s..s.lp.puslc.Yah.slh.................................p................................................................ 0 34 49 73 +3566 PF04680 OGFr_III OGFr_repeat; Opioid growth factor receptor repeat Waterfield DI, Finn RD anon Pfam-B_4529 (release 7.5) Repeat Proline-rich repeat found only in a human opioid growth factor receptor [1]. 19.50 19.50 20.90 19.50 17.20 19.30 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.41 0.73 -6.43 0.73 -3.63 4 97 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 10 0 7 144 0 21.10 66 13.11 CHANGED uPpET.GPRsAs....Pss...DpPAE .uPpETPGPpsAG....PAu...DpPAE..... 0 7 7 7 +3567 PF04606 Ogr_Delta Ogr/Delta-like zinc finger Mifsud W anon Pfam-B_5059 (release 7.5) Family This is a viral family of phage zinc-binding transcriptional activators, which also contains cryptic members in some bacterial genomes [1]. The P4 phage delta protein contains two such domains attached covalently, while the P2 phage Ogr proteins possess one domain but function as dimers. All the members of this family have the following consensus sequence: C-X(2)-C-X(3)-A-(X)2-R-X(15)-C-X(4)-C-X(3)-F [2]. 
This family also includes zinc fingers in recombinase proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.63 0.72 -4.30 67 983 2012-10-03 10:42:43 2003-04-07 12:59:11 7 4 577 0 122 677 13 46.70 40 45.45 CHANGED pCPpCsspA+hRoocth..otps.....pc....hYtpCpNh.p.....CupoFsst.ohs+sl ...........pCPhCss.s.A..+sR..o.Sp..h.....osps.....+-.....+YhQCpNl.p......CutTFhohEohp+hl.................... 0 13 47 82 +3568 PF01276 OKR_DC_1 Orn/Lys/Arg decarboxylase, major domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.29 0.70 -5.93 10 4944 2012-10-02 18:26:03 2003-04-07 12:59:11 15 15 2315 25 649 3284 459 379.60 33 61.32 CHANGED PPFaKALhcYVccsphoFssPGHpGGssFpKcPAGphFYDFaGENlF+uDlssusspLGDLLtHpGstp-ApcaAA+VFsAD+oYFVlNGTSsuNKlVs.AlsssGDhVLlDRNsHKSlpH.ALhhuGAsPVYLcPs.RNsaGllGGIstppFpc-sl+ctls-ssss+s......hhAVIpNuTYDGslYNs+pll-plc+hss.IhFDSAWVuYppFt.Phhtssoshtu-..p.ps.sIhsTQSsHKhhAuFSQuShIHhK-sph......ls+cRFNpAaMMHsSTSPaYPlhASLDVAAuMhcGsuG++Lhp-slcpulchRKpl........lssuphFcPatPplVcspp....................ha.hcssppWHuFcshsspphalDPsKlhLpTPGhs.csG-hc-hGlPAslVApaLcEp.GIlsEKoD..sILFLho.GtscsK 
.............................................................sPhhcALh.p.Yscp.t.phsF...s.s.PGH.....ps.Gp.h.a.p+......pP.sGp......ah-a...aG....c..shh+s.Dls...shst.L.......GsLLpH.pGs.h.t-A.pchsA+...sF.sA....-....+o.........ah..VlNGT.SsuN+slh..us......h......s........s.G........-........h.l.L.lDRNsHKS.lhp...uLh..hs....G..u.p.P.l.Y.l.css..R..........Nt.h.G.l.l....G.G..Is..t.p..php............p..hl..p...ct....l.p..c.....s.....s.s.s..ps.....................hsl.ls.p..s.TY..D..Ghh..Y...N...sc..t....lh...c...h....l...p...p..h..s.........l..hF..DpAWsuat..p.Fp....s....h.....ht.s.p..........s..htu-.........t..t.....hl.hsTQSsHKhhuuhS.QuShIHl+ss.....................hsc.cph....N..p.A.ahhHsoTS...PtYslhASl....D.su.sphhcut..sG...........c.pLh.pc.slchu.............lc.hR+tl................sps..hhh...cs..a............lss...........................h.a..h.....s.s.....t...tWH.u.F..t.s..h......t.t.......hlDPhKlhlhs......PGht...pG.ph.p.p.....GlPAs.ll..s.t.aL.t.c.p.Gllsp+ss..slhhLho.u.s.t............................................................................................................................................................................ 
1 197 374 507 +3569 PF03711 OKR_DC_1_C Orn/Lys/Arg decarboxylase, C-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.20 20.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.69 0.71 -4.12 32 4706 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 2279 25 600 2682 216 119.40 31 18.69 CHANGED LPslapppPchYcshsl+-LCQphHcha+ppslhpLppchF..sshPchsMsPpcAatchl+sclEhVslc-h.GRluAphhLPYPPGlslllPGEha...upsllcYhhhLp-hhspaPGFps-IpGlahppcsst .............................................................hPplhtt.......Yt.th.tlppL.spthath..hh..p....s.......t..h...........th...a..........p.h.hP..p...h.hh....s....P..pp.A..a.....t..l...c.s....p....sEhV..slc..c....h..GR.......l.......u.......AphllPYPPGlPllhPGEhh........s.ps.ll.cYlt.h.Lp.p...h..thhP.G.Ft..-hpGh.......s.......................... 0 176 343 466 +3570 PF03709 OKR_DC_1_N Orn/Lys/Arg decarboxylase, N-terminal domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Domain This domain has a flavodoxin-like fold, and is termed the "wing" domain because of its position in the overall 3D structure. 28.60 28.60 28.60 29.00 28.40 28.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.18 0.71 -4.03 82 2923 2012-10-01 22:20:39 2003-04-07 12:59:11 10 5 1008 23 274 1331 55 108.10 28 15.04 CHANGED hpttshppltptlpt.pshplltssshcch..hh..tscluulllsh-................phtpsllpplcppshplPlFlhsct...sspplsschlppls...thhph.hpssscahApplppAsppY ........................hpttslccLtssLst.pshpllhssspc-h..hhp.psclsul..lhshs........................c.stsllsclcpp.s.pl...PlFhhssp....sshcl.s.s.shhphl.....tahph...h.sss-.lAtclcpssscY................................ 
0 34 100 179 +3571 PF01277 Oleosin Oleosin Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 20.70 22.80 20.40 19.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.49 0.71 -4.53 15 355 2012-10-03 03:08:05 2003-04-07 12:59:11 12 4 58 0 111 359 0 107.80 35 54.63 CHANGED SosQlLsllsulPlGGsLLsLAGLTLAGTlIGLsVATPLFVIFSPVLVPAslsIGLAlTGFLsSGuFGlTuLSSlSWlhsYlRtppssss-pl-hAKtphp-hstasGpKs+-hGp+h ...................pllpslsusssussLLlLuGLTLs.GolluLsluTPLhlIFS.PlLVP..AsIsssLlssGFluuGuhGlsAl.uslsWlh+ahpu.....t.p..s......h....s....th..p.hh...........................t....................................... 0 25 71 86 +3572 PF02191 OLF Olfactomedin-like domain SMART anon Alignment kindly provided by SMART Family \N 20.80 20.80 21.20 21.20 20.00 19.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.73 0.70 -5.11 40 1087 2009-01-15 18:05:59 2003-04-07 12:59:11 11 39 91 0 497 860 0 245.70 37 33.48 CHANGED sLhslupPhhl.....+pssp...phGuWh+Dsh.....tssc+lahhsshsss...p..lhcapphpsF........htsptspp.........hpLP......hshpGsGaVV.Ys.GulYYpct............popsllKa-Lsspslsspp.LssA.uap..s.hsYthuuhosIDlAVDEpGLWlIYuTppssGp.....IVlS+LsPpoLpl..ppoWsTsh.KpssuNAFhlCGsLYsscohstps..........scIpYsaDTp..ssppp.tsslsFpppapthstlcYNP+-ppLYsWssGa.lhYslphtp ................................................................................h.LpslspPhpl.....ppssp...phGuWh+DPh......pssc+l..a...hh.sh.h.tp....pplhEY.pshpsF............htuptsps............................a+LP.......athpGTGaVV.Ys.GulaaN+t..............popsIlKaDLco........ch.........hs.s...............pt...............hlssA..sYc....................s..p.P...Y....p..W....u...GpoDIDLAVD.Es.GLWlIYuTpp..s..s...Gp.......IVlSpL....sP.hTLpl..cpTWc.T.s.h.sK.puAuN.AFhlCGlLYslcSh.psp..........sclpYsYsTp......ps.pt.p.h......ss.ls.F.......NpYp.aluhlDYNP.+DptLYsWsN..sa.ltYslph................................... 
0 66 116 258 +3573 PF00691 OmpA OmpA family Bateman A anon Pfam-B_166 (release 2.1) Family The Pfam entry also includes MotB and related proteins which are not included in the Prosite family. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.65 0.72 -3.58 84 17270 2009-09-13 10:58:36 2003-04-07 12:59:11 15 239 3418 100 4376 13311 4143 97.50 27 31.23 CHANGED hFs.sssp.l..s.......spspptLp....plu..phlp...........t........ltltGa.....................sss........hss....tpt....NhpLStpRApsVtph.Lhp.tG....ls.sp...cl...h..puhGp..spshssssssp.spthp.........R ..........................................................hFshspup..l......p........sp..s.p...p...h...Ls..........plA.....phLp.........................p.....h.....lpl.p.GH.....................................................................................TDs............................pGs......pph.........................NhpLSpcRA....pu....Vtph...Lhs...tG...................ls..ss............+l......ps.....hGhGc.......ppPl..ss.s.s..s..tt..spt.N.................................................... 0 1365 2716 3618 +3574 PF01389 OmpA_membrane OmpA-like transmembrane domain Bateman A anon Bateman A Domain The structure of OmpA transmembrane domain shows that it consists of an eight stranded beta barrel [1]. This family includes some other distantly related outer membrane proteins with low scores. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.28 0.71 -4.77 13 1301 2012-10-03 17:14:37 2003-04-07 12:59:11 12 16 841 9 132 2219 115 165.60 47 47.12 CHANGED Pp-NTaYsGuKsGWupaHDss.................psstssHcNshshGsFGGYQlNs.....LuhELGYDahGRhsh+upst..........................sstapsQGspLosKhuYPlsDsLDlYu+lGuhlsRuDsK....tpu.t.......tpsHDstsSsLhAsGlEYAlsPElAsRLEYQalsslGchpstsp............pPs.uslolGlSYRFGQus ..............................................................................................................................................................................................PpsNTaYs..Gu...Kh..G..W.u..p..a...c...Dss....................................sst.ss.c.c..s...p..l..u..h.G........s..F...u.....G......Y..Qh..N.sa...................luh..E...h.......G.....Y....D........a......L.....G..Rh..s...h....c.u...p..........................................................ss.s.a..c..A..p...G.....l.....p.....L......s.....s....K..h..u.........Y......P..........l......s...-....c.....L...Dl..Y..s.....R..L..G...G..h..l....W....R......u.....D...s...+...s.t..............t.........................hpsHD.T..GVS..Pl...h.A....uG..lEY..A..l..T.....s..-lAs..R..L....E...Y.Q....W..s......N....N...I...G...D.sp.o.h.Gs............................RP..D...s..uh....lS....lGlSYRFGQs................................................................... 0 12 44 91 +3575 PF03938 OmpH Outer membrane protein (OmpH-like) Bateman A anon COG2825 Domain This family includes outer membrane proteins such as OmpH among others. Skp (OmpH) has been characterised as a molecular chaperone that interacts with unfolded proteins as they emerge in the periplasm from the Sec translocation machinery [2]. 
29.20 29.20 29.20 29.20 29.10 29.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.14 0.71 -4.29 151 2754 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2008 6 700 2002 1837 154.60 21 89.11 CHANGED lslhh.h....hss....................ssst...Ap......................+luhlDhpplhpp..sttcp....hppplppp....hp...phpsc..............................................................................lpphppcl...ppthpphppp..tt.........thsppttppt..pp..chp.....................pptpchpphppphp...pc...........................lpppppp........hhptlhpc.l...ppslpplucp....pshsllls............pss...............llas..ss..sh..DlTscVlptls ...................................................................................................................hh..hhh..h.hss.............sstt.......Ap.........................................Kluhlshpplhp.p..h.st...tps....spppLcpc...........hp...thts-...................................................................................................................................................lpph.ppcl...........psthpchppp....t...........................hhstsptpch....pp.......-lt................................ptppph..ppt.....tpthp........pc...................................htpcppc........thpplhsc..l...ppulcplApp....pshsllls...........tss.......................lhYs..ss....sh...DITscVlcpl...................................................................................................... 0 256 489 615 +3576 PF01278 Omptin Omptin family Finn RD, Bateman A anon Prosite Family The omptin family is a family of serine proteases. 
20.10 20.10 20.40 20.40 19.60 19.30 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.87 0.70 -5.32 7 512 2012-10-03 17:14:37 2003-04-07 12:59:11 15 3 418 9 44 269 1 275.50 56 92.86 CHANGED uos...s.hsP-shosshSlGsLuGKo+EhVYcs.-sGRKlSQLDWKhpNsAIl+GslsW-.hshlolsApGWToLuStuGpMsDhDWhsS....spss.WTDcSpHPsTplNYANEaDLNlKGWhLpsssY+lGlhAGYQEoRaSaTApGGSYlYsssu....phGsFPssh+sIGYpQ+FphPYIGLsGsYRhscFEhsuhFKaSsWVpApDNDEHY..hRclTFR-KspsppYYusulsAGYYlTssAKlasEhoas+hpptKGsTplhDp.SssoushsssuAGIpNhNahsTAGLpYpF ................................t...h.shoP-slssslSlGsLsGKo+EhVY.s.-s...GRK.lSQLDWKhpNs.AIl..+Gsls.W....-h.......h.s.....l..olsA.tGWTo.L.u.S..t....uGpMVDpDWMsS.......spPG....WTDcSpHPD.Tp..lNYANE...aDLNlKG.....WLL.......ppsNY..+..lGlhA....GYQEoRaSaTA+GG..........S...YI.......Y..........ss...tc.....................IGs..FP...s...G....R....uIGYpQRFcMPYIGLs.G...sY.R.h.p.D.F.EhsuhFKYSsW..Vp.ApDN...DEHY.................h+.+......l..TaRpK.s.cs.ppY.YusulsAGYYlTsNAKlasEhuas+hs.ptKGsTplhD+.sss..Tu.autsuAGIpN.NahsTAGLpYpF............................................................................. 0 10 21 29 +3577 PF03922 OmpW OmpW family Bateman A anon COG3047 Family This family includes outer membrane protein W (OmpW) proteins from a variety of bacterial species. This protein may form the receptor for S4 colicins in E. coli [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.23 0.71 -4.64 9 1853 2012-10-03 17:14:37 2003-04-07 12:59:11 9 3 1309 10 354 1343 105 183.40 37 85.72 CHANGED psGsahl+uthspVhsssusshh.......hsssschs...sssssp.sLohTYhhoDplusEL.............luuT.hpHplssps...sssshluc...sphLPPTLhsQYa..hsssush+PYlGAGlNYshFaspphsss....thoclKLcsuaGhAhplGlDhhls.cshhlNhsVhhhhlcosAshp....sussthcscV+LDPWVhhhGlGa+F ...............................................................................................tGphhhRst.hsVhPspsussh................ssh.sshs........lsss.spluLohoYhhTD.N.lulEL............................................luu...o..PhpHcls..s.s..................s.....h.G..c....lus..........s+.pLPPT..Lhh.Q.Ya..hhsss..u...chRPYV..G..s..Gl...NY.Th.......F.as.p...p.hsss................................uh.o.sl...p...l..c...s.S.aG.s.A.s..p..l..G..hDYhls...cpWhlshsVhYh..sI.cTsushp.............hu..s...s......p......t............p.s...s..l+..l..DPa.V.hhhusGYRF................................................. 0 69 155 256 +3578 PF03532 OMS28_porin OMS28 porin Griffiths-Jones SR anon PRINTS Family \N 20.70 20.70 20.80 48.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.74 0.70 -5.31 2 39 2009-09-11 23:15:34 2003-04-07 12:59:11 8 2 24 \N 1 27 0 167.90 72 99.15 CHANGED KIFSNLIINGLLFGFVSLNVFADSNNANILKPQSNVLEHSDQKDNKKLDQKDQVNQALDTINKVTEDVSSKLEGVRESSLELVESNDAGVVKKFVGSMSLMSDVAKGTVVASQEATIVAKCSGMVAEGANKVVEMSKKAVQETQKAVSVAGEATFLIEKQIMLNKSPNNKELELTKEEFAKV-pVKETLMASERALDETVQEAQKVLNMVNGLNPSNKDQVLAKKDVtKAISNVVKVAQGARDLTKVMAISLY ...........KhFtNLIIsGLLFGhVsLNVFADSNNsshlp.pSNVlEpsDQKDsKp........LDQKDQVspuLshIsKVTEDVosKLEtVRESSLtLVESNDAulVKcFVGSMSlhSDsAKGsVlASpEATlVAKhSGhsAEsAN+VVEhSKKAsQETQKAVSVAsEAhFLIEKpIM.NKSPNNKELELTKEEFAKVEpVKETLMASERALDETVQEAQKVLNhlNGhNPSNKDQhlAKKDVtKAISsVVKVA............................ 
0 1 1 1 +3579 PF02462 Opacity Opacity family porin protein Bateman A anon Pfam-B_2356 (release 5.4) Family Pathogenic Neisseria spp. possess a repertoire of phase-variable Opacity proteins that mediate various pathogen--host cell interactions [2]. These proteins are integral membrane proteins related to other porins. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.62 0.71 -4.27 4 799 2012-10-03 17:14:37 2003-04-07 12:59:11 10 2 118 1 25 2965 35 133.70 47 70.81 CHANGED sGYchs.shRlAsDYT+YtKhsus...............sosl+Gh.....................LGhSslYDFDTtSchKPYlGsRVu......................oN.thchossA+h.thcu....sScp+lGlGVlAGVpYclTsNlsLssGhcYNplGph.psopVpshtspsGlRapF ...........................................................................................................VGYD.FG...s.....W.....R...I..AAD....Y...A.S.....Y.R..K...Wsss...............................p.............s..p......p...l..ptp.ssstpp.p.pp.................................QtNGoFHAsSSL.G.L.S....A.I...Y....DF.K..L..N..D...K.F...KPY..I..G..A..R..V..A...................................................................................hs....ps....+....h..p....s.....c....o....s...p..s....t..p..p..t................................................................................................................................................................................................................................................................... 
0 4 9 13 +3580 PF01160 Opiods_neuropep Vertebrate endogenous opioids neuropeptide Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 21.00 21.50 19.60 18.40 hmmbuild --amino -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.01 0.72 -4.20 31 202 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 67 0 73 181 0 44.50 39 20.36 CHANGED -CspDCutCuh+..spps.plNsLsCoLECEGcLsosctW-tC+clLpht ...............-CtpcCshCsh+..st...s.slNsLs.CsLECEupl.ssp..W-hCpphLp.h................... 0 3 11 26 +3581 PF04966 OprB Carbohydrate-selective porin, OprB family Bateman A anon COG3659 Family \N 19.80 19.80 19.80 19.80 19.70 19.30 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.98 0.70 -5.31 48 1348 2012-10-03 17:14:37 2003-04-07 12:59:11 7 5 568 0 433 1321 465 361.60 17 76.71 CHANGED cpshthssphthshshDLp+lhGh.cusphphs...hhpppGpslsss......hsshtshpplhGtt.......................................pthRLs.phhhppphh....................sspLsl+hGphshsspF.......................hssshsspFh.....shuhsusssss.........hsshhhsaP...hushusplchp.ssp....hhlpsGsacssssshss.............pGhsh........psssGshlssEls..ap.t......................................httsthsGpYclGuahssuphsshh.shsh....h.....................ts..spptp......................................uptGhahshpQpltpsss..........tsLslFupss............husssssh.....hsttlshGlshtGshtsRspDslGluhuhtphsspsptttt.ht..sh..............tssEhhhElaYphplsstlslpPslQYlhpPuss...........sphssuhlhG.l+hphsF 
............................................................................................................................................................................................................................t.t..tph..thph-LtthhG...h....pshphphs.....hhp.t.ps.h.shosp.................s..ts..h...s..h..pt.hh...utt...............................................................................................................ph.+Ls...phhhp.p.p.h................................tspL..pl..cl.Gphs.hsp..a........................................................s.sh..t.spFh.........................shsh.s.s.s.s..sss..........................hst.hh...s..aP.......tthus...pl.....ph..p.sst...................hhh.p.sGsa.p..ss..ss....s.hps...............................puhsh........................ttssGh..hh...hchs......at.t......................................................................s.s.th...sG.p..a..phu...s......h.h..s.ssp.h.s..sh.......h..s...t.h...............................................................s..pt.tp.............................................................................s.p.huhh.hshp.Qtlht.s.................sl.slhu.phs................................hsss...s.s.sh..................hs...h.thshGhsh..tu..h.....tRss....Dp...lGluh..u.h.t.h...ssp.h....p....th...t..t.................................pssphh..hEhhY..phpl........s..s........t..lp..lpPshQalhpP..uts.............pth.sssh..lhG..l+sphpF............................................................................................................................................................................................................................................... 0 101 252 353 +3582 PF03573 OprD Peptidase_S43; outer membrane porin, OprD family Griffiths-Jones SR anon MEROPS Family This family includes outer membrane proteins related to OprD. OprD has been described as a serine type peptidase [1]. 
However the proposed catalytic residues are not conserved suggesting that many of these proteins are not peptidases. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild --amino -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.97 0.70 -5.68 47 2107 2012-10-03 17:14:37 2003-04-07 12:59:11 8 5 748 29 364 1637 75 371.00 27 90.15 CHANGED tGFlEDSpsslphRNaYhNRDa+ss......ss.t...........upp-..EWuQGFlhsapSGaTpGsVGFGlDAhGhhGlKLDuGtuc..uG.oGl.......LP......hs.sD....Gc..ssDcau+hGs.ssKhRlSpTpLKhGc.hhPshPlltssDuRlLPpoFpGstlsSpElcsLsLpuGphspss.Rssoshc.chsh.ht........sussucphsasGGsYphs.cphosuhahucL-DlacQpYhulsashPluts.sLssslphacocDsGp....u....................thG..plDNcsaSshhuhph.GuHohsluaQ+ssGDssasYls..............G.ssshaL..sNsh.huDFssssE+SWQlRYDhsFushGl.....PGLoaMsRYlpGcslcssss....................s-G+..-WER-h-lpYVlQSGshKsLul+hRpuohRssh..........sschDEsRLIVsYslsl ........................................................................................................uFl-Dup.hshthh.h....hpR.Dh+ss..................st...............thpp......chsp..u...hhLs...apSG..a.......st.......s.......h.......l.......G...hG..lDs..huh..hulcLsssssp.....us...sth................h..............p.....ss.....tt........scsa.......u.......c.........h.G.....s.....s........sKh..+..h....u....p..o....h..l..........+.........h..Gp.h...........s.p.h.Ph...lhss.stRhlPpoap....G....s....t....h....s.s....p....-.h...s......s...L..shpshh....hs.c.hpt............sp.s..s..h....c....chh.ts....................................tsspscth..p.hhGu..p..Y.p........h.....s...ss.h.s..hphh.huphc-h.........hcQ...haht.h.sap..h.s..lu..s......sLsssh..p.ha....tsc....-c..Gs......u...........................hss...th...D.sp....s..a..s..tt..h..o..hsh...t..s..t.....ph....t..h....sa..t..p.....s.....s....G...sp...u.....a....hht.................u..sssha.h..........hu....D..F.s.s...ssE+oh......h..t..hsY-hts.h.s.l.....P..GLs.hhs..p..Ylh...G.shcsssh......................tphc..ch..thshphsYslQs.G.......hKshth+h+..
.s.hc.t....................................................................................................................................................................................................................................................... 0 43 136 258 +3583 PF03169 OPT OPT oligopeptide transporter protein Mifsud W anon Pfam-B_3048 (release 6.5) Family The OPT family of oligopeptide transporters is distinct from the ABC Pfam:PF00005 and PTR Pfam:PF00854 transporter families. OPT transporters were first recognised in fungi (Candida albicans and Schizosaccharomyces pombe), but this alignment also includes orthologues from Arabidopsis thaliana. OPT transporters are thought to have 12-14 transmembrane domains and contain the following motif: SPYxEVRxxVxxxDDP [1]. 24.50 24.50 24.50 24.90 24.30 24.40 hmmbuild -o /dev/null HMM SEED 624 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -13.07 0.70 -6.09 45 2900 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 1215 0 1473 2844 385 534.50 19 86.15 CHANGED 
phThRuhllGlllsllhussshhhuh+sshlshsslsstlluasls+hht..................................................h.hs.stPaohpEpsllpshus....uuss.h....shusshlhs..thhah..................tpph...shshtlhhshssshlGhhhushlR+hllhs..sphha......Psuhsssplhpshcpsp...............................................................p..hpph+hhhhshhsuhlatahsthhF.hlush.uhh.h........sthshshhhhths.........hshDashhh..lusshlsPhhsshshhhGslluahllhPlhhat.sshhsshhs.......h...us..............tshhhshtta.shushhhuuhhsl........hthssslspslhhphpsht...............................................t.......p-.ppphhpphc-.....................lP.hahhsshl.lshslulhhhhhhh..hphsh..........hulllulhluhlhshssuhhhulosh....ssul........shls.llhuhhhsup.................slAslhhsuhshsshupAtshhpDLKhGahh.tssPRthhh....sQhlGslluullsssVhphh..spt.hhss.........shshss.spshhhuulhhulhus.........shsahhlhhuhlhGhlhshlthhh....................tph.hPt..................lhhu.....lshhhssphss.......shhlGslhthhlc+...............................httthhpch.............shlluuGLssGtulhulllhhhh 
..............................................................................................................................ThRsh.hlG......hlhshlhsshsh.....hhs.h+..ss.h....sslss............t..llu..hsl...hphh..........................................................................ts.hshhE.ps..................hl..phhus.......uuts.............hu.........s.hlhs......s.h...hhh.........................................................htth.....shh.hhhh..s...hss...t.....hlG..hhh..sh..............hR+.........h..h........lh.........sph.a....................Pp......shssstlhpsh.cptt................................................................................................................p..thphhhhshh...............hu..hh..ath........s...h..h.h...hs.....th..sh...................................h..h..t..h.......tht..........................hshsh...s.hh....lGs...s....h.l.................hs.hh......ssh.shhhGh..hls.ah..l..hhPh..h...h.....h..h..........h.........s............t....hs...................t...........................................phh..h..s...h.tah..sh..uhhhhs....u..h.hsl............................hthht..hh.ps...hh...t...hp..thh...................................................................................................................................................s.....t.t...p...h..t.t...pp..........................................................................l.s..h.hh.h.h.hhl.....h.hhh.h...h.h.h..h.....h..h........h....hsh..............hshl.l.shhluhh....hshs.suhh.huhss.......hsuh...................shl.s...hl.hu.hh.h..s.......................................shhslhhss............hsh.ssss....usshhpDhKhG.....ahh...ths...........P...tt..h..................uphlGshhushl.ss...........lh.hh...............shh.hs..........................s.shs..s.sp.shhh...........s.slhhulhss............................................sh..a.th...l...hh...shhhGh.hh.lhs.hh..................................thh.hP.......................................lhhu......hs.h.h.h.s....t..hs.
h....................h.hl.Gh.h....ht.hh...hp...c.......................................................................hh..t.thht..ph.................................................................shlhuuGL.sGtulhullhhhh.h.................................................................................. 0 399 892 1266 +3584 PF04069 OpuAC Substrate binding domain of ABC-type glycine betaine transport system Kerrison ND, Finn RD anon COG2113 Family Part of a high affinity multicomponent binding-protein-dependent transport system involved in bacterial osmoregulation. This domain is often fused to the permease component of the transporter complex. Family members are often integral membrane proteins or predicted to be attached to the membrane by a lipid anchor. Glycine betaine is involved in protection from high osmolarity environments for example in Bacillus subtilis [1]. The family member OpuBC is closely related, and involved in choline transport. Choline is necessary for the biosynthesis of glycine betaine [2]. L-carnitine is important for osmoregulation in Listeria monocytogenes. Family also contains proteins binding l-proline (ProX), histidine (HisX) and taurine (TauA). 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.64 0.70 -5.04 87 7137 2012-10-03 15:33:52 2003-04-07 12:59:11 7 15 3034 55 1357 6630 3203 253.50 21 72.82 CHANGED sslslusts.as-shhhuplhtplLcpt.Gh.sschsshss.ss.hhsul.ps..G-lDlhs.tahsss......................hpthtc.thtpphshhh.s.shhhssp.uhsVsphhsc.............sl...polsDL.........tphss............shsschhsts.sGhs.......shhcsY.Gl......hh..ushtsh.s.....tlhpAhcptc.shlshsasscshhsph....c.lhhLc..................................Ds...+shhss.....tpltslsp.pshtcctPp.ltphLpplp..l.sscphpplttplst.pttss..pp.sAppalpp ............................................................................................................................slplusts...h.sE.st..h.h...s...p..l...h..p...p..l...lE...pt...Gh...ssp...h.h.s..h..s..s.......sh.hapul..ts........G..-....lD.l.hs.pas.sss.......................................................................hpt.hp....p...ht.t..p...h.thhh.h.s....shh.h.s.s.s.h...u..l..sVspth..up....................................t....sl...polu.D..L.........................................tchtt................shsschhs..pt...sGhs...........sh..h..c..s..Y...sl...............h...s.s...h.s.sh.s.............hhhpAh.pps..c...hsshhs...a.s...s..c..s..h.....h..s..p..h........c..l..t..h.Lp..........................................................................................Ds.......cp..h..hss......................hp.s.s..s.lsp....cs....hh...c.c....p.Pp..ltp.h.l.splt.......l.ssp...phpphphpl...ps...ptt...ss.....pp...sAcpalp...................................................................................................................................................................... 
1 350 775 1085 +3585 PF01718 Orbi_NS1 Orbivirus non-structural protein NS1, or hydrophobic tubular protein Bashton M, Bateman A anon Pfam-B_1752 (release 4.1) Family This family consists of orbivirus non-structural protein NS1, or hydrophobic tubular protein. NS1 has no specific function in virus replication, it is however thought to play a role in transport of mature virus particles from virus inclusion bodies to the cell membrane [1]. Orbivirus are part of the larger reoviridae which have a dsRNA genome of at least 10 segments encoding at least 10 viral proteins [1]; orbivirus found in this family include bluetongue virus, and African horsesickness virus. 25.00 25.00 111.40 111.20 18.80 18.40 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.23 0.70 -12.87 0.70 -6.48 7 185 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 65 0 0 167 0 452.80 51 99.24 CHANGED MERFlRhFplsuptA.tlRhhtsISspWTCSHh+RsChhpGhCs+QpFpps.s.tshcpt-hstAp+llclAtphhhsRpclWhpshpshsp.hs-phtpchppshppL.-sYppSshpcchtshhphpcssp.lhhDDShShh.hhYhPhspss.spssplsRhtphhhsFYsspsscs.hl..pp.thcthhsphhp.stctlspC.aTGsptsl.tlhahP.phhshhss.p.sphlhRhsphDlphIhphsh+c.sRlhhQRFGhcssu.sslaphhlt+lch.sh.pollpt+h.....psWpphhlPhlLhRthhhthhs.p.hhsahpschsCQsCaltptsphcplhllDsRsuElsG...ssshths+hhcHhDs-.ph.pht-LptsEhlsR.usHWhshsChoot-AlhlThh.IHRhlRGsGlhss.thp.uh.hLARshLhWt..ssttpSslFRLhCashhthpspspGphhsWtDLGsFhchlhcspcLs.s.sEchasshhchs.....LhYhpp....h+hsshs...tsthpptplhpllpt.hsh 
....MERFLphaslsG-hANAsRhFhsISPQWTCSHLKRsCLhNGhCs+Qp.F-cs.h.tAT.Dsp-.tpAh+LV-LApcAhhDR-TVWLpshKshsp.hpppl-tphccpsspLh-s....YppSGhh-Eh.p.pthssspR.lhlDDShShhPYhYlPhppGp.hlssshlSRatQluahFYsssssDc..aIs.s.hGlRttpsplK+tlEcplss..CPYTGhpGRlhpVhaLPlphhshhchp-.Apph.RhAuhshp.hh+s..Gatct.RhlpQhFuh.sssEhslHphMLh+hch.scspolVphRhhpsGs.sWpoWhlPhhlhRpuh.t........p.E.llsaM.c+KhTCQ.lC.ahhc.t.h.ThsVlDs+hu.ELsG...soshhhs+h..HssNc.pV.ps+.LhhsE.ht+IsDHahhppCaTstEAlhsTAIpIpR.IRGsG.Wss.happuhhhLsRlllhW.h.shspRSAlhRLhCFshaGhtPpAcGph.DWssLGoFhc.lLcG.-Ls...EDEp.sahoMhcMs.....hhasp+....s+hsshs...tsshE.tpVhplsth..p............................ 0 0 0 0 +3586 PF01616 Orbi_NS3 Orbivirus NS3 Bateman A anon Pfam-B_1029 (release 4.1) Family The function of this Orbivirus non structural protein is uncertain. However it may play a role on release of the virus from infected cells [1]. 25.00 25.00 47.30 47.10 20.50 20.50 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.13 0.71 -5.34 11 375 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 70 0 0 339 0 202.30 56 91.79 CHANGED p-holhsh.....PPhh.soAPshs..........thpshuLslLspAhossTGAotspKcEKAAauuhAEAL+D..stslRpIKhpVsppsLscL+t-Lpsh+R+psll+hlhhlsuslslsoohlsuhophssplpphhptp...............hlshslpslsLhsTshhlhsu+hppplppplcRsK+-IhKRcoYssAAphshsuss.........thsLpsschut .............s.............pP....PpYsPSAPhPS..........oMPTVAL-ILDKAMSNTTGATpsQKsEKAAFASYAEAFRD..DlRLRQIKRHVNEQlLPKLKo-LuGhKKKRAIlHhTLLlAAVVALlTSlsTLoSDhuVs..hKhNsTps-ls............ahKsLsshlGllNLGAThlMMsCAKsE+uLNQQIDhIKKElMKKQSYNDAVRMSaTEFS.........SlPLDGF-hP.h............... 0 0 0 0 +3587 PF00898 Orbi_VP2 Orbivirus outer capsid protein VP2 Finn RD, Bateman A anon Pfam-B_1525 (release 2.1) Family VP2 acts as an anchor for VP1 and VP3. VP2 contains a non-specific DNA and RNA binding domain in the N-terminus [1]. 
18.40 18.40 18.40 18.50 16.10 17.90 hmmbuild -o /dev/null HMM SEED 946 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.48 0.70 -13.27 0.70 -7.02 25 468 2009-09-11 12:16:15 2003-04-07 12:59:11 12 2 67 0 0 373 0 558.40 24 90.95 CHANGED uLLs+YPLAIplsVKl-D.GG+HsllKIPESDMIDlP+.oIlEALsY+PtRNDGlVVPRLLDITLRAYDsRKSsKsA+GlE.FMTcs+WMKWAIDD+MDIQPLKVoLDcasS.VNHQLFNClVKAcSANADTIYYcYaPLEsusK+......CNHTNLDLLRSLTssEhFHsLQGAAYuLKoTYELVsH..SERcshSEoYpVGspcaIpLpRGT+IthpGpsYE+FlSSLVQVllcGKlP-cIRsEIApLNc...I+sEWhsAsYDRs..+IRALELCKILSsIGRKMLDs.....pEEPKDEMsLSTRFQFKLDEKFh+sDpEHlNIFcVGusATD-GR.FYALIAIAATDTQpGRVWRTNPYPCLRGALIAAECcLGDVYhTLRpsYcWSLRP-YGp+ERsLEsNK.YVFuRlNLFDoN.LuVGDcIIHW+YElhps..+ETTaDcGYhCsppt..s.DDELlCclDED+YKEMhDRMIQGG.WDQERFKLHsILT-P.NLLTIDFEKDAYLssRSELVhPsYYDKWIsSPMFNARL+Is+GEIAThKuDDPWssRuV+GYIKssuESL-YsLGsYYDLRlpLaG-sLSLpQcQS..AVFpahuQpDDFusLTchppGtsVCPHS....GGshYTFRKVALhlluNYE+LsPsLHEGhEccpYhHPu...lsstac++VLEM+DhSQL...ICFVFDYIFEK+pQLRss+EARhIlYLIQssoGstRL-VLccsFPNFhc+lhsL+-lKplpDLNVIN.FhPLlFLVpDNISYhHRQWSIPMlLFDcs.IRLIPVEVGAYANRFGhKSFhNFhRFHPG-uKK+QcADDsHKEFGslsF-YYssTKISQGplcsPVVToKhDsL+lHlASLCAGLADSlVYTLPVAHPKKsIVLIIVGDDKLEPplRSEQIVs+Y.aSR+HlsGlVSIsVsQsuQL+VHopGIs+HRlC-KsILKYKCKVVLV+hPG+VFGNDELMTKLLNV 
..................................................................................................................................................................................................................................................................................................................................................hhhp.t.s.....-sh.h.p..sht..............Hs..thh.phh..t...h.pthsYtht.p.hplhsh.......sp...tphp.....c.....h.s....p..h..t..h.....+th.h........tt.tthphhh.......tshs.......hphph..........................t...............tp.......h.plp................I..ptp....http.s....p....p.sp.lpthht..hh.p.......p.tsts.p.shp.+Fphpl.tph...pstch..I.ht..tttthsp.t+.FhsllhlsssDs.pttha....h.hlhhhh...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.ptt....hh....p...hphhhus.CtGht-.h..hhPhtpP..shlhh.h.s.......t.t.h..hh....p...thh.l.lt...............tt................................................................................................. 1 0 0 0 +3588 PF01700 Orbi_VP3 Orbivirus VP3 (T2) protein Bashton M, Bateman A anon Pfam-B_1622 (release 4.1) Family The orbivirus VP3 protein is part of the virus core and makes a 'subcore' shell made up of 120 copies of the 100K protein [1]. VP3 particles can also bind RNA and are fundamental in the early stages of viral core formation [1]. Also found in the family is structural core protein VP2 from broadhaven virus which is similar to VP3 in bluetongue virus [2]. 
Orbivirus are part of the larger reoviridae which have a dsRNA genome of 10-12 linear segments [1]; orbivirus found in this family include bluetongue virus and epizootic hemorrhagic disease virus. 25.00 25.00 48.40 47.90 23.50 19.20 hmmbuild -o /dev/null HMM SEED 890 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.54 0.70 -13.39 0.70 -7.12 7 204 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 66 2 1 161 0 531.10 67 98.12 CHANGED ssuPYLcGDplpsDsGPLLSlFALQEIhpKVRpsQtchtstupEl-sshP-VppIlssl+sLtcp+sY+llppPshSaRaIshQSp-RhhRVsoaaERhSplG-shcpp-PhcFhssllc+V+alRscGuFlLaslsT+.hcGtEls-s-sLGV-hpshhssLsA.sRthlQstLsuhlIpNtpsscp.VDVa.GACsDslYRlHstLpuYlEssQhtphRpulsWLpphGppKRIpaspcaLTDhhpuDTIalLohpLPsNPpVIW-VPRsuIuNLIhNhALslPTGpYluPNPRIsSITlTpRITTTsPFA.LpGhsPTAtQMsDVRKIYLALMFPNQIlLDl+.-PGHplDPllphVuGVlG+LhFoaGPRhhNITpsMAp.LDhuhscaLhaMhssRhplpaGsoGcPLDFhI..GppQaDCNthtss.pTGpGYNGWu.VDs.tccPoPYsHVpRhIpYhshDScElID.RhhG.shsY.happMhchLltAG+ssEtsYhctML.aHhVRFA+INQIINcDLlSAFShPD-pFshhhssh.pssattssPlVLDlSahSIWFAFphRF.PssRs-hl..tPLlESVYAScLSlhKlcsppLphhhspsP-shlpA+PochWKAVhcp.PEPl+slhsLstp+sFlshRDlhpWlppsthQcSLhhhh-cEAWtsssD.pDLMhsccVYhHRp.lPEPhLDDlcpFRR-uFYYTNMlDu.PshscsVhhohthhhlpAshGph+uAlRphlDDssal+hGssLRslhlcFF-ShPPp-lLpALPFsYpscE+sG.lshsol+hsspspsaaLlYNV-hsshPDphlslsPshshTKlalpp+lVcRVcsssALuVhN+cFluY+uKhRlMDlTpuLcsGsQLAuPos 
....................................................................................................................................................................................................................................................................................................................PRstlsNlhhshuhs.Phs.ah.sss+Istlolstcls..psssFu.l.u.hsTt.QhsDVRKIYLALMFPGQIILDLKIDPGERMDPAVRMVAGVVGHLhFTAGsRFTNlTQNMARQLDIALsDaLLYMYNTRlQVpYGPTGEPLDFpI..GRsQYDCNsFRusFtTGsGYNGWuhlDVEhR-PAPYsHsQRaIRYCsIDSRElIpPtT.a.GhsMpYhhapcMh.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1 1 1 +3589 PF05059 Orbi_VP4 Orbivirus VP4 core protein Moxon SJ anon Pfam-B_5992 (release 7.7) Family Orbiviruses are double stranded RNA retroviruses of which the bluetongue virus is a member. The core of bluetongue virus (BTV) is a multienzyme complex composed of two major proteins (VP7 and VP3) and three minor proteins (VP1, VP4 and VP6) in addition to the viral genome. VP4 has been shown to perform all RNA capping activities and has both methyltransferase type 1 and type 2 activities associated with it [1]. 
25.00 25.00 98.50 98.30 16.00 15.70 hmmbuild -o /dev/null HMM SEED 644 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -13.16 0.70 -6.76 6 86 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 47 5 0 97 0 564.20 62 99.40 CHANGED M-P.HAVLYlopsls.hLccuFLPIhcLsGtEsLNsLWhtpGpasTDhYshGslpKWoIRQLRGauFIFlup+.cpIphtDsshshDllI.Ppphscsps.KcFEThIGhcRVtLRKsFGDhLRsYAhpashpFHGSEAETL.hA-P+RH+VhGhPpsPPshsh.sph.ssapsDpPTDEKLVSMLDYllYSAD.plaYVGCGDLRTLppFt+RDp+RFsRVpWhCIDP.IuPEosssNVllhpthlssscDL+pahp.ssslE+lLIWDVpoDputhushEWEppphpEDcLGEpIAhuhpshhuhAlIKHRIPp.ocppapsaoShLlPQPGAPhsMYELRNlhRL-GaS+VDRoHIPcApspplptcDsRpLVcpaHGpsRG+pLK+slaEaLHIpRpsGLpHtu-.PRADLFYLTNppNts+hp-ItcVlEpSsIuTlWVGsc.hasYDDFsYsRpplML+Fsp+s+hVlDGNGhILaLMW+hs.schspclsYDPuWApsFuVlhh+shsss.VPDlSLCRFIGLRphSohLRlpocpVHchsDlLK+LGLDlSGHLFIuLhSuuYlhDLhWWhcMIh-WSlhs+ppKLtslpctpAEVIEW+E-+AsEPWHhhsDLhAALhthuch.ch.hhcp..uslppWl-hLR ..MP.EP.HAVlYVTpELsHllKpuaLPlWclpGsEoLN-LWLpNGKYuoDlYAaGclppWohRQLRGHGFIFlST+.KslQLsDllhsVDVRI.P+-lh+s.D...hKtFEs.IGRRRl+hRKsFGDhLRsYAa+hAI.hHGSEAETLNsANPRLH+lYGhPc.PPhYhEhtph..ts.FsDEPTDEKLVSMLDYhlYSsE.EVHYVGsGDhRTLhpFtKRSPtRF+RlhWHlYDP.IA.s.-sp..sNVhVHphhVsuK+Dlh+phNhLKRVERLhIWDVSoDRupMsDcEWEppRFAEDRLGEEIAhphuGhFS.AlIKHRIPp..h-pYHshSTYLhPQPGAssDMYELRN.hM+L+GYSHVDRphHP-ApVhplVs+DlR+hVEhYHG+D+GRhLKKpLFEaLHIhRcNGLhcEs-EPRADLFYLTN+CNhuh..slYcVh+KShIAThWVGps.LaDYDDaulPRuhlMLpsSa+DlRlLDGNGAILFLMW+YP.-hhKKsLsYDPuWAMNFAVohKEPlP-PPVPDISLCRFIGLRVESSlLRlpNPplHpssDELKRMGLDlSGHLYVTLMSGuYVTDLhWWF+MIL-WSups+EpKLcsL+cStAEVIEWK-pMAERPWHVRNDLIAALREaKh..KhthRps..Asl-SWL-lLR................ 0 0 0 0 +3590 PF00901 Orbi_VP5 Orbivirus outer capsid protein VP5 Finn RD, Bateman A anon Pfam-B_1525 (release 2.1) Family cryoelectron microscopy indicates that VP5 is a trimer implying that there are 360 copies of VP5 per virion [1]. 
23.60 23.60 24.20 143.70 22.00 23.40 hmmbuild -o /dev/null HMM SEED 508 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.52 0.70 -6.30 14 206 2009-09-11 01:23:19 2003-04-07 12:59:11 12 1 72 0 0 195 0 462.80 62 96.62 CHANGED tLuRhG+phssALTSsTAK+IYpTIG+AApRhAESEIGSAAIDGllQGolcShlTGEsaGESlKQAVILNVlGss-shPDPLSPGEptltpKlcELEcEp+p-hlpp+HsccIhcKaGp-.L-clh+hhsspt+hpptEccQh-hLcKAlcuhtclhcpEscplpcLtcALp+EsptRTc-EscMlppaRpKhcALpsAI-lEppuhpEEAIQEhhshoADllEsAAEEVPlhGuGhAoulATuRAIEGuYKLKclIstLoGIDLoHLcsPcIpPphlpslLcpsst...lsDppLspultuKlctlcE.ppElcHlpppIlPclKKthc--cc....hts.pcphIHs+sh.paKlPppQpPpIHIYoAPWDSDpVFlFHsluPHHtscSFhlGFDLEl-aVaaEDlotchHtL.GuAppssGRoF+pAY+EFhphAhp.stssthHp+RLpRSpusHPIYLGShpYplSYtpL+pNA.plVpss-LQhHlLRGPl+FQRRsIluALhaGVcll ...oLSRFGKKVGsALTSNTAKKIYsTIGKAAERFAESEIGSAAIDGLlQGSVcSIlTGESYGESVKQAVLLNVLGuG--lPDPLSPGE+GhQhKl+ELE-EQ+sElVRlKaNccI..tc+FG...c-.LE-VYcFMsGps+pEttc-cQaclLpKAVsSYpKlltpEcpph+pLApALQ+Ehs-RTcsEppMVcEYRpKIDAL+sAIElER-GMQEEAIQEIAGMoADVLEAASEEVPLlGAGMATAlATuRAIEGAYKLKKVINALSGIDLSHLRTPKIEPshluThL-pcttc..IPDppLAhulluKtcuIp-NppEltHIcpEILP+hKKhM-E-+E....lpuh--KhIHP+lhM+FKIP+sQQPQIHIYoAPWDSDDVFhFHClS.HHtNESFFlGFDLuIDlVHaEDLosHWHAL.GuAQpAtGRThpEAY+EFhNLAlusshsothHtRRhlRS+ssHPIYLGShHY-IoappL+sNAp+lVYD-ELQMHlLRGPLHFQRRAILGALKaGsKl........... 1 0 0 0 +3591 PF01516 Orbi_VP6 Orbivirus helicase VP6 Bateman A anon Pfam-B_765 (release 4.0) Family The VP6 protein a minor protein in the core of the virion is probably the viral helicase [1]. 
20.00 20.00 20.00 22.00 19.10 19.90 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.03 0.70 -5.37 3 135 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 47 0 0 149 0 255.80 52 93.72 CHANGED LLAPGDVIKRSSEELKQRQIQINLVDWhEScuuKE-..EPK-EucuEpp..lSDGEGsQpccGpKEESuKETcDAsVDRRlHTsVGSGSusKGSGERAsKcADtGDGKstGGGGDADcGuGATGTs.GGGWVVLT-EIARAIESKYGTKIDVYRDEuuAQIIElERSLQKELGISREGVAEQTE+LRDLRRKERs-s+IKAV.+GsRKp.Rpptcusup+EGVtEE.spEEso+IGITIEGVMSQKKLLSMIGGVERKsAPIGARESAVMLVSNsIKDVsRATAYFTuPTGDPsWKEVAREAuKKKNILAYoSTGG.DsKTEFLHLIDHL ................................LLAPGDVIpRSoEELKQRQIQIpLlDW.-s-s.t........Kcp....EsKt.E.scsctp........cDG.E.........G..s.p....p...c.supKcc....uu.....cE...s..pD..AssD..RRlcTsVGpGous+GsG.ERs....sc..ssD..t..............GDuKst....tGuG-s...ctGsGs....s........Gss..t.GtWVVLT-EIAcAIco+YGsclc......VY+s-..s....s....u..pIIplE+SLQKELGloREssAEQTEtLRcL+...R..Kc+st....s+s+...us.....t.....+...GtcK...p....tp.p...cu........s.u.p+.Eu...sp.c-.....s.........pcpss.....pl..u............ls....IEsVMSQKKLLSMIGG.sER+htsIsARESuVMLVSNsIcDVsRATAYFTAPTGDspWKEVARcAoKKcNIhAY.oS....oGG..D..sKpEFLHLIDHL........................ 1 0 0 0 +3592 PF00897 Orbi_VP7 Orbivirus inner capsid protein VP7 Finn RD, Bateman A anon Pfam-B_1523 (release 2.1) Family In BTV, 260 trimers of VP7 are found in the core. The major proteins of the core are VP7 and VP3. VP7 forms an outer layer around VP3 [1]. 
25.00 25.00 48.60 43.30 18.90 18.80 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -11.96 0.70 -5.51 7 191 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 65 22 0 159 0 303.00 60 99.57 CHANGED MDuIsARALoVlcuhso.pDsRsph-sss.-hhuIhlsRaNuhT.RsVohRPooptcRsphFahslDhsluAhslplu.l.PsYp.shtTlulLApsEIPYTspAhsclsRlouphsshtssRp.hh.a.ssptlhtPGpha.hsAups.pshsluushhplolssutpsplsshlhPspsDslMhhFlWpplppapsssGss.-susssplolsssphcAGs.lls..sG.AslslsssuspsuhlchpVhaYhshs.o.shYsshpsplhssYSY+s.pWauLRuhlLpphslPshhPPhhPss-tpplLsLhLlSpLtDsYsshpP.aslhus.sh.sphptul...shsAh+ ........................................................MDsIAARALoVh+ACsTLp-sRlslEusVhElLGIAINRYNGLTLRuVTMRPTS.sQRNEMFFMCLDMhLuAsslNlGsISPDYtQphATIGVLATPEIPaTsEAANEIARlTGETuTWGPuRQPaGaFLpstElhQsGRaa.hRAuQslTuslsusshhQVShNAGARGDlQt.lFQspNDPh.MIYhVWRRIcsFu.spGNSQpT.sGVTVsVG.GVsMRAGc.IlA.WDG.QAslpVpNPstpsuMlQIpVlaYlShDKTLsQYPuLsApIFNVYSa+s.TWHGLRsAILNRTTLPNhlPPIFPPsDR-slLsll.LLSsLADVYoVLcP-FslaGVsshsGslsRAl...stsAY...................... 0 0 0 0 +3593 PF02072 Orexin Prepro-orexin Mian N, Bateman A anon IPR001704 Family \N 22.60 22.60 22.80 25.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.90 0.71 -4.34 3 54 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 40 4 25 44 0 111.00 55 75.78 CHANGED MNsPsAKVsWAAVTLLLLLLL.PPAlLSLGuuAQPLPDCCRQKTCSCRLYELLHGAGNHAAGILTLGKRRPGPPGLQGRLQRLLQASGNHAAGILThG+Rt.ERPuTRhpsuhpChAustsoVoPsG+uuh .....................htslhLLLLLLL..shLho.ussAQsLP-CC.RQKTCSCRLY-LL.....H..G..........A.....G...............NH..........AAGILTLGKR+sGs..s.sLQuRLQRLLpu.SGNaAAGILTMG+Rs.t-su................................................. 
0 3 5 9 +3594 PF03827 Orexin_rec2 Orexin receptor type 2 Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 28.20 27.00 21.90 18.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -8.88 0.72 -3.92 3 46 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 37 0 28 36 0 57.30 70 13.61 CHANGED LGVHHRQEDRLTRGRTSTESRKSLTTQISNFDNVSKLSEQVVLTSISTLPAANGAGPLQNW .........LGVH++Q-DRLsRGRTSTESRKSLTTQISNFDNlSKLSEpVVLTSISTLPAANGuGsLpsW....... 0 1 4 12 +3595 PF02999 Borrelia_orfD Mlp; Orf-D; Borrelia orf-D family Bateman A anon Pfam-B_1511 (release 6.4) Family Borrelia burgdorferi supercoiled plasmids encode multicopy tandem open reading frames called Orf-A, Orf-B, Orf-C and Orf-D. This family corresponds to Orf-D. The putative product of this gene has no known function. 21.70 21.70 21.90 24.00 21.50 21.60 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.29 0.72 -3.86 4 180 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 29 0 13 111 3 94.80 63 78.45 CHANGED SLP-cPshPhhpTLpsLup.EApLu-YVMYL.sFLs+TKsKVN....DspYPca.Y.D.SThKDEpoIps...lKaNIslahcYIcKTKPIsctVYpKYS+Lph ......sLPEEPcPPIIpTLKSLAKYEsQLS-YVMYLlTFLuKTKVKVN....DPNYPEYsYPDLSTLKDEHSITu...lK+NIslYLEYIcKTKPIAcKVYpKYSpLKh... 0 8 8 8 +3596 PF04160 Borrelia_orfX Orf-X; Orf-X protein Bateman A anon Pfam-B_3014 (release 7.3) Family This short protein has no known function and is found in Jaagsiekte sheep retrovirus. Jaagsiekte sheep retrovirus (JSRV) is the etiological agent of a contagious lung tumour of sheep known as sheep pulmonary adenomatosis. JSRV exhibits a simple genetic organisation, characteristic of the type D and type B retroviruses, with the canonical retroviral sequences gag, pro, pol and env encoding the structural proteins of the virion. An additional open reading frame (orf-x), of approximately 500 bp overlapping pol [1]. 
25.00 25.00 96.40 96.30 19.10 19.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.04 0.71 -4.41 16 37 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 3 0 0 36 0 151.00 93 93.07 CHANGED MQPtNPMIYITKIVILYACNLKF.VKLHGKLLNLsLLVLNSLFSLNMVSTLEVYALITSGKQMLLTFLNLDVLNMFMFLLTLFPIFSWP.FTLENQHVTVFNIhCFsFLLQEShKPL+QIMDLsILAVLFNVFVFL.KFIIKQtFLIIHRDKVL MQPENPMIYITKIVILYACNLKFPVKLHGKLLNLALLVLNSLFSLNMVSTLEVYALITSGKQMLLTFLNLGVLNMFM.LLTLFPIFSWPPFTLENQHVTVFNICCFAFLLQESHKPLKQIMDLVILAVLFNVFVFLSKFIIKQEFLIIHRDKVL 0 0 0 0 +3597 PF04061 ORMDL ORMDL family Wood V, Finn RD, Bateman A anon Pfam-B_4871 (release 7.3); Family Evidence form [1] suggests that ORMDLs are involved in protein folding in the ER. Orm proteins have been identified as negative regulators of sphingolipid synthesis that form a conserved complex with serine palmitoyltransferase, the first and rate-limiting enzyme in sphingolipid production. This novel and conserved protein complex, has been termed the SPOTS complex (serine palmitoyltransferase, Orm1/2, Tsc3, and Sac1). 21.30 21.30 45.20 44.90 20.90 20.80 hmmbuild --amino -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.00 0.71 -4.61 37 393 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 252 0 253 347 2 131.30 48 74.56 CHANGED NhNssWlst+.........GsWlhalllIhll+lhh.hlP..shosshuWTLTNlsaslsoalhFHhlKGoPF-h...stGsYcpLThWEQID...................pGsQaTss+KFLhsVPIlLFLlooaY...o+Yc.hhFhlNh.sslhlsllPKLPhhH+lR ......................N.NspWhsu+..GhWhhallllhhL+llh.slP..hhSsshuWTLTNlha.hu...............pYlhhHhVKGTPF-t...spGthctLThWEQlD...................pGsQa.T.......soRKFLhllPIlLaLluoaY...T+.YD..h..s..hFhlNh.lullsVllPKLP.hHtlR................ 
0 63 115 181 +3598 PF02784 Orn_Arg_deC_N Pyridoxal-dependent decarboxylase, pyridoxal binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain These pyridoxal-dependent decarboxylases acting on ornithine, lysine, arginine and related substrates This domain has a TIM barrel fold. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.64 0.70 -5.20 17 10956 2012-10-03 05:58:16 2003-04-07 12:59:11 11 31 6377 85 2329 7307 4527 211.00 26 55.03 CHANGED Dlu...lp+ttthhpthhsh......hpshYAVKsssshsllplLschG..s....GhDsuSpsElphVLu....hGsssc+IlassssKstuplchAhpp....tlshhshDshpELcpltchtsct....plhlRlpss.-spspshlus.....KFGhshpp...stthlctApphs.lplhGlphHlGSths-hpsahpAspcshplhctht.phGh...hchLDlGGGasss.p...........s.phpchupslpsulcchhs.......ptspllsEPGRhhsus .............................................................................................................................h.....t.t....................ha..sh.Kss.p....t.l...lp.hh......h.....p.........G..................................sh-.ssSh.............sEl.h.s.ht..............hs..h...........s.......p....p......l.hhss.s....h....K......s.tt.l.p...h....A.h.t...................th.h.h.h.sh-.s..t-...l....p....h.lt...p.hs..th.................................tl.hlR.lp...........s......................t..t..t...s....t................h......h...s...s.......s.......t.....s.....KF.Gh.sh......p.............p..........h........h.t.h.l.c..t..s.p.p.......h.............s..l..p..l.h.Glc..hH..l...G..S....p.h..s.....c..h..c..s....ah.pu.....h.....p...c....s....h.......plh.sph..t......ph.............G...h.....p.................lphlslGGGhu.l...s.Yt.........................................t..t.........t...h....h...t........h.......t...h...h...pt...h.h.t...................plh..hEsGRhlsu............................................................................................................ 
0 749 1458 1952 +3599 PF00278 Orn_DAP_Arg_deC Pyridoxal-dependent decarboxylase, C-terminal sheet domain Finn RD, Griffiths-Jones SR anon Prosite Domain These pyridoxal-dependent decarboxylases act on ornithine, lysine, arginine and related substrates. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.45 0.71 -4.37 105 9303 2009-01-15 18:05:59 2003-04-07 12:59:11 17 26 4591 89 2414 7018 3338 153.60 20 33.75 CHANGED sLlspVhsh+p.st..........................................................................................tt.............hhh.lssGhhsp.hsshhhst....hhslhhht...............................................hpspstttsslsGsoC-usD.hl...sp.........chhLP.........plp.GDhlshhssGAYshshus.saNuhs..tsshlhl ..................................................................................................................................................................................................................hsplh.hcp..................................................................................................................p...................hhh.lDsuh.ts..t....hcsh.h....h.s.t........ha....l.h..s..lpt..................................................................hs.p..t..s....t..p...p..ssl.sG...hC-.osDhl..sp.............................................shtLP........................php.G.D.hL..s.hhssG..............AYsh.sh.us....saNshs..pss.h......................................... 
1 788 1532 2038 +3600 PF02088 Ornatin Ornatin Mian N, Bateman A anon IPR002463 Family \N 25.00 25.00 40.40 40.20 18.40 17.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.35 0.72 -4.39 2 7 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 1 0 0 6 0 37.90 75 84.39 CHANGED l..Ct-h+E.GQPscKCRCsGKPCTVG+CshARGDssDKCh L.YCG-FRELGQPDKKCRCsGKPCTVG+CphARGDssDKCh 0 0 0 0 +3601 PF02250 Orthopox_35kD 35kD major secreted virus protein Bateman A, Mian N anon Pfam-B_3549 (release 5.2) Domain This family of orthopoxvirus secreted proteins (also known as T1 and A41) interact with members of both the CC and CXC superfamilies of chemokines. It has been suggested that these secreted proteins modulate leukocyte influx into virus-infected tissues [1]. 21.90 21.90 23.60 22.30 21.50 21.70 hmmbuild --amino -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.46 0.70 -5.05 5 131 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 30 7 0 113 0 211.00 44 89.18 CHANGED hCopcEDs+YMGIDllhKV.TKpspTsssDshCQulp-lpESsc-tDEuoEctuTSTspGDshu...................oTYaTlVGGGLSlsFGFTGCPplsSlSEascGsaVYVRLSScAPW.+sTsslShNRsEAL.ulLEKCELSIsIKCSNpchsE.........TThsouoLsPcIopcsT-p...............uDIIGSTLVDT+CVcSLDloVcLGDMC...K+oS-LSlKDuhKYsDGELl....-DsuDsaslsSssLpAC ....................................................Cptctpphahthpl.h+l.s+ps.h.sssphC..hhphpps.....................-............t..s.ps-s.......................sshhSl.sGGLphshuahpC....+olup.sstsTV.A+huSlsPh..pscsss.uhT..+.....--s....l....thlc-C.VsIpl+Cs.-cpcs...............hhppssht.p.scpKs.s......................pcllGSh..I..VDscCVpslchpV+ItDhC...KppS.hpl+DhFphssGp........................................... 0 0 0 0 +3602 PF00213 OSCP ATP synthase delta (OSCP) subunit Finn RD anon Prosite Family The ATP D subunit from E. coli is the same as the OSCP subunit which is this family. The ATP D subunit from metazoa are found in family Pfam:PF00401. 
21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.82 0.71 -4.40 192 4976 2012-10-02 21:03:42 2003-04-07 12:59:11 13 14 4570 6 1207 3297 2540 165.70 25 84.70 CHANGED lAppYAcALaclA.p.-.......ps..t.....l-ph...tppLptl..ssltpsscltphlssPtlsspp....Ktpllpp.lhps........................tl..sp.....hstNFlplLs-ppRlshLspIhptapp.lhsctcshhpspVpoAhsLsspptpplpptL...ppthsp.plplphplDssllGGlllcl...GspllDuSlpscLpplpppL .............................................................................................................................................lup.YApAlaphA..h.c.........ps...p.......l-ph...tp...pLthl....thl..t..p...s..t.....p.l.tphLs.s....s....s..lsspp....+tc.hltt...lhsp..........................................th...st.....h.pN.h.l.p..lls.......-s.pRl.s...h.ls.p.l.........hppa.p.l...hs.p.p.p...sp......h....ps....pVt.S......A......hs..L.o.....c.....p.....p.hp.cl.tpt..l.............pc..p....h....u.....p.....p...l.p.l.p..splD..sl.lGGlllcs..........G...c.....p.....llDsSl+s+Lpplppt................................... 0 405 776 1019 +3603 PF02566 OsmC OsmC-like protein Mian N, Bateman A, Finn RD anon Pfam-B_2694 (release 7.0) Family Osmotically inducible protein C (OsmC) (Swiss:P23929) is a stress -induced protein found in E. Coli. This family also contains a organic hydroperoxide detoxification protein (Swiss:O68390) that has a novel pattern of oxidative stress regulation [1]. 
22.80 22.80 23.10 23.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.55 0.72 -3.82 173 7485 2009-01-15 18:05:59 2003-04-07 12:59:11 14 22 3139 56 2170 5359 1255 100.10 21 63.21 CHANGED ussp.....ussPh-llhuuluuChshshthhsppp.th..pl..psl....plplp.......h...chs.t.................hpplp.....lplplthss......s.......pphpchlpts.pchCslt.psl.ptssplphp .........................................................s.tsp...ussPt-LlhuuhuuChshsl...thhhpct..th...sh.....ssh............plpsp............s............pts.ps..s.t.................hpplp......lphpl..p..hs..s.........hs.t........pph.pcll..........p....hu...c....phCslu.psl..psslslph.p.............................. 0 654 1315 1789 +3604 PF03207 OspD Borrelia outer surface protein D (OspD) Mifsud W anon Pfam-B_2915 (release 6.5) Family \N 25.00 25.00 66.10 66.00 23.70 23.70 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.91 0.70 -5.23 2 24 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 13 0 1 22 0 218.40 86 98.61 CHANGED MKKLIKILL.SLFLLLSISCs...........L.......DNEGsNS.sYESKKQSILuELNQLLtQTTNSLKEAKNTTDNLNASNEtNKVVEAVIssVNLISSAADQVKuAppNMHDLAQMAEIDLEKIKpSSDKsIhAuNlAKEAYsLTKAsEQNMQKLYKEQpc..co.S-SD.........hpsSsEIKQAKEAVEIAWKATVcAKDcLIDVENsVKEsLDKIKTETsNNTKLsDIcEsAELVLQIAKNstEIsQEVVAh MKKLIKILL.SLFLLLSISCs..................LDNEGsNS.sYESKKQSILuELNQLLtQTTNSLKEAKNTTDNLNASNEANKVVEAVIsAVNLISSAADQVKuATKNMHDLAQMAEIDLEKIKpSSDKAIhAuNVAKEAYsLTKAAEQNMQKLYKEQpc..co.S-SD..........psSsEIKQAKEAVEIAWKATVcAKDcLIDVENsVKEsLDKIKTETsNNTKLADIcEsAELVLQIAKNsKEIsQEVVAL... 0 1 1 1 +3605 PF02471 OspE OspEF; Borrelia outer surface protein E Mian N, Bateman A anon Pfam-B_962 (release 5.4) Family This is a family of outer surface proteins (Osp) from the Borrelia spirochete [1]. The family includes OspE, and OspEF-related proteins (Erp) [2]. These proteins are coded for on different circular plasmids in the Borrelia genome. 
25.00 25.00 30.80 30.40 21.50 20.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.42 0.72 -4.17 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 28 0 4 77 0 108.10 74 60.70 CHANGED FSEFTVKIKN.KDNsuNWoDLGTLVVRKEEDGI-TGLNsG.......hGHoATFFSlEESEVNNFVKAMTcGGSFKTSLYYGYK-EQSss.NGIpNKEIhTKIEsINsSEaITFhGD FS-FTVKIKN...KD.NuuNWsDLGsLVVRKEEDG...I-TGLNsG........GHSATFFSLEESEVNNFlKAMTcGGSFKTSLYYGY+-...EQSst.NGIpNKEIITKIEpINsoEaITFLGD... 0 4 4 4 +3606 PF03968 OstA OstA-like protein Bateman A anon COG1934 Family This family of proteins are mostly uncharacterised. However the family does include E. coli OstA Swiss:P31554 that has been characterised as an organic solvent tolerance protein [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild --amino -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.59 0.71 -4.13 50 3517 2012-10-01 21:43:16 2003-04-07 12:59:11 9 18 2150 10 780 2490 1095 121.40 23 30.26 CHANGED pIcuDpt.phcp..pss...hshasGNVhlpQGshplpA-clhlhpstp............................tthpplpupG.....shhptphphsspthpupAcphpYp..stpchhhLps.sAhlp......psssslpGspIphshppph ..............................................................................................IpuD.ph.p.h..c......tss......ssaoGN...V.h..l..p..Q......G..s..h.p.l.p...ADclh..lppsps.........................................................................................................................pshp.plssts........shht..t.p.h...p....h....s.....s....c........h..c.G..pAsphp.Yc.....htp.chhhL...p...s....s....u.hlp..........................psspsl..p..G.sc.Ihaphcpt......................................................................................................... 0 220 460 635 +3607 PF04453 OstA_C Organic solvent tolerance protein Waterfield DI, Finn RD anon COG1452 Family Family involved in organic solvent tolerance in bacteria. The region contains several highly conserved, potentially catalytic, residues [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.29 0.70 -5.38 45 2046 2012-10-03 17:14:37 2003-04-07 12:59:11 9 9 1874 0 426 1515 1009 363.00 25 49.70 CHANGED ppRhh.hthpppttlss.sap........htl-hshlSDp.sYhpDhsss.............hshtspsp.L.p..............puphsY......ttt.h.hshp..sppapsl.............tttpsspP....aptlPplshshhtsphh.....shphshpuphspFpcpstth.................................pusRhphpPplshshppshGh.lssphplp..tsh..................Yphststs..t............shspshsRslPphsl-sshshtRshph..................tappoLEPchtYhh.....hP..p..........sQssl..P..........saDos.hphshtpLFpps+asGh.DRIssuNplohulosph...hcsssh........................phphslGQhaahpsppshh.................sp....ss.ppstSshhsphshphspthphsushpas.pspphppsssshpYps...p...sthlslsYpahpsphttt...........................hpplshsstaslsp............pW .......................................................................................................................pRhh.hhhpcst.shsp..sap.........hslDh.spV..S..D......sYhpDhssp........................hsssossh..hpp...........................phplsY..............ts..psh.s.hslp.........sppaQsh......................ptps..sps..........YpthPpl..shsh..htsshh...............shchph.uph.spFh..ps..p..t..s.s...........................................pusRhphpPslshPhs...sshG...lss.ph..p..lh........sst...........................Y.ptsp..s......t.............................phpcs.ssRshP.p.h.pl-s..ths....a.....-Rsh..ph....................sa...p...QTLEP+hpYlY.............sP..h+................sQs.sl...................saDos.....hp..............s.hssL.F....p...sp.pasGh..DRlssuNplohGloo.Rh....h-sss..........................................E+hshslGQ..haYhscppst.......................................................p.......sspps.s.pss.hs.s.phthph..........spphshpushp.....Y....cs.c...s....p..hspus.sshpYp......s.......pphlplsYpYtssp....hh..t.tt.......................................lpQlshsuta.lsspW..................
........................................................................................................................ 0 113 246 338 +3608 PF00865 Osteopontin Osteopontin Bateman A anon Pfam-B_1593 (release 2.1) Family \N 25.00 25.00 28.50 28.50 24.40 24.30 hmmbuild --amino -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.47 0.70 -5.17 7 105 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 40 2 25 108 0 216.10 52 93.19 CHANGED K.AsSGSSEEK.......h.pKasDAVAThLpPDPSQKQshLAP..QNshSSEEsDDh.KQpTLPSpSNESp-phDD.DDDDDD.DHssSpD...Ss-S--sDpsDD.cpsDESHHSDESDE.VssaPT-.stspVhTPslPTs-ssD.GRGDSVAYGL.RSKS+pF+hSchQhP..DAT-EDlTSHhcScEhsss.KsI.VA.cLphPSD.DSptKsSpEoSQlD-pSVETcS+EQS+pac.+AsDpSs..................E+SssIDSQEs..SKsSpE.p...F+SHEDKLs.D.KS.E-D+HLKhRlSHEL-SuSSElN ............spSGSSEEK.......h.shass.luoWLpsDPSQKQshLAs...QNslSSEE.psD..cQpTLPSpSN.ESH.-chDD.DD-DDs......SpD...Ss.-.............sD-S+cSDESDEhVs.cFPT-.ststsFTPhlPThss.s.GRGDSlsYGL.RSKS+.phchs..s.Qhs..DuT-EDhTSchcS.tE.pts.csh.hsp.l.hsSs.cspt...ps...........S.....EsSQhD-.ShETpSpcps+.hp..c.spccSs....cpSs.I-SQEp..S+sSpE............hpStEc..s.D.+S.E--+aLKh+hSHEh-SuSSEhN....... 
0 2 2 5 +3609 PF00185 OTCace Aspartate/ornithine carbamoyltransferase, Asp/Orn binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 29.40 29.40 29.70 29.70 28.10 29.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.01 0.71 -4.36 123 10559 2009-09-13 17:21:38 2003-04-07 12:59:11 19 39 4814 296 2543 7158 4618 158.00 28 43.70 CHANGED cGl.clshlG......D.t.splspShl...hshst.hGh.c.lplsuPps..h.s.......t...h...........t..t.hplp...p..shp.culps......sDllasspht........................pEc.............tphcthps.aplspchlp..t.sps.c...sllhHsLPh.......R.....s.........................EloscVh-.....s.sp.ShlacQAcNtlasphAllhhl ....................................slpluhlG........D..thspsupShh...........hshsp..h....G...h.....s...lpls.uPcs...h.s..............................t..thl................t...ht..pt..G.......spl.pls.......p.shc..-u.l.c.s......s.Dl...lhss.pht........................cEpt...........................pc.hpt.hps....at...lspc..h.hp....t..sp....s....s................s.lhhHsLPs............R....s.........................h.E.l.os-Vh-..........u.sp.ShlFc.QAcNtlasptAllhh.................................... 
0 832 1612 2137 +3610 PF02729 OTCace_N Aspartate/ornithine carbamoyltransferase, carbamoyl-P binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.90 20.90 21.20 21.40 20.60 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.81 0.71 -4.53 105 10527 2009-01-15 18:05:59 2003-04-07 12:59:11 16 43 4811 296 2554 7124 4842 142.50 36 39.25 CHANGED +c.llsl.pD...hspc-lppllchAtph+c........thtpt................thLpG.+sluhlFhcsSTRTRhSF.EsAhppLG..upslhls...ssspl..........u+.........GEolpDTu+sluph.sD..slllR..........phppsslpph..Achs.....slPV...INuhs...stpHPsQuLsDlhTl...pcc .............................................................+chLslpD.hotpEl.ptllchAtphKp......................ttppt............................hLcG..Ksluh.lF.c.sS..TRTRsSF.E.sAhtcLG..ucshhls....sssopl................................u+...GEoltDTu+...lLuph..sD....sIhhR................p.p.p.t.t.s...cpl.....Ac..au..............slP.V.....lNuhs....s......ptHPTQsLhDlhTlpE......................... 0 833 1616 2149 +3611 PF02338 OTU OTU-like cysteine protease Mian N, Bateman A, Finn RD anon medline:20130692 Family This family is comprised of a group of predicted cysteine proteases, homologous to the Ovarian Tumour (OTU) gene in Drosophila. Members include proteins from eukaryotes, viruses and pathogenic bacterium. The conserved cysteine and histidine, and possibly the aspartate, represent the catalytic residues in this putative group of proteases. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -11.08 0.71 -3.52 51 2248 2012-10-10 12:56:15 2003-04-07 12:59:11 14 84 441 33 1395 2228 90 124.90 19 19.92 CHANGED .uDGs.Cha+uluptlh..................plRpts.....sphht......................ppppthpphltsp.................hh..h.t.t....sWus.......................................plpl.tuhuphh..phpIhl..............h.tsht.....hh.p.h.....tthpptlplha.t.....stHa ..........................................................................................uDG.sCLa+.Alup...tlh.............................tptth.hp...plRpts............sphlp..............................................................pptppa.p...hlpss................................................ahpphtpss.....pW.Gs................................................................................................pl-..l...huluchh.....phsIhl.....................................hp....ttst.......h....t.......................t..........hhh.............t..Ha........................................................................ 
0 507 785 1100 +3612 PF00724 Oxidored_FMN oxidored_FMN; NADH:flavin oxidoreductase / NADH oxidase family Bateman A anon Pfam-B_642 (release 2.1) Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.92 0.70 -5.45 12 10595 2012-10-03 05:58:16 2003-04-07 12:59:11 15 107 3470 129 3408 9713 2738 312.00 28 71.36 CHANGED pLFpPlclGs.hpLppRllhsPh.ophtutccG.lsp..hhhcYastRuphssshlIsEushlsspus.shssssslas-cpltpa+cls-AlHcpGuhlhlQlaahGtps..schhtpts.........sstss..hsss...............+tlot--Icphlp.paspAA++Ahp.AGhDuVElHuApGYLls.pFLsPtpNcRoDcYG.GShENRsRFsLEll-tlpcslGp-t.....luhRlSs.shhtst.tshtEs...h.hhhhhschthchhsh.phAhl.chspPt.htshpspht...p.tss...phl+phhphPllssGp..hsssp.tshl.htc..scsslluhGR.hlusP-Lsp+lccGh..p .........................................................................................................LFpPhp..l....s........s....h.....pL.pN.R..llhuPh....s.........p.............h.......t..........u.........t...........t......s......s..................h...........s.........s......h.....hhta....Y..t..p.R....A..p..........s.s.GLl.......l..s.....t.u.s.t.l.s......s.......p.........u.......t.......s............h.....s.......t.........s....s.....s....l........h........s......s........p.......p........l..........t.....u....h.+...p...l....s.....c.....u............l..........H.............t...............p.........G.............u.........p..............h.........h....lQ.l......h.H...s.G..Rh..ut...........t...h....stt............................uss...............s..ht.t..................................s...........+th.o.p.c.-I......t...p...l..l.p.sFspAAppAh................c.A.G..............FD..............GVE.lHu..Ac.GY..L.lp...QFlos.hoN...p.Rs.D.p......Y.G....G...S.h.-N.....R..h....RhslEllc....A....l....p....p...s....h....u....s....c..h..........lshRl.....S...s..............h....c....h..............h..............p.......s.........s.......h......s.......h..p....-s...................ht.h
.h..p...h.......h....t...t....h....t......h......p......h...h.....................................t...h.......u.........h..............h.....p..........h.......s...p.....................................t.....s..............s..h....t...........................h.........................t.t.l.....+...p......t......h.....p.....h..s...........l..l....s......s...Gt.....................h............s.....s...........p...........................A.................p................p.h........lt.p.....................G........t......s........D....h.lu..huRshls.sPchst+htps.......................................................................................................... 1 954 1981 2860 +3613 PF00174 Oxidored_molyb oxidored_molyb; Oxidoreductase molybdopterin binding domain Sonnhammer ELL, Bateman A anon Prosite Domain This domain is found in a variety of oxidoreductases. This domain binds to a molybdopterin cofactor. Xanthine dehydrogenases, that also bind molybdopterin, have essentially no similarity. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.09 0.71 -4.84 150 4576 2009-01-15 18:05:59 2003-04-07 12:59:11 14 45 2283 46 1578 3981 1512 159.10 27 44.65 CHANGED hspts.hP............tl.s....tsapLpl.....s..G.......h.Vcpshshohc-Lt.p..hPp..tphss.slpCsss....................Wuh...h..sspWsGV.LpclLctsss...............p......su.........................paVhhpu.hD....................................................s.....YspulPl.scshc......................csllAaphNGc.....sLs.pHGhPlRLllP.shhGh+ss.KWlpcIplhsp..t......tu...aWpp ......................................................................................................psaplpl.........p..G......................V.tp.....s.....h..s.....hshp-Lh...t...h.s......p..p..h..h..h....p.h.p...........Csts...................................Wuh.....ss.WpGssLpclLp.t..s..ts..........................................................p...ssA.....................+aVtFpu.h-...........................................................................s..Y..s.p.u.l.pl..s.cA.hc.........................ts..llAht.h..sG..c.......sL.ss.p...p.G..t..P..lR.lll.........P........t......h.....h..G........h+.s.....s.K..hlhpIplspp...s.......sha............................................ 
0 442 944 1319 +3614 PF00148 Oxidored_nitro oxidored_nitro; Nitrogenase component 1 type Oxidoreductase Sonnhammer ELL anon Prosite Domain \N 28.60 28.60 28.70 28.60 28.50 28.50 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.07 0.70 -5.83 116 3989 2012-10-03 15:23:08 2003-04-07 12:59:11 14 13 1338 128 1280 3778 993 356.20 21 84.43 CHANGED CshhGAh.hshhslcsshsllHGstGC...ssastshhspphc.............c.h............hhoTshsEpsslaG.GpcpLtculcplhppapP..chIslhoTChsphIG-Dlpulscph.....ppph......................llsssssua.sssp.pGactuhpullcpl.....................................................................................p.tpcst...............plNll.sshtls.....uDhpplccll.cthGl.........................chsshhsussslc-lpphspAphslshspp.htt....suchlcccaGlPhhp...ssshGlpsTsphlppluchhG................tt..h.sptltc....cc.sphhcthhc.h+....thlt...G.++.........sslhus..s.shshulsphlt.E.hGhcsshssstssstt.h..t........................................................ph.t.htt............lh.s..ch.h-lcph.lpphp..sDll.....lGssp.sphhucch...........h.hGhPhhsthsht.ptshhGYpGsh....plhcclssslh 
.........................................................sthhGuh.hshtslcs.shhlhHGs.GC...........st.a.h.h.sh.ht.thht...................p.......................hh.sosh..pEpsl..l..h.G...up...ccL...h....csltphh...p.......h.....h.......t..P...............ph.lhlhsoCsstlIG-Dlpuhs.cph.....ptchsh...................sllssps...s.ua..sssps.tG.hchshp.ulh..chhhs.........................................................................................................................t.t..tp.t....................slsl...l.Gshshs.........u-h..hp...l+...tl..L...cph...Gl..........................................cl.shh.ss.s.s.s.hp-lt.ph......sp.Aphsl.......hhs...............t....ht...............huphhpc..c.h.uh.Phht...............................hG.h..pthtthlptl.sp.hhs.................................................................h....t.....ltt........tp....tt...h.......h...t...t....h.p....hp..........hht.........G..+p................hh.l....hss..s.....h.h....t...........hsphh...-....hGhps.hh.h.sh....h.t..ptt...........t........................................................th.t...htt......................lh.t...s..........p....h..phh....t.p........ht.............s-...lh.............h..us.h..tt..hh.t.ph.....................uh..P.hh...........h.hhGatGh..hhp.h.....h.................................................................................................................................... 0 432 869 1085 +3615 PF00361 Oxidored_q1 oxidored_q1; NADH-Ubiquinone/plastoquinone (complex I), various chains Finn RD anon Pfam-B_4 (release 1.0) Family This family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.78 0.70 -5.13 33 91875 2012-10-02 00:39:38 2003-04-07 12:59:11 15 57 35037 6 4521 86901 9765 225.60 28 56.30 CHANGED ussllhhaluaE..hhslssal.Llshattp.cuhpAuhphhlhstluShhLLhuhh..hlahhs.uohsa.tlhp...............s....hhhhhhlhluhhhKhuhhPhHhW..........LPcuhtusssssullhuuhhlhsulallhRhh.lh.......hh.hlhhluslohlluuhsulsQsDlK+llAYSoluphGa.hhshluh...s.h..tsuhhhhlsH.uhhpusLFhhs.shhhpp.t.........spslhhhs.ulhthhPhhthhhhlshhuhs..GlPsh....sGFhuKhhlltsh ....................................................................................upphh..h.ahh.hE.....sh......hh.......l....................h.................p............p........................p.........s.........h..............p..........A.....s..........h....p.hh.l.h..p.t.h.u.s...h.h....l..L..h.u...h.....h.........h......h................s......G......p.......h.....p..h..t..p..h.t.....................................................................................h........h...h.....h...h....h...hu.....h..hh...K.u.h..Ph..H.h..W..............hP.c.sh.p..u...........s......s.....h....s....u..h..........l..h...u.s.h.....h...h.s.s.h........h.l...l...h....ph...............h...............................................................h...l....h....h...l.u.lh....o.hh.h.u.u.h..hu..L......s......Q.....s......p.....l..+.....+ll.....A....aS....olu....p.h...G..h.........hh....hh.lhh......................ss.................t.....h..u....h.....h....p.h....h.s.....a....uh.h.p...u.h.l.Fhhh....s......h.p.p.hp................................spsh.h.hh....t......s.........h..h...p...h...h...P....h..h..s...s....h..h........h..l..shL.uls........G..l.P..P.h.............s.s.FhsK.hIlp..t........................................................................ 
1 1460 2936 3788 +3616 PF00662 Oxidored_q1_N oxidored_q1_N; NADH-Ubiquinone oxidoreductase (complex I), chain 5 N-terminus Bateman A anon Pfam-B_22 (release 2.1) Family This sub-family represents an amino terminal extension of Pfam:PF00361. Only NADH-Ubiquinone chain 5 and eubacterial chain L are in this family. This sub-family is part of complex I which catalyses the transfer of two electrons from NADH to ubiquinone in a reaction that is associated with proton translocation across the membrane. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.03 0.72 -4.41 32 19906 2009-01-15 18:05:59 2003-04-07 12:59:11 15 26 14684 2 1246 18780 2012 60.60 42 9.78 CHANGED hlphhpWhssps.hplshuFhhD.hohhhlslshhVohhlhhaSh.YMp.pDPphs.......RFFpYLth ..................................hhWsWh.h.s.....ss....F...slp..hu..a...hl..D..s..LoslhhhllohVu......hhVll.YS..ss..YMu...c..D....p...s.h.....................RFFsYhsh............................... 0 404 818 1051 +3617 PF00420 Oxidored_q2 oxidored_q2; NADH-ubiquinone/plastoquinone oxidoreductase chain 4L Finn RD anon Pfam-B_193 (release 1.0) Family \N 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.15 0.72 -4.39 194 10049 2009-01-15 18:05:59 2003-04-07 12:59:11 19 10 7521 2 1437 7095 1962 96.00 27 90.05 CHANGED hhhhshhhhFhlulhGlhh.RppllpsLlsLEhhhLulhlhhshhuh.h...........................................pshhsphhslhlLshuAsEuulGLALLVthhRsaGo-plpslslLps ................................................h.hhhhhshhFhlGl..hG..l..l.....h...Rp.....pllphLl..sLEhMhhulhlhhs.hhuh.h.........................................................................................................................................................tss..h..s...phhsl.hhl.shuA.sEuulGLAlllth.h..RspGosplpslshhp.............................. 
0 454 913 1189 +3618 PF00499 Oxidored_q3 oxidored_q3; NADH-ubiquinone/plastoquinone oxidoreductase chain 6 Finn RD anon Pfam-B_61 (release 1.0) Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.24 0.71 -4.41 100 8649 2009-09-11 06:35:13 2003-04-07 12:59:11 15 6 7126 2 830 7391 1836 152.30 22 84.54 CHANGED lhsslhhlh..s.psPlhhslhllhhslhhuhh..hhhhs...ssahuhlhhllYlGGlhllFlahssls......ssphhth.......................................................................thhhhhhhh.......hhhhhhhhhhhht.h.hths.ht..th..h...............................................hhssthh.hhh....hhslhLllslls..sltls ........................h..sul.hss.s.......s..ssPlaus.....LhLll.shhsssul..hhh.hG...usFlul.llhl.lYlGuhhVlFlasshhs............spp.hs.ct.h.t.p.h.........................................................................................................................hhhh.h..h.s.hhh.............hhh..h..h..h.....h....h......h....h......h.......h....t.......t.....h.........h....h.....s.....h.....s....s.....h...t...t..h..s..h..t..ht...............................................................................................................................hhth.la.o........h.sh..hhh......lsuh..hLLlsllssl.l..................................................... 
0 275 538 685 +3619 PF00507 Oxidored_q4 oxidored_q4; NADH-ubiquinone/plastoquinone oxidoreductase, chain 3 Finn RD anon Pfam-B_68 (release 1.0) Family \N 21.60 21.60 22.10 21.60 21.30 21.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.52 0.72 -4.20 99 11473 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 9331 2 859 9513 1923 105.50 45 89.88 CHANGED llshllhhl..............uhh.......ls......cph..t...-K.osaE.CGFcPh.spuR...hsFol+FaLlullFllFDlElshlhPhshhh.....tt.hhhh..hhhhh...hFlhlLhlG.lhYEWppGsLcW ...................................hh.luhlLhhl........saa..........Ls...p.....h.ss...ss.......EKhSPYE.CGF.D.Ph...Go..AR.............LP.FSlRFFLVA.ILF..LlFD..L.EIALL..LPhs.huh..........h.p.s....s...h.hs..........hhhs.h..hh.l.l.l.L.s.lGLlYEWt.pGuLEW............................... 2 284 563 713 +3620 PF01059 Oxidored_q5_N oxidored_q5_N; NADH-ubiquinone oxidoreductase chain 4, amino terminus Finn RD, Bateman A anon Pfam-B_381 (release 3.0) Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.32 0.72 -10.67 0.72 -4.18 143 6646 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 4701 0 249 6505 763 104.90 37 24.78 CHANGED MLKlllssl.hLlPhshh.t...hha...hhhshllhhlShh.h..hp.shthhhs.hhshh.huhD.lSssLllLShWLhPLMlhASppthpp.shtpp+hFlhhlhhLplhLlhT .....................................................................MLKlllP.Tl..ML.hP..h.s.ah..s...p...hlW....s..shh.h.SllI.uhhSL....h......a............lp.................s..p................s............h...........t.....s..........hs.h.........h.husDsLS.sPLLlLTsWLLPLMIl.A....SQ.sH...lpp.EshsRp+..halohLlhLQ.hhLIhs.................... 
0 77 152 195 +3621 PF01058 Oxidored_q6 oxidored_q6; NADH ubiquinone oxidoreductase, 20 Kd subunit Finn RD, Bateman A anon Pfam-B_1345 (release 3.0) Family \N 21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.22 0.71 -4.54 169 7525 2009-01-15 18:05:59 2003-04-07 12:59:11 17 28 3465 80 2007 4724 2053 123.50 33 48.57 CHANGED CsGC...shul.tshtsshh-lh.t..hp.........htat..slhssst..........................ptDlllV-Gul..ts......................pstlchlhcht.tcschllAlGsCAshG.Gl.shts.......sh...........................................slhp.........lsV...lplPGCPPpPctlhtslhtl ..................................................sC.slph..hssts.s...hhDh............................c.aG.h.hh.h.u.ust...........................................puDlhlVsGslspch.......................................................................................tPs.l.c.+..l.a-p.h.s.-P.+h.V..IuhGu.CAssG.Ghathts........sh............................................................................h..........lpGl-.ch...............lPVD..lalPGC...PPpP-shltulh..h................................... 
0 667 1322 1707 +3622 PF01237 Oxysterol_BP Oxysterol-binding protein Finn RD, Bateman A anon Prosite Family \N 25.40 25.40 25.40 25.50 25.30 25.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.26 0.70 -5.96 149 2468 2009-01-15 18:05:59 2003-04-07 12:59:11 13 56 312 10 1498 2357 5 330.10 27 52.43 CHANGED ulhuhl+p.....slGpDLS+.lohPlhhsEPhShLQ+hsE..hEY.scLLspA...sp.................psDshpRhhhVss.aslSshssp..hpR.t.......KPFNPlLGETF....E.hsc..t.....shchluEQ.........................VSHHPPloAhascs..p.s...............aphtupstscsKFhGp.Shplp........hGtsh...lplpp........................................................................................t.........sEpYshsp..ssspl+sIlhGphalE.hGchhIpsp.........p..oG.pshlcFps.p.....Ga...huup...pp.........lpGt..lhc..p..............sspsha..plpGpWs.cplhhppsp...........................................................................ttpt.....phlWcssshss..p..pha.h..s.hs.hsL..Nt.....c..........plssTDS..RhRPDp+AhEpGch-tAspEKp+lEccQRptR+cc..............pppsp..pap.P+aF..pcs......................hpshstp................a.hpss......YWch 
.......................................................................hhsll+p......plG.h.D.L..o..+...ls...hPs...h.EPhS..hLp+.h.s-.....h.p.a..s...c..L.l.s.p.A...s.p.............................................................p.ps.shcRhh.h........Vsta...hl..Su.atpp.......htpst...............................KPaNPlL...GETa..........................................-.h.p.....p............sh.ph.....l..uE.Q.........................V.SHH.....P.P.........loA..h.a..sps......ps...................hph..s.p...ht...h+.sK..F.........h.........G.p..S..lpl..h................hG..p..hp.......lph.p.....................................................................................................................................t......sEpY.hh.sh..ss..stl+..sI...lhG..p..ha..lEh.hG.ph...pIpsp.............................p.....ou..h..psplpF..pt..p.................................sa....h..u.s..ph.....pp................lpGh...lhs..t....................................stcs.h.h....plt.G.pWs.pp.h.h.httss..................................................................................................................................pt......phlacss....h.s........t..t..ph.h................L............c.........................................ssp-S....hh+...s.s.p........c..t......l.c.p...u.c.h.-t..AsppKp.clE-tQRtt...p+pc.........................................................p.p.p..t............tap.......s+......aF.....ppt.................................................................................t.............................................................................................................................. 0 500 792 1157 +3623 PF00543 P-II Nitrogen regulatory protein P-II Bateman A anon SCOP Domain P-II modulates the activity of glutamine synthetase. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.23 0.72 -3.54 93 4823 2012-10-01 21:59:08 2003-04-07 12:59:11 17 7 2688 149 1478 2996 1680 101.60 53 82.59 CHANGED IpAII+..P.KL--V+cALsphG.ltGhTVo-VpGhG+QKG..sElaRGs....................cahss.hlPKl+l-llV.sDctl-pll-sIhcsApTGc...lGDGKIFVtsl-..cslRI.RTG ....................................IpAIIKPFKL--V+EALscl.G.....lp........G.hT.V.o.E.VK.GFGRQK.G.H...TE.l...Y.RGA.................................EY.hV-..FLPKVK.lEl.........lV.s.D.-...l.-.p.ll-sIh...cs.Ap...T...GK........IGDGKIFVhslp..cl.lRI.RTG......................... 0 486 973 1256 +3624 PF04275 P-mevalo_kinase Phosphomevalonate kinase TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Phosphomevalonate kinase (EC:2.7.4.2) catalyses the phosphorylation of 5-phosphomevalonate into 5-diphosphomevalonate, an essential step in isoprenoid biosynthesis via the mevalonate pathway [1]. This family represents the animal type of the enzyme. The other is the ERG8 type, found in plants and fungi, and some bacteria (see Pfam:PF00288). 23.90 23.90 24.90 24.20 22.70 23.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.53 0.71 -4.37 11 154 2009-09-10 15:39:43 2003-04-07 12:59:11 9 8 123 1 104 164 4 113.90 38 29.84 CHANGED hSGKRKSGKDalo-+LppRLsts+sp..IlRISpPlKpcaA+chsLDhpcLLusGsYKEpYR+DMIpWuEpcRpcD.GaFCRtAhpps.......spslhIVSDsRRpoDlcaF+EsYG...hshsl ....................hoGKRKuGKDals-h.ltp.pL...st..s.....hst...........ll+lSsPlKcpYApppGL...DhpcLLssusYKEpaRt-MIpWu.EcpRppD....s....Ga....FCR...t...shcts...........spslhllSDsRRhsDlpaFpptas.......sthh...................................... 0 32 52 78 +3625 PF04699 P16-Arc p16_Arc; ARP2/3 complex 16 kDa subunit (p16-Arc) Waterfield DI, Mifsud W, Finn RD anon Pfam-B_4180 (release 7.5) Domain The Arp2/3 protein complex has been implicated in the control of actin polymerisation. 
The human complex consists of seven subunits which include the actin related proteins Arp2 and Arp3, and five others referred to as p41-Arc, p34-Arc, p21-Arc, p20-Arc, and p16-Arc. The precise function of p16-Arc is currently unknown. Its structure consists of a single domain containing a bundle of seven alpha helices [1,2]. 21.90 21.90 22.90 22.70 19.60 21.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.89 0.71 -4.20 36 389 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 277 15 237 330 3 143.80 37 92.13 CHANGED psaR+lDIDsh-.-sh.c.stps............spsphtsptsplRshLpuGchttALphsLpssPhsucs.psK-tthpsVl-VLsuhKs....s-.IsshlcsL.....sppph.DsLMKYlYKGhusPs...............................stsp................hulLLsWHEKllpluGlGsIVRshoDR+TV ..........................................................................s.taRplDlDth-.pp..p.p....t..............st....sph......tsptspl.cshLp......p.....G.....ch.htALpssLc.s..s.Plssc....s....ssK-tshthVlcVLt..uhKs....s-...lp.psl.puL......spsth.DlLMKYlYKGhptss.......................................................psss...................................ulLLpWHEKhhthuGlGsIlRVlTsR+pV......................... 
0 69 116 181 +3626 PF00864 P2X_receptor ATP P2X receptor Bateman A anon Pfam-B_1590 (release 2.1) Family \N 19.60 19.60 24.30 19.70 19.40 19.20 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.38 0.70 -5.75 19 762 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 78 7 284 674 19 263.70 36 83.68 CHANGED F-YcTsK.Vll+shplGll.RllQLhllsYllGWVFlacKGYQppDssl.SSVhTKlKGluhsN.s........hht+lWDVADYVlPsQGsssFFVhTNhllT.sQpQGpC...........PE...lP-......stCspDss.CptG.sshtusGltTG+Cls....hss..sh+TCEIhuWCP..sEs-pt.Pss...shLtpAENFTlaIKNsIpFPpFshoKpNllsshssoalKoCpYctp..psPaCPIF+LGsllccuGpsFpclAhpGGllGIpIpWsCDLDhshppCpP+..YSFpRLDsp......pslSs.GYNFRa................................A+YYpc.sGsEhRTLhKuYGIRFDllVsGpAGKFslIPThlslGSGluhhGluollCDllLLahh..................+ppcaY+pKKFEplpcsp.phstpp......pt ..........................................................................................................................................pYpT.+.hhhpshphGhh.hhhphhlh.Yh.h.................a.shl.pKtYQt.-.......SoVhoKlKG.hs.hp....................................plhDss.-Ysh.P................p.........t..ssh.lh..Tph.hT.tQ..thC..............sE...................t......h.C....pDtt...C......G.............................u......p.G..h........TGpCl..........t.....tTCEl.uWCP.h.E.tt...........p...........hh.tA.sFTlhlKN.......pl.aPh..ap..hp.......p.......tN.....l.h........th....s.....t....Chap.....p....C....PlFclG.lhp.s......G.ts...F.........pp.........hA....p..........G.......GhhGl.ItWsCsLDh....p.C.Pp..YpFp.tL-..............h.....GaNF.R.a................................A+aah..........s......s..phRsLhKsaGIRhDlhV.Gp..uGKFshl.hhh.hssshs.h.Ghs............shhhDhlh..h......................t...Y.ttKhp.h............h................................... 
1 76 97 158 +3627 PF04045 P34-Arc p34-Arc; Arp2/3 complex, 34 kD subunit p34-Arc Wood V, Finn RD anon Pfam-B_9846 (release 7.3); Family Arp2/3 protein complex has been implicated in the control of actin polymerisation in cells. The human complex consists of seven subunits which include the actin related Arp2 and Arp3, and five others referred to as p41-Arc, p34-Arc, p21-Arc, p20-Arc, and p16-Arc [1]. This family represents the p34-Arc subunit. 19.70 19.70 19.70 20.20 19.10 19.20 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.77 0.70 -5.09 27 380 2009-09-11 15:40:12 2003-04-07 12:59:11 9 9 288 15 254 369 3 223.70 41 65.50 CHANGED hlSlplKsap-LhptG......s.chLppcYushh........sssEsGYshoLhlDLp..phs.....ptpppllpcluhLKRsshAuPFcpsFpt.tphsp...................spphhsI+YRs..DEolYlcsptDRVTVIFoTlFpDEsDplhGKVFLQEFV-AR+ps...QoAPQVLaSH.-PPLElpshsssp......ss-shGYlTFVLFPRHhsptcpppsoIspIphFRsYhHYHIKCSKAYhHoRMRtRVs-FLKVLNRAKPEsts ..........................................................................lSltl+happL.paG......ApchLpctYGshl..........sssEsGYsholhlDLp..plP...............psp...........ppllpphuhLKRNshAusFEchFph.tphpp...................stphhsIpYRc..-EshYlc....s........ptDR.VTVlFSTlF+D-sDhlhGKVFhQE..Fh-..uRRts.........psAPQVLaSp.....-PPLELps.ssst.........sssshGYlTFVLFPR.H.spsptptpoIshIphFRsYhHYHIKCSK....AYhHoRMRt+sssFLpVLNRA+P-s.p.......................................... 0 79 133 205 +3628 PF00067 p450 Cytochrome P450 Eddy SR anon Overington and HMM_iterative_training Domain Cytochrome P450s are haem-thiolate proteins [6] involved in the oxidative degradation of various compounds. They are particularly well known for their role in the degradation of environmental toxins and mutagens. They can be divided into 4 classes, according to the method by which electrons from NAD(P)H are delivered to the catalytic site. 
Sequence conservation is relatively low within the family - there are only 3 absolutely conserved residues - but their general topography and structural fold are highly conserved. The conserved core is composed of a coil termed the 'meander', a four-helix bundle, helices J and K, and two sets of beta-sheets. These constitute the haem-binding loop (with an absolutely conserved cysteine that serves as the 5th ligand for the haem iron), the proton-transfer groove and the absolutely conserved EXXR motif in helix K. While prokaryotic P450s are soluble proteins, most eukaryotic P450s are associated with microsomal membranes. their general enzymatic function is to catalyse regiospecific and stereospecific oxidation of non-activated hydrocarbons at physiological temperatures [6]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 463 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.40 0.70 -5.97 50 39592 2009-11-03 19:16:01 2003-04-07 12:59:11 17 282 2977 873 20665 39656 2722 330.60 17 79.49 CHANGED Psss..shPlhGshhplth....pphhtp.hpphpccY..GslaslhhGs.pshVlltshchl+clLhcps.thssc.t.shhtp...hhpspGlhhsss..scW+phR+hhhsshpsh...ph.shpphlpcpuppLlcplccpssps......hDhtphlsphshssIsslhFupphs.hp-pp...........h.phhphhpphhp..hhts.hhphh.hhs..hlhhhsspht+hhppshphhpshhpphlcc+cpslsss.................p.hDhlchhLht........tppst....phopcsltsslhslhhAGs-TTSooLpasLh.hLhcaP-lQc+lpcElcpll.Gptc.........tsshpDhsphPYlcAsl+EsLRhhslsP...tlsRtspp...Dsplps...ahIPKGTpVhlslhult+Dspha.ssP-pFcPpRFLscps................h.tpshtalPFusGhRsClGctlAchEhhlhLsplLppFplc..s.ss.hsh.pt.....llhhs.shplph 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................................................................................................................................................................................................................................................................................................................h................................+...t......h..................h............................................................h...............................................h........h.....t...........h.....................................................................................h.............h............................t.............h...................h...........h.....h.s..........................................................................................................h.......................................................................................................................................................................................................................................................................................................................h..............................................t........................h...........t.....h........h.....................t......h.....h......t.........t....t..........t.t..t.t........................................................................s...h....h.
...p......h...ht.....................................................h...s........p...p....l........h......t.................h............h......................h......h.........h......u.................G.........................-.....T..................o....s.........s.......s....h................t....h..h....h............h......l.....h......p....p....P...............p....h..........................p....p.........h.................p.....E........h.............t....h.....h....t...t......................................t.....t...t.......h..............p......h.............h.........h.......p...............t.............s.l...........p..............E........s.............h............R........h..........h..............s....s............s..........s.....................h..........h.........................R..h..s...h.p........................s.....h...........p.................l.........t...............s.....................hh.l........s.............t.................G..................s.................h...........l.........h..............h..........s......h...............h...................s............h..........p................+.............s............................p................h..........a....................p...............P...............p........p.............F...p......P..p......R...a......t...tt..................................................................t....h.....h.......s.........F....u.........h...............G............................+........................Ch...........G.......t............t................h...A..........h........hp.....h..........h.......h......h...h....s....t...l...l...t....p...a...p.h................................................................h................................................................................................................................ 
0 5443 11134 16825 +3629 PF00870 P53 P53 DNA-binding domain Bateman A anon Pfam-B_782 (release 3.0) Domain This family contains one anomalous member, viz: Zea mays (Q6JAD8). This sequence is identical to human P53 and would appear to be a a human contaminant within the Zea mays sampling effort. 20.30 20.30 22.60 22.60 18.80 20.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.31 0.71 -4.71 7 550 2012-10-03 00:25:27 2003-04-07 12:59:11 13 15 172 206 160 635 0 158.10 54 43.93 CHANGED soVPossDYPGsasFcLpF.Q.SuTAKSVTsTYSPpLNKLaCQLAKTCPlplhVspsPP.GshlRAhAlYKKsEHVs-VV+RCPHHppss-.s-t.hAPsuHLlRVEGN.hupYhEDs.ThRpSVhVPYEsPQlGoEhTTlLYNaMCNSSCMGGMNRRPILTIITLEs.-GplLGRRuFEVRlCACPGRDRKTEEcsh .................................................lPs.psY.G.atFpl.......tF...p....os..s.s.K....SsshT................YSs....LpKLasp..lAKT.CPl..plhl.....s....s...P..P....G...s...hlRAMslYK+spHhsEVV+RCPpHchs..p.-.....s.-s....hs.s.....s..pH.....LI.RVE...G..s.....tu.pY....h....-....D............T....h..RpSVlVPY....E...sP.p.....l...G.o..-......h.....T..T.lhY....saMCN.SSC..h...G..GMNRRPILhIlTLE.....s.........t...s...G.plL....GRpshEsRlCACPGRDR+s-Ecp.h...................... 0 40 49 92 +3631 PF04636 PA26 PA26 p53-induced protein (sestrin) Mifsud W anon Pfam-B_5416 (release 7.5) Family PA26 is a p53-inducible protein. Its function is unknown. It has similarity to Pfam:PF04636 in its N-terminus. 
29.40 29.40 36.00 33.00 28.20 28.30 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.58 0.70 -5.79 13 327 2012-10-01 19:19:04 2003-04-07 12:59:11 8 4 116 0 185 285 7 362.70 43 81.45 CHANGED tpGPSsFIPsp-llphsstscpppthhp-sh...GRh..........DplopVhGhHPpYLcpFh+optalhphDGPLPhsaRHYIAIMAAARHQCSYLVshHpp-FLpsGGDspWLpGl-asP.KLRpLs-lNKlLAHRPWLIsKEHIptLlK....s.GpssWSLuELlHAlVLlsHhHuLuSFVauCGlp.-hD.h....tstshps.........ss..spsps..............ph.sssshsssptstus....................................t-VEtLM-RMKcLpcp..pc-EuSpEEMsT............RFE+p+ppohhVh.........suscptcssssssluRal-DssFGYpDFuRRGppslP.TFRsQ.....DYoW.....................................EDHGYSLlNRLYs-l.....GpLLD-KFpsshsLTYpT...............hAs+ssVDToth.......RRAlWNYlpClaGIRaDDYDYGEVNQLL-RSLKsYIKTssCaPE+sTpc.YssahhpF+HSEKVHVNLLLhEARhQApLLYALRAIsRYMT ........................................s..............................p.hh.cth..t.G...Rl..........Dpls.VMsh.HPpYLpsFh+ophhlL..ph..D...G.P.Ls..h.aRHYIuIMA.AARHpCsYLlshahs...cFLp...s.......G.....Gs.....s........p........WL.p.G.L......c..sP.tKLppLs-lNKlL..AHRPWLlo.+..-.HIptLl+........................s..tcpsWSLuELl.pAlVLLsHhHuLu.SFsFGsG.lpsE.hc.....thu..s.hp..........................ss.sp.p..s....................................sss.st..p...t.....s..........................................................-lEsLMc+M.+..p..Lp...cp....p..p-ps..o.......pEE.hts..............................RF.Ehp+ppohhsh............................ss...c.....sl..pah-Ds.sasYp.DFs+..............+G..p....P..TFRsQ......DYsW.....................................E-HGaSLl.s..RLYs.-..h.....GpLLDEK....F....phshs....LTYsT...............hu..h...+ps..VDTo.hh.......RRAlW...NYlpChaGI.RaDDY....DY.G.ElNQLL-RshKlaIKTlsChP.E...+s..Tcc.Ysth..h..cp...FcaS....EKV.HVNLLlhEARhQAtLLYALRAIs+ah.................................................................................................................................................... 
0 56 73 122 +3632 PF02251 PA28_alpha Proteasome activator pa28 alpha subunit Bateman A, Mian N anon Pfam-B_2837 (release 5.2) Family PA28 activator complex (also known as 11s regulator of 20S proteasome) is a ring shaped hexameric structure of alternating alpha and beta subunits. This family represents the alpha subunit. The activator complex binds to the 20S proteasome ana simulates peptidase activity in and ATP-independent manner. 21.10 21.10 21.20 22.70 20.60 19.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.11 0.72 -3.80 18 310 2009-09-11 04:56:17 2003-04-07 12:59:11 13 5 109 7 133 251 0 61.10 41 23.66 CHANGED lss-spt.KV-sF....+ppLppEAEpLlusahPpKIhcLDsL.LKsstlNlpDLosl+us..LsIPIPDPs .................p.-spt.cV-sF....RppLhpcAEpLlssaFPpKIhcL-sh.L.....+-stLNlp-Losl+u..s..LsIPlPDP........................ 0 28 37 74 +3633 PF02252 PA28_beta Proteasome activator pa28 beta subunit Bateman A, Mian N anon Pfam-B_2809 (release 5.2) Family PA28 activator complex (also known as 11s regulator of 20S proteasome) is a ring shaped hexameric structure of alternating alpha and beta subunits. This family represents the beta subunit. The activator complex binds to the 20S proteasome ana simulates peptidase activity in and ATP-independent manner. 
20.90 20.90 21.30 20.90 20.20 20.60 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.69 0.71 -4.74 16 411 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 150 7 201 352 1 137.60 43 56.79 CHANGED ssG..hlssNcplhpllcplKPElppLhEphshlphWIQL.IP+IEDGNNFGVuIQEcslpclssVcocstuFhspISKYaspRGchVuKsuKhPHVtDYRphV+ElDE+pahpLRlhlh-lRNpYAhLaDlIhKNh-KIp+PRus..sptthY ............................................sG.l.sNcplh.sllp.hlKPElppLhEphsh................VphWlQhhIP.+..I..EDGNNFGVulQ.........Ec.s...lpclpslcocstua...hspI..S+Ya.pRuchVuKsuK.P.H.....V.....tDYRphVpEhDEtpYhplR...............lhlh-lR.NhYs...LaDlI....KN...h...EKlppP+up..pt..hY................................................................................. 0 70 89 138 +3634 PF05138 PaaA_PaaC Phenylacetic acid catabolic protein Bateman A anon COG3396 Family This family includes proteins such as PaaA and PaaC that are part of a catabolic pathway of phenylacetic acid [1]. These proteins may form part of a dioxygenase complex. 
27.60 27.60 27.80 28.70 27.40 27.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.49 0.70 -5.43 117 1413 2012-10-01 21:25:29 2003-04-07 12:59:11 7 4 637 42 464 1179 369 266.20 33 88.22 CHANGED M.................................................thppsLhchlhphADssllhupRhuEW...hu+APsLEc-lALsNhu.D.lGpuphhashAtp.......................L.....s..G..........pscDcLAahRpstca..+NhhLhEhPss...........caApohsRpaLhDuhthhhhpsL.ppSo.sslAtlAsKssKEtsYHh+put-hlhpLu.cGTcES+p+hQsAlsphW.ash.-hFsss-s-.................pshtttGhssssspLRppahsplsshlp.pssLslP...c.....st.hphs.G+....pG.pHo-thGhl................LschQhhpRuaPsu.sW ...................................................................................ttttp...................thcpsLhc.lhphucopllhhp.tu.p.W...hs+APsLccchsLhshs.DphGH....uh.hL.aohA.tp.......................L..........G...............tsc.Dc.lh.th...p..st.ch...+.......sl...hph..Psh.............saA.DshshsaLlDuhtlh..ssL..scoohsshAphhs+.h.hKEpsaH.RpuhchlhpLu......pGT.....c...tp...+phhQpAlschWh.sh..hFsss...-sc....................puh.s.h.th.h.t.h.s.s.cp.LRppahspssstlp...hs.LslP...s...............ss....h....phs....sc....pG.tas...G.l.................htchphhp+ua.tu.tW................................................... 0 123 282 386 +3635 PF02758 PYRIN PAAD_DAPIN; PAAD/DAPIN/Pyrin domain Bateman A anon Bateman A Domain This domain is predicted to contain 6 alpha helices and to have the same fold as the Pfam:PF00531 domain. This similarity may mean that this is a protein-protein interaction domain. 
22.40 22.40 22.90 23.00 22.10 22.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.52 0.72 -4.28 38 767 2012-10-01 21:41:45 2003-04-07 12:59:11 11 56 60 11 298 774 1 81.90 26 11.69 CHANGED tchhLlpsLEpLscc-hccFKthL............ptsphpIspspl-p.ss.chclAsLlhppaspctAhshslplhcchspp.cLsccLpc ........p.tLhphLEpLs..cp-hc..cFKhh.L..................ppst.tplPh....s.pl-p..As..thclAslLlppa.stptAhphslplh..cchspp.-Lscchp.................... 1 23 69 115 +3636 PF00658 PABP Poly-adenylate binding protein, unique domain Bateman A anon Prosite Family The region featured in this family is found towards the C-terminus of poly(A)-binding proteins (PABPs). These are eukaryotic proteins that, through their binding of the 3' poly(A) tail on mRNA, have very important roles in the pathways of gene expression. They seem to provide a scaffold on which other proteins can bind and mediate processes such as export, translation and turnover of the transcripts. Moreover, they may act as antagonists to the binding of factors that allow mRNA degradation, regulating mRNA longevity. PABPs are also involved in nuclear transport. PABPs interact with poly(A) tails via RNA-recognition motifs (Pfam:PF00076) [1]. Note that the PABP C-terminal region is also found in members of the hyperplastic discs protein (HYD) family of ubiquitin ligases that contain HECT domains - these are also included in this family. 21.00 21.00 21.20 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.17 0.72 -4.31 38 874 2009-09-12 06:34:48 2003-04-07 12:59:11 13 31 321 31 523 833 28 70.80 49 9.26 CHANGED tshsAuhLAsAsPpp.......QKQhLGEpLYPhI....p........phpP..-hAGKITGMLLEhDNsELLpLLEss-sLcsKVsEAlsVL ...........................hushLAsAs.Ppp...............QKQhL......GE+LaPlI....p.........................shps.....phAGKITGMLLE..h..D...s...oE....L...LphLES.s.-...uL+uKV..-EAlsVL................... 
0 158 255 394 +3637 PF03068 PAD Protein-arginine deiminase (PAD) Mifsud W anon Pfam-B_2195 (release 6.4) Family Members of this family are found in mammals. In the presence of calcium ions, PAD enzymes EC:3.5.3.15 catalyse the post-translational modification reaction responsible for the formation of citrulline residues: Protein L-arginine + H2O <=> Protein L-citrulline + NH3. Several types are recognised (and included in the family) on the basis of molecular mass, substrate specificity, and tissue localisation. The expression of type I PAD is known to be under the control of oestrogen [3]. 25.00 25.00 26.30 27.30 24.80 24.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.39 0.70 -6.04 7 323 2012-10-01 20:45:11 2003-04-07 12:59:11 10 8 86 12 176 305 2 310.40 44 60.02 CHANGED sLsEsslFTDTVsFRlAPWIMTPNT.PP.EVYVCplpD....N-cFLcslspLspKApCKLTlCPp.ENRsDRWIQDEMEhGYIpAPHKohPVVFDSPRsRGLKDFPlK+lLGPDFGYVTRE..hsssSuLDSFGNLEVSPPVTVpGKEYPLGRILIGuS.aPpSsGRcMspsVRDFLpAQQVQAPVELaSDWLsVGHVDEFLSFVPssDpKGFRLLLASPuACapLFQEKQctGaGEAhhF-GL+tppp....oIscILuscpLpcpNtasQpCIDWNR-lLKRELGLuEuDIIDIPQLFphcptt.....AcAFFPsMVNMlVLGKaLGIPKPFGPlINGRCCLEEKVpSLLEPLGLpCTFIsDahsYHhhtGEVHCGTNVRRKPFuFKWWpMVP 
................................................................................s......lapDoVhFRlAPhlhpPsh..P.plals..p..h.hp...........p........Flct..ltt.l.st...pst.hp..l...hl...s...p......s.p.s.DpW.hQD.....chEhG.Yh.....ph......P.....p....p....s....h......s....V....l....h.......c.....o.P...R.s.ttL.....p..c....F..s....h.+..plh....u.s.shG..aV.s...p.t.................t.....t...s..ssl.DS..hGNL-VSPP.lss.t.G.KpYPhGRIlhGss...hs....t.....t.u+phtpslpsFL.AQ.pVQ.sPlc.LaoDWLhVGHVDEFhsFlPs....s....s....p....KGFp...hLlASPpushcLhpctpppG......aG......cs..h..h.....F.......p.................u...lt.......t...............t.....p........................oIsplLusc..pLh..ptNtas....p...p.s...I.........chNR-lLK+ELGLs.EpDIl-.lPtLFph......p.p...............................A.....AaaPsh.VNMlV.L.....s....+.p..LG....I..PKPaGPh........l.....p..GpCsLE..pclpsLlcsLGhpCsFIDDa.sYHh.h...hGElHCGoNV.pR.pPFsa.KWWp.......................................... 0 12 52 89 +3638 PF04371 PAD_porph Porphyromonas-type peptidyl-arginine deiminase Kerrison ND anon COG2957 Family Peptidyl-arginine deiminase (PAD) enzymes catalyse the deimination of the guanidino group from carboxy-terminal arginine residues of various peptides to produce ammonia. PAD from Porphyromonas gingivalis (PPAD) appears to be evolutionarily unrelated to mammalian PAD (Pfam:PF03068), which is a metalloenzyme. PPAD is thought to belong to the same superfamily as aminotransferase and arginine deiminase, and to form an alpha/beta propeller structure. This family has previously been named PPADH (Porphyromonas peptidyl-arginine deiminase homologues) [1]. The predicted catalytic residues in PPAD (Swiss:Q9RQJ2) are Asp130, Asp187, His236, Asp238 and Cys351 [1]. These are absolutely conserved with the exception of Asp187 which is absent in two family members. PPAD is also able to catalyse the deimination of free L-arginine, but has primarily peptidyl-arginine specificity. It may have a FMN cofactor [2]. 
25.00 25.00 28.80 27.50 24.10 24.30 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.07 0.70 -5.39 22 1778 2012-10-01 20:45:11 2003-04-07 12:59:11 10 7 1418 31 444 1482 890 324.50 33 93.65 CHANGED a+MPAEapPppusahsWPp+s-sW..ttthtsAptsasslAcAIu+h.........E.VplsVsssp...htsARthLss.........slcllchshsDuWhRDsGPsallssp.......G.chc..slDWpFNuWGGhh.....sappDspVupplsclpthshapss......alLEGGuIcsDGpGTlLTTcpCLLs.sRNPpLo+tpIEppL+caLusp+llWLtcGhhts...-.TsGHlDslspFlsPupVlhshs-DtsDPpYthhptshchLpshpDAcG+.hplh+lPhPs..................th.cpsG-RLsASYsNFhIsNsullhPtasDs.sDphAhclLpphFPc+clVGl.suRcllh.GGGslHCITQQ.P ................................................................hhPuEap.pptshhhW.P...p.c.....s.s...W........t......t...h..p.....sp...p...sa...s...p....lspsI.uch.........-..Vh.l.hst..ptp........htp.s.p...p.hlsp.......................plp.h.l...c..h...s..s..s..D..s..WhRDpGPhhl..hs.sp.............................u....p....ht.......slD..as..F..N.u..W..Gsph................................shc.p.D....s.p....l...s...p...p...l..s....c.....h....t....t....h..s.....h...h.pss...................hlLEG.GuI....cs.....DG..p....GTlLsTc.......pCLLs.........s....R..NPp....L................o.........+.p.p..I..Epp.LpchL...G..l.c.........+lI..WL...s.pG..h....hts....................-.....Ts..uHlDslspFl.....sssp..........llhs.......h..s.......-..D.p.......s.......D.......s.......p.......Y..t.h.hptt.h.chLppt.pcsc.....Gp.hplhcL..PhPt.......................................c...sG....c...R.L...s..A.S..YsNFh.lsN............s....u....l.l..........l........P..ta........s.........D........s.....s.........D......ph.A.hchLpphF......P.s......+....c..l.l.Gl.ss....c.pll....h.tGGslHClTQQ.P...................... 0 159 301 392 +3639 PF03551 PadR Transcriptional regulator PadR-like family Bateman A anon Pfam-B_1014 (release 7.0) Family Members of this family are transcriptional regulators that appear to be related to the Pfam:PF01047 family. 
This family includes PadR Swiss:Q9EXE6 a protein that is involved in negative regulation of phenolic acid metabolism. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.38 0.72 -4.21 33 8479 2012-10-04 14:01:12 2003-04-07 12:59:11 9 19 3006 28 2200 6899 485 72.50 28 49.56 CHANGED lLtlL.sc.psh..aGYplhpclcph..Ghhphs...cGoLYPhLc+LEccGLlssphppt..htu.sRKhYpLT-sG+ptLpc ................................lLslL..tc..pst...aGY.-.lhp..p.l....c....p....h...............s..h...h...p....h..s.........tG...olYshLp+L.pcpuh.l............p.........s....t......h.pp.............t..............tt....sR.....K...h.Y.p..lT..cpGcptLt............................................ 1 832 1595 1975 +3640 PF03283 PAE Pectinacetylesterase Mifsud W anon Pfam-B_1589 (release 6.5) Family \N 22.90 22.90 22.90 23.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.18 0.70 -5.82 27 583 2009-01-15 18:05:59 2003-04-07 12:59:11 8 11 204 0 340 545 102 274.10 26 69.61 CHANGED hhhhphs..hssp...................VtlsllpsAhtcGAlCLDGShPuYHhccG.GoGussWLlphEGGG.WCss......hcsChtRppT..chGSSphh.ppplsFs....GlhSspsppNPDFaNWN+VplRYCDGuSFsG.csc...spuspLaFRGp+IapAlh--Llsc.GMppAcpslL.oGCSAGGLusllaCDp....F+shLPt....sspVKCloDuGaFl...DshclsGscshcpha.....psllplps.tpslspsCss+hpPs........CFFPQpllptIcTPlFllNuAYDsWQlpphLsP.ss..tt.WttC+hs...hspCsssQlphlpsFRsphlsulpsh..tpsppsGhFlsSCasHCQotpptoWhsts..SPhlpspslAcuVGDWaasRp..hctl 
...................................................................h.........................................tshC.DGo.s..ua..a..hp.u..........tu.s.p.p.all.hp.....GGu.hC.s..............hpsC........R.....ht.s...........thho...S..p......h.......p...t.h...hs......................Gllus.p.t.tNP.aas..WN.hVhl.YCsGssasG...s.......s.....p........................................................p...............t...............s..............t.........p..............h..............hapGtpl.........hp...s...lhpcLhsp.....Ghtp....A....p.........p.h..lL.sGs..SAGGhushlpsDp....hpph.hst............sspV.+sluDu..G.hFl.........................s......th..thtt..ht.h...........t.hh......t...h..p.......tt.hst.C...t.h..t.............Chas..hh......hps.Ph..Fhhp.haD.hQh.t.......................................hp..t....ph.hhptht.thht..h................u.ahsuCh.Ht...............a...................htt..........h...h..............h................................................................................................................................................. 0 128 217 285 +3641 PF03403 PAF-AH_p_II Platelet-activating factor acetylhydrolase, isoform II Mifsud W anon Pfam-B_3469 (release 6.6) Family Platelet-activating factor acetylhydrolase (PAF-AH) is a subfamily of phospholipases A2, responsible for inactivation of platelet-activating factor through cleavage of an acetyl group. Three known PAF-AHs are the brain heterotrimeric PAF-AH Ib, whose catalytic beta and gamma subunits are aligned in Pfam:PF02266, the extracellular, plasma PAF-AH (pPAF-AH), and the intracellular PAF-AH isoform II (PAF-AH II). This family aligns pPAF-AH and PAF-AH II, whose similarity was previously noted. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.42 0.70 -6.17 7 427 2012-10-03 11:45:05 2003-04-07 12:59:11 8 13 218 13 281 1346 263 295.60 23 70.00 CHANGED huupspsplPtssGPasVG.............CsDLMhsts....cusFlRLYYPs...p.-psp.-sLWIPpcEYh.GLu-aLss.phhGplL.phhhGShphPsphNush+ss..-K.YPLllFSHGLGAFRTlYSAlshpLASpGFlVAAVEHRDcSAusTYahc-tsssE.t.......pcpWlhh+chpt.-pEhplRpcQVpQRspEC.pALshlhpIs.tGpsscNl.....LsssFD...hppLKsslDpo+..lAVhGHSFGGATsl.......poLuc-....pcFRCuIALDsWMaPlsc-ha.Sph.QPlhFINStcFQhstslhpMKK......................hhs.ccpp+hITlcGSVHpsFsDFsFloGclIG+hhpl..KGclDsp.Ah-lss+ASLAFLQKaLsLcc-asQWssLh-G.spNl .......................................................................................................h.............G....lG....................................t.....................t...h.....plaYPs....................t.....................ah..s...........t................s.h.......t..t..h...h...............t...h............h..h................h..................h..h....t......................h.p..h..s..s.........ss......h.............t..................st..............t.....p..aP.lllFSH.G....l..u...u.....R.ph..YSt.hshpLASp...GalVsslE...H......p.....D.tS.....us.h.o..h.h......p..............tt...................t.................................................................................................h.........t....t.................t..h....thRptQ.lp.R.....h.tE......h....shp..hl...ths....tG..p............h.........................h......t..p...........h...t.h..psp....ls.h...pp.........lshhGHSFGu.ATsh...........................................tsh.pp................p.h...p...s.....sl.hl.....Ds..a...h.......h...s......l...t.........p......t............................t.........h.................................P.............h.....h.hl.p....s..p....t...a......................t........p.............................h..p.............................................................
..........................................h..................t..................h.....h...h...h.....t.s.H.s.sDh..h...h......s.................................................................................................................................................................................................................................................................................... 1 109 170 222 +3642 PF03985 Paf1 Paf1 Finn RD, Wood V anon Pfam-B_ (release 7.3) Family Members of this family are components of the RNA polymerase II associated Paf1 complex. The Paf1 complex functions during the elongation phase of transcription in conjunction with Spt4-Spt5 and Spt16-Pob3i [1,2]. 20.40 20.40 20.50 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.59 0.70 -5.74 12 358 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 263 0 255 360 4 334.60 24 76.57 CHANGED p-aIs+l+YpNsLPsPshsPKhlpash....spsh.sp.hloSLhpcpphssLh...DpDLGhslDLl.........Dpchhts.s.ssp...LcscDchLL+Dsthsp.........ho+pc.ptVoaLR+TEYIS...sp.sp.t.....stcthpschthslcpshpppp..hhstpp.lctlEpTF-sspp.....phhpHssK+plpsVcshsLLPDhsphcpsahplpFsGss..........shstp-tp.........p.thpsslhtsh-hEt-caluhahs........hpppL-cphcDhp........cppc.YcaKhhR-Yshphhp...........psthp-hslhhs............pcpsssYYpPLcoRlcL++RR......lc.lVpppshsplslphRsssspEpchpcthRtch-shshsph--.E-Ec-ct.p.ppc.pcspspppptttptps.pptt...........tp.t.ctppttcptpsssotss 
....................................................................................................................................................thlspl+YpNsLPs.shsPKhlphsh.......tt........phh.s....h.hh.os.Lt+p.phph.....-.-lGhslD..L.l.................s.....phh.....h..s......s...............lcPtDp.LL..c..p...h................................................psp..pt.ts.V.saLR+TpYIS..........st...s.t.t....................s.pt.php..h....thp.hp....p.t............t.....s...p.t....l.p.t...I-p..oFpsspp....................................tH.....s...+....p..lpsVphhPlhP..Dhpha.....p..s..hh.lhFsssP...............................s.t.tt.....................t.h.p...shl.h..........t..........t.........p........p....pa..h..s..h.....ah...s..............................h..t.p...h...p.....t.t-..................t...t........p..a.p.......aphhRpYph.php..................tpth.-....s.hhhh........................c.tpssaY..l..s.+hpLppcR....................t..h............p........t......s.l........lph+.t..st.t.....-...t..p.h..p.h.t.....t......................................................................................................................pttt........................................................................................................... 1 81 135 206 +3643 PF02671 PAH Paired amphipathic helix repeat Bateman A, Mian N anon Pfam-B_281 (release 5.4) Repeat This family contains the paired amphipathic helix repeat. The family contains the yeast SIN3 gene Swiss:P22579 (also known as SDI1) that is a negative regulator of the yeast HO gene [1]. This repeat may be distantly related to the helix-loop-helix motif, which mediate protein-protein interactions. 
21.30 21.30 21.60 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.13 0.72 -4.34 121 1587 2009-01-15 18:05:59 2003-04-07 12:59:11 16 30 270 12 991 1522 9 48.30 34 10.07 CHANGED sphYppFLclLptapp.pphstsp.................lhp................c....Vsp.Lhp.....sa..-Llp...tFsp.FLs .................................................chYppFLcllpp.app..pp..lstst.........................Vhp................p.....Vsp..LFp.....s.as..-Llp...tFsp.FLP............... 0 344 546 754 +3644 PF00221 Lyase_aromatic PAL; Aromatic amino acid lyase Finn RD, Eberhardt R anon Prosite Family This family includes proteins with phenylalanine ammonia-lyase, EC:4.3.1.24, histidine ammonia-lyase, EC:4.3.1.3, and tyrosine aminomutase, EC:5.4.3.6, activities [1-3]. 21.80 21.80 22.20 22.00 20.90 21.70 hmmbuild -o /dev/null HMM SEED 473 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.48 0.70 -6.05 143 3331 2009-12-15 11:53:36 2003-04-07 12:59:11 14 13 2194 102 901 2989 703 434.10 35 86.47 CHANGED lpl.suss.Lolspltslu....p.ps...tplpLs..spuhpclppupphlp.chlpps.pslYGlsTGFGthusscI..s.........cc...htpLQcNLlhSHusGlGp............................................slspshlRshhllRlsoLu+G.....aSGlRhpllctLhph.LNpslhPhlPppGSlGASGDLAPLAHluhs.llGc.Gcs.hh.p.......Gp..................h.....hsu.s-ALppsGlpP..l.pLtsKEGLALlNGTpshoAlushulhcAppLhthAtlhuALohEAlpGsspsFcsplH.tl.RsH.GQlcsAstlRplLp.G...S......plhpscp....................+..............lQDsYoLRChPQVhGAsh-slphspcslphEhN..usoDNPLlh...........s....-...............t.s.......cl..lSGGNFHupPlAhAhDhlslAluclGslu-RRlspLlssths.G.LPsFLs......s....ss.GLsSGaMlsQhouAuLsSEN+hLApPuSsDSls.oSusp..EDHVSMushuAR+h.tchl-NlptllAlELlsAsQAl-hR........t.....shph.usshptlhphlRppVshh..p...p 
..........................................................................................................................ltstpLoltplhtlh......p.....ts......spl.pls...tpuhttlptut.thlp.p..hh......t...p.s.p................s...sYGlsTGFG.thu...shpl..s..................cphtpLQ...csLlh.SHusGlGt..........................................................shspphsRhhMll+ls..o.LspG.....aSGlRhpllptlhth.lNt.slhPhlPt..pG.SlGASGDLsPL.uHhuhs.Ll.GcG...cs...hh...p...........Gc..........................hhsutcu...LpttG.l..p..P.....l.p.LtsKE..GLALlNGT.ps...ouhuhhulhcAppLhthuplhuAhohEuht.....Gp.psFc.t..p..lH.t.l.Rsa.GQ.hpsAtthRp..l..Lp.u........S.....plhppppt.............+...................................................lQD.sY....o.LRC.PQlhGush-slp...hstphlp.hEhN..u.....ssDNP....L.lh......s....c.............................................................t.s....cl..lSGGNFHu.pPlAhuhD..LtlAlAcluslu-RRhstLlssth..s.....s..LPsFLs......s.......ps..GlsSGa.M......l...sQh..ss.Auls..SEs+t.LApPuS..l.DS...hs.ouspp........ED..+VSMushAuR+h.hchl-shptllulE......hlsAsQ...ul-hR........t.....s.p.......ush.h.pps.hp.hhRphls....p........................................................................................... 
0 266 520 727 +3645 PF02089 Palm_thioest Palmitoyl protein thioesterase Mian N, Bateman A anon IPR002472 Domain \N 21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.93 0.70 -5.47 2 572 2012-10-03 11:45:05 2003-04-07 12:59:11 10 11 252 6 338 538 22 229.00 32 83.50 CHANGED DPPuP.PLVIWHGMGDSCCNPhSMGsIKKMVEKcIPGIaVLSLEIGKshhEDVENSFFLNVNsQVshVCQILtKDPKLQQGYNAhGFSQGGQFLRAVAQRCPoPPMhsLISVGGQHQGVFGLPRCPGESSHICDFIRKoLNAGAYsKhlQERLVQApYWHDPI+EslYRNaSIFLADINQERsVNESYKKNLMALKKFVMVKFhNDoIVDPVDSEWFGFYRSGQAKETIPLQESTLYTpDRLGLKtMDKAGpLVFLAhEGDHLQlScEWFhAHIIPFLc .............................................................................PhlhhHGh..............h..Dp........sh......t.h.....p...hhp..p.....h.s.Gh...hshslpl.s.p..............s....s....tsh..hhplptQhp.ls.p.............lt.p.........L..t..p.....G...hshlGaSQ...Gu.hhRulhpps.s.....s.....s.h.....p.................shIolu.u.p......tGhhuhs..h..C................t......h.......h.C..phh....pphl.p.h.ts.Ysphs...Q...ppls..upYa+...D..P..h...c..........s......Y..h...pt...S.FLs..clN.s..E............p...................h............N..p.....s.....Y......+........c...Nh........hpLp...phVhlhF....sDshl..Php.Sp..aFG....aa.........t...ss.t..........p.p..h...........h.sh...pcp.t.lYtcDhlGL+ph.cptGpl.hh.hs...h...G...t.H...h....t.....h..s.....t............................................... 2 127 185 265 +3646 PF02569 Pantoate_ligase Pantoate-beta-alanine ligase Mian N, Bateman A anon COGs Family Pantoate-beta-alanine ligase, also know as pantothenate synthase, (EC:6.3.2.1) catalyses the formation of pantothenate from pantoate and alanine [1]. 
20.70 20.70 20.80 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.87 0.70 -5.64 7 3517 2012-10-02 18:00:56 2003-04-07 12:59:11 10 12 3268 96 934 2614 2426 268.30 42 94.43 CHANGED hplhpsltplpphp.+phR.p..t+plGFVPTMGhLH-GHhsLlcpA+.tcNshVVVSIFVNPsQFGssEDh-tYPRsl-cDhtlhEptsVDllFsPssc-MYPtshpsplp.tt.........Lop.LEGtsRPGHFcGVsTlVsKLFNlVpPsRAYFGpKDhQQlhllcphVpDh.hDlEllssPIVR-pDGLAhSSRNsYLssEpRKtA.uLY+uLptutphlps....GE+sspclhptht.tl....lcttpsh.lDYlEltDhp..L....-Phpp.cpt..llhVAshlGc.......sRLIDNhhl ..................................................lhpslttlc.p.hh....pp.h..+.tp.......up..pluhVPTMGsL..H-GHhsLlccA+.....p...c......s.....D...............h......V.VVSIFVNPh.QF...u.s...s.EDhspY.P...RsL.pcDhphL..........p..........p............t..................G...............V.....D............h......lFsP.s...s.c-h.....Y....P....p....s........h......p.....s...p.s...p...lss..................................lushL..-G.up..R.P.G.........HFcGV.s.TVVsKLFNl............V.....p....P......D......hAhFGcKDaQQLslI+pMVpDh..s.h.....s.....l.....c.Ilus.P.hlR.-.p..D.....GLAhSSRNsYLo.s-cR.p.tAstL..csLpt.....s..tp....th..ps......................G.p....c......s..h...pt.l..h..ptst....ph.........................Lp..p..t......h....h..plD....Ylplhcsss...L.....................p..s......h......p......p......h......p....p......t......h......ll..h.h.A.shl.Gp.....................sRLIDNh................................................ 0 311 620 808 +3647 PF02548 Pantoate_transf Ketopantoate hydroxymethyltransferase Mian N, Bateman A anon COGs Family Ketopantoate hydroxymethyltransferase (EC:2.1.2.11) is the first enzyme in the pantothenate biosynthesis pathway. 
25.90 25.90 26.10 26.10 25.70 25.40 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.44 0.70 -5.27 117 3509 2012-10-10 15:06:27 2003-04-07 12:59:11 10 8 3295 45 993 2531 3391 255.50 45 92.45 CHANGED p..+h...TlsclpphKpp...sc+IshlTAYDhshAplh-puGlDhlLVGDSLGMVlhGhsoTlsVTl--MlaHscAVsRGsp....puhlluDhPFhSYpsoscpAlcsAs+lhc.puGApAVKLEGGt.....thsctlctLsctGIPVhuHlGLTPQslpthGGa.+lQG.+.stpsAppllcDAhuLpcAGAFulVLEslPspLAppITp..pl.slPTIGIGAGsssDGQVLVhpDhLG..l.......sshtPKFVKpYs...shsshlpp......AlppYsp-V+sssFPst- ....................hTlspLtchK...p...p...sc.....+lshlTAYDh..shA+lh-p.s.G.lDllLV.GDS.LGMllhGa-o....TLPVTl--MlaHscAV.tRG.....ut..................p....shll.uDhPFhoYt..s.o.scpAhpsAs+lh+..u.GApuVK...lEG....Gt.........tl.s-.s........lchLsptG.IPVhuHlGLTPQSV......shhG.GYKVQ...G.+.s.....tc........s......A......ppLlpDAhAl-pAGAhh...lVLEs.VPspLApcITc..tL...s.IPsI.G.IGAGs.......s.sDGQVLVhaDhLG.......l...............t...ss.....hhPKFlKpah..............shss....s.lpp..........AlppYhp-V+susFPup................................... 0 329 652 849 +3648 PF02711 Pap_E4 E4 protein Mian N, Bateman A anon Pfam-B_1589 (release 5.5) Family This is is a family of Papillomavirus proteins, E4, coded for by ORF4. A splice variant, E1--E4, exists but neither the function of E4 or E1--E4 is known [1]. 20.50 20.50 22.10 23.40 19.00 17.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.81 0.72 -3.17 40 248 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 78 0 0 191 0 91.30 42 90.27 CHANGED LpLh.ssp.....+YPLLcLLsshp...s....PPp..P.Pp..shAPp+......s+RRl.sD.............sDsssspssssssh.t..........pssWTVpTsssol..olpupTpsGToVsVTL+L .......LpLs..sss.....+YPLLpLLsoh........T........PP+.hPsPs..PWAPp+.......+RRL.sD.............pDps.p.s.p.psssss................ps.WTVpp...t...ol..pLpApTKcGsoVlVTL+L........................ 
0 0 0 0 +3649 PF04755 PAP_fibrillin PAP_fibrillin Mifsud W anon Pfam-B_3698 (release 7.5) Family This family identifies a conserved region found in a number of plastid lipid-associated proteins (PAPs), and in a number of putative fibrillin proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.48 0.71 -4.59 12 635 2009-01-15 18:05:59 2003-04-07 12:59:11 7 13 130 0 381 643 102 170.70 22 64.03 CHANGED tpLKppLl-ulhGp.p.RGLp.Aos-s..+scI.phlppLEuhNPossPs.-s.sLLsGpWhLhYTottslh......sLl.tspl.h.l+ltpI.QsI........Dspshsl.N.sp..htu....PhhpssholsAcFEltSspRlplpFccuhlt...l.....................h.t..t.hpssht.l........Plph.hssspspuWL.sTYLDc..-LRISRGstGslFVLh ........................................................................t......lht.h...t...h..Gh...hp........pp.......ptpl.phlpt......LE.....sh....s.ss.s......t......sh....p........s..................s.........h.LpGpWpLhY...oo..ttthh...................................h...........s.th....h......h.p.....h.s.p...lhQ.tl....................cs..ts....h...p.....h....N..hhp.........h............h.s.......h..p.s.....t........h...plp.u....p..h.p...h......h.......s.......s.......p........+.l......p.l..pFp.ps.hlt........................................................................................................................s.....h....................................t......t.s.p...u.h...h..hTY.LDc......clRls.R..us.t.Gshhlh.h.................................................................................................................................. 0 148 296 349 +3650 PF04926 PAP_RNA-bind Poly(A) polymerase predicted RNA binding domain Wood V, Bateman A anon Pfam-B_1341 (release 7.6) Domain Based on its similarity structurally to the RNA recognition motif this domain is thought to be RNA binding [1]. 
22.10 22.10 22.30 22.20 21.70 22.00 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.84 0.71 -4.70 54 630 2009-01-15 18:05:59 2003-04-07 12:59:11 10 17 301 11 392 595 5 146.80 29 22.62 CHANGED sFFpp.YKaYLplhsuupsp-.pphcWpGhVESKlRhLlt........pLEphss.ltlA.HPaP+sFp.......................................................................t...ppppp............................haposaaIGLphpttps.pttp................lDlphsspcFhshsps....h.t..h.tsthplpl..paVKpppLPs.V..atputp+sp+s .............................................................................sFFpp.YKaYlhlhs..s......us......s......p-..pthpWs.GhVES+lRhL.lt........pLE.+.s.ph...ltl.A..Hs.PpsFs.........................................................................................................................................psppp.....................................apohaa.lGLthcp...s....t.s.t....t.............................p..lDlshsl.ppFpsp.lhp....h.p...hh.ctsh.plts..pHV.+.pppL.phl....th.pc....t.................................................................. 0 121 204 312 +3651 PF04795 PAPA-1 PAPA-1-like conserved region Waterfield DI, Finn RD anon Pfam-B_6501 (release 7.5) Family Family of proteins with a conserved region found in PAPA-1, a PAP-1 binding protein. 21.00 21.00 21.90 21.90 20.10 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.12 0.72 -3.28 19 276 2009-09-11 14:54:57 2003-04-07 12:59:11 7 9 209 0 180 273 0 83.40 31 21.17 CHANGED Etth+RAEtARRR+.QuEK+sEEpKh-TIp+LL+ppu.......tt+.tt...tttppshtppcstcsppsssshlRals..utpGopluhPpc.lssP .................Ehth+RuEtAR+R+..uEK+sEEc.........K.......t-TIp+LLKppu.............tp.p.t.t..p......t.t.t.....pp.....tttpt..c.ttp.tt.ssshlRalp....s..pGshlshP.t.h........................................................... 
0 50 95 142 +3652 PF03333 PapB Adhesin biosynthesis transcription regulatory protein Mifsud W anon Pfam-B_3068 (release 6.5) Family This family includes PapB, DaaA, FanA, FanB, and AfaA. 20.40 20.40 20.40 23.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.00 0.72 -4.17 14 275 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 191 2 10 111 1 87.40 42 78.55 CHANGED hpphst......hhhph+pthLhPGplsEc+FaLLhEISsI+ScKVI.AL+DYLVhGhoRKElCE+asVssGYhShuLsRLp+lsphVtpls.aY .................t.th...............h+tutL.PGplsEEpFaLLl-ISsI+S-KlIhAL+DYLVpGaSRKpVCE+ashssGYFSsslsRLpclsphVtpLssaY........... 0 1 3 6 +3653 PF03628 PapG_C PapG chaperone-binding domain Finn RD anon Pfam-B_3074 (release 7.0) Domain PapG, the adhesin of the P-pili, is situated at the tip and is only a minor component of the whole pilus structure. A two-domain structure has been postulated for PapG; a carbohydrate binding N-terminus and chaperone binding C-terminus (this domain). The chaperone-binding domain is highly conserved, and is essential for the correct assembly of the pili structure when aided by the chaperone molecule PapD [1,2]. 19.60 19.60 20.70 19.80 19.00 18.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -10.42 0.72 -4.38 3 75 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 38 2 1 61 0 108.50 73 33.56 CHANGED HGNLSIDSANGNYASQTLSIYCDVPVoVKISLLSNTpPAYNN.QpFSVGLGNGWDSIISLDGV-puEETLRWYTAGS+TVTIGSRLYGEuGKIpPGsLSGSMTMlMpLP ........HGDLSINSANNHYA..AQTLSVSCDVPsNI.RFhLLpNTsPsYSH.GppFSVGLGH...GWDSIVSlNGVDTGETT.M..RWY+AGTQNLTIGSRLYGESSKIQPGVLSGSATLLMILP.... 2 0 0 0 +3654 PF03627 PapG_N PapG carbohydrate binding domain Finn RD anon Pfam-B_3074 (release 7.0) Domain PapG, the adhesin of the P-pili, is situated at the tip and is only a minor component of the whole pilus structure. 
A two-domain structure has been postulated for PapG; a carbohydrate binding N-terminus (this domain) and chaperone binding C-terminus. The carbohydrate-binding domain interacts with the receptor glycan [1,2]. 25.00 25.00 48.80 48.10 22.10 21.10 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.50 0.70 -4.90 4 66 2012-10-02 17:35:21 2003-04-07 12:59:11 8 2 29 4 1 50 0 216.30 62 66.66 CHANGED MKKWFPAFLF.LSLSGsNDALAuWpNlMFYuFNDh.shsuGNVplhDpsQFhlsWpoGuATAT..YsuCsGPEFssG..lYapEYlAWlVVPK+V.TpsGYslFl-VpSKhGWS.ENpsDpD.YaFhpGYcWDphsssuuRlCh.sGpp+pLsppFs-lhFplhLPsDLPKGcYshPl+YlRGIQ+HaYsahts+YKhPYs.hKpLPtsNTl.hShcNsGuCRPSAQSLEI ..MKKWFPAhLF.LslSGpssA...hpshhFYShsDs..h.thsVhlTphsQFIsshpsu.uTsT..aspCNG.sascG..hYapEYhAWlVhPK+V.ohNGYslalElpsKGSaS.-sp.DNDsYahhKGatWDE.A.suGplC.p.GEpppLs.p.FsslhhpstLPsDLPhGDYohsl.ahpGhQRp.hsYlGuRaKIP.slhKThPhpsph.F.hKNhGGCRPSAQSLEI 0 0 0 0 +3655 PF03025 Papilloma_E5 Papillomavirus E5 Bateman A anon Pfam-B_1916 (release 6.4) Family The E5 protein from papillomaviruses is about 80 amino acids long. The proteins are contain three regions that are predicted to be transmembrane alpha helices. The function of this protein is unknown. 25.00 25.00 37.90 37.60 21.10 19.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.91 0.72 -4.03 13 139 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 32 0 0 103 0 70.20 58 92.77 CHANGED llhlFllCFCVhLhlChhl.PLL.SlplaAhlllLVllhW.VshTSPhcsFhVYllFhYlPhaLlHhHA.hhhp .....LLssFLLCFCVLLCVCLLlRPLLLSVSsYsoLlLLVLLLW.losuSshRsFhVYllFlYIPLFLIHsas....... 0 0 0 0 +3656 PF02380 Papo_T_antigen T-antigen specific domain Mian N, Bateman A anon Pfam-B_1131 (release 5.2) Family This domain represents a conserved region in papovavirus small and middle T-antigens. It is found as the N-terminal domain in the small T-antigen, and is centrally located in the middle T-antigen. 
19.10 19.10 20.20 19.80 18.30 17.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.92 0.72 -4.06 10 268 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 63 6 0 201 0 85.00 40 54.97 CHANGED V.ussasssh-phaCKpWssCh+shs.spCs.CllChL+ppHcp+.KhhR+sPLVWl-CYCa-CappWFGh-loppslhhWspIIupTPhcsLcL .......................l..t.h....cphasKpWshC.pp.p.scCs.ChhC.LchpHhp+.KhhRKp...PLVWl-CYChcCappWFGhsl.TpE.ohpaW.plltpTsa.p.LcL.................. 1 0 0 0 +3657 PF01507 PAPS_reduct Phosphoadenosine phosphosulfate reductase family Bashton M, Bateman A anon Pfam-B_590 (release 4.0) Family This domain is found in phosphoadenosine phosphosulfate (PAPS) reductase enzymes or PAPS sulfotransferase. PAPS reductase is part of the adenine nucleotide alpha hydrolases superfamily also including N type ATP PPases and ATP sulphurylases [1]. The enzyme uses thioredoxin as an electron donor for the reduction of PAPS to phospho-adenosine-phosphate (PAP) [1,2]. It is also found in NodP nodulation protein P from Rhizobium which has ATP sulfurylase activity (sulfate adenylate transferase) [3]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.14 0.71 -4.44 27 7027 2012-10-02 18:00:56 2003-04-07 12:59:11 14 43 3053 30 1986 5609 1987 185.10 24 61.61 CHANGED phshsaShGp-usVhLcLshKshhs..h....................P.llalDTGacF.Eshcah-cltc+h...sls.lhlhtstcshtpths.hs...phapp........hs.lhKscslpp........................................ALcch..p..AhhsGhRRs-st.sRuphtlhphcss.t.......................hl+lh.PlhsWott-lWpYlhtpslPhssLatpG.....................................apolGCh.sTss ..................................................................................................................t..sh.hS.h.G.t-us....V.hL.c....L....s.t..ch..hhs....h.....................................................s..ll..a.l....D..T......s......h......p.........F...........E.s........h........p.....a.......h.........-...............c.....h.........t...cph..............sh.....p....l....h......s....h....p....s............t......s...h..t.....t..t..h......s.........h........t..................t..h.app......................................................h.h.K..sp.s....hpp........................................................................................................................sl....p.....ph..........t...t..u..h...h..sG...h.RR.--s.......t...s.......R.........u........p.........................h.....h..s..h....c..p..t......tt.....................................h.+.....lh..Pl..h...s...W...o..ptDlWpYl...tp.......p.......sl.............h........ss.La...t.t........................................................a..ho...lGsh.hTt........................................................................................................................................................... 
0 598 1228 1656 +3659 PF03285 Paralemmin Paralemmin Mifsud W anon Pfam-B_4064 (release 6.5) Family \N 25.00 25.00 25.40 26.60 24.60 24.70 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.94 0.70 -4.92 6 266 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 38 0 120 211 0 209.00 26 60.43 CHANGED h+KQMQEDEtKTRtLEETItRLE+ElEpLEsGsSs.suoKEs.stsu........sPAtpE.p.csl.ssppoPLsTs.ttpchS.oPh+.stussMMpA............................................VVHAV...DGsspNGlpsLSSSEVDELlHKADEVTLuEuutsu...-............sss.s.t+sTP.R+EITGVpA+PtpssstssstpPutEsPVTMlFMGYQNVEDEsETKKVLGlE-.TIKAELVVIEDu-spstsps.....+-pAPPNGSAuEPstsssptEEsphs..ssssssssc....DhshKKQRCKCCoVM ..................................................................................................................................................tp.ppDp.php.LEpsl...R..L..EpE.lp.LEp.t.t.t..hu.spEp..h..t..........................hpc.....c...p............................................................................................................................................................................................................................................................................................................................................................p.PVTMlFMGYQph.-D.pt.Et.pp.hh.uhpt...hlpAElVlIp-ttt.................................................................................................................................................................................................... 0 4 21 51 +3660 PF01508 Paramecium_SA Paramecium surface antigen domain Bateman A anon Bateman A Domain This domain is a cysteine rich extracellular repeat found in surface antigens of Paramecium. The domain contains 8 cysteine residues. 
21.20 21.20 21.60 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.79 0.72 -3.76 78 1822 2009-01-15 18:05:59 2003-04-07 12:59:11 11 40 5 0 1522 1826 0 63.40 27 47.23 CHANGED ttTcspCpua.ss....CTs.t..........sG......suClsh...ssCssY.sspssCtps.............ssspChWsss........tsChsto.......Cs ....................h..ocspC.psah..ss............Cos..................sG.......suCl..ph....ssCssa...tspp..s..Chts................ssstChWsss............tpChs.ts......Ct.......................... 0 1522 1522 1522 +3661 PF03210 Paramyx_P_V_C Paramyx_P_V; Paramyxovirus P/V phosphoprotein C-terminal Mifsud W anon Pfam-B_2037 (release 6.5) Family Paramyxoviridae P genes are able to generate more than one product, using alternative reading frames and RNA editing. The P gene encodes the structural phosphoprotein P. In addition, it encodes several non-structural proteins present in the infected cell but not in the virus particle. This family includes phosphoprotein P and the non-structural phosphoprotein V from different paramyxoviruses. Phosphoprotein P is essential for the activity of the RNA polymerase complex which it forms with another subunit, L Pfam:PF00946. Although all the catalytic activities of the polymerase are associated with the L subunit, its function requires specific interactions with phosphoprotein P [2]. The P and V phosphoproteins are amino co-terminal, but diverge at their C-termini. This difference is generated by an RNA-editing mechanism in which one or two non-templated G residues are inserted into P-gene-derived mRNA. In measles virus and Sendai virus, one G residue is inserted and the edited transcript encodes the V protein. In mumps, simian virus type 5 and Newcastle disease virus, two G residues are inserted, and the edited transcript codes for the P protein [2]. Being phosphoproteins, both P and V are rich in serine and threonine residues over their whole lengths. 
In addition, the V proteins are rich in cysteine residues at the C-termini [3]. This C-terminal region of the P phosphoprotein is likely to be the nucleocapsid-binding domain, and is found to be intrinsically disordered and thus liable to induced folding [5]. 25.00 25.00 38.50 64.70 23.40 19.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.64 0.71 -4.35 21 673 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 103 5 0 640 0 161.50 42 37.80 CHANGED sclcpchcpILsplsol.tlKs-lpsIKs.......olATlEGploolpIh-PGsusss..sssclctssc.pPllssssGcssspltc........................psplthD.Lu+Plsspsppshthssssssho..+..pslpuLIcsphhsscp+pchhphlsps+opp-Ltcl++tIl .......Klcpp.phllpphsSl.hh+uElpplKp.......SlushEupLu.hhIh.PGhupss..shuDlc..tssc.+PllspssGcs.s...Vhp........................tGplhhshh.pPls+.s.p.hhshs..ss..s..s.shu..+..sslRulIpSp.hc.sppthLhohLDshcuhp-ltKh+ph................................ 0 0 0 0 +3662 PF00946 Mononeg_RNA_pol Paramyx_RNA_pol; Mononegavirales RNA dependent RNA polymerase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_586 (release 3.0) Family Members of the Mononegavirales including the Paramyxoviridae, like other non-segmented negative strand RNA viruses, have an RNA-dependent RNA polymerase composed of two subunits, a large protein L and a phosphoprotein P.\ This is a protein family of the L protein. The L protein confers the RNA polymerase activity on the complex. The P protein acts as a transcription factor [2]. 
20.60 20.60 24.20 24.20 19.40 19.00 hmmbuild -o /dev/null HMM SEED 1072 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.82 0.70 -13.58 0.70 -7.19 41 1469 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 433 0 4 1221 0 621.10 23 54.24 CHANGED EsHLsSPllppclhhhlp.hsshspshthcsps.h.t.hc..hphhtt.psphhtchtph.pshltpplhsh...........ththl.asph...Lh...ph...phspphpphh+hs..spshshhsptlpphhp.....plshpLsspsphtppttt..hp.................phhplt........................................thht.spahp.h.hWh.h...............chph+pllpphpptppppppshlh..hcspshhlllsspllhlhspp.th..hhhhTa-hlLMhsDllpGRhps.hhsshs.......sphssht.pl...........ppLaplsDpl..hthhGsssYsllshlEslshuhL.QLt-.hh.Lp...GpFhsahhsEltp...tLptpshhsptt...hhp.....................lhslhp.phshc.huElauhaRpaGHPhlc....utpAhcKVRcphstsK..llshpshhcstuhFpthlIsGahcc+st.........................tWPssphshpssp.lpphhcssptloh.phslcpacpastlpFppth-hshspDLohahKDKAlSss+p-Whosa.cp.h............h.....psstsstoRRLl.sFLp-ssFsshphlpYVhstpYLpDs-aslSholKE+ElK.sGRhFAKMoa+hRtsQVluEsLlAspluchF+-ssMstsphcLpKpLhphSp.u..................................................................................................................psscshchsushloTDLpKaChsaRaposshauppLsclaGhssLFpWlHhhlpcSslYluDsasPPpsssp.hsL-psss.........sslhhpsshGGIEGhCQKhWTlloIshlhLsAhcsss+ltullQGDNQsIslTpcV......shshppccptshptsppahptL+pshtslGHpLKtpETllSucahlYSKplaacGhlLspuLKshoRsshhosslh-sspuusSsluTshtchtEpGhshhhuahlshhhshpplhh..........pl....................hashssshspslpphhh.....................ps.shlhth...sllPupLGGl.sahshoRlahRslGDPlTsulAclKch...............Ipsshhsppllphlhs.ppPup.....usahcLssDPaSlNlststssTshLKphspcslhps..osNshlpulapcssppE-cpLupFLhspcslhPRsA+tIhspo.sGtpcpIhGhlDoT+Tlhptshpppslssphlpplhphshpphphhhphhpp 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lp.......uh..................................hs.hh..hh..hhpp.t...........................a..................................................hh.h...tt.....t.h...h......h..h.h...h.h....s.p.hhpDKuhs..pp.h.t.h...........................t......sp+l.l..hltp..hs.....p.h..h.p...thh....p...ph.huhp.KE+Elp...GRh....sh..R.h.hhsE.hltp.hh.hh.t.hhh.s..p..c.h...s.........................................................................................................................t.....t.h....hDhpKas.t.Rhp....hhp.hhsphhGh.phaphhH.hh..shhhhs...s.....t.....h.t....p.........t.h.hh.....GGlE.G.hpQ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 2 2 4 +3663 PF01692 Paramyxo_C Paramyxovirus non-structural protein c Bashton M, Bateman A anon Pfam-B_1202 (release 4.1) Family This family consist of the C proteins (C', C, Y1, Y2) found in Paramyxovirinae; human parainfluenza, and sendai virus. The C proteins effect viral RNA synthesis having both a positive and negative effect during the course of infection [1]. Paramyxovirus have a negative strand ssRNA genome of 15.3kb form which six mRNAs are transcribed, five of these are monocistronic.\ The P/C mRNA is polycistronic and has two overlapping open reading frames P and C, C encodes the nested C proteins C', C, Y1 and Y2 [2]. 25.00 25.00 180.00 179.80 22.20 21.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.42 0.71 -4.70 4 36 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 19 0 0 53 0 194.90 70 97.69 CHANGED M.phlKuhl.LtcRcQEspphophpssoShsSY..SsPTsc+TctsohpSopts+coA...cPolstKscQQ+pt.KIlDQlp+l-SLGcpss.pQ+phlEhLIpKlYptsLGEEhsQhl.LRlWuhEEoPEuspILpMc.chRc.llpMKhERWlRTLlRGKpspL+.FQpRYpEVhPYL.ppKVEpVIMEEAWsLusHllQ- .M.phl+thl..ttRcpEppphopM.SDS.hpSY.sst.psEcTEAGShssSTh.Kcpuh.hcPplpsKpcpp+RRPKIIDQVRRVESLGEQsSQ+Q+HMLEoLINKlYTGPLGEELVQTLYLRIWAMEETPEShKILQMREDIRDQlL+MKTERWLRTLIRGcKTKL+DFQKRYEEVHPYLMhE+VEQlIMEEAWpLAAHIVQE 0 0 0 0 +3664 PF00973 Paramyxo_ncap Paramyx_ncap; Paramyxovirus nucleocapsid protein Finn RD, Bateman A anon Pfam-B_158 (release 3.0) Family The nucleocapsid protein is referred to as NP. NP is is the major structural component of the nucleocapsid. The protein is approx. 58 kDa. 2600 NP molecules go to tightly encapsidate the RNA. NP interacts with several other viral encoded proteins, all of which are involved in controlling replication. {NP-NP, NP-P, NP-(PL), and NP-V}[1,2,3]. 
21.40 21.40 23.40 23.20 20.60 21.30 hmmbuild -o /dev/null HMM SEED 524 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.76 0.70 -5.80 9 5874 2012-10-01 19:59:50 2003-04-07 12:59:11 14 1 449 4 0 2227 0 180.70 74 97.00 CHANGED MAoLLKSLALFKRNKDKPPLAuGSGGAIRGIKHVIIVPIPGDSSIsTRSRLLDRLVRhlGDPDISGPKLTGsLISILSLFVESPGQLIQRITDDPDlSI+LVEVlQS-pSQSGLTFASRGssMDDEADcYFoh--Psuu-ppphtWFEN+EI.DIEVQDPEtFNMlLAoILAQIWILLAKAVTAPDTAADSELRRWlKYTQQRRVlGEFRL-KtWLDsVRNRIAEDLSLRRFMVALILDIKRTPGNKPRIAEMICDIDTYIVEAGLASFILTIKFGIETMYPALGLHEFAGELSTIESLMNLYQQMGEsAPYMVILENSIQNKFSAGuYPLLWSYAMGVGVELENSMGGLNFGRSYFDPAYFRLGQEMVRRSAGKVSSsLAuELGITtEEA+LVSEIAupTs-DRssRuoGPKQuQVSFL+sDpu-stp.psut+--t+shQs+tctppu.+ss+hscsoDppsso.sscThlDlDpspEuspDP.ss++SAEALh+hpAMApILccsshssDoshsYND+DLL ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................KVSSTLASELG.ITA.E.D.A....R....LV.S.EI...A....M.H.T..T...ED.R.hSR.A..VGPRQ........AQ..VS.....FL..H..G..DQSEN..EL.P..GLG.G.KE...D...RRV..KQ.SRGEA......RESaRETGs.S.R.ASDARAAHLPTuTPLDIDTASE.uQDPQDSRRSADALLRLQAMAGILEEQGSDTDTPRVYNDRDLL................................................................................................................................. 0 0 0 0 +3665 PF02725 Paramyxo_NS_C Non-structural protein C Bashton M, Bateman A anon Pfam-B_1636 (release 5.5) Family This family consists of the polymerase accessory protein C from members of the paramyxoviridae. 
20.60 20.60 20.70 21.80 19.10 20.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.82 0.71 -4.62 6 123 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 31 0 0 111 0 164.30 72 90.13 CHANGED pPSc.l..shhph++shpuGp+PssppctpcssssR.+coLRISsNHApQQhDQs+oAphhphIRDLE+ulssLh+hsss.cpspcpsLpYsVIMFMITAVKRLRESKMLTlSWFpQAL.llssSpEEpcsLppAMhILAplIP+EhL.LTGDLLPuLppp-.LM .........................................................G.LSRPSPSAHWPSRKsWQHGQKYQTTQDRoEPPAcKRRQAVRVSANHASQQLDQLKAVHLASAVRDLERAMTTLKhWESPQEISRHQALGYSVIMFMITAVKRLRESKMLTLSWFNQALMVIAPSpEETMNLKTAMWILANLIPRDMLSLTGDLLPSLWGSGLLM....... 0 0 0 0 +3666 PF01806 Paramyxo_P Paramyxovirinae P phosphoprotein C-terminal region Bashton M, Bateman A, Coggill P anon Pfam-B_1628 (release 4.1), Karlin D Domain The subfamily Paramyxovirinae of the family Paramyxoviridae now contains as main genera the Rubulaviruses, avulaviruses, respiroviruses, Henipavirus-es and morbilliviruses. Protein P is the best characterised, structurally of the replicative complex of N, P and L proteins and consists of two functionally distinct moieties, an N-terminal PNT, and a C-terminal PCT [1]. The P protein is an essential part of the viral RNA polymerase complex formed from the P and L proteins [1]. P protein plays a crucial role in the enzyme by positioning L onto the N/RNA template through an interaction with the C-terminal domain of N. Without P, L is not functional.The C-terminal part of P (PCT) is only functional as an oligomer and forms with L the polymerase complex. PNT is poorly conserved and unstructured in solution while PCT contains the oligomerisation domain (PMD) that folds as a homotetrameric coiled coil (40) containing the L binding region and a C-terminal partially folded domain, PX (residues 474 to 568), identified as the nucleocapsid binding site. Interestingly, PX is also expressed as an independent polypeptide in infected cells. 
PX has a C-subdomain (residues 516 to 568) that consists of three {alpha}-helices arranged in an antiparallel triple-helical bundle linked to an unfolded flexible N-subdomain (residues 474 to 515). 25.00 25.00 164.00 163.80 19.90 19.00 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.64 0.70 -5.12 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 20 2 0 49 0 240.10 59 42.99 CHANGED EESToSs-EMATLLsSLGVIQSApEFELSRDASaVFA+RsLKSANYAEMTFNLCGLlISVEKSp-sKV-EN+sLLKQIQE-lcShRDlHKRFSEYQKEQNSLlMSNLSTLHIITDRGGKTDsP-soTRSPSVFTKuKENKlKKTRFDPSMETLGspKaKPDLIREDEhRDEI+NPVhpEpNs-scASNASRLlPS+EKsTMHSL+LVIENSPLSRsEKpAYIKSLpKCKTDQEVKsVMELFEEDI-SL ..tESsp.hcchhTLLpsLGVIQSspch-..pDtphVhstpsLpsAshAphh..lsGLllusphspssKlsp.pp.lhplppslcphc-.a+Rh.E.QKEQ.SLlhS.lSsL+IhT-RGGKpDps-ossRos.lhsKsKEpKhKtTRFDP.MET.G..K.hPDLhRcsEhpsE.cs.VhpEhsopsctSNAoRLlPp+ppsTM+SLhlVIpsSsLSpupKtuYIppLp+CKoDpEVpplM-hhpEDlpS... 0 0 0 0 +3667 PF01279 Parathyroid Parathyroid hormone family Finn RD, Bateman A anon Prosite Family \N 19.70 19.70 20.90 20.00 19.40 19.30 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.49 0.72 -3.37 11 168 2009-09-12 04:59:12 2003-04-07 12:59:11 12 2 66 21 66 160 0 89.80 41 64.87 CHANGED pKRSVSctQLMHD+G+sLp-hcRRhWLQcLLc-VHTAphht................................shstsu.p.KPsssTKNlP.tatL.-pEup..sLsQETpKs.saK-....p.hcs.shKKKsKs ........KRuVSEhQLMHD+GKslQshcRphWL+chlp-lHTAphc.................................sss..csussp+Pt..tppcNh................-sc...t+.....Lspt.sKs............................................................................... 
0 3 8 27 +3668 PF02195 ParBc ParB-like nuclease domain SMART anon Alignment kindly provided by SMART Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.10 0.72 -3.88 51 10329 2012-10-01 20:12:50 2003-04-07 12:59:11 13 78 4621 22 2371 7819 3243 91.10 27 28.42 CHANGED hplslsplp....sppps+........tpplccLhpoIcp..................pGhh.pPllVcppt.....shapllsGcRRhcAsphhG......hpcl.....ssllhphs........cppththslh-Nh ..........................................................................h..lslsplt.....ss.h.pPRph...........spppl.p-.L.s.pSIcp......................................pG.ll...p.....Pl..lVcptt...........................stYpl..l..s.G.c.R..R.h.+......A.s.p.h.hu.........................hppl.......P..s..l.l.t.phs........cpph.h.t.h.slhcN............................................................ 0 734 1530 2000 +3669 PF00644 PARP Poly(ADP-ribose) polymerase catalytic domain Bateman A, Griffiths-Jones SR anon Bateman A Family Poly(ADP-ribose) polymerase catalyses the covalent attachment of ADP-ribose units from NAD+ to itself and to a limited number of other DNA binding proteins, which decreases their affinity for DNA. Poly(ADP-ribose) polymerase is a regulatory component induced by DNA damage. The carboxyl-terminal region is the most highly conserved region of the protein. Experiments have shown that a carboxyl 40 kDa fragment is still catalytically active [2]. 
23.90 23.90 23.90 24.00 23.60 23.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.32 0.70 -4.90 28 1951 2012-10-01 23:25:29 2003-04-07 12:59:11 15 267 268 92 1317 1843 56 176.60 20 23.35 CHANGED Lps.plpsLcpsScEaphlppYhpsTtsss+t.....shplhclF+lpRpuEtccFpttcp.......hcN..............+hLLWHGSRloNasuILspGL+ls.spuPlsGhh.....FGKGlYFADhsScSA..pYshs..sts.......ss.....suh......hlLu-VALG-.h.clhtsp..h..pp...shpSstGhGcstP......tt......ps.....lPtucs.hssthpsst...l..sEYlVYcssQl+h+YLlclph ............................................................................................................h..tp.pat.............l.p.h.p....s.h.t......................thp...l......p....l....c.....l....p...p.......t.........pp..apt.tpp..........................................hs.................................cphLaHG.o...p.h....p.........s..h.................t..u..Il...p...p..Gh....................s..............h..s....s...h.p.....G..t..h............................aGp..G.l.YF..A..s..p...s..o..h.Ss...........p..Ysts......ts................................ps.......th........................................hh..lsc.VhlGp..................................................hh.u....t.............................................................................................................t.alla.p....t...ph..tall.h..h................................................................................................................. 0 534 710 997 +3670 PF02877 PARP_reg Poly(ADP-ribose) polymerase, regulatory domain Bateman A, Griffiths-Jones SR anon Bateman A Domain Poly(ADP-ribose) polymerase catalyses the covalent attachment of ADP-ribose units from NAD+ to itself and to a limited number of other DNA binding proteins, which decreases their affinity for DNA. Poly(ADP-ribose) polymerase is a regulatory component induced by DNA damage. The carboxyl-terminal region is the most highly conserved region of the protein. 
Experiments have shown that a carboxyl 40 kDa fragment is still catalytically active [2]. 26.80 26.80 26.90 27.00 25.50 26.70 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.57 0.71 -4.49 35 515 2009-01-15 18:05:59 2003-04-07 12:59:11 9 63 207 33 360 542 8 135.40 33 17.01 CHANGED cScLstpVQ-LlpLIFDhctMppsMtEhpaDscKhPLGKLSpcpIppGYplL+clpchlp...............tssppstl.c......LSNcFYThIPHsFG...hp+PP..llcotchlKpKlchLEuLp-I-lAspllcsspssc....hssLDppYc ..........................................................spLs.tlpcLlphIash..ch...h...c..psM.h...c.h.................p.hD.h.p....K..hPLG.KLS+ppI.ppGapsL...pclpphlp................................................ts.s.pp.ppl.-...........lSscFYTlIP.H.s..F.G....h.pp.PP.....lIss................ctlp........p.K.l.c.hL.......-sLt.DIElA.pLlpsspp..sp.........cPlDtpYp............................................... 0 142 203 291 +3671 PF01358 PARP_regulatory Poly A polymerase regulatory subunit Bateman A anon SCOP Domain \N 24.00 24.00 24.50 25.00 23.70 23.90 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.90 0.70 -5.77 6 111 2012-10-10 17:06:42 2003-04-07 12:59:11 13 2 69 24 18 112 3 256.90 44 70.01 CHANGED huhcKP.hhYFcEIssEh-YcsEstsph..pKhPhQGQLKLLlGELaFLs..pLp++shLsu.sslVYIGSAPGsHIpaLh-aapshsl.IKWhLlDGRsHDspLpuLps....Volls+FVDEcYlpph+pt..hph.+llLISDIRSpRG..pEPoTcDLLpDYuLQN.MlSlLKPlASSLKWRCPFPDQW...I+-FYlPcGpEhLQPFAPsaSAEMRLLSlaousshpLpsloppcuhpYEKKMaYLNphlR.+IllsFDYsNQcYDaFaMa+hL+Tlhhs..KoFsosKsKVlalppSIF+hLsI 
.........................h.thpcP..hhahp-lstth-Ycscsspp...h.....KKhs..apGQhKLLLuELhFLo......+LpR+..Gh...Ls......u......ssVVYlGSAPGoHIpaL......c.-hF.shs..l+WhLIDsRpHDs.h.LpuLcs......VoLls+Fs.sEp...hl+..pl+cp..hp..tcIlLISDlRStc..s.......ssEP..s.T...tD....L...L.p...sYsLQNhh..lplLpPhAS.LKaRsPFP...DpW.............hccah..lscGschLQsFAPphSuEhRLlslhss.pshp.hppls.p.DshpYE++MaYhNpllR.phl...lsFDYsspp..Y.DaaahahhL....ps.lh.s..p.a..s.ptpll.hppthFp.lt................................................. 0 14 16 18 +3672 PF00740 Parvo_coat Parvovirus coat protein VP2 Bateman A, Finn RD anon Pfam-B_436 (release 2.1) & Pfam-B_445 (release 3.0) Family This protein, together with VP1 forms a capsomer. Both of these proteins are formed from the same transcript using alternative splicing.\ \ As a result, VP1 and VP2 differ only in the N-terminal region of VP1.\ VP2 is involved in packaging the viral DNA. 19.80 19.80 20.00 20.00 18.20 19.70 hmmbuild -o /dev/null HMM SEED 529 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -13.16 0.70 -5.55 24 3412 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 190 158 0 2269 0 301.20 29 80.82 CHANGED G.......suGuGuuGVGsuoGsWcssopapt.....spVospsTRphhLsh.ssc...Ypplp..ssspos.............sssahshsTPWuYhDhNtassaFSPpDWQpLlNshtph+PcuhphcIFNl.lKpVTp....sss.ssshsNsLTuslplhsDssapLPYs.ussppsshs.aPhcsahlsQYuYh.....................sohssssss...p.scc........osFasLEpt.shphLRTGspFph.oYpFp.shPh+psasappshphhhNPlhpphhhthsshsssss.st......hppsptss..httpspNalsG............Pt.....h.ppshpsssusshpsshs....................ts.tthssppsshsst.shspptspssstt......hppphtass.s.puscssp.thspphshscptht.....t..t.p.hh.......hssss.uspos.ph...........sslhshhhhsuulWpscslYhpG.IWsKhPcsDt+h+hp.sthGshshppPPsQlFlKhsPsPss...p..ss.usssShIspYuThphpsclpaclc.+csoppWNP.hQhohss.......s.tshlsas 
.................................................................................................tt......G...spu.a.tts.h.........lhsp.oR..hh....tc...Y+.....s...................p..hth.TPWthhshNtht.aFsP.-aQhlhpph.tht..thp.pl.tl.l..Kpl.p.....t.................................................................................................................................................................................................................................................................................................................................................................................................................................ss.pss.ph...............ph.h..stl.s..slh.ps.lWsK..phD.p.hhp.s..s.hs.ppsPsplFlKhh...ss.......t..s.....ohls.Yssh.hpsph.acht..h.totpWpP..p.s............................................. 0 0 0 0 +3673 PF01057 Parvo_NS1 Parvovirus non-structural protein NS1 Finn RD, Bateman A anon Pfam-B_400 (release 3.0) Family This family also contains the NS2 protein. Parvoviruses encode two non-structural proteins, NS1 and NS2. The mRNA for NS2 contains the coding sequence for the first 87 amino acids of NS1, then by an alternative splicing mechanism mRNA from a different reading frame, encoding the last 78 amino acids, makes up the full length of the NS2 mRNA [2]. NS1, is the major non-structural protein. It is essential for DNA replication. It is an 83-kDa nuclear phosphoprotein. It has DNA helicase and ATPase activity [1]. 
20.00 18.00 20.00 18.20 19.90 17.90 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.45 0.70 -5.53 24 1228 2012-10-05 12:31:08 2003-04-07 12:59:11 12 11 210 5 11 1432 7 174.20 34 51.93 CHANGED p......................spspcca..............hsLlchLl........ccGlsoEcpWhths..ppYhphpssssutpplcsuLphsppchssstsshcalsptsss............hshppN+lhplhphpGYsPhhsGphlhsWhs+phGKRNTlWha.........................GPusTGKoplAp...................AIApslPhaGsV..NWsNcNFPFsDsss+hll.WW-EGhhpsphVEssKulLGGpsl+VD.....pKs+sosplpsTPVllTSNs-hshV.hsGsssohtHtps............Lc-RMhphphscplsss....aGhlopp-l+pah.pWupp.........thphshphh .....................................................................................................................................................................................................................................................................................................................................................................................................hh...........................hlst....h.......h.G..hl....Nh..N.....s..FsF..s...D...st...t.+...ll.haE.E..s..h..h..p...p..s...hV...E..s..uKsILG.Gp.tsRlD......Kt+sS.h..l..sPVlI.o..oNtD..l....h.l.ssGssso..Htts............lppRhh.hph......h..s.....hs.h.....h..hh.........................h................................................................. 1 9 9 10 +3674 PF00989 PAS PAS fold Bateman A anon Sequences from SMART alignment Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.22 0.71 -10.01 0.71 -4.29 49 15580 2012-10-04 01:10:46 2003-04-07 12:59:11 19 1711 3946 104 4551 43707 3363 102.20 15 16.49 CHANGED pchptllcs...hssslhshD..tsGtlhhhNsshpplhGhst..pchhGpslhphl.ttp.......hhptltphhtstpptpshphphth..........puphhhhplpssshhstssp...shhshhpDl .............................................t..hptllps.........hs..s..s..l....l...s....l......-.......pp.....G.......p..........l..........h...h....h..Ns.s....s.p....p....l......h.....G.....h.......st.......p..-........h..........h.......G.......p.......s.........l....h....p.....h.......l.............................................hh...p..h..h....t.......p......h.....h..........t......t......p......................t...........p...h...h.....................................t.......h.....h.........hp..h........h.......................................................................................................................................... 0 1473 2845 3740 +3675 PF03793 PASTA PASTA domain Yeats C anon Yeats C Domain This domain is found at the C termini of several Penicillin-binding proteins and bacterial serine/threonine kinases [1]. It binds the beta-lactam stem, which implicates it in sensing D-alanyl-D-alanine - the PBP transpeptidase substrate. It is a small globular fold consisting of 3 beta-sheets and an alpha-helix. The name PASTA is derived from PBP and Serine/Threonine kinase Associated domain. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.77 0.72 -4.32 237 11616 2009-01-15 18:05:59 2003-04-07 12:59:11 14 55 2312 34 2319 8832 1936 62.10 23 21.30 CHANGED splP.slhG....hshpcApp.hLpp.......tGl........................ssshspGp...VlpQsPss.....Gspl.pp..us.plplhlup ..................tlP..s.l.hG............hshppApp..tLpp.........tGl...........ht..t................ssphs.pGp.......V........l.....p.......Q.....s.P.s.s.........Gsp..l..pp....ss..plplhlu.................................. 0 910 1702 2086 +3676 PF00292 PAX 'Paired box' domain Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.47 0.71 -4.42 6 1629 2012-10-04 14:01:12 2003-04-07 12:59:11 13 23 304 7 623 1527 5 115.90 67 31.76 CHANGED GpGcVNQLGGVFlNGRPLPNpIRp+IVEhAppGlRPCsISRQLRVSHGCVSKILsRYpETGSIRPGsIGGSKP+.VsTP-V.s+IcEYK+psPuIFuWEIRD+LLp-GVCDppslPSVSSISRlLR .......................................................putVNQLGGVFVNGRPLPs..sl..Rp+.IVE.LA.+.p.G.l.R.PC.DI..SR.Q...........L................+..............V...............SH............GCVSKILu........R........Y.hETG.S..I..+.P..G.s.I...G.G..S.K.P..............+........V.............A...TP.....c..........VVp+Itp.YKRc..sP..shF....AW..EI...RDRLLs..............-Gl...CspsslP.....S........V......SSIsRllR........................................................ 
0 161 211 439 +3677 PF03535 Paxillin Paxillin family Griffiths-Jones SR anon PRINTS Family \N 19.20 19.20 19.30 19.50 19.00 19.10 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.48 0.71 -4.50 6 135 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 44 6 60 102 1 163.20 41 33.41 CHANGED ssPP.V.sPPSu-sLNGo......pWs.usppatsppP..ppsPhh.sStsKs..........SSsssssuEEEHVY........................SFPNKQKSuEsSsssMoSSLGSNLSELDRLLLELNAVQcSsP.uFPu-Ett.uPPLPuusss+Y.l.ENusSsssKsuPPspEKPKRN.uG+slEDVRPSVESLLDELESSVPSPVPsspsup.u-hsuPQcsssSQQ ..........................................................................................................................................tp.s..u.s...s..s..s..t.s...u-c-HlY........................Sh...PsKp...+S...u-Puss.s...h...SSS...LGoNLSELDRLLLELNAVQaNss.....u.........Fss..p...............-t......t...usshPss............Epsss.shpsss....c+.P.ts..sphh--hRPoVEoLLspLEsuVPs..........ss........s...p..t.p........................................................................................ 0 4 9 28 +3678 PF03717 PBP_dimer Penicillin-binding Protein dimerisation domain Yeats C anon Yeats C Domain This domain is found at the N terminus of Class B High Molecular Weight Penicillin-Binding Proteins. Its function has not been precisely defined, but is strongly implicated in PBP polymerisation. The domain forms a largely disordered 'sugar tongs' structure. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.03 0.71 -4.21 165 11499 2009-09-13 02:10:02 2003-04-07 12:59:11 10 19 4376 46 1966 8801 4788 176.10 24 27.45 CHANGED hl.stRGpIh........DRs......Gph.LAsstsshs.......lhhsspphpptpt......................................................hpcLuplL.t....hs....pclpp...........................................................................................................................................tttpshp.lhltc............................pls.....cphsplpphthp.....................Glthp.sp..pRhYPp..us...hsupllGalst.......................................................t.thG....psGlEpta-phLpGpsGtpp.hps..cstGp.hltphp ...................................................................................................................................................................................................stRGpIh...............DRs......Gps...LA.tstsshs...........l..hh..s.....p..p.ht.p.t.t.p..h..............................................................hppLupll...p......hst.........pplpc........................................................................................................................................................................................................................................................................................................................................................................................t...pt..t.p..p.h..p..hh..hltp...................................................pl.s.....pp.h....t...t....lpp..hthp...........................................Gl.php...sp...pRhY..P..t...Gp......hhupllGassht....................................................................t.ttthG...ppGlEpta.-...........c......h...........Lp....G.p.s.Ghpp..hph....Dph.Gphl...tt..............................................................................................................................
.......................... 0 675 1302 1665 +3679 PF01395 PBP_GOBP PBP/GOBP family Bateman A anon Pfam-B_1765 (release 3.0) Domain The olfactory receptors of terrestrial animals exist in an aqueous environment, yet detect odorants that are primarily hydrophobic. The aqueous solubility of hydrophobic odorants is thought to be greatly enhanced via odorant binding proteins which exist in the extracellular fluid surrounding the odorant receptors [1]. This family is composed of pheromone binding proteins (PBP), which are male-specific and associate with pheromone-sensitive neurons and general-odorant binding proteins (GOBP). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.75 0.71 -4.49 158 2289 2009-01-15 18:05:59 2003-04-07 12:59:11 17 9 218 135 802 2480 0 116.50 15 75.34 CHANGED hhhhh..ht.........hhtphtpph.........pphhppChpc...h.slsp.ctlpphpptphsss.........pphcCahpClhp.p..hshhs....psupl..p..hctlhphhtthhtt........pphpph.....lppCt.......stssss...CcpAap.hhp....Chhpppt ........................................................hhh.....................htthhptChpp...............tlst...p.t...h.p.p.h.t.p.h..p..h.ss..............pphpChhp.Chhp.c..hs.l.hs.........ps.u.p.h......p...hc..p..h.hp.h.htthhtt..........pphtph......hppCt..............ttptps.......tC...p...t.uhp.hhp....Chhp...h................................................... 
0 184 287 686 +3680 PF00427 PBS_linker_poly Phycobilisome Linker polypeptide Finn RD anon Pfam-B_159 (release 1.0) Family \N 24.60 24.60 24.90 30.90 22.60 24.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.86 0.71 -4.48 152 614 2009-01-15 18:05:59 2003-04-07 12:59:11 16 8 108 15 206 625 257 129.90 39 46.64 CHANGED hpsssspschp..plIpAsYR...QlaspphshpspR..hsslE.SpL+sGpIoVR-FlRuLupS-hY+ppFapsssshRhlELsa+HlLGRuPhsppEhtta.plluspGhpuhIDuhl-SsEYtpsFG-csVPY.Rs ...............h....hopsphptlIcAuYR...QVhu..pp..h...h.hp..spR...hsshE.SpLcsGpIoVR-FlRuLApS-hY+cpFapsssshRhlELsa+HlLGRuPhsppEhttahplhus...........pGapAhlDuhlDStEYtcsFG-ssVPY.R............. 0 28 118 181 +3681 PF03792 PBC PBX; PBC domain Finn RD, Mistry J, Burglin T anon Pfam-B_3021 (release 7.0) Family The PBC domain is a member of the TALE (three-amino-acid loop extension) superclass of homeodomain proteins [1][2]. 25.00 25.00 26.00 27.40 20.10 23.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.37 0.71 -4.61 8 415 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 103 0 176 302 0 161.20 75 49.67 CHANGED Shu.....phtpslu-lLQQlhsITDQSLD.EA.QARKHuLNsHRMKsALFsVLCEIKEKTsLShRss.......E--PPDPQllRLDsMLlAEGVuGP.........-cuGstuAsuuusss......ssohEHuDYRAKLuQIRpIYHuELEKY-pACsEFTsHVhNLLREQSRoRPISs+EIERMVsIIpRKFsuIQhQLKQSTCEAVMILRSRFLD .......................t.........p.hptlhtI.s-psLD.-s..pt+..KasLss.HRMKPALFs.VL.CEIKEKTs.LSIRusQ......EE-PsD..PQLMRLDNMLLAEGVuGP.........EKGGGuA....AAAA..AAAAoG......sssDNSlEHSDYRAKLuQIRQIYHoELEKYE...........QACNEFTTHVMNLLR.EQSRTRPIoPKEIERMVuIIHRKFSoIQMQ....LKQSTCEAVMILRSRFLD.......................... 0 32 51 106 +3682 PF02229 PC4 Transcriptional Coactivator p15 (PC4) Bateman A, Mian N anon Pfam-B_6534 (release 5.2) Domain p15 has a bipartite structure composed of an amino-terminal regulatory domain and a carboxy-terminal cryptic DNA-binding domain [1]. 
The DNA-binding activity of the carboxy-terminal is disguised by the amino-terminal p15 domain. Activity is controlled by protein kinases that target the regulatory domain. 20.60 20.60 21.10 20.90 20.00 19.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.70 0.72 -4.74 203 1136 2009-09-14 14:13:59 2003-04-07 12:59:11 11 16 977 22 344 696 66 53.70 41 52.06 CHANGED sh......hph..utp+clslppapGp.shlDIR-aat..c.s.t..hP..spK.GIoLoh-p......hptLtchl ...................u..spp.GapKclshlSaNGt.sKaDIRpWss...D+s.+..........huK....G..ITLosEE......appLhct.h................ 1 134 213 285 +3683 PF01851 PC_rep Proteasome/cyclosome repeat Bateman A anon [1] Repeat \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.40 0.73 -7.87 0.73 -3.42 157 1407 2012-10-11 20:00:59 2003-04-07 12:59:11 17 15 352 4 875 1693 10 34.70 28 7.63 CHANGED uAshulGllttG.sssp...tshphLp..ph.h..sspsshtt ...uAhhuLGLlhtG.ossp..pllphLhsh.hp.pspp.h............ 
0 256 433 695 +3684 PF01135 PCMT Protein-L-isoaspartate(D-aspartate) O-methyltransferase (PCMT) Finn RD, Bateman A anon Prosite Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.23 0.70 -4.80 9 3573 2012-10-10 17:06:42 2003-04-07 12:59:11 14 23 2359 22 1324 13744 3799 199.20 32 79.48 CHANGED tp.pptLlcpL+ppGhlto-+VhcAMtsl-RpcFlsc.....tsYhDsPhsIGas.....sTISAPHMhAhhhEhLc..LpsGh+lL-lGoGSGYhTAshAphVGppG.....hsluIE+I.cLstpuccNlcp.shp......sVhlhhGDGphGasthAPYDAIaVuAAuPclPpsLlcQLcpGGRLllPVG...stpQhlphh-Kps.Gpl.h+shtsVhaVPLsspct .....................................................................................p...thl.p.p.h..h..t...t.s...l.p...s.tp..VlpAht......t..l...P..Rc.h.Fl...............s..........t..........s........h...t...........p.....t......A.....Y......p.......s......t......s....l...P..I..u..t.u............................po.I...S..p..P.....h...h.....l......A.....+...M...h...c.....h....L.p..............l..p......s.........s....s.........+....V..L.EIGo.G.S.G.Yps..A....l....L...A...c...l....s........t.....c....s...h..........................ol.E......+.h.p........s.....L.t....t..p.A...+.....p.....p.....L.....c....p....h...s.h.p.......................................N.l.p...h.....t.....h.....G......D........G.....h.......p.........G......a.......s.....s........p.........A.........P...........a.....D......s......I.......l......V........s.......A......A........s.........s..........p..........l........P.........p........s..........L..........h........p........Q........L....p...............GG..h.....L.......V...h.......P...l.........G..........t.........tt............Q.....h.l.h.h.l.p..+.....p..s.............s...ph.....p..l..s..haVPlh....t......................................................................................................... 
0 387 810 1083 +3685 PF00705 PCNA_N PCNA; Proliferating cell nuclear antigen, N-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_598 (release 2.1) Domain N-terminal and C-terminal domains of PCNA are topologically identical. Three PCNA molecules are tightly associated to form a closed ring encircling duplex DNA. 20.30 20.30 20.60 21.70 20.20 19.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.51 0.71 -4.62 13 917 2012-10-02 11:47:48 2003-04-07 12:59:11 13 9 610 146 433 892 363 118.30 32 46.62 CHANGED MhEARLlQGSlLKKVLEulKDLls-AsFDCSuoGlsLQAMDSSHVuLVuLpLRSEGF-cYRCDRNluMGhNLsSMuKlLKCAGN-DIlTl+A-DsuDTlshlFEsssp-+ls..DaEMKLMDlDsEa .........................hEsch.pu.slLK+ll-ul..+..-L.lp-..ssa...........-.ss.p.sGlp....lQuMDsSHVuLV..sLhL.pscuFp.c..Y..+..C..D...+.........s.h.sl.GlNls.shsKlLKpus..sc.D..hlp.l..c.t...........c.-.s.s......-..s.l..s..h....h..h..Es.p..pc...+...hs..phchh.hp.............................................. 2 146 247 355 +3686 PF02747 PCNA_C Proliferating cell nuclear antigen, C-terminal domain Bateman A, Griffiths-Jones SR anon Pfam-B_598 (release 2.1) Domain N-terminal and C-terminal domains of PCNA are topologically identical. Three PCNA molecules are tightly associated to form a closed ring encircling duplex DNA. 
20.60 20.60 20.60 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.51 0.71 -4.22 12 906 2012-10-02 11:47:48 2003-04-07 12:59:11 10 9 585 135 428 908 335 118.50 31 47.57 CHANGED GIPEpEYsslV+MPSuEFARIC+DLSpIGDsVVISsoK-GVKFSssGDlGsuNIhhpQsosVDK.E-AshIEMsEPVsLTFALRYLNtFTKATPLSspVTlShSu-lPlVVEYKIA-MGal+aYLAPK .................................................................................IP-...cY..sshlphsSsEFt+Is.+DLpp.h.u..-.....s......l.h..I..ps..s..K.-...u.l..pF..s..sp.G..-.......hG.s.uslhl...p....p.........p......h...-..c.......-.p.......t.....l...plph...p.-.s.Vs...h.oF..ul+YL...ss.F...s.K.A.s.s.L.u.s.pVplph.us.-..h.P.lhlcY.p.......l......t.......s.....h.....G.h.l.paaLAP......................... 0 155 249 354 +3687 PF02429 PCP Peridinin-chlorophyll A binding protein Bateman A anon Pfam-B_2945 (release 5.4) Domain Peridinin-chlorophyll-protein, a water-soluble light-harvesting complex that has a blue-green absorbing carotenoid as its main pigment, is present in most photosynthetic dinoflagellates. These proteins are composed of two similar repeated domains. These domains constitute a scaffold with pseudo-twofold symmetry surrounding a hydrophobic cavity filled by two lipid, eight peridinin, and two chlorophyll a molecules [1]. 20.10 20.10 22.20 21.40 18.80 18.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.90 0.71 -4.08 9 449 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 30 13 0 428 1 140.30 71 79.66 CHANGED Dp.IGcAAKpLSEASYPFlK-IDWhSD....lYlKsLPut.ss.pshcAIDKMIhMGAshDsshLKAAApAHH+AI.GSID.ApGVTShADYsAVNAALGRhlASVPKupsMDVYNuhAsh.hssslsstMFupVNshDApAAhKAFhsFKDVV ......................DcIGsAAKtLuDASYsFhK-lDW.ss....laLp.hPGp.ps.csLKAIDKMI.MGAthDspLLKtAA-AHHKAI.GSIs.spGVTShADa-AVNAALGRlVASVPKQpVMDVYsuhtcI.sDPpVss.MhShVNshDA.pAhpGFhpFKDVV........ 
0 0 0 0 +3688 PF01884 PcrB PcrB family Enright A, Ouzounis C, Bateman A anon Enright A Family This family contains proteins that are related to PcrB Swiss:Q53726. The function of these proteins is unknown. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.59 0.70 -5.11 5 644 2012-10-03 05:58:16 2003-04-07 12:59:11 12 1 623 16 186 1485 1169 227.50 42 96.70 CHANGED a-llEc+t.lHlTLLDP--ssPEEtlEll...t-uGTDAIMlGGSs..tuVsLDNslRtI+Kl..hsLPIILFPGsssGlSRYADAlFaMSLLNSsNsaWIlG.......ApsLGAtTlpKhs..lEslPMGYlVlEPuss..VGaVG-A+.lPpNKPcIAAhYsluucaLGMRlhYLEAGSGAstsVsEEslclsKsLscssLIVGGGI+SuEpA+chlcuGADlIVTGNllEEssp.lEctl+sltcst .................................................................................................................................................................................phppW+...HlFKLDP.s...K....p.l....s....D...-.....s...L...-tl...........s.SsTDAlh..lGG..oD.....sVT......DNV...l.+..l..hscl+................caslP..lVLE....lSsl.-u.l...h....P....G....hDh...YalPoVLNSp.cstah.sG.............................hphEAl.K..paG.chhs.....a-Ellh...EGYlVl....Ns-..uK.........VAp.lTcAp.ss.l.s.p-Dlt.AY.Aph.Asc...........hh+l..P..lh.Y.lE.Y.....S..G....s..Y..G...D...l......p...t...V...p...s..l...s...p...p...L.....s.......c.......s....p.......L.....a...Y..G.GGI.pstcpApEMAph..ADTI.VVG....s....l....I....Yc....D.l......cpA.LcTV...ch.......................... 
0 58 123 158 +3689 PF04194 PDCD2_C Programmed cell death protein 2, C-terminal putative domain Wood V, Finn RD anon Pfam-B_19053 (release 7.3); Domain \N 30.00 30.00 30.90 30.40 29.60 29.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.31 0.71 -4.65 24 515 2009-01-15 18:05:59 2003-04-07 12:59:11 8 14 299 0 347 505 5 170.90 25 44.09 CHANGED P.......a.p.a.lss-.Eshp...........shsp.spptplssh-p.tstts...sup....-tc-hhEt......stD+sFp+FpcRlupsPEQlLRY............pasGp..PLhhSpsssssch..........................lPpCs..CGupRlFEhQlhPphlshLcsc...p.s.........uh-WGTlllhsCs+sC.......stsGYh.EEashVQ.-. ..............................................................................................h......h.l.ht.E..................t....p...p..p..tp....h.....t..pp..t.tt..ttt........tt.....................tpp-thEt.........ptD.cs.Fp+Fpp+lu...t.s...P...cQlLRY....................shs..Gp......P.L.hh.osp..s....s.t.p.t............................................lPpCs..CG.upRhFEhQl...hPpllshLchs.....p.s.................................................uh-WG....TllVaTCt..psC...................tst.sah..cEalhlQ........................................ 0 120 191 285 +3690 PF04868 PDE6_gamma Retinal cGMP phosphodiesterase, gamma subunit Mifsud W anon Pfam-B_4858 (release 7.6) Family Retinal rod and cone cGMP phosphodiesterases function as the effector enzymes in the vertebrate visual transduction cascade. This family represents the inhibitory gamma subunit [1], which is also expressed outside retinal tissues and has been shown to interact with the G-protein-coupled receptor kinase 2 signalling system to regulate the epidermal growth factor- and thrombin-dependent stimulation of p42/p44 mitogen-activated protein kinase in human embryonic kidney 293 cells [2]. 
25.00 25.00 67.70 67.60 20.70 18.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.88 0.72 -4.13 5 124 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 56 4 53 95 0 77.10 77 91.72 CHANGED PPcuT..opusPsAosGPTTP+KGPPKFKQRQTRQFKSKPPKKGVpGFGDDIPGMEGLGTDITVICPWEAFSHLELHELAQYGII .............stsphhusstsstGPsTPRKGPPKFKQRQTRQFKSKPPKKGVpGFGDDIPGMEGLGTDITVICPWEAFsHLELHELAQYGII...................... 0 2 6 23 +3691 PF00233 PDEase_I PDEase; 3'5'-cyclic nucleotide phosphodiesterase Finn RD anon Prosite Domain \N 24.80 24.80 24.90 24.80 24.40 24.70 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.49 0.70 -4.61 8 2707 2012-10-01 20:28:14 2003-04-07 12:59:11 14 49 307 332 1579 2650 61 218.10 32 32.64 CHANGED YHNhhHAhDVoQosahLLtssulcphho-lElLAhlhAAhhHDlDHsGToNpFplpo..cS-LAlLYssc.SVLENHHluhuh+LLQsEphsIFpNLo++capplhchlh-hlLATDMStHhphhccl+shlpptch............h.hs...c+hpllsLll+AADLSssTKsaplp+RWsthlhtEFFpQGDhEpphGhc..pPMCDRcsA.hlspsQlGFIDaIscPlaplLsDlscc..spslh-tl-sN+ 
.................................................................YHN....hHAhsVsps......h.............ah.h....l...............................p.................s..........t.................l..................t..............t..h..............h..........s..s..........l....E.h.............h.A.hlhAAhhHDl..D.HsGhsNs....Fhlp.o..................ps..L...A..h..l..Ys......cp....SV.LE..........s..HHh.uhu.h.p.l.L...........p..........p..p..p......h..sI..h..........p.....s..........L...s...............p..c.............p......h............p............p..h.........c..p.........h........l...hphlLAT.D.h..........u.......p..H...h..p..h......l...s..p...h....p....p....h.l.ppt.ph........................................................h..p.........c+h.hl...h.p....h...h.l+.s..uDlSss.................s..+......shc.lpp............p...W...sp...............tlhpEF...ap.Q.................G.D.........hE+..p.h.........s............h....h.............s......P...h...h....D...R............p..............p...s...p................l...s.......p....Q..l.u.FIca.l.s.......hPha..p.t.h..sp.h..h.................................................................................................................................................................... 
0 624 791 1167 +3692 PF02112 PDEase_II cAMP phosphodiesterases class-II Mian N, Bateman A anon IPR000396 Family \N 23.70 23.70 23.70 23.70 23.20 23.60 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.20 0.70 -5.52 5 323 2012-10-02 15:46:01 2003-04-07 12:59:11 10 6 249 0 179 390 31 254.00 25 76.11 CHANGED Fss..........ssLGQsGGlp-GstouaLlcccusssFlpLDuGollpuls.cLslSKahusshsITlPp.s...h-sushtKsoahlcs+IpsYaIoHuHLDHVuGLVINSPshh.t......p...........................sKKTIaGLsaTIcsLpKHlFNsplWPNLsutGph.h............h.plshh-LsPuEasslTtTThSlls.........................FPlsHuuulhp...hh.....STuFLF+DslS......s-sIlsFGDsEsDpssup.ShptcIWuslAshItpsKLKuIlIECSsPp-oPDspLFGHLoP+aLlpELspLp.......ohssSou.sLssLN...VIloHlKsslAcss......NPccsILtpLcpLsEtssLG.VsIpIsppG ..........................................................................................................ht..ttth.t..h....t.................................hhth-ttsh..h.............................................................................................................h...tshhhhp.ltsahIoHsHLDHlu..G..h..l..l.s.o.st...............................................p..K....l..hu.stTl.pslppthFN.hhWPN...h...ss..st.h.........................................................................................................................................................................h.hpp...s.....................................Sohhhlppp...............................p.hhhFGDsts............Dp....hp....h............p.....t.pplWp..hA.hl.........t..tpL+ulhlEsSa..s.s.s..s.p.p.L.a.G.HLsPphlhtELp.Lt..............................................phs.....t...t........h.....sh......lhl.H.hK...................................................h...t............t..h.h.................................................................................................................. 
0 41 97 144 +3693 PF00341 PDGF PDGF/VEGF domain Finn RD, Bateman A anon Prosite Domain \N 20.80 20.80 22.80 21.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.53 0.72 -3.87 57 855 2012-10-02 16:54:34 2003-04-07 12:59:11 12 13 154 99 329 797 0 80.20 36 33.29 CHANGED CpPR-slV-lhpEasspsst..hap..PsC.VslhRCu...GCCs.c.EulpCsPopspslohplhcl............ttt.spphhhlshtpHspCcC ......................CpsRphlV...-l...hp.Eh.....ss.p..s.st...hFh..PsC.V.lh...RCu...G...CC..s...s...culpChPo..p.spplshplh+l..............................t..hp.p.phsplshtpHppCcC....................................... 0 40 65 165 +3694 PF04692 PDGF_N Platelet-derived growth factor, N terminal region Kerrison ND anon DOMO:DM04730; Family This family consists of the amino terminal regions of platelet-derived growth factor (PDGF, Pfam:PF00341) A and B chains. 24.90 24.90 24.90 24.90 24.60 24.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.73 0.72 -3.48 10 162 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 44 9 51 136 0 73.90 43 34.82 CHANGED EEssIPcELIERLu+SEI+SISDLQRLLEIDSV................usEDssppcl+ppps+sspH.....h.-h..+sl.SRRKRS......lEEAVPAl ..........................pts.IPcELhEhLucSpI+SIcDLQRLL......c..hDS.V.................tEDsh-tsLph.ppsHss.pc...............p....Rt+RS..........hE.Al.A....................................... 0 2 6 18 +3695 PF00800 PDT Prephenate dehydratase Bateman A anon Pfam-B_1095 (release 2.1) Family This protein is involved in Phenylalanine biosynthesis. This protein catalyses the decarboxylation of prephenate to phenylpyruvate. 
20.60 20.60 20.60 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -11.10 0.71 -4.74 134 4375 2009-01-15 18:05:59 2003-04-07 12:59:11 13 27 4002 7 1176 3130 2645 177.50 33 54.29 CHANGED luaLGPpGoaScpAAh....phh.......stssphlshsohp...............clhcuVppsc...s.-hullPlENSh..pGsVstohDhL..hp.t..s..................lpIhuEhh...l.lcHsLhstsss............................ph..pc.Icp........lhSHPQAluQCppaLppp....sphp..hhsssSTAtAAch.....t....................pppts.....sAAIuuphAAchY.uL.plltc.sIpDpt.sNhTRFlllucpts ....................................lAaLGPcGoaochAsp..........................phh.........................tph..ph...h..s..hs.s.hp......................................-lhp.sVc...sGp............s.-auVl..P..lE...N..oh....pGu.lspshDhL..hc..s.....s.................................lpIsGE.hh...lsIcas.Lls.t.sss.........................................................ph...pp..Ic..p............la..SHP..............Qu...........luQCppaLppp.........s..p....hc...h.sssSTAtAAch........l..t...........................t.p.s.s..........sAAIus.c.t.uA.p..hY.....GL.p...........lltc..sI..........p-.p.p.p.NhTRFlllucp.s................................................................................................... 0 367 767 1005 +3696 PF03740 PdxJ Pyridoxal phosphate biosynthesis protein PdxJ Bateman A anon COG0854 Family Members of this family belong to the PdxJ family that catalyses the condensation of 1-deoxy-d-xylulose-5-phosphate (DXP) and 1-amino-3-oxo-4-(phosphohydroxy)propan-2-one to form pyridoxine 5'-phosphate (PNP). This reaction is involved in de novo synthesis of pyridoxine (vitamin B6) and pyridoxal phosphate [1]. 
24.50 24.50 25.20 34.70 22.80 24.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.54 0.70 -5.22 9 2100 2012-10-03 05:58:16 2003-04-07 12:59:11 8 4 2052 50 516 1491 2175 235.70 50 96.84 CHANGED hhLGVNIDHlATLRpARssthP-slcAAhlAtp.AGADtITlHLREDRRHIp-pDlhhlpclhpsc.....hNlEhulo-Ehht.......lALcs+PcpVsLVPE+RpElTTEGGLDlstttp+lcshlccLpssGhcVSLFIDss.cpIcAutpsGAshIELHTGtYAsh+s...................cscptcp.h...hp+lppsAthAt-lGLtVsAGHGLsYpNVpslstI.t..ltELNIGHullucAlahGLtpAVtcMtplhtt ...................h.hLGVNIDHlATLRNAR.........G..s....s.........aPDPVpAAhlAEp.AGADGITlHLREDRRHIpDcDVphL+.p.slpT.+.............hNLEMAlT.-.E.M.ls.................IAlch+....P.chsCLVPEKRpElTTEGGLDV..su....p..t..-+lpsssp+Lps.A..GI.cVSLFID..s..D..t..cQ.....IcAAtc....l..GA..s.hIElHTGsYA.c.A..p.s......................cs..c..p..t..pc.........htRltpuAphA...ts..L....G.......LpVNAGHGLsYcNVpsl..........Aulsp..lpELNIGHuIIu+AlhsGLppAVt-MKplh..h............. 0 150 324 427 +3697 PF00595 PDZ PDZ domain (Also known as DHR or GLGF) Bateman A anon [1] Domain PDZ domains are found in diverse signaling proteins. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.67 0.72 -3.80 58 26099 2012-10-02 11:12:46 2003-04-07 12:59:11 19 730 2536 601 11639 33290 6344 80.30 22 16.19 CHANGED plpltt...ttttlGhslssssst..............slhlsplhtG..uuAptss.lchGDpIlpl..NG.....pslpsh.sppcshthlcsssp.....plpLtlt ......................................................................................h.....h...tttshGhsltsspst.........................................................sl.h..l.s...p...l........h....tu.....u.....sA.p.....p..s..........G..L.......p...........s............G...Dp.......I..l.......p.........l.........NG..........................hs.l....p...s....h........s........p..p...c..s....l..p..h..l..+..p..stt.......tlpL.l.h....................................... 
1 2855 4028 7376 +3698 PF00544 Pec_lyase_C pec_lyase; Pectate lyase Bateman A anon SCOP Domain This enzyme forms a right handed beta helix structure. Pectate lyase is an enzyme involved in the maceration and soft rotting of plant tissue. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.59 0.71 -4.85 10 1509 2012-10-02 14:50:22 2003-04-07 12:59:11 14 45 449 44 752 1578 23 190.70 27 45.93 CHANGED spslVIsustohD..............uhs.hsh..pphhhG...hGussplsNhGhtlh...psoSNVIl+NlpIcsh.......l..t.t....sssspDGDAIplp.Nus.slWIDHsolSsusapss.hs.........aDGLlDIpcuSssVTISNshFssHcKshLhGH.uDst.upDpG.h+lTlsaNhF.sslspRhP.RsRaGhhHlaNN.Y.......sphppYuhGlussuoILSEuNpF .................................................................................hh...................................................................t.t.......lh.l..s..S.....p..c.Tl...G........hGs..s.s.t...lts...h.....G..ht.lt..............t.s.....s........N.....VIl+..N.lplpsh.....................................................thhs.s..uD..u..lslt.......s.....u.......s.....plWlDHs..s.hs..s.s..........................................tDGl..l....Ds......h..p.....u.o...st.lT..l.S.ps.h....a..............p..............s....+.....s...c...........s..h..L..h..Gp...sc..p......st...D....ps....hp.....lThtaNaa..p..........shsp...........Rh.P...+...h.....R......h....G........h........h...H.......l..hNN.a..............................t..p.h.....h.....a.uh.sss.....t.s.s..p.lhspsNhF.............................................................. 0 220 471 647 +3699 PF05041 Pecanex_C Pecanex protein (C-terminus) Moxon SJ anon Pfam-B_5192 (release 7.7) Family This family consists of C terminal region of the pecanex protein homologues. The pecanex protein is a maternal-effect neurogenic gene found in Drosophila [1]. 
19.80 19.80 25.70 20.80 19.00 18.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -5.32 8 305 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 96 0 180 282 2 209.20 49 13.85 CHANGED psVcpDc..DSsLVTLCauLslLuRRuLGTASHs.hSsuLEsFLYGLHsLFKGDFRITs.+DEWVFADhDhL++VVAPAVRMSLKL..HQDHFossDEaDE.ssLY-AIssacpphVIuHEuDPuWRsAVLuspPsLLuLRHVhDDGsDEYKlIMLs+RaLoF+VIKlN+ECVRGlWAGQpQELlFLRNRNPERGSIQNs+QsLRNlINSSCDpPlGYPIYVS.......PLTTSas-opsQL ................................................................h.....t...s.ssLlsLshuLslluR.R.u.L.Gs.Auap.....hu........ss.l-sFLaG.LHsLFKGDF....R....Ios.+DEWlF.u.Dh-LLppVVsPulRM.oLKL..HQD.pFsss...-..E.a..-..-s..tsLa-AI........psaEp.....p.....hlIuHEuDP..sWRpAVLusp.PpLLuLRH.l......h.D.-Gs..s..EYKlIMLp+p.aLoF+VIK.......lN+ECVRGLWAGQQQELlFLRNRNPE.RGSIQNsKQsLRNhINSSCDQPl.GY.PIYVS.......PLTTSa.sop.Q............................ 1 53 65 112 +3700 PF03211 Pectate_lyase Pectate lyase Mifsud W anon Pfam-B_2273 (release 6.5) Family \N 25.00 25.00 27.90 27.20 19.20 20.30 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.25 0.70 -4.94 12 678 2012-10-02 14:50:22 2003-04-07 12:59:11 8 10 217 9 311 638 4 205.80 33 67.04 CHANGED ushussssss...tsuuspshpsoIpVttGpsaDG+stpasus.pshussspu-pQcPlFhLEsGAoL+NVllGtstuDGlHCpG....sCsIpNVhapDVsEDAlTlKupu......sspIssuuAhpAsDKVlQhNussolslps...FhAsDaGKLhRosGssphpt.....sslphsssssssshtuls+s.uDssplpssshph.....ssVsptacGsp....usspssc .....................................sss.........P.....susss..hsl..sps.hhVtsGpsa..D..G..thppastu.....hs..h..u..ss..sp.s.tppcslFhLcsGATLKNlIl..G.....ps...tt-G.lHCcu....sCsl-NVaaccVsE..DAlol..Kupuss...................shplhGGuApsAsDKVhQhNGt.G..s............lpIcs...F.hsp.ca.GK.....lhRSCGsCps....p..........p...............s.........sl.s.s.p..s.h..s.s.ss.....sl.hulNpNhuDpsolpsh.s.lph........tth.p.ptapt.t...........stt....................................................... 
0 88 194 287 +3701 PF04191 PEMT Phospholipid methyltransferase Wood V, Finn RD anon Pfam-B_14367 (release 7.3); Family The S. cerevisiae phospholipid methyltransferase (EC:2.1.1.16) has a broad substrate specificity of unsaturated phospholipids [1]. 21.70 20.30 21.70 20.30 21.60 20.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.21 0.72 -3.93 105 2350 2012-10-01 22:51:20 2003-04-07 12:59:11 8 12 1459 0 1059 3719 1768 101.80 20 45.04 CHANGED hphhhGhhLhshGhhlshsuhhsLGhtGsahGDaFh..hhp..t.......hlpsusaphhsNPhYhuushshhGhuLh.tssshs.lllshlshlhhhhhlt.hEcPahtclYuppt ...................................................................hh...hhuh.hl.h.hh...u.h...hh..h....h.......u...h.h...t..h...t...h.t...t...s..h..h.s.....s..............ttsp.p.............................LlssGsYp..h.sRpPhY.hu.h.h.l.hh.h.......G...h...s..l........h.....hs....s...h...h.........s.....llh...s.....h......h..h......h...h.h..h.h.hh.hhhh.EE.thl.t.phaut................................................................. 1 349 655 903 +3702 PF03965 Penicillinase_R Pencillinase_R; Penicillinase repressor Finn RD, Bateman A anon DOMO_DM03102 & Pfam-B_5099 (release 14.0) Family The penicillinase repressor negatively regulates expression of the penicillinase gene. The N-terminal region of this protein is involved in operator recognition, while the C-terminal is responsible for dimerisation of the protein [1]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.36 0.71 -3.99 47 2958 2012-10-04 14:01:12 2003-04-07 12:59:11 11 9 1713 20 713 2659 173 112.30 25 82.47 CHANGED lustEhcVMcllWpput.sosp-lhpt.Lspt.hth..uhoTVtTllsRLhcKGhlspc+p.G+tahYpshlscpphhpttscpllschhts.shsshlsphlcpp..tlotc-lppLpphlpp ......................lopsEhclMc.llW......p......p.....s......p.....h.....osp.-lhpt..L.....p...........c............p....h....p....h..sh..sTltTlls.R.L.h.c.Ksh.lp.....p.....c......+......p.....G....+.....t.....a..h..Yp..s..hls.c.c....ch.h.pt.ts.ps..hlsch.h.s.s...sh...ts.h.ls.phlppp...plo.tc-...lccLcphlp............................................................... 0 301 536 647 +3703 PF00805 Pentapeptide Pentapeptide repeats (8 copies) Bateman A anon Bateman A Repeat These repeats are found in many cyanobacterial proteins. The repeats were first identified in hglK [1]. The function of these repeats is unknown. The structure of this repeat has been predicted to be a beta-helix [2]. The repeat can be approximately described as A(D/N)LXX, where X can be any amino acid. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.50 0.72 -4.66 95 11082 2012-10-03 04:02:01 2003-04-07 12:59:11 17 346 1513 31 3972 17635 5755 38.50 33 30.01 CHANGED usLpsAsLpsusLpsusLptAsLssAslpsAsLpsuslps .........................pLttAsLp....s.....AsL.....p......s.....A....sL.....p.s....A.s.L......p.s.A.....s...L.....p......s.A.sLpsupl....................... 0 1098 2769 3557 +3704 PF00354 Pentaxin pentaxin; Pentaxin family Finn RD anon Prosite Domain Pentaxins are also known as pentraxins. 
20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.30 0.71 -4.88 9 1094 2012-10-02 19:29:29 2003-04-07 12:59:11 12 93 109 168 564 1160 95 183.20 29 30.79 CHANGED KsFVFP+ES-TsYVpLhs.LcKP.LpsFTlCh+hYo-LS..RuaSlFSYuTpp..pDNElLlahc+sspYShhlGss...clhh+s.EphsuPsHlCsSWESuSGIsEFWVsGK..PhV+KuL+KGYTVtspsSIlLGQEQDSaGGsF-toQShVGEIuDlpMWDhVLoPEpIpolYtG.sshs....sNILsWRuLsYElpG.Vhl+P ...................................................................FP...t.st...sas..ltsp.hths...L.puFTlC..hh..h.t.s..shs.......p.h..o..h...F..SY.us...s......p.s...N..-l....l.l............h...t......p.......t......t............s...s...h........p.........l......h...l...ssp.....................t.s..h..h...............h................t........s.....s...p.W...pH.l.C...soW...s.....o...p....s.Gh.hp.....hahD.Gp....................h.s.s...t..p.s..............l.....t..t..u..a....s..l....t...s.......s.G..s...l...l.LG......Q....-Q..D........s..............h....G.........G........s.............F.......-..s............s.....Q.......u...F...lG...-.luplshWDpVLo.sp.pl.....t.s.l..h..ps........p.h.p........GN..ll.sWtshphp.h.s............................................................ 
1 154 195 378 +3705 PF02896 PEP-utilizers_C PEP-utilising enzyme, TIM barrel domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.60 19.60 19.60 19.60 19.50 19.40 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.91 0.70 -5.54 17 9485 2012-10-10 15:06:27 2003-04-07 12:59:11 13 33 4444 34 2008 6852 3404 301.30 34 43.25 CHANGED +tt.uphtsh.uhssDGp+lclhANlupsp-stsAhssGAEGlGLhRTEalahsp.sp.hPsEcE....................QhpsaculhcAhsG+PVslRTLDlGuD..KtLPa.h.............................chspEhNPhLGaRulRlslsp..s-lhcsQlRAlhRAus...hu........plpIMhPMluohpElcp.s+pllcch+tplctphhth.s.shplGhMlElPuAAhhA-phA+c.sDFFSIGTNDLTQYThAhDRtsst....lual......asPhpPulL+hlppllctAcpcG......hhlGhCGEhuG.DPpul.lLlGlGLDphShSshul.ts+thhtphph ...........................................................................................................t..ht..shs...c.s...p...pl.clh.sNluss.p..-.s...t.s.shp..G....A-GlGLhRTEalahsp....s.....p.....hP.s...cE.............................................................................Qhpsa.p.p.lh.p...u.h......t..s..+....s...V.ll..Rs.h....Dh.su.c......+...Lsh..h................................................................................................t...s.cE..t.NP..hL..GaR..u.lR....ls..l..sp.....s-laphQ.l.RAll.RAus.....ts..................slcIM...lPhlsolcEh.......c..........t...s+.p...l.......l.c..........c.t........t....t.p.l.........t...p...p..............h.....h...p...t.....s.....l...c.lGhMlElPu.sAhhA-p.h..u........c.......c.......s.......D.........F...FS...IGTNDL...T...QashAhDRssst..........lutl.................asshs.P....ull.chlppllcs...A..c....p.pG........................thl.GlC.GEhuG.D.s.p.ss...hLl..s.h.....Gl-plShss.s..s.l.sts+hhltph.h.................................................................................................................... 
0 656 1260 1670 +3706 PF01327 Pep_deformylase Polypeptide deformylase Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain \N 22.00 22.00 22.00 22.60 21.30 21.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.89 0.71 -4.83 130 7120 2009-01-15 18:05:59 2003-04-07 12:59:11 16 13 4525 176 1596 4663 3258 154.60 34 87.31 CHANGED hhpIlphs-s....hL+phupsVp...phs...sp.lppllccM..h-...TMhss.s............GlG.LAAPQlGh..shRlhl....lchsppptt.....................llINPcl.................................................hsp......pthtttEGCLSlP.....s....hhspVpRspplplchhD.hpGpphph.c...hpG.......h.hAcslQHEhDHLsGhLalD+.....lsphcpt .........................h.hpIlphscs....hL+ph.ucsVs...ths...........sp.lppLlcDM......h-TM..hss..p......................................GlG..LAAsQlGl..spRlll.......lc....ls..p..ppst.......................................lhlNPcI...............................................................t.p.p.......t..p.....s..t.tEGCLSlP..........s..hhu.V.t.Rhpclplch...hD...t.....p............G....pt...hpl..c...scG.......h..hAhslQHEhDHLsGhLFhD+ls..p.t................ 0 529 1053 1351 +3707 PF01562 Pep_M12B_propep Reprolysin family propeptide Bateman A anon Pfam-B_117 (release 4.0) Family This region is the propeptide for members of peptidase family M12B. The propeptide contains a sequence motif similar to the "cysteine switch" of the matrixins. This motif is found at the C terminus of the alignment but is not well aligned. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild --amino -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.60 0.71 -4.36 119 2978 2009-09-11 14:07:58 2003-04-07 12:59:11 14 104 182 0 1366 2653 2 126.10 24 15.43 CHANGED hEllhPp+lp.......................................................hpttt..............ph....splpYplphpGcphhlcLcpscthlussashh...............pYspsGsh.hspps.ht....scCaYpGtlpu.s..s.S..hVul..SsC.uGL+Ghlphp.stsYhIEPl........pts..ssh........pHllY .................................................................................................................................t..................th....tphpap.l.ps...G.c.ph...hLcL.p.h...s....p.p.l...lu.ts.ashp......................hh.s....p.s....G...pt...hspt.s.................pcCaYpG....p..l.ps.ps.....s....S...........hsu..l..SsC...s...G...L............c........G.hhp.hp...s...ts.YhIE.Pl...........pts......sp.................Hhla............................... 0 218 304 662 +3708 PF03413 PepSY Pep_M4_propep; Peptidase propeptide and YPEB domain Bateman A, Yeats C, Rawlings N anon Yeats C Domain This region is likely to have an protease inhibitory function (personal obs:C Yeats). This model is likely to miss some members of this family as the separation from signal to noise is not clear. The name is derived from Peptidase & Bacillus subtilis YPEB. 
20.90 15.00 20.90 15.00 20.80 14.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.15 0.72 -3.33 348 4684 2012-10-01 23:09:26 2003-04-07 12:59:11 14 98 1721 4 837 3397 168 65.70 19 24.77 CHANGED tlo..hppA.hphA.........tpp...............h.......s....st.hhp....hphttt...................................psphsYc.lp.hp............sstt.........hph...lDAtoGcllppp .....................................................................................lo.cpA.hphA.................hpp.................................h.............s......sp..hhp........hp.tpp............................................................psphsYp..lp..lp...................sstp............hchh....lDAp.oGcllp........................ 2 290 543 701 +3709 PF00311 PEPcase Phosphoenolpyruvate carboxylase Finn RD, Coggill P anon Prosite Family \N 20.60 20.60 20.80 20.60 20.20 20.50 hmmbuild -o /dev/null HMM SEED 794 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.50 0.70 -13.20 0.70 -6.70 65 4128 2012-10-10 15:06:27 2003-04-07 12:59:11 12 6 3076 5 773 3411 2728 540.50 36 85.56 CHANGED 
pLslcLVhTAHPTElsR+ollpKp++IuphLppL-p.phsspcc.................pplcppLpccIphhW+TDElRph+PTVlDElchuLpYFcpsLaculPplhcclppsL....pppasthphPh..........shhpFGSWlGGDRDGNPsVTs-lThcshphQRphslchYlpslcpLhppLShS..hspsss-Lhpplcpcpt..............phschh.ph.t.h..pEPYRltLuhlppRLtsTpcc.pphhssp......................hsssphYpssp-hhp-LpllpcSLppsu.t.lusspLtcLlppVcsFGhpLspLDIRQESs+Hs-slsElscaLsl.....stsYsphsEp-+htaLhpELps+RP.L.....lPs.......ph.....shSccTpEslssh+sltclppcaGscshpoYlISMscssSDlLpVlLL...t+EuGLhpssss.............spLtVVPLFETl-DLcpAPplMppLhslPhYRphlss..............................shQEVMLGYSDSNKDuGhloSsWpla+AQcsLtclAcca....GVpL+LFHGRGGoVGRGGGPu.apAILAQPsuol.sGpI+lTEQGEVlusKYuhP-lAhtsLEhhssAllpAS..Llsss.sp.......ppWpplM-cLuspSpppYRsLVaEpP-FlsaFppsTPlpElupLsIGSRPA+R....+s..stslpuLRAIPWVFuWTQoRhhLPuWaGlGoALpphh...tpssp.....plphLcpMYpcWPFF+shISplEMsLAKsDLpIAppYsppLs.ssc.tct...lappIppEaphTpchlLplTsppcLLsssPs............LpcSlpLRNsYlsPLsaLQVpLL+RhRptspp.............................pphscs-hLcsAL.LTINGIAAGMRNTG ............................................................................................................hhTAHPTp....sh.........h.....h.........................................................h.....h...h.o........ps.sEhp.sh.hhppshhpslPphhpchp....p..h.....t..t......h..........................lp..hstWhGGDRDGN.P.VTsphht.sh.htt.hh...h...h..h...hs................................................................................................................................................................................................................................th...l..h.t....l.........................h..hh...h..FGh.h..hDhRp...puthattsls.t.l.h....t........h.....th................tpY.tt..hsEtp+.t.h.Lhp...p.L..t.t...R.....L........s.................h..............s.....t........h..t.c..Lthhphh...tth.......tshtthlIShspssSDl.Lt.l......lL.......+.Esth.................................................h.lsPLFEpltDLp.tu.thhtphhshshhpth.l.ts......................................................
.hQ..lMlGYSDSsKDuGhhuusWt....YpApptLh....p....h....s....c....p....h.................u...lclphFHGRGGo......lGRGGu...P.s...a....A.l....huQP.......to..l...p.u.tlRl....T.....QGEh.It.p..au....p....hshpsLphhsu.A.h.Lpts...hh........t...sp..................................................pap.t.hM-phu.h.u.h..p.YRsh...Vh.......p.p........p..F...h....YFh.....................tuTP..Ehup.h.s.lGSRPupR.....ps......ts..s....lpsL.RAIPWlFuWoQsRhhlP..sWhGh..Gsuhpth.........p.t..........................p.t.LptM...........hppWPFFpshls.l-MVhuKss..lAth.Ysphh...st.p.h........hhptlhtp.hp.shphlLt..lt.sp.p.p.lhtt.sh..............l.t.pplphR..Yh..ssLshhQ.h.h...lt+.Rt.t................................................thl.hohtGlAs........................................................................................................................................................................................ 0 201 483 654 +3710 PF00821 PEPCK Phosphoenolpyruvate carboxykinase Bateman A anon Pfam-B_1309 (release 2.1) Family Catalyses the formation of phosphoenolpyruvate by decarboxylation of oxaloacetate. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 587 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.87 0.70 -6.35 56 2517 2012-10-02 15:24:17 2003-04-07 12:59:11 13 4 1840 45 434 2002 389 325.30 56 96.18 CHANGED palp-sscLspP-plalCDGS-cEhcpltpphlcsGphhhLt.p+a.NsaLsRocPpDVARVEs+TFIsopscc-ssPssN............WhsPp-hcppl.pplasGsM+GRTMYVlPFSMGPluSPho+l..GVplTDSsYVVhSM+IMTRhGpplL.....ctL.ssctpFV+slHSVGtPh..s....ptsss.WPCNs.ccphIsHhP--+......pIhSaGSGYGGNuLLGKKCaALRIASshA+...-EG.WLAEHMLIlGlTsPp..GccpYlAAAFPSACGKTNLAMltPsl....sGWKlcsVGDDIAWh+hs.p-GpL.....hAlNPEsGFFGVAPGTu.cTNPsAMpol..tp.NoIFTNVAhTsDG.cVaWEGhspp..Ps...........plhsWpG.......cs.Wp.s....pssc.PA...AHPNSRFTsPhsQCPhlc.PcW-sPcGVPIsAIlFGGRRssslPLVhEAhsWpHGVa.lGAohpSEsTAA.Apupsuhlc+DPhAMLPFhGYNhGcYhpHWLshupc.........p+hP+IFpVNWFR+sp.sG+FLWPGFGENtRV.LcWlhcRlcG....c.ssAhcTPIGhlPphssLslpGL.thspt....s...hcclholststWhpEl.cplcc.aapph.GscLPpplhpcLctlcpRlpph ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sap.hpslGDDI.sWh+...s..tp.....G..pL.....hAINPE.sGFFGVA..PGTshpTNPsA........MpTl........hpN....T.......lFTNVAt.T.s.....DG.sV.a.WEGh-c-h...ss............plTDWhG.ps.Ws..................cstp...PAAHPNSRFss..PApQCPhID.Pt.W.EsPpGVPI..sAIlFGGRRs..p....sVPLVhEAhsWpHGVa.lGAsht......S..........Es....T.AA...A.............t...tp....t..............lh+DPhAMhPFhGYphupYh.........................................
........................................................................................................................................................................................................... 0 144 249 359 +3711 PF01293 PEPCK_ATP Phosphoenolpyruvate carboxykinase Finn RD, Bateman A anon Prosite Family \N 25.50 25.50 25.80 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.40 0.70 -5.97 128 2905 2012-10-02 15:24:17 2003-04-07 12:59:11 15 5 2608 26 690 2097 1598 442.70 48 86.52 CHANGED sssplhh.NlssspLhEculp........cs.....EGhlsssGALslpTGpaTGRSP+DKFIVc-ssoccplaW......GslNpPhst-pFctLhp+shsaLss+.claVhDsauGADsca.RlslRVlsEhAWpuLFs..+NhFIRP....sp-ELp.sF..cP-aTllsuPsFpAcPtpc.GspS-shlhlNasc+hhLIuGTpYAGEMKKulFol.hNYlLPt.+slLsMHCSANlGp.c..G.Ds......AlFFGLSGTGKTTLSA......DPpR.pLIGDDEHGWs.-sGVFNhEGGCYAKsIsLSpEsEP-IasAI+..FGulLENVVl.ctps+plDasDsSlTENTRsuYPlcaIsNthhss.huGcPcsIIFLTsDAFGVLPPlS+LTs-.QAMYHFLSGYTAKVAGTEtGl.oE.PpsTFSsCFGAPFhPh+PshYAchLsc+l.......pc.asspsaLVNTGWoGGs...YGs..GpRhslchTRAllsAhLsGsL....sssc..acppshFslplPpsls.....GVsspl 
..................................................................................................................................ts..plhaN.Shs.Lapctlp.......s................cGhlT.ph...GAlsspT.G.haTGRSPKDKalVc-..s....soc-....plaW.........t..ssNpPhstEsa.ppLhthshc.LssK..claVhDuas..Gu.s.cpR.lplR.h.ls.EhA.Wp..u.hF.s.....+.N.M.F....IR.P............o.p...-.E.......Lt......s.F..........c.......P.D.Fslh..N...u.sp....h........p..s...s.....h..p-...G.h...s........S..E.....shl.hhNhs....c+....h.lIuG.T....Y..u..GE.MKKGhFSh.MNYlLPh..cslhSMHCSAN....s....Gc....c......G...DV...........Al..FFGLSG..TGKT.T.LSs......................DPc..R.....pL.IGDDEHG.Ws....Ds..GV..F..NhE.G.G....CYAKsIs..LS.p.E..t..E.P.-..Ia..s.AI+..hsu.lLE.NV.......sl..c.pst..plDa.sD.s.Sh.T.............E..NTRsuYPI.aIs.....Nhs.....h.....Ps....tuu....Hs.p.p..........lIF..LTADA..FGVLPPlS+L.o.s-.QshYHFLSGaT..A..K............lAGTE.+.......Gl....TE..P.ps.......TFSsCFG....AsFls.LHPopYAchLsc+.h.......pt..t.sspsYLVNT.GWsGs.....................GKRhsl+.TRullsAIlsGsl.......p.pu-......htphs..hFsLtlPsplsGVssp....................................................... 
0 226 436 591 +3712 PF01195 Pept_tRNA_hydro Peptidyl-tRNA hydrolase Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 21.70 20.70 19.70 20.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.20 0.71 -4.80 116 4825 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 4548 28 1217 3288 2684 180.00 38 92.48 CHANGED LlVGLGNPGtcYppTRHNlGFhslDpLsc......chs......hphppppthpuhhsp.h...ht..............sp...cllLlKPp.TaMNhSGpuVttlhpaYc.....lps.ccllVlaD-lDLshGpl+l+tsGusG.GHNGl+SIhppLGo.pcFhRlRlGIG...+...P.....ttt...slssaVLupFstpEtptlpp.slppuscslpthlpp...s....hstshsph.s ...............................LIVGLGNP...GscYttTRHNs........GF...hhlDt.LAc...chs.........hshpp.p......p.........Fpu..h.huc...ht...ls.....................................Gc..clhLl+Pp.T....aMNhSG.cu..V.t...s.l......h....s....a..Y.c...................lss....-....-.......l..lV.l..aD-LDLssGplRl............+.t.......sG...us.........G....GHNG..............l.K.S.....I.hs.p.........L................G..............s.....p............s...............F.....h.........Rl..RlGIG....+.Ps....spt...................pVssaVLucF...s.p.p....E..pt.h.lsp...sl-c.uscsl.phhlpp..s..hppshschp............................................................ 0 407 789 1024 +3713 PF03564 DUF1759 Peptidase_A16; Peptidase_A16_N; Protein of unknown function (DUF1759) Griffiths-Jones SR anon MEROPS Family This is a family of proteins of unknown function. Most of the members are gag-polyproteins. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.76 0.71 -4.39 8 975 2012-10-02 13:37:57 2003-04-07 12:59:11 10 76 48 0 921 926 2 119.20 20 16.10 CHANGED FuG-hpEWpsFh-lFpShlHsppcLucl.KFpYL+ShLpG-AAsllpHhsl.TusNYpsAh-tLpcRY-NscpIhpsLhcclhcl.s.sspcssptL+plh-ssscllRtLcplGcps-..DshlspllLpKlspcs+++hlppu+cpcs ............................................FsG.p..h..p.a.tFhp.Fp.shlc.p.tp.ls.s.hp.Khth.L......h..phL....p.G..p.Atphl.p...s.h....h.....ss..t...s..Ypts...hp..hL.pppas.p...p.h...lh.pthhppl.p.h....................ts....tthpt...h...htthpth..h...t..l..pt..h..t.....t......t..........h.........h..ph................tt................................................... 0 563 601 892 +3714 PF03566 Peptidase_A21 Peptidase family A21 Griffiths-Jones SR anon MEROPS Family \N 25.00 25.00 46.00 25.00 19.00 18.60 hmmbuild -o /dev/null HMM SEED 648 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -13.09 0.70 -6.43 5 29 2012-10-04 01:49:40 2003-04-07 12:59:11 8 1 9 20 0 36 0 607.00 68 93.74 CHANGED uDANlAupp..sR+RoRslR..............................................pNP..............ltApsVsVuss+RpRR+RRsG++hss.ssspsAstcluQsLsusolossushooM.PslRshAttclDlussSlGWaaKYLDPAGAsESu+AlGEYSKIPDGLl+aSVDAEhREIastECPsVo-solPLDGupWoLSIISaPhFRTsalAlANlsNc..ElSL-slN-lIpsLNN.luDWRD.lsosQWspFopsssaYhpIhVLpPTYAhhDVPDPT-.GlstoVoDYRLTYKGITsEuNsPTLVDQGaWVGAQaslsPsoEsQpslsc..uGosslusoshhpssuusshthsWA..sLPsGGoAPssssuhT.soSGpFhshchsG.s..uuVhoaTlPpGathEs........ssFAssGDTlTFsh....suGsslslTp......TAPTsTlTlhuohTuossl..sRslss-oG.....hss.l-ssulNRp...plslPPhThuQsssNsPKhEQFLlKETtGsYlVHpKMpNPVFpMTPASSFGuVpFssPGa-hssstsulGGIRDThDsNaSoAVsHFpSLSpSCoIVsKTYpGWEGVTNsNSPaGQFAHoGs.K-DElLsLAccLsscLTGVYPAsDNFAGAVSAhAAshLuplsKSpATuSlIKuVApsAsGslpuuhApLsGllpSl.G+luAR....l+ARRARRRAuRts 
...........MDSNSAS....GKRRSRNVR...............................................................IAANTV..NVAPKQRQARGRRAtSRANNIDNVTAAAQELGQSLDANVITFPTNVATM.PEFRSWARGKLDIDQDSIGWYFKYLDPAGATESARAVGEYSKIPDGLVKFSVDAEIREIYNEECPTVSDASIPLDGAQWSLSIISYPMFRTAYFAVANVDNK..EISLDVTNDLIVWLNN.LASWRDVVDSGQWFsFSDDPTWFVRIRVLHPTY...DLPDPTE.GLLRTVSDYRLTYKSITCEANMPTLVDQGFWIGGHYALTPIATTQNAVE....GSGFVHPFNVTRPGIAAGVTLTWA..SMPPGGSAPSGDPAWIPDSTTQF.QWRHGGFDAPTGVITYTIPRGYTMQYFDTTTNEWNGFANPsDVVTFGQT.GGAAGTNATITI......TAPTVTLTILATTTSAANVINFRNLDAET............TAASNRS...EVPLPPLTFGQTAPNNPKIEQTLVKDTLGSYLVHSKMRNPVFQLTPASSFGAISFTNPGFDRNLDLPGFGGIRDSLDVNMSTAVCHFRSLSKSCSIVTKTYQGWEGVTNVNTPFGQFAHSGLLKNDEILCLADDLATRLTGVYGATDNFAAAVSAFAANMLTSVLKSEATTSVIKELGNQATGLANQGLARLPGLLASIPGKIAAR....VRARRDRRRAARMN............. 0 0 0 0 +3715 PF02160 Peptidase_A3 Cauliflower mosaic virus peptidase (A3) Mian N, Bateman A anon IPR000588 Family \N 21.30 21.30 21.40 21.50 20.70 21.10 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.39 0.71 -4.72 6 102 2012-10-02 15:32:34 2003-04-07 12:59:11 10 11 42 0 23 125 0 173.90 29 22.75 CHANGED NPNSIYIKGpLhF+GYps.hplchYVDTGASLChAs+alIPEEaWpsuc+PIpl+IANsplIpIsKVspsl.lpluGcpFhIPTlYQQ-oGlDlllGNNFC+LYpPFIQapDpItF+hsp..psV.hpplT+AhhsuspuFLEShKKpSpspp..shNIopsp.....-plshlp.hc....-E+hapth..+hptIEpLLEp.VsSEpP ......................................ht.h.hpshlDTGAslChhpc.hlIPcchW.psppsl.hlphAssphhplshhspplplhIs.s.....chFpIP.plY.....pp...-.....o....s..hDhllGsNFhp.LYpPF.lph.p.-.p.lhFph..st......l.hhttlsps.....hphshpth....hps..hpptpp..spp....shp.....th...................................................................................................................................... 
0 1 17 19 +3716 PF01828 Peptidase_A4 Peptidase A4 family Bateman A anon MEROPS Family \N 21.00 21.00 21.00 22.50 20.30 20.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.49 0.70 -5.27 22 234 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 99 7 175 241 1 194.70 32 70.59 CHANGED SoNWAGAVL.....sussYTuVoupFsVPpPosssus.................st.suSAWVGIDGD.ThssA...ILQsGVDFhlp..sGpsoaDAWYEWYP-hAYsFsslslSsGDsItlsVsAoS.osGsAhlENhooGpoVo+Tlou...ousLsthNAEWIVEDFppuso......hVshAsFG.oVsFTGApAss.uGpohGhssATll-lc.Qs..spVLTcsolsusotloVpYl ..................SsNWuG.ulh...........sussaosVousasVPssosss.u.............................s.uSuWVGID..G......s....ssssu...............lLQsGl..shp.............h.....p.........sG..p..ss...........asAW.............a..EWa.......P...s..h..u...h.....s..h...s.....s..hs.lssGDplpsoV.s..s..so...s.o.....s..G..s....ssl.p.N.......ho........sG....ps....sopo..hos.........sssL..sttsAEWIVE....D....h...p....t....s..s.s.......hs..sh.AsFG.sVsFos........spAss......sutp..h.....s...ss....u...p....h..hph.....t..tt..s.s.......................................................................... 
1 28 100 140 +3717 PF01829 Peptidase_A6 Peptidase A6 family Bateman A anon MEROPS Family \N 25.00 25.00 324.40 324.20 20.70 19.90 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.33 0.70 -5.43 3 9 2012-10-04 01:49:40 2003-04-07 12:59:11 11 1 7 40 0 24 0 359.20 60 88.70 CHANGED AhspNNLh+LSpPGLuFLKCAFASPDFNTDPGKGIPD+FEGKVLo+KcVLTQS.INFT....sN+DTaILVAPTPGVAaW....sAcAPAssuAloTTssFsAVsFPGFsSLFGToATsRADQVoAFRYASMNsGlYPTSNLMQFAGSIoVWKsPVK..LSosQaPVATTPsT..SQLVHAIsGLEulLAVGs-NYSESFI-GVFSQSVCNEPEFEFaPILEGlQTLPPANVTVAQAGMPFNLsAGAtsVAGaTGlGsMDAIsI+VTAPTGAVNTAlLKTWAClEYRPNPNosLYcFAHDSPAsDElALQpYRKVARSLPVAVtAKpNAoMWERVKSILKSGLshASsVPGPVGlAATGlpGIu-LIuuLuF .......s.hNhsALsRLSpPGLAFLKCAFAPPDFNTDPGKGIPD+FEGKVloRKDVLsQS.IoFs....uspDTaILlAPTPGVAYW....sAsscAushshoTT.sFsuVsYPGFTShFGTTATS...RSDQVSSFRYASMNsGlYPTSNLMQFuGSIoVWKsPlK..LSosQaPVuT..sPAT...SsLV.HsLsGL-GVLAV.GsDNaSESFIKGVFSQSsCNEPDFEFsDILEGlQTLPPsNVslusoGQPFsLsAGuEssoGlsGaGNMDTIVI+VSAPTGAVNoAILKsWuCIEYRPNPNAhLYQFuHDSPPhDElALQEYRsVARSLPVAVhAAQNAoMWERVKuIlKSuLusASsIPGPlGlAASGIpGLSsLhpuhuF.... 
0 0 0 0 +3718 PF01252 Peptidase_A8 SPASE_II; Signal peptidase (SPase) II Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.30 28.10 23.20 24.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.22 0.71 -4.60 181 4771 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 4358 0 1061 3181 2288 151.40 31 88.98 CHANGED hlsh..hl....lh.lDQhoKhhlhpph.................h..................psh....hlss...h.hslshlp..NpGsAFuhhsst......h.hhhhlslllhhhlhhh.hh+htp.......tthttlu.l....uLllGGAlG.N.llDRl.........hh...........................Gt......VlDFls.h......h.................hta.P..sFNlADsuIslGslll....llth.hh......tt.ppp .....................................................................................hhlslll....ll.lDQhoK..hhlh.ppht.......................hs....................psh..........ll..s..s....h....hslshsp..NpG.....AA.F...S.....hLsst.............ph...hFh.hlslsls.l.h.lh...hh...hh.+p..tp......................pth..htlu..h.....uLl..lGGAl.GN.llDRl...........hp............................................Ga.VVDalph......h...........................hp.a..s.....lFNlADssIs...lGshllllth.hh.......pptt....................... 0 371 723 907 +3719 PF01640 Peptidase_C10 Peptidase C10 family Bateman A anon Pfam-B_1522 (release 4.1) Domain This family represents just the active peptide part of these proteins. Residues 1-120 are not part of the model as they form the pro-peptide, which before cleavage blocks the active site from the substrate. The catalytic residues of histidine and cysteine are brought close together at the active site by the folding of the active peptide. 
21.80 21.80 21.80 22.00 21.40 21.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.78 0.71 -11.58 0.71 -4.53 53 317 2012-10-10 12:56:15 2003-04-07 12:59:11 12 11 122 17 47 318 19 176.30 29 29.83 CHANGED WsQssPYNphsP..............stpssTGCVATAhAQlMpYacaPpp......uhGsh.sYpsstsp.........hphsh.....sposYsWssMhsshsp.........shspppt.cAVApLMtclGhAVpMpYsss...uSG.uhstp.shpALpphFsYsts.lphh...+sthssspWtshlhpELss..utPVhYsGsstss.....GHAFVhDGY....................scsuha...HhNWGWuGt.usGYap ..........................................................................WsQttPYN....t....hsP...................ttpshsGCVATAhAQlMta....ap..a....Ptp....t.tt...sht.t..t..........................ttt..asWsp...h...sshp...............sptpt..pslupLhtclG..hu..l..pM..pYs.s.........uSu..u...tt....s.ps.....Lpp........a.sYs.....ts..ht.h....+p.hs....t..pp....a....phl.h.p-Lpp...s..p..P...Vh.hs..Gsstss..................GHAaVhDGa............................................................................s.pps...hh........HhNaGWsG...ss.Gaa................. 
0 18 39 47 +3720 PF03415 Peptidase_C11 Clostripain family Bateman A anon MEROPS Family \N 25.80 25.80 26.00 29.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.47 0.70 -5.49 7 367 2012-10-03 02:24:44 2003-04-07 12:59:11 9 25 244 4 90 357 28 328.40 17 58.43 CHANGED VahsuDN..sLptah.pDlpEMhpu.pss.....sllllhDth.....................hppsphh+lsccsph..ttl.sssth-hN.GDsssLcsalphhpspa-ADphhLlhWsHGsuah.csp...h.s+ulsaD-sN......tsplthsElpcsLp.....ts.plDlLuFDACLMGohEVhYplRs.....pADYlVASu...PG.GacY.phls.l..ssssss.....................plut.hV-pYt-.hssshh.........shoLSlaDhScl-tlhpslctlApsh................ss..pp.............shhsh......................................as.hspt.hsphsYpsLh-Lsphhpt.pphssthtth.ts...............................................................h....pshpphllYu..aG-hsst....shppuh...........oIaLPpssp..............shptahssasslt.....a.tchtWsc................l-pW ................................................................................ah.uss......sh..t...t.h...t..sltphh.t..s..htt.........t............plllhhst..........................................................t...thh.....h...p.t.t.t...h.........tth...hp...........p...s....s...s.s...ps....lpphlp....shp....a....PA.......c...p......YsLllhsHG............sGW..........h.....................s+..u.....h..s..hDpp.p......................................tt.t.hs..l..s.-lspulp.................sshph-hlhFDuChMuslEVAYpL+s......hscYllAS.s.sph.......u.......Ghs..Yp..p.hhstl..........s.s..ss..s..ht...........................................p.h..u.pthh.s.Y..t.s..h.s.sh.................................tho..l.olhDhsp.l....c.s.lt....phhcp.l.h..th.............................................................................................................................................................................................................................................................................................................................
........................................................................htt.h........................................................................................................................................................................................................................ 0 51 78 88 +3721 PF01088 Peptidase_C12 UCH; Ubiquitin carboxyl-terminal hydrolase, family 1 Finn RD, Bateman A anon Prosite Domain \N 20.80 20.80 21.00 21.00 19.10 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.62 0.70 -5.16 72 1199 2012-10-10 12:56:15 2003-04-07 12:59:11 16 18 337 29 757 1143 15 189.70 30 64.44 CHANGED sWhslESsP..sVhsphlppLGV..pshphp-lauL.Ds.....Lthl.pPlhullhLF.h.........hptppsttht...........tpt.sptlaFh+Q..sIsNACGThAllpslhN..sts.........lp.GstLspahchstsh.sP.ppRuphLpss....ctlcpsHsuhA.....ppu...........pspsst......pcs......saHFluaVs.....hsGpLYELD..Gh+p.tPls+Gtss.......scsalpcst...llp.chhp+h ................................ah.lEusP..tlhs.....ph....l.pphGl...pshphp...-lauL..-t..............p..lt.....l.......................pPlhul.....lhLF.ht.............................tt.........tt..t..p....tt....................h.......p..t..spslaFh+.Q..sIsNACuT......hA..............llpslhN........spt.......................l..p...G.s.tLpph.h..c......s..t..s..h....s.s.t.+uhhlp.ss...........p.tl...cp....sHsshA...ptt...............................................ppps...........cpcs...........saHFlual..............hsGpLaEL...............D..Gh......+....tP..ls.h..Gs.hs........pcshl.ptst....lhp.phh................................................................ 
0 230 382 594 +3722 PF01470 Peptidase_C15 Pyroglutamyl peptidase Bateman A anon [1] Domain \N 22.70 22.70 22.70 22.70 22.20 22.60 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.15 0.71 -4.48 9 2041 2012-10-01 19:48:29 2003-04-07 12:59:11 12 13 1648 60 470 1259 104 185.10 39 88.92 CHANGED hKlLlTGF-PFGs-slNPoh-ss+pLst.ppIusAplhuchlPssFpcut-sLpcslpch+PDlVIslG.AsGRotITsERVAlNlsDA.R..IPDN-GpQPlDcsIpsDGPsAYFoTLPlKAMspsl+csGlPAslSpoAGTaVCNalhYtshah.sppu.sl+uGFIHlPalP-QVlsK.tp.....PSMsL-s.ltGlpsAIcsuhct. ...................................................clLlT.G.Fs.PF.u.s.............c........p....l.....NPu........h....E..s....l...c...t.L.............s...................p...............p.......I...............s.............s........s.............p.........l.......h.....t..h.....pl......P............s..s.......F......t.c.u....h.........p.......h...l....p...p....t...l....p.......p.......h.......p.......P....D...........h...V...l...slGQA.G.G.Rs.s...l.TsE....R....VA...I....N....h..s..DA....R.......Is.D......Np......G...p.Q.P.l.Dp..s..I...h......-..G........s...s....A........Y..F........o.sLP........l..KA....hl...p..........sl.....p..............p..........p..........G....l..P.......u...........s..........l.............S..poAGT..aVCNalhYthh..a...h....h......p...p.........p.....h.........t....h.+.....uG....FlHlP.....a.hs.-.Q.s.h.s.+.s.s.t..........suMslpplhtul.phultsh...p.................................................................................... 
0 141 256 365 +3723 PF01831 Peptidase_C16 Peptidase C16 family Bateman A anon MEROPS Family \N 20.20 20.20 20.40 20.20 16.80 19.10 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.58 0.70 -4.85 2 113 2012-10-10 12:56:15 2003-04-07 12:59:11 12 9 56 0 0 122 0 244.20 70 4.43 CHANGED AFDAlhSEsLSAFYAVPSDETHFKVCGFYSPAIERTNCWLRSTLIVMQSLPLEFKDLtMQKLWLSYKAGYDQCFVDKLVKSsPKSIILPQGGYVADFAYFFLSQCSFKsaANWRCLcCsMELKLQGLDAhFFYGDVVSHMCKCGNSMTLLSADIPYThcFGVRDDKFCAFYTPRKVFRAACAVDVNDCHSMAVV-GKQIDGKVVTKF.GDKFDFMVGaGMTFSMSPFElAQLYGSCITPNVCFVKGDVI ..................s...................-.pahKVhGLYsPphTRsNCWLRSVLhVMQKLPhpFKDhslQcLWl.YKttYsQhF.VDpLVspIPtsIVlPQGGYVADFAYWFLo.C...DapshA.W+ClKCDhu.LKLpGLDAMFFYGDVVSHVCKCGpSMsLIssDlPaThHFuL+DchFCAFhT.RpVa+AACsVDVNDsHSMAVVDGKQIDs+hVTphouDKFDFIlGHGMSFSMooFEIAQLYGSCITPNVCFVKGDII.............................................. 0 0 0 0 +3724 PF00648 Peptidase_C2 Cys_protease_2; Calpain family cysteine protease Bateman A anon Prosite Family \N 20.10 20.10 20.30 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.94 0.70 -5.42 9 2514 2012-10-10 12:56:15 2003-04-07 12:59:11 16 97 332 30 1434 2331 90 246.30 28 35.47 CHANGED LFpDPpFPssspSLtapphsP.....htlcWKRPsEIsssPpFIlGGAo......RT....DICQGsLGDCWLLAAIAsLTLN-cLLtRVlPtsQSFpEs.YAGIFHFQFWpaG-WV-VVlDDpLPT.+sGc..LlFsHSs-tsEFWSALLEKAYAKlpGsYEALsGGuToEuhEDFTGGlsEaaEL+cAPs...................................NLhc..IIt+hlc+ss.............LhGsShchsss.sshEshphptLV+GHAYSVTGsc-Vph....+Gph.pLlRlRNPWG.pVEWsGsWSDssspWs.lD.--+spLphphp.DGEFWMSFcDFlpaFoRLEICNLT 
.....................................................................................................................................................................................W.hR.....s.....t.........c......h.......h......t......p................s........t........h.....h.......t..s.h.p...................t...........s..l..t..QG....l........G.c.....C.a....hl....u....u.l....u....s....l.............s....................p.................p.........h.................h......t..p.......l......h....................t....p................p..........h.............p......p..............p..................h.............G..h.......ap.......h...p.............h.....a.....p............G..p..W...............h.p.Vl.lD.D.h.L...P.............s......h............p.......s....p....................l......h...a.........s........p.........s..........t.p.....t......s......-....h........W.ss.L.lEK...AYA.K......l............t........G...........s..Y...........p......s.l..............p........u.G....p....s.........t.......c.....u...h........c.h......TG..s.....h..s....p.....h...t......h....p..p...s..................................................................................................................................................p.h.h.p......hh..t...p....h....h...p.ptp...............................................h.h.ss....s.h.....t........................t.............t...t....h...t...t...........u...L.......h..t...tH..AY..ul.h...s.h.p.p.......h.p.h...............................................p..Llcl.....+N.....PW...............G......p..................h.......c.....W...p....G........W......u.........-..t...........s...............p....W..........p.........................t.........p....t.......t......h.....................p...................t..................s.G.......F..W.....hs..hp-h.h.p.apth....................................................................................................................................................... 
0 477 681 1105 +3725 PF01478 Peptidase_A24 Peptidase_C20; Type IV leader peptidase family Bateman A, Yeats C anon Yeats C Family Peptidase A24, or the prepilin peptidase as it is also known, processes the N-terminus of the prepilins [1]. The processing is essential for the correct formation of the pseudopili of type IV bacterial protein secretion. The enzyme is found across eubacteria and archaea [2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.44 0.72 -3.88 103 5022 2012-10-02 13:41:03 2003-04-07 12:59:11 13 9 3530 2 1191 3816 981 108.60 23 49.30 CHANGED llhhshLlhh..uhhDhcpthlPs.......tlsls.....hlhhullht.............h.h.shhshhttlhuu.........hssalhhhh.....................................................................................................hh.....................tht....uhGtGDhKLhuslGshhu.......httl...................hhhlllus.lhGslhsl ......................................................................................................h.hhhshllhh..uhhDhcp........th...l.P.s........tl.s...hs........hh.h.h.ullhs............................................h.h....hhs...h..h.s.s.lh.Gu..................hss..ah...hh..hh........................................................................................................................hh....................t.hp.........uhGhGDlKL...huul.....G...s...a.h.G...............hp.tl........................h.h.l..l.h.h.uu...lhuhlhh.h............................................................... 
1 403 759 1005 +3726 PF03510 Peptidase_C24 Endoptase_C24; 2C endopeptidase (C24) cysteine protease family Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 25.20 26.00 24.70 18.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.36 0.72 -3.86 5 118 2012-10-02 13:45:52 2003-04-07 12:59:11 9 6 60 0 0 131 0 103.30 42 5.21 CHANGED GYuVHIGNGlYISlTHVAsussclhus-hKss+osGEhChl+ustIp.ousslGoGsPlpDspssPluTshpc+oasTTos+IsGhpsssso..pT+pGDCGLPYlD .....GaslHIGsGlYlSssHVA+uus.h.spchhshp.ssu.-hChl+ustIt..SsAtl.u.pGpPVp.DshtuPlu.................TthtcKsaoToohKIsGhthsssT..pTppGDCGLPYhD. 0 0 0 0 +3727 PF01364 Peptidase_C25 Peptidase family C25 Bateman A, Griffiths-Jones SR anon Pfam-B_516 (release 3.0) Domain \N 20.00 20.00 20.00 20.10 19.20 19.90 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.57 0.70 -5.49 77 266 2012-10-03 02:24:44 2003-04-07 12:59:11 13 33 169 1 119 314 445 362.10 21 31.09 CHANGED hlII....s..ssph.................hstsppLssa+psp......GhsstlVshpplhspas.............sG......sssu.........................IRsalchhasph.s..t...h.ph....llLhGDs................................tt...lPshps...................................................................................................................s.hsoDsaauhl-s...............................................scths-..lslGRlssposppspphlsKllsY-ps...t.....t...........W+pph..hhluss...................................................................................................hp..thtsthtthhst.h.sp+lahs..sh.ppss............................h..............lhpsls.p..GshlhsY..h.G......HGutssh........sstt.............l......shsshpshsNts....phPlhlshoCthupa.sss................ohuEthl......hsssuGAluhlusoc........hsh...................................sss.....spthspthhctlhts.t............p..............slGcshhtu+tphh............................................hsLlGDPulpl 
.....................................................................................................................................lII....s....stph.............................hstspcLssa+ppp.......Ghpstllsspplhspas..................G....sstA.........................lRpal+hhaspt.s.......t.pY...llLhGDs.........................................thlsshts.............................................................................................................................sthso.Ds.aa...uhlcs...............................................ss.h.-...lslGRhsspos......p..-spshl.sKhlpYpps..........us................Wppph..hhhuss.....t........................t..............................................................................................................................................................................................hs....thspthtt.th.st....h.hp+lhhs..sh.ptss................................................lhpths.p........Gs.hlh.s.Y..h.G.............HGutsth..................uppp...............................l......shsc.hp.s..hsNts.........phPlhlos.sCthu...pFDtst....................ohuEthh...........hsspu...GAlAhhuooc.hsa...................................ssh...............spthspthhctlhsptts................slGcshphuppphhpt...............................................shhp......asLlGDPulph............................................ 0 77 113 119 +3728 PF03785 Peptidase_C25_C Peptidase family C25, C terminal ig-like domain Bateman A, Griffiths-Jones SR anon Pfam-B_516 (release 3.0) Domain \N 20.80 20.80 20.90 21.40 20.50 20.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.39 0.72 -4.17 3 29 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 9 1 7 33 38 79.70 41 6.38 CHANGED PTcMQVTAPAsIstosASasVuCDYNGAIATIScDGcMaGTAVVc.sGsATIsLTcuIT-EoNLTLTVVGYNKlTVIKsIpV ................Ppp.phThPApls.spuShslsss.sGuhssISpsGphaGouVsp.sGsATlNlT.slTs.pushslTlst.NhhsVIKsIps......... 
0 7 7 7 +3729 PF03412 Peptidase_C39 Peptidase C39 family Bateman A anon Bateman A Family Lantibiotic and non-lantibiotic bacteriocins are synthesised as precursor peptides containing N-terminal extensions (leader peptides) which are cleaved off during maturation. Most non-lantibiotics and also some lantibiotics have leader peptides of the so-called double-glycine type. These leader peptides share consensus sequences and also a common processing site with two conserved glycine residues in positions -1 and -2. The double- glycine-type leader peptides are unrelated to the N-terminal signal sequences which direct proteins across the cytoplasmic membrane via the sec pathway. Their processing sites are also different from typical signal peptidase cleavage sites, suggesting that a different processing enzyme is involved. Peptide bacteriocins are exported across the cytoplasmic membrane by a dedicated ATP-binding cassette (ABC) transporter. The ABC transporter is the maturation protease and its proteolytic domain resides in the N-terminal part of the protein [1]. This peptidase domain is found in a wide range of ABC transporters, however the presumed catalytic cysteine and histidine are not conserved in all members of this family. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.43 0.71 -4.34 36 3484 2012-10-10 12:56:15 2003-04-07 12:59:11 10 38 1880 2 701 2968 283 127.90 24 22.09 CHANGED +h.hlhQscppDCGlAsLuMlLpa..aGpphslscLRphttsstcGsohhuLhpsucphGhcspulpschs.hh....ppl.hPhIsah....phsHahVlhph....pcspllIsDP..uhGphplsppcFpppao.GhhLhhsssss ............................................................t...hh.Q.pt.-..CGh....AsL.shlh.pa.......a...G...p.......p....h........s.......h...t.........p.......L.........+...........c..........h.........h..........t........h............s............t..........p....G.....s.o....h.h....s.......l......h......c...s.A.c..p....l...G.h.....p....s+.s...l...p....h.shp....pl...........spl..s..h..Ps.Ilah......................phsH..asVl..hph.......................cps......p.....l....h..l..t..D.P......uhu.h...h.........p...h.s..h.pc.F..t.pta.....s.....Gh.h.lhh..t..s...................................................................... 1 198 417 576 +3730 PF00770 Peptidase_C5 Adenovirus endoprotease Bateman A anon Pfam-B_900 (release 2.1) Domain This family of adenovirus thiol endoproteases specifically cleave Gly-Ala peptides in viral precursor peptides. 20.90 20.90 21.20 21.50 20.50 20.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.46 0.71 -4.74 13 133 2012-10-10 12:56:15 2003-04-07 12:59:11 13 1 103 2 0 101 10 176.50 66 88.43 CHANGED hFLGTFDKRFPGFlucsKhuCAIVNTAuRETGGlHWLAhAWpPpSpThYhFDPFGFSDp+LKQlYpFEYcuLL+RSALuuosDRClTLlKSTQoVQGPsSAACGLFCshFL+AFspaPssPMcpNPsMsLlsGVPNphhpsPps.ssL++NQptLYcFLpp+SsYFRpHpcpIccsTuFs+lp ...........YFLGTFDKRFPGFlu.scKlAC.AIVN..TAGRETGGVHWLAFuWNPRSpTCYhFDPFGFSDcRLKQIYpFEYEuLLRRSALA.o.sDRCloLEKSTQTVQGPpSAACGLFCCMFLHAFV+WPcpPM.DtNPTMsLLTGVPNuMLpSPpV.sTL+RNQEpLYcFLtpHSsYFRsHcspIc+sTAF-+h.......................................... 
1 0 0 0 +3731 PF03568 Peptidase_C50 Peptidase family C50 Griffiths-Jones SR anon MEROPS Family \N 25.80 25.80 25.90 26.50 24.70 25.50 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.13 0.70 -5.78 18 368 2012-10-03 02:24:44 2003-04-07 12:59:11 12 14 277 0 272 385 4 306.50 25 21.85 CHANGED lphspssssL.....lloR..lssspsPhh.l+lPlp+hssp.ts.t.............................shcchccIlppsppss........pspps+ccWWpcRhtLDp+hppLlpslEcsalusapslh.s..h...psshpphspph.shLpptL.scpt..........hphsphlLclh.......................hhsthpcLhhhhlclLpapt.t........uhsEtchphhphhlc-slschppppspp......ppaslLVlDpplpphPWEslssLps.s.VoRlPSlphLhphhpppcsphp.th.hssp........................shYlLNPsuDLspTpccFcs..hFpphts.....WcGhhuphPop-phhcsLpssDlFlYhGHGuGtQYlpspslp+hpppusshLhGCSSstlt.pGp.hcstGshhpYlhAGsPhllGsLWDVTD+DI ...........................................................................................h....l.+...h.........hh..h.hsh..t..t........................................................hhtph.tl.p.t..s.............sttt+ttWWt.tR.t.LDp.phptll........tph-.p..hhu..sa.pslh..................t.......tth.ph..ltp...l.tpt.....................................................p..h.hp.h.........................................t.h..h.....hphht...........................................shs..t.p...h.............h.t..htt...h.t.t.................................tt.hhLllDpp..lp.h.hPWEshshhpt.....s...lsR..hP..Sl.t.hlhp..h....h..........p......t........t.................................................................................................................shallN.Ptu-.....LtpTp..p...php......hpp.............Wp.uhh.sp........Pst.p......p.......h.ptL..t..pp.....c...........lhl.YhGHGuGtpal.ps.pt.l...t.+.......h...........p...p......p......u....s......shLhGC....SSutlt.tG...........p..h-sh...............Gh.hpYhhAG.sPhllusLW-VTDpDI.................................................................................. 
0 103 169 235 +3732 PF03421 YopJ Peptidase_C55; YopJ Serine/Threonine acetyltransferase Bateman A, Mistry J anon Bateman A Family The Yersinia effector YopJ inhibits the innate immune response by blocking MAP kinase and NFkappaB signaling pathways. YopJ is a serine/threonine acetyltransferase which regulates signalling pathways by blocking phosphorylation [1][2]. Specifically, YopJ has been shown to block phosphorylation of active site residues [3]. It has also been shown that YopJ acetyltransferase is activated by eukaryotic host cell inositol hexakisphosphate [4]. This family was previously incorrectly annotated in Pfam as being a peptidase family. 19.90 19.90 20.30 19.90 18.00 19.20 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.14 0.71 -4.41 17 290 2012-10-10 12:56:15 2003-04-07 12:59:11 11 1 201 0 29 190 4 171.80 37 54.49 CHANGED spsLhpYuptshsplpsspt...s.shsphDlchLshLstshNpRhPsLNL+hacSsp-hhpulpp.......pssstuhRsllp.....hstsshH+sAlDl+.+.sG+solllhEPAsh.s.t......lsthsphtpphppphhspschshlEsphQ+S.s-ClhFuLshAl+uapcp.shhcplHcs.tpts ................................ctLpshhptlpsplhcGphh..phshpchDlchh..PhLVsptNp+aPp.........LNL+hhp..Ss.p-..h...spuIKp..............ht.puspStRhlls.....hss....sulHhsslDh+......h.....h.sG..Ks..SlIlhEPAshsshsss...............hLuhc.sc.....s..sh.-pp..p..L...P..cs.....hhuhlEhDIQRSso-CsIFSLuhAKKhhhEt.tshs+lH-cphp.t................... 
0 2 8 16 +3733 PF03290 Peptidase_C57 Pox_I7L_G1L; Vaccinia virus I7 processing peptidase Mifsud W anon Pfam-B_4082 (release 6.5) Family \N 21.00 21.00 21.20 281.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.35 0.70 -5.96 11 72 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 43 0 0 60 7 422.30 72 98.86 CHANGED MDRYTDLVINKIPELGFTNLLsaIY..ShsGLshslDlSKFhTNCNGYVV-+a.DcSsTAGKVSCIPluhLLELVcpthL.......stPsstcp...ELslKctLl......spL+s+Ypshp-lhsLP..TSlPltYFFKPhL+EKVSKAlDFSQMDl+sDDL.S+tGlpsGE.NsKlV+lKIcPD+cAWMSNpSIpsLlu.hua.GoEVsYlGQFshpFLNshslaEKh-hFht+phLualL+DKI+....puppRYVMFGFCYhuHWKClI....aDKccphVsFYDSGGN.PsEFHHYcNFYFYSFSDGFNsNs..cs.SsLsNpNsDlDVLFRFFpssF.ss+hGCINVEVNQLLESECGMFISlFMllCshpPPKGFKulRKlYTaFKFLADKKhTLaKSILF.........Nhschsl-lccl-scul+EYpKME+WTpKuIssLssKIos+sNcIlN M-RYTDLVISKIPELGFTNLLCHIY..SluGLCoNIDVSKFLTNCNGYVVEKY.DKSsTAGKVSCIPIuhhLELVESGaL.......S+.P.NSSD........EL-QKKELs......-ELKsRY+SIhDlFELP..TSIPLAYFFKP+LREKVSKAIDFSQMDLKIDDL.SRKGI+TGE.NsKVVKh..KIEPERGAWMSN+SI+NLVSQFAY.GSEVDYIGQFDMRFLNSl..AIHEKFDAFhNKHILSYILKDKIK....SSooRFVMFGFCYLSHWKCVI....YDK+psLVSFYDSGGNIPoEFHHYsNFYFYSFSDGFNTNc..++.SVLDNoNCDIDVLFRFFEsoF.GAKlGCINVEVNQLLESECGMFISLFMILCTpTPPKSFKSLKKlYTFFKFLADKKMTLFKSILF.........NLpDlSL-lsEoDNsGLKEYKRMEKWTKKSINVICDKlTTKlN+IVs.. 0 0 0 0 +3734 PF00851 Peptidase_C6 Helper component proteinase Bateman A anon Pfam-B_326 (release 3.0) Family This protein is found in genome polyproteins of potyviruses. 
25.00 25.00 33.40 31.50 21.30 20.50 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.42 0.70 -5.86 19 1177 2012-10-10 12:56:15 2003-04-07 12:59:11 13 27 145 1 0 1300 0 409.10 47 19.54 CHANGED pFapGaspsFhch+s.pshsHsCssshs.VcpCGclAAllsQulaPstKITCppChpphpshstcEhtphlpsphppthphhpsthspFsHlpphLchlppthpspNhNhpshtEIh+lls.spppuPFs+lpclNchLlKGspsosp-htpAoppLLElsRahKNRT-sIcpGuLpoFRNKlSuKAplNsuLhCDNQLD+NGNFlWGpRuYHAKRFFsNaF-plDPucGYs+YhlRcsPNGpRKLAIGsLIVstsLcphRpphpGcsIpp.sloppClSppsGNaVYPCCCVTh-DGpPhhSElhhPTKpHLVlGNoGDPKYlDLPpspsspMYIAK-GYCYlNIFLAMLVNVsEc-AKDFTKhVRDhllPcLGcWPThhDVATAChhLslFaP-spsAELPRILVDHsspTMHVlDSaGSLoTGYHlLKAsTVsQLIpFAsssL-SEMKaY+VG ..................................................................................................................................................................................................................................................................................................................FW+GaspsahphR..h.s.ps.H..p.C.t.s.s.ls.lcpCG.clAAlhspulhPChKITC.pCspchtphstp-.htphl.pp.phpphtp...p...hts.hs....cFt+....hhphLphhcp..h..h..psts..t..sh..phht-lhchls.phppssapplpclNphLlKG.s.ps.Tsp-hppAoppLLEluR..ahKNRp-slccGsl.psFRNKlSuKA.....plN.sLhCDNQLDpNG.NF....lWGpRt.YHAKRFFuNYFE.lDPscGYppa.hRhpPNGpRKLA.IGpLllPhshpphRcphpGc.lpp.sloppC.lSp.psssaVYsC.CCVTh-DGpshhS-lhsPTKpHLVIGNSGDsKYlDLPps.c.sppMYIAKcGYCYlNIFLAMLlNlsEp-AK-FTK.hVRDhlls.cLGpWPThhDlATsCh.LplaaP-stsAELPRILVDHpsphhHVlDSaGShooGYHlLKAsTVsQLIpFspssL-S-hKaY+VG...... 
0 0 0 0 +3735 PF01830 Peptidase_C7 Peptidase C7 family Bateman A anon MEROPS Family \N 20.50 20.50 70.50 70.50 20.20 20.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.77 0.70 -5.22 14 61 2012-10-10 12:56:15 2003-04-07 12:59:11 13 3 5 0 0 57 0 157.40 65 35.17 CHANGED MSCLRKPSQSLVLSESVD.....PTT.....VD..PFVDVRAEEVVPTGCMTLWEYRDSCGDVPGPLSHGDLRRLRTPD.GVCKCQIHFELP....TVLKSGSTGTVPEHPAVVAAFMGRPRRCSLEQRTKELDFRFLQLVHEGLPVRPSYMIARPPRPVRGLCSSRDGSLAQFGQGYCYLSAIVDSARWRVARTTGWCVRVAcYL+LLQWVGR+SFGSFQIEESAVEHVYHVI.......VDTEaQSEQDGALFYQAVSDLAARDP .....................MSCLRcPSQuLVLstS.D.....Pso.....lD..PFsDVtsEEVVPpGChTLWEYRDSCGsVPGPLSHGDLh+L+TPD.GVC+CQIHFELP....TVL....................................................................................................................................................................................... 0 0 0 0 +3736 PF03569 Peptidase_C8 Peptidase family C8 Griffiths-Jones SR anon MEROPS Family \N 21.60 21.60 22.20 228.00 21.20 21.50 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.45 0.70 -5.03 2 18 2012-10-10 12:56:15 2003-04-07 12:59:11 8 4 4 0 0 20 0 161.60 79 15.59 CHANGED hARuIGlSptustELVRsT+VsEsKPHLsPMpEAp.s.cQ.L.stRuh.Vssc.tslEls..clPscEGcCa.h.Fp.s.hT.hIhspKP..p-lluhhp.sssThDSL-Io+psthVHh.sG-sapsY+pIhAhLc.h.h.s.p.hlVGAcpspltDYVtAutpFLhp..pWl+NGL+lAtthhpPt.lhK..h.NsShP+sl.psD..YIp LARAIGLSHsAsuELVRAT+VDEsKPHLVPMEEhKEAPRQQLVPRRSTFV-NHEEEVEVDsLRLPTEEGRCFELLFN.NQlTPAIFDKKPLLRDVL-VFEENVCThDSLEISHSD+CVHIVPGETFRNaKEIKAVL+VILWN-P-.ILVG............................................................... 
0 0 0 0 +3737 PF01707 Peptidase_C9 Peptidase family C9 Bateman A anon [1] Family \N 25.00 25.00 115.50 114.60 22.30 20.00 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.34 0.71 -4.72 8 257 2012-10-10 12:56:15 2003-04-07 12:59:11 11 7 47 2 0 311 0 192.80 66 8.58 CHANGED DPWIKsLTNsP+GNFTATlEEWQAEH-uIMpuIpspusssDsFQNKsNVCWAKuLlPVLcTAGIcLTsEQWsslh.sF+-D+AYSPElALN.ICTKaaGlDLDSGLFStPoVPLoYc.......ssHWDNpPGG+MYGaNppVAtpLp+RYPhlptthcoGcQlslspt+lpshsscsNllPlNRRLPHuLVspacppcuuchEphlsK DPWIKTLpNsPKGNFpATIcEWcsEHsuIMAuIss+thohDsF.QNKANVCWAKuLVPlL-TAGI+LsccQWSpIl.AFKEDKAYSPEVALNEICTRhYGVDLDSGLFStPhVSlaYt.......sNHWDNRPGG+MaGFN.EsAshLER+YPFh+Gphsh...s+QlsVsTR+IcDasPssNllPsNRRLPHSLVsEH+ss+GpRhEWLVsK........ 0 0 0 0 +3738 PF00413 Peptidase_M10 matrixin; Matrixin Bateman A, Finn RD anon Prosite Domain The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null --hand HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.42 0.71 -4.50 66 3213 2012-10-03 04:41:15 2003-04-07 12:59:11 19 158 1096 316 1273 3093 388 160.90 27 39.87 CHANGED +W.p+.pp....LTYplhsho..sclspspVcpslc+AFpsW.ucV......oP.LpFpclt.......p...s....ADIhIsFs............pspHGDshP..FDGsuG.........sLAHAahPs..........hsGDsHFDsDE...pWThs...............................................................................................................................................................................spGhsLhhVAsHElGHuLGLsHSss.sAlMaPhYp.ht.p......pLspDDlpGIQpLYG 
................................................................................................................................................................................................................................................................................................................................................................................h........tt.h....pp....shtpA.hp..h...W.sp.s.................ss..lp..Fppl.............p.tp..........ADI.h....l.t.Fh..............................p.t..t..c.....s......s.........s...............s......a.......-...u......s.s..........................hL..A+.A....ah..Ps..............h..tGss....H.a..-...t...s.c.......a.s.h.s...........................................................................................................................................................................................................p...s..h.s...L..h..h.VA...s...HElGHuLGL.s.H..........o...........s......s..................p........u...l...M.h.....P.h.hp..h..................tLtt-Dlpu..lptlY........................................................................................................................................................................................... 
2 289 498 817 +3740 PF00675 Peptidase_M16 Insulinase; Insulinase (Peptidase family M16) Bateman A anon Pfam-B_88 (release 2.1) Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.58 0.71 -4.45 26 11461 2012-10-02 15:41:56 2003-04-07 12:59:11 15 55 4386 192 4038 9720 2217 136.00 20 22.38 CHANGED pVsspps.ssspuslulhlssGSthEspp.sGlAHFLEHhhFpG.Tp+hss.ppltphlcphGGph....NAhTscEpTsYhhcshscs......lspul-hlu-hhtps...thscppl-c........Elptlcsch.....lhctlcssuaptpsLupslhsPt .........................................................................................stht.lhh.t.s.Guh........t-...s.....t....t..p...........s........G...l.A.Hh.l..E.H.............hh..Fp............G......o.....p..p......h......s.....s....t....p.....ht..p.hl.p.p..h.G....u.p.h...........................NAhTu....h....-........p..TsY.h..h....p....s.........s..pp....................................lp..p..s.l.......p....lhs-...h....h..t.p..s...................t.h.s..p..p.p...l.........c..p...............tp.........t....h....h..h..t..E.h.p......h.psp..............h...t.....h.................t.......................................................................... 1 1413 2529 3444 +3741 PF00883 Peptidase_M17 Cytosol aminopeptidase family, catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_990 (release 3.0) Domain The two associated zinc ions and the active site are entirely enclosed within the C-terminal catalytic domain in leucine aminopeptidase. 
25.00 25.00 25.00 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.89 0.70 -5.55 17 5342 2012-10-02 19:46:12 2003-04-07 12:59:11 16 21 3337 121 1482 3949 3549 304.00 41 63.96 CHANGED lthuR-LhssPuNhlsPshhAcpApclupshu...lplpVlcpcphcchtMGuhLuVupGS..ppsPphlhlpYpsst.....cspphluLVGKGlTFDSGGhsIK..sussMctMKhDMuGAAuVhushpAluplchs.hsVhsllshsENh.SGsA.+PGDllsshsGKTlEVpNTDAEGRLlLADulsYApp.hssctlIDsATLTGAhslALGsshsGlaoss-pLtcplhpAuppuGEtlWRhPhp-c.YhcplcS.slADls..NhusttG..GuhTAAhFLppFlpp.....WhHlDIAGsshtpptt......GuoGhsVpTLsp ..............................................h..hsRDLsNtPsNh....hsPstlAp....p....A.....p....p.lspphu............lps...c.l.l...sp..cplcchG..hsuhh.....uVG+.G....S.....pp....sP....t....llhlc...............Ypsss...................t........ss...t......lsLVGKGl..TFD.................o.....GGlSlK......Pu.s..sMc.pMKhDMuGAAsVhGshps..lu.p.h.........p..L.....s.......lp....Vhullss.s...ENhsuGsA.............h+P................GDllTshs...........GpTVEVhNTDAE.G......RLVLADuLsY....Apc.....h............c........P..c......h....l....ID.....hA.TLTGAs...h.............lAL...G...p.c...h...sul....h....os....s....-....s..............L...s....p....p..l....htA...............u.ppss-..h..h..W..+L....P.....L.s.-..........p.....a....p.....c.p........l..c.........S...sh...A.D.ls........N..h.....u..u.p.t.s.........Gu......lTA......AtF......L.......p......c.......F.....s............c................p.....................................h.......sWsHl...DIAGsuhpssst.........tG.uTGhsVphLs.p........................................................................................................................ 0 495 885 1213 +3742 PF01401 Peptidase_M2 Angiotensin-converting enzyme Bateman A, Coates D anon Swiss-Prot Family Members of this family are dipeptidyl carboxydipeptidases (cleave carboxyl dipeptides) and most notably convert angiotensin I to angiotensin II. 
Many members of this family contain a tandem duplication of the 600 amino acid peptidase domain, both of these are catalytically active. Most members are secreted membrane bound ectoenzymes. 19.40 19.40 19.50 19.50 18.70 19.30 hmmbuild -o /dev/null HMM SEED 595 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -13.13 0.70 -6.32 6 802 2012-10-03 04:41:15 2003-04-07 12:59:11 13 14 230 68 446 817 299 468.30 35 85.85 CHANGED DsshpssshssDEAtAphFsEpYspSApsVh.c.stAsWsasTNITsEsu+h..Ecuh.sppas.saGp+AKch.ss.aQNhT...l+RIItpVpsLspAsLPltchppYNplL.sMppIYSsupVCaPN..uoChsL-PDLTNlhAoSRpYscLLaAWEGW+DtVGhslhPhY.castlSNcAhR.sGaoDsGs.WRShYESPohEpDLE+LYQpLpPLYLNLHAYVRRALHR+YGscYINLcGPIPAHLLGNMWAQoWpNIYDhVVPFPstPNlDsTpsMlpQGWssp+MF+.A--FFTSLGLLPhPPEFWscSMLEKPTDGREVVCHASAWDFYNtKDFRIKQCTpVsMEpLssVHHEMGHIQYahQYKDLPVoLRcGANPGFHEAIGDVLALSVSTPcHLHpIsLLsp.ssshEpDINaLhKMAL-KIAFlPFuYLVDQWRWtVFsGphspppYN.-WW.LRhKYQGlCPPVsRspscFDsGAKFHIPussPYIRYFVSFllQFQFHEALC+tAGHpGPLHpCDIYQSpcAGt+LtsshphGhSRPW.EshKslsGpsshsApAhLsYFpPlopWLpppNpRpGEsLGWPEYpWpP 
...........................................................................................................................................................................................................................................................................h..hht..p.....h.......utW..ts.ls.t....s..................t..h..t.......t.t.h....................t....p....s.t.t..h..............hps.p.......h+..p.....hthl..t........t.....s...s.l........s......t.c......t..ph...t....pl....h..s...p...Mpt......hYutu...ph.C..........p......................tp.C......h......Lp.....P......p...lpp.....lh...s......pS+.......sh..........pchhasWpuW+ptsG..h.....h....+s.a.chV.t...LtNc..sAp....s.................satsh.uth.W.....p..t....Y.....-......................s..........p..........h...........pp.....l-.pla......p.....plpPLY.....pLHuYVRtpLhptY..G...p..h.l...s........p.G..s.lPAH....L....L.....Gs.MWuptWs..sl...hsh.hhP..................a.s..t.t..s..t.h..D..l.o..........t...t..h.hp..p.s.a.ss.......chap.u-pFFhSlGh......hs....pFW.ppS...hh..p.c....PtD...R..cl..lCH.s.o.A.WDhh.......t........t.....t.........D.......h..R.....IK.Cs..p...ls.h-...phhshHHEhGHlpY...ht...Y..p..p...Ph..hh+pGA.NsGFHEAlG-hhuLSssTPp.......aLp.p.lGLL..p.p........................t...s......................c..............t..p.........IN.......h.L...hp...ALppl.shlPFsh.hh-pWRWtVF.pGp....ls...p..p....aNptWW.ph..+.catG...l.hsP..l...Rs.p.p...hDsuuKaH..lss.s.hs.a.h..............RY..ahuhlhQFQFacuLCpt........u........t.........p.......p.......s...........P.......L......ap.CD.lh...t.sp...tAGph.Lt.phh.phGtSp.....sW.psh.............p.hs..G..p..p...p.......hsspsl..hpYFpPLhpWLpp.pNt.....hGW................................ 0 164 207 346 +3743 PF00557 Peptidase_M24 pep_M24; Metallopeptidase family M24 Bateman A anon SCOP Domain This family contains metallopeptidases. It also contains non-peptidase homologues such as the N terminal domain of Spt16 which is a histone H3-H4 binding module [3]. 
20.70 20.70 20.70 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.78 0.70 -4.75 656 20523 2009-01-15 18:05:59 2003-04-07 12:59:11 19 77 5099 201 5995 15852 9743 218.60 23 58.35 CHANGED pth+pAsplsspshp.th....h..t.......lc...s...........................G.........................h.........o..-t-lsshlp...h......h...tp.......t..u...........tpth..........ua.....s...sllssG.sss...h..........sHh...hs..........s.tpt................l....pp....G.................................-l..lhlDhGsph.....GYtuDhoRTh....h..h.......G................s.....t.......cp.la.......phlhcup...p........tul.p.tl.+....P.G.....s...........p..hs-lsptspphlp..p.......h......G.h...................................thhh+.......thG.H........GlG......l.plH...tt..............................P......h............l.....s........................................................tt......tth.........Lc.....sGMlhol.EP.................Gha..........h.s.....................................................sh..........................................................................h.Gl+lE-......sllloc 
.....................................................................................................hh+pAsplss.pshp.th...h.....ph..........lc.........s...........................G.................................h..............o.....ph-...l..s..thhc...ph........h.........pp.................tu..........................hsth.....ua..........s...shlss..u..hN.ss........l...................sHh.....hs....................s..sph...........................l.......cp.......G.................................-l.......l.h.l..Dh..G..s...h...hs..............G.atuDhoRTh...h......l............G..........p.ss.....p.................................tcc.lh...............p.h..s....h....c....uh...................tul....p...th....+.................P...G........s................................p......ht-.l...stthpphhp..p...............t.............G.h...................................................thhtc........hsG.H....GlG............h...ph.H.....-t............................................P.......l.............................................................................t....pshh.....Lc................GM.....lho...l..EP....................ulh............s...........................sh...................................................................................................t.usphEc...slllT.......................................................................................................................................................... 1 2007 3696 5023 +3744 PF01742 Peptidase_M27 Clostridial neurotoxin zinc protease Bateman A anon Pfam-B_407 (release 4.2) Family These toxins are zinc proteases that block neurotransmitter release by proteolytic cleavage of synaptic proteins such as synaptobrevins, syntaxin and SNAP-25. 
20.10 20.10 20.40 20.60 19.20 20.00 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.48 0.70 -5.61 13 216 2012-10-03 04:41:15 2003-04-07 12:59:11 12 5 34 118 5 238 0 366.70 42 32.95 CHANGED hsIN.sFNYsDPVsNcsIlhl+...sctsspaaKAFcIssNIWllPERa.aGpsPs...hstssplpuh.s.uYYDPNYLoTDsEKDcFLpshIKLFpRINssssGcpLLshIssAIPahGs............................spT..spFssspcosslshK..p....sspshhhsNLlIFGPGssIl-ssshshp.......sppsucpGFGoht.lpFsPcasasFsss...........p.FhsDPALpLhHELIHuLHtLYGIK.ssshplh.ppps.ahstpts.lphEElhTFGGpDhplIssssppplhshhLssa+sIAp+LNplpos.sss..h.lspYKphacpKYphsp-ssGpasVNlsKFsclYppL...aTEpshustaplpsRppYashcYh.hph.slLssslYsIp-GFNluphshplshsGQNtplNs .....hpIN.sFNhssPVDNcsllhl+...sctsspaaKAFcltsNIWlhPERa.aGps.p...hscs.phcs....uhYDssaLsTssEKDpFLpshIK.LhpRINssshGcpLLphIssuIPa.hs............................pps..tpatthhposhls.p.......sspphhhsNllIhGPGssIh-spsh.ht........p.sucpGhGoht.lhFpP.hsatasp..............FhhDPAlpLh+pLI+uLahLYGIK.sssh.l.hp.ps.h.p.phs.lph.-hhs.GGpDhphIsss....h.shhhssh+shtpphNchpspltss....hN.hK.hhcpKath..sspshaslslshFsc.ap.l....h.p.phspthphhpRppYa.hsas..........c.YsI.pGFs.sphNhplshpspNpsI.s......................... 
0 2 5 5 +3745 PF02073 Peptidase_M29 Thermophilic metalloprotease (M29) Mian N, Bateman A anon IPR000787 Family \N 21.50 21.50 21.60 21.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.29 0.70 -5.70 73 2095 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 1554 6 468 1517 492 367.30 34 92.86 CHANGED M....hcpplc+aAcllV.....chGlslQtGpplllpus.hc.stchs+hlsccAYctGAtpVhlpas......DsplsRhhhpt.usc-p..hc..phPpach...pphhccssuhlulhu.pssshLssl-sc+luthp+At.sts.hcthhpth.tsthsWslsshPotuWAp.h.....psp.ul-phh-hlFcss+lDtt.sPltsWcp+scplpphschLNcpph.pL+ap.us.....GTDLolplscp+hWtuust.sp.....hhsNhPotEVFTuPhcpsVsGhVpsspPLsapGsll-shpLsFccGclV-hsA..cpGc-hLpcll-TD-GA+hLGElALlsssu....hshhhaNTLFDEN.....uSsHlAlGpAYstsltsG.t.........tGhNcShlHlDhhlu........usclplDG...tsp..slh+sGsa ..............................................................................................................................................................................hcppLc+hAclll.....phGl.s.lQ.Gpplhl.....pss.....lp.thtL.s.+hlsccAYp.hGAt.Vh.lpap......Dpt.lp+.hh.tus..-p..hp.........phspach.t...t.t.h.h.cp..t..s..uhlulhu......psP.....shhssl-..s.....p+ltthp.psh.utu..hc......hhpthttsphsWslsuhP.s.tWAp.las........ssctuhp.thh-tlFc.ssR.l.s....t..sPl.t.sWcpHsptL....pphsc.hLNc.pphttLH.as...us........G...TDLTluLs..c..p..+..h..W.p.s.us.s.hsstt.......hhsN...h.P.....T.....EEVFTuPspp.........p....ls......GhVs..s....s...p..P..Ls...Y..s.G..s..l.I...-.s..hplpF..c..cGcIV..-hsA................cp.G..c....c...hL......c.c..l.............l............p..s..D......E.............G.............A....+h.L...GElALVsssSP.....Iuppsh.....h.Fa.....N....TLFD......EN.....AosHlAlGs...AY..s.......h.......s......h............s......G.t.......p..h.......s.......c..p..........h.......t...tGhN.p.S....................l............HhDaMlG............osch.s.lDGh.tsGp...lh+sGpa..................................................................... 
0 181 345 417 +3746 PF01432 Peptidase_M3 Peptidase family M3 Bateman A anon Swissprot Family This is the Thimet oligopeptidase family, large family of mammalian and bacterial oligopeptidases that cleave medium sized peptides. The group also contains mitochondrial intermediate peptidase which is encoded by nuclear DNA but functions within the mitochondria to remove the leader sequence. 19.80 19.80 20.00 19.80 19.50 19.70 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.57 0.70 -5.45 29 8043 2012-10-03 04:41:15 2003-04-07 12:59:11 15 23 4148 12 2004 6381 1947 390.10 24 63.94 CHANGED hhhpspscplR+psapAhhpphtp.t.....h..stllpchlph+......AclhsasoatphsLts.hspsspsVhsFLtclhpcttPhhcc.hphhpphh.tt.......hshp...clpsWDh...........as......lspp.hp.aFsh.ppslp..................................tpsapp..............ptshhuphalDhasRcu.KtuG..Aashshhst...............................................shshllhNasp.................shs-l.pTLFHEhGHuhHshhoppp.shhssh.sss.....h-hsElsSph.Ethhh-hthhthhs..chpstts...................l.pchlpphh........h.thhRQlhhuth-hplHpttp.st..s...h.c..atplppchhs..hssss.......sp.th.s....ths.l.....thYsusYYsYhaupsh..usshapphhc.s...s.........chhpphLptGGStsPh-hlcphtsc.spscshhcshsh 
...............................................................................................................................................hh.spspplRcphapuhh.pthp......p.......................t..............p.........h...sthh.pphl.ph.+tth.....................Ap...lhGap...s...h.t.....p...h.....t...L..t...s.............s...ps...s...pp....l..h..s.h.L...p.l.h.p.p.t....h....sh...h.p.c...ht.hpp...................................hGhp.......plp..aDh............................as..................hsp.tt.h.c......h.hsh.pp.....s.hp...............hhh..t.h...sh.........................a..p..........................................................ptth..............sphal..Dha.............s+..p..u....K....pu.G....Aahs.shhst..............................................................................s.hsa..l......l...h...Nast.....................................................shs-............l.hTL.hHEhGHuhH.p.h...h.o..p..p.p......s..h..sss...sss.................hshsEhsSph.....Eth....h.hc....h.....h.p.h.h........c..h.c...p..t.t..s................................................................l..pp.hl...c..p.hh.................................h.thh.cpl..h.u..hF.-hpl..Hpt.........pt....s.t..............t.............lsp..hhtp..lpp...ch..hs........sh..............................s.p..thsh.........tau+l..............thY..t..s..s..a..Y..s.........Y..h..a..u..p..sh.......ussh.a.p..t....hh..c....p.u.....s.........................h.......ch...h.p...hL.p.t.GuSp.p.s.h-.l.h.c.t.h.t.s.c..sp.cshhpt...h......................................................................................................................................... 
1 652 1241 1669 +3747 PF02074 Peptidase_M32 Carboxypeptidase Taq (M32) metallopeptidase Mian N, Bateman A anon IPR001333 Family \N 19.90 19.90 19.90 19.90 19.70 19.80 hmmbuild -o /dev/null HMM SEED 494 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.46 0.70 -5.87 3 1205 2012-10-03 04:41:15 2003-04-07 12:59:11 10 5 1118 17 345 1078 910 472.10 36 95.91 CHANGED MphcsshKElL-hhRRIuaLGcAsALhuWDpRTshPKcGpt-RAcuhGpLSsLlaEltTcPRhsELlEKlcGp..hEDLsEDs+ssVRlh+RpYEcsKuIPEchhKElupssSKAETAWEEAKuKDDFStFpPYL-+LIuLAKR...hlpYlG.YpE-P.....YDALLDLYEPGlRsRDL-pLFcELccuL+PLL-+ILuSG++PDsusLcK.+YPKEtQ+cluLalLQchGYDL-u.GRLDsTAHPFsTslGhGDVRITTRY-EcDFRsAIFGTIHEhGHALYEQslDEAahGTPlu-GASLGIHESQSRFWENlIGRSKcFWEhaYP+lKEsh.sphcDloLEDFahAlNtV+PSFIRVEADEVTYNLHILlRaELE+AlFSsEVplEDLPEhWN-KYccYLGIsP+TY+EGILQDVHWAGGsFGYFPTYoLGslYAAQLatKhpEDLP-FEsLlARGEFcPIKsWLREKIHtaGSRa+P+-LlK+ATGE-lNAcYFlRYLcsKY .........................................................................h.......pphh.phhcc...lp.t...hp.phhulhtWD.pT.hMP..p.tG.s..ptRu-shuhLush....haphhT....s....sp....hs....ch....lp..ph...c..pp..............p..p.....L....s.........p.....h......p.....p....t....slcc.....h++p..a.ppsptlPtphhpthsthsucucpsWcp.A.Rt.p.s.Das.tFtPhLpcllchp+c.......hhph.h..u....h.t..p..s..s.........................YDsLL-.YEPGhTsppL-plFspL+....ptlssLl....p.pl.hp.p.....t.....p....p........p...s...s..h..lp.t...p............as.......tcpQ.cphsh..pl.h.c.hl.GaD.Fsp.....GRL.Dp.osHPF.ssu.ls...s.DVRITTRY.sEs.-FhpulhuslHEsGHAhYEQ...s.l...s...c...p...ht....s.o.P.l.....u.........putSMGlHESQSLFaE..lGRSpuFhphhhsh.hpcth....sph..s...s..h.....s.h..-..shacthscV.c.su.h.IR.l-ADElTYs..hHlllRYElE+tLhsG...plpVp...DLPphWNcKhpp..Y.LGl.p.s.p.s.c.tp..GsLQ...DlHWosGsF.G.YFPoYsLGshYAAQhhpshpcsls...........s........l..-s..hl.tp..G-.hsslhpWLpcpIapaGsh.h.s.s.p.-Llpcu.T.G.E.s.LNspahhcaLcp+Y............................................................................................................................................... 
1 111 208 279 +3748 PF02102 Peptidase_M35 Deuterolysin metalloprotease (M35) family Mian N, Bateman A anon IPR001384 Family \N 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.12 0.70 -5.80 4 234 2012-10-03 04:41:15 2003-04-07 12:59:11 10 2 67 1 138 280 1 281.40 31 91.47 CHANGED MRhTsLuoAlLALAssslAhPspos.uPsLDVoLoQVDNTRIKAVVKNTGuEcVTFVHLNFF+DuAPVKKVSlaRp..................ssEV.FpGIKRRhpopGLoc-uLToLusGEolEDEFDIAoToDLopGGslTIRocGFVPIssDsKlTGYlPYpSN-LplpVDuAKAAsVspAlK.LDRRTKVssCsGoRpSALoTALpNsspLANtAAoAApSGSuS+FpEYFKTTSppsRosVAuRhRAVA+EAuSsSSGSTTYYCsDsYGYC-oNVLAYTLPupNlIANCDIYYSYLPALspoCHAQDQATTTLHEFTHAPGVYSPGTDDLGYGYcAATuLSuSQAL.NADoYALaAN.......ulclKC .............................................................................................................................................................................................................................lpl....p.....l.......................t.t.......hps.lpNtutp..hp.hhp.h.t...h..s.t....spph.l..t.t..............................t.t..h..FtGhh...h...ttl.........p..h..l.sttohp.......hs.hA.h.ph..t....t.....h..h...........t...s.......h....h..h.............t...........................s.......h.......h...ss..ht...h.t.s.............s.................................h..................................t........c.Rs..p......h..s..s.......C.s.s.s.ptstl.psALpssupLAptAtp...A...s..p...s...t.s....s...phappaFKoss...sphp.phVtspF.ptlupEs.sps.s.sups.ohhCpD..s.h.s...h..C....p.....s.....s........s..lAYT.....l..s.s.......p..s.....ls..C.Pha.....as....p..hP..s..h......o.ppC...p...u..t...D.......Qs.T....ohLHE.hoHh.thh.......tsp...Dh..u.YG.apssppL...su...s...p...ulpNADoYulFAps............hhht.................................... 
0 38 76 114 +3749 PF02128 Peptidase_M36 Fungalysin metallopeptidase (M36) Mian N, Bateman A anon IPR001842 Family \N 23.30 23.30 23.30 24.00 23.00 22.90 hmmbuild -o /dev/null --hand HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.28 0.70 -5.62 31 458 2012-10-03 04:41:15 2003-04-07 12:59:11 10 28 290 0 202 440 80 321.30 31 49.08 CHANGED AsYpVYPaslssPsc.......GsRsllssP.hss..hASPauWpsssss...paTs...TRGNNshA.ps..ssussth.....sshpPsu.....ssLpFs.aPashshs.Ps.........sahsA.ulTpLFYhsNlhHDlhYphGFsEtuGNFQhNNhGpGGtGsDhVlspAQDGSG..................hNNANFuTPPDGpssRMpMYlWstss.................................................................................................................................................................PpRDGsF-uGIllHEYsHGlSNRLoGGPuNosCLss.p............ouuMGEGWuDahuhslplKsuDTtssshshGsasssps...sGIRsasYSTshshNPhTYssl.......sshsss........HulGsVWAohLaElhWsLIDcaGhss.....sha.........tsG.....GpplAMpLVlDGMtLQPCNPsFlpuRDAILsADhslssGtNp.......CpIWcuFA+RGLGhuAp 
........................................................................................................................................................................t.........................................................................................................................................................................................................................................................................................................................................shp.....................u...........ttD.h.h...s....ts..................hN.NAshtss...s.D...GpsschphY.hsttt..........................................................................................................................................................................s.hD..suh..-AuIlhHEYsHulps+hssG.uss...t.t...p................uGuMGEGauDahAs.sh.ph.................p......ss....s.p.......t.......t..s.......s.......h.......s....h..Gp.......ass.....s..........s.....................s..........s..lR...p.h....sh........ss.......p........s..N..s........hsa.ssh...............pths.pl...................HshG....plWushLa-hhhshlp......p.h.Ghs...............s.h...........................................Gp.lhhpllhpuh.h....ss.......s.spFhpu+cAIhpADthh...hs.....Gtpt........s.l.chaAtRGlu....h........................................................... 
0 108 146 176 +3750 PF01447 Peptidase_M4 Thermolysin metallopeptidase, catalytic domain Bateman A, Griffiths-Jones SR anon Psiblast P06142 Domain \N 22.20 22.20 22.20 23.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.20 0.71 -4.10 13 1948 2012-10-03 04:41:15 2003-04-07 12:59:11 13 69 1073 112 314 1608 50 143.00 32 26.78 CHANGED sssstsGctVhGc......tp.lsls+sust..ahLpDsTctss..lpoYsussp........hohsuo.......lhpssssshs.....s.tuuuVDAHaaAthlYDYY+shasRsSlD...spGhplpSpVHY.....G..ssYNNAaWsGspMlYGDGDs...sFhPLSuslDVVuHElTHGVTEp .......................................................................s...thGh..h.st........sht.hst.sst........a.h..-.s.p...t....t....h............................tshshppt.................sh..st..................hh.p.sp.spshs.........tps...susD.Aa...a.h.u.stsa.Da.Y+...sh.asRsSlD.......spGhtlhShV...Hh..........u.....ps..a..s.NA..F......W.......s..G.....s...p..M..saGD.G..-s...............h..Fh.slsuulDVsuHEloHGVTp............. 0 95 182 266 +3751 PF03410 Peptidase_M44 Protein_G1; Protein G1 Finn RD anon Pfam-B_4417 (release 6.6) Family Protein G1, named after the vaccinia virus protein, is a glycoprotein expressed by many Poxviridae. 
25.00 25.00 131.40 131.30 20.90 20.50 hmmbuild -o /dev/null HMM SEED 590 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -13.06 0.70 -6.26 3 79 2012-10-02 15:41:56 2003-04-07 12:59:11 8 2 49 0 0 69 0 519.90 70 99.81 CHANGED MIVL-NGVRVFINsuMsKDIYlGlSNFGFENDIsEILGIAHLLEHILISFDuTRFVANASTARSYMSFWC+uIRuc.TplDAlRTLVSWFFu+GuLKDDFSlS+IRaHIKELENEYYFRNEVFHCMDILTFLuGGDLYNGGRLSMLDpL-sVR-lLscRMRRIuGPNVVIFVRELuPusLuLLspTFGoLPACPpTIPuTlhsSIsGKsVMsPSPFYTVMV+V-PTL-NILuILCLYEoYHLVDYETlGN+LYVTFSFIHEpDYEuFLRGsGcLplo.spRIcLNYSDDYlMNlYLNFPWLpHDLaDYLTplNsDosSlLpSLEc-IYpSVRsRDlIVVYPNFSsoMsNTRDRQpH+lVVLDs.NluhuupPsRSIcLMKRQT+NElFIRYGDAuLlDYVsLALu+.RchuL+Rps+GIRl+HuFSADDI+sIMESDTFlKYSRSKPAAMYQYIFLSFFASGNSI-DILp+RESlVcF.uR+sKNKIVFGKpARYDVsTKSSFVCGIlRGPpLocsuLT-hMW-LKRKGLIYSLEFTcLhuKNTFYLFsFTIYPE-VYcYLuopKLFSu+ClVVSpKG-sEDFSSLKKDVVI+l .............MIVLPNKVRIFINDcMKKDIYLGISNFGFENDI.DEILGIAHLLEHLLISFDSTpFLANASTSRSYMSFWCKSIput.opsDAIRTLlSWFFs.N.G.KLKDNFSlSsIRhHIKELENEYYFRNEVFHCMDlLTFLuGGDLYNGGRIsMl-NLshVccMLsNRMpRISGsNIVIFVKcLssuTLshhppTFGoLPuCPElIPss.hs.sossGKIVMhPSPFYTVMV+lsPTLDNILuILhLYEsYHLIDYETIGNQLYlTlSFIDEo-YEuFLRG.ul....LplupsppIs.hNY.oDDYhMNIYLNFPWLuHDlaDYITcIN-cocslLhSLps-IYsSIlsRDhIVIYPNFSKuhhNocDsQpH.IVVL..................Ds.....sN.....c..s..hh.....+........+...P.......h.....psIs...........LMK+.h..T.s.NEIaIRYGDASLhDhlTL...uLSh....pDhsL...+..RssEGIRl+HsFSADDIpAIMESDoFLKYS+SKPAAMYQYIFLSFFASGNSI-DILsNR-ST.Lch.uK+s.KsKILFGRNTRYDlssKSSFVCGIV+GKsLccsoLs-hMW-LKKKGLIYSMEFTsLhSKNTFYlFsFTIYsDEVYcYLsssKhFou+CLVVSsK...G...-lEsFSSLKKDVVIRl........... 
0 0 0 0 +3752 PF01427 Peptidase_M15 Peptidase_M45; D-ala-D-ala dipeptidase Bateman A anon Psiblast Q47749 Family \N 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.35 0.71 -4.70 5 2743 2012-10-02 01:02:30 2003-04-07 12:59:11 12 9 1142 6 295 2325 328 113.20 39 86.22 CHANGED cssGhlsLcpllsDlchDuKYATtDNFTGK..shYpssRsLucc-sApuLtcApulAsspGhsLllWDuYRP+sApsshhtWsAsPEsshst-sthssIp+sth.s...suopSoHSRGoAIDLTLh+sDpGpLVDMGocFD-McERSHssAsu.VussuspNR+..........pLRuIMEuuGFpuYSuEWWHFcLsDps....Yscsah-FsVs ........................................................................................................................FTG+..l.............ssY.....s...sR.hlhp.csh..s...sALtpA.pp..p..A..p..s............G..as.Llla.DuY....R.P.QpAs....p......t........h......h......t.....W.......s.......t......p..........P.................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 109 194 256 +3753 PF01435 Peptidase_M48 Peptidase family M48 Bateman A anon Swiss-Prot Family \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null --hand HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.52 0.70 -4.79 21 10571 2012-10-03 04:41:15 2003-04-07 12:59:11 13 115 3867 4 3276 8591 3845 215.40 20 61.15 CHANGED hhhlhsshhhhphst.hhtplhtph............stppsplcphlpclApps....sh.....hclhVlcssp...............sNAFshstup....lslpsuLl....pt......................................hscsELtAVlGHElGHhttpHshhp..hhhuhhhslhhhhshshhhs.thh.tttt.......................hlhthhhhshSRppEhpADthuhph........hhpshhp.t..........hshhtplpp.......................................ttssssshhsthhpTHPshspRlpt .........................................................................................................ht.................................t................................t...t.......l.h...p...h..s.p....p...l...spps..............s..h.s...............p..l...h..l..h.p..s.sp............................hNA........F......A......h.......s......s.....s..........p...........................l.s.l.psG..Ll.....pt.............................................................................................hsc..s..E..l.tuV.luHEluHlt..ptchhpt...........h.h.......t....s...h......h...s............s....h......h....h......h.........h.......s........t.........h......h..........h.........h.......h.....h......s........t......h.....t.........t..s....t............................................................................................................hhs.t.h..h....h..t.h...S...R...p....p.E..hpADthuhpl...........hhpuhhp.psh.................................phh..p.p...h.tt.......................................................................................h....t....s......t....t...h.......s....p.......h....h...t......o..HPs.ppRlt........................................................................................................................... 
1 1028 2092 2791 +3754 PF03571 Peptidase_M49 Peptidase family M49 Griffiths-Jones SR anon MEROPS Family \N 19.10 19.10 19.20 19.20 18.80 18.70 hmmbuild -o /dev/null HMM SEED 551 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.95 0.70 -6.44 27 706 2009-09-11 00:19:35 2003-04-07 12:59:11 10 9 421 5 366 722 86 347.00 24 67.47 CHANGED spl.appspch............lYshcsptt..LGa.spsshosYYs..........sslocp-h-hls.chhpspsl...s.NTRlhK....sucs......aplhlASsppsppst.h..............phpGpp..lplshGDast.hp+lsppLcpAppaAAN-sQpcMLctYlcpFpoGshpsHKcuQ+hWlKDhuPhVET.IGFIETYRDPtGlRuEaEGFVAhVNK-pot+FspLVssApphlphLPWsps..........aEKDpFh.PDFTSL-VLoFuuSGlPuGINIPNYDDlRps.GFKNVSLGNVLuusspspp.....lsFls-cDpclapKapspuFEVQVGLHELLGHGoGKLhpcstsGpaNFDhps..ls.lssc..l...soaYcsGETWsShFGsluuuaEECRAEsVulYLshpc..-lLcIFGhpssp.....-tccllassaLpMlcuGL.huLEaasPcsc....KWsQAHhQARFsIl+slLc..tspshlclppsp.sp...hsslplclD+S+I..osG+pAlpcaLt+LplYKSTu-hcsGpchaschosVs-p....ahchR-lVlt..+KpPR+halQuNThlss........spVp.lh-Y-po.tGhIpSalER.. 
...............................................................................................t.............................................................................................................................................................................h.h........G...a.t..htphs..lppAtt...hutsttptthlt.hhp.atpGs.ptacp.phhWl.pphss.l-.......GFhEsYtDP.h.G.h+.upaEu.hlth.....hs........t.ottht.hs..tput.h....Phs.t..............ac+..p..h.....t..s..shpslps..h...h..h...uu.....s..........h.....P....h....GI.....NlP...............N.........p.lRtp.G.Ks..VpltNlhtsh..t...........h.l.p.....t.....t..p......th.h..p.h.t....s..lp...sshHEh.hGHGsGpl......................................................................................................................................................................................................................................................................................................................................................................................s.................................................................................................................................................... 
1 147 232 311 +3755 PF02868 Peptidase_M4_C Thermolysin metallopeptidase, alpha-helical domain Bateman A, Griffiths-Jones SR anon Psiblast P06142 Domain \N 24.40 24.40 24.70 24.40 24.20 24.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.14 0.71 -4.50 113 1990 2012-10-03 04:41:15 2003-04-07 12:59:11 10 77 1088 114 328 1719 59 152.70 37 28.88 CHANGED usLhYpspSGALNEuhSDlhGshlc.ahttt.........t......t..........DWllG--lhp........t...G..suLRpMssPsps.........sspssphs........sh.....................tD.....sssVHhNSGlhN+AFYLlupu...........................hsGhuhcKAtcIaYcAhsh.YhTsso....sFspu+suslpAApDL...aGttu..tphpsVpsAassVGV ...........................................usL.YpspSGALNEuhSD...lhGhhlc.a..tt...................t...............sWhlG-.-lht.................t.u....suLRsMpsPsp.................tsps..schsca..................................ttDsGGVHhNSGI.N+AhYLlupu.............................t.u.l.GpcKs....tpIaYpA.s....Y....ho...s..so....sF.......pps...+suslpuA..p..-L........YG........s.......tpspsVtpAassVGl............................................... 0 104 192 278 +3756 PF04951 Peptidase_M55 D-aminopeptidase Bateman A, Rawlings ND anon COG2362 Family Bacillus subtilis DppA is a binuclear zinc-dependent, D-specific aminopeptidase. The structure reveals that DppA is a new example of a 'self-compartmentalising protease', a family of proteolytic complexes. Proteasomes are the most extensively studied representatives of this family. The DppA enzyme is composed of identical 30 kDa subunits organised in a decamer with 52 point-group symmetry. A 20 A wide channel runs through the complex, giving access to a central chamber holding the active sites. The structure shows DppA to be a prototype of a new family of metalloaminopeptidases characterised by the SXDXEG key sequence [1]. The only known substrates are D-ala-D-ala and D-ala-gly-gly. 
25.00 25.00 33.80 31.10 22.20 22.20 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.55 0.70 -5.18 41 469 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 424 5 164 444 48 259.60 34 95.97 CHANGED MKlaISsDhEGlsGlsshpplps.......usspYp+uRclMTp.......EsNssl-uAhpuGA....s-VlVNDSHusMsNL...lh-clcs.....cspL.lpG.ps+shuMhpGl.-puhDushFlGYHu+AGs.p.GlhuHThsutshpplhlNGptlG.EhslNAhlAGtaGVPVsLluGDDh.htpEs..cthhP.ssphlslKcuhu+huuhshosppspptl+puscpAl..cptpph.....pshphssPsplclchpssuhA-hsshhPslERl-usT.Vcapupshh-shpshpsl ........................................MKlaISsDhEGluGls.s.hppsps..........sst..c.Y.pcsRchMTp.......-ssAslcGshp.u.G.u.......sEllVsDSHus.....hp.NL...hh-p...l.ct.....Rspl.l..pG..ps+.shuMhpGl....-.p.u.hDulhFlGYHAtA.G.s.c.GlLuHThsusshtplhlNGhthu.EsslNAthAuchGVPVsLloGD-s.h.tc-s...cthhP...psthVs..lKculu.p........huuhshsPppspptIptusppAl..ppttph................tshp.h.s.....s.PhplclphpssshA-.hhshhPslc..Rl-..u.pT..Vpapupshhcshphh.s.............................. 0 54 107 134 +3757 PF02031 Peptidase_M7 Streptomyces extracellular neutral proteinase (M7) family Mian N, Bateman A anon IPR000013 Domain \N 25.00 25.00 26.20 25.30 23.40 22.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.66 0.71 -4.44 4 82 2012-10-03 04:41:15 2003-04-07 12:59:11 11 2 55 2 34 84 5 131.00 48 61.41 CHANGED AVTVsYsASsAPSFpoQIApusQIWNSSVSNVRLptGSs.ADFoYhEGNDsRGSYASTDGHGRGYIFLDYpQNQQYDSTRVTAHETGHVLGLPDHYSGPCSELMSGGGPGPSCTNsYPNSsERSRVNQLWANG ...........sTlhYsuSpAssFcutIspustIWNuS..VsNV+Lt..p.u.os...Achs.h.h.t..s.s.cspu.ohAsssGt...GpG.hIaLs.h.p.t.s.Q.t.YssTR...lsAHEhGHlLGLPDpYsG.PCSpLMSGuusGsSCTNshPsAsE+uRVpphaA.G............. 
0 11 25 33 +3758 PF01457 Peptidase_M8 Leishmanolysin Bateman A anon Prodom_3085 (release 99.1) Family \N 19.30 19.30 19.30 19.30 19.20 19.10 hmmbuild --amino -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.83 0.70 -6.16 8 1086 2012-10-03 04:41:15 2003-04-07 12:59:11 11 33 163 1 602 1111 26 328.90 21 71.05 CHANGED H+CIHDtLQARVlQSVAtQ+hsPuuVSAlGLPYVosssh...tpAsssDauhusuoo..VsRAAsWGsLRIsVSsEDLTDPuYHCupVGQplsNHtGslssCTAEDILT-EKRDILVpaLlPQALQLHs-R.LKV+QVQGcWKVTGMssslCucFKVPssHlTs.....GVoNTDFVLYVASVPSE.....uVLAWAsTCQVFuD.G+PAVGVINIPAAsIsSR..YDQlsTRVVsHElAHALGFSs...sFFcss.GIlppVoslRGKsa.................................sVPVINSsTVVAKAREQYGCsoLEYLElEDQGGuGosGSHlKhRNApDELMAPsuu..AGYYoALTMAlFpDLGFYpAcFopAEsMPWG+ssGCsFLocKC....MEcNITpWPuM.FCN.......copsshR..CPTsRLsLGoCslssYpssLPsYaQYFT.........sssLGG.SsFhDYCPallsau..........sGuCsQcsSsAsshhctFNVFS-AuRClDG..sFpPKsssu..hls.YsuLCANVpCDTAs+TYSVQVhGuoGYssCTPGtR.lcLuTVSsAFpcGGYITCPPYVEVCQuN .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t......t........sDh..hhhh.......................................hu..A.h..C..............................................+.....PhhG..h...h.....p.....h.......s..............t........h............................t................................h......t.....h......hh......HEhhHsL.........G.Fs................hh...t.............................................t.......................h...........t..t......................................................................
...............h.hl......o.s.p......sh.t..h..s...+pha.........s.........C.....s.........h.........p.........t........hp.lE..s.........t..G...u...t....u....o....h.h.o.....Hhct.+...................h..sEhM.s.....s................................s.......ts.........h...h.........ot...h...T...hAh.h...p.....Dh.....G...a.Y.p..s..s....h..s....h..s..p....h....................WGpt...uCt...h..h...t.t.C........................................................as......................................C....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 425 463 569 +3759 PF01752 Peptidase_M9 Collagenase Bateman A anon SWISS-PROT Family This family of enzymes break down collagens. 
22.10 22.10 24.70 27.80 19.40 17.40 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.98 0.70 -5.51 7 608 2012-10-03 04:41:15 2003-04-07 12:59:11 12 12 306 3 82 554 4 282.40 35 35.67 CHANGED hpplhtth.hG....uppDplWLuss-hhpYYA..shpt.s..lsttpt-LAt+lhP.pa.CpssuhIcupphosupsApuCclhtsK-ttFHpshpsspsPVtDDtssplcVslFsssssYhpY.hah.......FspsTsNGG.YLEGNPuchsN.spFlAYchtphs.Dl.lhNLpHEYsHYLDuRFspYGoFscshtcuphlWW.EGhAEYhHYpQG...hpAAhphhspG.chsLSslhsTT...a......SpDosRIYRWGYLAVRaMhE.pHspDspohLshoR.G.pas.au.tsthhs.hYss-athW ........................................ppl.phs.ls....ssp.cs...hWLhss.ulaYsuph..uphc.s.shs..thhp.Ah+hhPhhtpp.hhsAh.phps...hsupstsu.sslshpc.cc.t+pthh.s+s......shD.Dsshsl+sushsoc-ch++L.haA......pFt.psssNst.h.cGNPsDlh...shshasu.-..c..a..p.h....N....c.....hhh..scphshYl-u.h...spahoacR.T.s.c.p............S...............h...............hsh.p.hhtEas..HYhQG....pht...ssu.lh.....up.....G....c....h....h....s-..hhshh.pt.......usc.osplh.ht.lh..sth...-.....t....p...p.ssphhhh.u+h.G......pa-hYshths.h.u..hYsppapha................................... 
0 22 36 62 +3760 PF00768 Peptidase_S11 D-alanyl-D-alanine carboxypeptidase Bateman A anon Pfam-B_864 (release 2.1) Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.57 0.70 -5.10 12 8726 2012-10-02 21:13:33 2003-04-07 12:59:11 15 33 3497 44 1522 7430 2600 235.10 31 61.21 CHANGED sspssstP.........plsApuullhDhsoG+lLYppsscphhs.ASlTKlMTuhlVhcAhpttplc.sshVslupssathst....tsSphhLc.spploV+-LlpulhlsSuNcAslALA-hluGup......csFVchMNstAcpLGl+NT+F.sspGLsscs...............hSoApDhAlluptll+t.h.pphphspcpphsFp..........phshhNpNtLlhpps..lDGhKTGaTstAGasLVuoAscs.shRlIuVVhsAps .........................................................h............pl.sA.p...u.h.l.l.h.Dh...s...oG.....c...l....L....h........pp.....N....s.c.p.thsPASlTKlMT.uhllhcul.....p.t.....t.......p......l..................s..h......s...s...h..V..s....l.o.p...p.u...h.t.t.s...................sS...ph.h.l.c....s...G.p.p.lolc-LlpuhllpSuNDAulALA....-....t....l....u.....G..o.p.....................................p..sF.l..ph.M.Np.p.A.c.p.LG.h..p....s.....T.+.F.h....s........s..s........G.L.....s....s..s..s.t....................................hooA.cDhAllu.pthl.....c.....p......h....s...p...........h....s....h.....t...p....p..p.p...h...s...a.s..............................shp.h.h.....N......p......N.....t.........L...l..........h............p......t.......s.............................s................l....D....G...h.....KTG...a.T.stAG..as..L.lu......o.......A......p.......c.......s.......s.......h......R..l..I.s.VVhsu..s.................................................................................................. 
1 486 949 1228 +3761 PF02113 Peptidase_S13 D-Ala-D-Ala carboxypeptidase 3 (S13) family Mian N, Bateman A anon IPR000667 Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.31 0.70 -5.98 9 2907 2012-10-02 21:13:33 2003-04-07 12:59:11 10 7 1969 75 742 2721 1248 317.50 25 88.36 CHANGED LstslsshltD.PpL.Gu.sGlhlpcssoup.lapapusp.hlPASstKLlTAsAALhsLGssacFoTcVhssG....tshpGsLhlhGuGDPTLssps............lsslscp....L+cuGVpsl.phslhlDsSlFsu.shusuW..sD.s.sasuP.sushl......DsGph.splss..t.Gp....p.ss...h...t......ttusts.........hs.pphhlpGsls.t...h..shsVpssuthAuchhpcpLtttGlphsuslshssss...puupsLAstpSsPL.clLppMhKpSDNhhAEslh+tluhsh.ptPuoapsussuVpptL.sphGlDssshhLtDGSGLSRpshloucTlsplLpshs..ppsshpshlssLPlAGts.....GTLpsRht....spsssGhl+AKTGoLouV.uLuGa..........lpspsGchlsFuhl.N...hsstsspshcsshsthtspl .....................................................................................................t.....huh.l.p.ht.....s..s....t....h..h...t.h..p..sp....h..PASs.Klh.Tsh..A.Ah.........L....s.s..s..ap..h....p....Tp.l...........h.................p.s.....................................tu..........sl...hh.h.hsuDP..h..htt...tp....................................h.....t..h.h.tt...................l.p......t.....t........G...l...p.....p.....l...............p.l..h...h...Dt..oh....a.........t...........t.....h..s.......s...........h...........t..s..............................s........tsh.h..............s.shh......................................................................................................................................................................................................................................................................................................................................p....t...h.hthh.t.ht.t....s....h.t....h...................s........t........h......h.......t...........t..s..................t...sp..........l...s...t....h..........p..S....s..l......
..p..llp.h.p.SsNhhA-tl..h....h..l.............u..................h.....................t..............................s...........s.......h........t......t..u.....t...s.........l......p.p.hl....t......p..h..G.l...s...h...s.......s.......h.......h..h..D.GS.....GL.....S....pt.......shlsst..hhph.Lthhh.....................pp.........................h..h...p...l.P.lu.Ghs...............................G.olp......R.ht.......t....h..tu.ltsKTG.o.............L......p.....s...........l..........................sluGh..........hh....s.t...s.G.p...h.hhs...h.st..............................h............................................................................... 0 232 506 664 +3762 PF02129 Peptidase_S15 X-Pro dipeptidyl-peptidase (S15 family) Mian N, Bateman A anon IPR000383 & Pfam-B_2704 (Release 7.5) Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.90 0.70 -4.98 48 2545 2012-10-03 11:45:05 2003-04-07 12:59:11 13 23 1647 54 719 8025 2907 265.50 22 43.65 CHANGED DG..scLtsclhpP......psstslPsllptoPYt..tpp.hssshthtpst.....................................................asscGYssVh.-sRGptsS-Ghhsst..........s.pEspDstssI-Wlss..............p.sWssG+VGhhGhSYtGhhshtsAs...............sss.uL+slsstsu.....hsshas..hhhpsGshttsshhsh........................sshttthhupthsstphhpthht................htthhp.hhtchcpp.......................psshssaWpspsh...p....ph...stlcssslhspGhtD.shh.psshphapsLpsss......................+LhlushsHst 
...............................................................................................................................DG...shlts..slhp....P...........s.............ts.......tp.hP.....s..l..h..p..t..s....P..Yt........ps....h.h.p...p.h..s..h..h..t.h...........................................................................................................................................................as.s..c..G..Y..ss...lh...s..s.....s......R.....G......s......t........p........S....p......G..h.h.ssh.............................shp..E..h.....p..D..s..h...s.l..I...-......W.l.ss............................................p...s...W......s......s...G.....p.....V.G.h..h..G.hS.Y.h.G....h..h.p..h....t..s...A..s...............................p..s..s..s.....s...L....c..s.....l...l...s..h...su.........h.s..s...h......Y...p..........h...h...h...p...s......G...h......ht....s......s...h..h..h...h..................................................................t...h...h....t...h..h........s...p....t.........t......t..h...h.t..............................................................t...htt..ptt...............................................p......s...t..h....s...p....a..W......p...p...t.sh.....................h....................ppl...p..ssshhstG...h.tD.shh..t..tsh.ph..a..p.tlttt..............................phhh.t.htHh............................................................................................................................................................................. 
0 221 468 626 +3763 PF00716 Peptidase_S21 Assemblin (Peptidase family S21) Bateman A anon Pfam-B_729 (release 2.1) Family \N 20.60 20.60 21.20 21.20 19.60 20.10 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.07 0.70 -5.10 11 192 2012-10-01 19:43:34 2003-04-07 12:59:11 12 2 89 47 0 212 0 280.20 30 49.47 CHANGED ELhLs.-sVpptLPsss........slPlNI-HpssssVGpVlulhssppGlFhlGllsssphhslLppsupsuhhuppss...shh..-thL.hLosaLPuLSLSS++....husspts...-sshFpHVALCulGRRhGTlAlYups.-hslstFscLSsup+-tlhp....sppssu.............hhtsshps.spsLLusAlcshal+-RhshLppc+phAGIt.scoYLpASssh..tsssp.pssspph........................shpsss...........shsssps...............spshssssPsu......................ssssusustsh......tDhlhlPtspatpLlsupttsts....ss.sssthhhPsssss.sshP .........pLhLs.-hVpthLssts.........slPlNlsHptss.VGtVhulhss..cG.FhlGhlsssphhpllppsuptshhsptss........................-.ll.hloshhPulSLSSh+................tstt.s......................stshFtHVuLCslGRRhGTlssYsts.-hslt.F.tpLo.sp+ttlht................stt.hs................ts.tsshps.shsLLusulsshhl+-Rhshlt.c+p.sGlt.tpoYlpASt......t.t.........tt...............................s.............................................................s.s..shhssh........................s..ssssssushs.s......t-hlalPtsta.pLls..upttst.s.....s..sts..h.P....................................................................................................................................... 
0 0 0 0 +3764 PF00717 Peptidase_S24 Peptidase S24-like Bateman A, Finn RD anon Pfam-B_616 (release 2.1) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.42 0.72 -4.33 228 17337 2012-10-02 16:34:55 2003-04-07 12:59:11 18 67 5051 47 3901 12330 5963 72.80 24 32.79 CHANGED pVpGcSM....t..sslh.sGDhllVc...pts........................psppG-..lVlh..phssp.............shlKRlh..thsssthh..................hsslhlssp ..........................................................l.GsSM...............p.s.s......l.........h......s..........G.........D.h..l.l...V.c...........+ts..................................................................psppGD..lV..lh.........phssp.....................................shlKRlh......th....s...s...c...h...h.h....................t.l.h...................................................................................................... 0 1246 2442 3261 +3767 PF03572 Peptidase_S41 Peptidase_S41; Peptidase family S41 Griffiths-Jones SR, Finn RD anon MEROPS Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -11.05 0.71 -4.84 142 9366 2012-10-02 13:07:06 2003-04-07 12:59:11 13 89 5410 38 2078 8432 3390 171.70 27 34.34 CHANGED plGYl+lssF..t............tsspphpptlpcLpp......pshculllDLRsNs.......GGhlstulplsshals.......sssll.pp....sptsppp......................................................t.t.t....................hstPllVLlspsoASAoEIhA.uAlp-tpRu..hllGpp.TaGKGslQshhpL....ss............ssslp...lThu+ahsPsGps.lpth...GlpPDltl 
..........................................................................................................plGYl+l.s.s.a..tt...........................................tsstt.lp.p..hh.pp.Lt...................sspul.l.lDL.RpNs.............G.G.h...l..s.t.s.s..t..l.s.s.hhhs........sssl..h....p..h......sph.hs.pt...........................................................................................................p.hs...sp.h.........................................................h.tsts.llVL.sstt.ou...Su....uEhlA..tsL..p......c......h.....p..RA......hl.l...G...cp...Th.....G.....t.....u..h.....s...p.p...h..h.pl.............ss................................s.h.h.lp.......ls.h.u.+..h...h..s...P...p.G..ts...hpth..........GlhPsl...................................................................................................................................................................... 0 802 1448 1814 +3768 PF03574 Peptidase_S48 Peptidase family S48 Griffiths-Jones SR anon MEROPS Family \N 21.50 21.50 22.20 31.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.86 0.71 -4.20 4 138 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 109 4 5 124 0 146.60 84 79.25 CHANGED YlEQGQNLRMTGHLHHIEPKRVKVIVEEVRQALTEGKLLKMLGSQEPRYLIQFPYVWLEpYPWpPGRsRIsGsSLTs-EKphIEsKLPusLPDApLINSFQFMELIEFLH+RSQEDLspE+RMsLSEALAEHIKRRLlYSGTVT+lDsP .YLEQGQNLRMTGHLHHLEPKRVKlIVEEVRQALTEGKLLKMLGSQEPRYLIQhPYVWhEKYPWpPGRSRlPGTSLToEEK+QIEpKLP.....sNLPDApLloSFEFLELIEFLHKRSQEDLPscHQMPLSEALAEHIKRRLLYSGTVTRIDSP. 
0 0 4 5 +3769 PF03575 Peptidase_S51 Peptidase family S51 Griffiths-Jones SR anon MEROPS Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.95 0.71 -4.48 24 2034 2012-10-03 00:28:14 2003-04-07 12:59:11 12 9 1732 6 487 2189 1077 152.20 25 62.84 CHANGED Yhpphtpshpp.LG......ht.lssLchst........-hpstlppsDhlaVGGGNTFpLLptlpcpsLsphlpctl.ppG.hsYhGhSAGu......hl.....s.s.oIpsss...hs.h....p..sa...p.u......LsLlsatls.PHa...sstp.t.....E...shtpplppa...psh.slluls-GsAlhlpscp .........................................................hhtsht..lG......hp....lph.lc.hhs...............c..ht.tl...p...pA..-....hlhluG.G.N.oh.p....Ll.....pph......c.....c.....p.....s.....lhp......hlp...c.hl....p......pG...slhh..Gh.SAGA.......................l..................h..ss...s.l.t.s.s.s......p.h.s.s...................p.tsh.........s...u.....................................L.sL..h.s.....h.....t......ls..PHa.....sst..p.............p...shppclpph............t.s...p.......h..s.h.luls-s.sslhlptt.p.............................................................................................. 
0 177 325 427 +3770 PF03576 Peptidase_S58 Peptidase_T4; Peptidase family S58 Griffiths-Jones SR anon MEROPS Family \N 22.20 22.20 22.70 23.00 21.10 22.10 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.01 0.70 -5.25 108 1255 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 969 47 448 1181 478 314.80 32 91.85 CHANGED hNuITDVsGVpVGHsTl.ttt......................sl+TGVTsIlP+.ts..ppsssAusaVhNGsGc.spGhshlcEhGhlps..PIsLTNThulGh.sssullphhhst..s..........hshshPlVsEs.DuhLNDItuttlp..........tpcshpAlsu.A..................tsss.......hspGuVGAGTGMhshshKGGlGoASRllst...........uaTVGALVtuNa.Gph.....tphhl.uGh.lG.pchushs.s.........................................................................tpGSlIsllATDAPLsspQhpRLApRAtsGlARsG..ustssuSGDlslAFSTusps.tstst................l.pt...ls.LapAAupusEcAIhsulhsAps.hsG...t..pscth.ul .........................................................................................h.ssITDVsGlpVGHtoh..p........................thtTGlTVlls.....................ppsssAulcVhsGsst.optsshlc.hshlpp...sllLosspuhGh.uusGlhchhhcp.....sh......t.hpssssssPlVst...u.hL.....Dlt.stssp..................................tphuhtA.hps.A.........................t.s.s..........hs..pGs.VGA.Gs.....Gh.......s.......s.........h.....s.......h................K.......G.......G.....l......G.o...A.....Sth.lss......................shsVGALVhsNh..Gsh....................tthhh...ssh.....su......t..h..tt.h......s..................................................................................................ttsstsosIsll.ATDAsLsptQhpRlAttApsGlARsh..ssas.hsGDhhhAhoTupths...t.............................................lsslhtAAA-shpcAllpulhtAps.h.u.........h............................................ 0 134 269 374 +3771 PF00082 Peptidase_S8 subtilase; Subtilase family Eddy SR, Sonnhammer ELL anon Overington Domain Subtilases are a family of serine proteases. 
They appear to have independently and convergently evolved an Asp/Ser/His catalytic triad, like that found in the trypsin serine proteases (see Pfam:PF00089). Structure is an alpha/beta fold containing a 7-stranded parallel beta sheet, order 2314567. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null --hand HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.35 0.70 -5.26 63 15113 2010-07-03 07:17:22 2003-04-07 12:59:11 17 580 3631 324 6420 14631 3376 299.50 19 44.44 CHANGED slullD.oGlp.....ssHs-lp......................................sph........t...ua.........................................................................................shssss.......................p.ttstss............................HGT+sAGhluuss...tss.s..hGVAhsuplh...............ul+l...lssst..ssss......................hhpulphss.................pph.clhshSh.Gs............................t.....psshhptsh.........p......stsspGtlhVhAuGN......ssssss.....................sssPuhh........psh..loVGuss......................................................................................................................................pp.................s.....ph....s.aSsh.........s............sss........clsusGs..sh..........ssh...sssp..................tshtthsGTSh.........AuPtsuGhhAllhp....t.Pph...............ospplpt.hllpoAp.hs...........................hsphh......GaGl..lshtpslp 
................................................................................................................................................................................................................................................................................lsllD.oG....l...............p.p......p.ht............................................................................................................th..................................t...h....................................................................................................................................................................................................................................................................................................................................................................................s.h....ttt....................................................s.t.ts............................................................................HGT..ps.A.u......h.......l.....uu..........t.................t................................................................t........................h..................G........l........A............p.....u...p.lh..........................................sh.+h............hssts.....sptts.....................................................lht.u.lphsh.................................................................pps.sp..l..l..s...h...Sh...Gs....................................................................................................................t..t...h.....h....p..t...sh........................p.................ts.h..p..p..G.....h..h.h..l..s..A......A..GN..........................s.u....s.s.st..........................................................................................................................................hs...P..uth................sss.......lsV..uu...s..s......................................................................................................................
.............................................................................................................................................................................................................................................................................................................................................................................................pp...............................................s..............th...............utaSsh.............................u................................................................s.h.............................-..l...s....A.....P....Gs...sl......................hush.........ssst.....................................................................................................................................................ht.hhsG.TS..h...................................A.s.P..hl..u.....G.hs.A.Ll.hp.................t..s.ph..........................................s.st.t....l.....pt....h....lh..p..s.....u..t...............................................................................................u.G............t............................................................................................................................................................................................................................ 
0 2244 4178 5504 +3772 PF00326 Peptidase_S9 Prolyl_oligopep; Prolyl oligopeptidase family Finn RD anon Prosite Family \N 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.29 0.70 -5.12 70 9052 2012-10-03 11:45:05 2003-04-07 12:59:11 16 114 3171 311 3615 20889 7745 202.90 21 30.52 CHANGED ssF......s..tphthh..............spGhlhshsshRGuuthGcpatcss.ptphtpsshsDhlsuscaL......hppt..assss+lulhGuSsGGhhsusshs.tpschF+sulutsshsDhlphhtts......hst.pa.caGss.p..stchYpthsshsshcshh..........thsshLlhpGhpDsRV.h.tcuh+hhstLp.........ppGss.hhhth.ssuGH...ustpsptph.cthtphhuFhhpthshp ..................................................................................................................................................................h..h......pp.G.hh.hhh..s....s.....h.....R....G.............u....s...........t.............h.......G...........p...t................a........h................p.......s...........h.......h.......t.........p.......h......t.......t........t......s.....h......p......D....h...h....s.....s...s...c..h..l.............................hp..p....s.....h....s...c...s....s.....+...l..ul.h...........G..t.S.h...G....G.....h...h.....s.........s............h..............s.............h.............s......t..............t..............s...........c..............h.................F................p...............u............s..........l............u......t..............s.........s...........l...........s.....D........h.....h.........t.........h....h....t.....t.................................hs.t.........p.....h.........p.........h.......s............s.........s......p..................s...p...h...a....p....t....h.......s........s..h...s.......h.c.p......................................hts.s..l......L...l...h..p..G..t...p..D....s...p.....V.......h....t.p...u....h...p..hh.s...t.Lp........................ptst..s...h..t.h.........h.........h.h.........s..t....s..H.............u.h....t
....p...s....p...t...p....h.......p....h.......h.t.thh..t..ah.phh...h................................................................................................................................................................ 0 1280 2250 3034 +3773 PF03418 Peptidase_A25 Peptidase_U3; Peptidase_M63; Germination protease Bateman A anon MEROPS Family \N 19.70 19.70 19.80 19.70 19.40 18.80 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.11 0.70 -5.72 2 602 2012-10-01 20:58:29 2003-04-07 12:59:11 9 2 413 2 149 477 6 219.30 31 94.65 CHANGED AVEsK-hhhppQshPsp..EIKGhI.KE+-ctGlKIphV-lTcEGAEh.GKKcGRYlTLEu.GIREpDoEhQEthptVFAcEhshFlcsLNIscDASCLlVGLGNhsVTPDALGPhAV-NLLlTRHLFcLQPEsVQ-GaRPVSAhsPGVMGhTGIETSDIIhGVlcpspPDFlIAIDALAARulERVNsTIQISDoGIHPGSGVGNKRK-lSh-TLGlPVIAIGlPTVVDAVoIsSDTlDaILKHFGREMK-p.+PS+SLlPuGMTFGcKKhLTEDDLPspcQRQoaLGhlGTL.--EKRpLIHEVLuPLGHNLMVTPKEVD.FI-DMANVlAsGLNsALHccVsQENhGuYsH ...................................................................................Gh....p..p.....t.....t..t..hhl.s...lp.l.......p...s.t.p.....h.sK...GpYlTl-h..t....h.t..s.th.pph.t.hsp.ht.h..............................p.......ph...LllGLGNhplTsDuLGPhshpplhlTRHl.h.p......s..p.....h.....t..t.h.p...l.sultPGVhu.TGhEos-ll.ullpphpPchllslDALAuRphpRlspoIQlssoGIpPGuGlGNpR.tlspcslGlsVlulGlPTVlpAsslsp-shp..h.................................................................................................................................................................. 
2 82 127 135 +3774 PF01136 Peptidase_U32 Peptidase family U32 Finn RD, Bateman A anon Prosite Family \N 21.30 21.30 21.30 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.49 0.70 -5.43 166 7851 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 3207 0 1379 4878 390 231.70 27 57.79 CHANGED lppltchlp..h.h.......................phtsDu...lIlsDhGllphhcct.splslHsSsphsltNhpslphhpch...GhpRlVLuREL...olcclpp...ltp..ps...........s...lElElFlHGul...........CluaSG+ChhSph.htsps...................sN.+GpCs.psCRh.sat....................................hhppt.pstp..................................ahhSspD.l..shlpplscLhcsG.lsulKIEGRh+s..tYls.plspsYRpslDsh.tt.............t.t..phhppLpph..hpR.......shssGahhspsst ........................................................h...h.chlc..l.h.......................p.hGsDA....lIhu...D.Gl......lt.....h....s.......p....c.......p......h......P......p...l........t......l..HhS..sQssss..Nhtslc.aa.pch...G..........h.p...RlVLuREL.....ohcpltp...............lpp....ps.......................s.h..-lElFla....Guh............Clu..aS..GRChlSsa..hst.+s...................sN...pGs..C..s..psCRh...pap....................................................................lh-pt...pst..c.t.....hsh....t.....p..Gp.........................................................ahhsspD....l....shlpplscL.hc.t.G..lcSlKIE..GR.h+s..tYls.plspsYRpAlDthhts..........................p..thhpp.ltph........hpR.......shssuah.tp...p............................................................................................................... 
0 454 880 1149 +3775 PF03577 Peptidase_C69 Peptidase_U34; Peptidase family C69 Griffiths-Jones SR anon MEROPS Family \N 20.70 20.70 21.10 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.26 0.70 -5.60 13 1802 2012-10-03 21:14:07 2003-04-07 12:59:11 10 11 864 0 402 1558 66 354.50 29 76.08 CHANGED ACTTlLVGKpAShDGSThIARsED....tsushsPK+alVlps...ccQP+c.Y+Slhoshph...cLP-sPhpYTSsPsu....sspc.GIWuEAGlNpsNVAMSATETITsNpRlLGsDPhV..................psGIGEEDhlTllLPYlpSAREGVcRLGpllEcYGTY.EuNGlAFSDpcEIWaLETlG......GHHWlAtRlPDDsYsssPNQhsIDcFDhsDs....-sYhsSsDLc-FlcppHLs.shp......................tcFNhRcAFG.opscKDppYNsPRsWhhQ+aLs........P-hc..psPcspclPahp+...PpRKlolEDlKalLSsHYpsT.....saDPYGs..pGstpsccta.........R......PIGlNRoppsalLQlRsslPptluGVpWLuaGsssFsshVPFYssVscTPstap..cTsschoss..shYWts+hlAsLuDscYptassslcsa.cp ..........................................................................................................................uCTsllVGKpAohDGSshluRs-D.................t.h.sp.phhhh.s......t.p..t..t.t......h......h....s...................s....t..h.....ph..........l.P.....t.....p...s...h.....p....Y.ss....h.ssh...........................t.p...Gh..h....s..p....s..G..hNp....t....s..V..u....h....o....u....T.co..h.h.s.Npp.s.h.u.h..D....P.h.l.......................psGl.sE.ps......hhslsLPhhcoAREGVphlGpLl-c..Y...G..................s...........h.....E..............u.............N........u.............l.......h............huD.p.s.E.lW.a.h.Ehhu..............GH......p......WsAtR...l.PD...DsY.ssh.sNphtI...p.p.h........D...h...s....c.....................pshhhSsslhpasccpth...s..sh.p.......................................................t.F.shppsau.....s....t....s.......t...c.....th..Y...s...psRsWhh.......phhs...................Pphp..............p.s..p......s..t....phPhhhp.....s.p.+.plol.pDlt..hh.psH.apsT.........a..D.s.hs.................t......pt.a.....................................R................sIuh...pstpstlhQl..R..s
...t...h...P.tt..hs.sl.WhuhG..sshsshlPaa....ss....h..p..p....h..s..ap...................t.s..s.t......p...h...s.p...............shaWh.phlsshs..pat.hh...pth...t.......................................................................................................................................... 0 123 214 291 +3776 PF03419 Peptidase_U4 Sporulation factor SpoIIGA Bateman A anon MEROPS Family \N 25.00 25.00 27.20 26.60 24.70 24.50 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.54 0.70 -5.34 34 409 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 402 0 90 356 5 271.70 27 96.09 CHANGED M...hlYlDllaL.NhlhshhLLhlTAthl+ppsphhRllhGuhlGuhhsllhhh.P......hhshhhphhhKllhShlhlhhuFuhcphth............hl+slhhFYhsoFlhuGuhhuhphhhptshhhpsthh.h........hsahhllluhsshahhh+thhchlpp+phptphlhcVplthsspphpl+uLlDTGNpLpDPlTptPVhlV-hstlcpl.hs.ph..........slpphtph....t....p+hRlIPY+ulGp.ppGhLhuhKPDplhI.pppchlpspcsllulspppLSspscYpullpPcll ............................lYhDlhhl.NhhhshhlLhhouhhh+t.phphhRllluAhlGuh.hslhhhh..P....................h.hs.h.h.h..p....hh...K.l..lhSllh.lhhuFshpsh+p....................ahpslhsFYhsoFhlGGshhuhphh..hps.st.h.h..shhh.............hs.hh..h.llhuhs..lh.a.hhh.c.thhc.l.c.p.p.p.hpts.lhclclpls.p..c.....p....lplpuLlDoGNpLhDPlTppPVhlhchss.lcph.hst.h...h........t.........ph...p.h..p............hhp+l..RlIPa+uVGt.ppthLhul+PDplpl.pps.p.p.hhhpcsllulssp.pLSspscYpsllpPphl......................................... 
0 47 75 80 +3777 PF03411 Peptidase_M74 Peptidase_U6; Penicillin-insensitive murein endopeptidase Bateman A anon Bateman A Family \N 20.30 20.30 20.40 20.80 20.20 19.60 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.57 0.70 -5.14 3 916 2012-10-02 01:02:30 2003-04-07 12:59:11 8 8 890 8 146 530 97 237.60 59 82.86 CHANGED QSIGSYoNGCIlGAsALPscGEsYQVMRhsRNRYaGHPcMItaLERLSpcAussGhPTlLVGDIuMPuGGRFLTGHASHQsGLDADIWLp.MPKpRaTsApppcspALslVcRDup+VDs+lWsPs+soLIKLAAQDP-VTRIFVNPAIKpQLCpTAGsDRuWL+KVRPWaGHpuHFHVRLTCPADSsECEsQPhVPuGDGCGcELtSWF..EPPKPG..ToKPcKKssP....PLPhuCQAlLNuPsh ..........................QSIGSauNGCIlGAssLPl............pu-s...Y...QVMR..o..cpcR..YaGHPDLlh.F.I.QR.L..Sp......p.s.p..p.h..G.h.G.T.l.L.IGDMuMPuGGRF..suGHASHQoGLDVD.I.aLp......LP.K..p....R.a.o....s......A....Q..L...h.....c.P...p.A..lc.L......V.s......+......D......G....+....+V..l.s.shWpsphh.u......LIKLAApDp..-VT..RIFVNPAIKpQL...C.h...-.....A.......G.s.D.....R..........s.....WL....RK.........V........R.P..W..F....t.................H+uHMHVRL+CP.As.Sh.ECE-QshP.....Ps.....GDGC.G.A.E...L...pSWF...............-P....P.K..Pu......os.K..P....c.K.K.s.P..P................PLPPuCQALLcp.s.h............................................ 
1 31 69 105 +3778 PF01343 Peptidase_S49 Peptidase_U7; Peptidase family S49 Bateman A anon Pfam-B_707 (release 2.1) Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.46 0.71 -4.44 20 6794 2012-10-02 13:07:06 2003-04-07 12:59:11 13 22 2890 24 1513 5321 2903 149.70 26 46.31 CHANGED pthcptKPVlshssshuASGuYalAosA-+IlusssullGSIGVhhphsshpshlcKlGlshpslpsGpaKsss..ohhcshos-t+phhQphl-psaphFlppVAcsRslsssplcplApG..clapGppAhcsGLVDElGsh--Alsphtp.hspls ...............................................t..tptsKPlh.ssh....s....s...hu....A....S....G....u....Y..hlA.s.s.A....s...c...I.h.s..s..P..s..u..l..l.G..S....I..GVh.s.t......h.....s....h.p.p............h.L.c.+.h.........G............l.chc.........hhp...s...G..p..aKssh...........shhpshos.c...s.+..p.thp.p.h.l..sp.ha.ptFlp.hV....u....p...sR..............t......h....s.........h........p.......p.......l........p........p........l...A...pG.........cha.......p......G.......p......p.......A......h......p...h......GLlD......pl...u.s.h.cc.s.l.tthtp.......................................................... 
1 508 959 1257 +3779 PF03420 Peptidase_U9 Prohead core protein protease Bateman A anon MEROPS Family \N 22.00 22.00 22.10 39.20 20.00 21.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.40 0.70 -5.07 2 91 2012-10-01 19:43:34 2003-04-07 12:59:11 8 1 86 0 6 81 1485 196.80 38 87.65 CHANGED .E.QLLIEsWG.su.hhsts...PhlEu+.st-hGhc..LYIEGIFMQupVVNRNtRhYPK+lhEpAVpcYIpEQVhTpQALGELNHPsRuNVDPhpAAIhIp-hWWcGssVhG+AhVlpss+..Gphltu.IcuGWlPGVSSRGLGSlpps.-Ghp.Vp-.F+LTVGVDsVWGPSAPsAaVpP..lTES.......pTtEhspSsDstahtLAEshKphL ................................t...............................pppG.tK..ha.IEGIFhQu-lhNRNtRhYP.+plLp+sVsc.Y.pc..lp..sppALGELsHP...s.......ts.....s.........ls..psuhhIpcLhh..c..Gss...shG+A+l.l.-ssp..GchltuLlcu.Ghh.GVSSRGhGolp.cp..cGhslVp-sFhLssusDlVhsPSAPDAaVps..IhEu..p......................................................h..................................................... 0 3 5 6 +3780 PF03036 Perilipin perilipin; Perilipin family Griffiths-Jones SR anon Pfam-B_1154 (release 6.4) Family The perilipin family includes lipid droplet-associated protein (perilipin) and adipose differentiation-related protein (adipophilin). 
28.20 28.20 28.60 28.50 27.70 28.10 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.30 0.70 -5.63 17 474 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 106 1 237 475 0 278.60 26 73.43 CHANGED husstps...QpsVVsRVssLPLVSSThshVpssYsuTK-sHPhl+SVC-hAE+GVpolsosAhsuApPllp+LEPQIusuNchAC+GLD+lEcpLPlLppPs-plhups+th...........l.tsVsuAK-oVsp.loussshshuuspsus-hT+shhstsh..VhuoRhsphsosuVDssLspSEclVDpaLP.o.-cELts.upp........................scu.-sssh....t.psuYaVRLGuLSs+lRcRshppols+l+pu+ppsQEtltQLppshsLlc.sppshp...pphhtt.tphh.hWhphppst.pst..............tp.pplEtcslslsRslT.............ppLQosshslsuSlpGLPsslp-pstplpptsttl.tshtshtshpc...ls.thLspu+tpltphptsLDplh-hllsNs ...................................................................................h..........p.pslpRlhplPlVpushphhppsY..ps...Kpp.s.h.lt.shphsEps...........l.......sh..A...h....t.s.Pl...l.p....p....lp.s....l...sh.ssphss+GLD+lEpplP..h..l..p..P....sp....p.........l......htp...h+th..................h..t.l...ps.......stssls..............hst...t..........th..........th............hh..t..s...h..h..phh.......t..sh-.s.....h.s...hs-.h..l..-.hl.P.s...pt-.t..st.p.....................................................................ps.p................phh.+lupLutphpp+shppshtplpth.p...t...pphh..l..sht.L.ht......................................................t................................p......h.tt............................tpl.thh..h....ht.hP...th....t.h.th..................h..........h......h..h.t.h................................................................................................................................................................................. 0 54 73 152 +3781 PF01497 Peripla_BP_2 Periplasmic binding protein Bashton M, Bateman A anon Pfam-B_461 (release 4.0) Domain This family includes bacterial periplasmic binding proteins. Several of which are involved in iron transport. 
27.50 27.50 27.50 27.50 27.30 27.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.16 0.70 -11.13 0.70 -5.24 33 15783 2012-10-03 15:23:08 2003-04-07 12:59:11 13 38 3934 62 3407 11094 1155 233.80 16 72.71 CHANGED lsshshssspllhtL...Ghtsplsussstspt.htttt...............sh.........spssstpphsh...................EplhslcPDlllssphh..........sph.chhptthsslshsssp.....................pshhctlptluphhspp..ccAcphhpphppplspsppphssh....ptpshhlhhhttt....thhhhhussshhsplhct.huhpsh.hstt.......tpttt.luhEplhs..hssDhllhtsp.....................pppshctlhps....sthppl.sAl+ss+lhthssph ................................................................................................................................hsht.t.h.chhhtl...............s.h..t...s.........h....s....u.......h....s....t............s...p.........................................................ph.............................................p..l....u...s...h....t...p...s..sh.........................................................Et.l......h..s.......l......c........P..D.....L.l...l...s.sstt.......................tt.th.....p..h.....h..p.....t....h....h......P...s...l....h...h...s..h.s.p..................................psht.p.p...l.p....p....l.u....p.hh...spp............c.c..A..c...p.....h....l....p....p....h...c......p..p...l...s...p..h....p...p.phtst...............ttp.s.h.h...h..h...h..h.tss.............................t..h.h..s...h....s...s..p.....s.....h...h...s...p..l..l..pt....hG.....h...p.....s..s...hss...............................ttsh.h..p...l..o..h...E..p...l...hp........hs....P..D.h..l..h.lhst............................t.t..t.t...h...p...t....l....h...p.s...................s.h...h...p.ph..sAl.+ssclh.h....t..................................................................................... 
0 1003 2187 2887 +3782 PF00532 Peripla_BP_1 periplasmic_binding_like; Periplasmic binding proteins and sugar binding domain of LacI family Bateman A, Griffiths-Jones SR anon MRC-LMB Genome group Domain This family includes the periplasmic binding proteins, and the LacI family transcriptional regulators. The periplasmic binding proteins are the primary receptors for chemotaxis and transport of many sugar based solutes. The LacI family of proteins consist of transcriptional regulators related to the lac repressor. In this case, generally the sugar binding domain binds a sugar which changes the DNA binding activity of the repressor domain (Pfam:PF00356). 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.68 0.70 -5.20 6 5137 2012-10-02 13:57:41 2003-04-07 12:59:11 16 10 2047 27 638 25439 2217 255.60 21 79.54 CHANGED shslGhllsps..csPa....ahclstulscAuppaGhslhLlssspps-tt...pthc.LtsptsDGllIs.ohssc..scIpthtct.shPVItsscshs.s......sVPssh.D.shpAut.psspaLlptGHcp...l...slhstssSsh.sstcRspGahsAltssGh....h+phplhpssschpsutpAlpphhpps..Pshp..AllshNDpsAhGuhhsh.tpGh.lcs.psVs..h.ulhuasuL........spsshl..s.lss.psstp.lGhpsu-hlhp.l........tp-pscslhIs.....thhhtcs 
................................................................................................................................................s..tlGll.l.P....sl........pss.....a.............a.s...p.....l...h.p.u....l...p....p.....s........s....p.....p.....t.......G..........a.....p.....l.........h.......l.........h........p..............s............s.....t.....p......t....c.tt.............pt.l....c....p....l.......h.......s.......p.......t...l...D....G...l....l......l.......s......s........t.......s...............p................s...........p..........l......h.....p........h.......h.....p........t........s......h..P....l.......l...h...h..s....c..t.......t..s.s......................................s..l..s.....h....l.....h..........D......s.........h....p.........u.........u......h......p....h....s.....p........t.......L.......l.......p.........p......G.....p....c..+.............l......................u.h...l.....s.....u.......s........t........s...........t............h......s............s..........t.............c.........R......h......t............G............a.........p.......p.......A.......l.......p......p.....t.......uh..................h....p....................h..........h...........h........t.........s........s..............p............s..............h...............p.........t.............u..........h.......p........h.......h.......p....p..........h......h......p.............pt..................P...p.........hp..........ulh..s..h...s..-...p.h..A.h.G...s...h......h.....t.t...G..........h..p......h..s..........t..t.......l..............................s..h......u...h....p.....s.....h.........................t.....h.............s........l.....s............p..........................G...........s.s...p...hhh.t...l..........................................................t........................................................................................................................................................................ 
1 130 282 466 +3783 PF00141 peroxidase Peroxidase Bateman A, Sonnhammer ELL, Studholme DJ anon Prosite; PfamB-105, Release 14.0; Family \N 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.92 0.70 -4.87 162 7995 2009-01-15 18:05:59 2003-04-07 12:59:11 18 39 2016 398 2881 7388 3647 260.20 25 77.65 CHANGED Vcphlpphhpp........psshus...sl.........lRLhFHD.........................................Chs...............t.GCDuSl.ll.......htsEcs.sss.Nt...uLc..uhc..................ll.....-slKspl-p..tC.st..h...VSCADll.................sLAucsulth...................s............GGP..hh.sl.hGRcDupsupt.tts...tl...........................................................P.................ss.t.shsp.lhptF.sphG.L.ssp-hVsL.u......................................GuHT..lGts+.Ct.h...........+h..................ths.thh...t.......C.......s....................h.h.D....os.................spFDNsYapsLhs....................................ptshhpoDtsLh....sss......pspshVppaAss 
.................................................................................................................h....htthht...............tphus....hh..........lRhh.aHD................................................shs.........................................uGss..uuh.hh..............hss.pps.hss.Nt....sLc...uhc..................ll.....psl...K.pphsp.......t.................lShADl.l.........................sLAussulpt..........................................................s................Ghs...sh..shs.sG.RtDuhpspt..sth...t.l...................................................................P...................ss..hssspp...l...h.s.p...F...tp.hu.....h.s.s..p.Eh.V.AL..u................................................................Gu.H.o....lGt.sa.ss...........p............................t..th......t......................tspt..............hs.h..c.......sP...................................................................stasNs.aFtsLhs..........................................................................................................................ph..shh..poDhsLh..p..ss......th.c..tlschasp................................................................................................................................................ 0 552 1714 2399 +3784 PF01328 Peroxidase_2 Peroxidase, family 2 Finn RD, Bateman A anon Sarah Teichmann Family The peroxidases in this family do not have similarity to other peroxidases. 
21.00 21.00 21.10 23.70 20.10 20.40 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.05 0.70 -5.76 2 357 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 84 13 308 383 0 183.60 23 63.07 CHANGED MFupll.hsuslssh.....tcs........psshhsassPGPsDsRuPCPAhNuLANHGalPHDG+uIoh.sl.sAh.sthsluss.hthAlssAhlls...stushhso.hNLs.LsE.ph.hEHDtShSRtDYhpussp.....DshsFstphFpp.Lshhsu.p.hshsshsth+htR.phppEhD..h.aotp+.l.sh.EouhhhuhluD...Psps....scl-Wh+haFppEphPY+.GW+.sSstppl..lsuh.utllhAs.....ssLPpG.ltssApsVsluFuu.hsPhhhshN........plhs ................................................sth................................................................a..ss..t..s......s...D........RuPCPuL.NsLANHG.........al.P..+s..Gt.s.l...o...ht...pl...h..p.....u........h..tp..s......hs.h....u..........s........................h......s....h...h.h...........h...h...............h.....h......................................................................................................t............h.........h.......s....L.......s....t........L..........s.t.......H.......sh..hEtDsSlsRtDhh........................................hht..............ht.............t....................s..............h...s.....h......p........t....................................................................................................................................................................................................................hhhhh..................................................................................................................................................................... 1 104 191 267 +3785 PF04088 Peroxin-13_N Peroxin 13, N-terminal region Wood V, Finn RD anon Pfam-B_8055 (release 7.3); Family Both termini of the Peroxin-13 are oriented to the cytosol. Peroxin-13 is required for peroxisomal association of peroxin-14 [1]. 
20.90 20.90 21.10 22.70 20.70 20.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.01 0.71 -4.27 29 243 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 221 0 175 247 0 154.00 37 39.23 CHANGED GYG.........Gh.G..ts.s..shht..hspSTpATFQlIESllGAhGGFAQMLESTYMATHsSFFsMloVAEQFupLKssLGShLGIFAlh+al+plhtKlssthh...............tspuhssspF.ppFpspt....ps.ptt..........ts+.ShKPllhFluAlhGhPYLlsKllp .........................hhsshYs.......uhh..s...s..s.ss.ss..phs.pp.hppSopusFQhIESIVsAFuuhApMLESTahAsaSSFh.........AhluVA-pFupL+spLuslhuhFsllRhl+plht+lpthhh..............hpssu.hs..stF..spapss.....t....s..t...........sss+sSphPlhhFlss.lhGhPYLhhKll.t...................................... 0 50 87 141 +3786 PF04882 Peroxin-3 Peroxin-3 Mifsud W anon Pfam-B_6513 (release 7.6) Family Peroxin-3 is a peroxisomal protein. It is thought to be involve in membrane vesicle assembly prior to the translocation of matrix proteins [1]. 26.70 26.70 28.40 27.20 26.60 26.60 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.32 0.70 -5.77 32 408 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 258 4 274 399 2 283.60 23 86.51 CHANGED 
hsuhtsahpR+++KlhlsuullGusYl...lspashpK.lp-hpp+hsp-hhs+E..................pl+p+FcQsQp-CshTlLuL.LPslspslhc.tLssEplsppLppp+.........................................................................................p.............t....................................................................scsKhpLWs-LKIpolTRhlTllYolohLhlhhRlQLNlLGR+pYL-...........o...........................................................shthutsppttppsths................................................hpspppYLSh..SWWLLs+.GahplhshlcpsVpcsFssls.+ppLolschppllhplpptlpt........s........sspp..........hlshLlP...t....hl.po...................s.hss.s.....pssspLpcLlsETpcll..-SsshspVlpthlspuFsplh-plt.tphs.pt.s.s...........................tthsshsphp..............hpLAplLshlscQsptlssss.......................................sNpalpslpp.lccLcsFuAsVYos.F ........................................................................................................h.pppt.hhh.........shhssh...hh......h...h.h.t+......h.....p......p.....p.........tphh.pt..................ph..p.+..Ftp.pp.s.................s..h.h..h.h..ht..l.t.....hs.p.lh..lp............................................................................................................................................................................................................................................................................................................................................................................tp+hplWpplcl.shs+.hs.hashshL.lhh+lQlslluth.Yh.p..................................................................................................................................................................................................................t............................................................................pptaL.s.....ahhtp..Gh.th.p.hpt.l...t...ht.....hp.............p..p.....hsh.phtphh.tl...h..............................................h..hhhs..................................................................
....................h.thhtEh.thl..ps..h..lht...p..h.hhhpth..........................................................................................................h.hs.hh..ht........hhtt.........................................s.hl..t......h......ht.h...lht.................................................................................................................... 0 85 138 210 +3787 PF03212 Pertactin Pertactin Mifsud W anon Pfam-B_2005 (release 6.5) Family \N 21.30 21.30 21.40 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.50 0.71 -4.39 8 2661 2012-10-02 14:50:22 2003-04-07 12:59:11 9 35 708 7 134 1809 3 114.80 27 13.74 CHANGED VssLpLss.GsVsF.......usPAsssGpFpTL.slpoLSGoGsFhMNssls......supuDhLsVsssAoGpa+lhV+NoGsEPsSuss.LsLVcTs.GGsAuFTLuNtGGtVDlGTacYsLsss.....tNssWsLps ........................................................tl.hst.upl.h.......................p....s...t................a...sL..ss..s..p..L.s.G...s..G..s.hh.h..po..s..ls......ss..t..s..DpLsV...p.G...s...s.oG.s.hplt.V.ss...s...G...s.p.......s.s.......s.......s...s....t......lpllp.....ss.....s.....u.....s.....u........s......Fs......h......s......s.......................t....h....V...s......hGsYpYpLhps............s...tsW.Lh.s.................................. 
0 32 55 98 +3788 PF02917 Pertussis_S1 Pertussis toxin, subunit 1 Griffiths-Jones SR anon Structural domain Domain \N 20.90 20.90 21.30 21.30 20.60 20.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.55 0.70 -4.79 2 121 2012-10-01 23:25:29 2003-04-07 12:59:11 9 5 104 6 16 77 1 179.80 46 59.63 CHANGED sPsthVYRhD.RsPE-lFppGFoshGsscNhh-Hlh....GRSh.luoSposhsuhp..tpah.Ehh.EH.hpthl.t.RAsp...HFhshhhpscs..shhtttpshF-.sDp.hsphGhhhhts.hsYQpEahsct.IsstNlR..othhhsulssEsspscassuR.VsppTRhN.Pp.ass+hp.hph.us...hhPs.Ghshshphp.spAhushs.ptGpuhsLs.ats.saS ...s..sDFVYRVDSpP.P.-lIFR...D.G....FohhG..h..NR...N..hQQaIS...........GR..SC...u...uGS..S...D.SpaIATT.So.....h......s.o.Ysh....t+uh.a.uR.ush.pG..plYRYQIRADNNFYShhsSl..s..YL-o.pGu..phs..th..p+sh...hph..Q...pEYlushsIhPENIpcAssl.laDu..sTG..s.p.s.s.p.h..NupYlshsTpSN..P...........................................................slh.hl...pthppphhshhh..hsth..ht....................................................................... 1 3 10 13 +3789 PF02918 Pertussis_S2S3 Pertussis toxin, subunit 2 and 3, C-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.90 21.90 23.50 27.60 21.60 21.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.42 0.72 -4.32 3 72 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 64 12 5 26 0 109.00 71 70.55 CHANGED ThR+TGQPATD.HYYSNVTATRLLuSTNSRLCAVFVRDGQPVIGACsSPY-GR.YR-MYusLRRhLYhIYhuGLuVRVHVSKEEQYYDYEDATFpTYALTGISlCNPGuSl .....TGDKT.....NAYYSD.EVISELHVGQIDTSPYFC.......IKTVKANGSGTP..VV.ACAVSKQSI.WAPSFKELLDQARYFYSTGQSVRIHVQKNIWTYPLFVNTFSANALVGLSSCSATQC.F...................................... 0 3 3 4 +3790 PF02529 PetG Cytochrome B6-F complex subunit 5 Bsahton M, Bateman A anon Pfam-B_1348 (release 5.4) Family This family consists of cytochrome B6-F complex subunit 5 (PetG). 
The cytochrome bf complex found in green plants, eukaryotic algae and cyanobacteria, connects photosystem I to photosystem II in the electron transport chain, functioning as a plastoquinol:plastocyanin/cytochrome c6 oxidoreductase [1]. PetG or subunit 5 is associated with the bf complex and the absence of PetG affects either the assembly or stability of the cytochrome bf complex in Chlamydomonas reinhardtii [1]. 20.80 20.80 20.80 20.80 20.50 20.20 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.63 0.72 -4.43 17 647 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 629 9 42 207 66 35.10 81 95.05 CHANGED MlEPLLsGIVLGLlPlTlsGLFVsAYhQY+RGs.phsh .....MIEshL.GIVLGLIPITLAGLFVTAYLQYRRGDQLDl..... 0 13 32 40 +3791 PF05115 PetL Cytochrome B6-F complex subunit VI (PetL) Moxon SJ anon Pfam-B_6510 (release 7.7) Family This family consists of several Cytochrome B6-F complex subunit VI (PetL) proteins found in several plant species. PetL is one of the small subunits which make up The cytochrome b(6)f complex. PetL is strictly required neither for the accumulation nor for the function of cytochrome b6f; in its absence, however, the complex becomes unstable in vivo in aging cells and labile in vitro. It has been suggested that the N-terminus of the protein is likely to lie in the thylakoid lumen [1]. 21.70 21.70 22.80 22.70 20.80 20.70 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.14 0.72 -4.04 28 723 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 681 2 17 306 0 30.30 69 83.62 CHANGED MhTllSYhGhLhuuLshTlsLFlGLsK.IcLI ........M.TITSYFGFLLAALTITssL.FIGLsK.I+LI....... 0 8 12 14 +3792 PF03742 PetN PetN Finn RD anon Pfam-B_3260 (release 7.0) Family PetN is a small hydrophobic protein, crucial for cytochrome b6-f complex assembly and/or stability. 
20.30 20.30 20.50 20.50 20.20 20.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.25 0.72 -4.38 22 1420 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 1364 8 44 221 4 25.50 91 97.19 CHANGED MDIloluWuuLhshFTFSlALVVWGRNGh .............MDIVuluWAALMVVFTFSLSLVVWGRSGL. 0 13 31 40 +3793 PF04614 Pex19 Pex19 protein family Wood V, Bateman A anon Wood V Family \N 22.20 22.20 22.30 24.10 21.90 22.10 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.82 0.70 -4.76 27 341 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 273 5 218 335 3 236.60 24 75.49 CHANGED ts-...........................sppshhpphppphppLhs.................................tttssptppphpphhpphst................ppsssstpsppss..............sFpsslpcThp+Lpcuucplssshtt.............s--hLsplLpshshss..........sss-tshtthl.sMMpQLsSKEVLYpPlKELpsKaPtWLcppps..plspE.chp+YccQhplsscIlppFEp..............tsYsDpp...cpct-tlhcLhpphQ-hGpPP..............sELluphsss........................tstsh.sshstt..-sCppp .......................................................................ttt.t...........p.pt.h.tpLht..............................................t..thttphtphh.pthtt..................................................................t.th.tt....p.p..t.........t..t.......t............................pFppslpcThpt....lp.c.s..up...p.......hpssh.....................--...l....s.p..hhcths.s.t............................tssct....sh.shht....sh.MppLhSK-l..LYp.PhK....El......s-K.......aPpWLpppcs...........plstE.-hcRYpcQhplhpcIsptaEp........................ps.sDpp........tchchlh-...lMpp......hQphGpPP...............p-L.su-hsss..h............................s.......tC.................................................. 0 68 120 177 +3794 PF04757 Pex2_Pex12 Pex2 / Pex12 amino terminal region Bateman A, Wood V anon Bateman A Family This region is found at the N terminal of a number of known and predicted peroxins including Pex2, Pex10 and Pex12. 
This conserved region is usually associated with a C terminal ring finger (Pfam:PF00097) domain. 22.20 22.20 22.90 22.40 21.70 22.10 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.47 0.70 -4.94 113 868 2009-01-15 18:05:59 2003-04-07 12:59:11 9 26 288 0 602 841 8 218.00 17 58.58 CHANGED -pplpshLpsplppllphht..............hhthhhp......atcEl.phlhphllhthshhp........tssohGEpahsLthhs..st.t......................................................................t.....phhsttp+hhhlhh..hlhhPYlhpKlpphhpptttpt..t..................................................................ptthhp.hhshlpsh..hphhphhphhhFlhsus......ahols.pRlhul+hshh............ttph.t...............................ssaphhsthlhhphhhphlhhhh......shh..h.hh.............s ..........................................................................ppl.shLtsplpphhphht.....................h.t..hhhp................atsEl.thhhphllhthslhp....................tssohG-phhsLphhs..t................................................................................................................................t..th.sttp+h.hhhhh.h..lhhPYl....hp+lpphhtpp...tt.tt....................................................................................................................................hphh.....hp..hhs.hlp.sh......hphht....hhphhhFlhsup............................aholh..cRlhGl+hshh..................ttphppt...............................................ssachhst..l...h.hph..h..h..phhh.hhh........hh..................t............................................................... 1 173 313 489 +3795 PF03011 PFEMP PFEMP DBL domain Griffiths-Jones SR, Bateman A anon Pfam-B_822 (release 6.4) Domain PfEMP1 (Plasmodium falciparum erythrocyte membrane protein) has been identified as the rosetting ligand of the malaria parasite P. falciparum [1,2]. 
Rosetting is the adhesion of infected erythrocytes with uninfected erythrocytes in the vasculature of the infected organ, and is associated with severe malaria. PfEMP1 interacts with Complement Receptor One on uninfected erythrocytes to form rosettes [2]. The extreme variation within these proteins and the grouping of var genes implies that var gene recombination preferentially occurs within var gene groups. These groups reflect a functional diversification that has evolved to cope with the varying conditions of transmission and host immune response met by the parasite [3]. A recombination hotspot was uncovered between Duffy-binding-like (DBL) subdomains [4]. Solution of the crystal structure of the N-terminal and first DBL region of PfEMP1 from the VarO variant of the PfEMP1 protein is found to be directly implicated in rosetting as the heparin-binding site [5]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.34 0.71 -3.86 41 832 2012-10-03 17:31:28 2003-04-07 12:59:11 10 55 9 4 23 881 0 120.50 28 18.86 CHANGED hFpcWVpphLcD..+h+.cKlspClpssct..p.spst..CpppCpChc+.WlppKccE.WppIKc+FpcQtchtp........................ttt.h.lpthLpph.h..shpcshsstcclp+lcchlpppttsstt...pst..s.......sppcshIDpLLp+.pccAcpCppppssps .....................................alpph..p-h.+h+..cK..h.....c.spIN.ssp..ps..Ccss...........CpptCcsYcp.WIocK.K.p.EWDtlps+apshpsucp..................................................................................................................................................................................t................................................................ 
1 22 22 23 +3796 PF00365 PFK Phosphofructokinase Finn RD anon Prosite Domain \N 20.30 20.30 20.30 20.30 19.90 20.20 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.75 0.70 -5.45 12 6795 2012-10-02 15:20:27 2003-04-07 12:59:11 15 15 3846 82 2001 4903 1585 268.50 35 74.02 CHANGED K+IuVLTSGGDA.GMNAAlRAVVRpAIhpGh-VasIh-GYtGLlpG...pItplshtsVushlphGGThlGSARh.EF+pcEGRhtuhppLhcpGI-uLVVIGGDGShpGAphhppEau......................hsslGlsGoIDNDhsGTDhTIGhDoALpplh-AIDtIcsTApSHpRsFVlEVMGRaCG.lALhuGlAsGADhIhIPEts....h.p-plspplpcspp+GK+psIllVAEGshs....s..sphhcplhhpt.sh-TRlTVLGHlQRGGoPoAaDRlLAS+hGscAVchLLp .....................................................................+IulLTSGGDuP.G.hNA.u.lR..u.l...V.......+...p....u..l....t............p........G...h...c...V..h..G...l..hc.G...at..G...L...l......ps....................ch....h...p......l......s......h............p......s...........V......s......s......h.....l.............p.......p..............G..G.T....h.L.G.o.u..........R..........h.............................t...........h...........p............p..............t............p.............s............p............t...........p............s...........h.........c.......p...........l.......c.c.............t........u......I-.uLllIGG...D.GS.h.pu......A...t...t.....L.....s...........chs................................................................................................l.s.slG..lPt..TIDN..Dl..s...u..T.....D........h........Tl.....Ga.....-T.....Alp....p....h....h.....-..u.l....D....c....l....+...s...T...u...s.............S....H....p....R...h.h.ll.E.................VMGRpsGalA...Lh.u..u....l.....A.......s.......G......u........-.......h.....l.llP.Ehs...........................hs.h.c....p...l...h...p...p.....l....p.......p.......t....h.......t.......c.......G.......+....p.......t...s........l....l.l.l.u.EGshs........................................................t...............p................h..................h........
......t..................t.................l.................................t.................t....h...................................t..............h-.sRssl............L.......GH.hQRGGs..Pss..hDRhh..u......ophGh.Ahphl..t................................................................................................... 0 732 1287 1714 +3797 PF02901 PFL Pyruvate formate lyase Griffiths-Jones SR anon Structural domain Family \N 24.30 24.30 24.60 24.50 24.00 24.00 hmmbuild -o /dev/null HMM SEED 648 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.84 0.70 -6.10 129 4920 2012-10-01 23:28:04 2003-04-07 12:59:11 10 9 2399 26 529 2860 102 572.20 38 79.76 CHANGED hh.....p............clsh....lppsh..h.ucss.l.s.........hc+shhlpctt+c.s......tGh.shs..scpspu........Ihspt.........shhI.......c-tElIVGhp.TstPhttshh....Pphuh+h.......lcst.hsh................................sapl.sppscchhpc.............htKotsctlhshastEh.......ptshcsslhT........uh.pshGtG+lhsDYc+lh.hhGlctlh.......p-tppchpphs.............................hht...hs....cchhchtcElu-p......pthpELtphAphhshs.ucP.ApshpEAlQhhaFs.aLushpEpNG.suhShGRhspaL.sYhc+Dlc.pGh...lT-cp.....AQELl-phhlKlp.lphhRssthsp.................hFuG.ss.ahshslGGhs....h-G.cshlschSahhLcsht..pl.tsspPslolhhs.pphP....csFhchssclshp..su..papND-lhh.................................hh.pD..DYu.lssCVps..h.............thG.....Kphpah...uAhhNl..uKsL.hslNsGhDph...ss......tplu.....P..ph.ssl..p..ph.hsa--lhpsacc.hcalschhlpuhNlIchhHcchs.cshh...uLh...Dsslp+shshsh....................sGlussuDSLuAIKh..........sKhhslc....c.pcu.................LsssFcsp.....................................................................................h....hPKYGNDD.....DclDplAtclsptahp.clcphps.h..R......suhhshulLolouNV.saG 
...............................................................................................................p....clsh.....lppsh..a.scts.l.s..........h-+s.ht.hhc.tpp..p..........pu...shs.....sp.....hsps.....IhsptshaI....+c.....EhIVGhQ..sstP.hctshh.............P..h...u.lph.......lcpp..hs.....h.....................................................................shpl..s.chccl.ap-.............hp+Thpp...t.las..haos-h..........................................htspco..s.llo........uhss..sh..G...cG.+I...IsDYpR.lh..haGlchLh.......cctttphsplp................................................tthh....cchhcLtEElA...p...p..............hp...t....ht...p...........lhphAthhuhshppP.ApshpEAlQhhaFs.aLsshhppNG..uuhShGRhspaL..Yh.cRDlc...sGh....loEpp......ApEhl-phhhKlph.Vphl..R.ospasp..............................hF.u.G.ss.ahT.olGGhs............hD..G.......R....shVsc.SathLcolt..sh..uPpPNLTlhas.pp.LP..............tsFhchssclshp..pu..QacN.D-lhhs................................................pD..DY.ul.ssCVps...h...............hsG........Kphpaa......uAtsNl..AKslLhulN.GGhD-t........t.............h.p.h.u....................P...ph..tsl..p....s-h....Lsa-cVhpph-p.hhcalsphhl.puh.NlIchMH-+a.sa-shh...ALh...Dcsl.......tRshu.h.sh.......................s.G.lussuDSLuAIKh..............spsh.s.....lc.-.psu.................Ls.sDF-hp.........................................................................................tc...aP+YGN.s.D........-cVDsluscllcpahp.clcp..h.ps.h..R.......su.h.T.uhlTIouNVsaG............................................ 0 178 334 439 +3798 PF01471 PG_binding_1 Putative peptidoglycan binding domain Bateman A anon Pfam-B_2277 (release 4.0) Domain This domain is composed of three alpha helices [1]. This domain is found at the N or C terminus of a variety of enzymes involved in bacterial cell wall degradation [2]. This domain may have a general peptidoglycan binding function. This family is found N-terminal to the catalytic domain of matrixins [3]. 
The domain is found to bind peptidoglycan experimentally [4]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.50 0.72 -3.99 237 10090 2012-10-01 23:43:47 2003-04-07 12:59:11 13 329 3183 14 3243 8659 2140 56.00 25 14.51 CHANGED psssVptlQphLp.ph.Gah...........t.........shs.....uhaustTppAlcpFQ.pthGLs.ss..GhssspThptL ..............................................................t...tlttlQ.phL..p....th..Ga.............................s...............ths............uh.a..ss....p..o.p..p.....A....l...+.pF.......Q..p.......t......t......G..........L.....s...s.....s.....Gh.h..stpThphh...................... 1 982 1922 2518 +3799 PF00300 His_Phos_1 PGAM; Histidine phosphatase superfamily (branch 1) Finn RD, Griffiths-Jones SR, Rigden DJ anon Prosite Domain The histidine phosphatase superfamily is so named because catalysis centres on a conserved His residue that is transiently phosphorylated during the catalytic cycle. Other conserved residues contribute to a 'phosphate pocket' and interact with the phospho group of substrate before, during and after its transfer to the His residue. Structure and sequence analyses show that different families contribute different additional residues to the 'phosphate pocket' and, more surprisingly, differ in the position, in sequence and in three dimensions, of a catalytically essential acidic residue. The superfamily may be divided into two main branches. The larger branch 1 contains a wide variety of catalytic functions, the best known being fructose 2,6-bisphosphatase (found in a bifunctional protein with 2-phosphofructokinase) and cofactor-dependent phosphoglycerate mutase. The latter is an unusual example of a mutase activity in the superfamily: the vast majority of members appear to be phosphatases. 
The bacterial regulatory protein phosphatase SixA is also in branch 1 and has a minimal, and possible ancestral-like structure, lacking the large domain insertions that contribute to binding of small molecules in branch 1 members. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.35 0.71 -4.14 267 22415 2012-10-02 11:42:54 2003-04-07 12:59:11 17 114 4654 228 6555 16302 6276 153.40 22 62.77 CHANGED plhllRHGp...........op...............................s........t....................hpGpt-.............LoppGtppApthu.pt......Lp................................................shphst.......lhsSshpRstpT....Aph.lsp..............................tht........................................................h.h....tL...pEhsh.s.............Gh.htphttth..................thtthhp......t...h..................................................shtphhp.Rstphlpplh............................ttspsl....llVuHusslpsl ........................................................................................lhllRHGc.........................op................................hN..................htt.h..............................................................hp.G.ts-..................s.s.....................Lo.p.p.G..t.pp.A..p..t.hu..p.h.......Lp..........................................................................................................................................................................s.h.p.hc.t..........lh..o...S..s......h.......p..........R.uhpT.........uph...ltp...........................................ths..........................................................................................................................hs.h...h.p.....pL...........pE.h.ph....G................h....................p....uh..p....h....t...p..h..t.t..th..................................thhp.................t.h.....s...................................................................................
tuE.............s.h...t.p..hht...R.h.......h.h.pphh......................................................ttppl.....llsuHusslp............................................................................................................................................................................................... 0 2047 4021 5493 +3800 PF00342 PGI Phosphoglucose isomerase Bateman A, Finn RD anon Prosite Domain Phosphoglucose isomerase catalyses the interconversion of glucose-6-phosphate and fructose-6-phosphate. 20.50 20.50 20.50 20.50 19.40 20.40 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.73 0.70 -6.07 10 7325 2012-10-02 15:05:26 2003-04-07 12:59:11 14 16 5754 105 1399 5509 3130 366.00 35 89.39 CHANGED DYSKsplss-hhptLlpLAcEttlcthp-tMFsGE+INsTEsRuVLHlALRsRospslhsDGpDVhP-VstVLs+MKsFs-+lRoGsWKGtTGKslscVVsIGIGGSsLGPlhVpEAL+shsps............plaFVSNVDGTalAEsLKpLssEsTLhlVASKTFTTsEThhNAcoAR-Wlhpthst.....cuuVAKHhlALSTNspcVcKFGIDsp..NhFsFWDWVGGRYSVWSAIG.LPlALulGa-NF-chLpGAcshDcHFsooPhEcNlPlLLALlulWhsNFhGspT+AlLPYDQhLa+husYlQQLsMESNGKhVopcG.shlsapTGsIsFGEsGTNGQHuFYQLIHQGT+lIPCDFIusVpopp......chssHHcpLhSNFFAQs-ALhhGKosEEV+pEh.tu........psLlPHKsFpGsRPosSILlscLoPasLGALlAhYEH+lhVQGhlWGINSFDQWGVELGKsLApsIhscLcsuthhs.....uaDuSTsuLI 
....................................................................................................................................................................................................................................................ut...s......h...h....s.........l.p..................s...hth...tt....t.tl.hputhp...h..h.s..s.p.....ps..llsIGIGGS.L.....Gshhshc.........h......L.p......htt..........................................h.ah.ssN.lsss...t..........lt.c.hl...ph...l....................s............s.........c............s............s........h....h.ll..hSK.........ohTT.E..............shhs.hp.h.+phhh.p.t...h.t.......................ttt.....h.t+.+hhAh...o.s....p..t.....p...t.....s..h....c....h....t..h.stt........ph..F...h....D....VGG.Ra.SlhoAlG.Ls.....l.s.ls.h......ts..h..p.................phLp...G.A..p.t..h..-p...c.a......t...s..s...s..h.....c.....p.....N..................h.s..h.h.h.u...h..l.t..h...h.h...sh...Gh..tschl..l.s.Yp.t............hlphastahpQh..hESpG..K....sh.p.G.........................h..s....ush.a....oss.HuhhQhl.pp...........s..phh....h-h.lh...hpp.p............................h.p..p...h..s..h..sthph.Lh...GK....s....c..h.pt.ph.....t.........................l..h..s.a....p.s......h...G.h.Ps..lhh..p.l.sshslG..LlhhaEhthhh.pGhlh........sls..sF...........c....Q.G........VEhhK...h.tlh....................................................................................................................................... 
0 472 905 1183 +3801 PF00162 PGK Phosphoglycerate kinase Sonnhammer ELL anon Prosite Domain \N 19.90 19.90 21.80 21.70 19.50 18.70 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.33 0.70 -5.91 154 5701 2009-01-15 18:05:59 2003-04-07 12:59:11 14 18 5059 60 1467 4176 4138 364.90 46 93.99 CHANGED olcDl.........slpGK+VLlRlDhNVPl...c..s.G.......pIoD-sRIcuulP.TIchll-p.GA+.VllhSHLGRPc........................thps.........................chSLpPV...................................AptLuclL.Gpp...VpassD..s....lGspscptltth..psG-llLLENlRFpstEp.......................cscsphscpLAsLu...DlaVNDAFGoAHRAHASshGl..sphl...susAGhLhc+ElchL.sculpsP.pRPhlAllGGuKVSsKlpll....csLlp+..lDplllGGGMA.TFLtA.pG.hslGpSL...hEp..........-hlch...................A+cllpcA.cptthclhLPsDhllu.....cc.Fst...su.ps.psssssp..ls.....ss.hhsLDlGPcTlchasch.lppA+TllWNGPhGVFE.hssFupGTtslscAlAcs....su.hollGGGDosAAlpp.hGhs-.cho..HlSTGGGAsLEhLEGK ..........................................lpDl....Dl.p.GK+....VllRsDhNVPl......cs.G.......pIT..s.Ds...RIpAuLP.TIchhlc.p.G.u+.VllhSHLGRPct................................................................................................p.pt...................................................................chSLtPV...........................................................ApcLuch.L......u.....pp.......V.t...h.....s....s.........D......s..........lG.................p.lct.hht..t..l..psG..-l..lLLENlRFppt....Ep.................................Ks-.p.huKphAu..Lu.......D...l........aVN.DAFGTAHRAHASshGl..uphh.....sus.A..G.h.LhppElchL........sc.AlpsP...pRPh.lAIlGGuKVSsKlsVlcsLlcK.....sDplllGGGMAaTFltA..p.G...h.p...lG..p...SL...hEc......................Dhl..-h.....................................................A+c.llp.cA.pt..............plh.LP..lDsl...lA..................sc.......F........u....s.......sA.......p.....s....phs.....s...sss......ls......................ss....huLDI.G.Pc..ohc...h..asch.lp........s......A.KT........llWNGPh..GVFE.hss
Fup.GTpslucAlAcs.........suhoIl.G..GGDosAAlp.......p.......h.......G.......huD.chS.......aISTGGGA.L.EhlEGK.................................................... 1 516 944 1234 +3802 PF00408 PGM_PMM_IV PGM_PMM; Phosphoglucomutase/phosphomannomutase, C-terminal domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Family \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.44 0.72 -3.97 112 12833 2009-01-15 18:05:59 2003-04-07 12:59:11 15 36 4927 56 3279 9608 4242 81.50 20 16.51 CHANGED .lNhplsc..t..........tt...htphtphhps........................htps.phhttcGhhl...............lRsSGTEPl...lRlhhEup.sppthpphtppltph...........lc .........................................................................................................................................t...th.t......................................htsshc.h.h.h.s..c...G...t.......hl................................lRsSG....TEPh......lR..lh...sE..ut...sp.pt.hpphhpphh....h............................... 
0 1056 2051 2778 +3803 PF02878 PGM_PMM_I Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain I Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.69 89 14497 2009-01-15 18:05:59 2003-04-07 12:59:11 11 70 4976 61 3802 11145 5249 133.00 29 27.44 CHANGED thhFGTsGlRGhss.tp......hssphshplupuhuphlppp.......ttsplllGtDsRhsutthtpshhpslsusGlcshhhG......hhPTPsluatscp..hp......ssuGlhlTASHNPsphNG....lKhhhss.Gttlsssh.pppIpphhpphptht .......................................................t..hFGTsGlRGhhs..tt...............hs.thshplutu.h.up..h.ltpp.........................tptpl.llGt.Ds...R.......h......s......u.................htp.slhtsL.su..s..G..l.c...V.h..hhu..........................hhsTP.sl..u..a..hs..p..p.....hp..........................ss.uGlhlT.......ASHNPhp.NG.............hKhh........s.......s........s....G.....tt..lsssh...pptIpphhpt....h.................................................. 0 1252 2394 3225 +3804 PF02879 PGM_PMM_II Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain II Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.47 0.72 -3.70 98 14085 2009-01-15 18:05:59 2003-04-07 12:59:11 11 54 4971 55 3517 10795 4847 103.50 26 21.14 CHANGED st....Yhcpltphht.......hptp...sh......p.llhsshpGsusthhtpllpp.huh..............phht.hptts-ssFss...phPsPpt....t.shp.hhphsppp.....ss.cl..uluhDsDuDRlsls.....cppG ................................................tYlphltshhs..............h..p..h..p........s.l..........................+..lllDshsG..s.utthssp...l...h.p.c.hGh...................................plhs..lpspP..Du..sFss.............ttssPpt...........t.sht..ht....p.h..s..h....cp..................tA...D....l....GlAhDGDuDRhhll.....cttG............. 
0 1142 2220 2982 +3805 PF02880 PGM_PMM_III Phosphoglucomutase/phosphomannomutase, alpha/beta/alpha domain III Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.64 0.71 -4.03 584 13734 2009-09-11 23:52:46 2003-04-07 12:59:11 11 46 4950 49 3296 10440 4816 115.60 24 23.67 CHANGED s.....GDp.lhslhuphh........hppt..........ht.s........s..lltTlhosh.ul-cshp...p.hGh..p.hh.cotVGc+alhct.hppt.....s.......h.shGGEpS.GHhhhh.ch.....tp.....s....sDGllsuLhllplhs.........pp..s..p....slu-lh.phhpt...a .................................................................pGsp.lhslhu.phh.....hpp.............ht.s......................s.tllpolhooh..t.lc...c.hhp...p....h.....Gh..p.....h.h.c.......shsG..a.+a..lh......c......p......hpct.................................s............h..hhG.GEpS.ut..h..hht..ch...................................sp...........s......cDGlhs...slhl.h...p...hhs...........pp..s........p..sLs-lhtph.t............................................................................... 0 1070 2094 2795 +3806 PF04608 PgpA Phosphatidylglycerophosphatase A Mifsud W anon Pfam-B_5195 (release 7.5) Family This family represents a family of bacterial phosphatidylglycerophosphatases (EC:3.1.3.27), known as PgpA. It appears that bacteria possess several phosphatidylglycerophosphatases, and thus, PgpA is not essential in Escherichia coli [1]. 
21.00 21.00 21.50 21.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.95 0.71 -4.38 172 2384 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 2276 9 516 1528 1638 145.30 32 86.26 CHANGED hphlut.shGsG.hh.hAP.GThGolsAlshhhhl...h..........hht.h..............hhh....hlhh.....shllGlahsstspc.ths.......hcD.utlVhDEllGhhlsh......hhh...............sh..............................h..............hh.ll......u.FlhFRhFDIhKPhPIshhD......cc................hp.............GG...hGlMlDDllAGlhAslshtlhh .................................................h..hhuhhFssG.h...hs.P..GThGo.LAulshhhhh..............................ph.sh.....t..hhh.........hhlhhshhhGlhlsptst+..chs..........s+DcG.t.l.VhDEhlGhh.Ish......hhh...................s..........................................sh.............hh..shGFlhFRhhDhhKPhPIchhD...+p.................lc.............GG...hGlMlDDllAGlhAuhshhhh.t............ 0 164 318 426 +3807 PF03334 PhaG_MnhG_YufB Na+/H+ antiporter subunit Mifsud W anon Pfam-B_3611 (release 6.5) Family This family includes PhaG from Rhizobium meliloti Swiss:Q9ZNG0, MnhG from Staphylococcus aureus Swiss:Q9ZNG0, YufB from Bacillus subtilis Swiss:O05227. 23.40 23.40 23.60 25.20 22.90 23.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.47 0.72 -3.93 234 1788 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1344 0 548 1242 226 82.50 33 67.13 CHANGED hlllGuhhslluulGllR.hPD.....hasRlHAsoKusTLGsshlllushlhh........h........ptt......hsh+hlLlhlFlhlTuPlu...uahlu+AAhp ...........hlllGuhhslluulGllR.h.D......hasRhHAsoKusTLGshhlLlushlah...........spsh......................hsh+hlLhhlFlhlTuPlu...uHhlu+AAh......... 0 162 340 457 +3808 PF02304 Phage_B Scaffold protein B Mian N, Bateman A anon Pfam-B_9648 (release 5.2) Family This is a family of proteins from single-stranded DNA bacteriophages. Scaffold proteins B and D are required for procapsid formation. 
Sixty copies of the internal scaffold protein B are found in the procapsid. 25.00 25.00 150.70 150.60 19.00 16.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.58 0.71 -3.80 6 76 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 51 2 0 60 1 115.60 64 97.99 CHANGED hpcNtssshopEsIpsospPQhRNEsshNsSs.pGssssT-PuGLRRDPVQpclEAERQcRspIEAGKuhCuRRFGGATCDDpSAcIaApFD.ssppVQPAEFYRFNDuElsKaGYF .hTcNQssspSQEulQNpNpPQhRsEsAhNspuVpGshssT.puGLRRDuVQsDlEAERpKRs-IEAGKuhCoRRFGGATCDDKSApIYApFDcNDhRlQPAEFYRFpDuElNpaGYF 0 0 0 0 +3809 PF04717 Phage_base_V phage_base_V; Phage-related baseplate assembly protein Waterfield DI, Finn RD anon Pfam-B_5996 (release 7.5) Family Family of phage baseplate assembly proteins responsible for forming the small spike at the end of the tail [1]. Also found in bacteria, probably the result of horizontal transmission. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.39 0.72 -4.04 71 4590 2009-09-13 07:45:09 2003-04-07 12:59:11 7 55 1466 5 992 4204 146 90.00 34 15.09 CHANGED Gslsslshs....p.............uR..lRVp..................sGs........h.osWl.hhsstAG..psppatsPslGEQVllh...sG..-.spulllsu.lassppssPsss ....................................................................................Gs.sAhVsu........p................hDp.GR..l+VpF.a........................t...tssc............p.Ss.WlRVup...sa......A......G...ts.......a......G.th......hlPRl.GpEVlVs....F.lsG...DP.D.pPllhGp.lYsspshsPh................... 0 209 482 739 +3810 PF03864 Phage_cap_E Phage major capsid protein E Finn RD anon DOMO:DM07502; Family Major capsid protein E is involved with the stabilisation of the condensed form of the DNA molecule in phage heads [1]. 
20.90 20.90 20.90 21.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -12.17 0.70 -5.51 77 1242 2009-09-10 22:20:44 2003-04-07 12:59:11 10 3 734 1 126 899 64 301.00 29 96.11 CHANGED FssssLs.tsls...phs.hsshltsh..lFtp..p...s...opplsl-tp.sshhslh.shsshsssu.tht...ppptpspshpss..alt.ppslpss-ltshRsh.Gpp.............sthppht.phlsc..ch...tph.cpphchThEahphsAlts.p.lh......sscsss.hhDah.phshstts...hth....t.ts...ssh.spshpslcphpppl...phhsss..hslsusphastlhs..psplpc...thpth..t.........hp.............t..psh.patGhh..........................a.pcht................t...stph....hls.......sscshhhss.......t.shhpphausss...ppsss.u...............tshasp.h..pt.cspshplpspSsPLslsscPsslhp.sps ....................................................................................................as.tplh.th.p....ph....phpshhhpl..aFpps..........hs.hpTpplhlcpl....su.h..h.shu.shs.oPhs.tupslp....pcutps..p.hp.ss.....al.Ks+cpls.pp..h...hhRhs.sp-.................ssshphh.cllhpsh...ppt.cpulsts.Eh.sssAlh..tGK.hh........spth.p....hpl...Dh.....shstptph........t......phup..stss..h.cs.tpcl.ctht.th.......ush.sshhlhsspsap.hlps......acth+-......thcsppsu......................ttsl+.............shstsl..sa.pGhh.................lhhhpsp...Yhpp..................sG....spcp........als..................ssphllsss.........t.spGhphY.Gshp......tps.p.G...............sshYsKpa...hpss.cPstph..shspSsPL.lhs.cPsthl.lp.......................................................................................................................... 
1 43 83 102 +3811 PF05125 Phage_cap_P2 Phage major capsid protein, P2 family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 20.40 20.40 20.60 20.80 19.70 20.30 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.99 0.70 -5.55 5 788 2012-10-03 06:22:39 2003-04-07 12:59:11 7 2 540 0 77 634 13 308.10 45 94.81 CHANGED RppFsAYLupLAKLNGlssscVu+..KFsVEPSVsQpLtNslQESSDFLKpINIlPVuEhKGEKIGlGVoGTIASTTDTuGsstR+spDhoALssN+YECsQINFDhalsYAhLDhWA+.FsDFQcRIR-AIlcRQALDhIMIGFNGTSRA-TSsRAsNPhLQDVAVGWLQKYRNpAPARVM....oE-sKooGKV...lls.GcsuDYcNLDALVFDusssLIDPhaQ-DPcLVsIVGR-LLADKYFPlVNKc.QcsTEplAuDlIlSQKRlGGLPAVcVPYFPAcAlhVTTL-NLSIYap--S+RRoL+ENP-KDRlENYESpN-DYVVEDYuuGCLlENIcV .................................thssYhtplApLNuls.st..t..lsp......+FoV.EPoVpQpLcsthp-S.up.FLphI.Nl.hsVs-.pGphlslGssusIAuTs..-o.s.....s..pc.Rp.P.pc.st.l.tsp.cYcC-..QsNaDshlsY.spLDh.WAp.....a.pD...FQ.RlpstIs+p.ALDhIMIGFNGsp+A.c.sSsh.ssNPhLQDVNhGWLp+hRp.....-As.p.+VM..............sp.s....t....o....o....sph.............hts.....u......c..u.G....c..YsNLDAlVhDshssLI-.hap-Ds-LVVIsGRpLlu.schhPllN...pt....ppNoEhlAup.lIlSp+plGGL.AlpsPFFPssshLITpLcNLSIYaQcsocRRhh.h.-sPchcRlEsapShN-uYVVEDYttsuhl-pl.......................... 0 9 29 54 +3812 PF05144 Phage_CRI Phage replication protein CRI TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The phage replication protein CRI, is also known as Gene II, is essential for DNA replication. 
21.00 21.00 22.70 22.20 19.90 19.00 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.67 0.70 -5.30 13 166 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 131 0 13 147 10 238.00 25 63.01 CHANGED MIDalshclPhcc..............s.lshucVsplu....ssGsVEacs...spclpVsG.........SahouIul+sh...pus....s.tspLplcusPAKhhQGHNVaGo-DLpshshthapslhss.......hs-.hchlchGthplsRlDsTaShcLsocpplhuhLcuhcpsu+ppp+usts.shtoTlYFsKs..........SR+aoLphYSKt-Elp.thcs.............+ph.ctLspscLhpaAss+LRhEhpl+o+.Lpchs........lpasssapsc ..................................................................................................ls...splhphs....sst.tlpap.....tphpsps.........op.oshth+.h.........put........h.stlplcusPuKhh.QGH..NVaG.ss.....slphssth.hhstlhph..................sthhchl..s.s....tscloRIDhThshplus..c.hpphlcthpshuptpt...+sstp..s.htsTlaas.c.s..........u+ph.pl...phYsKt.sElp.pphpc......................p......tt.s.p...pp..h..h.....-.hp.t.spL.ta..u..ts..h..lRhEhph+pphLpchs..................h.................................................. 0 2 6 12 +3813 PF02303 Phage_DNA_bind Helix-destabilising protein Mian N, Bateman A anon Pfam-B_9239 (release 5.2) Domain This family contains the bacteriophage helix-destabilising protein, or single-stranded DNA binding protein, required for DNA synthesis. 25.00 25.00 26.70 26.40 21.70 21.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.12 0.72 -3.96 3 81 2012-10-03 20:18:03 2003-04-07 12:59:11 12 1 61 22 8 90 1 84.70 33 88.25 CHANGED lKVEI+sSQVuV+oRSGVS.+pGKPYTlp.EQpAYVDLGG-YPlLFsIsLDEGQPPYusGhYplHPuSFKlNpFGuLtlGR.lRLlPsK .......................hpl..tt.th..psRshps.+pu..K..P.t.phh.EQpA....hl...hhG...Gp..aPs.hc...lpL-csQss....YssGhYplHssSa.hls.s.a...GsLplc+..h......h................ 
0 1 4 7 +3814 PF02305 Phage_F Capsid protein (F protein) Mian N, Bateman A, Haft D anon Pfam-B_10357 (release 5.2) Domain This is a family of proteins from single-stranded DNA bacteriophages. Protein F is the major capsid component, sixty copies of which are found in the virion. 25.00 25.00 70.30 26.40 19.30 21.90 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.78 0.70 -5.96 7 248 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 72 12 1 272 157 229.10 36 98.46 CHANGED Spl.ou.hpRs.aDhSHhshpshphGcLhsI.hs.VlsGDoFphctsshhRLoshhhslhsc.plDhahFaVPpRhla.cpW.pFht-ssss..................uPhsuhspsohhDahGhsspls..sh+lsthhapuY..IaNsYFRs..h.ppstss.us.......hsts-sphs.............h+ss+h+shaTusLP.Pppthuhphsl...............................................................................................................uooulsI.uLptAhsh.+h.pccshhhoRYh-llpuahGspShDuc.pRP.hLhtosh.sss.sVstTspo........sLutFSsps..s.K+hhs+.FV.EHGhlhsLhhsRhs.TapptlchhhuR.phpa.Dhh..PsL.upLs.ptl..KEla..........tsusssthFthtEt.t.YRa+PspVssha.....psLDua.h...asshP......sLpcphl.ps.sshDcsht...Ssp.spa.sphhFNhpshRsMPshpsshh 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 1 1 +3815 PF03335 Phage_fiber Phage tail fibre repeat Bateman A anon Pfam-B_3576 (release 6.5) Repeat \N 20.00 5.00 20.00 5.00 19.80 4.90 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.45 0.76 -6.72 0.76 -3.45 38 2424 2009-01-15 18:05:59 2003-04-07 12:59:11 8 41 239 21 57 1692 4 13.50 43 14.97 CHANGED susGsHsHohouss ....suhGuHoHoloss..... 0 14 30 41 +3816 PF03406 Phage_fiber_2 Phage tail fibre repeat Bateman A anon Pfam-B_854 (release 6.6) Repeat This repeat is found in the tail fibres of phage. For example protein K Swiss:Q37842 [1]. The repeats are about 40 residues long. 20.20 20.20 21.10 20.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -7.95 0.72 -4.32 50 1985 2009-01-15 18:05:59 2003-04-07 12:59:11 8 103 486 0 146 1761 33 43.20 57 13.82 CHANGED ohopKGllQLSSATsSsSEohAATPKAVKs.shDpAssphspsps ......ohspKGlVQLSSATNSsSE..o.LAATPKAVKA.AhDhAsu+hsspp......................... 
0 4 69 102 +3817 PF02306 Phage_G Major spike protein (G protein) Mian N, Bateman A anon Pfam-B_8833 (release 5.2) Domain This is a family of proteins from single-stranded DNA bacteriophages. Five G proteins, each a tight beta barrel, from twelve surface spikes. 25.00 25.00 27.70 27.70 24.00 20.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.10 0.71 -4.64 6 79 2009-09-10 23:22:16 2003-04-07 12:59:11 10 1 55 6 1 73 2 173.40 60 100.00 CHANGED MFQpFlSKHNuPl.TSp.slusolTPAus.A..PVlsTPclou.+oslhlshTlTAuostuG.Fsasl+lDsosPssNQVlSVuAsLS.sVsuchIAslVRFEsAs.ssPTulP.uhYDsYPlE.uhtsGuuhSs+DCVTIDsHPRTsGNsVYVGlhlaSstWoAuploGllSlNQVs+EsTVLQPLK .MFQsFIS+HNosF.hSc..lssSlTPAus.A...PVLpoPchou.pohah.ulTlsAus..uG.FhHslpMDTSsssuNQVlSVGAsluFsuDscahAClVRFESu..oVPTolP.osYDVYPl-.utpsGGhhoVKDCVTIDVhPRTsGNNVYVGhMlWSN.aTAo+spGlVSlNQVI+ElhsLQPLK 0 0 1 1 +3818 PF04688 Phage_holin Phage lysis protein, holin Kerrison ND anon DOMO:DM04726; Family This family constitutes holin proteins from the dsDNA Siphidoviridae group bacteriophages. Most bacteriophages require an endolysin and a holin for host lysis. During late gene expression, holins accumulate and oligomerise in the host cell membrane. They then suddenly trigger to permeablise the membrane, which causes lysis by allowing endolysin to attach the peptidoglycan. There are thought to be at least 35 different families of holin genes [1]. 20.60 20.60 20.60 20.90 20.40 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.29 0.72 -4.44 24 332 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 263 0 29 157 1 44.80 42 47.79 CHANGED GhsPlPlsEsplpphh.......SslhThssolhuWaKNN.lTp+u++ppphL.+ ......GhSPIPlD--pl...........SollhslsuLashaKsNs.sopcGKhupphLK....... 0 12 20 26 +3819 PF04531 Phage_holin_1 Bacteriophage holin Mifsud W anon Pfam-B_2644 (release 7.5) Family This family of holins is found in several staphylococcal and streptococcal bacteriophages. 
Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the buildup of a holin oligomer which causes the lysis [1]. 21.60 21.60 21.90 22.10 21.20 21.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.79 0.72 -3.92 23 523 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 423 0 29 274 7 78.90 39 81.28 CHANGED pINWKlRhKNKshhsAlluulhLlspplst.....hhuh..shsph.pplpsllsslhslLshlGllsDPTTcGluDSppshs.YpcP+c .............INWKlRh..KsKshhl.ull....uAlhLhhQ.plst........hhGh....chpshspQlss...hlNulLslLslhGVl......sDPT.T..cGlu.DSp.A.p.YptP+............................ 0 11 23 27 +3820 PF04550 Phage_holin_2 Phage holin family 2 Finn RD anon Pfam-B_61235 (release 7.0) Family Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the buildup of a holin oligomer which causes the lysis [1]. 25.00 25.00 25.50 25.30 20.40 18.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.90 0.72 -3.89 6 181 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 164 0 22 73 0 89.80 62 93.82 CHANGED Ms-uEKolIsLhlh....GALIulGKVLsGsEPIThRLhlGRhlLGoulShlAGlALlphPDls.lALsGIGSuLGIsGhpslElhLpR+...htup ...........MsscEKSlL.SLFhI....GsLIsVGKVLAGGEPITPRLFIGRMLLGGFVSMVAGVVLVQFPDLS.sAVsGIGShLGIAGYQVIEIAIQRRhKtp.......... 0 0 8 13 +3821 PF05106 Phage_holin_3 Phage holin family (Lysis protein S) TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This family represents one of a large number of mutually dissimilar families of phage holins. Holins act against the host cell membrane to allow lytic enzymes of the phage to reach the bacterial cell wall. This family includes the product of the S gene of phage lambda. 
22.30 22.30 22.70 22.60 21.90 22.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.12 0.72 -4.02 25 643 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 434 0 62 284 2 96.80 31 90.20 CHANGED Mscc.PchW..AslLshLpp.h...splhuuslAhlhAhLRhhY..sGssh+cpllEuslCGhlshshtssl.........pahGlssshushsGuhlGalGV-plRphspRhls+K ....................................t.h.pllshlhsth.....thhuslhAhshAhLR.hhY........sGsuhpcsll-uhhCGhluh..sltssL................c.ah....G......h.s..ps...luh..hhushIGalGs-plpshlhphhs++............... 0 6 17 39 +3822 PF05105 Phage_holin_4 Holin; Holin family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Phage holins and lytic enzymes are both necessary for bacterial lysis and virus dissemination.This family also includes TcdE/UtxA involved in toxin secretion in Clostridium difficile [1]. 19.80 19.80 19.90 19.80 19.70 19.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.44 0.71 -4.42 57 986 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 662 0 122 758 27 117.20 29 83.22 CHANGED lu...uhhshh..hGuhsthLhhLllhhll..DalT.GlhsA....hhp+c........lsSphGh+Glh+KlhhhllVhluthlDhhlhts......tslps.....s......llhaYluNEulSIlENhuphGlPlP...phl+phlc.pLpscs .....................................................h..shhshh..hGshs.h.hltll.lhhhll..DalT.Ghht...A....hhptc............................lsSch..Gh.p.Glh.+Klhhh.l.lls.luh.h.lDhhlsss..................slpp....s.............l.lhFY.l.u.NE.hlSll.ENh.uc...h.G..l..P..l.P...phLpphlp..Lppc.p......................... 1 60 95 103 +3823 PF00589 Phage_integrase Phage integrase family Bateman A anon MRC-LMB Genome group Family Members of this family cleave DNA substrates by a series of staggered cuts, during which the protein becomes covalently linked to the DNA through a catalytic tyrosine residue at the carboxy end of the alignment. The catalytic site residues in CRE recombinase (Swiss:P06956) are Arg-173, His-289, Arg-292 and Tyr-324. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.99 0.71 -4.59 35 44346 2012-10-02 14:09:14 2003-04-07 12:59:11 17 151 5378 81 8222 34800 8709 168.00 19 51.02 CHANGED phLstcplppllstspp........thcs+sh......lplhhtTGhRhuElhsLchsc.lshppphhhl................psKsp+p...RtlPls...pthhphlcchltpp............tpc......................hlFssp...........tsp.hsppshppth+phstp.....sulp......thssHsLRHohuopLhcpGh.slcslQclLGHsslshT.thYs+sstpc ....................................................................................................................................................................................................................................tph....t.h..h..t......h.t........................p.....t..h.h............................h...l....h....h....h...o..G.h..R...h.......u..E.........l.....h..........s.................L......p...........h...........p...........-.....l...........s........h..........p........p.....t......h.....h...t..l...............................................................................tsK...s.p..pp.............+h.l...s.ls................pph....h.......p......h....l...p...p....h...h.ptt........................................t.p.p...........................................................................................................................h..l..F....st.................................................tt..p.......h.....s.....t....p.....s.....h.....p....p.....h....h........c..p....h...s..pp..............................ss.lp............................ph..s...s..Hs..h..R..H.o....h........u......o.......t...........h.........h.........p............p............G.........h........s............h.......p...........h........l.........p......p........h.............L.G.H......p......s......h...p....s....T....phYs+h....p.............................................................................................. 
0 2564 5275 6927 +3824 PF03245 Phage_lysis Bacteriophage Rz lysis protein Bateman A anon Pfam-B_3219 (release 6.5) Domain This protein is involved in host lysis. This family is not considered to be a peptidase according to the MEROPs database. This family Rz and the Rz1 protein (Pfam:PF06085) represent a unique example of two genes located in different reading frames in the same nucleotide sequence, which encode different proteins that are both required in the same physiological pathway [1]. 25.40 25.40 25.60 25.40 24.70 24.70 hmmbuild --amino -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.49 0.71 -4.14 30 1696 2012-10-02 17:03:51 2003-04-07 12:59:11 8 3 601 0 80 1081 5 120.20 51 80.99 CHANGED putphcpptschspplphsp....ssIschpsRpR........slAsLDs+aTcELu-A+uc.-sLRsDVAuG++RLp..lpA..oCsu...sspssusuulssuuusR..LsssApcDYhsLR-cItpsppQlpuLQ-YlRsp.Ch .....................................NAlsYKtQRDcps.pc..L.c.hAN.....AsITD.MQh.RQR............DV.AALDA+Yo+ELADA+AEN-sLRs.DVAAGR...+RLp.......lpA.......sCss.......st.csTu.sSGhs.N.u.s.u.PR...LussA.....ERsYatLR-tlhphppQLcshQ-YIRoQC.......................... 0 7 26 55 +3825 PF00959 Phage_lysozyme Phage lysozyme Bateman A, Griffiths-Jones SR anon Sarah Teichmann Domain This family includes lambda phage lysozyme and E. coli endolysin. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.47 0.72 -3.77 18 3255 2012-10-03 00:09:25 2003-04-07 12:59:11 14 30 1340 658 411 2608 736 110.60 29 59.84 CHANGED hhTIGhG......pspslssp.t..................hopppusshhphslp.thtctlsphhp...........shstsphDAlsshsaNhGht...........hhttoohlcthpttphhttssplhc.h.s.G.......phhptlhpRRpp ....................................................hhTlshG...............sspslhsuhp........................................hocpps.stl.h.....t.h.-ht..ps...t.....h.l.p.p..h..lps..............hs.p.pt.s.u.ls.hsaNlGs.................Gth...t.sS.T........hh+.+...l.N...t.........G.....-..h..p..u..A..C....c..p....l.......h.t.W.h.h..s...sG.............pth..h...GhhpRRc............................................. 0 74 208 308 +3826 PF03863 Phage_mat-A Phage maturation protein Finn RD anon DOMO:DM08200; Family \N 25.00 25.00 25.70 25.70 19.00 18.60 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.33 0.70 -5.41 11 306 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 19 0 0 219 0 218.90 48 94.72 CHANGED aaPcchht..hslplspYthsthhssp..hpthss..hhph...poPcphs..........sPssah.GhcslTslch.ssshhhph.sssscas.upusuuslchpsal..ss..........hhuphsashpsphsschtschSthssphGshluEuRcTlphLuhhhtplhcua+Al+RGDl+cltphlp.h...............................pptcapupssushWLEhpYGlhPLhhDIpushE.......-ah+sHtchthhhRhSsshGpshslphsth.Puhshh...slpspsslpRR.........pthahshssupLt.lS.....uluhlNPhpluWElsPaSFVVDWFlNVGchLEth..phhhslchlsG.hsp+clch+Sl.ol+shhssss.............shp.upspshtshhsRshpsshPpssstlcoshush.HllDulALl..sQRlK+ 
.......................E.sassolhsYscstp.sNuaShchhsN..........aTPtRhs......+pa+hPosaSpGhhsVTol-Q..GAapRphSshGRsa-.tsGhuhoLcsc.....us.phhs.s...hhh.hs.shssp....................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +3828 PF04233 Phage_Mu_F Phage Mu protein F like protein Kerrison ND, Finn RD anon COG2369 Family Members of this family are found in double-stranded DNA bacteriophages, and in some bacteria. A member of this family is required for viral head morphogenesis in bacteriophage SPP1 (Swiss:Q38577). This family is possibly a minor head protein. This family may be related to the family TT_ORF1 (Pfam:PF02956). 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.41 0.71 -3.55 13 1923 2009-09-11 06:39:01 2003-04-07 12:59:11 9 14 1455 0 257 1607 203 114.20 19 30.44 CHANGED pplpssltpultpGpshcclu+pl+ch.................................th.s+tRhphlARspstps.sAuphtp..pptphtht.h..pasustDsRsRspHtcLcGpl.....aphD-..hp.sst.............PspchNCRCslh .................................................................................................t..lpp.lhpul.h.p.G.p.....s...p..phtcplptt..................................................................hth.sp..c...h...pplhcTphsp.s...hs.tuphpp.......htph.sh.....t....h.....h....p.a.hu.....s....h...D.s...+...s....p...........p....t........p.....t..s.h.....c..G..p.h.........ac.....hc.-.sh.h.t...hth...........................................................P..s..p.......h.sCRCph.h.................................................. 
0 81 168 215 +3829 PF05136 Phage_portal_2 Phage portal protein, lambda family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.1); Family This protein forms a hole, or portal, that enables DNA passage during packaging and ejection. It also forms the junction between the phage capsid and the tail proteins. 26.30 26.30 26.60 26.30 26.00 26.20 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.98 0.70 -5.92 62 1366 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 672 0 152 1135 323 316.40 34 69.92 CHANGED ustspphtsWpstst..ussstltt.shsplpsRuR-l.s+NsshusuulcphhsNllG.sGlp.p.tths.................spthpcplcphW.ppWs.ss...psDssGphsahthpplshRshl.psGEshsphhhtpts......shhhshplpll-sDpls...ssh...........ssss..p.......lptGlEhcphG+.lAYalhcs+Pssshht...........phpRlPAp.......pllHlac..pRsGQhR.Glshhusslh+lppLcpap-ucLhptcluAhhuuaIcpspss.ssshtt.ss........................t.ptpt.hplpPG.thttL.PG--lphhssscssssassFhpt.LRslAuGh.GlsYctlotDhssssYSShRtullEhp+thpthpp .......................................hssphtta.s.sp..osDstlh..shpthpsRAcsL.scNsshussulphhp-+lVG..ph..hhlp..htss.hphh.............AcshscclcstW.pEau.s......h.DVpu....phTas.h.R.sltsah.hsGElFsphshspts......sthhthphchlpPchls.ss......................stus..............lptGV......pls..c..h..G.cshuYalhc..st..st.h.t............chpplstc.........phlHl.h.ps..h.cssQhR.Gs.s.hhuVh.plphLcphpsspLpuAhltAhh.ushIcpt.ss..ptshch.ts..............................................h..s.tt.l...pl.sG.htlscLhPG-clph.su.pcssssapsFcpu.LRhlAAGh..slSYpplu+sast.oYSutRtuhsEuhchahhhpc............................................. 0 47 88 121 +3830 PF05133 Phage_prot_Gp6 Phage portal protein, SPP1 Gp6-like TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family This protein forms a hole, or portal, that enables DNA passage during packaging and ejection. It also forms the junction between the phage head (capsid) and the tail proteins. During SPP1 morphogenesis, Gp6 participates in the procapsid assembly reaction [1,2]. 
This family also includes the old Pfam family Phage_min_cap (PF05126). 24.50 24.50 24.70 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.23 0.70 -5.92 56 1289 2009-12-18 13:44:14 2003-04-07 12:59:11 9 4 997 13 170 1119 57 413.50 14 90.04 CHANGED pscpltphlpcphtpppp......h..hppYYpu...p.chhppttt...............t.hthcp+hshsaschllcphsshhhspshphsts.....sp......pssctlpchhp...tNchcsptpplhpsshhaGtuahhhahs........pssp.....hchth..hssppshslassshppp.hshlphhpppspptht...............................plasssp..........hhhhphpss...thh.t.stttt..........................astlPlltasN.sc.p.....puch.cshhsllDshscsh..sphsschpshs.chlhsl........................hs...........................t.ttphhpshpptphhpl.................sssssschlp......pphsssshcshhctlcp.lhphutssshs.spthu.....ussSuhAlchthpthppcsppppppappulpchhphhhphhs.....tps.h...........clphsascsh.tshsppsc......sthtt...tGhlopc..shlpths.........hl......s..-scpEhc..chpp-ptphtptttsttp ............................................................................................................................................t..........................h..h.paaps...........p.phh.t..t................................h.s...p+hshsasphlscthssh.h.h..s..p..s.hphsts.............pp...........................ph..pt....lpphhp...........ts.p..h..ppt.p.phhppsh..h...hG.tu..a..h..h..h.hhs.....................ps..sp.......................hplph.........hss...p......p..h...h..sl.h.s.s.p......p.p.p.....h..h.h.lph...hp...p.p..t..t.pt.t...................................................pla.sss.t.............hh..h.h..hp.tt...............thh............st..t.......................................................hsh.lP..l....h...h.s..s.....c.p..............................huph.cphh.sllDthspsh.....uphss..p..h.p.th..t...phh.h.sh.............s........................................t.ttp..h.p.p.h.t...ptthhpl.......................................s.tss.ss.phlp.............tph...s...s.tth.c.shhc.tlp...p.l
hphsttsshs..spphu.............ustSu.u.....hc..hp..h.shptpsppppphhp.p.ulpp.h...h...p.hhhp.hhp.......................t..............................plp...hpas...p...s..hshshtpth...p..........shht.......tGhlop.c...ph.ht..h................hh..............s..cs.p.tthc.....chppct...........t........................................................................... 0 61 125 149 +3831 PF05135 Phage_connect_1 Phage_QLRG; Phage gp6-like head-tail connector protein TIGRFAMs, Finn RD, Bateman A anon TIGRFAMs (release 2.1); Domain This family of proteins contain head-tail connector proteins related to gp6 from bacteriophage HK97 [1]. A structure of this protein shows similarity to gp15 a well characterised connector component of bacteriophage SPP1 [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.32 0.72 -3.84 81 1873 2009-09-11 08:37:08 2003-04-07 12:59:11 8 5 1271 14 232 1181 124 94.80 16 85.42 CHANGED Ms.......................Lc......clKphLpl..-sspcD..sllpthlp....sApphlpsthsp.....................t.pphssthphhlhtlss.phap...p+ss..pupss........p.hsh..shpshlspl+th ..................................M...............LcclKthL+l....-psp...-D....chL.pt.hlp....sApphlp...s.hhst.......................................................................................th..sthph..s...hh.lss.hhYp.......ppt..........st.t............................h...........h............................................................................................................ 0 72 146 194 +3832 PF04492 Phage_rep_O Bacteriophage replication protein O Kerrison ND anon DOMO:DM04335; Family Replication protein O is necessary for the initiation of bacteriophage DNA replication. Protein O interacts with the lambda replication origin, and also with replication protein P to form an oligomer [1]. 
It is speculated that the N-terminal half interacts with the replication origin while the C terminal half mediates protein-protein interaction (annotation of Swiss: P14815). 28.70 28.70 28.80 28.80 28.60 28.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.04 0.72 -3.80 9 670 2009-09-11 22:41:49 2003-04-07 12:59:11 8 3 446 0 27 476 7 97.40 48 35.34 CHANGED phhEpR.....hADL-DGYhRlANpll-uhhtscLotRph..pVlhAlhR+TYGaN........K.hDhlsNsQlAplTs.......lssp+lscAhppLlccsll...ppsG.+hlGhNpslSpW ................s...E.p..VADL-DGYsRluNtLlEAhhhusLTp+Qh..pVhLAlhRKTYGaN...........K.hDhlossQLuElTt.......l..s+.+C.spAKppLV+hsIl.............hQpG....t.hGhNpslSEW.................... 0 7 8 15 +3833 PF04984 Phage_sheath_1 Phage tail sheath protein Bateman A anon COG3497 Family This family includes a variety of phage tail sheath proteins. 26.20 26.20 26.20 26.40 25.90 26.00 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.39 0.70 -6.05 41 3025 2009-09-13 02:25:27 2003-04-07 12:59:11 9 6 1415 24 555 2645 2611 301.10 16 92.64 CHANGED M...s.....tsphhPGhYlphpssuttshshuspGhluhss......shshGss.....spshpl.ssts...phhphhG..........hssphhthlcthhp.tsstplhlh+l.st....................Gspsssph...........................................................................................................t..hhtshhsGtpustltlhl..phstt...................................hshhh..ts..h............l.sp...........thh.....t.s.ht....................sstsltuGss..suthss.chht..........shssh...pshphshlshss.......tsp..slpssltshspphptt.ttcthullss.ss..s.......t...................-thhslssthshhsss.ht.............sssalAGhtAt.sshppuhss......hphsuhhslshp..hspsc....hpthlpsGhhshpts..ssslplhpslsohhp.s.tspspsatp.pslRshshlppsltpthpp.hhsctss......sc............sshtslpssltsah.cpLpstGslp.sacs......ptDhplpt.......................sscpsclhhphshpsVsshcplhhshplp 
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.......................................................................h.......................thhh.hs...........................................h........h............................h.....hhs..h..hs......................................................................................................................t..h..........t..........h....................................................................s...hhhuhh.......uh...s....t.....s..................................t..........h.....s.......h...h..s...h..................ht........t.p................t..........h...............t.t....l..s....shh........
...................t...t.h.h.h..t...shs..................s...h.....hhchhthl..p.tht...h...........hh............s......................hhttl..t.hh..tth.t.t..hl...shp...................t.....p...............................t.t.sth.h.h...t.....hc.h.h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 146 338 468 +3834 PF04630 Phage_tail Phage major tail protein Mifsud W anon Pfam-B_5341 (release 7.5) Family \N 20.70 20.70 20.80 21.20 20.60 20.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.01 0.71 -5.22 14 370 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 319 0 29 212 1 175.60 25 78.65 CHANGED MA...ohGlctltluLlDspsshlh.cussGL.........ossGla.hDs....pshGTpTANITsLpuussplaGNNpltcsshupupPpVAhshNsLsa-lppKlhGhhsDscGGYs.ps..pKP.+VAlLlpS.pslDpsp.lYauFusGphsE.outNhsTss.supsRs-DslTasuLs.........pshsspshKha.ss-.................ssFs.pssMhp-VFsGYstsss ....................................................t...............................................pt....p..u.shphplosltsp.hscha.usN.t.lhh.t.t....pGsuclpschshhs.lP...s.-.shspllG.t.p.ccp..s.G.shh.hup...pspP.saVull.hEo..c.stsss.shaluLhKGpFoh..suhchpT..cp..cs..s..pss.plohphhs................t.tt....hh.h.h...t..t....................t...t...............s....................................................... 
0 11 19 26 +3835 PF05100 Phage_tail_L Phage minor tail protein L TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 25.00 25.00 25.40 25.20 20.00 23.20 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.43 0.70 -5.20 8 1425 2012-10-02 17:50:33 2003-04-07 12:59:11 7 6 613 0 63 1256 146 186.40 62 86.69 CHANGED TshGu.-hhhFps...............E.pttGt........PlsWQGppYpsaPIpupGFEtsGcGousRPoLTVuNLhGhlouhspshssLVGApVlR+cTYA+FLDAsNFssG.NPsADPpQEh.lshWhlEQpot.su.posoFlLSoPs-hpGshlPuRphLusoCsWs....YR.G-sCGYsGsshhDcaspPToDsutDcCo+slouC+hR..ahtsNllsFGGFsuss+l ............................................T..GG.ERYFFCN................E.NE.K.GE................sVTWQGR.pYQsYPIpGoGFEhNGKG.ousRPoLTVSN....L.........a.........G........h......V.TG....M.AED......L......QSLVGuTVVRR+VYARFL.DAVNFV.sG.Ns..-....ADPE..QEl..hsRWhlEQhS-LoA..hoASFVLuT.PTETDG..AlFPGRIMLANTCh.....Ws..............YR..GDECGYs....GPA.VADEaDpPTo......Dl.p...K..................D+CS.KCh.p.GCchR...........s.s......l..sNFGGFLSINKL.................................. 0 5 25 45 +3836 PF04761 Phage_Treg Lactococcus bacteriophage putative transcription regulator Mifsud W anon Pfam-B_3898 (release 7.5) Family This family represents a number of putative transcription repressor proteins found in several Lactococcus bacteriophages. Horizontal transfer may account for the presence of similar proteins in Lactococcus [1]. 25.00 25.00 54.90 54.80 21.90 16.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.90 0.72 -4.20 2 33 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 27 0 3 26 0 56.50 75 97.85 CHANGED M..EpslsHhGpsl.QcSVEaYKcpL.hc.ShpFlpsSLlPQLaEWSNAYKAAVELTK ....MpQEKTINHLGQlVYQESVEFYKEKLSVa.SKDFLQNSLIPQLYEWSNAYKAAVELTK.......... 
0 2 2 2 +3837 PF04985 Phage_tube Phage tail tube protein FII Bateman A anon COG3498 Family The major structural components of the contractile tail of bacteriophage P2 are proteins FI and FII, which are believed to be the tail sheath and tube proteins, respectively. 22.40 22.40 22.60 22.50 20.80 22.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.73 0.71 -4.81 53 1069 2009-09-11 14:55:36 2003-04-07 12:59:11 9 6 755 0 146 654 22 163.70 38 97.85 CHANGED slP+.pL+phNlFsDGpuahGplpplTLPKLstKhE-aRuGGM.sGslclD..hGh..-tL-sphshuGh.pt-llphaGhsshsustlpFpGuapppc.....upssslplshRGchpEl..DhGshKsG-csphphohssoYYKlsl.sGcsllEIDhlNhlptlsGsDhLus.hRpslGl ....................hlP+.+L+hhNlFh.D.G.p.shhGhlpslTLPKlocKh-pYRGGGM.sGuls..lD..hGL..sAL-spashuGh.pttlhpta....Gtssh.c.u..l...LRFsGuhpp-c.......u-s...s..lElhh...RGRhpEl..Dh.G-h..Kp.G..Es.s.ppphs.h.s.s.o.YaKLol..sG..csLhElDhlNh..l.h.l.sGsDhltp.hRsslGL................................................................. 0 28 72 114 +3838 PF05155 Phage_X Phage X family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.1); Family This family is the product of Gene X. The function of this protein is unknown. 20.80 20.80 21.00 20.90 20.10 18.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.82 0.72 -4.05 18 179 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 139 0 15 160 7 88.90 29 24.99 CHANGED shsphuspphschspthphhhthWppG.scl+shl...s+uTFaRacpcLhphGlDhuph.sls+tsss.hlPLs+lL-scss......phP-WYhpPshhhs ..................hs.tGshshs+tsphhphYhhLhppG.psl.+t..th.........s+soaaRahpc.Lh.phG.lshuph.slschps...lPhschlphc.s......phPs..aYhcPs....s........... 
0 2 8 14 +3839 PF02912 Phe_tRNA-synt_N tRNA-synt_2_N; Aminoacyl tRNA synthetase class II, N-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 21.00 21.00 21.10 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.12 0.72 -4.25 53 4298 2012-10-01 23:07:44 2003-04-07 12:59:11 13 4 4261 7 933 2788 1981 72.50 33 21.23 CHANGED tpApshcsL-plRVcaLGKKG.lTplh+sLGsL.ssEER.thGthlNps+pplppslsp+cptLcputlst+Lt ...................p.tAsshpsL..cplRVpaL.G.K.K.G.plT.thh+s..lt.sL.ssEERsthGthlNps+ptlppsls.....t+pptLcpttlpt+Lt................ 0 317 621 796 +3840 PF02332 Phenol_Hydrox Methane/Phenol/Toluene Hydroxylase Mian N, Bateman A anon Pfam-B_15166 (release 5.2) & Pfam-B_3223 (Release 7.5) Domain Bacterial phenol hydroxylase is a multicomponent enzyme that catabolises phenol and some of its methylated derivatives. This Pfam family contains both the P1 and P3 polypeptides of phenol hydroxylase and the alpha and beta chain of methane hydroxylase protein A. 24.40 24.40 24.40 25.90 24.00 23.70 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.55 0.70 -5.06 23 1141 2012-10-01 21:25:29 2003-04-07 12:59:11 13 3 362 190 164 1148 22 186.10 33 63.63 CHANGED sWs.papppcs.......ttpsl+hscWpsacDPhchhYtsYVphpscp-ststu.lhsuhscs+thpph.sstat.sphptth.sslphsEauA.hupuph.schu..husshpssusa.tshDcl...Rahphphhhhcchstps.phshup...+thaps-.uhpuhRchh--hhh.stDhhEshluhshsh-sshsshhahtL.sptAspsGDpshuhLhsuhpsDpsRHsphusshlphhlppstp .......................................................................pttpppt..u.hh-uhspssu..th.cscah.pshKhhh.sslp.hEYtAhtu..u.hl.scph..sussh+suh.h..QslDEl...RHsQpphah.pta.t...pt.hshhp....pcha.sps.hhpss.+phF-Du.h.oussh.E.h.h.hu.lshshEhllTNlLhVsh.hphAAhNGDhsTsThhhSs.p.o.D.E.uRHhshGhps.lphhlppt..s................... 
0 56 115 147 +3841 PF04663 Phenol_monoox Phenol hydroxylase conserved region Waterfield DI, Finn RD anon Pfam-B_4509 (release 7.5) Family Under aerobic conditions, phenol is usually hydroxylated to catechol and degraded via the meta or ortho pathways. Two types of phenol hydroxylase are known: one is a multi-component enzyme the other is a single-component monooxygenase. This region is found in both types of enzymes [1,2]. 18.40 18.40 19.40 19.20 17.60 18.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.38 0.72 -4.23 25 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 72 6 28 84 3 66.00 43 56.72 CHANGED s+DphENFpGtpLLYluW-cHLhFCAPhsLPlPPshPFGsLlppVLsssautHPDaA+IDWspspWh .....+Dtp-NFsu..LLYlGW-cHLhFCuPhshPlPPshsFusllppVLsssautHPDFA+IDWuplpWh....... 0 4 17 26 +3842 PF04674 Phi_1 Phosphate-induced protein 1 conserved region Waterfield DI, Finn RD anon Pfam-B_4596 (release 7.5) Family Family of conserved plant proteins. Conserved region identified in a phosphate-induced protein of unknown function [1]. 
25.00 25.00 35.00 32.20 23.90 23.10 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.02 0.70 -5.23 21 304 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 33 0 177 296 0 240.80 43 84.05 CHANGED LpYHpGslL.sGsl.oVsllWYG+FsPsQ+ullsDFltSL.....Suss.............sssP..SVAsWW.+TscpYhptsssphs.......sslsLupQlhDpshShG+pLoctplppLAucu..u........tpsulslVLTusDVsV-sFChupCGhHutohsuhhth............phsYsWVGNStsQCPGpCAWPFHQPhYGPQus...........PLsuPNGDVGlDGMVhsLAohLAusVTNPauNGYYQ.GsssAPLEAAoACsGlYGsGuY.PGYsGpLLVDtsTGASYNAsGlsGRKaLlPAlaDPsoSsCsTls ......................................hpYHtG.s.lL...s..G..s....l.sVpllWYGpasssQ.......+..u...lltDFltSlosss.............s.tP...SVupWW.posp.tYh.ttsttt..................tplhlu..tph......Dp.th.Sh..GKsLpp.pl.plstpu..s...............tp.sulhlVLTutDV....s....V....p........s..........FChuhCGhHs.s..hs..u........................phsYsWVGNutpQCPGtCAW.PFt.tP.h..Y..G..Ppss.................sLhsPNGDVG..lDGMlhslAphLAsssTNP..as.suaa.p...G.......sssAP........lE............s...sssCsG.lY.GpGuh.s.GYsGplhsD.soGASYNs.Gh.pGR+aLlPtlasPtspsCts...................................... 0 30 114 149 +3843 PF03831 PhnA PhnA protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 23.00 23.00 24.00 24.40 22.00 18.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.70 0.72 -4.56 104 2536 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2436 2 438 1315 90 55.40 62 47.32 CHANGED hh+DuNGshLp-GDoVolIKDLKVKGoShs.lKpGTtV+sIRL...st..sssccI-s...Kl-...G .......hl+DANGNlLsDGDoVTlIKDLKVKGoS.ps.lKhGTKVKNIRL.....V....-GDHsIDCKIDG....... 0 121 255 351 +3844 PF02562 PhoH PhoH-like protein Mian N, Bateman A anon COGs Family PhoH is a cytoplasmic protein and predicted ATPase that is induced by phosphate starvation. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.32 0.71 -5.09 164 6190 2012-10-05 12:31:08 2003-04-07 12:59:11 11 24 4026 2 1327 5544 4974 200.10 47 57.55 CHANGED sI+s+ThsQppYlculp...ppD....lsFGlGPAGTGKTYLAVAhAVpuLpp..pcVcRIILoRPAV.EA...GE+LGFLPGDLp-KlDPYLRPLYDALa-hl........................sh-psp+hh..EcshIElAPLAaMRGRTLscAFlILDEAQNTTspQMKMFLTRlGhsS+hVlTGDlTQlDLP.ps.p.....pSGLhcAhclLcs.lc..uIuhsphsppDVVRHsLVp+.IlcAY- ..................................................................................................s.I+s+s.sQp.t.Y..l...p.slh.......pp-......llF..ul.GPAGTGK.T..........a.....L..A..l.....A..t.Al..p.A.L...pc......pp...l..c.....R...I..l....L.T..R..PAV....EA.............GEc..L.......G..F..L.P..G.D..l.p.....E..K..l..-.....P..Y...L......R.P...l..Y....D.....u..Lhph..l....................................................................u.h..-..p...h....p..c....l....h...-...+.....s.hI.E.l.......A..PL......A......Y...M......R...G.....R..T......L......s...c......AF..lIL.DEA.QNsThpQ......MKMF....LT.R.l....G..h.s..S...KhVl.......oGDl.o..Ql.DLP..ts..h...............pSGLtp..AhchLc..s...lc..........tlua.sph...p..t.pDVVRHslVtcllpAY........................................................................................................................................................................................................ 
0 436 867 1125 +3845 PF02114 Phosducin Phosducin Mian N, Bateman A anon IPR001200 Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.53 0.70 -5.67 2 1240 2012-10-03 14:45:55 2003-04-07 12:59:11 11 6 686 7 485 1332 19 169.10 36 84.47 CHANGED ht.ApSpo...hE.shEG.uspTGPKGVINDWR+FK.LEoEpp-p.s+phccll+phS...........pppcpKD.pE+hStKMolpEhthhcKsh-DEphLppYR+QpMp-M+QpLphGPpat.VhEl.SGEtFL-hI-KEQK.ThIhVHIYEDGl.Gs-AhNushICLAAEYPhVKFC+l+uSshGAusRFopssLPsLLlYKuGELluNFlpVT-QLuE-FFssDlEuFLpEaGLLPEKEhhVLppsp........-pDhE ..........................................................................................................................................................................................................................................................................................................l....p..t..-..c.E..D.E...p..h...L..c...p.....YR........c.+pMp....-h........+.......p.....c......L.......s..........h.......s........s.....+..........a.....G......p.....l......t.........E...l.......p...........s............G-................pFL..........-...s.....l....................E.......................+.......p.......p........K..s...s.................s....l.......l...VHl..Y.-..-..u......l...p.....s..Cc..l......LNssL..s....s..L.As.cY.s.h...V+..FCK.I+.A...s.s...s...s.....s.t...........-+....Fss...cs..LPTLLVY+.s.G...-LlusFl...uls.-..ph..............................................................................................t............................................................ 1 154 244 369 +3846 PF00068 Phospholip_A2_1 phoslip; Phospholipase A2 Eddy SR anon Overington and HMM_iterative_training Domain Phospholipase A2 releases fatty acids from the second carbon group of glycerol. Perhaps the best known members are secreted snake venoms, but also found in secreted pancreatic and membrane-associated forms. 
Structure is all-alpha, with two core disulfide-linked helices and a calcium-binding loop. This alignment represents the major family of PLA2s. A second minor family, defined by the honeybee venom PLA2 PDB:1POC and related sequences from Gila monsters (Heloderma), is not recognised. This minor family conserves the core helix pair but is substantially different elsewhere. The PROSITE pattern PA2_HIS, specific to the first core helix, recognises both families. 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.18 0.71 -3.86 37 1645 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 315 390 515 1676 0 114.20 33 69.11 CHANGED NLhQFtpMIppss..Gppshh.pYssYGCYCGhGGpGoPhDshDRCChsHDsCYucs.p.sGC.....pP+hstYoYpppsGs.lsCss.pssCpptlC-CD+sAAhCFups..sYN.pphh.....shsp..sc+C ................................lhphtpMIpphs....sp...pshh....p.....Y...tsYGC....YC.............G..........h...........G........G........p...........G.......p.....P..........hDs.....h..D.+CChs..HD..sC...........Ys........c..s....t...........t......ps.C..............pP...p....h.s.t..Y....s....a......p...h..........p........s...............s.......p.......lh......C........s..........s......p..........s........................Cpp.lCpCD+t.uAh.C..h.tps......sYs.pp.hh................................................ 0 195 222 307 +3847 PF04185 Phosphoesterase Phosphoesterase family Bateman A anon Pfam-B_1803 (release 7.3) Family This family includes both bacterial phospholipase C enzymes EC:3.1.4.3, but also eukaryotic acid phosphatases EC:3.1.3.2. 
20.40 20.40 20.40 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.43 0.70 -5.26 20 2105 2012-10-03 20:55:17 2003-04-07 12:59:11 9 30 757 2 743 1807 98 335.40 23 58.08 CHANGED lcHlVllhpENRSFDHhaGphsssps............................st.h....h.....s..stt...sh.p.sa...s.t.s.shhsu.h.ps..Hth.t.t.thstGh.tD.h...........shs.shsMuhaptpclPha.atLAcsFslsDpaFsSl.GuTpPNRlalho.uss.....................cssGs................................................puss.ls.st..........tsashpshs-pLppsGloWslYppthsssh..............h.sh...................................cthhtssthtphthssFtpDlcpspLPpVSalhs.......sssps-...HPuass..tGspalspllcsLtusPp.WscTlllloYDEsGGaaDHVssPsssh.s..sthh.............s..s...........ht.hGLGsRVPshlISPa..s+sGhlscp.........paDHoSlLphlcppFs....ls ............................................................................................................................................................................................................lcplVlhh.EN+SFD.......phhG...h.....ssps............................................................................................................................................................a...............................................t...........h....t....................h.....t.....t..t....h..t.t...uh....s..............................................t...shsha............p..t..p..p..hPhh..hsL....AptaslsDsaa...........s....uh.....hs......sotP.N+.hhhho.uss..........................ss..t..........................................................................................................ss.ss.h...t....................sh..s.hp.s..h.s-...tL...pp..s.....G.....l.o.W...t..hY.t.pt.....h...........s....s....h......t.............t..................................h..........h........................h............t........................................................................................................................tt.t.h.s..t.th.....t.....h.....
h.s.pFht.clt......s......s..p..L.P.p.V..oa..lhs......................sts...st...............H..............P..........s.............s..........s............................st........G........s.........h........altp.......ll.pslp...psPp...W..p....c....T....slllsaDE....s.s..G...a......aDH..V.s.s.s..h....ss.........................................................................................ht..G..R.lPh.hll.SP..a....s+s....Gh.Vspp.....................ha-HsSlL+hlcphaul.................................................................... 0 244 433 617 +3848 PF04272 Phospholamban Phospholamban TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Domain The regulation of calcium levels across the membrane of the sarcoplasmic reticulum involves the interplay of many membrane proteins. Phospholamban is a 52 residue integral membrane protein that is involved in reversibly inhibiting the Ca(2+) pump and regulating the flow of Ca ions across the sarcoplasmic reticulum membrane during muscle contraction and relaxation [1]. Phospholamban is thought to form a pentamer in the membrane [1]. 22.40 22.40 22.40 95.30 21.10 22.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.79 0.72 -4.52 2 32 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 28 15 20 25 0 52.00 90 96.41 CHANGED MEKVQYlTRSAlRRASTlEhs.QARQpLQpLFlNFCLILICLLLICIIVMLL MEKVQYLTRSAIRRASTIEMPQQARQNLQNLFINFCLILICLLLICIIVMLL. 
0 1 1 4 +3849 PF00922 Phosphoprotein Vesiculovirus phosphoprotein Bateman A anon Pfam-B_1160 (release 3.0) Family \N 20.50 20.50 21.70 21.50 20.30 20.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.80 0.70 -5.19 5 426 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 22 22 0 209 0 151.00 51 89.39 CHANGED oR...LpphLKsYPcL-sTLu-I-EhE-ppc-cssp..FpsDusscsopPSYYLu--hc-S-oEsssEDspsl.splPPsspVE.a.ts.spDshhDDDl.....sVsFs-c.sWosslpKsssG+tsLcLouPpGLTstQhsQWpcoIcAls-pSKthRLupspIcsou-GLllpER.MTPslSsoo...................-.hPsSssSssoopslS.......o.posoPuspSssS...LGLP-lsus....uhh.+EhpLsPlsuSssPYcsTLs-LFGSp-ualsYsssGshSLK-AlIuGLK+KGIYN+IRI .......................................................................................................................................................................................................P.GLot.QpsQWhhTIcAVspSuKaWNLuECph.sSs-slIlKtR.hTPDs.pss.......................h.psps.pSEulS.........................slWsLppT.........shphpsK+AulpPLTloLc-LFuSctEah.hssptt.p.hthhlhGl+h++LaNp.............. 0 0 0 0 +3850 PF00343 Phosphorylase phosphorylase; Carbohydrate phosphorylase Finn RD anon Prosite Family The members of this family catalyse the formation of glucose 1-phosphate from one of the following polyglucoses; glycogen, starch, glucan or maltodextrin. 
19.00 19.00 19.10 19.00 18.60 18.90 hmmbuild -o /dev/null HMM SEED 713 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -13.21 0.70 -6.65 9 4954 2012-10-03 16:42:30 2003-04-07 12:59:11 15 27 3182 252 1289 3940 903 590.10 40 79.46 CHANGED ALhpLGlsLEELhEpEpDAGLGNGGLGRLAACFLDShATLsLPuaGYGlRYcYGhFcQpIscGtQsEhPDtWLchGsPWEhpR.-hphsVcFaG+Vp....psupt..hcW.psphlhAlAYDhPlPGYcssssNolRLWSA+s.sp-FNLscFNsGsYlsAlpp+ppuENlocVLYPsDsphpGKELRLKQpYFlVuAoLQDIlpRaKpsc........psasshscKVAIQLNDTHPsLuIPELMRlLlD-EphsW--AW-lTs+TFuYTNHTlLPEALE+WPVcllcpLLPRHhpIIaEINcRFLptVtppaP.sDhc+hcphSll-E......t..+pV+MAaLslVGSHsVNGVAplHS-llKpclFsDFhplaPsKFpNKTNGITPRRWlthsNPuLutllsctlG.-cWhssL-.LpcLcpasDsstFhcphtplKptNKh+LAcaltpchsVplNPpuLFDVQVKRIHEYKRQLLNlLHVIshYpcIKcsst...tphsPRssIhGGKAAPuYahAKpIIKLIssVA-VlNsDPtVsshLKVVFlsNYsVSlAEhlIPAuDlSEQISTAGTEASGTuNMKFsLNGALTIGTLDGANVEItEclGc-NhFIFGhps--VpplcccG.YcspcaaptDscLcpVlppIpsGhFuP.psctFcsllsS......l.asDhYhVhtDFtuYl-sQcpVDphY+spcpWsctuIhNhAssGhFSSDRTI+EYAccIWslcP ....................................................................................................................................................................................................................................................................htthsh.s.lpcl.-.........E.DsuLGNGGLGRLA...A..C..FlDS.hAolshsuhGhGlpYcaGhFcQth....s..G.QhE...P...-.W.....h.p........t...sW...h.c.....p...........hth...V..ta.h.G..h........................h......lhu.saDhs.......lsG...Yps.t...s.spsLRLWpups....t...h...s.h..t..Fs.t.G...t....a.tu.tpt..scslotlLYPsD.sp....tG+.L.RLhQpYFhsusslps.Ilcca.h..t..t...........t.sh.p..ph.s-..h......slp.lND..THPs.lu.IPELh.Rl.L.....l.D..pct.hs.W..-c..Aapls..pps.......huYTNHTlhsEALEpWslphhpp.lL.PRhh.p..II.cI.s.pp........ah.t................l.p.t.....p..h....s.....s..s...........t....h...h..tp..h.ull.c.-.................................s.....t.p......V+MAhLsl.hsu.a.pVNGVutLH...oc...l............l......pp...sh.h......t.c..a...h...p....l.....a.....P.....p......+......F.
.p......N......h......TNGl.........T...........R.....R..................W..................l......................h...............s...NP.........tL......u..t....l.l....s.................c................t.......l...........G.............c........p........W......h.........p.....-...................h.................s..............p..............L...p.....p........L...........p....p.........a.........s.............-.....c..........s...........t....h......t.........p.......p............h........t...p........l.......K....................h.........p............N............K..h...........+......L.........A.....p..........a.....l.........p.........p..............p..............h.................s...............l..........p.................l............s....P.....p............u.............l.....F.........D......l............Q............l......KRlHEY.KR.QhLNh..L..al.....lt..hYp.c...I+.p.s..sp.............hphhPR.s.h.lFuG..K..AAPuYhhAKpII+.hI.splAchlNsDP.tlsshLK.V.......VFl.NYsVShAch..lI..............PAuDlSEQIShAupEASGTuNMKhhLNGALTlGTlDG...................ANV..................E..hh-....tl.G.....t-N.h.al.F.G..p..s-p.V...t.h....h...t.p..s.................Y....p...s....h....phh.p....ts...tlctslp..l...s.G..h.h...s.s........p........t..h....aptl.h.pp........h......t.....D.ahlhtDatsYhcs.p...................cpltt..Y..........p.s.t..........p.tWhphs.lh.....NhAp.G.hFSSDRoItpYspcIWph........................................................................................................................ 
0 432 793 1068 +3851 PF00124 Photo_RC photoRC; Photosynthetic reaction centre protein Sonnhammer ELL anon Prosite Family \N 21.90 21.90 22.10 21.90 20.10 21.20 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.54 0.70 -5.36 55 8728 2009-09-11 05:37:13 2003-04-07 12:59:11 14 5 3408 262 234 6932 2419 232.40 49 91.25 CHANGED lYlGahGlhol..hhuhluhhlIuhshhsss........sassh..hpphahlulpPPsspYGLu.hsPLtE.............GGhW.hsshahssulluWhlRpachu+pLGhGhHlshAFuuAIhhalsLslIRPlhMGSWucuhPaGIhsHLDWssshuhpYsNhaYNPFHMLuIuFlaGouLhhAhHGuhILussphs.sscplc..hs.t.usE+ts........hFaR.hh....GashstcuIHRauhahAlhsshhuulGIllo..................Gsll.....csWhpWhshashsPha ............................................................................................ss...htphh.hul.ssusuhGhp.hhPlWEsts.s...cWh..GG.apllshHhhlGlhsahsRpaELuh+LGMRPaIslAaSAPl..AAhsuVFL.lYPlGQG.SaS-uhPhGIuupFsFhlsFQucH.NlhhpPFHMLGVAGVFGGuLh.uAMHGSLVpSoL.....h......h.E......s.s-.......st........otshuachuQEEETYsh......VsApsaFucLIh......suFN..NoRuLHFFhhhaPVsGlWho.....ul.Glssh..................u..h.....tsa.ph..h.......s.................................................. 0 46 140 195 +3852 PF01895 PhoU DUF65; PhoU domain Enright A, Ouzounis C, Bateman A, Cerutti L, Dlakic M anon Enright A Domain This family contains phosphate regulatory proteins including PhoU. PhoU proteins are known to play a role in the regulation of phosphate uptake. The PhoU domain is composed of a three helix bundle [1]. The PhoU protein contains two copies of this domain. The domain binds to an iron cluster via its conserved E/DXXXD motif. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.15 0.72 -3.54 388 11541 2012-10-02 11:27:25 2003-04-07 12:59:11 14 27 3795 24 2708 7226 2003 87.20 22 51.69 CHANGED ltchuphst.phlppuhpu.h...hpp.D.....phA.ppl.h..pt....-cplDph........tppl..........p.....phhphh.tt..pp..sh.....................hp....h..h.....hht..lspslERluD.aussIAchs ..................................................pMuphst.ph....lp.pul.pu..h.........hpp...D....h........chA..pcl..h...pp..........Dcp.lsph.......pppl.............pc....phhphh..sp.......ppshtp...................lph...h...hs...hlp...hspslERlGD.autsIuch.h..................... 0 935 1793 2290 +3853 PF00502 Phycobilisome Phycobilisome protein Finn RD anon Pfam-B_10 (release 1.0) Domain \N 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.93 0.71 -4.47 22 4056 2012-10-01 21:46:00 2003-04-07 12:59:11 14 6 1323 188 272 2702 388 96.30 32 84.97 CHANGED TpllssADspG+ahosu-LpulpuhlppustRLcusptlssNtssIlppAuptlascpPplhssGGssYssRph..AtClRDhcaYLRhlTYulluGssusL--hslsGl+EsYpuLGlPsushsculphhKss..shphlu......................upsssEhssYFDYslsulu ...............................................................................................................................................................hh...............................h....thths.......................................................................t..................................................... 0 37 167 246 +3854 PF02333 Phytase Phytase Mian N, Bateman A anon Pfam-B_14843 (release 5.2) Domain Phytase is a secreted enzyme which hydrolyses phytate to release inorganic phosphate. This family appears to represent a novel enzyme that shows phytase activity [1] and has been shown to have a six- bladed propeller folding architecture [2]. 
18.90 18.90 19.30 18.90 18.60 18.60 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.35 0.70 -5.75 2 586 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 259 7 131 592 319 193.30 42 71.03 CHANGED MphsKThLLosAAGLhLohsAsSspA.Hhls-.aHFpVsAtsET-PVsousDAADDPAIWlc.KpPppSKLITTNKKSGLsVYsL-GK.LHSYchGKLNNVDlRYDFPLNGcKlDIAAASNRSEGKNTIElYAIDGcpGpLpSITDPN+PIuosIsEVYGFSLYHSQKTGtaYAhVTGKpGEFEQYElssstpGYloGKKVRtFKhNSQTEGhsADDEYGsLYIAEEDEAIWKFsAEPsGGSpGpVlDRAsGcHLTsDIEGLTIYYAssGKGYLhASSQGNsSYAhYERQGpN+YVAsFpITDG.chDGTSDTDGIDVLGFGLGPcYPaGlFVAQDGENIDpGQtsNQNFKhVsWEpIAp+lG.hPplpKQVsPRKhpDRS .....................................................................................................................................s.DA.....A...DDPA....lWlpPp......sP......s...p...Sh...lluTsK....c....u......G..Lh..VY..DLsG+...l..Q.....................h..........s...s..........G...+.h...N....NV..DlR....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s.................................................................................. 1 46 81 114 +3855 PF04833 COBRA phytochel_synth; Phytochel_synth; COBRA-like protein Waterfield DI, Finn RD, Bateman A anon Pfam-B_4078 (release 7.6) Family Family of plant proteins are designated COBRA-like (COBL) proteins. 
The 12 Arabidopsis members of the family are all GPI-liked [1]. Some members of this family are annotated as phytochelatin synthase, but these annotations are incorrect [3]. 19.00 19.00 19.10 21.20 18.90 18.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.41 0.71 -4.49 22 265 2009-09-11 09:29:15 2003-04-07 12:59:11 10 3 41 0 152 243 23 163.40 49 35.02 CHANGED YlAhVTlpNap.ht+lps..WpLuWpWt+cEhIaoMpGApso..-puDC.....upah..hshs+...sCc+cPslVDL.PupshspQluN...CC+sGsl.shu.DPupStSuFQhpVuph.....sssppslpsPpNFplpu.....sPsYsCGsshhVsPT......pF.sssG.ppsTpAl........hTWpVsCshoQ ..................YlAhVTh.NaQ.aR.+Ip.s.PGWpLGWsWAK+E.lIWSMhGAQsT..EQGDC...........S+FK........ushPH....sC++sPs.lVDLLPGsPaNpQ...l.uN...CCKuGVlssa...sQDP.u......susSuFQloVGhu......G.T..oN.c.TV+hPpNFTLtu....PGPGYTCGssthVs.sT......pFhosDt..+RpTQAl......................hTWs.VTCTYSQ.......................... 0 22 94 125 +3856 PF00360 PHY phytochrome; Phytochrome; Phytochrome region Finn RD, Mistry J, Hughes J anon Prosite Family Phytochromes are red/far-red photochromic biliprotein photoreceptors which regulate plant development. They are widely represented in both photosynthetic and non-photosynthetic bacteria and are known in a variety of fungi. Although sequence similarities are low, this domain is structurally related to Pfam:PF01590 [1], which is generally located immediately N-terminal to this domain. Compared with Pfam:PF01590, this domain carries an additional tongue-like hairpin loop between the fifth beta-sheet and the sixth alpha-helix which functions to seal the chromophore pocket and stabilise the photoactivated far-red-absorbing state (Pfr) [1]. The tongue carries a conserved PRxSF motif, from which an arginine finger points into the chromophore pocket close to ring D forming a salt bridge with a conserved aspartate residue [1]. 
20.30 18.00 21.00 19.50 19.90 17.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.12 0.71 -5.15 80 3555 2012-10-02 14:34:25 2003-04-07 12:59:11 15 76 2538 22 318 3683 22 152.50 56 28.28 CHANGED lhchps...hLs-hlh+..chshulhs..psPslhDLlcs-GAALhapsphhplGtTPscpplc-lspWLp.p..t.sss......shu.TDuLscs.YP.sAtslu-sssGhhAlpIo.ppch...lhWFRscssppVpWGGs.ccssphss...st+hpPRsSFcsahElV+s+SlPWcshEh-.....AhcsLphhLhsshh.cp.....ps ..........................................................................................................................IL+TQTlLCDMLLR.....DuP.l.u.IlT...QSPNlMDLV.KCDGAALaYc.sc......h........WhL.Gl..TP..o...E.sQI.+DIsp.WLhch......HssoT.............GLSTD...SL...h-A..GYP....uA..s..s..L.G...Du...........V.C.G...MAAs..pI..o....s......+...D..h........LFW.FRSHTApEl+WGGAK.Hc.P.t..-cDc............up+..M...H.PRoSFcAFlElV+.+ShPWp......................................................................... 0 89 180 254 +3857 PF03284 PHZA_PHZB Phenazine biosynthesis protein A/B Mifsud W anon Pfam-B_4020 (release 6.5) Domain \N 20.50 20.50 20.80 20.60 20.30 19.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.95 0.71 -5.15 8 88 2012-10-03 02:27:23 2003-04-07 12:59:11 8 2 31 21 8 134 0 153.10 69 98.67 CHANGED Mscstssp.sFsDphEL..Rp+NRtTVEpYMcT+Gp-RLRRHcLFTEDGsGGLWTTDoGpPlVhpG+-+LAcHAhWSL+CFPDWEWaNV+lFET-DPNHFWVECDG+GKILFPGYPEGYYENHFlHSFEL-sG+IKRNREFMNPhQQLRALGIPVPpIKR-GIPo ..................................................................................M.spth.p.shpDt.EL..RR+NRAT.V.E+YMp.hKGt.-RL.pR.H.pLFVEDGCuG...WTT.-oGpPlVhRG+-pL.tc.hA.W....+C.....FPD.W..E.W.aNl+IFET-DPNHF.WVECDG+GKhLhPGYPEG.YhE..NHalHSFEL.Es.G+IKRNREFMNshQpLRALuIsVPpIKR-GIPT........... 0 4 5 7 +3858 PF00388 PI-PLC-X Phosphatidylinositol-specific phospholipase C, X domain Finn RD anon Prosite Family This associates with Pfam:PF00387 to form a single structural unit. 
22.10 22.10 22.20 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.82 0.71 -4.80 140 2890 2012-10-01 22:17:21 2003-04-07 12:59:11 14 118 764 50 1401 2585 81 145.10 30 19.62 CHANGED MspPLo+YFIsSSHNTYLsG.............................sQlt.....up.SSscsYhpsLppGCRClElDsW.......DGsss...p............................................................................................................P........llhH...GtThT...op..l.Fc-VlcsI+caAFhsS.p...........YPlIlSLEsHC.oscpQ..pphAchhpplhG-hLhspshst................ssstlPSPppL+tKILlKsKp ......................................................................................................s.Plo+YaIso..o...HN..oYLh.s.............................pQ.h.h.......u.p...opsp..sY......hps..L.....pt.GsRslE......l...D.s.a.........cus..cs....p..............................................................................................................................................s......hlhH........Gh...s...hs...................sp.l.....hp...-................Vl.c.s.Ipc...a.u...F..tss..p.....................hPl..Il..S.l...Es......+s.....s.p..pQ...p.t...h.....s..p...........hh...pp..l......aG-...h...Lhspshp............................ssptlPospp...L+tKIllps+..................................................................... 0 360 614 1002 +3859 PF00387 PI-PLC-Y Phosphatidylinositol-specific phospholipase C, Y domain Finn RD anon Prosite Family This associates with Pfam:PF00388 to form a single structural unit. 
20.20 20.20 20.30 21.00 19.60 20.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.76 0.71 -4.11 86 1860 2012-10-01 22:17:21 2003-04-07 12:59:11 14 98 351 26 1049 1720 51 113.40 41 11.94 CHANGED -LScLlsYspulp....FcsFpt.sppp....hhchtShsEspspphhc.pp..stp..hlcaN++pLoRlYPpG...hRl.cSSNa.sP..hWssGsQh..............VALNaQT.Dh.sMpLNpuhFp.N.......GpsGYVLKPshhR ...................................................................LSsLl.Yspss.p......ap.s....hp........pt.p..........t..................hhchsShsEsputchh........p..pp...............stc..al..p..aN..p+pLoRlYPpG...hR.l.DS...S.Na...sP..hWssGsQh..................VALNaQT.......sh..sMpLNpuhFp...N.......G..ps..G...YVLKPphh........................ 0 250 418 726 +3860 PF02192 PI3K_p85B PI3-kinase family, p85-binding domain SMART anon Alignment kindly provided by SMART Family \N 21.00 21.00 21.10 23.30 19.70 20.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.34 0.72 -4.56 13 271 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 91 6 146 207 0 76.20 47 7.94 CHANGED hlplcVsppsTlppIKp.LWKpAcphPLaphLt-.suYhFsslNppApcE.ELpDEo..RRLCDlRPFhPlL+LlpRpssp ...........hlslcssRpAoLpsIKp..LW+cAcphPLap..hLp...-.suYlFssVsQ..p.AEpE..EhpDEo....RRLCDl+s.F..hPlL+llp+pGs....... 0 32 44 93 +3861 PF00794 PI3K_rbd PI3-kinase family, ras-binding domain SMART anon Alignment kindly provided by SMART Family Certain members of the PI3K family possess Ras-binding domains in their N-termini. These regions show some similarity (although not highly significant similarity) to Ras-binding Pfam:PF00788 domains (unpublished observation). 
20.80 20.80 20.80 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.17 0.72 -4.10 12 588 2012-10-03 10:59:06 2003-04-07 12:59:11 13 31 130 78 323 502 1 105.50 25 9.04 CHANGED ssp.hPcsl..hh.ssplhlslhhp..........ttp.shThpsssssostplhtphlpK...phshhpps-ssp........DalL+VsGppEalh.ssasLtpapaIppplppstpscLsLhp.st ..............................................s.....t.l...h...p..s.p.lhlslhhp..............s.ppshThpssssss.stplltpslpc.......ppsh...ht.ps.-.tt.............-YlL+VsGpcEYLh.s......c......as.Lspa.............p..a...IppClp.sp......cLhLhp...s............... 0 83 113 201 +3862 PF00613 PI3Ka Phosphoinositide 3-kinase family, accessory domain (PIK domain) Ponting C, Schultz J, Bork P anon SMART Family PIK domain is conserved in all PI3 and PI4-kinases. Its role is unclear but it has been suggested [2] to be involved in substrate presentation. 29.00 29.00 29.00 29.20 28.80 28.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.20 0.71 -5.11 16 1353 2012-10-11 20:00:59 2003-04-07 12:59:11 15 53 314 96 849 1260 21 179.80 26 14.57 CHANGED cchcs.sppc+cplcpIlshDPhspLotcE+phlWpaR.a.hlhshscALs.......KhLh.SVcWsshppsspshpLhtc...Ws...lcsscALELLsssass....hVRsaAVpsLEp.hsD-ELppYLLQLVQsL+YE.........................................................hpcShLs+FLlp+ALpNh.clGpFhaWaL+uEh......aspsh.spRFsslhEtahcsh.spthtpltp 
..........................................................................................h.............l....l.h.t.h....s.........l.p....p-.pphlW.p.hR....h....hh.....p..............p.uLs...........................+hLh..s.s.p....a..pp..........p....p..s.t...p....h.............h.p.l.L..t..p......................W..s.........l.ss.p..A.LpL.Ls..s..p..ass.....h.............VRpa...AVppL.c.p..hs.c-c...LhhYL.QLVQ.A..L.K...aE............................................................................................................................................h.p.u.....sLscFLl....p.R.............Ahp.s...plu+hlaWh.L.....p...s-h.................cst..ph...t..p.h..t.lhpthhp...........hh.................................................... 0 284 422 654 +3863 PF02226 Pico_P1A Picornavirus coat protein (VP4) Bateman A anon Pfam-B_345 (release 5.2) Family VP1, VP2, VP3 and VP4 for the basic unit that forms the icosahedral coat of picornaviruses. Five symmetry-related N termini of coat protein VP4 form a ten-stranded, antiparallel beta barrel around the base of the icosahedral fivefold axis [1]. 20.80 20.80 21.60 21.00 20.30 20.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.30 0.72 -4.24 31 6859 2009-01-15 18:05:59 2003-04-07 12:59:11 11 25 444 79 0 4094 0 67.40 60 13.37 CHANGED GAQVSTQ+oGuHETustAssGSoIsYTNINYYKDuuSsSAs+QDFoQDPuKFTcPVhDlhpcshPsLp ........GAQVSpQpsGoHEss..Ns.AosGSsIsYhNINYYKDuASuuAu+QDFSQDPSKFT-PVKDl.hpchhPsLp......... 0 0 0 0 +3864 PF00947 Pico_P2A Picornavirus core protein 2A Finn RD, Bateman A anon Pfam-B_138 (release 3.0) Domain This protein is a protease, involved in cleavage of the polyprotein. 
21.00 15.00 21.20 15.10 20.70 14.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.54 0.71 -4.33 9 2140 2012-10-02 13:45:52 2003-04-07 12:59:11 14 29 270 3 0 2018 0 113.10 63 8.57 CHANGED NhHLAT.pDapNslh.sasRDLLVsposApGsDpIARCsCsoGVYYCcS+pKaYPVshpsPshhhlcts-YYPtRaQophLluhGhuEPGDCGGILRCpHGVIGIlTAGGpGlVuFADlRDLhhlE- ....................................NRHLATpsDWpNsVWEsasRDLLVooT...sApGsDT.I.ARCs...CpoGVYY.CpS+pKaYPVSFpt....Psh..h....l.ptSEYYPtRYQSHlhlutGauEPGDCGGILR.CpHGVlGIlosGGpGlVuFuD..lRDLhhh--.......... 1 0 0 0 +3865 PF01552 Pico_P2B Picornavirus 2B protein Bateman A anon Pfam-B_214 (release 4.0) Family Poliovirus infection leads to drastic alterations in membrane permeability late during infection. Proteins 2B and 2BC enhance membrane permeability [1,2]. 20.00 20.00 20.20 21.70 19.40 19.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.26 0.72 -3.84 22 1455 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 271 0 0 1484 0 99.00 57 5.36 CHANGED QG.loDYlppLGsAFGsGFTspIucplsplpshh..psslscKllKtllKllSALVIllRNpsDhhTVhATLALLGCsGSPW+aLKpKlCphhtl.Palp+Qu ..................QG.loDYIcpL....GsAFGo....G....FTs....pIs-pVs....t..Lpshl...........psslsEKlLKsLlKIISALVIllRN.ppD..h.s..T.lhATL.ALl.GCs...uSPWpWLKpKssphLtI.Phsp+Qu....... 0 0 0 0 +3866 PF00345 PapD_N pili_assembly; Pili_assembly_N; Pili and flagellar-assembly chaperone, PapD N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain C2 domain-like beta-sandwich fold. This domain is the n-terminal part of the PapD chaperone protein for pilus and flagellar assembly. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.55 0.71 -4.33 154 7954 2012-10-03 16:25:20 2003-04-07 12:59:11 15 5 993 92 559 3765 45 119.70 30 51.71 CHANGED ulslssTRlIass..spc.psolplp.Nsss.tPhLlQsWl......-s.........tt......t.ptps.P......Fllo.PPlhRlcssppptlRl...hp..sss....LPpD...R.ESlFalNlppIPsts......................ps...plplAl+s+lKLFaR.P...suLt ..............................ulslssTRlIas.u..s.p.+...ps.slplp.Npss..psh.....Ll.QoWl..................-s......................tss.ppps.s............Fl.lo.PPlh.+lp.stpppplRl..hp....ssp.....LP.pD.R...ES.lFahNlcpIPsts..............p..p........spNsLplA..h..po+lKLFaR.Ptsl....................................................... 0 63 176 377 +3867 PF02753 PapD_C pili_assembly_C; Pili_assembly_C; Pili assembly chaperone PapD, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain Ig-like beta-sandwich fold. This domain is the C-terminal part of the pilus and flagellar-assembly chaperone protein PapD. 22.80 22.80 22.80 22.80 22.50 22.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.14 0.72 -4.14 155 6654 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 783 91 357 2906 15 65.10 26 28.05 CHANGED lplpNPTPYalohssl.pls...........spplpss......MlsPhushphslssssss......plpaphINDaG.........uhsptpt ......lplpNPTPYYlols....s.l..pss..............sts.l.pss.......MlsPhust.shslssssss..............plpaphI..NDYG.........uhh....h........................ 0 28 98 239 +3868 PF00114 Pilin pilin; Pilin (bacterial filament) Sonnhammer ELL anon Prosite Domain Proteins with only the short N-terminal methylation site are not separated from the noise.\ The Prosite pattern detects those better. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.84 0.72 -3.34 147 1425 2012-10-03 10:38:27 2003-04-07 12:59:11 14 15 722 48 226 1319 90 110.70 23 71.92 CHANGED ARuQloculuhhsuhKosl.Ehhhsp........Gphssssss.............hGhss.sss..ht.......shsssstsss.....................usIssshssssst......lsGpslslsts..ss..............................ssWsCss.......................sl.sscahP..s.uCp .............................+uphspslt.hsu.hKsslt-hhhsp................Gth..ssssss..................hGhss...sss.hps.............Vpslsls..s........................GslTsshpssss........lpup.plsLtsp...ts...........................ssss.pW..t..Csss............................sl.ss...chh...PssCp.................................... 0 58 120 168 +3869 PF05137 PilN Fimbrial assembly protein (PilN) Bateman A anon COG3166 Family \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.43 0.72 -4.15 214 1587 2012-10-02 17:03:51 2003-04-07 12:59:11 8 5 1422 0 398 1103 323 79.40 23 35.54 CHANGED pllspL.sphlP.-s.lalsslp..hpss.......................plplsGhut......s.splushhcsLc.p........................................Sshhp..sspltshppss....................tttp.......htp............Fslpspl ........................hlppL.sp.hlP..-t..saLo..slp....h.pss................................................................................slplpGhup.........o.sslsshhps.Lc..p..........................................sshap.......ssphtshppps......................tps..............hhp............Fplpht.h..................................................... 0 129 253 341 +3870 PF04350 PilO Pilus assembly protein, PilO Mifsud W anon COG3167 Family PilO proteins are involved in the assembly of pilin. However, the precise function of this family of proteins is not known. 
21.80 21.80 21.80 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.82 0.71 -4.31 75 807 2012-10-02 17:03:51 2003-04-07 12:59:11 8 8 747 2 266 706 143 144.00 26 68.03 CHANGED L...cptpscEppL+ppappKtppAsNLsta+pQhtpl-ppastlL+QLPscsElsuLLsDIspsGhssGLpFpphc.ts.Els..+-FYsElPIsIpVsGsYH-lGsFsuslAsLPRIV..oLcshslsss....pss..........tLshshhA+TYRah- ........................................................thptpEtpL+pph.pp..K..t...t...ps....s....sL..p....th.......c.....p.......Qlt...p..h...c......pph.......s....t.hl....c.......pLP....s.c.s....E.hssLLpcl..spsuhs......sGL...p.......h.pp......l......p.......t...............s.........p.h.s...............p.c......a............a..h.c.l.P.....l..slp..ls.G.sYaplup...F.sss.l..u...s.LsRIl..olcslslpts....spp.................ttLphphhApTYph...................................... 0 94 174 231 +3871 PF04351 PilP Pilus assembly protein, PilP Mifsud W anon COG3168 Family The PilP family are periplasmic proteins involved in the biogenesis of type IV pili [1]. 21.20 21.20 21.20 21.30 20.90 21.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.54 0.71 -4.30 54 679 2009-09-11 22:48:09 2003-04-07 12:59:11 8 3 667 8 183 533 67 140.50 27 79.17 CHANGED DLcpahscs+scspspIcPLPchpsapshsYsuss.hRsPF........sPhclt.......t.ttpspsslpPD.....hsRtK...EhLEsasL-sLpMVGol...s.pssthaALlcsssGslaRV+sGsYlGpNaG+lspIo-splplhElVsD.Gp.GsWlERsssLtLp .....................................................Lptahtphctpst..l.tsh..........h.shsYps.tt..h+..sPF....................s....phh.................thttsssht.P-..........p.Rtc...psL..EpasL-sL+hlGsl..........s.p.s..sphhALl.p....ss...sup.lapVpsGsYlGpNtG+lspIs..c....splplhEhl..D.u.t.GsWhcR.spLtL..................... 
1 48 110 154 +3872 PF04697 Pinin_SDK_N pinin_SDK_N; pinin/SDK conserved region Waterfield DI, Finn RD anon Pfam-B_4141 (release 7.5) Family SDK2/3 is localised in nuclear speckles where as pinin is known to localise at the desmosomes where it is thought to be involved in anchoring intermediate filaments to the desmosomal plaque [1,2]. The role of SDK2/3 in the nucleus is thought to be concerned with modulation of alternative pre-mRNA splicing [4]. pinin has also been implicated as a tumour suppressor. The conserved region is found at the N-terminus of the member proteins [3]. 25.50 25.50 26.40 26.30 25.20 25.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.61 0.71 -4.13 2 80 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 51 0 41 78 0 126.30 73 20.63 CHANGED MAVAVRoLQ-QLEKAKEuLKsVDENIRKLTGRDPs-lRPhQsRhLslsGPGGGRGRG..LLRRGhSDSGGGPPAK.RDl-GAl.RLuG-pRsRR-SRp-SDsE.DDDVKKPALQSSVVATSKERTRRDLIQDQs .......MAVAVRoLQEQLEKAKESLKNVDENIRKLTGRD..P..NDl.RPhQ....ARlLu..L..oGP.G..GGRG..RGu..l.LL.RRGFSDSG.G...G..PPAK.QR..D.LE.G...AlS...RLGGERRTRRESRQESDsE.DDDV..K........K.PALQSSVVATSK.ER.T.RRDLIQDQN................................................... 1 10 12 22 +3873 PF00224 PK Pyruvate kinase, barrel domain Finn RD, Griffiths-Jones SR anon Prosite Family This domain of the is actually a small beta-barrel domain nested within a larger TIM barrel. The active site is found in a cleft between the two domains. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.06 0.70 -5.83 13 7047 2012-10-10 15:06:27 2003-04-07 12:59:11 16 29 4703 229 1953 5106 2651 326.30 42 68.66 CHANGED hR+TpIlsTIGPu.................opoVE....pLpcLhcAGhNVsRhNFSHGoaEYHtssIcNVRcutcphut.......VAIALDTKGPEIRTGshps.....tlplssGcchhlosDtsh.tsssscchlalDYps...lscslpsGshIhlDDGlloLpVlphsssp.plhscspNsGslus+KGVNLPGscVDLPALoEKD+s.DL+FGVcp.s.VDhlFASFlRpAsDVpplRclLG-cG.+pIpIIuKIENppGV...sN...hDEILcsoDGlMVARGDLGlEIPA.cVhlsQKhlIuKCNhsGKPVIsATQMLESMhpNPRPTRAElSDVANAVLDGsDCVMLSGETApGpYPsEuVphMpclslpAEpulsphsla ...............................................................................++TKIVsTl.G....P.A.....................o.p.s.....-..hl.ppll.pA.G.h.NVhRhNFS.H..G..s.a......--Ht.pphppl......R.p.s.tp.p..h..u..cp...................lu..ILh..Dh.................p........G.........P.....c.....I...R..su......p..........h.....c............s.........G....t......................lpLpt.G.pp..h..h...l.......s........s..........c.............t......s..h......t........................................p..c.............p..l.u.l..s...Y..p.s.................l.s.p....Dl.......p......s.....G..s.....hl......L........l.........D.....D........G....hl...p..........Lp.....Vh...p..l....p..........s.................s...........p.......................l..................h.s....c...V..h.....s....u...G........L........u........s.p......K...Gl.......N.......l........P...Gs.....s..l..s.....l..PA..L.oEKD..pp.D.l.p..a..u..h..ct........s...lD.alAhSFlRpupDlt....p....l....R....c.....l.........l........c....p...............p..........G......p.........p..........l.........p.........l.l....uK....IEptE...ul...cN....hD-I..l.....c.....s.....o.....D......G.....lMVAR...GD....L....G....VEl..P......h....E...c.......V.shl................Q.........Kt...l...Ic+s.p..................p.h.sKsVITATQ..ML-S.......Ml...psPpPTR..AEsoDVAN.A....llD.G.TDAVMLS.uEo..AsGpYPlEuVpsMu.pI.s..hpsEp...
.....t..................................................................... 0 656 1238 1641 +3874 PF02887 PK_C Pyruvate kinase, alpha/beta domain Finn RD, Griffiths-Jones SR, Bateman A anon Prosite Domain As well as being found in pyruvate kinase this family is found as an isolated domain in some bacterial proteins. 21.70 21.70 21.70 21.90 21.60 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.29 0.71 -4.54 173 6414 2009-01-15 18:05:59 2003-04-07 12:59:11 11 22 4576 233 1792 4498 1547 116.60 30 23.66 CHANGED s-...ulutuAspsApc....ls.spuIlshTpSGpTAphlu+hRPs.hP...IlulT.ptp................st+pls.lhhGVhs.......................h...hhsptp.s.........s-chlppu.lch...............uhcpGh.lpsGD..hlllsuG...hstsG..sTNhh+lhp ..................................................................-uluh.uuspsApc......Ls...sp..uIlshTp.S.GpTA+..hl.S.+..aRPs....sP......IlAlT.scp...........................................st+p..ls..Lh..hGVhP.......................h.....hhc.phs..s...........................s-ph.hpt...u...hph...............shc.p.Gh..h......pp...G.D....hl.l.lsuG......s...h.....u....tsG......sTNhh+lh.h............................................. 0 593 1132 1494 +3875 PF02827 PKI cAMP-dependent protein kinase inhibitor Bateman A anon Bateman A Family Members of this family are extremely potent competitive inhibitors of camp-dependent protein kinase activity. These proteins interact with the catalytic subunit of the enzyme after the cAMP-induced dissociation of its regulatory chains. 25.00 25.00 30.50 30.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.62 0.72 -3.86 9 148 2009-09-11 05:02:57 2003-04-07 12:59:11 11 1 62 106 88 142 0 68.20 41 81.75 CHANGED TDVEoshuDFhuSuRTGRRNAlPDI.s.SsAssso.u-LslKLutLsl.Ks-Gcpcspcssp-psscspsEu.p.c ........h-VEo..shu-FluouRTGRRNAlPDItu.SpAssso..s-Lsh+LusL.s.lpps-u..ptc...sppsspct.tpspspt....t.................... 
0 14 19 43 +3876 PF02173 pKID pKID domain Bateman A anon Pfam-B_1547 (Release 4.2) Family CBP and P300 bind to the pKID (phosphorylated kinase-inducible-domain) domain of CREB [1]. 25.00 25.00 26.10 26.70 24.40 22.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.19 0.72 -4.72 17 369 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 106 1 112 349 0 42.00 56 14.79 CHANGED -s.uDuspssp++.R-ILoRRPSYRKILNDL....uupsssh.....ptpp .........s-ultDSpKR.REILSRRPSYR........KILN-L....SS-ssuls....+h-............ 0 20 30 61 +3877 PF03832 WSK PkinA_anch; WSK motif Griffiths-Jones SR, Bateman A anon Griffiths-Jones SR Motif This short motif is names after three conserved residues found in a WXSXK motif in protein kinase A anchoring proteins. 25.00 25.00 25.50 25.50 23.90 23.90 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.16 0.72 -4.60 12 161 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 35 0 78 190 0 31.00 42 5.82 CHANGED pcshssWsShK+LVTsRK+s+ssscpcttps ...tcshssWsSFK+hVTsRK+s+sstcpcpt-.t.... 
0 4 9 23 +3878 PF00069 Pkinase pkinase; Protein kinase domain Sonnhammer ELL anon Unknown Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.65 0.70 -5.24 54 114309 2012-10-02 22:05:25 2003-04-07 12:59:11 20 4512 7677 2311 65911 134432 5368 237.60 20 40.43 CHANGED aphhptlGpGuaGsV..apuhcpsssp........hhAlKhlptp.ptppptt.t.h....................Elplhppl.p....Hss..I..lphhshhps..................psplh..llhEahsss.....cLh.phlp................tpshl...scppspphhtpllpulpYlHs.p..........sllHRDLKspNILls.psspl...................KlsDFGlAp...................ttsss.thpshsGT.hYhAP....El....lt...............sptasttsDlWSlGlllhphls.....Gp......sPFt.sp........sphthhtphhtsthphs...............................................ppspcllpphLphcPpcRho.....sppllp+sah ..............................................................................................................................................................................p.l.G...p....G...s.h....u..................V................h...h.......s......h......................t.....t...t........................................h...A.....l...K......h.....h....p.................t........t............................t...........t................................................................................E..h...p...h.....h....t...p....l......p..............................H.............p..........l...........l......p.....h....h...s....h...h...t.p......................................................................tp.p..h..h.........l...l...h...E.......h........h..t...s.t...........................................s.L..h....p....h..lp.................................................................tt..t.....h...............s....t....p.....h.......h...............t......h.......h....h........p.............l...............h............p............u.......l........p....a.......l.......H...p...p...................................................
..s.l.....l......H............R..............D.........l..............K..............P.........p................N...........l.....L.....l........s.....p....p...s..p.h....................................................................................+.l...s.......D.....F....G.....h...up...........................................................ttt..t.......h....p......s.....h......h........G......T....................h...........Y..............h..........u......P..............Eh..........h................................................................................t..t....t....h.....s..........t....s.......D.....l.........a...............u..............h..............G............h.....l....h........h.....p...h..h.t.......................u.p............................................s...a....st..................................................p......................h.......h..........t.......h..........h.....t..............................................................................................................................................................................................................................................t..h....p....h..h..........t.......h.....h.....t..............p.....s...t...p.....R.....s..................t...h........................................................................................................................................................................................................................................... 
0 23780 38306 53856 +3879 PF00433 Pkinase_C pkinase_C; Protein kinase C terminal domain Finn RD anon Pfam-B_135 (release 1.0) Family \N 20.90 15.00 20.90 15.70 20.80 14.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.80 0.72 -3.44 160 3537 2009-01-15 18:05:59 2003-04-07 12:59:11 19 82 369 79 2010 3207 9 48.00 30 6.79 CHANGED plputpDssNF.D.........p...cFT.pps.....................................................sthoss......s.thh..............................sshsp.......pp..FhGFoYsssp .........lpu.pDssNFD...........p....-FT..pps.................................................................................................................................................sthoPs................ttpsl.....................................ss.sp.................pp.......FtGFoYss............................ 0 477 765 1344 +3880 PF02253 PLA1 Phospholipase A1 Bateman A, Mian N anon Pfam-B_3500 (release 5.2) Domain Phospholipase A1 is a bacterial outer membrane bound acyl hydrolase with a broad substrate specificity EC:3.1.1.32. It has been proposed that Ser164 is the active site for Swiss:P00631 [1]. 
19.70 19.70 19.80 22.80 18.30 19.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.91 0.70 -4.96 78 1227 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 1131 11 196 747 50 244.60 43 80.00 CHANGED .uhlspchph-ptsp...pssaslpsa+sNYlLPhoas.sssNppshpst..........s.t..hcshEsKFQlShKhsl.hpslhs.t....sssLahuYTQpSaWQlYNp.p.SuPFRETNYcPElFhhhsss.....h...h.....hphphlslG...hsHpSNG+ussh..SRSWNRlYsshsacp.....s.sa..slsh+sWaRIsE...stpp.......D.DNPDIpcYhGph-lshsYths.cpphshhlRpNhp..sss+GulclsaoaPl........tspl+hYsQYFsGYGESLIDYNpcp.p+lGlGlsLs .................................................htsh.......t..hppp..cssaslhsYcsNYll.as.o.sshNppshss.t...........sps.....t++sEsKFQLSlthsL....ac..s.l.lG........susLhhuYTQpSaWQl.Ns.cp..SSPFR..E...TNYEPplFlsassc.....aphs...hhshR.p..l..phG...hsHpSNG+.......o-Pp................SRSW.N...RlYsphhh-p.......G.sa..hlpl+sWahlsp....s........D.DNPDIs+YMGYaplpl..uYphs..-thl....o.h..phpaNh.........ssuaGusEluhoY.P.I.............spcl+hY..sQhasGYGESLIDYNapp.oRlGlGlhLs................. 1 39 107 161 +3881 PF01735 PLA2_B Lysophospholipase catalytic domain Bashton M, Bateman A anon Pfam-B_2127 (release 4.1) Family This family consists of Lysophospholipase / phospholipase B EC:3.1.1.5 and cytosolic phospholipase A2 EC:3.1.4 which also has a C2 domain Pfam:PF00168. Phospholipase B enzymes catalyse the release of fatty acids from lysophsopholipids and are capable in vitro of hydrolysing all phospholipids extractable form yeast cells [1]. Cytosolic phospholipase A2 associates with natural membranes in response to physiological increases in Ca2+ and selectively hydrolyses arachidonyl phospholipids [2], the aligned region corresponds the the carboxy-terminal Ca2+-independent catalytic domain of the protein as discussed in [2]. 
19.70 19.70 19.70 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.57 0.70 -6.33 15 966 2012-10-02 11:19:24 2003-04-07 12:59:11 13 15 204 2 571 979 5 338.50 26 56.67 CHANGED IulAsSGGGaRAMLsGAGhluAhDsRs.ssss...sLGGLLQSoTYluGLSGGsWLVGoLAhNNasSlpslhsp..tp.slWslspSlhs.P..tGlsls.pshphasslscpVppK+sAGFNlSLTDlWGRALSashhs.hppGGsuhTaSSlpssshFQsuEhPaPIhlADGRh....PGsslIslNuTlFEFoPaEhGSWDsolpuFssscYLGTplsNGsPl.pspClsGaDNsGFlMGTSSoLFNphLLp.lNoos.....hsshlppllpchLp.-hSpcps.DIu.Ys.sNPFp-ss.h.tpstos.........sIssscsLaLVDGGEDG..QNIPLhPLLpspRcVDVIFAlDsSs.Dscp.WPD.GsSLVsTYERpau....spu.puhuFPYVPDspTFlN.LGLss+PTFFGCDu+NhTsh....sp.sPPLVVYlPNs.aoahSNlSTFKhsYs-o-RpuhIpN.GFcuATpsN.p.DssFhuCVuCAIlpRp.EphNhotPspCppCFpsYCWNGTls ............................................................lulhhSGGGhRAhhshhG.hlhuhp........................sGlLpssoYluGlSGu.sW.h....h....u....olh.s..s.........h...s.....p..sl................p.............................................h.t......h..p..ps...........hhp................................h.............p.......p..h..tha................h.p....pltt..+tpt...G..h....hohsDhW..................G.h.h.l.........s..p.hh.....................................................t............t...................s.........h.........phS...s...p..................p.t.h.p...pup.PhPIhsu......th.p.............................s.s..............................h......t.h....t........p.........hhE.FoPaEhG...........ph...tuF.hshchhGoph....Gp...................................t............................p.....thh..........h..............Ghhhus.....u...h....s.......thh..................................................................t..h..............................................................................................................................................................................................................................................................................................
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.......................................................................................................................................... 0 144 261 426 +3882 PF02988 PLA2_inh Phospholipase A2 inhibitor Griffiths-Jones SR anon Pfam-B_1254 (release 6.4) Family \N 21.70 21.70 21.80 21.80 21.00 21.40 hmmbuild --amino -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.98 0.72 -3.55 5 85 2012-10-03 01:43:02 2003-04-07 12:59:11 10 2 45 0 26 94 0 82.30 38 41.96 CHANGED +SCEICHNlG+DCsu.asEECuSPEDsCGTVLhEVSSAPLSlRolHKNCFSSSlCKL-aFDlNsGpEoYLRGRIsCC-c-cCEs ...............sCElC+s.hG.psCsG.hh..cpCsuscDsCsplh.hEloo.uslShpssaKsChoSshC+LshlssNhGpcsYlRu+hpCCpp-sCcs........ 0 2 2 12 +3883 PF00321 Thionin plant_thionins; Plant thionin Finn RD anon Prosite Domain \N 20.50 20.50 22.80 23.80 19.50 16.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.19 0.72 -3.74 37 120 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 29 31 30 148 0 44.90 51 37.08 CHANGED KSCCPoTsARNsYNsCRlsGs.spshCAshoGCKllSussCPssas.+ ..KSCC.oTsuRNsYNsCRhsGs.upthCAslssCKllSG.sCPssas+.... 0 4 5 14 +3884 PF01307 Plant_vir_prot Plant viral movement protein Finn RD, Bateman A anon Pfam-B_881 (release 3.0) Family This family includes several known plant viral movement proteins (e.g. Swiss:Q85292) from a number of different ssRNA plant virus families including potexviruses, hordeiviruses and carlaviruses. 
20.30 20.30 20.50 23.90 19.40 20.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.54 0.72 -4.31 78 326 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 133 0 0 321 0 104.10 33 92.51 CHANGED LoPPPDao+shlssulGlululhlahls+..s.sLPpVGDN..lHsLPHGGpY+DGTKplpY.suPspt...........ttpssphhshhhllhLs.hhIah.sph..........ptptC.spCs ..LoPPPDhocshhshAlGluluhhl.ahhsp..s.pLPps.GDN..lHpLPHGGpYpDGTKpIpY.ssPppt.............tp.ss.ph.t.shhllllLs..hhIhhhspht.........ppppC..pC.h................... 1 0 0 0 +3885 PF04819 DUF716 Plant_viral_rep; Family of unknown function (DUF716) Finn RD anon Pfam-B_5106 (release 7.6) Family This family is equally distributed in both metazoa and plants. Annotation associated with Swiss:Q9SLW7 suggest that it may be involved in response to viral attack in plants. However, no clear function has been assigned to this family. 25.00 25.00 28.30 27.20 23.20 23.20 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.89 0.71 -4.74 32 388 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 80 0 247 370 0 128.40 27 44.55 CHANGED hshuL...ppLhhuhAFh.EhhLFahHs.p........s+tulEsphH.LLlhslhlsshsshltlhhPpshh..lpLh+ushlhlQGsWFh.........................QhGFhLasP............................sstschc...tspccshhhlshpFsWalshshlhlsshYsh ................................................................h...slpplhhuhAhhhp.....hhLFhhHs..p........s..+tsl.-h.p.hH.LLlhslhlsslsshlclh...h..P..s.......s.hh............l..pl.h+uhhhllQGoWhh.........................QhGFhLasP................................hstsphc...psptpshhhlshtFsW....HlhhshlhhhshYs.h........ 0 44 107 147 +3886 PF05015 Plasmid_killer Plasmid maintenance system killer protein Bateman A anon COG3549 Family Several plasmids with proteic killer gene systems have been reported. All of them encode a stable toxin and an unstable antidote. 
Upon loss of the plasmid, the less stable inhibitor is inactivated more rapidly than the toxin, allowing the toxin to be activated. The activation of those systems result in cell filamentation and cessation of viable cell production. It has been verified that both the stable killer and the unstable inhibitor of the systems are short polypeptides. This family corresponds to the toxin. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.00 0.72 -3.84 8 1079 2012-10-03 00:18:00 2003-04-07 12:59:11 8 3 735 0 333 1012 133 89.40 31 94.60 CHANGED MhhsF+cKsLcpFapcss..optIsushsc+Lpc+Lphl-sAps.pDLphP...uh+hc+L+G.chcsaaSI+VNspaRLIFpacsu.s.....ssllsYlD.H ...........................................MlhsFpc...ct.hppha...p....t.tp.....tpt....h......s..p.h..tpth.t.++LphLcsAp.s...h...pDLphPs.....u.+LctL.pG......c..c..p......G.aSI+l...Ns...p...aRlsFc.apss..s................sh.lph.DYH.............................. 1 107 224 290 +3887 PF01672 Plasmid_parti Putative plasmid partition protein Bashton M, Bateman A anon Pfam-B_1163 (release 4.1) Family This family consists of conserved hypothetical proteins from Borrelia burgdorferi the lyme disease spirochaete, some of which are putative plasmid partition proteins [1]. 22.10 22.10 22.10 22.10 21.20 22.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.68 0.72 -3.55 34 451 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 32 0 18 262 0 83.30 42 46.53 CHANGED AYtYLKlApulp-GllppchlhEsGhppohphlcsccssshKKS+p..........N.IKPLRFQLKspESYDFYKpNuKFTuFlL-clFpspKD ........sYpYLKIApulp-Gllp.chlhcNGlppolphlcspp.s.pl+..KS+p..........N.IKPLRFQLKspES.YDFYKpNsKFTuFlL-clFpspK-........ 0 17 17 17 +3888 PF05016 Plasmid_stabil Plasmid stabilisation system protein Bateman A anon COG3668 and [2] Family Members of this family are involved in plasmid stabilisation. The exact molecular function of this protein is not known. 
This family also encompasses RelE/ParE described in [2]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -10.17 0.72 -3.59 158 6528 2012-10-03 00:18:00 2003-04-07 12:59:11 9 12 2562 20 1504 5217 580 85.40 16 88.52 CHANGED lthstpA..tpDlcclhchh............spphhpplppthpplt.ptsthscs.................h.thhphtht................sahlhYp..l......p.tthll.llplh+ppcth ...............................................h.hstpA..hc-lcclhphh...................tpph...h.pp.l...t..p.t.l.p.p.Lt....p.s.t.hspt.........t.........................h..sh.hc.h.t.ht................sahl.lYp.l..........ppsp.tl..l.l.lplhcppp......................................... 0 448 979 1251 +3889 PF00681 Plectin Plectin_repeat; Plectin repeat Bateman A anon Pfam-B_68 (release 2.1) Repeat This family includes repeats from plectin, desmoplakin, envoplakin and bullous pemphigoid antigen. 20.50 20.50 20.50 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.78 0.72 -4.43 119 3591 2009-01-15 18:05:59 2003-04-07 12:59:11 15 114 75 10 1456 3428 0 43.40 31 9.87 CHANGED hpLL-uQhuoG.GllD.Ptssp+lolcpAhccGllstchtppLhpsp .....................lLEuQhsoG.Gll.....D.Pt.....ss.....c.....+.lo.l.ppAhp+GllstchtptLhps........... 2 150 264 648 +3890 PF01523 PmbA_TldD Putative modulator of DNA gyrase Bashton M, Bateman A anon Pfam-B_845 (release 4.0) Family tldD and pmbA were found to suppress mutations in letD and inhibitor of DNA gyrase. Therefore it has been hypothesised that the TldD and PmbA proteins modulate the activity of DNA gyrase [1]. It has also been suggested that PmbA may be involved in secretion [2]. 
19.70 19.70 20.90 20.80 19.40 17.40 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.71 0.70 -5.14 158 5034 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 2139 8 1513 4013 3929 291.30 25 63.32 CHANGED sts-l....hhp...p.spshsl.phpssclcphpp.spstuhulRl......hh....ss.+hG.husos...shsp.ps......lp.phlcpAh..p....hAc.......hspt......thtthsshsthsht...............hp.t..........htsh...shcc.......thchht.chpptstphttthhs.......sshspsppphhlhsSpGhphppppshhththpshsp......pssphpps.............ht.h...................tthsscpl.....ucpu....scpAhptl..s..upphpsGph.s..Vllsspssusllp.shs....pshpucplhps..pShh.....ts.......+l....GcplusptlolhDDPp...h.....suhuohsaDsEGlss.pcphll-cG.lLpsa .................................................................................................................................u-l.hhpp.spstol...sh...c....pu..plcssph.....st-..pul.Gl.Rs.........................ht.............sp..+pG...hA....t......os.........slo....tu....................lt.pslpt..Ah..s.....................hA+....hsst................ttsshsshs.hshp....................hp.........h.sh..t..shcc.................tlclht.cs-ps.u.hs.t.-tclpps.......tuuhssthphhlhuso..c......Ghhs...s.php...shhplussllup.........csschcps...h..........shtshphh.....................t....sscth..................uccA........sc.p...A..lspL...s......utts.s...s..G.ph..s....Vlhusshs.....us.Llpcslu..p..ulpG.stsh+t.........sShh......ts..............pl.....Gc.p.l.s....s....p...h....lT..lh-Dsp..............l...........pthu.S..hsh............DsEG.s.ss..ppp..slIcsGlLpsa..................................... 0 444 903 1233 +3892 PF03332 PMM Eukaryotic phosphomannomutase Mifsud W anon Pfam-B_3713 (release 6.5) Family This enzyme EC:5.4.2.8 is involved in the synthesis of the GDP-mannose and dolichol-phosphate-mannose required for a number of critical mannosyl transfer reactions. 
20.00 20.00 20.20 20.10 19.80 19.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.37 0.70 -4.79 9 691 2012-10-03 04:19:28 2003-04-07 12:59:11 8 10 520 12 309 572 314 202.50 39 83.41 CHANGED hc-hLp.cLRp+lsIGlVGGSDhpKhtEQLs...tcsVlscFDYsFuENGLsuY+tG+.lupQolhpaLGE-KlpcLlNFsL+Ylu-lDlPhKRGTFIEFRNGMlNlSPIGRsCSpEERp-FtcaDKp+pIRpKhVcsL+ccFs..chGLTFSIGGQISFDVFPpGWDKTYCLpHlEp-..FcsIHFFGDKTh.GGNDaEIasDPRTIGHoVsuP-DTlphlpElht. ......................................................................................................phlt..cLc.p..+.sslulVGGSDhs.KhpEQl...........tp......s..l..h...........p..p.a...D.....ahF..sENGhhhY+.sc.hh..p..ps.h....hphLG-.-.p.hp.c.h.l.p.asLph.h...u..c..l...p...l......P..h.....+.......R..G.sFl.E.hRs...GhlNlSPlGR.ssoh-ERpt......ap.p.h.D.KcpplRpphltsLppcFs.................ths..Lpash..GGpIShDVFPpGWDK..sY.s...Lpc...l.............p...p...p..........t.......h..p.pIaFFGD+s.....G..GNDYEIasp..p.s.hGasVssPcDThphhcpLh..................................................................... 0 99 159 241 +3893 PF03901 Glyco_transf_22 PMP; Alg9-like mannosyltransferase family Finn RD, Bateman A anon DOMO:DM04662 & Pfam-B_7750 (Release 8.0) Family Members of this family are mannosyltransferase enzymes [1-2]. At least some members are localised in endoplasmic reticulum and involved in GPI anchor biosynthesis [3-4]. 
20.40 20.40 20.70 20.60 20.20 20.30 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.61 0.70 -5.48 16 1285 2012-10-03 03:08:05 2003-04-07 12:59:11 12 33 352 0 898 1337 89 359.40 19 69.81 CHANGED hhahhhlshRlhsuhhsth...csDEhapshEsh.HhhlashGh.TWEap.phulRSaha......hlhthshhhlthhhtcsphhl............................................hhhsRlhhulhusls-hhlaphlspph......................................shpluphhlhh.lsshhhhhsuochhssShphhhshluLthh................................................hshhhlsshuhhtRPpusllhlPlshh.............hL..hhpphhpta........hhhslshhshhhh...slllDphaY...............G+hlhsshNhlpaNVh....sstsshYGscPaaaYhhsshsthshshhhhhlhushhh...................................hhsshhhhLhlaShhsHKE.RFlaPlh.Plhhlsuuhslsph.....................phthpth........hhhhhlhhhsslshuhhhulh+phGsh..h....chhstlppss.......t.s....slh...................lhtphaphPsphal. ......................................................................................................................h....hhh.hph.h...shhs..h.....psDE.....ap..-...sh....t........h....h.................h.........h........h....................s........h.................s..W.-a..........................s.l....Rshha...............................h...h..h...h...h...........h...h.....h....h.........h.h.....h........................h..h.....................................................................hhhsRh.h.h.uh...h.s.h.h.s....h....hhhp....h.t..ph.h....................................................s.....p..h..s...hh....h..h...hh.....hh..sh.....h.hhh...htoc...h...hss.shthh..hsh.huh.hh..h.....................................................................................................................................................t...hh.hh....h..h....shhu..h.h...h..t..s.....hh.hhh...sh.h.h...................................h.l.........hh.t..p......h.........................hh.h...s..h...h..h...h...hh........h...s.lhlDsh.a.a....................................................
........t...p.h..s..h......s....hs..hlhaNlh.......tstssha.G.......s..p.......Phh.aYh..hpsh........h.h.h....s..h...h..h..h..h.h..h..hhhh..........h.........................................................................................h..h..h.h..h.hs.hhh.h..l....hl..h.Sh..sH....K.E.RFlhP....hh.P..hl...h...l.h.u....uh....sh.t.h.h..............................................................................................................................hhhhh.h..h.hh..shhh...u.h.hhuh.h....p..ths.......................h........h......t..................................................................................................................................................................................................................................................... 0 296 484 741 +3894 PF00822 PMP22_Claudin PMP22; PMP-22/EMP/MP20/Claudin family Bateman A anon Pfam-B_1393 (release 2.1) Family \N 24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.09 0.71 -4.59 13 2113 2012-10-03 00:20:40 2003-04-07 12:59:11 15 8 110 0 1175 2148 0 165.70 24 76.65 CHANGED MhlhLhuhhlsplusssLLhloTl.sshWhlushss..................htsGLW+sCsssosssphssts.....t.pssLpAspAhMlLSl....IhulluhllhhhQhhshcpGscahl........uGIhhllusLClllusuIYTs+hssthtpsh......scacaGauahLuWluFshshluGllY 
..................................................................................................................................h....hhuh.h.lshhuhl..sh.lh..s..ssh..s...h..Wh..hs.shh.s....................................................................hhapGL...WhsC...sh........p................u.......................s...............s.........t..h.....p.....C.......p........h...s...........s............h......h........s..........h.........s............s.............l....Q.........u.........s..R..u...L.....h..l....h..ul..............................llu..h..lu..h..l....l....s....h.....h......G....h....p.....C......h....p....t...s....p..p.ttsht..............................................hhhhuGl..h.a.l.l.......u..........G.lhs..l......l.ul...s..h......a.....s.......s......p........h........h..p..-......h...h....s..sh...................t.chch.........G.....huh.alGWsushlhhlu.Gsh.................................................................... 0 109 224 589 +3895 PF01625 PMSR Peptide methionine sulfoxide reductase Bateman A anon Pfam-B_1111 (release 4.1) Family This enzyme repairs damaged proteins. Methionine sulfoxide in proteins is reduced to methionine. 
22.30 22.30 22.30 22.30 21.60 22.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.86 0.71 -4.35 66 6530 2009-01-15 18:05:59 2003-04-07 12:59:11 16 26 4318 25 1573 4711 3705 153.90 41 66.75 CHANGED ppshhAGGCFWssEshFpplt........GVlpspsGYsGG..psps.PoYcpVsps......oGHsEsVclpaDPshloappLLchF..aph.aDPTphstQusDhGsQYRSuIahpsppQcphAcphhpphppp............hsptlsTplpsh..psFasA...E-YHQcYht+pspt..Ys..phhh .......................pshhAGGCFWuhEphF.p.p.l.s.........G..Vh...sstuGYsGG......ps......t.N......Po.....Y...c.pV.ss....sp............TGH......sEsVclsa....DPph...l..........S..a..cpLLp.ha..a.ch...h..D..PT.......s...h..s....p..QG.s..Dh.G.sQYRo.uIah...p......s.....p.....c......Qcp.....hAcpshp..p.hppp.....................hpps.I..sT.E..l..............t......s.....h........p..s.......F.......Y....A.......E-YHQpYh..c..KNPpt..Yst..hh............................................... 0 508 1007 1340 +3896 PF02366 PMT Dolichyl-phosphate-mannose-protein mannosyltransferase Bashton M, Bateman A anon Pfam-B_556 (release 5.2) Family This is a family of Dolichyl-phosphate-mannose-protein mannosyltransferase proteins EC:2.4.1.109. These proteins are responsible for O-linked glycosylation of proteins, they catalyse the reaction:- Dolichyl phosphate D-mannose + protein <=> dolichyl phosphate + O-D-mannosyl-protein.\ Also in this family is Swiss:Q94891 Drosophila rotated abdomen protein which is a putative mannosyltransferase [2]. This family appears to be distantly related to Pfam:PF02516 (A Bateman pers. obs.). 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.35 0.70 -5.01 22 2315 2012-10-03 03:08:05 2003-04-07 12:59:11 13 20 1595 0 896 3419 711 228.90 21 38.08 CHANGED hhhtlhuhhsRhaplshsspllasEsphschtshYhptpaahss+..PPluphlluhshh.....lsshsssa.a.s.h.phhss.sshhthRhhsuhhusLsssLsYhhshphshsthsuhluullhhh-suhlT.u+ahLl-uhLlFFhshuhhshhph...............hotphhhhhhlsGluLGhulssK.hsuhhslhhlhhhshhplWphh......cpph.hh........................h.ahhsphhhLlllPhslalhh..ahlHhhhhh ............................................................................................h...hhhshhhhhhhh....h..s.....sh.....h..........sc.................h..........t.....Y....h......c....h..s.....h.............h..........hp..........s.hs...........hh..h.......s..hh...............h.h.t...h...s..........s.....s...h.......h.....a....h...ps....h....s......p......h...h...h.....s............h.........s....................h...u......h.R.h..h.......ss..hh....usl.s.ls.l.s......hh....hs..h.c.............l.....h.............t........s......p............h........s.........u.hl..A.u.l.lhh...h......p.....s...h.h.....h.....s.h.u..p.h..s..l..L...D...s...h...l..s...h....a.l...s..s.u.h.h..s..hhhhh.................................ptt..h....t......t....s....h........h.......h....h..h.....h..l....h..Gl.s..h.Ghu..h..hs.K...h...h....s..h...h....s..l............l.h...h..h..h....h....h.....s...h....h..p.t.h..h..................p.th.h...h.h....................................................h...h..h....h..h....h..h..h..L..l.l..l..P.h.h.l.hlh...h..th..h............................................................................................................. 
1 277 520 754 +3897 PF03393 Pneumo_matrix Pneumovirus matrix protein Mifsud W anon Pfam-B_3641 (release 6.6) Family \N 20.80 20.80 21.90 51.30 20.00 16.30 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.47 0.70 -5.45 9 182 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 17 2 0 128 0 183.10 74 99.13 CHANGED METYVNKLHEGSsYTAAVQYNVlEKDDDPASLTIWVPMFQSShPADlLIKELtslNILV+QISTPcGPSL+VhINSRSAVLAQMPsKFoISANVSLDERSKLAYDlTTPCEIKACSLTCLKsKsMLTTVKDLTMKThNPTH-IIALCEFENIhTSK+VlIPTYLRSISVKsKDLsoLENIsTTEFKNAITNAKIIPYAGLlLVITVTDNKGAFKYIKPQSQFIVDLGAYLEKESIYYVTTNWKHTATRFuIK ..........................................FQsshs.s.lhc.LhslTITTLYsASQsGPILKVNASAQGAAMSsLPKKFEVNATVALDEYSKL-FDKLTVCEVKoVYLTTMKPYGMVSKFVsSAKuVGKKTHDLIALCDFhDLEKshPVTIPAal+SlSl+pp-.solEshhosEhcpAlTpA+IhPYuGLlhlhThsssKGhFKhltstsQhIV-LGsYlptESl........................ 0 0 0 0 +3898 PF03246 Pneumo_ncap Pneumovirus nucleocapsid protein Batreman A anon Pfam-B_3020 (release 6.5) Family \N 18.70 18.70 22.90 22.90 18.40 17.40 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.12 0.70 -5.94 4 663 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 24 70 0 329 0 129.50 46 99.85 CHANGED MSLppl+LsDlp.KculLspSpYTIpRssGsoTulospslQpclspLCGMlLhTcascac.sApIGhQYhhotLGp-co.pILRsuG.cVpsVhT..Ksaol.hpGKphKhE...VLsIpulssuhhtslEhpARcohsphLKEtu.plPpNQR.sAPDsslIlLCIuALlhTKLAusscsGL-sslRRAspVLpsthpRYPph-l.cIAcSFYELFE+KsYYhslFIcaGhA.uSopuGS+sEuLFsslFMpAYGAGQsMLRWGVlA+SspNIMLGHsSVQAEhcQVsEVY-hspKhGsEuGhhHlRpsPKAuLLSLTsCPNFuSVVLGNAAGLGIIG.Y+Gps.NpELFsAAcuYAcpLKEsNhINaSuLsLTsEE+EAhpp.LNhsDDsspc 
..................................................................................................................................................................................................................................................................................................................................................................................................................... 1 0 0 0 +3899 PF03438 Pneumo_NS1 Pneumovirus NS1 protein Bateman A anon Pfam-B_3221 (release 6.6) Family This non-structural protein is one of two found in pneumoviruses. The protein is about 140 amino acids in length. The NS1 protein appears to be important for efficient replication but not essential [1]. The NS1 protein has been shown by yeast two-hybrid to interact with the viral P protein [2]. This protein is also known as the 1C protein. It has also been shown that NS1 can potently inhibit transcription and RNA replication [3]. 20.50 20.50 21.70 266.10 19.60 18.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.85 0.71 -4.36 2 37 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 12 0 0 30 1 136.00 85 98.24 CHANGED MGSpoLShIpVRLpNlaDNDcVALLKITCaTs+LIhLTpsLAKuVIHTIKLsGIVFlHlITSSDhCPsssIlspuNFToMPlLQNGGYIWEhMELTHC.QsNGLlDDNCEIpFSK+LSDSphspY.NQLSpLLGhs MGsNSLSMIKVRLQNLFDNDEVALLKITCYTDKLIhLTNALAKAVIHTIKLNGIVFlHVITSS-lCPsNNIVVKSNFTTMPlLQNGGYIWEhhELTHCSQsNGLIDDNCEIKFSK+LSDSsMTsYMNQlS-LLGhD 0 0 0 0 +3900 PF02478 Pneumo_phosprot Pneumovirus phosphoprotein Mian N, Bateman A anon Pfam-B_2290 (release 5.4) Family This family represents the phosphoprotein of Paramyxoviridae, a putative RNA polymerase alpha subunit that may function in template binding. 
21.00 21.00 21.50 23.70 16.70 20.90 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.74 0.70 -4.87 8 859 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 20 0 0 264 2 99.10 76 96.37 CHANGED PEGKDILFMGsEAAKhAEAFQ+Sl+psuptt.pSIsG-.lpT........luEplpLPslspss............osKsuppKssptsssslh...-lEplpEchlssss-sps.sscss-ossps.....KK+VoFcsscs...G+YTKLEKEALELLSD.pEDsD-ESSlLTFEE+Ds..ousSIEARLEuIEEKLSMILGhL+TLslATAGPTAARDGIRDAMlGlREELIscIhsEA.....KsKAAEhh+EE-sQRuKIGsGS.VKLTEKA+ELNKIlEDpSoSGESEpEpE....-sDtpt-DI .........................................................................................................................................................................................................................................lREELIA-IIKEA.....KGKAAEMMEEEMNQRSKIGNGS.VKLTEKAKELNKIVEDESTSGESEEEEE.K-hQ-NNQt-DI....... 0 0 0 0 +3901 PF01048 PNP_UDP_1 Phosphorylase superfamily Finn RD, Bateman A anon Pfam-B_1190 (release 3.0) Domain Members of this family include: purine nucleoside phosphorylase (PNP) Uridine phosphorylase (UdRPase) 5'-methylthioadenosine phosphorylase (MTA phosphorylase) 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.13 0.70 -11.60 0.70 -5.39 159 15802 2012-10-01 20:25:13 2003-04-07 12:59:11 15 225 4828 837 4138 10648 3603 227.10 18 76.95 CHANGED plullsuss............tchtthtpp.........h.ph.hhp..............tthphhhGph......tsttlslstpG.........h.Ghspss..hsshthlp..th.......................tsctllthGss........Gul....p.s..lp......sGDlllssp.....h.......lp.........hshtss....hh...................................................hhsshsth..hssplhphhtp..........htphs........................hplpp....Gshhsssuhhhps................tschp...hhpphG.......ss.....sl.-MEssshstlApph.s..ls.....hh.slpsl....os.............ttt..................pphtphhppstpphtpl..ltphlp 
...................................................................................................................................................................................................hsllsu.s.............cht.hhp................htph...hs...................tshhhh.h.Gph............pup.p...l........s..l...h...t..sG.................h..G..hs.s.s....s........hhs.ttllp......th..........................................................ss..c...tl..l..t..s..Gs...s......................Gul...........pts....lp....................lGD....l..l..l..sps.........h.....................hp....................hssss.......h.h......................................................................................................................hh.st..hts....s....s....p...l.h..p...thhp................................sspphs.............................................................hphch.......Gshhos-shhhss............................................tphp..............hhpp.hu.....................sh.........sl..-ME.uusl.......s.t.s...u.tp....h...s.....l...........hh..slts.l......oDhh.t.............................................................ph.t........................................................................................................................................................... 0 1276 2469 3410 +3902 PF02233 PNTB NAD(P) transhydrogenase beta subunit Bateman A, Mian N anon Pfam-B_2220 (release 5.2) Family This family corresponds to the beta subunit of NADP transhydrogenase in prokaryotes, and either the protein N- or C terminal in eukaryotes. The domain is often found in conjunction with Pfam:PF01262. Pyridine nucleotide transhydrogenase catalyses the reduction of NAD+ to NADPH. A complete loss of activity occurs upon mutation of Gly314 in E. coli [1]. 
19.80 19.80 19.80 20.20 19.70 19.70 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.59 0.70 -5.71 10 2429 2012-10-03 09:55:27 2003-04-07 12:59:11 11 22 2140 27 707 1732 3888 439.00 53 87.05 CHANGED sLhphsYls...uulhFIhuLtGLSspcTARtGNhhGIlGMsIAllATl..lssstsshs..................hlluulllGusIGlhIAp+VpMTsMPQLVAhFHSFVGLAAVLVuhAsaltppstuhsssu.............s.sshphlElaLGlaIGulTFTGSlVAFGKLQGlIsS+PLhLPu..RHhLNhuLLlssVllhlsFhhssshssthssLll..................uluhlhGhpLVhuIGGADMPVVISMLNSYSGWAsAAuGFhLsNsLLIlsGALlGSSGAILSYIMCKAMNRSlhsVIhGGFGsssusuuucs.t...sGps+psoA-EsA-hLhsApSVIIlPGYGMAVAQAQaslA-lschLcccGlpVRFuIHPVAGRMPGHMNVLLAEAsVPYDlVhEM-EIN-DFscTDVVLVIGANDTVNPAAp-DPsSPIAGMPVL-VWKA+sVlVhKRSMuoGYAGV-NPLFa+-NTpMLFGDAKKss-pllcpl ................................................slsshsYllAulL...FI...huLtG...LS...p.cTuRpGNhaGhh..GMul.AllAT...l....h..s..s..s...s...t..s..hs..............................h.l.ll.uhll...GGsl....G........hhh..A....++Vc..MTpMPpLVAhhHShVGLAAVLVuh.su...alp....t..s..h...s..h.h.............................................h.ss..hphsElaLGl..h.IGAlTFTGSllAFGKLpG.......p.....l.........s...u.....pP..l.......hL.Ps....+..H.hl...Nlsh....ll..s..sh.h.h..h..l.hF.s.........t..s....s...u........h...........h..h.s..L.hl.hs...........................hlAhshGhtllhsIGGADM...........P......VVlSMLNSYSGWAAAAtGFhLs..NslLIlsGA............LVGSSGAILSYIMC.....K.......AMNRSFlu.....VIh..G.G......F.G.....s...........s..s........s.....s......s................u...s...s...t...........................ttGp......h..+.p...ho.......A.......-..........-.s...............A.thL.....p.sApoVIIsPGYGMAVAQ..AQasVtElscpL+t.c.Glp.V..+FuIHPVAGRhPGH.MNV....LLAEAcVPYD...hVhEMDEIN...cD.FusTDVVLVIGAND.sVNPA.Ap-.DP.sSPI.uGMPV....L-VaKAp....sVlVhKRSM.s.oGY....................AGVpNPLFa.c.-N.Tp.....ML.FGDAKpsl-sllcu................................ 
0 215 421 584 +3903 PF03833 PolC_DP2 DNA polymerase II large subunit DP2 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 24.00 24.00 24.00 40.70 23.00 23.80 hmmbuild -o /dev/null HMM SEED 900 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.62 0.70 -13.57 0.70 -6.88 6 125 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 111 1 80 138 362 805.40 45 69.80 CHANGED ccYFEpLpcEl-+hY-IAcpARc+GhDPossVEIPlApDMA-RVEuLlG.pGlAcRIRELs.t-huRE.sALclucEIl-G+FGDhc...+Ecth-pAVRTALAILTEGlVAAPlEGIAcV+Ic+N..sDsocYLAlYYAGPIRSAGGTAQALSVLVuDYVR+tlGlDRYKPoE-EIERYlEEV-LYcptss.LQYpPos-ElRLsscNhPlpIsGEuT-csEVSGHRDLsRVETNplRGGhhLVLsEGll.KAPKllKYscplslEGW-WLcclhcusccu-...........t...pEEc.tlst...............ssDKalcDlIAGRPVFuHPS+sGGFRLRYGRuRNoGhAThGlpPATMaLls-FlAlGTQlKsERPGKAusVVPVDTIEGPsVKL+NGDVl+IsshpcAhcVRs-VtEIL.LG-hLlsYGDFLENNHsLhPAuascEWWIQpl.sus..........................tsDsctl+.........ts.s-pAl+huc-aclPLHPcYTYaWHDlosE-lchLtshltp..p.chpspc.....pchVL.lch.ppsKclLEhLGlsH+V+-splll-paaPhhtuLGhslppp.....phph.hpstpssl-hlNtluslcl+c+A.ohIGuRMGRPEKAc-RKM+PsVHsLFPIGpAGGupRsItcAsccsp.......shcVEluht+CPsCGcpohpphCPsCGoh.c............................sclcshs+pcIcLs-lhccAhcslGlpc..hDclKGVKGMhStpKhPEPLEKGILRAKp-VaVFKDGTsRFDsTDlPlTHF+PpEIGVSVEKLRELGYs+DhhGsELcc--QlVEL+PQDVIlscsuA-YLl+VAsFlDDLLp+FYsL-tFYNlKscEDLlGHLVIGLAPHTSAGVVGRIIGFocAssGYAHPYFHAAKRRN 
.........................cYFcpLppclcchaclAccARppGhDPpscVEIPlApDhA-RVEsll........G................cG..VAcRIRE.Lt..pchu..+..E.....sALcluc-h.....s.....-GchGchs......+cpth-tAlRTAlAlLTE.GlVAAPlEGIucVcltcN..sDGo-YlslYYAGPIRSAGGTAQALSVLVuDYlRptlGlsca+P..p..-...-ElERYsEElpLYcptss.LQYpPps-El+hhscNhPlplsGEsT-c.EVSGaRDLpRV-........TNplRGGhhLVlsEGlhhKAsKlh+....as....c....p....lth-.uWs.WLp-llssptpsc.............................................t..c.....p......p.t..t..................................htsssKalc-lIAGRPVFuaPSc.GGFRLRYGRuRNoGaAosGlpPAoMhllD-FlAsGTQlKsERPGKAssVsPVDoIEGPhV+LpsGsVl+l-s......hccAhc...............l..+......spV-cIL.lG-hLlsaG-FlENNHsLhPuuYs.EWWhp-htpss...................................................hs.p.hp.............ss.ccAlchupchshPLHPcYTYhWcDloh--lttLtchlhp.......hpt.t..............................tp....l...l....p.......t..phKc....hLEhLhl.Hp.....h.....p..s.....pplhl..p..t..hs..hhhsLGhshp................................tht..t..........h...t...ss...psslchlNclushcl+p+AsohIGsRMGRPEKucpRcMpP.sspsLFPIGp.uGGspRsltcAsc.psp....................hpl-luhpcC..spCGp...ohhthCstCGspp...h.C..Ct........t..........................C.psthc.hpshpp......tplslpphhppAhcpls.pp...hc.lKGV+GhhSppKhsEPLEKGlLRAKpslhsFKDGTlRaDhTDlPlTHF+PpE.lslolE+L+ELGYpcDh.GpPLpp--QllEL+sQDllls................c.....s....sucYhl+supFlDDLL-+aYulp.FYsscpp-DLlGcLVhGhAPHTSAGllGRlIGFopAtVGYAHPYFHAAKRRN........................................ 0 17 48 68 +3904 PF01620 Pollen_allerg_2 Ribonuclease (pollen allergen) Bateman A anon Pfam-B_1050 (release 4.1) Family This family contains grass pollen proteins of group V. Swiss:Q40963 has been shown to possess ribonuclease activity [1]. 
25.00 25.00 77.70 26.60 23.70 23.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.09 0.71 -3.89 12 100 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 13 15 2 117 0 121.20 46 87.34 CHANGED TVALFLAVALVA.....GPAASYAADuGYsP..............sssTPAssus....AuGKAT.T-EQKLl................EDlNAuFKAAsAAAAssPPADKaKT..FpssF.osusKu.lAstuo.........psstLssKLDsAYplAYcuApGATPEAKYDAFVuuLTEALRVIAGsLEVHAVKPAs ............................................................................................................................ttE.plI................-clsAuFKsAssAAsusPssDKFps..FEAuF.stu.Kt...........ssuu..uht............shphlPpL-AA.hK.AYsAssuAsPEsKYssF.AuLocAlpshuts.cV..s................ 0 0 0 2 +3905 PF01190 Pollen_Ole_e_I Pollen proteins Ole e I like Finn RD, Bateman A anon Prosite Family \N 20.80 20.80 20.90 22.70 20.50 20.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.88 0.72 -4.15 91 693 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 68 0 380 702 0 93.10 23 40.42 CHANGED VpGhVaCpsCpt..th.....sshsltGApVplpCcs....stt......thhtpuhTDppGhFp...ltl.t.s............tt.ptCpshLhso...Pps..sCshst.............uhpsupl .......................VpGhVhCssCpt...shp.....tstsltGApVplp...Cpst......pst........hthptp.usTDpsGhFpltlss.s.................tttptCpspLh..so.....sps.......sCst.ts...........h................................. 
1 40 199 290 +3906 PF00659 POLO_box POLO box duplicated region Bateman A, Mistry J, Sammut SJ anon Prosite Family \N 21.00 21.00 21.00 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.22 0.72 -4.00 81 913 2009-09-12 20:48:28 2003-04-07 12:59:11 13 8 221 70 585 888 4 67.70 25 17.34 CHANGED aps+hulsapLSsusltV..FsDpo+lll.ssptp...tlpYls....p................ptppppashsph..........spplpc+lphlc .........hps+hulsapLossoltV..F.s......DpT+lll..ssstp........tlpYls.......c.........................................ptpp.psathsp..............hsppLtp+lphh.............................................. 1 224 294 444 +3907 PF02563 Poly_export Polysaccharide biosynthesis/export protein Mian N, Bateman A anon COGs & Pfam-B_1505 (Release 7.5) Family This is a family of periplasmic proteins involved in polysaccharide biosynthesis and/or export. 26.80 26.80 26.80 27.00 26.00 26.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.59 0.72 -4.22 183 3850 2009-01-15 18:05:59 2003-04-07 12:59:11 11 25 1898 24 1011 3117 920 91.50 27 23.26 CHANGED sshsssssss..sYplGsGDhlplpVaspsc..............................................................sht......VsssGpIshPhlGslplsGhTssplppplpppLpp..hl...psP.pVsVpltp ............................sh......ts.pYplusGDhL....pl..s...V...a..s.psp................................L................................................s.s.......shh............VsscGsI.hh...P....h.l.G.p.lplsGhThsp...lpspIps+Lsp...hl......ps.P...pVsVpl..t................................... 0 301 639 825 +3908 PF01743 PolyA_pol Poly A polymerase head domain Bateman A anon Pfam-B_814 (release 4.2) Domain This family includes nucleic acid independent RNA polymerases, such as Poly(A) polymerase, which adds the poly (A) tail to mRNA EC:2.7.7.19. This family also includes the tRNA nucleotidyltransferase that adds the CCA to the 3' of the tRNA EC:2.7.7.25. 
This family is part of the nucleotidyltransferase superfamily. 23.00 23.00 23.80 23.10 22.80 22.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.78 0.71 -3.92 21 6516 2012-10-02 22:47:23 2003-04-07 12:59:11 15 54 4623 24 1693 4940 3053 124.10 35 27.78 CHANGED hYlVGGsVRDhLLG+ps...c.......DhDlsos.........AssppltphF.tpphhh.....Gtcathhplhhss..ph....lElAThRscp..tshts.pp.phs............ol--DhhRRDFTINAlhhs.....sssp....llDhhs.GhpDLcsphlR ..............................................................................................................hYlVGGuVRDhL...L...Gp.s........+...........................................DhDl.s.s.s..................................................up..P..c.p...h.p...p..h...h......p......psh.......h..................................G......h.......ca...t...sh..tV.h..h...ps...pt......................hEl.sT.hR...s...-p.t.......t..s....s..s..p.....p...s..ps..phsp.................................................................ol-.-Dh......tR......RDFTINAlAhs.......................t.s...s...p.............llD.a...s...G..h..pDLps+llR................................................................................................. 0 581 1098 1445 +3909 PF01518 PolyG_pol Sigma NS protein Bateman A anon Pfam-B_803 (release 4.0) Family This viral protein has a poly(C)-dependent poly(G) polymerase activity [2]. 
25.00 25.00 27.90 27.10 16.50 16.10 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.14 0.70 -5.97 4 145 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 59 0 0 124 0 334.10 58 99.39 CHANGED MssolRluVSRssuGsuuQTlhpsahLLRssloscshpsshthQ.+FPshtpss+pLsPLtshstDRhl++ssltplhoR-hhhssDh.tphsacssshPho.sspuhphtcLlsshauEt...h-Hl....hPs.usoYsPuulA+hhohsMAGhsP.cG-shhhcssl.aLAA-LlsaphsLPYhls.lDGsosI.shPotsVEchLss.lutLsslDhSaGlEsRuDpRhTpDsupsSSRSlNEL.scEptt+h.shKlhLsh.shQLKlELDsLAcp+sE.pt.thlsuFGp+LFpQhShFusIDp-LhpLslhIKDpshths.tplhphWo.IRou.ucslssuuhslplcsGsWhltcG-DstLoVpPsRl ...........................................tshRhslS+.ttsssuQplh.NaYLLRCNISADG.RNAT+AVQuHFPaLSRAVRCLSPLAAHCADRT...LR..RDNVKQlLTRDLPFsSDL.INY.AHHVNSSSLT..TSpGVEAARLVAQVYGEQh.shDHl....YPoGStTYCPGAlANAISRIMAGFVPpEu-sFs.sGsIDaLAADLlsapFVLPYMls.VDGcsp.I.VlPot.TVEEMLss.suLLN.sIDASFGIES+SDQRMTRDAAEMSSRSLNELc-HEpRGRM.PWKIMLAhhAsQLKlELD...ALADpRsEsQuNAHVTSFGuRLFNQMSuFVsIDRELMcLALlIK-pGFAMNPuQlsuKWo.IRpS.usohshSuhplplctGpWhhhp..................................................................................... 0 0 0 0 +3910 PF00738 Polyhedrin Polyhedrin Bateman A anon Pfam-B_423 (release 2.1) Family These proteins are found in occlusion bodies in various viruses. The polyhedrin protein protects the virus. 
25.00 25.00 71.90 71.70 20.60 20.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.34 0.70 -5.29 17 392 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 161 4 0 238 0 188.10 78 98.09 CHANGED lGRTYVYDNKaYKNLGuVIKNAKRK+HhlEHEhEE+pLDsLD+YhVAEDPFLGPGKNQKLTLFKEIRNVKPDTMKLVVNWSGKEFLRETWTRFMEDSFPIVNDQElMDVFLVlNhRPTRPNRCY+FLAQHALRCDPDYVPHEVIRIVEPSYVGsNNEYRISLAK+GGGCPlMNLHSEYTNSFEpFlsRVIWENFYKPIVYVGTDSAEEEEILLEVSLVFKIKEFAPDAPLaoGPA .........................................................LGPGKNQKLTLFKEIRsVKPDTMKLVVNWSGKEFLRETWTRFMEDSFPIVNDQ.E.lMDVFLVlNMRPT+PNRCYKFLAQHALRCDPDYVPHEVIRIVEPSYVGsNNEYRISLAKKGGGCPlMNLHSEYTN.SFEpFl.s+VIWENFYKPIVYlGTDSAEEEEILlEVSLlFKl.................... 0 0 0 0 +3911 PF03364 Polyketide_cyc Polyketide cyclase / dehydrase and lipid transport Mifsud W, Mistry J, Wood V anon Pfam-B_1457 (release 6.6) Family This family contains polyketide cylcases/dehydrases which are enzymes involved in polyketide synthesis. The family also includes proteins which are involved in the binding/transport of lipids. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.71 0.71 -4.15 95 3012 2012-10-02 19:24:03 2003-04-07 12:59:11 15 30 2234 27 1131 3986 1993 127.70 23 69.89 CHANGED lshssp.plaslls.Dh...cpascahPhsp...........s..scllppsst........................................phphshtshpppa...sscsttphspp............................................ht.hpspWphhsht.............................................tstsplphphph..pht..hhshhhthhhpphhpphhpshp ....................................................................lshsscphaplV.s..Dl...ps...Y.PpF..l..Phsp...........................u..s...c..l.lppsss..........thhA.......................................................................pl.p.l.u.h...s..u..l...p.p..s..F.......so.c.s..p..h..p.spp.................................................................................l.h..p.h..h...s...G.....s..F...c...p...h....ps..t..W..pFps..hs.............................................................tsssclphp..l.ca........-..a..s..s..t...l..h...p.h...h..huhhhpchspphlpuF................................................................................................................................. 0 346 687 944 +3912 PF01736 Polyoma_agno Polyomavirus agnoprotein Bashton M, Bateman A anon Pfam-B_1917 (release 4.1) Family This family consist of the DNA binding protein or agnoprotein from various polyomaviruses. This protein is highly basic and can bind single stranded and double stranded DNA [2]. Mutations in the agnoprotein produce smaller viral plaques, hence its function is not essential for growth in tissue culture cells but something has slowed in the normal replication cycle [1]. There is also evidence suggesting that the agnogene and agnoprotein act as regulators of structural protein synthesis [1]. 
20.40 20.40 26.00 26.00 18.30 18.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.87 0.72 -4.33 3 130 2009-09-11 01:00:53 2003-04-07 12:59:11 11 1 5 0 0 117 0 58.10 81 91.17 CHANGED MVLRQLSRQASVKVuKTWTGTKKRAQRIFIFlLELLL-FCcGEDSVDGK.RK+souLTEps-S .MVLRQLSRKASVKVSKTWSGTKKRAQRIlIFlLEFLL-FCpGEDSVDGK.Rp+po.uLTppp.S........ 1 0 0 0 +3913 PF00718 Polyoma_coat Polyomavirus coat protein Bateman A anon Pfam-B_748 (release 2.1) Domain \N 20.10 20.10 20.20 27.90 18.60 19.90 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.15 0.70 -5.51 12 3425 2009-01-15 18:05:59 2003-04-07 12:59:11 15 1 57 68 0 825 0 141.30 62 81.55 CHANGED LLlKGGlEVL-V+TGPDShTpIEAaLNPRMGps.........tphaGFSpsIslusshssDsPppspLPsYSsA+ItLPhLNEDhTCsslLMWEAVSlKTEVVGloSLhNlHu.tup+s...sshGuuhPlpGhsaHhFAVGGEPL-LQGlhpNapssY....PssllsPp.....shsspuQV..LsPphKA+LDKDGtYPlEsWsPDPS+NENTRYFGoaTGGhpTPPVLpFTNTsTTVLLDENGVGPLCKGDGLYLSuADIsGhasppss.pQpaRGLPRYFsloLRKRhVKNPYPloSLLsSLFsshhPphpG ......................................................................................................................................................................................................................................................tsPPVLphTNTsTTVLLDE.GVGPLCKuDsLYlSAsDlCGhF.........TspsG.sQpWR....GLsRYFKlpLRKRpVKNPYPIShLLssLhN+hs.+VsG................... 
0 0 0 0 +3914 PF00761 Polyoma_coat2 Polyomavirus coat protein Bateman A anon Pfam-B_871 (release 2.1) Family \N 25.00 25.00 43.10 25.30 18.70 22.50 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.24 0.70 -4.93 8 383 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 41 1 0 366 0 225.50 48 91.94 CHANGED GAsLolLhphlApVuElAuuTGhSVttIhuGEAhAsIEl...plAsLs.slE.Gl....sssuEAlAAlGLTspsaAllsut....P....sAl...uGh.......AAlh.QTVoG....uSAl...AssGhpaausWcHcVssVsL.tp.sMALplahP--.hDILFPGspoFsp.laYLDPh.+.WGsSLFpsVGpulW.c.lhRs......shspl..s..o..+-lptRTsp.lp.......-oLARaLEssRWslosuPlshYssl....psYYupLsslsPs.hRQlApR.h..hshG+o...slDpsDuhpthspch..-l..ppPp.sp..............SGpaIEKhtAPGGApQRsAPDWMLPLLLGLYGDlTPshcuacDp.ppcc+c ............................................................................................................................................................................................MALplapP--haDILFPGVssF..VNsl...pYLDPt.H.WGPSLFpoluQuhWp.llp-...........slstl..s..S..pElpcRTpchhh.......-sLARhLEpopWslsN...u...P.hshYshl..............p-YYucLs..sl.pPs.lRQl..Ap.Rctp.lshG+oa..slDpsDslpthspph..cl...cs.p..lp..............SGEaIE+shAPGGANQRsAPpWMLPLlLGLYGsVTPuLcuhE..DGsppKcc.R................................ 
0 0 0 0 +3915 PF00348 polyprenyl_synt Polyprenyl synthetase Finn RD anon Prosite Domain \N 20.70 20.70 20.70 21.10 20.60 20.60 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.58 0.70 -5.54 16 11487 2009-09-13 10:32:47 2003-04-07 12:59:11 12 34 5158 295 3330 8899 6348 242.40 28 77.16 CHANGED llttht.hhhtuG.KRlRPhlllhsuchlu.........hphsshhslAsslEhlHshSLlHDDl..MDsuclRRGpPTsHtpaGpssAlLsGDulhspAFphlsphp....h.sphphhtl.cLspsssspG.lt.QhhDlpst........chohcphhphhptKTutL.FtsusphuulhuGss..tctpcsLpcauhplGhAFQlhDDlLDhhusspplGK.sGsDlppsKsThsslhuLct...uppctppllcpshpp..pphshpshsth.tlcthtthhh .....................................................................h....h..hh.h.h.s.G.G...K.RlRP.hls.l.h...s.s.p.h.hs....................................hp.t..p.t....h.....h.t.hA...s.u..lEh....lHs...hoL....l.............HD.....Dl........hD....ps.c..hR..R.Gp.s.....................T.........s...............p.................t.................t................a..................G...................p..........s................s.............A................l......LsG.D.......hL.....h...s....hA....ap.h....l.s....p.hs.............................p.........p.......h...h...t.....l.....h.....p.....h.......s..............h....s.......s...h..............s...........p.G....ht.....Qhh...c....h..t...s.t..................................................ph.s.h..c...p...h....h.p.l....h...pt....K...T..utL..ht........tu.sph........G.u.........l.....h......u............s......s.................s........t..........p........................h...............p.......t.....l....p....pa...u...pt.l.G..h.AF.QlhD...Dl.L.D.h..........h...u.......s......s..p............p.....h..G...K.s.s.....G.s...Dl.p........ps..K...Th....P..hl..h...u...hcp............up.t....t...t....t...t.....h...h..p...p.....t...........t..........................................................ht....................................................................... 
0 1100 2096 2820 +3916 PF01943 Polysacc_synt Polysaccharide biosynthesis protein Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family are integral membrane proteins [1]. Many members of the family are implicated in production of polysaccharide. The family includes RfbX part of the O antigen biosynthesis operon [2]. The family includes SpoVB from Bacillus subtilis Swiss:Q00758, which is involved in spore cortex biosynthesis [3]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.75 0.70 -5.13 31 9954 2012-10-02 21:24:20 2003-04-07 12:59:11 12 25 3559 0 2091 9084 1735 278.40 14 58.36 CHANGED lh+NhhhhhhsplhshllshlhhshluRhLuspsaGlhuhshshsshhshlsshGlsssls+plutsps.........phtsthhhsshhshhlhsllhhhhhhlht............hhshschshlhhlhhhhhh..hhhsss.hhshlhpuh.-phphhslpphlpplshhhhhhlhlhhhss.....lhhhshhhhhusllshllshhhhph.h..h...........hphhhht.hpth+phlp.uh.lhlsplssslhsthspl.hluhhhGsts........lGhYssuhplhhhh.tlhsshss.shhPhhuclh 
.............................................................................................pssh.h.hhh.up.ll.s.h..h.l.u.h.l.h.h.h....h...h..s...p....h....l.......G.....s......p.....s...h.....G.....l...h...sh...s..hs...l...h...s.h...h....h...h...l....s...s.......h...G.....l....s....s....u....l....s....+.h.....l..u..p..hps.............................ph..h.p.....t...h....h......h....s.....h.....h.....h.....h.....h.....h.....h....h...s....l.....l....h.....h...h..h..h..h..lhs............................hht......t....s.....p.....h.....t..........h..........h.....h....h....h....h....s..h..shh.........h.h....s...h....h.......s.....h.........h.....p....s....h...h....p....u........h........p.......p......h.....p....h........h...s....l....p.......p.......l....h...t....p....l....h......t...l....h.....h....h...h....l....h...h..h.h..h.h..t...........................hh..t...h.....s..h..h..h..h..h..u...s...h..l.s...h...l....h......s...h..h..h...h...h...h.hh....t...........................................ph..t.h.............h...t....h....h.....p...p........h..h......p....h.....u....h.........h....h....l......s....s.l....s.h.....l....h....p....hl.-.p.....h....h.........l...s...t...h...h...sh.st..................................hG..ha....s..h...u.h...p.......l.h.t.h.h.h.h.h.h.sshhs..shhPhlst..h............................................................................................................. 0 663 1369 1745 +3917 PF02719 Polysacc_synt_2 Polysaccharide biosynthesis protein Mian N, Bateman A anon Pfam-B_1536 (release 5.5) Family This is a family of diverse bacterial polysaccharide biosynthesis proteins including the CapD protein (Swiss:P39853) [1], WalL protein (Swiss:O86159) mannosyl-transferase (Swiss:O05349) [2] and several putative epimerases (e.g. WbiI Swiss:O69130). 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.51 0.70 -5.52 77 3666 2012-10-10 17:06:42 2003-04-07 12:59:11 10 11 2439 14 813 25131 16928 278.60 42 55.79 CHANGED lLVTGuGGSIGSEls+Qllph....sPpclllaspsEhshYplcp-...h...h.t.pl.......lGDVpDpcplppshpthpl-hVaHAAAhKHVPlsEh.NPhEul+sNlhGTtNlhcAAlpssVcphVhlSTDKAVpPsNlMGAoKRhuEhlh.Ahsppp......................TpFssVRFGNVLGSpGSVlPLF+cQIppG.GPlTlTcPchTRaFMTIsEAspLVlpAu..shucGG-.....lFVL-MGpsV+IhDLAcphl.cL.Ght..........DIpIchsGlRPGEKLYEELlhps-shpsppasclhtsps .......................................................................................................................lLlTGuu.GS.I...Gu..El..s......+..p.l..h.ph...........s..P...c.......c.....l....l.......l.......h.......s..........+.........s.......E......h........s.......h....a....p......lp..p-........................l..........h..t...p...............l.....p....h.........................h....I..........u......D.....V...................p.............D.......p......p......p......l......p....p......s.....h.....c......................h.............p............s.....-.........h...........V...a....H....A....A.......A......h........K......H.....V......P........h......h.............E.......h.......N.....P.....h....E....A....l.....+....s........N..........l....h.....G........T....p..N........l.......h.......c...............A..............A.......h.........p...............s..............s................V..............c.................+............h.................V..............h...............l...............S.................T..................D.................K.................A....................V................p.................P................s................N...............l................M............G................A................o................K..R.....h....u.....E...h....l...h...u...h.spps.......tp..............................Tp.a..s..s.V..R......F......G....N.....
..V.......L.......G.......S.........p.........G..........S.......V........I......P....l....F....c...c...Q...I.......p....p......G.......u...P...l..T....l.......T......c....P.......c.....h.......T...R...a...F..M...T...l...s...E....A..s....p....L...V.......l...p...A.u........s...h...u.........c........u....G..-................l....F....V...h..c..M....G....p..s...l..+...I...h...D...L...Ac...ph.l...p.L..G............................-lp.I.c.h.s.G.l.R..........P.........G.EKLaEELlspp..E..t..hps.pphtchahh........................................................................................................................................................................................... 0 272 542 695 +3918 PF02530 Porin_2 Porin subfamily Bashton M, Bateman A anon Pfam-B_1122 (release 5.4) Family This family consists of porins from the alpha subdivision of Proteobacteria the members of this family are related to Pfam:PF00267. The porins form large aqueous channels in the cell membrane allowing the selective entry of hydrophilic compounds this so called 'molecular sieve' is found in the cell walls of gram negative bacteria. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.36 0.70 -5.41 16 586 2012-10-03 17:14:37 2003-04-07 12:59:11 9 3 282 0 170 720 19 326.40 28 88.24 CHANGED DAlVtAEPEPsEYV+VCDsYGsGaFYIPGTETCl+luGYlRhphshGs.....t...sh.ss.ssstpssastpoRhplphsTto-TEhGsL+sahchhhshssss...su..t........................................hslt.AaIpLGGh+sGpshShaDs.hlGhtuDslss.......spthNplpYpa-uGsuFsAulSl--tpusss................................hs...hsspssslVuuVcsstuhushpsssu.................aDs-hcpsAs+stlslp...susu.olhlsGsYusus..Y.........................................hsssp...................Wus.....huuhpapsssKsslssuhph.......................sshstatlGsslcYshlcslohps-lsYschsp.......................phpsp-slsGh .....................................................................................................................................................Dh.lh.c..sh-YV+lCshaGsGaaYIPGT-TCl+ltGYlR..h..-s..t.h.ss................................s.h....s.s..p..t.pss..hst..tu..Rh...t.lphsot.opTEhG..s..Lps...ahph...ca.s...hs..sss.t..su............................................tts..htlphAalph...u....G...hphGhs.S.Fps..htu..h...s.s.slss...sh.ss.ssh...ps.s.p..lsYTashG.s.G...aoA.s...lulEpss..ssss.....................................................s.sh......h.ss.h.h.Pcllutlch..stuaG..uhthssA...........................................................a-s.....s.......h..p.t.....aA......s..p.s...tssls......lss.t.slhlpu..sYu..su..s...................................................................................................................................................................................................ssstp......................htt...W..ss.t..Wss............................au.uh.p...att.st+h...t.h.shthsh.....................................ssh..ttht.ss..ssltap.V.shshss-htYhp.hs....................................st.hsh..........................................................................
......................................................................................... 1 38 89 115 +3919 PF01379 Porphobil_deam Porphobilinogen deaminase, dipyromethane cofactor binding domain Bateman A, Griffiths-Jones SR anon SCOP Domain \N 22.10 22.10 22.70 22.70 21.70 22.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.42 0.70 -5.42 19 4004 2009-09-11 15:01:14 2003-04-07 12:59:11 15 21 3728 9 1153 3057 2572 207.30 46 65.99 CHANGED pl+IGTRpSpLAlhQuphVhctLcphhPs....hph-lhhlpTpGD+ILDpsLuKlG...GKGLFsKELEpALLps.....clDlAVHShKDlPs.lPcGLhlusIscR-DP+DAll....pshpsLppLPpGullGTSSLRRpuQLttphPcLchcs.lRGNVsTRLpKL-ss..-aDAIILAsAGLpR...LGhpsclsp.....h.s-phLPAlGQGALuIEsRpsDpchhslLp ................................h.lpIuTRpS..LALhQuphVtstLp..tt..a..P.s.........................lpsEl.lsh.sT.pG.D...t....I....L....D..s..s.L.........u...+.......l.G............GK.G.LFsKELEpALLcs.......csDlA...VHShK.......D......l......P..s..t..h..P..pG.LsL.u.sls..cR.ED.....PR.DAhV.....o....................p.....s..............h......t..........s....Ls...s........L...P..........p....G..u...........l..........VGTSSLRR....p....s.......QLtt..........h.....R........P..........D..........L.......pl.p....s..l...RGNlsTRLpKL............c........sG...............-aDAIILAsA..GLpR................L.......G.h..p..s.....c....lpph.................lss-.....hL.......P..........AsGQ.GA....luIEsRts.D.pchhtlL.s....................................................................................................... 
0 364 739 986 +3920 PF03900 Porphobil_deamC Porphobilinogen deaminase, C-terminal domain Bateman A, Griffiths-Jones SR anon SCOP Domain \N 21.30 21.30 21.40 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.62 0.72 -3.87 21 3787 2009-09-11 23:02:33 2003-04-07 12:59:11 10 15 3616 9 1082 2829 1539 74.20 34 23.23 CHANGED thpshAERuhl+pLpGGCpVPIusauph.....tsp................lpLpuhlsssDGh........hhchptps.....t.pcutclGhclAcclhs ...........h.hpVpAERuhhppL.-GGCplPIuuaAplp......ssp................................................................lp.Lc..u.lVu.s.sDGsp...............hlcsphpG.............................s.pcucplGhplAccLl.................................................. 0 334 692 920 +3921 PF00280 potato_inhibit Potato inhibitor I family Finn RD anon Prosite Domain \N 24.50 24.50 25.20 24.50 24.30 24.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.02 0.72 -3.83 60 372 2012-10-01 19:32:51 2003-04-07 12:59:11 13 2 66 54 144 396 2 61.90 42 75.93 CHANGED KsSWPELVGhsuctA+tlIp+-pPslpsl.ll...sGos.VTtDa..cssRVRlaV...st..tsh.VspsPtlG ...........................KooWP.ELVGhsscpAc.ph.Ihc..-pPclpll..Vl....PsGoh....VThDa..cssRVRlaV.......st........sh..VspsPplG.............. 
0 20 82 119 +3922 PF00767 Poty_coat Potyvirus coat protein Finn RD, Bateman A anon Pfam-B_868 (release 2.1) Family \N 19.70 19.70 20.20 19.70 18.60 18.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.56 0.70 -5.09 33 6595 2009-01-15 18:05:59 2003-04-07 12:59:11 13 28 378 0 0 5499 0 218.30 55 33.96 CHANGED +D+DVssGT.sGTFsVPRlKsls.sK.hplP+l+G+slLNL-HLLpYpPsQhDlSNTRATppQFpsWYcuVKp-Y-lsDp.pMsllhNGLMVWCIENGTSPNIs..GsW.....sMMDG-EQVEYPLKPll-pAKPThRQIMsHF.SDsAEAYIEhRNsccsYMPRYGLQRNLsDhSLARYAFDFYElTS+TPsRAREAHhQMKAAAlRssssRhFGLDGNVuTp-EsTERHTAsDVN+NMHoLLGs+h ..............................................+D+DVssGo.sG.phsVPRl..ct..h..opK.MphP..p.h.c.G.pslLN..L-HLlpYpPpQ...h.DluNTRATppQFcsWa-uV+t-Y-l.s.-s...pMsl.lhNG...L..M.V..W.CIENGT.S.P....slN..GsW...............sM..M..D....G...-...-..Q......V..E.Y.P.lKPll-pA..+...PT.....hRQIMtHF....S.DsA.E.A.YI.E.h.R.Nt.pc.sYMPRYGLpRNLpDhSLARYAF..DF...YEhs.S+.TP.sRAR.EAHhQ.MKAA.AL.+..ssps+.LFGLDGsVuopp.EsTERHTspDVspshHsLLGhp.s...................................... 0 0 0 0 +3923 PF00157 Pou pou; Pou domain - N-terminal to homeobox domain Sonnhammer ELL anon Prosite Domain \N 21.00 21.00 21.00 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.42 0.72 -4.33 20 1357 2012-10-04 14:01:12 2003-04-07 12:59:11 12 9 164 22 578 1331 1 69.20 59 18.40 CHANGED c-ssshcELEpFA+p...FKp+RIsLGaTQuDVGhALusLaGss...FSQTTICRFEuLQLSaKNMCKL+PlLc+WLp-AE .................c...ss.cELEpFA+p......FKQRR...IK.LG..aTQuDVGhA..Lu.........sLa.G.ss...FSQTTI...C.R..................FEuL.pLSFKNMCKLKPlLpKWLcEA-...................... 0 111 158 327 +3924 PF05061 Pox_A11 Poxvirus A11 Protein Moxon SJ anon Pfam-B_5994 (release 7.7) Family Family of conserved Chordopoxvirinae A11 family proteins. Conserved region spans entire protein in the majority of family members. 
25.00 25.00 32.10 30.30 24.70 19.30 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.92 0.70 -5.36 9 62 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 43 0 0 49 0 290.10 58 99.77 CHANGED MTslPVTDIsN...-YslTsFSEDsYPsNKNYEIToGQLSILRTVN-KL.....LA+TppspshpsDlspt........hhPs-DsPhoIlp+V.uPpssa.-sss.......llhuE..pQRppRlNIhhS.stEslIE+cslpp...tloSl.op..............TPSLGsVFD+-KRl+LLE-ElhpL+p+ps...psssNL-NFT+lLFGKsshcSsElNKRlsIVNYASlNpSsLThEDLEsCS-EEID+hYcslKQYN-ohKK+IlVTphIoIlI.VlEQlLVKLGF-ElKGLSsElTSEIIDlpIG-DCEtIAsKlGIuNSPlLNIslFllKhhIpRI+Ih .........................................................................................MTTVPVTDItN...Dh.lT.pFSEDsYPSNKNYEIThtQhSILppVNshl.....hAhssSPp..phpSpls-s..................lhPD-DSPsTIIE+V.pPpTshlD.sssssp.....tEllluE..QQRppRhNIpVS.stEAlhEpcsh......IT.ShPop..............TPSLGVVa..DKDKR..IphLE-EVhpLRNppu.pocoSsNLDNFT+lLFGKsP.h+SoElNKRIAIVNYAsLNsSsLSlEDL-lCSE-EID+IYKsIKQYpESRK+KIIVTNlIII.lI.sIIEQsLlK.L.GF-ElKGLSo-lTSEIIDVEIG-DC-AlAsKLGIGNSPVLNIlLFllKlFV+RIKIl.......... 0 0 0 0 +3925 PF04651 Pox_A12 Poxvirus A12 protein Mifsud W anon Pfam-B_5523 (release 7.5) Family \N 25.00 25.00 45.40 38.30 23.20 22.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.44 0.71 -4.17 11 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 41 \N 0 60 0 182.20 63 97.69 CHANGED MA-.KKLo.R..SSYDDYIETlNKlTPQL+TlLuHIuuEQusptsNhs....sssssss.pssuG...sssohp+op+soposspp......+.ss............SGAPpR+pss..hus.sc..psQ..hhQAVTNuGKIVYGTlK.DGKLEVpGpVGElNpDLLGI..ESVNAGRKs.o+up.............stpph..............puu....h+KtcshsssspshDh..GMs .......MADKKNLAVR...SSYDDYIETVNKITPQLKNLLAQIGGDsAVKGGN........NNhsSQs-V.TAG...AssTKSKSoKChTs+sKo........pSoSo...............SsS+sS.p.oSGAP+RRTTs...ooS.hNA..hDGQIVQAVTNuGKIVYGTVR.DGQLEVRGMVGEINHDLLGI..ESVN.AGKKKsSKKh................PTsKK...........hshSSG....MRRpEpINssDsClDh..GM.h...................... 
0 0 0 0 +3926 PF04848 Pox_A22 Poxvirus A22 protein Mifsud W anon Pfam-B_4558 (release 7.6) Family \N 21.00 21.00 21.00 21.10 20.80 20.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.82 0.71 -4.32 13 94 2012-10-03 01:22:09 2003-04-07 12:59:11 8 1 75 0 1 111 206 152.10 38 81.27 CHANGED plICAhDlGsKNPARTllEl.ps........ss..I+llcIsKLDWS.ssWE+pVA+Dlsp....ashshVLLE+Qs+RSPasKFIYFIKGhLYs..opT+VIslsPs.....hsGsSY+sRK++SlclFLshhshFG..lss..lsch+KLDDVADSFNLAl+Yl...LsK .............................................hIsAhDlGs+N.AhsllEs.cs.............ss.....l+..llDl......u..K.lc....ho.....pDa.c..+.pl.....s.+Dlsp.............hphssVLlERQP..cR.u..s..hl.+......h..l...a.FI.+..u..ah....h....p......s..s......s..KVIs.V..SPs..........................hsG...s.oY+-.R....K...K..pS.....V....Esh....hs....ahcs.as.......lpc....sl...s...cp.+.KhDDlADoashAhpal....................................................................................................................... 0 1 1 1 +3927 PF04584 Pox_A28 Poxvirus A28 family Waterfield DI, Finn RD anon Pfam-B_4756 (release 7.5) Family Family of conserved Poxvirus A28 family proteins. Conserved region spans entire protein in the majority of family members. 21.40 21.40 24.50 24.50 20.70 18.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.30 15 68 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 47 0 0 50 1 139.50 60 99.61 CHANGED MNslolFhIllATsAlClllFQhYslYENYDNIhEFNssHusLEYSKolss.stlDRpVaDPNDplaDsKpKWRCVpas.ssYVSlShFGF.pussusp....l+pFsTl-sClsaTFScuscusIaNPChsss..pSp-ClFLKSlL ..MNuLSlFFIVVATAAVCLlhlQuYSIYENYsNIKEFNAsHAAhEYSKSlGG.PuLDRRVpDsNDsIpDVKQKWRCVsYs.NuaVSASlFGF.pA-sGsN....IRKFsThppCIDaTFScshshcIaNPClsPN..sssECpFLKSVL.... 0 0 0 0 +3928 PF04665 Pox_A32 Poxvirus A32 protein Mifsud W anon Pfam-B_5586 (release 7.5) Family The A32 protein is thought to be involved in viral DNA packaging. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.68 0.70 -5.07 10 981 2012-10-05 12:31:08 2003-04-07 12:59:11 7 4 115 0 835 1034 176 165.70 48 66.54 CHANGED Ep+FsRcSLLcsPFRMAlVGGSGSGKTsYLLSLFpTLVc+Y.KHIFLFTP....VhNsuYDuYlWPDHIpKVoopEE...LEYsLsssKpKIE+asp.upspK....t.pFLlILDDlGDhQhRS+sLlslhNaGRHlNlSlIlLCQTY+HVPlNGRsSITHaCCCNVS-SDlENhlRSMSI+GoKKpLl+slulhRuup.ppR+Vl.IIEDSVFspGEtRICYDoAD-pVltpclDhsILlsQFSHMKppLss ................................................................................................................pc.pphhpc++h+pcppp....................................................................................................................................................................IEhluK.KhQ.ua.c.........YP+R...sL.LILDD....F...A..S...H.h.K..s....R...-.Q....-...M.C.R.IL.K.....K...LR.HF.N..IS.VVICVQ.TA...KS..L.SKD.V.KR.IL.T..D.Il.L.F.P..s.h.scD..h.E.LMpESMusK.hc+cE.lWEtYKllps.......P+.oshcIH.IhsNpV........................................hlKst............................................................................... 
1 782 794 835 +3929 PF04948 Pox_A51 Poxvirus A51 protein Finn RD anon Pfam-B_6937 (release 7.6) Family \N 19.90 19.90 20.60 20.60 18.50 18.90 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.11 0.70 -5.77 8 76 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 35 0 0 70 0 276.80 52 97.71 CHANGED M-.hIlsss.Sh.+DhDll-alKssFssc..hspscNVCoKaDNllhoopoKsKllluDhPpIDpslSphYppsht..pslsRlSRFCKlltLcscKD.....alYlPtocsll..slLsIsppssssp....pCElphhsssspssI..pLp.scFsIlpss.pshhVKGsNllllIllF-EtsaPtIPLIRoISsNsVlISRHsRLHcElPscNWFKFYVEL+HsYoSuLhlllDGolLYAsuDYKTHChISKp.psp+c-lsDDCtCCYsssplplhsKp-llEpssCcsIRGGlpIplpcVGcFuASalGKYPNh-YIKIsluosYcMIsKQDplSGKptpusYlYGIA+R ..............................................................................................................................................................................................MD.lIVhslpuL.Kch.EplsalKNsFhhc...sptppsCcKlcNVhIsupoKssslIADlPhlDsulS-lhpohht..hsluRISRFsplIcl-cccc.......YsYhp..p-sls..sIloIu+ccD.........sCEllIsS.DpusssI..cLsph+hAILshs.sSFFsK...G.Nus.LlILLFD.hshsusPLLRS.losNsVlISRHpRLHcElPSpN.WFKFYlsl+psYCSlLYhVVDGSlhaAhADp+THshISKs.hhcsssINDECcCCYh-.PQI+ILDR-EMLsu..S..S.....Cchs..R+s..IhhsLs-lGcFGSSh..lGKY.EP-hIKI.ALSsutslIpspDhIsGR+taShYVYGIApR.................... 
0 0 0 0 +3930 PF04924 Pox_A6 Poxvirus A6 protein Finn RD anon Pfam-B_5792 (release 7.6) Family \N 19.70 19.70 20.00 19.80 18.30 17.70 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.43 0.70 -5.66 12 67 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 44 0 2 54 1 362.30 62 98.12 CHANGED MDKLRslYp-FYpIS+cYLE+pTsppsssssa-sDVshhhslVPlLEpKlss.IssshoD-sllhhM+asNY+hFSFWFLKSsAVVKSVYN+Lcp-pE+p+FhplFKDlLlssQTLlSlNsMYpNlKQDTs-IVsDSKKIlEIVspl+sussEssAYKlLQsNaoFIVKTINKlLSDENYLLKlIAlFDocLloDK-KLpEY+ElFolSsESllaGI+ClS-L-lsolslp.NN..KYltFFKKlLuslILFQNssLsup+FlplVuKLYslIapphpTNspluhLlo-VLDSlKsKlSl--lKpcGVpNlQoLI+aIusN+s.YKsIlucEYhKREsslIcILQsIsscssIcasGpslDlctLlchhK-+ahp ..............................................MDKLRVLYDEFhsISK-pLERETGLosSDlDhDhDlsIFMTLVPVLEKKVCs.ITPoIcDDcIlsMMKYCsYQuFSFWFLKSGAVVKSVYNKL-.DsEKEKFlssF+DMLLNVQTLI.SLNuMYopLRQDTEDIVSDSKKIMEIVSHlRuSTsENAAYplLQpNNSFIlpTLNKILSDENYLLKIIAVFDSKLIS-KEpLNEYKpLaTISoESllYGIRCVSsLDISSVpLs...NN..KYVhFlKKhLPpIILFQNNDlNuQQFANVlSKlYoLIYpQLpoNV-VGsLLTDsl-SsKTKISVEcIKQsGINNlQSLIKFISDNKcpYKoIISEEYlu+EDcIIoILQsIlNEacIcY-spllNhR-LIshh+ERYu.......................... 1 1 2 2 +3931 PF04745 Pox_A8 VITF-3 subunit protein Waterfield DI, Finn RD anon Pfam-B_6036 (release 7.5) Family Family of Chordopoxvirus proteins composing one of the two subunits that make up VITF-3, a virally encoded complex necessary for intermediate stage transcription [1]. 
25.00 25.00 237.70 237.40 18.40 17.30 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.71 0.70 -5.48 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 39 0 287.30 66 98.14 CHANGED MF-PVPDLNLEAolELGDVsI-sT+u+s+E..SsuYVSKsRRLFVH+SKD-ERKLALRFFLsRLYFLoYKElNYLFRClDsVKDVsITKKNNVIVAPYlILLTlSSKGYKLTESMIEhFFPELYNEsSKKFRFNSQIsIIQEKLGYssusYHVYEFEhYYSTVALALRsc+.....-s-lFNsRcESplVSSLSEITYRFYLIpLKSshVQWSuSTGoVINQlVNTVLlTVY-lLpKslpps+pFpCTLApEoclPlpLLlDRh-hFsKIIs-L++TNSFKISK+DKcsLLKYCp .MF-PVPDLNLEAolELG-VNIDpTs.shl+E..souFlSRSRRLFsHRSKD-ERKLALRFFLQRLYFLsaRElpYLFRClDAVKDVoITKKNNIIVAPYIsLLTlASKGhKLTETMIEsFFPELYNEpSKKFKFNSQVsIIQEKLGYpsuNYHlYDFEsYYSTVALAIRccc.....sSs.IFNlRQEShLVSSLSEITYRFYLIpLKSDLVQWSuSTGAVINQMVNTVLITVYEhLphslcs.cspFsCoLAlESc.LPlcLL+DR.s-LFsKhIs-LK+TsSFKISKRDKDTLLKYFp....... 0 0 0 0 +3932 PF04835 Pox_A9 A9 protein conserved region Waterfield DI, Finn RD anon Pfam-B_4431 (release 7.6) Family Family of Chordopoxvirus A9 proteins. 21.50 21.50 21.90 24.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.84 0.72 -4.06 8 68 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 1 53 0 54.00 71 55.25 CHANGED AIDlhRHhFMYFCEs+lRPNSFWFVllRollSMlMaLlLGlsLLhISsNs-csc ..............AIDLCRHFFMYFCEQKLRPNSFWFVVVRAIASMIMYLVLGIALLYISEQDDKKN..... 0 1 1 1 +3933 PF04508 Pox_A_type_inc Viral A-type inclusion protein repeat Waterfield DI, Finn RD anon Finn RD Repeat The repeat is found in the A-type inclusion protein of the Poxvirus family [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.54 0.74 -7.01 0.74 -3.68 27 453 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 22 0 1 343 0 22.90 49 16.96 CHANGED cElc+h+p+Ip-L-cpLscspcs .pElscL+sRIpDLERpLs-C+cs.. 
0 1 1 1 +3934 PF03286 Pox_Ag35 Pox virus Ag35 surface protein Mifsud W anon Pfam-B_4295 (release 6.5) Family \N 24.10 24.10 26.20 26.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.48 0.71 -4.79 10 84 2009-09-11 15:09:14 2003-04-07 12:59:11 9 3 42 0 2 75 0 198.20 52 93.58 CHANGED M.SWSINLu..uu.GDNFKTL-EIRAHVRSTTEssD..cssDDIFPs.....DI.................cIP.op+pP+pK+.....sTs.RK.......ssssKucKscKEKss.ttcc.psDs-K.............................TEENEs.sp..p-sscscpusSsssps......s-Ds.....................hDsSDLKlAT-sIlKDLKtLNsRVoAlSTVLEDVQAuSIoRQFTSLsKul-pL+slApsGKppVs..RKKs+s.sKK ..............M.AWSIo.p..us.oSSFpphsEIRAHLRsoA...ENpD......K.N-DIFPE.....DV...................................lIP..STcPKTKR.....sTsPRK......PAsTK+S..TKKtc-.+pplEE..E.....s...llEEhcp....sTEENSsssss..oPssGD.IsESlsAs-h-...........-.DssD-.........................poDhSDLKVATDNIVKDLK+IhoRISAVSTVLEDVQAAGISRQFTShTKuITsLucL.VopGKS.KVV..RKKVKo.CKK............ 0 1 1 1 +3936 PF03336 Pox_C4_C10 Poxvirus C4/C10 protein Mifsud W anon Pfam-B_3519 (release 6.5) Family \N 21.00 21.00 21.20 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.25 0.70 -5.66 8 127 2012-10-10 13:59:34 2003-04-07 12:59:11 8 2 33 0 1 132 15 260.00 52 96.81 CHANGED hlplHlFo-shFcshKp-lh..lp..php......p.t..h.............pcSKplhhcsoLsc-lhspl+sl....lYcpLKslVc......sVpVcNclTllpY-+GDahsppps..ssshs+NtlshaLLlaLpps-pGGcs+lYlcssss.hlslooDlLFDKolsH-oppVcsGcKplAlhDVhlc..h..ccsllsTIcY.hsssIsLYD+EsDp.sLCYC-lp.Ipshs..s-hhphGlIsDRSGKClLVHpstclsphcc...lacSFp-lChpphh-.....tlhplppsss+sIAWSslc.sscsDpalPpsc-hYKhLpclss+p+s.ppt+l-hh...........shss---E.hahhCpVo+YYFsLPc 
.........................................................................pTIKlFNp.EFDsIRN-lhpLhKhVp.......................................sss.l.pl.s...pD.s.D.I..-sIRcI....L.Y+phKN...Vc....................sl-lsssIoFhKYs...........N.ssl.....T....s.....s.h.t..YhLVIYLppsh...plKhh......aP.....Ts...p..........I.....p.o.s..........c..D.....IMFuKoLsF+.ppVhpshKhl.hhsISls.....Yp.ShspIpY..ssh...........IDIpssppsp.pLCYChIT.hDsHaL.lDlETlsVlVs+SGKCLLVNpahhhhhhpc...I.sSFsDlCMDpIF-h.spscELFoLpNDDsRNIAWDsDK.csssshWhPhT---YKFLS+Lh.hAK...ssThFDYY...........VLs.GD.T-PsTVF.FKVT+aYhNh..h................................................. 0 1 1 1 +3937 PF03287 Pox_C7_F8A Poxvirus C7/F8A protein Mifsud W anon Pfam-B_4089 (release 6.5) Family \N 24.10 24.10 25.50 31.90 23.90 24.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.80 0.71 -4.49 11 76 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 37 0 0 56 0 148.20 44 89.05 CHANGED MGIpH-LDIalVsEslulKslpLhKGDSYGCsIclKlsspKplcFlllL+.PDWppIs-lKPIpMclNGhsl-spLl........................pcohhphIYsuslslpscos.lphaSDsccp.apctYPslpINs.KKhYclhcpGhThhaI-SPIsspDKhpahc ......MGIpHEhDI.lIsssIAL+NLpLHKGDsYGC+LcIISsshKplcF+hIl+.PDWSEIcclKsLpsasNshslcls+l........................ccohYhlIYpAslpLYscpTplLlaS..Ds-s-.aK+YYPhIsLNhhsKcYcVK-cNYossaIEaPllshcchp.hc........... 
0 0 0 0 +3938 PF04701 Pox_D2 Pox virus D2 protein Mifsud W anon Pfam-B_5832 (release 7.5) Family \N 25.00 25.00 66.70 66.50 21.70 21.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -11.02 0.71 -4.22 10 55 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 41 0 0 41 0 135.20 51 90.63 CHANGED ch-lK+..LssI..ltNssllFssDlsplhsE+aIlLE+s.sGpshclHlYcssARFDN+oIa+lVKalY+sRsclL+llFPspshhESlcsLhPshTlsl...........pcsstss.hp...scs.ssKhlLLELFNSF+hGKsss...shPYYhLP ........phDIKK..ITDL..L.NusILFPDDV.pclLpEKYIVLERcsNGTPsssHIYco.hARFDNKSIYRIAKFLFhNRPDVIKLLF.....LEslEPLLPsKoINI..........ShssoEhPpL-....sPluTKlsLLELFNAFRoGtu-....PlPYYYLP.... 0 0 0 0 +3939 PF00874 PRD BglG_antitermin; PRD domain Declerck N, Bateman A anon Pfam-B_772 (release 3.0) Domain The PRD domain (for PTS Regulation Domain), is the phosphorylatable regulatory domain found in bacterial transcriptional antiterminator such as BglG, SacY and LicT, as well as in activators such as MtlR and LevR. The PRD is phosphorylated on one or two conserved histidine residues. PRD-containing proteins are involved in the regulation of catabolic operons in Gram+ and Gram- bacteria and are often characterised by a short N-terminal effector domain that binds to either RNA (CAT-RBD for antiterminators Pfam:PF03123) or DNA (for activators), and a duplicated PRD module which is phosphorylated by the sugar phosphotransferase system (PTS) in response to the availability of carbon source. The phosphorylations modify the conformation and stability of the dimeric proteins and thereby the RNA- or DNA-binding activity of the effector domain. The structure of the LicT PRD domains has been solved in both the active (pdb:1h99, [2]) and inactive state (pdb:1tlv [4]), revealing massive structural rearrangements upon activation. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.89 0.72 -3.86 211 15913 2012-10-02 16:05:11 2003-04-07 12:59:11 15 106 2053 11 1448 9078 56 90.60 18 28.55 CHANGED pcllphh.....cpphphph......ssthhhsLhhHlp..hslpR.lpps..........hphpsshh.pplcp.Y.sptaphspchhphlppph.shpls-sElsals.lHltss ..............................................t.thlphh.ppp.h...phph..........spthh.....sLhhHlp..hh...lp.R..lpps...............hph.p.s.s..h.h...p...p..l...p...p.......a.....spta.p...hs.p.p.hh.p....h....lpp....p..h....s.h....p..l.s.c.s.Elsals..lHhht.................. 0 437 816 1106 +3940 PF04580 Pox_D3 Chordopoxvirinae D3 protein Waterfield DI, Finn RD anon Pfam-B_4684 (release 7.5) Family Chordopoxvirinae D3 protein conserved region. Region occupies entire length of D3 protein. 19.90 19.90 20.30 20.10 19.60 18.60 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.61 0.70 -5.18 10 66 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 37 0 0 60 0 225.60 54 96.18 CHANGED MDIhll+Dst.YPhhsscsNcs.FlLLGNHspFIsshLpclpp+..hhFascYtloPDchG.oLplchlsSSat..I+s+hVsV-EFIshGpshcWCspphpts....phscsDpllIaDIsahcsshWKRIlhlpCPshlssphEp............FlTNPa.lhppsp.......clF+NllLRStlNshIFs.psSsLcpLLsH.Ilophsl-+h..pslls...ac-sssl+LlppCY-Rs+F+AFVYAWFsuQl.sNsphENEKVc+sacpVpchI ...................MDIFI.VKDNK.YPKVDNDDNEV.FILLGNHNDFI+sKLTKLKE+...VFFS-YIVTPDsYG.SLCVELNGSSFQ..HGGRYIEVEEFIDuGRQVRWCSsSNHIS....cDhHTDKFlIYDIYTFD..uFKNKRLV.FVQVPsSLGD..DS............aLTNPh.L....S.......PYY+NuVARQMVNsMIFN.pDSFLKYLLEHLIRSHYRVSKH..ITIV+...YKDT-ELNLTRICYNRDKFKAFVFAWF......NGVsENEKVLDTYKKVSsLI.......... 1 0 0 0 +3941 PF03288 Pox_D5 Poxvirus D5 protein-like Mifsud W anon Pfam-B_4009 (release 6.5) Domain This family includes D5 from Poxviruses which is necessary for viral DNA replication, and is a nucleic acid independent nucleoside triphosphatase. 
Members of this family are also found outside of poxviruses. This domain is a DNA-binding winged HTH domain. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.96 0.72 -3.72 41 1059 2012-10-04 14:01:12 2003-04-07 12:59:11 11 32 639 6 337 979 69 91.70 20 16.60 CHANGED pssDslh-Fhtphhs..............tu.hlPs.phlattYhpahccpGath.......LstppFppclsp.hhpt...s.....ahp++p+..................sthspthhphhhhp-h.s ...........................................................ps-shhtFht.h..........................................h..h.hsp..p.LYcsYhtahppp..Gats.......hohppFtpslpp.hhp.......t..........apc+p............................................................................................ 0 258 294 317 +3942 PF04805 Pox_E10 E10-like protein conserved region waterfield DI, Finn RD anon Pfam-B_6357 (release 7.5) Family Family of poxvirus proteins. 25.60 25.60 25.70 94.90 25.00 25.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.47 0.72 -4.45 11 59 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 43 21 69.90 74 73.15 CHANGED slEtCK++LYsIssTLPCssCRtHApcAIpcNNlMSSsDlNYIYaFFIuLFNNLssDspa+.IDlpKVpPL .NIEuCKRKLYTIVSTLPCPACRRHAThAIE-NNlMSSsDLNYIYYFFI+LFNNLASDPKYs.IDlsKV+PL. 0 0 0 0 +3943 PF04497 Pox_E2-like Pox_E2; Poxviridae protein Finn RD anon Pfam-B_3979 (release 7.5) Family This family of proteins is restricted to Poxviridae. It contains a number of differently named uncharacterised proteins. 
30.00 30.00 32.80 32.70 29.70 28.10 hmmbuild -o /dev/null HMM SEED 728 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.09 0.70 -6.50 27 184 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 43 0 0 167 0 610.30 29 95.71 CHANGED M...h..spplRcuhtphtsp....................phsaccLshpc..shpLlthGlH.spLPcphYscsl.chs.splhhFcPcpVphhDLlpllpppps.s..pphtstltaa+ppllppsshp.llp+hlphhhl.s-DDlch....llsc...t.hss.hLhplNsp.ltph..hhhScptlcplhpp.shphhshLYp+.s.hshphLhphhtcasIsPsNpulhp.p..shstsl-llpphsppp.......hlchlspplhsscphhphlh......IhtsplsshhshspcaLhsphsc............GlhsshhFs...........hspht..ptlocc..........phphIscaIshYchp..................uplhsph.c-hlpcphshtlh....pst.hlphhphshpsptshhtshp.sh--ll...........paLDsltlpsh.chphs.l..-.................hhhs.s.haN..spllc.hlpphuhsptKhptLhht..............hs.s.tshthhhphht..psshhhsst.hhsolh.h..............ht..t.hhhh...p...t..p.sh.ststpsphp.h..h.p..thph.hht........p.........................s.th.ss-scllsplaDlsphApaGll.hshhhhss.WhPlhshl........................phsphssssph.psslhpls.schscatsh........shst.lsshasthsshhsslhhYllsuhhh.....s.pp..thppFlppllsshhcGhtlt.........hspslpsss.p.hhclcphls.ts.ssh.hhhphhL+ssltlhccl 
......................M..h..sp.hR+Ah.c..s+....................phshthlppcc..tlsLlchGhH.shLPKsLY.psl.p.s..plhhF.PchlsshDll.pslpp.p.p.s..-hhtshl.aHKsslhhsu.hs.llhhhh.Y.ll.ocsDlca..........lhpp....ss.hthhL.hINt..l.hh..hpho.sEIlsllpc.shhhh..lYpp.s.lD.chlhph.DcYsIsPlpsulhc.p..s.EhhIcllhthshsp.......hlshlsps.hhpsshhphIhs.....hhpt+lshhhshlp-aLpshh.D.......................sIhushhFp...................lshhs......lTts.............................E..alph.hshYshh...................................hp.hs...p.Dhlh.c.shpIs......poss.l.hhphP..hp..p.shlhp.p.ph.DIl...........palDphchso..chuh-sl..-.................hhho.ohhhN..ssllphhhhp.h.h.hhhhhLhh...............hshpp..sshhLpth...+Ghhhh.sh.slhoh.................h.h.hp..ph.....p.hpsc.lsh.sshppsh.h.hhphpul...............................................hsp-ctlh.plaDlsRhA.aGhh..s.hh.ps.WsPlsphl........................chp-hhs.t+..phsllslhsp..hchps.........ph..slss..AtlshhhsTll.Yllhs.hh.....slpp...sccFV.pllphhhcuhtl..........L.p.lpsss.h..pclc-hsspGs.ss..aLh+.plphh..lh-cl......................................................................................................... 0 0 0 0 +3944 PF04656 Pox_E6 Pox virus E6 protein Waterfield DI, Finn RD anon Pfam-B_4392 (release 7.5) Family Family of pox virus E6 proteins. 
25.00 25.00 25.70 195.00 20.50 20.20 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.73 0.70 -6.31 11 74 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 45 0 0 63 0 552.40 72 100.00 CHANGED MDFIRRKYLIYTIENcIDFLRsElhsKlSNFoLNHVLAlKYLlhsFs+sVlTKDVLuNsNFaVFLHhV+CscVY-hVL+pSFDlPsLYlKuLlKNYshFschIpsYKphspcLl.Dc+FlElschusphp-lIGVNYDhtLNPLFHpGEPI+sMEIIYuKLFKKTcF++V+KlpVlRLLIWAYLoKpDTGhcFsDNDsQDlYTLaQKoGs.llpS-MTEpFKEYIFs..ss+TSYWlWLpEsIhNDsclYhctsApoMY-KlLSYIYSElKQGRVNKNMLKLVYlFEsDp.I+ullLpIIYGVPGDILSIIDo+DEsWKpYFluFYK-NFIDG+TFsSspoF.-DLF+VVA+IDPEYFDsc+.IhSlFcpcP-plchFDchDINsTYlSplIYpTpDlsLpslEchptCQIYN-DTcYaIKEYNTYLYLsE-DPhVl.cGhLsKLSsl.sps++..hoLFScsILKYYlDG+LAslGLVLssYcsDlll+lloHLKClEDVTsFlcausC+NsSllPSllRTILuNFNluIIlLFp+FLRENlaaVEpaLD+opHLTpNDKKYILplIppGRS .MDFIRRKYLIYTVENNIDFLKDDsLSKVNNFTLNHVLALKYLVSNFPQHVITKDVLANTNFFVFlHMVRCCKVYEAVLRHAFDAPTLYVKALTKNYLSFSNAIQSYKETVHKLTQDEKFLEVAcYh-ELGELIGVNYDLVLNPLFHGGEPIKDMEIIFlKLFKKTDFKsVKKLSVIRLLIWAYLSKKDTGIEFADNDRQDIYTLFQpT.GR.IVHSNLTETFRDYIFP..GDKTSYWVWLNESIANDADIVLNRsAITMYDKI...LSYIYSEIKQGRVNKNMLKLVYIFEPEKDIRELLLEIIYDIPGDILSIIDuKNDDWKKYFISFYKuNFINGNTFISDRTFN-DLFRVVVpIDPEYFDNER.IhSLFSTSA.s-IKRFDELDINNSYISNIIYEVNDITL-TMD-MKKCQIFNEDTsYYlKEYNTYLFLpEsDPMVI-NGILKKLSSIKoKSRR..LNLFSKNILKYYLDGQLARLGLVLDDYKGDLLVKhIsHLKsVEDVSAFVRFSTcKNPSILPSLI+TILASYNISIIVLFQ+FLRDNLYHVEcFLDKSlHLTKsDKKYILQLIRHGRS. 
0 0 0 0 +3945 PF03394 Pox_E8 Poxvirus E8 protein Mifsud W anon Pfam-B_3759 (release 6.6) Family \N 20.50 20.50 20.70 89.40 20.20 16.80 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.68 0.70 -5.00 8 66 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 41 0 0 52 0 236.70 77 85.75 CHANGED h+NTYLYHNYAYGWIPETAlWSSRaAsLDlTDYYPITLGLLKKFEFMhSLa+GP...s.sYpsKINTEFlupGSFhGRalsaa++FoILPTcEFISFLLLTSIPIYNILFaFKsTpFDhsKHoLFuuhYosss+HlELA+Yh++uGDYKPLFu+Lc-..cslYo.....u...shPlshtsll+spsPsu.p.ssS.DYETLANLSAILYaTcYDPVLhFLhFYlPGlSVTTKITPuVEYLMcKLsLsKcDVsLl MKNTYLFDNYAYGWIPETAIWSSRYANLDASDYYPISLGLLKKFEFLMSLYKGP...IPVYEEKVNTEFIANGSFSGRYVSYLRKFSALPTNEFISFLLLTSIPIYNILFWFKNTQFDITKHTLFRYVYTDNsKHLALARYh+QTGDYKPLFSRLKE..NYIFT......G...PVPIGI+DIsHPNLSRA.R.SPS.DYETLANISTILYFTKYDPVLMFLLFYVPGYSITTKITPAVEYLMDKLsLTKsDVpLL. 0 0 0 0 +3946 PF04943 Pox_F11 Poxvirus F11 protein Finn RD anon Pfam-B_6911 (release 7.6) Family The protein F11 is an early virus protein. 24.10 24.10 24.10 24.20 20.30 24.00 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.06 0.70 -5.60 7 107 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 36 0 1 85 0 197.40 38 88.29 CHANGED o.tphss......hslp.p............Phhhlspussp.l....Lhsslalp..slcslhspsstllscssphpp.s..p..apht.l.l.sshhhchlhDtpsaFcls-shll+LcHGshahptshasssshuFsAlICl+NpGhSulhVsposhlppshppGssllhRSspulphLPQIuGcuhYLIlplsPTcchhcphh.slhssspspsu.........p.phthsuppc+cshchlsslIphpl.LEchhhchs....hhpEhtshYsslhh........................................pppppp.lpshhppscshhppuhssh.hh..........tpsspssh+cchLhtthp.+Dlp.lhsshsp...............laschtclhpclsshls..................tps-hlppaIhthlspspssh.psplhpsL.....hslsslsahl 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1 1 1 +3947 PF03337 Pox_F12L Poxvirus F12L protein Mifsud W anon Pfam-B_3082 (release 6.5) Family \N 25.00 25.00 87.60 87.40 24.50 17.70 hmmbuild -o /dev/null HMM SEED 651 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.27 0.70 -6.61 11 87 2012-10-02 01:06:00 2003-04-07 12:59:11 8 1 43 0 0 83 0 615.20 60 99.36 CHANGED shhsplpssss..t....plsssLpch.chsllLA+spcGKGlllhusNlptspsh...lslopLcllulsthl-shssPstPhspLhIsuh-o-.saYSPcTSpo.PLlDIL++Ruppps..-LtpAl.tc.hs.pussSloEINpWhspsGLt+YRFlsacDt+thph....sphTllD-MsIsaIGpHhIWVKDh.sYsRPElDI.LsaDlcsluptspWucl.pshstphhplhuhhlpuhlos..sGPplYMIoTY.PG+sFhshsSsK.Llp-FLcWlp-.hhtshp....................TlsLlGahSSlFDhPLL+ssasps.pGWshl....usssllScsGh+lhllDhupFuhuh.olp-YCpaWsusshshscD..llocpEs+hphchlccsuscsspsLasAshsppssLsplhsssshhtFssL-DMllspuhhhuApp.stthYhPstssshshlppulptctVpoh..ssss..t.hppa+L+SllclltsphYPlG+PpaVpp.hscGKLYIALCcVTh+ssl+IPllassc.sEsshoF.ssLTSVDIphAt+lGGYpI+.lsALpW-cShpl.+sslpchhstlscl...spospLhsplsp....scLh.pspss.hsp..hl..atAFAASYCRtplHslIcclDSHalGsaVh+HsYpclalpsssststshLSphhcl 
..........................................................................s...QhLMKTAN......NYETIEILRNYLRLYIILARNEEG+GILIYDDNIDSlMSM...MNIT+LEVIGLT.HCTKLRSSPPIP...MSRLFMDEIDHE.SYYSPKTScY.PLIDIIRKRSHEQG..DIALAL.E+.Y.sIEN..TDSISEINEWLSSKGLACYRFVKFND.Y.RKQhhh+.ho+tTIVDSMIIGHIGHHYIWIKNLETYTRPEIDV.LPFDIKhISRDELWARI.SSSLDQTHIKTIAVSVYGAITD..NGPhPYMISTY.PGNTFVNFNSVKsLILsFLDWIKD.IM.TSTR....................TIILVGYMSNLFDIPLLTVYWPNN.CGWKIY.....NNpLISSDGARVIWMDAYKFSCGL.SLQDYCYHWGSK.PESRPFD..LIKKsDAKRNhKSlVKESMsSLKSLYEAFcTQSGALEVLMSPCRM.FSFSRIEDMFLTSVINRVScNTGMGMYYPTNDIssLFIESSICLDYIIVNNQcS.....NKYRIKSVLDIISSKQYPAGRPNYVKN..GTKGKLYIALCKVTVP.TNDHIPVVYHDD..DNTTTFITVLTSVDIETAhR.AGYSIVELGALQWD-NIPcLK-..sLLDSIKhIYDL.NssTTN....NLLEQLIE............N.INFNNSSII.LFYTFAISYCRAFIYSIMETIDshYISQF....SYKELYlsSSaKDINEsMSQMVKL................... 0 0 0 0 +3948 PF04596 Pox_F15 Poxvirus protein F15 Mifsud W anon Pfam-B_5182 (release 7.5) Family \N 25.00 25.00 167.60 167.30 16.70 16.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.85 0.71 -4.58 11 51 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 38 0 0 33 0 136.20 68 88.25 CHANGED LsPF+sMs+IKINpc-NClLGNRCFVKlscV+ahPpsuls....Topolph+sacFTLsELLYSPFHFpQsQaQYLhPuFVLpCI-EAs+NpppC+YChss+ssc..suLNINIFlPThspphYlIIGLRIKsFWsssFcIE M+PFKNMNKIsINpcDNCILANRCFVKIDTsRYIPssSIs....oSshIRIRNHDFTLSELLYSPFHFQQPQFQYLLPGFVLTCIDKsoKppKcCKYCISNRGDD..DSLSINlFIPTINKSIYIIIGLRhKsFWKsKFEIE. 
0 0 0 0 +3949 PF04708 Pox_F16 Poxvirus F16 protein Mifsud W anon Pfam-B_5863 (release 7.5) Family \N 25.00 25.00 25.40 33.50 23.90 24.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.49 0.70 -5.06 9 65 2009-09-11 09:28:37 2003-04-07 12:59:11 7 1 39 0 0 56 0 212.30 56 91.97 CHANGED cpAAIlTSLlSLFDsSlpaQlchC+pYCp.LuhpVlhEl+EFGYIs-csLpocpW.ssltsssIshlVFYQlKQLoISscpLYshhh+.sc.ssl+lYFV+DsLsFDG..hPPoF+plshslphtsRKKl+DlIsLlshpTsscpllpcFlpsNFGsVctLLpllcpssLWlphhLsppcp+t..................hshhpa+pFlsKl+clct.hpsp.l-cICsshpsIsl ..............M.KVVIVTSVuSLLDASIQFQKTACRHHCNYLSMQlVKEI-EFGTINEKNLEFcTWKDVIQNDEIDALVFYRVKQISISsGVLYcSMMR.NRTKPISMYFVRDCLAFDG..sPPSFRMTSCNINAYNRsKIKDLIIL.MNMKT.CNKKIIGEFIIDNFGSVcuLLSIINSNVTWVTSVINNSNGRGINIRVS........NNKMLTITSFRRFVNKLKhYKTTKCsSQLDNLCT-hNKMcI.... 0 0 0 0 +3950 PF04767 Pox_F17 DNA-binding 11 kDa phosphoprotein Waterfield DI, Finn RD anon Pfam-B_6128 (release 7.5) Family Family of poxvirus proteins required for virus morphogenesis. Protein function necessary for proteolytic processing of the major viral structural proteins, P4a and P4b [1]. 25.00 25.00 32.90 32.20 18.10 17.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.27 0.72 -3.74 11 57 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 37 0 95.10 63 94.23 CHANGED hhhcoPFhlsTc.EGRYLVLKAlKlCslRTV-Ccus+ASCVLKVEKP.susC-R..ssoPs.Rstht.......osPs.+sssplPFMRTNhLpsl.usNR.NssuRlLu .u..AHTPFYINTK.EGRYLVLKAVKVCDVRTVECEGS.....KASCVLKVDKP.SssC.ERR......PoSPu.RCtRh.......ssPu.....pQVPFMRTsMLpshFAsNR.NV.uSRlL.p 0 0 0 0 +3951 PF04599 Pox_G5 Poxvirus G5 protein Mifsud W anon Pfam-B_5216 (release 7.5) Family This protein has been predicted to be related to the FEN-1 endonuclease [1]. 
20.10 20.10 45.60 45.30 19.00 18.40 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.60 0.70 -5.78 11 78 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 70 4 413.50 57 96.90 CHANGED MGIKNLKoLLLcptsLppl...csh.+s.hstIFVDTMSlFholAasVssl--LpspFhcalpta.ppsG+VTLFlDRGsIsIKcsLREKR.+suhcNThKRKphElcp.........LpstIstLslsDhhYEEhKT-lcL+IcKLpFa.FLuspsNlKhs.L-csLstl...-sVsIlYCDslDAEFVMCpcAKcls.poGpWPllISoDQDTLLhuSsDshsKI..ItohsphYpalPssco+YLoKLlsLsNGCDaFsGLYGhsITpKoLpp..IpL....F-DFol-Nll+....SLshKNYsp.......Ktos+hlD.....lDpIIcFIscY.................osLDcslY.pppsssslolQEFlFoALsp+Wpphcsohlc.ssulhssLhslLc..P++cIsps-lpplpphlpcs.hp++sslssIpolssIFGYchspsssllhGIhshpslhLsacc...pFYFNspsIIc ...MGIKNLKSLLLEs+SLTlL...D-slh+laNGIFVDTMSIYIAVApCVpNLEELsolFlKYVNuWl..K..KuGHVTLFIDRGSI+IKQsVRDKR.RKhSK.TpcRKhLELEK..............................................................ssu-Ip..NV.ouhMtEEIKAEhQLKIDKLoFQIYLSDuDNIKho.LNElLT+h.psENVTlaYCDchDAEFVMCLEAKspapoTGEWPLIISTDQDTMLFuSsDsHPKh..IKslTQLFKFlPoAEssYLuKLTALVNGCDFFPGLYGtSITssNLNK..IQL....FoDFTIDNIVs....SLA...IKNYaR............K.TNosVD.....VcNIVTFINcY.................ANLDD.VY.ualP..PCQCTVQEFIFSALcEKWNcF.....KsSYLc.sVPLsCpLhYALE..PRKEIcVSEVKTLuohIDh-..NsKscI-sIKSIoSIFGYuscN..ss..oI.VFGI.hK.cNLLLulNs...oFYFNsollI.o................ 0 0 0 0 +3952 PF04787 Pox_H7 Late protein H7 Waterfield DI, Finn RD anon Pfam-B_6266 (release 7.5) Family Family of poxvirus late H7 proteins. 
22.10 22.10 22.40 29.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.76 0.71 -4.40 9 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 53 0 142.60 56 98.71 CHANGED MDc+L+oluhThFpGELoThDIhsLthalhsppPp-TlFShc.csspFhIDFcY.DssLASsYlspphpsI.s-cYhsaushIAcELTNh-IIp-DlssYIpsSc+LKRhIKhY+s...pKps++IppssK+LKlAhp+GlDY-YIK-sh ...........MDKRhKSLAMTuFaGELsTLDIMALIMpIFK+HPNNTIFSVD.+.DGpFhIDFEY.DsYKA........SpYLDlsLTPIouDECKoHASSIAcpLsssDIIKEDIu-YIKToP+LKRFIKKYR....NRScsRIScDocKLKIALuKGIDYEYIKDAC........ 1 0 0 0 +3953 PF03289 Pox_I1 Poxvirus protein I1 Mifsud W anon Pfam-B_4306 (release 6.5) Family \N 25.00 25.00 184.70 184.50 19.20 17.70 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.12 0.70 -5.59 11 65 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 42 0 0 54 0 307.60 73 99.43 CHANGED MEp..-QLVLNSISA+ALKuYhsoKIs-hVDELVs+KasQKKKSpsK+hEsRIPlDLI+psFV++FpLcsY+sGlLsSLIsSLVENNYFop.DGKLs-sucpELVLsDlEK+ILupIs+sSsLYIDluDVKsLAuRLKssAssFpFssppYhLEsDKlE-lINpLs+NssIhLDEKsSlKDShYhls-ELL-VLKsRLFRCPQVKDNhISRTRLYDYFsRlTKp-EsKIYVILKDt+IAcILGIETVplGsFlYTKHShLlsoISuplDRYSK+Fp-sFYpsIAEaV..KDNEKlNVSKVVEsLhVPslph-t...E .s.EhEDQLVFNSISARALKAYFTAKINEMVDELVTRKCPQKKKSQAKKPElRIPVDLVKSSFVKKFGLsNY.GGILISLINSLVENNFFTK.DGKLDDTGKKELVLTDVEKRILNsIDKSSPLYIDISDVKVLAARLKRSATpFsFNGHTY+LENDKIEDLINQLVKDEuIQLDEKSSIKDSMYVIPDELIDVLKTRLFRSPQVKDNIISRTRLYDYFTRVTKRDESSIYVILKDPRIAsILSLETVKhGAFhYTKHSMLTNAISS+VDRYSKKFQESFYEDIAEFV..KENERVNVSRVVECLTVPNITIuSNs.E.. 
0 0 0 0 +3954 PF04661 Pox_I3 Poxvirus I3 ssDNA-binding protein Mifsud W anon Pfam-B_5571 (release 7.5) Family \N 18.60 18.60 18.80 18.90 18.30 17.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.80 0.70 -5.60 11 69 2009-09-14 12:26:09 2003-04-07 12:59:11 7 1 43 0 0 57 0 259.30 58 95.83 CHANGED ssphppssscsst.hTCssslEasKSLSpSspKsIEuVpLosSQYPSCSsIsIsLs-oLuSKhsSsaIhlEGEuKIY+NKKsstpu......-sYFLKI+PouASPhLYQLLEsIYsNI+csp+lPsSLssl.slssh-EKTFpcGslYI..NKhsGAllEYpssG.sputlpolscElEsLuKRDtQhuKsIlsPIVFYRsuspsKVTFALKKlIh-R-hossVlDlsGcsp+lsMu.....Eo....sEE-hsRGLGll-..pD.........csh-E--t-..poLFNV ......................ts.p.csssuuDSlpTCuGVIEYAKSISKSNs..KCIEhVTLsuSQ.YssCSSISI+LT-SL.SSpMTSTFIhLEGEoKlYKNKucpsRS......DGYFLKI.KlTAASPMLYQLLEuVYGNIKcpcRIPsSL+uL.sVETIsEKTFKDEs.IFI..NKLNGAhVEYlSsG.ppSIlRSI-sELEuLSKR-+QlAKAIIsPlVFYRSGsETKITFALKKLII-REssANVIGLsG-SERVSMT.....Es....sEEDluRsLGlVDl.-D.........EhcEDsDcE..cslFNV....................... 2 0 0 0 +3955 PF04713 Pox_I5 Poxvirus protein I5 Mifsud W anon Pfam-B_5901 (release 7.5) Family \N 25.00 25.00 73.10 72.90 23.10 22.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.36 0.72 -4.27 11 51 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 32 0 76.80 53 97.05 CHANGED hlss+ElhosIGlTlLhllMlloGuALlhKplsP.a+hlshRShshsRVlshLEalulllFIPGTlsLYuAYl+pLhh ...MhsAhslLoAIGITlLMLLMVISGuAhIlKclsP.pclhoMpSlpFNRsVTIhcYlulhIaIPGTIILYusYlKoLh... 0 0 0 0 +3956 PF04595 Pox_I6 Poxvirus I6-like family Mifsud W anon Pfam-B_5073 (release 7.5) & Pfam-B_6224 (release 8.0) Family This family includes I6 proteins as well as the related F5L proteins. 
20.80 20.80 37.30 34.40 20.70 20.00 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.89 0.70 -5.68 13 110 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 43 0 0 95 0 309.40 41 87.11 CHANGED +pss..............ISFsFss.FYYsN-sLFsKPpNoL-DVsKSlllhpoFcYEpaVIpull+hL..+thualhDlaFlPlGWLsG....t--s....sspHlsl+...llh.oss..htsl+spl+-hLuhaslhshsl...hps-pclsIspFshsts....hPssVl.......ShaPFDs-t.lLlVlFFGpapDuYCGIoY.sss+-pl.tllphLtPhVuElplloD-lsRFsol+lhss..pss+tFPcsp.......ltoICEll+tF-cpcFsssssss.s....shssalPK+lVSllDLPSsV-I+CtScsGlDalTHIssK+LsslLlIsKDsFl+ssohsGTFKKENllW+GpYTYR......Ihc ................s...hIpaslDs.hahCscslho+.spT...h.cpllhhto.shpphhIpuhIKhl........hslhhlslu.Lp.....h-Ds....s.oHlh.t...lIh.soh...ISlKuplI-hhs.hshhshsh...hpschchpIsoFslssS....hPhphI.......pahPFsTD.....hhahG.hpc.hsG.sY................Ph.S.h..lSsphsRho.l+.hNs..hSshhFspNh.......lpsICE...pa-ths.sss..po.o......sups.lspclsShhs.....plps.scsssDa.T.hNscpLhhIllI.........Th.uhhlh..llhtuIhhY+............ 1 0 0 0 +3958 PF03338 Pox_J1 Poxvirus J1 protein Mifsud W anon Pfam-B_3556 (release 6.5) Family \N 20.70 20.70 21.70 76.40 20.30 19.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.79 0.71 -4.51 12 67 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 48 0 0 46 0 143.40 63 94.92 CHANGED MDH.ppYLLThFLp-D-SFF+YluppsD-pAhuDlpsIsphLDFLLulLIRSK-KLEulGahYEPLSEp.a+slh-Fp.Dh+sL+pLFs+hsl.phs.spplpls+GYluDFVlSlhRLp+phths.ss..psspYIDPpcshthsNlluILp MDH.sQYLLTMFFsDDDSFFKYLAuQDDEouLSDIhpITpYLDFLLhLLIpSKsKLEAVGHCYESLSEE.YRpLs+FT.D.p-FK+LFNKlPI..Vo.DuRV+LNKGYLhDFVISLMRhKKEsuLs.ossh-PlRYIDPR+DIuFuNlhsIL+. 
1 0 0 0 +3959 PF03339 Pox_L3_FP4 Poxvirus L3/FP4 protein Mifsud W anon Pfam-B_3380 (release 6.5) Family \N 25.00 25.00 30.90 156.20 22.70 22.60 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.95 0.70 -5.97 5 71 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 43 0 0 64 0 314.20 61 90.71 CHANGED MptNNtsp-sscspspsplPps.s.ppsppshh....-..cFlEpRLCsYE+Eps.hph-ChluhhaslpKQVscE.......EssCWlELSuLV+u+KALGFPLlYusKcaoa.G+sLYFEpFKs...s+Vp+LTsso+ClsDsllFQIVlILYSLYK+sIaSD-FlFDLVSIPRSTIohSVNQLVFslsTDsLVVLSls.TRLY+AcLPQSCYLsYlau+ssLA.++shEooNY.FFEWFI+NHlchLo+QslDIFKlKK+YlTsspIsRLsEPGTLVYVh+-DhalhGITLT-VSloDNVRVLFSsDGt..slLEIDDFSlcDVFsAGELlsRSQsooI ..................t.+hNsppRh.p.s.hsps+sc.pss.ss.+pcNK..CR-......EsuDFINIRLCAYEKEYC...NDGYLSsA.YYMLKQVDDE.......EhSCWuELSSLVRSRKAVGFPLLKuAKRI.S+....G.S.MLYFEQFKN....oKVV+LTP.QlK..CLsDoVIFQsVVILYSMYKRsIYS.NEFCFDLVSIPRTNIVFSVNQLMFNICTDlLVVLSICGNRLYRTNLPQSCYLNFIHuH.ETIA.RRGY...E+SNY.FFEWLIKNHlSLLTKQshDI.hKVKKKYATGAPVNRLLEPGTLVYVP.KEDhYalGISLTDVSISDNVRVLFSTDG...hVLEIEDFNI+clFMAGEhFVRSQSSTI... 0 0 0 0 +3960 PF00485 PRK Phosphoribulokinase / Uridine kinase family Finn RD anon Prosite Domain In Arabidopsis the region carries two binding domains, a phosphoribosylpyrophosphate-binding domain and, at the very C-terminus, a uracil-binding domain. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.17 0.71 -4.75 12 8151 2012-10-05 12:31:08 2003-04-07 12:59:11 13 32 4282 57 1728 5333 1269 168.80 27 60.59 CHANGED lIulsGsSGuGpoost+phsplFst..l.sh...............hsuhhphsps..chpht-cpsttspphoahuPcANsFDLhhphh+shtputsscp.hYsHsssptss...........pph.tssclLhhEGLHuhhspc...ltpLhDhplhlsss.slchhpKlpRDhspRG+ohEulhsoI..tRhPDassYIsPQhppsDlshpplPsstsu ...........................................................................................lIul.uGusuuG.....KoT.hu....ptlhp.h...lt.t.......ht...................................h..l.spD...s..a..h..h...s..t..p..h..h.......p.....c...t...h........h...t.....t.....t...h....s...h.......t.......t...P.....p......uh....Dh....p........h...L..h....p.h...l....p...p....l....t......p.........G.........p.......s....l....p..h..........P...h...Y....s.....ah....h.....t...s.hh..............................tp.....h...h..p..s.....s..c...l..l.IlE...G....l......h.......s...h.......t...s...tt..........................l..p.c...h.h.D...h...pl.....a.............V.............D.............s.............s...........................-..............l.............p.............h.............h.p+..............l........RD.h.........R.s.......h...s..h...p....tl...h..tp..........h...hhh.....a.t..a..l.tP..p..hphscl.h.........h............................................................................................................................. 0 527 1013 1414 +3961 PF04872 Pox_L5 Poxvirus L5 protein family Kerrison ND anon Pfam-B_6088 (release 7.6) Family This family includes variola (smallpox) and vaccinia virus L5 proteins. However, not all proteins in this family are called L5. L5 is thought to contain a metal-binding region [1]. 
21.90 21.90 22.10 22.70 21.80 21.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.66 0.72 -4.34 12 65 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 0 0 46 0 76.90 55 61.63 CHANGED ElhhhFp.+s+hs.sPI-ph.pco.LhCctstLhI.sLP..ssphsALulstpPIshpsCcsLLpSING.SppVSLsDlLpR .............ELsMhFh.Kp+lP.DP.ID+l.c+usLuC--DKLMIhGLP.hsspssALSIN.u+PIVYKcCscLL+SING.SQ.VSLNDlLRR... 0 0 0 0 +3962 PF03356 Pox_LP_H2 Viral late protein H2 Finn RD anon Pfam-B_3929 (release 6.5) Family All Members of this family show similarity to the vaccinia virus late protein H2. This protein is often referred to by its gene name of H2R. Members from this family all belong to the viral taxon Poxviridae. 21.40 21.40 21.40 76.20 21.30 21.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.95 0.71 -4.61 12 119 2009-11-17 14:52:54 2003-04-07 12:59:11 10 1 51 0 0 56 0 179.90 64 97.56 CHANGED MDcTTLsVNGlEL-YsRp+tscslphA+sSTlhFFlllLhlSslLhaaQsScNslhsELs+YtRIKsslpuW+PLVpuKT+lES-RGRhhuhs.+sDhFpFpClDFGsYalPlRLDppTFLPQAIRRGpGDGWMl+KAuchDsSAQQFCEYllts+usNsITCGhcMhsclGYSGYF.suHWCushhsll .MDKTTLoVNuspLEYlREKtscGIRuAKsSTlhFFVLILAlSuLLLWFQsSDNSlFuELsKYsRIKNsVpuW+PLV-SKTKLESD+GRLhAAG.+D-lFpFpCVDFGuYalPlRLDpsTFLPQAIRRGsGDGWMVKKAsKVDPSAQQFC-YLI+s+ScNVITCGscMhspLGYSGYFhssHWCSsh.sh.... 2 0 0 0 +3963 PF04887 Pox_M2 Poxvirus M2 protein Kerrison ND anon Pfam-B_6168 (release 7.6) Family This family includes M2 protein from variola virus. The function of this protein is not known. 
25.00 25.00 64.90 64.60 20.60 20.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.38 0.71 -5.00 4 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 26 0 0 40 0 194.80 75 88.90 CHANGED pCsopcYRYW.LAupLTIGLsYsI.Ep..tEC+hc....s+hushllTGYGLpIshpITs.lspphVAuuEGhsssNpLslLLFhspchoc.tssl....plTITCh-h-CDssshcpsLssplpKN...-lhIhGSClTCVsL-T.Pspl......Nshhs+PhShhltcssuYohR.....sapcchspC.lDhccluYslC.+p .........CPPRQDYRYWYFAAELTIGVNYDINSTIIGECHMSESYIDRNANIVLTGYGLEINMTIMD.TDQRFVAAAEGVGKDNKLSVhLFTTQRLDK...VHHNI....SVTITCMEMNCGTTKYc.SDLPESI.H+pSSCDITINGSCVTCVNLETDPTKI......NPHYLH.PKDKYLYHNScYuMRGSYGVTFIDELN...QCLLDIKELSYDICYRE...... 0 0 0 0 +3964 PF03341 Pox_mRNA-cap Poxvirus mRNA capping enzyme, small subunit Mifsud W anon Pfam-B_3728 (release 6.5) Family The small subunit of the poxvirus mRNA capping enzyme has been found to have a structure which suggests that it started life as an RNA cap 2-prime O-methyltransferase. It has subsequently evolved to a catalytically inactive form that has been retained in order to help stabilise the large subunit, D1, and to enhance its methyltransferase activity through an allosteric mechanism [2]. 
25.00 25.00 45.50 45.40 19.80 19.20 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.72 0.70 -5.36 9 57 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 42 4 0 43 0 279.60 73 99.50 CHANGED h-pltc.I+-GltshhPFY-oLP-LsLshGKs.LPSLEYGANYFLQLS+VNDLNRhsTDhLSLaTHDLhhsEoDLEKVYE.hsIcSVKoYGKoI+ADAVVsDLSA+NKLFKKERshlKSNNaLsENNLYluDYcMlTFEVFRPLF-huoEKaCIlKLPTLFGRsllsslRVYCSLFKsVRLaKssuDSWLKDSAIMVspcsaptNlscFhoalRcsTKSssW+DuNN.VpFslLcssV-+EFI-KFLsFSspVYEuLYYVHSLLYsSMTSEsKSIENEaQ+KLlKLLh ..MDcIsKNIREGsHVLLPFYEoLPELNLoLGKSPLPSLEYGANYFLQlSRVNDLNRMPTDMLpLFTHDlMlPEoDL-KVYEILpIsSVK.YGRShKADAVVADLSARNKLFK+ER-AIKSNNaLTENNLYISDYKMLTF-VFRPLFDhssEKYCIIKLPTLFGRsVIDThRlYCSLFKsVRLhKCVSDSWLKDSAIMVAS-lpKKNlDhFMSHV+SVTKSuuWKDsNs.VQFSILp-PVDoEFIsKFL-FSsRVYEALYYVHSLLYSSMTS-SKSIENcaQR+LlKLLL.. 0 0 0 0 +3966 PF03213 Pox_P35 Poxvirus P35 protein Mifsud W anon Pfam-B_2785 (release 6.5) Family \N 19.20 19.20 19.20 20.10 18.60 18.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.03 0.70 -5.43 12 130 2012-10-03 05:28:31 2003-04-07 12:59:11 9 3 65 0 1 104 12 279.60 48 99.48 CHANGED MAtsp.....clslYVIPll.GRssp-VlPphct...h..tchcslKc..........ss...psp.thphs............+hh.Wp......Gsl-s...ac-.......YFSuhCphhCopEhKpolA+HhSLW-ph...spsshpss-scallVlEDDNTlpc...lp..sl+shIpuMp-psIDlLQLREshpssssRs.hs.t.tp.shasYsGGYDhSLSAYIIRluoAh+lhspIhcptGlSsuLshElh+lEpcLtlNR.VLssuspYVpH-h+hlsc+Rss.ch+sulhsRlusWluppaPshhYhlopPLFSFFGlFDIsllGllhlLaIllLlIFslNSKLLWFLuGhhhoYll 
...............................................chslhVlPll.sRssu-shPpl+p...h....pc.hcslKc.......hsp.tsh.h.pscsch.hsh.................................thl.Wpt.....Gslcs...asc.......aFSuhCsshCTcEsKpsIA+HhuLWcSh...h.s-hcspcscalVllEsDNslcc.....lphlcsllpsMp-ppIDIhQhREhhpssp.s+s.h.s.p.ts.thasYs.GGYDhSLSAYIIRlssshplhscIIKstGlSouh.hElh+lEpchplNR.llssus+YV.H-.+hlscpRhp.ph+sshWsRluphhu+paPshhYhhopPLlSFFGlFDIsllGlllILFIlhMl..IFslsS.KLLWFLsGhhhThhl.................................................................................. 1 0 1 1 +3967 PF03395 Pox_P4A Poxvirus P4A protein Mifsud W anon Pfam-B_2985 (release 6.6) Family \N 25.00 25.00 49.40 49.30 18.50 18.10 hmmbuild -o /dev/null HMM SEED 888 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.75 0.70 -13.56 0.70 -7.01 4 92 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 46 0 0 84 0 847.60 67 98.38 CHANGED QLEcSEYlF+llSTlLPplC...............LDYKV--tLppTFVHPFDslh.sshGsls+t-plpsulppLGINYLlsshsp..+LFshllssspIthhssshths......................................................................................................................................................................s+sNPllNsHoFs-LPsFTpcLlphRhps.Et+ARFhGGYlhsctus....................ssshs.h....cYPsLsF-NTYhhNlLYpssIss.ht..F+..................................................................................................................................A+sssGlhhhpDasNLlulRsLlospspsRF-psashpphApcaslsls.s.s.sclDLhoMso........................................................................................KphlhahQaFsDpYtch.hpaNGsslh.sc.clhslslSh+YQuhIs+Lspha.slPlhNshslhshstspsttphslsh.slpalslssNlsaFlshhN.......hlA+EpRs..ssLcs.p.ShFWDGlDYp-YKpKplp-hhFIsuoCYVFuLap+NsTTaCShLoDhluAspTP.RVCllPRsluu.+TsscLluEsLcSlNshoh+-FP+pssStl.HIGLSEpGFMRFFQLLRLlss+s.EoAlKEVlhsYsGlKhuDpGsPah.I+p-SYpsFlhLLFuuMGF+VoV+pSlhGSsNhohIolRP.RVo+pYIsshLhKsSCS+s-A-KLlousa-LLsFMlSsush+DhpSYh.tRphssshaauG....s.ps--..tTIIphopPlslLDRlslpGlhuAsTlsEhLssDhFtPENthFKsNLpthIpSsploG..-sIhptMPhslLD+llTs......
.AGssp.VSls-llDNIss.o.DCDtTN-IsDhINouLKcohsKcNshlsSpshsuVANpScppLtDl+pS.oC+hAslFKsLA+SIYTlERIFNs+lSD-VKh-hLEKhKsFosISpSLYsD.......LIulEslKAlLYIlKRSG+sl-cTpIssD...-l+KSYplI+PKIhshhNYYsphSRsYFppMKKNLNMpDsstsS .....QLEDSEYLFRIVSTVLPHLC................LDYKVCDpLKTTFVHPFDlLLNNSLGSVTKQDEL.QAAISKLGINYLIDTTS.pELKLFNV.sLNAGNIDlINssINIS........................................................................................................................................................................SETNPIINTHSFYDLPPFTQaLLNIRLTDTEYRARFIGGYlKPDGSDo...................MDVLAEK....KYPDLNFDNTYLFNILYKDV.IsuP..I.cp.FK..............................................................................................................................................AKIVNGVLoRpDFDNLIGVRQYlTsp.DpsRFDssYsIsDAApHYGVNLN..TLPLPNVDLTTMPT.......................................................................................YKHLIMacQYFlss.........Y-+VsIYYNGN+hlasD.EIhsFsISMRYQSLIPRLV-.hFPDIsVNNNIVL+TRD.PQN.AsVNV.sV.sLPNhQ..FVDIs+NpKFFINFhN.........LLAKEQ.RS..TAIKV.sKSMFWDGhDYEEYKSKsLQDMMFINSTCYVFGL.YNHNNTTYCSILSDIISAEKTPIRVCLLPRVVGG.KTVTsLISETLKSISSMTIREFPRKDKS..IMHIGLSETGFMRFFQLLRLMADKPHETAIKEVVMAYVGIK.LGDKGSPYY.IRKESYQDFIYLLFASMGFKVTTRRSIMGSNNISIISIRP.RVTKQYIloTLMKoSCSKNEAEKLITSAFDLLNFMVSVSDFR...DYQSYR....Q....Y....R..........N..Y.......CPR...YF.YAGS...P.............................EGEETIICcSEPISILDRIDTRGIFSAaTINEMMDTDIFSPENKAFKNNLS+FIES.GD.ITG..EDIhCAMPYNILDRIITN........AGTCT.VSIGDM.LDNITsQS.D.CN.MTNEITDMINASLKNTISKDNNMLVSQALsSVAN+SKQp.IGDLRQS.SCKMALLFKNLATSIYTIERIFNAKVuDDVKAShLEKYKlFTDISMSLYKD.......LIAMENLKA...MLYIIRRSGC+IDDA.QITTD...DLVKSYSLIRPKILSMINYYNEMSRGYFEHMKKNLNMTDGDSl....... 
0 0 0 0 +3968 PF03292 Pox_P4B Poxvirus P4B major core protein Mifsud W anon Pfam-B_4215 (release 6.5) Family \N 25.00 25.00 27.30 25.20 17.20 17.00 hmmbuild -o /dev/null HMM SEED 666 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.24 0.70 -13.22 0.70 -6.58 9 314 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 152 0 0 187 3 273.70 52 97.52 CHANGED MEoshs......lFlsu+lsLtssYsNphL.LlspsHlHtPspSlSCSlCsSLuplss.--hISAGARppR.sl+R...........R...stspsst....p.ss+psss.h........shVPIDEluSTpDWpl+LR+DGsAIA+YLpssKsDlpNFTIQDhlslM+KLNIhRosRsELFELLuHVKuoLosoSlSVKsTHPLVLIau+ucP+IG-QhKEL-+lYSsSpYphLLSTTRFQShHFsDMSSSuDLhFca+ssDSshFlHPIhhALFGlKLPALENsFVaGDSYSLLpQLach+KV+P-NYMLLVNRLTE-uPIlhTGVsDslSTEIQRAslHTMIRKhIhNlRMGIFYCp---ulDsaLMKIIHssCSplMoDEEQhLASILSIVGF+PsLVSVs+Puhussa....DMpLQoVPYIVVsPhKMITTSssPISINosslaSLTaDutoGRVlFsPsshuYtttsssssssshssh...sssshtpshsSPVIVNGsLlFYVERRQsKNhhuGECYTGaRSlIsDpPI-Vup-lslNGIMYRL+SAVCYKlGD...phh.ssCs.....uuDIFLKGaYTILFTEhGPWhYDPLSlasKuuR-uRLhRAhKNpYt+ps..sthD-up.FY-WlKG-Gus.hhtuKQQ.LMNHhsMF-DDLLoMEEAMSLISRpCCILIYAQDY-PYloAKsIo-lF 
.................................................................................................................................................................................................................................................t.............shlsIDElTSTHDWQhpLRKDusAIs+YLh-pKCDlpNF.ThQDLlpVM+pLNIIRs-RpELFELLuHVKuoLossSV...SVKsoHPLhhIYu+scs+IG-QhK.L-shasPS.cYQsLlsTTRFQSspFsD..MSo.SS.DhLFcFK.cpD..Sh..halHPIlhA............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +3969 PF03296 Pox_polyA_pol Poxvirus poly(A) polymerase nucleotidyltransferase domain Mifsud W anon Pfam-B_4019 (release 6.5) Domain \N 19.50 19.50 19.90 19.50 19.30 18.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.85 0.71 -4.65 13 65 2012-10-02 22:47:23 2003-04-07 12:59:11 8 2 45 9 1 62 42 149.10 72 31.45 CHANGED chsShcclAc...chLsShNVsshoc.cl..MGRHsVSsL....VssVNpLMEEYLRR....HNKsCICYGSYSLHLLNPcI+YGDIDILQTNuRsFLIsLAFLI+FITGpsVlLLKVPYLKNYhVL+DccssHIIDSFNIRQcTMpsIPKlLIDNIYIVDPs .........N.lTSMEELARDMLNSMNVAlIDK..uKV..MGRHNVSSL....VKNVNKLMEEYLRR....HNKSCICYGSYSLYLIN..PNI+YGDIDILQTNSRTFLID..LAFLIKFITGsNIILSKIPYL+NYMVIKDENDNHIIDSFNIRQDTMNlVPKIaIDNIYIVDPT........ 
1 0 0 0 +3970 PF03294 Pox_Rap94 RNA polymerase-associated transcription specificity factor, Rap94 Mifsud W anon Pfam-B_4535 (release 6.5) Family \N 25.00 25.00 63.60 63.50 19.20 19.00 hmmbuild -o /dev/null HMM SEED 795 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.67 0.70 -13.50 0.70 -6.59 10 107 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 53 0 0 84 0 595.20 68 99.97 CHANGED M-oKESlLIEIIPKIKsYlh-sshssKSYsDFIScNKsIFllNLYNVusITEEDIRLLYsTIEQNhDlDDQTLlSIFSYIGYKFEQsl+EEIooSLthsEps.hTD-M.TaNhYsLFFNTLDMYlRQ+RINVLVND-ssuD......lsINY+..TSDLsSsF-sstEPEVREIPFNMK-hlsYVSKNlDQlRFSKKYLDFAYLCRHIGIPISK+KhNlRYlaLYclDGloIPIVI+DFLDVKYVYLcETGKsY+NsFSE-hNsSLhDWG+lIIPpL...KN++LYSYlFLSsYaL+DLF.-LIpp+-ssF+stcp.hctIpltEPtuW+c-VplEahPCEHQI+LtEAhKlDs-YFsKlNsFAsEYIYYEDGlAYC+ICGhNlPpFNlDAADVIKss..VIVoTaNKSIFLSEPYSYFVHSQRFIFNIIMSFDsIMKSQTWsMKYNINRLILNFLI-INu+RQcYEK+FusEI.K+GlFFLRLSANLFDIHsSSTELFYSuKhLNLNYIVlLVIlLNSSADFIlSYMsuKKKp........VsEooLKauISVIIYDFLlKT+ICEKsuLDTIs..LhT-VYhSIMPEELcsHapRIllEL+KLlSIpRocptPsYDV-s+h...Plssl..+FFsspslhsp.Mhshp.tch...pspslhpPs.hstsoc-shppF.cclThc--hKVLIRhNDTNAopLVhFsoHlKIEIE+KKlIIsLKs...................LFIsNsLKYYY..............SssuhYVFRFGDPFPFD--LIDppHVQaKlNsYNLLRaaLLPcSD.VFVYFucSLsR--LEYsFYhFLspYVNs.VppWIDENIo+IRELYhhNFNN 
....M-oKEolLl-IIPKIKsYlhDsshssKSYsDFIupNKsIFllNLYNVssITE-DIRLLYhTIEQNhDssDpTLluIFSYIGYKFEpsl+E-hsoSL.hs-p..hTD-M.saNhYshFFsTLDhhlRQ++lslLVND-hp.GD......h.lsY+..sSDLsosFssph-Pcl+cIPFNMKshlsYlpKNlDQlRFSKKYLDFAYLCRHIGIPISK+KhNhRYlahYplDGloIPIlI+DFLDVKYVYLEpTsKhY+NsFuE-.Nsul.-WG+lIIPhl...Ks+tLYSYlFLSsaaLpshascLltpc-shFhthpp..hchItltEP.uWcccVplEhhPCEHQI+Lh-AhKlD.spYFsKlNsFspEaIYYEDGlAYCplCGhNlP.FNLDAuDVlKss..Vl.VsTaNKoIFLSEPYSYFlHSQRFIFNIIMSFDsIMKSQTWsMKYNINRLILNFLI-INS+RQcYEK+FusEI.K+GlFFLRLSANLF-.psSSTELFYsuKhLNLNYIVsLVIlLNSSADFIlSYMpSKpKp........VpEooLKYuISVlIY-FLlKT+IC-KusL-TIh..LhT-VYTSIMPEELclHapRIhlEL+KLlSIpRSthpPNYDVEspt.t...Phssl..+FFsspslhs+sM..ss...pph....hppp.IstPs..sp..octshppF.+c.lTsD-DlKlLIRhpD.TNAoKLVIFPoHLKIEIERKKlIIsLpo...................LalsNsLKYYY..............Sss.LYVFRFGDPhPF---LlDpEHVQaKINCYNlLRYcLLP-SD.VFVYFSsSLNRcsLEYuFYhFLupYVN..VppWIDENIo+I+ELYhINFNN...... 0 0 0 0 +3971 PF03340 Pox_Rif Poxvirus rifampicin resistance protein Mifsud W anon Pfam-B_3377 (release 6.5) Family \N 25.40 25.40 27.90 86.10 19.80 25.30 hmmbuild -o /dev/null HMM SEED 541 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.82 0.70 -6.44 11 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 11 0 61 2 518.40 71 98.91 CHANGED hhsSlIss......--u...s+RpNVFusDsppPThYMP.QYIolsGlh...ssssssVls..aEIRDQYIsAhNpFlLoIsLPElKGlG+FuYlPYVGYKsIpcluls......SsNslIWEosGE-LFssshsscpA.phSGaSpELNDlSsGhoPNDsIK-ussVYlYl+TPFDs-..cTaSSLKL.u-oKlslslTFNPlSDlllhDusFsh-uF..l+-FVYssELSFlGYhV+slphKssYIEcs....RRsluQhNpsT..sslo-VaulTuLuVYlKP..aYG.h-N+FISYPGYsQTEccYIpuaV-RLL-DLlIVScs....hPcsF.P-su-lVEVPssGlVsIQ.DsDVhV+IDNVPsshslYaHTNlLlFGTR+NSssYNISKKFSsIsGsYScsTc+IhFoclpHolNIoDVSIPVulWsup+NlYsGDNRSspSKsKDlalNDPFlKGlDFhNK.hDlISRhEVRFGN-llYSEsuPIS+IaNpLLossssusRpL...........pFNasPtsFF+PTsLhANsSRGKDKLuVRVsapshDssNPIpYVsKQLVllCsDLY+loYDss.IplsKIs- 
.................lINSLIGu......DDu...IKRuNVFuVDsQhPTLYMP.QYIoLoGVho....NsusDspslu.oFEIRDQYITALNHLVLSlELPEVKGhGRFGYVPYVGYKCIpHVSlS......SsNGlIWElpGEELYNsCl.NNshALcpSGYSpELNDIShGLTPNDTIKEsoTVYVYIKTPFDVE..cTFSSLKL.SDSKITVTVTFNPVSDIlIRDSoFDaEoF...sKEFV.YVsELSFIGYMV...KNlQlKPSFIE+P....RRVlGQINQsT..AslTEVHAsTSLSVYsKP..YYGNTDNKFISYPGYuQsE+DYIsAaVpRLL-DLVIVSDu........PPoGa.PESAEIVEVPpsGIV..SIQ.DsDVaVKIDNVP-sMSVYLHTNlLhFGTR.KNSFlYNISKKFSAITGTYS-ATKRhhFuHISHSINIhDsSIPVSLWTuQRNVYNGDNRSscSKAKDLFINDPFIKGIDFKNK.TDIISRLEVRFGNDVLYSENuPISRIYNELLopsssGTRTL...........pFNFTP+hFF+PTT.IsANVSRGKDKLSVRVVYSoMDsNpPIYYVpKQLVVVCNDLYKVSYDpG.VslTKIh.G...... 0 0 0 0 +3972 PF03293 Pox_RNA_pol Poxvirus DNA-directed RNA polymerase, 18 kD subunit Mifsud W anon Pfam-B_4188 (release 6.5) Family \N 25.00 25.00 259.40 259.30 22.30 21.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.82 0.71 -4.73 10 55 2009-09-11 05:39:38 2003-04-07 12:59:11 9 1 41 0 0 36 0 160.00 73 97.25 CHANGED MSTFsppVYLPVsLpPHELTLDl+cNI+cAVh+cYLHKEouGlMAKKI-IChDpELPLGEIVNNpVVV+VPClVTYKYYKlGDlVpGTLNIEDESNIoVtCGDLICKLsRDSGTVSFsDSKYCFIRNGpVY-NGSpVSVsLKEAQpGh.-SsFVFLAoIlD MSoFVsNsYLPVTLcPHELTLDIKsNIRsAVYKsYLHREhoGhMAKKIEIpcDsELPLGEIVNNuVVIsVPCllTYtYY+VGDIVRGTLNIEDESNVTIQCGDLICKLSRDSGTVSFSDSKYCFhRNGsAYDNGSEVoAVLhEAQQGh.ESuFVFLAsIVD.. 
0 0 0 0 +3973 PF03396 Pox_RNA_pol_35 Poxvirus DNA-directed RNA polymerase, 35 kD subunit Mifsud W anon Pfam-B_3921 (release 6.6) Family \N 25.00 25.00 92.40 92.30 19.60 18.40 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.80 0.70 -5.38 11 79 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 48 0 0 66 0 275.80 64 95.56 CHANGED RcEpplsl-LsPulATFIKHGFNp+V+WPlLslGVVLsNTTTAVNEEWLTAlEahPTRKIFYsaspcILcpElsFCVYLpKo.QopscsYloLtDFDYYlIcsDs.php+l-KPKELcETLLHoFQEYR..hKNhQsIELlAFSSGTpIs--llppLo.FLslElFNREYsNlKsllsppFcohsPFIVlAPhG+LTFFlEcYsWhDhKoHlK-lLDaLEtsLluDl+SHplpss..hpD.s.ssSuYNssSGhLaVNDllTMslVNFFGCsuRLsoYH+FDhoplDscsFl+ALucAh ..REEsoIsV-L-PuLATFIKpGFNshV+WPLLNIGlVLoNToTAVNEEWLTAVEHlPThKIFYKHlaKILsREhGFhVYLK+S.QSEcDNYITLYDFDYYIIDpDs.olohVDKPsELKETLLHsFQEYR..lKSsQoIELIAFSSGTlIsEDIVs+LT.FLDlElFNREYNNVKsIlcs-FlupuPFIVIuPhGKLTFFlEsYSWhDFKSphKDIlDFLEGsLlAsIHsHhIcVu..ssD..ETVSSYNPpSGhLFVNDLhTMsIVNFFGCNSRL-SYHRFDMTKlDlEhFlKALS-Ah. 0 0 0 0 +3974 PF04490 Pox_T4_C Poxvirus T4 protein, C terminus Kerrison ND anon DOMO:DM04331; Family This family of poxvirus proteins are thought to be retained in the endoplasmic reticulum. M-T4 of myxoma virus (Swiss:O55698) is thought to protect infected lymphocytes from apoptosis and modulate the inflammatory response to virus infection [1]. 25.00 25.00 49.20 49.10 24.80 18.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.99 0.71 -4.45 6 35 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 17 0 0 33 0 141.70 45 64.07 CHANGED sppsFIlsslccuVYusGHhsYhEhSs.Nlshl-slPpCu+pIoluVSCDp.ssshp.atcppphpcs-lpIslphDoSCl+ahShshSlps.Cp++Louhup........-pLsCstlcsppHs+YLKoCssspFDR.haKpYh.HQ+salsKlhh .pspsFIlssscpshaGssH.sYlEhSs.Nsus.-slPcCS+plplSV.CDQ.suslc.apchpphcssslpIslKhDoSCIcaluhshShhNECp++Looh.p........-pLoCsuhchps+sKYLKTCossKFDRpsaKpYh.+p+shhsKs..L.... 
0 0 0 0 +3975 PF04491 Pox_T4_N Poxvirus T4 protein, N terminus Kerrison ND anon DOMO:DM04331; Family This family of poxvirus proteins are thought to be secreted or retained in the endoplasmic reticulum if the protein also contains an additional C terminal region (Pfam:PF04490). M-T4 of myxoma virus (Swiss:O55698) is thought to protect infected lymphocytes from apoptosis and modulate the inflammatory response to virus infection [1]. 25.00 25.00 28.90 31.10 18.80 17.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.27 0.72 -4.13 5 44 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 24 0 0 36 0 46.50 59 24.35 CHANGED TWclcIGLCIps.cDF+up+TGC..+hspGPGGLITEGNGFRIFsHD-C .TWtLKIGLCIhA.KDFYscRTDCSVHhssuuGGLITEGNGFRllhHDpC.. 0 0 0 0 +3976 PF03295 Pox_TAA1 Poxvirus trans-activator protein A1 C-terminal Mifsud W anon Pfam-B_4259 (release 6.5) Family \N 20.70 20.70 21.10 93.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.95 0.72 -4.32 10 57 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 41 0 0 35 0 62.90 71 41.95 CHANGED uMIKopVALREEPKloLLPLVhYccPEcVlslINsLRsKEGlYGoCaacEccpsIcloLRSLl .SMVKSHVALREEPKISLLPLVFYED.EKVINsINhLR-KDGVYGSCYaKENuQhIcISLRSLL 0 0 0 0 +3977 PF03355 Pox_TAP Viral Trans-Activator Protein Finn RD anon Pfam-B_3956 (release 6.5) Family These proteins function as a trans-activator of viral late genes. 
25.00 25.00 245.00 244.90 19.50 19.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.84 0.70 -5.02 5 56 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 42 0 0 35 0 253.90 80 99.79 CHANGED MSLRIKIDKLRQIVTYFSEFSEEVSlNVDsuSslMYIFAoLGGSVNIWoIVPLsSNVFYDG-sNpVFNLPVLKVKuCLCSFHNDAVVoIEPDl-NssV+LSSaHlVSVDCNcEslPHRTsTuISLGIsQKKSYIFNF+KYEEKCCGRTVlHLDLLLGFIKCISQYQYLTVsFc.DKNLlLKTPGo+DTFVRcYSMTEWSP-LQsYSFKIAIsSLNKLRGFKKRVlVFEoKIVMDsDDNILGMLFRDRlGoY+VNVFMuFQD MSlRIKIDKLRQIVAYFSEFSEEVSINVDSsDpLMYIFAALGGSVNIWAIlPLSASVFYcGs-NhVFNLPVSKVKSCLCSFHNDAII-IEPDLENNLVKLSSYHVVSVDCNKELMPIRTDToICLuIDQKKSYVFNFHKYEEKCCGRTVIHLEhLLGFIKCISQaQHLsIhFK.DDNIIhKTPGNpDsFSREYSMTECSQELQKFSFKIAISSLNKLRGFKKRVNVFETRIVMDsDDNILGMLFSDRlQSFKINIFMAF.D. 0 0 0 0 +3978 PF04441 Pox_VERT_large Poxvirus early transcription factor (VETF), large subunit Finn RD anon Pfam-B_3920 (release 7.5) Family The poxvirus early transcription factor (VETF), in addition to the viral RNA polymerase, is required for efficient transcription of early genes in vitro. VETF is a heterodimeric protein that binds specifically to early gene promoters. The heterodimer is comprised of an 82 kDa (this family) subunit and a 70 kDa subunit. 
25.00 25.00 242.20 242.00 16.70 16.70 hmmbuild -o /dev/null HMM SEED 700 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.39 0.70 -6.37 9 69 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 45 0 0 60 2 696.20 72 98.19 CHANGED hYhVsPQLVlLVs+sQcIc+sLYLohYshID-p..SslY.YFl+saLch..ppPcll+RHILLTL+lpQlKGYlpsLLslp-DIIIYSHKNNLEYSYVDNTIFNPFs.TQ+KTLI+sD...uFLYNlYssACDFLVlWVApAs..DTslsEhGSY........EEVDsNILKFEp+LlpsFspLDL-holpSKFNNIFRTNL+.TGL+sIlpp.....p...sh+hLlh+oDEaFIshoGN+FlLsD.......EpLNLSIWDssssLAISSDGcTlhlNsVcLFT-lls-.slQMERIKu...DlTYKlaLsTPITS+lKLDIETSFIFlET.ATNNILLSsDKKISIILAKNHISIKVKNaIPNIEKYFTFLllulNphFNsVQpSuDFTKlETlYWSRICQNTKsKNRKPVIlsSLDss.MpKlSDNFY+Scp+....................EVFlNsNGlMFoChDPhG+YNslGFLuIFY+Lp+..hCIPCCFL+sQuHo-TFpSCVapc-.lpcshlsPYILNFGKlV.TpSKlSFLPIlFssahNcshpIshEpDNKRL+tTsGYaVV+uCpss....IhRlRThsDIIpFVNpsssILIssDhVYFPM....sh.chsN...+laILIQEIVHElVhl+KptcpDtIthh.ssps+...L+-hFPhposphsItp-sGlsLTTDGFalDGchFspcLSopassFhcNlospsts.uKYFu.lFKYVl.....T-sh-hFIKTWlINIMlphGhssp.st..hhstLEKYYsp R.YIVSPQLVL.VGKGQElERALYLTPYDhIDEK..SPIY.YFL+SHLNI..ppPEIVKRHILLTLRMoQLKGYLGNLLDI+DDIIIYSHKNNLEYSYVDNTIFNPFVaTQKKTLlKsD...SFLYNVYPGACDFLVIWVARAs..DTSIPEFGSY........EDVDNNIlKFEThLhEVFPpLDLDhTVESKFNNIFRTNLKhTGLKKIIQ+Vp..-LDl.NYKSLLS+hDEaFINMTGNHFILND.......EpLNLSIWDhDGTLAlSSDGDTlMINNVKLFTDLV.SDIDTQMERIKG...DITYKVaLuTPIsSRIKLDIETSFIFIET.ATNNILLSoDKKISIILAKNHISIKVKNHIPNIEKYFTFLVIAINuMFNSVQKSuDFTKVETVYWSRICQNTKNKNRKPlIIs.LDss.MKKISNNFY+SDcK....................EVFINDNGIMFTCMDPLGKYNpVGFLNIFHchpK..hCIPCCFL+DQSHcsTFSSCVHQhD.V-KcIlSPYILNFGKVV.TESKhSFLPIIFDsFLNDGMoAshEQDNKRLKETSGYHlVRCCsG-....sIVRLRTsSDIIQFVNEDKNILIsNDMVYFPM....NsoDIGp...KIHILIQEIVHEVhIVKK+EooD+IDFFPPNYKL...LKDLFPKQThpssIpSDuGMsLTTDGFYIDGKLFN-DLSSKYVTFTKNVhsS..DuV.uKYFSPLFKYVI.....oEAKDRFIKTWhINIMl+MsVDPss....IIPsLEKYYPN. 
0 0 0 0 +3979 PF04947 Pox_VLTF3 Poxvirus Late Transcription Factor VLTF3 like Finn RD, Iyer L anon Iyer L Family Members of this family are approximately 26 KDa, and are involved in trans-activator of late transcription [1]. 25.00 25.00 35.80 35.50 24.90 24.70 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.96 0.71 -4.75 23 135 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 120 0 1 101 263 169.80 31 59.54 CHANGED pthppssphs...hs+h.Ha+ssLpphQu+pssp........lsscllcplccclccpplp....................hsclspshVtpaLKcLs...h.sKpYcsshhIhphlTups...ssplspch-pplhchFcplhphapc..............tpcpshlsYsalLa+lhchh.....uhsppLssh.hlKstsKhpspD..placclhscLsWcahtt ........................p..pcsshas.s+h.Hh+psLc+h.u+p...........hss-llspLhchhcKppIs..................hs-lstshVpshLKthc...h..pKtYchVh.Ihsplpscc.....slosch.pclhclFcclhhhhpc.................ss.pphlsYSahL.Klh-lh.....uhscpLps..plKshsKtsspp..hlWcphhschphchh..p.... 0 1 1 1 +3980 PF04498 Pox_VP8_L4R Poxvirus nucleic acid binding protein VP8/L4R Kerrison ND anon DOMO:DM04340; Family The 25 kDa product of Vaccinia virus gene L4R is also known as VP8. VP8 is found in the cores of Vaccinia virions and is essential for the formation of transcriptionally competent viral particles. It binds both single stranded and double stranded DNA and RNA with similar affinities. Binding is thought to involve cooperative interactions between protein subunits. The protein is proteolytically cleaved during viral assembly at an Ala-Gly-Ala site. Possible roles for VP8 include packaging and maintaining the DNA genome in a transcribable configuration; binding ssDNA during transcription initiation; and cooperation with I8R protein to unwind early promoter regions. VP8 may also function in either transcription elongation or release of mRNA molecules from viral particles [1]. 
25.00 25.00 26.10 174.20 20.80 19.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.34 0.70 -5.03 8 58 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 43 0 216.10 66 86.45 CHANGED AKlKFPRShLSIaplVPRsMTRYELcLlpoEsITGsVFTTsYNl+KNLGls-.-KLThpsIEcYYLD.sN-VLTLMlsNTslscl..us.R++uRR.pKNPVlFRpGSsPLlllFcSRKKlsIY+Ecpcpss.sooYspIssslALls+YushpLLDVHoPSusLpLsAVYGh.sscpEL+KLuosKElEsYQsos.LpEPl+LsDFppLF-slKKsIsLTNlsh AKsKFPRShLSIFNIVPRTMoKYELELlpsENITGAhFTThYNIRpNLGLGD.-KLTIEAIENYFLDPNNEVhsLlINNTDhouV...lPKKuGRR.NKN.VIFRQGSoPlLhIFEoRKKlNIYKENh-SA..sopYo.IGDNhALISKYAGlslLsVaSPSoSM+LNAlYGF.TsKNcLcKLSoNKELEsYSSoP.LQEPIRLNDFlGLh-CVKKNIPLTsIP.s 0 0 0 0 +3981 PF04395 Poxvirus_B22R Poxvirus B22R protein Mifsud W anon Pfam-B_3510 (release 7.3) Family This is highly conserved C-rich, central region of poxvirus proteins from eg, Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. There are three pairs of conserved cysteine residues. 25.00 25.00 42.70 42.30 22.10 16.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.82 0.71 -11.44 0.71 -4.81 14 108 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 32 0 0 94 0 176.60 39 11.78 CHANGED hssC-ssShphMspssulPcpFNsTLpphulpssssops..aYtC.hl.psss-C....uls.hhsssohpslsp..pppsppppRcpco....hpshpp-DhhChaptYshsppts-.......Csssp....................cppppccpscss.ssp.sclshhutccLGs..+shIPpcssclQlGspG.t-us....VsGDssIYppVKpchcptlpshhssss.....ssshPp .............husCc.csSM..sLMsuVlssspEFNsTLcpl.G.lpsspssss...YYtC.hl.sssusC.phlsLsphlsshThssl.p...sshsssssR+pRs...................hp..ssc-lpClYcsYGlscp..sc.......C........................................hpps+ccp.scst.........pthcLhppupc-Lth..csVIP+uTT+hQVGupG.ssGs....VsGDss.apsVKschp.hhcchhPpls....hsh...cp........ 
0 0 0 0 +3982 PF00550 PP-binding pp-binding; Phosphopantetheine attachment site Bateman A anon SCOP Domain A 4'-phosphopantetheine prosthetic group is attached through a serine. This prosthetic group acts as a a 'swinging arm' for the attachment of activated fatty acid and amino-acid groups. This domain forms a four helix bundle. This family includes members not included in Prosite. The inclusion of these members is supported by sequence analysis and functional evidence. The related domain of Swiss:P19828 has the attachment serine replaced by an alanine. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -3.71 163 41592 2012-10-02 01:16:24 2003-04-07 12:59:11 20 3219 5404 127 13580 37590 2935 66.40 23 7.49 CHANGED ptltp.hlsphl..t......t.ppl...sss.ssh....h.c.hGh.DSlthhplhsplpcp.h......h..plshs..plh.p.tsolpplsphl ........................................ltp.lhsclL.......s........h..ppl............shc..ssF..............h..-..LG...s..DSLtslclhs..p.l........c..cp..h..............u..l.........pl.ssp............plh...p...hs.....Tltslsth......................................... 0 3350 7619 11106 +3983 PF02503 PP_kinase Polyphosphate kinase middle domain Mian N, Bateman A anon Pfam-B_2701 (release 5.4) Domain Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. 
25.00 25.00 27.60 26.70 22.40 22.10 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.22 0.70 -11.20 0.70 -4.82 173 3692 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 2906 6 697 2965 1012 200.80 31 31.95 CHANGED pppppalcpaFpcplhPlLTPlulDsuHPF.PhlsNtoLslslpL..c..............s......s..................................................t........pt..h.AhlplP..psl.PRhl..pLP........ttts...................alhLEslIptalspLFsGh...clhssh.FRlTRsuDlpl-.p..--.sc.DLlptlcppL+.pR+hG..psVRLElsss.hspplhchLhc..........plplsc.p-lapl.sG..lsLspLhplss.hs...pscLcas.sasPt.hs..ttl .............................................................................tpppalcpaFpcplhPllTPlul..D.s.s.HPF..PhlhsculslAVpl..p....p.........tps...................................................t..........................p.....tp..h.AllplP...psl..PRhlpLP.............................schs.t.th..........................alhL-sllpt..alccL...Fs..Gh...clhssapFRlTRsuDlpl-..p....--...s..c...sLhptlpppL.ppR+h.G..psVRLE....l.pps..........h.sp....t....h....h.c..hLhp..........chs.lsc.pDlahs.sG...lsLscLhplss.ls......+scLph..PasPhh....h.................... 0 208 457 598 +3984 PF03012 PP_M1 Phosphoprotein Griffiths-Jones SR anon Pfam-B_1336 (release 6.4) Family This family includes the M1 phosphoprotein non-structural RNA polymerase alpha subunit, which is thought to be a component of the active polymerase, and may be involved in template binding. 
20.60 20.60 21.50 21.50 20.40 19.70 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.94 0.70 -5.06 11 932 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 30 6 0 723 0 217.20 76 99.90 CHANGED MSKIFVNPSAIRAGLADLEMAEETVDLVN+NlEDsQAHLQGEPI-VDuLPEDhp+LpIsDspssphsspssppEtup-EDFYhsEupDPhlPFQSaLDslGhQIVR+MKTGEtFFKIWSQusE-IlSYVssNF.PtPssKsoc-KSTQTssccsppssptosssp+-cpSspsshsup.-sSGPsuL-WusoN--D-uSlEAEIAHQIAESFSKKYKFPSRSSGIFLWNFEQLKMNLDDIV+pA+slPGVsRlAc-GsKLPLRCILGaVA.spSKRFQLLVsoDKLuKlMQDDLN+Yhup .......................................HLQGEPIEVDsLPEDM+RLpLDDtKPSsLGEhA+sGEuKhcEDFQMDEGEDPuLLFQSYLDNVGVQIVRQMRSGERFLKIWSQTVEEIISYVsVNF.PssPG+SSEDKuTQTssRElKKET.hsuso.QR-SQsSKA+MsAQ.oASGPPALEWSATNEEDDLSVEAEIAHQIAESFSKKYKFPSRSSGIFLYNFEQLK..................................................................... 1 0 0 0 +3985 PF02818 PPAK PPAK motif Bateman A anon M Greaser Motif These motifs are found in the PEVK region of titin. 21.00 21.00 22.80 21.00 18.10 20.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.44 0.72 -4.04 33 559 2009-01-15 18:05:59 2003-04-07 12:59:11 10 58 28 0 180 495 0 27.60 59 1.68 CHANGED PPsKVPEsPKKsVsEEKlslslPKK.Es ........PPAKVPEVPKKsVPEEKlslslPKKsEs... 0 15 15 26 +3986 PF01326 PPDK_N PPDK_N_term; Pyruvate phosphate dikinase, PEP/pyruvate binding domain Finn RD, Bateman A anon Sarah Teichmann Family This enzyme catalyses the reversible conversion of ATP to AMP, pyrophosphate and phosphoenolpyruvate (PEP). 
20.00 20.00 20.10 20.10 19.90 19.90 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.06 0.70 -5.40 101 5203 2009-01-15 18:05:59 2003-04-07 12:59:11 14 40 3545 11 1535 4567 2865 323.40 27 40.31 CHANGED tsssth..h.GGKussLucht.............ptGl.sVPsGFslos.ss..apthh................st..l.t.l.th.t.....................ts.tthtthsttlpphlhp.sthP...spltpt...ltps..........hpp..hht.........................s.lAVR......SS.......................AssEDhsps..SFAGphcoh..Lslpu............cplhpul+psauShass+AhtYR........tpps.hst.............tpsthAVlVQpMltu........p..soGVhFTtsP..........hsu.ppst.hhlpushG.LGEslVuGps.ssDpahl..pp.th..........................stp.h.hh.........................................tttpt...sLs-...............................pp.....ltpLsph........upclEpt..aGt......P..D..IEWAlss.......sp...lalLQuRPloshtptsttt.tht.......................lpst .................................................................................................................hp...th...l.GGKsAsLuEMh.................................................thGl.s...VP.s.G.FslTs.cu..hpp..ah....pp........................................................st.....l.p.p.l.hphhp....................................................l.ls..s..hs..th.t.t..s.hsthhp.hlls..hshs........sch...ps........ltpt..............hsp...hhs............................................sshs.lR..........oS.......................................................usuED..h.pt.........uh.AG.p.coh........lslps.......................................................h-pl..htAlctVauShhssRAlsYR..............htps..hsc..................................p.huhuVsVQp....MVhu.........................s....uoGVhFT.hss..........toG.tp..p..h......s.......h...hs.G..GE...sVV.s.Ghh...sP.cp...hh.h...t............................................................................................................................................tph......slps...............................................................
.............................................................................................pt....hpcLs.chutplEpH....Y.tc........................s...DIEash-s............................Gc......LallQsRst...ppstttt.................p................................................................................................................................... 1 586 1082 1362 +3987 PF01239 PPTA Protein prenyltransferase alpha subunit repeat Finn RD, Bateman A anon Prosite Repeat Both farnesyltransferase (FT) and geranylgeranyltransferase 1 (GGT1) recognise a CaaX motif on their substrates where 'a' stands for preferably aliphatic residues, whereas GGT2 recognises a completely different motif. Important substrates for FT include, amongst others, many members of the Ras superfamily. GGT1 substrates include some of the other small GTPases and GGT2 substrates include the Rab family [2]. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.44 0.73 -7.42 0.73 -4.39 143 2973 2009-09-11 15:11:22 2003-04-07 12:59:11 17 39 322 678 1957 2958 11 30.30 27 27.00 CHANGED cELphspphlptsscN.assWpaRphllppht ..........pELphspphl..pp..sscN.a.ssWpaRphllpp........... 0 614 1031 1549 +3988 PF02541 Ppx-GppA Ppx/GppA phosphatase family Mian N, Bateman A anon COGs Family This family consists of the N-terminal region of exopolyphosphatase (Ppx) EC:3.6.1.11 and guanosine pentaphosphate phospho-hydrolase (GppA) EC:3.6.1.40. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.61 0.70 -5.22 10 5020 2012-10-02 23:34:14 2003-04-07 12:59:11 11 15 3248 19 1263 3783 1832 281.70 27 64.20 CHANGED llschssGt....hphls+pKppV+Lu-u.hspss....hLsp-uhpRslssLpcFsElhpsass..pplpsVATuAlRcAhNu--Fls+lp+thG.....hsl-lIoGp-EARlsahGVspsh..scuctlVlDIGGGSTEllhGps.....tcsptlhSLslGsVplscpahscD..slspcphpth+chlcph..LpchssphchtG....htpslGsssThcsltplpssps.........ttpITtcclpphlcclhphsp.....cchcltGlsp-RAsllsuGslIlpulhctLphc.slplSssuLREGllhuhlhpc .............................................................................t...stt......h.p.hl.s.ph.+p..hVRLupG...lspss...............tLsp-uhp.R.....u.hpsL.p.hFsch.h..p.....s.h..s.s......pp..l..p..sVA.Tu.slR..p..AtNu.s..-Flpcspc.hh.G.........h.sl.....cl.IoGpEEA+..Lh.a............h...G............l.......t.......p.....s.....h...............s.........p..........s..........c..............h........L.....V..l.D.....IGGGSTE..ll.hup.s.................................tp.stt....h...h...S.h..s..h.GsVp..h...t.....c.p...a.......h.....s.....s...s..............hs...p..c.p....h...p...t...h....pt........t...sppt..............l.p...s.....h...s....t.....p....h.ph.ts......................h.hs.l...G.....s...u..s.T....l.p.....s.....l...t.p.l...h...h..t..h.s.........................................sttlo.hpclpp....lhp...cl.h...p.hst...............hpc..h.p.l...G...l..s.t-R.tsl..lsuGhslltsl...hctl.....s...h.........p...p......h......tlu.ss.uLREGllhphh...h........................................................................................................................................................................... 0 378 804 1077 +3989 PF04403 PqiA Paraquat-inducible protein A Kerrison ND, Finn RD anon COG2995 Family Paraquat is a superoxide radical-generating agent. The promoter for the pqiA gene is also inducible by other known superoxide generators [1]. 
This is predicted to be a family of integral membrane proteins, possibly located in the inner membrane. This family is related to NADH dehydrogenase subunit 2 (Pfam:PF00361). 21.50 21.50 21.60 21.50 21.00 21.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.76 0.71 -4.66 167 3875 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 1245 0 618 2356 195 157.40 30 72.64 CHANGED ulpcs.hAlhluullhhlsANhhPlhp..hph...hGt.ppssTlhuGlhtLh.ppGthslAhllhhsSlllPhh+llslhhLhhsh..pht...............h.t..htpth.lh+hlchlscWSMlDlFllullluLlclss........luslpsGsuh...hsFuhllllohhushphDsRhlWcthp.t.pp .................................................lppshAhh.ls.ulllhl.ANl..hPlhh..........hpl.............hGs...p.psT..lhsGlh.L.h.pp...u...s...h.sl.As..llFlsolhlPhhpllslhh...Lhhss...c.ht.h...........................h.shppp......lhchl.ph.ltcWuMlD............l.F.............l.lulhluhl+..hts...................................hhs.ltsG.su.h...hhFshhllLThhus.phDsRhlW-ph....t..................................................... 0 132 279 449 +3990 PF01502 PRA-CH Phosphoribosyl-AMP cyclohydrolase Bateman A anon Pfam-B_782 (release 4.0) Family This enzyme catalyses the third step in the histidine biosynthetic pathway. It requires Zn ions for activity. 21.50 21.50 23.60 23.40 20.90 19.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.57 0.72 -4.35 168 3692 2009-01-15 18:05:59 2003-04-07 12:59:11 13 14 3587 2 1007 2642 1827 75.10 51 36.77 CHANGED MlAaMNcEAlppTlpTGpspaaSRSR.pcLWpKGETSGphQpVp.clphDCDsDslLltVct......tGs..A.CHT......G.pcSCFap ............MluaMNpEALp+TlcoGc.ssaaSRSR.pcLWpKGETSGphQpVh.s......lphDCD.sDoLLlhVp...........hGs....s.CHT.......G.ppSCFh.t............. 0 326 664 868 +3991 PF01503 PRA-PH Phosphoribosyl-ATP pyrophosphohydrolase Bateman A anon Pfam-B_784 (release 4.0) Family This enzyme catalyses the second step in the histidine biosynthetic pathway. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.62 0.72 -3.70 53 4463 2012-10-01 21:36:44 2003-04-07 12:59:11 12 19 3909 57 1077 3829 3398 91.00 30 42.54 CHANGED lpchccshttcttp.tPps..stphhsh........ptphlt.......EEh.sEhht.............Asp....ps.................................shsclsctlsDl.......lYhshGthsthG.......................l..c.hcslhcplccuN .......................................................................h..phtphltp.Rpps....pPpsS...aTspLhscG...........pplspK.lG..............E..Eu...sEssl...............AAp.......st............................................................................................s.cp..c.lssEsuDL.......lY..Hh..h...V...hl.t..p.tG...........................................................................................l.....s.hpclhppLppp................................................................... 0 327 690 914 +3992 PF03208 PRA1 PRA1 family protein Mifsud W, Bateman A anon Pfam-B_2976 (release 6.5) & Pfam-B_8147 (Release 8.0) Family This family includes the PRA1 (Prenylated rab acceptor) protein which is a Rab guanine dissociation inhibitor (GDI) displacement factor [2]. This family also includes the glutamate transporter EAAC1 interacting protein GTRAP3-18 [3]. 
21.30 21.30 22.10 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.99 0.71 -4.88 58 779 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 305 0 497 744 4 144.50 22 68.28 CHANGED phptthus..hRPW.s-Fh.-hsphupP..pshu-htsRlppNLsYFpsNYhhlshhlhhhsLlh.pPhsLllh..hslhsuhhhlahhps................pslslhs+phsspphhh.................slhlsolh.llal..sus..ssslhhslshuhhllhhHAuhRt.sp.....h.ht-pcstths .................................h...thsshRsh.s-Fh...s..........p....phuhP..pshschpsRltpNLtYapsN...Yhhlh.hhlhh..h.s.llh......pP........hhllsh..hhlhs..s...hhhlh..hhps.................pslhhh..t..p.phs.spt.hhh..................................slh.hsuhh...lhhh....su.s..hhslh.hslshshh.llhhHAuh+...p..................s........................................................ 0 145 277 397 +3993 PF00697 PRAI N-(5'phosphoribosyl)anthranilate (PRA) isomerase Bateman A anon Pfam-B_247 (release 2.1) Domain \N 20.60 20.60 20.60 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.28 0.71 -4.76 23 3795 2012-10-03 05:58:16 2003-04-07 12:59:11 17 13 3604 9 968 2961 2133 193.90 32 66.13 CHANGED s+hsulpphpDlhtshsssu....htuhlhssssp+phs.cpuptlhpsss.hh..lVGVFhNpslsplhphhpphuLDllQLHGsE..st-htphlt..lPll+ththstsht.....ht.sppt.p....shhLlD........utpGGoGpthDWshlpphht...........................hpllLAGGLsP-NVspAlp.......pshGlDVSSGVEos...Gl.KDhcKlptFlps 
.................................................................................................................KlCGlop.p-spsAhpsGA.....shh.G.h..l..F...h.......p...S........R.............pV..........s.............h...c.p......A.......p...p............l...h...p...t.....h....s.....t......................hV.GVF..l..s........t...s...h......s.p.........l.h...c.....h..h.....p...p.h.s......L.shlQ.......L..H........G......s.......E..........s..t..ph.....h..........p.........t..........l....c...................t...........h....s........h.......h....p....s...lt..h...s..t..shp..................ht..h...p...p...h...tth...........DhhL.hDs............................................................sthGG.....oGpsF.D..Ws..l...l.ss.ht...h............................................................................................h.s.h.lLAG..GL...ss...-..N...ls..c..Alp.................hts....hulD..luSGVEss........Gh....KDhp+lpphhp.t........................................ 0 323 632 826 +3994 PF03967 PRCH Photosynthetic reaction centre, H-chain N-terminal region Finn RD, Bateman A anon DOMO_DM03113 Family The family corresponds the N-terminal cytoplasmic domain. 20.10 20.10 20.20 120.20 20.00 17.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.59 0.71 -4.58 21 85 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 78 111 27 89 105 137.20 45 53.31 CHANGED Ms.sGshhuahDlAplslYsFW.lFFAuLlaYLppEs+REGYPLps-.ssustsspu......hhslPpPKTFcLtcG+.shslPsspp-....psclshsposshsGuPahPTGsP...hsDGVGPuuaA.RpDhP-lshcGps+IsPLRls ....M.tsshhuahDlAQlsLYsFW.lFFAGLlaYL+pEs+REGYPL-s-..ss....s....phtstG.....hhslPpPKTFhLsc.Gp.TholPsspss....ptsls..hptsushsGuPhhPTG.sP...MlDGVGPuuaAsRpDhP-Lsh.cGps+IVPLRl... 
0 6 14 17 +3995 PF00432 Prenyltrans prenyltrans; Prenyltransferase and squalene oxidase repeat Bateman A, Finn RD anon Pfam-B_130 (release 1.0) Repeat \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.07 0.72 -4.38 69 2497 2012-10-03 02:33:51 2003-04-07 12:59:11 16 40 482 411 1550 6750 154 43.70 26 14.26 CHANGED hshcphhpalhppQp....-GGasspsss............pscsttohhulsuLslls ............................pplhpalhp.pQp.......sG.Ga.s.s.+.ssp.................................hsDs..ha..ohashsuLpll............................. 0 520 866 1268 +3996 PF01080 Presenilin Presenilin Finn RD, Bateman A anon Pfam-B_789 (release 3.0) Family Mutations in presenilin-1 are a major cause of early onset Alzheimer's disease [2]. It has been found that presenilin-1 (Swiss:P49768) binds to beta-catenin in-vivo [4]. This family also contains SPE proteins from C.elegans. 24.50 24.50 24.50 24.70 24.00 24.40 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.44 0.70 -5.64 6 478 2012-10-02 13:41:03 2003-04-07 12:59:11 12 7 156 1 255 421 7 264.60 37 83.85 CHANGED cEEtsLKYGApHVIhLFVPVoLCMllVVuTlpolpFYsppsu.pLlYTPFscpo.osup+hLsSlhNuLlhIuVlVlMThLLlVhYKa+hYKlIHuWLIlSSlhLLFlFohlYLpElh+sas.......lshshsTlhlhlhNFGslGMhsIHWKGPLRLQQhYLIhluALMALVFIKYLPEWTsWhlLssISlWDLVAVLsP+GPLRhLVETAQERNEslFPALIYSSs.hhhhVs........sss.t..posc.s..............................ssptps.........phsp.pps.ptpcDsusoptpp.....o...............ppsuhhhs....cphttph.-lps....s...p....pt..........pEERGlKLGLGDFIFYSVLlGKAuuo..GDWsTTIACFVAILIGLClTLlLLAla++ALPALPISIshGLIFYFuTchllpPFh- 
................................................................................lh.PV.hs.Mh...h...s..h.h......h........................................................t..................................h.......h.h.............t...................t................th..t....uhhsshhhhshlhhhThhhlh.Lah.....hphhthlhsahhhushhll.hhh.sh.hh....lhptht.......hshD.h...oh.h..hhhhN.hu.slG.h.hs.l.a..h.t..s..sh...l....pQhYllhhuslhA..h.......hhphlP-...WosWhlLhhhulaDlhAVLsPhGPL+hLl-hAppRsc...PuLlYput......h....................................................................................................................................................................................................................................................................................................................................................................................................................................ttslKLGLGDFlFYSlLlu+....Au............sh.s.hhsshl.ullh.GLshTLhlLuh.h.p...c...sLPALPlSlhhuh.....hhhh.st.hh.................................................................................................................. 0 93 133 193 +3997 PF03991 Prion_octapep Prion_octopep; Copper binding octapeptide repeat Bateman A anon Bateman A Repeat This repeat is found at the amino terminus of prion proteins. It has been shown to bind to copper. 12.10 0.50 13.10 0.50 11.80 0.40 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.22 0.75 -5.16 0.75 -2.73 5 127 2009-09-13 17:14:17 2003-04-07 12:59:11 7 9 13 0 5 130 0 7.80 94 27.84 CHANGED PHGGGWGQ ..PHG.GGWGQ. 4 0 0 0 +3998 PF03063 Prismane Prismane/CO dehydrogenase family Griffiths-Jones SR, Bateman A anon Pfam-B_2956 (release 6.4) Family This family includes both hybrid-cluster proteins and the beta chain of carbon monoxide dehydrogenase. The hybrid-cluster proteins contain two Fe/S centres - a [4Fe-4S] cubane cluster, and a hybrid [4Fe-2S-2O] cluster. 
The physiological role of this protein is as yet unknown, although a role in nitrate/nitrite respiration has been suggested [1]. The prismane protein from Escherichia coli was shown to contain hydroxylamine reductase activity (NH2OH + 2e + 2 H+ -> NH3 + H2O). This activity is rather low. Hydroxylamine reductase activity was also found in CO-dehydrogenase in which the active site Ni was replaced by Fe [2]. The CO dehydrogenase contains a Ni-3Fe-2S-3O centre. 21.70 21.70 28.20 24.40 20.20 19.50 hmmbuild -o /dev/null --hand HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.80 0.70 -6.13 251 2395 2009-09-15 11:55:15 2003-04-07 12:59:11 15 14 1486 49 664 2022 69 468.60 34 95.85 CHANGED MaChQCpps....ts.suC.phs.................GVCGKss-sushpDlL.lhshpGluthupc.u+ch..........tspphhhpuhFsT.....................lTNssFcspphlphlpcshsl+...pph...............................................pshpphstpss.........phhlsthtpp..sp..DlpuLcphlhaGlKGhs.........AYtcH............AhhL.Gtt-c-.lhphhpcshssh..ss.csls......tllshslcsGths.hpsMtlLDpussttaGsP....ps..TpVslGlh.ps..ssIlloGHDlpshc.LLcQTcspGlslYTH.....GEhLPupsYPsh+..pYpH.......hsGsaGsuWppQpp-FsuhsusllhToNClhPs..h.p.sp...Y+s+laTTuslua...PG.spHI...p.............tpt.....t......................cDaotlIcpAlcshs.p.............c.............ps.p......lhs......GFu+psllshu..............c.......................tll-A.....................................................VKsGsI++hhlluGCDGt+ssRs..Yas-hAcpL.PpDsllLTsGCuKa+a...s+hsLGsl.........G.G...................IPRlLDhGQCNDuYuhlhlAhtL..AcsF......s.s.-lN-LPl.....uhsluW..aEQKAVslLLsLLsLGl+sl+LGPohPuFlSPsVhplLs-s....a..slss..hs.slEpDhpthh 
..............................................................................................MaChQCp........ssuCpht.................GhCGKss-sushQDhL.lhshp..G.lu.sas..hc...uRcht.............................scphh.puhatT..................lTNsNFcs.phlth..h.tp.t.hthcpth.....................................................................................................tshtph..tpst.......ph...h..t..ttt...tt..shhslc.hhhhsh+sh...........ahcp............uhh.....sthc...........p-lh.t.....hpp....hh.s.hh..ss....shs..........llthshchGths.hpsMtlLDtupTtt..a....GpP.............ps...opVslt....sh.ts..hsIlloGHDLpslttLLcQ...o...c...u....p......G....l....slYTH......GEMLPupsYPth+..+apH..................hsGshGsuWtpQph.FsthsusIlhToNClhss..s.......YpsRlaTpu.suh......PG.s.p.H.l...p...............................t....................cDFs.lIppAhph.s.......t...t...hhhshshs....................................................slhuhActll-h......................................................................VppGpl++hhlluGCDuhpst+p....Yas-hAppl.PpDslILThuCuKa+a...s+.hshGsl..............s.G...................lPRllDhGQCNDuhuhhhlAhtL..uchh.............s.h..slN-LPl....shsluW...aEQKAlslLLs..LLtLGlpsIhhGPshPsFhosslhtl.Ls-p.......a.....sltsls..ss-pDhpth.............................. 0 274 478 585 +3999 PF00484 Pro_CA Carbonic anhydrase Finn RD, Bateman A anon Prosite & Pfam-B_9319 (Release 8.0) Domain This family includes carbonic anhydrases as well as a family of non-functional homologues related to YbcF. 
22.10 22.10 22.10 23.40 21.90 21.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.89 0.71 -4.22 497 5590 2009-01-15 18:05:59 2003-04-07 12:59:11 14 20 3442 146 1635 4073 995 149.00 28 64.39 CHANGED slhluCuDSRls..sphlh.shtsG-lFVlRNsGNlls................................stss.huul-a.......AlphLpV.ccIlVhG....HssCGulp......su..........h.............................shlsp....hl....pthpsshpp.......htt..tt......................pph..........ph.............ctNVhpplppLppp..Phlp....pthtpup..........lplpGhhYclps......Gpl ...................................................lhluCuDSRls.....sphlh..s......h......t......s......G..-.lF.Vl..RN..suNlV...........................................................stsshu....ul.pa.....................AVp.....h.L.p..V.c.......cIlVhG....HssCGulp.......us........................................hp.........................s.................hshlpp..a.l..pp.h.p.ss.hhp.........hpph.h.tt..........................................s.pph....th......hc.sVhppltpLtpp..shlp..................t.t.h.t.c.sp.............................ltl+GhhYslpsGp................................................................. 
0 480 1010 1381 +4000 PF01619 Pro_dh Proline dehydrogenase Bateman A anon Pfam-B_1092 (release 4.1) Family \N 27.00 27.00 27.10 27.00 23.80 24.20 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.91 0.70 -5.44 15 3379 2012-10-01 19:29:00 2003-04-07 12:59:11 13 21 2877 20 991 2741 1035 291.10 31 35.23 CHANGED lsG-sltpslcpscpLc....ppthphohDhLG.........Etspsss-Apphhhshtpslpslucpstst...........thhthPshph.phstLh.phs.....shpp-h...shpphlt+lcslsptApchslslplDAE-pphhpho......lclhtc..h.p.tppshsslGsslQAYL+csspslctlhslAp+pshh....htlRLVKGAYh-uEtp+Ap..Gh.s.ssaopcspoDhtap...shschlhtscsh......lastlAoHNscolAhshplscppt.hs...spaEFQpLaGMu-tlpppLspps......hssRhYsPaGsh...pchluYLlRRLhENsuNsuFsp+thshp ....................................................................................................................................Gpsltpshtthcthp....ppGhphoh.D...hLG...................................Essh.stt-.Apthh..tpa.....phlc...slspt.s....s.....................................s.....h....h......photlp..s....+.h..s.........phphct..........shpph.h...s...p.l...pp...l...s...th...A.........c..........p...h..........s...l.t.lsIDAE-tsc...L....-ho...........................lclhcc.....h..............p...........................h.......t........s.......a..........s...s.......l.G..hVlQAYh...+cs..slchLh.chApcp....tht.....lhlRLVKG.AYh-sEhth...up..........G..........h..t........h.ssaoc...Kht.o.Ds..sYh......................shh....c....h..l..L..t...s..sph.......................lasthATHNspol..uth....h.p......h....s..........t...p...p...............h....h............................................................s..p...............aE..F..QhLaG........Mu-s....lhc....p....l..ss..ps............................hss.RlYsPh...Gsh...cs...hluYLlRRLhENs.ANsuFsp+hhp..t........................................ 
0 297 600 836 +4001 PF00160 Pro_isomerase pro_isomerase; Cyclophilin type peptidyl-prolyl cis-trans isomerase/CLD Sonnhammer ELL, Wuster A anon Prosite Domain The peptidyl-prolyl cis-trans isomerases, also known as cyclophilins, share this domain of about 109 amino acids. Cyclophilins have been found in all organisms studied so far and catalyse peptidyl-prolyl isomerisation during which the peptide bond preceding proline (the peptidyl-prolyl bond) is stabilised in the cis conformation. Mammalian cyclophilin A (CypA) is a major cellular target for the immunosuppressive drug cyclosporin A (CsA). Other roles for cyclophilins may include chaperone and cell signalling function [1]. 20.80 20.80 20.80 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.41 0.71 -4.01 168 13237 2012-10-02 15:38:38 2003-04-07 12:59:11 16 154 4706 283 5300 10643 4524 159.30 35 60.14 CHANGED hlph..ps...........G.plhlc....L.hsctu.PhsspNFl.p.........Lsp..................pG..........................aYc..sshFa......R.l..........lps...F........hl...........Q................uGc...........................................t......ss.............s........................h.sEh..........................................................................p............pt..GsluMA.pt.....................so....s.uSQFFI......sh..............................ts.........................s.......tL..D........s..........ta.slFGcVl..pG...h-.llcpIp.p........s.........................lhIhpstl 
....................................................................................h...lpTs......hG.c.I.t.lc...L....a..s..c...t......u.Pp.Ts.c.........N....Fh..s...........L..sp..........................pG.................................................................aY.c....Gs.h.....FHR...V....................................Iss...........F...............................Ml................Q............................G.GD...............................................ss...sst.........................................hut............p....................hpsE.h............................................................................................................tlpp...........tt..GsL.u.MA..psG..........................Pso..................s.GSQ......FFI..sh........................................................................................s.s.......................................ss...........aL..D........s....................pa..sVFG.c....Vl..........cG..............h-...V..l.c..pIp.p..........................stsst.............stPhps..lhItpst................................................................................................................................................................................... 
0 1874 3188 4422 +4002 PF00235 Profilin profilin; Profilin Finn RD anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.69 0.71 -4.00 72 1081 2012-10-02 21:07:43 2003-04-07 12:59:11 14 8 440 49 440 984 4 121.20 33 90.65 CHANGED SWQuYVD.....ppLhus......Gp.lspAAIlG.p.D...G.u...........lWA...pSss...F.plps....................pEhssIhss.Fpp......s...sslhssGlpluGpKYhslp..u-.....sps..lh.uKKu.psG.....lslhKTspAllluhYs-s...................hpsGp.sspsVE....cLuDYL ..............................................................uWpsYlD......pL.hss..........up..lssAAIlG..pD.....G..s.........................VW..A.pSss.....F.phps.............................pElsslhss.Fs-........s......spltssG..lhluGp..........KYhllp.....u-.............sss....lh..u.....K.....K..u....suG...........lsltKT..s.pAll.lu.hYcEs...................hpsG.p.sshhlcplucYL................................................................................... 
0 172 257 346 +4003 PF02161 Prog_receptor Progesterone receptor Mian N, Bateman A anon IPR000128 Family \N 24.10 24.10 24.10 24.80 23.90 24.00 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -13.11 0.70 -5.68 5 113 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 60 0 25 104 0 382.60 60 62.18 CHANGED MTElpuK-sRAPHsAGussSP..s.pP..tRpDussh.uSQsSD..........s.utsSulPlu.LDuLLFsRpsQup-.P-EKsQ-pQshsDVptA.sclEAocs+GusSsRP..PcpDsssLDSVLDTlLAPSGPuQupsS.PAhEstoSWCLFGPElPEDPRus.uopthloPLMSRPEuKAGDuuGhuuupKs.P+ulSPSRQ.L.PssGustWPGAsuKsusQsAsluVEE-uGhcAEGSsGPLLKGKPRsLuGsuuGGGAsAsAPGs.sGGhs.VPKEDSphuAP+sSLsEQDAPsAPGpSPLATTshDFIHVPILPLNsAaLAARTRQLLEuE.oYDGGA.....FAPPRSSPSAsssPVPuGDFPDCuYPP-u-PK-DuFPlYGDFQPPALKIKEEEEGsEAAuRSPRPYLuAGAuuAsFsDaP......PPhPPRAPsSRPGEuA...sAssususSssSSsGPoLECILYKAEGAPPsQGPFAsuPC+sPuAuuCLLPRDuhs......uAssSuAAPALYsPLGLNGLPQ.LGYQAAVLKEG.LPQVYPPYLNYLRPDSEASQSPQYSFESLPQ .........................................MTELpAKssRssHsuGusPSP.pluSPL.sR.sss.F.uSQsSD..........s.s.sSulPIS..LDGLLFPR.CQGp-..stKTQspQsLuDV-GAa..stsEAspusG.....usuups..PEKDpsLLDSVLDTLLsPuGstQSpsS.P.A.sEshosWCLFGsELPEDP.us...PuTptlLsPLMSRstsKsGDuSthuAupKlhPpGLSPspQLL.PsssustWsGAssKPus.ssssp.sEE.-su.cs-t...ususlLKucPRs.tGssttuth.ssssu.hs.Guhs.sP+EDuRhsAs+suL.E.pDuPhAPGRSPLAToh..hDFhHVPILPLstAhLAAR..TRQLLEt-.sYDGGA.........FusPRuSPsAsSsslssuDFP-ssY.s.-s-sK-ssa....shau-FQ.PsLKIKEEptusps.........sYhsuustsss.h.ch..............s.ps...............................s.sssLEClLYKAEs..A.st.....ssass.Ps+ssusuuClLP.tp................s.ssuuu...sP....slY.sLuLNGh.Q.LGYQA.AVlK-u.LsQVYPPYLNYL..R.PDoEsSQSPQYuF-SLPQ........... 0 2 2 6 +4004 PF02244 Propep_M14 Carboxypeptidase activation peptide Bateman A, Mian N anon Pfam-B_2335 (release 5.2) Domain Carboxypeptidases are found in abundance in pancreatic secretions. 
The pro-segment moiety (activation peptide) accounts for up to a quarter of the total length of the peptidase, and is responsible for modulation of folding and activity of the pro-enzyme. 22.10 22.10 22.10 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.14 0.72 -4.31 103 1016 2009-01-15 18:05:59 2003-04-07 12:59:11 11 15 167 24 581 999 1 73.10 22 16.68 CHANGED a+l...pspsppplplLppLp...ps.plcFWpsss....thspssclhVsspphtshpshLpppslpaplhlpslQphl-pEp .........................hcl.spspppl.phLppLp...........pp.pl.cFWpsss.......t.sps.lDlhVss..p.ph...p.s.hpshLppp.sl.p.a.p..lhlc.slQphl-pp......... 0 141 201 408 +4005 PF04352 ProQ ProQ/FINO family Mifsud W, Moxon SJ, Bateman A anon COG3109 & Pfam-B_7673 (release 7.7) Domain This family includes ProQ, which is required for full activation of the osmoprotectant transporter, ProQ, in Escherichia coli. This family includes several bacterial fertility inhibition (FINO) proteins. The conjugative transfer of F-like plasmids is repressed by FinO, an RNA binding protein. FinO interacts with the F-plasmid encoded traJ mRNA and its antisense RNA, FinP, stabilising FinP against endonucleolytic degradation and facilitating sense-antisense RNA recognition [2]. 21.70 21.70 21.80 22.00 21.00 21.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.39 0.71 -4.47 46 1580 2009-09-11 08:23:37 2003-04-07 12:59:11 8 2 1071 7 186 857 21 115.00 41 57.46 CHANGED p+hsss.ppslshLtcpFPtsFstpst.hpPLKlGIapDLhtclpptt...lS+spLRpALppaTpuhRYLpuh.ptGssRlDLsGpssutlot-csp+ApppLtcp+p.csttcctppt..s .............................phpsscEslshLtcpFPtsFss..-st...s+PLKlGIhpDLl-cl....ut..c........LS+spLRsAL+hYTp...ShRYLtul.KsGAsRhDL-G..pPs....Gclsppcsp..aAppp......Lpct+s...+sptpcttQ.ut........................................ 
0 25 76 133 +4006 PF02428 Prot_inhib_II Potato type II proteinase inhibitor family Bateman A anon Pfam-B_2913 (release 5.4) Domain Members of this family are proteinase inhibitors that contain eight cysteines that form four disulphide bridges. The structure of the proteinase-inhibitor complex is known [1]. 20.40 20.40 20.90 20.40 20.30 20.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.21 0.72 -3.91 10 341 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 42 25 13 355 0 48.10 62 74.95 CHANGED sKACTpECDPclAYuhCPRSEGs..h.sslCTNCCuGhKGCpYYusDGoFICEG .......PKACP+NCDsRIAYulCPpSEpp..tps.pICTNCCAG...p..KGCpYFSsDGTFlCEG............ 0 1 8 11 +4007 PF00260 Protamine_P1 protamine_P1; Protamine P1 Finn RD anon Prosite Family \N 20.50 20.50 20.70 20.60 17.20 17.60 hmmbuild --amino -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.00 0.72 -4.15 14 147 2009-01-15 18:05:59 2003-04-07 12:59:11 15 1 133 0 9 97 0 47.40 74 96.52 CHANGED ARY.RsCRSpSRSRC.RRRRRRs+...............RRRRRpsRRRR...Rt..ssRR...Ysh..RpRR .ARYRCCRSpS..RSRC..RR...RRRCR.RRRRRCCRRRR.Rs..CCRR...YThhRCtR.. 0 1 1 1 +4008 PF00841 Protamine_P2 protamine_P2; Sperm histone P2 Bateman A anon Pfam-B_1350 (release 2.1) Family This protein also known as protamine P2 can substitute for histones in the chromatin of sperm (Swiss). The alignment contains both the sequence of the mature P2 protein and its propeptide. 25.00 25.00 25.30 37.70 24.10 18.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.63 0.72 -3.43 4 60 2009-09-11 10:14:09 2003-04-07 12:59:11 14 1 43 0 8 54 0 92.80 65 88.38 CHANGED MVRYRVRSPSEsPHps.tQphcspEQG....p-QGLSPEcVEsYGRTHpG+aHYR+RpCSRRRLhRlH+p.+RSCRRR+R+uCRHRR........R+RRGCRpR .MVRYRhRSPSEpP..Hps.GQ.phctpEQG....ptQGLSPE+VEsY.GRTHRG+p.HaR+R+CSRRRL+RIHRR..+RSCR.RR.RR...RSCRHRR...........RHR..RGCRpp........... 
0 1 1 1 +4009 PF03247 Prothymosin Prothymosin/parathymosin family Bateman A anon Pfam-B_3463 (release 6.5) Family Prothymosin alpha and parathymosin are two ubiquitous small acidic nuclear proteins that are thought to be involved in cell cycle progression, proliferation, and cell differentiation [1]. 23.90 23.90 24.10 24.20 23.70 23.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.24 0.72 -11.24 0.72 -3.89 8 185 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 42 2 61 210 0 90.50 59 75.61 CHANGED uDsuVD..uusElosKDLKEK.KElVEEsEsuK-....sPsNGst.sEENGppcuDsp.--EEEs..-E--EE--G-G---Es---E.......Es-GsTsKRAAE...-E-D-s-sKKQKTD-sD ............u-tsV-..susElosKDLKEK.KEl..VEEuENG..+-......APANGNs...NEENGEpEuDsEs-EEEEp......tE-p.....E......EE-s....-.GEEE-s-E-E.......E.sEusssKRAAE.......pE-D-lDsKKQK.T-ps................ 0 3 8 21 +4010 PF05044 HPD Prox1; Homeo-prospero domain Moxon SJ, Bateman A anon Pfam-B_5293 (release 7.7) Domain Prospero is a large drosophila transcription factor protein that is expressed in all neural lineages of drosophila embryos. It is needed for correct expression of several neural proteins and in determining the cell fates of neural stem cells. Homologues of prospero are found in a wide range of animals including humans with the highest level of similarity being found in the C-terminal 160 amino acids. This region was identified as containing an atypical homeobox domain followed by a prospero domain. However, the structure shows that these two regions form a single stable structural domain as defined here [1]. This homeo-prospero domain binds to DNA. 
25.00 25.00 42.70 42.70 22.60 21.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.94 0.71 -4.56 13 191 2009-09-11 05:20:22 2003-04-07 12:59:11 7 3 88 3 116 167 0 139.90 65 21.88 CHANGED psLTPhHL+KAKLMFFYTRYPSSslLKsYFPDV+FN+ssTuQLlKWFSNFREFYYIQMEKaARQALuEGlsssc-lhVo+DSELa+sLNhHYN+sNchEVP-pFltVsppTLREFFsAIpuGKDs-PSWKKsIYKVIs+LDcpIPEhFKSPNaL-cLp ..................p.sLoPsHLKKAKLMFFaTRYPSSshLKsY...FsDVKFNRClTSQLIKWFSNFREFYYIQMEKaARQAls-GVos........s........c..-LslsRDsELaRuLNhHYNKuNDFE.....V.P.-pFlcVsphTL+EFFpAI.uGKDs-PSWKKsIYK...lIs+LDs.lPEhFKossh..................................................... 0 23 31 75 +4011 PF02840 Prp18 Prp18 domain Bateman A anon Bateman A Family The splicing factor Prp18 is required for the second step of pre-mRNA splicing. The structure of a large fragment of the Saccharomyces cerevisiae Prp18 is known [1]. This fragment is fully active in yeast splicing in vitro and includes the sequences of Prp18 that have been evolutionarily conserved. The core structure consists of five alpha-helices that adopt a novel fold. The most highly conserved region of Prp18, a nearly invariant stretch of 19 aa, forms part of a loop between two alpha-helices and may interact with the U5 small nuclear ribonucleoprotein particles [1]. 
22.10 22.10 22.30 22.30 21.10 22.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.81 0.71 -4.38 29 352 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 294 2 249 327 2 137.60 42 40.45 CHANGED l.............htalphl.....LpcWcppLpscppp....tpohpuc.s........hsthhQT+cpl+PLhcpl+p....p..pLspDILpsLspIlpthp.pRc.......YhcAsDuYlcLuIGNAsWPlGVTMVGIHpRo.....uRpKIau.sp....l..AHlhNDEpTRKYIQulKRLlTFsQphaPs.c ..............................................htahphlLppWtppLspcppt................+pohpGK.s.................sshhQocchl+PLF+pl+c.........p..sLssDIhpslsc.Il.ct.h..p.Rc.......YlcAsDuYLcluIGNAsW.....PIGVTMVG.IHtRo.....uREKIas...pp.....lAHlhsDEspRKYlQulKRLhThsQppaPsc......................... 1 85 132 201 +4012 PF02340 PRRSV_Env PRRSV putative envelope protein Bashton M, Bateman A anon Pfam-B_939 (release 5.2) Family This family consists of a conserved probable envelope protein or ORF2 in porcine reproductive and respiratory syndrome virus (PRRSV) also in the family is a minor structural protein from lactate dehydrogenase-elevating virus. 25.00 25.00 27.00 29.20 21.20 21.00 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.51 0.70 -5.19 4 222 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 9 0 0 236 0 230.50 83 92.22 CHANGED Mh.hSSLhPhLI.hF.hsFCLu.PSPsGhW.hhSDWFuPRaSVRs..hT..sYRR.acshlp.CpPDl.paG.Kt.hGMLWHMKVuThlD-hlppRlhppMpHpGptsWtQVhoctsLppIushslVsHhQplAulEtEuCpYLhSRLPhlpshussh.NVTIpa...LNpshhI..uPuspshhss.+.WLlShpSSlFSSVAAussLaIVhhLRlP..RpVFGFhahpts++o .................................MLSRs.WCPLLISSYFWPFCLAS.SPVGWWSaASDWFAPRYSVRALPFTLSNYRRSYEAFLSQCQVDIPTWGsKHPLGhLWHHKVSTLIDEMVSRRMYR..IMEKAGQAAWKQVVSEAT.....L.....SRIS.uLDVVAHFQ.HLAAIEAETCKYLASRLPML.H.NLRhTGSNVTIVYNSTLsQVFAIFPTPGSRPKL.HDFQQWLIAVHSSIFSSVAASCTLFVVLWLRIPhLRoVFGFRWLGAhF...... 
0 0 0 0 +4013 PF01366 PRTP Herpesvirus processing and transport protein Bateman A anon Pfam-B_1171 (release 3.0) Family The members of this family are associate with capsid intermediates during packaging of the virus. 20.30 20.30 24.90 24.60 18.90 18.70 hmmbuild -o /dev/null HMM SEED 638 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.96 0.70 -6.53 32 211 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 109 0 0 193 0 587.10 37 83.17 CHANGED htQcLsAlhuQlpshshplplL+hCDPss..h...hphsslphNshhlphLpcplhstLtpQs.phpsosLoltLphhLcshptcsttlhpuLp...p............tppaFppsh...tLs..ttCshHpplplshYG.sttlslplshlNDlEphLKpLNhsahhlsspsulpslpplhpFLschhGsuslssP-lYssop.PCh.Ca-ELslssNQGcolp+RLhsphCsHlspph....lpsth-s.lpplshshshsspch.......t.ht......................htspptpsps..................hhppAtphLcsaslFp.ssstplYplS-LpYWsuoupt...............tsshcthsssLspLhp+Epphcthpstl.phhlhscsspHFhchatsp.............sh-pLasGuhhsSs--hI-ALhpsChspahspPhhpcLhppps-hhstLpplLpphp............................sssssssststs...................................................t....phst.ttsp.t......................................................pshp-sphR+ctYhc+lo+cuhspLtpClcpQcc.LpKhLslsVaGsslhcphsplhNtFhtRptalpts.htst........spssstsF-sppal+ssLhppplssphLssLsppFacLlNGPLhscs.chFs.PsNssLhasl-ssGlLPHhKp-Lschhhsshps.pDWhsspFppFYsF....ss...pslsssQ+hs 
....................h.hQ+LhslhupsppashplEhL+hCDPpl..hh..tc.sshKhNuhtlhhLh+plhPtlhtQs.ptp.o.LolhLchlLcthhc-sthL.tuLtsat......t...t.D.........httaappsh...tLs....CshHpplpLphhs.ss.hshpLshLpDlEpFLpphNasahlhsspsultshtplhphLtphsGhu.ls..Elas.up.sCh.CaEELslhsNQGcolp+RLtshlCsHlshp....pspsp.-s.hppl.pshh..stchs...tslsslct.....................l.upsss.ps...................hhp-ApthLctaslFp.shstplYulS-hpaW.tou.................pshhcths.slppLshhcp.hhcthhssl.phtLaGcpscch.thhtt.tls...........h.-plhlGuhhsuPschI-hlhphshpta.ssPlhp+L.p.ppp.hstl+plLpclp......................................s............s......................................................t...................................................................shtthhppstp-sphR+RtYhp+lSchuhuplh+Cl+pQcp.lpKhlcVNlhGplhhchhuplhNGFhhRppahpts...ss........shusthsaDtHhalhssLl++plssthLPtLspphacLlNGPLFsHspcpas.P.Nsshhaus-NsGlLPHlK--Ls+hh.u............ssts...s-WhVscaptFasF......ss..hpslsshQ+th............. 0 0 0 0 +4014 PF02666 PS_Dcarbxylase Phosphatidylserine decarboxylase Bashton M, Bateman A anon COG0688 Family This is a family of phosphatidylserine decarboxylases, EC:4.1.1.65. These enzymes catalyse the reaction: Phosphatidyl-L-serine <=> phosphatidylethanolamine + CO2. Phosphatidylserine decarboxylase plays a central role in the biosynthesis of aminophospholipids by converting phosphatidylserine to phosphatidylethanolamine [2]. 
20.00 20.00 20.20 20.30 19.80 19.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.26 0.71 -5.02 112 3949 2009-01-15 18:05:59 2003-04-07 12:59:11 10 31 3153 0 1299 3080 1984 203.70 27 63.28 CHANGED hstaFsRth+sst..Rsl......stsss..........hllSPuDGplhth...l.....tt................h.hKt...hthtphlss..................................................................................................................ps..shhlslaLuPtDYH+h+sPssGplpp.hpahsGch...................h...............................sss.hthpp.................................................pNERshhhhp......hu.t......hhhltluuhhlspIhhth.t.........................................................................tstplp+GcchGhFph.GSsllllh.ts.h..................p.plphGp.plphGps.ls....t ..................................................................................................hstFFsRhl+sss........R..s..l..............spsss..........................hlluPADGt.l......sph...sp.l..p..ttp................h..h.KG...ta.shppL.Lus...........................................................................................................................................................................hps..shhsslaLuPtDYHRhHhPss.G.p.l.p.c....h.hahs.Gc.h........a.........................................................sVs.hpupp..............................................t.h.h.s.c.N.E....Rshhlhc.......sphG..............................hshl.VGAhhVupIhh..sh......................................................................................................ptu.plp+Gc-hGhF+h.GSTl.l.l.lhstst.....................p...p..lp.....sp...hsphGptlh.t......................................................................................................................... 0 425 811 1087 +4015 PF04230 PS_pyruv_trans Polysaccharide pyruvyl transferase Kerrison ND, Finn RD anon COG2327 Family Pyruvyl-transferases involved in peptidoglycan-associated polymer biosynthesis. 
CsaB in Bacillus anthracis is necessary for the non-covalent anchoring of proteins containing an SLH (S-layer homology) domain to peptidoglycan-associated pyruvylated polysaccharides. WcaK and AmsJ are involved in the biosynthesis of colanic acid in Escherichia coli and of amylovoran in Erwinia amylovora [1]. 25.30 25.30 25.40 25.40 24.80 25.20 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.77 0.70 -4.70 163 2481 2012-10-03 16:42:30 2003-04-07 12:59:11 8 39 1781 0 656 2150 297 275.20 15 70.68 CHANGED NhG.Dtl...........hhuhhphLpptt..psplhshs.......t.ttttthht.h...............................................................................................lllsGu...................................t.....hh..........hhhhhhtpthpp.h.......lhhutuh...................t...h..............ppttpp...hhpphhpp.hsh...lslR-ph.Shp.........htphsh.......psthssDss.h..hht..................................ttthhthtptst................t.thtphhhhhhphhhp.pthphhhh.ht.t.t.t..p.h............................h.hh...hsspchhshls.pschllssRhHuhlhuhhhshPsl.slsh.ss 
................................................................................................................................................................................................................................NhG.s.h....h....hhuhhph.l....pp.........t...p....h....ph.shs......................t....t.t.....h.ht.h...h..................................................................................................................................................hlhsGu............................................h..h............hthhhhsth.hpp...h.......................hhhutul...........................us..ht.................................stthpp...........hhp.hh..hsp..ssh.....lslR-ph.Shc......hhpphslp................psphssDss.h......hls.......t...t..........................................................htthh.http.t...............................tth..t..pt.hhthhph..hhp.........pthp.hhh.h..s..hth.t...t........p.h..........................................................thh.h.h...t.hs...st...ch.hph...ls..psch.slusRhHuhlhuhhhshPhl.sls......................................................................................................................... 
0 230 461 572 +4016 PF00223 PsaA_PsaB psaA_psaB; Photosystem I psaA/psaB protein Finn RD anon Prosite Family \N 19.00 19.00 20.60 19.50 17.90 17.50 hmmbuild -o /dev/null HMM SEED 684 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.30 0.70 -6.45 22 3289 2009-09-12 09:11:12 2003-04-07 12:59:11 14 18 1996 14 178 2825 824 538.70 58 96.78 CHANGED FSpsLApsP.....TTphIWphhAsAHD.F...-oHsu.s.Epl.pKlFuuHFGHLAlIFlWhSG.hFHsAh.uNapsWlpDPhpl+P.ApslW.PhhGQphls......scsGush...shhssSGlaphWhshGhsophpLhtsulhhLlhAulhLaAG............WhH.hp.th........................PpLtW........FpssEShLNHHLuuLhGluSLAWuGHhlHVAlPhsphhcs.............hsshhpl.PhPpsL.......................sPFFohNWutYup..................hLTFpG.....G...LpPtTGuLW..LTDlAHHHLAIAllFIlAGHMYRTsa.GIGHshKEll-upps.................hs.uHpGLa-hlssShHhQLulsLAhlGolo.lVApHMYuhPsYsYlupDYsTphuLaTHH.aIuGFlhVGAhAHuuIFhVRDY..DPthN..........tsNlLsR.......lLcH+-AIISHLsWVslFLGFHohGLYlHNDoMpAhGcPp.....ptI.lpPlFAQaIQuhp..upsh........sssu....s......s.s.hhsG....lsuls.scss.hhl.lGsuDFLVHHhhAhslHsTsLILlKGsL.ARuS+LhPDKtshGapFPCDGPGRGGTCplSuWDphaLulFWMhNoluhVhFaapWKh.S.hhGss................................................upFspSShhlsGWLRDaLWtpSSQlIsuYssh.....LSsauhhFLhuHhlWAhuhMFLhShRGYWQELIEolVWAHpKh.lAstI.......pPhALSIsQGRhVGlsHahlGsIhThhAFhlA 
............................................................................................................................................................................s.Ecl.pplFuuHFGQLuIIFlWhSG.hFHsAh.uNapuWlpsPhpl+P.ApslW...PhhGQ.hlp.............scsGush.s..hhSGhaQhWhs.GlpophpLYssAlhhLhh..uulhLhAG............WhH.hp.th........................PplpW........FpNsEShLNHHLuGLhGluSLuWsGH.lHVulPhs+h.cs.............hss.h.-l..Ph.Ppth....................................sPFFohsWs.Yup..................hLTFhG.....G...h.pP.TtuLW..LTDhAHHHLAIAlLFll.AGHMYRTNa...GIGHShK-lLEAHpsP..................hhGpGHKGL.Y.-hl.ss.ShHhQLulsLA.lGslo.lVA..pHMYuhPsYsalApDasTQhuLaTHH.aIuGFhhsGAhAHuAIFhlRDY..sPp..ps..........hsNl....LsR.......hLcH+-AIISHLsWsslFLGFHohGLYlHNDsM.AhGpPp.....p.tI.lpPlFAQWIQssH..uhsshsh.....................ussu......u............spo.h.hh.sG......l.sAls.sp.s...h.lslGs.uDFLVHHhhAhslHsTsLILl.......KGsL.ARuS+L...hPD..KtshGapFPCDGPGRGGTCphSsa.D.t.h.................................................................................................................................................................................................................................................................................................................................................... 1 36 97 152 +4017 PF02531 PsaD PsaD Bashton M, Bateman A anon Pfam-B_1336 (release 5.4) Family This family consists of PsaD from plants and cyanobacteria. PsaD is an extrinsic polypeptide of photosystem I (PSI) and is required for native assembly of PSI reaction clusters and is implicated in the electrostatic binding of ferredoxin within the reaction centre [1]. PsaD forms a dimer in solution which is bound by PsaE however PsaD is monomeric in its native complexed PSI environment [1]. 
22.40 22.40 24.20 24.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.57 0.71 -4.97 20 196 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 148 6 61 196 124 127.70 66 81.82 CHANGED ssLssps..PhFGGSTGGLL+uAEsEEKYAITWTSpKEQlFEMPTGGAAlM+cG-NLLYLARKEQCLALuT.QLRs+F..KIpDYKIYRI..FPsGEVpYLHPKDGVFPEKVNtGRtslGpssRpIGpNsNPsslKFoGK.psa- .............Lssps..PhFGGSTGGLLppApsEEhYsITWsSsKEQlFEMPTGGAAIMpcGpNLLhLARKEQCLALG.T.QLRoKF..KIpcYKIYRl..FPsGElQYLHPKDGVaPEKVN.GRptVGps.RpIGcNssPhplKFoGKtsa-....... 0 19 43 56 +4018 PF02605 PsaL Photosystem I reaction centre subunit XI Bashton M, Bateman A anon Pfam-B_1741 (release 5.4) Family This family consists of the photosystem I reaction centre subunit XI, PsaL, from plants and bacteria. PsaL is one of the smaller subunits in photosystem I with only two transmembrane alpha helices and interacts closely with PsaI [1]. 25.00 25.00 40.70 39.20 22.20 21.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.99 0.71 -4.69 21 179 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 141 6 63 174 145 149.10 48 79.09 CHANGED pphlpshs.DPhVGsLuTPlsoSsho+sFIsNLPhYRpGLSPhhRGLElGMAHGYFLhGPFshLGPLRN.o-hAhlAGLLuulGLllILTssLohYGsls.........spsssh....................................-sLpTpcGWupFsuGFhlGGsGGAhaAahLlpsh.l.ts .........h.phlpPhssDPhlGs.LtTPlsSSshsthalsNLPAYRpGlSPlhRGLElGhAHGYhLlGPFshh...GPLRs.o-hAthAGhLuAlGLVlILTlsLolYGhsuFpp......spssss...shsh...p..........................s.-sLpTscGWupFouGFalGGhGGuhaAahLltslth..sh........ 0 20 47 59 +4019 PF00737 PsbH PSBH; Photosystem II 10 kDa phosphoprotein Bateman A anon Pfam-B_465 (release 2.1) Family This protein is phosphorylated in a light dependent reaction. 
20.80 20.80 20.80 21.30 20.70 20.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.50 0.72 -4.39 19 1173 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 1095 17 62 665 119 39.10 76 66.99 CHANGED pTtLGslLKPL.NSEYGKVuP..GWGTTPlMulhMsLFhVFLlIILpIYNsSllL ........TslGslLKPL..NSEY.GKVAP..GWGTh.hM............................. 1 14 42 56 +4020 PF02532 PsbI Photosystem II reaction centre I protein (PSII 4.8 kDa protein) Bashton M, Bateman A anon Pfam-B_1731 (release 5.4) Family This family consists of various Photosystem II (PSII) reaction centre I proteins or PSII 4.8 kDa proteins, PsbI, from the chloroplast genome of many plants and Cyanobacteria. PsbI is a small, integral membrane component of PSII the role of which is not clear [2]. Synechocystis mutants lacking PsbI have 20-30% loss of PSII activity however the PSII complex is not destabilised [2]. 19.30 19.30 19.50 25.70 18.50 17.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.58 0.72 -4.33 15 674 2009-09-11 13:22:19 2003-04-07 12:59:11 9 2 643 17 44 216 33 35.40 85 96.30 CHANGED MLsLKlsVYsVVlFFVuLFlFGFLSsDPuRNP..sR+D .....MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGRcE...... 0 13 31 40 +4021 PF01788 PsbJ PsbJ Bashton M, Bateman A anon Pfam-B_1227 (release 4.2) Family This family consists of the photosystem II reaction centre protein PsbJ from plants and Cyanobacteria. In Synechocystis sp. PCC 6803 PsbJ regulates the number of photosystem II centres in thylakoid membranes, it is a predicted 4kDa protein with one membrane spanning domain [1]. 21.20 21.20 21.20 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -7.91 0.72 -4.42 25 1304 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 1239 17 50 454 91 31.80 78 95.70 CHANGED Mus...TGRIPLWlVuTVuGluslullGlFFYGSYuGLGSSL ......MAD..TTG.RIPLW..l.IGTVsG.I.VIGLlGlFFYGSYSGLGSSL...... 
0 14 34 45 +4022 PF02533 PsbK Photosystem II 4 kDa reaction centre component Bashton M, Bateman A anon Pfam-B_1331 (release 5.4) Family This family consists of various photosystem II 4 kDa reaction centre components (PsbK) from plant and Cyanobacteria. The photosystem II reaction centre is responsible for catalysing the core photosynthesis reaction the light-induced splitting of water and the consequential release of dioxygen. In C. reinhardtii the psbK product is required for the stable assembly and/or stability of the photosystem II complex [1]. 25.00 25.00 25.50 25.50 23.10 22.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -7.99 0.72 -4.02 25 756 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 711 17 53 470 13 40.60 78 72.83 CHANGED shllAp..LPEAYuhFcPlVDVLPlIPlhFhLLAFVWQAAVuFR ..s.hFFuK..LPEAYAhhsPIVDVMPVIPlhFFLLAFVWQAAVSFR.. 0 13 35 46 +4023 PF02419 PsbL PsbL protein Bateman A anon Pfam-B_1884 (release 5.4) Family This family consists of the photosystem II reaction centre protein PsbJ from plants and Cyanobacteria. The function of this small protein is unknown. Interestingly the mRNA for this protein requires a post-transcriptional modification of an ACG triplet to form an AUG initiator codon [1,2]. 19.70 19.70 20.40 20.40 18.60 17.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.29 0.72 -7.54 0.72 -4.53 14 1035 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 985 17 49 215 89 36.70 90 86.43 CHANGED pssNPNppsVELNRTSLYhGLLLlFVLulLFSSYhFN ...TQSNPNEQ.NVELNRTSLYWGLLLIFVLAVLFSNYFFN..... 1 14 32 41 +4024 PF05151 PsbM Photosystem II reaction centre M protein (PsbM) Moxon SJ anon Pfam-B_6558 (release 7.7) Family This family consists of several Photosystem II reaction centre M proteins (PsbM) from plants and cyanobacteria. 
During the photosynthetic light reactions in the thylakoid membranes of cyanobacteria, algae, and plants, photosystem II (PSII), a multi-subunit membrane protein complex, catalyses oxidation of water to molecular oxygen and reduction of plastoquinon [1]. 21.10 21.10 21.40 21.40 20.80 20.50 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.06 0.72 -4.35 19 1155 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1094 17 44 275 4 26.80 81 83.33 CHANGED MEVN.LGFlAolLFlllPTsFLlILYlpTsu ......MEVN.LuFIAosLFILVPTAFLLIIYVKTVS.. 0 11 30 40 +4025 PF02468 PsbN psbN; Photosystem II reaction centre N protein (psbN) Mian N, Bateman A anon Pfam-B_2222 (release 5.4) Family This is a family of small proteins encoded on the chloroplast genome. psbN is involved in photosystem II during photosynthesis, but its exact role is unknown. 20.40 20.40 20.60 20.50 19.10 19.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.16 0.72 -4.34 32 1871 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 1799 0 52 442 322 43.00 83 95.48 CHANGED ME...sAhhlsIhluslLlulTGYulYsuFGPPS+pLcDPFE-HED .........ME...TATLVAI.ISsLLVSFTG.YALYTAFG.QPSpQLRDPFEEHtD... 0 14 38 48 +4026 PF04725 PsbR Photosystem II 10 kDa polypeptide PsbR Kerrison ND anon DOMO:DM04871; Family This protein is associated with the oxygen-evolving complex of photosystem II. Its function in photosynthesis is not known. The C-terminal hydrophobic region functions as a thylakoid transfer signal but is not removed [1]. 
21.80 21.80 21.90 23.50 21.60 21.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.37 0.72 -4.00 5 100 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 62 0 33 95 1 92.70 63 71.49 CHANGED SGuKKlKTD........cPY......GhGGGMsl+sGVDASGRKuKGKGVYQFVDKYGANVDGYSPIYoP-EWSPSGDVYVGGTTGLhIWAVTLAGLLuGGALLVYNTSALAs ..................................usKKIKTD..............pPa......GhuGGhsl+sGlDASGRKuKGKGVYQFVDKYG.ANVDGYSPIYss--WSsoGDVYsGGsTGLhlWAlTLuGlLuGGALLVYsTSALu......... 0 8 21 29 +4027 PF01405 PsbT PSBT; Photosystem II reaction centre T protein Bateman A anon Pfam-B_1880 (release 3.0) Family The exact function of this protein is unknown. It probably consists of a single transmembrane spanning helix. The Swiss:P37256 protein, appears to be (i) a novel photosystem II subunit and (ii) required for maintaining optimal photosystem II activity under adverse growth conditions [1]. 20.30 20.30 20.50 20.50 20.10 19.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.89 0.72 -6.96 0.72 -4.36 3 1892 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 1795 17 44 477 0 27.70 90 83.23 CHANGED MEALVYlFLLlGTLuVIFFAIFFRDPPRI ....MEALVYTFLLVSTLGIIFFAIFFREPPK.V.. 0 12 31 41 +4028 PF03912 Psb28 PsbW; Psb28 protein Finn RD anon DOMO:DM04467; Family Psb28 is a 13 kDa soluble protein that is directly assembled in dimeric PSII supercomplexes. The negatively charged N-terminal region is essential for this process [1]. This protein was formerly known as PsbW, but PsbW is now reserved for Pfam:PF07123. 
25.00 25.00 56.40 56.40 18.70 17.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.38 0.72 -4.09 34 184 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 126 1 63 178 102 103.70 44 82.25 CHANGED IQFhcGlsEpllP.-VRLTRS+.DGssGpAhFhFcpPps..lst.pt.s..-ITGMaLlDEEGEltTR-Vpu+FlNG.....cPpulEAsYhhcopp-W-RFMRFMcRYApsNGLuas .IQFhpGlsEpllP-V+LTRS+.sGssGsAhFhF-pPps..l-ptps..p.tsITGhaLID-EGElsop-VsuKFlNG.....cPptlEutahhco.p-W-RFMRFMcRYuptNGLsa..... 0 18 45 59 +4029 PF00849 PseudoU_synth_2 YABO; RNA pseudouridylate synthase Bateman A anon Pfam-B_421 (release 3.0) Family Members of this family are involved in modifying bases in RNA molecules. They carry out the conversion of uracil bases to pseudouridine. This family includes RluD Swiss:P33643, a pseudouridylate synthase that converts specific uracils to pseudouridine in 23S rRNA. RluA from E. coli converts bases in both rRNA and tRNA [1]. 23.80 23.80 24.10 23.90 23.70 23.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.13 0.71 -4.41 57 25319 2009-01-15 18:05:59 2003-04-07 12:59:11 17 48 4782 26 5572 17465 6565 147.20 25 50.03 CHANGED ph.lllsKPtGhsspsts............................thtthhth.hhtt..tpttphthlpRLD+sooGlllhupssphspplpp.h....tppp.lcKpYhuhl.............................................h...ptshhp..h..............................................................................ssptshshhcslpps.........................................sphshlplplhoG+pHQlR...tphst 
.............................................................................................................................................................llllNKPtGhlspsss......................................................................................................t..t..h...h..h..h..h..h.h.t........ttst...c..h...hhVt..RLD+..........-ToGL...........ll..l..s.p.s..s.pht.p.pLtc...............................p+p....l....p....K....pY.hAhVp.....................................................................................................................................................................t...t...t....h...l..p...t...s.l.tp.................................................................................................................................................................................................................h.....h.....h.....h.....h...h....p....s....u....p...s.......u....h...o...p.h..chlpph.............................................................................................t.s.h..ohl..c.lplp.p....G..RsHQlR...hhht......................................................................................................................................................................................................................... 0 1812 3469 4634 +4030 PF00796 PSI_8 Photosystem I reaction centre subunit VIII Bateman A anon Pfam-B_528 (release 2.1) Family \N 19.80 19.80 19.80 19.80 19.40 19.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.46 0.72 -6.76 0.72 -4.36 36 650 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 624 7 44 352 15 23.70 74 61.13 CHANGED LPSIhVPlVGLlhPAlsMulLFlaI .LPSIFVPLVGLVFPAIuMA.LaLal... 0 11 31 39 +4031 PF02427 PSI_PsaE Photosystem I reaction centre subunit IV / PsaE Bateman A anon Pfam-B_1594 (release 5.4) Domain PsaE is a 69 amino acid polypeptide from photosystem I present on the stromal side of the thylakoid membrane [1]. 
The structure is comprised of a well-defined five-stranded beta-sheet similar to SH3 domains [1]. 20.90 20.90 21.40 23.50 19.50 20.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.19 0.72 -4.60 25 181 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 147 11 65 180 119 62.20 61 69.39 CHANGED lpRGSKVRILR.ESYWap-VGTVAoVDpoG..l+YPVlVRF-KVNY.......uGlNTNNFAhcEL.clt ...pRGuKV+ILR.ESYWaN-lGoVsoVDpuu..l+YPVlVRF-KVNY.......uGlsTNNFA.cElpcV............. 0 19 45 58 +4032 PF02507 PSI_PsaF Photosystem I reaction centre subunit III Mian N, Bateman A anon Pfam-B_2122 (release 5.4) Family Photosystem I (PSI) is an integral membrane protein complex that uses light energy to mediate electron transfer from plastocyanin to ferredoxin. Subunit III (or PSI-F) is one of at least 14 different subunits that compose the PSI complex. 21.10 21.10 21.80 22.00 20.90 21.00 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.15 0.71 -4.72 10 178 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 141 6 65 172 142 142.20 43 86.29 CHANGED phhAllhL.slhls....sPssusADlAGLsPCpESsAFpKRtKsolK+LpppLtpY-PsSuPAlAlptph-+TKpRF-pYupAGLLCGsDGLPHLIsDG....RaoHAGEFllPGlLFLYIAGWIGWVGRuYLlAVpsocc.PT-KEIIIDVPLAlKhhhsGFsWPlAAhpEatSGcLlA+D- ............................................t....hhh.shhh.......s.....shs.ph..us.LTPCp-S.tFtp+tptthppht......tstos............ptRF-pYups..LCG.s.DGLPHLIssG.........chs.HhG-FllPulhFLYIAGWIGWsGRsYLhslp..p.p..cp..ss.pEIIIDVPLAhphhhpGhhWPluAhpEhhsGcLhtp-.s....... 0 19 42 58 +4033 PF03244 PSI_PsaH Photosystem I reaction centre subunit VI Bateman A anon Pfam-B_3007 (release 6.5) Family Photosystem I (PSI) is an integral membrane protein complex that uses light energy to mediate electron transfer from plastocyanin to ferredoxin. 
25.00 25.00 25.40 25.40 18.80 16.60 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.72 0.71 -4.94 6 67 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 39 4 27 66 2 124.50 62 96.53 CHANGED olssVpPo.slKGLAGSSluGtKLtl+su.huh.+ssppRAsuVVAKYGDKSVYFDLEDluNTTGQWDlYGSDAPSPYNsLQSKFFETFAuPFTKRGLLLKFLlLGGGSLlsYhSusuutDl.LPIp+GPQpPPclGP....RGKI ....................ssspPs...s.lpGLuGSS....lsGpKLsh.+Pupp.uh...+sps......h..RuuuV.VAKYGDKSVYFDL-DluNTTGQWDLYGSDAPSPYNsLQSK.FFETFAuPFTKRGLLLKFLlLGGGuhlsYhuusuosDl.LPIK+GPQ.PPp.GP....RsKl................... 0 8 20 25 +4034 PF01701 PSI_PsaJ Photosystem I reaction centre subunit IX / PsaJ Bashton M, Bateman A anon Pfam-B_1599 (release 4.1) Family This family consists of the photosystem I reaction centre subunit IX or PsaJ from various organisms including Synechocystis sp. (strain pcc 6803), Pinus thunbergii (green pine) and Zea mays (maize). PsaJ Swiss:P19443 is a small 4.4kDa, chloroplastal encoded, hydrophobic subunit of the photosystem I reaction complex its function is not yet fully understood [1]. PsaJ can be cross-linked to PsaF Swiss:P12356 and has a single predicted transmembrane domain it has a proposed role in maintaining PsaF in the correct orientation to allow for fast electron transfer from soluble donor proteins to P700+ [1]. 20.50 20.50 21.20 21.30 19.80 20.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.85 0.72 -4.19 31 684 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 652 7 51 324 19 36.80 72 80.63 CHANGED Mps...hppYLSTAPVluslWhshTAGlLIElNRFFPDhL ......McD...lKTYLSsAPVLuTLWFuh.LAGLLIEINRFFPDAL.. 
0 13 38 47 +4035 PF01241 PSI_PSAK Photosystem I psaG / psaK Finn RD, Bateman A anon Prosite Family \N 20.60 20.60 21.00 21.30 19.70 19.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.66 0.72 -3.97 9 219 2009-01-15 18:05:59 2003-04-07 12:59:11 13 1 131 9 87 209 97 76.60 33 73.61 CHANGED s..sshVhslshsssLhsstFGhhshpp+ss...usslPhss.............t.........ssGFsLs-lLAhuSlGHllusullhGLpshGsl .............................................t...oshlhhl.hssslhAhthGhhshpppss......u.ss.lP.ts.............................uGFs...ls-lLAssSlGHIlGhGllL.GLushGs..... 0 19 60 81 +4036 PF00421 PSII Photosystem II protein Finn RD anon Pfam-B_182 (release 1.0) Family \N 20.10 20.10 20.20 20.50 20.00 20.00 hmmbuild -o /dev/null HMM SEED 437 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.55 0.70 -5.89 19 3769 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 2075 40 249 3264 956 386.90 46 96.73 CHANGED hGhsWapspshllNssG+LlusHlhHsuLlshWAGuMsLaElAhFsPpc....PMacQGhhlLPahApLGhssu.GG.slss.shhssG...........h.tlhuotllh.GhlFhAhlh.....sh.-.hchFt..hshcsthcLspIhGIHLhLhGlhsFhhshhthhhshs.shWss...DshslTs.s.ssAshaGh.shsPFsssG..........llutHIhsGllsIhuGlaHlhs+P.thhh+AL.hhshEshLS.SlAAlhhhuFlsushhWYsssAhPsEhaGPTt.phsQ...pQthphhVc.....stpLutshush.t.huhhcYlhpSPutthlFt.Gth.+h.slttsWLt.hhh.s..G...................lDhstlhps..Pap...p+huhE..........................Yhp+A.LGpl.phstsshp.suV.hsSPRuWhohuHhshAhhFFhGHLWHuuRshh..shAGh-tslscplEhshh.KlhD 
.........................................hll..N.....s..G+LlusHlhHsuLl.....shWAGuMsLaElAhFsPpc....PMacQGhhllPahspLGhssu.GG....sl.ss.h..shhssG............................h.tlhuutllh.GhhahAhlh................sh......-...........h..........hFs.............hshKsp.chspIhGIHLhL.GlusFhhsh.htl....hh....h....hs.shWss........DshtlT................s..h...s.....ss.s..sh.....aG..h...h.......h.s...P..F.s..stG..............IhutHlhhGhlsIluGlaHl.s.+.......P.t...h..h......h..+............uh..........h....h.............shEshLS..SlAAl.hhuFlssshhWasssshP.EhaG.PTt.phsQ........tQth.hhhVp................s.pLutshuph.t.huhhcYlh.psPstthlFt.Gph.ph.sltssWLt..hh.s..G..................................lDh.stlh+s..Pap...p+hSsE..........................Yhp+A.LGpl.plsts....shc.suV.h.sSP...RuWhshuHh.huh.h.hh.GHlWHuuRshh....hG......................................................................... 0 50 157 222 +4037 PF04012 PspA_IM30 PspA/IM30 family Bateman A anon COG1842 Family This family includes PspA a protein that suppresses sigma54-dependent transcription. The PspA protein, a negative regulator of the Escherichia coli phage shock psp operon, is produced when virulence factors are exported through secretins in many Gram-negative pathogenic bacteria and its homologue in plants, VIPP1, plays a critical role in thylakoid biogenesis, essential for photosynthesis. Activation of transcription by the enhancer-dependent bacterial sigma(54) containing RNA polymerase occurs through ATP hydrolysis-driven protein conformational changes enabled by activator proteins that belong to the large AAA(+) mechanochemical protein family. It has been shown that PspA directly and specifically acts upon and binds to the AAA(+) domain of the PspF transcription activator [2]. 
32.00 32.00 32.10 32.10 31.90 31.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.43 0.70 -4.99 11 2762 2012-10-03 05:15:35 2003-04-07 12:59:11 7 5 1843 0 615 1709 167 219.40 26 93.85 CHANGED slasRltcll+uslschl-chEDPp..+hL-QtlR-hcsplscu+pslAphhApp+phEcclcptpppspchcppAptALstG..............pEpLA+EsltchpshcppspshcsphsptcststpL+pplspLEsKlpph+scpphLtARtpsA+Appplppphushs...ssuAhsphcRhcpKlt-hEsptputupht...........psts....hDtclcptthptss....cpsLupL+utpsp ...................................ulFsRhtcllpuslsshl-..c..sE..D..Pp..+hlcphIp-.hccpLscscpssAcslAppKphpc+lcptptphtchpp+A....ph..A.Lpps....................c-sLA+t...ALtcptphpshl.pshc...pphsphcsshspl...ccp.l..schpp+lsch+..s.+.pp.s.lh..u...........+pp.sAp...........up..p..p..ls...p...p.h..suhs.........hs.s.Ah...tp.h-ch...c.......c+...l..pp....hpAcu.cutspht...............t.ts.............L-pch...uph.tt.ssth............pp.LApL+t....ht................................................................................... 0 180 388 505 +4038 PF04839 PSRP-3_Ycf65 Plastid and cyanobacterial ribosomal protein (PSRP-3 / Ycf65) Kerrison ND anon Pfam-B_2979 (release 7.6) Family This small acidic protein is found in 30S ribosomal subunit of cyanobacteria and plant plastids.\ In plants it has been named plastid-specific ribosomal protein 3 (PSRP-3), and in cyanobacteria it is named Ycf65. Plastid-specific ribosomal proteins may mediate the effects of nuclear factors on plastid translation. The acidic PSRPs are thought to contribute to protein-protein interactions in the 30S subunit, and are not thought to bind RNA [1]. 25.00 25.00 45.80 45.40 20.50 19.40 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.56 0.72 -3.91 24 145 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 125 1 52 140 129 48.90 60 35.66 CHANGED LTsYFFWPRsDAWEplKspLEuKsWIsps-+lpLLNpsTElINaWQEps ...LTsYFFWPRcDAWEpLKscLEuKsWIocp-+lplLNpATElINaWQ-p.G.. 
0 11 35 47 +4039 PF03034 PSS Phosphatidyl serine synthase Griffiths-Jones SR anon Pfam-B_1414 (release 6.4) Family Phosphatidyl serine synthase is also known as serine exchange enzyme. This family represents eukaryotic PSS I and II which are membrane bound proteins which catalyses the replacement of the head group of a phospholipid (phosphotidylcholine or phosphotidylethanolamine) by L-serine. 25.00 25.00 26.10 25.60 19.60 18.30 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.96 0.70 -5.00 18 319 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 164 0 208 305 10 251.10 42 61.24 CHANGED PHPAhWRlVhuhuVlYllhLsFLLFQshcss+plh.talDPc.Ltp.ls.c..+pYuts.Cphhossc.......lhpp.hDhFshAHhlGWhsKslllRcahLsWslSlsFElhElTFpHhLPNFpECWWDplILDVLlCNuhGIhhGhhss+hLch+pYcWsul+.......................ch.o..hpGKhKR....hlhQhTPpShst.....................acWtshpo....hpRFhtlhhllhlhhlsELNsFFlKalLthPPsH.lslhRLllashlussulREaYsYl..sDs.pp++l.....GspsWlhhulhhhEsllslKhutp .....................................PHPAhWRhlhGhuVlYhl.hLhFlLFQ.shpss+phh.halDPp.Lt.......h..h...E....cpYus..s.Cp....lhs.-p...........lhsp.hDhFshuHhhGWhh.KslhIRshhls..WhlSlh..aElhElsFtHhLP.NFsECWWDp...........lILD.lL.lCNuhGIahGMhsscaLch+pYc.Wtulp..............................................................ph.o...hpGKhKR........shhQFTPtsWst.....................hcWhs.pu....hpRhh......tlhhhhlla..l...sELNTFFLKalhhhsspH.lshhRllhhshlsssslRpaYsal.........sDs....sK+l.....Gpp..sWlhhsI....shhEhllslKau.s........................................... 0 90 117 167 +4040 PF01515 PTA_PTB Phosphate acetyl/butaryl transferase Bateman A anon Pfam-B_799 (release 4.0) Family This family contains both phosphate acetyltransferase and phosphate butaryltransferase. These enzymes catalyse the transfer of an acetyl or butaryl group to orthophosphate. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.76 0.70 -5.37 21 7061 2012-10-02 21:08:39 2003-04-07 12:59:11 14 23 3973 31 1389 5144 3674 287.60 33 59.22 CHANGED phhchlhcpAc......stpp+IllPEGp-tRlLcAAptlhppGlAp.slLlG..s..-lpt.h.thtlphth.hplhsspss.th.ccasspahchRKpKGhTh-tAcchlp.DsshhushhVchGpADGhVsGsspoTucslRsuLQIItstsGspllSulFlM.hs............cthhhauDCAlsspPsu--LA-IAlpoAcsA+ths.hcP+VAhLSaSThGSucupss-+VtpAspls+ctpP-L..slDGElQhDAAlspcVAppKsPs.SsluGpANVhlFPsLpAGNIuYKlsQRhuphpAlGPIlpGhspPlNDLSRGsSscDIlNssAlTA ....................................................................................h.....lhphup......tt..t+lV..hsE..G..p-.RslcAsthhhpp.sl.sp.slLlG....s..cl.pt..h..t..h..t...h..t......l....p...h........s.........s...hplls...s......p...s..s..................c...p...h.....ht..t....h..h..p..h.tp..t+..........h.........T.......p...Apchlp...csshhushhVptGc......ADuh..l..s.G.sl.p.os.upsl+sslpl.I.t...s.t.s.G..s...p...h..sSuh.h.hh.hhs............spshhhuDsulN..............s.............P..s..............u.cpLA...-IAlt.u.Ac.os.....c..t.F.G............h..-....P.+VAhLSa.Sshs.S.u....p.......s....s....s..........s...p+sppAs.c.ls............c....c..........t..........t..........P.....-.......L.......hlDG..hphDAAls.p.l.up.pK.....h.P.s.....Ss.l.s.G..p.A.slhlFPsLpuGNlsYKhlp.p.h.u...s..h...h.ul.G....PlL..............Ghpp.PVpsLSRusssc-llphsslss.......................................................................................... 
0 423 876 1160 +4041 PF02126 PTE Phosphotriesterase family Mian N, Bateman A, Griffiths-Jones SR anon IPR001559 Domain \N 20.00 20.00 20.00 20.00 19.80 19.90 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.65 0.70 -5.41 4 1183 2012-10-03 00:45:34 2003-04-07 12:59:11 13 6 947 100 216 791 85 301.20 34 95.71 CHANGED lshuphGhTLsHEHlphshsuFhpshPptassppt..........................psuthhpcLtch+A+GVcslV-hTshslGRslphltcVuctTslpIVAuTGhYh.sshP........usphcSVEpLTphhlcEIpaGI-sTuIKAGIItphssu..tlTPhpE+VLcAsApAptpTGsPloTHTutu.ttGhpQhtIhppcGlDlSRVslGHsD.spsDls.LhchhshGsYlthDph.....Ghps.........h.s.pcRlthl+tLhDcGahc+lllSpDhpphacuhhpshhshu.......hsa....l.sslIPtL+p+GlopcsLcshLl-NPppahp ...................................................................................thsshGhTLsHEHLh.h.....s.h..u.s...hh..p........s.s..s...p..t...h.s...........................................................ph.ptshtElps...h....h.s.h.G.sco....lV-hT.s.t..s.hGRss...phltcVu.cc..T...G..lNlVuu..TG...a....Y...h...c...th...h...s...................p.t.l..t.pp.o..V.....c.p.....LAphh.lcElpp....G............I..............s..........s..........T......s......l..........+....A..Gl..I.u..E..l....usu....stl.T.h..Ec+..........shpA.AAhAp.pp.T.G...sPl....ssHs.s....hss...h..G.h..E......lc.l.......L..............p...cp.GV.c.s+...Vsl.u...H.sD...p.....pp..D..............h.....s..h..h..hc..hh..c.h..G.sa..lpFDtl.............hhp.t.......................h.hP..-.pcRl.shlttLh-c.Ga.tcpllLStDl....sp.............c....t....h....h.ts.s.s.G.hG..........asa........l.s....sFl..P.tL....t....p....p....G..ls....psp.l....c....ph.hl-NPuphhp................................................................................ 0 78 132 176 +4042 PF00809 Pterin_bind DHPS; Pterin binding enzyme Bateman A anon Pfam-B_1411 (release 2.1) and Pfam-B_3423 (release 6.6) Domain This family includes a variety of pterin binding enzymes that all adopt a TIM barrel fold. 
The family includes dihydropteroate synthase EC:2.5.1.15 as well as a group methyltransferase enzymes including methyltetrahydrofolate, corrinoid iron-sulfur protein methyltransferase (MeTr) Swiss:Q46389 that catalyses a key step in the Wood-Ljungdahl pathway of carbon dioxide fixation. It transfers the N5-methyl group from methyltetrahydrofolate (CH3-H4folate) to a cob(I)amide centre in another protein, the corrinoid iron-sulfur protein. MeTr is a member of a family of proteins that includes methionine synthase and methanogenic enzymes that activate the methyl group of methyltetra-hydromethano(or -sarcino)pterin [2]. 25.50 25.50 25.70 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.29 0.70 -4.96 42 8265 2012-10-03 05:58:16 2003-04-07 12:59:11 17 42 4552 134 2104 6541 3687 202.90 28 37.28 CHANGED llNlosDSFu-Guphhs.pt.slpp..........AcphlppGAcllDlGupuo..tP...........shlsscpEhpRllPllctltt..............sl.lSlDThcsclhctALct.GuchlNDstuhp........p.chhplstchss..slVlMHhp..................spshtpp...phc.-llp-lhphhptp.ht.h..sGls..pcllhDPGl..GF..u+s.pcshtllpplsc.........htth....shPlLlusSRKshlu .........................................................llNls.cSFssG..u.....p.....h....h.....p......h......t..h.....s..hp.c...................................Ac.phlp..p.....GAs......lIDlGs.-us....tP..t.................s..l..s..s.c.cE...h.pRlls.....l.l.....pultp...................................clslSlDo.+scVhct...uL....c....s....G....u...c......h...lN.Dshuhp.........................p.ph..hpl..s...t.c.....h..u.s........slllMahp..........................................................p.s.....t..cs.........sph..c.................-.l..hp.....clh.p.hhtpp.h..h......sG.ls..ccIllDPu.hsF..........uts......c...c.s.h..t.l.l..p.p.l.pt.............................hpth................shs..l.l..hGsSpKthhu................................................................................... 
0 718 1410 1818 +4043 PF01091 PTN_MK_C PTN_MK; PTN/MK heparin-binding protein family, C-terminal domain Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.70 21.20 20.30 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.63 0.72 -3.99 10 216 2009-09-12 05:24:01 2003-04-07 12:59:11 13 5 76 1 107 213 1 60.10 45 38.44 CHANGED GA-CKYcFpsWGECDusTGhKoRoGoLKKALaNA-CQpTVoloKPCsptsKsKspu...KKGcGK- ...........Gu-CKYpF..p.uWGECDssTuhKoRo.GoLK..+...uhh.sAsCpp..TlsloKPCsptsK....sK..pt.......pKtp............................ 0 19 28 65 +4044 PF05196 PTN_MK_N PTN/MK heparin-binding protein family, N-terminal domain Finn RD anon Manual Domain \N 20.00 20.00 22.80 22.80 19.40 18.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.32 0.72 -4.00 6 140 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 45 1 55 116 0 58.00 63 38.71 CHANGED KKEKsKKshtuS-CuEWpWGsClPNStDCGhGhREGT.....Cs-pT+KlKCKlPCNWKKcF .............KKEKscKps.tt..S-C..u.EWpWu.sCVPoSGDCGlGhREGT.....Ccpph+p.+CKIPCNWKKpF........... 1 4 9 25 +4045 PF04387 PTPLA Protein tyrosine phosphatase-like protein, PTPLA Mifsud, W anon Pfam-B_1525 (release 7.3) Family This family includes the mammalian protein tyrosine phosphatase-like protein, PTPLA. A significant variation of PTPLA from other protein tyrosine phosphatases is the presence of proline instead of catalytic arginine at the active site. It is thought that PTPLA proteins have a role in the development, differentiation, and maintenance of a number of tissue types [1]. 
21.20 21.20 23.80 22.00 19.50 19.50 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.17 0.71 -4.82 70 686 2009-01-15 18:05:59 2003-04-07 12:59:11 9 16 312 0 448 652 10 151.90 32 59.31 CHANGED Qsh.AllEllHuhlGlV+Ss.lhsThh.............................QVhuRlhllaulh..hhhPpsps.......................p.shshhlluWSloElIRYuaYshsl....hshs..PthLsWLRYohFhlLYPlGls.uEhhh.................lapulsh............h...sthshuhtah.......................hhLhhhLlh.....YlPG..hhhlYsaM...ltQR+KhLtptp .......................................................................................Qsh.AllE.....ll.H.....s.h.lGlVpos.l...hsThh.............................QVhuRlhllas.lh..t..sh...phps..................................p.slhhhlhuWols.........Ell....RYsaYshsl.............hs.hh......PthLpWL.RYohFllLY...PlGls.uEhhh.................latulsh...htt.......t.hs...h.h......P.th.sh.s.h...pah............................hh..Lhhhhhh................YlPs....h.hh.ahaM...hpQR++hlt...t.............................................. 0 128 210 344 +4046 PF01242 PTPS 6-pyruvoyl tetrahydropterin synthase Finn RD, Bateman A anon Prosite Domain 6-Pyruvoyl tetrahydrobiopterin synthase catalyses the conversion of dihydroneopterin triphosphate to 6-pyruvoyl tetrahydropterin, the second of three enzymatic steps in the synthesis of tetrahydrobiopterin from GTP. The functional enzyme is a hexamer of identical subunits [1]. 
21.30 21.30 21.50 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.77 0.71 -4.43 149 3812 2012-10-01 20:59:24 2003-04-07 12:59:11 14 10 3101 68 1103 2685 1412 126.40 30 87.70 CHANGED pls+c.hpFsAAHpLh.sht...............GcCpp.l..HGHsaplclplpup........hscsG...hlhDFsclKphlpp.lhp.p..hDHphL.N...-hsthtt..............................................P..TuEslAtalacpLpptl..........thpltp.lclh..E.ossshsp..ap.tp ..................h.lh+c.hpFpAAHpL...phs.........................t+Ctp.l..HGHoahVclplpGc.h....................scsG......h.lhDFuc..lK.phh+...lhc....p.......hD...HphL..N.....-h.sth..ps.....................................................................................P..TuE....slApalapplpstl................splp.p..Vplh..E.Tssshs.ap................................................................................. 0 366 700 933 +4047 PF00854 PTR2 POT family Bateman A anon Pfam-B_571 (release 3.0) Family The POT (proton-dependent oligopeptide transport) family all appear to be proton dependent transporters [1]. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.02 0.70 -5.68 25 7665 2012-10-03 03:33:39 2003-04-07 12:59:11 16 26 2595 5 2449 6215 1113 304.30 20 67.32 CHANGED apTIhhuullYslGhslhsluuss.sh...............s.sht..............hshhaluLhlIulGTGulKPsVSuFuuDQF-.csp...ctppspFFshFYhuINsGSLlushhsshlp........pphsasluFGlsulsMllulllFhhGpthY+ph..tss.hhhs...hlshllsshh+ptthths...ttthlhhs....hpphs................pthhppphhtsp.......sshlhhshshhhshh.....sp.spsthlptlhhhhtlh.hhhlh.sphhs.huhhhlt.lsthphhlhs.hplsssphsshsshslllhlslhshllsshsp.....hphshohhh+hulG.hhhthushhssh.................................................lph.....thssstuhsss.....hhhl.shhhhhthsplhlpusu.s.....hhh.phhhspt.shhpuhhhhhpssss ..................................................................................................h..slhhu.uhl...h.h..l...........Ghhh......h..s..h.....th....................................................................................................................hha..hu.l.h.h..l.s.lG..s.G.......hh....K...........sshushhuc....ap...tp.........................c.ph....su....hF.shaY....hu.l.NlGuhh.u.h.hsshlt.................................................................pp..hu.a.t...h..uF...s...l.s..s..h..u.hh...l..u.ll.......h.a......h..h......s..t.....t.......h.....h..........h.....t.........................s.........hp...................ht.h..h.....h...h.....h......h..h..h......h...h.........h..................................h........h..................................................................................h..h...........h.h...h.............................h.h.......h............h...................................t..p..p...h......hhhh..hh.h....h.h.....h....h....h.....hh...h..h.t....p...h...............s..t.......h.s....h..h.......h........p........s.p...........t.................................h.......h.......s...................h...........p.........l.................s.ss...h.p..s...l.
sshhl...h.l....h.ssl...hshlhh..hhp..........................t.s.h..h.+.huhG.........h....h....h..hh.uh....h..h..h..hh..............................................................................................................................................................................................................................................hth...............hs...t...t..t..s....hs...................hhl.......hhhh..hsphhlssl..u.s..................hhh.phh.tt.h.s.hhuhhhhh.h..h.......................................................................................................................................................... 0 540 1303 1941 +4048 PF00381 PTS-HPr PTS HPr component phosphorylation site Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.36 0.72 -4.09 168 8181 2009-01-15 18:05:59 2003-04-07 12:59:11 14 41 3582 89 1297 4207 498 83.30 31 35.45 CHANGED h..hp.tpl...sltsttGLHARPAuh..lVptAspF..suclplt.ps.s....ppssAKSlhulhsLustpGsplplpscGp.........DtppAlpsltpllp...st .......................ptphhlt.sppG.LH..ARPAsh..lVpts....ppF......su-lplp...pp..s.......cpssu....K...S....lhulhs..Lu..ltp...G..splpl.p..A.c.Gs.........................D..t.ppAlpslsphhpt.t....................... 0 417 759 1033 +4049 PF01885 PTS_2-RNA DUF60; RNA 2'-phosphotransferase, Tpt1 / KptA family Enright A, Ouzounis C, Bateman A, Kerrison ND anon Enright A Family Tpt1 catalyses the last step of tRNA splicing in yeast. It transfers the splice junction 2'-phosphate from ligated tRNA to NAD, to produce ADP-ribose 1"-2"-cyclic phosphate. This is presumed to be followed by a transesterification step to release the RNA.\ \ \ The first step of this reaction is similar to that catalysed by some bacterial toxins.\ E. coli KptA and mouse Tpt1 are likely to use the same reaction mechanism [1]. 
20.90 20.90 21.20 22.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.26 0.71 -4.99 88 1070 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 913 1 404 881 94 168.10 38 78.73 CHANGED sp...ppt.sclSKhLShlLRHt.spchGLplDppGal..slspLlpthpt......p..sht.h.....shpclpplVpss-KpRFplp..................................................................................................................................sstpIRAsQ...GHSlp.Vc......Lph...sspt........................PthLaHGTspcthssIhppG..Lp.MsRpaVHLoss.............................hpsuhhsG...tR+.sssVllhlDspphh.psG............hpFahSsNsVhLoc......tl ....................................p..pp.sphSKhLSalLRH.........t..Ppt........hG.........l..s..lD.pcG..as..slccLl..tthp.t..........t......shp...l..........oht.lcplVts....s-....Kp......RFshp.......................................................................................................................................................................t.s.s....tpI.R..AsQGHSh.p...Vs........h.....sptt..................................sPthLaHGTspchh.spIhp.pG..Lhth.....p.Rp.....aVHLS..ss............................................ttsAhhs.G...tR+..sssVllhlcspphh..pc..G............l.Fa.upNGVhLos.......sls................................................... 
0 144 252 336 +4050 PF00358 PTS_EIIA_1 phosphoenolpyruvate-dependent sugar phosphotransferase system, EIIA 1 Finn RD anon Prosite Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.62 0.71 -4.60 14 7674 2012-10-02 20:27:15 2003-04-07 12:59:11 15 18 2590 18 868 4397 51 132.00 41 27.04 CHANGED ssstslsoPlsG-llsLspVsDpVFuuchhGcGhAIhPosGpVhAPlcGsltplFsT+HAlGlpS-sGsEILIHlGIDTVcLcGcGFpuaVppGscVctG-hLlpFDlstlcpsuhshhoPlllTNoscassl ...........................s...plhuPl.sGcll.sLs.cVsDsVF.u.pchhG-GlAI....c...P........o.....s.....G......p.........l..h...APs.s.Gplsp..l.F.s.T.p.HAlGlp...o.....-..s....G......l.......E.......l.LlHlG.l.DTVp..L..c.....G...c........G......Fp.sh.V..p.pGpcVp.sG.shLlphDlshIcp.su.h.ssh.oPVl.lT.Ntspht..h................................................... 0 230 456 653 +4051 PF00359 PTS_EIIA_2 Phosphoenolpyruvate-dependent sugar phosphotransferase system, EIIA 2 Finn RD, Griffiths-Jones SR anon Prosite Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.97 0.71 -4.47 153 18510 2012-10-02 23:31:29 2003-04-07 12:59:11 17 113 3409 21 2010 9683 678 141.30 21 36.45 CHANGED p.hhspphlhh.s..php...scpcslphh...sp..hLhcp..s.........hlpst....hhpslhpREc...hhsTsl....s..su.........lAlPHsc........t.t...lp.......cs..sl.slsphpp.sl.pa....s............tt.Vcllhhlssscsst............alp..hLsplsph.l...tspph.....hppLh...p.sps.pclhp.llppt 
.........................................................................................t..hptp.lhh.sh..psp.....sppcslchh...sp..hL...h..pp...s........................hlpss.........ahpu.ll....p...R..Ep....t..hs.Thl....s........su........................lAl.P.Huc....spt.....lp.......cs...sl.sls..p....h.p..p..sl..pa...s.............t.-..spsl.p..l..lhhlusssspp............Hlp...hLspLs.ph.l....tc..c..ph.....hpp...Lh...p..sp..s....p.clhp.llt..t........................................ 1 621 1143 1577 +4052 PF00367 PTS_EIIB phosphotransferase system, EIIB Finn RD anon Prosite Domain \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.50 0.72 -4.71 138 13320 2009-01-15 18:05:59 2003-04-07 12:59:11 15 22 2535 8 1413 7412 51 34.90 43 6.63 CHANGED AppllpslGGpcNIpslspChT.RLRlsl.....p....D..pstls .........AtpllpulGG....p-NIs..slsp.C.h.T.RLRlsl.....p.....D..pspls.............. 0 324 668 1027 +4053 PF02378 PTS_EIIC Phosphotransferase system, EIIC Mian N, Bateman A anon Pfam-B_639 (release 5.2) Family The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The sugar-specific permease of the PTS consists of three domains (IIA, IIB and IIC). The IIC domain catalyses the transfer of a phosphoryl group from IIB to the sugar substrate. 
26.80 26.80 26.90 26.80 26.60 26.70 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.05 0.70 -5.61 55 25880 2012-10-01 19:13:17 2003-04-07 12:59:11 13 36 2821 4 2482 14149 266 300.10 17 57.49 CHANGED tcuhhhslPlllsuulllslus.................................thhs.h..hhptsusssFshLsllhuhulAhsl..t.......sssssss.uuhluhlsh.h...hshhhh..t......................................hsshhshhhsspGhhuullsullsshlaphhhp...h.h+lPcslshhs.....uptFsslIsshhshllhuhlhthlhshhpsslhs.h....h.......hhtssshhushlhuhlhphlhhhGlHtshhsshhhtsh...........hhh....s..hh................................hh.tt.h............hhhGssGssLAhhhshhhtp+spptptlspuuls.......................shlhGIsEPl.Fuhshlhshl.hhlshllsu ............................................................................................................................puhh.hlshlls.u..G...l....llulst......................................h........................................t.h.h.t.s...h...h.....t.h....h...t....h.h..u..s...s......s......F..s....h...ls........l...l....h..uhsh..u....h....sh.........................s.ss..sh...h...s.....u..s.h.h...u.h.ls..h...h......................................................................................................................................h....h..h......h....s....s...s....Gh....huu.l.l.su.hl.uu..h....l.h...p.h.hpc..............h...cl.....P....p...s...l...s..hhh........s.h.h...ss....l....lss...h....h...s....h.h...l...h....u....h..l....h...t....h.l....h.....s..h...l..s..s.s.lt.s.............h...............ltsts....hh..u...s.h.l.h.u.hl.hth.hh.h.h...G.l.Ht.s.h.s....s.s.hh.h.hhh........................................................hh.........................................................................................................................h.h.............hh..hu....s..s..u...s....s.....lu.h...h.h...s....h....h....h....t....p..+.......p..p.....t....t....p..t..h...hs.u....uls.....................................s.h.l.h.G.lsEPh.....auhsh
.shs....h..hlsshlss............................................................................................ 0 606 1215 1835 +4054 PF02255 PTS_IIA PTS system, Lactose/Cellobiose specific IIA subunit Bateman A, Mian N anon Pfam-B_3710 (release 5.2) Domain The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. The lactose/cellobiose-specific family are one of four structurally and functionally distinct group IIA PTS system enzymes. This family of proteins normally function as a homotrimer, stabilised by a centrally located metal ion [1]. Separation into subunits is thought to occur after phosphorylation. 20.80 20.80 20.80 21.30 20.70 20.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -9.98 0.72 -4.12 110 4200 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 1842 38 343 1558 6 94.80 39 88.68 CHANGED EphshplIhpuGsARStshcAlptA+pG-accA-phlppApctltcAHphQTpLIppEAsG..s+hphollhlHAQDHLMouhsh+-LspEhI....cLa+c ............lshplIhtuGsARSphhEAlpt.A.+p..G..-.F..p.pAcphlppApp.s.lhcAHcsQT.p.LlppEA..sG...s.c.h..p..lollhlHAQDHLMTohhhp-LhcElI-La+c................. 0 79 164 248 +4055 PF03714 PUD Bacterial pullanase-associated domain Yeats C anon Yeats C Domain Domain is found in pullanase - carbohydrate de-branching - proteins. It is found both to the N or the C terminii of of the alpha-amylase active site region. This domain contains several conserved aromatic residues that are suggestive of a carbohydrate binding function. 
25.00 25.00 26.00 25.40 24.80 24.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.50 0.72 -3.88 39 1526 2009-01-15 18:05:59 2003-04-07 12:59:11 9 80 727 16 186 1138 23 104.40 26 14.47 CHANGED psplplHYpR....sc..usY-sWsLWlW.s-ssssss............hs.......pshshstp-c.YGsahslpLspsspp....lGFllpps...sspD.........usD+hlsh....ptsptlWlhpGspplahsps .................s.shhRlHYp+..........ss..usY..........-shuLWhW....s-spsssp..................tWs........tshph.stpDc.YGtYhcl..pl...s..s..s..t..sp..........lu...Fllpss..........ptps......hstD.......hpl.....c...hh.....t........shs..p...l...Wlh-sDtplYhp........................ 0 48 96 146 +4057 PF03829 PTSIIA_gutA PTS system glucitol/sorbitol-specific IIA component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 26.60 25.80 24.20 22.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.51 0.71 -4.42 35 1216 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 908 2 104 427 58 117.30 39 95.66 CHANGED Msh.....lYpoplspIGs.Apphlp-.pMl..IlFs-sA.Ps-Lt-aChlHphs..p...hpsslpsGshlplsspsY.ITAVGslAppNLcpLGHlTlpFDG.spps-hPGolalps...ps..sslphGs..pIp .......Mph.lYposIscIGspAp-hLs-...pMl..IhFsE.s.A.Ps-Lc-aCaIHspu.....p....hpss..lpsGsphslup.ppYslTAVGslAppNLc-LGHlTLpFDG...hs..-..s..chPGsl+..Vss.....ss....s..cIssGshl.h................. 
0 31 55 83 +4058 PF03830 PTSIIB_sorb PTS system sorbose subfamily IIB component TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.70 21.70 23.30 22.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.84 0.71 -4.35 122 5348 2009-09-11 23:04:30 2003-04-07 12:59:11 10 6 1539 14 422 2069 35 149.20 34 73.55 CHANGED pIshsRlDsRLlHGQVustWspphssspIlVlsDplA...pDclpcshlchAs.PsGlphplhslpcslcshpssp.hssp+lhllhcsPpDslcllcuGl....slcp..lNlGshphpp....G+cpls..p.slslsppDlpsh+cLppp.Glc.lplptlPs-spt...s ............IsLsRIDsRLIHGQVustWspph.ssspIlVss.DpVA.............pDslp+sll.chu.s..PsGlphplhslp..Kslc.shps.sp..h..sp.p..+lhllhcsPpDshcl.l.-.u.Gl....s.l....c.p.....l...NVG..sM.......uh..p.p......G.+p..p..ls......p.slslsccDlpshccLppp.Glc.lplptVPsDst........................... 0 108 241 337 +4059 PF03209 PUCC PUCC protein Mifsud W anon Pfam-B_2839 (release 6.5) Family This protein is required for high-level transcription of the PUC operon. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.31 0.70 -5.75 11 317 2012-10-03 03:33:39 2003-04-07 12:59:11 10 5 173 0 127 1156 603 383.60 31 84.17 CHANGED TLNRVMIVELAVPAslVulMlALPhLhAPFRsLhGa+SDo+tSALGa+RsPYIWhGolh.hGGhAlMPFAlllLos..ps...tsPtWhGhhsuulAFLhlGsGlHhsQTsGLALAsDluscEsRP+VVGLhYVMLLlGhlloAlhhGhLLss...................as.u+LIpVlpGsulsshVLNllAhW+.Estp.tps.....t.cscpcPsht-AWtphs.upspAhphLhslhLGThuFsMpDVlLEPYGGpVlsLsVupTTpLTAhhuhGsLlGFhhuuhsLupGhcshphAshGshlulsuFhhllhuuhhs..s..hslFhsGshhlGhGuGlFutuTLTusMpLAstspuG...lALGAWGAsQAoAAGluhhlGGslRDllpths.........t.suhuYshVaulEhhlLllolhhhssllcsptt 
.............................................................................................................................................................................................TLNRVMIl.E..LulP.Ahlsu.hh.lul...hh.h.uP..h..R.shhGatSDspt.hhGh..+R..sPa.Ihh..Gshh...h..s.G..hh....l.h....s.....h.....u..l...h...h...lu..s...................sh.h.....h...Ghh.h.usluF...l...hh...G..hGl.....p..ss.pT.shLA.....LhsDhs....s.t...c....p.R.......s.p.ll.ullas.M.......h....l..h.Gh.h..l..o......Ah.lhG...t..l..Ls.s.........................................................a.o......t..+.L....lp...V...l..p.s....su.......l....l....s....h..s....L....s.....h....l.A..lW..t.Et...+..tstts.................ttsp..p....s....Ft...p...s...h.t..phh......spsps.....tph..hhh.l...hl.us.hu.a.......h..Q.D.l...l......L......E...PY.G.Gp.l...huh..o...l...u.p.TT.p.....Ls.....A.h.h.u...s...G....s.Ll..Ghh...h....s....u....h...h....l..s.....t...t.h.s........h...p...h....s...t....h...G..s..h.h..u....hhu...h...s...h...l..l...h....u..u.hs.........s......sh.hhtsu...s..hll..Ghu.sGlaususl..os.h.M.sl..s...s...t.....s......p.....sG.......lhlGsWG....A.s....Q..AhAtGluh.h.l.GGs.l.tD.......ls..p...thss................s..sshuY.u...hVat.l..Ehhlhhs.u.lhhlh.lsht...t......................................................................................................... 1 27 64 92 +4060 PF00806 PUF Pumilio-family RNA binding repeat Eddy SR anon [1] Repeat Puf repeats (aka PUM-HD, Pumilio homology domain) are necessary and sufficient for sequence specific RNA binding in fly Pumilio and worm FBF-1 and FBF-2. Both proteins function as translational repressors in early embryonic development by binding sequences in the 3' UTR of target mRNAs (e.g. the nanos response element (NRE) in fly Hunchback mRNA, or the point mutation element (PME) in worm fem-3 mRNA). Other proteins that contain Puf domains are also plausible RNA binding proteins. Swiss:P47135, for instance, appears to also contain a single RRM domain by HMM analysis. 
Puf domains usually occur as a tandem repeat of 8 domains. The Pfam model does not necessarily recognise all 8 repeats in all sequences; some sequences appear to have 5 or 6 repeats on initial analysis, but further analysis suggests the presence of additional divergent repeats. Structures of PUF repeat proteins show they consist of a two helix structure [3,4]. 21.20 18.60 21.20 18.60 21.10 18.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.47 0.73 -7.78 0.73 -4.18 50 6866 2012-10-11 20:00:59 2003-04-07 12:59:11 14 49 324 360 4462 7032 23 33.70 25 20.65 CHANGED phpsplhpLsp.cpaGshllQ+hl-h.tstpptphlh ............splhpLsp.DpaGshVlQ.....+hl-t..ss.ppt.................... 0 1426 2555 3719 +4061 PF02245 Pur_DNA_glyco Methylpurine-DNA glycosylase (MPG) Bateman A, Mian N anon Pfam-B_3352 (release 5.2) Domain Methylpurine-DNA glycosylase is a base excision-repair protein. It is responsible for the hydrolysis of the deoxyribose N-glycosidic bond, excising 3-methyladenine and 3-methylguanine from damaged DNA. 
21.00 21.00 26.40 21.00 19.50 18.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.20 0.71 -5.14 149 1752 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1672 8 481 1305 356 183.20 36 88.25 CHANGED spp...Fa.spsshtlAccLLGphLlpp..t....st.lsG........pIVETEAYhG......sD.AuHuapG.pTs.........RspsMFGssGphYVYhhYGhHaChNlVstscGh.usAVLIRAlEPlp.Ghph....hp.....................tpR.h................t..................................ppLssGPG+LspALuIsts.tsGtsLss.......ss........................lhl...........sss........hss..........tp..l.ssusRIGIs....cus-hPWR.Faltussh .....................tFh.spsshpl.A+pLLGt..hLhtp.....s...t........tt.hsG........hIVEsEAYhG......s.DtAuHuatG...pTs.........RspsMas...sGplYlYhhaGhHhhlNlVsp.s.c.G.h.spuVLIRAlEPhp.Ghph.......ht..................................................................tpRtt.........tt..........................................thpLssGPGKLspALu.Ishp.hsGtsLts...........ss..........................................lhl.............ppt....................................t.s..tp..l.hsusRIGIs.....csschPhRahlpGss................................ 0 157 292 393 +4062 PF04845 PurA PurA ssDNA and RNA-binding protein Mifsud W anon Pfam-B_4535 (release 7.6) Family This family represents most of the length of the protein. 
24.60 24.60 24.70 24.70 23.80 24.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.46 0.70 -5.23 6 296 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 121 4 173 276 3 177.70 43 73.55 CHANGED pQELASKplcIQsKRFYLDVKQNsRGRFlKIAEVGsGG......pKSRlhLShsVAsEh+DpLucF.-aYApLu.....ppttp..pp.......................pptLKSEhllRDsRKYYlDLKENpRGRFLRIpQTshRG.........s.spcQpIALPAQGhIEFRDALscLI--YGss........p-..uELPEusslpVDNKpFaFDVGSN+aGVFhRlSEVK..ssYRNSITVPhKsWs+FscpFscYsEcM .......................................................ppLAo+.lplQpKRFYlDVKps.+GRFlKlAElh.st..........................p+u.plhLuhssA.th+p.Lsph.chhspls..........t.t....tt.................................hLKo-hl.p-pR+YYhDLKENtRGRFLRlp....Qshs.s.....................tttppIslPAQGhlEFRDALspLl-caG....................tt....-....LP.....Eupsl....p..V..D.sK.pFaFDlGsNch.GlFlRlSEV+....ssa.RsoITlP.....h.....csWscFtphhscas-c....................................... 0 40 74 120 +4063 PF02700 PurS UPF0062; PurC; Phosphoribosylformylglycinamidine (FGAM) synthase Mian N, Bateman A anon COG1828 Family This family forms a component of the de novo purine biosynthesis pathway. 21.30 21.30 21.30 21.30 20.70 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.58 0.72 -4.03 102 1833 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 1819 26 504 1114 1244 77.80 38 68.30 CHANGED ac.scVhVoL..KsuVLDPpGpAlcpALppLGaps.VpsVRlGKhlElplpss.spppAcpplcchC-+L.LANPVIEsYch.-lpc ...........lpVpVpLKsuVLDPQGpAlpp.A.L.p.p.L....G..ast.Vpc.VRhGKhh..-lpl-s.s...st.p..tscsplcphs-cL.LANsVIEcYphcl.......... 0 167 348 442 +4065 PF00855 PWWP PWWP domain Bateman A anon Bateman A Domain The PWWP domain is named after a conserved Pro-Trp-Trp-Pro motif [1]. The domain binds to Histone-4 methylated at lysine-20, H4K20me, suggesting that it is methyl-lysine recognition motif. 
Removal of two conserved aromatic residues in a hydrophobic cavity created by this domain within the full-length protein, Pdp1, abolishes the interaction o f the protein with H4K20me3. In fission yeast, Set9 is the sole enzyme that catalyses all three states of H4K20me, and Set9-mediated H4K20me is required for efficient recruitment of checkpoint protein Crb2 to sites of DNA damage. The methylation of H4K20 is involved in a diverse array of cellular processes, such as organising higher-order chromatin, maintaining genome stability, and regulating cell-cycle progression [2]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.28 0.72 -3.78 273 2569 2012-10-02 16:56:36 2003-04-07 12:59:11 12 117 297 46 1417 2476 10 92.50 23 11.99 CHANGED hpsG.-lVWu....K.h...cGa...P..hWPuhlhs.t......................................pt......t......t...p...........tt.........tsph.............................h....VhFFu...sp.p......a..u.....al..p..t.pp.lhsap......pt..p...............pp............h...pp.................p............p...cp..................psh...ppAlppAt...pt .....................................................u-lVWu....K.h......cGa.........P...aWPAhl...hs.........................................................................t...t..............p.........tt............tsph.............................h..VhFFG.......sp..p..............h..u.....al..s..........p.p.lh.sap.....ppp..................p.p............h.tt..................t............p..pp....................ttappAltch...t............................................................................................................................. 0 323 550 943 +4066 PF02436 PYC_OADA Conserved carboxylase domain Mian N, Bateman A anon Pfam-B_628 (release 5.2) Family This domain represents a conserved region in pyruvate carboxylase (PYC), oxaloacetate decarboxylase alpha chain (OADA), and transcarboxylase 5s subunit. 
The domain is found adjacent to the HMGL-like domain (Pfam:PF00682) and often close to the biotin_lipoyl domain (Pfam:PF00364) of biotin requiring enzymes. 20.60 20.60 21.20 21.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.28 0.71 -4.88 139 3341 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 2687 38 837 2634 637 192.00 34 23.79 CHANGED VhhaplPGG.hoNLtsQhcp.GhtD+hc-VhcphscVpc.lG.lshVTPoSplVGs.Alh.Vhs.......................sc.+hhshsppVhsahcG.hGpPPushscclpp+lL.ps..ccs..osRPu-hLpP.-h-plcpElpp.h............h.............sc-DlLoYsLaPpVstcFhct+pphssht.hss....................hhhshphspphp...lcl-..Gcshhlcl ........................................lhhtplPGG.....hoNLppQh+p....Ghs-+a--Vh.c.thscVpp.hG.lshVTPoSplVGs.Alh.Vhs.......................................sc..chhshscplhshhpGch.Gp.s.sus..hs.pc.Lp.p...plL..cu.........p..c..sl....Ts..R....P..............u..-....h..L..pP....-..hc..p..l...c..p-ltchh....t..t.....h......................sc-DlloaAlaPplh.....hcahct......+p......p..h....s..s.h..p....l.sp..t.............................hh....s....t....s..p.php...lplp.Gpthhlp.................................................................................................... 0 304 553 729 +4067 PF03013 Pyr_excise Pyrimidine dimer DNA glycosylase Griffiths-Jones SR anon Pfam-B_1388 (release 6.4) Domain Pyrimidine dimer DNA glycosylases excise pyrimidine dimers by hydrolysis of the glycosylic bond of the 5' pyrimidine, followed by the intra-pyrimidine phosphodiester bond. Pyrimidine dimers are the major UV-lesions of DNA. 
22.10 22.10 22.50 22.30 22.00 21.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.85 0.71 -4.38 26 421 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 392 7 56 317 25 114.10 37 90.02 CHANGED MphhppsLlscLscppLLupaREhstl..............+ssuhtpcHhTl............shhFhpp.hhLhphathlhcEMpcRG...YpsstpWhs............tpapGchsPs....hhshpclttphPhhsc+.p.Yhpps .................Mphhpp.hhlscLscppLLupHREhsul..............+usuhs.p+hTl............shhFhtpshhLhphHthlhpEMppRG...Yps.s.p..Wh..-..................paR.uchpPs....h.shtcl.....t.p....hshhscast.Yhtt................................................ 0 21 32 43 +4068 PF01948 PyrI Aspartate carbamoyltransferase regulatory chain, allosteric domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain The regulatory chain is involved in allosteric regulation of aspartate carbamoyltransferase. The N-terminal domain has ferredoxin-like fold, and provides the regulatory chain dimerisation interface. 25.00 25.00 46.80 45.80 18.50 17.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.25 0.72 -3.96 60 1268 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 1241 116 234 645 96 95.50 52 59.80 CHANGED cpcLpVpsI+sGTVIDHIsAGcu........LpVLclLs.lp..ssstploluhNlsSp+hG+.KDIlKIEsph.Lscc-lsplALlAPpATlNII+-YcVVcKhplpl ..c.pcLpVEAIKpGTVIDHIPAphG........hKlLpLF+Ls...cocpRlTIGhNLPSsch..G+..KDlIKIENsF.......Lo--plspLALaAPpATVNhI-NY-VVpKp+....... 0 65 137 188 +4069 PF02748 PyrI_C Aspartate carbamoyltransferase regulatory chain, metal binding domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain The regulatory chain is involved in allosteric regulation of aspartate carbamoyltransferase. The C-terminal metal binding domain has a rubredoxin-like fold and provides the interface with the catalytic chain. 
21.80 21.80 21.80 23.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.08 0.72 -4.53 70 1294 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 1268 116 241 677 90 51.10 47 31.64 CHANGED cplpGl..l+CsNsNCITs..s.EPVpopFtV..hp.pshpL+CcYCE+hhscpclhp .........-cIssV..ltCPNsNCIo+...s.EPV.s.S..s.FtV...........pcsscltLKC+YCEKcas+p.V..t..... 0 70 143 195 +4070 PF01243 Pyridox_oxidase Pyridoxamine 5'-phosphate oxidase Finn RD anon Prosite Domain \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.76 0.72 -3.97 141 8674 2012-10-02 11:35:36 2003-04-07 12:59:11 15 54 3619 83 2886 8805 4127 89.20 19 47.13 CHANGED lsp..php....phlp..p................phssLAT....ss..t-..GtPpsphhh..hhhstsp...........hhhhhsstpu......p+spsltpsPc.lulhhh.tp.........h..t.ptlp...l..pGpuchlsctp ............................................thtthlt..ps...............ps..hh.lA.T................ss......tc..........G.....p......P....p.....s.....phhh......ht..h.hstp...............................hhh..h..h...o...s..t..t.u.........................p+....s....p.p.l..........p....p......s......P....p....lu..l.......h..hh..p.................................ht.ptlp.....l.....pGp.uphlpt..................................................... 
0 870 1911 2492 +4071 PF00282 Pyridoxal_deC pyridoxal_deC; Pyridoxal-dependent decarboxylase conserved domain Finn RD anon Prosite Family \N 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.97 0.70 -6.02 11 7254 2012-10-02 18:26:03 2003-04-07 12:59:11 14 35 3273 90 2360 6947 942 298.70 24 67.83 CHANGED PGYL+t...hlPtsAP.csEshppIhpDhcchlhPGlopWpsPpFauaaPsssSh.uhlu-hLssuINssGFoWtsuPAsTELEhlshcWluchltLPttFhtp.t...GGGVlQsouSEusLlullAARp+hhpch+tps........huKLVsYsSDQsHsSl..cKAutlusVc...h+hl.scp..phthpspsLccAIEcDhppGhlPhaVsATlGTTsssuFDslpcluslspc......aslWlHVDAAYAGoAhICPEaRp...hhpGlEpADShshNPHKahlshhDCoslWV+-cstL.pshphss.YLppsp...shssDhtcapIshuR..+hculKlWhllRuaGlcsLpspIRcphphAphhpshlppDsR.FElss...chthuLVCFRlK .........................................................................................................................................................................................................................t......h........................................................sh.s.....h..h.p..h.......P.t...h.s.....lE.......hhlshl.s.p.h.h.....t..h...s............................................us.Gs....hp..s.u.u.o...pus.hhu.....h....h.........s.h+......t......p.......h......h.....t......p......h...t.t...t..t..............................................htc.....s....hh.s..u.pp..s.H...h...s.h....cK...s...u....t...h....h....s...lt..........................lp....l....s....st.........pt...t.h.p...............h..t..s...l.c...p.t.....l...p....................t...........s........p......p..............h................................h.......l................l.....u...o..h..G.......oT......sG.....s.h.....D.....s.....l....p....t....l....s..s....l...t..p.c...............................h..s...l...a...lH.V......DA..A.h.u...G.....................h..........l......s......s........hp...........................h.ht.l.....p...........t.......s.c..S......lshss..HK.......ah.h.ss.....h.ssu...h
h...h..h.+.p....p....h...h......p..........t..h........h...p...s........Y..L..t...................................................................................................................................................p.......h..h.......p...h.......tl...h.u.R................t...h...h...p.ha...hhc....h..G..h...p...s...htthhpp.........t...........ht.......hAthht..p.....l....t....t..................h..lh............h.s.....l..h....................................................................................... 0 695 1263 1924 +4072 PF00719 Pyrophosphatase Inorganic pyrophosphatase Bateman A anon Pfam-B_613 (release 2.1) Domain \N 21.90 21.90 21.90 22.00 21.70 21.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.63 0.71 -4.78 157 4053 2009-01-15 18:05:59 2003-04-07 12:59:11 14 17 3177 158 1242 2843 708 156.90 40 75.74 CHANGED ssl...lEIPpsop.sKYElDKcsshhhlDR.............hhass.....htYPtNYGalPpTls......................sDGDPlDlLVlss.tshhsGsllcsRslGlLpMhD-.....uttDtKlluVsss...Dsta.p...slpclp-ls.thlppIpcFFcpYKsLc......psKhl..plpsatstctAtchIpcuhppa.pp ..........................................slIEIPtsup...hKYElDK-o.G....t...lhlDR.............hhhou......hhYP.sN.YGalPp..TLu............................tDGDPlDV.LV.ls.s...hP.l.h.P.G.s.V.l.c.s.Rsl.GlLpMtDE........ustDtKllAVPts....c.pa..s.......clcD..l...s...Dl...s....t.....hh...hppIp..HFFcpYKsL-.................tsKaV..c.l.psa.tstctApt.Ihpuhcphp.t....................................................... 0 391 752 1029 +4073 PF02547 Queuosine_synth Queuosine biosynthesis protein Mian N, Bateman A, Eberhardt R anon COGs Family Queuosine (Q) biosynthesis protein, or S-adenosylmethionine:tRNA -ribosyltransferase-isomerase, is required for the synthesis of the queuosine precursor (oQ). It catalyses the transfer and isomerisation of the ribose moiety from AdoMet to the 7-aminomethyl group of 7-deazaguanine (preQ1-tRNA) to form epoxyqueuosine (oQ-tRNA). 
Q is a hypermodified nucleoside usually found at the first position of the anticodon of asparagine, aspartate, histidine, and tyrosine tRNAs [1,2]. In Streptococcus gordonii , QueA has been shown to play a role in the regulation of arginine deiminase genes [3]. 25.00 25.00 25.30 25.20 21.70 24.10 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.28 0.70 -5.72 110 4206 2009-09-10 15:09:34 2003-04-07 12:59:11 10 5 3721 5 867 3069 1946 327.50 45 97.57 CHANGED pls-FDacLPpELIAQpPsp.....RcsSRLLllcpp..........ss..t....lpcpp.Fp-lschLpsGDLLVhN-T+VIPARLaGpK......t.....oG..................G+lElL......lhch......h...t...........ttt........a.s..hl+su+ch+sGsplhh..........st.......sh.....pupVltp...tpt..sth.llphphpt..........shhphLcchGc.....lPLP.PYIcR...................ts........stt...Dp.cRYQTVa..A....c..csGuVAAPTAGLHFoppLLppLppcGlphuhlTLHVGhGTFpPV+...s-clpcHpMHuEahplspcssptIpps+.tpGtRllAVGTTslRsLE.o.......A......upp.........Gt...........lpshpGcT-lFIhPG..YpF+lVDsLlTNFHLP+STLLMLVSAFuGp-plhpAYpcAlccc.......YRFaSYGDAMLIh 
...................................p.lsDFcFcLP-c.L.IAphPht..................pRssuRLLl...l...ctp...................su.....p...l.p...c.p...p.FpDll-..hLp.......sGD...hLVhNsT+VlPARLaGpK..............t.......oG.............................................u+l...ElLl.+............................h...s...scp..........h.s.........hl+.......s.........u........K.............+........h.........K........sG...s...pl.hh...............s-...................................pl.pA.p.hh.tchpp........ttt....lhch.p..ass.....................h.h-lLpplGc.....hP.L.P.P.Y.I..cc...............................................t......-..t........Dp-cYQTVY......A....c...c.....G.ul..AAPTAGLHFocpLLpclcsK.GVchsalTLHVGhGTF...pP...Vc...V-slp-Hc.MHoEahplsp-ssctl..............pt...s...........K.......t.................pG.s............RllAVGT..TSl..RoLE..o....s......upp.........pst...............................................l..p..s..h.p.G..TsIFIhP..G..Ypa+l.VDuLlTNFHLPcSTLlMLVSAF..........u......G.h-.phhsAYcpAlpc.c.......YRFFSaGDAMhI................................................. 0 311 582 740 +4074 PF00788 RA Ras association (RalGDS/AF-6) domain SMART anon Alignment kindly provided by SMART Domain RasGTP effectors (in cases of AF6, canoe and RalGDS); putative RasGTP effectors in other cases. Recent evidence (not yet in MEDLINE) shows that some RA domains do NOT bind RasGTP. Predicted structure [3] similar to that determined [1], and that of the RasGTP-binding domain of Raf kinase. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.91 0.72 -3.50 54 3464 2012-10-03 10:59:06 2003-04-07 12:59:11 18 281 276 21 1924 3208 3 89.70 17 10.59 CHANGED spthl+Vahssts.............hpoltlstssospcVlpthlcKhtl.......sssppatL............hhtptstcc..........hLtss-pPlhhhhph..t.........psphhlppppp ............................................t...hl+lah..ts...s.........................hpslt..ls.pp.sTsp-..V.lptl...lp...+h...........tl..............ss......spp.asLh................................hhtt....ts...tc+.........................................hLt..s.p-psl.hhhhph................thphhlcpt..h........................................... 0 459 669 1233 +4075 PF03528 Rabaptin Rabaptin Griffiths-Jones SR anon PRINTS Family \N 27.20 27.20 27.40 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.52 0.72 -4.13 5 248 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 45 2 100 219 0 108.70 41 30.00 CHANGED KAlAsVSEoTKQEAlscVpRQpQEEVASLQAIlK-Tl.......SuY....EoQasL.LcQE....................RtQhtQspEucERE.lu+L+phLucAp..DsLE+pMKKs+ELs-pL...Kpssp-hEpcI .....................ph.uth..spcQEth.pshp.pQEEsASL.us...lp.-ul..............suY...........EsQaph.LppE...........................................ptQhtQ.pEutpR.......E....ls.cLpp.hLpcup.....-..sLEcpMc....ctQE.t-hL...+p.s.s.pppI................................................ 
0 9 16 42 +4076 PF02144 Rad1 Repair protein Rad1/Rec1/Rad17 Mian N, Bateman A anon IPR003021 Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.72 0.70 -5.63 26 354 2012-10-02 11:47:48 2003-04-07 12:59:11 11 7 284 3 227 571 15 238.90 27 75.42 CHANGED lFoAsossscpLhslLpslshhs..c..........AhlploscGl+hssE.cs+slQupsalspslFssYpa....................................................................tt.t.ptshssFplsLsslL-sLsIFGssssss..............................................................sCphsYpGpGsPLslhLE-...sslpTpCclsT...............Y-s-........-sh-lshs.csplhhclIh+uchLpsAlpELctstspplplhsos..p.............pP.......................aFtLsop.GphGp......Spl-asscps.....................................lhEpFplp..........pcshps.Ycauhlc+st+AhtlAsKVslRhDppGlLSlQhhh ....................................................................................................................................................h.u.hsssctl.phLpslt.atp...p............uhh.lopp.Glph.s.....s.E.pu..+sl.Q....upsalp.p.p.lFppaph............................................................................................ppttspFtlsLshLl-sLslFusss.st.........................................................................................................................................shphp.Y.t.u...G.tPLhlhlc-........ssls..s..p.s...plpT....................hts-......................tsh-.hs..hp..ps....s.l.h...chlhp..o.phL+cAh..p..............-Lc.s......s.p....lplhhss...p.........................tP........................hh.plps..Gt..hup.......splca...s...psps..........................................h...h....-...tFpsp................ph..s.Y+hshlc..sh.c.....Aht....hu....sK....l.tlRhstpGhLolQhh....................................................................................................... 
0 86 129 188 +4077 PF03215 Rad17 Rad17 cell cycle checkpoint protein Mifsud W anon Pfam-B_2764 (release 6.5) Family \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.80 0.70 -5.82 3 406 2012-10-05 12:31:08 2003-04-07 12:59:11 10 19 278 0 287 3132 1602 329.80 21 56.41 CHANGED lp-DcsE.WaEKYKPpstc-LAVHK+KIc-VcpWLcApsLEsp.+p...ILLIoGPSGCGKSTslKlLSKELGhplpEWlNPssh+pPsN..QcoDF+GhspspSpF.....hSQhEoFsEF.L+uo+Y.hlQhhGcshpscKKlILlEDLP..NpFatDo.+sF+pVIRpaLpSucsh..PLIhlITEh..-sLEGDNNQR+.....oFsu.pIMsKEILp-PRlosIpFNPIAPTlMKKaLspIlspEhph.tGKsKsPK+pSllEhICQuopGDIRSAINSLQFSuS.....KGppNlR.hKcG......hSLcustVL.pLSKScREu........ps.hshpspp.puhhtKDVoLuhFHAIGKVlasKRuossEl.........DSERIsu.............pLpp.pR-ph.............LVEsEsslp.StLSG-lF+LGLaENYlDFs...hoIDDAsslsDhLShuDsLSGDasos.YsLRElSToFuspGshtpN+upsYh.pphtspphps.aKsQh.......................Fhh.t.hhcpC.uhKtha..FYLPuL.shpot.h.hhshho..M+spstIshlpcI...........GhhPL................cR+a 
..........................................................................tt........W.pp.a.........tPts....t-..l....s....l...p........c....K....l.....t.p.l...p.p.h..l....p................t..................h...........t......t................t..t...............................lLllp..GPsGsGKo.s.s....l.p.h....L......u....p....p......h.....s..h..p....l..........c.......h...............s..s................................................................................................................................................p...t......a...........t....h.........................................t..............................................................................................................p.............................p....l..l..L...l..-..-hP.........s.h...t.....t....t..hpt.l.p.h....h..............t....t.....................Pllhhlo...................t........................................lh....s.....p....l.....t......p.....t..ht.........l...pFNslsss.hhKhLphl...ht......p..............................................h..s..........l.p..l...st..............GDlRsAl..tsLph.s.....................................................................................................................................................................+-.sl.hF+u.lG+llaspp.........................................................................................................................................................................................pt.h..........t.....h..hl.hpN...h...................h.p.....h...h..p....hu.sD..h.................................................................................................................................................................................................................................................................................................................................................................................................................................... 
1 96 159 246 +4078 PF04824 Rad21_Rec8 Conserved region of Rad21 / Rec8 like protein Kerrison ND anon Pfam-B_2686 (release 7.6) Family This family represents a conserved region found in eukaryotic cohesins of the Rad21, Rec8 and Scc1 families. Members of this family mediate sister chromatid cohesion during mitosis and meiosis, as part of the cohesin complex [1]. Cohesion is necessary for homologous recombination (including double-strand break repair) and correct chromatid segregation. These proteins may also be involved in chromosome condensation. Dissociation at the metaphase to anaphase transition causes loss of cohesion and chromatid segregation [2]. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.56 0.72 -4.78 16 498 2012-10-01 19:44:35 2003-04-07 12:59:11 11 5 266 4 328 500 17 52.40 30 8.40 CHANGED ssspt.phspls.supsR+pAA+hFaphLVLpsp..phIplcQ.pcPYu-IhlpssPsh ...............s.......hptls..psss.R+.pAAphFaphL....VLtsp..pslcl.pQ.....p.....ps.....au.cIhhp.t..................... 0 93 164 254 +4079 PF04825 Rad21_Rec8_N N terminus of Rad21 / Rec8 like protein Kerrison ND anon Pfam-B_2686 (release 7.6) Family This family represents a conserved N-terminal region found in eukaryotic cohesins of the Rad21, Rec8 and Scc1 families. Members of this family mediate sister chromatid cohesion during mitosis and meiosis, as part of the cohesin complex [1]. Cohesion is necessary for homologous recombination (including double-strand break repair) and correct chromatid segregation. These proteins may also be involved in chromosome condensation. Dissociation at the metaphase to anaphase transition causes loss of cohesion and chromatid segregation [2]. 
20.80 20.80 20.80 22.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.44 0.72 -4.11 49 651 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 292 0 433 644 1 112.00 34 17.91 CHANGED MFaoptlLs+.cGsLuplW.........LAAphpp+.....Ls+pplhpsslsposc..........................................pIhp.............Ppss............huLRlSGpLLhGVVRlYu+KspYLlsDspcsh.+l+hshp.s.........thsh..............ttpsshsslsls .......................................MFYsp.lLs+..pGPLuplW.........LAAp.hc..+K.......................LsKs....p...lhpssltpos.-...............................................pIlp...........................P.p.s....................hALRh..SGpLLl...........GVVR...........IYs+Ks......cYLLs.Dsscshh..+l+hsa+.s.........s.h-.ls.......t.................................................................... 0 128 232 348 +4080 PF03835 Rad4 Rad4 transglutaminase-like domain Bateman A anon Bateman A Domain \N 21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.92 0.71 -4.79 41 550 2012-10-10 12:56:15 2003-04-07 12:59:11 10 22 265 6 403 698 3 136.50 22 18.13 CHANGED pslptsphostcpppsp.................tppphtspch.hssaWsEVa...sp................tpc+WlsVDshst...........hhpph-sh...sppspps.hsYVlAass-sss+DVTpRYst.p.hsups++hRls..................tpWa......cpllp......app.pppc..................................DphE-tphp ........................................................................................sp..............................................hs.hhWsEVa.....sp................tpp+WlsV.Dshph........................................sh.pPh..........tpshp.....p...hsYVlAap.s.c...s........ss+DVTpRYsp..p.h..stsc+tRls...........................tpWh.......pp..s.lp............hpp.thpp..................................-phEp.ph.................................................................. 
0 105 204 338 +4081 PF04098 Rad52_Rad22 Rad52/22 family double-strand break repair protein Aravind L anon Aravind L Family The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to Rad52 [1]. These proteins contain two helix-hairpin-helix motifs [1]. 20.30 20.30 20.30 20.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.98 0.71 -4.49 35 602 2009-01-15 18:05:59 2003-04-07 12:59:11 10 8 410 33 261 527 127 137.00 28 43.84 CHANGED chpplQspLc+.lss-alotR.GssG.p+........lsYl-uapllsLANElFGFNGWuoplhslpsc................ah-pptt.t...................+hslshsshVRlTLKDGT....a+EshGhGpspshcsKutAapKAKKEAsTDALKRALhs.FGsslGpslYD+phhtp..hsphpps..shch ...........................................................................tt.Lcp.hssE.lp.Rht...tu.u.p+............lsY....l..suctlhphhN-lFGas..GWpsplp.slss............................................................................hshsshsclo.l+...s.so...........................u...ths.....pph....cut..t.....s..........c.......A.hKtuhosuhKR.Ahhp.aG..lGphlYshp.hht.......................................................................................... 0 80 143 209 +4082 PF04139 Rad9 Rad9 Wood V, Finn RD anon Pfam-B_28077 (release 7.3); Family Rad9 is required for transient cell-cycle arrests and transcriptional induction of DNA repair in response to DNA damage. It contains a Bcl-2 homology domain 3 (BH3) [2]. 
20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.50 0.70 -5.26 16 382 2012-10-02 11:47:48 2003-04-07 12:59:11 8 8 271 3 251 862 61 232.50 24 56.82 CHANGED hu+AlpsLSRlGD-LalEsscctLsL+slNSS+SAaushhFss.FFppY.........st.sssshpC+lthKulLslFR......................uhsthptsVE+Cpltl....pspps+.......................................llhplhCKpGlpKTaplsappspsl.pAlaspspssshlphss+lLs-hlsaFspsh-ElTl...........sssp.t+..Vhl+oasE-shct.........p.cshpTplslcs.-EFcpaplstc.................scITFslKEFR...........................uhLtaAEshsssluhaFspsG+Phhho....hs..c.phlEupFlLATl .........................................tcsl.sLu+h.ucpl.lp..s..............t.....p....t.......LtlpslNso..+SuasphhF.t......FF.ppa.................................tt..ttt...h..p.Cp.l.h..K.u.hh.s..lF+....................................................................................s.t...ppsl-psplpl.................pspps.+..............................................................................l.h.hph.h...s...c......p.G....l...h..K....o.....a.....pl..s.h..............p..................ts......p.....sh..p.................s................hh................s.p...p....t..t...s........s.h.....h.t...hps............+..hLtch.lt..pF..ss.....s...h.....p..-lsl...................................thss..pp.......l.hp....s..at...cp...h...st..............................................p.pshtTphs....l.s....c-....F..p..pa...plttp.............................................hplsF.slK-h+...........................uh...ls...a......u-......s..h..p........h.......s.............ls........hhas.tsGcPhhhs.....hp......p...hpsphllhT..................................................................................................................... 1 84 135 195 +4083 PF04002 RadC RadC; DUF2466; RadC-like JAB domain Kerrison ND, Finn RD, Iyer LM, Zhang D, Aravind L anon COG2003 Family A family of proteins present widely across the bacteria. 
This family was named initially with reference to the E. coli radC102 mutation which suggested that RadC was involved in repair of DNA lesions [1]. However the relevant mutation has subsequently been shown to be in recG, where radC is in fact an allele of recG [2]. In addition, a personal communication from Claverys, J-P, et al, indicates a total failure of all attempts to characterise a radiation-related function for RadC in Streptococcus pneumoniae, suggesting that it is not involved in repair of DNA lesions, in recombination during transformation, in gene conversion, nor in mismatch repair. Computational analysis, however, provides a possible function. The RadC-like family belong to the JAB superfamily of metalloproteins [3]. The domain shows fusions to an N-terminal Helix-hairpin-Helix (HhH) domain in most instances. Other domain combinations include fusions to the anti-restriction module ArdC, the DinG/RAD3-like superfamily II helicases and the DNAG-like primase. In some bacteria, closely related DinG/Rad3- like superfamily II helicases are fused to a 3'-5' exonuclease in the same position as the RadC-like JAB domain. These conserved domain associations lead to the hypothesis that the RadC-like JAB domains might function as a nuclease [3]. 20.60 20.60 20.60 20.60 20.50 20.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.13 0.71 -4.43 107 5120 2012-10-10 14:49:21 2003-04-07 12:59:11 10 20 3434 8 980 3486 385 118.30 39 57.04 CHANGED tlsssp...plhphlp...plt.....stpp....EphhllhLsspspllthpplsp...GolspshVpPRElh+pA.lptsAsulIlsHNHPSGsspPSppDhplT....c+ltpAspllsIpllDHlIl.u.......pspahS.ht-p.Ghl ................................................................h.hposptstcalt.....phs.....s.h.p.p.........EtFhlLaLsspsplIt.tcp.l....Fp....G.Tls...ps.V...a.PR....E....ll...+pA.lp.t.sAuu...l.ILsHNHPS..........Gs..s......p....P..S..psDhtlT....c+lhcAspllsIclLDHlIl..G...........ps.p.hhS.atEcGh....................... 
0 330 632 813 +4084 PF04712 Radial_spoke Radial spokehead-like protein Mifsud W anon Pfam-B_5891 (release 7.5) Family This family includes the radial spoke head proteins RSP4 and RSP6 from Chlamydomonas reinhardtii, and several eukaryotic homologues, including mammalian RSHL1, the protein product of a familial ciliary dyskinesia candidate gene [1]. 22.70 22.70 22.90 22.70 22.10 22.50 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.60 0.70 -5.75 22 369 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 118 0 235 361 8 295.50 22 75.46 CHANGED hptplpps...+AaL..ppsspsshsLY-HLsplLs+llc-cPps.uhDl..hEshStpl+pspa............tppcs.ts..phtsshchspppppLFtpst...............................p.p.............tcthscs.lssl...............hpt.shaappuGlGLuc-Esa+lhhAlKpLsc...pcslpssRFWGKIhGhptsYalAEsp..hccsEcptc.tt.t.t.t..ph..t.ttt.................................................lPhEtsts.....GsN+asYaVsspsu.s.sWs+LPsVTPtQIhsuRpI++hFTGcL-AsVho.aPsFs.............GsEtsYLRAQIARISuuTpluPhGh...Y..............phpE-Et.ttppt.tt.p............ph.csP-acslps............ph...phssWVHhhtaILsQGRssah...........tpptE-c...............-E-c-ccc--.cEs............c.EhGssLLsslscD................................................t.tlsst.......ssWoh+hsu.shhs..................paulsVl+SthWPGAashu..s....u++apslYlGaGhKhssp.sasP.....s.PPPstpEass....lsEhpDPos-EEtthch......sp-psp 
................................................................................................................................................h............................................................................................................................................................................................................................hht.sGhsls.pc.h...l.hulh.l.p...p....h..tphhFWG+lhGhp..tsYhlsps........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..c...................................................................................................................................................W................................thhhhps.hW.Ghhhh............tt........h..hYhG.G.p.......................................................................h........................................................ 0 110 129 177 +4085 PF03089 RAG2 Recombination activating protein 2 Griffiths-Jones SR anon Pfam-B_4702 (release 6.5) Family V-D-J recombination is the combinatorial process by which the huge range of immunoglobulin and T cell binding specificity is generated from a limited amount of genetic material. This process is synergistically activated by RAG1 and RAG2 in developing lymphocytes. Defects in RAG2 in humans are a cause of severe combined immunodeficiency B cell negative and Omenn syndrome. 
19.40 19.40 19.40 19.60 19.20 18.80 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.13 0.70 -5.72 18 5792 2012-10-05 17:30:42 2003-04-07 12:59:11 9 5 4451 0 28 5451 1 268.30 58 91.61 CHANGED KpsELKLRPloFSNDSCYLPPLRpPAlspl.ssp-u-sppYLIHGG+TPNNELSspLYlhohsS+u.NKKsoLsCpEKELsGDlPcuRYGHTlsVVaSRGKohsVlFGGRSYhPsGQRTTEsWNSVVDC.PpVFLlDLEFGCCTSahLPELpDGhSFHVSlARsDsVYlLGGHoLpossRPPpLa+LKVDL.LGSPsloCTlLsuGlSlSSAIVTQsus..cEFlIlGGYpS-sQKRh.CNolsL-DssIcIppREsP-WTu-IKHSKTWFGusMGpGuVLlGIPu-sKptssDup.aFYhlsFtp-c-t.....stQsCSQEST.pD.EDSsPLEDSEEFYFupE .........................................................ts.....pYlIHGG+TPNNElSsplYlhohss+s..N+KlThpCpEK-LVG-lPtARYGHolsVVaSRGKohsVlFGGRSYhPsupRTTEpWNSVlDC.PpVFLlDhEFGCsouahLPELpDG.SFHlulARpDslYhLGGHSLssssRPPpLaRl+V-L.LGSPsl..oCsl.L.ssGlSlSSAIl.Tps.....u.....s........cEal.IlGGYpSDsQKRM.CshlsL--stIchp.hEsPcW..TsDIpHS+hWFGushGpGshLlulPstsp....s-s..ahY.lpht......t............................................................. 1 2 4 11 +4086 PF04901 RAMP Receptor activity modifying family Finn RD anon Pfam-B_5615 (release 7.6) Family The calcitonin-receptor-like receptor can function as either a calcitonin-gene-related peptide or an adrenomedullin receptor. The receptors function is modified by receptor-activity-modifying protein or RAMP. RAMPs are single-transmembrane-domain proteins [1]. 20.40 20.40 20.70 21.50 19.80 19.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.45 0.71 -4.33 19 191 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 42 22 86 148 0 108.10 39 71.27 CHANGED tthtphChppFpccMcsls.phWCsWsphhp.YppLosCTchhAchlsCaWPNshs-cFhhtlHppaFpNColsththpDPPsslLhshIllPIhlTlhhsuLVVWRSKco-uhs ............................hh...phChptFptpMppls..ptWCsWs.ph....h...t..YppLopCTchhAptlsCaWPN..shs-pFh.htl.HppaFpNCs...l.s.th.thpDPPsslLhshIllPlhlThhh.suLVVW+SKcs-s.h...................... 
1 9 17 37 +4087 PF00638 Ran_BP1 RanBP1 domain Bateman A anon Prosite Family \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.41 0.71 -4.07 8 1512 2012-10-04 00:02:25 2003-04-07 12:59:11 13 83 333 19 988 1495 10 115.30 30 23.31 CHANGED EVKoGEEDEEsLFppRAKLaRFDs-s..ppWKERGlG-lKILKpKcs.sKlRlLMRRDplLKlCANHhIossMpL.KPhsGS-RualWts.uDaADu-uKsEpLAlRFKspEsA-pFKppFEEuppt ......................................ltTGE.E..s..E.c.slap.h.+.uK..L.a..ca..............s.....p....ps...............pp..W.....K....ERGhGsl.+..l......L.............c.............p......p.........p......s.....t.........p..........s.........R.llM..R.p-..........p.s.....h.............+...lh..hNphl...hs...s...h.pl....p......t.........ss..p...................c...u.hh.a........ts......hD......h..u...-..t...p....s...c...................s..............pp..h..sl+h....ts....t.-......Apphtphhpch................................................................. 0 331 490 762 +4088 PF03085 RAP-1 Rhoptry-associated protein 1 (RAP-1) Mifsud W anon Pfam-B_1750 (release 6.4) Family Members of this family are found in Babesia species. Though not in this Pfam family, rhoptry-associated proteins are also found in Plasmodium falciparum. Indeed, animal infection with Babesia may produce a pattern similar to human malaria [4]. Rhoptry organelles form part of the apical complex in apicomplexan parasites. Rhoptry-associated proteins are antigenic, and generate partially protective immune responses in infected mammals. Thus RAPs are among the targeted vaccine antigens for babesial (and malarial) parasites. However, RAP-1 proteins are encoded by by a multigene family; thus RAP-1 proteins are polymorphic, with B and T cell epitopes that are conserved among strains, but not across species [1,2,5]. Antibodies to Babesia RAP-1 may also be helpful in the serological detection of Babesia infections [3]. 
25.00 25.00 55.30 48.50 22.00 18.70 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.76 0.70 -5.03 12 102 2009-09-11 11:32:32 2003-04-07 12:59:11 10 3 9 0 17 101 0 195.70 39 57.46 CHANGED scoLtssspshpsttpppchspsMpp.hs.hssphh-tlCptshcp.spC+ptVssYVsRCpptsCholDshpashptp.psLsLPsPYQL-AAFhlF+pssusPh+pthcphhhRF+psupausY+pFlhsLLp+N...hhhcssssD..l-shls+YhYMsTlYYKTYLsl-phpu+hhN+hsFs+alFuhtI++ALppIl+sNlPccht.chslsclpplssuYtpYhh.sQlPshspFApcFupMVhcsLlpolu ....................................................hpttMt..h...tp...tphCpts.tt.ppCtt.ltsYhpRCtt..hsChTlDshph....t.psLsLPs.aQL-AAFhLF+pSsuN.t+pthcp.hh..........Rh+t...ttpasta+pFlhsLhppN...hhp.p.tssD..l-.thsspahYMsTlaY.KTYL...sls.hsAKhhN+huaopclFuh.tIppsLppll+.NlP.sht.phs.tplpplssuYtpYhh.oQlPshspFAccauphshcsLl.sls................................................................................ 0 2 10 10 +4089 PF00071 Ras ras; Ras family Sonnhammer ELL, Fenech M anon Swissprot Domain Includes sub-families Ras, Rab, Rac, Ral, Ran, Rap Ypt1 and more. Shares P-loop motif with GTP_EFTU, arf and myosin_head. See Pfam:PF00009 Pfam:PF00025, Pfam:PF00063. As regards Rab GTPases, these are important regulators of vesicle formation, motility and fusion. They share a fold in common with all Ras GTPases: this is a six-stranded beta-sheet surrounded by five alpha-helices [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.79 0.71 -4.85 61 21243 2012-10-05 12:31:08 2003-04-07 12:59:11 17 332 1006 663 12839 30754 3385 151.90 29 62.64 CHANGED KllllGDuGVGKSsLllpFspspFsppa.sTIth-.....Fhp+plplD..........ucplcLpIWDTAGQEcacslpstYYRsupGhllVYslTscpSFpplp.pWhp-lpchss..........pslshlLVGNKsDLcp.................................pRtVstpcupphA.......cchs.lhFhETSAKsshNVccsFhpls+plhp .........................................................................KllllG.c.u........u.V.G...K........o....s........L............l...............h...........p.............a..............s.................p..............s...............p..........F..............s...........p............p........h.........................s........T.........l..................h..-..........................h..h...t....p......p...l...p....l..s.......................................s.p..p...l....p..L........p....l.........W................D.............T.........A..................G............Q............E.............c..........a.............p............s..........l...........p..............s...........h................a...........a.................+...............s................u..............p.....u....h.......l.......l.......l.........a....s................l....................o...........s............p.........p..............S..........F..........p.......s............l.............p......p..........W...h...p....c...l.p..p....h.ss...............................ts.h...s..h..l.L....l.......G...N.....K....s......D....L...p.p......................................................................................................................p.+.......V...s.....t......p.....p.....u.....p.....t....h..A.....................c...p...h.......s.....h...............a........h...........E.....s...S...A..p...s.........s..........s.....l...c...p....s...F..phhpth..t..........................................
........................................................................................... 0 4857 6747 9773 +4090 PF00616 RasGAP GTPase-activator protein for Ras-like GTPase Ponting C, Schultz J, Bork P anon SMART Family All alpha-helical domain that accelerates the GTPase activity of Ras, thereby "switching" it into an "off" position. 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.30 0.71 -4.65 115 1769 2012-10-03 21:54:49 2003-04-07 12:59:11 14 106 254 6 1114 1705 9 183.90 26 14.50 CHANGED hlppllp...pEl........................pp.ssp...sslh.R..uNohso+.hlspah+t.....hGppYLcpsLtsh....lpcl.h..ppchsh..ElDPtcl.................................................................................................................................................sppphp........pshppL...tphspphhssIhs.Sh.sp....h.Phsl+hlsppltpplpp+a...t.................................pth..hpsluuFlFLR..FhsP..AI......ls.Pchaslh....p....ts.stptpRsLhhluKll.QslAs 
.............................................................................................................................................................................................hpphhp....tEl.............................pp...ssp...pplh..R.uN.ohso+.hhppah+h...........hGppYLpps.L..tsh.......................lppl.h........cpch....sh.El.D.Ps+l................................................................................................................................................................................................................................................................................ppplp......................psh.ppL...hthspphhssIhs..Sh..sp......h.Ph....tlRhlhpp.....lpptspp+.a.t...................................................................................p.h......hphluuhl.FLR..FlsP.AI............ls..Pchaslh......p....................p..sspspRsLs...hlAKhlQslAs................................................................................................................ 0 379 541 819 +4091 PF03836 RasGAP_C RasGAP C-terminus Griffiths-Jones SR anon PRODOM Family \N 22.10 22.10 22.50 22.50 21.90 22.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.45 0.71 -4.50 38 469 2009-01-15 18:05:59 2003-04-07 12:59:11 10 30 217 6 318 442 1 133.50 33 10.26 CHANGED shtchKpcslcslpcLEphGhlspp.NpYQp......llspIAp-I+sppphRppcppElctlppohpsLpc+spaLppQlcpYpsYlcsshtslptptp...................tpthhth+php+p.s+t.chGsaKaoAppLtccGVllchp..s.ptp ..............................htcpKp+hhcsLp.pLE..phGh.....lspp.spYQp......lls-IApDI+spcphR..ppRptELtpLppThpsLscKssahppQlchYcsYIcsshs....sLppptt..........................h.t.pthpcp.s.+tp+hsshKYoAtpLp-K.GVLlchpshp..t........................ 0 97 155 240 +4092 PF00617 RasGEF RasGEF domain Ponting C, Schultz J, Bork P anon SMART Family Guanine nucleotide exchange factor for Ras-like small GTPases. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.16 0.71 -4.57 148 3434 2009-09-12 23:24:09 2003-04-07 12:59:11 14 184 292 23 1986 3165 4 186.00 25 19.95 CHANGED hshsst-lAcQLTll-h...cl....app....Ip.....................pE..............................hl..spta.......sp...pp.............................ss.s......lpthlpphNplopaVsspIltp.p.............shccRspllp+aIclA...........p....................p.hcplpNasohhAI.luuLssssltRLcpTWptlspcth...c............h.hpcLppl............h............................ssp..................pNapsY..R.....ptlp...................................ps.....Ps.....................lPalGlaLp..........................DLsalpcu...ss...................shlps............................................llNFpKh..pp ........................................................................h..hsshclApQLThh-h...pl......Fpp....lp..................................................pE..............................hl..t.t.h.a......pp.............pp...t.......................................................................ss.s.........l.pthlppa.Nplo.....h.a...Vs..spIltp..t.....................shppRs.p.l.lp+.a.IclA..p..........................................c..h.p..p...lp..NasohhAI.l..uuLs..s...s..s..l...t.....R.L........c....p.T....Wp..........p.......l.spc.p........p.....................h..hp....pLp..p.l.....................h..................................................................ss.p..................pN..ap...ph....R.......phlp..................................................................phps....ss..............................lP.a......h..........u.........l.h.Lp..........................Dlhhlcpu....s.................sh.h.tt....................................................................................hlNapKh................................................................................................................... 
1 658 907 1382 +4093 PF00618 RasGEF_N RasGEFN; RasGEF N-terminal motif Ponting C, Schultz J, Bork P anon SMART Domain A subset of guanine nucleotide exchange factor for Ras-like small GTPases appear to possess this motif/domain N-terminal to the RasGef (Cdc25-like) domain. 28.30 28.30 28.30 28.30 28.20 28.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.50 0.72 -3.92 88 2431 2012-10-02 12:00:53 2003-04-07 12:59:11 15 132 260 14 1433 2250 2 105.10 21 10.42 CHANGED plpuuolp..tLl-tLssp.ph........c..sahpsFlhoa+sFs.ostclhphLhpRa............................h.ssstph.ptt..............................................................hph..+...lhpllppWlppaht...-......apps...............hlpplhpa ............................................ltuuoh-cLlp+Ls.pp...phh...........................-s.salp.sFLhTaR.....sFh.o.s..........pc.Llpt.L..l.p..pa.............................................................................................................................tstt..ptt...........................................................................................................................................................................................hph....+...lhpllp.tWlppa.....-...........Fpts.........hht.h............................................................................................................................................................... 0 411 620 975 +4094 PF01858 RB_A Retinoblastoma-associated protein A domain Bateman A anon Swiss-Prot Domain This domain has the cyclin fold [1] as predicted [2]. 
25.00 25.00 25.50 26.70 24.40 24.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.50 0.71 -4.64 7 366 2012-10-03 00:42:12 2003-04-07 12:59:11 12 10 157 16 185 368 1 188.10 38 21.11 CHANGED TPVpoAhsolppLpshlsuh.spPSppLpphhpsCsps.ppslhcRlptlhEhhhpphs.u.ct...........s.phAppRhphA.pLYY+VLEshhpuEtcpLss.shosLLspch....FHcoLlACslElVhtoYps.........s.hpFPalLEshslpsF-F.KVIEsFIRp...EssLsREhlKHLNSlEEplLESLAWppsSsla-hl ..........................................TPV.ssAhpolppLps.hlsu..h..ts.tP.S..ppL.ph..hp.s.Csps.......P...pp...s...l.h..........pRlcplhchappp.hs.....t.s.sttt.................................shphu.ppR...hphAt.tL...YY+lLE..sllppEp.c..................R....L...s.....s..t...................s....h.otLL....p.p.-h....FHpSLlACsLElVhhoYps.....................s.hsFPall-lh.slss....Fc....F...aKVIEsFI.Rs..............Ess..L.sR...-hlKHLsplEEplLESlAWppsSsLaphl............ 0 49 77 127 +4095 PF01857 RB_B Retinoblastoma-associated protein B domain Bateman A, Griffiths-Jones SR anon Swiss-Prot Domain The crystal structure of the Rb pocket bound to a nine-residue E7 peptide containing the LxCxE motif, shared by other Rb-binding viral and cellular proteins, shows that the LxCxE peptide binds a highly conserved groove on the B domain [1]. The B domain has a cyclin fold. 
20.70 20.70 22.20 21.10 20.50 19.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.86 0.71 -4.66 6 352 2012-10-03 00:42:12 2003-04-07 12:59:11 15 10 158 15 185 360 5 140.60 39 15.49 CHANGED soLslFa+KVY+LAulRlpsLCp+L.s..........lp-cLccpIWshFcaoLspps-LMhDRHLDQllhCAhYshAKVsp..-hoFp-IhpsYRsQPQApspVaRSVhIct.........p...p............................h..pchsDIIsFYNplalsplKs ...............................................oLslFa+K.l...Y+LAulRLps....LCt.+L.s.............................ls.s.-..lc..cpI.WT.hFcao.L..p.p.s..-L.MhDRHLDQllhCuhYshs.K....Vs..p..............-.h.oFppIhpsY.R...p..Q....P.Qu............psp.V...a.RsVLlcp.........................................................................................pchsDlIpFYNplal.plK............................................ 0 53 81 127 +4096 PF02196 RBD Raf-like Ras-binding domain SMART anon Alignment kindly provided by SMART Domain \N 25.70 25.70 25.70 25.90 25.40 25.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.09 0.72 -4.22 10 710 2012-10-03 10:59:06 2003-04-07 12:59:11 10 34 96 14 293 599 0 70.50 30 9.82 CHANGED phhpVaLPssQpolVtVRsGhol+DsLppslcpRGLssssstVhhht......t+csLshcsctuhLsu.cElhlEhh ....................hhplhLPspppsll.ssRs...Ghol...c-sLpthlcp+G....Ls.p..sssVahht.......pccslshspsh..s...Lsu.c-lhlEhh..................... 
0 45 68 156 +4097 PF02033 RBFA Ribosome-binding factor A Mian N, Bateman A anon IPR000238 Family \N 25.00 25.00 26.10 25.40 24.90 23.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.07 0.72 -4.00 183 4543 2009-09-13 23:02:53 2003-04-07 12:59:11 13 5 4463 8 1061 2620 2217 105.00 34 77.59 CHANGED RspRlu...pplp+plupllp..pp.l+..DPRl.........shlolocVclSsDLphAcVal...ol....h........................sp....................ppt..cpshpsLppAsGalRpplucplp..lRhsPcLpFhhD...p....ol-put+lspLl ..........................RspRlupplp+Elupllp....cc..l+........DPRl..............thlTl..ocVclosDLphA+Val...oh..l..s.......................sp...................ppt...cpsh.puLp.c.Ap..GhlRppLG+p.lc..LRhsP-LpFhh..Dp.Slchut+lspLl......................... 0 369 692 897 +4098 PF05025 RbsD_FucU RbsD / FucU transport protein family Moxon SJ, Bateman A anon Pfam-B_4828 (release 7.6) Family The Escherichia coli high-affinity ribose-transport system consists of six proteins encoded by the rbs operon (rbsD, rbsA, rbsC, rbsB, rbsK and rbsR). Of the six components, RbsD is the only one whose function is unknown although it is thought that it somehow plays a critical role in PtsG-mediated ribose transport [1]. This family also includes FucU a protein from the fucose biosynthesis operon that is presumably also involved in fucose transport by similarity to RbsD. 
25.00 25.00 27.90 30.40 24.60 23.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.74 0.71 -3.87 133 2765 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 2076 86 395 1291 399 134.10 35 98.06 CHANGED MpcsslLNs-L.pllAshGHsDpllIuDAshPh....s.tspplcLslphsssshhplLcslLsph.l-phlhupphthp....ss...thhsslhpth.........ttsh.lphls+ppF.hcp.s+p..upAllpTGEpssYuNlILpsGVlh ........h+hsslLss-l.plluchGHsDpl.......ll...sDAs........hPh..........s.ts.ttlclslpt.sssshhplLpsllsphpl-s..hh....hA.pphhtp........ss...ph.ht.plhpchtt..............t.tssshpIphl.sH...p...pF.ccp.spc.......upAll.RTGEsosYu.NIILpsGVs...................... 0 101 207 293 +4099 PF02341 RcbX RbcX protein Bashton M, Bateman A anon Pfam-B_948 (release 5.2) Family The RBCX protein has been identified as having a possible chaperone-like function [1]. The rbcX gene is juxtaposed to and cotranscribed with rbcL and rbcS encoding RuBisCO in Anabaena sp. CA [2]. RbcX has been shown to possess a chaperone-like function assisting correct folding of RuBisCO in E. coli expression studies and is needed for RuBisCO to reach its maximal activity [2]. 21.00 21.00 22.40 21.70 20.80 20.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.37 0.72 -3.93 16 676 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 573 77 69 329 10 109.20 67 78.97 CHANGED M-lKpIAKDTAKsLtSYLTYQAVRsVhsQLuETNPshAlWLppFSuptpIQDGEuYLctLhpEsp-LuhRIMTVREHLAc-VsDaLPEMVRsuIQQuNhEHRRQhLERlTQ .........................................MsLKQIAKDTAKTLQSYLTYQALRTVLAQ.LG........ET........N......P.P....L..u..hW.....LpNFSu..GKI.Q.DG.EuYIEpLhtE...K....s....D.L...ALRI..MTV.REHI.A.p....E.l....s..E.F..LP..EMVpTGIQQANMEQRRQHLERITp............................................... 
0 18 48 64 +4100 PF00415 RCC1 Regulator of chromosome condensation (RCC1) repeat Finn RD anon Prosite Repeat \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.40 0.72 -3.75 104 14888 2012-10-05 17:30:42 2003-04-07 12:59:11 13 606 566 106 9774 17535 3239 51.30 29 20.18 CHANGED cGplasWG..psptGQLG..............tssppspthPph..lphhps..............lhplu.sGspHohsl ......................GplasW......G............ps....s..........h.....G.Q.....LG.....................................................tss...p..p...s...p...h....t..P..ph.........l..p...s..hps.................................tlh..p..lu...s..G..s...t.Hohsl....................................................... 0 3249 5152 7453 +4101 PF04381 RdgC Putative exonuclease, RdgC Kerrison ND anon COG2960 Family Members of the RdgC family may have exonuclease activity. RdgC is required for efficient pilin variation in Neisseria gonorrhoeae, suggesting that it may be involved in recombination reactions [1]. In Escherichia coli, RdgC is required for growth in recombination-deficient exonuclease-depleted strains. Under these conditions, RdgC may act as an exonuclease to remove collapsed replication forks, in the absence of the normal repair mechanisms [2]. 
25.00 25.00 28.60 28.30 24.30 24.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.78 0.70 -5.46 71 1392 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 1234 4 260 909 119 288.20 47 96.62 CHANGED MW..FKNLhlYRhscshshss-pL-cpLsptsFpPCuup-hpphGWlsPhucpsp..LsHssssplLlsh++E-KlLPuuVl+ctlcc+lpplEtcpuR+lt+KE+cpLK--lhppLLPRAFo+pppThuaIDsppshllVDuuSsp+AE-lluhLR+o.lG.SLPls..PlpsppsPsssMTpWLtps.psPssapls-EsEL+us.t-cuuhlRsKpp-Lpu-...EIpsHlpuG.KhVo+LALsWp-+lsFlLs.....--hslKRlKFtDhlpEps--h.sp-Dhst+hDADFsLMosELsphlspLlpshGGppp ...............................................hWFKNLhlYRL..s+-ls......h.....p.u.-p...hEcpLuphsFoPC...G..S.Q.D..h.s+.hGWV...s...P..hG...p..p....u.-h....L....s....H...su....s.s..pll..lsA+..KEEKlLPusVIK..pt....Lc....t.Kl....t.c....l.E....s...-..p...u.R.K.L...+KpEK-sLKD-Vlc...sLLPRAFS+hopThhaIDsssGLIhVDs.ASuK+AEDsLALLRKo..LG....SLPVV.......PLs...h...cs...s....p......hsLT-WlcsG..sss...p....G.....FpL..h-E...AELKuh...hE-.G.ulI.RsK+.QD.Ls..u-...E.ItsHI-.AG.KlVTKLAL...s..W....p......p...RlpFVhs.....-DholKRLKFsD.pL+-QN..-Dl..scED.hApRFDADFhLMouELusLlpsLl-uLGGEt.p......................................... 0 53 130 207 +4102 PF05183 RdRP RNA dependent RNA polymerase Wood V, Bateman A anon Pfam-B_2226 (release 7.7) Family This family of proteins are eukaryotic RNA dependent RNA polymerases. These proteins are involved in post transcriptional gene silencing where they are thought to amplify dsRNA templates. 
20.80 20.80 21.00 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 580 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -13.05 0.70 -5.82 94 667 2009-01-15 18:05:59 2003-04-07 12:59:11 7 26 187 3 486 719 5 486.90 23 46.88 CHANGED splhhpssphphss+lhRc.................a.t.pp.....Flclpas-cphpthh.t...........................hpphtphhtpulhl..........s.htapa..hs.stpt...............h+ppssahhsss.........................hshpphhphh.sshpp................sKhhuR.....hu.shosohs........lphpph.............phlsDl........................................................ttpth..........shoDGs...GhlS.slsctl...tppls.............................................................................htthPoshQ..hRh..................sG.sKGllh.l....s.ph.........................hlhlRt.ShhKa.................psthpsl.....................-lhphup..................p.shLN....................cphlhlLpphGl....pc.........hFhph.pptlpphtp..hhts....pthhphhpphtp.s.hth................................................................hhttuh..........tppsalpphlpthhppplpphpp.....+h+I.lsputhh..hGlhD.t.................Gh.....Lc..........sclalt.............................................tsttp.phlpG..pllls......RsPshpPuDlph..lcA.............lth....st............Lpp.h....pslllFss....pGt.................................pshss.huG..uDh.DGDtah...lh.......WD.pl............hs.tshp................................hphpphtcahhp.....hpsshlGhhsstahths..........................shhsspshpLuphhutslDhsK.sGh.hp.....h.p..................ht.pthPcahp.......................pptt.....................sppllsplacphtptttttttp 
.........................................................................................................................................................................h.s..ht.ssphhRp...a.t.tp......hlclp.h.ptt...h......................................................................hhpphhthhtp.uh.h...........stphap..a..hs.stst.........................................h+ppt.s..ahhs...............................................................hph.t.thhphh..sphpp..........................sKhhuR........hu.shSpoh..........lt.pph..................h.Dl........................................................................................tttth......................haoDGh...GhlS.thsptl..hptht.............................................................ht..PoshQ..hRh.......................sG.hKGhlh..l.....................s.p...................t..........................l.lR...S..Ka....................................ttptttl...................................................................................-lh..p..hup....................shLN...........................pphl..lLpp..hu.l........pp............hh.ph.pp..lpphtt....hh.s.............ttshphhpt....t...t................................................................................................hh..Gh............ppsalpthl.phhht.tl...............tth..pt.......+h+I...ls..p..uhhh...hGshD.h......................sh.....Lp............................splalph..............................................tttt..h.ltG.......llls......+sPshhPGDlph..h.pA.........................................................V.h..........st..................Lpp..h....hsslVFsp....pGt...............................................................+shss..huG..uDlDGD.ah..lh.........WD.pl......................h......shp...........................................................t.pph.pahhp....................h.ps.lGhhsph+..hhs................p..........th.s..shpLuphhu......sV..DhsK..oGh.sp....h.t........................h.Pcaht...........................
.......t...........................................utplls.lh..h.........t..................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 210 321 430 +4103 PF00154 RecA recA; recA bacterial DNA recombination protein Sonnhammer ELL anon Prosite Family RecA is a DNA-dependent ATPase and functions in DNA repair systems. RecA protein catalyses an ATP-dependent DNA strand-exchange reaction that is the central step in the repair of dsDNA breaks by homologous recombination [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.95 0.70 -5.37 28 12149 2012-10-05 12:31:08 2003-04-07 12:59:11 16 16 9034 84 1090 10099 5077 240.80 66 92.66 CHANGED KALssALuQIEKpFGKGolM+LG-cs.t.cl-slSTGSLuLDlALGlGGlP+GRIlEIYGPESSGKTTLsLHsIAEsQKpGGssAFIDAEHALDPtYApKLGVclDsLLlSQPDsGEQALEIsDtLVRSGAVDllVVDSVAALsPKAEIEGEMGDuHVGLQARLMSQALRKLTGsls+SNThlIFINQIR.KIGVM.FG.sPETTTGGsALKFYASVRLDIRRhuslKcus....chlGscT+VKVVKNKVAPPF+pAEFDIhYGEGIS+hGEllDLGVchsllcKSGAWYSYpsc+IGQG+ENA+paL+-sP-lusEIEpplRpphsh 
...................................................................................................................................s.lM..+.h..G..p.t..t.......pl.p.s.......l.uTG.SL.......uLDl..A.L.......G...........l.........GGLP+.G....Rl..lEI.Y.....G.P.E.S..S.GK.T....T..L....s....L.p..s...I....A......p............u......Q...........K................p..........G....G........h................s...A...F..I....DA......E........H.....A........L..........D............P........h.........Y......A.........c..........K....L..........G........V.............s............l.........D..........s...................L..........L.........l...S.......Q.........P...........D...........T............G.......E....Q.......A..........L.......E....I........s.........D.......s....L........V.....R.......S.....G..A.........l......D........l....l.VlD.SV.AA..L..s....P..K...A..........E.....I..E....G..E..MG..D...........S.p.s......G...L..Q..A...R..LM..S..QA..LR.....KLTu....slp.pos.s.....h.s....I.FINQl.R.........K....l......G......V...M....F.....G....s...P...E..T..T..s..GGpALKFYuo..lRh...-......lR.......p........t......t........lK...t.tt............t.h.G.ps+h+lhKs+hs.................................................................................................................................................................................................................................................................................................................................................................................... 1 379 738 938 +4104 PF02565 RecO_C RecO; Recombination protein O C terminal Mian N, Bateman A anon COGs Family Recombination protein O (RecO) is involved in DNA repair and Pfam:PF00470 pathway recombination. 
20.50 20.50 20.70 20.50 20.40 20.40 hmmbuild -o /dev/null --hand HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.54 0.71 -4.24 157 3973 2009-01-15 18:05:59 2003-04-07 12:59:11 10 9 3952 7 846 2634 1633 151.60 18 61.19 CHANGED ht-htthshutalsELlpthl..-pcsp.sp..LaphhhtsLp.tL.....spps......s....hhhhh.FElpLLphhGas.slspCstsGpp.s.shh..h......................ttpsspsttshths.pshtllthhhpt...t...ht...hsspshp...pht......plhptalppplst.thpspp ............................shhthhhuhalsELls+hl....t....c....p.....pst...st.........LFphhhpsLp.tL..............spsp............ss...thhthh.FElplLst.hG.ausshscCstsupssstthh.hh....................................................................................................................t.ppttpsttththsspshhlhphhtth..thpt.......ht........t..h.s.t.p.p..hp.........php.................phhphhh...c.a.lst...L+Sh..................................................................................... 0 279 551 713 +4105 PF02132 RecR RecR protein Mian N, Bateman A anon IPR000093 Family \N 29.10 29.10 29.50 29.40 28.90 29.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.20 0.72 -4.52 192 4246 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 4214 6 904 2429 1218 41.00 38 20.52 CHANGED p.tsppLApAltpstpplphCphCtslo-..p-....hCsICssspR.D ...............ts.cLApALhpApcclpaCshCtslT-..p-........sCpICs-spRD.... 0 309 605 770 +4106 PF03837 RecT RecT family Aravind L anon Aravind L Family The DNA single-strand annealing proteins (SSAPs), such as RecT, Red-beta, ERF and Rad52, function in RecA-dependent and RecA-independent DNA recombination pathways. This family includes proteins related to RecT [1]. 
20.80 20.80 21.00 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.19 0.71 -5.02 81 1551 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1057 0 145 1046 127 180.60 24 65.71 CHANGED sphhptshshlpp..........................pL...tt......ssspshhsslhpssphsLsPht....ppsYllsa.....................hs.tcsphhhuhcGhhplApRssphp..ulputslhctc.hphpht...........ht........ppsphlusaAhhhh.....psstp.h..hhhshpphppttp................p.ttt.........................s.W..tsp.cpMhcKsslpphhp+hhPhshp.............shhstDEh...............t.pppppsss ..........................................s....p.hhthlpp..........................th.....hp.......ssstphhshlh...ssphG..LsPhs....ppsYhl..P.................................ht.sth.phh.luhcG..h.plh....p..cstp..hc..........uhp.hp.s.hc....ht.t.........................................................t..hushshlhh.....pstsc..h...p.hhhc..phcp.hp.......................ptptt......................................usW..ps..p.......c....pMh.++pshhphh.+hhhshu.....................shhstDEt......pp.........t.ts................................................ 0 44 96 119 +4107 PF03838 RecU Recombination protein U TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Domain \N 25.00 25.00 26.10 25.80 23.70 23.10 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.98 0.71 -4.73 43 1337 2012-10-11 20:44:43 2003-04-07 12:59:11 9 3 1214 10 140 682 9 163.20 48 83.68 CHANGED NRGMoLEctINpoNpYYLspslAVIHKKPTPlQIV+VDYPpRStAsIpEAYF+psSTTDYNGVY+G+YlDFEAKET+NKouFPLcNhHpHQlpHhcplhpQsGIsFlllcFsshcchallsuppLhpaWpt..pssG+KSIPhs.I.....pcpuaplphshpPplsYLcsl-pl .......................................pRGMohEc.INpoNpYYLppslAVIHKK.PTPlQIVcVDYPp.....RSpAhIsEA.YF.+psSTTDYsG...VY..p.G.h.Y.IDFEAKET+s.Ks.uFPh.....p.....NhHtHQIcHMcpshp.QpG..I..sFll.l+Fso..hpE.sYlLPsp...clhpaa.....psp.........G+KShsls.I.....cc.pGapI.phsh.t...Ppl.sYLcsl-p.............................................. 
0 47 90 115 +4108 PF02631 RecX RecX family Mian N, Bateman A anon COG2137 Family RecX is a putative bacterial regulatory protein [1]. The gene encoding RecX is found downstream of recA, and is thought to interact with the RecA protein. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.60 0.71 -3.91 147 4223 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 3600 6 808 2825 1656 117.80 22 66.09 CHANGED Lpp+....h.sp.ch....lcpllscLpchsal.sDpcaActalcs+ht.pshGstplppcLp.pKGlsppllp.psLp....p..hs.tpp.......................Atplhp++hpph.....t....................hctcpKhhpaLhp+GFshchlpp...slp........ttt- ............................................h..st.p......hppllshhtc.thl.sDtcautthlpsp...t..ps.h...G...ttl.p.pcLt.p.K.G..ls......p....p..h..lc....ps.lp...........c..hc.pc..................................Apcl.hpKch.pph......ht.ss...............................................................hphcpKlhphLhp+Gash-.Ippslp........t.tp................................... 0 262 516 680 +4109 PF02014 Reeler Reeler domain Bateman A anon Bateman A Family \N 21.00 21.00 21.00 21.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.95 0.71 -3.90 40 496 2009-01-15 18:05:59 2003-04-07 12:59:11 11 45 110 8 357 521 42 122.60 24 18.03 CHANGED sCschh...PpH......stsPp....sssaplsss..spsatsGpphpVsl....tt.ssspF.cGFhLpARssss.tt..........................lGpFsls.sss.hpphhsC.....sAVTH...ssspsKpplpl.hWsAPss.s.GslhFpATllpphtlaasc .................................C.th.......tt..........ttp.t...sssap...ltss.........sptahsGpphpVol...................sst.F..cGFhlpAcptsspt...........................lGpFphh.ssp..........h..p.......h..hhsC.........................su..loH...sssp......tK..s..p.l.p..l.hWtAPs................ss....s...G...sV.hFpAT.ll.ppht.haah........................... 
0 151 183 286 +4110 PF04221 RelB DUF415; RelB antitoxin Mifsud W anon COG3077 Family RelE and RelB form a toxin-antitoxin system. RelE represses translation, probably through binding ribosomes ([1], [2]). RelB stably binds RelE, presumably deactivating it. 20.90 20.90 21.00 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.44 0.72 -4.23 6 2614 2012-10-02 18:44:02 2003-04-07 12:59:11 7 4 1541 3 352 1624 75 79.40 22 87.15 CHANGED suhlshRID-clKspAssVLcpMGLThSpAl+lhLsplApsculPF-lplPpsNptTlsuIpctctGps.......ppshsscchhscl .............lphRlDpclKppApplhpphGlshosAlplhlpplspp..pu..lP..F....c.l....p.......h......s..................s.......p..ts.h....t.s.h.......p.....c............................................................................. 0 120 212 267 +4111 PF03763 Remorin_C Remorin, C-terminal region Farmer EE, Finn RD anon Pfam-B_1798 (release 7.0) Family Remorins are plant-specific plasma membrane-associated proteins. In tobacco remorin co-purifies with lipid rafts. Most remorins have a variable, proline-rich C-half and a more conserved N-half that is predicted to form coiled coils. Consistent with this, circular dichroism studies have demonstrated that much of the protein is alpha-helical. Remorins exist in plasma membrane preparations as oligomeric structures and form filaments in vitro. The proteins can bind polyanions including the extracellular matrix component oligogalacturonic acid (OGA). In vitro, remorin in plasma membrane preparations is phosphorylated (principally on threonine residues) in the presence of OGA and thus co-purifies with a protein kinases(s). The biological functions of remorins are unknown but roles as components of the membrane/cytoskeleton are possible. 
22.10 22.10 23.80 23.60 21.60 21.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.77 0.71 -4.54 28 381 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 26 0 224 354 0 105.30 33 31.49 CHANGED pccp..puhhsAW-EuEcuKhps+hp+cpscIsuWENpcKAKsEApl+KlEpc.LE+KKActtEKhpN+lAtl++cAEE+RAts......EA+Rscchh+scEtAs+h..RsTG+hPsphh ......................t..pthtshhsAW-c.u.EpuKhps+...hc.......+........c.cspIpuWEsppK....AKsEAph+.KhE..........t...c.lE+c+AcshEKhts+lAt.s++pAEEp....RAts......EAc+spchh+st-.tAshh....R.tsGphPtp.................. 0 36 136 183 +4112 PF03766 Remorin_N Remorin, N-terminal region Farmer EE, Finn RD anon Pfam-B_1798 (release 7.0) Family Remorins are plant-specific plasma membrane-associated proteins. In tobacco remorin co-purifies with lipid rafts. Most remorins have a variable, proline-rich C-half and a more conserved N-half that is predicted to form coiled coils. Consistent with this, circular dichroism studies have demonstrated that much of the protein is alpha-helical. Remorins exist in plasma membrane preparations as oligomeric structures and form filaments in vitro. The proteins can bind polyanions including the extracellular matrix component oligogalacturonic acid (OGA). In vitro, remorin in plasma membrane preparations is phosphorylated (principally on threonine residues) in the presence of OGA and thus co-purifies with a protein kinases(s). The biological functions of remorins are unknown but roles as components of the membrane/cytoskeleton are possible. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.84 0.72 -4.19 5 55 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 19 0 34 55 0 59.00 43 30.80 CHANGED ssVscEpApscsssPPPs..cpKsDDSKALsVVEsssEEsAscKsucGSlDRDVlLA+L .......................ssc-hA.pEK...ss.sP...PPs.......cpc....s....DDSKALs.lVE...K.....ss.E.s.....stc....K..ss....p....GSl-RDssLA+l. 
1 5 22 28 +4113 PF01244 Peptidase_M19 Renal_dipeptase; Membrane dipeptidase (Peptidase family M19) Finn RD, Bateman A anon Prosite Family \N 20.20 20.20 20.40 20.20 19.80 20.10 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.68 0.70 -5.50 48 2249 2012-10-03 00:45:34 2003-04-07 12:59:11 16 15 1559 27 897 2181 2191 312.60 26 85.48 CHANGED th.tph.llDuHsDhshphhtt.t.................t.t.hchsl.+h+pGtlsuthhula.h...........................thppuhptsLcplshhpphhpppsp.lplspossDlcpshp.cs+luhlhuhEGucsl..sscl...shLctaapLGlRhlsLTa.stsN.hucushpt........pssGLoshGcclVpchNcLGlllDlSHhucpshhDsl.......plSctPllhSHSsu+ulss+sR..NlsD-plcsltcsGGllslshhsta.lpps........................................................spuolc-hlcHl-alsslsG.h-aVGlGoDF.....DGss........................................stulcc...................suph.pLhptLhc.cGas-p-lcplhttNhlRVhcc .........................................................................................h.......lhDsHsDh.h.t.h..................................tt....hphsh.+hp...pGtl.s.u......thhula..h..............................................tht..p.s.h.p.t..s..l.p...t....l.s....h....hpp.hh.....p..p.......s...........s.........lt.............h..s...p...os................s...D.......l......p.......p.....s.........h.........p.....p..........s.....+..........l.u............s.....llulE......uu......csl..........sssl...............shLc.h.h..a.p.h.........G.l..RhhsL.s........a.stsN.h.u...sushtp....................................ttsGL.o.s.h..G+p..llpch....N.clG..h.......hlDlSHhucpshh...-sl.............ph.o...p....t......P....ll.hSHSs.....s...p.ul.............s.........s.........p.s.........R................NlsDc.l....ctl........t....c.....p......GGllt.....lsh.....hstF..lpst...............................................................................................tpssl.pc..h....s.....cHlcalhpl.hG.....h-.p....lGlGoD..F............DG.ht.t...................................................................
........................................................................................................................stslps............................s.u.p.h.s....plhtt.Lhc....p....G.........asc.........pclp....cl....h....ttNhlRlhp.t.................................................................................................................................................................................................................... 1 281 511 734 +4114 PF01664 Reo_sigma1 Reovirus viral attachment protein sigma 1 Bashton M, Bateman A anon Pfam-B_1003 (release 4.1) Family This family consists of the reovirus sigma 1 hemagglutinin, cell attachment protein. This glycoprotein is a minor capsid protein and also determines the serotype-specific humoral immune response. Sigma 1 consist of a fibrous tail and a globular head. The head has important roles in the cell attachment function of sigma 1 and determinant of the type-specific humoral immune response [2]. Reovirus is part of the orthoreovirus group of retroviruses with, a dsRNA genome. Also present in this family is bacteriophage SF6 Lysozyme Swiss:P21270. 25.00 25.00 43.90 38.50 19.70 19.00 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.31 0.70 -5.26 3 60 2012-10-01 20:11:45 2003-04-07 12:59:11 11 3 13 30 0 78 0 208.30 62 46.59 CHANGED IsuLPSRlGoLEuS+IDSVlPPLslpSouuTRlLchhYDoSDFsIsNSVLoLRsRSToPTaRYPLELsSAsNpVulucNYRhRpGpWoGQLpYpsPuLsWRAsVTlNLM+VDDWLlLSFopFoTsSIhAuGKFVLNFVTGLSPGWtTGDTEPSoT..lsPLSTTFAAIQFlNGGuRlDAFRILGVuEWsDGELEI+NaGGTYTuHTNVcWAPMTIMYPCss .FDuINSRluslEQSYVASsVsPLRLNSS..TKVLDMLIDSSTLEIN.SSGQLsVRSooPNLRYPIsDlSGu..IGMSPNYRFRQSMWIGlVSYSGSGLsWRVQVNSDIFIVDDYIHICLPAFDGFoIADGGDLSLNFVTGLLPPLLTGDTEPAFHsDlVTYGA.pTlAIGLSu.GGsPQYhSKNLWV.EQWQDGVLRLRVEGGG.ITHSNSKWPAMTlSYPRSF.. 
0 0 0 0 +4115 PF04582 Reo_sigmaC Reovirus sigma C capsid protein Mifsud W anon Pfam-B_2922 (release 7.5) Family \N 41.00 41.00 41.00 43.10 40.90 40.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.02 0.70 -5.26 4 160 2012-10-01 20:11:45 2003-04-07 12:59:11 7 1 59 7 0 145 0 239.10 45 97.25 CHANGED MAGLsPpQRREVVuLILSLTSSsThNsGDLTslYERLosLEuustSLcpSluslsoclSDlSusLQshspoLs-spusLsuLposVpALpsSVssLSoslssLoshsSuHsuulSsLQTolcuNossISNLKSsVSupGLsIoDLppRVpuLESuoSpuLpFusPLSlusGVVSL-MDPYFCSpphuLTSYSA-AQLMQFQWhA+GpsGSSsoIDMsVNAHCHGRRTDYMMSoTtuLTVTuNsVoLsFsLDhIT+hPSDLSRLlPssGFQAASFPVDVSFTRDosTHuYQVYGsYSSuRVFpITFsTGGsGTANIRFLTVRTGIDT ........................Ls..QRREVluLILSLTSssshs..GDLs.lh-RLosLEuust.Lppo.......lssh.sploslSupLpshstslspspspLpsLptplpthpssl.s..ssslsshot..pspss.losLpsphss.tstlsNL+s..sV....s....s.uL..slosLppRlpslEsssup.hphtsPLp..lssGslSLphcPhFCo.phsLoSYSspA.LhpFpW.s+.upsGuu.sslsh.lpsHsHGpRTsahhSoptshTVs.usss.Lshsls.lhp.ssD.huhLlPstGFQtAoFPVDlSFpRsssoHsY..Qs..Y..GsaspsthFplsa.ssts.st................................................................ 0 0 0 0 +4116 PF00979 Reovirus_cap Reovirus outer capsid protein, Sigma 3 Finn RD, Bateman A anon Pfam-B_1049 (release 3.0) Family Sigma 3 is the major outer capsid protein of reovirus [1]. Sigma 3 is encoded by genome segment 4. Sigma 3 binds to double stranded RNA and associates with polypeptide u1 and its cleavage product u1C to form the outer shell of the virion. The Sigma 3 protein possesses a zinc-finger motif and an RNA-binding domain in the N and C termini respectively. This protein is also thought to play a role in pathogenesis. 
18.40 18.40 18.50 22.40 17.90 18.30 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.30 0.70 -5.51 5 149 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 31 5 0 145 0 250.80 49 98.83 CHANGED MEVRVPNFHSFVEGITSSYl+sPACWNA+TuWDospFapPDVI+VGNAYCCoQCCGVLYYGuhPsDGpaFPHHKCHQQppRsDoPLLRaVRIGRTTEHLLD...QYAVtLpoIADHY--tupcpscEPtuDpVAuLDIlsRTESlRoDpAVDsDFWTsPLERRsD.DSRRDIA.oAhW+MIDASupShTLPDCLVSsuLHoRslFcQM..tTTToIYDVAsSGKsu+FSPMVAshPpR-uGPlpLsccssc-sVsosWp..sHF.......AlSPIIGGVG.IoGQatRsShHpVGHPlIGSGKKlSHYRNLFM-lsRGWSKSoFsCAsGLEPAE.sESRLRGHARTMLGRo....LPcVCDaussspossssoPLp..+osKlThlECG .........phl-.h.sua.th...asstphWt.....h.P-hhhhGsAhsC.pChGVl..G....t...hPHH+CpQ...p.s.s.hphsc..RhTthhhc...tash.hpthhp.hp..t........tt.l.......................ps.hp.....s.............p...Wp.sl...p.....sst...h..tth.hh.ps........hlsphhh....as.thht.......hoIYDVAsSGK.ul+FS..PMVushu...pR-uGPlhLssssst-sVhohhp..uHF.......AhSPllGGVu.loGpatRtShH..hhshltsstKhp..Rph..t...sat..ththhhs............................................................h................ 0 0 0 0 +4117 PF01446 Rep_1 Rep; Replication protein Bateman A anon Prodom_1565 (release 99.1) Family Replication proteins (rep) are involved in plasmid replication. The Rep protein binds to the plasmid DNA and nicks it at the double strand origin (dso) of replication. The 3'-hydroxyl end created is extended by the host DNA replicase, and the 5' end is displaced during synthesis. At the end of one replication round, Rep introduces a second single stranded break at the dso and ligates the ssDNA extremities generating one double-stranded plasmid and one circular ssDNA form. Complementary strand synthesis of the circular ssDNA is usually initiated at the single-stranded origin by the host RNA polymerase [1]. 
26.00 26.00 26.20 26.10 25.70 25.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.67 0.70 -5.02 25 775 2012-10-02 18:54:06 2003-04-07 12:59:11 12 3 413 0 22 552 35 156.80 31 69.80 CHANGED hKhuhpsppIlscuhpcpPpuRaLFLTLTV+Nsp..G-cLcpolspMscuFpRLhpaKK.........hppsllGalRusEVThNc.pcs.....oYHPHhHVLLhVcsoYFp......pNYlsQs-WscLWc+uh........KlDYcPlVcl+sVKspp.........pp..psl..p..................sAlhEsuKYsVKssDhhsssp......ts.pslh-LppuLtppR.IuaGGLLKpI+....+cLpL-DsE..puDLlpss--ccp.spsp......plhhhWphpppsYhl .........................................th.....t...t...h..lFLTL.oh..Ns......hpp...Lpttlpthtpu...ap+h...phpp....................h.ts.....hhG...ah...+thE.l.Thsp......ps................................ap.HhHhllhlp...shht.......t.altp...tpW.phWp.p.sh......................ths.....s.......hplp.hp..........................................................................h.p......h...............................................lsauGlhKph+.......K.Lp....s-h........shlp..p.pp............phh....................................................................................................................... 0 8 12 22 +4118 PF04057 Rep-A_N Rep-A_protein_1; Replication factor-A protein 1, N-terminal domain Wood V, Finn RD anon Pfam-B_6000 (release 7.3); Domain \N 21.70 21.70 22.00 21.90 21.40 21.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.19 0.72 -4.22 11 381 2012-10-03 20:18:03 2003-04-07 12:59:11 7 20 239 3 250 351 3 97.50 29 16.47 CHANGED LTsGAIutlhs....u-ssh....cPVLQVl-lK.Iss.......sspRYRhlhSDGpst.hpu.MLuoQLNshVcsGplppsullpLpcalsNslpt...sR+llllhpLEVlsp .......................................................lo.GAlttlhp.......spss......pPllQllsl+..lss.............sssspRaRllhSDGhph.hpu.MLATQL.NphlcsspLpp.s.sllplpc...ahssslp........s++.l.lI.lhclEVlt............................. 
0 86 146 208 +4119 PF01719 Rep_2 Plasmid replication protein Bashton M, Bateman A anon Pfam-B_1901 (release 4.1) Family This family consists of various bacterial plasmid replication (Rep) proteins. These proteins are essential for replication of plasmids, the Rep proteins are topoisomerases that nick the positive stand at the plus origin of replication and also at the single-strand conversion sequence [2]. 25.00 25.00 25.10 25.10 24.80 24.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.46 0.71 -4.80 24 278 2009-09-17 10:57:14 2003-04-07 12:59:11 12 4 204 9 32 240 1 182.60 27 74.28 CHANGED hsKcpt.............RpWsFllYP..ESh.....PcsWpphL..-phtl.hhhSPLHDKDls.............psGchKKsHaHlllhacsspohppVpplhcph........susthspl..tslcshYcYhsHts....s.cKapYshs-IhshsGF.....DIcpalshssp.c+.p..llppllchIc-pslh...php-Lhpashppt.c..hhslhtppsh..........hFhspY.lcupptp .................................................................................pt..........phasFlhY....ESh.....Ppsahp...hL......-pht.lsh.hhSPhHDKDls..............psschKKs...HhHshhhacshpohsplppllpph..............sssthlphh....hS.....pshapYhsHtp....ss-Kp....YshpDItshsGF...........-l-calh.ssp.p..p.p.....hlppll-hIccpshh...php-Lhtahhtpt.p...hhshhh.ps...........hh.ttY.cpth..t........................................................................................ 0 2 6 23 +4120 PF01051 Rep_3 RepB_protein; Initiator Replication protein Finn RD, Bateman A anon Pfam-B_313 (release 3.0) Family This protein is an initiator of plasmid replication. RepB possesses nicking-closing (topoisomerase I) like activity. It is also able to perform a strand transfer reaction on ssDNA that contains its target. This family also includes RepA which is an E.coli protein involved in plasmid replication. The RepA protein binds to DNA repeats that flank the repA gene [3,4]. 
22.40 22.40 22.90 22.40 22.20 21.90 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -4.75 166 2007 2012-10-04 14:01:12 2003-04-07 12:59:11 16 8 1146 6 232 1665 74 220.10 17 69.43 CHANGED ppthlhpsNsll.psshp........hohtEh+lhhhhluplcsp..ttpt.....................h.h...phtcht.th....shspps....hpplppshppLh.cch.hph.......................tppht.hhhhhtahp...................tpuplpl..phspclhshLhpL...pp..p..F.TpapLpphtp.l.cSpYohpLYclL.....ppa....................cs....hsp..hp..........hsl.......-...........-h+p........hhsl.........................sph.phschcp+VL.csulcE...lsp........pssh.tlphcph.+...pG.......Rp...lsslpFph .....................................................................................t...hh..pNph.h.ps..t.......hshhphplhh...hhltphppp......pptt..................................hph.....shpchh.th...............thpsps........hpplcpshppL...pp..hph...............................pt.......hh.ahs.h...............................tpshlpl.....phs.sp........lt..hlhph........pp...t...a.sphplpphsp.L.cup..auhpLa.clh..........hpa.........................ps....ttt........hh...........lpl...........p...............................-h+p........hlp.l...........................................tsth...phs..phcp+ll..csslp-.....lsp........................hsph..tlshpph..+.........cG....Rp....l.th.F........................................................................................................ 0 58 131 189 +4121 PF02486 Rep_trans Replication initiation factor Mian N, Bateman A anon Pfam-B_2164 (release 5.4) & COG2946 Family Plasmid replication is initiated by the replication initiation factor (REP). This family represents a probable topoisomerase that makes a sequence-specific single-stranded nick in the plasmid DNA at the origin of replication. Human proteins also belong to this family, including myelin transcription factor 2 (Swiss:O15150) and cerebrin-50 (Swiss:Q16301) [2]. 
29.60 29.60 29.60 29.70 29.40 29.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.19 0.71 -4.80 84 1463 2009-09-11 00:34:10 2003-04-07 12:59:11 14 8 833 0 83 967 19 187.80 25 54.34 CHANGED hhssploRlDlAhD.hpt..........hshschhpph.ppsthhpph.........p...............hpphushp.............hGpThYlGu+.pSp...hhhRlY-KshE...............t..p..WhRhElcL+spct..hshchlhs.....tp.hsshhhthhphh.....................................t.tthpl.....ssphp..ths.......................h-pt....hpalpc....psutslphltph.t......................tphpshltpllpp ...........................................................h..sthsRlDLAlD-hss.........hhshspltcch.pptchhsth................hp...................hpphtssp.......................pt.thGpT.hYlGSp..pSp......hahplY-KshEph.......................t..hcp..sph..hsRhElcL+pccs....h....hshc.Lhs..............hp...hpthhhthhsthhphh................................hhtthp.lcl.........ssp.c....ho...........................hccs.....hpWlp.+....Qs.u.slthh..ph.t............tpphh..h...p........................................................................................................... 0 24 46 64 +4122 PF04796 RepA_C Plasmid encoded RepA protein Waterfield DI. Finn RD anon Pfam-B_6223 (release 7.5) Family Family of plasmid encoded proteins involved in plasmid replication. The role of RepA in the replication process is not clearly understood [1]. 
21.40 21.40 21.50 21.60 18.90 21.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.90 0.71 -4.63 10 189 2012-10-04 14:01:12 2003-04-07 12:59:11 7 2 146 0 63 178 18 157.60 32 48.85 CHANGED lPYGshPRLlhsaLsT......cAl+o+o.............slpLGpShocFlccLGhps..oGGtpGslsslR-QlsRLuussh..........sluhcsuscu..sspshslhccthhhW.spcss.....QpuLas..SpVpLopsFFcpLtc+PVPlDhsAl+tLppSPhuLDlYsWLoYRlphl..s.+s....ssloWcp .................................................................lPYGshsRLlLhalsT......pAl...+s+sR............clpL.G...pShspFh.c.t.hGlss..pGGc..shptlRcQhpRLhuspl...............................................phthp..s....sts.........thh.shthscph......h..h....h.hps.p..pss...............Qt.uh...at....sh.....lpLS-sFappL.hc+sVPlDhcAl+tLp...t..SshAlDlYsWLsYRlatl....p..+s....shlsWp.s.............. 0 21 45 54 +4123 PF01421 Reprolysin Reprolysin (M12B) family zinc metalloprotease Bateman A anon Swissprot Domain The members of this family are enzymes that cleave peptides. These proteases require zinc for catalysis. Members of this family are also known as adamalysins. Most members of this family are snake venom endopeptidases, but there are also some mammalian proteins such as Swiss:P78325, and fertilin Swiss:Q28472. Fertilin and closely related proteins appear to not have some active site residues and may not be active enzymes. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.29 0.71 -4.73 38 3201 2012-10-03 04:41:15 2003-04-07 12:59:11 14 123 179 77 1444 3423 29 190.20 28 23.35 CHANGED +YlELhlVsD+thap+hsushstl+p+lapllNhlNphYps.LNlpVsLsuLEIWospDpIslpssussTLppFspWRcp.Llp++pHDsApLLouhsass.slGhAahuuhCsscpSsGlspcap..pshhlAlhMAHELGHNLGhpHDst.....tCpC....ssssCIMss.huppsuh.hFSsCShppappFlpptpspClhNpP ..................................................................................................................................................+alEhhlVsD.pt...h...h......p....h........h.....s.....t.....s.....h......p.....t..hp.p..hlhpl.....h..N..h..........l......s...t..h...........a........pp........l......s.........l....p.......l....s...L.......l...s...l......t.....l..............a.........s.............p...........p............s...........t........l.......p......l...........s..................t.........s...........s.................s.......p..........o...L..p...sF.....s......p.....W.....p...p.............p.............h....h......t........p...........p...............p...........H...............D.....s.........A........h..L.l..T.........t.........p.............s..........h..........s.........s.........t..........s.............l........G............h.....A................l.........u.........uh.............C......s....s...t.........p..........S............s.......u.............l.s........p................-..............p...........................s.........h.........sh.A..............h......oh..........AHE..l.G...............H............s........lGMp.HDss.............pC......p.........C.................sttss......lM......us....h.......s......t.......t...............s.h.....taSsC..Stpp.h.pp..a......L....p...p.....t..t....s...p.CLhs.P................................................................ 
0 219 301 684 +4124 PF05140 ResB ResB-like family Finn RD anon Pfam-B_1866 (release 7.7) Family This family includes both ResB and cytochrome c biogenesis proteins [1,2]. Mutations in ResB indicate that they are essential for growth [1]. ResB is predicted to be a transmembrane protein [1]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.68 0.70 -5.94 127 1951 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 1367 0 559 1727 720 307.30 18 58.66 CHANGED h+hAlhLLlllAluSlhGTllP.Qp................ss.Yhppa........G..hhuplh.pLsL.clYsShWFhsllhlLslSLlsCslpRthshh+sh+ph..htptp..Lp+hshttphph...........................s.psshppltthLcpp.tac..lptpptp.................lhAcKGhhs+hGsllsHhullllllGuhhuuhh....................................uac........uphhls..pG...........psssh......sG..........saslclccFpl-ahs.................sG.tscpFtSclslhcss...........pp..hpptplpVN.cPLpacGhslYQssau.sshphhthsssh....tt....h.phshphhs.sst...............................plplhuhhhs...........................................................pp.psssPsltlplhs.ptth......................................................................................................................sspspthhthphsphpthphssh.........................................................................hlph...........stshp......hoGLplp+DPGhslValGshlhllGlhhuhalpcRRlWlhhps...........................tsplhluGhss+sphu.atcEhtc 
............................................................................................................................................................................................................................................................................................................................................htc.h..s.hh.Hhuhlhhhhuhhht..........................................................................................................................h..p..t..h..l...ts.......................pt....h................................hsaplplpcF..hphhs.....................................................................................................pt...sppatopltlhptt.........................tp...hphplthN.cPLph.t.Ghp.h..Y....ua.s..s..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t............................................................................................................................................................................................................ 
0 197 410 519 +4125 PF04851 ResIII Type III restriction enzyme, res subunit Finn RD, Mifsud W anon Pfam-B_4631 (release 7.6) Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.52 0.71 -4.46 55 18536 2012-10-05 12:31:08 2003-04-07 12:59:11 10 220 5248 26 4564 43274 12710 160.50 19 20.83 CHANGED phphRsaQtpAlcplhpthppt...............................ppcsllhhsTGoGKThhuh.........t...hhtclhp..................+sLFls...sppsLhcQsh.cpa........................................pttt.th..t...pstphhhsslQpLpt.................................ph..stas..................lllhDEsH+ususs...................applhp.................hpsthh......lGhTATP..pc ............................................................................................................................................................................................h....p..Q.pAl..p...p....lh.p.t...hppt......................................................................................................tp.pt.l..l....h..t..sT...G...oGK....T..h..s..h.h...................................................................................p......hh..t.p.ltc.................................................psl..l....l...s.......s...p....p....s....L....s......s...Q...hh...pch.............................................................................................................................................................t.........h...............t.....t....t......t.......h..................................................................t...........t..........t...............t.....h.......h.......h..............h..t......h.................................................................................................................................................................................................................................hh.l...h.DEs.......H....t.........................................................
................................h.t............................................................h....h.o..uT.................................................................................................................................................................................................................. 1 1830 3138 3952 +4126 PF00239 Resolvase recombinase; resolvase; Resolvase, N terminal domain Finn RD, Griffiths-Jones SR, Bateman A anon Prosite & Pfam-B_3830 (Release 7.5) Domain The N-terminal domain of the resolvase family (this family) contains the active site and the dimer interface. The extended arm at the C-terminus of this domain connects to the C-terminal helix-turn-helix domain of resolvase - see Pfam:PF02796. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.83 0.71 -4.32 222 13550 2009-01-15 18:05:59 2003-04-07 12:59:11 16 84 3644 51 2369 10593 1582 133.80 24 45.62 CHANGED hhuYsRV..STpc....Qs.....hppQ.hptLcphu.sp.........p.l.....at-c.hSGpp....t.p..R..stlpchlp.tlcp..s...D..s......llVtclDRLuRs.ht-hlpl.lcpl.pppGlplhsls..p......hcs............sssh.uch...hhslhush....Ach..E+phlp-RsppG.lttA+t...pGch ......................................................................hhYsRl...Sotc............Qs..........hpt..Q..h.....p..t..l.p.p....hs.pp................t..l.............apD.p.....hSGtp.................h.p.......R........sthpc.h.lp...tlcp.........s.....D....h..............................llVtclD...RL.uRs..ht-h.hph..l.p..tl....p...p......p..s..lp...l.hsls..p..s............hcs......................................sssh...sch..........hhp.l..h.u.s.h..................Ach......Epc.........h..htcR.hpt...Ghtt.ttt.pG............................................................................... 
1 844 1618 2027 +4127 PF00072 Response_reg response_reg; Response regulator receiver domain Sonnhammer ELL, Griffiths-Jones SR, Finn R, Fenech M anon Prodom Domain This domain receives the signal from the sensor partner in bacterial two-component systems. It is usually found N-terminal to a DNA binding effector domain. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.07 0.71 -4.06 57 151337 2012-10-01 22:20:39 2003-04-07 12:59:11 19 3468 5531 462 44329 116945 19054 111.60 26 30.95 CHANGED lLlV.DDcshhpphlpphlp.....p.tuh...pVs.tsssuppulphl....pppp.....hDlllhD..........lphPs..h...sGlclhppl.+pps...s...ssllhlT.upss..thshpul.psG.ApsalsKPh.shscLhptlp ........................................................................................lLlV...-.D..c.........h....h....t....p....h....l....p....p....h.Lp.............p...tu....a........................p........l.......t.....t.....u.........s......s..G.....p....p..A.l..p...h.h...................pp..pp...........................................D.....l......l..l..h..D................................................l.t.....h.....P.....s.....h................s..G.....h.......c.....l......h...c.....p....l....+ppt.....................p.........................h..P..l..l............h....l........T.....u........p........s........p...........t............s.........t..........h.....p.......u...........l.....c...........h......G......A.....s........-....Y.....l...s...K.P...h...s...pc.Lhttl.t.................................................................... 2 15208 29453 37848 +4128 PF02813 Retro_M Retroviral M domain Bateman A anon Bateman A Family Retroviruses contain a small protein, MA (matrix), which forms a protein lining immediately beneath the phospholipid membrane of the mature virus particle. MA is located in the N-terminal region of the Gag precursor polyprotein. 
The N-terminal segment of MA proteins directs the Gag protein to the plasma membrane where budding takes place, and has been called the M domain. This domain forms an alpha helical bundle structure. 25.00 25.00 25.40 42.10 24.10 19.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.96 0.72 -3.82 3 206 2012-10-01 20:35:47 2003-04-07 12:59:11 9 15 46 1 0 218 0 81.30 90 16.63 CHANGED DsVIKVLsohCKDYCGKTSPS+KEIuosLSLLpcEGtLcSPSDIausupWDhlTAALoQRAMsuuKAGELKTWGLlLGALKAAREE ...............ACKTYCG.....KTSPSKKEIGAMLSLLQKEGLLhSPSDLY.....SPGSWDPITAALSQRAMVLGKSGELKTWGLVLGALKAAREE. 0 0 0 0 +4129 PF00424 REV REV protein (anti-repression trans-activator protein) Finn RD anon Pfam-B_169 (release 1.0) Family \N 20.80 20.60 20.80 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.33 0.72 -4.09 13 15943 2009-09-12 07:43:04 2003-04-07 12:59:11 13 3 89 13 1 6741 0 83.00 60 88.13 CHANGED Mst+st...-EtL+RtLRLI+lLaQoN.PYPp.ssGTApQRRpRRRRWRpR.cQIhALA-RIhsh..-.PhspPlDptl.cLQcLsIQpLPDP.....................Psss ......................................................................................................P..Po....sEG.T.RQA.RRNRRRRWRpRQR.QIcu.ISthILSohL......GR.P.sEPVPLQLPPLERLoLDCsEDC.GT.oGTpsssssphh............ 0 1 1 1 +4130 PF00472 RF-1 RF-1 domain Bateman A, Finn RD anon Prosite Family This domain is found in peptide chain release factors such as RF-1 (Swiss:P07011) and RF-2 (Swiss:P07012), and a number of smaller proteins of unknown function such as Swiss:P40711. This domain contains the peptidyl-tRNA hydrolase activity. The domain contains a highly conserved motif GGQ, where the glutamine is thought to coordinate the water that mediates the hydrolysis. 
21.30 21.30 21.50 21.40 20.80 21.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.28 100 11930 2012-10-03 10:08:23 2003-04-07 12:59:11 15 22 4796 19 3127 8043 5160 115.70 40 37.79 CHANGED .hstsh.clplsts-lclcsh.RuoGsGGQpVNKssSA....................V+l.pHh.......Pou..................lllpsQppRSQhpNRcpAhchLpu+Lhph...t........hpcpcppppsp+tuphcph-tupc..hRsYsh..psplKDtR ........................................cphclcIs.s.sDl+lD.sa.R.u.SGAGG.........Q+V.N+..T-SA...........................................VRl..THl.........PTG..............................................IVVps...Q...s-RS..Qpp......N+cpAhphLp..A+L.hph.p......................hpcp...p.t....pt..s.s.tR.p...s...p...h..t...t..h...s....tu..pp..IRo..Ysh...PpsplpDhR.............................................................. 0 1030 1966 2619 +4131 PF04506 Rft-1 Rft protein Wood V, Bateman A anon Wood V Family \N 20.80 20.80 23.20 22.70 18.40 18.10 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.70 0.70 -6.14 4 365 2012-10-02 21:24:20 2003-04-07 12:59:11 8 9 278 0 263 371 4 394.10 23 88.38 CHANGED pSShpGhsaslhhQlhsRIlTFhlNthllRhlusclhGlssl+LplLpSTlLFLSREulRhAtlphsups...h.pss.tch.pphaLSs.lpsllss..sYI.h.aluhs.ulhhSha.ahslss....lh.shucsslFlhhl.phlcL..L.psaahlsQhhlhhstpstscuhuhhhsulhphtlsshs...p.............cuhuhL.Fshsslu.plp.huhhca.thps......FSshLsKltp.......p.......haasp-hlphhhohhhQslLKpLLT-G-Khlhsh..lhShp-QusYsllsNhGSllsRhlFpPIE-ssphaFuphlppcsphsp......+pulslLsplL+hhuhluhlhhsFG.sYSshVL.hhuGscaussus.tslLphYshYI.hhAlNGIhEuFhhusssucQIhcauhhhhshSlhaLlhualLhsh..hustGhIlANIINMslRIlYshhaIp+.a+-hshs.ohshshh.sphhlhshhhusllsaWh.ups.hLsahlsslhhuhshLhhhhlsc+phhphhhhhhsphthK 
................................................t..........hhhQlh.+hhoFhhNthllR..hls.t...hhGl..s...pl..lh..ohlFhuREuhRhuh.p.......t...................................t.............t............................................................thh..hs.h.......hh...h..................................h...................................h.h....shh..lh.h...s..hhlcL..hsEs..h..a....h.l.p...h.hth.+..hhc.....s.h.....uh..hhh.sh.h.p.hh..hh......h.h...................................................hh....h.h...h..h.......h.hh....h..h.......h..................................................h........h......h..........................................................hp....t.h...ph.hhp.hhhQuhhKplLT-.G-p..hlh.s...h...h..............ohtpQG.hYslssNh........GSLlsRhlFtP..lEEo...hhhFup.............h.l...................t.........p...........t.........p........t...................p................................................................................t......sh.....p..........h.L....t.......ll+hhhhluh.hhhshG..shu.hhLpl.h.....h...G...t..t..hs...sst.......s.......................hLphY.......s.h...Yl...................hL........AhNGlhEs.Fh.ushs.t.pp...............ltp..shhhhhhShhahh...hs...h..h.hh......t..............hG...s....G.............hlhANhhNMhh.RIhas....hhal....tp.......h.h.......t.....t............................................................................................................................................................................................................................hh.hh............................................................................ 0 96 153 222 +4132 PF04589 RFX1_trans_act RFX1 transcription activation region Kerrison ND anon DOMO:DM04454; Family The RFX family is a family of winged-helix DNA binding proteins. RFX1 is a regulatory factor essential for expression of MHC class II genes. 
This region is to found N terminal to the RFX DNA binding region (Pfam:PF02257) in some mammalian RFX proteins, and is thought to activate transcription when associated with DNA. Deletion analysis has identified the region 233-351 in human RFX1 (Swiss:P22670) as being required for maximal activation [1]. 25.00 25.00 28.80 28.80 18.60 23.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.25 0.71 -4.49 8 197 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 39 0 75 132 0 135.80 50 21.09 CHANGED MQsSEuGSDosuoV.sLpTSsuuQu....sVPuoQpRslVQshsps.Kst.sQQl......sV.hsQQVsQQVQQ..VQHVYsuQVQYV.EGu-uVYTNGsIR.oAYoY.sEoQlYuQoSGuuYFDoQ...GuuAQ.....VTTVVSS.......HoMV.......GIsMsVuG.SQIISSo.uuYLItGG .......................................psucsuusssuoV..tLpspsstQ......Vs.sp.................sQ.l......sV.......p...s.....Qp.VQQ...VQHVYPuQVQYV.EG.uD..sVYTNGAI.R.osYsY.sEoQhYoQs..o...uusYF-sQ......GouAQ.......VTTssSS.......HuMV.....soGGIsMsVsG.uQllSSo..GsYLIts.s............................................................ 0 3 8 28 +4133 PF02257 RFX_DNA_binding RFX DNA-binding domain Bateman A, Mian N anon Pfam-B_3682 (release 5.2) Domain RFX is a regulatory factor which binds to the X box of MHC class II genes and is essential for their expression. The DNA-binding domain of RFX is the central domain of the protein and binds ssDNA as either a monomer or homodimer [1]. It recognize X-boxes (DNA of the sequence 5'-GTNRCC(0-3N)RGYAAC-3', where N is any nucleotide, R is a purine and Y is a pyrimidine) using a highly conserved 76-residue DNA-binding domain (DBD) [2]. 
27.00 27.00 27.20 27.30 26.40 26.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -9.78 0.72 -3.84 34 732 2012-10-04 14:01:12 2003-04-07 12:59:11 10 11 217 2 423 656 0 83.20 43 10.10 CHANGED c+p+phaA.hhWLhsshE.tsp.ssslPRs.......................plYscYhptCsppp.lcPLssAoFGKLlRslFPsl+TRRLGs..........................RGpSKY....HYs.....Gl+l+sss ...................p......hs..hpWLhsshE..pspssslPRs.......................plYscYhpaC..............p.............p..............pp.h.cP.lsuAoFG.......KlIRplFPsl+TRRLG.o..........................R..GpSK...Y..........H..Yh.....Gl+lK...s................................ 0 100 152 268 +4134 PF03214 RGP Reversibly glycosylated polypeptide Mifsud W anon Pfam-B_2662 (release 6.5) Family \N 23.80 23.80 29.90 29.80 23.70 22.40 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.19 0.70 -5.70 4 169 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 63 0 94 181 3 299.80 50 91.75 CHANGED MSh..IpcsEVDIVIuALpsNLTsFhppWRPFFStFHLIlVKDP-hp.cclplPEGFss-lYs+oDhE+VlGup.sSIpFSGauCRYFGaLVSKKKYlloIDDDClPAKDsuGhsVDAVsQHlhNLcoPATPFFFNTLYDPaRcGADFVRGYPFSLRpGVsCAhSCGLWLNlADhDAPTQslKsppRNTpYVDAVMTVPttAMhPlSGINlAFNRELlGPAhhPALhLtGEGKhRW-TlEDlWsGhCsKllCDHLuhGVKTGLPYVWRs..EtGsAl-SL+KEWEGlKlMEcllPFFpSlKLspTusssEDCVIELActVKEpLGp.DPhFsKAADAMhcWlcLW+Slss.SA 
....................................................................................................................................................s........cs-lDIVIsslp.sL.sFhp.WRPahp.aHLIlVpDsD.t.c.lplPpGFDhclYs+sD....lp+..lLGsp..ss.IsFpspuCRsFG..ah...VS...+K...KYlh.oID.DDChsA..K..D.ss..GpplsAl..pQ..H..l..pNLhsPSTPa....FFN..TLYD.PaRc.....GA.D.FVRGYPFS.L.........R..........E..........G............l........s........T..........A.........lSpGLWLNlPDYDAPTpL.V......K.........Ph.....cR..NoR...........Y..VD...A..V....hTlP+GshhPhCGMNLAFcR-LIGPAMaF..GL..hscGp.....slGRaD...DhWuGaCsKVlC.DHLGhGVKTGLPYlaHS.........KA.u.s.s.FssL+K.EacGlhhpE.-llPFFQs.....st.L.s.....cpss..oVp.pCYhE.Luc.V+p+...L..ut..l.....Ds..hFt.K.hA-AMlsWlcuWcplss...st...................................................... 0 14 66 82 +4135 PF05045 RgpF Rhamnan synthesis protein F Moxon SJ anon Pfam-B_5448 (release 7.7) Family This family consists of a group of proteins which are related to the Streptococcus rhamnose-glucose polysaccharide assembly protein (RgpF). Rhamnan backbones are found in several O polysaccharides of phytopathogenic bacteria and are regarded as pathogenic factors [1]. 
25.00 25.00 26.50 26.00 24.80 24.40 hmmbuild -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.78 0.70 -5.97 7 464 2009-01-15 18:05:59 2003-04-07 12:59:11 7 50 316 0 120 454 62 322.40 20 50.84 CHANGED llFlSNuplocpspppLps..hsDchl.RENpGFDhhAa+-Gl-hlGFDcLspYDplhlhNcThaGPla.huphFpchEp+.ssDFWGIosH+thp.s....tsthhscHlpSaFIuh+psllpSpsFcsaWcshsphpsht-sl.haEophTshFhshGaphpshlDoc+hsusa..hlcsD.ohhs.ssILK+Rl.Fhcsphl-spth...hPhhLchlccsosYslsLIhcpl.chu.P.sLs.....h.lLsschlps.ttss.sptKlAVphHlYYsDhL-EhLshhpNhshsYDLhlTTsos-K+tEIcph.L.ptsG.pss.VhVs.tN+GRDhsshh.sL+-hL..c+YDhVs+hHTKKS.pssh.tGp.a+cchl-.Llcs...scNlLshFcpNsplGlslssl.sahpasplspAW.tN...tsphtclh+phslph+hD....sT.VhuYGThaWF+.cAL+pLF-hchp.cDhssEP.hspsolhHAlERLlsYhAhssuY ................................................................................................................................................................................s.....h............t.l...........h.t...hh.RpN.GaDhhuappu.h....h.....t..h..ph.splhlhNco..hG.Ph.........ht...hh.tp..ht..t.p....thDhaGho.......t....................................h..HlQSaahsh....pt.hhts..F.paWpth...h.p.s..htpsl..aE..hophh.p..Ghphtsh..h..........................................................h.................hh......t...............................................................................................................................hhhhHhh...hh.th.t.......h.........hphhho...h.............h.t...h.................h......ph.psh..hh..........h....t..hthhhhhps...+p..........................................t.t.h.t....hht.hh...........htt...h..t.http..hu.hhhs.......................................................................h...h...th.........h............................G...shhah+......thht..h.t......t..............h....................t.....hE........................................................................................................................................ 
0 26 73 97 +4136 PF00615 RGS Regulator of G protein signaling domain Ponting C, Schultz J, Bork P anon SMART Domain RGS family members are GTPase-activating proteins for heterotrimeric G-protein alpha-subunits. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.78 0.71 -4.00 74 4150 2012-10-03 22:10:09 2003-04-07 12:59:11 14 139 303 83 2509 3808 11 120.60 21 22.88 CHANGED ohcpllssps.GhphFppFLcsphsc..-slpFWhusc-a+pt..........................sppchhpp..AppIapcalt.sus.ppl.sl-ppstpthppslpp............................ss.shFc.tuQpplhp.hhcp-sas+FLcS..shYpch .........................................................................hppllps....uhthF.p....p...F..L.c.p.ch.st...........-sl.............p.....Fa...hss.c.ca+ph.............................................................................................................................s.p.p..p.htpp......uppI..a..ppa.l....t.........s..p....us...p.........p....l...s...l.s...p......p.....s..p..p..p...l..p..p..p.lpp.......................................................................................................ss.ph.Fc..pup..p..p.lhp.....hh..c..p..c..s..a..scFlpS.tha.p........................................................................................................ 1 835 1141 1805 +4137 PF00974 Rhabdo_glycop Rhabd_glycop; Rhabdovirus spike glycoprotein Finn RD, Bateman A anon Pfam-B_167 (release 3.0) Family Frequently abbreviated to G protein. The glycoprotein spike is made up of a trimer of G proteins. Channel formed by glycoprotein spike is thought to function in a similar manner to Influenza virus M2 protein channel, thus allowing a signal to pass across the viral membrane to signal for viral uncoating. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 501 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.53 0.70 -6.27 8 3017 2009-09-17 10:48:14 2003-04-07 12:59:11 13 3 114 5 2 2015 0 333.50 41 96.20 CHANGED hlslllllthsss.hsp..PlhssssspssWs.Pl.sashsCPssthspscuupspsPhpac-.ssGhlosspV.GalCpu....slapsssah+aVGY+shT....Fut+slcPTls-C+tAhpchssGsspsppsLaFPsPsCtWhosVsss-tshhhlTP+oVplDsYstcalDscF.uG+Copps.....CpTpassolWhs-ss.hpstCsp.hpplcGhhasspsSppsshus....cpGa+Pah.LcsuCplsFCGK.GIRhspG-Wlulsss....sssphhshPpClss.....lsshRssts-ashls-Ll+...cRpECL-sascIhsopsloshcLS+hRshtPGhGcsYslhNGoLhcupspYlpV-h...-lh.s+ssh+stspsshhhWsph..FGshhtG.cGlltss..phphP.lppasulh-suhh.hhpst.lPHPshpsspsspD.......hhsschts.tpsl-slDhthssWuhalhluuhsllsllLllllhpCCshssp..pPptppsIshpphsps..p ............................................................................................................Wp.sl.ht..tCsp.s......s.......cs..s.p....hsshpa.c..p.uhlss.ph.tahshu....shhtsts.hphhshhshs....athpphc.h..ts+.uhp.thstssphcts.h.P.PshtWhpsshppc.ohhhloPpssplDsYs+phhsp.F.sGcCps..s.......CpTpashslWhs-ps....tspCDh.......hp..s.....cG......hh.ss....c..ts..c..p.sshss................ptGha....LctACplphCGh.Gl+h.pGshlthps..........ssth..hhs....Pcphss..................hhsh+.ssp.-a.....hl..p.-.L.l+.......pRp...c...CL-shppIhsstslo.h.LShht....G.s...sahh.............................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 0 0 2 +4138 PF03342 Rhabdo_M1 Rhabdovirus M1 matrix protein (M1 polymerase-associated protein) Mifsud W anon Pfam-B_3629 (release 6.5) Family \N 25.00 25.00 319.20 319.00 21.20 20.70 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.29 0.70 -5.21 4 24 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 9 0 0 24 0 221.50 62 98.90 CHANGED MuD..p.GEphhs..u.sLhcL-u+LcN.s.cDuplsth.u..KcssshptE...osK+pscPp....LEpllL+aVsEcpplDAhKcFGtLItpI+.SHQtELTpHLE+VusEpRANLpALhcSQpEppKsoKpILSslIulRuplsENuS.+PKsLDhDQV+sERALGFshGYpTAlslhs+lKshsPppssptpVKshAlpAhEc-EYEGStphF+pVlctlKtchc MuD.IEMuEpLVLSHG.SLADLD+RLDN.APKDsRuALFSS..sssso+QKs...SPKKKssPT...TLEElIGHFVsEDLQLDAsKAFGQLLRRIKhSHQEELTQHLEKVNsEpRAKMGALLESQKENGKKTDNILSILIuMRGEGAENASKKPKVLDGDQVRNERALGFNRGLTTAAIAMKKFKLEDPLsLCKGSVKRAALSAMEKEEYDGpRETYusVuKAlKA-lc. 0 0 0 0 +4139 PF04785 Rhabdo_M2 Rhabdovirus matrix protein M2 Kerrison ND anon Pfam-B_2486 (release 7.6) Family M protein is involved in condensing and targeting the ribonucleoprotein (RNP) coil to the plasma membrane. M interacts specifically with the transmembrane spike protein (G) is important for the incorporation of G protein into budding virions [1]. 
25.00 25.00 414.40 414.30 16.80 16.70 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.28 0.71 -5.28 4 134 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 30 1 0 136 0 202.00 89 99.89 CHANGED MNhLRKhlKoC+D-EoQKssssSAPPDDDDLWLPPPEYVPLpEloGKtNhRNFCIsGEVKlCSPNGYSF+IlRHIL+SF-sVYSGNRRMIGLVKVVIGLsLSGSPVPEGMNWVYKLRRTLIFQWA-SpGPLEGEELEYSQEITWDDDoEFVGLQIRVsA+QCHIQGRlWCINMNSRACQLWSDMoLpTQQS-EDcsTSLLLE MNFLRKIVKNCRDEDTQKPSssSAPPDDDDLWLPPPEYVPLKELTGKKNMRNFCINGEVKVCSPNGYSFRILRHILRSFDEIYSGNHRMIGLVKVVIGLALSGAPVPEGMNWVYKLRRTLIFQWADSRGPLEGEELEYSQEITWDDDTEFVGLQIRVSARQCHIQGRlWCINMNSRACQLWSDMSLQTQRSEEDKDSSLLLE 0 0 0 0 +4140 PF03397 Rhabdo_matrix Rhabdovirus matrix protein Mifsud W anon Pfam-B_3980 (release 6.6) Family \N 25.00 25.00 166.50 166.30 18.10 17.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.91 0.71 -4.91 10 35 2009-09-10 23:57:29 2003-04-07 12:59:11 9 1 9 0 0 31 0 165.90 66 85.60 CHANGED KRTILlPPPHLTSNDEDRVSTILTEGTLTITGPPPGNQVDKICMAMKLARAILCEDQHPAFNPLVHLFQSAMIFGETSEKIDFGTRSKTLITSFKlAEAKAIYLDoSPVRSRIEAKKYTTPIRHGSVTYYGPFlFADDHVGGKGHREKLGALCGFLQSssYGQAKDYY .KRTlLIPPPHLhSsDE-RVollpsEGplplTGhpPosLpEKIhhuMpLAuAIlGGD.HPuFpshsaLFQptMEFGuTpEKlsFGoRcsss.ToapVh+A+tlYLpopPl-K+IshppYosshcsuoITaoGpFLFSusHVGscDsRsKLAuL-GFhpSsSYtpsKDYY 0 0 0 0 +4141 PF00945 Rhabdo_ncap Rhabd_nucleocap; Rhabdovirus nucleocapsid protein Finn RD, Bateman A anon Pfam-B_477 (release 3.0) Family The Nucleocapsid (N) Protein is said to have a "tight" structure. The carboxyl end of the N-terminal domain possesses an RNA binding domain. Sequence alignments show 2 regions of reasonable conservation, approx. 64-103 and 201-329 [1]. A whole functional protein is required for encapsidation to take place [2]. 
19.70 19.70 20.30 20.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.15 0.70 -5.88 11 7911 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 119 82 36 2619 0 200.80 64 92.45 CHANGED ssphVss+hPs.sDssEYPhsaFpcsp+phhshhps...hDLsphRphlhsGlpuuclssscVsSYLhtshcthcsphccDWsSaGlpIu+KG-pIs.hsLlslpcpcsp...h.cGtpcssRs...s-DtaLshhLLuLYRlu+sssp...sY+sslhD+lpp.hcst...phstcphl.sstchhssWsspsNFphlsuuhDMFFpRFcpHtauslRaGTlVoRYcDCuuLsohoahhKlhsLo.hc-shpWlFs+sht-Elh+MMp.PGQEIDsscSYhPYhhshGLSsKSPYSSsssPphashlHhlGshlsSsRShNAphlsstshpclossuhlluashtucushcppFhps-cchpptcsp-tsps-ss.scs.........spus-shsWhuhapchthphscphcsah+RhVpsls ............................................................................................................................YLhush..............................................................................................................................................................................................................................................................................................................................................ttc.h+.s...ch.t.sssu..............t.........hst-sps..uhYtch.hphsp.hh.tlp+hhts..o.......................................... 
1 8 8 36 +4142 PF03216 Rhabdo_ncap_2 Rhabdovirus nucleoprotein Mifsud W anon Pfam-B_2146 (release 6.5) Family \N 20.20 20.20 20.30 20.60 19.90 20.10 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.11 0.70 -5.73 4 252 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 12 0 0 122 0 172.00 65 94.19 CHANGED uul+cpFsGLRDVKssshtspGp-acPuclpLslYt..ossDhDhphIh+AlutVGGspTscALulLhAFVhttos.sth-stsKlLp-hGFpVcplPhusslpssltsP.pcLApslspENlh-lV+GlLaTCALhsKYsVDKMtpYhppKLtcLAoSQGlsELpphsss+usLt+luuslRPGQKlTKAlYuhILlpluDPsTtuRA+ALsAMRLsGTGMTMVGLFsQAAKNLGAsPADLLEDLCM+SlV-SARRIV+LMR.Vu-AcslpA+YAlMMSRMLG-uYFKuYGlN-NSRIosILhsINs+Ys-sThtGLpGlKVSs.FRcLAccIAclLVcKYDssGssGpGASDlIRpA ............................................................................DtKVIVDALSALGGPQTVQALSVLLSYVLQGNTQEDLtsKCKVLTDMGF+VTQusRATuI-AGIhMPMRELALTVNDDNLM-IVKGTLMTCSLLTKYSVDKMIK.................................................................................................................................................................................................................................... 0 0 0 0 +4143 PF02484 Rhabdo_NV Rhabd_NV; Rhabdovirus Non-virion protein Mian N, Bateman A anon Pfam-B_2189 (release 5.4) Family Infectious hematopoietic necrosis virus (IHNV) is a member of the family Rhabdoviridae. The non-virion protein (NV) is coded for by one of the six genes of the IHNV genome [1], but is absent in vesiculovirus -like rhabdovirus [2]. 
25.00 25.00 72.90 201.70 18.40 16.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.35 0.71 -4.18 2 24 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 6 0 0 24 0 110.50 88 99.66 CHANGED Msp+s.sTshtAL+-lLRYKspVAtHGFLFDDGclVWpE-sDtsWpRLhsVVsALhSSpRMppsLaMDhSITKG-GaLLFsDLQGTpNh.a+pP+FRpaLh.l--FLshPR MDHRDINTNMEALREVLRYKNcVAGHGFLFDDGDLVWREEDDATWRRLCDVVNALISSKRMQRVLYMDLSITKGEGHLLFVDLQGTKNRLYKEPRFRRHLILIEDFLAYPR 0 0 0 0 +4144 PF00554 RHD Rel homology domain (RHD) Bateman A anon SCOP Domain Proteins containing the Rel homology domain (RHD) are eukaryotic transcription factors. The RHD is composed of two structural domains. This is the N-terminal domain that is similar to that found in P53. The C-terminal domain has an immunoglobulin-like fold (See Pfam:PF01833) that binds to DNA. 19.70 19.70 19.70 20.30 19.60 19.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.95 0.71 -4.53 21 1068 2012-10-03 00:25:27 2003-04-07 12:59:11 17 44 147 75 369 1068 0 154.90 36 22.50 CHANGED lcIlEQP+QRGhRFRYcCE.GRSsGSIPG.tpSpcssKTaPolclpsYpG.puh.lploLVT+-pP.h+PHP....HsLVGK...c.C+cG......hhplclsP-.shs.hsFpNLGIpCV+KK-lcpslppRh.............................phs.sPap.lt.pphcthpchDLNsVRLCFQsal..cpsGsashsLsPVlSsPIaDp ...............................LpIhtQPcp+.aRhRYpsE...sS.+Gul.u...........ps..ppsaPs.V.............c..lp....sYts....sh...lpl.lsTsst.....h+PHs....HclsGK..........s..s.p.p.s.................lh..cl..t..ltPc...shh..hshsshGIhpl+.pp-lptt.h.t..................................................phs..s.hp....................shVRLsFpsal......sps...shp.hh...lhSpPI.sp........................................................ 
0 51 81 191 +4145 PF02115 Rho_GDI RHO protein GDP dissociation inhibitor Mian N, Bateman A anon IPR000406 Domain \N 22.70 22.70 22.90 22.80 21.90 21.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.22 0.71 -4.90 16 507 2009-01-15 18:05:59 2003-04-07 12:59:11 12 6 274 48 308 499 3 181.80 38 87.83 CHANGED MuEppspstphpphttpp-c.p.tss.........YKPPspKSlcElhchDK-DESLpKYKEsLLGsss...slsDPssP.NVhVp+lsLLssstP.slphDLoG-..lcplKcps.FsLKEGscY+lKIsF+Vp+EIVSGL+YlppTaRtGl+VDKspaMlGSYuP+.s-....Y-ahTP.EEAPpGhLARGsYslKSpFTDDDKpsHLoWEWsLpIpK- .................................................................ptp.t..........acss.sp..KolpE...h..ph............Dt-DESLp+aKcsL...L.Gtss...............sls-s.ssP..pV.h.l.ppLsLh.sp..st...P....sl..s..hDLss..s....l.pp.h...Kcps..FslKEG.spY+l+l.sFcV.p+-IVSGL+YlppshRpG...l+...V..DK......tp.......M......lG.S.Yu.P.....p..s-...........Yphhh...s.......E...EAPsG.hlAR.GsYsspSp......FsDDDcps....H.....L..pacWshpIpK-.............. 0 79 153 234 +4146 PF00581 Rhodanese Rhodanese-like domain Bateman A anon MRC-LMB Genome group Domain Rhodanese has an internal duplication. This Pfam represents a single copy of this duplicated domain. The domain is found as a single copy in other proteins, including phosphatases and ubiquitin C-terminal hydrolases. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.63 0.71 -3.73 147 30295 2009-09-13 10:52:53 2003-04-07 12:59:11 15 226 4760 191 9427 22860 9896 101.90 20 44.29 CHANGED ssp..pltphh.......ttsshhllDlR........sttc..............................a..............pt..........u....HIsuA....l.slsh..................................t.......htphththspsp......tl..llas................tstpustssth.....hpt.....Gap.......pl.....hhlpGGhpsWttp ..................................................................t....h.thh.......ttsh...hllD....l.R.......................ss..t.-.............................................................................................................a.....................................pt...................G......H..l..s....G....A...........l.sls.h...............................................................pt......................h.t.p...h..h....t..............s...p..s.p.............................tl.........llaC.............................ttu..cus.t.sst.h............................Lpp..h........Gap..................................pl..................h...l......p.G.GhttWt..t.................................................................................. 0 2977 5838 7876 +4147 PF00620 RhoGAP RhoGAP domain Ponting C, Schultz J, Bork P anon SMART Domain GTPase activator proteins towards Rho/Rac/Cdc42-like small GTPases. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.75 0.71 -4.60 87 7538 2012-10-03 21:54:49 2003-04-07 12:59:11 22 300 353 36 4418 7012 27 147.00 25 17.80 CHANGED Phllppslphlcp.pG..lppcGlaRhuGspsplppLppth-psts..........................thpptslpsluulLKpalRcLPcPLlshphapphh.t..shph.............tstppp.....................hptlppllp..pLPpsshpsLchLhtaLpclsp..psph............N+MsspNLAhlFuPsllps.t....tsp .............................................................................................................Phhlppslp.h.l.cp...pG............l.p.p.c......Gla...R..l..s....G.s..t....s.....p.l..p..p.........L.c..p.t.hcpst.p..........................................................t.pt.hclps....l....u...u.....lL..K.....h..a.h....R....-..L..P.......c.........P.L.....l....sh...p...h...apphl..p.......shph...............................t.stpp.p................................................................................hptl.p.p..llp.......pL......P.....p.....s....s..h..p..s.L..p.h.Lh............p..aL..p..c......l.....sp........psp..................................NpMs.s.pNLAh.lauPsLhps......sst........................................................................................................................... 0 1311 1920 3077 +4148 PF00621 RhoGEF RhoGEF domain SMART anon Alignment kindly provided by SMART Domain Guanine nucleotide exchange factor for Rho/Rac/Cdc42-like GTPases Also called Dbl-homologous (DH) domain. It appears that Pfam:PF00169 domains invariably occur C-terminal to RhoGEF/DH domains. 
20.90 20.90 21.00 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.45 0.71 -4.40 192 7395 2009-01-15 18:05:59 2003-04-07 12:59:11 15 583 301 68 4161 6705 27 174.70 22 17.10 CHANGED llpEllpTEpsYlpsLphl..hphahpslpp..................................pshlstpc.........lp....t..lF.u.slpplhphp.pp.....h.LppLcp.................................................tp..........pp..................l.............uclF...lp.htp..............hhph.YspYsss...hspuhphl....pp..hp.............................................ppsp.........Fpp.......hlp.hpt.............ptht........LpshLlpPlQRls+Y.LL............Lccl.lKpT.s.t..sc........-h.p....sLpp........Alptl.pplspplNc .............................................................................................htEllpTE.ppY...l.c.p....Lphl......hp.....h..ahpshpp.............................................................................................thl..s..t...pp..........hp......h.lF..s....N.l...p....p.lh....p.....h...p..pp.....h..L.p..p.....Lcp....................................................................................................................................t...tp..............tt.pp...................................l.............................us.ha.................lp...htp.......................hh.ph..YtpYsp..s..hs.p...u..h..ph.l.....pp.......ht.......................................................................................................................................................p.p.pt...................hpp............hh.p.p.hpt.....................................s..pphs...........Lp..s...h.L..l...p........P....l..QR............ls.+Y..LL..................................L....p..c.l...l+p..o....s....p....sp....................-t..p.........pl.p...p.........................Alphh.pplhpphN.......................................................................................................................................................
.............................................................. 1 1275 1737 2801 +4149 PF03527 RHS RHS protein Griffiths-Jones SR anon PRINTS Family \N 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.96 0.72 -4.18 18 2442 2009-01-15 18:05:59 2003-04-07 12:59:11 9 181 606 0 216 2289 23 39.60 54 4.56 CHANGED laaYHsDphGtPLpLocs-GchsWpAcYcsWGslhpEpssp .....lHhYHCDHRGL...PLALlSp-Gs....T....sWsAEYDEWGNhLsEEss.......... 0 42 71 144 +4150 PF00073 Rhv rhv; picornavirus capsid protein Eddy SR anon Overington and HMM_iterative_training Domain CAUTION: This alignment is very weak. It can not be generated by clustalw. If a representative set is used for a seed, many so-called members are not recognised. The family should probably be split up into sub-families. Capsid proteins of picornaviruses. Picornaviruses are non-enveloped plus-strand ssRNA animal viruses with icosahedral capsids. They include rhinovirus (common cold) and poliovirus. Common structure is an 8-stranded beta sandwich. Variations (one or two extra strands) occur. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.07 0.71 -4.57 50 28435 2012-10-04 01:49:40 2003-04-07 12:59:11 15 90 713 302 0 21797 0 144.60 21 43.85 CHANGED hsssspsshss.phsshhp........h.pppsssshpth....t...t.pphht.hpshshs.ttpshhhhphsh.spthph....h.......hGthhpaaoahRsuhcl.phh..h........su......oth......ppG.........p..........h.........l...l...sh....lPsGss...............................................sssst.................................hpuhhssH.hhshttssosplslPalussshpshh...........................................pa.sshshshhshssh.sh ......................................................................................................tst..s..ths...p..........tt..ps.h.th.......t.ts..................t..h.........s........................s...............t........................h.......h.s........a.p.l.sh...ht.hsp............htphhc.haTYhRh.DhEhshh...h...........ss.......................s..........t..........h..........hha..lP.sG..ss..........................................................................................ps...s...sp........hh..p...t...ts....sphslPahu..tphh...t.h.s.........................................h..................h................................................................................................. 1 0 0 0 +4151 PF02267 Rib_hydrolayse ADP-ribosyl cyclase Bateman A, Mian N anon Pfam-B_3719 (release 5.2) Domain ADP-ribosyl cyclase EC:3.2.2.5 (also know as cyclic ADP-ribose hydrolase or CD38) synthesises cyclic-ADP ribose, a second messenger for glucose-induced insulin secretion. 
25.00 25.00 45.70 37.20 19.60 18.80 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.78 0.70 -5.03 12 149 2012-10-02 19:28:18 2003-04-07 12:59:11 12 3 48 156 78 136 0 212.20 38 81.23 CHANGED Tsphp-IhLGRChpYs.....thhpPt................tphsCpsIhcsFhpAhhsKsPCsls.pDYcshhpLsspo..lPssKolFWSpsp..plsHsasp.sppchhTLEDTLhGahuDsLsWCGp....tssSthsapsC...Pshpc....CsN..NshssFWppsSppaAcsAs..........................GlVpVMLNGSps..ss......asppShFuslElssLpss+VsplplaVhc-ht.....sssp-oCusuSltpLcphlps+shtasChss.+slhhLQClcpsppssC ...............................ph.plhluR.Chpah.......hpst....................t.phsCptlWcsF.ps...hh...s..K...s...PCslh.pDYp.h....hp.L...stps......lPpsKolFWppsp...lsppasp...sppc.hhsLpDs..LhGhhsDtL....sWCGp.....pss.sthsY.p.SC...Ps.pc....CpN....NslssFW+puS.tpaAcsus..........................GslpVhLNGShs..ss.......as.hpuhFushEl.sLp.pKVpplplaVhaplt.....s.s.s...h-oCupsSlp.LcphLpp.hshpaoChsshc.ssph.l.Chcpspp.tC....................................... 0 22 23 41 +4152 PF01872 RibD_C RibD C-terminal domain Enright A, Ouzounis C, Bateman A anon Enright A & Pfam-B_6425 (Release 8.0) Family The function of this domain is not known, but it is thought to be involved in riboflavin biosynthesis. This domain is found in the C terminus of RibD/RibG Swiss:P25539, in combination with Pfam:PF00383, as well as in isolation in some archaebacterial proteins Swiss:P95872. This family appears to be related to Pfam:PF00186. 
20.50 20.50 20.50 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.29 0.71 -4.71 27 7001 2012-10-03 00:23:32 2003-04-07 12:59:11 12 23 4048 36 2265 6046 2900 188.90 22 66.43 CHANGED salhhphshSLDGthusssGsuph....lsstt...............spthhpphRppssulllGt...sTVhs-sPthssphspt.............cpPh+lVlssphchs.psp......llsps....hhhhtt...............................h.sllcpLtpp.shpslhlEGGusLtsu.hlctGLlDclplhluPhllGu..utshhsstsh.t.....hplchtphpphusslhl .............................................................................................................................hlhhphuhSLD.....Gh.hA......t....s..s.....u...p...s...p.....a.............l.o.s.tt.........................................sc.pt..s.p...p.h...R...s....p....s....sAl.l..h..Gt.................sT..h....h.....t....-....s.....P.....t.....h....s..s...c..h..tt..............................................ppP..h..+..l..V...l..s.p..p.....h..c.....h...s......s.tp.............................................lh....p....s....s............h..h..h..h....t...t................................................................h............................................h.s.ltp.hl.p.p.....L....t...pp.....s........h........psl..h.l....EG.Gs.....p..lss....s.hl.............p.......t....s....L.....lDElhlhlu..P.h.l...l.Gs........u.....hs..h..h.s..t.....s.ht...............phphhphp.hs.....h................................................................. 0 833 1564 1990 +4153 PF05062 RICH RICH domain Bateman A anon Pfam-B_277 (release 7.7) Domain This presumed domain is about 85 residues in length and very rich in charged residues, hence the name RICH (Rich In CHarged residues). It is found in secreted proteins such as PspC Swiss:Q9KK19, SpsA Swiss:O33742 and IgA FC receptor Swiss:P27951 from Streptococcus agalactiae. This domain could be involved in bacterial adherence or cell wall binding. 
22.00 22.00 22.00 23.70 21.70 21.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.73 0.72 -3.85 26 495 2009-01-15 18:05:59 2003-04-07 12:59:11 7 33 170 0 1 455 0 74.10 40 17.91 CHANGED spppcKttptVcpalcKhLs-l...pLcKc+HTQsVsLhpKLucIKpcYLhcLss.ppc............splpph.pcsKscLDAAF-pFKK ..............ppcKutptVspYhpKhLp-h...pLc+c+HpQsVuLlpcLssIKppYL.-lssspsK..........sphppLspKspu-lDAAhpcFKK........ 0 1 1 1 +4154 PF01042 Ribonuc_L-PSP DUF10;UPF0076; ribonuc_L-PSP; Endoribonuclease L-PSP Bateman A, Finn RD, Kerrison ND anon Pfam-B_797 (release 3.0) Domain Endoribonuclease active on single-stranded mRNA. Inhibits protein synthesis by cleavage of mRNA [1]. Previously thought to inhibit protein synthesis initiation [2]. This protein may also be involved in the regulation of purine biosynthesis [3]. YjgF (renamed RidA) family members are enamine/imine deaminases. They hydrolyze reactive intermediates released by PLP-dependent enzymes, including threonine dehydratase [5]. YjgF also prevents inhibition of transaminase B (IlvE) in Salmonella [4]. 
27.30 27.30 27.30 27.30 27.10 27.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.45 0.71 -4.35 30 11334 2012-10-01 19:40:00 2003-04-07 12:59:11 16 61 4115 187 3227 8930 3544 116.70 29 81.61 CHANGED otsAP..tshusYSp.Alp..ssshlalSGQlslcs....tssclh.sssstpQscpshpNlctlLptAGsshsc......lVKsTlFlsDhsc.FAplNplaspaFsps..........PARusVpVutLP...hsshlEIEslAhh .....................................................s..ss.a..up...ulh..........hs.s..h.l..a.h.SG...Q...l..s..hs...............................s....sp...l..........s.....s...s.....l.......p.....t.....Q.....s......c....p....s....l...c.......N.......l...c....s....l....L........p....t..A....G...s.....s...h..sc......................l...l...+...so...l......a..l......s.........D...........h..s......c......F....s....s...h.N..........p...l...a...s...p.a...Fspt................................P..A..R....o.sVt.V...s.t..Ls.....sh.hlE.I-slAh.h............................................................. 0 905 1902 2685 +4155 PF00317 Ribonuc_red_lgN ribonucleo_red; ribonuc_red_lg; Ribonucleotide reductase, all-alpha domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.61 0.72 -4.18 137 5796 2009-09-10 18:18:06 2003-04-07 12:59:11 16 66 4614 118 1297 4181 2797 77.40 30 10.10 CHANGED hthshhuhp.hL.tpcYLh..+sp..ps.........hhEsspphahRlAhslAt.p..................................................................................sphphsp....caa....phhsphpahsuoPslhNAGpsts .....................h..hoahuhp.ph.hspYhl.......+sp.......sG....p...........hhE.s..pphhhhVAhsLup............................................................................................schphAp......cha....chhsphca.p.sATPThhNuGpsp.p.................................................................................................................... 
0 412 816 1098 +4156 PF02867 Ribonuc_red_lgC ribonuc_red_lgC; Ribonucleotide reductase, barrel domain Finn RD, Griffiths-Jones SR anon Prosite Family \N 19.50 19.50 19.50 19.60 19.40 19.40 hmmbuild -o /dev/null --hand HMM SEED 538 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.54 0.70 -5.95 136 8373 2012-10-01 23:28:04 2003-04-07 12:59:11 10 91 5010 124 2033 6508 10158 423.00 26 65.30 CHANGED loSCFl........s.hsDol.....cuIhcshppsuhlpKtuGGlGlshop.lRspGu.lpussususGllshh+lhssssphlsQuG.pRpGAhslYLcsaHsDIh-FLs...hK.ppsuc-phR..........................................s+sl.hulhlsD..hFMc....tlcp.......stpWsLhsPpp..........................................l.phaGp.......capp.Y.ph...............................pps.......htc..plpA+cLappIhpsth-oGpPalhacDssNppssp.....p.............p.....h.........GpIpsSNLCsEIsp.tp..........................................................phusCNLuSlNLsphlpts............................................................................hshcplccslchssRhLsslIDhstaP.l.pscp............................sspcpRslGlGlhGLsshLhthtlsYsSp-AhphscplhEtltatAlcuShcLA.c.cpGsat...................sa..csStaupG....h.hchh..................................p............................................................................................WppL+ppltph.GlRNuhhhAlsPTuohS...ls.ssosulEPhhuplas+pshsst......h.hss..h.ptlpp..............h..t.ht.l.......................tsl.sh..tl.tphhth..ac.o.Aa.......-l...s.cthlchsAstptalDQu.Shslal................s..cs..ohpclpphahhAactGLKo.hYYh 
.......................................................................hsSCal.........thtD..sh..............puI..h.ts.h.pp.hh.lsp.uuGlu...hthup......lRthGs.lp....s.h..p.s..usGhlsh.h.+.hhps........................sh.thssQh.............................G.h...........Rt.GA......h...........s.lalphaH.Dl.p.Fls...h+.ppp...u.pc.t.+..................................................................................................hpsh..shulh.l..s...c...hhhp.h...s.p.........stphh.L...hsP..p...........................................................................................t..h.................................thpt.Y......th...............................tp............h.h.p...pl.pA.pclaph.l.h.p..t.hpoG.Pal.hhhD......psNptss.........................................u......lp.....S..NLC..sEIh...t.............................................................p...hsCs..L.........u.........S.........hNlsthhp.......................................................................................ch.t.c.......hsphh..h....+h....L.....stl....h......D......h.....s......h..s......h...thpp............................ss...t.t+slGlGhh..shts.h...L.ht.tl.Ys....S.....p...u....h.phsphhh.hlthhshp..sS.plA..c..ct..s..s.a.......................a..p.t..o.....h..s..p....u.....h.........ph..ht..............................................t...............................................................................................WttLtt..tl..p...........Gh...h...Ns.h.Al.hP..o..sohS.....ls...........s.....s.o.s.ulc.P.hs...h.h.........+p..hpst...................................................................................................................................................................................................t...........h...h......a.p...s.sa..................ch...s.pthlchhu..hp.........p......alD.......Qu.Shsl.h................s....ph.......shppltphhhhAa.c.........h.G.....l.Ko.hYY..................................................................................................
.. 0 708 1339 1729 +4157 PF00268 Ribonuc_red_sm ribonuc_red; ribonuc_red_sm; Ribonucleotide reductase, small chain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.50 20.50 21.10 20.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.77 0.70 -5.40 13 7021 2012-10-01 21:25:29 2003-04-07 12:59:11 16 12 4393 189 1340 4104 4718 239.00 25 82.28 CHANGED tshLpcstchF..ashcas-IhphhpcuhuspWhspElsLucDhpDhcp.LstsEppFhpalhAFhuAuDslVN.NL.schosphp.p-hcaaYs.QhuIEslHScsYShllpsLh.pDtpp+pphhsuIhspPsIpcKscWlhchls-scu.huE+llsFhhlEGIFFuuSFASIhaL+pRGLMsGls.oN-LISRDEulHTshAChlYsphh...c+Ps.chI....hclhpEAV-lEpcFlp...spsPsthhhhs...sctlpQYlcasADRLLsslGhstlassss..s.s.hs ..................................................hh.....................p....h.....p.h.app...h.s.p.aWhspc.ls..l..op.D.h.s.a.......p.........p.....L......sttE.pp.hhhpshs....hhshhD.o...l.s..p.s..h.......sl..h....h..p..h..s...c.......c..shhst..th...E.s.lHu....cuYohlhp.......s...........l............s.............s....................t...........t...............t.......h....a.......p.......h.....h...............p...................................................l...........p...c....+...........s............p...........h.............l...........................p.............h.............h.................t.............t.............t..............t...........h....................p............p...................................l.........u............s.....h.....l......Euhh....F.a.u..u....F.h.h....h.h...h............t..p.........p....s..h.....h.ss.uph.....h..chI.RDE.u.lH.h....h....h.h.t....h....h....h....p.........h....h....t....t.......p....p...........t.............p.....p....l...............hplhh...p....h....h.p.........E...h...p...asc...................h..h..s.t.......sh...........tp..h..ppalca.uspth.tlGhc..ha..sp.........s......................................................................................................................................
............................ 0 402 783 1091 +4158 PF00545 Ribonuclease ribonuclease; ribonuclease Bateman A anon SCOP Domain This enzyme hydrolyses RNA and oligoribonucleotides. 21.30 21.30 21.70 21.30 20.30 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.31 0.72 -3.91 80 855 2009-01-15 18:05:59 2003-04-07 12:59:11 15 18 696 335 302 839 28 88.20 33 48.99 CHANGED LPspsp....pphshhptGhs.............sh.tpsGshFtNpEp......................hLPttssG....hYpEhsl.h.s....G.....sRGA..cRlVhusp...........uh.haaTsD.HYsoFpph ..................................................................................LPtphhpphphhptGhsh..................sh.spsGphFsNpEt......................................tLPptpsu........hY+EhsVth...s...................G......sRGu...cRllhuss.............uhhaaTsD...HYpoFpph........................ 0 68 167 250 +4159 PF03631 Virul_fac_BrkB Ribonuclease_BN; Virulence factor BrkB Bateman A, Eberhardt R anon Pfam-B_4424 (release 7.0) Family This family acts as a virulence factor. In Bordetella pertussis, Swiss:Q45339 is essential for resistance to complement-dependent killing by serum [1]. This family was originally predicted to be ribonuclease BN [2], but this prediction has since been shown to be incorrect [3]. 
25.60 25.60 25.60 25.80 25.50 25.50 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.98 0.70 -5.03 155 5654 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 3939 0 1383 3900 630 252.90 22 76.58 CHANGED tpcchshhAuu.luaasllulhPhlhhhhulh.s........h....h...........thhpplhsh.......ltph...........................................hP.....t..sh.phlpshl.p.phh......pps.....................ttulluhuhll..........asusshhpslppuhNp....ha.............ph..pp.t+....s.........hl...ppphhshshh.l....hhsllllluhslsshhth..........................hhsh......h...hhphhph........lshhlh.hhh.hshlYphhPs.t..chphchsh.Gulluulhahlsphs...au.h.Ylsphs..sa.sshYG.ulu.ullllll..WlalsuhllLhGAplssshpptp ...............................t..pphsthAusluYaslLSlhPlLhlhhulh.u.h...hs................th.tpplh..sh......lhs.....................................................................lP........s..tss..shlpshl....p...p.l.t....sps................psu.l...l.ulshll...........aou.thhss.....lppul....Np....la......................cs...pc...pR....s.............hl...hphhhs.h....hhh..l....hhs.l.h.lh.h.ulslss..hhts.h.th..........................................hhsh..........th.......hhphl.ph......lshh.lh.hlh.ahhl.YhhlP....s...t.....+s................p.h.+t.s..l..hGuhluul.h.a...lh.p.hh...Fu..hYl.s.p.hs..........sh...sthYG...uhu..sl..l..ll..hl..WlahsshllLhGAtlsusht...h.............................. 
0 407 908 1180 +4160 PF00825 Ribonuclease_P Ribonuclease P Bateman A anon Pfam-B_1558 (release 2.1) Family \N 22.30 22.30 23.40 23.00 22.20 22.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.61 0.72 -4.17 141 4169 2012-10-03 01:04:38 2003-04-07 12:59:11 13 4 4132 9 899 2569 2063 107.70 28 88.95 CHANGED h.s.+ptRLpppp-Fppla.ppup+.hts...pth..llh........htt..............................shsts..........................Rl..GlsluKKhup.AVpRNRlKRhlREsaRhppsp.ls.......uhDlVlls+.ps....ht..phshpplpp.........plpp...hhp+ht ..................................................pt.Rlpppp-Fptla....pp.u....p.....p....hu.s.....tp.h....llh...........hht......................................p......p.tps.......................................RlGloVuKK.hup.AVpRNRlKRhlRcs.hRh.t..p.p.p..ls..................shDhVl.lA+..ps...........st.....phsh..p.p.lpp.........sLp+.lhcp..t................................................ 0 309 600 766 +4161 PF00445 Ribonuclease_T2 ribonuclease_T2; Ribonuclease T2 family Finn RD anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.66 0.71 -4.68 116 3203 2009-01-15 18:05:59 2003-04-07 12:59:11 13 17 1368 27 551 2843 56 162.70 25 76.12 CHANGED aD.........a.ahLs...........lpWsssaCt...................t......tpC........sttstsF.....slHGL.WPs..................p........ts.............h.sp.....C.........h........................slhsphpphWPsh.t.p.........shhpHEWpKHGTCus........................................tpsYFstshplh..pphsl.shLtpt..lhs.spstph.........shsplpsAlppss.....s..ttsslpCpp...................................hLpEltlChsps..............h...sCsp 
..........................................................h.hs..pW.sshCt.....................................t........C................t.a.....TlHGL.WPs..............................................ths...t...................h..p.sCsss......hs......p.....................................................phhs.p.L...pp..hWPs...h.tsts..................phW.ctE..a.pKHGoCst.s.........................................t.tYFp.psh....phh................p.......ph......s.h.....phLtph......lhs...t..spph.......shpclt.sslppsh..........t...ttstl.pCpts.......................................................h.pl.hs............................................................................. 0 163 305 441 +4162 PF00687 Ribosomal_L1 L1; Ribosomal protein L1p/L10e family Bateman A anon Pfam-B_115 (release 2.1) Domain This family includes prokaryotic L1 and eukaryotic L10. 21.70 21.70 21.80 21.80 21.40 21.60 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.64 0.70 -5.00 255 5940 2012-10-01 21:21:48 2003-04-07 12:59:11 16 17 4958 95 1878 3991 2982 212.10 38 87.56 CHANGED lphlpp.....h.............................................t..ttcFscol-ltlsLph.........sppsspph...........psslsLP+.sht............+sh+.l...........sVhs.psp.ptp.......................pstthsuc.lluh--Ltpp........hc.pth....tcaDhhlAsschhshls.+..LGphL....us+GhhPsPh.....s.lsh.....................................s...lsptlcch+suphhhps.....+ss..slpstlGch..sh.sscclh-Nlpsllstlhph..t....hsKGh...l+slhlp..oTMusu 
.........................................................................................................................................................th.hthttththht.................................................chs...psKFcp..o..V-.lulpL..sl........Ds++u..D..Q.pl................R..Gsl.........sLPp.GTG............+shRV................h.VFupG.scsctt...........................scs.A.GAD.hVGh--Lh.cp............lp.pGh......h.cFDlllAoPDh.Mt.hVG..+...LG+lL....................GP+GLM.PNPKs........GTVTh..................................................................DVscAV.c.csK.u.Gp...l.p.aRsD...........KsG.hlHssIGKl..oF...ss..........-cLhENhpuhlsslh+tK...Ps..........suKGh..Y..l+plslo.oTMGs................................................................. 0 649 1169 1576 +4163 PF00466 Ribosomal_L10 L10; Ribosomal protein L10 Finn RD anon Prosite Family \N 22.00 22.00 22.20 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.06 0.72 -4.08 158 5754 2009-09-13 14:02:41 2003-04-07 12:59:11 15 11 4979 96 1716 3716 2281 98.80 30 50.84 CHANGED hspppKpphl..pclpp..hlppsp.slhlsch....pGlsssphpplRppl.+ps...ssphpVsKNoLhchA...lcp.ssh................t........l.......p..s.hl...pGssulhao..pp-sss.ssKhlpca.K ......................................ppKpthV....scls-hh.......c.....s.......ut..usll..s.-h....c.GloV..sph..opLRcph.Rcs..............ssphcVs...KNTL..h++A....lct..ssh-.......................s................L.....s...c.hh....sG.Psu.ls.Fo....p-.-..ssu.sA+llpcFuK............................ 
0 591 1068 1434 +4165 PF00298 Ribosomal_L11 L11; Ribosomal protein L11, RNA binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.40 21.40 21.40 21.70 21.30 20.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -8.99 0.72 -3.83 171 5449 2009-09-13 02:34:37 2003-04-07 12:59:11 14 14 4897 169 1606 3179 2185 69.30 49 46.79 CHANGED KTPPsohLL+KAAsl......c........+GSs..pP............s+pp.VGplohcQltEIAchKhs.DL....sussl-uAh+hlhGTA+SMGlpV ..............................................KTPPAu.sLLKKA.AGl.....c........+G.Su...cP.........................s+sK..VGp.lTpsQlpEI..........AcsKhs....DL.....sAs.slEuAh+hIt......GTARSMGlsV........ 0 555 1012 1339 +4166 PF03946 Ribosomal_L11_N L11; Ribosomal protein L11, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The N-terminal domain of Ribosomal protein L11 adopts an alpha/beta fold and is followed by the RNA binding C-terminal domain. 19.60 19.60 19.60 20.10 18.90 18.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.56 0.72 -4.51 167 5468 2009-01-15 18:05:59 2003-04-07 12:59:11 9 15 4889 114 1604 3175 2123 58.80 56 39.60 CHANGED hlKLplsAGpAsPuPPlGPALGptGlNIhpFCKpFNspTpc....hp...Gh.lPVhITVas..D+SF ..........hlKLQlsAGpAsP....uPPVGPALGQ.pGl.N.....IM.cFCK...tFNA+T.pc................pt.Gh..IPVhITVYp.D+SF................. 
0 555 1008 1337 +4167 PF00542 Ribosomal_L12 L12; Ribosomal protein L7/L12 C-terminal domain Bateman A anon SCOP Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.08 0.72 -3.97 191 5164 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 4749 15 1334 3031 2145 67.20 57 51.44 CHANGED EFDVlLsss..GspKIsVIKsV....Rs.lT.GLGLKEAK-LVEuA...Pps...lKEu..luK-EAEclKcpLE-AGA.pVElK ..................EFDVlLpuA...Gs....pKltVIKsV..........Rp.l.....T.GLGLK...EAK-LV-uA...Pp.s...lKEu..VuK.--A.EplKppLEEAGA.pVElK.......................... 0 469 872 1139 +4168 PF00572 Ribosomal_L13 L13; Ribosomal protein L13 Bateman A anon Prosite Family \N 21.10 21.10 21.30 21.70 20.30 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.59 0.71 -4.08 115 5540 2009-01-15 18:05:59 2003-04-07 12:59:11 13 13 4945 229 1626 3368 2257 124.90 46 81.05 CHANGED WhllD..AcsplLGRLAoplAphLtGKp+s.apPt..hssGD..hVlVlNA-KlhlTG............pKh...pp+hYhph..oshsGuh+p.shpphhp.+..pPpcllc....+A......V+GMLP+..sphG+phhc+L+lYsGstHs.atst......pshtlph ..............WaVlD..Aps..ps.LGRLAoplAphLRGKpKspaT..PH..lDsGD..a.V.IVlNA-KltlTG............pKt...s.c.KhYY+H..osas.....G.GlKphoh.tchhp..+pPc+llE..........pAV+GML..P+....ssLGRthhc.KL...+..VYuGsEHs.HsAQpPpsl-.l...................................... 
1 568 1025 1367 +4169 PF01294 Ribosomal_L13e Ribosomal protein L13e Finn RD, Bateman A anon Prosite Family \N 24.90 24.90 25.90 25.00 21.80 24.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.11 0.71 -4.81 6 596 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 416 4 328 576 5 161.50 47 82.53 CHANGED shIhpsHF+K+WQphVKTWFNQPARKsRRRtARptKAt+IuPRPsuGsLRPlV+sPTl+YNhKVRuGRGFoLEELKuAGIst+aA+TIGIAVDHRR+N+SlEuLQsNVQRLK-Y+uKLIlFPRcsp+s+ts-ussEEltpATQlpGshhPIpp.pt+plch+clocctKpapAFssLRht .............................l.psHF+KcW......Q.......p......h.......V+saFsQPu.+K.hRRRpuR.tKAttluPR.P.s.s.......LRPlVRsP.T.l+.Y...N....p....K...lRsGRGF.oLp.ELK.uAGlst+hApTIGIuVDtRR+N...+...S.EuLp..tNVpRLK.p.Y+uK..Ll...lFP.+..+.s..p..t....s.+t.....G......D......u.....s..s...E....-.....l...t..h...A..............s.....p......h....p......s............h.hPltp......h.ht........c....h...hhloc....p.....c....thp..AatpLR................................................................................. 0 130 199 274 +4170 PF00238 Ribosomal_L14 L14; Ribosomal protein L14p/L23e Finn RD anon Prosite Domain \N 23.60 23.60 24.00 23.60 22.70 23.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.89 0.71 -4.36 103 6114 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 5540 233 1569 3096 2117 119.80 57 97.21 CHANGED MIphpohLpVuDNSGA........KplpCIp.Vl............st.p....ppphAslGDlIlloV.Kcu.................sscl+K....uplh+AVlVRs+Kt.h+.RtDGshlpF--NAsVlls..sp.s.p......PhGTRIh...GPVu+El.+..........pc.p........asKIsSLAspll .....................................MIQ..pohLpVADNS...GA.............................................+clhCI+VL....GG.S............pR+aAslGDlIl..soV.KcA...hP.................pGpVKK....G-Vl+A..VlVRT.+Ks..hR..RsDGShI+FD-NAuVlls..sc..t.p.................PhGTRIF...GPVuRELR..........pc.p........FMKIlSLAPEVL........................... 
0 520 978 1314 +4171 PF01929 Ribosomal_L14e Ribosomal protein L14 Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the eukaryotic ribosomal protein L14. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.50 0.72 -3.63 53 552 2009-01-15 18:05:59 2003-04-07 12:59:11 12 7 422 10 304 545 2 73.10 34 48.58 CHANGED .RpshslpplpLTchplc.lsRsu+otsl+KAhcpuclppKWsposWAKKlss+cpRusLoDF-RFKlhhA.++pRsph ....Rpshsl+plpLTchtlc.ls..+uu+scsV+KAaccu.clppKWtposWAKKltspc+RuphoDF-RFKlhhsKptRp........................ 1 106 170 248 +4173 PF00827 Ribosomal_L15e Ribosomal L15 Bateman A anon Pfam-B_1567 (release 2.1) Family \N 21.70 21.70 21.90 22.50 21.60 21.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.07 0.71 -4.80 57 759 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 568 73 409 697 98 176.40 58 88.23 CHANGED uhYpYlcEhW++.ppshh+hLh+.Rhhca...............Rp.sulhRlpRPTR.D+ARpLGYK.AKQGhllhRVRVRRGGR++shs+uup..+P+phGlsplps.s+slQtlAEERuuR+.hsNLcVLNSYWVGpDupYKaaEVILVDPpHsuI+sDsclsWIspssHKtRthRGLTSAG+KuRGLt.s+G.+tspKsR.sShRAs...a+ ......................GAYKYlpE.La+KKQS.D.Vh.RFLhRlRsWpa..........................................RQh.sslpR.ssRPTRPDKARRLGYK.AK.QGaVlYRlRVRRGGRKRPVP.KGusYGKPpppG.VNpLKh.tRuLpulAEER.sGR+.husLR....VLNSYWVspDuoYKa..aEVILVDPtHpAIRpDPchsWIs.ps.V.HK.H.R.EhRGLTSAG+Ku.RGL..sKG.Htappsh..sS.pRtsW........................... 
0 129 221 315 +4174 PF00252 Ribosomal_L16 L16; Ribosomal protein L16p/L10e Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 20.70 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.77 0.71 -4.40 141 7936 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 7072 230 1613 4271 2288 118.30 47 88.89 CHANGED P++s...............KaRK......tp+..s+......h.pGtutt.........Gspls.aGc..............auLpuhEsu.hloucQlEAAR........huhsRhl................++...suc..........laIRl...FPcpslopKPh-sR.MGpGK.........GssctWsAhV+s.GpllFEl...s.G.l..s........phA+cAl+hAupKLPl+s..+hlt ..............................................PKRs.KaRK......pa+....G+....h.+Ghupt........................................Gspls..FGc..............auLpAl..Esu..hlTuR.QIEAAR.........hAhoRah..................................+R........sG+..........lWI+l..FPc+slTt+...shtsR.MGp..GK...................G..ss-hWVA.l.ps.G+ll.aEh.......sG.V..sc.........phA+EAhchAstKLPh+sphl......................................... 
0 541 993 1334 +4175 PF01196 Ribosomal_L17 Ribosomal protein L17 Finn RD, Bateman A anon Prosite Family \N 20.20 20.20 20.50 20.50 20.10 20.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.29 0.72 -3.42 153 4802 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 4671 165 1229 2613 2175 101.60 51 72.16 CHANGED hl.+NhssuLlp.............................+c+IpTThsKAKElRphsE+lIT.................lAK....c..................s.s...............................lpuRRp..shuhl.......................p...scp..................h....................................lpKLFsc........lus+Yts.RsGGYTRIlK.hu..RpG................................DsA.MAlIELV ..............................MLRNhssuLlp.............................HEpIpTThsKAKElRph.lE+..LIT...............................LuK....c.......................s..s............................................................................................lts..R..R....Ahu..hl........................+...scp..................s....................................VpKLF.s-..............luP.R.a.s.s..RsGGYTRIlK.sG...R.pG................................DsAPMAlIELV............................................................. 0 426 794 1042 +4176 PF00861 Ribosomal_L18p Ribosomal L18p/L5e family Bateman A anon Pfam-B_495 (release 3.0) & Pfam-B_741 (release 4.1) Family This family includes ribosomal proteins from the large subunit. This family includes L18 from bacteria and L5 from eukaryotes. It has been shown that the amino terminal 93 amino acids of Swiss:P09895 are necessary and sufficient to bind 5S rRNA in vitro [1]. Suggesting that the entire family has a function in rRNA binding. 
27.80 27.80 27.90 27.90 27.70 27.70 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.24 0.71 -3.90 33 5752 2012-10-02 16:33:16 2003-04-07 12:59:11 17 13 5132 229 1513 3405 2152 116.10 39 83.74 CHANGED scpctpp+++hhhp++hhssst+sRLlVh+SN+alhAQllssststslssASshst-Lpp..s.psshsNhsuAhtlGhLlAcRAlp........cGl.....................stlsaspsuhpYtGRVtAlAcuA+EuGLph ..............................pchtt.p.R.+tp.h.p.p.+...h......t.s...s.....t..p....sRLsVaR.Ss+cIYAQ........lIs...sssut..s..Ls..u.ASol.-.+.-lpp.......................................hs..h..ssh-AAttVGcllAcRAhp........................tGl..............................................ppVs.FD.R....u.G.a.h.Y.HG.R.VpAL...A-AAREuGLpF..................................... 0 490 947 1257 +4177 PF01245 Ribosomal_L19 Ribosomal protein L19 Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 22.80 22.30 21.10 21.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.23 0.71 -4.29 127 4774 2009-01-15 18:05:59 2003-04-07 12:59:11 15 7 4673 153 1203 2595 2397 113.20 52 88.03 CHANGED ts.llc...plEppph.....+..ps..lPpFcsGDTV+Vpl+lh.........E......Gs.........................+-RlQsFEGlVIu++s.t..Gl..spoFTVRKl.StGlGVERhFPlaSPtlcpI-VlR..+G............+VRRAKLYYLRs....hpGKuA.R.IKEch ......................................plIcpl-p..pQl.......+....p-.lPsF+...s..GDTVcVp.l+Vl.........E.Gs.........................+.cRlQ.sFEGV..VIu++s..t..Gl...spsFTV.RKI.S.s.G..l.G.VERsFPlHSPtl..-pIEVhR...+.G........................................c..VR..RAKLYYLRs....hpGKAA..R.IKEp.h............................... 
0 415 776 1019 +4178 PF01280 Ribosomal_L19e Ribosomal protein L19e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 29.00 29.00 19.40 19.00 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.79 0.71 -4.43 63 764 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 542 78 423 735 79 138.90 55 54.02 CHANGED ssLphQKRLAAslLp......sGpsRVWl..DPpchs-lusAhoRpsIRcLIc-GhIhtKsh....................pupSRuRsRcpptp++.tGR++GhGpRKGspsAR.hspKphWhpRIRslRRhL+chR-stc.ID++hYRp...LYh+AKGspF+shppLhpalc.ptth.c ..............s.sL+hQKRLAAuVLp......CGcpKVWL...DPNEhsEIusANS.RpsIRK..LlKDGlII+KPs....................psHSRuRsRc...hptA++..KGRHpGhG...KR.KGTtsAR.hPp.......KhhWMRRhRlLRRLL+.+YR-ucK.ID+Hh.YHp...LYh+sKGNsFK.NKRhLhEaI++tKA-................ 0 147 245 348 +4179 PF00181 Ribosomal_L2 L2; Ribosomal Proteins L2, RNA binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 27.10 27.10 27.10 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.55 0.72 -4.01 50 6547 2012-10-03 20:18:03 2003-04-07 12:59:11 18 12 5878 241 1538 4221 2237 76.40 54 28.40 CHANGED GRNsp.G+ITsR++GGGpKp.tYRhIDFpR.spts......l.uhVhsI-YDPsRsAhIALlpats...Gccp.....YILuspGLplGsplhu .............................GRNsp..G+ITs.RH.pGG.G.HK+.tYRlIDFKR..sK.cs.............IsupVtpI....EYDPNR...oA.pIALl.p.YtD.....GEKR...........................YIlAPc.GlpsGsplh................................... 
0 516 966 1289 +4180 PF00453 Ribosomal_L20 L20; Ribosomal protein L20 Finn RD anon Prosite Family \N 24.10 24.10 25.20 25.00 23.10 22.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.37 0.72 -4.12 129 5432 2009-01-15 18:05:59 2003-04-07 12:59:11 13 10 5206 158 1102 2738 2165 104.60 52 89.30 CHANGED sRV...KpG..sssRpR+KKlLKhAKGahGs+uphaRsApppVh+AhpYAYRDR+p+KR-FRpLWIsRINAAsRtpG...l..oYSpFlpuL+pusIp......lNRKhLA-lAlpDspuFspl ...................hRVKpG..ssuR.tR+K.KlLKh.AKGYhGu+pplaRsApptV.h+uhpYAYRDRRp+KRsFRpLWIsRINAAARtsG...............l...........SYS..+hhpGLK+A...sl-......lsRKhL.A-lAlpDtsAFssl.................................... 0 386 724 933 +4181 PF01157 Ribosomal_L21e L21e; Ribosomal protein L21e Finn RD, Bateman A anon Prosite Family \N 21.60 21.60 22.00 23.90 21.10 21.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.14 0.72 -4.20 6 896 2012-10-01 20:16:17 2003-04-07 12:59:11 13 6 529 75 505 859 81 94.60 51 64.65 CHANGED spScGhRRtTRhhFpRcFRcHGlssloThl+pYKcGDhVcIKssuSlQKGMPHKtaHG+TGpVaNVs.uulGlhlNKcVpsphl.KRIplpsEHI+.Sc ...........po+GhRptTRhhFuR.sFR++.G..s.l.P.Lu.TYh..+hYKhGD....hVDIK...s........sGuVQK.GM.P.H.KhYHGKTG.cVaNV..Tpc.AVGlll.NKpVps....+hlt.KRIs.VRlEHl+cS................................. 
0 137 228 326 +4182 PF00829 Ribosomal_L21p Ribosomal prokaryotic L21 protein Bateman A anon Pfam-B_1297 (release 2.1) Family \N 21.10 21.10 21.10 21.10 20.80 20.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.94 0.72 -3.86 67 4725 2009-01-15 18:05:59 2003-04-07 12:59:11 16 12 4593 156 1157 2545 2044 95.10 44 82.00 CHANGED MYAllcoGGKQa+VptGchlhlEKl.ss.csGsplphscVLhluss.tpsplGpPhlcG..ApVpAcVlpc.s+ucKlhlaKa+p+Kph+++pGHRQ.aTcl ...................................MYAllco.GGK..Qa+VptGphlh.l.E......KL......ss......c......s............G.......pp.lpF..s..c..VLhVuss.......p.lplGs.P.h..V..sG...ApVsAc.Vlpp....G+.u.c..KlhlaKa+.p.+Kp.h.++.+pG.HRQ.aTcl.................... 0 416 764 986 +4183 PF00237 Ribosomal_L22 L22; Ribosomal protein L22p/L17e Finn RD anon Prosite Domain This family includes L22 from prokaryotes and chloroplasts and L17 from eukaryotes. 20.10 20.10 20.30 20.40 19.70 19.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.24 0.72 -4.09 126 6446 2009-01-15 18:05:59 2003-04-07 12:59:11 14 18 5594 236 1745 3781 2204 106.90 41 78.62 CHANGED Aht..+tl+lSscKsphlschIRGpslpcAlshLchs....................................................sc+uuphlhKlLcSAhANA..p.....psps.h-.s-pLhlpclhlscGsphK.RhpPRApGRAs.lpKcssHlpllls- .....................................Aht+tlRhSs..pK.s.RhVsDhIR..G......+pls..cAl.slLpas.........................................................P.c.+.A.Athl....tK.lL..pSAl.A.NA...E.............................pNtu...hD....hc....s..LhV..........scsaVs.c.G.s........s.......h.K..Rh.pP..RA....+G...RAstIp..KcTuHITllVs....................................................... 
0 576 1053 1415 +4184 PF01776 Ribosomal_L22e Ribosomal L22e protein family Bateman A anon PSI-BLAST P56628 Family \N 23.30 23.30 23.70 24.20 22.10 22.20 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.55 0.71 -4.06 35 584 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 350 6 314 534 5 110.50 52 81.65 CHANGED p.KK...ssh....KFslDCopPsEDs.IhD...lusF......E+aL+-RIKVsGK...........sGNL.G.....ssVsls+p.csKlsVsoclsFSKRYLKYL........TKKYLKKssLRDWLRVVAo..sKssYELRYFpl.spsp----- ......................t.pK.sh+FslDCopPVEDs.IhD...su.sF......EpFLp-RIKV.s.G.K...........sGNL..G......s.s.V.sl.p+p..KsKIs..Vs.S..-......h.s...F.S.KRYLKYL...........TKKY.LKKps......LRDWLRVVA.s..sK..s..sYELRYFpI.sp---E--.c...................... 0 100 168 246 +4185 PF00276 Ribosomal_L23 L23; Ribosomal protein L23 Finn RD anon Prosite Family \N 21.10 21.10 21.20 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.83 0.72 -4.04 12 6180 2012-10-02 20:46:34 2003-04-07 12:59:11 15 12 5417 233 1734 3501 2091 88.90 36 80.46 CHANGED hcll+hPllT-K.shphhcp.NphsFhVshcusKhclKcslcplasV+VhsVNThlh.sKhhR...Gph..hhp..hKKAhlpLp.cs.pthshhsch .....................................h.cllhtPll.TEK..uhth............h.........-.c.......s......ph.sFpVsh...cA.sKspIKpAVE.pl.F...s.....V..c..VtsVN.T.hs...........h....p....u.....K.....h........K.......R........hG.....p........h.....h....G......p............p..t..c...hKKAhV.oLp.tG..ppl.h...t.................................................. 0 536 997 1350 +4186 PF03939 Ribosomal_L23eN Ribosomal protein L23, N-terminal domain Finn RD anon DOMO_DM01622 Family The N-terminal domain appears to be specific to the eukaryotic ribosomal proteins L25, L23, and L23a. 
20.50 20.50 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.78 0.72 -3.94 47 612 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 318 8 359 542 1 56.00 52 33.22 CHANGED tss..ts+AtpA.............pKAllKG..spu.........p+p.+KlRTSspF+..................RPKTL+hsRsPKYPR+SsPcp .........................................................................................s.pspuKAhKA........pKAVlKG...sau......................+Kt..+K.lRTSsTF+..................RPKTL+LpR.pPKYPRKSsP+.............. 0 80 141 211 +4187 PF01246 Ribosomal_L24e Ribosomal protein L24e Finn RD, Bateman A anon Prosite Family \N 20.80 20.80 20.80 20.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.37 0.72 -4.12 7 1060 2012-10-03 05:12:49 2003-04-07 12:59:11 15 10 541 76 653 956 68 67.60 43 44.21 CHANGED h+schCpFsGtcIYPG+GhhFlRsDupVFhFtsSKCcp.F+.++pPR+lsWTshYR+pHtKs.stEstccp ...............M+hchCtFsutpIYPG+Ghh.aV.....R....sD.uKlFpFp.sSKC.c.ps.F.p.h+....+NPRKlpWTthaR+tptKths.c...................................... 0 228 377 537 +4188 PF01386 Ribosomal_L25p Ribosomal L25p family Bateman A anon [1] Domain Ribosomal protein L25 is an RNA binding protein, that binds 5S rRNA. This family includes Ctc from B. subtilis Swiss:P14194, which is induced by stress. 21.50 21.50 21.60 21.90 21.40 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.86 0.72 -3.86 184 3556 2009-01-15 18:05:59 2003-04-07 12:59:11 14 2 3478 158 851 2159 2048 87.40 33 49.38 CHANGED LpAphRp....p..hG.KuuuR+LR+.pGtlPAVlYGts.pc.s.....hslslstpcltchl....ptshtssl.lsLp.lc....G......pp.tpsll+-lQhcPlps.plhHlDF ............lpuphRp.....p..hG..KuAuR+LRc.sG.plPAllYGts..pp..s........................lslplcppclhphlt...ptt...h...h....s.s.l.lsl.s.l.c.G...........cp.hp.Vllp-lQhcPh.+s.plhHlDF................ 
0 295 572 730 +4189 PF01016 Ribosomal_L27 Ribosomal L27 protein Bateman A anon Pfam-B_1340 (release 3.0) Family \N 20.50 20.50 20.80 21.10 20.00 20.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.54 0.72 -4.19 88 4787 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 4661 166 1215 2396 1851 80.80 60 81.70 CHANGED AHKKuuG.So+N.GRDSpuKRLGVK+aGGphVpuGsIIlRQRGT+hHPGpNVGhG+DaTLFALh-GhVcFppp......pp...++hVsVh ...................AHKKuuG.ST+N.GRD.SpuKRLGVK+aG...GphVpAGsIIlRQ.R.............G.............T...+hHPG.sN.....VGhG.+D.cT....LFAhsDGhV+FcpK........s+s...RKhVSV.......................... 1 418 783 1027 +4190 PF01777 Ribosomal_L27e Ribosomal L27e protein family Finn RD, Bateman A anon PSI-BLAST P51419 Family The N-terminal region of the eukaryotic ribosomal L27 has the KOW motif. C-terminal region is represented by this family. 22.60 22.60 23.50 22.90 21.40 20.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.14 0.72 -3.86 42 533 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 381 6 297 498 5 82.70 49 58.62 CHANGED KVTK+MuK++ltKRS+lKPFlKllNYNHLMPTRYol.Dl..t.Ksl......ls..p-sh+-ssp+ccA++pl+ttFE...ERa..........KsGKN+WFF.pKLRF ...................KVT+pMuKKKlsKRSK.lKsF..lKllNYNHLMPTR..........Yol.Dl......Ksl........ls..p-sh+-ssp..+..c....cA++p.....sKtt.hEERY............csGKN+WFF.pKLRF.................................................... 0 107 164 238 +4191 PF00830 Ribosomal_L28 Ribosomal L28 family Bateman A anon Pfam-B_1561 (release 2.1) Family The ribosomal 28 family includes L28 proteins from bacteria and chloroplasts. The L24 protein from yeast Swiss:P36525 also contains a region of similarity to prokaryotic L28 proteins. 
L24 from yeast is also found in the large ribosomal subunit 20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.65 0.72 -4.14 114 4841 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 4485 146 1176 2313 1891 59.80 39 73.86 CHANGED +c.CplTGKpshh.GNsVS.............H..upp......+T+RpatPNLppp+lassp.schl+l+los+sL+o..lcKpG ........................+hCplTG.Kpshs.GNs.hS..............H..upN.............pTKR+ahPNLpphRhhl.-...s...+h.h.+.l..pVSs+uL+s...lpt.............. 0 402 770 1005 +4192 PF01778 Ribosomal_L28e Ribosomal L28e protein family Bateman A anon PSI-BLAST P17702 Family \N 21.60 21.60 22.10 22.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.56 0.71 -3.69 67 772 2009-01-15 18:05:59 2003-04-07 12:59:11 12 8 384 5 475 736 4 113.20 30 53.54 CHANGED LhWpll..+psss.FhhKpp.......stpFop-shNlsshsoh+asG...................................Lsss+slsl..pss..sG....tlhlshKps...cpsppPu.+thpp.....hplsp..sh+cuhcplcphhpt.....aRt-Ltpt...uhpRhot.............lhpst ...........llWpll..pps..s.F...h.lKp.p...........sppFs+.......p.hNlsulsshp.ss...................................LuNs.+hssl...pts...pG..................slhlh.h.Kph...ccsp..tPu...+h....ap+..............splsc...shc+slpplccplth...........apt.Lt.pt.....shpRhotlhp..h...................... 0 169 262 389 +4193 PF00831 Ribosomal_L29 Ribosomal L29 protein Bateman A anon Pfam-B_1296 (release 2.1) Family \N 24.50 24.50 24.50 24.60 24.30 24.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.53 0.72 -4.28 156 5085 2012-10-02 11:59:50 2003-04-07 12:59:11 18 7 4872 242 1354 2614 1785 57.70 41 77.08 CHANGED spElRphos.cELpcclt-L+pELhpLRhppus.Gpl..ppsscl+plR+sIARlhTlls-+ ................hpELRphos.-ELppc...LtpL...KcELFsLR..hQtAT...GQL..ppsscl+pVR+sIARl+TllpE+............. 
0 475 879 1141 +4194 PF01779 Ribosomal_L29e Ribosomal L29e protein family Bateman A anon PSI-BLAST Q24154 Family \N 20.70 20.70 21.30 25.30 19.20 18.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.16 0.72 -4.09 30 523 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 333 6 267 471 3 39.40 67 40.99 CHANGED KSKNHTsHNQ.N+KAHRNGIKKP+c..pRa.ShKGhDsKFL+N .........KSKNHTsHNQ.o+KsHRNG.I.KKP+s...pRY.....t.SLK..G.lDPK..FLRN........... 0 77 129 188 +4195 PF03947 Ribosomal_L2_C L2; Ribosomal Proteins L2, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 21.50 21.50 21.70 21.90 21.10 21.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.54 0.71 -4.34 58 6875 2012-10-01 20:16:17 2003-04-07 12:59:11 13 12 6045 240 1601 4450 2557 124.70 56 47.82 CHANGED lchGNslPLppIPlGThlHNlEhpPGcGGplsRoAGohAplluKps..p.ashl+LPSGEh.+hlpppC....hATIGtVuNhsppphslG.KAG+sRWhGh.....RPpVRGVAMNPVDHPHGGGEG+sthGR.psVoPWGpP ..............................IKsGNsLPLp....sIPlGTslHNlEl+P........G..+GG.......Q.lARoA..........Gs.AQ..........llu.......+...........-G................pYsplRLsSGEh.Rhl.ssC.........+A........TlGpVGNsp.+.t..h..slG.KAGpsRWhGh.....................RP.sVRGss.MNPVDHPHGG.GEG+ss..h....G..R....p...PsoPWGh................................... 
0 533 1001 1341 +4196 PF00297 Ribosomal_L3 L3; Ribosomal protein L3 Finn RD anon Prosite Family \N 26.50 26.50 26.80 26.80 26.40 25.20 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.72 0.70 -4.99 13 5795 2009-09-16 22:34:17 2003-04-07 12:59:11 17 16 5080 235 1672 3675 2404 198.80 40 84.21 CHANGED KsGMTplh.........scs.tss.sVTllEssPssVltlpshs...............................................s-GhpulplshpphKcppsshplluHhphtsss....tKtalhEhplssu..........cpaE..pslsV.slFppsEhlDVtGlTKGKGFpGshKRWGhp+hPtpHupu..+R+lGslGA.hcPuRVhhosthsGphG.c+RT.hNhKIh+lss......................................csshlhlKGuVPGs+pplVpl+ ..........................................................................................KlGMTplF.............scs.G.hlP.VTVl-ss.s...shVsQ..l+oh-................................................................................................................................sD...G.Y...p....A..l..Q..l...s...h...s....s.....h........+........t.....p..p........s........s..K...P.....t........G...H......htKAss...................t..................spRhlh.E...h+.hsss............................pthp.l.Gp..plsl.-l.FtsGc.hVD.VoGsSKGKGFt.........GslKRasFpt.t.s.t...o.H.G.sph....HR.psGSl....Gs...tts..P....uR.VFKG++MAG+MG.sc+..VT..l.QNLcV.l.+VDs...............................................................EcsllLlKG.u.V.PGsptuhlhl+................................... 0 600 1058 1399 +4197 PF00327 Ribosomal_L30 L30; Ribosomal protein L30p/L7e Finn RD anon Prosite Domain This family includes prokaryotic L30 and eukaryotic L7. 
22.40 22.40 22.40 22.50 22.30 22.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.32 0.72 -4.37 115 5033 2009-01-15 18:05:59 2003-04-07 12:59:11 15 10 4388 240 1490 2792 891 51.90 37 54.41 CHANGED tlplshl+u.hl...utt.cp+pslcsLGL++lspsVhhcsoP.slp.........GMl.pcVpaLV .....................l+lThh+S.sI....Gp..pp+tT....l.puLGL+...+lspo...Vhh.c.D.s.P.ulR.............GMlppVpahV.................... 0 474 898 1215 +4198 PF01197 Ribosomal_L31 Ribosomal protein L31 Finn RD, Bateman A anon Prosite Family \N 21.80 21.80 22.20 22.70 21.70 21.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.37 0.72 -3.98 163 5588 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 4410 108 1124 2737 1977 73.20 45 92.06 CHANGED M.KpsIHPc.Yppl.shpss.sGspFhopSTh...............pt..l...pl-lsSps..HPFYTG.c.p+h.lDosGRV-+Fp++a.utt ..................................M.Kp..sIHPc.Y+.p.l.sh.pso.sG..ptFhotSTh.................shshl...pl-lsSps..HPFYTG...+....Q+h..ls.s.sGRV-+Fp+RaGh.h............... 0 368 722 947 +4199 PF01198 Ribosomal_L31e Ribosomal protein L31e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 26.10 25.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.60 0.72 -4.31 59 847 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 531 78 443 768 76 81.00 49 67.39 CHANGED pcllTR-hTIsL++th+tsshKKRAP+AlKpIRcFsp+pMtTc..DVRlDscLNctlWu+GI+ssPtRlRVRluR+cs-p-suppc .....................................-VVTREYTIs.lHK+.lHGl..............sFK.KRAPR.AlKEI+KFAtKpMGTs..DV.RlD.s.cLNKtlWu+GI....+.s.VPhRl.RVRloR+.RN--.E-u.p...................... 0 137 224 315 +4200 PF01655 Ribosomal_L32e Ribosomal protein L32 Bateman A anon Pfam-B_1346 (release 4.1) Family This family includes ribosomal protein L32 from eukaryotes and archaebacteria. 
25.00 25.00 26.20 25.80 24.60 24.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.22 0.72 -4.01 75 903 2009-01-15 18:05:59 2003-04-07 12:59:11 13 11 637 76 442 805 68 103.50 53 78.33 CHANGED +ph+FpRapSc+ap.Rlss..sWRKP+G.lDs+lRR+a+Gphh...hPpIGYGSs+csRtlhPS..GacchLVpNlc-L-hL......pscshAucIApsVuu+KRhpIl.c+ApcLul+VhNs ..................+sK+FhRHpSDRah.+lpp..sWRKP+G.IDsRVRR.RF..K.Gp.hh...MPsIGYGSsK+TR......HhlPo..GF+..+F.LVHNV.+-LEhL....hM.pN.+sasAE......IAHsVSu+KRptIl.cRA.tpLul+VsN......................... 0 133 228 323 +4201 PF01783 Ribosomal_L32p Ribosomal L32p protein family Bateman A anon PSI-BLAST P31558 Family \N 21.20 21.20 21.20 21.30 21.10 21.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.89 0.72 -3.79 171 5348 2012-10-03 10:42:43 2003-04-07 12:59:11 18 14 4994 161 1126 2495 1357 53.90 34 82.85 CHANGED AVPK++sS+o++chRRup.h.....plpss.sl................s.ssp..su.phplsHplssss.G..aYps+plhpt ...............AVPp++sS+o..++shR.Ro..+.h......pl.sss..sl................s.s.s..sG..-.....h.+.lsH...+...lshs...G.....hYpG+plh............................................. 0 378 726 946 +4202 PF00471 Ribosomal_L33 L33; Ribosomal protein L33 Finn RD anon Prosite Family \N 21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.48 0.72 -3.72 117 6317 2009-01-15 18:05:59 2003-04-07 12:59:11 15 6 4846 154 1214 2452 978 49.00 45 90.31 CHANGED R...htlpLt..Cop..............sssppYsTsKN++spsc+LEl+KaCs........hspKHTlH+EtK ....................R.pIpLt..sot........................sss+pYhTsK..N..+..R..N..s.P-RLE..lKKasP........hs+KHsla+EsK............ 
0 417 791 1027 +4203 PF00468 Ribosomal_L34 L34; Ribosomal protein L34 Finn RD anon Prosite Family \N 25.00 25.00 30.30 33.30 22.90 22.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.21 0.72 -4.28 143 4214 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 4149 154 1033 1670 249 43.90 63 86.81 CHANGED hKR.TaQPoph+RpRsHGFRuRMp.T+sGRcVLppRRtKGR+cLos ............MKR.Ta.Q.Poph.+RpRsHGFRsRMu.T+sGRpVLApRRsKGRKpLos.... 0 353 675 875 +4204 PF01199 Ribosomal_L34e Ribosomal protein L34e Finn RD, Bateman A anon Prosite Family \N 20.60 20.60 21.50 21.40 20.00 19.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.21 0.72 -3.76 52 630 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 462 6 352 585 12 89.50 47 77.16 CHANGED Ms...+lhh....++R.sYpT+SN+++hs+TPG..G+lVhpahKKpsshP+..CupCtph..LpGl.thRstchp+..hsKpp+pVsRsYGGshCspCl+c+Il+AFL ...............Ms..p.RlTaRRR..sYsT+SN+pRll+T.PG..G+LVh.ahKKpussPK..CGs..C.st+..LpGl.......slRPp.chtp..lS..+scKoVsRsYGGshCupCV+-..R..IlRAFL................................................ 0 122 194 280 +4205 PF01247 Ribosomal_L35Ae Ribosomal protein L35Ae Finn RD, Bateman A anon Prosite Family \N 21.00 21.00 21.00 29.10 20.90 19.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.25 0.72 -4.21 44 533 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 374 8 317 528 13 93.90 51 77.29 CHANGED LY..sKulahGY+Rup+NQppssuLlKIEGVss+c-upFYlGKRlsYVY+uppppp.........t.o+hRsI.WGKloRsHGNSG....sVRA+F+.pNLPspAh.GpplRlh ....................LasKuhahGY+Ruh+NQp.pT.uLlKIEGVps+c-sp.FYlGKRs.AYVY+A+pp..pp.....................so+hRsI.WGKVTRsHGN..SG....sVRAKF+.pNLPs+uhGtplRVh.................... 
0 112 169 243 +4206 PF01632 Ribosomal_L35p Ribosomal protein L35 Bateman A anon Pfam-B_1156 (release 4.1) Family \N 22.20 22.20 22.30 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.95 0.72 -4.07 22 4520 2009-01-15 18:05:59 2003-04-07 12:59:11 14 1 4430 154 1091 2113 1842 60.70 44 86.05 CHANGED sKhKTp+uusKRFKtTusG.thhRc+As+pHlLtKKosp+K.R+Lcppshlsps-sctlcthL ............PKhKT++uAAKRFKhTuoG.plK.Rp+Ahp..pH....lLs..+..K.....osKpK.R.....pLR.tssh.V..s.p.uDh.cpl+phL.............................. 0 376 708 922 +4207 PF00444 Ribosomal_L36 L36; Ribosomal protein L36 Finn RD anon Prosite Domain \N 21.70 21.70 22.00 21.80 21.60 21.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.90 0.72 -3.97 108 5148 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 4407 121 1073 1688 153 38.20 58 90.83 CHANGED MKVRuSl...K+hCcsC+llRR+......G+lhVIC.ssP+HKQRQG ......MKVRsSV...Kp...h.C..ccC+ll.+R+......G+lhVIC....sNP+HKQ.RQG... 0 360 686 906 +4208 PF01158 Ribosomal_L36e L36e; Ribosomal protein L36e Finn RD, Bateman A anon Prosite Family \N 19.40 19.40 19.90 20.30 18.60 18.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.33 0.72 -4.06 39 534 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 365 6 288 505 4 94.80 52 79.12 CHANGED ss+...slsVG..LNKGHpsT+...........p.ppsRsS++...KGhhoK+s+hVR-llREVsGaAPYE+RshELLKluKD...KRALKFtKKRLGTHhRAK+KREEhpslltt..RKt ..................h.+.slsVG..LNKGHp.sT+...............ps.tsR.S+p............KG.thoK+o+FVR.-llREVsG.aAPYE+Rs.hELLKloKD...KRALKhhKK.R.L..GTHhRAK+K.+EEhsslltt.R+..................... 1 97 156 230 +4209 PF01780 Ribosomal_L37ae Ribosomal L37ae protein family Bateman A anon PSI-BLAST P54051 Family This ribosomal protein is found in archaebacteria and eukaryotes. It contains four conserved cysteine residues that may bind to zinc. 
21.90 21.90 21.90 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.22 0.72 -4.12 55 636 2012-10-03 10:42:43 2003-04-07 12:59:11 14 4 512 74 373 557 67 86.00 53 92.54 CHANGED s+RTKKVGlsG+aGsRYGuoLRKpV+clElpQ+u+YsCsaCG+pu.VKRpusGIWpC..+pCscshAGGAYsssTsuutos+psIcRl.cchp- ..............KRTKKV.GIsGKYGTRYGASLRKhVKKhEloQ...Hu+YsC..sFCGKss.lKRp.uVGI.WpC..cpCt..+slAGGAashsTsuAsol+S.slRRL.+-...p................... 0 127 212 300 +4210 PF01781 Ribosomal_L38e Ribosomal L38e protein family Bateman A anon PSI-BLAST P23411 Family \N 21.00 21.00 21.20 21.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.30 0.72 -4.22 26 458 2009-09-10 15:39:13 2003-04-07 12:59:11 13 5 349 6 280 401 2 68.40 56 82.79 CHANGED P+pIpDIK-FLphsR.RpDA+...Sl+IK......Kssps.....TKFKVRCS+YLYTLVVsDpcKAcKLcQSLPPsLplp-l ....................................P+pIp-IK-FLhhAR..R..KD.A+.....SV+IK...........KNpcs............lKFKVRCSRYLYTLVlp.Dp-KA-K.LKQSLPPuLpVp-l............... 1 97 156 225 +4211 PF00832 Ribosomal_L39 Ribosomal L39 protein Bateman A anon Pfam-B_1293 (release 2.1) Family \N 21.60 21.60 21.80 24.60 20.60 21.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.05 0.72 -4.49 44 553 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 446 76 309 534 23 42.30 60 75.51 CHANGED pKhRLAKAhKQNR.lPtWlhlKTsp+lchNsKRRaWRRo+LKl ...hKp+LAKttKQNRPlPpWlRh+TsNpIRYNuKRRHWRRTKLt........ 0 101 179 245 +4212 PF00573 Ribosomal_L4 L1e; Ribosomal_L1e; Ribosomal protein L4/L1 family Bateman A anon Prosite Family This family includes Ribosomal L4/L1 from eukaryotes and archaebacteria and L4 from eubacteria. L4 from yeast has been shown to bind rRNA [1]. 
20.80 20.80 21.20 20.80 19.80 20.60 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.01 0.71 -4.91 165 5579 2009-01-15 18:05:59 2003-04-07 12:59:11 17 12 4937 231 1620 3471 2695 191.40 36 83.57 CHANGED lpL...ssplFsh.....ph.ppsll+psVht.hus.................pRpGTtssK.sRu-VsG....us+KPa+QKGTGpARtGoh+..............uPha+GGGhsaGP+P.R.sash.+lN+K..........h++hAl+oALotpsps.splhll-.....s..h.p..lp............psK......................TKphhphLps.................lph............................p.............psLllss........p................sp.................................slhhuuRNlssV.clhsspp.....................lNlhcllptcp....lllTcsAlcp.lpch ........................................pLscslFu.h..-.h...Nps..llapsVhs.hAs...........tRQGT+.ssK..sR....u-..VoG....uG.+...KPa+QKGTG.RA....RpGolR..............uP.aRGGG.h.sF.uPpP..R..sa.u..h.KlsKK.........................h+RhAl+SsLSp.....pspp.sp.l.hll-.............sh..s...hp..........................sPK......................................TKp..h.sph..L.ps......................lsl.........................................c...cs..L.llss...................c...............hc..c.........................................................slh...LuuRN..l....s..s.....V....cVhs..s.s.s.....................lsshsllstcc....llhTpsAlcplEE.h......................................................................... 0 573 1032 1366 +4213 PF01020 Ribosomal_L40e Ribosomal L40e family Bateman A anon Pfam-B_884 (release 3.0) Family Bovine L40 has been identified as a secondary RNA binding protein [1]. L40 is fused to a ubiquitin protein [2]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.75 0.72 -4.37 3 580 2009-09-11 04:53:38 2003-04-07 12:59:11 12 14 450 7 314 466 8 48.80 64 40.21 CHANGED VMEPTLsALAKKYNCEKKVCR+CYARLPPRATNCRKKKCGHSNsLRhKKKLK .............l.IEPSLp.LApKYNC-KhICRKCYARL.PRAoNCRK+.KCGHoNpLRPKKKlK......... 
0 93 167 254 +4214 PF05162 Ribosomal_L41 Ribosomal protein L41 Wood V anon Wood V Family \N 22.30 22.30 23.10 23.10 22.20 21.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.82 0.72 -7.02 0.72 -4.27 3 137 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 118 2 61 85 2 24.80 85 51.88 CHANGED MRuKWKKKRMRRLKRKRRKMRQRSK ..MRAKWRKKRhRRLKRKRRKMRtRSK.... 0 22 33 49 +4215 PF00935 Ribosomal_L44 L44; Ribosomal protein L44 Finn RD, Bateman A anon Pfam-B_1065 (release 3.0) Family \N 25.00 25.00 26.30 28.60 23.70 22.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.73 0.72 -3.97 44 678 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 513 75 382 617 73 76.10 56 65.83 CHANGED pHo.HcVspYK.pGKtothupGcRRacR..+ppGaGGpp+Pl.c+.uKs......TKKlsL+hcCspC+ptphpsh..hRsK+FElsp ..........KHp.HKVTQYK..pGKsShhAQ.GK.RRYDR..KQsGYGGQTKPlF+KKAKT......TKKlVLRLEC..s..p..C..+p+pp.h.sl..KRCKHFELG.G.............. 0 123 209 303 +4216 PF00281 Ribosomal_L5 L5; Ribosomal protein L5 Finn RD anon Prosite Domain \N 20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.57 0.72 -4.18 17 5354 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 5030 233 1428 3077 2087 56.40 51 30.97 CHANGED Nh..MclP+..lpKlVlNhGlGEuspc...LppuhptLptIoGQKPlhT+A++olusF+lR ................................slMplP+..l-KIVlNMGVG...E.Ass..c.pKhL......-.sA..........s.p-.....Ls.......hIo.GQKPl......lTK.A+...KSlAuFKlR................. 0 489 916 1202 +4217 PF00673 Ribosomal_L5_C L5_C; ribosomal L5P family C-terminus Bateman A anon Pfam-B_69 (release 2.1) Domain This region is found associated with Pfam:PF00281. 
23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.12 0.72 -4.25 133 5473 2009-01-15 18:05:59 2003-04-07 12:59:11 16 5 5067 233 1480 3191 2159 95.30 53 52.03 CHANGED sIGs.+VTLR.Gc+MacFL-.+llslslPRl+DF+GlsspuFDupG.NashGl..cEpllFPElc..Y...Dth.p..lhGMDlsll...............ToAcssp..........cu+.LLcthuhPFh ...........PIGsKVTLR.G-.RM.a.EFL-.+LlsluLPRl..R....DF.RGl........s........s........+u.F....D....G........R........G.N.YohGl..+EQl..I..F.PEI-.....YD+l-+..l..RGhDIslV.................................TTA...p...oD-..........EuRtLLpthuhPF.t............................................... 0 504 944 1249 +4218 PF01159 Ribosomal_L6e L6e; Ribosomal protein L6e Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 22.20 22.40 20.50 21.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.43 0.72 -3.81 46 607 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 377 8 327 549 4 102.90 43 45.55 CHANGED lNGVPLRRVsQpYVIATST.KVDlusVc..........l.cc..lsD....sYFp+p+....pcp+......Ks.Es.shFs..pctp.cppsscpR+s.......D.....QKsVDssllsuIKKp.Pp...LppYLuupFoL+suphPHchpF ...........................................................lNuVPLRRlsQpYVIATST.KlDlSu..Vc..........l.c.+..lsD....tYFp+c+..........p+t+.........+p..Eu.-hFp.......pcpc...+h.....plsppRKt..........D..............QKsVDptllstIKt.h.P.......LpsYLtuhFuLps.G....hPHchhF............................................. 
0 110 171 254 +4219 PF03868 Ribosomal_L6e_N Ribosomal protein L6, N-terminal domain Finn RD anon DOMO:DM07096; Domain \N 25.00 25.00 25.80 25.80 24.90 24.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.95 0.72 -4.08 19 310 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 167 1 127 268 0 59.80 38 24.76 CHANGED hhspK...tpt+sSRNPsLsRGIGRYSRSAMYpR+uLYKhK...sKsstsh......KccsttsssKsl ..................ht...tK....tp.+ssRNssLs.+GIuRYSRStMYp++..A.lY+hK....hpsstst.......Kp+s.sslsKs............................... 0 38 57 81 +4220 PF01248 Ribosomal_L7Ae Ribosomal protein L7Ae/L30e/S12e/Gadd45 family Bateman A, Finn RD anon Prosite Domain This family includes: Ribosomal L7A from metazoa, Ribosomal L8-A and L8-B from fungi, 30S ribosomal protein HS6 from archaebacteria, 40S ribosomal protein S12 from eukaryotes, Ribosomal protein L30 from eukaryotes and archaebacteria. Gadd45 and MyD118 [1]. 20.40 20.40 20.50 20.50 20.30 20.20 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.83 0.72 -4.47 54 5260 2012-10-10 14:40:03 2003-04-07 12:59:11 21 25 2077 176 2228 3938 199 91.70 23 56.33 CHANGED pplpphlphuhcssp.lthGhpcssKslcps..cA+LlllApssss.phhp......hl.hlsp..cpslsh..hhls..uttcLGphsGhphh.ssuhulhssGsuphlh ...........................p..h.phlt.h.uh..cstp...lhhGhpp.......shK....s..lcps.............pup...........L..V.llA......p....Dss........s..........phht.........................l..shsp..........ctslP.h....h.h.lt.....s.p........t....cLGpssGpph......ssslulhs.t.t....h................................. 
3 763 1239 1762 +4221 PF03948 Ribosomal_L9_C Ribosomal protein L9, C-terminal domain Finn RD, Bateman A anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.56 0.72 -3.90 147 4474 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 4407 129 996 2561 2059 87.20 34 56.38 CHANGED hp....pA..pplAppLp..shs..lpltt+uG-.sG+LFGSVTspDIA-ulppp..GhclD++clpl.sp..s.IKslGpaplsl+LHs-Vsuplplp..Vssp .........................................h.spApthtppLp....shp..VplssK.u......G..-....sG..+.L..F.G.....Slos+-IA-Alppt....G.......lc.......l-K+cl......cLsp...s.I+.slGpapVsV..+L.Hs.-VpuplpVpVst................. 0 346 671 852 +4222 PF01281 Ribosomal_L9_N Ribosomal_L9; Ribosomal protein L9, N-terminal domain Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.04 0.72 -4.72 157 4600 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 4492 134 1130 2731 1908 48.00 47 29.79 CHANGED McVILhccVpsLGctG-lVcVpsGYARNaLlPpshAhhATpts...lcph....c ................McVILlccVtsL..G.p..hG-lVpV+sGYARN.FLlPpGhAl.ATptslpthc.................... 0 398 748 961 +4224 PF00338 Ribosomal_S10 S10; Ribosomal protein S10p/S20e Bateman A, Finn RD anon Prosite Family This family includes small ribosomal subunit S10 from prokaryotes and S20 from eukaryotes. 
21.20 21.20 21.90 21.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.11 0.72 -4.14 193 5735 2009-01-15 18:05:59 2003-04-07 12:59:11 17 13 5081 211 1645 2828 1995 96.50 50 83.97 CHANGED lRIcLcua-t..phL-pssppIlcsu......ccp.uhplpGPlsLPT+p.pphTlh+SPasp.Kco+-p.FEhRsHKRll-l................pssscsl..ctL.....h.plpl....PsGVsl-lpl ..................................................IRIRLKAaDH..+llDpostcIV-TA.................KRT..GAp....V.p...G.....P.I.P.LPT++..phaTll.+SPHhp..K.....cSREQ.FEhRTHKRLIDI.l.......................pP.T...s+TV....DuL......M...+L-L.....suGV-lEIp....................................... 0 550 1016 1368 +4225 PF00411 Ribosomal_S11 S11; Ribosomal protein S11 Finn RD anon Prosite Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.13 0.72 -3.88 13 6124 2012-10-02 16:33:16 2003-04-07 12:59:11 14 12 5534 199 1594 3230 2256 109.70 55 79.77 CHANGED GlsHIpuSF....NNTIlTlTDlpGpslsWuSAGusGFKuoR.KuTPaAAptAApsAActsh-pGhpplEVpl+G........PGsG+-uAlRAlp+uGlhIspIcDVTPhPHN.GCRPPK+RR .....................................................GlAHIpuoF....NNTIVT.I.T...Dh.p.......G........N.........s........l.u..W.....u.....SA...G.u.h.G.F...K.GSR.....K.STP.FA.AQ..hA.A...E...s.Auc..t..A.........h....E.......h.......G....l....+..s...l-...V...h..V.K.G..............................PGsGRE....o.A..l....RA.L.....p....u.....u...G.....l....c........l....o....tIpDV.TPlPH...N.GCRPPK+RR...................................... 0 517 960 1328 +4226 PF00164 Ribosom_S12_S23 S12; Ribosomal_S12; Ribosomal protein S12/S23 Sonnhammer ELL anon Prosite Family This protein is known as S12 in bacteria and archaea and S23 in eukaryotes. 
19.90 19.90 20.00 20.00 18.80 18.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.68 0.71 -4.69 18 6678 2012-10-03 20:18:03 2003-04-07 12:59:11 20 19 5947 204 1582 3422 2092 119.60 62 94.59 CHANGED sThppLlR+c...........RcchtpcsKssALcGsPp++GlChclhslpPKKPNSAlRKlsRV+Lp..NGhcVTAaIPG-G..HsLpEHs.VLlcGGp......VtDLPGVRY+ll+GhhDsuGVp..tRtpuhSKYGsc+P+ .....................................TlpQ.Ll.R.+.s..............R.p...p.h.h.p+.s..+....sP.A.............L..............p...............u.............s.PQ+.RGV...CTRVh..............ThTPKKPNSALRKVARV..RL......o........NG......hEV.TAYIPG.G...HNLQEHSVVLlRGGR.......VKDL..PGVRYH..IVRG.sLDsuGVp........sRpQuRS...KYGsK+PK...................................... 1 531 988 1316 +4227 PF00416 Ribosomal_S13 S13; Ribosomal protein S13/S18 Finn RD anon Prosite Family This family includes ribosomal protein S13 from prokaryotes and S18 from eukaryotes. 24.60 24.60 24.70 24.70 24.40 24.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.39 0.72 -3.59 15 5791 2012-10-02 21:21:44 2003-04-07 12:59:11 17 7 5215 199 1480 3234 2174 105.00 49 84.93 CHANGED RlhssslsusK+..lhhALTtIaGIG++pAptlhpcsslDtspRsu-Lo--plcpltphlsp.......................ahlpu-LcpclppDIcRLhcIcsYRGlRHhtGLsVRGQRTKTNuRT .......................................RIAGVslPpc...K+..l.IuL.T.a.IaGIGpspup....pIltps.G.............ls.....s....h+lp.-Lo-...-plsplRc..ls.p.........................................................................ahVEGDLRREls.hsIKRLh-lG..s....YRGlRHR+G.LPVRGQ+TKTNART.......................................................................... 0 503 954 1256 +4228 PF00253 Ribosomal_S14 S14; Ribosomal protein S14p/S29e Finn RD anon Prosite Family This family includes both ribosomal S14 from prokaryotes and S29 from eukaryotes. 
19.70 19.70 19.80 20.50 19.30 19.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.84 0.72 -4.58 178 6805 2009-01-15 18:05:59 2003-04-07 12:59:11 16 10 5516 200 1562 3442 1882 54.30 45 63.36 CHANGED hp..lpph.cspshsRhpsRChlsGRs+..uhhR+F.sl..sRhsFR-hAtpG.lPGlpKuS ................t..lpphP+s.usssRhpsRC............phsGRP+..uhh..RKF.GL..sRlphREhAhcGplPGlpKuS.................... 0 508 968 1302 +4229 PF00312 Ribosomal_S15 S15; Ribosomal protein S15 Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.90 20.90 21.10 21.90 20.60 19.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.55 0.72 -4.22 9 5932 2009-01-15 18:05:59 2003-04-07 12:59:11 17 14 5410 212 1580 3366 2136 82.00 44 77.02 CHANGED htpthhlpphtcpppspGSsEhQlhhLTp+l.+LppHhccH+KDapSpRGLhphluKR++LLsYL+pcs.....hh+YcphIppLGlR ...................................c..Ksplltca..t..p..p..c..s..DT.GSsEVQlAlLTt+IspLs.cHhc..p.H....K..K.D+cS+RGLl+hVu+RR+LLsYL++pD......htRYcp...LIpcLGLR.................... 0 536 990 1322 +4230 PF00886 Ribosomal_S16 Ribosomal protein S16 Bateman A anon Pfam-B_1025 (release 3.0) Family \N 21.20 21.20 21.40 21.60 21.10 20.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.89 0.72 -4.25 158 5911 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 5742 192 1200 2936 2304 56.10 47 58.89 CHANGED RhGpK+pPaY+IVVsDuRssRD.G+aIEplGhYsPh....tp........................................plplch-+hphWlspGAQPo ..........RhGtK+p..P...FY+lVVsD.uR...s.....R..D.GR..hIEplGhY.....N..Plts..............................................plplch-+.lhaWLspGAQPo.............................. 
0 415 780 1019 +4231 PF00366 Ribosomal_S17 S17; Ribosomal protein S17 Finn RD anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.70 20.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.33 0.72 -3.93 15 5324 2012-10-03 20:18:03 2003-04-07 12:59:11 15 10 4890 201 1529 2971 1891 68.80 50 69.69 CHANGED GhVVSsKMcKTllVcl-phthHPKYs+hlKRpKKh.AHsssshppl..GDlVcItEsRPLSKTKRFpVlcV .............................GpVV.S.D.K..M-KTIsVtlE.p..........hh.p.HPlY.u.........K.......hl++opKh....+AHD.Es..N.p.s..p.l....GDhVcI..h.E.s.RPLSKTKpapLVc........................ 0 521 966 1279 +4232 PF00833 Ribosomal_S17e Ribosomal_S17; Ribosomal S17 Bateman A anon Pfam-B_1566 (release 2.1) Family \N 22.80 22.80 23.30 24.70 22.50 22.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.63 0.71 -4.36 32 683 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 508 9 396 637 67 107.30 53 84.61 CHANGED MGRVRTKTVKRAuRhllEKYYs+LT.hDFppNK+ls-E..VAlIsSK+LRNKIA...GasTHLMKRIp+GPVR....GISlKLQEEERERR.saVPEhStlDhs......hlpVDt-Tp-ML+p.hsh..slsshhl.st .........MGRVRTKTVK+uu+hlIE.+YYs+LT.hDFcTNK+ls-E...lAlI.s.SK+LRNKIA...G.YlTHLM.KRI....Q+G.PV.R....GIShKLQ.EE.ERER+.paVP-hSAl.-.p........lcVD.-Tt-hLc..Lsh..pls......s................................................................ 1 138 226 324 +4233 PF01084 Ribosomal_S18 S18; Ribosomal protein S18 Finn RD, Bateman A anon Pfam-B_712 (release 3.0) Family \N 21.00 21.00 21.20 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.46 0.72 -4.10 121 5681 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 5182 193 1326 2673 1849 53.20 50 56.97 CHANGED tpphp.hlD....Y.KcschLp+Flo-.pGKIlPRRlT..GssuKpQRplspAIK+ARhlALL ......................s..thppID.........Y.KDs.shL+c.FIoE.pGK..IlPRRlT..GssuKtQRplspAIKRARhluLL............ 
0 451 833 1106 +4234 PF00203 Ribosomal_S19 S19; Ribosomal protein S19 Finn RD anon Prosite Domain \N 21.00 21.00 21.00 21.00 20.70 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.69 0.72 -4.36 22 7420 2009-01-15 18:05:59 2003-04-07 12:59:11 16 5 6888 210 1487 3278 2069 78.30 55 82.39 CHANGED Rol+KsPFssh+Lh+Khcp.s.ttcK.phl+TWSRsSsIlPpMlGpslulYNGKpal.VhIoscMlGHKLGEFu.TRphttH ..........................................................RSLKKuP.F.V.s.t.H.L.h....+K...............l...-.t.h....s........t...p...........t.c.K...cs......I+TW...S...RpST.IhP.shlGhTIA.VHNG..+cHl.PValT.-.c.MV.G.HKLGEFAPTRTapGH............. 1 498 945 1255 +4235 PF01090 Ribosomal_S19e S19e; Ribosomal protein S19e Finn RD, Bateman A anon Prosite Family \N 24.20 24.20 24.50 25.30 24.10 23.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.46 0.71 -4.75 62 756 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 526 9 441 663 97 128.00 43 85.22 CHANGED sTVcDVsscchlptlAphLKcs.s+lcsP-WsshVKTGsaKEhsPps.sD.WaYhRsASlhR+lYlcu.PlGVtpl+phYGG++c..+G..s+Ps+as+uSGulhRpsLQpLEphGlVcKs......psGRplTspGpp.LD+lAtplhpcht ..............................oV+DVstpchlpshAtaLK+p...GKlcl..........PpWsDhVKTu.ttKEhsP.-...D.WaYhRs...ASlhR+lYlRs.........slGVG..php+lYGGppp....pG..spPsHascu.SGulhR+sL.QtLEphtllEps.........pu....GR.plTtpGp+DLD+IAsplhtt.p...................... 
0 182 268 368 +4236 PF00318 Ribosomal_S2 S2; Ribosomal protein S2 Finn RD anon Prosite Family \N 23.80 23.80 24.00 23.90 22.30 23.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.26 0.70 -5.11 177 7370 2009-01-15 18:05:59 2003-04-07 12:59:11 15 20 5811 224 2025 5251 3040 185.10 40 79.94 CHANGED lLcAGlHh.GH.p......sphWNP+Mp...Y.Iasp.R..s...GlHIIDLpc.ThthlppAhchlpphst.ps....................................................................sc....lLFVGTKp...pu..pchltctApcs......sth.al.s.....pRW.lGGhLTNapolppplpphc......h................t.hsK+p.h.htcpht+Ltp.hsGlpphtpl..Pc....l...lhllDs....pp-p.AlpEApcLsIPll..ullDTNssPsh...lDasIPuNDDuh+ulpLhhphluculhcup .....................................................................hLcAGVHF.GH.p......T..+pWN.PKMt...a..Iasp....R...s.........................u.l.a.IIsLp+.Tht.hhp.pAhphl....pphst..ps....................................................................splLFVG.TK+........pA..............p-slt....c...tA.tcs.............s.th...aV..N.........pR.W.LGGhLTNapTl..ppplp.+hc.c....lct.tpsG.............................hphhsKKEhh...h.p+pht+.L.pp.....LG.GIcp.........Mttl..........Pc................h.lhllDs.....pc.E....p..hAlpEA....ppLsI....Pll..ul.l.DT.N.sc.P-.................lDhsIPu..ND.DAh+ulpLhhthhApAlh-u...................................................... 0 683 1211 1641 +4237 PF01649 Ribosomal_S20p Ribosomal protein S20 Bateman A anon Pfam-B_1685 (release 4.1) Family Bacterial ribosomal protein S20 interacts with 16S rRNA [1]. 
20.60 20.60 21.40 21.40 19.40 18.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.93 0.72 -3.60 14 4402 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 4355 191 972 2284 1917 80.90 41 93.95 CHANGED ANhKSApKRh+psp+pRl+NpuhKStl+ThlK+shpslpss-ps.......tAppths.stphlD+supKGllHKNpAAR+KS+LAttlpp ...........ANhKSAhK.Rscpsp+ppt+Ntuh+SthRThlK+s.c....tAlts.s.D.p.p...................sAp..pthptApphlD+sA..s.KGlIHKNpAAR+KSRLutplp.t................... 1 342 654 833 +4238 PF01165 Ribosomal_S21 S21; Ribosomal protein S21 Finn RD, Bateman A anon Prosite Family \N 21.00 21.00 21.70 21.30 20.50 19.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.66 0.72 -4.52 140 4324 2009-01-15 18:05:59 2003-04-07 12:59:11 15 5 3974 61 1053 1753 1966 57.00 45 77.38 CHANGED s.pVplpcs-.sl-pALRRFK+php+sGlhp-h+cRc...aaEKPotcR++Kpttut++p.t+ ............s..lhV+-NE.shDsALRRF...KRs.spKsG.........llpEhR+.R.E...aYEKPoscRK.++ptuAhKRch+............. 0 333 644 864 +4239 PF01249 Ribosomal_S21e Ribosomal protein S21e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.40 25.10 18.30 19.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.94 0.72 -4.04 36 472 2009-01-15 18:05:59 2003-04-07 12:59:11 13 8 349 6 258 420 6 79.10 54 76.04 CHANGED MpNDsGchVDLYlPRKCSATNRlIsAKDHASVQINlucVDts.G+.hsGphpTaA...lsGhlRphGESDcslsRLspccGllsps ..................MpN-tGch.VDL..YlPRK......C.S.AoNRIIpAKDHASVQIslucV.Dcs..GR.hs.GphpTYA...lsGhlR......th....GESDD.ulsRLApc-Gllt..s...................... 
0 88 142 211 +4240 PF01282 Ribosomal_S24e Ribosomal protein S24e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.30 25.00 24.30 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.61 0.72 -4.19 70 778 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 520 12 448 664 66 83.90 46 63.63 CHANGED hhl-lhHsup.uosS+p-l+-KLAphhps.ss-tlhlash+TpFGsG+osGauhIYDsh-thKchEPcatLhRs.slhpc.ct..tucc ..............................hVlDVLHPG+.AoVsK........sElREKLAchY.Ks.ss-sl.hVFGFRTpFGGGKoT..GFuhIYDol-hAKKhEPKa..RLsRp..GLhcKhc..uR.p....................................... 0 129 224 315 +4241 PF03297 Ribosomal_S25 S25 ribosomal protein Mifsud W anon Pfam-B_4038 (release 6.5) Family \N 28.80 28.80 28.80 29.10 28.70 28.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.42 0.72 -4.00 24 591 2012-10-04 14:01:12 2003-04-07 12:59:11 10 9 405 8 328 514 4 102.30 49 83.60 CHANGED MPPKc........p..pttKtttusuGGKs.+KKKWSKGKs+DKLsNhVlFD.KuTYDKLhKEVPsYKlITsSVlS-RLKIsGSLARpALc-Lpp+GlIK.V.s+HpuQhIYTRus .....................................t......t.t.pt..s.s...t.uGGKt..cKK.KWSKGK..V.+DK...lNNhV.L.F.D..csTYDKLhKEVPs..Y.KLIT...PuVlS-RLKIpGSLARtALp-Ltp+GlIKhV.spHpsQhIYTRs................. 0 112 176 252 +4242 PF01283 Ribosomal_S26e Ribosomal protein S26e Finn RD, Bateman A anon Prosite Family \N 20.70 20.70 20.70 21.00 19.80 19.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.46 0.71 -3.85 37 581 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 425 4 325 528 16 107.20 56 86.00 CHANGED MspKRRNsGRsK..+uRGHVp.lRCoNCuRsVPKDKAIKRFslRNlVEsAuhRDlp-Au.lY...psYslPKLYhKhpYCVSCAIHu+lVRlRSc-.....sR+.Rsss.+h....t.ttp.sst.s ...............MspKR+NsGRsK..+G.RGHVpslRCoNCuRClPK..DKAIK+FslRNIVEu..AA........lRDlsEAS.Va...............stYsLPKLYsKLpYCVS.CAIHu+lVRsRS+E.....sR+sRsPP.Rht.t.........ss.............................. 
0 118 180 260 +4243 PF01599 Ribosomal_S27 Ribosomal protein S27a Bashton M, Bateman A anon Pfam-B_638 (release 4.1) Domain This family of ribosomal proteins consists mainly of the 40S ribosomal protein S27a which is synthesised as a C-terminal extension of ubiquitin (CEP). The S27a domain compromises the C-terminal half of the protein. The synthesis of ribosomal proteins as extensions of ubiquitin promotes their incorporation into nascent ribosomes by a transient metabolic stabilisation and is required for efficient ribosome biogenesis [3]. The ribosomal extension protein S27a contains a basic region that is proposed to form a zinc finger; its fusion gene is proposed as a mechanism to maintain a fixed ratio between ubiquitin necessary for degrading proteins and ribosomes a source of proteins [2]. 21.30 21.30 22.50 22.50 20.00 20.00 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.58 0.72 -4.14 12 740 2012-10-03 10:42:43 2003-04-07 12:59:11 14 11 555 5 375 629 45 45.60 58 31.50 CHANGED AVLcYYKVDssGKlpRLR+ECPt.pCGuGVFMApHtDRpYCGKCthT ..................AVL+aYKV...D.c.s..G..K.l.....pR.LR+EC...Ps...cCGA.GV.FMAsH...h...D...RpYCGKCshT........ 0 122 208 302 +4244 PF01667 Ribosomal_S27e Ribosomal protein S27 Bateman A anon Pfam-B_1929 (release 4.1) Family \N 24.50 24.50 24.50 25.30 24.40 24.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -8.76 0.72 -4.42 7 748 2012-10-03 10:42:43 2003-04-07 12:59:11 12 5 546 9 399 669 46 54.30 62 63.75 CHANGED PpShFlcVKCPsChs.psVFuHupThVhChtCupsLspPTGGKu+lptth..hhc ..............................PsSaFMDVKCPGCapITTV.FSHAQTVVlCsuCuTVLCQPTG.GKA+L.TEGCSFR+K.......... 
0 125 218 314 +4245 PF01200 Ribosomal_S28e Ribosomal protein S28e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.70 25.70 20.80 19.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.15 0.72 -4.23 38 607 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 501 10 344 470 76 67.10 58 88.74 CHANGED M-pt......ptsh.....AcV...lcllGRTGscGplTQV+V+lLcs.sp....sRhlhRNVKGPVRhG..DI.LhL...hETEREA.R+Lc .........................p.sphAcV...h+V..LGRTGSpGpsTQVRVcFl--..ss.....RpIlRNVKGPVREG..DI.LsL...LEoEREA.RRLR...... 0 109 191 285 +4246 PF04758 Ribosomal_S30 Ribosomal protein S30 Wood V, Bateman A anon Wood V Family \N 20.00 20.00 21.00 20.70 18.70 17.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.83 0.72 -4.22 23 515 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 387 7 297 462 15 57.30 61 58.05 CHANGED KVHGSLARAGKV+sQTPKVsKpEK+.Kp.sGRA+KRhpYNRR....Flsll.t.G...+K+GsNups .....KVHGSLARAGKV+.uQTP..KV....-KQ....E..KK....Kp.pGRA++RhpYsRR....FVNVs...s.hG....pK+t.Nss....................................... 0 113 168 244 +4247 PF00189 Ribosomal_S3_C S3_C; Ribosomal protein S3, C-terminal domain Sonnhammer ELL anon Prosite Domain This family contains a central domain Pfam:PF00013, hence the amino and carboxyl terminal domains are stored separately. This is a minimal carboxyl-terminal domain.\ Some are much longer. 
21.30 21.30 22.30 21.80 20.90 21.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.64 0.72 -3.77 124 7624 2009-01-15 18:05:59 2003-04-07 12:59:11 15 11 7097 199 1388 4926 2193 82.90 50 31.68 CHANGED pl..........uppLEc+........h.sFR...+sh+psl......pph......psu.s.cGl+lplSGRL...sG....s-hARsEh..hpc........Gpl..sLpolcspIDYuhspAtTphGhlGlKVWI ...................................................IAtQL.cpR..l.uFR...RA..hKpAl.........pps................hpusA..+GIKl.plSGRL....sG.......AEIA.RoEh..a+E.................G+l.........PLpTlRAcIDYuhtEAcTp........YGhlGVKVWI..................... 0 478 888 1168 +4249 PF01015 Ribosomal_S3Ae Ribosomal S3Ae family Bateman A anon Pfam-B_1334 (release 3.0) Family \N 25.00 25.00 27.20 27.10 22.10 18.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.05 0.71 -4.98 69 888 2009-09-11 07:57:53 2003-04-07 12:59:11 13 8 583 4 470 815 113 191.30 50 82.23 CHANGED ut+Kst.t+hhDsap.pKcWYslhAPsh.F....sppplGcThssc.......s-tlhGRlhEsoLuD.LssD.p..psapKl+hplpcV.pGc..sshTpFhGh-hTcDhlRSLVR+hpohI-uhl-V+TpDGYh.lRlhsluaTp++............ApsSQh+sIRcphh-llpccusptshcphVpcll...............sslup-I.c.ps+pIYPL+cVtIRKlKlLcpPc ..........................................................tuKKGh.KKKssDPFs.+K-WY-l..KAPsh.F....s.hRs...lGKTLVs+opG.....................h+.As-uLKGR.....VhEVSLAD.Lps...D.-....puaRKh+Lhs-.-V..QG+..Ns.LTNFaGh-hTpDKlpShV.+KWp.T.hIEApV-VKT..oD.uYh.L.RlFsluF.....T+++.sQh...++TsYAppoQlR.tIR+KMhEI.hpccs................psssL..cclV..pK.l.I..............P-s...Iu+-IEK.uspsIYPLp.sValR.KVKlLKpP+.......................................... 0 157 256 356 +4250 PF00163 Ribosomal_S4 S4; Ribosomal protein S4/S9 N-terminal domain Bateman A, Sonnhammer ELL anon Prosite Family This family includes small ribosomal subunit S9 from prokaryotes and S16 from metazoans. This domain is predicted to bind to ribosomal RNA [1]. 
This domain is composed of four helices in the known structure. However the domain is discontinuous in sequence and the alignment for this family contains only the first three helices. 21.70 21.70 21.70 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.14 0.72 -3.61 18 11914 2009-01-15 18:05:59 2003-04-07 12:59:11 14 7 10020 203 1485 8634 2034 83.20 44 43.67 CHANGED uR.YpGsphKhsRR.sphstLssc....ppptsp......pstpcshps+h........+hSpYtlpLcEKQKlRhhYG.lhERQLhpYsclu...pKh+GspG.slhplLE ...................................................................................h+h.RRhG..h....h..Lss.+.............t..t.h.c.s...............ss.tppu..tspp..............K.lSpYtl.p...L..c.E.KQK.LR.a.pY.......G..loERQhhpYh+hA...........p+tK.G..s...T..GpsLLQLLE......................................... 0 530 969 1271 +4251 PF00900 Ribosomal_S4e Ribosomal family S4e Bateman A, Finn RD anon Pfam-B_1205 (release 3.0) Family \N 25.00 25.00 25.00 25.50 24.50 24.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.35 0.72 -4.31 81 821 2009-01-15 18:05:59 2003-04-07 12:59:11 15 13 555 7 448 754 91 75.80 52 30.43 CHANGED cosEpaRllhDs+GRhslpcIss-EAphKLs+lpsKshspsGhsplshHDGRslhhsp.........sph+ssDolhlsl...ssp......cIh .....KTsEpFRLlYDsKGRFslH+Ios..EEAc...YKLCKV+.+lph.......Gp+GlPa.LsTHDGRTIRYPD.........PhIKsNDTlplDL.tosKIh..................... 
0 144 246 342 +4252 PF00333 Ribosomal_S5 S5; Ribosomal protein S5, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 27.00 27.00 27.00 27.20 26.80 26.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.04 0.72 -4.23 13 5805 2012-10-02 17:51:51 2003-04-07 12:59:11 15 11 5179 202 1620 3309 2088 66.50 52 34.20 CHANGED cLpE+VlslpRVsKhs+uGR+hpFpAlVVVGDcNGpVGhGhGKA+E.VssAIpKAltpAK+sllsVsh .....................pLpE+llsINRV.sKs.V.KG.GR+hpFs..ALVVVG..D.t.s.G+.VGhG.hG.KA..+.E..VPsAIpK.Ah-pA++..shlpVs.h.................................... 0 552 995 1332 +4253 PF03719 Ribosomal_S5_C Ribosomal protein S5, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.40 20.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.26 0.72 -4.70 114 5623 2012-10-03 01:04:38 2003-04-07 12:59:11 10 12 4962 202 1648 3246 2178 73.10 45 37.14 CHANGED VhG+aGuu+Vhl+PAstGTGlIAGG.ssRuVlEhAGlcD....lhsKohG.SpNshNhl+AThcuLpphposcplAphR.G ................lpGcaGuucVhlpPAscGT.....GlIA.GG.ssRAV.LEhA.GlcD.....lluKo.h.....G...Ss..NP..hN.lV+AT.lcu.L.p.p.hpss.cplAthRG.................... 0 557 1007 1357 +4254 PF01250 Ribosomal_S6 Ribosomal protein S6 Finn RD, Bateman A anon Prosite Domain \N 21.00 21.00 21.10 21.00 20.90 20.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.76 0.72 -4.14 180 4748 2009-09-12 05:02:19 2003-04-07 12:59:11 12 8 4630 210 1190 2634 2089 91.70 35 77.12 CHANGED +pYEhhaIlcPcls-c.phpshl-chpsllppp.uGplh.ch-p...W.Gc.R+LA.Y.IpKh..........pcGaYhlh.phpu..s.spslpEl-Rth+ls-sllRahsl+h ......................p+YElhal...l...+...P.s..hsEp...phs......uhlE+a.psllsss.GGplp.ch-c...W.G+....RpLA....Y...Ip....K.h.....................................+cGaY.....hll.slcu..s...spsls.El-+.....hh+.....l.....s.-sllRphll+.h................................... 
0 409 769 1011 +4255 PF01092 Ribosomal_S6e S6e; Ribosomal protein S6e Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.20 25.20 24.10 24.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.45 0.71 -4.50 7 798 2009-01-15 18:05:59 2003-04-07 12:59:11 14 6 566 5 447 741 92 116.50 52 53.22 CHANGED MKLNlSaPtsGsQKhlElDD-+plRhFh-KRhGpEV-u-hlG.EacGYsl+IsGGNDKQGFPM+QGVLsssRVRLLhucGpsCYRPRRsGERKRKSVRGsIVsssluVLsLsIl++GEp-IPGLTss .................................hKlNlu.PssGsQKhlEl.-D-p.+lR..Fh.-KRhupEVsu........-........s..L........G........-........E.a.........K............G.........Yl.h+IoGGsDKQGFPMKQ.GVLsssRVRLLLs+G............ps.CY..R...P...R..R...............sGERK.R.KSVRGCIV........sssLuVLsLlI.V.K.pG...Ep.....-lsGLTD......................................... 0 141 238 341 +4256 PF00177 Ribosomal_S7 S7; Ribosomal protein S7p/S5e Sonnhammer ELL anon Prosite Domain This family contains ribosomal protein S7 from prokaryotes and S5 from eukaryotes. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.77 0.71 -4.62 166 7992 2009-01-15 18:05:59 2003-04-07 12:59:11 16 14 7200 206 1618 4678 2374 146.70 44 86.24 CHANGED MsR...+.tpst+R...l.s....Dshasst.........lVs+hlNplMh....................cGKKulApcIlYpAh-hl............pp....+.spp...sP.................lp.....lhppAlcNlpPtlEV+.......uR......RlGGuoYQV.PlEV.ps.pRphsLAlRWllpuuR....p.......Rs.t+..sMsp+LAsEllDAu.ps..pGsAlKK+E-sH+M...AEA.N+ .....................................................sR+..sphs++...h...s....Dshapst.........lVs+.llNplMh....................cG....KKuhA.pI...VYpAh-hI......................................cp.....+...Tuc.........sP..........................................................Lp.....VhppAlpNltPt.l-V+........uR......R...lG.Gu.sa.Q.V.PV.EV..pP.tRRssLAlRWLlsuAR............h...........Rs....tK.....oMs-+LAsELlDAA..ps...pG..........sA.lK.K+E-sH+M...AEAN+.......................................................... 2 573 1027 1367 +4257 PF01251 Ribosomal_S7e Ribosomal protein S7e Finn RD, Bateman A anon Prosite Family \N 26.60 26.60 26.60 26.90 26.10 26.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.14 0.71 -4.82 27 599 2009-01-15 18:05:59 2003-04-07 12:59:11 13 7 415 4 298 532 6 174.90 53 92.14 CHANGED sKIhKpsst....PoEhEpsVAQALhDLEsss...-LKupL+sLplsuA+E.l-luu.sKKAlllaVPhP.Lpua.+KIQt+LsRELEKKFus+cVlhlApRRILs+PpRp..tt...ppQKRPRSRTLTAVH-uILEDLVaPuEIVGKRlRh+lDGo+lhKVaLDp+-pss..lEaKl-oFsuVY+KLTGK-VsFEFP.stt ............................................Kl.Ksp..stp...PsEhEps.luQ..........ALh....-LE.ss...DLKu.pLR.c.LtIsuA+E...l-Vus.....s+..............KAll.IaVPhP.L.+uF.pKI..QsRLlRELEKKFS.G+.HVlhlAp.RRI.LPK.PpRp..s+....ppQKRP.R.S.RTLTAVHDAILEDLVaPsEI..V..GKRlRh.....+lDGS+llK.....Va.L.Dp.pppss..lEa.Kl-TFuuVY+KLTG+-VsFEFPt................... 
0 102 164 239 +4258 PF00410 Ribosomal_S8 S8; Ribosomal protein S8 Finn RD anon Prosite Domain \N 24.00 24.00 24.50 24.50 21.10 23.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.51 0.71 -4.30 119 6129 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 5619 211 1551 3553 2221 126.70 42 95.88 CHANGED DsIuDhLTRIRNAphsp+ppVpl.Ps........................SKlptslhplLpcEGYIpsaph.........hppppp...t...............................lplp.LKY.........pttps..lIpplpRlS+PGhRlYsstpclP+.......lh..sGhGl...........sIlSTS+G.lMoc+cARcppl.GGElLChVa ....................................................DPIADhLTR..IRNA.pts.p....+psVpl..Pu........................SKlK..tsIs.plLpc.EG...........aIcsach...........................hc..-..s.p..t......t.......................................................................lplp.LK.Y.............ptct..........lI.p...s...l..cRlS+PGLR..lY.sptc-.lP+.........Vh......sG..L...GI............................uIl.....ST..Sc....G......l...hTD+cA.Rp...............psl..G.GEllsYVa.......................................... 0 516 966 1289 +4259 PF00380 Ribosomal_S9 S9; Ribosomal protein S9/S16 Finn RD anon Prosite Family This family includes small ribosomal subunit S9 from prokaryotes and S16 from eukaryotes. 
21.80 21.80 21.90 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.56 0.71 -3.86 183 5483 2009-01-15 18:05:59 2003-04-07 12:59:11 14 9 4963 199 1599 3183 2185 121.50 50 81.31 CHANGED GRRKoulARVhl.ps.G.....................s....G.p.lplN.s+............sh...p-Ya..spphhphplhpPLt.lssp...........................................................hsp...............................aDlhlp.................VpGGGhoGQAsAlRhuIARALlp.....................................hs....sp............h+stLKpt......GhLTRDsRhhERKKa..Gh++AR+p.QaSKR ...............................................GRRKsulARVhl...hP...G.................................................s.....G...cI..slN.s.+.....................s.lcpYa..s....pc........sh.chhl.pQPL.t.l.s.pp...........................................................h..s..p................................aDlhls.................V.p.G..GGhoGQAG.....A.IRHGIuRAL.hp.............................................hD...ss..............hR.ssLKcA..............................................GhLTR..DuRhsERKKh..GL..+KAR+tsQFSKR...................... 0 557 1015 1351 +4260 PF00834 Ribul_P_3_epim Ribulose-phosphate 3 epimerase family Bateman A anon Pfam-B_1291 (release 2.1) Domain This enzyme catalyses the conversion of D-ribulose 5-phosphate into D-xylulose 5-phosphate. 
20.60 20.60 20.60 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.32 0.71 -5.21 13 6022 2012-10-03 05:58:16 2003-04-07 12:59:11 14 17 4642 50 1359 4019 3152 198.40 43 88.92 CHANGED hlAPSILSADFu+LucElpslppuGuDhlHlDVMDuHFVPNlTlGPhVlculRsh...sphPlDVHLMlcssDphlssFAcAGAs.hIoFHsE..AocHlcRolphI+ctGsKAGlVLNPuTPLssl-alL-clDlVLlMSVNPGFGGQuFIPssLsKlcpl...R+hhsp......hshhlEVDGGlsscshtplscAGAshlVAGSAlFuus .....................................................................................................................................IAPSILSAD..F...ucLu.c-l.ppl...p......t...u.....G...A.DhlHlDVM...D........G..........HFVPN...l.T..h..Gs..llcu..l.Rph...................sphP.....l.DVH.L.Ml....p......s..............P..-.......c.......a.lss.F...Ac.A......G..A.s....hI......oh.H.sE..................A.s......p....H......l...c.......R......s.l...p...h...I.+........p......t.....G....h.K...................A.........Gl.s........l.N.P.u.T..P.l.p.h...l.c....l..l......c........p......l.......D.h..lLl...M.o.......V.N....P..G.F....G...G....Q...p...F...Isp.s.l...cK.l.cpl......+..phhc.pps..................hs.hpIEVDGGl.s..s.p.s.h.tphs.p..AG.A-.hhVA.G.S.u.lFpt.s..................................... 0 465 866 1148 +4261 PF02009 Rifin_STEVOR Rifin/stevor family Bateman A, Lawson D anon Lawson D Family Several multicopy gene families have been described in Plasmodium falciparum, including the stevor family of subtelomeric open reading frames and the rif interspersed repetitive elements. Both families contain three predicted transmembrane segments. It has been proposed that stevor and rif are members of a larger superfamily that code for variant surface antigens [1]. 
34.70 34.70 35.40 54.50 33.20 34.60 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.28 0.70 -4.77 16 412 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 2 0 71 414 0 288.50 30 96.01 CHANGED aNp..Np.hITs..ppsshoo......RhLsEC-las.spYDNDPEMKcVh-pFscpToQRF+EY-ERhhspRpKCK-psDK-IQcIIlKDKhE.........................KSlA-KVEKsCL+CGssLG.GlhsusGlhG...slulsph.sp....uAthsAhp.h.csu.................htsslcph.......cuhsphhshhsh...............thhpulhssssYps.hsllsslhs.tstshCshspsstsshhshsspsttshhstp....................VpshspsAsssAptsspthstsltptssuhhss........IhuSllAIllIlLlMlIIYLILRYRRKKKMpKKhpYhKLLpc .....................p........h...ps......R.LsEC-las.s.Y-sDsEMKpVh-pFs.cpT.pp.RF....cEYcE.+h.cpRpppKEps-KpIQ..KIIhKDKhE...........................KSls-KsEKt.CLcCuhshG.ulssphGlhu...........shshsth.ps........sshss....u..hp.u...ttu.....................htssltts..........c.shsphhthhph....................tshpslhsss.....sasshhslh......shlts...th.........t.s.h...s..........s..h.........s........s....s.....s.......t..............h...shsh...thhph..hst.......................................stphhps..usssutt....ss..s.th.s....s....shtstthu...hhps.......tsIhuSslslllIll.lhlIIYLILR.YRR.+K.phKhp..KhL....................................... 0 71 71 71 +4262 PF02197 RIIa Regulatory subunit of type II PKA R-subunit SMART anon Alignment kindly provided by SMART Domain \N 20.30 20.30 20.40 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.42 0.72 -4.52 13 656 2012-10-01 20:11:07 2003-04-07 12:59:11 12 27 223 33 372 614 5 37.00 33 10.58 CHANGED pulpsLLcshsspVh+ppPuDllpFstsYF.p+LpcpRt ...........slpplLcshthpllpppPs....clh...pFsspYF..p+Lpptp.......... 0 132 172 259 +4263 PF01782 RimM RimM N-terminal domain Bateman A anon PSI-BLAST P51419 Domain The RimM protein is essential for efficient processing of 16S rRNA [1]. 
The RimM protein was shown to have affinity for free ribosomal 30S subunits but not for 30S subunits in the 70S ribosomes [1]. This N-terminal domain is found associated with a PRC-barrel domain [2]. 23.50 23.50 23.50 23.90 23.40 23.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.74 0.72 -4.06 170 4228 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 4195 7 921 2739 1298 83.60 27 46.27 CHANGED hlGpIsssaGl+Gpl+lhsh.T-....s-thhshtshhht..................pttt............hplpph+.hp..pp.thllphcGlss+spA.ctLpGtpl..hlscsp ..............lG+lsssaGl+Gcl.+Vhs.h.TD......s-..p.hhc.h..ss..hhht.......................................pssp.........h.t..........lplps..h+..hp..........ps.hhll+hcG.....ls.....stssA.ctLpstplhlspp................................ 0 302 605 781 +4264 PF00848 Ring_hydroxyl_A Ring hydroxylating alpha subunit (catalytic domain) Bateman A anon Pfam-B_407 (release 3.0) Domain This family is the catalytic domain of aromatic-ring- hydroxylating dioxygenase systems. The active site contains a non-heme ferrous ion coordinated by three ligands. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.46 0.70 -4.68 128 4563 2012-10-02 19:24:03 2003-04-07 12:59:11 14 18 1613 110 962 3945 3034 206.60 16 58.44 CHANGED hp.ph.phttphphc....................hpsNWKlhh...-NahE.sYH..hs.ssHsph.t...hppht............................................t.t.h.h..............h.hshttt.tttsth......th.pptphtsh....................hhhlFPNhhlt.hhsshhh...hhphhP...husspsphphphhhtss............tssch..............tpphtphhpt............ltpEDhthscphQpG....lpo.....................sh..tsu..h.........sttEt.sl..pp....Fpphltchl...st ...................................................................................ht.hhp.......................htsNWKhhs..Es.h.h-...sYH...ss.hsH....shs.p.s.h..tths....................................................................t...hth..s.tts.hh..............................h...t.sh.s.h.p.hhtt....hsttp.h.......................t.ht..tt.....h..thh....................................................................th.s.la....Psh.hh....................ss..thh.............hphh.hP.......hussc...sp..h...hhhh.hs.t.s.....................tstch.....................ppthhp.h..t........................ht.pDht.htt....pts.......hts.....................................................................................-.............h......t.................................................................................................................. 
1 181 516 777 +4265 PF00161 RIP Ribosome inactivating protein Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.10 20.10 20.70 20.70 19.90 19.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.47 0.70 -5.14 73 945 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 239 176 103 936 0 197.20 29 61.64 CHANGED sFslssus...psYssFlpsLRsplsssstt.......hs..lPlL.......ssssp+alhlcLps....ssp.....lTLulchsNlYVlGY......psssphahFp............psspssLhsss.....tpppL.sasGsYssLpptush......Rpp.lsLGhptLssulssLhths...................................sspstAcsLllhIQMluEAARF+aIcppltssh........tpshpPs...sthlsLcssWuplSptlppu ...................................................................................Fsht.ss...tsYss.lsslRppl..tp.hpp........hpt..hsVhs.p.......tssstpahhl-lpsh..........ptp....p.lpLhlchsNLYlsGFh........sssssaapFs................................-hsphhhsss...........pshsl.shsu.s.YssLppsuu............Rps.hpluc.tsLssuh.s.Lhpassss.................................ppstu+......ullt.hlphsuEA....hRFp..Ipcphcpsh...............tpsp..shshs...st...t.ls..hp.s.WuclSpsl................................................ 2 2 17 64 +4266 PF04957 RMF Ribosome modulation factor Bateman A anon COG3130 Family This protein associates with 70s ribosomes and converts them to a dimeric form (100S ribosomes) which appear during the transition from the exponential growth phase to the stationary phase of Escherichia coli cells. 27.90 27.90 28.20 28.70 27.10 27.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.69 0.72 -4.28 27 514 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 498 3 92 194 24 54.70 67 89.99 CHANGED MKRQKRD+hcRAas+GYQAGlsGRS+EhCPap.sh-s.RspWLuGWREuhpD+hsGh ....MKRQKRDRLERAapRGYQAGIuGRSKEhCPYQ.sLsp.RStWLGGWR-AMtDRss..ht....... 
0 17 34 64 +4267 PF04321 RmlD_sub_bind RmlD substrate binding domain Waterfield DI, Finn RD anon COG1091 Domain L-rhamnose is a saccharide required for the virulence of some bacteria. Its precursor, dTDP-L-rhamnose, is synthesised by four different enzymes the final one of which is RmlD.\ \ \ The RmlD substrate binding domain is responsible for binding a sugar nucleotide [1,2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.59 0.70 -5.52 37 4012 2012-10-10 17:06:42 2003-04-07 12:59:11 12 20 2929 21 1194 32857 21491 276.30 30 91.82 CHANGED Mp.lLlTGusGQlGp-Ltcthtt.pshslluh.s+s...........phDlscspultphl.........pph+Pc..lVlNsAAaTAVDpAEs-..-tAhtlNutGsttlAcust.phGs.LlalSTDYVFDGs........tsts.YpEsD.ssPhslYGpoKLtGEpAVhs..sss.cthllRTuWVY.ut...tGp..NFVcTMl+LA..tpccplpVVsDQhGsPThstslAcslhtlhpphhp........hGlaHhsssGt....soWasFAptIhcpsstps........cVpPlsospaPpsApRPt.SsLssp+hptshsh.h.s.WcpuLtchlpphht ..........................................................................................................................................p.lLls.Gu.s.GQ....LG....p......p........L.t.......p...........h.......h......s.........t.......t............s...............p.......h.......l..uh...sts.....................................................ph.D......l...s...s...........p........s.......l......p......p.....h...l.......................................p..p.....h.....p.......P........c..........l......l........l......N....s...A.......A........a.......T.......s.......V.....D.......t........A.....E.........s....-....................c......h....u..............h.......t.......l...........N.....s.......p....u..s...........p........p........l............A..........p..........s.............s...............p...........p...............h...............u..........u...............h.......l..........l...........a....l...S...T..........D.....Y......V.........F.........-....G.p................................t.s.
.t..s.....a.....p......E......s.........D.........t.........s.........s.........P..........h........s........h..............Y.......G.....p.....o...K....h.......t.......G....E.....p.......t.....l......tp..................hs...........s.......c............h..........h.........I..........l......R........T..........u.....W.......l.....a...ut....................hG..p.......N......F.......l......p.......T.......M.........l........c.....L.u........pp.......+................c...........p..................l......p........V......l........s...........D.........Q........h........G......s........P........T..........h....s......t......s...L...A..c.h.....h.........h.....p.....l....l...p.....p.t...t.t..................................hG.l.YHh..s..s..s..u.t................s..oW....a....-...a.A....p....t....lhc.t.s.sh.t.........................p.l.p...s....l..s..o.....s......p......a.....s.....t..............A........t.....R....P....t........S.....h...L...stp+.....h.p.t.t....h..........s..h...............h........s..Wppultphht...th.................................................................................................................................................................................................................................................. 
0 375 753 996 +4268 PF03035 RNA_capsid Calicivirus putative RNA polymerase/capsid protein Griffiths-Jones SR anon Pfam-B_1282 (release 6.4) Family \N 19.50 19.50 21.00 20.70 19.00 18.00 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.72 0.70 -4.55 34 896 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 582 0 0 699 0 183.40 57 98.34 CHANGED MAGAhlAGLAu...DhluuulGoLIsAGANAlNQ+h-a......................c.Npp...........LQpsSFpHDKEMLpuQlpATppLQtchlsl+pulLsAGGFSssDAARuulsAPhT+l.lDWN...GTRaaAPs......ShpTTsaSGpFsssss............p...................ssssohpopST.Soslossstss..................osssSRTosWVpsQN............p.LpPahpuALpTsaVTPPSSp.uSSs.........uoVSTVP+tlLDSWTss.....FNT+RQPLFAplR ........................................MAGAFhAuLAu....sllusulGSLlsAGAsAlNQ+h-F......................-pNpp...........LQQASFQHDKEMLpAQlpATppLQpphhpl+puhLhtGGFStoDAuRu....AlsAPhT+s.lDWs...GTRYaAPs......upsTTh.uGtFoss.s.......................................................utssohhospT.sotluuss.ss..................hssosRTpsWlppQN............pslpPahpGAh.phsaVTPPuSp.uSo..........uo..VSTVPcth..Sa.s......FNTcR.PhFA............................... 
1 0 0 0 +4269 PF00680 RdRP_1 RNA dependent RNA polymerase Bateman A anon Pfam-B_32 (release 2.1) Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.51 0.70 -6.25 39 14992 2012-10-02 12:54:00 2003-04-07 12:59:11 15 154 6054 160 0 11808 1 228.60 22 27.06 CHANGED l.....shss+stlptsshcshhssh.....pcPusLshtDPRhssth...........schths+thhph.h.tplssh.hpshpcshstlhphh.shshtphshtpshpsl........hcsLs.......hsTSsGhPYh........ttKK+chhsptsts........................................th...hhhpshphhtsp.hu.thlhhsslKDELRsh-Kl..................................ptsKTRhhpusPlssslss+hthsshssthhpp.shphshsVGhs..pttWsclhtpLsp..uphhhssDaSsFDuoloPhlhssl..hplhpphh........hphhpshhhphlssshthhcsplh..........clpsGhPSGpssTslhNolhs.lhhphshhchhtshphpp..................p.lc......hhs.YGDDhlluhs.phs.h.....hptlpp..............phtc.hGlphT...sDKops.......hpplsFL+Rph..phspsh...hhshhcpcpIhs.lp..Ws+sspspp.......................plpslshthhcs....spchh.p..clpchhtphlpttsht............Phhtphthcah.t 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..h................h.h......t....h........................................................................................h.........p.............h......l..............s....h....h..........h.t..s..h.h..........................h..tG..P..S.G.ssTo..No.hhp...l..h....h....h....h..t.....h.........p.h...h...............................................................hh....uDD.h.................................................................................................................................................................................................................................................................................................................................................................................... 1 0 0 0 +4270 PF00978 RdRP_2 RNA_dep_RNApol2; RNA dependent RNA polymerase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_13 (release 3.0) Family This family may represent an RNA dependent RNA polymerase. 
The family also contains the following proteins: 2A protein from bromoviruses putative RNA dependent RNA polymerase from tobamoviruses Non structural polyprotein from togaviruses 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.43 0.70 -5.80 36 2606 2012-10-02 12:54:00 2003-04-07 12:59:11 16 43 501 0 2 2727 0 250.60 22 29.18 CHANGED psDhsls.lc..csplp.oc.psh..p.....pcshhtPslRous.p.....tRpsThp-s..LhAhpKRNhssPcLpcssshpphup.plscpFh..cshhspchhc........sshhss.tthspahsphpshptttl...ts.shhsL.pt...lshppYpaMlKsDlKshl-sohph..EhsssQTIsaacKhlsuhFuPlFpplscRlhtsLps.+llh.ssh....hss.hhscphphhss...hps......lElDhSKFDKSQschHthsphtlhctLGlss.lhsh.Wp.sh.......ccpohl.....p...Dhps..Glth.l.aQp+o..GsshTahuNT...l...lshshlupsh..s..lp...psphshFuGDDSLl...hshps...ts.spthsohaNhEsKlhc....h....sh....PYFCuKFLl..p..ss..sss.....hh.VPDPlKhlhKLGpcchhc.....phLp-hapShsDth+.a.c.hshhphhphshhchh+h..th.tshtsltphl.tuhstapsh ............................................................................................................................................................................................................................................................................................................................................................................................................................l...hpp...hhshaushh+.ltct.l....t.h..L..s...phhhhssh.........s..h.s.th.h.tsht........t.s.....hEsDastFDpo.Qsph......h.h.t.hEhhlh.......c.t.h..G......h......sp....llc........................h.....................t............h....oG...Th.hNT.........hs.hh....h....h....hh..........ht.........t........h...hh..GDD.hh....................................................................................................................................................................................................................................h................................................................
.................................................. 0 0 1 2 +4271 PF00910 RNA_helicase RNA helicase Bateman A anon Pfam-B_11 (release 3.0) Domain This family includes RNA helicases thought to be involved in duplex unwinding during viral RNA replication. Members of this family are found in a variety of single stranded RNA viruses. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.37 0.72 -3.87 99 3937 2012-10-05 12:31:08 2003-04-07 12:59:11 17 83 1135 0 26 5004 845 96.30 36 6.53 CHANGED lhlhGss.GsGKShhsph.lhptlh...t..................p.sslYs.tsssscaa.sGYp..tQslslhDDhspss.ss....p..htthhpllsossa.lsMAslccKu.h.FsSphllsooNh .....................hlpGsP.GsGKShhssh.luptls.....................................t.s.sss.Y.......hs...s...c.s...ca.....a.....D.....G....Yc......t...Q.t...V..Vlh.DDh..s..pss...ss..........p...Dhphht..ph.l........soss...ahs.shAul.Ep.....KG...h.Fs.Sc.hllsooN...................................................... 0 12 14 23 +4272 PF00940 RNA_pol DNA-dependent RNA polymerase Finn RD, Bateman A anon Pfam-B_1108 (release 3.0) Family This is a family of single chain RNA polymerases. 
22.00 22.00 22.10 24.60 20.80 21.90 hmmbuild -o /dev/null HMM SEED 405 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.45 0.70 -6.00 94 680 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 462 20 294 684 441 385.90 35 41.81 CHANGED u+sL.GppGLcWLKlalANla....GhDK.hSh.........p-Rlpasc..cp..h..................cpIhcsA........cs.................................................................................Plpt....p.......aWhp......A-cPaQhLAsChElpps....h.....cpsss..ppa.....hSplPlaQDGoCNGLQHYAALGtDhhGAppVNLhP...u-..c....PpDlYotVuphVpctlp.....p-.................................................stpspphuphl..pst......lsRKllKpoVMTpVYGVThhGuppQlpcpLppht...p....................................................................................................................................tt.hhpsupYluphlhpulpphFsuAcpI.pWLspsAphlsp.ssps................................................VhWsTPhGLPVsQsY+...ctppp.plpos....lpshshpps.stsss..........sppKQt.................sAhsPNFlHSLDAoHhhhouh..pstc.......tslsFuuVHDSaWTHA...........ssl-p.MspllR-pFlclaup.s.llpcLtppappphtp................................................................................................................................................................................hphsslPppG.......s.hDlp...........plhcSpYFFs 
.....................................................+.L.G..pGl.pWLKlHluNhh..........G..h.cK...hsh.........pcRhtasp..pp..h....ppI.hc.SA..pp.................................................................................Plps..pp.........WWhp.....A-cP..aQhLAsChEltpu....h.....cp.s.s.P..tpa........hSplPlaQDGoCNGLQHYAALGtD.hGAptVNLhs..............u-.p....PtDlYstluph.Vpphhp........p-..............................................................tttst.h...Aphl........tshlsRKllKpoVMTsV.YG............VT....hhGupp..Q.lt.cp.Lpph............................................................................................................................................tt.hhtsupYlsph.hhpu...ltphFpuu..ptl....pW...L.s.p.sA.ch.......lsp.p.ps...............................................................................................................................VhWsTP.l.GlPVhQs.Yp..p.t....p.p....tl.pss..........h.ps.h.hh.p....t..t.h...............................stpKQt............................................suhsPNFlHSLDuo...Hhhhoul.tstc................tsl.s.Fs.u.VHDSaWTHA...........sslsp.hsp.llR-pFlplasp...sllppLh.pph......tph.t............................................................................................................................................................................................................................h..hPtp.G.......s.h-lpplhpS.YFF...................................................................................................................................... 0 102 176 254 +4273 PF03118 RNA_pol_A_CTD Bacterial RNA polymerase, alpha chain C terminal domain Finn RD, Bateman A anon Pfam-B_172 (release 3.0) Domain The alpha subunit of RNA polymerase consists of two independently folded domains, referred to as amino-terminal and carboxyl terminal domains. The amino terminal domain is involved in the interaction with the other subunits of the RNA polymerase. The carboxyl-terminal domain interacts with the DNA and activators. 
The amino acid sequence of the alpha subunit is conserved in prokaryotic and chloroplast RNA polymerases. There are three regions of particularly strong conservation, two in the amino-terminal and one in the carboxyl- terminal [2]. 23.80 23.80 23.80 24.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.94 0.72 -4.56 121 5961 2012-10-03 02:11:09 2003-04-07 12:59:11 10 30 5358 22 1041 3553 2145 65.70 44 20.39 CHANGED ppp.p..p...phhphsI--L-LSVRuhNCLK+ssIpTlu-Llphocp-Lhcl+NhG+KSlcE...IpctLpc ..........................c....php.lLhhsl--..L.-.Lo..VRShNCLKp..t.sIphluD...Llp..+...oE....s...-....LhKscNhG+KSLpE...lKchLt................ 0 373 707 891 +4274 PF05066 HARE-HTH RNA_pol_delta; HB1, ASXL, restriction endonuclease HTH domain Bateman A, Aravind L, Iyer, LM anon COG3343 Family A winged helix-turn-helix domain present in the plant HB1, vertebrate ASXL, the H. pylori restriction endonuclease HpyAIII(HgrA), the RNA polymerase delta subunit(RpoE) of Gram positive bacteria and several restriction endonucleases [1]. The domain is distinguished by the presence of a conserved one-turn helix between helix-3 and the preceding conserved turn. Its diverse architectures in eukaryotic species with extensive gene body methylation is suggestive of a chromatin function. The genetic interaction of the HARE-HTH containing ASXL with the methyl cytosine hydroxylating Tet2 protein is suggestive of a role for the domain in discriminating sequences with DNA modifications such as hmC [1]. Bacterial versions include fusions to diverse restriction endonucleases, and a DNA glycosylase where it may play a similar role in detecting modified DNA. Certain bacterial version of the HARE-HTH domain show fusions to the helix-hairpin-helix domain of the RNA polymerase alpha subunit and the HTH domains found in regions 3 and 4 of the sigma factors [1]. 
These versions are predicted to function as a novel inhibitor of the binding of RNA polymerase to transcription start sites, similar to the Bacillus delta protein [2,3]. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.28 0.72 -3.63 67 1524 2012-10-04 14:01:12 2003-04-07 12:59:11 8 24 1377 1 259 816 13 68.90 34 20.55 CHANGED hoht-sAhpVLcppu....cPhphp-IhcphhcpuLhpht............u+oPtsolsuplhs-hp...........pshFlcl....tsphuLtsh ....................h.ShIElAhslLcp+u.....csMsas-llscI.ps.hhtpp...............spplcspls.pFYT-LN...........hDGpFlslG...-NpWGLRsW......................... 0 72 138 195 +4275 PF04090 RNA_pol_I_TF RNA polymerase I specific initiation factor Wood V, Finn RD anon Pfam-B_43469 (release 7.3); Family \N 19.90 19.90 19.90 19.90 19.30 19.80 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.48 0.71 -5.04 10 106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 102 0 77 105 0 186.60 21 39.59 CHANGED hhhs.lspss+cs+tpFllhspGhEVlP..........sshs-l+sa....pppHIspLssLLHlNlLRcNWslAY+hFsLLIRlPsVDIRsIWsLGlEILsplsppsss................cFh-Whsshao...s+ssFspsssp+.hAPVFRoGSRoHTPhYllo.LWslLlpsp.....................................ascLh-+LuEhlLpPPY.sDutlaFlhuhC+llcAs-LuscF .........................................................................................sp.psh....+hpHlssLoslLHhslhctcaspAhRsaulLl....R......h....p......s....VD....lR..s..h..WulGsEILhptsppsst................................................cahphh.t.has.......tpt...t...............h......................h.h...hh..h...t......................................................................ht.h...htphh...Pa..t..hh.h.u.h.h....................................................................................... 
1 20 40 65 +4276 PF01193 RNA_pol_L RNA polymerase Rpb3/Rpb11 dimerisation domain Finn RD anon Pfam-B_172 (release 3.0) Domain The two eukaryotic subunits Rpb3 and Rpb11 dimerise to from a platform onto which the other subunits of the RNA polymerase assemble (D/L in archaea). The prokaryotic equivalent of the Rpb3/Rpb11 platform is the alpha-alpha dimer. The dimerisation domain of the alpha subunit/Rpb3 is interrupted by an insert domain (Pfam:PF01000). Some of the alpha subunits also contain iron-sulphur binding domains (Pfam:PF00037). Rpb11 is found as a continuous domain. Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, Rpb11 subunits from eukaryotes, RpoD subunits from archaeal spp, and RpoL subunits from archaeal spp. 18.90 18.90 18.90 18.90 18.80 18.80 hmmbuild -o /dev/null --hand HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.95 0.72 -4.85 218 7433 2012-10-02 13:35:44 2003-04-07 12:59:11 19 28 6156 190 1663 5625 2992 199.80 31 64.81 CHANGED lphhhpu.stTluNuLR+hLlsphsslslsshpl.p..t....................................................................................................................................................................................................................................................................................................................................................................................................................................ttpp...............................................................................................hhlclpTs.Gsh........sPpcslpp.AhchLtpphp 
...................................................................................................................................-PLE+GaGpTLGNuLRRlLLSS.lPGsAlTplcI-GVhHEaSol.GVpEDVhpIlLNlKtlsl+hpsccpphhplphpGsu.loAuDIhhssslchh....................................................................................................................................................................................................................................................................................................................................................................................s.thhhsshsptsphhhthphptthshssstpscpsptslGhl.lDuhaoPlp+lsYpVEssRVtpps...........DhDKLsl-l.Ts..G.ol...................sPc-Alpt.AA+ILt-pl................................................................................................................................................................................................................... 0 593 1066 1406 +4277 PF02150 RNA_POL_M_15KD RNA polymerases M/15 Kd subunit Mian N, Bateman A anon IPR001529 Domain \N 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -8.06 0.72 -4.26 10 723 2012-10-03 10:42:43 2003-04-07 12:59:11 11 10 427 90 486 620 22 35.70 34 27.56 CHANGED lcFCscCsNhLhspsc+psp....tCRsCsYcp.s-s. ...hpFCscCsNhL.h...s..c..cscpsph.......htCpsCsYpp.h............................. 
0 142 257 398 +4278 PF01194 RNA_pol_N RNA polymerases N / 8 kDa subunit Finn RD, Bateman A anon Prosite Domain \N 24.70 24.70 24.70 26.90 24.40 24.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.35 0.72 -3.94 49 498 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 450 97 310 402 203 60.50 55 79.07 CHANGED MlIPVRCFoC.G+llu.......chWccYhphlpp.....G.s.........ucsLDcLGl.cRYCCRRMlLoH......VD...lI-cll ...................................MIIPVRCFTCGKVlG.......scW-pYlphlps............shsc..........................................u-ALDsLGL.cRYCCR.RMlLoH......VDLI-clL............. 0 101 176 260 +4280 PF04990 RNA_pol_Rpb1_7 RNA polymerase Rpb1, domain 7 Finn RD anon Pfam-B_288 (release 4.2) Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain, domain 7, represents a mobile module of the RNA polymerase. Domain 7 forms a substantial interaction with the lobe domain of Rpb2 (Pfam:PF04561) [1,2]. 
20.80 20.80 21.40 22.10 20.50 20.70 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.89 0.71 -4.45 42 792 2009-01-15 18:05:59 2003-04-07 12:59:11 7 76 658 90 242 733 168 119.00 55 10.85 CHANGED TTL+pVTssopIYYDPDPpsTlIEEDp-aVpsaa.-hPD..t.-.s.....phSPWLLRlELDRcthhDKcL.oMpplup+IppsFu......sD.lplIaoDDN.A-cLVlRlRlhpsp...................................ct..-p-p.....c....-D...hFL+clEsphLsslsLp .................TTLR+VTusTuIYYDP.............DPp.sT...VItEDpEaVslYY.EMPD........hD.so.........+hSPWLLRlELDRK+MsDKKL.TMEpI.A-KIptuFG....................-D.LssIas-sN.....ucch......lhRlRhhttt...................................................t.tpt...........p.....t.....hL+ph.tphLtth.L........................................................................................................................................................... 0 88 139 207 +4281 PF04563 RNA_pol_Rpb2_1 RNA polymerase beta subunit Finn RD anon Manual Family RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). This domain forms one of the two distinctive lobes of the Rpb2 structure. This domain is also known as the protrusion domain [1]. The other lobe (Pfam:PF04561) is nested within this domain. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null --hand HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.23 0.71 -5.40 21 11314 2009-09-13 15:18:01 2003-04-07 12:59:11 10 107 8899 140 1837 8858 4389 301.50 26 35.23 CHANGED sLlc.Qh-SFstFlppsLp.............-hlpp.sslpspst.............phchplphtplpls.cPphs-..........h.Pp-A+hRslTYSutlYVshchphpps...............................tchpppcValGclPlMhco....h.t....c...htp.....tGhFIlNGsE+VllsQ.hhusshhas.cpcpsuhhshssshhophtttpph.tsppsplht+hspstthshhshhhthths.splhhthhhhth.hph.pphthphptpth....................pthhh.spt.shsthttphhphths+cpphttshphhphphh.+lsstpsscptpshhlshhhphLlhhphshtp.DDhDHhuN+RlchsGpLLps.F+hhhp+Lp+sl+pphptshpcs..........hs.pshlpu.psIosslcphhuou ............................................................................................................................................................................................................................................................................................................h...............................................................h....h....t........h........s..h........................................-s...........cs.hsa..ts.lh..h.hth.....t..................................................................................................p.t.h.....h.G.p........hPhMh..................................................................................................................................................GhFllNG.E+lllsQ.hhusshhh..t.tt.t...h........................h....t......h...hh...............................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................h.....hh..h.hhh.h....h..DDhcphss+Rlc.su-Llp..sp...a..R.....h....u...L.s...R...hp...+........s.V.+c.ch...s...t..t..s.cs.....................................hs.P.p..p...hI.....s.....h....+....s....lsuulKpFhuou......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 649 1155 1555 +4282 PF04561 RNA_pol_Rpb2_2 RNA polymerase Rpb2, domain 2 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Rpb2 is the second largest subunit of the RNA polymerase. This domain forms one of the two distinctive lobes of the Rpb2 structure. This domain is also known as the lobe domain [1]. DNA has been demonstrated to bind to the concave surface of the lobe domain, and plays a role in maintaining the transcription bubble [1]. Many of the bacterial members contain large insertions within this domain, as region known as dispensable region 1 (DRI). 
21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.11 0.71 -4.88 22 13001 2009-09-11 22:08:18 2003-04-07 12:59:11 9 96 7019 145 2482 9371 4016 126.70 21 18.82 CHANGED psstlhhppchspssh..................thhssshhptputhhphchstpthhhsh.phppcIslhllh+AlGh.sDp-Ilptls..a..............s..hh.hhh.phpct.sh.op-pALphltp.......hht.sc.splptupchl............t.lt+.pls.+lshp.s.csp...ps..lhhhlctLltht.hshtp.DDhDHhuN+Rlc .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t....................s........h.t..........................................................................................................................................t......h...p.l.......................................t..................................................................................................................................................................................................................................................................................................................................................... 0 840 1548 2077 +4283 PF04566 RNA_pol_Rpb2_4 RNA polymerase Rpb2, domain 4 Finn RD anon Manual Domain RNA polymerases catalyse the DNA dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared to three in eukaryotes (not including mitochondrial. and chloroplast polymerases). Domain 4, is also known as the external 2 domain [1]. 
21.60 21.60 22.30 21.60 21.50 21.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.98 0.72 -4.16 12 7585 2009-09-11 14:40:16 2003-04-07 12:59:11 8 53 5403 96 610 6531 395 61.70 56 13.64 CHANGED ValNGsllGspcsPccLspplRphRRpGclss..lulhhs.cpp-l+I.TDuGRhsRPLlIV-N ............lFVNGVWlG...V.H.p.....DP..t...pLVpslpcLRR...+..........s.................l......s..........h....EV.Sl..l..R..DI.....R.....-..........RE.....h+IaTDAGRVhRPLFlV-......................... 0 212 356 512 +4284 PF01191 RNA_pol_Rpb5_C RNA_pol_H; RNA polymerase Rpb5, C-terminal domain Finn RD, Bateman A anon Prosite Domain The assembly domain of Rpb5 [1]. The archaeal equivalent to this domain is subunit H. Subunit H lacks the N-terminal domain. 21.30 21.30 21.70 22.10 21.10 21.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.23 0.72 -4.39 64 636 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 504 98 378 576 240 72.50 49 38.79 CHANGED lNlscHpLVPcHplLoc-EtpplLpcYplc.pQ............LP+IhtoDPls+hh......Gh+.GsVl+IhRcS...pTuGchlsYRlVl .........................lNITcHpL.VP.c.HhlLopE.EppcLLp+...Y.cl.+.-.sQ..............................LPRIptsDPVA+Yh.......................Gl++G..pVVKIlRp.S...E.TA.G+YloYRlV............... 0 129 223 317 +4285 PF03871 RNA_pol_Rpb5_N RNA polymerase Rpb5, N-terminal domain Finn RD anon DOMO:DM07083; Domain Rpb5 has a bipartite structure which includes a eukaryote-specific N-terminal domain and a C-terminal domain resembling the archaeal RNAP subunit H [1,2]. The N-terminal domain is involved in DNA binding and is part of the jaw module in the RNA pol II structure [3]. This module is important for positioning the downstream DNA. 
21.00 21.00 21.00 21.20 20.80 20.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.06 0.72 -3.58 36 436 2012-10-11 20:44:43 2003-04-07 12:59:11 9 10 315 91 270 395 9 95.30 34 42.69 CHANGED Mssp.......cpht.RLaRshRTlhEMlcDRGYhl..spcElsholcpF+ppasc..............pspRschph.sp.psc...................ssspIaVhFs....-psplGlKsl+sasp.php ..................c...pEhh.RLaRhp+Tlh........p..Mh+DRGYhV..spcEl...s..o...L-cF+ppau-t.t......................pPpRpcLsh.ssps.s-....................sssplaVhFs....--sp....VGlKsl+taspph.............................................................................................................................. 0 97 154 223 +4286 PF01192 RNA_pol_Rpb6 RNA polymerase Rpb6 Finn RD anon Prosite Family Rpb6 is an essential subunit in the eukaryotic polymerases Pol I, II and III. This family also contains the bacterial equivalent to Rpb6, the omega subunit. Rpb6 and omega are structurally conserved and both function in polymerase assembly [1]. 20.60 20.60 20.80 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.43 0.72 -4.22 57 4597 2009-01-15 18:05:59 2003-04-07 12:59:11 17 6 4471 137 1177 2396 1900 54.00 32 56.84 CHANGED cclhshhss+YclshllupRAcp..lp.hsssshl.pcsp....+PlhhAlpElscshhp.pl ..............pchlc+lss+apLVllAA.+RARQ...........lp..t..G...t...s..........sh..lt...tpss.................KssshALcEIt..pshls........................................ 0 378 740 987 +4287 PF03876 SHS2_Rpb7-N RNA_pol_Rpb7_N; SHS2 domain found in N terminus of Rpb7p/Rpc25p/MJ0397 Finn RD, Anantharaman V anon Hand Domain Rpb7 bind to Rpb4 to form a heterodimer. This complex is thought to interact with the nascent RNA strand during RNA polymerase II elongation[1]. This family includes the homologs from RNA polymerase I and III. In RNA polymerase I, Rpa43 is at least one of the subunits contacted by the transcription factor TIF-IA [2]. 
The N terminus of Rpb7p/Rpc25p/MJ0397 has a SHS2 domain that is involved in protein-protein interaction [3]. 20.90 20.90 20.90 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.02 0.72 -3.85 145 1033 2012-10-02 11:08:51 2003-04-07 12:59:11 12 19 491 64 703 949 87 71.80 24 30.62 CHANGED pchlplsP.phhs...........................................shppslh.......ptLhpphts+.................hstp......h.G..........lllulhcl..pp................lsc....................Gpl..h.sss..GssahpVpachllF .............................t..hlpltP.phhs............................................phppslp.......pp.L...pch.ts.p.................sstp........h.G...........hllulhcl...pp...lsc....................................GhI..h..sGs.....Gh.s......hhp.......V.pachlVF............... 0 224 387 571 +4288 PF03870 RNA_pol_Rpb8 RNA polymerase Rpb8 Finn RD anon DOMO:DM07082; Family Rpb8 is a subunit common to the three yeast RNA polymerases, pol I, II and III. Rpb8 interacts with the largest subunit Rpb1, and with Rpb3 and Rpb11, two smaller subunits. 25.00 25.00 26.50 26.40 24.00 23.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.78 0.71 -3.98 7 366 2012-10-03 20:18:03 2003-04-07 12:59:11 10 4 309 91 251 321 4 131.00 41 90.57 CHANGED -DIFpVpslDP-GKKa-+VSRlpspSpshc.McLhLDINoplYPlthsDphpLslAooL.h.Dss.ssu....pasP..http.o.hsDpaEYlMYGKlY+lE..Ess..tts.+lu.sYsSFGGLLM+LpGctppLpsFclDpplYLLh++ .............................-DhFsVpslDs...pK..aDR............VSRlpspS.p.s.h.c..hplhLDlNs-lYP.lphu...........D.phplsLAoTL..p.D...G...s.s...-su...................taps........ts.pc...o....hADpa-YVM.aGKlY+.hE.......Ess.....tss..pls.sYsSFGGLLMpLpGshppL..p.s..hclDp.lYLLh++................... 0 84 143 210 +4289 PF05158 RNA_pol_Rpc34 RNA polymerase Rpc34 subunit Finn RD anon Manual Family Subunit specific to RNA Pol III, the tRNA specific polymerase. 
The C34 subunit of yeast RNA Pol III is part of a subcomplex of three subunits which have no counterpart in the other two nuclear RNA polymerases. This subunit interacts with TFIIIB70 and is therefore participates in Pol III recruitment [1]. 25.80 25.80 25.80 25.80 25.50 25.60 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.02 0.70 -5.25 20 415 2012-10-04 14:01:12 2003-04-07 12:59:11 7 7 320 3 292 406 4 252.70 25 92.27 CHANGED MAsss........pls-ltpcla-thhpp...sphhoQp-Lpsh..hspsshsplhsslppLl-ppLlcLlp..pssc.Lta+hlsp--ApK..........hsshus-Eu.LVYshI-uoGscGIWs+sI+s+oN...Lppshlp+s........LKoLEo+p....aIKSVKsVchPs+KhYMLasLpPSc-lTGGsWF..oD.p-LDspFIssltphlhpalspKoh.sh........................................................................tptpthshssshpuYsTstplhpalscsu..los.........VpLopcDIppLl-sLlYDG+lEplpss.....t.hpsh+sshpshhph.p.........................................s.hsssshspsPCupCPVFchC..sssusloPcsChYhccWLp ......................................................................................................................................t....................hht.h......t......thh..p.l.t.............p.t.hh.hhp..Lhp.t.hphhp.........tsst..lhaphht..pp.t.tt...............................hpsh....sspEt.llYphIcpuGppGIWp+sl+h+os...L..p.....lp+h........LKsL.Es.+.p....hIKsl+.sVp............ts..p+KhYhLhsLpPs.plTGGsWa..sD.tph-tpFlp.ltp.hhpalpppt................................................................................................................t..h.....s...o.pplhphlpp.t....lsp....................hpLs.pslppllpsLlhDspl-.hh.s...............hh...t.............................................................................................................................................................s..s.h..hp..hP..Cs..CPlhp.C..t.su.l..s..s.ppC.YhppWh...................................................................... 
0 108 172 245 +4290 PF05132 RNA_pol_Rpc4 RNA polymerase III RPC4 Wood V, Finn RD anon Pfam-B_18856 (release 7.7) Family Specific subunit for Pol III, the tRNA specific polymerase. 22.10 22.10 22.60 24.40 21.30 22.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.57 0.71 -4.16 36 332 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 269 0 241 326 2 126.50 25 30.42 CHANGED .csphalhQlPshLPshhsssssp...........................................pptpcsppspppppsspptpssspt.......hppl........pGplGKlhl+KSG+lplclG.slshD.........................lshGssssFLQ-llslss..........tcps................phhsLGplptKhllTP-h ...............................p..tphhhhQhPshLP.h..stt.t.........................................................tt.tt....tt..p.t..pt..ttp.t.stpt................s.sh.pph......................spG.tlGKlhl+KSG+lplpl.........G..s.......lhhD................................................ls....hG.s...ss...sFlQ-llslst.....................cps..............................phhs...LGplpp.+.hlsoPch...................... 1 76 128 199 +4291 PF03431 RNA_replicase_B RNA replicase, beta-chain Finn RD anon Pfam-B_4422 (release 6.6) Family This family is of Leviviridae RNA replicases. The replicase is also known as RNA dependent RNA polymerase. 
25.00 25.00 25.00 25.00 23.70 23.70 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -13.04 0.70 -6.30 6 321 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 25 10 0 316 0 252.20 52 80.15 CHANGED lhspsslt.slus-Lh.shs...........ShuhsuhD.ssDsFc.lsYL+sElLoKa-sauhG.-s....culAatKFLsAEpcCtlpNpchahh.sasEp..hShuEusI+hu.RphIu+LLu-.ssh.shhc+CRFSGGAopsssRpautPuhKaAt.....ptslTsRAlcYshAh+cssu...........s-s+hhclsssN+ssTVPKNuKsDRsIAhEPshNMahQhGlGuhIRcRLRphGIDLNDQTlNQcLAppGSlsssLATIDLSuASDSISlcLVc.LLPPcaachLhcLRSshGhl.DG+llpaEKlSSMGNGaTFELESLIFAAlARSlspllthcsuslulYGDDIIlPocssssLh-VhpaVGFpPNp+KTFosG.PFRESCGKHYFpGVDVTPFYI++sIssLsDLhLlLNplhRWuTlsGlhDPRsasVapKYt+hlP+hLptss...PDsYusuAhlstshhsshstp+tasRhhshlhchtR.h...............chs-hhSYha-hhuch.shhcsu...ss.hs.tps.hhh..t.hp+...h....sots.uhhcshssSc ................................................spDL..thh...h.......uhs.sphD.pscsFp.lsYL+sElhoKasshs.u.sp................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +4292 PF01876 RNase_P_p30 DUF53; RNase P subunit p30 Enright A, Ouzounis C, Bateman A, Dlakic M anon Enright A Family This protein is part of the RNase P complex that is involved in tRNA maturation [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.49 0.71 -4.97 62 477 2012-10-03 00:45:34 2003-04-07 12:59:11 11 9 410 3 326 499 10 141.70 25 49.82 CHANGED hsRlslhhsss....h.....tptlsphpp....taDllulpPtspcshphAspsh..c......lDllohshst...+hshhlc+hhhphAhc+Gl.tlElsaushl...................ps..shsRpphlssh..ppllphs+p..pslllSSuApshhplRuPhDlhsLst.hh...G.....lspscucpuls.p. .....................................hpRlslh.hsss...t.t........ttths..phhp....ta..DllAlpPts....pchhphAspsh....c.....................lDllshshst......+hs.h.h.............hc.+hh.l.ph..Alp+Gl.thElsaushl..............................ts..sttRpphls...ss............tplhphs+u...........+slllS..SsAp..p...............h..plRuPhDlhsL......ht.lh.G.....lspppuptuls........................................... 0 104 184 268 +4293 PF01900 RNase_P_Rpp14 DUF69; Rpp14/Pop5 family Enright A, Ouzounis C, Bateman A, Cerutti L, Dlakic M anon Enright A Family tRNA processing enzyme ribonuclease P (RNase P) consists of an RNA molecule associated with at least eight protein subunits, hPop1, Rpp14, Rpp20, Rpp25, Rpp29, Rpp30, Rpp38, and Rpp40 [1]. This protein is known as Pop5 in eukaryotes. 21.50 21.50 22.20 22.00 20.70 20.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.35 0.72 -4.13 65 500 2009-01-15 18:05:59 2003-04-07 12:59:11 14 16 381 7 356 486 8 109.40 22 67.85 CHANGED RYllhcllh....................................sslsp.....................c.slhptl+pulpphaG-hGsups....sstlhshahss.oth.....uIlRssRsthchlhuuLshlsplss...................ptlhlcsltlSG...TI+psccthlch ...................................................RYlhhplhh...................................................plst......................p.slhphlcpultphaG-hGsuts....................t.tl..p..l......hh....h....sstTth............sIlR.sp.+pthchlhuuLsh.lsplps..............................psshhpslplSGTl+pspphhh.......................... 
0 109 192 282 +4295 PF00074 RnaseA rnaseA; Pancreatic ribonuclease Eddy SR anon Overington and HMM_iterative_training Domain Ribonucleases. Members include pancreatic RNAase A and angiogenins. Structure is an alpha+beta fold -- long curved beta sheet and three helices. 21.70 21.70 22.10 21.70 21.50 21.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.09 0.71 -3.83 108 847 2009-01-15 18:05:59 2003-04-07 12:59:11 15 3 206 473 241 891 0 119.30 33 78.61 CHANGED ppsshppFppQHls........sss..s..............hCN.phMppt.phhp.t....pCKshNTFlHps..hssVpulCsptsssCp...tppNCapSpsphplTpCplous.st......hssCpYpsoppp+h.lhVAC-sp...........lPVHhD ................................t..othppFt.pQHhs........sss..sp.................hCN.phMpp+.shppt......pCKshNTFlHps..hssVpslC.tp...ts..l.sC+.........ss.pp.NC..apSpsthplTsCclosusp......hPsCpYpss..t.p..p+.a..llVAC-ss...........lPVHhD.......................... 0 15 18 39 +4296 PF00075 RNase_H rnaseH; RnaseH; RNase H Eddy SR anon Swissprot; SCOP and HMM_iterative_training Domain RNase H digests the RNA strand of an RNA/DNA hybrid. Important enzyme in retroviral replication cycle, and often found as a domain associated with reverse transcriptases. Structure is a mixed alpha+beta fold with three a/b/a layers. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -11.40 0.71 -3.81 65 16967 2012-10-03 01:22:09 2003-04-07 12:59:11 19 181 3870 344 1941 16858 2641 121.40 50 20.38 CHANGED sps.hshaTDGSs.tp...stpu...tAGh.lh.pps...............+..pht.tsls.....tsosQp.uElhAlhtALc.s......ts........ppls.....IhTDSpYl......hp.lhp.......................................sh.p.p.tps....lhs......plhphl...hp.ppp.lhltaV.uH.pGh......s.GNchADcLAppusp ............................................................................s..t.shYs.DGAsspp..............sphG.........pA.Ga...Vs...s+G.................................+......pcs..hsLs..........cTT...NQ.+....s.ELp.A...l.hL...A....Lp...D......SG.............................scVN..........Il..T.DSQ..Ys.....lG.....IIp.........................................................uQPs....c...SE.S.........t.lVN............QIIEpL.............I+...KEc...V..Yl.uWV....PA...H..KGI...............................G.GNE....pVD+LVStGI.R.......................................................................................... 0 638 1217 1694 +4297 PF00773 RNB RNB domain Bateman A anon Pfam-B_1009 (release 2.1) Domain This domain is the catalytic domain of ribonuclease II [1]. 
20.30 20.30 20.40 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.05 0.70 -5.15 86 6644 2009-01-15 18:05:59 2003-04-07 12:59:11 14 56 4239 12 2000 5433 2047 308.20 33 41.36 CHANGED RpDL..pch........hhh.oIDsts.scD....hDDAl................plcphs.sGt............................aplhVHIADVopalp.s........oslDp.cAtpRus.........................olYLssp.slsMLPppLu..p....slsSLtssp......................................c.Rhshoshhpl.spp.Gp.......l.ts...hchttulIcspt+loYcplpphlpsttpt.......................................tppLptLh....p.luptlcppRhpp.........Gulph.s.hs....-h.................................ph.l.....................sp......pspshshhhhp.....................ps.ucpllcEhMlhANpssApal.......t.pp..thsslaRspstPs.p........th.phhphh.........................ht..ttt...........htphht........psp..p.hl.......phhlh+sh...ppupY........sspsh.............HauLuh....stYo+aTSPlRRYsDLlsHR.LpthLpp ..........................................................RhDL...Rsh...hh.TID..s.ts..u+D......hD..D..A..l.........................................................................hs....c.pht....sGt............................acLhV............t...........IADVohYVp..s..................................osLDp.EAh..pRu.s.............................SVY..h.s.........s.....p...s...l.P...MLPcpLS.......s.............slCSL....p...Pp........................................-..Rhshssthpl..stp...Gp.................................l...hs......hc..hh.......pull....c.........S..p.t..+.h..oYspVpp..hl.p...s..p.pp.ttt.................................................................................................lhtpl.p..Lt....p..ltphLp.p.tR.h.p+.........................Gu.lsF..-..ps...................Eh....................................................................................+hh.l..................................sp................psc....p.lh.pp....................................Rp.uc.+llEE...hMlhAN.ssA..chl.....................p...ct..
......thss.laR.l...H.p.t.Ps.c...........phpphhphlt................................................hhth.ht.t..................................shpphhpt...............htsps...tth.l....................pph.hhR.sh.....ppAtY....................................................................sscsh..s.......Ha.GLuh....................phY..sHFT......SPIRR.Ys.DLllHRhl+thl..t......................................................................................................................................... 0 677 1244 1691 +4298 PF02508 Rnf-Nqr Rnf-Nqr subunit, membrane protein Mian N, Bateman A anon Pfam-B_1638 (release 5.4) Family This is a family of integral membrane proteins including Rhodobacter-specific nitrogen fixation (rnf) proteins RnfA and RnfE [1] and Na+-translocating NADH:ubiquinone oxidoreductase (Na+-NQR) subunits NqrD and NqrE. 20.70 20.70 20.90 20.80 20.60 20.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.31 0.71 -4.71 128 4543 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 1639 0 935 2710 1506 193.80 38 92.91 CHANGED hp..ph...plhhsulhhsNsllsphLGlCPhLAVosplpsAlGMGlAsshVlshSshhlull+phl..........................................stlRl.saIhlIAuhVpll-hl...lcu......as..sLYpsLGIFlPLIsTNChlL........GRA-hhAp+................pshhtShlDG.lGsGlGFslsLllluulREhlutG..shh....t..................s.sh.pshslhllssG....AFhshuhllt ................................................................................................pchhplhlsslhhNN.sLlp..hLGlCP.hLA.Vo.pplp..sAlG.hGlAsThVlsloshsspLlcpal...........................shl..Rhlsal.hlIAulVph.l-hl...lct.au...sL..Y.ps..LGI...FlPLIs...TNChll........G.tA....hhsp+...................p.shhpShl..G.husGlGaslslllluulRE.hlusus..l..........................stsh...pshsl..sh..lssG........A.Fhuhuhll.h.......................... 
1 309 603 792 +4299 PF03259 Robl_LC7 Roadblock/LC7 domain Mifsud W, Bateman A anon Aravind L Domain This family includes proteins that are about 100 amino acids long and have been shown to be related [3]. Members of this family of proteins are associated with both flagellar outer arm dynein and Drosophila and rat brain cytoplasmic dynein. It is proposed that roadblock/LC7 family members may modulate specific dynein functions [2]. This family also includes Swiss:Q9Y2Q5 Golgi-associated MP1 adapter protein and MglB from Myxococcus xanthus Swiss:Q50883, a protein involved in gliding motility [4]. However the family also includes members from non-motile bacteria such as Streptomyces coelicolor, suggesting that the protein may play a structural or regulatory role. 22.20 22.20 22.20 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.46 0.72 -4.42 41 1879 2012-10-02 21:07:43 2003-04-07 12:59:11 12 15 656 43 934 1705 25 90.00 22 65.84 CHANGED hs.hlhphhtpssulpsslllss-Glslst.....shssscscplAAhsuulhuhuctsspphs..ssplcphhlcscpshlhlsssus...tshL.slls ..............................h.phh.p.p.ssG.lptsllls.s..D....G..lhlss.................sh.sp.s..s.u..........-....p..........l.AA.l.s...u...ul.h...uh.ups.....su.......pphs.......t........ss....lp.phhlchcp.....u.h.lh..lhss..uc....tshL.slh.................................... 
0 317 624 826 +4300 PF00480 ROK ROK family Finn RD anon Prosite Family \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.99 0.71 -4.45 14 15339 2012-10-02 23:34:14 2003-04-07 12:59:11 15 52 3744 43 2992 10332 2028 179.30 24 53.05 CHANGED ulDlGuT+lthslhs.suplltpcchsT.....ssspthlsslsshlpch.sphs.......ululussGhlspsp......hlshsPp..hshtc.hslhpplcpchslPVhlpNDANsuAhuEphhssupshpshlhloluTGlGuGllhss+lapG.spG.AGElG...Hhhhs.p...........t.hCsCGpp.GClEshASGpAl .......................................................................................................ulDlGuo.p.l...p.....h..u..l....h.....s................s............u........p........l....l....t......c.....p.....p......h..s.ss.............pss....p....p....h...l....p....t....l....h....p.....h..l....p....p....h.h..t..p...ht..........................ul.G.lu.hP......Gh..lc....p.p................sh.l...h...t...s..s.p.................s.a....p.......s....h....s..........l...t....p.....t....L.........p........p........p......h............s............l...........P........l......h...l.p....ND..ss..s...s..A......l...u......E.............h.............h.............h....................G.............s.............u.......p.....s.....h..........p........s.....h.l......h..l.............s.............l.............G............T...G.....l.....G.u..Gl....l...h.......s.G...c.....l.h...p..G...t......ps..tu..G.E.lG.........H.h..h..h..s....t......................................t.....C....C..Gp......h...G..ClEshhut.s................................................................. 0 1016 1988 2532 +4301 PF02027 RolB_RolC Glyco_hydro_41; RolB/RolC glucosidase family Bateman A anon Bateman A Family This family of proteins includes RolB and RolC. RolC releases cytokinins from glucoside conjugates [1]. Whereas RolB hydrolyses indole glucosides [2]. 
21.30 21.30 25.80 22.50 18.50 21.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.15 0.71 -5.13 41 119 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 15 5 14 115 0 190.50 20 62.83 CHANGED hspstapsh.slphlpssp.cLctpLppAhpsapshhppslh.htpthh..h..............................t.hs.-hhh.spphhYlYssppphpphhp.s+hlspsu.spullAsslPPYppslohtthhphhNpls.........hsspps.pchsaFlAl.hPsssFhct.phplpstpst.hhhsFaspp..............................ss.hsa-.l...lAhGcshh ......................hshapsh.clphlpstt.cLptpLppAhpsacshhppslh.h.pth.....................................................h.hs.-hhh.stphlYlYssppthpphhp.p+.ls............pss.tpullAsslPPYppslohtphhphhNpls..........sst..psspchsaFlAl.hPossFhch.ph.lpstp.stphhhsaaspp...............................st.hsa-.l...lAhGpshh........................... 0 8 14 14 +4302 PF01815 Rop Rop protein Bateman A anon PSI-BLAST Domain \N 21.20 21.20 21.20 51.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.01 0.72 -4.52 12 216 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 168 35 7 146 14 57.70 73 91.23 CHANGED MTKQEpsALNMAKFI+uQo.LLLLEKLspLD...LDccAs.CE+LHEpAEpLappLusRlt.p-.p MTKQEKTALNMARFIRSQT.LTLLEKLNELD...ADEQADhCEpLH-cA-ELapSh.sRFts-sp.... 0 1 3 6 +4303 PF00980 Rota_Capsid_VP6 Rotavirus major capsid protein VP6 Finn RD, Bateman A anon Pfam-B_1047 (release 3.0) Family Rotaviruses consist of three concentric protein shells. The intermediate (middle) protein layer consists 260 trimers of VP6. VP6 in the most abundant protein in the virion. VP6 is also involved in virion assembly, and possesses the ability to interact with VP2, VP4 and VP7 [1,2]. 
25.00 25.00 33.60 40.00 24.50 17.70 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.15 0.70 -5.83 3 1454 2009-09-11 12:38:02 2003-04-07 12:59:11 12 1 356 40 0 791 0 234.90 80 99.79 CHANGED MDVLYSLAKTLKDARcRIVEGTLYTNVuDIIQQsNQlIsTLNGSTFQTGGIGNLPlRNWsFDFGpLGTTLLNLDANYVENARTTIDYFIDFIDuVClDEMVRESQRNGlAPQS-oLRpLAuuKFKRINFNNSSEYIENWNLQNRRQRTGYlaHKPNIFPYsNSFTLpRSQPtHDNlMGThWLNsGSEIQIAGFDSoCAlNAPuNIQcFEHlVpLRRsLoNATloLLPsAPRlopPsVIPoADGtTTWLFNPVILRPNNVpVEFLLNGQlIssYQARaGTlsARNFDSIRISFQLlRPPNMTPuVsALFPQAuPFsHHATVGLTLRIESAoCESVLuDANEshLSIVTulRQEYAIPVGPVFPPGMNWTELLoNYSsSREDNLQRlFTVASIRSMlI ...........................................................................................................................................................................................................................................................sstTTWaFNPlIL..RPNNVEVEFLLNGQIINTYQARFGTIlARNFDTIRLSFQLMRP..P..NMTPAVsALFPQAQPFp.HHATVGLTLRIESAVCESVLADAsEThLANVTuVRQEYAIPVGPVFPPGMNWp............................................ 0 0 0 0 +4304 PF01525 Rota_NS26 Rotavirus NS26 Bateman A anon Pfam-B_762 (release 4.0) Family Gene 11 product is a non-structural phosphoprotein designated as NS26 [1]. 
21.10 21.10 21.50 21.10 20.70 21.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.45 0.70 -4.55 6 556 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 255 0 2 356 2 185.60 87 99.47 CHANGED MSDFGIN...LDAICDNV++spSsScTuSQlSNRSSR+MDFV..D-EELSTYFNSKu..SVTQSDSCSNDLssKaSIIoEAVlCDESAHVSADAlQEK-Eo....lsQlDaslMKWMhDS.DGIslNGGlNFo+uKSKsp....................cpEschT-.KScTNl.sasSlsIsSslGtFNPIppplKhEAls-hFEDEDs-sChC+NCPY+EKYhKLRpRMKsVLlDhIsEM ...........MS.LSIDVTSLPSIS.SSIFKNESSSTTSTLSGKSIGRSEQYISPDAEAF.NKYMLSKSPEDI.GPSDSASNDPLTSFSIRSNAVKTNADAGVSMDSSTQSRPSSNVGCDQlDFSLsK.......GIsVsANLDSCI.SISTsp....................KKEKSKKD.KSRKH...YPRIEADSDSEDY..............VLDDSDSDDGKCKNCKYKKKYFALRMRMKpVAMQLIEDL................... 0 0 0 1 +4305 PF02509 Rota_NS35 Rotavirus non-structural protein 35 Mian N, Bateman A anon Pfam-B_1677 (release 5.4) Family Rotavirus non-structural protein 35 (NS35) is a basic protein which possesses RNA-binding activity and is essential for genome replication [1]. 25.00 25.00 120.20 120.00 18.80 18.60 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.02 0.70 -5.71 3 493 2009-09-11 00:14:00 2003-04-07 12:59:11 9 1 240 7 0 353 0 301.10 90 99.67 CHANGED MAELACFCYPpL-cDus+ahPaN+pAIKCMLoAKVDKccpSpaYDTIlYGlAPPPpFKKRFNTs-NSRGMNYETDMYsKVAsLLs-lLNuIKlsp-K.sA-IlusVISVRHLENLlLRIEN+DDILScsscLllKSVLIAhGLlKEsETTsTAEGGEIVFQNuuFTMWKLDYpSH+LMPIhDsNFlEYKITlNccsPIsD+cV+ELlAELRWQYNKFAVITHGKGHYRVV+YSoVANHADRVYuTFKSspK+GssacFN-LDpRlIWsNWtAFluSMpsGsTLEluK+LLFoKMK.sSsoFKGlTT-RKhDEVShlG ...MAELACFCYPHLENDSYKFIPFNsLAIKCMLTAKVDKKDQDKFYNSIVYGIAPPPQF+KRYNTsDNSRGMNYETsMFNKVAlLICEALN.SI.KVTQS-.VANVLSRVVSVRHLENLVLRKENHQDVLFHSKELLLKSVLIAIGQSKEIETTATAEGGEIVFQNAAFTMWKLTYLDHKLMPILDQNFIEYKITLNEDKPISDlpVKELlAELRWQYNRFAVITHGKGHYRVVKYSSVANHADRVFATYKNNsKSGNshDFNLLDQRIIWQNWYAFTSSMKQGNTlDVCKKLLFQKMKQEKNPFKGLSTDRKMDEVSHVG.... 
0 0 0 0 +4306 PF00981 Rota_NS53 Rotavirus RNA-binding Protein 53 (NS53) Finn RD, Bateman A anon Pfam-B_1048 (release 3.0) Family This protein is also known as NSP1. NS53 is encoded by gene 5. It is made in low levels in the infected cells and is a component of early replication. The protein is known to accumulate on the cytoskeleton of the infected cell. NS53 is an RNA binding protein that contains a characteristic cysteine rich region [1]. 22.10 22.10 24.10 24.10 19.60 19.00 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.89 0.70 -5.84 14 573 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 251 0 0 494 0 455.80 70 99.18 CHANGED MATFKDACaHY++lsKLNptlLKLGANssWRPuPssKhKGWCLDCCQaT-LTYCpGCoLaHVCQWCsQYsRCFLDsEPHLLRMRTF+ssITKEDLpsLIsMYshLFPINc+IVsKFhsslKQ+KCRNEahhpWYNHLLhPITLQALslcl-.sclYYIFGYYDsMsptNQTPFpFVNhIshYD+LLLDDlNFcRMuhLPssLQppYAhRYFSKSRFlSpph+plphSDFScphlp.spcsPspthplhRNsop.......hcWNcpCc....LlpstpsYhshhpTSapEpYsVSspshhasptKLphlS+hhKPNYlsSNHtpsAocV+sCKWCslsssapsWpDFRl+clYNslhsFIRALhKSNsNVGHCSSpEplYthl+slhhhspscpaspolpplFshL-PV-lssscYlLhsaplsa-lhsllhpslpsc.lPpILols-htsIlpuII.cWFDlchMRchPhsstoTscL+cLpccscLs-EYDhhlSDsE ....MATFKDACYaYKRINKLN+sVLKLGVNDTWRPSPPTK..YKGWCLDCCQHTDLTYCRGCTMYHVCQWCSQYsRCFL..D..s-PHLLRMRTFKNEVTKsDLhNLIDMYshL....F....PINp+IVsKFIssTRQHKCRNEChsQWYNHLLMPITLQSLSIELD.GDVYYlFGYYDsMpslNQTPFSFsNLlDhYDKLLLDsVNFsRMSFLPssLQQEYALRYFSKSRFISEp.RKClsD.HFStNVlE.NLHNPSFKlQITRNCSE.....hSs-WNtACK....LVKshssYFslLKTSHlEFYSlSTRCRhFTQaKLKlASKhIKPNYlTSNH+TsATEVHNCKWCSINNuYpVWNDFRlKKIYDNIFNFLRALVKSNsNlGHCSSQEKIYEalcDVLsVCD-E+WKhuVscIFNCLEPVELssVcYVLFNHElNWDVINlLVQSl.GK.VPQILTLsDllhIhpSIIYEWFDIRYMRNTPMTTFTlDKLRpLpTtsKTV.-YDSGISDVE................................................................................ 
0 0 0 0 +4307 PF04866 Rota_NS6 Rotavirus non-structural protein 6 Mifsud W anon Pfam-B_4831 (release 7.6) Family \N 25.00 25.00 45.80 45.80 19.10 17.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.30 0.72 -3.98 12 253 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 198 0 0 89 0 91.00 90 99.27 CHANGED MN+L.QRQLFLENLLVGVNSTFHQMQKHSINTCCRSLQRILDHLILLQTIHSPAFRLDRMQLRQMQMLACLWIHQ+NHDLQsTLGAIKWISP .MNRLLQRQLFLENLLVGVNShFHQMQKHSINTCCRSLQRILDHLILLQTIHSPAFRLDRMQLRQMQhLACLWIHpHNHDLQATLsAIpWISP..... 0 0 0 0 +4308 PF01665 Rota_NSP3 Rotavirus non-structural protein NSP3 Bashton M, Bateman A anon Pfam-B_1010 (release 4.1) Family This family consist of rotaviral non-structural RNA binding protein 34 (NS34 or NSP3). The NSP3 protein has been shown to bind viral RNA. The NSP3 protein consists of 3 conserved functional domains; a basic region which binds ssRNA, a region containing heptapeptide repeats mediating oligomerisation and a leucine zipper motif [2]. NSP3 may play a central role in replication and assembly of genomic RNA structures [2]. Rotaviruses have a dsRNA genome and are a major cause cause of acute gastroenteritis in the young of many species [1]. The rotavirus non-structural protein NSP3 is a sequence-specific RNA binding protein that binds the nonpolyadenylated 3' end of the rotavirus mRNAs. NSP3 also interacts with the translation initiation factor eIF4GI and competes with the poly(A) binding protein [3]. 
25.00 25.00 53.30 53.10 24.50 24.10 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.84 0.70 -5.45 6 508 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 241 10 0 389 0 266.80 87 89.08 CHANGED EoTQthspSIlNuuF-AAlsussSsL-phGIpYDas-.VhuRl+sKachlhDDSGVpNNhIGKAtTIDQALssKhuSAtRNcNWhTsspTlARLDEDVN+LRhMLSuKGIDQKMRVLNuCFSVpR.PGKSSSII.......................pCT+LhK-KIERGE..lEV-Dphh-.+M-lDs...IDWKu+a-phcp+FpShtthV...........sEKYssWV.....hKA+KlsEsMhsLQ.sIupQQs+IsE...hphaNpKLp.+s..lps+.sShIuulEWhhpShph-D-l+sDhpQphNolsVINshpulD......DlE .........ESTQQMVSSIINTSFEAAVVAATSTLELMGIQYDYNE.VFTRVKSKFDYVMDDSGVKNNLLGKAlTIDQALNGKFGSAIRNRNWMTDSKTVAKLDEDVNKLRMhLSSKGIDQKMRVLNACFSVKRIPGKSSSII.......................KCTRLMKDKIERGE..VEVDD.SaVDEKMEIDT...IDWKSRYDQLEKRFESLKQRV...........NEKYNoWV.....QKAKKVNENMYSLQNVISQQQNQIAD...LQQYCNKLE.sD..LQuKhSSLVSSVEWYLRSMELsDDVKsDIEQQLNSIDlINPINAIDDlE............................. 0 0 0 0 +4309 PF01452 Rota_NSP4 Rotavirus non structural protein Bateman A anon Prodom_2202 (release 99.1) Family This protein has been called NSP4, NSP5, NS28, and NCVP5. The final steps in the assembly of rotavirus occur in the lumen of the endoplasmic reticulum (ER). Targeting of the immature inner capsid particle (ICP) to this compartment is mediated by the cytoplasmic tail of NSP4, located in the ER membrane. 25.00 25.00 98.70 98.50 24.80 24.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.11 0.71 -4.93 5 1516 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 459 20 0 1040 0 169.00 81 99.03 CHANGED MEKLTDLNYTLuVITlMNsTL....HNIIp-PGMsYFPYIASVLTVLFThHKASlPTMKlAh+TSpCSYKVIKhVVVTIFNTLLRLuGYK-plToKDElEpQhsRIVKElRcQLcMIEKLTTREIEQVELLKRIYDhLhspsssEIDMSKETN+KsaKTLc-Wu.sKcPY-PT-VlA.s ......M-KLsDLNYTLSVITLMNDTL....HoIIpDPGMAYF...PYIASVLTVLFTLHKASIPTMKIALKTSKCSYKVIKYCIVTIlNTLLKLAGYKEQVTTKDEIEQQMDRIVKEMRRQLEMIDKLTTREIEQVELLKRIHDpLIsRPlDhIDMoKEFNQKNlKTLDEWE.SGKNPYEPpEVTAS.. 
0 0 0 0 +4310 PF05087 Rota_VP2 Rotavirus VP2 protein Moxon SJ anon Pfam-B_6280 (release 7.7) Family Rotavirus particles consist of three concentric proteinaceous capsid layers. The innermost capsid (core) is made of VP2. The genomic RNA and the two minor proteins VP1 and VP3 are encapsidated within this layer [1]. The N-terminus of rotavirus VP2 is necessary for the encapsidation of VP1 and VP3 [2]. 25.00 25.00 63.80 63.70 17.90 17.40 hmmbuild -o /dev/null HMM SEED 887 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.69 0.70 -13.64 0.70 -6.62 3 539 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 239 6 0 403 0 737.00 88 99.98 CHANGED MAYRN...+RcpNopppDscpEK-sEpQ-..........-K-+pELKEKVhDKK-sVlTD..Ds.pDhK-psss-NLKlsDpVKcSpKE-SKQLlEVLKTKcEHcKEIQYEILQKTIPSFpPcEoILKKLcDIKP-.AKKpsKLFRLFEPKQLPIYRANGEKELRNRWYWKLKKDDLP-GDYDVREYFLsLYsQVL-EMPDYlLLKDMAVENKNSRDAGKVVDSETApICDsIFQDEETEGsVRRFIADMRQRVsAERNTVcYPAILHPIDYEFNcYFLpHQLIEPLTN-lIaNYIPERLRNDPNYILNMDsNLPoTARYIRPsLLQDRLNLHDNFESIWDTlT+ANYVLARSVVPDLKELVSTEAQIQKMSQDLQLEALTIQSETQFLTGINSQAANDAFKTIIAsMLSQRTISL-FVTSNYMSLISuMWLMTIVPo-MFIRESLVACQLAVINTIIYPAFGLQ+MHYpNGD.RRPFpIAEQQIsNFQVpNWLHFVNsNQFsQVVIDGVlNQsLNDsIRsG+IINQLMEALssLSRQsFsTYPlDYKRSVQRGILLLSNRlGQLVDLTRLlsYNYETLMACITMNMQpVQTLTTE+LQLTSVTSLCMLIGNsTVIPEPpTLFHYYsoNVNFHoNYNERINDAVAIIsAANRLDLYQKKMKuIVEDFLKRLHIFDVsKVPDDQMYRLRDRLRpLPVERRRlDVFsIILNNMDQIERASDKIAQGVIIAYR-MpL-YDEhYGaVNlARDlNGFQQINLEELMRTGDYuQITNhLLNNQPVALVGAIPFVTDSSVISLIAKlDATVFAQIVKpRKVDTLKPILFKINSDSNDFYLVsNYcWVPTSTTKVYKQVPQQFDFRsSMHMLoSNLTFTVYsDLLsFVsADTVEPINAVAFDNsRIMQEL 
...................................MAYRKRGs+..REs...Q.Q..NERLQE.KElEps..sDs.M............p.psNN+.K.QQL.SDKVLSQKEEIITD..............sQDDlKIADEVKKSS.KEESKQLLEILKTKEDHQKElQYEILQKTIPTFEPKESILKKLEDI+PEQAKKQhKLFRIFEPRQLPIYRANGEKELRNRWYWKLKKDTLPDGDYDVREYFLNLYDQILIEMPDYLLLKDMAVE..NKNSRDAGKVVDSETAsICDAIFQDEETEGslRRFIA-MRQpVQADRNlVNYPSILHPIDaAFNEYFLpHQLVEPLNN-IIFNYIPERIRNDVNYILNMDhNLPSTARYIRPNLLQDRLNLHDNFESLWDTITTSNYILARSVVPDL..K..E..LVSTEAQIQKMSQDLQLEALTIQSETQFLsGINSQAANDCFKTLIAAMLSQRTMSLDFVTTNYMSLISGMWLLTVlPNDMFlRESLVACpLAIlNTIVYPAFGMQRMHYRNGDPQTPFQIAEQQIQNFQVAN.WLHFlNNNpFRQVVIDGVLNQsLNDNIRNGpVINQLMEALMQLSRQQFPTMPVDYKRSIQRGILLLSNRLGQLVDLTRLLuYNYETLMACITMNMQHVQTLTTE+LQLTSVTSLCMLIGNsTVIPSPQTLFHYYNVNVNFHSNYNERINDAVAIITAANRLNLYQKKMKuIVEDFLKRLpIFDVsRVPDDQMYRLRDRLRLLPVEhRRLDIFNLILMNM-QIERASDKIAQGVIIAYRDMQLERDEMYGaVNIARNLDGaQQINLEELMRTGDYuQITNMLLNNQPVALVGALPFlTDSSVISLIAKLDATVFAQIVKLRKVDTLKPILYKINSDSNDFYLVANYDWlPTSTTKVYKQVPQ.FDFRsSMHMLTSNLTFTVYSDLLuFVSADTVEPINAlAFDNMRIMNEL... 0 0 0 0 +4311 PF00639 Rotamase PPIC-type PPIASE domain Bateman A anon Prosite Domain Rotamases increase the rate of protein folding by catalysing the interconversion of cis-proline and trans-proline. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -10.28 0.72 -3.25 87 5141 2012-10-02 13:30:10 2003-04-07 12:59:11 16 48 2764 71 1422 7342 3338 93.40 30 34.16 CHANGED HILltst..................tp.tts....cppupplhpplpsGt.....s.FsplApphS.Dsso...uppGGcLG..h...hsts.......phssp.Fpcslhshp...Gp.lS.tPl.....co..phGaHllcltc .................................................................................................HILlp....................................t.tts.......cpphpplh....ppl...psGt........................s.Fuc.lA+..pa..S....-...s.s.S........up..p..G.....G-......LG......h...........h.p..s.......................ph.s..st....F....c.....c....s.....l..h.s..hp........h.....Gp.....lo..sP..l...............+o....p.h.G..aHllcl..t.............................................................. 0 466 906 1210 +4312 PF03428 RP-C Replication protein C N-terminal domain Finn RD, Bateman A anon Pfam-B_4463 (release 6.6) Domain Replication protein C is involved in the early stages of viral DNA replication. 22.90 22.90 23.00 23.60 22.60 22.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.85 0.71 -4.82 52 484 2012-10-04 14:01:12 2003-04-07 12:59:11 8 4 191 0 102 471 13 159.10 41 44.88 CHANGED RshThuhltuQhtuppht.ssts.lsKWclaRplscA+shlGlsDRuLAVLsALLSFaPcscL..spcssLlVFPSNtQLuhRApGMusuTLRRHLAsLV-AGLIhR+DSPNGKRYAR+spsGplppAaGFDLuPLluRApElpphAppltA-+pth+th+EclTltRRDltKLIpsuh-E ..............................................t.ht.t..st...t........tt..ssKWplhc.lsc.A+shlGls.cRuLsVLsALLoFaPcs..pl.......s............tp.s...........s..l...l.......VFPSNtQLuhRspGhussTLRRHLAhLV-AGLIhR+D.SPNGKRYARRs..p.s.GpltpAFGFsLuPLlsRut....E.lcthAppltsc+tth+phREplTlhRRDItKLIphuh........................... 
0 12 46 66 +4313 PF03055 RPE65 Retinal pigment epithelial membrane protein Griffiths-Jones SR anon Pfam-B_947 (release 6.4) Family This family represents a retinal pigment epithelial membrane receptor which is abundantly expressed in retinal pigment epithelium, and binds plasma retinal binding protein. The family also includes the sequence related neoxanthin cleavage enzyme in plants and lignostilbene-alpha,beta-dioxygenase in bacteria. 17.40 17.40 17.90 18.30 16.40 17.30 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.91 0.70 -5.94 151 2113 2009-09-10 15:57:50 2003-04-07 12:59:11 10 23 714 11 903 1992 868 396.50 24 89.26 CHANGED pusassstpEh.sst....thpVp.GpIPspL.sGshhRNG..Ps.h...............ts....hthpHhFDGDGMlpuhpht.......sG...c....sp.apsRalcTptaptEp..psu...c.hlh.u.ht..............................sshps.............................hpssANTsllha...sG..............................+lLAh.hEsu.hPapl-s....toL-TlGh...hs.hs........................stlt.............tshoAHP+hD.PtTG-......lhsFuh......p..s.t.....tshlphhpl........sssG.............t.hhtp.hsh.shs..t..sshhHDFulTcsYslhhp..Plp...hs..thh...hut.......................................pshtacsppssp.htllsR...c....st..........ptl...............phacs.sss.asaHhhNAa....E..t.ts..............cllh-ssph...ssss.hh.............................................tt...t.hp......................tupLtRaplshp...s...............................................................................s.......ps.pp....phls.....pt...sEFPpl...........s.sphsGpp.hRasY.....................................shpsps......................hsulsKh............Dh....ps.......................sp....tp................................haphs.....spasuEPlFVP....ps..............su..........t......tEDDGallshVhct...pp............t...pSp..LlllDAp..shsp..sl..Acl.pLPt.+l..PhGhHGsalss 
.........................................................................................................................................................................................tp.............h..l.t..Gp.lP............t.l.pG..hhRsG..s.s.h................................th.HhF......D....G.tu..............hlpthphp..........................................sG.......p.sp..atsRalpopt.hhttt...t.s.........hh.t.ht.........................................................................ht...sssNsslhhh.......ss..............................................chhuh...Ess.....ht...lp...........tsLpThuh..........hs..hp..............................................................s.lt...................shsAHP+.h....D..s....sGp.............................hhshsh.....s...................................shhphhhh......s.ps..................................t..hh.p..h.h....h...t...ssh.hHs....au.l..Tcpa..hl.h.p...slhh.p...thh......ts.............................................ht..ap.....ph...sp.hh....lls+......t...........st.........h............................................th...hcs.......sh....hh.hHhh.........NAa.....E.....ctt..................................pl.lh...hsth...ts.....h...........................................................................................................s.lhchhlsht....s............................................................................................s.......th..pt.....p.lt...........pt...shE.hspl........s....ph..hGp.p.h+asY........................................hht..................................sslh.Kh...............Dh...........s................tp........hp..........................................................ha..s.....s.th...suEPhFVP...ps....................................................su.......t.......tEDDG..hlls.hlhst.......tp...................t..p.u..lhll..DAp.....sh.......pt............lAph..pl..P...pl..PhGh..HGhah..t................................................................................ 
0 234 534 747 +4314 PF02318 FYVE_2 RPH3A_effector; RPH3A_effect_N; FYVE-type zinc finger Mian N, Bateman A, Eberhardt R anon Jackhmmer:Q13875 Family This FYVE-type zinc finger is found at the N-terminus of effector proteins including rabphilin-3A [1] and regulating synaptic membrane exocytosis protein 2 [2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.06 0.71 -4.20 21 739 2012-10-03 17:27:21 2003-04-07 12:59:11 11 21 94 8 344 696 0 106.60 29 14.78 CHANGED SpLTD-EAcHVhpVlQRDhcLRKKEE-RLucLKp+lpcEso++plLusptphs-opCl+ChpPFpFLlNoK..RQClDCchhlCKs.Cupa........sKpEpGWlCcsCphuRll.KhtSLEWaYcsV ..............................................LpctEtchl.pVltRs.phc.phEp.......c.......R..l..t..c.Lpp..c.l.......E.p.h.+.+...th..h............u...p...........t..........p.................s......p...p.....p.........Ch....hCtp...........h....s..hl.hsss........p.CpsCphplC.pp..Cssh............................pppcthWlCslCpc.p..ppl.hhtoGtWFap....................... 0 57 84 185 +4315 PF04390 LptE DUF532; RplB; Lipopolysaccharide-assembly Kerrison ND anon COG2980 Family LptE (formerly known as RplB) is involved in lipopolysaccharide-assembly on the outer membrane of Gram-negative organisms. The lipopolysaccharide component of the outer bacterial membrane is transported from its source of origin to the outer membrane by a set of proteins constituting a transport machinery that is made up of LptA, LptB, LptC, LptD, LptE. LptD appears to be anchored in the outer membrane, and LptE forms a complex with it. This part of the machinery complex is involved in the assembly of lipopolysaccharide in the outer leaflet of the outer membrane [1]. 
23.30 23.30 23.40 23.60 23.00 23.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.01 0.71 -4.48 162 2176 2009-11-10 14:17:04 2003-04-07 12:59:11 7 3 2150 4 501 1407 1106 153.80 20 84.72 CHANGED hhhhslhlu...uC...Gap.hssh.sh...................shthpslhlpss......pst..s.lpptlpcpLttsssthhsst...............hhLplphp.shsppshshs...tsu......ps..scaplshslpaplt...phsss.....phl......shs...sppsas..hs........ss..sl......upptpcpt..hhcchtpphAppllpclst ................................................h.hhhslllu....uC.....G.a+hpsss.th.........................s.phps.hhlsss...........cs.......s.lsctl....cppL...p....ts.s.sphlspp.pt.................spLclttsshsppshol.......psG....................ps...sEa..plhhslshplh......hssp......chh..........sho........sh+sa......s.............sp....sl..........Aps..sp....pch.....lh..p.ch.tccsucQllppl.................................................... 0 147 308 412 +4317 PF04032 Rpr2 DUF363; RNAse P Rpr2/Rpp21/SNM1 subunit domain Kerrison ND, Finn RD, Mistry J, Wood V anon COG2023 Family This family contains a ribonuclease P subunit of humans and yeast. Other members of the family include the probable archaeal homologues. This family includes SNM1 [2]. It is a subunit of RNase MRP (mitochondrial RNA processing), a ribonucleoprotein endoribonuclease that has roles in both mitochondrial DNA replication and nuclear 5.8S rRNA processing. SNM1 is an RNA binding protein that binds the MRP RNA specifically [2]. This subunit possibly binds the precursor tRNA [1]. 
22.60 22.60 22.70 22.70 22.50 22.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.31 0.72 -4.11 86 508 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 375 5 347 512 76 95.70 24 51.13 CHANGED p.RlpaLhphAp............................................................................hst.sspl...u+pYlphhcplup+tplc.ls.p..h...KRphC+cCpshLlPGhssclRlp............................................................psh...llh......pChpCup ...................................................................................................................RlpaLhphAp............................................................................h....t.t..ts.s...pl....uchYsphhcplu..p....+tpl...+...ls.p........h...K.Rp....hC+..pCsshLlPGh.s.s.p.l+lc.............................................................................................................t..tph.......lsh........pChpCs...................................................................... 0 97 190 288 +4318 PF01765 RRF Ribosome recycling factor Bashton M, Bateman A anon Pfam-B_949 (release 4.2) Domain The ribosome recycling factor (RRF / ribosome release factor) dissociates the ribosome from the mRNA after termination of translation, and is essential bacterial growth [1]. Thus ribosomes are "recycled" and ready for another round of protein synthesis. 
22.80 22.80 23.00 25.70 21.70 22.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.75 0.71 -4.78 133 4805 2009-09-17 04:39:55 2003-04-07 12:59:11 14 8 4668 22 1216 2907 2488 163.70 46 86.18 CHANGED shcp-lsplRT.GRAssulLDslpV-hY...G.s.s....PLsplAslols-sRplhIpPaD.pshlpsIE+AIhs.u....sL.GlsP.p.sDG...phIRlslPsLTEERR+-llKts+.chuEcuK......lulRNlRRDu.cpl.Kchpcpt...loED-h+chpcclQKlTDcalpclDchhppKEKElh ........................................hcccluplRo...G.R.As.s.ulLDtlpV-Y..Y..................G..ssT...........PLsQlAslo..l...s..-uRsLhIs........PaD..+.............o..hltslEKAIhs.S.DL..GlNP....s..s-G..................slIRlslPs.L...TEER.RK-Ls...K.l+.phuEpAK..........VAlRNlRR.DApDpl.KK..hpKct-..IoED-h+c..tp.c.-lQKlTDphlcclDphlssKEpElh................................... 0 421 787 1030 +4319 PF00076 RRM_1 rrm; RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) Eddy SR, Birney E anon Published_alignment Domain The RRM motif is probably diagnostic of an RNA binding protein. RRMs are found in a variety of RNA binding proteins, including various hnRNP proteins, proteins implicated in regulation of alternative splicing, and protein components of snRNPs. The motif also appears in a few single stranded DNA binding proteins. The RRM structure consists of four strands and two helices arranged in an alpha/beta sandwich, with a third helix present during RNA binding in some cases The C-terminal beta strand (4th strand) and final helix are hard to align and have been omitted in the SEED alignment The LA proteins (Swiss:P05455) have an N terminal rrm which is included in the seed. There is a second region towards the C terminus that has some features characteristic of a rrm but does not appear to have the important structural core of a rrm. The LA proteins (Swiss:P05455) are one of the main autoantigens in Systemic lupus erythematosus (SLE), an autoimmune disease. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.06 0.72 -4.34 79 50512 2012-10-02 20:46:34 2003-04-07 12:59:11 17 788 1401 465 29915 55800 1095 67.70 24 23.75 CHANGED laVssL..s.ssoccpLcphFpp.aGtl.hshplhtc.....ptpscGauFVpFpspcsAppAlpthsu.pplpu+plp .................................................laVss.L........s..............p.....s.....s........c....p........p..........L......c........p......h....F.........p........p....a...........G..........p.............l.....t...........p..............s..........c................l............h............p...........c...................s.............s........p...........s............+...........G...............a...............u........F.......V...........p.......F........p...........s........t.......p.......s.......A...p..p.......A....l..p..t...h..ss..t.l.tsp.h................................................ 0 9087 14852 22632 +4320 PF00398 RrnaAD Ribosomal RNA adenine dimethylase Finn RD anon Prosite Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.48 0.70 -5.36 20 6621 2012-10-10 17:06:42 2003-04-07 12:59:11 15 22 4983 44 1737 10162 4608 241.80 28 87.89 CHANGED tsct+pphGQNFLpsppllpcIlcpsslp......pspsVlElGsGpGtlTptL.schs..cpVsulElDs+LschLpccht....psslpl.lppDhhpashPp........................p.atlsuNlPY.sIoosIlcpllp.......ttph..psshlllppphA++hhut.s..hhspLolhhpshs-hphltclspp..................................hFcPsPpVDS...ullclcR+spshh.s.tshppacshlcphhstcspsLtsslpphhstpphpthhc...tlspsshlsthshsphhshaphhsp 
.................................................................h....pKphGQ.N.FLh.D...p..l...l..........p...p.I.l.p.sh..s..hp..................ps.s..s..l.lEI..GP...GhGu...L..T.p...L....hc..p.s............pp...lh....u...l.....E....l..D....p.......c.......L.....h.......s..h..L...p...c.pht...................h..s..p.....l...p..l...l..p...t...D....s...L...c...h..s..h...sp.......................................................................................................................tt.h...p..l.l...u...N.L......P......Y....p.....I..o....o..P....l..l.h..+l.lp...............................th......h.p.s...h.hlM...h.Q+..E.V......u..cR..l..s..A...t.P..s...s......+..t.Yup.Lolhh.p.h.h.s............p.s.ph.lh.pVs.p....................................................................sFhPs...P.pV-S..................ull...+..........lh.........+.....t.............p...................................................s.........p.........s.......p.h.h.......................p.....ls...ptsFs..p.RRKs.....l....tss...L.......t.......t.......h.....h.....s.....t.....t...........h.....t.....t.....................................t......lt.....p.......p..t..thh........t..................................................................................................................................................... 0 583 1087 1443 +4321 PF04353 Rsd_AlgQ Regulator of RNA polymerase sigma(70) subunit, Rsd/AlgQ Mifsud W anon COG3160 Family This family includes bacterial transcriptional regulators that are thought to act through an interaction with the conserved region 4 of the sigma(70) subunit of RNA polymerase. The Pseudomonas aeruginosa homologue, AlgQ, positively regulates virulence gene expression and is associated with the mucoid phenotype observed in Pseudomonas aeruginosa isolates from cystic fibrosis patients. 
28.00 28.00 29.50 56.20 27.60 27.30 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.85 0.71 -4.51 45 788 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 784 1 95 305 19 150.50 57 94.34 CHANGED MLpphcpApE+WGGspclID+WLppRQpLlVpYspLsultsh....tsspphss.pplpsFCphLVDYlSuGHFEIY-plhpcscthupp.uhclApplaP+IptoT-ssLsFND+Ysp.tpts..pltslsp-LSpLGEhLEpRF-LEDpLIchLass ....................MLNQL-NLTERVtGSNKLVDRWLcsRKHLLVAYasLVGIKPu....KEuahpLNEKALDcFCQsLV.DYLSAGHFsIYERIl+KlEGsG....pLscAuKIaPpLEsNTQpIMDaaDsslE.sAIDcDNhLEFQQsLSDIGEALEARFsLEDKLIhLl....t... 0 13 31 65 +4322 PF03873 RseA_C Anti sigma-E protein RseA, C-terminal domain Finn RD anon DOMO:DM07070; Domain Sigma-E is important for the induction of proteins involved in heat shock response. RseA binds sigma-E via its N-terminal domain, sequestering sigma-E and preventing transcription from heat-shock promoters [1]. The C-terminal domain is located in the periplasm, and may interact with other protein that signal periplasmic stress. 21.50 21.50 21.80 21.90 20.60 18.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.61 0.72 -4.08 40 851 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 844 4 108 351 9 55.60 58 26.46 CHANGED sphPVLpTlPhsGsusPVSh.........stst.......ppssspp.QlpcQc+RIsAh..LQ-apLQpRL ................PEoPVFN..TLPhM.G.KASPVSL..........GVPSt....ssssu...pQp..QV.QEQRRRINAM..LQDYELQRRL........ 0 10 33 73 +4323 PF03872 RseA_N Anti sigma-E protein RseA, N-terminal domain Finn RD anon DOMO:DM07070; Domain Sigma-E is important for the induction of proteins involved in heat shock response. RseA binds sigma-E via its N-terminal domain, sequestering sigma-E and preventing transcription from heat-shock promoters [1]. The C-terminal domain is located in the periplasm, and may interact with other protein that signal periplasmic stress. 
23.20 23.20 23.20 23.20 22.70 23.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.01 0.72 -3.68 35 1145 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1114 6 217 651 141 93.30 42 44.32 CHANGED Mtc..EpLSALMDGEts-p...pllptLspDp-hppoWpsYHLItDslRu-ss....tshphDluu+VusuL-sEPsh.s.............................ppPtPppsc+ts ...........Mt+..EpLSALMDGEslDs..............-LLs..tL..s..cs.s....E.hpc..TWcsYHLIRDsMRG-ss.........pslchDI.o..uRVhA...Al.EpEPs+ts.........................hhscuQPtPpphpp............................................................................. 0 35 97 162 +4324 PF04246 RseC_MucC Positive regulator of sigma(E), RseC/MucC Mifsud W anon COG3086 Family This bacterial family of integral membrane proteins represents a positive regulator of the sigma(E) transcription factor, namely RseC/MucC. The sigma(E) transcription factor is up-regulated by cell envelope protein misfolding, and regulates the expression of genes that are collectively termed ECF (devoted to Extra-Cellular Functions) [1]. In Pseudomonas aeruginosa, de-repression of sigma(E) is associated with the alginate-overproducing phenotype characteristic of chronic respiratory tract colonisation in cystic fibrosis patients. The mechanism by which RseC/MucC positively regulates the sigma(E) transcription factor is unknown. RseC is also thought to have a role in thiamine biosynthesis in Salmonella typhimurium [2]. In addition, this family also includes an N-terminal part of RnfF, a Rhodobacter capsulatus protein, of unknown function, that is essential for nitrogen fixation. This protein also contains an ApbE domain Pfam:PF02424, which is itself involved in thiamine biosynthesis. 
23.20 23.20 23.20 23.40 23.10 23.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.67 0.71 -4.59 118 1246 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 1191 0 247 726 78 133.00 33 87.61 CHANGED Vlslcss......hshVc.spppou..Cu.uCs.spsuCGtthlsphhss...ps.tphpl.......tsshshcsG-pVplulsEsslLpuuhllYllPLlsllhuuhluphlhts........-hhshlsulhuhshuahhs+hh...scphtp......p.thpPh..llph.h ............................VluhpsG....pAhVp.s.phcuu..Cu..oCu..u+.....suCGothlschssp........ps..pplhl........sssp.sLtsGpcVElGlsEs.oLLp.SAhLVYhhPLluLhlsAsLh.phLhss..........-lhulhuu.llGshsGFlls+th...u+.+.hst......cspaQPllLpl.h............................ 0 81 160 208 +4325 PF03113 RSV_NS2 Respiratory synctial virus non-structural protein NS2 Mifsud W anon Pfam-B_2717 (release 6.5) Family The molecular structure and function of the NS2 protein is not known. However, mutants lacking the NS2 grow at slower rates when compared to the wild-type. Nevertheless, NS2 is not essential for viral replication [1]. 21.00 21.00 220.50 220.30 18.50 18.00 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.72 0.71 -4.17 2 45 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 12 0 0 37 1 123.40 90 100.00 CHANGED MsTspsc.TsQRLhlsDM+PLSlET.IhSLT+-IITHpFIYLINHECIVRKLsEpQATFTFLVNYEMKLLHKVGSTKYp+YTEYNoKYGTFPMPIFINHsGFLECIGIKPT+pTPlIYKYDLNP MsTTpssTTsQRLMITDMRPLSlETIITSLT+DIITH+FIYLINHECIVRKLDERQATFTFLVNYEMKLLHKVGSTKYKKYTEYNTKYGTFPMPIFINHsGFLECIGIKPTKHTPIIYKYDLNP 0 0 0 0 +4326 PF04479 RTA1 RTA1 like protein Kerrison ND anon DOMO:DM04303; Family This family is comprised of fungal proteins with multiple transmembrane regions. RTA1 (Swiss:P53047) is involved in resistance to 7-aminocholesterol [1], while RTM1 (Swiss:P40113) confers resistance to an an unknown toxic chemical in molasses [2]. These proteins may bind to the toxic substance, and thus prevent toxicity. 
They are not thought to be involved in the efflux of xenobiotics [1]. 20.60 20.60 21.00 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.42 0.70 -4.87 6 1213 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 150 0 974 1227 0 215.40 25 69.27 CHANGED IPFlhGsIhEhVGalsRshSSpNsschssallQsVhLLIAPsLYAAoIYMlFu+llclhtscshhLhSu+FsTshFVsuDllShlLQAsGGGLhuss....sS....poTGSpLlhAGLhIQlhhauhFlIsphpFhaplttpshahcc..h............................stpW..hahNhsLhluohLIhlRSIVRlVEFlpGa-GaIIoHEaalYlFDulPMhLssllFlls.hhtNlFchpscs.slp. ................................................hhhG..sh...hEhlGahuRhh.s.....t.....p...s.......s.........s....h...s......s.......a.l.h...Q.h..lhlllA...P.s........h...h.....s.....Aul............YhhLuRll.h..h.....h.....s........s..p.....t.....t............h..l.....p.....sph.hohlFlss.Dl.luhl..lQu....s..G.....G..u.l..h..u..s..u....................ss............hp..h....G...pplh.luG..LslQ.l..hh.h...sh...Fh.hh..s..h.h.........Fh..hR....h..t....p.......t........t...h...t....t.h.t.......................................................................hth.......th...h....hh.s.....L......hh.....us..hh.........I..hl..............Rs.......l.a.R....lsE..........h.......s..........t...........G............h....s.............u.......h.............l......hp............c...............E.hhh.hlhDuhhhhlshhhhsh....h...................hhh.......................................... 1 179 455 813 +4327 PF02334 RTP Replication terminator protein Mian N, Bateman A anon Pfam-B_12997 (release 5.2) Domain The bacterial replication terminator protein (RTP) plays a role in the termination of DNA replication by impeding replication fork movement. Two RTP dimers bind to the two inverted repeat regions at the termination site. 
21.00 21.00 21.00 23.20 20.80 20.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.72 0.71 -4.15 3 54 2012-10-04 14:01:12 2003-04-07 12:59:11 11 1 52 17 14 37 0 119.40 66 98.83 CHANGED MKEEKRSSTGFLVKQRAFLKLYMITMTEQERLYGLKLLEVLRSEFKpIGFKPNHTEVYRSLHELLDDGILKQIKVKKEGAKLQEVVLYQFKDYEAAKLYKKQLKVELDRCKKLIEKALSDNF ........................EKRosoGFLlKQRAFLKLYhIT.MTEQERLYGLKLL-lLRpEFKshG..a+PNHoEVYRuLHELl-D.GIL...KQlK.sK.K..EGsKhQEVVLYpF.pD...h..Et..AKLYKKQLKsELDRCttLIcKAlpDNF............................................. 0 1 7 8 +4328 PF02382 RTX RTX N-terminal domain Mian N, Bateman A anon Pfam-B_833 (release 5.2) Family The RTX family of bacterial toxins are a group of cytolysins and cytotoxins. This Pfam family represents the N-terminal domain which is found in association with a glycine-rich repeat domain and hemolysinCabind Pfam:PF00353. 19.50 19.50 20.70 21.90 18.80 18.10 hmmbuild -o /dev/null HMM SEED 653 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -13.02 0.70 -6.26 8 309 2009-09-11 00:38:24 2003-04-07 12:59:11 10 16 156 0 5 280 2 490.10 49 61.72 CHANGED pLsplKsplppshpsststL+pAupssKpuLppAupul+suGKKLILYIPKs..Y-utpGNGLpDLVKAA--LGIEVpR-ERNshAlAppshGTscKlLGLTERGlsLFAPQLDKLLQKppKlusslGSou.slupNLuKApTVLSulQShLGosLuGMcLDELL+spp..Gt..Sph-LAKAGlELsNpLV-NIASussTVDuFoEQlspLGohLpNsKuLGulGsKLpNLP..sLupsGhGLDllSGlLSulSAuhlLuNKcAsTuTKAAAGhELoNQVlGNIsKAVSpYILAQRlAAGLSTTGPsAuLIASsVuLAISPLSFLuIADKFcRAKpLEuYSERFKKLGY-GDuLLApFa+ETGsIDASlTTINTsLuAISuGVuAAuAGSLVGAPIuLLVSuITGlISGIL-hSKQAMFEHVAsKlusKIsEWEKKHG.KNYFENGYDARHtAFLEDsh+lLsshNKEapsERlVuITQQ+WDspIGELAGITRpGDKlpSGKAYVDaFEEGKhLcpcsccFsphlhDPtcGpIDlSso..ppoohLpFlTPLLTPGcEpRERpQoGKYEYITcLhVpGhDsWsVsGVtspsulYDaTNLIQ+s......sssssch+Es+IIucLG-GsDpVFlGSGSoplpAG-GHDsVaYsKsDsGtLTIDuT 
............................................................................................................................................pthtphhppus.thKpsh.tuupthppuup+LhLhIPcs..Ycs.pGsulp-LlKAA--LGIcl.hp-tsshthspp.h.sssppllGhT-RGlsl.APQLDpLLQKh.....KluptlGuss.slspplsKstolLSslQshhG.sLuGhsLDpLlpp....t..op.-lAKuul-LhNpLVsslus.spol-uFupQlspLGuhlpsh.tLuulGsKLQNLs..sLs.hu.GL-hlSGlLSuhoAuhhLustsApT.upKsAAGhElsspllGNlsKAlSpYIlA...QR.hA.tG..LSoTu..ssAuLIsSsV.LAISPLuFhshADcFp+ActlcpYucRFKKhsY-GDtLLAtah.+poGsIDAulTsIsTsLuuluuGlu.AAusuSLlGAPluhLVuulTGhISsIL-hSKQAMFEHVApKhts+Is.EWEKp.s...KNYFEpGYDARH...A.L..pDshphL.phs+phtsERslhITQQpWDp.IG-LAuIo+p.u-+..SGKuYlshhcpGthlctp....p...hDs.pG.....IDlSss.....th.o.phlhFhTPhhTPGpE.RERhQoGK.EYhTpL.lpth.DpWplp..tstpushDhopllQhh.......t.ps.ph.ph+l.upLGstsD.Va.uuu.........os.l.uGpGaDhV.Ys+.sphGhLslDup............. 1 1 2 5 +4329 PF00016 RuBisCO_large Ribulose bisphosphate carboxylase large chain, catalytic domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain The C-terminal domain of RuBisCO large chain is the catalytic domain adopting a TIM barrel fold. 
20.50 20.50 20.50 20.50 19.80 20.40 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -11.91 0.70 -5.58 16 53466 2009-01-15 18:05:59 2003-04-07 12:59:11 15 20 33109 329 419 47715 394 226.70 77 64.51 CHANGED GItVER-+Ls...KYGRPlLGsTlKPKLGLSuKNYGRAVYEsL+GGLDFhKDDENlNSQPFMRWR-RFLaVhEAlp+ApAETGElKGHYLNVTAsTsEEMacRAEaAKE.lGssIIMhDhlsGGaTAssohApWsRcNs..hlLHlHRAhHuslsR.Q+pHGIpFRVlsKhhRhuGuDHlHsGTV.VGKLEGDtthshGFhchLRpshlppDtucGlFFcQDWuuh.ulhPVASGGIHsh+MPuLlchhG.DDsVLQFGGGTlGHP.GssuGAsANRVALEAhVtARNEGR-hs+E..Gs-llRcAAKhss-LpsAh-lW ................................................GIQVERDKLN...KYGRPLLGCTI.KPKLGLSAKNYG.RA.VYECLR.G........GLDFTKD........D.......ENV...N.SQP.F..MR.WRDRF....L.F.sA.EAlaKuQA.E...TGEI.KGHYLNsTAuT.hEEMhcRA...a.A+E.LG..s..sI.lM.h.D.alsG.GaTA.ssolA.hasRcNs....hL...L..H..l..H.RAhHush....s..R.Q+N.HGh.p.F.R.V.l.s.K.hh.Rh.S.GsDHlHu....GTV.VGK.L.EG-...hshGFh.DhLh-sal.-h.s.hs.p...G.laF..s.Q.D..Ws.....S..h..tV.hPVASGGIHshpM.t.Lh-hhG..DDsVLQ.FGGGTlGHP.G.tsGAsANRVAlEuh......h.u........................................................................................................................................ 0 102 241 333 +4330 PF02788 RuBisCO_large_N Ribulose bisphosphate carboxylase large chain, N-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain The N-terminal domain of RuBisCO large chain adopts a ferredoxin-like fold. 
20.70 15.00 20.70 15.00 20.60 14.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.59 0.71 -4.44 16 48498 2009-01-15 18:05:59 2003-04-07 12:59:11 11 16 32360 297 249 42335 208 118.10 84 32.42 CHANGED +hsYasP-YssKDTDlLAsFRlTPQsGVsP.EAAAAVAAESSTG.TWTsVWTDLLTshDhY+u+uYcl-PVPGpssQaaAaIAYPlDLFE....EG..SlsNlhTSIlGNVFGFKAl+ALRLEDhRlPhAYlK ...........................................................................+LTYYTP-Yc.T.K.DTDILAAFRV........TPQ..P..GV..P.PE.EAGAAVAAESST.G.TWT..TV..W..T..D.G.L.T.S........LD.RYKGRC...YcI.....E.........P.....V.......s...............G.........E......E..............s...QY....I.....AY.V....AYP.LDLFE............EG....SVTNMFT.SIVGN.VF.GFKALRALRLEDLRIPsAYsK.......................... 0 62 146 202 +4331 PF00101 RuBisCO_small Ribulose bisphosphate carboxylase, small chain Sonnhammer ELL anon Swissprot Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.15 0.72 -4.16 48 1831 2009-01-15 18:05:59 2003-04-07 12:59:11 15 17 903 230 280 1682 177 80.90 41 69.50 CHANGED phpThSaLPsLospplt+QlcYllspGWssslEaspspphh..................spYWpMWpLPhFsspDsupVltElppC+psaPstYlRlluFDsh+..pspslSFIlpRPs ......................h..tshSaLPsLo..-.-QItKQ..lpYhlspGWssslEascc..t..h.................................................spYWphathPh..hs.pp.ttlh.plttshtt..st.t.al+h.uhDs................................................ 0 88 203 253 +4332 PF02915 Rubrerythrin Rubrerythrin Griffiths-Jones SR anon Structural domain Domain This domain has a ferritin-like fold. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.69 0.71 -3.85 38 3551 2012-10-01 21:25:29 2003-04-07 12:59:11 12 31 1616 63 1303 4084 1248 115.30 23 56.86 CHANGED -hLttAhsuEpsupthYpthAcphcpcs...lAclFpthAcsEpcHAthlh+hhpch..sh..s.....hhp.pht.hh..............slctshpsshhpEctuY.hYhcltcchtppptpch......hcphspsEptHtchactLh .................................Lhtu...hsuE.p.p.u.h.p.hY.....t...hh...A...c...t..Ac..c....-..s....tlu...pl.Fp.phAppE.p.pH..Ap..hh..h..c..h....h...p..p...h............................................................................................h.t..................sl..t..t....h..h...t...s...t...h.t.....p...p.....hh......a...h...p....h...u...c...p..........s........t...t...h...t..th................hpthsp..E.ttHtphh.th.h............................................................................ 0 520 931 1151 +4333 PF01330 RuvA_N RuvA; RuvA N terminal domain Bateman A, Finn RD anon Sarah Teichmann Domain The N terminal domain of RuvA has an OB-fold structure. This domain forms the RuvA tetramer contacts [1]. 22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.72 0.72 -4.18 33 4290 2012-10-03 20:18:03 2003-04-07 12:59:11 16 6 4259 26 913 2706 793 61.10 36 30.56 CHANGED MIshl+Gplsclstshlll-ss.GlGYpltsstsphhphspst..psplasphllREDu....htLYG ....................MIuhl+G.h...l....h.ch.....p...s.....s.....h.....ll..l-ss....GVGYclpss........sshh...pl.......ts........t.....csplaTahlV....REDu..hhLYG............... 
0 307 609 779 +4334 PF02075 RuvC Crossover junction endodeoxyribonuclease RuvC Mian N, Bateman A anon IPR002176 Domain \N 21.50 21.50 21.50 21.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.86 0.71 -4.38 9 3205 2012-10-03 01:22:09 2003-04-07 12:59:11 12 4 3142 4 812 2187 1059 147.50 43 83.83 CHANGED IlGIDPGochsGYulIcp.suppLphlssGsIRTsos.sLspRLhslh-ulppllcpapPshhAIEplFhupNssSslKLAQARGslhLAAspctlsVtEYsPppVKpAVsGpGpAsKpQVphMVp+lLsLsscPpP..tDAADALAlAIsH .....................................................ILGIDPG.phsGaGlI..............c.......t......p......G....p.......p.........l.....p...h...lus.....G.s..I....+T........s...s....s..............s..l...s..pRLp......tIasulsclls.p............a.....p.....P.....DhhAI..Ep.l...F....h.s............+......N.....s.s.o...sL.K.LGQARGsAll.A....us.p..ps..L.P......Vh.EY.ssppVKpu..VsGhG.pA-K....pQVp.tMVp.p.....lL.pLs.s..t..P.p..........sDAADALAlAIsH.................................................................................... 0 280 550 699 +4335 PF02042 RWP-RK RWP-RK domain Bateman A, Schauser L anon Pfam-B_9740 (Release 5.1) Family This domain is named RWP-RK after a conserved motif at the C terminus of the presumed domain. The domain is found in algal minus dominance proteins as well as plant proteins involved in nitrogen-controlled development [1]. 21.90 21.90 21.90 23.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.58 0.72 -4.14 23 430 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 55 0 303 406 6 51.00 45 9.02 CHANGED pppplohcplppYFphPlpcAA+pLs...........VssTsLK+hCRchGIsRWPaRKl+SL ..................h..tslolpsLpp..YFp.hslc-AA..+.p.LG..............................V.s.sTsLKRl...C...R........phG.I.sRWPpRKlppl..... 
0 101 213 273 +4336 PF01365 RYDR_ITPR RIH domain Bateman A anon Ponting CP (EMBL alignments) Family The RIH (RyR and IP3R Homology) domain is an extracellular domain from two types of calcium channels. This region is found in the ryanodine receptor Swiss:P21817 and the inositol-1,4,5- trisphosphate receptor Swiss:Q14571. This domain may form a binding site for IP3 [1]. 20.70 20.70 21.30 21.40 20.00 20.40 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.31 0.70 -5.10 18 1311 2009-09-11 08:33:13 2003-04-07 12:59:11 16 91 139 8 659 1057 8 191.60 27 11.19 CHANGED tVhplLp-Lltahsssppctp...p.....c..hh+sh+p+QcLhRp.ulhphVhcllptsascp............hs-ppcttap................clhpLsachLpthppGsRpNQthhtcphs........h.th.hs.G.l.h-slpslLhsN.cLhptlpEtt.lcphluLlc+pG..RcschLDhLpslssusspslcssQ..-hIphpllssGc......DlLlphpl+sshsph ..........................................................................ht.LpcLlhhhhs.ppph.......t...p.........+....sh.+p+Qp.LhRp.shh.p..h.Vhcl..l.p...tsht..tp...........................tpp.pp..t..tap..............................................................................clhp.hsa.chL.p.th...........p...........p.....s.s.RpNQ.t...hht..c..p..ls........................h.....t..hs.G........h...h-......s.........h...ps...llhsN.cLh.p.t.l..p...-t.....t..lc...p....h.Vs....hl...cppG.................Rp.sp.......hL.chLpsl.s.s.s.p..s..p..slcssQ....-hlp.pll.s..s.uc......................-lLlphplhts....h..................................... 0 187 233 423 +4337 PF02026 RyR RyR domain Bateman A anon [1] Family This domain is called RyR for Ryanodine receptor [1]. The domain is found in four copies in the ryanodine receptor. The function of this domain is unknown. 
27.40 27.40 28.10 27.70 27.10 27.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.17 0.72 -3.96 38 1192 2009-01-15 18:05:59 2003-04-07 12:59:11 11 75 161 22 593 969 18 92.00 37 8.15 CHANGED sasPpPlDhoslsLspcLp..................pls-+hAENhH-lWApc+l................ptGWpYG.....scss+pHPpLVPYspLsEpEKchsRphupEslKslluhGaslppsc.c ..............................................................tapPpPlDhop..lt.Ls.c.Lc...........ths-+lAENhHslWAtc+l................ptGWpYG....hp.....Dc.s..s..+pHPhLVPYspLs-......c........EKphsRp.spEslK.sLl.uhGaplphs.p.................. 0 108 161 353 +4338 PF00575 S1 S1 RNA binding domain Bateman A anon [1] Domain The S1 domain occurs in a wide range of RNA associated proteins. It is structurally similar to cold shock protein which binds nucleic acids. The S1 domain has an OB-fold structure. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.15 0.72 -3.89 51 46862 2012-10-03 20:18:03 2003-04-07 12:59:11 18 199 5058 118 11675 32137 17037 74.50 26 20.26 CHANGED phphGsllpGpVpslsp..hGhhV-ls...shcGhl.hSplstp............hh.psspshphGcclclpllcl-pppppl.LSh+ ...........................t..phGpll..p.G.p.Vp..p.lss.......hGs.F.V...........-.....l........s................s.........h...........-.......G.l......l........+..l.Scl.s.p........................................................hht.p..s...p.......c...h...l...p........h.G.c.cl....c..VpV...l...c.l....-..t....p..p.c...plsLoh.................................................................. 0 3844 7387 9758 +4339 PF00438 S-AdoMet_synt_N S-AdoMet_synt; S-adenosylmethionine synthetase, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold. 
21.30 21.30 22.70 21.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.31 0.72 -3.78 115 5500 2009-01-15 18:05:59 2003-04-07 12:59:11 15 15 4837 44 1389 3870 2374 99.60 55 25.69 CHANGED ppaLFTSESVoEGHPDKlsDQISDAlLDAhLppD..................Ppu.......RVACEThloTuhVlluGEl............oop...AhlDh...pclsRcsl+-IGYsp....sp.hGFDhcos.sVhsslcpQSs ............t.phLFTSESVoEGHP.DKluDQISDAlLDA.lLppD..................Pp...u..........RVACET.......hVp....TG..hVlVsGEI......................TTs.......AaVD...l...pcllRcTl+-IGYsp........u.c...hGFDuco...C.uVlsuIscQSP.................................. 0 486 899 1177 +4340 PF02772 S-AdoMet_synt_M S-AdoMet_syntD2; S-adenosylmethionine synthetase, central domain Finn RD, Griffiths-Jones SR anon Prosite Domain The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold. 21.00 21.00 21.10 22.20 20.30 20.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.56 0.71 -4.17 192 5503 2009-01-15 18:05:59 2003-04-07 12:59:11 11 14 4862 44 1376 3875 2680 120.30 51 30.99 CHANGED c...p.GAGDQGlMFGYAssETspLMPhPIhlAH+Lsc+LuclR..K..sG.t...l..........saLRPDuKoQVTlcY....s.........s..+..P.....l+.lcol.VlSoQH.sssls.................cpl+cslhcpVIcsll.Pt..t....h.lcs..c.TcaalNPTG+F ....................p.GAGDQGlMFGYAssE.Ts.......pL.......MPhPIsLAH+....LscRlu-lR..K.....sG...p....L.......................saLRPDuKoQVTlcY....-s........................s.+...P..l+lDTV..VlSTQH...s..t.-.l.s.........................pc.p.l.ccslhEcl...I.+sVl....Ps....p..............h..L..cc.....p...T...+aaINPTGRF............................................... 2 473 885 1162 +4341 PF02773 S-AdoMet_synt_C S-AdoMet_syntD3; S-adenosylmethionine synthetase, C-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The three domains of S-adenosylmethionine synthetase have the same alpha+beta fold. 
25.00 25.00 25.00 25.00 23.80 21.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.85 0.71 -4.44 11 5527 2009-01-15 18:05:59 2003-04-07 12:59:11 11 14 4855 44 1384 3894 2871 137.30 65 35.56 CHANGED IGGPpGDAGLTGRKIIVDTYGGauuHGGGAFSGKD.oKVDRSAAYAARaVAKSlVAAGLs+RC.VQlSYAIGVAEPLSIhV-TaGTuc..hopccLlcllRcNFDLRPGsIlKpLDLt+P...IYppTAuYGHFGRpc..FPWE+ .................IGG.PtGD.uGLTGRKIIVD..TYG.Gh..u.+HGGGAFSGKDPo.KVDRSAAYAARYVAKNlVA..AG.LAc+CElQl.............uYAI.GVAc...PlSlhV.-TFG.T........u...+........l.s..-.pp..l....h...c.h...V....+...c..hF.DLRPtuII....c...hLDL.......t.+.P.........IYppTAAYGHFGR..p..c.......hs..WEp....................... 0 483 898 1172 +4342 PF02574 S-methyl_trans Homocysteine S-methyltransferase Bashton M, Bateman A anon COGs Family This is a family of related homocysteine S-methyltransferases enzymes: 5-methyltetrahydrofolate--homocysteine S-methyltransferases also known EC:2.1.1.13, [2]; Betaine--homocysteine S-methyltransferase (vitamin B12 dependent), EC:2.1.1.5, [3]; and Homocysteine S-methyltransferase, EC:2.1.1.10, [1]. 
21.20 21.20 21.20 21.20 21.10 20.70 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.88 0.70 -5.12 16 4674 2009-09-12 00:27:11 2003-04-07 12:59:11 11 22 3246 24 1463 4033 4093 288.50 31 39.11 CHANGED llllDGuMGTpLpspshshsc.hhphh..................ss.s..s.ppP-llpplHcsYhcAGA-llpTsTapuo.........hhuhu-hslscts.pplsptusclARtst-phs......psc............hhVsGulGPhsthhs...sschsu.hsssh-..slhchap.phcsLh-uG.sDlLhhETl.shtps+Ahlphlcp.hhpt.uh.shslh.ussl.-uoshppsushhcuhh..h..sphshlGlNCshsspphp.hhphhsp.......sthlhsaPN..uG.s.sss....htastpssphsp......slccaspsGu....plIGGCCGToPcHIptlucslps .....................................................................llllDGuMG.Th.l.p.....p......h....s...l...s.....t...t..s.h.hht..........................................s.s.-.h..h.s.l.opP-...l..lp..pl.H.p.sY....l........c......AG.A.....D.........l.....l.........p.T.........NT..F..sus.............................hh.s.h..u.c.......a.......s...h...p...p.......h....s...........t-.l....s.tt...usclA.+psscch............................sscp................taVAGslGP.hs...s.....ss.p.s.u.h.t...s..l...oh........-..........plh.pta.p.cphcu.L......l.....-......u.........G.....s.....Dl......l......llE....T...h...Dhhp.s.+A.u....l.....h..ul...c......p...............h...............c.......t.........................s....h................p............hs.......l.........h........s..s....s...l..h.-...u.s..u.....h.p.h.u.u..p..s..h....pu..............h...h...........................p..s.....h..s....l......G..l..NCuh...G...sp...t..h.p.t.h...l...p..t.l.uph...................s..pthl.....s....s....aPN........A..G..L.P..p..ths................pYs..t..........s......s...p...p....hu....p..............hh.p.ca.h.....p..t.Gh..............sll..GGCCGTTPpHI..p..slscslt.s.............................................................. 
0 500 891 1214 +4343 PF04689 S1FA DNA binding protein S1FA Kerrison ND anon DOMO:DM04705; Family S1FA is a DNA-binding protein found in plants that specifically recognises the negative promoter element S1F [1]. 20.50 20.50 21.10 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.53 0.72 -4.10 4 48 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 20 0 27 40 0 66.70 66 44.78 CHANGED tsssthEu.KGLNPGLIVLLVlGGhLLsFLVGNalLYsYAQKNLPPRKKKPVSKKKMKREKLKQGVssPGE .....p....tEs.KGLNPGhIVLLVVuuhLLlFhVGNYsLYhYAQKTLPP+KKKPVSKKKhK+E+LKQGVSAPGE....... 1 6 16 20 +4344 PF05116 S6PP Sucrose-6F-phosphate phosphohydrolase Moxon SJ anon Pfam-B_6442 (release 7.7) Family This family consists of Sucrose-6F-phosphate phosphohydrolase proteins found in plants and cyanobacteria. Sucrose-6(F)-phosphate phosphohydrolase catalyses the final step in the pathway of sucrose biosynthesis [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.42 0.70 -5.08 23 845 2012-10-03 04:19:28 2003-04-07 12:59:11 8 21 568 13 267 12656 492 232.40 21 49.05 CHANGED sphllVoDLDpThl........u-stuLtchpslhc..thcp-shLlauTGRo.sshpcLhcEhsL.pPDhllsSVGTEIhY..Gp.shhPDpuWpphLsp.cWs+..phVhchhuchPp.L.phQs-p-QpsaKlSaal-ppsu.shlppLpphLccpsLcl+lI.....aSsGpsLDlLPtsAuKGpALpYLtp+aphs...sppoLVCGDSGNDtpLF.lssshGVhVuN.uppE...hl..htEss+sp.plaaAppcsAuGIl-ultHasl 
...............................................................................................................................................................................s..hhlh.DhDtThh.......................tp..........t...h.....h..t..h..t....t....h.hp............t...pt...phhhshsTG....pshp.phh....p....h....h.....p........p..h....s.....h..................p.....P......c..h.......h..........l..ss....l..G.......o.c.lhh.....................hp.....p......h.......h..............s.....p.....s.....a......p......p..........h.......l..............s...................p.......t......a....th.............p...tl......p..............c.....h.......l............t....p.................h............s..........p............l.....h................Q......................p...................t.................p...........p.....s.......t.....................a.....................+.......h.......s...................a......h.........h...c....p.....p...t...t........t....p................................l...c.t......l........p...p..h...h......p..t...p...s....l....p...s...p...h.h.................................h.....p...s......u.....h.....c.....l...D..ll.....P.h.sA.u.Ksp.A.l.p.a.L.h...p.+..a...sls.............hpp...h...l..s..h...GDS....G....N.D...h.p...hL..t.....sh.......t....pu..h.ll..uN...spt-.................................................................................................................................................................................................................... 0 83 184 233 +4345 PF01023 S_100 S_100_domain; S-100/ICaBP type calcium binding domain Finn RD, Bateman A anon Pfam-B_242 (release 3.0) Domain The S-100 domain is a subfamily of the EF-hand calcium binding proteins. 
20.30 20.30 20.30 21.00 20.10 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.88 0.72 -4.63 24 1006 2012-10-02 16:17:27 2003-04-07 12:59:11 14 21 63 279 499 923 2 43.00 37 14.75 CHANGED LEculpslIslFHpYSs+cGctpsLsKpELKpLlp+ELssaLcp ........LEpultslIssFHcY.usc-..G..c.p..tp.LoKpELKpL.lppELsshlp........ 0 28 46 114 +4346 PF05124 S_layer_C S-layer like family, C-terminal region TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 22.50 22.50 22.60 22.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.51 0.70 -4.72 21 129 2009-01-15 18:05:59 2003-04-07 12:59:11 7 18 45 0 79 134 6 166.90 23 29.97 CHANGED psluussGaAplhIssslKsl-LG-EalsDaEhhsllpss.......sslchp-s..................hts...pplGlALpYsGDclpslccscphc.lusYA.ph.hDD-sp.s.cLpsaFphc....EpK-lolshGpclp...Vhsu-lhhp...tts..sh.hssPlshLDoEh...uL-su-psLILVGGPVVNplTcELsss..GtlsI..........-spSsATlsllcssAN..GscVLVVAGGDRtuTcpAApALlphl ........................................................................................................................................................................................................................................................................................................................................................................................................................s..ph.................s...p.hs..pslILlGGPVuNtlscp.ltsp....h.lpl.............................................s.sps.u...s.lt..h......l.......c.......sshN......sps.VlllAGu.DR.uTcsAsphh.......................... 
0 8 16 59 +4347 PF05123 S_layer_N S-layer like family, N-terminal region TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family \N 25.00 25.00 30.00 28.90 24.70 22.20 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.82 0.70 -4.78 18 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 36 0 27 57 1 268.00 29 44.88 CHANGED KKIGAIAVGuAMluouLuouuh.AspclG-VssFh...sshVK.sGpPNVcIVVGSsA.AAhDVVSAAsIAAKIGSLhYpEssVE..DuSAslshsssu-S--lsl....h...sth.hhussspthlhsuuDsDY..........s.shsssshsshshsuhspsc....ssssLtD...........................LssLhplpDlDPssa...............hs.sDhD.AsEhlhsplsss.......h.ssotshplscDpllYsols..................apsshsuh..tshpsLp.....GhcIPaLGcEhslVclDpD.........DDhlhlGpcsY-Gslcp.G-saslGsGYpVcl ..............................KKIuAlAlGuAMluusLusush.Ahpp.l.uslss.....s.hVp.sGpPNVcIVVGSsA.AAhDVVSAAsIAAKIGSLhYpEtslc..ssusslphpspsc.o-sh.l.....................hss.sstthh.hs..ssss.sY.................s.t...s...ss..hs.....sh..htshsphs.....thhslt-..............................................................................................................ls...shhplpch..DPpsa...............hs.pD.-...us...Ehlhshlpss.................stphpltccphhYholh.....................hpss...ts.....t.htslt..............Gh..plshLGpchhllplsts.........schlhlGp.sapGhlcp.G-shslGsGYplcl................................................................................................................................................................................................................................................................................................... 0 3 6 16 +4348 PF00954 S_locus_glycop S-locus glycoprotein family Finn RD, Bateman A, Mistry J, Guo X anon Pfam-B_357 (release 3.0) Family In Brassicaceae, self-incompatible plants have a self/non-self recognition system. This is sporophytically controlled by multiple alleles at a single locus (S). 
S-locus glycoproteins, as well as S-receptor kinases, are in linkage with the S-alleles [1]. 21.80 21.80 21.90 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.31 0.72 -10.95 0.72 -3.78 52 2074 2009-09-12 20:30:07 2003-04-07 12:59:11 15 105 84 0 900 2114 0 104.00 27 16.50 CHANGED aRSGPWN..GhRFoGlP..-hp.thsh..hshsFs.....psspElsaoaphs.sssh.hSRlhlossGhlpphsW.ssspsWsh.hattPtc.pCDhYthCGsauhC...sssssPh..CsClpGFhPcs ...........................................................................s.Wt..t..h...........................................h.....h.shs..........pspp.Ehh.as..a..p.h.t.....s...s......s..h......h.s.R.l..sl...s.....s........G....p...l.p.hhs...W..........t.....s...s...p.......p...W.s........h.....h...a...p.....t...P..p.......c...pCD.h......Y......s......hC.Gs.a..u..hC.............s..h..s....s.........s....P.h.....Cs..Cl.p.GFpP......................... 0 43 507 728 +4349 PF00526 Dicty_CTDC S_mold_repeat; Dictyostelium (slime mold) repeat Finn RD anon Pfam-B_96 (release 1.0) Repeat \N 21.30 21.30 21.80 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.84 0.72 -7.18 0.72 -4.07 30 447 2009-01-15 18:05:59 2003-04-07 12:59:11 13 20 8 0 381 438 10 23.70 56 31.53 CHANGED NsCThDoCsspsG..CsHTPlsCDDs .NsCThDSCsss..s.G..CsHTPIs.CDDt...... 
0 300 381 381 +4350 PF00277 SAA SAA_proteins; Serum amyloid A protein Finn RD anon Prosite Family \N 21.10 21.10 21.20 21.30 20.60 20.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.41 0.72 -3.67 18 208 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 68 0 67 243 0 98.00 63 73.11 CHANGED WapFhtEAsQGAtDMWRAYpDMREANa+sSDKYFHARGNYDAApRGPGGsWAA+VIScuREshQuhh.........GRGtEDotADQcANcWGRSGtDPN+........YRPpGLPcKY .......................hpFltEAhpGAtDMWRAYs.DMREANYpsuDKYFH.ARGNYDAApRGP...G.Gs....W....A...AcVI........SDAREshQphh......................G+GtEDShADQtANcWGRS.GpDP.N+...............aRPtGLPpKY..................... 0 12 14 23 +4351 PF04455 Saccharop_dh_N LOR/SDH bifunctional enzyme conserved region Waterfield DI, Finn RD anon COG1915 Family Lysine-oxoglutarate reductase/Saccharopine dehydrogenase (LOR/SDH) is a bifunctional enzyme. This conserved region is commonly found immediately N-terminal to Saccharop_dh (Pfam:PF03435) in eukaryotes [1,2]. 22.10 22.10 22.10 24.60 21.50 22.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.10 0.72 -3.90 35 168 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 154 4 103 180 22 102.10 34 17.28 CHANGED hhs.Rp..lELcGHllDShlls+slDpIh-hGGsFcllcFslG+p+sDsSaAclpVpAcspcpL-pILspLpplGAs..s-....p-scLtsuspDtVhP-sFY..oTTNas ...................................h..scplcLcGHLlDohlls+sLDhIh.-hGGsFcl.lchc......l......G.pp.....+.p........ssS.aAclp.VsAsspphL-cIlspLpslGAs....-........p-spht...ss.tstVhPcsFYsoT.h.................................. 0 34 78 97 +4352 PF04092 SAG SRS domain Bateman A anon Pfam-B_1675 (release 7.3) Family Toxoplasma gondii is a persistent protozoan parasite capable of infecting almost any warm-blooded vertebrate. The surface of Toxoplasma is coated with a family of developmentally regulated glycosylphosphatidylinositol (GPI)-linked proteins (SRSs), of which SAG1 is the prototypic member. 
SRS proteins mediate attachment to host cells and interface with the host immune response to regulate the virulence of the parasite. SAG1 is composed of two disulphide linked SRS domains. These have 6 cysteines that form 1-6,2-5 and 3-4 pairings. The structure of the immunodominant SAG1 antigen reveals a homodimeric configuration [2]. The SRS domain is found in a single copy in the SAG2 proteins. This family of surface antigens are found in other apicomplexans. 20.80 20.80 20.80 20.80 20.60 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.94 0.71 -3.74 58 921 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 7 14 768 924 0 131.00 20 67.81 CHANGED sssssoC..............sssssssplslopp.ssolTlpCussss....hhPs..shsp......................ptCs....t.ttssp.hslpslL..ssssps..Whpt......spsspstsL.slspsshPtss.psFhlGCptpsss.....................sttssCpVpVsV ..........................s...hssC......................ts.ss..sshpl.slopp.psslolpCsssss.......hhPs..shsp.........................phCp...............t.t...ss...s..p.t..h.sl.s..s...l.l.....ssssps......hhpp..................sssssshsL...sls.sshP.t.ps..p..p..h..hhu....Cttssss......................................................................tttssCpVpVsV........................ 0 325 325 768 +4353 PF01259 SAICAR_synt SAICAR synthetase Finn RD, Bateman A anon Pfam-B_1426 (release 3.0) Family Also known as Phosphoribosylaminoimidazole-succinocarboxamide synthase. 
19.70 19.70 20.70 21.10 19.40 19.40 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.54 0.70 -5.28 11 4852 2009-01-15 18:05:59 2003-04-07 12:59:11 13 18 4550 22 1252 3379 2876 240.90 34 87.99 CHANGED ph.llsc.....GKs+-IYcl-D..spLLhlspDclSAaDslhcspIstKGplhsphSpFhFphLp.h.lssHhlct.s......................-.sshls+KhchlPlEsVVRsYlTG....ShhKc...sGsh..cGlclsssllEs.hhs-slhsPphpsE...Hs.slu........hsps..hlG.-pssplcchshplaphhcchhtppGlIlsDhKhEFGlDp-s.cllLsDEl..PDSSRhWsssshc....h......hDKQhhRchLsssspuhp.ts....hsphspsl ..........................................................p.thlYc...GKsKclY..........p...........s..............s.........c............shl.l.hh.hpDcho.........A........as...s.h......hp.......p.......p.....l....s.sKGtlsNplosahFctLp.....c.........h....sl........s.....o...Hhlcths.........................................................s...pphl.l..+...+...l......c............h...............l..P..lEsVlR.shh.s.G....ShhK+....h..G.hc........pG....h..tLs.p.slhEh......hh.K.....s..........D.........sltDPhhssp..........Hspsls.........................................hss..t....................................-.p...l.s...pl...+...chs......h......cl....p....hlpchh..t.ptG.....lhLlDhKlEFG....h..........s......p.....s..G.......c......l.lLuDElu.PDosRh..WDtcshc......p..............hD.K-haRp.....Ls.s.h..h.tsap..thh.+l....hh..................................................... 
1 399 785 1040 +4354 PF03534 SpvB Sal_SpvB; Salmonella virulence plasmid 65kDa B protein Griffiths-Jones SR anon PRINTS Family \N 20.00 20.00 20.20 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.82 0.70 -5.39 17 349 2009-01-15 18:05:59 2003-04-07 12:59:11 8 62 230 0 113 340 33 223.30 27 12.78 CHANGED LussGssGhAolSlPLPlSsGRG...huPsLuLsYsSuuGNGsFGlGWphslhsIsR+Ts+GlPpYssp.....DpalGPsGEVLlssh.sspGp.phcpt.pthtshshstsaoVoRYpsRlEupFsRlEaWpPpsssss............sFWllassDGplHlhG+sspARlusPp....ss.s+lApWLLEESVo.ssGEHIhYpYcsEDcsss-tschpsp............stoAQRYLppVpYGNhpsutsLashss....s.Pssp..p.........WLFpLVFDYGERssshpssPtaps..........stsWhsRpDsFScacYGFElRTRRLCRQVLMFHph ...................................s.s.GtA.o.holPlslssG.R.u...hsPsLuLsYsSs....u.........G.......N....G......................hGlGWsl.u.s.....u.I.p......R..........c....T.p.......t........s..lP.p...Yssp........-t.h...h.h.suc.hl........stt.st..................tp..................................................tth..t.s.....t..pa.sc.hp.s.t..Ft+lphht..sts..sstt.............aWhlhstsG.hthhG...t.....s......s....su.+.l.....ssst......................t.....tphhpWhl.p.cshs...stGpt.lhYpYttp...s.t..................................................................................................................................................................................................................................................................................................................................... 
1 46 73 97 +4355 PF03538 VRP1 Sal_vir_VRP1; Salmonella virulence plasmid 28.1kDa A protein Griffiths-Jones SR anon PRINTS Family \N 21.80 21.80 21.80 22.50 21.20 21.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.12 0.70 -5.38 4 244 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 119 0 42 182 1 201.10 21 19.16 CHANGED sppsSPLL........S+ltstaplsstlhcp..............GYpSlFDIlRhsRcpFIccapt..us+utt.haDhAsuhApQlhppFRpppL................o+tV+tshhpsaSs.s.......PpYts.F.-s...WpphsPssusEussSPVuYLhclYphs.p.E.susspul.slsERRsDLusLhlsscuINppIssLplVNphLSpthpthlp.ps.t..ss.thLupsRaP.pLPYcasppQIphuhsspcspLtcIhppsshsaP...W.......................................................................h.....LSsslssA.schsh...........................................................................................hAsphu.pQQplhsEsltssspp...FYQsNYGlss.sss.hctlshFspQTuloVs ...........................................................h..............................................................................................................................................................................................................................................................ushtYh..hh.......................................t.................h..thspRRPDLtpLhlsp..pshppplssL.h..phh..........................................................................s.h..hPYp.shp.lp....l...s..thtth.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................ 0 6 18 32 +4356 PF01758 SBF Sodium Bile acid symporter family Bashton M, Bateman A anon Pfam-B_697 (release 4.2) Family This family consists of Na+/bile acid co-transporters. These transmembrane proteins function in the liver in the uptake of bile acids from portal blood plasma a process mediated by the co-transport of Na+ [2]. Also in the family is ARC3 from S. cerevisiae Swiss:Q06598 this is a putative transmembrane protein involved in resistance to arsenic compounds [1]. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.22 0.71 -4.89 17 4441 2012-10-02 17:06:44 2003-04-07 12:59:11 11 12 2582 2 1463 5194 1717 184.60 24 56.02 CHANGED slhLhlhMashhhplphcchtphhpc...sKhlhluLlhpallhPllhFllu.hhhthhst...................hhhGllllGssPssuhu.las.LucGchs.Lulshsshoo....htshhhsshhhhhlstt.h.ls........h.hhphhtolllhlhlPhhhGhls+hhh.......hh.phhh.hlsshulhullhslhlhhuhpuphlsph ..........................................................................................hLhlhMa.shhhp.lphpch.t.pl.h.cc.................s+...s.............l...h...l.u.l.l.h.pa...ll.hP.hh....h.a.h.L...u....h...h...h...t..h...st..............................................hhsG.l.I.L....l.G.s.sP.s.s.s.h.s.....V.a..........o.hL.uc..GDss..h...................ols.h.sul...so........lhshhhsP......lh...h....h....h...l...s...s.t...h..ls...........................h..h..s..h...hh..S.l..lhh.lll..P..lhhGhlh+.thh...............................h.h..t...p..p..h...h.s..h..l...ss..ho.l...l...ul.lhs.ls.lh.hu.hpsp.lh..t.................................................................................... 
0 485 921 1207 +4357 PF03536 VRP3 Sal_vir_VRP3; Salmonella virulence-associated 28kDa protein Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 40.50 33.00 19.10 21.30 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.68 0.70 -5.21 3 62 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 47 13 7 51 0 213.90 60 91.85 CHANGED PINRPsLKLNLPSLNVVssSEtPQMsSTNE+LKNNFNSLHNQMRQMPsSHFKEALDVPDYSGMRQSGFFAMSQGFQLsNHGGDVFIHA+RENPQSKGDFAGDKFHISVtREQVPQAFQALSGLLFSEDSPIDKWKVTDMERVsQQSRVulGAQFTLYVKPDQENSQYSAShLHKTRQFIECLESRLSESGlhPGQYPESDVHPENWKYVSYRNELRSGRDGGEMQcQALREEPFYRLMsE ............................................................................................s.h..sssc..cLKsNFs.L.asQhRphPsoaFK.A.sVPsYSshpQSuFhsMtQG..FQ.lsNH....u.hDVFIHAcREsPQSpGcFAGDKFHISVhR-.VPpAFQALSGLLFSEDSPVDKWKVTDM.....p+Vs.....QQuRVulGAQFTLYlKPDQEsSQYSApaLHKhRQFIpCLEScLScsGVh.sG.ppP-SDV+PEsWKYlSYRNELRS......sRDGuEhQcQtLREEPFYRLMhE............... 0 2 3 5 +4358 PF01536 SAM_decarbox Adenosylmethionine decarboxylase Bashton M, Bateman A anon Pfam-B_600 (release 4.0) Family This is a family of S-adenosylmethionine decarboxylase (SAMDC) proenzymes. In the biosynthesis of polyamines SAMDC produces decarboxylated S-adenosylmethionine, which serves as the aminopropyl moiety necessary for spermidine and spermine biosynthesis from putrescine [1]. The Pfam alignment contains both the alpha and beta chains that are cleaved to form the active enzyme. 
21.20 21.20 21.30 21.40 19.90 20.40 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.20 0.70 -5.77 15 693 2009-01-15 18:05:59 2003-04-07 12:59:11 11 8 387 56 340 679 15 293.80 34 87.33 CHANGED sSAhuFEGhEKRLEI.Fhcsthhs-spu+GLRuLs+uplDclLssAcCoIVSohuN-plDSYVLSESSLFVasaKIIlKTCGTT+LLhuIPsILcLAcslu...........hpVpuVpYoRtsFlFPssQsaPHRsFoEEVshLDuaFu....supAYlhGsscps.p+WalYoso.....sps.....pps...pPsaTLEMsMoGLD+-pASlFaKscu...............usAusMTppSGIc+ILPsSpIs........DFpF-PCGYSMNu..l-usshuTIHlTPEDGFSYASFEosh....hshcshslspllp+VLsCFcPscFSVsla....sssss+phpp.shsl-lc.uYshcctshpsls.hussllYtcFs+st ..............................................................s...hFEG.EKhLEl.F.................................................................t........t.......s.......t........s...........LR.s.ls....ttphcphLp.spCpIlS.hp.......s.......ct.......hDuYlL..S..ESShFVash..+.lllKTCGTTpLLhuls.l.....Lc.lAtths..........................................hp.ltplhYoRts.FhhPt.....t.Q..h.PH.csap-EVthLsthFs.............supAYhhG.p..sps....pW...alYsss....................................t................p........................ps.s.TLE.hhMo.tLD.p...hsp..F.ah.s.ps.....................................................................sputthoptoGlp..c..lh...Pt.s....t....l....s................................................sa.FpPCGYShN............u........h.................p......ss......th.TIHlTP..E...s....t....a..SYASFEss...............................p.t.h.s.h.sp.llp+VlthFp..PscFslsla.............t.t.t.t..........t................h...........................................................t............................................................................................... 
1 100 180 271 +4359 PF02199 SapA SAPA; Saposin A-type domain SMART anon Alignment kindly provided by SMART Family \N 20.30 20.30 20.50 20.40 20.00 19.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.07 0.72 -4.19 32 428 2009-01-15 18:05:59 2003-04-07 12:59:11 10 66 91 0 167 403 0 33.70 48 10.43 CHANGED GpcpCshGPuaWCpshcsAppCs..AVpHCpppVWs .......GhcpCshGPuaWCpshcTAspCs..AVcHCpppVWs...... 0 47 59 101 +4360 PF03058 Sar8_2 Sar8.2 family Mifsud W anon Pfam-B_2148 (release 6.4) Family Members of this family are found in Solanaceae plants, a taxonomic group (family) that includes pepper and tobacco plant species. Synthesis of these proteins is induced by tobacco mosaic virus (TMV) and salicylic acid [1]; indeed they are thought to be involved in the development of systemic acquired resistance (SAR) after an initial hypersensitive response to microbial infection [1,2]. SAR is characterised by long-lasting resistance to infection by a wide range of pathogens, extending to plant tissues distant from the initial infection site [2]. 27.10 27.10 27.20 27.90 27.00 27.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.40 0.72 -3.66 4 33 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 9 0 0 33 0 83.60 47 99.24 CHANGED MFSKT.LFLChSLAILlhVISSQADAREM.SKAAAPITQAMNSNNIoD.QKoGAGllRtl.Ghhh+hspsh...................CKhCpCp.tlCshC..Ct ..Mh.KsNlF.LChSL.IlLhlISSQssAREM.ScAuA.slTpuMsuNNhTp.pKsG...uulh+plsshhppssp.ssp.....................hhGpsCKhCssph..C.h.......... 
0 0 0 0 +4361 PF04790 Sarcoglycan_1 sarcoglycan; Sarcoglycan complex subunit protein Waterfield DI, Finn RD anon Pfam-B_6135 (release 7.5) Family The dystrophin glycoprotein complex (DGC) is a membrane-spanning complex that links the interior cytoskeleton to the extracellular matrix in muscle.\ The sarcoglycan complex is a subcomplex within the DGC and is composed of several muscle-specific, transmembrane proteins (alpha-, beta-, gamma-, delta- and zeta-sarcoglycan). The sarcoglycans are asparagine-linked glycosylated proteins with single transmembrane domains. This family contains beta, gamma and delta members [1,2]. 27.40 27.40 27.80 29.50 26.80 27.30 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.62 0.70 -5.22 16 335 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 85 0 193 287 0 224.50 34 82.30 CHANGED laplGIaGWRK+ClYhhVLLLhllhVlNLsLTlWIlpVhpFs.cGMGsLclppc.Glpl.cGco-F..lpPlYspcIcuRpDpsLhlpS......spslolNs...RNtpGpls..s+lslusct..l.stsptFplpss.st+.LFosDpsphhht.tpLclssPpGu....lFt+uVpTstl+us.sspcL+LESsTRplshcAscGVplcAtAGtl-hpuppDlpLpSs.......cGplhL-A.pslhLs....+LPhups......ssGspps...hYclCVC.ssG+LFhussstsps....Cp .............a.hGlhGh+ppshahhllLLhllhllNLhlTlh............IhtV.....hphs...sGhsphcl...ppp...G..........lhh...c.G.o-h......l..PL.atpplp...uR.spsLhlpS.......spslslpt...hs..Gph.........spL..ls.pt..s.spsptFplpss..stphLFosDtpph........h..phspGs..................lh.pslpTstlp....up..s.p-LplcSssR.lhhcuscGVplputs..lchpst........t-lpLp.S..............-GplhLsu...tslhls....pLPpus....................sst.tpt................haclCsC..ssGpLahs.ssstp................................................................................................. 0 42 56 118 +4362 PF03343 SART-1 SART-1 family Mifsud W, Mistry J, Wood V anon Pfam-B_3690 (release 6.5) Family SART-1 is a protein involved in cell cycle arrest and pre-mRNA splicing [1][2]. 
It has been shown to be a component of U4/U6 x U5 tri-snRNP complex in human, Schizosaccharomyces pombe and Saccharomyces cerevisiae [3]. SART-1 is a known tumour antigen in a range of cancers recognised by T cells [1]. 20.50 20.50 20.70 20.50 20.00 20.40 hmmbuild -o /dev/null HMM SEED 613 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.63 0.70 -13.46 0.70 -6.17 38 393 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 272 3 287 395 7 455.60 25 80.12 CHANGED -uLSIEETNKlRhpLGLKPLslsss............................ssshcscputshcshpch........pcpscs++cc-ch..pppIpc...u+-ctp.pppKLts..psLu-t...........t-ss.DspuWlpcp+Kh..p+phpttctct.....................tpptpc+pps..spasspD...LsGlKVsHclcchpc..GcshlLTLKD.ssVL-s--t.....D.LpNlsLh-+c+tpcplch+K+p............p.sp...t.tppslLu.pYDE-lpsccpcp.........hph......cspupssct..................................pppccp......pspt+h+hphhsh-.....................spspp.sSDYhs.p-.lK..hKKsKK....................Kctpppp++phh......Dp.t..st..t..s.t.tttt...............cl-pps......................ss.hcpc.p.ps.shs...............DD-DL..QtpLsppR+hth+....c+pchcs.EclAcplcpppspsp........pcpp-.......................csulVls-TSEFlps..Lp........psshtcpttpp...................pcpspp.tsh.psppsptp.............tstssphpps.ss.pcc-........................t..tthssshl-cEsslspGLuusLphLKp+Gllc................ppppchhtppthhth..phptphpcct.c.tss+hsph....R-chh.chppcp+-ppc..............................tY+PcVcLcYlDEhGRphssKEAFKp.LSHpFHGKGsGKtKpEK+lKKlE-E+ 
.................................................................hSlE-sN+lRhpLGLpPl.h...........................................................t..t..t.p.h............................t.....pptpth...tttltt....+ptt..........htt..ttltc.....................sh..sWl.p.pph.....................................................ttt....ttts.........LtslpVtHthtph.p....ttp.lLTL+D....psl........pptp....................D..L.N.p......hhcppchpcpht.pppt......................t..........................t.....t.l.Lt...pYD-....p.ttpt.pt..............................tttt.h.....t..........................................................................................t.tt..t........t.tp.p..t..p.p.........................t.t.ss-h..hp...tc..h.p.....hKK.K..p.....................................+h...th.pp...................................................................................................t....................................................................................t..............................................--..t-h.....tt.Lttt+pht.h...........t........p...p.....tlh..p.lt.t.ttt.............................................................................tsslhhsthsEFsts.....lt...............t......t.....t...............................................tt..t....t.t.tt.............................t.thtt....p...tt.............................................................................................................h.ttE..hstGhuusLthhpp+uh.lp..........................................................ttt......t..ptp..h.........t.......t....p.pcppt....................................................................hpsplplpYhD-.G+.hs.KEAF+..LSHpFHGKssGK.K..hEK+h.K+hppc........................................................ 
0 101 164 247 +4363 PF00269 SASP Small, acid-soluble spore proteins, alpha/beta type Finn RD anon Prosite Family \N 20.60 20.60 20.70 20.70 20.10 20.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.84 0.72 -4.11 66 1862 2009-01-15 18:05:59 2003-04-07 12:59:11 15 3 421 3 380 1047 9 56.40 40 83.13 CHANGED NchlVPp..AcpAL-ph................KhElApElG....V..thppt..G..-lTSRpsG.p....VGGpMVK+MlpttEppht .....................................Nphhl.tAppul-ph....................KaElApEhG...........................V.p..s..s.....t.....sh.o..uRpsG.u........VGGph....sK...RhlphAEppl.t....................... 1 177 310 333 +4364 PF04259 SASP_gamma Small, acid-soluble spore protein, gamma-type TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The SASP family is a family of small, glutamine and asparagine-rich peptides that store amino acids in the spores of Bacillus subtilis and related bacteria. 25.00 25.00 26.00 25.80 24.60 24.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.66 0.72 -3.85 7 317 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 241 0 29 139 0 75.60 64 86.98 CHANGED StTsAQpV+pQNtpSut............t.auTEFASETNsQpV+QQNtQSt.t.t.su.......utpspassEFASETssppV+pQNtpupApKppsS ..............K.ATSGASIQSTNAS...........YGTEFA.TETNVQAVKQANAQSEAKKAQASuA......QSANASYGTEFATETDVHuVKKQNApSAAKpSQSS........... 
0 8 20 22 +4365 PF03898 TNV_CP Satellite_CP; Satellite tobacco necrosis virus coat protein Finn RD anon DOMO:DM04608; Family \N 27.20 27.20 28.60 306.80 21.10 27.10 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.30 0.71 -5.26 4 4 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 4 62 0 5 0 199.80 42 98.28 CHANGED MoK+Qsppps....+psstpsV+sIlpsphEpKRFsLlsssNsssTAGTVhNLSNsIIQGDDIsQRoGDpI+hhpphL+sRsTuITsSQo..F.RFIWF+DNpNRGTTPuVTEVLsSAshhSQYNPlThQQ+RFTlLpD.VpLsCSlsGcsIKcpshshstp.tlaYNGAsuVAuSNGPGAlFhL.IGDplsG..paDluhEhhYhDh MoK+Qsppps....+psstpsV+sIlpsphEpKRFsLlsssNsssTAGTVhNLSNsIIQGDDIsQRoGDpI+hhpphL+sRsTuITsSQo..F.RFIWF+DNpNRGTTPuVTEVLsSAshhSQYNPlThQQ+RFTlLpD.VpLsCSlsGcsIKcpshshstp.tlaYNGAsuVAuSNGPGAlFhL.IGDplsG..paDluhEhhYhDh 0 0 0 0 +4366 PF03110 SBP SBP domain Bateman A anon Pfam-B_737 (release 6.5) Domain SBP domains (for SQUAMOSA-pROMOTER BINDING PROTEIN) are found in plant proteins. It is a sequence specific DNA-binding domain [1]. Members of family probably function as transcription factors involved in the control of early flower development [1]. The domain contains 10 conserved cysteine and histidine residues that probably are zinc ligands. 21.60 21.60 22.20 22.70 20.90 21.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.08 0.72 -3.76 30 552 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 54 3 340 562 0 72.80 55 16.94 CHANGED pCQVEuCsuDLSsu.KpYHRRHKVCEhHoKushVl.luGlpQRFCQQCSRFH.LsEFDEuKRSCRRRLAGHNcRRRKsps- .................................CQV-GC.psDL.oss.K.cYHRRHK....VCEhHu..K..ust.V...l.luGh....p..p..RF.......CQQCS.............RFH.LsEFD.p......s......KRSCR+RLss.HNc....RRRKst..t..................... 1 93 251 302 +4367 PF00496 SBP_bac_5 Bacterial extracellular solute-binding proteins, family 5 Middle Finn RD, Yeats C anon PDBSum Domain The borders of this family are based on the PDBSum definitions of the domain edges for Swiss:P06202. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.29 0.70 -5.64 226 27055 2012-10-03 15:33:52 2003-04-07 12:59:11 17 39 4221 130 5660 20528 6027 361.60 19 67.34 CHANGED plhPtLAcs....................p...hssDspsasFpLRcsl+apDG.s......slTAcDVha....o.....hcchhssssssthhhhh.................................................htplpsscshslplphppP.ss.........hhhhhsthstthh.....................................................................tttttsthtppslG..oGPaplpp...ap....sp...plh.lp+.......Nss..Y...W.......................t.sps....tlcplphphl......csssthtth.psGclphh..hths.....ssshtth....ttptthph........................tts...shthhhlthNhp.....................pt...sh.sc..........hplRpAlshAlD+c.tlscth...................h.tshstsstsh.hssshshhtt.ht.......................................................hs....pcA+pLLpcAGhpss.......................................hhhhhsssssstp.phuphlppplpp..lG.......lclplpsh.t.........................................thh........psshshhhh..sasss.hssstshht.hhtssstst 
..................................................................................................................................................................................................h.PtlAcp.h.................................................................p...h.s..p.D..s......p...s...a...s...Fp..L..R....c..s.s.....+....a....p...s.....G..p...................................sloA.cD....l..ha........o.................h..p...+...h..h..s....p.s.ss..thhhhh.........................................................................................................................................................hhs.lc...s.h..D...s..t.T.l...p.lpLppP...s...................h..hhth..l..u........s.h.hhh.....................................................................................................................tt.t.t...t...t..t..p..h.s.p...p.s...l.G..........oG.P.a.p.lpp....................apt...sp......plt..hp.+................N.ss........Y.....W.....................................................t..tps................t.lc.p.lph.phh.................-s....ss...thtt...h...ps..Gp....h...p....hh.......t.h..s..............sp.p..h..t.p.h.............pp.s.tthph...............................................................ts....sh.shh....h..l..th.Nhp....................................................ps....sh...ss............hplR.pAls..h..ul..c..+.........p...tl......s.p.t.l....................................................h....t..s.....t....s...p.....s......s....t......sh......hs....s....s...h..h..t..h..s..s.t.ht...............................................................................hs......pcA....+....p....l.....L....p....c....A....G..hp....s.......................................................................................hhh.h.h......t.....s.....p.....s.....s.....t....p....p....hu.......ph.........lp...p.p....hpp.....lG...........l.p.l.p.l...psh...ph..................................................tthhpt....hp.p.t..p.a..c..h..hhh.....s.a...s..ss......
.ss..thh..h.t......t............................................................................................................................................................................................................. 0 1562 3337 4515 +4368 PF03480 SBP_bac_7 Bacterial extracellular solute-binding protein, family 7 Bateman A anon Pfam-B_808 (release 7.0) Family This family of proteins is involved in binding extracellular solutes for transport across the bacterial cytoplasmic membrane. This family includes Swiss:P37735, a C4-dicarboxylate-binding protein [1] and the sialic acid-binding protein SiaP. The structure of the SiaP receptor has revealed an overall topology similar to ATP binding cassette ESR (extracytoplasmic solute receptors) proteins [2]. Upon binding of sialic acid, SiaP undergoes domain closure about a hinge region and kinking of an alpha-helix hinge component [2]. 20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.60 0.70 -5.30 35 6254 2012-10-03 15:33:52 2003-04-07 12:59:11 8 19 1778 54 1762 5551 7504 278.70 22 82.64 CHANGED hupss.ussssptpus.phFschlpEc.osGcl..plplaP..supLus-t.phlctlpsGs......l-hshsusuhhssh.sPphtl..hsLPFlF.........pstptsc+.hhsuthu..cpLhpphcpp.GlpsL..ua.apNGh+phos.sp+PlpsP-Dh+G..LKlRl.tushhhphhctlGAsPpshsauElYpuLpsGslDGpENshssl.hst+haEVQKYhohos..Hsh...sshhllhspshassLst-hpphlccAspEus.phtpchhpctspphhppltc.....sGhplh.hos.cppsa.pcuhpslac...c.atcp 
.............................................................................stsp.s.h.t.t.sh..pth.u.ch...l..p...c..t....osG.c.l..plc.l.as....supL..s.s..st...ph..h-..t..l..ps..Gs......l-.hs.h.s.s.s.s.h.h.s.sh..sP..t.hsl.......h..s..l..P.....a.l..h..........p.s..h...p.p.h..tp...h..h...s..u..t..hs...........ppl..h..p...p..h...pp....p....G...h..h.s..L.....uh...h..........s...s...G.h....c...p....h.....ts....t....p....+.....s.....l...p........s..s...s.....D.....l...+...G.....lK...l.R.s.......s....s.....h.h.hp..h....h.c.t.h....G.....A.....s.....P..ss.h..s.h.u.E.lYsALp.p.GslD.u.tE..s.s.h.s.sh..hshp.a..h.E..V..t..+..a.h.s.h..s.s.......ash......s.s..h.h.ll..hs..pp...h...a.ss...L..........s...s..........-............pphlpp.u..s....p.cus....p...h....t....p........p..h....h.............p....p....t.........p.p.....p..t................hp.......p.hpp.........tG.s..pl........h..s..p......p.h.p.sa....pc..s.s.p.s.h.hpp.....t......................................................................... 0 536 1184 1514 +4369 PF01297 TroA Lipoprotein_4; SBP_bac_9; Periplasmic solute binding protein family Finn RD, Bateman A anon Pfam-B_1416 (release 3.0) Family This family includes periplasmic solute binding proteins such as TroA that interacts with an ATP-binding cassette transport system in Treponema pallidum. 
22.10 22.10 22.30 22.10 22.00 21.60 hmmbuild --amino -o /dev/null --hand HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.47 0.70 -5.14 141 7592 2012-10-06 15:37:50 2003-04-07 12:59:11 12 12 3969 42 1428 5388 2851 272.40 24 83.10 CHANGED Vlsoh.sltslscpl..uG..-.............t.spVpsll.ss.Gs-.PHsa..pssPpshpplpp...ADlllh..sG.hshE..s....a.lschh.........ttsphthl.......shsp........sl................................................................................phhtt..........................................................................................................ttttcpsp........................................................................................................................................................................................................DPH.lWhsPpsst.thspslsctLsch.......-Pppts...hYppNhppahpcLpplcpphpptls.sht.t........pphls.hHsuas..Yhscpa....GLp...huhh........thss..tpcsospcltplhctl+ppslpslFh-sphssc..hscpl...upcs.G..sp...l.......lhlcsl........................st......sYhphhpp....Nhpsltpul 
........................................................................................................................Vlsohtsltshscpl......uG...-.................................p...splp....sl....l.ss....G.s-...s....HsY......cspsp....Dl...t.p....l.....pp...ADlll.a.sG..hshE.....s....a..hp+hh.................thp.ppp.hphl................tssc.....sl.......................................p..........................................................p...p.................................................................................................................................................................................................................................................................................................................................................................................................................................DPH..sWhssppuh..t.h.spsItct.L.sc.h.......DP.p.pps....t.Y.cpNhppahp.c.LppLcp.ph.pp...ph.s...sh.p........+th.l.s..pHs..Aas..Yhucpa................Glp......t..hshh..........slss...-pc....sospplpplh.ch.l..+.....c.p.....p..l.ps.....lFs...E.s..p.s.s.s..c.......sscsl.......uc.cs....u....sp...h............hhlssl....................ttt..t.tsp...sYhs.h.hcp.shpsltps.................................................................................................................. 0 431 876 1187 +4370 PF04405 ScdA_N Domain of Unknown function (DUF542) Yeats C anon Yeats C Family This domain is always found in conjunction with the HHE domain (Pfam:PF03794) at the N-terminus. 28.90 28.90 29.00 30.50 28.30 28.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.56 0.72 -4.58 70 1136 2009-01-15 18:05:59 2003-04-07 12:59:11 9 6 1115 0 170 543 24 56.10 44 24.94 CHANGED ts..pslG-lVsphspuuclFccasIDFCCGGptsLscAs.pcpsl-.stllpcLpslt ....+.spslGElshshP+A.oslFRpYclDaCCGGppoLtcAu.t++slDls.l.scLspL.t.......... 
0 52 104 142 +4371 PF02667 SCFA_trans Short chain fatty acid transporter Bashton M, Bateman A anon COG2031 Family This family consists of two sequences annotated as short chain fatty acid transporters, however, there are no references giving details of experimental characterisation of this function. 20.00 20.00 20.10 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.63 0.70 -5.71 4 740 2012-10-02 15:12:49 2003-04-07 12:59:11 9 3 662 0 154 1076 192 418.50 44 98.19 CHANGED hlpRlophhsthVSKaLPDPLIFAhLLThVTFllshsLTspssls.........lVshWGsGFWsLLuFuMQMALIlVTGpALAous.Vp+lL+plASlsKsshpulhLVTFhu.IAshINWGFGLVVGAhFA+....ElARplKGsDYsLLIAsAYhGF.lTWtGGhSGShPLLsATPspslp+lhst..s.pthIPlspTlFSuYNlhIhshlllshPFlhhMhhPKpuElhuIDs......KLltcEhc.pcpls..cDsTlA-+LEpS+lLuhlIuhLGhuYLGhYFacpGF...lolNsVNhhFlhsGlLLHtoPhAYMRAIspAARSsAGILVQFPFYAGI.hMMcaSu..lGG.....lIophFhsVANccTFPlhTFaSuulINhhlPSGGGcWsIQGPhllPAuQALGsDlGKosMAIAaG-tWhNMhQPFWALPALuIAGLGsRDIMGYClosLIFsullhslGLhhl 
.......................................................................................................................................................................................ht+hsphhsphspRaLPDsal.FuhLLTllsh.l..hAh.hhs.s.psPh.p.........................hl.ph.WG......s...GFW...s..LLuFu.MQMALll..VTGasLAo.os..l++.lLp.p.h.A.p.hs..+ost.p.u.l.hh.Vo..hluhlush...l...NWGFGLVl..GAlhA+....El....A....R....+....l..c....s....s..D...Y.....LllAuAY.GF...lsWtu.G.lSuS.h......PLhhAT.s...G...p.......htchh....u....................lIPh.o-Tl..Fo.s..aslh..h.s..lsl..l.l.s..hP....h..ls.t.h..h....h.....P....c..s........p...c..s...l.s...l.Ds................pllt.c.....-s.s...h......p.....p..........h....s............................t......p..s...............s.P.uE.+LE.p.Sh.....l......L.......o...ll.l....u..h.L..G.l..s.Y.l..h..h....a..F.....p...p.G.hs.........l...s........L.N.........h.........V.....N...h..h...F....L.......h....h....G.l......L.......L......H.....t......T......P...h....s....Y.......hc.........A....ls....p...A....s...+.....o.s.u....G...I.......L.......lQF..P.F.Y.A.G...Ih.u.h..M.t.p.u.u....hs.G.................h......I...o....p..a...F....l..s.....l...A.....s.....c...c..T......F...P..l..h....s.....F..l.....S..u..u..l.l.NhFVPSGGGpWslQuPhllPAApsL.G....sc.........h.........u.........pssMAlAaG-uWsNhlQP...F....W..A..L..P..s..L..u..I.........A.........G..L...t....s....RD...I...M..Ga.....C....l....s...tL....l..hs.Ghlhslshhh............................................... 0 45 86 124 +4372 PF04486 SchA_CurD SchA/CurD like domain Kerrison ND, Bateman A anon DOMO:DM04327; Domain Members of this family have only been identified in species of the Streptomyces genus. Two family members are known to be part of gene clusters involved in the synthesis of polyketide-based spore pigments, homologous to clusters involved in the synthesis of polyketide antibiotics. The function of this protein is unknown, but it has been speculated to contain a NAD(P) binding site [1]. 
Many of these proteins contain two copies of this presumed domain. 24.20 24.20 24.40 45.70 24.00 24.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.21 0.71 -4.25 16 83 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 40 0 25 85 0 109.00 36 61.55 CHANGED h.RHALoYsl+PGststlAclLu.........hps.sAssDsss.llpTolFh+-shVVRll-VcGDh.s.hhtahu.p..s.tsEpAlsPhLcpsRchu-scuhhshhtcAAhsslppsssts ....RHALoasV+PGstttlAclLA..........thps.pA..tsDssohLhpoolFh+sshVVRhl-Vc....GD.L.t.hhtahu.p..hpssEtAls.hL..cp..sRchucspuhhthatcAuhsslpphs..s................................ 0 9 21 25 +4373 PF02630 SCO1-SenC SCO1/SenC Mian N, Bateman A anon COG1999 Family This family is involved in biogenesis of respiratory and photosynthetic systems. SCO1 (Swiss:P23833) is required for a post-translational step in the accumulation of subunits COXI and COXII of cytochrome c oxidase [1]. SenC (Swiss:Q52720) is required for optimal cytochrome c oxidase activity and maximal induction of genes encoding the light-harvesting and reaction centre complexes of R. capsulatus [2]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.88 0.71 -4.65 6 2856 2012-10-03 14:45:55 2003-04-07 12:59:11 9 21 1787 30 1082 3642 2070 164.20 24 74.44 CHANGED IuLllusGuthsaLh......LpTsKtspssph.....csplsGPFpLh-.pGc.Fsp-sLpGclSLlYFGFTpCPDICPstLc+lsshlcpLcpc.pIclQslFIolDPcRDTPcVLKcYlpsFcsuFlGLTGshcplKslsccaKVaasps.ssKssp-YhVsHSsFhYLIss-G+hlcta..shs ..............................................................hhhh................................................................................h....s...F....pL.h....s....p...s.G...p.......h...s.....p...s...h....c...G....+....h.hl...............l.a.FGa.TpC..P..D..l..CPs...p...lsp...h....s.ph...h..........c..........p..........l...sp............p......t.......t.......c...........l..p.sl....F....l....o..l...D....P..c......R.......D.....T....s.......p.h.L..p.......p....Y....s.....p...t...F.....s.....s.....p.....h.....h..u.....L...o...G....s..........p.........p.......l.....p...p.....h.....s.....c......p......a......p......l.....h.a....p.....p............s.......................................................................t......t................s...............Y.h....l...sH...........os......hhaLls...pGph.........th................................................................... 1 322 667 895 +4374 PF02036 SCP2 SCP-2 sterol transfer family Bateman A anon Pfam-B_1050 (Release 5.1) Family This domain is involved in binding sterols. It is found in the SCP2 protein Swiss:P22307, as well as the C terminus of Swiss:P51659 the enzyme estradiol 17 beta-dehydrogenase EC:1.1.1.62. The UNC-24 protein Swiss:Q17372 contains an SPFH domain Pfam:PF01145 [2]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.11 0.72 -3.72 174 3603 2012-10-02 14:08:01 2003-04-07 12:59:11 12 54 1915 19 1132 2747 690 100.00 20 46.14 CHANGED lsplhpt.......p..tttstlpplsu.......shphpl.psh.h.....shhlshp...ss.phpl........hst.t..psD..sslshsssshhplhsuc....sspphhhps+..LclcG.Dhtluhclpslhp .............................................................................................psthptl.pu...............hlplcl..pshsh....................phhlshp........st..plpV................httht...tpsD..solps..s.ss..s.....Lht.lh..stc.......ss.ssh.hhps+..Lc.l.c.G.Dh.pluhplpslh............................ 0 351 606 884 +4375 PF00375 SDF Sodium:dicarboxylate symporter family Finn RD anon Prosite Family \N 19.80 19.80 19.90 20.00 19.40 19.40 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.42 0.70 -5.71 135 10806 2009-01-15 18:05:59 2003-04-07 12:59:11 13 12 3666 24 2273 7267 1474 380.50 26 90.76 CHANGED L...hhplll..ulllGlllGh......................................h..hsphsshlp.hhGslFlphlphlllPL.lhsollsuluslts..scplG...+luhpslhhahhooslAshlG.lhluhlhpPG..........................stts..........ssststpstpssshhp....hlhshlPsN.......hhtuhs........ps....................................................................slLslllFullhGlulstlsp..cs........cslh.phhcshpclhh+llphlht.luPlGlhulhAthsup.hG.....hshlh.sluthllsshluhhlhhhllhslhhhhhsph.sPhch.h+thhsshlhAFuTsSSsATLPlslcssccph..........G..VscplusFllPLGsTlNhsGo.ula.ulsslFl...AphhGls.lo.hsphlhlllssslsSlGsAGVPGuulls.lshlLs..slG...lP.....h.pu....luLllul-hll.DhhRTslNVs.GDsssuhlls+h 
.............................................................................hhhplll.u.lllG.l.llGh......................................................................................................h.h..t.h..s..p..h..h.....p...lGsh.FlphlKMllhPl.lhs.ol.l.s..u...............lus......h.tp...........hpphG........+l.u.hh.sl.hh.a.h...h.s.ohl.AhhlG.ll.hu......lht.Pu....................................................................hh..ts.........................tt.s.s..h.s.s....t.s..p..s....s.l.hp........hl.h..s.h....l...P...s...N.......hhtuhs........ps............................................................................................................................................................................................................................................................................................................................................................................s.hlsl.lh..FulhhGlu....ltt....hspps....................pslh..phlpsh..sphhh.p.l.l.p.h.l.h................p...hAPlG.l..hulhutslup..hG...........hs.slh..sl.u.phlls.hh.hshll.h.hhll.hs.l.l.h.h...h.s..t.h..s.............sh.p.h...h+t.l...t...ps.hl.....hAas.T...pS.....S...tu...slPhshc...phcc..h......................................G..ls.csls...uhs.lPhGho.hNhsGs.ula.sh.sslhl.......Ap.s..h.........G..l..s...l....s....hsp....h.l.hl..l........l........lh.hl.sSh....G.............s.A.GVsG.........uuhls..lshsLs....shG.......lP..............h...ps.......lullhu....l-h.....lh..D.sRTs....lNlsGss.lsshllup........................................................................... 1 579 1152 1742 +4376 PF02982 Scytalone_dh Scytalone_DH; Scytalone dehydratase Griffiths-Jones SR anon Structural domain Domain Scytalone dehydratases are structurally related to the NTF2 family (see Pfam:PF02136). 
20.20 20.20 20.30 20.40 19.80 20.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.20 0.71 -4.65 3 125 2012-10-03 02:27:23 2003-04-07 12:59:11 9 3 89 20 62 135 1 145.70 58 84.45 CHANGED sITFcDYLGLpssLFEWADSYDSKDWDRLRKlIAPTLRIDYRSFLDKlWEAMPAEEFlAMISDKsVLGDPTLKTQHFIGGSRWEKVSDTEVIGHHQLRVPHQ+YTDoThpEVslKGHAHSsNhHWYRKVDGVWKFAGLKP-IRWuEYDFDcVFcDGR-Sa ......................sap-hhuhppssaEWADSYDoK..........DWDRLR+CIAPTL+.........lDYRSF.LsKh..W..EAMPA-EFlAMhSDssVLGNPLLKTQHFl.Gu.o+WE+lSDsEllGaHQLRVsHQ.+Y.......TDso.....hsp..VsV..K......GHAHShNpHaY+Kl-GlWKFAGltPtl.a.phph.............................................. 0 7 26 50 +4377 PF03313 SDH_alpha Serine dehydratase alpha chain Bateman A anon Bateman A Family L-serine dehydratase (EC:4.2.1.13) is a found as a heterodimer of alpha and beta chain or as a fusion of the two chains in a single protein. This enzyme catalyses the deamination of serine to form pyruvate. This enzyme is part of the gluconeogenesis pathway. 
22.20 22.20 22.20 22.60 21.90 22.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.76 0.70 -5.37 152 6294 2009-01-15 18:05:59 2003-04-07 12:59:11 10 13 3519 0 918 3774 323 283.70 38 69.93 CHANGED pcp.slsluphhhcNEhs.htssp.p.lttthsphhssMtssscpGL.....ps.pGlhsGslphh+chh.........................................................................................................................ph..tppL.t.................................tt.................................hthhshshhhAhAssEtNAuGGplVssPTsGu.....uGllPuVlhhh.cchph.................................................s-.-plhchLhsuuslGhllKpsAoluGAtsGCQuElGsAsuMAAAulstlhGGoscQlppAuphulpphLGLsCDPluGhVplPClcRNAhuAspAlsuAphA.hts.s.t..ptIshDcVlcoMtpsGpshssth+ETupGG......LAhs ....................................................................................................................................................................................t.p.shslutlhhpNEhs.ht....spp......c...lttthsphhpsM...p...s...sl...c+...Gh.........ps..c.G.lh.s.Gs.l.p.h.h.R+hh.......................................................................................................................................tl...pctL......................................................t..ht.p.c................................................................s...htshs.hlshaAlAVsE...p..N.Au.G.G..pl.VsuP..TsGu...........sGllPuVLhhh...c+at.h.....................................................sc...-phh.+aLhsA..u..AlGhlhK...pNASISGAEsGC.......QuE........VGsAsuMAAAGlstl.h.G.............G.o...............Pp..Q..l....s.AupluhcctLGLsCDPVuG.V.plPClcRNAhu.AspAlsAAcMA..lpt....s.st...stl..s.l.DcVIcoMhpsG+sMs.spa+ETucGGLAh.h....................................................................................... 
0 290 558 751 +4378 PF03315 SDH_beta Serine dehydratase beta chain Bateman A anon Bateman A Family L-serine dehydratase (EC:4.2.1.13) is a found as a heterodimer of alpha and beta chain or as a fusion of the two chains in a single protein. This enzyme catalyses the deamination of serine to form pyruvate. This enzyme is part of the gluconeogenesis pathway. 21.40 21.40 23.00 22.00 18.90 21.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.13 0.71 -4.17 190 5230 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 3484 3 788 3051 241 140.80 39 37.95 CHANGED SlF.Dlap.IGIGPSSSHTsGPM+AAphFhpt.lt.............................p..........psp+lplpL..aGSLAhTG+GHuTDpAllhGLhGhpP-slchc.............th..hhtthttpt.l.h...............s......ptlpFs.p.pslhachcphh.s....hHs..Nuhphp..........Ah...sssthlhppsaYSlGGGFl......hs..p...pttst ..............SlF.Dlap.lsIGPSSSHTsGPM+AGptFhst.Lt...............................................p............pss+..lpl..cl..YGSLuhTG+GHsTDhAllhGLhG.pPpslclc..............h..hht.h.tptpl.h.......................tt.......cpl.pas..hp..p..s..l..h..a..c......p..p.l...s.......hH....NuMplp.............Ah.....tss..p.....h....l...hp.p...TaaSlGGG.FIhscc...t........................................................... 1 238 475 648 +4379 PF01127 Sdh_cyt Succinate dehydrogenase/Fumarate reductase transmembrane subunit Finn RD, Bateman A, Griffiths-Jones SR anon Prosite & Structural domain Family This family includes a transmembrane protein from both the Succinate dehydrogenase and Fumarate reductase complexes. 
25.20 25.20 25.30 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.95 0.71 -4.28 125 5134 2012-10-03 07:11:12 2003-04-07 12:59:11 17 5 2968 88 1395 3246 2736 114.00 19 77.37 CHANGED hphp+P...........hs.ph......thhp.....p..hsshh.lhpRloGlsLhh.hhhhlhh........h.h.hhhht.sst.sasth.........tshhs..........hhhlh.hhhhhshhaHhhsGl+pllh..D...h.hh......pss........ttps.hhhl.shshhhhhhh .................................h.........................ht..............ssh.t...l.hpRl.oGlllhl......hhhlhh................h.h.....hh...hs...sth...s...apth.................tshhss........h.hht.hhh..h.hhlhulhaHshsGlp..pllh.....D...h...h.........cst.............thph...hlhh...shslhlslh.s....................................... 0 374 831 1144 +4380 PF02810 SEC-C SEC-C motif Aravind L anon Aravind L Family The SEC-C motif found in the C-terminus of the SecA protein, in the middle of some SWI2 ATPases and also solo in several proteins. The motif is predicted to chelate zinc with the CXC and C[HC] pairs that constitute the most conserved feature of the motif. It is predicted to be a potential nucleic acid binding domain. 20.40 20.40 20.60 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -6.81 0.72 -4.38 183 7596 2009-09-11 12:15:01 2003-04-07 12:59:11 10 82 3806 5 1631 5164 1418 20.10 67 3.86 CHANGED RN-sCPCGSG+KYKcC.Cttht .....RNDPCPCGSGKKYKpC.pGp..t..... 
1 545 1051 1369 +4381 PF00995 Sec1 Sec1 family Bateman A, Griffiths-Jones SR anon Pfam-B_530 (release 3.0) Family \N 20.20 20.20 20.40 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 564 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -13.08 0.70 -5.88 150 2115 2009-01-15 18:05:59 2003-04-07 12:59:11 18 29 328 14 1402 2034 44 467.10 18 86.95 CHANGED hKlLll..Dptstslluhshphs-Lhpps.Vslh...................tplcs..........................pR...............psh.shpslahlpPs.tcslctl.hp-...................lpp........................................spYppaalhFss..................shs..cshhcpLupss.............shptlpplp-....hhlcals...l-sslF.....oL.....p..hs.............................psa....tthhs..............................................................................................pttppslp....phupuLhsl.hhohsp...........hPhIRhpts....................................................hucpluptltphlpct.........p.htttpps.t.........................................sl..LlIlDRshDhloPLlppaTYQuhla-llslpp...splpl..........psst.ttp............................+.phhLss..pDthasphpttpasclsp...plpphlpcap..p.p..............................ppppttshs.clpphlpp.lPphp.cppsplshHhslssplhpplppcp...Ltc.hhclEQslss.ssststp.....hhp.lhch..l.............ssp.....h...-+LRLlllahlp.....t.t..spphpphpchlpps..t.hs.pph.....thlpplpphsthht..p.................................t..............t..pp.tht.h.......thhpth.pt..p........................ssp.hhsp.ap.......................Phlppll-sl...........................hpsph..sppashhssps.................................t.t...tt........................................................pph....pcllVFllGGsTYsEhpslt.plspptt.................................h.clllGuTsll...........ssppFlp.pl 
.........................................................................................................................................................................KhLlh..Dp.........hht...ls...h.h..p.....ptlh.p.....t..l.h.h......................lpp.................................pp.....................t.h..ph.....slahl.p.......P....p...............ps.....l..phl..hpc.....................................................hpp..................................................................................hhpt.h..alh....Fss....................................hs..t......h..h...p.l....tptt...........................t.l..t.t.l...-............h.h.sa..hs....h-splh...sh...............p..................................s.h......th...................................................................................................t.t.t..lp.....thsptlhsl.....h.s.h.t....................hP.h.l..ph.tt.....................................................sp..hlsp..lt.p.h.hpp.....................h...p.p.......................................................................s....LlllDRshD.hoslhpp....hTYpuhhp-l......h...s....l....p......t......s..t...hph........ps...t.t.............................................................c.ph.Lst...pD.ha..p..tt.ph.spl..st...tlpp.hpphp....tp..............................................tptt.tsht....chpphl....pp.hPphp.p..tt...t..hs.h..Hh...slspt......h..ph.hp.....t.pt.................h......p...hhphE.....pplh....t.s........ptt.....................p.....h.ch.l..........................................................stt.h.s..stl.Rl.h.h.l.h.hh.......t....t.......t.ptht..p...l....ps...............hs.......pth..............hlpphtt..hsh.h.t...................................................................................tp..ht.....................t...tt..p...........................................t..p.........h..st..at.......................shltplhpph.....................................................hpt.t..........ppt.a..h.h..ps.................
...........................................................................................................................................................................tpth.phllFhl...............GGsoh.tEh..tsh.h..hspt.t............................................hclllusoplhsspphlpt................................................................................ 0 535 782 1152 +4382 PF03908 Sec20 Sec20 Wood V, Griffiths-Jones SR anon Pfam_B-21631 (7.2) Family Sec20 is a membrane glycoprotein associated with secretory pathway. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.89 0.72 -4.28 6 380 2012-10-01 21:14:52 2003-04-07 12:59:11 8 5 278 0 245 459 5 90.80 25 29.24 CHANGED SpspplTcuLhshpphhspslppSs.slpsLssSTpsLpphs-capphpsllppo++LlKphp+p-psD+hllaluhuhFlhsVsYlVhKRI .....................t..spplTp.uLtcspphhspplppS.p.shpsLp..........pSopslpphspcap.s......h.ss..............hlppu+pLlpphtRpphoD....+....h.l....l....h.hu....h..h.hhls..s.lhallh+Rl........... 0 69 131 200 +4383 PF03911 Sec61_beta Sec61beta family Yeats C anon [1] Family This family consists of homologues of Sec61beta - a component of the Sec61/SecYEG protein secretory system. The domain is found in eukaryotes and archaea and is possibly homologous to the bacterial SecG. It consists of a single putative transmembrane helix, preceded by a short stretch containing various charged residues; this arrangement may help determine orientation in the cell membrane [1]. 19.20 19.20 20.20 20.70 18.90 18.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.80 0.72 -4.15 33 504 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 411 11 343 453 48 40.20 40 41.61 CHANGED ouuull+aYs-.-sp.GlKlsPhsVlhhSlsaIshVllLHlhu ...........uushl+aYT-.-us.GlKlsPhsV.Llh.SlsFIssVhhLHlhu........... 
1 104 195 283 +4384 PF03839 Sec62 Translocation protein Sec62 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 22.40 22.40 22.40 22.50 22.10 22.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.72 0.70 -4.94 6 341 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 280 0 226 348 2 199.00 29 55.86 CHANGED sK+phFFRsK+ll+hLpstchKppKsKspsc.s........K...psp-cppchhKphhpss.s.tscK.....l.sppttp.cKKc.pKclc.Lpl.pcsQhFsD.s-hYVWlY-PlPhpsalhGllhllull.AhsLFPLWPhhhRpGVYYLSlGuhGhlushhslAIlRhILFl...IlaslshG+.thWlFPNLhtDVGFl-SFpPLYsachp.s.p+sphKKccKsKpKKKcKss .................................................................................h....ha+hh.+h..h..hh.p.th.tttt....p...t.........................t......pt.p....h..p.p.....h...t.......h.....hp..h.tsp+......................tp........t..p.t.t.c..c......p+..ph+....L.ch.....pt..cQhhh..D...s..t.hY.VWlY.-.s.s....p.hhphlhuhlh..llull...AssLFPLWPhhhRhGV....a.YLSlushuhluhhhslAl...............hRhIlFh...lhahhs...h.thWlhPN.Lh.p.D.l.GFh-SFpPlasap.p.......pt.p...Kp.p.t+t..........s................................. 0 76 124 185 +4385 PF01369 Sec7 Sec7 domain Bateman A anon Pfam-B_1629 (release 3.0) Domain The Sec7 domain is a guanine-nucleotide-exchange-factor (GEF) for the Pfam:PF00025 family [2]. 
21.10 21.10 21.20 21.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.26 0.71 -4.63 140 2266 2009-01-15 18:05:59 2003-04-07 12:59:11 15 40 354 21 1387 2049 18 181.20 34 16.19 CHANGED p........pth.................c...h.........pshptFs......pp..scculphlhppsh.h..................ttpsspp....lA..........pFL.t.psss...........ls+ptlG-aLuc...tpshs.................................hplLctalch.acFp..................shslspALRphLppFcLP....GEuQpI-RllcpFu....................................p+Yh..........................................................................................................................................................................p.sN.........s.........................................sh.atss..Dssal...LuaullMLNTDLHNs.p....l+.p+........................MohpcFlcNsc...........................uhs.........supc.hsp-hLpplYc.........sIpppcl ....................................................................t........h.....uhphFN.p.....Pc.......cGlphL.ppshl..............................................tpss.pp.lA..............pFL..h..ppcs...............Ls..+..phlG-aLGc......ppphs..............................................................hpVLc.s.al.ch....a-Fs..................s.h.p.lspAL.R...............pFL..t..p..F+LP......G.E.u.Q.c....I-Rl..h-.tFu...............................................pR.Yh...................................................................................................................................................................................................................p...CN...Pt....................................................................................................................................hFtss....Dssal....LuaulI.hLNTDlHss..s.........l..+...cK.........................M.ohccFlcN.+.................................................Gl.s........sG...p....D..lsc-.......hL..........pslYppIppp.h................................................
..................................................................................... 0 457 692 1048 +4386 PF01043 SecA_PP_bind SecA_protein; SecA; SecA preprotein cross-linking domain Finn RD anon Pfam-B_507 (release 3.0) Domain The SecA ATPase is involved in the insertion and retraction of preproteins through the plasma membrane. This domain has been found to cross-link to preproteins, thought to indicate a role in preprotein binding. The pre-protein cross-linking domain is comprised of two sub domains that are inserted within the ATPase domain [1]. 23.00 23.00 23.20 23.00 22.70 22.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.58 0.71 -3.72 120 5736 2009-01-15 18:05:59 2003-04-07 12:59:11 15 29 4782 29 1118 4232 2582 118.40 41 14.40 CHANGED s.scp...ssphYppssplsppLpc..s.....................DYplDEKs+sltLT-pG...hp+sEchh.....hl.......................................ssLYsspsh.phh+alppAL+A+tLFp+Dh-YlV.c.Ds....c.....VlIVDEFTGRlMtGRRaS-GLHQAIE .....................................................s.ppsophYtpsscll..p.L.p.c..p..........................-YplDEKs+sltLT.EpGl.pc.sE.chh......tl...................................................................-sLY.s.s...p.Nh.....sLh..Ha..lspAL......+A+hLap+DhDYlV......p.....-.G..........E.............VlIVD.E.aTGRhM.t......G.RRaS-GLHQAlE......... 0 393 750 958 +4387 PF02556 SecB Preprotein translocase subunit SecB Bashton M, Bateman A anon COGs Family This family consists of preprotein translocase subunit SecB. SecB is required for the normal export of envelope proteins out of the cell cytoplasm [1]. 
20.80 20.80 21.50 21.30 20.20 19.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.78 0.71 -4.61 114 1987 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 1915 16 404 1065 1207 142.90 37 93.68 CHANGED Mu-ppp..................tPp.hslpphYlKDlSFEsPsuPplF..tpphpPclslplsssuppl...............s..-shaEVsLplslsu+hp..pp.............ssFlsElp.AGlFpI........p.slsp.-plp.hLtltCPslLFPasRchluclspcGGFPPLhLsPlsFsALYppp...h......tppps ..................................................................................p.tFpIp+lYsKD..lSFE.sPsuPplF.....ppch...pPclp..lsl.sstup.pL...............u...-sha.E..VlLpl..oV..o..u.p.s.-c.............ssFlsEV..p.Q.uG..IF.s.I.........t..s.lps.p.ph..sph..LushCPsILFPYARcsIoshls.+.Gs.F.P.tL.L.s.PlNF-ALahphhpppt.t............. 1 112 227 308 +4388 PF02355 SecD_SecF Protein export membrane protein Bashton M, Bateman A anon Pfam-B_844 (release 5.2) Family This family consists of various prokaryotic SecD and SecF protein export membrane proteins. This SecD and SecF proteins are part of the multimeric protein export complex comprising SecA, D, E, F, G, Y, and YajC [1]. SecD and SecF are required to maintain a proton motive force [2]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.90 0.71 -5.10 19 8057 2012-10-02 18:57:54 2003-04-07 12:59:11 11 21 3723 9 1953 12076 6096 184.70 28 42.02 CHANGED thpshhcshss.shphhphchVGPsluppLtppulhAlhhAhlhIhlYlslRFch.hAhuAllA.LhHDlllslGhhulht.....l-lshssluALLTllGYSlNDTlllFDRlREs...hp+.pptshpclhshulspTLoRTlhTohT.....sLlsllsLhlhGu.....sslpsFuhshllGllsGTYSSlalAssllhhhtpc .............................................................h..hhh..............h.phhp.tcsV....GPs..l.G......p.-..h...h....p...p....u....l....h....A...h.....l...l.u...l....l....h..l..h...l..a...h..h.h....h...a.....c.....h.....t.....h.........u....l..s...A....l..lA....L.......h..t..s..l...l..l..h...l..u....l...h...u....l..h..t................hpl...s..L..s..s..l...A..u....l..lh..s.l..Gh..u..l..ss.sl...l....l...a-...R.I...REp........................l..+...p...................s....p....s...h.....t......p....s.l.s..p....u...h...s...p...s...h....s...p....h..l...s.o....s....l.T........T...l..l...s.s..l....s....L...a...h....h....Gs.............us.l...+..G......F....A...l.....s....l.hl..G..l.l....s..u.h....a.o.ulhlupslh.hh...hh......................................................... 0 650 1298 1669 +4389 PF00584 SecE SecE/Sec61-gamma subunits of protein translocation complex Birney E anon Swissprot Family SecE is part of the SecYEG complex in bacteria which translocates proteins from the cytoplasm. In eukaryotes the complex, made from Sec61-gamma and Sec61-alpha translocates protein from the cytoplasm to the ER. Archaea have a similar complex. 
20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.50 0.72 -4.39 185 4664 2009-01-15 18:05:59 2003-04-07 12:59:11 15 11 4540 10 1213 2641 1958 56.60 28 61.98 CHANGED phhp.FhcpspsEl+.KVsWPo+cEshpsThsVllhlllhulhlhhlD.hhhthll.phlh ............h..hp.Fh+pstpEl+.KVsWP.........o+cEhhpsTlhVhshshlhu....lhlas.lD.hllstllphl............. 0 411 783 1027 +4390 PF03840 SecG Preprotein translocase SecG subunit TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.50 21.50 21.70 21.60 21.20 21.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.46 0.72 -4.24 183 4339 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 4294 4 929 2471 1834 73.50 31 72.34 CHANGED hsllhllhlllulhLlslVLlQpuKG.uuluush.GGGu..stolFGup.sutsh...Ls+hTslluslFhlh.ulsLuhlsp .......phLlllhlllulhllhllLlQpu.Ku.ushuusF..uuGu.....ptslFGsp..tut..sh....LsR.hTs.l.Lu.slFhlh.slsLuhl........................... 0 319 620 789 +4391 PF04856 Securin Securin sister-chromatid separation inhibitor Mifsud W anon Pfam-B_4643 (release 7.6) Family Securin is also known as pituitary tumour-transforming gene product. Over-expression of securin is associated with a number of tumours, and it has been proposed that this may be due to erroneous chromatid separation leading to chromosome gain or loss [1]. 
20.30 20.30 23.60 33.30 18.90 18.20 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.90 0.70 -4.65 8 108 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 70 0 48 106 0 208.00 36 95.25 CHANGED hssllhss+EN......usPsstlppscuhh.............................................+.PLuStsp..s+opu................................................tssshpL.+h...Gplhs.shslspsspKuLsshD....pslpsKsst..........pscpsshsscshs.puKKlpuu................................................schshEhhPchp+.hPapP.GaE..uFD........s.-cplp+LsLps..........sPht.....h.hhs--ptptp.hpl.s.sP.......................Lc.ssls.cus.............................................shsuLssl-l ..................................................MssllassKEN..........tpPu.pp.lsspculh.................................................Luotss......hKuhs....................................................tpsplshs+h...GKshss..s.....u.....ls.KssRKuLGsVs....tpsscspssh..................................pp+psshss.cc..hocpss..Kspou.............................................................sssss-saPEIEc..hhPasPl..sFE..oFD.......hPtEcpIu+LsLss..........lPLh.......h.pE-.c..c..pc...phss..P.......................lch..sp.s.E.us..........................................................hhps.............................................................. 
0 6 13 23 +4392 PF00344 SecY secY; SecY translocase Finn RD anon Prosite Family \N 22.20 22.20 22.30 22.20 21.80 21.70 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.15 0.70 -5.52 157 6369 2009-01-15 18:05:59 2003-04-07 12:59:11 15 14 5237 19 1533 4121 3873 333.00 34 79.26 CHANGED olhuLGIhPaIoASIIhQLL...................shspLtclppp.GcpGRc+lsphoRhhollluhlQuhuhshhhtths.........................hh..lhhll..pLssGohhlhWLuEhIoc.hGl.G.NGlSLlIhsuIl.sslsts................................lh...phhphh......................t..hshhh.........h........lhhllhhlhl.lhhllalppup++IPlpas+p..............tsp..psa.lPlKlNhuGVlPlIFAsul..lhhPt...hluphh......pt......................................t........hhtp.lshhh...................................hpshhY.hhlahshhlhFuaFas.shshsPc-lAcsl++pGsaIPGlRPG.csTpcaLs+lls+lohhGul.aluhlullP..pllsshht..........................h..huG.TulL....IhVu....lsl-...hhp ..............................................SIFALGIhPYIo......ASIIhQLLp.................slhPpltc.hpKp.G-tGR+KlsphTRY.hTllLuhlQuhuhsh...shsshssh.t.......................................shh..lhhslhLssGohhlh.......WLGEpITc.+Gl.G.NGlSllIFuGIluslPts............................................lh.p.hhpthh.........................................p..s..t..h.shlh.......h.........lhlll.hhlhl.hhhllalp........pupR+I.P.lpYu++................htsp..soalPLKlNhA..GVIPlI....FASSl..lhhPt....sls..pah..sss.................................................t....hh.t.p....lst...hh...........................................s.spsla...hllYsshIlhFsaF...Ys..slt.hNPc..-hA-.....NLK....Kp.Gual..PGlRPG.cpTtcYlscllsRlTh.hGul.alshls.llP...h.htthhs..........................shh...hGG.TS.LL....IlV.sVsh-hh....................................................................... 
0 523 976 1295 +4393 PF04628 Sedlin_N Sedlin, N-terminal conserved region Mifsud W anon Pfam-B_5308 (release 7.5) Family Mutations in this protein are associated with the X-linked spondyloepiphyseal dysplasia tarda syndrome (OMIM:313400) [1]. This family represents an N-terminal conserved region. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.85 0.71 -4.29 32 687 2012-10-04 00:47:01 2003-04-07 12:59:11 8 10 305 4 469 992 6 133.60 27 84.69 CHANGED IlGp.....pDsPLaph-Fssst..ps...........................................ppL...tpFlsHuuLDll--hhap..................................ssshaLttlDpa....pphhl.oualTsuplKFlllap..................................................shs-ssl+pFFp-la-hYlKhlhNPFY..........................p.ss............sIp.Ss.....sF-p+lppluc+ ............................................................................................................................lluppspPla.hphss.tp..p.t.t....................................................................................t.l....palsHuuLDll--thht.......................................................................ssshaLt...........hl....p.ph.......pp..h..hl...u...a.l..........T.s......o.......p.......l......+..Flllhc..............................................................................shp-...ssl.+...s......h.Fpcla.ph.Yl.c.h.h.h.NPFY............................p.s..ss.................Ip.S.......tFcpplp.hh............................................................ 
1 166 259 386 +4394 PF00477 LEA_5 seed_protein; Small hydrophilic plant seed protein Finn RD anon Prosite Family \N 20.60 20.60 21.10 20.80 20.50 20.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.65 0.72 -3.78 5 271 2012-10-01 20:07:30 2003-04-07 12:59:11 12 10 102 0 87 294 3 71.90 39 87.44 CHANGED MASG.........QEcREELDcRAKQGETVVPGGTGGKSLEAQEHLAEGRS+GGQTRKEQLGoEGYpEhGoKGGpTR+EQhGpEGYpEMGRKGGLSTpDcSGuERAA-EGI-IDESKF ....................................................................................Eu..tp....+GGpsppcphGcEhYpEhGpKGGp.sptcphsp-hYpEhG+KGG......u..s.p.p................................................... 0 20 59 77 +4395 PF03841 SelA L-seryl-tRNA selenium transferase TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.03 0.70 -5.72 11 2029 2012-10-02 18:26:03 2003-04-07 12:59:11 8 7 1584 2 363 1893 697 327.50 37 78.46 CHANGED VlNhTGsVlHTNLGRAlhu....-cAhpAshpuhpc.ssLEaDL-sGKRGsR.salpcLLpcLTGAEsuhVVNNNAAAVLLsLsolApGKEVIlSRGELVEIGGuFRIPDlMcpuGs+LhEVGTTNRTHl+DYcpAIspNTAhLhKVHoSNYplpGFTppVsht-LstLu+EhslPlhpDLGSGsLlDLspYGlst.EPTVp-tlupGsDlVoFSGDKLLGGPQAGIIVG+K-hI-+lpppPLpRALRlDKlsLAuLEATL+LYlpP-+htc+lPTL+hLopshctl+tpApRLpthLtstLu.t.hpVpltsuhuplGuGShPhpcLsShslolpscps......slsuLptthRths.PlIGRlc-stlhLDlRoLt 
.............................................................................................................VhNhoGsllHTNLGRu..s....ttshpshtpshpt.ssl.E.h......sL.p......p..u......t......R.......u..p.R......p.....h.....l.t.p......h.........l......p.p........ls.u.....A...E.......s.......A....h.......lV.N.......NN...A..A.A.V............h....L.........h.l....s.................s.h....u................p.s................+................E....VVl........S..........R........GELV.EIGGu.....F..R..l....s..-.l.h.p.t...........u.....Gs..pL......h..E..V..G........o......o..N....+...T...+...h..p..DY...c.....pAI......s.E...p..T.Ah..L..h+V...H.SNaslpGF..pptl.sht-ls.t.l...u......+c...........t.............s.........lPllsD.lGS.G...h....c.h.........t.hh..h.....Es.t.h.p.phl.tsGsDLVsFSGDKll...GG....P...Q...A...G.lIl.GK....K.p.....hI..s....p.....l.....p......p.....p......s......L....t..RAl....R.lsKh.o.Lu....uLpss.L...chY.l....p........p..t........p.plss..hph..Lp.ts....t.lt.tA.p.l....lt..................................th..s.u..lGuGs.hP....l.shhh.h......................tht..h.+........sllsRh.pt.hhh-hpsl.t.................................................................................................. 2 127 232 306 +4397 PF04593 SelP_C Selenoprotein P, C terminal region Kerrison ND anon DOMO:DM04433; Family SelP is the only known eukaryotic selenoprotein that contains multiple selenocysteine (Sec) residues, and accounts for more than 50% of the selenium content of rat and human plasma [1]. It is thought to be glycosylated [2]. SelP may have antioxidant properties. It can attach to epithelial cells, and may protect vascular endothelial cells against peroxynitrite toxicity [1]. The high selenium content of SelP suggests that it may be involved in selenium intercellular transport or storage [2]. The promoter structure of bovine SelP suggest that it may be involved in countering heavy metal intoxication, and may also have a developmental function [3]. 
The N terminal region always contains one Sec residue, and this is separated from the C terminal region (9-16 sec residues) by a histidine-rich sequence [2]. The large number of Sec residues in the C-terminal portion of SelP suggest CC that it may be involved in selenium transport or storage. However, it is also possible that this region has a redox function [2]. 19.20 19.20 20.70 49.00 18.00 17.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.37 0.71 -4.36 4 16 2009-09-13 02:32:08 2003-04-07 12:59:11 9 1 10 0 6 37 0 124.80 54 34.28 CHANGED GQHRQGH.EssDhs.ASEuLQ.SLsQ+KLUR+tCINQLLCKLsc-SEuAsSSCCCHCRHLIFEKoGSAITUQCsENLPSLCSUQGLhAEEpVhESCQsR.PPAAUQ..uQplsPTEASssUSUcNpsKKUKUsSN ............................G.pRpGH.-spDhs.uSEslQ....Q+KLsRK..RCINQLLCKhscDScsA.uSCCCHCRHLlF.Ec.oG.SAlTUQCsENLPSLCSUQGLhAEEN.lhESsQsR.hPPAAUph.SQ.hsPsEA.sssUuU+ppsthscs..N. 1 1 2 2 +4398 PF04592 SelP_N Selenoprotein P, N terminal region Kerrison ND anon DOMO:DM04433; Family SelP is the only known eukaryotic selenoprotein that contains multiple selenocysteine (Sec) residues, and accounts for more than 50% of the selenium content of rat and human plasma [1]. It is thought to be glycosylated [2]. SelP may have antioxidant properties. It can attach to epithelial cells, and may protect vascular endothelial cells against peroxynitrite toxicity [1]. The high selenium content of SelP suggests that it may be involved in selenium intercellular transport or storage [2]. The promoter structure of bovine SelP suggest that it may be involved in countering heavy metal intoxication, and may also have a developmental function [3]. The N-terminal region of SelP can exist independently of the C terminal region. Zebrafish selenoprotein Pb (Swiss:Q98SV0) lacks the C terminal Sec-rich region, and a protein encoded by the rat SelP gene and lacking this region has also been reported [2]. 
N-terminal region contains a conserved SecxxCys motif, which is similar to the CysxxCys found in thioredoxins. It is speculated that the N terminal region may adopt a thioredoxin fold and catalyse redox reactions [2]. The N-terminal region also contains a His-rich region, which is thought to mediate heparin binding. Binding to heparan proteoglycans could account for the membrane binding properties of SelP [1]. The function of the bacterial members of this family is uncharcterised. 22.90 22.90 23.40 23.30 22.80 22.80 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.84 0.70 -4.98 6 83 2012-10-03 14:45:55 2003-04-07 12:59:11 9 4 48 0 49 89 1 177.60 34 63.13 CHANGED QspSShCK.PPpWpIcDpsPMhNuhGpVTVVALLQASUahCLlQASRLpDLRlKLcppGYoNISYhVVNcQu.pSphhascLKc+luEcIPVYQQ-.sQsDVWpLLNGsKDDFLIYDRCGRLsYHLuLPaSFLoFPYVE-AIKtsYCEchCGNCSLso.ps.-.CKssTh...........tsssKssEsp.....sct.HsH.....Ht++HsHsHcH.s.....ssphpcs.pss.sstsp.Ps..uh.HHHH+H+ ...........................................................................hs.h..............h..phtsLp.+Ltpp....Gh.slpahlVN....pps...Sp............h............ha..Lpp.......pss......tl.......sVYp...Qp.tpsDlWphLsGs...KDDFLIYD..RCGRLs..YHlsLPaS....hL...pasYVEtAI+hsYpcphCGsCo...hp.s............t...s......p.spth......................t.pps..p............t.....p....................................................................................................................................... 0 10 14 34 +4399 PF01641 SelR DUF25; SelR domain Bateman A, Enwright A anon Pfam-B_1539 (release 4.1) Family Methionine sulfoxide reduction is an important process, by which cells regulate biological processes and cope with oxidative stress. MsrA, a protein involved in the reduction of methionine sulfoxides in proteins, has been known for four decades and has been extensively characterised with respect to structure and function. 
However, recent studies revealed that MsrA is only specific for methionine-S-sulfoxides. Because oxidised methionines occur in a mixture of R and S isomers in vivo, it was unclear how stereo-specific MsrA could be responsible for the reduction of all protein methionine sulfoxides. It appears that a second methionine sulfoxide reductase, SelR , evolved that is specific for methionine-R-sulfoxides, the activity that is different but complementary to that of MsrA. Thus, these proteins, working together, could reduce both stereoisomers of methionine sulfoxide. This domain is found both in SelR proteins and fused with the peptide methionine sulfoxide reductase enzymatic domain Pfam:PF01625. The domain has two conserved cysteine and histidines. The domain binds both selenium and zinc [2]. The final cysteine is found to be replaced by the rare amino acid selenocysteine in some members of the family [1]. This family has methionine-R-sulfoxide reductase activity [2]. 21.90 21.90 21.90 22.00 21.70 21.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.52 0.71 -4.43 18 5387 2012-10-01 21:11:27 2003-04-07 12:59:11 13 18 4111 31 1382 3866 2926 121.70 48 59.33 CHANGED c-Eh+psLos.QacVhpppuTEpPaTsEYscpaEcGIYsslssGpPLFpSpsKF-SGCGWPuFhcPlspcu.IphptDpShsMpRsEVpstssDuHLGHVFs.DGPpp....ssGhRYClNSAuL+Flst .......................................t.tch+ppLo.s...pYtVspcpGTEtP....F.....o.uc....Y...h........c.....p.....t.c...p.........GlYss.h..ssGpPLF.....pS.p.....sKF...-...SGC.........GWPS...Fhc.P.............l.......ss....p.......s........l.......p..htp.......D.......t.S........a............G..............Mt.RsEV+stpssuHLGHVFs.DG..P......ps........s.GlRYClNSsuLcFhs.t........................ 0 441 862 1147 +4400 PF01403 Sema Sema domain Bateman A anon Bateman A Family The Sema domain occurs in semaphorins, which are a large family of secreted and transmembrane proteins, some of which function as repellent signals during axon guidance. 
Sema domains also occur in Swiss:P08581 the hepatocyte growth factor receptor and Swiss:P51805 19.10 19.10 19.10 19.20 19.00 19.00 hmmbuild -o /dev/null HMM SEED 433 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.70 0.70 -5.96 26 2637 2009-09-11 14:23:39 2003-04-07 12:59:11 14 95 167 27 1239 2166 0 367.60 25 40.30 CHANGED aphhhhc.tpspLaVGA+stlasLslpslp....ph...pplsWsus...ppppcpChh+GKs......ts-CtNal+lLtshsps.+LhsCGTsAapPhCphh..plss.....................................aph.stpp.sGpucCPasPppspsulhs.s.....sp......LYuuoshD......Fhup-ssIhR.hu........ptssl+Ttht...sphhLstPpFVsuasIs.s.t......pschlYFFFpEsus-tt.ts....tcslaoRluRlC+sDhGGpchlp.spWooFLKARLsCShPu..t.s..haFspLQssahl.sss..........pssllaGVFoTspsshtu....SAVCsaslpsIppsF.pGsatpp...csspppWhshps.chP.PRPGpCsssst.........thPDpslsFh+sHsLMtpsl.slpptPlhscpssphphTpIsVD..plpstssp.asVhalGTspGpll.hhlslspss..s...hhlEEhplhpss....tPlpshplsppp .......................................................................................ptplalGu..h.stlatls.ts..ht..................................tht..ss........tph....p..p..Ch.....tspt..........................ppst.Nh.....h.+....l..l.........................h....s...p...........p...pL...............hs..CG...o.ss.ps.hCthh....phss..............................................h.p.h...........t.....p............p.spsts.s..h..sstts.t...s..ulhs.s.....................sp............................Las.u.s...shs.......................a.sps.........h...l..h........R.tht.......................p.t.s....h.c..t..........s.t...ph..l...p...........t.....P..p...Fl.....t....u.....h.........h...............shl..Y..Fh.a.p.ch..shphp............................tp..hh...hoR...lu.R...lC......p.....s..D.G...s..t.p.hl........phhs.....o......a.hcs.............cL.C.shss...............t.........has....L..pssh.h..h.s..tt...............................p.llaulF..o....s..s...s..s.h.s..........................SAl...Csash.p.....s..lppsF..p...u....htpp....p.ts.t........p..t.h...h...s..htt....t....hs......s.
..+......s.s....t.C..s.ts................................................................ph.s-phh..s...h.h.....c....p......p.s.......lh..........p....h........sl....t..t......p......Pl...hh.....p....p.....t...s..............................hTplsVs........t.s...t....s.......a....sVhFlG........T.....p..p..Gplh..hl..h............tt.............................hhtphthh....................t................................................................................................................................................ 0 199 302 687 +4401 PF01118 Semialdhyde_dh Semialdehyde dehydrogenase, NAD binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1079 (release 3.0) Domain This Pfam entry contains the following members: N-acetyl-glutamine semialdehyde dehydrogenase (AgrC) Aspartate-semialdehyde dehydrogenase 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.45 0.71 -3.81 188 10449 2012-10-10 17:06:42 2003-04-07 12:59:11 19 35 4748 117 2601 11025 5845 122.70 26 35.48 CHANGED +VullGAoGhlGpcl.lclLpp..c...lchhhlhuspp..psGpphsthh.............htshhlps.......p.tth...ppsDllhhu..tssssupphssph..hptGh....hVID...ousaRhcs........................csshslPEl..N...tcplppt ...........................................plullGAoGhlGppl..l...p.h..Ltc......c......s.....l.....p.....h...h......h..h.....huosp.......ps..Gp..pl.s..h................................h..p..sh..s.lps.......hs...psh......pss.D..lsh..h....u....s..u....s...s...s....o..p......c....h.s...s..ph.............hp..s.Gs...........hVI...D...sou..s..a.R.hps.....................................................csshslPEl...N....ctlt..................................................................................................................... 
0 809 1648 2203 +4402 PF02774 Semialdhyde_dhC Semialdehyde dehydrogenase, dimerisation domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1079 (release 3.0) Domain This Pfam entry contains the following members: N-acetyl-glutamine semialdehyde dehydrogenase (AgrC) Aspartate-semialdehyde dehydrogenase 20.90 20.90 20.90 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.50 0.71 -4.43 32 8792 2012-10-02 22:00:43 2003-04-07 12:59:11 13 14 4668 113 2044 6116 4228 174.30 26 50.65 CHANGED LpPLhct..stl.ptlhVsohQulSGAGtp...........................................tssshshslshNllPalss..cct..poccchphhs-spphlshss..........u.CsRVPlhcGHopslphcht.....pshslccshphhtst.........stshlhs.....p.phPpsttsts...ssss.VGRlRpDshtsp...lthhsluDNlh+ ................................................................LtPLhc..t....htl...p.p...l.sl.s.oh...pul..SGAGt..p.......................................................t..s..s.h.......h.......s.h........luhNll......Pah..ss.......h....cps..........ps.cc..E..h....+..h.h..s...-...s...p.....c...l...l.....s.ss........h................s.slplP....shcGHspslp.hc.ht......................psh.slpch...hph.h..tps.............................................tshlhsps...............ttthPpsh..tssG........ssts.lG.p.lR....p..D....t..s....sp......hlthhsluDNLhK............................................ 0 662 1325 1741 +4403 PF03925 SeqA SeqA protein Bateman A anon COG3057 Family The binding of SeqA protein to hemimethylated GATC sequences is important in the negative modulation of chromosomal initiation at oriC, and in the formation of SeqA foci necessary for Escherichia coli chromosome segregation [3]. SeqA tetramers are able to aggregate or multimerise in a reversible, concentration-dependent manner [3]. Apart from its function in the control of DNA replication, SeqA may also be a specific transcription factor [4]. 
22.70 22.70 23.00 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.24 0.71 -4.59 37 801 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 780 13 101 335 9 177.20 64 98.90 CHANGED MKpIEVDEELY+YIASpTpcIGESASDILRRLLslssp......................pshshspPs.t..............................................................................s.spsttctVp.................slcpLlps-chsppctAVsRFlhlLssLYptsspsFspsh...plpGRsRlYFApscpsLLtuGss........oKPKpIPsoPFWVlTNsNTuRK+thLpplhtphthsspll-clpshl .....................................................................................................................................................MKTIEVDDELYpYIASHTcHIGESASDILRRMLKFoAss..........................t.....usPsht........................................................................ps.t...h...sc.sps...spsl+D+VR........................AMRELLLSDEYAEQK+A...V.N..RFMLlLSTLYoLDspAFAEAT..ESLHGRTRVYFAuDEQTLLpNGNp........TKPKHVPGTPYWVITNTNTGRKpSMlEHIMQSMQFPAELIEKVCGTI.............................................................. 1 13 36 70 +4404 PF04360 Serglycin Serglycin Finn RD anon DOMO:DM07201; Family Serglycin is the most prevalent proteoglycan produced in haemopoietic cells. Serglycin is a proteinase resistant secretory granule proteoglycan [1]. 30.00 30.00 32.00 30.80 28.90 27.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.03 0.71 -4.48 3 52 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 31 0 21 58 0 122.00 45 76.59 CHANGED QpLLpCoRLVLALAFILVauSSVQGYPsRRARYQWVRCsPDSNSANCIEEKGPtFDLLsGESN+IPPPRTDl.PlhpppsLN-lFPLSEDY..............................SGSG.GSGSGSGSGSGSGFLsEMEQEYQPVDENDAFYaNaRShDRNLPSpNQDLGQDGl ...................ss+lsLsLAllhhLtsus.pGhPsp..+ARYpWV+CsPDosSANCl-EKGPhF-L.PGEuN+I.s...hsD.hshpp...p..shschFP..l.SE..-h......oG..sS..G..SGS.GuuSGSG.....hsphc.-h....h-pps..a.s..s...................................................... 
0 1 2 5 +4405 PF00450 Peptidase_S10 serine_carbpept; Serine carboxypeptidase Finn RD anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.43 0.70 -5.31 67 3902 2012-10-03 11:45:05 2003-04-07 12:59:11 17 45 756 20 2456 3840 269 353.80 21 80.72 CHANGED PG.....ts...hsh...cpaoGYl.......sls.ttpsppLFYaFh....ESpp....sPppc.PllLWLNGGPGCSSls.Ghh.ElGPFpls......ss..sLhhNsYSWscsANllFLDpPsGsGFSYo..sssss.......hptsD....................ppsApDsatFLtpahpc.FPca....tsp..cFYIsGESYAGpYlPtluptIhptspt..............tlNL+GhhlGNuhs-s......hhphsshhs.....ahattullo.-ctacphpptCphs..............................stsp..tppChsthpphtt.................................slshYsIhpssh..............................................................shsshs.phspt..ahNptsVpcALph.....ss.hpWptCsp.tl........th..sch.pshhshhp...pllpss..l+lllauGDtDhhsshhuspthlcs....LshsstspapsWhh........................sspluGaspsY........ts..loFsoV+GAGHh..VPh.pPptuhthhppalsu 
.....................................................................................................................................sGal...........................tht.........s....t...p...hF.aa.......hh..........................pu.pp...........ss.t.pp....Plhl.....Wl.............s.G...............G..........P.............G.s.SSh.......h..u......hh..E.h.G...Ph.thp..............................t........t..h......h........Ns..h.o.Ws...p..........h..............u............sl......lal..-p..............Ps....G.......s..........Ga......S..h..s...psstt...................s.p...............................................pt..s.....u.ps...hhthl...p..t........ah...pt....a...s.pa.....................t..s.......s....hals..G........E....SY.u......G.....hh..hPh..luthlhptt.t.................................hs.Lp..........G...h.h.l.s.........s...sh...h..s.....................p.h....thh.........................hhh........h....s.....h....l.....s........p......p...........h....p.....t...h....t...t.......h.......................................................................................s........t....h.t.h...............................................................t.............ths.hshh...............................................................................................................................................................t......h...........a.....h..s......p......t..l...p...p...s.lt.h......................tat.ss......l................................t..t..................s....h...h..........h...h..................tl..l...pt.t.........hpl.hl.h..........s.....G.p...............hD.h.h.s..............s..h...h......u...........s....phh.hpt.....................h.t.h....h...............a...................................................................tt....hG...h............................tt......hshh.l..h...........tuuH........hs.........t......s..s..hh...............................................................................................................
............................................................................................................................................................... 1 823 1473 2074 +4406 PF00079 Serpin serpin; Serpin (serine protease inhibitor) Eddy SR anon Overington and HMM_iterative_training Domain Structure is a multi-domain fold containing a bundle of helices and a beta sandwich. 21.70 21.70 21.70 21.70 21.30 21.60 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.21 0.70 -5.32 180 5658 2009-01-15 18:05:59 2003-04-07 12:59:11 15 39 760 254 2183 5686 67 297.10 23 87.17 CHANGED pusssFuhcLa+plspps.......ppNlhhSPhSl......psuLuhlhhGApG..pTtppl..tpsL...th....................................hstpp.......h.tt.........................hppl.hpplpp..............................tshplphANtla..h....p..cs.hplppsFhptscp...hYpup.hpslDFpsssp.utppINsWVpcpT...............pu+Ipcllsst.lss.s..TphlLlNAlYFKGpWpp..tFstppTppp.sFa..hsp.......spsh.pVsMM.ppp.....spaph......t.p....sspl..lcLPY........................ps.....sh.SMhllL..........P........su.lp..plc......ppLs.tt.....lpp.....hhpphp...hp......htl...tl..PKF..plpt....sh..cL.ppsL.pphG..lpchFs.spAD.hSsls...............................................................................s...t.LhlScllHKuhl-VsEcGoEAAAuT.........................................................Fpss+PFlFhIpcppo........................tslLFhG...+lhsP 
.........................................................................................t.....hshph.hp..h..tt............ttN..hhhSP.h.ul........shu......hh......h.G...u.......p.......s.......p.......T...tt..p..h....p...s.l..th............................................................t.tp...h................................................h.t.pl..hptltt..................................................................tt.t..l.p.h..sst..l..a..h.............p......pt.....h.......t...h........t.p....ah..p...h...pp.........ha..t.............sp....htth.s.Ft.....pt.....t....s........t...p....tI...N..p.a.l..pp..p.T..........................p.s.p.....I..t....p....l....l..t.t.........l...s......t............s.......hh..h...ll.Ns..l...aF...+.........u.p...Wp..p......F...p.....p.........t..T.p.tt......Fh.......hsp.........tp...t......h...tV.hM...tp...........t.t.h..h.........................h..tt...t..h.....pl....lp.lPY....................................ps..ph..ohhl..lL.........................P.............pt....ps....lp....pl..................p.p..l.s....tt...............lt.p.....h..h.p.t.hp...p........ht.........l................hl..P..+F....pl....p..t....ph....s.......l...pphL..p.p....h.G......hp...p....hFs.....t..pus..hsths.......................................................................................................................................................................t....ttlhl..s..p..h..h...pp...s.h......l...p.............l..s.Ep..G..spu.s...u.so.................................................................................................h....h.s+P..Fhh.h..lh....p...p.......t............................lFhGph................................................................................................. 
0 518 734 1365 +4407 PF02403 Seryl_tRNA_N Seryl-tRNA synthetase N-terminal domain Mian N, Bateman A anon Pfam-B_518 (release 5.4) Domain This domain is found associated with the Pfam tRNA synthetase class II domain (Pfam:PF00587) and represents the N-terminal domain of seryl-tRNA synthetase. 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.44 0.72 -4.03 89 5178 2012-10-01 23:07:44 2003-04-07 12:59:11 17 14 4789 32 1407 3708 1996 107.80 30 25.05 CHANGED MLDl+hlRp....Ns-tlccpLppRt....ss.hsl-cllpLDcc+Rplhhcs-pLpscRNphS+pIuptcppt.....pcsstlhtcspplspclpsh-t.clpplcsclpphlhslPNlP .........................MLD.l+.hlRp........s.-tVtcpLt..pR................stt..h..sl.-....clhpL.....Dp.cR...Rpl.....hs..csEpLpuc.RNphS...Kp....I...upt...Ktpt...................--spsl...htc..h....pp....lupclc..shcs.clsplpsclpplhhslPNlP............................................................... 0 483 906 1191 +4410 PF01445 SH Viral small hydrophobic protein Bateman A anon Prodom_1504 (release 99.1) Family The SH (small hydrophobic) protein is a membrane protein of uncertain function [1]. 21.90 21.90 21.90 35.70 21.30 21.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.01 0.72 -4.31 12 328 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 18 0 0 315 0 56.10 83 99.85 CHANGED MPAIQPPLYLTFLLLILLYLIITLYVWll.TITYKTAVRHAALYQRShFRWSFDHSL MPAIQPPLYLTFLLLILLYLIITLYVWIlLTITYKTAVRHAALYQRSFFRWSFDHSL. 
0 0 0 0 +4411 PF00017 SH2 SH2 domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.69 0.72 -4.17 58 9081 2012-10-01 22:44:06 2003-04-07 12:59:11 19 379 283 390 4864 8244 21 78.00 27 14.81 CHANGED WaaGploR.....p-AEchLhp......tpsGsFLlRcScs.p.Gs...aoLSVppps.........pVcHa+Ipppsss........halssptp..FsoLt-LlpaY ..................................Waa.Gt.l..o..R...............pp..A.EplLtp.................t...sG.s...FL.lR......c..S...ps....p......G.s..........a..s.LS..lptps........................................plpH....h.pI.p..pp...sst................................hh.h......s.....t......t.....tp.......F..s..o..l..pLlpaa............................................................... 1 1219 1597 2956 +4412 PF00018 SH3_1 SH3; SH3 domain Cerutti L, Sonnhammer ELL, Eddy SR, Finn RD anon Prosite Domain SH3 (Src homology 3) domains are often indicative of a protein involved in signal transduction related to cytoskeletal organisation. First described in the Src cytoplasmic tyrosine kinase Swiss:P12931. The structure is a partly opened beta barrel. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.20 0.72 -4.57 61 10749 2012-10-02 18:48:24 2003-04-07 12:59:11 23 695 444 373 5929 20245 89 47.10 29 7.39 CHANGED hALYDapupp..ssELshpcG-hlpllpcsss...sWWcuc.ttt...sppGhlPu .....................hAlYc..a..p.......u....pp.........ss.......E.......L..o.......hp...c..G..-hl.p..l........l......p...c.....s........ss..........sW.Wpu.ch...tt............sp....p....G.hhPs................................... 
1 1639 2410 4041 +4413 PF04908 SH3BGR SH3-binding, glutamic acid-rich protein Mifsud W anon Pfam-B_6650 (release 7.6) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.23 0.72 -4.05 6 396 2012-10-03 14:45:55 2003-04-07 12:59:11 10 6 165 7 218 466 811 91.70 40 45.15 CHANGED MVl+VYlASuSGshtIKK+QQ-VlthL-up+IpF-plDIot..cE-pRcaMRcNs....s..p+PssGtPLPPQIFN-DpYCGDYDuFhpApEpNTlhsFLtLs ...................slcVah.uos.oG..s..htIKK+QQcVhthL-A.p+I....pFc...plDIut..sE-pR.caMRcps.........pc..st.suhsLP....PQIFNp...-p....YCGDY-sFh-.ApE.pstl.tFLtL.................................. 0 60 87 142 +4414 PF03579 SHP Small hydrophobic protein Bateman A anon Pfam-B_1121 (release 7.0) Family The small hydrophobic integral membrane protein, SH (previously designated 1A) is found to have a variety of glycosylated forms [1,2]. This protein is a component of the mature virion [1]. 21.20 21.20 21.20 107.20 19.50 21.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.11 0.72 -4.62 3 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 10 0 0 46 0 64.00 66 92.80 CHANGED MsNTSITIEFTSKFWPYFTLlHMILTIISFLIIISIMIAILNKLCEaNsFHNKTLElspuhpNs MsNTSITIEFTSKFWPYFTLIHMILTIISLLIIISIMIAILNKLCEaNsFHNKTLElspthpss. 1 0 0 0 +4415 PF01488 Shikimate_DH Shikimate / quinate 5-dehydrogenase Bashton M, Bateman A anon Pfam-B_336 (release 4.0) Family This family contains both shikimate and quinate dehydrogenases. Shikimate 5-dehydrogenase catalyses the conversion of shikimate to 5-dehydroshikimate. This reaction is part of the shikimate pathway which is involved in the biosynthesis of aromatic amino acids. Quinate 5-dehydrogenase catalyses the conversion of quinate to 5-dehydroquinate. This reaction is part of the quinate pathway where quinic acid is exploited as a source of carbon in prokaryotes and microbial eukaryotes. 
Both the shikimate and quinate pathways share two common pathway metabolites 3-dehydroquinate and dehydroshikimate. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.66 0.71 -4.06 44 8457 2012-10-10 17:06:42 2003-04-07 12:59:11 15 52 4195 91 2050 10811 5424 121.30 22 33.35 CHANGED -LApphhs...tlp.sppsLllGAGchucLlsptLhsp.uscclhlsNRThc+AppLAcch.....ts....shsls-lp...phlspsDlll.ouTuupp.............llspphlcpsh...t.......h.lhlDIulPRslp.tlsthpsshlYslDDLctl ...............................................h.............tlp..sppsL.l.l..G.A...G...G...s.u...c...u.l.h.h...t..L...h...pt...G........s...p......p...l......h......l..s....N.......R......T.......h.......p.......+......A....p......p....L...A...pph.....................t..t......h....p......s....h....s....h....s.....c....lt...............th..h...t...p....h......D....ll...l...s.u...T....u..u.s.h.................................................h............h..............................................................................................h......................................................................................................................................................... 
0 611 1298 1762 +4416 PF00464 SHMT Serine hydroxymethyltransferase Finn RD anon Prosite Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 399 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.06 0.70 -6.10 12 6460 2012-10-02 18:26:03 2003-04-07 12:59:11 14 19 4794 86 1789 5320 4336 351.50 52 88.05 CHANGED Ls-tDPElashlcpEhpRQpcplELIASENasSpuVh-AhGSsLoNKYuEGYPGpRYYGGsEalDhlEsLs.cRAhchFsh-....hVNVQPhSGS.ANhuVYpALlpPtDplhGLsLscGGHLTHGh.ss.......oupaFcuh.YtVs.cTGhIDYDpLccpApha+PKlIVAGsSAYuRllDat+hREIADclGAYLhsDMAHIuGLVAAGVhPSPF.aAcVVTTTTHKoLRGPRGGhIhh+............-h..-ltcKINpAVFPGhQGGPh.HsIAAhAVAhKpAhpPEFKsYpppVlpNA+AhucsLpcpGYcLVSGGTDNHLlLVDL+s+GlsGscuEtsLtpssITsNKNolPsD.cSshlsSGlRlGTPAhToRGFsEt-FpcVutaI ..................................................................................................tphDs-lhph.l.p.p....E.ht.R.Q..p.pp...lELIASENa....sS......uVhpAtGS......hLTNKY......A.......E.G.....Y.P.......G..+.......R..........YYGGCEaVDhlE..pLAI..-R.AKcLF.....GA.....-.................................aANVQ...........P..H.S..GS.........Q......AN.......hAVYh.AL..L.pP.....G...D....ol............hG.....Ms.Lsp.GGHLTH...G.u.l......sh......................S..G+.h.a....p.......hlsY.Gl..c......c...o...t.hIDY.D..p..l..c...c...h..A..h..c......a.+.....P...K.l.I..l.A..G...h..S.A..Y..s...R...h...l.D...a.t...+...h...R....-...........I....A...............D...c.....V...G..A.......hLhVDMAHlAGLV......A.....A...G.....l..a.....P......s.....P.....l......P.....a.......A....c..l..VT.TTT..HKTLR......G..P.R..G..Gh.IL.sp............................................................................p-htKc..l..N..pAlFP.G.h..Q.....GG.P.L...H.V.I..AAKA.V.A.ht..E.A.h..pP-F.K.pYt.ppVlcNA+shA.....c................s......h......h......p......c.......G...h.cl......V....S...G....G....T......DN.H.Lh..Ll...D...L.p.......s......p.......s............l...o.G+..p.A-.th...Ls.ps..sI...TsNK..N.slPh..D...sc.s...P....h..l..T.S.........GlRlGTPAlToRGFtE..t-hcplAphI........
................................................................................................................................................... 0 595 1137 1501 +4417 PF04917 Shufflon_N Bacterial shufflon protein, N-terminal constant region Mifsud W anon Pfam-B_6667 (release 7.6) Family This family represents the high-similarity N-terminal 'constant region' shared by shufflon proteins. 27.10 27.10 27.50 27.10 26.10 27.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.35 0.70 -5.36 3 374 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 230 0 47 352 5 249.60 32 68.40 CHANGED MKKaD+GauuLEVGAuLLIVlllIuhuAchhpDYLpo+cWQssAcpsNsaToAVRSYVGKNYoTLLASSTTTTPAVITTsMLKNTGaLsSGFSETNS.GQpYQAhIVRNuQNsELLQAMVVSsGGpuhPhsALpQlAKDITsGLGGYIpDGKTAlGAhRSWSlsLSNYGssoGsGHIAVhLSTDDLSGAtEDoDRLYRFQVNGRPDLNKMHTAIDMGSNNLNNVGTVNAssushSGNVuGpNGTFSGulsGNou.....lTAGGDIRSNNGWLVTRNSKGWMNETHGGGaYMSDuSWLRSVNNKGIYTGGQVKGGTVRADGRLYTGEYLQLEKTAVAGASCSPNGLVGRDoTGAILSCQSG ......................................................................................................................................................................................................ttGh....h.p...h.h.....slhl..hhh.hh..hh..h.t...hts...ptht.p...A...pphs...pAsppYlt...cph.........s.sl......st.s.........................Ph.......h.......h......Th....t...Lhp..p..s.hL.suhptp..N...uQp..h.hh....l...h+..s....s...t.....s....st.....h...puhhh.o....p....G....Gp...s..h.............t....t.hh.h....ut.....hs..u.........GG....hl.......t........p.........s..........t..............u...........h..........G...shtuW...p........s.............ssa............u............t.s...........ss....G+lA.....hh.h....tt.............t..s-.......h....LYR..tVsu+P-.hN....tMps....slshssp...slpshtsh.s.st............t....h........................................................................................................................................................s...ts...s.....ssp......h.t.......s.....G...
.......p.l......pu......p......sph.stthl....pht.t..s..st.Cs..............sGhhu..hs..st.G...L................................................................................ 0 12 27 33 +4418 PF02973 Sialidase sialidase_N; Sialidase, N-terminal domain Griffiths-Jones SR anon Structural domain Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.21 0.71 -5.33 6 557 2012-10-02 19:29:29 2003-04-07 12:59:11 11 25 252 12 9 354 128 186.90 45 24.36 CHANGED l.psh.hE..ssshsIusGpshcLsuEhs...lphL-pGTlllcFKuspps.ulQSLFSlSNupssN..caFplYlsNo.plGhELRsscuhhNYshups..sslhshhtsssshNTlAhKA-.psKpY+LalNGclltshspssspF..IssIsGlsslpLGuTsRtG.sptYsFsGsIsplplYNcsLoD-tLppcTGtTs ..............................................................................................l.pshphp..shphsh.Ssstt.scLSu..Elp....pphp..pu..TV..hMcFKsDsp.s......uh......s...LFulSsup..ttp.....pYFohhl..h..ss..phslEhRsusupt..Yh..ps....hpl..h..st.phspssh...ss.....t.su-....ctphpLYVNG..........lp.....h...s......S.p.s...s....sF..Ipc..hs...sls.+..s.plG.......A.........s...c.R....s....s...p....s.....ha..s..u....shpIcplolaN+ALos-EVpphot................... 0 5 5 6 +4419 PF03482 SIC sic; sic protein Bateman A, Howe K anon Pfam-B_5 (Release 7.0) Family Serotype M1 group A Streptococcus strains cause epidemic waves of human infections. This family includes the sic protein an extracellular protein (streptococcal inhibitor of complement) that inhibits human complement [1]. 
19.90 19.90 20.60 20.20 19.60 19.00 hmmbuild --amino -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.13 0.70 -4.78 8 450 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 7 0 4 450 0 193.50 81 91.33 CHANGED ETYTSRNFDWSGD....DDWPEDDWSGDGLSKYDRSGVGLSQYGWSKYGWSSDKEEWPED.WPEDDWS..........SDKKDETEDKTRPPYGEALGTGYEKRDDWGGPGTVATDPYTPPYGGALGTGYEKRDDWGGPGTVATDPYTPPYG.......................................................................................GALGTGYEKRD.............................DWRGPGHIPKPENEQSPNPSHIPEPPQIEWPQWN...GFDGLSSGPSDWGQSEDTPRFPSEPRVTEKPQHTPQKNPQESDFDRGFSAGLKAKNSGRGIDFEGFQYGGWSDEYKKGYMQAFGTPYTPSAT ................................................ETYTSRNFDWSG...............DDWP.....EDDWSuD........................YuW.....SSD..K........sEDDWS...........SDKKDETEDKTRPPYGEALGTGYEK.RDDWGGPGTV.ATDPYTPPYGGALGTGYEK.RDDWGGPGTVATDPYTPPYGGALGTGYEKRDDWRGPGHIPKPEN.E.QSPNPSHIPEPPQIEWPQWN......GFDGLS.GP.SDWGQSEDT.PRFPSEPRVsEK...P...QHTP...Q.....KNPQES....DFDRGFSAGLKAKNSGRGIDFEGFQYGGWSDEYKKGYMQAFGTPYTPSAT............................. 
0 2 4 4 +4420 PF00158 Sigma54_activat sigma54; Sigma-54 interaction domain Sonnhammer ELL anon Prosite Domain \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.98 0.71 -4.78 287 23376 2012-10-05 12:31:08 2003-04-07 12:59:11 21 270 3214 38 6053 30277 7477 165.30 44 32.07 CHANGED llGpSsshpp.lhchlpplA...sos...ssVLlpGEoGTGKEllA+uI.....Hp.......tS.sR...p..st...PFlslNCA..Al..P-s...LlESELFGac+GAFTGAppp+t.GhFEtAsGGTLFLDEIG-hPlshQsKLLRVLQEtplpRlG.u.s.c.s.lplDVRlIAATNcsLpptlp.pGp..FRcDLYYRLNVlslp ......................................................................................lGpo.ss.hpp.lhct.l.p..p....l.A.......t..os.......ss.V.L.I.p.GEoGT.GKE....l....l...A.+.u.l.........Hp..............................tS..s..R.......t.......s....t.......PF.....l...s.....l......N.......C.....A......A..l..........P.c..s..........L........l.....E.........S......E......L.............F..........G..........a......p..........+...........G.........A......F.....T........G......A........p........p.........p.........+.........t.........G...h....F.....E...t.......A......c.......G.....G.....T....L.F.L.DE...IG-h.....P..h.p...........hQ....sK..LLR.V..L....p........-..t...p.....h.......p...R...l.....G...u...s.....p...s.....l...p..l..D.V..R...l.I.u.A......T.s..........c..s...L..p...p.......hl.p..pG.p..FR..cDLaYRLsVhsl.................................................................................... 0 2150 3866 5098 +4421 PF00309 Sigma54_AID sigma54_AID; Sigma-54 factor, Activator interacting domain (AID) Finn RD anon Prosite Family The sigma-54 holoenzyme is an enhancer dependent form of the RNA polymerase. The AID is necessary for activator interaction [1]. In addition, the AID also inhibits transcription initiation in the sigma-54 holoenzyme prior to interaction with the activator [1]. 
21.20 21.20 21.80 21.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.36 0.72 -4.49 153 2778 2009-01-15 18:05:59 2003-04-07 12:59:11 15 9 2568 0 623 2038 696 48.40 41 10.45 CHANGED .pLphc.sQpLshTPQLpQuI+LLQhoshELpphlppplpcNPl..LEhpc .............tLpl+.uQpLshTPQLpQuI+LLQLSslELppplpptl-pNPL..LE...................... 0 216 410 521 +4422 PF04963 Sigma54_CBD sigma54_CBD; Sigma-54 factor, core binding domain Finn RD, Wigneshweraraj SR, Buck M anon Prosite Domain This domain makes a direct interaction with the core RNA polymerase, to form an enhancer dependent holoenzyme [1]. The centre of this domain contains a very weak similarity to a helix-turn-helix motif which may represent the other DNA binding domain. 21.10 21.10 21.90 21.50 20.90 20.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.23 0.71 -4.87 22 2858 2012-10-04 14:01:12 2003-04-07 12:59:11 8 8 2612 2 650 2119 1007 194.30 32 42.31 CHANGED --Dt...hpshss.st.oLp-HLhpQlsls.hsss-RhIAhtLIDslD-sGYLp.hsLpElsppLs....sphscVcpVLphlQpF-PsGlhARsLpECLhLQL+phs.....h-.AhthlscpL-LLApRDassLt+hstlcE-DLt-hlp.IppLsP+PGspapssts-hllPDVhVRps.sGtWhVELNs-slPRlhlNppYh .................................................s......htt..h.s...tpoLp-.aLh.Ql.p..l.t....h..o.cp..-.+t..IAphll-slD..-sGYLp.....h..............s........l-.-..l....h..c....phs..................................ls.h..--..l-..t.......l.L.c.hl.Q.p.h.DPsGVuA+sLpECLllQ...Lpphs..........................hhp...A..h.t.l.l...p.c.a...l-.hL....u...p.......+............ca....pp......L.h+.hh........p........l....p.p.c.p.l.....ccsl.s.l.I.p.s.LsP.+.P......G.t..p.h....t....s......s......c........s...p.......Y.......l.......l.PDlhVp..............c......p....s.......s.........c.....a..tVp..L.Nsc.shP+lplsppY............................................................................ 
0 226 431 549 +4423 PF04552 Sigma54_DBD sigma54_DBD; Sigma-54, DNA binding domain Finn RD anon Prosite Domain This DNA binding domain is based on peptide fragmentation data. This domain is proximal to DNA in the promoter/holoenzyme complex. Furthermore this region contains a putative helix-turn-helix motif. At the C-terminus, there is a highly conserved region known as the RpoN box and is the signature of the sigma-54 proteins [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.92 0.71 -4.72 29 2839 2012-10-04 14:01:12 2003-04-07 12:59:11 8 11 2603 3 645 2149 926 157.50 46 33.86 CHANGED psalpcplppApWLl+uLcpRtcTlLKVuppIVcpQcsFLpcG.ptL+PLsL+clA-slshHESTVSRlTssKYltTPRGlaELKaFFouul.uossGu-.tSspAl+thI+pLlssEs..pcPLSDspIsphLccpG.lplARRTVAKYREuLsIPuSspR+R ..........................p.pal+pplp-A+WLl+uL.cpRpcTLL+VupsIVcpQps.FF..............pp.G..t.c.t..hKPh...sL+Dl....Aptl........sh........H.E..S........TISRsos...pKY.lpTP.+G.lF.ELKaFF....o..sp..l..........so...........p......s............G........u...-.......s.............S........osu.....I+s.hl+cLIss...E.s......p..K.P.LSDs.+lspl.Lp.-.pG..I.lARRTVAKYRE.pLsIPsSspRKp............................... 0 223 427 544 +4424 PF04546 Sigma70_ner sigma70_ner; Sigma-70, non-essential region Finn RD anon manual Domain The domain is found in the primary vegetative sigma factor. The function of this domain is unclear and can be removed without loss of function. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.22 0.70 -4.71 124 3805 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 2580 2 391 2852 1705 206.60 36 49.01 CHANGED aP.....sslshlLppa-+lpstEh..RLsDllsGalDP.......s..spss..t.hssc.spsptsc.................p-------p-.........................-s-sG.DPE.Atp+Fstlccpapphpp.......sl..pcpG+s..spp...stcthptluclFtth+LsPKpaDtLlpplRshh-pVRtpERtIhclsVcpu+MPRcsFl+tFs.GNETshsWlcphls..sp..csauptLpchps-IpcsQpKLhplE ................................................................................................................aPtslshlLppY-+.hp..s-ph..RLoDll...o......GalDPs.................................tttshAss.s..st..l.ss.chsc.s-hs-.....................................-------Dpss............................sps-s-su.DPclAtp+Fuplp....sQachs+c................ul....pc.p....GRs.......ccp........stttht.......pLu-lFpph+LsPKQFDtLVsphRshh-RlRsQERhIM+LCV-pu+MP+.c.sF.lptFs..u..........N........E..........sst.sWh-thht..ts...+sautt...ltchpp-lhcs.p+LttlE............................................. 0 87 203 293 +4425 PF03979 Sigma70_r1_1 sigma70_r1_1; Sigma-70 factor, region 1.1 Finn RD anon Prosite Family Region 1.1 modulates DNA binding by region 2 and 4 when sigma is unbound by the core RNA polymerase [1,2]. Region 1.1 is also involved in promoter binding [1] 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.71 0.72 -4.05 31 3466 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 3283 1 714 2077 1357 81.80 31 15.80 CHANGED ..chspspl.Kp.LIppGKccGa.lTYcElN-tLP.t-h..lss.EQl--lhphl.s-hGIpVl-.....-s-.....p.......tEp.t...sp-csst-Esp ..................h..t..pppl..+p.Llpp.G..K.c.p.G.a..LTYsEl....N-.pLs....sph.......lDu..-.........Q.lE-llphl..sDhG..IpVh-.......ps..s.-s.-.............sh........hhs-p..............tsDpss.....s........................................... 
0 240 448 579 +4426 PF00140 Sigma70_r1_2 sigma70_r1_2; Sigma-70 factor, region 1.2 Sonnhammer ELL, Finn RD anon Finn RD Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.97 0.72 -7.50 0.72 -4.27 183 8438 2009-01-15 18:05:59 2003-04-07 12:59:11 15 36 4678 31 1886 5884 3958 35.90 42 8.55 CHANGED sDslchYL+EIGch.sLLos-EElcLA+clcpGtptt.c .........tDsl+hYL+ElGpl.sL..LT.sE.-ElclA+RlcpG....................... 0 601 1232 1599 +4427 PF04539 Sigma70_r3 sigma70_r3; Sigma-70 region 3 Finn RD anon manual Family Region 3 forms a discrete compact three helical domain within the sigma-factor. Region is not normally involved in the recognition of promoter DNA, but as some specific bacterial promoters containing an extended -10 promoter element, residues within region 3 play an important role. Region 3 primarily is involved in binding the core RNA polymerase in the holoenzyme [1]. 25.30 25.30 25.30 25.30 25.20 25.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.21 0.72 -4.09 82 10957 2012-10-04 14:01:12 2003-04-07 12:59:11 11 41 4723 32 2549 7548 3445 76.80 28 20.25 CHANGED Epls+lp+sp+pLtQchGRpPoscElAcpLshs.ccVcchhphuppslSLchslupctDsphs-hlpDs.s.ssp-ts ......................................plN+lt+s.p...+p...L.h.......Q.........c...l........G..R..-...Po....s...-E...lA.....cch......s.....h.....s.-cV.....p..ch...L...c...h..u..p..c..s..h...S.l...-..s.....P.....l.......u......p...-....p...D...u.....p......l...s.......D......h...ltDpp...P.p..s.......................................... 0 876 1740 2204 +4428 PF03084 Sigma_1_2 Reoviral Sigma1/Sigma2 family Mifsud W anon Pfam-B_1759 (release 6.4) Family Reoviruses are double-stranded RNA viruses. They lack a membrane envelope and their capsid is organised in two concentric icosahedral layers: an inner core and an outer capsid layer. 
The sigma1 protein is found in the outer capsid, and the sigma2 protein is found in the core. There are four other kinds of protein (besides sigma2) in the core, termed lambda 1-3, mu2. Interactions between sigma2 and lambda 1 and lambda 3 are thought to initiate core formation, followed by mu2 and lambda2 [5]. Sigma1 is a trimeric protein, and is positioned at the 12 vertices of the icosahedral outer capsid layer. Its N-terminal fibrous tail, arranged as a triple coiled coil, anchors it in the virion, and a C-terminal globular head interacts with the cellular receptor [2]. These two parts form by separate trimerisation events. The N-terminal fibrous tail forms on the polysome, without the involvement of ATP or chaperones. The post- translational assembly of the C-terminal globular head involves the chaperone activity of Hsp90, which is associated with phosphorylation of Hsp90 during the process [2]. Sigma1 protein acts as a cell attachment protein, and determines viral virulence, pathways of spread, and tropism. Junctional adhesion molecule has been identified as a receptor for sigma1 [1]. In type 3 reoviruses, a small region, predicted to form a beta sheet, in the N-terminal tail was found to bind target cell surface sialic acid (i.e. sialic acid acts as a co-receptor) and promote apoptosis [4]. The sigma1 protein also binds to the lambda2 core protein [3]. 
25.00 25.00 26.30 64.10 16.90 16.50 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.37 0.70 -5.84 4 120 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 42 18 0 98 0 322.90 50 99.77 CHANGED MARAhasFhTshFGs.pslPhscpQlopLLpSSNSPWQct..shshslspuhloTsphPhsGShhYQcShLaSuhlPhlLhspDAW+-hpahchsWTsssLsGLVtAssP..AsP.YpPtuupaaDlppYPpWApchR.LpphYP.Lht.TLLNhhphGPlsYV-T.ssMlSGplsshhMohaG+sFtEIAhpLsQosuNhPhtsDusYDpthRhllSLahLSYlGVlpQssTIpGFaFpoKpRGsutEuWhL.Ys.TpupRlplspR+auahssRSPDWNhDhSalhuusLoAhlhSsRQ.PLluNpuVsNpupNhPGaoussGs.V+tlplhshAsEhIcphhhsGllossctpplptpusshpphhpscLssltspDDtL.ptpPphAR.RlKPFssssWssGpottulAuLAshh .....................................................s.F.Gs.psl.shNcp..SpLLpuuNSPWQhh....h..hu.Glso.os.sPhsGSphYQ.ShLhSuTl.hshtspctWtshphhtLsWos.sLsGLVsA.s.s.AsP.....hQstusph.Dh.sYPpaApc.RthpphY..LhtsTLLshhthGPlhYVcs.ssMhSGtlsphhMs.hGpsFh-hshpLhQus.NhPhp.DusYsp.hphlhulahhShhGhlppptThtsFaFt.tp.u.t.-.hhL.hs.ststth..sttp......ahhstSPcWphs.shl.uu.Lot.lhus.p.t..P.h.hsptsl.s.upsh.sh.o...s.t..s...l..th..p.h.hstthh.phh.sGlhstupttthpt.hstht.hhpttltthhhtss.h.....t..th..hpPa.stpas..G.oh.s.............................................................. 0 0 0 0 +4429 PF02454 Sigma_1s Sigma 1s protein Mian N, Bateman A anon Pfam-B_2133 (release 5.4) Family The reoviral gene S1 encodes for haemagglutinin (sigma 1 protein), an outer capsid protein and a major factor in determining virus-host cell interactions. Sigma 1s is one of two translation products of the S1 gene. 
20.10 20.10 21.00 29.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.45 0.71 -4.32 5 20 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 8 0 0 20 1 114.30 61 84.05 CHANGED CQ+uLNpGSRRSR+RsKYTLIhSoGSl+DSMpQpNESSLLSKVGpsWLHQhVppNLQSPDWKALSEPSKQLSMDLIRVLPsWVsEW-sLRQDLQsYAhTpsISL...REWlLRN....sTLDH ...CQKGLNQGSRRSR+RLKYTLIlSSGSsRDSMMQTNESSLLSKVGhsWLHQSVMhNLQSPDWKALSEPSKQLSMDLIRVLPSWVlEWDNLRQDLQSYALoTsISL...REWILpN....VTLDH...................... 1 0 0 0 +4430 PF03842 Silic_transp Silicon transporter TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 25.00 25.00 49.40 33.40 23.50 23.00 hmmbuild -o /dev/null HMM SEED 513 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.80 0.70 -6.42 4 167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 79 0 7 165 0 287.70 62 98.66 CHANGED hhoshssl+hhYShuLlIFSlIIVsALMFspsTKlApDspPhsALllMhhuIlWhSMlEGtQsShVGLPP.VD+sLYKESHPlTa+.suLuaKGDNLDRYLMGRQFMVlhlsFsINLCGuPL-su...-VLGLPpllppIFLsoGIAMILhsshIGQLTsQVNASHCMlDaINsaFhhFTLYssLlIEhoGVMHuSYLIQshhhhluGKPVpTNE.PRouhQshFFWGRVLhSLulLsFuLAVTlpALFsGpTTM...WphIPssVAllLFFlLMSlVGhLEGMQIAFFAVAKl.+pERGsp.Fu+KTCELLF+GpGcNLPGFMlGRQhTVshCFFllARVTTLDIEVGss-NIFGVSDGhQtFhNhGFhGAlITTILASIsWQLsASAFPlAFLsNPhsYIlLhluLhLEATGlCuGAWhLuhIpK+VstFphDEVYVGTPEE.RhutD+sDtphcts...tthhhGssh........................coasspppDhLc.........t.sup.cEttt..tu.s.cTcAL.chscpQ..-Ah-hhsups 
......................................................................................................................................tst....-VLGhPshlp.lFLsh.GLuMIlFTChlGQLsoQVNAoHCMlDaINNYFALFTLYsAMslEFSGlMHuuYLIQ.lhutlSGKPI.SNE.P+sGhphhFFWuRVLMSlAILuFshAVslsALFsGpThh...asulsssluVhlFFhhMulVGhLEGMQIAFFAVAKLPtpERGouaFG+KTC-LLFcGNGpNLPGFMIGRQLTVVhSFFlVuulTuLsIpPGpGs.NIFGlSDGAQsFLNaGFpGAVITTILASIoWQLAASAFPlAFLNNPlTYlLLhlALhLEhTGlCuGAWV...................................................................................................................................................................................................... 0 6 6 6 +4431 PF04801 Sin_N Sin-like protein conserved region Waterfield DI, Finn RD anon Pfam-B_6302 (release 7.5) Family Family of higher eukaryotic proteins. SIN was identified as a protein that interacts specifically with SXL (sex lethal) in a yeast two-hybrid assay.\ The interaction is mediated by one of the SXL RNA binding domains [1]. 20.00 20.00 20.60 20.60 19.90 18.60 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.46 0.70 -5.62 15 414 2009-09-10 16:59:54 2003-04-07 12:59:11 8 6 306 0 263 406 1 288.50 21 70.12 CHANGED EED....DPVVpEIsVaLupoLs-..pLYlhQYPl+stttsaDssp....................h.ss+lKPpspclEh-hulDTpSpsYDt.KuE...h.hsG........psupcpsshpsthh-tpsFhSs+shsssscYAVGlhpsGElHLTPLpuIlQhRPShpahD.K..p-pcpKs....cpsup--s-..scp............p..c.hptloV+FuR...tsp+t+ptR.pohphhpph.u-EsWlchpaashp.sspsphc+ptLhupsssss........sshshSsp-YlshLhssstccphs........ssspcsLspppL+shPLh-.Ql+sLhpcu+..VhpFuplhpLl.................spss.....s-plLcsLppsAhLVpGsWVsKS-llasc...u.......tshh.A-shppARDalLapFops.ct.lpRppls...ssspLssc-s.....+-lLsphAp..sp....ss+sWclhhssDc.-F.........p..cas-lVp+..QchhWpupppcLcchh 
......................................................................................................................................--....Dslltphslals.............s.tt........plalh..Q...YP..+.st..............s....h..pt......................................................ps+hKsppt.h..l..E.l-hsl.sp....s...t....as...hs..................................................................t.tpt..t.h.ppt.....hh............t.tts......s...s.tYh...h..uh.h......p......p....t...........p....l..HLsPl.p.ulhQhRPphpalD..t........ptp..tpt............ptt.tptttp..tpp.............................................t.hp.lp....h.p..ht.......p.......stpt.....t......t..t.t.....s...h..p..h.tp..E.Whphpaht.p..s...s......................hh............................................................h.......................................................................................................................................................................................................................................................................................................................................................................................................................................h.......................................................................................................... 
0 94 148 217 +4432 PF04954 SIP Siderophore-interacting protein Bateman A anon COG2375 Family \N 26.30 26.30 26.60 27.10 24.30 25.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.58 0.71 -3.73 169 1985 2009-01-15 18:05:59 2003-04-07 12:59:11 8 15 1328 1 509 1471 82 120.80 30 41.36 CHANGED schhLLsGD-TALPAlushLEpL...P.sssputshlEVssss-.p.slss........ssslplpWlhRsss............tsshLhp.slpshsh....................................................................ss......ssasW.lAuEusss.+slRcaLhp-tGls+p.plthsuYW+pG ..............................................................................................................................h...hLhluD-oulPAlsphL...EsL..............P.ss.spsp.sl.l.c.V.ss.ssc....p.....Lsp...................hssh.plpWlh+ssp.............tslss.slpp.hph...............................................................................s......................ss........shasW.lsGEupsl.+slR+hlptEhulspp.plpssuYW+t................................................. 0 125 323 436 +4433 PF02146 SIR2 Sir2 family Mian N, Bateman A anon IPR003000 Family This region is characteristic of Silent information regulator 2 (Sir2) proteins, or sirtuins. These are protein deacetylases that depend on nicotine adenine dinucleotide (NAD). They are found in many subcellular locations, including the nucleus, cytoplasm and mitochondria. Eukaryotic forms play in important role in the regulation of transcriptional repression. Moreover, they are involved in microtubule organisation and DNA damage repair processes [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.21 0.71 -4.54 20 5869 2012-10-03 09:55:27 2003-04-07 12:59:11 12 37 3562 114 2282 5061 910 172.30 31 58.89 CHANGED GAGISspuGIPDFRS.csGLas+hspcs..LssPpshhchsphhps...FYsht+chh....pspPsssHphlstLEcps.plhplhTQNIDsLcccAGsp..pllEhHGShspspCssCch.hstpplhpphchtp.s..............................pCspCGu...................................llKPDlVaFGEsLP.cphppshc.clpcsDlllVlGTSLpVhP .......................................................................................................GAGlSs.p.....S..G..I.............P....sF.R......u...........t...s..G.......l.a........p........p........h........p...........t.................l.............s..s.c.......t.....h...h........p.......p...s...h...h.htp....................F..a.p.h.h.c........phh...............pspP......N.h..u...H.hhl..............A....p....L.............p.........c.........t.........t..............h........h.t....l..........lT.Q...........NlD......s.............L........H.......p.............c........A............G..sp.....................pll...ch.HGslhp..s.p....C.....h.....p....C.t......t...........h........s....h.....p.........h..........t...p....h....t...t..t....t...s......................................................................................pC...s...p...C..s.u.....................................................................................................h..l+P.c.l..V........h..F......G.....E...........ls........t....h...p...p......s......h.............p...t............h.........p..........p...u.............D.........lh.lllG.TShtVhP............................................................................................................. 0 811 1362 1902 +4434 PF04247 SirB Invasion gene expression up-regulator, SirB Mifsud W anon COG3094 Family SirB up-regulates Salmonella typhimurium invasion gene transcription. 
It is, however, not essential for the expression of these genes. Its function is unknown [1]. 29.70 29.70 29.70 29.90 26.90 29.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.48 0.71 -4.39 63 976 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 953 0 140 446 33 123.40 43 97.35 CHANGED h.YhslK+lHlshlslSlsLFllRhhhhhpsus.hhpp+h.....l+IsPHllDTlLLlSGlsLhhlh...phhPFss..sWLotKlhullsYIsLGhhAL..+ps+.....spth+hhA.FlsAlsshhhlstlAhoKt..sh..l ......a.hLhplHLlsluLSlsLhslRaahphppps.....ht.h.....t+a.....s+IlP......llDTlLLLSGIuLhhhs...phhPFostutWLTEKLhuVllYIlLGalAL..+pt+.....o...p..p..sRhhA.F.LA.LlsLhhIlKLAsTKhPlL..................... 0 28 71 108 +4435 PF01380 SIS SIS domain Bateman A anon Bateman A Family SIS (Sugar ISomerase) domains are found in many phosphosugar isomerases and phosphosugar binding proteins. SIS domains are also found in proteins that regulate the expression of genes involved in synthesis of phosphosugars. Presumably the SIS domains bind to the end-product of the pathway. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.28 0.71 -4.54 45 30074 2012-10-02 15:05:26 2003-04-07 12:59:11 17 45 4875 210 6102 21679 8959 126.50 18 45.69 CHANGED hltpsc.plhlhGtGsuht.suhththph.pplshhsshst.usp.....httt.hshlspsclllhlo..hsspstchhpsst.hsppps.sph..lsITspssusluppu...chhlhh.ss.ph.......hpshssphsshtslhht ..........................................................................h...ptp.plhhh.G.tG.s....o....hh...s....u.h....p....h....t..h....pl...p.......p...h.......u...h...h..s...s.....t....s..h...s..u....st....................h.tts......h....s.....h.....l...s......p....s.......s....l....l....l.s.lS.......tSG........p.....o........t........-........h.....lt.....s.....h.......p...h....s..+....p.......p........G....spl............lsl....o...s.......t...s...s..o....s.....l..s.....p.....u.........s..h...s.l...h..h....s.s..ch...........h.s.h..t..s..h.ss.phhhh.hh...h................................................................. 1 1857 3650 4961 +4436 PF05185 PRMT5 Skb1; PRMT5 arginine-N-methyltransferase Wood V, Mistry J anon Pfam-B_4050 (release 7.7) Family The human homologue of yeast Skb1 (Shk1 kinase-binding protein 1) is PRMT5, an arginine-N-methyltransferase [3][4]. These proteins appear to be key mitotic regulators. They play a role in Jak signalling in higher eukaryotes. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.61 0.70 -5.57 35 750 2012-10-10 17:06:42 2003-04-07 12:59:11 11 21 319 29 484 1612 79 345.80 25 63.15 CHANGED -.hssWphWsslRphC.sap.spLpVuLcls..psl.Pst....p.lpRWhuEPlcslllssshFlsN..pp.GaPsLsKtpQplltpahphpssthlls........................................................t.sscpshss...Ylp....Yl+aLhpp........s......shsttcpht..sYpDhLQsPLQPLpDNL-StTYElFE+DslKYp.Y-cAIppALhDhssppctss.........................hllhVlGAGRGPLVcpsLpAuppss..s........plclaAlEKNPsAhlhLp.p+.phcp...W...ts+.VpllpsDMRpapsst.......................KsDllVSELLGSFGDNELSPECLDGhQ+.....aLpps.GISIPpuYoSY...................lsPIsuspLasc...........lpthpp................................ssshEpPYVVphpshthlusp.............................ppsapFpHPstp...............................p..tcNsRapslpFps.pp...cuhlH..........................GFuGYF-usLYpD........lpLSIpPs..........................................s...+oss.MhSWFPlaFPLcpPlhlpcss.........plplphWRpss..spKVWYEWslss 
......................................................................................................................................................................................t............................................................................................................................................................................................................................................................................................................................................................................................................................................hllhslGuGpG.Ls.h.uhpAu...........................thcl.Y.AV.E.t...s...s.....A...h.t.h.p.th...p...t.p.t.....h.............ssp...l.pl.....l...p...u...ch.cch....p...h.P.-...................................................................psDlllSEh.h..G..s..h.h....N..E..h...s.E.....s....l.....ut.....p.+...........aL.+ss...G....l....hPsp..hsha.....................luP.h.s.s..t..p....l..a.p.c....................hh.s.h.tt...................................................t.hcps.h...l.....s....p....hp.sh....h.....l.uts..................................................p..s.h.s.F.p..cspts....................................................................tp..p.p.h..pt....t..h.p.F..t....h...pp.........su.hlH................................GhusaF-...s....h...h...h.t.s............................lh..L.S.h..t.Pp.................................................................p........shhpWh...hhh.l....p..pP.l...lptsp............pl..p..h.....h................................s............................................................................................................................................ 
0 182 271 391 +4437 PF01202 SKI Shikimate kinase Finn RD, Bateman A anon Prosite Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.08 0.71 -4.36 98 7157 2012-10-05 12:31:08 2003-04-07 12:59:11 17 51 4272 70 1785 5455 2766 155.30 27 64.96 CHANGED hGuGKoTlG+hLActLshpFlDs.Dp.h..I....EppsG....hslspIFp.pc.GEssFRchEpcs...l..pclhppp.....shVluTGGG..slhppc...spphLp.............ppGh.llaLcssh-tlhpRl..p.pcpp....R...PlL.psps.........p.h....h..chlh.pRpslYpc..h.....u......s.h..h.hss.spps..............pplsppllptlp ...............................GuGKoTlGp.t.L.....Ap..p.L........s.....h.p.........Fl....D..........s...D.p..h.........l.............p.pps.s.......tsl...u..p..l..h.s...h.p....G...E...s...s....F......R......p...h.E..p.....p....s........l........p.c....l....t..pp....................ssVl..u....s.......G..G...G......s..........s...hp.cp..........sR.s..hLc.......................................tp..sh....s..l.a...L..c..s.s..h...-..h..lh.pRl....p..tcps............R......Phh...psps...........................ph......h...chlt.....p.Rp..s.h.Ycc....h.....s..........t.h..h..l.ss...spts.............ptlspplht...t................................................................................................................................................. 1 506 1065 1476 +4438 PF02731 SKIP_SNW SKIP/SNW domain LOAD anon LOAD Family This domain is found in chromatin proteins. 
25.00 25.00 30.50 30.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.14 0.71 -4.77 28 359 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 296 0 242 348 3 154.70 53 31.04 CHANGED spaI+YTsssp.ss.t........pp......RII+hsppp.....pDPL-P..P+F.K.+K+lspsssSPPsPlL+......SP......sRKLTtEDppcWcIPPslSNWKNsKGYTIsLDKRluADGRu..Lpcsp.....lN.-+FApLo-AL.hA-+pAREElctRschp+phs.+EppcKE-+LRpLAp+ARp-R .......................................s.palR.YTPupQs.sshsp.........tpp........RlI+hVEhQ.....pDPhEP..P+F.K...pKKlPR.G..PPS......P..Ps....PVhH..........SP..sRKl.Tsc-Qpc....WKIPPsISNWKNsKGYTIPLDKRLAADGRG..LQsVp................IN.-pFAcLuEALahADR+AREtVchRAphp++hApKEKppKE-cLRplAQ+ARpcR................................... 0 80 132 199 +4439 PF03217 SLAP Bacterial surface layer protein Mifsud W anon Pfam-B_2530 (release 6.5) Family \N 27.00 15.20 27.10 15.20 26.80 15.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -11.08 0.71 -3.93 27 528 2009-09-14 12:59:11 2003-04-07 12:59:11 9 28 47 0 108 469 0 105.10 28 27.52 CHANGED oK......slMHsAYsYs.psG.c...+l...ss...h.hpshs....sls...h.hs........s..sts..I.sGKpYY......+Vup.s.....cYItAuNlDGTpRh...LK+NAYVYss.supR....ssph.hh+KGoplsTYGushplhNGK.pYY+IGt..s..tpYVKsuNF ......................................................................lh+suhhYs.psG.p...ph......st.........h...hhthp....plp....h..h................s..hhp.....I...sup...pa..Y......plsps......pYl.....pus.N..l...s...u...s.p+p...................lp+.NAYlYppss.p+...............ttp....h....hlpK..Gpplp..s..a..Gs..p.h...h...t..Gc.pY.Yplst....s.....pal+s.s.............................................. 
0 10 10 24 +4440 PF03843 Slp Outer membrane lipoprotein Slp family TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.60 20.60 21.00 34.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.70 0.71 -5.24 38 1178 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 813 0 129 486 27 159.70 46 83.93 CHANGED LuGCso.lPptl..tspsssh.shsplpssssshhGppVRaGGhIlslcN.ppspTplEllshPLss.su+P.phs..ppopGRFlAphsGFLDPssatpGRhlTllGslpGhcpsplGchsYpaPVlpspsh+lWp....htpph.hss.......a...s.ah..ashh.....hWt ..............................LuuCso..lP..psI.t.s.ssP.ss.QpshVtVhstPtLYl..........GQpARFGGKV..lsVpN.tpscTcLEIAslPLDS..uA+P.sls..ps..spGRlh.AchsGF.LDPVsa.RGphVTVlGsIsGs...s.GK.IGpsPY.pFhlhps...pGa+hWH......Lppp.V.hssp..........sa..h.Y...G..u.....h.........GaG....u....Y................................ 0 32 65 98 +4441 PF01464 SLT Transglycosylase SLT domain Bateman A anon Prodom_3175 (release 99.1) Family This family is distantly related to Pfam:PF00062. Members are found in phages, type II, type III and type IV secretion systems (reviewed in [4]). 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.54 0.71 -4.61 39 14235 2012-10-03 00:09:25 2003-04-07 12:59:11 15 167 3793 20 3295 11652 3304 115.80 22 28.70 CHANGED shhttstpphslpstllhAlstpESsasPpAhS......tss.......................uhGLMQlhssTspthth...........shsphhcPppslpsGspaLp....phhpphst.......shhhuluAYNuGhuphtchhptttptstphhh .............................................................h......ttthtl.s...t.h...lh.ul.h.htE...S....s......a......s.........s...p...A....h......S................sss........................................................................................AhG..LMQ.....l..h...s...s.T..u..p.p..hs.......................................sh.s...ph.h.-P.t...p....s..l.ps....G.s...p.....Y...Lp..........th..h..p..p..hst.......................shhhs.luA.Y.N..u.G....u...p..h.phht...........h.................................................................... 1 900 1993 2657 +4442 PF02258 SLT_beta Shiga-like toxin beta subunit Bateman A, Mian N anon Pfam-B_3684 (release 5.2) Domain This family represents the B subunit of shiga-like toxin (SLT or verotoxin) produced by some strains of E.coli associated with hemorrhagic colitis and hemolytic uremic syndrome. SLT's are composed of one enzymatic A subunit and five cell binding B subunits. 21.10 21.10 21.20 21.50 21.00 20.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.44 0.72 -4.14 2 261 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 133 146 1 142 0 67.60 81 78.02 CHANGED As.DCspGKlEaoKYN-DDTFTVKVssKEhaTsRWNLQsLL.SAQlTGMTVTIKossCcsGuGFuEV.Fp ...................M.AADCAKGKIEFSKYNEDDTFT...VKVs..GKEY.WTSRWNLQPLLQSAQLTGMTVTIKSSTC-SGS.GFAEVQFN.... 0 0 0 0 +4443 PF04102 SlyX SlyX Bateman A anon COG2900 Family The SlyX protein has no known function. It is short less than 80 amino acids and is found close to the slyD gene. 
The SlyX protein has a conserved PPH(Y/W) motif at its C-terminus. The protein may be a coiled-coil structure. 30.00 30.00 30.00 30.20 29.60 28.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.43 0.72 -3.60 114 1397 2009-09-11 12:49:59 2003-04-07 12:59:11 7 3 1387 1 285 730 213 67.60 42 92.66 CHANGED plppRls-LEh+luFQEcsl-pLNpslscQppplccLpcplphLtp+lcshp.sst...ustsc-...sPPPHY ............hEtRlsELEs+lAFQE.TIEELNtslstpphphs+LpcpL+hLscKlcs...t....p...sSsh......As.pu-E..........TPPPHY.............. 0 67 152 221 +4445 PF02481 DNA_processg_A SMF; DNA recombination-mediator protein A Mian N, Bateman A anon Pfam-B_2252 (release 5.4) Family The SMF family, of DNA processing chain A, dprA, are a group of bacterial proteins. In H. pylori, dprA is required for natural chromosomal and plasmid transformation [1]. It has now been shown that DprA is found to bind cooperatively to single-stranded DNA (ssDNA) and to interact with RecA. In the process, DprA-RecA-ssDNA filaments are produced and these filaments catalyse the homology-dependent formation of joint molecules. While the E.coli SSB protein limits access of RecA to ssDNA, DprA alleviates this barrier. It is proposed that DprA is a new member of the recombination-mediator protein family, dedicated to natural bacterial transformation [2]. 
29.60 29.60 29.70 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.27 0.70 -5.44 97 4687 2012-10-01 21:16:48 2003-04-07 12:59:11 10 17 4194 1 1010 3571 633 204.40 38 60.24 CHANGED t.hppts...h.phls.ht-spYPptLcp.l................c...sP.hlLah+G..........shshL....t...slAlVGoRpsoshGtphspplsppLup.t.....uh..slVSGlAhGIDstAHpuAL.....ptt........GtTlAVLusGl..-hh....YPppNppLhpcIhp..p.GhllSE.....assssp.PpptpFspRNRIIuGLopullVlEAsh+SGoLlTAchAhc.u....RcVaA.l....PGsltsspupGsppL....IppG.Atllps ......................................................................................h...t.th.phls.ht-ttYP..h.Lpp...l...............................h...........s.sP.hlLahcG.........................shphLp.............p.plAlVGoRp..s...........ot....hG.p..........phsp.....pls.pp...Lup..p.......uh..s.llS.GLAhG..IDssAHpuAL......p.s.t..................GtTl.A.V.l.usGl..-..t.h....YP..pp.pppLtppIh.....ptGh..llS....E........a..s..s.....s.s..........Ph...t...hp..FPpRN....RIIuGLS.p.u.ll...Vl....EA...........s.......h....+.S...G.SL..ITA.chAh.EpG.......R.-VaA.l......PGsl.ts.s...hSpGsppLIcpG..Apllp.s................................................................ 0 340 676 868 +4446 PF03467 Smg4_UPF3 Smg-4/UPF3 family Bateman A anon Bateman A Family This family contains proteins that are involved in nonsense mediated mRNA decay. A process that is triggered by premature stop codons in mRNA. The family includes Smg-4 [1] and UPF3. 
21.30 21.30 22.10 21.60 19.50 20.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.37 0.71 -4.27 6 408 2012-10-02 20:46:34 2003-04-07 12:59:11 10 11 263 3 260 394 1 159.80 33 36.82 CHANGED ppsppctp.KVVlR+LPPsLTcc-hhpplps.Ls-c..................WshFchhsushSacsptYSpshlpFps.sD.lhEFpshFsGal....FlDsK......ssphhAlVEhAPa.QKlspssK..hKcDs+pGoI-pD...PcahcFhcplt..ctpsspsh..s-p.lc+.........tcstppht+h.sTPLl...-Yl+pK+sp+ ......................................p......h...KlVlRRLPPsLTcpph.p.tl..t...s.......ls.tc......................................................p.ha...p.h.....hs.......s.ph...sh.h..........s.p..h.a..........S..............R...AYl.s..Fp..s...-.D...lh.Fpc..pacGal...................F.lDs+...............................G..p..h....sh......VEaAPa..Q+...h....sp...pp.............p+cD..s+.....tGTI...-pD...s-YhpFLEs..h....t.........p...p.p.....hts.......ct.lpc..........................pt.....ps.t.....p..h.....thh..sTPLl......pal+.p+ph........................................................................................ 0 76 129 202 +4447 PF04927 SMP Seed maturation protein Kerrison ND anon Pfam-B_6221 (release 7.6) Family Plant seed maturation protein. 21.00 21.00 21.00 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.04 0.72 -4.01 37 316 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 61 0 192 289 0 59.60 34 56.79 CHANGED tlplG-sL..........pAsuttsucKP...VstpDAAulpuAEs+ssGt...............tshPGGlAA..shpuAAshNtpsst .................lphu-sh............p.sut....suc+P............lstpDAutlp.uAEs+spGt.................psts..GGl..AA..shpuAAshNtph..t........... 0 25 103 158 +4449 PF04355 SmpA_OmlA SmpA / OmlA family Kerrison ND anon COG2913 Family Lipoprotein Bacterial outer membrane lipoprotein, possibly involved in in maintaining the structural integrity of the cell envelope [1]. Lipid attachment site is a conserved N terminal cysteine residue. 
Sometimes found adjacent to the OmpA domain (Pfam:PF00691). 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.36 0.72 -4.36 146 2731 2012-10-01 23:09:26 2003-04-07 12:59:11 8 6 1710 10 507 1486 1060 70.60 29 49.17 CHANGED GshlspptlsplpsG.Mo+pQVphlLGoPhhs.ssFsss.pW..Ylhphpps.....t.tt..........p..ppplslhF-ssshlpths ..........Gphlstsslppl+.......hG.Mo+pQVthlLG.sP...hhp...s......h...p...sp.....sW..Y..lhpppsu.....p.t...........................p...ppsltltFsssGhlps..s........................................... 0 102 248 380 +4450 PF01668 SmpB SmpB protein Bateman A anon Pfam-B_1766 (release 4.1) Family \N 20.20 20.20 23.20 25.90 19.70 18.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.11 0.72 -4.17 166 4544 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 4483 12 970 2590 1894 67.80 47 43.63 CHANGED ph..l..ApN+KA+a-YtI.-paEAGlsLpGoEVKSlRs.G+sslp-uastl.csG....EhaLhssaIssY.ppush.h ............hl.ApNKKARH-YhI...-.paE..AGlsLpGoEVKSlRs...G+ssLpDuaspl..+s.G....Es..aLhssHIsPYppush.h........ 0 339 651 828 +4451 PF01713 Smr Smr domain Bateman A anon [1] Family This family includes the Smr (Small MutS Related) proteins, and the C-terminal region of the MutS2 protein. It has been suggested that this domain interacts with the MutS1 Swiss:P23909 protein in the case of Smr proteins and with the N-terminal MutS related region of MutS2 Swiss:P94545 [1]. This domain exhibits nicking endonuclease activity that might have a role in mismatch repair or genetic recombination. It shows no significant double strand cleavage or exonuclease activity [2]. The full-length Swiss:Q86UW6 also has the polynucleotide kinase activity. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.89 0.72 -3.79 123 5312 2009-11-23 09:46:11 2003-04-07 12:59:11 16 56 3968 8 1427 3901 1760 78.00 31 16.90 CHANGED lDLHGhphc..-Ahptlpphlppshppsh...................pslh.lIpG+G......................sG.h......L+ptltpaL....hphphl.tht.s....t.G.usGshhVhl+ ........................lDL+Gh.ph-..-Ah....p....tLt.p.a.lspuh.t.psh................................................................pplp.llHG+G........................su..h..........L+ptVt.paL.....tppppVhuap.A....ptG.GsGuhlVhl........................... 0 513 907 1211 +4452 PF00835 SNAP-25 SNAP-25 family Bateman A anon Pfam-B_1606 (release 2.1) Family SNAP-25 (synaptosome-associated protein 25 kDa) proteins are components of SNARE complexes. Members of this family contain a cluster of cysteine residues that can be palmitoylated for membrane attachment [2]. 21.20 21.20 21.50 21.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.32 0.72 -3.19 13 273 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 105 20 132 235 0 55.90 41 26.35 CHANGED PhNK....hps.c.ttstppsW+sNsD......GtVlssQP.tRVhD.tpssh.....sspuGYI..pRI.TNDA ..................PCN+....h+s.c...uacpsWusNpD.........GtVVus..Q....P...u.R.Vsc.pppph........usSGGaI..pRl.TNDA.......... 0 27 39 81 +4453 PF00565 SNase Staphylococcal nuclease homologue SMART anon Alignment kindly provided by SMART Domain Present in all three domains of cellular life. Four copies in the transcriptional coactivator p100: these, however, appear to lack the active site residues of Staphylococcal nuclease. Positions 14 (Asp-21), 34 (Arg-35), 39 (Asp-40), 42 (Glu-43) and 110 (Arg-87) [SNase numbering in parentheses] are thought to be involved in substrate-binding and catalysis. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.32 0.72 -3.79 18 4232 2009-01-15 18:05:59 2003-04-07 12:59:11 12 46 2100 210 1727 3872 2427 105.40 24 47.39 CHANGED +lRLsulDsPEosc..t..................psaGpcApcah+chlhtpclhlh.hsp.....D+YGRhLuhVahs.........spslNttLl+pGhAhsht.hYss..p.tppsphhpsEpcA+cc+hGlWup ........................................................................lRLhsl-..s..P...E...ht.............................................................ps.a....G..pc......A..p...p.h..h.c...ph...l..h....s.....c...p..l......p...h.t.h..sp..................D..c......a......G.....R.h...l.....uh.l.a.hs..........................................sps..l..sptl..VcpG..h.......Ahs........hp....h..............h......s..................................t......h......................p..........t.......h..h..pspppA+p.p.+hGlWp.................................................. 0 583 1042 1458 +4454 PF00209 SNF Sodium:neurotransmitter symporter family Finn RD anon Prosite Family \N 19.70 19.70 19.70 19.70 19.40 19.50 hmmbuild -o /dev/null HMM SEED 523 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.89 0.70 -6.03 15 11073 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 2324 84 3409 8360 1447 275.40 18 89.59 CHANGED R-sWssKh-FlLSslGauVGLGNVWRFPYLCYKNGGGAFLIPYhlhLlhsGIPLFFLEluLGQasppGulslWt+ICPlFcGlGYAshlIshalslYYsVIlAWAlaYLasSFTssLPWspC......spsWNTscCh-.h..psts.............t..sho......t.SPstEFWcRpVLplo..........sGIpchGsl+WpLsLCLlluWlllYFslWKGVKooGKVVYFTATFPYllLhlLLlRGlTLPGAhcGIpaYLpPcap+Lt-PpVWlDAATQIFFSLGlGhGsLlAhuSYNKacNNCYRDullluhlNusTSFlAGFVIFSlLGFMApcpGls...........................................Iu-VAcs.....................GPGLAFIAYPcAlThhPhSPhWulLFFhMLlhLGLDSpFsslEullTullDpaPhhh+...+RElhshhlslhsaLlGLhhl.TcGGhYlhpLFDpYuAo.hsLLhlshhEsluluWlYGss+Fhc-Ip-MlGaRPshaa+hCWpFloPslhlslhlhSllpa..pPLsYss.Ys..........YPsW.u.ulGWhhALSShlslPlahlh+lhpt 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hh................................................................................................................................................... 2 1079 1659 2764 +4455 PF00176 SNF2_N SNF2 family N-terminal domain Sonnhammer ELL anon Published_alignment Family This domain is found in proteins involved in a variety of processes including transcription regulation (e.g., SNF2, STH1, brahma, MOT1), DNA repair (e.g., ERCC6, RAD16, RAD5), DNA recombination (e.g., RAD54), and chromatin unwinding (e.g., ISWI) as well as a variety of other proteins with little functional information (e.g., lodestar, ETL1). 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null --hand HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.00 0.70 -5.62 28 15349 2012-10-05 12:31:08 2003-04-07 12:59:11 18 509 4016 6 7427 19108 4238 288.60 21 25.86 CHANGED aQhcGlpWhhphh................................................................................................................psthuGILADEMGLGK..............TlQsIullt......................hLtcttphh..................tP...................................................................................................................................................................................................................................................................................................tLllsPt.ShlpsWhsEhp+as.s.sl..pslsatGsp................................................................p.hhphtphhtshsVllToY-hlh......................+phshlpp..hc..WphlllDEuHpl...K........NspSpltpslpp.lp..sppRllLTGTPlQN....sLtElauLlsFlhPshas.........shc..sFcphh.....tts....................ptptspcthppLppllps......................hlLRRhK..s-l..pcs.....LPsKpcpllhsphoshQpchYp............phhppschhhstss................................................thpphhshlhpL+KhsNHPhLh 
..................................................................................................................................................................................................................................................................................................................................................aQ..uh.t.ah................................................................................................................................................................................................................................................................t......th.s.s.lL....A..D..-......M....G.........LGK.............................T....l..p....s..l..s..h.lt...................................h.h.p..t...t..t.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hLl.l..s......Pt.....o...............l....l....t....p....W....t....p...........E....h...p....c....a...h....................p.l.............ph.h....h...h....p....u....sp..........................................................................................................................................................................t..........t...............t...........h.......t.......p.........h......s.......l....l.....l...s...o....Y.phht...............................................................................................................................p....p....h.....p..
.h....ltp............hp..........a..p...h..l....l..l...D...E........u.Hp.l..........K.....................................N...t..p....o....p......h......h...p.....s..l.....p....p...lp..................spp...+.......l.......h.....L.................T..........G.....T...P..l....p...N.............................sl.t..E..L..a.....u...l..h....p....a.....l..........P..s..h..hs................................................shp.....pF...p.p..h.h.................................................................................................t.......t....t...h...p...p...L....p....p.....h...l...p...s.........................................................................................h.h..l..R..R..h.+..........ppl...........p..............................L..P.........+...............................p...................h......h..h....h..t...h....s..t..............p...p......t....h...Yp.................................................................th.h.t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 2588 4337 6155 +4456 PF04855 SNF5 SNF5 / SMARCB1 / INI1 Kerrison ND anon Pfam-B_6054 (release 7.6) Family SNF5 is a component of the yeast SWI/SNF complex, which is an ATP-dependent nucleosome-remodelling complex that regulates the transcription of a subset of yeast genes. SNF5 is a key component of all SWI/SNF-class complexes characterised so far [1]. This family consists of the conserved region of SNF5, including a direct repeat motif. SNF5 is essential for the assembly promoter targeting and chromatin remodelling activity of the SWI-SNF complex [2]. 
SNF5 is also known as SMARCB1, for SWI/SNF-related, matrix-associated, actin-dependent regulator of chromatin, subfamily b, member 1, and also INI1 for integrase interactor 1. Loss-of function mutations in SNF5 are thought to contribute to oncogenesis in malignant rhabdoid tumours (MRTs) [3]. 21.80 21.80 22.00 21.90 20.30 20.80 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.66 0.70 -4.91 25 526 2009-01-15 18:05:59 2003-04-07 12:59:11 7 15 262 0 346 474 0 197.40 33 41.27 CHANGED ppApp..........tsLVPIRLDl-h.................................-th+LRDTFhWNlsEplloP-tFAphlscDLclPsss...hhppIussIppQlc-Yts.........................................s.ht.p.....................................................................................-hRllIcLsIslupppLhDpFEWDlss.sssoPE-FApphssDLGLuGEFssAIAHuI+EQl.hhpKplh......................Gashsupslt-s-hctth.hss..................hsshhp.spptscpasPhlppLocsElE+pEt-+-Rph....R .................................................................................................................t..sp..c.LVPIRLDh-h.......................................ss..+lRDsFh..WNhp........-.......ph.hos-.....FAph..hscDLc...L.......ss...............hhstIsptIcpQ.lppa.s...................................................................................................................................................................................................................................................................................................s-.Rll.l.pLslpls.ph.....hhDpFEW-hsp....tssPE......pFAhphst-LuLs.G.EFsssIAauI+tpl.h.h.p+...........................sh.h.st...tsh.s.p........................................................h...s.t.scpapPhl..LoptEhE+p.tcppRp.R................................................................................. 
0 105 189 289 +4457 PF01174 SNO UPF0030; SNO glutamine amidotransferase family Belitsky B, Finn RD, Bateman A anon Prosite Family This family and its amidotransferase domain was first described in [1]. It is predicted that members of this family are involved in the pyridoxine biosynthetic pathway, based on the proximity and co-regulation of the corresponding genes and physical interaction between the members of Pfam:PF01174 and Pfam:PF01680 [2]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.28 0.71 -4.67 4 1876 2012-10-03 00:28:14 2003-04-07 12:59:11 14 6 1744 30 575 2672 2181 181.00 43 94.15 CHANGED VLALQGAhhEHhctlc+ChsEs..........hsVKcsEpLspsDALIIPGGESTuMuhlhc+hGhh.sLhEFl+NspKshaGTCAGLIhLSpplusp...lhpLsLLcVsV+RNAFGRQspSFppch-Fpshh...psFsusFIRAPVI-clLss-sVplLh-hs....G..clVsAt.Qs.phLusSFHPELuEsshRhacaFlcphVp .................................................VLALQGuhtEH..hp....h....l..c....p.h..G..s...cs...........................htV..+..p.hc.....c..L......p....p........l...D....GLIlPG..G.........E......S....T..T........h.s....+....L............h....c..p.......h....s....h.hpsl.....+.ph.lp.....s....................G....h..P...laGTCAGh.....I.l.L.......A.c....c.....l..t.st...............pp.....h.....L.....u.......t.......h..........D.........l.....s.........V.........c...R.............N..........A............F.........G............R.............Q.......l............-............S........F..........c......s..c........l..c........h....p..G..lu.........................t.s..h.......u....V.......F...I...R......A..P....h....lp.p........V.........G...................p....s......V...c...l..L...u.p.hs.........................s.....c.I.V...A..s+....Qs...shL.usSFHPE..L.....T..s....D...hR.lHpa.Flphh..t........................................................................................................................... 
2 210 383 500 +4458 PF00080 Sod_Cu sodcu; Copper/zinc superoxide dismutase (SODC) Eddy SR anon Overington and HMM_iterative_training Domain superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the copper/zinc-binding family is one. Defects in the human SOD1 gene cause familial amyotrophic lateral sclerosis (Lou Gehrig's disease). Structure is an eight-stranded beta sandwich, similar to the immunoglobulin fold. 21.00 21.00 21.00 21.30 20.80 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.28 0.71 -4.04 127 3852 2009-09-11 05:27:43 2003-04-07 12:59:11 15 29 2380 488 1163 3237 322 143.20 32 72.64 CHANGED Asshlpst...................ssp....ltGslphppss..ss......lplpsplpG...L.sP.........................G..hHGhHlHchGcs..sss..........stSAGuHa....NPts..tp..H.Gts..sst.................tHsGDLsNl..hs.sssGsu..phs..hhss.hl....sL.s.....sll....G.+ulVlHsstDDh...................popssGsuGsRlAC.GlI ............................................h...................tsp.......shGslph...ppp...s..ts.........lpl.ssp...l...p.G....L..ss..........................G.....HGFHlHp......h.Gs.....s...sss..............s.t.SA.......G.sH.a......sPts.....tp........H.suP....stt...............................tHhGDLssl..........hs...s....scG.p.A......phs...lhss..pl.........tlps.p.....slh.......G..+ulllHsssDshs.........................st.s.h..Gsu.GsRhACGlI................................................................................ 0 373 655 952 +4459 PF00081 Sod_Fe_N sodfe; Iron/manganese superoxide dismutases, alpha-hairpin domain Eddy SR, Griffiths-Jones SR anon Overington and HMM_iterative_training Domain superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. 
Three evolutionarily distinct families of SODs are known, of which the Mn/Fe-binding family is one. In humans, there is a cytoplasmic Cu/Zn SOD, and a mitochondrial Mn/Fe SOD. N-terminal domain is a long alpha antiparallel hairpin. A small fragment of YTRE_LEPBI matches well - sequencing error? 21.10 21.10 21.10 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.85 0.72 -3.72 25 8157 2009-01-15 18:05:59 2003-04-07 12:59:11 17 14 5135 325 1451 5799 936 79.40 42 40.82 CHANGED sapLPsLPYsYsALEPHIScEThEhHHsKHHpsYVsN.LNshl-sh.-hspc...shEpllhp......shpsultNN.huGHhNHolaWpslu ...........................apLPtLP.Y..........s.h.s.A.L...c.P.a.lstcThchHHsK.HHp....T..........YVsNlNsul...c...st...p.....h...t.sp..........sl..E.c.l.lhp................................shpsu....l....h.NNuGG....HhNH.olFWcsLt.................................. 0 485 915 1223 +4460 PF02777 Sod_Fe_C sodfe_C; Iron/manganese superoxide dismutases, C-terminal domain Eddy SR, Griffiths-Jones SR anon Overington and HMM_iterative_training Domain superoxide dismutases (SODs) catalyse the conversion of superoxide radicals to hydrogen peroxide and molecular oxygen. Three evolutionarily distinct families of SODs are known, of which the Mn/Fe-binding family is one. In humans, there is a cytoplasmic Cu/Zn SOD, and a mitochondrial Mn/Fe SOD. C-terminal domain is a mixed alpha/beta fold. 
20.90 20.90 21.10 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.56 0.72 -4.20 27 8609 2009-01-15 18:05:59 2003-04-07 12:59:11 13 16 5213 325 1661 6184 1017 97.60 44 50.45 CHANGED G.E.Ps...GpLucAIscsFGSa-pFKppFspuAsuh.GSGWuWLVhc...sspLplhsssNtsss.hspG..hsPLLslDVWEHAYYlcYpNtRP-YlcsFW.slVNW-.sscca .................................................Pp..GcLtsAI.cpsF.G...S...a-...p.FKp...p.F.s.s..AA.s..s.p.F......G.S.GWuWLV.......h.......s........................................s...............G......+..L.tl...s.s.TsNpssP...l.......s........p..........s.............t........h........PlLsl.DVW.E..HAYYl...........p.Y........p.......N.h......R....sc........Ylpsaa.sl.l.NWp.stp............................................ 0 546 1040 1403 +4461 PF03002 Somatostatin Somatostatin/Cortistatin family Bateman A anon Pfam-B_1891 (release 6.4) Family Members of this family are hormones. Somatostatin inhibits the release of somatotropin. Cortistatin is a peptide that is related to the Somatostatins that is found to depresses neuronal electrical activity but, unlike somatostatin, induces low-frequency waves in the cerebral cortex and antagonises the effects of acetylcholine on hippocampal and cortical measures of excitability [1]. 20.30 20.30 20.40 22.50 18.00 19.50 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.11 0.72 -5.88 0.72 -4.26 5 165 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 79 0 56 144 0 17.70 81 16.29 CHANGED +-RKAGCKNFFWKoFTSC ....RERKAGCKNFFWKTFTSC 0 2 9 24 +4462 PF01680 SOR_SNZ UPF0019; SOR/SNZ family Belitsky B, Bateman A anon Pfam-B_2034 (release 4.1) Family Members of this family are enzymes involved in a new pathway of pyridoxine/pyridoxal 5-phosphate biosynthesis [1]. This family was formerly known as UPF0019. 
20.40 20.40 20.40 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.37 0.70 -4.73 10 1980 2012-10-03 05:58:16 2003-04-07 12:59:11 12 7 1849 59 591 1309 540 205.00 67 69.72 CHANGED pGotplK+GhApMLKGGVIMDVVNsEQA+IAE-AGAsAVMALERVPADIRAsGGVARMSDPphIcEIhsAVSIPVMAKsRIGHFVEAQILEAlGVDYIDESEVLTPAD.paHI-KcKFsVPFVCGARNLGEALRRIsEGAAMIRTKGEAGTGsVVEAVRHMRtlsu-IRclpsh.oEDELassAKcltuPY.ELlppltctG+LPVVNFAA .............................pss.clK+GhApMLK..........GGVIMDVsss.EQ.................A........+IAE-AGAsAVMALERVPA.DIRAsG..GVuR.MSDPc.hI....cE....Ihp....AVSIPVMAKsRI.GHFVEAQlLEAlG..VDYIDESEVLTPAD-ta.HlcKppFpVPFVCGA+.sLGE...ALRRI.uEGAAMIRTKG...E.s.GTGslVEAVRHMRplsuEI++....l...p..sh..........s...........-....D....E....L.hshAK.-LtAPY.ELlpp.ltcp.G+LPVVNFAA........................................................... 1 227 402 516 +4463 PF02208 Sorb Sorbin homologous domain SMART anon Alignment kindly provided by SMART Family \N 21.10 21.10 21.80 23.60 20.10 20.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.52 0.72 -4.59 3 265 2009-09-11 11:43:11 2003-04-07 12:59:11 11 12 39 0 84 262 1 46.10 56 5.58 CHANGED lKAsp..u.sshDEsGIPL...pTVDRPKDWYKTMFKQIHMVHKPs..sD ..........lKu.sph.uhGssDEsGIPl...poV..-RPKDWYKTMFKQIHhlp+..s............. 0 6 14 35 +4464 PF04203 Sortase Sortase family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The founder member of this family is S.aureus sortase, a transpeptidase that attaches surface proteins by the threonine of an LPXTG motif to the cell wall [1]. 
21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.44 0.71 -4.54 138 4873 2009-01-15 18:05:59 2003-04-07 12:59:11 8 19 1737 74 783 3560 293 135.30 24 51.42 CHANGED lpIPsls..lsh...s.lhp....s.....ss........tpsht.....Gsuahps.....ss...s.........Gt..ps.sslluGH....p.........sshFtpL.pcl.......c.tGDtlhlps................tsphh.pYc.......Vpsht.............l.....ss..ph......ph.....hs............pts...............pphlTLlTCs....sh.......................sspRhlVhuchs ................................................................................lpIPpls.......ls.h...P.lhp.......G...................ss.........ppsLtp............Gsuahcs.............sshs................Gt...ps....ss..llsG.H.........c.....hsssthF.s.s..L.p.ch.............................c.pGctlh.lps....................................tschh...sYc...............lpp.h.p........................................h.l.........pss....ch.......................ph.............lp.............................pts........................................cchlTLlTCs...sh..................sscRllVpuch................................... 0 283 543 695 +4465 PF04832 SOUL SOUL heme-binding protein Mifsud W anon Pfam-B_3872 (release 7.6) Family This family represents a group of putative heme-binding proteins [1]. Our family includes archaeal and bacterial homologues. 
21.20 21.20 21.20 21.70 21.10 21.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.04 0.71 -4.74 71 715 2012-10-02 11:08:51 2003-04-07 12:59:11 7 16 287 15 420 683 1151 165.80 25 74.09 CHANGED shEpPsYpVlpp.sss.......aElRcYsstlhA.........pspsp.ss.hcpu.sspuFphLssYI....hGp.Nps.....ppcIsMTuPVhpps..........................ttts.......................paphpFhh.Pupas.hps...hPtPsDs.plplpch.PupphAslpFSGhss-ppltppttpLpphLpppu...hp.....stu........sshhAtY...ssP.hs.shhRR.NElhl.lp ..................................................s..-ps.apllpp..tsp..........aE.lRpYt..s.h.hhs.................................ps.p..h...p......t.....hptu..hppu............F.tp.LhpYI........tGp..Npt..........ttcl.s.M.TsPVhpph.........................ps........................tps.....................phslpFhl..Psp.....a....p.........ps..........sPtP.s..cs...p.........lp.lp.ch..ss.hph.h.l..h.pFu..................G..h...s...s...pp..s..htp..ptppLtptL.pps.s......ht...........ts..................hhhutYssP...h..h.Rp.NElhh...t...................................... 0 183 274 351 +4466 PF04267 SoxD Sarcosine oxidase, delta subunit family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Sarcosine oxidase is a hetero-tetrameric enzyme that contains both covalently bound FMN and non-covalently bound FAD and NAD(+). This enzyme catalyses the oxidative demethylation of sarcosine to yield glycine, H2O2, and 5,10-CH2-tetrahydrofolate (H4folate) in a reaction requiring H4folate and O2 [1,2]. 20.10 20.10 22.40 23.60 19.30 17.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.99 0.72 -4.16 89 550 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 367 8 172 490 1589 83.10 43 76.88 CHANGED lIsCPhCGs.R-cpEFsatG-Ac.lsRPss..sus.s-cpWt-YlahR-NPtGhatEhWhHstGCtpWhsssRDTlTaElhs..shhAtp ....lIpCPaCG..RsEpEFshuG-A+.lsRPts......sss.hoDc-Wt-YlFhRcNP+Gh+tEhWhHstGCpcWFsssRD.TVTcclhssapst.p.......... 
0 36 89 125 +4467 PF04268 SoxG Sarcosine oxidase, gamma subunit family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Sarcosine oxidase is a hetero-tetrameric enzyme that contains both covalently bound FMN and non-covalently bound FAD and NAD(+). This enzyme catalyses the oxidative demethylation of sarcosine to yield glycine, H2O2, and 5,10-CH2-tetrahydrofolate (H4folate) in a reaction requiring H4folate and O2 [1,2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.88 0.71 -4.35 6 546 2012-10-01 23:12:28 2003-04-07 12:59:11 7 2 369 7 174 1588 3279 147.50 26 76.56 CHANGED RAhPup..AhsAslppuluhsLPstsssluoss...olhWluPDcahlls...Eutss.hAsLspulus.hu.ollDlStuRshIcloGspActlLsKusuhDLpscAFsVGtAssThhu+stshlh....RTGsDsFcllVhRSFA-phWchLp-uuuE ....................................................................t........thhtshp.p.s.hGhp.l.P...s.t..s..s..s...spss.....tslhWlGPD..E..WLlhs.......sp.s..t......s......h......tt...t....l....t....p...s..l....s....s......ht......u......l..l..-l......S........s...u..p...s...s..lcl...o....G.sps....c.p.................lL.s...+.u.s.......s....l...D.....L....p...s........p......s...F...s...s......G.....p....s.s...p....T....h....h.....s...+..s...s...l...h....lh................+...s...u...s....c...s...a.clhlh..R..SFA..cahhphLtcAut..................................................... 1 36 91 126 +4468 PF03172 Sp100 Sp100 domain Bateman A anon Pfam-B_3126 (release 6.5) Domain The function of this domain is unknown. It is about 105 amino acid residues in length and is predicted to be predominantly alpha helical. This domain is usually found at the amino terminus of protein that contain a SAND domain Pfam:PF01342. 
20.60 20.60 20.60 22.00 20.50 20.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.37 0.72 -4.06 11 240 2009-01-15 18:05:59 2003-04-07 12:59:11 8 13 40 0 80 238 0 98.90 43 21.23 CHANGED phhhEslhp+.FKcpKl-IAsAIc+sFPFLEGLRD+shITc+MYcDs.-uCRNLVPVs+VlYslLocL...E+sFshohLpsLFScVNLccYPcLppIh+SFpssht..s ......t.h.-shh++.F+ppKlEIA.AIp+sFPFLcuLRD+shIo-chac-s.-uh+N.LVPVp+VlYslLopL...E+s...F...s...hshLcsLFSclNLccYPcLhpIh+SFppsh..t.................... 0 5 8 16 +4469 PF03014 SP2 Structural protein 2 Griffiths-Jones SR anon Pfam-B_1375 (release 6.4) Family This family represents structural protein 2 of the hepatitis E virus. The high basic amino acid content of this protein has lead to the suggestion of a role in viral genomic RNA encapsidation. 28.00 28.00 28.00 28.70 27.90 27.80 hmmbuild -o /dev/null HMM SEED 620 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -13.12 0.70 -6.64 2 3837 2012-10-04 01:49:40 2003-04-07 12:59:11 9 3 32 8 0 1287 0 127.50 61 98.02 CHANGED hhGsphsSQ..sLP.................AGuR.tQ..Rs.uutWpsQ.QRPpuA.......sGsAPLTssssAssTtsVPDVDptGAlLhRQYNLsTSPLs.ushuuTNhlLYAAPlsPLhPLQDGTsopIMuTEuSNYAQYRV.thTlRaRPlVPNAVGGauIShuaWPQTToTPTSlDMNSITSTDVRlllQPG.Au.LsIPpERLtY+NpGWRSVETsuVspE-ATSGhlMlClHGoPhNSYTNosYTGsLGhlDFAlcLphRNLoPGNTNsRVoRhpsTA.Hpl+tsssG.AplTTsAAsRFMtDl+a.hGTstsGElG+GIhhsLFNLADTlLGGLPopLlpuAuGQhhYuRPVssANGEPpVKLYhSVEsA.pDKsIhlPHDIDLGsSpVshQDYsNQH.pDRPoPuPAPpRshusLRusDVLhlo.....lTsAE.....hsQshaGuuo.......ThhhhNlhTGspAsApSlDWoKsTlDGh.lpTlpt.Sto..FhsLPhhGK.uhW..GsptAGY.YpYNoTtp-.I.hlpN..GppVsh.sYTs.LGt..oshShlh.ltPhpA.......sD.P..httHThsD.CspChsLGLpsCshQu...pssEhpRLh.+lu+Th.S 
.......................................................................................................................................................................................................................................................................................PVNSYTNTPYTGA......LGLLDFALELEFRNLTPGNTNTR.VSRY...oS.oARH+LRRGADGTAEL...T.TTAATRFMKDLHF...TGTNG.VGEVGRGIALTL..FNLADTLLGGLP.............................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +4470 PF02090 SPAM Salmonella surface presentation of antigen gene type M protein Mian N, Bateman A anon IPR002954 Family \N 25.00 25.00 25.80 25.60 20.90 20.10 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.87 0.71 -4.21 4 199 2009-09-11 15:23:32 2003-04-07 12:59:11 10 1 185 0 4 50 0 140.00 66 98.38 CHANGED MHSLTRIKVLQRRCTVFHSQCESILLRYQDEDRtLQAEEEAIlEQIAGLKLLLDTLRAENRQLSREEIYSLLRKQSIVRRQIKDLELQITQIQEKRsELEKKREEFQEKSKYWLRKEGNYQRWIlRQKRhYIQREIQQEEAESEEII .................MHSLoRIKVLQRRCTVFHSQCESILLRYQDEDRtLQAEEEAIlEQIAGLKLLLDTLRAENRQLSREEIYoLLRKQSIVRRQIKDLELQIlQIQEKRsELEKKREEFQcKSKYWLRKEGNYQRWIIRQKRpYIQREIQQEEAESEEII...... 0 2 2 3 +4471 PF02510 SPAN Surface presentation of antigens protein Mian N, Bateman A anon Pfam-B_1678 (release 5.4) Family Surface presentation of antigens protein (SPAN), also know as invasion protein invJ, is a Salmonella secretory pathway protein involved in presentation of determinants required for mammalian host cell invasion. 
20.10 20.10 20.90 20.60 19.80 19.70 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.97 0.70 -5.39 2 189 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 170 0 7 96 0 271.70 61 90.70 CHANGED MGDVSuVSSStNhLLPQQDEVuGLSEALKKAlEKHKTEY.sDKKDREYGDoFVMHKETALPVLLsAhRpGAPAKSEpHsGpsSGLpHNsKG-FRIAEKLLKVTuEKSVsLlus.tKsDKoAALLSS+NhQlttVuuKKLSsDLKA...VSELADNshtloDDNlKA..sDpKsIsGEGlRKEGs.LAtDVAsSRhAAsNTuKuDDKDHKKIKEssQLPlQPTTIADLSQLSGGDEpMPLAApSK.hMThFPhADGVKt-DsSLTYRFQRWGNDYSVNIQARQsGEFSLlPSNTQVEHRLHDQWQNGNPQRWHLhRDDQQNPQQQQHtQpSGEEDDA ......................................................................................................................................................MGDVSAVSSSGNILLPQQDEVGGLSEALKKAVEKHKTEYSscKKDRDYGDAFVMHKETALPVLLAAWRHGA.AKSEHHNGNVSGLHHNGKGELRIAEKLLKVTAEKSVGLISAEAKVDKSAALLSsKNRPLEuVSGKKLSADLKAVESVSEVsDNATGISDDNIKALPGDNKAIAGEGVRKEGA.......PLARDVAPARMAAANTGKP-DKDHKKVKDVSQLPLQPTTIADLSQLTGGDEKMPLAAQS..KPMMTIFPTADG.VKGEDS....S.LTYRFQRW..GNDYSVNI....QA.....R...QA.GEFSLIPSNTQVE...HRLHD.QWQN.GN.PQRWHLTRDDQQNPQQQQ.HRQQSGEEDDA.......... 0 2 3 4 +4472 PF04573 SPC22 Signal peptidase subunit Waterfield DI, Finn RD anon Pfam-B_4675 (release 7.5) Family Translocation of polypeptide chains across the endoplasmic reticulum membrane is triggered by signal sequences. During translocation of the nascent chain through the membrane, the signal sequence of most secretory and membrane proteins is cleaved off. Cleavage occurs by the signal peptidase complex (SPC) which consists of four subunits in yeast and five in mammals. This family is common to yeast and mammals [1,2]. 
25.00 25.00 30.20 25.20 24.00 24.40 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.14 0.71 -5.04 14 439 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 303 0 302 411 5 141.80 28 86.75 CHANGED MpohlsRuNul.huaoLolhAhlshssahSsh.FpchpssssIpshc..hhl+slpcFs.sspc+sDhuhlTFDLssDLspLFNWNsKQLFlYLoAEYcTtpNt.lNQVVlWDKIIhcs-puplsh+shpoK...YhFaDsGsGL+Gt+NloLsLpWNVhPpsGhLshspus.GphshsFPspYp ....................................................ph..Rhpth.hs.h.h....s..h.hhh...hh....hhh..h.s.h....h........p..s......s..s.p..l.p..........................h..ph.p..ah.....h.tp...t...p-.s...lpFs.ls...........sD.......L...p.....s.........l.....Fs.WNsKQlFlYlsAcY........to.....t........p......s.........t.....h.......N.....pl.....slWDpIl...pt......-.p.......s.h...lthp.s.......psK............Y.hh.D...s..s.tl........tt.pshslplpasl.P.sGhl.hs.p........................................................... 0 87 158 247 +4473 PF05122 SpdB Mobile element transfer protein Yeats C anon Yeats C Domain This proteins are involved in transferring a group of integrating conjugative DNA elements, such as pSAM2 from Streptomyces ambofaciens ([1]). Their precise role is not known. 25.00 25.00 53.50 53.30 19.80 17.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.77 0.72 -3.94 3 33 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 23 0 10 35 0 52.40 68 85.52 CHANGED MRIGPVQIGTHRDRHGQTKHAAVCTNDGCGWSADYTSQSAAQLAARTHRCKVS ....RIGPVQVGTahDpR.G+pKHsAACTAPRCGFSADYsSRuAAELAARTHRC.Vp.. 0 2 8 10 +4474 PF03771 SPDY DUF317; Domain of unknown function (DUF317) Yeats C anon Yeats C Domain This a sequence family found in a set of bacterial proteins with no known function. This domain is currently only found in streptomyces bacteria.\ Most proteins contain two copies of this domain. 
25.00 25.00 26.20 25.10 24.00 24.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.19 0.72 -4.48 16 112 2009-01-15 18:05:59 2003-04-07 12:59:11 11 2 16 0 67 132 0 64.50 25 43.21 CHANGED hsSPDupstlpaps.........tsssssWpl.....hsssssststWpApFsspsPscLluuhssuL.ssssshpc .......................SPDthhhlta.s.........sstsssWpl.....tsssssssstWtAsFsspTPs-llAuhssuL.sssts................ 0 17 57 67 +4475 PF00435 Spectrin spectrin; Spectrin repeat Bateman A, Finn RD, Stabach P anon Pfam-B_1 (release 1.0) Domain Spectrin repeat-domains are found in several proteins involved in cytoskeletal structure. These include spectrin, alpha-actinin and dystrophin. The sequence repeat used in this family is taken from the structural repeat in reference [2]. The spectrin domain- repeat forms a three helix bundle. The second helix is interrupted by proline in some sequences. The repeats are defined by a characteristic tryptophan (W) residue at position 17 in helix A and a leucine (L) at 2 residues from the carboxyl end of helix C. Although the domain occurs in ultiple repeats along sequences, the domains are actually stable on their own - ie they act, biophysically, like domains rather than repeats that along function when aggregated. 
24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.14 0.72 -3.75 83 21182 2009-09-14 14:25:53 2003-04-07 12:59:11 16 452 349 133 9769 19145 10 103.80 17 36.64 CHANGED pphppFtpcss-hppWlpcp.cthl..ssp-h.spclsslpsLhc...+Hcthcp-lss.ppsplpplp....phuppL.hspt...t.ss...pplpp+hpplsppWppLpphstpRcppLpp ............................................t..hppF...pphpp..h...t...sWlp........c.p.....c...t.............h...l.......p.s..p...s.....h.....u......p...s..h....p.....s....l..p.....t......hlc....................ca..c..s..h.p......p....-..l...p...........s...p..pspl.p.p.lp..............................ptup.pL....hppt................t.ps.....................ppl...p.pp...hpp........l......p.p...pW..p...p.L.pp.hht.p..RpppLt......................................................... 0 2238 2954 5943 +4476 PF01564 Spermine_synth Spermine/spermidine synthase Bateman A anon Pfam-B_798 (release 4.0) Family Spermine and spermidine are polyamines. This family includes spermidine synthase that catalyses the fifth (last) step in the biosynthesis of spermidine from arginine, and spermine synthase. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.43 0.70 -5.44 17 3831 2012-10-10 17:06:42 2003-04-07 12:59:11 12 30 2623 95 1322 3677 1590 224.20 28 64.46 CHANGED hWFpEh.s.....................hss.uhsh+VcclLact+ScaQclhlaco...........psaGclLl............LDGslQhoEcDEahYpEhlsHlshhs+s.....NP++VLlIGGGD........................GGsLREllKHssV.............EclshV-IDptVI-hu+calPphuss......hpcs+lpl.hlsDGhpalpphps.....paDlIIsDsoD.PhGPucs.....LFpctaachhtcsLptsGlhssQu..cs.alphchhpslhpsh+psh...sthhhsslPTYssshhsahlsocp........ps .............................................................................................................................................................................h....................................htht.p..lhptp.o..a.Qcl.l..h.cs.......................ta.Gphhh..........................LDG......h....l.....h..o......p.....t..DE....a....h.Y...pE......h....hs....H...ss..hh.s.cs..............p.s.+.+.VLl.lG...G.GD..............................................................G...u.s...l.R.E...l...h..+a.......s.l...............................................................................................c..p.ls...h..V..E...I....D.......t....t..Vlc.h..s....+....p....a...h..s.p.hsss...........................hcDs...Rhpl..hlsD..Gh.p..alpp.spp....................paD.....lI........I..s.....D......s......o.....D...P..........h....G.............s.ups............La..o.....p.t.FY.pt.s.tc.sLp.s.s.G.l..hls...Qs........tss.h..h..p.......p.....h..h.p.h...h.p.p.h.p..p........h.......F.............st........h.s.lPoa.s.t.hhhhhhtpt...........tshtt.................................................................... 0 463 818 1110 +4477 PF02819 Toxin_9 spidertoxin; Spider toxin Griffiths-Jones SR anon Homstrad Family This family of spider neurotoxins are thought to be calcium ion channel inhibitors. 
21.30 21.30 21.50 21.70 21.00 21.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.11 0.72 -3.89 6 29 2012-10-01 22:06:18 2003-04-07 12:59:11 10 1 12 6 5 49 0 39.90 38 58.82 CHANGED Chu.sYc+CshGtpPCCcsR.sCcCslhhsNCcCK..+hlhEhhGhu ...Chs.hhcpCshsppsCCcsp.sCpCshhssNCcCp..+hh.............. 0 1 2 5 +4478 PF02513 Spin-Ssty Spin/Ssty Family Staub E, Mian N, Bateman A anon Staub E Repeat Spindlin (Spin) is a novel maternal transcript present in the unfertilised egg and early embryo [1]. The Y-linked spermiogenesis -specific transcript (Ssty) is also expressed during gametogenesis and forms part of this Pfam family. Members of this family contain three copies of this 50 residue repeat. The repeat is predicted to contain four beta strands. 25.00 25.00 25.70 28.00 22.10 18.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.28 0.72 -4.40 27 646 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 56 6 298 766 0 48.10 42 57.44 CHANGED VGppVpHhac-s..t.hspWcGhVLsQlPspsolaaIKY-sDsslYshpLh ..lG+pVpHsa--G.pt...shscW+GhVLsQVPspPolYaIKY-sDsslYsYpLh.......... 0 15 29 99 +4479 PF05215 Spiralin Spiralin Moxon SJ anon Pfam-B_6625 (release 7.7) Family This family consists of Spiralin proteins found in spiroplasma bacteria. Spiroplasmas are helically shaped pathogenic bacteria related to the mycoplasmas. The surface of spiroplasma bacteria is crowded with the membrane-anchored lipoprotein spiralin whose structure and function are unknown although its cellular function is thought to be a structural and mechanical one rather than a catalytic one [1]. 
20.80 20.80 20.80 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.55 0.70 -5.25 4 22 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 13 0 0 21 2 211.00 67 98.10 CHANGED K+LLSILAVFGVSAVGTTSVlACNKTESNNLSRVKTIAAPtTVAAtssppVTKtEIKsuL-sNVLKAVQGVVKTApAsDF.aEVYpDNcGpAL-TlNLcAGpV-VYVQITPAKDKTVVIGKoGYIKVTLPKt...hKsDISsVTVsEQTVtIKsusPpsVTKsELpAVNp.AsLApAVLsAIKsKsssstAS-FtITNNGstGsYSAsKsVEVTVKApDsSspIoGpFKFNAKVTAThs KKLLSILAVFGVSAVGTTSVVACNK.TESNNLShVKTIAAPATVAstp........PKpVT+sEIKTALE....ANVLKAVQGVVKTATAA.DFQFDVYpssKGT.uLpTIsLEuGpV-VYVQITPAKDKTVVIGKoGYIKVTLPK....hKVDIpsVslspQhVtIKAucPKpVpKDELNAVNT.sTLApAVL-AIpphAPNAG..ASDFEITNNsstGsYpstK-VcVTVKAKs-SsNISGpFKhpAKVpAhh........... 0 0 0 0 +4480 PF03533 SPO11_like SPO11 homologue Griffiths-Jones SR anon PRINTS Family \N 21.10 21.10 21.20 22.50 21.00 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.17 0.72 -4.08 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 23 0 15 36 0 42.20 74 10.96 CHANGED AFAPMGPEASFFDVLDRHRASLLAuLRRGGGEPPuGGTRLASS ......AFAPMGPEASFF-VLDRHRtSLLAALRRGGtEPPuGGoRlASS.... 0 1 2 3 +4481 PF05032 Spo12 Spo12 family Wood V anon Pfam-B_51047 (release 7.6) Family This family of proteins includes Spo12 from S. cerevisiae Swiss:P17123. The Spo12 protein plays a regulatory role in two of the most fundamental processes of biology, mitosis and meiosis, and yet its biochemical function remains elusive [1]. Spo12 is a nuclear protein [2]. Spo12 is a component of the FEAR (Cdc fourteen early anaphase release) regulatory network, that promotes Cdc14 release from the nucleolus during early anaphase [3]. The FEAR network is comprised of the polo kinase Cdc5, the separase Esp1, the kinetochore-associated protein Slk19, and Spo12 [3]. 
20.50 20.50 21.10 21.30 20.40 20.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.40 0.72 -7.57 0.72 -4.33 9 136 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 117 0 96 116 0 34.20 45 21.64 CHANGED ASPTDsLlSPCSpKLs-HKuKhFtt+spPspLths ..SPSDslhSPCopKLsshKsKpat.pt.ppsphhh............... 1 27 53 81 +4482 PF03907 Spo7 Spo7-like protein Wood V, Bateman A anon Wood V Family S. cerevisiae Spo7 Swiss:P18410 has an unknown function, but has a role in formation of a spherical nucleus and meiotic division [1]. 34.10 34.10 34.70 37.70 21.60 34.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.29 0.70 -4.72 9 132 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 128 0 99 124 0 205.40 40 52.65 CHANGED SPsuhIFRNLLILE-sLRcQshp.+hh+hQaThFLohLhulssahhYtLYasscps....pG.......lhRhhLphsllhhhlTllLFHlSGpY+RTIVlPRRFhs.TNKGlRtFNlKLVKl+ssa.cchhD.lRhh.h.ls...la.hphhlhhutps..u.hhpFhpsspl+spsR...........................lGusDVKLlLsP+sFos-IREGWEIYRsEFWs+EusRRRcppp ...........................................SssstIYhNLLILEsuLRtQhlpLRtRRRpaThFLhlLshhluahsYt.Lahpscpsu...pG..................................hlchh.phsLhu.ullTslLhasoGpacRsItaPRRalssoN+GLRshNsKl.Vhl+ssWhpch.hshlta...............hphhh.....t.ts...u...tahps.s..pp..p................................................................................Gus.lKLlL.s+sFSsphREsW-.YRsEaWt+.EstRRt...h.......................... 0 23 51 83 +4483 PF01052 SpoA SPOA_protein; Surface presentation of antigens (SPOA) Finn RD, Bateman A anon Pfam-B_408 (release 3.0) Family This family includes the C-terminal region of flagellar motor switch proteins FliN and FliM. It is associated with family FliM, Pfam:PF02154. 
23.00 23.00 23.30 23.20 22.70 22.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.01 0.72 -4.38 130 6032 2009-01-15 18:05:59 2003-04-07 12:59:11 15 12 2189 8 1289 3714 814 75.30 26 29.28 CHANGED ltpplppsslplpshluctplslp-lhpLpsGcllsLsps.htctlplhl....ssphlhpGchs...thss....phulclpchhppp ............h..t.l.clslplssplGcsphslp-lLpLp.GsVltL-p...ss-...s..lc.l.h.l....suthlupGcls...slss....phul+lscllp..s....... 0 420 802 1028 +4484 PF05036 SPOR Sporulation related domain Bateman A anon COG3147 Domain This 70 residue domain is composed of two 35 residue repeats found in proteins involved in sporulation and cell division such as FtsN, DedD, and CwlM. This domain is involved in binding peptidoglycan [1]. Two tandem repeats fold into a pseudo-2-fold symmetric single-domain structure containing numerous contacts between the repeats [1]. FtsN is an essential cell division protein with a simple bitopic topology, a short N-terminal cytoplasmic segment fused to a large carboxy periplasmic domain through a single transmembrane domain. These repeats lay at the periplasmic C-terminus. FtsN localises to the septum ring complex. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.47 0.72 -3.82 136 7624 2009-01-15 18:05:59 2003-04-07 12:59:11 8 118 2474 2 1762 5585 1681 74.80 19 23.88 CHANGED ssssahlQlu.uhs...spssAcphhscLptp.sht...........sthpssssha+Vhl.GsasspppA..pphtp.pLp.......tthsshlhp .........................................t..ttahlQlu.uhp..........stspA....pp....h.tt..p....Lptp..uhs..................sphtsss..shaRlhl.G.....sass+ppA....pphhp.pLp......tthpshh..h.................................. 
0 527 1100 1450 +4487 PF03845 Spore_permease Spore germination protein TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.94 0.70 -5.78 17 1773 2012-10-03 01:44:59 2003-04-07 12:59:11 8 5 343 0 394 2732 134 306.50 19 86.06 CHANGED plostQhhhlIhsh.lGsGlLshstshAcps.t..uWIullluullshlhhhlhhhlhpp..asppslhphhpchhG.KhLGtllshlahhYFlhhush.lRshuEllthahh.csPhahlshhhhhlslYhlhpGlcsluR....hhhhhhhhhhhhhhlhlhshphhchcNLhPlhspGlhslL+u..stsshhsasshElhlhlhsahpspKpst+hshhuhhlsslhYhloshlsIsshus-hsppthaPtlshhcslcls..hl-Rh-hhhlhlahlhhFhshslhhausshuhsplF+hpppp......hlhhlhslhhlhs.lhpsts.h ..............................................................................................................................ls.hphhhhlh...h....lG..s..u.....l.L...sh...sph...hs..pts...t..D....u.....W.l..u...l...l.l.u......sl.hs....h.l..h.h..h...h..h....h.h....l.h.pp............h.s..s..t......s......h....h...p..h.h.........p.p.h..h......G....+........h......l.G.......p......l......l.s.....h....l.......a.......h....h.......a....h...h....h....h.s...s.......h..h...........l.......p....s....h....s.....p.......l...l.p...h...a......l...........h.............s............p.................T...P..h..h..............h..............l..............h............h..............h..............h.h...l.........l.........s.h.........Y....h....s........t.t..G.......hc.........s...ls+............hhthh....h...s...h.h....h.l...h...h....h.h.....l.........h....h..........h............s...........h..........p.................h..........c............h.........p....p........l........h......P...............l............h.......p............p.........u.......h...h........s........l.....lpu........sth.s..h...h....s....a....h..uh.t....l.h.h.....h.l.h.P.a.l....p......p....p.....p....p..h..h.+.....s.h.h...h.u..h....h......h.s.s.l..h.h......hh.h.sl.hsl.s.hh.......u..p.th..tp..h..haP.s...l.ph.h..+h.l.
p.ls.......a..l-.R.h.-h....lhl.....h...h....Wh....h....s..lhss..h.....s.hhh.a.sssh..s..hp....p.........l.........h.p.h...pppp........hl.h.h.h.h.s.l..l..hh.h.s.hh........h.............................................................................................................................. 0 202 327 345 +4488 PF00588 SpoU_methylase SpoU rRNA Methylase family Bateman A anon MRC-LMB Genome group Family This family of proteins probably use S-AdoMet. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.89 0.71 -4.31 139 17854 2012-10-01 22:53:19 2003-04-07 12:59:11 14 38 4871 41 4348 12081 7850 141.80 26 56.74 CHANGED shhllLsplpcPtNlGulhRostshGs.pslhl..hp.psh...hssp....sh+s.uhGu.h.hls.hhhh...sshpc.hlpp..lcpt.s.hhl..hus....sh....pup......shh...........ph....sht.......pthsllhGsEspGlspphhctsc.t..hlpIPh.tsph.pSLNlusAsul.hla ...........................................................................................s.hhllLppspcPtNlGslhRossshGs....p...s...lhl.............s.....p........t....sh...................s..tp.................s.h+s.....uhG....u......hp.h..l....s...l.hhh.................s..s............l..s..p.....h............l.p..p............hptt....t.....h...h.l..............hus....st........sup............shh...............ch.............shs......................................................tshsllh......GsE......sp.G..l...s...p..p....h........h....p..t.....s....-....t...............hl.p.I.........P........h......t............s............p.............s.....p...............S.LNlus.AsullhY............................................... 0 1420 2797 3687 +4489 PF03862 SpoVA spoVA; SpoVA protein Finn RD anon DOMO:DM07026; Family Members of this family are all transcribed from the spoVA operon. These proteins are poorly characterised, but are thought to be involved in dipicolinic acid transport into the developing forespore during sporulation [1]. 
25.00 25.00 27.40 26.90 20.40 20.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.38 0.71 -4.00 73 1094 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 417 0 209 677 19 114.50 41 85.14 CHANGED hs.hlpAFlVGGlICsIGQllhshh.....h..chssshshlshlhlGulLTGlGlY-clucaAGAGusVPlTGFuNulsuuAlEappEGhlhGlusshFplAGslIsaGlhsualhuLIa .....shlhAFlVGGlICslGQllhchh...........+hssshshsohVhlGAlLsGh.....GlYDclspFAGAGusVPlTGFGNSlspuAhEttpc..shllGlus...shFcluuusIshullhualhuLI............................................ 0 103 167 179 +4490 PF04026 SpoVG SpoVG Kerrison ND, Finn RD anon COG2088 Family Stage V sporulation protein G. Essential for sporulation and specific to stage V sporulation in Bacillus megaterium and subtilis [2]. In B. subtilis, expression decreases after 30-60 minutes of cold shock [1]. 22.00 22.00 22.50 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.89 0.72 -3.95 34 963 2009-09-11 01:05:24 2003-04-07 12:59:11 7 3 793 10 180 528 40 83.90 54 80.87 CHANGED MplTDVRl+plssc...u+lKAhsSlThDssFVV+DlKVI-G.ppG.LFlAMPSRKst-....................................Gpa+DIAHPIssEhRpplpcuVlctYccth .............................................MplTDVRlR...+.lps-......G+MKAlsSITl..Dc....p.FVlHDl+V.I-G.psG.....LFVAMP...S+..+.Ts.D....................................GEFRDIAHPINS-hRpcIQcAVlptYcc..h....................... 0 100 147 165 +4491 PF04293 SpoVR SpoVR like protein Kerrison ND, Finn RD anon COG2719 Family Family member Swiss:P37875 is Bacillus subtilis stage V sporulation protein R, which is involved in spore cortex formation [1]. Little is known about cortex biosynthesis, except that it depends on several sigma E controlled genes, including spoVR [2]. 
18.50 18.50 19.80 19.00 18.30 16.30 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.78 0.70 -6.11 62 1383 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1289 0 332 906 166 407.70 54 84.95 CHANGED us-WsFchLpphpccIpclA.ccaGLDsYPsQlElIouEQMhDAYASsGMPltYpHWSFGKpFhpscptY++GhhGLAYEIVINSsPCIAYLMEENohsMQsLVlAHAsYGHNsFFKsNaLF+pWTDApuIlDYLsFA+sYIscCE-+..aGh-tVEplLDusHALhsaGVDRYtRstplShpcEctRp.....p-Rctahp.pphN-LW..RTlP..........pcppttpp..............ppt.+aPtcPpENlLYFlEKpAPhLEsWQREllRIVRcluQYFYPQ+QTpVMNEGWAoFWHYsIhNcLacpGtlsDuhhlEFLpSHosVlhQP.hssPtaoGINPYsLGFshhpDIcRICppPT-ED+cWFP...-l.......AGs.shhcsLc.Ahcsa+DESFIpQaLSPclhR-h+LFsltD-sccs..hpVsuIHsEcGY+plRcpLuppYslush-PsIQVhclDhpGDR ............................................GsDWTF-LLchYhsEIc+lA.chYtLDsYPpQIE.lITuEQMMDAYuSlGMPlsYsHWSFGK+FlcTEphY++GQpGLAYEIV.......INSNPCIAYLMEENTlsMQAL.VhAHAsYGHNSFFKNNYLF.................+sWT......D...................A..s........u....I..lD..............YLlFA+pYIscCEER..YGl-EVEclLDSCHALMNaGVDRY........K....RP.p.K.l.S...h.pE.E.csRp..........................cpREcY..LQ....SQVN....LW....RTlP+.......................cc.ccp..ps.................t...ct+RaPs..E.PQENlLYFh..........E..........Kp..........AP..........L..........L..........E....s....WQREILRIVRKluQYFYPQ+QTQV..MNEGWATFWHYTILNcLY-.......cGclT-.cFMlEFL+SHTsVVhQPsasuPaaSGINPYALGFAMFpDI+RICpsP.........T-.......E.D+.hWFP...........-l............AGu..cWL-sL+aAMcsFKDESFIsQaLSP+lhR-h+hFslhDD-+csh.lcluAIHs-cGY+pIRppLusQYsLushEPNIQVasVDh+GDR.................................................. 0 93 198 268 +4492 PF04232 SpoVS Stage V sporulation protein S (SpoVS) Kerrison ND, Finn RD anon COG2359 Family In Bacillus subtilis this protein interferes with sporulation at an early stage and this inhibitory effect is overcome by SpoIIB and SpoVG. SpoVS seems to play a positive role in allowing progression beyond stage V of sporulation. 
Null mutations in the spoVS gene block sporulation at stage V, impairing the development of heat resistance and coat assembly [1]. 20.60 20.60 21.50 21.00 20.40 20.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.72 0.72 -4.41 26 629 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 425 4 199 331 10 85.30 63 87.47 CHANGED ME.lLKVSu+SsPsuVAGAlAullRcpupsElQAIGAGAlNQAVKAlAIARGalAPsGlDLlslPAFs-lpI-GE-RTAIKhlVpsR .........MElLKVSu+.SsPNSVAGALAGVlR.E+..G.sA..EIQAI...GA.......GAlNQAVKAlAIAR.G.FV...AP..oGl..DL.lClPAFs-IpI.DG...E....ERTAIKLIVpP........................ 1 106 166 186 +4493 PF03539 Spuma_A9PTase Spumavirus aspartic protease (A9) Griffiths-Jones SR anon PRINTS Family \N 20.90 20.90 21.00 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.03 0.71 -4.32 4 22 2012-10-02 15:32:34 2003-04-07 12:59:11 9 6 15 1 2 24 0 147.80 49 14.35 CHANGED IKGs+LKGaWDSGA-ITCVPthaL.-EcPltpphIpTIHGppcpDVYYlshKIpGRKlpsEVIuTsLDYlllsPuDlPWhhKtPLELTIKlDlccQQcpLLpposLSpcGKchLKcLF.KYsALWQpWENQVGHRRIcPHKIATGTLpP+PQKQY+INPKAKs .........................hKGsKLpuaWDSGApITClPpsaLppEpPltpp.lpTIHGppppsVYYlphKlpGRKlpsEVIuosh-YhllsPsDlPWhhppPLpLTlhlslp-.pcplLpposLscctKppLppLhpKYssLWQpWENQVGHR+IpPHpIATGThtP+PQKQY.INPKAKs.... 1 2 2 2 +4494 PF03779 SPW SPW repeat Yeats C anon Yeats C Repeat A short repeat found in a small family of membrane-bound proteins. This repeat contains a conserved SPW motif in the first of two transmembrane helices. 20.80 20.80 20.90 21.00 20.60 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.78 0.72 -4.59 16 414 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 200 0 160 360 9 51.30 29 53.28 CHANGED hpWshsllGlWhllSPWIl.Ga.ossuuhhhsslIsGllVslLu...lhhutsspc .............hhshllGl...alhhSPWll..Ga...uss.s.u..hhhss.llsGlllslLu.....hhhh.....t............. 
0 38 108 138 +4495 PF00494 SQS_PSY Squalene/phytoene synthase Finn RD anon Prosite Domain \N 26.00 26.00 26.00 26.10 25.80 25.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.85 0.70 -5.10 152 3383 2009-01-15 18:05:59 2003-04-07 12:59:11 14 23 1893 58 1260 3142 2434 253.00 23 78.59 CHANGED hcptucoFhhushhLs.tphRpslhslYuasRhsD-lsDpss...............s.stttph..................Lptacptlpthh................................................t......ps....lh.......................psltpshpphp.........lshp..hhtpllcuh....thDlp....................................pp........hh.ohs-.........LppYshtsAus..VGhhhhplh...sh..............ssp.......t..hphA............................p.....p...........lGhAhQlsNllRDlscD................htp...GR.hYLPt-..hh.tp..........sls.p.....plht........................stt.hpthhp.........................phhstAcphhpputshltt.............................l.sttsphshhhshslh.ttlLc...p.lcpssht.h.....hp..tRsplsthc ...............................................................................................................................................h..phucoFhhuh.hhLs..tph.Rpslhs.lYshsRthD..-..lsDsss...............sst.tth..................h.Lptacppl.pp.hh............................................................................................ht.stcs..lh..........................................tsL.t.p.sh...p.pat....................lshp...htshl....suh.....thD..lpt........................sp.........ht.shs-L.tt..YChtsAus..VGhhhstlh.uh..t..............................................pst...............ts..hphA............................pp........lGhAhQl.sNllR.DlsED.....................................hpp......GR..lY...lPt-....h..tp.......h..sl...s....p.....clhttt............................stt..ht..phhp.........................thhppAc.phhppu.ht.hlst.........................................l...s..t..p..s..t..h..sh..h...hshhhh.hthLpt.lptssh..t.h........p...t+h.l....t...........................................
................................................................. 0 384 788 1058 +4496 PF00299 Squash squash; Squash family serine protease inhibitor Finn RD anon Prosite Domain \N 22.00 22.00 23.20 26.10 21.90 21.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.73 0.72 -4.13 12 37 2012-10-01 19:46:11 2003-04-07 12:59:11 13 1 17 32 0 60 0 28.70 64 90.24 CHANGED RhCPRILMcCK+DSDCLucClClcph.aCG ..RhCPRILMcCKpDSDCLupClChcp.G.aCG... 0 0 0 0 +4497 PF02117 7TM_GPCR_Sra Sra; 7TM_GCPR_Sra; Serpentine type 7TM GPCR chemoreceptor Sra Mian N, Bateman A anon IPR000344 Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Sra is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 18.90 18.90 19.40 19.10 18.60 18.20 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.96 0.70 -5.82 10 141 2012-10-03 04:04:29 2003-04-07 12:59:11 11 3 7 0 141 262 0 276.40 20 94.23 CHANGED MSs.sCAScsclpRhsSLNF+IuQalsLlsIllTFIhTYaAlKllhp+SIFplSTKILLhpNLFaANLHQlhYuIpslphLY+uFFhls-PCshLpoEt-CthYhcVLlsGsSGMlYuQTGLLIERsCATFl+sYcpKpShhsGlsISIlVLhsShuTu+IIIWDDPL-salLuCahaPpcSssRushFhsIsTlLolFNLllSllIh+YNK+LEY..STRFpVusRFpKRElI-STpTICFLshoQFlhhFlYShGlhlL+pI+..phIshcpaahhVVWsYTlPFIAlhFPlLLIYRIRpo+ssRsphIpplTspKQTQ-EHI+QhKshW 
.........................................................................................................................................s....h.h.h.pS..hhhs.h...h...hhh...h.h.o...h...h.hshh...u...l.....p.hl....hp...p...s...l....a..........p.....u....T+....hLLh..slh.ss...hHp.h..h...............h.....h.......................t......hh...................l....h..........+s.....h..h..h.....sc.sCp.lh..h.pp.....-Ch.........h.h.h.hh.h...s....h...h...h.h....h..ph......u....Lh.l-R..............hhup.h.h...p.h......t.ph.phh..u..h.h...............l......h....hhlh....h.oh.................h........s.h....hh.....h....h.h....s...s...sh...ss.h.h...s.C.hh.....st......s.h..t..p.........h....p....h..hhh..h..hh..h.l...s...lh..s.h.l..h..s.h..h....lh.hh....s.....p.+.h.ch...............p.pp.apl...t........tRapp..hEsl.oopsl...s..hlsh....hQhlhh...hla.shs.h....hhhh...h..p..p...h...s.......h....a....h......h..h.....a.h..a...............s...h..s.ahs...h...hhPhllh.h.p....h..p.http.R...t.......t....I....t.t.h.pt..t........ts....pp.a.h.tp.hpt.W................................................................ 0 43 57 141 +4498 PF02175 7TM_GPCR_Srb Srb; Serpentine type 7TM GPCR chemoreceptor Srb Mian N, Bateman A anon IPR002184 Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srb is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 
21.70 21.70 21.90 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.64 0.70 -4.81 3 69 2012-10-03 04:04:29 2003-04-07 12:59:11 11 4 5 0 68 166 0 199.20 35 65.84 CHANGED FHPVYRlAQFYoFhVSsFAhPuLIYFMFcKLFKLsFHGNLKsLLIuYFIolLLFAlhlCFsFGYQFFVPFFl+SNCDLIINuTLFKYGHsoulFlMTlPMlLPluFTIERFsAMKMAcoYE+lRTLLGPVLVllLIIIDshhlYhIappEsFDcsFISFlllPuToA.LsFNoFLWalLYLNIsNFlhNllLLhlH+KLKpRhhh+NoSLSTKYplEEISQSS+FTLIVTFTHLLFF ...........................................................apslaRhu.haphhluhhuh..sLh...a...F...lhh...+l.h...................F..HsNL.K.......h..............l....h..hsYFhshhl...auh...........hhh.h...s.h............h...hphh....h....P.F...........h....s..p...s.p.CsL....l...I.s.......h...h....aKh..hphhhh.hhhTh...s..hhh..P.huhoIERalAhth.AcpYEps..s.hLGPl.L..s...h..h..h...............h.hh.shhl.....hhhlac.s.....E.pFs.ss.l.S.F....hh..h..P.ss......s..........A.........p..h......a...h.h..h..L...l..hl.p.hhNhl.h..N.hhL.lhh......p....p..+h.Kp...................................pps.o..Los+Yph.EE....lhpS.oK.Fs.l..h..lhF.hHllFF......................................................................................... 0 13 20 68 +4499 PF00530 SRCR Scavenger receptor cysteine-rich domain Bateman A anon Reference [1] Domain These domains are disulphide rich extracellular domains. These domains are found in several extracellular receptors and may be involved in protein-protein interactions. 
26.10 26.10 26.10 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -11.18 0.72 -3.66 136 8985 2012-10-03 20:35:02 2003-04-07 12:59:11 13 369 133 13 6194 7713 21 96.30 35 47.20 CHANGED ssGss...spGRVEl.h.........s.G.........pWGsVCss.s.......Wshp..sApVlCRp..LG........h.ssshps.tsssh...........ts.ts.....lh..hss.....lp......CpG.sEssLtpC...........pshst..ps...C.sp.t..ps....suVhCp ..............................................................................sGsst...s..pG.RVEV.h.............s..G............pW.GT............V.CDc..s...........Ws....hp....-........Ap...VVCRQ....LG.................................h..usu.h..ps...tssta............................GtG..sGs.........lh....lcs..................lp........................C.......p.......G.....s.......E.........s..s....L...h.pC..........................t......ps.h.st......ps............C...sH...p......cD......Au.VhC................................................................... 0 3267 3815 4627 +4500 PF00319 SRF-TF transcript_fact; SRF-type transcription factor (DNA-binding and dimerisation domain) Finn RD anon Prosite & Pfam-B_6396 (Release 8.0) Domain \N 20.40 20.40 20.40 20.60 20.20 20.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.43 0.72 -4.73 10 5768 2009-01-15 18:05:59 2003-04-07 12:59:11 13 20 1081 36 1424 5480 5 47.50 55 19.44 CHANGED KpIENpoNRpVTFSKR+sGlhKKAaELSVLCDs-VulIlaSssG+LaEauo .................................+I-NptsRQVTFoK...R.R.sGLhKKAh.EL.S.VL..C.D.A-Vu.L.I..l.F....S..s.s....G..KL..a-aso................ 
0 300 819 1128 +4501 PF02118 Srg Srg family chemoreceptor Mian N, Bateman A anon IPR000609 Family \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.76 0.70 -4.96 49 400 2012-10-03 04:04:29 2003-04-07 12:59:11 16 10 7 0 395 556 0 228.30 17 81.72 CHANGED hhlQhsYhlPuhhLhlhhlhhlhhs++...ahpsSFatLaphDhlsslhhhl.shhhsRlhhah..lC.hhs.h.hhssshhhshhhhhhhahhthphloplhlolsRho.sVhaPhpapphWp+hh..lllhlhllPahhlWshll...u.ps.lthh...tGshhhsahctl....assho...hapllahlhslhlslhoshlshh+lpths.p+hcplE+pLshhshhhohsahhhshhphh.....hshhhshhs..hhphhhhhhhhs.Dhhslut.............PhlLllhssplRppl ........................................................h.h.Yh..h.sh..h..lh....h.h...h....h...h....h....l...h...h...t...p..t......................pp..sF.....ah..l........h...hsh..........h.....h...............shhhhh...........shhh.Rh..h............s...h...h....h.....................t........s.......h........h....hp.h.h.h...hhh...a.hhh.....h....p.h.t...hhhshsRho.slh..h........h......p...............h...............p...............p..............h...W..p..............p........h......h.....h......h..h....h..hl..h..hh....s.h.h...h..h.h.phhh.............t.thh.h.......h.......s....s...h.h..h.......h....p.h..........................h..sth..............hh...h...h...h.....h..h.h.h..h.h..h...s...l..h...s...sh..h.......s..h....h............p.......h.........t..........t...............h..............t.......t............p..........h.............p............p..............h.............p........p..p....l...h.......h........h..s.h..h...h.sh....hhh....hhhhhphh................h...hh.t...........thhh.......h..hs...Dh...h....sls..............................................shhh.lhhstpl+t.............................................................................................. 
0 119 168 395 +4502 PF02290 SRP14 Signal recognition particle 14kD protein Mian N, Bateman A anon Pfam-B_7955 (release 5.2) Family The signal recognition particle (SRP) is a multimeric protein involved in targeting secretory proteins to the rough endoplasmic reticulum membrane. SRP14 and SRP9 form a complex essential for SRP RNA binding. 21.20 21.20 21.90 21.90 20.70 18.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.37 0.72 -4.07 22 297 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 258 6 206 281 2 95.60 33 71.62 CHANGED Lss-pFLocLocLapcsppcu..SValThK+h..........ssps.ps.spssp......shpp....ssp.tsLlRAosG....................pK.KlSTlVpsc-ltp..FhtsYuslh+upMsuL .....................Lps-pFLscLscLap...pscppu..SValThK+h.......................................................shps..ps.spptp................spstp........ssp....sLlRA.o.sG.......................cK.KlST.............lV........psc-lsp..F.ttYuslhKusMsuL................ 0 61 103 162 +4503 PF01922 SRP19 SRP19 protein Enright A, Ouzounis C, Bateman A anon Enright A Family The signal recognition particle (SRP) binds to the signal peptide of proteins as they are being translated. The binding of the SRP halts translation and the complex is then transported to the endoplasmic reticulum's cytoplasmic surface. The SRP then aids translocation of the protein through the ER membrane. The SRP is a ribonucleoprotein that is composed of a small RNA and several proteins. One of these proteins is the SRP19 protein [1] (Sec65 in yeast [2,3]). 
21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.09 0.72 -3.41 77 498 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 439 21 335 512 73 93.80 33 54.93 CHANGED hllYPsYlDsp+o+scGR+lspphAVcsPphpEItcAs.cpLslpsh.lE.cKtaP+c.....atppGR....VtVphcp.........................sKpplhptlAphlpph+ ...................slYPsYlssp+ohuE.G.RRlspph.......AVc.sPpspEIt-ss..ptl.....G......lssh...lE...sKtaP+-...............a.tttGR....V+Vpl+p.................................t.h..th.sKcplhhhlAphl.ph........................................................ 1 106 190 274 +4504 PF00660 SRP1_TIP1 Seripauperin and TIP1 family Bateman A anon Prosite Family \N 25.00 25.00 27.00 26.30 22.40 21.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.48 0.72 -4.21 14 386 2009-01-15 18:05:59 2003-04-07 12:59:11 12 3 42 0 177 280 0 99.90 47 50.10 CHANGED hsulusAssps........plsELssllsDl+upLs-Yhuhtts.sosh...shPssllslh.th.uohTDDSaTThhoplsFstlophlTtlPWYSoRLh.....PslsusLussususu .......................hsoLAtuD-cl........sl..l.ELsVaVoDI+AHLupYh.Fpuspso-....ThPs-lA-........As..a..s..h..s.sa.TTh..L..T..u...I....s.s..-QVT+hITuVPWYSoRLc....PAIouALupsGIho................................... 0 34 66 111 +4505 PF05022 SRP40_C SRP40, C-terminal domain Wood V, Bateman A anon Pfam-B_9034 (release 7.6) Domain This presumed domain is found at the C-terminus of the S. cerevisiae SRP40 protein Swiss:P32583 and its homologues. SRP40/nopp40 is a chaperone involved in nucleocytoplasmic transport. SRP40 is also a suppressor of mutant AC40 subunit of RNA polymerase I and III. 
20.90 20.90 21.40 24.30 18.90 20.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.69 0.72 -3.48 35 306 2009-09-11 14:16:26 2003-04-07 12:59:11 7 10 256 0 204 294 1 71.00 45 13.93 CHANGED s.FpRlc....hpclph.cpcLtDNoYtuts......ssaGcKAsccLthsRGKsFTKEKNKKKRGSY+G.GpIs.hussShKF .....................FpRlc.....pc.l..p.....hc.scltDNoapupt.......ssaGc.....+AsccLthT+GKuFp+EKsKKKRG.....SYRG..G...sIs..hpsp.ShKF....... 2 74 119 170 +4506 PF04888 SseC Secretion system effector C (SseC) like family Finn RD anon Pfam-B_5525 (release 7.6) Family SseC is a secreted protein that forms a complex together with SecB and SecD on the surface of Salmonella. All these proteins are secreted by the type III secretion system [1]. Many mucosal pathogens use type III secretion systems for the injection of effector proteins into target cells. SecB, SseC and SecD are inserted into the target cell membrane. where they form a small pore or translocon [1,2]. In addition to SseC, this family includes the bacterial secreted proteins PopB, PepB, YopB and EspD which are thought to be directly involved in pore formation, and type III secretion system translocon. 
29.00 29.00 29.20 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.14 0.70 -5.09 35 864 2009-09-11 03:00:25 2003-04-07 12:59:11 7 2 470 0 55 430 6 247.80 21 61.88 CHANGED lhhthsplhuchtppphpsphpphpphppppp....pchp-hpcplccttcpu...ccApKs.Gl...huKIhGWlusllollsuAh.......hhlsusulGs.....suAhhlusss..suhlutustp.u..............shsuhluptl.spsLsshsh..tth..hsttlstu..lssslsshu.....sssssssushsspsAuc.husthsttht..hht.hhtthhphhtp..tp.........................hhs..........................ptlphss.ssplssslsp.....................Gusphssushpppuspt.A-hthtpst..hptlpshhcphh-phsp..hhcshpphhpthhphls..spusstsplspps .......................h.hhhhtlhhpsttpphps.ht.hpthpcspp....tphpchppphpct.ctt.....ccs..p+...sh.....hutlhshlhshhsslhush..................sh.hhhhGh............shhhusth......shstsuh.t.h..................................................ht.lthth.......stslt.hhsh..t.h...............hs.tt.lssu...hstslt.ht...................hh.sstt.hhtthAtt.hspt...hs.p.h.p...........................................................................................................................................................t....shphssthtp......................................................uhsthtts.hptph.phhAp.th.phh...p.hpphhc..hcthtp.........p..tphhpth.p.hp....ut.thhhht..t.................................................. 0 17 23 39 +4507 PF00474 SSF Sodium:solute symporter family Finn RD anon Prosite Family This family includes Swiss:P33413 which is not in the Prosite entry. Membership of this family is supported by a significant blast score. 
19.80 19.80 19.80 19.80 19.70 19.60 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.29 0.70 -5.83 10 13052 2012-10-03 01:44:59 2003-04-07 12:59:11 12 51 3717 6 4094 10595 8262 353.80 21 74.16 CHANGED YaLAGRShssashGhSlsASshSuupFlGLuGsuhtSGluhuhauhstLlslhllhallushat.....psGslThP-YlptRFtu++.lllaLSuLuLLlhlshthouslluGAtLIcpslGlsYpsAlllluuhTslYTlhGGhhAVsaTDTlQullMlhGsllLhlhshhclG..Ghsshhp+hhsAsPph..sDh..........hs...ssuhp.h+sPlstsShWsuhshGhsGl........PHIltRshuuK.....cu+slhpGhlhuhshhlllhsGhluhhhashclAsssP....cssGsplssuNhAaPpLshcLhPshltGlhLAlhlAAlMSoLsulhluuSohFTcDlYppl++cutssEpclsthutlhsLllsulAllsulpsspts.lhhhlphAauhLuushusVlLLulFWcRsNppGAhhGhIlG ............................................................................................ahhuG+.sh.s..s.h.h...h..uhuhh.us....h...S.uhp.h.....l.G..h.s.u....h.s.......a..h...............G....h..s...h.......h.......h..h....s.....h..s............h....h..h..u.....h.h...l..h...h..h...l...h.s....h.ht.......................p.h..s...h...h....T.h..s.....-....hhp..t...R....a....t........s......p........h..............h.......p..h.............l.......u.....u.l......h.........h....l.........l...........h......h....h........h...h.....h.....s...............s.........t.....h.............h.....u.......s....u..........h..........h......h........p.....................h........h..........G..........l.............s...................h..............................h.......u..............l...............h.............l....h....s....h....l...h............h.....h............Y.s...h..h.....G...........G...hhu...s.........s...hs..D.hl...Q.....s.................h.....l....h...l..h..u........h..h....h........h...h..h....h........s........h.....h........t......h..G..............u...h....s....t....h.......h....p.....t...h..........t....h...t..s...........th..................................................................hp......s..s...........................................h............h......h....s
...h.............h........s...h......h....h....G...h.h.s......................p.h.l...Rh.hssc..............sh.p.t.h..h.....p.....u.....h.....h.......h......s.....h.......h.......h.....h.....h.....h......h......h.....h....h............h.....h.....h.....s.....h.....h........s.......h....h.....h.....h.....................................................h..........................t...s....s.....p........h......h...h.........h.h.......h.............t........h........h.........s...............s.......h.........h.....h...G.........l....h...h..u....s.....h....h.....A...A.........h.....h.S..o..hs....u...l...s.s..u..osh...s....pD.......l......a.................t...t.......h.....h...........p...............p.....t...............................t.........s..............p..............p............p......h....h......h........h....u....+....h.......h....s.....l......h..........h....u....h....l....s........h....h....h...u.....h...................t....s.....................l...h.......h...h.............h...h...h.u........h.h.s.........us..h...hsh...h.l.h.u.l..a...a.p....+....h..s...t........GA.hhuhhhG................................................................................................................................................................................................................ 1 1309 2409 3400 +4508 PF04686 SsgA Streptomyces sporulation and cell division protein, SsgA Mifsud W anon Pfam-B_5645 (release 7.5) Family The precise function of SsgA is unknown. It has been found to be essential for spore formation, and to stimulate cell division [1]. 
25.00 25.00 26.10 25.70 20.70 19.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.20 0.72 -4.33 27 307 2009-09-11 07:36:54 2003-04-07 12:59:11 7 1 100 3 119 297 0 99.20 41 68.34 CHANGED sls..spLRY-ss.DPaAVplsF.psss..stsV.pWsFuR-LLt-Glp..pPsGpGDVRlhPs.tt..spshlhlpL...suPsG...pAllchssstltsFLcRT.thVPsGpEp .....................lPspL+Y-ss....DP.aAV+hsF..+sss......stsV.pWsFuR-LLscGLp....pPsGpGDVRlhPs..t.......uts.tl.hl.pL......puPsG...pAllcsssssLtsFLcRT.plVP.GpE........ 0 30 93 118 +4509 PF00720 SSI Subtilisin inhibitor-like Bateman A anon Pfam-B_679 (release 2.1) Domain \N 21.80 21.80 22.10 23.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.25 0.72 -4.06 22 154 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 96 4 53 172 1 89.80 34 64.64 CHANGED YAPSALVLTlGpGpuAAoAsspRAVTLoCsPssuGTHPustuACApL+u..ssGDhstL...sspssthCT+pacPVsVTs-GVWpG+RVuaE+TFuNp .................................hsh..s..s..sss..+uss....LsC...s...Ps...uGo.HPsstuACApLcs..s..s....G.c.h.ssl.....ssssthCThp.Y.s.P.Vss.sssGsWpG+tVsappsasN......... 0 20 44 53 +4510 PF04056 Ssl1 Ssl1-like Wood V, Finn RD anon Pfam-B_13499 (release 7.3); Family Ssl1-like proteins are 40kDa subunits of the Transcription factor II H complex. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.20 0.71 -4.71 6 389 2012-10-10 16:07:06 2003-04-07 12:59:11 9 14 298 0 266 650 20 173.80 38 45.11 CHANGED VlDsScuMp-pDh+PsRhshhlKhlptFlcEFFDQNPISQlGlIshKsthAc+lT-LoGNPcsHIcuLcoltp.pspG-sSLQNAL-hAptsLptlPuHsoREVLIlauSLoTsDPGDIapTI-sLK+ppIRsSVIGLSAEltlCKcLsppTs.G...tYuVlLDEsHh+-LLhcpssPPPuspsp..csoLI+MGFP ...........................llDhSpu.M...t.-.pD..h+....P.sRh.th..s.lph..h..........ptFlcEaF.-QNPISQlGlls..h+s..thApp.l.o..........-.............lo.....G......NPp....cHlpuL...........pph.......h.....p...........p..s...p......G....pPS..LQNuLchAh.........t.tL.....h.........p......h......P..u....+..s..o..R.E.............l..Ll.lh..u.u....L..s..osD.P.u...s........I.a..psI.ps..L.hp.ppIRlu..ll....GL...uA...plt...lCpplsppTs...G................pYtVhls-..pH..h+-.L.lhph.ssPP....ss..........t..........t.......tt.................tss........L...lh..M.GFP....................................................................... 1 99 149 221 +4511 PF04386 SspB Stringent starvation protein B Kerrison ND anon COG2969 Family Escherichia coli stringent starvation protein B (SspB), is thought to enhance the specificity of degradation of tmRNA-tagged proteins by the ClpXP protease. The tmRNA tag, also known as ssrA, is an 11-aa peptide added to the C terminus of proteins stalled during translation, targets proteins for degradation by ClpXP and ClpAP. SspB a cytoplasmic protein that specifically binds to residues 1-4 and 7 of the tag. Binding of SspB enhances degradation of tagged proteins by ClpX, and masks sequence elements important for ClpA interactions, inhibiting degradation by ClpA [1]. However, more recent work has cast doubt on the importance of SspB in wild-type cells [2]. SspB is encoded in an operon whose synthesis is stimulated by carbon, amino acid, and phosphate starvation. 
SspB may play a special role during nutrient stress, for example by ensuring rapid degradation of the products of stalled translation, without causing a global increase in degradation of all ClpXP substrates [3]. 20.00 20.00 20.00 20.20 19.80 19.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.74 0.71 -4.61 137 1666 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1653 33 367 1037 412 152.90 35 95.39 CHANGED hs..sht.hhlRulhchl...........lssshp.aIshcsptsG..VplPpphh.c.sp.hhslhtcthtsLplss-t..hphshpFuGhPcplhlPhsAlhuhas.pssh.GhhF-spss.tt..............................................................ststtspsp...ttts.csshcVVp ...........................................h....spRPYLLRAhY-Wl..............lDNphTPa..llVcssh..PG..VpVPhcas.+DGpI..VLN.l...us+...As.s.sL..clsN-t....lpFsARFGG..lP+plhVPluAVlAIY.ARE..N..Gt.GhhF-....sEsshscsss......................................................t.tt.spp.........ssspsspt...s.tts+PsL+VVp.................................................................................................................................................................................................................................... 2 87 197 277 +4512 PF03531 SSrecog Structure-specific recognition protein (SSRP1) Griffiths-Jones SR, Mistry J anon PRINTS Domain SSRP1 has been implicated in transcriptional initiation and elongation and in DNA replication and repair [1]. This domain belongs to the Pleckstrin homology fold superfamily. 
25.00 25.00 25.30 26.10 24.80 21.10 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.46 0.70 -5.02 6 417 2012-10-04 00:02:25 2003-04-07 12:59:11 9 9 289 7 279 395 7 199.50 39 38.01 CHANGED FsGFR-pDlscLtsFhpusauhshsEKpLsVpGWNWGpschtGshLoFslsS+sAFElPLosVSQs.lsGKNEVsLEFHpsDsutl......uLMEhRFHlPso.tpptsuD.......ssphFtcslhuhADV.suot-AlshFc-ItlLTPRGRYDIcla.TFl+L+GKTaDYKI.YoSllRLFLLP+pDpppsFFVlSLDPPIRQGQT+YsaLVhpFspDE-h-l .................................sGFppp-hpplpphh.cp..tap..hpltp.+-hsl+GWNWGpschs............t.....s.......L.....sF.s.......lts.+.ss.FElPhu.plSp..s...h...sG...K.....N.EVslEF....p..............s-s...sts......................................pLhE.hRFalPss.........p.......p...t...ttD...............................s...sp...hFhpplhpKAc.lhp.s.sG-slshF..p-.l...hL..TPRGRYDIcha.s.h+L+GKTaDYKI.apslhRlFlLP+t..Dph..ph....hhVluLDPPl+QGQTRY.aLVh.Fpp-E-h..h....................... 0 104 161 234 +4513 PF04722 Ssu72 Ssu72-like protein Waterfield DI, Finn RD, Mistry J, Wood V anon Pfam-B_5993 (release 7.5) Family The highly conserved and essential protein Ssu72 has intrinsic phosphatase activity and plays an essential role in the transcription cycle. Ssu72 was originally identified in a yeast genetic screen as enhancer of a defect caused by a mutation in the transcription initiation factor TFIIB [1]. It binds to TFIIB and is also involved in mRNA elongation. Ssu72 is further involved in both poly(A) dependent and independent termination. It is a subunit of the yeast cleavage and polyadenylation factor (CPF), which is part of the machinery for mRNA 3'-end formation. Ssu72 is also essential for transcription termination of snRNAs [4][5]. 
25.00 25.00 27.00 37.90 22.20 21.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.33 0.71 -5.00 27 359 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 268 27 254 362 2 191.20 49 89.59 CHANGED spL.+hssVCASNpNRSMEuHphLtc..uGa.sVpSaGTGStV+LPGsShD+PNVYpFG.TsYc-IYsDLhupcp.ch...YcpNGLLpMLcRN+clKtuPE+Wpc...................ssct...FDlllTCEERsaDsVl-DLhsRt........sphpcsVHVlNlDI+DspEpAhlGuptIL-Lsphlp....................tsspshE-pl.cllscapcca.phshLaslsaY ...................L+hssVCuSNpNRSMEAHphLpc...........p.Ga.sVpSFGTGotV+LPGPuhccPNVYsFs.ToYcphYsDLhp..KDt..cL....YppNGlLpML-RN++lK.sPERaQc.........................sp-h.....FDlllTCEERsaDtVl-DL.sRt...............pphspPVHVINlDIpDNcE.EAtlGuhhIh-Lsptlp.......................tspsh-scls-lLtcapc+p.phshLaolsaY.................... 0 79 124 185 +4514 PF04184 ST7 ST7 protein Bateman A anon Pfam-B_2088 (release 7.3) Family The ST7 (for suppression of tumorigenicity 7) protein is thought to be a tumour suppressor gene. The molecular function of this protein is uncertain. 
19.10 19.10 21.70 19.50 18.80 18.10 hmmbuild -o /dev/null HMM SEED 540 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.86 0.70 -6.27 5 405 2012-10-11 20:00:59 2003-04-07 12:59:11 7 7 139 0 132 319 4 378.60 62 93.97 CHANGED luWSWTYLWslWFAlVlhLlYlLRuPLKLpEsLsuVol..FLNTLTPKFYVALTGTSSLISGLILIFEWWYFRKYGTSFIEQVSVSHLRPLlGGVDNoussuS..Sss-stpNRQNVuECKVWRNPLNLFRGAEYSRYpWVTG+EPLTYYDMNLSAQDHQTFFTCDoDpL..RPuDoIMQKAWRERNPQARIpAAapALElN......................P-CATAYVLLAEEEATTIoEAE+LFKQALKAu-shhR..pupphpupupph-A.+RRDTNVlVYIKRRLAMCARKLGRlREAVKMMRDLMKEFPLLSMLNIHENLLEALLELQAYADVQAVLAKYD........DISLPKSATICYTAALLKARAVSDKF..SPEuAuRRGLSTAEMNAVEAIHRAVEFNPHVPKYLLEMKSLILPPEHILKRGDSEAVAYAFFHLQHWKRlEGALNLLHCTWEGTFRhIPYPLEKGHLFYPYPuCTETADRELLPSFHEVSVYPKKELPFFILFTAGLCSFoAMLALLTHQFPELMGVFAKAslsllhusht..h.chcsahPusIhppLsst ...........................................................................................oh................FLsoLTPKFYVALT.GTSSLISGLIh.I.FEWWYF+KaGTSFIEQVSl.....s.......Hlp...P.hhGG........s-.ss.s..p.s.........s...sp..s.......................t.....................t..s............ps.s.ppsls...ECKVWRNPLNLFRGAEYp..RapWsTG.+.EPLTYYDMNLSAQDHQT.FF.TC-oDth.....R.PuD.slMQ+AWRERNP.ARIpAAapALElN...........................cCAsAYlLLAEEEATTIs-AE+LFKQ..ALKAG-shYR..pSQph..Q...Hpu.staEu...hRRD.TNVLlYIK..R..R..LAMCAR..+LGRh+EAVKhMRDLhKEa.P...hshhN..IHENLlEuLLElQAYADVQAVLAK.Y.D....................................D..ISLPKSAsICYTA.ALL.KsRsV.uDKF..SPEsAS+RGLSoAEhs..AVEAIHRAVEFNPHVP.KYLLEMKuL..I.LP.PEHILKRG.D.SEAlAYAFFHLtHWKRlEGALNLLpCTWEG.T.....FRhlPaPLE+GHLFYPYP.CTEsADRELLPs.FHcVSVYPKKELPhFIhFTAGlCS.TAhlAlL..THQaPE.MGlhA+s...h..h..........................h.............................................................................................................................................. 
0 33 46 86 +4515 PF03298 Stanniocalcin Stanniocalcin family Mifsud W anon Pfam-B_4401 (release 6.5) Family \N 20.70 20.70 20.90 20.90 20.60 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.69 0.70 -5.32 16 167 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 69 0 77 133 0 193.90 45 78.10 CHANGED Mlh+suL..LllhlLssuu.......aEss.--s..usR+uRhSsposu-VsRCLsuALpVGCGsFuCLENSTCDTDGMH-ICpoFLaoAAKFDTQGKoFVKESLKChAsGloSKhFhslRRCooFQcMluEVQcECYsKLDLCoVA+pNPpAIsEVlQlPspFPNRaYspLLpSLLsCDEETVssVRsSlhu+lGPshusLFplLQsssssssussu .............................................................sh..llhhshs.st..........Es..pcu..u.p+u.RhuhpsoA-l.+CLssAhp..V.G..CGsF.tChE.N.s.o.C-hcG..h.....a.....-.....IChoFLasAuKFDsQGKuFlK-uL.KChA.p...ulp.pKhh.th..R+CsshpcMlhp...lQcECY.Kh..slCus...A+cNscsIsEhlph.shFs...p.c.YscLlp.LLpCsE-shpslpcSl.tpht.shuuLhplLphspss.tp..s................................. 0 17 25 43 +4516 PF02200 STE STE like transcription factor SMART anon Alignment kindly provided by SMART Family \N 25.00 25.00 47.60 47.60 21.90 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.51 0.72 -3.91 8 177 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 156 0 105 177 0 104.80 74 16.69 CHANGED sQlIRRahLssG-.YlSCVhWNsLaaITGTDIVRsllaRFpsFGRslpspKKFEEGIFSDLRNLKsGsDAoLEpPKSPFL-FLY+NuCIRTQKKQKVFYWFSVPHD+LFsD ....................s.QhIRRFhLPoG-.aVSCVhWNNLaHIoGTDIVRCLsFRFQAFGRPV+NpKKFEEGIFSDLRNLKsGoDAoLEEPKSsFLDFLaKNsCIRTQKKQ...KVFYWaSVPHDRLFLD........ 
0 29 61 92 +4517 PF02876 Stap_Strp_tox_C Staphylococcal/Streptococcal toxin, beta-grasp domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family \N 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.47 0.72 -3.88 41 3442 2012-10-01 21:38:54 2003-04-07 12:59:11 12 3 194 211 29 1049 1 99.20 31 42.23 CHANGED +......pls.lslahcspppph....spplpssKcpVTlQELDhKlR+aLh..ccapLY.................sushppGhItaphss..sppasaDLa.h.s......pchLphYpDNKslcScpl.+I-VhLp ............................hh.sttp.t......tp.hphsKcploLKELDaKlRchLl..cpapLY.................pushppGpIplphcs..sspashDL...........sccLphpc.scslcuppltcI-V.l.................... 1 16 24 28 +4518 PF01123 Stap_Strp_toxin Staphylococcal/Streptococcal toxin, OB-fold domain Finn RD, Bateman A, Griffiths-Jones SR anon Prosite Family \N 20.70 20.70 20.80 20.90 20.50 20.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.10 0.72 -3.50 22 1277 2009-01-15 18:05:59 2003-04-07 12:59:11 15 2 190 179 10 523 0 85.50 32 37.19 CHANGED NlpphYtshshsttp..thposcphlppsLlapsps.........pslpsEhsspphsppa.KsKpVDlaGl.YthpCht.t............hstshYGGVT.p .........................NlpphYtp.phpphp..shpsscphLspsLlFps.t.........pslpsEFpspshusca.KsKpVDlaGlsYthpChh.t.............tspChYGGVTh................ 3 1 8 9 +4519 PF04022 Staphylcoagulse Staphylocoagulase repeat Bateman A anon Prosite Repeat \N 19.70 19.70 20.20 26.20 18.90 16.60 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.17 0.72 -4.58 22 1401 2009-01-15 18:05:59 2003-04-07 12:59:11 7 18 155 0 5 881 0 26.90 84 26.32 CHANGED RPTaNKPSETNAYNVTTHAsGpVSYGA ..RPT.NKPSETNAYNVTTHANGQVSYGA.. 
0 5 5 5 +4520 PF02821 Staphylokinase Staphylokinase/Streptokinase family Bateman A, Griffiths-Jones SR anon PDB Domain \N 21.70 21.70 21.90 31.20 21.50 21.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.29 0.71 -4.33 11 458 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 171 31 11 340 0 122.40 32 79.68 CHANGED sstppsssshplphtspssDhchp.hlhs....hphtsschloppElttthpphlsp.sp.satlhppc.shhtcssshpt.p.hspcpphshhIss+ptthshsscsuhp.hhs...ptc.l.pchhlhcc ............................uph.sss.hl.hsspssDschs.lLpsphh.hsltsGsoLTppclthhspahLst.sa.-atlhEpDsShhhcssshh+.h.hsp-pphoahIp-+ttshstsscsshp.thN...pp..l.pKhhl.KK................................... 0 1 4 7 +4521 PF01017 STAT_alpha STAT; STAT protein, all-alpha domain Bateman A, Griffiths-Jones SR anon Pfam-B_856 (release 3.0) Family STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. STAT proteins also include an SH2 domain Pfam:PF00017. 
27.50 27.50 27.70 27.70 27.40 27.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.10 0.71 -4.56 26 626 2009-01-15 18:05:59 2003-04-07 12:59:11 15 30 117 9 231 569 0 170.90 31 24.35 CHANGED h-+Qppl-s+lppl+spsp.cs-psl+tLcchQ................-pasapap.......olpshtchp...hNs.t.ct.hppcphhlpphhp........pLphpRppllpchppllshlcplppslls-EL.-WK+RQQhACIGGP.s.pssLDQLQsWhTtlAEsLhQlRQQLK+lp-LppplsYssDPlspt+spLppplppLlpsLlpS ....................................................................................................pcp.plppplppl+thsp.ph-pch+tLp.....p..hQ...................-.aphpap..............plp....s......ht.php...............pt....s...p.........hpp.c....t....l.pphhp............tLp.phR...t..pllp.c....hp..p....hlshhcthQ....ptlls-ELhpWKRRQQlAs..GGP..s...ps...sLD..pLQsW...hptLAE.lhQhRQ....Ql++hcc.Lpp.p..h.shp.s.pP..ls.pths.L...ptplsplhpsLlpS............................................ 0 34 51 117 +4522 PF02864 STAT_bind STAT protein, DNA binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_856 (release 3.0) Domain STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. This family represents the DNA binding domain of STAT, which has an ig-like fold. STAT proteins also include an SH2 domain Pfam:PF00017. 
20.20 20.20 20.40 21.90 19.50 19.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.80 0.70 -4.78 9 708 2012-10-03 00:25:27 2003-04-07 12:59:11 10 35 125 9 273 620 4 217.30 41 33.61 CHANGED FlVE+QPCMP.pPpRPLVLKTtspFTs+lRLLV+h.ELNhplKscslhD+pss-hts.ch.......R+FNlhGoso.KlhNhEco.ssSLus-FpaLph+E.+stsusRsNpcGshhVTEELHslsFEophshtG..LpIcLcThSLPVVVISNssQhPsAWASILWaNhhosps+N.sFFssPPtusWsQLuEsLSWQFSSpst..RGLs.-QLshLA-KLhspsus.....sstploWscFCKEphss+uFoFWhWl-uIlDLlK+ ................................................................FllE+QP........P.Vl.KT.tspFss.pl..RL........LVth.cLNh..phps..s.h...s.....p..hs-.ts..th..................+p...s...h.h.......s......pso..cl....hN.pps...php....ssphhph...ph+..p...pht....t...htR.s...s....c...Gs........VT..EEhaslhF.pop.hshtu...........L....hhp......l......c......ThSLPV.VVIspss.Q.ssAhAolLW.Nhh.s.p.s..+.........s.FssPsts.WsQlsEsLs..hpFpu...st..........RGLs.-pLshLupKLh....s.ssst...........ssh.loWu.pF...sK..................E.s..........l.s.s..+......sF...oFW.Wh-ull-LlK+............................... 0 45 65 143 +4523 PF02865 STAT_int STAT_prot; STAT protein, protein interaction domain Bateman A, Griffiths-Jones SR anon Pfam-B_856 (release 3.0) Domain STAT proteins (Signal Transducers and Activators of Transcription) are a family of transcription factors that are specifically activated to regulate gene transcription when cells encounter cytokines and growth factors. STAT proteins also include an SH2 domain Pfam:PF00017. 
26.00 26.00 29.50 26.40 25.30 25.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.69 0.71 -4.16 22 576 2009-01-15 18:05:59 2003-04-07 12:59:11 12 26 102 3 210 502 0 117.70 39 16.63 CHANGED o.WtplQpLpschL-Q.lppLYsssF.PhElRpaLApWIEs..QsW-ths......s-shAshlhpsLlppLpcphpphspp.sshLhphplpcht.pplpsha.ppsPhplsthlpphLppEp+llppApps ...........utWhplQQ.LpschLcQ.lppLY.s.c.p.F.PhE...lRpaLApWIEs...Qs.W-ths.........cpshAohlhcsLl....ppLppphs+.......hs....tE.sshLlphplt+ht.ppL..Q..spa.pcsPhplsthIpphLhpEp+llppApps...................................... 0 29 43 96 +4524 PF03875 Statherin Statherin Finn RD anon DOMO:DM07003; Family Statherin functions biologically to inhibit the nucleation and growth of calcium phosphate minerals. The N-terminus of statherin is highly charge, the glutamic acids of which have been shown to be important in the recognition hydroxyapatite [1]. 25.00 25.00 29.20 35.40 16.70 16.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.24 0.72 -4.08 2 11 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 8 0 1 8 0 39.40 74 68.08 CHANGED DSSEEKFLRRltRFs.G.YGPYQPhs..PLYPQPYQP...QY ....DSSEE.KFLRRltRas.G.YGPYQPhPEQPLYPQPYQP.YQQY 0 1 1 1 +4525 PF00836 Stathmin Stathmin family Bateman A, Mistry J, Segerman B anon Pfam-B_1551 (release 2.1) Family The Stathmin family of proteins play an important role in the regulation of the microtubule cytoskeleton. They regulate microtubule dynamics by promoting depolymerization of microtubules and/or preventing polymerisation of tubulin heterodimers [1]. 
20.40 20.40 20.40 20.60 20.30 19.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -11.03 0.71 -4.55 4 392 2009-01-15 18:05:59 2003-04-07 12:59:11 14 5 92 15 157 297 5 129.70 55 68.99 CHANGED SDhcVKpLpKRASGQAFELILpPPSh-usP-hslosPKKKDhSLEEIQKKLEAAEERRK.pEAElLKQLAEKREHE+EVLQKAIEENNNFSKMAEEKLspKMEs.KENREAplAAhLERLpEKDKHsEEVRKNKEhK-.u .......................................DhpVKplpKRASGQ.uFElILp.PPS...............u...s..........hsho.s.P....+.+.KD..hSL.E..EIQKKLEAAEERRK..op...EAplLKpLAE.KREHE+EV..lpK.A..lEE.NNNFsKhAcE...KLp.KM.......EtsKE.NREAplAAhhERL.pEK-c+h.tEVR+sKE.p.................................. 0 26 39 74 +4526 PF02116 STE2 Fungal pheromone mating factor STE2 GPCR Mian N, Bateman A anon IPR000366 Family \N 26.20 26.20 26.30 26.20 25.50 26.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.94 0.70 -5.37 19 170 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 139 1 88 160 0 273.90 32 65.70 CHANGED asPhpphlsapts....G.sshslshsplsphlppplphuIhaGsplGAs.llhlllLhhlo+sc....+oslFllNpsuLhhshl+usL.htYhhusasul.hhhTu.hphlopsshtspsAuslhpslLlssIEhSLlhQlpVlaps.s.+hhthhLhulSshlulssluhhhssslpshh....slhssssst.shhah.....slssIlhusSIsFhohlLlsKLhhAI+pRRhLGL+QFsuh+ILhIMusQTlllPS...........ILhIlpY..hssh..spLssluhhLVsLSLPLSSlWAu ..............sPhpQslshhts.....G..s.hsl....shstl-shhp.tlphsIsausplGAs.hlhL.llhh.h.hopscc..h..........+s.lFllNhluLhlsllRssLhhhahho.sass....hhhhaoGsa..ph...lstu.shpsSlAusllplllsshlEsSLhhQshlhhps..hs....phh+..hhlsslShllulssluhphssslhpsh..................slhp....s..ss....hs.....hh.Wl..................phshIlhssSIsaaohlhssKLlh.A.lhoR...R.hL..Gh.Kp.F.suhclLlIMusQohllPu............lhslLpa.h.ss....s.phsolshslVllsLPLSSlWAt.................................................. 
0 14 42 72 +4527 PF02076 STE3 Pheromone A receptor Mian N, Bateman A anon IPR001499 Family \N 24.90 24.90 25.50 26.70 24.50 24.80 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.92 0.70 -5.32 50 604 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 245 0 180 590 1 238.10 28 69.11 CHANGED sshuhluhlLslhPLsWHh+u+NsushhlhhWhhlssLpphlNullWssss.ts.hhs..saCDIss+lhlGuslulPuuslCIs+pLtpIhsscpsphstspp..p+plhhDlhlslhhPllhMuLpalVQuaRasIhcthGChsshhsohsuhhlhahWslllulluslYusLsLhtFh++RtQFsslLpsspSuLohuRahRLhhLuhl.hlhhhPlshaslh..lphppss....hsa.SWspsHss...a.spI.paP.............hsplhhsRWlssssuhlhFhhFG....hup-........AhphY .............................h..hh....h.hhh.st..s.ush...hhhhWhhlhsl.thlNullW.sss..hs.hhP...sWCDI..s..........s+.lhhus.slG.lssush..CIsRpLt.p.Iss....s.ct.s.t.hotpc+................+Rp..hhhDhhlslGlPllhhsL.......phllQspRasIhcthGChsshh.ohsshhlhhhWs.hlhuhsuslYus...lslhhhhp+..Rtph.pphlts.s....Ss.l..s..h.s.Ra..h....R....Lhhhshh.lhh.hhPhshh.hh..h.............a.sat.hH........h..t...l..hs..................h........h....h.shhhh.hhFu.hs.-s...Y........................................ 0 99 134 164 +4529 PF04885 Stig1 Stigma-specific protein, Stig1 Mifsud W anon Pfam-B_6528 (release 7.6) Family This family represents the Stig1 cysteine rich plant protein. The STIG1 gene is developmentally regulated and expressed specifically in the stigmatic secretory zone [1]. 
21.90 21.90 22.00 21.90 21.70 21.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.49 0.71 -12.08 0.71 -3.97 14 196 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 43 0 144 189 4 94.50 32 43.63 CHANGED llhlllslslphhshsssssss............p..tppspssshhssspss..ssh..ut....................pCsp..sstICpsst.....ssphsCCpN.+CVDltosctNCGsCsptCca.up.pCCsG.CVslthDpppCGpCsppCt.Gp.CsaGhCsYA ..............................................................................................hh.........................................................................................................................................sC.....t...........C.hsh........s..pCGhCsp.tC.ph.u.p.pCC..sGt.C...Vs...ltsD.ppCGtCsp..........tCstGphChhGhCs.............. 0 50 108 135 +4530 PF05217 STOP STOP protein Moxon SJ anon Pfam-B_6629 (release 7.7) Family Neurons contain abundant subsets of highly stable microtubules that resist de-polymerising conditions such as exposure to the cold. Stable microtubules are thought to be essential for neuronal development, maintenance, and function. STOP is a major factor responsible for the intriguing stability properties of neuronal microtubules and is important for synaptic plasticity. Additionally knowledge of STOPs function and properties may help in the treatment of neuroleptics in illnesses such as schizophrenia, currently thought to result from synaptic defects [1]. 
27.00 27.00 27.20 27.20 26.60 26.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.79 0.70 -4.60 20 445 2009-01-15 18:05:59 2003-04-07 12:59:11 7 11 81 0 305 392 1 156.10 15 61.31 CHANGED ut..ClCplCsCu+p..hCp..ppslplslshphh.......SsYcp-a............sspssptss.hsttchps+thsl....................osYppDFphhsh........stpccsh.tpshshsupouYsosa.shss.ssspht.sp.hsshs..hshsspToY+p-Fp.shpt.....sshsscsp.tpthss.....a.s.o............appsapspssssht..................................thppchshssos.Fpuph..........pphca .........................................................t.....................................................................othtpp............................................................................................................................................osh+.pDatsapht.....t.hp..p.p.p.h.t..sss.s..F.pstTo...a......ppcahs..h...th........h.....t...sh.+s.t......h....s...hPhp.s.o............sa+.pah..s.................................................................................................................................................................................................... 0 134 160 207 +4531 PF03088 Str_synth Strictosidine synthase Griffiths-Jones SR anon Pfam-B_1533 (release 6.5) Family Strictosidine synthase (E.C. 4.3.3.2) is a key enzyme in alkaloid biosynthesis. It catalyses the condensation of tryptamine with secologanin to form strictosidine. 
20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.69 0.72 -3.95 8 707 2012-10-05 17:30:42 2003-04-07 12:59:11 11 11 260 14 391 1067 163 85.90 36 22.65 CHANGED sulsVssp.GVlYFTDuSo+Ysh.+plhhshLpGcssGRLh+aDPoT+sT+VLLccLaFsNGlulSsDpoallhsEsshpphh+Yalpts .............................................sslslsps.GplYFTDo.........S.........s...........c..........aph....c..............p.......a........h....hsh...........h..p...u.......c.s.sG..R.Ll+YD...............s.p..T.pps.pVL.lcs.Lt.FsNGVulSsDpsa.......ll....lsETsttR.lh+YalpG.................... 1 98 227 319 +4532 PF04270 Strep_his_triad strep_his_triad; Streptococcal histidine triad protein TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family All members of this family are proteins from Streptococcal species. The proteins are characterised by having a HxxHxH motif that usually occurs multiple times throughout the protein. 20.90 20.90 21.30 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.58 0.72 -4.24 5 2985 2009-01-15 18:05:59 2003-04-07 12:59:11 8 20 316 3 93 1739 0 48.40 47 18.71 CHANGED YTT-DGYIFsPsDII-DpGDAYlVPHGsHYHYIPKc-LSsSELAAAQAYhupK ..........sDGYlFs.PscIlp-.s....u.....s.ualVPH..G..sHaHaIPKspLSt..E.hthAp................................... 0 21 31 53 +4533 PF02516 STT3 Oligosaccharyl transferase STT3 subunit Bashton M, Bateman A anon Pfam-B_1095 (release 5.4) Family This family consists of the oligosaccharyl transferase STT3 subunit and related proteins. The STT3 subunit is part of the oligosaccharyl transferase (OTase) complex of proteins and is required for its activity [2]. In eukaryotes, OTase transfers a lipid-linked core-oligosaccharide to selected asparagine residues in the ER [2]. In the archaea STT3 occurs alone, rather than in an OTase complex, and is required for N-glycosylation of asparagines [3-4]. 
27.90 27.90 27.90 28.10 26.90 27.70 hmmbuild -o /dev/null HMM SEED 483 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.93 0.70 -5.53 15 968 2012-10-03 03:08:05 2003-04-07 12:59:11 9 12 617 11 541 870 126 459.70 26 63.95 CHANGED hltlllhuhhhhssshuc.........lFsstphhshh.-hDsYYpaRlsctllpcGa........hsthsaa.pptaYPhG.hlsassllshL.hshhsllhhhhs......hslpplshhhsPlluulsuIssahLs..+clpsctsGlluAhlluluPu.YlsRoluGha..Ds-hhslhlhhhshhhalcuh+sus............hhassluuLshhlhshuWsGh..........lhhhshhsLhllshLlhu+..........apschhhlh...............shshllssls..hl.hshlGat.........hhthhhlhulhphhshsshstt............hhshhpats.....lhhsu.lsplsslh..ssslhsluhhGhlu.h..................sGhhaslhp..........hschpl.hls.ls.apshuhhuhhhul+Fh.lhshPlsl...hasshhlcshtshhhhhu.....................hhthhhssshsh.lhlslssslshlushths.tlhsphhtpttsth+h.....t..........................................ssssssllhstasashhhhshtsp.....Vshsuhu .....................................................................h...h..h.hhhhh...h....h...............hhs..h..h.h...a..........h..hp....EF.DPa.F..paR.tsphlspp..Ga................athhsaFD.t..............saaP...hG...h..l..h..h..G.....o..h..h....s.s..L......h...h..ssth..lh.h.hhp.................hpl.t.s.ls..l..a....h..ush.huu.lss.l.s.sa...hls.....cch.................t......s........s...........t........u........G....l...lAAhhhul.sP...............u.YlsRSh.uG.a..Ds-sl..s.I...h...h...h.h...h.s..h...a...h.a..l...+.ulcpss.................................................hh.a..u..shs..uLshh...h.h..l.u..W.uGa..........................sallsll.s..L..a.shhh.l...l..h.tR...................................................................hsp..pla...huh.................................shh.hhl...shlh.......s.ht..l..sFlu...Ftshp.........s.pth..u.........s..hu...l..h.sl.l...t..l..h..h....h..hthh.t................................tlshttaps.........................lhhhh.....hhh.shhhh........hhslhhls..h.h..uh..lu..s.h................................................................................
..............................suhh.hSLhs...............sasp.h...p.h.shh.s.sls.at...s.s..s.......a.uh..hhs...lp..hh....l....hhhP.s.ul..........hhs..h....h...h.......p...........t....p...l.hl.hhhu........................................................................hhs..hhhsssh.l.h..lh..Ls....L..sP..s.....h...s.h...h.....u..u....hshs.....p...........lhs......p...h.....h.t...t.....t...p.......................................................................................................................................................................................................................................................t.s.......ps.h............lh.s.hhshhh..hhhhtsph....shsp................................................................................................................................................................................................................................................... 0 184 311 447 +4534 PF03481 SUA5 Putative GTP-binding controlling metal-binding Bateman A anon Bateman A Domain Structural investigation of this domain suggests that it might be a GTP-binding region that regulates metal binding and involves hydrolysis of ATP to AMP. It is found to the C-terminus of Pfam:PF01300. 
22.90 22.90 23.00 23.20 22.60 22.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -11.20 0.71 -4.04 384 2044 2012-10-03 16:42:30 2003-04-07 12:59:11 8 11 2016 4 615 1629 1172 136.70 28 39.29 CHANGED lohEp..lc..pllGt...........h........t..t......t....sp..tP.........p.APGMhhpHYAPputl........h..ls........p.................th...pt.............................................sptlhhlshst......................................htt...........s................hsh...........usptchppsApsLFssLRphDp...........p......sh.chIhsp........s..hs.p....p...Gl..G.tAIhsRLp+A.Au ............................................lTh-plcpllGp.......................th.h........pc..............sp...tP.c.APGMKYpHYAPc.sslhll..p.....................sh....t........p..............................................................spclullshpph................................................................................htth......t..........hhh........h...........ustschpp.sApsLassLRphDc..............p....sl.chI..hsp........................s.hs....p...s.....sl.G...tAlhNRLpKAAu............................ 0 225 402 517 +4535 PF01300 Sua5_yciO_yrdC Telomere recombination Finn RD, Bateman A anon Prosite Family This domain has been shown to bind preferentially to dsRNA [1]. The domain is found in SUA5 Swiss:P32579 as well as HypF and YrdC Swiss:P45748. It has also been shown to be required for telomere recombniation in yeast. 
22.80 22.80 22.80 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.11 0.71 -5.03 163 8425 2009-01-15 18:05:59 2003-04-07 12:59:11 13 39 4856 15 2171 6104 3104 175.00 27 51.50 CHANGED phLcpGtllAhPT-osYuLuscst.sppAlp+lhphKpRs..sK..Plslhss.....slpp..lpphst...th.......spthhp............hhpp....hhPGPlTllhph..................pp.......lsp..hl.............sstsslGlRlPspsltttLhc..................th.t.....s...........lluTSANhSG.c...sss.tsspclhpp.l....sstlD.......hllcss........hh.tshsSTll..c.lh......t.p......l.lRpG ........................................t.hlppGtlluhPT-os.YuL..u.......scst..........s...........p.......pA..........l..........p+lhphKp.R.P..t.........s+.........slhlhss........shpp..l.pp.asp......h..........................s.s.t.h.hc.....................................lh..ps.....h.......h....P.......G...PlTh..lltt.....................................tpp......lsp...hl...................ss.s.h.so....lulR....l.....Psp.P..lshtLlp.........................................................th.u.t...P...................lsuo..SA.N....l.S.G.p............ss.s....p..s....s.p......c...l.hpc...L..........s.s..p..l.c......................h.l.l.cu.......s............ht.......t......t..........h.s........STll..Dhs.......st.st.....l.lRtG......................................................... 0 706 1382 1838 +4536 PF00862 Sucrose_synth Sucrose synthase Bateman A anon Pfam-B_484 (release 3.0) Family Sucrose synthases catalyse the synthesis of sucrose from UDP-glucose and fructose. This family includes the bulk of the sucrose synthase protein. However the carboxyl terminal region of the sucrose synthases belongs to the glycosyl transferase family Pfam:PF00534. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 550 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.61 0.70 -6.54 4 604 2012-10-03 16:42:30 2003-04-07 12:59:11 14 9 221 27 131 815 109 398.90 57 63.47 CHANGED TRVHSlRERls-TLoAHRNElLuLLSRh.upGKGILQPHQLlsEaEsI..tED+tKLcD..GsFu-Vl+SsQEAIVlPPWVALAlRPRPGVWEYVRVNVacLsVEELoVsEYLpFKEELV-GSSsuNFlLELDFEPFNASFPRPTLoKSIGNGVQFLNRHLSuKhFHsK-ShaPLL-FLRlHsYpG+oLMLNDRIQslsuLQusLRKA--YLuoLPsDTPYSEFEH+FQElGhERGWGDsAcRVhE.hHLLLDLLEAPDPsTLETFLG+IPMVFNVVILSPHGYFAQsNVLGYPDTGGQVVYILDQVRALEsEMLhRIKpQGLDIsPRILIVTRLLPDAVGTTCsQRLEKVhGTEHoHILRVPFRTEKGILRKWISRFEVWPYLETasEDVApElAtELQupPDLIIGNYSDGNLVASLLAHKLGVTQCTIAHALEKTKYP-SDIYWKKFEc+YHFSCQFTADLIAMNHsDFIITSTFQEIAGSKDTVGQYESHTAFTLPGLYRVVHGIDVFDPKFNIVSPGADhoIYFPYoEpEKRLTuLHPEIEELLYS ..............................................................................................................................h...h.........................h..hh...pEhhh.s..hhhhhR..p.u..phhplp..th.h-.h...ph.Lth+-thh.t.........p....t....L.El..DFtsFp..hPp.p.sp.IGpGhpals+ahou.hh..t...p...p......h....Lhta.L..hph......p...G........hhls.pp.lps..tLp..lhhA..hl.th..ppsatpht.p........hpthGhE.GWGssAt+s.p.hphL.-llpuP-.s.shEtFhuplPhhF.....plVlhSsHGaF.u..Q..ts..V.L..Gh.PDTGGQ........VVYILDQVRAL.Ep.E.h.l.R...l.....c.......Q..GL.....s...l.....h.P+I....l.....l...l....T.R.L.l.P.-..A..h...GT..s..C..s......Q..+..L..E..cl........G..o...c.......t..s.pIL.RVPF+s.pp.G.h.lcpWISRF-.lWPYLE.p.as...c.............D...........s...........s...........t..........E....l...........h.......t...........Eh...........p......u...........p...........P.D...L..I.lGNYSDGNlVAoLLup+LsVTp.........C.......sIAHAL...EK...T...K...YP.......c....SDI...Y....W+...c....h...--KYHFSCQFTADLhAMNpoDFIITSTaQEIAGSK-oVGQYESHtAFTL..P.G.L.Y.RVV.HGIDVFDPKFNIVSPGAD.slYFPYT-pc.c.RLTshHscIEELLas........................................................... 
0 25 79 113 +4537 PF02657 SufE UPF0050; Fe-S metabolism associated domain Bashton M, Bateman A anon COG2166 Family This family consists of the SufE-related proteins. These have been implicated in Fe-S metabolism and export [1]). 20.20 20.20 20.20 20.40 20.10 19.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.31 0.71 -4.49 7 2432 2012-10-01 20:52:23 2003-04-07 12:59:11 10 5 1792 5 531 1474 985 125.00 36 81.81 CHANGED lhppF.cstph-c+Yc.LlphGppLsshscchhtptp..l.GCpSplal.htshpssth..hF.u.o-AhlspGlhullhphhsGpTss-Ilshssh.FFpcLult.pLS.uR.pGhpulhtthpphsh .........................................hpsFt.hssWE-+YchlIpLGcpLP.sLs..-...c...h..+...s...p...t...pp...l...pGCpSp.V..Wl............h....h.....p......p....s.........p....s.......G........p...l........cF..p.....G..D..SD..A.t...IV+.........GLlAlllt...hh........sG.pTsp-lhshc...s....p.sa.Fccl..GL...p.p.p.L...S..PoRopGLpAhlctI+stA.t............................... 1 147 317 440 +4538 PF05076 SUFU Suppressor of fused protein (SUFU) Moxon SJ anon Pfam-B_6089 (release 7.7) Family SUFU, encoding the human orthologue of Drosophila suppressor of fused, appears to have a conserved role in the repression of Hedgehog signaling. SUFU exerts its repressor role by physically interacting with GLI proteins in both the cytoplasm and the nucleus [1]. SUFU has been found to be a tumour-suppressor gene that predisposes individuals to medulloblastoma by modulating the SHH signaling pathway [2]. Genomic contextual analysis of bacterial SUFU versions revealed that they are immunity proteins against diverse nuclease toxins in polymorphic toxin systems [3]. 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -11.15 0.71 -4.68 132 954 2009-09-17 14:43:38 2003-04-07 12:59:11 8 30 737 5 252 788 16 163.50 19 53.36 CHANGED ts.ssh.slslhhhts..t.......pshhphlThGhS..............................th.ht..sp........phhthELhhtl.................spp.hashphLt...........slA.phshp....pss...h..l....s.Gphl..................s..psht....tso.....phsuhhlhts......hhtssphsth.............stt.......lpahpllPlppsEhpahp....ppG...scsL.hpth..tpt.....shtlhDhpRps ............................................................................................................s.......lslhth.s.............shhpasThGho.................................th.ht....tt.......t.pththELhhtl...................ptt..hhhp.hLt...........slA..phshp....ptp..h...h..............s.Gphl..........................s..pshs........tss...phsthlhs.s......hhtssp.h.sth...................stt................Vpal.llPlsppEhphhp.......ppG........hps..l..hchh..ppt.....shplhDhpR......................................... 0 80 153 211 +4539 PF04198 Sugar-bind Putative sugar-binding domain Bateman A anon Pfam-B_1085 (release 7.3) Domain This probable domain is found in bacterial transcriptional regulators such as DeoR and SorC. These proteins have an amino-terminal helix-turn-helix Pfam:PF00325 that binds to DNA. This domain is probably the ligand regulator binding region. SorC is regulated by sorbose and other members of this family are likely to be regulated by other sugar substrates. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.22 0.70 -5.48 31 3935 2012-10-04 00:26:15 2003-04-07 12:59:11 8 27 2304 24 585 2201 210 247.90 26 76.87 CHANGED sphssshcLEptLcc+auLccshVV..Psss.....st..sss.hpplutsuuphlpp.hlp.ssp..lluluhGcTltssscthss..hphpslphVshhGGhst...hs.hps...lstphAc+h.supsthh.sPshhsssphtcslhpptslpsllphhcpsDlslhGIGshtp..puthhtpshhspp-hppl.pptuuVG-lhGt.aFDtpGphlp.tshss+slulpl-pL+phsphlulAuGppKspAIhAAL+us.hls.sLlTDEpoAptlLs .....................................................t.hplEppLpc+a...u.LpcshV..l.......s..sps.................sp.....tsh..tpplup.suAphLpp..hlp...ssp..........llul.u.aGp.T.......ltsls..c.p.l....ss.........h.....p.h..p..p..l..p....h.....Vsh........t.......G.......Gh......u......p......tht...hp.ss..p.ls.tphApph..su.p.s.p.hl.sPhh.hsssp.......htpslh.pcpslpplh.phhpp.....uclulhGIGshtp.....puth.h....ps.h.h.s.p.....p-.h.......p.p.l.tppsAVG.-lhuh.FaDtcGphlp...thpp+...slGlsL.pp.Lc..........p..........h.....sph.lulA..uGppK.s.pAIh.............uuL+....us..hls..sLlTDpssAptlL............................. 
0 153 343 458 +4540 PF00083 Sugar_tr sugar_tr; Sugar (and other) transporter Sonnhammer ELL anon Prosite hmmls-iteration Family \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.46 0.70 -5.91 44 32448 2012-10-03 03:33:39 2003-04-07 12:59:11 19 163 3323 0 14571 74553 5580 356.20 18 81.17 CHANGED hshhsuhuG.hhFGaDsGhIuuhhthhshhhpas.htpppssth.................................................................p.h.ulhVuhh.lGshlGulhsuhhuctaGR+.hullh.ssllhllGsllpshup..........shhhlhlG........RllhGlulGhhsshsPhaluElA...PpplR.GslsshhQLhl.shGIhluhhhshsh.tthss.........tW+l.lulshl.ulhhhhsh.halPESPRaL.l.psch-.cAcpsLt+...hpt.....t.ss.pltc.hsphpts.t.t......hshtplhpths...hh..lhhuhhlphhQQhoGhNslhYYusslFpslGhsss...hhsolllG.llNhshThlu.l...ahl-+hGRR.......phhLhG.......hsshshshhhhs....sshhhsss........phhuhshlshhhhalshFshuhuPlsallsuEhFP.psRstuhulAsssNW..lhsFlluhhhPhhtssls.......hhshhlFsuhhlhhhhasaahlPETKGholEclspha .................................................................................................................................................................h...........h.......h..h.....s.............h....h......s........h................................................................................................................................................................................................................................................h.....h..h...u..h.....h........h....l..u.....t.....h.....l.....G...u...h.........h........h.......G..........h............h.......u......D........+...........h.........G....R.............+....h................s...........h......h.h....s....h.h...h.....h.........h..........h..u..s....h..h........u..h.ss......................................sh.h.h..l...h..hh............................R...h.l...........G.....h...........u.......h.......G......s.........t...............h......s...................s......s.........s...................h.
.....h.....h........s.........E....h...u.....................s.....t....p.......h....R......G....h......h............s...............s.......h.....................p....h......h.......h.......s.........h..............G....h.........h......h....u..........h........h......h.....s....h......h..h.......h.s.................................................W......R....h.......h....h.........h....s.....h.....l....................s......l.....l....h.....h.......h............h....h.....h......h.....l.....s........E...S.......P...t....a.....h.....h................p.........t.........p........................t........p....u.............t.........h...tt..............................................................................h.............t...........................................................................................h............h..h................................h...h......h....h.....h....h.....h..................h........h.......................h...............h....................................................h..................h.........h.................h.........h..................s....................h.........h........t............h......t........h..t............................hh...hs......h.....h......h............h........h....................h....h......h.....h....h....h...s...h...........................hh....h.....c.........p.........h..........G.......R....+..................................h....h....h.....h.....s..................................h..h...h...h...h....h......h.h...h..h.h.........................h..tt..................................................h.....h.....h.....h...h..h...h....h.......h...h....h....h....h....h........t..................s.............................s............s..........h........................h....h......h.........s.......E....h...........h...s............t.....h.....R.............s.......u...h.....u....h.....s....h......................h.................t..h.................h....h.....s.....h........h...
..h.......s.....h.....h..h..............h....h.....t.......h.t.............................................h...h...h...h......h..h.....h...h..............h....h......h....h.....h....h.........h......h....h...h......-.s........t.................................................................................................................................................................................................................................................................................................................... 0 3767 7345 11757 +4541 PF01253 SUI1 Translation initiation factor SUI1 Finn RD, Bateman A anon Prosite Domain \N 25.50 25.50 25.50 25.60 25.10 25.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.91 0.72 -4.18 161 2651 2009-01-15 18:05:59 2003-04-07 12:59:11 17 21 1783 5 1143 2005 478 80.00 33 45.58 CHANGED ttppthl.....+lphp......p+.pts...........KtVThl...pG.ls..h.tth............-lcpluKpLKpchusG.Gol...................t...sp.IplQGDppcp.lhp....hL...cpshtscp ...............................s..tpshl.....+Iphp.............pt.pts...........KsVThl...pG.ls.......h.p-h............-LccLu.ppLKK+huCG.GoV...................+..st.IplQ.GDpRcp.lpp....hLtpc.Ghthp................ 0 338 612 910 +4542 PF03846 SulA Cell division inhibitor SulA TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 30.00 30.00 30.10 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.82 0.71 -4.41 9 642 2009-09-10 23:49:51 2003-04-07 12:59:11 9 1 637 6 70 245 12 111.40 62 68.04 CHANGED hpShass+usptShssppsupsssttsssGLISElVYpEDQPhhTQL.LLPLLQQLGpQSRW.LWLTPQQKLSRpWVQpSGLPLsKVhQlSQlsPhpTV-uM.RALpTGNYSVVlGWLs ....................................a.pStasp+uspasssspphAc.suspsssuGLlSElVY+EDQ.........PMMsQL.LLLPLLQQLG..Q..Q..SRWQLWLTPQQKLSREWVQuSGLPLTKVMQISQLuPpHTVESMlRALRTGNYSVVIGWL............. 
0 5 17 43 +4543 PF00916 Sulfate_transp Sulfate transporter family Bateman A anon Pfam-B_223 (release 3.0) Family Mutations in Swiss:P50443 lead to several human diseases. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.58 0.70 -5.40 34 7799 2012-10-03 01:44:59 2003-04-07 12:59:11 15 65 2994 0 2983 6879 2252 253.50 24 45.21 CHANGED hGlhRLGhllcalScsllsGFhuGsAlhIhlpQlcslhGlp....sppsthhslhpulhp.hpp..............hph.shlhuh.hLhhLhhhphls.....h......thhhhsssssLlsllluThhshhh........ptcthslshlGclssG..lsshslsp....hshshltphh.huhshullulhEulhsucshuthpshpl.DuN+EhlAhGhsNlluuhhushsuoGuhuRSslNhp.uGs+TtlSslltulhlllsllhlsslhthlPhulLuulllhsshu.Lhchpphhp..la+ls+hDhll ...................................................................................................................................hGhh+.l...Ghlh.p.a.lstsV.lhGF..hsulul.hI...hh..sQ.......ltt..h.h.....G.ht...........................t......t...h.....h.........t...........h.h.....t....s....l.h.......h..t.................................t.......s...h...h...h....u...h....h..s...l.h..h..l...h..h...h..h.hhs....................................................h..h..h...h...h..h...P...s.s.L...l..s..l.l.h......s...s.h.hshhh........................hhp.h.......s.l...s.....h......h.........G......p......l......s......su..........l.s.......s......h.......t.......h..P..........................h...............s.................h............p.................h............l.........t......t......l.....h..s.....s....u..h.....s........l.A...h....lu..h...l.........E...o...l.h..s............u....p..s....h......s......t.....h.............p......s......t..............p........h...s....s.........N...p...Eh..l.u..........Gl...u.Nlh...u...uh...h.u.u.h.s...ss.uuhuRos..........lNhp.u.G..........u+...o....t................luulhtu...lh.l....l...l....h.l.l..h....h.s....s....l..h..t.h...l....P.....h.us.Luulllhsshs.hh.c.h..p..p.h.h.t..hh.+.h.st.-h....................
................................................... 0 855 1655 2449 +4544 PF03856 SUN Beta-glucosidase (SUN family) Finn RD anon DOMO:DM02469; Family Members of this family include Nca3, Sun4 and Sim1. This is a family of yeast proteins, involved in a diverse set of functions (DNA replication, aging, mitochondrial biogenesis and cell septation)[1]. BGLA from Candida wickerhamii has been characterised as a Beta-glucosidase EC:3.2.1.21. 25.00 25.00 82.40 81.70 17.00 22.30 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.11 0.70 -5.08 50 297 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 121 0 204 287 0 247.20 45 59.94 CHANGED tDGshsCupF..Posp.Gllulsalu...GGWoul...hs...............s...sussCpcGsYCSYAC.sGhsKoQW.Po.sQsosGtSl.GGLhC+.sGpLa+oNss.tchLCt.GsG..sspspNpl.upsVAlCRTDYPGoEsMlIPThVsuG.s.psLoVsDtssYYpWpGhtTSAQ........YYVNssGVSsE-GClWGosuss..lGNWAPlshGA...Ghss.GhoaLSlh.N........Ps.spst.sFslKIhusssss.luGsCpY-..sGsasu...........sGusGCTV .....cGshsCupF..Posp..Gsluls.alu...GGWusl...hs.................s...supsCpsGsYCSYAC.PGhtKoQW..PS..sQsosGpSl.GGLaCc.sGpLh+osss..c.LC.tGsG..usp..shNch..up.sVAhCpTDYPGsEsMlIPThVsuG.sotsLsVscpss..YahWpG..TSAQ........YYVNssGVSs-DuClWG........o.s..u.ss..lGNWAPhshGAGhss.....G.tT......alSlh.N........Ps.s.sst.sFsl+Isusssus.lsusCph-.....sGsasu...............sGusGCTV............. 0 29 95 169 +4545 PF03439 Spt5-NGN Supt5; Early transcription elongation factor of RNA pol II, NGN section Bateman A anon Bateman A Family Spt5p and prokaryotic NusG are shown to contain a novel 'NGN' domain. The combined NGN and KOW motif regions of Spt5 form the binding domain with Spt4 [1]. Spt5 complexes with Spt4 as a 1:1 heterodimer snf this Spt5-Spt4 complex regulates early transcription elongation by RNA polymerase II and has an imputed role in pre-mRNA processing via its physical association with mRNA capping enzymes. 
The Schizosaccharomyces pombe core Spt5-Spt4 complex is a heterodimer bearing a trypsin-resistant Spt4-binding domain within the Spt5 subunit [2]. 20.20 20.10 20.20 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.70 0.72 -4.23 64 521 2012-10-02 20:41:53 2003-04-07 12:59:11 8 26 429 5 355 529 81 83.80 32 11.04 CHANGED slauV+s.psGpE+slshhlhp+.....ppps.......l.pIh..Slhssss....lcGYlalEAppp.sslppslpulhplpsh..........th..lslcEh.chL ..........plWsV+C.phGcE+plshtlhpKh.th.tssp.......l.pIhSlhs.-p.....lKGYIYlEA.+p.scVcpAlcGlsslhhth..............th..VPlcEhsclL............ 0 111 199 297 +4546 PF01975 SurE Survival protein SurE Enright A, Ouzounis C, Bateman A anon Enright A Family E. coli cells with the surE gene disrupted are found to survive poorly in stationary phase [1]. It is suggested that SurE may be involved in stress response. Yeast also contains a member of the family Swiss:P38254. Swiss:P30887 can complement a mutation in acid phosphatase, suggesting that members of this family could be phosphatases. 
20.90 20.90 21.20 21.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.54 0.71 -5.01 133 3045 2009-09-11 04:46:26 2003-04-07 12:59:11 12 9 2646 51 1005 2370 1174 189.50 39 68.06 CHANGED M+ILlTNDDGl....pAsGlpsLhcsL.pph...t.-..VhVVAPcpppSusupulTlpcPL+l.pph.............................h......haul..s.GTPsDCVtlulptl.h.tp...............P..DL..VlSGINpGsNlGpD.lhYSGTVuAAhEushhGlPuIAl....Shssp.........................t...............a....phAsphstpl.lpplh.ppsh.s........t....sslLNlNl..Ps......hs.hppl..pGl+lT+hGp+.tatpp .....................................+ILloNDDGl........pAsGlpsLtcsL..cph................s-....VhVVAP-.............p.s.+.SG....s.Sp.u....l.T...L...pp...P....L+.h.pht............................................................................ttta.ul..sGTPsDCVhlulssl..hpt....................PDl..VlSGINtG.sNl..GcD....llYSGT....VuA.AhEGt.h.h.G..lPA.lA..l.Shssp....................................................t.........c....a....csAup....h....s....ppl..lpplh.....pps....l....s..........s...........splLNlNl....Ps.......hs...hppl.......+Gl+lT+hGp+t...pt........................................................ 
0 297 627 859 +4547 PF02104 SURF1 SURF1 family Mian N, Bateman A anon IPR002994 Family \N 20.80 20.80 20.90 21.00 20.70 19.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.82 0.70 -4.50 181 1742 2009-09-11 11:18:05 2003-04-07 12:59:11 10 8 1323 0 681 1517 1849 208.90 23 76.92 CHANGED s......lhshslhls.LGhWQlpRhp.Kpsll.splppphp...s.sPl.slsth................t.............................................................pappVpls.Gpahsppphhlt.spsp.......p...................st........sGa....hVlsPhphs.........su.........phlLVsRGal.s..........t.tst............s......................................s..lplsGhl+......sp.h.tt.hh.............ts..s.t..sphhhs.........hDlst....hu.....pthsh..........htPhhlp....................................................................................................s..sh........shhp.........hsspHhsYAlpW..F..............uLAhh ...............................h.hhhhshhht..LGhWQl.pRhp...K....pphh....splp...pp..hp....t..sPl..slsph...................ts....................................................................................................................................paR+Vpls.Gpa.s.s.pp.hhlt..spsh................p.............................sp.....sGa....hVlTPhphs.........sG................phl..LVsRGal.st...p..t.s.ts......t........s..............................................................Gp.lsl..sGhl+..........spst.....t.hh.......................ts.....sss.....tthhht.............hch..s.t....hu.................p.thGh.............lts.h.h.lphstp.............................................................................................t.ss..........lss.......shhs.....................hsspHhuYAlpWauhuh.............................................................................. 
0 198 411 560 +4548 PF02077 SURF4 SURF4 family Mian N, Bateman A anon IPR002995 Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.74 0.70 -5.03 6 375 2012-10-02 13:32:46 2003-04-07 12:59:11 10 12 249 0 240 803 568 224.50 40 84.33 CHANGED N-hhu+sEDhAE.shh+ps+sYLPpluRLhLluTFhEDGlRhhhQWs-QhpYhspsWshsaahAslFllVsllu.LhGsshVhhRp+VshAsGlLhhlllLQslAYullhshcFLhRNhullGGLLLllAEohlcp+o..hFAGlPshs-..scsKsYh.LAGRVLLlhMFloLl+F-...hSahpll.sIlGsshhlhVsIGaKTKhuAlhLVlhLhshNlhlNuaWolPpppshRDFlKYDFFQTLSlIGGLLLllshGPGtlShDE+KKcW .......................................................................................tphE-hh-...ph.c.hK.aLPtluRhhllsTFhEDulRhhhQWs-Qh.Ylpth..hp.........h.....shhls..p..hFlhlNllu.lsusshl...lhRp..hsphAshsLhsllhh.QsluY...u.l..l.....a....D..h...pF......hhRN.lul.hG.GLLlllu.-.....Shsct+p......hF...A..G..lPp.ht-.....pp.KtYh.LuGRlLLlh..hFh.....s.....h.lh.....p....................ho.h..h.......p.........l........l.............s.l.....lG.h...h...h.......h....l........hVsl.Ga.K....sKh...uAhhLV........lh.L.hhN.l.h.h.NsaW....s.........h..............cs.h.+D..FhKY.D.FFQ.sl...Sl.lGGLLLlVshGPGtlShDE+..KK.a............................. 0 72 116 186 +4549 PF01617 Surface_Ag_2 Surface antigen Bateman A anon Pfam-B_1042 (release 4.1) Family This family includes a number of bacterial surface antigens expressed on the surface of pathogens. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild --amino -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.63 0.70 -5.17 10 3846 2012-10-03 17:14:37 2003-04-07 12:59:11 12 3 1018 0 167 4193 20 149.70 31 84.65 CHANGED ssuGuFYIuscYsPuhupFssFSscEs.....ps.TpsVFGlKpDhsshshspststs....Fs..sYshpapsN.FhGFuGAIGYuMsGsRlElEsuYEpF-sK.....Np.......................................Gsshcs..............................................................DA+casALo+p..s....t.shsssphlhlcN-ulsshSlMLNuCYDlhpEGlPloPYlCAGVGuDhIS.....hhpshNPKhSYQG..KlGlSYsIoPElSlFsGG+YH+VlG.NcFc-Is.shpsssssssup....sthAtlTlsssaFGsElGsRFsF .......................................................................................................................ss.............................................................................................................................................................................................................................h.u.......t.s...Rh-h-h.hp....h.h.......................................................................................................................................................................................................................................................................................................................................................................................................s..culss.hS...sh.lNshYD...l...h.h..-...s...h.......s..l...oPY.....l..ssGlG.........u..s..hls.........................h.hs..p.....t....t.....h..uatu..........KsGlSYp.l..o..Pclp.lasG......uhY...atshs..tp........a...t........................................................................................................................................................................................................................................................................................................................... 
1 26 99 118 +4550 PF00084 Sushi sushi; Sushi domain (SCR repeat) Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.70 17.00 20.70 17.00 20.60 16.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.52 0.72 -3.62 66 23751 2009-01-15 18:05:59 2003-04-07 12:59:11 15 763 241 368 12142 20429 36 57.10 26 31.65 CHANGED Cs....P......h..s..ht.t.......spaphssplpapC.psGaph.tut......sthpC...t....supWsst......s.p..C ...........................................Cs....Ps...............pG....hp..........................ssaphG.s.p.lp..a...p..C..p.....s......G......a...pL..tGs.........................sphpC.........ts..........su..p...Woss........P.t...C.................... 0 3225 3866 7035 +4551 PF04099 Sybindin Sybindin-like family Wood V, Finn RD anon Pfam-B_3240 (release 7.3); Family Sybindin is a physiological syndecan-2 ligand on dendritic spines, the small protrusions on the surface of dendrites that receive the vast majority of excitatory synapses [1]. 
21.30 21.30 21.40 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.80 0.71 -4.48 11 676 2012-10-04 00:47:01 2003-04-07 12:59:11 7 13 308 17 471 982 9 142.70 28 83.69 CHANGED lasLYlhN+uGsLla.p-appsh.t.............................................................................thsoNEpllluuhhaSl+uIsuplSP............h.up...........sGlchlEossF+LahhpThTGlKFlllT-sss.sth-sLl+hhY-LYoDaVlKNPFYsl-MPI+sELFcppLcphlcsh .........................................................................................................laslalhs.+t.Gshla...pca..t.t...s...t...............................................................................................................t.h.s....ss-...thhl.huhha.........S..lpuhsppLo.P...............................................................................suh...p....h.c.T.spa...+...L.ah.a..pT...T....Gl..K....Fll.h..o......-.s.........t..............t.......................s...h.....h.....c.....s........L....h....p....h...h.h.plYs-a.....V..l..K..NPh....ap.....h......c.....h....P.....l...p.s.-.hFcpplpthlp.h.................................. 0 156 257 389 +4552 PF02383 Syja_N SacI homology domain Mian N, Bateman A anon Pfam-B_1090 (release 5.2) Family This Pfam family represents a protein domain which shows homology to the yeast protein SacI Swiss:P32368. The SacI homology domain is most notably found at the amino terminal of the inositol 5'-phosphatase synaptojanin. 
20.40 20.40 21.30 20.40 20.30 20.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.67 158 1561 2009-01-15 18:05:59 2003-04-07 12:59:11 13 28 323 1 1068 1526 19 302.60 26 34.15 CHANGED saGllGllcl..ps........shaLlllTpp.ppVuplt............c.........sl..a.+lpssphls..lspsthp...................................................................................................................................................................................p....pphhphlcp...........lh....ps...ssFYFS.h..s.............aDLT.............pol.....Q.....ppttttt..........................................................th-.pcFhWNpalhpsl.hphp.........................................tsppalhslIpG.............................aspptphtls.........................................................................t..hplsLIoRRSpcRAGTRahpR.Glc-c..GpVANaVETEQIlh..................................s................................................................s....................................sphhSalQhRGSlPlaWpQ..s.....s..ht........hpPplpls.sh-ssh..tuhscHFppL.hppY....................GslhllNLl...........pp+spE..thLsptapptlphh................................p..tpp...........lp..ahtFDFHpp......s...pt..hchcs 
.......................................................................................................................................................................................................hGllGhlph...t........................hallllTpp.pplu.p.lh.............c...................l...a.clpssphlslppsthp............................................................................................................................................................................................................ppp.hhphlpp...lh.......ts..ssF..YF....S..h......s...................aDlT.....................pshQ....cphttt......................................................................................................................................................tphc.pp.Fh....WNphl.hp..l..hpht.....................................................................................hppahl.sll...pG.............................ahp...t.p.h.ht.......................................................................................................................................tp.hthsLIoRRSpc...+.........AGsRa.hpRG.ls..cc..................GpV.ANhVETEQllh...............................................................................................................................................................................................p..............sphhS..a...lQ..........hR.GS..lPlaWpQ...ss...hp...............hpPp.lpl.....s......th......-s.sh......tuh.p.......pHFppl..hp.pY...............................u.h.h.llNLl..................pp+...stE....thLppt..apptlp.hh............................................t.ttp............................lpahtaDaHp.s+t.....p.................................................................................................................... 
1 394 614 887 +4553 PF02078 Synapsin Synapsin; Synapsin_N; Synapsin, N-terminal domain Mian N, Bateman A, Griffiths-Jones SR anon IPR001359 Domain \N 20.40 20.40 20.70 30.30 20.10 18.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.39 0.72 -3.91 4 232 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 91 20 115 258 0 96.20 58 19.27 CHANGED sstR.s+lLLVID-PHTDWuKYF+GKKlhGDhDI+VEQAEFSELNLsAassGGhhVDMQVlRNGTKVVRSFKPDFVLlRQHAauMA.sEDaRsLlIGhQYuGlPS ...................s....p.+hLLVlD.-.pTDWuKhF+GKKlp.G-aDI+VEQAEFSElNLsAausG.uhsVDM...........pVhR.....NG....o...KVV....RS.F+PDFVLlRQHAauM.u.scDaRsLlIGLQYuGlPS.................... 1 22 30 66 +4554 PF02750 Synapsin_C Synapsin, ATP binding domain Mian N, Bateman A, Griffiths-Jones SR anon IPR001359 Domain Ca dependent ATP binding in this ATP grasp fold. Function unknown. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.35 0.71 -5.22 4 298 2012-10-10 13:17:03 2003-04-07 12:59:11 9 8 99 20 154 323 10 166.60 58 35.89 CHANGED NSLaSlYNFCDKPWVFAQLlpIaKpLGsEcFPLIEQTaYPNHKEMLosPsFPVVVKlGHAHSGMGKVKV-NpHDFQDIASVVAlTKTYATsEPFIDuKYDIRVQKIGsNYKAYMRTSISGNWKsNTGSAMLEQIAMo-RYKLWVDoCSEhFGGLDICAVcAlHGKDGRDYIIEVhDSSMPLIGEHQ-ED+QLIsELVlsKMsQ ............................................NSL.SlYNFpsKPWVFuphlp.l.+pLG...............-pF.PLl-QTa...a...P...Na+........p..M.....l..o.h.sp....F..PVVVKhGHAHuGhGK..........l.KV-Nph.DF...QDI.uSVV.A.....hs.....p..T.......Ys..TsE..PFI.D..u.K.Y.....DlRlQKIGsN.Y..K.A.YM................RTSIS.G.NWKsN...T...G.S.AMLE.Q.lA.Mo.-RY....+l...WV.Ds.C..SEh..FGGLDICAVcAlHuK..DG+..DaI.h.E.....V.....h.....ssoMPLIG-pt.tED+pLIs-LV...ls+Ms......................................... 
0 29 39 95 +4555 PF00957 Synaptobrevin synaptobrevin; Synaptobrevin Finn RD, Bateman A anon Pfam-B_303 (release 3.0) Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.66 0.72 -4.38 111 2176 2012-10-03 05:55:03 2003-04-07 12:59:11 16 31 380 40 1410 2035 37 80.20 29 38.98 CHANGED ssc+lpplpsplcclpslMpcNl-.+llcRG-+l-tLsc+o-sLpssuppFcppupcL++phWW+Nh....KhhlllshllllllhlIllhhss ......................t..cpltplpsplc-Vps.......l.MpcNI.-.c...........V.L.cRGE+L-........p....L...s...-.......+o.-sLp.ss.u....p.pFc...ppApcl....p....pph..hhpp.h.......thhhhhhhhhhhhlhhh................................ 0 499 779 1125 +4557 PF01284 MARVEL Synaptophysin; Membrane-associating domain Finn RD, Bateman A, Yeats C anon [1] Domain MARVEL domain-containing proteins are often found in lipid-associating proteins - such as Occludin and MAL family proteins [1]. It may be part of the machinery of membrane apposition events, such as transport vesicle biogenesis. 
32.20 32.20 32.20 32.20 32.10 32.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.85 0.71 -4.37 93 2286 2012-10-03 17:26:12 2003-04-07 12:59:11 18 15 277 0 1320 2031 0 146.20 17 64.66 CHANGED hhh..tsllRhhp...hlhu..lllhulsushh.......................................................htt..sthsashhsushshlh.shhhlls.h........hh.thshshlhhsh-hlsslhahsuhsshAsthps..................................................................tt....sttspptpAussFsahshhlahssshh .........................................h......hhl+hhp....hlhu.llsauhh.ushh.................................................................st....s.s.....stp.a..hlhlush....salh...sl..hh...ll...h..hh.........hht.p...h...th.s...h..h..s...h.hh.ss.l..h..shh........a.h.s.ushhh.u.tt.hss................................................................................................t...h.sthtAu.........s.s..Fu..ahshhhahsshh............................................................................................................ 0 214 413 813 +4558 PF01034 Syndecan Syndecan domain Finn RD, Bateman A anon Pfam-B_1182 (release 3.0) Family Syndecans are transmembrane heparin sulfate proteoglycans which are implicated in the binding of extracellular matrix components and growth factors. 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.17 0.72 -4.32 8 599 2009-09-13 18:19:46 2003-04-07 12:59:11 15 20 109 8 276 557 2 67.10 41 10.96 CHANGED SpslhcRpEVL....AAVIAGGV.VGlLFAlhLVhFllYRM+KKDEGSYuL-EPK...u....Nuu.YQK.sss+EF ............s...............suhllGhV.suhhhsIhll..LahhY...+h..Rp+DEGSYpl-E...s+.......s....................................................... 0 38 59 144 +4559 PF01387 Synuclein Synuclein Bateman A anon [1] Family There are three types of synucleins in humans, these are called alpha, beta and gamma. 
Alpha synuclein has been found mutated in families with autosomal dominant Parkinson's disease. A peptide of alpha synuclein has also been found in amyloid plaques in Alzheimer's patients. 21.10 21.10 21.20 23.20 20.90 20.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.69 0.71 -4.24 4 229 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 66 9 81 186 0 111.10 60 94.74 CHANGED MDVFhKGhShAKEGVVAAAEKTKQ...........GVsEAAEKTKEGVhYVGoKTKEGVVQuVsoVAEKTKEQAssVGGAVVouVssVApKTVEGAtNIAAAoGlVK+--hsp.....psPQEtstEsh.p...-P.sEuhEtspppG .............MDVFMKGhShAKEGVVAAAEKTKQGVsEAAtKTKEGVhYVG...........oK..TKE.GVVpu..Vs.o.....VAEKTKEQsstVGsAVVouVssVApKTVEG...AtNIA.AATGlVKK-phsp......th.sppth.t................................................. 1 4 10 31 +4560 PF00837 T4_deiodinase Iodothyronine deiodinase Bateman A anon Pfam-B_1631 (release 2.1) Family Iodothyronine deiodinase converts thyroxine (T4) to 3,5,3'-triiodothyronine (T3). 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.41 0.70 -5.36 4 345 2012-10-03 14:45:55 2003-04-07 12:59:11 12 5 107 0 155 377 21 150.70 28 86.45 CHANGED LhhphLhsLlllPhFlusshhLhLLD.spl++Hllthtp+...................sPshshu-hs.RhhThpuL+sVW+uQhLDhaKps+.GGsAPNopVVplsGpc..................C+ILDFupGpRPLVLNFGSCTUPPFhu+hsAFpRLlpcapssADFLllYIEEAHPSDGWshssss...apI.pHQsLpDRlpAAplLLptA...PsCtVVsDTMsNsSstAYGAhFERLaVlQcG+IhYpGG+GP.uYplpElRsWLE+hp 
................................................................................................................................................................................................hhhs.t.h..h....h.....p.h.p.ts...........Gt..APs...s.lh...tt....................................................................p....t..hp.hhlYltEAH.........s.........s.Dt....W..........................h..h......p.ps..p-Rh.hAp..h.....tt........................h..hhDth.ts.....at.....hh....................................................................................................... 0 50 60 90 +4561 PF03903 Phage_T4_gp36 T4_tail_gp36; Phage T4 tail fibre Finn RD anon DOMO:DM03599; Family \N 27.30 27.30 27.80 27.70 27.20 27.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.32 0.70 -4.25 6 60 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 45 0 2 59 0 172.50 31 32.80 CHANGED MADLKlGSTsGGSVIWHQGNFPLsPAGDDlLYKoFKIYoEYNKPQAsDNDFVSKANGGT....YtppVhFpcGlslsss...ssshsGIasGsGDGAoh-ssshclhSWhGIGFcsu.....ptsGsttlhhsspssphssRuslpus...hs.s.sP.ss.cLTRK..DY...VDusINTVTA.........NANSRVLRSGD..TMTGsLTAPNFFSQNPASQPSHVPRFDQIVIKDSVQDFGYY .............................................MADLKhGoThGGs.lWpQGNhsL.PsusplhYKsa+.lYoE.sKPpA.s-..shVS.pusGGs...........h.t.lth...ppul..ph......stt..shhhuttsuss...........ts..sh..h.u..shuhtst...........ts..hhh...s.spss.httc.h...h.st................hthp..D....V.u.hshsst.........Nstph..ptss..shsG.L..sssphh..t................................................................................................................................................................ 0 0 0 0 +4562 PF03906 Phage_T7_tail Tail_fibre_T7; T7_tail_fibre; Phage T7 tail fibre protein Finn RD anon DOMO:DM04804; Family The bacteriophage T7 tail complex consists of a conical tail-tube surrounded by six kinked tail-fibres, which are oligomers of the viral protein gp17. 
21.20 21.20 21.20 21.80 21.10 20.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.83 0.71 -4.59 10 136 2009-09-10 23:06:56 2003-04-07 12:59:11 9 13 86 0 4 133 238 162.20 35 27.80 CHANGED MAsT....ohhsYshsGosTsFsIsFE.....YLARpFVsVTLlu.....Dp+hLslNsD.YRFsssTTIohopA.hsPAsGasl.IEIRRhTusTDRLVDFsDGSlLRAhDLNlSQlQoLHlAEEARDhs.......ADoIGVssDGslDA+..GR+IVN.LAsussstDAVshtphp.shssosh .............s...hhsaphDGsspsFslsFt.....YLs+p..VhVol.t......-pp..hsls.s-..Ypas..spsoIp..lspA......PA..s.........G......sp.lcl+RsTs..ssshLl-FscGShL.....puhDLshsphQshalApEut.Dhs..........................ss..sh.u.lssc..s.clDAc......uc+Iss.husshs.stDsssht.hp................................................................................................................... 0 1 2 4 +4563 PF02217 T_Ag_DNA_bind Origin of replication binding protein Bateman A anon Pfam-B_827 (release 5.2) Domain This domain of large T antigen binds to the SV40 origin of DNA replication [1]. 25.00 25.00 34.90 34.50 19.10 18.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.25 0.72 -3.59 11 427 2012-10-02 18:54:06 2003-04-07 12:59:11 11 5 43 20 0 407 0 89.50 71 14.60 CHANGED .DhPssLcuaLSpAlhoN+T.ssFLlaTTpEKsppLYspl......p+asspaphh.tpasssth.LallTss.+HRVSAVpNaCpKhCTVSFlhsKGVpKp ......KDFPsDLHsFLS..QAVFSNRTlAsFAVYTTKEKAQILYKKLM.....EKYSVTFISR..Huh..u....uHNI.....LFFLTPH.RHRVSAINNaCQKLCTFSFLICKGVNKE... 0 0 0 0 +4564 PF05010 TACC Transforming acidic coiled-coil-containing protein (TACC) Moxon SJ anon Pfam-B_4807 (release 7.6) Family This family contains the proteins TACC 1, 2 and 3 the genes for which are found concentrated in the centrosomes of eukaryotic and may play a conserved role in organising centrosomal microtubules. The human TACC proteins have been linked to cancer and TACC2 has been identified as a possible tumour suppressor (AZU-1) [1]. 
The functional homologue (Alp7) in Schizosaccharomyces pombe has been shown to be required for organisation of bipolar spindles [2]. 28.80 28.80 28.90 30.30 28.50 28.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.50 0.70 -4.56 13 318 2009-12-17 14:40:01 2003-04-07 12:59:11 9 5 92 0 143 281 0 199.10 50 21.90 CHANGED paSQKDhDAslphs+cEh..p-...........cs.-h+pKaEchppcshEMtKIlsEaEcTIsQhlE-.......................sp+p...Kplo+cplQcllpEK-Q......................shuDLNShE+SFS..................................................DLFKRaEKhKEVlEGa+K.......................................................NEEsLK....KCsp-YLsRl+KEE.................QRYQALKsHAEEKLc..................pANcEIAQVRoKApuEssALQAsLRKE.......................................QM+lpSLE+sLEQK...........................sKEh-ELTKICD-LIuKMtK ...........................................................................................h..pp.Dhsuslphh+pE..lhppE....................hEspEh..+cKYE.Ep+pEsh.EMcKIVuEYEK..TIAQ..MI...E..-..........................................................................................................cQ+p.........pphS.p...pslQpLh.hEK-Q..............................................................AhADLN.SlE+Shu..............................................................................DLF+RYE+hKpV...lEGa+K.........................................................NEEsLK......KCAp-YLuRl+.pEE..................QRYQALKhHAE...EKL-..................+ANpEIAQVRsKApsEpsALpAuLRKE.......................................Qh+V..cSLE+sLpQK...........................s+EhEELTKICDELIuKhtK......................................................... 
0 36 46 81 +4565 PF02202 Tachykinin Tachykinin family SMART anon Alignment kindly provided by SMART Family \N 20.50 20.50 20.90 20.50 20.10 20.40 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.38 0.74 -5.50 0.74 -3.71 12 43 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 25 3 5 60 0 10.90 62 26.56 CHANGED pPcPspFhGLM +P+PppFaGLM 0 0 0 3 +4566 PF04972 BON TAD; BON domain Yeats C, Bateman A anon Yeats C Domain This domain is found in a family of osmotic shock protection proteins (e.g. Swiss:P27291). It is also found in some Secretins and a group of potential haemolysins. Its likely function is attachment to phospholipid membranes ([1]). 23.60 6.70 23.70 6.70 23.50 6.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.71 0.72 -4.22 187 8163 2009-09-11 21:46:55 2003-04-07 12:59:11 12 45 1945 6 2280 5479 2633 61.90 25 43.04 CHANGED psplpstLttp.....tlsst.s.lpVpsp.sGtVhLpGpVs.stpptptAtplApslpG.VppVh.stlplts ...............................tplpstLhtpt......lpsp..p..lpV..psp..sG..tVhLsGpV...optptpp.Atpl.A..p.s..l.sG...Vp..p..Vt.splph..t...................... 0 572 1203 1719 +4567 PF02969 TAF TATA box binding protein associated factor (TAF) Griffiths-Jones SR anon Structural domain Domain TAF proteins adopt a histone-like fold. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.83 0.72 -4.03 10 386 2012-10-10 12:36:46 2003-04-07 12:59:11 12 9 260 1 262 923 23 63.60 41 12.60 CHANGED sollPpEShKVlAESlGIusLs-EsuphLA.DVpYRl+EIsQ-AlKFM+HuKRp+LTssDlDpA..LR ........................................hhst-ol+slAESlGl..s.s.Ls--ss.ph..LApDVpYRl+clh..p-AlKFM+HuKR....p....p.....LTspDlspAL+.................... 
0 93 143 209 +4568 PF04658 TAFII55_N TAFII55 protein conserved region Waterfield DI, Finn RD anon Pfam-B_4395 (release 7.5) Family The general transcription factor, TFIID, consists of the TATA-binding protein (TBP) associated with a series of TBP-associated factors (TAFs) that together participate in the assembly of the transcription preinitiation complex. TAFII55 binds to TAFII250 and inhibits it acetyltransferase activity. The exact role of TAFII55 is currently unknown. The conserved region is situated towards the N-terminus of the protein [1]. 25.00 25.00 30.10 27.40 23.40 22.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.18 0.71 -4.74 35 402 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 286 0 275 378 1 157.60 35 40.36 CHANGED lEpQhILRh.P......pss-hl+pulpssshs...........slsh+....p-tR+AsVplssphau..ApLVDLPsIlEuhKThD+.KshaKoADIsQMLlshp................lps-p.shpht.........................................ph.tp.tpcpapa...HGlTPPh+slR+RRFR.+chsc...............pthpplEccVccLLct.............DpcAp......s ......................................................lEpQFILRhss...................p.sphl+phlppsphs.p.................hslphc.........tDsR+uhlpls..s..........t...hs..ApLVDLPsllEuh.K.ThD+.KshaKoADIsQ....MLlstts......................cp.shp.s..................................................tp.htpcpa.as...HGlTPPh+slRKR.RFR.Kphpp........................hph.clE....c-VccLLptDtpA..s................................................ 0 89 145 216 +4569 PF05069 Phage_tail_S tail_comp_S; Phage virion morphogenesis family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family Protein S of phage P2 is thought to be involved in tail completion and stable head joining. 
24.30 24.30 24.50 24.40 24.20 24.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.09 0.71 -4.44 46 1328 2012-10-01 19:49:39 2003-04-07 12:59:11 8 5 800 0 174 1065 22 130.20 23 91.49 CHANGED s-hptlpphLstLhtp...sss+ptlh+pluppL+cupppRhptQpsP.DGssatshp.......htt+ps+h+ct..hhpph....phuchlpspsssssss....hGssphhAtlHQaGh....................+s.....t.....plp...............hPsRshL......GloppDcphItchlhpaLs .............................................h...tthpphhptlhtt......tthpt..hhpplup.p.lcp.s.p.ppph.pt...pp.s.P.cG..p..satshp.....................h.+..t..p.....t.t..........p..h...pp....t.....h.............htch............ph.tp..lp..h.p.s....ss.s..ts............G...p.s....t.....h..Atl....HpaGh....................................................................pt...............hp........................................................................hstR.hL.........Ghs.tt.s.......p.l.phl.....h............................................................................................................ 
0 44 110 145 +4570 PF02203 TarH Tar ligand binding domain homologue SMART anon Alignment kindly provided by SMART Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.68 0.71 -4.42 88 3556 2012-10-02 01:04:29 2003-04-07 12:59:11 10 31 949 15 528 4206 102 164.70 20 30.56 CHANGED hhp+lsIpspLhhlluhlslLhlssusLuhhuhppuspslpphhpsphtppttlspu...hhpsRhsLsR...hhhhsts...t.....sphlspA.cptlspuppsaptahshsp..ss.t-pp.hssplpppapp.hppulpshhshlpuu.shsthhphsspphpshapshtpshtp....pssps ........................hppl..+lhstLhh...lLuhh.sl..L....l..s.o..uul.shhu...lp...p...s...p...p...sh......sh.pp.....t.p.p...p.s.t..Lsp.s...h...s..........h..h....p....s.R..h...s..Ls+s...............hthhh.sp.....t........................tphlss....A....p....pp....L.p....p...u....p.p.t...a....p....p....a....t........s....hsh.......ts....tst...s...hsp...plpp...p....ap.t....h...ps...ulpt....h....h..p..h.hps....u...p....hs...s..a..hs...t.s..s..p...t..hp..shhpthhtsh.t............h.............................................................................................................................. 0 59 179 344 +4571 PF00539 Tat Transactivating regulatory protein (Tat) Bateman A anon SCOP Family The retroviral Tat protein binds to the Tar RNA [4]. This activates transcriptional initiation and elongation from the LTR promoter. Binding is mediated by an arginine rich region. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.89 0.72 -4.35 83 7178 2009-09-12 06:54:57 2003-04-07 12:59:11 13 3 90 16 0 5391 0 64.90 66 73.19 CHANGED M.....-slDPplEPWp+PGSQPpTsC.NpCYCK+CCaHCQ.....lCFlpKGLGISYGRKKRt.R.R+sspsspsH .................M......EPVDP..pLEPWpH..PGSQ.PcTAC.ssCYCKKCCaHCQ.....VCFlpKGLGISYGRKKRR..QRRRs.Pp.supsH............................... 
0 0 0 0 +4572 PF01026 TatD_DNase UPF0006; TatD related DNase Bateman A anon Pfam-B_1370 (release 3.0) Family This family of proteins are related to a large superfamily of metalloenzymes [1]. TatD, a member of this family has been shown experimentally to be a DNase enzyme. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.93 0.70 -5.18 98 8430 2012-10-03 00:45:34 2003-04-07 12:59:11 16 26 4852 19 2301 6563 3019 252.30 29 93.02 CHANGED lDsHsHLsh..th.................tp............hsphl.pc.....sppsslptl.l............................ssussh................t.php..pshplucp..ast.lasslGlHP...tpsp....................................tpchh..........ppl..pph...h.pcscllAIGEhGL.....Da..............hps.sst...........ctQpc.lFcpplplAcch.shPlllHsRc......Apc.........-hlcll........................cp.t.hsth...tslhHs............................................aoGshchspphl.ch.....GhalSluuhlsa.p..upp.....hpcll..pplPh-+lLlETDuPalsP.................................sh+........G.ppNcPshlhpssctlAc.l+s.............hsh--ltphsppNspclF.s 
..........................................................................................................................................hDoHsHLs.....ta....................t.t.-................................hspll....tc...A.t.p.s.GV...p....t..h..l.....................................................s..s.u.ssh......................................................p..shp......psh.p.L.upp...............as..t..........l.....a..s.sl.G.l.....H.....P..........hp.spp............................................................................................h..pptsh............ptl..pph...........t....pps.....c....l..V..A..lGE..h...GL.Da....................hhpt.sst..............................................phQpc....sF..ppQl.....p..l.....A.....p.c......h.....s.......h....P.....ll..lHsRc........................App..............................................-.h.h.plL...........................................................................................cctt...stt..............sGlh.H..s...............................................F..oG..s....h.c.h...A.pphl..ch..................................Gha..lu..hu..G...h..lT..acp......upc.................................l+-sh.........ptl.P..l..-.....+l...LlET....D.u....PaLsP..........................................................................................hPh+......................G...cpNcPsh.....l.ht....l.s...chl....Ap..l+s......................h.s....h.-...cluph.oppNstplFt..................................................................................................................................................................................... 0 745 1397 1904 +4573 PF03430 TATR Trans-activating transcriptional regulator Finn RD anon Pfam-B_4420 (release 6.6) Family This family of trans-activating transcriptional regulator (TATR), also known as intermediate early protein 1, are common to the Nucleopolyhedroviruses. 
20.40 20.40 25.10 22.50 20.10 19.60 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.95 0.70 -5.88 5 60 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 43 0 1 63 0 525.40 31 92.51 CHANGED pMpslppS...YsusSTPs+ssFspu.pE.sshp.p...ph.phsT-hsschshssh-suh.cos.ss...shos.u.hps.hcpscp.spAssp-ascEpstpshsEtsouhosp........t.hsEohscottsssspR+uS..............ElDSDsDsu-SScG...cKhssKPKhRp+YKKuTIQsssoLoccppasTpICTVAssspIs+YFtpD......................................FusaLpch+o-sshsuNRFSDYISETGYYMFVVKKuEc...KPFE..VlFAKaVsNlspEYTNNYYMVDNRVFVVSlN+lRFMISYKLV+EpGI-IPPSpslCsDApAER.....sshKCYFs-VKc.sFpssLINaFNLDMaYuQTTFVTLMQSlGEsKosMLLNKLYcMaQD+oLFTLPIMLSRKEPslE-s...........spsssasSsYVuQIlKYSKsVpFPpssPsptVhDcL..................slIVTQKSoLTYKYSSVANLLFscYt.p....pDNNA-uLKKVKKE.DGsttLVEQYLotNpN.DcTSHNFIVLsFK..NDERLTIAKKGhEFaWIoGEIKDIsVsDLIcKYs.RasHHVF+IsNVNRRESTThHNNLLKLLuLlLQNLlcL-DlpcaAspshsCpYc .................................................................................................................................................................................................t.........t...........t..t......tp.h....h..stp...p..p.............sps......t.pt.tppso..................................................................c.spp..csspusp......pp.h.+s...Kh...c....phcKtthps.p.tphp.....pp.p.s.p..l.sp.lts.......tph...s..p..hh.c...........................................................htshhtph.....s...spt............pspR.Fssah.psuYYMFlVp.cscs.....c.sFc.........lhasphVpsVs.EYsspYhhlDphVhVVohs+hRFMISYpLlpchtIcIP.ppphspc...thtpp.....ss.p..CaFp-VKs.tFhshLhshFpLDhhYsQsphshLhpSlGEpKsthlhpplhpMhpD+sLFTLPl.loRKEs..pps................................sp..sSsYVppIlchScs...lpF.p....s....s.....sph..hhsp......l...................................s...hhp..pp.s.hTYKYuSVAplLasp.............ppps.sspLhKlKKE.sGshtLlEpYLststs.s.puaNFIllshK.....sDERlTIlKps.-FhWIsu.I.KD...I.ssDlIpKYp.pasHHlFslspsNR+E.sshHNshlKLLuhhhpsllsls-hhphAppphsCpa............... 
1 1 1 1 +4574 PF02668 TauD Taurine catabolism dioxygenase TauD, TfdA family Bashton M, Bateman A, Mifsud W anon COG2175 Family This family consists of taurine catabolism dioxygenases of the TauD, TfdA family. TauD from E. coli Swiss:P37610 is a alpha-ketoglutarate-dependent taurine dioxygenase [1]. This enzyme catalyses the oxygenolytic release of sulfite from taurine [1]. TfdA from Burkholderia sp. Swiss:Q45423 is a 2,4-dichlorophenoxyacetic acid/alpha-ketoglutarate dioxygenase [2].\ TfdA from Alcaligenes eutrophus JMP134 Swiss:P10088 is a 2,4-dichlorophenoxyacetate monooxygenase [3]. Also included are gamma-Butyrobetaine hydroxylase enzymes EC:1.14.11.1 [4]. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -12.03 0.70 -4.72 123 5659 2012-10-10 13:59:34 2003-04-07 12:59:11 11 59 1664 78 2098 5388 5861 240.80 19 77.33 CHANGED ph..psls.......................hs.t.........................phtpltptl.tppGlllh+...shs.......h.......sspph.ht..................hupth..G....l....h........................................................tt........pssshhsts........................hsaHsD...s.ah........tssth....thLhsh..psss...............pG...Gp..Thhss..shtsappLs.........sphhptl..p.slphhpshttthh.....................................................ttpP.....l..lphc.............s..sucpshasss.........h......th........................................thscups.....hh.....ctlhp..hh.p.....ps..chphpapap.t.....GDlllaDNtpshHuRs.sa........................stpRplh+sh 
........................................................................................................................t...................................................httl.hth..l....ph.....thl.hh+.......s.................h.........s.tt.....t..........................hspth........G...............h...............................................................................................................p.t....t..................................................h.HsDhs...a.................tssth...........thltsh..phst.....................................tG.......Gp.......T.h.a...s..s..hhtAa..ct..Ls..........sthp.phl.........p....slps.h+.s.httsht...............t......................................................................t....hhpP.....l......Vc.pH.............................P....o....G....c..p..s....l..ahst.......hspph...........sh.........................................................................s..s.-.u.pt......ll...............p.Lhp....th..p........ps.......chth.p.a.c.Wp..s....................GD...l.....hhhD.......N.t..ts.Hht...s..........................t..Rhhhth................................................................... 0 584 1221 1766 +4575 PF01361 Tautomerase Tautomerase enzyme Finn RD anon Prosite Domain This family includes the enzyme 4-oxalocrotonate tautomerase Swiss:Q01468 that catalyses the ketonisation of 2-hydroxymuconate to 2-oxo-3-hexenedioate. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.42 0.72 -4.36 28 2760 2012-10-01 20:38:22 2003-04-07 12:59:11 16 3 2116 139 636 2083 293 59.20 29 84.64 CHANGED Phlplclh...cGp....osEQKcpLlcclT-shscshGt.spssltVllcEhspssauluGcshspt .....................Phlplclh........cGc........opEQ.KppLspclT-s..lsc.....s....h.ss...s.....p.p....s..l.p....VlIpEhppssauhuGp.h...t......................... 
0 159 365 504 +4576 PF02959 Tax HTLV_tat; HTLV Tax Bateman A, Jeang K anon Pfam-B_1456 (release 6.4) Family Human T-cell leukaemia virus type I (HTLV-I) is the etiological agent for adult T-cell leukaemia (ATL), as well as for tropical spastic paraparesis (TSP) and HTLV-I associate myelopathy (HAM). A biological understanding of the involvement of HTLV-I and in ATL has focused significantly on the workings of the virally-encoded 40 kDa phospho-oncoprotein, Tax. Tax is a transcriptional activator. Its ability to modulate the expression and function of many cellular genes has been reasoned to be a major contributory mechanism explaining HTLV-I-mediated transformation of cells. In activating cellular gene expression, Tax impinges upon several cellular signal-transduction pathways, including those for CREB/ATF and NF-kappaB [1]. 20.60 20.60 22.60 22.20 19.10 18.50 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.57 0.70 -4.89 4 661 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 18 5 0 385 0 177.20 81 65.43 CHANGED HFPGFGQSLLFGYPVYVFGDCVQGDWCPISGGLCSARLHRHALLATCPEHQlTWDPIDGRVlu.....................................................phEPTLGp+LPoLuFP-PGLRPQNlYThWGtoVVChYLaQLSPPlTWPLlPHVIFCHPtQLGAFLTpVPhKRlEELLYKluLTTGslIlLPEDsLPTThFQPuRAP ...............................................SLLaGYPVYVFGDCVQGD.WCPISGGLCSARLHRHALLATCPEHQITWDPIDGRVIGSALQaLIPRLPSFPTQRTSKTLKVLTPPhTHTTPNIPPSFLQAMRK.Y.SPFRN...GYMEPTLGQHLPTLSFPDPGLRPQNLYTlWGuSVVChYLYQLSPPlTWPLlPHVIFCHPtQLGAFLTNVPhKRlEELLYKIuLTTGAlIILPEDCLPTTLFQPsRAP.............................................................................................. 0 0 0 0 +4577 PF00683 TB TGF-bp; TB domain Bateman A anon Pfam-B_82 (release 2.1) Family This domain is also known as the 8 cysteine domain. This family includes the hybrid domains [1]. This cysteine rich repeat is found in TGF binding protein and fibrillin. 
20.90 20.90 21.00 21.00 18.70 20.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.77 0.72 -4.39 34 2359 2009-01-15 18:05:59 2003-04-07 12:59:11 12 321 62 9 1128 1987 2 42.80 35 12.21 CHANGED spCptslsst..sTKsp.CCCshGt....AWGss.CE..hCPhps.ospappl ...........tC.s.tsl.sst..sTKpp.CCC..o..h.Gp.........uW...G....ss...C..E....hCPhts..ospaptl............... 1 138 208 540 +4578 PF00566 RabGAP-TBC TBC; Rab-GTPase-TBC domain SMART anon Alignment kindly provided by SMART Family Identification of a TBC domain in GYP6_YEAST and GYP7_YEAST, which are GTPase activator proteins of yeast Ypt6 and Ypt7, implies that these domains are GTPase activator proteins of Rab-like small GTPases. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.90 0.70 -4.86 101 7104 2009-01-15 18:05:59 2003-04-07 12:59:11 13 151 344 13 4605 6792 78 197.50 19 30.10 CHANGED pphRtpl......W.hlhs.................................t....t..pttt.t....ppIc.hDltRThs...................ppt.ttpp.pLpplLpuauhhss...p.lG.......Ys..QGhshlsuhlL.......................hhh.s-pp....................uFhshsplh............thhhpshatpsh......t..lpthhh...hhcpllp.phhPc...lhp+.l.pc........slp..h.ashpW....hlsl.Fs.pshsh.phshR....lWD.hhh....pu......ph.lhp.hslu.ll.phh...cp.pll 
.....................................................................................................................................................................................................hR..hW..h.t......................................................................................................................................................p.......pt........I...p....hDl....R.oh...............................p.........h.ttt...t.ptL.....h.......p..l.......L....h.......s....a..u.h....h...ps..............p..lG.............................................Y...s....Q........G..........h..s.............lsuslL.....................................................................................hhh....sEtp........................................uFhhhs.tlh..................hhhp..s..ha.p.s.h.............................s...........hptpht................hhp.p.l..l..p...ph.........Pp.....Lhp+..l...pp......................................h.sl..p....t..h....a.s.hpW.............hh.sl.F..........t..p.........p...........h..s.......h....p.............sh+................lWD.hhh.....pu...................................ph..lhh....lslu..ll...h.pt........................................................................................ 1 1678 2429 3584 +4579 PF02970 TBCA Tubulin binding cofactor A Griffiths-Jones SR anon Structural domain Domain \N 23.20 23.20 23.40 23.20 22.90 23.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.78 0.72 -3.91 38 366 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 285 4 243 354 0 91.30 34 79.30 CHANGED QLcIKsusl+RLlKEcthYccElppQpp+lc+h+sc....st.-cYslKpQ..cpVLpEoptMlPchpp+lppshpcLpph..lpstct.....hc-hsptp......cA ..............pLcIKTusl+RLhKEcthYc+EhcpQcp+l.......c+h+s-...........ss.-pY....slKpQ..........pclLpEochMlPcspcRlptAhtcLpph...Lpptpp......hc-hpphh................................ 
0 76 124 191 +4580 PF03558 TBSV_P22 TBSV core protein P21/P22 Finn RD anon Pfam-B_3028 (release 7.0) Family This protein is required for cell-to-cell movement in plants. Furthermore, the membrane-associated protein is dispensable for both replication and transcription [1]. 20.60 20.60 20.60 24.80 17.30 20.50 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.14 0.71 -4.88 2 32 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 24 0 0 31 0 185.20 71 95.35 CHANGED MDsEYEQVs+PWNELYKEATLGNKLsVNVG.EDsElPLLPSNaLsKsRluhSGGYIThRhlRI+IlPLVSRpuGVSG+LaLRDIoDTTG+KLHsTELLDLGKEIRLohpHLDFSVSsRSsVPIVFGFE-LVSPaLEGRELFSVshRWQhGLSAQsYSLP.s.WKVhYQE-sL+thhP..KKAsKTsSs. .............MDTEYEQVNKPWNELYKEsTLGNKLhVNVGMEDtEVPLLPSNFLTKVRVuLSGGYIThRRlRIKIIPLVSRKAGVSGKLYLRDISDTT.GRKLHCTEpLDLG+EIRLTMQHLDFSVSsRSDVPIVFGFEELVSPFLEGRELFSlSlRWQFGLSpsCYSLPpuKWKVMYQEDALKsLKPSK.KKASKTDSS.V................................... 1 0 0 0 +4581 PF01840 TCL1_MTCP1 TCL1/MTCP1 family Bateman A anon [1] & Pfam-B_7391 (Release 8.0) Family Two related oncogenes, TCL-1 Swiss:P56279 and MTCP-1 Swiss:P56278, are overexpressed in T cell prolymphocytic leukaemias as a result of chromosomal rearrangements that involve the translocation of one T cell receptor gene to either chromosome 14q32 or Xq28 [1]. This family contains two repeated motifs that form a single globular domain [1]. 25.00 25.00 43.40 40.90 16.80 14.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.81 0.71 -4.61 8 104 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 31 6 50 77 0 104.10 40 95.40 CHANGED MA.ssshpsphPhPPshLhshccsIYEDEapRsWlslsVEoocp.......s.ss+hcsplTVHLpphsslhpEshsss.lssspLPtMWpL.scspYpusDuoaWRLlcHuQhsssEpLlLcLlss .........................t.phss.Ps+LWlhptslY.DEhpRo.Wlslshc.ssthp.................................V+lpQhpV.hG-.shpPoplssS.LPlMWQLY.Ptc+YpusDSphWcIhaHlp..l.pusp-hlLchlsc........ 
0 3 3 8 +4582 PF03634 TCP TCP family transcription factor Bateman A anon Pfam-B_1979 (release 7.0) Family This is a family of TCP plant transcription factors. TCP proteins were named after the first characterised members (TB1, CYC and PCFs) and they are involved in multiple developmental control pathways [1][2][3]. This region contains a DNA binding basic-Helix-Loop-Helix (bHLP) structure [1][3]. 19.90 17.00 20.20 20.20 19.60 16.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.60 0.71 -3.80 113 2128 2009-09-15 15:36:31 2003-04-07 12:59:11 8 3 670 0 282 2132 1 128.50 28 54.94 CHANGED sss+KDRHSKlpTutGsRDRRlRLSltsAt+hFsLQ-hLGaDpsScTl-WLlppucsAIpclsss................................sssss.............s..........t.....t....................................................tt.ttth.t.ttspppss..hshsc...ps+....................scuRs+.....u+c.coppchphptthss .........................................ppch.Tt.s.RsRR...hRLshthAtpFFsLQ-hLGaD+sS+Tl-WLlspSKsAIc-Lspp.........................................................p.sss..............s.s..................p...t.p..........................................................................................p...t............t..................p.....tp...............................t..........................tt................................................................................................................................................................................................................................................... 0 30 170 231 +4583 PF03645 Tctex-1 Tctex-1 family Bateman A anon Pfam-B_2986 (release 7.0) Family Tctex-1 is a dynein light chain. It has been shown that Tctex-1 can bind to the cytoplasmic tail of rhodopsin. C-terminal rhodopsin mutations responsible for retinitis pigmentosa inhibit this interaction. 
20.50 20.50 21.00 20.70 20.20 19.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -4.18 104 888 2009-01-15 18:05:59 2003-04-07 12:59:11 8 19 260 9 593 840 7 100.80 26 53.99 CHANGED ss.......clppllp.cslpptLts.t.......pY.pt.scspphsppls-plhppl.pp......h.................ppYKalVpshIhp.............................................ppspGl+suopshWDs.....ssDshsohpa..........pNcslaslssVaulhh .....................................s.tlppllcpslpptLts.t.......tY....pt..pp...s....sphsppls......-plhppl.pch.............................................ctYKal.......V..pss.Ihp.........................................................ps.u.t.G.lcsu....opshW.....Ds.....ps..Dshsohpa..........cNpo..h..aslssVaulh........................................... 0 246 318 461 +4584 PF00838 TCTP Translationally controlled tumour protein Bateman A anon Pfam-B_1548 (release 2.1) Domain \N 20.80 20.80 21.20 21.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.05 0.71 -4.25 9 650 2012-10-01 21:11:27 2003-04-07 12:59:11 12 9 399 15 320 645 3 149.40 39 92.31 CHANGED MllYKDlhosDELhSDSash.cllssllaEs-G+hVspp..us-ph.IGANPSAEGu-....EGs--sscpslDlVhsaRLpEp..uFDKKsahsYlKsYMKslps+Lpcpp.Ec..l.hFcKplpsalKplLup..FK-hpFFlGESMss...DG.VslhpYR..EsGtsPahhaaKcGLhE ..........................................MllYpDll..o..s...DEhhSDs.a......ph......c......l.h.s......s..l.......h...a.EV.cu+h..lsp...............sss.s..lG.uNs....SAEts-..............EG...s..-.....s.s..s....pslDlVhs...a..+L.........p.........Eo.........sF.s.....Kcsahs.....Y..l.K.......sYhKplps+.....L...c-...p....p..s..-p......................lp.FppsstthhK.c.l..Lup...a..Ks..hp.F..............ah..G....Eo..Mss......................DGhl...shhpY+........-su...ssPhh.haaKcGLp.......................... 
1 93 147 215 +4585 PF03347 TDH Vibrio thermostable direct hemolysin Mifsud W anon Pfam-B_3633 (release 6.5) Family \N 20.90 20.90 20.90 49.20 20.80 18.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.99 0.71 -4.70 2 84 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 15 1 2 77 0 138.50 78 90.33 CHANGED h-LPSIPFPSPGSDElLFVVRsTThpTcpPVpshVpDaWTNRslKRKPYcDVYGQSVFTTuGSKWLouYMTVsINs+sYTMAAlSGYKcGhSoVFsKStphpL.QcaY.SVtsFVsssEpSIPShsYLDETPpYFVsVEAYESGsGphhVMCISNK.SahECcpQ. .h-LPSl...PFPuPGSDElLFVVRsTThpTpuPVNshVsDaWTNRNlKRKPYKDVYGQSVFTTSGoKWLouYMTVNINs+sYTMAAlSGYKcGpSsVFsKS-pspL.Qc.Y.SVusFVGEs.EpSIPShhYLDETPEYFVsVEAYESGsGphhlM........................... 0 0 0 2 +4586 PF01285 TEA TEA/ATTS domain family Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 21.30 22.40 18.50 19.80 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.82 0.70 -5.32 19 717 2009-01-15 18:05:59 2003-04-07 12:59:11 13 5 216 11 331 600 0 283.20 40 76.13 CHANGED sP........ssh.......................hpstuss..sssc.pspu.h+s.c...................pDuEsVWSs-lEpuFQpALth.PPhGRRK..hS-cGK.YGRNELIAcYIhh+TGKpRTRKQVS.................................................SHlQVL........pShLK........tD.shpphsp.psAphssups..............................sstPphpss.lcshspstYs.h.sss.sshht......hpPhs..hssP...............ssshp..shsosplphlpFshalpt.p.psDphs+..HLas+lptsp.p.ssPslch.-l+phhspFPchpuuLc-.................Lh-chP.ssuhhhl+h.hDl.sssh..t-s.........ssuhYhhs........spYEotcNhshs....sSTKV...............ho.sKQVsEKspp...caAthEpsRhhaR.phSshpEhhhsh.ppL++Lsc+YhhNSs......................ltshThhpVlos.......pso.cs.........Lsl..........s.Vacs.ssppHGs.H 
..............................................................................................t...............................................................t.........................................t..p..shWs..clEpuF.puLthh....GppK..................hp.pu..+..a.G.RNELIucY............I...h......h+........T.........G....K............p......RTRK...QVS....................................SHIQVL..........................+p...h...pst.l.K...................................p......h...t...h.......t........t.................................................................................................s.....h.p........p...........as...........s.............s.......................................t.s.tth..hthp.............hhp..t............h...............h..h...................h........h................htp..............................t.hh..th..h.ph.............................t..a...........p.hpu..cph...h...ssohs...................ho.s+phscphp................................t..tt+..a....o.............................................................................................................................................................................................................................................................................................................................................................................. 
0 80 133 231 +4587 PF03848 TehB Tellurite resistance protein TehB TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.07 0.71 -5.13 6 1247 2012-10-10 17:06:42 2003-04-07 12:59:11 9 8 1191 16 158 6802 2835 180.30 47 77.23 CHANGED FaC+cEDYFpKKYNhTsTHSEVlEAVcsVpPsKsLDLGCGQGRNSLaLuLhGYDVTAhD+NssSIu.Lpcht-+EsLs.lpsulYDINuAslsEpYDFIlSTVVhMFLpscRIPpIIpNMQc+TpsGGYNLIVuAMsTsDhPCslPFSFTF+EsEL+cYYpcWEllKYNEshGpLH+TDtpGNRIKh+FuTML ................................................hhhcsEsY.F.sc.KY...t...h...s..........s..H...S........-.....V.......l......p.......A......h........p..........s......V.......p........s.......G...K.......s....L.DLGC...G.p.G...R....N.....S....L..a...L...A.........t....p........G....a...D.....V...T...A..h..D.....pN.s......h..u...l.......t....p.......l..p.........p.......I...t...p..........t............E.........s......L....-............l......p.......s......t.....l.....h.......D....l....N.....s.......h......o....h..........s.......t......p.....Y.....D....F.....I...l...S....T..V....V.....h....M....F...L.....p.......s.......c....p.......I.....P.......u...l......I...t.N.M.Q.c...p....Tp.s..G....G....Y....N....L...I.....V..s.......A........M..........D.......T...t..........D..a.........P..........C..........s...........l....s........F....P......F...s..F...K.E..........G...E....L..........t..c.Y...Y..c..s..W...E...h...l.K.Y..N..Ess.Gc...LH.+.pDtNGNRIpLRFAThL............................................................................................................................. 0 43 94 129 +4588 PF02765 POT1 Telo_bind; Telomeric single stranded DNA binding POT1/CDC13 Mian N, Bateman A, Griffiths-Jones SR, Sammut SJ, Wood V, Mistry J anon pdb_1s40 Domain This domain binds single stranded telomeric DNA and adopts an OB fold [1]. 
It includes the proteins POT1 and CDC13 which have been shown to regulate telomere length, replication and capping [2-4]. POT1 is one component of the shelterin complex that protects telomere-ends from attack by DNA-repair mechanisms [5,6]. 21.10 21.10 21.40 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.93 0.71 -4.28 48 329 2012-10-03 20:18:03 2003-04-07 12:59:11 12 6 201 64 177 352 3 138.30 21 19.57 CHANGED assls....t.hppsthhshhullhssphs..hps.pt..scahhshpl.....hD.sh.s.t.............stlplhhappphppL........................P.lpp.GDlltlc........................................................php..............lptasschp.........usssst..................ssatlF...psshsssh.................shhtusp...aphssp-pph...lpsLhphtt .........................h........p.sphlslhuVVhphp.s...hho.+G....sDaptslpl.....sDpoh......................sslps.plFpssh.ctL........................Ptlpp.GD.llhl+...................................................................pl+....................................lphapscht.........slssps.......................uhhhF...........tst.ss.sh....................tsst..hphstp-pphhttLh.h..t......................................................................................... 0 42 85 131 +4591 PF03070 TENA_THI-4 TENA/THI-4/PQQC family Mifsud W anon Pfam-B_2039 (release 6.4) & Pfam-B_7791 (release 7.7) Family Members of this family are found in all the three major phyla of life: archaebacteria, eubacteria, and eukaryotes. In Bacillus subtilis, TENA is one of a number of proteins that enhance the expression of extracellular enzymes, such as alkaline protease, neutral protease and levansucrase [1]. The THI-4 protein, which is involved in thiamine biosynthesis, is also a member of this family. The C-terminal part of these proteins consistently show significant sequence similarity to TENA proteins. This similarity was first noted with the Neurospora crassa THI-4 [2]. 
This family includes bacterial coenzyme PQQ synthesis protein C or PQQC proteins. Pyrroloquinoline quinone (PQQ) is the prosthetic group of several bacterial enzymes,including methanol dehydrogenase of methylotrophs and the glucose dehydrogenase of a number of bacteria [3]. PQQC has been found to be required in the synthesis of PQQ but its function is unclear. The exact molecular function of members of this family is uncertain. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.19 0.70 -4.58 25 3227 2012-10-02 21:56:19 2003-04-07 12:59:11 11 23 2223 70 822 2427 629 201.20 20 78.91 CHANGED cpplc.hhpphhp..HPFhttltcGsLs+pphptalhpcYhYltshs+hhu...hhhu+ssDhphhhchhpphh-thssE.....lpha.h+hs-tlGlshp-..lppppssPuscsalshhlsh.uppsshtEshsAhhsphhuhtphs..............................pphpphlp........tt..ahcalssassp.acptlpctpchlcplhphhtspt...........hpchpplhhpshpaEhsFh.stshcs .......................................................t....p.hhpth.hp.......H.sFlppltpGsL..s..tpth..phYlhQDh.hYl.t.pasp.h.hu......hhhs.........+.....s.....s.........s....h....c...t.....h..t..h..........h...h.p...p...h..........t.s...h...htsE......................hph.a...pphh.....p.tl...G..l.s.....t...p-............hpp.t.............hs..Pss.........htYssa.hh........ph...s.........tp......s..s........h..t....p..h..h.u....uh.l.s..s.....hhYt.phu..................................pp.l...t..p.p.p.........................t..a.tpWIp.h.au...s..c......a...p...p...hl.p.........th..hp...h.......lsp.h.h.pt..h.sppt........................hpchpphahpusphEhtFa.phuhp.h........................................................................................... 0 226 474 673 +4592 PF04876 Tenui_NCP Tenuivirus major non-capsid protein Kerrison ND anon Pfam-B_6119 (release 7.6) Family This protein of unknown function accumulates in large amounts in tenuivirus infected cells. 
It is found in all forms of the inclusion bodies that are formed after infection [1]. 21.00 21.00 21.10 277.70 20.90 20.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.09 0.71 -4.72 4 65 2009-09-11 07:41:10 2003-04-07 12:59:11 7 1 9 0 0 61 0 173.80 81 97.70 CHANGED spppsDVuVGPIsGLNYphLYDhLPspVSDNITL.DLK-P-+VTEssKKLILKGsl.lAYHHPLETDshFspVHKHhP-as+SFLEHLLGupspspNuhIDlGhFFslLQspLGDWITcpaLKHsN+MSKpQIKpLlspIIchAKAEuuDTEpYEcVWKKMPuYapsllp.lLHK .V.QRTIEVSVGPIVGLDYTLLYDTLPETVSDNITLPDLKDPERVTEDTKKLILKGCVYIAYHHPLETDTLFIKVHKHIPEFCHSFLSHLLGGEDD.DNALIDIGLFFNhLQPSLGGWITKNFLRHPNRMSKDQIKhLLDQIIKMAKAESSDTEEYEKVWKKMPTYFESIIQPLLHK. 1 0 0 0 +4593 PF03300 Tenui_NS4 Tenuivirus non-structural protein NS4 Mifsud W anon Pfam-B_4315 (release 6.5) Family \N 20.60 20.60 21.00 406.10 20.30 20.50 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.66 0.70 -5.33 5 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 9 0 0 53 0 284.70 82 98.63 CHANGED MSLu+lsSpSKSplLsDDLSE+uAK+h-cuNKKKLALSsRPL.............TKGRhTIDsAATVLGLEPFSFADVRsNoYDMFlAKQDYSIpANR+A+FsIDV-Ph.aF+KPLppaPFFRIATFAlVWIGIKGRAsGTlTFRIIDKSYlDPsDQVEVEVsYPISKNFAVLGSLPNFLuhEDtcNLQV-lsIcDuSVQNCVISRoLWFWGIERTDLPVsMEoQKTVMFEFEPLsD+slNHLSsFuNFTTNVVQ+AVs......sAFTTKuhsElDsthEFGVVKQP+......pIPI..........l+K++Shl- MALSRLLSTSKSKVLYDDLSEESQKRVDNKNRKSLALSKRPL.............NQGRVTIDQAATMLGLEPFSFSDVKVNKYDMFIAKQDYSVKAHRKATFNILVDPY.WFHQPLTHYPFFRVATFAMVWIGIKGRASGITTLRIIDKSYVNPSDQVEVEVRYPISKNFAVLGSLANFLALEDKHNLQVSVSVDDSSVQNCVISRTLWFWGIERTDLPVSMKTsDTVMFEFEPLEDKAINHLSSFSNFTTNVVQKAVG......GAFTSKSFPELDTEKEFGVVKQPK......KIPI..........TKKSKSEV.S 0 0 0 0 +4594 PF05099 TerB Tellurite resistance protein TerB Bateman A anon COG3793 Family This family contains the TerB tellurite resistance proteins from a a number of bacteria. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.51 0.71 -4.45 153 3006 2012-10-03 21:00:09 2003-04-07 12:59:11 8 32 1728 5 694 2123 2633 135.10 20 59.21 CHANGED t.lpph..htphh.sss.sphtss.t............plAlsA....LhsclApADGphspsEhptlpplltpthslsstpspplhptspptppc........ssshhpasptlpcths.........cp+hpllcsLWplAhADG....phcttE-sllcclAplLslscp-hhth.....+t ..........................................................................h.hthth...................t..t..t.............h.sshs....lhsclsp.u.D.Gp.lsps..E...h..phh...p...p......lh.......p.....p.....h....s....L...p....s.....t....p..p..p...t....h...t...p....t.h.p...t...tppp........................................shsh.p..ph....h.p....p....lcp.h...............hch..t.p....t...hLchhhpl....Ah.A.D.G........plcspEcplLpplAphLGlsptph.....h........................................... 0 189 419 574 +4595 PF03741 TerC Integral membrane protein TerC family Bateman A anon COG0861 Family This family contains a number of integral membrane proteins that also contains the TerC protein. TerC has been implicated in resistance to tellurium. This protein may be involved in efflux of tellurium ions. The tellurite-resistant Escherichia coli strain KL53 was found during testing of the group of clinical isolates for antibiotics and heavy metal ion resistance [2]. Determinant of the tellurite resistance of the strain was located on a large conjugative plasmid. Analyses showed, the genes terB, terC, terD and terE are essential for conservation of the resistance. The members of the family contain a number of conserved aspartates that could be involved in binding to metal ions. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.07 0.71 -4.84 93 6495 2012-10-03 02:02:08 2003-04-07 12:59:11 11 11 3058 0 1376 4775 2679 192.90 30 58.96 CHANGED hslhhlEhsLSsDNhhVlullspp...lPtp.......................pc..+slhaGlhuAllhRhlhlhhushLlph.................................shlhhluGhaLlahuhchlht.h.................t.hpphhshsssh........................................hhhsllhIthsDllFulDSlhAshulo................pchhllhsuslhuIlslthhuthlsphlc+ashlphsshslLsalGsch...llps ......................................................TlhllEhlLulDNllhlullssp...LP.s.p......................p.pc.+.A.hh..h..G.l.h.h.A.l.l.h.Rhl....h..lhh.h.u.allsl.....................................................................h......sh..lhhlG.Gl.....F..Ll..apusp.lpcph.................................c.tt..c..pt.h.tt..ss.st..........................................................................................................hhhslhpIh.....lh..DllFu..l.......DS...l...lsshGhs.....................schhlh.h..s..us.ll.Alh..lhhh...s..uphl..s......p..h..lp.+..a..P..t.lth..hshshLhhIGhpLlhps.................................................................................................. 1 380 808 1111 +4596 PF02342 TerD TerD domain Anantharaman V anon Anantharaman V Domain The TerD domain is found in TerD family proteins that include the paralogous TerD, TerA, TerE, TerF and TerZ proteins [1][2] It is found in a stress response operon with TerB and TerC. TerD has a maximum of two calcium binding sites {2] depending on the conservation of aspartates {2]. It has various fusions to nuclease domains, RNA binding domains, ubiquitin related domains, and metal binding domains. 
The ter gene products lie at the center of membrane-linked metal recognition complexes with regulatory ramifications encompassing phosphorylation- dependent signal transduction, RNA-dependent regulation, biosynthesis of nucleoside-like metabolites and DNA processing linked to novel pathways [2]. 25.30 25.30 25.80 25.50 24.80 24.90 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.19 0.71 -4.96 187 3080 2012-10-09 20:46:33 2003-04-07 12:59:11 13 38 634 6 728 2301 75 182.90 29 70.99 CHANGED hthsLsKGp...plsLscpss.......tplplsls.Wc.ttt..............................tshDLDsSshhlstt.............s+stsssphlFasphpusssul.h.......................sGDshsut....t............................................................................................sEslplsLspls...sslpcllhslolasut................Fuplpsuhl.clhst..................sspplsc......asls....shsscouhlhuclYRc.............................supW+.......hpAlGpGh......ssGlts..lsppaGhtl .........................................................................................lsLpK.Gp...plsLsKpss......htplhluLuWcstt.tt...........................................tshDLDsSshhl..ts.....................s+st.ssschlFa..s..shp.u...s..s.....G.u..l..ht..........................................sGDshTGt.....ut.G.D............................................................................................cEplpl.cLsp.ls.....splc+llh.s.V.sIasups..............sFuplpsu.al..Rlhst..............................sspEls+......acLs....tshutcTuhl...huElYRc.................................................................supWK...FpAlGpGh.......ssGLts.lsptaGh.............................................................................................. 0 203 487 654 +4597 PF03592 Terminase_2 Terminase_small; Terminase small subunit Finn RD anon Pfam-B_3755 (release 7.0) Family Packaging of double-stranded viral DNA concatemers requires interaction of the prohead with virus DNA. 
This process is mediated by a phage-encoded DNA recognition and terminase protein. The terminase enzymes described so far, which are hetero-oligomers composed of a small and a large subunit, do not have a significant level of sequence homology. The small terminase subunit is thought to form a nucleoprotein structure that helps to position the terminase large subunit at the packaging initiation site [1]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.96 0.71 -3.90 71 1427 2009-01-15 18:05:59 2003-04-07 12:59:11 11 10 1058 51 150 1030 539 147.30 23 79.63 CHANGED LTsKQctFspEYlt.....s.hNATpAAl+A.GY.....St+o...Apshu...sc.LpcscIpphIsEt...hpchppcphhssc-lLphLsclspu-hp-.h.h..t................................................ttspthp.hpsphpD+l+Ah-h.LuK+hs........hp-c...........t.t.t.hhp..ht......s ................................LTtKQcpFsptYlp.....s....h.Ns.TpA...Al......pA..GY.....Ssco.....Ap...spuscLLc.p....s...c.......lpth.I...pch.......pp...ch....hp...c....th.......h..shppl.l..thL...tp...hths..p..pc.h...h..h...t...................................................................................................t.thh....h..p.tsc.cuhch.lh+hht....h....h.pp.............................tsst..h................................................................................................................................................. 1 40 90 117 +4598 PF03936 Terpene_synth_C Terpene synthase family, metal binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_728 (release 3.0) Domain It has been suggested that this gene family be designated tps (for terpene synthase) [1]. It has been split into six subgroups on the basis of phylogeny, called tpsa-tpsf. tpsa includes vetispiridiene synthase Swiss:Q39979, 5-epi- aristolochene synthase, Swiss:Q40577 and (+)-delta-cadinene synthase Swiss:P93665. tpsb includes (-)-limonene synthase, Swiss:Q40322. 
tpsc includes kaurene synthase A, Swiss:O04408. tpsd includes taxadiene synthase, Swiss:Q41594, pinene synthase, Swiss:O24475 and myrcene synthase, Swiss:O24474. tpse includes kaurene synthase B. tpsf includes linalool synthase. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.46 0.70 -5.17 87 2046 2009-09-15 09:56:15 2003-04-07 12:59:11 11 32 433 92 834 2322 3 234.00 21 48.78 CHANGED lLclA...KLs....FNhhQthappELcpls+Wa+-hsl.scLsh.hR-RllEsYahshushaEPp..aStsRlhhsKhhhlhsllDD.....haD..sauohcE...lc....thscslcRW........s...ssh-.pLPcah+hsapslhchhp-htpchtpc.scp......hh.hhtcthpphhcuahpEucWh......pss.hhPo.h-EYhpsuhhohuhhhhhhhshhsh...sphhs.cpshchlpppsp...lhchhshhsRLh....sDltsa..........................pcEhp+GchssulpsYMcph.ssocE-Ahpclpphlppsh..+phs ..............................................................................................................................................hs...........p.....pht.t.h.......p..........p...W.........h.pp.........th.....t..p...h........h....+.....cc.....hhphhhh..h.hu.h.h.hp..Pp.....hu.tR.h.h.h.s.+.hhshhh.hh..DD............ha.D....shu..sh.c-.............hp...........hhspslp.ph..............................s........sshp..th.s.p..h...h......c..h.....h...h.....h..s..l..hs.....h...h...........p..ch.tt.phtt.p.....ttp...........................h..ht.pt..htphhp.u..hh..h.E.s..c.ah...................tps...hhPs..h...pEYh.p.t.hho.s.uh..hhhshs.hh.....hh....t..hs..cpshp..h.htp..st.............lhph...sshhh+...Lh..................sD.lhoh..........................................p.p..E.........p......c.......s.p......h..s...ss.l....h...h.h...pp......h...s.h.o.pp.Ahpthtthlppthcph........................................................................................................................... 
0 148 537 713 +4599 PF02909 TetR_C tetR_C; Tetracyclin repressor, C-terminal all-alpha domain Griffiths-Jones SR anon Structural domain Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.52 0.71 -4.52 18 1930 2012-10-03 00:15:22 2003-04-07 12:59:11 12 6 804 87 677 1699 15 139.70 19 61.05 CHANGED hP.pss-.sWpshLpssA+ShRpALLsaRDGA+lahGo.tsussphcshEspLphLscsGFosccAlhslpslupFslGuV........LEcQu..tpsstpspptss.hppth.s...Pl....Lppshp...shspsss-ssFEhGLpllIsGlcsth ..........................................................st..sWcstLpshAcshRpshhpH..P...t.s......s.p...l.h..h....s...p..........s..........h....u.......s......s......t......h......p....h....h-...thl.t.hL....p....s....s......Gh.s..s.t.p.s....h.t..sht...sl..t....t....alhGts.....................................h.p....p...p....s................t.....t.s..s........t........h..t...t..t..sh.....htth......................Ph..........lt.t.sht......................ht.t.........s.........c.....t..t......F.....p....hGLphllsGlt...h.......................................................................................... 0 228 518 626 +4600 PF03299 TF_AP-2 Transcription factor AP-2 Mifsud W anon Pfam-B_1736 (release 6.5) Family \N 20.30 20.30 23.70 22.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.14 0.70 -4.78 6 424 2009-01-15 18:05:59 2003-04-07 12:59:11 9 5 99 0 242 359 1 190.70 64 48.09 CHANGED ssEVFCsVPGRLSLLSSTSKY+VTVAElQRRLSPPECLNASLLGGlLRRAKSKNGG+pLR-pLcKlGLsLPAGRRKtApVThlTuLVEuEAlHLA+DFuhVCEsEFPu+slApals+pplss.p-hstR+phLhtopplsp.EltclLopDRoPlssp+spshL..-sulQpsLoHFSLhTHGFGssAhpAslpulQshlsEulphl-Khh. 
.............................................sEVFCSVPGRLSLLS.ST.SKYKVTVuEVQRRLSPPECLNASLLGGVL.R..R...AK.SKNGG..Rs..LRE...+L-KIGLNL...PA....G..R...RKAA..NV...TLLTSLVE.G...............EAlHLARDFG.YlCETEFP...uKAluEa.L.sRpHt.-.s.....s.-...tsRKsMLLA..oKQlCK.EFsD.LLuQDRo..PlG...sS..RP.sPIL.......Ess.I..QssLTHFSLITHGFGsPAlCAAloAhQNYLsEuLphhDKh..h............................................................. 1 59 75 151 +4601 PF02559 CarD_CdnL_TRCF CarD; TF_CarD; CarD_TRCF; CarD-CdnL_RID; CarD-like/TRCF domain Mian N, Bateman A anon COGs Family CarD is a Myxococcus xanthus protein required for the activation of light- and starvation-inducible genes [1]. This family includes the presumed N-terminal domain, CdnL.\ CarD interacts with the zinc-binding protein CarG to form a complex that regulates multiple processes in Myxococcus xanthus [4]. This family also includes a domain to the N-terminal side of the DEAD helicase of TRCF (transcription-repair-coupling factor) proteins. TRCF displaces RNA polymerase stalled at a lesion, binds to the damage recognition protein UvrA, and increases the template strand repair rate during transcription [3]. This domain is involved in binding to the stalled RNA polymerase [3]. The family includes members otherwise referred to as CdnL, for CarD N-terminal like, whichdiffer functionally from CarD. The TRCF domain mentioned above is the RNA polymerase-interacting domain or RID [5]. 
25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.11 0.72 -3.96 152 5749 2009-09-12 00:28:14 2003-04-07 12:59:11 11 11 4259 6 1333 4473 2560 101.50 33 11.35 CHANGED plphGDh......VVHspHGlGpatulcphpl.......sGhp....p-..alhlpats......ss...p...lhlPlsp.lshls.Ralus..psph..hLscLusps..........W....p+ppp......Kl+psshphus-llchhu ......................LcsGDhVVH.pHGlG+ahGl...p.s.h-l.......................sGhp.........p-..YlhlpYss..........sc......pLaVPVs.p..lchlu..RYluu.............psph...sh......Lp+LGusp..............W........p+t+p..............Ksppplc-lAs-LlclY..................................................................... 0 485 917 1141 +4602 PF03529 TF_Otx Otx1 transcription factor Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 27.10 27.10 22.30 21.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.58 0.72 -3.77 22 293 2009-09-10 18:52:32 2003-04-07 12:59:11 8 2 133 0 88 237 0 92.20 55 34.78 CHANGED SPASIS........P...hs.DP.Lus.....uouSCM....QR..........suuYPMo.YsQu...suYu.QG...Ys.u.SoSYFuGlD.CuSYLu.PM.....HsQLsusGusLSPMuusu..Muu.Hlspus .......................................................................................SPASlS......P........LP.-P.h.ss.......u.suSCM....QR....................ssSY.PMo.YsQu...uuYu.QG...Ys..s...SSSYFuGl.D.CuSYLu.PM........Hspht.s.........pLSPMusso.huu.Hhppp................... 
0 3 11 34 +4603 PF03849 Tfb2 Transcription factor Tfb2 TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 21.20 21.20 21.20 21.30 18.40 20.70 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.01 0.70 -5.82 24 407 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 301 0 280 375 6 296.10 32 69.68 CHANGED -YLcuLPsplhs+LY.psPATCLAIaR.hLPslAKhalMpMla.-pPlshp-l-pWl+ssu.....ptppppulcpLppL+lh...........................spssst...thlpLNssF+ps.h+pALsGGtppsSFGsssspss.....pcssslshLDpYAtp+WEslLHaMVGostspt..........sSp..sVhpLLpputLhct.....sssthc......ITspGFQFLLQDsNuQlWsLLLpYLchuE........shsMDsV-lLsFLFhL......uuL-LG+uYshss.Lo-TQhphLpDL+DaGLVaQ+ps..psppFY.PTRLATsLTSsssshpssstu.pt...................................pspshppGhlIlETNaRlYAYTsSPLQIAlLuLFsclchRFsNhVsGpITRESlRpALtsGITA-QIIsaLpsHAHP ...............................................................................................................altt.hs.thhppLY.....tp.s..............hshulaR.................LP.l...A+.hlh......thl...a....h....p.p.s....ls...........tslt.........W..........s......t.p........................tpt..ppuhphLtt.L+lh...............................................t........tt......th.lss.Fp..ps.h.........p.s......LhG......u.....sp.....t......s...........................................tt..hs..ht.L-pYu.ppWEs.......lLtahVss....st.t..................u.t..shhplL..tutLhpt..........tt...t.t.........................ITp.tGF.....pFLL.-hssQlWhhhl.Ylp..h.p.................................tht.hs...s.-hLsF..lF.L....................u...pl....G............p....sYphps......h.op...s..h.........p......h.....LtcLt-hGLla...p..p..t........pt....thaa...PT...+L.A......h.........sLsss.s..s........................................................................................................................tttppGhlllETNa+lYAY...........Ts.....S.......Lp.....l.ullsLFsch.h..RhP.N..........hlsu.tlTRcSlppAlttGIT.ApQIIpaLpppAHP............................... 
0 104 161 236 +4604 PF03153 TFIIA Transcription factor IIA, alpha/beta subunit Mifsud W anon Pfam-B_3542 (release 6.5) Family Transcription initiation factor IIA (TFIIA) is a heterotrimer, the three subunits being known as alpha, beta, and gamma, in order of molecular weight. The N and C-terminal domains of the gamma subunit are represented in Pfam:PF02268 and Pfam:PF02751, respectively. This family represents the precursor that yields both the alpha and beta subunits. The TFIIA heterotrimer is an essential general transcription initiation factor for the expression of genes transcribed by RNA polymerase II. Together with TFIID, TFIIA binds to the promoter region; this is the first step in the formation of a pre-initiation complex (PIC). Binding of the rest of the transcription machinery follows this step [1]. After initiation, the PIC does not completely dissociate from the promoter. Some components, including TFIIA, remain attached and re-initiate a subsequent round of transcription. 30.50 30.50 30.70 30.70 30.20 30.20 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.47 0.70 -4.56 31 563 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 271 8 357 547 0 253.50 22 80.52 CHANGED +lYcsVI.-DVIsssRpsFt-p.GlDEQsLp-L+plWppKLspopsschsa-sss..sss..stp........................................t...........ss.pp.psps.sssssstshthssG...............thtstsG..hpsthPshss..ss.st.......t...............htthptshstsuuspss...........t........................................................tt.shtp.sss.ht...................................ppscustss.............t...sp.hpstt...........................hc.phhshputth..slpptspt..ttpp....................................................................................................................................hQhDGssssssp............pcp....D.-D-.......................................lppD..sssDDhs.cpDsp-..ssssVhlCpYDK...........VpRsKNKWKhpLKDGIhshsGKDYVFpKApGEuEW 
...........................................................................................................................................................a..lh.tpVh.t.....p......F.p......s...t.thl.php.t.W....ptKh...t.ts..h..p...t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..s...................................p.pp.................................................................................pp-..s...D-......pc.....p...t..............tplhlC.aDK...........lpR......s+s+......WKh.LKDGlhphss+.-hhFt+s.s-h-a........................................................................................................................................ 0 117 189 283 +4605 PF02268 TFIIA_gamma_N TFIIA_gamma; Transcription initiation factor IIA, gamma subunit, helical domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4941 (release 5.2) Domain Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIA (TFIIA) is a multimeric protein which facilitates the binding of TFIID to the TATA box. The N-terminal domain of the gamma subunit is a 4 helix bundle. 
20.90 20.90 22.50 21.70 20.10 19.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.31 0.72 -4.38 21 360 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 275 4 215 313 1 47.90 50 39.93 CHANGED saYELYR+So...lGtuLTDuLD-LIssupIsPpLAhKVLtpFDKslscsLp .....hYpLYRpoT........lG.sLp-oLD-LIpsspIsPpLAhpVLhpFDKulspALt............. 0 74 117 179 +4606 PF02751 TFIIA_gamma_C Transcription initiation factor IIA, gamma subunit Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_4941 (release 5.2) Domain Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. Transcription initiation factor IIA (TFIIA) is a multimeric protein which facilitates the binding of TFIID to the TATA box. The C-terminal domain of the gamma subunit is a 12 stranded beta-barrel. 21.20 21.20 22.70 21.50 21.00 20.40 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.76 0.72 -4.15 26 357 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 277 4 214 301 1 50.30 52 42.76 CHANGED psKuKloF...K.GcLcTYRFCDsVWTFIl+ssph+h.....................sppplpVDKlKIVACsu ..........pV+s+.loF...K....G+LcTYRFCDsVWTFllpDspF+....................................spphlp.l.D.K.VKIVACsu................. 0 74 115 178 +4607 PF02291 TFIID-31kDa TFIID-31; Transcription initiation factor IID, 31kD subunit Mian N, Bateman A anon Pfam-B_6729 (release 5.2) Family This family represents the N-terminus of the 31kD subunit (42kD in drosophila) of transcription initiation factor IID (TAFII31). TAFII31 binds to p53, and is an essential requirement for p53 mediated transcription activation. 
29.00 29.00 29.00 29.00 28.90 28.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.48 0.71 -4.36 5 368 2012-10-10 12:36:46 2003-04-07 12:59:11 10 8 276 1 247 382 2 121.40 39 53.69 CHANGED cuGhoupsccsPKDAplltpIL+-hGIpEYEPRVlsQLL-FAaRYTosILcDApVYucHA+Kusl-.....lEDVRLAlssplspSFTuPPPREhLLcLAs-RNppPLPQI+PsaGlRLPPDRYCLTusNacL+ ...........................t.........pphP+DAplht.lLpsh.Gl..s..p..Y.EsRVh.QhL-FA.aR..........YsoslLpDA.....tl.....Y.......u......s..H....A..t+.....s...sls.........................s-DlRLAIpsR.....hshpF..p....s...........s.........P.....P+-...h....Ll-lApp+NphPLP.................h.l..h.........s..........h.......G..R..LP...P.-+asLou.sapl............................. 0 80 126 189 +4608 PF03540 TFIID_30kDa TFIID_30kD; Transcription initiation factor TFIID 23-30kDa subunit Griffiths-Jones SR anon PRINTS Family \N 21.00 21.00 21.20 21.00 20.90 20.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.30 0.72 -4.13 20 312 2009-09-14 12:53:53 2003-04-07 12:59:11 8 5 267 0 220 305 3 51.70 52 26.68 CHANGED PhIPDuVTsaYLs+uGFps.......sDsRlsRLluLAsQKFlSDIAsDAhQau+hRs ......PhIPDAVTsaYLs+uGFps.............sDsRl.hRLluLAsQKFloDIAsDAhQas+hRt......... 0 75 121 180 +4609 PF02002 TFIIE_alpha TFIIE; TFIIE alpha subunit Enright A, Ouzounis C, Bateman A anon Enright A Family The general transcription factor TFIIE has an essential role in eukaryotic transcription initiation together with RNA polymerase II and other general factors. Human TFIIE consists of two subunits TFIIE-alpha Swiss:P29083 and TFIIE-beta Swiss:P29084 and joins the pre-initiation complex after RNA polymerase II and TFIIF [1]. This family consists of the conserved amino terminal region of eukaryotic TFIIE-alpha [2] and proteins from archaebacteria that are presumed to be TFIIE-alpha subunits also Swiss:O29501 [3]. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.29 0.72 -4.53 7 544 2012-10-04 14:01:12 2003-04-07 12:59:11 12 13 435 2 355 727 205 102.50 21 29.94 CHANGED cLlpElltphhG..tcsh.llcsLh.cucso-E-luc.LtlchsplR+lLhpLa-s+Lsca+Rt+D...-ppsWhpYYWhlphc+l.pllKt+hpphlc+LcctLchEps ............................h..lhcthhs....pt..h..hl..l-h.L.h..p..c........s.....t......l.p..-....--luchlph..pt..pplRplLtpL.p.cc+.l.l...p.h...+...p...p...p-.............................p.s.p..s....h..tp.....h......h...a...h..ls.h..c.ph....h....shl.c.h+hpphhpplctp.t................................................................ 0 106 199 293 +4610 PF02186 TFIIE_beta TFIIE beta subunit core domain Bateman A anon [1] Domain General transcription factor TFIIE consists of two subunits, TFIIE alpha Pfam:PF02002 and TFIIE beta. TFIIE beta has been found to bind to the region where the promoter starts to open to be single-stranded upon transcription initiation by RNA polymerase II. The structure of the DNA binding core region has been solved [1] and has a winged helix fold. 22.50 22.50 22.70 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.12 0.72 -3.98 12 229 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 199 2 152 218 0 70.60 34 24.23 CHANGED s+h.s.LthhV-YhKp+.....scPlol-El.shls.hDIsssllshLc.....p.s+lcasscs.papahslasI .................................a.hshLthlVcahKp+.......scPLol-EIL..-php..lD..l..s.......p....p...hc..phLp............sNsKlchs..c......u.....patFKPhYsl............................ 0 38 67 115 +4611 PF02270 TFIIF_beta Transcription initiation factor IIF, beta subunit Bateman A, Mian N anon Pfam-B_4519 (release 5.2) Family Accurate transcription in vivo requires at least six general transcription initiation factors, in addition to RNA polymerase II. 
Transcription initiation factor IIF (TFIIF) is a tetramer of two beta subunits associate with two alpha subunits which interacts directly with RNA polymerase II. The beta subunit of TFIIF is required for recruitment of RNA polymerase II onto the promoter. 19.40 19.40 21.20 21.30 19.30 19.30 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.70 0.70 -4.60 25 473 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 278 10 318 464 4 184.60 23 82.69 CHANGED stDlDLssuc..p..pVWLVKlPKYLuppW........schssp....tplG+l+Ipps......ttc.pVslhLscp.ps....................plP+EYslphpssp.......spshhVFoEps..t.ppp...p..........................................................pshscKhALtG+Vs+cspshPsts-..pYt+lhpp+.thpsspP++pVphL-ch..Vspshts..suh+usssch...hh......tccKKp-uKpsRhs+p-LLDhLFpsFEcapYWslKsLccpT+.QPcuYLKEsLcpIAhhsK+GPapspapLKPEY++ ........................................................................................p............p..thaLh+hP.hl....p...tW...........................tt......t...........luplhh..t.................thth...ht................................................................hPppa.h.......h.t................................s.hlhsp...............................................................................................tph.h.Ghl.p..ch.p....hhP.................p............p.....pa..p.hh.t.p.....t......p....t.l...pp............t.h..........t.h..t.t...........................................pttt.+thR..hspptl.shlathFc.c.......h...ash+tLht.h..........p.QP.................a......L+phLppls.h.hp.s....t..h.apL+s-hp..................... 
3 103 173 251 +4612 PF01096 TFIIS_C TFIIS; Transcription factor S-II (TFIIS) Finn RD, Bateman A anon Prosite Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.20 0.72 -4.32 109 1781 2012-10-03 10:42:43 2003-04-07 12:59:11 13 25 640 94 1065 1584 290 39.10 43 20.94 CHANGED hpCs.c...Cspccshah.phQpRSADEP.ThFapCh..pCsppW+ ...........pCs..+....C.....t.p.c.c.s..hah.phQ......oRS...A..DEP.hTs....Fap.Cs..pCup+W+.......... 0 343 578 851 +4613 PF04994 TfoX_C TfoX C-terminal domain Bateman A anon COG3070 Domain TfoX may play a key role in the development of genetic competence by regulating the expression of late competence-specific genes [1]. This family corresponds to the C-terminal presumed domain of TfoX. The domain is found associated with Pfam:PF00383 in Swiss:Q9JZR1. It is also found as an isolated domain in some proteins suggesting this is an autonomous domain. 23.00 23.00 23.00 23.30 22.90 22.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.50 0.72 -4.07 48 1181 2012-10-03 02:11:09 2003-04-07 12:59:11 8 3 1046 4 158 663 24 79.70 38 45.23 CHANGED tssR.L+-......LPNlshshEchLp+sGIcolppLcplGAhpuah+l+ps.utslslpL...LauLpGAlpGhHWsslsppc+pcLlpth ................t..pRL+D......LPNhuhplEphLtcsGIcslcpL+tlGActsal+.L+pp....sstlohcl...LatLEGAI.GlH.ss.LPts++pELhch........................................... 0 37 78 123 +4614 PF04993 TfoX_N TfoX N-terminal domain Bateman A anon COG3070 Domain TfoX may play a key role in the development of genetic competence by regulating the expression of late competence-specific genes [1]. This family corresponds to the N-terminal presumed domain of TfoX. The domain is found as an isolated domain in some proteins suggesting this is an autonomous domain. 
22.20 22.20 22.20 22.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.02 0.72 -4.14 85 1775 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 1572 2 258 995 70 89.90 27 59.84 CHANGED lh-hl....ush.GslstRpMFGGaGlah.cuhhFAllsc.s...pLaL+u.sspspsha.pstGs....tsata..tpth.h....shsYaplstphh--tptltphs+huhpsuhp ....................c.lstL..us..lph..RsMFGuYulah.cs.......s.l.h.uhlh-....s......cLal+u.scpstphh..sppss....hsash.....th.......shpaahlstphhc.....st..Lhph.chuhpth.............................. 0 60 131 199 +4615 PF00019 TGF_beta TGF-beta; Transforming growth factor beta like domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain \N 20.30 20.30 20.60 20.40 19.70 20.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.86 0.72 -3.76 20 3712 2012-10-02 16:54:34 2003-04-07 12:59:11 15 19 848 111 1288 3220 0 95.50 38 29.32 CHANGED sssCph+pLalsFp.DlGWspWIluPcGYhApYCpGpCsaslssphsso....spAllpsls+hh..tP.pssspPCClPT..cLsslohLahD-sps.hhl+phpsMlVcpCGCp ............................p.ptCp++sLaVsFp.-l.G..W.s.p...W.IlA...P.pu.YpAhYCpGp.Cs.a...sh.s.s.ph..sso........................sps.h...lp.s..l.h..............pth...........s.s......s..s.spsC..Cl..Po..chssloh.LYh...-..p....p....pp......hh..hc..ph.sMllptCuC..................................................... 0 194 298 677 +4616 PF00688 TGFb_propeptide TGF-beta propeptide Bateman A anon Pfam-B_110 (release 2.1) Family This propeptide is known as latency associated peptide (LAP) in TGF-beta. LAP is a homodimer which is disulfide linked to TGF-beta binding protein. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.43 0.70 -5.08 81 3044 2009-01-15 18:05:59 2003-04-07 12:59:11 13 32 870 4 1012 2611 12 185.70 18 54.29 CHANGED sl...........................................s.h....tpthpppphcthcpplLphlGLp..cp...Pps............ttstssP...pah.L.-LYptht..............tppsttpss.........t...t......................usolhoFtsctphpp..........ptptpphhFslSsl...sps.cplstA-.L+lahpt..............sspspphplplaplhp.........tttt.pcLLss+h...lphs.....suWpsFDloss.lpp..Whppspp..NhG.....ltlpl.......hstct.sshssp..t.....t......................tptpPhLlsa ..............................................................................................................................................................................................................s.............h..t.ht......................................................................................................................................sp.l...ht..h............................................h.hFs.h.o....th........pt..p.......l..tAp.lh....lah.t.............................................tttthplp.lhph..t.....................ttt.....tph..lsp+h...........lphpt.................stW....sF..D.V.o..s.....s.lpp...Wh.......tpsp.......s..hG......l.lp.s.......................p...pt...pt.....t..........................................................t.pPhh.h.................................................................................................................................................. 0 166 252 544 +4617 PF01702 TGT Queuine tRNA-ribosyltransferase Bashton M, Bateman A anon Pfam-B_1643 (release 4.1) Family This is a family of queuine tRNA-ribosyltransferases EC:2.4.2.29, also known as tRNA-guanine transglycosylase and guanine insertion enzyme. Queuine tRNA-ribosyltransferase modifies tRNAs for asparagine, aspartic acid, histidine and tyrosine with queuine. 
It catalyses the exchange of guanine-34 at the wobble position with 7-aminomethyl-7-deazaguanine, and the addition of a cyclopentenediol moiety to 7-aminomethyl-7-deazaguanine-34 tRNA; giving a hypermodified base queuine in the wobble position [1,2]. The aligned region contains a zinc binding motif C-x-C-x2-C-x29-H, and important tRNA and 7-aminomethyl-7deazaguanine binding residues [1]. 23.80 23.80 23.90 23.90 23.60 23.30 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.52 0.70 -5.08 133 5152 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 4473 77 1485 3705 2879 234.20 41 60.66 CHANGED Ecsl.plQpsl.GuD...IhhshDcss...sh.....ss...st.ctscpuhccTh+Wsc+shpth..............................ttpppsLFGIlQGGha.-...LRppusctlt.p...........s..hsGaA......lGGlusGE......sp.pphhpllc.hsss.......hLP............ps+PRYL.hGlG.pPtslltuVthGlDhFDClhPo.RhARpGphh...TppG............................................pl.............plcst.patpD..pPl..-.sCsChsC.............p.......paoRAYl+HLhpspEhlu.pLlohHNltahhplhpplRpuIpps.pht....phhpp......hhp 
..........................................................................................................Ecuh.pIQps.L.GS.D..IhM.hF.DECs.......sh............ss.....sh..chsc...cSh-ho.....hR..WAcRshcta........................................phspppu..LFG...IlQ..GuhacD...LRc.pS...scsLs.p.............t..FsGYA........lGGLu.VGE............s+..ppMh.c......ll-...hsss..........hLP.........................................pc..KPR...YL..MG.VG..pP-......sLl.....-u.V.t.+.G.lD.M.FD..........CVhPT.Rs.A....RNGphh.TspG....................................................................................................................pl.........pl+NA..catpDh.pPl...D....p.Cs..C.YoC......................................p...........s.YS.RAYL+HLh+sp....E....h..........l....G....hpLsolHNLpah.pLMpplRpAItp..s..p..htpFhppFh.t...................................................................................................................................................................... 0 514 934 1253 +4618 PF04858 TH1 TH1 protein Kerrison ND anon Pfam-B_6070 (release 7.6) Family TH1 is a highly conserved but uncharacterised metazoan protein. No homologue has been identified in Caenorhabditis elegans [1]. TH1 binds specifically to A-Raf kinase [2]. 
20.00 20.00 22.40 21.10 18.70 18.00 hmmbuild -o /dev/null HMM SEED 584 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.84 0.70 -6.36 5 182 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 117 0 129 184 0 427.00 40 78.05 CHANGED Ms-DaEs.scuWspusGcGpttu--supuE..sEsssuVlpECLc+FuT+DYIMEPuIFssLKRYFQAGGoPEsVIphLSENY+AVAQMsNLLAEWLIlTG.......VcPscVQAsVENH.............................................LKsLllK+FDP+KADuIFTEEGETPAWL-pMIcHcTWRuLlY+LAEcaP-CLMLNFTIK................LISDAGaQuEITSlS.TAAQQlEVFSRVL+TuIssILsuGsDDl++s..IEElA+MVCHGpHTYVFuQlMluVLuQEpcGGos......s+RLSQEIp+aAtcpspsATsITlA..LuuSAsaPpACpALuuMLSR................GALNPADITVLa+hYoSSD...PPPV-LIRsPsFL-LLlsuLFKsGVKI.Ns-HKuKYhaLLAYAASVh-..tKp.ps..-cslsKDELKuT..AIEcAHAIhCNss+GsSELlA-lsTLYsCI+aPVVusGVI+WV-ssVTEPuYF+LsT..-osPVHLAlLDEVAssHsuLQsQVLcLLl+LFE....S+pDEL-Ih.QLEhKKsLLDRMVcLLARGaVlPVloYlspChcR.DTDlSLIRYFVTEVLElluPPYSsDFVQLFLPlLEN-sIsGoM+uEG-sDPVoEFI..VHCKu+ahol ..................................................................................................t..t.h..pcthpth.p.DhIME.Pslhs.....lp..pYhps........GG..sP...pp..........llphLSpsYpuhAQhssLlucWLh.hs....................hp..plpthhcsp................................................LKphlhcpFDPpKADslF.otp...u...p.....sPtWL.pphI....t..................c.pWRpL...hYpLAE..t.aPc...CLhLsFsl+...................hI.SDAGaQs...EIsSlo..TAspphpVFo+VLpsslsphlp........s.....p.-.s.hpps....l.-hs+hlCpupHTYlauQshhph......Lspc....t.ps.s.s................h+Rlu...QElpp.A.ppt.psss.........hphs...........Lssu...s........t......s...ps.....ppulsuhLsp...............................ssL...ssuDlp.hLac..as.s.c......PPPlphlR..Pthl-lhh..psLFp.s.u.pl....s........ca+.cahalLAYAussh-......tpt..p........................hsp..-..-.lcs.......T..pAlEp....spsl.p..s.s..tu.t.....s...chh.uplt.pLhpsl.chPlVuhGVl+Wlc.sl..ps.p.aa..p..h.s....-psP.haLslL.-ElsshH.hhp.plLplLhplh-..............spt.s.pl-hh.th.Eh++hllD+hVpLlo..pGhVlPVl.............palpps.htp.phDhSLIRaFVoEV.....................L-hlsPPYos-
Flphalshlpstplh.ssh.........t.....ht.Fh................................................................................................... 0 63 75 106 +4619 PF00314 Thaumatin thaumatin; Thaumatin family Finn RD anon Prosite Domain \N 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.25 0.70 -4.87 124 1409 2009-01-15 18:05:59 2003-04-07 12:59:11 12 29 247 50 615 1398 11 174.60 39 73.41 CHANGED NpCsaTVWPushss...u............GGhpLssGpo..hsls...sPsu..h.uGRlWuRTsCsFsu.............u..pupCtTGDC..G..GtLpCs.G.s.GtPPsTLAEaoL..........t...t......DFYDlS.LVDGaNlPhslsPp...........s....sC.....ssuCss.....................................-lsspCPs-Lps......................................uCpSAC...........ts.....FtsspYCC.....ts.ststsCt....PotYSphFKptC..PcAYSYAhDDsoSo..FTCs.....u...ss.YplsF..C.P ......................................................................NpCsaTVWsuhhss.........s...................sG..ht..L...s.Gpo...hsls....ss..ss.....h..uGRhWu........RTsCshss......................sG..phpCtT..GDC....u..G.....hl.....pCp...u..s...ut.P...P.s.TLA..EasL...................st..ss....Da..YD...l.S.LVDG.a.NlPhthsPp...................t...sssC....psssCss..............................................................................................................................................s.ls.t.....CPsp..Lps.....................sh................uCps.sC..................................s.....a.t.p.sph.CC............................s.......tsC.......................so....a..SphF.Kp.t.C.P.pAYSYshD.D.....o....os......ao.Cs........s......ssY.lhF.C............................................................................................................. 0 129 377 515 +4620 PF01946 Thi4 Thi4 family Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes Swiss:P32318 a putative thiamine biosynthetic enzyme. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.60 0.70 -5.26 8 671 2012-10-10 17:06:42 2003-04-07 12:59:11 12 29 532 20 351 23606 13980 162.40 35 48.14 CHANGED SRAhscpYacDLlcYAEoDVlIVGAGsSGLoAAYYLAKp..sLKVAIlEpplS.GGGsWhGG.LFsthVVc+PA+thLcElGItYE-pschhlVtcAA.FsSTlhSKslp.sslKlFNussVEDLIlR-......spVuGVVhNWohVphu..H.....hDP.TIcA+...............lVlsuTGHDushuuhsVKRl..h.ts..tclsG.+uh.hspAEcslV..+sTREVhPGLaVuGMtlutlcGAsRMGPhF ..........................................................t.........h....p.....h...c..h..D...VlIVGAGsuGLs.uA...h...h...L......u...ct.........sl....+....l.s.l..l...E......p........p......h.......t...........G.......G........s......h..............h..........h........G.......u...........h.....h...........................h.................h................h....p......t........................................h..........h...................t....h.........t...........h.........................................................................h.....................................................................h........h.................h..............s.h...h.........h......hp...D.lh.h.........................l......u.h.................................................................h.DP......h....................hhht.sG.H..s..............pp............................................................................t..s.h....uE..hh......s..................................................................................................................................................................................................................................... 0 103 208 290 +4621 PF01964 ThiC ThiC family Enright A, Ouzounis C, Morett E, Bateman A anon Enright A Family ThiC is found within the thiamine biosynthesis operon. ThiC is involved in pyrimidine biosynthesis [1]. The precise catalytic function of ThiC is still not known. 
ThiC participates in the formation of 4-Amino-5-hydroxymethyl-2-methylpyrimidine from AIR, an intermediate in the de novo pyrimidine biosynthesis. 25.00 25.00 27.60 27.60 24.10 24.00 hmmbuild -o /dev/null HMM SEED 421 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.32 0.70 -5.96 113 2842 2009-01-15 18:05:59 2003-04-07 12:59:11 13 9 2688 6 832 2312 704 430.20 62 77.26 CHANGED TQhchA+cGllTtEMchVApcEsls..................................sEhlRpplApG+llIPsNhsH...phcPhuIGcuhpsKlNANIGsSsstssl-pEl-KhchAhcaGADTlMDLSTGG.clcplRctIlcso.slPlGTVPlYQAh....tchpstlh-ho.-.hhcslccQAcpGVDahTlHsGlshchlpthptt.pRlhGIVSRGGSlhsuWMhtppcENPLYppFDclLEIh+cYDVolSLGDGLRPGsltDAoDpAQhtELhsLGELscRAhctsVQVMlEG.PGHVPlspIctNlclpKclCcsAPFYlLGPLVTDIAPGYDHIouAIGuAlAuhtGAshLCYVTPuEHLuLPsh-DV+EGlIAtKIAAHAADlAKs..hssAp.c+DptMucARtshDWccQFpLulDP-+A+ph+ccshst......pschCoMCG.caCuh..+lspchh ..........................................TQhcYARpGIITsEMEalAlRE.N.h.s.............................................................................sEFVRcEVApGRAIIPANIN.H...PE...E.PMIIGRsFhVKlNANIGNSAloSSI-EEVEKlsWAhRWGA.DTlMDLSTG.c.pIH-..............TREWIlRNS.PVPIGTVPIYQAL....EKVsGhsE-L.TWEh.FRDTLlEQAEQGVDYFTIHAGVhL+alPhTA...cRlTGIVSRGGSIMApWCLs.HH..pENFLYpHF-EICEIhttYDVohSLGDGLRPGSItDANDcAQFuELcTLGELT+hAWca.DVQVMIEG.PGHVPMphIcpNMchphchCcEAPFYTLGPLsTDIAPGYDHITSuIGAAhIGWaGs.A.MLCYVTPKEHLG.LPN+-DV.Kp...GlIsYKIAAHAADLAKG...HPG.Ap.hRDsA..hSKARFEFRW-DQFsLuLDP-pARtaHDETLPp-s...........tKsAHFCSMCGPKFCSM+Iop-lR....................... 0 268 553 714 +4622 PF02568 ThiI Thiamine biosynthesis protein (ThiI) Mian N, Bateman A anon COGs Family ThiI is required for thiazole synthesis, required for thiamine biosynthesis [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.08 0.71 -4.86 8 2899 2012-10-02 18:00:56 2003-04-07 12:59:11 9 10 2786 3 551 4081 1379 188.10 39 45.56 CHANGED GopG+VLuLLSGGIDSPVAuahhhpRGscVshlaFhstshpstcshcKlc+LAplLucaps.hph+LhVhsapclQc-lhc+s....sEshpsVlh+RsMh+lAphhAcchsscAlVTG-uLGQVASQTL-NLpVIpsso.shsIlRPLIGhDK--IlclAKcIGTa-ISIc.c.-sCohhs.K+PsT+AchppVcKhcpclph ................................Tpt+slhL.lSGGlDSs.VAuYhh..h.+....R...G...l...c..lch.l.a...F..................s...h.......s.....u....s....p.......s..h.......t...+......s....p......s........h......t...p....h.....h.....s....c.....a...u........u.................p......l.......c.....h......h....t....V......s.....F.....s.....p...l..........t.....c..I..h....ccs.................sc...s.......h....h.......l.l.......h...+.R..h.....M..h.Rh.A.s..c..l..s..c.c.h...s.s......u.l...l..TG.......E..u.....L......G.........Q..V........u......S.........Q......T......L..p......s..........h.p..............s....I...s..s.........V....o..........s...........s.............lL..R....PL..l..s.......h.......D..Kp-IIplAccI...sT.......-huhp....-hCsl.hs..cpPpscshhpchpt.Etph......................................................................................................................................... 0 185 341 458 +4623 PF00975 Thioesterase Thioesterase domain Finn RD, Bateman A anon Pfam-B_180 (release 3.0) Family Peptide synthetases are involved in the non-ribosomal synthesis of peptide antibiotics. Next to the operons encoding these enzymes, in almost all cases, are genes that encode proteins that have similarity to the type II fatty acid thioesterases of vertebrates. There are also modules within the peptide synthetases that also share this similarity. With respect to antibiotic production, thioesterases are required for the addition of the last amino acid to the peptide antibiotic, thereby forming a cyclic antibiotic. 
Thioesterases (non-integrated) have molecular masses of 25-29 kDa. 26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.24 0.70 -4.13 38 5677 2012-10-03 11:45:05 2003-04-07 12:59:11 15 590 2120 48 1644 8115 689 222.20 18 14.88 CHANGED tpLhshP.uuG.usphapsluptlss....hshtslphsuct.......hpshpplscthhstlpphtscs...PaslhGaShGuhlAaElAppLppp...GhtspsLhlhst.sPhh.sttttstthspst...............hhsthpchsth...st.hhtspphhtthlssltushpshtsht.............thshpt...........................hthhhtssc.hsshs....spt.............Wpphsstshp..hchhsGsHFhlhp...t....plhptlpptl .......................................................................Lash......s..u..u...G..sshta...t.s..l....sp....t....Lssp.......hslh..u..l..p.....h.suht.............tt.............h....p....s.....l.....p........p...h......s....p....t......h.......h...p...t...l..........t..........p.....h...........t...........s.....p............s........P............a............h.............L.h.Ga.S.h.Guh..lA..a-...h.A.........p...p....Lppp.........Gp.p.......l...t.tL..h...l...h.....-....s.....h...s.....P..t...h.....t........t...t...t.......t...t...h.ttt.t....................................................................hhttht..p.ht.th...........s.t..h....h.t..p....t....t...h..h.......t.....h..h....h...s..h....h...t.....s...s..h.t...h..h..t.ph...t.........................................................................................h.hh.h.h...s.t....t.....s............s....h..t.............t...............................................................................W.t.t.h.h.t....t.t.ht.......hh..h...s.u.s..H.h.hht....................h................................................................................................................................................................................ 
0 397 893 1351 +4624 PF01289 Thiol_cytolysin Thiol-activated cytolysin Finn RD, Bateman A anon Prostie Family \N 19.60 19.60 20.70 20.50 19.10 18.80 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.59 0.70 -6.12 5 995 2012-10-01 20:08:01 2003-04-07 12:59:11 14 5 457 12 47 624 8 335.40 41 87.32 CHANGED ss-pIDcaItGLNYNKNcVLsapGEuIcNhsPKEGhKcuscFIVVERKKKSINTNsuDISVlsSloSRTYPGALllANc-LlENQPDlLsVKRcPlTlSIDLPGMsNpDNpIsVpNPTpSNVssAVNsLVs+Ws-KYSpsa.NlPARlQY--pMAYScSQLpAKFGssFKslNNSLsIDFsAIScGEKQlpVluFKQIYYTVSVNtPsNPSDlFsKSVThE-LQp+GVSAEsPPlYISSVAYGRsVYVKLEToSKSs-VcAAF-AAlKGsSVpussEacsIl-NSSFKAVILGGDAp-cscVVTGDlsslRDlIK-GAsFo+KNPGlPISYTTsFLKDNplAsV+NNTEYIETTSTAYocGKINLDHSGAYVAQFpIoWDElSYDscGpEVVT+KsWDcNs+DKoAHFTTSIPLPGNARNIsIaARECTGLAWEWWRTVlD-RNLPLVKcRNVSIWGTTLYPpsSsTV ....................................................................................................................................................................................................................t.hasuul..ssp.h....ps...t.l.....h.t.ht..phphs.Lsu......s.h..h.p...ss.ushttu.lsplltph..t.t.....t.......sphpYp...s.o.ppl......th..s..ssh.....p....h.......ht..s.sltlsFt....ul.....tp.Gchp..........l.......hsaKQI...a...YsVss..s...sp.pPuch..FtpsVThcpLpthGlsspt..P..salSsVuYGRplYlKLp.Ts....S...+....S....p..c........Vp....A........A....FcAh...l...p........s....h....p....l..t..s....p....s....p..h....p..pI..h....c....pophpAVlhGGssp.ptsp....ll...s.t.p.......hs..l.c...sllp..csup.a.stc..pPuhP.IuYTosFL+DN.....hAs..h.pN.s..o-Y..l.ETps.p.tYpsucl.slDHSGAYVApa.IoW-ElsaDppGpEllp.+sW-tNspshTAtFspsI.l.tN........hRNlplhhccpTGLhWphWpTlh-ppsl.h..phplohhGTTL.sphp................................................................. 0 13 36 41 +4625 PF00108 Thiolase_N thiolase; Thiolase, N-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Thiolase is reported to be structurally related to beta-ketoacyl synthase (Pfam:PF00109), and also chalcone synthase. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.71 0.70 -5.41 22 14120 2012-10-02 12:25:54 2003-04-07 12:59:11 18 36 3652 158 4432 24361 7473 245.80 34 62.42 CHANGED hcsVVIVuAsRTPlG.uatGuhpshsAs-Luuhslcuslc+ss..lssp..pVs-lIhGsVLpuut.GpssARpAuLtAGlPpplPAhTlN+lCuSGLpAlshuuQtItsGpu-lllAGGhEsMSpsPahh.....+sGh+hG.sh.hhD.hl..............hDuLhsshsthhMGlTAENlAcpasISRcpQDpFAlpSppKAttAhpsGpFpsEIlPVpl....p+Gc...hslspDEt.R.ssThEsLupL+PAFpKt.GTVTAGNAStlsDGAAAlllMocs ......................................................................................ppssIls.uhRTP......h..........G....p.......h........t......G.........uh.....p..s...h..p..s.s-...L...uu..................hsl...c...s...h...lp.R...........s..................s.....l.s.s.p.................tl...-....-....V....l.........h..............G....s......V...h..p......s...u..........p.......G...t....s....h...A..R.p...u.......u....l......t...A.......G...........l..P............s......l........s...u...hT......l......N..+h...CuS.Gh.pA............lth...AuptIt....s.......G.....p.s.-....ls.l..A.GGs...E....s..M...S..p..s.P..h.hh...............p....h.s.....h..p...h.....s.....p...............s.t..hh....................................s..s....h....t...s....h.....h.....s....t......h......t......M...G.......o..A..E..p.........l...A..........c..p.....a........s.............l..oR....-....p.Q............Dta.A......h.pSp.p.+.................Att..A..h..p..s.G....h.F..........p.....p.E..I..l..Pl...ph...............tp....G....t......................hhhs..p.....D........Et...R......s.........t.........oo.........h..E.t.........L.up.L.........+..P...s.....F.....p..........t......t......................G.................o..........V......................T.A...............GN.uSslsDG.AuAlllhoc............................................................................................................................. 
0 1259 2619 3654 +4626 PF02803 Thiolase_C thiolase_C; Thiolase, C-terminal domain Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain Thiolase is reported to be structurally related to beta-ketoacyl synthase (Pfam:PF00109), and also chalcone synthase. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.41 0.71 -4.69 22 14316 2012-10-02 12:25:54 2003-04-07 12:59:11 13 37 3621 158 4583 11836 4437 125.10 38 31.49 CHANGED lpPLA+IhuaussGVsP.plMGhGPs.Asp+sLc+AGls.lsDlDlhEhNEAFAuQsLust+plshD..................pKVNlpGGAIALGHPlGsSGARIlsTLlpphpccs.uphGlAohCIGGGtGsAhllE+ .................................................h.pPhAcl..h..uhussGs.........-........P....phM...s..h..G.P..s......A.s...p.......+.sLc+.....u.G.......ls....l....s.D....l....DlhE...........lNE....AFA.....u.......Q.sL.us.h...+...p...L.....u......l...s..t.............................................p..+..l..Nl.sGGAIA.lGHPlGAo.......G.......ARlls.oLlpp.......L.......c.......+............c..............s.......s.............p.............h..........G......l......s......o.....h....C..l..G..GG.GhAhllE......................................... 0 1267 2699 3788 +4627 PF00085 Thioredoxin thiored; Thioredoxin Sonnhammer ELL, Eddy SR anon Prosite Domain Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. Some members with only the active site are not separated from the noise. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.57 0.72 -4.35 50 24231 2012-10-03 14:45:55 2003-04-07 12:59:11 15 236 5259 369 9501 25842 8046 102.30 23 50.59 CHANGED slhhlssps..Fcphltp....s...c.VlV-FaAsWCG.CKtlAPha-clApchpt.....slhlsKlDss.cs..tsluscasVcuhPTlhha+sGptss..chsGu.hstsslhphlppp .................................................h...hstts.a..p.p.....l....t.p............p...........t..s.l..ll........c.....F..a.....A....s......W..........C...u......s....C.....+....t...l......s......P......h....h.......c...c...l...u...p...c...hpt...........................pl...p..h..s....+.........l.......-...........s...........-........p......p........................p..............l......s.................p.....p......a......s..........l......p........u......l......P.T...l..h......h....a.......c.......s......G....p.......ht...........ph........Gs....hs.tp.tl.tphlt..t...................................................................................... 0 3323 5599 7830 +4628 PF00585 Thr_dehydrat_C Thr_dehydratase_C; C-terminal regulatory domain of Threonine dehydratase Bateman A anon Bateman A Domain Threonine dehydratases Pfam:PF00291 all contain a carboxy terminal region. This region may have a regulatory role. Some members contain two copies of this region. This family is homologous to the Pfam:PF01842 domain. 
20.80 20.80 21.20 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.83 0.72 -4.39 16 4280 2012-10-02 00:29:19 2003-04-07 12:59:11 13 7 2590 4 1023 2991 981 90.50 33 30.33 CHANGED EtstlttspEtlhshphPEcsGuLh+Flpslush.s.IThFcYRtpss.cpuplLsGlplsptp-..s-hhpplpchuassh-hosscshphaL ................ERs.htptpEtlaslpFPEcPGALhcFl.p.tl....u....s...c........s..IThFpY.R.......p......p........s......s....c.......t......u..........plllGl..p....l.....s.....p.....s...p-...htplhp.pL..p..p.h...G..........a.shhDlocsphh+hal........................................... 0 240 560 825 +4629 PF04163 Tht1 Tht1-like nuclear fusion protein Wood V, Finn RD anon Pfam-B_64620 (release 7.3); Family \N 21.00 21.00 21.10 21.00 20.10 20.90 hmmbuild -o /dev/null HMM SEED 544 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -13.13 0.70 -6.11 2 96 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 92 0 70 102 1 333.90 18 73.00 CHANGED .hF..hR...hha.FhIhIpFh.oSE.hGclpshhphp.lhao-s.suhssLtpha...hhKSTChp-shthhlspC.lhNs..oID..sRlcoAIpholC-FptStl..hPppChh...GS..-Ch.cLESoSpWWhoaoupapclsplCh.s.L.hpKE..lcl.hNlT.l.cpF...l-hhl.HLh.Fp..pDp.N.hlDchsthF.N.s.E.p.ts.s.Rl..E..sLs.hpN.....hVp.c.lhpT.cQLcspIh-hNS.hhN...Essshp.plhpcLs-chs.cNsIt.StIschps-..s.hpc....LlphopD.lpp.hppN.chVNo....L.shohtLKcpLtuhp+..SEpQ.hpt.plLQhhsshLpsSh...hs.p.l..hhshhpphhp.ahthhSsl.usFAhhs.plFuThssl..chLphp++.l.sh.plpl.hlHh.shhhh.hV..ahWhT.phIhR.hualtlp.+haaL...LCull.al.a.KYRsS+hssch...lPh.c....Ya.pahtl..s.YLsshpsSLIDh 
...................................................................................................h....................................................................................Chp.Aht...hh.pC..h.....pu.....p..s....l..s.............tphp..s.....AhpLulCEhpsus.h..phPppC.s..............................t.p.h.ts...ClpsLEu.ssQaWToYSushpphsshC..ptsphshEK....-phLpla...pslTclhpph..........h.pt.h...p.p.hh........p...p.p.p.pt.p.....h....h...ppltthh.p..............p..............t...p....t.....p...h....t..............h........htt..........................h..p.............t.....h.t............................................................h....................................................................................................................................................................................................................................................................................................................................................................t........................................................................................................................................................................................................................................................................................... 0 17 39 61 +4630 PF02926 THUMP THUMP domain Aravind L anon Aravind L Domain The THUMP domain is named after after thiouridine synthases, methylases and PSUSs [1]. The THUMP domain consists of about 110 amino acid residues. The structure of ThiI reveals that the THUMP has a fold unlike that of previously characterised RNA-binding domains [2]. It is predicted that this domain is an RNA-binding domain The THUMP domain probably functions by delivering a variety of RNA modification enzymes to their targets [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.63 0.71 -4.31 48 6107 2009-01-15 18:05:59 2003-04-07 12:59:11 12 17 3468 22 1490 4208 349 137.50 18 30.26 CHANGED pclpphh.t.thphphh.tthttlhlhhst.....ttptltphhthh...........hl.ph..h...........hts.tsh-clhctstplhpcphht........oFtVcscptspp................phsuh-lpctlGphlhcphs........pVcLppPDllltlElhpp.pshltlpp ...................................................................................................................................hh..........................t.h.l..t...t............shptlhc.....shhhlh................................shp+.lh..s.........................tphsh.p.....s..h..c.-.la.p.t.s.h..s...l.sh...pph.hsht........................TF.sV....p.....scptscph...............................pass.hclpc.hlssulhcpht..............................pspVsh...p.s...P.....D.....lp...lplpl.pc-..tshl.h...................................... 1 431 822 1180 +4631 PF02511 Thy1 Thymidylate synthase complementing protein Mian N, Bateman A anon Pfam-B_1648 (release 5.4) Family Thymidylate synthase complementing protein (Thy1) complements the thymidine growth requirement of the organisms in which it is found, but shows no homology to thymidylate synthase. 
20.10 20.10 20.40 20.30 19.60 19.90 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.35 0.71 -4.94 163 1591 2009-09-11 08:36:29 2003-04-07 12:59:11 10 9 1413 90 459 1297 3344 199.20 24 79.78 CHANGED Vp...........Llsh....................s........s....s-phlstAARssass...........tph.st................................................tcspthl.chlhc..puH.....hSshEass....hoFtlc.soRuls+QllRHRh.hSa.sphSpRYs.......................................................p.hpph..t.........ahl.P......phpp.s....................htp...hhpcthppuh..................................ctYpcll.pt..........................u......hup....EtARhlLP..ustTclhhohNhRsLh+.FlpLRss........ppAQh.EIRplAtthhc.hlpc...hhPhl ......................................................................................................ss.thh..AuRhsats.......................ph.t..........................ppspthl.chl.hc......htH..............tSs..hEass....hoFtlp.lS+sst+..pl..hR......HRh..sSh.......s..phSp..RYs...................................................................p.hpp......p....................ahl.P................phpp.t...............................................tht.chhpp..sh..ppuh...................................csYppll...pt................................................................................s.hup......EhA.RhlL.P..tu.hpTchhhohNhRs.Lhp...Flp..LR.s.s........pc.A...ph...EIRp.lAtthhch.ltphhP..h................................................ 0 198 347 413 +4632 PF00303 Thymidylat_synt thymidylat_synt; Thymidylate synthase Finn RD anon Prosite Domain Swiss:P28176 is not included as a member of this family, Although annotated as such there is no significant sequence similarity to other members. 
20.60 20.60 21.20 20.70 20.30 20.00 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.71 0.70 -5.39 141 4583 2009-01-15 18:05:59 2003-04-07 12:59:11 14 12 4044 372 1007 3278 1831 266.00 44 92.72 CHANGED -ptYLcllccIl.......cpGpp+..........sDRTGsG...ThS.lFG..hphRac.Lpcs.......................FPLL.TTK...+Vah+ullpELLW.FlpGsTssphLp.....pp.sl+IW-t.u..............pph...............................................p.G-L.......GPlYGhQWRc...ass.............s..........................pt..lDQlpplIcpl.Ks........NPsSRRhllsA..........WNssDl.......spMALPPCHhhhQFaVs.............................c..G+...............................LSCpLYQRSsDhhLGlP.FNIASYuLLTcMlAphs.....G.....LcsG-FlHshGDsHlY..................................................pNHl-..tl.cpQLpR..pPpshPpLpl....p.....php.......sl.ca....ph-Dhp..l.sYpsas.tIc...................h.hAV .................................................................................................h.ptYl-Lhpc.lL.......cpGs..+..........................sD.RTGo.G.....ThS.l.FG..tQhRFs...LpcG.......................FPLl.TTK......+lhh+ullpELLW..FlcG.sT.......NlphLp...........cp.slpIWD.-WAscs........................................................................................................................................G-L.........Gs.V..YGh.QWRp...Wss....................G.............................................................................................................................pt.lD.Qlpp.llcpl...K..p........s.P.s.S...R.RhI.....lSA...................W.Nss.-l...................cpMALsP.CH.s..haQF..Y..Vs........................................c..G+..........................................................LSCpLYQRSsDlaLGlP...F.N.IASY.A.L.....Ls.pMl....Ap.s..............G.......L.c........sG-FlaohGDsHlY....................................................pN.Hh..-..p..s..c...Q..Ls............R............-P..t...s.h..P...pLhl.............s....csc.........................slaDa..........
....ph-Dhp....l.sY..csHs.tIK.hslAl...................................................................................................... 0 318 610 831 +4633 PF02223 Thymidylate_kin Thymidylate kinase Bateman A anon Pfam-B_484 (release 5.2) Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.27 0.71 -4.92 25 5133 2012-10-05 12:31:08 2003-04-07 12:59:11 12 17 4611 113 1395 4369 2991 185.50 31 82.85 CHANGED lEGlDGuGKTTpsphltctLpspuht..hhho+EPsuo.lGchlRphLh....ptphsshs...tslLFsAsRhpclt...phItssLppuphVIsDRahhSolAYQuh.tpt..hchlhplspsshh....pPDlslhL-.lsschulpRhpt+sp......ch.tcphchhpph+ptahcLsp..spphhllcAsps.lEplpppI ....................................................................................................hEGh-GuGKoTt....h....p....h..l....h....p.t.L........p....p.............p..........G............h.............p.......hl.......h..TR.E....P......G...G....o.......l......u......E....p.l...R.pllL...........pspp.h.ssps...............EhLLah..A.u..R..tpclt.............ph.I....t..P.....A...L....p.....p....G..p......h..V.........l......s............DR.....a.........h.....s......S.o........hA........Y.............Q........G......s........u........R......s.................l.............s.............h..........p..........h.........l.....t............p.....l........p.....p........h.....s..........hts........hpP............D.L..T....laLD..lss.-...h....u....h....p....R....ht.t.psp.....................s...Rh-.p...E..s....h....c.....F....a...p...+....l....+p.t...Y...h....p........l...s....p.........t.....s..p.......c..........h.......h..h..l...D.A.....s....ps..h-pVhp.................................................................................. 
0 460 873 1164 +4634 PF01290 Thymosin Thymosin beta-4 family Finn RD, Bateman A anon Prosite Family \N 21.40 21.40 22.30 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -8.06 0.72 -4.36 25 536 2009-01-15 18:05:59 2003-04-07 12:59:11 15 14 139 15 223 588 0 39.70 49 73.45 CHANGED uDcPchsE.....lssFDKscLKKTETpEKNsLPTKEs.I-QEKptp ....................thsE......lEpFDpsKLK+TETpEKNsLPoKEsIcpEKpt......... 0 80 94 160 +4635 PF00086 Thyroglobulin_1 thyroglobulin_1; Thyroglobulin type-1 repeat Bateman A, Sonnhammer ELL anon Swissprot_feature_table Domain Thyroglobulin type 1 repeats are thought to be involved in the control of proteolytic degradation [2]. The domain usually contains six conserved cysteines. These form three disulphide bridges. Cysteines 1 pairs with 2, 3 with 4 and 5 with 6. 20.60 20.60 21.30 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.01 0.72 -4.62 20 2274 2009-01-15 18:05:59 2003-04-07 12:59:11 13 161 191 10 1102 2134 3 67.10 28 20.34 CHANGED Cphphtph...h............tpsspshYlPsCc.ccGpYpshQCp.....ppG.CWCVDst.GpclsGo.ptpGc..spC .......................................................................ttthtsha...l.P..p.Cc..cpG..t..ap.s............h.Q.Cc.............................psG..hCWCVDtp..G.ppl.s.G...o...p.h.t..sp..spC................................ 0 270 360 665 +4636 PF04278 Tic22 Tic22-like family TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The preprotein translocation at the inner envelope membrane of chloroplasts so far involves five proteins: Tic110, Tic55, Tic40, Tic22 (this family) and Tic20. The molecular function of these proteins has not yet been established [1]. 
20.10 20.10 20.50 20.60 20.00 19.80 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.49 0.70 -5.14 13 173 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 89 1 101 185 2 211.40 21 70.51 CHANGED hp.s...............................t..t..psh.R.shsh.......hhhsshAhuu.hsstt.........t.AhuLot..........c-Vhc+LsuVPVFTlssosspPlLsospst....spSluhlalsppDAcsh.Lspl+pppP................-h.upsspVssloLuplYclspt.tt.........csptlsFpFlP-spQlcsAhplhcpsGpph.spFpGVPlFhucu..........LslppcscphhPhFFsK..........EDLpttLc+hppppsclssuhp...IcVssLEsllcshcsuc.ssth-cllFIPPtcul-ah.pphpp ...................................................................................................h..h..........................................................................................shs...........tpl.phLtslPVahlsstps....t..l.....hht..p..................ttls.h...hhh..ptp-Apth.lt.....p.lpt.....tp..............................h..ttthpVhslsL..sps.Y.ph..............................pspsl.hFpalPs.ppl.......psAh.......pl.hpp..p......spt...pt...........ht..G......V....P.lF.scs...............L..sl..p...p.t....s.....c.p........hhPhFFpK...................pslpp.lpph.ppp.p.....t..........t.p....I...tVhslpslltthp.p.....p..pt.hpphhhlPst...ht........t........................................ 0 33 73 93 +4637 PF01826 TIL Trypsin Inhibitor like cysteine rich domain Bateman A anon Bateman A Domain This family contains trypsin inhibitors as well as a domain found in many extracellular proteins. The domain typically contains ten cysteine residues that form five disulphide bonds. The cysteine residues that form the disulphide bonds are 1-7, 2-6, 3-5, 4-10 and 8-9. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.41 0.72 -3.85 167 3970 2009-09-12 21:20:27 2003-04-07 12:59:11 12 461 156 15 2435 3452 1 56.00 32 11.06 CHANGED C.s..ssppap.pC..ss..sC..toCsshps........Cs.........tt..ChtG..C....h....C..psG.al..hsss......pCV.hscC ..............................C.s..sspt.a...p...tC......ss...........sC..st..oCpshsss.................Cs...................pt..C..h...pG..C......h........C..............ss..G...hl.........hsss........pCVshppC............................. 0 585 787 1646 +4639 PF00121 TIM Triosephosphate isomerase Sonnhammer ELL anon Prosite Domain \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.63 0.70 -5.31 56 7470 2012-10-03 05:58:16 2003-04-07 12:59:11 13 10 5835 333 1528 5657 2745 212.10 40 91.49 CHANGED hhlsuNWKMst...shpcspshhpp.hstthts......slc...lhlsPshshLps.spphlp.......ssplt..lGAQNsphps.p.GAaTGElSsphLpDhGspaVllGHSERRphat...EosphlspKhttulcpGLpsl.lClGEolpp...+csspohplltpQlpshlptlstp....t....sllIAYEPVWAIGT.G+sAosppspcsathIRphltph...stphupplpIlYGGSVsssNspclhppsslDGhLlGuASLcs-sFhsIl ...................................................hlhGNWKhNt...sht..t.stth...hpt...htthhss..............ts-......lsl.ssPhhhLst...s.t..ph.hp.......................s.s.p...lt......luAQ.....Ns............hh......pt......s......GAaTGElS.stMLp-hG.spaVllGHSERRp..h..a...t...............EoD....Eh.....lscKsttAh.pp.GLps.IlClGE.......oL.-p............RE.u.....G..p.......o...-..lltpQlcusltslstp...t.....hp..................plVIA.YEPl.WAIGT.Gc.s.A.o.s.ppApphpt.hlRphltt..........stthup.p..ht.l.YG.GSVpstNst-lhs.....p..s.cl....DGhLlGGASLcsptFhtl....................................................................... 
0 528 988 1298 +4640 PF02466 Tim17 Tim17/Tim22/Tim23/Pmp24 family Mian N, Bateman A anon Pfam-B_2241 (release 5.4) & Pfam-B_7792 (Release 8.0) Family The pre-protein translocase of the mitochondrial outer membrane (Tom) allows the import of pre-proteins from the cytoplasm. Tom forms a complex with a number of proteins, including Tim17. Tim17 and Tim23 are thought to form the translocation channel of the inner membrane. This family includes Tim17, Tim22 and Tim23. This family also includes Pmp24 a peroxisomal protein. The involvement of this domain in the targeting of PMP24 remains to be proved. PMP24 was known as Pmp27 in [3]. 29.70 29.70 29.70 29.70 29.60 29.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.81 0.71 -4.09 129 1856 2009-09-11 16:05:24 2003-04-07 12:59:11 14 27 332 0 1255 1746 12 125.60 21 61.48 CHANGED ssstchhhss..stuathGslhGuhhsh............................................................hhthh.pssshp...phptslpsstp...s.......hutshushuhlassh........csslpt.h.R.u...+....cDhhNulhAGshoGu.lhut.....ps....Ghpshshu.uhshushuus.....hpth......tthh .............................................................................................phh.ss.stuhhhGshhGsh.hthh...................................................................................................................th....psss..hp......php.t.s.hps..htp....tu........hu.ts...hushuslaush.............psslpp..h...R..s....................K.........cD.hNulhAGshTGu..lhuh...............ps.......Ghpsh.hh...u.u.hh.h.us.ht.uhhphh....h........................................ 0 381 672 999 +4641 PF04821 TIMELESS Timeless protein Mifsud W anon Pfam-B_3454 (release 7.6) Family The timeless gene in Drosophila melanogaster and its homologues in a number of other insects and mammals (including human) are involved in circadian rhythm control [1]. This family includes a related proteins from a number of fungal species. 
25.00 25.00 26.50 26.40 24.50 23.50 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.53 0.70 -5.14 23 381 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 261 0 242 392 4 240.50 26 24.27 CHANGED -sssYtlu-DALtsL+DLh+aL+h.D-php..-ltRsLupuplVpsDLlslLspahp..........cs+ls.AslcLLVsLThPlEh....cs..p....ssst++ph..Lpph..uYKcAhhs...pplhpsllc.hhhsshthshu-RopcDpslIchlLhLlRNILtIssssptptcsD........-cpss+sphl.AhppQslhcLlLhlsSs..tppash.....hlLEIlhhhh+s....sspcLhtsstptotpc....sc-Lpshhc+Epu.p.....+thsp.sssoRHSRFGshhhVp .................................thYhhts-sL...tsl+Dlh+hl+h.Dppt...sltptlutsp.llpsDLlP.lLhphtp.........................................pppl.h.ss.lclhV.LT..Ph.h...........tt......................s.t...p..pph...pl.ph..sYKc.uhhp............................tphhtslhc.hhh...hl......t..h.s.......h.......tcR......stc-.........phllchlLhLlRNlLtl...st.s.....t..t......phpsc..................................psts....pspll.shp.p.sl.cLLLhlso...s............p....p....p...ash........hllEIlhhlh+s..............ssp..pL...h...t......s.t.........p.t.........s..h......p..c.....................................................t........t.............p..............L.pshh...p.pE.tt..p.....................pthht..p.ssRHucFGs.h........................................................................... 0 90 138 205 +4642 PF05029 TIMELESS_C Timeless protein C terminal region Moxon SJ anon Pfam-B_5695 (release 7.6) Family The timeless (tim) gene is essential for circadian function in Drosophila. Putative homologues of Drosophila tim have been identified in both mice and humans (mTim and hTIM, respectively). Mammalian TIM is not the true orthologue of Drosophila TIM, but is the likely orthologue of a fly gene, timeout (also called tim-2) [1]. mTim has been shown to be essential for embryonic development, but does not have substantiated circadian function[2]. Some family members contain a SANT domain in this region. 
29.40 29.40 30.00 29.90 28.30 28.90 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.88 0.70 -5.58 5 232 2009-09-11 11:39:37 2003-04-07 12:59:11 8 10 182 0 164 239 1 364.30 22 35.30 CHANGED hLHRIAaDLcMsuLLFQLSLFsLFp+lLS-Puussp.....KELcpls...+allRKFlclAssNPKlFVELLFaKssuss+ElspGYsst-stosSKRA.....sWosEEEupLc-Lalsspcl...EspDVV-WILcsLssssRsR+sll++Lh-...hGLtDoscDhhpsKS.......A.ppKupphlLWT........................uDp-l.ELpcLa.......--aRDssDlLs+l.............hcshsu+RS+AclVcK...LLsLGLlo-+RpLp.hKK+RKKpusu.h.sst.p.cs....htE-...E-p...hPcpcsccsccch..utsspsssslcs.pLtppl+pEshphsLhWLQssLhcAA-DcEED.tssQulPLlPLpEcsEEAMENcQFQ+lL+pLGlRsPt...SGpEuaWRIPhhLssspLhhhut.Lut.E..tE.c...EhpschsGEp.u-..Ep...Eccsphhct..pARpRKtGlsoscpc.ths.........cp..+usPKtpph............................................pttptppht........uo-u-pEc-D..hDE......sp.hE.psp-ts..s+p.ttsts-h...sshs.tp..c.s.hssDs-pAD-.usPch+c-KRh...............................tpls.tP.pRR+Lt...llEcDDE ..........................................hhaRlA..achchpshLFplslhplF.ch........lps......t...shs.........+Eh.phs....phll++hhph.h.tp.......p..s.....tlhsEl......LF.K..ts.sss.h.lp.Gatp......s....s.t.pc.t.................hhstt.tptp..p.plh....h.hts.....t.tt.-llphlht.Lss.s.ppR+t.....h......tt.....t.............uh......t..........h.......................................tpt......lhp..................................................................t....thp..lh...........tt.t....p.lt...............................hp......tt.......sp.t..p............hh..s........h..tt...tp.h...t...ttttt....t.............................................................................................................................t................t.......................h..h....tt............p...........s.t..............t........................................................................................................................................................................................................
..............................................................................................................................................................................................................................................................................................................................................................................................................................................ttt.t..................................................................................................................................................................... 0 52 85 130 +4643 PF00965 TIMP Tissue inhibitor of metalloproteinase Bateman A anon Pfam-B_1239 (release 3.0) Family Members of this family are common in extracellular regions of vertebrate species 21.40 21.40 21.50 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.73 0.71 -4.68 25 416 2012-10-01 21:39:20 2003-04-07 12:59:11 12 6 133 18 187 403 1 147.00 35 78.34 CHANGED -ACSCs.P.sHPQpsFCsuDl...........VIRAKl....Vucp.hpsss...........thhpYclKphKhaKGhsp.....clphlaTssspul............CGlp.L-sN.tcpYLIoG..+h..DGchal..slCsalp.W-sLohoQ++GLsptYptGCs.C+Is........ChshPCh......lousscCLWTDh.hhptsh.upQucHhsC.lpcssupCuW ..................................................uCo.C......cP.Qp......taCp.uch...........Vl+uK..h...........lupp.stt.ss..............................hhhpYplK.hKhaKG.pp...........clph.....laT.s..sspul............CGl.p..Lcss..........pppYLls.G...ph....sGc.hal..slC.s..al....Wsp..Loh...sQ++....ul.....stpYp...hG...Cp....C.plh.............C.........h..sC.............hpu.scClWhsh....h.........p....h.t.............uhpuphhsC..h.p......s.CsW...................................... 
1 51 65 110 +4645 PF03549 Tir_receptor_M Tir_receptor; Translocated intimin receptor (Tir) intimin-binding domain Griffiths-Jones SR, Moxon SJ anon PRINTS Domain Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation [1]. This family represents the Tir intimin-binding domain (Tir IBD) which is needed to bind intimin and support the predicted topology for Tir, with both N- and C-terminal regions in the mammalian cell cytosol [2]. 25.00 25.00 121.40 120.10 24.40 21.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.18 0.72 -3.85 3 132 2009-09-10 22:06:29 2003-04-07 12:59:11 9 2 104 8 1 79 0 66.00 83 12.33 CHANGED PDsAASsAEoATKDQLTQEAFQNPDNQKVNIDENGNAIPSGELKDDVVAQIAEQAKAAGEpARQQA PDAAAsAsEoAT+DQLTKEAFQNPDNQKVNIDE.GNAIPSG.LKDDVVApItEQAKAAGEpA+QQA 0 0 0 1 +4646 PF04553 Tis11B_N Tis11B_C; Tis11B like protein, N terminus Kerrison ND, Mistry J anon DOMO:DM04398; Family Members of this family always contain a tandem repeat of CCCH zinc fingers Pfam:PF00642. Tis11B, Tis11D and their homologues are thought to be regulatory proteins involved in the response to growth factors. The function of the N terminus is unknown. 
19.10 19.10 22.60 21.20 17.90 15.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.80 0.72 -3.88 6 107 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 40 0 47 82 0 114.70 52 31.66 CHANGED MsTsVlSs.hFDh.-VhsptNKhhsas....s.......PSsus.........sLhDRKsVGTPuhsh.F.RRHSVTsss..............uKF..sQNQhlNs..............LKh-PS.....s.sTuhsNKEN+FRDRSFSEsGER...LL.....ppppPGG ............................MoTTLlSA.haDl..-hLsKs.......p.K.Ls...................s.shs..........shLD+KAVGTP.....suu.........G.......F.RRHSso..p.................sKF..ptNp.hh....uS.....................LK..E.Pu...........sAL..s+E...s+FRDRSFSEsGER...LL.....pQpQ.Gu.......................... 1 4 9 19 +4647 PF01108 Tissue_fac Tissue factor Finn RD, Bateman A anon Prosite Family This family is found in metazoa, and is very similar to the fibronectin type III domain. The family is found in cytokine receptors, interleukin and interferon receptors and coagulation factor III proteins. It occurs multiple times, as does fn3, family Pfam:PF00041. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.52 0.72 -4.31 59 788 2012-10-03 16:25:20 2003-04-07 12:59:11 12 11 75 71 325 782 5 101.60 24 26.72 CHANGED hhhhLlshlLhh..........ssssssslssPp......slphpShNFcplLpWcP.....sts.ssssYoVpap.......htpp..........................pWpsh........Ctphop.ppCDLTsthts....hptsYhsRVpuhsssppSsh ......................................hh.............................stlssP.p......slp.hp.S.....h.N.hc..p.l.L.pWps...........ts....s..tss.sYoVpap............hspp.................................pWpsh.........tC.tphs.p..ppCDlos.thts....hppsY..hsRV+AhhusppS........................................................................................................ 
0 20 41 128 +4648 PF00265 TK Thymidine kinase Finn RD anon Prosite Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.29 0.71 -4.71 13 3293 2012-10-05 12:31:08 2003-04-07 12:59:11 13 11 3022 53 635 2063 1299 177.10 38 89.01 CHANGED GpIplIhGPMFSGKSTELlRplpRYpluph+ClllKashDsRauss.....VhoHDshphsAh....sostLp-lhsthpp..hcllGIDEuQFFs..-llpFs-phANtGKhVIlAuLcusFp+c.FssIhpLlPluEpVsKLpAlC.hCa+-.ASFotRLs......sEsplhlIGGs-pYlSVCRpCY ...........................................................................................pl.hhh.GsM.uGKo...p........pLLpt....s.....hp.a..p.............t.....s.hp..s.l....l...h.pssl.D.s...R..hutu................pVs..SR..hGhptpAh....................ss.s...h...h....p....c....l.....h..p.....p......h.........p..p.......p....h.....c...slhlDEsQF.....h.....s...pp.......l.h.p.l.s.c.l.l.s...p...h.s.l..sV..lsa......GLcsDF.+s-hFt.u.othL.L.s..h..A.D..c..ls..c..l..+..sI..C.....h..CG.....+..c.....AohshRlsps...............shh-ucQ.....l.....h...I...G.......G.....sE.........p......YhslCR+Ha............................................................ 
1 226 409 532 +4649 PF00693 Herpes_TK TK_herpes; Thymidine kinase from herpesvirus Bateman A anon Pfam-B_186 (release 2.1) Family \N 25.00 25.00 26.90 34.60 22.40 19.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.94 0.70 -5.24 14 508 2012-10-05 12:31:08 2003-04-07 12:59:11 13 2 87 61 0 548 0 249.80 52 70.99 CHANGED GsaGlGKTTTucslsstssst.s.hLahPEPMsYWRshFs.oDslsuIhssQsR+ppGplust........cAshlsAphQupassPYhlhattlsshh...uuphs.s....s.P....slTLlhDRHPlAuslCaPhARYllG-hohpsLluhlAslP.EsPGsNLVVsoLs..sEHhcRLtsRuRPGEplDhshlsALRNVYuhLsNTspaLpsGss.....WRcsWspLshhssshhtphsp.sshstp...t.PsLpDTLFAhhKs.ELhsspGs.LhslHAWsLDuLss+LpsLpVFslDls .....GPHGhGKTTTophLsuhuu.R..DcIVYVPEPMTYWpVLGA..SETIuNIYsTQHRLDpGEISAG........DAAVVMTSAQITMuhPYAlTDAVLAPHI...GGEusuS..pAPsP...uLTLlFDRHPIAALLCYPAARYLhGuMTspAlLuhluhIPss..hPGTNIVLusLP.EccHlcRLu+RpRPG.E.+.lDL..uMLsAlRpVYthLsNTl+YLQtGup......WR-DWGpLsu..sAh..phscsps....s...t.PcIt-TLFslF+sPELhsssG-.LhplaAWsLDsLAp+LpshplFhLDh.c.................................................................................................... 
0 0 0 0 +4650 PF03219 TLC TLC ATP/ADP transporter Mifsud W anon Pfam-B_2261 (release 6.5) Family \N 19.30 19.30 20.20 19.30 19.10 19.20 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.75 0.70 -6.02 27 560 2012-10-03 03:33:39 2003-04-07 12:59:11 9 8 194 0 186 483 158 424.20 32 84.80 CHANGED tsptppthuc...........h+thhWPlcthElpKhlPhslhhFhILFNYslLRshKDolVlTuh...GAElIsFLKlWsllPuAllhhllYsKLuNhhopEplFYhllssFLsFFhlFuallYPhp-hlHPss.s.phhshl.sshphhlshhtpWoaulFYlhuELWuolVloLLFWtFANpITplpEAKRFYsLaGlhuNhuLlluGphlhahus.........hhpths.sl...csathplphlhslllluGllhlhlahhls+.V.........Lsssph...........sshppt.+KpKsKholt-ShphlhpS+YlhhIAlLVluYGlsINLVEssWKupl+plYPss.......s-YssFMGphsshTGlsollhh.hlGusllR+hGWthuAllTPlllllTGlhFFuhllFpcpls.hhush.GhsPLhlAVhlGuhQNlhSKusKYSLFDsTKEMAYIPLDpE.KsKGKAAlDVlGu+lGKSGGulIQphlhllh...uohss.sPaluslhllllllWlhuVppLs+pa ...............................................................................................................................................................hpthhhslp...hEhpKhlPhshhhFhIhhN.shlRshKDslllst....uAEhlsFlKhahshP.u.hlhhll..YsKLs.s.h...hshct.lFahlhs.FlhFFslFuhll.a.P..t.....ch....hH.......ps...h..............p..............h..........h..............st...ht.h....hl...h...l...ht.pWoas..laYlhuELWsslhhslLF...WtFANpITpscEAKRFYslhulhuNhullhuG.hlhhhup........................hh..h...ts...ss.th.l....h..h.slllhsuh.lhhhha.....h.hl.s+..l............................lsp.h..................thhpt.cpp+.Kh.uhh...-Sh.phlh....pS+YlhhlAlll.lsYGls.......I.N....L.........VEs....sWK....upl....+....p..ha.P.ss................ppY...ssa..h.Gpa....hh..s.Gl...ss...l................hhh...hl.....u..s...sl...l.RphGWhhuAhlT...Pl.hh.h.....l..T..G......h.h.FF.....u.hlh..F....t......p...h.......s.....h....h.....ss.h.................h..h.......sP.Lh......l..A.lh....l...G....ul..Q..NllS.....KusKYoLFDsoKEMAYIPLDp-hKs.KGKAAlDVlus+lGK..SuGullQphhhh.lh....uohssh..sshlhhh.hhllhhhWlhusttLsppa...............................
....................................................................... 1 91 121 149 +4651 PF05017 TMP TMP repeat Bateman A anon Pfam-B_1012 (release 7.6) Repeat This short repeat consists of the motif WXXh where X can be any residue and h is a hydrophobic residue. The repeat is name TMP after its occurrence in the tape measure protein (TMP). Tape measure protein is a component of phage tail and probably forms a beta-helix. Truncated forms of TMP lead to shortened tail fibres [1]. This repeat is also found in non-phage proteins where it may play a structural role. 15.00 2.90 15.00 2.90 14.90 2.80 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.78 -6.01 0.78 -6.19 0.78 -2.30 95 345 2009-09-16 13:22:24 2003-04-07 12:59:11 9 15 44 0 65 324 23 10.90 35 10.45 CHANGED WssIpshhssh WsuI+shhos..... 0 54 56 56 +4652 PF02581 TMP-TENI Thiamine monophosphate synthase/TENI Mian N, Bateman A, Griffiths-Jones SR anon COGs Family Thiamine monophosphate synthase (TMP) (EC:2.5.1.3) catalyses the substitution of the pyrophosphate of 2-methyl-4-amino-5- hydroxymethylpyrimidine pyrophosphate by 4-methyl-5- (beta-hydroxyethyl)thiazole phosphate to yield thiamine phosphate [1]. This Pfam family also includes the regulatory protein TENI (Swiss:P25053). 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.00 0.71 -5.16 89 5722 2012-10-03 05:58:16 2003-04-07 12:59:11 12 31 3919 62 1431 4669 2676 177.90 29 71.70 CHANGED lYhlTsst.t........tp...hhchlcpAlpu..........G.lshlQlRpK.....p....hss...pphhphApplttlsc..ph..ss.hllND..clclAht.....lsA.......DGVHlGQ...cDh.ssspsRplhusshllGlSspshp-...stpAt...ptus..DYluhGslasTsTKtss..pshGlptl.pphtpthp..........l....PllAIGGIshpslsplh........psGssulAVloAl .............................................................................................................................ahlss................tt...hhphl..cp.s.l..pu.....................G...ls..hlQ..lRpK..................................s......h.s.s......p..c.hh.p......h......stp......l.ttl.s.c........pa..........ss...ll..lN......D.....c.h....cl..Aht..........h.s.A...........c..G....V..Hl..GQ................c.D...h.........s.........s......s.......p......h....+......p....l....h...........s.......s................s.....h.......l.......l.G.lS..s.+..shp-.....htpAt..................t.t.ss......DY.....lu..l......G....s.l.a.s........T.s..o.......Kp...s........s...ss..hG......l....p..t...l...p.ph.t.p.t.hs..............................l..PlVAIGG..I..s..h..p..s.h..s.p.lh..............t.sG...s...sulAVlsul............................................................................................. 0 467 957 1229 +4653 PF00721 TMV_coat Virus coat protein (TMV like) Bateman A anon Pfam-B_746 (release 2.1) Domain This family contains coat proteins from tobamoviruses, hordeiviruses, Tobraviruses, Furoviruses and Potyviruses. 
23.00 23.00 23.50 23.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.92 0.71 -4.29 47 557 2009-09-11 05:07:10 2003-04-07 12:59:11 16 2 125 27 6 518 0 140.10 31 54.13 CHANGED sYs...hss...pphhhhsssaschpshhshlpshpuspapspsuRstltstlusl.h......psssshspRFPss.s......h....hlhhtssslt.llssLhsuhc..ocsRhhEhppss....ssssups.s.....ssptssD..ush.sl+ssltplhstLsp.t....sshasp.....spFE ........................Ysh..hss...pphhhhsssascs.plhshhpsuhusphps...ppuRsslppphusl.h......pshsshssRFPss.s.........h....hVhhhssslcsllssLhsuhD..T+NRhhEscs.t.....sPssuEs.s.....sTpts-D......uol.Al+sslspLhstLhp.t.....pshaspspFE.................................... 0 1 1 4 +4654 PF00229 TNF TNF(Tumour Necrosis Factor) family Finn RD anon Prosite Domain \N 20.90 20.90 20.90 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.70 0.71 -4.57 52 1178 2012-10-01 20:41:10 2003-04-07 12:59:11 13 10 195 271 481 1150 10 120.10 23 49.88 CHANGED LpWppst...tshhtsGhphpssp..LllspsGlYalYoQlhFp........tsstt.................h.lshtlhhhssph....sp.hsLlpuhc.sssptts..........hhpslYhGulapLppGDclhlp..ss..p.shlchs..sspoaFGsatl ............................W.tt......h.h...p..shph.p..ssp....Lhl.psGlYalYuQVhFp........ts.tt............................t.h.lsttlhphssth......sp.ph...sL..hp.shp..ss..s.pt..ts...............hhpoh..ahG...G.lhpL.c.pGDcl.lp...ls......p.phlpht....tstoaFGhhhl............................................... 
0 63 98 204 +4655 PF00020 TNFR_c6 TNFR/NGFR cysteine-rich region Sonnhammer ELL anon Swissprot_feature_table Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.07 0.72 -3.89 37 2327 2009-01-15 18:05:59 2003-04-07 12:59:11 13 54 156 88 889 2043 25 38.90 30 15.97 CHANGED Cpps..pYpc...ps...t.C..C.shCps..GphhhpsCst.spsT.hC ...................C.ps..pY...ps.......sh....tpCh.C..spCss....uph.hh..psCos...spso..hC.......... 0 184 248 442 +4656 PF01107 MP Tobamo_MP; Viral movement protein (MP) Finn RD, Bateman A anon Pfam-B_815 (release 3.0) & Pfam-B_1906 (release 4.1) Family This family includes a variety of movement proteins (MP)s. The MP is necessary for the initial cell-to-cell movement during the early stages of a viral infection. This movement is active, and it is known that the MP interacts with the plasmodesmata and possesses the ability to bind to RNA to achieve its role [1]. This family also includes consists of virus movement proteins from the caulimovirus family. It has been suggested in cauliflower mosaic virus that these proteins mediated viral movement by modifying plasmodesmata and forming tubules in the channel that can accommodate the virus particles [2] and references therein. The family contains a conserved DXR motif that is probably functionally important. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.83 0.71 -4.92 18 493 2009-01-15 18:05:59 2003-04-07 12:59:11 13 31 165 0 22 530 0 167.20 20 27.73 CHANGED hpplchs-...hlsLsps-phhs..shhpthKpshh++schlhsl............ppsculsplsLlsps.hp..........ctpchsal+luulhlslcsph.cshcsslplsLlDsRh.pstc-.uhlusacsshshtchtFplhPpYulShpD.slc+shplhsphcsls.MccGspPholcassshtloNSphslsh+phhssl ...............................................................................................................h..................p.h.............p.h......pp..hhhs..................tpspth..lsllpt........................ptpphsa..lHlusl.lshchh.h...p......sh.ssshtlsLhDsR.......h..tp...hc-...u....hlushpsshsps.p.hhh.p.h.hPsaslshpD.shppshplhlphcshp.hptGt..psholpht.hh.hhhs...s.h...........h....................................... 0 0 15 18 +4657 PF04052 TolB_N TolB amino-terminal domain Bateman A anon Bateman A Domain TolB is an essential periplasmic component of the tol-dependent translocation system. This function of this amino terminal domain is uncertain. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.14 0.72 -3.96 174 1688 2012-10-01 20:48:06 2003-04-07 12:59:11 8 19 1656 15 404 1275 1658 103.20 37 23.83 CHANGED lpIcIop.GsspshPIAlssFt...spsss........thspplupllssDLppSGhFpslstsshhppstp....sspspassWpslsupullsGpls.ttsss...hplpacLaDlhp..sppl .........................................lcI.Isp.GsssshP.IuVl.PFp........htuss............hPpcluslluuDL.cp.S.GpFsP.lspuphsppsss..........spplphss.W.p.u..l....G.h-A..lVsGpVs...s..scG..papVsapLhDs.tt.............................. 0 106 226 315 +4658 PF03349 Toluene_X Outer membrane protein transport protein (OMPP1/FadL/TodX) Mifsud W anon Pfam-B_3708 (release 6.5) Family This family includes TodX from Pseudomonas putida F1 Swiss:Q51971 and TbuX from Ralstonia pickettii PKO1 Swiss:Q9RBW8. 
These are membrane proteins of uncertain function that are involved in toluene catabolism. Related proteins involved in the degradation of similar aromatic hydrocarbons are also in this family, such as CymD Swiss:O33458. This family also includes FadL involved in translocation of long-chain fatty acids across the outer membrane. It is also a receptor for the bacteriophage T2. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.48 0.70 -6.11 22 2710 2012-10-03 17:14:37 2003-04-07 12:59:11 11 6 1716 33 577 2172 277 395.70 23 92.57 CHANGED sshssstshAuuh.hst.usuul.uRAhuGpuuhtc.ssushhpNPAshshhcp..sp..hphGhshlssc.lchpssssstph......................tstsssssshhlPphthlh....s-pauaGhulhsshGluocass.sahups..............huspocLtsls.lshusuY+Vs...cclSlGuulshsaspsplpp.......hhs...........................shsshsss...s.sssshhthts-shuhGaplGhhachsc.ssplGhsYpucs+h.chcGphshshssuhhs.h........................shsGplpl...plPsthpluhhHph.s-paslthshphstWSshpcl.........thtts.....tst..hshshs.sa+DshthulGssYphssphTlRuGhsYcpsshsspp.tsshlPsscppa.hShGhoYshs...ptslDhuauahhtccsshsp.s................sshshp.spssshlhulshshpF 
........................................................................................................................................................................................s....ss.s.uu.uat.l.tp.......os.s....uh..up..A.hu.Gtus.h.s.s.s.A.u.sh...h..NPAhhshhcp.....sp.......hssuhs..h........l..pss.......h..p...h..p..s..s..s...s..s.t..t.t.................................................................................................psts..h..ss.s..t...h...l...P..s.h......a..h.s..h...l..........s-.p.......h...s..h.G.h.u..h.h..s...s..a.G..h.s.o...-....a...s..s.....s..hsu..t.............................................................................................hu.s.p..s..sl..p...s......hs......l....s.....sh..u...Y.....+...l..s.........p.phShGhGhsh..h.a..s.p.up.lpp............h.s..............................................................................................s..shs......sss...st.h.....s....p...h...p..us....s.hu.h..G..a..s..hGh..h.a..c..l........s...c....s......t..R..h....GlsY+S....clc....h...ch.c...G.s.hs....t..h.......s..h...t.................................................................................s.s.u..p.h....s.l.......slP..phhplus....hpp..l....ssp.h.......sl..phshpaTsWS..p..appl..................................ps.p.s.......................t....sh....h.p..t.p.t.......ta.c.D.sa....phu.lGs.....s..Y.......p.h...........s...c.......p.h...........s..hR..s........Gl..........ua..........Dpo..........shs..........s............p...............p.....t...............s...............h...............p.............l............P........c..............s....-......R.h.........h.............hohG....ss..Y..p.hs........ts....h....slD......h.uh..s.a.h.h.t.p.p.s..p.h.s.p.................................ssh..php...ps.p..s....hhuhphshtF........................................................................................... 
0 153 342 479 +4659 PF04281 Tom22 Mitochondrial import receptor subunit Tom22 TIGRFAMs, Finn RD anon TIGRFAMs (release 2.0); Family The mitochondrial protein translocase family, which is responsible for movement of nuclear encoded pre-proteins into mitochondria, is very complex with at least 19 components. These proteins include several chaperone proteins, four proteins of the outer membrane translocase (Tom) import receptor, five proteins of the Tom channel complex, five proteins of the inner membrane translocase (Tim) and three "motor" proteins. This family represents the Tom22 proteins [1]. The N terminal region of Tom22 has been shown to have chaperone-like activity, and the C terminal region faces the intermembrane face [2]. 28.70 28.70 28.90 28.70 28.60 28.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.90 0.71 -4.64 30 269 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 232 0 191 247 0 117.40 30 76.41 CHANGED M.VcLTEVcDEshp........t..hsptpstttss.s-s--.o-s-SDh.sD..-.D........-sETlh-RlsALKDIlPPppRppIusthusssohh+oshshuGphlWsloTSALLLGVPhALAlhsEpQllpMEK.EhshQcsAs-.lLAP ..............................................................................................t.pcs-p-h.pp...-..D.p.........cETlhERlhuLp-hhPspsRpthusshs...tssshspshhpaoupuhWlhoTSuhlLslPlshth.....t...EpphhpMEp.p.p.hppt........t......................... 
0 55 92 147 +4660 PF03220 Tombus_P19 Tombusvirus P19 core protein Mifsud W anon Pfam-B_2714 (release 6.5) Family \N 25.00 25.00 26.80 299.80 21.40 15.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.19 0.71 -4.91 4 59 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 3 0 44 0 172.00 83 99.71 CHANGED MERAIQGsDAtcQAsuERWDGGsGuohoPFQLPDESPohcEWRLHpsEpsscpDpPLGFKESWuFGKVVFKRYhRYDhpEsSLHRsLGSWpGDoVNhAASRFhGVsQlGCTYSIRhRGlSlTLSGGSRTLQRLlEMAIRhKho.LQLsssEVEusVSRGCPEusps...cESE MERAIQGNDAREQANSERWDGGSGGoTSPFKLPDESPSWTEWRLHNDETNSNQDNPLGFKESWGFGKVVFKRYLRYDtTEASLHRVLGSWTGDSVNYAASRFFGhsQIGCTYSIRFRGVSlTlSGGSRTLQHLCEMAIRoKQELLQLTPlEVESNVSRGCPEGscsF.ccE..p..... 0 0 0 0 +4661 PF03544 TonB_C TonB; Gram-negative bacterial TonB protein C-terminal Griffiths-Jones SR anon PRINTS Domain The TonB_C domain is the well-characterised C-terminal region of the TonB receptor molecule. This protein is bound to an inner membrane-bound protein ExbB via a globular domain and has a flexible middle region that is likely to help in positioning the C-terminal domain into the iron-transporter barrel in the outer membrane [1]. TonB_C interacts with the N-terminal TonB box of the outer membrane transporter that binds the Fe3+-siderophore complex. The barrel of the transporter, consisting of 22 beta-sheets and an inside plug, binds the iron complex in the barrel entrance [2]. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.39 0.72 -3.78 109 5753 2012-10-03 21:09:15 2003-04-07 12:59:11 9 80 2084 10 1601 5569 1848 76.60 24 29.38 CHANGED ht.pYPppApppsh..pGpVhlphslsssGplps.hpllpuss..t.hL-cuAl.cslcp...hpatPthts.sp...slshph...slpFp...L ..........................h...pYP..tt.At.p.t.th....pG...p...V..h..l..phs.l.s..t.s.G.......p..l..p..........s....l........p.......ll....p............u...s..................s..........s...hlDc..tAl.c.slcp.....h+.....ap.Pshts..Gp......sl.p.hhh....sltFp.................................................. 0 574 1099 1396 +4662 PF00593 TonB_dep_Rec TonB_boxC; TonB dependent receptor Bateman A, Yeats C anon Yeats C Family This model now only covers the conserved part of the barrel structure. 16.60 16.60 16.60 16.60 16.50 16.50 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.66 0.70 -4.48 666 34093 2012-10-03 17:14:37 2003-04-07 12:59:11 19 67 2349 66 8506 31731 11867 284.70 12 36.18 CHANGED sPphul......aphss..............shslhu..saupuacssshsphh..............................................................................................................hssssLcPEpupshElGhchphts..........hphshsh.aptchcshlstsssssshsstsstttttttttttstttthththhtththhhh............................................................................................................................................................tttssttststtsthhtttsthhtththtttthtththshshthsspttsssssshph..suashh-ht.ssY.phspt................hplphs...lpN...Lh..........s.cpYhthhs.t.........................hhh.u............................................sR.shhhslshp 
......................................................................................................................................................................................................................................................................................................................................................................................................................................sphuh......ap.hsp..................................phplhs...ua..up..s..h..p...s.....s..s..h..tp..hh....................................................................................................................................................................................................h...ssssL....c....s...E....p.....u.....p...s.....h...-lG...h...c..h..p.hh..s...............................hphs.......hs..h..a.p.p..c.....h.....p.....s....h........l......h.......t......s.......s.......s.....s...........s.......s.................s.....h.......t......s.......t.......t.........t.........t.....t....s...t...h.......t......t.....t..t...h...t.........h..h..h..h...t.h..h..h..t..t...t...t...t..h...h..h.....................................................................................................................................................................................................................................................t.t.t.t..t...t.t..t..t..t..t....h....t....h......h....t....h...h...t.....h....h.....t.........h.....t.....h.....t............t..........h...t........t......h....t...........h..........t....h........t.......h...........t......h......t.........s........t.........t...........h....s........s...........s......t........t.....h....h...p..h................s.......s........h.........t....l....h.s.lt.....hs..Y...phspp..................................................hpl..t..hs.....spN.........Lh.....................s...p..ph.h.t...ht.t................................................h.s..........................................
.......s..R..s..hhhshph.................................................................................................................................................................................................................... 6 2644 5782 7351 +4663 PF01131 Topoisom_bac DNA topoisomerase Finn RD, Bateman A anon Pfam-B_505 (release 3.0) Family This subfamily of topoisomerase is divided on the basis that these enzymes preferentially relax negatively supercoiled DNA, from a 5' phospho- tyrosine linkage in the enzyme-DNA covalent intermediate and has high affinity for single stranded DNA. 23.10 23.10 23.20 23.20 22.70 23.00 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.60 0.70 -5.65 126 9972 2009-01-15 18:05:59 2003-04-07 12:59:11 15 124 5053 35 2152 8171 4882 388.60 29 56.06 CHANGED chshsLspAthsRphhDhllGhslSp.hh....s..........t.....tsl.SsGRVQoPsLthlV-R-cEIcsFh....sp...a..aplp...sp.............h....................ttthh..............................ttpptphhscp..pApphhppl.p.............t......lp..plcpccc..pp.ssP.PashssLQpcAspphu.houpcshplAQpLYE........p.G....lIT..YPRTDSphls.ps......s.hpthhphlpp..t......hhs.....h.th...tp..........stp...sAHcAIhPTtshs.......sphs..........cctp.........lYcLIhcRalAs......hhssshhppsplpl...t........tt.........FpspGppllp..tGaptl...h.........ttpppp................lP...lpp...Gch...........l.h..tp.p...h.tcpTpPP.spaoEuoLlptM-pt.........................GlGpsuThAsIIppLh...cRtYlppt....p.pt...lhPTchGhtlh.chL.......p....h...p..lssschTuphEppL-pItcGchs.hpphlp 
.....................................................................................................................................p...hsLssAt.ARphhDhllG..hs.h.S.hl......p..................................................p..ptsL..Ss..GRVQossLt.lllcR-..cEIcsFh......scp.....a.....aplpup........h..................pttttp..................................................................hptpphch.h..s.cp......psp..t..l..h.....p.t..lps......tt................hpVp.pl.pp.Ktp..............pp..tsPhPash..o.oLQpcAup.+hs..au......s....ccThpl.......AQ.pLYE............p.t.....hI..........T..Y.RTDSph......l...spp......u.hp.t.shph.l..t..p..p..h.st....................pahs....h....p....h..h..t..t..t.......................pssp........pAH.cAIhP...T.tth................tphs...................................................pppt..plYpLIhp+alAs..h.s.s.uhhcpsslpl.......ph...................................st.hp....F..p..u..p..G.ph.lh...t..Gahpl.............h...........tttppppt.t.........................................................LPt..lppG-t.............................................lph......tp..hp.......pp.+h.TpPP.s+aoEAoLlp...t...h.Eph........................................GlG........p.......suTh......AslIpsl..........c.R....t........Y........lph.............p.+p.........lhsTphGhhlhphl.......p.............................p...........lhs.......p......hTAp....h.....EppLcpI.tp...Gphphpphl............................................................ 0 747 1386 1825 +4664 PF02919 Topoisom_I_N Topoisomer_I_N; Eukaryotic DNA topoisomerase I, DNA binding fragment Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1377 (release 3.0) Family Topoisomerase I promotes the relaxation of DNA superhelical tension by introducing a transient single-stranded break in duplex DNA and are vital for the processes of replication, transcription, and recombination [2]. This family may be more than one structural domain. 
25.00 25.00 25.50 26.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.37 0.70 -5.05 47 487 2009-01-15 18:05:59 2003-04-07 12:59:11 10 10 324 17 282 471 28 203.10 51 28.20 CHANGED hKWpoLcHNGVlFPP.Yc.LPcsVKhhY-GcslcLsscsEEVAsFaAshL-o-.aspp.sFp+NFFpDF+chlscpttt............IcchcKCDFophapaappp+....EpKKshocEEKK........tlKtE+-c.h-c.YtaCllDG+.+E+VGNFRlEPPGLFRGRGpHPKpGpLK+RlhPE.DlsINluK-u.lPpPP....sGH.+WpElpHDNoVTWLAhWpE.NIssp.hKYVhL ................hKWppLEHpGslFsPsYc.LPcsV+hhY-GcslcLssc.sEEVAoFautM.L...-.......p.....-......Y....spcthFpcNFFpDa+.c.hsppptt...................hIpshpKCDFs.hapaaptpp.......E..t+.......K..sho+E.....EKp.......................tlKpEp-c.hpppYtaClhDG+.+E+lGNF+lEPPGLFRGR.G-HPKh.....GhLK+RlhPE.DlsINhu.....K...........-utlPpPP...................sGH.+WKEV+HDNpVTWLA.WpE.NIpss.hKYlhL.................................. 0 94 151 229 +4665 PF01028 Topoisom_I Topoisomerase_I; Eukaryotic DNA topoisomerase I, catalytic core Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1377 (release 3.0) Domain Topoisomerase I promotes the relaxation of DNA superhelical tension by introducing a transient single-stranded break in duplex DNA and are vital for the processes of replication, transcription, and recombination [2]. 
20.90 20.90 20.90 20.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.56 0.70 -5.26 134 1273 2012-10-02 14:09:14 2003-04-07 12:59:11 15 15 1066 23 472 1266 171 214.00 37 46.11 CHANGED tDucsRpQYhYt...a.t.+-tpKac+hhchtptlsclR.cplpc-L..........psthsc.c+.lAsslhLlD................phhlRlGs-cYsc-st.ohGhsTLRscHlp.lps.............................................................................s.tlpFcFhGKsulcaphplc....Dpplh+slpth...........p...chPGpcLF.............cpls................................osclNpaL+chh..........ts.........hTAKsFRTasuolthhpt.....Ltph...................................................................................................t.....shsptppthstus+pVAthlspT.ul.s+ppYlpspllp..thtt..t.h...tthtph ....................................................................sph+tp...........ps.tKapphhthtphlstlR.tthptph.................ttt..t....p..hAshhhhl-.........thhlRsGspp.tpcpt....o...hGhsoLRscHlp.lpt.............................................................................p.slhFDFlGKDu.I+ahscVt...........h-.....c...+V..aKsLphhh............cs...KtPG--LF.............c+Ls................................oshlNcaLp.clM......ps...................lTAKsFRTYsAS.h.Thtpp.....Lpchs..............................................................................................................................st..s..shsp+hhshscAs+tVAhlssH.p+uV.s+.s..phhphphLptthpt..............s................................................................................................................................................. 0 137 260 379 +4666 PF01751 Toprim Primase; Toprim domain Bashton M, Bateman A anon Pfam-B_500 (release 4.2) Family This is a conserved region from DNA primase. This corresponds to the Toprim domain common to DnaG primases, topoisomerases, OLD family nucleases and RecR proteins [1]. 
Both DnaG motifs IV and V are present in the alignment, the DxD (V) motif may be involved in Mg2+ binding and mutations to the conserved glutamate (IV) completely abolish DnaG type primase activity [1]. DNA primase EC:2.7.7.6 is a nucleotidyltransferase it synthesises the oligoribonucleotide primers required for DNA replication on the lagging strand of the replication fork; it can also prime the leading stand and has been implicated in cell division [2]. This family also includes the atypical archaeal A subunit from type II DNA topoisomerases [4]. Type II DNA topoisomerases catalyse the relaxation of DNA supercoiling by causing transient double strand breaks. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -4.03 136 24868 2012-10-01 21:47:57 2003-04-07 12:59:11 17 150 8181 82 4223 22716 8361 107.80 31 17.30 CHANGED plhlVEusscstslpphhs..................................htlhsshGphhsh.p...........................................................lptltphh.............................................phpclllssDsDp.....-Gpplshplh.phhtthh..........tchhhspl ............................................................................................................................................................................................pLhlVEGcS.s.u.soh...cpuhs..............................................pp.psllsh....+G+..l.l.N.lpc.sp...hsc........................................................................................................................................................hhpspplpsl.hpsh...........................................................................hc..h..c..cl..llhoD..sDh.............-Gt.tIthhlh..phhht...h............thlhht.................................................................. 
0 1416 2671 3539 +4667 PF01533 Tospo_nucleocap Tospovirus nucleocapsid protein Bateman A anon Pfam-B_950 (release 4.0) Family The tospovirus genome consists of three linear ssRNA segments, denoted L, M and S complexed with the nucleocapsid protein. The S RNA encodes the nucleocapsid protein and another non-structural protein [1]. 19.80 19.80 22.00 60.50 19.50 19.10 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.54 0.70 -5.14 11 1122 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 38 0 0 714 0 238.40 61 94.92 CHANGED MSps+.lTccpIpcLLsuuct-VElEp-psphuFNFKsFapsNps.lc.hohssslohLKsRppIhtssKpucas.FsshsIlto...SspVus..sDaTF+RL-uhIRsKhlptl..ocNscsppchhsKlhshPLVpAYGLp..uhhDtsulRlhlhlGGsLPLlASlcohtshuhsLAhYQssK+EpLGI..ppFsThEQLsKVspVhpupuhphscs.cchhcphscILssssPsspGuh..uhc+YsEplpthtss.F ....MSpVK..LTKEsIVsLLTQG+.DlEFEEDQNhlAFNFKTFCLsNLDpIK.KMSlhSCLTFLKNRQSIMKVIKQSDFT.FGKITIKKT...SDRlGA..sDMTFRRLDSLIRVRLVEE......TsNuEsLsoIKoKIASHPLlQAYGL...PLsDAKSVRLAIMLGGSLPLIASVDSFEMISlVLAIYQDAKa+-LGID.KKaDT+EALGKVCTVLKSKuFEMsEDplKKuKEYAsILSuSNPssKGSl..uh-+YsEplshh.phF... 0 0 0 0 +4668 PF00087 Toxin_1 toxin; toxin_1; Snake toxin Eddy SR anon Overington Domain A family of venomous neurotoxins and cytotoxins. Structure is small, disulfide-rich, nearly all beta sheet. 20.70 20.70 20.70 20.80 20.40 20.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.12 0.72 -3.60 48 594 2012-10-03 01:43:02 2003-04-07 12:59:11 16 1 92 141 6 666 0 60.80 38 79.57 CHANGED hpCapp.s....socTCP.tGpNlCY.K......pa.pc...p..+Ghh.l-RGCusoCPpscs.hhplpCC.sT.DcCN ..................................pChpp.t...s.sscoCs.sGcshCYp.K......tapc..........p.ps.hh..lcRGCu.s..sC..Ppscs.....thpl.......p.CCsT.DcCN...... 1 3 3 3 +4669 PF00451 Toxin_2 toxin_2; Scorpion short toxin, BmKK2 Finn RD anon Prosite Domain Members of this family, which are found in various scorpion toxins, confer potassium channel blocking activity [1]. 
25.10 25.10 25.30 25.40 24.80 25.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.28 0.72 -3.86 48 130 2012-10-01 23:31:40 2003-04-07 12:59:11 14 1 40 50 1 165 0 31.00 44 65.14 CHANGED lcCssopcCht.sC+chhGpttG.KChNsKC+CY ....l+Csuopp.Chp.sC+cthGhttG.KChNs+C+Ca. 0 0 0 0 +4670 PF00537 Toxin_3 toxin_3; Scorpion toxin-like domain Bateman A, Moxon SJ, Finn RD anon Pfam-B_8170 (release 8.0) Domain This family contains both neurotoxins and plant defensins. The mustard trypsin inhibitor, MTI-2, is plant defensin. It is a potent inhibitor of trypsin with no activity towards chymotrypsin. MTI-2 is toxic for Lepidopteran insects, but has low activity against aphids [1]. Brazzein is plant defensin-like protein. It is pH-stable, heat-stable and intensely sweet protein [2]. The scorpion toxin (a neurotoxin) binds to sodium channels and inhibits the activation mechanisms of the channels, thereby blocking neuronal transmission. Scorpion toxins bind to sodium channels and inhibit the activation mechanisms of the channels, thereby blocking neuronal transmission 21.50 21.50 21.60 22.00 21.30 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.76 0.72 -4.09 33 487 2012-10-01 23:31:40 2003-04-07 12:59:11 13 1 55 78 15 598 0 53.00 38 69.05 CHANGED +D.....uY..Is.pscNCsYpC.....h.h..ssaCsshC+c.pGAcuG.aChahu...hs.ACaChsLPDss ................+-GY....ls..pspsCthpC........hh...ssaCs.p.C........cp..pGup.s....G.YChhhu..........tCaC.sLPDp....... 
0 7 7 7 +4671 PF00706 Toxin_4 toxin_4; Anenome neurotoxin Bateman A anon Pfam-B_589 (release 2.1) Domain \N 20.90 20.90 21.80 21.60 18.00 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.40 0.72 -3.75 7 75 2012-10-01 20:50:19 2003-04-07 12:59:11 12 1 27 7 9 95 0 43.60 56 70.66 CHANGED sChCDsDGPslRssshoGTl...huuC...suGWcpCtuhhssIu.CC .sChCDSDGPslRGsoLSGhl..Wl.......uuC...PSGWHpCpuptshlu.CC 0 9 9 9 +4672 PF02079 TP1 Nuclear transition protein 1 Mian N, Bateman A anon IPR001319 Family \N 25.00 25.00 68.00 67.90 24.40 24.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.79 0.72 -4.20 3 42 2009-09-11 05:49:06 2003-04-07 12:59:11 11 1 36 0 15 30 0 52.00 85 95.33 CHANGED STSRKLKSHGMRRGKNRoPHKGVKRGGSKRKYRKSSLKSRKRGDDANRNYRSHL .STSRKLKSHGMRRGKNRoPHKGVKRGGSKRKYRKuSLKSRKRuDDANRNaRSHL..... 0 1 1 2 +4673 PF01254 TP2 Nuclear transition protein 2 Finn RD anon Prosite Family \N 25.00 25.00 51.80 51.70 23.00 22.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.11 0.71 -3.83 4 40 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 25 0 12 46 0 124.20 61 99.72 CHANGED MDTKTQSLPsTHsQPHSNSpPQSHssppCuCS+HCQopSQS.....pssRSpSSSppPtuHR......S.sG+QSQSPuPSPPs+++K+sMHSHpsPSRPso+pCSpsKNRKNLEGKlpKRKtlKRppQVYKsKRRSSGRKYN ..MDTKTpSLPlTHTQ.HSNSpPQS+T...st.CsCo+HCQohSQSCppupps......SpSRSSSQSPsuHp........SsoGpQS..QSPssSPPPK+HK+TMpSHHuPsRPThh+sSCPKNRKNLEGKlpK+KhsKRhQQVYKTK+RSSG........ 0 1 1 1 +4674 PF04406 TP6A_N Type IIB DNA topoisomerase Waterfield DI, Finn RD anon COG1697 Domain Type II DNA topoisomerases are ubiquitous enzymes that catalyse the ATP-dependent transport of one DNA duplex through a second DNA segment via a transient double-strand break. Type II DNA topoisomerases are now subdivided into two sub-families, type IIA and IIB DNA topoisomerases. 
TP6A_N is present in type IIB topoisomerase and is thought to be involved in DNA binding owing to its sequence similarity to E. coli catabolite activator protein (CAP) [1]. 20.50 20.50 20.50 20.80 20.20 20.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.02 0.72 -4.31 65 534 2009-09-14 12:29:41 2003-04-07 12:59:11 9 9 417 7 349 531 120 69.40 27 18.33 CHANGED shp...psp+.....hsphhhllphl.pctlppsphs.ThR-lYYpspp................hF.ss........QspS-pll-Dlph.hhsl..REphplh ..........................t..tpsp+hsthlhllphl..hchlp.p..sphs.Th.....R-lYYpshp.................hF.ps........Q....sps.DpllcDlsh.hl...tl..sRpsLpl.............................. 0 105 197 287 +4675 PF00590 TP_methylase Tetrapyrrole (Corrin/Porphyrin) Methylases Bateman A anon MRC-LMB Genome group Domain This family uses S-AdoMet in the methylation of diverse substrates. This family includes a related group of bacterial proteins of unknown function, including Swiss:P45528.\ This family includes the methylase Dipthine synthase. 
27.30 27.30 27.30 27.40 27.10 27.20 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.45 0.70 -4.63 171 17348 2009-09-13 05:43:58 2003-04-07 12:59:11 15 82 4907 229 4809 13595 4640 206.00 21 64.04 CHANGED plhllGhGsGs..s.-hlThcAhchlp.pA-ll.....h....sss....................p......shphl..h....................................t........hphhpts......................hpphtchl..htthppstt..Vshls.sGDPhla..uhsthlh.t..hlttt...h....hc..llPGlSohpsssuthuhshs.psthhphhh...............s.t..ttthhpph.t...........sslllh........hsst..t.......htplhphL...........hpt...........p.tlhlscp.....huhs.s.Ep..lh.psslppls .............................................................................................................................................................................lhlVGsGs.Gs........p...h...lTh..+uh...........ch...Lp...pA....Dll.....................hhcs.............h......ssphl.hphh......................................................................ttt............ht.hh.phs........................................................................tpch.s.p...h..h........l..p..t.h.p.pG..pp......Vshls..uGDPhla.......uhutc.hl.p...........th..t....p...............t...s...lt....................hp......l..l..PGloushuu..sut.....s.....Gl....s.hs.....t.h...h.pslh....................................thtt..p..pt.p..hp.t.h.ttt.......................t.pol..l.hh...................................husp...p......................lsplh.p.tL......................................................hpt...........s.pp.lsl..s.cc........ho.ps..p.pp..lh.pssltph................................................................................................................................ 0 1545 3155 4100 +4676 PF04201 TPD52 Tumour protein D52 family Bateman A anon Pfam-B_2632 (release 7.3) Family The hD52 gene was originally identified through its elevated expression level in human breast carcinoma. 
Cloning of D52 homologues from other species has indicated that D52 may play roles in calcium-mediated signal transduction and cell proliferation. Two human homologues of hD52, hD53 and hD54, have also been identified, demonstrating the existence of a novel gene/protein family [1]. These proteins have an amino terminal coiled-coil that allows members to form homo- and heterodimers with each other [1]. 25.00 25.00 29.60 28.80 24.90 23.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.06 0.71 -4.51 4 406 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 101 0 173 372 1 162.90 44 79.83 CHANGED uLlhspshsEsstshssohsuo..sLSEtEp-EL+sELsKlEEEIpTL+QVLAAKEKHhuElKRKLGhsshsEL+QNluKSW+DVpsTsAYh.........tQKsosAhuuVGosIs+KhsD..................h+NSsTFKSFEpKVto....lKo+VuGs+.sGus.hsclluuusssSAp ...............................................................h..s....t....p.s......LoE.pE..p....E....E....L+....t....E....LsK.V...EEEIp..TLRQVLAAKE+HhuE.lK.RKLG........losLpELKQNlu...+uWp.DVpsosAYp+TpE...............................................TLSpAGQKsSAAh....S....slGosIo+KLsD...........................................MRNSsTFKSFE-+Vts....l..KoKVs.usp.ssss.h.p..t........................................................................... 0 40 50 92 +4677 PF00205 TPP_enzyme_M TPP_enzymes; Thiamine pyrophosphate enzyme, central domain Finn RD, Griffiths-Jones SR anon Prosite Domain The central domain of TPP enzymes contains a 2-fold Rossman fold. 
27.90 27.90 27.90 28.00 27.80 27.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.75 0.71 -4.41 309 15120 2012-10-03 09:55:27 2003-04-07 12:59:11 17 40 4534 251 3745 11432 6158 134.60 28 23.53 CHANGED lppssphlppAc...+P..lllsGuGshhs.p..Apppltplu-ph.slPV.ssThhG..+Gsls.p..........sc................Ph....hG.hhGhh.u.....stsustsl.pp..uDlllslG.sc...hsp.hs......hs..phs.tassp........s..p.........l..lpl.D......................l-s.........scls+sh.............ss.lsllGDsp.......tsLptL .........................................lpcshph.l..p..p..A.c...+P...l.lhsGuGs...h..s...p.........A.s.p.....p.L...p...c...hs-ph..plPl...s.s...T..h.hG...t.Gs..ls.p............................s.c...................Ph.h...lG..hh....Ght.G......st.su..s.h.sl...pp....uD..l..l..lslG.sR...as.c..ps..tu..........tht..tassp.......s...c.........l..lp.....lD.......................l-s....................sp.l.s.+.hh..........t...sc..lsll.GDsp....tsLpt............................................................................................................................................... 
0 1002 2186 3051 +4678 PF02775 TPP_enzyme_C TPP_enzymes_C; Thiamine pyrophosphate enzyme, C-terminal TPP binding domain Finn RD, Griffiths-Jones SR, Mistry J anon Prosite Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.08 0.71 -4.64 327 23243 2012-10-02 16:07:47 2003-04-07 12:59:11 16 110 4932 295 6025 21857 10671 146.80 22 24.62 CHANGED -lGptph.hhtph.......t.spp.als...ss...shusMGhulPsAlGsp..l....s........s..t.....t.pVl.slsGDGu..ahhs.h.pELtossph..sl.slhlllhNN.phhGhlctt...pp.hhhttphss.p...............hts.DastlAcu.h..Gsp.uh...p.lpsh..pcl...ppslccA...h......ppptPsll.-l ........................................................hGp..h.....................stp..hhs......ss........shus..h..G..h.u..l....ssAlGsph................u...........................p.P.....c.....c..pVl..slsGDGu....h..h..hs..h..p.-..L..s..............s.....s.h...p.h..........sl....s..l....t.....l......ll.l.N.N....p..s.....h....u...h..spt.................pp..hh..h.t..t.p.h..s..s.......................................tth...D..a...s..t....l.A.......pu.h........Ght...sh................pl...p.....s.t..............p.c..l.......pp....s...lpcAh........ppss.ssllch....................................................................................................... 
0 1846 3761 5012 +4679 PF02776 TPP_enzyme_N TPP_enzymes_N; Thiamine pyrophosphate enzyme, N-terminal TPP binding domain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.87 0.71 -4.77 118 17953 2012-10-02 16:07:47 2003-04-07 12:59:11 13 63 4635 275 4426 14362 7899 169.30 28 30.25 CHANGED hssuchlschLpp.hGlcp...............lFGlsGsphhs.lhcultpt.......tlphlhs+cEpsAuhhAs..uau+..ho.Gc.......sulshsss.GP.GssNslsulssAhts.phPllhlsGpsstpths.........tthhp.ph..D.thsh.....hp..shs.Kh......shplpsssphsphlpcAhptA..hssptGPVhlslPhDlhttpssts. ..................................................................................h.psuphllcsLpp..p..G..V..c.p......................lFG.hP...Gu.shhs..lh-u...l...tppt....................tlc.al....h...sRHEp..........uAuahAp....GaA+.......ho..Gc...........................sG..V.s..lsTS..GP.G.s.s........N..hlsu......lss.A.h.h.-....sl..P...l.lslo...G...pss.sshls.............................pss..aQ.ph..........D....hhsl.........................hp.......sh.s...+a.............................shh.l......p......p......s......p........p.......l......s.........p.........h.......l.......p..............c...Ah.......phA......................s........ucsGPVhlslPtDl..t.h....t..................................................................................... 2 1202 2609 3623 +4680 PF01963 TraB TraB family Enright A, Ouzounis C, Bateman A anon Enright A Family pAD1 is a hemolysin/bacteriocin plasmid originally identified in Enterococcus faecalis DS16. It encodes a mating response to a peptide sex pheromone, cAD1, secreted by recipient bacteria. Once the plasmid pAD1 is acquired, production of the pheromone ceases--a trait related in part to a determinant designated traB. However a related protein is found in C. elegans Swiss:Q94217, suggesting that members of the TraB family have some more general function. 
This family also includes the bacterial GumN protein. The family has a conserved GXXH motif close to the N-terminus, a conserved glutamate and a conserved arginine that may be catalytic. The family also includes a second conserved GXXH motif near the C-terminus. 30.20 30.20 30.30 30.20 29.70 30.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.65 0.70 -4.94 137 1980 2009-09-12 01:14:50 2003-04-07 12:59:11 12 13 1569 0 659 1576 259 253.30 21 77.55 CHANGED hlWclp.....ps.s......pp.........l.................aLlGThHlsctsh..lsstlpphhppu-slslEh...................thhh..s..spplp.shlsscphpplsthhttht..hs.....phhpphcP.hhlshtlshsthpp............thssp.....Gl-.thtttAt.........tpstphhs....LEshchQlshh.psh...shpp....phphlhpsl......pp.h.....tpt.sph..hpp...hlpt.atpuDh....ptlhph.........htp...........thsph.hcsllscRNpt....hspplp..phhppt...........tthhssVGAuHLsGtpu.....................................ll.................................shLpttGaplpth .................................................................................................................................................s..s.....pph.....aLlGohHh.u.sp.sh.t....s....l....ss....tlhpthppuDslhlEh.....................................h...........s......stsl.p.phl..st.c.p.hp..pL.pph.....p..t.h....hs............................phhpph...a.ls.hh.lt.....htp.hpc.........tlpsph.......Gl-h.th..ttAt..............tppt.hht................................LEstp..Ql.shh...pth.........................s.tt....t..ht.hLts.sl.....................sp..h.....................pss...sch.........hpp.........hhph.ahp.s.s........sshhp...............................hpp.................ht..p.l.hc.s.LlpcR.Ntt......hspplp......th.ttp........................hlssVGAhHLhGt.ts............................................................l..................................................................phLp....................................................................................................
... 0 225 409 534 +4681 PF02534 T4SS-DNA_transf TRAG; TraG; Type IV secretory system Conjugative DNA transfer Bashton M, Bateman A, Staddon J anon Pfam-B_1146 (release 5.4) Family These proteins contain a P-loop and walker-B site for nucleotide binding. TraG is essential for DNA transfer in bacterial conjugation. These proteins are thought to mediate interactions between the DNA-processing (Dtr) and the mating pair formation (Mpf) systems [2]. The C-terminus of this domain interacts with the relaxosome component TraM via the latter's tetramerisation domain. TraD is a hexameric ring ATPase that forms the cytoplasmic face of the conjugative pore [3]. The family contains a number of different DNA transfer proteins [4]. 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.47 0.70 -5.91 9 2504 2012-10-05 12:31:08 2003-04-07 12:59:11 9 20 1333 0 409 3978 356 393.00 19 69.09 CHANGED passhs-hpptt.hhpctshlhs.h.ths..................hGt...hhstsGpF.........ashlhAsotuhKusullIPshLsasp.olVVhDPKuElaphTuphRcppup.cVhlh-Ptsspsp..paNPlDaIsttss..stscDlhtlsshlhsspsh..........................c-aapssAtpLhsul.............hhhhphpstscshstslphlpshtu-.......csh.hsphhpphpssctststpslushhspsc.............cphoSVhushsuphphassP.lpshsStSDFslcclt..cccsslalslsspshphhsslhplhhp.hhphhtpp.sshctp.ht.....sLFlLDEFspLGhhchhcpuluhhttYslplhhIhQoluQLps....tYsppsApohlsspsshlsaussN....-TAcaISchlGptTlchcssSc.....ssspuso+opohs.spRsLlpPcElhpMssccp............IllhpupsPl+scKshYacc...........................pphpsphsc.t.hhstphthu 
.............................................................................................................................................................................................................................................................................................................................................................tth.hl.hus.o.tSGK..s.....h...s...h.....l.....h....P....s.....l...............h..........p.......h.....t........t.....S.......h.....llhD..K.......s.....-.......h...........h...........t........h..........s........u.......t...............h.........h.....p....p.......t.......s.......h......c.......l......h........l...hs........................h.................s................p...........p........u...................p.........a...........N.P..h.....t..........h...l.....p...p.....t..........tp........h.h.....s.....h.......t.........l..s..s...h....h...h..s...t.pt.........t.................................................................................t.c..sa.Wtpsup.t.L.hs.uh.........................................................hhh.hh..h..t...t.........t...........s.......h...s.............h.........hp...h....l...t......t.......h.........t....s.t.............................t..h.......h................h..........h..h.t..t.............h......t......t.............t.........................s.........h......h....h...t..t........h...t....t....h......t...t..s.t..........................................................................pphsulhss...h..ts..h...l.s.h......a.....t.............l......t...p....h.h..........s...........t....s.......-...........h.........c..l...p..p..lt..............pp......h........s........la.l.h..h...s...s.....p...p......s...h.............lh.......t..l.h.h..p............h...h.p...h........h.......t...p...t..................t...........t...t..........h....................hhhhh....DEhs.sl...G....h.....h...........h..tp..h..hu.........hhtu.......htlphhhlhQsh....s.Qlpt...................hY.t.p..t.t....h.p..sh
..h....s.s...s....h..hh...h..s..tp..........psh..c..lS.p..h...h.Gp.....T....h............s.hs...............................t...................t....p..........s.........s.......p...........t....+....L...h.........tE.lh......h........t..t............................lh....h....t....s..............s.....hh...t..........................................................h.............................................................................................................................................................................................................................................................................................. 0 134 274 337 +4682 PF00923 Transaldolase Transaldolase Bateman A, Griffiths-Jones SR anon Pfam-B_787 (release 3.0) Family \N 20.90 20.90 22.80 22.20 20.20 20.00 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.71 0.70 -5.28 131 6718 2012-10-03 05:58:16 2003-04-07 12:59:11 14 21 4201 128 1447 4243 3165 256.10 30 91.25 CHANGED lahDssshchIc.....chht...lpGsTTNPolht+......Ahp.....tpthhcctl....pphppt...................................................t.hh..lhh..h..hhthh..........sG.hVShEVss.hshDscuslpcA+cLhphhp........t.slhIKlPuT.....-GlpAhcpLp.pc.G...............IplNlTLlFShtQuhtsAcA.............G...........ssllSsFVuRlschh..........................tt.sGltsspphaphacph.s...................a.sTtlhsASh+sshplh..sLsGsctl.Tlssshlcphtp.csts..........................tt..t....hp.p.hh.ph.s-Glc...pFtpsappLhttl 
................................................................................................lhhDTusl..ptl+..........chtt.....h.pGsTTNP.S.llhp........t..........hpt.h.c.-.sl.tphppt........................................................................l........................................sG.plSs.EVsu................h.Ds-shl.pc...A+cLhphhs.......................plll..KlPs.T..............................hp.Gl...pA...hchL...p...p.-..G.............................IpsNlT.............LlFShsQuhhsAcA..................G........sphlSPFVG.Rlt-hh..................................tsGltslpplhphacpa.s....................................h..pTtl.....hu.AS..aR..........ss.tplh.........tL..s..Gs..-h.......l..T.ls..sllpphhp..psts...........................t..ptl.......................................................................................h..................................................................................................................................... 0 481 911 1226 +4683 PF00382 TFIIB transcript_fac2; Transcription factor TFIIB repeat Finn RD anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.18 0.72 -4.16 12 2462 2012-10-03 00:42:12 2003-04-07 12:59:11 14 23 536 21 1522 2387 745 70.60 26 32.13 CHANGED ls+hsspLcLsc...hVtcsAtclh+pshcpthlpGRSstulhAAslYlAsRhpphpRohpEIsslspVschol ...................................ht+hssp.L.s..Lsp........pltc...pAtpl...h.c....p...s...h............c...........p.............t...........h.....h......p......G...Rs...s...........pulsA..AslYhA.s..R.....h..p...s.....h..s.+.....o.hc.El.u.sls..p.Vschpl........................ 
0 455 879 1280 +4684 PF00405 Transferrin transferrin; Transferrin Finn RD anon Prosite Domain \N 20.30 20.30 20.30 20.30 19.70 20.20 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.08 0.70 -5.57 10 1389 2012-10-03 15:33:52 2003-04-07 12:59:11 12 15 229 250 465 1509 132 257.20 32 86.39 CHANGED VRWCslSssEtpKCpsap-pM+pht...sPsloCV+KuSsl-CIpAIAssEADAlTLDGGhlaEAGLsPYsLKPVsAEsYGo+-pPpTaYYAVAVVKKuSsFplNpLQGKKSCHTGLGRSAGWNIPIG..lLcstLsWtG.spcslEcAVucFFSuSCVPGAcc.sthPpLCQLCsGputsK..CusSspEPYaGYSGAF+CL+DGAGDVAFVKcSTVhENLsscA-R........DpYELLChDNTR+PVD-YKsCHLApVPSHAVVARSsssKE-hIWcLLspAQE+FG+-pop-FQLFSSPsGt....KDLLFKDSAlGFl+lPophDStLYLGa-YhTAI+NLRc ...............................................................WCshup..E.t..KCttht.....t......................tlpC.ht.t.t..s..cClptIh.t.tcAD.sh...slD.u.s.la..A.u................h...tLhP....l.hs......E...h..........t....................................t..........t...................t........t.........t............Y...............h......u.VAVV.....+.....+.....s......s.......s....h.......p..............h..............p....p..................L......p........G...+...+SC..Hou.h..s..p.....s.A.G..W.p.l...Ph...u..........h..t........h..........................................................t....h...s..p...aFs.t.u.C.....s.......P.....G..........s.......p...................................................s....p.LCthCh.G.....................................t.........C..t..............s...t.p.-.Y..h.GasGA..h...........+...C.L.h-...s........t......G-....V.AFl....+..........p..........t...........s....l.........p...........s.....h..........t....s........p.............................................ppap......LLC.....s...s....s.....c...t..s......l...s.....p....a....p...p..CpL.u....ps.P.....s+uV.l.s...R.................................t.......l.h..p.hL....p..........t....t.................................................h..phFts........................psLhFpDss.th..h......................h....
............................................................................... 0 88 121 296 +4685 PF00868 Transglut_N Transglutaminas; Transglutamin_N; Transglutaminase family Bateman A anon Pfam-B_783 (release 3.0) Domain \N 21.40 21.40 21.50 21.80 21.00 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.47 0.71 -4.07 50 614 2009-01-15 18:05:59 2003-04-07 12:59:11 15 12 106 39 309 576 0 116.20 30 17.57 CHANGED LplpsVDlpppp...NstpHHTccap..........spcLlVRRGQsFplpLphscs...apsstDplplhhphG..stPs.....spGTpsslsl....spttp......sssWsupltspsusp.....lplslpssssAslGcYp.Lsl.....pspst .........................tlpplDh....p...pp...N..ptpHHTpcap...........................................spcLlVRRGQsFplpLphsps....a.sst....-plph.hp.s.G......stPs............spGTpsshsl........sstts......sssWsAtl...p.p..psps.......lslslpoPs.sAslG+YpLplpst.s.............................................................. 0 56 87 169 +4686 PF00912 Transgly Transglycosyl; Transglycosylase Finn RD, Bateman A anon Pfam-B_558 (release 3.0) Family The penicillin-binding proteins are bifunctional proteins consisting of transglycosylase and transpeptidase in the N- and C-terminus respectively [1]. The transglycosylase domain catalyses the polymerisation of murein glycan chains ([4]). 
20.70 20.70 20.80 20.90 20.50 20.00 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.16 0.71 -4.96 172 12887 2010-01-08 14:08:55 2003-04-07 12:59:11 17 38 4268 62 2627 9890 3274 179.10 35 26.43 CHANGED sG......t.ltth........tpp+phl.s.hs..............p.l...s.t.......lhpAllusEDccFap......HtGlDhtulhRA.hhpsl.t...sspht.......pGGSTITQQlsKs...h.hL..........p........p.....+o....h......pR...KlpEhhhAhpl......Epp..hoKccILch.YLNpl.h.G.p....ssaGlpuAuphYFuK............sspc..Lshs.....EuAhLAulhpuPs......ta....pP..h......p......scpspp....R...pshl.LppMh ............................................................t......ph........tppRhhV.s..hc................p..l.....sst.......lhpAllA.sEDp...+F.ac.............Ht.Gl.DhhulhR....A....h....hpsl..h................................su.pts...........pGGSTlTQQlsKNh..aL.................s.....................p-..............+o.............h....tR.............Kh.pE.hhl.A.lpl..................Epp....h.o......KccILph.YLNp.lah.G..p......ssaGlpsAAptYFG..K...........................sspc........Lols........................puAhLAGlsp.uPs..........tY.sP.....hp..s...................sctutp.........R..p.s.hV.LppM............................................................ 0 760 1609 2146 +4687 PF00456 Transketolase_N transketolase; Transketolase, thiamine diphosphate binding domain Bateman A, Finn RD, Griffiths-Jones SR anon Prosite Domain This family includes transketolase enzymes EC:2.2.1.1. and also partially matches to 2-oxoisovalerate dehydrogenase beta subunit Swiss:P37941 EC:1.2.4.4. Both these enzymes utilise thiamine pyrophosphate as a cofactor, suggesting there may be common aspects in their mechanism of catalysis. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.87 0.70 -5.63 12 11328 2012-10-02 16:07:47 2003-04-07 12:59:11 16 27 4696 96 2755 11612 8619 262.80 31 48.34 CHANGED .cpphsssIRhLuhDAVppApSGHPGuPMGhAslApVLapchL+psPssPpWhsRDRFVLSNGHuuhLLYuhLHLsGY.DLsh--L+pFR..QltS+TPGHPEhtassGlEsTTGPLGQGIuN...AVGhAlAp+sLAApaNcsGasIsDaaTYVhhGDGCL.EGlSpEAsSLAGpLpLGpLIshYDcNpIoIDGcsphtFs-Ds.ttRacAYGWHVlts..sGcDspuIptAltpA+tsps+PoLIts+TlIGaGussptGscssHGAPLGs--ltth+pthGac.ttsFtlPp-lYstapt+ht..GtptpppWpphFusYtptaPEhuAEhtRRhsGcLP .................................................................................................................h.....ptlRh.s..hphl.t...pu..t...........u......G.......H................G..s......s..huh.A..sh..h...s.L.a..p....c...h......h.p...h.p...P.t.......p.s.........p......h..........s.R.D..R..a.lh.....S........t.GHu.u.hlYuhLa..L............p.............G...................h..................l............s.h.....-c.L.c.s.FR..........p......h.....t.......S....p........s....s......G..........H.......P.c...h....t.h................s...G..........l....-.h.o.TGsL.G.p.Ghss....AlGh..A...h...u...p...+...h...h...t...t................t...........................h.p...h.......s..p.......h..sY...s..hhGDGph.EG..uh..........E........A..........hs....hAup.p....L.s...pLlhhh.D.s.N..t........h...p......l........D..............G.....................s..........p.........h.............h........h.........s...........p..........c............h.........t............t.....+..........a....c.u.....h.......G..W....p.V.....l.p......s...............s....G..........p...D....h....p....t....l.........t....A....h.......p....t....A.......p.......t..................s.......+..............P.olIhs.+.T..h.h...G..h..G..............s........t........h.......t..........s..........p......t.....t......s.......H......t..........t.................h....s................p......t...h.......t..............h.h.......t.....
...h......................................................................................................................................................................................................................................................... 0 827 1666 2288 +4688 PF01818 Translat_reg Bacteriophage translational regulator Bateman A anon PSI-BLAST 1reg Domain The translational regulator protein regA is encoded by the T4 bacteriophage and binds to a region of messenger RNA (mRNA) that includes the initiator codon. RegA is unusual in that it represses the translation of about 35 early T4 mRNAs but does not affect nearly 200 other mRNAs [1]. 25.00 25.00 73.20 73.00 19.80 19.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.63 0.71 -4.58 11 74 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 70 2 0 56 724 121.60 58 92.25 CHANGED MlEIsLpcP-DFLKlRETLTRIGIANsK-KpLYQSCHILQKpG+YYIVHFKELLpLDGRpVclopEDhpRRNsIApLLcDWGLspIlssc...-hsspNpFRVISaKpKsEWpLhsKYpIGp .MlElsLpcP..-D...FLKV+ETLTRIGl.AspK-.KpLYQSCHILpK......pGpYYIVHFKEL.htLDG+psslocEDh.RRNpIspLLpDWGLlcIlsspthhc.hssh..Nph+VIoaKpKsEWpLhsKYsIGp... 0 0 0 0 +4689 PF01997 Translin DUF130; Translin family Enright A, Ouzounis C, Bateman A anon Enright A Family Members of this family include Translin Swiss:Q15631 that interacts with DNA and forms a ring around the DNA. This family also includes Swiss:Q99598, that was found to interact with translin with yeast two-hybrid screen [1]. 
20.40 20.40 20.60 21.00 19.60 20.30 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.33 0.71 -4.72 74 629 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 351 42 428 614 144 190.20 24 78.99 CHANGED cctREcllclsR-lsphS+cslht................lHptst..........tt.st.phlpcsppplpplp.phht...............shs.........apaptthssuhQEalEAhshhtal...........................pst....pLhohc-ls..h.............................................................ls.t-YLhGlhDlsGELhRhslssltpGshcp...................................shplhphhcclasthhhlsh...............th......tslR+KhDsh+pslcKlEpslashpl+st ...........................................................................................................phRE.clhchs+-lpttu+chlhh...................lpphpt............................tp.sp.chhppsppplpp.lp.phht................sts....................hapappthp.slQ-h......l.puhsahtal...........................c.sp....sL.l.ohc-lsp.l.h...............................................................h.ls.p-YLhGlhclsuEL.....h........Rhsls.slst.G-.hpp.........................................shplhp.hlpcl.hssaphlsh..................................................ptl.+.++hDsh+.s.......lcKl.EpssYslplRs.h............................................................. 
1 140 221 335 +4690 PF02133 Transp_cyt_pur Permease for cytosine/purines, uracil, thiamine, allantoin Mian N, Bateman A anon IPR001248 Family \N 21.10 21.10 21.10 21.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.43 0.70 -5.93 14 4605 2012-10-03 01:44:59 2003-04-07 12:59:11 10 14 1918 2 1514 3768 1528 398.50 19 86.59 CHANGED PVsp.ccRshshhNhhshWhusshslssashuuss.lthGLsaapshlslhhusllsshhlsltuthGsphGlshslhSRuSFG.lhGulhPhlsts.lhAhsWaulpsalGupslhhhltplast....hhs...sh.ssssh.hlsFhlFhllphhhlahshpplp+hhshtuhlh.hsuhuhhhWshstspu....Gslhs..st.s.........hhsulhuslusauohlsshsDFTRaups.psshhsthlslshshslhhhhullssuuuhsh.hGsshWsshplltpa.ss.......hushlhslllshuplusNhusshlusGhshuslhP...hhlsh+puuhhsullulshssWsLhusssp...hhshLshhushLushuGllhADYahlR+uhhplsphat....psuhYha..chGhNacAhsAalsGhhhslsGh..........tsshsshthhsluYhVG .....................................................ppR...p....h....t....h....h.s.hhs...hWh.u.sshsl.s.s.hshuu.hh...h.................h.................h...........G...Ls...........hh.pshlulllGshlh.s.hh.hsh...h.uh.hGsch.GlshhlhsR.h.s.F............G...h.................h.G.u...h.................l...ss.l..lt.s...lh.t.l.........u....W.............au..hps.hhu..u..........t.s...hthhl.s..p.h.hs.....................................hs..h.s.hh.l.s.....h..h..l..h.h..l.l............h...h.h...h...h..h.G...h.....p..s.......l...p...h.h...t...h.ht........s...........s.....h...l...h.l....h..h...h...s...h...h..h..h...h.h.sp.s.sh..........u....h....h.t.........t.s.....t...............hh...s..u..l....s....h..s..s...u..h....a................h.s....h....ss....hu...Da.oRa.......s..p.s........t..p.....s............s......h......h..........s........t....h...h....u....h...h...h..s...h....h...h.h....h.h.....h...u....h...h.....s...s...u...s..ssh...h.............s....t......s.......h....h......s.......s...h...................................h..s.....................uh.h..l..ss.l.l..l.h..h...h..s.l...s...s..s...s...s..s.....s......
.h..........h....u..s..u...h...s.h....s..s...l...hs....................h........l....s..h..+...t.....t...s....h....l.....s..u..l.....l....u....h.....l....h....s....h..t..l..........h....s..s........sh................hh..a.........Lshlus.hlsPlh.....Glh.l.sD.Ya.l.l.+....+...t.p.h...p...hst.h....h.............................t..h...................s.h.......N........h...h...u..h...h...s......h..h.h.u.h..h.hs.h.h..................................................................................................................................................... 1 391 822 1249 +4691 PF04236 Transp_Tc5_C Tc5 transposase C-terminal domain Bateman A anon Pfam-B_2955 (release 6.5) Domain This family corresponds to a C-terminal cysteine rich region that probably binds to a metal ion and could be DNA binding (pers. obs. A Bateman).. 27.40 26.40 27.40 26.40 27.30 26.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.48 0.72 -3.75 14 75 2009-01-15 18:05:59 2003-04-07 12:59:11 10 11 10 0 72 54 0 58.60 37 15.42 CHANGED WpKuGYh...spss.sFhTPupaC.Fs.cssssDChhsGCschuFI+Cu+CcphlCFcHFlV.phHhC ........Wa+uGYh...ss+Ps...tFcTPs-ah.Fs...csstssCsh..Csphuhl+CsaCcp.hCFscFh....H.C...... 
0 16 22 72 +4692 PF00872 Transposase_mut Transpo_mutator; Transposase, Mutator family Bateman A anon Pfam-B_376 (release 3.0) Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.15 0.70 -5.91 14 6867 2012-10-03 01:22:09 2003-04-07 12:59:11 13 34 1819 0 1273 6707 991 221.60 22 89.13 CHANGED hspsp.hsthtth.sh...thsuts-hhcshhcshhpplhpsEhschlGstcaERsttRp.st.......RNGpps+slsTpsG.pl-lplP+sRsGsF.P.......sllp+hpRp-pulpuslhphYlpGlSTRclpchlptLhG.pt..lSpupVSplscplc-tltsapsRsLpcs.asalalDAhahKlR..sspVhupulhlAlGlssDGpR-lLGhtls..suEutphWpshLpsLpsRGLps.lpLlluDuatGLspAlstsasssshQRChlHhhRNlhstss+cpt.cpltstl+sIapAs-h-tsttth-thhsthss..+aPtlsshh-cuhpcllsFhsFPtsha+plhoTNslERLNcElRRRs+shslFPNtsohlRLlhslLt-h......cccWhtu ..............................................................................h...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...........
.............................h.....h.os.N.......Et.h.......h.+..t.............................................................h...................................................................................................................... 2 397 800 978 +4693 PF00273 Serum_albumin transport_prot; Serum albumin family Finn RD anon Prosite Domain \N 25.50 25.50 25.80 25.50 25.00 25.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.31 0.71 -4.84 42 692 2012-10-01 21:15:37 2003-04-07 12:59:11 15 8 71 324 248 788 0 168.30 26 84.70 CHANGED ppphspphpphG-cshpulsllthSQth.pssacElsKLlp-lsphtcpCsts-.....pChcshtshhhcclCpppshhs..utlscCCpcstsE.RspChhphcp-p.sth.shth.....s-scchCpsapcspctFhu+alaEhuRRHP.hhss.lLtlAppYpphlpcCCp..sps.ssCh ..................t.pp.Ct.hpphGccthpsh.hllt..hop+hPpsshp-lhplsp-lsphtp...cCC...p..s...st....hsChcs....ths.....lhsplCpppph.hs..splscCCscs.h.h.............p.Rp..Chhth.csDps...........shp....t............stsc-lCpt.pc.spp..thh.s.........p...a..........LaEhu+..++Pp.hsts.LhplsppapphlpcCCp..sps.tsCa.................................. 0 11 18 55 +4694 PF01359 Transposase_1 Transposase (partial DDE domain) Bateman A anon Pfam-B_394 (release 3.0) Family This family includes the mariner transposase [1]. 22.90 22.90 23.00 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.44 0.72 -4.21 63 449 2012-10-03 01:22:09 2003-04-07 12:59:11 13 27 113 11 235 404 0 74.10 30 26.29 CHANGED hY-..N.pRp+sWlpsGpssps.ssKsslas+KlMLsVWWDh.cGllaaELLtsGpTlsu-hYppQLp.cLppslpcKRPphhsR+ .............................a...s.pcp.tpWl...p.sp.s...p...ts.Kspl.atK.K...hhLslaWsh.p..Glla.a-l..LssGcTIsu-hYspQ.Lp.cl...tptlpphp.t............................. 0 51 123 182 +4695 PF01610 DDE_Tnp_ISL3 Transposase_12; Transposase Bateman A anon Pfam-B_1015 (release 4.1) Family Transposase proteins are necessary for efficient DNA transposition. 
Contains transposases for IS204 (Swiss:Q50911) [1], IS1001 (Swiss:Q06126) [2], IS1096 (Swiss:Q50440) [3] and IS1165 Swiss:Q48788 [4]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.56 0.70 -4.80 128 7865 2012-10-03 01:22:09 2003-04-07 12:59:11 12 34 1808 0 1018 5504 425 158.50 24 62.84 CHANGED lGlDEhphp+.tpp..hhshht.h..ct...........pplltlhtsRsppslppahp.h.s.p..pppplctVshDM.ssatsslpphhPp.Ap..llhD+FHllchhsc.ulspl...Rpp.hpph.......tt..............................lKps.+all..LK........p..cpLspp.pttp............ht...hh....pt.s...pltpAYtlKpphtpha...p.p........p....httupp...hhppWhpth.......tstls.........hpchscolppahptIlshFc..............hoNGhlEGlNs+IKsl+RpuhGa+shcphpt+l ...........................................................................................................................................................................................................................................................th.......hht...th.............................................................................h..ht....hhhh..............................t........ht....t...............................................h.....hh.............t.......h..hh.h....ht....hh.hh.....p.t...................................................h.t....h..tp.h..................h...................htphhtt.h....p...ht..l.h.shht......................ho.Nu.hEu.hNthl..+.hc..p.shGhts.t.hh.......................................................................................................................................... 0 276 676 802 +4696 PF01710 HTH_Tnp_IS630 Transposase_14; Transposase Bashton M, Bateman A anon Pfam-B_1769 (release 4.1) Domain Transposase proteins are necessary for efficient DNA transposition. 
This family includes insertion sequences from Synechocystis PCC 6803 three of which are characterised as homologous to bacterial IS5- and IS4- and to several members of the IS630-Tc1-mariner superfamily [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.57 0.71 -4.44 9 1697 2012-10-04 14:01:12 2003-04-07 12:59:11 11 12 366 0 262 1502 90 102.50 39 76.09 CHANGED MAYSlDLRpKVlsalEsGGuITEAS+lFpluRsTIYpWLp.+Echush+spsRpp...KlDp-cL+pclcsNPDhhLpEhA+cFGV..pPuolpYthK+MtlTRKKpshh.cpt..+.sphppshs .........................................YShDhRp.KV.l.s.h.h.E.c.s.t.o.h.oE.AS....c....l.......F.......p.......l......S...R.sT.Ia....tW...Lc....hK.......c...c...T........G....-....h.....p.....p.......p....s.....+....t....pp.......tK.l.....D......h...c...c...L...K...s.a..l....p.....c...p......P.......D........s...hl...p......E......l......A......p......c......a......s......s......pss..o..lp.h..ALKt.hGhT.p.KKp..........................ph....................................................................... 0 52 103 234 +4697 PF01797 Y1_Tnp Transposase_17; Transposase IS200 like Bashton M, Bateman A anon Pfam-B_1347 (release 4.2) Family Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases for IS200 from E. coli. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.40 0.71 -4.25 178 5614 2012-10-02 12:35:40 2003-04-07 12:59:11 11 33 2186 40 1185 4727 578 98.60 26 68.64 CHANGED hhphpaHlV...assKYR+pl.lssp.ltpclccll.pplspphphcllp...hsstsDHVHlLlphsPph.ulScllptlKutSS+hlp..pcasphhpphhh......h..WspuYassosG..ss.shcslccYIcsQ .......................................................................................h.....hahh...hh.sp..Rp..th..h.tp...h...t.........pthtp.hh.tp.h.s.p.....h.........t.....h.p.lht..........h.p...h.....s.DHlH..h.L.l.......p...........h..........s.........s.....p...........h.......s............lo......p.......h....h...th.lKu.t.o...u.h....h....lh............pp.....h...........p..h.....t.....h....+hh..................tth...W.s.p.u..Ya.sp...oss...t....s.thlt.p.YIpt.......................................................................... 0 388 819 1034 +4698 PF01385 OrfB_IS605 Transposase_2; Probable transposase Bateman A anon Pfam-B_1210 (release 3.0) & Pfam-B_4602 (Release 7.5) Family This family includes IS891 [1], IS1136 [2] and IS1341 [3]. DUF1225, Pfam:PF06774, has now been merged into this family. 
19.00 19.00 19.00 19.00 18.90 18.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.55 0.70 -4.96 39 7021 2009-01-15 18:05:59 2003-04-07 12:59:11 14 26 1439 0 1819 6621 252 194.50 20 55.24 CHANGED ppYp.plsutssQpslccsspAacSFFshhcthcpt........sphP+Yppc....pt.........phhlhhppsphp....hcpsplplsh...............hplp.hcsphphchc.....ps+lhhhhpth....spahsplsh-.hpt......................spspssptsuIDlGlssLsslssspst.......hlhpu.+.ltut.phhs+phuclppp........hptpt........p+ssc+lp+LapKpspphcchlcphsppllpphhph..slpslslGthpthp ....................................................................................................................................th.s..shptsht.ph.....pu....a.pp.ahpth............................................thP..p....a+pc.......t.............................t.....h......p.t...t..t..hp................ppt.....t..l.hlsh............................................h.t.......h..p..h.....t...p.p.p.h..t....................thp.t..h...h.l.tpp..............spaal..s..l....hc...hp.....................................h...t.s....s.p.h....l...G.lD..l.....Gl..p..p..h.s..s..h...o..s..u.p..............................h..p...s....p...............h..p....p....h.....p...p...c..h....t....+..h..p..+..p.l..s....++......................................hptupp..............hp+tptc......l....t.+.....l...a.......p.......+ls....sh+p........Dhh....+..+l..oppl..sp.p..h.t..hh.....sl.EsLthtsh....pp............................................................................ 
1 529 1255 1605 +4699 PF02992 Transposase_21 Transposase family tnp2 Griffiths-Jones SR anon Pfam-B_1531 (release 6.4) Family \N 25.40 25.40 25.40 25.40 25.00 25.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.29 0.70 -5.26 49 1289 2009-09-13 07:43:22 2003-04-07 12:59:11 9 104 66 0 750 1419 1 149.60 32 20.18 CHANGED chhRWHtEp+p..pDGh.hRHPuDupsW+phDcpaP.-FAs-sRNlRluLuoDGhNPFu..uspaShWPVllhsYNLPPhhCMKpp.hhLolLIsGPppPGssIDVaLpPLl--LppLWp.pGVcsaDs.ppcpFsLRAhLlaTIsDaPAhu.LSGhss+Gph.ACshChcpTtuhhLcpupKhsahs.HRRFLPhsH.aRppcptFcss..hEppssPp.h...oGcclhpplcslp ................................................p.............................................................................................................................................................hh.G....P.ppP.s...p....s.....l......s..s..aLpP......llc-lt.L.ap....tGh............h..........h........c.t..t.p......p....h...l.+shlh.hshsDhPAht..t.l.u.G..........p.u.h..h.....uC..h.C.p.h..h.......h.....tt......+.........a.t.......h........................................................................................................................ 
0 205 337 361 +4700 PF02994 Transposase_22 L1 transposable element Griffiths-Jones SR anon Pfam-B_2299 (release 6.4) Family \N 25.00 25.00 25.00 25.00 24.10 24.90 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.27 0.70 -5.96 3 1528 2009-01-15 18:05:59 2003-04-07 12:59:11 9 29 45 16 1329 351 40 221.10 32 80.72 CHANGED +GKRRNhoNRNQDapuSSEPsoPTSuSPusPNThENpDLDpKuYLhMMlEGlKKDspsSLREplEspuKElQpsLKEhcEsIsKQVEshpEcoEKohKElMEl.......................pKpl+ELKcEl-oIKKpp.EsTL-IEs.tK+pGslDhShoNRIQEMEERISGAEDSIEpIsoTlK-NsKpKKlLsQNIQEIQDolRRPNLRIIGV-ESEDpQLKGPsNIFsKIIEENFPNLK+EhslsIQEAYRTPNRLDQKRNTSRHIIVRToNApNKERILKAVREKGQVTYKGKPIRITPDFSPETMKARRuWTDVIQTLREHKhQPRLLYPAKLSIIIEGETKlFHDKTKFpcYLSTNPALQRIIKE..KsQ+KNuspsLEcsR+ ............................................................................................t..........................................................................................................................................................................................................................................................................................................................................................................................................t.....l.......s...s.Rh.pph....E...-.+.IS.p.lE.......-.t.................c.h.......h....p.......s...p...........c...p.......c..............t......c..............l..............+...p..............p.......p.p....lc...-.......Lpsp.h.......+..R...s...N.lRIIG...lPE..G.pc.....p..t...c.s..s....p.sl.h..pcIh.....t..E..N..hP...s...L....h.c...ch.c...lplp.c..A..p..R...s..s.sh.h..s.s...c...c...s...s.....P...R..p.Ill..K........hh....+.hpsKE+.....IL+s...ARp.+.....p.ls.....a.+..G......ps...Ip.l......sD..hSs.-s.hptR.R.ca.pslh+.L+c+.sl.p.p...l...hYPu....+lp.hphp..G...c.hp.....F.s.t...h........................................................................................... 
0 10 330 417 +4701 PF03017 Transposase_23 TNP1/EN/SPM transposase Griffiths-Jones SR anon Pfam-B_1491 (release 6.4) Family \N 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.23 0.72 -4.46 33 134 2009-01-15 18:05:59 2003-04-07 12:59:11 9 22 10 0 57 124 0 67.50 30 18.45 CHANGED ppss..KChllDWssscc.lVuEGchpSs-P...pphV.splPLGPsAspVhVcslh.s-AhlWRPssplhhlt-ulGs ......................hhsh.t.pp.hVAcuplhStsP...pphV.sshsLG.phscVlVcsV...h.p......p-AhL.RPhsplphhtDAl................ 0 0 41 49 +4702 PF03004 Transposase_24 Plant transposase (Ptta/En/Spm family) Bateman A anon Pfam-B_1902 (release 6.4) Family Transposase proteins are necessary for efficient DNA transposition. This family includes various plant transposases from the Ptta and En/Spm families. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.80 0.71 -4.30 40 501 2009-01-15 18:05:59 2003-04-07 12:59:11 9 46 31 0 204 486 0 114.70 17 21.72 CHANGED sschWpshl.....paWpoccuccpScpsppsR......uhuh....hhHpuGppSatpltcchcpch.......scpsshh-lahcTHp.+sDGo......alcp+ucplhcphppplpcp.sphsstsst.s..................phscla...........................hpssutcc+.GRha..GlGuhtpsh ..............................................t..t.Wt.hh.....th.Wt.s...c.hpthStpsptsR.......ttht.......hhHpsGo.....coas......thtcp..hptc.....................sp.ss.hh.......clahps+..p..p..psGp...........stt.s.pp..h..hp...t...........hpp.htp.....t.............................................................................................................................t.................................................................................................... 0 4 77 124 +4703 PF03050 DDE_Tnp_IS66 Transposase_25; Transposase IS66 family Bateman A anon Pfam-B_2526 (release 6.4) Family Transposase proteins are necessary for efficient DNA transposition. 
This family includes IS66 from Agrobacterium tumefaciens [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.98 0.70 -5.18 126 5890 2012-10-03 01:22:09 2003-04-07 12:59:11 9 35 1314 0 841 6700 1022 192.80 29 59.76 CHANGED +uhsusuLlApllssKasppLPLYRQpphhsct.Gl.plsRuTlusWhtpsuph.LpP.lhstlpptlh.psshlasDETslplLt......tp...sc.sp.pualWshssst................hlhapass...sRu.upts.pph.....Lts....ap......hLpsDuYuuYsp.lh...........slpc..ssChAHsRR+Fh-shpt..................shuspuLpt.IspLYt.......lEpch.......p................s.....-.......pRtthRpppupPllsphcpWhp.....tph........tplsspotlucAlpYhl...sphssLhpalcDGcl.plDNNhuERulRshslGRKNa.LFusotpGuct ..........................................................................h...h.....s....p...t.htt....G...h....lstt..hst.h.h..........ht.....ht...h.....t.....h....t....lh...........h..l..tsD-ssh.....................t.tp..sh.h.Whhhtst.....................hhha...h..t..t+t..t............h.........h...............ht...u........hl...sDt....ht....sat...h.....................h........hChs...H.h.....p..Rt..h...ph....................................ht.hlt.........htt....ha.t.................................hc.t...th.t.....................................p...................ttht.R.p...........lh.t.t.h..t.hh...................................................ph....t.s...h.tp.u.h...t.Yhh....pph.t.h...ahps..u.h..hs...NNhsEpslR.hsls...++s.hh.ts..us.......................................................................................................................................................................... 0 197 489 640 +4704 PF03400 DDE_Tnp_IS1 Transposase_27; IS1 transposase Mifsud W anon Pfam-B_2448 (release 6.6) Family Transposase proteins are necessary for efficient DNA transposition. This family represents bacterial IS1 transposases. 
20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.65 0.71 -4.04 2 3369 2012-10-03 01:22:09 2003-04-07 12:59:11 8 11 576 0 195 2345 250 104.30 60 82.31 CHANGED hllC.EhDEQWuaVGuKuRQ+WLaYAYsphptsVlAasFG.RT.tThtcLhuLLoPFslshhhoDsWs.Ytpcl.tchHlhtKhaTQRIERpNLsLRp+ltRLuRKolsFS+SVElH-KVIGpalph+ha. .............................................................h..DEpWuaV....G.uK....u....R..Q+W.LaYAY.c........php.c.sV.l..AaVF.GcRT........hsTlt.........+L.h.u.....L..L....o.s..F.....c..V.s...l..aMTDGW.PLYESR..L........K.GK..L..H..V..I...S...K.R..Y..T..Q..R...IE.RHNLNLR......QH...LARL...u...R.KoLSFSKSVEL...HDK.VIG.aalph+aa.......................................... 0 49 122 175 +4705 PF04195 Transposase_28 Putative gypsy type transposon Bateman A anon Pfam-B_1755 (release 7.3) Family This family of plant genes are thought to be related to gypsy type transposons. 20.10 20.10 20.20 20.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.09 0.71 -4.99 2 725 2009-01-15 18:05:59 2003-04-07 12:59:11 7 21 14 0 419 710 1 151.90 37 20.83 CHANGED MActptsh-uphhPsshh.pphpthls+chhPtpthht.hsAhGEuhPTPch..csVhF.pFhhsGhs.Ph.pFFhsILEFYslphtHLsPNulhhlA.FhHhCEhFlGlRPphhLFphhFhlp....hS.PhVVGGshFQ.RGpl.p+Yhshsh+pp.csW+usWFYs..s-.A...LPp.s. ......................................................................tph....p.hls+thh...P..tp..t..hh..t.h.h...h.G.E..u.h.PsPph....csV.hF.sFhh..uGhh.....PhSpFFh..s.lLpFYslphtHLsPNulhplAIFsHlCEhFlG.l...c.PphpLFRhhF..hlps.........hs..s.....s.........V.........s.........G.....u.......sh.Fp.R.s.sl....p..+...Y.h.sh.s...h.+pp..c...s.W+upWFYh.......sp..s........................................................ 
1 0 19 24 +4706 PF04693 DDE_Tnp_2 Transposase_29; Archaeal putative transposase ISC1217 Mifsud W anon Pfam-B_5730 (release 7.5) Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.09 0.70 -5.43 3 202 2012-10-03 01:22:09 2003-04-07 12:59:11 7 3 27 0 42 452 37 154.30 27 83.72 CHANGED MsKEL..sRpEYYKALccAlsplhhuMTGlRKDVAsRLlLGuVlGGsAT..EIAQss-MDYETVLKNLDKLANs..cLIElVKKlVtDHPVlLIIDDTHDHKLYARAhPV.SRNGsQhFYCRsHKRFEPAIQLLlIAlKDLssNcoYlIsIIPYIPRKVtE.LKcRGEcuEFKTKI-hhLEhLsoLhscaNVsslVFDSWYVNSKTLpGNTVGELKSNuRVVE...........G-RHVPVuEFPpGEYLVEYL..GTPIKLLVIDsYKchGRRYFFSTDLNDTsEDIITTWENRWDIEVLIRELKALGLEcSSFLTWlRNpGFlsLKALSLLlVpsFKYSLGL+LG ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 23 23 41 +4707 PF04740 LXG LXG domain of WXG superfamily Zhang D, Mifsud W, Aravind L anon Pfam-B_3568 (release 7.5) Family This domain is present is the N-terminal region of a group of polymorphic toxin proteins in bacteria. It is predicted to use Type VII secretion pathway to mediate export of bacterial toxins [1]. 
22.30 22.30 22.40 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.12 0.71 -4.53 35 970 2012-10-01 21:44:22 2003-04-07 12:59:11 7 41 392 0 96 785 2 186.40 20 39.12 CHANGED KplDspplhstl-pptpphcphppplpslcpulpslssLcs..LpG+uuculKsaapshahPlhpthhphl-phpphL.pplpstlpsh-stssuhI-psFLcpEL..ppuls+scphhpphpppl..sshhsslsDllpls.hsppshppplppApcchpcsl-+LtshDpptsshhspscsthptlpphlppLpshh.ssut........pssuYp ....................................th.t..pph.t..pphppphpslp...p.ulpp...h...hssss...L.pGcuhsusKsaappshhPlhps...hhphh-thppth...tphl.p.p.apupV.-.s...s..p.shlc.p.shLc....-l.........pplsp..pp.h..pshp...p.p.h........pp....h..h....s.s...t..p................h..s......p...ph....hp.......ph....tptp...+.clpcpLc...+Ltp...F.s...p.pp.s.p...ap.ph.p......ph.p.lppslpplpshh....................s...................................................... 0 29 53 79 +4708 PF04754 Transposase_31 Putative transposase, YhgA-like Mifsud W anon Pfam-B_3820 (release 7.5) Family This family of putative transposases includes the YhgA sequence from Escherichia coli (Swiss:P31667) and several prokaryotic homologues. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.06 0.71 -4.96 54 3243 2012-10-11 20:44:43 2003-04-07 12:59:11 7 7 798 0 393 1853 67 186.50 44 64.84 CHANGED sPHDulFKphhspspsA+-FLchaLPspltplsDLsoLclEssSFl--sL+ppaSDlLaSlcspp............t.cG.....YlYlLlEHQSpsDphMAaRLh+Yslshhp.......+H.hct..........scpp.LPlVlPllFYHGpps.ashshsah-hFs.....sstlAcplhstsa..LlDloshsD-EIhp++p....huhLpLl.KHI+.pR.Dlhchlcplsplltphhpscpplp .............................................................................pPHDAlFKpFLtp.s.-.sARDFlp.....lHL..P..t...l..+..plCD.......L....pT....L....+..L.Es.s...S..F.l.........-....c..s...L.....+.....t.....h.a.SDlL.aS...lcTpp.............G...sG.....Yl..Y.s..l..I.EHQSps-.phMAF..R.........hM.....R.....YuhAA..MQ..............+H..L.-t...............................sa.c.p......LPLVlP....l......LFYHGppo..PY..P....a.S.h..s..W...h.DtFs........sPt.lA+pl.Y.s..p..sFP...LVDlT.lh..PD-..E..Ih..pHR+.........hAlLE.L.l..Q.K..H...IR.pR..DLht.ll.-pl.ssLLsps.ssspQ.p.................................................................................................................................. 1 151 264 314 +4709 PF04986 Y2_Tnp Transposase_32; Putative transposase Moxon SJ anon Pfam-B_5271 (release 7.6) Family Transposases are needed for efficient transposition of the insertion sequence or transposon DNA. This family includes transposases IS1294 and IS801. This is a rolling-circle transposase. 
20.70 20.70 20.70 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.12 0.71 -4.54 65 1246 2012-10-02 12:35:40 2003-04-07 12:59:11 8 16 499 0 183 1121 344 156.80 27 48.81 CHANGED GhhuVLHTaGps.LsaHPHlHhllsuGGlstss.....pWtp....sp.t.ahhss+sLuphaRsphlptLppth.t..h........h.t.tthsthltphhpc................................................pWsVhsp.shs..pscssLpYLuRYhpRssIospRlhphs......sspVsF+a+Dhc...............sscpcphsLsstEFlcRhlhHVL.PcGF++lRaYGhLu.....ttcppp ...................................................................................................................Ghhshlppa.Gpt.hphpsHhHh.h.sGshsptt.........................hhh.hch....tp..hhphh.phl.pt.h.th.................htpttt.p.hh.pthtc..................................................tasl.sthhht....ssp....psh....tYLuRYlp+sslutpRLtt.hs.......tspltaphps.p...............ppppphhhhss.-FltRhhhHls...thp.hhRaaGhhu.......pt........................................................................ 0 73 119 151 +4710 PF01498 HTH_Tnp_Tc3_2 Transposase_5; Transposase Bashton M, Bateman A anon Pfam-B_462 (release 4.0) Family Transposase proteins are necessary for efficient DNA transposition. This family includes the amino-terminal region of Tc1, Tc1A, Tc1B and Tc2B transposases of C.elegans. The region encompasses the specific DNA binding and second DNA recognition domains as well as an amino-terminal region of the catalytic domain of Tc3 as described in [1]. Tc3 is a member of the Tc1/mariner family of transposable elements. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.15 0.72 -3.92 26 1241 2012-10-04 14:01:12 2003-04-07 12:59:11 13 82 129 7 1065 1248 10 65.70 24 22.63 CHANGED cRpIlphlccsPp...hohpcLtpphtp..GhslSppTlp+pL+ptGhpu.pps+++Ph.Lotcpt+sRLpFAppHls ...............................................................ostclttpl...............s.h.slStpTlp+hL.+p.t....G.h..tu..p...ht.h+.K.P........h..Lo......t....pppct.RLtaAptH............................. 0 570 817 1048 +4711 PF01527 HTH_Tnp_1 Transposase_8; Transposase Bashton M, Bateman A anon Pfam-B_527 (release 4.0) Family Transposase proteins are necessary for efficient DNA transposition. This family consists of various E. coli insertion elements and other bacterial transposases some of which are members of the IS3 family. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.29 0.72 -4.02 45 12930 2012-10-04 14:01:12 2003-04-07 12:59:11 15 69 2688 2 2404 10672 1730 75.20 20 64.00 CHANGED hpp.pRaocEhKtplVcps...csGtslsclu+chGl.ssssLapW++ph..........suhtssssp....................h.hshpp-spcLc+hhsc ................................tpppa.otE...h...K.h.......p...h...l.p................s.........h........c........s.......s........h........s........l..s.......p.........lA..c....c....h.....Gl..s.s.s.....s.l.hp.W.h+.php................ts.t.s.s..tp...........................................h.t..ptc.tpLpt....t.................................................................................................. 
0 628 1441 1931 +4712 PF03221 HTH_Tnp_Tc5 Transposase_Tc5; Tc5 transposase DNA-binding domain Mifsud W anon Pfam-B_2955 (release 6.5) Domain \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.26 0.72 -4.16 194 2088 2012-10-04 14:01:12 2003-04-07 12:59:11 11 91 246 2 1635 1984 4 63.10 21 13.60 CHANGED hsph-csLhpWlpph...pppshslosphlpppApp.lh................ttsshpsSps.....Wlp+FhpRa..slptpp ...................................t.EctLhpWlhph......pppG....hs.......s.otphlpppApp.lh...........................tts.shps.ups.......Wlp+..FhpRa...tlt...h....................... 0 469 953 1437 +4713 PF02281 Dimer_Tnp_Tn5 Transposase_Tn5; Transposase Tn5 dimerisation domain Mian N, Bateman A anon Pfam-B_5683 (release 5.2) Domain Transposons are mobile DNA sequences capable of replication and insertion into the chromosome. Typically transposons code for the transposase enzyme, which catalyses insertion, found between terminal inverted repeats. Tn5 has a unique method of self- regulation in which a truncated version of the transposase enzyme acts as an inhibitor [1]. The catalytic domain of the Tn5 transposon is found in Pfam:PF01609. This domain mediates dimerisation in the known structure. 21.40 21.40 21.40 21.40 20.70 21.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.43 0.72 -3.93 2 403 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 154 6 49 283 17 97.40 52 31.63 CHANGED EuhTlP.hLRtpGLlcEActVEuQSs-TVLp.DEhplLhhhsKs+tc+tptAsSLpWAY.uIA+LGGFhDoKRTGIASWsAlWEGWpsLQS+lsGahsAK-hhApG.pl .............hhlssplhs.........R.hGl.pE.....sps-SCEKILTPoEWKLL.W.l+lcGK.....LP..sQhP.TLK.........WAsLpLAKLGtWHDS.KRTGcPGWsVhWDGWFRLQDMlEGYhlhKSL.......Dp-..................... 
0 11 26 31 +4714 PF00576 Transthyretin HIUase/Transthyretin family Bateman A, Percudani R anon Bateman A Domain This family includes transthyretin that is a thyroid hormone-binding protein that transports thyroxine from the bloodstream to the brain. However, most of the sequences listed in this family do not bind thyroid hormones. They are actually enzymes of the purine catabolism that catalyse the conversion of 5-hydroxyisourate (HIU) to OHCU [2,3]. HIU hydrolysis is the original function of the family and is conserved from bacteria to mammals; transthyretins arose by gene duplications in the vertebrate lineage [4]. HIUases are distinguished in the alignment from the conserved C-terminal YRGS sequence. 25.30 25.30 25.30 25.80 24.70 25.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.54 0.71 -4.18 26 1803 2012-10-02 19:08:27 2003-04-07 12:59:11 16 7 1506 439 545 1303 706 109.50 40 82.21 CHANGED ssLoTHVLDsupGpPAuulplcL..........a+hs...ssshp.lsostTspDGRh..tsLlsspphtsGhYcLpFcsucYaputGh................ssFh-hlsltFsls-ss..pHYHVPLLLSPauYSTYRG .............................................s..LosHlLDpspGpPA...........ssVpVpL...............................ch...s........sssh....p..l.s.s...uhTspDGRl............ts..........l.........h.................s...........t..............p.................s..........h..............s.....sG....................t..................Y..+lhF.csucYF...cpp.sh...................psFas..pls......lpFpls...css.......pHYHVPLL.LSP.auYSTYRG.................. 0 132 273 418 +4715 PF03896 TRAP_alpha TRAP-alpha; Translocon-associated protein (TRAP), alpha subunit Finn RD anon DOMO:DM07004; Family The alpha-subunit of the TRAP complex (TRAP alpha) is a single-spanning membrane protein of the endoplasmic reticulum (ER) which is found in proximity of nascent polypeptide chains translocating across the membrane [1]. 
23.10 23.10 23.10 23.40 22.90 23.00 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.92 0.70 -5.41 11 349 2009-09-11 14:06:22 2003-04-07 12:59:11 11 8 203 0 190 299 3 225.90 31 89.23 CHANGED M+.h.plLLLhLLuFPusLl.hutspu.hssA..p-...--Es........-D.lsp---DpA.VE-Dp..t.oEt-E---.....tplpuSPcADTTILFVKs........c-FPAsplVKFLVGFTNKGs.cDFlVESlEASFRYPQDapaYIQNFTA..........LPhNslVpPpcpATF-YSFlPuEsh...............uGRPFGLVIsLNY+DusGNsFQ-AVFNQTVsIsEp--GLDGEThFhYlFLuGLulLllluhaQhLpSppR+Rs....tthVEhGTuups-VDhsWIPpETLspl.....sK....uSP+............pSPRpRptKRuAGsD ....................................................................................................................................................................................................................................................................................t..............tsssts.cshhhFsps...........p.hsusp.schLl.uhpNc..G...p....p.shhVpslp....u...Sh....+....hP.t...D.......a.......phhlQNaTA..........h..h.s.t...V.s..tppuThpYsFhs.sc.h...............t..spsasLsl.sls.Yp.Dh..s.....G..p.h.ap.sss.aNpTVsllE.t.p.su.l.DsEo...lFhY..hh.L.s.u.l..s.l.h.s......lh..hht.........p......h.....h.t..t....h...p+p+.......................h.plE.....h..........G.....T...s......s.....t..p.s........h...D..pWl...Ptcp..l.p..................................................................................................................................... 0 55 83 145 +4716 PF04051 TRAPP TRAPP_Bet3; Transport protein particle (TRAPP) component Wood V, Finn RD, Mistry J anon Pfam-B_9946 (release 7.3) & Pfam-B_6495 (release 8.0) Family TRAPP plays a key role in the targeting and/or fusion of ER-to-Golgi transport vesicles with their acceptor compartment. TRAPP is a large multimeric protein that contains at least 10 subunits. This family contains many TRAPP family proteins. 
The Bet3 subunit is one of the better characterised TRAPP proteins and has a dimeric structure [2] with hydrophobic channels. The channel entrances are located on a putative membrane-interacting surface that is distinctively flat, wide and decorated with positively charged residues. Bet3 is proposed to localise TRAPP to the Golgi [2]. 20.70 20.70 20.80 20.70 18.80 20.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.99 0.71 -4.69 84 1130 2009-01-15 18:05:59 2003-04-07 12:59:11 11 22 333 34 772 1043 17 154.70 25 76.72 CHANGED hFslhauplVs.hhc.............shp-lp.........................................................................ppLcphGaplG......hRLlE..................................chhh+..............................................................tt.tRppchhcshchlspsha+hhauppsss...........................lppspt.....................tpah.lh-p.s.l...............................spal.p.s.....ttht.pL.....................passhhsGll+GuLpshthsspVshpps.h...........h.stsshpI+hph.p ...............................................................................................................................hshhasplVs..hhp......t........sht-lp...................................................................................................ppL-ph.............GaplG......h+LlEphht+..................................................................................................p...tRhpchh-hhchIspshW.ph.has+p..s...c..s.................................lppsp....p...........................spah..lh-pp..l....................................................spal.phs..............p..t.t..tl........................................asshhsGll+GuLpt...l..sht.s.......p.Vshp.s.h..............s..ssh.lcht..h..................................... 
0 266 420 622 +4717 PF04956 TrbC TrbC/VIRB2 family Moxon SJ, Bateman A anon Pfam-B_5261 (release 7.6) & Pfam-B_14627 (release 10.0) Family Conjugal transfer protein, TrbC has been identified as a subunit of the pilus precursor in bacteria. The protein undergoes three processing steps before gaining its mature cyclic structure[1]. This family also contains several VIRB2 type IV secretion proteins. The virB2 gene encodes a putative type IV secretion system and is known to be a pathogenicity factor in Bartonella species [2]. 28.40 28.40 28.40 28.40 28.30 28.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.14 0.72 -3.99 114 1258 2009-09-11 06:06:03 2003-04-07 12:59:11 8 2 765 0 305 1048 45 96.80 19 86.80 CHANGED Mp.................hhthhhhhhhshhhhhhhspsAhApss..........sshpsslpslhshlpG.luhsluslsllssGhthhhGphs....hpphhhlllGlslhhuAsplsshl ..........................hhh................hhhh.hhh.hh.h.h.hs..ss.AhApsss................tshpsslpslhs.lpG.luhsluslsllssGh.thhauptp........hppll.h.lllGhs....lh.h.uAsplssh................ 0 47 166 228 +4719 PF03743 TrbI Bacterial conjugation TrbI-like protein Finn RD anon Pfam-B_776 (release 7.0) Family Although not essential for conjugation, the TrbI protein greatly increase the conjugational efficiency [1]. 
20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.83 0.71 -4.80 149 1918 2009-01-15 18:05:59 2003-04-07 12:59:11 9 40 1082 20 412 1681 119 183.10 23 39.41 CHANGED hhl.tGohIsusLhTulsSsh........PG.l.hspVopsla.ossG....ptlLlspGopllGpY.pus...ls.GpsRlhlhWsRllhP.s.....Gts.lsl...stsusDth.GtuGlpGp.VssHahphauuAhlhollus....ssphh.............sssssssssssh...............hhpsssp.shsphupphlcpshslt.PTlplptGph.lslhVs+Dlsh ....................................................................................................................hlstGohIsssLh.oulsosh.................sG...l.hsplspsla..sssG....phlL.l...sp...Go....plhGph...pus.....ls.u...p.s..R..lhlhapch.l.hs..s.........Ghs..lsl...............stsuss.........h..G...tuG..........lpGp.V....s.s...+h.h.phh...usAh.hhuhlss.hsphh..........................................t...tt..psstsspsth.....................................................................................thtpusup..uhsphup.p...hlcp..sh...sl...Ps.lhlptGpt.lslhlscDl.h....................................... 
2 81 230 319 +4720 PF04610 TrbL TrbL/VirB6 plasmid conjugal transfer protein Mifsud W, Bateman A anon Pfam-B_5275 (release 7.5) & COG3704 Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.67 0.70 -4.65 129 1847 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 1036 0 437 1663 116 208.00 14 43.80 CHANGED lslhlhlhGhhhh...hst.hptsh.tchltchltlul.hshlhts.....hs...sasshlhsshss.hstth...uu.........sstsssshtthspl..h.stuhshsptl.........hppssshh............hhhhhhth...lhhlsshlhhhls....uh.lhlshlthtllhhhGPlhlshhla.shTpphhppWlsplluhulhllllsllhul....shshhs.shhssh...................ssspsslppshshllhuls...hhhlhhtsPslA....uul .....................................................................................................................................hl.hhh.h.sh.hh.....ht.....t.....sh....phh.ht.....hhh.hsh...hhhhh.s.......hs.........h....phl.h....p.h...t.htt.h...........st..............................ts.h...sth.........h...thsht..h..ht.h..................................................hpphsth.........................hhhhh.hhh.....lhh..l.sh.h....l.hhhhh........shhh.llshl........thhlhh.hh...u.Plhlsh..hh.a.shT.+p.h..hp..s.alp..tllu..h...sl...p...h.hllslllu.l.........hh.shhs..phhsth........................................tts.h....tt....h..h.....hh...hhslh.....hhhlhh.h.s.hht.................................................................................... 
1 96 236 316 +4721 PF03461 TRCF TRCF domain Dlakic M anon Dlakic M Domain \N 25.00 25.00 26.00 25.20 24.40 24.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.01 0.72 -4.05 117 4305 2009-09-10 17:08:24 2003-04-07 12:59:11 10 10 4219 3 926 3507 1246 100.20 30 8.89 CHANGED cL....slsAhlP-sYls-.p......pRlplY++luss....pspc-lpclppELhDRFGshPpplpsLlplscl+hhApphslpplptpppp.l....hlpa.sppsshshpthhhhhp .......pLplsAhlP--YI.s.D.p......pRlphYKRluss.............csp..p....-lc-lpsELlDRFG.......hP.....c....sppLLplspl+hhApphGlpc.l....c.....tpsps...l.....tlpF..sppsphp.........hh.............................. 0 316 613 786 +4722 PF03546 Treacle treacle; Treacher Collins syndrome protein Treacle Griffiths-Jones SR anon PRINTS Family \N 21.10 21.10 21.70 21.50 19.50 20.60 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.24 0.70 -5.71 4 216 2009-09-11 10:35:01 2003-04-07 12:59:11 9 8 25 0 59 246 0 175.30 26 51.50 CHANGED sPAPPGKsGPuAsQApstKPEEDS-SSSE.-SDSE-EsPAshsssQsKPSGKsPQVKuASssupps..spKGsPPVsPGKAGPsAsQA......tc.pspSSpcppssptEsPsAh..TpS.spsKP.tpsSQVRssSsss.Gs.........PtKstssA..sQsGKp.EDS-SSSEEESDS-s.....suuPAQAKSSGKl.Qh+sASGPsKtPPQKAGPsATQVKs-+uc-DSESSEEpSDSE-..EAPsAhosAQAKsAlKhsQhKASP+KGTPhossuA+ssPVpVGTsAPpKAuAVoSPssuSSPAlA+GTQ+PsEDSSSSEESESEEE.TAPAsstGQuKSlGKGLpVKAASsssKts.GQGTAPs.PGKsGPuss.VKAEsQED.SESSEE-SsS...EEAAAsPAQVKTuVKpPQuKANsusTRsssAKushSAPGKsVsAssQhK.tSPAKsKPPsRs.QsSsVSsRGQsSVPAVGKAsAsAAQAQsGPVtsspEDSE.SSEEESDSEt..EsPsQsKPSGKTPQVRsASAPuKt 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 6 6 11 +4723 PF00088 Trefoil trefoil; Trefoil (P-type) domain Sonnhammer ELL anon Swissprot_feature_table Domain \N 21.00 21.00 21.10 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.01 0.72 -4.32 86 900 2009-01-15 18:05:59 2003-04-07 12:59:11 13 54 128 38 475 805 8 43.30 33 8.06 CHANGED pCs.l.......ssppRlsCGhss...IopspCp.p+GCCacs...........p..........sssaCFas ......................C...h...ssppRhsCusss......lo...p..ppCp..s........+G......CCass.....................t.................ssPhCaas............. 0 154 186 297 +4724 PF01204 Trehalase Trehalase Finn RD, Bateman A, Wood V, Studholme DJ, Moxon SJ anon Prosite Family Trehalase (EC:3.2.1.28) is known to recycle trehalose to glucose. Trehalose is a physiological hallmark of heat-shock response in yeast and protects of proteins and membranes against a variety of stresses. This family is found in conjunction with Pfam:PF07492 in fungi. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.72 0.70 -5.86 20 2673 2012-10-03 02:33:51 2003-04-07 12:59:11 13 12 1263 20 654 2119 179 396.50 30 72.24 CHANGED +hasDpKpFlDhshhs-..ssplhpthpsph.cs.st..sspsLppalc-sFpts.t.h............th.sPtstp-p.c.ah.h..pcssLRh.sppLsphWss..LsRplptpstths.thollshPp.........PaVVPGGRFpElYYWDSYahhLGLLtSsph-............hu+uMl-NFhahIc+YG+I.NGsRoYYLsRSQPPhLohMlhthhc+h..........ss-thh+chLsslccEasaWhs.......ss+hssVsshu..sapLstYhsss-TPRsESht.DhthAp+hs......-tscs.hYp-L+uuAtSGaDaSoRWlc-..Gps.s.hsslpTssllPVDLNulLa+hEpsIA.....hFssthsp.............hpsushacppAcpR+puI-paLWs--sGhahDYDhppp............cpsshhoAoshaPLWsshusscp....st.hsspslsphppsuhLp...suGlusoslc...oucQWDaPNGWAPhQhlshpGLp+YG....cplAccLAhRWLtpsppuascp....GtllEKYDVsp.......suchG..GGGGEYssQ-GFGWoNGVhlhhLchas .................................................................................................................................................................................................................................h.......................................................l..hht..h...................................................................t.t....hp.....l..........h.....hh+..........................t.......................................p....h....st....................................sall.P....G....G..R..F.pE........h.Y....h...WDoYh.....hh................GLh.s.......t...h...p............hh..tshhtNhh..l..pthGh...l......Nu....sR.YY.....tRSQPPhh..uhMV..hhpt........................s.t.hh....p....p....hh...s....t.l....tt....a.s.a...Wht.................................s.t.....tp.h.s....t..h..................t..........h......Ls.p......Yhs...s..........-.......ss.p.s..-..S..h..h.sh..tsAt.p.s.........................ps.sthatp.l...c..s.s....t.ts..u......aD....h....o......S.R..W......h...tp.....s.............................................h...s...o.....l...............ps..........s......pl..
.l.......s.........l...D..L..N.....uh.hap.....cphlA.............phsthh..Gc..............................................tpps.ttacphApthp.ps.lp...phhWspp.pGh.ah.......D.achppc.....................................p.....p..p...h..h.s...s....s...u....h.hP..L.as.sh....Aspp..p......................s..p...h...h...s.p...h..t..p...t...p..h.lt.............PuGls.so...php........s.sp.Q.....W..D....t...Ps....u.....W....s.....P.h.pa..hu.....lpGh.p.p.YG...............................pphA.........c.l....utp....alpp........spp..s...a..................t................p..............p..................t..............t.lh.................E..KYs.st.....................................ss..t.....ss.hch.......p.....p...GF.GWoNushh.hlt...s...................................................................................................... 0 245 385 556 +4725 PF02358 Trehalose_PPase Trehalose-phosphatase Bashton M, Bateman A anon Pfam-B_762 (release 5.2) Family This family consist of trehalose-phosphatases EC:3.1.3.12 these enzyme catalyse the de-phosphorylation of trehalose-6-phosphate to trehalose and orthophosphate. The aligned region is present in trehalose-phosphatases and comprises the entire length of the protein it is also found in the C-terminus of trehalose-6-phosphate synthase EC:2.4.1.15 adjacent to the trehalose-6-phosphate synthase domain - Pfam:PF00982. It would appear that the two equivalent genes in the E. coli otsBA operon [2] otsA the trehalose-6-phosphate synthase and otsB trehalose-phosphatase (this family) have undergone gene fusion in most eukaryotes e.g. Swiss:P31688 and Swiss:P93653. Trehalose is a common disaccharide of bacteria, fungi and invertebrates that appears to play a major role in desiccation tolerance [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.31 0.70 -5.45 24 2499 2012-10-03 04:19:28 2003-04-07 12:59:11 11 26 1614 1 1046 2316 209 215.20 27 42.78 CHANGED hhDaDGTLsslspp.s.sshssschhssLppLuucss.hlhllSGRshp....phhsps.slsluupHGh.lRhssuppaps...................s.stshshhcplttlhpphscphsGuhlEpKcsulshHYRpAssc....pspphhpplcsshpsp.slclppG+pllElRPshss.KGtslctllpphs.t..............s..sphslslGDDhT.DEDhFcslppt.......tshshclhsss..stpsopAthhlpsss ......................................................................................................................................................hhDaDGTL........s......sls....sp...P......p.t.....s..h.s.s.s...p..hhpsLpp.L.........us.ps...ss..t...lullSGRshsp........l........c........t..h.....s...........t........h..............p......l....s....lA.GpHG.h-h....+..t.....s........s.....u.....p.....p..hhs.......................................................................................................t.h....s.h...s....h....h...p...p....l.t......t.....L...p....p..h........h........t......p....h...s.....G..sh.....l..E........p..K.t.hul...sh...HY...Rp......A..s........p..c..................t...t.p.....tl..h......pt...h....p.p..h..h..t.....p.h.....t.l.p......lp..t..G..+....p.....l.lE......l..+..Pt........sss..KGp....Al....ptlhpp.hsht.............................................sp.hslhlGD.DhT..DEsuFtslpph........................sh...s.h.h...lhtst...........o.A........s.............................................................................................................................. 0 327 667 904 +4726 PF03973 Triabin Triabin Finn RD anon Pfam-B_20829 (release 7.1) Family Triabin is a serine-protease inhibitor. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.90 0.71 -4.55 4 329 2012-10-03 08:47:39 2003-04-07 12:59:11 8 4 13 3 2 409 1 140.20 25 76.19 CHANGED Ap....s.hMP.Gs.p..tchhssFchpcFFs.scWYlTHucsss+PplCpcapsous.c.........clpas...htS-VpCpsspVpGtcG..aSFpCcsss.....pcFpu.hoVluTDYcNYALlhRCspa.pSshcDsaLVhpRpKpus.Puulpop ...............................................................................................ttt.................tshpsFsspcF....ap..u......pW...Y....VT..H....s....p.....p...s.....o....p.....s..s..l..C..+..p..a.p..s.s.pp...sup..h.hs....hp..a..s.h..s..t....ttt......p.......l....pC..p..s..p....t.............p..p...t....p....pt......h....sFsC........p.....s....ss...........................hph.....p..h.....h.s.llsTDYssYAlhY..RCsph....s...s...t........h.....p........D........N.h.LlLpRp.t..s.t............t......................... 0 1 2 2 +4727 PF02080 TrkA_C TrkA;TrkA-C; TrkA-C domain Mian N, Bateman A anon IPR000309 Domain This domain is often found next to the Pfam:PF02254 domain. The exact function of this domain is unknown. It has been suggested that it may bind an unidentified ligand [1]. The domain is predicted to adopt an all beta structure [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.04 0.72 -4.38 181 18298 2009-09-12 01:06:25 2003-04-07 12:59:11 16 80 4247 44 3835 11843 3990 69.60 20 21.79 CHANGED lhphplst.ss..lsG.cslp-.ht....ltpt.....shhllul..p....Rs......tthhhPss.sthlpsGDhlhlhuptps.lpp..htphhs ........................h...h.l.t.ss..hsG.+slp-..lp.........ltpt.........sshlsul....h...................Rs..............sphhh...Ps...s...sshlp.tGDhlh..l..h.ustpp..lpp.ltp............................... 0 1256 2566 3307 +4728 PF02254 TrkA_N KTN;TrkA-N; TrkA-N domain Bateman A anon Pfam-B_289 (Release 5.3) Domain This domain is found in a wide variety of proteins. 
These protein include potassium channels Swiss:P31069, phosphoesterases Swiss:Q59027, and various other transporters. This domain binds to NAD. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.38 0.71 -4.09 219 14934 2012-10-10 17:06:42 2003-04-07 12:59:11 13 58 4445 85 3537 12367 5545 116.30 21 31.04 CHANGED llllG.hGchGttlscpLtp.s.p...................llll-pst..pp..lpph..p.pph...h.........slh.GDuocppsLpps.slppsc..sllss...hs.......s-....psslhsshhs.+ph.st..hp...ll..A+spstpp.....tchl.pp..hG.....s..ctllsP ..........................................................hlllG..h.G.p.h.G..t.tlucp...Ltp.pt................................slhll....-....p...s..t.......pp.............lp...ph......p..pph......hp.....................slh..G.D..u.s..c.........c....l......L..c.....p...u....u....l........p.......p....s-.........hll..ss...ss...................sc......psshhsshhs..+p...hhst.........hp.........ll....u+s..psspt.........hph...l....pp.......hG.........h...s.hl................................................................................. 0 1086 2338 3026 +4729 PF02386 TrkH Cation transport protein Bashton M, Bateman A anon Pfam-B_529 (release 5.2) Family This family consists of various cation transport proteins (Trk) and V-type sodium ATP synthase subunit J or translocating ATPase J EC:3.6.1.34. These proteins are involved in active sodium up-take utilising ATP in the process. TrkH a member of the family Swiss:P76769 from E. coli is a hydrophobic membrane protein and determines the specificity and kinetics of cation transport by the TrK system in E. coli [3]. 
19.60 19.60 20.00 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.98 0.70 -5.88 26 6295 2012-10-03 11:11:44 2003-04-07 12:59:11 11 18 3786 2 1475 4562 3549 309.50 23 66.68 CHANGED pluttphpuhpphhphlhhhhhhhchlutlhlslhhlhhhthtts..............................h..ha.AhapohSuhssuGhSlpssS.hssFss..shhlphlhshhhlhGu.huFslhhchhhhhh.t...........h..hhhahtsh.hcshh.Llhhuhhslhhhphhssh.................................htthshstplhssaFtssssRTAGFoshDluphssushlhhhlhMaIGussuoTuGGl+ssphthhltsh..........pthhpt..........................................thpstpattcplppphlh....cshhlhlhhhllslspshhL.hhtpss....hhshlFEssSAaGTVGLShGh...............ssshohs......uKlllIhhMlhGRlchlshllhhshhh. ........................................................................................................................................................................................................h.......t.hht.h.h....hh.thhh..phlu.t.hh.hs...h..h...h.l...h..h..h...h..h..h..t.s.............................................................................................ha.p.ulhpuh..Ssh.....ssuGF...s.hh......s......s.......S.....l...h.........ass........h.lphl.lshhlllGG.ls...F...s....l.hh...p..l..h..tt..t.t.............................................h.pha.h+.....s...hphhh....ll.hh...u...h..h..hl.h.h.ht.h.hthh.........................................................................................................s..h..s..h.t...s....h.h.h.s.s.a.h...s.s.s.s.pTuGF.s.o......h......D....h...s.....p.....h....s...........h..s..........h....lh.h.....hhM..................F..IGus..sGST...u...GG...........lKshp.hhllhhhh...............h.h.h.h..p..............................................................................................................th..s..hh.h.....s........+...tl....s....p....c....h..lp........psh.s.h.h.h..h........h........h.h.lh.....h..h........s.......h...h...........h........l..................h..........s.......s................
........s....................................h.......shh.hp.s....hS................A.husl......G.......u...h....G..h......................ss..shssh...................uKhllhhhMhhGRlthholllhhh............................................ 0 480 956 1266 +4730 PF02005 TRM N2,N2-dimethylguanosine tRNA methyltransferase Enright A, Ouzounis C, Bateman A anon Enright A Family This enzyme EC:2.1.1.32 used S-AdoMet to methylate tRNA. The TRM1 gene of Saccharomyces cerevisiae is necessary for the N2,N2-dimethylguanosine modification of both mitochondrial and cytoplasmic tRNAs [1]. The enzyme is found in both eukaryotes and archaebacteria [2] 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.29 0.70 -5.54 10 846 2012-10-10 17:06:42 2003-04-07 12:59:11 11 9 491 7 551 1753 342 314.40 25 78.44 CHANGED Gcu+lplsct..ho........lsstssVFYNPtMcFNRDLoVsl........LsshspKhh................................................hplLDALSASGIRuIRaA..LEhsslcclaANDlsscAVEhI+cN......spLNsls.......-.hllhNpsDANhlMt...pppctFcsIDLDPFGSPuPFlDoAlpSlccp.GlLsVTATDsAsLCG.saPcsCh+KYs.AhsL+s-hCHEsGLRILlshlsptAAKYc+ulcPLLSaSpDHYhRVFV+l+cGst+uccshcphGalhaC.tC..........hp....cpssGhschp........scCtaCGschpluGPlWlGPL+DccFlschlctscs.....ttph..pc+lhtlLphlc-E..lDsPhaYshspluphlKlssPPhpcllsuLtphGFcsStTHhsPsuIKTsAPhcslh-lh+ 
...............................................................................................................................................................................................................t.............................t.sFaNsh....t....h.....NR.Dlsh.hs............................t....h..t....p.......................................................................................................................................................................p.hL-uhuu.oGl........RulRas..hE............................h..........p....lh.h..ND..............hs.tu.hp..hptN...................hp..hNt.ht......................................h...p.........DAph...h..h............................p.ac.hlD...lD..P...a..G..o.....s.....ss.al-u.Ahp.s..l.tp....t.Gl.....lhlT.sTDhush...s.u.t..chs.hthY......u......u.................h....p..t...h...pE.....h.................ulRlllt.ltptAsphthhl.Pl..h.uh.h.ca..a..hRlhlclh.pu...t.....s.....c.....t.p.....hhh.C.....C......................................................................................................................................ts....s....s.t.....p....h...h.hGPha.sslts.taltph.ht...t.....................h................ph..t.hlph.h....p..........E.......t................s.h.aa.p..p.phsph...ht.h.t......h...hhtt..l.p.GapsohoHht..u.l+TsAs.t.hhth......................................................................... 0 165 299 437 +4731 PF00133 tRNA-synt_1 tRNA synthetases class I (I, L, M and V) Sonnhammer ELL anon Prosite Family Other tRNA synthetase sub-families are too dissimilar to be included. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 601 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -13.17 0.70 -6.52 15 21195 2012-10-02 18:00:56 2003-04-07 12:59:11 17 120 5058 69 6103 31264 24036 374.40 23 54.93 CHANGED shhphWpctsh.FctshptsKs.pts..FslhssPP.sTGplHhGHALspoLpDhllRhppMpGass.ahPGaDptGlssptpVE+KlutcptpshhchGpEcFhscshcattEassph+sphp+LGhhlDas+-hhTh-.phppuVhclFscLaccGLlYRGp+lVsWsssspTALS-hEV..pYKDsc................ushhalsasl......s-ups........plllhTTpP.Tl.ussAlAVpP-........................................-.+hpcLlGpphphPF.......hsRchPllsD.-aVch-tGTGsV+hsPAHs.sDYplGp+asL.....chlNsls-sGshs-ss..................scapGhchFcAcKtllcpLpEpGlLlchcshpHShPaC.RossPl.hhhosQWFV+hcs.......Lscsulcslc....clpaVPcp.tEpcahsWlcNhpDWCISRQhaWGp.IPsWhsc-ssElas....h.ph.tsp.ppcsppshhth.hhchl............phcpssDVLDsWFsSG.hPauslsaP.s.sst-acphaPsDhllpGhD.hhhWhtphlhhuhthpGps......PaKpVlsHGllhDupG+KMSKSlGNslDPh-llc.....................................paGADALRhaLh.sossupDlphS 
..............................................................................................................................t..........................................................................................................................................................................................t..t.p..............-....t...................t...........................................u..c....u...........hs...h......................................................................................................................................................................................................................................................................................................................................................................................................................................................u...........t..h................................h.......................................o...................................................................................h................t..h............t............................hs...................sh........p....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.................DWslSR...Qh.h....WGh..lP........h........h........h............t.....p..................t.................................................................................................................................................................................................................................................................................................h..............p........-.h...h.....D.....s....a.....h...........p.........S...........s........h...............h............t...........h.......h...........t.............................................................................................t...............................................h......................p...........................................h..........P........s...s......h....h.....h.t.....G...-..............h.......h....h......a......h...................h........h............s......h...............h.........t..p..........................................P...a...p..........p...l...h...........p...G...h...h.....h.....s......t.........t.........t.......p........K...M.SKS.....h...sN.s..l.s..P....p...h...h.p............................................................................................................................p.h.G..s..D.....hhRhh.h....s...................................................................................................................................................
.......................................................... 2 2043 3764 5105 +4732 PF00749 tRNA-synt_1c tRNA synthetases class I (E and Q), catalytic domain Bateman A, Griffiths-Jones SR anon Pfam-B_350 (release 2.1) Domain Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln). 19.80 19.80 19.80 19.80 19.70 19.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.86 0.70 -5.77 18 10836 2012-10-02 18:00:56 2003-04-07 12:59:11 16 90 5178 51 2955 8426 5158 287.60 31 62.06 CHANGED cV+TRFsPpPoGYLHIGpA+sALhNahaA+pasGphllRh-DTssc+pp.EhtcuIh-slcWLGlchp..........tshhpS-+a-thhcastcLIccGhAYhsasosEcLcphRcp.........phs..tps+hsccsLpLa.E-hpptptpstttslRhKlshpus.hshcD.Vthclphss.........cchshsKhcshPTYcFssslsDtl.pITHslRspEahsssspahhlhcAluht..ssat+.hhhlNhssphLSKRKhshh..hphhcshs....shlshLt+hGaos.....puhcEhhstptlhcph.hsh.scuhttFspccLph 
.......................................................lhsRFAPSPo..G....hLHlG.....ps+o.AL.hsah....h.A........+................p........h......s....G........p....h.........l......L..........R....lEDTD...t......R......p..s.........c..hhc..................s...............Ihcslc.W.......L..GlcWD...............................................ttshh.Q.Sc.R..h..-..h..Ypphhp...pL....l.p.........p.G........hA....YhC....h....so.t.E...c..l.ct...h..Rtt...........................sts.....................t......s...p......Y.......s.....t......c......s..h......p..h....h.............t......p......h.....t....t....t....t....h....t....s.t....t.s....s.lRh.........+......h.......s....................s..........s.........s........h.........t...h..........c.......D................l............t............G...........c...........l.......p..h.....s..s..t.................................h..c..Dh.l......l.......t.+.......t....c..........G....h....P.........s........YshusslDDth.s..ITHllRGp-als.s....s.s...pp..hh..........l..........h...c....s.....l...............u...............h..................t..................s.......P..........................a.t.H.h.s.h...h...l....s.......s.................t.....p...+LSKRc..........t...............s..........h..............................................................t.........................h.c...ths.........sh.l.s.h.l.t.h.h.G..as.......................t..u...p..p......hhp......h...th.............hp........t......tt.thh..pcL........................................................... 1 979 1835 2473 +4733 PF03950 tRNA-synt_1c_C tRNA synthetases class I (E and Q), anti-codon binding domain Bateman A, Griffiths-Jones SR anon Pfam-B_350 (release 2.1) Domain Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only glutamyl and glutaminyl tRNA synthetases. In some organisms, a single glutamyl-tRNA synthetase aminoacylates both tRNA(Glu) and tRNA(Gln). 
21.00 21.00 23.00 21.50 20.00 20.60 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.49 0.71 -4.59 111 2972 2009-01-15 18:05:59 2003-04-07 12:59:11 13 80 2452 17 1073 2415 678 183.90 39 29.60 CHANGED AsRhhsVh...cPlcls.lss...h.....s........t..phchshHPpp.schGpRplsau..p...plaI-psD...........h.pcLt..G.ppVRL+t.hhslcs.pcl..p..sGplhp..l.sphc..spohtts......h.Kscu.hlHWVus....ppu.lss.phh.Y-pL...hp.........p.t..........thl......lNPcShthh...puhsEssl...tshphssh...hQFERhGYaplD ...............................APRtMAVl.....cPlKll..IpN..a......................sts.............psEhlph.ssHP............p...p.....s............-...h.G.....s....Rp.lPFo..p.....ElaI-+sDFhEp............................................................ssKpatRL..s.G..pEVR.L+s.u.Y.l.I.c..............s..pc...l..........KDs....s.Gslsp.........lhsoYD.....s.-.ohutss........................psRKVKG..sIHWVSu...............scu....lss...ElRLYD+.L.Fs...............................s.p.Psst.....................chl.....lNPc..SL..hhh....ppuhsEP.u.l.............ts.s...t......s.t.ct.............aQFER.GYFssD......................................... 0 358 646 899 +4734 PF00750 tRNA-synt_1d tRNA synthetases class I (R) Bateman A anon Pfam-B_1276 (release 2.1) Family Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only arginyl tRNA synthetase. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.18 0.70 -5.63 9 5698 2012-10-02 18:00:56 2003-04-07 12:59:11 14 23 4847 14 1682 4669 4137 317.60 27 59.58 CHANGED hlsslLspstph..stthpppcVll-FsSsNsst.hHlGHlR.shlGDuluRLhEhhGasVlRt.alsDaGsQhshhhspL.pttptphosp..ltphpshYttshhch.s-El.......ht.cthphlshLputspp....apphhsp.lh-hh+pphpchhsshDVhhhE....uE..hhpsphschlpcL+cpGhlhEpDGAhhlhssta....G-shsh.lllKSDGshsYhssDlAhhhp+h.pcthDhhlYhlsscpcsahtphaAAspthG.asPc.ssclhtls.hVsLscDtc..+hppRuGssVpLsDLL-.............................uIGlsAs+Yu.lppspsosh.hDh ..............................................................................h....hhtt.ttth..........t.....s.ps.p.pl.h.l-asSsNsst..s...hHlGHhRusllGDulspllc....hhG.a.p.V.h..............+t.alsD..............hG.....sQ.h.......................s..h..L...h.....h..........u.........h.............p......................t..........h..............h.......p.......c...............t........h............h..................................t.....................................l.......s...........t.........h...............t...........p.................h........Y....h......t...t...........p...........h............t................................t...........c.................t...............................................................................................................................h...........t...........p...............p...................u...p......p............h..h...h.................p.h.........p.....s.s.c..tp..............................hp.ph.h...p....p........h...h....ch.............h....p....p....h....p...p....h.....h....s....c....h....sl....h..h.pc.................uc....h..h.....s....s.....t......l....t......p....s.l..p.t.L..cp.p.....G.....h.........h.h...............E...............p............-..........G..........A......h...........h.......l.......p.h..s.....p.a...........................G.c.s..h...s..h......l..lh.K.s...........
....D......G......sh.hYhs.s.............D..lAhth..+h.............p....h....t...h..c...c..hl.hlhu..scpp.......tahtplhs....s........hct....h........G.....a...........s..................p.....sh....p...l.........h.l....s..........hhh.............h..........h.............c......s...Gc......................thpsR.p..Gs.sl..pL..c-l...l.-E.s........h.p...t......s....h..........h...............................................................................................l.uhsu....lha......lp.ph.pth.Fp........................................................................................................ 0 587 1058 1400 +4735 PF01406 tRNA-synt_1e tRNA synthetases class I (C) catalytic domain Howe K, Bateman A anon Swissprot Family This family includes only cysteinyl tRNA synthetases. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -12.00 0.70 -5.30 11 5726 2012-10-02 18:00:56 2003-04-07 12:59:11 14 22 4818 10 1568 11552 7784 301.80 46 64.24 CHANGED .hsslcptpVshYlCGsTVYDhsHlGHuRshlsFDllRRhLph.hGY-VpaVpNlTDIDDKIIp+Atpptpo..................................................................................................................hpplscpaIpsapcDhcALNVL.PshcPRsT-alscIl-hlppLlc+GaAYsus......GDVYFcVsshcpYGpLSspsl-pLctsspsts.......ttK+sshDFsLWKuuKsGE....suWsSPWG+GRPGWHIECSsMsschLG.....splDIHuGGhDLhFPHHENElAQSpAhas.tphspYWhHsGalpl-sEKMSKSLGNFlpl+DhLppa-schLRahhhssHYRs.L-aoEphlppAps 
....................................................................................................................PlpsscV.pMYVC..G.sT..VY...s..h..s..HlGp.A.R.........s.h.l.sFDl.lpRa...Lch....hG.Y.c..V...........p....YVpN....l.T..D.........l................D..........D...K.....I.I.....p+..Ap..cp......u.........s.........................................................................................................................................................................................ht..p..l..s.c.+.a.l.p..t...apc..DhsA.....L.s..l.h............P......s...........h....c........P.....R........A...T....c....a..l.......s.......-..h..I..p....hl.c....p......L.l.c.+GaA..Ytus............................GDVY.F.....c.....l.....p.....p.........h...............t.......s.......Y.........G.....p..L.....S...t...........p..s.l..-....-.....l.p..s..G.u.c.s..css..................................p.pK+s.PhDFsLW.K.s.......u....K............s...G....E.....................su...W...s..........S.....P.........W........G.............p............GRPGW...HIECSAMup..chL.G.................s.p....hD..IHGGGtDLhFP..H..H....E.......NEI.AQ.....S......c.............u..........s.............p.................s......p................p.........a..........s....p......Y...W....h...H....s...G..h..l..p..l.......c....s..........E.................KMSKS.....L.GNFhT.......l.......+-.l........L................c................p................a.....s........s..............p................s......lR.aa.h.l..s..s..H.Y..R..s..slsaS.c.csLppA..t......................................................................................................................................................................................................................................................................................................................................................................................................... 
2 550 1024 1339 +4736 PF01921 tRNA-synt_1f tRNA synthetases class I (K) Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes only lysyl tRNA synthetases from prokaryotes. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.25 0.70 -5.78 39 631 2012-10-02 18:00:56 2003-04-07 12:59:11 13 4 615 2 222 4907 4602 353.70 38 66.72 CHANGED ttWs.p.Accll.cRhpt.....s....phlhpTGhuPSGhsHIGsFt.EVlpsshVt+Ahcph.utp......s+llhhuDDhDsLRKVPc...NlPsp..hppaLGpPLoplPDPa.Gs.ppSaucHhpt.hpchL-phGh.-hEahSuo-hYpuGhasctlhhsLcp....h-cIhcIl..hhscc....hpts...........................a.PahPlCscsG+lhps.lhshc.ptsplpY.s............csGcphpsslpsGt.........sKLpW+lDWuhRWtuLuVcaE.hGKDhts....ShshuscIs.clhGtcsPhshsYEhhh.c...ssp......KhSpSKGsslolc-WLchusPEsLpahhhp.cPppthclphc.sl.+hlDEY.ph.cta..........ppstppphtsslaclppsp ...............................................................................................................................................................tWs.c.Accll...c+.tt.........tp..............hlhp.oGhuPSGhsHIGsFt.EVhpsshVt+Ahcpl..upc.......oRhIhhuDDhDs...hRKVPc.............Nl.....P...s.p..th...........lt.pals.hPLoplPDP............a...G......s....apSaucH.pshlpphL-pFGh....-h.EFhS.uo-hY+SG..tacptlhpsLc+..h-cIhclhh.hh....sc-.....cpts.........................................................................Y.Pa.hPlCscsG+lhp.s....s.lp..phDs.pssTlsYcs.........................csGppt...plsl.s.s.Gp.........sKLpWKsD...W...s.M..R.Wsshs..VD..aE....h..G..KDHts....Shs....l.u.....scIs...c..IhGs..c.s.P.....t...h..s....Y....E...h..h...h...c...ssp.........KlSpS+GN.ulolc-hL......c.h.s..s....s.Es........Lt.h.h.h.h.p.......+...P.p..p..A.h.+.l...h......FD................s.I...s...+.s...h...D...E.Y.phhptYh...........ppphcpphtp.saclp...s....................................................................... 
1 67 145 176 +4737 PF00152 tRNA-synt_2 tRNA synthetases class II (D, K and N) Bateman A, Sonnhammer ELL anon Prosite Family \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null --hand HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.02 0.70 -5.67 107 16099 2012-10-02 14:22:40 2003-04-07 12:59:11 15 73 4974 111 4475 12333 7486 357.10 28 69.99 CHANGED sc-hRLcaRaLDLRpsphpp.tlphRsclhpslRpahscp.GFlElETPhLspussEG......ARsFlVss..............tpaau.....LsQSPQLaKQlLhluG.h.-RhapIu+sFRsE-h...+scRps.EFTplDhEh..uFsstc-.lhplsEpllpplhc.ph.h.......................slcls..........F.Rloap-Ahcpauu-c.sDh+h.th....ph.p.htph............t.thphht..............................................................................................t....htphhstlhtch.......scc........tlh....t.pthphhalsDaPhh......p......................+PFst..spp...ps.......................shucsaDlllsG..hEluuGuhRlpc.chQpphFcthshs.....tcp..thFsahLcAhca.GsPPHGGlAlGlDRLlMllsstpsIR-VIsFP+spp .........................................................................................................................................................................................................................................................................................................................................-h+hc.RaLD.L.h.p.s..p.t...............t...s....hphRupl..hpslRca............hs...pp...G.......Fl-lE.TP......h.......l.......s.......t.....s...s.....s....pu..........tA+sFhsp.....................................p.hhs......hhLt.........S....P..p.........L.a.....hc......h........L..h....s..u.G......h....-.......+..l..apl.......u......+.sF..R...sE...sh..........s..s..R.HssE..FT..........l-.h..h......u.....a..h.c..h...................p.............-.....lhs..l.s..E..........s..hl..ppl.hp.pl..h..........................................................................thphs..............ts..F.+....lo...a.p-.A....hcp.....h...ts......pp..............c.hc.............th..t..htth.......
...tth.thph......................................................................................................................................................................................................................................hGtlhsph........hcc.......lh.........-pp.hhpPsalhc.aPht.......h.......................................................pPhst.....ps....ps.s.........................................................................................thsptaD.l.hl.sG............hEl.us.G.pclpcs..c..Q..p...p.pFct......s.hs....................................p--....t............h....t.alc.A.h.cY..G.....h.P.Pp.u..GhulGlDRLlMll...ss....t....p...s.....IR-VIhFPph....................................................................................................................................................................................................................... 0 1514 2790 3758 +4738 PF02081 TrpBP Tryptophan RNA-binding attenuator protein Mian N, Bateman A anon IPR000824 Domain \N 20.50 20.50 24.40 25.60 18.10 17.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.55 0.72 -4.55 10 142 2009-01-15 18:05:59 2003-04-07 12:59:11 10 1 138 252 55 115 0 73.50 64 97.61 CHANGED Mpssp..suDalVIKAlEsGVNVIGLTRGoDTKFHHSEKLDKGEVMIAQFTEHTSAIKlRG+AhItTsaGplcSEuK ......h....tsuDYlVIKAlEsGVsVIGLTRGpDT+FHHoEKLDKGEVhIAQFTEHTSAIKlRG+AhI.TpaGplcS-....... 0 27 44 48 +4739 PF00587 tRNA-synt_2b tRNA synthetase class II core domain (G, H, P, S and T) Bateman A anon MRC-LMB Genome group Domain Other tRNA synthetase sub-families are too dissimilar to be included. This domain is the core catalytic domain of tRNA synthetases and includes glycyl, histidyl, prolyl, seryl and threonyl tRNA synthetases. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.26 0.71 -4.68 112 18502 2012-10-02 14:22:40 2003-04-07 12:59:11 20 111 4989 173 5461 19073 11934 180.70 25 33.50 CHANGED lpptlhphhpchhp...p.GapcltsPhlhppclhpt.oGc.tphhcc...hap...h...........pcttt........c............p......h..........................................................................................................hLpPpsp.slsthatpphhp.hpp..LPl+hhthush.aRpEh....psp.GlhRhRpFpps-hhhass.s.pp..............................................s.pthtchlphhpplh.pp...lGl.........saclh.sspsshst.tppphshcsah.sp ...................................................................................................................................................................................................................................................................................................................ctlpphhhcphp.....ct.G......a.p..ElhsPh.lh....s.t.....c.lapt...o......G..+...h...s..p...a...s..-s.....h...ap....h..................p.s..tp..................................c............p......h...............................................................................................................................................................................................hLtPp...sp....sh.ss..h........ac.s..........p.l...p.............S....................h..+............c.......L...P....l.....p.......hsp.h...u...s......s.a.....RsEh.......pstpGlh..R.....sR..p.F.p.p.t.......-.ta.h.F..sp..s..-p..........................................................................................................hp.c.php.p..h.h..s.h.hp.clh...pp......h..Gl...............sa.+.h..h.h.ts.p..s.sh.u.h....s.u.scpach.ph.....t........................................................................................................................................................... 
1 1884 3455 4605 +4740 PF01411 tRNA-synt_2c tRNA synthetases class II (A) Howe K anon swissprot Family Other tRNA synthetase sub-families are too dissimilar to be included. This family includes only alanyl-tRNA synthetases. 19.50 19.50 19.60 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -12.92 0.70 -6.22 25 6692 2012-10-02 14:22:40 2003-04-07 12:59:11 14 24 5160 30 1902 5657 5079 441.50 35 60.86 CHANGED ElRppFLcFFc.c+G...HphlsSusllPc.sDPoLLFsNAGMspFKslFLGtt.....psshsRAssoQ+CIRsGGKHNDL-NVGhTuRHHTFFEMLGNFSFG.....DYFKcEAItaAWElLT..ph.....asls.-+LalTVapsD-........EAhslWpchs.lPcpRIl+hstc.......DNFW.....pMGDs.............GPCGPCSEIaYD+G.chsst.sus.sp....-ssRalEIWNLVFhQaNRps......cGshpsLPc+slDTGMGLERlsuVLQshpsNa-sDlFhslhpthpplouh...s........spsthphuaRVIADHlRulsFhluDGllPuNpGRGYVLRRILRRAlRau.+pLGhcps.FhtcLVssllchhGssaPELccptshlpcllcpEEppFt+TLcRGhpLlcchlpphpp..spslsG-suF+LYDTYGFPlDLTp-lAcE+GlsVDhpuF-pshpc.p+cco+..pstpstt.....hthshpsltcltss......s-FhtYsph.............pspuplhulhp.sschlspltt.Gppsh........llL.DpTsFYAEuGGQluDpG...hlpsss....scFpVpssQchs.uhllHhGpl..ppGs..lplGDpVpuplDppR 
......................................................................plRphalpaFt.p..p.s....H.h..............Sssll.P..sDsoLLahNuGhs........h.K.hF.G.................................h.........R..hsssQ+sl....Rss....Dl-sV.GhT.s..RH..pTFFEM..LGNFSh.G.................DY..F.K........c....-AItaAWEhLT.....ph.....hths.-+LasTha.....t.....Dp........................EAhplWtp.............................l...........stp+Ih.h.......p....................DNFW........phGt..............G.PCGP.soEIaaD..+..G......t..hh..s...t...........s....t........t....................-ss..RalElWNl.VF.Q....asc..pt......................................ps..p..h.p.L.P.p..s.lDTG.MGLE..Rls.........ul.........hQ.........t.........hp..........sNa-hDla.hsl..lpthtph.ss....................................................t.t.ts.huh+VlADHlRshsahluD..G..shPuNpGRGYVLRRllRRA.....h....Rau......p......h...L......G............h..............p........t........s......a.....h.hcLl.ss.......l....hp...........h..........t....t......h..a..s..-lh.p.........p..t..t......h.....ltph...lp...tEEppFh..c..........T...............LppGh......tl....hpp............ltp..ht...........................tphls.Gp..sFc.L...a..DT..YGFPl....-LTt-h......h.p.-............t.s..........h..p..lD.t...sFpt.tMtt.Q+.......pcu+...tstt.tt.................t.......t......h.......p...h....t.....................sphhsYp.t...............................p.h.p.u.pl.h.t.l.h.........................p........s.............t..........s........p...t...h.....p........t.....u..p..p..s.h....................ll..L..-p.TPF.YAE..uGGQsuDpG.............hl...t...s..ts........................h...h..t.V...t..D..s..p...+.......h......s...s.......h.......h.h..H.h....s.pl.......t...........u.t........l.....p..hGpt.lphplD............................................................................................................ 
1 653 1183 1586 +4741 PF01974 tRNA_int_endo tRNA intron endonuclease, catalytic C-terminal domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain Members of this family cleave pre tRNA at the 5' and 3' splice sites to release the intron EC:3.1.27.9. 21.10 21.10 21.30 21.80 20.60 20.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.66 0.72 -4.18 78 852 2012-10-11 20:44:43 2003-04-07 12:59:11 12 20 456 40 569 852 86 86.70 26 27.42 CHANGED F.h+ahVY+cLRc+.Gahl+sGhK......aGsDFtlY........tttsshsHucalVh............l.lspspth......................shpclhptsRlupuV+KphllAhl.....sppsc.......lsa ..........................hpYhlY+cLRs+.Gahl.p..sGhK............F.GsDal.lY.........ttsPhhhHupa.hVh..................l..h.s.......s.cph..........................................sht.p.lhshsR...lussV+Kphllshl.....p................................ 0 178 316 463 +4742 PF02778 tRNA_int_endo_N tRNA intron endonuclease, N-terminal domain Enright A, Ouzounis C, Bateman A, Griffiths-Jones SR anon Enright A Domain Members of this family cleave pre tRNA at the 5' and 3' splice sites to release the intron EC:3.1.27.9. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.37 0.72 -4.42 65 347 2012-10-01 20:02:48 2003-04-07 12:59:11 9 7 240 33 209 357 30 66.70 25 25.64 CHANGED hpuhL.sspVllt.....s.csspp...LacpuaYG+.............hpsphLpLSllEAhYLhp+.Gtlclh....pptchlshcclh ..................................h.h.tt.lhh........t..pp........lhpp...shYGp........................................h.tphLpLSL.EAhaLshp.GpLplh......ptc.lsh.ph............... 0 50 112 158 +4743 PF01746 tRNA_m1G_MT tRNA (Guanine-1)-methyltransferase Bashton M, Bateman A anon Pfam-B_2049 (release 4.1) Family This is a family of tRNA (Guanine-1)-methyltransferases EC:2.1.1.31. 
In E.coli K12 this enzyme catalyses the conversion of a guanosine residue to N1-methylguanine in position 37, next to the anticodon, in tRNA [1]. 21.50 21.50 22.40 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.96 0.71 -4.70 23 5148 2012-10-01 22:53:19 2003-04-07 12:59:11 16 19 4707 18 1419 3601 2980 190.20 39 74.76 CHANGED hpph.c+clsplphpshRsasts++phsssphahsuhshhhKhc..h.thp............shpshhlpsps..h.pphh.pl....pp-....cllhLsuchEsh.pclpppt.......hasIGshV.psschss..hhstththh.Glh..st.pL.h-sa.......llttPth....o+shs..lp..pV.-lLLpspc..hpsW..cpulhcshspR. ..................................................................................h.s+AhcpsllplpshshR-asp.s.+.Hp..p.V.D.D.p..PYGGGs.....GM.l..hp....s..p..P..l...hsAlcsh............pt.........................pts+l..Ihh..oPpG....c.hs.Qph.sp-.L.............u.p.p.p............................cL.lhlCG+Y.....E.GlDE...R.lhpt.hss...........pEhSIGDYVLoGGElsA..hshh..DulsR.llPGVL..Gppt.SttpDSFs........................sGLL-hPpY..........................TRPts..acG........hpV.P.-V..L.....lSGsH...t..pIccWR..hcpuLt+Th.+RP....................................................................... 0 488 884 1178 +4744 PF03054 tRNA_Me_trans tRNA methyl transferase Griffiths-Jones SR anon Pfam-B_823 (release 6.4) Family This family represents tRNA(5-methylaminomethyl-2-thiouridine)-methyltransferase which is involved in the biosynthesis of the modified nucleoside 5-methylaminomethyl-2-thiouridine present in the wobble position of some tRNAs [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.24 0.70 -5.90 21 5331 2012-10-02 18:00:56 2003-04-07 12:59:11 11 13 4511 6 1265 6684 5283 320.80 41 86.58 CHANGED t+VlVuhSGGVDSSVuAhLL+cQG.apVlGlaMcpasp...t-phs.........tCsutcDhpDAp+lscpLGIPhhhlsatccYhpcVhp.Flc-YppGpTPNPDlhCN+pIKFuhhhchshp......LGsDhlATGHYARlshs........................spstLtcuhDppKDQoYFLsslscctLppllFPLGchp..KspVRplApctuLt...sAcKKDSpGICFlucps...accFLpcaLssp..pGsIlchc.....GpllGcHcG...hahYTIGQR+GLslut....htcPhYVlcKDscsNplaVup...p.tLhpcplhspphsWhs.....thsst..hpsps+hRapp...s.hpsplphhss.tplcVpFcpshpuVoPGQssVhYps-....tsLGuGhI ..........................................+VlVuMSGGVDSSVs.A.h.L..L.p.c...p..............G...Y...c..........V.h.G.l..aM.+......Wcc.................scppt...............................hC...s...u...t...pD...h...t.D...A...pt...V...s.c...p.l.G.I......s.....h.a.sl...s..F.t...p.c..ah-p..V...hphFl.s..E..Y.p.t..G.RTPNP....s....lhC.N..+......c.I..KFp..shL-hAhp.................L..G..AD..h.lATGH..Y.uRht.p..t......................................................sst..h.plh+u....h...D.....s.sK...DQo.YFLhpL.spc..QLt+shFP.lGc..h..p.........Ks...c.....VRc.lA.p.c..............h..............G......Ls......sAc.KK.D........SpsIC..F.Is-p.p............app.FL..p...p...................a....l.....s.....s.....................p.......sG..p.....h.h....sh..-.................................G.chl...G.cH...p...G................lhaaTlGQR.+..G..LG.....Ius....................stcP..ha.Vl.shDh...p..pN..hlhVu.p...........c....t..L.hspt..l..h..s..p..p..lpahs......................p..p.......s.h..cs.psKhR..Y+p....................tsh........s..p...l..................p..........h..........h..........s..........-.........t.................p..............h....pV..h.F...s..-..P.t.....A...lo..P.GQu.sVaY......p......u.-......hsLGGGhI...............................................................................................................
................................. 0 456 838 1088 +4745 PF04558 tRNA_synt_1c_R1 Glutaminyl-tRNA synthetase, non-specific RNA binding region part 1 Kerrison ND anon DOMO:DM04413; Family This is a region found N terminal to the catalytic domain of glutaminyl-tRNA synthetase (EC 6.1.1.18) in eukaryotes but not in Escherichia coli.\ This region is thought to bind RNA in a non-specific manner, enhancing interactions between the tRNA and enzyme, but is not essential for enzyme function [1]. 25.00 25.00 25.50 25.30 23.70 24.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.84 0.71 -4.72 27 278 2009-01-15 18:05:59 2003-04-07 12:59:11 10 19 200 1 172 258 0 152.70 33 20.99 CHANGED sss--LhpLFpplGLs-pKu+EhlKNpKlossLpsllppu.sss..sshs+pptsLLasLAo..ph+ssp....hs+pshllphIhsuclKTshQlsAAhcalpspss....shsssch-ctsGV.GV.VT.--lcptVsphlppp.KppIhpc..RYp.hsshhhhscttspL+WAcsp ...............................c.h.pLFttlGLsEpKA+EslKNp.plossLtphlppA.....tts.......sshs+s.....sGsLLYslAo....+l+s..st......+h.shl...lpaIsspKl+Ts.Qls..AAlcYlpspst...tsl-tscF-ctCGV.GVhVTsEpIcpsVpphl..ppp..+tplltp..RY+.hs..hshlh...scs+st...L+WADu................................. 0 70 103 143 +4746 PF04557 tRNA_synt_1c_R2 Glutaminyl-tRNA synthetase, non-specific RNA binding region part 2 Kerrison ND anon DOMO:DM04413; Family This is a region found N terminal to the catalytic domain of glutaminyl-tRNA synthetase (EC 6.1.1.18) in eukaryotes but not in Escherichia coli.\ \ \ This region is thought to bind RNA in a non-specific manner, enhancing interactions between the tRNA and enzyme, but is not essential for enzyme function [1]. 
21.20 21.20 21.20 21.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.24 0.72 -3.29 23 240 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 176 1 146 227 0 84.40 35 11.22 CHANGED hhKstlDhplLcLLGPKsEtDltKKpKps+.....sptstt.h.......sspspspss-ssscps.hhEthhG-...hHKPGEN.........Ppsh..Lhpc ......................lKsplDhplLcLLGPKsEuDLt.Kt.Kttc.................tphtp...................ttpsss...pssp.ssspst.h.Ephhup....FHKPGENhp...............Ppoh.hLppc....................................... 0 55 82 120 +4747 PF02091 tRNA-synt_2e tRNA_synt_A;tRNA_synt_2e; Glycyl-tRNA synthetase alpha subunit Mian N, Bateman A anon IPR002310 Family \N 20.00 20.00 20.50 20.90 19.90 19.50 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.97 0.70 -5.30 9 3035 2012-10-02 14:22:40 2003-04-07 12:59:11 10 8 2991 8 663 1810 1805 283.40 66 88.81 CHANGED FQshIhsLQcaWupQGCllhQPaDhEVGAGThpPsTFLRuLGPEPWpsAYVpPSRRPsDGRYG-NPNRLQ+YaQFQVllKPsP-NIQ-LYLsSL+ALGIDshsHDIRFVEDNWEsPTLGAWGLGWEVWLsGMElTQFTYFQQlGGltCcPVosEITYGLERLAMYlQpl-slaDlhWs-u....lTYG-lFhpsEhEhSsYNFEtAsV-hLFphF-.a-cEAhphlc.s..LPLPAY-hVLKsSHuFNlLDARGsISVTERpRYIhRlRsLARtVActYhptREpLGFPL ...................................................FQplILsLQpYWu........c......QGCsllQPaDhEsGAGThHPhThLRAlGPEP.WsAAYVpPSRRPsDGRYGENPNRLQ+YYQFQVllKPSP-N.IQELYLsSLctLGlDPt.HDIRFVEDNWEsP..TLGAWGLGWEVWLsGMEVTQFTY..FQ..QVGGl-Cc.PV.o...uEITYGLERLAMYlQsVDSVYDL.Ws.-G.........................loYGDlF.+QsE.....hEpSpYNFEhADs-hLhppF-paE+E.....Appl..............L..............c..t.t...................L....s.............LP...AY-hlLKASHoFNLLDARGAISVTERppYIhRIRsLu+sVAcsYhtpRctLGFPh..................................................................... 
0 211 416 543 +4748 PF02092 tRNA_synt_2f tRNA_synt_B; Glycyl-tRNA synthetase beta subunit Mian N, Bateman A anon IPR002311 Family \N 25.30 25.30 25.50 26.10 21.50 25.10 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.58 0.70 -5.96 167 3055 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 2988 0 666 2359 2774 541.00 41 78.66 CHANGED hLlEIGoEELPuchlpphhppltptlpptLpctpLsa..s.slcsauTPRRLAlhl.pslsppQ.sDtp.Et+GPshphAhstsG.sPT....cAAtGFA+up....Gls.......lcc.Lpht..p.........s.sKu-alhh.ppptpGpsstplLschltphlpsL.shsKtM..RW..G.......s.....t...shcFlRPl+WllsLhssc.....llshpl.......h.G....lpouphopGH.RFht.st..slpl.sssscYhptLcpp.hVlsDhpcR+phItpplpphA.ppt..s...sps.h-..-s..LL-EVssLVEaPsslhGpF-.ccFLp.lPtElLlooM+pHQ+YFslhc.p.sG...........pLhPpFlsVuNhpspc..ptlhpGNE+VlRARLuDApFFacpDp....KpsLpshltcLcpVlFpcpLGolh-KspRlptLAthlA..ptl.............s......s.....s...h.ppspRAAhLsKsDLlTpMVh..EFPELQGlMGphY.Ahts...GE....sptVAtAl....pEHYhPphuuDpLPpo.sGsslAlADKlDTLsGhFulGth.PTGSpDPaALRRuAlGllRIll-pp..hslsLppLlppu.hp.h....tt...............hstps.shpplhsF..hhpRlcshhp..cp.u.....hsh-llpAVlu...ts 
........................................................................hLlElGoEELPs+slpshtcphtpphsstLcpstLsa..............s..s..l....p..hauoPRRLAlpVp.sLuppQ.s..Dpp.Eh+GPuhphAh...Ds-G.p...s....o.......KAApGF....sRup.Glo...........l-p...l...chh.p..........................s...cpsEalhhp.tp.ht.Gps.spslL.sshlspslppL.shP..KsMRW....G.................s.....s.....shcFlRPl+tlssLh....uc.c.....l.ls..hpl.......hG........................lpou+so+G..HRFh.......s......s...............t..phsl...s.sA..cpY.phLccp..hVIsDh................pcR+thIhpphcphA.pph........u..............s..ps...s..l..-....-s.......LL-EVsuLVEaPsslhupF-.-cFLp.lPpEsLlhoM+scQ+YFsVhc...p..sG.......................................+Lh.PpFIhVuN...hpscc....pplIpGNEKVlRsRLuDAcFFappDp....................K..........p.....s...L..p..s.....h.l..s+L..p..s..VsFpppLGolt-KspRlptlAshlAppl...................................ss-..sscspRAuhLuKsDLhTsMVh..EFs-LQGlMGtaY.........Apts.........GE.........spsV...AhAl....pEpYhPphAuD.pLPps.....l...usslA...lADKlDTLs.GhFulG.h.PoGopDPaALRRAAlGllR..........Il..........lc.....ps...........h.sls..Lpp.Llppu..hp.hh..ssp..................................................htsspshs-lh-FhhsRh+shhpcp..u.......hshDhlpAVLu.p.................................................... 
0 207 417 544 +4749 PF00261 Tropomyosin Tropomyosin Finn RD anon Prosite Family \N 35.00 35.00 35.10 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.67 0.70 -5.27 43 1328 2012-10-03 05:16:33 2003-04-07 12:59:11 15 16 401 54 388 1233 5 180.20 54 81.39 CHANGED KKhpplcs-hDpspEplpcApccL-pp-KptpcAEuEVAuLNRRIQLlEE-L-RupERLssAhpKL-EApKAADES.ERupKVlENRuhpDEE+h-.LEtQLKEAKhlAE-ADRKY-EVARKLsllEsDLERAEERuEhuEuKlsELEEEL+lVuNNLKSLEsucEKuup+E-pYEEpI+.Lop+LKEAEsRAEaAERoVsKLpKplDcLED-LhspKE+YKsls-ELDpThsELsuh ..............................hp.ph-thp.pthtthppp.lc.....tc....cth.p..pA...Eu-VA...uLsRRlp.LlEE-L-.R..upERLsoAhpKLEEA.pKAADES...E..R..uh..KVlEN..Ru.h.pDEE+M-h....EhQLKEAKal.......AE-.....AD+KY-...EVA.....RKLsh.lEs-LERsE-RsEhuE..s+hh-LEEcL+.h.spsLK.uLpst........t-..........c..hsp+E.-pYccpI...+hLsp.+LKEA.....E..sRAEhAE.Rs.VtKL.pKplDcLE...-..p...lhtt+.c.hthpp.h-.sh.-h....................... 0 115 147 252 +4750 PF03301 Trp_dioxygenase Tryptophan 2,3-dioxygenase Mifsud W anon Pfam-B_4263 (release 6.5) Family \N 21.20 21.20 23.40 21.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.12 0.70 -5.33 5 1051 2012-10-01 19:57:26 2003-04-07 12:59:11 8 6 627 72 420 1075 498 187.70 29 87.19 CHANGED -SsQsGspcsScu.GhsYGDYLpLDKlLoAQ+hlS-.ttp.lHDEHLFIVTHQAaELWFKQIlaELDolR+LLsss+l--s+.hLclhctLcRlV+ILcLLssQaolL-TMTPLDFsDFRcYLoPASGFQSLQFRlLENKLGV+supRl.YNtp.YpssFtss....chLLsoEcEpoLLcLlpuWLERTPGLc.pu.s..FWhKapcSVhchLs-LhAptuscpssEVlp+cLst-YcKscEVhtSlhD.p.H-.hltpGpRhhoacAhpGAhMIhFYRD-PRFppPaQLLptLMDIDoLhTKWRYNHVlhVcRMlGS.KpGTGGSSGYtYLRSTlSDRYKVFlDLFNLSTaL 
.....................................................h..................h.Y.tYlthp.llt.Q.....o.....................sEhlFlh.HQ........s.ELah+.hhaEhpthht....ht..........t..t......h....t...............phltRh.hh.p.l.tt...h.tl.l.t.s..hos.pa...th...R.......Ls.uSG....FQShpaR.lE...............h...hG..............t.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 143 259 354 +4751 PF04820 Trp_halogenase Tryptophan halogenase Kerrison ND anon Pfam-B_2531 (release 7.6) Family Tryptophan halogenase catalyses the chlorination of tryptophan to form 7-chlorotryptophan. This is the first step in the biosynthesis of pyrrolnitrin, an antibiotic with broad-spectrum anti-fungal activity. Tryptophan halogenase is NADH-dependent [1]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 454 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.49 0.70 -5.86 23 1611 2012-10-10 17:06:42 2003-04-07 12:59:11 9 14 630 30 502 3763 1761 272.60 18 79.12 CHANGED +lVIlGGGTAGWhAAAtLu+th.st..hplTLlES-pIGslGVGE.uTlPsIpp.hpphLGl-Et-FhRtTpuTaKLGIpF.sWtp.............tuccYhHsFGs.hGtshthhsFap.....aWLchpttG.hssshssaslsshsApts+F.uss.ps......htslsYAaHhDAuLaAcaLR+hAcs+.GVpRlEGclssVphcs.sGaVsulphc-....Gchl-uDLFIDCSGFRGLLIppsLpsGacDWScaL.sDRAlAs.stsss....sssPYTc...uhAcpAGWpW+IPLQHRsGN.GaVaSScah.....s--cAhstLhsplsupshtpP.chlRFssGR.............R+psWs+NsVAlGLASGFlEPLESTuIHLlps....ulppLlplFPscshs.Ps.tlscaNcpsshEhEclRDFllLHYpsopRsDoPFWcpsR.phslP-sLpc+lcLFpsp...............uphhpttc-hFtpsSWhpVhhGQGlhPctacPLscshss ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s..a.p....h.ptt..hs.....t.h.L.h.p.....u...t...G..sp.h..h.....t....t...h........t......................t.....................t......t...............h..............t....l....h.tp......................t...t...........h....p..s..c..h..hlDsoG.t...u...h.....l.........t..p.............h.....................................................p..........h..p...p..........h.................................t.......s.h....h..h......h....t........................s......t...........................h....s..........t..s...G.........W....W....I.....Ph.....t.......t...................................................................................
.............................................................................................................................................................................................................................t..h.h............h.s.u..hhpP......h.....u....sh....................................h........................................................................................................................................................................................................................................................................................................t....................................................................................................................................................................................................... 1 157 306 427 +4752 PF01371 Trp_repressor Trp repressor protein Bateman A anon SCOP Domain This protein binds to tryptophan and represses transcription of the Trp operon. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.74 0.72 -4.07 19 1612 2012-10-04 14:01:12 2003-04-07 12:59:11 14 4 1574 49 242 835 143 86.80 39 83.21 CHANGED pphptlh-hlhphtspcpp.thhp.lhT.sEhcuLupRlplsc.LLcppho.RcIppchGsShATIoRsupsLchusssapthLcphh ...........................phpphh-hlhshhsp-pph.hhp.lhTssEh..cuLupR.l.clsc.LL+.tp.ho.+-Icp-hGAuhATI.oRs..s.p.uLch.u.s.s.thcthL-chh............................ 
0 91 147 201 +4753 PF00290 Trp_syntA trp_syntA; Tryptophan synthase alpha chain Finn RD, Griffiths-Jones SR anon Prosite Domain \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.51 0.70 -5.85 18 4254 2012-10-03 05:58:16 2003-04-07 12:59:11 15 8 3921 94 1062 3690 2893 247.30 35 89.87 CHANGED Fuphptpsp.suFVPFlsAGDPs..hEsolcllcsL.cuGADllELGlPFSDPlADGPsIQpAshRALsuGhThspsl-hlcchR.phsslPllLhsYaNPlhphGhEp....FhtpstcsGlcGlllsDLPhEEusslhphspctslshlhLluPsTs-pRlcplsctusGFlYlVSphGVTGscs.shssplpphlp+lKpho.shPlhlGFGISss-pspphs.tsADGVllGSAlVchlpp......phcptcpslpcltph...sp..hcsus+ ............................................................................................................thptptc.sAh.ls.alssG.DPs..h-pohchlcsL........c....u....G............A.............DhlELGlPFSDPlADGPsIQp..AshRALs..s..G.s..........o..hppsh.c..hlpp....l.R...................p....p.....sph.P..llLMs...........Yh.N...l....hp....h..........Gl-p.....................F.hpcstcs.G.V.cGlllsDlP...h....E...E.u...s......h...t...t.t...s...p...c...t.s...l.s..hIhLssPso.sc...-Rlcplsptu.p...............G.F..lYh.lSh..sGV.TGs......ps....p....h.s.s.s..l.p.ph..ltc.l.+p.h........s......s......h.......PlhlGFGIussppsppht....t...sADGlI.lGSAlVch..lpp..........ph.p.p.........t.........................t.............................................. 
0 339 685 908 +4754 PF03222 Trp_Tyr_perm Tryptophan/tyrosine permease family Mifsud W anon Pfam-B_2873 (release 6.5) Family \N 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.24 0.70 -5.65 13 4713 2012-10-03 01:44:59 2003-04-07 12:59:11 8 5 1373 0 483 3542 123 382.30 25 94.07 CHANGED +osphlGGshlIAGTsIGAGMLAlPlssAGshFhsohllLlhsWhhhhtSGLhlLElhpphts.....ssuhsTlucchLG+shpllsslshhFlhYhLhhAYIouuGuILppslsphhh.hshss+sssll.....FsllhusllhhuTp.slDphsplllhuhlluFslshshhlsclcsshLhs.....h..sthhshllsulPlhlsSFGFHusVPSLhpYhsps.l+cl++uIlIGoslsLllYllWplsshGslspspFhtlltpGuslssllpALtthhpSsshthslphFuhhAlsoSFlGVuLGLFDaluDlFKa.pcspsGRhchshlTFlP..................................PLlhulhaPpGFlhAluYAG.lusslassIlPslLsapuRpcpspts.a+VhGGshhllllllhGlllhlsp ..............................................................................h...........l.hGTsI..GAGh.L....hlP..l.s..h..u..u.s.G.h..h.h..s..h.h..h...l..lhs..a..s.hh.h........a.su.h....h.l..h.c..hs..p..sss.............................ssshsslsc..c..a...h....G...+......t......h.s.......h....l.h....s...h.h.......h....h......F.......s....l......a...s....l......s....h.u...Y.......h..........s......u.....s..........u....s......h...........l..........p..........p..........h..........h......s....p...........h....t........h.......s........h......s..........p....h....s...s..hh...........................h.s....l...l...h...u....h...l.....l...h........h....G.......pc.......h....l.......s.....+......h......s......s...h........l..l..h..s....h....l....h......h....h.l.....l....h.......h..h......h..L....l....s......p......h......p.........s...h....Lhs..................h....s..s...s......h...h......h...h.......l...h....h...s.l..P....l....h..l..h.S..F......u.a....p....s.....s....l..s....S....h.......s.........p....h........h......s.....p...c........hc.......+......l....p.....p....s......l......h...h.....u.....o.........h...l.s....L.l...h....a..l...h...a...h...h...u...s..h.
h........s.l....s............s...p.............h...h...s..h...ht...p...s.......l..s.s...L..lp..s.h.up..h..hs.....s..........s.....h.....l.p.h..s.s.....s....l..hu.....h..l..A.....lsoS.....F.l......Gl.........h.L..G........l.h...-.......h....l....s....-......L.....h.............p.......h........s.......s....s..................t.........h........u........+...h.......p....o...........h..hl...s..F..l..s..............................................................s.h..h....h....s..l..h.a..P.s..u...h..l....hh.l..uh.uG.....h.............huh...l.h.h.llP.hh..h...hh......t......s....R.p........h.....................................................ap....s......hh.st.h.hh..hhhhhhh........................................................................................................................................................................................................................................... 0 100 201 347 +4755 PF01509 TruB_N TruB family pseudouridylate synthase (N terminal domain) Bateman A anon Pfam-B_792 (release 4.0) Family Members of this family are involved in modifying bases in RNA molecules. They carry out the conversion of uracil bases to pseudouridine. This family includes TruB, a pseudouridylate synthase that specifically converts uracil 55 to pseudouridine in most tRNAs. This family also includes Cbf5p that modifies rRNA [2]. 
25.60 25.60 25.80 25.60 25.30 25.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.67 0.71 -4.05 171 5359 2009-01-15 18:05:59 2003-04-07 12:59:11 13 25 4803 48 1566 4094 1764 143.70 39 46.08 CHANGED ++...hh.....p...h+....KlGHsGTLDPhAoGlLslslGc.ATKlhpalh.sss.KpYpuphpLGhpTsTtDtpGpl..l...tp........h.tl.sppp....lcpsl.ppF.pG.pIpQlPPha.SAlKhsGc+LYchARpG..hpl-...hts.RplpIaplcl...lph..p...ts.......plphclpCS+GTYI ............................++lhpsc...KsGHsGTLDPhAoGlLslsl.Gc.ATK.hs.p.alh..sss..KpYpsplpLGtpTsTtD...........s..........pGpl.........l....pp...................s...p.h....s...t..cp.....................lp.p........sl.p..p.a.pG...pI.pQ.....lPP.haSAlK.hpG++hY-hARpG..........p..V..c.........p..ts..R.lsIaplph.....lp...h.....c..ts..........................phphcVpCSKGTYI.................................................... 0 520 968 1305 +4756 PF01456 Mucin Tryp_mucin; Mucin-like glycoprotein Bateman A anon Prodom_3102 (release 99.1) & Pfam-B_3837 (Release 8.0) Family This family of trypanosomal proteins resemble vertebrate mucins. The protein consists of three regions. The N and C terminii are conserved between all members of the family, whereas the central region is not well conserved and contains a large number of threonine residues which can be glycosylated [1]. Indirect evidence suggested that these genes might encode the core protein of parasite mucins, glycoproteins that were proposed to be involved in the interaction with, and invasion of, mammalian host cells. This family contains an N-terminal signal peptide. 
29.60 29.60 29.70 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.34 0.71 -4.41 6 1107 2009-09-12 22:44:45 2003-04-07 12:59:11 12 5 11 0 62 1096 1 150.00 37 94.82 CHANGED Mh.....hpRlLC.lLhLAL.CCssVCsTApt..............................................tGQhssssspuusGpsppTTTTTTTpssTTTTTTT.................Tssss....sspsssTTsssTTTTTsspAPupssTTocAPTssssR..........SLGussWVssPLLLhVSAhshT..Ass ......................................................................................................................Mh...pCRLLCALLVLAL...C.C.C..s..sVC.s....s..tpt....................................................................................................................................................................................................................t..........t....t............t.....s......t.....t.....s....ps........p.....t.....p........s...s..s..s..s....s.....p..s.s...s.s...s.ps..sp................................................................................t.t.t........s.......t....t..p....s...s.....p...s.s...s..s.s.s......s......o........T....T....T.T.T..s..sp......A.P....s...........s.....T.T.o.cAP....o.s..o.......TT.+A.P........Stl.RchDGSLuSSAW.....VCAPLlLAsSALAhT..sl.......................................................................................... 0 3 3 62 +4757 PF00913 Trypan_glycop Trypanosome variant surface glycoprotein (A-type) Bateman A anon Pfam-B_1351 (release 3.0) & Pfam-B_2618 (release 8.0) Domain The trypanosome parasite expresses these proteins to evade the immune response. This family includes a variety of surface proteins such as Trypanosoma brucei VSGs such as expression site associated gene (ESAG) 6 and 7 [3]. 
22.10 22.10 22.10 22.20 21.90 22.00 hmmbuild --amino -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.08 0.70 -5.55 63 493 2009-01-15 18:05:59 2003-04-07 12:59:11 14 4 12 4 163 485 0 305.80 17 79.35 CHANGED hshhllhshh......sppssussptulpts.sWpslCplop-Lc.plsutshpphpshhsthpphpptth+hplash....tpsssppspthsslsuhh....ppstpshpshhssthppulsAsusuuhltG+IcEalsl...hspspsuuss....CLssssssssssss.........hthstspsphphsssssp...stssstlsssGapshh...........sssssptssssppCpLh.....tsssssGhspsss.........tssslphu.uGhlsls.....tsssthshtthtshttttptttt.hhthhtt..........tptspssssthpssssshpsssphppslpphht.....tptttptpphtptlpphhtsspspthpt....hhttht.p.plstthh....stsptspLs.cIssh.ppLtplLthhptptt .................................................................................h....hhhhh...........h...ts...t...t.t..u.lttp.shptlCsloptL+.t..............lsthst....p....pht.......t....h...t.p.ph....p..ph.p.....t....h..p..h..p....lhhh.........htsst..pss........ptht..h......Lhtth......hpthtphppphpph........t....ptuhtAuu....tA........uhuAGRl-Ealtl...........Fup..ut...........s....susph............Clusssssssptsp....................thp.s.tth.hppspps........htttpshss..tshpsl............................sspshts.s.ss....pss...CsLh.......pssssG..hhtsss..................hppshhau..uGlhshs...............t.t..........................................................................................................................................................................................................tt......................................................................................................................................... 0 121 163 163 +4758 PF03249 TSA Type specific antigen Bateman A anon Pfam-B_3060 (release 6.5) Family There are several antigenic variants in Rickettsia tsutsugamushi, and a type-specific antigen (TSA) of 56-kilodaltons located on the rickettsial surface is responsible for the variation [1,2]. 
TSA proteins are probably integral membrane proteins. 19.80 19.80 20.30 30.10 19.50 19.30 hmmbuild --amino -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.73 0.70 -5.96 8 365 2012-10-03 17:14:37 2003-04-07 12:59:11 8 1 7 0 1 363 0 374.30 72 96.08 CHANGED IELGDEGGLECGPYAKVGVVGGMITGVESsRLDsADu-GKKHLPLTTuhPFGGTLAAGMTIAPGFRAELGVMYLpNIoAEVEsGK..............ssucADosssTD...uPIhKR..KLTPPQPTIMPISIADRDhGVDlsNlPQAQstts..QlNDs..RuAcRIAWLKsYAGIDYhVKDPNNP.GsMhlNPVLLNIPQGNP...uNssptAhQPsDhsILDH-QWRalVVGlsALSNANKPSlSPVKVLSDKIoQIYsDIKPFAcIAGI-VP-ssLPNSASVEQIQNKMQELN-lLEELRESFDGYI.GNAFAsQIQLNFhIPQtA..QQQQGQG.QQQQAQATAQEAsAAAAVRlLNG..N-QIlQLYKDLVKLQRHAGIKKAMEKLAAQ.t.DutspGuGDsKKKQG...ASEcSccsutu........KETEFDLSMIVGQVKLYADLhTTESFSIYAGVGAGLAYTpGKIDsKDIKAHTGMVASGALGVAINAAEGVYVDIEGGYMHSFSKIEEKYSVNALMASlGVRYNF ........IELG-EG..GLECGPYuKVGlVGGMITGsESsRLDsADs-GKK+LsLTTuhPFGGTLAAGMTIA.GFRAELGVMYLpNIoA....pVE...GKsKs.....................DStGcsKADSu.sGsD.....API.RK.R.hKLTPPQPTIMPISIADRDhGlDlsNlsQAtAtts....Q..LNsE...QRAAtRIAWLKNhAGIDYhVpDPNNP..GshVlNPlLLNIPQGsP...uNs....pRspQPsshsIhs..H-QWRaLVVGlsALS.NANKPSsoPVKVLSDKIopIYSDI+.FAcIAsI-VP-ssLPNSASVEQIQsKhpELsphLE-lR-SF-Ghl.sNAFssQIQLNF.hP.tt....QtQ.GQ..QQQQ..uQsTAQ-AsAAAAVRhLNs..NpQI.QLY+DLVKLpRHAGl+KAMEpLAsQ..........D.stt.............ps..tspscppp....ss.p.pp.....t.t..........+.EsEFD...LSMl..VGQVKL..YADlhhTEShSIYuGlGAGlAaT.GKIDshDlK.pTGMVsS............................................................................... 
1 0 1 1 +4759 PF01166 TSC22 TSC-22/dip/bun family Finn RD, Bateman A anon Prosite Family \N 25.00 25.00 25.10 25.90 24.90 24.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.89 0.72 -4.01 10 339 2009-09-11 08:14:02 2003-04-07 12:59:11 13 4 87 2 161 275 0 59.20 65 15.71 CHANGED MDLVKoHLMYAVREEVEVLKEpI+ELh-+NupLEpENsLLKsLuoPEQLsphpu..plpsss .................MDLVKSHLMYAVREEVEVLKEQI+ELhE+NSpLEpENsLLKoLASPEQLuQhpu..pht..ss.................. 0 26 42 96 +4760 PF04668 Tsg Twisted gastrulation (Tsg) protein conserved region Waterfield DI, Finn RD anon Pfam-B_4556 (release 7.5) Family Tsg was identified in Drosophila as being required to specify the dorsal-most structures in the embryo, for example amnioserosa. Biochemical experiments have revealed three key properties of Tsg: it can synergistically inhibit Dpp/BMP action in both Drosophila and vertebrates by forming a tripartite complete between itself, SOG/chordin and a BMP ligand; Tsg seems to enhance the Tld/BMP-1-mediated cleavage rate of SOG/chordin and may change the preference of site utilisation; Tsg can promote the dissociation of chordin cysteine-rich-containing fragments from the ligand to inhibit BMP signalling [1,2]. 25.00 25.00 29.60 27.40 18.40 24.10 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.06 0.71 -3.95 11 135 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 83 0 82 122 0 122.90 45 58.17 CHANGED psphop+SplE-L.-ulPsLFcAlTu..EsDut.hpWslhoFPl....h..hsptpshsphL.s.ss.pts..htsPsssloss........CTVlYhspChShp+C+QoCESMGASpYRWFHsGCCECVGspClsYGspEoRCppC.. ........................................s.s.opKSpVE-l..-s..lP...uLFcALT...Es..Dst...hpWslhoFPl....s.thsphc.s..hspah.ph.s...p...p.....hp......h.s.......h........sss.hpss.............CTVlYhcpChShppC+.oCcSMGASpYR.............WFHsuCCEClGspClsYGspps+ChpC.............. 
0 19 25 55 +4761 PF04705 TSNR_N Thiostrepton-resistance methylase, N terminus Kerrison ND anon DOMO:DM04814; Family This region is found in some members of the SpoU-type rRNA methylase family (Pfam:PF00588). 27.10 27.10 27.70 93.80 27.00 27.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.38 0.71 -4.29 3 6 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 6 6 1 11 0 114.70 52 41.82 CHANGED MTELDlIsNsSDPAVQRIIDVTKHSRSsIKTTLIEDlEPLMcSIRAGVEFIEVYGSDooPFPu-LLDLCc+RsIPVRLIDuSIVNQLFKuERKAKVFGIARVPRPARFADIASRu .MsphDlIsstSDPAVQRIIDVTK+SRSslKTsLIEDsEPLscuIpAGVEFIEVYGs-uoPlsscLLshCcpRsIPVRLlssulsNpLFKuERKsKsFGIARVPRPu+FuDlAsR.s. 0 0 1 1 +4762 PF00090 TSP_1 tsp_1; Thrombospondin type 1 domain Sonnhammer ELL anon Published_alignment Family \N 21.60 12.00 21.60 12.00 21.50 11.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.19 0.72 -3.85 31 18854 2009-09-12 10:45:09 2003-04-07 12:59:11 14 787 233 25 10470 16743 573 50.30 28 19.16 CHANGED o.WspWSsCSV.TCG.pGhphRpRhsst......sssCstsspp......schC.phcpC ..............h.u.sW....o.........t....C.......S......t.....o.......C.........G.....s.......G........h.......p.....p.....R.p....+...p..C.....t..s....................s......s...p..............C....t......s......t..t..t....................hp.....C..p.t.C........................................................ 1 2752 3498 6471 +4763 PF02412 TSP_3 tsp_3; Thrombospondin type 3 repeat Bateman A anon SwissProt & Pfam-B_2972 (Release 8.0) Repeat The thrombospondin repeat is a short aspartate rich repeat which binds to calcium ions. The repeat was initially identified in thrombospondin proteins that contained 7 of these repeats [1]. The repeat lacks defined secondary structure [2]. 
25.00 13.60 25.00 13.60 24.80 13.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.18 0.72 -4.48 11 4235 2009-09-16 13:31:17 2003-04-07 12:59:11 13 198 363 36 1963 3922 3104 30.90 43 18.51 CHANGED cDuDuDGlGDAC....-pDhDsDGl.shhDNCPhsuNssQ .....................................C................p..t...D...t...D..s..D.G.l..D...p......p.......DsCPhsss.t.......... 0 606 831 1367 +4764 PF03073 TspO_MBR TspO/MBR family Mifsud W anon Pfam-B_1882 (release 6.4) Family Tryptophan-rich sensory protein (TspO) is an integral membrane protein that acts as a negative regulator of the expression of specific photosynthesis genes in response to oxygen/light [1]. It is involved in the efflux of porphyrin intermediates from the cell. This reduces the activity of coproporphyrinogen III oxidase, which is thought to lead to the accumulation of a putative repressor molecule that inhibits the expression of specific photosynthesis genes. Several conserved aromatic residues are necessary for TspO function: they are thought to be involved in binding porphyrin intermediates [3]. In [2], the rat mitochondrial peripheral benzodiazepine receptor (MBR) was shown to not only retain its structure within a bacterial outer membrane, but also to be able to functionally substitute for TspO in TspO- mutants, and to act in a similar manner to TspO in its in situ location: the outer mitochondrial membrane. The biological significance of MBR remains unclear, however. It is thought to be involved in a variety of cellular functions, including cholesterol transport in steroidogenic tissues. 
20.70 20.70 21.20 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.98 0.71 -4.68 148 1274 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 1043 0 537 1142 1226 141.80 27 85.38 CHANGED llhlslshssuhhuuhhoss........ss..W.YpsLpKPsasPPs.alFsslWTlLYhhhuluuahlapth.........tpptsphslslaslQLsLNhhWSslFFuh+......phthAhl.llhLhlsllhshhtahp..lsphA.uhLllPYlhWlsFAshLNhs.lhtL ...............................................h.hhhhhshhhuhhuu....h....h....s......t.......tp....W...Yt..s.Lp+P...s.asP...Ps..hlFs.lWsl..L.Y.h.hh.u.l.uuahlapp.........................ttpttphsltl.a.sl.QL.hlNhh..Wo..lF....Fsh+......phhhAh.ltlllLhhh.lhhh.....h.......hha.hp.......lsp...h..A...uh..L....h....l.....P...............YlhWlsFAshLNhslhh.................................................. 0 183 330 441 +4765 PF02956 TT_ORF1 TT viral orf 1 Bateman A anon Pfam-B_1612 (release 6.4) Family TT virus (TTV), isolated initially from a Japanese patient with hepatitis of unknown aetiology, has since been found to infect both healthy and diseased individuals and numerous prevalence studies have raised questions about its role in unexplained hepatitis. ORF1 is a large 750 residue protein. The N-terminal half of this protein corresponds to the capsid protein. 
19.60 19.60 20.10 19.60 19.30 19.40 hmmbuild -o /dev/null HMM SEED 525 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.85 0.70 -6.05 58 3965 2012-10-04 01:49:40 2003-04-07 12:59:11 9 3 72 0 0 2670 1 111.10 45 87.44 CHANGED Msa.WWhRR....R+...WhhphhR...................RhRRhhRRh+RRhps..RR..RRR.hhthp+....RphhRRRp....hRR++KKl.hlpQWQPsslR+CpIcGhhPlllsGc...sptspNYshcp--hss.....t..........PaGGGaosppFoLchLY--ap+tpNhWTtSNppLDLsRYhGsphpFYRHspsDFIVpasppPPFphschotsshHPuhlhhsK+KhllPShpT+PpG+thl+l+IpPP+hhpsKWYhQpDlCsssLlsltuoAsshpaPasusposs.sloFts.Lss.hYppshhh............................sstppt.t.h.t.laps........................thYpohts.tpl.p.httssptpp....ttp....t................................sss.Yst..ap.........................................................tlpYcsGhaSshaLssh+h.shp.h..............ssat-lpYN.PhpD+GpGNplWh.p.hoKtsspa...spspschllpslPLWsshaG..Yh-alpppt.tspshhtsthlslhsPYTpP...hstss..sshual.hDhsFspGKhPts.sshlshhhct+W.YPphhaQppsls........slspsGPa..sY.+s-ppss.pLs..hpYcFpFpWGGs 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s..Kps..Y...sKspSKCLltsLPLWAusYG..YhEaCuKsT..GDs..N..IchNsRhl.IRSPaTsPQL.ls..H..ss..P.+GaVPYS..........................................................................................................h......................................................................................................................................................... 2 0 0 0 +4766 PF02957 TT_ORF2 TT viral ORF2 Bateman A, Mifsud W anon Pfam-B_1489 (release 6.4) & Pfam-B_4693 (release 7.6) Domain TT virus (TTV), isolated initially from a Japanese patient with hepatitis of unknown aetiology, has since been found to infect both healthy and diseased individuals, and numerous prevalence studies have raised questions about its role in unexplained hepatitis. ORF2 is a 150 residue protein. This family also includes the VP2 protein from the chicken anaemia virus which is a gyrovirus. Gyroviruses are small circular single stranded viruses. The proteins contain a set of conserved cysteine and histidine residues suggesting a zinc binding domain. 
21.10 21.10 21.10 21.10 20.90 21.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.77 0.71 -3.41 17 774 2009-09-11 23:42:44 2003-04-07 12:59:11 10 4 129 0 1 695 0 100.80 26 68.28 CHANGED apPs...hastpthptpWhsshhpoHshhCGCscslcH.......hhp..........................hh+ptssL.ht.pt.tpht.h..sssp-uss.......tssG-s.t..sh......spuDlDhL.hAt-hs-p ....................pss...hhs.ptppphWhpsshpoHushCuCssshtH................Lpph...t...th.t....................s.s.t.s....t.t......h+h..........h.........................................................s.s.t..t...tt.................................t..............................................................t.t...p...........s.................................................................... 0 0 1 1 +4767 PF03542 Tuberin Tuberin Griffiths-Jones SR anon PRINTS Family Tuberous sclerosis complex (TSC) is an autosomal dominant disorder and is characterised by the presence of hamartomas in many organs, such as brain, skin, heart, lung, and kidney. It is caused by mutation either TSC1 or TSC2 tumour suppressor gene. The TSC2 gene codes for tuberin and interacts with hamartin Pfam:PF04388 , containing two coiled-coil regions, which have been shown to mediate binding to tuberin. These two proteins function within the same pathway(s) regulating cell cycle, cell growth, adhesion, and vesicular trafficking [1]. 
19.50 19.50 19.60 21.20 19.40 19.40 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.15 0.70 -5.39 5 261 2009-01-15 18:05:59 2003-04-07 12:59:11 11 12 189 0 169 279 0 274.60 30 18.27 CHANGED lV+SEsElEDIlsAVDGLl+VFpVKLYRLPusHAl+VYslLluHLEtHYc+PalLtplSlIRY+IF-WhLpARANuSaHIGYP-uE..pss+VRFSsYLGl-uP...............ppupuosphsLs.hussptsspssslhPsosLTsISI+RuCpVIVpCLKcEpDWpVlQLVLoELPcVLQNKALIQGND..lDuLAsTLhKMhsD.hplE+L.pusstusspoDlHsLVLPALoSLAoYHpaLDsspQ+sIIsuLcpGLIoRpASlCIsoLTILlLEMP-sLhs+LPDLLlcLSKMSuTshlAlPVLEFLSTLlHLPpHLauNFssppYMsVFAISLPYTNPaRYDHYTVSLAHHVIAuWFlKCRLPh .............................................................................................................................................................................................s..........................................................................................................................................................................................................................................................t....h....p...s.......hl..shshhhpsllphL.c.p-sDWcVhphVLs+Lstp......Lp.+sLhh...s...ss...plcpLpssLsph.l....p..................s...h..h..................c...ph.....tt..ss..p..s.hp+sDlthsl.h.sLosLloYHphh.s..+sc..........pc....-hVtsh..pG...l..h..p..........c..........s...A..pt..C..l...hALolCshEh.P..sl....hKtLssll.s+h.o+l.s..oss.hAl...lLEFLusLuRLP.pLYtNFtt-patpVFuIsl.Y...............h....p.................st+....................t................................................................. 0 54 81 133 +4768 PF00091 Tubulin tubulin; Tubulin/FtsZ family, GTPase domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family includes the tubulin alpha, beta and gamma chains, as well as the bacterial FtsZ family of proteins. Members of this family are involved in polymer formation. FtsZ is the polymer-forming protein of bacterial cell division. 
It is part of a ring in the middle of the dividing cell that is required for constriction of cell membrane and cell envelope to yield two daughter cells. FtsZ and tubulin are GTPases. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. Tubulin is the major component of microtubules. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.38 0.70 -4.52 94 20546 2012-10-03 12:11:42 2003-04-07 12:59:11 20 44 10719 170 3790 14692 2809 152.80 36 51.77 CHANGED pIhslslGsuGsphssphhc.................................................................................psh.s.schlhlsTDspslsp.hpsspp.....lhhspphhpGh..G..AGus.t.hGpp.................................stccshcpI.cctl............c.s.hchhhlssuhGGGTGoGhusllschh+-h.................................s........t.lsluhsshPh.....ph.Eshhc.hsAhhulppL.hcpsDsllllsNspLhc........ls..............spphslp...................ssapp 
...............................................................................................................................................phh.t.thhp.....................................................................................................................................................................sshhs...sc.h.l.hlsh.-.s..t..s...h...........ct....s.......+........s...ush.................l...hs.........cp..h....s....h.....G..........G................AGsN.......t....hGc.................................................................................................hhc.s.h-...l...pp..h...................................p...s.h.....p................h..h..lh.tuhG.G.G..TGs.....Ghu.sllh....ph.hp.p..................................................t......h..hhhsh.s.s..h..P.............ph..-s.h...h..c.......h...........t..u.....uh..ppL......hc......p...........sD.....p.....h...hhlsN..pt.lhc..................ls......cp.hth...sa..t.................................................................................... 0 1325 2173 3053 +4769 PF00418 Tubulin-binding tubulin-binding; Tau and MAP protein, tubulin-binding repeat Finn RD anon Prosite Family This family includes the vertebrate proteins MAP2, MAP4 and Tau, as well as other animal homologs. MAP4 is present in many tissues but is usually absent from neurons; MAP2 and Tau are mainly neuronal. Members of this family have the ability to bind to and stabilise microtubules. As a result, they are involved in neuronal migration, supporting dendrite elongation, and regulating microtubules during mitotic metaphase. Note that Tau (Swiss:P10636) is involved in neurofibrillary tangle formation in Alzheimer's disease and some other dementias. This family features a C-terminal microtubule binding repeat that contains a conserved KXGS motif [1]. 
21.60 21.60 22.50 21.70 20.70 21.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.32 0.72 -4.26 12 1318 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 98 0 474 1352 0 30.90 51 14.99 CHANGED Vplhsptl.Dlp.pVpSKsGSp-NIKHpPGGGp ..........VpI.spKl.Dhp..pVpSKsGSh-NI+HpP..G..GGp... 0 102 146 270 +4770 PF01021 TYA TYA transposon protein Bateman A anon Pfam-B_90 (release 3.0) Family Ty are yeast transposons.\ \ A 5.7kb transcript codes for p3 a fusion protein of TYA and TYB. The TYA protein is analogous to the gag protein of retroviruses. TYA a is cleaved to form 46kd protein which can form mature virion like particles [1]. 25.00 25.00 26.50 26.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.43 0.72 -3.79 4 183 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 13 0 119 181 0 91.70 76 12.65 CHANGED ACASVTSKEVpTNQDPLDVSASKhpEa-+sSTKANSQQpTTPsSSAVPENtHHASPQsAQsP.PQNGPYpQQsMMTsNQANsSGWuhYG+PSMMPYoP AhASVTSKEVpoNQDPLsVSASpl.EaD+sSTKsNSQQpTTPuoSAVPEN.HHsSPQPASVPPPQNG.Y.QpsMMT.NQA.sSsWuaYt+PSMhsYo......................... 0 88 88 88 +4771 PF03251 Tymo_45kd_70kd Tymovirus 45/70Kd protein Bateman A anon Pfam-B_3418 (release 6.5) Family Tymoviruses are single stranded RNA viruses. This family includes a protein of unknown function that has been named based on its molecular weight. Tymoviruses such as the ononis yellow mosaic tymovirus encode only three proteins. Of these two are overlapping this protein overlaps a larger ORF that is thought to be the polymerase [1]. 
25.00 25.00 192.80 192.50 19.60 19.50 hmmbuild -o /dev/null HMM SEED 463 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.37 0.70 -5.80 13 23 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 0 0 23 0 464.50 32 74.92 CHANGED MSNGhPsSsRpshlcpoQtplhpssochtssssstLssslPLcpscGosohsphlRcPsl+hRhpssPPssPQssRs.soLpPL-hstpsuhhscVHETlcVppsstppspL.pssQLPupspRhpSlPpHLphsupttsRlHARRuDVL.uhosstslpslspsssslLQspsusRt.LHRsLshPpsLHLps.RppsuL+sR+ospRpLQsAsppspLAEsthH..s.spPlpppsGILGPsPLhscspR..........sPpsshppssss..........................slLPsPphspuupuaLPsPTossPs+sspulpRslHLHsSpssosclRPpRlRssulQQspspLGHspuLGQSsNLRusppspPo+pplpLhPhssspspsl.hssh........sP.hppp.Sh....hP+Psss.sshstsssp...hpsplPssh.s.......ssphsssss......suhssssssssssssss...............Ps MSNGhPsSsRRshlapSQRplsposSchpspssstLsssLPLspscGosuhsphlRHPslRhtppPsPPppPQssRs.soLpPLthPtppShhpcVHEThpVppsstppscL.pspQLPspspRp+SlPpHlppsu..pptp+lHARRsDVL.uhssptslpshspsssslLQspsuoRt.LHRslsLPcsLHLps.tsposL+sRco.pRpLQsAsppPhLAcsphH.....s.s.slpcpsGILGPsPLsscspR.s..............sPpsthspssss.............................h.s.t.slLPss+h.poSpuHLPssTsssPspsspuLpRPlHLHpSssposchRPpRlRpculpQscspLGH.psLGQSusLRsscpssPs+ptLpL.spPstpspslspssL........sP.httttSh.h..hPpPsshhsshshsosp...hphplPpsh.s.......spth.ssss......sphshtsshpss..sss.h.....ssss..s................................ 
0 0 0 0 +4772 PF00983 Tymo_coat Tymovirus coat protein Finn RD, Bateman A anon Pfam-B_1429 (release 2.1) Domain \N 19.00 19.00 20.10 26.20 17.90 17.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.92 0.71 -4.56 9 153 2012-10-04 01:49:40 2003-04-07 12:59:11 13 4 51 27 0 158 0 167.60 34 41.30 CHANGED ps.QssIsssuopLP.ssGppsPoIl.PFQhpssohGsp-susplolAotssluplTohYRHApLspLpAsIpPouhAsupPsTVsLsWVPsNSoATsupILs..lYGGQpFslGGuIsospsIpVPssLssVNPhIKDSVpYTDoPKLLlYSsAsssss..sssTsolpIpGplp ....................h..............s...s.sssslslPFQh.p.shsh.Gst.s.s.t.s.ols.luussslopLsusYRHApLhpLcAhltPshsuhupPholslVWssAs.ssssssplLp..sYGGpphslGGslshsushpVPAsLsplNPhIKsSVsYsDTP+L.hh.sssssssus.....sssh..shlhlpGhl............................. 1 0 0 0 +4773 PF00264 Tyrosinase tyrosinase; Common central domain of tyrosinase Sonnhammer ELL, Griffiths-Jones SR anon Prosite Domain This family also contains polyphenol oxidases and some hemocyanins. Binds two copper ions via two sets of three histidines. This family is related to Pfam:PF00372. 
22.30 22.30 22.30 22.30 22.00 22.20 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.22 0.70 -4.19 127 3739 2012-10-01 19:27:11 2003-04-07 12:59:11 15 75 1610 58 1016 3969 31 141.50 35 41.98 CHANGED tspapphs.................uhHt................................................thshs.hHs..ss....hFhsWHRhYlhhaE.....ptLpp.................ts..................sssthtlPYWDWs..........................sps..ht.ssslhs....ss.h.uh....................................t.h...hss...Fs..shhsph.......................................p.cshp+sh..........................ssttspthsstpp........................ltphlht..................................sshpsFpshh...ps.................................thHsssHh.hlG......................................Gp...............h....ushhsushDPlFalHHuplDRlathWQph ..........................................................................................................................................................................................h.psYDhFV................hlHhhsscss.......................................htss.sh.pshch.uHp...us.......uFlPWH.RhaL..L.haE.....+plp+....................ls.........................sspsFslP..a..Ws..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................... 0 335 609 862 +4774 PF03064 U79_P34 HSV U79 / HCMV P34 Griffiths-Jones SR anon Pfam-B_2985 (release 6.4) Family This family represents herpes virus protein U79 and cytomegalovirus early phosphoprotein P34 (UL112). 23.20 23.20 23.20 24.60 22.50 23.10 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.82 0.70 -4.86 5 51 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 24 0 1 53 2 236.80 47 48.69 CHANGED RcYGTF-NVscsYcQIVocolcLRRacF-oGClI-FlusSG+CEsausGWIS.MIhWTSETsS......tGSLTlDIssD-GppKTY..pARGtILCSKSITSISQso...EG+-+lLTlspENGKLQlTaVTlsKsu+-s-l+slG.DsKstcpFEKECpAs-RKKp...DD-++K+SuKQKEKRRsED..cK+cEDc+KKpE.......c++psDs-Kpsspc-cu.sttpp..pph.........D.....psscEKRQK.aH-s..-RcLEcQScE .............R+YaTFsNssRlLHQsVspoFDVRQFsFDoARlVsCl-G-G+s.phsKGWLC.ATIMQpu-uuuuu......pstQGhMSlDITuDspLpcph..FsRGuIVhNKoVSSVVGss....ssscuuLLThluEsGsLQVTaVcHhh.psHs...psusssu.....usGsAusAu..AVs.s.....oShGuS.......uGspcGsus.ppppRRRpc..........ppH--cR+Kppp.....................pttsu..GuuGuuGGG.s.GuGSGGppu..sotp....thLc-...........st..pRQK.......ERc..Pspp......................................................... 0 0 0 1 +4775 PF02134 UBACT UBACT_repeat; Repeat in ubiquitin-activating (UBA) protein Mian N, Bateman A anon IPR000127 Family \N 20.50 20.50 20.70 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.08 0.72 -4.40 128 1697 2009-01-15 18:05:59 2003-04-07 12:59:11 16 51 346 44 1116 1656 35 65.80 34 11.72 CHANGED pF-cDDsss..lcalhusuNlRAppasI..s...h...............shhps+tls.upIIPAlAoTsAlluuhs..shEhhKllpsp ..............F-pDDssp....lcFlhuuuNLRApsasIp.s...t...............sp.tps+.t......ls..spI.I.PAlAoT.sAhlsuls..shEhhKlht..t................ 
1 397 620 910 +4776 PF01977 UbiD DUF117; UPF0096; 3-octaprenyl-4-hydroxybenzoate carboxy-lyase Enright A, Ouzounis C, Bateman A anon Enright A Family This family has been characterised as 3-octaprenyl-4- hydroxybenzoate carboxy-lyase enzymes [1]. This enzyme catalyses the third reaction in ubiquinone biosynthesis. For optimal activity the carboxy-lase was shown to require Mn2+ [1]. 19.80 19.80 20.40 19.80 19.40 19.70 hmmbuild -o /dev/null HMM SEED 407 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.04 0.70 -6.02 155 3152 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 2165 3 810 2356 887 380.40 36 83.80 CHANGED lptL.ccpG..p....Lhclcp.Vs.spL....Elutlsc+........hhp........t..tss....AlLFcsl...cGh.........hsll.sNla...u.op........cRlshsL.Gh.........phpplsppltphh.p.................ht.hsh.hphh............pusspcshh..tpphD..L..pc.LPlhppaspDGG......alTh...................uhVloc..sP.cs.......NlGhYRhQ.lhs.....c.pcluh+hh..t+css.phap...ch....pcpG...........................c..phPVAlslGs-PshhhuAs.hP...lP...slsEhthAGhLp....Gpslcllcst...sssL.VPAsAElVlEGhls...s........p.ht....EGPFGDasGY.Ys..scp....hPVhcVpslptRc..c.PIa.ss.lsGpPs..tEcphlutsspchhl...shlptthP.p....lh-lhhs.tusha..hsllsIccpa........sGc.........u+plhhuhaustt.h....hsKhlllV....D.cDl-lpDhppVhWAlso.....Rhcss+.....Dlhllsss.s..ss 
.............................................................................................................................................................................phL.-ppG..pLh+I.st.Vs.sphElst..l.scc.........sh+...................s..tGP....ALLF-Ns...p.Ghs........hPV.l.sNla...G.o........................cRlAhuh..G.................shpplsc...h..l.shhhc....................................h....sh..phl............................psusspp..lh..p....u..-c..l.s......L.............tc..lPl.ps.ast.Duu....................shlTh....ulslo.+..sPpc.........tppNlG..IYR.Q..lhu.....+.s+lhh....+als....p+ssA.hc.hp..ch....t.c.....G............................................c..t.hPlulslGsDPshhluAssP........l.P.........sl...oEh.thAGhLR....Gp.hclsps......s...s.s...lpVPAsuEllLEGhlp.................s....................tcht...E.......G.......P......a...G.....DaTGY....Ys..tscp......hPVhplpploh.Rc...c..sI.apoT.hs.G+..P...s...sE.sshLussh.s......clhl...sl.Lp..p..phP..E....lhD.hahP..p.G.ssa...hAlloh.+K.pY..........sGa.........A+pVhhusaohhpth....asKhlIls..................D....-D...l.s.scDhscVlWAlso.....RhcPsRDhlhlpsssh................................................ 
0 231 482 658 +4777 PF01209 Ubie_methyltran ubiE/COQ5 methyltransferase family Finn RD, Bateman A anon Prosite Family \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.45 0.70 -5.25 5 4079 2012-10-10 17:06:42 2003-04-07 12:59:11 13 9 3618 4 1229 27075 9319 227.00 38 91.29 CHANGED pTslKEp+...VpcVFcSVAo+YDhMNDVlSFGIHRLWKc.FTh+psus+cGs.....shLDVAGGTGDlTFcLo-ulGsoG.............KVlllDINEsMLKhGccKl+-pGhh....sIEaLpuNAEcLPF-D.soFDslTISFGLRNsoDh.KuL+EhaRVLKPGGpllCLEFS+PphPlhcpAY-hYuKtVMPhhG+llAc-h-SYpYLsESIRcFPDQ-TLcuMhc-AGFcuVcYcsLTGGlsAlHhGhK ............................................................................................................................tp....Ktpc...VtplF.c.s..l..A.s.+....Y..D....l.....h.....N......c......l....h....S.........h.....G.....h......H.....+....h.......W...+........+......h...s....h.....p.........t....h.........s......s.....c.........G.p...................................p.l..L.....Dl..A...u....G....T....G........D...l....s....h....t....h....u....c...t....s...s..t....p..u......................................................c.V.s..h..s....D.....h......s........p.......s.......M.......L......p......l.....G......+......c.........+.........h....h.........p.....h....s..h...........................s..l..p......a......l.........p...............u........s...............A......p........p........L.............P.................F.................s..........D.......s.........o...........F..........D............s......l........T...........I.......u............F......G............L...........R.............N............V...........s.......-...........h............p.............p...........A......L........c....E........h....h.....R.....V.......L..K....P...G.....G.+..l........l...........l........L.........E.....F.........S.......p.....P......p.......h...............s........h....p....p.....h........Y..........c......h......Y..........h......p.............l.
....l...P......h.....h...G...p......l.......l...u...p.....s...t.......c..u......Y...p.....Y.......L...s..E.....S.....I...c.....t.......a.P.....s...p....c..p......L.....t......th...h.p...c.AG.F............p.....p..l......p........a......p......s.ho.uGlsAlHhGhK......................................................................................................................................... 0 414 798 1059 +4778 PF03981 Ubiq_cyt_C_chap Ubiquinol-cytochrome C chaperone Finn RD anon Pfam-B_5272 (release 7.2) Family \N 21.50 21.50 21.50 22.20 21.00 21.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.60 0.71 -4.33 63 1111 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 985 0 313 697 717 153.30 26 61.36 CHANGED pphsls-ThsucaphhsLHhallhhR.................l+sts...ttu.....ptlsQplh-thhpDh-ppl+chuls-hsl...sKph+ph....sptahGthhAYDpulst...sstsLusALh.RNlhpst................shp...phttlssYltpthttLsshsspslhsuth....ta ..................................................................chp..phlsuchphhshcshhshlR..................................LKu-t........hus.....hplpQpLl-pahccs.pchc.c..t..p..h.p-hht........stplpcL....pphh.hhht.sh..hAhss...........hsspL..sthLtpphhhuh........................................................................scshh.....chsuLsu.Vtsslss.lcsluusuhtssh...ssh..................................................... 0 109 180 253 +4779 PF02271 UCR_14kD Ubiquinol-cytochrome C reductase complex 14kD subunit Mian N, Bateman A anon Pfam-B_4192 (release 5.2) Family The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex [1]. This Pfam family represents the 14kD (or VI) subunit of the complex which is not directly involved in electron transfer, but has a role in assembly of the complex [2]. 
20.70 20.70 21.20 29.60 20.60 19.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.12 0.72 -4.27 25 388 2009-01-15 18:05:59 2003-04-07 12:59:11 11 4 303 52 242 377 2 101.50 37 76.61 CHANGED lhppPtLs+lhhPlAphahN..huGYRphGL+hDDLlsE...EssslpcAL+RLPccEsYsRsaRItRAtQLSloHplLP+ccWTKsEEDssYLpPYlt-lcpEtpE+p- ..........................h..........hh.s.ltpha.hs......suG..ap..ch.GL+...........hDDll.E..........s.sVpcAl+RLP...cc....h.sRsaRlpRAhp....LShp+plLP+-p.WsK.-...-D..h......Y...LpP...hlp-lppEppE+p...................... 0 77 127 196 +4780 PF02320 UCR_hinge Ubiquinol-cytochrome C reductase hinge protein Mian N, Bateman A anon Pfam-B_11849 (release 5.2) Family The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex [1]. This Pfam family represents the 'hinge' protein of the complex which is thought to mediate formation of the cytochrome c1 and cytochrome c complex. 22.00 22.00 22.20 24.20 21.60 21.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.41 0.72 -4.03 24 379 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 288 52 237 373 5 64.70 37 55.86 CHANGED VDPhppLcEcCttpscCsphhpcappCscRVpucsp........scEsCspEhFDhhHClD+CVA..KLFspLK ................DPhpplcEcCtpptcC..sphhc+a-pCs-RVpscsp...............scEcCsEEhF-hhHshDcCsA..KLFppLK.................. 0 78 131 193 +4781 PF02921 UCR_TM Ubiquinol cytochrome reductase transmembrane region Griffiths-Jones SR anon ref [1] Family Each subunit of the cytochrome bc1 complex provides a single helix (this family) to make up the transmembrane region of the complex. 
19.90 19.90 20.20 20.00 19.80 19.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.09 0.72 -3.74 30 409 2009-01-15 18:05:59 2003-04-07 12:59:11 9 10 305 58 228 405 5 61.80 36 24.87 CHANGED upTch..phPDFo......sYcccp.........css-sp+uFoYhhl.GuhulhsAsuAKssVpsFlSoMSAS....AD ....................psh..phPDFs......pYccpp..............pusss+....+uFoYhhs.Gu.....sulssAhuAKssVppFlsoMSASAD..... 1 79 127 188 +4782 PF02939 UcrQ UcrQ family Bateman A anon PSI-blast P13271 Family The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is a respiratory multienzyme complex [1]. This family represents the 9.5 kDa subunit of the complex. 25.20 25.20 26.70 26.40 25.00 24.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.49 0.72 -4.16 7 257 2012-10-01 20:10:32 2003-04-07 12:59:11 11 4 223 52 177 256 0 78.40 35 78.44 CHANGED hhtahshsK.+GIhoYulSPapQ+shsGhFcpul.NsFR.RspophLYhs.PhshhYhlashupcpNphL.pKsstc.hpc ..........................hht.hhsh.s..+..+tlhoYuLSPacQ+shsGhhppul.Ns.aR.Rh+.sphhhVsP...PhlhsYhlhsWupccpchhppKsstt..t................... 0 51 86 144 +4783 PF00984 UDPG_MGDP_dh UDP-glucose/GDP-mannose dehydrogenase family, central domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1105 (release 3.0) Domain The UDP-glucose/GDP-mannose dehydrogenaseses are a small group of enzymes which possesses the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate [2]. 
20.60 20.60 20.90 21.50 20.50 19.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.83 0.72 -3.77 26 6968 2012-10-02 19:36:47 2003-04-07 12:59:11 14 24 3680 103 1988 5681 4792 94.60 34 22.22 CHANGED lpoAEhlKhssNsahAs+IoFhNElupIs-plGsDlpcVhculuhDsRls....ahpsG.GaGGuChs+DshsLsttupphshssp..hhppllpsNpsp ...............psAEhsKlhsNsahAh+IuahNElu.p.ls-p.............h..G..l...slpclhcussh....D.sR..ls...............hh.pP.G.....G.a.GGpClPKDshtLl.t.p.s.p.p...sh..p.....llpthhpsN...t............................... 0 678 1317 1697 +4784 PF03720 UDPG_MGDP_dh_C UDP-glucose/GDP-mannose dehydrogenase family, UDP binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1105 (release 3.0) Domain The UDP-glucose/GDP-mannose dehydrogenaseses are a small group of enzymes which possesses the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate [2]. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.49 0.72 -3.89 161 6858 2009-01-15 18:05:59 2003-04-07 12:59:11 10 20 3675 103 1921 5532 3768 98.00 25 23.17 CHANGED ulLGluFK..ssocDhR-SPulsllptLhpp...Gu.....p.lpsaDP..hstptthth.........................................tlph..htsh..tcslpssDslllhT.-a.spFcp.h...s.tthhphh....pss.llhDsR..slh.ct ....................hlhGLsFK..ss..oDD..h.R-.Ssuhslhcp..Ltp.t..Gs....................c..Vh.laDP..hspppthth................................................................................................slph.h.p.sh...tpsh.p..s.ADslll.ss.cappacs..h......................t.t..h..................tt.....hlhDs+shh................................................ 
0 659 1277 1636 +4785 PF03721 UDPG_MGDP_dh_N UDP-glucose/GDP-mannose dehydrogenase family, NAD binding domain Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_1105 (release 3.0) Domain The UDP-glucose/GDP-mannose dehydrogenaseses are a small group of enzymes which possesses the ability to catalyse the NAD-dependent 2-fold oxidation of an alcohol to an acid without the release of an aldehyde intermediate [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.95 0.71 -4.91 27 7072 2012-10-10 17:06:42 2003-04-07 12:59:11 9 27 3696 103 2033 10113 8347 178.30 31 42.76 CHANGED h+IullGhGYVGLssusshuphG..hcVlslDIspp+lcplspGp.PIhEPGLpcllcpsh...pLphoschppslcpuDlhhIsVsTP.cp.....sptsDLpalpssscpluphlcp...tpllVh+STVPsGoscthhh.llpchstht.........hcatls.sPEFLpEGsAlpDhhpscRlllGspspsst......thhpcl .................................................+Isl.l.G.h.GYVG.L.ssu..s..h..hApt.s........hc...Vl..u....l...........D............l.......s....t...........p.......+....l.....c....t.........l.............s........p........G......p............s......I......h...........E.....s.........s.....l......p.....c..........l.........l.....p.....p.....s.......h..............................................h..................p......l....p...h....o....s......c.....h..p.....p......s...h......p......p.....u..D.hhh.....I..A..V...s.T..Phct..................stp.s.D......h.....s..h.....V.....p.u.....s.s.c.s..lu.p...h.hpp.................hsll.l.h.c.S.T..VP.V.Gss...c..h...h.t......lh.c.p.h.ss..............................php.....ls..a..sPEhlp..pGp..u.l...hD...hh.p.s.sRllsGhssppst.........hh.......................................................................................................................... 
0 686 1345 1734 +4786 PF01704 UDPGP UTP--glucose-1-phosphate uridylyltransferase Bashton M, Bateman A anon Pfam-B_1634 (release 4.1) Family This family consists of UTP--glucose-1-phosphate uridylyltransferases, EC:2.7.7.9. Also known as UDP-glucose pyrophosphorylase (UDPGP) and Glucose-1-phosphate uridylyltransferase. UTP--glucose-1-phosphate uridylyltransferase catalyses the interconversion of MgUTP + glucose-1-phosphate and UDP-glucose + MgPPi [1]. UDP-glucose is an important intermediate in mammalian carbohydrate interconversion involved in various metabolic roles depending on tissue type [1]. In Dictyostelium (slime mold) mutants in this enzyme abort the development cycle [2]. Also within the family is UDP-N-acetylglucosamine Swiss:Q16222 or AGX1 [3] and two hypothetical proteins from Borrelia burgdorferi the lyme disease spirochaete Swiss:O51893 and Swiss:O51036. 19.70 19.70 19.70 19.80 19.20 19.60 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.43 0.70 -5.98 8 1651 2012-10-03 05:28:31 2003-04-07 12:59:11 13 13 879 45 747 1343 69 349.50 26 75.79 CHANGED EhsuFhpLapRYlsc..spspplcWDcIcpPs.--ll.....cY-pLpt.s....pphuplLsKLAVLKLNGGLGToMGCpuPKSlIEV............RsshTFLDLtVpQIEpLN+pY.s.ssVPLlLMNSasTcc-TpKllcK..Ysss+lc.IpTFpQSpaPRlsKDoLLPlPpts..sS.s...-tWYPPGHGDlFcSLhsSGhlDsLLAQGKEYlFVSNlDN.LGAoVDLpILNHlIp....ppsEYsMEVT-KTpADlKGGsLhoY-G+l+LLEluQVPpc+l-EFKShpK.....FKlFNTNNl...WlsL+AlKRLl-sspLcL-IIsN.Kpl...s...........cslcllQLETAhGuAIppFcsuhGl.pVPR.sRFLPVKoo.SDLhLlpSsLYsLc.sGolphsstR.t.ssPll+LGsEFpcVusahpRlsuIP.sllELDHLTVSGDVaFGpNloLK 
.....................................................................................................................................................................................l.................t.tt.h..s.....+huVlhlsGG...............GTph..G..h.ps.P..K..u.h..h..pl..............................t..p..t.hoh....h.p.....l.......s.p.p..l........t......p.......l......p........c........p..........h.......s.....s.......s.......l.P...h.hl.M..sS....Tcc...........s..Thp...hhc..c...........Yh....s...........h...p.hc...........lh.hFp.......Q.s...p...hPt.l.s.t.-u..hl.ltp.............ps.......s..........................t...............PsGpGs....lapuL........h........s..............u.......G.......h...........L....-c.......h.hpp..G..h.c...a.l.alt........sl.DN....Ls.ts....sD.h........hluah.....hp..............p..s....s-h.s.hc...ls.t+s...pss...tpsG..h..l.s...........p.....h...............-..........s.......c..........hp.............l...lEh...u.p.l..st..c........ht....th.p.s...pt.................hhh.F..N.s...s...Nl........ahsh...thlp..p.l........h..........p....t.......t..h..p...h..h..hh..s.K...p....l......c........................ssh.t.lh.....p.....h..t....h.......hh...hsl....h.a..........t.........p...s.h..........s...........l..pV.sR....pcF.PlKss................................................................................................................................................................................................................................................................................................................................................. 
0 291 446 619 +4787 PF00201 UDPGT UDP-glucoronosyl and UDP-glucosyl transferase Finn RD anon Prosite Family \N 19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.60 0.70 -5.95 14 8949 2012-10-03 16:42:30 2003-04-07 12:59:11 13 81 1315 25 4768 9465 106 279.00 18 62.32 CHANGED GKVLVaPh-hSHWhsh+sllccLlpRGHElsVLtsuuohhlc.tcsuslphcsassuhopc-lcs.hhphhpphhhthsp.sshhphhsthp....chushhtssCpplltNKpLhppLpESpFDVlhsDslhPCGtllAcLLpIPhVa.LRhsPshhhp+tstth.hPsSYVPhllosLSDpMTFh-RV+NMlhhLhhcahhphh.pp.asphhSElLsRPsTls-hhu+AshWLlRshash-aP+PlhPNhsFlGGlpC+PAKPLspEhEtalpuSGE+GlVVFSLGSMVSshsEE+AphIAsALupIPQpVLWRacG.....p+PssLusNT.....hLhKWlPQNDLLGHPpT+AFlTHuGusGlYEuIppGlPMVshPLFGDQhDNht+MpsKGAuVoLNhtpMoSpDLhNALKsVIND.sYKENhMpLSplH+DpPhcPLDhAVFWlEaVMRHKGA+HLRsAAHDLTWaQYHSLDVIGFLLusVsslsFlshKsChasaRKhltttp+s .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p........................................p.u..l..l.h....luh................G........o..h...h....t.............s......p......p...........h........p...t..l.....h.......t.....u....l.......t.....p...............s.....t...........t.....h.....l.......W.....h......h...p..s.............................................t..........h.......t......p......s..h..........................hl..h...p....W.....h.....P......Q..........p..........l..L................s..................H.......s.................t................s.t.s..Fl.o.H.s.G....h.......s.S....s.h.E.ul..h..t.G.V.P.h.l..s...................h................P....h.....h.....u......D.....Q..........h.N....s.t.........h............h...........t...........p...........h...................t........h..........u........h........................h...............................t........................h..........p.....t.....t.....t......l......t...t..l..t...t.l..h......t...t........................................................................................................................................................................................................................................................................................................................................................................tth.
................................................................................ 0 1059 2520 3921 +4788 PF03152 UFD1 Ubiquitin fusion degradation protein UFD1 Mifsud W anon Pfam-B_3272 (release 6.5) Family Post-translational ubiquitin-protein conjugates are recognised for degradation by the ubiquitin fusion degradation (UFD) pathway. Several proteins involved in this pathway have been identified [1]. This family includes UFD1, a 40kD protein that is essential for vegetative cell viability [1]. The human UFD1 gene is expressed at high levels during embryogenesis, especially in the eyes and in the inner ear primordia and is thought to be important in the determination of ectoderm-derived structures, including neural crest cells. In addition, this gene is deleted in the CATCH-22 (cardiac defects, abnormal facies, thymic hypoplasia, cleft palate and hypocalcaemia with deletions on chromosome 22) syndrome. This clinical syndrome is associated with a variety of developmental defects, all characterised by microdeletions on 22q11.2. Two such developmental defects are the DiGeorge syndrome OMIM:188400, and the velo-cardio- facial syndrome OMIM:145410. Several of the abnormalities associated with these conditions are thought to be due to defective neural crest cell differentiation [2]. 
35.60 35.60 36.10 35.70 35.00 35.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.95 0.71 -5.13 37 579 2012-10-01 20:15:13 2003-04-07 12:59:11 9 16 320 2 392 551 43 168.30 40 42.35 CHANGED papppa+sYPluhh....c+p..plppGsKIlhPPSALscLsp....lpl..p.aPMlFcLp...Ns........psp+hTHsGVLEFlA-EGpsalPhW........................MMpsLtLpcGsh.....lplp.sssLPpGsalKlpPposcFLD..IosPKAV.LEssL.RNausLThGDhItIsYssppYtlcllEl.....KPss....AlolIETD.....lpVDFssP ....................................p.apt.apsashshh..............p+....phphGsK............l.hhPPSALcpLsp.................................Lpl....p..aPM..lFcLp.......Nt.........................ps.s..+hoHsGVLEFsA.-.E.Gp..saLPhW..............................................MMps....L.hLp....pGsh.............................lplc...ss.sLPhuoalKLQP..p.oss....FLD......I...os...PKAV..LEssL.R.N.F.us.LTpGDlls..l........s...Y......N...s........c.h.Y.c...l..pVhEs................KPss......ul...sll.ETD.....h...pVDFssP.............................. 0 134 226 333 +4789 PF02512 UK UK_protein; Virulence determinant Mian N, Bateman A anon Pfam-B_2106 (release 5.4) Family The UK protein is an African swine fever virus (ASFV) protein that is highly conserved amongst strains, and is an important viral virulence determinant for domestic pigs [1]. 25.00 25.00 176.40 36.90 20.00 17.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.18 0.72 -10.40 0.72 -3.60 7 23 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 9 0 0 19 0 88.40 80 101.25 CHANGED MSTHssSPKEKPVDhNNlSEKsuVVNNAP............................................................EKPAGANHIPEKSA.cMTSSEWIAEYWKGIKRGNDVPCCCPRKMTSADKKFSVFGKG.LMRShQKss ...........MSTHssSPKEKPVDMNsISEKSuVVNNAPEKPAGANHIPEKSA.EMTSSEWIAEYWKGIpRGNDVPCCCPRKMTSADKKFSVFGKG.LMRShQKss...... 
0 0 0 0 +4790 PF03044 Herpes_UL16 UL16_UL94; Herpesvirus UL16/UL94 family Bateman A anon Pfam-B_4392 (release 6.4) Family This family groups together HSV-1 UL16 Swiss:P10200, HSV-6 ORF11R Swiss:P24442, EHV-1 46 Swiss:P28970, HCMV UL94 Swiss:P16800, EBV BGLF2 Swiss:P03221 and VZV 44 Swiss:P09293. UL16 protein may play a role in capsid maturation including DNA packaging/cleavage [1]. In immunofluorescence studies [2], UL16 was localised to the nucleus of infected cells in areas containing high concentrations of HSV capsid proteins. These nuclear compartments have been described previously as viral assemblons [3] and are distinct from compartments containing replicating DNA. Localisation within assemblons argues for a role of UL16 encoded protein in capsid assembly or maturation [2]. 25.00 25.00 42.80 42.80 18.60 18.30 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.20 0.70 -5.45 31 277 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 92 0 0 130 0 223.10 36 94.79 CHANGED Ms.........tsthphl+pFLpcEClWhh.lssssph+lYpussshSshh...........tss.sssspslplplhlh+P+t.....pphhlslhlNGthh.....sssphchhhsptl.t.sphhllhFuslsssshsh..lPs.ss.pss..ssttls.stlhpsup..hl.sp-shsssshu............h.lG.sGAWh...p.ustslYhahls.DLhshCPsh.phPSLu+llsthssCcstt....Cs.Cpspt.tHVsshsshssssss..ussC.ChsPCthhcus....lslpuppsLhsllF.-sphthplsthpttpsslssslscllsGhsssGcplssssssWpLlplsshhSRhhlhuC.sLK+ ...........thss.......phscRLLNDVsVWsp.VRsDshLpIhoAphsLppchp......ttspsss.ssssusLcIaLYLTKPKp.p.tp+ssHITslVNGs+A......hshLp+hssc+oPhG.uchashplu+sphsPsPhE.lPDPpsEP...........................................................................................................................................................................................................................................h....... 
0 0 0 0 +4791 PF03252 Herpes_UL21 UL21; Herpesvirus UL21 Bateman A anon Pfam-B_3264 (release 6.5) Family The UL21 protein appears to be a dispensable component in herpesviruses [1]. 25.00 25.00 56.20 56.10 18.10 17.20 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.73 0.70 -6.03 14 85 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 38 0 0 74 0 424.90 37 97.05 CHANGED MEhpYpsslta..psVsFYlsssGsRAYFlCGGClhSVsR.....ttpsuElAKFGLsLRG.G.sD+slAsYVRoELtR.pG..hphuhPsscc-.....VFlDslulL..............s.ss...usEtDLCGth-lEVhDPtLA-ahVSLpsosGLllssuccpsp-+ll+LacsPslsNssSsFlYsPNpssFsLsQApLscLPsuLpsLVcGLFDsIPs..............sRpPlstps...ppT-VIlTupRAApsh............................hstttptstpst++sslSsFVQV+aI.......PRVhshWsspusss.................sstoLpcLhplhhtsDtllhcs........pthsGlspchspA..+sslsptstslFGptut.hsFlGt.....tshslossQ+FsLhQYllp+tchssCYshLccLscsYhss..pcssss..Dpt................sluDssNslhR-sshlGtsuEtllthshhpsth......................ssssupssps-ussLLclApsphth..s.........ssshppp+htcluthLstLYsGtshhsuAhphuchhGsuthlsshh-sphhoAF-cussu....p+hstYLtuLls .............M-htYtpshha..psVhFYlststpRAYFhpGGClhSlsR......tpssElAKFGLslRG.u.ss+slAsYVRoEL..tppG.....ht.shs.spp-.........VFlDslslL..............t.ss...ss-hDlhst.-lEVhD.hLsch.hsSL.sssslhlssstshsp-phlcLhthPslss.ssStFhYsss..sFsLspApLscLPtSLp.LspGLFDslPs..............sR.sLsscs...p+TslllTupRAAcsl............................hsptpsptt+tt++sslSsFVQl+hI.......P.RVhshWsspttss.....................s.slptL.hlhhhuDcllhcs........pt.sG.LpcE...scA..ppslhptshslaGptGt.hsFhGt......uhsLoshQ+FslhQYIlpR.chhsCYsslccLscpYspt..psts..ss.ssps................hlsDssNtlhRcshhhG.hsc.lh.h................................sut.s..-uthlhchhtt..................shs.t..h.h.luhhLshlYtstshhssAh..sphhsss..lhhh.ph..hoAF-...hh....t+hhthl..Lh.h........................... 
0 0 0 0 +4792 PF01499 Herpes_UL25 UL25; Herpesvirus UL25 family Bateman A anon Pfam-B_700 (release 4.0) Family The herpesvirus UL25 gene product is a virion component involved in virus penetration [2] and capsid assembly. The product of the UL25 gene is required for packaging but not cleavage of replicated viral DNA [2]. This family includes a number of herpesvirus proteins: EHV-1 36, EBV BVRF1 Swiss:P03233, HCMV UL77 Swiss:P16726, ILTV ORF2 Swiss:P23987, and VZV gene 34 Swiss:P09287. 25.00 25.00 50.40 50.30 22.70 22.30 hmmbuild -o /dev/null HMM SEED 540 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.52 0.70 -6.20 15 156 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 90 1 0 140 0 513.80 29 94.47 CHANGED hthhhts..hh.scs+Nhltsshsshphsp.phutphRspt....phchsph++c..hlpAELDsLttpptstssplsscLcslEp......tltchsps.sslp...............................ssspusppssssps...hs.hstspsssh...............pVsIs.sDPslpacsshps-hlsslYsspupWsso....FGsWYtsLpcshhpcRhhP+sh+ussstssohStcLMssslssLpussphahuDppahuDpsAALCLlsAYh.....utpsus.hP....sohs-LLppLPphlchLss-lpstps..usssapFshsc.sppphhuPhs+tt+YspssFssHtlhshLh+tGVlsthsGt...........scssGsshsD.-lsh...slsshlhusslPhhsccQhhLRuGlsuIpuLlLlapLLpsusVasc+ss+phpLuuLls.sthssss.......ss.utpssshpt............spNFpFLhccYVlPhYptsspsslopLFPGLsuLslstpsttussstpt......hlslous+aQ.....slhcllstcLpp+cs.......stllsAHDuLtlphEcGLulLLppspPppu.hpshtpuQFsV 
........s........hhh.scscN.hltss..hhhhhp.thshshcstt...pphthtps++p..hhtstL-sLtt....pttshst-lcp+lcslEp..........................plpph.sssh.....sshp................................................ps.stssptsssuts....tt.ssstssss.tsts................................pltIspNDPsl......pac.oshps-llshlYsspsshsso.....FGsWYtpLpcthhsc...PpshRhsphcssphSpphhssslsuLpssshhhsssp.h.u..pAsLCLhhhYt.............shtss..p.phP.....sohtpLlppLPphl....ctls....ppls....s.....p.......stsshtashpc.Pctpahs.Phsp...YptGshspHsllthLh+puVlsthPG...t..................t..st.phsssstsD.clsh....thsshhht.tps.hhhc-QphLRuslsslsuLlLlh+LLtsssVa.us+hsphhpLuslls.sssssts......sss.stptsh.ht..u...............................tspNFpFLhppYlsPhYttsPs.lplopLFPGLshLslstp..st.....p..shsssp+...............slshuusthQ..........tslhchhhhph.pp+pt.......hcllpsHDulhhpaEpGLGhLhp.shstpt.hpsht.stFNV...... 0 0 0 0 +4793 PF04496 Herpes_UL35 UL35; Herpesvirus UL35 family Finn RD anon Pfam-B_3981 (release 7.5) Family UL35 represents a true late gene which encodes a 12-kDa capsid protein [1]. 25.00 25.00 27.80 28.20 18.10 17.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -9.97 0.72 -3.80 11 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 30 0 0 30 0 99.00 38 74.64 CHANGED ssFDPssPsThos-slpshs.VsLlphLNss.tslpsusptsp.lssA+psllhGtusuhsclR+pHsspTlpRssMFApsDsuoWlRPolGLKRTFsPtlhp ......sFDPssPsThos-slcshh.V-llhtLNss.t.lpsspptsthhpsA+pslhhGtusuhsclRppHsspTlpRpsMFAssDsuoWlRPolGLKRTFsPtlh... 0 0 0 0 +4794 PF03970 Herpes_UL37_1 UL37; Herpesvirus UL37 tegument protein Finn RD anon DOMO_DM03155x; Family UL37 interacts with UL36, which is thought to be an important early step in tegumentation during virion morphogenesis in the cytoplasm [1]. 
25.00 25.00 30.40 30.30 18.60 17.80 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.76 0.70 -5.59 9 82 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 33 0 0 78 0 264.90 37 25.28 CHANGED stspshshpsLLssLtuL........upsssstcshssthsucsRsuIupFhhSosplslppscshW+cLhp...slhclYppot.PEAAhLAcNhsGLlhWRlslpWscs..phh-phcpL+plshthTupEslphLopNsLRhSAshGPsshp.hlo-WhshFcssspsshshoscshhpuRt.h........ltphsAuLsppRFsLIYDhPFVQEGlRlluttssWlsPFslhhpphpssshT..PLTRsLFhluLlDpY.hssssssp.....Lp-hFs-slptls .....................................................................................................s.s.phshppLLssLssL........sppsssscs.u.htsuchRuuhutFLLSssslsstcsctpWc..sLhp...tlCtlapspthPEsAhLAENLPGLllaRLslshscs..pshcphchlpchlhshsus-s.ptLssssLRsuAshG.PVph+thls-WlspapslscsshshsPcs.hcAhttts........lspssAsLsp.tauLlashPFVQEGlRhLuhsushls.FsshhpplssuoLT..PLTRALFTLuLVDEY.hsssptss..s..LhttFtcsVptIc................. 0 0 0 0 +4795 PF02282 Herpes_UL42 UL42; DNA polymerase processivity factor (UL42) Bateman A, Mian N anon Pfam-B_5119 (release 5.2) Domain The DNA polymerase processivity factor (UL42) of herpes simplex virus forms a heterodimer with UL30 to create the viral DNA polymerase complex. UL42 functions to increase the processivity of polymerisation and makes little contribution to the catalytic activity of the polymerase. 
20.80 20.80 20.80 25.70 20.40 19.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.76 0.71 -4.63 12 135 2012-10-02 11:47:48 2003-04-07 12:59:11 11 2 32 8 0 135 0 137.10 27 63.67 CHANGED sphsLsctpLsclhushts.ssshtssFhlhsccshhlpsshtupplhhslptpthsph.......ss.tlhLu.s-uppsLl..hthspt.psht............phshtlcspsshRpLlQplhhssst..........ht..hss.sss.hslhppp.sshshhh .....................splsLpcspLscllsuhus.tss....hhsohhlhsstshhlasohhuppVhhslcpsthSph.......ss..h..u.scuppsLl.shthspt.psst............phslslsspsshRsllQ+lhsssut..............hss....pssuhslhtpt.suhsshh...................... 1 0 0 0 +4796 PF03117 Herpes_UL49_1 UL49; UL49 family Mifsud W anon Pfam-B_2110 (release 6.5) Family Members of this family, found in several herpesviruses, include EBV BFRF2 Swiss:P14347 and other UL49 proteins (e.g. HCMVA UL49 Swiss:P16786, HSV6 U33 Swiss:P52441). There are eight conserved cysteine residues in this alignment, all lying towards the C-terminus. Their function is unknown. 25.00 25.00 53.80 52.70 18.40 17.70 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.04 0.70 -5.28 16 80 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 52 0 0 75 0 247.60 42 50.98 CHANGED hsspspuLlsAL+ppussVPCGNPh.sMs+sLshpsshpss+sllPlspp........................................................ssplspplhsplLuhslLusllslPlhshsht+shRtttsts.......hhsVlCt-CGHCLNhGKsKhps...hsFsPosh.FYsRDpKEKphhhCssoGRlYCShCGSpplpshplhEhs...h.hGh..shl..RAVlusNAAhslpssspphDlllPChuostsCtu.slL+clolpcLLhLT..upssphhCt+Cps .......s..phpuLluuLRccuupVPCGNPlasMs+thlppaCtsssRaLlPlpshsht.tsp.......sts.........................................h..p.s+lshaulussLRsGLluSVI-LPlhChs+hKCpRahcsts........lhAVVCppCGHCLNhGK-KLcsp..psFsLNSh.FYYRD+QEKuVlassps-hlHCSLCGSpplspp+lYElsptshhGt...hpVp..W+AVlG.NAACulhstphthDlllPC..usRoC.usVllRtloV.+LLpLT..SHupshhCt+CQ.... 
0 0 0 0 +4797 PF03121 Herpes_UL52 UL52_UL70; Herpesviridae UL52/UL70 DNA primase Mifsud W anon Pfam-B_203 (release 6.5) Family Herpes simplex virus type 1 DNA replication in host cells is known to be mediated by seven viral-encoded proteins, three of which form a heterotrimeric DNA helicase-primase complex. This complex consists of UL5, UL8, and UL52 subunits. Heterodimers consisting of UL5 and UL52 have been shown to retain both helicase and primase activities. Nevertheless, UL8 is still essential for replication: though it lacks any DNA binding or catalytic activities, it is involved in the transport of UL5-UL52 and it also interacts with other replication proteins. The molecular mechanisms of the UL5-UL52 catalytic activities are not known. While UL5 is associated with DNA helicase activity and UL52 with DNA primase activity, the helicase activity requires the interaction of UL5 and UL52 [see 2,3]. It is not known if the primase activity can be maintained by UL52 alone. The region encompassed by residues 610-636 of HSV1 UL52 Swiss:P10236 is thought to contain a divalent metal cation binding motif. Indeed, this region contains several aspartate and glutamate residues that might be involved in divalent cation binding. The biological significance of UL52-UL8 interaction is not known. Yeast two-hybrid analysis together with immunoprecipitation experiments have shown that the HSV1 UL52 region between residues 366-914 is essential for this interaction, while the first 349 N-terminal residues are dispensable [2]. This family also includes protein UL70 from cytomegalovirus (CMV, a subgroup of the Herpesviridae) strains (e.g. Swiss:P17149), which, by analogy with UL52, is thought to have DNA primase activity. Indeed, CMV strains also possess a DNA helicase-primase complex, the other subunits being protein UL105 (with known similarity to HSV1 UL5) and protein UL102. 
20.80 20.80 21.00 20.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.84 0.72 -4.24 38 329 2012-10-02 15:26:12 2003-04-07 12:59:11 10 8 214 0 89 310 49 69.60 23 8.88 CHANGED shtpsshhllplp.........+spsFtClphpHppp.pps...........splalslpssp.ttlhhshhppCF........usK.........CssNphpsthoshss ..............h..pthhlhplt.........p.tshhChpht+t+pups..............lalslchpp.....hshhQ+Ca.......ssc.........Cpspthps.h.....s.................... 0 37 49 70 +4798 PF03049 Herpes_UL79 UL79; UL79 family Bateman A anon Pfam-B_2433 (release 6.4) Family Members of this family are functionally uncharacterised proteins from herpesviruses. This family groups together HSV-6 U52 Swiss:P52469, HVS-1 18 Swiss:Q01003 and HCMV UL79 Swiss:P16752. 20.50 20.50 20.70 21.70 19.90 16.20 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.43 0.70 -5.31 19 61 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 50 0 0 55 0 246.10 36 92.22 CHANGED hlG+alh.pspshoshlhplhhKllpGpsLsohp.-EL+hl+LlhsphashGLplhLLREslsNsGspDsslLsRKVPsEaWphlY-shcphssssc...hLhsEsptupLhh+Ls.pssslhphlspalhpchGL..tlplssp.lpDGNhLFsLGoVhspRLlhlhtFhhhaWGppphEPhVRhhspKlahhYLIlsG+Lplptshap.psssphsGlhphlhpDhhuapG....slspss.............h.p.tp.hDh..lhlhsssl ...hG+al..phsshsphlhpIhpKhhpGpsLsoh+.EEL+ll+LlhshhaphGLpshLLREshsNsGlsDssVLuRKlPspaWhhlYctLcphssstc...hlhsEspuApL.h+Ls.ps.th.hthlscalhcchGL..slslsp-hhpDGNlLFsLGolhsaRLhhlstFhhtaWGpppaEPhlRhhspKhahhYLIhsG+lpls.ssap.ppopc.sGlhshIhcDh+sFtG....slstps.phhp.pp......................... 0 0 0 0 +4799 PF03043 Herpes_UL87 UL87; Herpesvirus UL87 family Bateman A anon Pfam-B_1736 (release 6.4) Family Members of this family are functionally uncharacterised. This family groups together EBV BcRF1 Swiss:P25215, HSV-6 U58 Swiss:P24437, HVS-1 24 Swiss:Q01007 and HCMV UL87 Swiss:P16730. The proteins range from 575 to 950 amino acids in length. 
25.00 25.00 25.40 25.40 24.10 24.70 hmmbuild -o /dev/null HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.86 0.70 -6.12 19 90 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 54 0 0 80 0 494.10 43 72.04 CHANGED tLht.hpssp..........hshhP.hssspsththtcF.lLphhptW.s....Nsshtchtpplhpslpppsthlhhhh+hssppslplpshslhcapptl.hhh.p.lphpppK+tst.tshthslsp........ppthltasss..sslaRshhsh....................................................................................................usucplspspsshp.ssptss.......................sRthVshhcRlpaAP+ct............................................shshththp....................phpspsttp.shtppFsslsplSls.sF+VNlFNTNhVINTKlsspptss.....pslhslP+.LTpNFVh+KaohKEPuFTVSlFaSsDhs.pssAINlNIsGshlpFLaAhush+CalPIcslF.PAuluNWNSTLDLHGLENQslVRssR+cVFWTTNFPSslSspcGhNVSWFKAATAsISKlpGpsLpsplh+Els.IlshppAplshsKNplFThLEpRNphQIQsLHKRFLEsLatpsuhLRLs........s+slh+lsppGlFDFSK+hlAHoKsKH-CAL..lGh+hsNslPKllspsKKhRLDcLGRNANaLohh+psspp...hsth+tpll++ll+pLu.....l+p+ ...............................................................................................................h.....ht.p..........hshhP...s.hs..t.hcF.llph.p.Whs....ptshtchhtplhtphpp..thlhhhh+.stppshtlpthsh.capptl.hh....p.lphtpp+p..............t.hcshlhh........pph.lhassp..sslaRslhhh....................................................................................................uhst.hh.sps..t...........................................spthlthhpRl.as.ht.............................................shs....hth........t............pt.s.sttp..hs.pFs.hptlols.phtVNsFNTNhVINhKhshpphst.....hph.plP+.hTpsFVhhKaohKEPuhTVSsFhSsshs.hhsulNlNIpGsh.cFLauhush+halsIcphF.PAslsN.NSoLDlHGLEsQsllRstRpcVaWTTNFPshlSppsslNVGWFKAATAIlP+VSGssLEslLLKELshIpshcplslDas.LHRlFThLEpRNsYQIPFLsKQhl..LFlRsshL+LpGhtpc..lc+hlhcAspcGlFDaSKphhuHTK.KHpCAL..lG.RhsNslPKllspsKKh+LDcLGRNANhLohh+phttt...hsth+hpllhcllttLt............. 
0 0 0 0 +4800 PF03048 Herpes_UL92 2111; UL92; UL92 family Mifsud W anon Pfam-B_2111 (release 6.4) Family Members of this family, found in several herpesviruses, include EBV BDLF4 Swiss:P03223, HCMV UL92 Swiss:P16798, HHV8 31 Swiss:P88920, HSV6 U63 Swiss:P24440. Their function is unknown. The N terminus of this protein contains 6 conserved cysteines and histidines that might form a zinc binding domain (A Bateman pers. obs.). 20.50 20.50 20.50 20.80 20.00 20.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.51 0.71 -4.73 18 82 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 66 0 10 71 1 162.60 28 70.67 CHANGED pCphpplpsslTphpslsslYhCspCtcYHVCDGu.p-CsllsTtEGhVCthTGpshssslpsssthsspshpcsp...tphp.p.hhNllpslhpclhpYhppsss.hsclppplh.scGpLpccltslIphTFscChplhsshpps..hsllsSlYIHlIISlYSs+TlYsshlFKsT+NK+aDsllKpMRppWMssL ..........Cphpplpsshs.p.hpltslYhChpCtphHlC.Dts.ppCslls.T.tEu.hVCshTGhshtshhsssp.h.h.pshppst....p.......shltslhp.lhpah.p.s..hstlhpplh.ptsthp.pltp.l.hTFtpshp.hpth..pth..hsllsphalplIIulauptThYsshlhKso+pK+.DslhKphR.tahss.s................... 1 2 8 9 +4801 PF03038 Herpes_UL95 UL95; UL95 family Mifsud W anon Pfam-B_2060 (release 6.4) Family Members of this family, found in several herpesviruses, include EBV BGLF3 Swiss:P03220 and other UL95 proteins (e.g. HCMV UL95 Swiss:P16801, HVS-1 34 Swiss:Q01023, HSV6 U67 Swiss:P24444). Their function is unknown. 
21.60 21.60 22.30 23.20 18.70 21.50 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.11 0.70 -5.57 20 82 2009-09-11 15:08:21 2003-04-07 12:59:11 9 2 52 0 2 72 0 333.20 36 87.25 CHANGED MhsLsphp..scscsphsc+YpcuVcLALshCEus.P.sQFKLIETPhsSFLLVTNVlPc-ssshsstss..............................pthchpslphs+hpthptlhshphpsstpptsssss.................hhp.psphhtssYllYcppphphALshNKssllppsLchlssPspWsappssDPLslLWLLFhGP+SaCpcssChhtc+hGp..PGPlLLPPhhYcPspDlpoFhshsppYVhshYpch..................................................us.h.shshsPFchsRl+cslppl...schsspslhl....S+pCLLCsLY+QNph....uppss.ssshu.hIILsstutphhso.....htsp+pssousslLaPsYslssLlsslstsssu ..................................................................................Mhtlst.p....schsst.shRaccuVphALssCEus.P.-pF+LIETP.psFLLVTNllPc-ps.h.shss.................................pt.phpphphsc.sths..h..hcppsshpthsssst..............ss.t.hs.psphhhssYllYpKpphchuLo.NKsphlptsLc.lhsPshhsapsspDs.slLWLLasGP+SaCtcssChupc+tGp..shPsLLP.hhYcPspDh.oahshsphYVashYcshch..................................................u..p.h.ph.hhPhshsRl+cslptl...tchsscplsh.....SRsCLLCsLYpQNcl....Appcs.sssauPllI.sstushp.lo......hoahhPupssssLaPsYclupLlssls.sptu......... 0 2 2 2 +4802 PF04817 Umbravirus_LDM Umbravirus long distance movement (LDM) family Finn RD anon Pfam-B_5103 (release 7.6) Family The long distance movement protein of Umbraviruses mediates the movement of viral RNA through the phloem of infected plants [1]. 
25.00 25.00 33.60 33.10 21.00 20.50 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.44 0.70 -5.22 4 24 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 8 0 0 24 0 226.80 42 93.30 CHANGED MoSlINV.ssuKSppscGsstSSVRtGcpptsPtsKPtusHssSR+sKGssHPAsTsKcsppslpuspAsssHpcHtGssl.tEusGuVHssRstRRuRRuGuMcsRQ.TsQPppRtscsch.sERRAplDGLLPPLLDTlsGph.GsAtlLhaCltAl+RpLRp+h.cPlQsspcVAuopGcsssQLs-puspsutsLssDGtGRAstus..psl.pGusV.pVCssCsts ...............MoolINV.ssscu+psRGsspsSlRtGcpctAttsKPtsspsPsRRppGGsaPusss+cspcshptspssssHppHtGoslhREusGGVHssRstRptRRuushuPRQpsssP+QRhApsclssERRAplDulLsPLLDTlctpspGsAslLLaClsAlRRELRc+h.cPVQPsHsVuuoptppusQL.-puspssssLpssGcGpustss.ppslpptssV.pVCssCsh.s.............................. 0 0 0 0 +4803 PF00021 UPAR_LY6 u-PAR/Ly-6 domain Sonnhammer ELL, Bateman A anon Prosite Domain This extracellular disulphide bond rich domain is related to Pfam:PF00087. 17.70 10.00 17.70 10.20 17.60 9.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -10.76 0.72 -3.56 40 1221 2012-10-03 01:43:02 2003-04-07 12:59:11 16 8 118 68 543 1327 0 74.40 20 55.41 CHANGED CasCht..tspsCpss.....sCstspsh....Chsspsth..ss...psphhh+sChp.stC.........ph.shthphstsslph...........sC.CppsLCN ......................................ChsC...ht....s...sts......Cpst.......hpCss.s..psh....................Chs.....s..p....s.....p...h.t..........s..t........t.s.ph.hh+..uC..s.......stC..............p........t.h...s.......h...p...h...ph..................pC..Cp.tshCN............................................. 6 39 65 185 +4804 PF00919 UPF0004 Uncharacterized protein family UPF0004 Bateman A, Moxon SJ anon Pfam-B_1257 (release 3.0) Family This family is the N terminal half of the Prosite family. The C-terminal half has been shown to be related to MiaB proteins [1,2]. This domain is a nearly always found in conjunction with Pfam:PF04055 and Pfam:PF01938 although its function is uncertain. 
21.30 21.30 21.70 22.20 20.90 21.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.67 0.72 -4.16 160 8275 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 3943 0 2232 6265 3365 97.10 32 21.38 CHANGED +lhlhThGCphNhhDSEhhtuhLppt.Gaphssp...c.cADllllNTCulppsAcpcshppltchpchc............st.............................thls...........VsGChuQpts..cclhcths..hDhllG .....................lhlhThGCphNhh...DSEthhshL..p.s.t........G......Y....p......h........s.........s...s..........c.....c............A.....D..........ll..llNTCulp-pAppcshptlsc..htphp................t....................................................................................................................................hhl.s................VsGCh.up.pps...c..p.l.h.c.p.h.s..VDhlhG...................................................................................................... 0 840 1539 1927 +4805 PF03649 UPF0014 Uncharacterised protein family (UPF0014) Bateman A anon SWISS-PROT Family \N 20.50 20.50 20.70 22.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.77 0.70 -5.22 3 1867 2009-09-14 12:52:49 2003-04-07 12:59:11 8 5 1800 0 366 1096 131 235.30 37 93.21 CHANGED hpHsllpL.ht-LuaALhLVllAILIua+EKLuLEKcILWuuGRAIIQLIIVGYVLtYIFSlDsssusLLMlulMlTlAAalAp+RINh+uKohlu.aLaITlGAoThISL...AVLIIssslcFpPhYVIPLsGMIlGNTMNoluLAh-+LustVpSEpcpIps+LuLGATPtQAlAsaIRsAIRAALIPTVNpsKoVGLVSLPGMMoGhlLAGuDPlpAucYQIlIMFMILSTAoLSTIllCYLsYRcaaNu 
.......................................s............sLsl.uh..h.L.....l.l.l.s...l..h...lS.h...t...p...+L......ultK-llhushRAll....QLlll.Gal.Lp.Yl.F.p..l..s..s.h.hl.s..l.Lh.l..lhh.hh.sA.u..a..s..s.....tpR...u.....p.....h.....h.....h....+...s...h...h...............h...l...u...l.s..l..s..s...u.............l..s...L......sl...l....l....l..s..s.sh.....a.tP.t..tlIPIuGMlhGNuMsA....luLshppLtpphpp.c....ppplpptLuLGATs+pAutshl.Rcul+sullPTlDSs+Tl.GLVSLPG..MMoGlIluGssPlpAI+YQIhVh...Fhlh.uss...ul.o..sll.A.s.hLsY+paas........................ 0 125 227 309 +4806 PF01169 UPF0016 Uncharacterized protein family UPF0016 Finn RD, Bateman A anon Prosite Family This family contains integral membrane proteins of unknown function. Most members of the family contain two copies of a region that contains an EXGD motif. Each of these regions contains three predicted transmembrane regions. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.55 0.72 -3.84 145 2676 2012-10-03 02:02:08 2003-04-07 12:59:11 14 8 1140 0 1249 2385 333 76.00 30 60.26 CHANGED a....h..sohshlhlAElGDKTQlsslsLAuth.tpsh.sVhhGsslAhhlssslulhh.Gphl.......sphls.p..hlphluullFlhhG ..................htohshhhlAElGDKTQlsolhLAuca..........s.....h...sVhhGshluhhlsss.....l...ulhh.G.ph.l...........ushl..s.p......hlphluullFlhhu............................... 0 407 768 1060 +4807 PF01170 UPF0020 Putative RNA methylase family UPF0020 Finn RD, Bateman A anon Prosite Domain This domain is probably a methylase. It is associated with the THUMP domain that also occurs with RNA modification domains [1]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.09 0.71 -4.70 16 3872 2012-10-10 17:06:42 2003-04-07 12:59:11 13 26 3399 14 1064 14743 3401 195.00 31 40.56 CHANGED RsaRsastPusLpssLAtAhlpLushpsspsllDPhCGoGTlhIEAALhutphh...........................................hGsDhct+hlpuA+hNsppsGlschlphhphcsspLp...hss....ps-sllosPPYGh+lupptsl.ppLYpphhcph+chhps...hhshhhspppshppshppt..shcththhplthushpht ...............................................................................................................................RGYR....t.p.Gt...APl+.E.s.LAA...ul.l........h....h...........o........s....W..........p............s..........s...........p.........s.........l...lDPhC...GS...G..T...l..h...I...E......A..A...h.h.u.t..shAP.GhpRthhh.ph..............................................................................................................ttttttt.p........php.....hh..G....s.....D....h...D..s......c...h....l.......c.......h.........A......+............p........N.........A.........c.......p.........A............G.......l...........s.......c...........h..........I.............p..........a........p.....t....h......c...l....p....p...lp.....................spt.................thG...s.....l.l...s....N..P....P...Y...........G.....E.......R....l.................s.................s........c..........t.......t........l...................t.L........Y......p.....t.h....G...p.h..h.cp..hts..........hphh.lh..o..u..p......p..h....p.h.hthp......us+ph+hhNG.lcs.......................................................................................................................... 0 349 629 868 +4808 PF01171 ATP_bind_3 UPF0021;ATP_bind3; PP-loop family Finn RD, Bateman A, Yeats C anon Prosite Family This family of proteins belongs to the PP-loop superfamily [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.14 0.71 -4.79 33 7781 2012-10-02 18:00:56 2003-04-07 12:59:11 15 37 4816 13 2118 7133 3921 174.70 26 46.74 CHANGED phllAlSGGsDShsLLhlLtchttp..h..........plsslHlcHslR.p.pucp-tpalpphCpphs..lslhltphsh......st.....tpslEptARchRYchhpchhtppshphllhAHHtDDQhEThlhpLhRGsuh.....tuLsu..hts.....tp.h........ts.hpllRPLL.shs+p-l.pascppplsahcDpSNts.pYp.RNplRpp ................................................................lhlulS.G.GhDS..h.s.LLp..lL...t...p..h....p..p...p.h.sh...................plts..l...p...ls....a...s...h.....p.....t........t.....u........t.t..........t...p..hl..p.p..h.s.p....p.h.s.............l..s.....h...t.l.hp..hsh............................t..t.....s.p.s.h.p....s.....h.......u......R........p.............h............R..................h...........t...........h...........h....t.....c....h.....s..............p...........c..........h......s........h.....s...............t.............l....shu.H......H..t.....D.......D.th....E.....T.h.lh.....s..l.h..+.G..u.th............t.s.l.t....u...hs...........t.p..................................................................ts...hhl...lR...PL..l.........hh........p.c.p......-..l....h....p....a............sp..........t.....p........tl.........h..h...D...s....s....t....h...R............................................................................................................................. 1 734 1357 1799 +4809 PF01172 SBDS UPF0023; Shwachman-Bodian-Diamond syndrome (SBDS) protein Finn RD, Bateman A, Moxon SJ, Mistry J, Wood V anon Prosite Family This family is highly conserved in species ranging from archaea to vertebrates and plants. The family contains several Shwachman-Bodian-Diamond syndrome (SBDS) proteins from both mouse and humans. 
Shwachman-Diamond syndrome is an autosomal recessive disorder with clinical features that include pancreatic exocrine insufficiency, haematological dysfunction and skeletal abnormalities. It is characterised by bone marrow failure and leukemia predisposition. Members of this family play a role in RNA metabolism [2] [3]. In yeast these proteins have been shown to be critical for the release and recycling of the nucleolar shuttling factor Tif6 from pre-60S ribosomes, a key step in 60S maturation and translational activation of ribosomes [4]. This data links defective late 60S subunit maturation to an inherited bone marrow failure syndrome associated with leukemia predisposition [4]. 24.40 24.40 25.50 27.00 20.00 24.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.81 0.72 -4.19 103 652 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 473 7 444 614 94 91.60 36 38.64 CHANGED ssssll+h+.p...tGc+..FElllhsspshpa+.........pGp..........phsls-VltspplFpss...s+Gpp.....AucppLppsF.......G.Ts-hpclhcpILc+GElQloscpR+ph .............................ssslVRhK...p...tGc+....FEl..hs..a....s...c...l...hpaR.........sst.................-tcL--VLps.ppVFtss....uKGpt.......................Ascp-LppsF..................................G.Tsc.p-Ihpp..............ILcKGE.lQlopcpRpt...................................... 0 143 255 370 +4810 PF01142 TruD tRNA pseudouridine synthase D (TruD) Finn RD, Bateman A, Moxon SJ anon Prosite Family TruD is responsible for synthesis of pseudouridine from uracil-13 in transfer RNAs [1]. The structure of TruD reveals an overall V-shaped molecule which contains an RNA-binding cleft [2]. 
19.20 19.20 19.80 19.20 19.00 19.00 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.33 0.70 -5.79 69 2142 2009-01-15 18:05:59 2003-04-07 12:59:11 13 12 1702 7 755 1714 284 327.60 28 79.21 CHANGED M....................sth.h..htstPtssupl+sp......P-DFhVcEhh..s.hpssG.cG-H.lhlplcKpuhsTttlscplA+hhslst+cluaAGhKDR+AlTpQahSl...hsspp......s.ch.........t......slpllphs.RHs+KL+hGsLtGN+FpIhl...R.sls......st...lppplptlttt.GVPNYFG.QRFGps.usNhhhupthhpu................thp.......tc+....................................................ph+uhalSAhpSaLFNpllSpRlc......ttshspslsGDhhhhts.........stshhhspt....tthpt+ltptclp.ouPLhGp.sthhs.pupst.phEpplLsptsh...hhp...thtphth...cstRRslhlhs.pshp......h...sp.....slplpFtLPsGuYATslLREl ...................................................................................................................................h..................t....t.shhpt..........spDF.V.E...............................h...............t...................s............u....p..........G.....-..a..lhlplhKpshsTh..sh....phLu+....hh....t.l.....p.....+.......p.lu..aAGhKD++AlTpQahshp...hsttp............hssh................................................c....shplh.phs..h+pcKL+lGsLpGNt...Fplh.L.............R..pl.......s............................ts................................lc....pt....L........p...........pl................p.......p................t....G...h.sNYFG.QRFG..t......t...........s..s..........s.........h.......p...........huh..phhps......................................ph.................hppc..........................................................................................................................................................................s..h..Rphals..AhpS...hlFNphlupRlc.................phshp.ps....ltGDhl.hts...................................................ptph.ah.s.....p.t.........tt...ppchtt....t.c...l.................hs...us.L....Gt...s.hh.......s....ptt...sh..th-pthl...stp...s..........h
t..........hhh.p....p.h.........p...usR...Rhhhhhs..p.plp.ap..h.....pt...............................................slplpFhLstGuaATsllREl................................................................................................................................. 0 257 430 625 +4811 PF01139 RtcB UPF0027; tRNA-splicing ligase RtcB Finn RD, Bateman A, Eberhardt R anon Prosite Family This family of RNA ligases (EC:6.5.1.3) join 2',3'-cyclic phosphate and 5'-OH ends. They catalyse the splicing of tRNA and may also participate in tRNA repair and recovery from stress-induced RNA damage [1-3]. 20.00 20.00 20.00 20.20 17.80 19.90 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.67 0.70 -5.48 136 2812 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 1686 6 719 2149 307 317.50 32 93.53 CHANGED hpssshlas..ptthl-..................stulcQltslA.sL.....Psl..hc..shsMPDsHhGhG.hsIGuVhuhc.....uhlsPuuVGhDIsCGhphlpT.sLphp-l.....p.s...ph...pcLhstlhcslPsG...........hs......ttst...h..pthpphhtpthphsh.cpth.tht.................................s.ptlsppstp+ut...........t........QLGTLGuGNHFlElp.........h......th.t......h.slpc........sp....lhlhlHoGSRGlGppluscalchhpp....th....tcht...h..pls...........................................................Dc.pLAhhshsoptupcYlpAMshAtsaAhsNRphltchltc........................sh...pphh.........hthphl............h-lsHNhs...........chE..............pH....................tpplhVHRKGATRAh................hGp.llIPGSMGssSYlltGpttutt.......oasSssHGAGRhhSRspA++...................phshccltcphtt.......lpspspps................lh-EuPtAYKcl-pVlpsh..sslschVs+L+PlsslKG 
........................................................hhs............l-...................t.uhpQl....hssA....ph........Phh..hp.....hhsMPDh.H....G...h...G....hsIGushshp.................shl.P..uhVGhDIsC..Ghthhps.sl.ttch.......t........p.h.tp.hhptl....ph...hs.u........................................................................................................................................h.pp.h..........................tplGolGsGNHFhElp.................................................lp..t...sp................lhlhlHoGSRGlGptlsp....ahthht.................................................................................................................................................................thshh.....to.....h...pp...Yhtthth...A.taA.hNRphhht.hhp..........................th..tt..............h.....................hsstHNhs.............phc...........................h......................pphhlpRKGAs..u.................................Gt.hl.IPGoMGshSalltGh.ss..................h..hSssHGAGRhhuRsp.s+p.....................h.sh.pp...pt.htt.......l.spp.t.................hh-Eh..........P.AYKs..lctVhts.......tslhphhhpl+.lhshK..................................................................... 
0 285 480 624 +4812 PF01205 UPF0029 Uncharacterized protein family UPF0029 Finn RD, Bateman A anon Prosite Family \N 20.50 20.50 20.70 21.10 20.10 20.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.35 0.72 -4.37 179 4099 2012-10-03 01:04:38 2003-04-07 12:59:11 14 14 3898 2 953 2747 154 107.50 44 49.97 CHANGED lK+S+Fluhht.lssp.p......-spphlpplcppa.c....AsH.psaAahl.....s.tssp....................................h.chsDDGEPuGTAGpPhL.plLpt....p.......sltslslVVsRYFGGl+LGsGGLlRAYssusppulp ..................................................................lKKSRFIshl.t...lps..-.........-A+sa..lppl+..p..c..+hc.........AsH.sChAahl.....G.t.p.s..p...................................................................................................h.choDDGEPuG.T.AGhPhL.shLpt....p............slssls.....lVVs...R.....YFGGI..+LGsGGLlRAYususspul.t..................... 0 306 605 816 +4813 PF01256 Carb_kinase UPF0031; carb_kinase; Carbohydrate kinase Finn RD, Bateman A, Yeats C anon Prosite Family This family is related to Pfam:PF02110 and Pfam:PF00294 implying that it also is a carbohydrate kinase. (personal obs Yeats C). 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.45 0.70 -5.09 9 4484 2012-10-03 06:25:16 2003-04-07 12:59:11 12 18 4216 36 1246 6104 1631 239.10 30 56.40 CHANGED shlluGspsasGAslhuuhuAhphG..sGllpVts..sshsslpshhPclhscsh............st.h....thhpphcslllGPGLGps.th......hlphlLspppP...lllDADuLthls.p........sthpssslLTPHssEFtRLsssss..........tss+.phspphupphsssllLKGstsllss...suslhhspsGsushApGGpGDVLuGhIuuhLupst.........sshcAshsusahHutAuphhspctuh..shhsspltchlsp ............................................................................................................................................h.hllGG.....s.....t....s....hsGAs......h......h.....u....u......t....A..A....l......+....u....G..................AG.L...V..................p....l..h.s.......t.......p.....s.....h....s....s....l....t..s...t.....h..P..E...h..M.s.tshp.............................................t.h........t........h....p..p.......s....s..s....l.l....l...G....P....G....L....G....p.....s....p.h...s.tp................................hl.p...t...l.....h......p.....p....t....p...P.....................lllD.............A.....DAL..s..ll.upp.......................................thpp.p..h.....l.....lT...P.....HssEhs.R.L.h.u.h.sh.t........................................hpp..s...+...h.t....s...s....p....p..h...s..p......c...h.....s.....s...s......l...V.........L.........K.........G........s........s............T..........l...........l........s..s........................ss.....p.......h..h....l.......s....s....s....G........s.........s.........u...........h........A........o...G.........G.GD.V....L.uG.hIu..u....l...l.u.Q.th........................................ss...h-.A....u..t..s....u.s..h....lHu...hA..uch.hs.t.p...h.....hhsscl.t........................................................................................ 
0 429 805 1066 +4814 PF00902 TatC UPF0032; Sec-independent protein translocase protein (TatC) Bateman A, Moxon SJ anon Pfam-B_1212 (release 3.0) Family The bacterial Tat system has a remarkable ability to transport folded proteins even enzyme complexes across the cytoplasmic membrane. It is structurally and mechanistically similar to the Delta pH-driven thylakoidal protein import pathway. A functional Tat system or Delta pH-dependent pathway requires three integral membrane proteins: TatA/Tha4, TatB/Hcf106 and TatC/cpTatC. The TatC protein is essential for the function of both pathways. It might be involved in twin-arginine signal peptide recognition, protein translocation and proton translocation. Sequence analysis predicts that TatC contains six transmembrane helices (TMHs), and experimental data confirmed that N- and C-termini of TatC or cpTatC are exposed to the cytoplasmic or stromal face of the membrane. The cytoplasmic N-terminus and the first cytoplasmic loop region of the Escherichia coli TatC protein are essential for protein export. At least two TatC molecules co-exist within each Tat translocon [1]. 
25.10 25.10 25.20 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.57 0.70 -4.94 144 3911 2009-01-15 18:05:59 2003-04-07 12:59:11 13 4 3551 0 920 2811 2612 213.10 33 80.99 CHANGED hpH.LpELRpRllhsllulllshh.l....sa.ha.........................spt...........lhp.hl.tpPhtthh........................................................................................................pllhhs..st-sFh.shl+luhhhulhluhPhllaQlWtFlsP.GL.....acpE++hhhhhlhhuslLFhhGshFuYallhPhshpFhhs....ass......................................th...h..........pshhslspYlshlhphhlsFGlsFpl.PllhhhLsp..hGllssphltctR.+asllhhhllAAllTP.PDshophhlulP ...................................................................h.pHLhELRpRLlhsllulllhhl.shh.hF.................................spp.........................lhp..hl..stPl.h.p.t.h.t.t.........................................................................................................................................................................................................................phlsos..lspsFh.s.lKlshhsulhluhPll.lYQlWsFluP..GL.....Ycp....E.+...+h....hh...shlh..so...slLFhhGhsFuYalVhPhshp.Fhhs........hus....................................................................................................................ps.s........pshhslssYlsFlhpl.hh.uF....GlsFE.l...Pl...llh.hLsh..hGllosppLp.ct.R...+asllshFllu.ullTP.PDlhSQhlLAlP.............................. 0 296 615 790 +4815 PF01206 TusA UPF0033; SirA; Sulfurtransferase TusA Finn RD, Bateman A, Eberhardt R anon Prosite Family This family includes the TusA sulfurtransferases [1]. 
27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.07 0.72 -4.37 195 5303 2012-10-01 20:42:06 2003-04-07 12:59:11 12 41 2909 8 1162 3036 759 69.70 28 50.37 CHANGED tplDspGhtCPhPllcs+cslcph.psG.....phl...p.....llsscssuh.pslspascppGt.pllph..pp...........ps..sta.phhl.cK ............pLDshGhtCPhPllts+cslp....ph.psG......-hL...p..................llsssssuh.psIst.asp.pp.Ga..pllsh.pp...........ss....ssa.phhlpK..................................... 0 395 744 997 +4816 PF02381 MraZ UPF0040; MraZ protein Bateman A anon Bairoch A Family This small 70 amino acid domain is found duplicated in a family of bacterial proteins. These proteins may be DNA-binding transcription factors (Pers. comm. A Andreeva & A Murzin). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.18 0.72 -4.22 23 5872 2012-10-01 20:57:08 2003-04-07 12:59:11 13 3 3008 36 1347 3446 3377 70.70 25 92.00 CHANGED hatGshphplDspGRlhlPupLRpthshp....h.slshGhsspLElastspWcphtpc..tchshsphspRthtch ........................hhtGshphp.l.Ds.p.GRlh...lP......sphRc...t....hs...hp..............t....l..h.l.......h.........G.........h.....p..s.......p.........l.t.lastspW.pp.htpc...tp.hs...p....t.....h................................................... 
0 490 935 1179 +4817 PF03650 MPC UPF0041; Uncharacterised protein family (UPF0041) Bateman A anon SWISS-PROT Family \N 20.50 20.50 20.60 22.40 20.40 20.30 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.65 0.71 -4.18 6 818 2012-10-03 12:15:12 2003-04-07 12:59:11 8 10 299 0 552 772 7 104.80 35 81.39 CHANGED hhphth+thWpp.sGPKTVHFWAPshKWGLlhAGluDlKRss-hlSGsQshALluTuuIWTRauhllpP+NYLLuoVNFFltssuusQLsRIs..sYphpssD.hpp...hhhpth.....tucphup .......................h.........hpt..sh.posHF......WuPl...hp.....WGlslA...ulsD.h...p.+...s.sEh.......ISss..............os................AL..sshu.h.lah..............R..au.hh....lpP+Na...hLhusphhstssthhQhhRhh.....pap...h....t...........................ttt............................. 0 173 292 451 +4818 PF03668 ATP_bind_2 UPF0042; ATP_bind2; P-loop ATPase protein family Bateman A, Yeats C anon SWISS-PROT Family This family contains an ATP-binding site and could be an ATPase (personal obs:C Yeats). 20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.89 0.70 -5.33 4 3506 2012-10-05 12:31:08 2003-04-07 12:59:11 10 6 3441 0 667 2112 733 275.40 43 93.34 CHANGED hpllIloGhSGAGKoVAl+uLEDhGaYCVDNLPssLLPchs-hht...s+.oplAlshDlRshs.osclhctt.phhpc.thoP+llFL-AccsTLlRRYp-oRR.HPLuucsLsLEu.IstEpchLEPL+u+AsLIlDTSchSs+sLtEplccthtGsptcpholsVESFGFKYGIPlDADhVFDVRFLPNPHWsspLRPhTGh-tsVusal.++sEspcFl.ph+chLshhLPhhc+ps+SYlTIAIGCTGGKHRSVhIAEpLu-YF+uctpssh..pHRsLE+c 
............................................pllIloGhSGuGKol.AlpsL........E...DhGaaCVD...N......L....P.ssL....LPc.h.s.c..h...ht.....................t.......s....p........p........c.l.AlslDlR.s..t..s..h.h....s.....p..l.h....ph....lt..p..h..t.s....p.......sh...s..hp.lLFLDAscpsLlpRY.p.-TRRhHPL...u...............s..........p..................s..................h...............h...........l....-........u..Ipt.E....RclLps.L.+stAs..hl.lDTopho......s..+cL+c...p.......lpp..t...h...t...s...p...p..p....p.....p...h....p....l..p....l.SFGF.K.a..G..lP.l.D......A.....D.....hVFDV.R.F.LPNPaa.s.cLR.P....h...TGlDp.sVtsYlhp.ps-sp.pFhpplpsh.Lchh...LPtap.c.-u......+S.............h.LTlAIGCTGGpHRSVhlAEpLuc.hL....p.......p.......c.......t.......p....s.....st......pHR-lt+...................................... 1 224 442 572 +4819 PF01985 CRS1_YhbY UPF0044; CRS1 / YhbY (CRM) domain Enright A, Ouzounis C, Bateman A anon Enright A Domain Escherichia coli YhbY is associated with pre-50S ribosomal subunits, which implies a function in ribosome assembly. GFP fused to a single-domain CRM protein from maize localises to the nucleolus, suggesting that an analogous activity may have been retained in plants [4]. A CRM domain containing protein in plant chloroplasts has been shown to function in group I and II intron splicing [5].\ In vitro experiments with an isolated maize CRM domain have shown it to have RNA binding activity. These and other results suggest that the CRM domain evolved in the context of ribosome function prior to the divergence of Archaea and Bacteria, that this function has been maintained in extant prokaryotes, and that the domain was recruited to serve as an RNA binding module during the evolution of plant genomes [4]. YhbY has a fold similar to that of the C-terminal domain of translation initiation factor 3 (IF3C), which binds to 16S rRNA in the 30S ribosome [1][2]. 
20.80 20.80 21.10 21.40 19.60 19.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.49 0.72 -3.98 144 3330 2009-09-11 02:11:35 2003-04-07 12:59:11 16 11 2788 4 855 1854 228 83.80 36 62.62 CHANGED Lo.scp+chLRuhu+pL..cPl.l...tlG+sGlocullpplcpsLcs+ELlKl+lhp.s..stps....pcphsppls.........pp.ss.upllp....hlGcsllLYR ...............LosKQ+paL+uhAHsL...cPl..l.............lGcsGls-slltplcpAL-.p.+ELIKVclhp..s.....sc..-s........pppls-tls..........................................cp.os.uphVQ.....hI.G.+.h..lVLYR.............................. 0 200 497 697 +4820 PF01894 UPF0047 Uncharacterised protein family UPF0047 Enright A, Ouzounis C, Bateman A anon Enright A Family This family has no known function. The alignment contains a conserved aspartate and histidine that may be functionally important. 20.20 20.20 28.00 21.30 18.80 17.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.34 0.71 -4.28 109 2428 2009-01-15 18:05:59 2003-04-07 12:59:11 12 12 2035 22 936 1913 1389 118.70 38 81.59 CHANGED hcITspl...pphlppo..ulpsGlshlFs..HTTAulslNE.......shDPsVppDltphlscllP......pst..............tYpHsp......................ssusAHl+usLh.GsSlolPlpsGcLhLGTWQuIahsEaDs........s+.pRplhlplh .........................h.lTccl..pph.s.p...h....s......s.lps.GL..lplhl..HToAuLslNE............NsDP.s.V+pDhpph....hp+l..l.Pcss...................................tY.cHsh.p.Gs................................DchsAHlK.uull.G.sSls.lPl..p.pG+LtLG..........TWQGIaLsEacs........sp.pR+.llspl.................................... 
0 352 620 800 +4821 PF01458 UPF0051 Uncharacterized protein family (UPF0051) Bateman A anon Prodom_3219 (release 99.1) Family \N 20.80 20.80 21.30 20.80 20.70 19.70 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.03 0.70 -4.91 122 7024 2009-09-11 15:27:28 2003-04-07 12:59:11 12 12 3542 6 1556 4676 4761 229.60 32 51.85 CHANGED s.sssstts+hllhltcsupss..ll-p.................s.ssssshpssssclh.lscsAplpahp.lpp.......spssh.phsstts..phtp.suphp.thh.shGuphs+tphpspL.tGpsupsplpulhh.spsppphDhpstlpHtu.p.spop.hh+ullpc.....pu..cslFpGhlplppsA.pposup.pscsLlL.ocpAcssohPpLEIhsDDV.csuHuAol.GplDc-pLFYLpoRGlscpcApplllpu.F ..........................................................................s.thstapRsllls--supls..alEs..................ssshtsss.sh..puulV...El.h.ltc.sApl+...ass.lpsh..........upssh....sh....ss.+R.u.....hs.t.c.suphp..Wsps..ph.G.u.tl..o..h..ph..ssshL..pG-supuchholuh.ss.s...p...Q...p.....t.Ds.ss+hhHhu.p.Tp.SpIlpKul.sps................p.u.ps.sapGhlpltpsA.ptu.c..........up.pscsLll..s..cpupu.-ThPhl-I.......c.ss...sl.ph.pHtAol.u+ls--QLFYLhSRGlscc-ApphlVpGF...................................... 
0 534 1049 1341 +4822 PF01933 UPF0052 Uncharacterised protein family UPF0052 Enright A, Ouzounis C, Bateman A anon Enright A Family \N 21.00 21.00 23.00 22.80 20.90 20.40 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.02 0.70 -5.10 32 3153 2009-09-11 09:44:17 2003-04-07 12:59:11 13 5 2883 21 736 1931 763 296.60 33 87.30 CHANGED lVslGGGTGLuplLpGL+ph.................ssclTAIVTVuDDGGSoGcLRcths.hlsPGDlRsslhsL.u-.....pphhtplhpaRFp.....................................................ssstLpGHshGNLhLsuhpphtsshtcAlphhuplLtlcG..+llPh..opcslsLtAchp.....sG.........phlhGEspIsp...................................................ttthlcclhlp.........sppspss.cul-AIp-ADhIllGPGShaTSllP.LllssIscAlpcop.A.hlhlsNlhsp.G.Es......sthsssc+lcshtptsucthhDhlllsspt.sstp.tpphhpcssp.sphsttthcphshphhtsphlttt ...............................................................................................lVslGGGpGLu.hlLpuL+ph........................................ssclTAIVTluDsGGSoGclRcph.s.hl....sP...GDl.RNsLs...uL.......o-....................sphhpp..l.FpY.RFs..............................................................................ssupLuGHslGN......L.hls.u.l.........s.p.h..p.s.s...........h.h.....pA....l....p.hlsplLplcG.....+llPh.....o.p.p.slsLp..Ahhp................sG..................phlh...GE..s..pIsp............................................................................................................................................hps..hlcclhlp.......................................spstAspcs.l.....cA.....IpcA....D..h...IllGPG.ShaTSllPsLll.....sp.lucAl....t.......c........o........p...A.............hlYl.sNlhsp..G.ET.............ssho.stc+l....cslppa.....l....u.....p........hl...D....s....V..l...l..ss..p.......s.....p.t...h.t.p.t.hh...p.c..h...h..s...p.h.sh.t...t....pp..hpsh.......ht.............................................................................................. 
0 227 481 642 +4823 PF02130 UPF0054 Uncharacterized protein family UPF0054 Mian N, Bateman A anon IPR002036 Family \N 25.00 25.00 27.50 26.30 23.70 24.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.90 0.71 -4.91 159 4595 2009-01-15 18:05:59 2003-04-07 12:59:11 12 13 4493 7 1042 2973 2301 142.80 31 85.81 CHANGED l..phppptths..................th..lpthhphhttpht.................lslhhsscpplppLNppaRs+DtsTDVLSF.shppsst....................................................LGDIlluh-tstcpAtc.hs.+s..hpcclsaLhlHGhLHLl..G..YDHp.p.p...p-tptMcthEpplLppls .......................................h.tp............................hpphlphsht.p.thttt.............ElolthVDspchppLNtpYR.scD...........pPTDVLSF..shcp.tth.t.h.........................................................................LGDll..Ishphspc..p.........ApEhG..+o................hccchuaLs............lHGhLHLLG....YDHh..p..s......-EtccM.uhppcILpth.t.................... 0 363 685 878 +4824 PF01679 Pmp3 UPF0057; Proteolipid membrane potential modulator Bateman A anon Pfam-B_2192 (release 4.1) Family Pmp3 is an evolutionarily conserved proteolipid in the plasma membrane which, in S. pombe, is transcriptionally regulated by the Spc1 stress MAPK (mitogen-activated protein kinases) pathway. It functions to modulate the membrane potential, particularly to resist high cellular cation concentration. In eukaryotic organisms, stress-activated mitogen-activated protein kinases play crucial roles in transmitting environmental signals that will regulate gene expression for allowing the cell to adapt to cellular stress. Pmp3-like proteins are highly conserved in bacteria, yeast, nematode and plants. 
20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.53 0.72 -4.26 117 1655 2009-01-15 18:05:59 2003-04-07 12:59:11 12 10 1131 0 687 1065 84 50.50 45 65.92 CHANGED tcl..l.hlllAIl....LPPluVal..ctG.hutch..hINllLTlLG.a....lP........GllHAlallhp .............................t..hhcllluIl....LPPLGVhl....tpG..h...GhsF.....lINILL..TL..LG.Y...................lP..............GlIHAhalh.................... 1 179 378 588 +4825 PF01893 UPF0058 Uncharacterised protein family UPF0058 Enright A, Ouzounis C, Bateman A anon Enright A Family This archaebacterial protein has no known function. 25.00 25.00 40.60 40.50 24.10 19.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.00 0.72 -3.93 23 129 2009-01-15 18:05:59 2003-04-07 12:59:11 11 1 72 2 82 129 5 85.20 36 91.95 CHANGED MH....K-ELlpLHphhspl++ahc.......ppsssptFcpY-pLcIsPsHlH+oKsEHKaAIFlLusslAphhu.pc-ts.spplup+h+-hA-csh+ ......M+K-ELlcLHphLsplpchhcp......ppssst.hcpYcpLslpPsaIHKoKsEHKtAlFlLussluphhu.pc.cts.stphstthtp.s.p...t.................. 0 13 54 73 +4826 PF02694 UPF0060 Uncharacterised BCR, YnfA/UPF0060 family Mian N, Bateman A anon COG1742 Family \N 22.30 22.30 22.50 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.62 0.72 -4.03 4 1396 2012-10-02 19:55:49 2003-04-07 12:59:11 10 3 1342 0 296 828 194 106.10 50 96.41 CHANGED hl+olLLFlhAuLsEIGGuaLVWLWlREs+uhhhulsuuIhLsVYGalsTLQP.AsFGRVYAAYGGVFlshulhWGhhVDshpsDhaDWlGAhIsLsGVhVIhaAs.Ru ....................................hl+ohhLFhlsALsEIhG.C.aLsW.L.WL+c.s..tShahllPuuloLuLFsaLLT.LpP..uu...u.........GRVYAAYGGVYlssuL.hWLhlVDG..h+.shaDhhGAlIsLsGhhlIlhu..R............................. 
1 88 179 236 +4827 PF02696 UPF0061 Uncharacterized ACR, YdiU/UPF0061 family Mian N, Bateman A anon COG0397 Family \N 19.90 19.90 20.10 19.90 19.60 19.80 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.86 0.70 -6.20 3 2147 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 1912 0 684 1870 2381 431.40 40 89.41 CHANGED soLSslTcths.lspsYsAhDPVsLshF.ssRLla+Ns+LAssLu.....hcPSpLpcNuhsEh..V.st..EuLLsG.......shsPLApVYSGaQFGsaAuQLGDGRGlLLG-.hLsDGsohDWp...LKGAGhTPYSRhGDGRAVLRSoIREsLASEALHpLGIPTTRALSLVpossShspR-ssEP..uAVlhRFAPSHIRFGsFE+F+YRH-oEpltQLuDasIccYau......................Esph..pluDDE........D......KYctFFRcVVuRsAsLVAQWQAsGFAHGVLNTDNMSIhGLTlDYGPFGFLDcYEPuFIsNHsDauGRYSFuNQPAVshWNLQRLApoLSsllus-A..........LN...EALEc................YcpVaLT+YuplMuQRLGls.sLtcpMo.......Ep.....KEhs-sLVspLLslLApo+sDYscFFtpLp......Focs+SDsAsLhht.DE.....Flsu......A-a--WFAth.....hpG.......+LQ......QDhLppVPPoEhAARsoLhcpANPLhVLRsWllEcVl-...Aup+DG........DhosL++LaptLpNPa ...........................................................................................................................................sspll.h.hs.p...t.lA.t.tLu............s.t..h......p..t.t.h.h...........huG......ppl.h....sG.....................................h.p.PlAp...sY.u.G......H..Q......FG.s.......a..ss.....pLG.DGRulL..L..G..E......h.........t....s.....s............G.....p.........p....hD.hp...................LKG....uGhTPY............S.R..h......G....D.....GRAVLRSslREaLsSE..................AMa.t.L.GIPTTR..ALul....lso.s.....p.......sV..h......R..............E........p..........h..........-...s..............GAllhRlAsS.HlRhGpFE....a...a..............h..............h.....p...............t...........p.....................-.......p..............l......+.p........LAD..a....s...I.c.+...aa.P.............................................................................................th.t.ssp...........................p..............tY.hth....hppV...sp....RsApLlApWQsVGFsHGVMNTDNMS.IhG
.TlDYGPauFhD.........s.......a-PsalsNp...sD....p.p.G.RYuasNQPsluhWNLt....+LApsL..h..P..l..l...s.......-..t...............................lp.......p.u...L..s.p...................................................................ap.ph.......h.....s...pa....t....p.t....M....c...pKL...Gl................hs...........................p-...Dp...t...Ll..spLh.plM..tp.....pp.s..DYTp.hF...RtLu....................................t..p.t....t....s...s...s..s..l...h............t.t.....................hh.s.p....................................................sta..c.t.Whtta.........tt..........................Rlt.........................pc...................t.s....t.....p.................p.tph..Mp......ps.NPthl.RNalsppAIc...tApp.G...........Dh..s.lccLhpsLppPa.................................................................................................. 0 200 396 551 +4828 PF03401 TctC UPF0065; Bug; Tripartite tricarboxylate transporter family receptor Mifsud W anon Pfam-B_3343 (release 6.6) Family These probable extra-cytoplasmic solute receptors are strongly overrepresented in several beta-proteobacteria [1]. This family, formerly known as Bug - Bordetella uptake gene (bug) product - is a family of bacterial tripartite tricarboxylate receptors of the extracytoplasmic solute binding receptor-dependent transporter group of families, distinct from the ABC and TRAP-T families [3]. The TctABC system has been characterised in S. typhimurium [2], and TctC is the extracytoplasmic tricarboxylate-binding receptor which binds the transporters TctA and TctB, two integral membrane proteins. Complete three-component systems are found only in bacteria [2]. 
21.20 21.20 21.20 21.50 21.00 21.10 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.38 0.70 -5.47 9 6762 2012-10-03 15:33:52 2003-04-07 12:59:11 9 22 1130 7 2903 6666 2997 269.60 28 83.24 CHANGED lARhluppMucpLGQPVlV-N+sGAGG.IuushVApuAsDGYTlhl.susuh.slssahYsplsYcsh+DhsPVs.lsssPhVLVVsusSshpslp-LlshA+ssPsKLoaASsGhGoosHLsuElhpucsGsphhHVPYKGuuPAlpDLluG+VDh.MhsslsostshIpus+lRALAVsotpRpstLP-VPTlAEsG.ltGh-shsWaGhhAPpGTPssVlp+LssAhppAhpsPtlhcthpshGhpsh...ssoPpphsphhpuEspRWutLIpchGl ............................................................................ARhlupt.h..s..p..t..L....G...p..s..l.l.V..-N......+.s.G.A.u.G..slG.s..s.t.l.u..c.u.s..s...D..G.Y.Tl.hh....s.s..s.uh....sl....ss...t...l...h...t...p...l...s...a..c......s...hc.D.h..s.P.l.u.h...l.u...p.s.s.h.l.l...l..V.s......s....s....s....P....h....c....o.lp-l...ls..h..u..K.s...p...P...u..p...l.s.a..u...o..u.G...s...G.o.s.s....H....L..s....s.t...h....h....t.p......t.s...G....l......c......h....p....a.......V...P...Y...+....G...u..u...s.A..ls.sll...u...G..plsh.....hh........s...s........h.s.s.....s....h....s.....h.....l.......c......u.G..+..l+.s.L...A......V..s.......u...s.....p.........R.........h.........s.........t...........l.........P.........-.....V..PT........h........s.E.t...............G.....h.......s.........h...p....h.....s.....s.......W....h...........G........l........h.....u...........P..........s.us...P...s...sllpcLssslp.p..sl.p.s..s.p.h..p.......p..p.h.p.p.h..G..h...p...s.h......st.s...s..p.ph..s.p...h...lpp-...htca.tpllct.t.h........................................................................... 
0 403 1505 2405 +4829 PF01980 UPF0066 Uncharacterised protein family UPF0066 Enright A, Ouzounis C, Bateman A anon Enright A Family \N 21.40 21.40 21.40 22.60 20.70 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.58 0.71 -4.15 184 2012 2009-01-15 18:05:59 2003-04-07 12:59:11 11 13 1761 6 580 1490 102 122.50 43 54.57 CHANGED p+hssP+Q.sth..............sspuplclhspa....tpuLpGLEpaSHlhllahhHcs..................................hhp.P+h.t.......ss.tphGVFATRSPtRP...NPIGlSlVcLh..plcssp.....LtlpGlDh....................lDGTPllDIKP.Yls...hh.Ds ...............................................KhuhPRQ...sslsp......sspuplcLh.s.a..........ts-ulcGLEs..F..SHlWllFhFHps.............................................................................p...shcsp.V.R..P.PRL..s........Gst+hGVFATRSsaRP...NPIGhSlVcLc..plc..spp....................Lplu.ulDL....................lDGTPllDIKPYlPas..p........................................... 0 215 365 478 +4830 PF03006 HlyIII UPF0073; Haemolysin-III related Bateman A anon Pfam-B_1581 (release 6.4) Family Members of this family are integral membrane proteins. This family includes a protein with hemolytic activity from Bacillus cereus [1]. It has been proposed that YOL002c encodes a Saccharomyces cerevisiae protein that plays a key role in metabolic pathways that regulate lipid and phosphate metabolism [2]. In eukaryotes, members are seven-transmembrane pass molecules found to encode functional receptors with a broad range of apparent ligand specificities, including progestin and adipoQ receptors, and hence have been named PAQR proteins [3]. The mammalian members include progesterone binding proteins [4]. Unlike the case with GPCR receptor proteins, the evolutionary ancestry of the members of this family can be traced back to the Archaea. 
26.20 26.20 26.50 26.50 26.10 26.10 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.93 0.70 -4.94 81 4985 2009-01-15 18:05:59 2003-04-07 12:59:11 15 18 3463 0 1622 3448 460 203.00 27 78.87 CHANGED ashppEhsNshTHhlGslhshhsh....hhlhshuthtt....s.ht...........................................................................hlshslashuhhhhhhsSslaHt...hsptc..................scthhppl.D+suIalhIAGo....YsPhhhhsh.....................................p.s.hs.hhhhsh....lWs.hulhGllhphhhh.........................tph..+hlpshhY....lshGahslhslhth..hhthsshs........lhhlhhGGlhYslGul.FYuh+.........................................................h...a.....tHtIaH..lFVlhuuhsHa ................................................................................................................................................htpEhsNshoHh........l.G.hlhs.l..h..sh....s.h..l.....h.h.h.u.s..tt............shht.....................................................................................................................................................................................hhu.h.slas...huhhhha.h.sSolYHs.h......h...st...p..................s+th.hp.th.DHsuI..alhIA....Go.......YTPh..hlhs.h.............................................p...s.........h.u.....h........hl..h...h.l.........................lW.s...h.u.lh.Gllh..c.h.hhh................................pph....+hl.sh......s..h..Y....lsM...GW.hs.l...hslhth......hht.....hss.hs.......................hhhlhh.G.GlhY...ol...Gsl...FYst.+...........................................................................................................................................h.......a...............tHtIaHlFVlsuuhhHa................................................................ 
0 475 832 1230 +4831 PF02082 Rrf2 UPF0074; Transcriptional regulator Mian N, Bateman A, Yeats C anon IPR000944 Family This family is related to Pfam:PF001022 and other transcription regulation families (personal obs: Yeats C). 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.41 0.72 -3.86 118 8068 2012-10-04 14:01:12 2003-04-07 12:59:11 15 22 3692 17 1984 8534 1782 81.20 29 53.34 CHANGED Mp.lospscYul+sLlhL...At.ptspt....lsspplAp..pt.sls.saLp+lltpLpcsGllcShRGstGGapLA+sspcIolt-llcul .......................................Mp.loscscYul+.sl.lhl................Ah.....p..s...p....t..t................loh.p.plAc..p..sl.s..saLcplhspL..p+..s.G..l..l..p..o...h......R...G.s.t....G.G.a..p.L....u.+.s.s.pc.Iols-llcs................................ 0 686 1302 1664 +4832 PF03702 UPF0075 Uncharacterised protein family (UPF0075) Bateman A anon SWISS-PROT Family The proteins is this family are about 370 amino acids long and have no known function. 
20.40 20.40 20.40 20.40 20.30 20.00 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.13 0.70 -5.56 9 2041 2012-10-02 23:34:14 2003-04-07 12:59:11 9 6 1964 6 505 1648 2620 357.50 42 96.82 CHANGED .hYIGlMSGTSlDGlDhsLlchst...scspLltuchh.PhPssLRpplhsLpps.sssoLpphGpL-pplGhLaucslsphLpppplpsspIpAIGsHGQTlhHpPs.uphPFThQlGDssllApcTGIssVuDFRR+DlAhGGQGAPLVPAFHpAlFtssspspsVLNIGGIuNlSlLhPstsVhGaDTGPGNsLhDAWhp+aps..tsaD+sGtaAupGpVstsLLspLLs-PaFuhPuPKSTGREhFNLsWLpcpLtpt..............spDVQATLsELTApoIs-ultpttss.spcLlVCGGGA+NslLMtRLuthLPs.hpVsoTsphGlssDhhEAhAFAWLAapplssLPGNlPuVTGApphshLGAIaPs ..........................t.hhIGlMSGTSLDG.lDssLsphct....................pp..l...t..h....l....u....s..h..s..........h.....Ph.s.......ss...........L.+.......ptl......h......s.....l......p.p......s.......p..................p......t......s...l........p......p......h...u..pL...-........pp............L.u.p.haApAVp.tLLp.....p...ps..l...p.s.pcI.sAI.GsHGQTVhH..p...P...................p......s............................h..........s...............a..........Tl.QIGD.sshlAtp....T...GIsVVuDFRp.+DlAh..........GGQ.GAPLVPAFHpuL....h.......s.......c............s....s......c.......p......R.......h...l......LNIGG..IANlol........L.............................s....................s.......p.......s...............l.........h.......G..a.....DT.GP.GNhLhDu...Wh..t..+.p.t..u......................psYDcsGpa.A.tpG..p..V......sL..L.p.p.....h......L..s.....-..P.YFstssPK.STGRE.hF....s....hs....W...Lp..p..p..Lp..phst....s...................spD.....l.tATLsc...l.......T....A....hoI.......u....cp....l....t.............t.........h............s.....s............s.......c..................clhV..CGGGu+NshLMtcLtsh.L.........s..................s.......hp.....V.p...oo.-.s....h..G.....l.s...u.....Dth.E.............AhAF.AWLAh.csl.t.Gl..PuNlPuV.....TGAsp..slLGsla..h.............................. 
0 141 308 416 +4833 PF02367 UPF0079 Uncharacterised P-loop hydrolase UPF0079 Bateman A anon PSI-BLAST P31805 Family This uncharacterised family contains a P-loop. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.79 0.71 -4.49 14 4392 2012-10-05 12:31:08 2003-04-07 12:59:11 12 12 4337 6 946 3129 2397 122.70 35 71.63 CHANGED ThplGpplup.L.....psGsVllLpGDLGAGKTshs+Glupul.Glpt..tlsSPTFsLlp.Yp....sshhlhHhDlYRLtsh-.hphh.....t.h-h.hs.-ullslEWuEthsp..Lspppl..plplchhspucpph .................................h.tlupplu...ph.h........p.su...s...l...lhLpGDLGAGKTTho+.Glsc....u.L....Ghpt........sV.p.SPT...a...o....ll.c...p....Yp......................sp.h..s.l..a..Hh..DlYRLs.cs.c.E.h.tth............................Ghc-a.h....hs.suls.llEWs.p.t.h.ts....hL...P...s....s.p...l...plplphtspsp...hh................................................................................ 1 332 637 811 +4835 PF03652 UPF0081 Uncharacterised protein family (UPF0081) Bateman A anon SWISS-PROT Family \N 29.20 29.20 29.20 29.60 29.10 28.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.52 0.71 -4.05 174 4409 2009-01-15 18:05:59 2003-04-07 12:59:11 10 7 4296 8 962 2703 2115 133.70 34 91.91 CHANGED tplLulDhGp+RIGlAlu-shthh.Apslpslp.pp.............tshppltpllpch......p..ss.tlVlGlP.........hsh.s.Gststtspts.ccFuppLppph.........sl...s.lhhhDERhoohtAp.phl..h.t.......tsh.pp.....pc++p...hlDphAAslILpsaL-p .............................plLulDhGoKplGVAluD..hs.hh.Aps.Lpslp...sppt.............p..shspl.tcllcca.......p..ss....tlVVGLP.......................hsM....s.Go.p.us.t.sp.ps.cpFup.pLpp.ph.............sl.....s.VthhDERLoTstAc..ptL...h...p..........................tsh..p+............pc++p.....hlDphAAsl..ILpsaL-.................................................... 
0 324 638 820 +4836 PF01868 UPF0086 DUF49; Domain of unknown function UPF0086 Enright A, Ouzounis C, Bateman A anon Enright A Family This family consists of several archaeal and eukaryotic proteins. The archaeal proteins are found to be expressed within ribosomal operons and several of the sequences are described as ribonuclease P protein subunit p29 proteins. 21.00 21.00 21.00 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.76 0.72 -4.34 36 446 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 413 9 311 441 62 88.50 28 44.89 CHANGED hlsspNllp....HELlGLclcVlpopssshlGlcGhVlcET+solhltTpcs...tplPKctslFpFplsstp............................VclcGshLhuR.PEcRlc++ ..............h....tpLhp....t-hpGshlpVscSps...suhlGlpGIVlpETcpshhl........l......s.cc...s.....p......h..............+.h..........lPKpsslFp..hplss.t..............................................................................................hplhGpph.hRstcRht+............................. 0 99 173 254 +4837 PF03007 WES_acyltransf UPF0089; Wax ester synthase-like Acyl-CoA acyltransferase domain Bateman A, Auchincloss A anon Pfam-B_1896 (release 6.4) Domain This domain is found in wax ester synthase genes such as Swiss:Q8GGG1. In these proteins this domain catalyses the CoA dependent acyltransferase reaction with fatty alcohols to form wax esters [1]. 
20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.70 0.70 -4.83 16 1646 2012-10-02 12:01:53 2003-04-07 12:59:11 11 16 348 0 475 1277 107 248.90 24 54.16 CHANGED LushDuhFLhhEsssp.hHlGtlslhchs.tsss........hcchtsshtpptphhPhh+p+.hsh.hshststWhsDschDlsaHVRRsALPuPGshcELh-LlucLtupsLDRsRPLWEsallEG.LssGRhAlhhKhHHAlsDGVuuhplhtphhstsPcssshsss.p.ssss....psphpttu....................hshsptltthssulsGsspssschhttsh.........spssshshsss......poshN.sslutpR....RhustplsLscl+tVscthGsTlNDVs .......................................................................................hsshDthalhhE..s..sp..t.....h.H.sutl.hhhp.s......sss.......................hcp.h.h.pt.h.t.p.....p...h.t.hh..P.h.h.c....p.+....h...........h..s.h...s.t.....s.hW..h.p..-s...p.hD..lshHl....R.....+....s....u.L....P......s....P.....G...s......h....c...........-.....L.....h..c.hl.....uc..l.t..up..L.D.+..s.R..PLWEh.....a.l......l....E........G......L...........s......s............s..............R......h......A............l..h.hKhHHulsDGlu.u.hp.lh...tph.h....s.....t.s...s......s..s..s...h..s..s.......h...t...ssts.........tttht..s..................................................................ht...h.hp.t..l..t....t....h......s..s..s..l...t.......s....h....s......p...s.s..h.p.hhttsh................st...p...h..s..h...s....h...s...s.s......................to..h....N....s..s.l...s...s...tR......................+.h..u.s...t.p.hsLscl+tlt.p...t..h.......s...solNDVh............................................................................................................................................................ 
0 129 319 422 +4838 PF03653 UPF0093 Uncharacterised protein family (UPF0093) Bateman A anon SWISS-PROT Family \N 24.50 24.50 24.60 25.00 24.40 24.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.15 0.71 -4.25 64 1256 2012-10-01 21:57:53 2003-04-07 12:59:11 8 1 1156 0 386 1034 1528 142.40 37 93.54 CHANGED h..shY.WlKuhHlluVluWMAGLFYLPRLaVYHs-st.sts.ptpphFp....hMEc+Lh+hIhsPAMlsohlhG.lh.lshts....uhh.hs..sWh+s.KlshVllhshaHhhhsthtKchttspsphos+haRhhNElPTlLhllIVlhVllKP....F ...........h...hahWl...KuhHllulluWh..AGLFYLPRL..FVYHupsp.ss...s...........t....p.pphp.....lME++La+.hIhs.PuhlsollhG.hh...Lh.ht.........thhh..h.st..GWhHs..K.....LshVlLLlsa.Hh.h..s..u..t...hh...+phtp.sp....s.p..+.S.t.+.aa..Rh.hN..ElPsll...h...l.s.IVlLVllKPF..................................................... 0 107 248 316 +4839 PF02016 Peptidase_S66 UPF0094; Peptidase_U61; LD-carboxypeptidase Bateman A, Studholme DJ anon SwissProt Family Muramoyl-tetrapeptide carboxypeptidase hydrolyses a peptide bond between a di-basic amino acid and the C-terminal D-alanine in the tetrapeptide moiety in peptidoglycan. This cleaves the bond between an L- and a D-amino acid.\ The function of this activity is in murein recycling. This family also includes the microcin c7 self-immunity protein Swiss:Q47511. This family corresponds to Merops family S66. 
19.70 19.70 19.70 20.10 19.40 19.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.71 0.70 -5.46 167 2826 2009-09-11 05:12:09 2003-04-07 12:59:11 10 10 2139 40 547 2166 479 284.20 28 90.74 CHANGED lulluPS....sshpp.p..ph..ppulphLc.shGhplhhuppshpp..ht.....hh...uussppRsp-LpphhpDsslc..uIhsspGGaGusRlLshlD...as.....hl...ppp........PK..hhlGaSDlTsLthslhtps.Ghso..hHGPh.....hss.ht........t..................sshohpphtph...Lhs.............................t.ths.sssththhtsG..........pspGpLlGGNLsll.............spLhGTsa.hs....................phcs.......tILalE-lsEt....sacl-RhLtpLphuGhhcplpGlllGchsth.........s.t.stslpcllpchh..tths.....lPllhshshGHst.sph.....slPlGspspl..s ...............................ltlluPS........sthpt...p...th...ptulppLp....shG......hplh...s..pthhcp............t...hh.....uu.os.ppRhpDLp........phh..p..s..s..s..l..c...hlhssh.G.GasusRLLsh.lD......ap.......tl.......pps.........PK.....lhhGaSDhTulptulhtps...slh.T..aaGPh..hssths..........t.................sshohppahph.....lps..................................................................thssth.th.hs.ss......................pspG.plhGGNLshL..............t.tlh.G.T.sa..hP............................phcs.......tILhLE.-.s.sEp........shcl-R...hLhpLtt....s.G.lhsphpulllGpaptt....................s.stsh..sh.pp.ll.tphh.....tphs..................lPllhshshGHsp.s.p.h.....slPlGspupl................................................ 0 184 356 461 +4840 PF01981 PTH2 DUF119;UPF0099; Pep-tRNA_hydrol; Peptidyl-tRNA hydrolase PTH2 Enright A, Ouzounis C, Bateman A, Mistry J, Wood V anon Enright A Family Peptidyl-tRNA hydrolases are enzymes that release tRNAs from peptidyl-tRNA during translation. 
21.10 21.10 21.90 21.20 20.80 19.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.37 0.71 -4.39 69 1006 2012-10-02 19:40:38 2003-04-07 12:59:11 11 13 703 22 562 916 123 109.50 31 60.74 CHANGED phKhllVVRsDL+MuKGKlAAQsuHAulushhph.........hpts...............................phlcpWh.ppGQtKlVL+sp..spccLhcltppAcphGlsstlIpDAG+TQls.PGohTlLulGPuspphlDclTGcLKLL .............................................t.hlllR....s....D....L........p.....M....s........p.........GKlAAQ.su...HAulsshpth............ppp..s..t.........................................phlc.p.Wc...p..s...G..ptKlV..lcs.t........s.p..p....phh.......p...L...t.......t.p.Ap.p....h.....sl....s....st.....l.lpD........A................G..hT...p........l....s....s.......u.........o..h.....T..slul....tPsstp.lsph..stpL+Lh..................................... 1 194 334 470 +4841 PF02021 UPF0102 Uncharacterised protein family UPF0102 Bateman A anon SwissProt Family The function of this family is unknown. 21.20 21.20 21.20 21.30 21.10 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.07 0.72 -4.06 8 3091 2012-10-11 20:44:43 2003-04-07 12:59:11 12 8 3030 1 769 2274 378 92.40 35 72.54 CHANGED tuEshAscaLcopGhpllsRNaRs.phGEIDlIApc..scplVFVEV+sRsussassh.htuVT.+KtcKlhcTAppaLApps..hpssssRhDVlsVh .........................h.uEphA.tpaLp.p.p.Ghplls..pNa+......s......+..........h........G....EIDlIhcc.............sp.......s......l...V............FVEV+h...R...p.....s.....s..t.....aG.s...............s...t...tu.Vo..hpKpc+lhp.sAp.ha...L...t...p..ps......ht...s..s.....s.....s..RFDllul.t.............................................. 0 263 523 664 +4842 PF01875 Memo DUF52; UPF0103; Memo-like protein Enright A, Ouzounis C, Bateman A anon Enright A Family This family contains members from all branches of life. 
The molecular function of this protein is unknown, but Memo (mediator of ErbB2-driven cell motility) a human protein is included in this family [1]. It has been suggested that Memo controls cell migration by relaying extracellular chemotactic signals to the microtubule cytoskeleton [1]. 19.90 19.90 19.90 20.00 19.70 19.50 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.79 0.70 -5.53 11 840 2012-10-01 19:17:44 2003-04-07 12:59:11 12 10 707 8 506 901 112 265.40 28 88.15 CHANGED .PulAGsaYspssccLpphlc.hhhcshs.t.......tRtlhs.......PHAGYhYSG.sAupuYptLsp....s..-pllIlGPsHsshs.sslulhs.utacTPLGslcVDp-hscpLhpppth......hs.--hsc.htEHSlElQLPFLpahhtc.......hKIVPlhluhps.EsstplGchlscsl+-.s...slllsSSDhsHau.....................................Ppclspph...DchhIctItph....s.cshhphlpphssT.lCGhsPlhlhl.hh+phs......pcuclLcYusSu-lstspsSsVuYAuhl ..............................................................................................sshAGtaYsss.tp.Lpppl.p...th...h..t....p.s..h..t..tt...........................s+sl.ls................PHAGYhYSG.sAAhuY.tt............ls..........sthc.....plhlLGPuHps....hh...p......s....su.l..s.s.......h.......sta.pTP.L.....G.........s.........l.........tl.........D...pchh....p.pLhp..p....t.h...............................hph.....s....ptsc..pp.....E..HSlEhpLPalpphhpp...............h.pl.lPlh.......lG...........t...........h.s............p....h......t....t....p.h....u....phl..sph......h...t..c.p..........slhllSSDhs.H..a..t.....................................................................................st...th.t.pp.h.....D.p..hs...h...........p.....tltph................c.pthh......p..h..l.p..p.....h..t.o..hCG.htPlsshlth.h..pth.t........................hp..h...phlpYtpSu...p.s.p........s.....p.s.p...sVuYuuh.h............................................................................ 
0 198 336 436 +4843 PF03706 UPF0104 Uncharacterised protein family (UPF0104) Bateman A anon SWISS-PROT Family This family of proteins are integral membrane proteins. These proteins are uncharacterised but contain a conserved PG motif. Some members of this family are annotated as dolichol-P-glucose synthetase and contain a Pfam:PF00535 domain. 26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.32 0.70 -5.14 128 4592 2009-09-13 19:27:26 2003-04-07 12:59:11 8 18 2804 0 1355 3742 1049 287.10 16 61.23 CHANGED shhhlh......hlhtthshpplh.............................ptlt...phshhh........lhhu..hh....lshhs..........................................hhl..puh+...........Wphllpt........ht...............plshhps.h...thhhhuhh....hsh...l.hP..uphG............-sh+shhLp................ppulsh.....spuhss.h........lhpp..lhsh....hslhhhshhshhh.............hh..shhhhhhhh...............hhshhhsshhhhhhhlhth................................h.h.hhht+ltptht.phtpshpt........................h.t...h...hhhh.hhohhhahhthh.thahlhtuhu.hsh........shhhhhhhhshshlss....hlP......sPGG..hGshEsshshhhs.....hhGhssstu.....hsh........sllh+hlshhhsh......h...................hGh 
.............................................................................................................................h....hh.....hhht...t...h...s.ht.plh.............................p.slp.........phs.hhh.............llhu.....hh....hshls......................................hhh.......sh.t..h.p.h.lhpt..............hth..........................clsht...ps....h....hsshhs.h..hst...h..hs...sh..s.G......tsh+hhhhp.....................pt.ulsh.....spsssh....h.........lhtt......hhsh......lslh.h..hs.hlhhhh.............................h..h...h.ht.hh.h...hh........................h..hs..h.s..l.hhh..l....h.hh...hhhhth...........................................................................................h.h..p.h...t..p..h......h..t....p..htt.t.htt..............................h.........hhhh.hhhohlpahsh.sh....hha.h.l..h....h.h.hs.hss..............shhth.h.s.h...h..h.l.u.tlsu.....hls....hhP.GG.lGs...hEsshlhhhs.........hhs.l.s..t.s..ts..........lss..................hLlaRlhhahlsh..hlu............................................................... 0 455 894 1153 +4844 PF03656 Pam16 UPF0108; Pam16 Bateman A, Wood V, Studholme DJ, Mistry J anon SWISS-PROT Family The Pam16 protein (Swiss:P42949) is the fifth essential subunit of the pre-sequence translocase-associated protein import motor (PAM) [1]. In Saccharomyces cerevisiae, Pam16 is required for preprotein translocation into the matrix, but not for protein insertion into the inner membrane [1]. Pam16 has a degenerate J domain. J-domain proteins play important regulatory roles as co-chaperones, recruiting Hsp70 partners and accelerating the ATP-hydrolysis step of the chaperone cycle [2]. Pam16's J-like domain strongly interacts with Pam18's J domain, leading to a productive interaction of Pam18 with mtHsp70 at the mitochondria import channel [3]. Pam18 stimulates the ATPase activity of mtHsp70. 
29.10 29.10 29.10 29.20 28.90 28.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.58 0.71 -4.34 3 354 2012-10-01 22:35:57 2003-04-07 12:59:11 8 8 266 8 259 368 4 119.10 36 83.90 CHANGED MA+RsAlQVIlsGsQVVGKAFARAlRQEh.....AAuRsAAuAtssAS+.RusAsSNhu.GISL-ESpQILNVcc...sLNhEEVpK+YEHLFcVNDKSKGGSFYLQSKVaRAKERLDEEL.+IctKE-KcKupsA+T ..................................................MAthlh.plllhGupllGRAFscAh+Qth..........t..uu...p...t..s....s...t...u...t...t..p..u....st.....ts..s...s.t..s......s...h......p.....G..........hoL..cEAppILN.Vpc.........ths..hEc.lt...c..........+ac+LFcsN......-.....p...p..p.......G...GSFYLQSK........VhRAKERL-.......tEl....p.......t.t.p...............t.t......................... 0 84 143 214 +4845 PF03657 UPF0113 Uncharacterised protein family (UPF0113) Bateman A anon SWISS-PROT Family \N 22.70 22.70 22.70 22.70 22.40 22.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -4.58 4 344 2012-10-02 17:37:24 2003-04-07 12:59:11 8 3 328 4 256 328 3 168.10 36 92.25 CHANGED h+lRpssstEhcLIcctLptYG.ts..alpphth.shtGch+-VasVshslhcslc...Lp.apsGhplGs..hsEhth+hphsLtthhhllpsohN.hshls.+uEhLFLYGRDlatcultchsthGp...lhlhNc.s-hlGIG............hpDthhlKNL+DhG.YLR+ ...................................................................R.hpt.E.p.hh...t.h.t...h...h.........h.........................................c....h.......p.p........c...RVYYVs-pl...h.+...h...As....s..........c...........pL..h...S......hGsClGK..Fo.K..........p..s...K..FRL..H..I..T..AL.s..h..LA.aA.+a.KlWlKPsuEhsFLYGNcVlKut...l..GRh...o....E.s...s.s.pat..GVVV..a..o..MsD...lP.LGFGhsu.....ttps.t.p.hp.s..s...s..h..l.l.h.p...............u.......DlGEYLR........................................ 
0 87 139 208 +4846 PF03350 UPF0114 Uncharacterized protein family, UPF0114 Mifsud W, Vella Briffa B, Bateman A anon Pfam-B_3587 (release 6.5) & Pfam-B_10597 (release 10.0) Family \N 21.50 21.50 23.40 23.00 21.10 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.59 0.71 -4.20 30 1494 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1367 0 351 823 291 119.80 42 68.01 CHANGED llasoRalhl.hhlGlllutlshhlphhhplhchl..........sphtph............sc.spllLsllslIDlhLluslLlhlshGhYEhFlS+lshtpp...................-pPcWLshhshssLKhKLutsIlsI.ulphLcpah ....................................hauSRWLhsPlYhGL.luhlsLs..l+Fh.pElhHll.............sslh.sh.............................sE..s-l.l.Ls.lLuLl..DhsLlusLLlMVhhuGYEsFVSpL-.lscpp......................-c.pWLu+hssssLKsKlAtSIVuISSIHLL+sFh............................ 0 68 180 274 +4847 PF01594 UPF0118 DUF20; Domain of unknown function DUF20 Bashton M, Bateman A anon Pfam-B_495 (release 4.1) Family This transmembrane region is found in putative permeases and predicted transmembrane proteins it has no known function. It is not clear what source suggested that these proteins may be permeases and this information should be treated with caution. 
27.20 27.20 27.20 27.20 27.00 27.00 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -11.86 0.70 -5.45 32 12059 2009-09-10 15:51:07 2003-04-07 12:59:11 11 19 4455 0 2730 8340 2223 323.20 19 87.74 CHANGED hhhhhhhhlhhhshhhhhs....lhhshlluhlluhlhpslhphL.ppttht+hlulhllhlhhlshhshhhhhlhs.hhsphtpllpslP............phhsthtshltplttph.hh............t.ht.thsphlsphhspl.....hstlhshhtthsthhlphlhhllhhhahLhctcphhphlhphhPtph+pphpthhpphppt...ltsalhuphlhullhulhshluhhlhsl...aullhullsslhs.lIPhlGuhlshlP.hlhhhhhss...hthlhslhhhhllp.l.spllpPhlhuct........hslpPlhlllullhushlhGhlGhllusPlhsllpshlpthh ..................................................................................................................hh...hhhh.h.h.hh...h.hh...h.h...hht...............sllhsh....lluhhl....sh.....l....h........p.........P...........l................h...............p............h...............l........p................c................h................t................h..............s.................R............s..............l............u...........s..............h.......l...h.........h......l..l.h...l....h.l....l...s.....h....h..l....h....h...l....h.......s.......l.....h...p...p...h.....s....p.........l....h.p..p..ls.............................................p.h..h....p....p.....h...p....p.....h....l....p....p.....l.....t..t.h..t.h...........................................tth..t..t..t..h.....s....p...h.....h...p.....p...h...t..s..p..h....................................hs.t.h....h....s....h....h...s....s....l.......s.......s......h...h....h.......t....h.......l...l...h...h.......l.h....s....h.......a......h....l....h...-....t....p....p....h..........h....p....h....h....h....p......h....h.....s....p....p.........p....t....p....h....t....p....l....h....p....p.h...spt.......lssaltu.ph....lhul...l.....hGl...h....s....h..l.....u......h......h......l........h........G..l.........sa...ul..l..l.ul..l..s..slhs..hlP.h...l...G......s..hl....u.h.l.P...s....
s....l..h..s....h....h........h.............s...............s.......................h.t........h.l....h......l....l....l....h....h.h.ll...p..t.l.pu.s.l....l.pPhlhucs............lslpslh.lllu.l.lhu...u.t.......l...h....G........h...h......G.hllulPlhulhtslhp...h.................................................................. 0 829 1727 2265 +4848 PF03715 Noc2 UPF0120; Noc2p family Bateman A anon SWISS-PROT Family At least one member, Noc2p from yeast, is required for a late step in 60S subunit export from the nucleus [2]. It has also been shown to co-precipitate with Nug1p, a nuclear GTPase also required for ribosome nucleus export [1]. This family was formerly known as UPF0120. 19.80 19.80 19.90 22.30 19.30 19.60 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.84 0.70 -5.56 38 404 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 292 0 273 396 6 277.60 32 42.37 CHANGED psshl-slLKssYpual+ss+h.ssh+ohshINFh+NossELau.lD.shuYphuFtaIRQLAIHLRsulssps........c...............-ua+sVYNWQalauLchWucVLuth..........tspposLcsLlYPLVQlslGsh+L.lPospaFPLRhall+uLl+LS.psTusaIPlhPhLhElLsSs......phs+s.s........................+....pu.shcshDFphsl+sspuhLpo+sYp-ulh-plh-LlsEahshaupsIuFPELshPsllpL++ahKps.......+ss+as+plppLlcKlppsupaIpc+Rsp..lsFuPsspspVcsFhp-hc..hcpTPLspYhts.+ .............................................t..hhpslLKthY.shlps......s..+h...ss...spshs..hIsh.hppohsELau.l....D...shuYppuFhaIRQLAlHLRsuhstpp................K...........................................................-sapsVYNWQalpsLchWspVLut.............spps.LpsLlYPLsQ.....lhlGsh+L.lPo..s.paaPLRhphl+sLhpLu....p....so.ssaIPlhshll..E...lLpps.......php+t..s.......................................p..........ps.sh+slsFsshl+hsps..Lps+sap-.ulh-p...............lh-LlhEahsh..hup..sIuFPELshPsllp.......LKpalKps.....................+ssphs.......ptlppLlpKlppNupaIpp+R.pp..lsFuspctttVpta.pphp...ptTPLstaht........................................................ 
0 102 161 232 +4849 PF03661 UPF0121 Uncharacterised protein family (UPF0121) Bateman A anon SWISS-PROT Family Uncharacterised integral membrane protein family. 20.60 20.60 20.70 21.80 20.20 19.50 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.58 0.70 -5.18 7 337 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 274 0 234 322 0 229.50 21 81.74 CHANGED .ps..s..s.s....uuhhpalhsN+l-TAhWhuRlhTlahulhalLPhlssp.uhs....hYp+sLlAsAATSALRLHQRLP.pFp..hSRtFLtphhhEDSsHYLlaSllFl.shPlohsllPVhLFulLHusoaopKlLDshG............pNShhhsR....hlshlphppQNIL+hIACsEIhLMPhslhhhFSGpuullhPFlYY+FLshRYSSRRNPYsRshFsElRlslpslAhpspCPshlp+hlhsuIsFlSRLAPssh .................................................................................................ht..........................................h.ah..phhslh.s.sh.h.ah.l..s..h.h....s.......p.....t..tt....hYphu......hl.uss.h.o......u....l...h...l...a..p.....p..h....thp.........h...s....t...t..h...l...t....p...h...l..t....-.-.sspY..L.....hh.uLh.a.l....h....s..h.....l.h..h......u.llPh.hlaSlhHsu.oas.....c.....p.hL.............sh..s.............................tp.s.t.h.tc......lhp...hVppttps....hht.h...s.AssEl........h.....l.h.h.h.h.l.h..h.lh..t......ptSh........l..h.l.Yhp.FL+hRY...pp....ssaspshatphphhl-t.hh.p...tsP.hhtp................h................................................................................. 0 77 126 188 +4850 PF04297 UPF0122 Putative helix-turn-helix protein, YlxM / p13 like Kerrison ND, Finn RD anon COG2739 Family Members of this family are predicted to contain a helix-turn-helix motif, for example residues 37-55 in Mycoplasma mycoides p13 (Swiss:O05290). Genes encoding family members are often part of operons that encode components of the SRP pathway, and this protein may regulate the expression of an operon related to the SRP pathway [1]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.21 0.72 -3.99 9 1476 2012-10-04 14:01:12 2003-04-07 12:59:11 9 2 1462 5 202 999 31 100.70 44 89.94 CHANGED l-KTh+hshLF-hYtuLLTcKQtsYhpLYYh-DhSLuEIAEcaslSRQAVYDsIKRTpchLppYEpKLpLhpKaphRpclhpclp-ph.p.t...phhc.l ..........................EKs.RMN.hLF-FY.usL...L....T...c...KQ..ps..Yl.E.LYY.h-DaSLuEIAEpasVS....RQAVY....D....N....I.K..R..T.c.....c.l.L.E.-.YEpKLc....lhp....c....a......t.R....p....p....lh...c...pl....t...phhsp..p.........h................................................ 0 92 147 176 +4851 PF03660 PHF5 UPF0123; PHF5-like protein Bateman A, Wood V anon SWISS-PROT Family This family of proteins the superfamily of PHD-finger proteins. At least one example, from mouse, may act as a chromatin-associated protein[1]. The S. pombe ini1 gene is essential, required for splicing [2]. It is localised in the nucleus, but not detected in the nucleolus and can be complemented by human ini1 [2]. 26.70 26.70 27.30 30.50 23.80 26.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.82 0.72 -3.98 19 319 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 281 1 224 212 4 103.40 71 89.84 CHANGED MSRHp.DLlhChKQPGtslGhLC-pCDGKCPlCD...SaV+P..pohVRICD-CSaG.......phss+CIlCG.......s.Gl.....s-AYYChECs+LEKDRDGCPRIlNlGSs+sDh...aap+K+ts ..............Mu+HHP.DLIhCRKQPGlAIGRLCEK.....CDGKCsICD....SYVRP.....sTLVRICDECNaG.......oYQGRCVICG.......GsGl.....SDAYYCpECTh.EKDRD..GCPKIlNLGSo+TDL...FYERKK.hu......................... 0 78 126 188 +4852 PF03658 Ub-RnfH UPF0125; RnfH family Ubiquitin Bateman A, Burroughs AM, Iyer LM, Aravind L anon SWISS-PROT Family A member of the RnfH family of the ubiquitin superfamily. Members of this family strongly co-occur in two distinct gene neighborhood contexts. 
In one it is associated with a START domain protein, a membrane protein SmpA and the transfer mRNA binding protein SmpB. This association suggests a possible role in the SmpB-tmRNA-based tagging and degadation system of bacteria, which is interesting given that other members of the ubiquitin system are analogously involved in protein-tagging and degradation across eukaryotes and various prokaryotes. The second context in which the RnfH genes are present is in a membrane associated complex involved in transporting electrons for various reductive reactions such as nitrogen fixation [1]. 22.40 22.40 22.70 23.30 22.20 19.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.55 0.72 -4.11 10 1334 2012-10-03 10:59:06 2003-04-07 12:59:11 9 1 1297 2 253 739 157 83.20 51 83.26 CHANGED IcVEVVYAhPc+QhLpclsVs-GsTVc-AIppSGlLphaP-IDLppsKlGIFu+slK..L-ssLKDGDRIEIYRPLlsDPKElRRc ......................................ltVEVsY.A.L.P.-..+Qh.L.hp.lsl.......p-GuTVc-AIcsSGl.L.p.h.......hs-........I........D...L.............s....p...s...K.V.G..IaSRssK.......Lsssl+DGDRVEIYRPLlADPKElRRp........... 0 53 131 196 +4853 PF03458 UPF0126 UPF0126 domain Yeats C anon Yeats C Domain Domain always found as pair in bacterial membrane proteins of unknown function. This domain contains three transmembrane helices. The conserved glycines are suggestive of an ion channel (C. Yeats unpublished obs.). 23.40 23.40 23.80 23.90 23.30 23.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.85 0.72 -4.36 177 6692 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 2470 0 1367 3826 471 80.50 31 75.05 CHANGED lhhhDhlGlssFulsGshhAhp..th......s..h.hssllluhlTulGGGhlRD.lLhsp.h..Phhhh..p.phYs....hsulhuuhl.hhhhhthhh .................hlLDhlGlssFulsGshhAh....ch...ph.........s.....h..hssllhuslTulG.GGhlRD.lL.hsc.h...Plhhh.........c...thYs.............s.s.s.hh.uull.hhhhh...hh.......................... 
0 358 812 1115 +4854 PF03673 UPF0128 Uncharacterised protein family (UPF0128) Bateman A anon SWISS-PROT Family The members of this family are about 240 amino acids in length. The proteins are as yet uncharacterised. 20.40 20.40 20.80 20.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.36 0.70 -5.07 5 20 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 20 0 16 22 0 215.40 49 96.05 CHANGED MLlNTLVFETLGsPEKEREFKlKDLK+WGFDLlLGKlDGc-AYFsSchusREsGDKaocsGKEYEIcElLcELPKNs+LaA+IEhEcGpAYLhsaLREED.pNhPlL+pPAsplLhAFaKK+KLspLlKsl+uVGloT-FaKc+.GlcSlPLPYEELPPVARRFLR-ARKVEK-.sGFGRluFAYFGEs+-K-sRYRLpWLLPTIALFDl-IScKlDKsLuhLK ..l.shhlhEshGss.pERcFKhKsLKtWGaDLhhGpI-GccsYFsuchsc+ctG-p....YspcG+EYclpEsh.cElPKNs+LhA+I.hEcGpsYL..hhaLc-ED.psh.lh+.ssthlLhtFacKcKLspLlKtl+sVGloo-hhKcs.hhcuhPLPYEEhPPhsRRhLR-sRcVc+-.TGFGRhsFtYaGE.cDtptpYRlpWhLPTItLFDl-IApclDKsLuhLc.... 0 6 6 9 +4857 PF03647 Tmemb_14 UPF0136; TMEM14; Transmembrane proteins 14C Bateman A anon Pfam-B_2984 (release 7.0) Family This family of short membrane proteins are as yet uncharacterised. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.05 0.72 -3.59 61 574 2009-01-15 18:05:59 2003-04-07 12:59:11 8 12 290 3 347 543 6 92.90 30 64.14 CHANGED phhuhshuuLlssGGlhGYs+sGShsSLhAGlshGslhuhu......uahlp.......spshuhtlul.ssSs.............sLssshshR....hhpop.KhhPsslhhslusshsshhhh .................hhuhsausLlssGGlhGYh+sGShsSLhA.GlhhGslhhhu.............uahhp..........ppshshhl....uL.hsos................................sLssshuhR...........hhp.ot.K.hhPsGlhsshohhhhshh..h...................................... 0 95 186 274 +4858 PF03677 UPF0137 Uncharacterised protein family (UPF0137) Bateman A anon SWISS-PROT Family This family includes GP6-D a virulence plasmid encoded protein. 
34.00 34.00 34.50 34.20 32.00 31.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.76 0.70 -4.84 6 77 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 47 0 11 42 3 230.60 45 92.16 CHANGED MupLKp.h-shFKKNppsphEshpKKchpt-.......lhssoLSspEcp+lcpLl..p+YsFcDE.hpccDltulppLouQIKpIp+QpVLLhGE+IhKVR-LL+o..apEssFSuWl.Lsaus+posYNhLsYYELFhsLP-.sLKlEhpplPhpAsYhLASRcGo.E+K.clI+phpG.opophl-Ilc+.hP.l...........ss-p+pppLScphlplL....+llspsscLSp.spslLcpLhcKlp.......spsp ................................MuplKp.h-shFKKs.p...sphpshtK+phphE.......hhssp.LSSpEctphcpLl..EcYsaucE.h.ppDltp....lphLouQlKpIp+QtVLLhGE+IhKVR-lL+s..ap-ssFSuWl.Lsaus+posYNhLsYYELF.sLPc.oL+hEhpplPhpAsYhLASRcGs.ccK.-lIcphpG.opuEllcllc+.hP.h...........ss-c+ppsLupphhphh.....+llpps.s.clSppp...LcpLhcKhp...Ksp............................. 0 5 7 10 +4859 PF03669 UPF0139 Uncharacterised protein family (UPF0139) Bateman A anon SWISS-PROT Family \N 20.90 20.90 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.48 0.72 -4.51 9 201 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 167 0 134 179 1 98.30 39 82.70 CHANGED hssssDPRRss+hpRYKPs..oss......-DhhsDYMNlLGMlFSMCGLMh+hKWCuWlAlhCSsISFANsRs.S-DsKQlhSSFMLSlSAVVMSYLQNPpPhoss ........................h..thsDPR.RssplhpYpsPssps..............-D.ssDYhslLuhlFuMsGlMh..+.hKaCuWhAlhhShhS.aANs...+s....opDs.KQ.h.....h.....SS.F..M.......hSl.AlVhoYLtsstsh......................................... 0 46 71 107 +4860 PF03686 UPF0146 Uncharacterised protein family (UPF0146) Bateman A anon SWISS-PROT Domain The function of this family of proteins is unknown. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.51 0.71 -4.31 6 77 2012-10-10 17:06:42 2003-04-07 12:59:11 8 1 77 1 56 119 15 127.40 30 93.74 CHANGED Ml-lAchIAcEst+G.KVVEVGIGhahcVActLpcpG.hDllAsDIscc...pA..pGlphhhDDlhsPslulYcuAchIYSIRPPPElhssll-lu+tVsAshhIpPLsG-.s...pphKLlNY+Gt.FYth-s ......................h....t.pts..+lVElGlGt.p.hcVAttLp.ct.G..h-.VhssDlptp.............ts....tGlp.....hhhDDlh......s....Ps...h.....plYc..sAclIYS..lRPPsELp.sllcl..A+clsuslllpsLus-t........t.h+lhsapt..hYh............................................................................... 0 10 32 47 +4861 PF03685 UPF0147 Uncharacterised protein family (UPF0147) Bateman A anon SWISS-PROT Family This family of small proteins have no known function. 21.10 21.10 21.20 27.30 21.00 19.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.87 0.72 -4.11 23 121 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 118 5 81 116 72 84.00 45 95.16 CHANGED shtss-cpl+pshthL.pcIlsDsoVPRNIRRAAs-uhctLpscppssuVRAAsAISlLD-ISpDPNMPhHsRThIWsllSpLEol+ ......h..pscpplcpslthL.ppIlpDooVPRNIRRAAs-uhctLpspsps.uVRAAsAIulL--ISpDPNMPhHsRThIWpllStLEol+. 0 23 47 65 +4863 PF03695 UPF0149 Uncharacterised protein family (UPF0149) Bateman A anon SWISS-PROT Domain The protein in this family are about 190 amino acids long. The function of these proteins is unknown. 
25.00 25.00 27.50 27.40 20.60 20.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.49 0.71 -4.39 169 2253 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1277 1 349 1168 204 173.10 26 84.88 CHANGED hppLpph..Lp.......ttt.h......shsplcGhLsullsu.....stt..lss.ppWl...shlhs.......................pstt.s..pphhphlh...............phhpphtppL......tp.ts.th...................p.hhspp..ps.......tpspslppWspGFltGh...sl.tppp...................pphssp.......hpthlps...lttlupht......ptt.................spcp..tppthtplhE.hh..sshhl......ap.htttt ...............................................p.lsph..Ls.........ppu.hlssAEhcGllouhlsu......spt...h.s...ppWL...shlas......................p.pu.t.h..pchhphlh...............phhsssuppL.....p-.ts..F......................p.h...hspp..-s..........pcsssltpWspGFhtGlul.htsc......................hspl..ss-................htpulcs.....lt.lup.t.......spcp..................spcp....htpuh-cll-hl+hAsLhlashah.............................................................................................. 0 58 143 260 +4864 PF03681 UPF0150 Uncharacterised protein family (UPF0150) Bateman A anon SWISS-PROT Domain This family of small proteins is uncharacterised. In Swiss:Q9A3L8 this domain is found next to a DNA binding helix-turn-helix domain Pfam:PF01402, which suggests that this is some kind of ligand binding domain. The structure of this domain suggests that these domains oligomerise and due to structural similarities may bind to RNA. The monomer adopts an alpha-beta-beta-beta-alpha fold and forms a homotetramer. Based on the properties and functions of structural homologues of the HB8 monomer, the protein is speculated to be involved in RNA metabolism, including RNA binding and cleavage [1]. 
26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.29 0.72 -4.33 144 2769 2012-10-02 16:06:15 2003-04-07 12:59:11 10 19 1473 7 747 2274 167 47.90 27 43.64 CHANGED pYssllct...p-css....ahsphP-ls....ush....opG-Th-EAhpphp-Alphhlps .................Ysshlp.....tccsu....ahsp..hPDl.s........ush....opG-.Th-EAhppsp-Althhlt............... 0 231 511 642 +4865 PF03692 CxxCxxCC UPF0153; FliB; Putative zinc- or iron-chelating domain Bateman A anon SWISS-PROT Family This family of proteins contains 8 conserved cysteines. It has in the past been annotated as being one of the complex of proteins of the flagellar Fli complex. However this was due to a mis-annotation of the original Salmonella LT2 Genbank entry of 'fliB'. With all its conserved cysteines it is possibly a domain that chelates iron or zinc ions. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.35 0.72 -11.12 0.72 -3.51 35 4094 2009-01-15 18:05:59 2003-04-07 12:59:11 10 5 2313 0 1272 3041 657 89.40 24 56.80 CHANGED hCps....C.GtCCh................htlptpDhtcl.thsth......................................tChhLc.tcst...pCpl.....Yp..pR....PpsC+hhPh....................tththhs.sCs .....................................Cps....C....Gt.CCh..........................................hp..l....t.p..-...ht.c.l....th..t...t...h..............................................................................................sC......hhL.s.....tcst...........pCpl.....Yp.....pR......PpsC+hh.h............................................................................................................ 0 397 784 1044 +4866 PF03672 UPF0154 Uncharacterised protein family (UPF0154) Bateman A anon SWISS-PROT Family This family contains a set of short bacterial proteins of unknown function. 
20.90 20.90 20.90 21.50 20.80 20.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.03 0.72 -4.35 5 1101 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1097 0 123 358 3 64.60 49 82.30 CHANGED LsIVLuLLlGlhLGaFISpKhMK+hLKKNPPINEstlRhMhtQMGRKPSEsQINQlM+uhpsQp ...................lllVlAL.l...sGh....lG...GFal.ARKhhpcal..pc..NPPlNE-MlRhMMhQMGQKPSp+KlpQhMptMp+Q.t.... 0 36 71 96 +4867 PF03693 RHH_2 Uncharacterised protein family (UPF0156) Bateman A anon SWISS-PROT Family This family of proteins are about 80 amino acids in length and their function is unknown. The proteins contain a conserved GRY motif. This family appears to be related to ribbon-helix-helix DNA-binding proteins. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.58 0.72 -3.81 5 1166 2012-10-02 18:44:02 2003-04-07 12:59:11 9 7 788 2 292 822 140 77.80 31 88.88 CHANGED MuKNTSVsLGEHFsuFIDuQVQuGRYGSASEVIRSALRLLE-pETKLcALRsALIEGEcSG-ucsFDhDuFlsE+cpcssp .......................h.olsL.s-chcpFIcshlp..SGcYsotSEVlR-u...L....R......L.L....c....c....+....E....s.....c....l....p....u....L...R.p....h.l..t...pG....h...p....S...G.......s....p...h....p..p.h.........pth.......................................... 0 68 163 215 +4868 PF04229 GrpB UPF0157; GrpB protein Kerrison ND, Finn RD, Eberhardt R anon COG2320 Domain This family has been suggested to belong to the nucleotidyltransferase superfamily [1]. It occurs at the C-terminus of dephospho-CoA kinase (CoaE) in a number of cases, where it plays a role in the proper folding of the enzyme [2]. 
20.10 20.10 20.40 24.40 19.70 19.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.88 0.71 -4.53 107 1502 2012-10-02 22:47:23 2003-04-07 12:59:11 9 16 1107 1 300 1132 38 162.00 27 79.16 CHANGED tttlplss.ass.pWsppapptppplpshl....usphl.......plcHIGSTuV......PGLsAKPlIDlhlsVpshpshc..ph.spsLpshGY.h.ht.c.sh.t..............+chFh+......................sp..+stplHlhtt..uss.phpcpLhFRDaLR..scspttppYtplKppLu...tphsp...shptYssuKs..salpclh..pcA .............................t...l.lhsass.pWtppapc.pptltshl....spthl.......................plcHIGSTul......sslsAKP.IIDIhlt..Vp.shp..p.h.s..ph..tctLpt..l.G....Yhh..t...c...shsp..................+h.h.ht+.........................ttttcsh+lHlhth..sst..phpppLhFRDaLp..spsphsppYsplKppLs......tphst.......shppYspsKssalpplhpc..................... 1 98 188 247 +4869 PF03682 UPF0158 Uncharacterised protein family (UPF0158) Bateman A anon SWISS-PROT Family \N 20.40 20.40 20.60 21.10 20.20 19.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.96 0.71 -4.62 3 182 2009-09-11 06:49:59 2003-04-07 12:59:11 8 7 173 0 63 158 12 143.10 24 66.17 CHANGED QNPLlLRlcRLM-AFAKSDDERDFYLDRlEGFllYIDLDKPQsELDAL.pELEENuDRYCLIPKLSFYEoKKIMEsFVNEKVYDIDTKEKLL-IlQSKcAREsFLEFLYDHcoEQEKWQQFYsERSRIRIIEWLRsNcFQFVFEEDLDhP+pLLEpLK+sLFs .............................................................................l.h.pl....A..ht.s...s...th.p..haLDh.pG..lh.hl....s....-......................s.....t..-h.....-....th..............p..-.........l.....-...p.............s......s...-.R.Yh.hlP.phs.......h..p.t.hplMcsFlpp.l..t.-.ch+ppL..hpsl.p.u.+s.u.acpF+chlh.-.a.p..hccWhpapscph+thhh-.WLcpptht...................................................................... 1 29 50 57 +4870 PF03690 UPF0160 Uncharacterised protein family (UPF0160) Bateman A anon SWISS-PROT Family This family of proteins contains a large number of metal binding residues. 
The patterns are suggestive of a phosphoesterase function. The conserved DHH motif may mean this family is related to Pfam:PF01368. 20.70 20.70 22.10 23.00 19.90 20.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.07 0.70 -5.01 69 570 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 459 0 317 545 50 288.60 36 89.24 CHANGED hpIuTHsGsFHsDEsLAshhL+h.hstat.............sAcllRoRDsphl.spsD..lVlDVGGhYDs...................pppRaDHHQ+sFsthh........p.thsp........+LSSAGLlapHFG+cllpphht.........................hscpp...lphlapplYpsFlcslDAlDNG..lsph...................t...ta..sshoLuuhluphN..PsWs-.t.......t.tsp-ptFtpAhphsup.hhptlcthstshlsA+slVtpAhpptht...sGcIlhL.sp.hsPW+caLaplEp-tth.................h..aVlascs....spWRlpsVsh.pssoFcsRtsLPcsW+GL+D.c-Lsph..........oGIs.GslFsHsuGFIGGscohEusLpMAch.uLp ................................hluTH.sGsFH............sDEsLAshhL+h..ls..tap..............sucl.lRT.RD..s...phl..sps-................lVlDVGG.YDs...................tppRaDHHQ+sFstsh..........tthts.......+LSSAGLlap.HaGcplltphht............................sptp..l..phlapplYp.sFlcslDAhDN.G.ls.h...................t....tat.hshsLushluphN.........PtWsp.t...........................tsp-ptFtpAhphstp.FhptlphhstsalsA+slVt.......pAh.p.p.phph.......p...sGcIlhL...sp..hsPWKc+LaplE..p-..ht...........................hhaVlascp....tspWRlpsVsh.p.so.Fp.sRhsL.Pc.sWRGL+D.cpLsph............oG..Is.GslFsHsuGF..IG.G...scohEuAlphAphuL...................................... 0 126 196 269 +4871 PF03687 UPF0164 Uncharacterised protein family (UPF0164) Bateman A anon SWISS-PROT Family This family of uncharacterised proteins are only found in Treponema pallidum. They contain a putative signal peptide so may be secreted proteins. 
25.90 25.90 26.30 26.20 24.40 25.80 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.91 0.70 -5.90 6 67 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 30 0 13 55 29 294.80 33 72.18 CHANGED sl....thhshat+suthssuhuhssslushAuusu.h.otpc.Kthu+.....Rsl.lPoGGRapsLssuFTALAsDASFFEANPAGSAshs+sELuhFHsstlssSHsETLSaVtpsGphGYGsShRsFaP-.shp..hu.p............KspGhlu....IhNhu+tF.utYRFKGlSlGuNlKsGaR......................supp.pHlsVsuDlGLphshsVAKsFuScEPNhalGLuh+NlGhoVKs........................................ssssssts.ssstssHsTsohltlGFAYRPlpaFLFulGlphthNVpslpsss..hhuhuFhh..hp.hsh.sshhhpG...tt.thouGuEhp.ssh+ls ................................h......hh.h..hhhshhsh...ssthsu........stt..p.pt.sc.....t.l.lsoGG..RhphLssuFTALAsDASFFEANPAGSAshscsElu.hFHsstlssSHh-Tluastptsph.G.YGsS...h+hFas..sh....h........................pthGhlu....IhNhu+th.ttaRFtGlSlGsNlKhGaR..................................pupt.pHlslsuDlGLphshsVuKsFuSpEPNhalGlshpNlGholps....................................................sstpsc.shsohlhhuhAYpPlphFLFuhGlph.hNlpsl.tt.......t.....p......p....hhuhuhhh.shphlsh.uuhhhpu....tphRhusGuEhphsphpl........................................................................................................................................... 0 8 12 12 +4872 PF03691 UPF0167 Uncharacterised protein family (UPF0167) Bateman A anon SWISS-PROT Family The proteins in this family are about 200 amino acids long and each contain 3 CXXC motifs. 
25.00 25.00 28.60 28.30 24.60 24.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.50 0.71 -4.86 11 334 2009-09-11 05:14:47 2003-04-07 12:59:11 9 1 317 0 27 155 5 175.40 52 89.45 CHANGED hp....LPpF+YHPc..PlsoGuhcps..sssCsCCspspshhYoGshYsh--lp..slCPWCIADGoAAc+a-GsFpDDhslpss................sspphlcElhcRTPGYsuWQQEhWLuHCsDhCAFlGhVGhsEltsL..psl-slhs-ht....sutchpcLhptLs+sGp.hsuYLFpClaCGpahhahDhu .............sh..psLPpF+YHPc..PLpTG.uF-pD..sVpCsCCcQps..slhYoGPhYsh.....-E.l-...aLCPWCIADGSAAcKFsGoFpDDssl-..ss-.......................hP-EhlcELlcRTPGYpGWQ.Q.EaWLuHCGDaCAFlGaV.G.hs-lcDh...Dt.....hssLccDhc......huh+.p.-ltcsLp+sGc...spGYLFRCLHCGKh+LauDFp........................................................................ 0 10 17 22 +4874 PF03666 NPR3 UPF0171; Nitrogen Permease regulator of amino acid transport activity 3 Marshall M anon SWISS-PROT Family This family, also known in yeasts as Rmd11, complexes with NPR2, Pfam:PF06218. This complex heterodimer is responsible for inactivating TORC1. an evolutionarily conserved protein complex that controls cell size via nutritional input signals, specifically, in response to amino acid starvation. 
27.50 27.50 27.60 28.20 27.40 27.20 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.54 0.70 -5.95 19 428 2012-10-01 20:21:22 2003-04-07 12:59:11 8 4 229 0 267 413 2 250.80 23 57.72 CHANGED GFsschLsplLsPt+phCNp+FElslDslhFlGhPlahtcsGpW+ppcppp...t.ttp.......................................ts..ppttpstp......................t.csshsMFplVFlhN.P.......hhEhs.+lccMacallp+lulsL+apQs+psYVt+EsctILpl+-...th.cpptthps..........hhppllppSSLA+slp-sacuIspscIAsLplss....hhhShQIPhpsch.pLPp.pl.Phh.uoaLoo...................................ts..p.p-th.p...............pthl..aauLLLLcDspsllp-lts.ssss.....hls...................chl+hhpP....shSlhplup..............................ssslsh.splcphAhHLlYWR+ARlI.PLss+ssYlVSP.Aslp..............................pL.pstptFpppFPshPoLPpFLshLS...tpP+sauslIP...S+-H+slYhphLuWLlRaGaVTQLpTFlalhlspcIKhc .....................................................................................................................................................................................................................................................................................h...h..l..phshhh...p.p.tal.pp.t.h....tp...................................................................................................................h...h......................................................................................................................................................................................h....shhhh..t.............h.................h..............................................hhp..ps.......sh..ht....................................h......h..hs.ahh.htpAhhl..lp...p...hahhss...................................................at..F..s...................L..hlt.hs.................s.......ht..hh.............................................h.hl..hhht.thh...................................... 
0 88 131 210 +4875 PF03665 UPF0172 Uncharacterised protein family (UPF0172) Howe K anon SWISS-PROT Family In Chlamydomonas reinhardtii the protein TLA1 (truncated light-harvesting chlorophyll antenna size) apparently regulates genes that define the chlorophyll-a antenna size in the photosynthetic apparatus [1]. This family was formerly known as UPF0172. 24.40 24.40 24.60 25.70 22.60 24.00 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.95 0.71 -4.82 22 258 2012-10-10 14:49:21 2003-04-07 12:59:11 8 3 170 0 158 241 5 183.70 34 92.45 CHANGED hsplplospAYsKhhLHuhKYPpsuVNGlLluc..........................sspssp............lhloDslPLFH..tplsLsPhLElALspl-sasppps.hsIlGYYpANpph.sDss.ssl.At+lu-+luc....phssAsllhlcNpcls.sscsssht..................lap...ppts+Wspsc.t..hhtptppspchlpph...lpsttapplsDFDsHL-chppDWhNppLsp .........................................phclospAYsKhhLHuuKYP.p.sAVNGlLlup.......................................................pp..p..ss..p............lhl.sDslPLFH..tpLsLsPMLEl.ALs.......l-s.a.......s.p....p.......p.......u......hsIsGYYpAN...pph..pD..ss....s..s.l..AtKlAs+Is-..........tas.sA.sllh.lDNp+hs..p.ts..sshh..................lhp......ppss+Wpt..pc........t.....h......hp.p..tps....pp..hsuph...lcsps..appLlDFDsHLDDlppDWhN.tl.............................. 0 63 85 119 +4876 PF02476 US2 US2 family Mian N, Bateman A anon Pfam-B_2256 (release 5.4) Family This is a family of unique short (US) region proteins from the herpesvirus strain. The US2 family have no known function. 
25.00 25.00 30.80 29.80 19.50 18.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.97 0.71 -3.65 15 202 2009-01-15 18:05:59 2003-04-07 12:59:11 10 2 32 0 0 136 0 150.10 66 66.65 CHANGED hLsSshsss.psaHLWVlGAADLCtPsl-plsss+...RLlssclsssWs.GtsW.lPs.......phtshhTss.........Ws.........Ph.sssps...ltss.sshchhYullss...................s.h.Psssspssspsssps.....spssCsss ....HLNSSLIIN.QPYHLWVLGAADLCKPVFDLIPGPK...RMVYAEIADEF..HKSW.QPPFVCGKLFETIPWTTVE..................HNHPLKLRAAGGEDTVVGECGFSKHSSNS..LV+PPTVKRVIYAVVDPARL.......REIPAPGRPLPRh......R.....PSEGGMRAPRRRSRA..PAPARSTAs..AAs.......................................................................... 0 0 0 0 +4878 PF03683 UPF0175 Uncharacterised protein family (UPF0175) Bateman A anon SWISS-PROT Family This family contains small proteins of unknown function. 26.40 26.40 26.50 26.50 26.30 26.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.23 0.72 -4.51 47 419 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 182 0 161 473 43 71.90 24 80.33 CHANGED plPctlh.shp.s.pth.pEl+htlAlpLYppstlShG+AAclAGl.o+hcFhchLuc+sls.hp..hs.c-LpcDlpss .....................................th...ht...t.t...pch+hthAlpLY...ppsclShupAAclAGh.sch-Fh.phLtccsls.hp...h.s..--lpp-lp..h.................. 0 47 129 154 +4879 PF03698 UPF0180 Uncharacterised protein family (UPF0180) Bateman A anon SWISS-PROT Family The members of this family are small uncharacterised proteins. 
22.30 22.30 22.60 27.90 22.10 22.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.42 0.72 -4.14 18 218 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 204 0 53 136 0 78.90 47 96.57 CHANGED hp+.IuVEpuLoslp-tL+p+GY-Vlplc....spp......chpssDssVVTGhDsNhhGIpDssTpu.sVIcAsGhTA-ElsppVEp+lp ......t+IGVEsoLo.cVppALpppGaEVVsLp.....scp.......DspuCDssV...VT...G.......pD........o.N...h..hGI...sD.ssh.cu.sVIsApGhTs-EIsppVEsR..t.................................... 0 25 42 46 +4880 PF03701 UPF0181 Uncharacterised protein family (UPF0181) Bateman A anon SWISS-PROT Family This family contains small proteins of about 50 amino acids of unknown function.\ The family includes YoaH Swiss:P76260. 22.20 22.20 22.70 22.20 21.10 22.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.51 0.72 -4.66 18 721 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 718 0 66 197 1 51.70 70 86.32 CHANGED Mh.sshPuLoHEpQQpAVE+IQcLMucGhSSGEAItlVApElR.Ep+ppcppst ....MF.AGLPSLTHEQQQKAVERIQELMAQGMSSGpAIAlVApELR.AsHoGE+I.VA...... 0 4 20 46 +4881 PF03670 UPF0184 Uncharacterised protein family (UPF0184) Bateman A anon SWISS-PROT Family \N 20.90 20.90 20.90 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.81 0.72 -3.81 3 137 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 118 0 63 90 1 73.80 36 65.06 CHANGED MSGPNGDsshSVEDGups--D-FGppEYEAINSMLDQINSsLDcLEERNDcLpu+L+ELLESNRQsRLEFppQLu.cAPp-uSs ..........................................................................................p...p..hAtlNSpLDQLNSsLD+LE-+...sDHLcupL+pLhp........................pt......................... 
0 17 22 38 +4882 PF04050 Upf2 Up-frameshift suppressor 2 Wood V, Finn RD anon Pfam-B_14721 (release 7.3); Family Transcripts harbouring premature signals for translation termination are recognised and rapidly degraded by eukaryotic cells through a pathway known as nonsense-mediated mRNA decay. In Saccharomyces cerevisiae, three trans-acting factors (Upf1 to Upf3) are required for nonsense-mediated mRNA decay [1]. 25.00 25.00 25.00 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.20 0.71 -4.49 16 283 2012-10-11 20:00:59 2003-04-07 12:59:11 9 11 232 2 203 287 0 170.80 27 16.56 CHANGED cussSss--Gh.....---shs-t-c-p-....So-Ec-sstsssppst.....co-uE-EplhVoRpp--hDPEsE..AEFDREF-KMMu..................ESh-SRKaE..++ssFDlPLPM+htscsss.......ssspsssE......tssssssTMsFoLhTK.KGNKQQTRsl-lPSDSohAhuM+sQQpA-pEEQQRIKpLVLN ................................................................................ttts.......ps........-ppt.......p.t...-...p-cp...........ppc.-p.p...p.pt..p....-..ttppp.......................psp.p-.cpp.hh......l...pt...t.t.......p..hs...s.p...t-....t-F.ppth.pKMht....................................E.u..h......p...p.RphE....+h..tt.....h..Dl....s.l.......Ph..phpsptpp...........................................ts..t-..................spstsssshsFslLo+...+GNK.QQ.........h..+plplPssSphAhshhpp.ppA...-p-E+p+lKpLsLp.................................. 0 60 102 165 +4883 PF01255 Prenyltransf UPF0015;UPP_synthetase; Putative undecaprenyl diphosphate synthase Finn RD, Bateman A anon Prosite Family Previously known as uncharacterized protein family UPF0015, a single member of this family Swiss:O82827 has been identified as an undecaprenyl diphosphate synthase [1]. 
20.60 20.60 20.90 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.40 0.70 -5.22 94 5931 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 4761 48 1691 4210 2997 213.10 40 85.11 CHANGED I.MDGNtRWA+p+....shs.pttGHptGspslccllchshch..GlchlTlYAFSsENa.pRspp.EVshLhpLhpphl.pc.hpphp...p...psl+l+hlGch.stL.sppltptlpcspptT.p....sss.shpLslAlsYGGRpEIh..........cAs+plh...............................................pth.tspls.pp...................................................lsc..ph....l...pptLhs..ssh.P............s...DLlIRTSGEpRLSNFLLWQsuYuE..laFscshWPDFsttchhpAlppYptR...........pR+FGt .............................................................................................IMDGNGRWA+p+....s.h.s....Rs....h....GH...........+tG...hc.sl.cchlphst.ch....GlchLTlYAFSo...ENW.pRP.pp.EVshLMpLhhphl.cp..l.tph.......p....c.....psl+lch.l.G.ch...s..p..L..sppltctlppu.....p...p..h.T...t......................s.Ns....u.......lpLsl.AhN....Y....GG.RtEIs..........pAs.+pls......................................................................................................pp..s...t..p...u.p....l.p....s...p..c..............................................................................................IsE....ch....l.spaLhs....ssh..P...........cs.DLlIRTSGE...p.Rl..SN.FLLWQhAY...uE...h.............aFo.-..sLW......PDFscpc.hhpA.....ltpapp.R.cRRFG.s.............................................................................. 1 575 1096 1447 +4884 PF00449 Urease_alpha urease; Urease alpha-subunit, N-terminal domain Finn RD, Griffiths-Jones SR anon Prosite Domain The N-terminal domain is a composite domain and plays a major trimer stabilising role by contacting the catalytic domain of the symmetry related alpha-subunit. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.56 0.71 -3.97 117 1905 2012-10-03 00:45:34 2003-04-07 12:59:11 15 17 1537 48 464 1638 555 117.10 58 20.98 CHANGED hclsRptYAshaGPT....sGD+lRLuDT-LhlEVE+DaT......sY............G-EspFGGGKVIRDGMGQuptsssst......sl..DhVITNAlIlDa.hGIlKADIGIK-G+IsuIGKAGNPDh.sGVs................lllGsuTElI .........................h.ploRptYAshaGPT....sGD+lRLuDTsLhlElE+Dho.............sY.................G-Esp.FGGGKsIRDGMGQuptsst-t.........shDhVITNAlIlDa..h.........G..I..lKADIGIKcG+IsuIGK.AG.NPDl...sGVs................lllGsuTElI.................. 0 139 288 388 +4885 PF00699 Urease_beta Urease beta subunit Bateman A, Griffiths-Jones SR anon Pfam-B_405 (release 2.1) Domain This subunit is known as alpha in Heliobacter. 25.00 25.00 36.20 30.30 19.70 18.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.02 0.72 -4.16 165 1820 2009-01-15 18:05:59 2003-04-07 12:59:11 15 16 1525 48 471 1322 409 99.30 54 51.53 CHANGED IPGElh.s...ssGc...IpLNsGR.p..slslpVsNoGDRPlQVGSHYHFhEsNsA.LpF.........DRptAhGhRLsIsAGTAVRFEP.Gpp+pVpLVshuGpRplaGFsuhltGtL ..................lPGElh....h...ts..s.-......IplNsG....+...t..shslpVtNoGDRPlQVGSHaHFaEsNsA.LpF.............DR....ptA.....hGhRLDIsAGTAVRFEP.Gp..p+pVpLVshuGpRplaGFpuhlsG.......................... 0 134 289 393 +4886 PF00547 Urease_gamma urease_gamma; Urease, gamma subunit Bateman A anon SCOP Domain Urease is a nickel-binding enzyme that catalyses the hydrolysis of urea to carbon dioxide and ammonia. 
23.90 23.90 24.10 24.70 23.80 23.80 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.08 0.72 -3.97 85 1774 2009-01-15 18:05:59 2003-04-07 12:59:11 13 11 1506 51 477 1161 409 96.50 60 54.59 CHANGED McLoP+Ep-KL.llasAupLAc+R+sRGLKLNaPEAlAlIostlhEG.ARDG.+..........oVA-LMshGpplLsc--VM-GVs-Mlp-lQVEATFPDGTKLVTVHsPI ..................McLTPREpDKL.hlhhAA.lAcRRpuRGLKLNaPEAlAlIostllE..........G.....ARDG.+................oVAELMphG..pplLs.................+-DVM-GVs-MIs-lQVEATFPDGTKLVTVHpPI.............. 0 139 295 399 +4887 PF01774 UreD UreD urease accessory protein Bashton M, Bateman A anon Pfam-B_1109 (release 4.2) Family UreD is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid [2]. UreD is involved in activation of the urease enzyme via the UreD-UreF-UreG-urease complex [1] and is required for urease nickel metallocenter assembly [3]. See also UreF Pfam:PF01730, UreG Pfam:PF01495. 20.40 20.40 22.20 22.00 18.20 18.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.25 0.70 -4.85 163 1645 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 1448 2 449 1350 498 205.80 23 73.12 CHANGED pshllpsu..GGlluG.Dplslslplsssu+shlTT.tuAsKlY+..........................u.sut...............................up.QphplplsssAtL-aLPp-sIhFssuphppphplcL...........sssAphlhhEhls...hGRs..u.tG..................................Epa........shsph.csphclhp.....ss..........c..l.hh.-phtL.psst.........tthss.ssh.........suhsshuollhhu...................ts.......p.th.hptl+..thh.......................t.th.hGs.ohh........sshlllRhLusssps...l+phh 
..............................hhllssuGGlluG.Dchplslpl.pss..upshlTo.puAoKlY+.............................s..sst.................................................up.QphplplsssuhL-alPpssIsap.sAchtppsplpL......................pssupllhh-hlshGRs..s.pG..................................Eta........phsthpsphclhh.......cs....................ch...l....hh..-phhL.sssp.............t.hst...shh.................................................tshshhuolhhls.................................................................p.thhptlp....thh.........................t.sh..th.uh.otl............sshlhlRhhu.pst.lpth.h................................................................................................... 0 123 268 370 +4888 PF05194 UreE_C UreE urease accessory protein, C-terminal domain Finn RD anon Pfam-B_6279 (release 6.1) Domain UreE is a urease accessory protein. Urease Pfam:PF00449 hydrolyses urea into ammonia and carbamic acid. The C-terminal region of members of this family contains a His rich Nickel binding site. 21.90 21.90 21.90 21.90 21.70 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.28 0.72 -3.88 99 1267 2009-09-11 21:12:50 2003-04-07 12:59:11 7 3 1165 35 255 871 383 91.80 29 53.94 CHANGED sEslhplpu....ss.hpLs.........+hAaHLGNRHlPh......pltss.....tlhlt...tD+VlccMLct...........L.G.....hplpphptPFpPEsGAY..................tt...ctHs..........................HsHs ........................Ecllhlps.....p.sh.hphu.........clAacLGNRHlPs............plpps..........clhl.........hDtll.cchLcp..................................L.G.................hss.pctct..Fpstttsh..............................................sHsH....................................s................................................................................ 0 59 142 201 +4889 PF01730 UreF UreF Bashton M, Bateman A anon Pfam-B_2037 (release 4.1) Family This family consists of the Urease accessory protein UreF. 
The urease enzyme (urea amidohydrolase) hydrolyses urea into ammonia and carbamic acid [2]. UreF is proposed to modulate the activation process of urease by eliminating the binding of nickel irons to noncarbamylated protein [1]. 20.50 20.50 20.50 23.20 19.70 20.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.52 0.71 -4.08 171 1651 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 1481 11 435 1261 476 150.00 22 63.76 CHANGED hVpc..tsslpsWlpshLppshspsDsshLttsac.uhtt.........sDh......ttltplsphhhAtptotEhRtpspphGtuhhclhsp.......................h.tts.....ssh.................................................sasluauhsutthslshppsltuaLauhlpNhlsAAl+llPLGQssuQclLtpLps....h ..........................................................................h.lpcttshttalpthLp...phs.hs-t..hhlt.tsac...Ah..tt.............................sDh.......ttlhclsphhhAp.p..s.cEhRttspphGpph..hcl..htph.....................................htp.thtptp...........sps....................................................................shslshuhhut..............thuls.........hcpsltsahauhspshlpAAlRhlPLGQhsuQcllhplt..h.............................. 1 117 257 356 +4890 PF04115 Ureidogly_hydro Ureidoglycolate hydrolase Wood V, Finn RD anon Pfam-B_9183 (release 7.3); Family Ureidoglycolate hydrolase (EC:3.5.3.19) carried out the third step in the degradation of allantoin. 
20.60 20.60 20.60 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.42 0.71 -4.90 81 1067 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 957 8 269 684 700 164.00 40 89.59 CHANGED h.pp..lp..scPLTt-AFAPFG-VI-s.p................uss.......shhINpGpstRaH-lApl-s.s........su+shlSlFcu.pPpsLP...............................hplchlERHPhGSQAFlPlss.ps..................aLVVV.As..ss.............................................................ssss.sph+AFlssssQ....GVNYt+GsWHtsLhsLs....tsucFhVV......DRh.Gs.....us...Nh-Ehhhsp.shtlp .................................................................h..pLpl.PLopEAFusaGDVIEs..p..........................ttc...........hhhIN.sGhspRaH.DLAhl-hht...................psRslISlhRu.pP.ts.hP.................................................lslchLERHPhGoQAFlPhpu.cs...........................................FlVVV.A....s.s..............................................................-tP.c....uslRAFl..ss..GpQ....GVNYc+sVWHH.PLhuhp....pssDFlsl........DRu.us.........NC-..ths.......t.................................. 
0 68 139 207 +4891 PF01014 Uricase Uricase Bateman A, Griffiths-Jones SR anon Pfam-B_1333 (release 3.0) Domain \N 25.00 25.00 26.00 25.80 24.30 23.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.72 0.71 -4.09 122 880 2012-10-01 20:59:24 2003-04-07 12:59:11 13 3 380 220 499 906 5 136.40 24 88.46 CHANGED php.GhsslplLKss...........+hosh...pc+lhpssVssphph.....t..t.sh.hpucs....ssh..shsss..................pcsoltsaAt.......ssSlpphhhphupchL.sphsp.lpslplplssp+ahc...l........sh.pG...........................cu.lhtsscp.puhlps ..........................t.hp.GhsslpVlKss...........+.ssh...tccl.hshsVssphph..................p..htt.sh...hpucs......ssh...shscs..................p+sslhsaAt..s.....ssSlp..phhhplupchL..sph..sp..lppsclpls.s.tcahp..h.......................sh.tG...........................t.....scu.lhhsscp.hs.lp............................... 0 154 286 410 +4892 PF01208 URO-D Uroporphyrinogen decarboxylase (URO-D) Finn RD, Bateman A anon Prosite Domain \N 23.90 23.90 23.90 23.90 23.70 23.80 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.88 0.70 -5.52 173 4750 2012-10-01 21:20:02 2003-04-07 12:59:11 12 17 3576 28 1416 3843 3275 317.70 30 91.34 CHANGED ht.........pcthlpAhpG.cs.s-..........+sPlWhMRQAGRhlsEYpth+t..stsah-hs.psP-lsuElolpPhcpas..hDAuIlF.....uDI.ll.scAhGh.plphhp.sp......G..Phlt....pslp..........s....p-...lp..pL...........ph.ts......t..lshVh-ulphl+c....cl.....ss....clPLIGFsG...uPaTLAsYhltGtssp..shpph+phha......pcPchhcp.Llchls-sshpYlpsQl.cAGAp...slplFD....oauuhLuspp....accaslPah.p+lhstlcphh...............thPlIhaspGs.....s...hlpths..c....sG.s-...s.lul...................Dhps.......sl....stsp..........................................ph.....slQGNlDP.shLh....us.ctlcp.......cspchlc...........httttsaIhNLGHG...lsPpsss-slpthl-sl+ph 
.....................................................................................................................h..pphhlcAhht..p...ss...................hsPlWh...MRQ............AGRhh..........PEYpth+t...ths..hh.ph.s....pss...-..l..ssE...lT.l..pPlcpas....hD.A.A.IlF.............SDI..hs...p.u.h.Gh.sl...th...t..ut..........G......P..hh.p......pslp........s..........htD....lp....pL...................................t..ss..............clshVhculchl+c...........cl......tt..........................cl.P...L.I..G...F..s.G.........u..P..a..T......L.A.s.Y.hl..EGts.........S+sapph....+.t.h.h.a......pcP.p....h...h+t.L.Lc.pl.scssh...t...Y.Lp........s...Q......l....c......A......G...A.p.................u.l.l...FD.............oW...u..G....h....L...s....s.......ps..............a.ppFsh.sah.p+l.l.stlp..ppts............................PlllF.s.p.Gs................u.t....hlp.ths.....p........sG..sD.......s...lG...l.....................D.W..p..s.........s.l......cstc......................................plu..sphsl...Q.G......N.h..D...P......s.h.Lh....u....s........p...t...l.cp..................clppllp...........hststG.alh.NLG....H..G...l...h..s..p...ss..sE...plpthlctV+p.h.............................................................................. 
0 553 997 1240 +4893 PF01175 Urocanase Urocanase Finn RD, Bateman A anon Prosite Family \N 23.20 23.20 23.20 23.50 23.10 23.10 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.66 0.70 -6.47 57 1993 2009-01-15 18:05:59 2003-04-07 12:59:11 13 6 1787 16 520 1576 698 525.80 53 93.68 CHANGED .l+....As+G.....spLs..sKuWhpEAshRML.NNLDP-VAEcPc-LVVYGGhG+AARsWpsa-tIlcsLccLcsDETLLlQSGKPVGlF+THtsAPRVLIANSNLVPcWAsW-cFpcL-ptGLhMYGQMTAGSWIYIGoQGIlQGTYETFstsuRp+aG....G........sLpG+hhLTuGLGGMGGAQPLAssMsGusslslElDpsRIc+RlcptYLDchscsLD-AlthhccApppt.cslSlGLlGNAA-lhscLlcR.....Glh.....PDlVTDQTSAHDPlp.GYlPtGhol--hpchpp..p-P...pthhptucpShscHVcAMLthpptGs.sFDYGNNIRphAh-t.Gl..csA...........FcaPGFVPAYIRPL.FCcGhGPFRWlALSGDP-DIh+TDptlhELhP-sc+L.............ppWlchAc-+ltFQGLPARICWlGhG......-Rt+hGLtFN-MVtsGELpAPlVIGRDHLDsGSVASP.RETEuMtDGSDAluDWPlLNALlNsAuGAoWVSlHHGGGVGhGhS.HuGhVlVsDGT-tAscRlpRVLssDPuhGVhRHADAGY-tA.hpsA+-.ps........lclPhhp .........................................h.l+A.pGsplp..sK..uW.sEAshRMLhNNLDP-VAEpPc-LVVYGGhG+AARNWpsa-tIlcsLcpLpsDpTLLVQSGKPVGlF+THpsAPRVLIANSsLVPcWAsW-+Fp-L-ppGLh.MYGQMTAGSWIYIGoQ......GIVQGTY.........ETFspsu.RpHas........................G......sLpG+hhLTuGLGGMGG.AQPLAushAGusslslEsDpoRI-hRlcptYl.Dchsss.LD-ALshh..................pctpc.pt.cslSluLhGNAA-lh.cllc+........slp.....sD.llTD..QTSAHDPls.G.YlPtGhohE-hpphtp...p..D..P..pthhctucp..SMspHVcAMLshppt.Gs.sFDYGNNIRphAh-.t..Gl..csA...................FcFPGFVPAYIRPL.FCcGhGPFRWsALSGDP-DIh+TDttsp-.lhs.-.s.c+L............................apWlchAcE+ltFQGLPARIsWlGht...........................pRt+lGLAFNEMV+sGElp.APlVIGRDHLDsGSVASPNRETEuM+DGSDAVuDWslLNALlNoAuGAoWVSLHHGGGVGMGaS.HuGhVlVsDGo-cAscRlpRVLtsDPusG..VhRHuDAGY-hA.lcsApE.ps........lplPh..t............................... 
0 165 289 419 +4894 PF02083 Urotensin_II Urotensin II Mian N, Bateman A anon IPR001483 Family \N 18.60 18.60 19.30 19.30 16.80 15.80 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.31 0.73 -6.26 0.73 -4.32 8 70 2009-09-10 16:31:46 2003-04-07 12:59:11 10 1 37 0 27 66 0 11.70 67 10.48 CHANGED ssss-CFWKYCV .....tsps-CFWKYCV 0 2 5 12 +4895 PF02393 US22 US22 like Bashton M, Bateman A, Zhang D, Aravind L anon Pfam-B_1016 (release 5.2) Family US22 proteins have been found across many animal DNA viruses and some vertebrates [3]. The name sake of this family US22 Swiss:P09722 is an early nuclear protein that is secreted from cells [2]. The US22 family may have a role in virus replication and pathogenesis [1]. Domain analysis showed that US22 proteins\ usually contain two copies of conserved modules which is homologous to several other families like SMI1 and SYD (commonly called SUKH superfamily) [3]. Bacterial operon analysis revealed that all bacterial SUKH members function as immunity proteins against various toxins. Thus US22 family is predicted to counter diverse anti-viral responses by interacting with specific host proteins [3]. 
35.00 35.00 35.10 35.00 33.50 34.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.58 0.71 -4.10 36 718 2012-10-01 20:46:44 2003-04-07 12:59:11 11 4 53 0 8 635 0 131.60 19 44.51 CHANGED sttshtslpphsccppGpplslstst.....thhlhlsshpshh..............tttthpphttthl.spspphhllGhlsp..s............................................................................................................hsphllllsptGpVYsacs..............cplahlA.sslppFhctGlhphtthh ...................................t...s.ttlpphlpchpGpplsLthPt.....shhlhl.s..stpphh...................stphhpphhpt..hh.s..tsp....th.hslGslsthts....t......................................................................................................................................................................................psphl...llls.ptGpVasac.s.tp..............sp.lahlA.cslppah+.hGlhph....h.......... 0 3 3 8 +4896 PF00577 Usher Outer membrane usher protein Bateman A, Desvaux M, Eberhardt R anon MRC-LMB Genome group and Prosite Family In Gram-negative bacteria the biogenesis of fimbriae (or pili) requires a two- component assembly and transport system which is composed of a periplasmic chaperone and an outer membrane protein which has been termed a molecular 'usher' [1-3]. The usher protein is rather large (from 86 to 100 Kd) and seems to be mainly composed of membrane-spanning beta-sheets, a structure reminiscent of porins. Although the degree of sequence similarity of these proteins is not very high they share a number of characteristics. One of these is the presence of two pairs of cysteines, the first one located in the N-terminal part and the second at the C-terminal extremity that are probably involved in disulphide bonds. The best conserved region is located in the central part of these proteins [4-5]. 
18.90 18.90 19.70 19.00 18.60 17.90 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.58 0.70 -5.79 34 6892 2012-10-03 17:14:37 2003-04-07 12:59:11 15 21 937 10 466 4642 66 502.70 31 67.01 CHANGED ahshpsGlNlGsWRLRsss..oaspspsp...........ttpappsphaLpRulspL+ucLslG-shTsuclFDohsFpGspLsSD-sMlPss.RGaAPsl+GIApo.sA+VTlcQNGhlIYpphVPPGPFpIsDl.sssss...GDLpVplpEpDGshppasVPhoolshhhR.Gph+YslsuGchcss......sppppsshFhpuohtaGLstshTlYGGshhup.cYpuhuhGlGtslGshGAlShDsTpupuphsspp.....scpGpSa+hpYsKshstssTslplsuYRYSocsahohs-hl.sp........................phphsp+..sphplsloQsl.us.....usl.lsssppsYW..........tssssspphpsuassshts.lsholshShscsptpppt.....DptlulslSlPhsph.s..............hhuohshspspsupss.psGl.Gshh.cpphsYslptuhsssspps....sshshsapushuplsuuhsasps...pphshulSGuhluasp.Glshupths...sThsllcssG.suGstlss....uspTDhpGhuVlshhssYppNplslDsssLPssl-lppsstpVlPTcGAlVhspF ..................................................................................................hthpsGlNlGsWRlRsps..s.a.spss.sp.......................tpap..ptah....pR....sl.slp.upL.tlG-.s.h..o.s...u...s..l...F.D..o.hs.a.p.GspLtoD.cpMLPss.pGa.APh.l..pG..lA.p...o..sApVolpQNGhhIYpotVsP.GsFtIsDL..sssss.....................GDLpVslcEsDGp..ppa.plPa.uol.P.hh.R.Ghh+Ysl...ssG....ch+st...............sttp.p...ps.Fhpush...aGls......s.shT...........h..Y......G.Gh.......hu...p..pYpuhs..hG.h..Ghs..h.......h.G..A.lShD.sTpup.u......p...h....s.s..tp.............p.pG.....p.....Sh.....R.h..t...YsK...p..h....s...p...o...s..T...sh..p....l.s..u..YR....Y.S.o.p.s.a.hshs-hh.tp.t..............................................p.ths..p..+......sphphslsQ.sl..st........huolhl.o.h..s..p..p.........s.YW..............sss....s..pspph.phu..as.s.....s..h.....t....t.....hs..h..ol......uhs....hsp.....st.....pppt......................-phh...l...sl....Sl.Phsth......................hohs.h..s.p.s..p.p..u..t...s.s.p...p..h.ulsG...shh....s...s...p...h...sYs..lpt.uhsp..p..sspss.........ssushsap.us...h..up..h..ss.......u....h.....
.s....h.....s....p.....s..........p.........p..hs....h....uh......s..........Gul......l..s..a..st..G.ls.h..u..p..hs...........-Thsll.c.......A.....s.G...s..t..s..s........l......p.....sp........sstTs..hh...G...huVlshhosYppN...plslDss.s.l....s.s..s.s-l.pp.s.s.tp.lsP..s.c..GAlshspF............................................................................................................... 0 58 144 319 +4897 PF04871 Uso1_p115_C Uso1 / p115 like vesicle tethering protein, C terminal region Kerrison ND anon Pfam-B_6073 (release 7.6) Family Also known as General vesicular transport factor, Transcytosis associate protein (TAP) and Vesicle docking protein, this myosin-shaped molecule consists of an N-terminal globular head region, a coiled-coil tail which mediates dimerisation, and a short C-terminal acidic region [1]. p115 tethers COP1 vesicles to the Golgi by binding the coiled coil proteins giantin (on the vesicles) and GM130 (on the Golgi), via its C-terminal acidic region. It is required for intercisternal transport in the golgi stack. This family consists of the acidic C-terminus, which binds to the golgins giantin and GM130. p115 is thought to juxtapose two membranes by binding giantin with one acidic region, and GM130 with another [2]. 
22.20 22.20 22.30 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.88 0.71 -4.23 6 224 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 191 0 157 229 1 127.70 25 12.86 CHANGED tclpsEuptppshAAKhh-h-s+h.tph.suLtQtpppLcpE.....hKuLpt...s..t...tcsspphsplcslKpcLctE.........u.p-ucsEh-DLLlLLuDp-pKlp+hcu+Lp-LGh-V..--......tD-u.tsp--D--E .........................................................................t...............ttt....-.p.....ph...............s..t.L..p.t.p.pp..hcpc...................hctLp....c...t.....p.....t....thp.pp...h........pptpsp..h.s..t..........L.p.s.t+pc...L..c...t.E................................s.p..c..s..cp....E.-DLLlLLuD.-pKlpphKp+LK-LGp.pVp---........t.p..--p.-cppt....p............................. 0 37 75 124 +4898 PF04869 Uso1_p115_head Uso1 / p115 like vesicle tethering protein, head region Kerrison ND anon Pfam-B_6073 (release 7.6) Family Also known as General vesicular transport factor, Transcytosis associated protein (TAP) and Vesicle docking protein, this myosin-shaped molecule consists of an N-terminal globular head region, a coiled-coil tail which mediates dimerisation, and a short C-terminal acidic region [1]. p115 tethers COP1 vesicles to the Golgi by binding the coiled coil proteins giantin (on the vesicles) and GM130 (on the Golgi), via its C-terminal acidic region. It is required for intercisternal transport in the golgi stack.\ This family consists of part of the head region. The head region is highly conserved, but its function is unknown. It does not seem to be essential for vesicle tethering [1]. The N-terminal part of the head region, not within this family, contains context-detected Armadillo/beta-catenin-like repeats (Pfam:PF00514). 
25.00 25.00 27.70 25.80 20.00 20.70 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.77 0.70 -5.67 26 295 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 240 4 205 303 1 289.20 30 29.08 CHANGED GNtphQppFuplsV........s.h-...ss.........s..lsVl.sLLphhLt.ssh...psaDlRsAushClcuYhhsNpphptpFLpptIsuapsss..............................................s.psN....lhssLht..p.ph..sh-PYphWFuullLhHllh-sscs....KphhhpV......t..pGspssGE-slosIQs...........loslLlsslp.spD....................RlslGYLMLLssWLac-.sAVs-FLu-toslpsLls......sppssspsslVpGLsuhLLGlsYEFS.opsSPhsRtcLapLlhpplGp-sYhsKlppl+cpslapchp...psphshs.cp..tLP.........-laFDphFlcLhK-sasRlp+Al ................................................................................................................................st..Q-.Fuplps..................................s...ss.shsslssLLh.hlpp..p.............pshslRsAshhChps..ahhcNppsptpllps..h...lsuphsss..........................................................sthss..........lhssLh................ssDs...hps...Wh...AuVhLhHhl.-sspt....Kc.hhcV..................................phss.s.......G..ptslo...hlQp............hs.shL..p.....t....s.sc...............................................phplGhLhLLssWLhpss.AVscFLp..............psuslthLhtt............spp.s.p..pc.ll..pGLsAhLLGlsh.Fs.sp...s..s................hs+p.....plpplltpRlGp-pahp+lstlpcp.hapchp.........p.phstsp......................................phhFDppFschhKc..uhlh+ul............................................... 0 68 111 170 +4899 PF00582 Usp Universal stress protein family Bateman A, Griffiths-Jones SR, Kerk D, Studholme, DJ anon MRC-LMB Genome group Domain The universal stress protein UspA Swiss:P28242 [1] is a small cytoplasmic bacterial protein whose expression is enhanced when the cell is exposed to stress agents. UspA enhances the rate of cell survival during prolonged exposure to such conditions, and may provide a general "stress endurance" activity. 
The crystal structure of Haemophilus influenzae UspA [3] reveals an alpha/beta fold similar to that of the Methanococcus jannaschii MJ0577 protein, which binds ATP [2], though UspA lacks ATP-binding activity. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.68 0.71 -3.99 231 22843 2012-10-02 18:00:56 2003-04-07 12:59:11 21 106 4199 94 7308 17012 1965 137.10 18 62.93 CHANGED hhc+llVuhD..soppup.pAlphAhp...hupp..tstlhllaVhsstshtttsthhttttttthttttt..................................t.thhthphhhhtssssptlhphscptss-llVhGspu............hsshpc.hl...lGS.ssppllcpuss..PVll.l+ .........................................................................................................................................................pplllsl..D.........so.pp.....up......p....u....l..c..tAhp...........................hApp........s....u.....p....l...p.l....l....p..l.....h......s......s.........s.....h...t....h......s...t.....t..h....t......h.....t...h...t...t...t..h.t..t.t.t...t.tt.............................................................................h.t.t.h.s.t..h.....p..h..h...l.......t.....t.....u.....s.....s....t.....p....s....l.h..c....h.............s.........p.....p....t.......s..........s.........-.......l.lVhG.sps.............................ts.s..hpp..hh..............lG.S...su..ppl.l.p.....p...u..p..s...sVlll............................................................................... 0 2051 4696 6329 +4900 PF03253 UT Urea transporter Bateman A anon Pfam-B_3193 (release 6.5) Family Members of this family transport urea across membranes. The family includes a bacterial homologue Swiss:Q9S408. 
25.00 25.00 26.30 26.30 19.50 22.30 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.92 0.70 -5.53 26 784 2009-01-15 18:05:59 2003-04-07 12:59:11 9 9 618 6 188 566 13 275.00 32 84.53 CHANGED phl.....-hlLRGluQVhF.NNPlSGLlILhulhl.....psshhulsullGslhSTLoAhllstc+utIssGLaGaNGsLVGlhluhF.sh........hsh.hhhlhhsuhhssllsuu.LtslhspaclPshThPFslsshLh..lhust+hs.h.st..h........pPssstss.shsthsls..phlpulhhGlGQVahpsNsloGhlhLlulhlsS.lhslaAllGSslGhlsu.LhlusshsslhtGLaGaNslLsslAlGuhFhhhshpotLhulhsslhsshltsulu.hhtslGLPshThPFslsoh.hhLlsssphphhc ........................hlchlL+uhuQVhh.sNshoGLhlLlulhl.....ss.tl.uluuhlGollushhAh.......hls..h......s..c.............s.pl..psGLhGa...NusLsulslslFhst.............phhhlhh.shluohhsshlssA.lp.p.l.....h.p.....a.....clPshThPFllssW.hh....lhh..ssth.phh....sshhh..................hPt......tss.....h.s..ph.ph..................phlpulh.GhuQVahts.s.s.luGllh.llGlhIsShhsulhAlluShluhhhs..hhL.....uu....s...............hss.....It.tGLa..GaNslLsuIAlGshF.t..s.h..p.shlhslh....u.s.lho.sh.lp......huh......sshhtshGlP.shThPFllsoW.lhLhss...................................... 0 43 66 119 +4901 PF01099 Uteroglobin Uterglobin; Uteroglobin family Finn RD, Bateman A anon Prosite Domain Uteroglobin is a homodimer of two identical 70 amino acid polypeptides linked by two disulphide bridges. The precise role of uteroglobin has still to be elucidated [1]. 24.80 24.80 25.10 24.90 23.80 23.30 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.16 0.72 -4.17 32 271 2012-10-01 20:54:19 2003-04-07 12:59:11 12 1 46 12 118 280 0 65.00 24 72.61 CHANGED Cssltphlpthl.s..o.spYct.LppapssstshpAttplKpCsD.phopcs+tpltphhtpIhpS......hC ................Cshh.phlpthl.s..o.stYcthLppasss.tshpAhtplKpCsD.pls.c.s+tpltplhttlh................... 
0 9 9 22 +4902 PF03998 Utp11 Utp11 protein Bateman A, Wood V anon Pfam-B_6404 (release 7.3) Family This protein is found to be part of a large ribonucleoprotein complex containing the U3 snoRNA [1]. Depletion of the Utp proteins impedes production of the 18S rRNA, indicating that they are part of the active pre-rRNA processing complex. This large RNP complex has been termed the small subunit (SSU) processome [1]. 25.00 25.00 26.30 25.90 23.60 23.30 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.92 0.70 -4.55 50 346 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 301 0 243 331 3 230.20 32 92.06 CHANGED ++sH+ERuQPpsRp.+hG.lLEK+KDYphRAcDY+cKpppLKtL+cKAtp+NPDEFYatMhssKssc........Ghthtppt.....spthoh-pl..+LhKTQDhsYlcpptps-t+Klc..+hpppL..h..s.tsps................................................+HhlFsDscc-.pphp.tph........................possphlscpps+..p.ptlpt..h...........................t..pphpppptpphppLpp+hpRpcpLpplppchphp+clh..ppt..ppp+lhtsp..............................sssha+a+tpRKR ..............................p+pH+ERuQPttRp.+hG..lLEK+KDYphRAcDa+cKpppL+tL+cKAtp.+NPDEFYatMhss+sps...........Gh+htppp.................................pcpho.-.pl........+Lh+TQDhtYlchptpt-t+Kl-.+LpppL.....th.hsht...sps...................................................................................+HhhFsD.s.cc.......Ehc.p......hp.tp.hh...........................pstsphhsct.sc........phppltp...............................t.tthpphtcppppphppLpp+hpRtppLthhtpchp..hp+thh..........t.t.....phh+h..h.tt.t...............................................t.shahahtpRK+........................................................... 0 84 134 201 +4903 PF04003 Utp12 Dip2/Utp12 Family Wood V, Bateman A anon Pfam-B_10105 (release 7.3) Family This domain is found at the C-terminus of proteins containing WD40 repeats. These proteins are part of the U3 ribonucleoprotein the yeast protein is called Utp12 or DIP2 Swiss:Q12220 [1]. 
26.00 26.00 26.00 26.00 25.90 25.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.08 0.72 -4.15 96 902 2009-01-15 18:05:59 2003-04-07 12:59:11 7 30 298 0 660 885 5 107.30 20 13.59 CHANGED s-pphlppll.pslssspIctsltpLPhsal.pLLchlsphhp.pps.....+lphhhtWlphllptHup.lssp...........................plhspLpslpphlppphpplpclhshN.ttLphLtsph ...................................ppphltpsl...pslss.s.plcpslt...pLP.h...........s.....hl.p.LLphlsphhp..pp.s................clphh.ht..WlphlLphH.ushlssp...........................................p.hhstLpsLpphlppphpplp...clhshN.ttLphlht..h.......................................................... 0 207 353 543 +4904 PF04615 Utp14 Utp14 protein Bateman A, Wood V anon Pfam-B_5404 (release 7.4) Family This protein is found to be part of a large ribonucleoprotein complex containing the U3 snoRNA [1]. Depletion of the Utp proteins impedes production of the 18S rRNA, indicating that they are part of the active pre-rRNA processing complex. This large RNP complex has been termed the small subunit (SSU) processome [1]. 
21.20 21.20 23.80 21.60 19.70 19.70 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.68 0.70 -13.55 0.70 -6.37 39 482 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 291 0 309 487 11 532.10 22 84.48 CHANGED sS--...............................ps-pp.....--cchpcLhshlssLspcscp......................pp+pttsptptp.sEFsl.....sopst......KLslsDL.lsslp....ssphppstKtLsphppspp.........LcsPLsKcpQ-Rl-RpAAY-popcpls+WpssVppNRcAEpLhFPL....t..sstssshsphhsshpPpT-LEpcltslLpcSsLs.............s-cphsphE.-lphpchohEEh+tRRsELp+hRpLhaRcEtKAKRlKKIKSKsY++lc+Kc+t+...pp.pptLhcsss-tuc--hpch-cpRApERMoLKHKssSKWAKshhp.GhuphDp-sRpuhpE.LppsccLp..cKlpupp.sctspc..ssps.p-s.-cp........t.ps.pththppchpchtt.t..t............stlhshpFMpsuEscc.+cpscpEhctLp......cEhct.-stppc.t.................tppp.GRRpau.....ttpt.sppttppsppphcpthts-cppt.tsc.ptphppt.s........................tpspptpptpcppssscpNsahppspppspssppptpp.................tp.s.t.tpht.t...........ppcpKpppsstschhhspsp.............................sppp---tp.s..........hcpp-lIpc......AFAGDDVV.t-FppEKpcslcc-ssK.-lDhTLPGWGsWuGsGlppp...p..p...++hltKsctl.phcpRKDppLppVIINEK.ps.KKss+a.sspLPaPFco+pQYERSlRhPlG.-WsocpoaQchT+PRVlsKtG.lIcPhctPh 
...............................................................................................................tp...................................................................................th.t........h................................................................................................tl.ltph.h..ht..........t...t.....t.hp....h....t..t.ttt....................................lt.h..PL.p........pc.hpRthsht.spp.ht.p.W..hl.........ttc..........putp.l.Fs..................................................h....t..s..ts.hptpl...thh.tst........................................t..c......hthtt...hs..cEhh....+ptc...hthhRtl.....h.ph+u+R.pKIKSKta++lh++t.c.+.............................................pth.......t..ss.........t..s.p.h...t.-htRh.ERhsh+Hp..p....upWA+ph.....hst..c.p.sRttht-...h...thtccLp..p+h..................t.tpt....p.........t..............................................t...h.tt.tt.................................................tt..t..hhpts.ttt.....tt..t.t.t.h......................pp.pt....pt..tt..............................................p.tcp.....t..................t..........t...t..p....tt..............t.......................t.....................................................................................tt..........t..........tt.h.ttt........t...............................................................................ttptp....t...t..p.....t.t...............................................................tt.ptp.t......................................pppthltp......AF.....s...s.D..-l..t-F.p-Ktp...pt....pts...........p.......h.....s.........s..L.PGW.Gp.WsG.s....hp.p......................p+.hh+h..............R+Dtph.pVII.s.E....c..hs..h...............p...tth.sp.lPaPap.p.t.paEtshphPlG.passttshpthhtPpl............hh+.G.lItPhp................................................................................................................... 
0 127 183 260 +4905 PF03851 UvdE UV-endonuclease UvdE TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.10 20.10 20.30 20.20 19.90 19.80 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.82 0.70 -5.38 6 508 2012-10-03 05:58:16 2003-04-07 12:59:11 9 6 393 5 182 475 377 265.00 34 70.05 CHANGED hGYVshshtLtsspPshThotTphhphtccEt.............................cpLhclspsNl+shl+hLcaNhuasIphaRLSSslhPhAoHP..chGachhshhsppLpElGclss-ashRlohHPsQFTllsSs+ccVscsAlpDhsYHh+lLcuhtls-p....ulllIHlGGtatsKcssl-cF+cNhtcLPpslKpRlsLENDD+oYTsp-lLslCEchsIPhVhDaHHHsls.....hccssL-.sh.....RIhpTWp+pslp.KlHlSsPtsspshps+p-hhcuchlhsF.pph ..................................................................hGYss..hs..hhL.hs.s.sP...s.tho.hsp.htph.t.cp.c.t.............................c+LpclsppNLcshl+lL+aNhs.a..s..IphaRlS.S.pllPLAoHs........ht...a.....sa.....h.....t...........h...p...pthpclG....ch...spchshRlohHPs........QF.slL.sSsc.c.l..hpsulp-LpYHtchLc.shG.lspp.....shhslHlGGs.Y.G.s...Kctul....-R.FhcNa.p.p.L.....s.p.....pI...+c......plsL...EN.....DD...p..o......a...o.....hc-sL...lscc...h......s..I.PhVaDhHHHhhs..........................pcps.hc.hh...........plhp...TW.........p...........p..........p.sls...s.KhHhSsP+st..t...t.psHschhc.phhhsh...h.............................................................................................. 0 65 119 156 +4906 PF00580 UvrD-helicase UvrD/REP helicase N-terminal domain Bateman A anon MRC-LMB Genome group. Domain The Rep family helicases are composed of four structural domains. The Rep family function as dimers. REP helicases catalyse ATP dependent unwinding of double stranded DNA to single stranded DNA. Swiss:P23478, Swiss:P08394 have large insertions near to the carboxy-terminus relative to other members of the family. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.06 0.70 -5.13 36 18162 2012-10-05 12:31:08 2003-04-07 12:59:11 16 135 4762 24 4265 16492 6532 265.50 23 35.17 CHANGED LNspQppAlpt...hpushLllAGAGoGKT+llspRlsaLlpppt..lsPpp........ILslTFTNKAApEM+cRltp...................hltpt.........................hcthhluTFHohshclL+pphppls.......................hppsFplhDppD..................................phtllcclh......pt.hshstchhp............................................................................................................................................................thpthlsphKsphhpspphtp.................ts.htp.htphappYppphpppsh........................................................................................lDFsDLlhhshp..................................lhpp.............spplhpphpp+a+alLVDEaQDTNthQYpll+hLsspptp...........lhlVGDsDQSIYuaRGAclpNllphpc-as ..............................................................................................................................................................................................................................................Q.t.s..h.............tt.....s....h.....l..l...Au...........A.GoGKT.t...s.l...s...t....+........h....h.h.l...l..............tt.............h.t..s....t.p..........................................ll.slTFTp....t..AAt..E..h.c...pR...ltp..............................................................................................................hh.......................................................................................t.....t...h.hl......t....T.h.....H...u..h...s......p..h....l...p...t........h.......h.s............................................................................................................................h....t..h.p..h...pttp.....................................................................................................................t.h..h..t....thht.....
..........................................t......h............................................................................................................................................................................................................................................................................................................................................h...........h....h...p...t........p.....p...t...h..h..............t.h...t...............................................................t..............t...h...h...t....t....h......h.....p....t......a....p....p....t....h....p....t....t..s..h....................................................................................................................................................................................................................lD.F..s..D...l...l..h.h.s..h..p.......................................................................................hh.pp........................................t.l..h...t...t.h..p...p...p....a...p.hlhl.DEa.QD.o.s...h.Q.h.t.....l....l.....p.....h....l...h.....t...t...t...tp............................................lh..h..V.GDscQuIY...s....a...RGA.p..p...h.t.h.pp................................................................................................................................................................................................................................................................................................................................................................................................... 0 1443 2832 3657 +4907 PF02614 UxaC Glucuronate isomerase Bashton M, Bateman A, Eberhardt R anon COG1904 Domain This is a family of Glucuronate isomerases also known as D-glucuronate isomerase, uronic isomerase, uronate isomerase, or uronic acid isomerase, EC:5.3.1.12. This enzyme catalyses the reactions: D-glucuronate <=> D-fructuronate and D-galacturonate <=> D-tagaturonate. 
It is not however clear where the experimental evidence for this functional assignment came from and thus this family has no literature reference. 25.00 25.00 25.70 25.50 22.20 20.60 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.57 0.70 -5.87 42 1495 2012-10-03 00:45:34 2003-04-07 12:59:11 9 6 1344 79 293 1024 317 436.20 44 97.60 CHANGED Mp.hhsc.sFLLs.scs...AcpLa+chApshPIhDYHsHLsPp-Ih-s+hapsls-lWLh....GDHYKWRhMRtsGlsEphlTG.susshEKFhAaAcTl.hshuNPlYcWoHLEL+R............hF...Glst.hlspcsAspIWcpsNthLt.s....sshpscplhppuNVchlsTTDDPhDsLchHptls.t.....cpsh..hpVhPuaRPDcslplct.psassalcpLutssshplpshsshhpALcpRhcaFcphGschuDHuls.phhasc.ss..phpthhtph.hssp.hottp.tpapohlhhhLsphhpcpuashQlHlGuhRssNsthhppLGsDsGhDSlsc.s..spsLppLLsphsppstlPKhllYsLNPp.sshhluohhssFp..tt.h..plQhGuuWW.....FsDs.tsGMhcQhpphup.uLhosFlGMLTDSRSFLSY.sRHEYFRRlLCsllGchlE....pGphPs-tphlschVcDIsasNuccaF ..........................................................Mt..Fhs-.sFLLp.s-hA+cLYHcaAcc.PIhDaHCHLsPppIA-.s.hpFcNlsclWLt.............GDHYKWRhM.......Ros.......G.......V..........s..........E.....c.....h.....h........T.....G.......-..........u.....o.....D....hE....K.......FpA.W...ApTl....P...pslGNPL.YHWo.HLEL.+R...............sF.....GIst....lLuspoA-cIasp.sN-h.Ls.p....ssFosRu.lhpp.hNVchluTT..DD.PlDsL-aHtplut.........Dsuas...h+VlPoaRPDKu.hs.....I.-t.ssFsca.l.t.+Luclus.s.s.Ip...passhhp.ALpcRh-aF.st.p.G.C+.sSDHul-...slhasp.......s.........s........-s..clcuIhs+t..luG.p.s..l.......opc.Ehs.pF+oslLlhLut..YtccGWV.Q.hHhGAlRNNNt.chFchLGsDsGaDSIsDps..huptLs+LLsphspps.LPKTILYsLNPp.DNcsluohhGsFQ......stuh.....s.GKlQaGouWW.....FNDp.+-GMpR......QhppLu.phGL....LSpFVGMLTDSRSFLSY.sRHEYFR.RILCphlGchVp....sGEhPs.D..sh..L.uchVpsIsasNAppYF......................................... 0 103 204 246 +4908 PF03786 UxuA D-mannonate dehydratase (UxuA) Finn RD anon COG1312 Family UxuA (this family) and UxuB are required for hexuronate degradation. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.21 0.70 -5.89 10 1615 2012-10-03 05:58:16 2003-04-07 12:59:11 8 7 1348 10 330 1058 320 354.40 48 97.28 CHANGED MchsaRWaGstsDsVoLpcl+Ql.GVcGVVuALacIPs.......G-lWshEcIhchKcpIEsuGLslsVlESVPVHE-IKlGsssR-+YIENYKpTIRNLAcsGI+VlCYNFMPVhDWoRTDLphcLsDGSpALtF-cpclsshsPp..lh+p.sustsas.............................LPGhE.ph.Lsph+phhptY+cIDpEcLa-NLuYFLccIIPVAEEsGVKMAIHPDDPPaP.IaGLPRIVost-shc+llchsDSPsNGIThCoGShGspssNDls-MI+cFuc..RIaFuHlRNlKtp..sscsFhEouHhs..GulDMhslhKAhh-psacG..........hhRPDHG+plaG-p....spPGYuLhsRhhGluYlpGLa-Alpp ....................................Mc.TaR.WYGs.....sDPVoLpclRQh..G.soG.lVoAL.H.c.IPs.........................G.E..l.W..sh-EIhchKthl......E.......s...........s......G......L.pasV.........VE....S..V.P..l..H..E....-....IKtt.s....s....s.h-paItNYppTLRNLApsGIcsVC...YN..F..M..P.V.hDWTRTDLpa...h.sDGS.puLtFDphp..hs.....uh.-hp...ll..c.p...s.u.p.s-as..................................................LPGhE...pth...tL.......sph+ph..L.....thYc..sIscscLR-NhtaFLcsIlPVAEE...sGl+MAlHPDDPPhs...I..hGL.PR.Iloo..h..-Dh.phhlc.s.V..sS..uNGh.ThCTGSh....G...s...c...s..-N........D...L.ss.hl+p.FGs...RIaFsHlRss......p......t....p.............s........s....p....sFaEuuHLs.......GslDMapllKA....ll....-.p.p.act..........shRPDHG..+ph.hsDh..............tspP.GY....uhhuRhhGLu.lpGl..Alp.t................................ 
0 117 222 269 +4909 PF03223 V-ATPase_C V-ATPase subunit C Mifsud W anon Pfam-B_2945 (release 6.5) Family \N 23.10 23.10 23.90 29.00 23.00 23.00 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.06 0.70 -5.54 33 459 2009-01-15 18:05:59 2003-04-07 12:59:11 10 4 321 1 294 434 5 354.10 38 95.30 CHANGED ahllShPsptsstps.....hpph.hhs.thtsshssstcFslP.-hKlGT.................LDsLlshSD-LsKlDstlEuslpKltphlt-l...psppspltpshhssst................................................................................................................................................................................................................................................................................................................................................................................................................................................................slssYlpp....FpW...spuKYshcp.slppll...-hluppssplDsDl+s+hssYNssKusLpshpRKp...........................oGsLts+sLs-lV+t.....-cFl..cSEY..LsTlLVsVPKs.hp-a.psYEoLoch..............................VVP+Suphls.....pDsEasLasVsLF........................KKsh--FpppsRE+.KalVR.-FsYsEcthppt+pEhschtscccp.h............................................ssLlRhh+ssaS-sFhuWlHlKALRVFVESVLRYGLPssFtuhllpsst...Kst.c+l+phLtphasaLss....suhststcsc..hs.suL...........hsppEYaPYV 
...........................................................................ahLlShPsptpstps.......hpp.hpthp...thp.ss..hu.sshcFsIP.-h.K..lGT...................LDsLlshSD-LuKl.DshsEu.....llpKlsphltcl..h.-.ss..........psplpp..phhsN...s...h................................................................................................................................................................................................................................................................................................................................................................................................................................................................slspY.l.p.p....FpW...shuKYsh..c.p.sLppls.................-hlscp.....lspIDsDlKs+hstYN.slKssL.ps.l.pRKp...........................s..G..sLhsRsLs-lVc..................-c...h....V...-SEYLh..TlLV.lVPKtshp..-W.psYEoLssh.......................................VVPRSophls..........cDs-.hsLasVTLF............................+Kss--Fpp+AREp.KFh.lR.-FpYs.Ect....hcpp+cEhs+lts-c+c.h...................................................u..LlRhh+ssaS.....EsFhuWlHl.KAL.RVFVESVL....RYG....LPs..sFtuhllpssp.......................Ks....++l+phLpphataLsu....suhst..hps.....t.....hp.sul.............hspp-YhPaV............................. 0 99 156 232 +4910 PF03179 V-ATPase_G Vacuolar (H+)-ATPase G subunit Mifsud W anon Pfam-B_1274 (release 6.5) Family This family represents the eukaryotic vacuolar (H+)-ATPase (V-ATPase) G subunit. V-ATPases generate an acidic environment in several intracellular compartments. Correspondingly, they are found as membrane-attached proteins in several organelles. They are also found in the plasma membranes of some specialised cells. V-ATPases consist of peripheral (V1) and membrane integral (V0) heteromultimeric complexes. The G subunit is part of the V1 subunit, but is also thought to be strongly attached to the V0 complex. 
It may be involved in the coupling of ATP degradation to H+ translocation. 22.90 22.90 23.00 22.90 22.70 22.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.38 0.72 -3.74 34 591 2012-10-02 21:03:42 2003-04-07 12:59:11 10 9 372 2 345 587 15 99.90 33 78.74 CHANGED upssGIQpLLpAE+cApchVscARct+scRLKQAKpEApcEI-pYRtp+Ep-F+chcspphGs..pssstpcl-p-TppcIpplppssppp+cpVlphLLphVhsl+ ........................pppGIQpLLpAE+cAtchVsc.A..Rc...........p......+s......cR.......LKQ.AKcEA.....ptEI.-p..YRtp+EcE.Fc.p.hps.pt..........hGu.......ptshspcl-.p-Tptcl..pplppthppp...pcpVlptLlphVhsl..................................... 0 105 184 265 +4911 PF03224 V-ATPase_H_N V-ATPase_H; V-ATPase subunit H Mifsud W anon Pfam-B_2481 (release 6.5) Family The yeast Saccharomyces cerevisiae vacuolar H+-ATPase (V-ATPase) is a multisubunit complex responsible for acidifying organelles. It functions as an ATP dependent proton pump that transports protons across a lipid bilayer. This domain corresponds to the N terminal domain of the H subunit of V-ATPase. The N-terminal domain is required for the activation of the complex whereas the C-terminal domain is required for coupling ATP hydrolysis to proton translocation [3]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.92 0.70 -5.31 37 456 2012-10-11 20:00:59 2003-04-07 12:59:11 9 17 312 1 294 475 12 285.20 24 62.84 CHANGED shlsphtsplRtp.....lsWpuhhcuphlopcshphl+pl-ph..pp.+tp...hl.ppsss.thsphhl......sllppl...pcp-slpYlLsLlsD.lLsps...thtphhhshtp.tp.p....sapshLp.hhppp.Dthls.h...ushllspllstsshptsp..................chL....thhhsh..Lps...hhss................................................................................pssth...........................hslpsLptlLptcpaRthFhp..scslphLhs.lL...................................................................................sspsslQL.YpsllslWlLSFpsphstphhppp..ll.hLscll+pos..KEKlsRlsluslhNLl...spstpt.........................................hhthhl.sphlt.hlppLppR+a....sD--lh-DlphLp-hL ............................................................................................................................................................................................................................................................ht..ttplhtp.lsWpshhput..hlotpphphlpth-ph.ptp..+tp.........hl..pppss..ths.phhl........slhpph.........p+ppslpYlLshls-..hLpps.....ptsphhhp.tt.tp.t..............at.hL.t...hh.s..pt..-.hh..h......uupllsplhshstp..pt..........................psL....phhhsh..lps.......h.p.s...........................................................................................................................ss.sth..............................phsh.psLphh.L.+hpp.hR.hahp...........ssslp.slhs...lL...........................................................................................................................sspsshQl.YphlhslWhLoFpsthst.p.h..p.t..................ll.hlscllptss...KE.KlsRlhlushpNhl..ppstt..........................................hh..h.l.....spl.t..lp.Lpt.p.+.a....sDtDl.cDlp.L.-hL.............................................................................................................
.............. 0 106 168 246 +4912 PF01639 v110 Viral family 110 Bateman A anon Pfam-B_1518 (release 4.1) Family This family of viral proteins is known as the 110 family [1]. The function of members of this family is unknown. The family contains a central cysteine rich region with eight conserved cysteines. Some members of the family contains two copies of the cysteine rich region Swiss:P18560. 25.00 25.00 48.20 48.00 23.80 23.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -11.11 0.72 -4.49 10 107 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 11 0 0 103 0 107.00 40 82.88 CHANGED llsLLup......hsssptsspL.pTpsPPccELcYWCTYscsCcFCWDCQcGICKNKlhss..ssIhcN-.YlpsChVoRahsp..ChY-lst+I..hHsMsCSpP+sas.saclh ...............h.hhLLu.......hh.ththtpL.tTppPPccELpYWCTYscpCcFCW-CpcGICKNKlhcs.h.shIhcNc.altsCpVoRh...sp..ChY.hss.+h..hH.M-CSpPpsap................ 0 0 0 0 +4913 PF03402 V1R Vomeronasal organ pheromone receptor family, V1R Mifsud W anon Pfam-B_3057 (release 6.6) Family This family represents one of two known vomeronasal organ receptor families, the V1R family (after [4]). 
25.00 25.00 25.00 25.00 24.60 24.70 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.93 0.70 -4.99 2 2399 2012-10-03 04:04:29 2003-04-07 12:59:11 9 5 78 0 1464 2009 0 245.10 33 86.01 CHANGED KLlstp+s+.hDL.IuhLSLhpLhhLlhhuhIAsDhFhsWttWsshhCpSLlYLaRhhRGLuLssoCLLsVL.sIhLSsRSSCLsKFKHKssHHISsAhL.h.VLYM.hSSHlLVSIIsTPNLTopDFhaVTQ.CSlLPMSY.hpShFSTLhAIR-sFLISLMsLSohYMVALLhRH+KQspHLpuTSLSsKASPEQRATRoIhhLhSLFVlh.lh-plVhpSRhhahsssl.Y.hplhMsH.YATVSPFVFIsTEKHIlp.hcS ..........................................................h......p.pshD..l.Il.tHLsl..sNh.l....hLL..o.p.G...l......t..s.....h..h..h..s..h..p.........h...s.....-.h.s.C.K.h.lh.Yl.hRVu.R.GLulC...oTCLL.SlFQAlTI.S.P..ss.S.p.hup.l.K.s.+.h.s..+....h...l...h...s.s.hh.h...h.W.l...l..s....h.h...ls....h.....s..h..h..h...h..h...h...u...s...p....N....s.....o....s..s....s.....h.....h..h..s..h...p.....a..C.s..h....h...s...h....s....t..h....h..p......h..l..h..s...s..h.....h.s.h....+....DlhF.l.u.LM.shu.S..GYMVh.l.L.aRH+..+p.VpH...l..Hss.s.hSs..+..ssPE.sRAs+oll.hL.V.s.haV...hhY.s..h.s...s...l...h..h......h....h..h........h..p..p...p..s...h..h..h..p....h..p..h..h.....h.s.......s.a.sslsPhl.hl........................t.............................................. 0 7 20 850 +4914 PF02830 V4R V4R domain Bateman A anon [1] Family The V4R (vinyl 4 reductase) domain is a predicted small molecular binding domain, that may bind to hydrocarbons [1]. 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.03 0.72 -4.13 52 616 2012-10-02 19:02:47 2003-04-07 12:59:11 13 22 405 2 314 617 58 62.00 25 18.38 CHANGED hhhph-sshpttshstsscPlCahhtGhhsGhhs........phhs+p..lhhcEspCtupGss...pCpahsc. ......................phcss.hht.th.st..ss...cP.lC..a.hhsGhhsGhhs.........shhu...pp...lhspEs..pCt.ut.Gcs...pCpFhsp........ 
0 95 201 264 +4915 PF01496 V_ATPase_I V_ATPase_sub_a; V-type ATPase 116kDa subunit family Bashton M, Bateman A anon Pfam-B_446 (release 4.0) Family This family consists of the 116kDa V-type ATPase (vacuolar (H+)-ATPases) subunits, as well as V-type ATP synthase subunit i. The V-type ATPases family are proton pumps that acidify intracellular compartments in eukaryotic cells for example yeast central vacuoles, clathrin-coated and synaptic vesicles. They have important roles in membrane trafficking processes [1]. The 116kDa subunit (subunit a) in the V-type ATPase is part of the V0 functional domain responsible for proton transport. The a subunit is a transmembrane glycoprotein with multiple putative transmembrane helices it has a hydrophilic amino terminal and a hydrophobic carboxy terminal [1,2]. It has roles in proton transport and assembly of the V-type ATPase complex [1,2]. This subunit is encoded by two homologous gene in yeast VPH1 and STV1 [2]. 26.50 26.50 26.60 26.60 26.30 26.40 hmmbuild -o /dev/null HMM SEED 759 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -13.24 0.70 -6.68 16 3017 2009-01-15 18:05:59 2003-04-07 12:59:11 14 11 1209 4 1136 2604 291 408.80 17 89.37 CHANGED 
lstLt-lGlVphcDLNpcls.shQRchlp-h+Rts-h-+pLthlppplp+ttl.h...........ptthp.sh...p.........ph.chptphpclEscl+plpcshcpLccphppLpEhtphLcpspphhcpt......................cthp............htsl.hs.pthp.......tsphlcl....................hluGslspc+hsshcchLh+ss+G..hht.hplcpsh.-sp......hpsallhhpucphtp+lc+Is-shthphashs-pptt.p-hlpplppclp-lpphLcpspsplcphlsthtcplhsapphlpp-Kslacshshhshss..+slIhEuWsPtc-lsplppsLccssstsu...lsslhs.hcsp-pPPThh+ssKFspsFQsIs-sYG.lspYcElsPuhhhhlTFPFhFulMhGDhGaGllhhLhALhllhtcpphsstp.........aph+YIlllhGlFSlhhGhlYN-hFucshslFtSthths.......................hpt.sl....t.s......YPhulDshapsss..tl...s.hhhthSlllGllHhsaGlhluhhNthph+...p.hslhtshlspllalhsIhGhh.h.hh.ta.h..h...............sPslL.hhl.MFLhs....s........................lQshLlhhulhslPllllhtPlhlhtpthp...........................t.............................h...hG...h.htpsIasIEh..sLGsluphsSYlRLaALuLApupLSsVl.sMshthsh.hts......slhhhllhshhhllshsl.llMpuLSAhLHuLRLHaVEFhuKF.YpGsGhpFpPFuhp .........................................................................................................................................................................................................................h.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................h.ha..............h...h............................................................................................................................................................................Ph..t..........ht...hht...h...st..ths......s..h....a..h...............hauhhhsDhu.uh...h.h..h.h.s...h.h.........h......................................................................................................h.h.........sh.shhh.G.hh.h.s.thhu...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.t.hSa.Rlhs..h....hst.................h...h.........................................................hh....h....h.hhht.h...shlps.RL..............h....................hE..t...............t........................................................................................................................................................ 
0 428 699 955 +4916 PF02346 Vac_Fusion Chordopoxvirus fusion protein Bashton M, Bateman A anon Pfam-B_822 (release 5.2) Family This is a family of viral fusion proteins from the chordopoxviruses. Swiss:P26312 a 14-kDa Vaccinia Virus protein has been demonstrated to function as a viral fusion protein mediating cell fusion at endosmomal (low) pH [1]. 22.00 22.00 22.00 22.20 21.80 21.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.81 0.72 -4.54 11 335 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 61 0 0 153 1 42.00 54 24.57 CHANGED .plcslEcRLssLpcpapplhcsC+psscslcRLENHhETLR+uMlsLsKKIDVQTG .......................QRLTNLEKKITNVTTKFEQIEKCCKRNDEV............................... 0 0 0 0 +4917 PF02691 VacA Vacuolating cyotoxin Bashton M, Bateman A anon Pfam-B_436 (release 5.5) Family This family consists of Vacuolating cyotoxin proteins form Proteobacteria. These proteins are an important virulence determinate in H. pylori and induce cytoplasmic vacuolation in a variety of mammalian cell lines [1]. 
18.90 18.90 19.30 19.10 17.70 17.70 hmmbuild -o /dev/null HMM SEED 981 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.36 0.70 -13.81 0.70 -6.90 2 1164 2009-01-15 18:05:59 2003-04-07 12:59:11 10 6 43 1 2 1174 1 244.10 38 85.28 CHANGED AFFTTVIIPAIVGGIATGsAVGTVSGLLuWGLKQAEpANKsPDKPDKVWRIQAG+GFspFPpKpYDLYKSLLSSKIDGGWDWGNAApHYWlKsGQWNKLEVDMpsAVGTYpLSGLhNFTGGDLDVNMQKATLRLGQFNGNSFTSaKDuAsRTTRVsFsAKNILIDNFlEINNRVGSGAGRKASSTVLTLpuSEtITSpcNAEISLYDGATLNLsS..N.SVcL.GpVWMGRLQYVGAYLAPSYSTIsTSKVpGEhNFpHLsVGDpNAAQAGIIAspKTpIGTLDLWQSAGLsIIsPPEGGYcsKspssP.............QN.......NP.NssQKTElQPTQVIDGPFAGGKDTVVNI.+lNTKADGTl+sGGFKASLoTNAAHLpIGcGGVNLSNQASGRTLLVENLTGNITV-GsLRVNNQVGGhAlAGSSANFEFKAG.DTpNuTATFNNDIpLG+hVNL+VDAHTANFpG.I.hG.......NG...............GhN..TLDFSGVTsKVNINKLhTAuTNVslKNFsIpELlVpTps.ShGpYThFuEsIGspSRIssVpLpTGhpshaSGGVpFKuGcKLVIDEhYauPWNYFDARNlpsVEIs++hh.usPtN.hGpotLMFNNLTLspNAsMDYup..sLTIQGcFhNNQGThNhhVpsG+VATLNsGptAuMhFNN.lDSsTGFYKPLIKINsAQsLhKNpEHVLlKA+.IsYs.Vus.Gss....uhSNsNLpEQFKERLALYNNNNRMDTCVVR..NhsDIKACGMAIGNQSMVNNP-NYKYL.GKAWKNhGIsKTANsopIuV..LG.NSTPTpssssTTNLPTNTTNNARFASYALIKNAPFAHSATPNLVAINQHDFGTIESVFELANRSpDIDTLYANSGAQGRDLLQTLLIDSHDAGYARTMIDATSANEITpQLNsATTTLNNIASLEHKTSuLQTLSLSNAMILNSRLVNLSR+HTNpIDSFAKRLQALKDQRFA 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 1 2 2 +4918 PF03077 VacA2 VACA; Putative vacuolating cytotoxin Griffiths-Jones SR anon Pfam-B_2866 (release 6.4) Family This family contains a number of Helicobacter outer membrane proteins with multiple copies of this small conserved region. 19.50 19.50 20.00 35.50 17.70 18.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.89 0.72 -4.38 10 460 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 51 \N 19 481 0 59.90 41 7.82 CHANGED uhlGYIsGsFcAppI.YITGsltSGNuhuo..GGGAsLsFsuusslslssAslssppsssppS ....hlGYIsGsFpAppI.YITGsltSGNuhso..GGGAslsFsussslslssAshssppsss..S... 
1 11 16 19 +4919 PF04333 VacJ VacJ like lipoprotein Kerrison ND anon COG2853 Family VacJ is required for the intercellular spreading of Shigella flexneri. It is attached to the outer membrane by a lipid anchor [1]. 25.00 25.00 34.90 34.60 24.40 24.10 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.37 0.71 -4.86 10 1871 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 1652 0 385 1266 1805 200.70 39 75.89 CHANGED sstsppDPLEuFNRshasFN.sslDpall+PlApGYpphVPcsV+sGlsNFhsNLsEPsohlNpLLQGcscpAhpshsRFhlNTThGlGGLlDlAotsG..Lpppscc.FGpTLG+YGVusGPYlhLPlhGPsTlRDssGsllDth..hPhhhhlss......shuhs+hulpsl-sRAphLss..-sLlcsS.DPYlhhRsAYhQp+pa+lp ......s...thsDPhEuaNRsM.asFN.ssLD...Yll+PVAh.uYp..shsPpPsRsG..........lsNFhsNL.p-PsshlNslLQGcstpuhtchsRFhlNT.hhGhGGlhDVA....uh.s.s......Lp+....pcpc..FGpTLGaaGV....ut....GPYlhLPhhGPtTlRDss...GphsDs...h.......P...h...h......h...hsh...................sh.u.h..u...t.....h...s...l.....ps.......l-...sRA.....pL.L.ss......Ds.L.h.c.s..u.s.DPYhhl..R-AYhQ++ch.h.t.................................................................. 0 94 207 299 +4920 PF04294 VanW VanW like protein Kerrison ND, Finn RD anon COG2720 Family Family members include vancomycin resistance protein W (VanW). Genes encoding members of this family have been found in vancomycin resistance gene clusters vanB [1] and vanG [2]. The function of VanW is unknown. 
21.70 21.70 22.20 23.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.95 104 1141 2009-01-15 18:05:59 2003-04-07 12:59:11 8 16 702 0 306 1013 69 131.50 36 30.04 CHANGED tsRspNlplAuptlsGsllhPGE........sFSFNphlG.potpp.GYppu.l...l.hsu.....chss....ul.GGGlCQlSoTLasAshhAsLpllERpsHSh.lsYs......Ph....G.....pDATlsas....h.lDh+F+NsostslhIcs....ths.ssp...lssplau ......................................s.sRspNlpluApplsGslltPGE........sFSFNphlG..p.o.tpp.GYp..puhl...l.h.sG.......chsp............uh.GGGlCQ.....lSoTLYsAs..ht.A....s..L...pl..lERpsHShslsYs..........Ph..........G.....pDATlshs.....h..lDl+F+Nssstslhlps......hhs..ssp..lsspla............... 0 143 249 290 +4921 PF02557 VanY D-alanyl-D-alanine carboxypeptidase Bashton M, Bateman A anon COGs Family \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.72 0.71 -4.31 63 2350 2012-10-02 01:02:30 2003-04-07 12:59:11 12 58 1623 4 473 2508 550 132.00 31 49.45 CHANGED shhlcpcsupuhpph.hpAApcc..Gl....pLh...........h.hSGaRShppQptla..pt.sp..................tpstptstphsA.PGtSEHptGhAlDlussp........h........ptshtpsttscW..............................LpcsAtca.GFhlp..............a.Ppsppp.pGlsYEP...W.......HhRY.lGh ........................................................t...lptpstpshpph...hpA.A....pp...p.....Gh.....plt...............h..sSGa......RSac.pQpp..la.....pthhp................................................ppuppt.stp....h...uA.hP..G.....t.....S.....E..........Hp.....s.....GLAhDlss...s.s..............................................pttht.p.sttspW.......................................................................L..t...c.pA......t...c...a...G.FllR.........................Y..psc...p..p...T.GhtaEP.......WHhRYlG................................................................................................................. 
0 157 322 407 +4922 PF04892 VanZ VanZ like family Finn RD anon Pfam-B_5529 (release 7.6) Family This family contains several examples of the VanZ protein, but also contains examples of phosphotransbutyrylases [1]. 24.50 24.50 24.50 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.73 0.71 -4.21 19 3861 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 2101 0 830 2978 740 136.10 18 66.89 CHANGED halhallhlhhhhhFt..h...hh............tshshsLhPhtt...........................shhphhhsllhahPhGhllshhhtphp.....shhpslshuh...hhu...LhhEshQhhhsht.......ssDIsDllhNTlGuhlG.hh....lhhhht+hh.+t .............................................................................................................................hh.h.......................................................hp..h......h.......................................................................hh.h.p..h.hhN..lh....ha....h...P...L.....G..h.....h.....h..h...h....h.h...t..p.ht....................................shh.t..s..l....h...h..u....h......hhu........l.....h...h.EhhQ.h.h.......h.s.ht.............ss-lsDl..hh.N....ol....Guh..l..G.hh.......lhh.hh.......t........................... 0 303 585 720 +4924 PF03490 Varsurf_PPLC Variant-surface-glycoprotein phospholipase C Griffiths-Jones SR anon PRODOM Family \N 20.70 20.70 20.70 21.50 20.60 20.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.66 0.72 -4.29 2 23 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 14 0 5 24 0 46.70 48 14.12 CHANGED FssstWpPQSWMpDhRS.ItchsIsQVhhVGuHsAuoaGlph.SPFGhDAP .......thtWpPQSWMcDLRS.It-huIsQlslsGoHNuuoYGIpptSPhuhDA...... 0 3 4 5 +4925 PF01992 vATP-synt_AC39 ATP synthase (C/AC39) subunit Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the AC39 subunit from vacuolar ATP synthase Swiss:P32366 [1], and the C subunit from archaebacterial ATP synthase [2]. 
The family also includes subunit C from the Sodium transporting ATP synthase from Enterococcus hirae Swiss:P43456 [3]. 22.20 22.20 22.20 22.60 21.80 22.10 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.84 0.70 -5.32 10 1321 2009-01-15 18:05:59 2003-04-07 12:59:11 11 7 1055 5 489 1086 162 301.60 22 94.11 CHANGED YlsARlRuh+u+LLs-pcascLlcscohEph+hhLEs.s-Yushlsslssh..ssptlE+ALppsLAcpachlhclusupl+phlchhLc+aDlcNIpsLI+uKhsstssEEllshhhPhG...sacphptls-usolEEll.ssLcGT.YtcsLpchLu..-h-.pshtlh.ptLhKtYat-hLchsh.....hpuc-pclhcEalchElDh+NlpshLRuKu.sGLosD-lp..lhlpGGpLtc.tLctLtpu-sh-tllutLEGTpYupslp-stpphtts............lpslEcsLcchllchhschuhhpPloVuslluYllpKEpEl+NL+sIA+sttpslcsEcIcc.ll ..........................................................................................................hpshlRshcstl.Lspppapplh.ps...c.s.h.-...............s.h.t.h.hL.ps....osYtt.h.l..sp......t......t...p.th-ttlpppLh.pp.a.phhhp....u..s.t.....tphlp.hhthpa..hcNlt....lllp....up.h....p..t..p....s..h.p..c.......l.h.........h...............s.........h..G...........h.....p.....p.....h.....t....t...l....h....h....s...p.s.....p....-lh...thl..h...........t..s.....h.....tt......hhp...phh........s..........p.............h....-.............p.......h.......s.l.p.h.l.h.s.h.l.h.c.........t..a.h.t..p..h.h.chsp...........t.s.tp..t-...lhpphl.t..hph...Dh.......pslhhhlpu.....ht....p.....ph....s...t.....s...t......ht..plh.sp.tG.p..l..s.c...t.h.h.tls.p..s.p..s..h-.phhsh...l...p...h..s....h.p..hl.hp.s.h.tt..t.................................pslE...c.h.hhpt..h.p..h.t.p.....uhh.tth...s.h...t..s..hh.ualhh+E.El+NlphIhpshtp.p.ht..ppIpphh.h..................................................................................................................................................... 
0 184 295 405 +4926 PF01991 vATP-synt_E ATP synthase (E/31 kDa) subunit Enright A, Ouzounis C, Bateman A anon Enright A Family This family includes the vacuolar ATP synthase E subunit [1], as well as the archaebacterial ATP synthase E subunit [2]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.23 0.71 -5.04 17 1053 2012-10-02 21:03:42 2003-04-07 12:59:11 13 6 818 14 504 1038 112 184.00 22 89.44 CHANGED hIppEAc-KAp-IpscA--Ehsh.Kschhcppctplcphhc+tcKQs-hp+phthSshh.puRh+lLps+--llpslhcpscccLtplsc-p..tYpphLcsLlhpulhcLt.EsplllpsRccDhpLlcsh.lsphtpchctpht.ps..hhs-s........................c.shGGVlltstcG+IclsNTl-uRL-hhhpphlspIpctLF ...................................................................I.p-ApccAp.cI.t...cAccEhph..htc.hh....pp..t....p..t...p..h...pp...h...hc...+..tc.+.p.s...........c..h..pp..p....h....p....h....Ssh.t........p.s....Rh.....c...lLp.s..+.p-lls........clhp.c....u...........pcc.L.t.p.l..........s......p.........s..........p.....................t...........Y.p.phLpsL..lh..pu....h....h....p.......lt.....c...s..p.h......h......l.h..s.pp...pDhp...l...l.c.ph..h..th..tt......htt......th......t............t.................h.....h.....s-p......................................p.ht.G..GlhlhstsupIplssTh-shlchhh.pphhspltthLF.............................................................................. 0 194 321 422 +4927 PF01505 Vault Major Vault Protein repeat Bateman A anon Bateman A Repeat The vault is a ubiquitous and highly conserved ribonucleoprotein particle of approximately 13 mDa of unknown function [1]. This family corresponds to a repeat found in the amino terminal half of the major vault protein. 
20.00 8.30 22.70 8.50 19.30 8.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -7.95 0.72 -3.90 85 662 2009-01-15 18:05:59 2003-04-07 12:59:11 13 12 77 227 332 619 5 43.50 31 23.60 CHANGED hhVLspspALtlcAhpsa....pD.tp.....s....................RhsG-cWllpu..P..tsYlPs ....................hllspspALpL+Alpsa.........pD..tp.......uhp..............................Rh..sG-cWLlpu...s......tsYlP..................... 1 173 214 257 +4928 PF01847 VHL von Hippel-Lindau disease tumour suppressor protein Bateman A anon Swiss-Prot Domain VHL forms a ternary complex with the elonginB Swiss:O44226 and elonginC Swiss:O13292 proteins. This complex binds Cul2, which then is involved in regulation of vascular endothelial growth factor Swiss:P15692 mRNA. 20.00 20.00 20.10 20.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.67 2 136 2009-01-15 18:05:59 2003-04-07 12:59:11 11 5 97 14 79 139 2 121.10 33 56.67 CHANGED RPRPVLRSVNSREPSQVIFCNRSPRVVLPlWLNFDGEPQPYPhLPPGTGRRIHSYRGHLWLFRDAGTHDGLLVNQTELFVPSLNVDGQPIFANITLPVYTLKERCLQVVRSLVKPENYRRLDIVRSLYEDLEDaPsVpKDlpRLoQE+ltpQ+ht- ..........................pS.tst..s.Vh.FsNp.osRsV..shWlsapGc..p.Y.ss..LtPGpth...clpTYhsH.WlFR..Ds.h..Tt..-..th.hV.pp..pc.............lahP.........................t.............t.....t............................s...............IphP..hh....oL+c....s.h..h.l.t...hh.t.....t.............................................................................................................. 
0 25 39 61 +4929 PF02209 VHP Villin headpiece domain SMART anon Alignment kindly provided by SMART Domain \N 21.70 21.70 23.60 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.70 0.72 -4.26 15 900 2009-01-15 18:05:59 2003-04-07 12:59:11 14 43 134 38 458 817 14 35.90 47 4.18 CHANGED YLo---FpplFuMo+pEFhcLPtWKpppLKKchtLF ...aLSsE.-F.......p........plF.GMohpEFspLPhWKpspLKKcttLF... 0 137 202 310 +4930 PF04702 Vicilin_N Vicilin N terminal region Kerrison ND anon DOMO:DM04811; Family This region is found in plant seed storage proteins, N-terminal to the Cupin domain (Pfam:PF00190). In Macadamia integrifolia (Swiss:Q9SPL4), this region is processed into peptides of approximately 50 amino acids containing a C-X-X-X-C-(10-12)X-C-X-X-X-C motif. These peptides exhibit antimicrobial activity in vitro [1]. 26.70 26.70 28.60 28.60 26.60 25.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.99 0.71 -12.08 0.71 -4.59 4 32 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 13 0 1 40 0 123.70 35 28.90 CHANGED sGR+s.-DDPppcYEpCpRRCc.-spGp+EQppCEcRCcpphcEcpppp....EDPQppYppCppcCpppE...R.h.pCpQpC.cpaEpp.ppp..............................pRQap-CQpRCppQEQtPccpQQC.RcCREQYpE.p.+Gcc-ph.....ctccpcoEEG .......................thppC.p.Cp..tpt..pQppCpppCcppht.Ecpppp....................c......DPpppYcpCppcCpppc....c.p.p.CpppCpcpaEpc.ppp......................sp+pac-CQp+CppQEQtscpppQC.pcCccpYpEp.h+t.tc..pp.....tpctct-E.t................................. 0 0 0 1 +4931 PF00559 Vif Retroviral Vif (Viral infectivity) protein Bateman A anon Swiss-Prot Family Human immunodeficiency virus type 1 (HIV-1) Vif is required for productive infection of T lymphocytes and macrophages. Virions produced in the absence of Vif have abnormal core morphology and those produced in primary T cells carry immature core proteins and low levels of mature capsid. 
25.00 25.00 28.30 28.30 21.70 21.70 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.46 0.71 -4.41 10 6238 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 93 4 0 6291 0 184.00 79 99.34 CHANGED pEKcWlVhlTW+VPc.RIc+W+SLVKYhhYhoKcLptWpY.hHHapssas.a.TsSRllIPL.uc.uc.LcVssYWpL.TPE+GWLSoYAVuIpW...hpcpYhT-VDPssADpLIHspYFsCFo-sAIR+AIRGc+llshCpFPcGHK.QVs...SLQYLALhAl...lpp+pp+scssss+phsccptsshphA+pc.tpspppsGp ............MENRWQVMIVWQV.DRMRIRTWpSLVK.HHMYl.S.KK.AKs...WhY.RHHYESpHP.+lSSEVHIPL..G.........D....A.....+........LVIpTYWGLHT...G.....E......RDWHL.G...Q.GVSIE.W............Rp+...RYSTQ.VDPsLADQLIHLaYFDCFS.-SA....IRpA.ILG+l..V..pP..RC.E.YQAGHN........K.VG......SLQYLALsAL..................ls.P..K.K..h.....K..P...PLPSV.p..K.L.TEDR.W..NKP.QKTKGHRGSHTMNGH.................................................... 0 0 0 0 +4932 PF01044 Vinculin Vinculin family Bateman A anon Pfam-B_1420 (release 3.0) Family \N 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 968 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.98 0.70 -13.84 0.70 -6.55 10 1199 2009-01-15 18:05:59 2003-04-07 12:59:11 14 14 103 65 681 1153 3 334.80 14 87.69 CHANGED 
.h+T+TlEplLEPlspQVopLV.h+p...uphctstshcLossVAuVppAssNhlchGc-hspposD..Lcc-MssAlpcVcssuchhcpAupphpcDPhSostRsphlcuARulLSusocLLlhhDpu-V+KIlcss+tVpDhLssscsspoh-DLsshhKsluPshs+lschlscRQQELs+.pHR-.LlsuhsslKchuPlLloAhKsalcp....P..slpEAtcNRsalsccMs-tlNpI.cVLQhTo...........oshp+shuuhuscLssAL-hLccs..........hI...lDsushsptRstPp.ccclcullSssutMADu.CshcsRtp......ths..........................................................................................................AEss.............t.................................................................................................................................................sscQALpsLloEhtpsAspps......chssLsssID+.........hptcsc................DL+cplR+AlsD+VSDsFhDToTPLhlLlEAA+u.....G+EcshcE+ApsFp-HAs+LspsApLusuhu..sNccsVchlptoAsQl-sLsPQVIsAA+ILhppPsSKsApEph-shKpQWtDpV+hLTstVD-hTsscDFLssSEsHIhcDls+ChhAlps.........t-scsLsssAuuIttRusRVlhVscsEhDN.EsshaTE+VppAschLcsslsPhhs-ttplAsNstcss....sspsac-s..chlsAsRhVtDAlp..........................................................sIpculLM..pcsPs-lsssophc.....pED..sssRssssscsss......csssssEoscE..tplh.EcKsplstQhpshhtstppLcpElsKWsspGNDIIuhAK+MshlMhEMo-hsRGcG..tTppDlIssAKcIA-Auscls+LA+plAcQCsDpph+psLLthhpRIshhspQLpIhSpVKAshhshus.........Elsspsh-.usppLlpsApNLMpSVhpTV+AA.sAShKhRoc.......uusplpWhhKsPhpp 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 189 244 434 +4933 PF02236 Viral_DNA_bi Vir_DNA_binding; Viral DNA-binding protein, all alpha domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_1651 (release 5.2) Domain This family represents a domain of the viral DNA- binding protein, a multi functional protein involved in DNA replication and transcription control. 23.50 23.50 23.70 38.00 21.60 23.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.80 0.72 -3.82 10 131 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 98 7 0 139 0 87.80 45 18.36 CHANGED PlVSAWp+uMElhstLhE+Y+VDsc...sh+hLP-pu..psa+KlspsaLNEc+hsl.LTFSSpKTFsslMGRFLtualpsaAGltsspas .........PlVSuWEKGM-sMssLME+Y+VDss.+sua+LMP-Qs..ElapKlC.poWlNEE+RGlpLTFoopKoFoshMGRFLpualhuauGIuppsWE....... 0 0 0 0 +4934 PF03728 Viral_DNA_Zn_bi Vir_DNA_Zn_bind; Viral DNA-binding protein, zinc binding domain Bateman A, Mian N, Griffiths-Jones SR anon Pfam-B_1651 (release 5.2) Domain This family represents the zinc binding domain of the viral DNA- binding protein, a multi functional protein involved in DNA replication and transcription control. Two copies of this domain are found at the C-terminus of many members of the family. 
25.00 25.00 64.00 39.20 21.80 17.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.66 0.71 -4.03 22 260 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 98 14 0 276 0 122.10 35 50.61 CHANGED stCpshh+tsst...G.c.hChhsshhlspspsl-hssso.-suhtAhhc.P.shsh.ssN.htRssht.......hpNsDh+lss.Dlhp...shphspp.hu.sas-s.....hhsphtapp.thpappshhPpsp .....tCss.p+s....ss.s......G..+.hChhsshhlspscsl-hDssS.-..s.u.hA..hcpP..hsh.spN.hhRNsht..................hsNsDh+hss.Dhhs...s.QhSp+.hu.hFsEs.........shsphtapp.hhtahpsshPsh............ 0 0 0 0 +4935 PF00426 VP4_haemagglut VP4; Outer Capsid protein VP4 (Hemagglutinin) Finn RD anon Pfam-B_161 (release 1.0) Family \N 19.80 19.80 19.80 20.70 19.40 19.60 hmmbuild -o /dev/null HMM SEED 776 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.36 0.70 -6.57 9 3505 2009-01-15 18:05:59 2003-04-07 12:59:11 13 2 491 35 0 2781 0 337.90 67 99.90 CHANGED MASLIYRQLLsNSYoV-LSDEIpsIGopKopNVTVNPGPFAQTsYAPVsWGsGElsDSTsVpPsLDGPY.....QPoohp.PssYWhLlsPsssGVVhEu...TssoshWhAslLlEPNVppssRpYslhGpshQlsVsNsSpo+WKFh-hhKsossusasphsoLhos.+LtuhhKps.splasYpG-oPsuspshhssss.hsslshshps-FYIIPcSQputCsEYIpNGLPPIQNTRNlVPluluSRsIh..RAQsNEDIVISKTSLWKEhQYNRDIlIRFKFuNoIIKuGGLGYKWSEISFKssNYQYoYpRDGEpVsAHTTCSVNGVN-FuYNGGSLPTDFsISRYEVIKENSYVYlDYWDDSQAFRNMVYVRSLAAN.LNsVhCoGGsYsFtLPVGsWPVMpGGAVoLphAGVTLSTQFTDFVSLNSLRFRF+LoVpEPsFoIoRTRsopLY..GLPAuNPNsspEYYEhAGRFSLISLVPSNDDYQTPIhNSVTVRQDLERQLs-LREEFNsLSQEIAhSQLIDLALLPLDMFSMFSGIKSTl-AsKSMATsVMKKFKpSpLAsSlSpLTcuLSDAASSlSRuoSlRSs.SssSsWTslSpplosssssspslSTQsSsIS++LRLKEhsTQT-G.MsFDDISAAVLKTKIDKSsplstssLP-IITEuSEKFIPsRuYRlIccDpVaEsos-G+aFAYKV-TFEElPFDl-KFA-LVTDSPVISAIIDFKTLKNLNDNYGITRcQAhNLLRSDP+VLRsFINQNNPII+NRIEQLIhQCRL 
................................................................................................................................................................................psYsVsL.DEIppIGSEK...o.QNV.TlNPGPFAQTpYAPVNWGHGEINDSTTVEPlLDGPY.....QPTTFsPPsDYWILIsS...NTsG...VV.YES...TNNSDFWTAVIAVE....PHVssssRQYslF..GENKQFNVc....NsS.sKWKFlEMF+u....SuQs-FhsR...RTL.TSDT+LsGlLKYG.GRlWTFHGETP+AT..TD.u..Ss.TuN...LssloIsIHo..........EFYIIPRSQES..KCsEYINNGLPPI..Q....NTRNVVPlSLSSR.SIpYp.....RAQ.....VN.E.DIsI..SKTSLWKEMQYNRDIhI+h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 0 0 0 +4936 PF03225 Viral_Hsp90 Vir_Hsp90; Viral heat shock protein Hsp90 homologue Mifsud W anon Pfam-B_2880 (release 6.5) Family \N 25.00 25.00 56.60 56.60 22.90 20.10 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.68 0.70 -6.21 17 124 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 41 0 0 124 0 433.40 32 96.48 CHANGED ssphhtchhphhatcs-hccphpchhshlhpp.tp.sp..ats..t...h.sasushhlpssplhsssssshEhhclhllahhhlp.shhc+osasscshh.sslshtpshtphc..ahshsl.......cshGCpaohpDlpp.....phss.st..hth.plaplsNShG+Llshp-lpstshhsF.plssspsssslssshssNchhhpClplacp...hssspusts+hphppchhcshlshLtspp..hpphscNP.LlhuhhhshhpphsspssuFccNhcslc.lpssahshlcclFthshphs..-scLhhs..lspspls.......pllsp.hhluphlc..pshhs.sss.ssLscclDthlsc.hl.p.ht...t.spshlhsuhLal.....aGpapTNtpRh.ppPpphphshpt......pshphphSslps..lhsplppphPch.......NlhRtWsstRuscAhtlF+shs..FpPhhasplPslhsYMpFDFaKtlshptloc-EhpshpsL+h.s...............................cs+oststpsppchcshlh ........................................................t.p..h.phhphhhtct.hpphhpp.hp.h.tp...................phtssh.hpttph..ss.ss.p..phhllh...h...th.phsshs.tshh..thp...p........hhshs........pp.GphashsD..Vcshhh....s.sss-h..sts-psasLSNSsG+LlsssElcshc...shsF....spshcs.sssssssphpsaLshCls...Lacc.....pshpssshhslhh.hasslhpsLsohs.hhcphscNP.LlsGhll-hhhc.pVahsoFcsNl-sl+hh.puphhsllhslashspspc...D-+lLhs...hsss-hsht..shlslhDshlllGs.lRpl-hhsp.ssshssLsp+lDhhlss.............sssc..Lpsshhas......a.uhapss.pRh.ppst.hph..t.......t.h.hphsslpt..hhs.lpp...ph.......shhRtahut+uphAhpla+phs..F.Ph...plPshhsah.hDFaKtls.shlotcEh.tLpplc..s...............................c..................th.......................................... 0 0 0 0 +4937 PF04530 Viral_Beta_CD Viral Beta C/D like family Waterfield DI, Finn RD anon Pfam-B_4973 (release 7.5) Family Family of ssRNA positive-strand viral proteins. Conserved region found in the Beta C and Beta D transcripts. 
23.40 23.20 23.70 54.40 23.30 23.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.85 0.71 -3.95 12 26 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 16 0 0 26 0 117.70 38 71.27 CHANGED h.Hs.sCsCspC..sspsp.hsus.p........pssss.sEpsspt.sh.sshhsppYhhllsslhll..h.hhlYlh....sssssssssYaYQDLNSVclchG..sPlDP-VItAIHHaQcaPFGpsPth IhHS.sCsCscCphssssohsssspp.........sssosVEpTsct.hh.hsshsspYllhlsslsll....hslYhh....sss.ssshshFaQDLNuVpIchuh.sPlDPcVItAlHHWQ+hPFGcsPt... 0 0 0 0 +4938 PF00729 Viral_coat Viral coat protein (S domain) Bateman A, Griffiths-Jones SR anon Pfam-B_870 (release 2.1) Domain \N 23.00 23.00 24.00 24.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.15 0.71 -5.01 13 577 2012-10-04 01:49:40 2003-04-07 12:59:11 13 2 91 43 0 572 4 198.00 26 66.94 CHANGED psGuIhA..PVAhuph.lpsppP+hp.pspG....SsploHpEllsslssoou..hphsss.ss.t.phplNPhNsulFsWLsslAsNa-pYpFsSlplpYlPhCsoTosGRVAhhFDcDupDs.PssRspLushsthspoAPWs................tssLslPsD.......stcRFssDouosDtKLlDlGQ..........lllATYG..uuussslGDlFlpYoVphhpPpsos .....................................................t.....hA..PVA.upp.ls..shsPt.h...poss........sh.sl.ppsEhlss.lpp.....oss......hshp.........saslsPhNs....t..hspl.slApsYsha+hTphclhYlPps....u......s.sss......Gp.lthsa.hD...huDsh.Pssps...p...h...uphushlsousWt.....................tssls..h..s.hD...........t..h.a.t..s......s......p.lh.s.st..............hhh.s.u........ts......hGplhhphtl.h.......s................................................................................. 0 0 0 0 +4939 PF00747 Viral_DNA_bp viral_DNA_bp; ssDNA binding protein Bateman A anon Pfam-B_490 (release 2.1) Family This protein is found in herpesviruses and is needed for replication. 
25.00 25.00 40.90 40.80 19.70 19.40 hmmbuild -o /dev/null HMM SEED 1122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.80 0.70 -13.80 0.70 -7.34 11 204 2009-01-15 18:05:59 2003-04-07 12:59:11 12 2 108 2 0 187 0 910.50 32 95.55 CHANGED Mpsc.....csssh..uPluPsuYlYhhtpsschh-hluhLShpsssSslslsPLLhsLTVEssFss....sV+o.hsshsG.ullhKlTo..FpPssahFHssctlss.sps..sLs+LC-cARp+FGhpuaps...ss+spTshtsLCsslGhsPspslhalVVspGFKEhlasGpLlsphttsspVplsss-ul+IPLYPhpLF...spphhs-..st.thshsctFl..psFYsssLSchLFhhVaTuhu.ALRhpsscsllcAuh+QhlpDsppssKLsPcKsapuasupphsu...........h-ps.........chMLsDshssELAhSasulah-ulY-.ssshsas-WPllpsu-spts+lsALssaph+LusHVuAhlFSsNSlLYhoclshhssscppssos....p.ohh+hhphssuhsusscps.pDu+tl.chsG.....ssupspcaospHLAhhCGhuPpLLu+hlaYLpRssshsssspups-lhpalss..soushCshCsGcsspoChpTshhRl+sRhPthspss++-PsVhsshSphYuDsDlLGNauph.sl-pc-s.scsu+ss............................................th..sol-RhhhhtphhcYscKt...phlssssGc..shslps+psFlsllsslppslDspVspFhpslhcsps...R-tltsuspuhslshsPaAsuFsPlhsahaaRolLuVlQsLAL.pspuahsDpPlpGpphscWhhppFQolhssFhsshhc+GFLss+slKlspssstsphhshpshtp.Gphstpsh-s+luRlSltsl+slRVKNRslapuups..shupsah++s.........sc++ps.lpGsLuFLLhpaHcpLFPss+hs......sL.FWpplppNphP.ssslhshE-hsshlpFlhphos-Ys-hsllDlsPsslhsaschhhpNplLphhGapsYhhohhshhsthopps.s..aPslLhpps..phuSstEash+spthpssuhssshTushoppshl+slhspRslVoluloIpKYsGhsGNcclFQhGplGahsGuGV-RNLsPss.u.....tshpFMR.p+allAT.hsshll++.sptssssa-s-ll+ptlhhIl..-utshssss.lhslscslusRsp..p.shDDhLahlDs.EhLAcSlhcphsplh-pGst-aSl-shpcVhcsstp..sth....suutsa-Fuuh 
............................p.s....sssGPhualYhh.htt..hc.luhLuhpstss.ssslhPLlhGLTVEtsF.h....ssts.hpshsu.ul.hKlos...ats.shhFHssptl..s.sps...slpchCctARpcFGhpsats....s.th.TshttlCttlshpspphhhalVlspuhKEhlahsphh...phtthtpVpls.ttpsh+lPlYs.pL.F................th..p..t.......hshtpth...hsFas.slschLa.hlhsshu.uLRhppspslhcuuhc.hhc-stpsshLssphpahshtspt.ps.............s....................phhlssshss-hAhuhtshh.tusa..-...t...shppWPlhpstps.t............shhpALsta.h+hush.luAhlFSsNSsLY.ocls.hssss.tstu....................t.shh+.h.h.sshhs.hs.ps.p-u+hh...thps...............hshtspcas.pHLAhhCuhsPtLluphhaYLpRsshhths..hptp..shhpalss....s..ss..CphCttpsptsChpTshhRlctRhP.hstt.+tt.hVhsshsp.asDsDhLGsauth.hhcpt-s....sscs..................................................sh-Rhhh.....phhcp......h.s..ss.......u.p....p..lp.....sttsFhthhsslpphl-pps.phhpslhctpp....R-tlttuspshslshsPYu.u.sPlhthhhhRohhsllQslAL.pstshhhspshpsp.hp....tpFQslhtthhhs..ptGFlss+phpsshsp.hshhhs..hhtp.s.........t.shhshpsclsRlol.s.+phRlKNRslapuups........shs.pshht+s...........sppchs.lpGsluFLLhpaHptLF.Psst..s......s..FWptl.pNphP...sthhshc-hps.ltalhpho.pYtthshlslsPsslhphsp.hhtNthLphhsa......p....pahhshhshhhthspps.s...s.hhhh.t....huu..sh-h..h.hphhh.phs.t.h.t.hsshhsppphlpslhptRshVslslulpKYtGhsG.s.pclFQhGphu.h..Gs...+slss.............htF..R.p+allAs.hss.hlhtt.st.h..tss.h.a-sslhcphhhhh...cuss.hshss.hhshhchlu.sRs.p......ph-Dhhhhl-s.phlupphhchhtpl.pps.s.taSh-shhphhct.tt..........st.h.s....h............... 0 0 0 0 +4940 PF01443 Viral_helicase1 Viral (Superfamily 1) RNA helicase Bateman A, Ahola T anon Prodom_1256 (release 99.1) Family Helicase activity for this family has been demonstrated [1] and NTPase activity [2]. This helicase has multiple roles at different stages of viral RNA replication, as dissected by mutational analysis [3]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.76 0.70 -4.84 72 2712 2012-10-05 12:31:08 2003-04-07 12:59:11 13 100 786 0 67 6146 639 226.40 20 13.31 CHANGED hllcGVsGsGKSohlpphlpp...........hpsssthhstth......spssphpspplcsh.hshht.....schlllDEahhh..t.l.h..h...spsphsllhGDshQhshpsp.....hsph.sph....sphhshphshhhsshshthtspp..........h.pststpssth.....thsspsh......slthtshspphhpshthp..............shssp-spGhpacsVollhpppss...........spphhhVALTRpppslhhhs ......................................................................................................................................................................................................h....GssGsGKo....p.......l...h...phht................................h...s.h...h.phh.......................................p.hp..s...h.t...h......p....s..........h...h............s.....h...ht.........................schl.hlDEsh...h...h...p....s...h......h...h....l....h......t........h......h.....p..s.....p..........p......l.h..h.h..G.Dsp.Q...h.......s..hh.....s....h........t...t.....h....h..h....t...h.h..s.ph...........................s...h.....t...h.......p....h.s..h....R.s...st..pl.st..hlpsh....................................................t..sppps.h................thsspsh...................hth.hhs.h..p..t.....h..t.th.tp......................................................................shTspp.sQG.....h.....T.....a.c..t..V..............h.l......h...........t....s........p.....s....t...s..........................sppc.hhVALTRtppplhh................................................................................................................................................ 
0 19 50 61 +4941 PF04521 Viral_P18 ssRNA positive strand viral 18kD cysteine rich protein Mifsud W anon Pfam-B_2612 (release 7.5) Family \N 23.10 23.10 23.20 105.30 22.60 23.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.90 0.71 -4.66 10 25 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 12 0 0 26 0 118.30 47 72.80 CHANGED hsshcsspKhRtplYppLGL..ssVpC+LsussGhsCGMPAAls....h-cucsc.....Lsh.DGaCGEKHcshshShAhR.splpshphcL-pLEc+cEsL+sphpthstst+ssps.s.sstKhs+hKs ..+slhCVSKYRtSVYKsLGL..ssVKCRLPuDCGVNCGMPAAFV....LEcGHPc.....Loh.DGaCGEKH+GYVlSGAWRpAQLRoLNtELDpLE+RtEpLKsQI+sLotst+sssA.sYsPpKls+hK.A.. 0 0 0 0 +4942 PF00998 RdRP_3 HCV_RdRP; Viral_RdRP; Viral RNA dependent RNA polymerase Finn RD, Bateman A, Griffiths-Jones SR anon Pfam-B_315 (release 3.0) Family This family includes viral RNA dependent RNA polymerase enzymes from hepatitis C virus and various plant viruses. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.55 0.70 -6.00 33 17276 2012-10-02 12:54:00 2003-04-07 12:59:11 18 48 263 228 1 13782 0 190.90 62 44.48 CHANGED hhshsushh...sppcphh.lst.hssshhhtsHNpshsNhpRulhERVahVp..tptp....lhs..........cPh.ssFcc.lpthtppltphlh...ptsslo.pchsp.YpGt+tphYp+AlpS.lshpslppc.DuhLcsFlKsEKhslssKsDssPR...lIpPRsPRYNVtlGpaLRhhE++hh+s......lspsatGsohhpGYospptuchlhptWppapsPsAlGhDsoRFDQHVShcALcaEHSlYpusatssp...pLtclLshQLhNpGsuhss-Gh.h+YphcGCRMSGDhNTShGNsllsshhspshh+ths..lcspLhNNGDDCVlIsEpsshcpspp...slpchatcaG.FsspsEcPsYclEplcF..CQspPlhsss.tashsRp.hsshuKDshuhsshpss..ssspsWlsulupsGhslsuGlPlhppFaphhhpsst......thstphhsspash.......shhthhuh.thshpstsssptsRhSaaL.uaGlsPppQhtlEsthsphplptp 
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................V...T...E...p.DI.R..sE...E.....tIYQsCsLtPpA.....RpAIcSLTERLY...lG...GPhhNS......K.Gp...sC.GYRRCRASGVLTTShGNTL...T........CYl.KA..pAA.......C.......R.....A..............A....t......Lp......D.......sT.......M....L..V.......CGDD...LVVI.sESsGspEDtt....sh....................................................................................................................................................................................................................................................................t.............................................................................................. 0 0 0 1 +4943 PF02407 Viral_Rep Putative viral replication protein Bashton M, Bateman A anon Pfam-B_1223 (release 5.2) Family This is a family of viral ORFs from various plant and animal ssDNA circoviruses. Published evidence to support the annotated function "viral replication associated protein" has not be found. 25.80 25.80 26.10 25.90 25.60 25.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.04 0.72 -3.98 19 1309 2012-10-02 18:54:06 2003-04-07 12:59:11 11 2 283 2 9 1241 181 77.10 47 29.31 CHANGED uppWsFTLNhsst.......hs.l.hs-clpYAlhtcEcu.p.GpcHLQGalphK++.ppLsplKplhsu.puHhEhsR.Gosc-scsYCpKE ..........................h..hW.sFTl...ot...................t....hl...h............s.hpY.hlhGcEtspp.tTPHLQGahph..ppK..p.p.....hs.plK.thhss....psHlEKAK.GoDppNccYCSKE......................... 
0 8 8 9 +4944 PF05101 VirB3 Type IV secretory pathway, VirB3-like protein Bateman A anon COG3702 Family This family includes the Type IV secretory pathway VirB3 protein, that is found associated with bacterial inner and outer membranes [2]. The family also includes the conjugal transfer protein TrbD family that contains a nucleotide binding motif and may provide energy for the export of DNA or the export of other Trb proteins [3]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.85 0.72 -4.00 24 967 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 663 0 249 799 41 88.50 20 43.83 CHANGED Muttt............sPla+uhsRPshl..hGsst-hlhhs.slhsslLhhsstshhsslhslslahhs....phhscsDsh.hptlah+.......phph+saa.....tsp ..................................ssla+uhTRPshl..hGls..hthhlhs....s.....hh...s.sh.....l......hl.....h....h...p......t......h.......h.......h.h.....l.l..s.h.s...l.ahht........hhss....ccDsh.Fhtlhhp.........p.p.t.ha...t..................... 0 42 134 187 +4945 PF04335 VirB8 VirB8 protein Mifsud W anon Pfam-B_1984 (release 7.3) Domain VirB8 is a bacterial virulence protein with cytoplasmic, transmembrane, and periplasmic regions. It is thought that it is a primary constituent of a DNA transporter. The periplasmic region interacts with VirB9, VirB10, and itself [1]. This family also includes the conjugal transfer protein family TrbF, a family of proteins known to be involved in conjugal transfer. The TrbF protein is thought to compose part of the pilus required for transfer [2]. This domain has a similar fold to the NTF2 protein. 
20.40 20.40 20.50 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.36 0.70 -4.75 104 1418 2012-10-03 02:27:23 2003-04-07 12:59:11 8 5 855 7 317 1203 73 202.90 19 86.07 CHANGED putph.p-chspsptpp+.phhhl.....uhsuhhluhhssh.ulsh..hssh..pp.h.P...allp.VDptsGtsphlssh....pph....pst.s-shhphhlspalp.sRps.hssssh.p..psappshhh..osssstpphpsah.....t.t...psPhshhupps.h...pVclpSl.shh....ss.....p......shp.Vcapcph..pptsssh....tsp.pasuhlshth...pssh.sppphhtNPLGhhVpsaphspE ..................................................................ph.tpphtthptpp+thhhh......uhhshh..lsh..hhsh.u...lsh....hhsh....pp.hhP...allp..lDp.t..Gp..sp.hls..h...pst........php..s.cshschhlspal....p.hRps.hs..sl..p.....pshpps.hhh..osspst.pphpshh..p.....pssh.pph..tppt.l........plplt.Sl...shh.......ss.............s..................shp..Vc...a..ppph...hptssp.............ttp.papuhlsa.h...p..s.h..sppph......h......hNPhGhhVssaphst...................................................................................................... 2 56 177 240 +4947 PF00286 Flexi_CP virus_P-coat; Virus_P-coat; Viral coat protein Finn RD anon Prosite Family Family includes coat proteins from Potexviruses and carlaviruses. 22.90 22.90 23.40 23.10 22.20 22.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.69 0.71 -4.48 17 1308 2009-01-15 18:05:59 2003-04-07 12:59:11 15 4 134 2 0 1218 0 129.90 41 54.26 CHANGED SNshAosE-lttItsshps.htlPssslstshhplshhCscsuSSshss.pGss.h.s.usshtsls.uhh+tpso.LRphCphYA.llWNhhLspNpPPAsWushGap.psKFAAFDFFDuVpssAAlpPspGlIRpPTptE ............................thAsstplttlhthhtt.hGlspppht.p.shhclshtCucsuSSthsshhGss........s.....sh.sp.s-l.s..ul.h.+..phs.T.LRpFCthYA......lVWNhhLtpspPPANWsttGFptssKFAAFDFFsuVsss.AAl....p..P.hpG...lhRtPTctE........ 
0 0 0 0 +4948 PF01347 Vitellogenin_N Lipoprotein amino terminal region Bateman A anon Pfam-B_1280 (release 3.0) Family This family contains regions from: Vitellogenin, Microsomal triglyceride transfer protein and apolipoprotein B-100. These proteins are all involved in lipid transport [1]. This family contains the LV1n chain from lipovitellin, that contains two structural domains. 21.00 21.00 21.10 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -12.79 0.70 -6.29 44 1005 2012-10-11 20:00:59 2003-04-07 12:59:11 17 42 272 1 433 992 1 434.80 17 32.52 CHANGED apss+pYpYpacupshsuL.p.husthuGhtlpuclplpsp.........spsphhhplppsphsphpthhsp.......cshhsssp..........lphphhshhppPhchphssGh.lsclhsspsssshtlNlh+GIlshLQls.hpsspth.pht....................EsslpGtCcThYslpp.sp.....................phlplsKo+shspCpp+hthphGhs.h.sphp..sppppps..lpposssphhlpsp.s..shhIppupsppphhlsPh.htp.ptupspu+ppLsLhphpsps...hs.ssshpshssLlYphssph.....................................................................................................................................................................................t...hPhhhhphssttt............plscplpp.l.ptstphscpcs..+.FspLlpllRssstcplpplhpphtsp...................phhphahDAlspuGTtsAlphlpphlps..cclpshEAApllsshspss.tsspchhchhhpLspssplppp.hLppouhLuaushlpphpss.....................psss.pchlp.ltpphp..pAlsctcp.chhlhl+uLGNhGpspslpshp.hL.upt.......plsthlRlthlhuLcpl..scppP+hlpsllhplahspsppsE..lRhsAhhhLhcopPshshlpplAphspp-s..shQVsuhlhStlc 
.........................................................................................................................p.Y.ath..ps.........t.h..........t....t...............s.shhhps.l.l.t.............................t.p...h.h.p..h......p..h...t.....h.....t...t.................................................................t...h......t..h....Phhh..th.............p..Gh...l.t.pl..hs..ts..s.h....h.Nlh+ulhshhQhs..hpt..tpt................................................Es..sh.G.Cps.Y.hp...........................................................................h.lh+..p.shppC...p.t.h......h....s.h.t...........................t....h....................h..h...p.t..t..................h.ps.s.p....h.h...............t.....h..h..s.p.pl.h....p...tt..........................sl...h.t.................................................................................................................................................................................................................................................................................................................................h.t....................th..hp...htp.................................tt........t.........h..hhp.hp.hp.pp.....l.thh....tph.............................................................................hhhpsls..hts..sh..hhhphh......tp.t....h...h..h...h...............s.s.....thhp...............h..........................................h......hhht.h.th.h...t.h..................................................................h.tth.h.......h..t...ht...........t...................ph.hhlhsl..t.Nh.....t...............t...h....h...hh.t................t.........ph.....hl.sht...........h.....p.hpthhh.lh.p...t.....c..hRhhA.h.hhhht..s.P.s...htth.h...h.....-...........ph.thh.s........................................................................................................................................ 
0 145 187 351 +4949 PF05090 VKG_Carbox Vitamin K-dependent gamma-carboxylase Moxon SJ anon Pfam-B_6307 (release 7.7) Family Using reduced vitamin K, oxygen, and carbon dioxide, gamma-glutamyl carboxylase post-translationally modifies certain glutamates by adding carbon dioxide to the gamma position of those amino acids. In vertebrates, the modification of glutamate residues of target proteins is facilitated by an interaction between a propeptide present on target proteins and the gamma-glutamyl carboxylase [1]. 20.40 20.40 20.60 20.40 20.30 20.10 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.66 0.70 -5.63 10 332 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 226 0 183 348 246 332.40 23 68.03 CHANGED ulFRllFGlLMhl-lhpctuhGalDp+aL-Pphsh+F.hF-alpPLPhshMYhlallMhluAlsIMLGh+YRlSslhFhLsahYIaLLDKToaNNH.YLhuLLuFhLlalsANR...YaSlDuhh.NsslRss.cVPhWNYshL+sQlFIVYFaAGltKLssDWlpG...uh.s+HWLhssaclhl.s-Llsh.lVHas....GLlaDLhlsFLLhac+TRhhuhhhlshFHlMNSpLFsIGMFPYlMLAsoslFFsssW.........s+th...h.phLphlh..............h.t..phhpppsshsp...hh.......ccthhp.tppPhltp+lushFslhahh.QLhLPapHFlhpGps.WTpthYtaSWcMMl+o+ss.asphplcDspTGcpsalsPpsF...pQp+phpspsDhlhQYAppLupphsppshs.....plpIYhDlaVSlNGRhpQRlhDPclDlhpscWssFppssWlLP 
.......................................................................................................................hRh.huhhh..h.....h.............................h...........s.....h.......h........t......s................h..pa..h....h.p.h..h...s.h....s......h.hh..hl.ahl..h.h.l...hu.h.hhhlGh.hh......R.h.u.h....h.h....h....h.........l.....a.h.h.l...hh.st.s...s.a....s...+......h...l.......h.tl.l.hhhh...h..h..hsss..p......hhSlD.....sh.....h....p.......p...........h.......t..........p.........t.......p........l........s...........h........W.................shhl.lp........hQlhllYhh..AGltK.l.p...s...-...WlpG...........shh..p.ha........hhs...h...p..h......l...........s...p........l..h.......s........h......h.lt...h........h......h..u...ll..h-L..s.h.sh.hL..h....h..c.t.....o......R...h..uhh..hshhFH.h.h.su.h..l...h..s..IG..h..Fs.a..hhl.us.s..l.F..h.s.sph...........p............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 1 77 128 162 +4950 PF04649 VlpA_repeat Mycoplasma hyorhinis VlpA repeat Kerrison ND anon DOMO:DM04353; Repeat This repeat is found in the extracellular (C-terminal) region of the variant surface antigen A (VlpA) of Mycoplasma hyorhinis. Mutations that change the number of repeats in the protein are involved in antigenic variation and immune evasion of this swine pathogen [1]. 
25.00 25.00 59.10 30.80 23.40 14.00 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.71 0.73 -5.60 0.73 -4.18 6 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 3 0 16 51 0 13.00 98 28.24 CHANGED KTENTQQSEA.GT KTENTQQSEAPGT 0 0 16 16 +4951 PF01660 Vmethyltransf Viral methyltransferase Bateman A anon Prosite Family This RNA methyltransferase domain [1] is found in a wide range of ssRNA viruses, including Hordei-, Tobra-, Tobamo-, Bromo-, Clostero- and Caliciviruses. This methyltransferase is involved in mRNA capping. Capping of mRNA enhances its stability. This usually occurs in the nucleus. Therefore, many viruses that replicate in the cytoplasm encode their own [1]. This is a specific guanine-7-methyltransferase domain involved in viral mRNA cap0 synthesis. Specificity for guanine 7 position is shown by NMR in [3] and in vivo role in cap synthesis [4]. Based on secondary structure prediction, the basic fold is believed to be similar to the common AdoMet-dependent methyltransferase fold [5]. A curious feature of this methyltransferase domain is that it together with flanking sequences seems to have guanylyltransferase activity coupled to the methyltransferase activity [5]. The domain is found throughout the so-called Alphavirus superfamily, (including alphaviruses and several other groups). It forms the defining, unique feature of this superfamily [2]. 
20.70 20.70 20.70 21.10 20.40 20.50 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.28 0.70 -5.79 122 1584 2009-01-15 18:05:59 2003-04-07 12:59:11 12 52 396 1 1 1647 0 318.10 19 22.63 CHANGED hsaslstpttphLpph.....GI.hsshus.sHsHsssKslEs.hhhphlhshl.sp....sssh..ltlKssKh.phlpptt.........hs...........hhN.hlss+DhtRY.....ss....................................................................p.......................tp..h.h........ttp....hsahtDsLaahshpplsshh.ppsphppLhuolVhP.sEhhh............................tpp..ShhPplYshphp.....................ps..................................................p.......hhahP-uptutuYppsh.sthpaLpsspl..h.tt..................shslphlpShsupHlhhIp+.spths.....................................................................sphtoFt...ctlhhs.....................................phhh.p.hs...p...........phslspshhpplhpYlcol+phshpss.....................................................hu+lpphhsc ................................hs..t.phl.ph.....tl.h.pspt.h....tsHshstshcp...h....hphh........tphs......tshh........l-lGus.hphhhpsp.....................Hs....ssshhss+DstRh.....hp.....phtth.t.ph.t......t...hp..t.....................................................................................hC.p.s................................................................................................................tsCp.hps............t......hshhhpslY.....hshcshsphhhpptht.hhhsshhhs.sphhh.............................tpt..shhs..p.h..s....htctp..................................................p.....hha....stusht..Yscshp.lhpahpsshh.....h..st...............................shhh-hlh.s..hsshh..hh...plst..ssthh.........................................................................p.hs.......ptl.....h..............................t.hl.thhp....t..............hh.ht.....hpph.phl.......uhpphhhhsu....................................h.........p.......................................................................................
............................ 0 0 0 1 +4952 PF00695 vMSA Major surface antigen from hepadnavirus Bateman A anon Pfam-B_168 (release 2.1) Family \N 29.10 29.10 29.30 29.20 29.00 29.00 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.32 0.70 -5.25 4 23454 2009-01-15 18:05:59 2003-04-07 12:59:11 14 10 96 4 0 17711 0 209.40 74 95.40 CHANGED hs.s.uKshsscch-Gs.lh.ph.Aup.lP....Gsh...GphsTh.H..lhs+s.psptlpTh.pshs.PtusupR.uhcpPTPhoPP.hshpPctspKsppuF+Qh.p-...tPpsssphsP..........s.hc.pss.sPllpstShl.oh.........sshltsPsLspc....sohuGlLusLlGL.VuFFLLTKILpIhppLDWWWhSLS.PtGp..CshQNotuQTSsHhssSCP.sCPGFhWhYLRhFIIFLLlLLlshshLhlh.s......................................................pShhhuKh.WE.sSAhFS.lS.LlP..Qh..hluLThhLhLIWMh...usplhshLo.hhsL.ALFFp .................................................................................................................................................................................................................................................-........................LT+ILsIPpSLDSWWTSLNFLGGsssC.GQNSQSPhSNHSPT..SCP.PhCPGYRWMCLRRFIIFLFILLL.CLIFLLVLLDYQGMLPVCP.LlP.....G...S....S....T.TSTG.....PC..+...TCTTP..AQGTSMaPSCCCTK.PoDGNCT.CIPIPSSWAFuKaLWEWASsRFSWLSLLVPFVQWFsGLS..PTVWL.shWMhWaWG.................................................. 0 0 0 0 +4953 PF03762 VOMI Vitelline membrane outer layer protein I (VOMI) Finn RD anon Pfam-B_3481 (release 7.0) Family VOMI binds tightly to ovomucin fibrils of the egg yolk membrane. The structure [1] that consists of three beta-sheets forming Greek key motifs, which are related by an internal pseudo three-fold symmetry. Furthermore, the structure of VOMI has strong similarity to the structure of the delta-endotoxin, as well as a carbohydrate-binding site in the top region of the common fold [2]. 
25.00 25.00 25.50 25.50 23.60 23.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.75 0.71 -11.48 0.71 -4.32 22 120 2009-01-15 18:05:59 2003-04-07 12:59:11 12 5 63 2 78 127 1 149.20 36 64.05 CHANGED hlpsspsssaGsWtt.ptCPsGpaspGhplKhEssp....s...........D-TulNulpLaCpphs....................................................................................................sppphlhSs-usaGpWp.phpaCPtsthllGFpL+s-.ppt..tDcsussNhthhCt..........spt..ltGss..sa..GsWsssph.......................................................................Cst.G.slCGlpoplEsspu..............................................................htD.DTuLNslplpCC ...................psssss.aGsWs..p...hCPsG.aAsGFplKsEssQ....sh..s.............................DDTALNuIcLaCspst......................thlpStp.GtaGp..Wo.p..aCP.s...s.hlhuFpLRlE..s.ps.......uDsTA...sNNlpFpC...........ssst.Lp...Gsu...hsa..Gpau.p.h.p........................................Cst......ulCGlpT+lEt.pG.....hh.DDTuLNsl+hh.CC..................... 
0 37 43 60 +4954 PF00434 VP7 Glycoprotein VP7 Finn RD anon Pfam-B_116 (release 1.0) Family \N 25.00 25.00 25.60 25.60 22.10 22.00 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.33 0.70 -5.52 8 4737 2012-10-01 19:08:57 2003-04-07 12:59:11 13 1 694 28 0 3775 0 272.50 76 99.95 CHANGED MYGIEYTTILhhLIShlLlsYILKolT+hMDaIIYRFLhVlVlluslss...AQNYGINLPITGSMDTAYsNSTQ-psFLTSTLCLYYPsEAsTEIsDsEWKsTLSQLFLTKGWPTGSVYFpEYuDIsoFSl-PQLYCDYNIVLh+YssslpLDhSELADLILNEWLCNPMDITLYYYQQTsEuNKWISMGoSCTlKVCPLNTQTLGIGCpTTsssTFEpVAooEKLVIpDVVDGVNHKlNlTssTCTIRNCpKLGPRENVAIIQVGGusILDITADPTTsPQspRMMRINWKKWWQVFYTVVDYINQIIQVMSKRSRSLDoAAFYYRV ......................IEYTTlLha.L.ISl.lLLNYILKS.lTphMDaIIYR.FL..LllV.l...l.sh..s+...uQNYGlNL.PITGSMDTsY..sNS.TQp.E..s...FLT..STLCLYYPTE.........A.........uTp.........IsDsEWKDTLSQLFLTKGWPTGSVYFKEYosIssF.SlDPQLYCDYNlVLM...KYD.psLELDMSELADLILNEWLCNPMDITLYYYQQosEuNKW..ISMGoSCTlKVCPLNTQTLGIGC.TTsssoFEpVAps.EKLsIsDVVDGlNHKlslTTs....TCTIRNCKKLGPRENVAlIQVGGuslLDITADP.TTsPQhERMMRlNWK+WWQlFY............................................. 0 0 0 0 +4955 PF00522 VPR VPR/VPX protein Finn RD anon Pfam-B_100 (release 1.0) Family \N 20.80 20.80 25.60 25.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.41 0.72 -4.17 12 5858 2009-01-15 18:05:59 2003-04-07 12:59:11 13 3 90 10 0 5609 0 89.30 80 93.70 CHANGED cEpsPts...pss.REsapEWlt-hlcElpcEAlpHFshcLLhtlhpahappatDp.st.....shchlpllQ+AlFlHa+pGCp.........cuRhGt .......................M.EQAPED...QGPQREPYNEW.TLELLEELK...s....E....A.VRHF.PRsWLHuLGQaIYE...TY..GD...TW...s...G.....VEAIIRILQQLLFIHFRIG.Cp..........HSRIG.IhtpRRsRNG...................... 2 0 0 0 +4956 PF03643 Vps26 Vacuolar protein sorting-associated protein 26 Finn RD anon Pfam-B_4396 (release 7.0) Family Vacuolar protein sorting-associated protein (Vps) 26 is one of around 50 proteins involved in protein trafficking. 
In particular, Vps26 assembles into a retromer complex with at least four other proteins Vps5, Vps17, Vps29 and Vps35 [1]. This family also contains Down syndrome critical region 3/A. 20.20 20.20 20.30 20.20 20.00 20.10 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.80 0.70 -5.62 8 653 2012-10-02 22:29:00 2003-04-07 12:59:11 10 9 323 8 411 631 10 247.70 41 79.71 CHANGED FGsss-I-IphsstcsRKhl-hct-......cGph-cthlahsGEoVoGpVslslKc.u+KlEHpGI+lEF............lGQIEhaYD+G..........................Npa-.FhsLs+ELAhPGELppspo.aPFEFspVEKs....YEoYtGsNV+LRYaLRVTlsR+h..oDlsKEhDlhV+.............sa...sshP.......................................-sN............ssIKMEVGIEDCLHIEFEYNKSKY....HLKDV.......IlGKIYFLLVR...IKIKaMElulI++EooGs.GPNsasEoETlsKaEIMDGAPVRGESIPlRlFLuGYDLTPThRslNpKFSVKYaLNLVLVDE-D .....................................................................................................h.h.h.t......tt..h.h............................pt.h.cth.la.h.sGEslsG.............t...V.sl.p.+p..sp+l...c.....HpGI+lph...........................lGpI..........Eh...a..h..-ps..........................p..h.-..Fls.hsp-.....L.sh.P....G.....c...l.........p....s..po.asFcF........p.......lcK...........Y.EoYpGhN..V....cl+Yhl+..sol.....Rph....s-l.s.+.p.h.-.h.h...Vp............................sh.....sh...P...........................................................-.hs.................................ss.I.KM.-.V.G...I..E.-..C..L.HI.EF.EYsK..ocY..........cL.+Dl...........IlG+I...YF..LLVR.....lK...IKpMElpll++Eos.......Gs....u...s.s.h...h.p.........E.....scTls+aE......IMDGuPs+G................EoIPIRlaLss...h..p..hoPThc.s.....lN.ppFSV+aaLNLVllD--............................................................................................ 
0 146 219 321 +4957 PF03997 VPS28 VPS28 protein Bateman A, Wood V anon Pfam-B_6317 (release 7.3) Family \N 20.30 20.30 20.40 20.50 19.40 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.28 0.71 -4.68 6 333 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 284 19 223 325 4 178.50 41 76.05 CHANGED huELYAIlpsl-pLEKAYl+DslSsoEYsusCpKLlsQaKsshpplpsp....thsSIEcFscKYRLcCPsAlcRIc...............cuhPITlcc....................DpussuKsIA-lVppFITsMDuLRLNhpAhDpLaPhLs-LhsohN+lSclP..DF-s+pKlpcWLh+LspMuASDELo-sQsRQhLFDLESAYsuF.phLp ..............hAELaulIhTl-tLEKA..Yl+Dsl.os.sE......YTssCs+LLsQYKshhpp.hpst.....................ph.ss.l......-p........Ftccac.........l.-CPtAhcR.l.+...............pGhPhTlc...c........................................................s..p..u..s..s..u.phIA-ssppFIThMDsL+Lph.hAhD............p...L.aPhLp-Lhpohs+l......op..........................DFE.............s+.......tKlhpWLhpLspMpAo-ELs-p.QsRphhFDl-pAYpuF.thL.................................................................. 0 74 120 184 +4958 PF04133 Vps55 Vacuolar protein sorting 55 Wood V, Finn RD anon Pfam-B_25168 (release 7.3); Family Vps55 is involved in the secretion of the Golgi form of the soluble vacuolar carboxypeptidase Y, but not the trafficking of the membrane-bound vacuolar alkaline phosphatase. Both Vps55 and obesity receptor gene-related protein are important for functioning membrane trafficking to the vacuole/lysosome of eukaryotic cells [1]. 
23.30 23.30 23.60 24.20 22.20 23.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.57 0.71 -4.36 27 382 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 262 0 242 344 0 116.20 37 87.97 CHANGED llhLShhluhGhlLlILuCAL..asNaaPLhsllhalluPlPshl.....spphtssssasspt..........h.-hupFlTGhlVlSuhuLPllLtHssl....IshsAhhholsGshllasoIlhahhhFsts--c ..............lluLSFhhAlGhhhllLuCAL..............apsaaPLhVlhhYlluPl.PhhI.....sp+h.s..s..ss..-.hsssu..........sh-huhF.lTshlVVSuhuLPllLAHu..s..l.........I.phsAhhhslsGshllasTIlsFhhhFtptpp.h.......................... 0 61 118 185 +4959 PF00558 Vpu Vpu protein Bateman A anon Swiss-Prot Domain The Vpu protein contains an N-terminal transmembrane spanning region and a C-terminal cytoplasmic region. The HIV-1 Vpu protein stimulates virus production by enhancing the release of viral particles from infected cells. The VPU protein binds specifically to CD4. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.92 0.72 -4.42 16 6152 2009-01-15 18:05:59 2003-04-07 12:59:11 14 3 58 8 0 5351 0 71.70 65 97.50 CHANGED Mhphph.....lullulhlslIl.sIllWsl....lahcY+ch++Q....+cIpcLlcRIRERAEDSGNES-G-pEE.LssLlc.....hGasNPhh...L .............M.sL..I.....h.u.I.V.uLlVAh.Il.AIVVWoI....VhIEYR.K.ll+Q....RKID.RLI-RIRE..RAEDSGNESEGD..pE..E.....LS.sL...VE........MGc.hs.hs..ls........................ 
0 0 0 0 +4960 PF03852 Vsr DNA mismatch endonuclease Vsr TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Family \N 20.60 20.60 20.60 20.90 20.40 20.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.39 0.72 -4.36 7 1075 2012-10-11 20:44:43 2003-04-07 12:59:11 10 8 981 3 248 738 161 73.40 49 47.87 CHANGED sDhhss...tpRptsMp.ps+o+sT+PEhtLtphLaulGhRYRhpctsl.G.....pPDlVhsph+lslFlcGCFWHG+sCtht .........................Dhhsp...tpRS+sMp...uIto+D.Tt..Eht...L.tphL..pupGl.p.aRlp....c...t....s....LP.G................+PDhVlsc.....Y+sVIFsHGCFWHtH.c.C...hh........... 0 76 158 204 +4961 PF00093 VWC vwc; von Willebrand factor type C domain Sonnhammer ELL anon Published_alignment Family The high cutoff was used to prevent overlap with Pfam:PF00094. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.35 0.72 -3.86 20 3816 2012-10-01 23:42:56 2003-04-07 12:59:11 13 320 140 1 1998 3212 3 59.00 33 11.54 CHANGED Ch.psGphYpss-sWpss.........Cp.pCsCps........uplhCcplh..Cs.......ssCsss..................s.GECCs..hC ..................Ch.hsG..p..hYp.sG..-pWp.ss.............Cp.pCpCps..................................GpltCpp....ht....C..s............hsC.sps............................h.hssGpCCP..hC.............................. 0 372 529 1087 +4962 PF02020 W2 IF5_eIF4_eIF2; eIF4-gamma/eIF5/eIF2-epsilon Bateman A anon [1] Family This domain of unknown function is found at the C-terminus of several translation initiation factors [1]. 
30.00 30.00 30.30 30.50 29.90 29.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.03 0.72 -4.03 77 1188 2012-10-11 20:01:00 2003-04-07 12:59:11 13 35 311 14 716 1099 4 83.00 29 12.25 CHANGED pctphshLtulpch...h..hc...........s.phh..sh.l......tllptLY.-tDllsE-sllpWhpc...t....stpspsp..lh.cpsptFlcW.LppAEEEsp--- ......................................ptplphLhulpphs....hc..........psphh..sh.l......pllpthY..-tDll.pE-sllpWhpc.....p..............h....s.tcspth.............hh..cp..s...psFlp.W.LcpAEEEsppp...................... 0 217 343 531 +4963 PF03716 WCCH WCCH_motif; WCCH motif Finn RD anon Pfam-B_3194 (release 7.0) Motif The WCCH motif is found in a retrotransposons and Gemini viruses. A specific function has not been associated to this motif [1]. 20.80 20.80 21.30 21.20 20.20 19.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.74 0.72 -6.95 0.72 -4.45 30 1359 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 739 0 2 1020 0 24.40 64 20.75 CHANGED PlhpsCsCPHCPRH.ppppshsppAa ......PlppPCCCPHCPRH.Kp.psMspQAH........... 0 1 1 1 +4964 PF00458 WHEP-TRS WHEP-TRS domain Finn RD anon Prosite Family \N 27.30 27.30 27.30 27.50 27.20 27.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.63 0.72 -4.29 65 996 2009-09-10 19:30:04 2003-04-07 12:59:11 15 80 141 15 582 981 8 55.30 39 11.86 CHANGED LhpplspQG-hVRcLKucKAsKs..p.lcsAVppLLsLKtpa...KphoGp-hpPs...s..s.u ................LhpplspQG-hVRcLK.......up....K....A.sKs....p.l-.........sAVppLLsLKtph....cptp.Gpchpsst.......s....................... 0 175 232 425 +4965 PF00110 wnt wnt family Sonnhammer ELL anon Prosite Family Wnt genes have been identified in vertebrates and invertebrates but not in plants, unicellular eukaryotes or prokaryotes. In humans, 19 WNT proteins are known. 
Because of their insolubility little is known about Wnt protein structure, but all have 23 or 24 Cys residues whose spacing is highly conserved. Signal transduction by Wnt proteins (including the Wnt/beta-catenin, the Wnt/Ca++, and the Wnt/polarity pathway) is mediated by receptors of the Frizzled and LDL-receptor-related protein (LRP) families [1]. 19.40 19.40 19.60 19.60 17.70 17.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.55 0.70 -5.23 104 10329 2009-01-15 18:05:59 2003-04-07 12:59:11 14 8 7256 1 979 7789 0 159.90 55 94.96 CHANGED hCsplsuLs....cQpplCpppsc........hhtulscGsphulpECQaQF+pcRWNCSshstt.................................shFsp.llppG..oREoAFlaAIouAGVsauls+ACopGpl.psCuCDppt....................pststtt......................................................................WcWGGCSDNl.caGhpFu+pFlDupE........ttpssRsl..MNLHNNcAGRpsVppphcpcCKCH...GVSGSCslKT....................CWp.plssFRplGshL+c+Ycs.AhcVphpppspttth...............................................................................................p.ph+.....s..........sppDLVYl-pSPsaCpps.ph.GshGTpGRpC.............Nco.Sp.........uh-u.CshhC..CGRGasopphphsc.cCpC+.Fp.....WCChVcCcpCppphphasC+ 
...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................+.T....................CWh.pL..P.s..F..R..s..VGDsLKDRFDG.A.SR.VMhsNsshp.t...ss.p.p.spsssptsst..................................................................................................+pRYpFQL+Pa.NP-HK............sP.............GsKDLVYLEsS..P.s..FCEK..NP+..............L.........G....I....G....THG.......R..tC..............................NDT...SI.................GVD..G..CDLMC..CGR.G.Y+T.p.p.h.hl.lE.RCs....................................................................................................... 1 210 287 607 +4966 PF01822 WSC WSC domain Bateman A anon [1] Family This domain may be involved in carbohydrate binding. 
20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.83 0.72 -4.05 162 2216 2009-09-11 06:24:10 2003-04-07 12:59:11 14 209 242 0 1812 2200 48 80.00 26 17.70 CHANGED YhGC......as...........-s................hhsss.thssssho...sptC.hshCt....tt...u.ash......sulp..up...pCaC..Gsshsstt.t.sspt..............Cs.hsCs.Gss...s......ph.CGG ................................................hGCap.....................-s.......................tt.thh...st...ths.ssshT.......sptC.hsh.Ct..........tt......u..as.a............uGlp...hup.......cCa..C.........G..s...s...hs...s..s.s...tt.s.stsp....................C....s....hs.Cs..Gss....s..........ph..CGG.................................................................. 0 824 1197 1552 +4967 PF02165 WT1 Wilm's tumour protein Mian N, Bateman A anon IPR000976 Family \N 19.60 19.60 21.80 20.50 16.90 16.80 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.16 0.70 -5.32 8 147 2009-09-10 17:23:11 2003-04-07 12:59:11 10 10 53 6 33 118 0 217.70 64 67.01 CHANGED MGSDVRDLouLL...PPVSSLsuuuGuC.ulPVuGusQWAPlLDFH.PGS.PYuSL..............usHSaIKQEPoWG.uuDPaE...DPHCGLuA.FTVHFSGQFTGT.GsCRaG.AFG...........................pPssuQuRMFPNuPYLPuClDSsPshRNQ.GYusVAhDGsPSYGHTPSHHsuQFoNHSFKHEDslS.QooluEQQYsVPPPVYGCHsPoDoCsuSQALLLRNsYNS.DNLYQMsSQLECVTWNQMNoLuSohK.................uHuuuYEoDspos..PhLhSCSuQYHIHTHGV..FRGlQDVRRVPGlsPslVRS.SEooEKR 
...................................................................................................s..HShIKQEPsWs.ss-Phc...-.ps.LuA.FTlHFSGQFTGo.usCRaG.sFG............................pss.t..s..RMFssssYLssCh-s..s.RNQ.GYusVsFDGsssYGHTP..oH.HssQFssHSFKHEDshu..QQsohG.-.QQYsVPPPVYGCHTPoDSCTGSQALLLRsPYsS..DNLYQMsSQLEChsWN.hN.LuuolK..................................................................sHuoGYES-spos..PhL..CuuQYRIHTHGV..FRGIQ.DVRRVPGlAPslVRS.SEosEKR................................................................. 0 2 5 15 +4968 PF03303 WTF WTF protein Mifsud W anon Pfam-B_4183 (release 6.5) Family This is a family of hypothetical Schizosaccharomyces pombe proteins. Their function is unknown. 25.00 25.00 64.20 62.40 22.10 21.60 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.78 0.70 -4.96 8 19 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 1 0 19 27 0 238.50 41 80.87 CHANGED MKNNYTSLKSSlDEtDEhKTDHEIDLEKG.LPEYsSEEEusLPPYSDhu+l.......Ss.......sPN....................sHRcpcsScSsDNpos..LlKLLIShlulhVlNhsA.lCh.saKcuhFtcautu-hVLFGlhsh.lCslshIhLhYFYETWsKAV....KVTll..................shuhGLhshp+chhlhhahhah..IlChlLFsshppGpL.ls+uhltS.....sColSAtllhhl.sVsIPhaphcphhstLhpV.hlppshsls ..MKNNYssL+SslDEtsp.hKs......DpEIDLEKG.LPEYsSEEEuTLPPYSDhupl..........us.......sPN..............................s+Rcsc.ssco...scNuss..LlKLLIShhslhllNhsA.lChL.a+cuhFpsauhsphslFGhaCh.sCslshIhLhaFYETWTKAV....Khslh...................................................shuhGlhp.h+chhshhahhhh..lhphhLhshhppstLsL.puhltu.....pCSluAtlhhhl..lhl..ahhcphh.t.hps.hlppshh................................. 0 19 19 19 +4969 PF04932 Wzy_C O-Antigen ligase Moxon SJ, Bateman A anon Pfam-B_5033 (release 7.6) Family This group of bacterial proteins is involved in the synthesis of O-antigen, a lipopolysaccharide found in the outer membrane in gram-negative bacteria. This family includes O-antigen ligases such as E. coli RfaL [1]. 
24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.45 81 1431 2012-10-01 22:04:45 2003-04-07 12:59:11 10 29 1205 0 366 4539 2057 165.80 18 32.83 CHANGED hshllh..hhslhhotSRuuh...luhhlshhhhhhhhhtthhtth................................................hhhhh....hlhshslsshh...............................................................................................................................tphhhplhstststs................................stRh.t.happuhphhtpp.Ph....hGhGh..................hht.h.......hh.hh.sthhtpsHNhalphhschG.lhGhlhhh ...................................................................................................................................................................................................................................................h...llh..hh.slh.h.o.tSRu..u.h.......l.u..h....h..l..s...h....h...l.....h....h.....h......h........h....h....t......h......t.....t...t..t...h...................................................................................................hh.hhh............hlhs..hhl.s.h.sh.......................................................................................................................................................................................................................................................h.p....p...h...h...t..p..l...h...p.tt.t..p..s..t.........................................................................................................shRh.t..ha....p....s.ul.p....h..h.....p...c....p...Pl......................hG..h.Gh..sth.....................................tthh.p.t.h.............h.h.....t...h..h..h.....t...h..h....t....p..s..HN.hL.phhsptG..ll.Gllhh.h............................................................................................................................. 
0 159 261 325 +4970 PF00739 X Trans-activation protein X Bateman A anon Pfam-B_458 (release 2.1) Family This protein is found in hepadnaviruses where it is indispensable for replication. 21.50 21.50 21.90 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.79 0.71 -4.27 5 5949 2009-09-16 23:40:58 2003-04-07 12:59:11 14 7 78 2 0 4549 0 103.30 87 89.74 CHANGED MAARLCCQLDPARDVLCLRPVGAESRGRPlPGPhGuLPuuuuSAVSoDHGAaLSLRGLPVCAFSuAGPCALRFTSA..RRMETTVNsHp.LsKsL+KRTLGLSuMSToDL.EAYFKDpLFT-WEELGEEhRLpIFVLGGCRHKLl ....................RhhCQLDs.sRDVLCLRPVuAESpGRPhsGshGslsssSsssVsssaGuHLSLRGLPVCAFSSAGPCALRFTSA..Rp..METTVN.....A...+....p........L....PKV....LHKR........T........LGLS.A.MSTT.DL...EAYFKD.ClF.KDWEELGEEIR......Lhl.FVLGGCRHKLV............... 0 0 0 0 +4971 PF00860 Xan_ur_permease xan_ur_permease; Permease family Bateman A anon Pfam-B_1593 (release 2.1) Family This family includes permeases for diverse substrates such as xanthine Swiss:P42086, uracil Swiss:P39766 and vitamin C Swiss:Q9UGH3. However many members of this family are functionally uncharacterised and may transport other substrates. Members of this family have ten predicted transmembrane helices. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.20 0.70 -5.98 20 15557 2012-10-03 01:44:59 2003-04-07 12:59:11 15 19 3998 1 2877 9790 2601 366.40 23 85.08 CHANGED tphlhhGLQahhuMauusllVPlllutu..........t..lhuhshlsuGluTllQsh....hhGhplslhhGsSFsals......hhhhshG.............shslsulhGullluullhhlluhhGhh.hLh+hhPPlVsGsllhlIGLuLs.luhsthuss....ssu....hsssphhtlulsslshslhhshh...tpshhpphulLlGlhsGalluhhhG..hlshss...............ltstsahths.sh.ausPh......hshslllshhslsllslhEslGshhuluplssppht....p.slp+uhhu-GluolluuLhGu.hPsToaupNlGllslT+lhSphlhhsAullhllhGlhsKluuLhssIPssV.lGGshllhaGhlhuuGlphLppschc..pscNlhIlulolslGlulsth.hh ............................................................................................t..tlhhGlpph.lsM..hhshl.l.......s.Ph.ll...uts........................................t...h.........lh..sss....h.........l..suGl....uoll.sh...............hssh...l.shh...Gh...uhs..hhs...................hhhls.hG.............................................s.h.sh.t....s...............h....h...uu.s....hltu....ll....h.h....l....ls....h....h.....t....h........h...........h..........l.h.c..h.l.P.....ssl.......p.s....s..l.......s....hsIG.....L.L.h................l......u.....l.....p.......s....s.......u......h......h......h.................................ps..........................h.....s....................s.............p........................h....................h...................h..........u............h...........l......h..............h........l......h....s......l....h...l...thh............................hps..h..h...h..h...h......u...l.L..lu..l.lssh...ll.uh...........h...hG........hl.......s..h.ss............................................................lt.ps.s..h....h....t..h....s..............h..t...h....s.hsh..........................h..s....h...s.....h...l....h...s....h...h.s.h...h...l.ls..hh-................sh..Gsl..h......u..l...u......p...h...ss....h.hpt.....................
.pl.p.+...u...lh.u...Duluohlu.uhh.Gs..sssT.s.a.s....Ess..........u.sluhs...........st..s...hhsus...l.hsl.lh.uh...h.s.hh.u..slhth.lP............s...........s.........l.....hu........u........s........sl.l...........h.h.Ghlh.hsu....lph.l.s.h..s..c..hs...pshs.hh.l.s.ulhls...huhuls.h...hh............................................................................................................................................................................ 0 745 1589 2296 +4972 PF04921 XAP5 XAP5, circadian clock regulator Bateman A anon Pfam-B_4702 (release 7.6) Family This protein is found in a wide range of eukaryotes. It is a nuclear protein and is suggested to be DNA binding [1,2]. In plants, this family is essential for correct circadian clock functioning by acting as a light-quality regulator coordinating the activities of blue and red light signalling pathways during plant growth - inhibiting growth in red light but promoting growth in blue light [3]. 25.00 25.00 29.20 28.20 23.60 23.10 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.56 0.70 -4.80 26 325 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 232 0 217 305 9 237.70 43 71.22 CHANGED KK+p+pppscLSFssD--E.........................................pppspspttp.cppp..pt.sh.............................p++lsKNPsVsTs.aL.D+sR-ccEpptR-pLRcEaltpQpt...............lKspEItIsFsYaDGosppt.sV+hKKGsoIhtFLp+spc.ht..........tc.....hpEh+psSsDsLMhVKpDlIlPHHYoFY-FIhsK..spGKoG.sLFsFDscc.................................shphhs-hpspc--opssKVV-RpWYERNKHIFPAS+WE......Y-PtKsasp.hhp 
....................................................................................................................t.tttththttLSFs.--cctt.....................................................................................................tppptttt.t..........t.....t...................p...............................p+KluKNPs..V-TS..FLPD+-REccEsphR...........EcLRpEW.tpQEplKsEEIpITFS...........YW........D.....GoGHR...+....sVc..............hKKGs......oltpFLp+shchLt..................p-..............FpEL+ss.uV-pLMalKEDLIIPH....HaoFY..-FIlsK...ARGKS.........G..PLF.sFDV+-..........................................D.lRh.l.sDAssEK-ES.......HAGKVV.RpWYE+NKHIFPASRWEs.....YDPpKcac+Ysh.p.......................... 0 80 119 170 +4973 PF02625 XdhC_CoxI DUF182; XdhC and CoxI family Mian N, Bateman A anon COG1975 Domain This domain is often found in association with an NAD-binding region, related to TrkA-N (Pfam:PF02254; personal obs:C. Yeats). XdhC is believed to be involved in the attachment of molybdenum to Xanthine Dehydrogenase ([1]). 23.80 23.80 23.80 23.90 23.70 23.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.98 0.72 -4.36 129 2575 2009-01-15 18:05:59 2003-04-07 12:59:11 11 15 1586 10 764 2086 952 70.30 30 22.01 CHANGED tphhppspss....sLuTllpspGSsPRpsGupM..llps-G...p.hhGolu.GGs....lEtplhppApphl..psup.....sphhpash ............t.thhtpspss....sLsTllcspGSsPR.ps.G.ApM.lVpsDG............p...hhGol.u.GGs.................lEtplhpc..A.t.....p.h.l.....tssp......sphhph..h...................... 0 243 509 653 +4974 PF03894 XFP D-xylulose 5-phosphate/D-fructose 6-phosphate phosphoketolase Wood V, Bateman A anon Wood V Family Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyeraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 [1]. This family is distantly related to transketolases e.g. Pfam:PF02779. 
21.20 21.20 21.70 22.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.19 0.71 -4.68 6 1182 2012-10-02 16:07:47 2003-04-07 12:59:11 10 11 939 16 425 1121 49 168.90 47 22.78 CHANGED spsLGpYlRDll+hN..spsFRlFGPDETtSNRLpusaEVTcRtWhpphhs.s.-...l.sctGpVsEhLSEHpCEGWLEGYLLTGRHGhFuSYEuFl+lVDSMlsQHuKWLchs.p-lsWR+cIuSLNllsoSsVW+QDHNGFoHQDPGhlsslLsKKs-...llRlYLPsDANoLLAVsD+Chpo ................................................p.lGpal+-lhchN.....scs.....FRlFGPDETtSNRLt.ula-sT...s+..tW....t...p.hh.s...t.s-..thhss..sGpVh.E.LSEHt.spGaLE....GYlLTGRHG..hFuoYEuFl+llDSMhsQ....HsKWL.chs.p-.......lsWRpslsSLNhlhTSpVa+QDHNGaoHQDPGhlshlhs.K.p.s-...llRlYLPsDANoLLulh-+sh+.................... 0 108 256 360 +4975 PF03469 XH XH domain Bateman A anon Bateman A Domain The XH (rice gene X Homology) domain is found in a family of plant proteins including gene X Swiss:Q9SBW2. The molecular function of these proteins is unknown. However these proteins usually contain an XS domain that is also found in the PTGS protein SGS3. This domain contains a conserved glutamate residue that may be functionally important. 22.20 22.20 25.60 33.50 22.10 22.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -4.49 12 174 2009-01-15 18:05:59 2003-04-07 12:59:11 9 12 21 0 109 154 0 119.60 41 25.99 CHANGED KRMGcLs.csFhcAspp+hs.....tp-sp.pAs.lpshWccplp-ssWpPa+h................ssph.tEllsE-DEcL+pLKp-hGE-VYstVppALhEhNEaNsSG+Y.ssELWNa+-sRKATLcEslshhhp...phpphKR+R ..................................................KRMGELD.+sFhpAspp+hs.............tc-sp.pAs.LsShWpppl.+s..ssWHPFKhlh..................................sssptpElls--DpKLppLKp-aG--lapsVtpALhElNEY..........N.s.SG..........RYsssELWNa+EsRKAT.lcEslpalhp...phct.+p+................ 
1 16 53 82 +4976 PF03468 XS XS domain Bateman A anon Bateman A Domain The XS (rice gene X and SGS3) domain is found in a family of plant proteins including gene X Swiss:Q9SBW2 and SGS3 Swiss:Q9LDX1. SGS3 is thought to be involved in post-transcriptional gene silencing (PTGS). This domain contains a conserved aspartate residue that may be functionally important. The XS domain has recently been predicted to possess an RRM-like RNA-binding domain [1] by fold recognition. 25.00 25.00 26.60 26.00 24.20 21.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.60 0.71 -4.15 12 211 2012-10-02 20:46:34 2003-04-07 12:59:11 9 16 28 2 131 198 0 117.00 28 20.35 CHANGED pc-haVWPWhGllsNsssphp.cs+..sG.uusphcpcluc..FsPhcVpsL.Wspp.GasGhuIVcFupsWsGFcsAhth-caF-tptpGK+DWtptp..........t.ptsclaGWsA+t-DYpusshl .........t.p-hhVWP.hsIlhNsstph..pt..ssp..hGhus....pclhcphsp...Fss.h.+.sp.sl....astp....GHpGhullcFspshsGappAhphccpFptpspG+c-Wtppp......................t..ttptlYGalApt-Dhp............................................ 0 16 80 114 +4977 PF04555 XhoI Restriction endonuclease XhoI Kerrison ND anon DOMO:DM04409; Family This family consists of type II restriction enzymes (EC:3.1.21.4) that recognise the double-stranded sequence CTCGAG and cleave after C-1. 
25.00 25.00 71.30 35.80 20.00 19.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.14 0.71 -4.90 6 48 2012-10-11 20:44:44 2003-04-07 12:59:11 8 2 42 0 13 50 2 174.20 43 80.90 CHANGED pKQh-oG+sDpGERuGVTuGKNMDGFlsLlhsllptNGLucA-Ia.ps+shLTLPGYFRPTKLWDlLVlp.....cGpLlAAVELKSQVGPSFGNNFNNRsEEAIGTAaDlWTAaREtAFGcp..RPFlGWLMLVEDsPcS+puVp-sSPHFPVF.EF+GASYlKRY-lLCpKLlhEpLYToAslIsSs+susttGcac-hSp .........................tsGctDtGpRuuVTuGKphDGFhtLlh-llptsGlscspla..ppthhsLPGYFRPoKpWDLLVlt.....cGpLlAAlEhKSplGsSFGNNFNNRsEEAlGoAhDlaTAa+EstaGpp..tPalGalhllEDsspSppsV+..s.p.p.PH.Fsl..-F................cGsSYhcRYclhCp+LltEpLYssusllhu.t.t...........s............. 0 4 8 12 +4978 PF00193 Xlink Extracellular link domain Sonnhammer ELL, Bateman A anon Swissprot_feature_table Domain \N 21.00 21.00 21.00 21.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.40 0.72 -3.90 12 1651 2012-10-02 16:37:33 2003-04-07 12:59:11 12 106 63 14 677 1338 95 93.40 40 15.67 CHANGED GhVFahpsss...ploFpEApptChppuAplATstQL.AAacs.Ga-pCsAGWLuDtoVRYPIspPRpsCuGsp...sGVRsh.......Gh..sspppYDsYCa ..............................GhVFahpssp....ploFpEAppsCt.pp.uApl...AossQLhuAact.G.h-p.CsAGWLuDu.oV..R.YP.Isp..P..R...t..tC.uGst............sGVRoh........Gh.....ssp.p.p.a.DsYCa..................................... 
1 60 115 287 +4979 PF00867 XPG_I XPG I-region Bateman A anon Pfam-B_776 (release 3.0) Family \N 22.50 22.50 22.50 22.60 22.40 22.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.27 0.72 -3.93 207 2052 2012-10-01 19:52:02 2003-04-07 12:59:11 13 37 569 21 1236 1927 247 87.50 31 14.55 CHANGED chh.Glsal.AP..hEAEAQsAhLs....ppGh......lsulho-DsDsLlF....Gss....ll+ph.....p.tth...................p.hph...tphhpt....ht..l.sp.cp....hlshslLs.....GsD ...........................t.hhGlsals....A..P..hEA-AQCA.hLt..............cp.Gh......Vsulh..op..DoDhLlF......Gss.....tllp.phs......t.t......................................................................................th.lphhph...p..cl.hpt..........hs...h.sp...pp......hlslslLhGsD.......................................................................................................................... 0 441 726 1045 +4980 PF00752 XPG_N XPG N-terminal domain Bateman A anon Pfam-B_491 (release 2.1) Family \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.15 0.72 -3.75 10 1710 2012-10-03 20:43:45 2003-04-07 12:59:11 12 35 531 21 1161 1748 220 97.30 29 15.40 CHANGED MGI+GLhslLpshu....RpsclEsapG..+pLAIDuShaLYQFLpuVRpptGsslps.....sSHLhshFpRhp+lhpaGI+PlaVFDGssP.sLKppslsKRppRRpcs .....................................MGlp.u.Lhphlpshs...................c..t.h..p..........l....c....p.hp..G.............pp..lAl..............D..u..........sha........l...a..p....h.....h.h..u...s...p...t.p.....u.p.shts.................................sa..l...h..s....hhhR.ht.p.L.l...p.h.sI.+P.l.a...........VF..DG..t..s.s...hKpppht.cRppp+t..t................................. 
1 407 670 969 +4981 PF01834 XRCC1_N XRCC1 N terminal domain Bateman A anon SWISS-PROT Domain \N 20.70 20.70 20.80 21.10 18.80 20.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.13 0.71 -4.74 3 182 2012-10-03 19:46:52 2003-04-07 12:59:11 11 9 89 13 107 163 1 131.20 46 23.24 CHANGED MPEISLRHVVSCSSQDSTHCAENLLKADTYRKWRAAKAGEKTISVVLQLEKEEQIHSVDIGNDGSAFVEVLVGSSAGGu..TAGEQDYEVLLVTSSFMSPSESRSGSNPNRVRMFGPDKLVRAAAEKRWDRVKIVCSQPYSKDSPYGLSFVRFHS .............Ms.lphpcVlSh.SSpD..s.pas.s-NLLp.s.-..s.......+...p.Whss.p.s.t.p.t..p..hpV.LQLE+..t...IptlDlG.NpGuAFlpl.VGpSuhs......-psa.sLLssoohMSPs-S..+sGpN.spVRhF...ssscllpssupcpWDRl+llCoQPas+c.saGLuFl+h+..................... 0 39 46 72 +4983 PF02162 XYPPX Rhodopsin_C; XYPPX repeat (two copies) Mian N, Bateman A anon IPR000216 Repeat This repeat is found in a wide variety of proteins and generally consists of the motif XYPPX where X can be any amino acid. The family includes annexin VII Swiss:P24639, the carboxy tail of certain rhodopsins Swiss:Q17094. This family also includes plaque matrix proteins, however this motif is embedded in a ten residue repeat in Swiss:Q25460. The molecular function of this repeat is unknown. It is also not clear is all the members of this family share a common evolutionary ancestor due to its short length and biased amino acid composition. 19.00 5.00 19.00 5.00 18.40 4.90 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -6.70 0.72 -4.32 7 228 2009-09-16 16:02:34 2003-04-07 12:59:11 12 13 64 0 33 227 0 14.50 85 15.07 CHANGED GY....PPQGYPPQuhPPQ .......uY....PP..Q.G.YP.P..QG.YPPt...... 0 20 27 32 +4984 PF04690 YABBY YABBY protein Mifsud W anon Pfam-B_5698 (release 7.5) Family YABBY proteins are a group of plant-specific transcription involved in the specification of abaxial polarity in lateral organs [1,2]. 
21.00 21.00 21.00 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.27 0.71 -4.01 6 396 2012-10-02 14:16:02 2003-04-07 12:59:11 8 5 153 0 111 371 33 129.80 45 77.58 CHANGED sslhsso-+lCYVpCsFCsTILAVSVP.sSLaplVTVRCGHCTsLLSlslshts....phLsu.sp................lhpshpspspshs..hhppcssosptss..h........Spsppc-hP+.....sRPPEKRQRVPSAYNRFIKEEIQRIKAsNP-ISHREAFSsAAKNWAHFP+IHFGL ..................................................................................................................................................................CsTlLA..VulP.hpp...lh..phVTV+CGHCsNL...lsht............s.................................................................t........t...t...........t..t.p.....t......................................p.tp........Pc.......hs+P.P..EK+p.RlP...SAYNRFhKEEIQRIKAsN....P-IsHREAFSsAAKNWAp......................................................... 0 20 72 95 +4985 PF03895 YadA_anchor YadA; YadA_C; YadA-like C-terminal region Bateman A, Sandt C anon Sandt C Family This region represents the C-terminal 120 amino acids of a family of surface-exposed bacterial proteins. YadA, an adhesin from Yersinia, was the first member of this family to be characterised. UspA2 from Moraxella was second. The Eib immunoglobulin-binding proteins from E. coli were third, followed by the DsrA proteins of Haemophilus ducreyi and others. These proteins are homologous at their C-terminal and have predicted signal sequences, but they diverge elsewhere. The C-terminal 9 amino acids, consisting of alternating hydrophobic amino acids ending in F or W, comprise a targeting motif for the outer membrane of the Gram negative cell envelope. This region is important for oligomerisation [1]. 
21.70 21.70 21.70 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.49 0.72 -3.97 110 2384 2012-10-03 10:38:27 2003-04-07 12:59:11 10 481 927 35 313 2186 103 77.90 30 8.25 CHANGED hc+csp...uGlAuAhAhuul..Ppssh..sGphsluhGsGsYpGpsAlAlGsuths.ssp..hhh+husu..........sopussususGsuapW ..........................cpchp...uGhAuAhAhuuL....sQs.t......sup..huhGsGsYp...GpsAlAl.Gsuhhs..ssp.......hhh+husoh..........sopu..chususGsuapa............... 0 79 157 241 +4986 PF02699 YajC DUF219; Preprotein translocase subunit Mian N, Bateman A anon COG1862 Family See [1]. 21.00 21.00 21.00 21.50 20.90 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.60 0.72 -4.31 176 4454 2009-01-15 18:05:59 2003-04-07 12:59:11 10 3 4168 1 876 2416 1646 83.30 32 76.43 CHANGED sh.hht.ll...h.l.lhhslhYFllIRPQp...K+tK....c+pphlssLp+GDcVlTsGGlhGplspl..s-shlhl-lus.......s..lclclp+su.Isplh .............s.h...ll..hl.l..lhhslh.Y...Fh..hl.....R.PQ.p.....K+tK.......c+.pp....hh.suLp+GDcVlTtGGlhGpls.cl..........ccs..hlslclss........s...sclphp+s.ulspl................... 1 300 579 737 +4987 PF04073 tRNA_edit YbaK; Aminoacyl-tRNA editing domain Finn RD, Eberhardt R anon manual; Family This domain is found either on its own or in association with the tRNA synthetase class II core domain (Pfam:PF00587). It is involved in the tRNA editing of mis-charged tRNAs including Cys-tRNA(Pro), Cys-tRNA(Cys), Ala-tRNA(Pro)[2-5]. The structure of this domain shows a novel fold [1]. 
20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.15 0.71 -4.27 91 9872 2009-01-15 18:05:59 2003-04-07 12:59:11 10 31 4219 22 2098 6622 1977 122.00 22 38.51 CHANGED p.sstoh.pchuphhs.........hsspph..sKslllcs...................tcsp...hhlslltuspclshpplpphhu....p.clphA..stccltphh.G....hthGslsPhuh.........ttl.plllDpslhshp.p..lhssuspsstplplsstphhch ................................................................ph..tph.sp.h.h.s....................hs......pph...hKoLlhps.................................................scsp.......hllsll..us.p.cls.........p.K.l....tph..h......G..............s..p..c......lphA......s.cc.l...p.p..h..s....G................hhhGuluP.lGh............................pptl..lllDps....ltp..h.....s...p.......lhsuAsp.pshplhhsstphhp.h....................................................................... 0 669 1310 1755 +4988 PF02392 Ycf4 Ycf4 Bashton M, Bateman A anon Pfam-B_1026 (release 5.2) Family This family consists of hypothetical Ycf4 proteins from various chloroplast genomes. It has been suggested that Ycf4 is involved in the assembly and/or stability of the photosystem I complex in chloroplasts [1]. 
25.00 25.00 30.00 29.50 20.20 22.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.08 0.71 -4.96 59 709 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 670 0 68 562 136 177.50 64 96.24 CHANGED p.sctlhpp.IhGSRRhSNYaWAhllhlGulGFLLsGlSSYltp...............sLlsh.........hsspp.......llFlPQGlVMsFYGluGlhlShYLWhsIhWsVGuGYNc........................................FsKppGh..................................................lpIFRWGFPGKNR+IplpaslcDIpuI+l-l.....pEGlsP+R.....slYL+lKGpp-IPLTRl..GpP.lsLsElEppAAELA+FLpVslEG .........................................................................R.ScplWIE.IsGSRKhSNFhWAhILFLGSLGFLLVGhSSYLG+.........NLIsl..........hs.S.QQ......................IlFhPQGIV.MuFYGIAGLFISSYLWCTIhWNVGSGYs+........................................FD+K-.GI..................................................VCIFRW..GF..PGhNRRIFLRFhhKDIQSIRlEl.....+E..G.l..s.RR.......lLYMEI+G.pt.s..IP.LTRT..s-N..lT.REIEQKAAELAhFLRVPIE.............. 0 16 41 59 +4989 PF01737 Ycf9 YCF9; YCF9 Bashton M, Bateman A anon Pfam-B_2211 (release 4.1) Family This family consists of the hypothetical protein product of the YCF9 gene from chloroplasts and cyanobacteria. These proteins have no known function. 20.90 20.90 21.70 22.10 20.50 20.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.82 0.72 -4.53 35 1482 2009-01-15 18:05:59 2003-04-07 12:59:11 12 1 1444 17 54 419 120 57.60 81 94.87 CHANGED lFQlslhALlhlSFlLVVGVPVlaASPpsWspoKsllasGuulWluLVhlVGlLNShVs ..........AFQLAVFALIATSSlLLISVPVVF....ASPDGWSSNK..N....VVFSGTSLWIGLVFLVGILNSLIS......... 2 11 34 46 +4990 PF03795 YCII YCII-related domain Yeats C anon Yeats C, Bateman A Domain The majority of proteins in this family consist of a single copy of this domain, though it is also found as a repeat (Swiss:Q9AJZ7). A strongly conserved histidine and a aspartate suggest that the domain has an enzymatic function. 
This family also now includes the family formerly known as the DGPF domain (COG3795). Although its function is unknown it is found fused to a sigma-70 factor family domain in Swiss:Q9A8M4. Suggesting that this domain plays a role in transcription initiation (Bateman A per. obs.). This domain is named after the most conserved motif in the alignment. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.94 0.72 -3.79 26 4283 2012-10-02 00:20:33 2003-04-07 12:59:11 9 20 2247 3 1546 3526 595 97.20 23 86.30 CHANGED Mhasllshspsssh.....ppptshhssHlsaLcpLtppGthlsu................GsthshDG..st.....GGhhlh-ssshspApphupsDPhsps..Gshpth.lh.......pap ...............................................................hahl.h.h...t..p..sst.................ppptshh..s.sHh...shl..p....p...l....t....cpG..p.llsu..............................Gs...h..s...s...s..D....u..........s.....t........t......hG.uhh.l.h..c.sp...o...h....-...p....A...p....thupp...DP..a..s..ts....G...h.h..p.....h.lh.h............................. 1 496 968 1287 +4991 PF02182 SAD_SRA G9a; YDG_SRA; SAD/SRA domain Iyer LM, Aravind L, SMART anon Alignment kindly provided by SMART Domain The domain goes by several names including SAD [1], SRA [2] and YDG [3]. It adopts a beta barrel, modified PUA-like, fold that is widely present in eukaryotic chromatin proteins and in bacteria [4]. Versions of this domain are known to bind hemi-methylated CpG dinucleotides and also other 5mC containing dinucleotides. The domain binds DNA by flipping out the methylated cytosine base from the DNA double helix [5].The conserved tyrosine and aspartate residues and a glycine rich patch are critical for recognition of the flipped out base [4][5]. Mammalian UHRF1 that contains this domain plays an important role in maintenance of methylation at CpG dinucleotides by recruiting DNMT1 to hemimethylated sites\ associated with replication forks [2]. 
The SAD/SRA domain has been combined with other domains involved in the ubiquitin pathway on multiple occasions and such proteins link recognition of DNA methylation to chromatin-protein ubiquitination [4]. The domain is also found in species that lack DNA methylation, such as certain apicomplexans, suggestive of other DNA-binding modes or functions [4]. A highly derived and distinct version of the domain is also found in fungi where it is fused to AlkB-type 2OGFeDO domains [6]. In bacteria, the domain is usually fused or associated with restriction endonucleases, many of which target methylated or\ hemi-methylated DNA [4]. 20.50 20.50 20.90 21.10 20.20 20.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.06 0.71 -4.92 34 554 2012-10-08 19:26:47 2003-04-07 12:59:11 12 47 169 32 371 572 41 149.00 37 23.53 CHANGED p+clGslPGlpVGDhFhaRhEhsllGlHtsphuGIDhhpsps..........hAsSIVsSGGY-.DDpDsuDsllYTGpGGps.t.tpp.....pspDQcLpcGNLALtpSh...............cptssVRVlRGhcptpp.........psthYhYDGLYpVpcaWh-pGpsG.htVa+a+LpRh...sGQ .......................................................................t.phhG.lPG..l.VGshahhRhphs.sGlHtshh....u....G..I.cttssps....................................AhSlVh....u......GG...Yc...DDh.D.pG-.hhYTGpG...Gps.hs.s.s+...................pspDQpL.....p.p..s.NhALthsh................c.pspPVRVlRuh+stpp...................hts.pts..hpYDG..lY+........VhchW...-h.....G..pp..G...hhVa+ahLhR.ss...................................... 1 105 204 288 +4992 PF04794 YdjC YdjC-like protein Waterfield DI, Finn RD anon Pfam-B_5925 (release 7.5) Family Family of YdjC-like proteins. This region is possibly involved in the the cleavage of cellobiose-phosphate [1]. 
24.00 24.00 24.30 24.20 21.80 23.90 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.77 0.70 -4.92 114 1651 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 1420 8 307 1052 228 240.90 32 92.26 CHANGED +cLIlsADDFGlotuVNtuIlcuacpGlloSsolMsssPuh.pcAsphs+.p..ts.....tls.lGLHlsLos..GhP.l....h...pthssLlst..............p.GhFhphh........hthh.tphshcpltpElpAQlcpFhs.hGhsssHlDuHpHsH.hhPslhsslhplu.pchsl.....sh.Rh......shp.h................................ht.hstthptphtptGlshss.h...............hhthlhphhpphsp.t..................ssElMs.HPuhh......sst.lhshssh.....stpRtpEhphLsuspltthlppp ....................h.hLIlNADDFGLocuhNhGIlcuhcsGl...loSTohhsNssuh..c....cA.....l...p....Lu+....c....hP.................sLs..lGhHhsLTh..Gc.....P..l............sthPuLsc...............s.G..hhtchh...............p......hh...........p....s.s...l..s.h--l...ppEltuQhp+Fhp..hGt.pPoHlDoH.+H....l....H....hhP.p...lhsllhc.....hA...tchul..........sh..Rh............stpsh................................h.h............lps.spthpstah...sp.slspst.............................hhphLpphhpp.htp............................shElMs.HPAal..............-p..lhp..p.S.ua........shsRhpELclL.sSsplpthltp.p................................................................................... 0 103 183 242 +4993 PF00399 PIR yeast_PIR; Yeast PIR protein repeat Finn RD anon Prosite Repeat \N 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.97 0.73 -6.38 0.73 -4.44 17 804 2009-01-15 18:05:59 2003-04-07 12:59:11 14 15 57 0 500 869 0 17.80 68 21.99 CHANGED suAlSQIsDGQlQAToss ...tuAVSQIuDGQIQATTpT...... 0 82 223 407 +4994 PF03366 YEATS YEATS family Mifsud W anon Pfam-B_2273 (release 6.6) Family We have named this family the YEATS family, after `YNK7', `ENL', `AF-9', and `TFIIF small subunit'. This family also contains the GAS41 protein. 
All these proteins are thought to have a transcription stimulatory activity 22.00 22.00 22.10 22.20 21.10 21.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.94 0.72 -4.26 63 798 2009-09-11 12:00:18 2003-04-07 12:59:11 11 14 314 7 532 728 2 83.10 35 19.08 CHANGED oHpWslal.........cshss.....................c.c..h.uphlcKVpFpLH.toFss.Ph.RslcpP....PFclsEpGWGEFpltIclaahs......ptp.hsl.HpLplpttshp .......................THpWp..VaV.........+shpp....................................................................................p..c...l.stalcKVhFpLH...Sass.Ph........R.s......l...ccP..........P...FclsEsGWGEF.l.IclaFts......pt+.lslhHpLpLt.pt..s........................... 0 160 267 417 +4995 PF03543 Peptidase_C58 YerHae_surfAg; SurfAg; Yersinia/Haemophilus virulence surface antigen Griffiths-Jones SR anon PRINTS Family \N 22.90 22.90 23.10 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.42 0.71 -4.83 14 182 2012-10-10 12:56:15 2003-04-07 12:59:11 9 21 106 1 40 172 1 170.80 19 15.90 CHANGED Rsoh...ucaGscsshhhsts..............ts........GlC.uLsAcW....lp.css..spshhspLh.uut.cGp...hphpphhshpphphc..tsttsptph.................phppshLpctGlpPptc.hsht................tsupsshsshlpsIhcsGspphhsh+h....sphuuHslAstsp.spp.lsFFDPNaGEFohsscpp.....tsshapp.hhphpptspphhpllshchp ............................................................................................................................................................................................................................................................................................thp..tpshstllptlh.cssu.pshhs.hh......hptsuHAhAs...h...s...........p...sp....p...h......sFFDPNh..GthpFsstc....pFtta.hpta.htp.h....................t.................................... 
0 12 23 31 +4996 PF03545 YopE Yers_vir_YopE; Yersinia virulence determinant (YopE) Griffiths-Jones SR anon PRINTS Family \N 25.00 25.00 26.00 59.50 23.40 22.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.14 0.72 -4.03 8 184 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 159 8 7 96 0 69.90 55 15.70 CHANGED EpLt+-HspLAoGNGuLRSLsTuLpGIpcGSphpphps.AupLL-psluGIsLQQWGTsGGpAochVsSA ..poLpcNapchASGNGPLRSLhTsLQsLschscscpLp-hsosLhNhplGuhtFSQWGT...sGGsspcalscA............. 0 2 3 4 +4997 PF03887 YfbU YfbU domain Bateman A anon COG3013 Domain This presumed domain is about 160 residues long. It is found in archaebacteria and eubacteria. In Swiss:Q9EUM2 it is associated with a helix-turn-helix domain. This suggests that this may be a ligand binding domain. 25.00 25.00 31.00 29.70 24.70 24.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.07 0.71 -4.69 4 723 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 710 16 68 261 3 163.40 73 97.45 CHANGED MEMTssQRLILoNQYpLMshLDs......tNAp+Y+RLpsIlEpGYuLch+ELs.+EFusLsEpEC+pllDIhEMY+AlpsShssLsD......ppslsp+RloFhGFDu..spEu+hlsYVRFlVssE.GcYpcF.tsEHthNSQsPMhsKYpRML....ssW+sCP...+pYHLSssEIppIlNA ..................................................MEMTNAQRLILSNQYKMMTML.DP...........sNAER......YR.RLQ.TIIERGYGL..QM.R.ELD..RE.F.G.ELpE.ETCRTIIDIMEMYHALHVSWoNLpD..........pQuID...E......RRV......TFLGFDA..ATEARY..LGYVRF.MVNl.....E..G..RY...TH..FD..AGT..H.GFNAQTPMWEKYQRML....sVWHACP.........RQY....H.LSANEINQIINA......................... 0 11 24 46 +4998 PF02542 YgbB YgbB family Mian N, Bateman A anon COGs Family The ygbB protein is a putative enzyme of deoxy-xylulose pathway (terpenoid biosynthesis) [1]. 
25.00 25.00 26.20 26.40 24.80 22.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.78 0.71 -4.40 167 3444 2009-01-15 18:05:59 2003-04-07 12:59:11 11 9 3393 163 843 2387 2171 155.90 48 76.19 CHANGED hRlGhGaDVHph........................spG......ct.LhLGGVcIPa..p...........p.GLhGH..SD..uDVllHAlsDALLGAs...uhGDIGpaFPso.Dsp.....aKsssSthLLpcshphlpppGa.plsNlDsTl.luppPKluPa.hstM+pslAphLslshspVslKATTsEpLGFsGRtEGIAAhAls..Llhp.t ..........hRlGpGaDVHph..................................sts.cs...lllGGVcIPa..c......................p..GLluH..SD..uDVhlHAlsDALLGAs...uLGDIGchFPDT.Dsp.....aKGA-SttLLccshc.hlpp........p..G..a.pluNlDsTl.IAQtPKhs.......P+.lspM+tslAcsL.sh.........s.h.sp..VsVKATToE+.L.GFsGRtEGIA.u.pAlsLLhc.t............... 0 298 571 724 +4999 PF02325 YGGT YGGT family Bashton M, Bateman A anon Pfam-B_983 (release 5.2) Family This family consists of a repeat found in conserved hypothetical integral membrane proteins. The function of this region and the proteins which possess it is unknown. 21.70 21.70 21.70 21.80 21.40 21.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.62 0.72 -4.04 170 4945 2009-01-15 18:05:59 2003-04-07 12:59:11 12 4 3567 0 1081 2737 2206 77.70 26 80.00 CHANGED lphh..lsl.ahhllllps...llSWl........s.....sshs....phlhplT-PlLpPlR.....Rll.P...sl.Gu......l..DhSPllshlllphlphhl.hth .........................hllplahhllllph...lhSWhss........shp.....sshs.........phlhplo-Pll.t..Pl...R.....Rll..P....sh..Gs.........l..DhS..sllhhllLthlphhlh..h........................... 0 315 665 900 +5000 PF04945 YHS YHS domain Bateman A anon Bateman A Domain This short presumed domain is about 50 amino acid residues long. It often contains two cysteines that may be functionally important. This domain is found in copper transporting ATPases, some phenol hydroxylases and in a set of uncharacterised membrane proteins including Swiss:Q9CNI0. 
This domain is named after three of the most conserved amino acids it contains. The domain may be metal binding, possibly copper ions. This domain is duplicated in some copper transporting ATPases. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.33 0.72 -4.11 23 1534 2012-10-03 05:12:49 2003-04-07 12:59:11 8 45 1045 41 530 1375 156 45.40 29 10.53 CHANGED sp-Pssuhpl....sphphcYpG+tYaFsS-sCcttFcp-PEcYhstt .....................DPVsuhtl.....t..p.ut....h..p.....h.p...YpGp.pY..aFCSppCtppF.ctcPpcYl........... 0 154 311 444 +5001 PF03755 YicC_N YicC_N-term; YicC-like family, N-terminal region Finn RD anon Pfam-B_3743 (release 7.0) Family Family of bacterial proteins. Although poorly characterised, the members of this protein family have been demonstrated to play a role in stationary phase survival [1]. These proteins are not essential during stationary phase [1]. 25.00 25.00 25.10 28.20 24.20 24.40 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.01 0.71 -4.30 185 2490 2009-09-10 21:41:45 2003-04-07 12:59:11 8 3 2470 0 578 1769 632 155.00 32 53.65 CHANGED lpSMTGFu+spt......pss.t.............tphshEl+SVNpRaL-lphRLPcthp.slEstlRchlppp.lpRG+V-splphpts..st.sstslp..........l.Npslhpphhpthpp.lppph..th....t.sls....hsp....lLphsuVlp........pp.t-...-t.pth....lhpshcpAlsplhphRpcEGppLtt. ............lpSMTuauRtch.....psp..h................................tphshElRSVNpRaL-hhhRLP.cp.hc.slEsslRctlppp.LsRGKV-splphcts...st..spsplt.........l.NppLscpllpthpp...lptph..st.......spls....hsc......lLc...h.P.GVhs.............sp.t.p-h..-shpt.......lhsulcp...........ALcshhssRppEGptLpt...................... 
0 207 385 486 +5002 PF03853 YjeF_N YjeF-related protein N-terminus TIGRFAMs, Griffiths-Jones SR anon TIGRFAMs Domain YjeF-N domain is a novel version of the Rossmann fold with a set of catalytic residues and structural features that are different from the conventional dehydrogenases [1]. YjeF-N domain is fused to Ribokinases in bacteria (YjeF), where they may be phosphatases, and to divergent Sm and the FDF domain in eukaryotes (Dcp3p and FLJ21128) [1], where they may be involved in decapping and catalyze hydrolytic RNA-processing reactions [1]. 24.90 24.90 24.90 25.00 24.70 24.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.04 0.71 -4.65 82 3610 2012-10-10 17:06:42 2003-04-07 12:59:11 10 26 3275 37 1227 3081 755 164.50 29 35.13 CHANGED shhLMEpAupussc.hlpphh......................sptppllllsGsG......NNGGDGlssARhL..hptGhp.Vpl..........hhhtt.t.phssssppthphhpphs.......hphhp.tsttt...........................sclllDAlhGh.................Ghp.tslcs...t.....htpll.....p..tlNps.......psh.llulDlPSGl..sss.oGps...............s.s.......lp.AchTlohttsK.u ............................................h..tLMcpAGtu....ssp.hhtpth.........................................................................spsppl.lllsGs.G.............NNG..........GDGhlsARh..L...t.t...t..G.hp..Vsl......................hh.ht.s.....p.......hspp.....st.......t.sh.p.t..hp.p...h.u.................st..h...h...s.....t.....h............................................................................s-lllDAlhGh....................................G..lp.....tslcs........s.........hspll.............p..tlNpp.....................................sss..llAlDlP.S..Gl.....suc..oGts.........................uss........................lp.AshTlohtshK.s.................................. 
0 398 764 1032 +5003 PF03739 YjgP_YjgQ Predicted permease YjgP/YjgQ family Bateman A anon COG0795 Family Members of this family are predicted integral membrane proteins of unknown function.\ They are about 350 amino acids long and contain about 6 transmembrane regions. They are predicted to be permeases although there is no verification of this. 27.40 27.40 27.50 28.60 27.30 27.30 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -5.71 131 5380 2012-10-03 05:18:07 2003-04-07 12:59:11 9 5 2456 0 1233 3656 4737 347.90 20 93.70 CHANGED RYlhcphlhshlhshhslhhlhhhh.phlphlsph.hpts..ls................hhtllphhh.htlPthlh.hhlP..luhlluslhshupLspssElsulpuuGlShhclhhPhlhhulhlslhshhhsth.lhP...huppphpplhpphhppshtt...........hpsshahptss.................hhlalp..phssssp......hpsl..hlhch...........pp..tshtp..........................................................hl..tAcpuphp.........stthhLpsuphhphstt.....................................................................phphhphpphphphs.hpspphpthttp.......................................................................pphsh.........................tpL...................t.hp.h......pttshs..htphp.hchap+luhPlsslhhslluhs..huhtt..Rtsptts.......hhhulllhhh.aahltthspshutp...shl.ss...hl..usahssl.lhhhlu...hh......lhh+ 
...............................................................................+Ylh+phhts....hhh.s.hh.hLlsl.h.hh...h.....p......h.l.c....ls...ph...spus......hs....................h.t.h.s..l.t.hhh.hplPphl....hhlPluhhluslhsLup.LspcSElslhpAs.GhShhplhtss..hhhu.lhls.llshhlsta.lsP....hupppt.p......ph...h.s.p.tt.spssht.................th.p..p..u...h..at.p.pss...............................phla.lc....p..l..s.s..stt......................hpsl..hlhpht.......................ps....tpht..s.......................................................................lh..hAcpup.hp......................sphh.hLpsspph....ph..s.st...............................................................................phphpph...pp..h.p..ht...hs....hps..p..tl...s..s.h..t..h.t.....................................................................................................................................................................................................cphsh...........................................tpL............................................................................t..h..tttsps........ssphp..hp.hap+.lshPl.ss.hhhsLluls....h..u..h..ss....Rp.u.phhs............llsull..lh..hh..aahl.tp.hht.s.h.u..tt.....stl..ss................hl...uhhhssh..lhhh..lu..lh.Lh...................................................................................................... 0 387 789 1034 +5004 PF02326 YMF19 Plant ATP synthase F0 Bashton M, Bateman A anon Pfam-B_984 (release 5.2) Family This family corresponds to subunit 8 (YMF19) of the F0 complex of plant and algae mitochondrial F-ATPases (EC:3.6.1.34). 
21.80 21.80 21.80 22.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.24 0.72 -3.41 23 331 2012-10-02 21:03:42 2003-04-07 12:59:11 10 3 267 0 25 589 973 79.00 31 61.04 CHANGED PQLDphTahoQFFWLslh......FhshYl..hlh......pphLPtls+ILKlRpphhpp.h...splppppp...httcshltcs.h..................hshl.pol ...........PQLD..ph..T..YaoQaFWhhlh......hhshYl..hls..........t.hlstIsclLKlRpph.l.ppp.........................................................h.................................................................................................................... 0 9 15 18 +5005 PF01514 YscJ_FliF Secretory protein of YscJ/FliF family Bateman A anon Pfam-B_736 (release 4.0) Family This family includes proteins that are related to the YscJ lipoprotein, and the amino terminus of FliF, the flageller M-ring protein. The members of the YscJ family are thought to be involved in secretion of several proteins.\ The FliF protein ring is thought to be part of the export apparatus for flageller proteins, based on the similarity to YscJ proteins [2]. 
23.90 23.90 24.10 24.00 23.50 23.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.24 0.70 -11.00 0.70 -5.17 21 3384 2009-01-15 18:05:59 2003-04-07 12:59:11 12 9 2156 28 653 2296 453 200.40 28 45.09 CHANGED hpthpth.........hslhhhl..hlhsuspt.......tpLYsuLspp-uNcllshLtptsIsscppssssu....lhVspsphspAhhlLsspGL.P+pshsshtplFspsuLlsoshpEps+hhhAlppELucTlspIcGVhsARVHlsLPcc.sshscsspPsSASVhl+hpsshsls.s.lssI+pLVssSlsuLsh.....-ploVl.sstt.....spsspsh ............................................................hhh..............h....hhlsl..h..lhl.....hhh...s...t.ss..p...........hpsL.as.s..L..spp-..u...s..pllshLppt.sIs.achp...s........s.........us........s........lhV.sts.c.ht.p.....s.+h.h.Lsp.pG.L..P..p...s...s.t.s.uh....plhs..p.p.sh.u.sSphtE..psp....hppAlEtELs+.oI.p.sl.ss..VpsARVH.l........u...hPcp...sha.....s...c....p..p..p...s...s..oA.SVhlp.h.p.s...u.ts...Ls...tp.lsuI...hpLVuuuVsuLs.......psVollDpsu.phls...t.....stt.................................................. 0 201 389 515 +5006 PF04650 YSIRK_signal YSIRK type signal peptide Bateman A anon Pfam-B_3441 (release 7.5) Motif Many surface proteins found in Streptococcus, Staphylococcus, and related lineages share apparently homologous signal sequences. A motif resembling [YF]SIRKxxxGxxS[VIA] appears at the start of the transmembrane domain. The GxxS motif appears perfectly conserved, suggesting a specific function and not just homology. There is a strong correlation between proteins carrying this region at the N-terminus and those carrying the Gram-positive anchor domain with the LPXTG sortase processing site at the C-terminus. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.77 0.72 -6.95 0.72 -4.37 66 6800 2009-01-15 18:05:59 2003-04-07 12:59:11 12 627 727 0 247 4894 0 26.50 41 2.13 CHANGED tp+pp+YSIRKhs.lGsuSVhlusslhh ........p+pp+aSIRKho.lGsuSVlluohlhh...... 
1 29 57 149 +5007 PF02295 z-alpha Adenosine deaminase z-alpha domain Mian N, Bateman A anon Pfam-B_11136 (release 5.2) Domain This family consists of the N-terminus and thus the z-alpha domain of double-stranded RNA-specific adenosine deaminase (ADAR), an RNA- editing enzyme. The z-alpha domain is a Z-DNA binding domain, and binding of this region to B-DNA has been shown to be disfavoured by steric hindrance [1]. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.91 0.72 -4.20 8 330 2012-10-04 14:01:12 2003-04-07 12:59:11 12 22 95 39 79 339 3 62.40 29 16.39 CHANGED stss.cpplLsaLcplG.spssTAhALshplsh.K+-IN+hLYsLp+pGclp+psGsPPlWplssts ....................tp.pphlhphLpphu.tpsspslshshtLs.h..K+-lN+sLYcLp+pupV.h.p..ps.s.p.PPhWhls...t.......... 0 14 20 32 +5008 PF01559 Zein Zein seed storage protein Bateman A anon Pfam-B_181 (release 4.0) Family Zeins are seed storage proteins. They are unusually rich in glutamine, proline, alanine, and leucine residues and their sequences show a series of tandem repeats [1]. 
24.60 24.60 26.10 24.60 21.30 23.80 hmmbuild --amino -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.11 0.70 -5.12 9 325 2009-01-15 18:05:59 2003-04-07 12:59:11 11 3 7 0 22 337 0 134.90 41 97.79 CHANGED IIPQCSLAP.uAIIPQFLPPVTSMGFEHPAVQAYRLQQALAASlLQQPIAQLQQQSLAHLTIQTIAsQQ.......QQQQ.FLPALSpLAssNPVAYLQQQLLASNPLALANssAYQQQQQLQQFLPALSQLAMVNPAAYLQQQQLLSSSPLAVuNAsTYLQQQLLQQIVPALo.QLAVANPsAYLQQ..LLPFNQLsVoNSAAYLQQRQQL..lNPLsVANPLVAAFLQQQ..QLLPYNQhSLMNPALShQQPIVGGAIF .......................................................................................................................................................................................................................................................................................................hp.QlshhN.....suY..QQ....hLP..FsQLss.s..ssuaLpQ.Q......h......s.....ssh......p..h....h................................ 0 0 22 22 +5009 PF01754 zf-A20 A20-like zinc finger SMART anon SMART Family The A20 Zn-finger of bovine/human Rabex5/rabGEF1 is a Ubiquitin Binding Domain [5-6]. The zinc finger mediates self-association in A20. These fingers also mediate IL-1-induced NF-kappa B activation. 21.60 21.60 22.20 21.60 21.50 21.40 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -6.89 0.72 -4.35 53 982 2009-09-21 14:31:35 2003-04-07 12:59:11 11 20 182 26 514 896 0 25.10 43 10.59 CHANGED shhC.psGCGFaGsstspshCS+Ca+ .......hhC.tsuCGFaGsstspshCScCa+... 0 109 204 332 +5010 PF01428 zf-AN1 AN1-like Zinc finger Bateman A, SMART anon SMART Family Zinc finger at the C-terminus of An1 Swiss:Q91889, a ubiquitin-like protein in Xenopus laevis. The following pattern describes the zinc finger. C-X2-C-X(9-12)-C-X(1-2)-C-X4-C-X2-H-X5-H-X-C Where X can be any amino acid, and numbers in brackets indicate the number of residues. 
23.60 23.60 23.60 23.70 23.50 23.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.21 0.72 -4.08 181 1823 2009-01-15 18:05:59 2003-04-07 12:59:11 11 43 398 8 1124 1782 126 42.70 34 20.23 CHANGED Csh..Cpptsh..h....shp...Cp.CstpFCtpHRh.-sHsC........sthhptps .........................Csh..Ccppsh....h....shp.......Cc..CsphFC.t.pH.Rh.s......-sHsC.shshpt..s....................... 0 351 592 859 +5011 PF00096 zf-C2H2 Zinc finger, C2H2 type Bateman A, Boehm S, Sonnhammer ELL, Gago F anon Boehm S Domain The C2H2 zinc finger is the classical zinc finger domain. The two conserved cysteines and histidines co-ordinate a zinc ion. The following pattern describes the zinc finger. #-X-C-X(1-5)-C-X3-#-X5-#-X2-H-X(3-6)-[H/C] Where X can be any amino acid, and numbers in brackets indicate the number of residues. The positions marked # are those that are important for the stable fold of the zinc finger. The final position can be either his or cys. The C2H2 zinc finger is composed of two short beta strands followed by an alpha helix. The amino terminal part of the helix binds the major groove in DNA binding zinc fingers. The accepted consensus binding sequence for Sp1 is usually defined by the asymmetric hexanucleotide core GGGCGG but this sequence does not include, among others, the GAG (=CTC) repeat that constitutes a high-affinity site for Sp1 binding to the wt1 promoter [2]. 20.80 14.10 20.80 14.10 20.70 14.00 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.01 0.73 -7.19 0.73 -3.30 194 44392 2012-10-03 11:22:52 2003-04-07 12:59:11 21 6603 1241 52 26561 243772 896 23.40 34 6.87 CHANGED apCp...tCs...csFsppspLppHhpp...H ..............apCp.........tCu........KsF.s.p..p.s..s.L..ppHhch..H.............. 0 6055 9206 17573 +5012 PF00105 zf-C4 Zinc finger, C4 type (two domains) Sonnhammer ELL anon Prosite Domain In nearly all cases, this is the DNA binding domain of a nuclear hormone receptor. 
The alignment contains two Zinc finger domains that are too dissimilar to be aligned with each other. 21.10 18.00 21.10 18.90 21.00 17.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.87 0.72 -3.74 26 7638 2009-11-03 19:32:23 2003-04-07 12:59:11 13 91 562 105 3458 7043 2 66.30 47 14.76 CHANGED phCtVCGD+ASGhHYGlhoCEGCKGFF+Rolppshp..YsCttsps..ChIDKppRspCQhCRh+KClpVGMs+ ........................t.hChVC.G...D...c....u..o....G..h.....H.YG.......l.h...............o..CE.......G.........C.....K..............u.FF.....+.........R...........o.l....p...........p.........p.hp...........Y.p.....C....t...t.....s.......p...s.............C................I......s...........+................p.........p....R.....p..........p........Cp.h..CRh.pKClp.s.GMp........................................ 0 862 1129 2576 +5013 PF01396 zf-C4_Topoisom Topoisomerase DNA binding C4 zinc finger Bateman A anon Pfam-B_1854 (release 3.0) Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.19 0.72 -4.45 27 10157 2012-10-03 10:42:43 2003-04-07 12:59:11 14 102 3740 0 1974 7084 1633 39.50 35 11.65 CHANGED stpCPcCG.upllh+pu+hG...pFlGCoNaPcCcasp.hppp .................pCP..p..C......G....u.....p.....h.ll......+.p....u.....+hG.........hF.h..uCos..Y.....P....c....C.chsp.h...t................ 0 613 1165 1614 +5014 PF02928 zf-C5HC2 C5HC2 zinc finger Bateman A anon [1] Domain Predicted zinc finger with eight potential zinc ligand binding residues. This domain is found in Jumonji [1]. This domain may have a DNA binding function. 
21.00 21.00 22.10 21.50 20.80 20.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.10 0.72 -3.90 52 591 2009-01-15 18:05:59 2003-04-07 12:59:11 11 55 220 0 364 545 6 54.70 37 4.03 CHANGED CphC+shsaLStlsCp.sps.schsCLpHhpplC.sC...sssp...........psLhYRash--Lpshl .........CthC+ssCaLSulsCp.s.ps..s.......pllCLpHs..pcLC...sC...sspc...........ppL.hYRYol--L.shh........... 0 88 162 259 +5015 PF01807 zf-CHC2 CHC2 zinc finger Bateman A, Griffiths-Jones SR anon Pfam-B_755 (release 4.2) Domain This domain is principally involved in DNA binding in DNA primases. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.25 0.72 -4.44 19 5345 2012-10-03 10:42:43 2003-04-07 12:59:11 15 58 4422 3 1153 4188 2239 96.10 38 16.85 CHANGED lspphIspltpphDIV-llupY.VpLKKpGcs.ahuhCPFHsEKTPSFoVsspKpaY+CFGCGsuGssIsFlMchcplsFs-AlccLActhslclshpps ........................................h.spphIpplhs..p...s..s..I.l.-lls...p..h....V.....p....L....+....K....p.....G.p.....s.....a........h.u.h..C..P...F...H...s.....E...K.....T..P...S..Fs..V....s.....s...........p......K.....p....h...aHCF...G...C...G....t.u.G...s.s.lsF......l.hc.h.....-......p.l.......s.F.s..E.AVcpLApphslpls...t.................................. 0 413 797 991 +5016 PF05207 zf-CSL CSL zinc finger Wood V, Bateman A, Mistry J anon Pfam-B_12353 (release 7.7) Domain This is a zinc binding motif which contains four cysteine residues which chelate zinc [1]. This domain is often found associated with a Pfam:PF00226 domain. This domain is named after the conserved motif of the final cysteine. 
29.20 29.20 29.20 29.60 28.50 29.10 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.31 0.72 -4.46 63 560 2009-09-14 12:18:22 2003-04-07 12:59:11 8 13 302 5 396 552 5 58.50 38 39.82 CHANGED --lcl-Dhpa.cp...ppp...h..ahasC..CGDt..Fplotp-......................Lpcs..............-.lspCsoCSLhl+Vlas ....................................-clclEDhpa...-p.......spp.....h.....ahYPC..CGDp..Fploc--.......................................Lcps..........................-plss..CsoCSLhl+Vla.................... 0 126 209 321 +5017 PF05180 zf-DNL DNL zinc finger Wood V, Bateman A anon Pfam-B_9925 (release 7.7) Domain The domain is named after a short C-terminal motif of D(N/H)L. This domain is a novel zinc-finger protein essential for protein import into mitochondria [1]. 20.90 20.90 21.20 21.70 20.00 20.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.42 0.72 -4.26 36 372 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 277 1 263 362 4 65.70 43 36.22 CHANGED h.lsFTC........phCspRSs+phSKpAYp+GsVllpCPuCcN+HLIADpLuhFt.D..pch.....slE-lltpcG-pl ..............................h.lsFTC..........psC.s..pR.Ss.+phSKpAYc+Gs.VllpCsGCpsc.......HlIADpLshFp...-.......tch...........slE-lltt+Gcp........................ 0 85 151 222 +5018 PF02701 zf-Dof Dof domain, zinc finger Mian N, Bateman A anon Pfam-B_1250 (release 5.5) Family The Dof domain is a zinc finger DNA-binding domain, that shows resemblance to the Cys2 zinc finger [1]. 26.60 26.60 27.00 27.10 26.40 26.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.27 0.72 -4.05 12 858 2009-01-15 18:05:59 2003-04-07 12:59:11 10 12 68 0 436 848 5 61.10 65 20.51 CHANGED pssphltCPRCcSssTKFCYYNNYslsQPRaFCKsCRRYWTcGGoLRNVPVGGGsRKsKpsuo ...........p..pphLpCPR.Cs.S.h.s.TKFCYYNNY....slo....QPRHFCKs..C+RYWTcGGoLRNV...PVGG..GsRKsKpt..t.................. 
0 71 291 367 +5019 PF04770 ZF-HD_dimer ZF-HD protein dimerisation region Kerrison ND anon Pfam-B_2002 (release 7.6) Family This family of proteins has are plant transcription factors, and have been named ZF-HD for zinc finger homeodomain proteins, on the basis of similarity to proteins of known structure [1]. This region is thought to be involved in the formation of homo and heterodimers, and may form a zinc finger [1]. 19.40 19.40 22.20 22.50 18.80 18.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.51 0.72 -3.91 36 349 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 39 0 213 332 0 58.20 56 24.97 CHANGED sssspY+ECLKNHAAulGGHAlDGCGEFMsu.utp.ss........ssuL+CAACGCHRNFHR+Espsss ................t...s+Y+EChKNHAAulGGa.AlDGCGEFMsu.Gp-...Go.....................suL+CAACGCHRNFHR+Esp..s............. 0 27 141 179 +5020 PF01422 zf-NF-X1 NF-X1 type zinc finger Bateman A anon Bateman A Family This domain is presumed to be a zinc binding domain. The following pattern describes the zinc finger. C-X(1-6)-H-X-C-X3-C(H/C)-X(3-4)-(H/C)-X(1-10)-C Where X can be any amino acid, and numbers in brackets indicate the number of residues. Two position can be either his or cys. This family includes Swiss:P40798, Swiss:Q12986 and Swiss:P53971. The zinc fingers in Swiss:Q12986 bind to DNA [1]. 21.30 21.30 21.50 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.30 0.72 -3.96 21 469 2009-01-15 18:05:59 2003-04-07 12:59:11 12 11 145 0 254 497 0 21.40 56 4.43 CHANGED CG.....HpCpphCH..GsC.s....C.p ...........CG............HpCpphCHt.GsCtP.....C.p.......... 0 57 98 163 +5021 PF00645 zf-PARP Poly(ADP-ribose) polymerase and DNA-Ligase Zn-finger region Bateman A anon Prosite Domain Poly(ADP-ribose) polymerase is an important regulatory component of the cellular response to DNA damage. The amino-terminal region of Poly(ADP-ribose) polymerase consists of two PARP-type zinc fingers. 
This region acts as a DNA nick sensor. 21.10 21.10 21.50 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.07 0.72 -3.74 54 620 2009-09-11 00:32:30 2003-04-07 12:59:11 13 68 222 32 406 653 6 80.80 30 14.72 CHANGED EYAKSuRusC+...pCp...pcItKsplRluphlpssth..........hthhpWaHhsChhtcthp.............tshsslp..Gacp.....Lp...-DQcc....l+ctlp ...................................EYAKouRu.sCK....tCp....ppI...t.Ksp..l.Rluth....s.ssht.................uth.pWaH..h..sChhppttph.........................tshsplc.Gapp.....Lp...-Dpcplpctl.t................................................ 0 154 220 329 +5022 PF00641 zf-RanBP Zn-finger in Ran binding protein and others Bateman A anon Prosite Domain \N 23.80 17.00 23.80 17.00 23.70 16.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.50 0.72 -4.84 24 3729 2012-10-03 10:42:43 2003-04-07 12:59:11 13 208 314 29 2137 3538 147 29.60 34 7.44 CHANGED +tG.pWpC..ssCs.hpNpspss+ChtCpus+.s ............u.sWpC.......ss..Cs.hhNh.spp..s.p.C...t..Cpss+............ 0 545 976 1540 +5023 PF02135 zf-TAZ TAZ zinc finger De Guzman R, Mian N, Bateman A anon IPR000197 Family The TAZ2 domain of CBP binds to other transcription factors such as the p53 tumour suppressor protein, E1A oncoprotein, MyoD, and GATA-1. The zinc coordinating motif that is necessary for binding to target DNA sequences consists of HCCC. 
19.00 19.00 19.00 19.10 18.60 18.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.14 0.72 -10.63 0.72 -3.97 22 729 2009-01-15 18:05:59 2003-04-07 12:59:11 11 57 134 16 456 660 8 76.50 38 8.52 CHANGED hplpchL.hLlHApp..Cps.t...............spCsh..pCtph+pllpHhpsCpttp...hstChts+pllp....Hh+pCpctc.CsVshs ...............................t..lQppl.hLlHApp.....Cpp..................................ssC.s.l....PpCppMK....pllpHh.ppC............p....t...pp...................sut..Cs.s+p..llu............Hh...KpC....p...c......p....c....CsVshs................. 0 164 240 349 +5024 PF02953 zf-Tim10_DDP Tim10/DDP family zinc finger Bateman A anon Pfam-B_1207 (release 6.4) Domain Putative zinc binding domain with four conserved cysteine residues. This domain is found in the human disease protein Swiss:O60220. Members of this family such as Tim9 and Tim10 are involved in mitochondrial protein import [1].\ Members of this family seem to be localised to the mitochondrial intermembrane space [2]. 19.80 19.80 20.20 20.10 19.70 19.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.70 0.72 -4.68 113 1455 2009-01-15 18:05:59 2003-04-07 12:59:11 10 16 335 20 1012 1331 11 64.70 25 64.21 CHANGED .phlttEtphtphpphhs...clscpCacKClsp.......ssupLs....psEpsClspCss+ah-ssttlupphppp .............................h.tptphpph...t..phhs........pls..ct..CacKC.lss..........hssspLs..........spE.psClsp....Css+ahcssphlspphtp................. 
0 311 524 818 +5025 PF02176 zf-TRAF TRAF-type zinc finger Mian N, Bateman A anon IPR001293 Family \N 22.80 20.90 22.80 20.90 22.70 20.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.45 0.72 -3.59 10 1745 2012-10-02 00:06:50 2003-04-07 12:59:11 13 81 229 11 1175 1418 42 55.90 26 18.63 CHANGED Hhps.CPhhslsCsstCst+hl.Rcclp-HlctsCspsclsCpF...GCppphptpsLQcH ......................H.tp.C..h.h...ltC..s.....p...t..s..s.p.p.............l....Rp.....pl.pp..Hh.p............pC.s.p......p............h..p.Cpa..................Cpt..p.h....tpphtpH............. 0 568 732 927 +5026 PF02207 zf-UBR zf-UBR1; Putative zinc finger in N-recognin (UBR box) SMART anon Alignment kindly provided by SMART Family This region is found in E3 ubiquitin ligases that recognise N-recognins [1]. 21.20 21.20 21.20 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.67 0.72 -4.26 99 1193 2009-09-11 04:39:18 2003-04-07 12:59:11 15 56 293 33 848 1163 4 71.30 30 4.46 CHANGED phCst.htpt..........pshYpChsCsh........sss....sslChsCashssHp..sHchhhhhsp...tsuhCDCG...ssps.......hp...p....Cph+..p ......................p.hCshshtst..........pshYpC.p..TCsh...............ss.s.........s.slChsC.tts.sHp..sHchphhhos...tshhC..........DCGssph......hp....t.......Cp...t........................ 0 316 455 695 +5027 PF03470 zf-XS XS zinc finger domain Bateman A anon Bateman A Domain This domain is a putative nucleic acid binding zinc finger found in proteins that also contain an XS domain. 21.00 21.00 21.50 21.90 20.60 20.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.30 0.72 -3.91 12 126 2009-01-15 18:05:59 2003-04-07 12:59:11 9 13 22 0 79 124 0 41.60 45 7.07 CHANGED CPaC.scKKps..Yp.hpsLLpHAsGlGtustp...u+cKApHhALA CPaCssc+..cps..Yp.hp-LLpHAsGl.Gtustp.+.u+c+A.sHhALA........ 
0 11 47 67 +5028 PF01258 zf-dskA_traR zf_dskA_traR; Prokaryotic dksA/traR C4-type zinc finger Finn RD, Bateman A anon Prosite Family \N 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.99 0.72 -4.19 241 5127 2012-10-03 10:42:43 2003-04-07 12:59:11 12 11 2754 13 1228 3101 1711 36.80 38 28.18 CHANGED sshuh..Cp..pCG-tIsttR.hphh..s.ssthClsC...pppt-p ................paGh....C-..pCGp.IshtR.LcAh..P.ssphClcC.......pphtE.............. 0 410 784 1014 +5029 PF04071 zf-like DUF379; zf_like; Cysteine-rich small domain Kerrison ND, Finn RD anon COG2158 Family Probable metal-binding domain. 19.60 19.60 20.60 21.00 18.80 17.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.70 0.72 -4.28 24 168 2009-09-10 20:56:46 2003-04-07 12:59:11 7 9 158 0 79 154 6 85.40 33 50.46 CHANGED cphphhh..h...s.pcsCEYYPCH.apsQ...sChaCYCPhYPCtDpph.GcalpspsG.t.tlWuCpsCpllHcs-ssstllcphpphscphp....shc ..........t...aphasscsCcaaPCH..hpsp....NCLFCYCPLYsht-pss.G..p..ahh..sc.sG......lKsCosCtlPHct-s.hshlhpphtphht........pt............................ 0 23 57 68 +5030 PF04354 ZipA_C ZipA, C-terminal FtsZ-binding domain Mifsud W anon COG3115 Domain This family represents the ZipA C-terminal domain. ZipA is involved in septum formation in bacterial cell division. Its C-terminal domain binds FtsZ, a major component of the bacterial septal ring. The structure of this domain is an alpha-beta fold with three alpha helices and a beta sheet of six antiparallel beta strands. The major loops protruding from the beta sheet surface are thought to form a binding site for FtsZ [1]. 
20.60 20.60 20.90 20.70 20.40 19.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.44 0.71 -4.67 59 1250 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1235 12 217 840 288 128.60 41 38.95 CHANGED sppllllp.VhAppspthpGspLlpsl.shGhca.G-MsIFHRHtc......sususVlFSlANhhpPG..sF-.-s.hppFsTsGlshFMpLPs.u.suhpsFchMlpsApplAccL.sG.lLD-pRshhTt........pshcpa+p+l ...........................................................p.ptVIlhp.VsA+p.s.ptlsGphLlsulppsGFhF.G.-.MsI.FHR.H.hs......sGsGssLFSlANMspPG..oF.D.s.....-....hs..-Fo..T.sGlolFMp...lPuhG....-..sh..........pNF+LM.LpoApclA--l.GGsVLDDpRchhTsQpLccYpsp.................... 0 42 104 163 +5031 PF00172 Zn_clus Fungal Zn(2)-Cys(6) binuclear cluster domain Sonnhammer ELL anon Prosite Domain \N 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.94 0.72 -4.04 29 13354 2009-09-16 22:05:51 2003-04-07 12:59:11 13 237 239 36 10553 14063 8 39.10 32 5.66 CHANGED puCppCRp+KlKCct...ppP........CtpChpts..hcCp.hspppppt ................uCtpCR..p....R..Kl..K....C..Dt...............ppP..............................C..sp..Ct.cts......hp..Ct..ht......t................... 
0 1813 5198 8893 +5032 PF00882 Zn_dep_PLPC Zinc dependent phospholipase C Bateman A anon Pfam-B_1401 (release 3.0) Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.45 0.71 -4.59 152 1056 2012-10-01 21:01:47 2003-04-07 12:59:11 13 12 595 25 245 1095 36 178.70 16 56.63 CHANGED sHhtl.spsht.....hl...h.t.................h.hthsshhhGuhhPDh........h...........................tths+p...................hhh......h.ptshltthspth.t............................hhhhuhhlGhhoHhlsDhhs..H...........................thhttp..hhpHsthEhthphhh..............................................h..httt.thth........h.thltpthtthhtthhht .......................................................Hhhlspphhp.........hh..t...................ltp.hpth.hG..shhPDh........h.........................th......hhpscp.....................................pth....pphtcah..ttshpphpp...................................................tphppts.ah.LGhhhHahsDhsp.......H.................................................h.phhshs....ttHsthEphh-phh.................................h..........................................th....................................................................................................................... 2 119 194 221 +5033 PF04228 Zn_peptidase Putative neutral zinc metallopeptidase Kerrison ND, Finn RD anon COG2321 Family Members of this family have a predicted zinc binding motif characteristic of neutral zinc metallopeptidases (Prosite:PDOC00129). 
20.10 20.10 20.10 20.20 20.00 19.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.06 0.70 -5.18 8 1671 2012-10-03 04:41:15 2003-04-07 12:59:11 8 2 1494 0 416 1190 90 272.60 44 93.05 CHANGED McWcutRpSsNlEDRRupsGGuu.hGGGG........hh.thGGtsGlllllllLlGhhaGlDlosllG..........Gpssuss.ss.Qsstspuss...tpcEtspFssslLAsTEDTWsplFp.ctGppYppPsLVLaSpss+SACGtApSAsGPFYCPuDpKVYLDhoFas-hpp+hGAuGDFAtAYVIAHEVGHHVQNLLGIhsKlcptpp.ssocApANpLSVRlELQADCFAGVWAppsptct.h.LEpGDlEEAhNAApAIGDDTLQ+pupGhVVPDSFTHGTSpQRhpWFcRGapSGcPupCDTF ......................................................................................................................................Mchps..+p.Ss.N..l-Dp.R...ss........G.G.....su................h.hh...u...G..h....h....G.l.h....h.l....l.l.l.l....l.u...h.h.....h....G........l...D........s...s..l.hs...............................G.p.s...st.p.............p...s...s..t....s..h.sss....................p-cssc.F...s..s....s...l...L...u...s...T...E...DsWs.p...hF.p...p..h...G.....p..s..Y.p.p.P..+.....L.V....h..a..p....G...t...s...p........T......u......C.G...s...u..p...S.....s..h....G.......P...F...YC.....P.u..Dp..p...........VYl....D....l.S.F.a.c.-.h..c.s.....+h....G..............A......s.....G..............D...F...A.......QuY.....V..I...AHEVGHHVQ...pLL.GI.s.+.lpph..pp...ph....o..p.s..p..sN..pL..SV..RhELQADCa..AG.V...W....u+ph.......p.......p.......p..........s.......l........L....E....s...G.D.l..EE..A.LsAApA.IGDD..cLQpp.up..GpVV.PDS.FTHGTSpQRhpWFp+Gac.o.G....-..spCsTF.................................................................. 0 123 271 358 +5034 PF04298 Zn_peptidase_2 Putative neutral zinc metallopeptidase Kerrison ND, Finn RD anon COG2738 Family Zinc metallopeptidase zinc binding regions have been predicted in some family members by a pattern match (Prosite:PS00142). 
20.20 20.20 20.50 35.60 20.10 19.70 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.47 0.70 -4.99 75 1143 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 1121 0 242 727 493 219.40 45 96.58 CHANGED sshhlll.lsshlluhhAQh+VpSsFpKYScVpspsGhTGA-lA+clLcssGlh.D..VpVcpls.GpLTDHYDPpsKslRLS-sVYsusSlAAluVAAHEsGHAlQatpsYusLplRsslVPlsshuSsluhhlllhGllh........tstsLlhlGIlLFusuVLFpllTLPVEFsAS.pRAlthLpspG..llss.cEhp.uu+cVLsAAAhTYVAAAlsulhpLLhh...lhlhtsp .............................................................s..hhlhhlhhhl.lshhAQh+VpSsapKYSpV..psssGh..TGt-lAccILcssGlh..DVpVpp.ss..GpLTDHYDPpsKsVpLSpssYpusSlAusuVAAHEsGHAlQct.puYs.Lc.hRsuLVPVsNhGSsl..u...a.hll....hl..Gllh...........hsssllhlGIl.......Lh.u..huVL..F.plVTLPV...EFsAS.pRAhphLpstG..llsp..cE.ht.tA+KVLsAAAhTYVAAuhsulhpLlRl.lllhtt.p..................... 2 113 190 223 +5035 PF03854 zf-P11 ZnF_P11; P-11 zinc finger Griffiths-Jones SR anon PRODOM Family \N 25.60 25.60 26.20 66.20 25.20 25.50 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.96 0.72 -4.49 3 115 2009-01-15 18:05:59 2003-04-07 12:59:11 9 1 38 1 0 114 0 49.90 61 52.85 CHANGED GPhNCKSCWFcsKuLVcCsDHYLCL+CLsLLLSVS-RCPICKpPLPTKLR G.hNCKSCWFts+uLlcCsDHYLCL+CLslhLssSDhCsICtcPLPT+l.. 0 0 0 0 +5036 PF03367 zf-ZPR1 ZPR1; ZPR1 zinc-finger domain Mifsud W anon Pfam-B_1372 (release 6.6) Family The zinc-finger protein ZPR1 is ubiquitous among eukaryotes. It is indeed known to be an essential protein in yeast. In quiescent cells, ZPR1 is localised to the cytoplasm. But in proliferating cells treated with EGF or with other mitogens, ZPR1 accumulates in the nucleolus. ZPR1 interacts with the cytoplasmic domain of the inactive EGF receptor (EGFR) and is thought to inhibit the basal protein tyrosine kinase activity of EGFR. 
This interaction is disrupted when cells are treated with EGF, though by themselves, inactive EGFRs are not sufficient to sequester ZPR1 to the cytoplasm [1,2,3]. Upon stimulation by EGF, ZPR1 directly binds the eukaryotic translation elongation factor-1alpha (eEF-1alpha) to form ZPR1/eEF-1alpha complexes [1]. These move into the nucleus, localising particularly at the nucleolus. Indeed, the interaction between ZPR1 and eEF-1alpha has been shown to be essential for normal cellular proliferation [1], and ZPR1 is thought to be involved in pre-ribosomal RNA expression [2]. The ZPR1 domain consists of an elongation initiation factor 2-like zinc finger and a double-stranded beta helix with a helical hairpin insertion. ZPR1 binds preferentially to GDP-bound eEF1A but does not directly influence the kinetics of nucleotide exchange or GTP hydrolysis [4]. The alignment for this family shows a domain of which there are two copies in ZPR1 proteins. This family also includes several hypothetical archaeal proteins (from both Crenarchaeota and Euryarchaeota), which only contain one copy of the aligned region. This similarity between ZPR1 and archaeal proteins was not previously noted. 
20.30 20.30 20.30 20.40 20.00 19.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.00 0.71 -5.08 14 842 2009-01-15 18:05:59 2003-04-07 12:59:11 8 10 422 2 572 806 66 154.90 34 67.74 CHANGED cssCPsCstpspT+h..hhssIPaF+EVllMSh.C-+CGa+ssElpsuutlpspG.Rhpl+lps.cDLsRcVlKS-ouolpIPELslEI.Ps.sttGplTTlEGlLpcshctLpps....tpcutcspptpphpcalp+lcpl...hpspp.hTlIl-DPhGNSalps ......................p.o.C.sCt.p.s.s.p.T.+h.......hhspI.....PaF+ElllhohpC-pCGa+...ssElpsuGtlp............pGh+hsLc.l...p...s......p.....D....LsRp.l.lK...........S-ouslp............I..P.EL....-lEl.Ps.s...tG...phTTl..EGlLpphh-tLpppt..................ht..........c......s..s...........sp..........ttp+hppFhp..+.....Lcph....................h.p...s...c..h...s...aTlll-DPhGNSalp........................................................................... 0 199 320 463 +5037 PF00791 ZU5 ZU5 domain SMART anon Alignment kindly provided by SMART Family Domain present in ZO-1 and Unc5-like netrin receptors Domain of unknown function. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.07 0.72 -4.35 30 1349 2009-09-12 23:26:40 2003-04-07 12:59:11 15 421 101 19 679 1023 3 99.30 30 7.68 CHANGED suhpspGhhsstGGpLp.sspoGVSLlIPsGAIspGpphEhYlslsc....cpst.................................hPP..hcpup.oLLSPlVsCGPsGshhhpPVhLplPHCAshs.t..-c...hhlpLKopssp ...................................................................hsphhhsspGGphp..h.p..s..GlpllIPstshs.t.s.p.php...ht....lshhp....c.ss..................................................................................PP....ht.cs.EsL..hS.ll...ph.G.....PsG...h..p.........F...h..p............PVhlplP..Hhushp..t...-p.....lhL+pps..p...................................................................................................................... 
0 300 340 472 +5038 PF00569 ZZ Zinc finger, ZZ type SMART anon Alignment kindly provided by SMART Domain Zinc finger present in dystrophin, CBP/p300. ZZ in dystrophin binds calmodulin. Putative zinc finger; binding not yet shown. Four to six cysteine residues in its sequence are responsible for coordinating zinc ions, to reinforce the structure [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.72 0.72 -4.42 10 2746 2012-10-02 13:15:50 2003-04-07 12:59:11 12 308 324 4 1705 2687 25 44.50 31 4.44 CHANGED s+cpscCNhCppsshlGhRY+sL+s.sYDLCpuCFhoG+su+sH+M ..................h...thpCs..t..C......p...p........s....l..h.....G...h...R.a.+C..t..p..C...........s.....a..DLCpsC....a...t.....p.....t...htt.p.................... 0 562 809 1281 +5039 PF04358 DsrC DsrC like protein Kerrison ND anon COG2920 Family Family member Swiss:P45573 has been observed to co-purify with Desulfovibrio vulgaris dissimilatory sulfite reductase [1], and many members of this family are annotated as the third (gamma) subunit of dissimilatory sulphite reductase. However, this protein appears to be only loosely associated to the sulfite reductase, which suggests that DsrC may not be an integral part of the dissimilatory sulphite reductase. Members of this family are found in organisms such as E. coli and H. influenzae which do not contain dissimilatory sulphite reductases but can synthesise assimilatory sirohaem sulphite and nitrite reductases. It is speculated that DsrC may be involved in the assembly, folding or stabilisation of sirohaem proteins [2]. The strictly conserved cysteine in the C terminus suggests that DsrC may have a catalytic function in the metabolism of sulphur compounds [3]. 
24.90 24.90 25.40 38.20 24.60 24.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.35 0.72 -3.81 87 1238 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 1122 8 258 699 173 107.10 55 95.17 CHANGED tlphsGpplcsDc-GaLhs.hs-Wsc-lAphlApp.E.........sIs.....LT-sHWcVlpalRcaYtcap...hu.P.slRhLsKshutchG.cKus.oca...LYpLFP.tGPAKpAsKlAGLPKPssCl ..............h...apG+pI-TDs-GYLp-..spWoEslAhhlAcp.E.........GIp...Los-HWEVl+FVR-.FYhE.F.s..TS....P..AIRMLVKAhup.KaGp.EKGN.S+Y...LY+LFP.cG.PA.KQATKIAGLPKPsKCl................ 0 84 153 208 +5041 PF04252 RNA_Me_trans DUF431; Predicted SAM-dependent RNA methyltransferase Kerrison ND, Finn RD anon COG2428 Family This family of proteins are predicted to be alpha/beta-knot SAM-dependent RNA methyltransferases [1]. 20.10 20.10 20.20 21.90 19.90 19.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.15 0.71 -4.91 3 169 2012-10-01 22:53:19 2003-04-07 12:59:11 8 4 163 0 133 172 0 195.20 42 90.41 CHANGED PtlVIEHLEEssSEWL...LLEYpplAcphGs+hllTuupPE...............LRasss+luGVss-hccthsLcRu+VILLDLpAsc-LcPEDAocsoYIVVGGILGDHPPRGRTKcLpTuhhsultsR+LGscQhSlDGAl+TApLIAEG.hRLEEIEFEDsPEl+l-c....sS.sElsL+YAlPKlsGKlLLopGLL-LlKK-luYp-EDLu .....................................................hallEHh-........thu......t......Ws...hLEYtpI..tc...-...s...G...s...c.....l..l..h.os.....sp..................................hh....h..s..h..tph..t..ul..tp......h...p.....p...h..s...h......c..+.u+..VCLLDPtApp-L............s.............P-Du....s.p.........F-hFlFGGIL....G..................Dc.PPR.DRTuELRpp.....Gh.uRRLGshQMTTDTAVRVT+lllcc..phs......l-cI.alDaPEl+hsc.....................pE.uTEM..PFRYV....hs.pp........G.......cPIhPcGMl-LIKcDssps.-...h...................................................................................... 
0 38 70 108 +5042 PF04359 DUF493 Protein of unknown function (DUF493) Kerrison ND, Bateman A anon COG2921 Domain This domain is likely to act in a regulatory capacity like Pfam:PF01842 domains. This domain has a remarkable property in that the C-terminal residue of every protein in the family lies up in the alignment. This suggests that the C-terminal residue plays some important functional role (Bateman A pers obs). 21.90 21.90 22.10 27.70 21.50 21.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.83 0.72 -3.61 166 1622 2012-10-02 00:29:19 2003-04-07 12:59:11 9 2 1588 3 338 823 238 83.00 41 87.90 CHANGED hcshl.-FPsp.ashKllGt....s................sss..hhpt..lhpllpphh.s...sstslph+.sSspGpYhSlol.slpspup-plcslYppLs.shttV+hlL ...............h.ppLl.EFPss.FsaKVhGp..A.....................ts-..ls-p..VlEVlp+Hs..P...s-assps+.sSS+GsYpSVol.olpAos.hEQl-slYc-Lu.p.hchV+hVL.................... 0 86 200 280 +5043 PF04205 FMN_bind FMN-binding domain Yeats C anon Yeats C Domain This conserved region includes the FMN-binding site of the NqrC protein [1] as well as the NosR and NirI regulatory proteins. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.82 0.72 -3.82 182 4808 2009-09-11 22:55:10 2003-04-07 12:59:11 9 81 2361 8 1029 3722 716 88.10 25 29.05 CHANGED Ghsu......sl.plhl.......sh.....ssp.spIpsl...plhp........p......................p...........E.......Tsuhu....t.h......................pphh......pphhspp....................................................s.tp.lDuloGATloSpultpulppulpth ..............................................................................................................................Ghtu..sl.plhVsl....stp..up....lt..ul....plhp.............p...........................................................p..........E.................TsGlu....spht...........................h.spah..........pphhupp..........................................................................................tts..ss..l..DsloGATlTopulhpulppsh................................. 0 408 712 888 +5044 PF04432 FrhB_FdhB_C Coenzyme F420 hydrogenase/dehydrogenase, beta subunit C terminus Kerrison ND anon DOMO:DM04087; Domain Coenzyme F420 hydrogenase (EC:1.12.99.1) reduces the low-potential two-electron acceptor coenzyme F420. This family contains the C termini of F420 hydrogenase and dehydrogenase beta subunits [1], [2]. The N terminus of Methanobacterium formicicum formate dehydrogenase beta chain (EC:1.2.1.2, Swiss:P06130) is also a member of this family [3]. This region is often found in association with the 4Fe-4S binding domain, fer4 (Pfam:PF00037). 
21.30 21.30 21.40 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.36 0.71 -4.83 123 800 2009-09-10 23:45:23 2003-04-07 12:59:11 8 53 463 0 371 792 228 167.20 20 41.60 CHANGED ptshc+lullGtPCplpAl+chpp........t................hhhhlGlhCspshshpsh..phltchhuls.ps.V.t+..................h-hpp....Gc.....ahlph........t.cus.......hhplslcch............t.hhpss.........CphCtDhssph.....ADlosGs...........hGs..........t.Ghoh.....lhlRocpGpcllctsh.ptshl-spshsst..........htKl .......................................t...hc+lhhlGpPCplpul+ph.pp...t...h...................hhhlGlhCtss.hshpth.pchlpt..h.s.hp.....ps...lhc...................hchcp...............up........hhlph..................p...ssp..............hhphs.hpch.................................t.hhpsu......C.h.t.C.ts.h..ssph.....ADlolGs..................hus..............tGhoh.lllpop+Gpplhptht.pt..hlpht..............thttt................................................................................................. 0 116 255 326 +5045 PF04609 MCR_C Methyl-coenzyme M reductase operon protein C Kerrison ND, Finn RD, Mian N anon DOMO:DM04514; Family Methyl coenzyme M reductase (MCR) catalyses the final step in methanogenesis. MCR is composed of three subunits, alpha (Pfam:PF02249), beta (Pfam:PF02241) and gamma (Pfam:PF02240) [1]. Genes encoding the beta (mcrB) and gamma (mcrG) subunits are separated by two open reading frames coding for two proteins C and D [2]. The function of proteins C and D (this family) is unknown. 
This family nowalso includes family MtrC_related, 25.00 25.00 29.70 29.00 20.00 20.20 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.69 0.70 -5.59 18 122 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 65 0 81 118 17 201.70 34 82.52 CHANGED M........hhapGGVa+csplh-hlEDl....GGallQ+p.ht.-lhh.hhlP.p-hc.lct.sc.hhGplscuPLsGoElAVVu.SLup+HLPHssCDIuEYLR+sGupoNMlGLARGsG..........+plutlsscEpclIpEHDLAVathGNFcsCIhphpttLh+tl-.l.P.lVVsGuP.-h.h..........YluslGRh.tRh+p.p-lttLcphs--lu+llscpRtslscDP..hoPshVh-hlpppl.sl.t.htPhPls.QhsGhRlKlPYDchs-tIcclc ........................................................................................................................+GplsEs..tts-lslVu.S.uR+Hls+ssC-IophLRctGhp..s..sh..lsLstGsG..............tshhslptcEhc.IpcHclAVhphGNhcspIl.Ktphlhc.lc.lP.llVstuP.-h.................................................t...............................................................................t................................................... 0 17 51 68 +5046 PF04607 RelA_SpoT Region found in RelA / SpoT proteins Kerrison ND anon DOMO:DM04456; Family This region of unknown function is found in RelA and SpoT of Escherichia coli, and their homologues in plants and in other eubacteria. RelA is a guanosine 3',5'-bis-pyrophosphate (ppGpp) synthetase (EC:2.7.6.5) while SpoT is thought to be a bifunctional enzyme catalysing both ppGpp synthesis and degradation (ppGpp 3'-pyrophosphohydrolase, (EC:3.1.7.2)) [1]. This region is often found in association with HD (Pfam:PF01966), a metal-dependent phosphohydrolase, TGS (Pfam:PF02824) which is a possible nucleotide-binding region, and the ACT regulatory domain (Pfam:PF01842). 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.55 0.71 -4.08 102 8755 2012-10-02 22:47:23 2003-04-07 12:59:11 12 55 4348 6 1707 5709 2333 115.20 41 20.29 CHANGED GRhKphaSIhcKhpc..+s.............h...thcclhDlhulRlls..pp.........hp-CYpslsll+....shac.hss.....chKDY.............Iu.pPKtNGYpSlHssl................hG.pshhlElQIRThtMcthAE.GluuHhpYKpsts .................................................................................................................GRsKclaSIacKM..pc...+p..................h...th.cclhDlhul...R...lls......pp................................lpDC.Yt.sL..u.h.lH.................sh..a..c...s.hss.........................chKDY.................................Iu...tPKs....N..G.YpS..l..HTsl..............................................hGsps..h..slElQI.RThpMcphAEh.GlAAHWtYKps.t..................................... 0 537 1087 1434 +5047 PF04226 Transgly_assoc Transglycosylase associated protein Kerrison ND, Finn RD anon COG2261 Family Bacterial protein, predicted to be an integral membrane protein. Some family members have been annotated as transglycosylase associated proteins, but no experimental evidence is provided. This family was annotated based on the information in Swiss:P76011. 25.20 25.20 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.45 0.72 -3.89 159 4300 2009-09-13 11:40:02 2003-04-07 12:59:11 8 3 2610 0 729 1905 66 48.10 36 57.58 CHANGED IllGllGAhlGshl..hshhGh....hts.htluuh..lsullGAllLLhlhp.hlpp ......lllGllGAhlGuhL..hs.h.aG.............sluuh.....lsullGAllllhlhphlh.......... 0 177 395 574 +5048 PF04264 YceI YceI-like domain Kerrison ND, Finn RD, Bateman A anon COG2353 Domain E. coli YceI is a base-induced periplasmic protein [1]. The recent structure of a member of this family shows that it binds to polyisoprenoid [2]. 
The structure consists of an extended, eight-stranded, antiparallel beta-barrel that resembles the lipocalin fold. 20.50 20.50 21.00 21.00 20.30 18.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.38 0.71 -4.28 135 3875 2009-01-15 18:05:59 2003-04-07 12:59:11 8 16 2238 16 1127 2995 1071 164.90 25 82.66 CHANGED sa....pl..............Ds..spoplsFpspc.....h.shsph.......G.pFs.p..hs....G....pl......sshsssth..shslchs..Sl..ss.....t....sthc..pc.lpst......paFcsppaPphsFpus.php.........h................ttt..hphp..GsLTl+GhT+s.lshss...plp.t.....................................................................ts.ps...............hshsup.spls.RscFGlshsthh.................luccV.plplplphp ................................................................................aplDs..sHoplsFphpH............h.shohh......GpFp.p....hs........G..slp.............s.ps..ssspl.........sloIsss...Slsoss......................spRD....pH.L+us.............DFhs.s..s..+aP..p.hoFpoo..plp......................t................pt..spls.....GsLTl+..GlT+s..l.slcs......phh.s................................................................................tu.ps............t.hsGhpus...sp...lp.RpDFGlshshsh.................luccV.plplplph.h................................................................... 0 378 777 993 +5049 PF04431 Pec_lyase_N pec_lyase_N; Pectate lyase, N terminus Kerrison ND anon DOMO:DM04067; Family This region is found N terminal to the pectate lyase domain (Pfam:PF00544) in some plant pectate lyase enzymes. 
20.70 20.70 20.70 22.00 18.50 20.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.76 0.72 -3.88 15 105 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 30 0 39 93 0 50.40 37 14.34 CHANGED sIu.EaDEYWpcRu--A+ptshpAYcPcP.sVTs+FNpcVtcuhp..........................ssNsTRRsL ......ls...-chWtp+tc.A+ptshtAYhsDPhslTscFNtcVhcuhp........................................................sRRt...................... 0 4 22 30 +5050 PF05223 MecA_N NTF2-like N-terminal transpeptidase domain Bateman A anon Bateman A Domain The structure of this domain from MecA is known [1] Swiss:Q53707 and is found to be similar to that found in NTF2 Pfam:PF02136. This domain seems unlikely to have an enzymatic function, and its role remains unknown. 21.80 21.80 21.80 21.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.52 0.71 -4.03 17 954 2012-10-03 02:27:23 2003-04-07 12:59:11 6 6 730 10 155 737 7 115.00 23 18.53 CHANGED sssppphppFlsAhsct-hsphhshsspss.........cht-thpslasulpscslcl.........tshpspcsscsphslsachphpsshuch...hshshphphscp-sp.Wp.lcWpsuhlaPphpcsp .............................................t.pscpshppalsulsc..pcasphhp..ssp.u....p...t.sp.-hs-+hpplYsul..t.sc.slpl....................pphph.p..cs.s.c..sp.tplsaphphcT.shGpl.......................hshsh.phsh......s+....cc....s.....p...W+..lcWssuhIaPshpcsp..................................... 0 55 117 141 +5051 PF05224 NDT80_PhoG NDT80 / PhoG like DNA-binding family Wood V, Bateman A, Glass NL, Moxon SJ anon Wood V Family This family includes the DNA-binding region of NDT80 [1] as well as PhoG and its homologues. The family contains Swiss:Q05534 or VIB-1. VIB-1 is thought to be a regulator of conidiation in Neurospora crassa and shares a region of similarity to PHOG, a possible phosphate nonrepressible acid phosphatase in Aspergillus nidulans. 
It has been found that vib-1 is not the structural gene for nonrepressible acid phosphatase, but rather may regulate nonrepressible acid phosphatase activity [2]. 19.40 19.40 19.70 19.90 16.80 19.30 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.42 0.71 -4.29 16 421 2012-10-03 00:25:27 2003-04-07 12:59:11 7 11 202 16 306 408 0 168.80 30 24.03 CHANGED palsY+RNaFplssshsh.t...sphhptshh..........................tsspphplphFtlclpAhcs.s.....spsIpLlQ.........tospccKs.thsPshsslhst..P...h.h.tsp.hs..t...................................splsphspacRLQFppuTssNt+ppt.....QpaFpLpVpLhs...............................sptsssphlhltphposslIVRGRS....Pusapsp ...........................................................asC.++NaFQlosph..s..h.....sphlp.......................................................sstshhslptFtlplpuhcstt........spslplhQ...............................o.pccp.t...hpPshh.sl....s..................................................................................................................................phsp.hohpRLpFp...puTANN.tR++u...........QpaFhLhVpLhA.....................................t.t.sp.s..phhhlutthSt.lIVRups....Pupaps.p................................................... 1 70 146 243 +5052 PF05225 HTH_psq helix-turn-helix, Psq domain Lehmann M, Bateman A anon Lehmann M Domain This DNA-binding motif is found in four copies in the pipsqueak protein of Drosophila melanogaster [1]. In pipsqueak this domain binds to GAGA sequence [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -8.10 0.72 -4.41 27 1317 2012-10-04 14:01:12 2003-04-07 12:59:11 11 67 254 1 1002 1636 41 43.90 28 10.93 CHANGED sp-pltpAlpslp.....sGt.hSlp+AuphaGlPpSTL..cthctthshpp ................ptplttAl.pslp...........sGp.hS.l.p..cAAphYslPpoTLh.cthpth.....t.............. 
0 324 585 881 +5053 PF05226 CHASE2 CHASE2 domain Ulrich L, Zhulin I anon Ulrich L, Zhulin I Domain CHASE2 is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in bacteria. Specifically, CHASE2 domains are found in histidine kinases, adenylate cyclases, serine/threonine kinases and predicted diguanylate cyclases/phosphodiesterases. Environmental factors that are recognised by CHASE2 domains are not known at this time [1]. 22.00 22.00 22.00 22.70 21.20 21.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.31 0.70 -5.19 160 950 2009-01-15 18:05:59 2003-04-07 12:59:11 6 44 512 0 409 1005 1318 318.30 20 45.69 CHANGED lslshhhlhhhhhhtts..............hp.....h....-hth.....aDhhhphp.....s...............sc..llIVsID-p.............Sl...........t......up....WP.WsRshhApLl.cpL.............tpt.tspsluhDl..lF.scP.s............................................sDptLupul..tps...ll...ls.h..............................................htttppts..sht.................................................................................................................................................stlttt..stshGhh.shhh..Ds.DG.....llR....+h.l...........hss.t.hhs....ululphh..............thhhsspshtht...t......................................................t...t..................hhlsa........tus....tt......sh.....plSh..t-l....L.....s..uphs.......................shl....cs.+lVLlGs.o....As.ult......DhhsTPh............t..................hsGVElcAshlsslLs....................sp.hlh.hsshs.hhhhlhhshhhulhh 
...............................................................................................................................hhh....hhhhh.h.hhhhh.h.......................hpt....h.....-hhh.....aDhhhphp.......................s..s..s.....sp.lllVsID-p...........................ol.................p........phGp..............WP.asR....s....hhApllppL.............tpt...tsp..sluhDl..hh..sps..p........................................................................................sDptLspsl...p..p..s..........ls...lshh......................ht.tt..ttt....h........................................................................................................................................................s.ltt......ssthGhh..sh.h......Ds..DG.......hlR.....ph.l..........................h.t..ss..p..hhs...........ululthh.....hhhhttp.s.hhh.t.t.t.......................................................................................hs...............s....th..p.......................hhlsa..........tus..............tt................sh.........plSh..t-l...L...........p.....splss................................................shl...cs+lVll...Gs..o......As..uht.........D.hhsTPh.........ts............................hsGVplpAphlsslls.........................tp...hl..h.....h...s..t.h..h..t...h..h.hh.h..h.hshhhhh..h.................................................................. 0 126 275 353 +5054 PF05227 CHASE3 CHASE3 domain Ulrich L, Zhulin I anon Ulrich L, Zhulin I Domain CHASE3 is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in bacteria. Specifically, CHASE3 domains are found in histidine kinases, adenylate cyclases, methyl-accepting chemotaxis proteins and predicted diguanylate cyclases/phosphodiesterases. Environmental factors that are recognised by CHASE3 domains are not known at this time [1]. 
25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.54 0.71 -4.49 180 1210 2012-10-02 01:04:29 2003-04-07 12:59:11 8 127 733 1 487 1312 72 135.90 19 20.96 CHANGED ssphlp+ohpllsph...pplhsthh-tEoG.RGYLLTucppa.LpPYpputsplpptlscLp..phs..s......-sP...tQppc....lpplpthhppth.shhcph...lshpcp.....t..shp...sshphltsspG+phhD.....plRphhs..ph.......tspE........ppllppRptp ..........................................t....lp+shplhsphpplht....tlhshEouhRGallo.s..c.p.s.....a...L.psYppu.t..t.p...hppplspLp..pL..s..t......csP........pQ.ppc....l.p.plpthhp...phh..p.hpth...lshtps..........t.t.p.....sshph..l.p.....s.....s.t.u+.thh-........plRphlsph.......psp.E.......pphhttRt..t............................................................... 0 132 288 404 +5055 PF05228 CHASE4 CHASE4 domain Ulrich L, Zhulin I anon Ulrich L, Zhulin I Domain CHASE4. This is an extracellular sensory domain, which is present in various classes of transmembrane receptors that are parts of signal transduction pathways in prokaryotes. Specifically, CHASE4 domains are found in histidine kinases in Archaea and in predicted diguanylate cyclases/phosphodiesterases in Bacteria. Environmental factors that are recognized by CHASE4 domains are not known at this time [1]. 
20.80 20.80 20.80 21.30 20.70 20.00 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.50 0.71 -4.65 95 529 2009-01-15 18:05:59 2003-04-07 12:59:11 8 79 398 0 207 499 16 160.10 17 23.09 CHANGED ppslppphpplpphspDaAsWD-sYpalps..pst.t....alpsNls.sphh..pshpl-hhhllstsGphl.a..tpshshpsst..h.hst....................sltphlsphp.h....................ttttpsshsGllhhsssPhlluupsIhssssps.....sspGsLlhsRhlcsshlspl.pphshhslplth..s .................t..hlp.phpphtphhtDaAhWD-shp..alts......tst..s.............ahpsNls..sphh..pshth-hlh.lh-spuphl.a..t.pst.sh.p.pstt...p.lst....................shpp.hlpphpth...............................sptppshsshhh...hs.st......s.hl.lusss.lpssssps......sstuh.Ll.hs+hlss..phlppl.pphshhslph....st............................... 0 64 129 167 +5056 PF05229 SCPU Spore Coat Protein U domain Yeats C anon Yeats C Domain This domain is found in a bacterial family of spore coat proteins [1], as well as a family of secreted pili proteins involved in motility and biofilm formation ([2]). This family is distantly related to fimbrial proteins. 22.40 22.40 22.40 22.50 22.20 22.30 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.07 0.71 -4.09 127 1343 2012-10-02 17:35:21 2003-04-07 12:59:11 10 3 332 0 336 1103 36 142.70 23 81.23 CHANGED sssshsls..sslsssC...slsus.............slsFG..............ssssssssssss.usl...slsC....o......sssshslslssGtssssss............Rphts..uss......hlsYpLYpDuupoph..Wusssssshsss.................sussp....sls...lYGcl.............ssssss.uG....sYsDTlslT ..........................h...shsss...hslhssC....slsus.......................slsFG..............ss.s...s..s.ss.s..s.ss.s...usl....slp.C..........o.........sssshslslssGtssssss............Rphts.....suss......hls.YpL.Yp....-u.up..o.p.h........as.ssssts.sssst..................sussp............sls...lY.upls.....spsss...s..sG........oYsDTlsl.................................................... 
1 48 107 209 +5057 PF05230 MASE2 MASE2 domain Galperin M anon Galperin M Domain Predicted integral membrane sensory domain found in histidine kinases, diguanylate cyclases and other bacterial signaling proteins. 25.00 25.00 25.00 26.60 23.70 24.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.10 0.72 -4.01 21 590 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 566 0 64 308 10 89.60 59 24.31 CHANGED +RhYhsRhlGhslshhsluuslhstshsthlWlLLlhtuhlWPHlAaphApR..uppPhcsEptNLhlDuhhGGhWluhMtFssLPolslLsM .......RRlRLPRAVGLuGMF.LPIAosL..VopPssG..WWWLlLVGW..A.FVWPHLAWQlAuR...AlDPLp..pEIY.NLKsDAlLuGMWlulMGVNsLPSsAhLMh.......... 0 9 19 45 +5058 PF05231 MASE1 MASE1 Galperin M anon Galperin M Domain Predicted integral membrane sensory domain found in histidine kinases, diguanylate cyclases and other bacterial signaling proteins. This entry also includes members of the 8 transmembrane UhpB type (8TMR-UT) domain family [2]. 29.50 29.50 29.50 29.60 29.40 29.40 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.95 0.70 -5.40 33 2684 2009-01-15 18:05:59 2003-04-07 12:59:11 9 142 1051 0 372 1651 78 274.00 22 40.20 CHANGED hhlhhhhhhhlshhhsht.shshssssthslW.hPsGltluhhlhhshpthPsll......lushlhp........hhtthshhhshshshsssltshhushllchhhttttthpph.........................hhhh.sshhsshhsAssshshhhhhshhshs.hstshhhahlushsGsLllsPhshlhhphhhpphth........phh.p..ht.h.hh.hhhhhhhlhhhhshhh.hhh.s.........t...............hlhhssllasAhRaGhpGusl.hhhlhusllhhhththst.hsh..t......h.lp........hal..uhtshssl...hlushlpcpc 
......................................h.hl.hhhlhhlh.h.hh..h..h....p.ht.h.h...s..s.........hs..l..a..hPhulh.l.u.hhh...h....h....s....h.t..h.hPslh..............huhhltp.............h..hht....h....s.....h...h.t..s....h......s......hh.....h.h...th...l.....s.l..ls.s....sll..Rhh.h..p..p.s.hshh.......................h.hh.h....hhh..hhhsshshhh.....h..h...h..p......h..s.h...s........h..s.ht..h...h.h.shlup..hs.GsLhh.sP.ls..h.h...h...h...c...h..l.h..p.....p.hh........................................h.h.p..........h.p..p..thh.hhlhhhsl.s.hh...h......h...h...h....s...p...........phhh.................hlhl.....lhah..A...h+hGh..tts..hl.............h.ls.slhh.hhshh..h..st...s.h....................................h..h.................hL...hh.ph...hs...sh....hushhpc.................................................. 0 81 199 290 +5059 PF05232 BTP Bacterial Transmembrane Pair family Yeats C anon Yeats C Family This family represents a conserved pair of transmembrane helices. It appears to be found as two tandem repeats in a family of hypothetical proteins. 25.60 25.60 33.50 26.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.30 0.72 -4.23 181 1557 2009-09-10 14:54:21 2003-04-07 12:59:11 7 2 633 0 358 1134 42 67.90 31 89.83 CHANGED +ohtpRlhHAlhFEhshlhlslPlhAahhshulhpshllsluhshhhhlasalaNhhaD+lhs..h.tsp .......+ohtpRlhHAlsFEshhlhlssPlhAahhshol........hpsh..sLslshslhhhsashlaNhhaD+lhs.t...hhh.... 0 55 137 243 +5060 PF05233 PHB_acc PHB accumulation regulatory domain Yeats C anon Yeats C Domain The proteins this domain is found in are typically involved in regulating polymer accumulation in bacteria, particularly poly-beta-hydroxybutyrate (PHB) [1]. The N-terminal region is likely to be the DNA-binding domain (Pfam:PF07879) while this domain probably binds PHB (personal obs:C Yeats). 
21.40 21.40 21.60 26.40 19.70 20.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.99 0.72 -4.22 72 555 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 403 0 227 497 146 40.90 38 29.37 CHANGED hssshLpQlIphYGsshQuhhssYLEpShphFtctQpphpc ....hssshLsQlIphYGssMQGhMssYLEpShphFhchQpphpp......... 0 48 136 179 +5061 PF05234 UAF_Rrn10 UAF_rrn5; UAF complex subunit Rrn10 Wood V, Bateman A anon Wood V Family The protein Rrn10 has been identified as a component of the Upstream Activating Factor (UAF), an RNA polymerase I (pol I) specific transcription stimulatory factor [1] 20.80 20.80 20.90 21.10 20.10 20.30 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.44 0.71 -4.08 2 57 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 56 0 37 47 0 105.20 36 57.86 CHANGED Ms.N..sh.sh.Nllpt.GphshpAc-llt...D.uVPIP....cEl......D...........PDl.l+slp.aATphhLsph.+.hpshDEouLlsLGhLlpcWhcshlTsh.pE.tcp ..............................................................................................l.lPh..hp.h...............sphlP..D.D.L+slHYY.Ao.ph....s...L...s....+.....h.........chh..p...uhDETuLIsLGlLlEcWsc-hlst........t.tt................ 0 4 14 29 +5062 PF05235 CHAD CHAD domain Aravind L anon Aravind L Domain The CHAD domain is an alpha-helical domain functionally associated with the Pfam:PF01928 domains. It has conserved histidines that may chelate metals [1]. 
22.70 22.70 23.10 23.50 22.40 22.60 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -12.01 0.70 -4.78 186 1090 2009-01-15 18:05:59 2003-04-07 12:59:11 9 11 934 2 406 1065 129 229.00 20 53.54 CHANGED pt.hhtpplp......phhtp..pssl.tps.........-s..EslHphRVulRRLRohlpha.ps.hh.....pthtthp.pcl+tluptLGssRDh-Vhh........tpht..............sthttt....................................................................shttlhptlppp+..pps....tpphtttLputchpphltpl....pp.hl.tt........................................................................................t...htphssptlp....cthc+ltcthpp.t................................................p..sp.p.h.HclRhtsK+LRYshEhht....sh.............sttt..hp....phhcpl+plQ-hLGphpDhsl.ttph.lpph ..........................................................h..hhtpthtphhtt...pttl..pps.........ss....-ulHphRVuhRRLRohLpha...ps.hh......s...tt...tss....tlppcL+hLuphL.......G.hRDh-Vhh......ppht...............htth...............................................................................................shpt.lhpthppp.p.......pt..s....tp.p....h..h...p..t..L...p....o..tca..tp.Llh.sl....pt....hl..s.t..................................................................................................................httshpphsspplc.....+thccl.h.ct.httht...............................................t..............p..sp.t.hHplRhtsK+lRYshEhht.......h..........................................tt......hp...........phhpthctlQshLGphpDhssttthl...h......................................................................... 0 122 261 346 +5063 PF05236 TAF4 Transcription initiation factor TFIID component TAF4 family Wood V anon Wood V Family This region of similarity is found in Transcription initiation factor TFIID component TAF4 [1]. 
24.10 24.10 24.60 24.10 23.90 24.00 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.79 0.70 -5.11 26 410 2009-01-15 18:05:59 2003-04-07 12:59:11 9 8 239 2 271 367 0 237.20 28 39.18 CHANGED lNDVhusAGVsLpEEEthLhus.............ophhtshscpsccps......FLp.ttLtphlpcl.................................u..pcps...........hphss-lhpLlSpAscphLpsllpKhhllScHRpcuhK................................t.phthpS-VptpL+tls.hpKp-c-R+tp....cctthhL............Ec.pp....tcstt.s+t-s-chppRsANsTAthtlG.....s+KKaphhsusst..............Spss.....sht.tsstp.ssh......ptp.......hRtt.s..................Ish+DlLhsLEp-R.hs.p......phlhKuYt ....................................................................................lsDlhshuGVslpEEpt.lhts.................o..phhtp..hpp.p...pc.ps.....................F...L..s..Lpp+l.h.cl..............................................s...pcps..............lpphps-.llshlS......tAsppRLpsllcchhhluppRhps.h+.....................................................................sstp.ht..ssD.l+t.pL+hhpp...h-ph-cp....+..+pp......p.-p-.......hh...h+t....u.c..sp.........................stt.....-D..s-p.ph.Kp+sK.c....h.....ppt-.......tp.h.p.p+.s.ANhTAhtAlG............s+K..K...hph.sss.s...........................................stst...........s....s.tst.t..t.............................hc.t.p.....................................................lsl+DllhshEp-tth..p.......hh.....h................................................................................................................. 0 79 127 203 +5064 PF05237 MoeZ_MoeB MoeZ/MoeB domain Cortese M, Bateman A anon Cortese M Domain This putative domain is found in the MoeZ protein and the MoeB protein. The domain has two CXXC motifs that are only partly conserved. 
26.90 26.90 26.90 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.74 0.72 -4.39 41 4751 2009-01-15 18:05:59 2003-04-07 12:59:11 8 17 3320 13 1252 3468 1698 83.20 33 27.48 CHANGED spsPCYRClaPp....st.sssCspu..GVlGslsGhlGslQAhEAlKllsGh..ucslsu...pLLhaDuhsh.cacpl+lp.+cssCssCGsps ..........................t.tsPCYRCLa.sp............stss.h.o.C.s.pu...G..V........luPllGl.lGolQAhEAl.KlL.s....Gh.........ups.h.sG...................+Llha....D.....uhs....h..p.......a+p.l.p.lp...+s.s..sC.s..sCGt..t................................ 0 397 765 1050 +5065 PF05238 CENP-N CHL4; Kinetochore protein CHL4 like Wood V anon Wood V Family CHL4 is a protein involved in chromosome segregation [1]. It is a component of the central kinetochore which mediates the attachment of the centromere to the mitotic spindle [2]. CENP-N is one of the components that assembles onto the CENP-A-nucleosome-associated (NAC) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [4]. 
25.00 25.00 32.30 25.50 20.10 24.20 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.07 0.70 -5.42 29 193 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 158 0 124 186 0 374.10 25 94.03 CHANGED s.ss.slh+.hLs+Ls+ssLlsLshpWhppps.................................t.h..tt.lpplpp......hppLpptpsp........KRpll-+IlptsWppGlsLhQlA.lDhth.lh-+Psuh.pWsshplp..................tcsspslsphcsspFlspLppplpslhcsahhl...sRcssLPllhlRIhlhs.s..p.......................ts.psthsop+shYlsasssoPalapSh..sss.h.t...........................+llhpuls+ALotsppphtl...pssphss+s.LpuLhslpG.su....Rps..suhGuao.aA-ssh.........................-.sPL.ssh.pp....tp.................psppp.sppsspppp...........................+pcphspt+Fusospsh............l-phph+l.s..sup.s.t................................................hppspppsplplphsGsclhuGl+cLs......ctGllDspchPuWhT....G..Ecusosuhl+cG .....................................................s.ss.hlh+.tlh+lshspLhslhhtW...pp.......................................................ppLpphphp...........Kcpllp+llphs.ccu..lolhQlA.lDhha.h.p.+spph.h.....Wsshph.................................tpss.t.sl...sthc.ppFhpshppplp..hh+shhhh...hRctp..shlhlRIhh..............................tsthst.sph+shYlsa..spoPa.lahSh...sp....................................chhh.hlspALohsppphpl....thsLss+s.LcsLhslhh..t....tpp..tutusao.at-ts.h..................................c..sL.p......t..........................................ptt.h..ppp..tpc..........................................+hpphsptpFGssspst...............................................lchhp.h+lps.h.psp.sts..........................................................................st.ppp..phhlpFsGsclhtul+pLs.......................tGlhDs..hsshhTs..ccuhs.hhlcpt........................ 
0 22 49 90 +5066 PF05239 PRC PRC-barrel domain Aravind L anon Aravind L Domain The PRC-barrel is an all beta barrel domain found in photosystem reaction centre subunit H of the purple bacteria and RNA metabolism proteins of the RimM group. PRC-barrels are approximately 80 residues long, and found widely represented in bacteria, archaea and plants. This domain is also present at the carboxyl terminus of the pan-bacterial protein RimM, which is involved in ribosomal maturation and processing of 16S rRNA. A family of small proteins conserved in all known euryarchaea are composed entirely of a single stand-alone copy of the domain [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.33 0.72 -4.11 114 6817 2012-10-02 14:14:57 2003-04-07 12:59:11 11 26 3995 121 1960 4958 1373 76.30 21 44.59 CHANGED .p.hhhsclhGhpVhsps.G...cplGpl..p-lhl-ht.ssclhhlhlshst...............cphhlPhp......thchtpctlhlpshptth .........................................hhhp-lhGhp..V.hsts..G...................pp...l..G..p.l...p-l.......l....css....u.....s.....c...lhs.......l..t.h.s.t.t.........................................cchl.lPah.............h.pl.cls.s.ctl.hlp......h................................................... 0 622 1275 1636 +5067 PF05240 APOBEC_C APOBEC-like C-terminal domain Yeats C anon Pfam-B01590 Domain This domain is found at the C-termini of the Apolipoprotein B mRNA editing enzyme. 23.50 23.50 23.90 24.30 22.80 23.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.01 0.72 -4.56 45 86 2012-10-02 00:10:39 2003-04-07 12:59:11 9 5 33 4 13 662 0 54.00 44 23.25 CHANGED cspapcGL+pLppuG....splplMsap-FpaCWcsFVcpp.............tps.FpPWcpLpc...p.phLp ......s.apcuLphLppu.G....AplsIMsYs...EFpaCWcsFV-pp.............spP..Fp.PWcsLc-...pSptL.p................. 
0 7 8 9 +5068 PF05241 EBP Emopamil binding protein Moxon SJ anon Pfam-B_7320 (release 7.7) Family Emopamil binding protein (EBP) is as a gene that encodes a non-glycosylated type I integral membrane protein of endoplasmic reticulum and shows high level expression in epithelial tissues. The EBP protein has emopamil binding domains, including the sterol acceptor site and the catalytic centre, which show Delta8-Delta7 sterol isomerase activity. Human sterol isomerase, a homologue of mouse EBP, is suggested not only to play a role in cholesterol biosynthesis, but also to affect lipoprotein internalisation. In humans, mutations of EBP are known to cause the genetic disorder of X-linked dominant chondrodysplasia punctata (CDPX2). This syndrome of humans is lethal in most males, and affected females display asymmetric hyperkeratotic skin and skeletal abnormalities [1]. 31.00 31.00 31.20 31.30 30.80 30.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.48 0.71 -5.29 46 404 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 197 0 278 388 0 182.60 27 79.73 CHANGED llshhhsssssllssshhls......thh..phstsc+hshhWashsu.hlHhhhEGhFlhpptphss......................................spshh..........................upLWKEYupuDoRYhss.....DshllslEslTsllhGPLuhhlshtlhpp...................cshRahLQlllshupLYGshlYassphhcuh........shshsp.sp.hYaWhYalhhNslWlllPuhllhpohppls..tAh.sth.....pps .....................................................................h.....h.h.hhlh.hshhhs........hh.......ths.t..hc+hhhhWashs.................u.hl....HhhhE.G.Flh.phh.h.hs............................................................spshh...........................upLW...+...EY.u.pu...DuRYhss........DshllshEhl.Tsh..lhGPluhhhsahlhpp..................................c.shp.a.hl.plh.lu.l....u..plY.G.....shhYas..sphhsu..................s.shsp.s.p.hY.hW........hYhl.h..hNs.lWlllPhhllhpuhtplstuht.....t............................................ 
0 68 140 213 +5069 PF05242 GLYCAM-1 Glycosylation-dependent cell adhesion molecule 1 (GlyCAM-1) Moxon SJ anon Pfam-B_7429 (release 7.7) Family This family consists of the lactophorin precursors proteose peptone component 3 (PP3) and glycosylation-dependent cell adhesion molecule 1 (GlyCAM-1). GlyCAM-1 functions as a ligand for L-selectin, a saccharide-binding protein on the surface of circulating leukocytes, and mediates the trafficking of blood-born lymphocytes into secondary lymph nodes. In this context, sulphatation of the carbohydrates of GlyCAM-1 has been shown to be a critical structural requirement to be recognised by L-selectin. GlyCAM-1 is also expressed in pregnant and lactating mammary glands of mouse and in an unknown site in the lung, in the bovine uterus and rat cochlea [1]. 25.00 25.00 36.40 36.40 20.70 20.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.74 0.71 -4.24 4 14 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 11 0 4 22 0 122.40 48 88.94 CHANGED hLstsKDElahcoQPTDA..uAQhh....hSpcpsSScDLSKEPSI.RE-LlSKDsVVIcSs+.PQNQpsp........cuLRsuooQpEETTc.ssuuAoToEGKLscLoppl.KpLtpslcthlshlcslhssASclVKP ........LscscDEhHhcoQPTDA..uAQhh....hSc-plSscDLSKEsSI.+EELlSK-sVVIcSs+.spsQpsp........cslRsus.Q.EETTc.ssp....uAoToEGKLscLuppl.+pLtpshcthhshlcslhssAscls+P... 0 0 0 0 +5071 PF05244 Brucella_OMP2 Brucella outer membrane protein 2 Moxon SJ anon Pfam-B_7448 (release 7.7) Family This family consists of several outer membrane proteins (2a and 2b) from brucella bacteria. Brucellae are Gram-negative, facultative intracellular bacteria that can infect many species of animals and man [1]. 
19.20 19.20 19.40 21.90 18.30 19.10 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.70 0.70 -5.21 2 23 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 16 0 0 20 0 179.50 87 61.52 CHANGED MRSLQlEGsLhtu..LpTTPDSPIAAPLTVVLVRRRCGRICALQP.RE.VGDVDVATNLsSPFFDDRVIGNNTSDRTPAASIhQAANNVRHVsVDhV.....lsssVhATLFESDHSREAVSAuEGVtDLAGAVGASRDHVIVDDIsEVTGEGMEFtFlDTNhpTTchDIpcLp.GAuA........YAtI.....p.............................sIslhstlsVlAhh MRSLQlEGsLhtu..LpTTPDSPIAAPLsVVLlRRpsGRICALQPNRELVGDVDVATNLGSPFFDDRVIGNNTSDRTPAASILQAANNVRHVAVDVVVGRASVTTVVVATLFESDHSREAVSASEGVRDLAGAVGASRDHVIVDDIAEVTGEGMEFRFIDTNAQTTELDIRELHDGAAA.GFITIFTIYARIVRSIVEAQFGEGLEGAEFGFRTGGNAECETSALVPAIAVGTGVNVIAAL...... 0 0 0 0 +5073 PF05246 DUF735 Protein of unknown function (DUF735) Moxon SJ anon Pfam-B_7611 (release 7.7) Family This family consists of several uncharacterised Borrelia burgdorferi (Lyme disease spirochete) proteins of unknown function. 23.40 23.40 24.60 24.60 22.90 22.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.31 0.71 -4.57 2 138 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 28 0 10 101 0 180.50 68 99.27 CHANGED hcIPphhcNTpIcKFIpsEh-YtptlLpELKpL.pNF.oINVhpsIpS+YIAlhh.plhshFahKpph.pslssslsulIFAl+pIGTDESFhllFKsFLpsslEVoosE..sG.I.IpLhGsIKoshph.Iu..sKptp+LKKIhh+.....aAu.KKALs.N.hPKsYcpSlYtFIKhlIPIGRllK........IpsTcspph.Thsp .....MKIPNhhcNTEIcKFIhTEhEYAQtLLNELKpLNSNFlSINVhENIKSRYIAIWISQVLSIFYAKTQTLQSITSNINSVIFALRHIGTDESFRLIFKAFLNVDI-VT...TPE..AGVIDISLKGsIKTNFTTFISP..STKKGKR....LKKIllREKK.GYAASKKALVFNSLPKGYDHSIYAFIKtIIPIGRVLK........INspcGpNIITFNN......................... 0 6 6 6 +5074 PF05247 FlhD Flagellar transcriptional activator (FlhD) Moxon SJ anon Pfam-B_7623 (release 7.7) Family This family consists of several bacterial flagellar transcriptional activator (FlhD) proteins. FlhD combines with FlhC to form a regulatory complex in E. 
coli, this complex has been shown to be a global regulator involved in many cellular processes as well as a flagellar transcriptional activator [1]. 21.30 21.30 21.30 23.70 21.00 21.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.12 0.72 -4.03 29 772 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 693 6 122 317 10 100.70 60 90.55 CHANGED Ms.ss-lLp-I+-lNLSYLhLAQphlppD+A.sAhFRLGISc-hADlLtpLo...AQllKLAsoN.LlC+FRFDDpt.llshL.Tpp.....s+scslsphHAuILhAupsscpl ...............MpTSELLKHIYDINLSYLLLAQRLIspDKA.SAMFRL.GIsEE.MAssLuuLT..LPQMVKLAETNQLVCHFRFDsHQ.TIopL.TQ-............SRVD....DLQQIHTGIhLSoRLLp..s...................... 0 7 46 85 +5075 PF05248 Adeno_E3A Adenovirus E3A Moxon SJ anon Pfam-B_7497 (release 7.7) Family \N 25.20 25.20 25.40 37.00 21.60 24.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.78 0.71 -4.03 11 109 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 83 0 0 74 0 105.20 56 97.52 CHANGED MssssA-hsRL+.....HhcHCRR.+CFAR-slphsYFplPc-H.pG........PuHGVRlslpsshcS+hl+hhstRPlLsEpcpGsoplolhCICs.pPuhHpsLhstLCutYN+s .........pusuAELARLR.......HLDHCRRhRCFARE.utthIYFElPEEHPpG........PAHGVRITlEGsh-S+LlRhFoQ+PlLlE.R-pGsTTlTlYCICs.pPtLHEshCCpLCuEFNKs............... 0 0 0 0 +5077 PF05250 UPF0193 Uncharacterised protein family (UPF0193) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 
23.30 23.30 23.30 25.90 23.20 23.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.56 0.70 -4.76 5 117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 86 0 77 119 0 176.20 35 81.17 CHANGED hAupWPSsRVAKG..GlF..HosKApYTKETQDLIKVLMEESKLThLQR++Is-+LRsGEPLPlPcpP+hpQphs...phLs........Msst++NhKKRSLpsI.tSGAac.EhYh.PtcsK..sEKpKpKLQppMA.Gp+l.PDsGhRK+hPR++u-csh-hsEsDRhsELL-EINERsEWLsEMEALGQGKKYRslIpsQIAEKLRELccLDR++SpEs-huht.l ...............................................................................................................t.l..G..sha..ps....stYo.tTp-Ll+lhMcESKlo..hpp+pl.cph+pG.psLPh......p.....Ppsspp.........t..s...........................hh......t........h...+sh...p....hpss.sAYppp.a+..P.p.ss..+........hEKpKp+.LQs..hhA..G..............-.t...+.............................s.........h.tt....cp..t.....c..DhhpE..LlpEIpERtEFLs-MEuLGp.G+.pY.+shIhsEIup+l+chctl-pphp.p.p......h............. 0 29 37 56 +5078 PF05251 UPF0197 Uncharacterised protein family (UPF0197) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 23.20 23.20 23.20 23.90 22.90 22.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.62 0.72 -3.67 6 160 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 121 0 102 147 0 75.10 54 77.54 CHANGED l-pMsRYlSPVNPAVaPHLAsVLLuIGhFFsAWFFVaEVT..........SoKasRslaKELLISLsASlFLGFGlLFLLLhVGIYV ................MpRYsSPVNPAlaPpL.slVLLuIGhFFTAWFFl...YEVT......................So..KhsRslhKELLlullASlFhGFGs....LFLLLhVGIYV.................. 0 37 49 79 +5080 PF05253 zf-U11-48K UPF0224; U11-48K-like CHHC zinc finger Bateman A, Andreeva A anon Andreeva A Domain This zinc binding domain [1] has four conserved zinc chelating residues in a CHHC pattern. This domain is predicted to have an RNA-binding function [1]. 
21.10 21.10 21.60 21.30 20.60 21.00 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.14 0.72 -6.90 0.72 -4.20 84 695 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 184 2 441 662 1 26.70 35 12.17 CHANGED clhsCPasssHpl.tpclptHltp.Cpc .....hh.CPasssHpl.tsclppHltp.Cpp.... 0 121 173 310 +5081 PF05254 UPF0203 Uncharacterised protein family (UPF0203) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 21.50 21.50 22.00 22.30 21.20 20.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.60 0.72 -4.12 19 246 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 207 0 182 247 0 68.30 36 54.14 CHANGED Ms..ShutECs-hKccYDpCFNcWYuEKFL....KGctsp..stCschaccYppClppsLpc+sIp......ph.L-pu+cc ............Ms..Slu.pCs-lKccYDsCFNcWauEKFL...................K.G..pssp.......stCsplacpYppClpculcc+tIt......ph.httt+................................. 0 65 101 149 +5082 PF05255 UPF0220 Uncharacterised protein family (UPF0220) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 25.00 25.00 25.70 25.50 23.00 23.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.02 0.71 -5.00 24 399 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 275 0 246 338 0 149.70 34 93.88 CHANGED Mst..p..haRh.....ch.tsspt+phulYlAGsLFuhGaWhhlDAulaSpps.p.st...l.......HlsFlDWlPhlhSTlGhllVNSI-KupLp...s-uhup.............usssAhtARlhLFlGFuLhAGGluGolhVhllKallpth.shsslhhGlANllsNshIhlSsllLWhup.shEDEYs ...............................p...h+......ph..stptpshushsAGsL........FhhGaWlhlDAulh.sp..p......................................plsas..ahsGlhuoluhlhlNuVp.puplpu.-uast.....................GshsthtARlhLF.lGFhLhhGuLhuShhlLhh....talsp......................ths...sla.GlAshh.pNshlhhu.sllhhhup...hE-.a............................... 
0 76 123 193 +5083 PF05256 UPF0223 Uncharacterised protein family (UPF0223) Bateman A anon SWISS-PROT Family This family of proteins is functionally uncharacterised. 25.00 25.00 31.60 31.50 24.90 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.91 0.72 -3.87 35 928 2009-09-10 20:58:59 2003-04-07 12:59:11 7 3 925 2 80 364 1 84.60 50 95.57 CHANGED hsYpYPLDh-WSTEEllsVlsFastVEpAYE..pGlcp-cLLssY+pFKplVPuKuEEKpls+-FEcsSGYSsY+sV+pAKcssct.hlph ...hpYpYPlDh.s.WosEEhhtVlsFaspVEpAYE..suVpsccllsuY+pFKplVPSKuEEKpls+EFEpsS.GYShY+uVQtAKppsct.plp.l............. 0 18 40 63 +5084 PF05257 CHAP AXE; CHAP domain Bateman A anon Pfam-B_2845 (release 7.7) Domain This domain corresponds to an amidase function. Many of these proteins are involved in cell wall metabolism of bacteria. This domain is found at the N-terminus of Swiss:P43675, where is functions as a glutathionylspermidine amidase EC:3.5.1.78 [1]. This domain is found to be the catalytic domain of PlyCA [4]. 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.95 0.71 -4.47 47 5579 2012-10-10 12:56:15 2003-04-07 12:59:11 11 106 1721 22 443 2917 78 122.10 25 30.57 CHANGED pthttpssshst.tsastsQCs.astphhtphss...........h.phhGNAts..........Wstss.thpua........ssstsPpsGslhsa..........tsssssYGHVAhVppV...tsssslpl.EpN..........h..tshshtshcph.st.......s.hsaIh ..........................................................tht........hsttphYstuQCT...ha..shphh..hus...............h.s.hh...G....s...Aps......................Wss.sA...ttpua......................psspsPp.s.Gu.lh.h.............tsssusYG..HVAhVppV...............s.s.s...s...l..p.luE.N..............a.....t.s...h...hhs.+sh.t........t.htaI.......................................................... 
0 112 217 369 +5085 PF05258 DUF721 Protein of unknown function (DUF721) Moxon SJ anon Pfam-B_7527 (release 7.7) Family This family contains several actinomycete proteins of unknown function. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.51 0.72 -3.77 167 2098 2012-10-01 19:58:36 2003-04-07 12:59:11 7 3 2068 0 633 1679 1349 88.60 18 55.78 CHANGED sls.pllpph.h..pp..ttht.pshthsplhp.tWppll...GsplAppspshpl.c...............su........s..LhltsssushtppL.phhptpllpclspth.s.....thlpclpl ...................................tthhpph.hpt....tthp.pshthtplhp.pWppll.............G.splu....spscshpl..c..............................su........s..LhltssssshuppL.ph.hpspllpplppth.s.........tslpplph................. 0 197 420 538 +5086 PF05259 Herpes_UL1 Herpesvirus glycoprotein L Moxon SJ anon Pfam-B_7535 (release 7.7) Family This family consists of several herpesvirus glycoprotein L or UL1 proteins. Glycoprotein L is known to form a complex with glycoprotein H but the function of this complex is poorly understood [1]. 19.60 19.60 20.10 20.90 17.50 16.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.40 0.72 -4.59 14 77 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 36 1 0 71 0 104.90 33 53.27 CHANGED shshtshhssslssILphsChs....sshhhsh.sspshspsl.sGIhl+scCssPEslLWacpsppAYWVNPalsspGhh..EDltcsthssss.........+stlhpsLssth .............................hhhtshhspplusILclsCls.s..cshsaph.sss.....shs.sl.sGIhl+hcCss.EslLW.cpstpAYWVNPalhltGhh..-Dlscsuhs.sss.........hctlhpuLssh.t......... 0 0 0 0 +5088 PF05261 Tra_M TraM protein, DNA-binding Moxon SJ anon Pfam-B_7584 (release 7.7) Family The TraM protein is an essential part of the DNA transfer machinery of the conjugative resistance plasmid R1 (IncFII). On the basis of mutational analyses, it was shown that the essential transfer protein TraM has at least two functions. 
First, a functional TraM protein was found to be required for normal levels of transfer gene expression. Second, experimental evidence was obtained that TraM stimulates efficient site-specific single-stranded DNA cleavage at the oriT, in vivo. Furthermore, a specific interaction of the cytoplasmic TraM protein with the membrane protein TraD was demonstrated, suggesting that the TraM protein creates a physical link between the relaxosomal nucleoprotein complex and the membrane-bound DNA transfer apparatus [1]. 25.00 25.00 34.30 33.90 24.00 20.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.28 0.71 -4.08 8 222 2012-10-02 15:10:05 2003-04-07 12:59:11 6 1 171 17 11 121 0 120.70 70 98.53 CHANGED MPKlQsYVSspVhEcINsIVEcR+tEGAc-pDVSFSSlSSMLLELGLRVYEhQhE+KESGFNQhEFNKlLLEsllKophhss+ILslpsLos+lsuss+F-apsllpsIcccVpEpM-hFFP-s-DE .MA+VplYlSs-lh-KINtIlE+RRpEGA+-+DlSaSusuSMLLELGLRVYEA.....QMERKESAFNQsE.FNKlLLECVVKTQSoVAKILGIESLSPHVSGNPKF.EYANMVEDIREKVSuEMERFFPcND-E........................................................ 0 0 0 6 +5089 PF05262 Borrelia_P83 Borrelia P83/100 protein Moxon SJ anon Pfam-B_6712 (release 7.7) Family This family consists of several Borrelia P83/P100 antigen proteins. 
25.00 25.00 25.20 25.20 24.60 24.20 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.78 0.70 -6.22 3 126 2009-09-13 12:24:55 2003-04-07 12:59:11 6 7 67 0 21 122 3 520.80 46 87.98 CHANGED ++ElDpV.NtTlEFlNYsGPHDsVDSA-sIRGIGEsLAuAL+uGsAGDp..uRYuVIHsVDPpsKcGhDADIFIIGupAcVDHIsNlRcIlAGYLpAAYGYccpDAcTIA+FlTIYNAVYRuDLDaFKp+YKpVVTKsLTKE+AGLARRYDQWPGKTQIVIPLocptaSGsl.SuVDTsSISDKKVVE+LREDs-KsV-hRRDMlDL..............................................KERESQEuAKRAQ.hKcEuDpKQc-ADKtppcADpAQcsADKQR+EscQKQQE.......AKNuP-PAcTousKEDK+V...........A......EppK+EhEKuQpEscKssEEApKAKDpt......A--LK+E..uKupEK.......hAEcKotEAQ+-Rc-VAu........................................DhQKhhsps+AcsTutu--....AI-SSsPsYGLKVVDsp+hLSELVLlDLKTEscLRoSulpTIRsRsLYpcsKsLVAIAsT.SGNAslpLVcIDsKoLEVlKESspcIAupSsLl+sup.hhsul..pDDssKWhlu+assK.L-..hhStsclLPaTsa .......................................................................................................................................................................................................................................cKL+DFV..N.hDLEFVNYcGPYDSosTYEQIlGIGEFLA+s..Lh....su..sS..NSsh....hGKYalsRaID-pDKKu.SsDlFSIGucSpLDSILNLRRILTGYLhcuFcY-+uSAELIAKsITIYNAVYRGDLsYYKthYIpsuLKsLoKENAGLSRVYSQWAGKTQIFIPLK+sILSGpl-SDIDlDoLVT-KVVsuLLo.ENE.u.GVsF.AR..DlTDIQsEh+csDQcKIDhE.sshcp.c......................................................................sp.ppphps.pcp.pctos-ppcKE.IESQlDAKK+Q..KEEL.D.....cK..shc....LDKAQ.......QK..L.D...tuc.-sLDlQ.RcsV.+EKlQEsIschN+-KNLP.KPG.DV.SSPK.V....DKQLp.........tphQtp.p.........cssDEsQKREIEK..QIEIK..KsDEE.LlK.sKDt+.......AhDLpp-.....SKuSSKp................................p.hsEcKch.-......uK+phcsluc..............................................................................hsLQcscsQs+sps..T..shsp-........ul-SusP..VF...LEVI..D.Ph.T..NL.GsLQLIDLsTGscLKcSsppGIpR.....YGlY.ER-+D........LV...................VIKh-.SGc....AKLQ....lLs.KhENLKVlSESsF.EIs+sS.SLYVD.S+MILVsV..cDsussW+LAKFSscsL
-cFlLSEscIhPFTSF................................. 0 11 14 16 +5090 PF05263 DUF722 Protein of unknown function (DUF722) Moxon SJ anon Pfam-B_6789 (release 7.7) Family This family contains several bacteriophage proteins of unknown function. 29.80 29.80 29.90 31.40 29.70 29.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.73 0.71 -4.01 7 87 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 63 0 15 69 1 127.80 31 92.67 CHANGED MADKLDRIIuDYlsG+LpA+IKuhE.Ralh+pKsD.NLGhRTA.sGsuc..uphLppEt.EsDcELh+L+cphphlshaacsLhtp.EKclIpL+apthtthoWYpVh.cLs......ls.ppA+phahpF+psIhph ..........................MAD+lDclluDYhsGhlpscIcth..c...chhh..+.ppsD..NlGhtsu.sssucsEscslhp...EsDccLt+Lcc.hphl-hhhpsLhs-....-KclI.pL+appt.tthTWhpVu.cLs......lscppA+phhhpF+p.l...h........................... 0 5 5 11 +5091 PF05264 CfAFP Choristoneura fumiferana antifreeze protein (CfAFP) Moxon SJ anon Pfam-B_6800 (release 7.7) Family This family consists of several antifreeze proteins from the insect Choristoneura fumiferana (Spruce budworm). Antifreeze proteins (AFPs) and antifreeze glycoproteins (AFGPs) are present in many organisms that must survive sub-zero temperatures. These proteins bind to seed ice crystals and inhibit their growth through an adsorption-inhibition mechanism [1]. 25.90 25.90 27.10 26.90 25.30 25.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.89 0.71 -4.40 4 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 4 17 0 49 3 59.70 45 103.26 CHANGED MKhhMLIMALA.IsTVSSDGoCsNTNSQlotNSpCV+SThTNCYIDNSpl...............................YsTTCTGSpYDGVaITSSTTTGTpISGPGCoISoCTITtGVsAPSAAC+ISGCTLpAN ..............................lo.NS.C.pSThTNC.lspSpl...........................pSpl.toTCTsSpas.GlhIToSToTsoph.................................... 
0 0 0 0 +5092 PF05265 DUF723 Protein of unknown function (DUF723) Moxon SJ, Finn RD anon Pfam-B_6852 (release 7.7) Family This family contains several uncharacterised proteins from Neisseria meningitidis. These proteins may have a role in DNA-binding. 20.90 20.90 21.10 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.12 0.72 -4.17 6 74 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 23 0 6 29 9 60.50 46 34.44 CHANGED huhTFppAtochs-+FPc..lpLlcFsGltcPsoIsCPhHGsVshSsa.puhI+SKaGCPcCu .....................huhoFppAtu+hpp+FPc...lsLl-FsGlhtPsolpCPhHGsVphusa.puhl+.SK.h.G..CPcCu. 1 6 6 6 +5093 PF05266 DUF724 Protein of unknown function (DUF724) Moxon SJ anon Pfam-B_6894 (release 7.7) Family This family contains several uncharacterised proteins found in Arabidopsis thaliana and other plants. This region is often found associated with Agenet domains and may contain coiled-coil. 25.10 25.10 25.20 25.10 24.80 25.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.22 0.71 -4.68 15 90 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 12 0 62 86 0 176.20 27 27.11 CHANGED hhlP....FsK+tshWKshEohEVFKplPQuPHFsPLh.cscE-hREhsAlGhMloFhsLL-cVpsLplD-shSplpslspsFsELEKHGFNVpsPpSRIsK...lLoL+scpocph-EhKshEKchs-c...........p..-sthschEccIlELpRpt.hhpctKEst-.......pEIuphcopAsplcQplpss-h-FposhuAPW ..............................PFsKpt.hWp.hE.sh-VFchlPQpPHFpPL..ph..thREhhAlGhMhoFssLl-plppLpl-D.s.ts.hpshhc.slscLEc.pGFsVpslpsRLsc....LLsl+scpsphhcct+ph-cphtcc.......t....pph-pphtcl-pclh.....cLccpt..htphppttp.................t-lsphpspsptlppphtshchcFpshhst.............................. 0 10 24 45 +5094 PF05267 DUF725 Protein of unknown function (DUF725) Moxon SJ anon Pfam-B_6905 (release 7.7) Family This family contains several Drosophila proteins of unknown function. 
20.80 20.80 21.10 22.90 19.90 19.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.77 0.71 -4.22 11 174 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 15 0 81 172 0 122.40 25 50.34 CHANGED sssshospCFshYlPhlNpluspasssYssClssAssptpslssphppsppslppSuppsCssh.psC.......sohssshshFpCaAssusssspshYsISuNAopuAsplpcphpslcspc.pCsNpTp+s .............shpCFshYhshhsplsspYstsYstChsstpssppplspphpppppplpssspphCssl.psC.......s..sh.sss.h.s.hFsCaussuspsspshaslusNAo..ptssplppphphlphpcttCsspupp.......... 0 11 12 50 +5095 PF05268 GP38 Phage tail fibre adhesin Gp38 Moxon SJ anon Pfam-B_7415 (release 7.7) Family This family contains several Gp38 proteins from T-even-like phages. Gp38, together with a second phage protein, gp57, catalyses the organisation of gp37 but is absent from the phage particle. Gp37 is responsible for receptor recognition [1]. 25.00 25.00 31.70 29.40 20.10 19.90 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.92 0.70 -4.96 8 76 2009-09-11 08:17:54 2003-04-07 12:59:11 6 1 63 0 0 45 2 168.10 45 71.99 CHANGED MAVVGlPGWIGpSAVsETGQRWMsuAApcLRlGlPsWMSsMAGRS+EIIHTLGADHNFNGQWFRDRCFEAGSAPIVFNITGDLVSYS+DVPLFFMYGDTPNEYVQLNItuGVsMYGRGGNG....sussusGosGGcsIQNDIGGRLRIsNsGAIAGGGGGGGG...su..suatsphssGGGGGRPFGsGG...suuthSGGsAS....louPGuGu...ussstasGGsGG-VGuuGGpuh...GtssppssGGAAGtAVhGSAPTWpNVGsIYGsRV .................................................sGWlGSSAVsETGpRWMuuAuspL+L.usPhaMSQMsG+Shp.h.holGt............................................................................................................................................................................................................................................................................................... 0 0 0 0 +5096 PF05269 Phage_CII Bacteriophage CII protein Moxon SJ anon Pfam-B_7453 (release 7.7) Family This family consists of several phage CII regulatory proteins. 
CII plays a key role in the lysis-lysogeny decision in bacteriophage lambda and related phages [1]. 21.30 21.30 21.30 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.11 0.72 -3.98 3 497 2012-10-04 14:01:12 2003-04-07 12:59:11 6 2 366 12 19 217 1 84.00 41 89.93 CHANGED MspAS....TR+EAsRIESuLLN+IAhLGQRKlA-AlGlcESQISRWKsDWIPKhSMLLAVLEWGVsDD..-lARLAKpVAclLTKKKRPsC.TERSE ...................M..ss....pp+..c..ssRhEosLL.pLuhlsQ+shAchlGspESplSR.......s.......-...ah...hhu...hL..hA...aGhss-........hu+h.h+hshs.hl.....T.pcKtPss.sE................................................... 0 0 4 12 +5097 PF05270 AbfB Alpha-L-arabinofuranosidase B (ABFB) Moxon SJ anon Pfam-B_7464 (release 7.7) Family This family consists of several fungal alpha-L-arabinofuranosidase B proteins. L-Arabinose is a constituent of plant-cell-wall poly-saccharides. It is found in a polymeric form in L-arabinan, in which the backbone is formed by 1,5-a- linked l-arabinose residues that can be branched via 1,2-a- and 1,3-a-linked l-arabinofuranose side chains. AbfB hydrolyses 1,5-a, 1,3-a and 1,2-a linkages in both oligosaccharides and polysaccharides, which contain terminal non-reducing l-arabinofuranoses in side chains [1]. 
21.30 21.30 21.60 21.90 21.20 21.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.95 0.71 -4.49 11 334 2012-10-02 19:42:32 2003-04-07 12:59:11 8 53 174 16 174 358 3 131.80 27 13.91 CHANGED lS.l+soossYssRYlsHsuoTlNTpVVSSuSussl+pp.......ASapVpsGLAsuu.....ChSFESsDsPGoYlRH..tNFpLhlsANDGoctFpcDATFCPpsGlsu..pGs.SlcSasYPsRYlRHYsNlLalsusuG...a-ssssFpsDsoFhlt ....................................................................................l......tt.....s.....s.hps.sss............p.........us.atl.s.s.G.....L...us.ss.....slShES..sshPGhaL.R.H......tsh.pltLs...t..s...-.u.o..shFppDAT.F..phpsG...hss..sGh..........ohcShs..hPs.p..alR..H.h.s.....h..lhlst..........ttst.aptcsoFhht............................... 0 38 89 139 +5098 PF05271 Tobravirus_2B Tobravirus 2B protein Moxon SJ anon Pfam-B_7517 (release 7.7) Family This family consists of several tobravirus 2B proteins. It is known that the 2B protein is required for transmission by both Paratrichodorus pachydermus and P. anemones nematodes [1]. 20.90 20.90 21.10 197.70 20.40 19.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.56 0.71 -4.40 4 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 3 0 0 13 0 117.70 63 47.08 CHANGED ssDWuohWPNDpLFlsDhhpLVWFDhhsDhVchpHFsuQsssDLSsIPKuFlSFlDNRlPMCINHKGhVYIRVt.su--sYYQKFG-LDVSsFsDshLPPDh-FsFsKVshssscpl. NGDWuoKWPND+LFlDDFGKLVWFDlLsDlVcIoHFVSQsPTDLSsIPKSFISFIDNRlPMCINH+GWVYIRVKh-u--VYYQKFGELDVScFGDShLPPDFEFsFsKVTssVD+pLV 1 0 0 0 +5099 PF05272 VirE Virulence-associated protein E Moxon SJ anon Pfam-B_6573 (release 7.7) Domain This family contains several bacterial virulence-associated protein E like proteins. These proteins contain a P-loop motif. 
21.50 21.50 21.50 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.18 0.71 -4.84 10 1163 2012-10-05 12:31:08 2003-04-07 12:59:11 6 20 659 0 186 1048 243 191.70 28 33.69 CHANGED YLcul..pWDGhPRl-ohFhD......aLGsEDspYsptso+hahhuuVARVa.....cP..GsKaDalllLpGsQGsGKSThlctL.GGc..WFoDo...lcshcsKDthptlpGsWIsEluELsuhs.Ku-lEsIKuFITRocDpaRssYG+pspsaPRpslhVGTTNccEaL+DsTGsRRFaPlpss.K..plsh....s-hhpt.h-QLaAEAhhhYcc .............................................................th..tWD.Gh.pR.lpt.....hhhp.........hls.......s....p....s..s..t.......h.s......p......h....++ahluhlA..+...sh.....ps....ss..+...h...c..h..s..ll.L..h.G.s.Q.G.hGKSTa....h.c.....t..L....s..sp.......a.a.oDs........l.p...t..h.....p..s..+....-.s..h.....p....t....l..pt.h....hllp....hs.E.hsu.hs...ppp..h.p.tlKshlo....cphsphRhsYs..c..p.s.pchsRpssFlGToN.p..p.-......a..L.p.DtT.Gs.RRFhslpVp.s....hch..........c.................hpQlaApAh.h...t................................................................. 0 95 149 176 +5100 PF05273 Pox_RNA_Pol_22 Poxvirus RNA polymerase 22 kDa subunit Moxon SJ anon Pfam-B_6584 (release 7.7) Family This family consists of several poxvirus DNA-dependent RNA polymerase 22 kDa subunits. 25.00 25.00 79.50 79.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.04 0.71 -4.65 10 53 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 43 0 0 34 0 182.10 69 99.44 CHANGED MNpHNV+YLAKILCLKsEIh+cPYAlISK-llp+Yss-lcYGDLVTlITVpHKlDss+TVFQVFNESSVsYoPlEcDYGEPIIITSaLQpGHNKFPlshLYIDlVASDlFP+FsRLos-ElsllsSlLQsGDsK..poLKLPKMLETElusKILYHKDhPLKlVRFa+NNMlTGlEluDRuVlsVl .MNQaNVKYLAKILCLKTEItRDPYAVIsRsVlhRYsTDIcYsDLVTlITV+HKIDohKTVFQVFNESSlsYoPV-DDYGEPIIITSYLQ+GHNKFPlNFLYIDVVhSDLFPoFVRLsssEssIVsSVLQsGDuK..coL+LPKMLETEIVsKILY+PsIPLKIVRFFRNNMlTGVEIADRSVloV.. 
0 0 0 0 +5101 PF05274 Baculo_E25 Occlusion-derived virus envelope protein E25 Moxon SJ anon Pfam-B_6633 (release 7.7) Family This family consists of several nucleopolyhedrovirus occlusion-derived virus envelope E25 proteins. 20.70 20.70 211.80 211.40 18.70 18.00 hmmbuild --amino -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.15 0.71 -4.68 17 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 53 0 0 53 0 183.90 52 84.15 CHANGED upS.uDSlQhs.p.GphsVK.hNss+lKslRlhaGD..NcISKlhVuEpPLoYs-IlDcGN+.VGsNsVFlGslp-shsss................sssssRsTuNFsIKQFKNhFIVFKsl-.ocIcpsssMlRYEu-uMVYsLIDuosoolP-.LRDV.....SYPIsVhTsNussQLhLKEWsYTQINDuGTLFlKNEKSFR ....spS.SDSlphssp..GphsVK.h.Nss+lKslRlhHGD.....N...KlSKlaVAE+PLoYs-Il-cGN+pVGsNsVFlGTl.-sussS...............s.sssssRsTuNFDIKQFKNhFIVFKsl-ssKIccsssMlRaEuDGMVYCLIDussoolP-..LR-V.....SYPIsVYTsNussQLhLKEWsYTQINDuuTLFlKNEKSFR..... 0 0 0 0 +5102 PF05275 CopB Copper resistance protein B precursor (CopB) Moxon SJ anon Pfam-B_6721 (release 7.7) Family This family consists of several bacterial copper resistance proteins. Copper is essential and serves as cofactor for more than 30 enzymes yet a surplus of copper is toxic and leads to radical formation and oxidation of biomolecules. Therefore, copper homeostasis is a key requisite for every organism. CopB serves to extrude copper when it approaches toxic levels [1]. 
20.80 20.80 22.10 20.80 20.60 19.80 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -10.93 0.70 -5.10 81 554 2012-10-03 17:14:37 2003-04-07 12:59:11 6 4 422 0 149 499 67 205.50 41 65.28 CHANGED ssthhthlhhDcLEhptsc..sssu.hsW-spuWhGsDhsRlhlKoEG........Ehppuc..h--u-.sphLau+AIuPaWDhQsGlRtD..htss.s....sRsaushGlQGLAPYaFEl-ushaluccGcsuhRlEuEY-lLLTQRLILQPclEsshhup-DsppulGuGLosh-hGLRLRYE.lsRpFAPYlGVsapppaGsTADhsRspGccsspsphVsGlRhWF ...................................s..th.hhlllDpLEhppsc..ss.ss..huW-spuW.lGuDhsRlal.+oE.G...........EpspGc...sEsu-...sphLau+ulu..PaWDl.uGl..RpD........hps...u..s.........sRsW..A.AlGlQG...LAPYhFEs-uosalupsGpsuhRLcuEYDlLLTpRLILQPphEsshhupcDsppuhG.sGLoss-hGLRLRYE.lp.RcFAPYlGVsasppaGpTu-hs+tpG-.p....s....ppspalAGlRhWF......................................... 0 40 89 124 +5103 PF05276 SH3BP5 SH3 domain-binding protein 5 (SH3BP5) Moxon SJ anon Pfam-B_6742 (release 7.7) Family This family consists of several eukaryotic SH3 domain-binding protein 5 or c-Jun N-terminal kinase (JNK)-interacting proteins (SH3BP5 or Sab). Sab binds to and serves as a substrate for JNK in vitro, and has been found to interact with the Src homology 3 (SH3) domain of Bruton's tyrosine kinase (Btk). Inspection of the sequence of Sab reveals the presence of two putative mitogen-activated protein kinase interaction motifs (KIMs) similar to that found in the JNK docking domain of the c-Jun transcription factor, and four potential serine-proline JNK phosphorylation sites in the C-terminal half of the molecule [1]. 
23.00 23.00 23.10 23.50 22.90 22.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.60 0.70 -5.05 8 259 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 95 0 164 239 2 194.20 41 52.73 CHANGED Es-E...lDPRIQtELE+LNpATD-IN+hElELE.......................................................................EA+spFRplLhEuscKLcsluKKLGssI-KARPYYEA+chA+pAQhEsQ+AAhcFpRAsplhsAAKEpVuLAEQpLhppsp..phDsAWQ..EMLNHATp+VhEAEpp+scuEspHpcps+thppApp+lppLEcch+RuIpKSRPYFEhKtpapppLEsQKtpls-LEtcVppAKssYosALRNLEpISE-IHppRpstu....ssuscpsus .....................................................................................................................ht.plp.ELEcLNpuo-pINphEh....pL-.......................................................................-A+.ppaRplL..Eush+Lpt.sp+lG.p.s.l.-cu+PYaEA+phA+pup.EsQpAs.papRAsphhtAA+Ehl.........lAEQtlht..........cpt.....ph...DssWQ.....EMLNHATp+V.-AEpp+hcuch.HpcssthhptA.t+hptLp+p.L++uIt...KS+PYFEhKup.......a...Lcp...KtpVppLptplstA.KtpYp.AL+NLEpIS-pIHtpRpt..................s........................... 0 51 64 122 +5104 PF05277 DUF726 Protein of unknown function (DUF726) Moxon SJ anon Pfam-B_6757 (release 7.7) Family This family consists of several uncharacterised eukaryotic proteins. 
22.60 22.60 22.80 23.00 22.40 22.30 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.19 0.70 -5.56 9 559 2012-10-03 11:45:05 2003-04-07 12:59:11 7 18 287 0 381 563 32 271.90 31 41.08 CHANGED ++hKRhhhlGLAsluGuhlIGlouGLtAPlluAGlushh......GssGsss...hLuus.uGsA.llsuhhushGAtlsuhtMp+RsuslcsFpFhPLpsspp............tslhVslss.h.hus.c-lptsWpsLs.s......s-hYuLtWEschLhphGpsl.slLhStshshshQpl.LttTl....................LsuLhuAlpWPhuLhKlu.llDNPWslshcRAhpAGchLA-sLhsRsh.GhRPlTLlGaSLGARlIa.CLhpLuc+.cthGllENVllhGsPssschc.WpphRoVVSGRhVNsYscsDWlLuaLaRssust..pluGhusls...hpslENlDsoslVpGHLsYpcphsplL+tlshc ..................................................................................................t....+hhhhGhAslsGuhllul..ouG.LhAPhluuuh...ushh.............Ghsuhsu.................lus.........h....u.t...s.......hhss.hshhG.uths.s.hhhtphh.t.tlppFthhslttstt....................................................plhlsl.h...h........t.p...p.shhhPa.p.hlt....................t-.aslhWEsphLhphGpsl.phl.h.st.h.h....s..s....hpph.lt.Tl................................................................................hssl.huul.hP.h....s..L.h.p.hu...h.lDNsa..slshs.RuttsG.thLA....c....s....Lh...p...+t......GpRP.lT...LlGaSLGuRlIa.CL.p...L...Ap..........c......t......shG....l..lps.Vhlh.G.uPhss..........p....t....pp....at..........h....+.pVV...u...GRhlNsY.....s..psD..alLu..................h...laR.........t.s.....t...h.........th....t..........lu.Gltslp..........t.......l-..Nhshoph.l.tu.Hh.pY.........hstlLpthth............................................................. 0 135 225 323 +5105 PF05278 PEARLI-4 Arabidopsis phospholipase-like protein (PEARLI 4) Moxon SJ anon Pfam-B_6763 (release 7.7) Family This family contains several phospholipase-like proteins from Arabidopsis thaliana which are homologous to PEARLI 4. 
27.90 27.90 28.30 28.00 27.80 27.80 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.63 0.70 -5.02 6 68 2009-09-11 21:08:31 2003-04-07 12:59:11 7 5 8 0 42 67 0 216.70 24 49.53 CHANGED s-ptslhLFP-lhhs.psp.sScs.........................................................pp.ppptppEssEL..........................-hpShhS-Sa..VSVGpY+VRuSVSoTLQuIlDKHGDIAusSKLQShuTRSYYLEsLAuVVhELpSTPL+pLocsRVtEMlAVVKDlESVKIcVGWLRoVLEElsEAscaastpEssssEKEspE+clhhcKpEMEtppp-Lsc+EKElKEhRc+lpEhsu+LG-LEMKRsRL-KphshhuSKVEKF.cGcohlcc ....................................................................................................................................................................................................................................................................................h..s-sh.....VpVstYpVctShsshLptIlcKHGDIuusscLpShphRShYLEsLssllp-L..p.poslc.pLocsclp-hhuslpDl-ssplcVuWL+s...tLsE...lh..-s..hchh.......s.phctschc+ctpc+c.lpttcpEhEt...pp-LtphEpcht-hptph........http.....hsphp....th......h..p........................................... 0 14 24 27 +5106 PF05279 Asp-B-Hydro_N Aspartyl beta-hydroxylase N-terminal region Moxon SJ anon Pfam-B_6767 (release 7.7) Family This family includes the N-terminal regions of the junctin, junctate and aspartyl beta-hydroxylase proteins. Junctate is an integral ER/SR membrane calcium binding protein, which comes from an alternatively spliced form of the same gene that generates aspartyl beta-hydroxylase and junctin [1]. Aspartyl beta-hydroxylase catalyses the post-translational hydroxylation of aspartic acid or asparagine residues contained within epidermal growth factor (EGF) domains of proteins [2]. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.86 0.70 -4.52 9 236 2009-01-15 18:05:59 2003-04-07 12:59:11 6 14 40 0 49 199 0 177.80 40 49.29 CHANGED NGR+GGlSGu.SFFTWFMVIALLGVWTSVAVVWFDLVDYEEVL...............GKL.GlYDADGDGDFDVDDAKVLLG....lK-+.hsc...................................t.s..cEt-shsc.Ettlshctphpplc-ElKEQlpplhcchVhsc.......pp.ct.stE.p.-tc+hhhtsDsD-phcs.tssts+EEhEt.h..Ech.p.t.....cEhsscpcsscspEsVp..cstch+tcsscVs.psh-cp.....................pt.h-pt....Ech.h.ssEcppcsP ............................................ts..ShhoWhhVIALLGVWTSVAVVaFDLVDYcpVl..........................................GKL.ulYD.ADGDGDFDl-DAKVLLs..............hppt....p.........................................................t......h...tt.....th.t.p.....tc.p.t....t....c....................................................t............pt.............................................................................................................................................................................................................................. 1 3 7 23 +5107 PF05280 FlhC Flagellar transcriptional activator (FlhC) Moxon SJ anon Pfam-B_6773 (release 7.7) Family This family consists of several bacterial flagellar transcriptional activator (FlhC) proteins. FlhC combines with FlhD to form a regulatory complex in E. coli, this complex has been shown to be a global regulator involved in many cellular processes as well as a flagellar transcriptional activator [1]. 
20.60 20.60 21.00 21.40 19.80 20.20 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.17 0.71 -4.80 23 831 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 727 2 135 359 15 173.70 64 90.40 CHANGED Mu..pKS....llsEucpIpLAhELIpLGARLQlLEoETsLSR-RLl+LYKEl+GsSPPKGMLPFSTDWFhTWpPNIHSSLFhNIYpaLhcpussssl-AllKAYRLYLEplst.pphEP.....l.......LSLTRAWTLVRFhDusMLphopCspCGGcFVsHua-.ppsaVCGLCpPPSRAGKo++suspt ..........................................................MuEKSIVQEA+DIQLAMELIsLGARLQMLES.ET.Q....LSRGRLI.KLYKE..L.R..G....SP...P.P.KG..M.LPF..S..T..DWFM..T..W.EQ.N.l..HASMFh.NAapFLL..Ko.....G..h.Cs..GV...DAVIKA..YRLYLE....Q..CPp.....s..c..-sP..L.......LALTRAWTLVRFVE......S......G.l......LpLSuCNCCGGsFI..T.H....A..H....Q....P.s.s.SF..sCSLCQ...P....PSRAVK+RKLSp.ss............... 1 10 53 95 +5108 PF05281 Secretogranin_V Neuroendocrine protein 7B2 precursor (Secretogranin V) Moxon SJ anon Pfam-B_6776 (release 7.7) Family The neuroendocrine protein 7B2 has a critical role in the proteolytic conversion and activation of proPC2, the enzyme responsible for the proteolytic conversion of many peptide hormone precursors. The 7B2 protein acts as an intracellular binding protein for proPC2, facilitates its maturation, and is required for its enzymatic activity. Processing of many important peptide precursors does not occur in 7B2 nulls. 7B2 null mice exhibit a unique form of Cushing's disease with many atypical symptoms, such as hypoglycemia [1]. 
25.00 25.00 44.70 38.80 20.10 22.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.66 0.70 -4.95 12 117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 88 0 64 114 0 200.70 45 85.95 CHANGED hh......tlL.......hhsl.........tsAh.shsP..phhDplS..cschthhhcush-.................p.t.u..c.Ea.tHpu..LhG.QplpGGAtEG.p.........shst.hossslP.......uYssPPNPCPlGhT.tsDG..ClEsh.sTA-FSRcaQspQchh.DsEH.hasss.............................................sphp+sLlh+K..h+stp................tpcppcps.NPaLQGp+Lc.lsAKKsssph. ......................................................hhhh.......h......th.............sssh..uhss.......cssDpVS..-s-l.....pRLh+.GshE...........................phGlup.csEYssHpu...LhGsQpIp.GG..A...pE............GhQ.................+Lu......P.....G......N........IPN..ls....sEhT.scsl.P...................uYssPPNPCPl....GhT.scDG...CLEshssTAEFSREa....Qhp..Q....chh.DsEH.as.sh............................................................uphsKpLlhcK...h+stt...................Rpccsl.NPYLQ...G..p+.Lc.lsAKKus.hh.p........................ 0 18 23 43 +5109 PF05282 AAR2 AAR2 protein Moxon SJ anon Pfam-B_6782 (release 7.7) Family This family consists of several eukaryotic AAR2-like proteins. The yeast protein AAR2 is involved in splicing pre-mRNA of the a1 cistron and other genes that are important for cell growth [1]. 
20.40 20.40 20.60 23.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.37 0.70 -5.53 41 321 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 253 2 227 311 4 325.70 23 83.00 CHANGED h............sslllhslP.ts.....hlGIDhhoassssp..F+GlKtlPs..G..hHFlahs.........................................................ppsshuhRhGhahhhp.........................tsclhlh+WspppEshh..ps...........pt....................pthptpspl.tphpph..LssYPh.........................................cshp.pWtsLT.shIs........pllp+lpshs........................................hhlsstpssppcpp...................................ttpptscpt.thtt..........................................................................psplpFs.l.....cpphppusss...........p-lTctshD+SahLppll................................pphpsp.ttlLGELQFuFlhhl.hhtsasuhpQW+pLlpLlssu.pphltp...ptph.....................ahpllclLhhQLt.............chsp-hhhD......hhppstFlt.hl........................htttltpphcplcshlppcashcl.........cs 
...........................................hh..ssslllhslP.to........hGIDh.sapsssp.....F+GlK.lPP..G...hHFl.ahu.........................................................t...tphu.Rh..Ghah.hp.............................tp.lhlhcWsttpEpl....t.....................pt....................................................ch.t.tsph.tphcph...LssYsh.........................................pphp..pWhpLo.shl.o........shl..p+lts.s...ht........................................hls.sttt.tptcpp.........................................t...tp...h..................................................................................................tpphpFs.l........pphh.tssss...........t-hTptshDp...ShhLppll...............................................................tphtt...s..tplLGELQauFlshl.hh....tshpuhppW+.pL.l.p.Ll...........hps.ptsh.h.p....ctth..........................ahphlp.lLhhQLt........................phsp-hhh-.................hsts.shltphlp..................................................................t.........h..t.....t.....tl...hpthtphpthlpt..phtWph..t...................................................................................... 0 74 122 184 +5110 PF05283 MGC-24 Multi-glycosylated core protein 24 (MGC-24) Moxon SJ anon Pfam-B_6825 (release 7.7) Family This family consists of several MGC-24 (or Cd164 antigen) proteins from eukaryotic organisms. MGC-24/CD164 is a sialomucin expressed in many normal and cancerous tissues. In humans, soluble and transmembrane forms of MGC-24 are produced by alternative splicing [1]. 
25.50 25.50 25.80 25.80 25.40 25.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.58 0.71 -4.50 4 165 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 61 0 82 152 0 116.60 30 77.58 CHANGED huRphh..hAAhCh.usLClLustps.sttssssths.hssssoshsssls.sT....psCE..phNsC.pClNsoh.ssohCsWhpCpsE..saCSutstV....usCp.tNoT-SCSs.sss.......sVsT.puTstPshps.uso.........sosppsTosssTNsTVTPssp.sRKSTFDAASFIGGIVLVLGlQAVIFFLYKFCKS ...................................................s...........................................................................................................................................................................spot.....ss..s................................................................................................s.tt....s.o...p...s...t...s.s...sT.s.oPs..s.........s...+...p....st......FDuuSFlGGIVLsLul.Alhahhh+Fh+u........................ 0 8 13 28 +5111 PF05284 DUF736 Protein of unknown function (DUF736) Moxon SJ anon Pfam-B_7619 (release 7.7) Family This family consists of several uncharacterised bacterial proteins of unknown function. 22.00 22.00 22.40 22.30 21.80 21.10 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.37 0.72 -4.21 66 445 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 185 0 238 454 42 101.00 34 87.91 CHANGED ssI.GpFs...psp.suapGplcTLolsscl.plVPsps..ss-...sAPDaRlhsu.........ss.ElGAuWp+pupc....up-YlSlpLDDPsF.stPlhAsLhp...sp-sctt..asLlWsR.Ppc .................sI.GsFs...pss..s.uasGplpTLsls.sch..pllPspp..ss-...pAPca..R..lhsu..........ssElGAAWp+pupc....Gc-YlSlpLDDPuF.suPl..hAsLhp....s-css.t...asLlWsRs........................... 0 36 145 190 +5112 PF05285 SDA1 SDA1 Moxon SJ anon Pfam-B_6906 (release 7.7) Family This family consists of several SDA1 protein homologues. SDA1 is a Saccharomyces cerevisiae protein which is involved in the control of the actin cytoskeleton. 
The protein is essential for cell viability and is localised in the nucleus [1]. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.31 0.70 -5.15 38 397 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 283 0 294 403 7 256.40 28 42.13 CHANGED Ms....EsLLQDLspYK.....sSK.-KuVhMAARuL...........luLYRE........VsP-hLp+KDRGK.sAuhulpssctp.........taGcpps.sssI.GlELLtca.......t..ct-pu...ss-s-sDsp.............W...............................ps...t-ss-ssDs..cGpWlsVpsD...p..........................................................oDsEDEcp............................t..t.t.....p.phpsccpsppsppptp-.........................................stcpphspluoo..RILTPADFtKLpELRtptul..sphhsttt......................pcpp--hlsuscIEuhsKht..Kps+EERlApspEGRp-RpcatS....+Ks+Kcsts.pSTTN+EKpR.+KNFhM.hl+K.ts+uKpKpSlp-+ppsLRsHlt+p.....K+tt .............................................................................................hscpLLpDLs..Y+.....pp+.sKsVhhAA+uL...........lpLaRp........lsPphLt++.RG+..sphthp.tt..t.................aGp.p..hs.l...GhE.l..Lt.t.............ttt.......tpptp.p-tt.............h..................................................................tt...ppp.ppDt........psth.h...s....spps......................................................................sDp-p.pt.......................................................................t....t.....t.tt....t............................................................ttttthttlsss..+lL.T.tD.ht+lp.hphptth...pth.ttt..............................................tptttchls.pplpt.hch.....Kts+.......-pRlttshtG+psRtcat................+tp+..t.h..tSpoN+cKt+.pKsh.M..h...h.tp...s+sKt....ph.S.hh.cpp..hhp..tt..+t.....p...t................................................................................. 
0 106 161 235 +5114 PF05287 PMG PMG protein Moxon SJ anon Pfam-B_7710 (release 7.7) Family This family consists of several mouse anagen-specific protein mKAP13 (PMG1 and PMG2). PMG1 and 2 contain characteristic repeats reminiscent of the keratin-associated proteins (KAPs). Both genes are expressed in growing hair follicles in skin as well as in sebaceous and eccrine sweat glands. Interestingly, expression is also detected in the mammary epithelium where it is limited to the onset of the pubertal growth phase and is independent of ovarian hormones. Their broad, developmentally controlled expression pattern, together with their unique amino acid composition, demonstrate that pmg-1 and pmg-2 constitute a novel KAP gene family participating in the differentiation of all epithelial cells forming the epidermal appendages [1]. 29.00 29.00 29.40 29.10 28.10 28.90 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.55 0.71 -4.47 15 251 2012-10-03 03:07:01 2003-04-07 12:59:11 7 10 30 0 129 302 2 146.40 33 89.87 CHANGED MSYsCsSGNaSS+ShtsphphPsoSsso..shPoslshssslCoPSopQhuSolhcsCQETCsEPhpCQssssp..............................sssCQsoCapPpsSslsuPCpoThuGsluFsSS..SCpshuh..........tSpsC.slG.sGSsuapslstsossh.shshhot..aC+Pshh.........uS+oh.Qs...............oCapPsCuSuh ............MsasssotshSopSh.....tshhhhPsssss.s...hssslshtsshh.P.S...oh...Q...hso.LhssC...QETChEPssCp.oss................................spsCQ..ss......C..pPp.....sh...h......ssP.Cps.sh..s.tsh..........u.asSs..........uCps.huh..........hopss.slu....ssS.....sshpslshh.sp.sh.s.s.hst......hCpP..h....................s.psh.ps................s.....h................................. 0 14 14 42 +5115 PF05288 Pox_A3L Poxvirus A3L Protein Moxon SJ anon Pfam-B_7718 (release 7.7) Family This family consists of several poxvirus A3L or A2_5L proteins. 
25.00 25.00 98.30 98.20 22.20 20.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.29 0.72 -4.24 12 91 2009-09-11 10:15:31 2003-04-07 12:59:11 6 1 41 0 0 44 0 66.90 66 95.24 CHANGED tYplsLcPP++C.SpC.sNLh-alp-Dccsl+hhLtSQPpKhplLKpFLshsRNKphhhKILDpEl+RVLs ..Yph.lp.PK+C.S.pChsNLhcalsEDuNs.I+hlL.SQPpKLKVLp-FLushRNKpFlYKILD-ElRRVLT. 0 0 0 0 +5116 PF05289 BLYB Borrelia hemolysin accessory protein Moxon SJ anon Pfam-B_7729 (release 7.7) Family This family consists of several borrelia hemolysin accessory proteins (BLYB). BLYB was thought to be an accessory protein, which was proposed to comprise a hemolysis system but it is now thought that BlyA and BlyB function instead as a prophage-encoded holin or holin-like system [1]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.27 0.72 -4.22 3 123 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 28 0 5 92 0 102.00 77 87.44 CHANGED MKLSKNNLELGLTSLSTLIDIFSKFED.EFDEsAHKGFFLVYELYSHYKLIYTANMERLESALTPsIscTLAPINEKINQCIDLVNSDEKNLKISNDLKFNcEGKP .......MKLSKDNlELGLTSLSoLIDIFSKFED.EFDEIAHKGFFLVYELYSHYKLIYTANMERLESALTPs.IstALAPLNEKINQCIDLVNSDEK.NLKISNDLKFNQEGKP.............. 1 4 4 4 +5117 PF05290 Baculo_IE-1 Baculovirus immediate-early protein (IE-0) Moxon SJ anon Pfam-B_7745 (release 7.7) Family The Autographa californica multinucleocapsid nuclear polyhedrosis virus (AcMNPV) ie-1 gene product (IE-1) is thought to play a central role in stimulating early viral transcription. IE-1 has been demonstrated to activate several early viral gene promoters and to negatively regulate the promoters of two other AcMNPV regulatory genes, ie-0 and ie-2. It is thought that that IE-1 negatively regulates the expression of certain genes by binding directly, or as part of a complex, to promoter regions containing a specific IE-1-binding motif (5'-ACBYGTAA-3') near their mRNA start sites [1]. 
33.10 33.10 33.10 87.10 32.30 33.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.13 0.71 -4.45 14 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 40 0 0 40 0 136.70 43 54.30 CHANGED lpalsspspapashaIFlPYlKQLppIlpLFhNDaCCsKlVKphtssLssLlscSt+hL+sI+hhNcRhQVlNVFh-s..lYpCNICpDTSsEE+FLKPNECCGYpICNhCYANLWKaso..lYPsCPVCKTSFKSSssssppt ...................t.s.a.hNhalFlPYlKQLptllch.FpND.a.CCtKllpshthtLspLlscstchlcpIcshN+plpVhNVFh-s..sLYECNIC+-sSs-E+FLKPsECCG.YsICNhCYusLWKass..haPhCPlCKTSFKssp........... 0 0 0 0 +5118 PF05291 Bystin Bystin Moxon SJ, Wood V anon Pfam-B_7767 (release 7.7) Family Trophinin and tastin form a cell adhesion molecule complex that potentially mediates an initial attachment of the blastocyst to uterine epithelial cells at the time of implantation. Trophinin and tastin bind to an intermediary cytoplasmic protein called bystin. Bystin may be involved in implantation and trophoblast invasion because bystin is found with trophinin and tastin in the cells at human implantation sites and also in the intermediate trophoblasts at invasion front in the placenta from early pregnancy [1]. This family also includes the yeast protein ENP1. ENP1 is an essential protein in Saccharomyces cerevisiae and is localised in the nucleus [2]. It is thought that ENP1 plays a direct role in the early steps of rRNA processing as enp1 defective yeast cannot synthesise 20S pre-rRNA and hence 18S rRNA, which leads to reduced formation of 40S ribosomal subunits [3]. 
28.00 28.00 28.20 30.20 27.90 27.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.76 0.70 -5.48 21 359 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 301 0 250 363 3 287.10 46 68.65 CHANGED Fhs..ps..............spoLuDhIhpKIpE+-sph..........ptthtptps..hP..thsP+Vl-lYcpVGplLS+Y+SGKLPKAFKllPslssWEplLhlTcPcsWTPpAhatATRlFsSshpsptAQ+FhphVLLsRVR--Ip..EsKKLNaHLYpALKKALYKPuAFFKGlLFPLspossCTLREAhIluSVlsKlSIPlLHSuAALh+ls-h.........-asGssShFI+lLL-KKYALPY+VlDulVhHFhRFps.p..................cpLPVlWHQuLLsFsQRYKsDlop-Q+-tLlcLl+t+sHh.tIoPEIRRELhsupsRssp.s.sth .............................................................t...phsLADhIhpKIp-+psp.................tt...h.t.t.....s.sh......s....plss+Vl-lYptVGplLo+Y+SGKLPK.sFKllPs..LpsWEp..lLhlTcP-p............WospAhYpAT.RIFsSshpsphAQ+......FhshVLLsRlR-DIt...EpK..............+LNhH.LY.........pALKK..ALaK.PuAFF.KGlLhPLs.poGsCTLREAhIluSlls+sSIPlLHS.uAAlh+ls-h...........-.asGss.olFl+lLL-KK...............YALPY+VlDulVhHFlRFcspp..................................cpLPVlWHQuLLsFsQRYKs......Dls.p-Q+-tLL-Llchp.s..H..pIsPEIRREL.sutsRs..................... 0 92 141 208 +5119 PF05292 MCD Malonyl-CoA decarboxylase (MCD) Moxon SJ anon Pfam-B_7770 (release 7.7) Family This family consists of several eukaryotic malonyl-CoA decarboxylase (MLYCD) proteins. Malonyl-CoA, in addition to being an intermediate in the de novo synthesis of fatty acids, is an inhibitor of carnitine palmitoyltransferase I, the enzyme that regulates the transfer of long-chain fatty acyl-CoA into mitochondria, where they are oxidised. After exercise, malonyl-CoA decarboxylase participates with acetyl-CoA carboxylase in regulating the concentration of malonyl-CoA in liver and adipose tissue, as well as in muscle. Malonyl-CoA decarboxylase is regulated by AMP-activated protein kinase (AMPK) [1]. 
19.00 19.00 22.20 19.60 17.90 17.80 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.18 0.70 -5.76 3 443 2009-01-15 18:05:59 2003-04-07 12:59:11 6 9 296 2 221 458 837 259.70 34 71.11 CHANGED MLsEKFGsscEKLspAIshY..sKs-pthIpl+suAssSRscLl+slNchPGGTu+Vs-MRpplLA......SKsooSth+sLDlshs-lhsSWFSLGhLcLE+LsWooPu.ILpKltEYEAVH.ltGhp-hR+RLuPLsRRCFuF.HEsLAcEPLVFVEVALs-oVAcoIt-lhccGt.phpt--tTTAlaYSIossQPGLuGIsLGNFLIK+VlTcL+KDlPuVoTFuTLSPIPGFtpWLl+pLpupScasp......................pE+sl.hlSD.Sppt.NthEssETLLuVcss-WsTcK+pLsslE+ILMhLCARYLLsEK+.cG+ALDSVANFHLpNGAcLERLNWhGDRScKGIpQSaGIMVNYlY+ ..........................................................................................t..........................................................................s.h.tlhpph.......s...t...uht.Ll.thRtclht.......................t......tlt.l-tthpph...hsp..WFshuhL.Lc.ls..W.p.o.PsplLpKlhtYE...........AVH.....l.p.s.W...Dl+pRlts.......RRCauFhHsthst..................-PLlhlcVAL.spthsssl..t....l.......l.....t...t....................t............t................................t........p..........p...s...s..s.AlFYSISssQ.GLtGlshGshLIKpVlppLp.........p.........-h......Pp.........lppFsTLSPlPGFhpWL.......t........................................................................................................................................................................................................................................................................t.h.p..hhthsAhYl..h..tt+p....p.....G....h..s.h....sPVApFHLtNGAhl.c..lNahuDhS.+GhppShGhMVNYhY...................................... 1 76 127 174 +5120 PF05293 ASFV_L11L African swine fever virus (ASFV) L11L protein Moxon SJ anon Pfam-B_7869 (release 7.7) Family L11L is an integral membrane protein of the African swine fever virus (ASFV) which is expressed late in the virus replication cycle. 
The protein is thought to be non-essential for growth in vitro and for virus virulence in domestic swine [1]. 25.00 25.00 159.30 159.20 20.30 20.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.77 0.72 -3.85 2 12 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 0 0 10 0 77.30 92 93.55 CHANGED MLEPlLVMAPIPLlLIFLYSYFKIKLHKLITIALFLGCLFFILRDFCFPPMLWp.LpNhT.shNshLGNpSF.VpCpp MLEPILVMAPIPLVLIFLYSYFKIKLHKLITIALFLGCLFFILRDFCFPPMLWTQLHNITSSIN.ILGNNSFQVKCNp. 0 0 0 0 +5121 PF05294 Toxin_5 toxin_5; Scorpion short toxin Moxon SJ anon Pfam-B_7892 (release 7.7) Family This family contains various secreted scorpion short toxins and seems to be unrelated to Pfam:PF00451. 25.00 25.00 26.80 26.50 23.20 22.60 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.27 0.72 -3.74 6 25 2012-10-01 23:31:40 2003-04-07 12:59:11 8 1 11 2 0 29 0 32.20 66 72.07 CHANGED CsPCFTTDPpMppKCpcCCGG+..GhChGPQCLC ChPCFTTDPsMupKCp-CCGGp..GKCaGPQCLC. 0 0 0 0 +5122 PF05295 Luciferase_N Luciferase; Luciferase/LBP N-terminal domain Moxon SJ, Bateman A anon Pfam-B_7906 (release 7.7) Domain This family consists of a presumed N-terminal domain that is conserved between dinoflagellate luciferase and luciferin binding proteins. Luciferase is involved in catalysing the light emitting reaction in bioluminescence and luciferin binding protein (LBP) is known to bind to luciferin (the substrate for luciferase) to stop it reacting with the enzyme and therefore switching off the bioluminescence function. The expression of these two proteins is controlled by a circadian clock at the translational level, with synthesis and degradation occurring on a daily basis [1]. However This domain is not the catalytic part of the protein. It has been suggested that this region may mediate an interaction between LBP and Luciferase or their association with the vacuolar membrane [2]. 
25.00 25.00 104.80 104.10 20.80 16.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.63 0.72 -4.18 13 14 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 8 0 0 14 0 81.60 58 8.43 CHANGED MAs..QLspFLsN-AKlDs+VluYMT+pLpL-SVSDFANYWTSsEYE+GVQDDIlupVusFpss.SKPsuKlQlARLRAAW+uAQ MAt..pLspFLss-AKlDs+VluYMT+pLpL-SVSDFANaWTosEYE+GVQDDIlupVssFpss.ScssuKlQlARLRAAW+uAQ. 0 0 0 0 +5123 PF05296 TAS2R Mammalian taste receptor protein (TAS2R) Moxon SJ anon Pfam-B_1498 (release 7.7) Family This family consists of several forms of mammalian taste receptor proteins (TAS2Rs). TAS2Rs are G protein-coupled receptors expressed in subsets of taste receptor cells of the tongue and palate epithelia and are organised in the genome in clusters. The proteins are genetically linked to loci that influence bitter perception in mice and humans [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.02 0.70 -5.53 20 1248 2012-10-03 04:04:29 2003-04-07 12:59:11 8 3 92 0 394 2541 0 286.50 29 96.76 CHANGED Mhssh.slh.hllhlsEhllGlluNuFIsLVNsh-Wl+p++lSslDhILhuLAISRIsLlhllllssahhlhhsshatsuthhphlshhWhhhNphSlWhAThLulFYhlKIAsFSHPlFLWLKhRlspVlsal...LLuolllsh..lsplhpshp...........phppNoThphchpchphhhshlhhp..lsshlPFllsLlShhLLIhSLh+Hp+pMppsuoGhRDPsTcAHspAlKsllSFllLahsYaluhllphhshhhscsplhhhhs.hhshhYPssHShILILGNsKLKpshhplLtph+ 
.......................................................................................................................hh..h.hlh.h.hthhlGhhuNuF.I..s.lV.Ns.h.-W..l..+..p..p.c.l.u..s.sDhILhsL.ulSR.lh..Lh...h..l.l.hl.....sh.h.hhh.h..ss..hh.s..h.p..h...h...hphh.t.h.h.W....hhhN.ph.olWhuos.LSl.F.Y..hl.K...I..A...s.F.o.....p...sh....F.L....a..L.K.....h...R....l....s..p..l....l.....s......h.l........L.L....G.....s..l...l...h.s..h....h..........h..h...h....l....s...h.....t..h.....p.........h....h...h..t......t.h....................php.p..N....h.....T...h......p...........h.........p......h....p........p....h.....t.....h....h..............h.h..hh.............ls.s...h....l....P....F..h...l........h....L.......l....S...h..h..L.......L.......lh...S.Lh..+...H...h+....p...M....p....h....p....s....p....u....s....p.....D....P....S....s....c.A.H.....l..+...........A....l+....s....l....l....S....FL....l....L..a....h.l.....a...a......l....u....h..h....l..s..h..h....s....h....h....h..............p...p...........p...h...h..h....h..h...s...t...h...l.....h.h...h..a.PusH.S.hlL.I..h.uNsK.L+pshhphh....h................................................. 0 26 90 163 +5124 PF05297 Herpes_LMP1 Herpesvirus latent membrane protein 1 (LMP1) Moxon SJ anon Pfam-B_5174 (release 7.7) Family This family consists of several latent membrane protein 1 or LMP1s mostly from Epstein-Barr virus. LMP1 of EBV is a 62-65 kDa plasma membrane protein possessing six membrane spanning regions, a short cytoplasmic N-terminus and a long cytoplasmic carboxy tail of 200 amino acids. EBV latent membrane protein 1 (LMP1) is essential for EBV-mediated transformation and has been associated with several cases of malignancies. 
EBV-like viruses in Cynomolgus monkeys (Macaca fascicularis) have been associated with high lymphoma rates in immunosuppressed monkeys [1] 26.00 26.00 26.30 29.00 25.20 24.10 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.39 0.70 -5.53 2 1117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 10 3 0 569 0 128.40 64 101.97 CHANGED ME+DLERGPPGP.RPPhGPPLSSSlGLALLLLLLALLFWLYIVMSsWTGGALLVLYSFALhLIIIILIIFI.RRDLLCPLGuLsLLLLMITLLLIALWNLHGQALYLGIVLFIFGCLLVLGLWIY.LEILWRLGATIWQLLAFhLAFFLslILLIIALYLQQNWWTLLVDLLWLLLFhAILIWMYYHG.RHoDEHHHDDSLPHPQQATsDSuHESDSNSNEGRHHLLVoGAGDGPPLCSQNLGAPGGGPDNGPQDPDNTDDNGPQDPDNTDDNGPQDPDNTDDNGPQDPDNTs...........DNGPHDPLPHNPSDSAGNDGGPPpLTEEVENKGGDRGPPSMTDGGGGcPHLPTLLLGTSGSGGDDDDPHGPVQLSYYD .................................................................................................................................................................................................................................................................................................NLtA.uGGP.sNuPQDPDNTDDNGPQsPDNTDD.....NuP.....QsPDNTD.........DNuP......QsPDN...................D...NGPHDPlPpsP.sDuAGNsuGPPpLTEEVENK.GGDpGP...PsMTDG..........G..........G..........G.....c..........s.H............................................................ 0 0 0 0 +5125 PF05298 Bombinin Bombinin Moxon SJ anon Pfam-B_5347 (release 7.7) Family This family consists of Bombinin and Maximin proteins from Bombina maxima (Chinese red belly toad). Two groups of antimicrobial peptides have been isolated from skin secretions of Bombina maxima. Peptides in the first group, named maximins 1, 2, 3, 4 and 5, are structurally related to bombinin-like peptides (BLPs). Unlike BLPs, sequence variations in maximins occurred all through the molecules. In addition to the potent antimicrobial activity, cytotoxicity against tumour cells and spermicidal action of maximins, maximin 3 possessed a significant anti-HIV activity. 
Maximins 1 and 3 have been found to be toxic to mice. Peptides in the second group, termed maximins H1, H2, H3 and H4, are homologous with bombinin H peptides [1]. 21.40 21.40 21.50 21.40 20.30 18.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.79 0.71 -4.72 3 225 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 4 1 0 219 0 135.80 84 98.03 CHANGED MNFKYIVAVSFLIASAYARSEENDEQSLSQRDVLEEESLREIRGIGsKlLGGlKTAlKGulK-LAS+alNGKRTAE-HEVMKRLEAVMRDLDSLDHPEEASERETRGFNQEEIANLFTKKEKRILGPVLSLVGuALGGLIK .....MNFKYIVAVSFLIAS.AYARSVpNDEQSLSQRDVLEE.ESLREIRGIGGKILuGlKTALKGAAKELApTYlptKRTAE-.HEVMKRLEAVMRDLDSLDaPEEAoERETRGFNQ-EIANLFTKKEKRILGPVLGLVGsALGGLlK.... 1 0 0 0 +5126 PF05299 Peptidase_M61 M61 glycyl aminopeptidase Studholme DJ anon Merops Family Glycyl aminopeptidase is an unusual peptidase in that it has a preference for substrates with an N-terminal glycine or alanine. These proteins are found in Bacteria and in Archaea. 21.30 21.30 21.70 21.60 20.90 20.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.59 0.71 -4.14 10 668 2012-10-03 04:41:15 2003-04-07 12:59:11 7 4 574 0 253 702 215 119.60 40 20.25 CHANGED .tsthsLlAHEahHuWNsKhhRPA-Lhs.sacpsstssLLWlaEGpTpYaGhllssRoGlhopcpsLctLAtshuphhs.psGRtapolp-oohDs.hlphtRspshsshspppsYYocGtllh ..................tphLuLsSHEYFHsWNVKpl+Psshts....aDhspEsaTphLWhaEGhTSYYDcLhLhRuGllotcpYLchLupslsph.p..ssGRh.hQolu-SSa-A.WhKhY...+...tD.p.Nus...Ns.h.l.SYYsKGuLl.u................................. 0 76 165 215 +5127 PF05300 DUF737 Protein of unknown function (DUF737) Moxon SJ anon Pfam-B_6933 (release 7.7) Family This family consists of several uncharacterised mammalian proteins of unknown function. 
23.20 23.20 23.60 23.80 21.70 23.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.18 0.71 -4.33 21 182 2009-09-10 21:09:58 2003-04-07 12:59:11 6 3 44 0 74 148 0 140.90 33 75.13 CHANGED hDEpEplpVl+GIRLSEsVlsRMKEsSsPsstpp..ss.ssus.............................................................sspppl++tssppht.uLcps+ttoppp.sth....hppt.hKRhcpEQhhlQ-EluRlhc+E+pAAp-pLspulLRE+susccERt+AppL.......ARpLE-+EtEL++pDsFYKEQLuRlEE+suEhYKlToEQaHcAAocsEu+hK ...........................DEp-plpllpGl+L.S-sVlpRM+-s.s..ss.t........................................................................................................................................................................................................................t..ttp.........pp..tt..pttp...............................cthtp............+...hppERtuu..........p....c.p....hptul.pc+hpspcEp.cuthh........A+pLpp+-t..lpp.-saY+EQltplEc+s.phY+.osppappAApchcsph+............................................................... 0 5 13 32 +5128 PF05301 Mec-17 DUF738; Touch receptor neuron protein Mec-17 Moxon SJ, Pollington JE anon Pfam-B_6943 (release 7.7) Family Mec-17 is the protein product of one of the 18 genes required for the development and function of the touch receptor neuron for gentle touch. Mec-17 is specifically required for maintaining the differentiation of the touch receptor [1]. This family is conserved to higher eukaryotes. 
21.20 21.20 21.20 21.40 20.90 21.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.44 0.71 -4.17 5 201 2012-10-02 22:59:21 2003-04-07 12:59:11 6 2 118 0 116 212 3 110.50 48 33.85 CHANGED SDc.QllYlhtDcsA..uu+utlhGLLKVGpKcLFLaDsptsppclEps.CILDFYVHEScQRsGpG+cLF-aMLpcEpsos+QCAlDRPSsKLLuFLuKHYGLc+sVPQuNNFVLaEGFFsc .................................ppphhYlht-sps...ssp..Gsl..lGhLKV.....GhK+LFlh..............Dp.ptt.p.p.E.hc....sh..ClLDFY......lHEShQRpGhG+cLFpaMLpcEpl.p.P....p..plA.lDRPS.KLLtFLpKHYsLpph..........lP.Qs.NNFVlF-sFFt........................ 0 50 61 93 +5129 PF05302 DUF720 Protein of unknown function (DUF720) Moxon SJ anon Pfam-B_6980 (release 7.7) Family This family consists of several uncharacterised Chlamydia proteins of unknown function. 25.00 25.00 122.80 122.30 23.50 20.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.79 0.71 -4.38 6 105 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 34 0 13 42 0 127.40 45 76.02 CHANGED lYFsI.shLh.SVtsspashuIhA..LQ-NTshQQphspEhhplphlpVPchpKpDs.........Nps.IQshQosNQpIoAsRQhIQppLSuApQpAQs.ppslNosss...QlLQsssALlpTLppl.olhANL ....hYFsI..hLhpSVtlsQpslulhAppLQ-NTstQQpLNpEpsplpassVPcstKpsp.........sps.IQsVQssNQsloAsRpsIQspLSuApQsuQlIpSslNTNsNIhQQlLQssoALlpThsplsSllANL 0 3 3 10 +5130 PF05303 DUF727 Protein of unknown function (DUF727) Moxon SJ anon Pfam-B_7004 (release 7.7) Family This family consists of several uncharacterised eukaryotic proteins of unknown function. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.33 0.72 -4.26 10 194 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 117 1 136 179 0 97.50 31 24.97 CHANGED shclEApAsVs-ltFuVspIsV.ScpLPpss-lsYlNVcThEuspYClELTppGaRlVSppaDplssc..............spltlsopYaETlYuLLDsISPsYREsFG...spLsQ+LccLp .........................................................EA.uhlp-l.htVtph.l...Sp.L.pss...DlhYlNlpThEsppaClclo.p.pGa+l...su.p..hDphssc.............................p.s.tha-..TlhsLLsplSPta+...csFu...ptLhp+Lpt.......................................... 0 46 70 111 +5131 PF05304 DUF728 Protein of unknown function (DUF728) Moxon SJ anon Pfam-B_7223 (release 7.7) Family This family consists of several uncharacterised tobravirus proteins of unknown function. 25.00 25.00 117.50 117.10 18.10 17.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.51 0.72 -3.86 3 35 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 8 0 0 27 0 132.70 84 97.52 CHANGED KCALssC..EVssQuNchTCSMKHANKYNRaLA-KauVKRKCECsNCGWFPAIpVpsDalEVYFCCGMKHLpKC+...............................ScNPKKccR....LNTPKRLFRDDVDFGLstLFu ....TCVLKGCVNEVTVLGHE.TCSIGHANKLRKQVADMVGVTRRCAENNCGWFVClIINDFTFDVYNCCGRSHLEKCRKRhEARNREIWKQIERlRAEcs.sTVKKS+NSKsSKKcFKEcc-FGTPKRFLRDDVPFGIDQLFA. 0 0 0 0 +5132 PF05305 DUF732 Protein of unknown function (DUF732) Moxon SJ anon Pfam-B_7356 (release 7.7) Family This family consists of several uncharacterised Mycobacterium tuberculosis and leprae proteins of unknown function. 
22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.66 0.72 -3.93 65 1009 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 197 0 174 553 1 102.60 25 72.21 CHANGED thhusssuhsu...........huhAAP.A......pA..........s..D.................s...sFLssLp.psGIs..as..ssstAlthG+tVCstlsp.Gt........shspllsplttp.ts.shs...........tpAuhFsshAhssYC....Pphhsth ..................................................................................hs...hhhhshhuh..........hshAuP..A..............pA.....s..................sh..D..................................s.....sFL...ssLp..pt..Gls.......as...........sssp..u...lshG+.tVCptl.ss..Gt.............shtpl......ss...t..l..tp....p.......s..shs..............ppAst.F.s....s.h.AhptY..C....Pphht..h....................................... 0 22 84 143 +5133 PF05306 DUF733 Protein of unknown function (DUF733) Moxon SJ anon Pfam-B_7392 (release 7.7) Family This family consists of several uncharacterised Drosophila melanogaster proteins of unknown function. 25.00 25.00 37.60 37.60 21.20 20.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.95 0.72 -3.70 14 116 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 0 65 113 0 94.30 33 72.45 CHANGED hpPsluYtLFhYRpELsR+pt..chhRlSpoKlhLTcELIupph.p...........sh.tpsSs--Lpt.............LsREl.....a+ccLp................cplc.Rhpchpthuhpp..tctp ...h.PoluYtLFhYRpELpR+ps..phhRlSpoKlpLTcpLIupsh.p..........................sl.ppCSs--Lps..................................LsREl.....FK+cLp..................cplc.Rh+chpphshpp....................................... 0 9 9 36 +5134 PF05307 Bundlin Bundlin Moxon SJ anon Pfam-B_6974 (release 7.7) Family This family consists of several bundlin proteins from E. coli. Bundlin is a type IV pilin protein that is the only known structural component of enteropathogenic Escherichia coli bundle-forming pili (BFP). 
BFP play a role in virulence, antigenicity, autoaggregation, and localised adherence to epithelial cells [1]. 21.10 21.10 21.30 21.10 20.70 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.14 0.72 -4.12 2 81 2012-10-03 10:38:27 2003-04-07 12:59:11 6 2 47 3 1 100 12 90.00 68 53.31 CHANGED MVSKIMNKKYEKGLSLIESAMVLALAATVTAGVMFYYQSASDSNKuQNAISEVMSATSAINGLYIGQTSYsGLNSNILLNTSAIPDNhKcstpshlT ..........MVSKIMNKK.YEKGLSLIESAMVLALAATVTAGVMF..YYQ..SASDSNKoQNAISEVMS..ATSAINGLYI...G...QT.S.YoG...LsSs...I...L.L..NT.S.AI.PDNY.KDTpNphl......................... 0 0 0 1 +5135 PF05308 Mito_fiss_reg DUF729; Mitochondrial fission regulator Moxon SJ, Eberhardt R anon Pfam-B_6919 (release 7.7) Family In eukaryotes, this family of proteins induces mitochondrial fission [1,2]. 26.30 26.30 26.30 26.30 26.10 26.20 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.76 0.70 -5.01 15 225 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 59 0 110 182 1 204.40 35 70.71 CHANGED hplsl.WpsKPYGSoRSIVR+IGTNLPLtPCPRspFQll..Phsschssssssp...VPShADVuWlAsDEsEoasRlRs-lpspppp.+.sshhshc....RpsSlPsLppcEsphps.....tthss-sAlpKIoALEsELutLRAQIA+IVshQEtpssosush............sss.....o...ssohs.sssp..P....s..........PPPPPPsP.PssuLpsSsSsl............shhpER+p..ppsssscTlspspsKp....phPsML-lLKDMN+VKLR .................................................................l.WpsKsaGus..RSlVR+IGopLsLh..CsRspFphh..........s.h...sphs.......ppss......VsohADlhWlAt...-.E...tc...s.sRh...R..s..p..h.....hsh.....h.p.hh.hp......Rp....SlPslptpc..p..h..............................thss..Alp+h.ssLpsELutLRuQIApIVshpptps.hsss.................................sssuh..ss.........s...................................ss...sl.pssSs.................................shhtEp+t..pptsts.ps....spspp......hssMhtlLKDhp..phKh.......................................................... 
0 16 23 52 +5136 PF05309 TraE TraE protein Moxon SJ anon Pfam-B_7677 (release 7.7) Family This family consists of several bacterial sex pilus assembly and synthesis proteins (TraE). Conjugal transfer of plasmids from donor to recipient cells is a complex process in which a cell-to-cell contact plays a key role. Many genes encoded by self-transmissible plasmids are required for various processes of conjugation, including pilus formation, stabilisation of mating pairs, conjugative DNA metabolism, surface exclusion and regulation of transfer gene expression [1]. The exact function of the TraE protein is unknown. 22.40 22.40 24.30 24.10 21.70 21.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.87 0.71 -5.15 17 480 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 351 0 62 301 17 177.20 31 92.14 CHANGED M-hphppsptphhhhth.hhsslhslhhhssllh...sathtscpcpsVhs.shstshslSssusDssYLc.hscshhhLpLNhoPcslDh.hpplLphscPuupspl+stLhcpsppl+ssslsstFhhsplcVsPpshpsclpGpL+TalGsptlss....-h+pYphpasacss.ltLssFtpl.ss...-p .................................................................pt.p.hhh...hhhthl...lsl.l...hsNll..........shpht.s.cpc.s.sl.s.P...sh..s..t..shsVSp.s.sAstsYLpphul.hhtLhLNVoPpsVDtp+psLLphlpPuspsph+...shLtccAc..pIKs.csVsosFh....s.pl.c.s.s.pt.spVplpGh..h+.shl..us.up....s....ph.+.pYhl.hch.p.s.uhhhLtth.p................................................................. 0 17 35 51 +5137 PF05310 Tenui_NS3 Tenuivirus_NS3; Tenuivirus movement protein Moxon SJ anon Pfam-B_7740 (release 7.7) Family This family of ssRNA negative-strand crop plant tenuivirus proteins appears to combine PV2 [1], NS2 [2], NS3, and PV3 proteins. Plant viruses encode specific proteins known as movement proteins (MPs) to control their spread through plasmodesmata (PD) in walls between cells as well as from leaf to leaf via vascular-dependent transport. 
During this movement process, the virally encoded MPs interact with viral genomes for transport from the viral replication sites to the PDs in the walls of infected cells along the cytoskeleton and/or endoplasmic reticulum (ER) network. The virus is then thought to move through the PDs in the form of MP-associated ribonucleoprotein complexes or as virions [2]. The NS3 protein appears to function as an RNA silencing suppressor [3]. 25.00 25.00 37.10 37.00 19.80 19.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.45 0.71 -4.39 9 133 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 12 4 0 134 0 172.50 48 84.62 CHANGED LLhpsshhplll........................phscH..ua.plhptssstEsh.hhlpsuIWlLoap+shssphhhshsshsusasphhlphcPps...spsKCWhCc....hspssL.hhh.s..lpGF.hssE.YhVshK-Hsu........Ehhh.sshKshY+sspKhcHcYllsost.Pl.sppa ..................h..ppshoph.hshcDh....p.....aclhppRhsphccH..pa.pLhptssDt.sht.hltshIWlhuac+shs-chRhsphhhsuohschhhplKPcs...spTpCWhC+....hpp-sLshtl.s..VpGFsssuEhYhVslpDHsG........c.schssh+shY+ssuKh+HKYllsost.Pl.St+a.. 0 0 0 0 +5138 PF05311 Baculo_PP31 Baculovirus 33KDa late protein (PP31) Moxon SJ anon Pfam-B_7777 (release 7.7) Family Autographa californica nuclear polyhedrosis virus (AcMNPV) pp31 is a nuclear phosphoprotein that accumulates in the virogenic stroma, which is the viral replication centre in the infected-cell nucleus, binds to DNA, and serves as a late expression factor [1]. 
25.00 25.00 28.20 27.10 21.70 21.20 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.01 0.70 -5.13 16 50 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 45 0 0 47 0 275.50 34 95.87 CHANGED hssshsplhsKh..-susaNKssh-hlpssINhhEKKKIsYplhshPlhsD.............DKKssKRsKKhISNNKYILFNSWY..TK.RpssWPsSasMWNlhKspspspsFVplFDahEKlGKsIss+pusss.................spssssccpssht..hs.s-l.cEsNc+RsKLYsEFYclLspTFpsssAPusS.IYD.................-+LTRshlppulphFKs.lhhcl.......................ppsttstsussshssos.s.t.............RKRKps................htKpstttp+pppppt.ssshtMssDss-DoQ.MSp .............................................s.ts.tplhsKh..EsSshNKoph-hltshINhhEKKKIsYplhshPshsD.............DK.KssK+sKK..lhoNNKYILFNSaY..TKl+pspWPsSpsMWNlhKspspspsFlcIFDa.h.EKlGKsIps+ppssus.................spssssccppsht.thshs-l.cEss-pRsKLYsEFYplLshTFpsss...A..PusS...IYD.................chLTRshlppuhptFKs.llhch..........................p.s..ssssshssoshstt.p...........RKRKps.................stpp.tttpppptpt..sss.sMssDpspDop.MS.................... 0 0 0 0 +5140 PF05313 Pox_P21 Poxvirus P21 membrane protein Moxon SJ anon Pfam-B_7803 (release 7.7) Family The P21 membrane protein of vaccinia virus, encoded by the A17L (or A18L) gene, has been reported to localise on the inner of the two membranes of the intracellular mature virus (IMV). It has also been shown that P21 acts as a membrane anchor for the externally located fusion protein P14 (A27L gene) [1]. 
20.70 20.70 20.70 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.38 0.71 -5.03 10 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 43 0 0 42 0 188.40 56 95.28 CHANGED MSYLSYYNMF-DFsAGAGVpDpELFTcEEEcSFLPKcss..suta....h.......shcs.aP.sILhpNDI+oLlGLILFVLAITTsPlIAlIMIulAShLlPhPSLVIAYCLuhQIh...NsssssslGMSIlCVshSl.lTlhlsSlS...+sshTIoYIILulLFClYAFNlo+hst.pspsss.....tCs+thpuGsKhst-tP ..........MSYLpYYN.MlDDFSAGAGVhD+-LFTEEpQpSFhPKDGGhhps-Y..............ushNca..uIhpNNDVRoLLGLILFVLALhSPPLISllMIhIuShLLPL.sSLVIsYCLshQhh...+sGsuNTlGMSIVCllAAl.IhMAlNshT..sSphhshISYIILhILFhsYVhNIpRpchh+...Shslo.....sCs+sapAGNKhss-hP...................... 0 0 0 0 +5141 PF05314 Baculo_ODV-E27 Baculovirus occlusion-derived virus envelope protein EC27 Moxon SJ anon Pfam-B_7811 (release 7.7) Family This family consists of several baculovirus occlusion-derived virus envelope proteins (EC27 or E27). The ODV-E27 protein has distinct functional characteristics compared to cellular and viral cyclins. Depending on the cdk protein, and perhaps other viral or cellular proteins yet to be described, the kinase-EC27 complex may have either cyclin B- or D-like activity [1]. 
25.00 25.00 52.70 52.00 18.60 18.30 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.87 0.70 -5.34 19 59 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 56 0 0 53 0 272.50 40 97.46 CHANGED h+s..ptsKl...RTVTEIlsucsKhpK-YDls-hshKN.sSLcSa-phplhLsluKYMAMlssLphoQsLltlF+s+sssccIlolVhsSLuFVHNRhsPhlspFs.cMcFVlscspchuIPGEPIlF.....pps-cpslhChlDRsoIl+hLE+phDsshphpp.sscc.pthKlhcshpsstp+++cs..t.shp.................tsshplsEs-sTQYlTLLlIhEHAYlHYalL+shshhpYhcoLlsHolhspc..sshtsshsNLLLSKF+FslE-.-p.+pssss......hs...lh ..........h.+s...sKl...RTVTEIlsucpKlpKcYDLu-F-hKNLsSLcSa-shcIKLhluKYMAMLssLphTQPLLplFRs+sss+cIsulVhuSLuFVHNRhpPhVspFst+MEFVls-stchsIPGEPIlF......ps-cp......s......llChlDRsSIl+hLp+pFDschplsppspcp.pth+lhKshssstp++pppp...t.....................sshplsEs-sTQYlTLLlIhEHAYlHYaIhKNashhpYscSLlDHolFssK.+sshssphsNLLLSKF+FslE-.-p.pppsss...thh........................... 0 0 0 0 +5142 PF05315 ICEA ICEA Protein Moxon SJ anon Pfam-B_2792 (release 7.7) Family This family consists of several ICEA proteins from Helicobacter pylori. Helicobacter pylori infection causes gastritis and peptic ulcer disease, and is classified as a definite carcinogen of gastric cancer. ICEA1 is speculated to be associated with peptic ulcer disease [1]. 
19.00 19.00 21.10 20.70 18.00 17.80 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.65 0.70 -5.05 2 70 2012-10-05 18:28:12 2003-04-07 12:59:11 6 1 40 0 4 69 19 165.40 58 93.91 CHANGED MphoKpELFLcLApPscpGlSRWVps.EFhGcYpsLpLGNGGSWCRpsSsLA+-ahlEFDKt.TsGNSIDtIRLNGaNpcphFpQ.I+pDIKshhpppsCsMhGVpGpSENTpIEIDHKDGRKsshRVSDlpTQph-DFQsLCKAsNDhKRQICKpCKEoshRasAppIsGNPYsFY.G-.pYs...GCVGCYQYDPlpYRKosscRIhsEuhphs...haphhYppcss .......................................................hpRWlts.EF.sthptLt.hu....Nst.....sWhRtsSshA+ca.lEFDKtp.o.GNSID+IRLNGapscpsF..NQsIRpDIKsaYpppsCsMpGspGpSENTpIElDHK..DGRKsD.RVS....D..hss.Qph.-DFQsLCKAsNDpKRQICKcCKEoGhRasAppI...GN....YsFY-G.........t.pYD..............GCVGCYQYDPlpYRKpsscRIhpEuhphs.................................................. 3 2 4 4 +5143 PF05316 VAR1 Yeast_VAR1; Mitochondrial ribosomal protein (VAR1) Moxon SJ anon Pfam-B_7802 (release 7.7) Family This family consists of the yeast mitochondrial ribosomal proteins VAR1. Mitochondria possess their own ribosomes responsible for the synthesis of a small number of proteins encoded by the mitochondrial genome. In yeast the two ribosomal RNAs and a single ribosomal protein, VAR1, are products of mitochondrial genes, and the remaining approximately 80 ribosomal proteins are encoded in the nucleus [1]. VAR1 along with 15S rRNA are necessary for the formation of mature 37S subunits [2]. 
22.10 22.10 22.40 22.40 20.50 22.00 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.70 0.70 -5.85 5 28 2009-09-11 14:04:11 2003-04-07 12:59:11 7 3 26 0 9 39 0 290.00 34 79.83 CHANGED pKhLLKNhLLKMNsNp.MN.shchsp+Ns.h.sKYlpEhNNKGNKLQ+lNNMNNWssQlYNYNKNNsINshlsDKLlNKLLYKLMslK..hINNN....sst..p+IIIsKPhaKHolNKlNI+FYY....YNsNhpshNNNNNpYYhNMINKLMNILN.....NpN.hNMN.NluNILShYYNKKVhIEsIKLpYlYNNN-IhNKYISlhDh-KYNNGLssc.Yp+lLNNhMPKhNspNIpMNYINNINNhNNlKY.NNMI..L......sSNN.l.........NIpNIYNshsINpIsM-LLMaKYLIGWSILaKGRLNKN..lSRosKspLLNGShsNKhYhKs...........................NINpNYKLNYIPNNHNIhNhNNVN..KNGKYNIKVKLNaI ......................................hK.hLh.hp.p..hp........p.............................s..sspL......QplNphNsap..hYpaNps..l..hh.sphhppLLhKhh..hh.....ss.............pIlhS+.shhpHshN+.lsI+FYY.......hp..s....p...s...Np.spYYhshhscl..hNhhs........Np..s.........ssLsNlLShYY.N.KcVpIpPI+LpY.YhNo-IhophI...h.shsp.h.s.psl.hp.a.+hLpshhPhhNsp......It....hsYlss..hs.s......hN...p.ph.NNhh........ssN........................slpNl.Y...pshslpp..hs....shLhhKYLsGholhhKG+h...pp..hsRo.p.h.lhpGoFpNhhh.hs...........................p.h.p.spYKLNYhssNhsh.s.s....p..lN..Ks...GKasIKlKLNhI........................... 0 2 6 8 +5144 PF05317 Thermopsin Thermopsin Moxon SJ anon Pfam-B_7819 (release 7.7) Family This family consists of several thermopsin proteins from archaebacteria. Thermopsin is a thermostable acid protease which is capable of hydrolysing the following bonds: Leu-Val, Leu-Tyr, Phe-Phe, Phe-Tyr, and Tyr-Thr. The specificity of thermopsin is therefore similar to that of pepsin, that is, it prefers large hydrophobic residues at both sides of the scissile bond [1]. 
25.00 25.00 52.70 36.10 20.20 20.20 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.80 0.70 -5.47 25 105 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 33 0 57 105 6 261.40 30 40.72 CHANGED hhhpsh.ls..hhhhhstss........PhGlusYul.........shslpTspVlGhhNIsSLp..uastsstt.......shuASLQLNslLp.sshhs.tshtaWlQNVl.......hF.Tsssp...hsalDNlWNhTu.shusloss.l....pGpGtl..........p.YYsYus......shshshPhohhLhlNsohsst.ushltFGYsl.psusl........YDsVslsss.....s.tsA.hhlsGhshss.............tGl.hhDsELVaGGsGsGpsssapshsupLuLaY.hsu..ssapshsssYsaGhDTuEoutslp .......................................................................................h.hs..........h.h.s.ss........PsGlssYGl............hslpTspVlGhlsIsslp....uh.shosst......sshuASLQLNshLp.hph.s.tphtaWlQNVl........F.ssssp...hphh-NlWNhTu.s.huslsss.l....pGpGhl.........ttt.YYsYss.........shhphshPhohhLhlss.ohs.st.sshlsFGYsh.psush......................hYDsVsl.ss........h.su.hhlsGh..s..hss.............hGh.hhDsELVhGGsGsG.p.sshhpph.suhLuLhY.h...ps....sshsshsshYsaG..hDTuEoussl.h................. 0 11 25 50 +5145 PF05318 Tombus_movement Tombusvirus movement protein Moxon SJ anon Pfam-B_4393 (release 7.7) Family This family consists of several Tombusvirus movement proteins. These proteins allow the virus to move from cell-to-cell and allow host-specific systemic spread [1]. 21.30 21.30 21.50 21.40 18.30 21.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.69 0.72 -3.35 6 85 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 24 0 0 86 0 64.50 39 91.02 CHANGED MDsp....ps.p.l.....tsGcpctuGp+GppK..s+RpVApcAl+....K..pussuosGGsWVhVADKlEVoIsFNF ......................s.p.........pV.......shstppthsus+GKpK....s++sVA+DAls....K..su.p.cussGusaVsVADK..IcV..sIpFNF........ 
0 0 0 0 +5147 PF05320 Pox_RNA_Pol_19 Poxvirus DNA-directed RNA polymerase 19 kDa subunit Moxon SJ anon Pfam-B_6945 (release 7.7) Family This family contains several DNA-directed RNA polymerase 19 kDa polypeptides. The Poxvirus DNA-directed RNA polymerase (EC: 2.7.7.6) catalyses DNA-template-directed extension of the 3'-end of an RNA strand by one nucleotide at a time. 25.00 25.00 211.80 211.60 24.80 24.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.95 0.71 -4.50 12 52 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 36 0 165.80 63 95.59 CHANGED M--SsDI..sa.S---p..pY-E----...p.tEuhsooDlsshKpSsh+h.puhSpsh--t....pss.+plos+IpsIK+RYTRRISLFElTGIlAESYNLLQRGRLPLls-LSD-Th.+pslL+lllcEIEEGsCPIVIEKNGELLSlsDFDpcGLpaHLDYIhsIWKpQpRh ....MADoDDII.DY...-SDD.s..EYE---E-..-E-uESLETSDlss..pSuYKI.ESASopIEDA....poshK+lus+ISALK+RYTRRISLFEIsGIIAESYNLLQRGRLPLVS-hSDETh.KQNhLHVlIpEIEEGoCPIVIEKNGELLSVsDFDK-GLKaHLDYIIcIWKhQpRY 0 0 0 0 +5148 PF05321 HHA Haemolysin expression modulating protein Moxon SJ anon Pfam-B_7025 (release 7.7) Family This family consists of haemolysin expression modulating protein (HHA) homologues. YmoA and Hha are highly similar bacterial proteins downregulating gene expression in Yersinia enterocolitica and Escherichia coli, respectively. 25.00 25.00 25.60 25.50 24.60 24.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.76 0.72 -3.83 14 1466 2009-09-11 00:35:18 2003-04-07 12:59:11 6 1 560 3 79 307 5 56.20 58 82.15 CHANGED hphR+Cooh-TLEKlh-+p+.p..Lsss.EhpsFpuAADHRLAELsM.sKLYD..KlPssVWp ......h+hR+hpol-oLE+lh-+s+Yp..LoDs.....ELtsFYSAADHRhAELsh.sKLYD..+lPpSVW+............ 0 4 16 48 +5149 PF05322 NinE NINE; NINE Protein Moxon SJ anon Pfam-B_7029 (release 7.7) Family This family consists of NINE proteins from several bacteriophages and from E. coli. 
25.00 25.00 30.30 30.20 23.80 23.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.15 0.72 -4.23 3 157 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 136 0 0 53 0 58.20 92 95.15 CHANGED MRRQRRSITDIICENCKYLPTKRSRNKhKPIPpESpVKTFsYluuLhDS+Ws...RaCs..R+TR .......MtRQRRSITDIICENCKYLPTKRSRNKRKPIPKESDVKTFNYTAHLWDIRWL...RHRA..RK........ 0 0 0 0 +5150 PF05323 Pox_A21 Poxvirus A21 Protein Moxon SJ anon Pfam-B_7034 (release 7.7) Family This family consists of several poxvirus A21 proteins. 25.00 25.00 29.10 28.90 18.50 17.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.51 0.72 -3.67 11 60 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 46 0 0 41 0 112.80 61 97.71 CHANGED MIoLFLlLCYFILIFNIIVPsIuEKLRpEacAas+Y+pl.ppcalCVDspLhsYsFssoGlpAphhlDsss.sPLPCS+hspscst..chlsC-..stsslhch+csCu+AYh-LFh MITLFLILCYFILIFNIIVPAISEKMRRE+AAYlsYK+L.sKsFICVDDRLFSYsFTTSGIKAKhAVDscs.lPIPCS+IN-VNsN...csLhCD..pDcsDIs.sFsRSChRAYuDLFF. 0 0 0 0 +5151 PF05324 Sperm_Ag_HE2 Sperm antigen HE2 Moxon SJ anon Pfam-B_7044 (release 7.7) Family This family consists of several variants of the human and chimpanzee sperm antigen proteins (HE2 and EP2 respectively). The EP2 gene codes for a family of androgen-dependent, epididymis-specific secretory proteins.The EP2 gene uses alternative promoters and differential splicing to produce a family of variant messages. The translated putative protein variants differ significantly from each other. Some of these putative proteins have similarity to beta-defensins, a family of antimicrobial peptides [1]. 
20.10 20.10 20.80 22.90 18.20 18.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.28 0.72 -4.06 4 65 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 19 0 13 61 0 68.50 59 59.33 CHANGED KphLhP.hsSLLLVALLFPG.SpApplNHpsTEuPRc.pEEusGQGsNcSpLLHHpVKRh.llPRpPPY.Es-P ..............+QRLLP.hsSLLLVALLFP.G.SpARHVNHSuTEu.pEL...REtAsGQGTNtSQLL+HsVKRt.l.PRTP.Y.t................ 0 2 2 2 +5152 PF05325 DUF730 Protein of unknown function (DUF730) Moxon SJ anon Pfam-B_7197 (release 7.7) Family This family consists of several uncharacterised Arabidopsis thaliana proteins of unknown function. 27.20 27.20 27.60 172.60 26.70 27.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.63 0.71 -4.31 3 10 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1 0 0 10 0 117.10 82 83.76 CHANGED MEIRMRMRYGENRRRDKGVPIECDCNAKVVVATS+DPVTSGKLYFSCPYEISDGPGRGCGFKRWWTVALCDEFDMIKEEpsEMKKDLEAANK+VEuQsEKIFLMEKKFETLEKKYESlNKYS .MEIRMRTRYGENRRRDKGVPIECDCNAKVVVATSLDPVTTGKLFFSCPYEISDGPGpGCGFKRWWTVALCDEFDMIKEEToEMKKDLEAANKRVESQsEKIFLMEKKFETLEKKYESLNKYL...... 0 0 0 0 +5153 PF05326 SVA Seminal vesicle autoantigen (SVA) Moxon SJ anon Pfam-B_7065 (release 7.7) Family This family consists of seminal vesicle autoantigen and prolactin-inducible (PIP) proteins. Seminal vesicle autoantigen (SVA) is specifically present in the seminal plasma of mice. This 19-kDa secretory glycoprotein suppresses the motility of spermatozoa by interacting with phospholipid. PIP, has several known functions. In saliva, this protein plays a role in host defence by binding to microorganisms such as Streptococcus. PIP is an aspartyl proteinase and it acts as a factor capable of suppressing T-cell apoptosis through its interaction with CD4 [1]. 
23.10 23.10 23.10 23.20 23.00 22.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.71 0.71 -4.34 11 63 2012-10-03 02:52:13 2003-04-07 12:59:11 6 1 32 1 27 173 0 111.50 44 84.82 CHANGED sLQhLapsossThLLlLCLpLtss....cuQ-N..pppslhhshclssss....spspEsTVpLsVpTpl+ECMVlKsYLhSNhslc.GuFNYpaTuCLCsp.PpsFaWDlhsscTsplsssVDll+EhsICPDD ......................thhhpsssshhhLllhL.Lths....puQ-s..sR+hlhhshplPpos....cts-ElTssLpVpT-L+ECMV.lKsY..L..h..S..s..h..s..l.-...GuF.N.Y...p..YT.uCLCs.s.P+TFYWDhh..ssc..T....spIsuVlD..llpEhsICP-D....................... 0 1 1 6 +5154 PF05327 RRN3 RNA polymerase I specific transcription initiation factor RRN3 Moxon SJ anon Pfam-B_7041 (release 7.7) Family This family consists of several eukaryotic proteins which are homologous to the yeast RRN3 protein. RRN3 is one of the RRN genes specifically required for the transcription of rDNA by RNA polymerase I (Pol I) in Saccharomyces cerevisiae [1]. 20.60 20.60 23.90 21.60 19.60 19.70 hmmbuild -o /dev/null HMM SEED 564 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.94 0.70 -6.40 33 404 2009-01-15 18:05:59 2003-04-07 12:59:11 6 12 278 2 283 410 5 426.80 23 78.29 CHANGED 
sstpsschotphhcp.....hV+cALpphp................pG...-sstacplpphh..........tshptt...-s.s.sphpplLpsLtssVstLDps.sssLVp.ulLsh.pWh.s+spsh.........lctYlpFLssL.............suup.spalstllshLlspFs..spt...ssp.........t....p..phhpphHthLppllchlPsusshL.shLsppFPat.scopcshhs.YlpNLL+ltpYs...spLptclhpLlh-+llclDV..........phps-h--l........-D-ppcth.tt.t.pstppth.-.-sss...............ttp.....s.ps-sp.....................................spphpsl+phsp+LDsllshlhsahcs............shsssphsp...............................s.slFcsLlshFpshILPTapo+asQFLlFahsphpsphs-.pFlspLhclsh........s.sspss.h..R.uAssYLuSalARA+alstpplphlhshLssaLspYltpp-sss...............tssshcc.atsFYussQAlhYlFCFRa+sLh.........................sps.........hsahsslcph.Lp+slhSKLNPLKhCsPsVVstFA+lup+hsl..sYsaoI..lEpNpR...................t+lsphhuts..................t....p.........L-uaFPFDPahL.pS+ch..lcs.YlpW..ptlsspp ...................................................................................................................t....................h..hh.t........................p.........ht.hh..h.................................t.......h..hl..h.t.h..lp....t...ht.l...lp..lhph..W....th.s..th...........hp..aht..hlh.L.................sss..s.aht.shthlhpthh............................................................t.hH..hlp.lh.phhP.s...hl.sh...l....ptaPhh.....ppstt......h....Yhp.Nlltl..hh...stlt..tlhthl...hp+hlplDl......................pht.hpph..........-ct...t.................................t.....p..pt.t..................................................................................................................t..t.hp.hhpplD.hh.hhhphhp...............h...p...t.................................................................................................................................................hap.LhthFpphlLss.atopasQa..lhFahsph.p.................t...............hhc...tFhthLhp.hh..........................................s...sp...ss.h.....R.sussYluSalARuthls...hl...............h.hhphLhpah...pt..a.htp.ptts..........................
......................................ssht.h..at.FYushQAlhYhhsFRhcplh.....................................................................pt............hth..pl.p......hpphlh..s..p..lNP..L+hC..slst.F...utlspthth....ha.hhsl..lEps.pp..........................................l..hhs.......................................................................L-saFPFDPh.L..st.p.h..lt..a..ap................................................................................................... 0 116 178 239 +5155 PF05328 CybS CybS Moxon SJ anon Pfam-B_7102 (release 7.7) Family This family consists of several eukaryotic succinate dehydrogenase [ubiquinone] cytochrome B small subunit, mitochondrial precursor (CybS) proteins. SDHD encodes the small subunit (cybS) of cytochrome b in succinate-ubiquinone oxidoreductase (mitochondrial complex II). Mitochondrial complex II is involved in the Krebs cycle and in the aerobic electron transport chain. It contains four proteins. The catalytic core consists of a flavoprotein and an iron-sulfur protein; these proteins are anchored to the mitochondrial inner membrane by the large subunit of cytochrome b (cybL) and cybS, which together comprise the heme-protein cytochrome b. Mutations in the SDHD gene can lead to hereditary paraganglioma, characterised by the development of benign, vascularised tumours in the head and neck [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.54 0.71 -4.54 48 384 2012-10-03 07:11:12 2003-04-07 12:59:11 7 5 272 30 229 486 257 123.80 29 70.69 CHANGED hsphphlPp....Pthl.GolN..-sh...PtssthcGShHWshERlluluLlPLssssahs.ss.........sslhDusLussllhHsHhGFpusIhDYlsp+haG.thtphAhhlLthuoslshhGlY.hEop-s.GlscslpplWp ...................................................................h...........................t.......st..ssttpuShHWshERllusuL...lP..Lhs...s..sahs..............................sss...hDshL...u...s....s.L...l....l...HsHh...Ghp.uslhDYl.....phhs...thp....phu.h.h.hL.h.h.h.o.sh.shhGla.hpp......p-s.GlscslpplWp....................................... 2 70 121 188 +5158 PF05331 DUF742 Protein of unknown function (DUF742) Moxon SJ anon Pfam-B_3675 (release 7.7) Family This family consists of several uncharacterised Streptomyces proteins as well as one from Mycobacterium tuberculosis. The function of these proteins is unknown. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.37 0.71 -4.48 14 710 2012-10-04 14:01:12 2003-04-07 12:59:11 6 6 167 0 269 736 6 113.50 37 80.96 CHANGED scssshVRPYslTsGRocsst...slsLholVsAtsstP.................sshtPEctpILcLCps...shoVAElAAtLcLPluVV+lLLuDLh-sGtlss+tPhs......sphPDpsLLccVlsGLRpL ....................................................................................................................s..tsshVRPYslTu.GR...Tcssh.........sLsL.sl.Vssp.s.s.ss.....t..t..........................................hsht.sE..+ppIl.p..LCpp....shS...VAEluApLcLPlGVsRVLluD.LsssGhlpl.+p.ssss..............s.....t..........ts..D.....h..sLLccVLsGLRpL.............................. 0 76 213 260 +5159 PF05332 DUF743 Protein of unknown function (DUF743) Moxon SJ anon Pfam-B_4046 (release 7.7) Family This family consists of several uncharacterised Calicivirus proteins of unknown function. 
27.70 27.70 27.70 120.10 27.60 27.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.30 0.72 -4.09 4 42 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 18 0 0 36 0 104.60 56 96.36 CHANGED NhGLsLlsolANAhhEGp+l-LsupuLuhpp+hh-sEpcaNhsRLsF-...........KppFppsh-L+lpGslhRhpthRAAGaRlNPYSNGpQlahDEsttApLpSYhsFYKs .NSILGLIDTVTNTIGKAQQIELDKsALGQQR-LALpRhsLDRQALsNQ...........VEQFNKlLEQRVpGPlQSVRLARAAGFRVDPYS.sNQ.aaD-thsA.hhSY+shaKs. 0 0 0 0 +5161 PF05334 DUF719 Protein of unknown function (DUF719) Moxon SJ anon Pfam-B_7667 (release 7.7) Family This family consists of several eukaryotic proteins of unknown function. 20.80 20.80 20.90 22.30 20.70 20.10 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.36 0.71 -4.25 6 168 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 81 0 95 154 0 158.60 48 33.17 CHANGED S..puu.WG....hWGuhupSlLSoAotslATl...hTpVppthpsslGlPsPpELutpsstEcAEpstpssspt-s......-sspGots.....hssuFG.hstlsssVpshGppVloGGLDsLEhIGKKTMslLtEsDPGhhpp+pLh....N+sssLSQVLREAKc+.EchpcshpQlphEppKt..hHathLFD-YpGL .......................................su..Wu....hWG..oW.G.K.S.lLSoAoATVups..............los.VhEK...A....t..soLtI.ss....sph.up.pst...ts.cp....t.............................................ptspssss............su.u.sh...G.hh.......Ssl.o..ssV...........Q.sT..GK....oV.loGGLDALEFIGKK..TMsVlAEuDPGFK+TKsLM............pRssoLSQlLREAKEK.Ec.pphup...plshEps......sHYGhLFDEaQGL................. 0 23 29 56 +5162 PF05335 DUF745 Protein of unknown function (DUF745) Moxon SJ anon Pfam-B_5169 (release 7.7) Family This family consists of several uncharacterised Drosophila melanogaster proteins of unknown function. 
22.40 22.40 22.40 22.60 22.30 22.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.37 0.71 -4.77 14 190 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 39 0 120 201 4 174.00 35 58.26 CHANGED sshs...htsss+pKuSsIApKAAp-AKsAsDuQsuAuctAupplKppLA-KAhtAA+AAEAALAGKQQll-QLEpElpEActVVpE.ppuLpsopssApuAttssppAppplpsLpshlcsApsslssh-psAssAQpElsEKsQLL-AA+pRV-hLt+QlpsA+tDa-pTKpAAhKAACAApEA+Q+A .........tttt.......hpspptussIAppAAppAKuAsssQ.uAuptAA.psKtpLAp+.AhpAApAApA.sLsGKptllcpLcpchpEActsVpcEpspLppu.pssspuAtp.......ssppAppp....lpsLpshlpsApsshsssppsAssApp-Lup.....+sphlts.A+pRl-tLpcQltsA+tDappT+pAAhKAssAA.EAppps............................. 0 34 39 84 +5163 PF05336 DUF718 Domain of unknown function (DUF718) Moxon SJ anon Pfam-B_7227 (release 7.7) Domain This family consists of several uncharacterised bacterial proteins of unknown function. 23.40 23.40 23.70 23.40 22.70 23.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.32 0.72 -4.23 61 1431 2012-10-02 00:20:33 2003-04-07 12:59:11 8 7 1252 8 330 844 242 104.20 38 92.26 CHANGED pRhuahhpLc..Psph-EYc+pHsp..l...WPEllptL+psGlpsYSIaLc.tppshLFuhhEhps.....-tshs...thupsslsp+WWshMuslhc..s.ssspPsp..........ssLpcVF+Lc .........................RhAalhplp..P-..t..hcEYp+RHst.....l....WPEl...ps...s...L..+p..p.GspsYuIaLc..pp..........p.....s.....hLFuhlEhcs................Et.c..h..s.....tl.A..so....s....lsQ+WWcaMs....clh....ssP.D..s.......o....Pss.................spLpEVFaL............................. 0 100 218 277 +5164 PF05337 CSF-1 Macrophage colony stimulating factor-1 (CSF-1) Moxon SJ anon Pfam-B_7649 (release 7.7) Family Colony stimulating factor 1 (CSF-1) is a homodimeric polypeptide growth factor whose primary function is to regulate the survival, proliferation, differentiation, and function of cells of the mononuclear phagocytic lineage. 
This lineage includes mononuclear phagocytic precursors, blood monocytes, tissue macrophages, osteoclasts, and microglia of the brain, all of which possess cell surface receptors for CSF-1. The protein has also been linked with male fertility [1] and mutations in the Csf-1 gene have been found to cause osteopetrosis and failure of tooth eruption [2]. 25.00 25.00 35.40 43.70 24.20 23.60 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.66 0.70 -5.06 9 118 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 29 2 35 134 0 226.10 38 95.29 CHANGED hTstGuut.sPSsshh..Gsc.lLssLLsoNhlsEEsStcsS............ahlGsGplps.pph.cuphpsSstlshphhDQc.lD.....lpcAaLhs.s.McsThchpsNTPpts..........chsSshscDapE.spAplpTh+.p........................PLt.LEthKs.........................shp-p+sLhEh-hs.hScssspshA+hsShs.........LhspGpspQc.uSssPQhPt.VFhlLsPSlILVLLsVGGLLFYph+hRSHpDPQtsDSSstpPEsSsL.TQD.DRQhELPV ......................................t.tPsssh...G...lLsshLsop..hhEEsStpsS............ph.GsGplQs..p.th.ss.hpsSs.lshphhsQp.hc.....lppshL.h.s.hcsThphpspTPptp............chtSs..hh+DhpE.spsplpohh.p........................PLt.LEth+.shp-p+s.h....-h-hs.........hScsssp......shAchsS.s.........Lhspup.ppc.uSssPp.s......LsPS....hhsluGLha............p-spts-us...Pp.t.p.PL.T...D.s..pthphPs.......................................................... 0 2 2 7 +5165 PF05338 DUF717 Protein of unknown function (DUF717) Moxon SJ anon Pfam-B_7144 (release 7.7) Family This family consists of several herpesvirus proteins of unknown function. 
21.30 21.30 26.00 65.80 21.10 20.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.65 0.72 -4.02 11 20 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 18 0 0 18 0 56.00 39 70.93 CHANGED sploEpDFppChpFFs+Pl.pllspsupuLsslchscossQpl-pLsLlLDLlGTE .sploEpDFp-CtpFFs+PLpcllsssucuLsslclscSssQplEpLsLLLDLlGTE 0 0 0 0 +5166 PF05339 DUF739 Protein of unknown function (DUF739) Moxon SJ anon Pfam-B_7696 (release 7.7) Family This family contains several bacteriophage proteins. Some of the proteins in this family have been labeled putative cro repressor proteins. 19.50 14.00 19.60 14.40 19.20 13.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.91 0.71 -4.37 11 78 2012-10-04 14:01:12 2003-04-07 12:59:11 6 1 77 2 9 39 3 67.70 43 73.40 CHANGED hsaD..........YScLpG+I...............lE+YGo..............................pasFApAls..lSE+olS.hKLNsKl..................................sWpss-ItKAh-.lLulsp-.............clspYFFph ...............................hsh..............atpLtG.l...............lEKYGo....................................pYsFAhAht...LSERolS.LKLNsKl...................................W..s...tshp.hlth..p......................h.................................. 2 4 9 9 +5167 PF05340 DUF740 Protein of unknown function (DUF740) Moxon SJ anon Pfam-B_7873 (release 7.7) Family This family consists of several uncharacterised plant proteins of unknown function. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 603 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.33 0.70 -6.15 6 176 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 19 0 119 155 0 210.00 18 72.48 CHANGED ssspssp.+RhSTSCcRHP-E+.FTGFCsSCLpERLSsL-ts.......SSSuptPso.Sssul+ulFt.s.sss..............u.hPELRRsKSFSs.+...........ssAusSsu.EPQRRSCDVRs.+soLhsLF.pDD-cplsSs......sss..p.....RcshVs-.lhEE-pEh..EcD--...t................Estcll-Epspc.................hp-EEtKsMKDa.hDL-Spp.+K.ssKc....utSFasAASVFSKKLQKW+pKQKhKKcc..sGsuuutss.....................u-huhGRRSCDTDP..............RFSlDuGRlShDDstaSFDEPRASWDGpLIG+oh......sP.....hPoMhSVlEDuP......lpRuDhph......Psss.lp.pcu-pD..h..PGGSsQTRDYYhD.sSSRRR+SLDR.SsShR+hu...ltElDElKsluNucsSs...........hsppppLRDpsh.s.sNhcsEotE.su.............p.sust.cKKs+RWuK.WsIaGLIaRKsss+.......cp-p.p+hs.ushVERSLSESW.s-LRsst......GsuPKMlR.........sNSslShRS..SG.pGG.............................G.puppp.phlpp+sKsupYSs.cssENGMLRFYLTPh+uptpuuGu ..................................................t..............C.hHP.p..hsGhCs.CLp-RL..lt...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................s......................................................................................... 0 11 65 90 +5168 PF05341 DUF708 Protein of unknown function (DUF708) Moxon SJ anon Pfam-B_7259 (release 7.7) Family This family consists of several uncharacterised nucleopolyhedrovirus proteins of unknown function. 21.30 21.30 21.50 41.70 20.20 19.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.33 0.72 -4.25 24 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 55 0 0 53 0 105.70 44 78.30 CHANGED phpWpllss......shlEVsPc-REpAWKDLll.sLpsoPp..oaRTtlp+AshEpFDYKpPIhY-lKp+pLhlss-plhpALs..hPp.sshsshslsshplhhs....FIhslLl ...lRWp..lLNs......DclEVsPEcREpAW+-LlIsllpsoPt..TaRThlpKAshENFDYppPIlYslKsKpLllss...EplppALN..RPt.tshsshNlsshplhLs..FIhslLL..... 0 0 0 0 +5169 PF05342 Peptidase_M26_N Peptidase_M26; M26 IgA1-specific Metallo-endopeptidase N-terminal region Studholme DJ anon Merops Family These peptidases, which cleave mammalian IgA, are found in Gram-positive bacteria. Often found associated with Pfam:PF00746, they may be attached to the cell wall. 
25.10 25.10 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.59 0.70 -5.40 19 698 2009-01-15 18:05:59 2003-04-07 12:59:11 9 66 278 0 17 423 8 240.30 39 13.51 CHANGED -pl-hhp-YThsTp......shs.s-sspsct.p-p.......hp.s.KKlEL+NlsslcLhp..-NGp.ppps.LsphPs..ssssYalKVpSss.K-shLPVsSIEEssc-GpslYKlTApsscLlQ.ct-sphp-sasaYltKttsc-sslYhsFpsLVcAMppN.sGTa+LGAsLsAspVphssss+SYlpGpFoGpLlGsp-GKcYAIaNLc+PLFssl.puuolcclsLKsVNIstp.....sclAolAppspsuopIcNV+VsG .......................................plchhptYplpTph.....sasp..upus...c.o.pt.c-.p...........hplshKKl..El+..NIssspLhp.........-sGp.p..phshL...sphPs..sspphalKVpSps.KsshLsVsS.IEEssh-GpplYKlsApApcLlQ..ctssph.pppYsaYlp.K..t.sc.-ssVYhsFppLVcAMpssPsGTF+LGAslsAspV..ssss+S..Y..lpspFpGpLh.o.s-G.KpYuIaNL...c.......+P.......LFssl.pu......uTVcslsLcsVsIsh...........-slAs.lApphpssopIcNV+VsG............................................. 0 2 4 8 +5170 PF05343 Peptidase_M42 M42 glutamyl aminopeptidase Studholme DJ anon Merops Family These peptidases are found in Archaea and Bacteria. The example in Lactococcus lactis, PepA, aids growth on milk [1]. Pyrococcus horikoshii contain a thermostable de-blocking aminopeptidase member of this family used commercially for N-terminal protein sequencing [2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.74 0.70 -5.64 69 4278 2012-10-02 19:46:12 2003-04-07 12:59:11 9 6 2184 65 699 4495 777 284.40 30 81.45 CHANGED Gsllup.ppG...su..+lhluAHMDElGhMVppIccsGhL+hsslGGhsspsl.up+lplhscpG...l.GVlGstsP..Hlhc.stccppshp....hc-lhIDlGsps+--AcchGlplG-hlsh...cschthlssph.lhu+AhDsRhGsslll-ll+pLpcpph...sslahsuoVQEEVGLRGApsustplpP-hslAlDsssu.uDsssssctp.....lGpGsslphhDss.....hlhc.plpchLh-hAccpsIPhQhc.hhssGGTDAuuhphs.ttGlPouslulssRYhHS.sEhhchcDlpsshcL .....................................................................................................................................................Gulhsp..h....pu....tpsu....+lhlsAHhDElGaMVppI....c....c.s....G..hlchps..l...G..G.....a.spsh.upplplpTcp....G....plsG.ll..u..s..hss.........Hhh.........p..s.........t.........tc.........p...p........p.....hp..-lhlDl...Gs..............p...o...c.-.E...s.c.p.h.GIcsG..D.h.....ls.........csp.h...t.h........h...s...sst...lhuK.A.....h.Ds.RhGss.hllclL..c.p.L.p..s..p..p..ls...........hsla.hs.soVpEE...V.G....l............RG....A....p........s....u......s....p....p.......................l......p.P...D......l...................h....l.u...l......Dsuss....sD...s......s......ss.php........hG.pGshl.t.h.h..Dss..........hlhp.p.l..p.c.a.l.h.s.l.A.....c.....c..p.....sIPa.........Q...h.........p....h....h............s...t...........G...G..TD.A...u...u....h...p.h.s...s..sGlP...s...s.slGl...ss..RYhHo....hphhchcDhtsshpL.................................................................................................... 0 268 469 596 +5171 PF05344 DUF746 Domain of Unknown Function (DUF746) Yeats C, Eberhardt R anon Yeats C Family This is a short conserved region found in some transposons. Structural modelling suggests this domain may bind nucleic acids [1]. 
23.70 23.70 23.80 31.10 23.00 23.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.76 0.72 -4.44 10 78 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 34 0 16 77 0 63.60 41 34.53 CHANGED c+hcthIRhLSpPlSlh-AA-tlGsscssltchVchFRpalLpLDPSGpaEsRlRLGsRPspsss ......c+hcLFlphLS.PlSshpAuctlGot.ssltchlphaRpalLpLDPSGphEtRlRLGs+ssphss...... 0 4 8 12 +5172 PF05345 He_PIG Putative Ig domain Yeats C anon Yeats C Family This alignment represents the conserved core region of ~90 residue repeat found in several haemagglutinins and other cell surface proteins. Sequence similarities to (Pfam:PF02494) and (Pfam:PF00801) suggest an Ig-like fold (personal obs:C. Yeats). So this family may be similar in function to the (Pfam:PF02639) and (Pfam:PF02638) domains. This domain is also found in the WisP family of proteins of Tropheryma whipplei ([1]). 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.37 0.72 -3.94 108 2176 2012-10-03 16:25:20 2003-04-07 12:59:11 7 369 532 0 988 2359 2162 49.40 32 9.08 CHANGED oaoh....................ssssLPsGLol...........sssoGsloGT....ssss...............G.sashslssossss ...................................................sh.....................ssuLP..sGLoh...........................ssu....TGslSGT.........Poss....................................G.sa.s.lsVosTDss...................................................... 0 490 754 893 +5173 PF05346 DUF747 Eukaryotic membrane protein family Wood V, Bateman A, Murphy T, Mistry J anon Pfam-B_13582 (release 7.8) Family This family is a family of eukaryotic membrane proteins. It was previously annotated as including a putative receptor for human cytomegalovirus gH [1] but this has has since been disputed [2]. Analysis of the mouse Tapt1 protein (transmembrane anterior posterior transformation 1) has shown it to be involved in patterning of the vertebrate axial skeleton. 
25.00 25.00 27.70 25.10 24.10 23.60 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.26 0.70 -5.39 36 347 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 271 0 241 343 1 312.30 35 47.39 CHANGED p.....ptlsttphsDllphhlllhsshlLp..........hl.DsS+hYHhIRuQusIKLYllaNlLElsD+LhsShGpDll-sLhpssh.........................................................................................................................................................................................................ttpthhchhshahlulhYlshHuhlLlhQsloLNVAlNSasNuLLoLLlSNpFsElKuoVF......KKF-+EsLFQlotuDllERFpLhlhLhll..ulRNhhph....................................h.tuap.....................tlhushlhVluSElh.VDWlKHuaIsKFNcl+splYpcahplLs.....tDhh............................t.hsts.hls....+RlGhsshshsslhl+hhh.hhphhh....................................................................................................hthlshhhlhlhsahhLlhhKllLuhhllpaupphhp.phc ...................................................................htstphsDllp.hhl.llhsh.hhh...........hl.DhSh.hYHhIR..uQ.usIKLYllaNhLEluD+LhuuhGQDll-sLa.ptt.............................................................................................................................................................................................................................................................................................................................................................................................ptp...p...thht.h..hhhh..ul....hY...s....hhHuh....hlhhQsh......oLNVAhN..Sa.s.............puLLTlhhSNpFsEIKu...........oVFKKF-KpNL...............F.Qlo.s....uDlhERFplhlhLhll..shRNh.ph.....................................................p.ah........................lhsshhhVlsoEhhVDhl.............KHAaIsKFNpI.p.sslYpcaht.Ls.........hDhh..............................................................s....sthsptls....RRhGh.slPlusLhlRsh.hpshp..........................................................
...................................................hhh.hhhhhhhah..hLlhhKll.uhhLlthupph.t............................................................................................. 0 83 142 206 +5174 PF05347 Complex1_LYR Complex 1 protein (LYR family) Wood V, Bateman A anon Pfam-B_15215 (release 7.8) Family Proteins in this family include an accessory subunit of the higher eukaryotic NADH dehydrogenase complex. In Saccharomyces cerevisiae, the Isd11 protein (Swiss:Q6Q560) has been shown to play a role in Fe/S cluster biogenesis in mitochondria [1][2]. We have named this family LYR after a highly conserved tripeptide motif close to the N-terminus of these proteins. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.93 0.72 -4.17 246 1703 2012-10-01 20:54:40 2003-04-07 12:59:11 10 21 313 0 1190 2012 12 60.50 22 42.65 CHANGED pp..lLpLYRplLR..pu.pp......hsshs..........+phh...ppplRspF+c..s+s...hp.-.ppl.....pth......lppupcpLph ...................pllpLYRphLR......ts..pp........................hst.ts............pphh.........ppt...lR..ppFcp....s+s......hs..-sppI..................pth..........................lppup..h..................................................... 0 377 651 958 +5175 PF05348 UMP1 Proteasome maturation factor UMP1 Wood V, Bateman A anon Pfam-B_18845 (release 7.8) Family UMP1 is a short-lived chaperone present in the precursor form of the 20S proteasome and absent in the mature complex. UMP1 is required for the correct assembly and enzymatic activation of the proteasome. 
UMP1 seems to be degraded by the proteasome upon its formation 21.50 21.50 22.20 21.50 21.40 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.42 0.71 -4.19 7 345 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 278 0 233 323 2 122.10 29 80.51 CHANGED shpshthtlsuhht.hcshphGhhsshsplhssHPLcss.cN.hptpQcphphphlRph.GlthPLKhsMEpplhpphpR.P.hlsSuphth-lLsGp.-slsFEDhhNsPppuEhhh...p.HthhEhpLGl ..........................................................................s................c......t....G...........s..ht....s.p..l.h....s..s..HPLEtp.ps.aptspcphphptlRplaGhttPl+htMEhc.llp.......p........sp.+.hP..............h.L...s.....S.......S..........s..l..thDlLp..Gp--slsaED.....lh.....s..ss.p..tp..-...ht............p.Ht.hEh+L.................... 0 71 119 180 +5176 PF05349 GATA-N GATA-type transcription activator, N-terminal Finn RD anon DOMO_DM03585 Family GATA transcription factors mediate cell differentiation in a diverse range of tissues. Mutation are often associated with certain congenital human disorders. The six classical vertebrate GATA proteins, GATA-1 to GATA-6, are highly homologous and have two tandem zinc fingers. The classical GATA transcription factors function transcription activators. In lower metazoans GATA proteins carry a single canonical zinc finger. This family represents the N-terminal domain of the family of GATA transcription activators. 
20.30 20.30 20.60 20.30 19.50 20.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.77 0.71 -4.10 16 190 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 48 0 70 154 0 155.00 39 41.19 CHANGED MYQo....LAluuspG.s..uYs.ss.GuFlHS.........s..AoSPVYVPToR.VsoMl.sLP.....YLQsstsuppu.................ps..huuHsuWuQs.uu-os.uassuS................sHsPsu.......Fsau.pSPPhuuuuu....RDsu.............YpusLhhss.u...R.-QYush..sRsluGSYsSs......YsAYhoP-lu...s.SW......suGPFDuSV.LHuLQuRs.ushsu.R+ssh..-hL ..................................................................................MYQo....LAhsus...u.s....uYt.ss...usF.hHu...........s..usSPVYVPosR.Vsuhh.sLs.....Y.Lpsstsu.ts.....................ts...supssWs.Qs.us-us.sass.....................ssst.......F.sas...os.....s..hss.usu......R-su...............apu...sh.....u....R.-QYut....RshsGoYsos..........Ys.....s.Yhu...s.slu...s.uW.......suGPF-.usV.LHuLtuRs.ss.ss.Rtssh............................................................... 1 4 12 30 +5177 PF05350 GSK-3_bind Glycogen synthase kinase-3 binding Finn RD anon Pfam-B_18811 (release 7.8) Family Glycogen synthase kinase-3 (GSK-3) sequentially phosphorylates four serine residues on glycogen synthase (GS), in the sequence SxxxSxxxSxxx-SxxxS(p), by recognising and phosphorylating the first serine in the sequence motif SxxxS(P) (where S(p) represents a phosphoserine). Interaction of GSK-3 with a peptide derived from GSK-3 binding protein (this family) prevents GSK-3 interaction with Axin. This interaction thereby inhibits the Axin-dependent phosphorylation of beta-catenin by GSK-3 [1]. 
25.00 25.00 26.70 32.10 22.90 24.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.62 0.70 -4.89 4 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 38 10 34 79 0 160.30 40 81.67 CHANGED MPCR+E.................SFLLLpQSVTlG.SsEVDpLVupIGEsLQLcsApcoPsSssts.G........hhAuhPss+uGs..................ssGuhRChhhcptpVR.GRuuPYsVs.sssGuSshs.p.h.................t.C+RGWhR..sssR+.........ts+stD-DDPHcLLQpLlLSGNLIKEAVRRLp.....huucsPsps.PGsh ...............................................MPst+c.............................salLLpQSVTls.StEVDtLVspIGEsLQLcssps.s.t.o.ss..G..............ush.sspst.............s..........tshtChhhcptthR.sRuuPY......sGsos................................+sW.R.......+c......................t.s+ss-.DDPH..cLLQpLlLSGNLIKEAVRRLp.t.................h............................................. 0 5 6 12 +5178 PF05351 GMP_PDE_delta GMP-PDE, delta subunit Finn RD anon Pfam-B_13828 (release 7.8) Family GMP-PDE delta subunit was originally identified as a fourth subunit of rod-specific cGMP phosphodiesterase (PDE)(EC:3.1.4.35). The precise function of PDE delta subunit in the rod specific GMP-PDE complex is unclear. In addition, PDE delta subunit is not confined to photoreceptor cells but is widely distributed in different tissues. PDE delta subunit is thought to be a specific soluble transport factor for certain prenylated proteins and Arl2-GTP a regulator of PDE-mediated transport [1]. 
25.00 25.00 30.20 30.10 18.20 18.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.85 0.71 -4.42 23 362 2009-09-11 15:38:04 2003-04-07 12:59:11 6 4 137 20 215 325 4 144.60 44 72.54 CHANGED pspsNlasI-Fs+FpIRDhETGpVLFEls+ssss...........................................tt..p...ssuRhl+YpFsPpa.....................L+l+oV....GAs.lpFoV.G-cPlpsFRMIERHYF+spLLKoFDF-FGFCIPpScNThEpIYEhPslupphhppMlpsPaETpSDSFYFV-s+LlMHNKA-YuYs ....................spt.lhtlchspFplRDh-oGpVLFcls+s.ss........................................................................tsGRhlcYpFssth...........................L+h+sV....u..sp.lpFos.uscslpsFRhlERHYF+sp..lL+.....sa-F-FGFsIPs.SpNThEplYEhP.shSpth.tphlps..psps-ohaFs.-spLlh+s+sch.Y....................................... 0 80 100 155 +5179 PF05352 Phage_connector Phage Connector (GP10) Finn RD anon Pfam-B_13828 (release 7.8) Family The head-tail connector of bacteriophage 29 is composed of 12 36 kDa subunits with 12 fold symmetry. It is the central component of a rotary motor that packages the genomic dsDNA into pre-formed proheads. This motor consists of the head-tail connector, surrounded by a 29-encoded, 174-base, RNA and a viral ATPase protein [1,2]. 
25.00 25.00 31.80 30.90 19.30 19.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.89 0.70 -5.19 4 23 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 22 39 0 24 2 271.50 30 88.42 CHANGED SYKo....Is-IQRp+....tNR..WFhaYhpYLhSLAYQhFEWEsLPsolDP.FLEKplHQhGaVuFYKDshhGYIAspGsLSGplshYNQPshap...ASSssYQKpFcLY.....Yc...Dh+Ecs.G......lVIYNNshthPTlshLELFAtcLAELKEhItVNQNAQKTPVlItAsDNN.LShKplYNpYEGNtPVIFspcphD..........oDsIcVFKTDAPYVlDKLsspKpsVWNEhMTFLGIpNANh-KKERhVsSEVpSNs-QIpuSuslaLKuRpEACchINEhYGLNlpVKhRh-IV ....................................................h...............................p..lpshhhplFpaEshPsslssh.LEp.l+Q.G.hVshh+Dthhs.hlhh.tshssthshYsp..s..shFp......uss.sa.pph...clhp....ap...chp.ps.s..............VVhhN....Nsh.hs.ssh-llEaYspcLA-lcto.hplNhpsp+sPhhItus-sN...plSlppLhsclpsGsPhl..hspcshs..........sDs..I-lh....s.ssssshls.cLppphpsphsEhhTaLGIpNssh-KKERhlscEApSNsphlpususIYhKuRpcAlchlNctYGLsIKsp................. 0 0 0 0 +5180 PF05353 Atracotoxin Delta Atracotoxin Finn RD anon Pfam-B_30981 (release 7.8) Family Delta atracotoxin produces potentially fatal neurotoxic symptoms in primates by slowing he inactivation of voltage-gated sodium channels [1]. The structure of atracotoxin comprises a core beta region containing a triple-stranded a thumb-like extension protruding from the beta region and a C-terminal helix. The beta region contains a cystine knot motif, a feature seen in other neurotoxic polypeptides [1]. 
25.00 25.00 35.00 76.10 20.80 19.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.72 0.72 -4.12 5 7 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 3 0 8 0 42.10 65 76.62 CHANGED CA+KRuWCuKTEDCCCPMKCIYAWYNQQSSCQsTISulFK.cC CA+KRsWCuKsEDCCCPMKCIYAWYNpQuSCQsTIoulaK.cC 0 0 0 0 +5181 PF05354 Phage_attach Phage Head-Tail Attachment Finn RD anon Pfam-B_59968 (release 7.8) Domain The phage head-tail attachment protein is required for the joining of phage heads and tails at the last step of morphogenesis [1]. 20.80 20.80 20.90 22.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.38 0.71 -4.53 6 548 2012-10-01 22:58:23 2003-04-07 12:59:11 6 2 289 2 4 217 26 112.70 74 98.42 CHANGED MuDFDNLFDsAlutADcsIlcsMGhsAslTSGpLpGuplsGVFDDPEsISaAuuGlRlEsSsPoLFVKTuslspL+RsDTLTIss-sFWVDRIoPDDGGSCaIhLsR.GpPPsssRRR ..............................MtDFpNhFDAAlAtsDcTIhthMGhS..AphT..SGtQsGu.lpGVFDDP.EsluaAGpGVRlEGSSPSLFVRTDsVRtlRRGDTLTIstE......sFWV.....DR.lSP.DD.GGSC.....aLWLs..R..G.PPAVNRRR..................... 0 0 0 1 +5182 PF05355 Apo-CII Apolipoprotein C-II Finn RD anon Pfam-B_6456 (release 7.8) Family Apolipoprotein C-II (ApoC-II) is the major activator of lipoprotein lipase, a key enzyme in the regulation of triglyceride levels in human serum [1]. 22.10 22.10 22.10 24.50 18.80 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.69 0.72 -4.11 6 40 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 29 4 19 42 0 73.00 46 75.96 CHANGED AphsQQDEssSPALLsphQESL.SYW-SAKAAAQcLYpKTYLPAVDEKIRDlYSKSTAAlSTYAGIFTDQlLShL+G- ............p.sQpDE.sSsuLLopVQESLhSYW-oAKssApsLYcKTYlsslDEKlRDhYSKoosAhoTYsGIhTDQlhplLtGc.................... 
1 1 2 6 +5183 PF05356 Phage_Coat_B Phage Coat protein B Finn RD anon Pfam-B_51500 (release 7.8) Family The major coat protein in the capsid of filamentous bacteriophage forms a helical assembly of about 7000 identical protomers, with each protomer comprised of 46 amino acid, after the cleavage of the signal peptide. Each protomer forms a slightly curved helix that combine to form a tubular structure that encapsulates the viral DNA [1]. 25.00 25.00 28.90 28.70 23.60 22.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.82 0.72 -4.29 2 14 2012-10-01 20:22:05 2003-04-07 12:59:11 6 1 13 15 2 14 0 80.70 65 99.56 CHANGED MKuMKppIAKFsPVtSFRNLCIAGoVTAAoShPsa.AuVIDTSuVEpAITDGpuDMpsIGGYIVGALVILAVAGLIYSMLRKA ..............MKQpIAKFsPVsSFRN..LCIAGoVT.AAoShPAF..A.....GVIDTSAVEuAIT-GQuDMcuIGGYIVGALVILAVAGLIYSMLRKA.... 0 1 1 2 +5184 PF05357 Phage_Coat_A Phage Coat Protein A Finn RF anon Pfam-B_7225 (release 7.8) Domain Infection of Escherichia coli by filamentous bacteriophages is mediated by the minor phage coat protein A and involves two distinct cellular receptors, the F' pilus and the periplasmic protein TolA. These two receptors are contacted in a sequential manner, such that binding of TolA by the extreme N-terminal domain is conditional on a primary interaction of the second coat protein A domain with the F' pilus [1]. 20.80 20.80 22.80 23.00 20.70 19.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.11 0.72 -4.39 3 41 2009-01-15 18:05:59 2003-04-07 12:59:11 8 3 15 21 0 89 0 63.30 45 24.64 CHANGED MKKllhAlshslPFYTH.........sATTsssCLuKPuhEsSho.NVWK-sco..RYANaEGCLashTGlVlss ....hpK...hhshslPhYoa............susspcsC.uK..............P....shEsShs.Nsap.sps..RYtNapGCLasATGVVVss.... 1 0 0 0 +5185 PF05358 DicB DicB protein Bateman A anon Bateman A Family DicB is part of the dic operon, which resides on cryptic prophage Kim. 
Under normal conditions, expression of dicB is actively repressed. When expression is induced, however, cell division rapidly ceases, and this division block is dependent on MinC with which it interacts [2]. 25.00 25.00 25.30 37.10 20.80 19.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.78 0.72 -4.49 3 416 2009-09-11 11:17:21 2003-04-07 12:59:11 6 1 215 0 1 106 0 61.40 74 95.88 CHANGED METLLPNVNTSEGCFEIGVllSNKsFTEDAINpRKhEp-LLN-lCIlSMLARL+Lh.KGp+Q METLLPNVNTSEGCFEIGVplSN.sFTEDAINpRKaEp-LLNclCIlSMLARL+Lh.KGptQ... 0 1 1 1 +5186 PF05359 DUF748 Domain of Unknown Function (DUF748) Yeats C anon Yeats C Family \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.73 0.71 -4.21 168 1412 2012-10-03 05:41:17 2003-04-07 12:59:11 6 20 431 0 618 1573 145 150.40 17 38.87 CHANGED htlplsplplp.s.upl..pasD.....pphs..shp..hplpsLshpl.sslu.ot.s.sp......sslp..l....puplspt...uslphpGpls..shs.hpsp..lplshc...slsLsshpPY.hsph.hshplppGpLsh....cLphp....hp.ps.pl.pspsplt..lc....pLp..ls-.....ps.ss.....pshh...sLs....l .........................................................................................lplsplplp.s..Gpl....papD...............tthp...shp......hplsslsh....sl...ssl..s..sh....s...sps.....................sslp....l...................pup.lsst...................uslshpGpls.........shs....php.........hplphp..slsLs.s....hp.sY....hsp.......h.....h.......sh....pl..p....p..GpLsh.......clphp........................hp.ps..pl..p.s...p...s...p.lt..ls....plp..ltp.....t..ps...................hh.......................................................... 1 137 349 518 +5187 PF05360 YiaAB yiaA/B two helix domain Yeats C anon Yeats C Domain This domain consists of two transmembrane helices and a conserved linking section. 
20.40 20.40 20.40 20.50 19.80 20.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.30 0.72 -4.42 19 1736 2009-01-15 18:05:59 2003-04-07 12:59:11 9 3 843 0 190 635 32 52.40 37 64.76 CHANGED ahshsasuhlluhshhhlGlasus.hpLstKGaahhshhhslhuslslpKssRD ..........ahhhuhhshllulshhllGLW.sus...h...hLuEKGYahsslhhulFushuhQKshRD...... 2 43 112 151 +5188 PF05361 PP1_inhibitor PKC-activated protein phosphatase-1 inhibitor Finn RD anon Pfam-B_69711 (release 7.8) Family Contractility of vascular smooth muscle depends on phosphorylation of myosin light chains, and is modulated by hormonal control of myosin phosphatase activity. Signaling pathways activate kinases such as PKC or Rho-dependent kinases that phosphorylate the myosin phosphatase inhibitor protein called CPI-17. Phosphorylation of CPI-17 at Thr-38 enhances its inhibitory potency 1000-fold, creating a molecular switch for regulating contraction [1]. 20.70 20.70 21.10 22.10 20.30 20.60 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.88 0.71 -4.51 8 231 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 78 4 125 200 0 108.80 38 79.73 CHANGED MAup+lG+Rhppp.pSss+upu..pustslQ+RpARVTVKYNRKELQRRLDVEKWIDspL-ELYpG+E--MPE.EVNID-LLDLpoDE-Ro++LpslLpuCsssTEsFIsELLt+L+GL+KQptLpppGlchPp.ph.sph............cs ............................t.........................................t......+.+..pu+lTVKYsR+cL...p+...RLslEcWI.ppLpcLYp.....s..pE.-.-.......hP-...E.....IDlD-LLDhpo--pRsp....+l.p.clL...hs....Ch...+P...T....E...s...FIp-LLs+l+Ghp+.s.......................tp................ 2 18 29 61 +5189 PF05362 Lon_C Lon protease (S16) C-terminal proteolytic domain Studholme DJ anon Merops Domain The Lon serine proteases must hydrolyse ATP to degrade protein substrates. In Escherichia coli, these proteases are involved in turnover of intracellular proteins, including abnormal proteins following heat-shock. 
The active site for protease activity resides in a C-terminal domain. The Lon proteases are classified as family S16 in Merops. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.21 0.71 -5.02 25 7706 2012-10-03 01:04:38 2003-04-07 12:59:11 8 39 4472 58 2231 8849 3459 174.80 32 25.91 CHANGED slsscsLccaLGh.+F+hspA-ccDpVGlVTGLAWTpVGG-lLsIEushhP..GKG.+LplTGpLGDVMKESApAAhSYlRS+ApchGIcscha-cpDIHlHVPEGAsPKDGPSAGlsMsTALVSsLTGhsV++DVAMTGEITLRGRVLPIGGLKEKlLAA+RuGIKpVIlPc-Nc+DL.c-lPcsl+csLclhsVcplD-VL+hALst .....................................................................................................................h........................................................................................G...hh.s..........................................G.....h....h.........lE.....s...........................................................t.........p...............s.........t..............h..............h...o...G..........h........t.............p.......................h.......p....c.....u........h..................s...........t.....h....hp....................t.......................................................................................................................................................................................h...........t................t.....................p...h....H.lp...h...s.t...u.uh....s..h..D...G......PS..A.G..lsh..s..s..A.l...l.............S...s...L.....o...........s.......t.............s...l......c....p....s......l....A.......h...T...G-Iol...pG.c.VhPI..G.G.lcEKl........l....A........A..p.......R.......u.......G.h.......c...t...l.....l.....l......P...............p....p...............N........t.+......-......l......p..............-..............l.........s....p......t.....l..........p....p....t.h.....p.....lh.s.V.p.p.lc-slphh..t..................................................... 
0 804 1468 1899 +5190 PF05363 Herpes_US12 Herpesvirus US12 family Finn RD anon Pfam-B_62991 (release 7.8) Family US12 a key factor in the evasion of cellular immune response against HSV-infected cells. Specific inhibition of the transporter associated with antigen processing (TAP) by US12 prevents peptide transport into the endoplasmic reticulum and subsequent loading of major histocompatibility complex (MHC) class I molecules [1]. US12 is comprised of three helices and is associated with cellular membranes [1]. 21.00 21.00 23.70 23.50 20.00 17.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -9.79 0.72 -3.47 2 19 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 1 0 19 0 80.40 53 95.56 CHANGED MSWALchsDhFLDs.Rss.RTYuDVpsEIpKRtREDREAARTAVpDPEhPLLpsPslhs-.As....ptohGsA+pppttsts.uP ......MuWALchsDsFLDs.Rss.RTYADVRcEIsKpuRED.REAARTAVtDPERPLLpsPulhP-hAs....sAohGsA+pppttshh.uP............ 0 0 0 0 +5191 PF05364 SecIII_SopE_N SecIII_SopE; Salmonella type III secretion SopE effector N-terminus Finn RD, Moxon SJ anon Pfam-B_18665 (release 7.8) Domain Salmonella typhimurium employs a type III secretion system to inject bacterial toxins into the host cell cytosol. These toxins transiently activate Rho family GTP-binding protein-dependent signaling cascades to induce cytoskeletal rearrangements. SopE, one of these toxins, can activate Cdc42 in a Dbl-like fashion via its C-terminal GEP domain Pfam:PF07487 [1]. This family represents the N-terminal region of SopE. The function of this domain is unknown. 19.10 19.10 21.80 45.70 18.80 18.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.40 0.72 -4.15 2 141 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 106 0 2 45 0 70.20 79 31.04 CHANGED TpITLSsQpaRIp+p-sp.lKEKoTEKs.hAKSIhAV+NpFIpLpopLS-RF..HppT-.ssTHFHRGsASEGR .TNITLSTQHYRIHRSDVEPVKEKTTEKDIFAKSITAVRNSFISLSTSLSDRFSLHhQTDIPTTHFHRGSASEGR........... 
0 0 0 1 +5192 PF05365 UCR_UQCRX_QCR9 Ubiquinol-cytochrome C reductase, UQCRX/QCR9 like Finn RD anon Pfam-B_18986 (release 7.8) Family The UQCRX/QCR9 protein is the 9/10 subunit of complex III, encoding a protein of about 7-kDa. Deletion of QCR9 results in the inability of cells to grow on grow on-fermentable carbon source n yeast [1]. 20.40 20.40 20.40 20.70 20.20 19.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.59 0.72 -4.33 33 239 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 211 52 166 258 1 53.30 36 49.35 CHANGED hsslYpslFRRNSsalssIhsGAFsFEh.AFDsusspla-shN+GKhWKDI+tcY. ......h.ptlYphlh+RsSsassslhsGAFhFEt.uaDtusspla-phN+GKhWKDI+tph...... 0 51 86 138 +5193 PF05366 Sarcolipin Sarcolipin Finn RD anon Pfam-B_33603 (release 7.8) Family Sarcolipin is a 31 amino acid integral membrane protein that regulates Ca-ATPase activity in skeletal muscle [1]. 26.40 26.40 26.80 65.70 20.60 26.30 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.43 0.72 -4.54 2 17 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 17 1 9 12 0 30.60 87 100.00 CHANGED MthsTpELFlNFTlVLITVlLMWLLVRSYQY MthsTRELFLNFTlVLITVILMWLLVRSYQY 0 1 1 1 +5194 PF05367 Phage_endo_I Phage endonuclease I Finn RD anon Pfam-B_22152 (release 7.8) Domain The bacteriophage endonuclease I is a nuclease that is selective for the structure of the four-way Holliday DNA junction [1]. 
25.00 25.00 25.60 25.40 22.60 21.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.73 0.71 -4.81 6 64 2012-10-11 20:44:44 2003-04-07 12:59:11 6 1 59 24 2 65 330 136.50 49 98.32 CHANGED MA..uahAKthppVtAaRSGLE-KsuKQLEuKGlKh-YEpahlPYVlPASsH+YTPDFlLPNGIhVETKGLa-o-DRKKHLLIREQHPELDIRhVFSSSRTKLYKGSPToYu-WCEKaGhpFADKLIPVtWL+EsspclPhshLKsKKGcK .........................................thtts.uaRSGLE-+lucpL-spGlpa-YEphclsYllPtp.sHpYTPDFlL.P.N..G..IhlEoKGha........-s-DR+KHLLI+EQaP-.L.DIRhVFSsSpo......KlYKGS.ToYu-WC-KaGhha.A.D.K....h.IPs-WL+E.ph.hs.shhh.ht........................ 0 0 0 1 +5195 PF05368 NmrA NmrA-like family Finn RD anon Pfam-B_25329 (release 7.8) Family NmrA is a negative transcriptional regulator involved in the post-translational modification of the transcription factor AreA. NmrA is part of a system controlling nitrogen metabolite repression in fungi [1]. This family only contains a few sequences as iteration results in significant matches to other Rossmann fold families. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.33 0.70 -5.11 25 4570 2012-10-10 17:06:42 2003-04-07 12:59:11 8 40 1637 75 2377 19410 5486 230.00 20 76.81 CHANGED ILVlGuTGh.GptlVcAulc.....sGapshALsRcs.................cschhpshpstGspll.pGD.ls...........................-+pSLscAlKtVDlVhssss........stpltpthpllcAhKcAG.VK+Fl.Sphuscsc..tspuh.PuhstF-pKtplc+hlcut......GIPaTaVhsshFss.Flsshst...ttsssspsphslhususscslhh...sEcDlGsaslpslcD.Pcph.pshhlphssshLohsElsslapKt...lGKs.........................................................V+Ysp ...................................................................................................lhlhG.A.T...G.p...h...G...t......t...l....l.c.......s.L.lp....................ss...h......p.....l...h.......s.....l.......s...R..ss............................................................p.p..........h...p.....t.....l.......t........t......t.......G.......s..........p........l.......h......p......u..D...h...s................................................................................s........s...u....L........t...p.....A.....h........p............G.......s.......-.......s......l........h...h..h...s..s...................................stch.....t.......p.....t......t.........s......l....l.......c.........A...........A....p........p...........A.......G.....V.....c.........+........h..........l..............h..............o..........s.........h.........s........t.........s........s.........t..............s...........................................................................h........s...........h........h.....s............t.........+......h....p.......h....c....c....h...l.....p.pp........................s.l...s...a....T...h..l......p........s.....u.......h.......a...h.............c................h.......h...s.....hh........................h.t....t............................h.......h.....h.h......s......s...s..s....t..hshh.....................sttD.h
...u....t....h....s....s.t........l....l....p.............p.....s.....t....p.......h....................s.....p....h...h.........h.....s...s..........p........t........h........o......h.p......c.l.sthhpch....h..G..pp...................................................................................................................................................................................................................................................................................................... 0 562 1309 1950 +5196 PF05369 MtmB Monomethylamine methyltransferase MtmB Finn RD anon Pfam-B_58618 (release 7.8) Family Monomethylamine methyltransferase of the archaebacterium Methanosarcina barkeri contains a novel amino acid, pyrrolysine, encoded by the termination codon UAG [1,2]. The structure reveals a homohexamer comprised of individual subunits with a TIM barrel fold [2]. 25.00 25.00 148.90 148.60 16.50 16.50 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.52 0.70 -6.32 3 22 2009-09-11 12:54:46 2003-04-07 12:59:11 7 1 14 0 15 29 4 418.70 52 99.42 CHANGED TFRKSFDCYDFYDRAKVGEKCTQDDWDLMKIPMKAMELKQKYGLDFKGEFVPTDKDMMEKLFQAGFEMLLECGIYCTDTHRIVKYTEDEIWDAINNVQKEFTLGTGRDAVNVRKRSVGDKRKPIVQGGPTGSPISEDVFMPVHMSYALEKEVDTIVNGVMTSVRGKuPIPKSPYEVLAAKTETRLIKQACAMAGRPGMGVXGPETSLSAQGNISADCsGGMpSTDSHEVSQLNELKIDLDAIAVIAHYKGNSDIIMDEQMPIFGGYAGGIEETTIVDVATHINAFVMSSASWHLDGPVHIRWGSTNTRETLTIAGWACATISEFTDMLSGNQYYPCAGPCTEMCLLEASAQSITDTASGREILSGVASAKGVVTDKTTGMEARMMGEVARATAGsEISEVNKILDKLVuLYEKNYASAPAGKTFQECYDVKTVTPTEEYMQVYDGARKKLEDLGLVF 
...................tKphssYDaaDRA+sGEKsopDDW..DlMpIPhKuhELKpKYsl-F.c.s.E.FlPTDcDMh-+LFpAGhEMLlECGIYCTDTcRllKYTEDEIh-AIsNs.KEasLGpG+DulsVhcRuhsDp...p...tPllQGGPTGuPlSEDlFhslHhSYAhE..VD.sI.VsGVhpolpG+sshPtSPaElhAu+oEsRLlKpAsthAGRPGMultGPETuloupGsl.uuDssGGhspsDSHEVSQLsELKIDl-AlshhAHYpssuslIMsEQMPIFGGYA.GGl.EETsIVDVATpINuhlMosAoaHLDGPVHlRWGSTsTREsLhlAGassusloc...Tch..lsGNQYYssAGPCTEMCLLEsuAQuITDTASGRElLSGVAuAKGVssD+oTGhEARhMGElARAsAGh-lo..ElNtIL-+LVuhYE...c...sassAPt...GKsFpECYDVpT...lsPT-EYhplYDtAtKcLc-lGL.............. 0 5 10 11 +5197 PF05370 DUF749 Domain of unknown function (DUF749) Finn RD anon Pfam-B_54547 (release 7.8) Family Archaeal domain of unknown function. This domain has been solved as part of a structural genomics project and comprises of segregated helical and anti-parallel beta sheet regions. 25.00 25.00 74.80 74.60 21.20 20.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.58 0.72 -4.04 5 27 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 27 2 19 26 2 87.80 44 89.91 CHANGED FlAsLlGIhTlKE.LsEEhcsFV+lKAulDK+ELcDDsclAIlNIcGTTSYaVlFLDsssSlEEI++ELEEs..GAclN+sScpIL++aL FlAsLluIhslcE.Lss.EhpsaVcl+Aul-cR.EL.ccsDplAILNIpGTsSYpVhFlDp.ssI-cIKc-Lcch..sAclNasSccIl+pal 0 4 9 15 +5198 PF05371 Phage_Coat_Gp8 Phage major coat protein, Gp8 Finn RD anon Pfam-B_31655 (release 7.8) Family Class I phage major coat protein Gp8 or B. The coat protein is largely alpha-helix with a slight curve [1]. 
19.80 19.80 19.80 21.50 18.70 19.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.63 0.72 -4.41 7 29 2012-10-01 20:22:05 2003-04-07 12:59:11 7 1 21 13 4 45 1 51.20 51 69.77 CHANGED AEssss...AptAhDSLpspAT-hIu.sWshVsslVsAsluIKLFKKFsSKA ......................ss.....AtsAhDuLtspAs-hIGhsWslVVslVGAsluI+LFKKFoSKA 0 0 3 3 +5199 PF05372 Delta_lysin Delta lysin family Finn RD anon Pfam-B_45919 (release 7.8) Family Delta-lysin is a 26 amino acid, hemolytic peptide toxin secreted by Staphylococcus aureus. It is thought that delta-toxin forms an amphipathic helix upon binding to lipid bilayers [1]. The precise mode of action of delta-lysis is unclear. 25.00 25.00 62.60 62.50 17.80 16.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.34 0.72 -6.77 0.72 -4.58 6 86 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 79 3 2 18 0 25.00 94 64.33 CHANGED MAuDIISTIuDFVKLIl-TVpKFTK MAQDIISTIGDLVKWIIDTVNKFTK 0 1 1 2 +5200 PF05373 Pro_3_hydrox_C L-proline 3-hydroxylase, C-terminal Finn RD anon Pfam-B_32425 (release 7.8) Domain Iron (II)/2-oxoglutarate (2-OG)-dependent oxygenases catalyse oxidative reactions in a range of metabolic processes. Proline 3-hydroxylase hydroxylates proline at position 3, the first of a 2-OG oxygenase catalysing oxidation of a free alpha-amino acid. The structure contains conserved motifs present in other 2-OG oxygenases including a jelly roll strand core and residues binding iron and 2-oxoglutarate, consistent with divergent evolution within the extended family. The structure differs significantly from many other 2-OG oxygenases in possessing a discrete C-terminal helical domain. 
25.00 25.00 26.90 26.30 20.50 18.40 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.21 0.72 -3.98 5 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 4 5 15 0 100.30 26 34.00 CHANGED spcEshtDsE..sssssPsVcsRKPaT-EscEuILu.lStlIu+tNFRDIVFlLSKlHFpYKVHssEsYDWLlEIuKRsGD-cLVEKAppl+RFhltcRAlGE ...................hshp..ssthpstlssRsths-pphEpLlu.lSplloctNaR-llhhLuKlHFp+-sssspsaDWLp-lA+RoGDsAllpKAcplRcahlpcRshsE......... 0 2 4 5 +5201 PF05374 Mu-conotoxin Mu-Conotoxin Finn RD anon Pfam-B_34209 (release 7.8) Family Mu-conotoxins are peptide inhibitors of voltage-sensitive sodium channels [1]. 19.30 19.30 20.50 20.40 18.20 14.80 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.30 0.72 -4.14 4 7 2012-10-01 22:06:18 2003-04-07 12:59:11 7 1 5 6 0 12 0 21.60 60 58.75 CHANGED RDCCTPPKKCKDRpCKPh+CCA ..R.CCssPKpC+sRpCKPp+CC. 0 0 0 0 +5202 PF05375 Pacifastin_I Pacifastin inhibitor (LCMII) Finn RD anon Pfam-B_35181 (release 7.8) Family Structures of members of this family show that they are comprised of a triple-stranded antiparallel beta-sheet connected by three disulfide bridges, which defines this as a novel family of serine protease inhibitors [1,2]. 26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -8.66 0.72 -4.44 36 190 2009-01-15 18:05:59 2003-04-07 12:59:11 8 32 46 12 121 244 4 37.20 36 11.86 CHANGED pC..sPGpsaK.p-..CNsCpCstsGh...ssCTh+uC.st.httsss .............sGpsa+..pD..CNsCpCs.sGp...hsCTpphCsst......s....... 0 66 91 112 +5204 PF05377 FlaC_arch Flagella accessory protein C (FlaC) Studholme DJ anon Manual Family Although archaeal flagella appear superficially similar to those of bacteria, they are quite distinct[1]. In several archaea, the flagellin genes are followed immediately by the flagellar accessory genes flaCDEFGHIJ. 
The gene products may have a role in translocation, secretion, or assembly of the flagellum. FlaC is a protein whose exact role is unknown but it has been shown to be membrane-associated (by immuno-blotting fractionated cells) [2]. 27.00 27.00 27.00 33.50 26.80 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.53 0.72 -4.07 10 60 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 54 0 41 64 2 55.90 39 17.60 CHANGED RlsELENcls+lsss...lsol++ENp-l+sslEcl-EsV....+-llsLYElVSp...plNP.Fl ..............RlsElEscls+lcss...lssl+pENcpl+csl-cl--sl....+clhsLYElVSp...pINP.Fl.......... 0 5 21 32 +5206 PF05378 Hydant_A_N Hydantoinase/oxoprolinase N-terminal region Yeats C anon Yeats C Family This family is found at the N-terminus of the Pfam:PF01968 family. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.01 0.71 -4.76 64 2308 2012-10-02 23:34:14 2003-04-07 12:59:11 8 31 1319 0 1145 2398 1126 178.40 28 21.44 CHANGED plGIDhGGTaTDslhhctsp......................thltshKlhoo......csshpGlppulpphttp.........hsplshlhhuTTluTNAlLEpcG............spsuLlsstGapDhlphshttp.th................ps.h.hc.hltlstRhss.c............................Gp.lpsl.D.ptlcphlpplpsptlpulAVshhaShtNPpHEhpltcllcchs ..................................lulDhGGTFTDslh...hcss....................................................stltshKl.os.........................cs...sspGlhpslpphh.tp..h.t..................hspls.......t..lthGT...........T...luTNAllER+G............s+s.u.Ll..s.o..p..G.a.c.D.h.lt.l.s..p......p..t...p..s..ph......as.....h...............................s...h.hhp..p.......hh..t.l.spRlsh.c.................................................................G.pll.p.sl...-.t.p.p.lcpt.l.p....p.l....t....s.........p..G..lc..ulAlshhaSa..t............p...s..p..H..Etpltclhpc..h......................................................................................................... 
0 363 745 976 +5207 PF05379 Peptidase_C23 Carlavirus endopeptidase Studholme DJ anon Manual Domain A peptidase involved in auto-proteolysis of a polyprotein from the plant pathogen blueberry scorch carlavirus (BBScV)[1]. Corresponds to Merops family C23. 25.00 25.00 33.70 43.10 24.50 22.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.85 0.72 -4.01 12 125 2012-10-10 12:56:15 2003-04-07 12:59:11 6 8 50 0 0 124 0 87.30 35 5.41 CHANGED .KNsCVIcAlupslpRpsscllpVLpcpsscclhc-ltpGtGlphhpLp.lFpsFDIsApVs.suchhhlNspGphptpFtlps-HhSa ....hNsCslpAlAsALsR+ps-VLslLscsss.sclh-pLtpGtGlshhhLtphFchFsIsAplshsGchhllNspG+lpuhFclps-Hlpa.. 0 0 0 0 +5208 PF05380 Peptidase_A17 Pao retrotransposon peptidase Studholme DJ anon Manual Family Corresponds to Merops family A17. These proteins are homologous to aspartic proteinases encoded by retroposons and retroviruses. 20.60 20.60 20.60 20.60 20.50 20.20 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.17 0.71 -4.38 8 650 2009-01-15 18:05:59 2003-04-07 12:59:11 8 62 40 0 609 764 6 127.10 25 12.22 CHANGED PTKRElLSplu+lFDPLGhluPlhl+uKlhhQElWpps......luWD-plPp-lppcWtsalcpLssLpQlRlPR.alshssspslpLHuFsDASpcAYuAAlYhRs.p.sssIpVsLlsAKTRVAPlKss.SIPRLELsAAlLLoRLssslpsphshtsschhsWT .................................................................o+Rtlhs.huphaD.PhGhhss..........hhl.hKhhhp.plhpt.t..........hsWc..p...ls..t..ph.......ppW.thhp..ph.......t.......p.t.h..p.hs...R...h..h.................t........t......p......h...p...lh...hFsDAS.p..tuausss....Ylhh..........t.t..t......s.p.h.lh..uKs+l.tP..h..c...t........ol..P.....+hEL.uh.hssph.....l.pth..t........................................................................ 0 393 436 605 +5209 PF05381 Peptidase_C21 Tymovirus endopeptidase Studholme DJ anon Merops A17 Family Corresponds to Merops family C21. 
The best-studied plant alpha-like virus proteolytic enzyme is the proteinase of turnip yellow mosaic virus (TYMV). The TYMV replicase protein undergoes auto-cleavage to yield two products. The auto-peptidase activity has been mapped to the central part of this polyprotein. 25.00 25.00 34.00 50.10 20.40 20.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.22 0.72 -3.93 13 44 2012-10-10 12:56:15 2003-04-07 12:59:11 7 4 32 0 0 51 0 105.50 35 5.58 CHANGED Ps.hsCLLoAlSspT+lSc-cLWcoLpslLPDS.LsNpElsoLGLSTDHLTALAahYshpsslaSscGslhaGlpsupppIsIoHTsGPP....uHFSs...hh.tssshsG .....PthsCLLsAlSstTslo.cpLWpsLsshLPDSLL.ssp-lsshGLSTDHhssLAahaphpsphhospt.lphGlpsAopphpIpHTsGss.......sHFsh...h........sss......... 0 0 0 0 +5210 PF05382 Amidase_5 Bacteriophage peptidoglycan hydrolase Studholme DJ, Garcia E anon Pfam-B_6845 (Pfam7.8) Family At least one of the members of this family, the Pal protein from the pneumococcal bacteriophage Dp-1 Swiss:O03979 has been shown to be a N-acetylmuramoyl-L-alanine amidase [1]. According to the known modular structure of this and other peptidoglycan hydrolases from the pneumococcal system, the active site should reside at the N-terminal domain whereas the C-terminal domain binds to the choline residues of the cell wall teichoic acids [2,3]. This family appears to be related to Pfam:PF00877. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.81 0.71 -4.58 8 235 2012-10-10 12:56:15 2003-04-07 12:59:11 8 30 204 0 32 361 31 134.80 28 32.91 CHANGED shEpuIAWMtARKG+.VoYSMs.hRsGPsSY.DCSSSVYhAL+uAGASsuGWhlNTEhhHsWLhcNGacLIucNtsWsApRGDIhI..WGt+.GuSuGAGGHsGMFIDusNIIHCNY...usNGIoVsNasppshhuGthhsYlY..Rhssuuoss .................................p..p.hltah.....t.p..c.sp..l.s..YS..M....R.G.....ss...u.......h....DCSuu..lhh.A..Lp.t.u.Gh......s..s.......u.h..h.s......s..Tpshhs.L..p.p..sG..a.ppl.s.....p...s...............h.......s....s....pcGDIhI..hGtp...GtSuG.s.sGHsslh...h....s..t....s..p...hIps...s..h..........s.s.s.............................................s.......................................................................... 1 13 19 26 +5211 PF05383 La La domain Bateman A anon Bateman A Domain This presumed domain is found at the N-terminus of La RNA-binding proteins as well as other proteins [1]. The function of this region is uncertain. 21.30 21.30 23.30 21.90 20.80 21.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.81 0.72 -4.17 33 1403 2009-01-15 18:05:59 2003-04-07 12:59:11 12 36 318 14 875 1319 11 58.80 37 10.47 CHANGED lh+QlEYYFSDpNLspDpFL+pphsc..-GaVPlpllusFp+l+pLo..............hphIlpAL+pS .........lt+QlEaYFS......cpNLs+.DtFL....hpph....c....p..........-......G....a.....VPlpllss.Fp+l+pLo...............sDhphlhcAL+pS............................... 0 260 430 669 +5212 PF05384 DegS Sensor protein DegS Moxon SJ anon DOMO:DM08518; Family This is small family of Bacillus DegS proteins. The DegS-DegU two-component regulatory system of Bacillus subtilis controls various processes that characterise the transition from the exponential to the stationary growth phase, including the induction of extracellular degradative enzymes, expression of late competence genes and down-regulation of the sigma D regulon [1]. 
The family also contains one sequence Swiss:Q8R9D3 from Thermoanaerobacter tengcongensis which are described as sensory transduction histidine kinases. 24.20 24.20 24.60 26.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.73 0.71 -5.02 21 178 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 173 0 64 164 0 156.90 37 40.64 CHANGED DcIlpchlcslpcSKc-IFcIuEpuRpEapcLppELcplKpclsclIpcsDcLEhppRhARpRLsEVS+pFppYSEp-I+cAYEpAcplQlcLslhRpcEppLRcRRD-LEpRLhslpcTI-RAEpLluQluVsLsaLsuDLcpluctlE-hpp+QphG ..........DpIlpchlcsl-sSKppIFpIuEpuRpEhcpLpcELcplKppltclIcplDpLEhpp+puRpRLsEVS+sFpcaSEc-I+pAYEcAcclQlcLshh+p+EppLRpRRD-LERRLtslpchlERAEpLluQlsVlLsYLssDLppls.hl-shpp+Q.hG................. 0 32 53 57 +5213 PF05385 Adeno_E4 M_adenovirusE4; Mastadenovirus early E4 13 kDa protein Moxon SJ anon DOMO:DM07617; Family This family consists of human and simian mastadenovirus early E4 13 kDa proteins. Human adenovirus type 9 (Ad9) is unique in eliciting exclusively estrogen-dependent mammary tumours in rats and in not requiring viral E1 region transforming genes for tumorigenicity. E4 codes for an oncoprotein essential for tumourigenesis by Ad9 [1]. 25.00 25.00 31.00 31.00 23.30 19.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.51 0.72 -4.10 8 88 2009-09-11 08:31:06 2003-04-07 12:59:11 6 1 68 0 0 64 1 107.20 50 90.15 CHANGED MsLPsLPsPPVscDpuuCIuWLGLAauollDllRsI++cGlhIoPEAEplLsuLREWLaauapTERp+R+DRRRRslCpuRTaFCapKYEpVR+pLh.YDssppTlSl..ts ....MlLPsLPsPPl.c-ppuCIsWLGhAYsslsDVlRsIRh-Glhlos-ApcLLpsLREWLYauahTERp+R+..D.RRRRtlCsuRstFCapKY-pVRKplH.YsssppTlslt.s...... 0 0 0 0 +5214 PF05386 TEP1_N TEP1 N-terminal domain Bateman A anon Bateman A Domain This short sequence region is found in four copies at the N-terminus of the TEP1 telomerase component. The functional significance of the region is uncertain. 
However the conservation of two histidines and a cysteine suggests it is a potential zinc binding domain. 25.00 25.00 25.60 25.00 24.30 16.80 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.37 0.72 -4.49 8 102 2009-01-15 18:05:59 2003-04-07 12:59:11 6 16 20 0 38 89 0 29.80 72 4.25 CHANGED hEKlHGHlSsHPDILSLENRCLshLPDLQP ..MEK.HGHVSAHPDILSLENRCLATLPDLKs...... 0 4 4 4 +5215 PF05387 Chorion_3 Chorion family 3 Moxon SJ anon DOMO:DM07741; Family This family consists of several Drosophila chorion proteins S36 and S38. The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary [1]. 21.20 21.20 21.20 21.50 21.10 20.70 hmmbuild --amino -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.97 0.70 -4.96 4 75 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 25 0 19 73 0 201.60 45 88.03 CHANGED Mspa.alLhlsAssL..........ApAuYGusGuuG.GhGuuthuAuuuA......tsusGGst.SG..........GshpuusApl.spPspsu+lspsQuphtu.p.s.sYp.hhppusslupSls.sp................................NptpllhppP.sPIIV+pP..PsphlsstPPhVVKsAPVlh+h.PuVlhpp.hlp+hPpPlplpPVYVpVhKPst..Et.hVstspQsYtpsuaGsSphSt............GYGuusutuusAuuAuStusupA ...hsh..hlhAlAA.s.l.........sSANYGsstu.....tu...Gptht.h..ussss...tthsssAsGGstsuu...........p.hsupAth...ps.s.s-tAthhttsQuphtu.p.s.sYp.h.ppupslspSls.sp...............................pNptpllhppP.sPIIV+pP..PsthlsstPPhVV+ssPVlh+h.Pullhpp.hlp+hPp.lplpPVaVpVhKPst..Et.hss.st.sYsQ.s.......p.htt..................uautu.stu.tstsssst.....s............................................... 0 2 2 11 +5216 PF05388 Carbpep_Y_N Carboxypeptidase Y pro-peptide Moxon SJ anon DOMO:DM08350; Family This family is found at the N terminus of several carboxypeptidase Y proteins and contains a signal peptide and pro-peptide regions [1,2]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.68 0.71 -3.91 6 94 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 88 0 65 87 0 107.80 26 20.30 CHANGED M+LosSlL..suLAhshssApALul......QpPlu.......................h..ttpsll-psucslu......-sLcslsspl+shWsEMthpFPsplspLpapopPKhtlstKsss..WDF...........pVpssplsNY+L ............................................................................h....hs.st.Ahsh............Qp.ht.................................htpsh..hppht...ctht...........-.lc.sLos-s+slWsElthhaPcs....h.pphp...h...h...o...tPK..ts++scsc.WDa...........lV+su...cl............................... 0 13 34 58 +5217 PF05389 MecA Negative regulator of genetic competence (MecA) Moxon SJ anon DOMO:DM05333; Family This family contains several bacterial MecA proteins. The development of competence in Bacillus subtilis is regulated by growth conditions and several regulatory genes. In complex media competence development is poor, and there is little or no expression of late competence genes. Mec mutations permit competence development and late competence gene expression in complex media, bypassing the requirements for many of the competence regulatory genes. The mecA gene product acts negatively in the development of competence. Null mutations in mecA allow expression of a late competence gene comG, under conditions where it is not normally expressed, including in complex media and in cells mutant for several competence regulatory genes. Overexpression of MecA inhibits comG transcription [1,2,3]. 
21.10 21.10 22.10 21.50 20.70 20.10 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.32 0.70 -4.80 42 1309 2009-09-11 15:15:24 2003-04-07 12:59:11 7 2 1134 28 147 674 7 222.50 30 97.79 CHANGED MclERIN-sTlKlhIoh-DLp-RGhshpDlhhspc+sEchFashh-Elcp-csFps.sGslshQVhshp.pGl-lhlTKsp.....pt..t....................ptss-plpchlcpph..............................t-hcspl-.............................hllpFsshEDlIsLucplp..tslts...pLYpacspYYLtlhh...pphsptphcshluhhhEauptoshot..h.hLpEYGchlhppsAlppl+p .......................MchE+Is-sTlKlhlohpDLp-RGhshtDlh.h.s.pc+sE-FFashh-El.....c.--.s.Fhs..sG.sL.h...h..pVhshp.cGl-VhlTKuc............cp..php...s.................................sp.ts-phpchlcppl..............t..p.........................................................p..p.t.php............................................................................ttppsthp..hl..lpFscl-Dllshu..cplph...p...t...hps..pLYp...h..s..s...c..YYLslhh.............p..p..h.sp.hhsphhuhlhEa..u...p...s...os...h...Tp..........t...hL...pEYGchlhscsAlpplp.............................................................. 0 45 89 117 +5218 PF05390 KRE9 Yeast cell wall synthesis protein KRE9/KNH1 Moxon SJ anon DOMO:DM07408; Family This family contains several KRE9 and KNH1 proteins which are involved in encoding cell surface O glycoproteins, which are required for beta -1,6-glucan synthesis in yeast [1]. 20.50 20.50 21.60 21.60 19.70 19.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.54 0.72 -3.70 5 120 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 87 0 79 106 0 98.00 42 36.75 CHANGED IDSKSFTVPYTcQTGKoRaAPMQhQPGTKVTATTWSRKYATSAVTYYSThsuoP-QtTTlTPGWSYTISSuVNYATPAPMPSDNGGWYsPpKRlSLoTRKlNs..hR+l ..............tsaTVPYshQT.GhoRYAPMQhQP.sT+l.T..A.p.....Ta..ohp..asT.SuVohaoT.hhs..s.ss.tTTlTsuhoYolsSt.NhAoPAPhPs-...hY....h..................h.......................... 
1 11 35 63 +5219 PF05391 Lsm_interact Lsm interaction motif Bateman A anon [1] Motif This short motif is found at the C-terminus of Prp24 proteins and probably interacts with the Lsm proteins to promote U4/U6 formation [1]. 21.40 21.40 21.40 22.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.69 0.73 -6.64 0.73 -4.30 9 94 2009-01-15 18:05:59 2003-04-07 12:59:11 6 14 80 0 54 86 0 20.50 47 2.66 CHANGED sptsssst.hSNDDFRKhFL+ ..........sscss.hSNsDF+KhFL+.. 0 10 20 34 +5220 PF05392 COX7B Cytochrome C oxidase chain VIIB Moxon SJ anon DOMO:DM07697; Family \N 20.70 20.70 22.00 39.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.79 0.72 -4.40 8 75 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 43 51 35 92 0 75.90 63 98.05 CHANGED MFPLAKsALupLplpSIQQssARQ.uHpK+oP.DFHDKYGNulLASGusFClAsWoYssTQlGIEWNLSPVGRVTPKEW+-Q ...................MFPlu+sALspL.plRSIQQshARQ.uHpK+oP.DFHDKYGNuVLAuGAsFClusWsYssTQlGIEWNLSPVGRVTPKEW+cp... 1 2 3 9 +5221 PF05393 Hum_adeno_E3A Human adenovirus early E3A glycoprotein Moxon SJ anon DOMO:DM07622; Family This family consists of several early glycoproteins from human adenoviruses. 21.40 21.40 21.40 22.50 20.80 21.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.48 0.72 -4.32 3 49 2009-09-11 15:19:00 2003-04-07 12:59:11 6 2 27 0 1 39 0 97.20 47 42.85 CHANGED MTsTTNA.......sTATGLTSTpNhPQVSAFVNNWANLGMWWFSIALMFVCLIIMWLICCLKRKRARPPIY+PIIVLNPNNDGIHRLDGLKsCSFSLsV .........................t.ptss.s..ssh...............s..hTSLA.TNETsVshMasQsau....GL..D..I..T..FLVVCGIFILlVLLYFVCCKARcKSRRPIYRPVIG.......................ps.................. 0 0 0 1 +5222 PF05394 AvrB_AvrC Pseu_avirulence; Avirulence protein Moxon SJ anon DOMO:DM07798; Family This family consists of several avirulence proteins from Pseudomonas syringae and Xanthomonas campestris. 
20.00 20.00 21.80 21.50 18.60 18.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.07 0.70 -5.45 6 40 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 22 4 2 32 1 292.60 48 94.26 CHANGED MGCloSKtssluSsshpsutosSs............pt+ushhs.LpGPsptScLpsapQuLVGsARWPDct......N+sssPcphcYscShYppSRthGuSlAsGcIsSFp-LWpcAp-WRhSRhsps-sl...FuosRsPNoc..FVTPLtcPYc.lh-RhsN+pDucschhpDphFhs.csKsaR.sGpIsGEsIPLTplosusDccA..-RhpchhpcLcspuhpDh...ucPNhIsHTsAEYlPpIhcHlEsLYppAlDsuLSppcALchlAclHWWsApAhPDcRGSAAKAEhssRSIApA+Gl-LPPhRpGIVPDlEAMhhuccpFVcsY.uhFEps .....................................................................................................................................................................................................ssh....uS.sstsuuopSp............thpushhs.LpGPsphScLo.cpQSLVGsARWPDct......N+ssT.cphcYs+ShYpuSRhAGuuIASGpIsSFs-LWpcAppWRhSRhuus-so..sFAs.RhPNoc..FVTsLppPYc.ll-RhpN+sDuchchh-sphhttlcsKsaR.pGtIsGEsIPhTplssusDcsA..sRhcsh..pc.cppthpp.httucPshIsHTsAEYl.pIhcHlEsLYhpAlDsoLpc+cAhcllAclHWWsAsAsPDcRGSAAKAEhusRSIApA+Gl-.hPPhRpGIVPDlEAMhp.SccpFVtpYsshF-p....... 1 1 1 1 +5223 PF05395 DARPP-32 Protein phosphatase inhibitor 1/DARPP-32 Moxon SJ anon DOMO:DM07433; Family This family consists of several mammalian protein phosphatase inhibitor 1 (IPP-1) and dopamine- and cAMP-regulated neuronal phosphoprotein (DARPP-32) proteins. Protein phosphatase inhibitor-1 is involved in signal transduction and is an endogenous inhibitor of protein phosphatase-1 [1]. It has been demonstrated that DARPP-32, if phosphorylated, can inhibit protein-phosphatase-1 [2]. DARPP-32 has a key role in many neurotransmitter pathways throughout the brain and has been shown to be involved in controlling receptors, ion channels and other physiological factors including the brain's response to drugs of abuse, such as cocaine, opiates and nicotine. 
DARPP-32 is reciprocally regulated by the two neurotransmitters that are most often implicated in schizophrenia - dopamine and glutamate. Dopamine activates DARPP-32 through the D1 receptor pathway and disables DARPP-32 through the D2 receptor. Glutamate, acting through the N-methyl-d-aspartate receptor, renders DARPP-32 inactive [3]. A mutant form of DARPP-32 has been linked with gastric cancers [4]. 21.20 21.20 21.50 26.20 20.20 20.00 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.36 0.71 -4.10 12 144 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 40 0 69 136 0 129.30 42 89.78 CHANGED MEs.pu.+KIQFoVPh.tspLDPcAsE.IRRRRPTPATLhhsS-pSSP..DE-psPp.htpsshphs.ppRpp.hshhsPohKtlQhlsEpHLpp.ushpEp.sp.tpp..sp.c.hs.sctpstpTpsp.....pSc.st.s.ppDups-s................G.pGshcpoh.ps.spccpsup....-Ps ..............M-s.ps.+KIQFsVPh....s.plsPctsE.......p........IRRRRPTPAoLhhhs-poSP..--ct.sp.......................hhp.p.ph..ppRhp.sshssPohK...tlQhhhpp.HLtp..tp....p.E..........................................................................................................t................................................................... 0 3 6 21 +5224 PF05396 Phage_T7_Capsid Phage T7 capsid assembly protein Finn RD, Moxon SJ anon DOMO:DM08648; Family \N 24.00 24.00 24.20 24.20 23.70 23.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.59 0.71 -3.97 6 46 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 41 0 2 42 65 122.10 41 41.72 CHANGED spLSEtSYtcLttAGYS+uFVDSYI+GQEALV-QYVsuVlcYAGGpEpFsAIhsHLEusNPsAupSL-uAlps+DLATlKAIINLAGtShsKpFG+pPpRSlTpRAhP.ApPsAsp+...EGFuspu ..spLS--SYpcLAcAGYo+uFVDSYI+G.......Q...EA..........LVcpYVpuVh-YAGGcEpFstlhsHh.csp..NPssupuLssAlpspDlATlKAllNLustohscpFG+pspRsloppAhP.Ap..stpp...puFtsp............................................... 
0 0 1 2 +5225 PF05397 Med15_fungi GAL11; Mediator complex subunit 15 Moxon SJ anon DOMO:DM07536; Family GAL11 or MED15 is one of the up to 32 or subunits of the Mediator complex which is found from fungi to humans [1]. The Mediator complex interacts with RNA polymerase II and other general transcription factors to form the RNA polymerase II holoenzyme [2], thereby affecting transcription through targetting of activators and repressors [3]. This family is found in fungi and the small metazoan starlet anemone. 21.60 21.60 21.60 22.80 21.10 20.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.33 0.71 -4.11 19 89 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 83 0 62 99 1 114.00 24 9.50 CHANGED psp.hpp....l+plhpElu+ss.tLpslo..Los-pKstlpppLppspphhupV-sllshhahlopsccss+pLlQMRhhhKc.h....-ts.pGhallsP-hl-Klh.phpKaachh+tpl ..................................sp..chl+plhp-ss+sshphpsls..los-cKstlpppL..ppspphhupl-s.llshhh.hlspsEcsl+pLlQM+hhlpc.h....-....hhhps.hahls.-hlsphh.phpKaaphhhtph.............. 0 16 32 54 +5226 PF05398 PufQ PufQ cytochrome subunit Moxon SJ anon DOMO:DM07133; Family This family consists of bacterial PufQ proteins. PufQ id required for bacteriochlorophyll biosynthesis serving a regulatory function in the formation of photosynthetic complexes [1]. 22.10 22.10 22.90 22.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.57 0.72 -3.94 11 24 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 24 0 8 20 59 74.20 43 95.60 CHANGED MoDhTossPhh....+s++sP+..sEFhlYFAlIFlsAlPhAhlsWhhsslRptolsp+GPlARAWspAchITPhIFuA .MoDhssssPsp....+s++ss+...sEFhlYFAlIhluAlPlAhlsWhhsslRptplsc+GPhA+AWt-ActITPhIFpA......... 0 1 4 6 +5227 PF05399 EVI2A Ectropic viral integration site 2A protein (EVI2A) Moxon SJ anon DOMO:DM07597; Family This family contains several mammalian ectropic viral integration site 2A (EVI2A) proteins. 
The function of this protein is unknown although it is thought to be a membrane protein and may function as an oncogene in retrovirus induced myeloid tumours [1,2]. 25.00 25.00 40.70 40.70 22.20 22.20 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.63 0.70 -5.10 3 38 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 30 0 21 34 1 200.30 59 94.37 CHANGED MEHKGH.YLHLAFLMTTVF.SLSsGT+sNYT+LWAsSsouWsSlsQNtT.SRN.PEsNNTNPlTPEVsaKuouTscPpTSsPVPLsSTSspchaTPSuspNSPsTup..tNTSKS+GETFKKEVCEENssNhAMLICLIIIAALFLICTFLFLSTVVLANKVSSLKRSKQVGKRQPRSNGDFLASsGLWPAESDTWKR.sKELTGSNLVMQS..oGVLTAsRERKcEEGTEKLT ..............McppG.p.YLHLAFLhTsla.SLSsGT+uNYTcLWssssosWs..sshQNto.u+s.sps.sTsPhTsp...ss.h+s..suTsh...s...p...o...u...t.....ss.LsspSp..........clhhPs.s.spNSssTsp..htsToKS+uElFKK-lCEENssphAMLICLIIIAVLFLICThLFLSTVVLANKVSSL+RSKQsGKRQPRSNGDFL.ASsGLWPAESDTWKR.sKpLTGssLhMQS..oGsLTAsRERKcEEuTEKLs................................. 0 1 2 7 +5228 PF05400 FliT Flagellar protein FliT Moxon SJ anon DOMO:DM07567; Family This family contains several bacterial flagellar FliT proteins. The flagellar proteins FlgN and FliT have been proposed to act as substrate specific export chaperones, facilitating incorporation of the enterobacterial hook-associated axial proteins (HAPs) FlgK/FlgL and FliD into the growing flagellum. In Salmonella typhimurium flgN and fliT mutants, the export of target HAPs is reduced, concomitant with loss of unincorporated flagellin into the surrounding medium [1]. 
22.30 22.30 22.40 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.73 0.72 -3.58 109 993 2009-01-15 18:05:59 2003-04-07 12:59:11 8 1 913 12 186 524 21 83.70 30 72.78 CHANGED hhpsuc.pscW.-plspLtpp.pppll.ptl.t..............tlsts.pppttphlcp....Ilsp-p......plpshhptphscLpptlpptppppphspsY ........................hLphAp.ctpW.-pLlt.Ehs.ahphl.ppltp................................s..sshopsh..pcphpshl+p....IL-N-p......pl+pLlQhRh-cLuphlspuppp+slhsuY................ 0 37 94 141 +5229 PF05401 NodS Nodulation protein S (NodS) Moxon SJ anon DOMO:DM07274; Family This family consists of nodulation S (NodS) proteins. The products of the rhizobial nodulation genes are involved in the biosynthesis of lipochitin oligosaccharides (LCOs), which are host-specific signal molecules required for nodule formation. NodS is an S-adenosyl-L-methionine (SAM)-dependent methyltransferase involved in N methylation of LCOs. NodS uses N-deacetylated chitooligosaccharides, the products of the NodBC proteins, as its methyl acceptors [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.23 0.71 -4.92 7 251 2012-10-10 17:06:42 2003-04-07 12:59:11 6 6 221 5 103 7241 2646 154.90 30 58.66 CHANGED opsssapLLpRELAA-DPWRLDuNsFEpcRHsQhLRLSLupGslusALEVGCAAGAFTE+LAPaCpRLTVIDVhPpAIsRsRpRhpc.uHISWlssDVpQFSosEhFDLIVVAEVLYYltDlsphRsAlcNLVpMLAPsGpLVFGSARDAsCpRWGHlAGAETVIuhLsEsLhEVERlcCpGsSssEDCLLu+FppP.uuS .....................................................................t.hht................s....sDP.W..thp....p....p....h..Y.E....p.....c...+....h...t.......t....h...h.....h.h...s.....L..s...p...t...c....a....p...p....s........h..El...GC..u.s.G....t....L.....o....t...p.....L.........A....t....R.....C........s.......c.......L..........l...s......h....D....l........u.....s......p........A.....l....p......h.........A......+.......p.........R.......l............s.........s..............h.......s.........c.........l......p.......h........h......p........s.........s..........l..............s.......p....................h..........P..........p........t........p.............F........D........L.I...V....l....S....E....l...h....Y....Y...L....s.....s........h....s....c....L......p.....t....h....l.p.....p....h...h.t....t...Ls.ss.G.....p..l....l.......................................................................................................................................thhht........................................................................... 0 19 51 77 +5230 PF05402 PqqD Coenzyme PQQ synthesis protein D (PqqD) Moxon SJ anon DOMO:DM07172; Family This family contains several bacterial coenzyme PQQ synthesis protein D (PqqD) sequences. This protein is required for coenzyme pyrrolo-quinoline-quinone (PQQ) biosynthesis [1,2]. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.23 0.72 -3.96 330 1200 2009-01-15 18:05:59 2003-04-07 12:59:11 7 19 972 1 415 1109 164 72.60 20 58.04 CHANGED s.phllls.ppth................h..............................pLstsushlac.hl.-..u..t.p...o..lpplsptLtpc.a.ssstp....h.ppDltpalppLtp...psllp ...............................................hlll..pthh.......h.......................pLspsushlhphl.-.....G..p..p..o..lspIsp.tLtpc..a..ssstp.......h.t...pDlhpalpplpp.pthl............ 0 140 270 343 +5231 PF05403 Plasmodium_HRP Plasmodium histidine-rich protein (HRPII/III) Moxon SJ anon DOMO:DM07479; Family This family consists of several histidine-rich protein II and III sequence from Plasmodium falciparum [1,2]. 45.80 45.80 46.30 45.90 45.60 45.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.65 0.70 -13.54 0.70 -4.76 3 1003 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 3 0 3 1004 1 117.00 75 96.41 CHANGED MVSFSKNKlLSAAVFASVLLLDNNNScFNNNLhSKNAKGLNsNKRLLHEoQA....HssDAHHAHHsADAHH.........AHHAADAHHAHHAADAHHAHHAADAHHAHHAADAHHAHHAA.AHHAHHAADAHH...AHHAAD...AHHAAD...AHHAAYAHH...AHHAADAHHAAs.........AHHAADAHHAADAHHAAa.......uHpAHHAA......DAHHAsDAH...HtsDAHHsu.........sAHHAADAHHAADAHHAAc .........................................................................AHH............AHHs..ADAHHAHHsADAHHAHHAADAHH...AHHAADAHH..A...HHAAc...AHH...AHHAAsAHH...AHHAAD...AHHAAc...AHHAA.AHH...AHHAADAHHAss...AHHAss..A...H...H...A................................................................................................................................................... 0 3 3 3 +5232 PF05404 TRAP-delta Translocon-associated protein, delta subunit precursor (TRAP-delta) Moxon SJ anon Pfam-B_7178 (release 7.7) Family This family consists of several eukaryotic translocon-associated protein, delta subunit precursors (TRAP-delta or SSR-delta). 
The exact function of this protein is unknown [1,2]. 25.00 25.00 26.90 26.60 19.30 18.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.86 0.71 -4.80 16 146 2009-09-11 06:46:05 2003-04-07 12:59:11 7 4 109 0 73 146 0 155.00 43 90.57 CHANGED hhth...hllslsluhsuupsCssPplp.sosYoToDAsIsophsFlsEFoLpCsN.GApsluLaA-Vs.GKlhPVsRssDsu+YQVSWs.EhKpApSGsYpV+haDEEuYuslRKAQRsGEDhsslKPLFTVsVcHpGu.apGPWlsoEllAsllshllhYhAaosKS+l.u ....................hhh.....hhhhhh..hhshss.u.csCtpPp.lo.sShYTToDAsIsocssFIsEhoLpCpN.sspshuLYA-Vs......GK......hPVsRupDs...s+YQ.VSWoh-aKpApuGsYpV+hFDEEuYutlRKAQ.R.ssEDls.slpPLFoVsVcH+.Gs.apGPWVsoElLAAhlulllhYhAFosKoplp................................. 0 28 32 54 +5233 PF05405 Mt_ATP-synt_B Mitochondrial ATP synthase B chain precursor (ATP-synt_B) Moxon SJ, Fenech M anon Pfam-B_7506 (release 7.7) Family The Fo sector of the ATP synthase is a membrane bound complex which mediates proton transport. It is composed of nine different polypeptide subunits (a, b, c, d, e, f, g F6, A6L) [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.62 0.71 -4.78 31 515 2012-10-02 21:03:42 2003-04-07 12:59:11 9 8 409 4 214 1035 712 150.90 25 68.55 CHANGED hhshhhSKclhlhN-EsllshsFluFlhhshKphusshsphhDschcplpsphpps+stphps.lpcpl-ptp.p.p.l.schspsLhpsp+-ssthptcstppptpstlspEl+s+L-ohlphpsshRph.QcclsptlhspVtpplss.sp...hpppsl...ppultplc .....................hhhhhSpclhlhN-Ehl.s.uh.s.h.lu.h..h..h.h.s.h.K...p.h.G.s..s.h.p...phhDtphpplppphp..phps..sphps..lppph-p.t+..phpth...lph.p.p.h.lh...-...s.pc.......p...s..h.t.h..p...hp...t..t....p...p.....p..t...h...ltpp.lc...t...pLc.hh.l..p.h....ps.p....h........+..pp.Q.c...c...h...sp...h...l....h....t...p....Vt.p.phts..p.........ppphl....tpsls.l.................................................................................... 
0 74 110 169 +5234 PF05406 WGR WGR domain Bateman A anon COG3831 Domain This domain is found in a variety of polyA polymerases as well as the E. coli molybdate metabolism regulator Swiss:P33345 and other proteins of unknown function. I have called this domain WGR after the most conserved central motif of the domain. The domain is found in isolation in proteins such as Swiss:Q9JN21 and is between 70 and 80 residues in length. I propose that this may be a nucleic acid binding domain. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.02 0.72 -4.02 109 1331 2009-01-15 18:05:59 2003-04-07 12:59:11 10 90 798 5 518 1209 72 76.20 28 11.75 CHANGED st....hhhphsshs...p..sssKFYtlpl..h......t........hplhtcWGR..........lGss.GQpphc...sFs..s.ppA.hptap+lhpcKp..ppG....a..tp....t.pht ...................ht...........h....s...p...pspKFatlpl.........................s.......hplahpWG+...........................l..Gsp..Gp...s...plc.................sFs..sttpA.hcthtK...lht-Ks..+cG.atp.p....s.......................................... 0 180 301 419 +5235 PF05407 Peptidase_C27 Rubella virus endopeptidase Studholme DJ anon Manual Family Corresponds to Merops family C27. Required for processing of the rubella virus replication protein. 
21.30 21.30 21.40 364.30 20.10 21.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.05 0.71 -4.53 5 69 2012-10-10 12:56:15 2003-04-07 12:59:11 7 2 10 0 0 71 1 166.00 98 11.55 CHANGED WRCRGWQGMPQVRCTPSNAHAALCRTGVPPRVSTRGGELDPNTCWLRAAANVAQAARACGAYTSAGCPKCAYGRALSEARTHEDFAALSQRWSASHADASPDGTGDPLDPLMETVGCACSRVWVGSEHEAPPDHLLVSLHRAPNGPWGVVLEVRARPEGGNPTGHF WRCRGWQGMPQVRCTPSNAHAALCRTGVPPRVSTRGGELDPNTCWLRAAANVAQAARACGAYTSAGCPKCAYGRALSEARTHEDFAALSQRWSASHADASPDGTGDPLDPLMETVGCACSRVWVGSEHEAPPDHLLVSLHRAPNGPWGVVLEVRARPEGGNPTGHF 0 0 0 0 +5236 PF05408 Peptidase_C28 Foot-and-mouth virus L-proteinase Studholme DJ anon Manual Family Corresponds to Merops family C28. Protein fold of the peptidase unit for members of this family resembles that of papain. The leader proteinase of foot and mouth disease virus (FMDV) cleaves itself from the growing polyprotein and also cleaves the host translation initiation factor 4GI (eIF4G), thus inhibiting 5'-cap dependent translation. 21.10 21.10 21.10 21.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.28 0.71 -4.82 4 452 2012-10-10 12:56:15 2003-04-07 12:59:11 6 8 73 14 0 458 0 200.20 80 11.04 CHANGED MsTocsahl...........lhtsF......hch+slF.s+hstphEhT..........uhPNpcsNCWLNuLhQLaphlD.s...........................LF-thYhsPtshTl-sIc.lp-.Ttl.-LpcGuPPhlVla.lppphpsslGTso+PpplClhsGs.hsLuDFaAulhhKs.pHAVFhhsTS-GWhsIDD-chYP.TPcscsVLs...........asPYD.EsLss-apcphtphLR .......................................MNTTDCFIA...........LlaAl......REIKsLFLoR.sp..GKMEFTLaNGEKKTFYSRPNNHDNCWLNTILQLFRYVDEP...........................FFDWVYpSPENLTL-AI+QLEElTGL...ELHEGGPPALV...IWNIKHL.LaTGIGTASRPSEVCM.VDGTDMCLADFHAGIFLKGQEHAVFACVTSNGWYAIDDEDFYPWTPDPS.DV.L..V...........FVPYDQEPLNGEWKAKVQ++LK....................... 0 0 0 0 +5237 PF05409 Peptidase_C30 Coronavirus endopeptidase C30 Studholme DJ anon Manual Family Corresponds to Merops family C30. 
These peptidases are involved in viral polyprotein processing in replication. 25.00 25.00 217.90 217.80 20.20 15.60 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.82 0.70 -5.34 5 594 2012-10-02 13:45:52 2003-04-07 12:59:11 8 34 208 139 0 625 0 291.30 52 5.41 CHANGED GLWLGDpVYCPRHVIASsTohSsIDY-stLSllRLHNFSIsSss.sFLGVVSApM+GulLlLKVsQsNscTPKYoF+oV+PGESFNILACY-GsssGVYGVNMRSNaTIRGSFIsGACGSVGYNLcNGoVcFVYMHQLELGoGsHVGSDL-GsMYGGYEDQPslQlEussshhTsNVVAFLYAAIIN......GssWWLpSooloVEcYNcWAtsNGFTslsuoDAFohLAAKTGVsVE+LLcuIpsLNsGFGGKQILGasSLsDEFTPsEVl+QMaGVNLQSG+V+sh.....spsslhlhuFa .GLWLsDsVYCPRHVIsosschssssYsclLsphssHsFpVt..s..sN...VpLsVlutpMpGslLhL..pVshuNscTPKYKFsplpPG-oFolLAsYsGsPsGlYtVsMRoNtTIKGSFLsGoCGSVGasl-t.ssVpFsYMHHLELssGsHsGTDLpGcFYGsahDppssQhsssDshhTsNVlAWLYAAl..IN......uspWFlp...ssosol-DFNhWAhsNuaosl....sss..s..s....lshLuAhTGluVpclLsuIpc.L.psGasG+sILGpssLEDEhTPt-VhpQhuGVpLQuphh+hl....hupphhhhh....h............ 0 0 0 0 +5238 PF05410 Peptidase_C31 Porcine arterivirus-type cysteine proteinase alpha Studholme DJ anon Manual Family Corresponds to Merops family C31. These peptidases are involved in viral polyprotein processing in replication. 25.00 25.00 188.90 186.70 23.90 18.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.45 0.72 -4.17 3 229 2012-10-10 12:56:15 2003-04-07 12:59:11 8 8 11 1 0 264 0 105.00 92 4.26 CHANGED L+WclPIuaPTVECoPuGsCWLSAIFPIARMTSGNHNFQQRLV+VAuVlYRDGpLTP+HL.+ELQVYERGCRWYPITGPVPGlALaANSlHVSDcPFPGATHVLTN LRWTLPRAFPTVECSPAGACWLSAIFPIARMTSGNLNFQQRMVRVAAEIYRAGQLTPsVL.KsLQVYERGCRWYPIVGPVPGVuVaANSLHVSDKPFPGATHVLTN. 0 0 0 0 +5239 PF05411 Peptidase_C32 Equine arteritis virus putative proteinase Studholme DJ anon Manual Family These proteins are characterised by a region that has been proposed to have peptidase activity involved in viral polyprotein processing in replication. 
22.10 22.10 23.10 24.50 21.40 22.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.62 0.71 -4.43 4 221 2012-10-10 12:56:15 2003-04-07 12:59:11 7 9 12 1 0 270 0 127.60 80 4.95 CHANGED sp+uC..L.t-.ss.-........G+CWhplF.sh.sh.stscEhchAspFGYQTptGVpGKYLtRRLQVpGLRAVsc..sGshlV.shts.pSWIRHlshAsEsV.ssF.clschcllPNsp....P.h.psp+hFRFGs+KaY ..........hQaGC..LPAD.TVPE........GNCWW+LF.DSLPPEVQhKEIRHANQFGYQTKHGVsGKYLQRRLQVNGLRAVTD.ocGPIVIQYFSVKESWIRHLKLsEEPSLPGFEDLLRIRVEPNTS....PLAsKDEKIFRFGSHKWY.............. 0 0 0 0 +5240 PF05412 Peptidase_C33 Equine arterivirus Nsp2-type cysteine proteinase Studholme DJ anon Manual Family Corresponds to Merops family C33. These peptidases are involved in viral polyprotein processing in replication. 25.00 25.00 29.30 39.20 19.90 17.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.63 0.72 -4.24 4 524 2012-10-10 12:56:15 2003-04-07 12:59:11 7 17 14 0 0 536 0 105.90 66 6.01 CHANGED sYSPPuDGuCGhHCISAIhNchhsssFsTpLsppsRsss-WhSDpDLhQhl.shRLPATl...GtCPSApYhLchsspHWTVThRpG.hAsutLu.ECVpGVC.utEssl .....YSPPAEGNCGWHCISAIANRMVNSsFETTLPERVRPSDDWATDEDLVNsIQIL.RLPAALDRNGACsSAKYVLKLEGEHWTVSVhPG.MSPoLLPLECVQGCC.EHKGGL.... 0 0 0 0 +5241 PF05413 Peptidase_C34 Putative closterovirus papain-like endopeptidase Studholme DJ anon Manual Family Corresponds to Merops family C34. Putative closterovirus papain-like endopeptidase from the apple chlorotic leaf spot closterovirus. 22.40 22.40 22.40 22.40 21.10 20.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.84 0.72 -3.93 5 13 2012-10-10 12:56:15 2003-04-07 12:59:11 6 2 6 0 0 13 0 92.00 74 4.88 CHANGED KlKFIKGKaDCLFsSlAclI+KKPEEVMhFlPHlLDRCISNKGCSLDDLRuICpuYEIKVECEGDCGLlElGSlGLPLGRLlLRGNHFoVsS .....KL+FIKGKFDCLFVSlAEIIHKKPEEVMMFlPHI....hDRClSN.+GCSLDDAKAICEKYEIKIECEGDCGLVECGo.GLSlGRMLLRGNHFoVAS. 
0 0 0 0 +5242 PF05414 DUF1717 Peptidase_C35; Viral domain of unknown function (DUF1717) Studholme DJ, Mistry J anon Manual Domain This domain is found in viral proteins of unknown function. 21.30 21.30 82.20 80.70 19.00 18.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.28 0.72 -3.56 2 9 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 5 0 0 10 0 83.60 67 3.87 CHANGED sLlphllspD.S--LhssIEp...D+slSpchlE........cVhIhhG.ShlhhsD.hpMtslpht.GhsG+haC............pIKsscst.pS..tp DLLNFLVNEDIS-ELL-CIEE...DKGLSHEMIE........EVLITKGLSMVYTSDFKEMAVLNRKYGlNGKMYC............TIKGNHCELSS....KE. 1 0 0 0 +5243 PF05415 Peptidase_C36 Beet necrotic yellow vein furovirus-type papain-like endopeptidase Studholme DJ anon Manual Family Corresponds to Merops family C36. This protease involved in processing the viral polyprotein. 25.00 25.00 27.40 41.00 20.40 17.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.46 0.72 -4.14 2 8 2012-10-10 12:56:15 2003-04-07 12:59:11 6 2 4 0 0 10 0 103.90 76 4.92 CHANGED NLlSRPNNCLVVAISECLGVTLEKLDNLMQANssTlsKYHsWLpKKoPSTWpDCRhFADALKVSMaVKVLSDKPYDLsY.VDGAuSslTLaLhGKESDGHFlAA .NLVSRPNNCLVVAISECLGVTLEKLDNLMQANAVTLDKYHAWLSKKSPSTWQDCRMFADALKVSMYVKVLSDKPYDLTYEVDGAGSSVTLHLlGKESDGHFIAA... 0 0 0 0 +5244 PF05416 Peptidase_C37 Southampton virus-type processing peptidase Studholme DJ anon Manual Family Corresponds to Merops family C37. Norwalk-like viruses (NLVs), including the Southampton virus, cause acute non-bacterial gastroenteritis in humans. The NLV genome encodes three open reading frames (ORFs). ORF1 encodes a polyprotein, which is processed by the viral protease into six proteins. 
20.20 20.20 20.20 20.60 19.70 20.10 hmmbuild -o /dev/null HMM SEED 535 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.95 0.70 -6.21 5 408 2012-10-02 13:45:52 2003-04-07 12:59:11 7 5 351 10 0 397 1 490.50 74 30.25 CHANGED K.DFSHIKLpLAPQGGFD+pGNTPaGKGsMKslTtuuLlARAsALlHERpDEFQLQGs...sYDFDcNRVoAF+pMAADNGlGllcsMRlGs+LKGVTTlEELKsALKsacIusCQIlYpGusYolcSDGKGuVsV-K...l.spssQsssElusAL+RLRsARARsYsSCsQ-llTSIIQlAGoAFVVsRtVKRh.............................chhu+PshGasttVsctutCEo....E-D--saphpspctcsEG.KKGKNKKGRGKK..asAFSoRGLSDEEYDEYKKIREERGGKYSI-EYLpDR-RYEEELAEscAsE-DFs-EpEhcIRQRlFhh+sTRKpRKEERupLGLVTGSDIRKRKPIDWsPsGsLWADD-RpVDYNEKIsFEAPPSIWSRIVpFGSGWGFWVSPoLFIToTHVIPsGspEhFGVPIspIsIH+SGEFs+FRFPKsIRPDVTGMILEEGAPEGTVsSVLIKRsSGELlPLAVRMGTpASMKIQGRlVsGQoGMLLTGANAKuMDLGThPGDCGCPYlYKRGNDWVVhGVHsAATRSGNTVIC ...........s.sDFSHIKLsLAPQGGFDKNGNTPHGKGVMKTLTTGSLIARASGLLHERLDEFELQGPsLTTFNFDRNKVLAFRQLAAENKYGLhDTM+VG+QLKDV+TMPELKQALKsISIKKCQIVYuGsTYTLESDGKGsVKVDR...VQSso...VQTNNELuGALHHLRCARIRYYV+CVQEALYSIIQIAGAAFVTTRIIKRh.....................NIQ.-LWSKPQVEsTEEsTsKDGCPK.....PKD...s..E.EFVIS..SDDIKTEG.KKGKNKsGRGKK..HTAFSSKG..LSDEEYDEYKRIREERNGKYSIEEYLQDRDKYYEEVAIARATEEDFCEEEEAKIRQRIF..RPT..RKQRKEERASLGLVT.GSEIRKRNP-DFKPKGKLWAD...DDRSVDYNEKLSFE.APP.SIWS..RIVNFGSGWGFWVSPSLFITSTHVIPQGAKEFFGVPIKQIQVHKSGEFCRLRFPKPIRTDVTGMILEEGAPE.GTVsTLLIKRuTGE.LMPLAARMG.THATMKIQGRTVGGQMGMLLTGSNAKSMDLGTTPGDCGCPYIYKRGNDYVVIGVHTAAARGGNTVIC........................................................................................................ 0 0 0 0 +5245 PF05417 Peptidase_C41 Hepatitis E cysteine protease Studholme DJ anon Manual Family Corresponds to MEROPs family C41. This papain-like protease cleaves the viral polyprotein encoded by ORF1 of the hepatitis E virus (HEV). 
27.00 27.00 32.50 39.30 26.70 26.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.97 0.71 -4.69 4 183 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 14 0 0 193 0 150.30 78 9.88 CHANGED AQCRRWLSAGFHLDPRVLVFDESsPCRCRThlRKsAuKFCCFM+WLGQECTCFLQPAEGlVGDQGHDNEAYEGSDVDPAEsAhhDISGSYlVsGRpLQsLYpALslPuDlsARAuRLTATVcVopssGRL-CcThlGNKTFpToFVDGApLEsNGPEQYsL .AQCRRWLSAGFHLDPRVLVFDEusPCRCRoFL+KsAtKFCCFMRWLGQ-CTCFLpP.sEGhVG-pGaDNEAaEGS-VDPAE.AplslSGoYhVpGpQLpsLYpALsIPpDlAARAuRLTATVElususGRLpC+TlLGNKTFpTshsDGApLEANGPEQYVL............................ 0 0 0 0 +5246 PF05418 Apo-VLDL-II Apovitellenin I (Apo-VLDL-II) Moxon SJ anon DOMO:DM08067; Family This family consists of several avian apovitellenin I sequences. As part of the avian reproductive effort, large quantities of triglyceride-rich very-low-density lipoprotein (VLDL) particles are transported by receptor-mediated endocytosis into the female germ cells. Although the oocytes are surrounded by a layer of granulosa cells harbouring high levels of active lipoprotein lipase, non-lipolysed VLDL is transported into the yolk. This is because VLDL particles from laying chickens are protected from lipolysis by apolipoprotein (apo)-VLDL-II, a potent dimeric lipoprotein lipase inhibitor [1]. Apo-VLDL-II is produced in the liver and secreted into the blood stream when induced by estrogen production in female birds. 20.10 20.10 20.10 59.90 18.70 17.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.87 0.72 -3.80 3 14 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 11 0 5 15 0 75.10 74 79.88 CHANGED KSIFERD.RRDWLVIPDAVAAYIYEAVNKMSPRAGQFLlDlSQTTVVSGTRNFLIREToKLTILAEQMMEKIKoLWNTKVLGY KSIFERD.RRDWLVIPDAlAAYIYEoVNKMSPRVGQFLsDsuQTsVVsGTRsFLIRETsKLolLAEQLMEKIKsLWYTKVLGY.. 
1 0 0 2 +5247 PF05419 GUN4 GUN4-like Finn RD anon Manual Domain In Arabidopsis, GUN4 is required for the functioning of the plastid mediated repression of nuclear transcription that is involved in controlling the levels of magnesium- protoporphyrin IX. GUN4 binds the product and substrate of Mg-chelatase, an enzyme that produces Mg-Proto, and activates Mg-chelatase. GUN4 is thought to participates in plastid-to-nucleus signaling by regulating magnesium-protoporphyrin IX synthesis or trafficking. 21.30 21.30 22.60 22.50 20.70 21.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.88 0.71 -4.34 79 375 2012-10-11 20:01:00 2003-04-07 12:59:11 7 23 109 3 134 482 116 131.80 32 38.92 CHANGED LpphLtspc.......accADcpTtpl..........hlplAsc.....p...pcsalhhs-lpshPspDLpoIDpLWlpYSpG+FGFSVQ+cIa.pul..Gss....................acpFss+lGW.+......ps......sp.....Wh..p....Y.sphs.a.....sls..APpGHLP...............h.hhsthhst.shtt ..................LpphLtptpaccADpcTtpl..........hhplusc................p.......pc..salhh.p-lc..p...h.PspDLpsIDpLWhpaS.pG+..FGFSVQpcIa.ppl..Gsp.............................appFss+l..GW..+..........pp..................sp.......W.h..p......a..sphsa...............sls......AP.pGHLP......sh.h...........th.................................................... 0 20 91 130 +5248 PF05420 BCSC_C BCSC_N; Cellulose synthase operon protein C C-terminus (BCSC_C) Moxon SJ anon Pfam-B_10335 (release 8.0) Family This family contains the C-terminal regions of several bacterial cellulose synthase operon C (BCSC) proteins. BCSC is involved in cellulose synthesis although the exact function of this protein is unknown [1]. 
23.10 23.10 23.10 23.80 22.90 23.00 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.87 0.70 -5.32 29 686 2009-01-15 18:05:59 2003-04-07 12:59:11 6 129 625 0 98 549 12 338.60 55 29.89 CHANGED hSpLsshpsPlpschPh..hsG+hhh+sssVplsAGohsss...............hspFGsssstsssst........................sQsssGlululGacsc....sapuDIGTTPlG.FphsslVGGlpassclu.s..huaslssuRRslTsSlLSYAGs+DP.................t..........o...GppWGGVspsGsphshoaDtGps.Glau.shuaphlsGcNVpcNpc..hchhsGhYhpllppscpp.lolGlshhhhpYc+N.uhaTaGpGGYaSPQpYhululPlpastRpsp.ao..aclpuSluhpph+pcusshaPtpshhpsth..........................sshasusSssGhu..YslpAtsEYRlusphhlGuslslppupsYs.spuhlYlRYshssh .........................................................................Y.SDLKAHTTM.LQVD.APa..uDGRhFFRoDhVNMssGoFSTsu...........DGpacssWGTCTLpcCS.Gsc...............................................................................................SQuDoGASVAVGWcN-.......sWp..aDIGTTPM.G.FN..VVDVVGG.lSYScDIG....P...LGYTlNAHRRPISSSLLAFGGQKDu...........................s...........T..GpKWGGVRAsGsGlSLS..YD+GEANGVWA.SLSGDQLoGKNVEDNWR..VRWMTG...YYYKVIN.....pN.N....R.R.V...TlGLNNMIWHYDKDLSGYSLGQGGYYSPQEYLSFAlPVhW.RpRT.EN..WS..WELGuSsSWSHSRs+ThPRYPLhN..LIPsD.app...A...................scQossGG.SSpGFG..YTARALlERRVTuNWFVGsAlDIQQA.KDYsPSHhLLYVRYSAAG.W........................... 0 12 33 68 +5249 PF05421 DUF751 Protein of unknown function (DUF751) Moxon SJ anon Pfam-B_10849 (release 8.0) Family This family contains several plant, cyanobacterial and algal proteins of unknown function. The family is exclusively found in phototrophic organisms and may therefore play a role in photosynthesis (personal obs:Moxon SJ). 
21.40 21.40 23.00 23.00 21.00 21.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.82 0.72 -3.68 28 118 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 112 0 51 110 86 60.90 38 59.89 CHANGED FasNVhRYPRYhIohhLGlhhshhpPlhsLh+p.PloulsllhhlluslhhlshsL+AMlsh .FasNVsRYPRYhIohsLGlhhslhpPlhtLh.Ks.PloAlhlluhllushhhlhhsL+AMlG.......... 0 15 36 47 +5250 PF05422 SIN1 Stress-activated map kinase interacting protein 1 (SIN1) Moxon SJ anon Pfam-B_10677 (release 8.0) Family This family consists of several stress-activated map kinase interacting protein 1 (MAPKAP1 OR SIN1) sequences. The fission yeast Sty1/Spc1 mitogen-activated protein (MAP) kinase is a member of the eukaryotic stress-activated MAP kinase (SAPK) family. Sin1 interacts with Sty1/Spc1. Cells lacking Sin1 display many, but not all, of the phenotypes of cells lacking the Sty1/Spc1 MAP kinase including sterility, multiple stress sensitivity and a cell-cycle delay. Sin1 is phosphorylated after stress but this is not Sty1/Spc1-dependent [1]. 
19.40 19.40 21.80 21.00 19.00 19.00 hmmbuild -o /dev/null HMM SEED 523 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.56 0.70 -5.99 4 397 2009-09-11 11:45:00 2003-04-07 12:59:11 7 8 204 2 264 433 0 250.20 17 63.17 CHANGED +lpoDDs.......GhC-hllls.Dh..................phc+.p.ss.sGGussphptop.................DLu-sp-ss.scsh-I..a.-hsh++.cssohpc.E+Lc..............................hu+psuh+hKs.shp-...phpssE.cphFp+pEl.........................................s.ssKsPhSuhpShLpcpLtcsstQhsN.htEau+FsGpup...sso++IslalPhpss.-...+..PLplplhsoA+lpElIGalhhQYs.pth-s.Lps...slstYsLalsEDDGEl-pDFPPLDsscPluKFGFosLuLVp+hptsssoppcp.hsh..hs+t...I..sN.ch.h+.h..pu.p.pcts.c.pts........hcYh.ptssp..ssl-lcsolpsAtshphshlpcNuuRuc..............................................................c-.p.phhshpDhhpu.happFhl.hhc.htF..+p-hpLuIsG-hlcI......ptSspha.p.KssSIsus.ls.hCclhEcp....Alh+hhh.pNpsoppasFts.uAshst.lss ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..............................................................................................................................................................................
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 82 134 215 +5251 PF05423 Mycobact_memb Mycobacterium membrane protein Moxon SJ anon Pfam-B_10885 (release 8.0) Family This family contains several membrane proteins from Mycobacterium species. 20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.89 0.71 -4.44 7 764 2009-01-15 18:05:59 2003-04-07 12:59:11 8 2 115 0 144 501 0 124.40 29 72.34 CHANGED hlKRhWIPLVIllVVsVuGhoV.RlHuhFGScppsohsss..psscPFNPKpVpYEVFGPsGThAsIsYhDssucPp+lsussLPWShslsTTLsAVhuNlVAQGsusSIGCRIhVsGsVKsERlssplpAaTaClVKSu ...............................................................s.........................................................s............shsP..+.pVsYEVhG..s..s..G..s..hs..s..lsY.h.....D.....s..s.u..ps...pphps....ssLPW..S..hsl..s...s..........s.........s.u....s.s.s...s.l..s..A..p..u...s.....u..s..s...l.sCRIo.V.s......GtVp.s.cp..s....p.t..ts.s.C........................... 0 21 83 126 +5252 PF05424 Duffy_binding Duffy binding domain Moxon SJ, Bateman A anon Pfam-B_11112 (release 8.0) Family This domain is found in Plasmodium Duffy binding proteins. Plasmodium vivax and Plasmodium knowlesi merozoites invade human erythrocytes that express Duffy blood group surface determinants. 
The Duffy receptor family is localised in micronemes, an organelle found in all organisms of the phylum Apicomplexa [1]. This family is closely associated on PfEMP1 proteins with PFEMP, Pfam:PF03011. 25.50 25.50 25.80 25.50 25.20 25.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.43 0.71 -4.46 409 7405 2012-09-24 07:47:50 2003-04-07 12:59:11 6 68 16 19 63 7892 0 148.80 32 63.35 CHANGED GACAP.RRh+lCsp.NLE.pl.........s....s.p.......s.........h.........ps.............scsLLscVhh.uAKaEGpslhppa.s......pp.................s...............s......op...............lCTsLARSFADIGDIlRG+Dlapsssp...............pccl-psL+pIF...pcIhcplsst...............hpppYpt.s...s....sa.hpLREDWWssNRcp.............VW+AlTCsus...p.....s.s.p.YFpt..s..s.s...........ss.t.....s..s.....p.....p.cC.........pp..s..s.............s.......p.................sPT...hD....YVPQaLR ................................................................................................................................................................................................................................................................................................................................oh.hDIGDI.l..R....G...+........D......La.....huspt.......................+ccL.-cpL.....Kc.IF.p.p.Iapplppt...................hpt.Yt.s....s.......sa.hpLR..EDWWs.s.N.Rp.p..................VWc.AlTCsst....p....s.t....p...YF.+t.s..ss................tt.......p..s....p...p.pC......ts......t...........................t....p......................sP.T..h.D....YVP.QaLR........................................................................................... 0 56 57 61 +5253 PF05425 CopD Copper resistance protein D Finn RD anon Pfam-B_19002 (release 8.0) Family Copper sequestering activity displayed by some bacteria is determined by copper-binding protein products of the copper resistance operon (cop). CopD, together with CopC, perform copper uptake into the cytoplasm [1]. 
30.10 30.10 30.10 30.10 30.00 30.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.89 0.72 -3.83 210 1897 2012-10-01 21:57:53 2003-04-07 12:59:11 8 15 1406 0 513 1578 303 102.10 27 27.84 CHANGED ttshpRF....uphuhhuV...ssll....loGlh.h....hhh.........................h...sshhs..osYG.....hhLhsKluLhlshhslushpRh.....hlhPpl......................tsspsht.tlpphltlEhslulslls...lsusL .................h..shtRF....SthuhhuV.......ssll....loGlhs.u..hhh......hs.................h...sslhs...osaG......plLLhKhsLlhlhlsluhhs.Rh...............hlh.P+h................................................ttspsst.......h.hht.hs...t.l.E.hslus.llls...lsuh.............................................................. 0 124 301 422 +5254 PF05426 Alginate_lyase Alginate lyase Moxon SJ, Mistry J, Murzin A anon Pfam-B_11800 (release 8.0) Family This family contains several bacterial alginate lyase proteins. Alginate is a family of 1-4-linked copolymers of beta -D-mannuronic acid (M) and alpha -L-guluronic acid (G). It is produced by brown algae and by some bacteria belonging to the genera Azotobacter and Pseudomonas. Alginate lyases catalyse the depolymerisation of alginates by beta -elimination, generating a molecule containing 4-deoxy-L-erythro-hex-4-enepyranosyluronate at the nonreducing end [1]. This family adopts an all alpha fold [2]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild --amino -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.78 0.70 -5.04 59 980 2010-01-08 16:30:20 2003-04-07 12:59:11 7 39 535 10 303 910 259 247.60 17 47.74 CHANGED slss+hth.ssussppDhhuts.ah......sstps............sshshhp+-Gph.........tphtsstpthtphupss.....tsLuhsathstpttastt..ssch.l+sWhlsststhpsplphup.........................h..h..h..pttsshsspp.pthcp.....Whsphhchhhs.t...tupsctstp......NNHuhWhshpVsuhAhhhscpchhchsh.......pp.htpshh.sQls.sDG...shPpEltR.............s+uhpYssasLtslshhAph....upp.tG...DLat....stsutslt+uhcalhsh..st.s.ppa ..................................................................................................................................................................tsht.h.p.stts.....hthuLtahho..s...pp...p..Yupt.......utch.l.......ps..W...hh...s...t..s.t.h.s.s.phphs............................h.t.l.ht...............hh.sh.hh.......psh.ssh.s..s.p..p.hpth.pp............ahp.p....h.hp.......h.hp...........ttpt..htt......................sNa.shhtshthhuhu.lhh....s....cc....s....h.hppul..............................................pt...h....ht...t....t....h..........s.p.....l.......t..............s......G...............pphtR...............................tpshtYsha...sLtshshhuph....utp...s.......Dla......s.psttl.htuhphhh.h.......................................................................................... 1 102 191 258 +5255 PF05427 FIBP Acidic fibroblast growth factor binding (FIBP) Finn, RD anon Pfam-B_19083 (release 8.0) Family Acidic fibroblast growth factor (aFGF) intracellular binding protein (FIBP) is a protein found mainly in the nucleus that is thought to be involved in the intracellular function of aFGF [1]. 
20.60 20.60 20.70 22.80 20.10 19.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.27 0.70 -5.22 7 124 2009-09-10 22:14:15 2003-04-07 12:59:11 6 4 87 0 83 127 1 291.70 43 86.60 CHANGED ss-lDVFluNhTllD.ElYQLWl-GYoso-AVslhhppGhlcphGssuDlltSDshDHYRTaphlE+hLHtPsKLhpQ.hF.QlsPpppuhLIE+YYuhD-uhsREllGKKLSpts+KDLD-lupKTGlpLKSCRRQFDNhKRlFKsVEEh.GsLspNI+QpFlLs-cLAccYAAlVFhAs.RFETsK+KLQYLoFpDFhhCuptlhppWThsh.csts-..hDh-hDKEFL.DL+El+hLlsccc.l...KpLVshpL+sphh..sapEh-uNF+oh.RullsIAssLp+s+-lRshFlDLsEKhIEPh+tssWspppVphFhsphTpSVhsL-s.Rc....psLW-RYMtslpsCLh+MYHs .........................................................h.s-lDlFluNhTllD.-l..YpLWl-GaSss-AVsh.hhp.....pG....h..h..pp..........h.....s.Ast...s.....lltSDshDHYRTFphl.E.+hLcsPsKLh..p.QhhF.QlsPpppthLIE+YYshD-shsRElLGKKLSptp+KD.........LD-lupK........TslpLKSCRRQFDNhKRlFKsV.E-....h................G..........s..LspNI.ppcFlLSc..cLA+cYAslVFhAs.RFET..sK+..K..LQYLoFtDhhhCup.hhp.WThsh.....................p...-......hDh-hD+-FL.-L+-L+.h.Ll...sccc.l...hKpLVshtL+s......h..l........shp-h-sN.F+.sh.RullslAspLp+s+-lRshFl....DLsE+hl-PhRpstWshpplphFLsthst.....s.s..hpl..-h..+c..............psh....Wc....RYMtsl.p.Cl.phYc...................................................... 0 38 49 66 +5256 PF05428 CRF-BP Corticotropin-releasing factor binding protein (CRF-BP) Moxon SJ anon Pfam-B_11928 (release 8.0) Family This family consists of several eukaryotic corticotropin-releasing factor binding proteins (CRF-BP or CRH-BP). Corticotropin-releasing hormone (CRH) plays multiple roles in vertebrate species. In mammals, it is the major hypothalamic releasing factor for pituitary adrenocorticotropin secretion, and is a neurotransmitter or neuromodulator at other sites in the central nervous system. 
In non-mammalian vertebrates, CRH not only acts as a neurotransmitter and hypophysiotropin, it also acts as a potent thyrotropin-releasing factor, allowing CRH to regulate both the adrenal and thyroid axes, especially in development. CRH-BP is thought to play an inhibitory role in which it binds CRH and other CRH-like ligands and prevents the activation of CRH receptors. There is however evidence that CRH-BP may also exhibit diverse extra and intracellular roles in a cell specific fashion and at specific times in development [1]. 25.00 25.00 25.30 25.20 19.00 18.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.95 0.70 -5.59 7 97 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 73 0 52 94 0 264.40 44 90.03 CHANGED Mpsuucs.................phthl.L.lsshtupsRh.t.pssustshhhLhsuslpRE.p.-..a...hcChshhuh.G.aha.ust.t.sCuhalIuEPDpllpIchcpsslsCpsGshl...........................................................tlhDGW.LpGEhFPuppDH.LPh.cRhs-aCss......hss++hhRSSQNsAhl.aRl.s.GsuFohsVRhh.NshPCNllsQsspGhFTMss.tQtRNCSh.SllaPsslclssLplGt...pslthp..s.hsCtssGDhVElhGusGLDsupMhshuslChshss....sp.pIhCssosVRhVSoGpa.Npssh.hRphD..-h-hsp ..............................................................................................................t.p......t..aph.hcC.hphhuh.G..Fhahu.p.....ps.phhCuhahIuEP-phIplchc.lslsCps.Gshl...........................................................p.lhDGW.LpGEhFPSspDHsLPhp-RYhDaCss.................s.s++shRSSQNVAMlhFRltpsGsuFolol+p..N.hPCNllSQ..ossGtaThssstQ+RNCSa.SllYPsslplucLslGph............ps.lp..h...p....c......s.....hsCt..t...u..DaVElhGusGlDso.pMh.huslChshpss...sp.plsCssosVRhVSSG+a.NplohphR.l-..-lp...p............ 0 10 15 33 +5258 PF05430 Methyltransf_30 DUF752; S-adenosyl-L-methionine-dependent methyltransferase Moxon SJ, Eberhardt R anon Pfam-B_12088 (release 8.0) Family This family is a S-adenosyl-L-methionine (SAM)-dependent methyltransferase. 
It is often found in association with Pfam:PF01266, where it is responsible for catalysing the transfer of a methyl group from S-adenosyl-L-methionine to 5-aminomethyl-2-thiouridine to form 5-methylaminomethyl-2-thiouridine [1,2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.20 0.71 -4.45 17 1653 2012-10-10 17:06:42 2003-04-07 12:59:11 6 6 1635 15 373 1468 553 123.70 41 22.62 CHANGED schtphtctLhppash...h.Ghhclths.stssLcLhhGDs.pptlspls....tpsDAWFLDGFuPs+NP-MWs.plhptluchscssGshuTaouAuhVR+uLhpAGFp.lp+p.GFGpKREhlpushs ........................................................pht.huppL.tpa....Phs.......hsG.....h.....a.Rl..hhs......t...scl.....sL.....-LhaG..D.h.pc.h..l.spLs....................tplDAWFLD......G.FAP....u.K...N..P...-.....MWops.....LFsthA....+L.u.+...s.....G..u...TlA..TFTu......A..G..h.....VR..RuLppAGFs.h.p.K.....t.+.GF.G.pKREMLpGhh..................................... 0 84 217 310 +5259 PF05431 Toxin_10 Toxin_P42; Insecticidal Crystal Toxin, P42 Finn RD anon Pfam-B_19338 (release 8.0) Family Family of Bacillus insecticidal crystal toxins. Strains of Bacillus that have this insecticidal activity use a binary toxin comprised of two proteins, P51 and P42 (this family). Members of this family are highly conserved between strains of different serotypes and phage groups [1]. 
28.60 28.60 28.70 32.90 27.80 28.10 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.40 0.71 -4.67 7 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 11 0 3 42 0 196.30 38 50.73 CHANGED PphPp.sopLpshPcho...sIs.sGs.Pp.....lhG.TLIPCIhVsD.sI.shssphKpoPYYlLc+hQYWpphhuu..hlsstp+pohp.chGhspsppsohlsslshpIssD...thpF.....GstTssh+pQlsppLph.hSpssp.htchpcph-hsNsp.stshphstFlhtppapLhRhNGo.lps..hhshDpchhslpoYPs ......................PphPp..sppL.shPcho...sID..Gh.Ppps.pslhGtTLlPCIMV.sDssl....sp...ss+hKsoPYYllc+ppYW..pphhuu..lls....stpccohs.csGlscssQsoMhsslshpIGuD..hGh+F............GstTpsIKsQloc-LphphSpoop.htcpsEppEhsNss.shshpasuallsocapLpRssGoplps..hhshDpchhsltoYPp............. 0 2 2 3 +5260 PF05432 BSP_II Bone sialoprotein II (BSP-II) Moxon SJ anon Pfam-B_12103 (release 8.0) Family Bone sialoprotein (BSP) is a major structural protein of the bone matrix that is specifically expressed by fully-differentiated osteoblasts [1]. The expression of bone sialoprotein (BSP) is normally restricted to mineralised connective tissues of bones and teeth where it has been associated with mineral crystal formation. However, it has been found that ectopic expression of BSP occurs in various lesions, including oral and extraoral carcinomas, in which it has been associated with the formation of microcrystalline deposits and the metastasis of cancer cells to bone [2]. 
25.00 25.00 25.00 40.00 24.90 24.50 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.25 0.70 -5.13 4 53 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 33 0 23 49 0 263.80 57 94.51 CHANGED FSMKNapRRAKhEDSEENGVFKYRPRYYLYKHAYFYPsLKRFsVQuuSDSSEENGsGDSSEEEtEEE.sSNEEtN...NE-S-uNEDE-uEAENhTLSosThGYGu-sTsGTusIGLAAlpLP+KAGshtpKusKc-ESDE-EEEEE...EpEEpEAEV--NEQusNGTSTNST.EVssGNGoSGG....DNGEE.GEEESVTEAtsEGTTVA.......u.TTss..sGhpsTTP.pElhGTTsPPhuKsTTst..GEYEQT.GspEY-ssYEIY-uENGEP.RGDsYRAYEDEYSYYKGRGY-uYsGQDYY .........................FShKNhHRRsKlEDSEENGVFK..aRPRYYLYKHA.YFYPPLKRFsVQsu.SDSSEEsGDGs.S.pEEEtE.EE..TSNE.cEN.....NE-...Spt..NE..DE-..uEu..ENoTLSs...oT.uY.Gt.-sTsGsG.hGL.A.Al......plPKKu.Gt..htpK.us+p.cESD.E......-..EEEEE.....tENEcpEAE.VDENEQGlNGTSTNST.Es..-sGNGuuGG.........DsGEE..tEE.ESVTEAssEuT..Tss.......scTTssssuGapsTTPsp.ts...YtTTsP..PhtcsTTstYts.EYEpT...GsNEYDsGYEl.....Y-.sENGEP.RGDsYRAYEDEYS..Y...YKG+GYDuY-GQDYY........... 0 1 2 5 +5261 PF05433 Rick_17kDa_Anti Glycine zipper 2TM domain Moxon SJ, Bateman A anon Pfam-B_13382 (Rel 8.0) & Pfam-B_6 (Rel 24.0) Family This family includes a putative two transmembrane alpha-helical region that contains glycine zipper motifs [2]. This family includes several Rickettsia genus specific 17 kDa surface antigen proteins [1]. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild --amino -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -9.09 0.72 -4.29 174 3882 2012-10-03 03:18:43 2003-04-07 12:59:11 10 13 1511 0 736 2475 469 41.70 44 28.29 CHANGED hGshhGAssGulhGstl.ut......psthhusll.GAssGuhlGstlu ..............GsllGAlsGGllGspl.Gu.......GsGp.s..l.uTlu.GAlsGGlsGpplt................... 0 114 335 536 +5262 PF05434 Tmemb_9 TMEM9; TMEM9 Moxon SJ anon Pfam-B_12447 (release 8.0) Family This family contains several eukaryotic transmembrane proteins which are homologous to human transmembrane protein 9 Swiss:Q9P0T7. 
The TMEM9 gene encodes a 183 amino-acid protein that contains an N-terminal signal peptide, a single transmembrane region, three potential N-glycosylation sites and three conserved cys-rich domains in the N-terminus, but no known functional domains. The protein is highly conserved between species from Caenorhabditis elegans to man and belongs to a novel family of transmembrane proteins. The exact function of TMEM9 is unknown although it has been found to be widely expressed and localised to the late endosomes and lysosomes [1]. Members of this family contain Pfam:PF03128 repeats in their N-terminal region. 24.80 24.80 25.50 24.90 24.50 24.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.93 0.71 -4.46 6 196 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 89 0 107 165 0 131.20 46 75.14 CHANGED PY+chsG+Iapp.........NVo.KDCNCLHVV-P...hPV.G+DVEAYCLcCECKYEERSooTIKVsIIIYLSllGhLLLYMlFLhllDPLL.RKhssao..lpo-E-s-DspPhssAtsslstuRucsNVLNRVEaAQpRWK+QVQEQRKoVFDRHshL .......................................uplY.pp.........Nl.o..pc-......C...............sChcVV.cP....M.PV..GtD...lEAYCLhCECKYEERSosTIKVslIIaLollGhLLLYMlaLhLl-Pll...R+........sh.s.p.l..ps--...........-...........s..........tDt..ps.hss...............u...t....s........s..........sts...tu..ssVLp+V-htQpRWKhQVpEQR+slFDRHhhL........................................... 0 27 36 67 +5263 PF05435 Phi-29_GP3 Phi-29 DNA terminal protein GP3 Moxon SJ anon Pfam-B_14111 (release 8.0) Family This family consists of DNA terminal protein GP3 sequences from Phi-29 like bacteriophages. DNA terminal protein GP3 is linked to the 5' ends of both strands of the genome through a phosphodiester bond between the beta-hydroxyl group of a serine residue and the 5'-phosphate of the terminal deoxyadenylate. This protein is essential for DNA replication and is involved in the priming of DNA elongation [1]. 
25.00 25.00 449.40 449.30 21.70 20.60 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.70 0.70 -5.18 3 7 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 6 0 8 0 265.90 66 100.00 CHANGED MARsSRIRITcNDKAEYAKLVKNTKAKIoRTKKKYGlDLSuEIslPsLESFcTREQFNKWKc+AESFTNRANpcYQFVKNcYGVVASKAKIsEIt+NTKEAQRlVDEpl-chcDKEaISGGKsQG.TVGQRhplLSPucVTGIuRPcDFDFsKVRTYuRLRTLEEuMccRosPsYYE++MpQLQpNFI+.SVEGSFNS..tADELVE+LKKIPPDDFYELFLhasEISFEpFDSEGssVEASEuhLpKI+SYL-cYcRGDsDLSLKGF MARssRIRIpsNDKA.YA+LVKNTKAKIuRTKKKYGlDLosEI-lPsL-SFpTRtpFNKWKcptpSFTNRAN.pYQF.KNtYGlVASKAKIsEIt+NTKEsQRlVDEphcthcDK.ahuGGK.QG.TltQRhthhSPupVTGIsRPpDFsFscVRSYuRLRTLEEuMt.+ssPpYY-++MhQhp.NFIc..VEtSFNS..huDELlEcLKKIPPDDFaELaLhhsEISFE.FDSEGpsVEssEu.l.KIhSYL-pYcRGDhsLsLKGF 0 0 0 0 +5264 PF05436 MF_alpha_N Mating factor alpha precursor N-terminus Moxon SJ anon Pfam-B_12643 (release 8.0) Family This family contains the N-terminal regions of the Saccharomyces mating factor alpha precursor protein. All proteins in this family contain one or more copies Pfam:PF04648 further toward their C terminus. 21.90 21.90 25.30 25.50 20.00 18.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.85 0.72 -4.30 4 57 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 33 0 25 50 0 85.40 49 57.30 CHANGED MKF.ohhoAsllAAsShsAs..ts.o---ou.pIPAEAlIGYLDLtGDhDIAVlPFSNuTssGLLFlNTTIsshApKEpussLsKRE .........M+Fs.Slhos.hhhAAs.S.shAu..s.....t..spo--..-tA.....plPAEAlIGYLDLtGDpDlAlLPF..SNuTssGLLFlNTTIAptA....tc.E...ps...s.oLsKRE.............. 0 2 12 20 +5265 PF05437 AzlD Branched-chain amino acid transport protein (AzlD) Moxon SJ anon Pfam-B_14345 (release 8.0) Family This family consists of a number of bacterial and archaeal branched-chain amino acid transport proteins. AzlD is known to be involved in conferring resistance to 4-azaleucine although its exact role is uncertain [1]. 
22.70 22.70 22.70 22.90 22.60 22.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.19 0.72 -4.06 189 3465 2009-09-11 13:40:13 2003-04-07 12:59:11 7 2 2847 0 681 2161 1469 96.30 21 90.16 CHANGED hhlhIlshulsTalsRhlshhlhtp.hplsshlp...phLphlPs....ulLsALlssslh.....hsttth......ssspll...Aslsshllshhp+.s....hlhollsGhsshhllp ......................hlhllhh.u.l...sTalhRhlPhhlhpp...h....t....h.s....s......hht...........thLphlPh.......ullsulhhsslh.............hst.t.t.................thspll..uhhsss.hlth.h..p+.s....ll.lsllsGhlshhll................... 0 196 408 553 +5266 PF05438 TRH Thyrotropin-releasing hormone (TRH) Moxon SJ anon Pfam-B_14384 (release 8.0) Family This family consists of several thyrotropin-releasing hormone (TRH) proteins. Thyrotropin-Releasing Hormone (TRH; pyroGlu-His-Pro-NH2), originally isolated as a hypothalamic neuropeptide hormone, most likely acts also as a neuromodulator and/or neurotransmitter in the central nervous system (CNS). This interpretation is supported by the identification of a peptidase localised on the surface of neuronal cells which has been termed TRH-degrading ectoenzyme (TRH-DE) since it selectively inactivates TRH [1]. TRH has been used clinically for the treatment of spinocerebellar degeneration and disturbance of consciousness in humans [2]. 
27.80 27.80 27.80 28.40 25.30 27.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.17 0.70 -4.94 9 77 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 52 0 28 66 0 185.50 44 86.80 CHANGED shlllLhuL.....sVstu.u....hQtpshsucspsuhD..-lLp+AEp.LLLRShLpphp--pst..ss....Qs-WlpKRQHPGKR.pc-.......................cKRQHPG+R--p........................tp.ts.hsp.+RQHPGKR..ht.....h.-pP...hhhS-loKRQHPG+Ral.....KRQHPG+RphcEEst..............lEKRQHPGKRh...........t.C-shsssGC..uul.L ..................hhl.lhhulh.s.hslstups....h.tps.httpt...-...lLppsct.lhLpp.lpphptcptt...st......t.pWlsKRQHPGKR.p--...................cKRQHPG+R--p.......................................s.s..htst.ss.pKRQHPG+R............sP....hLt.sloKRQHPGRRhlss...KtQ+shccct...--Epc..tthhs................EKRQHPGKRhh.........ssPCss.sssup..sth.............. 0 4 6 13 +5267 PF05439 JTB Jumping translocation breakpoint protein (JTB) Moxon SJ anon Pfam-B_14502 (release 8.0) Family This family contains several jumping translocation breakpoint proteins or JTBs. Jumping translocation (JT) is an unbalanced translocation that comprises amplified chromosomal segments jumping to various telomeres. JTB, located at 1q21, has been found to fuse with the telomeric repeats of acceptor telomeres in a case of JT. hJTB (human JTB) encodes a trans-membrane protein that is highly conserved among divergent eukaryotic species. JT results in a hJTB truncation, which potentially produces an hJTB product devoid of the trans-membrane domain. hJTB is located in a gene-rich region at 1q21, called EDC (Epidermal Differentiation Complex) [1]. JTB has also been implicated in prostatic carcinomas [2]. 
20.90 20.90 21.00 20.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.77 0.71 -4.30 4 94 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 76 1 60 88 0 108.80 36 67.73 CHANGED A-uPs+cEKlssppsspsCWhsEcFsVspECuPCSsFcI..Kohs..cChpTGYhE+lsCSsScps.hRSC.RSALh.Ep+.FaKFEsssssluLl.hhlshhRpRQL-R+sh.+lc+Ql ...................................h.p..ss..tsCWhh.EcasVspECpPCosFph..+ohs...tCspT.GYhEplpCs..sSc...p....h+S......C.Rsuhh.Ep+hFWKFEusshslull.shlVhhRpR.L-R+shp+lc+Ql...................... 0 22 28 45 +5269 PF05440 MtrB Tetrahydromethanopterin S-methyltransferase subunit B Moxon SJ anon Pfam-B_15021 (release 8.0) Family The N5-methyltetrahydromethanopterin: coenzyme M (EC:2.1.1.86) of Methanosarcina mazei Go1 is a membrane-associated, corrinoid-containing protein that uses a transmethylation reaction to drive an energy-conserving sodium ion pump [1]. 19.40 19.40 19.40 54.20 19.20 19.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.26 0.72 -3.95 5 53 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 53 0 38 51 5 96.10 35 92.20 CHANGED olV+IAPElsLVMDs-TGsVuctp.cDlIhlSM-PlaE+l-KLEulADDLlNSLDPSpPsLNSaPGREGVahhAGhaoNuFYGFllGLuIsuLLALIL .........hVplsPEhsLVhDspoGlluptt.c-llhlohsPl.-plscL-phAD-LhNSLDPcss.lpSaPGREGshhhAGhhoshaaGFhlGLhl.hsLlshh.h....... 1 8 24 32 +5271 PF05443 ROS_MUCR ROS/MUCR transcriptional regulator protein Moxon SJ anon Pfam-B_1733 (release 8.0) Family This family consists of several ROS/MUCR transcriptional regulator proteins. The ros chromosomal gene is present in octopine and nopaline strains of Agrobacterium tumefaciens as well as in Rhizobium meliloti. This gene encodes a 15.5-kDa protein that specifically represses the virC and virD operons in the virulence region of the Ti plasmid [1] and is necessary for succinoglycan production [2]. 
Sinorhizobium meliloti can produce two types of acidic exopolysaccharides, succinoglycan and galactoglucan, that are interchangeable for infection of alfalfa nodules. MucR from Sinorhizobium meliloti acts as a transcriptional repressor that blocks the expression of the exp genes responsible for galactoglucan production therefore allowing the exclusive production of succinoglycan [3]. 23.80 23.80 24.50 24.60 23.60 23.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.56 0.71 -4.38 127 729 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 320 1 355 661 38 123.90 40 78.76 CHANGED tsstsllcLTA-IVuAYVS..sNslsss-LPsLIpsVasuLs..............slss...ss....t..ts.....p..................c.pP.AV......s..l+KSl.pPDhllCLEDGK.taKoL.KRHLpocaulTP-pYRpKWGLPsDYPMVAPsYAtpRSpLAKphGLGp ..........................................................s....thlpLsu-IVuAaVu..s..Nslsss-LPsLItpVasuLt......................................plstst.........t..........p.............................................................................p.pP.AV.....s.l+KSl.psDhIlCLEDGK.paKoL.KRHLto+ashT.P-pYRpKWGLP.s-YPMVAPsYAtpRScLAKphGLGp....................................... 0 93 242 268 +5272 PF05444 DUF753 Protein of unknown function (DUF753) Moxon SJ anon Pfam-B_1957 (release 8.0) Family This family contains sequences with are repeated in several uncharacterised proteins from Drosophila melanogaster. 
21.00 21.00 21.20 21.20 20.40 20.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.89 0.71 -12.93 0.71 -4.33 80 560 2009-01-15 18:05:59 2003-04-07 12:59:11 7 14 25 0 372 539 0 130.60 24 85.02 CHANGED hpCsosp.s.ssC...tssshst.sppCpst.t......C.hotl........ssss.......htRGChscht........pssCsss.ts.....CpsC.su..s.sCNs.t.hstsR.hpCapCs....sssC....tssss.psphC.ha.....pts-p...Casph....ssshspRGChosh...............ppt..ssssppChh..Csss ............................h..Cpspp..s.sp..C.....ss.sh.hppCpst.t.......C.hohh..........ps.st............htRGChssl.t..t......tttCsss......t...sC.ttC...su....s.....tCNs..t.h.......stsp..hpChpCs......sssC.......ss....tst.C.hh.....t.tpt...Chh.h.....ts..h.hGC.sp..................................s..................... 0 48 79 267 +5273 PF05445 Pox_ser-thr_kin Poxvirus serine/threonine protein kinase Moxon SJ anon Pfam-B_1974 (release 8.0) Family \N 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.67 0.70 -5.69 9 74 2012-10-02 22:05:25 2003-04-07 12:59:11 6 1 50 0 0 102 4 427.40 67 96.40 CHANGED ssh-tpa..hpspcps-TslLG.......DsIYFcalhSQlDhppsWu.Psl+ll+YF+s.Fs+psls+Iscp-YlNPSaFQ.KDcRFYPhNDDFYHlSTGGYGIVF+h.DcYVVKFVaEP.sppapPh-sTuEaTIP+FLYNNLKG...DE+cLlVCAhAMGlNYKlsFLasLY+RVLahLLLlhpIhDsp.Lsl.....paS++hhLKhFs-.+Kss.cFVKLlSYFYPhVlpSNlNVI..N.FsahhaF.......FEHEKRusYhY-RGNIIIFPLA+CSA-Klo.phstchGFt...........SlscYlKFlFLQluLLYIKIYELPsCsNFlHlDLKPDNILlFDS..pcslpIphsspsaVFcE.l+ssLNDFDFSQVusIh.NKKIKsSl+lE.pNWYYDFHF....FsHTLh+sYPEIp.pD.tFspuLpEahh..CpsKsTC-KFRL+VShLHPISFLtcFlt+s.lFSsWIN 
............................................................................sss...EaQW..hSscchsDTVILG.......DslYFs.IhSQLDl+QsWA.PsVRLLpYFKN.FN+ETL.KIp-s-YINsSFFQQKDKRFYPlN.DDFYHISTGGYGIVFKI.DpYVVKFVFEs.sKhYS..PMETTAEaTVPKFLYNNLKG...DEKKLIVCAhAMGLNYKLTFLHoLYKRVLaMLLLLIQTMDGQcLSL.....+aSpKsFLKuFNE.RKDSIKFVKLLSaFYPuVIpSNINVI..NY..Fs+MFHF.......FEHEKRoNYp.Y-RGNIIIFPLAhaSA.DKVssEh..Al+LGFK................SLVpYIKFIFLQMALLYIKIYELPCCcNFLHsDLKPDNILLFDS...sEPIh.I.+lpc.ppFVFN...E.IKu.ALNDFD.....FSQVA.........uIl.NKKI.K.N......shKVE.HNWYYDFHF....FlHTLL..KTYPEIE.+DhEFosALEEFIM....C.oKosCDKaRLKVSILHPISFLE..KFIhRD.IFScWIN................................................... 1 0 0 0 +5276 PF05448 AXE1 Acetyl xylan esterase (AXE1) Moxon SJ anon Pfam-B_4814 (release 8.0) Family This family consists of several bacterial acetyl xylan esterase proteins. Acetyl xylan esterases are enzymes that hydrolyse the ester linkages of the acetyl groups in position 2 and/or 3 of the xylose moieties of natural acetylated xylan from hardwood. These enzymes are one of the accessory enzymes which are part of the xylanolytic system, together with xylanases, beta-xylosidases, alpha-arabinofuranosidases and methylglucuronidases; these are all required for the complete hydrolysis of xylan [1]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.90 0.70 -5.94 11 1046 2012-10-03 11:45:05 2003-04-07 12:59:11 7 10 743 111 195 2479 716 275.50 29 81.06 CHANGED MtlhDhsLp-LppY+upsstPcDFs-FWspsLcEhpplsschcLppsDaplstVcsY-LTFsGhsGu+IcuhhlhP+.sctphPsllcFHGYsus.sG-hp-hLpasusGaushuMDsRGQG.....GtSpD.sssssussh.GahhRGlh-..t-phaYRcVFLDssphl-llhShspVDccRluVhGuSQGGuLALAsAALss+.lKtlls.YPFLSDFcRsh-lsh.pcsYpEltpYF+.hssp+E+EpclFpTLuYhDlhNLAcRlKusVLhulGLhDplCPPST.FAAYN+lpsc.K-lclYs.auHEshssa.p-phhpaLpcl .............................................................................................................................htst....Pt.D.F-tFWpt.phpph...............p.....t..h........p......h...p..h..p.....h.....-..h....t....h...s........t.l..cs.a......c...l....s..a..p..u......h................t........s......s......p......lhuhl...h.h....P........c....................s.....p......t...t.........h......P..h...l.l....p........a..h...G............Y......h..s.................p.....t......h...................h....p.........h........l..............t..........a..........s....s..........t.....G.....a...u.s.lshDs......R..G..Qu..........................sho...pD......ts.........s...s..s....h.t......G...a..h.....h.......c...G....h....s..............-..p.h...a....a......+....c....l....a.........h......D.h.h.p....hl.-.h.l.t.s.h..s...p.....l...D...t.p......+........lushGs.SQG.G.ul.u...l.s.....s...u......u..........L..........s....s..........c.....l...pt......s...hs......h..P.....h...L......s...c....h..p..c.s...h.p....h..s.s........tt.s...Y....p..l..h...p...ah..p........s.....h.....p..t..p........t..phh.t.s..L..u.Yh.Ds..hph..A..p..+l.....p...s...s..h..h..sGh.DplC.......P.....o...aA....hYNtl.............s...s..............t......K...p...hh..l.h......huHtshtsh.....t.h......t.................................................................................................................... 
1 79 144 172 +5277 PF05449 DUF754 Protein of unknown function (DUF754) Yeats C anon Yeats C Domain This domain appears to be found in a group of prophage proteins. 25.00 25.00 27.70 29.70 23.30 21.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.71 0.72 -3.96 13 238 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 203 0 35 191 1 82.60 35 88.67 CHANGED llsAllCusIslRL.lsY.RRsGARHRhhsShlAYlLslssGstslpllhu.....phtssuhhcshltslLslhVhpARGNVA+ll+ss ...........hlNshlCssIslpL.hhY.RR.s.suRHRshhSaLAalLhlshussPlphlhG.....ph.sssahtlhlNllhsshVlpARGNVu+lhc..t... 0 3 9 20 +5278 PF05450 Nicastrin Nicastrin Moxon SJ anon Pfam-B_15452 (release 8.0) Family Nicastrin and presenilin are two major components of the gamma-secretase complex, which executes the intramembrane proteolysis of type I integral membrane proteins such as the amyloid precursor protein (APP) and Notch. Nicastrin is synthesised in fibroblasts and neurons as an endoglycosidase-H-sensitive glycosylated precursor protein (immature nicastrin) and is then modified by complex glycosylation in the Golgi apparatus and by sialylation in the trans-Golgi network (mature nicastrin) [1]. A region featured in this family has a fold similar to human transferrin receptor (TfR, Swiss:P02786) and a bacterial aminopeptidase (Swiss:P80561). It is implicated in the pathogenesis of Alzheimer's disease [4]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.59 0.70 -5.12 6 234 2012-10-02 19:46:12 2003-04-07 12:59:11 10 9 139 0 136 395 62 196.70 31 32.33 CHANGED +lllVosRhDosohFcslulGA.SslsuhlslLuAAcuL....p+h.ss.Ss.p+NVlFlhFNGEoaDYIGSpRFlY-h-pGpFP.........stsIu.ssIDhhLElGsluptpu.t...hahH....utpspolsspsh-hLpphpcShuuss.tlh.sssssstlPPoShpSFLRcssshsuVlLssasspa.N+aYHSIhDcs-Nls.......................ps....s..thplsssAollAtuLY .........................hlhsssphD..otoh.......F...........s.....l.....u.s..GAposluuhlshLusAcsL............pch......s...s.h.....s...p.h.........+...slhFs.hFpGE.sa......s.YlGSp....RhlaD...h.cp..s.phs........................................l....p.......h.......csI-...t....hlEls...pluht.ss.t........lahHss....s.t.ps...ps..h....tspl.ch...l......ssl.........ppu..h..shs.....s.h.hhh.......t...s...s...................lPP.o.S.hppFL+......t....p...........s......hs..u....ll.l.ssa..t.t.t.a..N.p.a.YpShhDs.p.tlt................................................................................................................................................................ 2 50 74 112 +5279 PF05451 Phytoreo_Pns Phytoreovirus nonstructural protein Pns10/11 Moxon SJ anon Pfam-B_15695 (release 8.0) Family This family consists of Phytoreovirus nonstructural proteins Pns10 and Pns11. Genome segment S11 of rice gall dwarf virus (RGDV), a member of Phytoreovirus encodes a putative protein of 40 kDa that exhibits approximately 37% homology at the amino acid level to the nonstructural proteins Pns10 of rice dwarf and wound tumour viruses, which are other members of Phytoreovirus [1]. 
19.70 19.70 20.00 20.40 19.50 18.10 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.08 0.70 -5.79 4 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 1 17 0 330.40 43 93.21 CHANGED MDssT-phlpLHh-llspcpGapIlpKaDAI+KLpL....stcuspsNISpAAhsplppaupppEAYlASDLs-R-l.ssshhKAlIFVPtSVlssGK..KDLlPYGVlssslIaVPETholLDslp.hlsp...++PlSshllhchh+slph-VlGSsYDoaYYCssScYGKNLIKhusshPs.PpplRLSlsDLshsAhppsHshssphl.phhs+hPsGFpPKsHlL+I.hssh-MEhhcphVpp.huccP.stFpYsDppNlLpRTThFSsscsFSahILWRGW.STYKEhLSQDQlopFhtt.GsuGshssahhTaPShF-EGslYlpYcah...TP....put+uts.hPshsph ............MDs-TpRllpLHs-llppcpGapIloKaDAI+Kl+L....sssussNNlSpAALsKLpcaAspuEAYlsSDLs-Rsl.ssslhKAlIFss+SVlosGK.lKDllPYGVhsusllalPETholLDsI..hlupp..++PhohsllhphsKslph-llGssYDoaYYCsuStYGKNLl+husthPshPsplRLSlGDLsYhAApShHshAspYl.+lFs+LPsGFoP+SHLh+I.hslL-M-pFKchVpphhA+ps.stFpY-DsKNlLpRsshFSsDHpYotlILWRGWuSTYtEhLSQ-QlophpupsGhAGDLGhaphTlsShF-EGtVYlpY+Fh...TPt........ptsSt+utslYPsL.th........ 2 0 1 1 +5280 PF05452 Clavanin Clavanin Moxon SJ anon Pfam-B_15887 (release 8.0) Family This family consists of clavanin proteins from the haemocytes of the invertebrate Styela clava, a solitary tunicate. The family is made up of four alpha-helical antimicrobial peptides, clavanins A, B, C and D. The tunicate peptides resemble magainins in size, primary sequence and antibacterial activity. Synthetic clavanin A displays comparable antimicrobial activity to magainins and cecropins. The presence of alpha-helical antimicrobial peptides in the haemocytes of a urochordate suggests that such peptides are primeval effectors of innate immunity in the vertebrate lineage [1]. 
20.90 20.90 21.20 28.70 20.70 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.81 0.72 -3.99 2 6 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1 0 0 7 0 70.50 82 100.00 CHANGED MKThILILLILGLGIsAKSLEEpKu-EEKhh+hlGplIHtlGphVathuhshGDDQQDNGKFYGaYAEDNGKHWYDTGDQ MKTTILILLILGLGINAKSLEERKSEEEKlF+hLG+IIHHVGNFVHGFSHVFGDDQQDNGKFYGaYAEDNGKHWYDTGDQ 0 0 0 0 +5281 PF05453 Toxin_6 toxin_6; BmTXKS1/BmP02 toxin family Moxon SJ, Lee SC anon Pfam-B_16009 (release 8.0) Family This family consists of toxin-like peptides that are isolated from the venom of Buthus martensii Karsch scorpion. The precursor consists of 60 amino acid residues, with a putative signal peptide of 28 residues and an extra residue, and a mature peptide of 31 residues with an amidated C-terminal. The peptides share close homology with other scorpion K+ channel toxins and should present a common three-dimensional fold - the Cysteine -Stabilised alphabeta (CSalphabeta) motif [3]. This family acts by blocking small conductance calcium activated potassium ion channels in their victim [1,2]. 25.00 25.00 41.20 40.90 19.20 19.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.59 0.72 -3.71 5 25 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 10 5 0 30 0 28.00 61 66.19 CHANGED VGCEECPhHCKGKpAKPTC-Nu..VCsCNl ..VuCE-CP.HCpsKsA+spCDNs..hCsCps. 0 0 0 0 +5282 PF05454 DAG1 Dystroglycan (Dystrophin-associated glycoprotein 1) Moxon SJ anon Pfam-B_15784 (release 8.0) Family Dystroglycan is one of the dystrophin-associated glycoproteins, which is encoded by a 5.5 kb transcript in human. The protein product is cleaved into two non-covalently associated subunits, [alpha] (N-terminal) and [beta] (C-terminal). In skeletal muscle the dystroglycan complex works as a transmembrane linkage between the extracellular matrix and the cytoskeleton. 
[alpha]-dystroglycan is extracellular and binds to merosin ([alpha]-2 laminin) in the basement membrane, while [beta]-dystroglycan is a transmembrane protein and binds to dystrophin, which is a large rod-like cytoskeletal protein, absent in Duchenne muscular dystrophy patients. Dystrophin binds to intracellular actin cables. In this way, the dystroglycan complex, which links the extracellular matrix to the intracellular actin cables, is thought to provide structural integrity in muscle tissues. The dystroglycan complex is also known to serve as an agrin receptor in muscle, where it may regulate agrin-induced acetylcholine receptor clustering at the neuromuscular junction. There is also evidence which suggests the function of dystroglycan as a part of the signal transduction pathway because it is shown that Grb2, a mediator of the Ras-related signal pathway, can interact with the cytoplasmic domain of dystroglycan. In general, aberrant expression of dystrophin-associated protein complex underlies the pathogenesis of Duchenne muscular dystrophy, Becker muscular dystrophy and severe childhood autosomal recessive muscular dystrophy. Interestingly, no genetic disease has been described for either [alpha]- or [beta]-dystroglycan. Dystroglycan is widely distributed in non-muscle tissues as well as in muscle tissues. During epithelial morphogenesis of kidney, the dystroglycan complex is shown to act as a receptor for the basement membrane. Dystroglycan expression in mouse brain and neural retina has also been reported. However, the physiological role of dystroglycan in non-muscle tissues has remained unclear [1]. 
22.00 22.00 22.10 22.00 21.60 21.90 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.91 0.70 -5.23 4 207 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 101 1 85 198 0 192.60 39 30.30 CHANGED shF+AhFsGDtptVhNDIpKKIhLVKKLAhuFGDRNoSoITL+sIT....+GSllV-WTNNTL...ppPCPc-Qlttlu++Ih-s-GpPRptFppulEP-a+hoNIoVssouSC+H..hpaIPh...c.IPotssss.sP-csscKSSpDDVYLHTVIPAVVVAAILLIAGIIAMICYR..KKRKGKLTl...EDQATFIKKGVPIIFADELDDSKPPPSSSMPLILpEEKsPLPPPEYPNpssPEThPLNQD.hGEYTPL+DEDPNAPPYQPPPPFosPMEGKGSRPKNMTPYRSPPPYVPP .......................s..Fphhhthc..t...h...s.D..lpKKhhLV++LA.huaGDpssooITlpsI..o......pGoh..lVt..a.Nsol.......hp..Cs.cptlt.hht.ph.l.h.p..p.p....h.p.....p.th....p...s...h..tsp.a.hhphshh..upCpp.....h..s.......h..s....................s.tp....hh.l..slllhhhhl.l.ss.Ihhhhhp......h++pGK........................................................................................................................................................................................................ 0 29 36 64 +5283 PF05455 GvpH GvpH Moxon SJ anon Pfam-B_16017 (release 8.0) Family This family consists of archaeal GvpH proteins which are thought to be involved in gas vesicle synthesis [1]. 
22.00 22.00 22.00 22.00 21.80 21.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.05 0.71 -4.64 4 26 2012-10-02 21:54:05 2003-04-07 12:59:11 6 1 22 0 14 71 7 161.80 22 91.78 CHANGED .ssDDp........SDp.S...GLLDQLcsLlEsLAEIE.pEsG+ppspGpIDRGsARIDYDY-VSIGLGscuRsspss-tPss-sscsE.......phEpohHlETRssDDG.-lVVlADLPGVsD-c.lDVsL-sDpsALpl+scDcVVcRlsLDcPsspITshplpNQVLEIRLstos-opG.u-ss ...............................................................................s......................thhptlttllphLs.pl.t.pc.t......tp....hp..tpu..th....s....p..t...p.hphs....YsaslphG...lssc.....s...c........t...tt..t.s.hsp.t.csc..............p..tp.shc......l-...s+t.......p-Du.-ll.VlADLPGVsc...-c.lcVsl-s-pssLsl..p...s...s...c......c...h.l.c...RltL.s.p......s.s...c.ht..ph..shpNulLEl+lpttp.........tt..................... 0 7 13 14 +5284 PF05456 eIF_4EBP EIF4EBP; Eukaryotic translation initiation factor 4E binding protein (EIF4EBP) Moxon SJ anon Pfam-B_5573 (release 8.0) Family This family consists of several eukaryotic translation initiation factor 4E binding proteins (EIF4EBP1 ,2 and 3). Translation initiation in eukaryotes is mediated by the cap structure (m7GpppN, where N is any nucleotide) present at the 5' end of all cellular mRNAs, except organellar. The cap is recognised by eukaryotic initiation factor 4F (eIF4F), which consists of three polypeptides, including eIF4E, the cap-binding protein subunit. The interaction of the cap with eIF4E facilitates the binding of the ribosome to the mRNA. eIF4E activity is regulated in part by translational repressors, 4E-BP1, 4E-BP2 and 4E-BP3 which bind to it and prevent its assembly into eIF4F [1]. 
20.20 20.20 22.60 20.80 20.00 19.20 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.40 18 197 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 97 16 104 192 1 107.00 47 76.84 CHANGED MSu....upQ...SpSpuIP.oR+VhlpDsuQLPc.sYSoTPGGTLaSTTPGGTRIIYDRpFLLshRNSPlA+TPPspLPsIPuVTh.sss.hs..........pspppsppopshpsc.......--sQFpMDI ...........................tp...o.opsIP....oRplhl..pDssQLPp.sYuoTPGGT.LaSTTPG...GT.RII.YDRKFLL-pRNSPlupTPP.ppLPsIPGVTsPss...spp............t.pp...ptsp.ptt..........t.....--sQF-MDI.................................................................... 0 30 41 66 +5286 PF05458 Siva Cd27 binding protein (Siva) Moxon SJ anon Pfam-B_5606 (release 8.0) Family Siva binds to the CD27 cytoplasmic tail. It has a DD homology region, a box-B-like ring finger, and a zinc finger-like domain. Overexpression of Siva in various cell lines induces apoptosis, suggesting an important role for Siva in the CD27-transduced apoptotic pathway [1]. Siva-1 binds to and inhibits BCL-X(L)-mediated protection against UV radiation-induced apoptosis. Indeed, the unique amphipathic helical region (SAH) present in Siva-1 is required for its binding to BCL-X(L) and sensitising cells to UV radiation. Natural complexes of Siva-1/BCL-X(L) are detected in HUT78 and murine thymocyte, suggesting a potential role for Siva-1 in regulating T cell homeostasis [2]. This family contains both Siva-1 and the shorter Siva-2 lacking the sequence coded by exon 2. It has been suggested that Siva-2 could regulate the function of Siva-1 [3]. 
21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.22 0.71 -4.70 5 98 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 64 0 54 105 0 146.10 34 86.71 CHANGED MPKRSCPFsDsAPLQLKVHVGQRELS+.GVsAERYoREIFEKTKQLLFpGAQAshD+lWsEGCuIsHpPEss+PG.TuAPc.AARGQMLIGPDGRLsRu.pAQASEAsPoG....sAshACSSCVRSVcuKusCSQCERAlCuQCl+s...CuuCuuLsColCTlsDau..DphEclLCoSCAhFEo ......................................................MsKR..Pat.ts.....QhKh+Vu.+plsp.GVhupph.pclaE..+ThpLLF.GApuh..htth.......tp....t....t......p..ps..t...s...th.p...hh.puQ.h.LlG.cG..cL...h..+s...spss.pss.ss...................ssh..s.CusC.h..R..s.ss.....s+...s.......sCupC-RslCspCsps...ChsCuslhCslCu..hs..shs...p...-pslC.sCt........................................... 0 15 20 37 +5287 PF05459 Herpes_UL69 Herpesvirus transcriptional regulator family Moxon SJ anon Pfam-B_15401 (release 8.0) Family This family includes UL69 and IE63 that are transcriptional regulator proteins. 25.00 25.00 32.70 32.60 20.70 17.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.40 0.70 -5.19 30 149 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 84 1 0 145 0 212.20 31 39.51 CHANGED tssttpploapcLlppsapLhppFptcshstphhpsl...Rctslpss.uLtshlAhsDEsLsWhKhphh+sLsl..pscD.IhsTusslhpsLhhKLpshh..pChLps........................................ps...thh+pLsph...................lpcps.pshps.lshslshlspaupsl.ttlpppshp......tltsh.spslpc.YpPGsshshlhpslpsHtppCssppCplthpthlsP..aspGhaFhss ............t...tttploa-pLhtpshtLhhpaphcshstphh+sh...Rphsltst.sLhphLA.sDETLuWhKhphh+s.LPl..pspDPlluTsuulhpsLhsKLpshh..tChLcs........................................ps.....lppLsch...............................hp+p..pshps.hphshshlsphuphl..slpphshp...............hltshsspthtt.YhPGsshshlhchLcsH.pcCps...chCcLphpphluP..YhaGhaFhs... 
0 0 0 0 +5288 PF05460 ORC6 ORC6; ORC6_1; Origin recognition complex subunit 6 (ORC6) Moxon SJ anon Pfam-B_16189 (release 8.0) Family This family consists of several eukaryotic origin recognition complex subunit 6 (ORC6) proteins. Despite differences in their structure and sequences among eukaryotic replicators, ORC is a conserved feature of replication initiation in all eukaryotes. ORC-related genes have been identified in organisms ranging from S. pombe to plants to humans. All DNA replication initiation is driven by a single conserved eukaryotic initiator complex termed he origin recognition complex (ORC). The ORC is a six protein complex. The function of ORC is reviewed in [1]. 20.60 20.60 20.90 20.80 20.40 20.20 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.19 0.70 -5.35 28 219 2009-09-11 10:55:20 2003-04-07 12:59:11 8 3 186 1 150 205 0 235.70 21 72.06 CHANGED Lpcll........sphuts.hspcllshuspLhplSp..pppssltsppEluRshlCh.lAup...............+hppth..-lsh.hlch.PlpP+sap+hhshhcpsLtsps..........................................................t.s.t...................................................................................................................................................................................t..sh-lhplsspahlssplhs..ph.phhptacpphht....................p.httt...t...........phtstpppclp.hhslh..pthh....Wht.h..............p.....t.................................thGsMh.pspsha.hopc 
.................................................tlh........sphu.s....pllptAppLhphSc....ppsuhhttppEhu+shhChclAsp.................ph.phsh........Dhs..hlphs..sls.+hYpphhphhcphLshss............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 25 71 114 +5289 PF05461 ApoL Apolipoprotein L Moxon SJ anon Pfam-B_16464 (release 8.0) Family Apo L belongs to the high density lipoprotein family that plays a central role in cholesterol transport. The cholesterol content of membranes is important in cellular processes such as modulating gene transcription and signal transduction both in the adult brain and during neurodevelopment. There are six apo L genes located in close proximity to each other on chromosome 22q12 in humans. 22q12 is a confirmed high-susceptibility locus for schizophrenia and close to the region associated with velocardiofacial syndrome that includes symptoms of schizophrenia [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.26 0.70 -5.17 21 365 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 72 0 177 313 0 241.10 27 74.16 CHANGED psSpppL..LLTpccs.WppFVspAcLsR--s-sLhcsLtpLht.hshcDpsphp+s.pt...................................+chFL+tFPplKpcLc-pIccL+ALADplD+VH+ssTIoNVVuuSsussSGlLolLGLsLAPhTAGsSLsLoAsGlGLGsAAuVTuloTullEpsspstAcscAsclsssshsphcshtchltp.ssphhshsp.............shhpslcsltpsl+Al+hs+uNPpLs..usApphhssGplSspsupQVpcshtGTA...LAMo+sARIhGuAsuGlhLhhDVhsLVc-SKcLp-GAKoEoAEcLRphApELEcKLpcLsplYcsL ........................................................................................h..............................................................................................................................................................................tp.hFlp.Fsph+tcl....pp..pIpcLcslA-pl-chH+tssl.uslsuuSsu.ss.uG.lhollGLuLAPhThGsS.LhLou.sG..hGlusAuulTul..sosls-tsppppsptpspcl.hts.tppph.c.h..tp..h.htt.h.....thhph........................phh....p.....hpth...pphp.shp..hc.....h....stst.....h.ht...t.h.....s..............t......s...........p.......l..........tps.htsss..........hthspssphhshshsuh.lh....h....Dlh.ls.p.hcLtp.....G.u+sp.Attl+t.AtthpttL..h.t..t............................................................................................................................................................................... 0 41 58 83 +5290 PF05462 Dicty_CAR Dict_CAR; Slime mold cyclic AMP receptor Moxon SJ anon Pfam-B_16614 (release 8.0) Family This family consists of cyclic AMP receptor (CAR) proteins from slime molds. CAR proteins are responsible for controlling development in Dictyostelium discoideum [1,2,3,4]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.17 0.70 -5.53 6 214 2012-10-03 04:04:29 2003-04-07 12:59:11 6 7 115 0 165 697 4 215.50 19 52.21 CHANGED hps.pEIcsoYulLLIADFoSIIGChlVLIGFWRLKLLRNHlTKVIoCFCsTSlhKDlhSTlLTLo....ssAsps.GFPCYLYAIVITYGSLACWLWTLCLAhSIYpLIVKREPEPE+aEKaYahlCWGLPLISTIlMLuKsslchVGNWCWIGspYsGYRFGLFYGPFFhIWulSAlLVGLTS+YTYpVI+NuVSDNKDKHhTYQFKLINYIIVFLlCWVFAVVNRIlNGLshFPsssNlLHTYLSVSHGFaASVTFIYNNPLMWRYaGuKllhlFThFGaFVEsQ+RLEKNKNNNNPSPhusS+uuo .......................................................................................ttt..........................h......Sh..hu...s...hh.ll.hs.a...hh...p.......h+....p.............................+..h.l.h.h......h..s...h...ss...h...h....s...h....h......s...h....h..........h...................t....t......s......sh.......h.C...hQuh.h.....l.phh....u..s..h........h.....Ws.h...s..hA....hs...l...a....h...........s..h...h...p....p......s...p..h....c..+....h...c..h.....h....Y.h..l...h.s....a.G..l..P.h...l..s..s.....h...l.....h....h..............h.........t.....p......t......h......t..............h.......G........s....W..CW..l....p.........s...........p..............h.........s...........s..........h......R.......h....s................h.......FY.s...P.l..a...h....l......h....h....h.........h...h......h...h..h....h..................................................................................................................................................................................................................................................................................................................................................................hhhhhh........................................................................................................................ 
0 74 115 151 +5291 PF05463 Sclerostin Sclerostin (SOST) Moxon SJ anon Pfam-B_16740 (release 8.0) Family This family contains several mammalian sclerostin (SOST) proteins. SOST is thought to suppress bone formation. Mutations of the SOST gene lead to sclerosteosis, a progressive sclerosing bone dysplasia with an autosomal recessive mode of inheritance. Radiologically, it is characterised by a generalised hyperostosis and sclerosis leading to a markedly thickened and sclerotic skull, with mandible, ribs, clavicles and all long bones also being affected. Due to narrowing of the foramina of the cranial nerves, facial nerve palsy, hearing loss and atrophy of the optic nerves can occur. Sclerosteosis is clinically and radiologically very similar to van Buchem disease, mainly differentiated by hand malformations and a large stature in sclerosteosis patients [1]. 20.60 20.60 21.40 20.70 20.50 19.70 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.33 0.71 -4.83 4 114 2012-10-02 16:54:34 2003-04-07 12:59:11 6 3 45 3 56 111 0 177.90 50 95.52 CHANGED hphsLl..LlClLh+uCh.........AFKNDATEll.uHs.tsh..cssNNuoLNpARNGGR+spss.hDpss.pQVGCRELRSTKYISDGQCTSlpPlKELVCAGECLPh.lLPNWIGGGa...YWSRRsuQEWRCVsD+TRTQRIQLQCpDGoT.RTYKlTlVTSCKCKRYTRQHNESSHs.pusSss+P........up+pRs+KRtuKsspppLo ..........................................hh..LhClLh+ush.........AFKNDATEIl.phs.ts................s.....ps......p....s..NsT..h.Np.AcNGGR..p....ps.....h-p.p.st.sphuCRELRST+YloDGpCpShpPlKELVCuGpClPh.lLPN....h.IGtGh.....W.Rpsu.p.-aR.Cl.sD+oRTQRlQLpC.sGss.RTYKlplVTuCKCKRaTRpHNpSph.p.....uhpsu+P........sp+..+..+t+suKsspt......................... 0 6 11 23 +5292 PF05464 Phi-29_GP4 Phi-29-like late genes activator (early protein GP4) Moxon SJ anon Pfam-B_16889 (release 8.0) Family This family consists of phi-29-like late genes activator (or early protein GP4). 
This protein is thought to be a positive regulator of late transcription and may function as a sigma like component of the host RNA polymerase [1]. 21.60 21.60 22.10 222.80 19.60 20.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.46 0.71 -4.24 3 7 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 8 0 8 0 125.40 73 99.32 CHANGED MP+TQRGIYHNLKESEYVlS..NGDVTFFFSSEMYhNKFLDGYQEaREKFNKKIsRlscT.PhNMDMLADITFYSpVEKRGFHAWLKGDNsTWQElHVYALRpMTKPsTLNWSRIpKPKLRERRKSMV MP+TtRGIYHNLKESEYVsS..NsDsTFFFSSEhYLNKFLDGYQcaRccFNKKIpRlssT.PhNMDMLADITFYSpVEKRGFHAWLKGsNsTWpElHVYALRhMTKPsT.sWSRIpKPKLtERRKSMV 1 0 0 0 +5293 PF05465 Halo_GVPC Halobacterial gas vesicle protein C (GVPC) repeat Moxon SJ, Bateman A anon Pfam-B_17013 (release 8.0) Repeat This family consists of Halobacterium gas vesicle protein C sequences which are thought to confer stability to the gas vesicle membranes [1,2]. 22.30 22.30 22.70 22.90 21.80 22.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.34 0.72 -3.95 29 50 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 7 0 20 45 0 32.00 32 48.53 CHANGED VssLhssIsshcppassspsuFcsYu--Fsss ..VssLhssIsshcpphssspsuF-uYu--Fss....... 0 10 14 20 +5294 PF05466 BASP1 Brain acid soluble protein 1 (BASP1 protein) Moxon SJ anon Pfam-B_16137 (release 8.0) Family This family consists of several brain acid soluble protein 1 (BASP1) or neuronal axonal membrane protein NAP-22. The BASP1 is a neuron enriched Ca(2+)-dependent calmodulin-binding protein of unknown function [1,2]. 
25.00 25.00 77.60 48.80 23.30 22.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.10 0.70 -4.54 6 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 28 0 28 49 0 175.80 58 98.29 CHANGED GGKLSKKKKGYNVNDEKAK-KDKKAEGAuTEEEGTPKEs..EsQAAsEoT.EVKE.spEEKP-KDApsotsKsEEKEGEK-AsssKE-A.KAEPEKoEuuA-AKsEPsKs......sEpE.AAAsuPAsuuEAP..KAuEsS.....stsAEuAAPup.--.SKEEGEAKKTEAPAu.AupEsKS-uAP.ASDSKPSSsEAAPSSKETsAATEAPSSTsKApsPAAPA-EsKsuEuP..AANSDQTVAVpE .........GGKLSKKKK.GYNVNDEKAKDKDKKAEGAusEEEsTPKEs..EsQsAu-ss.EsKE..tcpKs-pDups...KsE-KEu-K-ss.spE-s.KuEsEK.oEu..s-upsEP.ts......scpp.tAssuPusss-As..Ks.sEst.......su-s.Asst..p.u+EpGEsKKTEAPAs..hpEsKS-sAP.ASDSKPuSsEAAPSSKET.sA....TEAPSST.sKA..tP.u...uss-p...sEsP..ssNS-QoluVp-.................. 0 1 3 9 +5295 PF05467 Herpes_U47 Herpesvirus glycoprotein U47 Moxon SJ anon Pfam-B_17115 (release 8.0) Family \N 25.00 25.00 80.50 80.00 18.20 18.00 hmmbuild -o /dev/null HMM SEED 677 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.45 0.70 -6.68 2 53 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 7 0 0 25 0 119.00 85 96.88 CHANGED MLHISpLGLFLuLFAIVMHSsNLIKYTSDPLEAFKTVNRHNWSDEQREHFYDLRNLYToFCQpNLSLDCFTQILTNVFSWsIRD.QCKSAVNLSPLQNLPRsETKIVLSSTAANKSIlASSFSLFYLLFATLSTYTADPPCVELLPFKILGsQLFDIKLT-ESLpMAhSKFSNSNLTRSLTsFTsEIFFNYTSFVYFLLYNTTSCl.SNDQYFcpSPKPINVTTSFGRsIVNFcSILTTTP....SSTSASlTSPHIPSTNhPTPEPsPVTKN.TcLpTDTIKVTPNTPTITsQTTESIKKlVKRSDFPRPMYTPTDIPTLTIRhNATIKTEQNTE....NPKSPPKPTNFENTTIRIPcTFESsTVsTNsTQKlESTTF.TTIGIcEIssNhYSSPKNSIYLKSKSQQSTTKFTDsEHTTPILKFTTWQssARTYMSHNTEVQNMT...................................................DRFQRTTLKSSsE.PTlQTLSVTPKpKLPSNVTAKTEVplTNNALPSSNSSaSITcVTcEsKpsRMSASTHtEINHTEIsphTPILNAHT.EKSTTPQhsFTAET.LTTSSKsAILTWSNLL.TTPKEPLTNTSLR.TsHITTQLTTSNRTQSAKLTKAplSSQTTNIYPQTITtRST-V 
...........................................................................................................................................................................................EESLRMAMSKFSNSNLTRSLTSFTSKNFFNYTSFVYFLLYNTTSCVPSNDQYFKQSPKPINVTTSFGRA.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 0 0 0 +5298 PF05470 eIF-3c_N eIF3c_N; Eukaryotic translation initiation factor 3 subunit 8 N-terminus Moxon SJ anon Pfam-B_17447 (release 8.0) Family The largest of the mammalian translation initiation factors, eIF3, consists of at least eight subunits ranging in mass from 35 to 170 kDa. eIF3 binds to the 40 S ribosome in an early step of translation initiation and promotes the binding of methionyl-tRNAi and mRNA [1]. 
20.70 20.70 20.80 20.90 20.30 20.60 hmmbuild -o /dev/null HMM SEED 595 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.99 0.70 -6.43 8 593 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 302 0 373 561 13 373.20 28 67.47 CHANGED +ahpssssp-usDEEDsKRVVKSAKDKRFEEhssslcpI+NAhpIsDhlslppsFDcLsKthpKsh....hp-sstsPshYI+sLstLEDaLNch.ssK-uKKphSpuNuKALNoh+QKlKKsN+..paEDcIs+aRcsPEp-s-cssE---s-ssGssspscs.ssshtsp.lst.....s.pu.................................................................................t....t.tst......hs...h..c.hhhpcscEITassVhKKLpEIhsARGKKsTsRp.......pplcpLpcLscIAcpssp....plcIhFslISApFDlNs.slSsaMPI-hWppslpshpolLDILltsssIhls.phsDpsE..Ep-spc..shsGslRVpGslluFlERlDsEFhKSLQsIDPHos-YVERLKDEsphhsLlpplQcYlEcpGcs.....+sss+lhl+RlE+lYYKscplhch.......................pptt..ss.hsshchssct.shs-sScsLMDtLsshIY.Ksss-Rl+sRAMLCcIYHHALpDcFhpARDLLLMSHLQ-NIpHhDluTQILaNRTMsQLGLCAFRtGhlpEuHpsLs-LhSouRlKELLuQGl.tsR.HE+TsEQE+lERpRQ.lPYHMHINLELLEsVYLsCAMLLEIPpMAAspaDA .....................................................ht.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h......................................................................................................................................................................................................................................................p..t..............h.............................................................................................................................................
............................................................ht.h....l.........t..t.....t.tuhL..h.a..uh.tpa..u+-hhh.uth.p.h..........-...p..............lhaNRhhsplGlsAF+.s.h.cspthL.-l......t.t+...........+-.ll.uQt........................t...s.pt...............ph.c+th..hPhH.Hlsh-ll-.h.hhsuhhl-ls..s....................................................................................................................................................................... 0 131 215 314 +5300 PF05472 Ter DNA replication terminus site-binding protein (Ter protein) Moxon SJ anon Pfam-B_17662 (release 8.0) Family This family contains several bacterial Ter proteins. The Ter protein specifically binds to DNA replication terminus sites on the host and plasmid genome and then blocks progress of the DNA replication fork [1]. 22.00 22.00 22.00 23.20 21.10 21.90 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.78 0.70 -5.29 23 673 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 604 5 60 325 12 282.80 59 95.91 CHANGED pclsspFcplppcLttLpptLsstphltA+VaslPsVt+spE+.cslspItVpphhGppAhshulctapclaIp.....pshSoKushRhPGllshsssstp...plhshIscINplKsshEplIsspss.spptRF..............EhVHpp.LPGLlTLpsYRplpslp...s.solRFuWtp......K+sl+sho+..cclLttLcKuhpssttssshs.cppWpttlspEhpplsp..LPpcspL+I+RPVKVQP.lAplaa.....psp.......pcphphssshPlIsl.....sspsPclspL.hsYDssshp....+h+spttshclLls..RLHlYh 
......................................................................RLNsTFRQhEQcLAshsspL.pQap...LLlARVFSLPpVpK-sE.....H..sPLs....pIcVp.QHLG.p-ApsLALRHaRHLFIQQ....................QSENRS.SKAAVRLPGVLCYQ..VDNhoQ.tsLhspIQ+INpLKT....TFEclVTVES....t.....LPoAARF...........................EWVHRH..L.P.G.LITLNAYRTLTlLp...sPATlRFGWAN......KH.I.IKNLpR..DEVLuQLEKSLpSP..............RS.VsPWT.REpWQpKLEREYQDIAA...........LPQpA+LKIKRPVK....VQP.IARVWY......KGp........QKQVQHACPTPLIALINpDNGAuVPD.lGEL.hNYDADNlQH...RaKPQAQPLRLIIP..RLHLYV................................................................ 0 6 18 36 +5301 PF05473 Herpes_UL45 UL45 protein Moxon SJ anon Pfam-B_17674 (release 8.0) & Pfam-B_16138 (release 10.0) Family This family consists several UL45 proteins. The herpes simplex virus UL45 gene encodes an 18 kDa virion envelope protein whose function remains unknown. It has been suggested that the 18 kDa UL45 gene product is required for efficient growth in the central nervous system at low doses and may play an important role under the conditions of a naturally acquired infection [1]. This family also contains several Varicellovirus UL45 or gene 15 proteins. The Equine herpesvirus 1 UL45 protein represents a type II membrane glycoprotein which has found to be non-essential for EHV-1 growth in vitro but deletion reduces the viruses' replication efficiency [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.35 0.71 -4.89 11 167 2012-10-02 16:37:33 2003-04-07 12:59:11 7 22 100 0 44 725 1 141.50 17 27.58 CHANGED osospsptPLsshcs...h.............suspspRs+p.s.sss.......................hhhshhslGlllTshlllushl....hslPhsuhpsusCPspWhGlssuClRssspss.....spppAsssCuthsupllspusAptLhsllsshusssus.h.....................hssDultsClpsstussh..p..CspsuhsIC.tsRslSshuphIhcsRpuLpL .................................................................................................................................................................................................................................hhhh...s.h..h......hsh.....h..l.....h........s...hl.....................h....t..........s.............t........s....h......t....t.......t..............CPpc.Whsa...s..s.sC.h.thss.sst............sappApshC.p.s..h..s.....u..p...l...h...s.......s......s..................h..........hh.t............................................................................................................................................................................................................................................................................. 0 11 14 33 +5302 PF05474 Semenogelin Semenogelin Moxon SJ anon Pfam-B_18147 (release 8.0) Family This family consists of several mammalian semenogelin (I and II) proteins. Freshly ejaculated human semen has the appearance of a loose gel in which the predominant structural protein components are the seminal vesicle secreted semenogelins (Sg) [1]. 
19.10 19.10 19.10 19.10 16.90 16.00 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -13.19 0.70 -6.22 4 125 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 26 0 7 129 0 273.00 43 97.46 CHANGED MKPIIhFVLSLLLILEKQAAVMGQKGGSKGQLsutSspFPatppu.pYSu.KDKQHsESKGShSIpHTaHVDspDHDpTRKSpQYDLNAQpKTTKScpa.tGpQELhNaKQEGR-+uKsKscFHhlVIHHKGGpspHGTQNPSQDpGNSTSGKGh.SQ.SNTcEpLhspGLuKEQsSsSGAQ+sRTQGGSQSShVLQTE-.Vs....NKQp.ETQNShpNKGpY.NVhEs+pcHSSKlQTSLasAppcRLQHGSKDlFo.........KNQ+QT+NhNQDQEHGQKAHKtShQsSST............................................................EERRLsauEpGIQKDsSKuShSpQTt-KhhsKSQpQlThPSQ-p..up+tNKhS.QSSusEERR.p.GEpGlQKsVspuph..pTE-KIHsKsQNQlTIPSQDQc........................................................................................................................SGQsAKGpSGQSsDREpDLLS+EQKsRHQctspGuhNlVIIEHEsspD++.sQ++spDpNslsT .............................................................................................S.h.lphhaHs..pstD................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 3 3 4 +5303 PF05475 Chlam_vir Chlamydia_vir; Chlamydia virulence protein PGP3-D Moxon SJ anon Pfam-B_18238 (release 8.0) Family This family consists of Chlamydia virulence proteins which are thought to be required for growth within mammalian cells [1]. 
25.00 25.00 31.10 31.10 22.20 21.10 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.63 0.70 -5.28 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 20 0 1 29 0 207.60 73 99.67 CHANGED MGNSGFYLpNTQNCVFADNIKVGQMTsPLKDQQlILGTTSTPTAAKhTAc-GIslTs-ossSTNASITlSLDuEAlhK....LIL-QIpD-LVcuIIcNITsSLIQEVIDKIpoDPSLuLoKAFKNFsITNKIQCNGLFTKcNIGTLLGGTEIGoFTVTPDNAsSMFLISADIIASRMEGsVVLALVKEGDSuPCAISYGYSSGlPNlCSLRTsVuNoGssPVTFSLRVGGMESGVVWVNALuNG-cILGlTsTSNISFLEVKPQTNG .........MGNSGFYL.sTpNCVFADNIKlGQMppPLpDQQlILGTpSTPsAAKhTAs-GIslTlSNssusNASIsluLDAEpshp....LIL-pltcpIlDuIhcsIssShlQ-llDKIpoDPSLuLhKAFpNFsITsKIQCNGLFTsSNItTLLGGTEIGKFTVTPcsusSMFLlSADIIASRMEGsVVLALV+EGDopPCAISYGYSSGlPNlCSLRTslpNoGhsPsTaSLRVGGh-SGVVWVNA..LuNGssILGhTsTSNlSFLEVh.QTNu. 0 0 0 1 +5304 PF05476 PET122 PET122 Moxon SJ anon Pfam-B_18328 (release 8.0) Family The nuclear PET122 gene of S. cerevisiae encodes a mitochondrial-localised protein that activates initiation of translation of the mitochondrial mRNA from the COX3 gene, which encodes subunit III of cytochrome c oxidase [1]. 22.30 22.30 22.50 22.60 22.10 21.80 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.86 0.70 -5.14 5 30 2009-09-10 15:37:49 2003-04-07 12:59:11 6 2 29 0 15 26 0 249.50 45 89.80 CHANGED MLlIsRRLVsT.-VR+RLYspCLNt-hsulLspLRQIsVscMDhSLLpLhLs+SspaGHl-oIsYlWYKYVhRH+.LhVEP+LLCDIuslALtQGKLFIPuQlLpHYQsh.YG+Gppt...sEhppaEYELpRIKVEuFAKGTMcsToFsEKWKVFLQ-MDpTLPuss.aclRDFPaLT+A......LhQTDEps......htphLFu-pclsVsNcoSLPLLLNhVLLQ.ssFsL-sRlRlFpcFhpoYRuLPlpDolEIlIK+s..psYh..cLhphl....RGhEIsT ...................s+Rhlos.DlRpRllLssLNtchs-ALshLRpp..ppsshDspLLpshLARAsthAHs-oIsYhWYpaVM.++.LsVcs+LLC.-MAslALap-+hFLPuQhLpHYpsh.hc++ss....sEp-hlEYELhRIKVEuFA+GTMcuTuhREKWKVFLQ-MD.o.....LPupsshRlRDFPphTKu......Lh.ppDEps......huhhLFsc.p.lsIKNcaSLPLLLshlLhp..s.hssps+.+lhtcFh.p...oY+uL..s.L.hDu.pllh+c......cGacl..................ss.................. 
0 1 6 12 +5305 PF05477 SURF2 Surfeit locus protein 2 (SURF2) Moxon SJ anon Pfam-B_18437 (release 8.0) Family Surfeit locus protein 2 is part of a group of at least six sequence unrelated genes (Surf-1 to Surf-6). The six Surfeit genes have been classified as housekeeping genes, being expressed in all tissue types tested and not containing a TATA box in their promoter region. The exact function of SURF2 is unknown [1]. 25.00 25.00 28.60 28.60 20.30 21.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.77 0.70 -5.24 8 115 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 75 0 69 99 2 210.00 30 81.53 CHANGED McElPpDV+uFLppHPsLcLs.-up...KVRChLTGHELPCpLsELQsYTcGKKYp......+Lspupss..FsYspaEPHIVsSTKs.pQLFCKLTLRHINKpPEHVLRHlpGKRYQ+ALppYE-CpcpGVcaVPApLhpKp+.+c...spcsuscpP.p+p-s.......hWtPsS....S-E-sutS-DSMoDLYPsphFs.Ks.sp.ps.t....t.-sFpTD.c-....--hthcspp.p...............pKRu...KKQsuuhpKKFKpsH+Kspphp.uSsK ..................................phtthLtppP..hph..sss.......+l+CtlTGHElPsp.hs-lpsYscuKKYp......+.L...p...s..p..p..hs.aspa-PHll..s.ss..c.p...ppLaCKLThcplNKpsEHlh+HlpG+Rap+tLpchEcspp..p..G....h.chh.....up...h..p........c.tp..p.pp.........pp.ss.st.....t.ppt......................hW.P.s......scpp....t.scDs.....hsDh...hss..t.h...tp.t.ttp....t.........................t.psh.....sc.....pp.......ccht......t...........................p........t.......t..tpc.p...............t..................................................................... 0 25 37 53 +5306 PF05478 Prominin Prominin Moxon SJ anon Pfam-B_18226 (release 8.0) Family The prominins are an emerging family of proteins that among the multispan membrane proteins display a novel topology. 
Mouse prominin and human prominin (mouse)-like 1 (PROML1) are predicted to contain five membrane spanning domains, with an N-terminal domain exposed to the extracellular space followed by four, alternating small cytoplasmic and large extracellular, loops and a cytoplasmic C-terminal domain [1]. The exact function of prominin is unknown although in humans defects in PROM1, the gene coding for prominin, cause retinal degeneration [2]. 31.00 31.00 31.30 31.10 30.80 30.80 hmmbuild -o /dev/null HMM SEED 807 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.42 0.70 -7.00 14 435 2009-01-15 18:05:59 2003-04-07 12:59:11 6 11 96 0 229 399 0 541.40 21 85.92 CHANGED luhststhssupst.....t.htas.hhssppapsps....hsstshsslashs+phls...sVpspP.hPp-hlht...lhpsshshtsphp.p.........clltaphGhllsul....IulLallLMPllGl...CFCCC.RCptsCGGchppp-pppssCc.RthhslhLllholhlhhGllhuFlsNphhppsltpospthppslcDlpsalsssspplctlhsp.YpphpspltppLsssu..phlsstlpsphcupllssLpplhp..............slptscctLpslssphpcL+ptuspLpssLpuhRpplpssLsp..Cpsp........Csslh.shs.ltls....hcasplPslsp.lpslpplscsshsshlpcGhsphpplstplpppspssl....shpptLsshssslcshupplstp.ltsshsphhpssppphcshts.lccYspYRahsulllsslllLllhh.h.hGLLsGlhG.c+pssPsccsCsussGGpFLMhuVuh.FLhshhlhllslhhFllGuNsppllCcsh...pspplh.p.hlDoPh.Lst....htluthL.........phshslpl.....uplhcsCcpNpslassL+Lpshaslschhslpp...hopc.lppphpsl+l...slssl....plLsspu+csLpshtsoslschsassalpplp+slopssLsshApcLctlAsshssss......................l+ssLppcApsl+slppshlsshpshlscLppslpsL-pp....ssphstslsplLsplptupthlsspspphltptocphhsphlshhppYlsaVhpplspclupCpPlushhDsuh.shhCshllD......PhNuFW....huluhCthhhlPslIhuVKLuphY++hc..ss.ss 
...................................................................................................h.....................................................................................................................................h.h...hhhh....hh.h....h.hhh.h.hhh.hPhhuh...hhshC..R........h.h......t.......t.......ts......s.p.........p.......pt.....+.hh....h...L.h.h.h..hh.h......hu.h.h.h.ahs..sp.hpt.t..ht...............h.psh.shp.hhpp...............s..ph.p.hlh...ht...p...ph..tlptht.....l..h..l.t....t...t..h...h..tth.t...................................sh.......h..hp.....ht...t.ht.t.lp..p.pl...htt..C.......................C......t...h..t......t.....s.............................p.t....h.........p.........thp....h...t........p.h...t...tt..t.hpth...ht.pht......h................h....t.htp.........t.h..t......p.h......h.....h...hh.t...............t........t...............h....p....ph....t....h.hh.shhh..hlhhlh.hh.h.huLhhG.hu..................ttsuu.hhhh..............uhhh.Fhh..hhhhlshhhhhhGssh.phhCtsh.........tptphh.p....hh...Dp.h.h................h..................................t....lph.....tphhptCpp..s.tshathhp.hpp...hslp...phh.p..hp....h..t..h..t..hpth..ph......shp...h........hlt.t.t..p..L.th..ts..htthp...hh.t.htt..s.........h....s...l..thttpl.pt..st...................................ht....th...t.s.thp..th.......p.h.............h......t......th..plt.lpt....................th..tht.h..lttht.hp..l.t...p.ht.hh..t.sp...thhtt..t.hppYhphh...tht.phutCtPhsthh.pps.....h..hCphhhs......sh...NhhW.....hslhh..shhhh.lP.lhhshph.phap..........s................................................................................................... 0 60 78 167 +5307 PF05479 PsaN Photosystem I reaction centre subunit N (PSAN or PSI-N) Moxon SJ anon Pfam-B_18582 (release 8.0) Family This family contains several Photosystem I reaction centre subunit N (PSI-N) proteins. The protein has no known function although it is localised in the thylakoid lumen [1]. 
PSI-N is a small extrinsic subunit at the lumen side and is very likely involved in the docking of plastocyanin [2]. 19.60 19.60 20.00 20.80 19.30 19.40 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.67 0.71 -4.19 3 69 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 37 4 31 62 1 113.70 42 77.64 CHANGED Vusup+A.sAlsQuA+PARAssVsVcuSssRRSALLGLAAlLuuTAAlAuSANAGVlDEYLEKSKsNKELNDKKRLATSGANFARAaTVQFGSCKFPENFTGCQDLAKQKKVPFIoEDLcIECEGKDKaKCGSNVFWKW .................................................t...............t......t..s....t...s.p.RtuhlhLuus.lhssA..u....s..u...s..ApAull--YLcKSKANKELNDKKRLATohANhARuaTVpFGoCpFP.NFhGCp-LAhpptV.als-DhtlECcG.+.pth..CuSp........................... 1 6 17 24 +5308 PF05480 Staph_haemo Staphylococcus haemolytic protein Moxon SJ anon Pfam-B_18620 (release 8.0) Family This family consists of several different short Staphylococcal proteins, it contains SLUSH A, B and C proteins as well as haemolysin and gonococcal growth inhibitor. Some strains of the coagulase-negative Staphylococcus lugdunensis produce a synergistic hemolytic activity (SLUSH), phenotypically similar to the delta-hemolysin of S. aureus [1]. Gonococcal growth inhibitor from Staphylococcus act on the cytoplasmic membrane of the gonococcal cell causing cytoplasmic leakage and, eventually, death [2]. 25.00 25.00 32.10 31.90 23.70 23.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -7.91 0.72 -4.27 16 511 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 209 0 20 79 0 43.20 59 96.71 CHANGED MstLs-AIsssVpAGlspDWuplGTSIlsIVtsGVshlu+l...FG ..MpGLh-AItsTVpAA...spDusKLGTSIVsIVtNGVGlLGKL...FG..... 0 2 2 20 +5309 PF05481 Myco_19_kDa Mycobacterium 19 kDa lipoprotein antigen Moxon SJ anon Pfam-B_19467 (release 8.0) Family Most of the antigens of Mycobacterium leprae and M. 
tuberculosis that have been identified are members of stress protein families, which are highly conserved throughout many diverse species. Of the M. leprae and M. tuberculosis antigens identified by monoclonal antibodies, all except the 18-kDa M. leprae antigen and the 19-kDa M. tuberculosis antigen are strongly cross-reactive between these two species and are coded within very similar genes [1,2]. 26.90 26.90 26.90 27.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.10 0.71 -4.41 3 187 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 93 0 41 162 0 147.20 38 96.88 CHANGED M+RpLpsAVAGssILsAGlSGCSSGNKS.TPSSScTToSouTTAS...PGAAAGs.KVlIDGKDQNVSGSVVCTTAuGsVNIAIGGAATGIAAVLoDGNPPpVKSVGLGNVNGVTLGYTSGTGQGNASATKNGNoYKITGTATGVDMANPhQPVNKSFEIEVTCs .....................................................h.sGC..us..t.....t....s...stts.sss.ssu...........ss.s.s..uG.....s...pV..slDGpsps..s..ss.sVs..Csssu..........G..p..hsI...u.I...G..s.....s..s....s...G....l.uAVlo.s.G.ssP.pV.cSVu..L....GsV..sG......h..oLua..sp..GsG..t..G.sAsss+DGspYpIoGTAsGs.DhuNPhp..slsKsF-IcVTC............... 0 2 19 33 +5310 PF05482 Serendipity_A Serendipity locus alpha protein (SRY-A) Moxon SJ anon Pfam-B_19519 (release 8.0) Family The Drosophila serendipity alpha (sry alpha) gene is specifically transcribed at the blastoderm stage, from nuclear cycle 11 to the onset of gastrulation, in all somatic nuclei [1]. SRY-A is required for the cellularisation of the embryo and is involved in the localisation of the actin filaments just prior to and during plasma membrane invagination [2]. 
19.00 19.00 60.30 59.50 18.80 18.30 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -13.02 0.70 -6.07 4 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 32 0 26 68 0 427.70 41 87.27 CHANGED MEpLLtQLslCsELIApG.oCssGpIuWLNEFCATFLDFAS-LKA+LPElAP+..h.EGGsNI-VETIFLCLTQVVTCITQLERTINIEus.uttsp..MTRhHFLDRIDWClRRlhsSLoQLc.pttssssssLEDHoFVELMDLALDHLEsaMEtLusposs..LhI.EEs-op-hhQLuSIVNHIVRHALAFANVALEuDKKALStLCETLLuECuTFhEsuuEhNPGHRKLEALSLERALYuLETaLNEALLHLIFsSll-LENssIp+LRcALQpp.-..Suhs-pLlSDFDTNMDRIQQIGVLAIAFSQDlKTKTIVRSCLASLESLDACIVPALQ..poo.usHHADILEHHFNpElLIFRNlIHEIIDSRSLINNYLDMLAES..IHlAsK..P+saL.LIVQMGuVlh-HFRLPVNYStL..S-.DGKRlHpDhlLIL+EC.AVVslus.VDPKRIVKRLKILYSVLAKLRDlID+s.h.+-.......s.s.......Sc+phTsATRThlR..+ShuKRQRSFV+QptsssVssPps..sSlusSlSp-uDLISFQLTEILRIs ...........................................................................................................................................................LNthCu.hhphsp.hp..h....................Ehhh.LCloQlhhClp.lEpshphpt....................o+.aFlDRlpaClp+lhh.h...........t.t.s...t....htc.sFlplhDhuLDhlt.h.ph..ppspt..............p.h.....s.hpLtshlspllppALAFANVAl.ppDK+ALouLCppllpECs..sFp..cp..s......t........c.sss.u....p....p....KLcAhoL..............EcALYtLEsalN-ALL+L...lFsshhDh-...pt...ulcKL+....sh...Lp........p.s.......sstsp..clIusFDsNhDRlpQIGlhAIAFupclKtKThlRSCLASlESLDssllPuLQ..........u..suushHu-lLppHFppEhhhF+sslpEIIDSpuLls.sh........L-hLs-p..I....cst....pp.....+phL......llQhu...tllh.cHFpL.lNh..psL....sc....p....G.pch....a......p...chlhlLpEC.p.A...l.....l...........h....ss....l-...Pp..............RIlKRhKILholLtKL+ssls..tt................................................................................................................................................................................................................ 
0 6 8 20 +5311 PF05483 SCP-1 Synaptonemal complex protein 1 (SCP-1) Moxon SJ anon Pfam-B_19633 (release 8.0) Family Synaptonemal complex protein 1 (SCP-1) is the major component of the transverse filaments of the synaptonemal complex. Synaptonemal complexes are structures that are formed between homologous chromosomes during meiotic prophase [1]. 19.50 19.50 19.50 20.10 19.30 19.40 hmmbuild -o /dev/null HMM SEED 786 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.95 0.70 -13.70 0.70 -6.76 2 74 2009-09-11 15:44:23 2003-04-07 12:59:11 7 4 40 0 35 67 0 520.20 42 79.88 CHANGED hGGDSsaFKohNKCTEsDFthPhshosLSKNtENIDoDPAhQKlshLPhLEQVuNS.sCHYQEGlpDSDhENSEshSRlYSKLYKEAEKIKKWKVShEuEL+QKEsKLQENRKIIEAQRKAIQELQFtNEKVSLKLEEtIQENKDLIKENNATRHhCNLLKETCARSAEKTpKYEYEREETRQVYhDLNNNIEKMIhAFEELRVQAENuRLEMHFKLKEDaEKIQHLEpEYpKElNsKEpQVSLLLIQ.TEKENKMKDLTFLLEESRDKsNQLEEKTKLQsENLKp..EKpcHLTpELEDIKhShQRShSTQKsLEEDLQIATKTIhQLTEEKEsQMEE.NKA+ssHShVVTEhcsTsCoLEELLRTEQQRLEpNEDQLKllTMELQKKSSELEEMTKhpNNKEVELEELKplLuEcppLL.EpKQhEKlAEELpGpEQELhhLLQsREKElHDLElQlTshpTSEpaY.KpVc-hKTELEpEKLKNhELTupsshL.LENKcLsQEsSDMsLELKppQEDI.NsKKQEERMLKQIEsLpEpEhpLRsELE.VRcEhhQptDEVKCKLDKSEEN.....................CNNL+KQlENKsK.IEELpQENKALKKKuoAEsKQLNsYEIKVNKLELELtSsKQKFtEhhssYQKEIE.KKISEEpLLtEVEKAKshsDEAVKLQKEIDhRCQHKIAEMVALMEKHKHQYDKIlEERDSELGLYKs+EQEQSSh+suLEhELSNl+sELlSlKKQLElE+EEKEKLKhE.pENTAhLp-KK 
..............................................................................................................p....s...plaSKLacEsEKIKpWKlph-u-lpQKEp+LQEN++hIEsQRKAIQELQ...FtNEplSlKLE-tIpENcDLhpcNNATRpLCNlLK-Tht+osEKhphaE.EREET+plah-.spNlp+MlhAFEpLRhQAEssp.EMp...KlKEph.phpcLcpchctEhp.KEcplulL.hp.p-KEschcclhh.LpEopcphspLpEtpp.ppE.LKpu.tcpctLhpcLp.hp.sh...pct.tsp.cslp....pLp.s...hcph.plhtpK...-sphEp.pph+..ps..ltphphshtpLpp.Lp.E.p.Rhpp....cp.p.p.h..-Lpp+sttltphhc.pp.pchp.lp..Lpp.Ltptp........pp....hpth.tphp.tpppl..h...l.....hptphtchp.....hts.ht.tpp....ph.phpt.phpp...t.h+p.pL..php.l..p.ptth..ph..h..p.p..................................................h......p.ph.......ph.....t...........................h......hc+......t....pp..tp..ct..hpppNchLKKp.stE.pp.p.h.phplspLp.Ehpshpp.htE.hpph.c-lc..pKph.t..tpL.tElcKh+hhssEulK.pcEh-h+CQpKIs-MlALMEKHKtpYD+hlEE+DuELt.hp.+EpEt.u.hctuLE.ELsphcs-l.pl+.pLc.ph.tc.K......................................................................... 0 6 8 17 +5312 PF05484 LRV_FeS LRV protein FeS4 cluster Bateman A anon Bateman A Family This Iron sulphur cluster is found at the N-terminus of some proteins containing Pfam:PF01816 repeats. 20.90 20.90 21.80 35.20 20.10 20.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.03 0.72 -4.39 4 74 2012-10-11 20:01:00 2003-04-07 12:59:11 6 6 68 1 40 71 0 55.80 44 21.33 CHANGED usEsl...s.hs-CRsCpFc.sLL.oGRCs.GcuCVss+puRpIDRFFRpNPpLAscY ........s..tslDWpGp.lcCssCsHc...slh.sp.G+..Cc.s+ACVpDRYARRIDRFFphNPsLAspY... 0 12 28 32 +5313 PF05485 THAP THAP domain Bateman A anon [1] Domain The THAP domain is a putative DNA-binding domain (DBD) and probably also binds a zinc ion. It features the conserved C2CH architecture (consensus sequence: Cys - 2-4 residues - Cys - 35-50 residues - Cys - 2 residues - His). 
Other universal features include the location of the domain at the N-termini of proteins, its size of about 90 residues, a C-terminal AVPTIF box and several other conserved residues. Orthologues of the human THAP domain have been identified in other vertebrates and probably worms and flies, but not in other eukaryotes or any prokaryotes [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.33 0.72 -3.81 139 1658 2009-01-15 18:05:59 2003-04-07 12:59:11 7 123 120 6 1163 1895 1 83.30 23 22.91 CHANGED ppCss......sC...........tpppppps.lphacFPp...cpp..hht.pWhpshptp................sppt.......tlCspHFpspsh.................ppppLpssA......VP.......olh..ttspt .........................Css..sC.......................tp..p.p..pp.s...lpha........p.F.....Pp................c..p.......hhp..pWhpshpppsh...............t..pppt..........hlCupHFp..sp.sh..........................tp.ptLp.sA.......lPolh.....s............................................ 0 346 432 912 +5314 PF05486 SRP9-21 SRP9; Signal recognition particle 9 kDa protein (SRP9) Moxon SJ anon Pfam-B_7787 (release 8.0) Family This family consists of several eukaryotic SRP9 proteins. SRP9 together with the Alu-homologous region of 7SL RNA and SRP14 comprise the "Alu domain" of SRP, which mediates pausing of synthesis of ribosome associated nascent polypeptides that have been engaged by the targeting domain of SRP [1]. This family also contains the homologous fungal SRP21 [2]. 
20.90 20.90 21.90 23.20 20.60 20.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.54 0.72 -4.24 33 286 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 247 3 199 271 1 80.00 33 58.17 CHANGED alpshppFlctSppLhpupPspTRhohKYpt........................................sssslslKsh-spsu.ClKY+TsKut-luRLlshhG.husshs ........hpoh--FhctuppLatusPp.psRhshKYpa.............................................................scupLslKspDsts..CLpY+Tspup-lp+l..p.h.hupLhphh.s............................. 0 59 99 155 +5316 PF05488 PAAR_motif PAAR motif Yeats C anon Yeats C Motif This motif is found usually in pairs in a family of bacterial membrane proteins. It is also found as a triplet of tandem repeats comprising the entire length in a another family of hypothetical proteins. 23.00 23.00 23.00 23.10 22.80 22.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.68 0.72 -3.99 88 2205 2009-01-15 18:05:59 2003-04-07 12:59:11 8 110 944 0 577 1932 242 74.40 27 19.69 CHANGED lhh.GDp.TspGGpV................l.su..usshhhsGpslAhhG..........D..hssCPts.......usssIlp..G..s.s..shhhsGpslAhpGcpssC...........Gs.....pLluu .......................................................................................................................p.sG..l....................h..sG..ssshhht..Gh..ss.AphG.........................................................................D......hs.sCspp................ssshIsp.....G.s..s....slh...l...s.GpPsA.phGc...ps.s.C...................Gu........hlh................................... 0 76 220 365 +5317 PF05489 Phage_tail_X Tail_X; Phage Tail Protein X Yeats C anon Yeats C Domain This domain is found in a family of phage tail proteins. Visual analysis suggests that it is related to Pfam:PF01476 (personal obs: C Yeats). The functional annotation of family members further confirms this hypothesis. 
20.60 20.60 20.60 20.60 20.50 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.81 0.72 -4.49 32 981 2012-10-01 23:00:54 2003-04-07 12:59:11 7 7 753 0 144 576 23 58.70 40 67.46 CHANGED MKsaAl.QGDTLDAIClRYYGRT.EGVVEsVLAANPGLAELGsVLPHGTAVELP...DVQTAPVA ...............................pshuh.pGDTl..DuLCaRaY..Gc...o..pG.ss...E.pVL.p..A..NPG.....L.....Aph....s....h..LPtGhtVplP...-lptt......................... 1 29 67 110 +5319 PF05491 RuvB_C Holliday junction DNA helicase ruvB C-terminus Moxon SJ anon Pfam-B_844 (release 8.0) Family The RuvB protein makes up part of the RuvABC revolvasome which catalyses the resolution of Holliday junctions that arise during genetic recombination and DNA repair. Branch migration is catalysed by the RuvB protein that is targeted to the Holliday junction by the structure specific RuvA protein [1]. This family consists of the C-terminal region of the RuvB protein which is thought to be helicase DNA-binding domain. 24.70 24.70 24.70 25.80 24.60 24.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.30 0.72 -4.35 126 4372 2009-01-15 18:05:59 2003-04-07 12:59:11 8 5 4328 12 944 2793 1004 75.80 54 22.41 CHANGED VDphGLDphDR+hLpsllcpasGGP...VGl-TlAAuluE-psTlE-VhEPYLlQpGalpRTPRGRlsTptAYpHLshs ...VDptGLDhhD++lLpslI-pFsGGP...VGL-TLA...AulGE.EpcTIEDVhEPYLIQpGFlpRTPRGR.lATspAYpHhGh.s..... 0 325 633 805 +5321 PF05493 ATP_synt_H ATP synthase subunit H Finn RD anon Pfam-B_3341 (release 8.0) Family ATP synthase subunit H is an extremely hydrophobic of approximately 9 kDa [1]. This subunit may be required for assembly of vacuolar ATPase [1]. 
25.00 25.00 28.40 28.40 24.90 24.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.34 0.72 -3.90 34 346 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 230 0 214 332 0 64.60 36 75.96 CHANGED hslllsolhashhuhhu.hhsPK............u.spsllpshllLou.ssCaLhWhlsaLAQhsPLluPphsspp ...................h.slllholha...shl...G..hhss....hhsPK........................GsspsllpshLl.lTu.lCCaLhWhlsaLAQhNPLluPplpp......................... 0 74 113 167 +5322 PF05494 Tol_Tol_Ttg2 Toluene tolerance, Ttg2 Finn RD anon Pfam-B_3575 (release 8.0) Family Toluene tolerance is mediated by increased cell membrane rigidity resulting from changes in fatty acid and phospholipid compositions, exclusion of toluene from the cell membrane, and removal of intracellular toluene by degradation [1]. Many proteins are involved in these processes. This family is a transporter which shows similarity to ABC transporters [2]. 21.50 21.50 21.80 25.20 21.30 21.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.92 0.71 -4.70 160 1882 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 1658 1 420 1153 1732 167.90 28 80.72 CHANGED lppssscllshlpsspsth.......pth.....hphlcphltshhDhpthuphslG.p.......a.............+.psospQ+.....ppFhptFcphLhpoYu.stlspas..s........pp..lpht..thpss.....ppshVpoplhpsss...pslplsaplpp.....tss.p..W+laDlhlp...G.lSllpshRspFsshlppp.G.l-sLlpp.......Lppcs ............................ppsspcshspL+sppsph.ptss...shlpslVcpplhPalshchhutllLGph...a.............+.pAoPtQp........ptahpuFcphLhpsYu.psLs..Yp..s........Qs......hplt....hs........stshsslcssllssss...sslpl-aphpc......psG.s..WpsYDhhsE...G..lShlsohp.spaushlcpp.G..lDuLhtpLcp..h...................... 0 114 234 328 +5323 PF05495 zf-CHY CHY zinc finger Wood V, Bateman A, Mistry J anon Pfam-B_5537 (release 7.8) Domain This family of domains are likely to bind to zinc ions. They contain many conserved cysteine and histidine residues. 
We have named this domain after the N-terminal motif CXHY. This domain can be found in isolation in some proteins, but is also often associated with Pfam:PF00097. One of the proteins in this family (Swiss:P36078) is a mitochondrial intermembrane space protein called Hot13. This protein is involved in the assembly of small TIM complexes [1]. 23.30 23.30 23.30 23.60 23.20 23.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.78 0.72 -3.82 76 971 2009-01-15 18:05:59 2003-04-07 12:59:11 7 24 596 2 499 817 15 75.40 32 23.00 CHANGED CpHYcps...stl+ssCC..scaYsCt................pCHc-tp.........sHphpRhs.p........tllCshCppt..shsph.............tss.....phhCshCp........ha. ........................CpHYcpp...stl+ssCC..s+..a......Y.sCh................pCH.sc..t.p......................................cH.sh...p+hshp...................tllCuhCppp.shsph...........................tps..............hCshCphh................................................................................................. 0 165 308 434 +5324 PF05496 RuvB_N Holliday junction DNA helicase ruvB N-terminus Moxon SJ anon Pfam-B_844 (release 8.0) Family The RuvB protein makes up part of the RuvABC revolvasome which catalyses the resolution of Holliday junctions that arise during genetic recombination and DNA repair. Branch migration is catalysed by the RuvB protein that is targeted to the Holliday junction by the structure specific RuvA protein [1]. This family contains the N-terminal region of the protein. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.40 0.70 -5.32 110 5721 2012-10-05 12:31:08 2003-04-07 12:59:11 7 23 4390 12 1243 22353 7697 210.90 46 58.50 CHANGED Rl.lssp....tttp.....-....pshEtuLRPppLs-alGQcclKcpLplaIpAA+pRsEuLDHVLLaGPPGLGKTTLApIIAsEhGVsl+hTSGPslE+sGDLAAlLTsLpssDVLFIDEIHRLspsVEElLYPAMEDaplDIlIGcGPuARol+l-LP.FTLVGATTRuGhLouPLRDRFGlhtRL-FYss--LppIlpRoAclLslslss-GAtEIA+RSRGTPRIANRLLRRVRDaApVcuc .............................................................................h..............-..p..h-..ps.L..R...P........p....p....L....c.....-....a.....l.....G.Q...p....c.l...+....p........p......l.....c........l....a........I.............c............A............A...........+........h............R................s............-................s...............L............D..................H..............l...........L..l....a.G.P.PGLGK...TTL..A.....p........I....I...A...s...........E.......h..........G........l........s............l..........+............p........T..........S...............G..........P...........s.......l............-...............+..............s..............G..............D..........L................A..............A...............l............L............o..........s..........L..........p......................t..............D.............V...........L........F...........I.D...........E.....I.....H......R.....L...s...................s...............l......E..........E...........l...........L....Y..P...A........M......E....D.......a.............p....l.......D......I.....h......I.....G......p.......G.......s.......u...A.....+...o...l........+......l....-........L............s...................F......T..L..l.G..A.....T....T.....R......u..G............L...o..sP..L....R..D..RFG..l..h...+h-.a..Y..p.....t..-....L...ptI..l.t.....R...o...A.t..h..h...t...h...t........h...pt...p.u.A....hE.lA....pR.uR..G.TP..R.I.AN....RLL+RVRDaApVpts........................
............................................................................................................................................... 0 440 849 1068 +5325 PF05497 Destabilase Destabilase Finn RD anon Pfam-B_4147 (release 8.0) Family Destabilase is an endo-epsilon(gamma-Glu)-Lys isopeptidase, which cleaves isopeptide bonds formed by transglutaminase (Factor XIIIa) between glutamine gamma-carboxamide and the epsilon-amino group of lysine [1]. 26.20 26.20 26.40 28.10 25.30 26.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.35 0.71 -4.01 28 193 2009-09-11 15:14:13 2003-04-07 12:59:11 7 4 73 4 107 201 1 109.90 35 68.13 CHANGED lossCLpCICcspS.GC.tsls.CchD.h.p.uCGhFpIppuYWhDsG+...PGss........apsCusDhsCusssVpsYMp+Yt....psCsss....CcsaARlHsGGPpGCc.....ps.shsYhpplppphs ......................sptCLtClCpstS.GCptshh.Cp.s.......sCGhFpIohsYWhDuGc......sscs.....tt..uapsCss..D.pCAspsVpsYMp+as..pcCssct..sCpDautlHphGshGCp.....s.p..h.s..h..tY.phhppCh.p................... 0 43 56 93 +5326 PF05498 RALF Rapid ALkalinization Factor (RALF) Finn RD anon Pfam-B_4453 (release 8.0) Family RALF, a 5-kDa ubiquitous polypeptide in plants, arrests root growth and development [1]. 21.30 21.30 21.30 22.00 20.30 20.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.46 0.72 -3.15 34 313 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 35 0 202 290 0 64.10 36 55.35 CHANGED tttt.shhsu.sspRhhttp......pYIS.YsALc+splPCs.pcGsoYY..sC.cssspANPYsRGCSsITRC+c .......................s.........s..tpRhlttt.........pYIS.YuA.Lp+s.plPCs..ppGs.SYY...NC.ps.s..........stANPYsR..GCotITRCRt.............. 
0 40 107 152 +5327 PF05499 DMAP1 DNA methyltransferase 1-associated protein 1 (DMAP1) Moxon SJ anon Pfam-B_38340 (release 8.0) Family DNA methylation can contribute to transcriptional silencing through several transcriptionally repressive complexes, which include methyl-CpG binding domain proteins (MBDs) and histone deacetylases (HDACs). The chief enzyme that maintains mammalian DNA methylation, DNMT1, can also establish a repressive transcription complex. The non-catalytic amino terminus of DNMT1 binds to HDAC2 and DMAP1 (for DNMT1 associated protein), and can mediate transcriptional repression. DMAP1 has intrinsic transcription repressive activity, and binds to the transcriptional co-repressor TSG101. DMAP1 is targeted to replication foci through interaction with the far N terminus of DNMT1 throughout S phase, whereas HDAC2 joins DNMT1 and DMAP1 only during late S phase, providing a platform for how histones may become deacetylated in heterochromatin following replication [1]. 25.00 25.00 25.00 25.30 24.50 24.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.72 6 129 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 103 0 88 119 0 152.70 44 34.72 CHANGED EEEhLlsELRKIEsRKKEREKKsQDLQKLIotAD..........oTuptR.KpEKKhhKK..Klspp.ts.+pspclsVsE..ouGIKFsDhKuuGVoLRSQ+MKLPusVGQKKlKAlEQhLpEhtVDhsPsPTE-IsphFNELRSDhVLLhELKpAhusCEaEhpoL+H+aEALs.GKslshP ...........................................EEphLltEL+KIEtRKKERE++sQDLQKLIotAD.......................sssp.R....+.tE++h.KK.......Kls.p.p.........+p.s....pt.ssss-.............suGIKFsD.h.+uuGVpLRSQ+M.....K.....LPu...ulGQKKlKulEQhL.pE.luV.-.............LsP.sPTE-lsp.FNELRSDlVLLhEL+pAhusCEaElp.LRHpaEAhs.utsh...t................. 0 29 39 66 +5329 PF05501 DUF755 Domain of unknown function (DUF755) Finn RD anon Pfam-B_4891 (release 8.0) Family This family is predominated by ORFs from Circoviridae. The function of this family remains to be determined. 
29.10 29.10 30.00 39.50 29.00 29.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.77 0.71 -4.06 21 74 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 29 0 0 75 0 125.50 27 50.26 CHANGED ps.cpshhsPIsSpKQYKLtsQppps.ptssTsGTo-sshLpppLLKECpKTupLhpL.ppl..............ppccpppsspspp+++K+p+psStppuKKhpsK++.......tpssSSSSSSssuoSSSppSS .........pph.tPlsShcpYK.hTppsps.pssSTsGTs-sssLtppLLKECppTppLh.phh.ppltpp.............ttpp....pp....ppps...s...ppppccpp.+p+phstp..pt++.ppp+++............pppsSSSSSSssssoosppo.s..................... 0 0 0 0 +5330 PF05502 Dynactin_p62 Dynactin p62 family Finn RD anon Pfam-B_4912 (release 8.0) Family Dynactin is a multi-subunit complex and a required cofactor for most, or all, of the cellular processes powered by the microtubule-based motor cytoplasmic dynein. p62 binds directly to the Arp1 subunit of dynactin [1,2]. 22.90 22.90 23.20 22.90 22.10 22.80 hmmbuild -o /dev/null HMM SEED 483 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.84 0.70 -5.73 11 399 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 215 0 284 414 0 252.30 19 75.82 CHANGED P.EpLlaCEDCHQIKCPRCloEEIssaYCPsCLFEoPSStlR.EtN....RCARNCFsCPhCsuPLuVssl-s...........ssuussppGPasLoCuYChWoohDlGlpF-KPssIphQLuK.....................hp....t....su+tht......-h+pshSoauoh-s............tpts...pp.sssLsh-s+FssLKuFYKsQlusssoussD.hth.chus....uFuuPs..uLsRhMslYsuuupl.shhs.spKKs+s+P.s.hR-ALssoEGLpl..ts.sEsshIpphss.......puasulASl-QhhhQ.sPss.........RFsE-LLPlPVLLRTKRSKRCcsC+HILlKPEhKspSsRFRI+LlAlsYIP..sl+...............................sL..ss...............s...s.sssslcsLpPh+shQllLTlKNhhF-PV+VTLuTPusT.PGRluoK...VTlLCPpF-IsuNpDsWpEALp............................hssus.c.uS+uushshst-+ssEuGKVW-+GRNWTolVLEVVPuoLs.......sttssp.phppccDs 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 99 143 231 +5331 PF05503 Pox_G7 Poxvirus G7-like Finn RD anon Pfam-B_4957 (release 8.0) Family \N 20.00 20.00 20.10 100.60 19.80 17.00 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.08 0.70 -5.85 11 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 42 0 0 50 0 356.90 62 98.52 CHANGED MtEt..KQS.....pIFsIloKullpSls+sh..shsssYIpuKAKpLhYssssoh+-ulINuIYspsEssIslsshscLhclLspL+ppusYVsNssEFWRLYNSLhRFTHspSFFssChPTIlsTLATLlTLlLuNcLLaAA-MVEulEsYLFs.upKs.uQELADLL-MKYGLINLVQYKIhPlllGppt......p..hhuu..uuss.ssassEl-+LhELPVKoshlsplYcFLsc+GlsToNNaAEYlAGLKIEElsp...........................tss.t...t.s..sht.ppptshtthc............lLcpApKYSKGHVLDGuVoSPlTtpt.lsspIP..............hosoDlcKFsILEYLYlMRVhANsIK+Ks..tpspsp.GIsLsINS.PFKoITlPus .........t.tEQ..RpS.....TIFDIVSKsIVpSVLRDI..SINoEYIESKAKQLCYC.suSKKESVINGIYNCCESNIcIhD+EpLLKILDNLRsHSsHVCNuoDFWRLYNSL+RFTHTTuFFNTC+PTIlATLsTLITLILSNKLLYAAEMVEYlENpLsS.SNKSMSQELA-LLEMKYALINLVQYRILPMIIGEs..........IIlAGhsGKpPhS-YSuEVERLMELPVKTDIVsssYDFLuRKGIcTSNNlAEYIAGLKIEElpKspphhs................shushANSshlKs+K...SIhPA..NIND+pIMEso+............hLDsuEKYSKGacsDGAVTSPLTuNsT.IooaIP..............ISASDMQKFTILEYLYIMRVMANNVKKKN...EuKNsGGVVhpINS.PFKlIslPp.s.... 0 0 0 0 +5332 PF05504 Spore_GerAC Spore germination B3/ GerAC like, C-terminal Finn RD anon Pfam-B_5052 (release 8.0) Family The GerAC protein of the Bacillus subtilis spore is required for the germination response to L-alanine. Members of this family are thought to be located in the inner spore membrane. Although the function of this family is unclear, they are likely to encode the components of the germination apparatus that respond directly to this germinant, mediating the spore's response [1]. 
21.30 21.30 21.40 21.40 20.60 20.90 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -10.82 0.71 -4.48 137 1584 2009-09-10 14:51:44 2003-04-07 12:59:11 6 5 321 1 349 1395 6 166.40 21 45.64 CHANGED lsGsAlF.+s..cKhlu.hLstp-spshphlpschps.Ghlslphsppt................................................hlsh.plhpsc.sclcsph.cssch.p...hpl..plphcupltEhp.sths..h.s....tchlpplEcthpcplccphpphl.cchQ.chpsDshGhGchh+p..phscha.p.h.ppcW..p...ch.aspspl.clpVclpIppsGhhp ....................................sGhAlh..+s...sKhhu.hLstc-shshphlp..sc.hp......t..uhhp.hp.h.spst................................................hlsh..ph.h.psc...sclcsph..pss..p..phpl...clphcupltEht..pshs....h.p.....cphcplcctlpcplccchpphl.c.phQ.chpsDshGhGchh+p..pp.chW.p.h...pcpW....p.....ph..as...cs.pl.plplclpIppsGh..p.................................. 4 178 289 304 +5333 PF05505 Ebola_NP Ebola nucleoprotein Moxon SJ anon Pfam-B_8475 (release 8.0) Family This family consists of Ebola and Marburg virus nucleoproteins. These proteins are responsible for encapsidation of genomic RNA. It has been found that nucleoprotein DNA vaccines can offer protection from the virus [1]. 
25.00 25.00 186.20 186.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.16 0.70 -6.46 4 104 2012-10-01 19:59:50 2003-04-07 12:59:11 7 1 21 0 0 58 0 391.30 83 98.86 CHANGED hDYHKILTAGLoVQQGIVRQ+lIsVY.VNsLEuICQhIIQAFEAGlDFQ-sADSFLLhLCLHHAYQGDa+LFLcSsAVpYLEGHGF+FEV+K+-sVpRL-ELLPssouGKNl+RTLAAhPEEETTEANAGQFLSFASLFLPKLVVGEKACLEKVQRQIQVHAEQGLIQYPTuWQSVGHMMVIFRLMRTNFLIKFLLIHQGMHMVAGHDANDuVISNSVAQARFSGLLIVKTVLDHILQKTDpGVRLHPLARTAKVKNEVsSFKAALSSLAKHGEYAPFARLLNLSGVNNLEHGLYPQLSAIALGVATAHGSTLAGVNVGEQYQQLREAATEAEKQLQQYAEoRELDsLGLDDQE+KILMsFHQKKNEISFQQTNAMVTLRKERLAKLTEAIT.........tASps+lus+asDDs-IPFPGPIpssspss..-DsPpDSRDTsIPssVlDP.Ds-.spYsuYp-sthssssDLsLFsLD-D-.DDo+shP........ptsp..Gpps.phpsh.+PP...PGspps.p.+tStshsssspppt..............PptspusRh..LoPlpEEs-s.DpsDs-spSLssLES-D-.....EQs.Dhot.....VAPPAPVY+s.tctctlspsppNs.spTsphtsp-uDh......sSpsppshEETYhHlL+oQGPF-AlNYYHhMpDEPlhFSTccGKEYhYPDSLEEsYPPWLoEKEtl-cENRalslcsQQFhWPVMSh+-KFhAl ..........................................................................................................................................................................................................................VuQsRFSGLLIVKTVLEFILQKTDSGVsLHPLVRTSKVKNEVASFKQALSNLARHGEYAPFARVLNLSGINNLEHGLYPQLSAIALGVATAHGSTLAGVNVGEQYQQLREAAp-AEhpLQ+htEppElpulu.DDpERKILcpFH.p................................................................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +5334 PF05506 DUF756 Domain of unknown function (DUF756) Yeats C anon Yeats C Domain This domain is found, normally as a tandem repeat, at the C-terminus of bacterial phospholipase C proteins. 
24.40 24.40 24.80 24.40 24.20 24.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.08 0.72 -3.50 86 1055 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 296 0 344 1014 25 89.40 29 23.80 CHANGED su+AlPYp.clpsphcsuss.plpLph..sN...s......G..psuu.......sFpVhs....pt.t.....h.s........stP.............................cpYsVpu...........Gpp.lpspW..sh.ssssGhYDLtVhG....PsG..FhR+FsGc ......................................................................ttshshp.cl...ps.....p.hc.....ss....s.....u....slpLph...sN......s.........G......pt..us.......sap.Vhs.........tt.h.....t.t.................stP..................................................................t.p.a.sVpu...........Gpp...lps..p.W...sh...sss...suhYD..l..s..Vpu.....ssG....FhR+FsGc............... 0 78 159 274 +5335 PF05507 MAGP Microfibril-associated glycoprotein (MAGP) Moxon SJ anon Pfam-B_8462 (release 8.0) Family This family consists of several mammalian microfibril-associated glycoprotein (MAGP) 1 and 2 proteins. MAGP1 and 2 are components of elastic fibres. MAGP-1 has been proposed to bind a C-terminal region of tropoelastin, the soluble precursor of elastin. MAGP-2 was found to interact with fibrillin-1 and -2, as well as fibulin-1, another component of elastic fibres this suggests that MAGP-2 may be important in the assembly of microfibrils [1]. 
28.00 28.00 29.60 40.60 22.50 27.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.86 0.71 -4.36 4 107 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 40 0 49 96 0 120.30 53 77.79 CHANGED hhhGsKsLLhLsAhllsSshhPLGVsuQRGDDVp.spsETFTEDPNLVN....DPuTDET.........VLADItPSTDDLAu......s-KNsTsECRDEKFsCTRLYSVHRPlKQClHQlCFTSLRRMYllNpEICSRLVCKEHEAMKDE .......................................................hs.hhLLhh..h.lsp.t.h.lts.s.hsDpl...p.ps.h-sP.sh.h.s...............sPtsspp...............tl.t-...l...h..Pusss..ss........sscsssh....-CR-EpasCTRLYSVH+PsKQClpp.lCFhSLRRhYllNKEICsRhVCtccEhh+s-........... 0 3 7 16 +5336 PF05508 Ran-binding RanGTP-binding protein Wood V anon Pfam-B_37054 (release 8.0) Family The small Ras-like GTPase Ran plays an essential role in the transport of macromolecules in and out of the nucleus and has been implicated in spindle and nuclear envelope formation during mitosis in higher eukaryotes. The S. cerevisiae ORF YGL164c encoding a novel RanGTP-binding protein, termed Yrb30p was identified. The protein competes with yeast RanBP1 (Yrb1p) for binding to the GTP-bound form of yeast Ran (Gsp1p) and is, like Yrb1p, able to form trimeric complexes with RanGTP and some of the karyopherins [1]. 
25.00 25.00 39.90 25.40 22.10 24.00 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.79 0.70 -5.53 18 126 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 112 0 95 128 0 299.00 40 58.01 CHANGED MDplLu+susQAVoF....AIRSGIolASsYAl+pso+aLcplsps-p......pclcpl+p+LcoKIcIlSsAIDLIcLhuAR..........GNTsLEuslsLscsL+t-IspFsp+lsphspt..............spsspspccslppl.cthKcLLsRI--AlPLlNLulTTSGsslsosL.....ssslSPuRLLQASsalsp..............................usspasts.p............slQlGPoFsLohYslFhs.s.+.t.........................-u.pplsWKEsht+upl+lhRh....................................scpscYuYpLcIhEDasDGRYH---..............tshphplslppIs+lFFosSGKLLplpsp....soPVLlLKhsps.ts ........MDthLs...+hspQAhsa....AIRSGIulsusYAlpphophlp...................p............lstppt......pclppLptcLps+IpIlSsAIDhIcLhAAR..........GNTsL.........ESsltLs+sL+.-Ipphsp+lsphsp..p............spsppppctplptl.ptl+pLLtRI--slPLlNLulTTSGssLoosL.....PsslSPuRLLQASsalot..................................................usppas.s.pp...........slQlGPoFsLolYhlFhshs.p...t................................................-u.ppssWKEsh++A+l+lhRhs...................................uptsca.....uYplhIhEDhDDGRhHs.-t............h..ssh+chlslppIsKlFausoGKlLpltsp....ssPVLlLKhDhp.h.............................................. 0 18 46 79 +5337 PF05509 TraY TraY domain Moxon SJ, Bateman A anon Pfam-B_8963 (release 8.0) Domain This family consists of several enterobacterial TraY proteins. TraY is involved in bacterial conjugation where it is required for efficient nick formation in the F plasmid [1]. These proteins have a ribbon-helix-helix fold and are likely to be DNA-binding proteins. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.04 0.72 -4.19 28 257 2012-10-02 18:44:02 2003-04-07 12:59:11 6 3 175 0 28 195 6 48.00 45 68.00 CHANGED lhlpLcpcsNchL.putc+SGRSKppEAtlRLpDHLpcFschhpsphpt ..........hpLD--TNphLltApsRSGRoKThEsthRLpDHLp+FPDFYstEh..p................... 0 4 9 15 +5338 PF05510 Sarcoglycan_2 sarcoglycan_2; Sarcoglycan alpha/epsilon Moxon SJ anon Pfam-B_9181 (release 8.0) Family Sarcoglycans are a subcomplex of transmembrane proteins which are part of the dystrophin-glycoprotein complex. They are expressed in the skeletal, cardiac and smooth muscle. Although numerous studies have been conducted on the sarcoglycan subcomplex in skeletal and cardiac muscle, the manner of the distribution and localisation of these proteins along the nonjunctional sarcolemma is not clear [1]. This family contains alpha and epsilon members. 27.00 27.00 27.20 28.10 25.60 24.30 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.25 0.70 -6.16 5 222 2009-09-12 20:59:17 2003-04-07 12:59:11 8 3 82 0 95 170 0 320.00 44 91.77 CHANGED stlhLLs..hhulLstspSs+ht......TlploVGcLFsHpLEschF....sasscssopp...loY+soLcGYPDLPsWLRYpQcssY+sGFLYGSPTscsAGp.lsIEITAaNRcoFET+RpcLhLsIscsps..LPYQAEFpI+NhNl-chLsospl-sF+stl+s.LWptsPc-LpVlNl-SALDRGGRVPLPPp.pKEGVaV+VGSsspFSopL+El...VpP..pl.uCoQcpsPVtosa-shhopF+lDWCsFpLV-Los.........pssp-ppshp.stG-GhLacDsaauPPs-clscRDYhs-FlloLAlPuhIsLlLhllLuhIMCFpREGhhc+cpc.................ssclQhsHHSuIp.......+SThcLRpMAcsp-lshPLoTLsSacshsu.............-R.PP.spcpsDsTsMPhhpssp 
......................................................................h...........slh.t...tssp.........slhs.sGhlFVHsL-p.chF...........h.s.s.sp...........lTapssL.GaPDhPtWLRYhQRosapsGhLYGoPTs-shG+.plIElsAYNRcoF-TsRppLllp..Ihs.s...E...t....LPYQAEFhl+shsVEEhLs..S..pshscF.LsAlps.lWp....PtcLphlNITS..ALDRGGRVP....LPIp...shKEGVYlhVGusssFSsCL+.l.....sPtpph..pCs..p..t.tsllss.cph..hspFhlDWCplsL...V-hsh.........................ss.p.sh.................Gp.GlL...cs..s.....apPPp-s..s..Rs...ahsD..h..llTl.hlP...hlALlLhhlLualMCsRREGh..t.p..t....................................ss.tIQhlHHssIp.......tsTcELRpMu..tsRclsh.PLSTLPhFpshoG.............EhhPP.....ph-ssphPhh.sp..................................................... 0 20 27 57 +5339 PF05511 ATP-synt_F6 Mitochondrial ATP synthase coupling factor 6 Moxon SJ anon Pfam-B_9347 (release 8.0) Family Coupling factor 6 (F6) is a component of mitochondrial ATP synthase which is required for the interactions of the catalytic and proton-translocating segments [1]. 27.90 27.90 28.50 31.60 27.70 27.80 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.32 0.72 -4.10 17 159 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 102 5 80 157 0 92.10 42 71.49 CHANGED Mhh.....phsthhtssholphRRNlGlo.Ashhs..KthDPIQpLFlDKlREY..KppsoGG.hVDAuPEhp+-LpcElpKLpphYGGu...DMspFPpFKFp- ...............................................h.phhshth.+RN.luho.Ashhs...Kt....hDPIQ+LFlDKIREYpsKppu.uG.G...VDuuP-ap+-LpcELpKLpphYGsu.....DhspFPpFKFp-.......... 0 25 33 56 +5340 PF05512 AWPM-19 AWPM-19-like family Finn RD anon Pfam-B_6960 (release 8.0) Family Members of this family are 19 kDa membrane proteins. The levels of the plant protein AWPM-19 increase dramatically when there is an increase level of abscisic acid. The increase presence of this protein leads to greater tolerance of freezing [1]. 
19.90 19.90 20.70 20.50 19.60 19.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.77 0.71 -4.19 12 134 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 29 0 80 129 1 126.30 45 74.10 CHANGED hLNLsMYlllluluuWulN+tI.s.Ghp.tsuhu....h.Phah..GNsATGFFl.hFullAGVVGsASsLsGhpH....lRsWpspSLsuAAouuhlAWuLThLAMGLACKEIp.......lGt.RshRL......RshEAFhIILohTQLLYlhulHuGh .............................hLNhsMYlIlhuluuWulN+tI.s....uh..tsshs.....................GNsATsaFl.hFuLlAGVVGhAS.slsGhpH....lRs.WpscSLsuAuusuhlAWsLTlLAhGlACKEIp.......lut..Rst+L..................+slEAFhIILuhTQLhYllhlHuG.h........... 1 15 51 67 +5341 PF05513 TraA TraA Moxon SJ anon Pfam-B_9521 (release 8.0) Family Conjugative transfer of a bacteriocin plasmid, pPD1, of Enterococcus faecalis is induced in response to a peptide sex pheromone, cPD1, secreted from plasmid-free recipient cells. cPD1 is taken up by a pPD1 donor cell and binds to an intracellular receptor, TraA. Once a recipient cell acquires pPD1, it starts to produce an inhibitor of cPD1, termed iPD1, which functions as a TraA antagonist and blocks self-induction in donor cells. TraA transduces the signal of cPD1 to the mating response [1]. 21.30 21.30 21.30 21.40 20.60 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.58 0.71 -3.94 2 236 2009-09-11 07:37:51 2003-04-07 12:59:11 6 2 185 0 14 133 1 112.60 63 97.55 CHANGED MNhshuhtGhsAPVKp+ua.asphshhshhphtthsh.AAl..hhh.tlu.hApupDLhAuGpssVKATFGtDS.VhhhlllAEllVGssMYhhTKNlhhLhGhsll.VFhsVGhshlt ..........................MsslLSVQGASAPVcKK.SF.FSK.F.TR...........LNMLRLs.R.AVIPAAV.L.MMF...FPpL..A.......MAA..p...G.pDLMASG.NsTVKATFGKDSSVVKWVVLAEVLVGAVMYMMTKN.V.K.F.LuGFAIISVFIAVGMuVVG..................... 
0 0 1 10 +5342 PF05514 HR_lesion HR-like lesion-inducing Finn RD anon Pfam-B_6954 (release 8.0) Family Family of plant proteins that are associated with the hypersensitive response (HR) pathway of defence against plant pathogens [1]. 22.60 22.60 22.60 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.65 0.71 -4.35 10 108 2012-10-02 13:32:46 2003-04-07 12:59:11 6 3 32 0 56 176 63 133.20 48 77.07 CHANGED MuFlSFlGRVLFASlFlLSAaQEFsEF..GsDGGPAAKsLpPKaNhhssplosph....GhtVPpl-lKplVAAsIuLKGlGGLLFIaGSShGAYLLLlY.AhsTPllYDFYNYch-c.............................................................s-FspLFh+FsQsLALlGALLFFlGM ...................................................MuFhSFhGRVLFASlFlLS.AaQcFsEF..G.s..DG..G.PA.A..KtLp.P.K.h.sl.h.p.p.plo.spl....Gh.t...l..P.c.............l..-...lKp..llA.us..I.s..LKGlGGl...LF...l...hu..S.o..h..G..AaLLL.l..aLuh.h.TPI..l.YDFYN.Y.c.h.-.p............................................................scFspLhhcFhQNlALhG.A..LLFFlGM......................... 0 11 36 47 +5343 PF05515 Viral_NABP Viral nucleic acid binding Finn RD anon Pfam-B_6916 (release 8.0) Family This family is common to ssRNA positive-strand viruses and are commonly described as nucleic acid binding proteins (NABP). 
21.50 21.50 21.50 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -3.85 8 209 2012-10-01 20:03:16 2003-04-07 12:59:11 6 2 29 0 0 374 1 97.30 38 84.68 CHANGED MHPaDFN................................LLC.....................................CLH..............................FS+PsLPpDL+lhIastsssspKLsR+sppNKPFpGTSKCAtRRRAKRYNRCFDCGA..aLhcsHsCK..lFsSRApSDsLuVIHEGPAKLYAERoaR.NScAtQLItsDL.hhK ............................................................................................................................................................................................................................................a.p.GpS+sAtRRRA+RYsRCacCGu.......hhhss+.C+..hhsotups-sLtsI+cGss+LhuEp................................................. 0 0 0 0 +5345 PF05517 p25-alpha p25-alpha Finn RD anon Pfam-B_6873 (release 8.0) Family This family encodes a 25 kDa protein that is phosphorylated by a Ser/Thr-Pro kinase [1]. It has been described as a brain specific protein, but it is found in Tetrahymena thermophila. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.33 0.71 -3.97 24 380 2009-01-15 18:05:59 2003-04-07 12:59:11 7 17 149 3 285 336 10 138.70 30 57.31 CHANGED lcpsFcpFs.aG......pustp-Mcs+pFuKhhKDspll..spK.lTsTDsDllFuK..VKupss++.IsappF.ccuLsplApK+tt........pspplhpclh..tutuPthsu....spussVchhsDpopYTGsHKpths..soscsp..GhuGhtphsDco.........ptsshshph .............................................................pp.FptFs.aG..........pusspphssppasKhh+Ds..tlh............Ds+...lTsTDsDllFsK.......l....K......u..+..ssc......p...Is..app..F..pcAl.p.c.lApc+ht........................ph.phhtplh.....tstsPthsu.......................ttsssspphsDpopYTGsHKpphs..tsscsp..uhuGh.phh-tp..................t.s......h................................................ 
0 115 149 205 +5346 PF05518 Totivirus_coat Totivirus coat protein Moxon SJ anon Pfam-B_10221 (release 8.0) Family \N 25.00 25.00 34.70 34.60 19.90 19.90 hmmbuild -o /dev/null HMM SEED 759 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.20 0.70 -6.70 7 23 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 20 0 0 24 0 622.50 31 97.28 CHANGED LsuslssstuGshhss............ssaR+YcAtlphpusstGshpushssIaaElGt+ht........+sscshtssstpuhhl-suhs.sss.stsa.shsR+aps.......huspahhhslsullc+lutslAshslhssh.hsstuh+tspshpVsuLuohDu.psussuuV..alPpslc.shssclFslLh.sAsupGusVssDslpLDusTspPhlPslsssshttAhs.sLpllGu.MpuustGshaAhALTpGlHsVloVVuHoDEGGhhRDlLRpstap.PaGGlshuLp...pasulsshspsuhushsuhsDulALsTAAlVAHCDPh..hssshaPTlhssphsssst.ssu.........t...ttpltushs+FuphYhcuLuhhFultuss.thsspplhpAusphlstssp...RHLchsosAPaFWlEPTuLls.hshhGosAEttGYuuhshh.GssRopPsa-shchhGptsos.assaphchposRosshlshhss+PtsGLushcltphsssullLsG............ssssssstc+ttutssls-YlWtRGQoshPtPAEhhNhsuthslphRhhsas-Dtsss..pchPssc-hhcssVThpsst.hslssuspNstssts+RuRstuuptLsp....ARshGhssssph.Its.sPs.tss...st.shspsspsussspssss.sthsstshtssp.u.RGsPltssspHtshRAP.hsR...s.tGssshs.....s.ssss.s......................s.s..sssssss..hssssptsssstsp 
....LsuslssspuGphhss............spaR+YcAslphsushtGshpsp.ptIhaElGtths........pttphht...ttuh.l-suhshsss.utpF.GhA++aoN.......Fuspa.h.slsuhsERlu+slAshslhss.....hs.sshttstshhVsuLuThDuP.su.tsuV..aIPRhlssshssclFsVLhtAssGpGuslsoDhlcLDsssppPhlPtlsssshstAhstALRllGuNM.sustGslFuhAlsRGlHpsloVVuHoDEGGhhRslLRpstFtsPaGGlp.uLp....asuLPshsssu.sshuuaVDulALtTAAhVAHsDPhh.hsGshaPThhsustssss.stsst...ss...stt.ttQlhushstFs..YhcuLutlFGhtsss...sttphhsshshhLstss....RHLchtoluPaFWlEPToLls.hshhGosAEttGhuuhsh..ssstshPsa-phphhupssss.aushthchposRpsshhstatspP.suLuslclcQhDssullhsG............sssps.sltc+htAssPlosYLWsRGQSshPAPuEhlNhsushGhhlRhhsas.--hsss..pclPsscEhhpssVohcVstPtGLs.sGssNttsppA+RARo+AsttLupuhhRARsaGtsssspM.l.sssPshts......t.thtphsttsh..st..t.s.shh.sts..s.t.s.pssPh.ss.ppts.chP.hst.hss..us.us.hss.s.s..s.sss.s..................................................t................................................. 0 0 0 0 +5348 PF05520 Citrus_P18 Citrus tristeza virus P18 protein Moxon SJ anon Pfam-B_10309 (release 8.0) Family \N 25.00 25.00 249.60 249.50 17.30 17.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.04 0.71 -4.75 3 112 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1 0 0 101 0 151.10 95 100.00 CHANGED MSGSLGNSTHVDLLRSDSRFLSGWWSFIVNVGDIIVRFALHVPSEDMLNSFSAISNCTIIADGSALLKDNTVVDRLESMNPLAYLLKLAKTTTTICFTMSNKVLFGTTKSEPLSCLAITSDRVLFKVIMGTNVDDSRCGCSIWFYNNGTFQNGLTRCNNLVALFSAT .........THVDLLRSDSRFLSGWWSFIVNVGDIIVRFALHVPsEDMLNSFSAISNCTIIADGSALLKDNTVVDRLESMNPLAYLLKLAKTTTTICFTMSNKVLFGTTKSEPLSCLAITSDRVLFKVIMGTNVDDSRCGCSIWFYNNGTFQNGLTRCNNLV........ 
0 0 0 0 +5349 PF05521 Phage_H_T_join Phage head-tail joining protein Finn RD anon Pfam-B_7008 (release 8.0) Family \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.43 0.72 -3.70 131 1460 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 1110 2 227 1023 107 96.10 18 85.37 CHANGED s+Rlslp........ts.ps..p-....s..sGshh....ts.a........tsht..s.lWAplp...sh..supEh.htussstup...h...shplplRa.ps.....sl....ssshR....lh......h.s....schapIpu....lschs.ptp.hlplhs ..........................................p+lph...t....h....tt..ps....s....sGthh.......pp.a........ht.hh........p..saAplp.......sh.....sup-h.....hpu.....ssp.tsp...s........shplhlRh..pp.........cl..........os.sh+...lt...........a..p..............sphasIts...lpssst.ppp.hl.lh...................................................... 0 64 140 186 +5350 PF05522 Metallothio_6 Metallothionein; Metallothionein Moxon SJ anon Pfam-B_1360 (release 8.0) Family This family consists of metallothioneins from several worm and sea urchin species. Metallothioneins are low molecular weight, cysteine rich proteins known to be involved in heavy metal detoxification and homeostasis [1]. 20.90 20.90 27.80 23.90 17.70 19.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.22 0.72 -10.69 0.72 -3.70 7 30 2012-10-05 18:33:37 2003-04-07 12:59:11 6 1 19 2 5 34 0 66.20 52 80.30 CHANGED sssKssCs+-GppCsC.upppClpucCs.sscplCCu.....pCuNAuCKCusuCKCuuG...suCpcGsCss ......ssspssCs+EGppCsC.upCpChpu-Cs.ssccsCCu.......sKCGNAuCK.CGuuCKCuuG....suCscG.sCss.......... 0 5 5 5 +5351 PF05523 FdtA WxcM_C; WxcM-like, C-terminal Finn RD, Studholme DJ, Andreas P anon Pfam-B_6950 (release 8.0) Family This family includes FdtA (Swiss:Q6T1W8) from Aneurinibacillus thermoaerophilus, which has been characterised as a dtdp-6-deoxy-3,4-keto-hexulose isomerase [1]. It also includes WxcM (Swiss:Q93S92) from Xanthomonas campestris (pv. campestris) [2]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.58 0.71 -4.64 64 624 2012-10-10 13:59:34 2003-04-07 12:59:11 6 12 477 8 145 730 885 125.10 29 67.10 CHANGED hss.p..llclspl....sD..RG....sLoslEtt.pp..lPFcIKRlYYlacVPsstt.RGtHAH+chcQhllslsGShclh.........LDDGpp+..pphhLscPt.hGLalsshlW+phhsFSss.uVhLVLASchYDEsDYIR-YspF.lphhp .........................................thlph.th......tDt..+G....p.L.s...h.h...-t....p......l..P.F..p.lcRl.a.a.l.ass....su.hs...RG.tHAH.....+...p...hp.phhlsl.p.GS..h.p...lh.................................l..-..-...Gp..sp........pph......h..L...s...pss...pG.....L...h....lsshhW+phpsF.S..s.s..s..l.l.l..V.lA...schY-c..p-Y.IhsYp.aht...t........................................................ 0 52 109 131 +5352 PF05524 PEP-utilisers_N PEP-utilising enzyme, N-terminal Finn RD, Studholme DJ anon Pfam-B_69291 (release 8.0) Family \N 21.90 21.90 21.90 22.60 21.80 21.80 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.29 0.71 -4.32 186 5872 2009-01-15 18:05:59 2003-04-07 12:59:11 8 34 3547 22 1028 3717 559 122.50 27 19.06 CHANGED lpGlusSsGlAlGpshhhp....ttt..hphsppts.....tshpt.EhpR..hcpAlppsppcL.p..pltpp....h.tth....................s.....sspu.s.IhcuahhlLpDspLhpplpptIppsp.sAphAlppshpphtphhpshsDt.Yl+ERu ..................................pGlsuusGlAlGcu.hhltpsp..........hshpptsh..........pssst.EhpR.lpsAlppstppL.p.plpp+....h.tph..........................s....p-pu..u.IF-uHhhlLsDscL.hpplpph...I.pppp..sA-tAlppshcphsstapph.s..Dp..YL+ERA............. 0 310 592 817 +5353 PF05525 Branch_AA_trans Branched-chain amino acid transport protein Moxon SJ anon Pfam-B_1869 (release 8.0) Family This family consists of several bacterial branched-chain amino acid transport proteins which are responsible for the transport of leucine, isoleucine and valine via proton motive force [1]. 
22.30 22.30 22.60 22.50 21.50 22.20 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.49 0.70 -5.90 21 3775 2012-10-03 01:44:59 2003-04-07 12:59:11 8 4 2395 0 374 2332 41 418.00 37 96.77 CHANGED pp+-hlhlGhMLFulFFGAGNLIFPPhLGhtuGpshhhAhlGFllTuVGLPlLulluluhsGt.ulcslus+ls.hauhlassllYLuIGPhaAlPRTusluFEhulsPhls....ssslsLhlaollaFulshhlSLpPuKllDplGKhLTPlLLlhlhlLhl.tulhpPhGshsss......p.stY.pspshhpGFl-GY.TMDsLAulsFGllIlsul+spGhp......sp+phhphslpuGlIAulhLullYsuLualGAsSsshh..........sssssGuplLsphspthFGshGpllLulllslACLTTulGLlousu-aFsclhP.tlSY+thshlholhShllANhGLspIIplSlPVLshlYPluIsLllLshhpphhpt..phsYpsslhhohlhull-ulpuhhhh........shlsphhphLPLtshGLuWLlPulluhlluhllsphp .................................................................h..+chlhlGhMLFulFFGAGNLIFPPhLG.tAGpphahAhhGFllTuVGLPlLs.llAluh....s..ss.....ul.p.s.l....us....+ls.hauhl...as.sllY..LsIGPhFAl...PRTA.ol.oaElGlsPhhs................sp........sh.........s.........Lh.l.................FollF.FulshhloLpPuKllDplG+hLTPlhLlhlsllhl.tu.h.lp.P.h.......G..s.h..sss................s..ts.Y...ps..s..sFhpGFlpGY.TMDsLA.ulsFGlllVsul..+..pp......Gls....................ppppl.s+.hslh..uGllAulhLsllYhuL..salGupSs...shh...........stssNG.uhlLsphspphF.GshGp............llLuhllhlACLTTulGLlsAsu-aFpp.hh............P.....p....l.....S..Y.........+shlhlhs.lhShll.uNlGLsplIphSlPVLh.hlYPluIsLl.l..Ls.hh..p...p......hh...tp...p...p...h...s....a....t..h.s.hhhs......hlhu.lh-s....l.pss.s.h.........................shl.s.s.h..h.phLPLtphGluWllPulluhlluhlhs..h................................. 0 93 201 293 +5354 PF05526 R_equi_Vir Rhodococcus equi virulence-associated protein Moxon SJ anon Pfam-B_7324 (release 8.0) Family This family consists of several virulence-associated proteins from Rhodococcus equi. Rhodococcus equi is an important pulmonary pathogen of foals and is increasingly isolated from pneumonic infections and other infections in human immunodeficiency virus (HIV)-infected patients. 
Isolates from foals possess a large virulence plasmid, varying in size from 80 to 90 kb. Isolates lacking the plasmid are avirulent to foals. Little is known about the function of the plasmid apart from its encoding a virulence associated surface proteins [1]. 25.00 25.00 31.80 31.70 22.50 17.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.08 0.71 -4.63 8 29 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 8 22 1 171.60 37 98.50 CHANGED Mhchhh..su+sluustlsAsslh...APuGsAsApslssuusuus................susss.ththspssstshtstsssp-pQYsVHGsVsSAlVYQ+h+lsV-......sGcTF-GDAGGLohPGuuuhWGTLFTsDLQ+LYc-TVSFcYNAVGPYLNINFFDScGslLGHlQuGulSoVlG..IGGGoGuWc .................................................................stthhs.hhsshshsh...sssGhAsAp.lssss.ss.................................sp...p.t..hspsssps.ttpsssp-ppYsV+GslsSAlhYQ+h..plpss......sGKsFsG-AGGlohPGuushaGTLF..TsDLp+LYs-TVSFpa.NAlGP.YLNINFFDupGslLGHlQuGulSTVsGlGGGoGuWp............ 0 0 7 8 +5355 PF05527 DUF758 Domain of unknown function (DUF758) Finn RD anon Pfam-B_6320 (release 8.0) Family Family of eukaryotic proteins with unknown function, which are induced by tumour necrosis factor. 21.20 21.20 21.90 23.40 20.20 21.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.10 0.71 -4.68 14 241 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 96 1 145 222 0 178.10 50 91.04 CHANGED h-sFsSKsLALQAQKKILSKMAoKohAphFIDDTSSElLDELYRloKEaTpN+pEApKllKsLIKlslKlGVLaRNsQFssEELtlspcF+KKl+psAMTAlSFaEV-aTFD+sVLuslLpECR-LL+plVppHLTsKSHGRIsHVFNHFuDs-hLssLYsPpusaRspLpKICsGlNKhL-EGsl ............-sFsSKsLALQAQKKlLSK.M.A.o....Kol.A.phh.I.DDTSSEl...L.DELYRl.oK......EaT.p.....s.....KKEAcKllKsLIKlslK.lulLaRNsQFst-.ELslh-+.F++Kl+..phAMTslSFap........V-aTFD..+....sVL....uplLpEC+-lL+plVp+HLTsKSHGRIscVFsHFuDs-FLssLYs...P.....ss.a+s.pLp+IC-GlNKhLDEtsl.............. 
0 31 43 77 +5356 PF05528 Coronavirus_5 Coronavirus gene 5 protein Moxon SJ anon Pfam-B_7342 (release 8.0) Family Infectious bronchitis virus (IBV), a member of Coronaviridae family, has a single-stranded positive-sense RNA genome, which is 27 kb in length. Gene 5 contains two (5a and 5b) open reading frames. The function of the 5a and 5b proteins is unknown [1]. 25.00 25.00 28.70 30.50 21.30 16.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.95 0.72 -4.16 2 99 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 13 0 0 100 0 80.40 87 99.62 CHANGED MNNSK-NPFptAIARKARlYLREGLtCVYFLNcAGQAEsCPsCTSLV.pGphC-EHl.NNNLLSWpAV+.LE+QTPpRR.SN .MNNSKDNPFRGAIARKARIYLR.EGLDCVYFLNKAGQAEPCPACTSLVFQGKTCEEHIsNNNLLSWQAVRQLE+QTPQRQSSN............... 0 0 0 0 +5357 PF05529 Bap31 B-cell receptor-associated protein 31-like Finn RD anon Pfam-B_6449 (release 8.0) Family Bap31 is a polytopic integral protein of the endoplasmic reticulum membrane and a substrate of caspase-8. Bap31 is cleaved within its cytosolic domain, generating pro-apoptotic p20 Bap31 [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.22 0.71 -4.84 40 599 2009-09-11 14:14:48 2003-04-07 12:59:11 7 10 290 0 357 535 7 181.30 24 83.77 CHANGED MoLhaoLVFshLhsEhshhhlLslPlPpslR.+plhphhttshhspphphshhhhlshlllLFlDulpRlh+hssphpttpsspssss..........sphp..u++FauQRNhYloGFoLFLoLllsRshollpcLlphpcphcshppptpttsttt.................................tppsttssElpcLKcclpp.......ccpDlcsLKcQscsLp+EY- ..........................................................................Msl.ashl.hhhLh.sEh.slhhlL.hl.Ph..shR....pplhp............hh..t.h..h..st..hhp..hhh..............hhlhshl.l..l...L.h..l.Dulp.c.hh+hssphp...tptpstss...............................h..p.h.h+hF.huQRNhYl..s..GFsLF.Lsl........l..lpRlh..sl..l....p....plhphpsp.hcshpppspst.t..pttt...........................................ttttt.tt-.htpLppclpt.....................tptch...sh..+pQsctlpp-ap................................................................................... 0 105 188 282 +5359 PF05531 NPV_P10 Nucleopolyhedrovirus P10 protein Moxon SJ, Bateman A anon Pfam-B_7343 (release 8.0) & Pfam-B_6199 (release 10.0) Family This family consists of several nucleopolyhedrovirus P10 proteins which are thought to be involved in the morphogenesis of the polyhedra [1]. 29.80 29.80 29.80 29.80 29.70 29.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.65 0.72 -3.73 21 81 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 63 0 3 70 0 77.80 37 77.83 CHANGED MS..NILlhIRsDIpslssKVsuLQspV.......sslpsNlPshp.l.ttLDAQospLsslpo..............pVssIpsILs.................P-lPs ....MS..pNILhlIhscIpslssKVs.......uLQspV...........sslcssl....ss...l.tpL....Du.sspLsslps..............pVssIpslLs................hP-lP........................................................................... 
0 0 1 1 +5360 PF05532 CsbD CsbD-like Finn RD anon Pfam-B_6755 (release 8.0) Family CsbD is a bacterial general stress response protein. It's expression is mediated by sigma-B, an alternative sigma factor [1]. The role of CsbD in stress response is unclear. 40.00 40.00 40.00 40.00 39.70 39.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.42 0.72 -4.29 119 3357 2012-10-02 00:15:32 2003-04-07 12:59:11 7 3 2275 2 706 1779 71 53.50 36 76.85 CHANGED -clcGphcchpGcsKEshGchTsscphpsEGctcpstGcspcphucsK...-tscc .........thcuphcphKGplKEshG+lTsDcphp.tEGct-pssGKsp-thspsK-psct.................. 0 169 400 559 +5361 PF05533 Peptidase_C42 Beet yellows virus-type papain-like endopeptidase C42 Studholme DJ anon Merops Domain Members of the Closteroviridae and Potyviridae families of plant positive-strand RNA viruses encode one or two papain-like leader proteinases, belonging to Merops peptidase family C42. 25.00 25.00 27.40 37.70 20.60 19.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.20 0.72 -4.16 8 200 2012-10-10 12:56:15 2003-04-07 12:59:11 7 8 8 0 0 172 0 73.20 66 10.95 CHANGED tchtDGhCYlAHhthlCAahsRsFccpDa....sLGsaPTVucL+sRlh+paGccALplslRGtYoSRslFHCDYsuuaspsh+slsua..lGG .A.KlRDGQCYlRHVYDVALYFGRRV..DLSV+...RTLGhFPTVGALKAYLVREYGR-SLKVPMRGTYT................................................ 0 0 0 0 +5362 PF05534 HicB HicB family Moxon SJ anon Pfam-B_6090 (release 8.0) Family This family consists of several bacterial HicB related proteins. The function of HicB is unknown although it is thought to be involved in pilus formation. It has been speculated that HicB performs a function antagonistic to that of pili and yet is necessary for invasion of certain niches [1]. 
20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.41 0.72 -4.39 24 1200 2012-10-02 18:44:02 2003-04-07 12:59:11 7 4 929 0 259 814 64 48.00 32 43.10 CHANGED YlpsCpp.GhpPc+taS..GpFslRlsP-LHccluhtAtppslSLNpalppsL .......................t.p....t.....s..t.t.hp....t....p...FsLR.lsppLHccLshtAtppslSlNpalhphL........... 1 79 171 225 +5363 PF05535 Chromadorea_ALT Chromadorea ALT protein Moxon SJ anon Pfam-B_7314 (release 8.0) Family This family consists of several ALT protein homologues found in nematodes. Lymphatic filariasis is a major tropical disease caused by the mosquito borne nematodes Brugia and Wuchereria. About 120 million people are infected and at risk of lymphatic pathology such as acute lymphangitis and elephantiasis. Expression of alt-1 and alt-2 is initiated midway through development in the mosquito, peaking in the infective larva and declining sharply following entry into the host. ALT-1 and the closely related ALT-2 have been found to be strong candidates for a future vaccine against human filariasis [1]. 25.00 25.00 34.70 33.80 19.70 18.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.10 0.72 -3.89 6 43 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 13 0 24 44 0 75.70 56 34.20 CHANGED EalsKGcFVETDGKKKpCcoHpACYDQREPQuWChLKps.QuWTs+GCFC-pKh+uCVIER..psssKLEYoYCuPccsWpCu ..............ss+GcFVcTDG+cKpCpSHpsCYDQREPpuWChLpcN.QuWTs+GCFCDsKL+SCVIER..pN....s....G+L....EYuYCsPcpsWpCp....... 0 16 16 24 +5364 PF05536 Neurochondrin Neurochondrin Moxon SJ anon Pfam-B_7411 (release 8.0) Family This family contains several eukaryotic neurochondrin proteins. Neurochondrin induces hydroxyapatite resorptive activity in bone marrow cells resistant to bafilomycin A1, an inhibitor of macrophage- and osteoclast-mediated resorption. 
Expression of the gene is localised to chondrocyte, osteoblast, and osteocyte in the bone and to the hippocampus and Purkinje cell layer of cerebellum in the brain [1]. 19.10 19.10 19.10 19.10 19.00 19.00 hmmbuild -o /dev/null HMM SEED 543 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.72 0.70 -6.20 8 217 2012-10-11 20:01:00 2003-04-07 12:59:11 6 15 138 0 160 237 0 430.60 20 74.69 CHANGED psssssL-cCLpLL+up+.Dop+FAuLLLVTKhl+usDhsuts+chlF...-AVGhpFLcRLLpotp...ussss....sccsahsLulolLusFCs..-PElAscppVls+IPhls-slppsss......hshl--sYpsLsulu.uoPpGs+sLlstGslshLsptYss...puashEpAlplLhsLlsshcspshp.-c..thpsllsplucpFssh-sspKFEL...hclLsslLspp....l...lpS..upphhcpLptGlssILps+losupRssALpLAAsLhps...hGspWlhsssp..............ss+FlhLllsluslEVRhsLsE.............pspshtp+pcslosCauLlEhhIphhsc..pt-psll-.....csphhpLhssLpEshusVlcaLpcst-................................-p.+-s.......hllAuVRlLGuWLAE-ssu.h+pclppLLsFhlclt+csapp...hpt...................shDulRalLPuLCplosEccsp+lLhspGusplLsD......sl.............lphhcc.ps..s...suEh.....ul.hhCshhhNlllstssh.hpctusFsuLh+sLlp...........................................................sssusctsslhhSsuhpuhWtDl...sphW 
...........................................................................t......htph.thLpttp.DppphsuLhhlpphhp..s.tp..t.st.........lh.................culu...pF.p+LL.ot...............tt..............................t.hhtlulslLtsast.........phttp..ph.hstlPhl.phltttss..........................thhp-shphLhsls.up.tG.pthht...ssls.hlsphh.t............................psh....h-..shtlhhhl.ht..t..ht.......h....pt...h..tl.h.tt....lu..ht.t.psttthph.....................hphLs.hl......................................h..pl..hl.tll.ts.+.s...t................+..h.h.lhtthhph....hu.phh..t...................................t.ahhlllp.hhl-lph.h.p............................h.....tp.ls.sa.lhp.hlt...h.p........tt.....t..............................................h.phhp.hptshshhhthL.t..ph.........................................................p..t...pc..................lhu...lRhls.altpps.t.hp.th....tlhshhhph.tt....t........................................................................hphhLshhpthsh.......p.tstphhht...tt.thlhp..........h.....................................................t......................................................................................................................................................................................................................................................................... 1 53 94 131 +5365 PF05537 DUF759 Borrelia burgdorferi protein of unknown function (DUF759) Moxon SJ anon Pfam-B_7415 (release 8.0) Family This family consists of several uncharacterised proteins from the Lyme disease spirochete Borrelia burgdorferi. 
19.90 19.90 21.30 20.10 19.70 19.60 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.61 0.70 -5.71 2 148 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 27 0 12 135 0 319.00 50 87.42 CHANGED MSDKFTIKFKGILDHAATKKAIEQDISKMEKYLKPKKSSLGSTKDIVKNNLSDKKKELS+QSKFESLRERVEKYRLTQTKKLhKQGMGFEKARKEAF+RSLMSDRDKRRLEYKELAKESKAKSKMLAASQGKGLVAKIAIGSALGNlIuNAMSKVGGGLlGF....hKKuVE-soKpc+hQ.LNpshYGs.KE+-slLK.IGtMKGFcRsLEKE-FLppA.VhKGslR-Lc.LN-...pNlhNAschAAMh+SoGhhS.sEsuVpAVsplLtG-hoEhashLK..sthG-KYlEshK.th....QpGuplcLcscI..hh-hhKDhpShtlhthssshEphpssLAshEQTLpsLTssVLcPllslIs.hhsK....ItNFs.hpslINsIhNuIpShhsh...FsKl+uhLPphhGGsGs-s....p..scspsssNs .......................MSDKFTIKFKGlLDHAATKKAIEQDIoKMEKYLKPKKSSLG.STKDIVKNNLSDKKKELu+QSKFESLRERVEKYRLTQTKKLhKQGMGFEKARKEAF+RSLMSD+DKRRL.EYKELAKESKAKsKMlAASQGKGL..VAKIAIGSALGNlIuNAhSKVGGGhlGF....hKKuVEppoKpc+hp....LNpsha.........sc.c.........E+........ptlh......t.......hltt......hKGFERcLEKE-FLppuolhKG.slp...-Lp..LNp...pNlhpAschAAhh+SoGhhu.sEpAlpsVsphLpG-hsphaphhp....sthG..pKYhEshK.th....Qpuu.ph.chc.cl..hhchh.pDhpShtlhthusphpphpssLsphEQoLtslTssllpPllthlp.hhth.....h.tap....ptllt.hh....pu....l....pshhs.................................................................................................................................... 2 7 8 8 +5366 PF05538 Campylo_MOMP Campylobacter major outer membrane protein Moxon SJ anon Pfam-B_7418 (release 8.0) Family This family consists of Campylobacter major outer membrane proteins. The major outer membrane protein (MOMP), a putative porin and a multifunction surface protein of Campylobacter jejuni, may play an important role in the adaptation of the organism to various host environments [1]. 
19.90 19.90 21.00 20.90 18.50 18.50 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.48 0.70 -6.10 2 514 2012-10-03 17:14:37 2003-04-07 12:59:11 6 2 136 0 23 333 1 274.10 49 99.06 CHANGED MKLVKlSLVAALAAGAFSAANATPLEEAIKDlDVSGVLRYRY-ouN..pNhs.sSslss.KQDHKYRAQVNFSuAIuDNFKAFlQFDYNu.DGGhGsDsloNspcsLhVRQLYLTYTNEDVATSVIAGKQQLNhIWTDNuIDGLVGTGlKVVNNSIDGLTLAAFAhDSF.ttppssshlsQss.pphp............pssshtlD.stNlYGAAAlGSYDlAGGQFNPQLWLAYhspsAFhYAlDAAYSTTIFDGINWTlEGAYLGNSlDscLcD+hc.ANGNhFAL+GolEVNGWDASLGGLYYGcK-KsohssIEDQGNLGSLLAGEEIFYTsGSpLNGDhGRNIFGYVTuGYTFNETVRVGADFVYGGTKTp.hup..GGKKLEAVARVDYKYSPKLNFSAFYSYVNlD...sssESscHssVRLQALYKF ...............................F.S....s.AsPLEEAIKDlDVSGVlRYR.Y-os.........p.p........p..s....h.......s...........p.s....s.s.......ls.......s.KtpHpa+uphsFpu....AlsDN....Ftuhl...........ph...pY.s..s........-.......s......G..........h....G.........h.............s...t...........h...........p.......s...s...........p........p.t...h....VpphYLsYTspsh.s.TolhhGKQtlshhaT...Ds.s....sGTGl+VlNssIsGLTLA.uhAhDuh.tt...t.............................................................tslYususlGsa-.....hs.QL..WhAhhsp.s..u..h...a.......A...h.Dhshph.hhsshshtlputYhtsshD.sp.......htt.............................................s....suN......hauhphshph.shDhthGh.l..ha..t..pc.p....c....h..o.hsslEDpGph........hh.......sGt..plh....s..ptpt.h..s....ucN....ah..ahthGYoF.sc..hplGh-al.Gt.ps...............t..cc.......E...hssplsYtYS.KLsh.saYuh.hp..................................................................................................................................................................................................................................................................................................... 
0 6 14 23 +5367 PF05539 Pneumo_att_G Pneumovirinae attachment membrane glycoprotein G Moxon SJ anon Pfam-B_7428 (release 8.0) Family \N 25.00 25.00 83.60 83.60 21.40 20.50 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.71 0.70 -5.09 3 103 2009-09-11 11:15:31 2003-04-07 12:59:11 6 1 3 0 0 83 0 160.30 60 99.79 CHANGED MGSKLYhIpGsSuuQlslKpsL+lucKllLuIVLSALGLTsToTIALSISISVEQuVLc-C.cTYhusssohaSsosspsTTTsoATTT+DhRGLQTTRTRKhESCuaVQIuYGDMHDRSssVLGGlDCLGLLALCESGPICQRDsps-DsshCRCTlcu+uVSCCKcPKousTTSpTTScPscs.osPsaPSQcsocScPsoQGcQT....oTAspploSTsshhTpcTuTsosucPQspPsPSppG.osos+csuSTsSpcooT.sGsupcHTQRh+TPPos-NsRoshppsTP..TT.hacTt+PTP+PTs-hpsssp.spoSPsulQuNPTTQ.N.lsCcchDPscPp+ICYpVGoYNsulo+sCcI-VPLCSTYspsCMcTYYocPFNCWRRspRClCD-GsGLIEWCCTS ...............................ch.p+hlLulVLSAhGLThTsTIslolsl.VEQshLcpC.csY.utstshassppppsTos.stTssts.ttLQsststK.ESChaVQls.GDMasRS.sVL.......................................................................................................................................................................................................................................................................................................... 0 0 0 0 +5368 PF05540 Serpulina_VSP Serpulina hyodysenteriae variable surface protein Moxon SJ anon Pfam-B_7432 (release 8.0) Family This family consists of several variable surface proteins from Serpulina hyodysenteriae. 
22.90 22.90 23.10 44.80 17.40 22.80 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.34 0.70 -5.69 4 43 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 5 0 26 43 0 353.80 47 99.32 CHANGED MKKhLLsslAlLTIuSuSsFGMYGDpDsWIDFLTcGNQFRARMDQLGFVLGNsTIKGTFGFRoQslsTtLGsIl.oGNstNhs.LpsTISsGIGYTS-sFGIGlGYNYTY.........hssslGsHTPVLMlNALNNNLRIAIPVQIAVp+DshsKhs.....pspKDYLGISTDhQIRYYTGIDAFNtIRLYhKYGQsGYKss.....NsspEhFAQShGFEsRhYFLNTslG.NVTINPFIKVsYNTAL..+GsushVRAu-ohhss.....stsh.s.cP............th-.KaD+NPYDVpstAVLGlTANSDhVSLYVEPSLGYpApYhGKhto-p..h....KVpHsLhWGAYAELYIpPVQDLEWYFEMDlNNusS+.......p.sulPVsFuooTGITWYLPtL ............................................................MKKhhLhhhslLohu.sSlFGMYGsp.-sWIDFLscGNQhRARMDQLGFlLGNsTIKGTFGF+.upo....h..s.lGpIL..s.....s.spssht.LtsTISuGIGYTS-sFGIGlGYNYTa..........hu.sshssHTPVLhlNALNsNLRIslPVQIuVpssshsphs.........ppsYhGlS.T.D.sQIRYY..T..GIDAFNtIRlalKYGp.saKss.............................shs....p-hhApShGF-hRhYFLNTslG.NVTlNPFl+VsYsTAL...pGh...uphlts..h-..shhps...............h.th.sstssst................................ssh.sthaD+sPYclsltssLGlTANS..DlVSLYlEPuLGYpsph.h......G+h.ss.tst.........Klp..HtLuWuAYuELYIpPVpDLEWYFEMD...VNNussp.......p.sul......PVsFsuoTGITWYLPth............................................................ 0 7 7 7 +5369 PF05541 Spheroidin Entomopoxvirus spheroidin protein Moxon SJ anon Pfam-B_7488 (release 8.0) Family Entomopoxviruses (EPVs) are large (300-400 nm) oval-shaped viruses replicating in the cytoplasm of their insect host cells. At the end of their replicative cycle EPVs virions are occluded in a highly expressed protein called spheroidin. This protein forms large (5-20 mm long) oval-shaped occlusion bodies (OBs) called spherules. The infectious cycle of EPVs begins with the ingestion by the insect host of the spherules, their dissolution by the alkaline reducing conditions of the midgut fluid and the release of virions in the midgut lumen. 
The infective particles first replicate in midgut epithelial cells, then pass the gut barrier to colonise the internal tissues, mainly the fat body cells. Whilst spheroidin has been demonstrated to be non-essential for viral replication, it plays an essential role in the natural biological cycle of the virus in protecting virions from adverse environmental conditions (e.g. UV degradation) and thus improving transmission efficacy. In this respect, spheroidins are functionally similar to polyhedrins of baculoviruses or cypoviruses [1]. 25.00 25.00 61.80 61.20 21.70 21.30 hmmbuild -o /dev/null HMM SEED 944 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.53 0.70 -6.94 4 11 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 11 0 1 11 0 866.40 39 92.91 CHANGED sNlPlsscpIpKlsspKYEl+hhLKD-sppF-+h.l-hVVPLYDsss.houVTlESsssslEllELDpTHhRlhl+ssshcEhsa.hsFsssVsp-pVWKYlocLLLsNluhsssKhKLsNaplsLNsKHlphpclcpsLFIhFhDD.GhYGLIT+cNI.NssL.VsKDAoaIplFPQahYhphGRclYlNEKsThDVss-ssNlsLDhpKSVNIuVS...FlsI.YElssstQKcLLKsLlp+YGcFDVYNADTGLlYAKNLsIKs.sT.VIQV-+lPVpLKVKAYhKs.sG+sLChh+ITSST.sDPEYVsSpsAhLGsL.pVYKKFc...+ShLKlhhHscsosNVhPstsLhLELsDspsYshKsSssSRLsVGlYKlsKIYlcNscspIhLcpIcscacCs+pla+EhspL.+................cps+YTscs.FpIlsNsPchslalaG.IpNlsh+sKsshNL+LWGWIlcsDsSRal+hhsDGSlDLDLshKhspsDlsLhpAl+p+YhNslILEhANsY.ssslSLGNp+FpNIFDMc.cscoIspYTNFTKsRQDLNNhuClLGINIGspVNIpsLP..GWlss+EhcILp.Susscl+pFscuFCclsN+RFasMA+DllSLLFMCNYlNIEIsEulC-YPGYllLFARAlKVINDlLLlNGlspLAGYSISlPlcaGss-KTLPpp+pGGV-K+FKchFLKspL+-LM+Dp-FVQsPLYISTYF+ol.-sP.o-NYEKYLl-SusQSQ-lLQGLLNTpNo.DTNARVsSSVhG.asY......-.ssTuEacIu.............................sEALsKhsK.ho+.GNhGLlN+lsE....pC...s.cGh.-Nppl+sph.pp.FsCpPNNNsELIs+YGY+lhDLc+ItplhsshDs...sspcpshh.E-pt.h..s..h.h.a.....spssp....phs.ClppN.pp+ha.pcCscssoCs+......Rps........h...sGYc+sH 
..........NIslplh-lpplNDupapV+Fs...-shpFstK.hshVVP.YssD....hVslEouDsNl.l.-hspT+Y..lhapsptsEhshhFoFsssVDp-plhpYlopLlhsNLu.hssK.hlhNhslhlNGhh.shhthspshaIhhaspssshhLloppsIhNpoLlVsKDsoalpl.sQ+lhsactphlYlsEpsT.DVs.sVsNlVLDL+puVsluVS...FlShshEls-ssQKcll.SLlp+hGcFDlhNADTGhVYt+NLslKpssThsh.V-p.sV+lpsKshhh.hcs+slp.hplhSs...Ds-YVsscouohthL.olYhKhhptIKSLLKIhhpDccssshh.GhshslEhpDhNshshphossoRLslGlYplchlYLsspcDpIp.phIc.........aK-YVs.sca-..............p-G+YpshssFhIhuNpPhIslalhGcIhsss.cscsshpLpLhG..lDhcsShY...as-suLsL...a+pcsssloLhcAlKh+alNsIlhcsANhh.sGsIolGN+Ka.NIhDh+tDscTlNpYsNhhhu+pshNs.uslLulslss.VNIQ-LP..sWLss+....LusSusD.l+shlpuFss.ss+caashs+sIlSLLhhsNaINhpIc-oLCcYPGhIhLFARhhKhIN-hLhl.G..phAhYSloh.scauss-phLPas+ptGl.KcFh+pahKsshpsLM+Dcsa.QsPLhI.s.hpslpps.....ssNhtp.lss.......SAsps.sLlptLLsopNspDsss+.Vl.SshGuF..........sNhu-pcIs.............................ppslSpllchhp+sGshGLl.+.s................-NpsI+ph+spspF.sp.Ns.scLIphhuap..DhpthhchhsssD-lP.ssscpshh-EDpt.hh.s..hsh.as....spssp.......s.Chpspspp+hh.spCspsssCs+.......+ps........a...sGh.hsH.............................. 0 0 1 1 +5370 PF05542 DUF760 Protein of unknown function (DUF760) Moxon SJ anon Pfam-B_7508 (release 8.0) Family This family contains several uncharacterised plant proteins. 20.20 20.20 20.60 23.20 19.60 19.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.61 0.72 -3.80 54 380 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 117 0 216 365 110 97.80 30 44.47 CHANGED ssLhpYlpphpP.-tlsplsc.............................................ssSs-lhpshcps....lpullGsL..P...................................sttFpsplpss+-pLupLlhushMoGYaLRphEpRhpL-psLp ...............LhchlpplpP..-plpplsc.............................................ssSs-lh-hhcps....lpslLGhl...P......................................spphsshlpss+ppLup....Lhsu.uhMsGYaLRphE.RhpLEcsL................... 
2 48 141 188 +5371 PF05543 Peptidase_C47 Staphopain peptidase C47 Studholme DJ anon Merops Family Staphopains are one of four major families of proteinases secreted by the Gram-positive Staphylococcus aureus. These staphylococcal cysteine proteases are secreted as preproenzymes that are proteolytically cleaved to generate the mature enzyme. 21.60 21.60 21.80 24.20 21.40 21.50 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.85 4 395 2012-10-10 12:56:15 2003-04-07 12:59:11 8 3 221 9 4 118 0 172.80 60 44.73 CHANGED shpsQYVNpLKNFKIRETQGsNuWCAGYTMSALLNATYNTs+YNAEuVMRaLHPNLpGc-FQFTGLTPpEMl+aGpSQGRssQaLNRMsSYNEVDpLTpNNKGIAILGpRVESs.sGhHAGHAMAVVGNAKlNNGQ-VIlIWNPWDsGhMTQDAcSNlIPVSNGDHYpW.uSIYGY .........p..p.QY.NpLcNFKIREpQhsNuWCAGaoMuALLNAThNTspYpAcslMRhLaPplptQch.....s.u....hhP..p....pMIpa.GpoQ.GRs..ph.pths....o...YNpVDpLTKsNhGIhlLup.pVppp....Ns.HhGHAhAVVGN..AKlNs........QEh.....lIhWNPWDsth.hQDAcss..ll.lS..sccYpWYuShhGY............................... 0 2 2 4 +5372 PF05544 Pro_racemase Proline racemase Moxon SJ anon Pfam-B_7562 (release 8.0) Family This family consists of proline racemase (EC 5.1.1.4) proteins which catalyse the interconversion of L- and D-proline in bacteria [1]. This family also contains several similar eukaryotic proteins including Swiss:Q9NCP4 a sequence with B-cell mitogenic properties which has been characterised as a co-factor-independent proline racemase [2]. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.77 0.70 -5.60 14 1248 2012-10-03 03:02:41 2003-04-07 12:59:11 6 8 740 7 380 1522 561 313.20 35 95.24 CHANGED llDsHspGEssRllsGGssslsGsThhE+ppahtp-h..DtlRphlhhEPRGpshhpuslLhPPpcP-AshGhlhhEsssh.sMsGpsoIslsTsLlEsGllshp...pPtscl...L-sPuGlVcspscscsG...psppVplpNVPSFhathDstl-VsGl.GclpVDlAYGGsaaAlV-spphGhslssspAp-Lsshu.tlppAlscphth.HPcts-hstlsas.hsussspsps..suRNsVhhsstthDRSPCGTGoSARhAsLtA+GpLcsG-palpc.SlIGStFcG+l.thscluG+s............AIlPpIuG+AalTGhsphhlDPsDPaspGap ...............................lDsHssGEPsRll....h...u..G.hP..pl....G..tT.hhE+p..p.a.........h......tp.ch...........DtlRphLhhEP.RGassM.GulLs.P..Ps.......c....s....c.ADhGV.lFhcs......s......G.h.h.s.M..CGHuoIulsT.s.hl.E...p.........G..hls.s.p...........ps..t.....p.....hh..l.-.o..P.uG.l..V.ps.ph...p.h....c.su........cst...pVohp.N.V......Pu......Fh....h...p...p...sl.p....l.........-l...........s......s.........h.G......p.....lp.sD........lAaG..Gs.aY....ulV-....spp.h....G..h....c...l..ssp.s.....sppLhphuht....l+p..sl....pp....p....h..t....h.....H.Pp.t..slptlst..l.hh...ss.....s.....pptts...............ss+NsVhh............s.....s............s...t............l............DRSPCGTGT.SA+hAtLhA+GcLphG-.p.ah.pc.SI....l....G...S.h......F.....pGc......l....p..t.....s..p..l.u...s.h..............AllPplsGp.AaloGhsphhlD.s.p.DPhstGF...................................................................................................................................... 0 106 211 297 +5373 PF05545 FixQ Cbb3-type cytochrome oxidase component FixQ Moxon SJ anon Pfam-B_7570 (release 8.0) Family This family consists of several Cbb3-type cytochrome oxidase components (FixQ/CcoQ). FixQ is found in nitrogen fixing bacteria. Since nitrogen fixation is an energy-consuming process, effective symbioses depend on operation of a respiratory chain with a high affinity for O2, closely coupled to ATP production. 
This requirement is fulfilled by a special three-subunit terminal oxidase (cytochrome terminal oxidase cbb3), which was first identified in Bradyrhizobium japonicum as the product of the fixNOQP operon [1]. 22.80 22.80 22.90 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.19 0.72 -4.32 127 974 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 905 0 280 665 58 46.80 27 73.61 CHANGED h..............shhpuhhslhhhlhFlGllhWAa.pspp..+ppa--AAplPFc..-- ........................shhpuhhslhhhlhFlullhasa..pscp...+ppa--uAplshp.D-........... 0 64 169 223 +5374 PF05546 She9_MDM33 She9 / Mdm33 family Wood V anon Pfam-B_35269 (Release 8.0) Family Members of this family are mitochondrial inner membrane proteins with a role in inner mitochondrial membrane organisation and biogenesis [1]. 20.50 20.50 21.00 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.31 0.70 -4.78 17 144 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 138 0 112 152 5 193.70 44 44.52 CHANGED pphu..++hphhhDslQpslhsAopsLNDlTGYSuIEcLKpsIpphEpcLcps+ppl+psKttYspAIpcRSpSQREVNELLpRKcsWSPsDLERFTpLYRsDHsNpppEp-uppcLp-uEpcs-plpspLhpuILoRYHEEQIWSDKIRRsSTWGTahLMGlNllLFllhQLllEPWKR+RLVtuFE-KV+pAlct.tpppphthpphl ..............................s..hscphsphhDslQsslhsAsppLNDlTG..YSuIEpLKpplpt.Epclcps+ppl+pAKpsYpsAlspRusSQREVN-LLpRKcsWossDLER.FTpLYRsDHtNEptEtcupptLspAEpct-chtspLtpuILsRYHEEQlWSDKIRRhSTWGTauLMGlNllLFllhQlhlEPW+R+RLVpuFE-+Vppslcc.pt.........th....................... 0 32 65 98 +5375 PF05547 Peptidase_M6 Immune inhibitor A peptidase M6 Studholme DJ anon Merops Domain The insect pathogenic Gram-positive Bacillus thuringiensis secretes immune inhibitor A, a metallopeptidase, which specifically cleaves host antibacterial proteins. A homologue of immune inhibitor A, PrtV, has been identified in the Gram-negative human pathogen Vibrio cholerae [4]. 
23.70 23.70 23.70 23.70 23.50 23.60 hmmbuild -o /dev/null HMM SEED 646 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.02 0.70 -6.49 5 820 2012-10-03 04:41:15 2003-04-07 12:59:11 6 51 467 0 195 739 218 459.10 29 56.97 CHANGED susQssasGsVRoDKVLVLLVEFuDhs...........HNsIsKpss.....pMYp-DYspEHYQDMLFGccsYoh.cGcslpShKQYYEcQSGGSYoVDGpVocWl+VPtsAAcYGuN.usGsDNpuP+uARDLVKEALc.......sAs-pslDLoQFDQaDRYDhNGDGNhNEPDGlIDHLMIIHAGVGEEAGGGsLG-DAIWSHRapluscshulEGTpusVs....haGGchAAaDYTIpPEDGAlGVaAHEYGHDLGLPDEYDTpYTGsGEPVuaWSlMSSGSWuGKIuGTEPTuFSupsK-FhQKslGGNWhNh.plDhsKLssspG+sssLDQosTKSsRPshV+VsLPpKoVEsIKPApGcatYYSs+GDDL+NTLST.slDLTsuTsAcFcFKuWY-IEADYDFlcVch..VopDGspThh-csGcpssssstcsss.....sGK...WIDtsYDLSsatGKKVcLpF-YlTDGGLAMcGFhlD-luLTVDGcssFSDDAEG.TSpasLsGFT+suG..T+cpsHYYllEWRNHsGoDsGLtp.........h.+aucthuYssGLVVWYsDsSYADNWVGlHPG+GFLGVVDSHPcALVhspsGclAcs...RaQltDAAFShcKTsuhplso...uTsuTasssuLuussoFDDc+sYhspQlPDuGRKlPchGLKhcVluQAcDsSsGsV ....................................................................................................t.............phhslLh-asD.......................................sth....t.............h......p...a..taapphh...as.p.................................u...p...p...h..........oh+pYapppSsspass........s....G...t.l..t......t.Whps.s.tptu...YGss.......s.s....p.........s................s....................s.......t...sp-h.lp-Alt.....................hsh..t...sh.slupaD....ctYD.susGs.pp.sDG.hlDplhllH.AGhGp...ps.G..G....G.h......t....s......s..AIWuH+......t...h....s............t................h......s..h....p..G.....sp.......th.................hhs.s.th...t.s...h..-...Y.....s......h..p.............P...........s.......u........u......hGVhsHEaGH.s.L.G.LPD.Y..D.T.............s...G...t.s..........p...s............l.....t.WolMuuGS..W.....s..G.......ph....s......s.s.t..ssshss....pt.a.p.th.....s....W..hp................h......t..t........................................................................ts.....Gp........aaSs...pu.ss.h...p.s....p...hpp......h
....slst...s.s...p..A...p...h...phc...shaplE.........t.....p.......aDah.V....hs.sGt.......shhp................h....p...s......s....t..sts.........................................st........WhchphD.L..ota..tG.p.p.l.pltFpYhTD........uh......s.......h......p......GhhhDshtl...p......s...st..hh.sshc.....s......th...........h.........................G..a......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 83 136 171 +5376 PF05548 Peptidase_M11 Gametolysin peptidase M11 Studholme DJ anon Merops Family In the unicellular biflagellated alga, Chlamydomonas reinhardtii, gametolysin, a zinc-containing metallo-protease, is responsible for the degradation of the cell wall. Homologues of gametolysin have also been reported in the simple multicellular organism, Volvox. 
20.60 20.60 21.40 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.17 0.70 -5.50 6 219 2012-10-03 04:41:15 2003-04-07 12:59:11 6 11 37 0 180 218 11 252.00 23 45.09 CHANGED RLLVhILDYSoC...Ga..usolTE-plRslFLGPNpDGsGGlApKYspCSYsKFuLN.sTAFhsVt.VslsCoosVT................uoCS...........WWslSp............tADsAA+All.............GlsAF..uoFoHasYVLPPGlp..CuWAGLAll.P.G+psaLpoSuYGlpRWu......TlMQEAlHNYGLWHSWRNshEYEDaSTAM......G....RGsACPNAuEhSRlGWATP...........AsGG.Gslsuushsss....GostsasLPAThlTGDsNaLRV...lPsWLsshhNuTsAKNLYluhRVsKsG....DuALsupausKVpVHEVNATMDN.uhsspahpSDR+IpFluusssho+usLsA ....................................................................................................................h...h...................................s..htthh.t.CShsph.h....t.s.......hhh....l....s.Cts.............................................ps.s........................h...h.shtt.........................h.u.c...th.s...ps................G....sh.......ss.ap...+.hhalhP...s..t...........C..s....a..s..G.....hu.l....s....G..p............s.h...h.t.h.s..s....h......p..h.s............................slhpEhhHNh.G..LhHu...........h............p............s........s................h.............EY....s...D.os...sM..........G...........pu....t.......C......N.As.ph...........phGW..hss...............h.........tls..ssth..............u.hh.thtl.sst.....s....ts.hlhl............sh............................................................................hahthR.tt.s.........................D....h............................................................................s............................................................................................ 
0 70 174 179 +5377 PF05549 Allexi_40kDa Allexivirus 40kDa protein Moxon SJ anon Pfam-B_7591 (release 8.0) Family \N 21.20 21.20 21.20 22.10 21.10 21.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.85 0.70 -5.10 7 19 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 0 0 20 0 234.20 36 68.90 CHANGED pRTFFSNhshAL-ATpsLLsaVPPsRYslPssoLPLDELYGQLHALHpNSLEWLTHIsHssDpllshLNshs..u......psssLuclRsslppLsphlppltss.pphphphppsssoc.hpphpsl-TpLptLHt+lc.hsps.spssss.Pssossssss.sstsscs.LPhYQAtHPTt.CRoYGollasGss.+IPMDIhGRPASTAL+LplplsssspsTpVsYclhDDGhLLLS--lcTtHKLpH..SDsLALLHp+CPNFIYKI+scsLC ........pRsFFoNhshALsuTpsLlsaVPPsRYslPssoLPLDELaG.LHALHpNoLEWLTHIspss-plls.hssh...s......pss.Lscl+stlpsLsphlpplpps.pp...phpspp.tspp.hcplpulcspLttlp..h+lp.hspp.sps.st.Psso.spssso.sststps.LPsapApHPothCRoYGollasGhsh+IPMDlhGR.sSTAL+Lplphp.sspsTpVpaclhDsGhLLhS-plpT.H+Lp+..uDsLuLLHp+CPNFlY+h+spsLC...... 0 0 0 0 +5378 PF05550 Peptidase_C53 Pestivirus Npro endopeptidase C53 Studholme DJ, Finn RD anon Merops Domain Unique to pestiviruses, the N-terminal protein encoded by the bovine viral diarrhoea virus genome is a cysteine protease (Npro) responsible for a self-cleavage that releases the N terminus of the core protein. This unique protease is dispensable for viral replication, and its coding region can be replaced by a ubiquitin gene directly fused in frame to the core. 
20.70 20.70 21.10 23.00 20.60 19.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.16 0.71 -4.79 11 688 2009-09-11 10:39:17 2003-04-07 12:59:11 6 19 89 0 1 634 0 153.70 75 17.82 CHANGED MELlcFELLYKTuKQ+PlGVhEPVYDpsGcPLFGEhScIHPQSTLKLPHcRGcA-l.TsLKsLP+KGDCRSGNppGPVSGIYIKPGPVaYQDYpGPVYHRAPLELFsEoQhCEVTKRIGRVTGSDGKLYHlYVClDGCILLKpAoRspscVLKWl+NhLDCPLWVTSC ...........................................................MELhsNELLYKTYKQKPsGVEEPVYDpsGsPLFGE+.u.sIHPQSTLKLPHcRGct-VsTNLtSLP++GDCRSGNs+GPVSGIYlKPGPlFYQDYpGPVYHRAPLEhFcEus.MCEsTKRIGRVTGSDGKLYH..IYVClDGCIl.lK.As+s.p.clL+WlhNhLsCPLWVoSC.................... 0 0 1 1 +5379 PF05551 zf-His_Me_endon Naegl_SSU_RRNA; DUF1519; Zinc-binding loop region of homing endonuclease Moxon SJ, Coggill P anon Pfam-B_7681 (release 8.0) Domain This domain [1] is the short zinc-binding loops region of a number of much longer chain homing endonucleases. Such loops are probably stabilised by the zinc and may be viewed as small but separate domains. The common structural feature of these domains is that at least three zinc ligands lie very close to each other in the sequence and are not incorporated into regular secondary structural elements. The biological roles played by these small zinc-binding domains are presently unknown [2]. 
27.00 27.00 27.20 27.20 26.80 26.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.21 0.71 -4.17 11 70 2012-10-05 18:28:12 2003-04-07 12:59:11 6 2 41 18 21 82 4 122.00 30 40.93 CHANGED sCah.psspsst.psGY.plsh+ss..................tGpphYsH+l.shhAsscs.......t.h..h.tt.....uhplSHLCtNutChsPsHLllEspslNppRpsCp...tpsp.h.tst.hhpsC.H.....pPpC..lhshtthscshp ..........................................................................Ch..tsth....psGa.+hphhsp......................ttsp.YhHpl.shhAssptt...........phh.hl.psch.........uhplSHLC.........pNu.....tChpPsHLhlEs+s.NccRppCp............t+...h..ps...th.hhtsC.....H........pspC..hlshhhhsp........................................... 0 12 17 20 +5380 PF05552 TM_helix tm_helix; Conserved TM helix Yeats C anon Yeats C Family This alignment represents a conserved transmembrane helix as well as some flanking sequence. It is often found in association with Pfam:PF00924. 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.33 0.72 -4.30 143 2106 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 1303 14 507 1422 120 52.30 28 24.15 CHANGED tpslsshhspll....salP........plluAlllLllGhllucllpphl.......splLpphsh.Dptls ....................p.hss..sh.ll......sahs........NlluAllIlllGhllA+hlsshV.......s+lhtpt.pl.Dttl....................................... 0 135 297 397 +5381 PF05553 DUF761 Cotton fibre expressed protein Moxon SJ anon Pfam-B_7657 (release 8.0) Family This family consists of several plant proteins of unknown function. Three of the sequences (from Gossypium hirsutum) in this family are described as cotton fibre expressed proteins [1]. The remaining sequences, found in Arabidopsis thaliana, are uncharacterised. 
19.00 19.00 19.00 19.00 18.90 18.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.82 0.72 -4.59 85 622 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 28 0 391 568 2 34.10 34 13.27 CHANGED sps-.lsp+AEcFIp+F+cph+.LQ+...........pShp........p..hpphls.+u ......pp-.lDp+A....-cFIp+Fpcp....h+.lQ+............Sh........t.............................. 1 33 211 313 +5382 PF05554 Novirhabdo_Nv Viral hemorrhagic septicemia virus non-virion protein Moxon SJ anon Pfam-B_7684 (release 8.0) Family This family consists of several viral hemorrhagic septicemia virus non-virion (Nv) proteins. The NV protein is a nonstructural protein absent from mature virions although it is present in infected cells. The function of this protein is unknown [1]. 25.00 25.00 228.90 228.80 20.30 17.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.54 0.71 -4.58 3 37 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 3 0 0 29 0 122.00 86 99.65 CHANGED MTTQSAHSTTSFSPLVLREMITHRLTFDPSNYLNCDLDRSDISsTDFFETTLPRILcDLRASTRLPYLHVLDMRISLLERTHYMFRNVPSSPATTGRhSDPELlIISHAEMtlLTsGSESTS MATQPALSTTSFSPLVLREMITHRLKFDPSNYLNCDLDRSDISThDFFETTLPRIL-DLRASTRLPYLHVLDMRISLLERTHYMFRNVPSSPATTGRLTDPGLlIISHAEVGlLTsGSGLTS. 0 0 0 0 +5383 PF05555 DUF762 Coxiella burnetii protein of unknown function (DUF762) Moxon SJ anon Pfam-B_7710 (release 8.0) Family This family consists several of several uncharacterised proteins from the bacterium Coxiella burnetii. Coxiella burnetii is the causative agent of the Q fever disease. 
25.00 25.00 202.60 202.40 21.30 20.60 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.65 0.70 -5.14 3 27 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 7 0 3 18 0 237.60 53 99.52 CHANGED M..saFhh.PKIDaQ-+......oNSQss.EcTLQEWLRGA..NssEcAslFIuSspppphPIhu-pp.sSuusLPKLcIc-IcsSIKhVcNpFKKaGLKDEsIlNYIL+aGGINGYsSLGpssLcclsplpGsspcsEN+.........+pTYhV+uKscVpYIEpFcllpls.hDRNsIGcF..luEVK.sollSKsG-IIHsCKKVplts.sApLchFKs+FGsQLphVETI+++LhElLsSLYpRlsplFN........EspNccsRssKpRhPGhu .........l.cIDhpph......psSpss.EcsLpEWLRGA..ss.EcAplFIsss.....PIhupp..sSuusL.KlcI.-hhssIh...s.FKh.GLKsE.IlNYILpaGGIsGYhoLGpsshcclshspGpspcsEN+.........+hoYhV+sKscVpYIEpFchhphs.hDRNpIGcF..lupVK.sollSKsG-I.HsCKKVplts.stpLp.F+p+FGsQLphVpph+h+LhElhsSLhpR....Fp........tthNctsRssppphPhhp. 0 3 3 3 +5384 PF05556 Calsarcin Calcineurin-binding protein (Calsarcin) Moxon SJ anon Pfam-B_7783 (release 8.0) Family This family consists of several mammalian calcineurin-binding proteins. The calcium- and calmodulin-dependent protein phosphatase calcineurin has been implicated in the transduction of signals that control the hypertrophy of cardiac muscle and slow fibre gene expression in skeletal muscle. Calsarcin-1 and calsarcin-2 are expressed in developing cardiac and skeletal muscle during embryogenesis, but calsarcin-1 is expressed specifically in adult cardiac and slow-twitch skeletal muscle, whereas calsarcin-2 is restricted to fast skeletal muscle. Calsarcins represent a novel family of sarcomeric proteins that link calcineurin with the contractile apparatus, thereby potentially coupling muscle activity to calcineurin activation [1]. Calsarcin-3, is expressed specifically in skeletal muscle and is enriched in fast-twitch muscle fibres. Like calsarcin-1 and calsarcin-2, calsarcin-3 interacts with calcineurin, and the Z-disc proteins alpha-actinin, gamma-filamin, and telethonin [2]. 
20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.97 0.70 -5.17 14 220 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 47 0 93 173 0 230.20 35 93.44 CHANGED Ms.tst..spp++ppustIhp-lst.stp-.t.......pLDLGKKlSlP+DlMLEELSLhsNRGS+hFKhRQ+RVEKFhaEs.......hsspspsphpphsss.ssp...............tph....ust...s.s...sst.ht.....sPpsluPG..usPhKtlP...............................................scKhppsslsKoYhSPW-cAhss-.-hltshtsphPtP.ttp.p.scYKSFNRsAhPFGGac+As+hhshphPc.hp.s...s..hssh.pslssRPSFNRTPhGWhspt....h............lshssEo--L ....................................s.pp+t.s.tlhh-lp................thsLGKKlSlP+DlMLEELSLhoNRGS+hFchRQ+Rs-KahaEs.............hpsps..h.s.....p..h......p..p.hs..ssp................................th......ut...t.......s.s.....ssss.tt..........sspshuPG..usshpthP...............................................scchppsslsKsYhSPWc..pAh.usc.phhtsht.ch.t....p.phscY+SFNRsAhPFGGhppusph..hp..hphPc........s...h.h.p.lstRPSFNRss.GWlsp............................................................................................................ 0 4 15 40 +5385 PF05557 MAD Mitotic checkpoint protein Moxon SJ anon Pfam-B_7761 (release 8.0) Family This family consists of several eukaryotic mitotic checkpoint (Mitotic arrest deficient or MAD) proteins. The mitotic spindle checkpoint monitors proper attachment of the bipolar spindle to the kinetochores of aligned sister chromatids and causes a cell cycle arrest in prometaphase when failures occur. Multiple components of the mitotic spindle checkpoint have been identified in yeast and higher eukaryotes. In S.cerevisiae, the existence of a Mad1-dependent complex containing Mad2, Mad3, Bub3 and Cdc20 has been demonstrated [1]. 
30.20 30.20 30.50 30.50 29.30 30.10 hmmbuild -o /dev/null HMM SEED 722 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.47 0.70 -13.42 0.70 -6.44 5 348 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 251 6 232 343 4 476.80 20 85.32 CHANGED DDlcssTTshhst.ts.SplRSths+FLussLcuot.sLusSss...............suuSLpKQsppShsppc.........cAEpI+oKupLIQlEpEltptELcHKRAplELE+cuossA-pYE+Esc+NpELps+lKsLcEpEsshcschpEspEct+th+pKl.........DtsopKLppEKpDptp-A+-sluslsuclSEhQhpA.st-splpsLEoEhp-L+EQLEppp++h.......sEu-cKlQuLpsttsppu-pss+IKcLEpcLpphEs-stlVKo.+pcLhplPcLE+ElppL+-ENc+L+oh+csstLLcEElp-Lco+LERtEch+-clssLELEpEKLpsELpSWcsLtQshsL..sLsTP-DlSp+lstLQpc-lpLsE+ssSlsSss+pLEsopQsLQcchppssupltEt+cKpEcp+shsRRLQ++lsLlTKERDthRAlLcSYDcEpT.ostSsph.p+L.csEDllQcVcsapuchEs...pL..c-sp--lulQKc+scsLcpElchL+pQhsss-p.h..spEtssu...LRh+l-oLEuEpuRLRpEpplLEMcht+hsLpGDYshucTKVLHhSpNPuucAcppp+sslE+LQAEs-+LKcll+tLEcsssps.sDsp.ssuSplouKElA-LKKQVESAEtKpQRLKEVFpsKIpEFRcACYpLhGY+ID.l........sssopYRLTShYAEcc-DsLlFcusuSosu.MpLLEosaStolschI-lalccpsSIPAFLSALTLELFsRpT 
..............................................................................................................................................................................................................................................................................................................................................................................t............................................................................................................t..............................................................t......t............p...........t.................t..p...........tt....h.....................t...................ht.....t...............tt................................................................................................t...p..t.hpt..tt.....hlppp..t.ptp......ht......p.................th...t.php...h.tph..t.h........h.t....t........s............................s....s...............t...l................p.......h.............t...p.p.h.h.htp.st.hp........p.......htt.......hp....ptht...ph...t...p.h.........t....t.h.p.........p....................t......t............t......................p..t...........h..t...cl...ppph.lh.cEhchh.+....t...ltt..h...p.t......p....t........t.......t...................................tt....p.hpt...........t............................h.....p.h.....t.....h........p..t......t............t....t......tt.............t.t....................h..p.tplptp..........pthpp..pht........hp...p...l..t..p..............................t...........tph+lLp.hp.sPh...t.....t....p.php.LptE...tp.L.h..t.l.......t..........t..........t..........................t.h..............h......................................t....................h.....tph.c.tp...l..t...p...h...p....h+..pR......LK...pla...pt+..p-F+..csshtlh.........Gaplc.h...........hsp........s...p.h+lpSha.......................t.....p......p...p.............l...h....p..h.........t.....t...s................h.......................................t...h.........ht..l.t....
t.phPshh.ush.shp.h...................................................................... 0 80 130 193 +5386 PF05558 DREPP DREPP plasma membrane polypeptide Moxon SJ anon Pfam-B_7798 (release 8.0) Family This family contains several plant plasma membrane proteins termed DREPPs as they are developmentally regulated plasma membrane polypeptides [1]. 23.20 23.20 27.30 25.70 23.10 23.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.45 0.70 -4.47 7 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 20 0 18 49 0 185.70 48 97.38 CHANGED MuYWKoKVLPKIKKlF.-KsG.sKKAAAAEhpKoFD-uKEthsKEFE-KKsELQPKVlEIYEAussEIKsLVKE..c.uGlKKpostVpKFl-ELsKIEFPGuKAVSEAsuKhGPuhlSGPlhalhEKVSTFls...pEpK.cEts............sAspspsptps........tscEK-lVlE.EcKKEEtAsPs.........ts.ss...-.t.tcc.spsssAsA..sEP.Ks .......MuYWKoKVLPKlKKlF.-Ksu.sKKA.AA.AEhhKoFDEuKEplsKEhEEKKTELpPKVVElYEAussElK.sLlK-..Ksuul.KKNSsuVpKFL-ELsKI-FPGuKsVSEAsuKhGsuhluGslhFlhEKVusFlP....cE.K..tcE...s..............sA.sptpptp.s................tspEKc.hsE..EtKccEtsss.s.................ttts.ss.s.s-tt.....tc...tt..tssss....stP.K............... 0 2 8 13 +5387 PF05559 DUF763 Protein of unknown function (DUF763) Moxon SJ anon Pfam-B_7805 (release 8.0) Family This family consists of several uncharacterised bacterial and archaeal proteins of unknown function. 
20.00 20.00 22.00 63.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.11 0.70 -5.48 30 177 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 170 0 106 193 49 316.90 44 82.83 CHANGED GsA-LPLHsG+VP.WLhpRMp+LupsIs-lllcEYGscclLcRLucPhWFQuFusllGMDWcSSGoTTsshGsLKcsLs..sc-.lGlhVsGGKG+putpTPcELptlu-+hsLDu...ppLspsSRLsAKVDssslQDGapLYpHsFllo-cGcWuVlQQGMNscp+hARRYHWhust..psFsppPHsuIsG.hppstlLNlss+cuccsRcshl-LlpE.sPs+.lhpphpphhshhp............................h.h.tt+.lhtpslshchhtpsLptshEhsPpcFc-lLhlpGlGPpTlRALuLVAElIYGsPssapDPs.......+auaAhGGKDGhPaPV ................G.ADLPLHsG+VP.WLhpRMp+LuthIschllccYGtcclLcRLucPhWFQuFusVhGMDWcSSGhTTsshGALKcuLs...sp-lGlhVsGGKG+pSRpTPpELhtlu-phulDu...ppLspsSRLsAKVDssAlQDGapLYhHuFlloccGcWsVlQQGMNscp+hARRYHWhup......th.ps..FlppPHsuIsG..hppsp.ll...NLss+cuttsRps.l-Llp-.sPs+.lhpph.ppltsh.t.......................................................................thhhPsc+plhtp...s...lsh+c.lt..tsLtts.hEtsPpsFc-LLhl.GlGs+TlRALALVAEllaGsPspapDPs.............+FSaAhGGKDGhPaPV.......... 0 43 68 83 +5388 PF05560 Bt_P21 Bacillus thuringiensis P21 molecular chaperone protein Moxon SJ anon Pfam-B_7820 (release 8.0) Family This family contains several Bacillus thuringiensis P21 proteins. These proteins are thought to be molecular chaperones and have mosquitocidal properties [1,2]. 
25.00 25.00 367.20 367.10 23.80 17.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.22 0.71 -4.90 2 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 0 0 11 0 182.00 93 99.96 CHANGED .sENtsFYKIFThcNNNhChNsTLLE+lFKNNl-EFDFSLVKpNLEHEKNCVITSTMNQTI.FENMNSpEMGpKsYSFhNQTVLNNKGNoSLEEQlSsIF.RCVYMpstKSSSYIK.LEQD.NplchhsSLlFIsPY+pNlh.IhPVsLpLTLlsKNVKpsS.pNlFSGDhHFNMVTMTaLT MTENGVFYKIFTTENNNFCINPTLLERVFKNNLDEFDFSLVKKNLEHEKNCVITSTMNQTISFENMNSTEMGHKTYSFLNQTVLNNKGNSSLEEQVSNIFYRCVYMEVGKSSSYIKPLEQDSNKIRYVCSLLFIVPYKNNITSIIPVNLQLTLLSKNVKQSSSTNIFSGDIHFNMVTMTYLT 0 0 0 0 +5389 PF05561 DUF764 Borrelia burgdorferi protein of unknown function (DUF764) Moxon SJ anon Pfam-B_7823 (release 8.0) Family This family consists of proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). 25.00 25.00 58.90 58.80 21.90 16.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.12 0.71 -4.85 2 127 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 28 0 12 99 0 175.50 65 97.09 CHANGED MIlsLsp.lpaLI+IhpsFKhYh..pphEh-IlNTYNHPYLpKhsTsosNllsLK.-uhEtLhs+s.+sts.hcph.EFplpFplYhlshVL.pt.hDu.pphhhlYthh.-FLHpphaKaphppp.ps-.h.hlsaYlh.hSNhpssGLlslushauNhsaShs.hF...VtsIplLKpE+ ...MIhTLDhlLNHLhpIFKGFKAYATENNFECDIINTYNHPYLSKITsuSSNIIALKFDGTEsLFDHNsRuGsFYENALEFSlNFQIYIIAIVLNApDFDANSRMLhLYuMLS-FLHN+.s.HKYTLtpp...QP-YlsKINFYIYPhSNMQTVGLINLGTKYSNHAYSASlAFNASVKsIEILKEE.h...... 0 8 8 8 +5390 PF05562 WCOR413 Cold acclimation protein WCOR413 Moxon SJ anon Pfam-B_7803 (release 8.0) Family This family consists of several WCOR413-like plant cold acclimation proteins. 
25.00 25.00 45.40 25.20 24.60 23.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.16 0.71 -4.76 15 123 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 37 0 50 124 0 164.80 42 87.45 CHANGED LsMKo................stsussllsSDhcELssAA......+KLAsH...AlpL.....uuLGFGso...hLpWlAshAAlYLLlLDRTNW+TNhLTuLLVPYIFhoLPollFshlRG-lGpWIAFlAlllRLFFP++FP-a....LELPuuLILLlVVAPslhAsphRso..hlGssIsLsIuCYLLpEHI+uSGGF.+sAFs+usGVSNolGIllLh .....................................................l.tchpthhhus......cphusp....shhh.....suh.shuss...hLpWlushAAlhLLllccstW+..TshhsuLLVPYlhhphPsslFshhRG-hGp..WlAFlAlllRLFFsppFPs.....LELPsu..hlLLllVAPphhssh.Rss..h..hGshlsLhIusYLl.pHlptuGGh.+puFspupslussluIhll.................... 0 11 35 43 +5391 PF05563 SpvD Sal_SpvD; Salmonella plasmid virulence protein SpvD Moxon SJ anon Pfam-B_7864 (release 8.0) Family This family consists of several SpvD plasmid virulence proteins from different Salmonella species. 25.00 25.00 150.00 149.90 18.60 17.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.49 0.70 -4.83 3 25 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 15 0 1 12 0 202.20 98 100.00 CHANGED MRVSGSASSQDIISRINSKNINNNDSNEVKRIKDALCIESKERILYPQNLSRDNLKQMARYVNNTYIHYSGNCVLLSACLHYNIHHRQDILSSKNTASPTVGLDSAIVDKIIFGHELNQSYCLNSIDEVEKEILNRYDIKRESSFIISAENYIAPIIGECRHDFNAVVICEYDKKPYVQFIDSWKTSNILPSLQEIKKHFSSSGEFYVRAYDEKHD MRVSGSASSQDIISRINSKNINNNDSNEVKRIKDALCIESKERILYPQNLSRDNLKQMARYVNNTYVHYSGNCVLLSACLHYNIHHRQDILSSKNTASPTVGLDSAIVDKIIFGHELNQSYCLNSIDEVEKEILNRYDIKRESSFIISAENYIAPIIGECRHDFNAVVICEYDKKPYVQFIDSWKTSNILPSLQEIKKHFSSSGEFYVRAYDEKHD 0 0 0 1 +5392 PF05564 Auxin_repressed Dormancy/auxin associated protein Moxon SJ anon Pfam-B_7941 (release 8.0) Family This family contains several plant dormancy-associated and auxin-repressed proteins the function of which are poorly understood [1]. 
25.00 25.00 25.70 25.00 24.00 22.40 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.94 0.71 -3.50 21 211 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 59 188 1 110.90 35 91.34 CHANGED LWDDlVAGPpP-p.GLGKLR.+hosps..lslpt.s-up................sus..hpRSlohss..................sPsoPsTP..oTPs..oP.ouR.p.-NVWRSVFpPGSN.uT+shGuphFDKPspPNSPTVYDWLYSs-TRS+HR .......................................................LWDDsVAGPpP-p.GLGKLRKh.ohps.....shp...stut..........................tps.thp+Slsh.p........................sPsSPu.ss....ooPs........oP..h....o..sc....c.pps.....WRshh.pss..p..t.tpt.t.usph..pp.s...tPpoPTVYDWh.......................................................................... 0 13 38 49 +5393 PF05565 Sipho_Gp157 Siphovirus Gp157 Moxon SJ anon Pfam-B_7948 (release 8.0) Family This family contains both viral and bacterial proteins which are related to the Gp157 protein of the Streptococcus thermophilus SFi bacteriophages. It is thought that bacteria possessing the gene coding for this protein have an increased resistance to the bacteriophage[1]. 28.60 28.60 28.90 28.60 27.50 28.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.88 0.71 -4.66 29 450 2009-11-04 11:20:49 2003-04-07 12:59:11 6 1 390 0 49 359 53 153.40 29 97.10 CHANGED hpLYELsspatplh.phhpp...hDs-tltDTL-u..lppsh-sKs-shsplI+sl.......-uDscslKtEtcRLt-++Kuh-sclcpLKsYLtptMptsshc+l+s..shholulpKstsuVpl..--stlPscYh.....ts.KlDKpsltc.....sLKsGcclsGApLcpsc.sLpIR .....................................pLY-Lsspatpl.................p...........h-s...-..h....l....t..D..T.L-u.......lpsphcs....Ks-shsphl+sh.......puchc.hhcpEh+RLp....p....++.c...shpspscpLKsY.......LtptMpts......s..hc.....+l.cs....shhplsl.p.Ks.s..S..l...pl.....hD....E.........sh.l....Ptcah.........ppp.+hDKptltc.....sL.K.s.GpclsG.AcLhp.sc..sLhI+........................................ 
0 14 28 36 +5394 PF05566 Pox_vIL-18BP Orthopoxvirus interleukin 18 binding protein Moxon SJ anon Pfam-B_7955 (release 8.0) Family Interleukin-18 (IL-18) is a proinflammatory cytokine that plays a key role in the activation of natural killer and T helper 1 cell responses principally by inducing interferon-gamma (IFN-gamma). Several poxvirus genes encode proteins with sequence similarity to IL-18BPs. It has been shown that vaccinia, ectromelia and cowpox viruses secrete from infected cells a soluble IL-18BP (vIL-18BP) that may modulate the host antiviral response. The expression of vIL-18BPs by distinct poxvirus genera that cause local or general viral dissemination, or persistent or acute infections in the host, emphasises the importance of IL-18 in response to viral infections [1]. 21.60 21.60 22.30 22.20 20.80 21.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.74 0.71 -4.12 2 55 2009-09-10 15:00:19 2003-04-07 12:59:11 7 1 18 1 0 49 0 123.30 86 99.62 CHANGED MRILFLIAFMYGCVHsYVNAsEhKCPNLsIVTSSGEFhCoGCVcaMPpFSYMYWLAKDM+SDE.sKFIEHLG-GIKEDETlpThDGtIsTLpKVLHVTDTNKFspYRFTCVLTTlsGVSKKNIWLK ..MRILFLIAFMYGCVHSYVNAVETKCPNLDI...VTSSGEF+CSGCVEHMPcFSYMYWLAKDMKSDEDTKFIEHLGD.GIKEDETVRTpDGsIsTLpKVLHVTDTNKFAHYRFTCVLTTlDGVSKKNIWLK........ 0 0 0 0 +5395 PF05567 Neisseria_PilC Neisseria PilC beta-propeller domain Moxon SJ, Bateman A anon Pfam-B_7966 (release 8.0) Domain This family consists of several PilC protein sequences from Neisseria gonorrhoeae and N. meningitidis. PilC is a phase-variable protein associated with pilus-mediated adherence of pathogenic Neisseria to target cells [1].\ This domain has been shown to adopt a beta-propeller structure [2]. 
21.10 21.10 21.70 21.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.18 0.70 -5.40 9 608 2012-10-05 17:30:42 2003-04-07 12:59:11 6 13 415 2 192 644 232 326.40 25 29.43 CHANGED LGDIVNSPIVAVGt.....YLA........................TSANDGMVHIFKps.GuDcRuYNLKLSYIPGTMPRKDIpN.....p-STLAKELRsFAEKGYVG..DRYGVDGGFVLR+l..-.stQcHhFMFGAMGhGGRGAYALDLoKhDuN.P..ssssLF.....DVKcs...spNGpNRVc..LGYTVGTPQIGKTHNGKYAAFLASGYAoKc.IsSssNKTALYVYDLEs.sGT..lI+KIEVPGGKG...GLSSPTLVDKDLDGTVDIAYAGDRGGNMYRFDL.....SsssPsp.........WSVRTIFcGT..KP.............ITSAPAlS+LKDKR..VVIFGTGSDLSEDD..Vsss-tQaIYGIFDsDT..GT..sospsGpGsGLLEQsL..spEsKTLFLoshK ...........................................................................................................................................hGDllpS.........h..hs..s.....................hhh...........................................suANDGMlHhF.................s.....s......s.............s.......t.....................p............p....t.hu.....al.......P..................................p...sl.ht....p.....L....t......t..h...s....p..t....s......a.......t...........cp..Y.hV....D.G...s........s..h..............t....c..s................th.s.s................s..........h+.....shlhGuhstG...............G....+.............u..............h..............aAL....DlT...........t...................s..s...........s...s.h...t...hh....................................p.h.p...s.s.................t....t.....s...s..p.......LG..oh.u...p.P...............l........s........+.............h.............p............s......G...........p...........a....s..........s.......l.h..GsG......Y..........s.........s.......t..s.............................................................s....p......s..........u..........L............al...l...c........h.................p............s.....................Gs..............h...........l........t......p....l.........s....s....s..s...u....ps................G.L.....u...s....s..........s...l..l.....D
.......p.............s....s..D.G...h.s.....D......h.....s...Y.......A...G........D.......h.....t.......Gsl.......WRF.D.l...............ss...s...ss.ss.................................h.s...h........t.................h.....t...s.........pP........................................Io.st...P....l............h............s...t..h.................h.l.h.h.GT.....G....p.........h....t..t.tD..........h...s..p...t.p.....thYul...h..........D..t.tt......................................................................ht............................................................................................................ 0 41 123 162 +5396 PF05568 ASFV_J13L African swine fever virus J13L protein Moxon SJ anon Pfam-B_7998 (release 8.0) Family This family consists of several African swine fever virus J13L proteins. 20.80 20.80 20.80 23.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.41 0.71 -4.76 2 103 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 12 0 1 95 2 169.90 80 100.57 CHANGED MDSEFFQPVYPRHYGECLSssssPSFFSTHMYTILIAIVVLIIIIIVLIYLFSSRKKKAAAAIEEEDIQFINPYQDQQWutVTPQPGhuKPAGAoTuSAGKPVhsRPsTN.....+PsTN+PVhDp.sMAsGGPtAASAsA........aPAE.YTTsTTQNTASQTMsA.ENLRQRsTYTHKDLENSL .............MDSEFFQPVYPRHYGECLSPVosPSFFSTHMYTILIAIVVLlIIIIVLIYLFSSRKKKAA.A.AIEEEDIQFINPYQDQQWAEVTPQPGTSKPAGATTuSs....GKPV.TGR......P.....ATNRPss.....s+PVT.....sN.....PV..........TDR.....LVMATGGPAA........AsAAA..................sAHPsEPYTTVTTQNTASQTMSAIENLRQRsTYTHKDLENSL................... 0 0 0 1 +5397 PF05569 Peptidase_M56 BlaR1 peptidase M56 Studholme DJ anon Merops Domain Production of beta-Lactamase and penicillin-binding protein 2a (which mediate staphylococcal resistance to beta-lactam antibiotics) is regulated by a signal-transducing integral membrane protein and a transcriptional repressor. The signal transducer is a fusion protein with penicillin-binding and zinc metalloprotease domains. 
The signal for protein expression is transmitted by site-specific proteolytic cleavage of both the transducer, which auto-activates, and the repressor, which is inactivated, unblocking gene transcription. Homologues to this peptidase domain, which corresponds to Merops family M56, are also found in a number of other bacterial genome sequences. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.98 0.70 -5.45 12 1807 2012-10-03 04:41:15 2003-04-07 12:59:11 6 87 857 0 421 2510 322 241.60 19 47.77 CHANGED hhlhthslssshslLllhllRhhl++hhushhsYtlWhlV.lthlhshh...............tsshthshspsPhstsssp.............shstsh.thshtslh.hL.hllWlsGsll.....htuhhhh+pp.hl+phs.hps....phLttstcpht...............sPhlhGhh+PpIllPssh..phsscEhchIlhHEhsHl+RtDhhhNhlsshhpslhWFNPllaluh+thchDpElACDtsVLst.ppcpR+pYucslLpshhusss..sshssph...sppsL+cRlhhl .....................................................................................................................................................................h...........................................................................................................................................................................................................................................................................................................................................h.................h...h....h....h........l.W.....l.h...h...h.....h..h..h.....h..h...h..h...h....s..hh.......h...t..................h......h..t....................h...........t....................t..hh............t........h..................p...ht..........................h..........l...h.........s....t....t....l.....t...s...P.....h..h......h.......u....h......h......c....s....h....l.l..l.P....p.t.............ph.....s..p.p.c.l....c.hIlhHEh......sHh.+.p.+D........h.l.hhh..l.....h.t.l....h..t.....h.l.h........W.F..NPh...l..a.hhh...pphppspEhtsD.ctVl........p..p.........t........p....p.tt...
.p.Yupsll....p....h..s...h..t.t...s.h..........................h..h...s....t..................tpp.lKcRlh............................................................................... 0 214 340 394 +5398 PF05570 DUF765 Circovirus protein of unknown function (DUF765) Moxon SJ anon Pfam-B_8063 (release 8.0) Family This family consists of several short (27-30aa) porcine and bovine circovirus ORF6 proteins of unknown function. 25.00 25.00 63.70 63.60 17.10 16.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.94 0.72 -7.27 0.72 -3.98 3 13 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 0 10 0 28.70 94 99.73 CHANGED MASSTPASPAPSDILSSlPQSERPPGRWT MASSTPASPAPSDILSRLPQSERPPGRWT 0 0 0 0 +5399 PF05571 DUF766 Protein of unknown function (DUF766) Moxon SJ anon Pfam-B_8021 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 25.00 25.00 26.10 25.70 24.00 24.00 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.29 0.70 -5.13 5 128 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 91 0 77 117 0 262.90 52 91.77 CHANGED ChGLYCGRTLLtsNSSp-h.....YS-CGACPRGpRoNsQphCsPCp-sLptYDWLYLGFMAMLPLllHhFFI-hstKs.pKpS+uplhpalSAllEsslAAllTlLlo-PlasL+IpSCcVphLSDWYThhYNPSPsYpTTV+CTQEAVYPLYTIVFVaYhFCLVhMhLLRPlLVsKI...LsVus+hK.ulYuALYFFPlLTlLHAVuGGLIYYuFPYIlLVlSLVuhAlHhSh.Kl-pohKsLl++.shp.++lllLhuHWLLLAYGllSL.ps.plcYDluLLs..LVPsPsLFYlFTlKFT-Pu .....C.GhYCG+sh..L.pss...............au-CG...sCPRGpRs.....Ns.pphCpPCs-sPphYDWLYLGFMAhLPLlLHWF..F.I-h...........hs...tK..pSpssLhpHloAlhEsshAAllTLLls-PlGsLhlpSCcVhh...LSDWYThLYNP.SPcYhsTl........HCTpEAVYPLYTIVFla..YAFCL.V.hMhLlR.P..lLl.pKl........Ls.+ss+....hK....SIYAALYFaPILTllpAVuGGLlYYuFPYIllllSllohAla.hSh..clcp..shc.............Lltp.........Kpl.llLhuHWLLaAYGIlSls..............php..p..hp....c...hshLs..LVPhPuLFYlhTs+FT-Ps................................................................ 
0 27 32 56 +5400 PF05572 Peptidase_M43 Peptidase_M46; Pregnancy-associated plasma protein-A Studholme DJ anon Merops Family Pregnancy-associated plasma protein A (PAPP-A) is a metallo-protease belonging to Merops family M43. It cleaves insulin-like growth factor (IGF) binding protein-4 (IGFBP-4), causing a dramatic reduction in its affinity for IGF-I and -II. Through this mechanism, PAPP-A is a regulator of IGF bioactivity in several systems, including the human ovary and the cardiovascular system. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.62 5 563 2012-10-03 04:41:15 2003-04-07 12:59:11 8 39 269 10 378 715 221 143.40 28 22.67 CHANGED sspscsItsatW...DNpKYMNVaIQs-L.ssGuTsNSGsAWYPcoGMos-slARVsFNGtYLus...ssoSosFuuoLTHEFGHFLGLcHTFcGG....CccGsup......DcssDTPspsutch....usspslhN......CsG-hlNspNaMDYNs...CpsMFTQsQVsRMsssL- ................................................................................ttshth..............................................................................................................................................................................................................h.........h.....................h.......................h..........s.....h.....u.....p.....Ths..HElGHaLGLaHs...F..ps.s...............Cs.s......ht.......................Dh.l..sD.T...P.sp...s...p..s..p...s...........ss..s..t.s.h..ss...........................C..s..s...t......s....h............h.......p....NaMD..YosD.....sC.h.....s..p.F..TssQhsRMpt...t.......................................................... 1 151 254 320 +5401 PF05573 NosL NosL Moxon SJ anon Pfam-B_8116 (release 8.0) Family NosL is one of the accessory proteins of the nos (nitrous oxide reductase) gene cluster. NosL is a monomeric protein of 18,540 MW that specifically and stoichiometrically binds Cu(I). 
The copper ion in NosL is ligated by a Cys residue, and one Met and one His are thought to serve as the other ligands. It is possible that NosL is a copper chaperone involved in metallo-centre assembly [1]. 20.20 20.20 20.20 20.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.84 0.71 -4.50 50 701 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 544 2 243 576 51 139.30 25 73.14 CHANGED hhhLuuCscppss..t.s.sshplsspspC+hCGMhls-aPGPKuQlhhput..ps....haFsss+Dhauahh.....pPEps.+plpAlaVpDMup..ssWppPss....ppaIDAc......pAaYVhGSsppGuMG.splssFuscssAptFAsca.GGpVlpFc-Is.shl ........................................................................h...h.uCt.tttt.......s.htl.p.p.p.s.hC.t.hCsMsl..h..-..hs..t.tuplh..h....pss....cs...........hhFss.ht.shhtahh......................s..c....p.........s......c....p...h.ptlaV.pDhss..............................tpaI-Ac..........cAhYV..hsos.....h.h...u....s....M...G..shlsFusc.p.sAcpFspp...p....G...G+......lls.ac-ls...................................... 0 66 185 221 +5403 PF05575 V_cholerae_RfbT Vibrio cholerae RfbT protein Moxon SJ anon Pfam-B_8029 (release 8.0) Family This family consists of several RfbT proteins from Vibrio cholerae. It has been found that genetic alteration of the rfbT gene is responsible for serotype conversion of Vibrio cholerae O1 [1] and determines the difference between the Ogawa and Inaba serotypes, in that the presence of rfbT is sufficient for Inaba-to-Ogawa serotype conversion [2]. 
27.90 27.90 27.90 28.00 27.80 27.80 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.81 0.70 -5.35 2 55 2012-10-10 17:06:42 2003-04-07 12:59:11 6 1 34 0 2 97 69 238.30 95 99.24 CHANGED MKHLIKNYVQKLIKTELDAIQSKSVHDNRNFIYNGEFLILESEFGhHCFPRVQLNHALSYKNPNFDLGMRHWIVNHCKHDTTYIDIGANVGTFCGIAARHITQGKIIAIEPLTEMENSIRMNVQLNNPLVEFHHFGCAIGENEGENIFEVYEFDNRVSSLYFpKNTDIADKVKNSQVLVRKLSSLDISPTNSVVIKIDAEGAEIEILNQIYEFTEKHNGIEYYICFEFAMGHIQRSNRTFDEIFNIINSKFGSKAYFIHPLSSAEHPEFNKATQDINGNICFKYVS ..................................................MKHLIKNYVQKLIKTELDAIQSKSVHDNRNFIYNGEFLILE...SEFGWHC.FPRVQLN.HALSYKNPNFDLGMRHW.....IVNHCKHDT..TYIDIGANVGTFC.GIAA.R.HI.T.Q.G.KIIAIEPLTEMENSIRMNVQLN..N..P.LVEFHHFGCAIGENEGENIF....EV.....YE...FDNR......VS...SL.YFQK.NTDIA.....DKV...K.N.S.QV.LV..R..KL.SSL..D..I.......S..P..TNS..V..VIKIDAEGAEIEILNQIYEFTEKHNGIEYYICFE....F...AMGHIQRSNRTFDEIFNIINSKFGSKAYFIHPLSSAEHPEFNKA.TQDINGNICFKYVS................................................................... 0 2 2 2 +5404 PF05576 Peptidase_S37 PS-10 peptidase S37 Studholme DJ anon Merops Family These serine proteases have been found in Streptomyces species. 
19.50 19.50 19.50 19.50 19.40 19.30 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.67 0.70 -6.03 2 98 2012-10-03 11:45:05 2003-04-07 12:59:11 6 1 87 0 31 179 5 421.20 41 91.66 CHANGED AEPKAVDIKDRLLSIPGMSLIEEKPYTGYRFFVLNYsQPVDHRHPSKGTFQQRITVLHKDVNRPTVFYTGGYNVSTNPSRREPTQIVDGNQVSMEYRYFTPSRPAPADWSKLDIWQAASDQHRIFKALKPLYS+NWISTGGSKGGMTATYYERFYPRDMDGVVAYVAPNDVVNKEDSAYDRFFARVGTDECRDKLNGVQREALVRRAPLEKKYAAYAAENGYTFDTIGSLDRAYEAVVLDYVWGFWQYSTLADCADIPADAKNATDDAIWGSVDAISGFSAYTDQGLETYTPYYYQAGTQLGAPTIHFPHIEKKYIRYGYQPPRNFVPRSIPMKFEPWAMRDVDTWVRHNARHMLFVYGENDPWGAERFRLGHGARDSYVLTAPGMNHGANVAGLVPDQKARATARILDWAGVAPAKVQENPSAARPLATFDARLDQRDVEREPALRP ................................................................sst......Dlpc+L.ulPGhohlc.c...h...........s..uY.R.h...aVlpa.sQPlDH...+....+....P...p.p.GoFpQR.....l...hl....h...H..........+.....s....h....s....R...P...T....V..hh.TpG.....Y.s......s....u.......h.....s...P..p....h...p........E........o.pL.....l.......s.....uN.p.....lslEYRaFs.SpP......p....P....t......D.........W....s.....h.......L...o...lh.....QuAsD......H...+..lh.p.A.h..K....p....l.Y....s...t....+.........W...luTGhSKGG.TuhaYRpFaPcDlDsoVsY.V.AP...s.h.h.s.t.EDu..ta....-.t.F.h.......p.p....VG.T............t......-..............CR...c+l..pshQ..h..EsLhR+ss.......Lls+a.cp..ausppshTFc.hlGsl-cuaEhsVL-YsFuFWQau.s.s.s.Cs.s...IPu..tsAoDcpLasalssISuh.s.hasD...p...uhtsYssaaYQAuspLGh.shch.sa.h.cth..l....c....h....Gh.p.......sR.a.l......Pcpl..s..M.c.....F-.shhpclcpWl+csup+MlFlYGpNDPWoApshphhc.G..t+.spa.VastPGusHs.AplusLsts.p+tpAhutltcW....................................................t......................................................................................................................................... 
0 13 22 29 +5405 PF05577 Peptidase_S28 Serine carboxypeptidase S28 Studholme DJ anon Merops Family These serine proteases include several eukaryotic enzymes such as lysosomal Pro-X carboxypeptidase, dipeptidyl-peptidase II, and thymus-specific serine peptidase. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.45 0.70 -5.91 12 1264 2012-10-03 11:45:05 2003-04-07 12:59:11 7 20 253 9 917 1384 70 339.30 23 80.92 CHANGED QpLDpFcsssscoapQRYahNspahpss..uPlFLhl...GGEushssthltss..thhphApcaGAhVh.LEHRFYGpShPhsshost.sl+a...LoSpQALtDlApFIpshs..phphtsss.WIsFGGSYuGsLuAWhRthaPcLlhGulASSuPlhAplDFh...EYhpVVtsSlpphus....p.ChsslppuFsplcpLhtospGppsl..ppthphssshspt...s-.pDht.hatslhu.atulVQYs..hDspss.shs.hslpphCphhh......ss......oss..s....hshhh.l.pphstt...shshtpsshsh.hhss...phpssu.....hts.sRtWhaQTCoEaGaaQosss....stthFusshPsshal.chChslFGsshspp.lptslttTN.h..YGG..p.suoNVlhsNGslDPWHsLG..htssssuollshlIpusuHCsDMhsspsuDsspLpsuRphl 
...................................................................................hDp..........s....................p.............tpa.....p...RY.h.h..s..t..p..a.a...p...............s............u.......P.l......hhhh..........usE...s.........s...h...t.........h....h.......t..ps..........hhh.p......hAt.....phtuhhlh...h..EH.................RaY...G....pS...h......P...........h............s..........s............s......h......t.....p..........lpa...................LospQA...............L..sDhs..hh....p...............t........h..........p.........t................p.......h..............................t..............t......s.....s..............hlhhGG...............SYuGh..............Lu...AWh...........Rhp........YPc..l..hh.................uulAo.Sus..l..............t......h.......h.......s..h...........ta...h.p..lt...psh....s...................p..C.ttl.ptsh.....p..l..p.p.h.h........t..p...........t..t................ttl...........tp.ht.....h..s..........h..p................p.........................h..................................t..h..h.......pa.............................................................ht.........h..Ct..h..........................t....................................................................h.......h............h.........................................t.............................................t.......................s....ttW..aQ.sC.s.E..h....s....h..........s..s...ss...................t...h...h...s..p....h..s.....h...p....h..h....p.C..............ph....a.............s........................................................t.....h....s..............a.G.u..............................t......s...s....p...lh....a...ss...............G..hDPW.....p........h.........u..........h..........p........t........................s.................p...............s..........h......h..h......t..s.............s.Hp....Dh............................................................................................................................................
........................................................................................... 0 328 536 770 +5406 PF05578 Peptidase_S31 Pestivirus NS3 polyprotein peptidase S31 Studholme DJ anon Merops Family These serine peptidases are involved in processing of the flavivirus polyprotein. 20.50 20.50 22.60 25.00 20.40 18.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.32 0.70 -5.13 4 226 2012-10-02 13:45:52 2003-04-07 12:59:11 7 34 30 0 0 244 0 175.10 90 7.31 CHANGED GPAVCKKITpHEKCHVNIhDKLTAFFGlMPRGTTPRAPVRFPTuLLKVRRGLETGWAYTHQGGISSVDHVTsGKDLLVCDSMGRTRVVCQSNNKhTDETEYGVKTDSGCP-GARCYVLNPEAVNISGoKGAhVHLQKTGGEFTCVTASGTPAFFDLKNLKGWSGLPIFEASSGRVVGRVKVGKNE-SKPTKLMSGIQTVSKNpADLTEMVK ..GPAVCKKITEHEKCHlsIhDKLTAFFGlMPRGTTPRAPVRFPTuLLKlRRGLETGWAYTHQGGISSVDHVTsGKDLLVCDSMGRTRVVCQSNNKhTDEoEYGVKTDSGCP-GARCYVhNPEAVNISGoKGAhVHLQKTGGEFTCVTASGTPAFFDLKNLKGWSGLPIFEASSGRVVGRVKVGKNE-SKPTKlMSGIQTVSKspsDLT-MVK...... 0 0 0 0 +5407 PF05579 Peptidase_S32 Equine arteritis virus serine endopeptidase S32 Studholme DJ anon Merops Family Serine peptidases involved in processing nidovirus polyprotein. 
24.70 24.70 24.70 25.10 24.60 24.20 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.86 0.70 -5.44 6 275 2012-10-02 13:45:52 2003-04-07 12:59:11 8 13 15 6 0 344 2 299.70 70 11.60 CHANGED LThlWVpFF.....LlsVshhAGVsulslLlshWhLuRhTslsGlVTPYDlHhlTuoPRGu.....uulAoAP-GTYLAAVRRuALTGRsthFlPoshGSVLEGuhRT+psutNsVsVhGSohGSGGVFTIsGpsVVVTAoHVLu.sspARVossGasppLsFKssGDYAhAcs..ssWpGsAPplchu.pshpGRAYWpssoGlE.GllGpssAFCFTsCGDSGSPVlsEDGpLlGVHTGSNKpGSGhVTTPsGcTluhuslKLSEhupHauGPtVPlusl+LPcplIsDVcuVPSDLsuLl-SlPslEG .................................................................................LTILWLVFF.....LISVNhPSGILAlVL...LVSLWLLGRYTNlAGLVTPYDIHHYTSGPRGV.....AALATAPDGTYLAAVRRAALTGRTMLFTPSQLGSLLEGAFRTpKPSLNTVNVVGSSMGSGGVFTI.DGKlKCVTA.AHVLT.GNSARVSGVGFN...QM.LDFDV..KGDFA.IADC..PNWQGsAPKsQFCpDGWTGRAYWLTSSG........V......EPGVI.........GsGFAF.C.F.TACGDSGSPVITEAGELVGVH.TGSNKQGGGIVTRPSGQFCNVcPIKLSELSEFFAGPKVPLGDVKIGSHIIKDssEVPSDLCALLAAKPELEG........................ 0 0 0 0 +5408 PF05580 Peptidase_S55 SpoIVB peptidase S55 Studholme DJ anon Merops Family The protein SpoIVB plays a key role in signalling in the final sigma-K checkpoint of Bacillus subtilis. 
21.70 21.70 21.90 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.18 0.70 -4.89 6 510 2012-10-02 13:45:52 2003-04-07 12:59:11 7 3 487 0 114 438 11 195.60 43 45.24 CHANGED hpc.Ku-upY+IGLWVRDSsAGIGTLTFY-PpoKKYGALGHsIoDsDTp+llslcsGpIlcSolsSIcKGspGsPGEl+GhFss-pcsIGslppNophGIFGshppc......hsNhpspsl.Vuhps-VK.GPAcILTsIDscplcpaDIEIVphs.QcuPssKuMVIKlTDs+LLccTGGIVQGMSGSPIIQNsKllGAVTHVFVNcPssGYGlaIEWMLc- ...............................t...tptpY+lGLalRDssuGlGThTFhcspstpaGALGHsIoD..D.Tt.p.l.lpsGplhposlhuIc+GppGpPGEhhu.h.a....p.p.p.h.lGsIptNo.hGIaGphppt............t...h......pcsh.lAhpppl+pG.s.A.p.I.l.Tslcs.p.clcpF-IEIhplh.pptsusKuMl.I.+lTDtcLLpcTG..GIVQGMSGSPI.lQ.sGKllGAVTH..V..F.V.N.D..PspGYG..la..IEhMLp...................... 0 64 95 101 +5409 PF05581 MCP_N Peptidase_S38; Vibrio chemotaxis protein N terminus Studholme DJ anon Merops Domain This domain is found at the N terminus of several methyl-accepting chemotaxis proteins from Vibrio species. 21.40 21.40 22.10 21.80 21.30 21.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.26 0.72 -3.87 2 277 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 155 2 39 173 3 101.50 42 16.53 CHANGED M+FSpKIVAASSsLLLsslALLShpQh.pVR-EIcShVpDSl.EhVcGVppThtpslsu+KulAQ.sTpllp.sPp..s.s+ollppP.lKsoFLhlGhGhEp ...MKFSHKIVsASShLLLsTlALLShpQhhplRsEIcshVpsSlpEhlcGVpsTlps.lsu+KuLAppsTpllphcPs..shlcsllppPhlKsoFLhlGhGhEp........ 0 8 13 28 +5410 PF05582 Peptidase_U57 YabG peptidase U57 Studholme DJ anon Merops Family YabG is a protease involved in the proteolysis and maturation of SpoIVA and YrbA proteins, conserved with the cortex and/or coat assembly by Bacillus subtilis. 
25.00 25.00 26.50 26.50 20.90 20.00 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.91 0.70 -5.44 25 309 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 285 0 73 206 2 268.90 49 98.65 CHANGED hclGDlVsRKSYspDIhF+II-Icp.tpstchslL+GlchRLlADAPhcDL.hlcpschpc..pc+phppchpcsl+plhpctp............hhcpcpphpsssshppptphFphPG+VLHlDGDs-YLchCLclYcpLulsshGhtlpE+-QPcclhsLlcca+PDIlVlTGHDAhh...KsctshpDLssYRsS+YFlcoV+pAR.+apPshDpLVIFAGACQSpaEullcAGANFASSPsRVhIHALDPValsEKIAaTsls-sVslp-llcNTITGtKGlGGlETRGKhRhGhPhp.h .............h+lGDlVsR+SaspDIhF+Il-Ict.........clAILpG.clRLlADAPh-DLhtlcpcchpcpt+p..cpphpcsh+hhppchh..............h.c..pcp.ph...p....s..st..s..hp..pp..paFphPG+VLHlDGDstYLcpCLclYpclGlss.GlpspEpE.scclhcLlcca+PDILVlTGHDuhh...Ks..c..t..shtDLsuYRpS+aFVpuV+psR.+h.PshDpLVIFAGACQSaaEALlcAGANFASSPuRl.IHALDPValstKIuaTsh.-hVslhDVlcNTITGtKGlGGlETRGhhRpGhPhp............................ 0 31 58 64 +5412 PF05584 Sulfolobus_pRN Sulfolobus plasmid regulatory protein Moxon SJ anon Pfam-B_8140 (release 8.0) Family This family consists of several plasmid regulatory proteins from the extreme thermophilic and acidophilic archaea Sulfolobus. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.16 0.72 -4.01 6 48 2012-10-04 14:01:12 2003-04-07 12:59:11 6 5 28 0 10 65 3 69.50 40 58.43 CHANGED hEKLTlophILlpLSt+.CtTLEcLpE+TussKphLLVhLTRhaK+GIIhRKWp+aGG+KaREYCLKhREEll .....................p+Lp.pptI.LlhLu+t...CtTLE-LcctTsls+spLLVhLo+Lt+cGlItRcWt+.h..u.G+KaR.c.YCLK..h................... 1 2 3 10 +5413 PF05585 DUF1758 Peptidase_A16; Putative peptidase (DUF1758) Studholme DJ anon Merops Family This is a family of nematode proteins of unknown function. However, it seems likely that these proteins act as aspartic peptidases. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.87 0.71 -4.69 3 313 2012-10-02 15:32:34 2003-04-07 12:59:11 7 33 35 0 292 363 0 145.90 19 16.24 CHANGED VsVpNApGs..hTuCRLLFDSGSELSYISERCINsLGLARTPSRILVoGISusKA-sTRGsoplsIpSRlSssT.LsVpAHVLuKITSSLERpsIDsSlLsVFNDLphADopF..uolAPIDILLGSDYlWusITGpKI+DstGsLIAISSIFGWVITSltus+usoAT .......................................................h..............htsLhDsGSphoalopp.htpt..LtL....t..t..pp....h...h.t.hh..u...t...p....p..s..p.t.tt.htph...p...l.p....h.p..h.p...s...t.t........hp....l...pshs..ls..p.....l.....s.s.pl........hsl....s..t..p.......h..p..th..t..slpl.s.-....th..ppt..tp..hDlLlGs...Dhh.hpll.ttp.h..hpt...tst.hhh.pohhGal.ls.............ssh........................................... 1 167 170 284 +5414 PF05586 Ant_C Anthrax receptor C-terminus region Yeats C anon Yeats C Family This region is found in the putatively cytoplasmic C-terminus of the anthrax receptor. 25.00 25.00 30.00 30.00 21.90 19.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.36 0.72 -3.53 10 141 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 37 0 79 109 0 91.80 65 19.15 CHANGED VRWG-KGSTEEGARLEKAKNAVVpMP-EEhE.P...cPsP+ssPs+pPs.pcKWYTPIKGRLDALWALLRRQYDRVSlMRPpsGD+G..RCINFoRVps ..................VRWG-KGSTEEGA+LEKAKNAVVKhP-pEhE.P..p..sps.p.s.p+pPs.ppKWYTPIK.G+LDALWALLRRtYDRVSLMRPQsGDcG..R.CINFsR.............. 0 2 10 38 +5415 PF05587 Anth_Ig Anthrax receptor extracellular domain Yeats C anon Yeats C Domain This region is found in the putatively extracellular N-terminal half of the anthrax receptor. It is probably part of the Ig superfamily and most closely related to Pfam:PF01833 (personal obs: C Yeats). 
25.00 25.00 25.40 36.80 22.90 20.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.33 0.72 -4.18 12 190 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 39 22 100 145 0 102.50 51 22.00 CHANGED lp+SChEILulcPSSVCss.........EsFpVVl+GsGFppu+p.ppVlCoFphNpohohsE+Ps.slcssalLCPAPhLpcsGpshplpVShNsGhSFISoulpITuopCosG ........................c+SChEILusEPSSlCsG.........EsFQVVl+GNGFppu+ss-pVLCoFplN-ohT.l..s.cKPh.sVccsalLCPA.P..lLccsGpp..hslpVShNsGhSFISSSlhITsTcCosG..................... 0 7 16 45 +5416 PF05588 Botulinum_HA-17 botulinum_HA-17; Clostridium botulinum HA-17 protein Moxon SJ anon Pfam-B_8286 (release 8.0) Family This family consists of several Clostridium botulinum hemagglutinin (HA) subcomponents. Clostridium botulinum type D strain 4947 produces two different sizes of progenitor toxins (M and L) as intact forms without proteolytic processing. The M toxin is composed of neurotoxin (NT) and nontoxic-nonhemagglutinin (NTNHA), whereas the L toxin is composed of the M toxin and hemagglutinin (HA) subcomponents (HA-70, HA-17, and HA-33) [1]. 20.80 20.80 21.10 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.97 0.71 -4.31 3 33 2012-10-02 19:42:32 2003-04-07 12:59:11 6 2 16 1 2 40 0 128.20 72 92.70 CHANGED MSAERTFLPNGNYKIKSIFSsSLYLTPlSGuLTFSNTSStNNQKWKLEYluEsNuFKISNVAEPNKYLoYNsaGFIsL....DSoSNcsYWhPIKIAlNTYIIsoLscVN.hDYAWDIYDsNsNIoDQPLLLLPNFDIpNSNQMFKLEKI ..............................MSsERTFLPNGNYpI.KSlFSs.S..L..Y..L..s.sSGuLoF.SNpSShsNQKWplEYhupspsF+hS..NVAEPNKYLuYss...aG...F.I.L.....sS.SN..pshW.PIK.IAlNoYIhhoLshVN..hDYAWsIYDsNpNIhsQPlL.LPNFDI.NSNQhhKLEKl...................................... 1 1 2 2 +5417 PF05589 DUF768 Protein of unknown function (DUF768) Moxon SJ anon Pfam-B_8463 (release 8.0) Family This family consists of several uncharacterised hypothetical proteins from Rhizobium loti. 
21.00 21.00 21.40 21.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.98 0.72 -4.04 7 32 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 12 0 19 32 1 62.80 33 72.41 CHANGED MSTRGIsFLccWIucplP-TstuD.llSls-LTpKLhADAKAhGItpp-I-E-suSlaphIlcAl ...MSp+u.pFLcpWIu-pl.sss.spsD..h....l....uhs-.LscchhADActtGIspp-.I.pE-s..GslhchIhpu................ 0 0 13 13 +5418 PF05590 DUF769 Xylella fastidiosa protein of unknown function (DUF769) Moxon SJ anon Pfam-B_8396 (release 8.0) Family This family consists of several uncharacterised hypothetical proteins of unknown function from Xylella fastidiosa, the organism that causes Pierce's disease in plants. 21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild --amino -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.86 0.70 -5.18 6 104 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 12 0 12 77 0 212.10 35 45.08 CHANGED PPIDSRTGKP.MMAGPWENRDLSLEYFQLDFLGQYTVKHAFINGINIDRCYPGAPPDHVQVVMMoh+sGScoPGIlsphpG+.PPpPhspoAAFIGhcKSNTosch.PcG.oVLRp.Ds.....VsthhKFssLCoAcFsGGN-..IpFGIR........SAsSpSIpuhlpstssLspAs.+......-hhusR.................sFlc.sPcTETRWGNsWTWa.+AalPoPl...GDGlEhWM.TPIGsoGYYlsVphNFhEutRQKNTEsYQRA.........RcLMDGlLQSVVIQKp .........................................................................................................................................................................p............G.pslpp.............phs.hspA.hsGGsp..l.aGhc........suhu.ultttlpsh.thhpts........thhspp.......................................hlc.ssppEpRassshohh.+t..ss.sl...G.sGlE...hhh.TPlGs.uG..hosshphh-utt.pssc.hphA.........ppLhsGl..p.lhpK.................... 0 0 11 12 +5419 PF05591 DUF770 Protein of unknown function (DUF770) Moxon SJ anon Pfam-B_8473 (release 8.0) Family This family consists of several proteins of unknown function from various bacterial species. 
20.70 20.70 21.00 21.00 19.70 20.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.99 0.71 -4.67 91 1558 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 971 0 276 902 48 153.90 37 85.07 CHANGED hh..scs.tsRVpIpY-lcssuupcclELPhshhVlGDhoG....tpspsslc-R+hlslD+-NFcsVMcphsPplshpVsNpL..ss....s....splsVcLsFcShcDFsP-slAcQVstL+cLLEhRptLssLpu.hsspsshcchlpclLpssshhcpLtsEhp.ht ..................h.hucs.tsRlpIpY-lcssGupcclEL..Phshh..VlGDhuu.........tp-p.......sslp-...R.....chlsID+.cNFssVMpphsPplsas.V....sN.s.....Lt....ss.....s..............splsVsLsFcShcDFsP-slA+p..Vs.pLppLLEhRptLssLps.hsspsshcph.....lpplLpssphhppLhpcht.hh............................... 0 50 118 197 +5420 PF05592 Bac_rhamnosid bac_rhamnosid; Bacterial alpha-L-rhamnosidase Moxon SJ anon Pfam-B_8527 (release 8.0) Family This family consists of bacterial rhamnosidase A and B enzymes. L-Rhamnose is abundant in biomass as a common constituent of glycolipids and glycosides, such as plant pigments, pectic polysaccharides, gums or biosurfactants. Some rhamnosides are important bioactive compounds. For example, terpenyl glycosides, the glycosidic precursor of aromatic terpenoids, act as important flavouring substances in grapes. Other rhamnosides act as cytotoxic rhamnosylated terpenoids, as signal substances in plants or play a role in the antigenicity of pathogenic bacteria [1]. 
35.60 35.60 35.70 36.10 35.30 35.40 hmmbuild -o /dev/null HMM SEED 509 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.52 0.70 -6.29 25 1344 2012-10-03 02:33:51 2003-04-07 12:59:11 6 54 458 3 498 1369 219 462.90 20 56.56 CHANGED lpcsssuphllDhGQNhsGalRlc.VcuspGsplpL+auEhLc............cGslt..TpsL.........RsAcsoDpYIh...........pGc.tt-........papPpFTa+GFRYVclsGh..sp.....hsss..s..........lsuhVlaSD...hccsGsFcsScshlNpLacNshWohRu.NFhulPTDCPQRDERlGWTGDspshu.sAsahaDspshhs+WLpDltssQps.s.................G....shPslsPssh.......ssssssssWusuhlllPWslYppYGDppllccpYsuMctal-alppcsss......hh.thssaphGDWL............sssspTstsll.AoAaascssphhuchAphlGpsp.....D.AccYpshuccl+pAFpscal.sssG..............plsssT.....QTAhsLsLhasLV..........Pcstp.ttssppLschlcpsst+lsTGFl..GTshLhpsLsc..sGcp-lAYclLhpcshPSWhY.lsp.GATTlWEpW-uhh.......tsstMsSaNHYu.hGulspWhapsluGlp.............sstPGa+clhlpPtPs.usLs.A........cusacosYG.cIcscWchc..sGp.hpLplplPsNosApVhL ................................................................................................................h......t.hlh.DhGpphsGhhph...p.....h.........p.....s..........t...u.......t......t......lp.hh..h....uE.....ht.......................................................t..u.p..h...........t.h........................................................h...h.h.......................t.Gt.....t...........................h....st....h..s..h...h.u....FR..Yl..p..lpsh...tt...................t...p..................lpu..h.hhhs.s............ht.....t..u..p..F.p..s..u.s..........hl.N........plap.shhs.......hp...........s.shh.....s..h.....sDC..PpR.ER..hsWhGDhth.s...s.sh.h....h..s..s..t..t..h...hpchlps.h.ts...tp....p.........s...............................G..........hh...s...t....h....h...Pt................................t.s....ssa.....s...........h...h......lhhsap...h.Yhhh.G..Dp....p...hl......pp....h.as.s...h..p...p....hl.....ca.hhp.ptss....................................h.ssh....t.h.u...DWh.................................sht.tt..s..s.t..th.....h..
..s..ss.h.h.h.hshph..hsph....A..p....hl.G..c.pt...................................c..sppapp...ht.p...p.l.pp...s...h...pp..p.....a..h.s..ppu.............................................tht.s......s.o............psu.......sh....ul.hh.sl.h..........................s.p.p....tt.......t.....t..h..hp......tL............h.p.h..........h........t.......t.........p............s..........h.....t......h.........s......s..G......h.h..............u.......s...al....h......p....s....L.........sp..............tGt......t....ch..s..........hph...h.......h........p....p........hss.....Wh..h.l...p......p......G....A...T...ThW.Et..a..ss..............................s.h.sShsHhh.hu..ussta.hhphlhGlp...............stpsGa.c.phhlpPt.............sslp.s...................puph.....o....s..h....G..t....lp.sp.Wph..........p.......s.........st.....h.php..lplP...s..spu.l.............................................................................................. 0 220 361 461 +5421 PF05593 RHS_repeat RHS Repeat Yeats C anon Yeats C Repeat RHS proteins contain extended repeat regions. These repeats often appear to be involved in ligand binding (e.g. [1]). Note that this model may not find all the repeats in a protein and that it covers two RHS repeats. 20.80 20.00 20.80 20.00 20.70 19.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.35 0.72 -3.69 203 21340 2009-01-15 18:05:59 2003-04-07 12:59:11 9 591 1319 0 3320 21290 603 38.60 28 15.02 CHANGED YD.spGpLhs......hs.t.......ssGpss..p.ap.YD.stGp...ls....ph.....ssssGp..s ..............................................YD.ttGpLsp..........................hT..s..........................ssGpps.........p.Yp.YD..stGp...........ls............ph.........psstG............................................... 0 1007 1995 2696 +5422 PF05594 Fil_haemagg haemagg_repeats; Haemagluttinin repeat Yeats C anon Yeats C Repeat This highly divergent repeat occurs in number of proteins implicated in cell aggregation [1]. 
The Pfam alignment probably contains three such repeats (personal obs: C Yeats). These are likely to have a beta-helical structure. 21.50 2.30 21.50 3.30 21.40 -999999.99 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.60 0.72 -3.64 157 6916 2012-10-02 14:50:22 2003-04-07 12:59:11 9 294 419 0 1577 9015 69 75.00 16 36.58 CHANGED sssGphplpss.............................................................ht.spsutlssssslslp..usshhs.ssush.stss......................................hplsusGsLsspu .................................................................................................................................................................................................................................h..s.spsG.....t..l.t.u.s...s.s..l.sls......u.s.sl.s...s..s....G...t..l.....u..s..s...s..h..t..h..p..s..tt...ttt....................................................htlts.t.sth...................................................................................................................................... 0 229 664 1231 +5423 PF05595 DUF771 Domain of unknown function (DUF771) Finn RD anon Pfam-B_7023 (release 8.0) Family Family of uncharacterised ORFs found in Bacteriophage and Lactococcus lactis. 22.90 22.90 23.40 23.40 22.80 22.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -10.12 0.72 -4.07 18 367 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 341 0 27 185 0 87.50 33 84.35 CHANGED -stlllscpphp-lpptshcs.h....hsls-lp++h.s.+upshlpcpllpsP+FcKplchp....GhlhaPp.spus+.ahhps+chpcal-cpFpEIh .........................................shllls+pEYpEL.hthshcshW...........hshs-Lc++l..p..tS.cphlpcplLhcP+acKclchp.....GhVhYPs..psts+...WpFpA++hpcFl--aFsEI....................... 0 8 18 22 +5424 PF05596 Taeniidae_ag Taeniidae antigen Moxon SJ anon Pfam-B_8569 (release 8.0) Family This family consists of several antigen proteins from Taenia and Echinococcus (tapeworm) species. 
20.80 20.80 21.70 25.90 19.60 18.60 hmmbuild --amino -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.99 0.72 -4.16 11 312 2009-09-11 08:34:58 2003-04-07 12:59:11 6 1 13 0 0 302 0 60.60 39 80.77 CHANGED --sc.sccsspslhKtluEl+c.FFpcDPLGp+lspLsK-hpphspthRpKlRpuLtEYl+sLhsE ..................csp..ppstpslhKtluEl+c.FFcpDPLGp+lspLs+-lstlspcl+tKlRtsLpcYl+sLhpE.... 1 0 0 0 +5425 PF05597 Phasin Poly(hydroxyalcanoate) granule associated protein (phasin) Moxon SJ anon Pfam-B_8339 (release 8.0) Family Polyhydroxyalkanoates (PHAs) are storage polyesters synthesised by various bacteria as intracellular carbon and energy reserve material. PHAs are accumulated as water-insoluble inclusions within the cells. This family consists of the phasins PhaF and PhaI which act as a transcriptional regulator of PHA biosynthesis genes. PhaF has been proposed to repress expression of the phaC1 gene and the phaIF operon [1]. 41.00 41.00 41.00 41.00 40.90 40.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.54 0.71 -4.39 31 264 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 196 0 80 242 32 129.00 33 71.12 CHANGED cKpsps.......pphsspl+cpAcQIWLAGLGAauKsppEGu+lF-sLV+-Gcplc++s+phs-cplsssp.......ppht....phssslps+AsspWD....+LEphF-cRVspALsRLGlPopc-lcALpp+lDpLstplccl.sppp ...........................hpppspphhtclcchu+pIWLAGLGAa...........u+.......s....ppE.......Gu...........+.......h.......F-sLVKcGEphEccspptssc..plspsp.......pphp.......sp.hsps+spstsphs....+lEpsFDcRVspuLsRLGlPo...+p-lcsLpp+l-pLptpl-plstt.t..................... 0 19 45 65 +5426 PF05598 DUF772 Transposase domain (DUF772) Moxon SJ, Bateman A anon Pfam-B_8195 (release 8.0) Domain This presumed domain is found at the N-terminus of many proteins found in transposons. 
22.60 22.60 22.60 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.29 0.72 -4.08 160 5949 2009-01-15 18:05:59 2003-04-07 12:59:11 6 34 1706 0 1322 5334 675 71.70 25 21.55 CHANGED GRP.shssphhl+llllphhh.sl.Sscpltctlpcshtaphh.sG......p.........tsPDtoTls..cFR.pthtppt.......hhcplhpplh ....................................GRs.uhs.t.hhl+ll....llth.hh.ul.Sscplpctlpp.slt..h..phF.st....ht..p..........thPc.hoT.ls..caR..pthtppt..........hhpplh.................................................................................. 0 433 891 1052 +5427 PF05599 Deltaretro_Tax Deltaretrovirus Tax protein Nicot S, Moxon SJ, Studholme DJ anon Pfam-B_8606 (release 8.0) Family This family consists of Rex/Tax proteins from human and simian T-cell leukaemia viruses. The exact function of these proteins is unknown. Tax is the viral transactivator; is it a nuclear phosphoprotein that interacts with CREB, coactivator CBP/p300 and PCAF to form a multiprotein complex, which activates viral LTR and stimulates virus expression. Tax is also involved in deregulated expression of numerous cellular genes leading to T-cell leukaemia. Rex is a nucleolar post transcriptional regulator that facilitates export to the cytoplasm of viral RNA not or incompletely spliced [personal communication, Dr. S Nicot]. 25.00 25.00 26.40 26.30 20.00 19.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -9.95 0.72 -3.68 2 33 2009-12-01 13:32:35 2003-04-07 12:59:11 6 2 8 0 0 27 0 57.10 74 73.91 CHANGED M.hhSPLsRhhTEsshhIPSLRVWRLCopRLVs+hhhshFtPPssupPotHLstAssHLGPHRWTRhRLSSslPYPSsPLLPHPENL ......................hhIPpLRVWRLCTtRLVPHLWGTMFuPPTSSRPTGHLSRASDHLGPHRWTphRLuSTl............................ 
0 0 0 0 +5428 PF05600 DUF773 Protein of unknown function (DUF773) Moxon SJ anon Pfam-B_8595 (release 8.0) Family This family contains several eukaryotic sequences which are thought to be CDK5 activator-binding proteins, however, the function of this family is unknown. 20.40 20.40 20.40 20.70 20.30 20.30 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.67 0.70 -5.72 14 218 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 140 0 135 217 5 387.50 32 91.65 CHANGED Mp-h.sLPIDIpouKLLDWLVDRRHCshcWQppVhsIRcKIssAlpDMP.Ep-cIhpLLuGuaIsYFHChcIl-ILKsTEusTKNlFGRYSSQRMKDWQEIlphYEKDNlYLAEsAplLlRNVsYEIPuLRKQlsKspQhtpEhpR+ct-hppsAAph+ccYhpsCKphGlpGpNl+pELlsll.pDLPuhhpclutss.us.LpcAl-hYpAFssasp.......p.sstplLPlLpaltc+G.NoTlYEa+pupsPhsVEcP..chp.........phs.-......sss--p....IDW..GD......t.s....ss.u..phtstpIDaGhsh-ss.s..........-........thsGIDWG..............D..upssshpIsl.-sGos.......t...uVA+GpDAhoLLEsspsRspFlDELhELcuFLsQRLsEhp..--ssllshothphuPsllttpTscplpsMLu-VcsllspLoshphQHLFhIpuSPRYV-Rls-pLcQKh+ptchLttptpths-KppEAtpppscLcP+l-hlhppTR-LQK.IEuDISKRYssRPVNLMG 
..........................................lsI.DItht+L.-WLlsR+hsshpWpt.lhtl+t+IptAht..s.......hP..pt.pt.........h.......hp.hL.......p..s............s.I....pYhc....s.h.cIl.-lLpto.-.t.s..o+.slFG.pau.up.hhc..-W....ppIl.thYEK-shaLsEhuphlhcsVsYEIPul+KQltKhp...Q..p-hp++pt-hptsttphtppahtt..Ccph..Gl.p.................G........p..s......l.+tEL.hths.ppLPs.h...t.clhtth...tp.............ltpulchYpshst.hh.....................t...ttt...h..Ls.hL..phl.pps....s.shh..p.hpt.tt.t..s....hpps..t.h..........................t...............t.tpst.......IDa..Gs...........s......t.s...t......s..tlDhu......t.t..............................t.stI.sas..............................................t...ph.sh.pts.................t...th.u.p...u...p....th...plL-.sphRsphls-LhELc.FL.......t..Rh.Eh....p...pp...........s...................s.lthp..t.t..s..l...s..........pph.thls.lpthls.lss.phppLh.lhtS.+al-plsp.Lppphpt.thhh.tht.h.t+t.ch..pp.tt.tsphphhhtpo+pLpp...........hEtplSph.aps.R.VplhG..................................................................................................................................................................................... 1 55 77 110 +5430 PF05602 CLPTM1 Cleft lip and palate transmembrane protein 1 (CLPTM1) Moxon SJ anon Pfam-B_8636 (release 8.0) Family This family consists of several eukaryotic cleft lip and palate transmembrane protein 1 sequences. Cleft lip with or without cleft palate is a common birth defect that is genetically complex. The nonsyndromic forms have been studied genetically using linkage and candidate-gene association studies with only partial success in defining the loci responsible for orofacial clefting. CLPTM1 encodes a transmembrane protein and has strong homology to two Caenorhabditis elegans genes, suggesting that CLPTM1 may belong to a new gene family [1]. This family also contains the human cisplatin resistance related protein CRR9p which is associated with CDDP-induced apoptosis [2]. 
19.70 19.70 20.30 19.70 19.10 19.60 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.54 0.70 -5.77 26 492 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 253 0 331 471 12 368.50 30 71.07 CHANGED tusltuhlpslhhhhshhhhhs.Fhs......sssssssssts....t.............................htshatpsp.lsltlYsSssshhsshts.s...Llhpccshshush.sp.........shslslPcp...l.ppNGoLahHhhls.psGh.h.st....satstphh.ahsts...LspYh.p+.hp+p+NLLuspc-p...p-pcpcts.............ssphhuaa+PNlTlsllsDpsshshsslPPsltpahplpssssp........YhPllalNpFW.L+cchh.lNpTs..ppLsLplshpsluhaKaphhsph-puhppttph.........Gtsts-..h.-plKchLLDT...N.YLLulThlVolLHhlFEaLAFKNDIpFW+p+.KshhGlSlRollhNsFhQhlIFLYLhDN..cTSaMILsspGhGlllEhWKlsKslclclph...........uhlPh........ltFpD+.t.hopsEp+Tc-YDplAh+YLhhlhhPLlsuYAlYSLlY ...................................................................................................................t.......h..hhhh.h.h...hht.hh.................................................................................................shh.t.t.............h..pl...l.aho.......p.t......tt..........................hhhp...ps.h.hssh..pt.....................thphp...hspp..................h..ppN.G..ol.ah...+haht...hsG................apt..thh....hh.t..........lsp....Yh.....p...tcphsLLsspppt............p...t...ppt...............................sp.hoaa+PplolsllsDths..hshs..plPss..ltp.ahp.h.th.stp......................................YhPllahspahp....htcchh...........l......Npos........ppLs.Lplshps.................luhh+aphh.h..p...hpp.sh.........tp.h.................................................................G...s.....tp-......h.DplKthhl-T...N.YLLulThhV.......ohlH...lF-h.LAFKN............DIpFWpp+.cs.h..GlSs+olhhpsa.phllhLYlhDp..pTshhlhhs.slGhhIEh.......WKlpKshplplp.............................uhhPh.....................hth.pt................o..........EppT.............cpYDp............hAh+YL.shlhhPLhhuhAlYSLlY.............................................. 
2 122 178 264 +5431 PF05603 DUF775 Protein of unknown function (DUF775) Moxon SJ anon Pfam-B_8676 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 20.70 20.70 20.90 24.20 20.50 19.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.46 0.71 -4.80 35 312 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 278 0 220 310 3 192.20 30 94.21 CHANGED MFGslsuG+sspssspplss......spalhslsss...........pshsalslFl..................Lssssh.PsshuAuVYaph.P......................tsssaphLGslsssKPSAIFKlst................................hsssps..............psts.lplGISlEPhspltpphsthppspss...................................sstplAp+IlpshaNaLuSFsss..................spshVPhpshccWacKFpp+lppD..PsFL .....................................................hFGslhsG+.s....o...shptlsp.........spalhs...lssh...........pshsHlsVFl..................L.s..s..s..sh..Psshuuu.VYhph..P..............................ssssa..phLG..hlsspKPSAIFKlst....................................ttsts............................tsssssplGISlEshpslttphsshpst.st...........................................shpphAp+llpNhaNahuSFsss..................sspthVPhpshppWap.pFppRlpps..PsFh............................... 1 73 120 179 +5432 PF05604 DUF776 Protein of unknown function (DUF776) Moxon SJ anon Pfam-B_8747 (release 8.0) Family This family consists of several highly related mouse and human proteins of unknown function. 
25.00 25.00 30.00 28.90 22.60 23.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.31 0.71 -4.06 4 60 2009-09-14 12:10:08 2003-04-07 12:59:11 6 2 38 0 33 51 0 171.40 52 62.78 CHANGED MEutuKDGEEEoLQoAFKKLRVDAEushsshps.cu.usRsusRouh-..ssKPK.hsS.K-oWHusoRKoSRGsVRTQRRRRSKSPlLHPPKFTaCSstA.PSs.....upLKH+..o.sEPsDstuscGpus....pusssSTALssssacsahhpPhtpss.cssppossc-tAspst........sutsu.sAsc.S ...............McSEAKDGEEESLQTAFKKLRVDAuGSlASLSVG.EGsulRAsVRoAsD..-sKPKssCuSKDSWHGSoRKoSRGAVRTQRRRRSKSPVLHPPKFhHCSThA.sS.s.....uQL.KHK......opsDssD.uuouhGhss.......sus...psS.suLss.sapthsh-Phtsolschsspsppcpt.u.sss.........sSpsoLtssp........................................................ 0 1 3 12 +5433 PF05605 zf-Di19 Di19; Drought induced 19 protein (Di19), zinc-binding Moxon SJ anon Pfam-B_8581 (release 8.0) Domain This family consists of several drought induced 19 (Di19) like proteins. Di19 has been found to be strongly expressed in both the roots and leaves of Arabidopsis thaliana during progressive drought [1]. This domain is a zinc-binding domain. 27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.33 0.72 -3.84 56 760 2012-10-03 11:22:52 2003-04-07 12:59:11 7 63 135 0 411 688 2 57.80 32 13.39 CHANGED spFsCPaCt.c.c.aDlssLCsHl--EHsh-u+.s.slCPlC.....u.......s.......+.......Vu..p..Dhlu......Hl...ThpHu ......tasCPY..Csc.t.hc.h.s.u.LtcHlppcHp.......p......s.sp.....s...s.......s...CP..lC........u.............t.............p.......hs..s..shhp......HlshcH.......................... 0 82 147 264 +5434 PF05606 DUF777 Borrelia burgdorferi protein of unknown function (DUF777) Moxon SJ anon Pfam-B_8755 (release 8.0) Family This family consists of several hypothetical proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). 
25.00 25.00 31.70 31.60 24.90 24.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.03 0.71 -5.10 3 129 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 27 0 9 109 0 173.30 65 98.02 CHANGED MocsYcIYRMNQRLYGHALAQEDlKNWIYSNIFIsRIGTVKEFKQQTQEAIVTIPEFEDLEIHTKNISNINLELSKGDsVLLLQSSINIFDKNDDIHFDKHHFYILSAISPKTLNLIsDTVKI+ANNpIEIANQoTSLKcILDsIISAINGIcIpGsusIDtoSL+......usTopINScINSLFK .........ppDYcIYRMNQRLYGpALuQEDlKNWIYSNIFIh+IGTVKEFKpQTQEAIVTIPEFEDLEIHTKNISNISLELSKGDsVLLLQSSlNIFDKNsDIHFDKHHFYILSAISPKTLNLISDTVKI+AN...........N.pIEIANQoTSLKpI..........LcsIVSAIsGIcl.....hG....ss....s...I-.sSL+......IATopINSsINSLFK............... 0 5 5 5 +5436 PF05608 DUF778 Protein of unknown function (DUF778) Moxon SJ anon Pfam-B_8777 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 22.70 22.70 24.10 23.70 22.20 21.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.79 0.71 -4.23 28 251 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 151 0 153 239 6 122.70 44 60.30 CHANGED lPllSWLhPaIGHhGICsSsGl.IhDFuGsYaV................................................................u...............hDshs..FGsP.................................................s+YapL...................................................................................ctpph..s.....................sspsaDsAlppusccF.pp+saNlhs.......sNCHSaVAtsLNth........................................................pa.......psptsWshlplshhlhlpG+a 
.....................................................................................................................................lPlloWhhPhIGHhG.ICsSsGV.IhDFAGsYaV................................................................S...............p.DshA.FGpP.................................................s+Yh.pL........................................................................................................................................................................c.p....ps...............................sspsWDsAlppuscca.pp+.......haNLhs............sNCHSaVAhsLNhh........................................................pY.......tssssWNhlpLshhhhltG+a.............................................................................................................................................................................................................................................................................................................................................. 0 65 91 127 +5437 PF05609 LAP1C Lamina-associated polypeptide 1C (LAP1C) Moxon SJ anon Pfam-B_8782 (release 8.0) Family This family contains rat LAP1C proteins and several uncharacterised highly related sequences from both mice and humans. LAP1s (lamina-associated polypeptide 1s) are type 2 integral membrane proteins with a single membrane-spanning region of the inner nuclear membrane [1]. LAP1s bind to both A- and B-type lamins and have a putative role in the membrane attachment and assembly of the nuclear lamina [2]. 
19.70 19.70 21.60 20.20 19.40 19.10 hmmbuild -o /dev/null HMM SEED 465 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.51 0.70 -5.82 3 144 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 46 0 66 138 0 318.60 39 82.94 CHANGED MKTRRoTRL.pQHSQQsslQsSPApouRGLRDApuLS+D+pEDEsSSQP-oSQTlSKKTVRSP-EAsVSEDPls+LpRPPLR.PR.-ATpVQpKssF.EEGET.EDDp-SSaSsVTpl+lRSRDScESuDKsuRAuuHas-ShWuLP+SpuDFTAH-pQPSlLoTGsQKsPQEWssQAuRhRTRMtpcsILKSEhGNQSPSTS+pQsssQPss-Sh..VK+KtWW.......LLlLVAALASGlaWFFSTPs...VETTAVQEFQNQMsQLcsKYQGQDEKLWKRGpTFLEKHLNSSpPRoQPAILLLTAARDAcEsLKCLSEQIADAYSSFRSVRAIRIDGAGKAsQDSDTVKLEVDQELSsGF+NGQNAAVVHRFESFPAGSTLIFYKYCDHENAAFKDVALVLTVLLEEETLEASLGLKElEEKVRDFLKVKFTNSDTPNSYNHMDPDKLNGLWSRISHLVLPVQPENALKcGuCL ....................................................................................................................................................................................................................................................................................................................................t.........................................................................................t..................................................................................................................ts.........h.............h.h.hh..h..l.h..t....a..h........s............spt.s.tlpt..............F........sphppLpppa.uQst.LWpRu.hhLp+H.LpsspP.spPAhllhTAup..cuccs.L+CLupplAs.......AYo.o.........pp.sp..s...l.p.IDGss.+shtDSDhVKh.lD.cLosGFcsGppAAVV.H+FEpLPsGSTLIFYKYCDHENAAFKDVALlLTVLL.E.E.c.s.Ltsslu.pEhEE+VRDhLhsKFosSssssSascMDsDKLsGLWSRISHLVLPVts.ptlctt.C................................................................ 0 16 20 30 +5438 PF05610 DUF779 Protein of unknown function (DUF779) Moxon SJ anon Pfam-B_8830 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
25.00 25.00 31.00 30.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.14 0.72 -3.97 40 460 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 440 0 184 402 460 96.70 60 72.22 CHANGED pLIccLpucHG.sLMFHQSGGCCDGSuPMCYPpGEFhlGspDVLLGcI.................sGs.....PFYlSpsQFEYW+HTpLlIDVVsGRG.uhFSLEuPcGhRFLoRSRl ...LlcpLpscHG.PlMFHQSGGCCDGSu.PMCYPpG-FhVGcsDVhLGpl...................................sGsPhalStsQ..a-hW...KHTpLlIDV...VP...GRG...G...hFSLEsPcGlRFLoRSRl.......... 0 51 127 159 +5439 PF05611 DUF780 Caenorhabditis elegans protein of unknown function (DUF780) Moxon SJ anon Pfam-B_8886 (release 8.0) Family This family consists of several short C. elegans proteins of unknown function. 25.00 25.00 29.90 29.10 21.80 17.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.61 0.72 -3.26 3 44 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 6 0 44 25 0 68.00 71 70.29 CHANGED MADKSAYMGAGGYGSGYMGSNASSSGYAREDYAQGGNGGSsNQQQQGSGGNTNPGGQVFKARTDQSCYLGP .....MuDKSAYMSAGGYSSGYMGSNASSSGYAREDYAoG.G.S.GG..u...s..sss.sp..GSG...Gs.sN.sGuQVFKARTDQSCYLGP............................ 0 10 27 44 +5440 PF05612 DUF781 Mouse protein of unknown function (DUF781) Moxon SJ anon Pfam-B_8891 (release 8.0) Family This family consists of uncharacterised mouse proteins of unknown function. 
25.00 25.00 41.60 40.30 19.70 23.50 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.18 0.70 -5.76 6 79 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 41 0 56 78 3 311.80 39 96.99 CHANGED MuslhohshlLlhsh.ussAhsop..ps.P..WccuPtpLSch.hpssshhlNPWVap-RMshY+ILlNuTshYhupaGs-NppNsLWGLPLQhuWQh+SGRLsDPT..t.osCG.EhsDshCISssSWWuslNYYLSlIPFLAAVcpGllususcQVpI.sPscssp+FCosYSDCssthP-sMs+WcpFFQhLhplpt..pusaucpDplLpahWsAHtpSLspuopuFsD+hsaYScsElpFppuWsphVDalAAspFsTohspohpFhpsLPPRlLpssD.psP.IsDFTspQNpsLlhlthlNcl..Npllhs.hlpLh+phhpochsR-hhpt.l..hh..P....oohlcllpchhss....ss .........................................hlhh.h..s...ths..sts.......-hhPPLWcpsPuphuDa.lcs....sthlIsPWsY.-..Rhu.hYKIL.lspTspYFspFuspNppNlLWGLPLQaGWQacT...GRLADPo..ptTsCGh-sGDphCISlcS.WWAs.hNYaLSslPFLAAV-oGlhGhuscpVplLP.Psc...sppcFCaslosCpouaPcsMs+WssFaQ........hl.........................p.s........t..s.shD.sL....L+ahWsAHsuoLp.....suhphFp-.R..hpa...YScsEssFscsWshsV-alAAspFsTsl.pohc.FQpu.LPsRhLhssD.hsPhIsDFoshQNtVLhhLphlppl...sp..hhG....h...............................................h............................ 0 9 14 23 +5441 PF05613 Herpes_U15 Human herpesvirus U15 protein Moxon SJ anon Pfam-B_8900 (release 8.0) Family \N 25.00 25.00 210.50 210.50 19.00 17.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.55 0.72 -3.71 2 6 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 8 0 109.30 77 80.20 CHANGED M-sW+RQRLQEhRELCPL.lLMoLSNhhSKlEhlYlKYLFpMDFsThYRaIhsuLTLohTVTKSlVIEhLFIlKRWp-IcQhFpLsl+KspDCaIVAQFsHIPlKRhlhh MDVWKRQRLQECRELCPLPlLMSLSNIlSKlEIIYVKYLFKMDFsTMYRFILPALTLSMTVTKSVVIEMLFILKRWEEIcQFFRLNIRKVNDCalVAQFTHIPVKRhllh. 0 0 0 0 +5442 PF05614 DUF782 Circovirus protein of unknown function (DUF782) Moxon SJ anon Pfam-B_8909 (release 8.0) Family This family consists of porcine and bovine circovirus proteins of unknown function. 
25.00 25.00 231.10 231.00 19.10 16.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.55 0.72 -3.86 2 77 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 75 0 104.00 95 100.00 CHANGED MVTIPPLVSRWFPVCGFRVCKISSPFAFsTPRWPHNDVYItLPITLLHFPAHFQKFSQPAEISDKRYRVLLCNGHQTPALpQGTHSuRQVTPLSLRSRSSTFpp MVTIPPLVSRWFPVCGFRVCKISSPFAFTTPRWPHNDVYIuLPITLLHFPAHFQKFSQPAEISDKRYRVLLCNGHQTPALQQGTHSSRQVTPLSLRSRSSTFpQ. 0 0 0 0 +5443 PF05615 THOC7 DUF783; Tho complex subunit 7 Moxon SJ anon Pfam-B_8919 (release 8.0) Family The Tho complex is involved in transcription elongation and mRNA export from the nucleus. 27.70 27.70 27.80 28.80 27.10 27.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.63 0.71 -3.91 24 286 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 245 0 202 276 0 141.00 28 49.99 CHANGED -plh+.pRLlh.......s--+.hpplh+phhphtp.sst...................................cssptttpplhtphsth-hshh+hphhhp.sscpEpcpYpp.pcclcpslp....sh+pp.......htpL+ppLtcA+chhcp+p....caDpLuctIssps...sRpEppc...pLp ..................................-plh+.pRLLh.......s--+.hpplhKp.hhph.ss.ss...p.................................................ccttphhpchhtphuphEhuht+t..phl.hs.hNt+EhEpYpphppcI..psplp........ts+cc...........ItchKppLppA+ph+pp+p........................EYDsLAchIsppP.....sRp-ptppl............................................................ 0 64 104 157 +5444 PF05616 Neisseria_TspB Neisseria meningitidis TspB protein Moxon SJ anon Pfam-B_8925 (release 8.0) Family This family consists of several Neisseria meningitidis TspB virulence factor proteins. 
19.90 19.90 20.40 19.90 19.80 19.50 hmmbuild -o /dev/null HMM SEED 502 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.62 0.70 -6.13 3 184 2009-09-11 07:48:12 2003-04-07 12:59:11 8 4 72 0 16 177 2 264.20 41 79.54 CHANGED ah.pF...ApPl+ISDsh.s.stptu+VpKFRPcuS+Fap...........S+lTEAuslEHIPTGAKA..RINAKITASVSRAGVLSGVGKLVRQGAKFGTRAVPYVGTALLAHDVYETFKEDIQARGYQYDPETDKFVKGYEYSNCLWYEDERRINRTYGCYGVDSSIMRLMSDYSRFPEVKELMESQMERLARPYWEhlRc..NRsDSh.F+NYNFsRCYFsWNGGsCsVuKG.DDuRoFISFSLtRNPKYKEEMDAKKLEEILSLKVDANPDKYIcATGYPGYSEKVEVAPGTKVNMGPVTDRNGNPVQVVATFGRDSQGNTTVDVQVIPRPDLTPGSAEAPcAQPLPEVSPAENPANNPsPcENPGTRPNPEPDPDLNPDANPDTDGQPGTpPDSPAVPDRPNGRHRKERKEGEDGGLLCDaFP-ILACsEMGEPS-NsFEDIuIPpsTs-cTWSscNhFPSSGVCPpPKTFHVF....GRQFusSYEPLCTlAE+lRFAVLluFIIMSAFlsFGSLu+E ..................................................................................................................................................huGsGKLsR.GAKh..uTRAVPYVGTALLAaDlYpTFKEDIpspGhpYDsETDKFsK.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................Cs.....................hpat.hC.h...ht.hlhh...h...h.............................................................................................................. 1 5 10 11 +5445 PF05617 Prolamin_like DUF784; Prolamin-like Moxon SJ, Bateman A anon Pfam-B_8935 (release 8.0) Domain Both DUF784 and DUF1278 members are found to be expressed in the plant embryo sac and are regulated by the Myb98 transcription factor. 
Computational analysis has revealed that they are homologous to the plant prolamin superfamily (Protease inhibitor-seed storage-LTP family, Pfam:PF00234) [1]. In contrast to the typical prolamin members that have eight conserved Cys residues forming four pairs of disulfide bonds, both DUF784 and DUF1278 domains only contain six conserved Cys residues that may form three pairs of disulfide bonds. These two domains may have potential functions in lipid transfer or protection during plant embryo sac development and reproduction [2]. This family has been merged with the DUF1278 family. 20.60 20.00 20.70 20.10 20.30 19.50 hmmbuild --amino -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.35 0.72 -3.85 75 362 2012-10-01 19:46:35 2003-04-07 12:59:11 6 3 20 0 247 334 0 67.40 25 51.37 CHANGED cChs.shhp.......hpsCss-Ihtshh...uc........tplussCC.pslhphscpCast.....hh.s..phshhss............lhshCs ................................hhsshhp.......sttCsh-Ihtslh...sp........sslussCC.pslhphscsCasp.......hh.s..thPhhhs..................lhshCs................ 0 90 128 152 +5446 PF05618 Zn_protease DUF785; Putative ATP-dependant zinc protease Moxon SJ anon Pfam-B_8936 (release 8.0) Family Proteins in this family are annotated as being ATP-dependant zinc proteases. 
21.80 21.80 22.30 22.30 20.50 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.67 0.71 -4.49 11 1019 2012-10-02 15:32:34 2003-04-07 12:59:11 6 4 489 2 289 840 428 135.10 33 62.74 CHANGED lhG...plhL.....p.htA+hDTGAhTuSLpAsslhsap+-GppWV+F.....hhspcsppuhshEh.h.t...l+ttts............ptR.VlclhltlGsplhptEhoL.........p-RpthpaPhLlG.RuhhthhuhVDPup+alps+p ...................................................llG.hEhltlsslshs....hcA+lDTGApTSSLpA.......hc.......Ic........Fc...+.s.G..p.......p.........W.V+F......................hp.p..s.s.......p.p.s.p....h......hE..t........l...hchtc.I.+sSsuc...........sppRhVlchplp..lG.s.tph.p..hEhTL.........sDRspMsa.lLLG.RphhcsthlVDsuppalt.............................. 0 70 152 236 +5447 PF05619 DUF787 Borrelia burgdorferi protein of unknown function (DUF787) Moxon SJ anon Pfam-B_9013 (release 8.0) Family This family consists of several hypothetical proteins of unknown function from Borrelia burgdorferi (Lyme disease spirochete). 
25.00 25.00 63.60 63.50 19.30 18.70 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.20 0.70 -5.79 2 137 2009-09-10 23:38:07 2003-04-07 12:59:11 6 2 27 0 11 119 0 334.20 72 97.88 CHANGED MPpDTISVSLhpsRl.ssp.NYYNPLLlYKo.......sshs.hhLsLoVsNaEc.lcpLEhppGptpDp.tK...EcpL..LppAMusFF..up-uLKSsshalY.sp.cElKcaLKsphHsFVVhlNpttDs.......hpsDa-th+ps..FhVhSTK-pplhplhKsKspoEhcp.IAlYSsNc.NLHLKFhu.YLHQASIFHAVNPYGM.LsuTPlhDDolIspLRpupINFYSLLNETG.DGh.AFKEuVsLuGsSIDEhFThaYIKNEuIhELIRIWNKNsRtNSKLSALpLsGuhsNtYTuulEChh+chhpRGLIl.YKplplplsso.tLpLpLpVslpYN.ShNuVsLlITsQ-Is ....MPQDTISVSLlDSRIQAS+PNYYNPLLVYKTAKIKVNKDuAsaKhLsLTVNNYEKpIETLEK-NGNGpDQFGK...EKTL..LKTAMSsFFNSoEESLKSAsLFIYKDK..P..EELKpYLKsHRHoFVVLINTp.GDs..S..D..DGLplYKDDYsKFKtsSsFFVFSTKEQEIKELFK.DKuNoEK-RNIAVYSNN+DNLHLKFIStYLHQASIFHAVNPYGMsLuA.oPLl.DDTlIsKLRsAKINFYSLLNETGLDGlsAFKEGVDLuGsuIDEtFTYHYIKNEAIlELIRIWNKNNRQNSKLSALQLSGARDNAYTSAIECLLKRFlDRGLIlpYKsLpLTLSsTsQLKLELSVNITYNFSINuVuLVITTQDIV....... 0 6 6 6 +5448 PF05620 DUF788 Protein of unknown function (DUF788) Moxon SJ anon Pfam-B_9014 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 
20.70 20.70 20.70 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.14 0.71 -4.40 30 317 2009-09-15 15:32:55 2003-04-07 12:59:11 6 4 279 0 218 302 2 158.30 25 90.41 CHANGED MAspusKKpAppNpphLptlphhhlshsslallh..hhhhspss..shhs.......hhlhshsthhshatLcphu+Ppas.........tpupLlcsG.DLpttG.h..hEYhhDllYlohhsplhshl..osKhWal.aLllPsauhYKlhs.hl..........hs.hhutssttttpt.............pspSKRQpKhE+Rtp+ ..........................................................AscutKphhppNpptLphhhhhhlsssslahll...hhhhsp..ho.h.shhh.......hhhhshshhhs.hh.hhpphu+Ppas..................psGp...Lhc.....uG..DLs.htG....l.........sEahhDllhlThhsplhshh..oshhWhh.aLllPsauhY.hlas.hh......................htsh.hstsst.ttt................ttppK+pp+hc++................................................. 0 76 119 180 +5449 PF05621 TniB Bacterial TniB protein Moxon SJ anon Pfam-B_9028 (release 8.0) Family This family consists of several bacterial TniB NTP-binding proteins. TniB is a probable ATP-binding protein [1] which is involved in Tn5053 mercury resistance transposition [2]. 
20.10 20.10 20.10 20.10 20.00 19.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.89 0.70 -5.66 2 273 2012-10-05 12:31:08 2003-04-07 12:59:11 6 5 203 0 59 418 28 254.90 42 95.42 CHANGED MDEYPlIDLSHLLPAAQGLARLPADERIQRlRADRWIGYPRAVEALNRLEsLYAWPNKQRMPNLLLVGPTNNGKSMIVEKFRRsHPsuoDADQEHIPVLVVQMPSEPSVIRFYVALLAAMGAPLRPRPRLsEMEQLALALLRhVGVRMLVIDELHNVLAGNSVNRREFLNLLRFLGNELRIPLVGVGTR-AYLAIRSDDQLENRFEPMhLPsWEAN-DCCSLLASFAASLPLRRPSPIATLDMARYLLTRSEGTIGELAHLLhAAAlsAVESGEEAINHRTLSMAsYhGPSERRRQFERELM .............................................................................................................Hl...h...ht.hss..p.-...RI..phlc.tsRWI.GYs..pA....phLsp.Lps.L.hs.h.P.p.pt.R..M...P...s....L.LlVGsoNNGKo.....h...Il.c+.F....p.+...p...H..........s..........s........s....s....D....s....c...t....c.....t....h.............PVlhlp.h....P..spP.s.....h....+h...Y.sul.L...s...tht...............A..P...h.......+.s.....p.........s....p.....l.....s.c...h..c..p.s..l...t..L...lR....c.h....s.l+M..LlIDEl.Hs..l.L.u.G.ss..h..p.....p..R.....p.h....L...N....h...L+.aL..uN..E...L...p..IP..l....V....u..V..G....T..............c..............-A...h....h.......A....l....p....o.D.......s.....QLt...s...R......F.....-......s......h.....h......L..P...h...........W.....c.....h....s.....c........-.....h.....p.....p...L.....L...sSF.t..ts.L....P...L..+.+..s.S.s.l.s..s....h...-.h.AphlhshSp.GslG-ls+LLhsAAlhAlcoGcEtIstchlp.......................h................................................................................................................... 0 9 29 48 +5450 PF05622 HOOK HOOK protein Moxon SJ anon Pfam-B_8981 (release 8.0) Family This family consists of several HOOK1, 2 and 3 proteins from different eukaryotic organisms. The different members of the human gene family are HOOK1, HOOK2 and HOOK3. 
Different domains have been identified in the three human HOOK proteins, and it was demonstrated that the highly conserved NH2-domain mediates attachment to microtubules, whereas the central coiled-coil motif mediates homodimerisation and the more divergent C-terminal domains are involved in binding to specific organelles (organelle-binding domains). It has been demonstrated that endogenous HOOK3 binds to Golgi membranes [1], whereas both HOOK1 and HOOK2 are localised to discrete but unidentified cellular structures. In mice the Hook1 gene is predominantly expressed in the testis. Hook1 function is necessary for the correct positioning of microtubular structures within the haploid germ cell. Disruption of Hook1 function in mice causes abnormal sperm head shape and fragile attachment of the flagellum to the sperm head [2]. 35.00 35.00 35.00 35.10 34.70 34.70 hmmbuild -o /dev/null HMM SEED 713 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.52 0.70 -13.24 0.70 -6.72 9 557 2009-09-11 15:29:52 2003-04-07 12:59:11 7 9 169 1 312 507 1 523.20 26 58.15 CHANGED Ms...c.KhELC-SLlpWLQTFpluAPCpslp-LosGVAhAQsLpQIDPuaFsEuWLs+IKpD.VGcNWRLKsSNLKKILpulh-YYp-lLupplS-thlPDltpIuEpSDssELGRLLQLILGCAVNC-cKQ-aIQpIMsLEESVQ+sVMsAIQELho+cpssosus-shssh-pQL++hh.-LpEs.p-+-ELAQRCpEL-hQlhhL.EEKsuLhpENphLp-chsph-..........oh.sPsplsu++a.pLQpQLEQLQEEsaRLEuA+DDhRl+Cp-LEK-lhELQpRN-ELToLApEspuLKDElDlLRpuuDKsspLEupl-oY+KKLEDLsDLR+QVKhLEE+NshYhcpTlpLEEEL+KAsusRuQlEsYKRQVQ-LHsKLspESp+A-KhtFEh+pLcEKh-ALpKEKERLltERDoLRETs-ELRCuQhQtspLs.ts...ussu.ot-sLAAElhPsEhREpllRLQ+ENKhL+htQEGu.pp+.stLQphL--ANppppcLcoppRLsppRIhpLptQlE-LQKtLppQGu+s-s..sspLKpKL-tHhcpLpEsp-Elp+KpthlE-LpPctsps.spKIsELpsALppKDp-h+AME-RYK+YlEKA+pVIKTLDPK.p....AssEl.hL+p..........QLsE+DtclptLEp-h.cts+.hR-.EEpLIloAWYNhGhshQ+cAh-uRLsshus.........sGQSFLApQRpsTssRRshssp..s....souK 
...............................................................l.h.Wl.po.....h........t..............t.................t...p.....h....pLssGhhhtplh.pI.sst..hs.......p.....p.l.p.p.p...sssshpl...+...h...pNLphllptlhpY.......hp-h..Lt.p......l.............lPs...l...........hIu.....c......p....s....s.........t.......E......l...t..................+L.L..Ll...LGCA.V........pC.ppKp.........caIppI.t.L....-.ssQt...........sl....hst...I.Q...E.....lh.p...........p......p....p.......s.....s.......p.........s....t......t....s.......t.p........h...-......t..t..........................p........p......h....h.....p.......l......p...c..h..ht....c..p..-.p.h..........t...p.......p.....h...t....-...h...p..p..l.....L.p.p...E.+s....s.L...............t..p.....p........h..t...p.p.h..s..p..p................................................................p.t..s.p.p.h.h.u.h...c....hh...p....h..p.t.p....lcpLpp....E..........c....h.....p...t..t.......h.Dh....+h.....ch.....p...ph..........-....t..c.....l..tclp...p.c.sp.p...............Lhs.....A....c.p...spth+DElD.LRppup+....ss+LEsplpph+.c.+Lp-..hp.h+tpl..ctLcEpN.t...hhhpp...p.h.L......E-pL....c.......t.s.p.......s.......t....p.s...p...l.....c.......p.......hc.+.......p...h.............p.Lptc.h.pp.ptp.ctp.....p..h...h-hp...p....lp.......cchp.....t....l.....cp.....p....t...p....l..........p.h...t....p....h.......p......p.......h......c.pL...c.hs.................t....t.....t.tt..............................st.s.p...s......h......s.t.El...............................t..h..p..p....p..hh....+.........L......p....hE......N.....p....L................t.........p.....t..............................................t..............h.t..........p....lc.pt...p...........t...h.t.....p.lc...pp............p............p.p........p.......h.....l......p.t.p...h....tp...p...p...t.....t.....p..tt..........pt...........p...t.p.h...pt.......th....p........ph.........t....p.t............................h..pp....................................................................
.................................................................................................................................................................................................................................................................................................t............................................................................................................................................................................... 0 90 122 209 +5451 PF05623 DUF789 Protein of unknown function (DUF789) Moxon SJ anon Pfam-B_9113 (release 8.0) Family This family consists of several plant proteins of unknown function. 20.60 20.60 23.40 21.40 18.60 20.40 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.98 0.70 -4.94 19 248 2009-01-15 18:05:59 2003-04-07 12:59:11 7 10 38 0 149 236 2 246.60 32 51.85 CHANGED oNLERFLcusTPsVPsphhsp...sshps.psh.....pppphsaFtLsDLW-sFcEaSAYGhGVPl.......pLssu.-sVhQYYVPYLSAIQl.....ass.pphhs...................................Rp.s-su-st......S-u.Scsp...................thtpt.psushspst.....tsphGplhFpYhEpssPasR.PLs-KlspLupcaP...........tLpoL+SsDLuPuSWhSVAW......YPIY+IPss.slcDLsuCFLTYHoL..uoshpss.sp......................................t.spclsLPsFGLAoYKhpGslW..................ss..pts...p-....ppphtoLhpuADsWL+pl........pV.pHsDapFF 
.............................sl-pFltsso.P.l.......hsp........tthpt.pth...............t.pt..hF.LsDlW-tacEhSuY..Gh..u..Vsl.........hLsss...c..plh..QYYVPaLSAIQl.asp..tt..............................................pt..pps.ptt...t..t..sps.st.t..............................................ptt.p.pt.........pt.s..hFpYhEpp..Pa...tR.PLh-K.........lppLupphs.............tLhohcosDL.PsSWhuVAW......YPIY+IP.s....s.pcl.s.ssFLTaHsL..usshptt.........................................................................................t..tphsLsshGltoY+hp..u.phW.............................s...t..p.......pphh.tsL.psAssaL+th.......................ps..pH.DapaF............................................................. 1 25 93 119 +5452 PF05624 LSR LISCH7; Lipolysis stimulated receptor (LSR) Moxon SJ, Mistry J anon Pfam-B_9152 (release 8.0) Domain The lipolysis-stimulated receptor (LSR) is a lipoprotein receptor primarily expressed in the liver and activated by free fatty acids [1]. It is thought to be involved in the clearance of triglyceride-rich lipoproteins, and has been shown in mice to be critical for liver and embryonic development [2]. 25.00 25.00 30.60 30.00 22.50 22.10 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.42 0.72 -4.23 3 176 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 39 0 70 137 0 48.90 70 8.91 CHANGED cWLTVLlIllGALLLlLLlGICWCQCCPHsCCCYVRCPCCPsTCCCPE+ ....-WLhVllVlLGuhLlhLLlGI.C.WCQCCPHoCCCYVRCPCCP-pCCCPc..... 0 3 9 27 +5453 PF05625 PAXNEB PAXNEB protein Moxon SJ, Mistry J, Wood V anon Pfam-B_9269 (release 8.0) Family PAXNEB or PAX6 neighbour is found in several eukaryotic organisms. PAXNED is an RNA polymerase II Elongator protein subunit [2]. It is part of the HAP subcomplex of Elongator, which is a six-subunit component of the RNA polymerase II holoenzyme. The HAP subcomplex is required for Elongator structural integrity and histone acetyltransferase activity [2]. This protein family has a P-loop motif. 
However its sequence has degraded in many members of the family. 20.50 20.50 20.50 20.70 20.40 20.10 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.14 0.70 -5.62 30 316 2012-10-05 12:31:08 2003-04-07 12:59:11 6 5 262 3 225 365 7 338.60 25 89.19 CHANGED oSFp++tht......................p..p......hP.....GlRPS.spsupsoTSTGsssLDplLuGH.GLPlGohLLIEEsuoT-auulLL+YFuAEGlVp..............spclaVsu.h...sppaspcLPGhhpsssppcccs.......................sppps-cMKIAWRYpplschpsussssp........................sasHpFDLTK+L.sps.hsslsalshsss.........................................................................................s.....asullpplpshIppp.........sssslhRlsIPSLLsPshYsspsupsp.......llsFLHuLRuLlRt.ss.psssllTlPhsLas+....ssuLsphlcpLsDuVIcLpPFstp.........cttsss....tK..pGLl+lpKLPsLs-+u.shhscps....DaAFKLuR+K.FpIEpauLPP--s-ppsppsst.....................sstsscpsLDF ....................................................................................................................hs.GhRsS.s.hs.u.p...hsSTGhssLD.plL...uG............GLslG.s..lL...............l.E...E..........st.............hs.s......authLh.+hFhApGllp..............spplhlhu.h...........stthhpp...L.......Pu.h..tssptpp...p.p.s...............................................................s.pp..ppphKIAWRYpth.sphpss.sssp................................................tasH.taDlo+ph...p.h.....tsph.hhh....t................................................................................................................................................htpllpplpp.hlppt.................ttpplhRlsl.u.LhuPhh...h..s.s.p.....t..sp.t......................lhpFLasLRuLlR..........s.........s.h.....s..........sshlTl.PhpLh...........sssl......hthlpplsDsllpLpsFstp.................pttts.....hct.p...Gll+lc+lPhh...sshs...shhspst.........................-h.uF+lpR++.FhIc.hplP.s..sptttptttt........................................................................................................................
................................. 0 83 128 187 +5454 PF05626 DUF790 Protein of unknown function (DUF790) Moxon SJ anon Pfam-B_9309 (release 8.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 20.20 20.20 20.60 20.60 19.60 19.90 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.14 0.70 -5.94 13 146 2012-10-11 20:44:44 2003-04-07 12:59:11 6 3 123 0 80 152 24 374.90 31 88.07 CHANGED MLPpELLcs+.tpcGcIhPpaus....-cl-LAcpVIphF+pplGcphG-LpcclcplE.ptpNYKhVRGhA+lL.....cRp.spF-ssoslDPhclRchLFct....G.ssocpERpclLpc..........sAcch......sso.p-lE+ulaADhEEE+lLsch..............................PsIoP--Ll+pYNLSLlQTLlFcAhclsl....plsss...aKcllRtIKtLGLMYpl.s.............................................t.t.hplclsGPASLh+hoc+YGsulAKLlPtllpscpWpl+A-ll......pscRlYpFcLsSppshhh.chppt......................................................aDSslEcpFspchptlh.uhclhREP-llplGppsaIPDFhlp+.ssp+VYVEIVGFWT.EYL++KlEKl+csshshLlllscE.L....t.tphchsspcVIhF++.KIshscVhthL+ ......................................................MLsp-Llph+..hpstplhPhahs.....pphpl...ApclIphFpttlGps..pu-LpcplppLp..p.................t.............Da....+lhRGLu+lL.................ccp.spFE.h.hu.sl-PtclRpplFpt..........sss..h.......p.......p.....p......R..ptsLpp..........lAppL......tl...pclpcuhYADLc-pphLsp.h...............................s..s...sP-pLlcpYNLuLsQsllacAsclpl.....pst..ss....aKtlh+h....lKhhtL..Mahlpt.............................................tsp.shplpl-GPsSLh.ctop+YGhphA+hlPsll.pssc.....WplpAplt............p....ts.p.+shphpLss.p.s.shh.s..c..h.s.s..s..p....................................................aD.StlE.psFupca..pp....h.....t.....p..sWpl.REs-hlsls....s..p.VhIPDFt.l..........p..................+..s.......sp.......chhlEIlGFWssEYLc+Khppl.cps.p.t...s...Lll.sssE..L....ussph..pt....hs..t.pllhF+p.plp.psVhphl................................................ 
0 18 52 69 +5455 PF05627 AvrRpt-cleavage NOI; Cleavage site for pathogenic type III effector avirulence factor Avr Moxon SJ, Coggill P anon Pfam-B_9342 (release 8.0) Domain This domain is conserved in small families of otherwise unrelated proteins in both mono-cots and di-cots, suggesting that it has a conserved, plant-specific function. It is found both in the plant RIN4 (resistance R membrane-bound host-target protein) where it appears to contribute to the binding of the protein to both RCS (AvrRpt2 auto-cleavage site) and AvrB, the virulence factor from the infecting bacterium [1]. The cleavage site for the AvrRpt2 avirulence protein would appear to be the sequence motifs VPQFGDW and LPKFGEW, both of which are highly conserved within the domain [3]. 20.30 20.30 20.80 20.30 20.10 19.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -7.88 0.72 -4.91 22 371 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 42 2 166 352 0 36.90 43 27.04 CHANGED spcpspslPKFG-WDsss.PASA-GFTVIFsKsR--Kcos ......t...ptssVPKFG-W.D.sss...susupsaTslFsKsR-cKps.s... 1 22 94 132 +5456 PF05628 Borrelia_P13 Borrelia membrane protein P13 Moxon SJ anon Pfam-B_8766 (release 8.0) Family This family consists of P13 proteins from Borrelia species. P13 is a 13kDa integral membrane protein which is post-translationally processed at both ends and modified by an unknown mechanism [1]. 
25.00 25.00 27.00 27.00 19.50 23.20 hmmbuild --amino -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.79 0.71 -4.36 5 121 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 37 0 9 87 0 126.40 51 78.98 CHANGED TsLpYESpKsslL.APFLLNLFLoFGIGSFVQGDYIGGGAlLGoQVLGGILllTGhIlss....ssssssp.huIoGtlLhGIGsLTIAuSYITSIIIPFTFANRYNANL+K+LGIuLAGFEPNFDIGIN.....G...FQLSFKKSY .............hhhY-opKpssl.sPFLLNLFLsFGIGSFsQGDhlGGuhlLGF.....shlGhhL.hhsGhh..L.sh..............s.spcs...hhhGthlh...hhGssThsso+lsplIlPFTFANpYNcpLKppLslsLuGFEPsFDluhs.....G.....FpLSFKKSY....................... 0 5 7 7 +5457 PF05629 Nanovirus_C8 Nanovirus component 8 (C8) protein Moxon SJ anon Pfam-B_9354 (release 8.0) Family This family consists of a group of 17.4 kDa nanovirus proteins which are highly related to the faba bean necrotic yellows virus component 8 protein whose function is unknown [1]. 25.00 25.00 151.80 151.70 17.50 17.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.07 0.71 -4.42 3 31 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 9 0 0 30 0 151.80 70 100.00 CHANGED MADWFcSsLKTCTHVCDFscIuuDS...QQ-lhCCDSM+GKLp-PRKVLLVSCpVSFNGSFYGuNRNVRGQLQlSMp-DDGVsRPIGYVPIGGYLYHNDYGYYEGc+TFNLDIESDYLKPDEDasR+FpVSIlN-NGLD-pCDLKCYVVHSlRIKV M..DWAESQaKTCTHGCDWKsISSDSu-NRQYVPCVDSGsGR.KoPRKVLLRSIEssFNGS..FpGNNRNVRGFLYVSIRDDDGtMRPVLlVPFGGYGYHNDaYYFEGcupspCDI.uSDYlsPs.DWSRDMEVSISNSNNCN-.CDLKCYVVCSLRIKE 0 0 0 0 +5458 PF05630 NPP1 Necrosis inducing protein (NPP1) Moxon SJ anon Pfam-B_9369 (release 8.0) Family This family consists of several NPP1 like necrosis inducing proteins from oomycetes, fungi and bacteria. Infiltration of NPP1 into leaves of Arabidopsis thaliana plants result in transcript accumulation of pathogenesis-related (PR) genes, production of ROS and ethylene, callose apposition, and HR-like cell death [1]. 
21.00 21.00 21.10 21.10 20.30 20.90 hmmbuild --amino -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.56 0.70 -4.60 27 493 2009-01-15 18:05:59 2003-04-07 12:59:11 6 12 160 3 323 429 0 179.10 32 74.56 CHANGED sssshthcapPhlch.scGChPhPAlsssGssuGGLps........o.Guh................................suuC+-put..suQsYuRusp..N..G..hhAlMYuaYF.KDpshs..shGuHRHDWEaVVVWlss....ss.plhtVosSuHGsappt.s.sssph-Gs+sKlsYapshss.sHshchsssss-..........p.sLlsW.....sshss.ssRctL.......pssDFGsA.......slPh..KDusFtspLspA ...........................................................................s..phhhpapP.Lch..s.sGChsaPAVss.sG.s..s...uu.G..Lps.........o...Gs..s.............................................s.usCpsss....uQ.......l........Y..sRush..s....s......hhA..IM.YuWYFPKDp.s.....ss...............s..h....G.HRHDWEpl.l.V.Wlss......................ss..pl...h.u.l..u...sS...s.H.u..s....app.............t............s..........s............h.....s....G.....s..psp.ltYhtsh.h......sHthphsss..s........G.....p.....................htsLlhW..............-thss....suRsu.L.......p.s.s.s.FG.p.A........p.h.Ph..pDs.sF.tpLtt.................................................................. 0 86 163 303 +5459 PF05631 DUF791 Protein of unknown function (DUF791) Moxon SJ anon Pfam-B_9328 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.28 0.70 -5.76 3 263 2012-10-03 03:33:39 2003-04-07 12:59:11 9 6 145 0 180 646 62 301.10 33 72.54 CHANGED FYYLVFGGLAAVVAuLELuKouKDRlNToPuFNSFKNNYLLVYSlMMAGDWLQGPYVYYLYSoYGFuKGDIGpLFIAGFGSSMLFGTIVGSLADKQGRKRAClTYCIlYILSCITKHSPpYKVLMIGRlLGGIATSLLFSAFESWLIAEHNK.............................RGF-sQWLSlTFSKAlFLGNGLVAIISGLFANLLADsFGFGPVAPFDAAACFLAIGMAIILuTWSENYGDsSDsKDLlsQFKVAApAIASD.plhhLGtlQSLFEGSMYTFVFLWTPALS..PNDEEIPHGFIFATFMLASMLGSSlAuRLLuR+.LRVEuYMQIVFLlSAsoLhLPIVTshLVsPSpVKs-GLSLsuulQLLGF ......................................................................................................................................t....sF.pF..p.pal...hVY..L..h..h......su...D....W....LQ....GP...Y.l.Y....h.....L.Y.p.p..Y.u..hsct....p.....I.u.h.L...a...l..s.Ga..u.S..uh.l.h.....G.s.h..s.....G.....s.....L.....u.Dp.h.G.R+p.u....C.l.h..a..s.l.h.Y.........l..s....C.....l......o..............K..h.s.s..p.a..h.....l.L.h.....l.GRl.L..G.Gl....u.T.....o..LL..F....S...s.F.E......u...Wh..l..t..E...H.t.c...................................................................................+..s.a...s....t..p...h..L..sh..T.F.u.pu.s.......hh.N.ul..l..Alh.uG.l..hup....hl.s..s.........h.........h..u....h............t.............lu.PF..sA..h.shL...s...l.u..hs.l....lh.tsW....s..E....N...a.G..s.............................t......p...p..........s.....h...........................t..p.h.....t...t........u...h........p....s............l...h...s........D............p+....l....hhLGh.h..Qu.lFEu.uh..Yh..F..VFLWTPs..Ls.............spt....t...........t....l....P....h...Gh....lFus.FMhupM....lGSslh...hhhs........p.p....hp..s...t..hh..hshhh..sshsh.h...........................................................................................................................................................................................................................................
........................................................... 1 75 112 149 +5460 PF05632 DUF792 Borrelia burgdorferi protein of unknown function (DUF792) Moxon SJ anon Pfam-B_9387 (release 8.0) Family This family consists of several hypothetical proteins from the Lyme disease spirochete Borrelia burgdorferi. 25.00 25.00 36.90 36.70 23.10 22.30 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.46 0.70 -4.99 2 136 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 28 0 10 109 0 194.90 67 97.49 CHANGED hsIKNKN..NsNhEpKK.tEh.p-.......EIhpII+DlhTQlFsLFGADNFLlLFPR.Dh+GFGYVPQLFFIKPKTpLIoRTYNTSCSKRPsINYYDRKAEYVSYNsVMTGEpISLsGGILTShYKDMLSlhKhTVFGNhhhRFDuHLsKEQLANRlQAQVPFoIYSPTFGLKELAlITsLoFKDhPFIDEVEVSLShElVKTFcLEKYKG ........................................pph.tp...p.......EIo+II+DVlTQIFALFGADNFLVLFPRhDLKGFGYVPQLFFIKPKsELIoRTYNTSCSKRPV.INYYDRKAEYVSYNPVMTGEpISLNGGILTSLY.K-.MLSlLKMTVFGNohLRFDuHLsKEQLANRlQAQVPFSIYSPTFGLKELAlITSLoFKDTPFIDEVEVSLSlEIVKTFs..LEKYKG...... 0 6 6 6 +5461 PF05633 DUF793 Protein of unknown function (DUF793) Moxon SJ anon Pfam-B_9395 (release 8.0) Family This family consists of several plant proteins of unknown function. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.40 0.70 -5.71 7 188 2012-10-01 23:20:42 2003-04-07 12:59:11 6 6 24 0 111 233 0 230.20 22 80.18 CHANGED hPAT-aQu........hhShLSlRRsQhss......ssspppE.p....EL-sFQ++VAE+FhDL........usSs.........tslLSlpWltKLLDsFLsCppEF+uIlhstts..QISKsP.hDRLls-hh-RulKALDlCNAlhsGl-plRQap+hhEIsloALcpp........PLG-GplRRAK+ALhsLsIuh.s-....Ksuuuusss..ppNRShsRshs.........ht+RohG............ph+S.oWuVSRsWSAuKQlQAhsuNLssPRus-ssu...Ls.PVYhMoSVhlhVMWsLVAAlPCQ-R.GLtsHhs.sP+phpWAuslhSla-+Ih-E.K++-+Kp.usGLhcEhpphE+hu+.L.EhsDuhcFPhs--c.p.EVtpcVpEhhplCcthcsGL-PhpRpVREVFHRlVRSRoEhL-sL ................................................................................................................................................................................................h..hsh.ah..hh..h....ttht.hh...........h........-chh..hh-.sh+hLDlh.sht.tlt.l..pt..phhh.hsht.h....................................................................................................................................................................................shh.ht..hhhh.h.hhs.hh....s......................t......au.sh..l.p.l.t-.t...........tt............................h.htEht.hc.....h............hht............t................................................................t.....h...h........................................................................................................................................................ 0 21 78 93 +5462 PF05634 APO_RNA-bind DUF794; APO RNA-binding Moxon SJ, Eberhardt R, Barkan A anon Pfam-B_9606 (release 8.0) Family This domain contains conserved cysteine and histidine residues [1]. It resembles zinc fingers, and binds to zinc [2]. This domain functions as an RNA-binding domain [2]. 
25.00 25.00 33.30 25.70 19.80 17.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.32 0.70 -5.13 7 186 2009-01-15 18:05:59 2003-04-07 12:59:11 6 9 21 0 116 189 0 158.30 36 77.96 CHANGED Ls......ptht.ts.spppp.psp.sDlP.....+.c+KPaPhPhKclpcRAKEchp.tp.t.p+.L..PPcNGhlV.pLVPVAcpVhpARphLlpsLspLl+..VVPVpsC+aCsEVHVGshGH.h+oCcG.ssstRpu.HpWssGslpDVllPlEuYHLaDphs+.RIpH-pRFshPRlPAllELCIQAGV-lP-aPs+RRppPlh.htt.cllD ...........................................tht.............................................................................thh.lApcshpA.aptlhpGlp+Lhc...hhsVpsCtaCsEVHVGshGHph+....h....Ctuh.cpptRsGpHsWppusl-DllsPh.saHlhD.h..Gt.......lp..c-.hRh.Ys+hPAVV....ELClQAGspl.P.c.............................................. 0 10 68 94 +5463 PF05635 23S_rRNA_IVP Ribosomal_S23p; 23S rRNA-intervening sequence protein Moxon SJ, Eberhardt R anon Pfam-B_9648 (release 8.0) Family This family consists of bacterial proteins encoded within an intervening sequence present within some 23S rRNA genes [1-3]. It folds into an anti-parallel four-helix bundle and forms homopentamers [4]. 23.30 23.30 23.40 23.30 23.00 23.20 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.20 0.72 -4.15 122 1034 2009-09-11 08:57:14 2003-04-07 12:59:11 6 5 486 10 434 1051 267 104.30 26 82.27 CHANGED psac...-LclWp+uhcLshplYc.lopshP.p.cpauLpsQlcRuulSIsoNIAEGhuRp..osp-ahcFLtIApGSssElcopLhluhclsalsp.pph......ptlhpphpclp+hlsuh .................................hppl.happuhphshplap.hs.p...ph..s.p...p....c...p..a...s.L.ss.QlpRu..........usSlsuNIAE.Gh..s...+p..opp-ahphLpIA.......tuShtE.hph.L..ls.hc.hs...a..l.s.p..p.ph......pt.lhpphpplhphl.t.h.................................................................. 
0 178 349 412 +5464 PF05636 HIGH_NTase1 DUF795; HIGH Nucleotidyl Transferase Moxon SJ, Anantharaman V anon Pfam-B_9692 (release 8.0) Family This family consists of HIGH Nucleotidyl Transferases 19.90 19.90 19.90 19.90 19.80 19.60 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.40 0.70 -5.43 10 1483 2012-10-02 18:00:56 2003-04-07 12:59:11 6 2 1444 1 204 1046 11 364.90 35 97.14 CHANGED McssGIIsEYNPFHNGHhYHLppucc.spssltIAVMSGsFlQRGEPAIlsKWpRucMALpsGsDLVlELPssaSsQuA-aFApGAVcILspLu.l-sLsFGSEpsslccapclAclhs-+ttchcphl+p.L.scGhSYPpuppcsac.....phtG.c.thp...htoPNNILGlpYsKAlhptspsIpshoIKRpuAsYHDt-l...ccphASATuIRptl.......................tpclctlc+hVPssohcll....p.hsshsshscaFshLKYpllt.osccLcsIapVsEGl-sRlh+ulppupsac-hlcLlKTKRYThoRlpRlLTalLlNhcp.........psstssaI+lLGFocKGQpaLpphKKshs.....lPlIT+luptshct.....hph-l+AopIYpLs.....ppshspp-Fs+sPI ...........................................MphsGlIsEYNPFHNGHtYtlppu+p...t.s...s...l....h.I.s.lMSG..NF..lQ..RGEPAIlsKWsRAcMALpsG.sDLVlELPhhhulQuA-hFApGAV.plLstLu...l-.s...lsF.......G.o.E...ps.plps...hpplAchhp..cp....t...t..ph.p.ph.lcp.h.p.p.u.hSYPpthp.t.hh.p.........................p.h.h.t......p...hphs............tsPNsILGlpYhKAl...t.....h..s..p..s..IphhsIpRpu.....usaHst....ph...............ctp......hAS....ATuIRptl..........................................................p..p......s..t....l.cp.h....lP...psshthh........................s.....s...h...h..s......h..c...pa.......as..h....L........+.Y...pl...l..........t...otp..c......LpsIap..l..sE...Gl....-sRl.........pps....l....p..p.......u....p......o......hcc.....hlctl.KTKRYThsRlpRllhalLlshp...t...........................tppt.sp.hl+lLGhsc+Gp.paL+....plKpp.........lsl....l...o..+l.sp...p.......s.h..ph........................hthph+As..p..lYph........st.....ppsasp.s............................................................. 
2 79 138 173 +5465 PF05637 Glyco_transf_34 galactosyl transferase GMA12/MNN10 family Wood V anon Pfam-B_6000 (release 8.0) Family This family contains a number of glycosyltransferase enzymes that contain a DXD motif. This family includes a number of C. elegans homologues where the DXD is replaced by DXH. Some members of this family are included in glycosyltransferase family 34. 20.40 20.40 20.50 20.40 20.30 20.10 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.58 0.70 -4.86 14 639 2012-10-03 05:28:31 2003-04-07 12:59:11 7 9 211 4 419 634 57 209.70 22 55.28 CHANGED pssclVllhuSs.pss.....s..tp.hltpslcN+l-YAc+H..........GYshhahpssshhh...thsss....WuKhPhl+psMpcaPcAEWIWWlDpDAllhshphsl.chhh..phLpphhhpp...........sshpph.cpshs.cupshhhlhspDasGlNuGSFLlRNspauthllDhWs..............-Phhtppsts...pcEQsALsallttHsplhsphuhls.................+hlNuYp.ut.t...............................hs.c-GDhllHFsGCpstssCt ..............................................................................s....hlhlhh.p.t.h............h..p.hpN+hcYschH...................Gaph.h.h.....tshhp.........tphtt................Wt.Kls..hlRphh...h......taP..c...sc......ahaalDt-shhhs.shp.........................................s.pth.p...h................p..s.....s..lh......p...p....shtu...l..NsGsFllR..s........spWu....h...hL.-.h.Wh...........................h.hh......t.h.......tt-.QsAl.....hl....h...t.............p................h....htp..hs.hl..............................................tchh.p.t...a..t.................................................ht..pch.s.hlsphsGCp.t............................................................................................................................... 0 104 243 356 +5466 PF05638 DUF796 Protein of unknown function (DUF796) Moxon SJ anon Pfam-B_9698 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
21.80 21.80 21.80 21.80 21.30 21.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.75 0.71 -4.15 152 2770 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 1013 13 450 1429 75 135.10 28 72.81 CHANGED alpl.s...G.....Is.........G.pstpp..s.HccpIplhuasasls.shs.......sutssGpsshpslphsKhlD+uSPhLhpuhss.Gcplppsplphh+.....ssG............p.t.cYhplpLps.shlsslsssss.............tt.hshEslslsYspIphpap ............................................Ylpl.s..G.......Is..............G..pshtp....s..Hcsp..I.lhuapasls....s..........su.....tsuGps...shpshphsKhlD+uo.PhLhpuh....ss......Gcpltp.....splph.hR......sssG.............p....cYhphpLps.shlsslshphs......................ttphp.hEplslsYpcIphpa.h......................... 0 69 183 306 +5467 PF05639 Pup DUF797; Pup-like protein Moxon SJ, Bateman A anon Pfam-B_9797 (release 8.0) Family This family consists of several short bacterial proteins formely known as (DUF797). It was recently shown that Mycobacterium tuberculosis contains a small protein, Pup (Rv2111c), that is covalently conjugated to the e-NH2 groups of lysines on several target proteins (pupylation) such as the malonyl CoA acyl carrier protein (FabD) [2]. Pupylation of FabD was shown to result in its recruitment to the mycobacterial proteasome and subsequent degradation analogous to eukaryotic ubiquitin-conjugated proteins. Searches recovered Pup orthologs in all major actinobacteria lineages including the basal bifidobacteria and also sporadically in certain other bacterial lineages. [1] The Pup proteins were all between 50-90 residues in length and a multiple alignment shows that they all contain a conserved motif with a G [EQ] signature at the C-terminus. Thus, all of them are suitable for conjugation via the terminal glutamate or the deamidated glutamine (as shown in the case of the Mycobacterium Pup [1]). 
The conserved globular core of Pup is predicted to form a bihelical unit with the extreme C-terminal 6-7 residues forming a tail in the extended conformation. Thus, Pup is structurally unrelated to the ubiquitin fold and has convergently evolved the function of protein modifier. 20.10 20.10 20.10 20.10 19.50 19.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.44 0.72 -3.48 15 351 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 337 8 108 262 19 65.20 44 96.99 CHANGED Mus+csGGGQpcsscptpt.t.....ssssussptRpcclo-DVDDLLDEID-VLEpNAEEFVRuYVQKGGp ............................ttt...........ttpt.p.......ptt....stssssuppppc..ch.s.--l.DDLLDEIDsVLEpNAE-FVRuaVQKGGQ.. 1 32 78 100 +5468 PF05640 NKAIN DUF798; Na,K-Atpase Interacting protein Moxon SJ anon Pfam-B_9801 (release 8.0) Family NKAIN (Na,K-Atpase INteracting) proteins are a family of evolutionary conserved transmembrane proteins that localise to neurons, that are critical for neuronal function, and that interact with the beta subunits, beta1 in vertebrates and beta in Drosophila, of Na,K-ATPase. NKAINs have highly conserved trans-membrane domains but otherwise no other characterised domains. NKAINs may function as subunits of pore or channel structures in neurons or they may affect the function of other membrane proteins. They are likely to function within the membrane bilayer [1]. 
20.00 20.00 21.20 21.30 19.40 19.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.50 0.71 -5.36 13 270 2009-01-15 18:05:59 2003-04-07 12:59:11 9 2 79 0 133 229 0 164.50 53 73.51 CHANGED MGsCSG..RCoLlhlCsLQLlsALERQVFDFLGYQWAPILuNFlHIllVILGLFGTlQYRsRYllsYslWssLWVsWNlFIICFYLEV.GpLS+.............................DoDL.LTFNlShHRSWWhEpGPGClsp.ls..sushshcs++hloVsGCLLDYpYIEVlHSuLQIlLALhGFlaACYVsplhh-EEDSFDFhsu..................PhYho....s .......................................................ls.sLERQlFDFLGY...QWAPILuNFlH.IlhV...ILGlFGTlQYRsRYlhsYs.lWhsl.....WVsWNlFIICFYL.EV.GsLS+.............................DoDl...hTFNhShHRSWWhEpGPGChhp.s...ss...................hs.cspphloVsGClL-apYlElhHSu...lQIlLA.LhGFlaACYVsphhhEE..EDoh..ch.............................h........................................ 0 21 34 71 +5469 PF05641 Agenet Agenet domain Bateman A anon Pfam-B_2551 (release 8.0) Domain This domain is related to the TUDOR domain Pfam:PF00567 [1]. The function of the agenet domain is unknown. This family currently only matches one of the two Agenet domains in the FMR proteins [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.17 0.72 -3.82 34 609 2012-10-02 16:56:36 2003-04-07 12:59:11 7 25 73 4 302 580 6 67.00 32 12.42 CHANGED hppGstVEVpSccpsh..csuWatAhllpt.sct.....chpVcapshphpstts..............hp-plshpp.....l....RPsPP... ..................tcGscVEV..h..S.cs.ssh...suWahApVhhhpuc..............hahlcYsshs.ss..........................hsEhVshcc...................l..R.PssPs..t............................................... 0 52 141 206 +5470 PF05642 Sporozoite_P67 Sporozoite P67 surface antigen Moxon SJ anon Pfam-B_8657 (release 8.0) Family This family consists of several Theileria P67 surface antigens. 
A stage specific surface antigen of Theileria parva, p67, is the basis for the development of an anti-sporozoite vaccine for the control of East Coast fever (ECF) in cattle. The antigen has been shown to contain five distinct linear peptide sequences recognised by sporozoite-neutralising murine monoclonal antibodies [1]. 26.10 26.10 27.70 27.30 26.00 26.00 hmmbuild -o /dev/null HMM SEED 727 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.25 0.70 -13.24 0.70 -6.67 3 31 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 10 0 10 32 0 320.00 20 85.01 CHANGED MpIlHFLLTIPVIFVSGGDKMPAGESPRTSKPSPLVTLESAlTQPSKDP...FKTVuALSKATKVWKSAVSlSDDSKTVPTPVSEPhITRSFQEP..................VSQESElQs-TEhNpuscGS+oDSE-DDD...............................................-pEEEDNKSTSSKsGKGSpKu.QPGVSoSSGSTTSsTssoTslSQTGLG..uSGSH....AQQDPuVu.sGV.........VGVPGLGVPGVGVPGsG...GsGshPGVGVstsGVuPGVGVG...........................G.GGV..........................PGVGl......................................tSsoSpEG--sDDpEc-t-s+shp.......................................................................PGVGlPGVtV...........GsoTSSoSTToPSoSTTTTTPoSSG.PSs.Gu.GoSsRNAVTRpTDSISGPIPSPG-PRAITGQM..........GEcEpFAVQFLGDFKPKPRRYEGscT-ssKLKKFIFEEVKSLVpTLINLKLAIANDFVEITEKLKKpNQNHVPKLKLLKGsQFDTKQKVANVLKGFNSLYFVlFMNLNLAKEVN+PEELAEhLWKLNTIPDKVu+EFELAlEKTKuSEKKsELEEAFKSIslGFKIAYYATNDILSSITNSVYsLIKLKNFGDDFVTEVRKSLQMVPHQKNLNGSSFIVKISEhMNKKGTEsQDQTSGuGS..............KGTEGGSLRG.DLTEEEVLKVLDELVKDVSEEcVGIGDLSDPSSRSPscKPAcLGPSLVIpNVPSDPoKVTPTQPSNLPQVPTo.....G.GNGT-Gs-QsPuusNG......................oGNGEGGKDLKEGEKKEGLFQKIKNKlLGSGFEVASlIIPMTTIIFSIVH 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ss....................... 0 1 5 5 +5471 PF05643 DUF799 Putative bacterial lipoprotein (DUF799) Moxon SJ anon Pfam-B_9829 (release 8.0) Family This family consists of several bacterial proteins of unknown function. Some of the family members are described as putative lipoproteins. 
20.20 20.20 21.60 20.30 18.90 18.70 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.47 0.70 -5.02 3 303 2012-10-01 20:48:06 2003-04-07 12:59:11 6 5 289 0 76 216 14 207.60 45 91.96 CHANGED MKPLILGLAAVLALSACQVQKAPDFDYTSFKESKPASILVVPPLNESPDVNGTWGMLASTAAPLSEAGYYVFPAAVVEETFKQNGLTNAADIHAVRPEKLHQIFGNDAVLYITVTEYGTSYQILDSVTTVSAKARLVDSRNGKELWSGSASIREGSNNSNSGLLGALVGAVVNQIANSLTDRGYQVSKTAAYNLLSPYSHNGILKGPRFVEEQPK .......................................................................................h.hh........h.....hs.s..sl..h..L..ouC.s....st.....s...p.h..D.Y.o..uF..KpSKP+S.ILVLPPlNcSsDVpAoauhLo.psTh.P.LA.E..uGYYVhPVA.lV-ETFK.QNGLTsAuDI..puls.PsKL+cIFGADAsLYloVspYGTsYpllsSsTpVoAsA+LVDLRoGchLWs..G..S..A...o.s..p...-........s.u..s..s..u...s....u....G.....L.lGhLVs....AsVpQIsso...loD.cuapluuhsuspLLSs..sp.s...G.lLhGPR........usth........................................................... 0 15 39 58 +5472 PF05644 Miff DUF800; Mitochondrial and peroxisomal fission factor Mff Moxon SJ, Eberhardt R anon Pfam-B_9868 (release 8.0) Family This protein has a role in mitochondrial and peroxisomal fission [1]. 
21.10 21.10 21.80 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.72 0.70 -4.74 9 222 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 83 0 105 189 1 189.30 36 94.98 CHANGED huE.scl..EhcYTcsISppMRVPc+lKssush.--p.hhspsss..........cph.MpVPERIVVhGpspch.shStPR-lpL.pSs....P..uhlplpTPPRVlTLo-+.hs.h-.Estssssss...........spE.+shsph+Rtpusucsu.hppsupls+sD..............uhh...................s......u....p.........................shsshssslEshosc.tls...sLRRQIlKLNRRL.hlEtENcpRtpREhlhYulslAaaLlpoahWLpR ...................................................................................................h..p.tasptIsppMRlPcpl+sss.....t.....p.......t........hpp.t...............tph...MpVP-RIllh.Gpstc................hs...P.t.-ltl.puh........P...t.ltlpsPPpllTLscp....hs.h.p.pt.....t..................p..h..s.phhc.ts....s........tptt...ptt.......................................................................................................................................................................ths...hts.hp...shs.c.plh....LRRQlhKLNRRl.hLE.pstpRtpREhlhYslsluahllpsahWhpR..................................................... 0 24 33 63 +5473 PF05645 RNA_pol_Rpc82 RNA polymerase III subunit RPC82 Moxon SJ anon Pfam-B_9884 (release 8.0) Family This family consists of several DNA-directed RNA polymerase III polypeptides which are related to the Saccharomyces cerevisiae RPC82 protein. RNA polymerase C (III) promotes the transcription of tRNA and 5S RNA genes. In Saccharomyces cerevisiae, the enzyme is composed of 15 subunits, ranging from 160 to about 10 kDa [1]. 
21.90 21.90 22.30 22.00 21.80 21.10 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.51 0.70 -4.85 29 304 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 256 2 191 294 1 231.10 19 43.22 CHANGED sthhcLhpppal.plsp....s.....t.h.tpthpph..ssshsphcpps-sc.phptphtshhppsp.sh........t...t.ht..suh+t.......................hcssls..hphNh-+FphphRsptLlphs........................cs+lGpsouplYcthL+hhcppssshpts.............................shshosp-lt+tl.spshslpts............................................................................................................phlspaLpLLusss.htFlpchus...GpahVsapclhppL .........................................................................................................h.pLhtttal.ps.......................t....h...tt........s.....t.ph....hpt.....p..h...h.....htt..htphhpt..sph.t..............pt.tt..hs.t.sput+ptt.........................................pts.h..-sslh..hplNa-+FthphRsptllshs..........................ps+hspsouplh...cshLchs..chpssp.ht.........................................ohshohspIhcpL...tphs..lsts............................................................................................................................................................................................lppaLplLspss....hhtc.....t.......s.ahl.hpphh.............................................................................................................. 0 53 97 151 +5475 PF05647 Epiglycanin_TR DUF801; Tandem-repeating region of mucin, epiglycanin-like Moxon SJ, Coggill P anon Pfam-B_1480 (release 8.0) Pfam-B_13922 (release 26.0) Repeat The unusual mucin, epiglycanin, is membrane-bound at the C-terminus but has a long region of this tandem-repeat at the N-terminus [1]. It was the first mucin identified to be associated with the malignant behaviour of carcinoma cells [2]. 
Mouse Muc21/epiglycanin is thought to be a highly glycosylated molecule, which makes it likely that its function is dependent on its glycoforms. Cells expressing Muc21 are significantly less adherent to each other and to extracellular matrix components than control cells, and this loss of adhesion is mediated by the TR portion of Muc21 [3]. This family also now contains the repeat that was the C. elegans protein of unknown function (DUF801). 27.00 14.00 27.00 14.00 26.70 13.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -9.97 0.72 -4.17 6 290 2009-09-10 21:16:35 2003-04-07 12:59:11 6 13 12 0 150 263 0 55.90 52 110.76 CHANGED sossuoSTsSuuASTssNsuSS.sTSuuTSTsosouS.SsTSSGoSTAsNsASosTTuuSuTtssTGTpTTo ......tstuST.sSs.usSTuTNopSS..sTS.SGsSTsTNScS.STTSSGuST....ATNStSSsTs....................... 0 114 114 114 +5476 PF05648 PEX11 Peroxisomal biogenesis factor 11 (PEX11) Moxon SJ anon Pfam-B_2629 (release 8.0) Family This family consists of several peroxisomal biogenesis factor 11 (PEX11) proteins from several eukaryotic species. The PEX11 peroxisomal membrane proteins promote peroxisome division in multiple eukaryotes. 
23.80 23.80 23.80 23.80 23.60 23.70 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.33 0.70 -5.08 61 883 2009-01-15 18:05:59 2003-04-07 12:59:11 9 7 300 0 540 829 5 206.20 18 85.16 CHANGED lsphlcalsposGRDKlhRslQYsu+hluhhh.pp...s..stphsshhcpl.....csphohsRKhhRhh+slstlpsutthh..sp.t......Dsll+hhsllppluhuhYhshDplsWlsphGlhpspstp....+hsphusthWhhuLlhullpslhplhthppphpphtt........................psscsspptp.................phtpcphshhhsllpshsDlhlslstL...uhl...phssshlGlsGhlSSllGlhsha ...........................................................................h..hhtp...ut-+hh............+h.hpah.sphh..h....................................t.t.............tt.httl.........................ptp...huh...sRp..............h...hRhh....p.........l....phhp...thh..th...........tp......................s..h..hphh...t.......h..h...p...p..hs.shYh...h...h....-pls....a.......l..s.....p..h....Gl.....hs......hpttt...................phtp...........h.ush....h....W.h.hullhulht....p..hhplhth.tpptp.phtt.......................................t........p.........................................................thhppp.h.hhhsllp..shsDhhhslstl......thh........ths.sshlGhhGhhSulhuhhp....................................................................... 0 168 289 432 +5477 PF05649 Peptidase_M13_N Peptidase family M13 Studholme DJ anon Swiss-Prot Family M13 peptidases are well-studied proteases found in a wide range of organisms including mammals and bacteria. In mammals they participate in processes such as cardiovascular development, blood-pressure regulation, nervous control of respiration, and regulation of the function of neuropeptides in the central nervous system. In bacteria they may be used for digestion of milk. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.46 0.70 -5.42 156 3507 2009-01-15 18:05:59 2003-04-07 12:59:11 8 35 1646 10 1445 3238 341 342.10 22 54.54 CHANGED s-DFYpassGsWlcpssIPs.cpsph..............us.FspLpc...pspppl+.............................pllcph...tt.t.............................................................................................................ssstp+htshYpushD.hssh...........cptGhpPlpshlpp.lsu.h................p............spts........................................................................ltphluph....hts..............shhsh.h.....VssDh..cs...s...sp....hlhlsp.......su.luL..PccsYYhppp..................................tphhpsYp.paltclhph......h......G..............................ht.s.............pspptAp...plhshEp.pLA............phphsp......tcpcc.pph................Ys.hsh.......s-Lpph.....h..ss......ls.............a.pphlpshhh.............................st........p...pllltp.....................ss...ahptlspllps...sshp.....slpsYlhaphlpshush..Lspshpptphpa.t......................sltGhtp..ts......+hcp.slsh.ss...st..lGpslGp............hYVcc..hFssc.u....Kpphpphlpslppuapppl...p.shsW.......Mssp....TKppAh.pK..............Lsshp.cI.GYP 
...................................................................................................................................pcDFapass.G..t.....W..hpp..t..lPs......spsph......................................us..Ftt.L.t.c...p..pp..hlp.....................................phlpp........................................................................................................................................................................................ss..pph.t....phYpts.....hc...pth.........................cth.G......h....p...P..l..h.......t.hlpp...lt..s.....h..........................................sh.s....................................................................................h.phl.s..p.ht..htht.s...................hhsh.h......lssD.....ps...s.......pt.........hlh.hst................su..lhL........PspsYYhp.pp.........................................tphhptah..phhtphhth......h.............u..........................................................h.t.p......................psp.p...hp...p.llphEp...plA.......................phhhsp.........tcp...p..-..t..th.................................Yp.hs.h.......s-.hpph............hst...........................................hs...............h.ttahpthh..............................p.s.....p.......plll.p..........................p.....ah....p....t...h..s..p..ll...sp.............p.s..hp..................hhps.a..hh.h.p.hl.p........thssh.....Ls.p.php..p.h.pht.a.t...............................sl..Gh.p......s............ph+t......shph.sp...........s....h...s.slGt........................................hYspp.....hF....s...p...u..................K...t....p.........h.pp....h.l.pplhp.......sa.pppl............p...p........s.W.......h..st.p..T+p..pAh.tK..............lst..ht.tl.GYP.................................................................................................. 
0 507 719 1169 +5478 PF05650 DUF802 Domain of unknown function (DUF802) Yeats C anon Yeats C Family This region is found as two or more repeats in a small number of hypothetical proteins. 22.60 22.60 23.10 24.10 22.50 22.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.57 0.72 -3.85 40 307 2009-01-15 18:05:59 2003-04-07 12:59:11 6 8 124 0 97 342 6 53.40 35 15.48 CHANGED losph-sosuslAs.......sWspALApQppuspuLsppLcsuLsphApsF-p+uuuLl ........losth-sosuslus.......sWpsALApQppuscsLupphpsuLsphusoF-p+SAuLl........... 0 13 31 67 +5479 PF05651 Diacid_rec Putative sugar diacid recognition Yeats C anon Yeats C Family This region is found in several proteins characterised as carbohydrate diacid regulators (e.g. Swiss:P36047). An HTH DNA-binding motif is found at the C-terminus of these proteins suggesting that this region includes the sugar recognition region. 21.40 21.40 21.40 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.50 0.71 -4.75 54 1274 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 1153 0 179 746 9 132.30 47 35.86 CHANGED LsppLAppIVpRsMplIsaNlNVMsppGlIIuSGDspRlGphHEGAlhulspp+slpIspsssppLp.GV+.PGlNLPlhapscllGVlGITG-P-plptauELV+hsAELllEQuthhcphpWcpRtpEchlhpLl ..................LcsphAQcIVsRsMcIIs.p..N.INVMD.tcGhIIuSGDpcR.IGph.HEGAlLsLs.ps.....+.s.V-.I.-c.s.s.AppL+....G.V+.tGINLPLp.h.c.scllGVIGlTGE.P.-plRpYGELVpMsA.EhhlEQspLhc..lt.-pRh+EEhlhpLl............................. 0 51 97 134 +5480 PF05652 DcpS Scavenger mRNA decapping enzyme (DcpS) N-terminal Moxon SJ anon Pfam-B_9894 (release 8.0) Family This family consists of several scavenger mRNA decapping enzymes (DcpS) and is the N-terminal domain of these proteins. DcpS is a scavenger pyrophosphatase that hydrolyses the residual cap structure following 3' to 5' decay of an mRNA. 
The association of DcpS with 3' to 5' exonuclease exosome components suggests that these two activities are linked and there is a coupled exonucleolytic decay-dependent decapping pathway. 25.00 25.00 28.80 27.30 24.50 23.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.49 0.72 -3.68 20 268 2009-01-15 18:05:59 2003-04-07 12:59:11 7 6 220 18 181 258 0 107.40 33 32.37 CHANGED pphlscFchp+lLspsspsKsIsLLGpls.......s..csAIlllEKTsFph...-pl............................huhhss.................hpplcpltpNDIYtWhlshhtpcls............slKlsLIaPATppHI+KYspQ ...............................................t..lspFchp+lLsp...........ssp..sKpls.LLGp.ls...............s..csAllllEKssFth.....-ps.........................................hshhsu......................htclp..lhsNDIYthahshssp....p...hs..............................slKlslIaPAT-pHIcKYppQ............... 0 52 88 144 +5481 PF05653 Mg_trans_NIPA DUF803; Magnesium transporter NIPA Moxon SJ, Eberhardt R anon Pfam-B_9876 (release 8.0) Family NIPA (nonimprinted in Prader-Willi/Angelman syndrome) is a family of integral membrane proteins which function as magnesium transporters [1,2]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.95 0.70 -5.46 7 1396 2012-10-02 19:55:49 2003-04-07 12:59:11 9 14 342 0 935 1629 226 236.20 25 61.05 CHANGED hp.D.hhGlhLAlSSSlFIGuSFIlKKKGLh+hut.uuhRAupGGauYLpEhlWWuGhloMhlGEsANFAAYAFAPAsLVTPLGALSlllSAlluphhLpE+LphhGhlGChLsIlGSsllVlHAPpEppltSlpplhphhs-PuFllashhllssslhLlhhhtP+hGpppllVYIslCSlhGuLoVhSVKAlGIAIK.ohs.Gts.ltashsWlhlllhlhClshQlNYLN+ALDhFNsulVoPlYYVhFTT.slhsShIhFK-Wts.shsslhspLsGFhTllsGsFLLHsh+Dhs.sp ..............................................................................h.......Gh.lsh..u......shh...u..uh.....l..p......+...h..............u..h.h...p..t............t..............................t......s........h.....s.Y...h.....pp.h.....W..W.....h.G.h...h.........h..................h..........h.l.G..p....h.ss....F...s....AY....uF.......A....P.........s.lV..................sP..L..G.u.lu.l.....l...h..........s......slluthhL.p...E..plp........h..hs........h.......l........G.........s.h..lslh.G.s.s.l.l.....l....h...h.....u......s......p.p...p...t..h...........s...hp......ph.......t.h.h.h...p...........................s.....F.l..ha.h.h....h..........h........hh......h......h...h..........h...........hh......h......h...........t.................................t...................s.......t.........p...........p........h............h.....l........h..l.....ls.....u......h...................h.G..u....h....o....lh...ss....K.u.....lu.hhl.t.....h.t.....s............h..............h..a.h..h...h...h..hh.h..h..s.h..................hh..Qh..alNcuL...p.......hass..sh.......l.Ph...halhaT.....sl...hs.....u....hlhapt.a.t...s....p............hh..h..hhG..hhhh.h.Glhll.p.........t.......................................................................................................................................................... 
0 287 502 740 +5483 PF05655 AvrD Pseudomon_AvrD; Pseudomonas avirulence D protein (AvrD) Moxon SJ anon Pfam-B_9946 (release 8.0) Family This family consists of several avirulence D (AvrD) proteins primarily found in Pseudomonas syringae [1,2]. 25.00 25.00 59.70 33.70 18.80 18.80 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.06 0.70 -5.55 3 71 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 50 0 13 69 0 260.20 35 96.63 CHANGED MQDLSFSoIENHLGPAKDRFFGDGFKHVEYSARHVNLTESA..VcAoIoLSYPANWSKKNcSuELlPHLSSIDALTISlNLSQDILLNcFKSID.....HCWVRRISIKAGsKPEEDLR-INA..KITKESQGLDSpGDThLIFGGNVGTMTVQLEFIIPAAHEI-TlKDS..............oEKSCYSLHFKNRTQFIDDIIFYSPLN.AIS+LFVAaDsE.PNFLPGGIEANYPNIlNPVDSLVSHAQIAQALLYKLDGLTR-ESNTLWMRsLNIIAENPAKRpAATRLLVTELKRANIVSlKGcNWRVAEVAGHMNGIThSSSVAHLLPL .................................tsl-phLGstcpRaFGpGa+psphphpph....thstpt.....hpu.ho....l..sYPu.sWScKsss.-lhPHLSoIDAlhlusplup....tllh...p..ph....p.u.l-.......psa.l++loI+AGpcP....E.DLp..s..l..s..A..plsppt.s.ps..ss..t..hhFt.spl.usMplplph..hh...s.....t...cph.....t.shpss..............sp.cphao.ta+s.......+pphlpclhh..ss.hs..shu.p.lhh...t.stt.sph....Ghpu.s.a.s...shhs.lDsLlshAQluQsLlY+LDslsRspSsTLWMRplsl..h..spp.....Ph...t..ht..t.t..lhs....ph.csphl.phtsppWR...sph...s..........s..t...hphpsplAH.L......................................... 0 1 8 13 +5484 PF05656 DUF805 Protein of unknown function (DUF805) Moxon SJ anon Pfam-B_2800 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
26.80 26.80 26.90 26.80 26.50 26.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.91 0.71 -4.28 159 3368 2009-01-15 18:05:59 2003-04-07 12:59:11 9 20 1884 0 465 1828 801 123.10 28 77.08 CHANGED pahsFpGRusRp-aW.hhhLh.hllhhh......hh.hlshhh........................................thhshlhhlhhl...shhl.......Ps....lA...lsl..RRLHDhs.+SGW...................ahLl.................................................hhl...........P.hlGh............................................lh...hll....hh.h................h.u...p.s......sNp.aGssPts ......................................................................hhsFpGRspRp-.a........W..hhhL.h...h.l.h.h.h.l....lh..hlthhh......................................................hhh.sll...s.h.l.a.s.l....h...shl..........Ps...........hA.......lslRRLHDps..+SGa...................Whll.........................................................................................................hhl....P..hlsh..........................................................lh....llh....hh.s................hs.u...sss..tN+.aG.ss..h............................................................ 0 122 259 367 +5485 PF05657 DUF806 Protein of unknown function (DUF806) Moxon SJ anon Pfam-B_7291 (release 8.0) Family This family consists of several Siphovirus and Lactococcus proteins of unknown function. The viral sequences are thought to be tail component proteins. 25.00 25.00 26.30 26.10 24.40 24.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.51 0.71 -4.09 9 73 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 68 0 13 71 1 122.90 31 97.19 CHANGED hpPsppsppllpshshshlDpVYhssIPcEhlsNsspThVLlTEstssPspauNssFpuhshGVElQIFYphsls.pDhp..ssEIpLh+thpcscWplssS+sHhhDPDTs....QhpKshYhs+sh.I ...........................hPl.pltpllpshshshlD..plahsslPpEhh..cssspThlLl..TEssss.stauNssFpshshsVElQIFYppshs.pDhp.......phElpLh+hhpsscWhlss.u.+sHshDPDTp....Qlppshahs+sh........... 
0 4 7 10 +5486 PF05658 YadA_head Hep_Hag; Head domain of trimeric autotransporter adhesin Yeats C anon Yeats C Repeat This seven residue repeat makes up the majority sequence of a family of bacterial haemagglutinins and invasins. The representative alignment contains four repeats. 20.20 20.20 20.30 20.20 19.80 20.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.39 0.73 -7.61 0.73 -3.49 239 9521 2012-09-20 11:56:39 2003-04-07 12:59:11 9 620 916 22 1068 8054 662 27.80 37 11.89 CHANGED AsGpsusAlGssupA..sussosAlGssup .............AsGpsSlAlGssutA...sussSlAlGsso................ 0 242 496 794 +5487 PF05659 RPW8 Arabidopsis broad-spectrum mildew resistance protein RPW8 Moxon SJ anon Pfam-B_7373 (release 8.0) Family This family consists of several broad-spectrum mildew resistance proteins from Arabidopsis thaliana. Plant disease resistance (R) genes control the recognition of specific pathogens and activate subsequent defence responses. The Arabidopsis thaliana locus Resistance To Powdery Mildew 8 (RPW8) contains two naturally polymorphic, dominant R genes, RPW8.1 and RPW8.2, which individually control resistance to a broad range of powdery mildew pathogens. They induce localised, salicylic acid-dependent defences similar to those induced by R genes that control specific resistance. Apparently, broad-spectrum resistance mediated by RPW8 uses the same mechanisms as specific resistance [1,2]. 
24.00 24.00 24.10 25.10 23.60 23.90 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.75 0.71 -4.66 10 146 2009-09-21 12:32:58 2003-04-07 12:59:11 6 17 20 0 59 159 0 131.50 27 27.49 CHANGED PluElhs.........GAALGluLQlLa-Alp+AKD+ShTT+sILcRLcATIpcIoPllscIDKlScch--s..R+VlEcLK+LLEcAssLVEsYAELR.RRNllKKaRYpR+IKELEuuL+WhlDVDlQVsQWsDIKELhAKMSEMsTK.....LDcItsQP .....................................clhs.........GusLGhsht.Lh....cslhcspc+uhph+shlc+LpsTlcsIpPhlhc.Icphs..pchsps...pc.hccLpphLccuhpLlcphsclp.phshh.++hp.Y...tp+lcpl-tplthhhpsplplp.htDlpclhsthtt............................................................. 0 11 45 49 +5488 PF05660 DUF807 Coxiella burnetii protein of unknown function (DUF807) Moxon SJ anon Pfam-B_7114 (release 8.0) Family This family consists of several proteins of unknown function from Coxiella burnetii (the causative agent of a zoonotic disease called Q fever). 25.00 25.00 112.70 111.80 21.70 16.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.61 0.71 -4.52 4 23 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 2 11 0 138.40 57 59.30 CHANGED MLtApLhF.GphslGPI.PcSYFGNsGAI.sPcIc.KlATGcYTIThLSuRslppTQPulVHIslhD-sNtsFCsRlsKulpFTYcPpNo-.FPWKhhhpIAlpEt-hhtIcpKp....l.VTh+hhpcupGslVssGhhptcshhNV .......MLtApLhF.GphslGPI.PcSYFGNsGAI.sPcIp.KhATGcYTIThLouRslppTQPssVHIsh.D-sNtsFCsRlsKslpFTYcPpNSp.FPWKhhhpIAlpcGDGohhDsp.....VMlThthtNpsstAlhssshsRspAclNs 0 2 2 2 +5489 PF05661 DUF808 Protein of unknown function (DUF808) Moxon SJ anon Pfam-B_7112 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 
25.00 25.00 35.80 26.10 22.10 22.10 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.88 0.70 -5.59 47 1348 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 1301 0 257 880 93 282.80 53 95.12 CHANGED MAGuSLLsLLDDIAolLDDVAlMTKlAAK..................KTAGVLGDDLALNAQQVoG..VpA-RELPVVWAVAKGShhNKhILVPAALLISAalPW.........lITPLLMlGGhaLCFEGhEKlhHKahHp..........p.cppptpuhscss........hDLsAaEK-KIKGAIRTDFILSAEIIsIsLGoVustshhsQlhVLusIAllMTlGVYGLVAGIVKLDDhGLaL.p+sus.....htctlGtuLLssAPhLMKsLoVlGThAMFLVGGGILsHGlPslHHh...lc.hstth.......ussluslsPsllNullGllAGulllhll ...........hAhuSLLsLLDDI.......Aol.......L.......DDlulMuKlAAK..............KTAGVLGDDLALNAQQVoG..VpAsRELPVVWuVAKGSLlNK.l.I.L.V.PhALlISAFhPW.........hITPLLMlGGua.....LCFEGsEKllHhh.s+....cc...........pp...p...pph.t.ss.............tDshthEKcKlKGAIRTDFILSAEIlsIoL.GhV..u...p.........s..s....lhsQllVLusIAlllTlGVYGLVulIVKlDDlGhaLs...c...+uus........................hhpulG+GLlhhAPhLMKsLSlVGTlAMFLVGGGIl...V........H.G.....l........s.....s........L........H.....H.h.............lEchst.t..t................uuhluhll.ssllshllGhlhGulllh........................................ 0 55 142 209 +5490 PF05662 YadA_stalk HIM; Coiled stalk of trimeric autotransporter adhesin Yeats C anon Yeats C Motif This short motif is found in invasins and haemagglutinins, normally associated with (Pfam:PF05658). 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.52 0.73 -6.66 0.73 -3.98 75 6882 2009-01-15 18:05:59 2003-04-07 12:59:11 9 631 823 15 787 5827 118 22.40 61 6.44 CHANGED ppIoNVAsG...ssuTDAVNsuQL ...ppITNVAuG....ssuTDAVNsuQL.. 0 128 332 567 +5491 PF05663 DUF809 Protein of unknown function (DUF809) Moxon SJ anon Pfam-B_7264 (release 8.0) Family This family consists of several proteins of unknown function Raphanus sativus (Radish) and Brassica napus (Rape). 
25.00 25.00 107.20 104.70 20.90 19.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.18 0.71 -4.25 4 16 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 11 0 131.20 97 99.95 CHANGED MITFFEKLSTFCHNLTPTECKVSVISFFLLAFLLMAHIWLSWFSNNQHCLRTMRHLEKLKIPYEFQYGWLGVKITIKSNVPNDEVTKKVSPIIKGEIEGKEEKKEGKGEIEGKEEKKEGKGEIEGKEEKKEVENGPRK ...MITFFEKLSTFCHNLTPTECKVSVISFFLLAFLLMAHIWLSWFSNNQHCLRTMRHLEKLKIPYEFQYGWLGVKITIKSNVPNDEVTKKVSPIIKGEIEGKEEKKEGKGEIEGKEEKKEGKGEIEGKEEKKEVENGPRK.... 0 0 0 0 +5492 PF05664 DUF810 Protein of unknown function (DUF810) Moxon SJ anon Pfam-B_5709 (release 8.0) Family This family consists of several plant proteins of unknown function. 19.20 19.20 19.40 19.30 19.00 18.80 hmmbuild -o /dev/null HMM SEED 677 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -13.12 0.70 -6.25 10 230 2009-01-15 18:05:59 2003-04-07 12:59:11 6 14 75 0 160 222 1 430.80 22 48.43 CHANGED uLocsDLRETAYEIllAustuS.....tshhh...pthtppc..................................S+sKhhLuL+tpt....................................ttssupst+sc+shohsElMRhQMcloEphDsRlR+sLL+hssuplG+RhEsllLPLELLpplps.o-FsD..cEYcpWQ+RpL+lLEtGLlh+PslshscosppspcLRpll+s.....uhspslsouppoEsh+oLRpsshSLutR.upsGh...su-sCHWADGYP..LNl+LYptLLpusFD.hD-ssll-El-EllELlK+TWssLGIs.cslHslCaAWsLFpQaVhTu..E.tLLtsAhstLt-VutDtp.ssp-slYlK................hLsSsLSulhuWu-K+LhDYHcoF......scssl......sh-slVoluhssu+lLuEDsspth+cptt.........ss+-+l-pYIRSSl+sAFsc...........cuphtts+uspssttsLA.LAc-ss-LAhpEpphFSPlLppWHP.usGVuAupLHpsYGscL+paLuuhscLT.DsVpVl.uAscLEpsLVphh...usss.-DGGculh+.ch.PaElEshhusLVhsWlpt+l-clpphV-RshppEsWcPtSs.cpsaAsSAVElhRllcETl-tFFtL.IPh+sshLpsLscGlDpuhQpYsspshuuhGu+pshlPslPsLTRhpcsotlhshhKKchhssstscc+tspth..............shslspLCVRLNTLcalhocLcsLE+ 
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................................................................h............Pa..........ph....-phhhsh...lhpWlptp.pph..phlppshpp.....-.....p.......a.....p...sh........s.............t........p........p.taus.Ssl-lh+hhpp..............sl-th.h.tl........h...s.......c.......t...h...h....sLhpuls.....thht......Yhphh.....t.th....s...s....c..p..p.hh.P..h.s..LoR.ppt...h.....hh.+ct..............p................................................hh.p.hl+lNslpahhpplp.l-................................................................................................................. 0 27 89 125 +5494 PF05666 Fels1 Fels-1 Prophage Protein-like Yeats C anon Yeats C Family \N 25.00 25.00 47.40 26.90 19.70 18.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.27 0.72 -4.59 12 669 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 339 0 28 193 6 49.60 50 79.54 CHANGED sAhA..spssslhoPusGVlCDph..lCtD..........StGlShslTccYhGp+ .............u.ts.ssu..o.AsGVhCDsh..lChs..........S.stIs.phT+hhhhp+.................... 
0 4 10 19 +5495 PF05667 DUF812 Protein of unknown function (DUF812) Moxon SJ anon Pfam-B_7417 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. 30.00 30.00 31.60 31.20 29.40 27.80 hmmbuild -o /dev/null HMM SEED 594 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.71 0.70 -6.01 10 190 2009-09-10 22:47:38 2003-04-07 12:59:11 6 4 106 0 126 188 1 373.50 26 88.19 CHANGED M..-ElDpIIlHoLRQlGs-lsE-lpuLcsF..........ToshlVcsVs+CLplIsPsl..sLP+oLP.s.....uMApRFpluopLApACp-lG..YRu-IGYQTFLYsNss-lRpllMFLlEpLPR-p.suscus...pPlu.............psstLccpIspsLppQLp...............hPWlPphsR.............h...s..shptFpsp.hsl..sspsp.tt.ssthpphapp.hl.slo......tslssolLpsssuplshsschch-a.utsh......E-hcpcKppplpp+ltshhppstt....s......suucpls-llpp.t.t.........s..hspppt..........spphuss.usssssshssEcstpppcps-L-uLpppIcclpsplpphpsch+phpuplpplp-phpstcppssclEpphKlKc+TstLLsDsEsNl....sKLpullsuoup+hhpLssQWEs+RsPLl-phcpL+thpppp.scoppphccIcslRpcIcclspElpsKsphappL.spaEshs+s.VsRouYTcRILEIIuNI+KQKp-IsKILsDTRsLQKEINsloupL-RTFsVTD-LlF+-AK..+DEtsK+AYKhLAsLHssCs-LlcsV--TGslpREIRDLEcQI-sEct+s 
......................................................................................................................................................t.....h.........p....l.th..........sst.hhphh.thl..lp........t........ths......h.P.........shst+hphs.tluphh....p..G..h...phua......p............p..hLYss...p.....h+tl..hhhLhppL.stt...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ht..h......h.t..t.ht..t.h......t.h..p...t.t.t...........p....tt.ht.h.tph..h.h.ps.t.....h......pLp.hhtttttph.plttpWp...thch....h.tphpthp...........ttp..h....tplp.hctphpth..chtt+pp.hppL.t.........phpphspt..h......s.......R..YhpRIhEhhtsl+KQct-I.+lh.-s+.lQpphs.httplpRpashs--hlappu+........p-...........h+.....psY+.Lstlct.httlhthl..hs.h.pphh....chp.pl........................................................................................... 0 48 64 98 +5497 PF05669 Med31 SOH1; SOH1 Moxon SJ anon Pfam-B_7443 (release 8.0) Family The family consists of Saccharomyces cerevisiae SOH1 homologues. SOH1 is responsible for the repression of temperature sensitive growth of the HPR1 mutant [1] and has been found to be a component of the RNA polymerase II transcription complex. SOH1 not only interacts with factors involved in DNA repair, but transcription as well. Thus, the SOH1 protein may serve to couple these two processes [2]. 
25.00 25.00 25.00 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.63 0.72 -4.29 34 308 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 274 4 217 302 0 103.10 44 60.92 CHANGED sshsRFplELEFVQsLANPpYlpaLA...pp.........pYa..................p-.pFlsYL+YL.pYW+cPcYu+alhYP.pCLahLc.lLpsppFRpslsssss....hphl....pphhhpattht.pthph .......t.phRFplELEFVQsLANPpYL.s.............aLA...Qp...................tah.................................c-psFlsYL+YL.pYW+..cP-Yu+aLpYP.tCLahL-.LL.Qp.-.pF...Rptlsssps......hch.l..tppthhtW.tht.....t......................................... 0 73 119 181 +5498 PF05670 DUF814 Domain of unknown function (DUF814) Bateman A anon Pfam-B_738 (Release 8.0) Domain This domain occurs in proteins that have been annotated as Fibronectin/fibrinogen binding protein by similarity. This annotation comes from Swiss:O34693 where the N-terminal region is involved in this activity [1]. Hence the activity of this C-terminal domain is unknown. This domain contains a conserved motif D/E-X-W/Y-X-H that may be functionally important. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.88 0.72 -4.10 39 2812 2009-09-13 15:36:20 2003-04-07 12:59:11 8 19 2349 0 939 2335 311 89.80 31 16.57 CHANGED ahhoss.ahllsG+ssppN-hL.sh+hhcppDlahHscthsuuHVll+spstps.p........slp-AAphAstaS+.h+tuspssssY...........sphcalpK ............h...sssshhIhVG+NshQN-.L..sh...K..h..Acp..s.D...l..W.......h....Hs+.cl.PGSH.Vl..l+s..s..t..s.s.s.p.............................slp-A....ApLAuh..aS...........c...ucpu......sp.lsVca...............sth+pl+K............................................... 0 362 602 811 +5499 PF05671 GETHR GETHR pentapeptide repeat (5 copies) Bateman A anon Pfam-B_8059 (release 8.0) Repeat This pentapeptide repeat is found mainly in C. elegans. 
The most conserved amino acid at each position leads to its name GETHR (Bateman A unpublished obs.). The family also includes a divergent repeat in a microneme protein Swiss:Q26588. The function of this repeat is unknown. 21.50 21.50 21.90 21.70 21.10 21.20 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.12 0.72 -4.80 3 9 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 3 0 9 25 4 23.90 70 46.64 CHANGED GETHRGETHRGETRRGETHRGET+R ....shRGET+RGET+RGET+RGET+p. 0 6 6 9 +5500 PF05672 MAP7 E-MAP-115; MAP7 (E-MAP-115) family Bateman A anon Pfam-B_8157 (release 8.0) Family The organisation of microtubules varies with the cell type and is presumably controlled by tissue-specific microtubule-associated proteins (MAPs). The 115-kDa epithelial MAP (E-MAP-115/MAP7) has been identified as a microtubule-stabilising protein predominantly expressed in cell lines of epithelial origin [1]. The binding of this microtubule associated protein is nucleotide independent [2]. 28.50 28.50 29.00 29.00 28.10 28.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.28 0.71 -11.64 0.71 -4.87 3 338 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 64 0 136 296 0 159.00 47 22.95 CHANGED APuussNuuAsuKPoAGTTDsEEATRLLAEKRRQAREQRE+EEQERREQEEpDRpcREELtpRsAEER.sRREEEARRpEEEcAREKEEp..............hpRpAEEc....thpEQEEQER..lQKQKEEAEARuREEAERhRLEREKHFQQpEQERLERKKRLEEIMKRTRKSEVS-phKKpDsKs ................................................sssssK.ssAGTTDsEEAs+lLAE.KRR.AREQ+E+..E...EpE.R.......h...c...pE.....Ep.-.........R......t....t.....+.......E.E..t......p+.h...tE..E...............R.t+......pE.-E.uR+.h.E.-....c.......pt.R.c..c.tcp................................t.t..tcEc...........tppEp...EE..pE+..........lp+QKEEAEu+..u+EEAE+.RhEREchhppp.EQE....RhERKK....RlEEIMKRTR+...o-s...stp.pt............................................................................. 
0 13 29 65 +5501 PF05673 DUF815 Protein of unknown function (DUF815) Moxon SJ anon Pfam-B_6403 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.51 0.70 -5.45 75 1159 2012-10-05 12:31:08 2003-04-07 12:59:11 8 7 1089 0 330 2454 848 243.70 43 76.12 CHANGED spshAapWc......s..uh.Lpsl.............ppscslpLspLhGl-cQ+ctLlcNTcpFlpGhPANNlLLaGuRGTGKSSLVKAllspasspG..........LRLIEls+-DLtsLPpllshl+....spP.p+FIlFsDDLSFEps-ssY......KsLKulL-GulpupPsNVllYATSNRRHLlPEhhsDsts........ssElHss-ulEEKlSLSDRFGLhLuFashsQ-pYLplVcpas.............pph....ulshst...........cpLcpcAlpWuhp+GuRSGRsAhQFhpclsGch .....................................................................................s.shAFhWp.........stpsh..LpPl.............tp..ss.t.lpLscLhGl-p.Q+.c....t......L.......h...c.......N...T.......c.........p.....F.....l..p...G.........h.........P.........A...........N..N.........s..L.L.aGuRGTGK.SSLlKA.ll.spats.pG.........................L...R..L.....l...E.....l....p..+.....c..D...L.........t...s.....L....s....c.......l..l....s....h....l...+...................p.p....s........+....F.....I.l.F...CD.......D....L.............S.......F..........Ec...s.-.s...sY...............K....s.....LK..u....l...L....-..G...u..l.p.s........p.P.s....N...VllY..AT.SN.RR.HLlsEphp-Nps.........ssElHsu-u.lEEKl.S..LSDR...FGLWLuFashsQccYLphVctah...........................pch......slshsp..............-pl+tcAl.pWuhpRGuRSGRsAhQFhpphtGp.h............................................................................................. 1 102 225 279 +5502 PF05674 DUF816 Baculovirus protein of unknown function (DUF816) Bateman A anon Pfam-B_7178 (release 8.0) Family This family includes proteins that are about 200 amino acids in length. The proteins are all from baculoviruses. 
This family includes ORF107 from Orgyia pseudotsugata multicapsid polyhedrosis virus (OpMNPV) and a variety of other numbered ORF proteins, such as ORF52 Swiss:Q91F03, ORF140 Swiss:Q9YMI8. The function of these proteins is unknown. 24.60 24.60 31.80 31.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.96 0.71 -4.54 14 62 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 58 0 0 57 0 161.60 54 71.62 CHANGED olDVD-Fu+QLIADKCssLIEsppMLPtslLullKpA+c-YhcsPoscNYpplK+LhsQTKYV--SI-YKNFNRtlhLIAhphhlNKu+-hFssY..Kshl-sshKRLppINPDlKSSP+AMLpHYpECL-....ph-sP+.tD-HHLloFuKEIhTKIFl-slc.ho.sNpuslshs ..lslDsFA+QLIsDKCSsLIEspshLPsNlLsIlKpARDcYFcsPosKNY-.lKKLh.pTKYhDDSIDYKsFNRRlLLIuhKhuLNKupsaFssY..KsllEsAlKRLspINPDlKSSP+AMLQHYpECLE....NlDsP+.sDEHHLlTFuKEIATKIFl-sl-.aSa.sNpSslph.p... 0 0 0 0 +5503 PF05675 DUF817 Protein of unknown function (DUF817) Moxon SJ anon Pfam-B_7331 (release 8.0) Family This family consists of several bacterial proteins of unknown function. 25.90 25.90 49.70 49.70 25.80 25.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.60 0.70 -4.96 39 378 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 367 0 80 340 18 223.70 50 85.60 CHANGED phlhFshKQAhuCLFssllhhsLhlT.+ll..shssLsRYDhLLIhsLlhQhhhltotLEThDElKVIhlFHllGhsLElFKlp..hG..SWSYPEtuhhKlhGVPLaSGFMYASVuSYIspAWRha-lchtpaPshahsssLushIYhNFFTHHalhDhRWhLhshshllFhRTaVhFplpssph+MPLlLuFhLIuFFIWlAENIuTFauAWpYPsQtcuWphVcluKloSWaLLlIISh ...............phhhFshcQAhuCLFssllFlsLhlophh....shsslhRYDhLLIhsLlhQhhhlthtLEThDElKVIslFHllGhsLElaKsc..hG..SWuYP..-tuh.KlhGVPLaSGFMYAuVuSYIsQAWRhhclchppaPshhhshsLushIYhNFFTHHalhDhRWhLhshhhllFh+ThVhFpltst.p..h+MPLlLuFhLIuFFIWlAENIuTFaGAWpYPsQhpuWphVHluKloSWaLLVllSh.................. 
0 23 47 64 +5504 PF05676 NDUF_B7 NDUFB7; NADH-ubiquinone oxidoreductase B18 subunit (NDUFB7) Moxon SJ anon Pfam-B_7077 (release 8.0) Family This family consists of several NADH-ubiquinone oxidoreductase B18 subunit proteins from different eukaryotic organisms. Oxidative phosphorylation is the well-characterised process in which ATP, the principal carrier of chemical energy of individual cells, is produced due to a mitochondrial proton gradient formed by the transfer of electrons from NADH and FADH2 to molecular oxygen. The oxidative phosphorylation (OXPHOS) system is located in the mitochondrial inner membrane and consists of five multi-subunit enzyme complexes and two small electron carriers: coenzyme Q10 and cytochrome C. At least 70 structural proteins involved in the formation of the whole OXPHOS system are encoded by nuclear genes, whereas 13 structural proteins are encoded by the mitochondrial genome. Deficiency of NADH ubiquinone oxidoreductase, the first enzyme complex of the mitochondrial respiratory chain, is one of the most frequent causes of human mitochondrial encephalomyopathies [1]. 21.90 21.90 22.50 22.00 21.80 21.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.52 0.72 -4.56 14 256 2012-10-02 15:44:21 2003-04-07 12:59:11 8 6 227 0 183 245 2 65.10 43 53.76 CHANGED .MlATpEEMptA+LPLt.RDaCAHhLIsLp+CRp-pa.hs.atCccERHsY-pCpY-DalhRhKch- ...........hATpcEMppA+lPLthRDhCAHhLIsLp+C.R.pcp.a.hs...a...pCccERHsY-cCp.Yc-ahhRhtch-............ 0 62 102 150 +5505 PF05677 DUF818 Chlamydia CHLPS protein (DUF818) Moxon SJ anon Pfam-B_7510 (release 8.0) Family This family consists of several Chlamydia CHLPS proteins, the function of which are unknown. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.21 0.70 -5.72 5 50 2012-10-03 11:45:05 2003-04-07 12:59:11 7 2 27 0 22 183 59 298.60 28 74.67 CHANGED soILcocP+PplshFSSc+A+puaE+RctHPlLaKILslIhcIlKhLIGLIlFlPLGLaWVLQKlCQNsILPuuuhLhop.phChc....splL+psFlspLcshhppscVSSs+RVsIQpD-LlIDoLuI+lPsAcPcRWMLISLGNS-sLEshshlttcc-...SlpclAKtsGANILVFNYPGVMSSpGsloRENLuKAYQACVRYLRD+.sGPpA+QIIAYGYSLGulVQAtALsKElsDGSDuToWlVVKDRGPRSluAlAsQahGsIGshlAsLsGWsIcSsKpSc-LsCPEIFIYusDp.cssLIGD.GLFp+EsClAsPFL-sPpl.chsGsKIPIuEpsLpHpsPLccsTIpcLAssIlc+LS .....................................................................................................................................................hthhKhlh.hhh..P.slha.h..hs.t..lhst....h............pphhp..a.h.tthpt...h.ppt.h..s...phpRlsIp.t..D.plhlDsht.l.p.h.s.....p....A....+ss.R.W.hLhuhGNupsaEp..hh....s....lhplAKp...h.....p..uNlllFNYPGVhpS........pGps..s.p.c.s.LspuapAsV.+.YLcDp..pGspAcpIlsYGaSLGu...u..Vp...A..t.ALppp.hp..sp-shpahhlKDRu.pSlushA....pp..........hh...s.......l.s..thhhp....lh...sWphs....u.K.ucpL...s...-l..hh.......hth......s............t....p........l.tc.thh..p.shAt.hhp.........pK..l......c.t.l.................................................................................................. 0 12 12 20 +5506 PF05678 VQ VQ motif Bateman A anon Pfam-B_7960 (release 8.0) Motif This short motif is found in a variety of plant proteins. These proteins vary greatly in length and are mostly composed of low complexity regions. They all conserve a short motif FXhVQChTG, where X is any amino acid and h is a hydrophobic amino acid. The function of this motif is uncertain, however one protein in this family has been found to bind the SigA sigma factor Swiss:Q9LDH1. It would seem plausible that this motif is needed for this activity and that this whole family might be involved in modulating plastid sigma factors (Bateman A pers. obs.). 
20.40 20.40 20.60 20.90 19.90 20.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -7.23 0.72 -4.91 49 612 2009-09-11 20:58:54 2003-04-07 12:59:11 9 6 26 0 403 538 0 30.40 35 13.43 CHANGED thhsspllps-sssF+slVQcLTGtssssss .............hssphlp.s-.ss.sFRslVQcLT.Gtssss.......... 0 56 239 327 +5507 PF05679 CHGN Chondroitin N-acetylgalactosaminyltransferase Bateman A anon Pfam-B_8249 (release 8.0) Family \N 19.70 19.70 20.40 19.90 18.90 18.30 hmmbuild -o /dev/null HMM SEED 499 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.70 0.70 -5.79 13 643 2012-10-03 05:28:31 2003-04-07 12:59:11 11 14 100 0 386 631 1 400.30 22 63.56 CHANGED HlssCl+shhSs+cD.VplGRCVpchuGl.sCshsa..Qulhacsaphs........hcuastph+scph+sAlTlHPlpsPshhYRLHsYhtpl+lpchppcphtLcp-ltphoph.tsps.tpsphP....................LGlsPs...hpPpsRp-llpW-als..+phh.uscss.P+pplcusp+p-lsDllspsh-plNpptp....sl-F..+pLhhGYRRhDsh+GhDYhLDL........Lhh++tRG++............hsVpR+hal.+shoc...................................................lEll.sssasscu............oRlslllPLsu+..sshhh+FlcsaEpsCLcs..ppsstLhllhahcss-spphsp.......pplhppLcs+a..ssu+lsalslp......................ssshSpslAlDlAs+..+hsh-oLlhlssschsFps-FLsRsRhNTIpGhQlahPIsFspYcPch.........................tp...psstsss.hclspss..........GaFcchuaulsuhYpuDhlpuchthhsp...................chps.GlEslclh-hhlpss.p...................................l+VhRusEPuLVa........la+slsCD.ssL-ptphchChsoKsps....luSpcpL 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................th......a...h...........h...........t.......pp.........lp....t........p...t..tl....-...s.lt.tshc.lNpp.p..................hh.ph.....pc.l.hp..G..h...hRh-.h+......GhcYh.Lch.........h...hptp......p................................h..h+..+.h..l..hp.s.hu.t...............................................................................................................l.hl...s....h.....htp........................................shlplllP......ltsp..hc.hh.pFhps.atph..shps....pppshLhllhhhp...p.pt.h.tp............thh.tphp..p.ch......shtphphl..tlp............................t..thscstuLp.hu.sc........th...s...s.c.s..LhFhsDl.chhhsschLpps....Rhps.....l.GhpsaaPlhFppasPth...................................................t................tt....sh.ppts.............G.aacp.uaGhsshYpuDahph...tth.s.........................phptWGhED.h..c..lhcthlpss...........................................LclhRs...ulhp..............ha.+..h.C.........s......p..hs.t..hphCh.sth.t......uuh.................................................................. 0 115 143 250 +5508 PF05680 ATP-synt_E ATP synthase E chain Moxon SJ anon Pfam-B_6116 (release 8.0) Family This family consists of several ATP synthase E chain sequences which are components of the CF(0) subunit [1]. 
21.00 21.00 21.00 21.20 20.10 20.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.10 0.72 -4.03 17 264 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 228 0 172 254 0 78.20 28 86.96 CHANGED hss...SPhlpssRaSALshGlsYGhh+hppLp...cpcpct+chctppthltptKht.tK++pscpc..hpthspuop.....................shht ..........h......SshlplhRaSALshGlhYGhh+.ptLp...cptptp+chctcpphlppsKtthtKpht..t..................................st.t.................................. 0 50 90 135 +5509 PF05681 Fumerase Fumarate hydratase (Fumerase) Moxon SJ anon Pfam-B_2085 (release 8.0) Family This family consists of several bacterial fumarate hydratase proteins FumA and FumB. Fumarase, or fumarate hydratase (EC 4.2.1.2), is a component of the citric acid cycle. In facultative anaerobes such as Escherichia coli, fumarase also engages in the reductive pathway from oxaloacetate to succinate during anaerobic growth. Three fumarases, FumA, FumB, and FumC, have been reported in E. coli. fumA and fumB genes are homologous and encode products of identical sizes which form thermolabile dimers of Mr 120,000. FumA and FumB are class I enzymes and are members of the iron-dependent hydrolases, which include aconitase and malate hydratase. The active FumA contains a 4Fe-4S centre, and it can be inactivated upon oxidation to give a 3Fe-4S centre [1]. 
25.00 25.00 25.70 25.50 21.70 21.70 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.63 0.70 -5.32 170 3534 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 2307 0 710 2256 603 274.30 41 60.95 CHANGED ltcsltchhpcsshhLssDhhpulpcAhcpEcush.u+tsltplLcNuclAspt..........phPlCQDTGhssl.FlclGp.pVphss...slp-slscGVccuYp..-ssLRhShVtc.Ph.pRpNTsc....N....sPAllchchl................sG...-plclphhsKGG..GSENhotl.....thL........sPup...ultcaVl-sltphGss..sCPPhhlGlGIGGTs-cAshLAK+.ALh+.lsptp..s..........chtcLEpElh-plNpLGIGsQGhGGpsTsLsV+lpphP...sHsAShP.VAlslsCtAs.R+sphpl .................................p.hstpAhpcsSahL.ssahppltp....hh.cc..c..........ust....sKhlhhphLcNuclAAcs..........hhPhCQDTGhsh.l.hsKhGpcVhhsG..........s.cpslscGVhpuYh..-ssLRhStss..s.shhcchNTGs..........N....h.PA..lch.h.V........................................................sG.....Dc..hchhshsKGG..GStNKohh................shL........sPu............slts.all-p.h.po....hGsu..ACPPh.hlulsIGGTutcsslhstK.A.h+..lDph..sstt.............u...+shcLEpELh-psppLGlGAQ.hGGphhshDl+lhphP....RHuAShP.VuhsluCu.As.Rph+hpl.................................... 0 239 456 591 +5511 PF05683 Fumerase_C Fumarase C-terminus Moxon SJ anon Pfam-B_2085 (release 8.0) Family This family consists of the C terminal region of several bacterial fumarate hydratase proteins (FumA and FumB). Fumarase, or fumarate hydratase (EC 4.2.1.2), is a component of the citric acid cycle. In facultative anaerobes such as Escherichia coli, fumarase also engages in the reductive pathway from oxaloacetate to succinate during anaerobic growth [1]. 
21.80 21.80 22.00 35.20 20.10 21.70 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.30 0.71 -5.19 72 3515 2009-09-10 16:06:45 2003-04-07 12:59:11 7 8 2299 1 713 2175 505 198.00 43 47.11 CHANGED GoGsspLpsPs...lcpaPcl.shcss...sss..............................+R..VsLss..lTc..--lppacsG-sLLLoGcllTGRDAAHK+lh-h.lspGEsLP..VDhcs+hIYYVGPV...DsltcEsVGPAGPTTuTRMDpaTcphLcpsGLhuMIGKuERGstsl-AI+cHtusYLhAlGG.AAaLlu+uIKpucVVuFt.-LGM.EAIacF-VcDM.PVoVAVDscGsulHppuPtp ..............................................................G.h.t.Lp........hp.hPt......t.........tps.................................hclsLsp.Phpc....ppL.sph.luspl.LoGsIls.uRD.h.AHt+ltEh...l....p.p.......G......c..s......LP...............l..+..s..+sI..YYAGPs.............css..s..shssGS......hG..PTTusRMD.s.a.sc.p.h..sp.sGshlMluKGsR.u.p.p.sp.-A.C.+KHsuhYLsulG.G.AAsLuppsIKcl.E.sV.tas.ELGM.EAIWcl-V-DF.PshlhlDs+GNshapph..p.................. 0 244 461 596 +5512 PF05684 DUF819 Protein of unknown function (DUF819) Moxon SJ anon Pfam-B_9034 (release 8.0) Family This family contains proteins of unknown function from archaeal, bacterial and plant species. 
20.70 20.70 21.60 21.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.95 0.70 -5.64 6 548 2012-10-02 17:06:44 2003-04-07 12:59:11 7 5 422 0 194 501 310 333.30 25 86.51 CHANGED uhLhuhuAhslWs...Ec+sKht..usVsuhllshLluhhhsslGLl...sspossasslhshLLPtsI.LLLhpsDLR+Il+.us+LLhhFLluSluhhlGsllAahlhp.h....lusDsW+hsAALhGSaIGGusNhlAhpssLsVssoshusulssDslhhulahhlLhhLsuht...pttshA-TsplpussschspspspEc+psss.schhhhlulShhlsuluthlushh.......h.hul.....susshsslhusslsLlhuFsshhsh..PuupcluplLlYshhsllGususlhslls.APhlhLauhhhlhsHlslhLulGKLF+l-LphlhlASpANIGGPsoAsshAsAhs.suLlssGlLhGlhGhulGTalGlhhG.hlpph .............................................................hlhh.hhuhhhhh.......p..p.....hh.h.t.......thlsuhl.hhhhhshlhsshs.lh........................................................t..s....ha..h.h.phllPhsl.lhhh..phDl+.........t.l.hp......u..s.+hlhhFhhu.shuh.hl.....G...s...l..uh...hl...h.t.h..................h....s.c........h........h.........p..h....h..us....lsuS.al.....GG.usNhsAht.p.......h..h...t......s..s..s.....s....h.....huhhlssDslhhslahhhL...hhhss...h...t....h...h...p..t...h..ht.s...c.t.pt.h......p....t.....h...t.....p....p...h............t.t........t....t...............t...p............................s..hhh.hl...u..h.u...hhhs.s..lup....h....hushh...............................h..................t.ha..hllhsh...sl.u..lh.h..s..h.....sp.....h...h.ph........s..Gu.pp.lushhlY.l.hlsslGh.th.s...lt..t.l...ls...s.Ph...hh...l.h.u..h..l..h..l..hlHhh.l.hl....h...lu.+.lh+...........h..............sl.....hh.lhlASp.ANl.GGsso.A.ss......hAs....Ahp..suLsssulLhullGYslGsahGhhhu.hl...h............................................. 0 70 128 167 +5513 PF05685 Uma2 DUF820; Putative restriction endonuclease Moxon SJ, Bateman A anon Pfam-B_7809 (release 8.0) & Pfam-B_8730 (release 14.0) Family This family consists of hypothetical proteins that are greatly expanded in cyanobacteria. The proteins are found sporadically in other bacteria. 
A small number of member proteins also contain Pfam:PF02861 domains that are involved in protein interactions. Solutions of several structures for members of this family show that it is likely to be acting as an endonuclease. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.28 0.71 -4.90 170 4083 2012-10-11 20:44:44 2003-04-07 12:59:11 7 12 450 7 1583 4539 272 163.40 17 81.91 CHANGED hp-a.hphs........ttth+hEh..hsGplhh..h...sshs.shHsp.hsstLhttl..t.h..hpt..ths..hshss.shshph....................tts.h.PDlshhptp.......hpphspt...................shsPslllElhSsssp...t-.htp.................KhthY.......t.thGlttaal......lD............spppp.......lplaphs..tt.......tap.hph......ttsphhph..sls.sht....lslp ..................................................................th..th.........t..t...p.h.Eh..hpGtlh.......h...........ss.s...Htt..h.ttlh.....thl.....tth.......hpt.....t.s......hhhhs..shtlth......s.............tpshhtPDlh.lhttp.........tph.pt...............................hstsPplllEll........S...s.u...sp...tDhtt.........................................K.h.t.h.Y.......t..phG.ltcYal......l-..............s.p...p...p.p...........lp.lapht....tt........tat....th........t...t..h....................t............................................................................................... 0 486 1227 1544 +5514 PF05686 Glyco_transf_90 DUF821; Glycosyl transferase family 90 Moxon SJ anon Pfam-B_6682 (Release 8.0) & Pfam-B_7101 (Release 8.0) Family This family of glycosyl transferases are specifically (mannosyl) glucuronoxylomannan/galactoxylomannan -beta 1,2-xylosyltransferases, EC:2.4.2.-. 
20.50 20.50 20.80 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.28 0.70 -5.78 19 1264 2012-10-03 16:42:30 2003-04-07 12:59:11 7 25 380 0 784 1222 143 256.40 18 53.19 CHANGED s..ps.sCPsYFRaIccDLcPWcps.GI..oR-hlcRu..+ppAthRlsIlsG+lYlcpaccshQoRDhFThWGllQLLRcYsG+lPDlELhFsCsDhPhl+ptsap.t.......sPPPLFpYCusspolDIVFPDWSFW.GWsEl......Nl+.WshhhtclpcusscspWpcRtPhAYW+GNssVsp..Rt-Ll+Cs.hos.p.hsAclhpp............................Dhs.t.c.uh+pssl.spCpa+YpI.l-GhAhShp.KYILuCDShsLhlps.Yh-FF.+sLhPhpHYhPlcsttc..spsl+.tlpWupspsptApcIucpGSpFlpccLpM-hVYDYMaHLLsEYAKLL+aKPplPpsusElss-shsC.hp.....Gh.+phh.pSh...sssps.PCph.PsPhp..tht.hhp+ptp.h+pVcphE .............................................................................................................................................................................t.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.......t...........t...........a.t.p.+....shh.h.....a+....G..........t.........t.............t.................................R.........t...h...h..t..h.......t..............t..........................h......t.s.thh.t.............................................................p.t.t.............hh...p........h...s....h.t-...t...h.......p.aK......Y...l..l..sl..-.........G...s.s...h.S.s..p...h...thl..htssSlsl.t......t.....s..h..........a........h......................-.....aa.....h...p...............t........L.....h.....P...h....h....H....Y..l..P...lp..p.......s.................hp-....l........p.........
..h....l...............p........a................h.........p..........p........p.......s.........p.....t...A...p...pIu.p.p.u..p.p.ahp............p.....lp..tt.h......Y.hhpl.h........pauph............................................................................................................tt..................................................................................... 0 263 474 641 +5515 PF05687 DUF822 DUF822; Peptidase_M15_2; Plant protein of unknown function (DUF822) Moxon SJ anon Pfam-B_7149 (release 8.0) Family This family consists of the N terminal regions of several plant proteins of unknown function. 25.00 25.00 26.20 25.70 21.30 21.00 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.18 0.71 -3.85 15 154 2009-01-15 18:05:59 2003-04-07 12:59:11 8 6 22 0 105 163 0 141.80 46 37.09 CHANGED uuusRpPTW+ERENNK+RERRRRAIAAKIauGLRApGNYsLPKHCDNNEVLKALCpEAGWlVEsDGTTYR...KGs+Ps...t.-hsGsSss..sSPsSShp..........SPhsSYpsSPsSSuFPSPoph-.hsht..........ssllPaLpshs.........ssSuhs .........ussRhPoW+ERENNK+.RERRRRAIAAKIauGLRtaGNYpLPKHCDNNEVLKALCpEAGWsVEsDGTTYR..........KGs+Ps.........phhGs.Sss.....soPpSShp..........SPhsS.pssst.ssphsSssp.s.......................h.s............sss.................................................................................................. 0 15 66 86 +5516 PF05688 DUF824 Salmonella repeat of unknown function (DUF824) Moxon SJ anon Pfam-B_2973 (release 8.0) Family This family consists of several repeated sequences of around 45 residues. 23.80 23.80 23.80 23.80 23.70 23.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -8.15 0.72 -4.32 25 1631 2009-01-15 18:05:59 2003-04-07 12:59:11 6 24 187 0 31 888 3 47.80 36 14.57 CHANGED Dush...s....AuKsKpGEslslsVTs+Du.sG.sPlssssFsLpRscuhsRpss .................stsh...s.......AuKsKtGEslslsVTs+Du..sG.sPlssssFslpRu.suhsRps...... 
0 0 0 13 +5517 PF05689 DUF823 Salmonella repeat of unknown function (DUF823) Moxon SJ anon Pfam-B_2973 (release 8.0) Repeat This family consists of a series of repeated sequences (of around 180 residues) which are found in Salmonella typhimurium and Salmonella typhi. Sequences from this family are almost always found with Pfam:PF05688. 21.00 21.00 21.20 21.40 20.90 20.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.47 0.71 -4.94 30 1826 2009-01-15 18:05:59 2003-04-07 12:59:11 6 36 210 0 42 1044 6 180.10 34 58.02 CHANGED GhTsAsGpsolsloQssuhGlKTslsAslssssssoss..hsVIFTVlTSPDospApMWGHMs-TlTs....sGhsF+RPhLsuEhsusss.......ohh-sNEsWuh.hs...sth.ssssst..Cs.hshlPshspLpuLYsspssst..lpTshGWPstts........YhSuspsssss.......apsls.LssGspsshsssss...shhoCl ..............................................GhTsusGssolslsQs.suhGl+Tslssshhs..s..s....ss..oss..hsVIFTVlTSPDos...pAphWGHMs-TlTs....sGhsFcRPh.LtuEhsusss........shh-sNEsWut.hs.....t.tssssst......Cs...hshlPs...hspLp....uLYss....h...s....ss.s....l.poshGWPspts...........Yhuu.spsssss.......atsls.Ls.sGsps.psssss.....thhoCl.................................... 0 1 2 22 +5518 PF05690 ThiG Thiazole biosynthesis protein ThiG Moxon SJ anon Pfam-B_1138 (release 8.0) Family This family consists of several bacterial thiazole biosynthesis protein G sequences. ThiG , together with ThiF and ThiH, is proposed to be involved in the synthesis of 4-methyl-5-(b-hydroxyethyl)thiazole (THZ) which is an intermediate in the thiazole production pathway [1]. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.39 0.70 -5.30 28 2805 2012-10-03 05:58:16 2003-04-07 12:59:11 9 11 2724 15 705 5198 2724 239.00 49 90.63 CHANGED pIus+sFsSRLllGTG+YsS.plhpcAlcASGspllTVAlRRhstss..stssllchlc.pshplLPNTAGCpoAcEAlpsA+LAREsh......sssalKLEVIuDs+pLhPDshpTlcAAEhLlc-GFtVLPYsssDsllA++Lc-sGCuAlMPLuuPIGSGhGlpNhhsLclll-ps.cVPlIVDAGlGsPS-AAtAMElGsDAVLlNTAlApApDPlsMAcAhthAV-AGRlAa.AGhh.p+c.hAsASSP ..................................................................................................................h.IuscpFsSRLhhGTGKaso.plhpcAlcASGupllTlAlRRhsh.....t..p.......................p.............sslLchl......ts.......lslLPNTuGs+oAcEAlhsAcLAREsh.......sosalKLEllsD.s.+..hLhPDs..lETlcA..AEhLl.c-GFh.VLPY...s..s.sDPl.l.s.+R...L..c.-.s..GC.u.....A....V....M....P...L...G...A...P...I.....G.........S.........s...p....G..........l..........t..........s...............t.........h..........L....c...l..........I......l...c.....pu.....s......V..P...V..l..V..D..A..G.IGsPS......c......A....At.AM.E.h.G.sD.AVLl...NT....A...I...A...t...A...pD...P...l....t..MAc...Ah..+.h.A.V.-....A.GRhA..a..AGh...hs..p..p.p....h.A.pASSP..................................................... 0 222 452 596 +5519 PF05691 Raffinose_syn Raffinose synthase or seed imbibition protein Sip1 Moxon SJ, Eberhardt R anon Pfam-B_3204 (release 8.0) Family This family consists of several raffinose synthase proteins, also known as seed imbibition (Sip1) proteins. Raffinose (O-alpha- D-galactopyranosyl- (1-->6)- O-alpha- D-glucopyranosyl-(1<-->2)- O-beta- D-fructofuranoside) is a widespread oligosaccharide in plant seeds and other tissues. Raffinose synthase (EC:2.4.1.82) is the key enzyme that channels sucrose into the raffinose oligosaccharide pathway [1]. 
Raffinose family oligosaccharides (RFOs) are ubiquitous in plant seeds and are thought to play critical roles in the acquisition of tolerance to desiccation and seed longevity. Raffinose synthases are alkaline alpha-galactosidases and are solely responsible for RFO breakdown in germinating maize seeds, whereas acidic galactosidases appear to have other functions [2].\ \ Glycoside hydrolase family 36 can be split into 11 families, GH36A to GH36K [3]. This family includes enzymes from GH36C. 20.00 20.00 20.10 20.10 19.80 19.80 hmmbuild -o /dev/null HMM SEED 747 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.28 0.70 -7.00 11 570 2012-10-03 05:44:19 2003-04-07 12:59:11 7 15 210 0 281 620 6 368.90 23 73.23 CHANGED hsls-upLhl+s.pslLocVP-NVo....uo.ssss....s..........ssspGuFlGFots.cscS+HlsolGpLcsh+FhSlFRFKlWWoTQWlGpsGpDlphETQalllEhs......p.s.......pt.csYlVlLPllEGsFRouLQsGcsDcVcIClESGSopVcsSoFpsllYlHsussPapLl+-Ah+Al+sHLsTF+hLEEKolPulVDKFGWCTWDAFYLsVsPpGVhcGlKsLu-GGsPP+FVIIDDGWQSIspDsc...c..DutNlVhuGpQMssRLppacENpKF+cYcss..............................................................................s..ppssGhKuFlcDLKpcFpul-pVYVWHALsGYWGGVRP..tssc.h-uplh.PpLSPGltuTMtDLAVDcls.tGlGLVpPcpAp-hY-ulHSYLussGlsGVKVDVhplLEhLuppaGGRV-LuKAYacALosSlsKNFsGNGsIASMpHCNDFhaLuTKQIulGRVGDDFWspDPsGDP.GsaWLQGlHMlHCAYNSLWMGpFIQPDWDMFQSsHPsAEaHAASRAIuGGPlYVSDslG..sHNFDLLKKLVLPDGSILRspaauLPTRDCLFcDPL+DGpolLKIWNhNKasGVlGsFNCQGAGWs.cs++s+sasEsspsloGol+scDlEWpsptss...phshst-asVYh.ppuccLllhs..ussl.lTLcshpFELholsPVpcls.ttslpFAPIGLlNMFNSGGAlp..slcassp...........................sVclcV+GsG+FtAYSSp+Ph+Ctl-Gs-lEFpYps-.GhVslpl 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hht...sMt.....s.............................................................hRsu.....D....ah.......................Hh...shhsshh.h.t.....h.h....sDaDMF..................o........p...........................ut......h.hu..+hhsGsslYloD...s....t..........th............s.......ht.lt.h..h..................................................................................................................................................................................................................................................................................................................................................................................................................................................h..................................................................................................................................
.......................................................................................... 0 71 176 235 +5520 PF05692 Myco_haema Mycoplasma haemagglutinin Moxon SJ anon Pfam-B_3547 (release 8.0) Family This family consists of several haemagglutinin sequences from Mycoplasma synoviae and Mycoplasma gallisepticum. The major plasma membrane proteins, pMGAs, of Mycoplasma gallisepticum are cell adhesin (hemagglutinin) molecules. It has been shown that the genetic determinants that code for the haemagglutinins are organised into a large family of genes and that only one of these genes is predominately expressed in any given strain [1,2,3]. 20.00 20.00 22.50 22.20 18.10 18.00 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.79 0.70 -5.77 31 148 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 6 0 49 106 0 382.60 42 66.32 CHANGED spAlS..pLps.KsNA-phAstFhKpsLsKspLous.............tssspp...QPuNYSFVGYSVDlss.............tppshPNWNFAQRpVWsussp.ss..........................................................stspssssLTDVSWIYSLu...GssuKYTLoFsYYGPo.TuYLYFPYKLVKsuDs.pVuLQYpLNssss.ptIsF............................................s..t..t.ttt...sspssssssphNssPTVs-INVAKVoLosLpFGpNTIEFSVPss......KVAPMIGN......MYLTSs....ssNpsKIYs-IFGN.s.sppsssouVTVDLLKGYSLAosaSTYltpFosL......sssssppssssYLlGaIGGss..sRsssss.........sNh.psPssssss....RTaTIYVNAPpsGsYaISGuYl........osssRsLpFsh....ssssss...NsVolsshspsNWoT....LuTFDT.............ssssssssossssc+TLsLpKGLNKIlluGs.....ssssAPaIGNLTFTLps 
..........t..shp..pLpp.KpNAsphss.F.KpsLs+splsus.............tspsptQPuNYSFVuYSVDlss..............tpt.PNWNFAQRpVWsssst.ht..........................................................sttpssssLTDVSWIYSLs...GsssKYTLsFsYYGPs.TuYLYFPYKLVKsuDp..VuLQYpLNssss.ptIpF..................................................................sstssths.sPTVssINlAKVsLosLpFGpNTIEFSVPts........KVAPMIGN......MYloSs....tsNpppIYssIFGN.s.sppssposloVDhLpGYSLAosaSTaltpassh.......sst.ppsp.hYLlGalGGss..sRsshss..........Nh.p.P.ssssp....RTaThYVNAPpsGpYaIpGsYh........osssRsLphps.....sstss...NslTls.h.tpsNasT....LtpFDT.............ttsssssss.ssss++TLsLpcGLNKIll.uGs.....sssssP.lGNLsFTLp................. 0 48 48 49 +5521 PF05693 Glycogen_syn Glycogen synthase Moxon SJ anon Pfam-B_2874 (release 8.0) Family This family consists of the eukaryotic glycogen synthase proteins GYS1, GYS2 and GYS3 [1,2]. Glycogen synthase (GS) is the enzyme responsible for the synthesis of -1,4-linked glucose chains in glycogen. It is the rate limiting enzyme in the synthesis of the polysaccharide, and its activity is highly regulated through phosphorylation at multiple sites and also by allosteric effectors, mainly glucose 6-phosphate (G6P) [3]. 
19.10 19.10 19.10 19.20 19.00 19.00 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.23 0.70 -6.28 7 829 2012-10-03 16:42:30 2003-04-07 12:59:11 8 5 520 24 319 797 26 398.40 37 87.51 CHANGED EVAWEVANKVGGIYTVl+SKA.Vos-EhGDpYshlGPhp-psh+sEVE.lEspsss........l+tslpuMps+GhplhaGRWLIEGsPpVlLFDluSuAatLscWKs-lW-tCpIGlPapDpEoNDAllhGahlAhFLtEFpspsp.........s.tVlAHFHEW.AGVGLhLsRt++lsluTlFTTHATLLGRYLCAGu.sDFYNNL-pFsVDpEAGKRpIYHRYClERAAsHsAHVFTTVScITulEAEHLLKRKPDllTPNGLNVhKFuAlHEFQNLHA.sK-KIp-FVRGHFYGHlDFDLDKTLYFFhAGRYEFsNKGuDhFIESLARLNahLKssss....csTVVAFlIhPApTNsFNVEoLKGQAVhKQLcDTlsplpcplG+RlF-pslp....Gp....lP.-hc-LLppsDhlhlKRslhAhpR..poLPPlsTHNMlDDusDPlLssIRRspLFNpptDRVKVlFHPEFLSSssPLlslDYE-FVRGCHLGVFPSYYEPWGYTPAECTVMGlPSlTTNLSGFGCFMpEplp..-spsYGIYIVDRRa+uh--SlpQLspaha-FsppSRRQRIIQRNRTERLSDLLDW+pLGhaYhcARphALp+saP-.a.h...sphhssst.........h+hsRPtSsPsSP..........osuphuS.psS-sEDs. .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................suhuh.h........hch.....th......lsT..lFTTHAThlGR.ls..uss..hshYs.L..as..........hDt.A.tc........h.........tl.p+ashE+tusH.scsFTTVSpITuhEspcLLc+ts..Dhlh.P.NGhp...s..hhst.h...ppFpsh+t.t+t...+l.phspu...c...h.hG....p.....................s...h...p...-.p.TL.hhhhuGRYEatNKGhDhFlEuLsRLNh......hp..tt........phsVlAFhhhPs.............................................................................................................................PhhTH.h....h.s...pD.lLs.l+phthhNp.tD+VKllFhPpa
Ls..ussslhsh-Y....-hl...h....Gsc...L...slasS...................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 100 174 261 +5522 PF05694 SBP56 56kDa selenium binding protein (SBP56) Moxon SJ anon Pfam-B_2816 (release 8.0) Family This family consists of several eukaryotic selenium binding proteins as well as three sequences from archaea. The exact function of this protein is unknown although it is thought that SBP56 participates in late stages of intra-Golgi protein transport [1]. The Lotus japonicus homologue of SBP56, LjSBP is thought to have more than one physiological role and can be implicated in controlling the oxidation/reduction status of target proteins, in vesicular Golgi transport [2]. 
19.70 19.70 19.70 19.70 19.60 19.50 hmmbuild -o /dev/null HMM SEED 461 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.55 0.70 -5.99 28 437 2012-10-05 17:30:42 2003-04-07 12:59:11 6 7 298 1 210 468 423 379.30 42 93.69 CHANGED hpssPs.YsoPttAh.puPtEclsYVssl.ssss..+.PDhLAsVDVDPpSsTYupVlpRl.hPshGDELHHhGWNAC.SS..Ca..scsst...ERRaLllPuLpSSRIYllDT+sDPRpPpltKlIEPpElhp+sGhopPHTsHChssG.IhlSsLG..ss-G..sGsG.GhhLLDp-TF-lhGpWEhsptsthhuYDFWap.tassMloSEWGsPsthccGhsspclhsG.pYG++LHhWDhsp++hhQolDLG.s-sthsLElR.hH-PscshGFVGss.....LSuSlapaa+.-c....sp.W.sscKVIslPuc.s-..u..LPsh......................PsLlTDI.lSLDDRFLYlSsWhpG-lRQYDlSDPtpP+LsGpVhlGGllp+u................shPsspsppLsGGPQMlplShDG+RlYlTNSLYSuWDcQFY.P-hl..GuaMlplDlDsct.GGLsls.sFaV...DFGpt....hhAHplRh.GGDsoSD.ah ........................................sPs.a.oP...Ah..p.....u..PtEplhYh.sl..........st......s...ts.....DhluslDlcPpS.ssYsp....llpph.h.s.hsDELHH.GWNsCSS..C.....scsth....pRphLllPuLhSuRlYllDst.-P....ptPplh........K...........l..l-sp-lht+sshuhPHT..sHChs..sG.lhlSsLG.....s.cG..su.G...GhlllDs-.................F...........plh.G.pW...........Eh......s...........t.......t.s...........................huYDFWap..t+ssMlSo.-Wu..sPs..hhp...p..Gh..s....tc.lts.s..hYG....pplahWDhptpchhQolcLG.t.p.t.hsLElRhhHsPsts.GFVGss.....Lsuslhpaa+....p.s................up..WphccsIplss..sp.sh.LP.h......................PsLlTDI.lSlDDRaLYhssWhpG-lRQYDloDPtpPhLsGplhlGG.l.cs.............................h....psppltG.GPQMlp.lS.hDG+RlYhTsSLausWDc..QFY.Pc.hh.........su.h..hl..........lc.ssst..GGhpls.sFhV...DFsp....t........hhsHphRh.GGDsooD.ah.................................................................................................................................... 1 60 99 164 +5523 PF05695 DUF825 Plant protein of unknown function (DUF825) Moxon SJ anon Pfam-B_8370 (release 8.0) Family This family consists of several plant proteins greater than 1000 residues in length. The function of this family is unknown. 
18.10 18.10 19.40 18.10 17.90 17.90 hmmbuild -o /dev/null HMM SEED 1390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.50 0.69 -14.39 0.69 -7.17 7 1423 2009-01-15 18:05:59 2003-04-07 12:59:11 7 12 537 0 50 1419 2 520.60 26 64.48 CHANGED MKcQp....hK.hIhEL+EI.+p.+s.pahhcSWTphNSVGohI+IFF+pERhlKLFDPRIhShLLSRNhpGSpSNpaFTIKGVlLhVluVLIYRhNp+.NMVERKshYLptlhPIPhN.ht.tNDT.EEuhGsoNhNhLIhs..LL.hPKGKpI.Es.hLsspEsThVLsITKht...............................pt.lAtI-..hKEKt.p.Lch........................................................................................................................................................................................................................pLKtS.s..h-hh-slp.EsSEYts.hNp+-...........................................................IpQ.cEcu..WcP.S.lphERpc............................c+.h.pphhPcEhc.cFlGN.T+SlRsaFSDRWS..ELaluS.....sPhE+.Th-pKLLKpp..lsFV..RcSEs....pEIlsLaKIITYL..QpTsSIauISSDPGhshl.Kpp.Dhsp......hpKN.hFshhphFa-ps+...hhcpsFcS..EERlpEhsDLFTLsITEPD.VYcptau...a....s.YtLsppphL..sclhNpRsc.KppSLhlL.slh...-ctc.ahcRIhpK.l..Isstp.....+.Khss.hsp.hhEuls..chIpshI.......puYlRslhNRFahhN+SsRNhc.tIppsQhGs-sLNpRThMKhpIppc..............................sYh.KWSsuocshQEaLEHFlSE...........QKscFpssFD.h.........................phphsp..IsWStshcK..KDh............s+..hh............hh.K.hhFhoKhlhhLuNsLshFh..VShGNhPIpRSEIpI.ELKGsscpLC.......NpLLESIGhpIVHLKKLKP..hLL--ass...SKFlIstuThu.Fh.NclP....hID.hpscpNp.cSFDNpDS.FShI.pDp-NWLNPsK.FppSSLIuSFYKAN+LRFhspsHHFpF.CpKRFPFsVE+s+.pNpsahYGQFLNlLhl+pKhFSLClGccKHsahp+sTISsIES........QVSNIFIPpDF...QSG-pp....YsL.KSFph.o+ssPhV+..RsIYSIh-hSsTPLTEsQIVNhERT.CQPh.D..hNLSDSEtpNhcphLN..FsSNMGLIHo.hS-c.L.SE.....KcKppsh..pl+ct..............................................scKtpMaphhQpDShFShLS.KWNLFpTYhP.FhTSsGYKYLs.lFLDsFS-lL.hhSpt.....VSIh........pDIh.......slSWcILQ.chshhph.lpoE............Ipppahp..NlhLucEhI+RN..sps..h.THLR.sNshEFhYSILhLlhVhGYLlhhalhhVSpAF.ELQp-hcplKSLM.PS.h..IELRKLlD+YP.SE.NSFhLKNlhlhshpplt.Sl.p.
Rthuhshph.........l.ulhtpppshNlshID.p.llshlsss.spIs.ptsshplSHsu ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 3 30 41 +5524 PF05696 DUF826 Protein of unknown function (DUF826) Moxon SJ anon Pfam-B_7303 (release 8.0) Family This family consists of several enterobacterial and siphoviral sequences of unknown function. 
25.00 25.00 26.50 28.10 21.50 18.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.73 0.72 -3.84 3 296 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 140 0 1 137 0 78.60 72 87.10 CHANGED MSEITSLVTAEAVKEVLRSEEVpSALKQKLRHNLEARLDAEVDAILDELLGs.AuP.sE..AG-oTAsD......s.ss-usEP.QP .MSEI..TSLVTAEAVK-VLRSEEVRSALKQKLR+NLEARLDAEVDAILDELLGss..AsPtsE......AGDsoApc...........usEP.pP................ 0 0 0 1 +5525 PF05697 Trigger_N Trigger; Bacterial trigger factor protein (TF) Moxon SJ anon Pfam-B_8447 (release 8.0) Family In the E. coli cytosol, a fraction of the newly synthesised proteins requires the activity of molecular chaperones for folding to the native state. The major chaperones implicated in this folding process are the ribosome-associated Trigger Factor (TF), and the DnaK and GroEL chaperones with their respective co-chaperones. Trigger Factor is an ATP-independent chaperone and displays chaperone and peptidyl-prolyl-cis-trans-isomerase (PPIase) activities in vitro. It is composed of at least three domains, an N-terminal domain which mediates association with the large ribosomal subunit, a central substrate binding and PPIase domain with homology to FKBP proteins, and a C-terminal domain of unknown function. The positioning of TF at the peptide exit channel, together with its ability to interact with nascent chains as short as 57 residues renders TF a prime candidate for being the first chaperone that binds to the nascent polypeptide chains [1]. This family represents the N-terminal region of the protein. 
21.30 21.30 21.50 22.20 21.20 21.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.66 0.71 -4.39 152 4472 2009-09-11 09:24:54 2003-04-07 12:59:11 8 7 4374 14 995 3073 2205 145.40 31 33.23 CHANGED Mpsphp.p.hsshpt.plplplsspclppplppthpchs+....psplsGFR....GK.VPh..pl.......lcppaGp.p.lhp-slpcllp...cshpculpc....p....plp.sl.upP...pl............p.hp....hppsps..hp...ashph-lhP-l...clss.hcsl...pl..pp.ts.pls-c-l-pplcplp ....................MplohE.p.hpshpt.plolslss-plcstlcpthpc.lsK....p.l..p.lsGFRK..........GK...V..Ph...sl.......lcp+..a....G..t.p..lhp.-slsplls....csatcA.lp-..........p.plp..sl..upP.....pl...............ss.tp......hc.p.Gcs.....ht....assphpVhPEl...c.L..ss...h...c.s...l...cV..cc.ss..-VoD.p-V-ptl-pl............................... 1 347 667 850 +5526 PF05698 Trigger_C Bacterial trigger factor protein (TF) C-terminus Moxon SJ anon Pfam-B_8447 (release 8.0) Family In the E. coli cytosol, a fraction of the newly synthesised proteins requires the activity of molecular chaperones for folding to the native state. The major chaperones implicated in this folding process are the ribosome-associated Trigger Factor (TF), and the DnaK and GroEL chaperones with their respective co-chaperones. Trigger Factor is an ATP-independent chaperone and displays chaperone and peptidyl-prolyl-cis-trans-isomerase (PPIase) activities in vitro. It is composed of at least three domains, an N-terminal domain which mediates association with the large ribosomal subunit, a central substrate binding and PPIase domain with homology to FKBP proteins, and a C-terminal domain of unknown function. The positioning of TF at the peptide exit channel, together with its ability to interact with nascent chains as short as 57 residues renders TF a prime candidate for being the first chaperone that binds to the nascent polypeptide chains [1]. This family represents the C-terminal region of the protein. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.40 55 4501 2012-10-02 13:36:56 2003-04-07 12:59:11 9 11 4242 11 991 3166 2160 155.90 23 35.84 CHANGED oL--L+ppl+cplp.cphppthppphccpllctLlcpscl...-lPpuhlcpEhsph....hpphtpph.t.tGhshpph.p..t...stcphccphccpAc++V+hsLllsclucpcplplo---lppplpchAp.pYs..h.pspplhphhtpptpht....tlcsslhccKsl-hllp ............................................................ol-cL+tcl+cplp.cph.cpt.hc.sp...h+ppsl-tllcs.s.cl....-lPpuh...lcpElcp....h..............hppht........pph...t...G...h......s....cp..........t..........st-p.....h.+c.phcc....p.Ac+RV+hs.Ll.....L.s....tl....s..cs..cp..l..c..ss-.--...lpp.t.......l.p......c.hAp....pYs...........ss..cp.......lh......p...h...h........pptph......tl+pslhpc+sl-hlh.t.................................................... 1 347 658 841 +5527 PF05699 Dimer_Tnp_hAT hATC; hAT family C-terminal dimerisation region Albrecht M, Bateman A anon Albrecht M Domain This dimerisation region is found at the C terminus of the transposases of elements belonging to the Activator superfamily (hAT element superfamily). The isolated dimerisation region forms extremely stable dimers in vitro [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.63 0.72 -4.36 49 2390 2009-09-11 20:57:40 2003-04-07 12:59:11 9 113 188 1 1830 2467 15 79.20 19 15.44 CHANGED -LcpYhp-.shhp.p.........hclLpaWp...tppscaPhLophApclLulPloosuu..EpsFSss..s+hlsct+splp.pslpuLlshcphl .....................................................................................s.h.t..aWt........tpp.tp.aPt....LtplApp....l..L.s.lPsoosss....ERsFSsh......tph.h.s..c.p.Rsplp.pphpt..lhhhp....................................... 
0 435 1044 1597 +5528 PF05700 BCAS2 Breast carcinoma amplified sequence 2 (BCAS2) Moxon SJ anon Pfam-B_7922 (release 8.0) Family This family consists of several eukaryotic sequences of unknown function. The mammalian members of this family are annotated as breast carcinoma amplified sequence 2 (BCAS2) proteins [1]. BCAS2 is a putative spliceosome associated protein [2]. 21.20 21.20 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.36 0.70 -4.88 18 311 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 249 0 219 303 7 197.40 30 87.94 CHANGED hss..p............hlDuLPYlDpthp.pssh..+pt....uspLlp-Eh+...phtsstshhpplP...tsshp.h.p.slhpsEh-Rhppt.....pP.httlDh.pRYclPtPsuspts....c.ptWppslpsutsthpHpshRhpNLELLscYGssuWchhN..............cpL-shlpphp+pLtphKcplp-lNtpRKhpQppsut+LppLEppWtpLlucsh-lEhAstpLE....tpltph+ccttp...............p ................................hss...............hhDuLPYhDpth..ssth......+pt.......sttLlppEhp.............thps.s..p.s......hhs......L..Ps.......s.s...............hs.h.p.shhppEh-.Rltsp..................pP...hthlsh..pRYc...hs...sP...ssspts...............shssWpp..slppuhsplpHtshR.hpNL.-.L...hpp.aGs.s...A..Wt...hhN................ppLpph.lpphpcpLpc.....h+..cplpplNhpRKttQ.p.suscLp.tLEppWhphVu+Nh-lEhsh.pLc.......ppl.ph+pp............tt................................ 0 72 117 173 +5529 PF05701 WEMBL DUF827; Weak chloroplast movement under blue light Moxon SJ anon Pfam-B_6516 (release 8.0) Family WEMBL consists of several plant proteins required for the chloroplast avoidance response under high intensity blue light. This avoidance response consists in the relocation of chloroplasts on the anticlinal side of exposed cells. Acts in association with PMI2 to maintain the velocity of chloroplast photo-relocation movement via the regulation of cp-actin filaments [1]. Thus several member-sequences are described as "myosin heavy chain-like". 
33.00 33.00 33.00 33.00 32.90 32.90 hmmbuild -o /dev/null HMM SEED 522 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -12.94 0.70 -6.04 36 300 2012-09-25 08:00:20 2003-04-07 12:59:11 6 10 31 0 200 315 0 319.10 18 60.30 CHANGED uAPFESVK-AVSKFG.Ghl........DWKtt..psh..ERRpt.hcpEL-psQcElschKcph-sAEssKspslpEL-pTK+llEELKhpLE+sppEcpQA+QDoELAclRs.cEhEpGluscsSss.....uKspLEsA+pRassAlsELcuVKpELcpl+c-assllsc+-sAhp+AcEAhsuuKcsE+pV--LThELlAsKEuLEpu+AAHhcAEEc+hssshu+-pDphsWcpELcQu-cELppLppcl..husp-LcuKLcpsoshLhsL+sELsuahp.uclsp.s..............p.....hp.t.lsus+cELEEsKtsl-KApsEsssL+ssusSL+uELEpEKusLssL.+p...+EuhAuhsluuLcsELp+scpElthsps+EpcsccthsE.lsppLQQAupEu-pA+ttAcsAp-Elp+sKEEAEpAKAussshEtRLpAshKElEAAKASEclAlsulcA.LpEpc.sstpsps.t....SssslTLolEEYtpLS++ucEuE-hAsp+VusAluplEtAK-oEs+oL-+Lcpsp+-h-p+KpALctAh .........................................................................................................................................................h.tth..h..........p.th..ph..sp..httht.tlt........................................................................................................................................................................................................................................................................................................................................................................h.....-Lpthp.t.......lpphtt-...ht...htt...lpt..cl..tp.ptthtphpp....p.....t.tt..h..tlp.ch.....thttpht.h..tp.tt......................t.h...hpphtt-hpth..p.......p.ph..h.tphp..p..htthp.ph....p......t....t..hh..................................................................................................................................................................... 0 30 112 160 +5530 PF05702 Herpes_UL49_5 Herpesvirus UL49.5 envelope/tegument protein Moxon SJ anon Pfam-B_7354 (release 8.0) Family UL49.5 protein consists of 98 amino acids with a calculated molecular mass of 10,155 Da. 
It contains putative signal peptide and transmembrane domains but lacks a consensus sequence for N glycosylation. UL49.5 protein is an O-glycosylated structural component of the viral envelope [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.25 0.72 -4.12 8 35 2012-10-01 21:33:21 2003-04-07 12:59:11 6 1 24 0 0 48 0 93.50 35 98.32 CHANGED M.sS.shhphshslslssllllulspusss-...................tthsltppt...cFWcuuCSA+GVsIstuouAoVlFYluLlAVllALLuhuYpACFRLFTuShhpccW ...................................................h.hsh.hhlshhlhhuhspus.ss....................t.shh.ptt..hsFWcAuCSA+GVsIstsossoVhFYluLlAVhVAlluhAY+ACFRlhTsshhppp...................... 0 0 0 0 +5531 PF05703 Auxin_canalis DUF828; Auxin canalisation Moxon SJ, Eberhardt R anon Pfam-B_7298 (release 8.0) Family This domain is frequently found at the N-terminus of proteins containing Pfam:PF08458 at the C-terminus. It is a component of the auto-regulatory loop which enables auxin canalisation by recruitment of the PIN1 auxin efflux protein to the cell membrane [1]. 
22.80 22.80 23.50 23.00 22.00 22.70 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.73 0.70 -4.55 11 176 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 19 0 114 162 0 205.20 37 58.36 CHANGED PpTPp-s.MEFLSRSWSlS.AoElS+ALts...................................t.t.ppshShsuspsspllh.t...............................stssusPlsPpc.ls..cssphh+ss...............................hTlG+Wh+c+ct..........K++-csRscsAplHAAVSVAuVAAAlAAVsuussss.s....................stpsuKhssAlASAupLlAo+ClEhAEthGAD+-plsoAVpSAVsl+SsGDlhTLTAAAATALRGAAsLKsRt.KEspp....AAlhPsE+ut ....................................................................s.ps.MEFLSRoWS.S.u.p.lspsh..................................................................tt......tt......hh...................................................sh...t..h.....p...h.h................................................................holu+Wh+cpct..............++K-csRhcpAplHAAVSVAulAAAlAAlsAuss.ts.........................s.tp.ss+h.shAlASAAsLlAupC.lEhAEthGAc+-plsusVpSAVss..posuDlhTLTAuAAT........uLRGAAsL+sRh.pc.h..........Ash.Ph-c.s...................... 0 12 62 89 +5532 PF05704 Caps_synth Capsular polysaccharide synthesis protein Moxon SJ anon Pfam-B_7575 (release 8.0) Family This family consists of several capsular polysaccharide proteins. Capsular polysaccharide (CPS) is a major virulence factor in Streptococcus pneumoniae [1]. 
24.50 24.50 24.50 24.50 24.40 24.30 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.88 0.70 -5.24 28 400 2012-10-03 05:28:31 2003-04-07 12:59:11 7 9 331 0 91 614 130 240.00 21 70.01 CHANGED +...........hh.h..tt..h.hppphhhphLccphpphl.pashppppp.pppp........IWhhWhQGh.-sA.PclV+pClpSl++pt..ssaplllLoccNlccYlslPchlhcKhcpGhl...shspaSDlLRlpLLspYGGlWlDATlaho..sslss.hh......cssFFsh....ppstppspshshupW.......................hhluu.ppsp.llshhcchhhpYa.pppsphhD..YFlhchhhpls.......hcp..ph.pphhch.h..hsNtp...ahLtthlpptascphapplppp.o.slaKLoYKhphsptpp....poaap+l ..............................................hhhh.....................................................p....p..tp..+........IWhhWh.Q.G...-sh..P.pllcp.C.hpShc+hh..ssaclll..ls.....c.....c.....N.....l.p..cY..l...s.....h..P..c..h...lh..p+h..p.pst..h.........sh..s....ah....SDllRlsLLtpYGGlWlD..uolhho...psl.sphhh..........ppshash..........pps.t....tt.......p..........h.h...p.....pa............................................hlsu..pps.s.....h.hp.hh.pch.h..h........t...aW..ppp.p.........Y.hh.h.phhh.hh.......hp.......................s.s.......h...h..p.......t.........h....t........phph.................................................................................................................................................................... 0 14 42 68 +5533 PF05705 DUF829 Eukaryotic protein of unknown function (DUF829) Moxon SJ anon Pfam-B_7638 (release 8.0) Family This family consists of several uncharacterised eukaryotic proteins. 
20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.43 0.70 -4.67 43 555 2012-10-03 11:45:05 2003-04-07 12:59:11 9 10 206 0 411 571 34 210.60 20 72.88 CHANGED slll..........................lhGWh.Gups+altKYsshYpc..tGhpslhhpssshphhh.s......ptlpsshcplhphhtspptpt...............sllhHsFSssGhhhhtshh....p...hppppthtphh..lpGhl....hDSuPutsph.htshpuhuts.hsps..............................ht.hhhhhhththhhhhhhhh.pssphhppt...hpshppssht.......ssp......LalYScsDtllshc-lEpahsptcp..pGhp.VpttpacsSsHVsHh+paP..cpYhspltcah .................................................................................llllhuWh.sup.+pltKYsph.Ypp........uhp.hlh.hpsshhphhh.t.......................................ptht.hhp...t.lhth......h..p.p.tt.....................................llhHsFSsu...Ghhhht.hhh.........................t...hp.ptp........t..p.hh..............l.....t....uhl....................hDSsP..s..ph....shtshshs....h.h.....................................................................h..hhhh.hh.hh..h.h.h....h...hh..h....h.h......s..h...h.h...tp............hp....ht........t.........................h.p......halYS..psD.lhshps...l-p.hhpttcp....................tuh....lp...a..tosHs..sH.hp..t..........p.Yhthh.ph.................................................. 0 122 232 349 +5534 PF05706 CDKN3 Cyclin-dependent kinase inhibitor 3 (CDKN3) Moxon SJ anon Pfam-B_5217 (release 8.0) Family This family consists of cyclin-dependent kinase inhibitor 3 or kinase associated phosphatase proteins from several mammalian species. The cyclin-dependent kinase (Cdk)-associated protein phosphatase (KAP) is a human dual specificity protein phosphatase that dephosphorylates Cdk2 on threonine 160 in a cyclin-dependent manner [1,2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.11 0.71 -4.80 2 187 2012-10-02 20:12:17 2003-04-07 12:59:11 7 6 136 7 85 382 124 132.60 34 62.73 CHANGED MKPP.SIQsSEFDSSDEEPl--EQTPIpISWLsLSRVNCSQFLGLCALPGCKFKDVRRNlQKDTEELKShGIQDlFVFCTRGELSKYRVPNLLDLYQQhGIlTHHHPIsDGGTPDIuSChEIMEELsTCLKN.RKTLIHCYGGLGRSCL.AACLLLYLSDoISPpQAI ............................................................................................................................................................................Rs.lppD.hppL+.s..G...hps.l.hs.hhspsELsp.....h..p..V....s..s...L.h..c...hhpp.tG..h.....h.h..a..HhPIsDs..s..s.P..-...h....s...p...t..h....p.....l.h...p..E....L...t..t.t...Lp.......s..sp....+..sl.l..HChGGhGRo.s.L...lA.ApLLl.........l.............................. 0 29 44 58 +5535 PF05707 Zot Zonular occludens toxin (Zot) Moxon SJ anon Pfam-B_3320 (release 8.0) Family This family consists of bacterial and viral proteins which are very similar to the Zonular occludens toxin (Zot). Zot is elaborated by bacteriophages present in toxigenic strains of Vibrio cholerae. 
Zot is a single polypeptide chain of 44.8 kDa, with the ability to reversibly alter intestinal epithelial tight junctions, allowing the passage of macromolecules through mucosal barriers [1,2] 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.55 0.71 -4.84 17 575 2012-10-05 12:31:08 2003-04-07 12:59:11 7 5 398 2 85 1421 205 183.60 20 52.20 CHANGED hlhllsGtsGuGKThhAVsh.lhs....slp...p....GRhlhT.NlstLs.lc......ph.p.....hp................................h....thht.W.ps....s..psullVlDEspp..hassRsh.st...........................................................shl.shapptRHhGhDllllTQshshlccplR.s.Lsphth+hp+t.shth.tpaphsh..shp.......thtphhp.......hphhphPKptFuhYc.Sss.ps .......................................................................................l.hhpGhsGuGKo.h...s..l...h....l.......................tht..........p............s..R....l...hs...s....lp...s.lp...hp............th..t.....................................................................................................................t....p.h..h..pa...tt............................ts.u...l...ll.lDE.s..pp..h..ass.cts.....pth...........................................................................................................................................................................................sphh..ph.h....s.pH........R..H......h.......G.......h.......Dl..h..l..h....TQ.sh...s...p....l.....c.p...l.R...p....hs...c...hthch.t..p.t....h.....h.G..h.tth.hphph.......hhpt................ptht.........tthhphspthathYp.Ssp.t....................................................................................................................................................................................................................... 
1 21 49 67 +5536 PF05708 DUF830 Orthopoxvirus protein of unknown function (DUF830) Moxon SJ anon Pfam-B_5425 (release 8.0) Family This family consists of several Orthopoxvirus proteins of unknown function. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.17 0.71 -4.24 33 2265 2012-10-10 12:56:15 2003-04-07 12:59:11 7 15 1518 6 287 1396 159 163.10 23 72.02 CHANGED plppGDllFhsss.sslsptlss.......SphsHsuIhhspss.....................s.hhVlEA.hsp.....sschssLpcFlpc......psphtVhRLpsh.....ttthppshphAhphlGpsYshsa..t.....................sc+hYCochVtcsY.pshGl.hsphphhtphhh................................sPpshhcssplptlh ..............................................................................................................hpsGDl.l.Ftp...s....t......u....sh.......sttIphh.op............u...huHluIh.ls..cs........................................................................hl..h.....Eu........hss..................sV..p..hs.s.L...p..p.ahpc...........spchs.lh.R.lssh..........s...pptpc.l.......s..p....p.......u.......p..h....l..s....p..s.....Y..s.hs..h..h.hp..........................................................................................s.sp.h.a....CSchVhcsa..tth.s...h..................h............................................................h............................................................................................................ 0 118 176 241 +5537 PF05709 Sipho_tail Phage tail protein Moxon SJ, Bateman A anon Pfam-B_5084 (release 8.0) & Pfam-B_10063 (release 10.0) Family This family consists of several Siphovirus and other phage tail component proteins as well as some bacterial proteins of unknown function. 
20.30 20.30 20.30 20.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.63 0.70 -5.24 71 1621 2009-01-15 18:05:59 2003-04-07 12:59:11 6 6 917 9 143 1105 17 296.70 14 91.64 CHANGED hhhlhshshtsss.....pshpphs.stsG..th.hhs...sphss+plslshhlhst.....shtc...........hpth.+ccltphl..tsccs.hhLhhssc........................................ss+hahshhssp...hsh...cpt..sshs.phslsFhs.spP.at.o...........h..............................................................................................................................................tspthshstsh.hh..................slhNtG...........sspspPh.....lclphp...ssssh.....hslhs.....................................................................................................................................................................................................................................................................................................................................hpsG-....pltls....ptssslhlsuhp.hhsthshs...........pphhtltsGtNp.lplps....ssshplph.pa+..hah 
...................................................................................................................................................................................h......h.....s......ph.phs..shsG......th....hhs.......t.p.hp.sh.plslthhhts........sht.s................hp..h.hcclhphh.......stc....p....h....hlh..h.ps.p.......................................ss..hh.ah.s..h..h..s....ss...hsh......ppt......hs.s.......phslphhs....pP.athu................................................................................................................................................................................................................t.tph.th.s.tsh....................................................slh.N.s.G...........shp..s.tsh..........lclpht......tss.sh.....ht.lhp..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hpsuc.....hlhlc.........pppp..pl..h..h...s....s....h......hhsth..s..ht.............................p.h...tlts.G..Np.lphp.........s.hp.hph..pa..ha.................................................................................................................................................................................................................................................................................................................................................................................................... 0 59 105 126 +5538 PF05710 Coiled Coiled coil Yeats C anon Yeats C Family This region is found in a group of Dictyostelium discoideum proteins. It is likely to form a coiled-coil. 
Some of the proteins are regulated by cyclic AMP and are expressed late in development ([1]). 21.40 21.40 21.40 21.60 21.20 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.63 0.72 -3.52 10 120 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 4 0 120 121 0 84.10 32 90.54 CHANGED MTIlASISSIGNsKSoSKSsluSFuSuo.S..hGSNSIuC....GuCGGuu...hshssssssuGlt......lGsslD..LsusssossGhsuuhhG...................us.....SCGC .....MTlhuSISSlu...ss...p..o.sS...K.S.s.l..u..S.huuu.o.S.....uSN.SlAC........GuCGGuu...........ss...ssGhh........hsh.sh...lsss..thstuh..hss..t.........................tC....................................... 0 102 120 120 +5539 PF05711 TylF Macrocin-O-methyltransferase (TylF) Moxon SJ anon Pfam-B_5055 (release 8.0) Domain This family consists of bacterial macrocin O-methyltransferase (TylF) proteins. TylF is responsible for the methylation of macrocin to produce tylosin. Tylosin is a macrolide antibiotic used in veterinary medicine to treat infections caused by Gram-positive bacteria and as an animal growth promoter in the swine industry. It is produced by several Streptomyces species. As with other macrolides, the antibiotic activity of tylosin is due to the inhibition of protein biosynthesis by a mechanism that involves the binding of tylosin to the ribosome, preventing the formation of the mRNA-aminoacyl-tRNA-ribosome complex [1]. The structure of one representative sequence from this family, NovP, shows it to be an S-adenosyl-l-methionine-dependent O-methyltransferase that catalyses the penultimate step in the biosynthesis of the aminocoumarin antibiotic novobiocin. Specifically, it methylates at 4-OH of the noviose moiety, and the resultant methoxy group is important for the potency of the mature antibiotic. 
It is likely that the key structural features of NovP are common to the rest of the family and include: a helical 'lid' region that gates access to the co-substrate binding pocket and an active centre that contains a 3-Asp putative metal binding site. A further conserved Asp probably acts as the general base that initiates the reaction by de-protonating the 4-OH group of the noviose unit [2]. 21.10 21.10 21.10 21.10 21.00 20.80 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.66 0.70 -5.30 16 452 2012-10-10 17:06:42 2003-04-07 12:59:11 6 12 353 11 134 489 715 213.40 27 70.32 CHANGED ssssppLYLDLl++lloNhlY-D.sh....................ssshlscssacscsRspGpDWPslAHTMlGh+RLcNLQcClEpVLtDGVPGDhlETGVWRGGACIhMRuVLtAaGlpDRsVWVADSFpGhPssssssHshDpthc.LHpaNclLuls..lEpVRpNFpRYGLLDDQVRFLPGWF+DTLPsAPl-cLAVLRLDGDLYESThDALssLYPKLSPGGaVIlDDYsl.PuCRpAVcDYRscaGIs-sIpcIDtsGVYWR+o ........................................................................................t................................................................................................................................h.t..h.t.p..sl..GshlEsGV.a+G...u...u...s...h........h...h...............t...sh...l.........t...s...h...s.........p.......s........R.......p..l..al.hDoFpG.h....Ps....s.sh....p.....p........h............t........c.....h........s..........................h........p......p.....h.....s................h.......t........s.....s...........h-p..V.....p......p...........s.....h..p...p...a.s..........h...h..s.....c......p.lc.....h.l..pGhF.p-.T....L.s....s.....s.....s.....h......c....p.......l.......All+lD....s.........D.h.YcS...Th.su.LptlY..s+..l........s...G.G.........hlll...DD.Y.....s....h...........s...s.....t....p....Al.p-...a...ht..p.ht...................................................................... 
0 54 98 113 +5540 PF05712 MRG MRG Moxon SJ, Mistry J, Wood V anon Pfam-B_5530 (release 8.0) Family This family consists of three different eukaryotic proteins (mortality factor 4 (MORF4/MRG15), male-specific lethal 3(MSL-3) and ESA1-associated factor 3(EAF3)). It is thought that the MRG family is involved in transcriptional regulation via histone acetylation [1][2]. It contains 2 chromo domains and a leucine zipper motif [3]. 20.50 20.50 21.50 22.70 19.10 20.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.31 0.71 -5.41 33 687 2009-01-15 18:05:59 2003-04-07 12:59:11 8 14 280 9 443 658 7 206.40 31 57.75 CHANGED ssst.cusc+ppst............p.th....hpcspl...plplPspLKphLVDDWEhlTKpp..+lltLPs.phsVspILccahpp......h...p.ppspsss..........p.p.............................hlcElls..Gl+hYFspsLu..phLLY+hER.QYtplhppp........................................................................................................................................................................sshs.SplYGA.HLLRLhlp...LPphlutos.hDppolshLhpplpphlpalspp...pchFsppsY 
......................................................................................................................................t.p.sppp............t..........ppcth...........hpc.pl...plplP-pLK.hLV..D.DW-hls+pp............pLh..tL.Ps....p.hsVssIL-cYhph...............hp...ppsssss................t.....................................................hlp.Ells..G...l+...pYFshhLs..s.LLY+..aERsQYt.clhtpp................................................................................................................................................................................................................................................................................................................................................................shs.SplYGA.HL....LRLF............V+...lsthL....uho......s....hspc.sl..t.......hLhp.hlp.pFL....paLscpt..sphFstp.................................................................. 2 114 174 307 +5541 PF05713 MobC Bacterial mobilisation protein (MobC) Moxon SJ anon Pfam-B_2832 (release 8.0) Family This family consists of several bacterial MobC-like, mobilisation proteins. MobC proteins belong to the group of relaxases. Together with MobA and MobB they bind to a single cis-active site of a mobilising plasmid, the origin of transfer (oriT) region [1]. The absence of MobC has several different effects on oriT DNA. Site- and strand-specific nicking by MobA protein is severely reduced, accounting for the lower frequency of mobilisation. The localised DNA strand separation required for this nicking is less affected, but becomes more sensitive to the level of active DNA gyrase in the cell. In addition, strand separation is not efficiently extended through the region containing the nick site. These effects suggest a model in which MobC acts as a molecular wedge for the relaxosome-induced melting of oriT DNA. The effect of MobC on strand separation may be partially complemented by the helical distortion induced by supercoiling. 
However, MobC extends the melted region through the nick site, thus providing the single-stranded substrate required for cleavage by MobA [2]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.45 0.72 -3.99 27 1793 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 994 0 198 1402 105 47.80 25 39.04 CHANGED ll.......ppLutlGsNlNQIA+phNs.....thss.cph.h.httLh.plpppLpplpc ........................hpLsplGsNlNQIA+phNp.........tth..ttt..ph..................l..tl.t.......h........................................ 0 71 131 175 +5542 PF05714 Borrelia_lipo_1 Borrelia_lipo; Borrelia burgdorferi virulent strain associated lipoprotein Moxon SJ anon Pfam-B_7866 (release 8.0) Family This family consists of several virulent strain associated lipoproteins from the Lyme disease spirochete Borrelia burgdorferi. 20.80 20.80 20.80 20.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.37 0.70 -5.23 18 520 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 34 4 27 422 1 182.80 25 78.06 CHANGED ppppscchKNsLLsD..........L+NLIEpAsp-+-KYhK+hcEEPs-Q....YGIhu..FcpL.Wstu.spEslu-s.oc+ShpYR+hsYus.LNslcss.cLKchocIlh.........hutpsptlaNlhpplGtslDpllspLYsKKDsLsKL-Is.cLc+LKNshEKlLSlKphlScMlsQLLLDYpssps.I+TDssKLcsals.pl.pQh.cKpcEu-pLKspIhoI.tsl ..................................................pp.pp...p.h.hsp...........pp....h.c.s...t.....pchcp.h.h.K.....p.h.....c.tp..ppp......................h.thtt.......hchlth.........h................h-.h..tts.....pphph++hhYos.L.s.appp.clpphtpIlp..............t..thh....t.thlh....p.h....h.....hsl..php.....l-phlt.hl...p..p..p.Ds..L.p.php.p.plcpLhpph-p.LplKpphtctlp..phl.-......Ypp..N..p...stI+s...-.pt...Ltp.ahp...............p....tp..th..........h.................................. 
0 17 17 17 +5543 PF05715 zf-piccolo Zf_piccolo; Piccolo Zn-finger Yeats C anon Yeats C Domain This (predicted) Zinc finger is found in the bassoon and piccolo proteins (e.g. Swiss:Q9JKS6). There are eight conserved cysteines, suggesting that it coordinates two zinc ligands. 30.00 30.00 30.10 33.90 29.00 29.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.50 0.72 -3.96 12 212 2012-10-03 17:27:21 2003-04-07 12:59:11 8 9 36 0 101 168 0 59.80 62 3.12 CHANGED s+shCPLCpTp...luo.s.sNFNTCTpC+spVCNLCGFNPsPHLTElpEWLCLNCQhQRAL ......s.pshCPLCpTp....lu.o.p.-.s....PNaNTCTpC+spVCNLCGFNP.sPHLo.E.h.pEWLCLNCQhQRAL........ 0 4 17 41 +5544 PF05716 AKAP_110 A-kinase anchor protein 110 kDa (AKAP 110) Moxon SJ anon Pfam-B_5702 (release 8.0) Family This family consists of several mammalian protein kinase A anchoring protein 3 (PRKA3) or A-kinase anchor protein 110 kDa (AKAP 110) sequences. Agents that increase intracellular cAMP are potent stimulators of sperm motility. Anchoring inhibitor peptides, designed to disrupt the interaction of the cAMP-dependent protein kinase A (PKA) with A kinase-anchoring proteins (AKAPs), are potent inhibitors of sperm motility. PKA anchoring is a key biochemical mechanism controlling motility. AKAP110 shares compartments with both RI and RII isoforms of PKA and may function as a regulator of both motility- and head-associated functions such as capacitation and the acrosome reaction [1]. 
25.00 25.00 26.70 25.90 24.80 24.10 hmmbuild -o /dev/null HMM SEED 685 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.55 0.70 -13.31 0.70 -6.54 5 156 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 35 0 76 141 0 335.10 35 35.52 CHANGED oP+KSLSKIASELVNETVoACS+NhssDKAPGSGDRs.uo.QS.PsL+apSTLKIKESsKcGpGPDsRPGSKKSFFYKEVFESRNuGDA+EGGR.hPt-RKhFRs.-RPDDFosSIScGIMTYANSVVSDMMVSIMKTLKIQVKDTTI.ATIVLKKVLLKHAKEVVSDLIDSFMKNLHNVTGoLMTDTDFVSAVKRolFSHGSQKATDIMDAML+KLYSVlhAKK.PEplRKscDKSESYSLlSMKuGsGDPKsRN..LNFAoMKSEsKlREKspocs.ssKEcTCAETLGEHIIKEGLTlWHpoQQNpsKSsuhptu.....ppQhtss.-hshthP.D.sphs..tsPpsPEKsENFMs-SDSWAKDLIVSALLLIQYHLAQ.............................GGspDA+SFlEAAuoTNhsPssSPss+DEu+L+Ss.l.hs-.EpsEKKDLpSVlFNLIRNLLSETIFKs-csCEuKs+cp.lKE-+uspCERPl.......Sssss+hsED-E.TsGAlSGLTKMsssplDGpMNGQMVDHLM-SVMKLCLIIAKSCD..AuLAELGD-KSGDASR.oSAFP-NLYECLssKGTGoA.EAlLQNAYQAIHNEhRuhSuQPPEGCthPcVIVSNHNLTDTVQNKQLQAVLQWVAASELNVPILYFAGDDEGIQEKLLQLSAsAVEKGpSVGEVLQSVLRYEKERQLDEAVGNVTRLQLLDWLMuNL ........................................................................t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p..llV.N.s.psph..tpLpAsLQWlAASphslPhlYFhtsp-s.hEK..........h.plsthstcKuapVG-lhptVhpatK...c..p.pt..tp.sphp.LhDWLh.......................... 
0 4 8 28 +5545 PF05717 TnpB_IS66 Transposase_34; IS66 Orf2 like protein Moxon SJ anon Pfam-B_5707 (release 8.0) Family This protein is found in insertion sequences related to IS66. The function of these proteins is uncertain, but they are probably essential for transposition [2]. 22.10 22.10 22.30 22.30 21.80 22.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.29 0.72 -4.62 135 3276 2009-01-15 18:05:59 2003-04-07 12:59:11 8 9 1155 0 503 2167 362 95.80 41 89.63 CHANGED sss+laLsstssDhR+GhsGLuslVppslth-PauGslFlFps+..RtD+lKlLaWDusGhsLhhKRL..EcG.+Fh.W.P..sspt..slpLos..tQLphLLcGlDhppsp.t..sh .......................s..pla.lssuhsDMRpGhsGLuthVppsh.p...-P.a.S.G.p.lF.lFp.G+..+tcplK.lLahDus.G.hs.LhhKR.L..E..p.G..+Fh......W..P...sscps...hhtLo...tQlsh........LlcGlshpt.t.....hh.............................. 0 117 266 386 +5546 PF05718 Pox_int_trans Poxvirus intermediate transcription factor Moxon SJ anon Pfam-B_5843 (release 8.0) Family This family consists of several highly related Poxvirus sequences which are thought to be intermediate transcription factors. 
21.40 21.40 22.30 25.20 18.70 18.70 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.39 0.70 -5.94 12 61 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 0 44 0 375.30 68 99.35 CHANGED MccLFpaL+sIEDcYsRTIFNFHlh+ss-lusIYshh+-+IuspshFsclV.sscl+psIKKLlYCDIplTKHIINpssYsshssssppss..KhuQaFDIphssss..hStRTs-IF-pDKSSLlSYIKTTNKKpKlDYGEIKKTlHutspo..sYFSG++SD-YLSTTV+sspspPWIKoISKRhRlDIhspuIlT+GKSSILQTIEIlasNRTCVKIFKDSThHlILSKDKsEpuClsllsKLFpsYclLFtLlaslTtsptFtphtssuscllssssF-EKlshI+pht..c.YGlpNFKlGMFNLTastsIsaTVFPSLLct.sSKIKFFKGKKLNIVALpSLc-CtpYVp.ApslLctMpcRSphLsslsItosSV-pLKpLLh ....................MDsLFoFLH.EIED+YuRTIFNFHLISsDEI.GDIYGlMKERISuEshFDNIVhscDI+sAIK.KLVYCDIpLTK.HIINpssYPlaNcoups....KpspaFDINoDsus..ISpR.TVEIFEREKSSLVSYIKTTNKKRKVsYGEIK.KTVH..GGTsu..NYFSGKKSDEYLSTTVRSshsQPWIKTISKRMRVDIINHSIVTRGKSSILQTIEIIFTNRTCVKIFKDSTMHIILSKDKcE+GClchIDKLFhVYhsLFlLhcDIhpN-hFcEVAslss+VLouTshDEKLhlIK+hA..DsYGVsNFKIGMFNLTFIKul-HTVFPSLLD-.-SKIKFFKGKKLNIVALRSLEDClpYVocSEshl-hM+ERSsILNuIDIETtSVD+LK-LL.l............... 1 0 0 0 +5547 PF05719 GPP34 Golgi phosphoprotein 3 (GPP34) Moxon SJ anon Pfam-B_7957 (release 8.0) Family This family consists of several eukaryotic GPP34 like proteins. GPP34 localises to the Golgi complex and is conserved from yeast to humans. The cytosolic-ally exposed location of GPP34 predict a role for a novel coat protein in Golgi trafficking [1]. 
23.00 23.00 23.80 23.30 22.60 22.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.53 0.71 -4.25 88 612 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 392 9 331 621 2 216.50 28 82.18 CHANGED Ls--lhLLuhcsp.pGp....hhssshphu..LsuuhLhELsltGclpls..........ps+......l.tsh..sspsss......................-s..lLcps....lptlt......ptc.spp.....spsWlpt.ps............+sh.pctl...tpp...Lssc.Gllcpccp..phLshh...hh...pas.hsD...sstcpplpp+lpssl....sttss............................................sscsss.......LlsLhtus.slhpphh.ss...............................ppscptlpp...........lsptshhst.........................................................slcpt......lpphpsAs ......................................LhEEl.LLL.uLc-c...pGhh...hhsssluhu..LpGshL....l....ELuhcGRlplp...................pc+........................l..hlh.ssssTG......................-s..lLD-uLchl+............spt..s.o.........lpsWI.chhsG..............+sl.R-+l.scs....LV-+.GlLps.E+p....sh.L.h.h..shssaP..ls-...ss.h+pclhc+lpssl.....p.ssts.................................................shRshu.......LlsL.AaAu..sllcssh..ssh..p...c........................ppscphltp..........h.p.hs.............................................................................tht................................................................................. 0 118 192 272 +5548 PF05720 Dicty_CAD Cell-cell adhesion domain Yeats C anon Yeats C Family This family is based on a group of Dictyostelium discoideum proteins that are essential in early development ([1]). Swiss:P16642 and Swiss:P16643 are located on the cell surface and mediate cell-cell adhesion. 
19.00 19.00 20.50 19.10 18.50 17.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.52 0.72 -4.34 6 17 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 4 0 16 20 1 76.10 30 37.11 CHANGED IhN--GESTISGKuaPLPsPhIaPsPYhhRFhcYchEGpLWsNcEFclKSGKIEasGEEaDIPpSpsshhKh.D-ptshI.l ...........l.sptGcSoIpGhshshPs.h.aPsPahhph.h.p..YphEG.s.l.asppcFclpSuKlEhsGc.ca-lPsSpssh.c..-p...................... 0 15 16 16 +5549 PF05721 PhyH Phytanoyl-CoA dioxygenase (PhyH) Moxon SJ anon Pfam-B_5670 (release 8.0) Family This family is made up of several eukaryotic phytanoyl-CoA dioxygenase (PhyH) proteins, ectoine hydroxylases and a number of bacterial deoxygenases. PhyH is a peroxisomal enzyme catalysing the first step of phytanic acid alpha-oxidation. PhyH deficiency causes Refsum's disease (RD) which is an inherited neurological syndrome biochemically characterised by the accumulation of phytanic acid in plasma and tissues [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.38 0.70 -4.30 39 3239 2012-10-10 13:59:34 2003-04-07 12:59:11 8 56 1181 23 1723 3267 6124 205.00 16 65.62 CHANGED accsGalllcs....hlss.pplsthppphppl.............................tthtsstphphhhptpttsststhh.....ts.pthtthhcs.............hhsshscphlspssh.................htshh....hh+tsphGsss...shHpDhthh........ttP....tphls....hhlAl-Dhsh-NGshhhlPGSHph..t....hh...th.tt.............................tpthlsh.hctG-sllacsplhHuotsNpost...Rpu 
.................................................................................................................................................................................................pppGalllps....hhs...pph.t.t.h.p....p...t.h.t.p.h..............................................................................h.....t.t........t...h.....t...h...h..h.t....t...t....h.....h.t.....t.....t.hh....................t...hh.th.hhps.................................thhshh.pplhstphh....................................................h.pshh..........hh.p...s.t....Gsts............sh.H..........p...........D...h.t.h..h...............................hps..............phls...........hhl.sl.s..-.s...s.......c..s...G..........s...........hh..h..........l.....P...G.....S......H..p....h........h.................h..t.tth.tt........................................................................t..t..t....h.h....h..t..h....ps.Gsslhapsp.hh.H..s..u.s..tN..tott...R..................................................................... 0 633 1084 1476 +5550 PF05722 Ustilago_mating Ustilago B locus mating-type protein Moxon SJ anon Pfam-B_5804 (release 8.0) Family This family consists of several Ustilago mating-type proteins. The b locus of the phytopathogenic fungus Ustilago maydis encodes a multiallelic recognition function that controls the ability of the fungus to form a dikaryon and complete the sexual stage of the life cycle. The b locus has at least 25 alleles and any combination of two different alleles, brought together by mating between haploid cells, allows the fungus to cause disease and undergo sexual development within the plant [1]. 
25.00 25.00 37.50 35.50 20.10 19.60 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.85 0.70 -5.10 2 41 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 6 0 1 40 0 123.50 69 42.78 CHANGED Il+KFARpDRsRMKhLVpAKh.uSs.Ss.s.uTpsSLssNLDDlLp-NLG+.LTPsDKppFEDDWsSMISWIKYGVKEKVGDWVYDLsAAsKKo.P+sG.sRsVTTsApRpPARKTtstspsKs+pAp.RASpTPShDST...StLESTPELShCSTADsSFSoFsSshShupasPFQp..pl.QSPolpsRGsRKVKALPKRAupp.PsE...lsN....................................................GpIPFl.........sLSsAFs ....ILKKFAREDRSRMK+LVRAKLSSSNQSoPPS.................................................................................................................................................................................................................................................................................s.......... 0 1 1 1 +5552 PF05724 TPMT Thiopurine S-methyltransferase (TPMT) Moxon SJ anon Pfam-B_5821 (release 8.0) Family This family consists of thiopurine S-methyltransferase proteins from both eukaryotes and prokaryotes. Thiopurine S-methyltransferase (TPMT) is a cytosolic enzyme that catalyses S-methylation of aromatic and heterocyclic sulfhydryl compounds, including anticancer and immunosuppressive thiopurines [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.50 0.70 -4.90 12 1046 2012-10-10 17:06:42 2003-04-07 12:59:11 6 7 838 11 423 2913 640 189.50 24 87.81 CHANGED hstshWpppWtssphsacQppsssLLh+ahss..ps.sshRsLVPhCGpuhDhhWhAspt+.VlGl-lS-hAlpchhp-ts.....pPp..hpphssh+c.............DhFshpsppl..hDhlaDRushsAlsPchRspYupphhpLL.ssuc..hhLlTLpYs.scts.GPPF.Vst.....tEhctlhs.sshcltplcp.tDsLss..chtt.slpthtEplahl.p+p .........................................................................................thWp.pp...a...t...p...s...p.....h...s...a...c......t....p..........s.......s.....s....h......L...h....p....a.......h...............p...........p.....................t...........................t..........t................t.....t......c....lLl.P........h.CGc.u..h..D..h..h..a.L....A..s...p...G...........a..p.V.hGl-l..S..p..h..A..l...p...p...h...h...p.cp.th..................p.p.....ht..p..h......s......s..h...pt...................t....t..l...p..h..h..t.u..Da....F......s.......h...s..........s....t......t......h......t.....t.....h..chl.a.D..p.s...h....h....s..A..L......s.........p.....h.....R....t.....p.......Y..s......ppht....pl..ltssup.............h.L..l.....s........h.....t...a.......s........t.....s....p.......h...t.....G...P..P...a.s.l.s..t............t-l.pth...hs....h...h...p.l....h.................................t.................................................................................................................................. 0 151 249 342 +5553 PF05725 FNIP FNIP Repeat Yeats C anon Yeats C Repeat This repeat is approximately 22 residues long and is only found in Dictyostelium discoideum. It appears to be related to Pfam:PF00560 (personal obs:C Yeats). The alignment consists of two tandem repeats. It is termed the FNIP repeat after the pattern of conserved residues. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.03 0.72 -4.00 34 6356 2012-10-02 21:32:02 2003-04-07 12:59:11 7 134 22 0 5165 6328 244 42.60 32 40.91 CHANGED FN.Qs.lphsslP.............................pulppLph.ussFsp.lp..hssLPpSlppLphupp ..............................................................aN..p....l..t.......s...s...l....P...................................................................s..o..l......p...pL.pF.....G....p.......p....F....N...Q....s..lp.............s...s....l...P..s...S..lp.p.Lphu................... 0 2305 5161 5165 +5554 PF05726 Pirin_C Pirin C-terminal cupin domain Yeats C anon Yeats C Domain This region is found the C-terminal half of the Pirin protein. 21.30 21.30 21.30 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.24 0.72 -3.95 34 2522 2012-10-10 13:59:34 2003-04-07 12:59:11 8 12 1617 3 1004 2315 1370 103.60 28 34.49 CHANGED alDlpLpsGuchphs.hspsapshlYll...pGs.s...........plsupp.........lstpplslhu...sGsplhlpus...pss+....hlLluGcPlsEPlhhaGPFVMsop-EIcpAhp-acpG..+Fs ................................................................hDlplp.s.Gu.p..hsls....h.s...p....s....a.....p.shlYll...cGs..l.........................................pl..supp.......................lp.ss..p.hslhs.............sGs.p...l...p...l.p....As.............psuc..............llLl..u.G..c..Pl.sEPls.ha..GPFV.Msoc-EIppAhpDapsG.ch........................... 0 307 618 837 +5555 PF05727 UPF0228 Uncharacterised protein family (UPF0228) Bateman A anon SWISS-PROT Family This small family of proteins is currently restricted Methanosarcina species. Members of this family are about 200 residues in length, except for Swiss:Q8TMK1 that has two copies of this region. 
Although the function of this region is unknown the pattern of conservation suggests that this may be an enzyme, including multiple conserved aspartate and glutamate residues (Bateman A. pers. obs.). The most conserved motif in these proteins is NEL/MEXNE/D, where X can be any amino acid, which is found at the C-terminus of these proteins. 25.00 25.00 30.80 29.90 21.10 21.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.83 0.71 -4.39 13 26 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 5 0 26 25 0 120.50 39 64.74 CHANGED MThNYoI-Ysoshhst+YYIhlDKDch.DltcE.LpK-......csWT.s....sI+KGDYYlIsVSEQAIcDcsFLslLcK.NlQVKKhVaChIpF..sDGS.......+s.WIPEcDAlRIKNELEpNEsVhoVthDYlhs .................hshNYsI-YNsshhsscYYIhVDcDch.slhpE...LpKs......csWh.s........lKKGcY.YlI.lSEpslpDcsF............LslLcKpsLQlKKhVhChIpF..ucGo.......cs.hIsEcDAl+IKNELEpNEpllhVth-.l..s........ 0 9 13 13 +5556 PF05728 UPF0227 Uncharacterised protein family (UPF0227) Bateman A anon SWISS-PROT Family Despite being classed as uncharacterised proteins, the members of this family are almost certainly enzymes that are distantly related to the Pfam:PF00561. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.98 0.71 -4.57 9 1870 2012-10-03 11:45:05 2003-04-07 12:59:11 7 4 1161 0 305 1448 549 179.10 38 93.88 CHANGED hllYLHGFsS.SPtsHct.l.hpahspcshhlshss.p.tcp.pphlpclcphltphsscp...shlsGsuLGGYaAphlu.hpGl+p..VlhNPsltPhcshsshlsc............Epa+.hphKslpsht.............hcp.s+hhllhu+tDElLD...pcssuchpshhphVh-ss.sHtFpshucalpplhuFts ............................................................................llYL.HGF...sS..SP..........t......s.........t....c..........t.....s..l....h....h......p...a........lst...c.h................l........p...h.............s............p........................................p....c...h.........h....c..............l.........-.....p....h......l....t.....p...p......s.s..cp................hllGsSLGG...YaA.......p.h....l....u..........h.....h.......s.....l...tt..........V..l....l.............N...P..u...l..p...P...a......E......h...p....s...h........l..s...p....................................................................................E.......p..h..p......h..s.....h....K..s.....h.ph.......................................................cs.....D.phhllhppsD......E......l......LD.......pps...s...t.....h.....h.....t.....s.....h.....h.....p.........h.....V...h....-ss..sH.tF...p..s.hp.s...ahp.p.IhsFh.s.............................................................................................................................................................. 0 55 142 229 +5557 PF05729 NACHT NACHT domain Bateman A anon [1] Domain This NTPase domain is found in apoptosis proteins as well as those involved in MHC transcription activation [1]. This family is closely related to Pfam:PF00931. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.90 0.71 -4.59 26 5874 2012-10-05 12:31:09 2003-04-07 12:59:11 7 749 661 0 4173 9193 904 159.70 19 16.53 CHANGED cslllpGpAG.GKTsLhpplshtWApuph.p......hphlFahss+phspts.......uLs-lltsphspsssshsc..............h...lhphspRlL..............hllDGhDElts..s...........p.t...hLLtsLlpcplLspsplllTsRssuhtp...ltptlpps...............hhpltuFspsptpphh.pcaassp .....................................................................................hlhlpG.sGhGKo.s....l....s.........p.........p.........h..........h.........h.........p..........h........s........p........s......p........................pt................h..th.....h.....a...h...h..s...h..p....p...h..s..t......p.t.......................sl.t.p....h..l......h.....p........t......h....s....p....h....t....t......h.tp............................................................................h...h.....p.....p...............t.....p.....l..l....................................................................h..l.l..D.G...l..DE...hp...t..p......................................................................h...h..l...t..........p...l.......l........p......t..................h....s.....p....s........p......l....h.......l...o.......o...R..s..t...t..h..............h..t.....h....................................h.l.........h............t...............t................................................................................................................................................................... 0 1337 2441 3535 +5558 PF05730 CFEM CFEM domain Bateman A anon [1] Domain This fungal specific cysteine rich domain is found in some proteins with proposed roles in fungal pathogenesis [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.52 0.72 -4.15 114 969 2009-01-15 18:05:59 2003-04-07 12:59:11 6 17 154 0 757 948 0 66.30 25 23.45 CHANGED thsslPsCAhsChsp.sh......ss..Cs...hsDhs....ChC...stsshtsslssC.....lt..ssC.s.ss.......ps..............ssh....................shss.shCu ..................t..stlPsC.u......h..s.Chtp.sh....s......ss...Cs........hsDhs........Cl..C........sps....s....ht...s......s...ltsC.......lt.....ss..Cs..sp.......-t...........................ssh....................shsp.shCs............................ 0 152 421 636 +5559 PF05731 TROVE TROVE domain Bateman A anon Bateman A Domain This presumed domain is found in TEP1 and Ro60 proteins, that are RNA-binding components of Telomerase, Ro and Vault RNPs. This domain has been named TROVE, (after Telomerase, Ro and Vault). This domain is probably RNA-binding. 21.30 21.30 28.20 28.00 20.30 19.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.09 0.70 -5.70 9 567 2009-01-15 18:05:59 2003-04-07 12:59:11 6 42 303 6 249 524 40 237.30 18 39.07 CHANGED lpsspGshsapssspscltp........h....LshuspssoaYtstpc.spsphpcll+tltpt-sphllphhhhhppctphtchsshlhAlAlhtp.s.....+...................tshchhscVs+hPocLhphlpa..................ph..tsspptsshs+slR+ulschasphs....thtLsKY...c.pRsuhph+DlhRhsH.cssss................thsthhpahhct........................h.h...h.pphhptpssptlphLpua.......p.....t.hpchhh........................................shE+.los+ls............stpVWcsLlps..hPhhAhLRNLsslscsGV.spsps....hVhpRLsctctlc+SRhaPachLsAhcshspucup.+sp..............................................................hpW.ss.plhp....ALEpAhthu.hcNlsPhss 
.......................................................h...................................................................h..hht..h.......p.s..h..hp....h.hpt.t..h...h.p...hhhs...hh............................t...................................................................hh..hh.thhp.sp.hhthhth................................................................................................................hst..pthh.phh.p.h.....t.thhph.......p..tt.t.p.tc.....p..t.......................................................hh......h.........................................................................................................................................................................................................................................................Wttlh.p...hsh.th.hhpLtph.t.s..h..t....................lhthltp...t........lhpuh.hPhphh.A.p.h..t........................................................................................................................................................lt.uh.hs....h.............................................................................................................................................................................................. 0 97 149 203 +5560 PF05732 RepL Firmicute plasmid replication protein (RepL) Moxon SJ anon Pfam-B_5929 (release 8.0) Family This family consists of Firmicute RepL proteins which are involved in plasmid replication. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.05 0.71 -5.03 4 349 2012-10-04 14:01:12 2003-04-07 12:59:11 6 2 297 0 26 301 50 118.70 27 83.06 CHANGED hppRhtshhpGoEpaINpsoGElhEhphlhhcppshNFsKsalppllphLDLlGNpKs+ls.allcNLN.psNslItTsRclAccTGhSLpTVppThKhLp-uNhlK+ps.GVhhlNP-lLh+Gscpc+h.lLlpFpph-pEspchp.........................................EsuLhpht.hK .....................................................................................................................................................................h.-..l......l..G.....p..K.....c......h..cllp.a..lh..-.s...h-...ps....s.........h....l.....h.....h...s...h.s......-..l......p...cc..l.....s......h..S.+.sT.lhpshKhLc-pp.ll...p.....+.h......p...........p...........G......h.........Yh...lN........................................................................................................ht.................................................. 1 7 19 23 +5561 PF05733 Tenui_N Tenuivirus_N; Tenuivirus/Phlebovirus nucleocapsid protein Moxon SJ, Bateman A anon Pfam-B_5998 (release 8.0) & Pfam-B_19756 (release 10.0) Family This family consists of several Tenuivirus and Phlebovirus nucleocapsid proteins [1,2]. These are ssRNA viruses. 
25.00 25.00 25.10 26.50 18.10 24.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.34 0.70 -5.07 12 395 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 94 10 3 299 0 203.80 40 85.00 CHANGED -hpchh.chuspAlc.....csplhsalsphsYpGaDAt+ll.pl..Lpc+u...Gcsat+DlphhIVhphTRG.....sh.cKh...hsstGucpsppLls+Yslhp..Gssu.DsITLuRlAuhFsshohpslpslo..s+LsVsuoshssh.uhsh.hh.hhPpFhsL.ssohP....cshtphlhshHhLahh.ho+ph.ssh+tcp+s.....-llchhsp.hshshsup.lsspc....R+clltsFtl ..................phpchhs-hupptls.....hs-l.sastphtYtGaDstplI.tl...LK-pG.........G-shsK.DhphhIlhthsRG.....shhpKh...hsstGuppstsLls+YGlhpp..Gss.uhssITLuRlApshs.shoppssttlp..thhsVsusshshh.ups.shh.hhPpFhtL.s.ohs....cuhs+hLhssH.la.hphoKph.s.ch+ttp+s.....-hhp.hpt.hhhsspSt.h.pcp....++.h...h...................... 0 3 3 3 +5562 PF05734 DUF832 Herpesvirus protein of unknown function (DUF832) Moxon SJ anon Pfam-B_7683 (release 8.0) Family This family consists of several herpesvirus proteins of unknown function. 18.80 18.80 21.90 72.40 16.90 16.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.36 0.70 -5.15 13 32 2009-09-11 10:40:49 2003-04-07 12:59:11 6 1 24 0 0 29 0 222.20 27 50.08 CHANGED MphslPVhGlspppp....pcWppllssFtscsssspsls.L++hFc.......tcschuhLuSLllLhphltsspptpp+ts.Llpsh.ts+hlApplacclhspps.pps.h-phFt-C+sRLtLLLEpsCGChpChpsscuLpcsp.thhR.P+LpPHpppstutshLsplaNpslLssssslscapltsLh.sscphsshs.chpsEsshlusClhhCWLaalLppalps-lpslcpsl ..hp.slslhhlspcpp....ppapplhssFts..psssspslt.l+phap........pschuhLuuLlhLhphl.sstphtp+.s.Ll.shptshalspplah+lhspps.tps.hcphatcstsRLthllEpuCGC.pChpssctLpphp.thhcPP+lpPHpcpC.utshLstlhppslLssshslStasls-Lh.ssppa.thsssh...phEhshlsoCL.hCWlahllhcalpp-hthLcpsl. 0 0 0 0 +5563 PF05735 TSP_C TSPC; Thrombospondin C-terminal region Bateman A anon Pfam-B_1875 (release 8.0) Family This region is found at the C-terminus of thrombospondin and related proteins. 
25.00 25.00 25.40 26.50 18.50 23.90 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.24 0.71 -4.75 10 444 2009-01-15 18:05:59 2003-04-07 12:59:11 7 85 95 6 222 377 2 188.30 65 21.57 CHANGED QsDPsWVVhspGsEIhQslNSDPGLAVGccsasuVDF-GTFFIsssoDDDYlGFlFuYQssu+FYlVMWKKusQoYWpspPFcAsApsGlpLKlVcSsTGPGptLRNALW+TGsTssQV+LLW+DPhslGWKc+TAYRWpLpHRPsI.GhIRlphY-Gs+LlhDSGNIaDsTl+GGRLGVFCFSQEpIIWSNLpY+CNDsl.P .................QIDPNWVVh.sQGhElVQTh.N...S.D.PGLA.VGa................stFsuV.DFpGTFaV.....N..T.s..p...DDDYAGFlFGYQ......s......S......SpFYVVMWKQ....s.pQTYWpssPhRAhu.sG.lp.....LKs.VpSs..TGPG..EaLR.NALW.HTGsTss.....QVRhLW..+.D.P.+Nl..GWKDhTuYRWpLpH.RPps.GaI..R....VhhY.EGpplhAD..SGslhDp..T..htGG.RLGlFsFSQE.lhaSsL+YcCp-.............................................................................................. 0 43 61 129 +5564 PF05736 OprF OprF_membrane; OprF membrane domain Bateman A anon Pfam-B_4079 (release 8.0) Domain This domain represents the presumed membrane spanning region of the OprF proteins. This region is involved in channel formation and is thought to form an 8-stranded beta-barrel [3]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.14 0.71 -4.76 4 245 2012-10-03 17:14:37 2003-04-07 12:59:11 6 2 226 0 16 668 18 163.40 62 55.34 CHANGED MKLKNTLGhAIGSLlAATSFGALAQGQGAVEhEsFhKKpasDSV+phcs.G...GuSlGYFLTDDVpLsLoYDcsppsRusDsTGspKltGspouLcA.YHFGssG.DuLRPYVpuGhuHQSlsNl.ssGpsGRsQSThAssGAGsKaYFT-NhYARAGVEApYsLDpGch....-auAhVGlGVNFGGuu ......................................................................................TLGlsI.G.o.llAA....T..ShG..A..LA......Q...G.....Q.....G..A...V..E.hE....h....F...h....K..K.....a...D.....S...s.....+.....s........h.....c........s........G..........Gu...u.lGY.....FLTD....D....V..pL..t..L......u.....Y.s..c....s....H....s...s.......R...u.........D....s........s..t....s....p.....p.....I....K..G..s.s...T.....uL....DA..h..YH......F.ss......P.G...DsLRP.Y..VS.A..G.....F..u......a.....Q..S..l.......s....................ps....u.....+........s..G.....R...........D...p....S..T..F..........A......N.......l.......G....u..GsK.aY.FT-NFY..ARAGV.....E.A.pY..N...I.DpGcs...........E.W.uP..uVGlGVNFGGu.u.................................................................................... 0 2 4 11 +5565 PF05737 Collagen_bind Collagen binding domain Bateman A anon Pfam-B_5000 (release 8.0) Domain The domain fold is a jelly-roll, composed of two antiparallel beta-sheets and two short alpha-helices [1]. A groove on beta-sheet I exhibited the best surface complementarity to the collagen. This site partially overlaps with the peptide sequence previously shown to be critical for collagen binding. Recombinant proteins containing single amino acid mutations designed to disrupt the surface of the putative binding site exhibited significantly lower affinities for collagen. 
20.80 20.80 20.80 20.90 20.60 20.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.58 0.71 -3.99 15 1660 2012-10-02 17:35:21 2003-04-07 12:59:11 7 115 482 8 145 1485 0 134.50 19 22.34 CHANGED ssshaKpGhhDsssss...l+WplslNtshpsl..csAslpDhlssGQsLshsSlclhchshptstpssths............ptssstshtpsssssFplshss...slssuhpIsYpTpIT-tstp......papNpA.....pLssssl-shpss .............................t...h.Kh.G....hs.....spssp.....lpWp.lslN.......t..s..pppl....pss.slsD.s......h.s......s........s........p.......p....l....s..............s...S.l..p.......l.....hp.s..s.....hsts..t.sh.s.....ht....................s..t...t.hs...s.t.s.s.s.s.....s.a.s.lpatp...........thspuahlpYpT..pl...s...st...s..p................ph.pNpu......ph..st.p........s........................................................................................... 1 80 110 128 +5566 PF05738 Cna_B Cna protein B-type domain Bateman A anon Pfam-B_366 (release 8.0) Family This domain is found in Staphylococcus aureus collagen-binding surface protein. However, this region does not mediate collagen binding, the Pfam:PF05737 region carries out that function. The structure of the repetitive B-region has been solved [1] and forms a beta sandwich structure. It is thought that this region forms a stalk in Staphylococcus aureus collagen-binding protein that presents the ligand binding domain away from the bacterial cell surface. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.39 0.72 -4.21 135 13573 2012-10-02 19:08:27 2003-04-07 12:59:11 8 388 1204 80 1267 12494 454 73.40 23 22.10 CHANGED lpGsphpLhspssssht.......................hsTsssGphphssL......ssGs.YpltEsp.sPsGYphsssshthsht.spt.......ttlp ..........................................................................................ltGspa..pl..h...s..p...s...s.ph.htp..............................................................................hs.T.....s...p..s.....G.....p.....h..p...h...s.....s..L..............................tsG..p...Yplp..E.....s......p...u....P.....s.........G..Y.p.h...s...p...s.s.h...t.h.phtttt..........thh............................................... 0 652 998 1144 +5567 PF05739 SNARE SNARE domain Moxon SJ anon Pfam-B_6285 (release 8.0) Family Most if not all vesicular membrane fusion events in eukaryotic cells are believed to be mediated by a conserved fusion machinery, the SNARE [soluble N-ethylmaleimide-sensitive factor (NSF) attachment protein (SNAP) receptors] machinery. The SNARE domain is thought to act as a protein-protein interaction module in the assembly of a SNARE protein complex [1]. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.79 0.72 -4.28 105 4036 2012-10-03 05:55:03 2003-04-07 12:59:11 14 51 377 62 2609 3820 24 61.80 22 22.43 CHANGED cpccslpplppslt-Lcplhh-lus.lcpQschlDcI-ssl-pspsplcpuppc..l..pcshphpp ...................ppcpplpplppsltcLpplhh-lup.l..c.p.Qs.c......h.l..Dc.........I-psl-pspsplcpuppc..l...p+shp................. 0 824 1372 2046 +5569 PF05741 zf-nanos Nanos; Nanos RNA binding domain Moxon SJ anon Pfam-B_5908 (release 8.0) Family This family consists of several conserved novel zinc finger domains found in the eukaryotic proteins Nanos and Xcat-2. 
In Drosophila melanogaster, Nanos functions as a localised determinant of posterior pattern. Nanos RNA is localised to the posterior pole of the maturing egg cell and encodes a protein that emanates from this localised source. Nanos acts as a translational repressor and thereby establishes a gradient of the morphogen Hunchback [1]. Xcat-2 is found in the vegetal cortical region and is inherited by the vegetal blasomeres during development, and is degraded very early in development. The localised and maternally restricted expression of Xcat-2 RNA suggests a role for its protein in setting up regional differences in gene expression that occur early in development [2]. 21.30 21.30 21.40 23.40 20.90 18.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.26 0.72 -3.99 36 305 2009-01-15 18:05:59 2003-04-07 12:59:11 8 4 133 4 168 260 18 55.30 54 25.74 CHANGED CsFC+N.............NGEspplYpSHtL...............KDtcG+...VhCPlL.RsYsCPlCGAsGDpA....HTlKYCPhs ....CsFC+p...............NGEuctlYsSHtL...............Ks...cGp...VhCPlL.Rp.Y.sCPlCGAo.G-pA....HTlKYCPh.t... 0 34 47 129 +5570 PF05742 NRDE DUF833; NRDE protein Moxon SJ, Eberhardt R anon Pfam-B_6481 (release 8.0) Family In eukaryotes this family is predicted to play a role in protein secretion and Golgi organisation [1]. In plants this family includes Swiss:A9X6Y0, which is involved in water permeability in the cuticles of fruit [2]. Swiss:P54797 has been found to be expressed during early embryogenesis in mice [3]. This protein contains a conserved NRDE motif. 
20.70 20.70 20.80 20.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.92 0.70 -5.09 12 889 2010-07-12 16:41:27 2003-04-07 12:59:11 7 4 729 0 464 881 260 231.70 27 87.70 CHANGED MCllhhtapsps....pacLllAuNRDEaasRPoh.ht.Wt.sss.plLuGhDlcp.......uGTWlGlspcG...+huslsNlpps.cp..st..hSRGtLV.s-aLsu.ssss...tpahcpLt...ppupcasG......FNLlhs-hp............clphhoNpsst..h...tLssGhashSNu..cssW.Ksptu+phhtchlt..pssp...ptl.hppLhplhssp...........thhsDspl.ppus.h.php+..h..Louhalc....stpYGTRusTllhVcpcu.csphhERphtspss..........hppschph ............................................................MCllhh...thpsps............th.LllhuNRDEa.....hpR.P..o.t...s..h........t.h.W...............t.......s..............s..............s....p.......l..luGp.Dhps.............GGTWlGl.s..p.p..G.................+hAsLTN..............hp..p......s....t....t...........................s...s................t.SR.GtLl.t.....pa.....L.......p......u....s.ss......................pa.h.pplt.........t.p..s...p.t.....Y..s..G..............................FNLlhu-hp............................t...hhh.....hs.....s.p.st.........t...............................................................l......s.............sG.........h.a..........s..lSN.uh................h.........s.....s.....s........W....Kh.h.t............tpthhtph.lt.............tt..tt........................pthh..thht.sp..........................................................hss....t..t.l.....pp.uh.......thtp...............h.u.s..hh.....lt.................t...YGT.Rspohlh...lp.tp......t....ph.hhEt...h........................h........................................................................................................................................... 1 145 263 376 +5571 PF05743 UEV Tsg101; UEV domain Moxon SJ, Bateman A anon Pfam-B_6022 (release 8.0) Domain This family includes the eukaryotic tumour susceptibility gene 101 protein (TSG101). 
Altered transcripts of this gene have been detected in sporadic breast cancers and many other human malignancies. However, the involvement of this gene in neoplastic transformation and tumorigenesis is still elusive. TSG101 is required for normal cell function of embryonic and adult tissues but that this gene is not a tumour suppressor for sporadic forms of breast cancer [1]. This family is related to the ubiquitin conjugating enzymes. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.49 0.71 -4.43 23 474 2012-10-02 15:28:41 2003-04-07 12:59:11 8 17 278 17 281 627 11 113.90 38 28.07 CHANGED shc-lhsllptap....sLpspscsasa.sDGpsppLLsLpGTIPlpapGss.YNlPlhlWl.csYPh......ssPlsalcPTssM......sI+hsc.aVDspGcl.aLPYLppWsh...sSs.....LlsLlpphts....sFsccsPl ..........................................................hpchhphht.a.....sLp.h.hc..sasa..scGp..sp..Lls..LsG.T.I.PV.ac.Gss...YsIPl..slWl.csYPh...........................sPP.ls.a.V.p...PTssM........hI+...suc...aVDs....s.G.+l...YlPYL..p..p..Wpp..........spSs..............LlsLlphhhs....hFuc-PPl................................................. 0 94 152 221 +5572 PF05744 Benyvirus_P25 Benyvirus P25/P26 protein Moxon SJ anon Pfam-B_6153 (release 8.0) Family This family consists of P25 and P26 proteins from the beet necrotic yellow vein viruses. 
25.00 25.00 29.20 133.60 20.80 20.70 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.62 0.70 -5.03 3 211 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 6 0 0 182 0 213.60 72 99.80 CHANGED DlDphMPVFDlAY.DssHtPYll+RosHEVVhsVussGFICYPL.V..DLNDssslsshlYHN+lKTMRLpVsIpNNcusassFRAhsRFlVFSTPslSsWVNNGCpSLFSPFVGVNSsIDcpLL+RDS+GloVLaDRVY+VsRaT-hF.sVDFThNFRGPGNYsLsNusNaPsATTsDSIYVACVssWlsNsVFRLpSDSVuWVHSGLapGPVL-FGQsL.sAPDcD.DGVsDDG ....MGDILGAVYDLG.....HRPYLARRTVYEDRLILSTHGNICRAI....NLLTHDNRToLVYHNNTKRIRFRGLLCAhHsPYCGFRALCRVMLCSLPRLCDIPINGSRDFVADPTRLDSSVNELLV...SNGLVIHYDRVHcVPlHTDGFEVVDFTTVFRGPGNFLLPNATNFPRPTTTDQVYMVCLVNTV.NCVLRFESELTVWVHSGLYsGDVLDVDNNVIQAP....DGVDDs..... 0 0 0 0 +5573 PF05745 CRPA Chlamydia 15 kDa cysteine-rich outer membrane protein (CRPA) Moxon SJ anon Pfam-B_6389 (release 8.0) Family This family consists of several Chlamydia 15 kDa cysteine-rich outer membrane proteins which are associated with differentiation of reticulate bodies (RBs) into elementary bodies (EBs) [1]. 23.60 23.60 144.20 143.90 23.50 23.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.94 0.71 -4.41 5 37 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 35 0 4 18 0 131.10 62 84.93 CHANGED MS........ststsu..psllslVpshsup.ulspslpsposslTLlN.llGWs+h+hlpPlRsSKIlpSRAFQITLlVLGIlLVIAGLALhFlLpuQLGsNAFaL.lIPAlIGLVKLLlTSLsM.EcsCTPEKW+LCK+lLtToEDILDDGplNNSNKIF .....................M...........STVPVVQGAGSS.NSAQDISsposPLTLKp..............RISNLLSSTAFKVGLVVlGLLLVIA..sLlFLVSAASFVNAIYL.uIPAIlGCVNICVGILSM.EGaCSPERWhLCKKlLKTSEDIIDDGQINNSNKVF... 0 1 1 3 +5574 PF05746 DALR_1 tRNA-synt_1d_C; DALR anticodon binding domain Bateman A anon Pfam-B_196 (release 8.0) Domain This all alpha helical domain is the anticodon binding domain in Arginyl and glycyl tRNA synthetase. This domain is known as the DALR domain after characteristic conserved amino acids [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.55 0.71 -4.14 104 7484 2012-10-02 19:03:26 2003-04-07 12:59:11 10 22 4857 7 2023 5803 2941 115.90 26 19.29 CHANGED lQYAaARlpSlh+...+uspth.phttst.hh..............pc.pEhpLhtpLhpFscslppuscp...........hpPchlspYLhcLAstFspFY...p........ps....lls............scpp.h...........puRLtLspsstpsLcpuLpLLGlps.-+M .......................................................hpYAasRlpuIl+..........Kus..t..p..h......t.h...t.s..ss.hh................................pcs..pEhpL.h.ppltp..asc..hlp..ssspp................................tpPpt...lspY..Lh.c.Lussas.pFY..s...............ps..........hlls......................p-.s..p..t....................psRLs...Lhptstpslp..puh.t.h.l...Glp.s.ppM................................... 0 661 1256 1662 +5576 PF05748 Rubella_E1 Rubella membrane glycoprotein E1 Moxon SJ anon Pfam-B_6726 (release 8.0) Family Rubella virus (RV), the sole member of the genus Rubivirus within the family Togaviridae, is a small enveloped, positive strand RNA virus. The nucleocapsid consists of 40S genomic RNA and a single species of capsid protein which is enveloped within a host-derived lipid bilayer containing two viral glycoproteins, E1 (58 kDa) and E2 (42-46 kDa). In virus infected cells, RV matures by budding either at the plasma membrane, or at the internal membranes depending on the cell type and enters adjacent uninfected cells by a membrane fusion process in the endosome, directed by E1-E2 heterodimers. The heterodimer formation is crucial for E1 transport out of the endoplasmic reticulum to the Golgi and plasma membrane. In RV E1, a cysteine at position 82 is crucial for the E1-E2 heterodimer formation and cell surface expression of the two proteins. The E1 has been shown to be a type 1 membrane protein, rich in cysteine residues with extensive intramolecular disulfide bonds [1]. 
25.00 25.00 89.80 89.70 18.90 16.00 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.81 0.70 -5.89 2 838 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 11 0 0 460 0 312.50 94 88.37 CHANGED LTAVVLQGYNPPAYGEEAFTYLCTAPGCATQsPVPVRLAGVRFESKIVDGGCFAPWDLEATGACICEIPTDVSCEGLGAWVPAAPCARIWNGTQRACThWAVNAYSSGGYAQLASYFNPGGSYYKQYHPTAC-VEPAFGHSDAACWGFPTDTVMSVFALASYVQHPcKTVRVKFHTETRTVWQLSVAGVSCNVTTEHPFCNTPHGQLEVQVPPDPGDLVEYIMNYTGNQQSRWGLGSPNCHGPDWASPVCQRHSPDCSRLVGATPERPRLRLVDADDPLLRTAPGPGEVWVTPVIGSQARKCGLHIRAGPYGHATVEMPEWIHAHTTSDPWHPPGPLGLKFKTVRPVALPRsLAPPRNVRVTGCYQCGTPALVEGLAPGGGNCHLTVNGEDlGAhPPGKFVTAALLNTPPPYQVSCGGESDRAoARVIDPAAQSFTGVVYGTHTTAVSETRQTWAEWAAAHWWQLTLGAICAL.LAGLLACCAKCLYYLRGAIAPR ..............................................................................................................................................................................FHTETRTVWQLSVAGVSCNVTTEHPFCNTPHGQLEVQVPPDPGDLVEYIMNYTGNQQSRWGLGSPNCHGPDWASPVCQRHSPDCSRLVGATPERPRLRLVDADDPLLRTAPGPGEVWVTPVIGSQARKCGLHIRAGPYGHATVEMPEWIHAHTTSDPWHPPGPLGLKFKTV.RPVsLPRALAPPRNVRVTGCYQCGTPA.LV...EGLAPGGGNCHLTVNGEDVGAFPPGKFVTAALLNTPPPYQVSCGGE.................................................................................. 0 0 0 0 +5577 PF05749 Rubella_E2 Rubella membrane glycoprotein E2 Moxon SJ anon Pfam-B_6726 (release 8.0) Family Rubella virus (RV), the sole member of the genus Rubivirus within the family Togaviridae, is a small enveloped, positive strand RNA virus. The nucleocapsid consists of 40S genomic RNA and a single species of capsid protein which is enveloped within a host-derived lipid bilayer containing two viral glycoproteins, E1 (58 kDa) and E2 (42-46 kDa). In virus infected cells, RV matures by budding either at the plasma membrane, or at the internal membranes depending on the cell type and enters adjacent uninfected cells by a membrane fusion process in the endosome, directed by E1-E2 heterodimers. 
The heterodimer formation is crucial for E1 transport out of the endoplasmic reticulum to the Golgi and plasma membrane. In RV E1, a cysteine at position 82 is crucial for the E1-E2 heterodimer formation and cell surface expression of the two proteins [1]. 19.30 19.30 19.40 594.70 18.70 19.20 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -12.04 0.70 -5.15 3 68 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 10 0 0 85 0 267.00 97 26.88 CHANGED GLQPRADMAAPPsPPQPPRAHGQHYGHHHHQLPFLGHDGHHGGTLRVGQHHRNASDVLPGHWLQGGWGCYNLSDWHQGTHVCHTKHMDFWCVEHDRPPPATPTPLTTAANSTTAATPATAPAPCHAGLNDSCGGFLSGCGPMRLRHGADTRCGRLICGLSTTAQYPPTRFGCAMRWGLPPWELVVLTARPEDGWTCRGVPAHPGTRCPELVSPMGRATCSPASALWLATANALSLDHALAAFVLLVPWVLIFMVCRRACRRRGAAAA GLQPRADMAAPPsPPQPPRAHGQHYGHHHHQLPFLGHDGHHGGTLRVGQHHRNASDVLPGHWLQGGWGCYNLSDWHQGTHVCHTKHMDFWCVEHDRPPPATPTPLTTAANoTTAATPATAPAPCHAGLNDSCGGFLSGCGPMRLRHGADTRCGRLICGLSTTAQYPPTRFGCAMRWGLPPWELVVLTARPEDGWTCRGVPAHPGTRCPELVSPMGRATCSPASALWLATANALSLDHALAAhVLLVPWVLIFMVCRRACRRRGAAAA 2 0 0 0 +5578 PF05750 Rubella_Capsid Rubella capsid protein Moxon SJ anon Pfam-B_6726 (release 8.0) Family Rubella virus is an enveloped positive-strand RNA virus of the family Togaviridae. Virions are composed of three structural proteins: a capsid and two membrane-spanning glycoproteins, E2 and E1. During virus assembly, the capsid interacts with genomic RNA to form nucleocapsids. It has been discovered that capsid phosphorylation serves to negatively regulate binding of viral genomic RNA. This may delay the initiation of nucleocapsid assembly until sufficient amounts of virus glycoproteins accumulate at the budding site and/or prevent non-specific binding to cellular RNA when levels of genomic RNA are low. It follows that at a late stage in replication, the capsid may undergo dephosphorylation before nucleocapsid assembly occurs [1]. 
25.00 25.00 25.20 25.20 17.10 16.40 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -12.05 0.70 -5.02 3 110 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 10 0 0 117 0 208.20 97 34.49 CHANGED MASTTPITMEDLQKALEAQSRALRAELAAGASQSRRPRPPRQRDSSTSGDDSGRDSGGPRRRRGNRGRGQRKDWSRAPPPPEERQESRSQTPAPKPSRAPPQQPQPPRMQTGRGGSAPRPELGPPTNPFQAAVARGLRPPLHDPDTEAPTEACVTSWLWSEGEGAVFYRVDLHFTNLGTPPLDEDGRWDPALMYNPCGPEPPAHVVRAYNQPAGDVRGVWGKGERTYAEQDFRVGGTRWHRLLRMPVRGLDGDSAPLPPHTTERIETRSARHPWRIRFGAPQAFLAGLLLAAVAVGTARA MASTTPITMEDLQKALEAQSRALRAELAAGASQSRRPRPPRQRDSSTSGDDSGRDSGGPRRRRGNRGRGQRRDWSRAPPPPEERQESRSQTPAPKPSRAPPQQPQP................................................................................................................................................................................................... 0 0 0 0 +5579 PF05751 FixH FixH Moxon SJ anon Pfam-B_6803 (release 8.0) Family This family consists of several Rhizobium FixH like proteins. It has been suggested that suggested that the four proteins FixG, FixH, FixI, and FixS may participate in a membrane-bound complex coupling the FixI cation pump with a redox process catalysed by FixG [1]. 
21.90 21.90 22.40 22.20 21.70 21.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.71 0.71 -4.38 118 910 2012-10-01 19:30:51 2003-04-07 12:59:11 6 3 891 0 266 726 75 141.60 21 86.55 CHANGED +phhsa....hllshhusllssslshhhhAhsshs.uLVs-shYctupshstpl.....sctcttpsLGhp.splphss..........splplpl...tpGtP.htstplshplh+PTpsppDh.shhls...tsssG..hYpu...h.....hpGpWplcl................pssspta+hptclh .......................thasa.......hllhhh..hsllhsslshl.h...lA.h..p.s.....s...slVs...-sY....Y.cpu.pshs.t.ph.....sph.p.t..tp.p.hshp..sp.l.phps..............................sthplph....tpu.....ts......sp..tlp.lhhh+P...sp.....tp.....pD.......h...pl.lp........tsG.......haps.shpt...........hpG.pW.lcl................p..t.st.ah......h.......................................... 0 74 168 218 +5580 PF05752 Calici_MSP Calicivirus_MSP; Calicivirus minor structural protein Moxon SJ anon Pfam-B_6811 (release 8.0) Family This family consists of minor structural proteins largely from human calicivirus isolates. Human calicivirus causes gastroenteritis [1]. The function of this family is unknown. 21.20 21.20 21.20 72.00 20.50 20.40 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.07 0.71 -4.65 14 126 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 121 0 0 88 0 165.00 49 99.90 CHANGED MSWhsGALtsuGSLVDhAGTISsIVhQQRQls.h....pcQNcl.pcWhs+QEsLQcct.-lo+-LulpGPstRVpuAlsAGFsslsARRLAGSsERVhYGhLDRPIhptushpu.IppT+HLpshpuALosFKpGTs.aGpPsPPphthspPt.pssssplNL.GasPsSSsl ..........MSWLVGALQssGuLVDhAGTVSsIVYQpRQls.L....cpQNpLhpsWMsKQEtLQ+ctM-Loc-LulNGPAhRVpuAl-AGFDsVSARRLAGSuERVIaGhLDpPIhptsshsu.lppTpHLsslsuuLuTFKpGTs.FGp.PsP....Pp.h.psGsPt....pss.sP.plNl.GapPGSSss........ 
0 0 0 0 +5581 PF05753 TRAP_beta Translocon-associated protein beta (TRAPB) Moxon SJ anon Pfam-B_6857 (release 8.0) Family This family consists of several eukaryotic translocon-associated protein beta (TRAPB) or signal sequence receptor beta subunit (SSR-beta) proteins. The normal translocation of nascent polypeptides into the lumen of the endoplasmic reticulum (ER) is thought to be aided in part by a translocon-associated protein (TRAP) complex consisting of 4 protein subunits. The association of mature proteins with the ER and Golgi, or other intracellular locales, such as lysosomes, depends on the initial targeting of the nascent polypeptide to the ER membrane. A similar scenario must also exist for proteins destined for secretion [1]. 21.60 21.60 21.60 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.08 0.71 -5.12 22 254 2012-10-03 16:25:20 2003-04-07 12:59:11 9 7 174 0 139 284 8 166.30 32 74.51 CHANGED hh.hh.lhhhhs.shupp-s........sARlLspKplLscahVpsc-lsVpYsIaNlGsusAhcVplsD.suF.stcsFplVuG..phosph-plsPsssloHshllcP+p.GhFshouApVoY.....+ssccssphQhuho..ot.upssIlup+shs+pFost....hh-WhuFushshsshsl.....PhhlaasSKspYpth.pK ..............................hh....h.hhhl.hh.h.shsps-p............sAtllspKpl.Ls+.hhVtuc-lslpaslYNhGsu.sAh-Vpl.sD...sua....s......s-sF.......pl.Vu...G..........hshph.-.RlsP..u..u.sl....oH.shVlc.P.pp.hGhFshosAploY......hsppp.....st.l.t.huho........os.sthsIls.+th.s++Fs.t............hhcahshsshsh.shhh.....shhlh..SKpphtt....tt................................................................... 0 45 68 105 +5582 PF05754 DUF834 Domain of unknown function (DUF834) Bateman A anon Pfam-B_9258 (release 8.0) Domain This short presumed domain is found in a large number of hypothetical plant proteins. The domain is quite rich in conserved glycine residues.\ It occurs in some putative transposons but currently has no known function. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.25 0.72 -4.17 18 515 2009-01-15 18:05:59 2003-04-07 12:59:11 9 34 5 0 411 501 0 64.80 32 26.11 CHANGED psssssshpsTspsscpssuscpsGuss.RlDsDsGAPsVsGpstGADElscssA+shssss.ocuDstssG ........................................t...........s.sttptstscp..uG..GusR..l.Ds...ssGsPAV...s..spstusDclstssAcPptsss.p....p...tcs.tuss.................... 0 0 0 12 +5583 PF05755 REF Rubber elongation factor protein (REF) Moxon SJ anon Pfam-B_6903 (release 8.0) Family This family consists of the highly related rubber elongation factor (REF), small rubber particle protein (SRPP) and stress-related protein (SRP) sequences. REF and SRPP are released from the rubber particle membrane into the cytosol during osmotic lysis of the sedimentable organelles (lutoids). The exact function of this family is unknown [1]. 25.00 25.00 26.50 26.30 22.20 21.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.33 0.70 -4.59 10 99 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 31 0 41 94 0 182.90 39 85.39 CHANGED -pspsEEc+...LKYL-FVQsAAlYslssFSsLYtaAKDpSGPLKPGV-sVEusVKsVVuPVYpKFasVPh-lLKFlD+KV-soVp-LDR+VPPlVKpASuQAhohh..hs.hs.tlAsEVppsGlhtsAp.......shspshhs+......tLhspYEPhAEphAVpsW+tLNpLPLhPpVAplslPTAAahSEKYNcsVshsu-+GYsVAsYLPLlPTE+IuKVFtc-s ..................................t...ptpptc...LKaLtFVphAuhpshh....thuslYtaAKcpu.GPL.+sGVpsVEssV+sVluPVYp+FcslP.-lLtFlD+KVD-slpc....lDc+lPs.....hlKpssspAhshhptss.............s.tlsuEsppsGshtssp......................................lhscaEs.hA.tp.hulpsWptLNphPhhPpVsphslPsAAahoEKYNpslhshsc+GYthstYLPllPhEcIu+sFt...t................. 0 4 29 35 +5584 PF05756 S-antigen S-antigen protein Bateman A anon Pfam-B_7194 (release 8.0) Family S-antigens are heat stable proteins that are found in the blood of individuals infected with malaria [1]. 
25.00 25.00 92.00 91.90 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.27 0.72 -4.01 4 57 2009-09-15 09:48:53 2003-04-07 12:59:11 6 1 6 0 1 58 0 62.60 79 62.61 CHANGED MNRILSVoFYLFFlYLYIYKTYGKVKNTDcELSNIYGsKYYLRsGhhNpKNGKGpKYEDLpEEtEGENDDEEDSNSEESsNDEENtLIEGQspu .......VoFYLFFlYLYIYKTYGKVKNTDpELSNIYGTKYYLRsGhFNcKNGKGpKYEDLEEE.................................. 0 1 1 1 +5585 PF05757 PsbQ Oxygen evolving enhancer protein 3 (PsbQ) Moxon SJ anon Pfam-B_6905 (release 8.0) Family This family consists of the plant specific oxygen evolving enhancer protein 3 (PsbQ). Photosystem II (PSII)1 is a pigment-protein complex, which consists of at least 25 different protein subunits, at present denoted PsbA-Z according to the genes that encode them. PsbQ plays an important role in the lumenal oxygen-evolving activity of PSII from higher plants and green algae [1]. 25.30 25.30 25.30 25.50 24.90 25.20 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.07 0.71 -4.90 18 175 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 69 5 82 172 2 161.80 26 81.35 CHANGED phsssusspssstRusls....lRAst.........t.t..pssRRAllG.LlAsulsuuuhspAshAs.........sh.htlshssP.sushsGs.spspARDhpLs.lKcR......FaltsLuPs..EAAtRsKcSAp-I.lsl+shI-+Ks..WsaVpspLRL+uuY..LRaDLsTllSuKP.c-cKpsLh-Ls.scLFpsl-sLDaAs+pKsss-ApKaYtcThssLs-VLApLu ..............................................................................................ht.................................................................................................................ts+sh.....t.t......hh...hsst..tshtRh+psApcl.hslcs..h....I...-ccs...WshlpptLRh+uua..L+hDLpsl....Isu.p.P.c..cc.+pslpcLs.scLFsslscLDhAs..+pKssspspphYtpshpsLs-lluhh........................................................... 
0 20 52 70 +5586 PF05758 Ycf1 Ycf1 Bateman A anon Pfam-B_6040 (release 8.0) Family The chloroplast genomes of most higher plants contain two giant open reading frames designated ycf1 and ycf2. Although the function of Ycf1 is unknown, it is known to be an essential gene [1]. 29.20 29.20 29.40 29.40 28.40 29.10 hmmbuild -o /dev/null HMM SEED 929 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.64 0.70 -13.90 0.70 -6.83 44 1439 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 1082 0 33 1491 2 450.30 52 62.57 CHANGED SVVlVGLYYGFLTTFSIGPSYLFLLRAplh..........EEGTEKc.............lSATT.GFIsGQLhhFISIYYuPLHLALGRPHTITVLsLPYLLF+..FFasN.pK...cFh..sasp...sspNSh...RNhsIQslFLNNLIF.QLhNhhILPSShLsRLVNIYhFRCNNKhLFVTSSFlGWLIGHILFhKWlt.....LlLhWIc.........p.NpS.................Icp...........Ipss......KY..LVscltN.h...+I.....................................................................FSILLFIoClaYLGRhPs.PlhTKKL..pp...............................................schcct........ccppphch-p..ct.....ttscpcpchs.tpps......lh..ccpts.pp.........................................................................................................................p.hp.......pp........phhhF.cKPllTlLFDYcRWNRPhRYI...K.Ns+F-..ssV...RsEhSQYFFtTCpSDGKpRISFTYPPSLSTFh-MI.p++ls.....lhshcK.h...s--.....LYsp...WlhoNcpK+ssLssEFlNRIcsLD+u.....lh........................h-lLEp..RoRL..C..sscscpc.........hLsKh.....................YDPhLNGsaRGp...............I..cchhSss...............................l......hsc...s.hcs.hcphh.....lN+laslLh............sssapchEpc......s..........hcslspph...h..........l.p.s.p...............................................................................................................................................t.p....l........tph.........pppppphhphlhshlhss..pppph.p....cs...................................ltI.pEIp......KKVPRW...SYKLhs-......lEptct-...cp...sst-ppIRSRKsK.+V....llhss.......pp...tp...............................................................................................
..................................................................................pcptc..-...hslhRY.SQQSDFRRclIKGSMRAQRRKslIWchFQsslHSPL.......FLD+h-Kh...hhF.sF..slsphhc........................hla+..........sa..hp.Kppchch.s...pccphpc.............Kppc.ph.....pcpcRlp.IAEtWDsl.hAQhlRGhlLlsQShLRKYIlLPsLIIuKNIuRlLLFQhPEWsEDac-WsREhHlKCTYNGlpLSEs....EFPcsWLpDGIQIKILFPFpLKPWHcSKhp.........................t.p...hpcpt.c+c................................................................sF..CFLTVWGhET-lPF.GsP+KpP....SFFcPIh ......................................................................................................................................SVVVVGLYYGFLTT..FSIGPSYLFLLRA+VM..........EEGTEKc.............VSATT.GFIsGQLMMFISIYYAPLHLALGRPHTITVLsLPYLLFH..hF....hNN....pp.....p.hh.............s.GS............TT.RNSM..........RNL.SI...QCV.......FLNNLIF.QLFNH.F.I.LPSSsLARLVNIYMFRC.NNphLFlTSSFVGW.LIGHIL...F.MK.WhG.......LV.LhWIc.........pNs........................................IcSN........................................................KY..LVSELR.NsMs..RI...................................................................................FS..ILLFlTC.VYYLG..Rh.PS.PlhTKKL...KEs..........................................................................................................................................SchEEh.............ctcpcsc..lEss..Eh.......ctspp-.pct...tpp.........................t..........pp........h..............................................................................................................................................................................hp.t...p.p.cs....ch.thhcp...................ph.h.a...h.....-K....sh.lThhFsacRWNRPhRYI....t...................................................................................................................................................................................................................................................................................................................
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 4 26 30 +5588 PF05760 IER Immediate early response protein (IER) Moxon SJ anon Pfam-B_6450 (release 8.0) Family This family consists of several eukaryotic immediate early response (IER) 2 and 5 proteins. The role of IER5 is unclear although it play an important role in mediating the cellular response to mitogenic signals [1,2]. 
Again, little is known about the function of IER2 although it is thought to play a role in mediating the cellular responses to a variety of extracellular signals [3,4]. 21.70 21.70 27.70 23.40 20.50 20.20 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.20 0.70 -4.49 10 171 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 48 0 78 166 0 176.10 27 97.89 CHANGED MEstl-AQpllSISLtKIaNSRsQRGGIKLHKNLLVShVLRsARQlYLS-+YutlYhttpttpstsssspp........................................................................................susstlpPPSutt........................................lsspspSP-....sss-P....u.hpsssucs.ssust..sssssGs.psspsosLDhsocVlTTVEsuhLpp............................................sCss..p.u.t.t.spsssspRKhpsuuhtS-uucss...........uhsPs.KRuRhE-.sspshu-sp-up.........puNloNLISIFGSuFSGLLS+pssusps...hstp..........hCsKpALusLusWoRAIVAF ...........................................................hth-ApplhslSlhKhhpSRhQRGGl+LH+sL.lohVhRsARp......l......Yhst........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 10 18 33 +5589 PF05761 5_nucleotid 5_nucleotidase; 5' nucleotidase family Bateman A anon Pfam-B_2948 (release 8.0) Family This family of eukaryotic proteins includes 5' nucleotidase enzymes, such as purine 5'-nucleotidase EC:3.1.3.5. 
27.00 27.00 27.40 27.30 24.90 24.70 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.40 0.70 -5.97 21 680 2012-10-03 04:19:28 2003-04-07 12:59:11 9 8 168 12 393 595 21 368.90 30 81.95 CHANGED lFsNRsLsLcsIchaGFDMDYTLAhYps.phEpLuF-hsh-+Lls.hGYPpslh.phpYDPpFslRGLhhDptpGNLlKlDtaGplt..ssaHGhc.Lss-Elt-hYssp.plphcpst..........pathlsTLFuLPEssLhAslV-ahp......................................sss.hphsappLacDV+cAlDpVHhcGpLKpclhpclp+Ylh+DsclshhLp+L+psGKKLFLlTNSsasYsDthMsYlhst..s..........sWRshFDlVIVsAcKPtFFs-sp.PhRplDscsGpLphsp................hssLc+G.........plYpGGsh.phpclhGh+.GsclLYlGDHIauDlhcoKKppGWRTshll.ELccElclhsscptthsp..LppLpslhsclpsphs...................................ctsshpt.slpphppchcplppshcphFN..aGuLF+s.GsppohFupQlpRYADlYsoplsNhLpYssthhF+us+s..hhPHEsss.ps ...................................lasspphphpplp...hhGFDhDYTLs.Yp..th.ctLhaphshchLl.p..htYP.pp..lh..p..h.p...a.-.s.s..Fs.h.R...GL...hhDh.hG.LlKlDt.at.l....hsh+G.h.p.l........tt.p.......-l.hchYssp..hl...ht.t.p..................phh..h.slFslP.EhhLhupll-hht..............................................................................p...t...p..h..h.s.....hphapD...V..p...s......Al......p...s....H.......h.......p..........G.............hh.......p.......t.......h.......h.......ps.p....+Yl...p.s.s.p.h...hhLpc........hpp.t.G.t.......p..lFLlTN.Ssapass..thMpa.....h.hs..................sWpshFDlllspAp...........KP.....tFFs...c...tp...sh+pl.s......pp..u.t....h...t....hs.p.................................................h.t...lp..pG................tlY...pt......G...sh.....t....h....h....c....l.............h........s.....h...c......G....p.....c......l..LY.hGDHlauDl..hpsph.t..puWRThhl..l.ELt..p..Elphhtppp...h.t...httLp...htph......................................................................................................................................p.tt....htt..h.tphp.h....t.h.p..as...hG.hh+s...p.ohF..pl.+auslYhuph.shh...a...hhahs........h.Hc.....s......................
.................................................. 0 140 203 293 +5590 PF05762 VWA_CoxE vwa_CoxE; VWA domain containing CoxE-like protein Bateman A anon Pfam-B_2956 (release 8.0) Family This family is annotated by SMART as containing a VWA (von Willebrand factor type A) domain. The exact function of this family is unknown. It is found as part of a CO oxidising (Cox) system operon is several bacteria [1]. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.30 0.70 -5.21 17 2445 2012-10-10 16:07:06 2003-04-07 12:59:11 9 3 1560 0 642 2460 867 184.40 23 42.62 CHANGED Ltctltt....hhhthstcR...ph.spt+st+lDh++TlRtslpputp..........hthhh+p..Rppcs.clVlLlDlSGSMptaoshahtlhpAlhtsht..cschFsFuTplsclTptl.pttDsspuLhcs..p..h...huGGTcIupuLtphtphhstps...tusVlllSD.....uh-psshstLtpplscLppput+llhLsPh..tthsuacsp.stulcAhhspV.sthtPscphpsl ................................................................................h.................h..h.....t..h..........t..tphDh..p..tTl..p.....t..s.h...t..p..hh...................................h.h...tp........+..p...s....p...s........p.lll..hlDsSG.SM.........s...........s...........a..........s.........t.......h..........t.......h......t.......l....h..p.....u.......s.....h..s...p.hp..........+s.cl.h..h...F.s....T...p......l.......s.......c..........h..........p.........t......h.......s.....................s..........p..h...l......h.hh.........p..........ht.......G.G.............T....c.......lu....p.sh.t....h....h..h......p..h...h......t....t..........t...............p...s...s.....h..lllSD........h.h..t..t....t............s....h...h...p...p...l.t.c...h....p.....t.+..hhh....hss.......thsth.................................h........................................................................................................... 
0 197 410 528 +5591 PF05763 DUF835 Protein of unknown function (DUF835) Bateman A anon Pfam-B_3039 (release 8.0) Family The members of this archaebacterial protein family are around 250-300 amino acid residues in length. The function of these proteins is not known. 28.10 28.10 28.80 37.30 26.90 28.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.79 0.71 -4.61 38 148 2009-01-15 18:05:59 2003-04-07 12:59:11 7 16 24 0 116 168 35 135.30 25 39.06 CHANGED G.uhhhsspp......hhthlcsh.p....sslsloRp...sPcpact.........lhWlT+sp...uc..s.....ulsPTpLthlh-hlhcalcp..tss....lVllDsLEYLlL.NGFculhKFLtsLKDhslhpsut.lIlhsc.cuhs-+-hslLc+Ehp ..............................p.....hh.hlpp.h.pth...ssLsloRp...sPcphp........th.hlWlopsp...sc....p........slsPopLt.hlhch.lhcalcp.ttps.....lVllDslEYLll.NG....FcslhKFLtsL+Dhs.lhpsuh.lllsl-.pslsc+phslLpRph........................ 0 19 27 73 +5592 PF05764 YL1 YL1 nuclear protein Bateman A anon Pfam-B_3088 (release 8.0) Family The proteins in this family are designated YL1 [1]. These proteins have been shown to be DNA-binding and may be a transcription factor [1]. 
29.00 29.00 32.00 29.70 28.90 28.90 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.92 0.70 -4.76 37 325 2009-01-15 18:05:59 2003-04-07 12:59:11 8 7 271 0 234 329 1 213.70 27 44.43 CHANGED htsRs+RusAGp+hppLLptE.-t--...pp...................hFpE---DpEaptppp-ppp................................................................Dsshsssp.s-...........cssss--t-uE+t.....ptcc..+++hhtpcthhc.hh...........h+h+h..............................phps.sssc.c.....tpppc.p.h.ohhss........tRpSsRpoThpppptsht+Lp-p-t+Rtph.t......t+++tptpp+thTQEE+LtEAccTEchNh+SLp+acchEtpKKc......pshpp+ph.pGPhlpahSst ................................................................sRspRssAGp+hpp.LLptE.c.p..D-h..t..t............................hFp.E..-..ps...D.c-aptppp-p-s..............................................Ds-hsps-.s-.............psssss-t-tc+...........................++.+..hhh.ppth..htsht............h+hph......................................................t.pt.tt.t.p............t.p..p.tp........h.s.ht..........................tRpShRposh...p.pt.th..+.h...pcppt.....pc..ht.......................tp+ct..t....tppc.hTQEEhLtEA.t..TEchNhcSL...ppapch.Etc+Kc........pshtp+t..h.tGPhlpahSh.s............................................................................. 0 76 128 195 +5594 PF05766 NinG Bacteriophage Lambda NinG protein Moxon SJ anon Pfam-B_7000 (release 8.0) Family NinG or Rap is involved in recombination. Rap (recombination adept with plasmid) increases lambda-by-plasmid recombination catalysed by Escherichia coli's RecBCD pathway [1]. 
22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.48 0.71 -4.55 19 590 2012-10-05 18:28:12 2003-04-07 12:59:11 7 4 389 0 37 440 99 170.90 48 96.91 CHANGED h..+ps+.+KCK..sCsshFsP.tpshQ.pVCSscCu......hthtp.....tppccpc+ptpptERpcl+tRKc+LK..s+u-ah+-sQtshNcalR.hRDh..shPCISC.......Gp...........hpsupacAGHYRosGAsspLRFp..NlHpQCshCNpahS.....GNlls..YRhsLlc+IGhEcVEhLE.usHpspKasI-EL+plpshY+uKh+cLc+......cuA ..........................................tp..cR+CK...CtphFhP.hht....sQ.hh...C.s..ppu............hth.tc.........ttpttcc..cp+...ppcpcp+c..ch+h+Kh...slK..s+u.hWl.+.p.AQpAhNtaIR..pR.....Dt.........s..hs....CISC.......Gs...........hp.u.up..aDAGHYRosuAsP....pLRFsEpNlHpQCs.sC.Np+.KS.......GNlVs...YR..lp..LIs...R...IGp...EAV-plE.....uNHst+RWTlEE..h+uIKucYppKLKcL+p.....uct........................................ 0 3 14 26 +5595 PF05767 Pox_A14 Poxvirus virion envelope protein A14 Moxon SJ anon Pfam-B_7009 (release 8.0) Family This family consists of several Poxvirus virion envelope protein A14 like sequences. A14 is a component of the virion membrane and has been found to be an H1 phosphatase substrate in vivo and in vitro. A14 is hyperphosphorylated on serine residues in the absence of H1 expression [1]. 25.00 25.00 25.00 25.80 23.60 23.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.19 0.72 -4.03 9 50 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 42 0 1 25 0 89.70 60 99.01 CHANGED MDhhthlpNhaSslllsGIlLLhsACIFAFl-FSKsspss-.sWRALSllsFIlGhllllGhllaohYsRaCt...sosshcssRh.Nso-IELs ..MDhMhhIuNYFS.GVLIuGIILLllSCIFAFIDFSKsT.u.P.Tc.TW+sLSIhuFILGIlITlGMLIYSMWG++CsP..sSsVIcs.....NpSDIplN.............. 0 0 0 0 +5596 PF05768 DUF836 Glutaredoxin-like domain (DUF836) Moxon SJ, Bateman A anon Pfam-B_7010 (release 8.0) & Pfam-B_2829 (release 14.0) Domain These proteins are related to the Pfam:PF00462 family. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -10.03 0.72 -3.72 48 1303 2012-10-03 14:45:55 2003-04-07 12:59:11 9 7 1259 7 450 2272 1309 77.50 25 77.65 CHANGED plhLau+ssCsLC-.......thpphLpt...hthptt.........hplphlDIs.....ssss..Lhp......................+Ys.clPVlthss............................tthhphtlst-pLtphLp ...............lhLa.s+t.sC.tLC-.......pspph.Lpt......l.t..t.chs.....................hplph.l...DIs.........psss........Lhp.........................cYs...........c...l.P.V.lhhss.............................tphhha.hctppLpt.l.h.................................................... 0 104 261 371 +5597 PF05769 DUF837 Protein of unknown function (DUF837) Moxon SJ anon Pfam-B_7035 (release 8.0) Family This family consists of several eukaryotic proteins of unknown function. One of the family members (Swiss:O02197) is a circulating cathodic antigen (CCA) found in Schistosoma mansoni (Blood fluke) [1]. 21.30 21.30 21.40 21.30 21.10 20.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.23 0.71 -4.75 9 231 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 103 0 133 212 1 152.70 36 73.36 CHANGED MosTl-plLhDAKslspRL+-+Dshu-sLl-potslp+pVcuM+........pYQE-hp.lpclu+.+sp....hhlpQENpQI+-LQQEN+EL+soLEEHQpALELIMuKYREphhphhhspKhs.sEhhhp...hpcphsp.lpsQh-+IsEMusVM+cAlplD-pphspppEplspLchENctLR-lLpIS+p ....................................................................shohpphlhDA+tLhpRL+-p-stu-sLlppstslpppl.t......uh+......................papE-h.p.p.l.pchuc...+sc.........hhlppE..Npp.l....c-LQpEN+...........ELphuLEEHQpALELIMsKYR....c...phh....plh.hsp+..h...t...s.t.hht..........h.p.....pp....p...sp.lptp....hc.pIpE....MutVMppAhp...hD-pp...t.p..pphl..pL.p.ENptLRchLtIsp................................................... 
1 38 46 78 +5598 PF05770 Ins134_P3_kin Inositol 1, 3, 4-trisphosphate 5/6-kinase Moxon SJ anon Pfam-B_7042 (release 8.0) Family This family consists of several inositol 1, 3, 4-trisphosphate 5/6-kinase proteins. Inositol 1,3,4-trisphosphate is at a branch point in inositol phosphate metabolism. It is dephosphorylated by specific phosphatases to either inositol 3,4-bisphosphate or inositol 1,3-bisphosphate. Alternatively, it is phosphorylated to inositol 1,3,4,6-tetrakisphosphate or inositol 1,3,4,5-tetrakisphosphate by inositol trisphosphate 5/6-kinase [1]. 20.20 20.20 20.20 21.70 20.00 20.00 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.04 0.70 -5.41 5 297 2012-10-10 13:17:03 2003-04-07 12:59:11 6 6 113 8 186 306 8 257.30 33 79.58 CHANGED usshp+RhlVGYALosKKlKSFlQPuLluLARpRGI-lVslD.oRPLsEQGPFDlIIHKLhD............cEW+cpL-EFR-AHP-VsVLDPPcAIc+LHNRpSMLQlVu-Lcl...uscscRlGlPcQlVlhpD.uuulsculspAGL+FPLIAKPLVADGTAKSHEMSLlYcQEGLptLcPPLVLQEFVNHGGVLFKVYVVGEsl+VV+RhSLPDVSpccL..ssucGsFsFspVSsssuou-......-sEl..Dh..ulAEhPPcslLc-LA+ALRRuLGL+LFNFDlIR-spstDRYhVIDINYFPGYuKMPuYEsVLTDFlho 
........................................................................................................................hhh..t+.tph.....th....s........ttp.sh...hl.ls...phslt..pQ..G..s.h.DlllHKlh.s..........................................................thh.pthp..cahp.t..HPps.hl.lDP..uIpp.........LhsR.p.hphl.t.c....lph...............t......tt.......p.....l.t.......s.P...h.....lp.....p.......s.......t......sh.....p.h...lt......ps..........uLphPh.lsKshV...Ac...Go.p..SHcMullas........p.cu.L.........p...t........l......p.....s.........P....h...........V..lQ.pFlNHsullaKVaVlG-ph.ps.VpR.SL....s..hs..t.t..p........sp.t..t....h.hh....p....tp.....l.Sp.ts..ss...........................tth....-............t..........hp.h..P...s.t....t....h.....l.....pp.....luptLRptLGlp.LFs.hDlI........h...............pt..t........s.............s..........p.a.hlIDlNhFP...G.............atthsta.t.hhphh..p......................................... 0 71 119 151 +5599 PF05771 Pox_A31 Poxvirus A31 protein Moxon SJ anon Pfam-B_7044 (release 8.0) Family \N 22.40 22.40 23.20 66.00 20.80 19.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.49 0.71 -4.28 6 59 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 30 0 0 49 0 112.70 75 84.00 CHANGED M.-IFphL+.hE+phFsshslslPKcKpshsaKshsFIFYpPKcsplp+YlssuslaHoDhlVhGKVlIsshKhLLhYMDLpYYGlohsGshY+LGpSIccLSLcppKlhs+hot Ms.SILNTLRFLEKTSFYNCNDSITKEKI.KIKHKGMSFVFYKPKHSTVVKYLSGGGIYHDDLVVLGKVTINDLKMMLFYMDLSYHGVTSSGsIYKLGSSIDRLSL.NRTIVTKVN.N...... 0 0 0 0 +5600 PF05772 NinB NinB protein Bateman A anon Pfam-B_4884 (release 8.0) Family The ninR region of phage lambda contains two recombination genes, orf (ninB) and rap (ninG), that have roles when the RecF and RecBCD recombination pathways of E. coli, respectively, operate on phage lambda. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.49 0.71 -4.49 13 563 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 435 2 37 310 28 118.00 46 85.64 CHANGED hLRspph+pNAlphIpplPsDscKPlVlpIp-tTRoLpQNu+hWAhLsDlScQV.WaG++LssEsWKslFsuuh....tpcp-slPGL-G.thVslGpSTu+MoVpcMu-LIEhlpAaGsppGV+aoD ..............................lRs.thppNAIpAl.pl.......Ds......ppPl..hlpIp..-.....p..RolcQNp+hW.A.hLsDlSR.QV.W....H.......Gc....hLssEs.WKslFTAuh..........t............p.....Q............cslPG......l........-G.GhV.....hlG.psTS+MpVuchsELIElhtuFGs.E+.G..V+aSD................................. 0 2 15 21 +5601 PF05773 RWD RWD domain Bateman A anon Pfam-B_3991 (release 8.0) Domain This domain was identified in WD40 repeat proteins and Ring finger domain proteins [1]. The function of this domain is unknown. GCN2 is the alpha-subunit of the only translation initiation factor (eIF2 alpha) kinase that appears in all eukaryotes. Its function requires an interaction with GCN1 via the domain at its N-terminus, which is termed the RWD domain [1] after three major RWD-containing proteins: RING finger-containing proteins, WD-repeat-containing proteins, and yeast DEAD (DEXD)-like helicases. The structure forms an alpha + beta sandwich fold consisting of two layers: a four-stranded antiparallel beta-sheet, and three side-by-side alpha-helices [2]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null --hand HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.71 0.71 -4.18 119 1878 2012-10-02 15:28:41 2003-04-07 12:59:11 17 79 325 8 1276 2204 23 119.30 20 19.67 CHANGED psp-.pppEl-uLpuIYs-ch....................................hpppsstphplpl........................................................ptt.sphs............................................................................slpLphphsp...sYP.ppsPt.lp...lpsstttt...........................................tphpplpppLpchhppth.....G.pshlaslhphlp-..hl ...............................................................................................................................................................................................................................................................ctpppElcsLpu..I.Y..scph..............................................................................htpt.s.s..p..h.p....lpl.................................................................tt.ttsthh..................................................................................................................................................................................................................................................sl..p..Lp..l..p.....hPp.......sY..P..p......psPh..lp.....lpsttthp...............................................tphp.p..l.tppLpphhppth.....G..psh.lasllphlp-..h............................................................................................... 0 383 606 961 +5602 PF05774 Herpes_heli_pri Herpesvirus helicase-primase complex component Moxon SJ anon Pfam-B_7045 (release 8.0) Family This family consists of several helicase-primase complex components from the Gammaherpesviruses. 
25.00 25.00 53.10 50.20 21.00 18.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.59 0.71 -4.15 8 24 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 19 0 0 23 0 125.00 30 37.95 CHANGED Wl-oLphslsLhh...ssps.psILcslLslhassRcsTsFWLLPpualpptsppPslPhDCluPp.halhTcsGshp.Whs-asLPsslsYpsYlppllclhphlp.t..sstpscph..hL-sapplLsLF ..WlssLshslslhh...ssho.tsllcpllslhappR+sosFWLlPpsaspthshp.PslPhDCluPp.halhTpsGshp.WhtshsLPsslsYthYlpphlclhphl.........ssppscphp.hL-tapplLsLF................ 0 0 0 0 +5603 PF05775 AfaD Enterobacteria AfaD invasin protein Moxon SJ anon Pfam-B_7107 (release 8.0) Family This family consists of several AfaD and related proteins from Escherichia coli and Salmonella bacteria. The afa gene clusters encode an afimbrial adhesive sheath produced by Escherichia coli. The adhesive sheath is composed of two proteins, AfaD and AfaE, which are independently exposed at the bacterial cell surface. AfaE is required for bacterial adhesion to HeLa cells and AfaD for the uptake of adherent bacteria into these cells [1]. 21.50 21.50 22.50 34.00 20.00 21.40 hmmbuild --amino -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.20 0.72 -4.10 10 168 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 136 11 1 79 0 107.60 55 72.60 CHANGED +cshuupL+DGt+lATGRIhCREsHTGFHlWhNuRQssG+PG+YIlQGp+DopHcLRVRluGpGWpssst-GtpGllppGpE-QshFDVhsDGNQplsPsEYllSVSGpCl ...sh..FRAGHVPDGMVLAQGaVTYHGSHSGFRVWSDEQKAGNTPsVLLLSGQQDPRHHIQVRLEGEGWQPDsVsG.RGAILRTAADNASFSVVVDGNQEVPADTWTLDFKA...CAL. 0 0 0 1 +5604 PF05776 Papilloma_E5A Papillomavirus E5A protein Moxon SJ anon Pfam-B_7244 (release 8.0) Family Human papillomaviruses (HPVs) are epitheliotropic viruses, and their life cycle is intimately linked to the stratification and differentiation state of the host epithelial tissues. 
The kinetics of E5a protein expression during the complete viral life cycle has been studied and the highest level was found to be coincidental with the onset of virion morphogenesis [1]. 25.00 25.00 130.60 130.50 22.80 20.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.22 0.72 -3.83 5 25 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 9 0 0 24 0 91.00 80 99.74 CHANGED M-VVPVDlTssTTSsSLLPLVIALTVCllSIILIIhISEFlVYTSVLVLTLLLYLLLWLLLTTPLQFYLLTLSLCFLPAFslHQYILQTQQ MEVVPVQIAAGTTSTLILPVlIAFsVCFVSIILIlaISDFlVYTSVLVLTLLLYLLLWLLLTTPLQFFLLTLhVCYhPAhYIHpYIVpTQQ. 0 0 0 0 +5605 PF05777 Acp26Ab Drosophila accessory gland-specific peptide 26Ab (Acp26Ab) Moxon SJ anon Pfam-B_7275 (release 8.0) Family This family consists of accessory gland-specific 26Ab peptides or male accessory gland secretory protein 355B from different Drosophila species. Drosophila males, like males of most other insects, transfer a group of specific proteins (Acp26Ab and Acp26Aa in Drosophila) to the females during mating. These proteins are produced primarily in the accessory gland and are likely to influence the female's reproduction [1]. 25.00 25.00 31.40 30.60 22.10 18.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.05 0.72 -3.54 4 17 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 8 0 2 28 0 88.50 77 86.89 CHANGED MNYFAVLCIFSCICLWQFSDAAPFISVQSSSQSRSQKVMNGMLRTLYDYSVQDSVNDATGHLIHTHKADFNSDVMSP-EIEpVRQQLNMA ..........MNYFAVLCIFSCICLWQFSDAAPFISVQSSSQSRSQKVMNGMLRTLYDYSVQDoVNDATGHLI+THKSDFNSDVMSPEEIE+VRQQLsMA.... 0 1 1 1 +5606 PF05778 Apo-CIII Apolipoprotein CIII (Apo-CIII) Moxon SJ anon Pfam-B_7283 (release 8.0) Family This family consists of several mammalian apolipoprotein CIII (Apo-CIII) sequences. Apolipoprotein C-III is a 79-residue glycoprotein. It is synthesised in the intestine and liver as part of the very low density lipoprotein (VLDL) and the high density lipoprotein (HDL) particles. 
Owing to its positive correlation with plasma triglyceride (Tg) levels, Apo-CIII is suggested to play a role in Tg metabolism and is therefore of interest regarding atherosclerosis. However, unlike other apolipoproteins such as Apo-AI, Apo E or CII for which many naturally occurring mutations are known, the structure-function relationships of apo C-III remains a subject of debate. One possibility is that apo C-III inhibits lipoprotein lipase (LPL) activity, as shown by in vitro experiments. Another suggestion, is that elevated levels of Apo-CIII displace other apolipoproteins at the lipoprotein surface, modifying their clearance from plasma [1]. 27.10 27.10 27.20 29.20 27.00 27.00 hmmbuild --amino -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.30 0.72 -4.59 6 42 2009-09-11 01:40:59 2003-04-07 12:59:11 7 2 27 1 16 35 0 65.00 61 62.42 CHANGED EsE-uSLLuhMQGYMcpATKTApDALoSVQESQVAQpARGWMT-SFSSLKDYWSoFKGKFTsFWDSsPcs ..........EuEDsSLLuhMQG....YMp+ATKTApDALo.SVQESQVAQQA.....R..sWhTDuFSSLKDYWSoh+sKaSt..FWD.sPE.s....... 0 1 1 1 +5609 PF05781 MRVI1 MRVI1 protein Moxon SJ anon Pfam-B_7407 (release 8.0) Family This family consists of mammalian MRVI1 proteins which are related to the lymphoid-restricted membrane protein (JAW1) and the IP3 receptor associated cGMP kinase substrates A and B (IRAGA and IRAGB). The function of MRVI1 is unknown although mutations in the Mrvi1 gene induces myeloid leukaemia by altering the expression of a gene important for myeloid cell growth and/or differentiation so it has been speculated that Mrvi1 is a tumour suppressor gene [1]. IRAG is very similar in sequence to MRVI1 and is an essential NO/cGKI-dependent regulator of IP3-induced calcium release. Activation of cGKI decreases IP3-stimulated elevations in intracellular calcium, induces smooth muscle relaxation and contributes to the antiproliferative and pro-apoptotic effects of NO/cGMP [2]. 
Jaw1 is a member of a class of proteins with COOH-terminal hydrophobic membrane anchors and is structurally similar to proteins involved in vesicle targeting and fusion. This suggests that the function and/or the structure of the ER in lymphocytes may be modified by lymphoid-restricted resident ER proteins [3]. 20.10 20.10 20.40 20.80 19.90 19.90 hmmbuild -o /dev/null HMM SEED 538 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.63 0.70 -6.11 3 180 2009-09-11 20:56:11 2003-04-07 12:59:11 7 5 50 0 88 165 0 389.00 34 73.00 CHANGED psEPcDGALDVsRsppCPuPTusPlPGssLpGCsRMN-DsSp-ENGVG+ssuESluQpREhhspsSS.PL.RtTSSo-GTlTSS-sGp-ILsMASsDLDcKsLCcKEE-sRAASPslc...........tQGTSLAc-sIuhpsSsuVuKslspLEAuEE.E.......ToEpccKESAuG-oVlSslPcsoVKpVNl+QSENTSA...NEKEVEAEFLRLSLGFKCDWFTLEKRVKLEERSRDLAEENLKKEITNCLKLLESLTPLCE-DNQAQEIlKKLEKSIslLSQCAARVASRAEMLGAINQESRVSKAVEVMIQHVENLKRMYAKEHAELEELKQlLLQN-.SRSssPsEDDsspptRSpSLSL.SKPSSLRRVTIASLPR..........................NlGNsGhVSGMENNDRFSRRSSSWRILGoKQSEHRPSLhRFISTYSWADAEEE..........Ks-lKs+DssEPpGEEsVERTRKPSLSE++usTpcWD+uolsSolASWVTaLQuSaRKAN...+ALWLouAhIlLlAALMSFLTGQLFQouVEAAPTQpGDSWsS.LEHILWPFTRLpH-GPPPV 
....................................................................................................................................................................................................................................................................................................................s.t.....t...h..t....t............................t.tthh..sps.hshth.ph.ptttE.c..........................s.t.ttc.....ttt.s........u.s..hs...s.s.l+..lphppu.ssSA......sEKEVEs.FlpLSLuF+sD.aTLEpRlp.tERpRsLsEENhcKEl.....p......Nhht.lp...........u.................ssL..cc..sptpEhhpKL.csIshLpphssRluSRAEhlGAlpQEpR.hS.KAsEVMhQaVENLKRh.YtK-HAEL.EhKp..lh.QNp....Ru.h....s.s.........-D....tssp...ph..+p.Shs...uKs...shRRVolAslP+............................................shtss...............s.s........s...h.s..t+sss.....t.s....s..........s.......c.hp...hsc.p--...........p...t......................................t...............p...h...hh...h...h..phs.......phlhh.h.hhh...llhshlhuhh.t...h....p.s....tts..................................................................................................................................................... 0 15 22 50 +5610 PF05782 ECM1 Extracellular matrix protein 1 (ECM1) Moxon SJ anon Pfam-B_7421 (release 8.0) Family This family consists of several eukaryotic extracellular matrix protein 1 (ECM1) sequences. ECM1 has been shown to regulate endochondral bone formation, stimulate the proliferation of endothelial cells and induce angiogenesis [1,2]. Mutations in the ECM1 gene can cause lipoid proteinosis, a disorder which causes generalised thickening of skin, mucosae and certain viscera. Classical features include beaded eyelid papules and laryngeal infiltration leading to hoarseness [3]. 
25.00 25.00 27.80 27.80 18.50 18.10 hmmbuild -o /dev/null HMM SEED 544 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.10 0.70 -5.79 3 78 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 33 0 30 80 0 339.00 43 97.22 CHANGED MGTsSRAALVLACLAVASAASEGGFKASGQRELsPE.LhpHLQEVGYAAPPSPPLSRuLPhDHPcTSQHuPP.FEGQSEVQPPPS.EshPl.pEch.sh..Ps.ccsGPslPQEAIPLQKELPPPQVPIE...............QKEsKPAPhuDQSPPEPESWNPAQHCQQGRsRGGWGHRLDGFPPGRPSPDNLcQICLPsRQHVVYGPWNLPQTGYSHLSRQGETLNlLETGYSRCCRCRS+TNRLDCAKLVWE-uhoRFCEAEFSVKTpPHhCCp+QGEARhSCFQEEAPpPHYQLRACPSHQPsIS.G.ELPFPPGlPTlDNIKNICHL+RFRSVPRNLPATDslQRpL.ALhpLEtEFQRCCRQGNNHTCTWKAWEDTLDGYCDREhAIKTHHHSCC+YPPSPsRDECFARRAPYPNYDRDILTLDLSRVTPNLMGHLCGNpRVLSKHKQIPGLIQNMTARCCDLPFPEQACCAEEEKLAFI--LCGPRRNhWRDPALCCcLSPGDEQINCFNTNYLRNVALVoGDTcNAKG.GEQGPTpGTNuSPTSEPKEE ...........................................................................Mtsh.hAAhlLhhlAlsusAptGu..s.tphp.hP................................................................t....................t................................................tt.ls.Qpc...........t............................................................hp..t......u..l..sFPPGRPSs-NLppIClspRp+llYGPasLP.poGaSHLsRQGcslNhLEhGYopCC+...sp...s...p...spp.....LpCsc.sWccshppFCctEaSsKTpsa.CCp..pGpt..RhtCFpptuPpspY................................................................................................................................................................................................................................................................................................................ 0 1 5 11 +5611 PF05783 DLIC Dynein light intermediate chain (DLIC) Moxon SJ anon Pfam-B_7447 (release 8.0) Family This family consists of several eukaryotic dynein light intermediate chain proteins. The light intermediate chains (LICs) of cytoplasmic dynein consist of multiple isoforms, which undergo post-translational modification to produce a large number of species. 
DLIC1 is known to be involved in assembly, organisation, and function of centrosomes and mitotic spindles when bound to pericentrin [1,2]. DLIC2 is a subunit of cytoplasmic dynein 2 that may play a role in maintaining Golgi organisation by binding cytoplasmic dynein 2 to its Golgi-associated cargo [3]. 19.70 11.00 19.90 11.00 19.60 10.90 hmmbuild -o /dev/null HMM SEED 472 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.64 0.70 -5.82 8 500 2012-10-05 12:31:09 2003-04-07 12:59:11 6 6 234 0 324 498 9 276.10 25 71.25 CHANGED -EGQNLWSuILSEVSTcucSKLPSGKsVLVhGEcGSGKTTLluKLQGsEcsKKGRGLEYLYlsV+DEDRDDhTRCsVWILDGDLYHKGLLKFAlstpslc-TLllhVlDMSRPWohM-SLpKWsSVLR-HIDKL+IPPEEh+-hEQ+ls+sFQ-YlEPt-sh.sGSPQRRsspssu.D--SllLPLu-slLTpNLGlPllVVCTKCDAhosLEKEHDY+DEHFDFIQuHIRRFCLQYGAuLIYTSVKE-....KNLDLLYKY..lVHKlYGFsFssPAlVVEKDAVFIPAGWDN-KKI.uILHENFpslKsEDsaEDhIsKPPVRKhVH-KEl..tAEDEQVFLhKQQSlLuKQPsTss.RssESPsRussGSPRsssRouPssVuSsuPhso.sKKsDPNlKsuusS.EGVLANFFNSLLoKKoGSPGusus......GGsssstsosstsos+KoGpKslLTDVQsELDRhSRKs-phls.ssu.Tps 
........................................................................................................................................h..h......G.t...sp.p.hh.......h....................................t...................................................................u.....LtYha.............h.p...h....h....c............t......t...p.................p.................s....s.....h......h....pha..L.s....s.s.....h....ts....Ll.p.....l.s...psl.....phhlhhhhDhop...P...W.hhcpL.pp.a.h.p.h....l....p.p.hl.......p..h.....p............pt...........h...p.....t.h.p.ph.p.hpp...................................................................................................................................t.......................s....h.....hhhhh..................tsc.h..............................h.tp.p......t.....hp....p..hDhl...hhRphhhth.......suu...Lha...h.s.........................ph....hh..p...............hhp.....h..............t.......................hhcpptlhlP.shDs.thI..thh...pth...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................pt........................................................................................................... 0 125 167 249 +5612 PF05784 Herpes_UL82_83 Betaherpesvirus UL82/83 protein N terminus Moxon SJ anon Pfam-B_7466 (release 8.0) Family This family represents the N terminal region of the Betaherpesvirus UL82 and UL83 proteins. As viruses are reliant upon their host cell to serve as proper environments for their replication, many have evolved mechanisms to alter intracellular conditions to suit their own needs. 
Human cytomegalovirus induces quiescent cells to enter the cell cycle and then arrests them in late G(1), before they enter the S phase, a cell cycle compartment that is presumably favourable for viral replication. The protein product of the human cytomegalovirus UL82 gene, pp71, can accelerate the movement of cells through the G(1) phase of the cell cycle. This activity would help infected cells reach the late G(1) arrest point sooner and thus may stimulate the infectious cycle. pp71 also induces DNA synthesis in quiescent cells, but a pp71 mutant protein that is unable to induce quiescent cells to enter the cell cycle still retains the ability to accelerate the G(1) phase. Thus, the mechanism through which pp71 accelerates G(1) cell cycle progression appears to be distinct from the one that it employs to induce quiescent cells to exit G(0) and subsequently enter the S phase [1]. 20.70 20.70 20.80 23.10 19.10 20.40 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.80 0.70 -5.70 18 118 2012-10-03 01:18:03 2003-04-07 12:59:11 6 1 26 1 0 132 0 340.30 31 63.36 CHANGED shs.luspllphlh.pscsslcPcEs+llcTGlsVpVsp.sollhlsp.sspspssht.pp...........LplKapshcsp.-hcs.lslsVpN.os+s...lssus..pPl.ulhVFALPLstVslssLplaps.............htppth.pssucsslpphstphWplRlsloplpWscppsphhtsshhasosFhhpspshPLptl.sssc.lsCSptssplp+sphhscs....hlhlaL......phpppsPPsplFlplul.hspp......................sclshp+NPcPaLp.pscNGFslhsP+slplp.sscpsplhlsssFcSsp.asulFhPpsIPGlSlsssshhsppslhlclpuhpc.slplcthpsLGtl+FFs+shlhh 
.........................................s.hu.luspllptlhspssssltPcET+lLpTG.lclpVsp.PSllhlsQhsscussspp.ppp..........Lplpasshs....sp.-hc..s...lsVsVpNsosRs...lsPup...csh.ulhVaALPLchVslsslsla+h............tscccp..+phssscAslctssp.....phWpsRlTVoslsWs+ppsphp......t......ss......h.hasouFlhsspslsLp..........p......l.s........sAcpLsCS.tsTpls+hpllscp....hlplal..........cp.hspss..P..sspLFhHlsl.pspc.......................tcl.shpRNPpP.ah..+s.a.c....cN.GFp..VlsP+shtl+.sGchsplhlcsuFpS..cp..ahuLhhPcsI.P.GLSIpsshlhsspplhlclpuhtc.slclcthpslGtlaFFcpchlh.t... 0 0 0 0 +5613 PF05785 CNF1 Rho-activating domain of cytotoxic necrotizing factor Moxon SJ anon Pfam-B_7489 (release 8.0) Domain This family consists of several bacterial cytotoxic necrotizing factor proteins as well as related dermonecrotic toxin (DNT) from Bordetella species. Cytotoxic necrotizing factor 1 (CNF1) causes necrosis of rabbit skin and re-organisation of the actin cytoskeleton in cultured cells [1]. Bordetella dermonecrotic toxin (DNT) stimulates the assembly of actin stress fibres and focal adhesions by deamidating or polyaminating Gln63 of the small GTPase Rho. DNT is an A-B toxin which is composed of an N-terminal receptor-binding (B) domain and a C-terminal enzymatically active (A) domain [2]. 
25.00 25.00 131.30 130.50 18.00 15.80 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.69 0.70 -5.50 10 55 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 44 2 6 44 0 278.90 37 34.56 CHANGED slpphushs...hppLspGsIslLKG+Glluu+cspp..............Plshhlshl+aDsss.h.t.........pupDpshaG.lhuhsGssltsopsu.....PuoslGpaasptsLosNlsll+VsNGsRGssGl+IsLs-lpcupPlllTuGsLSGCTolsApKcshlYtaHsGps.sstusWhTup-GVpplhcstpsLupsssPslssspNNs.LV-aLucsFDpulIsYsG+sccssut.t........cNVslFDYsts.hscpstsRlGsuhsLlo.ssssslsVpoLuEDhslN..ususchsVL.ss.....h.Ksh ..............phupps...hptLs+GsIslLKGRGsluup+pps..............sl.FhIphsRaDp................puhDphhas.hhuhsGhshhss.su.....suohhupaacchsLospssII+VsNusRGssGIKIsLc-VpcupPlIlTuGsLSGCTTlsApKcuYlYthHTGpo.pshusa.TussGVpphhcshchLstps..hspltshhssD.LVsYhScsF-culIsYsuppcpssupht........cNVslFsY..h.hpshs.hthGsuhTlls.pssGslslpoLuEshulN..usssphslLps................ 0 1 1 3 +5614 PF05786 Cnd2 Barren; Condensin complex subunit 2 Moxon SJ anon Pfam-B_7477 (release 8.0) Family This family consists of several Barren protein homologues from several eukaryotic organisms. In Drosophila Barren (barr) is required for sister-chromatid segregation in mitosis. barr encodes a novel protein that is present in proliferating cells and has homologues in yeast and human. Mitotic defects in barr embryos become apparent during cycle 16, resulting in a loss of PNS and CNS neurons. Centromeres move apart at the metaphase-anaphase transition and Cyclin B is degraded, but sister chromatids remain connected, resulting in chromatin bridging. Barren protein localises to chromatin throughout mitosis. Colocalisation and biochemical experiments indicate that Barren associates with Topoisomerase II throughout mitosis and alters the activity of Topoisomerase II. It has been suggested that this association is required for proper chromosomal segregation by facilitating the decatenation of chromatids at anaphase [1]. 
This family forms one of the three non-structural maintenance of chromosomes (SMC) subunits of the mitotic condensation complex along with Cnd1 and Cnd3 [4]. 21.70 21.70 22.10 21.70 20.80 21.60 hmmbuild -o /dev/null HMM SEED 725 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -13.54 0.70 -6.02 15 462 2009-12-23 17:17:02 2003-04-07 12:59:11 9 6 285 0 333 491 3 510.80 21 88.67 CHANGED psp.t+psstusptchsoshp........lshNDDptE+ppR...........................RRhosh-ttsopsuuss.u...........st.pshhshhps.pIhpsacpClKLuo-NKIsscNuWshsLIDahsDl...ppK-u-hsNFQhAusTLDAusKIYusRVDSVas-shKlLuGL......u..p..pcspcpss-G..ss-sc-s-utthtpcsspcKs.++K..RsstuTls.shsslslp+h-hchsVDPLFpKsuAsF..DEsuupGLhLNpL.ulDupuRl.....lFDSs-sst.............................ppupsschssssplDhosLt..................................hs.Lspl-cpsICPSLssap..hs-sst..s..Fsctsp-hcs-sht..ttt...............shs-ctp.tshuFDlsu-sts.....................stusuG-ttphcstpEsh.cspcspccsspht-s.tssD....hshsLst........s.hSYFs.sphppsWAGP-HWKaR+h+.................pscsssposut+pKKc.KcshclDFsssl-....-hhFp.ssusoslohsphpppscspsh.LPsDhHasscpLl+LaLKPps+hsp............hs.cpcssspsss-hD-h.asNpp.....................tsDssshhPsh.utssD.-DDstsFsDssshh..ps.st..t.sutttstt.t.shhshhtsplhusP....................++Vphl.VsYA+sAK+lDh++LKpshWphlscph.sssp.ts..................................ctp-psppssptpFoplhpsLtshhP.sphtcslSsShuFlCLLHLANE+sLpLpss-s........L-DlhIpps 
..........................................................................................................................................t......................................................................................................................................................................................................t.hpph.hch..scN.KIsttNsa....s....ht.LIDhhtp..h........................c........p....t....p....hN............F...ph.....A.........usoL-u....ssKIYs.RVDulhs-sh+llusL................................s..........t..t.............................t....t..t...t.................t.........................................t..t.........t.............t.........................t.........p...t.................t..T.l..phttlp..pph..-.........t.....h....DPhFtph..........sth..D.susttl.hhs.L...............t.pl.......hhsst........................................................................................................................h.............................................................t..l.....htth............................................................................................................................................................................................h.......................................................................................................................................................................................................................................Wt.u.......t.....aWpht..................................................................................p..p.t..............h.pa.........t......................h...h.t....................................................................................h..............................................................................hP......c.hth..t.h.ph.......hst..h.........................................................................t...........tt.............................................................................................
...............................t......................................................................................................................h....s...............................................h..t.....l.aschu..KphDh+tLKpthap.ht.........................................................................................................................h.hpplhpt.l.....p..hs..............hpp.......lS..hhFhslLHLANEptL.l.....t...........tp............................................................................................................................................................................................................................................... 0 132 200 290 +5615 PF05787 DUF839 Bacterial protein of unknown function (DUF839) Moxon SJ anon Pfam-B_7480 (release 8.0) Domain This family consists of several bacterial proteins of unknown function that contain a predicted beta-propeller repeats. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 524 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.90 0.70 -5.57 15 1732 2012-10-05 17:30:42 2003-04-07 12:59:11 8 25 946 0 506 2061 586 374.30 27 79.74 CHANGED hsauslssssD.....chlsVPcGYchcllspWGD.lhss.Pth......s..tptt.hGhNpDGMuhFsl.......................sps.....+s.LLlsNHEYsssslhh.......................tstshstcpVptthAAtGsTllpl.c...ttta.hs.suthNRR...........................lptso.h.l..sGPsuGppll+ot...........ts.spGsplhGThsNCAGGsTPWGTaLTCEENhss...hFus....................pphG.....sth...........pFs...............sccaGaVVElDPhcP..posPhK+TALGRFpHEusslh..csuR.sVlYhGDD...cpssahY+FVsscphpsts.....ttspsLh-sGsLpVA+hsss....................GsscWlsLs.s............................................................ps....st..htsts-lhlpsRhAA.ctlGAT+McRPEslt.ps..h.....cVYhshTNss.tp......................h.....ssuNsRscNtaGpIh+ah.sss.....phsstp.pW-LalhuGsstssp.........s.stsshhssPDNlshDstGcLWlsTDGssst.t.......................huts.G......h.sts..pup.chhh.ssPtsuEhsGssFoPDucThFVslQHPG 
..................................................................................................................................................h...............................................................................................................................................................................................................................................................................................................................................................................................................hhG.hh.sCus.u.o.PWsThlosEENhss............hF..s.st..tt............................t.p+.h...G.h....t...h....................................................................ca.shs..................spspcaG..allE..l..D....P..........hss......p....us.......s........h.......K+TA..LGRFpHEsst.h.h.............t.....s....sp......hlsYh.GDD..........pp.p.alYKFVuptthp.t.................tts.ph......L......s......pGsLYV..A+.a.s.ss........................................................GsspWl.Ls..s..............................................................................................hp...................tss.....h..s..tspl...h..l..s.RhAA.shhGA.....T........hcRsE.ht.p..st..................phahshop.t..........................................................................................Gt.l.h..t..h............t...................................t....h...............h......h.........s.................................................hssPDsh...h.h....s...............h.......l.h.l..pD.t............................................................................................................................................................................................................................................................ 
0 144 336 453 +5616 PF05788 Orbi_VP1 Orbivirus_VP1; Orbivirus RNA-dependent RNA polymerase (VP1) Moxon SJ anon Pfam-B_7493 (release 8.0) Family This family consists of the RNA-dependent RNA polymerase protein VP1 from the Orbiviruses. VP1 may have both enzymatic and structural roles in the virus life cycle [1]. 20.00 20.00 20.30 32.30 17.50 19.90 hmmbuild -o /dev/null HMM SEED 1301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.07 0.70 -14.19 0.70 -7.37 6 192 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 69 0 1 156 0 507.60 51 99.54 CHANGED VQsApLl+RulpRlhpGI+hctscutshYYKaS-phR+hRcK+GspYK+-s-.hE+phchptcpLYGLPVl+EuSWc-lhs...hpphppssLclalcSlLchp-L-PEEEFLRNY+Vp......cphpshp-FVEpRA+sEhQlaGDlslKtWsuhLhElupphKHpPLGLpVMucFVpRFGpPFHQNsRDLSQlcDashsYooPLLFEMCssESllEFNhhhRM+EEsIppLEFGsp+lsPhcLlREhFllCLPHPKKINNhLRAPYSWFVKhWGVGucclhVLpStGGDDRNSK-VhYssF++hpNhYu.slh+SpFYpco+cpNhpKlcEtlpYSQ-LGsHshshPlF.+ML+sVYpTsFsPpchSHlILASahLSIQTITGYGRAWVKNsuoDhEKQhKPssuNhlsRVsDhT+N.FIQAYpEAcc+GE-IVpPEDMYTShLRLA+NTSSGFSTplpVhK+YGPshc.+cpchI+IsSRIKALVIFpcGHcIFssEpLcKKYNTVE.YQoKGSRDVPIKATRhIYSINLSVLlPQLIVTLPLNEYFu+lGGSTpP-s++lGGKIIVGDLEATGSRVhDAADTFRNSuD.sIhTIAIDYSEYDTHLT.aNFRpGMlpGIRcAMp+YpcLRYEGaTL-ElIEaGYGEGRVssTLWNGKR+V.hKhsh-cYhpLsEp-Rl...cGsFK.sPhGVhPl+slclsp+lplc.Dsc-hlLVuPsDGSDLAh..lsTHLSG.ENoTLlhNShHNLAIGTlIQEElp+cssslloFhSEQYVGDDTLFYTpLpspcscthDpllcTIF-TIt+sGHEASsSKThhsPFSVEKTQTHAKQGlYIPQDRMMlISSERRKDIEDVpGYlRSQVQThlTKlSRGFSH-LAplILMhKToFlGaRKhKRTIhEsGtYRDR+FDSscEDGFTLhhlRsPLsLYlPV-WNGaGAHPuALNlVMTEEhFlDSlhhuphc-hhtPlh+Ihsss.PsWNETpuDKRtlsTcTKMSFFSKMARPsVpoVLsssElhctVcpLPLG-FSPspIS+TMMHSALLKEspARoLLoPsYEl-YQKplNsWlp.psshphpup-hpISTsYsKlFcVhFEsshtpuhhh..FPD.NLSPp.Fa.QKhhlGsRhSsRsRhSY.VDRIDSILRuDVVMRG.......FITuNpIlslLEclGhsHSAuDLshlFolMNl-s+VAE+LupYIsp-KIRFDAhKL.K+GICGDEFSMSLDVCTQsMlDcal+aP+pLTKTEhDAVsLYsSQllMLRAApGlshp+hclsVosEcK++aKVRtsRFpoHlPKh+hlK+L.hshcRLSuRhlpNQFV 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................LaYscLpopctphhsphhpsIFsTVtKCGHEASsSKThhsPaSV................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 1 1 1 +5617 PF05789 Baculo_VP1054 Baculovirus VP1054 protein Moxon SJ anon Pfam-B_7511 (release 8.0) Family This family consists of several VP1054 proteins from the Baculoviruses. VP1054 is a virus structural protein required for nucleocapsid assembly [1]. 
20.40 20.40 49.70 49.00 20.20 19.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.26 29 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 57 0 0 56 0 332.90 39 96.61 CHANGED phspsso.+.ss.a+Pl+h.s+.pQC.hHPpRANC+lh+hhs..........c.........................pshh.HhThl.sshahshsscPYYphLlpspt...shtt.httahNAsphhshlplc.......sssEcahuI-EAGEpNhshl+hVlKslhchlss.s...............schYl.lhhcchalDhlYSph+sl.lLPQcMYslappp...................ptP..hsphhphholPps-c.........uhpSQ.IY+oFLlYNTVLTMhLKQpNPFN-..psKsIShIlRsLGpCP.....sN+sRlKsC-LpaGGs.sP..GHlMCPP+-MVK+Ia+YAKWshsPNNY+RYacLIsc.sshps......................t..shhLhDWhsFlssFpsYFh ....................hspCsS.K.sP.a+Pl+h.s+.sQChlHPp.RANC+lh+phs..........s..ps....................pshh.HhThl.sshahshsscPYYptLLpssp.....................phtt..RttalNAs.phhshVplc.......sssEcFhuIDEAGE+Nhssl+hVl+slhchhss.s...............schal.LhlDchalDllYStaRsl.lLPQchhslhpcp......................ptP..hsphhphhsVPso--.........uhpSQhIY+oFLlYNTVLTMlLKQsNPFNs..psKsISlIlRsLG+CP.....sNK-RlKCC-LpY...GGs.sP..GHlMCPPREMVK+IFHYAKWscsPNNY+RYa-LIsc.ss.ps.th..........s.tt..t.ts..sLhLhDW.NFls-FpsYFh............................................ 0 0 0 0 +5618 PF05790 C2-set CD2; Immunoglobulin C2-set domain Bateman A, Finn RD, Moxon SJ anon Bateman A Domain \N 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.70 0.72 -4.01 27 320 2012-10-03 02:52:13 2003-04-07 12:59:11 10 44 68 56 79 345 1 77.80 26 27.95 CHANGED losssss.ssLss-sLphs....s.hhp.plphcp.ppKs.cspphhhshp...p-sG.sapChsp......sts.pphpsshpl.shs .....hosssts.plLss-sLsho..h.t..s....s.hh.p.plp..hcs....stKs....h.....cspphhhshp..hp-sG.sapCpsp.........sps.pphpsphpl.s......... 
0 5 5 12 +5619 PF05791 Bacillus_HBL Bacillus haemolytic enterotoxin (HBL) Moxon SJ anon Pfam-B_7539 (release 8.0) Family This family consists of several Bacillus haemolytic enterotoxins (HblC, HblD, HblA, NheA, and NheB) which can cause food poisoning in humans [1]. 25.00 25.00 25.00 25.80 24.90 24.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.05 0.71 -4.92 13 739 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 144 1 36 537 0 178.80 32 47.98 CHANGED pNhSLu..spshpculcptGSpshlhssYuhhIlpQsslslsslo..sh.sph.tslhpcQcsA+sNAppWlDphKPpllpssQsIlsYsTpFpsYYssLlpsl.....-ppD+sshpsslscLpspIppNQcsscpllp-LpsF+splscDopsFpscs......splpsILpuss.AsIstLcpcIpshpssIpps.s .......................................phths..stthppshpphGuphhhhphYuhhllppPslshpslo..p..sth.tslhpcQc.A+tsAppWlsph.KPQLlc.TspsIlsYs.....T.....p.....F.....psYYsoLlpsl...............sppDpssLpc....uls.cL....p....s....p....IppNQppsppll....p-Lpca+s.plspDspsFpscs......splpuILtups.usIstLppcIpphpssIptp..p........ 0 14 21 23 +5620 PF05792 Candida_ALS Candida agglutinin-like (ALS) Moxon SJ anon Pfam-B_7578 (release 8.0) Repeat This family consists of several agglutinin-like proteins from different Candida species. ALS genes of Candida albicans encode a family of cell-surface glycoproteins with a three-domain structure. Each Als protein has a relatively conserved N-terminal domain, a central domain consisting of a tandemly repeated motif of variable number, and a serine-threonine-rich C-terminal domain that is relatively variable across the family. The ALS family exhibits several types of variability that indicate the importance of considering strain and allelic differences when studying ALS genes and their encoded proteins [1]. Fungal adhesins, which include sexual agglutinins, virulence factors, and flocculins, are surface proteins that mediate cell-cell and cell-environment interactions. 
It is possible that both the serine/threonine-rich domain and the cysteine residues in the C-terminal and DIPSY Pfam:PF11763 participate in anchoring the terminal domains inside the wall, so that only the inner part of Map4p, including the repeat region, is sticking out as a fold-back loop then able to act in adhesing [3]. 25.00 11.00 25.00 11.00 24.50 10.90 hmmbuild --amino -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.61 0.72 -3.82 189 1619 2010-01-07 11:49:09 2003-04-07 12:59:11 8 56 22 0 1025 1644 0 32.10 46 32.08 CHANGED sThT.phWoGohsoTsTh...oss.....PGtTsTVl..Vp.lP .................sTsT..pa..Wo....t.S.h.sTToTl...Tss.......PG.u.TcTVl...lc..P..... 0 289 393 1025 +5621 PF05793 TFIIF_alpha TFIIF-alpha; Transcription initiation factor IIF, alpha subunit (TFIIF-alpha) Moxon SJ anon Pfam-B_7586 (release 8.0) Family Transcription initiation factor IIF, alpha subunit (TFIIF-alpha) or RNA polymerase II-associating protein 74 (RAP74) is the large subunit of transcription factor IIF (TFIIF), which is essential for accurate initiation and stimulates elongation by RNA polymerase II [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 528 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -13.00 0.70 -6.13 8 332 2009-01-15 18:05:59 2003-04-07 12:59:11 7 11 190 9 210 310 2 299.90 25 69.07 CHANGED uut...suos.spslpEYsVRVspsscK.+YalMR.......FNupcsV-hspWs...tsphER-.s.+.hht.pphh.-hGtGSEas+stREEuRRK+aGh..+ca+.-sQPWlLchst.c-G..+pF+Gl+EGGsoEpAoYYlFs+ss-uu.FcAaPlspWYNFsPlApYKsLThEEAEEcappR+KThNtFslhhhKthpss.s.hu-csE-c-t....pKuGsG+u+.....L+I+DhpsD--.-ussS-pu-ED--Etts.+Kc......s+hAKN+Kp..sDpK+p+RsuD-...-sh-.DS-DGD-EG+EhDYhSDsuusus-.-..........................E+E-hhusEcssKt-.cQs-cs-EcE-EcsEc-ut.SKctKKsKK.psKpsch..c-Sp-s-sSD-SDsDs-susshhpspKpKc.t+-EslsSusSusssStPupPossstsopuKRKhs...........spsSpsPsSsssKKlKhEssPpSs.tKuhPso.St................ssossuu-hslTEEsVRRYLh.RKPhTTpELLsKF+sKpssLSoEcsV.....sshApILK.+IsPlpKshpsphahsL+p .........................................................t......................................................................................................................................................................................c.t..PWhlc......c.s...........tta.G..hcu.u.s...........ssahl.hh.......st.....t...hphhPhppa..YpFp.htpapsLohEEAEptht..++pp....s.t..h.hh..+th..t..tt.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................................................................... 0 63 99 165 +5622 PF05794 Tcp11 T-complex protein 11 Moxon SJ anon Pfam-B_7604 (release 8.0) Family This family consists of several eukaryotic T-complex protein 11 (Tcp11) related sequences. Tcp11 is only expressed in fertile adult mammalian testes and is thought to be important in sperm function and fertility [1,2,3]. The family also contains the yeast Sok1 protein which is known to suppress cyclic AMP-dependent protein kinase mutants [4]. 20.40 20.40 21.80 20.70 20.00 20.30 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.33 0.70 -5.77 30 596 2009-01-15 18:05:59 2003-04-07 12:59:11 8 8 254 0 373 557 18 379.30 23 61.79 CHANGED pLtH-Ishs.shph+...Ps.su.cu+hKcphpcsaW-tlppplstt....................sPpasphlpLhppl+-hLhSLL...ss+h+spIsElLDh.-LlpQQhc.+GulD...hsphupalhslhtphCAPhRD-tlpchpppl...................pclVptLRtIFplL-hMKlDhANapIpshRPhLhcso.V-YE+ctFpphl...p+sssslssopcWhpcstscthpphpptst.............................................t.ss.ssh.llhpuhlcL..lhptptspsFPETLhhDpsRLtpL+splppLsslusslllhpshlssthpss.........sphhs+lppplhsLlpt......sss+hscslpsIu.plspp............htpshsppstsslssshpsslputLtphhpss.ssVhplhcp+lhphlhstlh...............................ssspcshsssssshs.ht.............................ElpplupphspLssashpVaGs.aYhch..........lpph 
............................................lhH-l.hs.phphc......s...su.............psch+chht.....psaWctlppplptt..............................................................sph.s..phltL...h..p..pl+-....hLh.s.hl...............................sch.+...sp.....IpEsLDh.-ll.pQphc...pG.shc........................httlspalhsh...htp.hCAPhRDpt.lpphtp.h.........................................t.phVp..hl....+tlh....plLchMK....h...........DhsNatl.....p.hp.PhL..hppu...lpYEpptFpchl........................ppts.......sL...shoppWhppshpph.ht.ph..pts..............................................................................................s..sh..hlh..puhlpL............h..s......t......ct..h.P.E.TlhhDptRltplptphpplh.hhusshLlhpshht.t..h..s..............................t.hp..plt.phhhs....llpt.......................p....p.pp......sh.sls..ph..t.......................................................tsh..t.t..t....s...sh....s..p.p......t....h...........s.l.....t.p...hp...p......s.slhplhpp+lhhhl.shlh.................................................................................t.ttt.....h.shsh.......................................................................clttlst.thhp.lhthp.tVaus..hY.th....h.................................................................................................................................... 1 87 166 269 +5623 PF05795 Plasmodium_Vir Plasmodium vivax Vir protein Moxon SJ anon Pfam-B_7631 (release 8.0) Family This family consists of several Vir proteins specific to Plasmodium vivax. The vir genes are present at about 600-1,000 copies per haploid genome and encode proteins that are immunovariant in natural infections, indicating that they may have a functional role in establishing chronic infection through antigenic variation [1]. 
25.50 25.50 25.80 25.70 25.20 25.40 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.73 0.70 -5.25 125 471 2012-10-01 19:45:41 2003-04-07 12:59:11 6 4 6 0 383 877 0 296.30 13 89.84 CHANGED Lsutch.........Ycchsp........p.tp.........p.hsshCp.phpph.........pplp....clCpKlhctLc..hlsptp.pppct............ChalpYWlY-pltphhp......tp..p.h..hhsclhplhpph.ppth...........p.pt..............................................Cp..hphh....hcp..hcccKpLaDYacsacsIcpphpspspp..C..pp.Yh....................pYlpphppLYcc..a....ppp..Cspttp.................shCscahcp..ppcY.sPpslLspL.pCt.t.......................................tpttt...t...............tttht...t..t..........t.t...t..............................................................................................tt.....tth.....ss.hhshh........hh.hh.hhh.p.............................................htp.hppthtppppthpp.httphth.h.t.tstt...t........................pph.ls....Ypss .......................................................................................................................................................................h....t......h.................th..........phh.p..phhp.hp.........htp.......ttp....................C..ahpYWl..hsplhphht.p.................hhpphhp..hhpph.p..pt.................p..pp..............................................C.p....h.ph........hsp...hcchKtLa-Yh.p.s.ap.pl...p..p.t.....h....p..stp.......pt....C....pp.ah..................................pYlpthtp..h.Ypp....h..........ppp.....Cspttp.................thC.pa...h.ph................pph..ps...p...plltpl....pstt.........................................................................................................................................................................................................................................................................................................................................................................................................
.....................................................................................................hh....h.h.p............................................................................................................................................................................................................................................................................................................................................................. 0 0 1 300 +5624 PF05796 Chordopox_G2 Chordopoxvirus protein G2 Moxon SJ anon Pfam-B_7672 (release 8.0) Family This family consists of several Chordopoxvirus isatin-beta-thiosemicarbazone dependent protein (protein G2) sequences. Inactivation of the gene coding for this protein renders the virus dependent upon isatin-beta-thiosemicarbazone (IBT) for growth [1]. 25.00 25.00 126.20 126.00 18.50 18.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.37 0.70 -5.21 12 66 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 41 0 0 48 0 213.00 60 96.15 CHANGED MsFR-LILFaLuKaLlTcsctuh+phlSLCRuFslchpcllscFhsp+hh+plopslppss.lLsElslsFPssll+-LlpLRLp+FsKslKtSh+LssshpGIsll.cspplhlhpsNDpLlsaLh+cYsPplYpYs.........stPsshsGu..............+lllCGhsploFasYhho.+IsoNpclcVlVT-pCIppLLp.tNtpLLcplF..c+usssls+sL+cIF....YSl MPFRDLILFNLSKFLLTEDEESLElVSSLCRGFEISY-DLIoYFPDRKYHKaIsKVF.EHsD.LoEELSMEFpDTTLRDLVYLRLYKYSKsIRPCYKLu-NhKGIVVI.KDRNIYIREANDDLIEYLLKEYTPQIYTYSs.........E+ssIsGS......................KLILCGFSpVTFMAYTTS.HITTNKKVDVlVoKKCIDcLlDPINYQILpNLF..DKGSGTINKILRKIFYSV... 0 0 0 0 +5625 PF05797 Rep_4 Yeast_TAF; Yeast trans-acting factor (REP1/REP2) Moxon SJ anon Pfam-B_7680 (release 8.0) Family This family consists of the yeast trans-acting factor B and C (REP1 and 2) proteins. The yeast plasmid stability system consists of two plasmid-coded proteins, Rep1 and Rep2, and a cis-acting locus, STB. 
The Rep proteins show both self- and cross-interactions in vivo and in vitro, and bind to the STB DNA with assistance from host factor(s). Within the yeast nucleus, the Rep1 and Rep2 proteins tightly associate with STB-containing plasmids into well organised plasmid foci that form a cohesive unit in partitioning. It is generally accepted that the protein-protein and DNA-protein interactions engendered by the Rep-STB system are central to plasmid partitioning. Point mutations in Rep1 that knock out interaction with Rep2 or with STB simultaneously block the ability of these Rep1 variants to support plasmid stability [1]. 25.00 25.00 28.40 27.90 21.70 21.50 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.07 0.70 -5.74 6 16 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 15 0 5 17 0 267.80 26 83.62 CHANGED pss+Elhphlp-shhhhshhslYsPppshllTs+GohplP-sYpcYPhLAlaYVpYhlpK.Pasll.s-L..-WP-PYVVlNTIh+RLpsH+ah.uNp.tsshs-cl+phIusul-IPpsp..csEhLsp.tKs+phstph..phh-s.ss+pclpcFFscLp-hsthushasshoKlclhlSCpuhh.upu.clphhApslR+lWltEhlhshsspcpc..tshDp..ssh-Dsp.....sptppsuus.p.scAp...........................uaWchlcsLpcp.p.pspp.pphchlAshlhuppcthtshpppRcpsRshLYhpl+thL.pphphpha+Gspshps.plKlSL+hpcp ......................................cth..hh.pshh.h...hla..tpp.l.T.pGhh.lPcpYtpa.hLAhhaltahhpp.sh.hh.ppl..-WPcPhlVhNTlhcRh.pH..h..tph..sthhppl+thlu.slphs.pt.....hht..tp.h.h..th..phhcs..sppclptFFphLpshsthtp.a.s.oKhhlhhSspshh.tps.phphhtpslRp.hltEhl.phhppcpp...hh-p..sst-Dsp.....sptp.su.stpsspup...........................thhp.lssLpcp.pspspp.sphchluthlhup.pt.tphphpppph+shlYh.l+thl.tsphplchYRGsphpps.slKlSL+.pp..................... 0 1 3 4 +5626 PF05798 Phage_FRD3 Bacteriophage FRD3 protein Moxon SJ anon Pfam-B_7781 (release 8.0) Family This family consists of bacteriophage FRD3 proteins. 
25.00 25.00 142.30 142.10 21.50 19.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.38 0.72 -3.66 4 35 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 31 0 0 15 0 75.30 80 99.28 CHANGED MAKVsIDlVDFEYhEElIRNRYPELSIsSlpDopFaolplslpGPLEcLppFMsNEYCDGMcsEDAcFYhGLIcp MAKVDIDIVDFEYIEEIIRNRYPELSITSlpDop.FhShQIVIEGPLEDLTRFMANEYCDGMDSEDAEFYMGLIEQ 0 0 0 0 +5628 PF05800 GvpO Gas vesicle synthesis protein GvpO Moxon SJ anon Pfam-B_8221 (release 8.0) Family This family consists of archaeal GvpO proteins which are required for gas vesicle synthesis [1]. The family also contains two related sequences from Streptomyces coelicolor. 21.00 21.00 21.00 21.60 19.00 20.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.01 0.72 -4.12 17 117 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 92 0 50 129 1 98.40 33 85.36 CHANGED tppptt.ps.......sttssphhsht-shptAppshp-Lht+chEulsuspps-D.GWpl.VEVlEpptlPDTpDllupYElpLDssG-lhuYcRlcRYcRGchcp ...............................................tttt.................t..shhpshctAtpplscLhGcp...s-uVoulpps.......--...GWpltVEVlEh...p...RlP...c...Tssl.....LAp.....YEVpLDscGcLhuY+RlcRYpRGpsc.t... 0 17 39 48 +5629 PF05801 DUF840 Lagovirus protein of unknown function (DUF840) Moxon SJ anon Pfam-B_8265 (release 8.0) Family This family consists of several Lagovirus sequences of unknown function, largely from rabbit hemorrhagic disease virus. 
25.00 25.00 213.00 212.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.40 0.71 -3.95 2 29 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 9 0 0 26 0 113.10 93 97.04 CHANGED MSEFlGLsLAGAusLSsALLRpQELtLQKQALEsGllLKAcQLupLGFNP.EVKsllV.GpuhspNhRLSNMHNDuSVVNuYsVhNPsSNGIRpKIKShNNSVKIYNTTGESss MSEFIGLGLAGAGVLSNALLRRQELQLQKQALENGLVLKADQLGRLGFNPNEVKNVIV.GNuFSSNVRLSNMHNDASVVNAYNVYNPASNGIRKKIKSLNNSVKIYNTTGESSV 0 0 0 0 +5630 PF05802 EspB Enterobacterial EspB protein Moxon SJ anon Pfam-B_8424 (release 8.0) Family EspB is a type-III-secreted pore-forming protein of enteropathogenic Escherichia coli (EPEC) which is essential for EPEC pathogenesis [1]. EspB is also found in Citrobacter rodentium [2]. 21.60 21.60 21.70 21.70 20.70 21.50 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.07 0.70 -5.48 3 178 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 139 0 5 79 2 257.00 53 97.58 CHANGED MNTIDYNNQVhsVNSVSESTTGSuSuTAo.s.SIsSSLLTDGKVDISKLMLEIQKLLGKMVTlLQDYQQ+QLAQSYQIQQAVFESQNKAIEEKKAAATAALVGGAISSVLGILGSFAAINSATKGASDIAQKAoSASSKAlsAASEVATKALVKATESVADAAEEASSsMQQAMATATKAASRTSGVADDVAsSAQKASQlAE-AADAAQ....KASRlSRFhAAVDKITGSTAFVAVTSLAEGTKTLPTTISESVKSNHEINEQRAKSVENFQQGNLELYKQDVRRoQDDIASRLRDMToAARDLsDLQNRMGQSuRLAG ................................................................sNss.psTTsusu.s.ss...u.sSSLLTDG+VDls+LhLElQKLLtKMVslLQDYpQcQLuQSYpIQhAVFESQNKAI-E....K.KAA....ATA...ALlGG..hISSsL....GI.LGS.F.........A......Ah...N..s...AsKG..Au-lAp...p...su.S....sSSKu.......hs...AAS-....susKsLsKAoESlADss--AoSsMQpshsTss+AASRsSsVsDDlA....+ASphAEphADAAp....Ksuhhs+hhsuss+losoTsFlsVTSLAEGTKTLPTTlSESVKSsH-lsEQRhKSlENhQtuNL-hYKQ-VRRsQDDIsoRLRDhTossRDLh-l.NRMGQusRLAG.................................................... 0 0 1 2 +5631 PF05803 Chordopox_L2 Chordopoxvirus L2 protein Moxon SJ anon Pfam-B_8620 (release 8.0) Family This family consists of several Chordopoxvirus L2 proteins. 
20.80 20.80 22.80 22.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.00 0.72 -4.36 8 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 40 0 0 39 1 87.00 50 96.83 CHANGED AEVLhsKL+sIEp-NhhNEKllDCIIpE.IE+ppaalhRPhlRLllDllIllIVls.lhlRllKRNYplLLlhhhhYlhhchhtYhtl ...hEVIsDRLDDIVcQNlADEKFlDalI+t.LE+QsPuILRPllRLhIDlLLhVIVIhIFTlRLVsRNYphLLsLlslslsLslFhah..L...... 0 0 0 0 +5632 PF05804 KAP Kinesin-associated protein (KAP) Moxon SJ anon Pfam-B_8674 (release 8.0) Family This family consists of several eukaryotic kinesin-associated (KAP) proteins. Kinesins are intracellular multimeric transport motor proteins that move cellular cargo on microtubule tracks. It has been shown that the sea urchin KRP85/95 holoenzyme associates with a KAP115 non-motor protein, forming a heterotrimeric complex in vitro, called the Kinesin-II [1]. 19.40 19.40 19.40 19.50 19.30 19.30 hmmbuild -o /dev/null HMM SEED 708 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.38 0.70 -13.27 0.70 -6.61 3 298 2012-10-11 20:01:00 2003-04-07 12:59:11 7 19 178 0 190 401 17 448.30 28 74.69 CHANGED 
VKGGSIDVHPTEKALIVNYELEATILGEMGDPMLGERKECQKIIRLRSLNAsTDIAALAREVVEKCKLIHPSKLNEVEQLLYYLQNRKD.....................Spp+SGA+K+EpouKhK.................DPPPaEGsElDEVANINDIDEYIELLYEDlPEKVRGSALILQLARNPDNLEELppNETsLGALARVLREDWK+SVELATNIIYIFFCFSSFSQFHuLIsHYKIGALCMsVIDHELKRHEoWpEELsKKKK...........................ssEEcP.....................................ts...++DYEKSh.........................................KKY+GLlKKQEQLLRVAFYLLLNLAEDsKlELKMRNKNIV+MLVKALDRDNhELLILVVSFLKKLSIasENKNEMuEhsIVEKLsKLlPC-HEDLLNITLRLLLNLSFDTGLRsKMIplGhLPKLVtLLuNDNH+tIAlCVLYHlShDDKsKSMFTYTDCIPMlMKMllEsssERlDLELIALCINLAsNKRNAQLICEGNGL+hLM+RALKF+DPLLMKMIRNISQHDGPoKsQFIDYVGDLARIIops-DEEFVVECLGILANLTIPDLDYEhILpEFpLVPWIK-KLcPGAAEDDLVLEVVlhlGTVAsDDSCAALLAKSGII.SLIELLNAKQEDDEIVCQIIYVFYQMVFHEATREVIIKETQAPAYLIDLMHDKNsEIRKVCDNTLDIIAEaDEEWAKKIKLEKFRWHNSQWLEMVESQQhDDuEQhL.YGD-chE.....PYIpESDILDRPDL ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t........hs...h.hh.L..hNhA.s....p.c.ph..h.p..tt.hlth.....Ll.ch.l..p....p..p..s...h..-..lhh......slshlppLu..h.h...-.N.Ksph...........s..p...s..hltt.Ls+hh................s..p.p..........c........l..p....sh....t....hLhN.loas..s.th.Rp...phlp.sGhlP...hLs.thl.........s.....s..t..p..h..p..................h...s..
h.....p.....h....Lhpluh.D.-.p........+..............t.hh.s...T.-..s..l.........................l.hph.................l...hp.s.......p...cl..p......h.p........h......h.hhhN.Lu.s.p.c.........hs...c......s.....pt.L....t.Lh.c...h.h.......p......t...........s....l...l...h...p...h...lRNl...S.....H.....................p.........h.h..p.............a.........lt...s............ls.t....l.t......t........c..p..........c...p....h.hcsluhLtNLsh.....t.........ph..hh........................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 77 116 158 +5633 PF05805 L6_membrane L6 membrane protein Moxon SJ anon Pfam-B_7771 (release 8.0) Family This family consists of several eukaryotic L6 membrane proteins. L6, IL-TMP, and TM4SF5 are cell surface proteins predicted to have four transmembrane domains. Previous sequence analysis led to their assignment as members of the tetraspanin superfamily it has now been found that that they are not significantly related to genuine tetraspanins, but instead constitute their own L6 family [1]. Several members of this family have been implicated in human cancer [2,3]. 
21.80 21.80 64.80 59.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.36 0.71 -4.75 8 302 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 46 0 161 252 0 186.50 40 95.13 CHANGED MCosKCuRClGhSLlsLALhsIlANlL.LaFPNG-TpYsocs+LSpaVWaFuGIlGGGLhhLhPAhVhluhcpcsCCGCCupEshGKRsuMLoSlluullGllGuuYCllVuulGLs-GPhChs......s.................pstWsYsFss......Tp...GpYLhDsolW.SpCpEPpHlV.WNVoLFSILLsluuLEhlLCLIQVlNGlLGsLCGhCps ........MCotpCu+ClGhoLlsLulhsllsNlL.LhFPsGcsp..h..sp...ps......plop.VWahuGllGuGlhhlhPAhhhlshtp.pssst.C...httt..t.supR......h..sM...h.....tSllhuslGllGusYChllSuluLtpGPhChh......................................sttWtYsFps.......................pt....usYL....h...s....p...ohW.........s....pChEPtplV.WNloLFSlLLsluslphlLChlQllNGllGslCGsCp......................... 0 6 14 57 +5634 PF05806 Noggin Noggin Moxon SJ anon Pfam-B_7925 (release 8.0) Family This family consists of the eukaryotic Noggin proteins. Noggin is a glycoprotein that binds bone morphogenetic proteins (BMPs) selectively and, when added to osteoblasts, it opposes the effects of BMPs. It has been found that noggin arrests the differentiation of stromal cells, preventing cellular maturation [1]. 
19.60 19.60 19.60 19.80 19.10 19.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.83 0.70 -5.05 6 133 2012-10-02 16:54:34 2003-04-07 12:59:11 7 3 68 1 66 126 1 199.30 36 87.85 CHANGED hllushhLLl.LslhhctusCQHYYhLRPlPS-sLPll-L+EcPDPlaDP+E+DLNETpL+slLGs.pFDssFMSltsP.c-ptuGs--Ls-.-L....p.sGhMPtEIKsL-F.Ds..hGKK+KsSKKL+R+LQhWLWuYoFCPVlYsWpDLGsRFWPRalKsGSCaSKRSCSVPEGMsCKPsKSsHLTlLRWRClp............................................................R+suhKCAWIPlQYPVIo-CKCSC .................................................................s...............................................h.........p...Ds......hs....P+.pcL.spphL+phLGs.paDstaMuhs.P...t................t......h...s...u........sp...pl.s...t....p........tsthsp.-.l+....sL..-.h...c...........h.....Gp..+.....+..lu+....Kh+R+.......lp.WLWs.....o..a.....CPVhYsWpDLGsRFWPRYlKhGsC.h..o...c...+..S........CS..hPc..G.MsCKP.....u..cSsplolLRW+.C....p..................................................................................................................t..p..ss..ppCsWI.lpYPlIo-C+CSC.............................................................. 0 19 27 47 +5636 PF05808 Podoplanin Podoplanin Moxon SJ anon Pfam-B_8548 (release 8.0) Family This family consists of several mammalian podoplanin like proteins which are thought to control specifically the unique shape of podocytes [1]. 
29.00 29.00 29.20 29.60 28.90 28.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.03 0.71 -4.68 4 44 2009-09-11 20:54:06 2003-04-07 12:59:11 6 2 33 1 24 54 0 151.90 41 76.82 CHANGED MW+VPVLhaVLGSAhhWs.ApGuohsRs.......EDDlVTPGspDuhVTPGlEDplsTsGATct..ES.GhAPLVPsppEpsT+..hE-LPTstsosH-tcEppST........TTlpV.....VTSHSt-K.........su-ETpTTscKDGLuVVTLVGIIVGVLLAIGFlGGIIIVVh+KhSGRYSP .......................................................................................................p..hhhhl.Gs..h...h..u.ttushs.s........EDsh.TsGht.sshshsGh..EDphsTsusopc...pS.uh.os.LVso...sspos.Ts.h.ph..EDhsT.s-ST.sHupcpopSs........Tss.sV.....sTSH..Ss-K................sss-TpTT......l-.KDGLuTVTLVGIIVGVLLAIGFIGGIIlVVhRKMSGRYS.......... 0 4 6 8 +5638 PF05810 NinF NinF protein Moxon SJ anon Pfam-B_8528 (release 8.0) Family This family consists of several bacteriophage NinF proteins as well as related sequences from E. coli. 20.00 20.00 20.20 20.00 19.90 19.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.25 0.72 -4.17 4 198 2012-10-03 10:42:43 2003-04-07 12:59:11 7 3 178 0 2 84 1 53.30 71 91.73 CHANGED MlsP.QspuYEpESltRAL.CAsCuppLcs.ElHVCEcCsAE.Lh.pDsNusMtEE.DDE ............MlsP.QshpYppESV-RALTCANCGQKLHVLEVHVCEcCCAE.LM.SDPNSSMYEE.-D-.......... 0 0 0 1 +5639 PF05811 DUF842 Eukaryotic protein of unknown function (DUF842) Moxon SJ anon Pfam-B_7096 (release 8.0) Family This family consists of a number of conserved eukaryotic proteins of unknown function. The sequences carry three sets of CxxxC motifs, which might suggest a type of zinc-finger formation. 
23.80 23.80 24.00 23.80 22.60 23.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.28 0.71 -4.69 17 215 2009-09-11 00:15:37 2003-04-07 12:59:11 8 5 139 0 146 221 2 122.90 32 78.96 CHANGED cppppclpsAlpphlsslpcphlhp...........hQtphF+CuscChpDpssoh-plppClEpCpsPltcAQphlpsElupFQsRLpRChhsCpDchcsth..p.s...ctchsp...phEpCsspClDcplsh.LPshhpph+csL .....................................pttclptulpphlpslp+phl+p........................hQ...tthacCu.ApCCp....D.pps.......ohcpVppClE+CpsP.ltpA.QshlpsELppFQ...........sRL..pRCshpCpD+hcsp...hsss..s.p......c.phtp.............................ph-sCsspClDcahpl.lPshhcch+ps.............................. 0 47 72 111 +5640 PF05812 Herpes_BLRF2 Herpesvirus BLRF2 protein Moxon SJ anon Pfam-B_7251 (release 8.0) Family This family consists of several Herpesvirus BLRF2 proteins. 25.10 25.10 25.30 35.60 25.00 25.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.57 0.71 -4.29 15 36 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 23 6 0 31 0 118.10 37 90.26 CHANGED sshohE-LsscLs+LchENKsLK+plppuss............Ps-chLTssQKEslIsusls+LoupApcKIEt+VcppssslVT+pph-cslpslolRlcVShc-.tht..............sps+pRRu+S+o+ .s.shohE-Lstclp+LplENKsLK+pltpuss............ssDphLTsspKEuhIsussstLsutAt+KIEt+V+ppsstlVT+pphpssLtslolRl-VSh--stht............t.sps+pRRutS+oR..... 0 0 0 0 +5641 PF05813 Orthopox_F7 Orthopoxvirus F7 protein Moxon SJ anon Pfam-B_7318 (release 8.0) Family \N 25.00 25.00 36.50 36.50 24.60 18.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.93 0.72 -4.09 3 39 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 19 0 0 28 0 73.10 85 94.88 CHANGED MTLVMGSCCGRFCDAKNKNKNKKEDVEEGGEGYYDYKNLNDLDEFpTRVEFGPLYMINEEKSDINTLDIKRRYRHAIESVYF ..MTLVMGSCCGRFCD...AKNKN......K..KEDlEEttEGCYsYKNLNDLDEu.sRlEFGPLYMINEEKSDINTLDIKRRYRHsIESVYF........ 
0 0 0 0 +5642 PF05814 DUF843 Baculovirus protein of unknown function (DUF843) Moxon SJ anon Pfam-B_7353 (release 8.0) Family This family consists of several Baculovirus proteins of around 85 residues long with no known function. 22.50 22.50 22.60 22.50 22.10 22.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.81 0.72 -4.19 15 82 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 82 0 1 50 0 82.30 40 94.80 CHANGED MhlhhshlsLIlluFlhsKspshSpLllhlLlLFVlFlhlLplYYl..ps-Ss.stDL.TccsK+lKKKpcLpcAFDAILNKNsSS ...............M.IhhhlLuLlslGhlhs+hcuhssLllhlLlhhllFhllLpl..a..hs..KT-Ss.spcl...o.p.K.....sKpsKKKRplppthDAllNKNpSS........ 0 1 1 1 +5643 PF05815 DUF844 Baculovirus protein of unknown function (DUF844) Moxon SJ anon Pfam-B_7453 (release 8.0) Family This family consists of several Baculovirus sequences of between 350 and 380 residues long. The family has no known function. 25.00 25.00 29.60 29.50 20.60 19.70 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.99 0.70 -5.60 22 58 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 52 0 0 57 0 349.20 38 96.85 CHANGED MosltLFLcIEpLKNpl.DppMphsIWsKFFPLLu-s...solsLshspl.-FLsssAph.upsshsppNAAlsSQa..............sssssssssstss.tpslhNlass....sstsssssssshshppY+psspKllpYYo.usToSo-F+VpDlVtsMlYLu+oP+Y+PLapLLEssh.pc-h-ChPsloscphpsll-hLRsLhshsohplDapslclh+sohs+shN.PlsRaP+Vhlhpsts....lspDKcsol--Lll-Rh-tlppLcsQphlsu...ss+IPaCsDsphIscLl+h.hcsaslsRMaYNAsNoIFYsTMENYAssNCKFslsDYNpIF+shDpl+Ehspp......hppsttuDsLslhLusss .MSsltLFLcI-pLKspI.DppMphsIWP+hFPLLuDs...sslsLshspl.pFLsssApt.upsshsssNAAlASQa...........ssssssssssssssstpsllNlFss........th.sspssssshshppaRpsspKllpaYoLssToSo-F+VpDlVhsMlYLu+oP+Y+PLapLLEssh.p--h-ChPshossphppllDhLRsLL-hPootlDasslclh+sohs+shN.PlsRas+lhlhpsss....lspDK+sTlEELllERuctIppLcPQQalsu...sscIPaCcDscFIscLlKh.hDsasLsRMaYNAANSlFYTTMENYAluNCKFslcDYNpIFKl.hDslR-hssp......hhtp..sppsDsLNlaLusts............ 
0 0 0 0 +5644 PF05816 TelA Toxic anion resistance protein (TelA) Moxon SJ anon Pfam-B_7534 (release 8.0) Family This family consists of several prokaryotic TelA like proteins. TelA and KlA are associated with tellurite resistance [1] and plasmid fertility inhibition [2]. 30.00 30.00 30.50 30.70 29.10 28.60 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.91 0.70 -5.73 81 1233 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 1042 0 200 815 46 325.10 30 86.17 CHANGED pspphupplD.ts.tslhs.aGspsppphupaSpphLsp.V+sp..............-suslGchLspLh....pplcph-sschtt...ppp.shlu+..hhs.+httplpchhs+YpsspspIDpIhtpLpptcspLh+DsthL-phappshpaappLshYItAGchthpclcpph..lsthppcsp....tpt..hthpphs-hpphlptL-pRlpDLphsctlulQshPpIRLlQpsNppLscKIpouhssTlPlaKsplslAlsLtcQ+psscshpsls-sTN-LLppNA-hL+psoh-hs+pspcuslDlETLcps.psllsTl--shpIpp-upppRppsppcLpphpp-l+ppLhphc ..................................htphsppls..tsp.psllsaGsssQ..pphupaSp.phLsc.Vpsp.................-.lu.slGctLspLh.......pplcph.....ss....s-..l........ppc.shhp+....lFp...+...s...c..pslpchhs+YQslssplD+IshpLp+ppspLh+DhthL-pLY-pNppaacsLshaIhA.......Gctph.......pc.......lpsch..lPthppcup......sssQ..hshpplschpphl-cL-pRlpDLphu+plulQosPQIRhIQpsNpsLs-KIpoulhsTIPlaKsphslAlsLh+..Q+pussutctls-sTN-LLppNA-hLKpsslEsA+pspcuhl-l-T.......Lccotpsl...lpslpEohpIppcG+ccRptucpcLtphpp-lKpcLhp..p........................ 0 65 133 162 +5645 PF05817 Ribophorin_II Oligosaccharyltransferase subunit Ribophorin II Moxon SJ anon Pfam-B_7633 (release 8.0) Family This family contains eukaryotic Ribophorin II (RPN2) proteins. The mammalian oligosaccharyltransferase (OST) is a protein complex that effects the cotranslational N-glycosylation of newly synthesised polypeptides, and is composed of the following proteins: ribophorins I and II (RI and RII), OST48, and Dadl, N33/IAP, OST4, STT3. The family also includes the SWP1 protein from yeast. 
In yeast the oligosaccharyltransferase complex is composed 7 or 8 subunits, SWP1, being one of them [2][3]. 21.30 21.30 21.60 21.30 21.20 20.80 hmmbuild -o /dev/null HMM SEED 637 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.78 0.70 -6.32 9 364 2009-01-15 18:05:59 2003-04-07 12:59:11 9 4 255 0 205 337 3 381.10 26 91.52 CHANGED us.u.tshhhlllLsLhssshA.hoso+aLoss-hsRhppshsp....shsDLcsAYYulhsLp.LGhpssD..tppsCchlcsslssSS...h-slFYAupuppsLu.Ccls..lss-scshLhAulp-DuolsQ.IYauVuuLssh.....GLsl...supplhpALp.splSK..........E-olhAshhAhpsAutLuptAs.................Lsshl-clEDhssphDElsGshLQFEsGLssTALhVsusa+LuspluspsslptEQllpLsshhhS+pshpohp-sFsllpAhusLSsN+aalPlllhhsGsutl.pcpslL+lpVTsVLupPLotAsVplspA.sSpoopssllppsshs...hcssl...............apLNhhsspPssGhYshslpl...cu-hRhlhsp.spLKVpV..oscVuIsss-lullDpDp.uhusKpp+VsaPsKscsshsADSpQphsLpFQLhDssoGtslsPHQsFV+L+NpcTtQEllFVAEPDSpphYKFELDsutRtp-.FsphSGpYsl.LIVGDAslENshLWNVAcl.LKFs-......c.ssushpupshatPKPEIpHlFRpPEKRPPphVSssFTALlluPLllLhlLWhKLGsNlSNFshSsu....sIlFHlGhuAhLsLhhlaWh+LNMFpTLKYLulLGshTFLsGNRhLuphAs+p 
............................................................................................................................................................................................................................................................h.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.h.l.....s....t..s...t.....h.p.sHQ.sFlhl......t.......p...p..o.t.-h.a.s.h............p.p......t....t.....hp..hp...l.sh..tph.....p.h......s..s......hph...lll..Gs.ss...hp..s..shh.hpl.........spl.lph.st.............................t...s..s.s.h....t....s..has............p.EIpHlFRts..tKpPPthlS.sFs.......s.h.l.......lssh.hhLhhhW.hp.l..G.......s...N..l.s.p.h....shu......s....................sh.hFa.h.u....hu.uh.h....s....l....h...h...l...a...ahphshFpTL.hhhhlushshlsGs+hLtp.t............................................................... 0 69 112 167 +5646 PF05818 TraT Enterobacterial TraT complement resistance protein Moxon SJ anon Pfam-B_7686 (release 8.0) Family The traT gene is one of the F factor transfer genes and encodes an outer membrane protein which is involved in interactions between an Escherichia coli and its surroundings [1,2]. 
28.30 28.30 28.60 29.40 27.90 28.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.37 0.70 -4.92 15 363 2009-09-13 11:01:34 2003-04-07 12:59:11 7 3 289 0 41 252 5 196.60 62 87.08 CHANGED sL-VpTKMSpTIFL-PVsssc+pVYlpl+NTS...D+sl.slcspltssLpu+G..Y+llssP-cApYhlQsNVLps-Kts.ss.upshLpsGatG...AsuGAAlGsuluuhs..SuuuGuslGhGLAG....GLlGhsusAhV-DlsYpMlTDlQIpE+scssVhsspt....puslKpG......................sSuspsQ.TpocsoshpcY+TRVVosANKsNLchEEApPlLEcpLA+sIAGIF .....................NL-VpTpMS-TIWL-P....uuc+TVaLQIKNTS.....D.K-h...s...LpuKIssAlpAKG..YpVVouPDcAaYWIQANVLKADKMDLRE..uQG.aLspGYEG...AAsGAA.LGAGIT..uYN...SsSAGA.TLGVG..LAu..........GLlGMAA.D.....AMVEDlNYTMITDVQI.....uERT..+..s.sVpTDN.....VAALRQG...........................TSGuKlQ..TSTETGNpHKYQTRVVSsANKVNLKFEEA+PlLEDQLAKSIANIL............................................................................................ 0 9 21 33 +5647 PF05819 NolX NolX protein Moxon SJ anon Pfam-B_7801 (release 8.0) Family This family consists of Rhizobium NolX and Xanthomonas HrpF proteins. The interaction between the plant pathogen Xanthomonas campestris pv. vesicatoria and its host plants is controlled by hrp genes (hypersensitive reaction and pathogenicity), which encode a type III protein secretion system. Among type III-secreted proteins are avirulence proteins, effectors involved in the induction of plant defence reactions. HrpF is dispensable for protein secretion but required for AvrBs3 recognition in planta, is thought to function as a translocator of effector proteins into the host cell [1]. NolX, a soybean cultivar specificity protein, is secreted by a type III secretion system (TTSS) and shows homology to HrpF of the plant pathogen Xanthomonas campestris pv. vesicatoria. It is not known whether NolX functions at the bacterium-plant interface or acts inside the host cell. NolX is expressed in planta only during the early stages of nodule development [2]. 
25.00 25.00 238.20 32.40 20.40 20.00 hmmbuild -o /dev/null HMM SEED 624 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -12.97 0.70 -6.16 5 94 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 40 0 19 88 1 324.40 28 76.06 CHANGED psDSsLhSohDsL.hp.hapLhuulppssp............ssusuhsA.sssuusD.QusQP.p.tT........Fs.tcl+tscAPPshpGS.oVTWpGGTLosoELQIlusLNsHKDphslpatpLpDKINDPsTPPDLKSALQuLpKDPpLF.AIGSQGDG+hGGKIKutDLh-FucpH.QV............................lTWsuGTLspspLEIhShLspH+D.hPlcauplptKINDPuTPPph+tAlpuhpQsPt.hhAhss....................................................h.sPlpupplTWsGGoLopsELpIVAsLNRHKDhCPlpWpsLpsKspDPuhPPDLKAAlpuLQQDPcLFaAIGSQGD.G+CGGKITAKDLS+FS..cHHuQVApYs-pQAcuYsQNYIPSDSs-sscPSVMTENDAMRELYRYSDYLPKcLsh-sFKQIVDGDScTKKCPPQVIAAAQYFlsHP-EWKuL..huGsp-+VuKsDFLQ+ASSuMHLTpsELcTLcTINSHQ-sFFGDGp-lTRDKLAoMucD-SLDPAVR-AAoQLLuDPLLFGLLNNAITGYKT+HuFFsFGGGHTVDSGsISpKDFp+FYssMTuANKTVQpPKTHsAsSsApQcAVADMhMG+ADQPDIKusKKsGGAFp+ulc-hLKh.SKlhDhhSsulSALuuIPllG ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 1 8 10 +5648 PF05820 DUF845 Baculovirus protein of unknown function (DUF845) Moxon SJ anon Pfam-B_7739 (release 8.0) Family This family consists of several highly related Baculovirus proteins of unknown function. 21.20 21.20 24.30 23.20 18.50 15.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.61 0.71 -4.36 20 63 2009-09-11 05:36:17 2003-04-07 12:59:11 6 1 59 0 0 61 0 118.20 55 54.02 CHANGED lsRlKYDucLLl+YLFDstspt.............ssssNlIKICKV+V+KTsGolLAHYYA+IplSNGYsFEFHPGSQP+TFQslH...o-GplltlhlLCDECCKcEL+sFV-GENsFNlAF+NCEoILCKR ..........................sRlKYDu-LLl+YlFDshss..............spshNVIKlCKV+VKKTsGolLAHYYApIplSNGasFEFHPG..S.QP+TFQslH......oDGhlItlhlhCD-CCKcEL+palcGENsFNlAF+NCESILCKR..................... 0 0 0 0 +5649 PF05821 NDUF_B8 NDUFB8; NADH-ubiquinone oxidoreductase ASHI subunit (CI-ASHI or NDUFB8) Moxon SJ anon Pfam-B_7830 (release 8.0) Family This family consists of several eukaryotic NADH-ubiquinone oxidoreductase ASHI subunit (CI-ASHI) proteins. NADH:ubiquinone oxidoreductase (complex I) is an extremely complicated multiprotein complex located in the inner mitochondrial membrane. Its main function is the transport of electrons from NADH to ubiquinone, which is accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space. Human complex I appears to consist of 41 subunits [1]. 
20.40 20.40 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.26 0.71 -4.62 10 254 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 204 0 156 248 0 145.40 30 83.01 CHANGED +ulhsupplsp..sssulhhpusRsAuGh.sKDhhPGPYP+TsEERtAAAKKYshRsEDYpPY.PDDGhGY.GDYPKLP.shohccRDPYYsWDaP-LRRNWGEPlHhDhDhYscsRls....ou.sshsWpoMshalhuFlGhM...lhhaahsEsaPsY....pPVusKQYPasshhh..hsDspK.P..VsHYsF ................................................t..........................................................-Ypsh..sD..-shth.GDYPplP.shs.....pppRDPahsWDcsp.RRNaGEPlH...hD...hD...h...a...t..s..phs....pss..s...h..s......hth..hh..h...shluhh.....hhhhhh.hp...h.........pPshs+paPht............................................................................................ 0 51 78 121 +5650 PF05822 UMPH-1 Pyrimidine 5'-nucleotidase (UMPH-1) Moxon SJ anon Pfam-B_7840 (release 8.0) Family This family consists of several eukaryotic pyrimidine 5'-nucleotidase proteins. P5'N-1, also known as uridine monophosphate hydrolase-1 (UMPH-1), is a member of a large functional group of enzymes, characterised by the ability to dephosphorylate nucleic acids. P5'N-1 catalyses the dephosphorylation of pyrimidine nucleoside monophosphates to the corresponding nucleosides. Deficiencies in this proteins function can lead to several different disorders in humans [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.47 0.70 -5.06 11 291 2012-10-03 04:19:28 2003-04-07 12:59:11 7 4 132 17 172 406 7 214.30 40 78.27 CHANGED hTLS+ath.NGcRsPTsasIh-ss.p.ls--s+pchppLac+YaPIEIDPpholEEKhPaMlEWWsKoHsLLhpttlp+scIsclV+cscstLRDGhcphFcpLpphslPshIFSAGlGDllEpllRQA.sVhasNlKVVSNaMpFD-sGhLsGFpu.lIHTaNKNsosLc.sopYacpl.............csRsNIILLGDSlGDlsMADGVs......sspsILKIGFLND+VEcpl-+YhcuaDIVLlcDpThDVssuILphIh ....................................................................hTLo+ath.sG.p+s.sosasllcss.thlsp....-hppchhpLhppYaP..IElDPphohcEKhshMhEWasKuHsLl.hpttlp+tpltphVt..c.u....s..hhLR-..G..hcphFppLpppslPlhIFS.AGlGD.l.l.Epll+Q..................t......ss..h...a..............sN.l+lVSNa...MpFs..-s............G..hL.p.......G.......F....cu..p.L.I..HsaN.K....p.p..ss.l....p....ss...t..aa..p..p..l...............p.s.+...sN.....l.lLLG....DShGDlpM..uDGl........sh.pplL+IGFLN..-c...l..-......c.......hcpYh...csaDIVLlpD.p..ohclsptllptl......................................................................... 2 63 82 126 +5651 PF05823 Gp-FAR-1 Nematode fatty acid retinoid binding protein (Gp-FAR-1) Moxon SJ anon Pfam-B_7852 (release 8.0) Family Parasitic nematodes produce at least two structurally novel classes of small helix-rich retinol- and fatty-acid-binding proteins that have no counterparts in their plant or animal hosts and thus represent potential targets for new nematicides. Gp-FAR-1 is a member of the nematode-specific fatty-acid- and retinol-binding (FAR) family of proteins but localises to the surface of the organism, placing it in a strategic position for interaction with the host. Gp-FAR-1 functions as a broad-spectrum retinol- and fatty-acid-binding protein, and it is thought that it is involved in the evasion of primary host plant defence systems [1]. 
25.40 25.40 26.00 26.00 25.30 25.30 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.70 0.71 -4.26 11 100 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 26 1 68 82 0 146.00 26 76.31 CHANGED sIPpEaK-LIPpEVs-ahpslTsEEKssLKElh+s.apcaKsE-EhlsALKEKSPsLapKAcKLcshlKpKl-uL.ss-AKAFlpclIApuRpl+sphlsGpKPol-pLKphscshlscYKALos-AK--LpcpFPhlsphhpsEKhQslhsphLs ..........................tpaKphlPt-lhphhpslos--Kthl+...-lhps.....a......t.....p.....a..p.....sp--h.lssLKcKSPpLap+hpcl..pshlcpK.lssL..ss-u+tFlccl........ls..ps+.p.l.hsphhs...Gp.p..s......h....p.c...l....Kph......scphhspacuL..spp..sKp..-Lcps..FPtlsphhpscchp.................................. 0 33 42 68 +5652 PF05824 Pro-MCH Pro-melanin-concentrating hormone (Pro-MCH) Moxon SJ anon Pfam-B_7863 (release 8.0) Family This family consists of several mammalian pro-melanin-concentrating hormone (Pro-MCH) 1 and 2 proteins. Melanin-concentrating hormone (MCH) is a 19 amino acid cyclic peptide that was first isolated from the pituitary of teleost fish. It is produced from pro-MCH that encodes, in addition to MCH, NEI, and a putative peptide, NGE. In lower vertebrates, MCH acts to regulate skin colour by antagonising the melanin-dispersing actions of small alpha, Greek-melanocyte stimulating hormone (small alpha, Greek-MSH). In mammals, MCH serves as a neuropeptide and is found in many regions of the brain and especially the hypothalamus. It affects many types of behaviours such as appetite, sexual receptivity, aggression, and anxiety. MCH also stimulates the release of luteinising hormone [1]. 
20.60 20.60 20.80 20.90 20.00 20.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.92 0.72 -4.49 2 63 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 45 0 30 57 0 76.60 70 52.87 CHANGED h.SppstpKHNFLNHGLsLNLsIKPYLALcGSVAFPAENGVQsTESTQEKREhGDEENSAKFPlGRRDFDhhp...s+hYpsh.pl .........................+ssGSKHNFLNH.GLPLNLAlK..PYLALKGSVA.FPAENGVQNTESTQEKREhGDEENSAKFPIGRR..D..FDMLRCMLGRVYRPCWQV.......... 0 3 5 14 +5653 PF05825 PSP94 Beta-microseminoprotein (PSP-94) Moxon SJ anon Pfam-B_7865 (release 8.0) Domain This family consists of the mammalian specific protein beta-microseminoprotein. Prostatic secretory protein of 94 amino acids (PSP94), also called beta-microseminoprotein, is a small, nonglycosylated protein, rich in cysteine residues. It was first isolated as a major protein from human seminal plasma [1]. The exact function of this protein is unknown. 22.00 22.00 22.40 22.00 21.90 21.90 hmmbuild --amino -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.48 0.72 -3.71 6 122 2012-10-01 23:42:56 2003-04-07 12:59:11 6 1 52 7 63 145 0 86.20 31 74.93 CHANGED uC.lI.pchlPs-pocECTDLKGNKHPLNShW+TcNCEhCoCccspIoCCThsupPVGYD+++CQ+IFpKEsCpYSVVEKpsPuKTCsVsuWlh ............................................ttChD.h.c.Gp.h.+.slsopWpocsC.pCoC.h.c.s.G.lsC...Csts.tpPlsasp.ppCptlhcpcsCpaplVcKp-PppsCt................................... 0 8 10 18 +5654 PF05826 Phospholip_A2_2 Phospholip_A2; Phospholipase A2 Moxon SJ anon Pfam-B_7918 (release 8.0) Family This family consists of several phospholipase A2 like proteins mostly from insects [1]. 
20.20 20.20 20.20 20.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.62 0.72 -4.05 23 357 2009-09-11 05:31:23 2003-04-07 12:59:11 7 7 104 1 215 359 0 92.80 33 37.16 CHANGED lhPGTKWCGsGNhAss.YsDLGstpcsDpCCRsHDpCsphIsu...hps+aGL.pNsshaTh.+CcCDppFpsCLps.ssssh.........uphlGphYFslhps.CathppPp ...................................hPGThWCG.Gs...Ats..hp..-L.G..h...ttsDtCCR...p..H..D.p.Cs.p..h.Its...hps.caul.pNhpha.T.............hp.....HCsCDp.chtpCLpt...hssth.........uphlG.haFs.llph.CFtht..t.............................. 2 73 90 162 +5655 PF05827 ATP-synt_S1 Vacuolar ATP synthase subunit S1 (ATP6S1) Moxon SJ anon Pfam-B_8145 (release 8.0) Family This family consists of eukaryotic vacuolar ATP synthase subunit S1 proteins [1]. It also contains BIG1 ER integral membrane proteins which are involved in cell wall organisation and biogenesis [2]. 37.60 37.60 37.80 37.90 37.50 36.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.70 0.70 -5.09 31 258 2009-09-11 20:52:15 2003-04-07 12:59:11 7 4 174 0 154 240 0 286.90 23 83.89 CHANGED ssPhllhSspt....h...hssssuphpsssplhshhpphLusCsoctYlhlsQP.......GlpssDFsttps.........hspLpshhptussslthshl........s.lshppLtphlpcpCss.sthlsssstp..ph.th.....csRllhlphs.Lssst......ppRtphLpspDphltpllsplsSsp..YT..............llhso...t....................................httpssptslatt.hsshsts.......hch-hp..................p.........p..pshssp...................................................................spppsssph..hhtcaQ............................aaosGlahullsslhhlslLhhulphlhS 
...........................................................................................................................tsPhhhhSspt...............s.ppup.ltos.pl.s..hlp.hLp...s.spsshlhl.Qs.......tlphpDFsthts..........hspLp..ptlt....u.s..sslhhshl........s.hs.hspLtphlp.cphs....s..shhlshts.t...ph.t.........................stllhlchs.hs...ts..........ttpphLpt.s.D......phlsp.lls.pl.upp...YT................slhTuh...s...............................................................p.t..shh...hp..hp.s..............th-hp..........................................h.p...............p....t.tsp............................................................................................................................sspspss.h.......hhpcaQ................................................................................................FFoPG.IaMullsslhhlhIhhhGlphl.p............................................................................................................ 0 29 69 121 +5657 PF05829 Adeno_PX Adenovirus_PX; Adenovirus late L2 mu core protein (Protein X) Moxon SJ anon Pfam-B_8179 (release 8.0) Family This family consists of several Adenovirus late L2 mu core protein or Protein X sequences. 25.00 25.00 57.40 57.40 18.60 17.60 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.25 0.72 -4.46 18 117 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 86 0 0 63 0 44.30 69 55.74 CHANGED pppRp+thp+ch+GGF.LPALIPIIAAAIGAlPGIAusAltAupt ...............+RRRRAhpRRLpGGF.LPA.LIPIIAAAIGAIPGIASVAlQASp.h 0 0 0 0 +5658 PF05830 NodZ Nodulation protein Z (NodZ) Moxon SJ anon Pfam-B_8202 (release 8.0) Family The nodulation genes of Rhizobia are regulated by the nodD gene product in response to host-produced flavonoids and appear to encode enzymes involved in the production of a lipo-chitose signal molecule required for infection and nodule formation. 
NodZ is required for the addition of a 2-O-methylfucose residue to the terminal reducing N-acetylglucosamine of the nodulation signal. This substitution is essential for the biological activity of this molecule. Mutations in nodZ result in defective nodulation. nodZ represents a unique nodulation gene that is not under the control of NodD and yet is essential for the synthesis of an active nodulation signal [1]. 24.90 24.90 25.00 27.60 24.40 24.80 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.27 0.70 -5.85 6 136 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 108 5 13 135 8 169.20 59 89.55 CHANGED pp-RaVlSRRRTGFGDCLWSLAAAWcYApRTGRTLAIDWRGSCYL-pPFoNAFPVFFEPIpDIuGVpVICDDpINphSFPGPFFPsWWNKPSI-ClYRPDEQIFRERDELs-LFQAQ-Ds-ANTVVCDACLMWRCDE-AERpIFcSlpsRsEIpARIDAlYpEHFpGaSlIGVHVRHGNGEDIMDHAPYWADs-lAL+QVCsAIccAKALsHs+PV+VFLCTDSApVlDplSuhFPDLFslPKpFQAcQAGPLHSAsLGl-GGhSALlEMYLLuRCDTVIRFPPTSAFTRYARLhVPRVIEFDLscPuRLllIDcsupphsA ............................................................LAuAWpaAppTGRTLsIDWRGSCYLDpPFoNAFPVFFEPVp-IAGV.VICDDcINphSFPGPFFPsWWN....+PoIDClYRPDEQIFRERDELcpLFQupcDs-ANTVVCDACLMWRCDpEAERpIFRoIKPRsEIQARIDAIYcEHhts...lG.......................................................................................................................................................................................................................................... 0 6 10 12 +5659 PF05831 GAGE GAGE protein Moxon SJ anon Pfam-B_8207 (release 8.0) Family This family consists of several GAGE and XAGE proteins which are found exclusively in humans. The function of this family is unknown although they have been implicated in human cancers [1]. 
20.80 20.80 20.80 20.80 20.40 20.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.48 0.71 -3.94 7 212 2009-01-15 18:05:59 2003-04-07 12:59:11 6 3 21 0 48 231 1 97.00 41 61.49 CHANGED MuaRuRop.R.sRPRRslpssph.lGshl..E.Po-E....pPppEEPPTcSQD.sPuQERE.DpGAuthQssclEAD.QELspsKTGsEsGDGPDsptthLPp.EphKhPEuG.....-tpsQV ............MshRuRop.R.sp..s..R.t..ps.sp..lGshh.pp...o-E.....pspptEPPTcsQs.ssupctp.spGAst.sQs.sc....hEAs.QEh..shsKsGsEptDGPDlpttslPs.EpsKhsEsG.............. 1 30 30 30 +5660 PF05832 DUF846 Eukaryotic protein of unknown function (DUF846) Moxon SJ anon Pfam-B_8404 (release 8.0) Family This family consists of several of unknown function from a variety of eukaryotic organisms. 20.50 20.50 21.80 20.70 20.10 20.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.90 0.71 -4.48 59 469 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 293 0 302 430 4 135.80 41 65.60 CHANGED pssHP..h....shhhalhh+hsullhYlh...sshh.......ps.alhpF.llhllLluhDFWhlKNloGR.hLVGLRWWs-ss.......p..cG...............p.spWhFEotsss....t.h.ssh-.............uplFWhslhssshh...............Wllhslhslhphphha...lhlshluhsLshsNhhuah.+C ..................................................................h..tHPlsshFHlhF+suAllhYlh.sshF...........sss.FlhtF..lhhlLLLuhDFWsVKNloGR.lLVGLRWWNplc..............-.-G.................c.SpWlFESpcss.......pph.sts-..................u+lFWluLhssP..ll...............Wllhshh...sl...hph.p.hpW...LhlVhhulsLshsNlhGah+C..................................... 1 111 166 246 +5661 PF05833 FbpA Fibronectin-binding protein A N-terminus (FbpA) Moxon SJ anon Pfam-B_8577 (release 8.0) Family This family consists of the N-terminal region of the prokaryotic fibronectin-binding protein. Fibronectin binding is considered to be an important virulence factor in streptococcal infections. 
Fibronectin is a dimeric glycoprotein that is present in a soluble form in plasma and extracellular fluids; it is also present in a fibrillar form on cell surfaces. Both the soluble and cellular forms of fibronectin may be incorporated into the extracellular tissue matrix. While fibronectin has critical roles in eukaryotic cellular processes, such as adhesion, migration and differentiation, it is also a substrate for the attachment of bacteria. The binding of pathogenic Streptococcus pyogenes and Staphylococcus aureus to epithelial cells via fibronectin facilitates their internalisation and systemic spread within the host [1]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 455 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.56 0.70 -5.95 114 2820 2012-10-02 21:21:44 2003-04-07 12:59:11 6 21 2261 20 808 2533 427 354.70 22 68.70 CHANGED DGhhlpullpELppp......LhsuRlpKlhQPppp...-lhlslR.s.tpppht.LLlSupsshsRlplT..pt.shtsPtsPssFsMlLRKaLpGutlhpIcQ.hsh-Rllplplpsc................s-h.Gctthhp........LllElMG+HSNllLl-pps....pIl-ul++ls.p.sp.hRslhPGptYhhPPs.pphsPh..phpp.c.phhph....l............tttls+tLhppapGlSshhucEls....hcss.......p.....h...thpplhpthpphhpplp.............................thpP..phhh.t...........psassl.s...hp....t.......ppasoluphL-paYtp+sppc+lp..p..pppcLp+h...lpsplc+tpcKlpphpcpLppscpu.....-pa+hhGELLpshlatl.ppGhpp.lpl.saas...sp...........................tl....pIsL-sphoPspNAQcYac+YpKhKpuhptlppplcpscpElpYL-sl.splp.............pu.s.p-lp-I+cELhppGYl+.....cppp.cpc+ppp 
...............................................................................................sh.htthhpp.lp........lhst+lppl...p.........p.p.............pl.....hh.lp...t..........t.......t...........p..h.......L..ll.sh...........h.sRlpho........p.....t...h.....t...........s...........s.......s.s.........FshhLRKalpsuhlpplpQ.h.......t.-Rllphp.hppt..................................................................h.....hp.........lhlE.lh.G+.uNl.lLs..-........p...........p...........p....p........I.....l-sh..+.+.ls.p...........sp.......hR......lhs.u...Y............h........P........s.t.......t.........t.h.ssh.................phpt.t....thhph.....................h........................p...l......p......h....uh...u..hspch................h.................................................................................t.h.......t......t...h...t...t.hh.t..........................................................................................s...............................................a..h......................ht...........................atsh..sphl-taa..t....t....p.t..........p...p....p....c..lp.....p........ptppl....ch..........lppp...h.p...+.pp+lt.p.h.p..p...p.h.t.t.sppt...............cph.p..hu-ll.....ts.hh...tl...........p....s.........p.p...hpl.....sahp....tp....................................................................................................l..tI.L.s...t.h..ostpNA.ptYa....p....p....h.p....K..h....+p...t.h....p....hl....ppt..lp..ts.p.p.pl.t.a..hpp.h.t..lp................h...s...tl...ht.c.h........................................................................................................................... 0 311 543 709 +5662 PF05834 Lycopene_cycl Lycopene cyclase protein Moxon SJ anon Pfam-B_8336 (release 8.0) Family This family consists of lycopene beta and epsilon cyclase proteins. Carotenoids with cyclic end groups are essential components of the photosynthetic membranes in all plants, algae, and cyanobacteria. 
These lipid-soluble compounds protect against photo-oxidation, harvest light for photosynthesis, and dissipate excess light energy absorbed by the antenna pigments. The cyclisation of lycopene (psi, psi-carotene) is a key branch point in the pathway of carotenoid biosynthesis. Two types of cyclic end groups are found in higher plant carotenoids: the beta and epsilon rings. Carotenoids with two beta rings are ubiquitous, and those with one beta and one epsilon ring are common; however, carotenoids with two epsilon rings are rare [1,2]. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.23 0.70 -5.54 20 839 2012-10-10 17:06:42 2003-04-07 12:59:11 7 25 565 0 264 2841 3261 323.20 23 76.95 CHANGED DllllGuGsAGhtlAtcl.....u+suLpVsLl-suPsh.hssNa.sWsschpDLu......LtsslpasWs.sttVths-pspphhu...huYuplspctLcctlhc+hs.tsushhhsu+Vsplspsss.......hshssGp.....pItuRhVlDupG.ssss......suhpsuhQshhGlElcl-ps.aDssthllMDaR.....spQp......cs.sFlYshPhSssRlhlE-TphussssLsh-sL+p+lhshhcs.hGlplpclhc-EpuhIPl..GGslssh.pc.....shshGssAGhsHPuTGYSlstulutAs....slAp.hls.s..............shuhpshtshasp-RtcpcsFF.hhshhLhlphch-uppphhcpFacLPpthhptFhuu+LolsDhlhluht...h.slssssshh 
............................................................................................................DllllG.uG.sAGhslAtpl.......................s..p..h...u...l....p...V.....h.......h.....l...............-....................s...........p.......h................h........s......s......s.....h.......s...........W....s..........t..h..p..s.hs......................................l.t..s..h....l......p.....t...p.......W..........p.....s......s......h........l.....h.....h.........s....s.....t......p...h...t.....hs...................hs.Y.....s.........h...........l....s...........p.p..........t.........L..c..p.......t..h.......h.p......c...s.....h........t......t.u.......l.....p..h......h..p...u..p....V....t...p...l.htpss..........s.hl.h..s.s..s..Gt.......................pl...p.A..phVlDA...sG...hstp...................tsh..s...u....h......Q....h....h.......h.......G..h...h..c........s......p..t............s....a..s.........p....p.......h.h.h...M.D..hR............s..............................ths..s.F.h.....Y.....sh.....P..h.u.....s..s...p....h....h..l....E.....T.....hs..........s....p.s....h...s....h..pt.hpp..thhth.l..p........p....h..........sh..p....h.....p...l....c.........c.E....h...s.h.......lPh.....t..s....s.....h..pt...............hhth.Gssuuhs+Pso..G....Y....h....s....ts.tt.us........hlup..hh.ht..............................t.t....th..h.th...hsh.t..hh..h..p...t....h.h....hhs.......hhhth....tth....hh.p.ha..h...tl.....hh..tFhstt.....t.h.h.................h.................................................................................................................................................................................................................... 0 73 187 242 +5663 PF05835 Synaphin Synaphin protein Moxon SJ anon Pfam-B_8588 (release 8.0) Family This family consists of several eukaryotic synaphin 1 and 2 proteins. Synaphin/complexin is a cytosolic protein that preferentially binds to syntaxin within the SNARE complex. 
Synaphin promotes SNAREs to form precomplexes that oligomerise into higher order structures. A peptide from the central, syntaxin binding domain of synaphin competitively inhibits these two proteins from interacting and prevents SNARE complexes from oligomerising. It is thought that oligomerisation of SNARE complexes into a higher order structure creates a SNARE scaffold for efficient, regulated fusion of synaptic vesicles [1]. Synaphin promotes neuronal exocytosis by promoting interaction between the complementary syntaxin and synaptobrevin transmembrane regions that reside in opposing membranes prior to fusion [2]. 21.10 21.10 22.90 22.60 20.50 20.30 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.95 0.71 -4.02 8 251 2009-09-10 22:35:29 2003-04-07 12:59:11 7 3 90 11 158 207 1 124.40 40 88.74 CHANGED suFlsKQhlGsQLssVputLGsc.t-EGDs...sAp......EE-tEhpEAlREtEE+RKtKaRKMEpEREpMRQGIRDKYuIKKKEE......pp-spsh.....EGpLGRcKKoPcElAsEAsps----ttpuhhspslcpLss.plp-lhsK .................hsFhhKphlus.thKshst..h....l....GG-.......--t-s....pAp.................cc.EEhQctlt..pp...c.c.......ERcAcas+hEAER.EphRppI..RDK........YtlpKpEp.......c-sphphA........l..s.c.s..hc.....Ah.stpsp.-.E.E..E.p.p....s.l.s.lp.l.s.slpphhpK.............................................................. 0 32 47 91 +5664 PF05836 Chorion_S16 Chorion protein S16 Moxon SJ anon Pfam-B_8659 (release 8.0) Family This family consists of several examples of the fruit fly specific chorion protein S16. The chorion genes of Drosophila are amplified in response to developmental signals in the follicle cells of the ovary [1]. 
25.00 25.00 40.00 39.50 23.30 20.20 hmmbuild --amino -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.64 0.72 -4.16 5 37 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 17 0 8 39 0 107.10 79 78.20 CHANGED GSGu.YuDVVKssETAEAQAAALTNAAGAAASAAKLDGADWYALNRYGWEQGRPLLuKPYGPLDsLYAAALPPRSFVAEIDPVFKKSsYGGuYG-+olTLNTGAKLAVuAl .....G.YGASYGDVVKAAETAEAQASALTNAAGAAASAAKLDGADWYALNRYGWEQGRPLLAKPYGPLDpLYAAA.LPPRSFVAEVDPVFKKSpYGGSYGp.pAaLpTsSKLuVVAI.......... 0 1 1 5 +5665 PF05837 CENP-H Centromere protein H (CENP-H) Moxon SJ anon Pfam-B_8705 (release 8.0) Domain This family consists of several eukaryotic centromere protein H (CENP-H) sequences. Macromolecular centromere-kinetochore complex plays a critical role in sister chromatid separation, but its complete protein composition as well as its precise dynamic function during mitosis has not yet been clearly determined. CENP-H contains a coiled-coil structure and a nuclear localisation signal. CENP-H is specifically and constitutively localised in kinetochores throughout the cell cycle. CENP-H may play a role in kinetochore organisation and function throughout the cell cycle [1]. This the C-terminus of the region, which is conserved from fungi to humans. 23.00 23.00 25.30 25.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.10 0.72 -3.86 16 119 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 108 0 88 130 1 106.20 27 46.14 CHANGED LShthhphppsppsspccLpslchcpLpl+ppsppllpElhphsccpcshcc....s.chpppLcplcuch+po+p+hcshpslhpulllGSGVNWAEDccLpslVLc.p ...........................................................................ls..h.p.tptptsh.ccL.tslchcplplpppspphhtclhplppcpcpppc.....................s.chppplcplcpphctp+pphpsl+plhp.ullluSGVsWAcDtpL+clVLc.......... 
0 18 34 60 +5666 PF05838 Glyco_hydro_108 DUF847; Glycosyl hydrolase 108 Moxon SJ, Bateman A, Eberhardt R anon Pfam-B_8737 (release 8.0) Domain This family acts as a lysozyme (N-acetylmuramidase), EC:3.2.1.17. It contains a conserved EGGY motif near the N-terminus, the glutamic acid within this motif is essential for catalytic activity [1]. In bacteria, it may activate the secretion of large proteins via the breaking and rearrangement of the peptidoglycan layer during secretion [2,3]. It is frequently found at the N-terminus of proteins containing a C-terminal Pfam:PF09374 domain. 21.20 21.20 21.40 21.60 20.10 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.63 0.72 -3.86 20 820 2012-10-03 00:09:25 2003-04-07 12:59:11 7 13 534 9 151 587 217 85.40 39 42.35 CHANGED phhsplls+EG............GYsscPcD.GG.......s.....TsaGITttThcshuh............su-l+sLTc-pAh.sIYctcYWp+h+hDpls...tuluhplFDsuVNpG ....................................hhstllucEG............G..YV....sc..PpD.GG............s.......TpaGIT.sThpAhuh......................................tu-h+s..L.T..c..spAh...pIacppYW..psphDpls...tsluhplhDsuVNtG............... 0 33 91 125 +5667 PF05839 Apc13p Apc13p protein Wood V anon Wood V Family The anaphase-promoting complex (APC) is a conserved multi-subunit ubiquitin ligase required for the degradation of key cell cycle regulators Members of this family are components of the anaphase-promoting complex homologous to Apc13p [1]. 
21.50 21.50 21.80 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.93 0.72 -4.01 7 186 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 177 0 130 157 0 81.00 31 56.67 CHANGED hDup...hphctsths.lhssapp...DpLP..-Dlh.lP.scL.shp..t--.hhPDpcAshGhp+totpp+..................EssWpDLuLt-Lhppushlss ..............................hDSp....hphpt.hhsll.-tWpp...Dp.LP.h-Dlh...lP...sp..h.................................P-tcts.Gh..s..+to.ppp.........................................E.tWpDLuLppLhpss....s........................................... 0 29 57 98 +5668 PF05840 Phage_GPA Bacteriophage replication gene A protein (GPA) Moxon SJ anon Pfam-B_8738 (release 8.0) Family This family consists of a group of bacteriophage replication gene A protein (GPA) like sequences from both viruses and bacteria. The members of this family are likely to be endonucleases [1,2,3]. 20.90 20.90 21.00 20.90 20.50 20.80 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.27 0.70 -5.75 11 1036 2012-10-02 18:54:06 2003-04-07 12:59:11 8 4 599 0 93 1026 15 329.10 36 50.68 CHANGED s-hcchAschAsthpthhpphs-.........p...h.tthhpsYphlAph.shshslp...Pth...........pt...tp.h.sslhRhhs-cWWhcpLpphththhEp..hshs.Vs+ptusYsSpcslp-hRtppptsh-alKuhslcsE-u..pphsLt-hhhtS.SNPthRRsEhMsRl+GhEchAcppGhhulFhTLT.APS+aHAhhcss....+.......W.s.As.Pp..-spcYLssla.shhRAchp+ttlphaGlRVsEPHHD..............GTsHWHhlhFhps..............ccpcplsplhpsa........Alc.DpcEht.cGhh.................psRh+uEhlcspKGoAsG.YlAKYIuKNIDuptltt........chsc-o.u+php-suts..VhuWsphaRIRQFpFaGhsshtsaRELR+hsst 
....................................................................phpphus.hus.h......hh.pthh....................t.............p..h..h.p.haphlut...shthp...h....Pha........................tt........p.h.sulhRhhsscWWhcpLhh.phphpEt.hh.A.hs.V............s+..c..............t......osYsSpcslpchRtp+pt.sh-ah+uh.l.s--u...phsL.shh.to.uNP.hRRsEhMsphtGhEhhActcG.h.ulFhTlT.sPS+aH.u....s..h....p.......s..G.....t............psp.................Ws.s...ut...s+....sspcYLs.sha..uhh....Rtthp...c..t..s.......l.........p....h.......aGhRlsEPHH.D..............GTsHWHhhhFh+s.........................................cp.hcpl.....ss.lhcca..........Alc-..D.p.cE..htpss.........................................................tsRF+uEhl......cs........p...+Gs.ssu.YIAKYIuKNlD...ut.u.lst.....................phsc-o....G+sh......p-...o...scp......shuWAphaRlRQFp.h.Gh.s.s.htsaRELRplst.t............................................................ 0 3 38 69 +5669 PF05841 Apc15p Apc15p protein Wood V anon Wood V Family The anaphase-promoting complex (APC) is a conserved multi-subunit ubiquitin ligase required for the degradation of key cell cycle regulators Members of this family are components of the anaphase-promoting complex homologous to Apc15p Swiss:O94688 [1]. 23.80 23.20 24.40 24.40 23.70 23.10 hmmbuild --amino -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -3.39 27 114 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 113 0 84 115 0 115.10 25 31.83 CHANGED sapLWhs...................ssps.pssppppsssttspp.................shhpsss.ss.......tsh..tt..tsssplusLhhcEcthctRcpsItshGaoaI+PhGlsKTMhth+..ccc......tEpEct.tsu ......................................hpLa.s......................s....pstpppt...p.........................h.p.st.ss..t.s...h..tttt......hppspLspLhh-EpthcpRctsIpshGasWl+PhGlsKTMhph+....EEc................tEpcEt.t........................ 
0 14 38 68 +5670 PF05842 Euplotes_phero Euplotes octocarinatus mating pheromone protein Moxon SJ anon Pfam-B_8825 (release 8.0) Family This family consists of several mating pheromone proteins from Euplotes octocarinatus. Cells of the ten mating types of the ciliate Euplotes octocarinatus communicate by pheromones before they enter conjugation. The pheromones induce homotypic pairing when applied to mating types that do not secrete the same pheromone(s). Heterotypic pairs (i.e., those between cells of different mating types) are formed only when both mating types in a mixture secrete a pheromone that the other does not. The genetics of mating types is based on four codominant mating type alleles, each allele determining production of a different pheromone. The pheromones not only induce pair formation but also attract cells [1]. 25.00 25.00 25.80 69.00 23.50 22.60 hmmbuild --amino -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.24 0.71 -3.81 6 8 2009-09-11 05:26:38 2003-04-07 12:59:11 6 1 1 0 0 9 0 134.00 47 88.96 CHANGED FKMTSKVNTKLQSQIQSKFQSKNKLASTFtTs.phK..s..-s.....hTGC.sTN..hC...u.sCusTsss.t.C.......sssGQNshDhha.hWhs.C..........hssYssCl..usssYthYSu...CG...C.sshs.ushtD....hhts.Chsa FKMTSKVNTKLQSQIQSKFQSKNKLASTFQTSSpLKstC..Dshpp..lTGC.sTN.ssC.hpu.sCSuTGsDp-hC.......ssVGQNllDhhFtpWus.C..........aNDYssClpaAspsYshYSusEhCGC.sshp.usapD....hh-uhCss.h 0 0 0 0 +5671 PF05843 Suf Suppressor of forked protein (Suf) Moxon SJ anon Pfam-B_8911 (release 8.0) Family This family consists of several eukaryotic suppressor of forked (Suf) like proteins. The Drosophila melanogaster Suppressor of forked [Su(f)] protein shares homology with the yeast RNA14 protein and the 77-kDa subunit of human cleavage stimulation factor, which are proteins involved in mRNA 3' end formation. This suggests a role for Su(f) in mRNA 3' end formation in Drosophila. 
The su(f) gene produces three transcripts; two of them are polyadenylated at the end of the transcription unit, and one is a truncated transcript, polyadenylated in intron 4. It is thought that su(f) plays a role in the regulation of poly(A) site utilisation and an important role of the GU-rich sequence for this regulation to occur [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.91 0.70 -4.67 30 583 2012-10-11 20:01:00 2003-04-07 12:59:11 9 42 284 15 447 699 16 211.40 22 22.92 CHANGED ohsalthM+shRRspG.......lpuuRplFpcAR.ccsclTaclYlAsALlEaassKDhshAtKIFElGhKhFss-spallcYL-aLIplNDssNsRsLFEpslsp..lssp....................................s+tlap+ahcYESpaG.-LsslhpLEcRhtphFP-.........-splphFscRYph.shsshpsp-ltt.tpphtsp.hh.............................................................spss.+Rslpp.s.p..................................................ststssss.sptt.pssstP............................lPpsIshLLshLPsspha..sus....hhssccLlcllp.psslP 
..........................................................................................................................................................................................t...................t.R...hhtps...h...........................p.....pl..hh..t.h..Ahh.....Eahht...pc.....p............AhplFEh..uL....+..p.....asc...p...s....c...h..hh.tYl.-....a.l...h...c.h..s...-.....t.....s..p.................s..R...slF.ERs.l.sp....lsscp.........................................................................p.lap+alpaE....pph....G...cht....slh....p..l..c.p...Rhh..p..h..h.p..................................t..h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 173 257 377 +5672 PF05844 YopD YopD protein Moxon SJ anon Pfam-B_8937 (release 8.0) Family This family consists of several bacterial YopD like proteins. Virulent Yersinia species harbour a common plasmid that encodes essential virulence determinants (Yersinia outer proteins [Yops]), which are regulated by the extracellular stimuli Ca2+ and temperature. YopD is thought to be a possible transmembrane protein and contains an amphipathic alpha-helix in its carboxy terminus [1]. 
25.00 25.00 27.20 27.20 21.70 21.10 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.89 0.70 -5.38 8 87 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 76 1 11 45 2 279.90 45 93.75 CHANGED MIsSDYsNusopsuslTEshsspsptsRossTsHEAAu.sppspuhppsslPtLs+PShshDsctVophsup.L-Soh-MMSLLFcLARpARE.Gl.QRDIENKhlIusQpAQVDEMRpGAKLMIAMAVVSGVMAusSAlhGuFShuKuuKsIKQ-KuLsuNIAGRppLIDsKh-thussGpp..sRttlG+lWpssQsu...DpsALpsLsKcF-pssu+tQlhNTVhQSlGQMuNSAVQVpQGcSQAcAKEDEVpATIuQsEKQKAEDsMSFNsNFMKDlLQLhQQYuQSQNQAWKAAFGVs ..................................................tshh......s.uup.sAhsoEsht.ps-h..+s.sTtppAtsltp..uuht+spsstL.pPppslss...shlpputu-.LsuoLolLhLLhclA+cAREhGl.QRDIENcAsIsAQKsQVsEMpsGApLMIAMAVVSGlhAusS..sVsuuhuhhKssKtlKQEps.LNsNIsGRcpLIDsKhpthustupp.ssRcplG+lW....pspQss...DpstLthhs+cFchpsupspshNushQ...slGQMANSA..lQVcQGhSQAcsKEcEVpAoIAtspKQKAE-sMsasssFMKDVLpLhpQYspScspAh+AAhGVV............................ 0 2 4 6 +5673 PF05845 PhnH Bacterial phosphonate metabolism protein (PhnH) Moxon SJ anon Pfam-B_9057 (release 8.0) Family This family consists of several bacterial PhnH sequences which are known to be involved in phosphonate metabolism [1,2]. 
25.00 25.00 31.70 31.70 19.20 18.40 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.18 0.71 -4.83 73 670 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 649 1 125 414 89 188.20 48 96.50 CHANGED LtsGFs..........-sVtcuQ....psFRullcAhA+PGpltpl..........t...ss...Pssls.sAsuulhLTLhDt-TPl.......WLsss.hssssltsaLpFHsGuPlsspsppAsFAlhssspshs....sLspashGos-YPDcSsTLllplsuL.........ssG...........................slpLpGPGI..csptpluhs.LPsshhsth...............ptspsh..FPhGlDllLssGs.plhuLPRTT+lcs ................psuFh.PVpDAQ+oFRpLLKAMSEPGlIVsL.............................pphp.pu.......hpPLs.hAosuVLLTLsDsDTPV.......WLuss.ls.s-hVspsLRFHTsAPLVspPcpAsFAls....--uhust...........pLsuhusGTshsPEtuATLIlQVs.SL.........uGGc....................................hLRLoGsGI..t-cRhIAPp.LP-....shlccL...............spRsps..FPLGIDlILTCG-..+LlAlPRTT+VE.................... 0 22 65 91 +5674 PF05846 Chordopox_A15 Chordopoxvirus A15 protein Moxon SJ anon Pfam-B_9149 (release 8.0) Family This family consists of several Chordopoxvirus A15 like sequences. 21.10 21.10 25.20 24.60 18.70 18.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.05 0.72 -3.93 8 45 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 37 0 0 27 0 90.50 59 96.22 CHANGED MFVDDsTlIlYs+...WPoCLlssss+alsFPssN..ShTFcsh.hKIsp.shpSVLLlNPShlcLLKICVYl+RhpWcGcIhILFEpcNKPPPFRL .........................MFVDDNSLIIYST....WPSsLocooG+lIshPcN+..SaTFKEs.FKlD...........E.SlKSILLVNPSuIDLLKIpVYh+RIKWhGcIalLFEpENhPPPFRL.. 0 0 0 0 +5675 PF05847 Baculo_LEF-3 Nucleopolyhedrovirus late expression factor 3 (LEF-3) Moxon SJ, Mistry J, Carstens EB anon Pfam-B_9292 (release 8.0) Family This family consists of LEF-3 Nucleopolyhedrovirus late expression factor 3 (LEF-3) sequences which are known to be ssDNA-binding proteins [1]. 
Alkaline nuclease (AN) and LEF-3 may participate in homologous recombination of the baculovirus genome in a manner similar to that of exonuclease (Redalpha) and DNA-binding protein (Redbeta) of the Red-mediated homologous recombination system of bacteriophage lambda [2].\ LEF-3 is essential for transporting the putative baculovirus helicase protein P143 into the nucleus where they function together during viral DNA replication [3]. LEF-3 and other proteins have been shown to bind to closely linked sites on viral chromatin in vivo, suggesting that they may form part of the baculovirus replisome [3]. 25.00 25.00 44.40 74.10 19.30 18.90 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.20 0.70 -5.54 19 45 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 42 0 0 46 0 347.40 34 89.27 CHANGED pcctssss.uupsu..phKcshKpVsG.cLlsKshlSlsNchaY..hF+FLlDNpscsYYGstppFpsLh.spsYclsLsa.cp......+l.Isca.cpspsh-psls.VKp..hLptpDF-s--hVsVlAKL+hGFKhlss.ssYKhVFplNhtcstsssp...lhQVEChANhKplusshKs..pshpsts-LhcaahcspsphFsLhRVKC.Qposts...FhNashpshTplEhs.ppsspsh.s.sp.sphs.NISRuNK+lhptpls.phpsEp....pupc+FolpaphtDp.......schhKusaYlcspssppts...ppt...................splpKLphDlsQLsshlpcslhcshIYVssDssss...shNlL.GLTKaDhDs.ppYpsl .................t..ph.stss.s.sssp..ph+cphKpVsG.pLlsKshhSINNEsaY..hF+FLl-NtscsYYGssppFpshc.spsY-lsLsYscp......+lhIsca.cpspsh-phls.l+c..hlptp-F-s--sVoVlAKhKaGFKhlsu.shYKhVF.lshtss.tssss...ssQVEChuNhK+lssshKs..cslpsps-LhcahhcspsphFsLaRlKCQposss....aKNasltshTplEhs.ppsspsh.stpp.sshs.NISRuNK+lhptpls.plpsEp....pus-RFslpaphp-p..............schlKusaYlcsppspptp....ps...................splpKLpsDlNQLssLIpssll+VhIYVssDssss...NhNVL.GLTKa-lDs.spYpsl....... 0 0 0 0 +5676 PF05848 CtsR Firmicute transcriptional repressor of class III stress genes (CtsR) Moxon SJ anon Pfam-B_9312 (release 8.0) Family This family consists of several Firmicute transcriptional repressor of class III stress genes (CtsR) proteins. CtsR of L. 
monocytogenes negatively regulates the clpC, clpP and clpE genes belonging to the CtsR regulon [1]. 21.90 21.90 22.20 26.70 21.10 21.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.87 0.71 -4.25 48 1181 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 1166 2 147 497 0 148.80 48 97.41 CHANGED Mp.NlSDlIEpYlKplLppop...hlEI+RuElAspFpCVPSQINYVIsTRFThc+GYlVESKRGGGGYIRIhKlphscpt.pllpplhptI.GssIoppsApslIppLhE-plIocREupLhhuslscpsLs.hthstcsplRAclL+uhLppLph .....+NhSDlIEpYIKplL-pS.s.........hlEIpRuplAspFpCVPSQINYVIpTRFT.p+GYlVESKRGGGGYIRIsK..lchp-.p.p...ph....lppl...l...p..hI.G.p......p.lSQ.ptupslIptLl-cplITcREupLlhus.ls......c......c.....sLu...........pcshlRAplL+plLpplp.h...................................................................................... 0 60 102 126 +5677 PF05849 L-fibroin Fibroin light chain (L-fibroin) Moxon SJ anon Pfam-B_9321 (release 8.0) Family This family consists of several moth fibroin light chain (L-fibroin) proteins. Fibroin of the silkworm, Bombyx mori, is secreted into the lumen of posterior silk gland (PSG) from the surrounding PSG cells as a molecular complex consisting of a heavy (H)-chain of approximately 350 kDa, a light (L)-chain of 25 kDa and a P25 of about 27 kDa. The H- and L-chains are disulfide-linked but P25 is associated with the H-L complex by non-covalent force [1]. 
25.00 25.00 46.80 45.70 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.45 0.70 -5.03 5 31 2009-09-10 22:28:38 2003-04-07 12:59:11 6 1 12 0 2 33 0 225.20 41 90.96 CHANGED PSVTIsQYS-NEIPPclDNGK..SSulI-RAFDlV.DGGDsNIYILNlQQILNDhAsQGDutSQApAVAQTlAIlu-LSuGIPGDACAAAsVlNAYosuVRSGNsuuhRpALus..aIp+luoNlsLIsQLspN............PsSlRYSsGPuGsCuGGGRSYsFEAAWDuVLusucsh.SSLlNEEYClAKRLYNAFNlRSNNlGAAITAsulssVspVspplhuolsslLRulANGGNAsGAAAsApstLsNAA ..................p-ls.ph-.Gp..spsllspAaplV.Dsu-hsIahLslpphlhD.hAsQsD.ssSQuhAluQThGIluELouslsGDuCutupllsSYsshhp.oG.NpAs..hppAlss..YssplspslstlshLhpN............PstlR.psus.st..uCuGG.GRuYsaEtsWD.hLusus.....t.sLlNEphChu+RLYsuhstRSNsluAAhsAussssspQlhctshspIsshLpssssGssssshstshpptlspu..................................... 0 1 2 2 +5679 PF05851 Lentivirus_VIF Lentivirus virion infectivity factor (VIF) Moxon SJ anon Pfam-B_9439 (release 8.0) Family This family consists of several feline specific Lentivirus virion infectivity factor (VIF) proteins. VIF is essential for productive FIV infection of host target cells in vitro [1]. 25.00 25.00 210.60 210.30 19.90 19.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.77 0.70 -5.69 4 25 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 5 0 0 26 0 235.20 54 97.13 CHANGED huDEDWQVSRpLFsVLQGGl+sAMLYIosl.schEpt+hK+shKKRLtchEstFIhpLR+AEGI+WSFHTRDYHlGaV+EhVAGoo.PsuLRLYVYISNPLWHppYRPsL.saNpEWPaVNhWIpstFMWDDIEsQpIhpuscsu.GWsPGMlGlVIKAFSCsE+Kh-hTPs.lIRGEIDPpcaCGDCWNLhClRNSPPsoLQRLAMLApG+.scSW+GCCNpRFlSPaRTPsDL.VlQspssaphLaphcL .........................................M.Ylpc...ppppEpphKpFKc+LuhpEh+WIR+LRhsEGILWSFHTREWHsshV+ELVAGTG...sLKLYCYISpPlW+.RYRPTl.cWNppWPYuNlWlT-tFMW-.IppppIhhsGcVostaPPGaIuLllKAYoCpp+K+DlThtcIIhG-hc.pKWCuDCWsLIllRNTPPhTLQRLAhLALGRKlhsWhCKssaRFhpsRhTPLDpcIl.sssspEsLWh...h........ 
0 0 0 0 +5680 PF05852 DUF848 Gammaherpesvirus protein of unknown function (DUF848) Moxon SJ anon Pfam-B_9475 (release 8.0) Family This family consists of several uncharacterised proteins from the Gammaherpesvirinae. 24.30 24.30 25.10 28.60 24.20 24.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.81 0.71 -4.35 13 27 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 22 0 0 24 0 140.60 32 96.32 CHANGED pssspK-hltpsLEuslNK+suVSlhDRFGpssuLFcpQappTpcul+stpph+cppclcshlsslcspIpp+p+ElutLpp.hsh+KlschEcLsD+lpEL+--lchEL-slpt......supp-shsupssplc-sIhpWRLEsLPcVPs .....ssspK-hltpsLEuslNK+suVSlhDRFGtssslFptQappspcul+shpph+cpppltshlsslcpplpppppElshLpt.hsh++lpchEtLp-clt-L+--lphcl-tlp.......ttpp-shssppsp.c-sIhpWRL-pLPpVs... 0 0 0 0 +5681 PF05853 DUF849 Prokaryotic protein of unknown function (DUF849) Moxon SJ anon Pfam-B_9059 (release 8.0) Family This family consists of several hypothetical prokaryotic proteins with no known function. 23.20 23.20 23.30 23.20 22.90 23.10 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.60 0.70 -5.51 131 1281 2012-10-02 01:07:48 2003-04-07 12:59:11 7 8 668 31 447 1226 1571 267.90 29 87.36 CHANGED +lIITsAlTGuhpT.spsP.tlPlTPcEIAcsAlsAhcAGAullHlHsR...pscsGp...P.op-schapchlstI+pt..s-.hllslT...TGGu................sshsh...c-Rlthltth......pP.EhsoLshGohNFu...........................-hlapNo.s.lcphhpphp.ptGl+PEhEsaDsGc.lhphtphlcc.Gll.c..sPhhlphVh..Glh....uGh.sucscslh...hhhpphsc......shp.W.........oshuhG.+pQhPhsshuhhhGGpV.RVGLEDslahs+GpLA..oNAphVc+ssplscphGtclATPsEARphLuL 
..................................................................................................................llITsAlsGuhto.spsP.tlPlTPcElupsAhpuhcAGAullHlH..sR.......sp.s.Gp.................s.ot-sshapchlptI+pt....s....s-.hl.lp.lo...oGuu.......................sths.......ppRh.thlpth........tP-hsoLshGohNas...........................shla.ss.s.lcphhpthp.pt.Gl+sEhEhaDhup..lht.h.tp.h.............hcc..Gll..p.....s..P.h.h.hphlh.......Gl..........sGh..sssspslh......h.h.h..pphsp.............sst.W......................................sshuhG..+pp..h..s..h..sshuhhhGG.p.l.RVGLEDslah...s...c.....G.p....hA.....oNuphVccshplhcth.utplAo.stEARphLtl................................................................. 0 95 238 346 +5682 PF05854 MC1 Non-histone chromosomal protein MC1 Moxon SJ anon Pfam-B_9146 (release 8.0) Family This family consists of archaeal chromosomal protein MC1 sequences which protect DNA against thermal denaturation [1]. 20.50 20.50 21.00 20.90 19.90 19.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.16 0.72 -4.02 5 61 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 47 2 35 62 51 93.70 51 83.19 CHANGED ssKRNFALRDcDGNEhGVFSGKQPRQAALKAANR....Gco-.cAs.s.IRLRERGTKKVHIFcGW+hcVcAPcs+PsWMPscIoKPNVKKpGIEKLE ...........t.sKRNFsLR-pcGpE..huVFoGppPRQAALKAApR........Gpss..tt..t....IRLRE..+GT.cKVHlacGWthp.sAPc..sp.P.s..WMP..s.......cI...sKspVpKpGlE+l-......................... 0 6 26 33 +5684 PF05856 ARPC4 ARP2/3 complex 20 kDa subunit (ARPC4) Moxon SJ anon Pfam-B_9272 (release 8.0) Family This family consists of several eukaryotic ARP2/3 complex 20 kDa subunit (P20-ARC) proteins. The Arp2/3 protein complex has been implicated in the control of actin polymerisation in cells. The human complex consists of seven subunits which include the actin related proteins Arp2 and Arp3 it has been suggested that the complex promotes actin assembly in lamellipodia and may participate in lamellipodial protrusion [1]. 
22.20 22.20 23.80 23.30 21.90 22.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.93 0.71 -4.87 21 366 2009-01-15 18:05:59 2003-04-07 12:59:11 7 5 294 15 220 305 4 158.30 66 89.93 CHANGED MusslpPYLsAVRpTLpAALCLpsFsSplVERHNKPEVEl........s.........sS..tEL........lLsPlhluRs-pE..+sLIEsSlNSlRlSltlKpu....DEl-cILs+KFs+FhtpRA-s...FhILRRKPlp...............GYDISFLITNh................HoEpMh+pKLVDFIIpFhp-lDKEIS-hKLslNuRARhsAcpaLppF ......................................MotoLRPYLssVRsoLpAALCL...........pNFuSQs..VERH....N+PEVEl........p...................oS...EL........LLpPlhIuRNE..........pE............+VLIEsSINSVRlSItlKQA....DEIE+ILsHKFhRFhh.RAEs...FhILRRKPlc...............GYDISFLITNa................HTEpMhKHKLVDFlIpFME..ElDKEISEMKL.lNARARhVAEpFLp.......................... 0 72 122 181 +5685 PF05857 TraX TraX protein Moxon SJ anon Pfam-B_9375 (release 8.0) Family This family consists of several bacterial TraX proteins. TraX is responsible for the amino-terminal acetylation of F-pilin subunits [1,2]. 
22.10 22.10 22.10 22.50 21.90 22.00 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.72 0.70 -4.70 27 1213 2012-10-02 17:00:17 2003-04-07 12:59:11 6 5 883 0 132 947 36 217.00 23 91.75 CHANGED sst-hLKhlAllhMlhDHls...hlhhsshsh................hhh......lGRlAFPlFshllAhNht+.......stsht+hhtRLhhFullupssahhhss...........shhshNllFThhlshhslhhlcptphhthh....................hhhslhhhlshhshs-YshsG.......................lhhslshahhhpp..........................s.hhshslhhhshshhh...........tphhAhhslsllslh...........ststhph.hh+hhFYhaYPsHLhlLhllth ..................................................h..tthlKhlAhl.hMllDHls.....hh....h....h..p.......h...............................hth......................lGRhuhPlFsahhs..sh.+.......T+sppcahhRLhhaul...ls...phs...h.hlhsh..............................s..h.h.t..tNlh.h.o....l..hluhhhlh.h....h....c....t...hp...t...h..tth.........................................hh.hhlhh.hh......h..h....h.s.....sau..hs.u..........................................ll.h...h...hlh...ahhhcp.....................................h..phh...h.hs.s....hhhhhh.shshh............................................stth.h.h.hh.sh.sh.lhhh......................t.tp.u.hp.t.h.h..+.hhFYhFYPsHLhllsllt.h............................................. 1 41 72 102 +5686 PF05858 BIV_Env Bovine immunodeficiency virus surface protein (SU) Moxon SJ, Bateman A anon Pfam-B_9413 (release 8.0) Family The bovine lentivirus also known as the bovine immunodeficiency-like virus (BIV) has conserved and hypervariable regions in the surface envelope gene [1]. This family corresponds to the SU surface protein. 
19.90 19.90 20.80 27.80 19.20 19.80 hmmbuild -o /dev/null HMM SEED 548 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.94 0.70 -6.55 4 54 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 9 0 0 73 0 176.00 30 83.22 CHANGED MDQDLDRsERGEtcGcS-EhR.LlQE-lDcGRLTs+EALpsWh..NNGEIHPWVLsGMhShGVGML..hGVYCplPssllWlLlhQLCIYhulGETSRcLDssSWpWVRuVhIluILGTLoMAGTsLA-ss.u.................ol..NIT..shp...DT-.....Ph......hLhhLhLssILGlLGlIlshRRSNstsILuARDslDWWLSANpEIP.KFshPIILISSPLAGhhGaaVMc+a.chhctGCQhCGSlS.MWGMLL.EIGRhLs+REWsVSRlhVILhISFSWGMahs+..VpAptpHlAMVTSPPGYRIVNDTScAPWFCFSsAPIPoCpSSpWGsKYapEKlNpTlVcQlhcptEtHoRAoWIE.PDLFEEVlYELALLSANuS...........hQV+ssNsTDlCsopNSopssspTM...ThLcLRtplSsTWlsNSSLQFsVHWPaVLlGhNsSp.ostsaNsssWIATNCMDPIpLNcSp.........c-L.KNa.sRsloCVsu..shophs.tpsTLCGaNTsCLpFGp+uhSTNSLlLCQ+Nsh......sNcpFaSLSHSFSKQASt+WILVKVPSYGFVVVNDTtsP .................................................................huhulGhl..hGlhh.lstshhhhLh.pl.slhhuhGEssRpl......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +5687 PF05859 Mis12 Mis12 protein Wood V anon Wood V Family Kinetochores are the chromosomal sites for spindle interaction and play a vital role in chromosome segregation. Fission yeast kinetochore protein Mis12, is required for correct spindle morphogenesis, determining metaphase spindle length [1]. 
Thirty-five to sixty percent extension of metaphase spindle length takes place in Mis12 mutants [1]. It has been shown that Mis12 genetically interacts with Mal2, another inner centromere core complex protein in S. pombe [3]. 25.00 25.00 25.20 26.80 24.60 23.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.85 0.71 -4.38 18 240 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 205 0 163 219 0 146.10 27 49.73 CHANGED hlT-ahGasPhohls-lINAls-hhacshsAhEpslhc+hph.....................Gschhs.........pEIcpGotKlcsLLcsplDctF-KhElalLRslhslP.stLL.cshh+.ht+psphphpp........ppptcs-hphcpt.hp-LEpphplptpLccp .............................................h.scahuasP......ohls-llNuls-h.lac.shsulEpsLhpp.st.........................................ssp.hs...............-IcpGs.pplcsllcsplD+tF-+aEhasLRslhsl......P...stL.....l....t.shh+.hpphp.phshpt.............................ppptch-.phpph...hpcLppptphpt.Lpp............................................................ 0 45 83 128 +5688 PF05860 Haemagg_act haemagglutination activity domain Yeats C anon Yeats C Domain This domain is suggested to be a carbohydrate- dependent haemagglutination activity site ([1]). It is found in a range of haemagglutinins and haemolysins. 19.60 19.60 19.90 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.44 0.71 -4.36 57 2008 2012-10-02 14:50:22 2003-04-07 12:59:11 8 357 848 3 480 2049 208 124.30 28 7.14 CHANGED spsssssspsssst....sssshslsshshsssuuhshpsappFsls......psuslh.s..p....................sssspsIlNcV..susssSpIpGhlc.s.t......spAslhlhNPNGIhhsusuplNsushhlsTsssshpts ...................................ss..............ht.......sss.sh..s...hls....Ish.s......ss.u.u.huhspa.p.pFsVs..........ppu...slh...N...p.............................sssupsILNcV......s..u....s..............s...sSp..lpGhlc.lhG.......tpA..pVhlsNP.....sGIsh.susuh.l.N.s.sphsloTupspht..s......................... 
0 60 245 377 +5689 PF05861 PhnI Bacterial phosphonate metabolism protein (PhnI) Moxon SJ anon Pfam-B_9004 (release 8.0) Family This family consists of several Proteobacterial phosphonate metabolism protein (PhnI) sequences. Bacteria that use phosphonates as a phosphorus source must be able to break the stable carbon-phosphorus bond. In Escherichia coli phosphonates are broken down by a C-P lyase that has a broad substrate specificity. The genes for phosphonate uptake and degradation in E. coli are organised in an operon of 14 genes, named phnC to phnP. Three gene products (PhnC, PhnD and PhnE) comprise a binding protein-dependent phosphonate transporter, which also transports phosphate, phosphite, and certain phosphate esters such as phosphoserine; two gene products (PhnF and PhnO) may have a role in gene regulation; and nine gene products (PhnG, PhnH, PhnI, PhnJ, PhnK, PhnL, PhnM, PhnN, and PhnP) probably comprise a membrane-associated C-P lyase enzyme complex [1]. 22.70 22.70 22.90 52.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.10 0.70 -5.93 46 678 2009-01-15 18:05:59 2003-04-07 12:59:11 7 3 652 0 125 469 126 346.70 67 97.64 CHANGED MYVAVKGGEcAIpsAapLLsppRRGD....suhspLolsQIppQLsLuVsRVMoEGSLYD.-LAALAlKQApGDhlEAlFLLRAYRTTLPRhuhotPl-TusMtlcRRISAsFKDlPGGQlLGPTaDYTHRLLDFsLhuE...........................................stsPps..................................tscshscs...hP...+Vh-hLppEGLlpsp.........................s....sss.....p.ssDlTREPLsFPssRutRLQsLARGDEGFLLALuYSTQRGYG.cs..HPFsGElRhGpVsVplss.EL.......GFslplG-lplTECp.hVNtFpGsts.....psPpFTRGYGLsFGpsERKAhuMALlDRAL...pstEhsEss..uPAQDpEFVLuHsDNV-AsGFVpHLKLPHYVDFQuEL-LlR+hRpctspt 
...........................MYVAVKGGEKAI-AAHtL.-scRRGD....sslPELSVuQIcQQLsLAVDRVMTEGulhDRELAALAlKQAuGD.VEAIFLLRAYRTTLs+lusScPlDTspMRLERRISAsYKDIPGGQLLGPTYDYTHRLLD.FoLLAs.........................GEsPshs..............................................................ss-uptpssP+VhuLLu+pGLhchE..............................p...DsG.......upPsDITRpP.saP....s....oRouRLQpLhRGDEGaLLALAYSTQRGYG.RN..H.PFAGEIRsGhl-VpI....sPEEL.......GFAVslGElhhTECE...M.VNtF.lssss.............EPP+FTRGYGLVFGhSERKAMA..MALVDRAL...pAsEaGEcs..s.uPAQDEEFVLuHuDNVEAuGFVSHLKLPHYVDFQAELELL+RLppEps.pu................ 0 22 66 91 +5690 PF05862 IceA2 Helicobacter pylori IceA2 protein Moxon SJ anon Pfam-B_9436 (release 8.0) Family This family consists of several Helicobacter pylori specific IceA2 proteins. The function of this family is unknown. 25.00 25.00 57.10 26.30 23.20 16.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.85 0.72 -4.26 2 35 2009-01-15 18:05:59 2003-04-07 12:59:11 6 2 20 0 0 32 0 51.90 64 104.66 CHANGED MAlVlKVVNGKIQEaENG.aKRTYsSNhlsspssGtlVAssTuKGKVEc.........h ......MAlVlKVVNGKIQEYENGsaKRTY.GSNsVsVpluGuIVAssTuKGKVEEYcNG............... 0 0 0 0 +5692 PF05864 Chordopox_RPO7 Chordopoxvirus DNA-directed RNA polymerase 7 kDa polypeptide (RPO7) Moxon SJ anon Pfam-B_9596 (release 8.0) Family This family consists of several Chordopoxvirus DNA-directed RNA polymerase 7 kDa polypeptide sequences. DNA-dependent RNA polymerase catalyses the transcription of DNA into RNA. 25.60 25.60 25.70 107.00 25.50 25.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.22 0.72 -3.88 7 45 2009-01-15 18:05:59 2003-04-07 12:59:11 7 1 40 0 0 20 0 63.00 78 99.96 CHANGED MVF.LVCSTCGRDLSEtRY+LlIcctpLKcVLtslpp.CCRLKLSTQIEP.RNLTVpPhLDIN MVFQLVCSTCG+DIScERY+LII++coLKcVLsoVKNpCCRLKLSTQIEPQRNLTVQPLLDIN... 
0 0 0 0 +5693 PF05865 Cypo_polyhedrin Cypovirus polyhedrin protein Moxon SJ anon Pfam-B_9652 (release 8.0) Family This family consists of several Cypovirus polyhedrin protein. Polyhedrin is known to form a crystalline matrix (polyhedra) in infected insect cells [1]. 25.00 25.00 499.30 499.10 21.50 17.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.58 0.70 -5.16 2 14 2009-09-11 12:22:05 2003-04-07 12:59:11 6 1 4 3 0 17 0 247.00 95 99.77 CHANGED MADVAGTSNRDFRGREQRLFNSEQYNYNNSLNGEVSVWVYAYYSDGSVLVINKNSQYKVGISETFKALKEYREGQ+NDSYDEYEVNQSIYYPNGGDA+KFHSNAKPRAIQIIFSPSVNVRTIKMAKGNuVSVPD-YLQRSHPWEATGIKYRKIKRDGEIVGYSHYFELPHEYNSISLAVSGVHKNPSSYNVGSAHNVMDVFQSCD.AL+FCNRYWAELELVNHYISPNAYPYLDINNHSYGVALSN+Q MADVAGTSNRDFRGREQRLFNSEQYNYNNSLNGEVSVWVYAYYSDGSVLVINKNSQYKVGISETFKALKEYREGQ+NDSYDEYEVNQSIYYPNGGDA+KFHSNAKPRAIQIIFSPSVNVRTIKMAKGNuVSVPD-YLQRSHPWEATGIKYRKIKRDGEIVGYSHYFELPHEYNSISLAVSGVHKNPSSYNVGSAHNVMDVFQSCDLALRFCNRYWAELELVNHYISPNAYPYLDINNHSYGVALSN+Q 0 0 0 0 +5694 PF05866 RusA Endodeoxyribonuclease RusA Moxon SJ anon Pfam-B_8996 (release 8.0) Family This family consists of several bacterial and phage Holliday junction resolvase (RusA) like proteins. The RusA protein of Escherichia coli is an endonuclease that can resolve Holliday intermediates and correct the defects in genetic recombination and DNA repair associated with inactivation of RuvAB or RuvC [1,2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.94 0.71 -3.70 96 2230 2009-01-15 18:05:59 2003-04-07 12:59:11 6 7 1256 7 191 1349 208 112.50 30 86.93 CHANGED ls.h..........P..psptp.......R.hs.......sthh.s.pcsppa+..ptlthhhtpthhth....h..p...........h..lplphhh.hscph....h...........................DlDNh...hKslhDuls..........th................lapDDsQ..lschpsp.phhspp.....s+ltlplppl .............................................................sh.sP...sssshh........R.hp..........tsphahs.ptspca.+.p.t.l.t.h...hlppphh...chs.....h..................................lplc.hhh...sscph........p.................................................DLDNl....hKAshDALs...................+u.........ulhh.DDpQ.....lschp.l..h...ht.....hs.s................s+ltlpIpc.......................................... 0 62 115 159 +5695 PF05867 DUF851 Protein of unknown function (DUF851) Moxon SJ, Coggill P anon Pfam-B_9669 (release 8.0), Jackhmmer:Q9N4S5 Family \N 25.00 25.00 25.50 25.50 21.80 21.20 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.63 0.70 -5.13 7 28 2009-01-15 18:05:59 2003-04-07 12:59:11 6 4 8 0 28 23 0 198.80 29 48.97 CHANGED splscspNlEVpsR-VD-sphERVhcKapcEK+pssphsts...lDEKSKlLMDRV.sKKPa.hK..pp-pG.hLhDE.SoFYppsss..KK+.csSh...-DoYsphPKLtDVpKhsspNVascp...GVPFWAVplEPsEEDhpss-.sIoVGoEHlEhY+sKplsLpTI.sTKLhLsElQPLs-LhKRD-lHFsPcLVFSNTlRSLlpspth-....tc.+p...........psKs--ptcst...+lpF-ppcscpa.hYpRsN ...........................pspplplpppplDptphc+lhcKhpppKtp.........s.........lD-Ks+lLhDRl.sKKPa..K.....ppppt.hh..h.D-.o..sFac.p...........pKh.............ptp....-DsYs.hPKLtsVhKhst.psla.pts....s.V.PFWAh..hhc.PsEEDh..............ps.s.-.ssIsVso-HlEhapp+clsLpo.h..ppp.lhhs.atPhs.LhcRDchaFpscllFSNTlRohlpht................p............pt...cptt......plph........h..Y.............................. 
0 11 15 28 +5696 PF05868 Rotavirus_VP7 Rotavirus major outer capsid protein VP7 Moxon SJ anon Pfam-B_9690 (release 8.0) Family This family consists of several Rotavirus major outer capsid protein VP7 sequences. The rotavirus capsid is composed of three concentric protein layers. Proteins VP4 and VP7 comprise the outer layer. VP4 forms spikes and is the viral attachment protein. VP7 is a glycoprotein and the major constituent of the outer protein layer [1]. 24.00 24.00 24.10 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.83 0.70 -5.21 3 112 2012-10-01 19:08:57 2003-04-07 12:59:11 6 1 9 0 0 90 0 217.90 65 99.92 CHANGED MshLLLLVlAAsAsAQLsIlPpocPEICVLaADDathDsNpFsGNFTNIF+oYNSVTLSFaoYcSosYDVIDIISKcDhSSCsILAIDVscuoMDFNTFLQSsNECoKYAAsKlHYlKLPRsEEWFuYSKNLSFCPLSDSLIGlYCDTQLssTYFslScuusYDVTDIPEFTEMGYVFHSND-FYIC+RIS-csWlNYHLFYR-YusSGTVS+pVNWGNVWSGFKTFAQVVYKILDIFFNsKRNlEPRA ...............................lhP.spPElClLaA.sDh..ps..spastNFTpIFcSYNsVTlShhsYsSsNYDV..IDI..LS+hDa..StCp...ILAIDVhcPpM.DFl...oFLQSsNpCSKYuupKIHY.KLsps-EWFVY..SKNLKFCPLSDsLIGhYCDTQlssTYFsLSss.p+.Y-VTDlPEFTphGYsFaS.s.-sFYlC+RloEctWlNhHhFY..R..c.ssSGsluptIsWsNVWoshpTFAQhlYKILDlFFN.spRshEPRA............................ 0 0 0 0 +5697 PF05869 Dam DNA N-6-adenine-methyltransferase (Dam) Moxon SJ anon Pfam-B_9691 (release 8.0) Family This family consists of several bacterial and phage DNA N-6-adenine-methyltransferase (Dam) like sequences [1]. 
27.10 27.10 27.20 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.41 0.71 -4.84 17 691 2009-01-15 18:05:59 2003-04-07 12:59:11 6 13 478 0 57 456 132 166.20 39 83.69 CHANGED .ptopp.cpstDpWpTPptlFhulsuhFG.....asLDssusccNAhCspaaTtE-NuLppDWspc......Gsh.FhNPPYS+............hp.alc+Ah-ppt+usphVhhlhsusspsh..Wacc....ADcltaIp...............GRluF..s...sstptpstssp...uuslhIac.h..htstthohls+ppLhtpGp.hht.h.tt ....................................s...upp.tc.spDpWpTP..lFtulsh.FG.....FhLDss..u...s....s.c...N...A....h.C..s..t..ahT........t......p...D.......sA......Lsp-.Wsp.+........................GAl.asNPPYSp.................................hc.alc+A..uptp..pt...t.....p..h.Vhh.lhsts.....sV.h..Whpc.h..h.............sDc...lt.h.Ip........................G.RIsF..P.....hsh-.Kpssot.....uuhlhlac.h....t.hhohls+stL.shGt.........t.......................................... 0 14 31 43 +5698 PF05870 PA_decarbox Phenolic acid decarboxylase (PAD) Moxon SJ anon Pfam-B_9737 (release 8.0) Family This family consists of several bacterial phenolic acid decarboxylase proteins. Phenolic acids, also called substituted cinnamic acids, are important lignin-related aromatic acids and natural constituents of plant cell walls. These acids (particularly ferulic, p-coumaric, and caffeic acids) bind the complex lignin polymer to the hemicellulose and cellulose in plants. The Phenolic acid decarboxylase (PAD) gene (pad) is transcriptionally regulated by p-coumaric, ferulic, or caffeic acid; these three acids are the three substrates of PAD [1]. 
24.60 24.60 24.80 29.40 23.80 24.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.05 0.71 -4.76 8 316 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 298 19 83 233 0 152.10 50 91.04 CHANGED LssFlGKHFIYTYDNGWcYElYlKN-+TIDYRIHSGhVGGRWVKDQcVaIV+LuculYKlSWTEPTGTDVSLshs.s-++lHGsIFFP+WVt-cPE+TVCFQNDHIsLMcpYREtGPTYPphVVsEFAoITalcDsG.sN-sVIAsAP.u-Ls--assshp ......................LpsFlGpHhIYTY....D........NG....WcYEhYlKN-pTlDYRIHuGhVuGRWV.KDQpspIVpl.s.culYKloWTEPTGTsVuLsh..s-+hlHGsIFFP..+..Wlh...-cPEhTVCaQN..-HlslMct.R-th.sYPphVlsEFAsITahtcsGhsN-pVIspsP.sthspDh.ts.h............................................... 0 16 40 61 +5699 PF05871 ESCRT-II DUF852; ESCRT-II complex subunit Moxon SJ, Wood V, Mistry J anon Pfam-B_9765 (release 8.0) Family This family of conserved eukaryotic proteins are subunits of the endosome associated complex ESCRT-II which recruits transport machinery for protein sorting at the multivesicular body (MVB) [1]. This protein complex transiently associates with the endosomal membrane and thereby initiates the formation of ESCRT-III, a membrane-associated protein complex that functions immediately downstream of ESCRT-II during sorting of MVB cargo. ESCRT-II in turn functions downstream of ESCRT-I, a protein complex that binds to ubiquitinated endosomal cargo [1]. 
21.00 21.00 22.60 22.40 18.50 18.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.63 0.71 -4.25 25 310 2009-01-15 18:05:59 2003-04-07 12:59:11 7 9 278 16 218 304 1 140.00 35 58.95 CHANGED .hYsFPPFFTlQPNssTRppQLssWssLlLsYC+ap+lapLs....l.......h-s...s.............s.sLFpNps..IpR........+Lu.ptlchllsplhcp....u...pAEal..s............ps+o.....................phhIhWRps-EWusllhpWlpcsGppsoVhTlYELspGDpTp.spEFHsL-ps .................YsFPPFFT......l..........QPs...h.....sTRppQlptWss.......LlLs.Ys....+.pp+hapls.........l.......-t......p......................................s.sLFpNpp....IpR............+Ls.-shphllctltcp.........G...........psEal...c............ps+s............................................................phhIhW+p..s-EWu.sllhpWlpcsG.........ppsoVhTlYELspG.-sTt.....spEaash-..s....................................................... 1 77 121 179 +5700 PF05872 DUF853 Bacterial protein of unknown function (DUF853) Moxon SJ anon Pfam-B_9798 (release 8.0) Family This family consists of several bacterial proteins of unknown function. Swiss:Q8YFZ2 is thought to be an ATPase. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.68 0.70 -6.15 9 1311 2012-10-05 12:31:09 2003-04-07 12:59:11 7 6 1266 0 310 1381 194 478.50 52 96.43 CHANGED pshhssssssp.lhlthphuNRHGLIsGATGTGKTVTLQhLAEuhSsAGVPVFhADVKGDLSGlutsGpss-KlttRhtphGls.sap.pAhPVhhWDlaGc.pGHPlRsTISEMGPLLLuRLLsLNDsQpGVLNlsF+lAD-cGLLLLDLKDLpAlLpal.uDpAcphsspYGNlosASlGAIQRsLLsLEpQGAcpFFGEPALclpDlMRTc.ssGRGsIslLuA-KLhp.sP+LYuTFLLWLLSELFEpLPEVGDsDKPKLVFFFDEAHLLFsDAPKsLl-+VEQVVRLIRSKGVGVYFVTQNPhDlP-sVLuQLGNRlQHALRAaTP+-QKAVKsAAcTFRsNPsFsstpsITpLGsGEALVSsL--KGsPshVpRshlhPPtSRlGPlospERstllpsSPltGhY-pslDR-SAYEhLst.+sutuscttppshsttpt..............................psuhhGshpshthus...pst.R..ushppslsEuhsKSssRolssQluRplVRGlLGoL....+R .......................................................................................................ts....tpsspplhLh...thANRHG..LIsGATGTGKTV.T.LQ.h....LAEuh...S.ch.G...VP.VFhAD.VKG..DL.o..GlA.p.s...G....s....s....s....-.K....l....h....t....R.hp..p.l.....Gls...-...apspA..PV...hhWDla....GE........pGHPVRAT...................l...........S..-hGPLLLuRLLsL.....N.....DsQpGVLsllF.+lAD...-......p...G.....L..........LLLD........hKDLRAll.......pal........u.......-..s..u..............+..............p........h.p.sp..YG...NlSsA......SlGAIQRuLL....sLEpQGA.s....pFF..G.E.....P.hL-l.pDhM...RTD....ss....G...+GlINlLuA..-+Lhp..tP.+LYu..sF.LLWhLS....EL.a.E.p.L.....P.....EsG.D..h-..K...PK.L..VF.FFDEAH.....L.L...F.......s......D....A......P.......p......s....L.....l.-.....+....l..E...Q....VV.RL.I...RSKGVGVaFVTQNP..tD.lP-sVLuQLGNRVQHALRAFTP+DQKAVKs..AApThRsNP.s..a.D.sppAIspLGsGEALlShL-tK.G.sPosV...-Rshlhs...PpS+hGPlotc.ERsslls.p.SPlh.G+Y-pslDR.ESAYE..hL.pp.p....h.p.ss....s.p...t.....t.p..s....s...s.s.p..t.p.t...........................................................t.ss..hh..s..s.h..p..s.h.hh.u....................s..ts+..........sttpps....l.s...p.......sh...s....K.S.s.sR............plsp........pI....l...RGlLGo
l........................................................................................................................ 0 80 193 258 +5701 PF05873 Mt_ATP-synt_D ATP synthase D chain, mitochondrial (ATP5H) Moxon SJ anon Pfam-B_9814 (release 8.0) Family This family consists of several ATP synthase D chain, mitochondrial (ATP5H) proteins. Subunit d has no extensive hydrophobic sequences, and is not apparently related to any subunit described in the simpler ATP synthases in bacteria and chloroplasts [1,2]. 21.30 21.30 21.30 21.30 20.60 21.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.00 0.71 -4.60 5 370 2009-01-15 18:05:59 2003-04-07 12:59:11 7 8 281 3 234 361 1 149.30 28 75.57 CHANGED AuR+lAppoI-WuuLuEplPsNQKAphsulKohsETapoRVusLPEcPPsIDWAYYKpNVs..+uGLVDuFcKKY-ALKlPaP.......E..DKYoupVDAEcKtssKsIupacspScsRIQEYcKcLEKlKshlPaDQMThEDasEsFP-otLD.l+K.saWP........HTPEEpl ............................................Ahp...t.lDWsthsp..p..l..s..s..p..p.t..sths.........uhKp+s.-.phppcl.t.t.L.s-pPssIDauaY+sslt...psulVDchEKp...apuh.K.l.s..hs..............h..sc..httl..-u.E...pp..shpssp..phh..ptsptclpphpcpLpplcs.h.h.PF-phTh--hspshP-ht............................................................... 0 67 113 179 +5702 PF05874 PBAN Pheromone biosynthesis activating neuropeptide (PBAN) Moxon SJ anon Pfam-B_9874 (release 8.0) Family This family consists of several moth pheromone biosynthesis activating neuropeptide (PBAN) sequences. Female moths produce and release species specific sex pheromones to attract males for mating. Pheromone biosynthesis is hormonally regulated by the Pheromone Biosynthesis Activating Neuropeptide (PBAN) which is biosynthesised in the subesophageal ganglion (SOG) [1]. 
21.50 21.50 22.40 22.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.91 0.71 -11.46 0.71 -4.95 10 47 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 27 0 11 55 0 136.30 57 89.94 CHANGED Ma.tophlslhlllsl..sVlAosND..lKD-G.DRGAHSDRuG.LWFGPRLGKRSL+lSoEDNRQAFa+LLEAADALKYYYDQLPY.Ep.QADEPET+VTKKVIFTPKLGRShu...h--+sa-NVEFTPRLGRRLoDDMPATPuDQEhYR.DPEQIDSRT+YFSPRLGR..TMsFSPRLGRELuY-haPsKlRVARSsNKTpST ...........................................................................................................................................KKlIFTPKLGRSlu....c.p.pa-slEFTPRLGRRLu-DMPA..TPuDQ..E..hYp.DPEph-SR.T+YFSPRLGR..TM..sF..SPRLGRELuY........................... 0 5 6 11 +5703 PF05875 Ceramidase aPHC; Ceramidase Moxon SJ anon Pfam-B_9877 (release 8.0) Family This family consists of several ceramidases. Ceramidases are enzymes involved in regulating cellular levels of ceramides, sphingoid bases, and their phosphates, EC:3.5.1.23. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.91 0.70 -5.26 41 599 2009-01-15 18:05:59 2003-04-07 12:59:11 7 7 310 0 390 567 261 225.10 22 87.94 CHANGED sGaWG.sTSslDWCE-NYslS...YlAEahNTloNhlFllhuhhuhhpsh+.................pchctpa..hlshlGhhlVGlGShhFHhTLpY...................hhQLLDELsMlassslshashhsphp...................shphphhluhhlhshushlThhahh.hppPslHpsuaullsshllhhshhhhpp..l.s..................................tsc+plhphhhhulslFlhGahhW.lDphhCshhpphRpth..hPhuhhhEhHGWWHlhTGlGsYhhllhhtaLcshhpsp..ppphphha..ht.....slPtlhh 
..........................ht..oS.hsa............CE.tsY.hs...hlA.EhhNT..loNhhahh.huhh..s.hh...hhp...........................pt.h.p...th....hlh.hh..h.h.h.lV.G.lGShhFHh.TLp..a...................h.hQ.h.h.DELshl.ashhhhhhh.hhshth.........................................p.p.h.hh.hhhl.hhhs...hhho.hhhhh.....p..s...hhp.p.h...s..ashhshhlhhhshhhh....th.......................................................................htt.phhth...hh...h....uhhh.....ahhuhhhW.hD..phh.Cphhpthp..h.............................hHuh.W.......Hlhhu..hu...sYhhh.hh..hhph.......t.t....h...h...................h....................................................................................................... 0 100 198 301 +5704 PF05876 Terminase_GpA Phage terminase large subunit (GpA) Moxon SJ anon Pfam-B_9892 (release 8.0) Family This family consists of several phage terminase large subunit proteins as well as related sequences from several bacterial species. The DNA packaging enzyme of bacteriophage lambda, terminase, is a heteromultimer composed of a small subunit, gpNu1, and a large subunit, gpA, products of the Nu1 and A genes, respectively. Terminase is involved in the site-specific binding and cutting of the DNA in the initial stages of packaging. It is now known that gpA is actively involved in late stages of packaging, including DNA translocation, and that this enzyme contains separate functional domains for its early and late packaging activities [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.87 0.70 -6.16 53 1654 2012-10-05 12:31:09 2003-04-07 12:59:11 7 10 711 0 162 1653 633 434.80 36 87.54 CHANGED h.lssc..u..........uupsGpW.......ctspsPYht-lh-slussp..................hccVshhpuAQlGtTp.hhhshluYhIcpsP.ushLhl.PTpstAccascs+lsPhlcsoPsLp........pplus....h.p+cssNolhpKcFs.G.GhlhhhGAsSsssL+spssRhlhhDElDta..stsl.....ssEGcPhsLAcpRspoF...sptKhlhsSTPshcs.....................sopIpptappSDpR.+aaVPCPHCGchp.....lp.app......l+a....sps..........pscsAtahC...cCushIp-ccKst...hh..tt.Gc.....Wlspss.........................................tsscpsuFalsuhYSPh...tsWsplspcalpA.......ps-spt...........LpsFhNTpLGcsW.cpc.u-..th-hppLhsRt..E.sa..............tup......VP........susl.hLTAGlDVQ.pc......RlElplhGWGpstEs.....WllD+tllh....G..............cPsstt..sattLsplLpcpat+s.sGsp.hslsshulDoGs...........................ttTpp.VYsas+pp.t...............ttpVhslKGsss..stshlppsptpsss..ttt.....ultLahVussshKsplh......spLphp...........t.ssGhlHFP.....p.....t.....hsppaacQLsAEphh..+htc.Gh..hhtWp+hp..pRNEALDshVYAhAA...thhhshp.....phcW 
......................................su.t.s..a.........thhPa.h...t.hsshusp...................hctVshhtsApsGho...hl.s...hhsY..hl....pc....ts.Lhh....TpttAcpa.+p+lt.hhcs.Pt..lh.........tl..ss...............pc.psNslh.Kp.F...s.h.hhhhGh.usp.hpppshchhhhs-hs..ta.....s.h.......ptEGssh.LuppRhps...........h.lh.SoPt.hch......................sstI..phhp.pusph..+a..ahsCP..H...CGc..........ht...ats........................lph.........................p......................tspsshh.C...............psss.hlttpphp............................p..uh.........altctp.............................................................sc.hsFa.h.s.husa...poW.pllhchhps..................hts.tt..........h+shlNTshG.sa....t..sc...p.ctphl.p+h......h.....................tt.s.VP...............st.s..h..hLsAslDsQ.hp.......Rh.h.lh..Ga....u.tt....Et.....allD+..l.h..t.........................c.sp........h.hhls.t.h.h.p.+.p.a.ht..sstp..htl.thshDsG.G...........................h.sp..sYph.+ppt................hh+lh.hKGsuh..ht.....t..l..h.shsh...........ps.....p.t..................sl.L.h.ltssshKc..l.......sthhhp...................pshs.hh.h+FP...............p..............hs...hppLshEp.....+..c..Gp.................W.pp.t....ttNEAhDhhVYA.Ah...hhh............................................................................................................................. 1 49 97 131 +5706 PF05878 Phyto_Pns9_10 Phytoreovirus nonstructural protein Pns9/Pns10 Moxon SJ anon Pfam-B_9947 (release 8.0) Family This family consists of the Phytoreovirus nonstructural proteins Pns9 and Pns10. The function of this family is unknown. 
20.30 20.30 20.50 20.80 20.20 20.20 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.75 0.70 -5.42 3 17 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 10 0 1 17 0 296.10 38 94.27 CHANGED uGKLQDGVAI+RIsDAIshFsNYohG-LlssRchsluTLHslR+NlGLAWPslLhNCalHTSSHhGVMKFlLDIAhShRFGDFTLLGusGssDPFsDlclIaTKoCpsLGhsDs-FLphsDsFuYhhsSFL-cEul+usVDMplGIHNIED+YVhRhESIhcFI+tYYTtSh-DV..lsWLEKL-uAcuGlLuspKSKcQMRuElsplRscIss+IpLYINoacNSap-HaRElAcpYsslWs..losGssAcEsps-AT.....sSuSpsTusuAELs-VsctuDsNEscLp...F+Rc-DAucsAsSchsSLSG-DutpG ..............................................................................GKLQDGlAI+RIsDAIphFppYphG-L.psps.tlspL+slRtslGLAWPslLhNCalHoSSHhGVh+FllDIAhoh+hGsFTlLGusGs.DPFsDlslIaoKoChsLshsDssFLp.s-pFu.hhsuFLptpulputV-MphGlHsIED+YVhRhpsI.chlphYastS.c-l...sWl...KL--ussuhh.tpho+tQMps.ltpsRshIss..cIppYlNpppsS.p-HhRplAppYsp.Ws..lsssssut.spssss...........ssts.sss.suphssss.hsc.s-s......hptp.-ttp.hss.hss..upsu..G.............................. 1 1 1 1 +5707 PF05879 RHD3 Root hair defective 3 GTP-binding protein (RHD3) Moxon SJ anon Pfam-B_9973 (release 8.0) Family This family consists of several eukaryotic root hair defective 3 like GTP-binding proteins. It has been speculated that the RHD3 protein is a member of a novel class of GTP-binding proteins that is widespread in eukaryotes and required for regulated cell enlargement [1]. The family also contains the homologous yeast synthetic enhancement of YOP1 (SEY1) protein which is involved in membrane trafficking [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 743 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.42 0.70 -6.61 8 439 2012-10-05 12:31:09 2003-04-07 12:59:11 7 16 244 0 317 440 4 457.00 25 63.95 CHANGED hGSQSoGKSTLLNHLFGTsFssMDA.spRpQTTKGIWlA+sssl...........cssILVMDVEGTDGRERGEDQD.FERKSALFALAsSEVlIlNMWEHQlGLYQGANMuLLKTVFEVNLpLFhpp.....ss+KoLLhFVIRD+lGsTPLENLpcoLpcDlp+IWsSluKPtuhEsoslsDFFDVpFsALsHK.hpp-pFpppVppLcpRFhpu.............hssGsFts-YH+clPADGFohYA-pIW-pIcsNKDLDLPTQQlLVApaRC-EIusEshpsFhssh....Ep..p.cEsspuuslssLGppLsslhpcslpcYDspASRYccuVYppKRppLcpKlss+lpssaQshLssL+pshl-sFcpulspulcuGp......sFucuVpsptpcslpcFccpscohsl.pssWss.cchhtKLs+DI-spsuplRstcLpcLsschE+plpspLS-sVphhhsssu..................+-sWDslhphFcppscsAlpthpsthsuF....-hs-s.pssphltsL+phSWshlcsKs+EEh..splLh+L+-RF--lFRYDsDGhPRlWpsc-DI-uIa+pARppoLpLlsVLohhRLuDss-sl.ps.hhuhpssssssusp..csl.ss.sslspsphcpltsptpILTcspppslhspFK+ps-hshs-A....KRStlsspTplPsahallLlVLGWNEFMslLRNPLahhlhhlsusssasLa.pLsLhG............PshphlpsshsplhslAcc+hcpshpsscsh+uutphcssusscspps 
..................................................hGsQSoGKSTLLNtlFsspFth.st...t.......t.............psT+G..hahsh............................................................hll.hDhEGhcu...E..p.u..c.p.ps....aE+.....p...uLFsl..uh..uclhllNha.........ts.s.hpslhp..hth.h.............................h.hs...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tt................................................................................................................................................ 1 98 205 280 +5708 PF05880 Fiji_64_capsid Fijivirus 64 kDa capsid protein Moxon SJ anon Pfam-B_9976 (release 8.0) Family This family consists of several Fijivirus 64 kDa capsid proteins. 
25.00 25.00 31.60 31.50 19.10 18.80 hmmbuild -o /dev/null HMM SEED 561 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.66 0.70 -6.17 5 168 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 8 0 0 162 0 486.90 86 99.98 CHANGED MsDI+LsIAPDLIHsGVPQRLSDTIILNDRPpITLLsHF-sLFcEsNIIKuP+suSspoTVNIYIKp-LL+RLHDRLuoV-ToTLPNIoQl....KEal+sFFpN-lQsIFpsLsNN-lsssFVGVTT+GLSLFAsAKsDAEQIERVQI-TLTEGNlTLKPhSADGlEVILDDsYINlVsKllGh-VpKLI-KCC+ElPAchGIlT..DEVKhhlpTGKLRlDGGYDYNCPSSoTDVT+YGsYDc.FScpMFscLspFaNlSLolVPVuALKslHlh-cELspLDuDKSLLEQsWoAlTSFlESaclKTK..............lKsDDsDchcLscLsssKsNpcusp....AolssoDKsh...........l-WYp+sF.sscT-KulsLsRsElhE.tAcsoSsllc+VKltFushhFEclsssupEKolhV...-TsuG-MTLDpYRuIu-VLNuIWKRGKDlAVcsFDYIKLGlEKA.oHLuslLhKKYNlTlDDIlNFI-KGPSYLAsLsKlsDWsLIAKlIIsSVLPsIIQuVYKoDPSsslMNSlLIo+AsNLI+sD+cRLpcKspucss.s.uNTsc+-sssKlllcKlso .....MADIRLDIAPDLIHNGVPQRLSDTIILNNRPTITLLSHFNsLFHEpNIVKSPHlASSQTTVNLYIRKHLLTRLHDRLQTVETSTLPNITQL....KEHIpSaFcNEHQPIFQTLTNNNLSDEFLGVTTFGLSLFATSKLDAEQIERVQIETLTEGNVTLKPFSADGLEVILDDSYIGIlGKIsGLEVHKLLDKCCREVPAQMGILT..DEVKLLhRoGKLRIDGGYDFNCPAS.TTDVTHYGGYDQ.aSRQMFE+LNLFaNISLSIIPVSALKTlHlFEKELSsLDADKSLLEQTWSuVuSFlETWpVKoK..............sKs-D.DEYELTuLSsLRpshDGsS....sSSPasDKKF...........I-WY.KTF..uKhEKGSSLR+sElE-KssouTSshsKpVKIHFsVQYFDEhKsNGHEKSVsV....TpKGEMo.L-.YRKIGElLSAIWKRGKuLAsPChDYIKLGVEKA.aHLAPVIMKKYNLTIDDIIpFI-hGPSYLAKLDKIDDWSLIuKLIITSVLPNIIQAVYKTDPSNNVMNSVIISRANNLLKuDRDRLlKKAhoANsSo.SN.SspEHsQKIVLNKVTR.............................. 0 0 0 0 +5709 PF05881 CNPase 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP or CNPase) Moxon SJ, Mazumder R anon Pfam-B_9997 (release 8.0) Family This family consists of the eukaryotic protein 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP). 2',3'-cyclic nucleotide 3'-phosphodiesterase (CNP) is one of the earliest myelin-related proteins expressed in differentiating oligodendrocytes and Schwann cells. CNP is abundant in the central nervous system and in oligodendrocytes. 
This protein is also found in mammalian photoreceptor cells, testis and lymphocytes. Although the biological function of CNP is unknown, it is thought to play a significant role in the formation of the myelin sheath, where it comprises 4% of total protein. CNP selectively cleaves 2',3'-cyclic nucleotides to produce 2'-nucleotides in vitro. Although physiologically relevant substrates with 2',3'-cyclic termini are still unknown, numerous cyclic phosphate containing RNAs occur transiently within eukaryotic cells. Other known protein families capable of hydrolysing 2',3'-cyclic nucleotides include tRNA ligases and plant cyclic phosphodiesterases. The catalytic domains from all these proteins contain two tetra-peptide motifs H-X-T/S-X, where X is usually a hydrophobic residue. Mutation of either histidine in CNP abolishes enzymatic activity [1]. CNPases belong to the 2H phosphoesterase superfamily. They share a common active site, characterised by two conserved histidines, with the bacterial tRNA-ligating enzyme LigT, vertebrate myelin-associated 2',3' phosphodiesterases, plant Arabidopsis thaliana CPDases and several bacteria and virus proteins [2]. 
25.00 25.00 25.80 30.30 17.40 17.20 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.56 0.70 -4.95 5 71 2012-10-03 21:31:48 2003-04-07 12:59:11 7 6 44 11 32 67 0 204.90 55 53.52 CHANGED LPLYFGWFLuKR-E-sLRKTup-FLEpLGNLKAFKK+LptFsuED..K+KlDLlpYFuK.sPslLHCTTKFCDYGKAAGAEEYAQQEVVKKSYoKuFTLoISALFVTPRTsGARVELTEpQLtLWPsDADKE......L.PsDsLPRGSRAHITLGCAA-VEsVQTGIDLLEFVKLQKAG+-GEpVGELsG...GKLoYauNGMWMLuLu+KIEV+oIFSGYYGK.GssVPppGuKKGtplhppCTIl ...........................LPLYFGWFLsKcupEp.l+psu.sFLcpLsshcAFKKc.l..pp.F..ss..ts..ccclDLhpYFs+.....sPGlLHCTTKFCDYG..KAsGA-EYAQp-sVKcS.Y.uKu....FpLolSALFVTP+TsGARVc....LoEpQL....LWPsDsDK............................lsss-sLPtGSRAHlTLGCAusVEsVQTGLDLLEIlp.pKuG...pcGEpVuEl...st..........................GpLh.hGpGRWhLsLs+phclcAlFoGYYGK.tcs.Vsspuspct.t.hppCoI.................... 0 8 10 14 +5711 PF05883 Baculo_RING DUF855; Baculovirus U-box/Ring-like domain Moxon SJ, Bateman A, Dlakic M anon Pfam-B_9633 (release 8.0) Domain This family consists of several Baculovirus proteins of around 130 residues in length. The function of this family is unknown, but it appears to be related to the U-box and ring finger domain by profile-profile comparison. 21.20 21.20 21.30 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.88 0.71 -4.33 25 57 2012-10-03 15:03:13 2003-04-07 12:59:11 6 1 56 0 0 68 5 133.80 38 95.21 CHANGED MLlTlph.pD+ctala+hFpchWsp.slECtICh-cIss..cGVVslT-suslNLEKMFHspChcRW..p...ppcpRDPFNRsl+YhFsFPPco.c-spuhL-ch+.uFI...GD-ctD+hasspapR.ssspp.hlDlELDFsphL .......................MllTlph.pD+ptaha+hFpchWs.ph.shECpICh-cIss..cGVVslo-sutLNL...EKMFHspClc...RW...p.....ppc....sRDPFNRsl+YaFsFPPcohcECpshL-ch+.sFI...GDcctDchapp.apR...lpstp.hlDlELDFpphh........................ 
0 0 0 0 +5712 PF05884 ZYG-11_interact DUF856; Interactor of ZYG-11 Moxon SJ anon Pfam-B_9445 (release 8.0) Family This family of proteins represents the protein product of the gene W03D8.9 which has been identified as an interactor of ZYG-11. ZYG-11 is the substrate-recognition subunit for a CUL-2 based complex that regulates cell division and embryonic development [1]. 25.00 25.00 25.50 41.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.88 0.70 -5.33 6 31 2009-01-15 18:05:59 2003-04-07 12:59:11 7 2 8 0 31 27 0 260.50 29 89.67 CHANGED hPsssp-G....Psusssss....uNtG.ssstp-AuIshVh+TYupaGpDlpQhsscGlRphspslpp.ossslpphpoohppcosc.............llssLh-ho.PtpluopsIlEhFuhSSlLLllAolSSllGGYlLAPlFGIlIsslGAAIhusLVlPhhusY..LNtcsGSluupRltLllhshsQGVLhGauhsasalsupPFsslTslluSFuYPllshp.sTuRsslLGsssGsSlhhHhslGhlpGuLossYFlLouhYTluAlsLIQ..IAhRsQocss.hphYshlLVuhhlsuKshVYGlFGssc .........................................................................t....................................................h.t-hpt..........psh+.h..shpp.sssphtphtsphp.pspp................llssl.-.hp..P.thss.slhchF.shoollLlshuluohlGuYlLuPlhslhhsthGAslluslllPshshY...Lst...............c...stuhst..p..Rh.LlhhuhsQGlLhGaulua.shls.u.pPhshlTsllhuFshslls..sp....ss.sRsslLuhssGsuhhhplshGhlpGuLoh.sYhlLosLYohuuhshlQ......l.Ah+.s.hss.s.hphYphlL.....Vs.hlhuKshsYulhGst.t............. 0 9 13 31 +5714 PF05886 Orthopox_F8 Orthopoxvirus F8 protein Moxon SJ anon Pfam-B_9539 (release 8.0) Family This family consists of several Orthopoxvirus F8 proteins. The function of this family is unknown. 
19.90 19.90 21.90 26.40 18.30 17.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.07 0.72 -4.14 2 53 2009-01-15 18:05:59 2003-04-07 12:59:11 6 1 30 0 0 35 0 62.00 73 96.53 CHANGED MEGSKRKH-SRRPQQEQEQ.RPRTPPSYEEIAKYGHSFNVKRFTNtEMCLKNDYPRIISYNPPPK ..............MEGSKRKH-SRR.QQEQEQ.RPRTPPSYEEIAKYGHSFN.VKRF...T....N.-E...MCLKNDYPRIISYNPPPK... 1 0 0 0 +5715 PF05887 Trypan_PARP Trypano_PARP; Procyclic acidic repetitive protein (PARP) Moxon SJ anon Pfam-B_9554 (release 8.0) Family This family consists of several Trypanosoma brucei procyclic acidic repetitive protein (PARP) like sequences. The procyclic acidic repetitive protein (parp) genes of Trypanosoma brucei encode a small family of abundant surface proteins whose expression is restricted to the procyclic form of the parasite. They are found at two unlinked loci, parpA and parpB; transcription of both loci is developmentally regulated [1]. 40.00 40.00 40.20 40.30 39.90 39.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.30 0.71 -4.37 2 71 2009-09-10 16:45:05 2003-04-07 12:59:11 6 12 35 4 18 70 31 104.10 51 41.22 CHANGED MAPRSLYLLAlLLFSANLFAGVGFAAAA-tstspslsKGGKGK...............-.-stPE...EstPE.ps..EstPE.ps..EstPE...........EsEPEPEP..........GAATLKSVALPFAlAAAALVAAF ...................................................................................................................tPc...sE..scPEP..E....PEPEPEPEPc..s...sEscPE...psEPEP.EPEPEP...E..PEPEPEPEPEP-Pssu..sh....................................... 0 6 17 17 +5717 PF05889 SLA_LP_auto_ag Soluble liver antigen/liver pancreas antigen (SLA/LP autoantigen) Moxon SJ anon Pfam-B_9614 (release 8.0) Family This family consists of several eukaryotic and archaeal proteins which are related to the human soluble liver antigen/liver pancreas antigen (SLA/LP autoantigen). Autoantibodies are a hallmark of autoimmune hepatitis, but most are not disease specific. 
Autoantibodies to soluble liver antigen (SLA) and to liver and pancreas antigen (LP) have been described as disease specific, occurring in about 30% of all patients with autoimmune hepatitis [1]. The function of SLA/LP is unknown, however, it has been suggested that the protein may function as a serine hydroxymethyltransferase and may be an important enzyme in the thus far poorly understood selenocysteine pathway [2]. The archaeal sequences Swiss:Q8TXK0 and Swiss:Q8TYR3 are annotated as being pyridoxal phosphate-dependent enzymes. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.11 0.70 -6.00 5 254 2012-10-02 18:26:03 2003-04-07 12:59:11 8 5 169 15 177 1048 87 327.70 33 79.28 CHANGED MDTsscsc..ssGlGEREuRVhocLspcslacFsHGlGRSGsLl-sQ.PKAsGuSlhs+LTNcLlpchL+hLGl+tlcssFVVPhATGMSLuLChouhR.++scAKhVIWPhhDcK...SslKAsppAGFchtlVEsll-GDpllTDlssVEctl.cchssEslLsVlSTsosFsPRsPDslcEIAcICu-YDVPHlVNuAYGIQspphhchlppAh+hGRlDAVVpSlDKNFhVPVGGuIIAAhc-salpEIScsYPGRASAsPstclLIoLLuLGssuYtcLh+cQKEsFshLcEpLccLuEchs..............EpllcsP-NPISSAhTlcsls.hpp...ts.spLuucLasR+VTGsRsVpsssshsT..........S+hppY.ssYlslsSAIGl+-EDV-pllcRL-csLc 
...............................................................................s.p..t....hthGEREsRhhs.lht...h.............th..hHGlGRSGsl...t.Q.PKAhGuolh.plss.hh.phh+.....hGh........t..h.tt...s.h..hlP...hs.T.G.M.s.l.s.L.sh.s.h.......p...t....+...................p.u...............c.....h.....llas...p....l.....D.pK............os.a.Ku....h..tA...G....h...p.s..h..l....l...p...s...h...h.......p......s......-.......t....l...h...s...s....l...p...s...l..c..ttl....pc...h..G.......p...................s....l..L..s.l.h.o..T.sosF....u....PR................h..s...D..p.............l.....tpluplCtphslPallN......sAYu...l.......Q....s..p....h..h.p........l.p.t.s.....p...h.............GRlDs.hVpShD.KNF........hV.Pl.G...Gul..l..A...u.....s...t..p...h.................l...p...t.....l.......u...p....Y........sGRA........oto........s.................l-........l........h.h....o.L..L.......t...L..Gp.s.u..h.tp.h.lt.pp.h..t..a..h.lpp..p..l..p..chupph.s.......................................c.p.lhps.s.cN.IS....hsh.sh....p....s....h..t.t....tt....................p.th...h.luuhL..apRtl.oG.s.R....l....phts...............t..........sY.lshssslGhptp-l..phhhpplcchh........................................................................................................................................................................................................ 0 68 98 148 +5718 PF05890 Ebp2 Eukaryotic rRNA processing protein EBP2 Moxon SJ, Mistry J, Wood V anon Pfam-B_9615 (release 8.0) Family This family consists of several Eukaryotic rRNA processing protein EBP2 sequences. Ebp2p is required for the maturation of 25S rRNA and 60S subunit assembly. Ebp2p may be one of the target proteins of Rrs1p for executing the signal to regulate ribosome biogenesis [1]. This family also plays a role in chromosome segregation [2]. 
18.80 18.80 19.30 21.20 17.50 15.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.03 0.70 -5.09 28 350 2009-01-15 18:05:59 2003-04-07 12:59:11 7 4 297 0 244 346 2 252.80 33 78.17 CHANGED --sDh-Lp-h..tsphcsshsl.....hpcp+hsINNstuLpssLccIph......t.lsasE+.sls.......us..psh-.plpDhp........................DDhpREluFY+Qu.sAVhpAhspLcchsVshpRPsDYFAEMlKoDEHMpKl+pKLlsEtsuhctSEcAR+tR-lKKFGKpVQstplQcRp+EK+-hl-cIKph+Kp+p..........tt.-phDht..........h--sst.....tsstp+s..............tt..+stss...tKRptKspKFGaGG+K+tpKpNstpSosDhsGFss+h...................+ut...h.................sspRPGKu+R ...............................p-..lpp.h....t.....c.sthsh.....h.tp+.hsl.Ns.ssuLpppLpclph............phsahE+hsls...............................us......pshs.ph.Dhp............................DDh..p..R.EhuFY+Qu.s..AshpuhspLcphs.lPhpRPsDYFAEMsKoDpHMp.Kl+p+..LhpcpsshctuEcs++.RphKKaGKpVQ.sp............pl..p..cR..pc-K+..chl-pl.+ph+Kt+p....................cthDhh..................pcttt.......tpptppt............................ttttpt.tss..........tKRp..tK.spKaGa....G.G+K.+..ht.KpNstt..Sts.D.hsuap.tpth..........................+ut.........................tttR.GKthR.......................................................................... 0 85 133 202 +5719 PF05891 Methyltransf_PK DUF858; Hydroxy-O-Methy; AdoMet dependent proline di-methyltransferase Moxon SJ anon Pfam-B_9735 (release 8.0) Family This protein is expressed in the tail neuron PVT and in uterine cells in C. elegans [worm-base]. In Saccharomyces cerevisiae this is AdoMet dependent proline di-methyltransferase. This enzyme catalyses the di-methylation of ribosomal proteins Rpl12 and Rps25 at N-terminal proline residues. The methyltransferases described here specifically recognise the N-terminal X-Pro-Lys sequence motif, and they may account for nearly all previously described eukaryotic protein N-terminal methylation reactions. 
A number of other yeast and human proteins also share the recognition motif and may be similarly modified [1]. As with other methyltransferases, this family carries the characteristic GxGxG motif. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.46 0.70 -5.04 7 406 2012-10-10 17:06:42 2003-04-07 12:59:11 7 5 288 3 286 1393 487 201.70 37 81.52 CHANGED ctcphYpcAlsYWpsVpsoV-GhLGGa...G...plsshDl.GScsFLppLhtc.hs...stppphhALDCGAGIGRlTKslLhchh...........scVDlVEPVppFlspucp.Lup......tpt+ssshassGLQ-aoP-....ts+YDlIWhQWClGHLsDp-LluFhpRCptuLpPsGhIVlKENhsppsh.hhDcpDsSlTRs-shh+plFccuGLpllupchQcGhPc....ELasV+MYALp ..........................................................................................YtcuhpYWp.p...l..suosc...Gh.L...G.....Ga......s......plsph...Dl..pu.Sp...pFLpc.l......h.t....t................................st...s....t.h....p....hALDCGA..GI.G.R....l......T...+p.l...L..h....h..h...........p..p..V......D..l...V..E...s.sp.pFl...p..ps..p..p......ltt.....................t.t.t...+.l....t...p...h..a..s..h.G.L.....Q..-..a..s...P.p........................t.p.Y.Dl.IW.h....Q.W.s..l.GH..L...T..D......p.c.......L..l..p.FLpR.C.+pu.L.........p...............s.....G......h..l....l..l...K...E.....N..h...s..p...........p.....s.....h.............hh.....D........p........p...D.....S......S..V..T....R.....s............p...h..h+p....lhpp..A.GLplltpchQpsh...Pc.........p..lhsVhhaAL............................................................. 0 89 151 227 +5720 PF05892 Tricho_coat Trichovirus coat protein Moxon SJ anon Pfam-B_9763 (release 8.0) Family This family consists of several coat proteins which are specific to the ssRNA positive-strand, no DNA stage viruses such as the Trichovirus and Vitivirus. 
25.00 25.00 27.50 26.90 19.00 18.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.38 0.71 -5.09 11 513 2009-01-15 18:05:59 2003-04-07 12:59:11 6 5 30 0 0 510 0 176.60 48 77.61 CHANGED hutptclRstl..........cshLhAths.spcs..ucsGhs+sh.......YLcolFG.IAlhGTSccTpah....................spVs.ltsphupc..hsshuphslpphVsph+saussss-GslpuhThRQlCEPFAppA+-sLlhhtphGsaopLhpKhscsGtKpPpVMFDFNsGLshptLo.tpctsVIpshNpRLFRTEGAKuVFsApuSssEpul-l .....................................................................ucot..hE........hLcolFuNIAl.GTS-pT.pFh....................-hsspVKshtspc.....sl.GphsLpElVshlKsFpsTSS-sslsshTFRQlCEsFAspARshLVcht.+GlaTNLaspMPcsGpK.PplMFDFspGLsM...hhhs.hspppVIssMs+RLhpTEhAKu..EAp.uSsppsLpl............ 0 0 0 0 +5721 PF05893 LuxC Acyl-CoA reductase (LuxC) Moxon SJ anon Pfam-B_9766 (release 8.0) Family This family consists of several bacterial Acyl-CoA reductase (LuxC) proteins. The channelling of fatty acids into the fatty aldehyde substrate for the bacterial bioluminescence reaction is catalysed by a fatty acid reductase multienzyme complex, which channels fatty acids through the thioesterase (LuxD), synthetase (LuxE) and reductase (LuxC) components [1]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.27 0.70 -5.75 13 358 2012-10-02 17:28:28 2003-04-07 13:39:30 9 4 316 0 112 2771 1191 335.20 21 69.60 CHANGED hsslslppllshlhpsupcWpssp.phh..h...htphhuYspphhp..hct...hhhhCp+suLhcll-p-L.up.chLD-al...pts...sY.+AhP+GlshHlluGNVPLlslhSllculLsKNssllKsSSSDPhhsssLlpohh-lDsst.........slscolSVlYa.cuscspLscplh..spADsVlAWGG--Alchlsp.ptsspschlcFGsKhShull-ssA.....slspAsculAcDIChaDQpAChSsQslalp...ssslcEFspcLuptLs+hscllP+up.shsEpAthotpct-.....shhsth..tVh.pspspsWhllhSptt........thhspPL.pRolhl+tlscl.-llphlpps.ps..QTlul.hshpuphthhsp.LuttGVpRIscsGhhshacsGtsHDGhhsLpRLV+a .....................................................................................................................h........................................................................................................t.....t.l................h........l.......t.............t...h...p..t...l....c..p..ah..........p..t...........t.........p...shsh.uh.sh.Hl.h.u.G.Nl.P.h.l.u..h.a.u.l.l.p.u.l..L.s.tNt.s.l.l..KhSu...s...-...sh..hhshl...h....p....ph....h.p...l..s.s.p.....................l.tp.p..l.s...l..l...a....ttt.........p.t..p..l....s.c..ph......s..p...s..D.....s..l...l.....u...a.G...G...s...p........s...l..c....h.hp....t....hh....s.s...p....s....p...h....l..t.....a....u.s.+.h.S...h..ulls.sps.......php.p.hh.p.t.hA.p.Dlh....h....ac..Q..uCtS...s.p.p.l.al.....t.............p.h.c.th.hp.........t.........l...ht.........t.........h........t....t...h....................h...h..s.....t....t....................s...........t....t....t..s...t..h...t....h.........t.............................h...tt....................h..................t.......t.h.............l....h.....p...........................ps...h......t......sh............l...h....l..s...ph.phh.........l.......t........QT.huh...h.....t.p.h......hs.......hh.hG..s.Rhs..G..........................aDG...h..l.phsp.............................
................................................................................................................................................... 1 42 80 95 +5722 PF05894 Podovirus_Gp16 Podovirus DNA encapsidation protein (Gp16) Moxon SJ anon Pfam-B_9825 (release 8.0) Family This family consists of several DNA encapsidation protein (Gp16) sequences from the phi-29-like viruses. Gene product 16 catalyses the in vivo and in vitro genome-encapsidation reaction [1]. 20.90 20.90 21.40 27.10 20.80 20.60 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.24 0.70 -5.11 4 17 2012-10-05 12:31:09 2003-04-07 13:56:03 7 1 16 0 0 18 2 330.60 31 93.00 CHANGED M-KKh.aap.pKlLSYstlhshlI..GARGlGKoYAhKph.lcchlppGcQFlYLRhYKoEltKspNpaFsDlt.paPspcFhVKG....p+hYlc..tchhGahIPLSshQu.KS.uYPNV.TIhFDEFlt..EKs.ssY.PNt....VcshlslhsTV.Rt+...-cVRslChuN...AVolhNPYFlhFth.Ps.NppapspsphllphlssccatsthRcsRFGphIsGhA..Yt-hSlDNpFsss......oclFV.++o+supasFuIhhsstphul.........WlDhppuhhahspu+sPcscpl.aALTscDL..sEsthLlhshpsshhLpphsSsa+KGhL+F-spVlRphhh-lht..hI .............................................................h.ph.s..h.hhh..Gt+slGKoashp.h.hpchhp.GtphlhlRh.csphtph.ts.ah.sc..l......a.s..hc.VKs............................+chhhD..tKhh..GahhsLSshpph+usuYPpVpsIlaDEFh...-.Kc...NhsYlPNE....sstL.slh-oVhRhR...-cl+sIsLuN...usollNPYFs...aash...s.sKp.hp..tps-sL.l.ph.s.scsapsEp.p....tpLhcGou..YuchSLDNcFhss......p.h.l.+...t.....ptspKhlasIhas.sthlG.V.........Wh..sh.phhhhlspspDPsppsl.h.hp.sDh..ptt...l...hsphhh+hLssthhst.LhFDs..hRhlu.p.................................................................................................................................. 0 0 0 0 +5723 PF05895 DUF859 Siphovirus protein of unknown function (DUF859) Moxon SJ anon Pfam-B_9891 (release 8.0) Family This family consists of several uncharacterised proteins from the Siphoviruses as well as one bacterial sequence Swiss:Q8K6J6. 
Some of the members of this family are described as putative minor structural proteins. 20.10 20.10 20.50 20.50 19.60 20.00 hmmbuild -o /dev/null HMM SEED 624 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -12.95 0.70 -6.26 11 110 2009-01-15 18:05:59 2003-04-07 14:01:24 7 5 86 0 4 104 3 484.90 28 85.00 CHANGED Ms-FaSNNDRGY+lpLhV-QVuQ.sssNNTSpVRh+LsLhNs....TsTFupYsCsuhVphpGQplsaSupPShlohspolpLIDpTlTlsHsuDGoKThuhsApFsGSGGaSPGTLsIuupsaTLosIPRuSoVS....VusshlGsslTIsIsRpSusFTHslpYp.aGspsGsIuo.slsTSsoWTssl.DhAspIPNSTSGpGTIhVcTYssGshIGop..osshphsl.PsSV+PohoGIoLoDsNosAppllsuss.FlQIhSNl+VsFNGAoGtYGSTI..pGYaAEI........VGtNQosspNGGsLG..hhNasGpsTlRApVoDSRGRpS-sh-spIolLcYFuPuLsFostRsGpsssplslhRpA+IAPLoVNGlQKNpMpLTFclusluossaosDsGsASGsWoshopLssos..AsLuGsYsusKSapVhGpLpDpFoSTsFphsVssEpVVh.......oh-+sG.lGIGKhhE+...GuLDVsGDIYA......ssp.IQQaQLT.NNGts........hpps...Nhl.-sGthhlssSu.sNPs...u...hh...hp.pss.p.hhQTFhuss..hhh...p.sh..ssWtsWpcaupscshsh............hpTsWp.sGhps..shat+sGsshThp.....hph.sst......cshslsshPpchhss...ahhslsu 
................................................................................h.h.h...p....s..up.shssNoSpVphphhl...ps.....shoas....h..s......s....h......l.lsGpphp....o...spssh...........s.s..pp.h.L.hstphT.lsHsuDGoK.ohs......h.Aph...ss..s...sGhp...s..sl.s...h...stshsLssIPRuS..o..lo..........ss.sshl.G.s.sholsIs..Rt..SosF...oHsl..pYp...au...ut.pts....l.us..sh.s.T....osoaT.sl....s......hA...splPsusSGssslhlp..TasGus...lGsp.....ohshshsl..Ps..o..hcPohs.u..l.olo-ssshstpllssss..FlQhhSplplsh...ssusG.YGSTI..puYpsEl........sststssssss.u....thG...hhshsGphTlpApVsDSRG+pSsshshplsVltYhsP.th.s.a.sshRsspsss........lp..lhhpspI.u.Plslsu..pKNhh.p.loapss.hss....s.hshsputAu....ass.....pphstss..hsh..u.G.....sassspSa.lhshlpDtFs.s....s.p.h...phs.lss..tpVlh.......shs...psG...lulG...Khh-p.............G..sl-s...G..sh.h......ts..h.....p.h.pLh......................s.h..sG.hh.....ss......spP......................................................................................................................................................................s............................................................................................................................................. 0 0 1 3 +5724 PF05896 NQRA Na(+)-translocating NADH-quinone reductase subunit A (NQRA) Moxon SJ anon Pfam-B_3622 (release 8.0) Family This family consists of several bacterial Na(+)-translocating NADH-quinone reductase subunit A (NQRA) proteins. The Na(+)-translocating NADH: ubiquinone oxidoreductase (Na(+)-NQR) generates an electrochemical Na(+) potential driven by aerobic respiration [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.35 0.70 -5.20 77 764 2012-10-02 20:27:15 2003-04-07 15:35:01 6 4 731 0 166 1388 819 253.40 47 56.78 CHANGED hI.pIKKGLDLPIsGsP.p........Q.tIp.susssppVAllGp-YhGh+.PoMhVc.GDpVKtGQsLFpDKKssGVhFTAPuSGpVssIsRGt+RVlpSlVI-l-..s...p........-p.sFppastt..pl...ssLsp-pVpppLlpSGLWTAlRTRPaS+lPss-u.sPpuIFVoAhDTsPLAADspllIspppps......FpsGLslLu+LTsG+.Valspsssss.lPhs.....shssVp.hcpFsGPHPAGLsGTHIHalcPl.uts+sVWplsYQDVIAIG+LFhTGcl .................................................IpIKKGLDLPIsGtP..p.........Q..sIp...su....slppVAl....lG...--YlGM+..PoMtV+tGDpVKKGQsL.F.E...DK.K.s..PGV.h...F...TuPsSGpVs...s....I.....s......R.....G....t...+....R.....V.....L.....Q....S......V...VIcl-..G...s........-pls..F..s.c..a.s..s.p...pL....usLsp-pV+...ppLlpSGL........W....T....A....l.R.T.R..P...F....S...K...l....P...s....s....-...u...pP....p.....u...I...F.V..o.AhDTNP.L.AADPpllIppp.p..cs...........FpsGL.s.l.L...o.+...L..T.c..u...K...V..al.C..p...s.....s...s..s.......s...lPtp.........shss...Vp.....hcpF.sGPHPAGLsGTHIHFl..pPV..uts+s.....VWpI...sYQD.VIAIG+LFhTGcL................................................................................................................... 1 45 95 138 +5727 PF05899 Cupin_3 DUF861; Protein of unknown function (DUF861) Moxon SJ anon Pfam-B_2000 (release 8.0) Family This family consists of several proteins which seem to be specific to plants and bacteria. The function of this family is unknown. 
23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.32 0.72 -4.59 33 1583 2012-10-10 13:59:34 2003-05-01 16:21:48 7 10 1032 14 492 2347 591 76.00 27 46.50 CHANGED tstsushptGlWpsoPGcaphphsp.pEhs....allpGclpls...........s-....sGps......hplpsGDhhhhPtGh......pusWclt-s.l+....KtY ...............................h.t.ptshhhshac....s.ss.G...p.............h....p....h....h....h....s...p....t...E..hh...........al..lpGplpl.s...........st.......sGcs......h.p.lssGDhhhhPsGh.........pssWcs.th.h+Kha....................................... 0 106 251 370 +5729 PF05901 Excalibur Excalibur calcium-binding domain Moxon SJ anon Reference 1 Domain Extracellular Ca2+-dependent nuclease YokF from Bacillus subtilis and several other surface-exposed proteins from diverse bacteria are encoded in the genomes in two paralogous forms that differ by a ~45 amino acid fragment, which comprises a novel conserved domain. Sequence analysis of this domain revealed a conserved DxDxDGxxCE motif, which is strikingly similar to the Ca2+-binding loop of the calmodulin-like EF-hand domains, suggesting an evolutionary relationship between them. Functions of many of the other proteins in which the novel domain, named Excalibur (extracellular calcium-binding region), is found, as well as a structural model of its conserved motif are consistent with the notion that the Excalibur domain binds calcium. This domain is but one more example of the diversity of structural contexts surrounding the EF-hand-like calcium-binding loop in bacteria. This loop is thus more widespread than hitherto recognised and the evolution of EF-hand-like domains is probably more complex than previously appreciated [1]. 
21.00 21.00 21.10 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.86 0.72 -3.61 68 1070 2009-01-15 18:05:59 2003-05-01 16:38:01 6 44 876 0 230 825 85 37.90 39 17.10 CHANGED shpsCsphpusstAthaht..................psshss.....cL...........DtDsDGluCE .............................................atsCpphpss.s.us.lhts.................................cPuYss...........+L................DRD..p...DGlACE..... 0 63 151 202 +5730 PF05902 4_1_CTD 4.1 protein C-terminal domain (CTD) Moxon SJ anon Reference 1 Domain At the C-terminus of all known 4.1 proteins is a sequence domain unique to these proteins, known as the C-terminal domain (CTD). Mammalian CTDs are associated with a growing number of protein-protein interactions, although such activities have yet to be associated with invertebrate CTDs. Mammalian CTDs are generally defined by sequence alignment as encoded by exons 18-21. Comparison of known vertebrate 4.1 proteins with invertebrate 4.1 proteins indicates that mammalian 4.1 exon 19 represents a vertebrate adaptation that extends the sequence of the CTD with a Ser/Thr-rich sequence. The CTD was first described as a 22/24-kDa domain by chymotryptic digestion of erythrocyte 4.1 (4.1R). CTD is thought to represent an independent folding structure which has gained function since the divergence of vertebrates from invertebrates [1]. 
25.00 25.00 26.60 26.60 23.90 18.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.67 0.71 -4.03 9 519 2009-01-15 18:05:59 2003-05-01 16:43:59 8 12 80 0 154 372 0 107.90 60 12.64 CHANGED ts-Isocchslstspspshs.pshph..ssss.-sslLlospTITuEohSTTTTTHlT....K........................TVKGGlSETRIEKRIVITGDsDlDHDQALAQAI+EAKEQHPDMSVT+VVVH+ETEl .......................................................................................p.....hp-sPllpTETKTI..TYE....us........ph...D.s.s.....s.....sh-...sGV..LhoAQT.ITSE.T....s.S....TT.TTTHIT.....K........................TVKGGISETR..I..EKRIVITGDuD.ID.HD.Q....A.....LAQA.IKEAKEQHPDMSVTKVVVHpETEl..................... 0 21 33 77 +5731 PF05903 Peptidase_C97 DUF862; PPPDE putative peptidase domain Bateman A anon L. Iyer Domain The PPPDE superfamily (after Permuted Papain fold Peptidases of DsRNA viruses and Eukaryotes), consists of predicted thiol peptidases with a circularly permuted papain-like fold. The inference of the likely DUB function of the PPPDE superfamily proteins is based on the fusions of the catalytic domain to Ub-binding PUG (PUB)/UBA domains and a novel alpha-helical Ub-associated domain (the PUL domain, after PLAP, Ufd3p and Lub1p) [1]. 
29.20 29.20 29.20 29.40 28.90 29.00 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.12 0.71 -4.43 41 818 2009-01-15 18:05:59 2003-05-02 09:40:48 9 22 288 2 549 790 22 135.60 31 40.10 CHANGED t.VhLpVYDLo...p....................shhLGh...........Gl..aHoul........laGp.Eahauu..........sGlh..................pspPtp.....p.G.p.pcsl......lGpTclsppthcpal...pplup..c.....apussYcLls+NCNcFosclsphLsGcpI.....PsalpcLsphshphshsptl...hshhh ...................................................h..VhL.VYDls......p............................hhhlGh................Gl..aH.oul..................laGh.EahaGu............sGlh.........................pspPtp............s.sh.phccsl.............hlGp..Tp..hstp..phpphl.....cplup...p.........apussYcLlp+NCNcFosplsp.h.L..s...G.......p.......t...I..............Ppa.lscLsphs.....hht.h........................................................ 0 198 333 456 +5732 PF05904 DUF863 Plant protein of unknown function (DUF863) Moxon SJ anon Pfam-B_7732 (release 8.0) Family This family consists of a number of hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
25.00 25.00 32.60 32.60 18.90 21.50 hmmbuild -o /dev/null HMM SEED 805 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.64 0.70 -6.27 7 79 2009-01-15 18:05:59 2003-05-02 10:12:57 6 3 15 0 56 66 0 465.10 22 72.35 CHANGED hhs..shSh...........ctuostpssl...shh.tpsspV.pspss........KhR++MlDLQLPADcYlDs-ptps.Gpp....................cphpssspp....p.ppssssss....lsl....psopGluDLsEP...............Vpsppspshu..hohDhhu+hsss.ucspsptlphs...............s............................EssptKsss+ssu....stp.ls.ssp..p.h.spu.p........P.....ps..phthhcERophshE....hppts.th.hps...l-tusssphPp.s..h.p.......s..h..hsH..uu....osuhsp+s...hssQppPhh........................phcosupsssuhssp..shspNthhpG.SssStpp.h.hsuhs.p..s.s.tt.sspt.hhp....................shp.s.ps...................tpppp.sslPWLp..t...p....................................t.hsLpss.........................ps..tssh..hclp.ssspppppIhh..h.tp..hs.....cp.s.......uhhhust.pclpp.h....hsLshN.ss-hsh........s...ptccstct...AssRshIDLN..ssoEDp.Epsshsup.th......psKh.h.IslEs..s.Es...-.p..........pcps........t.ssss.sEhh+hAAEuIVAI.....spcs.ssuSs.s-h..pp..LpWFs-hhtoptp-h.pp.-h..t...........sssphD.FEuhTLpLpETpt-EYhscPhsP..E..ph-c.os....s.s+PRRGpARRGRp+RDFQ+DILPGLsSLSRHEVoEDlQhhsGlh+usshsWp.SGhs+....hpussRGR.....hpss..sp..........sssssshsp..ss............DpulsGWGpsTRRsRRpRCP 
...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t....AAp.ll.h.............hp..sp.......l.hFsp.h..............p....t...h.....t.ptt.......sttshD.FEthTLpLpEhp.--hhshs.hs..t..p.cp.........p+.hpsphRRuRp.h.+DFQ+-ILPuLsSLSRpElsEDlphhtshhpupthphp.os.h.sc.ts..tsts.R.s+..................................................................................................................................... 0 6 31 43 +5734 PF05906 DUF865 Herpesvirus-7 repeat of unknown function (DUF865) Moxon SJ anon Pfam-B_9911 (release 8.0) Repeat This family consists of a series of 12 repeats of 35 amino acids in length which are found exclusively in Herpesvirus-7. The function of this family is unknown. 
20.10 20.10 24.10 87.20 17.70 16.30 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.70 0.72 -4.37 2 12 2009-01-15 18:05:59 2003-05-02 11:12:20 6 1 1 0 0 12 0 35.00 95 98.36 CHANGED MGSHPFRQEpPpPHNPLTFKPVKTTGTAVsFSAGF MuSHPFRQERPQPHNPLTFKPVKTTGTAVAFSAGF 1 0 0 0 +5735 PF05907 DUF866 Eukaryotic protein of unknown function (DUF866) Moxon SJ anon Pfam-B_8299 (release 8.0) Family This family consists of a number of hypothetical eukaryotic proteins of unknown function with an average length of around 165 residues. 22.90 22.90 23.20 23.10 22.10 22.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.17 0.71 -4.74 9 382 2009-01-15 18:05:59 2003-05-02 11:19:09 8 12 290 2 254 373 4 144.50 32 84.37 CHANGED MV+huLplpAsLENlpsLpPst....psa.aahKlKCsNCGElo-Khptlohs-pVsh.uG+usu.Nhs.KCKhCuREsSIsIlsushpshsh-DS..tchpsIssF-CRGhEPl-FtPpssWtApusEoGo.Fc-IDLpEt.-Ws-YDEKspsoVuIhphcppFp...hhK ..............................hhL.lpAp.L.cslsplpP.s.........psa...ahh+lpC.ssCtEhptphhhl...sh...-..ph..th....u...u+......G.p............A..shl.K........C.KhC.t.................+-sShs..l.....h...s...........s.....p....h..............p.....s....hs..h.c......ss.....................tchpsl...ltF-CRG.lE.s-FpP............p......s............t..ahspuh.oGshF..p....sl.cL..p........-t....-WhDYDEKstppVulhclphph....h.................................................................................... 0 80 135 205 +5736 PF05908 DUF867 Protein of unknown function (DUF867) Moxon SJ anon Pfam-B_7778 (release 8.0) Family This family consists of a number of bacterial and phage proteins with no known function and is present in Bacillus species and the Lambda-like viruses. 
27.10 27.10 27.60 30.10 26.30 26.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.19 0.71 -5.22 37 384 2009-01-15 18:05:59 2003-05-02 11:22:44 6 3 199 2 70 273 50 188.60 34 69.65 CHANGED Ypsap-Lppspp.tpDapIphpph.s.oplllhA.HGGuIEsGTSElscthu.....ssaShYlFEGl+tps.NpsLHlTSs+FD...EPhslphlpppchslolHGYtspp.......ppshlGGpDcthuctlscpLpptGFss....tstsscluGhpssNIsN+sp......sutGVQLElSsu.RcshFcshshppcsht.....spshhcalpul ....................atshs-Ltpppt.....DapIcsppp.s.SplhllA.HGGGIEsGTo...ElA.ctlA.................phssashYhFcGl+pp.......s...N..p.pLHlTSo+F...D....-Phhhchlc.p.p..phslSlHGhsusc.......phlhlGG.p.....Dcp.L.tptlscpLpp.t.....GasV..............tssP.pcluGhpssNlsN+sc......p.s..sGlQLELostlRcthFcshphspcshpp....s.hhh.pasp.................. 0 14 34 60 +5738 PF05910 DUF868 Plant protein of unknown function (DUF868) Moxon SJ anon Pfam-B_8013 (release 8.0) Family This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
20.60 20.60 22.40 21.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.14 0.70 -4.94 19 260 2009-01-15 18:05:59 2003-05-02 11:39:36 7 9 24 0 155 257 0 251.40 32 81.23 CHANGED tsuspssVTslYpscl..ss+.tl.lplTWo+shh.....ups.Lolslssss................................pshs+hsh.pPhhFt+++GS+....................sh.pssusp...l..-laWDLosAKF...uu...uPEPlsGFY.VAVVsssEllLLLG......Dhc+-sh++stss.......sushlu++E+la..........G.p+hasTKApF..t-sGppH-lsI-C.......sssG.....tp....s.plhlslDuKhVlQV++LpW+FRGNpTlhl.DGhsV-lhWDVHsWLF..........u..suss...p..........AVFhF+sp....tu.-p.....................phh.pttstsssp...p....................t.sFsLllhAaKh .........................................s...pshsoslYpspl........ttt.th.lsloW..s+shh......spu..Lslslp..sss.......................................p..h+hp.h..pP.hh.h..h+++.....GoK......................ph...pst.us.p......l..claWDLssA+F......uu...uPEPhss.aY.Vul.VsctEhsLlLG.........Dhpc..-uh.......++ptst......s.h...psshlu++E+lh...........G.p+hasT+ApF...t-.pGp.H.-lsI-s......................tttu............t-....s..phhlplDuch.llpV++LpWKFRGNpsl....hl..ssh.VcVhWDVHsWLF...................................s..ss.s.......p...........AlFhF+st........s.pt..................................................................p.................................................................tFsLhl.Ah+............................................................................................. 0 12 87 123 +5739 PF05911 DUF869 Plant protein of unknown function (DUF869) Moxon SJ anon Pfam-B_8094 (release 8.0) Family This family consists of a number of sequences found in Arabidopsis thaliana, Oryza sativa and Lycopersicon esculentum (Tomato). The function of this family is unknown. 
40.00 40.00 45.50 41.20 39.80 37.50 hmmbuild -o /dev/null HMM SEED 769 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.73 0.70 -13.58 0.70 -6.30 12 276 2009-01-15 18:05:59 2003-05-02 11:43:02 6 6 18 0 199 290 0 328.00 19 77.52 CHANGED KEsLltQHuKVAEEAlsGWEKA-sEshuLKppL-sss.tphshE-RsuHLDuALKECh+QlRps+EEpEpplpDshtppopph-phchpLEt+lt-hppclhcssuENssLoc.Lpt+pphl.clscp+s.h-sphpsLpspL-usEKE.soL+YEl+slpKELEIRscE+shSh+SA-sAsKQHLEslKKIuKLEAECQRLRshVRK+LPGPAAlApM+.EV-th..upshs-sRppts.u..p....................................pp...hpccs..hht+hhthE-EsKhL+-uLuc+ssELQhScNhhAcpsuKLp.h-sph.................................t.hsEpt..-DthSsupSWA.suLlSELpph.KpcKt.spuphtpssu.clpLMDDFLEMEKLAs...lsstussssppsspstsscppsthst..pppt..ps.................................p........t+hltplp..plLccIctphsphppupspsppp.p...................................sh.pp.pshpp..hs..pcLpshluph.....pchht.Lpp.hphhp-...tpsshpchlp-hshshphhlsp.psLp-...........ttplp..h.h...................................................................pchEphcspptphth.........p..p.ph.....pE.EpplpplpspLp.tspcSpshsEsplcshspp.culc.+t.clc.chp.hp.plppL-sELpcE+tspp-thsKsp-lcpclp+.ppptsttph.......cp-h+h+Q-pEIuuAAEKLAECQETIhsLG+QLpoLp.s.p-thlso 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t................................................................................ 0 29 121 163 +5740 PF05912 DUF870 Caenorhabditis elegans protein of unknown function (DUF870) Moxon SJ anon Pfam-B_8400 (release 8.0) Family This family consists of a number of hypothetical proteins which seem to be specific to Caenorhabditis elegans. The function of this family is unknown. 
27.10 27.10 27.20 27.50 26.60 27.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.63 0.71 -4.27 6 55 2009-01-15 18:05:59 2003-05-02 11:45:43 6 4 5 0 55 53 0 106.80 24 63.69 CHANGED pFpAtIcCslstsaWCG-LYllEcDshh....................................................................HDlLppcKFCTSEppKph+FTVsPtsDF.oscaEhsYhhNHNCTADGcshCVKPpcopcVssast.+oVcFsI-AtsNGcsppCcsP ............t..th.lpCs.........t.....hWCuplhlaE.Dh.h.h...........................cDll.t.p.p.pFCo..s.pt.p....c...p...a.ca..s.h...p...s......suD....h..o..................spYEhshhlpHNCos.sG..p.h....h..Cl.p...p...h.s.h.....t.h.................................................... 0 15 19 55 +5741 PF05913 DUF871 Bacterial protein of unknown function (DUF871) Moxon SJ anon Pfam-B_8510 (release 8.0) Family This family consists of several conserved hypothetical proteins from bacteria and archaea. The function of this family is unknown. 21.50 21.50 21.50 21.50 20.80 21.40 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.62 48 1290 2012-10-02 15:38:38 2003-05-02 11:49:29 6 2 773 2 132 806 4 344.50 34 98.71 CHANGED tLGlSlY.scs.thccscpYlchupphGFsclFTSLh.sc...sst.cphhpphpclhphApphshclhlDlsPplhppLsloapsLshhcc.hGlsGlRlDhGFospphAphops..slcIpLNhSs...........spchlcplhshpsshsplhusHNFYP+..TGLuhcaFhcpschh+chG..l.couAFlsup.ssppGP..lh-...GLPTLEpHRths.hspspcLhtsshIDclllGsshsScc-Lcpluph.pcphhpLclp...shs-h-tcll.cphHppRsDsuc.VlRSspoRh.....hht.ppsh.sp.ss.s.hp+GslsIDNptYG+YpGElQIshp-hssct+sNVVGclhc--l.LLchIcPhpcFphhtp 
.....................hlGlSlY.pcs....hpcsctYlchutchGFpplFTS.Lh.sp....ccp.cphhpcapcllshApphshclllDlsPslhcpLshoap......s......L.p...........a.pc.hGhhGlRlDhGho.s..pp.uthotp..slK.IpLNhSs...........spphl.cslhshps..shs...p..LhuCHNFYP+..TGLuh-aF.cpschh+chu..l.p.o.hAFlsup..s.sph.G..P...lpc...GLPTLEpHRph..p...hst..uccLht..oshl..D-VlIGsshsSccplcplup..h..pcp..hhpLclp....tths-hppphl.h.p.p.hH.........h.RsDhsc.llRSs......poRh...................hhp..ppsh.....sp..po...t.t.hp+GslsIDNptaG+YpGElQIslp-..hss.cs.+sNVVGpIhc--l.LLshlp.sh.ppFphh.p................ 0 42 78 105 +5742 PF05914 RIB43A RIB43A Moxon SJ anon Pfam-B_8571 (release 8.0) Family This family consists of several RIB43A-like eukaryotic proteins. Ciliary and flagellar microtubules contain a specialised set of protofilaments, termed ribbons, that are composed of tubulin and several associated proteins. RIB43A was first characterised in the unicellular biflagellate, Chlamydomonas reinhardtii although highly related sequences are present in several higher eukaryotes including humans. The function of this protein is unknown although the structure of RIB43A and its association with the specialised protofilament ribbons and with basal bodies is relevant to the proposed role of ribbons in forming and stabilising doublet and triplet microtubules and in organising their three-dimensional structure. Human RIB43A homologues could represent a structural requirement in centriole replication in dividing cells [1]. 
25.00 25.00 32.00 25.30 24.50 20.20 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.17 0.70 -5.83 13 202 2009-09-10 21:00:25 2003-05-02 12:53:47 7 4 109 0 131 193 4 312.80 31 95.74 CHANGED Ma+lcls...D.cEstAlE+RRstEpERpsRIFNu+sRshGVDlcALcpQVpE+KhpEpsE+p+-cuYsspphp.DclsthhccEpcpcp+pLs+clppFRppaQ+hEspREFDL..DP-sl+K.shPsRluD-Dsp...hGsSShQpFpGEDLppssR++hQpEQ.RpWlcpQhpE+ppActpc+pADplh-phthphDpRAtcLuphEcpsRptlppAspcaNcA.Accpptccppc+pp-p--NhAEIpNploSDhLTENPsVA....pSshuPaRVlstpWKGMoPcQlpsIRcsQ-pQhpEKccpRppEpth-tpWpppphphuRAhhhLEcpccRhp+ph+cpLsphNppLAtEQ..cAppcaLpcplYsNpPTspYapQFNToSR .........................-.....tltt++ph-.pRppRhhss+.RhhGlDhpsLptQlt-+c.pcthEp.tcptthstp.ht.cplhthh-pctpcpp+tlp+tl.ppap.pp.h.Q.p.cppREaDL..DPtthp....................K.thPsc.us..sDsp....u.SuhQh.....F.GEDLshtpRp+hQpcQ.RtWh.pQ.pEppptctpcctt.-.thhsptphph.-ppttcltph-ppsRpthttshtpaNps.....Atcptpccppc+pp-pcsshsEItp.lp.uD.hLoE.sPp..A....tst..hu...s.pR..ll..s...paKGMo.-Qhptlpp.hQppQhp-p...tph.cp..t-pth-tpWp.ppphp.s+shh.h-cpppc.ppph.pctls...p.N..pLApcQ..ptppp.h.t.lh.p.NtsstpaatQFspssR......................... 0 54 63 92 +5743 PF05915 DUF872 Eukaryotic protein of unknown function (DUF872) Moxon SJ anon Pfam-B_8741 (release 8.0) Family This family consists of several uncharacterised eukaryotic proteins. The function of this family is unknown. 
22.10 22.10 22.10 22.20 21.20 22.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.77 0.71 -4.22 13 281 2009-01-15 18:05:59 2003-05-02 12:57:19 7 3 132 0 171 286 2 107.10 29 77.02 CHANGED Y..psLss.D-D.............hcs...........pFspssss.p+ss...............h+sIhhAhhLLllGshLllhGhhlhss..t.ussspuhshhlhGhlhFlPGhYastlhahAh+Gh+GasF.plPsF ....................................................................................................................ht.........p.................................hKsIhlAhhLhllGs...h.LlllGshlhss....h.ht.....sssspuh.shhllGhLhFlPG.hYalh.lhYhA.h+Gh+GaoasplP.a.................... 0 52 86 126 +5744 PF05916 Sld5 DUF873; GINS complex protein Moxon SJ, Studholme DJ anon Pfam-B_8759 (release 8.0) Family The eukaryotic GINS complex is essential for the initiation and elongation phases of DNA replication [1-3]. It consists of four paralogous protein subunits (Sld5, Psf1, Psf2 and Psf3), all of which are included in this family. The GINS complex is conserved from yeast to humans, and has been shown in human to bind directly to DNA primase [4]. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.75 0.72 -3.76 176 1118 2010-01-07 10:55:00 2003-05-02 13:00:16 6 12 372 22 798 1076 14 113.40 15 50.89 CHANGED sclPhahut..Lhc....................pshspl..p..phhshc..hpthptp...............................tps.t..hplt...................................................................hthsph..........................p.+p.........................hlps..hh....ctRhtKlpphshph.......................................ttstthh..spLottEh ..............................................................................................................................................plPhahu....L..hp................................p...hh..pl.......phhphp...hpthpt-...............................tpp....hslp..................................................................................ahp.h.sph.h......................................................cch+p.............................................hlps...hh.ctRhpKlpphs.hph.............................................ttstthhptLs..E....................................................................... 0 230 409 648 +5745 PF05917 DUF874 Helicobacter pylori protein of unknown function (DUF874) Moxon SJ anon Pfam-B_8835 (release 8.0) Family This family consists of several hypothetical proteins specific to Helicobacter pylori. The function of this family is unknown. 
21.90 21.90 22.10 22.10 21.60 21.80 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.50 0.70 -5.61 2 170 2009-01-15 18:05:59 2003-05-02 13:02:31 6 4 43 0 11 184 0 203.70 45 100.99 CHANGED M.sl+sh........s.hAshhhph.pphKphpshpNlhhSlhGh..thhctl+s.lKKp.KKSs........hhCtpsKphDDhl......tp+.N..sWap.u.GlThhsuhLhssC.AsDpsKp.EltQtpKEAENARDRANKSGIELE.......QE+QKTpp.............KsEQE+QKTEQEKQK...........tspstIclEQppQKT.ppppch.ppQKDhlpcsEQNCQENHNQFFIKKLGIKuGIAIElEAECKTPKPsKTNQTPIQPKHLPNSKQPHSQRGSKAQEhIAYLQKELE.LPYSQKAIAKQVsFY+PSSIAYLELDPRDFpsTEEWQKENLKIRSKAQAKMLEMRsLKPDPQAHLsTSQSLLhVQKIFADVsKEIcsVANTEKKsEKAGYGYSKRM ...............................................................................................................................................................................Iclt.......QEpQKT.....................................................................................................................................pt.......p..................................................................................................................................................................................................................... 1 10 11 11 +5746 PF05918 API5 Apoptosis inhibitory protein 5 (API5) Moxon SJ anon Pfam-B_8916 (release 8.0) Family This family consists of apoptosis inhibitory protein 5 (API5) sequences from several organisms. Apoptosis or programmed cell death is a physiological form of cell death that occurs in embryonic development and organ formation. It is characterised by biochemical and morphological changes such as DNA fragmentation and cell volume shrinkage. API5 is an anti apoptosis gene located in human chromosome 11, whose expression prevents the programmed cell death that occurs upon the deprivation of growth factors [1,2]. 
22.20 22.20 22.30 22.40 21.90 22.10 hmmbuild -o /dev/null HMM SEED 556 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.78 0.70 -6.01 3 205 2012-10-11 20:01:00 2003-05-02 13:15:00 6 6 113 2 125 191 0 385.30 36 88.30 CHANGED pIEKLYEFsERLSESsDK.SQNV-DYEGIIKhSKTohKsKQLASQLIPRYFKFFPSLATEAFDAahDlhDDsDlGVRVQAIRGLPLFCKDTPDhlSKIlDVLVQLLNTEEPVERDAVHKALMSLlRQDTKASlTALFpHsusT.TTDEQIREKVL+FIRDKVlPLKGELLKPQcEMERHITDLIKKSLpDVTG-EF+MFMDFLsSLSIFGGKAP.ERMQELVEIIEGQADLNup...FphsDs-hl-RhIpChphAhPaFuRGAPSS+FLsYLNK+IlP..sFDpL.....PEERKLcLLKALAEMSPYTTAQ-.ARQlLPSIVpLLh.YMP........htcTs.phpFoasECLLYAhHpLu+KsPNATNSLCGYKIVTGQPSDRLGEDFSEhNKDFTERLThVEDLTKATMKKLTQGMoEHsKAMSsAKTDEEKuplKTK+.QsTTTGLRTCNNILAMTKsLatplPsFp+DhslsLSWhVssp....shlh++HtshoFh.............s.hspspohhGKRPA...NGuGNNV.uAKKuRsSN..QpQlVNKuuEGIS+sGu..SatGRGRsRGpGR+u..GGGRGRGp.sRGFW ......................................................................................t....lhp..thLsp.u.t..-....pp........p.tYp.IlpusK..u.s.sK.tKpLAuQhIs+FFppFPpLuppAlsA.hDLsE..D--...............h..t.lRhQAI+tLP.hsps......-.hs+luDlLsQ....LLt.......s.--ssEhphVppuLhslh+.DsKuoLsulFppl............p...s.-.....-.......hRE+....slpFlt..sKlhsh...phhp....pEhE..chlht..K.K.sL.p.D..VT..........u.pEF.hhM.phLtshp....ht.....shpuhpp.Llcll.p..QApLptt................hpss...Ds-...pl-Rhlp.Cht.A.hPhF..........sc.sspS...opFl.sahsc.p.llP...hstl...........................phpL..clLKhhAEhssa.......s.......s.s.-.....pp.l....lhphLhc.YMP.......................tt.p.p.t.....phpFoaVECLLashH.pLu++.hPs.ht.h....................u-phKDFp.R.....Lphhtc...hhp..shl+pL...p.uh...............pscstptl...K....scc..phphsuL+hssNI.slh+sLac.sP.ahss....ls.LSWh..tp..............................................................................................................................................................................pt........................................................................................................................................ 
0 46 63 92 +5747 PF05919 Mitovir_RNA_pol Mitovirus RNA-dependent RNA polymerase Moxon SJ anon Pfam-B_9906 (release 8.0) Family This family consists of several Mitovirus RNA-dependent RNA polymerase proteins. The family also contains fragment matches in the mitochondria of Arabidopsis thaliana. 25.00 25.00 27.70 27.30 18.60 18.60 hmmbuild -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.78 0.70 -5.55 14 122 2012-10-02 12:54:00 2003-05-02 13:27:16 6 2 72 0 14 89 0 240.50 44 75.64 CHANGED hphc.psphhsh.......+uGP.......suhShhoslhchhuhhtps..hphlphl...................h...h.hlsahpph..hhpsh...................hhsphtsh................lGKLulhc-stGKsRlhAhsDhhoQhlLpPLHshlFshL+plPp.DtTFsQp.shshhppchppt......aaShDLSAATDRhPlslQpclLshlhus.phupsWtsLLlsRsY..........tssss.lpYuVGQPMGAhSSWAhhsLoHHllVphuAh+s..ths.FscYlILGDDIVItscpVAppYhplMsc.LGVcIS.sKohlS..psoaEFAKRaht.stt-lSslslp..........uIlpslpp.hthhhs.lhchht+Ghp.......phls.h.ch.t..hlhh....tt.hhhsthhh....ht.......slphtpsllshpphhphltphh............t..hhs.thh................hshlhthlphshstphpslhhcphsphhhh.sphpsh.h.........hpp.pch.ph......L.tuh.hshh.plpchhsthhpptshsl.thhpshh...hshcsl 
...................................................................................................................................................................................................................................llAMhDahoQhhL+PlHstLFphL+clPQ.DRTFsQssh.ch.ssccs........haShDLoAATDRFPIslQcclLhhl.............auc..phAsuW.tsllVpcta............hp.ths.lpYsVGQPMGAaSSWshFTLSHHllVphsAhhs...thspFp-YllLGDDIVIpNcpVAKpYlplMsp.LGVslScuKohVS....c.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 4 8 14 +5748 PF05920 Homeobox_KN Coprinus_mating; Homeobox KN domain Moxon SJ anon Pfam-B_4610 (release 8.0) Family This is a homeobox transcription factor KN domain conserved from fungi to human and plants. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.08 0.72 -4.25 99 3978 2012-10-04 14:01:12 2003-05-09 13:11:53 6 46 463 5 1329 4786 10 40.00 54 12.00 CHANGED WLhpphpp..PYPoc...p.....pK..........p..pL......uppT..ulohp..........Q.........lss.....WFINAR+R ........................................WhtpHh..ca...PY.....PoEs.........................-K..............................h...tL......App.T..GLs..p.................................Q......................................lsN......W......F.INtR+R....................... 
0 325 626 968 +5750 PF05922 Inhibitor_I9 Subtilisin_N; Peptidase inhibitor I9 Yeats C anon Pfam-B_52 (release 8.0) Domain This family includes the proteinase B inhibitor from Saccharomyces cerevisiae and the activation peptides from peptidases of the subtilisin family. The subtilisin propeptides are known to function as molecular chaperones, assisting in the folding of the mature peptidase [1], but have also been shown to act as 'temporary inhibitors' [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.73 0.72 -3.41 160 3390 2009-01-15 18:05:59 2003-05-19 17:25:16 11 124 1056 23 1630 3258 93 81.50 20 11.78 CHANGED pYIVhhcpt.........................................hstsp......hssttphhp...............................................tsslhpsYct.......shsGauupLspppl.cplp.ppPsVthlp.Dphhplp ..........................................................................................................YIVh.hppt........................................................t.tp.................tthtphhp....................t...............................................tttsplhps...Ypp..............shsG......au.u.p.l......s.p....p.p.....l..cp.l........p......p..p..Ps....V.t.t....Vp.-phhph........................... 0 332 967 1374 +5751 PF05923 APC_crr APC cysteine-rich region Yeats C anon Yeats C Motif This short region is found repeated in the mid region of the adenomatous polyposis proteins (APCs). In the human protein many cancer-linked SNPs are found near the first three occurrences of the motif. These repeats bind beta-catenin [1]. 19.90 19.90 19.90 19.90 18.50 19.80 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.34 0.73 -6.92 0.73 -4.15 30 697 2009-09-11 05:34:41 2003-05-20 13:04:25 7 65 69 3 367 643 0 25.90 45 5.86 CHANGED s-ssppas.sEcTPhs.FS+so..SlSSLo ..........-s.ppas.lEsTPhs.FS+so...SLSSLo.. 
1 42 74 178 +5752 PF05924 SAMP SAMP Motif Yeats C anon Yeats C Motif This short region is found repeated in the mid region of the adenomatous polyposis proteins (APCs). This motif binds axin [1]. 19.60 19.60 19.70 20.20 19.50 19.40 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.28 0.73 -6.64 0.73 -4.65 11 249 2009-01-15 18:05:59 2003-05-20 13:43:35 6 48 46 2 126 209 0 20.40 55 2.10 CHANGED ---.ElLpECIsuAMP++pp+ ...s-D.-lLpECIsSAMP++pp+.. 0 7 18 54 +5753 PF05925 IpgD Enterobacterial virulence protein IpgD Moxon SJ anon Pfam-B_1987 (release 9.0) Family This family consists of several enterobacterial IpgD like virulence factor proteins. In the Gram-negative pathogen Shigella flexneri, the virulence factor IpgD is translocated directly into eukaryotic cells and acts as a potent inositol 4-phosphatase that specifically dephosphorylates phosphatidylinositol 4,5-bisphosphate [PtdIns(4,5)P(2)] into phosphatidylinositol 5-monophosphate [PtdIns(5)P] that then accumulates. Transformation of PtdIns(4,5)P(2) into PtdIns(5)P by IpgD is responsible for dramatic morphological changes of the host cell, leading to a decrease in membrane tether force associated with membrane blebbing and actin filament remodelling [1]. 
20.70 20.70 21.10 20.80 20.00 20.40 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.80 0.70 -6.29 2 189 2009-01-15 18:05:59 2003-05-20 14:51:26 7 3 152 1 10 130 1 503.80 70 94.95 CHANGED MpIpsh.hHpsShpot-uaKuhpcT...pGhpllShQthtsuc...........Rp.GsphhN..YLQpQ.TS.puht.LashcRshlhhtAhhL.GtpsshlpuMhpQh-hhKl.............ucs.K+Nl.EL...IutchptQ.Gl.sKct...htsth+ph.sp.LNNpsWpsIppslsaNG+pYs.p.hPAucMKIGsKsIFsptYpGKGlCshsT+p.+HhsNhWhSpV.Vc--GK-..lFsGIRHGVlSsYt.cKss..RtVuAcNKAcElhoAALaS+.ELLspALuGcsVsLKlVSsuLLTsoslhGtEtoMlcDQh+AhppL.op.Gc.h+LhIRNpDG.LQpVKlp.cVssFNhGVNELALKhGhGh+ssDphNuEulhpLLGsshh.puh.GGWsuEhltp.PsshpsV.hLApQIK-Ihppp.p+pDsGEPYKLuQRhAhLAapIsAVPsWNCKSGKDRTGM.DuEIKREIIphHpTt.hSt.sSh.sSttp+lFpplLhNSGN.t.pE.NTGssGNKVMKpLs.psLpLSYtcRlGD.pIWp.VKGhSShl ........................................................................................................MpIpsh.hHpsShpot-uaKuhpcT.hapGMQILSGQGKAPAKAPDsRPEIIVLREPGATWGN..YLQHQKTSNHSLHsLYNLQRDLL...TVAATVL...GKQDPVLTSMANQMELAKVKADRPATKQEEAAAKALKKNLIEL...IAART.QQQsGLP.AKEAHRFAAVAFRDAQVKQLN...N..QPWQTIKNTLTH..NG..H.....H...YT..NTQLPA.AEM.K....I..GAKD..IFPS.AYpG.KGVCSWDT+NIHHANNLWMS.........TVSV.H.E.DGKDKTLFCGIRHGVLSPYH.EKDPLLRpVGAENKAKEVLTAALFSKPEL..LN...+AL...p...GEA...VSLKLVSVGLLTA...SNI.....F......G...........K.EGTMVEDQM..RAWQSL.TQP.GKM.IHL.....KIRNKDGD....LQTVKIKPDVAAFNVGVNELAL.K.....L.G...F..GLKASD+YNAEALHQLLG..N.........DLRPEARPGGWVGEWLAQ.......Y.........P.D..N.....Y...E......V.....VNTL...ARQ...IKDIW.KNN.HHKDGGEPYKLAQR.LAML......AH......EI.......D....AVPAWNCKSGKDRTGMMDSEIK.REhISLHQ..THM..LSAPG.S.LPDSGG....QKIF.QK..V..L..LNS...GNLE.IQKQNT.GGA.GNKVMKNLS..PEVLNLSYQ.KRlGDENIWQSVKGISSLI.......................................... 0 5 6 8 +5754 PF05926 Phage_GPL Phage head completion protein (GPL) Moxon SJ anon Pfam-B_1860 (release 9.0) Family This family consists of several phage head completion protein (GPL) as well as related bacterial sequences. 
Members of this family allow the completion of filled heads by rendering newly packaged DNA in the heads resistant to DNase. The protein is thought to bind to DNA filled capsids [1]. 19.80 19.80 21.70 20.60 19.60 19.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.51 0.71 -4.35 33 714 2009-01-15 18:05:59 2003-05-20 15:03:08 6 3 529 0 73 507 5 136.30 41 87.08 CHANGED sIsNs.GFWPDlsltchRcthRl-uolsspRLcpAllsAhupVNs-Ltsa+spppuuGassLusVPu.spls.GcuhhltpYcRAVashA+AsLhE+YpshDsTss.GpcpAcclspohs-LhRDucaAIpclhGpsR.ssVELI ..................................................IpNs.sFWPDlslp-hRpthRl.sus.Vo..s..sRLppshhuAlucVNsELhpa..+ppppst..GassLA-..V.......PA......s.........p.l.............s..G........cShplhaYppAVashA+A.LhER.YpshDsTsp..G...s++u....-plspsts-LWRDs+WAIucltstP.R.shV-LI.............................. 1 8 29 51 +5755 PF05927 Penaeidin Penaeidin Moxon SJ anon Pfam-B_2675 (release 9.0) Family This family consists of several isoforms of the penaeidin protein which is specific to shrimps. Penaeidins, a unique family of antimicrobial peptides (AMPs) with both proline and cysteine-rich domains, were initially identified in the hemolymph of the Pacific white shrimp, Litopenaeus vannamei [1]. 25.00 25.00 25.50 26.30 20.30 20.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.98 0.72 -3.44 8 55 2009-01-15 18:05:59 2003-05-20 15:26:31 6 1 11 2 0 54 0 72.10 65 97.04 CHANGED MRLVVCLVFLASFALVCQGQuYKuGYTRPlPRP.......Pa.....G.tshtsh.slC.suC+tLohSpARuCCsRLGRCC+htKG ................................MRLVVCLVFLASFALVCQGQsY+GGYTRPlPRP...........Pa.....G..t.....Phhsh.suC.suC+..sIohSpAR.uCCpRhGRCCHlhKG.. 0 0 0 0 +5756 PF05928 Zea_mays_MuDR Zea mays MURB-like protein (MuDR) Moxon SJ anon Pfam-B_3145 (release 9.0) Family This family consists of several Zea mays specific MURB-like proteins. 
The transposition of Mu elements underlying Mutator activity in maize requires a transcriptionally active MuDR element. Despite variation in MuDR copy number and RNA levels in Mutator lines, transposition events are consistently late in plant development, and Mu excision frequencies are similar [1]. 20.80 20.80 21.20 23.40 18.10 19.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.37 0.70 -4.90 4 19 2009-01-15 18:05:59 2003-05-20 15:35:39 6 1 2 0 1 21 0 203.00 85 94.63 CHANGED MDhPPSKVVADAVEAARAAAVAASEARCAVFVAEKEAKAAVQFAAIAVDKVEAVKASSNVDLVDFKYHVNIKNSLRYAIQEMRRQoKLLHSVQKLCSTIPEVQGGKIGKVRGHLEHVCKELDKTSIVCEEDLETKNPTWDLYDNPSVDDEHPLDDDELGDGYSTEDPELWEMVF-DFKWEEIKANVSFEEHYRVINYRFEEINDRNM .......MDhPPSKVVADAVEAARAAAVAASEARCAVFVAEKEAKAAVQFAAIAVDKVE....AVKA......SSNV...DLVDFKYHVNIKNSLRYAIQEMRRQoKLLHSVQKLCSTIPEVQGGKI.GKVRGHLEHVCKELDKTSIVCEEDLETKNPTWDLYDNPS.V.DDEHPLDDDELGDGYSTEDPELWEMsFEDFKWEEIKANVSFEEH.RVINYRFEEINDRNM.............. 0 1 1 1 +5757 PF05929 Phage_GPO Phage capsid scaffolding protein (GPO) serine peptidase Moxon SJ anon Pfam-B_1730 (release 9.0) Family This family consists of several bacteriophage capsid scaffolding proteins (GPO) and some related bacterial sequences. GPO is thought to function in both the assembly of proheads and the cleavage of GPN [1]. The family is found to function as a serine peptidase, with a conserved Asp, His and Ser catalytic triad, as in subtilisin, and as represented in MEROPS:S73. The family includes SwissProt:P25478 from Enterobacteria phage P2 which cleaves itself and then becomes the scaffold protein upon which the bacteriophage prohead is built - a mechanism quite common amongst phages [2]. 
21.50 21.50 22.80 22.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.87 0.70 -5.15 5 734 2009-01-15 18:05:59 2003-05-20 16:35:53 6 3 536 0 77 566 6 255.80 42 93.46 CHANGED SKaFRIAVEGATTDGRsIpR-WI--MAAoYDPcVYGARINLEHIRuhLPDGsF+AYGDVTALKAEEI-.sG..sGKLALFAQIEPT-DLlslNKu+QKlYTSMElsPKFADTGKAYLVGLAVTDsPASLGTEhLoFu.toAKsNPLAsRKQNP-NLFTsAEEssLEFEE.....lsE..oVssuLhs+VKsLFs+K-ASD...DARFuDVpEAVEsVAEHVQs..utTEppLuEtE+Ahot.cQplsspt-cpspsFscLKsoL-+T-uhuQptRPsATGGGu..slLTDC ....................KhFRluVEGsT.sDGRpIptpalppMAcoYsP.p.V.YsApINlEH........h+..u..hh...P...s...u....F.p.paG-VsuLp.uE..-..Is-..ss...LtGK..hALaAclpPT....-c.Llph.s.+.puQKlaTShE..lpP..p.FAsTG+AYLVGLAsTDsPASLGTEh.LpFs.....tps.p........p.ss.......l...s...sp........+.....t..s.s...t..s....L...h.os..A....-s.t..l..E..h....p-................tp...p.t.s.sh.hs...+V.p..ul......h...s...+...pptuD....................DApF..s...cl..pc....A......Vp.h..VApp.p...p...h.s...th...pttLs-.c..pp.........t......cptl............p.....t......p....pthstL...pppLp...p.p-u....p.......R...pATG.usu....s.hss............................................................................................................................................................... 0 10 33 56 +5758 PF05930 Phage_AlpA AlpA; Prophage CP4-57 regulatory protein (AlpA) Moxon SJ anon Pfam-B_2048 (release 9.0) Family This family consists of several short bacterial and phage proteins which are related to the E. coli protein AlpA. AlpA suppress two phenotypes of a delta lon protease mutant, overproduction of capsular polysaccharide and sensitivity to UV light [1]. Several of the sequences in this family are thought to be DNA-binding proteins. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.25 0.72 -4.24 23 2885 2012-10-04 14:01:12 2003-05-20 16:46:44 7 8 1195 1 461 2198 442 49.80 31 65.72 CHANGED pRhlRhtEVhphsGluRosIYchhc......cs.cFPppl+LGsRuVuWhpuEl-pWl ............................hlchtplh.phs.G...l..o.cs.tl.Yc.h.lp.........cG..pFP..p.s.l.+..l...G...............R...s.....s..s.......W.h..poElctWl.................... 0 82 241 355 +5759 PF05931 AgrD Staphylococcal AgrD protein Moxon SJ anon Pfam-B_2868 (release 9.0) Family This family consists of several AgrD proteins from many Staphylococcus species. The agr locus was initially described in Staphylococcus aureus as an element controlling the production of exoproteins implicated in virulence. Its pattern of action has been shown to be complex, upregulating certain extracellular toxins and enzymes expressed post-exponentially and repressing some exponential-phase surface components. AgrD encodes the precursor of the autoinducing peptide (AIP).The AIP derived from AgrD by the action of AgrB interacts with AgrC in the membrane to activate AgrA, which upregulates transcription both from promoter P2, amplifying the response, and from P3, initiating the production of a novel effector: RNAIII. In S. aureus, delta-hemolysin is the only translation product of RNA III and is not involved in the regulatory functions of the transcript, which is therefore the primary agent for modulating the expression of other operons controlled by agr [1]. 20.50 20.50 20.70 20.80 19.60 19.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.30 0.72 -4.33 21 248 2009-01-15 18:05:59 2003-05-20 16:54:19 6 1 222 0 8 56 0 45.10 49 97.59 CHANGED hplhshlhchhstlFphIGslAuhssCsuaFDEPEVPcELTcLac .........ppLhNhFh-hlsslhcsIG.lAuhssCshlhDEsEVPcELTpLaE.... 
0 2 2 8 +5760 PF05932 CesT Tir chaperone protein (CesT) family Moxon SJ, Bateman A anon Pfam-B_2921 (release 9.0) Domain This family consists of a number of bacterial sequences which are highly similar to the Tir chaperone protein in E. Coli. In many Gram-negative bacteria, a key indicator of pathogenic potential is the possession of a specialised type III secretion system, which is utilised to deliver virulence effector proteins directly into the host cell cytosol. Many of the proteins secreted from such systems require small cytosolic chaperones to maintain the secreted substrates in a secretion-competent state. CesT serves a chaperone function for the enteropathogenic Escherichia coli (EPEC) translocated intimin receptor (Tir) protein, which confers upon EPEC the ability to alter host cell morphology following intimate bacterial attachment [1].\ This family also contains several DspF and related sequences from several plant pathogenic bacteria. The "disease-specific" (dsp) region next to the hrp gene cluster of Erwinia amylovora is required for pathogenicity but not for elicitation of the hypersensitive reaction. DspF and AvrF are small (16 kDa and 14 kDa) and acidic with predicted amphipathic alpha helices in their C termini; they resemble chaperones for virulence factors secreted by type III secretion systems of animal pathogens [2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.52 0.71 -4.26 52 1366 2012-10-01 22:01:34 2003-05-20 17:02:43 8 4 525 41 194 749 35 112.90 19 76.61 CHANGED pLlpphuppluh...slshscsusssLth..spthhhlhhsppsspLllts.luphssstt......hhpplLphNh.h.tphtsshlulc.pssplhLptphshpt...lsts.phpshlpthlstscphp ......................................................hlpphupplsh...slt...h..s..p..s.shhtlhl....sp.hhhh.h..h.s.t..p...sp..h...l.hlhshlst.s.sss.s..........hhtplLph.N.h.h.tppssstlu.hs.pstpllLhhphshsp...hssp.pltshlpshlpphcth............................ 
1 71 103 148 +5761 PF05933 Fun_ATP-synt_8 Fungal ATP synthase protein 8 (A6L) Moxon SJ anon Pfam-B_2993 (release 9.0) Family This family consists of fungus specific ATP synthase protein 8 (EC:3.6.3.14). The family may be related to the ATP synthase protein 8 found in other eukaryotes Pfam:PF00895. 24.20 24.20 24.30 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.34 0.72 -3.93 18 148 2012-10-02 21:03:42 2003-05-21 09:35:49 8 1 141 0 36 135 596 47.90 49 86.49 CHANGED MPQLlPFYFlNplsauFlllolLlYlhSpYILPphlRLalSRhhIs+L ..MPQLlPFYFhNQlsauFlllslLlYlhSpYILPphlRLaloRhaIsKL......... 0 12 25 31 +5762 PF05934 MCLC Mid-1-related chloride channel (MCLC) Moxon SJ anon Pfam-B_2711 (release 9.0) Family This family consists of several mid-1-related chloride channels. mid-1-related chloride channel (MCLC) proteins function as a chloride channel when incorporated in the planar lipid bilayer [1]. 25.00 25.00 26.20 25.40 24.00 23.90 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.21 0.70 -12.78 0.70 -6.21 2 95 2009-01-15 18:05:59 2003-05-21 09:46:09 6 3 52 0 42 97 0 343.90 40 92.71 CHANGED hpLhLh.sLhLsssYu.asD-WIDPoDMLNYDAASGpM+p.....................oQt.hthusEhc.sPDhoCusEhpEhhpKL-sLphpl-..EpK+hEc.cSpSpslF+RYLNKILIEAG+lGLPDEshschHYDAEllhphEhL.EIQpFLNstDWpsGALDDALSshLlpFKaHs.EpWKW+FEDSFGVDsYslhMllLClLClVhLlATElWTaltWaTQL+RlLIlShlhShGWNWMYLYKlAFAp+QAElAKhpshsp.CupKhsWotSla-Wh+uuhTapsDPCpcYachLlVsPhhhVPPTKALAlTFTsFlTEPLKHIGKGhGEFlpALhpEIPhhhplPVLIhhAlhlLuFhYGAGpuV....plhR+lsGPEpE.P.slcPpcppRpc.I-..........Dh+h.sthG...............pl.R-pDV..........spl.+t.slsDh-upppPsV.......shsscPp-TGtlhupsTsc..spEspp.tKsl..StpDppsNTEus.At.p..........................Rsp-PVspsht 
..............................................................................................................................................hlh...h..hh...s..t...sD-.WlDPtDMLNYDusotpM+p..............................t.......p.....s....t.......s.p.tp...h..pl..h..pht..ppp.pht.p..ps............pst..lF+RaLp+hL.-ht+hGhspt............YDsplhhphp.h.El.thlttt.t.hp..uslspuls..ph..Lh...ph+.ps....ptWpWpFEs.Fsl.-..shhhlh...lslLhlVhll.s..TplaohltW....ahQ.lpRl.hhlsFlhShhWNWhaLYKhAaAp+...pspl...s..Kh.t..hsp.Cs..cc.hsW....tslh..-ahptth.Th..ps.DsCpcYaEhLlVsPhh.VsP.oKALulThTsFlsEPL.KalGpGhuEFlcuLh+-lPh.hhp.lP.VLl...h..hslsl.lshhahss.th....................................................................................................................................................................................................................................................................................................................................................................................... 0 10 12 25 +5763 PF05935 Arylsulfotrans Arylsulfotransferase (ASST) Moxon SJ anon Pfam-B_3266 (release 9.0) Family This family consists of several bacterial Arylsulfotransferase proteins. Arylsulfotransferase (ASST) transfers a sulfate group from phenolic sulfate esters to a phenolic acceptor substrate [1]. 
24.20 24.20 24.60 24.40 24.10 24.10 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.83 0.70 -5.99 33 999 2012-10-05 17:30:42 2003-05-21 09:52:45 6 8 517 6 144 1072 349 451.30 30 83.36 CHANGED hlhlNPY...ssuPLoAllthsspphsslploVpsK.tpsuhsIsasss....ppthps+s..........lPVhGLYscatNpVplphsp..sGcs...hpcshplpssslssthsst..........pspshK.sssthtspLYhlss..................................s.pshsahlDspG-lRWahs.sthhstt............hcphpsGphhhsps...........pp.hhchDhhG+..hlpp.tcLsssa.............shpH-hhphsN.......GNhLltsup.p.h......s.......psscolcDhllElD.psGpllchWDlhclL..DPhRs..h...............................ptshsshsGss.su+sWhHlNultYDtpDDSlIlSuRHQsullKIs.....cspclpWIluspc..GWscp.......h.pphlLpPlct.G..h...............pusFcasasQHsshhlss....p.....lhlFDNGssRuhppsshs..............c..YSRuVcY+IDppshTVpQlWpYGKp+.G.chYSslsSss..-ahsc.sshhlhSusssh.............t.tt.........................lsEhchps...pcsth-hphpss.t.........sYR .......................................................................................................................................................................................h.l.hsPY..s.sPLoAhl....ts...h.ts.lplplhsp...tps..ltaphs.......h..sa...........lPlhG..........Lh.ph.Nplplphtp....uph......p.th...h.sssh..........................phps.p.sssthpstL.Yhhsp..................................t.hsshshhhDppGp..h..R..aa.h.s..sth.h.s.........................hp.p...h....p.G.t..hh.h.sts..........................pc.h.h.c..aD.h.h..Gp.....hlhp......hcL..s..pta....................................................p.h..p.Hs.h...pt.s.N...............G.s..hllt.s...u..p....shhp.........s.............................hphpT.lc.....D.t.I.l.....E.V.......D..p....s....G.....p..lV.c.....W...c.hh.clL......DshRs.s.hh....................................................................pp.hu..s..h...sG...su...ss+sW..hHlNSl.......s.......Y..........D...s......p......D...............D............u..IIlSuRHQsullpIs..........csp.pl.+..WIL.u..ssp.
..uW..ptt........h....tth..lLpPVctpG..l........th............tps..sFcaoa.sQHsuahlspt...............h.lhsF.DNG.ssR..uh..-p.Ps..h.s........................................hp..YSR..hVp.....Y...p...I....D.p..p..p..h..TV......p...plW.....p........Y...G........K..-........c....G................c.......a..Y...S.s.hsSss...ch.ss...sshhhatushs.h...............tp...s....................hlpEhp.tp.ppshhEhph.us.................................................................................................................................................... 0 48 86 119 +5764 PF05936 DUF876 Bacterial protein of unknown function (DUF876) Moxon SJ anon Pfam-B_3279 (release 9.0) Family This family consists of a series of hypothetical bacterial sequences of unknown function. 19.60 19.60 20.00 20.10 19.20 19.50 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.11 0.70 -6.03 124 1520 2009-01-15 18:05:59 2003-05-21 09:55:25 7 6 940 0 277 1100 65 410.80 30 94.64 CHANGED HFQQQ-Rah-thhctpspshssas..WGhppLp.lDpphLshG+lslppspGlhPDG.Thashstss.sh.PssLslss........................sstspl.VaLAL.Pltpssssp.ss.spt..............ssssRa..tspttcltDtpssssp.......................tplpluphslc...Lhhpp-..spssa...sslslARlhc..ps-GslhL.Dps.FlPPhLshtu.sshLtphlpclhshlptRucsLutRh.ssssp.s..usu-lscFhh...LpslNchtPhlpHl.hptsplHPEpLYppLhplsGcLsTFo........ps+psss.hPsYpHcshtssF.ssLhptLRphLssV..lpppshslsLp.pp...phuhhhuhlpDsplh.p..supFlLuV.+Ashssc.pLpppFs...ppsKlGus-clcpLVs.pLPGlsLpsLsssPp.plPh+sGhtYFpL.-.psushWpph.tpuuuhAlalsuph..s.sLcl-laAl+ 
....................................HFQQQpRah-ahhppphpshs.sa...WGhsplp.lsp-hLs.G+ltlppspGlhPDG.ThFsh...Psps..sL.Psslslps........................sptssh.laLAl.Plhpssttp.hs.spp..................pthuRa..p.ptt-l..pDhputtts........................tslpluphpl+...Lhhpp-.......sps..u.a...hslPls.Rlhch...pss...GsltL.Dcs.FIPPhlshpu..ush............L.....t.ph...lpclhshlptRupsLupRh..ss.ssp.t......usA-Vu-Fhl...LphLNRhpshlpHh.tp.hstlHPEpLYppLspLsG-LhTFo..........s+tsss.hssYpHcs.tssF.psLhstl+phLssV..L.spslslsLp...ph...p.uhhhAslp.-spLh..ppusalLAV.+AshPs-.pLpppFs...tpsKluus-clcpLVsspLPGlsLpsLsssPp.plPa+sGhpYFpL..D..pputhWpph.tpuuuhAh+luGsFs..sLphpLWAl+.............................................................................. 0 46 113 193 +5765 PF05937 EB1_binding EB-1 Binding Domain Yeats C anon Yeats C Family This region at the C-terminus of the APC proteins binds the microtubule-associating protein EB-1 [1]. At the C-terminus of the alignment is also a Pfam:PF00595 binding domain. A short motif in the middle of the region appears to be found in the APC2 proteins (e.g Swiss:O95996). 25.00 25.00 53.00 53.00 19.50 21.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.20 0.71 -3.89 2 49 2009-01-15 18:05:59 2003-05-21 10:46:52 6 13 34 8 26 49 0 167.40 79 6.09 CHANGED RSGRSPTGNoPPVIDsV.-pup.p.csuKDsps+pNsGNGsVPh....LENR.pSFIpVDu.DpKGT-.Ksh.NN....pETNEsolsERTsFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSsu-soouRPSQIPTPVsNsTKKRDSKT-oT-SSGoQSPKRHSGSYLVTSV .........................RSGRSPTGNTPPVIDSVSEKGNsshKDS...KDNQuKQN.VGNGS..sPhRThGLENRLNSFIQVDuPDQKGTEsKPGQ.sNPVPsuETsESSlsERTPFSSSSSSKHSSPSGTVAARVTPFNYNPSPRKSSADSTSARPSQIPTPVNNNTKKRDSKTDoTESSGTQSPKRHSGSYLVTSV....... 0 1 4 10 +5766 PF05938 Self-incomp_S1 Plant self-incompatibility protein S1 Moxon SJ anon Pfam-B_3292 (release 9.0) Family This family consists of a series of plant proteins which are related to the Papaver rhoeas S1 self-incompatibility protein. 
Self incompatibility (SI) is the single most important outbreeding device found in angiosperms and is a mechanism that regulates the acceptance or rejection of pollen. S1 is known to exhibit specific pollen-inhibitory properties [1]. 21.10 21.10 21.20 21.10 20.50 20.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.90 0.72 -3.95 45 443 2009-01-15 18:05:59 2003-05-21 10:49:09 6 15 25 0 271 427 0 103.40 25 60.77 CHANGED spVhIhNcL....s.....ssps.Lt..lHCpSp..-cDLGhchlps....sppasapF.....css..hh.tsThFhCphph....sstt.ptpFclYpsppc.t........tp.shWps+c...DGhahhppthsh................phphsWp ..........................h.tlhlhNph.....s.....ssps...lp..lHCpSp..-.c.D...LG.p...h...lt.......upph.p.a.pF.......c.s....hh..tsThF.h...Cshpa.......st...........hphpFcsapsp.c.-.ttt.............tp.ChWplpc....-G....lYhhppp.t.t.......................................................... 0 78 141 181 +5767 PF05939 Phage_min_tail Phage minor tail protein Moxon SJ anon Pfam-B_3296 (release 9.0) Family This family consists of a series of phage minor tail proteins and related sequences from several bacterial species. 21.10 21.10 21.20 22.10 20.90 20.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.41 0.72 -4.10 33 1252 2009-01-15 18:05:59 2003-05-21 10:57:39 8 7 595 0 60 582 108 107.20 47 95.89 CHANGED hETFsW..pspsss.ssssp.pV+pspFGDGYpQpsusGlNscppsasloasGscsphts.....lpsFLcRHuGs+uFhWTPPhsshslah..ssphpssshuushhsloATF-QsFp ...............hcTF+W..cl+ssM..pVsopPsVppV+FGDGYpQRtssGL..NspL..+TYs.V.T.hp....V.s+.p-.sps.......L-uFLscH.G.G.h+AFLWTP.P.a.s.h+p.l+V....sC..t..p..W..Ss....p......s.uh....hh.s..phoApFEQVV.............................. 1 5 23 41 +5768 PF05940 NnrS NnrS protein Moxon SJ anon Pfam-B_3395 (release 9.0) Family This family consists of several bacterial NnrS like proteins. NnrS is a putative heme-Cu protein (NnrS) and a member of the short-chain dehydrogenase family [1]. 
Expression of nnrS is dependent on the transcriptional regulator NnrR, which also regulates expression of genes required for the reduction of nitrite to nitrous oxide, including nirK and nor. NnrS is a haem- and copper-containing membrane protein. Genes encoding putative orthologues of NnrS are sometimes but not always found in bacteria encoding nitrite and/or nitric oxide reductase [2]. 23.50 23.50 31.80 26.30 23.40 23.40 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.47 0.70 -5.56 122 829 2009-09-11 14:42:48 2003-05-21 11:06:32 7 3 628 0 253 740 124 369.60 30 94.48 CHANGED t.s...lhp.hGFRPFFLhuulauslulsl..Wlhh...h.sGths.hs........hsslhWHsHEMLFGFusAllsGFLLTAlts.WTGp.slpGtsLh.......sLhsLWLsuRlsh.hhs..........hshhlshhlDhuFlshhuhhlu+.lhpu.+p.....hRNlhhlshLhlhshsNhhha.....hthh.tts.shtttshpsulhhlshllsl..lGGR............lIPhFTpshlt........sttst....shs.h.l-tsslhsshhshht.....hhhs...shl.suslhhsAuslphlRhh+.WpshtshppP.LLahLHluYhalslGhhlhu....huhh........shs.shusulHslslGulGshlLAMhoRsoLGHTGRs..L.tssts.hshuashlhhAAlhRlhsshhh...shhhhhltluushWshAFulashpYsPh..Lh....pPRh ..........................h..slhp.hGFRPFFLhuulaAslulhl..Wlhh.....h..sGth.s...h.........hsslhWHsHEMlFGFusAllsGFLLTAlts.WTGh.ss..spG.tsLs.......sLsslWLsuRlhh..hhs......................hs...hhl..ss...hl...-...shFhh..hhuhhh....up..ll.t.u.+s........hRNh..h...hlshlhhhsh.ss.hhha.....hthh....tts.h..h.....htshpsul..hh.h..s.lllsl..lGGRllPhFTpptls..........................................t..thh.................shhh...h-thsl.hshlhhhlh..................hhhh..........s.l...suslhhsA...us..h.p.hh.Rlhc.Wpsh.tshpcP..lLhhLHluYhalslGhlhhu..................huth.......s.h...s.ssulHhhslGulGshhLuhhsRsuLGH.TG.Rs...l...hss.s..hthAahhlhhAAllRshushh......shh....hhhltluuslWshAFslashpYsPhLhpsR............ 0 68 149 209 +5769 PF05941 Chordopox_A20R Chordopoxvirus A20R protein Moxon SJ anon Pfam-B_3744 (release 9.0) Family This family consists of several Chordopoxvirus A20R proteins. 
The A20R protein is required for DNA replication, is associated with the processive form of the viral DNA polymerase, and directly interacts with the viral proteins encoded by the D4R, D5R, and H5R open reading frames. A20R may contribute to the assembly or stability of the multiprotein DNA replication complex [1]. 25.00 25.00 68.10 67.80 17.90 16.50 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.98 0.70 -5.69 12 69 2009-01-15 18:05:59 2003-05-21 11:13:31 8 1 42 0 0 58 0 326.60 57 77.69 CHANGED MoopsDLppLKELL+L+cslclu-ppsp-+YNuLV-WAopsYW+lulp+lssscsSIscYYpss+scs.FtLcsGcYhFLshpFGssalYh+G.shhELGSG.sthpIscch+shh-tllsc.sDlcFLRFVhF+ppWllEDshSchps.PhshLchAup.Glpsssalplc.lccsshFs--DYsslcphhtsh...csFhhsulChl+-Gs.cRpllDFhphsaspVcsI-LE.lssNhalPplITtsGpplLV+DlpHLlcS+s+hsoFVsV++h+shhlLs-psstss.E.o+uEsLpRIlcph.Gs-aFlN.GpYlSKl.sshslpplos+.LGl.hsCssl- ...MTS.SuDLoNLKELLsLYKSL+FSDuAAlEKYNSLVEWuTsTYWKIGV..pKVAslETSISDYY-ElKN...KP.FsI-PGhYIFLPlYFGoVFIYSKG.NMhELGSG.NohpIPD-hRSACsKVLcusssI-FLRFVLhN.NRWIhEDAlSKYpS.PVNIFKlASEYGLNlspYLcI-.IEEDTlFs-EhYulIE+uFc.....DpF.thSIsYIKhG-h+RpVVDFFKaSFMYIESIKl-+IG......DNlFIPolITKSGKKILVKDVDHLIRSKVREcoFVpVKKKNoFoILhDhsGsGo.E.TRuEVI+RII-.oI.GRDYYVN.GKYhSKV.GsAuLKQLoNK.LsI.s-CsTV-..... 0 0 0 0 +5770 PF05942 PaREP1 Archaeal PaREP1/PaREP8 family Moxon SJ, Bateman A anon Pfam-B_2248 (release 9.0) & Pfam-B_9342 (release 9.0) Family This family consists of several archaeal PaREP1 and PaREP8 proteins the function of this family is unknown. 
21.00 21.00 21.00 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.45 0.71 -4.22 116 500 2012-10-01 22:14:54 2003-05-21 11:19:01 6 1 40 3 319 475 0 112.80 25 72.44 CHANGED s+LctAhh.hc.AtchLccG.htpAutKhapAhcthlpAlAtt.....h...hptcpphtttt.................hhptlsplhpphs.pplhhhhshAhs.LHtht..hh-sp..hshschpsppcshccll .............................hl..Ahh.hp.A.chLc....cG........hhpAutKhapAhcthlpALuht...t.h....hppctch.tpp................................hhptlsplhpc.lG.ttlh.hh.sh...Ahp.LHtht..hh-sphthschppptpshccl............................................................................... 0 102 136 238 +5771 PF05943 DUF877 Protein of unknown function (DUF877) Moxon SJ anon Pfam-B_2566 (release 9.0) Family This family consists of a number of uncharacterised bacterial proteins. The function of this family is unknown. 19.40 19.40 19.70 20.00 18.60 18.60 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.21 0.70 -5.99 95 1739 2009-01-15 18:05:59 2003-05-21 11:22:27 7 7 993 0 325 1173 101 394.70 43 83.71 CHANGED l-phIup..IDptlSpQ...lstIlHpscFQpLEuuWRGLcaLVppo-...sspslKIclLsloKcELhcDhc..cus-hsQSsLa+plYpp.EaGphGGcPaGsllGDYpFspssp......DlplLppluplAAuAHAPFluuuuPphFGh-sap-L.ssh+DlsplF.-ss-..YspW+uhR-oEDuRYlGLshPRhLhRhPYus.cssPlc...sFsacEp........ss..scccYLWuNAAauhAsplscuFscaGWsspIRGspuG....GtVps..LPsH........hapss.GshphKsPTEltIoDRREtELuc.GFIPLshtKsoDhAsFauupSlp....KPpha.........psppAssNu+LuupLPYlhssoRhAHYLK.VlhR-pIG..Sapp.pp-lEp.LNpWlppYVsssss.ssp-h+A+pPL+pAcVp......VpElsGpPGaYpsshhl+PHaQh-shsssLpLVucLs 
......................................pthIAc..lDpplScQlshIlHps-FQplESsWRGLchLVppT-.....hccslKlcl.L..sloKc-Lhc.Dhc..pus..-hsQ.SslaK+lYpp...EYG.p.hGGEPhusllusYtFspos.......DlpLLptlupVuAsAHuPFIuusuPphhth.cSapEL.ss.+DLsplF...-ssc.YspW+uhR-S-DoRYlGLThPRhLhRlPYu....s.cssPVc...sFsatEp.l....................ss.....sHpcYLWuNuAauhAsplscSFtcauWC.stIRGspuG......GsVc..sLPsH........hatot..GtlphKhPTElhIoDRREhELAc.GFIPLshpKsoDhAuFFu.ApSlQ.....KPt.a.............pst-AssNucLu.upLPYlFh..hsRhAHYlKsl.R-pIG..Sa+-.Rp-hEp.LNpWItpYVsspps.ssp-s+u++PLpsAc.V...Vp-.l.cG.p.PGaYpsthhlRPHFQh-GhshsLpLVucLs.............................................. 0 64 150 236 +5772 PF05944 Phage_term_smal Phage small terminase subunit Moxon SJ anon Pfam-B_2645 (release 9.0) Family This family consists of several phage small terminase subunit proteins as well as some related bacterial sequences [1]. 21.00 21.00 21.50 25.00 20.40 18.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.50 0.71 -4.49 36 711 2009-09-11 11:40:06 2003-05-21 11:27:24 7 4 530 0 70 559 9 130.40 42 55.90 CHANGED hL.hpLspDpppLKslpShpcKsphKRc.lLPpYhsalpGsL....suGpGtQD-llspshlWtlDsGDhcsALclApYAlcas.LshP.-pFpR.ohsshlA-ElsshAt....pshpsupsh-sthhpp...........sh....clssst..DhPDpl ...............LhpLttDpccLKulpSpptKAthK+E.LL..P..catsWl-GsL............pu....Gt.u.t..Q.D..-.Vlh.plMlWtlDsGDhssAL-IucaAl+as.LsMP.tpapR.sssshlsEElus.sA.....ps.t.t.su...p...s.h...D....s....s....hLhp...................sh.....-lssst..DMPDp............................. 0 8 30 51 +5774 PF05946 TcpA Toxin-coregulated pilus subunit TcpA Moxon SJ anon Pfam-B_3639 (release 9.0) Family This family consists of toxin-coregulated pilus subunit (TcpA) proteins from Vibrio cholerae and related sequences. 
The major virulence factors of toxigenic Vibrio cholerae are cholera toxin (CT), which is encoded by a lysogenic bacteriophage (CTXPhi), and toxin-coregulated pilus (TCP), an essential colonisation factor which is also the receptor for CTXPhi. The genes for the biosynthesis of TCP are part of a larger genetic element known as the TCP pathogenicity island [1]. 25.00 25.00 66.60 65.80 19.30 18.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.70 0.71 -4.26 14 136 2012-10-03 10:38:27 2003-05-21 12:21:49 7 3 59 7 5 90 0 125.20 64 58.82 CHANGED uuLVsLGKlSsDEA+NPFoGsshsIhuh.cNuAu....NKuFAIpVsGLTQsQC+oLlTsVGDhFsYVsVpsuuusAhutLsDFtss.usAs.sGsGllKSlussupsLsLs-I.HlppLCp....ssusFuVshG .sGLVSLGKlSADEAKNPFTGTsMsIFSFPRNuAA.....NKAFAIoVsGLTQAQCKTLVTSVGDMFPaIsVKpuAh.sAlADLuDFETosAsAA.....TGsGVIKSIAPuSsNLNLTsITHVEpLCs....GTusFoVAFG........ 0 1 3 5 +5775 PF05947 DUF879 Bacterial protein of unknown function (DUF879) Moxon SJ anon Pfam-B_3751 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
25.00 25.00 29.80 29.70 23.10 21.80 hmmbuild -o /dev/null HMM SEED 602 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.70 0.70 -6.16 134 1615 2009-01-15 18:05:59 2003-05-21 12:25:26 7 4 942 0 303 1242 93 573.10 31 98.90 CHANGED lLcYYpcELsaLRchutEFAppaPclAscLuhpt...spssDPaVERLlEGFAFLsARlppKLD--aPchTcuLLphLaPpYLpPlPShollphpPs.s...tlspuhplsRGotlpopssp.............tst....................CcFRTsp-lsLhPlplspuph....psssts...................puslclpL.....pssss..shupLs...........lcpLplaLsG.spthutpLachLhppshshhlp.sssp..............hsL.ssss...lp.sGFss--uLL..Phst.psFpGaRLLpEYFshP-+FhFhclsGL.......t..htts.........pphclhlhl.....tppsst.httplsspphpLaCoPllNLFp...ppu-PlplstppsE.Yhlhs.cpppstshElaSlcpVpuhpps..............tppp...............ah..Paauhpc...ttttst..pst.Yaph..RRcsphhsp..p.............salollcts........tpshp.......sh.ctLolclh.CTNRcLPpp.Lsh......sphshshtpu.ssstslpslpsPotPhts....hss.............phtWRLlSpLuLNaLSLh..........pusps.L+plLpLYshp.......ssstspcpl-ulhslpscssschls.....sshsRGlclpLslDpssF..sssshaLFusVLc+FauhasolNoFspLplhspppsc.hhpWssphGppsll 
.............................cYYpcELsaLRch.ut-FAptaP+lAthLuht.....tps.sDP.VERLlEuFAFLouRlppKlD--aPEhTcuLlphLaPpYLpPhPSh...ullphpPchp.....phspshh.lsRsotltopsst.............tst........................CpFpospDlpLhPlp.lppsph....pssstt.....................puslplph................pstss...hshu.pLs.................LscLphaLsG..-phhsspLathLtpphhshhlp.sssp..................hsL...spth.......lp.hGFss..p-uLL....P.hst...s....s....apG....YpLLpE..YFsaP-+FhFhclsGl.............................sthhtth..tspthplhlhh.......p.p.sh.hthplstsphpLaCsPlINLF....+cu...-...slplstp.psE.Y.lhs.sp.p.p.s...t.caE.laSVspVhuhpps..............sspp....................a.PF.uhp+...psthst.....pst..Yaph...R..pcpphhspthp.......................phalullcts..........tssh.........stcslSlpl.h.CTNRcL.Ppp.lt.......sshshshpss..ssstshpslptPotPhhP....hcs.............phtW+LlSpLu...hNaLoLh........................tssps.L+phLt.lasht.............psphspp..pl-....ulhplcppsls+hhs.............s.....hhsRGlphpLsl-ppsF...stsshaLFGpVLp+FhuhYsulNoFsplslhstpptc.hhpWs.+hGpp.................................................. 0 46 112 205 +5777 PF05949 DUF881 Bacterial protein of unknown function (DUF881) Moxon SJ anon Pfam-B_4053 (release 9.0) Family This family consists of a series of hypothetical bacterial proteins. One of the family members Swiss:Q45543 from Bacillus subtilis is thought to be involved in cell division and sporulation [1]. 
25.00 25.00 25.50 25.40 23.20 22.20 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.64 0.71 -4.75 103 1279 2012-10-02 00:20:33 2003-05-21 12:37:39 7 1 506 2 348 917 13 150.00 28 57.10 CHANGED ppclcphphhuGhssVpGPGlplTlsDsstt.............s.s.hhlpsp-.lhpllN-LhsuGAEAIuINs.........pRlsssotIcss..Gs..slhlss.phhss....P..asIpAIG-....sssL.psuls.sushhphhpth......ulplplpppc......plpl.suhsss....phpaAcss ................p..phpphthhuGhssVpGPGlplTlsDssts.................................t.s..hl.pspD..lhpllNpLhsuGAEAlsINs.....................pR.lsssotl+ss....Gs..slhlss.psh.u.s....P...YsIpAIG.c....spsL.psuls.....s..u..s...h.lpthcph......Gls.hp.lppp.c......plsl.suhsss..thpaAp.................. 0 139 265 314 +5778 PF05950 Orthopox_A36R Orthopoxvirus A36R protein Moxon SJ anon Pfam-B_4070 (release 9.0) Family This family consists of several Orthopoxvirus A36R proteins. The A36R protein is predicted to be a type Ib membrane protein [1]. 25.00 25.00 34.90 34.90 20.30 19.50 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.06 0.71 -4.30 3 57 2009-01-15 18:05:59 2003-05-21 12:45:40 6 1 22 0 0 43 0 129.00 92 68.68 CHANGED MMLVPLITVTVVAGTILVCYILYICRKKIRTVYNDNKIIMTKLKKIKSSNSSKSSKSTDSESDWEDHCSAMEQNNDVDNISRNEILDDDSFAGSLIWDNESNVMAPSTEHIYDSVAGSTLLINNDRNEQTIYQNTTVVIN-TETVEVLNEDTKQNPNY ..MhLVPLITVTVVAGTILVCYILYICRKKIRTVYNDNKIIMTKLKKIKSS..NSSK.SSKSTDsESDWEDHCSAMEQNNDVDNISRNEILDDDSFAGSLIWDNESNVhAP..STEHIYDSVAGSTLLINNDRNEQTIYQNTTVVINE.TETlEVLNEDTKQNPSY..... 0 0 0 0 +5779 PF05951 Peptidase_M15_2 DUF882; Bacterial protein of unknown function (DUF882) Moxon SJ anon Pfam-B_4115 (release 9.0) Family This family consists of a series of hypothetical bacterial proteins of unknown function. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -4.78 10 1137 2012-10-02 01:02:30 2003-05-21 12:54:22 8 4 1078 0 236 840 393 149.00 54 64.95 CHANGED puuucsRsL+LaplHTGEKsEhsYhcsG+YspcuLp+ls+lLRDaRRNEss+MDPRLFDLlaplhppoGocsh.IpVVSGYRSPATNuhLRoRS+GVAKKS.HMlG+AMDFaIPGVsLK+LR-sulchQsGGVGYYPpSGSsFVHhDVGsVRpW .....................................................hu.oscPRhLsLpNL.H.TGEsl.c.sc.Fa.c.G.c.s.Ylp-pLs+L..NHFhRDaR...s.N...cl.+sI.DPt.L...FDpLa..c....l...Q...s...hL...G..o..+...c...P.....lplI...SGYR..S.sTNsp.LR..u.+.....S...+..GVAK..+SYHh...+G..Q..AMDF..+I...............-..G......l..s........Lup....l.Rc..A......A..L......u...h....+..AG...G..V...G..Y......Y..PR..S....N...............FVHIDT.....G.PsRpW..................................... 0 56 120 171 +5780 PF05952 ComX Bacillus competence pheromone ComX Moxon SJ anon Pfam-B_4222 (release 9.0) Family Natural genetic competence in Bacillus subtilis is controlled by quorum-sensing (QS). The ComP- ComA two-component system detects the signalling molecule ComX, and this signal is transduced by a conserved phosphotransfer mechanism. ComX is synthesised as an inactive precursor and is then cleaved and modified by ComQ before export to the extracellular environment [1]. 19.80 19.80 19.80 20.50 19.70 19.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.60 0.72 -4.46 11 61 2009-09-11 08:33:53 2003-05-21 13:51:57 7 1 39 0 11 48 0 54.90 33 97.18 CHANGED M......Q-llsYLlcNPEVLcKltsG-AoLlGlsscpspsIlcuFpch.hhotts.h.W.s................t ......M...Q-llsYLlcNP-VlcclppGcASLlGlscc...phpuIlcuFpphphhp.hp...W...p............. 1 5 7 8 +5781 PF05953 Allatostatin Allatostatin Moxon SJ anon Pfam-B_4313 (release 9.0) Repeat This family consists of allatostatins, bombystatins, helicostatins, cydiastatins and schistostatin from several insect species. 
Allatostatins (ASTs) of the Tyr/Phe-Xaa-Phe-Gly Leu/Ile-NH2 family are a group of insect neuropeptides that inhibit juvenile hormone biosynthesis by the corpora allata [1]. 33.50 2.10 36.40 2.50 26.90 2.00 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.30 0.74 -5.77 0.74 -2.90 21 453 2012-10-01 21:03:17 2003-05-21 14:00:26 7 15 40 0 100 478 0 11.20 60 36.41 CHANGED KRssp..YuFGLG .........KRst.......YuFGLG. 0 52 66 88 +5782 PF05954 Phage_GPD Phage late control gene D protein (GPD) Moxon SJ anon Pfam-B_4333 (release 9.0) & Pfam-B_12199 (release 10.0) Family This family includes a number of phage late control gene D proteins and related bacterial sequences. This family also includes Bacteriophage Mu P proteins and related sequences. 23.50 23.50 23.70 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.12 0.70 -5.22 283 5310 2012-10-01 22:58:23 2003-05-21 14:04:18 6 63 1555 10 1086 5029 137 285.80 18 48.10 CHANGED pc.uls.p.hphplsh...h...................sssslthtt...hlGpslslpl..................................t.....hpGhl.................sph......t..............hus...cts.........Y...plslpshhthl........shp.p.cs+h.......a...ps.polscIlpplhs...............htLpt...t......hshhs.........assQap.EoDhsFlpRLhccpGlhah..hpp.tt...............p..pLllsDss.ss.....h............stsl.hhttsssss......tpslppap.htpphtsspspspsash...cpsptt.................h...t.............................................ss..hph..hpa.....................sst....................spup...phAch+h..-thpspspphpu..tussps.........l.tsGthhplsst............ts.hsp.......pallhplpaphp..ssh 
.................................................................................................tthp.p.hphplph..h...........t..........pt.sh.htt.......hhsp.hplpl..................................................................p...hpGhl.....................sth......p..........hsp................stt.......hh........a......plpl....ps..h..h..hhh.............shp...p..sp+h...................................a........ps....p...o....l.....pIlpplhp.............................p....htlpt....p................h..t.hc...............a...ss.Q...at...Eo..DhsFlp.RLh.....tc......t.........Glhhh..hcp..t......................p.pLlhscss..ps....................................sttl.shh.s...sttps....................shp.php...ht..t..p.......h..t.s.s...p...lt..hps.ash.......cpspht...........................................................h...t.......t..p...................tts..hth......hpa.........................tsp.............................tpup..thu.p..h.ph......-thppts..........t..phpu...........tu.st.t..............l..hsGthhplps...................psthsp.....pahlspspaphp.p..t................................................................................................................................................... 0 223 517 791 +5783 PF05955 Herpes_gp2 Eq_herpes_Gp2; Equine herpesvirus glycoprotein gp2 Moxon SJ anon Pfam-B_4360 (release 9.0) Family This family consists of a number of glycoprotein gp2 sequences from equine herpesviruses. 
25.00 25.00 318.80 318.00 20.60 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.61 0.70 -5.02 5 19 2009-01-15 18:05:59 2003-05-21 14:10:55 6 1 9 0 0 20 0 226.70 69 33.06 CHANGED KNFMEASCTVET..NSGLAIFWKIGKASVDAFNRGTTHT....RLMRNGVPVYALVSTLRlPWlNVIPLTKITCAACPTNLluGDGsDLNSCToKSTTIPCPGQQRTHIFFSAKGDRAVCITSELVSsPTITWSVGSDRLRNDGFTQTWYGIQPG.VCGILRSEVRIsRssWRhGtso+DYLCElsASD..AKTSDYKVLPNAH...STSNFALVAATTLTVTILCLLCCLYCMLTRPRASV KNFhcAoCTVET..N.GLuhFWKIGNASVDAFsRGTTHT....RlMRNGVPVYALVSTL+lPWlNVIPLTcITCAACsoNhltGstsDLsSCTsKSTTIPCPGQQRTHIFFStKGDRAVCITSELsS.PTITWSVGSsRL+NsGFoQTWYtIQPG.VCGILRSEV+IpRsoWRhGuso+DYLCElosSD..uKTSDYKVLPNAa...STSNFALVAATTLTVTILCLLCCLYCMLTRPRASV....... 0 0 0 0 +5784 PF05956 APC_basic APC basic domain Yeats C anon Yeats C Motif This region of the APC family of proteins is known as the basic domain. It contains a high proportion of positively charged amino acids and interacts with microtubules [1]. 25.00 25.00 71.10 38.20 18.60 18.30 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.47 0.70 -5.32 4 93 2009-01-15 18:05:59 2003-05-21 15:32:18 6 35 37 0 46 80 0 308.10 51 13.43 CHANGED slhRGRTVIYsPustspsps.......soP.pKpuPPspspsusssKsPS.uQpRS+SLHRsuK.SphusLS.PsRSuTPPuRhuKoPSSSSSQTSssSpPh.p.pshsTpsuG...slPG.....PusuL..VP..pSPuRshhA.........pp+KTQ+SPVRIPFMQpss+..PsPLu.....pshsEPusRGhsthpussGA.uu+LtLlRMuSshSSGSE.SDRSG...FhRQhTFIKESPu.hLRR+RsEhSSh-StusospsASPpRupsthPAVFLCSSRCpEL+su..............PcQu.s.t...pQppPtuRss.u.h......ssRRToSESPSRLPl.Russu+.........sETsKRaASLP+ISlhRRssSusSlhouSu- 
...............ul.RGRThIalPuspspopS.......ToP.pKpuPPh+s...s.u.....KsPS.uQptopS..RsuKsS.p.u-LS.ssRpso....ut.sKu.PSp..SuSpsSTPS+Ph.p.hspshQos...G.sph.PG.....Pssp.l.op...lP.ssS.PupAspp......ppsps+ho.pSPsR....Qs.sK...ssLu..so...shspsEsuS+G.hsthpsusGu.sp+lpLsRMoSs+SSGSE.SDRSt...hlRQ.TFIKEuPo.hLRR+hpE.uS..hE...S.husoops.sSPpRupsthPsl...Ssphs-hphu..............Pp.sss.....tptpPttRts.hu...........RopSESPSRLPl.Russh+..........EpsK+.uSLP+lSshRRssSuoSlhoAsu-............................................................... 0 2 7 18 +5785 PF05957 DUF883 Bacterial protein of unknown function (DUF883) Moxon SJ anon Pfam-B_4421 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 40.00 40.00 40.00 40.00 39.80 39.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.97 0.72 -3.59 59 2283 2012-10-02 00:15:32 2003-05-21 16:01:49 8 2 1022 0 347 911 46 93.30 38 87.31 CHANGED cplts-l.cpLhsshEcLLcpsustuspphppLRp+hpptLcps+splssstcs....stc+u+puscss-cY.......V+-pPWpulGluAu.lGhllGlLlu..RR ...........................t.clps-l.ppLs-oLE-VLcS.s....Gsp...ucpc....hpcl....Ru.+A....c....p.sL....ccs+tRls....psscs.......ltppu....+pAsspADcY...........V....+EcPWpu....lGlu.A.A.VGlllGlLLuRR...... 0 46 135 243 +5786 PF05958 tRNA_U5-meth_tr tRNA (Uracil-5-)-methyltransferase Moxon SJ anon Pfam-B_4661 (release 9.0) Family This family consists of (Uracil-5-)-methyltransferases EC:2.1.1.35 from bacteria, archaea and eukaryotes. A 5-methyluridine (m(5)U) residue at position 54 is a conserved feature of bacterial and eukaryotic tRNAs. The methylation of U54 is catalysed by the tRNA(m5U54)methyltransferase, which in Saccharomyces cerevisiae is encoded by the nonessential TRM2 gene. It is thought that tRNA modification enzymes might have a role in tRNA maturation not necessarily linked to their known catalytic activity [1]. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.10 0.70 -5.76 9 7304 2012-10-10 17:06:42 2003-05-21 16:10:13 6 37 4224 7 1556 6444 1014 275.40 29 64.81 CHANGED sYstQLp-Khs+LpshhAPFpuP-.EVFcSPspHYRMRAEFRlWHE..s--haYhMF-QtsK....p+lRl-pFPhASphINcLMstLlssh+ssssL++KLFQV-FLoTLSG-hlloLLYH+pLD-pWcpsApsLps.....thslslIGRu+tpKIslcpDYVsEpLsVsGRpahYRQlEsuFTQPNusVNpKMLEWAh-sspsppG.DLLELYCGNGNFoLALAppFc+VLATEIuKsSVsAAQaNIstNslDNlpIlRhSAEEFTpAhsthRpFpRL+..GIDLcSYphsTIFVDPPRAGlDs-ThcLVptY-RILYISCNPETLppNLppLpcTH+loRhALFDQFPYTHHMEsGVLLp++ ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..............p.................s.............s.........p.........u.FhQ...s..N...s........t.........s....p.......p.....h....h....p....h
....A....h....c......h.........h.............p.......h...........p...............t..........p.......c................l.....l......DhaCGh...Gs..h..u....L.....l.................A..................p.....p....s............p..................p........V..h...G..l.......El...s.p..u..ltsA..p..t..NA.......t.h.N...s...l...s....N...s....p.....a..h...t...h.t..u....p.p..h.h.......t.h.......t...............................................................t.....-.s..l...l..l.DPPR.sG..........h.s...p.......p.......h...h.....c.......h........l..........t..............p........h......p..........+......I...l.YlSCNP..s.T.L.A......R...D.................l..........p...............h.....L..............s..............p..........s..............Y............c............l........p........c...l......t.....................h........DhFPpTsHlE..s.lslLp+.h........................................................................... 0 504 946 1296 +5787 PF05959 DUF884 Nucleopolyhedrovirus protein of unknown function (DUF884) Moxon SJ anon Pfam-B_4679 (release 9.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of unknown function. 25.00 25.00 134.90 134.60 18.80 18.60 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.19 0.71 -4.98 27 58 2009-09-11 07:37:26 2003-05-21 16:12:33 6 1 54 0 0 55 0 184.70 33 92.00 CHANGED MslpLYpspsp...-s.loFphspshNSlhhahFphtss.ssssp............TRLVSGYEps+....sIshphsssss..............csuYllSChRsPplhhcLh..pp.astslshsVV+..spsp.............splWHVluV+KspEstph.p+lpulpVps...u.-ph..asKpLltlsGNlPusFlpuLsp.shsstp-lcslplhtPplpl .MslpLYpspsp...-s.IoFphssshNSlslahFphtsssssssp..............TRLVSGYEsu+....sIshphsssss..............csuYllSClRhPhlhpcLh..pp.aopPluhllV+..spsp.............splWHVluVRKspEhtss.p+lpulhssp..sG.-ph..asK-LlhlsGNlPusFlsuLp+.shsshp-lcslplhhPplpl....... 
0 0 0 0 +5788 PF05960 DUF885 Bacterial protein of unknown function (DUF885) Moxon SJ anon Pfam-B_4405 (release 9.0) Family This family consists of several hypothetical bacterial proteins several of which are putative membrane proteins. 22.10 22.10 22.10 22.30 21.80 22.00 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.00 0.70 -5.35 150 1292 2009-01-15 18:05:59 2003-05-21 16:37:19 6 10 669 6 493 1292 1570 510.40 24 89.45 CHANGED sP.hs.ohhGht.tt...........tphs....D..h.Sspshppphpthpphltp..........................L.pslchs....sLsspsplshplhph..hppthtthpap...............t.hslspht..........uhhstl.shh.st.p.hp.........shp-..............................................hcsalsRLpshsphhsp.hhpphcts..htpGhh.P...chhhctslsph.........................................pshls................tshp.......ps.hhsshtph.t............thstpppsplttpsppslppplhPAhpph....hsahp..sphhsss...........pstGhhphP..........................sGcthYpht...lcthTT.o.chos--lHplGlpEVsRlpuEMctl.hpph.....Gh...p........s.....................sltchhphLps-....pah..p.........................s...t..ppllsphpthhpp............hp.stls.chF...t..plPc.sshtVchlss.......hhpsssss.uhYts.ss....Dss...pPGhaalNh.hs..........hpphspasht....sLshHEuhPGHHhQluhspEh.ts.lPthRph..s.....hh.o.AasEGWALYuEp.Lu.cEh.GhY.p.....cP......................................................hsc.................hGpLp.hchaRAsRLVVDTGlH..sc...............tWoR-p..............................................................................................................Alsahtcso.shsp..................................................................................................ssspsEl...-RYhs.h..PGQAhuYKlGtlcIhcL.RpcAc...ppL...Gcc...FD.......l+pFH-hlLppGulPLslL-ppl...ppal 
.......................................................................................................................................................................G.tt.ts...........phs-..h.Sstshptphthhpphltp..........................L..pths.s........tL.s.s..p.pp..lshp.lhp....t..ph..p...thp.ttpht..............................hslstht.............uhhtt..l....th..h..s.....hshp..............s.pc.........................................................................................................................hp.shhtRLps.lsthhsp..hhtp..hcts....httGhh.s......phhhpth.ltph...........................................................................................pshhs..............t..............tshhh.sshtph.s.............hststp..splptph.tpslttthhsAapph................hsalpsphhsts...........pp.uh..ph.s..........................sGcphYphh...l+thss.s.sh.ss.........c-la.p.hGhpEltRlps-hppl.tcpl........sh......t...........s...................................shtphhphLcs...-....th........................................................tt......ptlhp.hp..t.hhcc................hp.stls..phF......plPc..t.h.lc.l.s..........hh.t..sstss..uh...Yhs...ss.....Dts.....cPG...hhahss...s..............hpphs.pa..p....h...........shshHEuhPGHHhQhuhstph..ts.lstaR+h..s............hh.o.uasEGWALYu.....Et.Lh.p-h..Ghh.p......DP.............................................................................hpp........................hGhLs.hphhR..AsRlVlDhGlHhp.......................................tWoppp..............................................................................................................Ahsahtpps.shsc.....ss....spsEl.....cRYhs...h....PGQAhuYplGphplhcL...RcpAc...pph...Gsp....FD...............l+tFHstlLp.GulPlslLcptlt..................................................................................... 0 191 354 434 +5789 PF05961 Chordopox_A13L Chordopoxvirus A13L protein Moxon SJ anon Pfam-B_5061 (release 9.0) Family This family consists of A13L proteins from the Chordopoxviruses. 
A13L or p8 is one of the three most abundant membrane proteins of the intracellular mature Vaccinia virus [1]. 24.10 24.10 29.60 35.80 24.00 24.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.37 0.72 -4.04 8 55 2009-01-15 18:05:59 2003-05-22 10:44:54 6 1 35 0 0 40 0 68.40 66 99.31 CHANGED MIuDllLlIICVsIIGLIVYGIYNKKoosppspPus...EcY.KhEslKTuYVD+LKsuHLsSFYKLF.Sup MIGILLLIGICVAVTVAILYuhYNKIKNsQNPsPus...lNsPPPEs+N.TKFVNNLEKDHISSLYNLV.SS.u 0 0 0 0 +5790 PF05962 HutD DUF886; HutD Moxon SJ, Rainey P anon Pfam-B_5160 (release 9.0) Family HutD from Pseudomonas fluorescens SBW25 is a component of the histidine uptake and utilisation operon. HutD is operonic with the well characterised repressor protein HutC. Genetic analysis using transcriptional fusions (lacZ) and deletion mutants shows that hutD is necessary to maintain fitness in environments replete with histidine. Evidence outlined by Zhang & Rainey (2007) suggests that HutD functions as a governor that sets an upper bound on the level of hut operon transcription [1]. The mechanistic basis is unknown, but in silico molecular docking studies based on the crystal structure of PA5104 (HutD from Pseudomonas aeruginosa) show that urocanate (the first breakdown product of histidine) docks with the active site of HutD. 
25.00 25.00 30.70 30.60 23.20 23.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.27 0.71 -4.56 106 985 2012-10-10 13:59:34 2003-05-22 10:47:25 6 6 936 6 178 684 43 176.70 33 90.44 CHANGED lchsch.ssPWKNGGGtTcEIsh.aP.........sGu......shcsFs...WRlSlAslupsGs..FShFsGl-RhlslLpGsGhpLphs...........ut.p....pth..........htPhp.shuFuG-sslsupLhsGs.spDFNlMsRcsthpuplphh....ss..............shph........susssl......lashsG.s....hp.......ls.s...........sphtLpst-sl................hhcs......................ptsl...lpsp....ut.lhhlpl ..............................chpchsss.W+NuuGpT+EIss..a..P..............................sut......c-Ft.....WRhSlAolussGs..FShFPGh-RhlolL-G.sthhL.pss.............................sp...s....cs................LpPhp.PauFuuDtslpucLhsGt.shDFNlMoRhsh.tpuplc..hh......pt.............................................shph...........supssl..........laslsG..s.......hp..lss............................phhh.sspssh....hhcs............pppl......hpst....uthhh............................................................................... 0 33 72 125 +5791 PF05963 Cytomega_US3 Cytomegalo_US3; Cytomegalovirus US3 protein Moxon SJ anon Pfam-B_7187 (release 9.0) Family US3 of human cytomegalovirus is an endoplasmic reticulum resident transmembrane glycoprotein that binds to major histocompatibility complex class I molecules and prevents their departure. The endoplasmic reticulum retention signal of the US3 protein is contained in the luminal domain of the protein [1]. 
25.80 25.80 26.10 39.90 20.10 25.70 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.37 0.71 -4.95 3 63 2009-01-15 18:05:59 2003-05-22 10:51:40 6 1 10 4 0 61 0 179.40 54 97.57 CHANGED MKLVhlLAuLAsLl.LshuDusPRPlssIsS....clTuh.sHFpVEENcC+lHMGplYFRG+lSGNFTc+HF...VsaGIVSpSYhDNLpVouEQaca.cAGsYFEWNVpGtsVsasVDpVDVpLSosWGDPKKWAoCVPQVRsDYoS.shhWYhQhuMhccoWuhhhhsVhhYsLShhhLshhhVhholp.......MRFF ............................MKPVLlLAILAVLh.LRLADSVPRPL-VVVS....EI+S..AHFRVEENQCWFHMGMLaaKGRMSGNFTc+HF...VsVGIVSQSYMDRLQVSGEQYHHDERGAYFEWNIGGaPVsHTVDMVDITLSTRWGDPKKYAACVPQVRhDYoSpoI.WYLQRuMRccsWsLlh..RTllsYhhulslLVLlsVtVSs+.......hRF............... 0 0 0 0 +5792 PF05964 FYRN F/Y-rich N-terminus Yeats C anon Pfam-B_1170 (release 8.0) Family This region is normally found in the trithorax/ALL1 family proteins. It is similar to SMART:SM00541. 21.00 21.00 21.00 21.50 20.80 20.90 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.57 0.72 -4.51 45 555 2009-01-15 18:05:59 2003-05-22 10:52:30 9 76 143 1 361 515 8 52.30 35 2.49 CHANGED huoLtlhsLGpllssp..sFHspphIaPlGYpusRlYhS....hpcspc+shYpCcIh- ...................GulhlpslGpllspp......t..acspptlaPlGYp..soRlYaS....hpcsp.++ChYpCpIh.t................ 0 123 177 264 +5793 PF05965 FYRC F/Y rich C-terminus Yeats C anon Pfam-B_1170 (release 8.0) Family This region is normally found in the trithorax/ALL1 family proteins. It is similar to SMART:SM00542. 
20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.72 0.72 -4.17 48 561 2009-01-15 18:05:59 2003-05-22 10:57:39 9 65 144 1 363 525 9 85.00 28 4.25 CHANGED sspPhFcls.s--......s...hshpusSssssWppllcplpp..h+pppt...........tslsG.-hFGLspsslhpllEsLPsscpCppY...phchtc .................................................t..tPhFcIp..s--.........s....hshpusSspssWp.pllctlpp.....hRppsp......................hth...tt...lsGtchFGlspsuVhpllEpLPGscpCppY.....pFch......................... 0 122 175 266 +5794 PF05966 Chordopox_A33R Chordopoxvirus A33R protein Moxon SJ anon Pfam-B_4799 (release 9.0) Family This family consists of several Chordopoxvirus A33R proteins. A33R plays a role in promoting Ab-resistant cell-to-cell spread of virus [1] and interacts with A36R to incorporate the protein into the outer membrane of intracellular enveloped virions (IEV) [2]. 22.50 22.50 22.50 22.80 22.40 22.40 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.34 0.71 -5.52 6 66 2012-10-02 16:37:33 2003-05-22 11:01:42 7 1 42 2 1 66 0 170.40 47 96.24 CHANGED hsspps.Dh.c.s-ptsuFhGSTIYGsKL+.KKphtKKs+sluIsLRIullsSllSLhsIsshLAlQhspCcssp-t.psspulsshs.hosptphstSth.....cpCpGIha-GtCYphssEspo.Fs-AspsCtscuusLPu.ssLhpc.........WlhDYL-GTWG--Gtsl........sKpps.-lpssDlSsEhRsYFCV+Shs ................................................................hhss..........-sc-ppTuhhuuTlYGcKlp.u.Kp...K++RsIulCIRI...ShVISLLSh.ITh.oA.hL.hl.c.L...NpChSss.Eushoctulsssu...u..o.spc.csuuSsop.......................cSC...s.G...LaY.pG.u.CYIhHS-hph.FsDApAsCsscuSoL..Ps..oDllsT.........WLhDYlE-TWGsDGNsI........sKoos...-h...p-uDlSpEsRKYFCVKoh...................... 0 1 1 1 +5796 PF05968 Bacillus_PapR Bacillus PapR protein Moxon SJ anon Pfam-B_4892 (release 9.0) Family This family consists of the Bacillus species specific PapR protein. 
The papR gene belongs to the PlcR regulon and is located 70 bp downstream from plcR. It encodes a 48-amino-acid peptide. Disruption of the papR gene abolishes expression of the PlcR regulon, resulting in a large decrease in haemolysis and virulence in insect larvae. A processed form of PapR activates the PlcR regulon by allowing PlcR to bind to its DNA target. This activating mechanism is strain specific [1]. 21.10 21.10 21.20 22.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.20 0.72 -3.90 3 118 2009-09-10 20:50:31 2003-05-22 11:30:16 6 2 101 0 3 32 1 47.80 83 93.33 CHANGED MKKLLIGSLLTLAMsWGISLGDTALEKsQlISHssQEVQLASDlPFEF ....MKKLLIGSLLTLAMAWGISLGDTALEKSQlISHNDQEVQLAuDlPFEY. 0 0 1 1 +5797 PF05969 PSII_Ycf12 DUF888; Photosystem II complex subunit Ycf12 Moxon SJ anon Pfam-B_4945 (release 9.0) Family Ycf12 has been identified as a core subunit in the photosystem II (PSII) complex [1-2]. PsbZ has been shown to be required for the association of PsbK and Ycf12 with PSII [2]. 19.60 19.60 19.70 19.70 19.40 19.40 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.97 0.72 -7.29 0.72 -4.32 40 232 2009-01-15 18:05:59 2003-05-22 11:34:02 6 2 219 13 31 179 2 32.40 54 71.40 CHANGED M..NhElIsQLssLsLIllAGPlVIsLLuhRpGN.L ......M..NlElluQLssLuLIlluGPlVIsLLAhR+GNL...... 1 11 26 31 +5798 PF05970 PIF1 DUF889; PIF1-like helicase Moxon SJ anon Pfam-B_4988 (release 9.0) Family This family includes homologues of the PIF1 helicase, which inhibits telomerase activity and is cell cycle regulated [1][2]. This family includes a large number of largely uncharacterised plant proteins. This family includes a P-loop motif that is involved in nucleotide binding. 
27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.09 0.70 -5.51 13 2557 2012-10-05 12:31:09 2003-05-22 11:41:20 9 118 754 0 1533 3113 626 193.20 19 32.75 CHANGED pLssEQ+pla-pIlpulhs.spGthFFVsGaGGTGKTaLWpslhstlRS..cscIVLsVASSGlAuLLL.GGRTAHSRFtIPlshsEtSpC..sI+puoclA-LlpcsuLIlWDEAPMssRpCFEALDRoLRDIlppps....sKPFGGKsVVlGGDFRQILPVlp+GuRspIlsAslssShLWpc.s+.lLpLT+NMRLtssthstpctcclpcFupWlLslGsGclssssstt.......shIplPpDLLlppss.cslcsllsclYsc..llpshss.sahppRAILsPsN-sVsclNsallspLsG-E+pYLSsDols.ssps..c.-hlYPsEFLNSlsssulPsHhL+LKlGsPVMLLRNLs.ohGLCNGTRLhlTpL ..........................................................................................................................................................................................h..h..h........G......uGsGKo....hhhp..............h...............t...........h..........................................h..h.hh....us.o....u.....h.u..u...........h..............t..........G.........................T.h..H....p..h......h.....t......h.................s...........................t......................................................................................................................................................................t.............h............t.....h.p.....hllhDE.....hsMh.....t.....t................h..t....h......l.......p.............h...p.h..................................t...F..G..G..h.......ll..h..GDhhQl........P.............l...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 583 1068 1392 +5799 PF05971 Methyltransf_10 DUF890; Protein of unknown function (DUF890) Moxon SJ anon Pfam-B_5064 (release 9.0) Family This family consists of several conserved hypothetical proteins from both eukaryotes and prokaryotes. The function of this family is unknown. 19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.91 0.70 -5.27 4 1276 2012-10-10 17:06:42 2003-05-22 12:58:10 7 11 1113 3 405 2121 363 263.00 45 81.71 CHANGED tA.psuLHPRNRH+u+Y.DFstLhpssPELcpaVhlNPtGc.SlsFADPhAVKALN+ALLtpaYuVs.WDIP-GaLCPPlPGRADYIHalADLLucusss.hss..thRuLDIGsGANCIYPLlGspEYGWpFlGo-lDs.ulspAptIlpuNspLsstIclR+Q.ppptIFsGlIttsE.pYDhThCNPPFHAShs-AptGopRKhpNLtp.s..........s.s.s.LNFGGQtsELaCEGGEssFlt+MhcESptFApQVhWFooLlSKusNLssLpcpLcplGAsclphhEMAQGQK.SRFlAWoFhs 
.............................................................................p...+ssLHPRN+H.ps.cY..Dh.stLsps.P-.L.ppalh.......h....s......s....t..G.........c.S.l........DFusP..AVKtLN+AL.LtcaYuls................WD..IP...t...saLCP.PlP...G..........RADY..IHa.l.....A.....D......L.....L.........u........p.......s.........s......s.......s...lst......................psp.hLDI..G..sGA.NC..I....YP.L.l..........G.s..p.....c..Y........G........W..p..F.s...G.o...................-lsst........ulssAp....tI.l..p..sN...ss......L..sp.t......I..c..l..R....p........Q........p.....p....s...t.........t.................I...F..........s....G.l....l...p...p.....s.....E.......pa...........D...hThC.NPPFH..sS...t...t...t...A.....p...t......G...o...p...R......Khp..s..Lth................................................t......LNF....G.Gpp...p...E.L.W....C-..GGElsFlp+......M.I..pES..t..t........a..t...c..........p...........V..hWFToLlS..+..tp..NL...sslhctL...pc..........h......G......s..s....p.....l....hhhE..M..uQ...G.......Q.K.t.S.RhlAWoFh............................................................................................ 0 123 207 320 +5800 PF05972 APC_15aa APC 15 residue motif Yeats C anon Yeats C Motif This motif, known as the 15 aa repeat, is found in the APC protein family. They are involved in binding beta-catenin [1] along with the Pfam:PF05923 repeats. Many human cancer mutations map to the region around these motifs, and may be involved in disrupting their binding of beta-catenin. 19.40 19.40 20.10 19.50 18.90 19.00 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.92 0.73 -6.36 0.73 -3.62 9 272 2009-01-15 18:05:59 2003-05-22 13:01:02 6 30 58 2 142 273 0 16.00 53 1.96 CHANGED c.-QPhDYShKYuEcp .-.DpPhsYSlKYu--p...... 
0 13 22 62 +5801 PF05973 Gp49 DUF891; Phage derived protein Gp49-like (DUF891) Moxon SJ, Bateman A anon Pfam-B_5075 (release 9.0) & Pfam-B_6067 (release 14.0) Family This family consists of hypothetical bacterial proteins of unknown function as well as phage Gp49 proteins. 23.90 23.90 23.90 24.00 23.80 23.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.05 0.72 -3.97 226 2071 2012-10-03 00:18:00 2003-05-22 13:01:19 9 4 1184 0 551 1798 191 86.10 24 77.55 CHANGED calpsL....cp.........ht...sclhtplppl.ptG........hhsshp.....pGlhElRl..chss..saRlhashctsp....hl.llLpuhsKpo...........pp-IchAcphhc ......................................hpth...spp.........ht...tplhhtlphltptG.........shspphp........pslhElRh....phss...hhR.lh.as.hchsp..........hl..llLpuh.pKpo.......p....p.h.pp-IchAcphh...................... 0 143 347 476 +5802 PF05974 DUF892 Domain of unknown function (DUF892) Moxon SJ anon Pfam-B_5115 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. 23.70 23.70 23.70 23.80 23.60 23.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.80 0.71 -4.42 111 1538 2012-10-01 21:25:29 2003-05-22 13:04:39 7 2 728 12 337 830 49 154.30 41 91.00 CHANGED slc-lalctL+DlYsAEcQhhcsLschuctAps.scL+suhcpHlpETcsQlcRL-plhcth.GtpspuhpCcuhpGlltEupp.lhcp.hpcps.lpDAsllsuuQthEHYEIAuYGoLhshAcpLGh........s.-ssplLppsLpEEpssDctLsplA.p.....shssttus ...................................p-hahchLpDsauhEKQhpphLtphA...........pths.....N....sc....L.p....tt....hcpHLpET+sQIppl-pll-pp.s.lphp......shK..suMpulhtpupp...lh.t...s..pcp..V+suh...uu.thE+aEIA.s......YsoLhshAcplG.........s.cuhtlLcphLpEEKthsphLpphh.......psspphh....................................................... 
0 95 191 252 +5803 PF05975 EcsB Bacterial ABC transporter protein EcsB Moxon SJ anon Pfam-B_4764 (release 9.0) Family This family consists of several bacterial ABC transporter proteins which are homologous to the EcsB protein of Bacillus subtilis. EcsB is thought to encode a hydrophobic protein with six membrane-spanning helices in a pattern found in other hydrophobic components of ABC transporters [1]. 25.90 25.90 26.10 30.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.26 0.70 -6.00 52 1578 2009-01-15 18:05:59 2003-05-22 13:16:12 7 2 1026 0 136 965 1 282.60 25 96.02 CHANGED hppLapcRhpphhpcph+Yl+alhNsHhllhLlh.llGhhuhhYsphLpph..sssh..hh...hllsllhhhllh.hGpltThlccADplFLLshEpc.hpsYlppuhhaShlh.hhh.sllhllhhPlhh..thshshhthlhhhlhhlhhKhh.hhhphphhhhp.............phh.hhhhhhlshhhhhhhh..........hhhhhhhhhlhhhhhhhh.th.ptphhsWcthIphEpc+ttphh+hhshFT-V.plppps+..RRpaL.Dhll...+hlttpp.ppsahaLahRuFlRus-ahulhlRLshluhlhlhh........lsp...alshllshlhlYllshQLhslappactphhhpLYPlsppp+hpuhppllphllhl.sllhslhhhl.thphhhsllhlhssllh ..........ttla.pRhtthhpp..hY.phhhNshhhhhhlh.hhuhhhh.Ysphlpph......s.ph....hh.....h.h.hs.lh.hhhh...ht.ltohhctsDhlFLLshEpp.hp.ahptthhhohhh.hhh.hlh.hhlhhPlhh.....ths.h..hhhhhl.....h.hhhhhhh..hhhph..hh....................................................................................................th...hs.Wphhlt.Epp+h..hhphhshF...T...s...V.t.lp.p.p.st....+RtaL..chll...phhth.....ttha..LahRsalRss-hhslhhRLhhluhlhhhh..........l........hlshhlshlh.YllhhQhhshapt..ht.ht..hh.plaPlstt.t.p.tsh.p.hl.hhh.h.hhl.h...lhhh.hhh..t.hhhhhhhhhh.hh........................................................... 0 25 62 96 +5805 PF05977 MFS_3 DUF894; Transmembrane secretion effector Moxon SJ, Eberhardt R anon Pfam-B_4880 (release 9.0) Family This is a family of transport proteins. 
Members of this family include a protein responsible for the secretion of the ferric chelator, enterobactin [1], and a protein involved in antibiotic resistance [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 524 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.56 0.70 -6.39 10 4536 2012-10-03 03:33:39 2003-05-22 13:26:29 8 16 2237 0 1344 14705 3116 396.30 20 94.58 CHANGED sSuaAPLRpssFRslWlAolluNlGshhQsVAuuWLMTolSsSPlMVALVQAAuTLPlFLLulsAGAlADsaDRR+lMlsuQslhAsuSshLslLutlGhloPWhLLuhTFLhGsGuALssPAWQAoVs-lVsRcDVPAAVuLNSlGaNlsRSlGPAlGGlllAuaGsAssFAlsslSYhslIsslhpWKhcsssssLPpEsltsAl+AGLRaothSssl+sVLsRuslFGLuASAlhALLPLVARDpLuGsAhsYGlLLGuhGsGAlsGAlhss+LRcRlsu-pLlthAsluhAhssLsLALosshWlsslsLhluGAuWlsALoslssuVQhusPcWVhGRsLSlYhslhhGGhAAGSalWGsVAEshulssALlsuAsuLLluuhluhhhtLPptcs.DhsPtsc..aspPhVAhsLcPRsGPlllpIEYpIsc-ctssFLcsMtEhR+lRpRsGAcsWsLtRDLpsPppWlEpFhssoWh-aLRppcRlTpADttVpp+lpAhHsGspPPplp+hl...cRPs ...........................................................................................................h.................s....a.t.h....h...h...h...u..p...h...l.o......h....u..........h........h.......h.......t...l........A....h.....s....h........h.......l........h........p.......h.......o........t.......o.......s.......h..h.....l................u....l.....l..s...h.....h...p....h....l.........P...h.....h.....l....h.......u.....h.......h.....u.......G.s.....l.......A.....D............R....a.s.....+..+....+....l........h.....l.....h...s.....p.......h....h....t..s.....l.......s....s....h....h......L.......s........l.......h........s.................h.......h.......s.....h.......h....s.......l..........h.......h.......l....h....h....h....s....h....l....h....G....h....h...s....u....h....t......s....P...u....h...p....u...h....l...P...p..l...V..s....c......c......p..............L...s......p....A....s....u...l......s...............s...h......s...........h...............p.............h....u.........p...l.....l...G..........P....s.......l..u
.........G.......l..........l...........l..............u.............h.................h..............G............s.........s.............h..................s..........a...............h..........l.......s.........u........h............s........a......h....h.......s......h.....h......s.........l..........h.....................t...........l...........t............h...............t..............h.............................................t........s..............h..............s..............p..........p................p................h.......h............t........s..................l...........h.............t.......G.............h......c.............a.....l.....h......p.....p..............h.....l........h........h......h..........h........l.....h.......s.....h....l....h.....s........h.........h..........s.......s.....s......h.....h......s........l.......h............P....h...........h...u......p.........p......h.......h........t..............h.........u........s....t..........t..............h..........G.......h........l........h...u....s....h.....u....l.....G..u..l..l..G...u...l.....h...h....s......t......h........t........p.....p.......h...t...........t.......t......h.......h........h.......h.........u.........s........h.........s.......h.......u.........l........s.....h.......h....h.........h..............u.....................l............s.................s..........s.............h..............h.........l...........s.........h...........h.......s.......l........h.....l....h..........G.......h.......s...h......h.......h......s....h......s....s.........h........p.............s.......h.....l.............Q....t.....t.....s...P..s....p......h......h....G....R......l.......u......l....h...t....h....h....h..h.....u.......u.......s........l....G...s....h...h...h....G...h....l....u....p....h....h.......u.....h...h........h....s.....h.....h.....h.....u.....u....h...s...h..l..h...s..s....h..h..h..h..........................................................................
.........................................................................................................................hhthh..................................................................................................................................................................................................... 0 468 932 1185 +5806 PF05978 UNC-93 DUF895; UNC-93_Ce; Ion channel regulatory protein UNC-93 Moxon SJ, Pollington J anon Pfam-B_4965 (release 9.0) Family This family of proteins is a component of a multi-subunit protein complex which is involved in the coordination of muscle contraction. UNC-93 is most likely an ion channel regulatory protein [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.74 0.71 -4.73 11 630 2012-10-03 03:33:39 2003-05-22 13:29:58 11 10 174 0 479 1149 24 133.40 22 29.75 CHANGED LGhuahhlFoAasopuFItEuVIcSlp-ps.ss.IssaAGYYuhAllYhsFohusLhsPslVshlosKWuhlluuhsashF.hGFLahNpaahYhoSAllGhGuuhlWsGpGsYLoppso+cThE+Noul.WAlhpsSLlhGGlhlhhh.aphpsstuh .........................................................................................................t.t...G..hhu..hullY..ss.h....s.lus.h.h....s.P....s.l.l....ph.l..G....s.K...h....ol..hl....u....s.h.s....Y..s..h.....a............h.s.....u........h.....a.....s......p.......h............a..............h....h....h....s.u.u.sl.l..G.h...uuu...............h.lW.su....p.u....s...a..........l.......o......p...........h.....s......p..........t...........t.....p..........h........t........p..........................sh.......h.h..............................................tt.................................................................. 1 173 248 405 +5807 PF05979 DUF896 Bacterial protein of unknown function (DUF896) Moxon SJ, Bateman A anon Pfam-B_5209 (release 9.0) Domain In B. 
subtilis, one small SOS response operon under the control of LexA, the yneA operon, is comprised of three genes: yneA, yneB, and ynzC [1]. This family consists of several short, hypothetical bacterial proteins of unknown function. These proteins are mainly found in gram-positive firmicutes. Structures show that the N-terminus is composed of two alpha helices forming a helix-loop-helix motif. The structure of ynzC from B. subtilis forms a trimeric complex [2]. Structural modelling suggests this domain may bind nucleic acids [3]. This family is also known as UPF0291. 21.50 21.50 21.50 22.70 21.40 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.07 0.72 -4.38 39 1638 2009-01-15 18:05:59 2003-05-22 13:33:07 7 4 1273 5 166 633 4 62.90 48 80.57 CHANGED cchlcRINELA+KpK.spGLTscEptEQppLRpcYLcsFRsuh+splcslpVlDtp.GpDVTP-KlK ........chlcRINELA+KcK.s.pGLTt-EptEQppLRc-YlcsaRpsh+ppl-slK.l..l..D.c.GpDVTP-Kl+............ 1 59 108 136 +5808 PF05980 Toxin_7 toxin_7; Toxin 7 Moxon SJ anon Pfam-B_5254 (release 9.0) Family This family consists of several short spider neurotoxin proteins including many from the Funnel-web spider. 27.00 27.00 27.20 29.50 26.80 25.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.16 0.72 -3.91 30 91 2012-10-01 22:06:18 2003-05-22 13:43:09 7 2 7 4 0 96 0 33.90 49 50.57 CHANGED GCLs+N+aCsshoGP+CCSGLpCKplSIhcohCl .uCLhhsQaCNALus.+CCssapCKhls.h-uhCl. 0 0 0 0 +5809 PF05981 CreA CreA protein Moxon SJ anon Pfam-B_5258 (release 9.0) Family This family consists of several bacterial CreA proteins, the function of which is unknown. 
25.00 25.00 52.10 51.90 23.20 19.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.69 0.71 -4.61 61 1188 2009-01-15 18:05:59 2003-05-22 13:50:15 7 3 1169 0 227 627 37 130.60 57 80.30 CHANGED lGpVussa.hlGsD..Ill-AhcDPcVpGVTCalSpscsGslcthh..shhEDsS-uSIACRQsG.PIphs.....cp.c....pGEpVFpcppSllFKsL+VsRhaDtcpsoLlYLsYSc+ll-GSsKpSlSsVPlhssps .................IGsVsTVF+hhGsDc+IVVEAFDDPcVcsVTCYlSRAKTGGIKGsL....GLAEDsSDAuISCpQV...G..P.Ipls.....D+lK.....pGEVVF++RoSL.lF...KoLQVVRhYDsKRNsLsYLuYSDKl......l-......GSPKNulSAVPlhshtt.................... 0 42 100 159 +5810 PF05982 DUF897 Domain of unknown function (DUF897) Finn RD anon Pfam-B_8040 (release 9.0) Family Family of bacterial proteins with unknown function 21.40 21.40 23.80 37.70 21.30 21.30 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.99 0.70 -5.64 7 322 2012-10-02 17:06:44 2003-05-22 13:58:16 7 2 288 0 114 310 521 322.00 38 95.31 CHANGED PslhFFhhGhlhuhh+S-LtlPtslspsLohhLLhsIGh+GGhtlpp...pslst.hlhshssulhLuhLlshhshhlLp+hsplcphDAhAhAuhYGSlSusTaAsAlohLpc.Ghsa-uahsAhlslMEhPAllsulhl...........................................................slhttp.s....sttshspsp.......t........tpllcEuhhssulslLlGGlhIGlloG.puh.hlp...sFh.slFpGlLslFLL.MGhpAuc+lt-Lpphu.hhllaullsPllhuhlulhlG.......hhs..GhssGshllhAVLuASASYIAuPAshRhulPcANPolYlusSLulTFPhNlhlGIPLa.hthAplhh.t ..........PhlLFFhhGhlusll+S-LclPtslhchLolYLLlAIGh+GGhpLsp...sslss.llh.hlsulhLuhllPlluahlLpthsphspsDuAulAuaYGSlSAsTFs...suluhLp...s....u..lsa-uahsshlAlMEhPAIlluLhL..............................................................hph..t.tp..s............t..t.....................h........................htpll+EshhssullLLlGGllIGhl...s...G.....p...Ghp.lp......sFhsshFpGlLslFLL-MGhsAuc+Lpc.L.p.p.s.G.h.hlsFullhPlltuhlGlhlu......hhh...shusGssllhulLsASASYIAsPAAhRhAlPEAsPolhlsuSLGlTFPaNlhlGIPLY.htl.uph...h........... 
0 27 78 100 +5811 PF05983 Med7 MED7; MED7 protein Moxon SJ anon Pfam-B_5278 (release 9.0) Family This family consists of several eukaryotic proteins which are homologues of the yeast MED7 protein. Activation of gene transcription in metazoans is a multi-step process that is triggered by factors that recognise transcriptional enhancer sites in DNA. These factors work with co-activators such as MED7 to direct transcriptional initiation by the RNA polymerase II apparatus [1]. 25.00 25.00 25.10 25.00 24.90 24.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.84 0.71 -4.46 33 334 2009-01-15 18:05:59 2003-05-22 14:01:54 6 7 289 7 234 313 1 170.70 35 71.90 CHANGED tsouhaP.PPP.ahchasppp............................................-lphhhPPs.....P..ppssYpsFGp.aphc-.............tlPs.L-spGlppLYsps...........................................ts+tpEL+KLs+SLLlsFLE.......................Ll.....slluhsP....pphcp..Klc-lphlhlNhHHLLNc.YRPHQuREoLIhlhcpQlcc++pplcplcptt-clcphlp ................................................................................suh.P.PPs.ah.+.aTscN.........................................................................ph.t.hh..PP............sP...hpssYphFGspapsc-...................hl..s.LEs..p.GlcpLaPtp..................................................................................hD++pEL+KLs+SlLlNFL-.......................Ll.......................slLhpsP.........................pphcc.....Kl-Dlph.LFlNhHHLlNc.aRPHQAREoLhhhhE.Qhcp+pppscchpcph-clpchl.p.......................................... 0 79 128 191 +5812 PF05984 Cytomega_UL20A Cytomegalovirus UL20A protein Moxon SJ anon Pfam-B_5345 (release 9.0) Family This family consists of several Cytomegalovirus UL20A proteins. UL20A is thought to be a glycoprotein [1]. 
25.00 25.00 156.60 156.50 24.20 21.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.32 0.72 -3.81 2 14 2009-01-15 18:05:59 2003-05-22 14:13:27 7 1 6 0 0 20 0 101.60 84 97.33 CHANGED MuRRlhlLuLLAVoLsVALAAP.QKpKRSVpsEpPusotsGsshT.ptplp.opsGphsDssE-...-YDVLI...-Go...pP..ppssEp.psptKEspp MARRLWILSLLAVTLTVALAAPSQKSKRSVTVEQPSTST..NuD.NTTPSKNVTLSQGGSTTDGsEDYS.G.-YDVLITDsDGopHQQP.QEKTDEHKpEHTKENEK 0 0 0 0 +5813 PF05985 EutC Ethanolamine ammonia-lyase light chain (EutC) Moxon SJ anon Pfam-B_5363 (release 9.0) Family This family consists of several bacterial ethanolamine ammonia-lyase light chain (EutC) EC:4.3.1.7 sequences. Ethanolamine ammonia-lyase is a bacterial enzyme that catalyses the adenosylcobalamin-dependent conversion of certain vicinal amino alcohols to oxo compounds and ammonia [1]. 23.00 23.00 23.10 29.80 22.80 22.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.61 0.70 -5.29 41 1134 2009-01-15 18:05:59 2003-05-22 14:16:49 6 4 1071 12 212 698 20 234.30 47 80.75 CHANGED ssssts.sasc.L+phTsARIuLGRuGsu....lsTpplLcFphsHApARDAVasslDh..ttltttlps.........hsh.lplpSpAsDRspYLpRPDLGRcLsscuhtpLpspsstt..............sDlsIVlADGLSutAl..pppuhslLtsLhstLts....shpluPh.llspQuRVAluD-IGEhL........sA+hlllLIGERPG..LoosDSLGhYlTat.P+sG.psDA...........cRNCISNI.+.ssGLs.spAut+lshLlppuhchphSGVsLK .....................s.hcs-shpcLRp.TsARl.shGRAGsp.....hTpslL+FhADHupA+DuVhptls.........c-hlcs......................shlpV+SpssDKshYLsRPDhGRR.LssEuhcsL+spssts..............sDVplVluDGLSosAI..psNhtplLssLhsuLct......tGlpluss.hhV+huRVtltDpIGElL........GA+lVllLlGERPG..Lusu-SLSsYh......sYs.P...+.....su.psEA............cRsCISNI.H..puGhP.....ssEAAuhlspLh+chLcpKtSGlsL.................... 0 58 119 168 +5814 PF05986 ADAM_spacer1 ADAM-TS Spacer 1 Yeats C anon Pfam-B_1693 (release 8.0) Family This family represents the Spacer-1 region from the ADAM-TS family of metalloproteinases [1]. 
20.80 20.80 20.80 21.00 20.70 20.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.37 0.71 -10.35 0.71 -4.22 23 1464 2009-01-15 18:05:59 2003-05-22 14:39:08 9 158 90 0 835 1201 1 112.10 30 10.54 CHANGED cslpGsFscpp.t.tGYhcllpIPsGAspIpIpEhphSs.....saLAl+sss..GcahLNGpatlshspp.hphsGoh.hcYsp..sss....t.EpLp..uhGPlpEsLhltlLsp..tspssul+YcahlP ..........................hlpGsaspst.......hG...Y...p.llpIPsGApp.Iplpph.phus...................................saL...A.l....+sps....GcahLNG...s.as...l...s...hs..tp...h......ph.s.G.o........h..hcYpp....sss..............thEplp..uh..G..P..hp-sLhl.......lLht..............t.....p.....sss...lcYpahl............................... 0 119 178 440 +5815 PF05987 DUF898 Bacterial protein of unknown function (DUF898) Moxon SJ anon Pfam-B_5347 (release 9.0) Family This family consists of several bacterial proteins of unknown function. Some of the family members are described as putative membrane proteins. 28.00 28.00 28.80 28.00 27.90 27.70 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.10 0.70 -5.55 83 1019 2009-01-15 18:05:59 2003-05-22 14:40:54 8 9 864 0 134 586 125 248.30 35 91.26 CHANGED .pshphpFsGsu.uEaFtIWlVNhLLollTLGIYosWAKVRp+RYFYuNTplsGpsF-YpusshpllhGhL.lulslllhahlssphs........shhu..............hshhlhhhhhhPall.h...+uhRFphtpTsaRslRFsFpGshtpuhhshlhhslLs.sl....oLGlhh..........................................PhhhtphpcahhspspaGsppFshc...ssstshatshhh......shhlhlhhhhhhshhhh.hhs.hh.ht........................................hshhhhhhhshhlhhhhhhs...ahpsthtNhhhspspl........sthpF..pSslpstphhhlhlsN..........................................................hllhllTLG...................................LhhPastlR.hhR...ahscshsltu.s.slsplhusp.ppp..suAhu-t...huDsh 
...........s..pphhFpGsu.tpaFhlhllshLLTIITLGIYssWuhh+hc+ahhtshplssp.htathpshtlhhshl.hhh.......................................................................h.hhh.hhhhh.hhhh....tshpaph..othpsl+h.htsthhthhh.h.hhh.hhhhh.......shhh..hh................................................................shh.......h.......h.hpt..h....G...ht.p.......htht.hhhhhhh.......hhhhh.hhhhhhhhh...........................................................................................................................hhh..hhhhhhhhh.........hh.st..phhhsthth........t.h..ph...tsphp.hthhhhhh.............................................................hh..hohG........................................lhhshh.hh.hhph.hpth.l.....sh.th.......tt................................................. 0 42 86 108 +5816 PF05988 DUF899 Bacterial protein of unknown function (DUF899) Moxon SJ anon Pfam-B_5437 (release 9.0) Family This family consists of several uncharacterised bacterial proteins of unknown function. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.55 0.70 -4.90 51 520 2012-10-03 14:45:55 2003-05-22 14:44:00 7 1 316 0 226 988 197 222.60 38 92.43 CHANGED h..sp.slsoRpcWhtARppLLscEK-hTRppDplAspRRtLPh.Vcl-cs....YpF.....-sspGp.soLsDLFs.GRsQLlVYHFMF..uP...............sa.ppuCsuCShluDphsGthsHLpp+DlsLssVSRAPlscltsa+cRMGWp.hPWhSShsuD...FNhDFsVshstt.............................................................p-hPGh.SsFhRcs....GpVaHTYSo...........huRGh-slhusaphLDhTPhGRsEs.................htWhR+HD.......c...Y 
......................................................slsspppWhsARctLLscEKt..hTRttDtlAA...pRRtLPh.VcV-.cs....Y.pF..........-uspG.t..soLsDL..Fp..G.RsQ.LlVY+..aMa.....uP..................sh..ctuCsuCShhs.Dph....G.tl.sHLpp+DsohssVSR............A............Plscltsa+pRMGWp..hsWhSot......s.us...FstDass.ht..t..........................................................p-hsGh..os.Fh.R............-s.....sp.VaHT..Y.S.o...........huRGh-tlhssashLDhsPhGRpEs................h.WhphpDt....................................................... 1 55 114 164 +5817 PF05989 Chordopox_A35R Chordopoxvirus A35R protein Moxon SJ anon Pfam-B_5472 (release 9.0) Family This family consists of several Chordopoxvirus sequences homologous to the Vaccinia virus A35R protein. The function of this family is unknown. 18.80 18.80 19.10 19.30 17.70 18.70 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.10 0.71 -4.66 9 54 2009-01-15 18:05:59 2003-05-22 14:47:44 7 1 39 0 0 44 1 157.80 49 96.24 CHANGED .shhsllTshGVlpIpscph.+sh.sDLGI.sl..l-hlGPYtluolplp.lssshlpppslpcCYlutNGhllpCScpspLshPlpplasuaho+sshllChDc..h.+Lhl-sphQPFYlhoShslhcu..cIlEVYNLYpcGDYphIlNPSssFLphLscpsshCLoDpsGWlIlDu.Ksc ...MDtshlITshGlLpIsD.....Tlh-DLsI.oI..hDhIGPYlIGsIchspIs.lc-IchuDMpcCYFuhpGpllspspsphhphslhpIpsAYho+ssh.l.IsCDh..chhL.lcspaQP...FalhsulslhsA..pIlEsYNLYptGDYphIlNPSsshhhpL..ppshCloDGsGWhIlDs.Ksp..................................... 1 0 0 0 +5818 PF05990 DUF900 Alpha/beta hydrolase of unknown function (DUF900) Moxon SJ, Bateman A anon Pfam-B_5475 (release 9.0) Family This family consists of several hypothetical proteins of unknown function mostly found in Rhizobium species. Members of this family have an alpha/beta hydrolase fold. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.76 0.70 -5.04 13 508 2012-10-03 11:45:05 2003-05-22 14:51:33 7 13 338 0 213 832 107 214.60 24 44.33 CHANGED spsutpshlsscltpsts+clllFVHGaNssF--AlhRhAQIsaDhs.......................hsu..ssllFoWPSpup...lhs.............YsYD+E..SssaSRsALEchL+hLAps........sscclpllAHSMGsaLshEuLRQluhcst..................shss..KlspVlLAAPDlDl.DVFspQhp...........shG+.s........chTlasSp-DRALp.lSphlsGsssRlGshsss.....pptltstGl.....slVDloclcuuDh.lNHspFussPpll....pLlusR ................................................................................ts..........................t.ccl...h..l...FlHGa.N...s....s....a...p.......-.....u........h..h+....h.A...Q.l...s...p...s.......t................................hps.....ssll....Fo..W...P...S.p..u..s.......h..hs.......................................Yh..h..D.+c......us..p..h....o..pss.......L.t....c......hL....p.t.l.....s..ps..........................................s...scclpllAHSMGshlshc...u...Lc..phshpst...................................................................th.t....+lss.l.lLs.u.P..Dl.Dh..-..s..Fp.ppht..................th.s.p.t.........pholasSpcDpALt.hSp..h..l........t....u..s....h.....R..lGt.hss............h.......tl.............hlDh...sph..ts.........t.....h.t..Hs.haht....h............................................................................................................................................ 0 53 129 169 +5819 PF05991 NYN_YacP DUF901; YacP-like NYN domain Moxon SJ, Bateman A anon Pfam-B_5522 (release 9.0) Domain This family consists of bacterial proteins related to YacP. This family is uncharacterised functionally, but it has been suggested that these proteins are nucleases due to them containing a NYN domain. NYN (for N4BP1, YacP-like Nuclease) domains were discovered by Anantharaman and Aravind [1]. 
Based on gene neighborhoods it was suggested that the bacterial YacP proteins interact with the Ribonuclease III and TrmH methylase in a processome complex that catalyzes the maturation of rRNA and tRNA [1]. 24.60 24.60 24.90 25.40 24.50 24.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.22 0.71 -4.50 69 1318 2012-10-03 20:43:45 2003-05-22 14:58:10 6 12 1288 0 277 838 38 157.20 34 60.56 CHANGED LlVDGYNlIsu.....WscLcclt.cts........L-sARppLl-hLssYpu.aps.hclllVFDAphltG.tpchpph....tslpVlaTc-scTADsaIE+hstpltp.t.....ppVhVsTSDtspQhhlhupGAhRhSuc-hhp-lppsppplp.p........chpp....tpp..ppphhp.......ptlspchhcphcchRc .........................LlVDGYNhIus......WspLpplt.....cts....................................L-...pARppLlctl......u.s.Ys..u..hpu..hc.......lllVFD.AhhstGh..p...p.p.h.ph.....slcVlF..T..+E..s..E...TADsaIE+hstc.hts.t.............ppltVsTSDhsEQhs.l.a.upG.AhRhSu+ELhpclppscp.plp.p..............phpp....hpp....pp.hht..............h.spc.htphcchh......................................................................... 0 115 208 250 +5820 PF05992 SbmA_BacA SbmA/BacA-like family Finn RD anon Pfam-B_8139 (release 9.0) Family The Rhizobium meliloti bacA gene encodes a function that is essential for bacterial differentiation into bacteroids within plant cells in the symbiosis between R. meliloti and alfalfa. An Escherichia coli homolog of BacA, SbmA, is implicated in the uptake of microcins and bleomycin. This family is likely to be a subfamily of the ABC transporter family. 
20.00 20.00 20.40 20.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.07 0.70 -5.23 10 817 2012-10-02 13:23:42 2003-05-22 14:58:56 7 3 763 0 82 1485 1772 298.70 62 77.94 CHANGED s+cWQhWSlLGTulILFshaFsVQluVtlNsWYGPFYDLIQcALusPsulThs-FYpulhsFhtIAhluVsluVlNsFFVSHYlFRWRTAMN-YYhu+WspL.RHIEGASQRVQEDTMRFuplhEsLGlullculMTLIAFLPlLasLSspVshLPllG-lPauLVhAAllWSLhGTlhLhlVG..IKLPGLEFsNQ+VEAAYRKELVYGEDc.ssRApPtTLcELFssVR+NYFRLYFHYhYFNluRlhYLQsDs.lhshllLsPoIVAGplTLGlMsQIoNsFG+VRGSFQYLlNSWoTIlELhSIYKRLRuFEupIp ...............................................................................................PH+WQ.h.W.S.IL.G.o.u..L..I..I.Fl..T.WF.h..V.p.V.uVAl..NAW...Yu....PF...Y.DLIQ...s...A...L.......o.......o...P.......c....p.......V...o.......l....pp....F......Y..p.....p....l.u.............VFLG.I..All.AV.lI....u.V.L.....N............sF...F............VS.H.YV...FR.W....R.....T......A.....M...........N...........E...........a....Y.........M......u.......p.......W.............p..........p..........L.........R..............H..........I..........E........G.......A.........A...........Q............RVQEDTMRF...A............o............T.......L............E.......s............h............G.....V.....S.......F....I.sAlMTLIAFLPVLlTL..S..s...H..V..s..c...........L...........P...I...l...G....a....I....P..au....LVhA..AIlWSL..h...G.TGL..L.A.V...V.G..IKLP.G.LEF+NQ.R....V.E.....A.AY..R.........K..E.L........V........Y........G.E........D........D........A........s.........R........A.........s.........P.......s.....T........V.R..E.....L.....F...o....u...VR.........+.......N..Y...F...R...L.YF..H.Y.......h.......Y....F.......N....IA.......RILY.......LQ....V..D.N.....l.......FuLalLFPS.IVA...GsITLG.LMoQITNVFGQVRG...uFQYLINSW.....TTl.......VE.....LhSIYKRLRuFE+pL.c.............................................................. 
1 16 35 59 +5821 PF05993 Reovirus_M2 Reovirus major virion structural protein Mu-1/Mu-1C (M2) Moxon SJ anon Pfam-B_5584 (release 9.0) Family This family consists of several Reovirus major virion structural protein Mu-1/Mu-1C (M2) sequences. This family is family is thought to play a role in host cell membrane penetration [1]. 25.00 25.00 61.20 54.00 17.70 17.50 hmmbuild -o /dev/null HMM SEED 674 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -13.03 0.70 -6.71 2 56 2009-01-15 18:05:59 2003-05-22 15:04:45 7 2 25 36 0 67 0 614.20 48 98.52 CHANGED GNspo.VpThNlTGDGNsFpPou-hoSTAsPulsLpPGhLNPsGh.WhslG..TSVso.suL...........................AlVs.p.t.hshVsEthhtsFoKA.hslph.p.hhpth+shsss.hhSshhThsssYVGsoAtpALsN..pps.VhhsThQsM.lp.oItAth+slthWp.cLcsA.ThLssslshGpsSCshpSllthhcD.LP.DsLhhpYPpEhhsshh+R.sulp....Dsp.sss.scslstVAuuh.AsoupsssL.tpSphhpQAhshhsut-.-lItu.sPlPssVFs.s.tPtsYpsthlK.--ApW...Isch.sosh.hpshs.oGssatlpL..GsTRVlshsthhs.hhsLDluGp.h..-Ts.D.ssphsuFllhQoplPhp.hTsAupIsthTVVshsth.A.s.uhss..QshhshhpLthsaE.Eplsth.P.h..YhLuTFl.sushosoNhopPslhDuLLThoPl.htEsThKGtVVstlVPAplhus.TsEplsuuLsNDAhhhh.s+hsKlAp........A.KhDDsAsss.hS..uV....QhtlSQltps.ss....P..lL.+hApRAhphFlusPShhlp.uhPVLopps.h.ALspGVtTuLRTtSLutGVpsAspKLpuppSlpshpQuFhDpluohaPu. 
...........GNAoSlVQshNlpGDGNhFsPSAEToSoAVPSLSLsPGhLNPGGhsWhhIssphssosPuuLRhMTotDlsphu..shsNSoGhlPopuhh.sss.s.c.EsLsVlT-HAIspFsKhpMAhEhsR-aLDtptVsstSsphpsalsYVDCaVGVSARQAhsN.FpppVPVITpoR.s..ahs.ShQshLpsLs.WEhDlRphhTlLPTssshGcloCcM+SVVphlD-QLsDsSLCRhYPcpAAsAlA+RNGGI+...Whssspsps.u.AsNslAASshushAsssPLsEKSstsEpuMcLVsssslDIlsSpsPlsuSVauhsscPpsYNlRTL+l-EAhWLR.h.tosshshphp.hssppuTphHh.L...ptGopVlNL-QhusMhF.lsluGKsYK.cssa...DPss++lsllVhQSKIPFEtWTsASQIsuIs...VutVplaA.t-SShss...ppIIupTSLuYhFERETlsssssElNhYLLsTa..sss.osss.s..P..DsWDulhTloPLouGpVTlKGssV-pVVPuDLlGuYTPEuLsAALPNDAuhlhts+AsKlAc........AIKh-DDussDEsSPhSsPIQG.LAlpQ..L-T..s..t..G.sRhhpP...uhLp+lASRAhphFlGDPpoILpQusPVLpDsslWsuhsQGV+sulRTKSLSAGV+oshsploAspSlQsW+QGFLsKlpshF............... 0 0 0 0 +5822 PF05994 FragX_IP Cytoplasmic Fragile-X interacting family Finn RD anon Pfam-B_8072 (release 9.0) Family CYFIP1/2 (Cytoplasmic fragile X mental retardation interacting protein) like proteins for a highly conserved protein family [1]. The function of CYFIPs is unclear, but CYFIP interaction with fragile X mental retardation interacting protein (FMRP) involves the domain of FMRP which also mediating homo- and heteromerization [1]. 
20.50 20.50 20.60 21.30 20.40 20.40 hmmbuild -o /dev/null HMM SEED 820 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.27 0.70 -6.76 9 299 2009-01-15 18:05:59 2003-05-22 15:10:11 6 10 123 1 179 271 13 647.30 51 67.18 CHANGED scscclhsls.puLplLSphsshVhEh.huaKlhsPss.p...sNctsstt............spcYERAsRYNYoctE+huLlpllAhIKuL...pplhtctEollsptlpcsI.spLQ-FspppLcEsl++ss+....KK-hlpsllpsl+shhsDa.suhc.tpcsthpsc+c........pst.clpl...........L.hlRo.LpSLlssc.thsc+p...hppphcupplpsIctFhpcShaastLLshspsLppssDLSpLWa+EaaL-ls.........................+RlQFPIchShPaILocalLpsp-.PuLh-hlhYPLslYNDuAthALhph+pQaLYDEIcAEsslCh-phsaclu-psFshh+phuushhhDKphhsphpp.th.h....pP..........susRapsLLpQp+lpLLGpsIDlstLluQRlNtthhcpL-tAIsphEupslpulltl-tLl-l.+lTHpLLucah.slssF-shhppAppsso..u.pu+IhlHl.....a.cLshDhlPsYsapuss.R.......h..ttt.tRccss..t.ha.husKslstuasshhpthssFlus.HF+slsRlLshpulsllhpplLc.....hlpsshtsalpshhphhspls+l..PhhshGs.ushshFptplpshlpas-l+.clhpshRplGNslhFsthl-pAL.hpcpstshhpAhshps.hsps...hpcsp-..s.....hppLcphauhLp.hs.l.................s-lhh..phpsshulhc.lLpchcsFl.s.........htss..ssulhsl-oss...pFttlWSslQFlaChPhs.ss-h..........ss.phFG-GlhauusslIsLhsQpc+F-slshChHllplpc....ssscDch.+hlslp+hl-phppaplhssphFtlhsp 
......................................................................................................................................................pthh-lsLpGlQLLSpWoutlhEh..hoWKLlHPocch.....pNc.cCPsp................AEEYERATRYNYosEEKhAhlElIAMI....KGL...pslMsRhEolhspAIRpslYutlQDFsQhsLc-PLRpAh+....pKKshl................sll.slRcshsDW.t..u.hc..s....s-ssh+ucK-s..........psshplplsphss..................................................................................tssssQLYMlRThlESLlu-+.sss....K...+...s.............h.+p.pl-..u...hl.tl-pFacpSaaasaLlshS.csLppssDLSQLWaREFaLEhT........................t+RIQFPIEMShPWILTDHILc....op-.................suhhEhVLYsLDLYNDSA.aALstFp+QFLYDElEAE..V...NLCFD...Q..FVaKLu..-pIFsaYK....hAuShLLDK...+h+.s.-sps.uh.h....hP.....................ssRY-.o.LL+QRHVQ..................LLGRSIDLN+LIoQRlssshh+ul-.AIsRFEupDL.o.uIV...EL-hLL-lNRhoH+LLscah.sLDsF-uMhcEANHNVo...............u....PaGRITLHV.....FhELsaDFLPNYCYNuuTpRFV............................+s.hshs....pt.....pR-K...sp..sp......a..laGSK...........LNhAapphhs.YpsFlGsPHhpslsRLLGYpGlAllhcpLL+.h....c.llps...s...l...h...pal...csL.h.psMPK......C+L..Pph-YGSsGlLpaaptpLpslhpYs-lKo.hhpshRElGNsllFshLlEpuL.s.cEssDllpAAP.FpsllPcsh.......hKc...s..p..c.p.sp...........hp+L-spauslp..hss.lp+hus.p.............................................................phthA+-uDLLT+ERLCCGLShFEslLsRl+saLpcs.....................lWp..G.s..P.sN.GVhclD-Cs...EFHRLWSAhQFVYClPss.spEh.........................os.EphFG-GLpWAGC.lIsLLGQQ+RFchhDFsYHlL+Vp+.......................Du+Dc...lc..sl....L...c...+....hs-RlR+aplLNsplhslL................................................... 0 74 97 142 +5823 PF05995 CDO_I Cysteine dioxygenase type I Finn RD anon Pfam-B_8006 (release 9.0) Family Cysteine dioxygenase type I (EC:1.13.11.20) converts cysteine to cysteinesulphinic acid and is the rate-limiting step in sulphate production. 
21.40 21.40 21.90 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.97 0.71 -5.10 8 920 2012-10-10 13:59:34 2003-05-22 15:12:25 7 10 728 11 390 865 131 155.50 22 77.31 CHANGED Mp.........pssshpssossshlspLtphhpppthsVsphttLhpshpsshs-Wt...hhhhaDtspYTRstl.tGssch-lhLlsWsPGpuoslHDHusStGshplLcGpLpEotashsct...............h....shhspphhhpsGsshhpspttlH+l...........tNsSsscsAVSLHlYhPPhsshsha-pp ......................................................................................................................................................................l...th................ph......h........s.......p.p...Y..tpphl...ps.........s..s.............c..hs.l........h..l..ls..W.sPGps..o..slHD..Hsssash.hs........lLp.G.p..l..p..............E..p.tap.hspp.....................................h......h.s..p.p.h.h...h.t.st..s..s...h.....h........s..t.....ts.l.Hcl.......................tss.s..t..sp...s.ulSlHlY...........................s........................................................... 0 107 212 322 +5824 PF05996 Fe_bilin_red Ferredoxin-dependent bilin reductase Moxon SJ anon Pfam-B_5667 (release 9.0) Family This family consists of several different but closely related proteins which include phycocyanobilin:ferredoxin oxidoreductase EC:1.3.7.5 (PcyA), 15,16-dihydrobiliverdin:ferredoxin oxidoreductase EC:1.3.7.2 (PebA) and phycoerythrobilin:ferredoxin oxidoreductase EC:1.3.7.3 (PebB). Phytobilins are linear tetrapyrrole precursors of the light-harvesting prosthetic groups of the phytochrome photoreceptors of plants and the phycobiliprotein photosynthetic antennae of cyanobacteria, red algae, and cryptomonads. It is known that that phytobilins are synthesised from heme via the intermediary of biliverdin IX alpha (BV), which is reduced subsequently by ferredoxin-dependent bilin reductases with different double-bond specificities [1]. 
25.70 25.70 26.20 26.30 25.30 25.60 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.30 0.70 -4.88 62 239 2009-09-10 22:50:27 2003-05-22 15:14:14 7 4 127 41 111 271 579 213.70 26 84.15 CHANGED hpplppthpph...slpPh.slssshpthpuphstp....phhIpshhapstthR+l+lptscsGsuLplLssVhaPs.pa.DLPlFGsDl..Vshsst.llA.llDlpPl...ppc.ta.pcahpsLtslh.........ph.shhspspplPt-up.aFSPhhlasR.....ssspp.csthhsthpcYLplahphhppApsh......sss.p..................htclhpuQppYspaptcpDPuRtlLp+hFGppWu-cYlcphLF- ........................................................t...h.t.htth.thtsh..lsttht..hpuphttt....phhlpshsaps.phR.+.h+hthhcsGpshplLpsVhaPps.........pa.D.......LPlFGsDl..lshsst..l.hs..hlDlpPl.......tpptp.h.....ppYhp...tLtsLh..........p..t...h..h.s..pspclst.up.hFS.shslasR..........ssstp.....tppthhsthpcY....Lphahp.hhpp.up..........sstp..................................................................................htphhpuQppYsphptc.pD.s+tlLp+hFGppWu-calpphLFs............... 0 31 72 102 +5825 PF05997 Nop52 Nucleolar protein,Nop52 Finn RD anon Pfam-B_8003 (release 9.0) Family Nop52 believed to be involved in the generation of 28S rRNA [1]. 
19.70 19.70 19.90 19.90 19.40 19.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.44 0.70 -4.91 35 436 2009-01-15 18:05:59 2003-05-22 15:24:39 7 5 303 0 284 427 4 187.10 27 44.25 CHANGED tphths+pLAus-cpsR-+ulc.pLppalsppspp..........sch-hhKlWKGLFYshWhsDKPlhQpcLAsclu.pLlpshpssp..................sthtFhpuFhpoMs+EWssID+aRlDKahhLhR+hlppthphLpcppWct...phlpchhpllhcp...hltsps......hspGlthHlh-lal.....-ELtclhtt........................................lsspslhtllcPFhpl.htpspschLhp.....plpppla..cpl ..........................................t....hhppLAus-phhRppulc.tLppaltsppth..........................st.-hhKlWKGLaYshWh.pD+P..lhQppLupplu.pLlt.sh.stt...................................s.hhFhpsFapohs+EWt.uIDphRlDKahhLlRhhltt............shp..h.lpp............p..................t.Wpt...............phlpthhphh.pt.....hh.sps...................stGlthHhh-lal.....-ELtcsht.t.............................................................................httt...hhlpPahph.htps.sphlhp.phtptlh...h................................................................................... 0 93 146 224 +5827 PF05999 Herpes_U5 Herpesvirus U5-like family Finn RD anon Pfam-B_8027 (release 9.0) Family This family of Herpesvirus includes U4, U5 and UL27. 
25.00 25.00 37.00 36.00 18.60 17.00 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.62 0.70 -5.89 14 74 2009-01-15 18:05:59 2003-05-22 15:37:14 6 2 28 0 0 71 0 432.70 41 71.31 CHANGED lcsLpFh--hsthaptlTKhsAlGpYussplhshs.p+sTlutahRhLsEhhstLahphcthh...p.clcsEshtpLtshLsshhspthcssh.spstl..FacscaFtphpssLh.hatlLCuCsEC...R.phFhhatthsht.......++s......stsl+lpshsth..ss.............hcLPhh.HLsttpshsLpsplu+DLGh.Sh..IppslEcp+.LPI.shs.pluhs.pc+slLplhoNIVhhLFllppl+phIhpELshhhchasctltcLphsh........................................Etth..hLssssshpshphhs.......hhc+l...+...hthlpssusp.phhpphp-slchuFpls.atphs.......hhhHhhhltp.....................................ssh-...LstssNLl.aF...tst..p.h.hl.t...slp-s.Ls....s........T+...hhsG...........shshhhpGhpaFt.......................chc.+ththKhhslc+ht.chhh ...................lc-hpalPEhScuuKpIuhAlATGQYsVsoLlsY+.shGTho+YLRpLCshs--LahRL-Gsl....sLhL-sE-+ElItRhLPsslC+tLslcY+sp+sAh.FF+soFhsRsEuAL+clYusFCpCG-.s...R..cthspststsus............................ccus........hS.usssSpascL..Ecs-.LRLppstp.................LGsh+LPAIRHLTAs-ssRVpsuVuRDLGF.uc......WSpoLscca.FLL.PsGh.usus.Pc+GYAhYLASNsVLsLplIRlLRssIR+EasAslRhLoG-Vp+LlRhh.........................................cspu..ALlRpuhuQs.sppR.......tLpRh...+..h.c-lcRhphspssFlcsFCDaL-lspRIPDY+ulS.phpREhLhLHsF+LRR.hhs................................ssosE...ss+spRLlha..l++Gcs.spDtstL.phuosLSDsELS....N........s+...p+As.....s.........lVuhsssulch...........................csHhp+h-RLaVRRhRs+cV.s................................. 0 0 0 0 +5829 PF06001 DUF902 Domain of Unknown Function (DUF902) Yeats C, Bateman A anon Pfam-B_3539 (release 8.0) Domain This domain of unknown function is found in several transcriptional co-activators including the CREB-binding protein, which is an acetyltransferase that acetylates histones, giving a specific tag for transcriptional activation. This short domain is found to the C-terminus of bromodomains. 
The 40 residue domain contains four conserved cysteines suggesting that it may be stabilised by a zinc ion. In CREB this domain is to the N-terminus of another zinc binding PHD domain. 25.00 25.00 28.50 28.50 23.70 23.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.39 0.72 -3.82 8 198 2009-01-15 18:05:59 2003-05-22 15:46:14 8 23 88 31 105 157 1 41.80 80 1.84 CHANGED PVMpthGYCCG+KhsFsPQsLhCYGKpLCTIsRDpsYasYps ....PVMQSLGYCCG.RKhEFSPQTLCCYGKQLCTIPRDAsYYSYQN...... 0 30 41 68 +5830 PF06002 CST-I Alpha-2,3-sialyltransferase (CST-I) Moxon SJ anon Pfam-B_6887 (release 9.0) Family This family consists of several alpha-2,3-sialyltransferase (CST-I) proteins largely found in Campylobacter jejuni. 23.10 23.10 23.10 24.80 22.00 23.00 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.95 0.70 -5.14 7 193 2009-01-15 18:05:59 2003-05-22 15:52:33 7 2 103 15 9 148 3 226.00 41 89.60 CHANGED .KslIIAGNGPSLKsIDYphLPK-aDVFRCNQFYFED+YaLG+cIKuVFFNPsVFhpQYaThppLlpNsEYpl-pIhCSsh.Nhshl-spp.hc......hptaFsssp.Gap.hlKpLctF.s.alKY+ElY.spRITSGVYMCslAlALGYK-IYLsGIDFYtsstshYsF-spppNlhphhsshpppcsp..hHShphDlpALphLpKhYtlplYuLsPsS.LspahsLus...shs.sFhl.cK.psYhpDILlssp.s..ph.t.h.hK+.+ltpNlh.hL.h+DlLch.p-lK+hhKEK ..................K.lIIAGNGPSlKpIDYSh.LPpDaDVFRCNQFYFED+YaLGKchKtVF..aNs.hh.pQhhThhpLlpNpEYchE...Ihhosh.Nhtphct..c.hhc...........................h.paasssp.uas.hhppLct.F.s.ahpapch..a....sp+hToGlYMhssAlAhGYK-IYLsGID..FY........pphs.ahFcpppppl.p....ht.ppptp..hHoh..p...hDlpAL.hhpcp..YtlplYslsPpS.Lspah.LuP........p.s.tFh...cK...shhp.-hlhss....tph.........tp......p....h...p...h..................................................................................... 0 3 7 9 +5831 PF06003 SMN Survival motor neuron protein (SMN) Moxon SJ anon Pfam-B_7026 (release 9.0) Family This family consists of several eukaryotic survival motor neuron (SMN) proteins. 
The Survival of Motor Neurons (SMN) protein, the product of the spinal muscular atrophy-determining gene, is part of a large macromolecular complex (SMN complex) that functions in the assembly of spliceosomal small nuclear ribonucleoproteins (snRNPs). The SMN complex functions as a specificity factor essential for the efficient assembly of Sm proteins on U snRNAs and likely protects cells from illicit, and potentially deleterious, non-specific binding of Sm proteins to RNAs [1]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.80 0.70 -5.28 3 460 2012-10-02 16:56:36 2003-05-22 15:59:04 7 15 193 8 279 592 1 149.70 19 54.01 CHANGED GQSDDSDIWDDTALIKAYDKAVASFKNALKNEDCopPuEs-EpNPGsKRKNNKKNRSRKKCNAAPLKcW+VGDSCNAVWSEDGNlYoATIoSIDtKRGTCVVsYTGYGNcEEQNLADLLsPsoD...s-pt.pEsNlNETEaSTDESDRSS+SHcs+s......pNpspu+sS.WNsRFPPsPPPsPPGF....GRHGEKhc.shPPFLSGWPPPFPsGPPMIPPPPPMSPDusEDDEALGSMLISWYMSGYHTGYYLGLKQGRMEAAluKcuHpK ....................................................................................................................................................................................................................................................t......t..W.p..sG-pC.Ah.a.tpDG..p...h..Y.ApIttl..s...p...tt..ssh...lhatsY.sNt..E.......hts...lh...........tt.............t....t......t........................................................................................................................................................................................................................................................................................................................................................................ptt..................................................................................................... 
2 91 127 207 +5832 PF06004 DUF903 Bacterial protein of unknown function (DUF903) Moxon SJ anon Pfam-B_7037 (release 9.0) Family This family consists of several small bacterial proteins several of which are classified as putative lipoproteins. The function of this family is unknown. 21.10 21.10 21.60 28.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.13 0.72 -4.44 30 1612 2009-01-15 18:05:59 2003-05-22 16:01:35 7 1 599 28 138 407 4 49.80 54 68.54 CHANGED shVlsppDGppIlTpsKPchDc-oGhhpYcchs.GcctpIN+DpVpplpEh ..sYVMpTpDGRpIlTDGKPplDsDTGhlSY+Dtp.GNppQINRsDVpphlEh.......... 0 10 29 85 +5833 PF06005 DUF904 Protein of unknown function (DUF904) Moxon SJ anon Pfam-B_7038 (release 9.0) Family This family consists of several bacterial and archaeal hypothetical proteins of unknown function. 30.00 30.00 30.10 30.00 29.90 29.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.42 0.72 -3.68 35 833 2009-01-15 18:05:59 2003-05-22 16:04:09 7 2 821 4 109 271 20 75.10 62 94.17 CHANGED MshElL-cLEpKlppAl-TIsLLphEl-ELK-cptt.t.......ptpppL.pENpcL+................pEpssWpcRLcuLLGKlcpl .....MSLEVFEKLEAKVQQAIDTITLLQMEIEELKEKNssLspEs........QphpcpREpLE+ENppLK................cpQsuWQERLpALLG+MEEV.............. 0 15 45 82 +5834 PF06006 DUF905 Bacterial protein of unknown function (DUF905) Moxon SJ anon Pfam-B_7072 (release 9.0) Family This family consists of several short hypothetical Enterobacteria proteins of unknown function. Structural analysis of the surface features of the protein YvyC has revealed a single cluster of highly conserved residues on the surface. Additionally, these residues fall into two groups which lie within the two largest of the three cavities identified over the surface. 
The conclusion from this is that these two cavities with, Leu 58, Glu 75, Ile 82, and Glu 83 and Pro 86, conserved, are likely to be important for the molecular function and reflect the cavities found on the surface of the FlaG proteins in Pfam:PF03646. 22.30 22.30 22.30 22.50 22.10 22.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.46 0.72 -4.60 10 520 2012-10-01 21:18:35 2003-05-22 16:06:47 7 3 290 1 14 219 2 65.50 62 87.11 CHANGED LhsLP-GPFTRcQApA...VuAsYpNVhIED.DQGsHFRLVVR...ss-GphVWRsWNFEPsAGchLN+YIpopGIh+ ...........h.hLP.-GsFoRcQA.A...VsstYRNVFIED.DQGsHFRLVlR....s-GphhWRsWNFEssAGhhhNphlts.GIl+...................... 0 2 4 9 +5835 PF06007 PhnJ Phosphonate metabolism protein PhnJ Moxon SJ anon Pfam-B_7179 (release 9.0) Family This family consists of several bacterial phosphonate metabolism (PhnJ) sequences. The exact role that PhnJ plays in phosphonate utilisation is unknown. 25.00 25.00 54.90 54.80 24.20 17.80 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.78 0.70 -5.62 32 691 2009-01-15 18:05:59 2003-05-22 16:13:10 6 3 653 0 123 424 122 272.60 76 95.83 CHANGED psssYNFAaLDEpoKR.IRRulLKAlAIPGYQVPFuSREMPhshGWGTGGlQlThulIGtsDsLKVIDQGuDDosNAVsIR+FhppTs.GVsTTc+Ts-ATlIQTRHRIPEpPLpcsQILVhQVPhPEPLRhlEPpEscs+pMHA.t-YuhhaVKLYEDIs+aGcIspoacYPVhVNsRYlMcPSPIP+FDsPKhcpssAL.LFGAGREKRIYAVPPYTpVcsLsF-DaPFclppaps.......sCuhCGuocSaLDElhhcDpGs+ha...CSDoDYCpp+htptpt .........sh.sGYNFAYLDEQTKRMIRRAILKAVAIPGYQVPFGGREMPMPYGWGTGGIQLTASlIGcsDVLKVIDQGADDTTNAVSIRpFFpRVo.GVsTTERTs-ATlIQTRHRIPETPLoEDQIllaQVPIPEPLRFIEPRETETRpMHALEEYGlMpVKLYEDIARFGHIATTYAYPVKVNGRYVMDPSPIPKFDNPKMcMhPALQLFGAGREKRIYAVPPaT+VESLDFDDHPFoVQpWD-.......PCAlCGSscSYLDEV.Vl.....DDsGsRMFV..CSDTDYCcQppptt.pt.............. 0 22 65 90 +5836 PF06008 Laminin_I Laminin Domain I Yeats C anon Pfam-B_1925 (release 8.0) Family coiled-coil structure. 
It has been suggested that the domains I and II from laminin A, B1 and B2 may come together to form a triple helical coiled-coil structure [1]. 30.50 30.50 30.70 30.60 30.20 30.40 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.71 0.70 -5.07 9 327 2009-01-15 18:05:59 2003-05-22 16:14:52 9 117 70 0 165 325 1 235.80 22 9.88 CHANGED p.......lhuls.hTGsh.tsh+l.tphcshpppLpphhtthstpppplpsh-pt.lpsLtp-s-sLtccsspshspupplppsscpThppApsLtttIcpltpslpplhpphtthsp...pt.pssspslppthtpup+hLpplRtR.s.hpp.ppsA-t-hctAptLLsclpphhpp.pt-spuLhpslpcpLscaps+LpDhcphLccAtspsp-ApcLsttsptshp.thpcpppclpcppphhscpLssupshLtpssthLpthspsh ....................................t...p....l.sls..sush....t.hch.hphps....hpp.pLpphhsthp.......tp.plp.h-pp..hpsL..p-hc..pLtp+s........p..ts.ttcupp.h....tpssppshppAppL.p.lpplhpp....lp...t..l.pp.h..thst....tpphss..tplpphhtpspphlp-hRp+...s..httppp.A-tEhptAp....t..LLpplpphhtp.ptc....sp....sl..hps...lpcpLscaps+LpDhpphLpcA..hsps+pAp.....plsttNptphp.thpcphpplpptppphpphLppups.Ltpsp.hhp.hpp..h............................... 0 21 33 81 +5837 PF06009 Laminin_II Laminin Domain II Yeats C anon Pfam-B_1925 (release 8.0) Family It has been suggested that the domains I and II from laminin A, B1 and B2 may come together to form a triple helical coiled-coil structure [1]. 
27.90 27.90 27.90 28.20 27.80 27.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.75 0.71 -4.42 8 340 2009-12-23 11:36:11 2003-05-22 16:21:42 7 125 84 1 171 348 0 130.80 28 5.22 CHANGED uKphAscAscosspVLctlpslspsltphppsluplsssht.hpt.hp..stspsltsAssuV+sLpcpAspLl-+Lcslcphcss.t.....LScNlucIKcLIuQARctAspIKVuspFcscoslcl+s.pcssshsshTsl ........................................................................KthsppAscpstpVhptl..pslp.pslpp.h.....pps...hsphpss..............t...........s.hspsl...s..sA.ss....s..VpsLp...p..phsp.Lhc.KL.csl.......c....php....s............locsIscI+EL.IsQARct.As.p..lpVuhpF.s..Gpsslpl+s..shsshpshTs.............................. 0 24 38 93 +5839 PF06011 TRP DUF907; Transient receptor potential (TRP) ion channel Moxon SJ, Mistry J, Wood V anon Pfam-B_5564 (release 9.0) Family This family of proteins are transient receptor potential (TRP) ion channels.\ \ They are essential for cellular viability and are involved in cell growth and cell wall synthesis [1]. The genes for these proteins are homologous to polycystic kidney disease related ion channel genes [1]. 
28.50 28.50 28.60 28.50 28.30 28.40 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.48 0.70 -5.91 75 554 2009-01-15 18:05:59 2003-05-22 16:36:52 7 15 147 0 432 554 9 419.00 23 50.82 CHANGED ulsassAsluGlullsouhsushut..............................u.......ssAscl......ussshslFtYaQshuhsGMhuVs.hPslhtuaspNFtWShGlIphsFhQphhsha....hpuTGG............ssoshhsshtsh........sh.s........................................................R..............th.h...............................................................t.s...ss..hhhp.......GlpRluhhssIcposhahTu.hhhallhlhhlslhlhhhKhhlclh...................h.+tthh.psschtpa......Rppah...slltG...slhRllh.lhas.lsllshapFspsD..............................................................................................................S..suslllAslhlllhhullsas................................................sh+llhhu+cphphhpss..uhhLYsD.......pslp+aGalYspa+ss.taaahlsslsYhhl+ulhIuhsQs..sGhsQslslhllEhlhllslhhhRPahsKpo.NhhNI.sIsllphlsslhhlhFsshas.stsspulhG.llhhllpAshslhLhlhl...llssllslhp+sP.cs+hps..........................................hpDsR....sSFhppts 
.......................................................................................................................h.ltasssslsshullsoshhu.hhu................................................s....sshsph.................usshhs...lhtahQthshhGhhuls.hPshht........sa.spshtW.Sh.G.ll...phs...h...hpph..hs.ha...tsoGG.....................sss..h.h.s..tsh.........................................................p........................................................................................t..............hhh.........Glpphu..hhslptsshh..hTu.hhhah.lhlhhhhhhlhhh+hhl.chh......................................................................................h..+hthh..tpph.ta......Rp.pah...sh.htG...slhRllh..lha..hs..lhshap..h...st...ts......................................................................................................................S........sushllAsl.hl.hh.hhshh.s.ah..............................................................sh.phhhhs+.p.ht..ts.......shhLasD................thlp+aGhlY.spa+ss.taaahlshlhahh.l+uhhluhsQs................sGhs..Qslslh.llEhhhllhl.hhh+Pa...hs...+po.Nhhs.l.hlsslph.lsshhhlhF.ss..h...s..sthsp.....sh.....hG.hlh...hllpu...shslhL..hlhh.........hlpsh.ht..lhpc.ps.chphp....................................t.............................................................................................. 
0 105 229 366 +5840 PF06012 DUF908 Domain of Unknown Function (DUF908) Yeats C anon Pfam-B_6534 (release 8.0) Family \N 25.00 25.00 26.40 30.30 21.60 24.50 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.77 0.70 -5.13 25 318 2009-09-11 20:50:55 2003-05-22 17:01:12 7 40 239 0 225 312 1 287.90 26 8.83 CHANGED sccLllslLcFophLLEpCsNRslYsSh-.+LssLLsosshcllhusLcllhthupRh..tpt.....st.h..p.hppp.th.hp+lhplAtsastssh..........................................................sstscthuLschh...........tccph.pphsplphpYYhss.......................................................t....+pstttts............................t.ttpsssspsuhphhclsppphpppslp-lhcpthpp..lP............pchpa-hhp+lRhs+uhsssph.....RppllshRlhAIuslshlh...spststs+lhph-s.......hhppLs-Llphs...s.....plspplpshulpuLpulup..++sphss........llssLusslsHGlLhpllRphlspl ......................................................................cpLlhtlLpFotlLlEpsh.sRplYuSh-....+LssLLsosshpllhusLpllhhhupR............p.h.tphhtpphp..hp+.l.pLApsa.............................................................................sspppshuLspts......................tph.tphs.slphpaYsps............................................................................................................................................ptp..ts..spshphhcl.p.ph.h..s.p.s.......tplhpphhp....lP............pppphtlhp+lRlApuhsstpp.....R.phlphRLhAlshLsh.t.....s.pp.hspll.ts.................lh...ppLs-..l.lphs...s...............................ph.hp...l..pshuLpsLsulsp..........cp...s+hss...............llsshus..s..s.HGhL.hllRpslpt........................................................................................................................................ 
0 71 121 188 +5841 PF06013 WXG100 DUF909; Proteins of 100 residues with WXG Moxon SJ, Studholme DJ anon Pfam-B_7198 (release 9.0) Family ESAT-6 is a small protein appears to be of fundamental importance in virulence and protective immunity in Mycobacterium tuberculosis. Homologues have been detected in other Gram-positive bacterial species. It may represent a novel secretion system potentially driven by the Pfam:PF01580 domains in the YukA-like proteins [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.81 0.72 -3.98 120 3120 2012-10-01 21:44:22 2003-05-23 10:31:35 7 26 880 30 526 1700 48 84.80 18 66.58 CHANGED plplsspplppsAsphpptssplpshlp.plpsph.stl.tu..sWp.GsuupuFpsthp.phptsh.pphhptLpplsppLppsuppapps-p ....................hphs.ttlp.st.u.sphps.t.upp.lcsh..hp.plpsph..psl..tu....sWp...Gt.u.sssap.s....t.h....s.p....hppsh.pplh...p.hLpplspplppsusshtpt-t................... 0 169 352 458 +5842 PF06014 DUF910 Bacterial protein of unknown function (DUF910) Moxon SJ anon Pfam-B_7253 (release 9.0) Family This family consists of several short bacterial proteins of unknown function. 25.00 25.00 31.00 30.90 19.30 17.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.87 0.72 -4.05 26 915 2009-01-15 18:05:59 2003-05-23 10:36:21 6 1 911 3 89 305 0 61.60 50 83.25 CHANGED MKTLYDVQQLLKpFGhhVYlGcRhaDIELMtlELccLYcusLlD+..csYhpAcllLp+E+phE ...M+ThYDVQQLLKpFGhllYhGcRlaDIELMplELpcLYcutLlD+..p-YLpAchlL++EHchE...... 0 19 44 67 +5843 PF06015 Chordopox_A30L Chordopoxvirus A30L protein Moxon SJ anon Pfam-B_7254 (release 9.0) Family This family consists of several short Chordopoxvirus proteins which are homologous to the A30L protein of Vaccinia virus. 
The vaccinia virus A30L protein is required for the association of electron-dense, granular, proteinaceous material with the concave surfaces of crescent membranes, an early step in viral morphogenesis. A30L is known to interact with the G7L protein and it has been shown that the stability of each is dependent on its association with the other [1]. 25.00 25.00 36.40 36.30 18.30 18.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.49 0.72 -4.37 10 57 2009-09-11 08:38:21 2003-05-23 10:41:39 7 1 46 0 0 32 0 72.70 57 98.13 CHANGED -EDINEuNF.HLLosLSNssp...DsEFuATLSsl+ElIStINhKlLuINKKSKKNsRss-p...hsaVs+..REssRY .hEDlNEANFsHLLhNLSNNKD..lDspauuTLSll+ELlStINhKIhsINKKSKKNo..+o.lEp....lpasuu..REhsRh.............. 0 0 0 0 +5844 PF06016 Reovirus_L2 Reovirus core-spike protein lambda-2 (L2) Moxon SJ anon Pfam-B_7350 (release 9.0) Family This family consists of several Reovirus core-spike protein lambda-2 (L2) sequences. The reovirus L2 genome segment encodes the core spike protein lambda-2, which mediates enzymatic reactions in 5' capping of the viral plus-strand transcripts [1]. 
18.00 18.00 19.80 19.80 16.30 16.20 hmmbuild -o /dev/null HMM SEED 1289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.02 0.70 -14.02 0.70 -7.60 4 56 2009-01-15 18:05:59 2003-05-23 11:11:48 6 2 22 3 0 58 0 1162.20 43 99.73 CHANGED MAsVaGlpLsspLsosTsR+hhp.hpaDpLlosLpsssh.tp.aRuLc.psss.shoslQha.PLNuas.ssthltpshsacsWccaIp-+hpALss.LhRtYPlss.usRhlNPllhsAlsuuFLsspshsthLshLFls...cssIsslhssshohopch.sstcslhTPAGpKYlpLpuYsso.sssPshFuKclssYApsaYasshssp.phhapasuusslL.HFDpPT.G.HhLlPshuss.uhsohllstsshlLlESsL-phphNApAssuosVsRlDQsYHslhusp.sspsoLthRLssLSLLAlpGhQhss.lPspPThusVuuFluRLhu.Gcspphl..Rsc.lhlh.-SPhshsssst..YlphpssphshoIGshshlsDsspPltaLPQYc.ATs.shspupDuhpcssh.PLhspauhhhoGsAhhphhDhstcss.VassspLsphPssYFst-cthpRsLFSphRuhuDRShlKDsAslpahushlsPsssp.lLssuhSMAYlGASusHussDpPlIIcslhuGolPGVPhPpplpQFGYDVspGoIhDlshshPTGTFtFVYSDVDQVpDussDlsAosRtshuhLshshphTssGGshVVKlNFPTpshWpplFpphusphoolaLlKPhlsNslElFLl.FusR...ssuuLpsosulhhFLlshatR.psLscshsplP.hGs...lDDGsoshGlsslcl.sPshSshstshtlsshuhhsuhsssppShhsh.-SpGspssTIhu+RT.hSppRhsRLt.VPhlhssolshQpRhhssss.pLFsspusssTplh.Luhhhshhssussh.sls+hLDLGTGPEsRILSLlPsshsVThsDsRPsAps.usas.shTsalphDYLssuhhsustsDslTsIhSLGAAsAuAuhsLhsulpQLIphlssusspplaLQLNsPLsssuSl..sllEIDppsppYhFsshtRsEPYus.sAL.phlcslhPussloWhThSsohcWhcYslt.uoolo.sDIshAhphS+hsPIh+IDhsphPhch.PsshhVGtpsslplsuhsspsshpsphsuVplhossssNhsuhhuslul.assspscasLshsPspPGIhsh..llshsss.lshGShsIssPssolslsaP..upLDFT.uGsDsclssssaYcLulF..........lhhDGpaphsNP-+.t.hsoAussRVlpalhDluDsalLhYlCDVossslGchIthPLs-l.phsaPsNsslhhShPasuststlp.sGs.hssLss.shVLPsuhtlhshSTultsuhPTahVPsGsYshVhl 
...........MAplhGlRLusoLSuPs.cphsppaThc-hhSsLchss...cPW+sL+sptss.slsAVpLhhPLpGhl.sh.hhthshsaspaEpahp.hLpsLt.plLRhYPIusYpschlNshlsNAlVuAFLSN.sahchLshLhls...cs..lpDlhssGhslppah..hctssl.ssAGpKa.lQ.hpsYs.s...ssDPsLFuKpLpsYuhsaYh.shpsh.pahhpH.SsussLlHaD+PoNG.HhLlso.oshs.Assh.lsAhsulLLESCLpQ.thNsps.supPVsR.spshh.h.us.a.ttpsoLpYpLhsLS.hhhNGYQhsc.hPtp.ssthluuhlupLhs.uts.Tsl.Pp.thhplhhDSP.shsGtst.salp+.tht.Ghplusl..hpssspsVtWhPQas.spu.ssD.u.s.lu+hTpLPLRscYushWsGsAL..asshsRppt.Vh..p.hsQhPssYFssD-pauRShFS.hRtltDRSLlKDTAsLhah.p.lsssps+chl.supohsYhGASusHus.sQP.lIcPhhpGplsGV..P.SV+QhGa-VspGsIsDlthPhsoGsa.FVYSDVDQV.sGcsDLshSSphhpS.Ls.hh+hThsGGShVlKhNFPTphVWpaI.ppl.P.hoShhLhKPhVoNNlELahl.Fu.+....puuhpsousVhhFhhsphtRYcsLps..pplPShG....lDDuhoVoslphlsl..suhSshpptttluh.ulhsslGst+hSlt.Y.-SatsplhsIh...oPtStphhsRLtYlPhl.Ppol-VQtRshhsusP.lFs..hsss.sp...LohhYshtloussa.-...schhLDLGTGPEA+.LphlPss.PVThsDhRPhs.PSGCWs.shTsFLphDYLssshlhustuDlVoClLSLGAAsAstshThctuhpQLlp.h.scusspslhlQlNCPhs.sstsl.+thLElspTNppYhF.phGR.EPa.shsuL.cIscshhPshslphhshs.sLpWhchAlhpssoloSssIhlA.hh.+ahPlhhhchcthshph.ssshhVGpsholsl.sapspssapshhssshhhohpsss.suh.stVos..ssspsphsLshs.ussGIho...llts.ss.lSLGShVl-uPDsslo.haP..ApLDholuGTDl-lplssaYclhh.F..........sh.DspapIhp.Dp.......us...sohshNhhhDhuDhahhhhlpDVpspslGhaI.+.L..Lso.shPsstchFLShP.-...hhVp.sGs..s.h.sutshs.PpsW.slssohsh.suhPoahVPPGc.YsLs......... 
1 0 0 0 +5845 PF06017 Myosin_TH1 M_tail; Myosin_tail_2; Myosin tail Yeats C anon Pfam-B_12631 (release 8.0) Family \N 31.10 31.10 31.10 31.10 30.90 31.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.14 0.71 -5.01 27 904 2009-09-13 17:10:37 2003-05-23 11:23:47 8 48 257 0 561 839 2 186.30 23 19.42 CHANGED h+htAsclhpsKKccptpSl....RtFhGDYluhcp.psphpphhtspt........t........pllFust....VsKhsR....psKsppRtllLTspslYllt............sthphtlK++lslsplpulSlSshpDshhllHh......psptD.llpssaK.........oEhlohLpcphpp.tt..pLplp.husslphph+p........st.pshphthstssss...hhcsspsthhphsss .......................................+..Asplhts.....+KpphttSl.....R.FhG...DYLshpp.....s.sp...htp..h.htppt.............................................plhFush.......VpKhsR............ptKsppRhllL.......TspslYll.............................sttphpl.Kp.pl.s.lss...........lpulS.l...Ss.hpDshhllHl...................................ppcsDhl..l.ps.s.ht............hEhlohL.hpt.hpp..tt.....plp.lp.hus.phphphpt.............tt.t..hphth.s.ts.....t...............htt.tp.........t....................................................................... 0 191 263 401 +5846 PF06018 CodY CodY GAF-like domain Moxon SJ, Bateman A anon Pfam-B_7573 (release 9.0) Domain This domain is a GAF-like domain found at the N-terminus of several bacterial GTP-sensing transcriptional pleiotropic repressor CodY proteins. Presumably this domain is involved in GTP binding. CodY has been found to repress the dipeptide transport operon (dpp) of Bacillus subtilis in nutrient-rich conditions [1]. The CodY protein also has a repressor effect on many genes in Lactococcus lactis during growth in milk [2]. 
29.50 29.50 29.90 29.60 28.60 29.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.92 0.71 -4.99 17 1124 2012-10-02 14:34:25 2003-05-23 11:26:54 9 5 1084 9 136 463 0 176.60 45 67.49 CHANGED sLLpKTR+lNslLQ+u....sspslsFp-huppLu-VIcsNlallS++G+lLGYuhp...tphps-Rhcp.hhp-+pFP--YspsLhplt-TpuNlslssphosFPhEscp.FtsGlTTIVPIhGuGcRLGTLlLsR.scpFsDDDLlLuEYuuTVVGhEIL+t+s-ElEEEARp+AsVQMAIso ..........................pLLpKTR+lNslLQ+s.........htptlsapcluppLu-llcsNsaIlup+G+lLGYshp....ph.p..s.-Rlcp...hh.p...p...+...p...FP-.-.Yspt.lhplh-TcuNlslc..ss.ho.lF.PsE.s.+-hFs.s.u.l.TTIs.PIhGu.GcRLGTLllhRs..cccFsD-DLlLuEYuuTVVGhplLptpp-E..lEcEsRc+sAVpMAINo...... 0 61 93 115 +5847 PF06019 Phage_30_8 Phage GP30.8 protein Moxon SJ anon Pfam-B_7692 (release 9.0) Family This family consists of several GP30.8 proteins from the T4-like phages. The function of this family is unknown. 25.00 25.00 36.40 51.50 21.00 20.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.60 0.71 -4.42 2 31 2009-09-11 08:57:46 2003-05-23 11:33:00 6 1 30 0 0 23 0 112.20 68 99.49 CHANGED MpTINLNATlKs+sasG....hhsEh.WhlhuVpGDll.spTs-G.usDFsapIplcNFFTG.IYcLNoslhGpI.....EppEhG......WY..sARpRAEpLIEKhKthGhlD.t+WphlK .........MKTINLNAsVKTKCFNG....KYcETMWFLMAVEGDIIEVETTEGMGTDFTFTIQVHNFFTGWIYELNTVIVGKI.....EQNELGE.....WYYVTARQRAERLIEKMKKVGKLDMpHWKVVK.. 0 0 0 0 +5848 PF06020 Roughex Drosophila roughex protein Moxon SJ anon Pfam-B_7712 (release 9.0) Family This family consists of several roughex (RUX) proteins specific to Drosophila species. Roughex can influence the intracellular distribution of cyclin A and is therefore defined as a distinct and specialised cell cycle inhibitor for cyclin A-dependent kinase activity [1]. Rux is though to regulate the metaphase to anaphase transition during development [2]. 
25.00 25.00 25.10 30.20 21.80 21.50 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.03 0.70 -5.76 3 54 2009-01-15 18:05:59 2003-05-23 11:40:35 6 2 16 0 8 55 0 283.40 60 98.36 CHANGED MNAPEEHKcTPLEVIHEFIKGVDDGTIRRDLGEDCILSYYSRNVRGAKAITGFLRTQLTpRYKHEsFEEAAplAKGDELLLQARFGRSFDuERRRIYEEKERsGTTsLHLHAESDDEEVNEEFSTTLITPPRPSSYNLNoLKYVEACGLLNRRDEHlYGGLDLGESCAVHLTLGYRSTaLPGGQVSGFEICLAVYDRGLTSLNRSTLlPPPhuISFuRRANARCNPTTDDEuDoEEDSPPPTuRRGVRRTLFTEENTQEEE..........DuDPDPIPEVE....QEQPAPQQAEETAREAVNIPVDLPTPsETTNsSSYTPRKRhQTTNGNEVPPKRTPGPQRMRF ...................................cTPLEVIHEFIpGVDDGoIRRDLuEDCILSaYSRNVRGAKAlTGFLRsQLThRYKH-sFEEAtplthGDELLLpARFGRSFDhtRRRIYEEKERsu.sT.....LHlH.sE.SDD...Eps...Nc....EFSoo......LITPPRPSSY.N.L..pSLKYVEuCGLLN+.Rs.EH.VYGGLDhG.EoCAVHLTLGYRpT.LPGGpVSGFEICLAVYDRGL.psLpRSTLss.P.huhS...hsRRup...hRCNsTTDDEuDsEED.PPPTuRRsVRRTLFTEENTQcEE..........................D.D.s.Pl.E.p............................QpQP..APQ..Q.s....ppsup...............s....VDlsTP.chTshsShTsRKR.Qt............................................................................ 0 1 1 4 +5849 PF06021 Gly_acyl_tr_N Glycine_acyl_tr; Aralkyl acyl-CoA:amino acid N-acyltransferase Moxon SJ anon Pfam-B_7828 (release 9.0) Family This family consists of several mammalian specific aralkyl acyl-CoA:amino acid N-acyltransferase (glycine N-acyltransferase) proteins EC:2.3.1.13. 
25.00 25.00 32.30 25.90 22.50 23.00 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.39 0.71 -4.91 10 198 2009-01-15 18:05:59 2003-05-23 11:49:27 6 4 36 0 101 166 0 181.00 41 67.51 CHANGED Ml.hLQuuQhLQMLEKSLRKSLPESLKVYGTVFHINcGNPFNLKALVDKWPDFpTVVlRPQEQEMTDDLDHYTNTYplYSKDPcpCQEFLuosEVINWKQHLQIQSSQSuLsEsIpsLAAoKSsKVK+opsILYhssETtK+LsPSLh-sKp.LssssG+P+...ulcQchFKLSoLDVoHAuLVN+aWpFGGNERSpRFIERCIpsFPo ................................Mh.LpssphL.hLcp.LcpplPE..SL.K.VYGsl.hplN+GN..PFph-llVDpWPDFpsVlsRPQ.cp-MsDDhDaYTNsYplaoKD.pphpchLtps-VINWcQthQI.Q..u...Qp.sL..s.-sl.pplA..ss.Kplp..V.chp.pt..hLhhh.phh.p......t..s.s..s.........csp........t.tp..p........t.ppt.h.+.ho.LsloaAsLVNchWphGGNE+ShRaIpchIpsFPo...................................... 0 4 8 26 +5850 PF06022 Cir_Bir_Yir Plasmodium variant antigen protein Cir/Yir/Bir Moxon SJ anon Pfam-B_8754 (release 9.0) Family This family consists of several Cir, Yir and Bir proteins from the Plasmodium species P.chabaudi, P.yoelii and P.berghei. 
20.40 20.40 20.50 20.50 20.10 19.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.06 0.70 -5.17 119 1443 2012-10-01 19:45:41 2003-05-23 12:01:26 6 3 6 0 1275 1447 0 235.70 29 83.60 CHANGED M..scp....h..C......ppFptlpphhsDc.Lspssp...YpFpspt..hppYC.ss.s.............Cs..................sD..l-KIsAGsLaLhsp.hats.s.hp..p.s+sphs.lltYIhIWLSYhLsh.csppphs..slp-FYspaIpsssc.Ypp..............pIsss.psYs..sYK-lI-++ptlhs..hshpplSKFY-AFKhLCsMYsph.....st.psssCsphhppAscFVcKYcc.LNp.......sss..scsosYspl...LSoLSsDYsNhKpcCss.....ptph..ssLPshcp.......................................................p.ssSSSSIssKL....lslL.IFs.AIulFLGIu...YK .....................................C....t.h..l.p.hssp...t.ptp...hph.p.t......hptY.Css..t................pC.p...................ss..hc+l.suuhlaLhpt.hhtp..t...t.............tpt..p..hh.Yhh.lWLu...YhL.s.pp.p..p..t..h.s.....slppFYspaIps...s...p...p...Ypp................lpts......pt...hp......s..ac.-.lI-c+pph.hs..hs..h....p...lScaY-sFK.LCphYsph.....s.t...pss...p.spp......h..p..p...Ap..c.Fs....cc.Ycc.....L.pp.......s......s.....sc....s....s.....sY.ppl...LssLSsDYssh.....Kp.ppss...........h....sslsp..h.pt...............................................................................hspsssh.h..spL....h.lh.Ihs.uhshhlGl.YK................................................................................................................................................................................................................................................. 1 0 903 1275 +5851 PF06023 DUF911 Archaeal protein of unknown function (DUF911) Moxon SJ anon Pfam-B_8782 (release 9.0) Family This family consists of several archaeal proteins of unknown function. 
20.20 20.20 20.50 20.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.06 0.70 -5.20 8 70 2012-10-11 20:44:44 2003-05-23 12:12:43 7 2 49 0 43 95 1 261.40 34 98.55 CHANGED Mhhphp.h..+hL.+Rl+th.tscsVsEELRGWpWspPPVpPpsh.ltLolSDls.sYCsTtRDVYL+hVLshRGc.sstlhhGpsIHcsatpulc-l+phhhuucs.s.sh.p.hhtpth....h..phhc....huctlacalsphhpuchscl+...ut.sttsc.Sluhhs..lPhhsEasVDGSsLGLSshlpVDA..hhLs..lVlEhKsGpapcpHcLALAGYALAlESthElPVDaGhLlhl.shNssl.+hpsclhhIu-sLRpEFLEtRDcsh-lltsssDPGluhcCsssCPFhchCp....t ............................................................................................h..+.L.+chh.sh.ttsslsEELRGWsappPPlpPhth.htLulS-lu.tYCsTtRDlYLR.+.Vhthpsc....s.t.shhhGphlHclhtp.shp...php+hl.t.s....h............t..h...............h....h..t....th..............hspph.p.................hsctlhchh..sh..plhuc.hpc.hh.......st....h...........Shs....s......lP..l.h......sEht....VD.Go.LGLS.t..L.ps.......DA.h.h.hs........ll.lEh....K.h..Gp.....h..p...c..h.HcLuLAGYALAlEuthElPlDhGhllYl.sh.s...tsh...chphcshhIussLRpcFl-tRDchh-hltpttD.PG.hs......p...Cs.tsCPFhphCpt............................................... 1 19 29 35 +5852 PF06024 DUF912 Nucleopolyhedrovirus protein of unknown function (DUF912) Moxon SJ anon Pfam-B_8809 (release 9.0) Family This family consists of several Nucleopolyhedrovirus proteins of unknown function. 26.00 26.00 26.60 26.00 25.90 25.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.59 0.72 -3.96 24 60 2009-01-15 18:05:59 2003-05-23 12:15:59 7 2 52 0 9 56 1 102.10 28 58.08 CHANGED LDVPY-RLGspp.+V-YIPLKLALsD...............sssssssststss..s..spsshsptsss..........pphshhpllllulluhlslhlLL...YsIYYFVILR-...+pp.......ss....t.pPsal ............................................LDlPY-RLuspp.pV-YIPLKLAlsD...............stsspssspppsss.s..hs.pss.hsstsss..........tspts.hhsllllulluhhslhllL...Ys...IY...YFVILR-+pppts............................ 
0 8 8 9 +5853 PF06025 DUF913 Domain of Unknown Function (DUF913) Yeats C, Sammut SJ anon Pfam-B_6534 (release 8.0) Family Members of this family are found in various ubiquitin protein ligases. 24.70 24.70 26.30 25.10 18.90 23.60 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.09 0.70 -5.71 11 332 2009-01-15 18:05:59 2003-05-23 12:19:24 7 42 240 0 227 319 4 324.60 25 10.30 CHANGED hSRslchLDshlcuspsAhuhh.osu+GhDslsshIcaEsppshc...........ssptuuuh.spppssshs.......ap...IsYhppphL+hhh+Fls+hhp......uuGhscsLRNLlD..spLlsSL+hlhcpAclFGupVaSsAssllSsFIHNEPTSaullsEAGLocuFL-Als.............................................................ssIhsusEAlosIPsAhuAICLNuSGLcLFpo.p.sAlcsaFcIFpSPpHlKsLpcs.......-hssuLGoshDELlRHpPuLKsslhsulIchlscluhLs+shshspshuu+h.......................................thhssshststthsts.hhp.hsso.sspspsss......................s..thshs-hlhsVuRFLtuhhpNpusCp.FIcpsGlEhlLslhsLssLPhDF ..................................................hshphl-hhht........hshh..s.tpGhshh..hlphElphshp..............................................................p.s..ts..s.p.hp..h..................hp.......h..pptthlKshLph.lp+hhp..................ssshsctlRp....lhD..usL.puL+pIlpN.sc.haGssla.hAsslloshlapEP.o.huslp-sGLs..psh.Lculh...................................................................................................................psl....sop-slsslPsshuAlCLNspGLp.hhp..p..pshc.phh.clhhSspal.s...hppp........................-hssslGsuhDELhRHpPsL+sshhssl.lphlpclsthsps..t..t....h.s.......................................................................................................................................t..................................................................................................................................h...l..hhp.l.s......p...s....t...thp.t.Fl.t.ttGl..llp.hhth.ths........................................................................................ 
0 72 122 187 +5854 PF06026 Rib_5-P_isom_A Ribose 5-phosphate isomerase A (phosphoriboisomerase A) Moxon SJ anon Pfam-B_5144 (release 9.0) Family This family consists of several ribose 5-phosphate isomerase A or phosphoriboisomerase A (EC:5.3.1.6) from bacteria, eukaryotes and archaea. 26.00 26.00 26.20 28.80 25.70 25.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.95 0.71 -4.95 58 3557 2012-10-04 00:26:15 2003-05-23 13:17:25 9 10 3267 42 923 2338 895 171.00 42 74.26 CHANGED lsuVsTSppopthspphGI..sltslsph...splDlslDGADElD.sp.hshIKGGGuALhREKIVAssuccFllllDpoKhVp.pLG..pa..PLPVEVlPhuhphlh+plpp....hGs..ps..plRhs.t......tshlTDNGNaIlDspht...I.pD..PttlpcplpplsGVVEsGLFssh.A-hlllGspcGsp ......................uVsoSptospphcpl.GI.....l.h...s..l.s-V........s.p...lDlhlDGAD....El....s......sp......hphI....KG.....G......G......u..A......LhRE.KIVAssuc+.aI.sIs.DpS.K.h..Vc..hLG...pF.............PLPVEVlPhutstVtRpltc........................hGs..pP...phR.s.t...........................hlTDNGNhIlDlphh.......pI........c...................Phtlpptls.tlsGVV-sGLFssh..A-..h..lllGst-Gs.................................... 0 249 520 743 +5855 PF06027 DUF914 Eukaryotic protein of unknown function (DUF914) Moxon SJ anon Pfam-B_7017 (release 9.0) Family This family consists of several hypothetical proteins of unknown function. Some of the sequences in this family are annotated as being putative membrane proteins. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.09 0.70 -5.60 5 558 2012-10-02 19:55:49 2003-05-23 13:27:53 7 12 225 0 378 1427 836 252.40 26 71.49 CHANGED pclpch.hTK+hLIuluLGQlLSLLlTusAhTSuYLAc+.tlNsPshQTFLsYsLLsLVYTshhlaRpGscphh.hIKRKWWKYhLLALlDVEANYLVVKAYQYTolTSlpLLDCWAIPsVllLSWhFLKsRYRlh+IlGVVlCIlGVVhlVsSDVlAGcRu..uGSNPllGDhLVLuGATLYAVSNVsEEalVKsLspsEllGhlGLFGAIISuIQluIFE++-LtuIHWos-huLLalGFALsMFLLYSLhPILIKsoSAThaNLSLLTSDhWSLlIGlFlFHYKVsWLYhLAFsTIhlGLIlYSspppcpsEsscsplpshpccG.ss-sscTsc .......................................................................................................h............................................................................................................................................................................................................................................................h.....h.h.a....h...h.h....u.h...h.D..l..p.u.N.a..h....h.s..h.Ah..p....Y....T.o.l..s.S.....h.p.LLsshs.Is.....h.s.h.l..l..S.hhhL...t.h...Ra.phhphlG.lhls.l.h.G...lsh.l....s......h..u..D.......h........h......s............s.........p............p.......t..................s.....u...........s............s...............l.....h...GD.....l.l...s...lhu.A.s......l.....Yu...l.ssV..h........c.E....h...h......V........p...p.....h...s...h.....h...c...h....l..u..........h...h.....G...l..F..G...h......l......l....s....u.....h....t...h.......h.......h........h......c.....h......t......t.......h......t..h.............h.....s................t.......h......h................h...h...h..h......s.....a...s..h....s..h....h....h.h...a......h.h..s...h...h........h.....h..h...s.s.u..s.......h..s....l..u..l....L.....o.....s..sh...a....ul.......hht.h.hh...a.t...............h.p.h...h.a....l.u..h....hhh..h...hG....hh..h.......................................h..................................................................................................................
................... 1 129 205 295 +5856 PF06028 DUF915 Alpha/beta hydrolase of unknown function (DUF915) Moxon SJ, Bateman A anon Pfam-B_7094 (release 9.0) Family This family consists of several bacterial proteins of unknown function. Members of this family have an alpha/beta hydrolase fold. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.57 0.70 -5.29 13 1150 2012-10-03 11:45:05 2003-05-23 13:34:06 6 5 577 4 145 1113 409 233.40 31 85.76 CHANGED psp.psstpphhPTIaIHG.uGospShstMlsplhscts.sspcsLs..hsVsp-GplphpGplsKcsppPhIclsFccNcs..uohccpupWlcplhscLpppYphpphNhVGHS.GGhuhshYhhcYusccshPplpKhVsIuusFNslps.sts.shh.hhppt.spspTshaptlhpp.ppplssslcVLsluGshpssppoDGhVshsSShoh+alhtpsucsYpcphhsGcpAsHSpLHENspVschltpFLapp ............................................................t......h.pp.hP.T.lalHGau.....uo.t...........pu.ptMlpphp.cps..s......s..........ppllp...........spV..spsGpl.p..h.p..Gp..........l.s..ps.s..ppPll.pl.t..Fcs.N+p...........sshppp........ut........a.....hppslpt.L..p.p............p.....Y.p...h...p..p..hNhV..GHShGsl...sh..s.hYh.............hp..a....s....p.........c......p...s..l....P........plp.KhVsI.....u.....u.sa...Ns...h.....................s..........s.t..........................p...................................................t.t.u....P.....s.....p...h...s......ph...........apph..hth.......c...p.hhs.p.slpVLs..IhG......s..................p..........s..........s....pSD.GtVs.sSutul+Ylltsp.sp.sYpEhph..p...G..p..s..ApHSpLH.-.N.t.pVs.phlhpFLat................................................................................................................... 
0 31 73 109 +5857 PF06029 AlkA_N AlkA N-terminal domain Yeats C anon Pfam-B_13157 (release 8.0) Domain \N 26.00 26.00 26.10 26.30 25.50 25.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.48 0.71 -4.03 90 1096 2012-10-02 11:58:57 2003-05-23 13:41:40 6 11 1039 41 244 792 63 115.30 42 29.96 CHANGED slpL..sYRPPacWsthLsFhutRAlsGlEtl...........................ss..s...........YtRolplss......s.....p......................G.hlplphs.........stp......ptlplpl....sh...sthpsLt.slluRlRRlaDLDADPtsIsstL..pshhsshlstpPGLRlPGuaD .......h.hpL..sapPPaDWshhLuFLAuRAVsGVEpV...........................s-s.......h.......Y....tRols..lsp...........t........p.................................Gllssps.s....................................t.p...........cs..Lclslss............shhsshs....pslA+.h.pRLFDLcssPptlsusL..........us.Lsss..cPGLRlPGshD....................... 0 58 130 199 +5858 PF06030 DUF916 Bacterial protein of unknown function (DUF916) Moxon SJ anon Pfam-B_7106 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.00 21.00 21.00 21.00 20.70 20.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.63 0.71 -4.32 23 922 2012-10-03 16:25:20 2003-05-23 13:51:56 7 3 256 0 77 620 6 120.50 33 35.26 CHANGED FuVpsllPcNQssps..oYFDLphcPspppsLplclpN.soccplplclssssAhTNssGhlsYspss.sphD..poLcashschlchscp.....lslsspps+plslslpMPscsacGllhGGlahp-c ........................FsVpshlP.-N.Ql..ccp..poYFDLthpPsp..pQp..lplplpN.poc.cclslpls..lssAs.T.NsNGll-Yspsp..tchD.....poLp......hsls-llph..scp........l.plsscpppslshp.lphPp....c.sF.sGlllGGlhhpp...................... 0 27 62 67 +5859 PF06031 SERTA SERTA motif Bateman A anon Pfam-B_7533 (Release 9.0) Motif This family consists of a novel motif designated as SERTA (for SEI-1, RBT1, and TARA), corresponding to the largest conserved region among TRIP-Br proteins [1]. 
The function of this motif is uncertain, but the CDK4-interacting segment of p34SEI-1 (amino acid residues 44-161) includes most of the SERTA motif [2]. 25.00 25.00 30.30 29.20 24.10 23.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.63 0.72 -4.66 15 271 2009-01-15 18:05:59 2003-05-23 13:59:28 8 2 72 0 175 259 0 37.50 46 11.67 CHANGED lLslSLcKLpphcs.sEssLRRSVLIsNTLRRlQsElc ...lLslSLhKLp.phct...sEPsLpRSVLIsNTLR+IppEh..... 0 23 39 93 +5860 PF06032 DUF917 Protein of unknown function (DUF917) Moxon SJ anon Pfam-B_7195 (release 9.0) Family This family consists of hypothetical bacterial and archaeal proteins of unknown function. 26.20 26.20 26.50 29.80 25.60 26.10 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.88 0.70 -5.90 51 579 2009-01-15 18:05:59 2003-05-23 13:59:58 7 16 394 2 281 514 91 333.10 33 69.65 CHANGED l..otpDl-sluhGuulLGoGGGGsPahGphhstptlpps....tplcllsh.--l....sD-shllssuhhG..uP.s.......lshE+......ls.sGs.EhhpAlctlpc.hhGpc.lsulhshEhGGsNulhshh..sAuthGlPllDuDuMGRAaPphpMsThtlt.Ghss.....sP.hsls-tcG.............sssll.ss........hss..hhsEclsRshss..phGutsshuthPhsuppl+ctu.lhsTlShuhcIG+Alppuctppt..shlpsllchh...sG.......phLF.pGKls-lc...Rc.spsGFshGpsplpGhpt................pspphpl.FQNEhLlAtps.............spslsssPDLIslLDt-supPlsopp...........l+YGh+VtVlulPssshhpotp.Gl-hsG....PpsFG..h.shsatPlp 
.............................lstpsl-sluhGuulLGoGGGG..sPahuthhshptl.cps.....tslcl....lss.--l......s--shhhs......suhhG..A..P..s.............VhhE+........hs..pGs..Ehhcshctltc...h.........h...s...........pc.hsuhhshEh............GGsNuhhslh..sAA..ph......G...lP..llDuDsMG.RAaPph......pMsThtlt..Ghss.........sP.hslsDtpG....................shslh....p.s.............hss...ths.EplsRshss........phG..........u.t.shhuhhPhsGppl.........+phu..lhsslohuhclG+sltt.sptppt.....phhpsllchs...sG..............hhLF.pG.K....lh.-Vp........Rc.s......psG.FshGpshlpuhst...............................tspphtl.FpNEpLhAtcs........................spslAhsPDLIshlDhco.....upslsopp.............................l.+YGh+.VtVlulsssshWpotc.GlchsG....PphFG...h..chca.sl................................ 0 113 191 243 +5861 PF06033 DUF918 Nucleopolyhedrovirus protein of unknown function (DUF918) Moxon SJ anon Pfam-B_7213 (release 9.0) Family This family consists of several Nucleopolyhedrovirus proteins with no known function. 25.00 25.00 179.60 179.50 19.30 17.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.68 18 39 2009-01-15 18:05:59 2003-05-23 14:01:47 6 1 36 0 0 38 0 152.40 46 91.30 CHANGED LEFDslslDLRHVsF..stsuts-.......tEYIlFLNVKRAhYpNFplssDhSLETLAlalYpssphslsG.sphs+sssas-h....lsaNpsD+spSllI-Lss-..ARlVVAKplpssEpYHQRlSGalDFE+R....Hpps....shlc.ssstRstLDREhEIKLhp LEFDslslDLRHVpF..stsst..ss.......pEYIIFLNVK+AhYpNFplssDhSLEoLAhalYcpsphslsG.sphp+ssshs-h....lsaNcpD+NpSllI-Lsp-..ARllVAKplpssEpYHQRVSGalDFEpRHsps.........hlc.ssppRstlDREhEIKLhp. 0 0 0 0 +5862 PF06034 DUF919 Nucleopolyhedrovirus protein of unknown function (DUF919) Moxon SJ anon Pfam-B_7250 (release 9.0) Family This family consists of several short Nucleopolyhedrovirus proteins of unknown function. 
21.00 21.00 21.80 21.00 19.80 19.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.95 0.72 -4.35 19 53 2009-01-15 18:05:59 2003-05-23 14:04:59 6 1 52 0 0 48 3 60.20 35 44.30 CHANGED spspsLppQLscIsptK+plsIc.pHaE+l++lTKsspElpcl-p+lhchR.cFLsaGlppF ......p.pppsLcpQLscIsptK+p.lslp.pHaE+l++ITKsspElppl-p+LhchR.pFLpausppF.............. 0 0 0 0 +5863 PF06035 Peptidase_C93 DUF920; BTLCP; Bacterial transglutaminase-like cysteine proteinase BTLCP Moxon SJ, Sammut SJ, Eberhardt R anon Pfam-B_7277 (release 9.0) Family Members of this family are predicted to be bacterial transglutaminase-like cysteine proteinases. They contain a conserved Cys-His-Asp catalytic triad. Their structure is predicted to be similar to that of Salmonella typhimurium N-hydroxyarylamine O-acetyltransferase Swiss:Q00267, in Pfam:PF00797, however they lack the sub-domain which is important for arylamine recognition [1]. 20.50 20.50 21.00 21.50 19.80 20.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.25 0.71 -4.84 28 779 2012-10-10 12:56:15 2003-05-23 14:06:43 6 1 470 5 245 627 58 155.80 33 71.69 CHANGED MssGuhTStPlGHYEFCpcpPsECssss.....tsssslpLTsphWpplhcVNtsVNpsIpPhTDh-laG.hcEhWuYP..sstGDCEDYsLhKR+hLhc.sGhPsusLLlTVVRpssG-GHAVLTVRTD+GDFlLDNLsscVhsWs-TsYpaLKRQSpscsGcWVslpcscss.hVuS .......................................................................................................s...t.t....h....p...pLtpVNphhNpplp.hs.DhclaG.....pcDYWuhP....h....s.......s....sG....DCEDaslhKhhpLhp.hGlssspLhl..o.h....V+.....s.............p.....s.......p.......u.HhV.Ls..h...t..T..s..p..u..-.......hl.LDNlssplhshs.p...................................tt............................................ 0 47 124 172 +5865 PF06037 DUF922 Bacterial protein of unknown function (DUF922) Moxon SJ anon Pfam-B_7397 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
20.40 20.40 20.40 21.40 20.20 20.30 hmmbuild --amino -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.14 0.71 -4.91 18 220 2009-09-11 05:34:11 2003-05-23 14:39:37 6 3 125 0 60 151 15 158.70 36 77.91 CHANGED aYuIsGpTus-L.csLuppGPhht.pupRa.GtTphchshc.hsYtpp.sGtCslssscspLplphpLP+hp..ppsss-lphhW-shhuslc+HEcsHucIA+shs+clEpslhuL..tscssCpplcthlsphssclhpcacppppcFD+lEtsstsphpthlL ................................hYsIsGpTss-L.culuppGPhlt......tp..+h.utTphphshc.hcascp.sssCpVtsspspl+hphoLP+hp..pphuPtlthhW..DshhssI+RHEcsHschA+shsp-lERphhuL...scscCpplRtslsKhhschhpspcpppppFD+VEhsNhsNh.phlL................................. 0 9 28 39 +5867 PF06039 Mqo Malate:quinone oxidoreductase (Mqo) Moxon SJ anon Pfam-B_7465 (release 9.0) Family This family consists of several bacterial Malate:quinone oxidoreductase (Mqo) proteins (EC:1.1.99.16). Mqo takes part in the citric acid cycle. It oxidises L-malate to oxaloacetate and donates electrons to ubiquinone-1 and other artificial acceptors or, via the electron transfer chain, to oxygen. NAD is not an acceptor and the natural direct acceptor for the enzyme is most likely a quinone. The enzyme is therefore called malate:quinone oxidoreductase, abbreviated to Mqo. Mqo is a peripheral membrane protein and can be released from the membrane by addition of chelators [1]. 
19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.68 0.70 -6.32 17 1946 2012-10-10 17:06:42 2003-05-23 14:49:47 10 3 1474 0 309 1213 419 469.70 48 94.87 CHANGED ppspsDVlLIGuGIMSATLGshL+ELcPsWpIplhERL-ssutESSNsWNNAGTGHSALCELNYTsEtsDGoIDIsKAlcINEpFplS+QFWuaLVcpGhlpsPcsFINslPHhSFVhG--NVsaL+KRapALpppsLFcsMcaS-D.-plpcWhPLMM-GRsssp.lAAThhstGTDVNFGtlT+pLhppLppp.shplpas+-VpDl+Rss-GsWpVslpshpsuppp.slpu+FVFIGAGGuALsLLQpSGI.EuKsauGFPVuGpFLtssNP-llcpHpAKVYGKAsVGuPPMSVPHLDTRhl-GK+sLLFGPFAsFSsKFLKpGShhDLhpSl+ssNlhsMLusGhcphsLsKYLlsQlhhSp-cRhssLRcahPpA+s-DWcLhsAGQRVQlIKcstcpG.GhLpFGTElVsucDGolAALLGASPGASTAsslML-lLc+sFs-+hp..pWpsKlK-hlPSYGhcLsscspLhcclpt.oucsLpL ......................................................pppcsDVlLIGuGIMSATLGohL+ELpPpWsIplhERL-ssAt.ESSNsWNNAGTGHuALsELNY...T..s.p.ps.DGo...l.-IpKAlpINEpFplS+QFWuahVcpGhlp.sP.csFIpsl.PHMSFVhG-cNVpFL+pRapALppp.sLFcsMca..o-..Dh..p..pl+cWhP..Lh...M...cG....R....c....s....s....p........l.A.A.T.th.-.tG...TDVNFGtlTRpLhp....p.L.....p.....p.....c......s..sp..lp..h..spEVpslc....+.....p.....s.....-.s.....p......WpVpl..p.s....h......p.s..G....p.t..p...phcu+FVFIGAGGu.AL.LLQKoGIPEuKchuGFP..VuGpFLhsp.N.P-llpp.HpAKVYGKAslGAPPMSVPHLDT.R.h.....l.D.GK+sLLFGPFAs..Fo...sK.....F.....LK...s......G...S...h..h...DL..hp..S..l+...s.....s....N....l.....h.....s.Ml......s......s.G..lc...NhsL....s.KYLlsQl.....h.....h..o.....c......-c.....R...h..psL+caaPp.A+.sED..W.c..Lh..p..A.G.QRVQlIKcst.cp...G....GsLpFGTEVVsupD.G.olAALLGASPGASTA.s.slMLclLc+..s..Fs-chp..pWps+l.KphlPSYG.h.p.Lsp.c.th.hcchhp.TscsLtL........................................................................................................................................... 0 65 172 257 +5868 PF06040 Adeno_E3 Adenovirus_E3; Adenovirus E3 protein Moxon SJ anon Pfam-B_7475 (release 9.0) Family This family consists of several Adenovirus E3 proteins. 
The E3 protein does not seem to be essential for virus replication in cultured cells suggesting that the protein may function in virus-host interactions [1]. 25.00 25.00 29.90 27.40 24.20 20.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.92 0.71 -4.18 4 61 2009-01-15 18:05:59 2003-05-23 14:54:17 6 1 40 0 0 51 0 125.00 48 70.68 CHANGED IKsEhphoau.....L.LhQPhL.sh.Qhhpt..p+TF.ll.soosSshP......LP.TNp.pophppRapRsLhpoNTTh.+TGGELRG.PTs....sPW.VsGLlsLGlVAGGL.LlLCYLYhPChoYLVVLCCWFKKWG.. ...............h.......phs.....LsLapPhl.GTYpC.pGPCpHTFsLVssTssSThs.......PETsp..tph.......l..s.oNTs..+TGGEL+s.P.T-t..hsPaEVVGallLGVVhGGhlhlLs.hYLPCas.lhlhhCWh++hG.......... 0 0 0 0 +5869 PF06041 DUF924 Bacterial protein of unknown function (DUF924) Moxon SJ anon Pfam-B_7600 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 20.80 20.80 20.90 21.10 20.40 20.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.34 0.71 -4.55 154 813 2009-01-15 18:05:59 2003-05-23 15:02:49 6 15 724 2 309 757 800 176.50 40 82.71 CHANGED lLcFWF.............ptstsp......p.W.FtpssshDppl+pRFts...hhp.tAspGc...Ls.......pWtss.s............pGtLAhlILLDQFsRNlaR...soscAFAs.DshALslAppAlspGhD..p..plss.......pRhFhYhPhhHSE...sls.QcpulpLapph.....ss....................tp..............................slcaApcH+clIcRFGRFPHRNslLGRpSTs-EhtFLpp.sGt 
................................................................lLcFWF........................pttt.p.......t...W.F.......s......p.......s.....s......s...hDtpl+p+F..tshhp.tAtpuc.....Lt................................pW..pps..s.............pGtLAhlIlLDQFsRNhaR.......so..sc..AFA..s....DshALslApp.A.l.sp.G..h.D..p....pLsst.....................pRhFhYhP..ahHS...E...shs..Qcpu...lpLappl.......s.s.....................................t.t.......................................s.hcaAhcH+sIIcRFGRaPHRNslLGRtSTsEEhtFLppsG................................... 0 92 175 252 +5870 PF06042 DUF925 Bacterial protein of unknown function (DUF925) Moxon SJ anon Pfam-B_7663 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. This family was recently identified as belonging to the nucleotidyltransferase superfamily [1]. 25.00 25.00 25.60 25.40 23.10 23.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.08 0.71 -4.67 40 786 2012-10-02 22:47:23 2003-05-23 15:05:00 6 3 739 1 139 566 26 158.90 40 86.99 CHANGED L..ctscp....LsL.P-hhLuAGhlRshVWstLcshss..ssls.....DlDllYFDst...-hohct-tplpppLp.phhP...thsW....-V+NQARMHlhpss........sPYpSopDAlupasEpsTAlGlcls.............tssplclhAPaGL--lashplpPN.st....c.phslappRl.tKpWpppWPpLplht ..............LphhcpLtL.schalAAGhVRNhlWshLpspss.hss.s.....DlDVIaFDs....cho.cpphtlE.p+L.ppphP..................phpW..plKNQuhMHh+ssc........sPYsSopDAhS+aPEpsTAlGlRLs..............................cc.sphELhAPaGL-DlhshpV+Ps.P+....ctchplYppRltpKsWpp+WPpLph..t.................. 
0 33 66 101 +5871 PF06043 Reo_P9 Reovirus P9-like family Finn RD anon Pfam-B_8265 (release 9.0) Family \N 25.00 25.00 219.50 219.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.86 0.70 -5.37 4 51 2009-01-15 18:05:59 2003-05-27 10:08:25 6 1 7 2 0 34 0 262.00 57 99.91 CHANGED MAD.pRRsFGuYpIpEIThppsQsshNssp....QssSsTpsphusp+hPlLDDGIacLlshLlcGssF-copYsGF-YSHLPsLERsFNTASsYVscpaplhhEphsLctY-hppsISlpuP-FolsLEa.hKscspuppp..EN-...shcslss+lls...........LlslhsR-pE-.hsppl.EGEsAllslhKMalpGFLhaLGcN.ssYD+QLsIEKYRPLLluIlGYEahhuhcs.pKclN+laYpLATFsNYPFslLRapLpSllssPs.IcpcItK-GLFK.IsosshhG.sppoV..hhRGIssSpSFLN.K+YRphRoRhsuNVcplIpsDhSplchss .......sFGuYpIpElhhppsQsNhNsNo....pNopsTpsphuhp+hPlhsDGlatLhs.LLcGssF-KohYpGaDYSHLPNLEpsFNTASsYVstpYcIshsEhpLcsYshscohSVh.P-FohsLEa.lKscppoDpss..cENE............phKPpT++IVs.............pLlsLhNR-p.E.hsEpl.cGEhAlIslFKLYIpGFLhHLs.N.s.......................................................................................................................................... 0 0 0 0 +5872 PF06044 DRP Dam-replacing family Finn RD anon Pfam-B_8314 (release 9.0) Family Dam-replacing protein (DRP) is an restriction endonuclease that is flanked by pseudo-transposable small repeat elements. The replacement of Dam-methylase by DRP allows phase variation through slippage-like mechanisms in several pathogenic isolates of Neisseria meningitidis [1]. 
22.70 22.70 23.10 26.80 22.20 22.60 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.84 0.70 -5.39 6 296 2012-10-11 20:44:44 2003-05-27 10:14:51 7 5 224 2 16 88 13 170.80 60 96.61 CHANGED MsLaFNppLuKsYKSpSQIlRVLSEsWVt+puYCPNCGsp.lspFsNNpPVADFYCspCpE-YELKSKK...uplushIsDGAYpTMIERIpSDsNPNFFFLTY.sc-acVsNFllIPKHFFTP-hII+RKPLussARRAGWIGCNIsLsplPEuGKIFLVKDpQlh-s-pVhcpapKsLFLRppsh.pSRGWhL-IhpCIDKls.spFoLsplYpFEscL+h+aPpNNaIKDKIRQQLQlLRDKGhIEFlGRG+YRKl ......................................................................MpLaFshpLscp.ppsop+hRlhoEsWl.+puYCPsCGspPhp+F....tNN+PVADhaCspCpEpaELKSKp.......tshu..s..sls..DGAYtTMhcRlpuDsNPNFFF...............................................................psuplhp.p.VhcpappsLFLRppsh..pp+GWhltlhpCID.pl..ppFoLppMYcF.E.scLp..FspNNHIK-KIRQQLQILRDpphIEFhG.RGhY+K.......... 0 7 14 15 +5873 PF06045 Rhamnogal_lyase Rhamnogalacturonate lyase family Finn RD anon Pfam-B_8355 (release 9.0) Family Rhamnogalacturonate lyase (EC:4.2.2.-) degrades the rhamnogalacturonan I (RG-I) backbone of pectin [1]. This family contains mainly members from plants, but also contains the plant pathogen Erwinia chrysanthemi. 
19.90 19.90 20.10 20.00 19.60 19.50 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.30 0.70 -5.04 13 168 2012-10-02 23:57:29 2003-05-27 10:28:12 6 17 36 0 104 163 0 162.80 35 28.74 CHANGED M...................................................p.p......VllDNGllpVTlSsPsGhlTGl+.YsGl.cNlLch..scppsRG.............YWDlVWs.sGpps.....hh-h..................lcGocFcVIsps-EQlElSFsRTas.S.csssl.....PLNlDKR...aIMh+GsSGhYoYuIhE+lpsWPulslsphRlsFKLspcK.........................................FcYMAluDsRQRhMPhssDRsssRG..psLAYpEAVhLlcPp-spa+...GEV .........................h.............t.............lhlcNGllplohopPtGhloulp..YsGh...pNllc......spt.....pspG.................YaDhsWp....uttt..........hth..................................................................................................hpssphpll.p....spp.lElSFhp.a..s.p.sp.h..............slsl-h..+........hlhhcusSGhYsYuIh-+.tthP.shsl.sphRlsFKLppcp.........................................FpYMAluDspQR.MP.sp.DR...ps..t.LuY.EAVhLspP.psph+...GpV.............................................. 0 12 62 85 +5874 PF06046 Sec6 Exocyst complex component Sec6 Finn RD anon Pfam-B_8361 (release 9.0) Family Sec6 is a component of the multiprotein exocyst complex. Sec6 interacts with Sec8, Sec10 and Exo70.These exocyst proteins localise to regions of active exocytosis-at the growing ends of interphase cells and in the medial region of cells undergoing cytokinesis-in an F-actin-dependent and exocytosis- independent manner [1]. 
30.10 30.10 30.50 30.50 30.00 30.00 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.79 0.70 -6.43 38 580 2012-10-03 17:31:52 2003-05-27 10:42:54 8 14 271 2 371 521 5 457.30 20 72.59 CHANGED FpplstlhctFcctlhtlhtshlphscps.sshlVcll+Il-tEEppDpchtthpphtt......................................................tpsRsa+c+hhcslcpulpppap.....pstpthssctth.thL-plt.alhs-LhhVpptlsshaPs.caplhchahp.hYHptlpshLt................pl....sss-hsutcllslLsW.scpYts..hhsphthh..................tsplpPllsssth.ppLhccYhphltpplp-WhsplhcsEpppahp....spsP-.....................ct-G.............hatspsshphaphlppQlplA...usohpscllthslcphsphlpphppshhphlc-chcp..............................................sshh-aLlAhsNsphtst-..........hhsshppcatshls.....p.ht.thsslhsshsclsspshptllchlat.D..Lpshhscla..o....ppWhss.........pshcplssTlp-Yh.sDhpp.lp.sh.hphhhpphhcpllhpYlptlhp.......+phthp.spctpphsc+lppDspthhphFpphh........thhhsphphlphlhthl.....hps.s.l........................hh.hpslhspY.Dhshs......alpulLpsRsDhs+uphppllpphpphhtshp............slhscl 
.............................................................................................................tht.h.p.htptl..hh.p.h.hst....tt...thlhphhhllp...E-t..ctth.t.t..............................................................................s+ta+phhhphlp..tshptpht.................t....t.....t..........h..............Lp.ht.hhhp-L.h.stphh....hhP..capl............hphhhp.hYHpthtphlt................pl.......t.t...ht..stphht.........l.ltW..ph..Y.t...hhtp.tht...................................tpltshl....p.......ppL.ppahp..h.tphppa.htphhp.t-hp.t..a.p.....tt.Pp....................................p.pG......................hh..o..h.s.h..l.h.phl....p...pplphu.....................stshptphh..hs.lpth....thlpp..h.ppth..ph........hccphtt................................................................................thhhphhlAhhN.sp.thhp...........................h.ph...ppph...........t.................................th..tth.sshspl....tpp.......shphllp.la..D..lpshh......tcl.h...s.......pcWhss.................pshctlhsT.hpcah.t-hpp..lp....s.h..hph.hhtphtcpllhc.Ylptlhp....................................t+h.hp......tpp.....ppp.hsp+hh...p-hptlhthFpphs.........................p.h.p.hl.t..hhthl..............hps.s.l..........................h.tht..shhp.pa.sDh..p.p......altslL.thR....s.shspp.hptlht..htt................................................................................................................ 0 97 158 257 +5875 PF06047 SynMuv_product DUF926; Ras-induced vulval development antagonist Finn RD anon Pfam-B_8083 (release 9.0) Family This family is from synthetic multi-vulval genes which encode chromatin-associated proteins involved in transcriptional repression. This protein has a role in antagonising Ras-induced vulval development [1]. 
25.00 25.00 40.30 40.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.34 0.72 -4.30 10 233 2009-01-15 18:05:59 2003-05-27 12:38:40 6 4 173 0 162 228 3 102.50 61 26.07 CHANGED ppshsYGuALLPGEGsAMApYlpcGKRIPRRGEIGLTSEpIpsFEclGYVMSGSRH+RMsAVRlRKENQVYSAEEKRALAhhstEERtKREscllupF+ElIc++ ......................s..hsYGpALLPGEGuAMApYVpp..G..K..RIPRRGEIGLTS-EIusFEssGYVMSGSRH..RRMp..AVRlRKENQlYSA-EKRALA.FNpEERpKREs+llusF+-hlpc.......................... 0 68 90 125 +5876 PF06048 DUF927 Domain of unknown function (DUF927) Finn RD anon Pfam-B_8364 (release 9.0) Family Family of bacterial proteins of unknown function. The C-terminal half of this family contains a P-loop motif. 20.60 20.60 20.60 20.70 20.50 20.40 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.85 0.70 -5.14 39 800 2012-10-05 12:31:09 2003-05-27 14:04:52 6 16 569 0 136 598 113 272.40 28 41.09 CHANGED FpLs.....psGlhhhthscppps.......hhlsuPltlhAcscDs..p...stsauhllp.aps.DGph.+phshstplLtus.uschhtpLlshGhs.hsspt....hptLspaLpph.....pst.pspssspsGWp....ts.........sFlhscpsh..ussstp.lhh....pstpht..................tthptpGTlcsWpcpluphstGNshLhhul...usAhuusLL...phhs.hpusshHhhGsSSsGKoTshplAuSVaGsPs........thh+SWpuTsNuLEuhAutpsDshLsLDElupscs+-..sssl.lYhluNGpGKtRuspsGss..+sspp 
........................................................................................................ptpGhhh.t...tppt.s...............hhlssPltlhsphpD...t.....sss.t.hllp.......sspp..pphs.hst.shl....spcthhpLhph..Gls.lssppt......ppL.spalpt......ssh.hsphsspsGWh.....tu...................................sal.h.s.s.p.l.....hss...s..tp......lhh.......sspsht.......................th.p....spG...Th.csW..pppltp..sp.G...N.h..l.h..hul....ssu.LuusLL......p.hs...hpuh..hhc...h.hGp....SSsGKo.TshplAs.S.VaGsPs..........hhpoWp.uTpNuLEuhAutps.sh.lslDE...l....u....p..s........ss..+p.........sssh.hYsl....ssGpGKtRushsGps..+s.p.t............................................. 1 49 87 118 +5877 PF06049 LSPR Coagulation Factor V LSPD Repeat Yeats C anon Yeats C Repeat These repeats are found in coagulation factor V (five). The name LSPD derives from the conserved residues in the middle of the repeat.They occur in the B domain, which is cleaved prior to activation of the protein. It has been suggested that domain B bring domains A and C together for activation ([1]). 30.00 1.60 33.50 1.60 29.60 1.50 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.07 0.77 -5.52 0.77 -3.41 33 291 2009-09-16 13:35:40 2003-05-27 14:42:21 7 11 8 0 203 308 3 9.00 64 11.44 CHANGED oLSPDLsQp .TLSPDLuQT.... 0 0 0 0 +5878 PF06050 HGD-D 2-hydroxyglutaryl-CoA dehydratase, D-component Finn RD anon Pfam-B_8369 (release 9.0) Family Degradation of glutamate via the hydroxyglutarate pathway involves the syn-elimination of water from 2-hydroxyglutaryl-CoA. This anaerobic process is catalysed by 2-hydroxyglutaryl-CoA dehydratase, an enzyme with two components (A and D) that reversibly associate during reaction cycles. This component contains one non-reducible [4Fe-4S]2+ cluster and a reduced riboflavin 5'-monophosphate [1]. 
28.40 28.40 28.60 28.40 28.20 28.30 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.25 0.70 -5.63 163 2269 2009-01-15 18:05:59 2003-05-27 14:48:43 8 13 1163 10 497 1880 94 306.50 21 44.28 CHANGED hshhsPpElltAu.sh.l...sht..l...........tht.phhh.sstppslssshCshl+s.hu...............................sshh.hschl..l..ssssC-spp+hhp..hhsphts.................hhhhchPpptpp............t......shphatcphc.c..........lhttlcphpupplsp-tl...tpslchhsptcct.hpchhcht...tthPsslss.tphh.hlhptthh.hs.tthhphhptlhschptth.cts.h.........tptthRlhlsG.sPhh.sshph..phlE.psGshllspphsh......sh+hhtshl.ts....................s.luc+hh.p.hss..sh........pp+hctltchhccaps-GllhhshphCcshs..............htthhlcchlcct.slPhlhl-sDh....pssshuplpsRlcAFhE.l ................................................................................................................phhh.pclhhAA.sh.s........sht....L.........................p.h...p.p..ht.pslstshsshl+p.hs..........................................................................................sshh..h.sshh..l..spshss.sph+hhp...hhu.tht.........................................hhhhphs.p.pt....................t...........shthhp..pchp.c........................................lh.plp.hhsp.......l..scpsl.....t.slt..l.hsptpct.lpphhchs...........tt.P.s.lss...tphh.t........hl...hsupat...h..........pphphhhs.pht..tth..ctsph.........hpsh.+h..h.hss.hPhh..s.ph.......phlE...ps....Gh.....shp....sh............hh+.h.h.thh.ts...................................l..hpphh....t.hsh................ctcht.hpphhccap....csl..hthhhpt.sp.sht.....................h.p.h.lcch.lcp...s.lPhh.l-s-.......psss.sGplhs+hpshhp.............................................................................................................................................. 0 271 417 461 +5879 PF06051 DUF928 Domain of Unknown Function (DUF928) Finn RD anon Pfam-B_8442 (release 9.0) Family Family of uncharacterised bacterial protein. 
21.00 21.00 21.10 22.30 20.70 20.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.42 0.71 -4.70 25 177 2009-01-15 18:05:59 2003-05-27 15:01:16 7 5 39 0 58 178 1 184.20 25 72.24 CHANGED pptGGu...........oRGs......Cs.........tsptsLTALlP...p......ss......hGlTssppPThah....YlPtssssts...pFs....LtD-p.scplYpss..hslss.psGIlulsLP..ssss.sLphGcsY+Whhs..lhC..sspssuss......hVcGhl....pRVphssslppplppss.shcpsslYAcsGlWYDsLssLA.pL+pspPpsstltpcWppLLc..SlsL.pplustPll ..................................t.ptuuuoRss........C.............tsp.s.lssLlP...tsp................................hGhTsstpPThah....YlPtssspph........................pFs........Lh..-pp......p...p..lYpps................hs.l...s...s..p.......s..GIlslsLP......tsss..sL.......cs.s...ppYpWths..lhC....sspsp.s..ss...............hVpGhl....pR..lp.hs......sslpppl.........pp.........ss...s..........hppstlYAp....pGlWaDsLspLu.plppppPps.thtttWppLLp.....ulsL..ptlsp.sh....................... 0 3 38 58 +5880 PF06052 3-HAO 3-hydroxyanthranilic acid dioxygenase Finn RD anon Pfam-B_8515 (release 9.0) Family In eukaryotes 3-hydroxyanthranilic acid dioxygenase (EC:1.13.11.6) is part of the kynurenine pathway for the degradation of tryptophan and the biosynthesis of nicotinic acid [1].The prokaryotic homolog is involved in the 2-nitrobenzoate degradation pathway [2]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.95 0.71 -4.79 5 387 2012-10-10 13:59:34 2003-05-27 15:14:34 7 5 349 8 226 543 220 145.40 44 74.95 CHANGED MhslssINlcpWVcEN+GsFpPPVCNKCMH.p-QLKVMaVGGPNpRKDYHIEEGEEaFaQLKGDMVLKVlEcGssR....DllI+pGEhFLLPuRVPHSPQRFANTVGLVVER-RhssEpDGLRaYVusoscsLaEcWFYhpDLGTQLsPlIc-Fa ..................................................slNl.pWl-ENpcLL.pPPVs.Nhpla......p.s...s..ahVMlVGGP..N.tRp.DYH......l.s.p.s................E.a.FYQhcGsMhL+.lh.-..c.....G.....ph..+.....D.l..I+EG-hFLLP.................uplPHSPtR.h.u.sT...lG.LVlERc.R..p.s.p.h.DslcWYC...s..p......C.......s......c............h......laEt.F.h...p..D..lsTplhshhpcF................................. 0 68 127 184 +5881 PF06053 DUF929 Domain of unknown function (DUF929) Finn RD anon Pfam-B_8458 (release 9.0) Family Family of proteins from the archaeon Sulfolobus, with undetermined function. 25.00 25.00 32.80 32.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.87 0.70 -5.29 18 134 2009-01-15 18:05:59 2003-05-27 15:16:42 6 2 92 0 25 86 8 229.60 41 83.04 CHANGED p+llhlslllllll.lllhlhhsph...............l..t.........spsshs....phhKlSspshussGpstlhaloW.GCPhGAssSWsLYlALp+aGsl..slphphSss........scshsshPGLlFhsa.s.....p..lpFpshY....lYsphlsto.hNGpsl....sphlthG.p.lppp.....lPstlYpllppYpsps..........s.s......Hls.sIlIoGstGoYhhsGs...ls...Pshl......Ssh.......sssYVlpp.Lcsss.......IhsuAshIpphIpc 
..........................+lhYlslulLAls.lIIhuslltlppopps.........................................................hps.pstlucAlsSIspssYspVusGo.....plsKl..u.N.p..s....s..pcG..KVclaYVGuEaCPaCAhERhPLssALSRFGsF.SuL+splSSP.......tp+phuNIPTlTFcNY................................cYoSpY........Vsh...-uhEhu.DRpGc.I..................us.....LPpstpslasc..Ysstp...........................uIPFS.hhGshssssP...Sh....Phhh..............pc.....sPptVlcs.LsNPNStpAp.tIluuANLhTApICp....................................................................................................................................... 0 5 10 23 +5882 PF06054 CoiA Competence protein CoiA-like family Finn RD anon Pfam-B_8535 (release 9.0) Family Many of the members of this family are described as transcription factors. CoiA falls within a competence-specific operon in Streptococcus. CoiA is an uncharacterised protein. 20.30 20.30 20.40 20.50 19.80 20.20 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.47 0.70 -5.57 14 1177 2012-10-11 20:44:44 2003-05-27 15:38:20 6 7 1110 0 142 756 21 285.00 25 86.93 CHANGED M...L.sAhsppGphlslhpttp.p.......cppaaCPsCtuplhl.....KpGphhhsHFAHcshpsCphhtEsEStpHLtsKthLYpalpp.pc.p.................VplEhYlsElpQhsDlhlNpp...........................................lAlElQCSplshpclpcRTpuYpptGhpVhWllG....pplhhpc............................ploplppphlahsp.shshahach-ht..cph+h+ahhhpcltG...................................................+hahtpcphsht.sphhphhp........................................hsa.pp.h.ph.h.h..pht.hIppplhhtp.h.hp.pc..........chYp................pGpsllphhhtsha......................................htshp.h...p....p...pthhphpp.slchahcsa......................hhhhpcptpt.hpplhsshahhp..h 
........................................M...h.hAhstptph.l.h..hp.....t.............cptahCPs....CtppVhL.....+.pG...p....hhsH...FAHc.p.h.p.s.C..p....h..h.........E..s.........ES..cHhtsKthLhp..hhpp..ps.p.........................................Vp...l.Ehh..L.......sc...l.p....Q...hsDl.h.l..sp.p...............................................hAl.El..QC.Ssls.......cplhcRopuYpphGhpVhWl..LG.............pph..hhcp................................................................................plp.h..p.h.thh...hhs....ph..thaha.ph..p....p.hhhha.hh.p......ts............................................................................................................................phah.t.......pth.....tph.plhp....................................................................................p.....h..h...h................h..l.p.p.h.........................hh...................t..h........h.............................................................................................................................................h.......................................................................................................................................................................................................................... 0 36 78 111 +5883 PF06055 ExoD Exopolysaccharide synthesis, ExoD Finn RD anon Pfam-B_8604 (release 9.0) Family Among the bacterial genes required for nodule invasion are the exo genes. These genes are involved in the production of an extracellular polysaccharide. Mutations in the exoD result in altered exopolysaccharide production and defects in nodule invasion [1]. 
22.10 22.10 22.70 47.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.96 0.71 -5.18 82 395 2009-01-15 18:05:59 2003-05-27 15:49:10 7 1 292 0 170 394 61 182.00 27 87.18 CHANGED pslsshLcplt....................psppsph..olt-llcshGc+uFusl..lllhuLhhlhPls..lPGhs.olhulhlhlluhQhlhGRcphWLPphltcRplspcplppslpphpshsph.l-+hh+s.RLphlssssh.hplhsuhslhlsshh.hL.h..lPFssslP.uhAlslhululhsRDGllhllGhlhshsshshhshlh .......................t..hsphLpphh................t...ptpsspl..olt-llstlucRu..FGhl..lllhuLPshlPls...PGho.slhGlslllluhQhhhGpc.psWLPphltc+slpppplpthlpphtshlpp.lE+hh+P.Rlthlssshs.tpl.hGlhlhlhulhlhLP....lP.h.oNhlP.uhulhlhulGllpRDGlhhlhGhlhshhshshhhhh.h............... 0 48 110 136 +5884 PF06056 Terminase_5 Putative ATPase subunit of terminase (gpP-like) Bateman A anon Pfam-B_7152 (release 9.0) Family This family of proteins are annotated as ATPase subunits of phage terminase after [1]. Terminases are viral proteins that are involved in packaging viral DNA into the capsid. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.65 0.72 -4.41 16 859 2012-10-04 14:01:12 2003-05-27 16:53:51 7 16 638 0 101 1339 101 56.20 37 11.50 CHANGED -sRcpAphLYapGapss-IActLshp.stTVauWpcR-pWcshssl.pplptulcsRlsp ..................-sRppAt..hL.Y..a....Q...G....a....ps.s.cIAchLsh...........p..ssTVauWKc.RDp.WcchsP..l..pphp.shtuRhhp........................... 0 19 53 78 +5885 PF06057 VirJ Bacterial virulence protein (VirJ) Moxon SJ anon Pfam-B_7524 (release 9.0) Family This family consists of several bacterial VirJ virulence proteins. VirJ is thought to be involved in the type IV secretion system. It is thought that the substrate proteins localised to the periplasm may associate with the pilus in a manner that is mediated by VirJ, and suggest a two-step process for type IV secretion in Agrobacterium [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.15 0.71 -4.52 12 339 2012-10-03 11:45:05 2003-05-28 10:33:45 6 5 263 0 92 319 18 180.10 39 47.26 CHANGED pDphAlhhSGDGGWR-lD+clustLQppGlPVVGlDSLRYFWsERoPppsAsDLsRlIchYpc+WpsppVlLlGYSFGADllPtsaNcLPssp+spVp.huLLuLu+psDachplpGWLGhss-Gtt.s.ss-ltplssshl.ClYGp--cD.ssCPsLctpss-sluhsGGHHFDtDYtsLAcpllsuhcsRh .........................DhlslhhSGDGGW.R.-lDKsluptLpppGlsVVGlD..........S..L..RYF.W......s....c+....oP.....pph....AsDLsclhppYptcWttp...cl...hL...lGYSFGADlhPhsascLsss.psplphlsLLuhu.ppusFplclpGWLGhssct....sssPtlspls.s..s.hlhClYGp-EcD...shCs..s.......pt.p.shchlpLPGGHHFDpc.Y.slAptllpthpt.t................................................................ 0 19 38 64 +5886 PF06058 DCP1 Dcp1-like decapping family Finn RD anon Pfam-B_8271 (release 9.0) Family An essential step in mRNA turnover is decapping. In yeast, two proteins have been identified that are essential for decapping, Dcp1 (this family) and Dcp2 (Pfam:PF05026). The precise role of these proteins in the decapping reaction have not been established. Evidence suggests that the Dcp1 may enhance the function of Dcp2 [1]. 20.60 20.60 23.00 20.90 20.50 20.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.34 0.71 -4.07 9 430 2012-10-04 00:02:25 2003-05-28 11:45:17 8 7 272 7 284 392 3 112.70 32 29.81 CHANGED hspttppl...sLssLpphDPYIpcIlDhuuaVAlYpFssptscWpKs-lEGThFlYpRuspPhauahIhNRhshcshlEslspshchplpssFLlYR....ssp..pIhulWFYspp-sp+IhpLhppll ..............................................t...tppl...sLssLpRh.DP...h.IppIlph.uupsslYpFs.p......s.s....p.WpK.p-..l.EGoLF.l.....h.....p....R.......s.........s....p........P..........p.......a...shhlhNR...hs...h..........c.Nhscs.l..........sp..s....h.-...hp..lpt..s....a..lla+.........................ssp....pl.hulWFaspp.-pppl.thh................................................... 
0 76 137 220 +5887 PF06059 DUF930 Domain of Unknown Function (DUF930) Finn RD anon Pfam-B_8283 (release 9.0) Family Family of bacterial proteins with undetermined function. All bacteria in this family are from the Rhizobiales order. 25.00 25.00 32.90 45.10 19.90 19.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.23 0.72 -3.77 20 145 2009-01-15 18:05:59 2003-05-28 11:50:12 7 1 106 0 50 101 3 99.30 44 51.86 CHANGED ltpLssppRlpQlCssEAhcplt+spssapPDplsuashssshhsGsslcAsGAAFRSpspWYclsF+Ccsss-shsVhSFsaplGctIP+ucWschtL..hs ....Lc+L-PssRLEQtCDlEAME+Is+-pstapsD+VlAYuaucPhhptNolcAsGAAFRS+pcWY+LuapCcsDs-phslpSFuYtIGsclPp-pWc++hLh.P.. 0 7 23 32 +5888 PF06060 Mesothelin Pre-pro-megakaryocyte potentiating factor precursor (Mesothelin) Moxon SJ anon Pfam-B_8552 (release 9.0) Family This family consists of several mammalian pre-pro-megakaryocyte potentiating factor precursor (MPF) or mesothelin proteins. Mesothelin is a glycosylphosphatidylinositol-linked glycoprotein highly expressed in mesothelial cells, mesotheliomas, and ovarian cancer, but the biological function of the protein is not known [1,2]. 
17.50 17.50 17.60 17.50 17.20 16.80 hmmbuild -o /dev/null HMM SEED 625 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.30 0.70 -13.09 0.70 -6.47 3 103 2009-01-15 18:05:59 2003-05-28 12:04:28 7 2 42 0 65 83 0 409.20 29 80.15 CHANGED MALPTARPLLGSCGSPICSRSFLLLLLSLGWlPhLQTQTT+TSQEAALLHAV.sGssDFASLPTGLFLGLsC-EVSGLSME+AKELAMAVRQKNIsLpsHQLRCLARRLP+HLTsEELDALPLDLLLFLNPAMFPGQQACAHFFSLISKANVDVLPRRSLERQRLLsuALKCQGVYGFQVSEADVRALGGLACDLPG+FVA+SSEVLLPWLAuCsGPLDQuQpKAVREVLRSGRsPYGPPSTWSVSTLDALQGLLsVLDESIV+SIPKDVlAEWLQ+ISRDPSRLGSKhTVlHPRFRRDsEQKACPPGKEPpcVDEsLIFYQNWELEACVDGTLLAsQMDLVNEIPFTYEQLSIFKHKLDKTYPQGYPESLIQQLGHFFRYVSPEDIRQWNVTSPDTVKTLLKVSKGQKMs........AQVIALVACYLRGGGQLDEDhVKALssIPLSYLCDFSPQDLHSVPSSVMWLVGPQDLDKCSQRHLGlLYQKACSAFQNVSGLEYFEKI+TFLGGASsEDLRALSQHNVSMDIATFKKLQVDALVGLSVAEVQKLLGPHIsDLKTEEDKSPVRDWLFRQ+QKDLDoLGLGLQGGIPNGYLVLDFNVREAFSSsAsLLGPGFVLsWIPALLsALRLS .....................................................................................................................................................................................................................h..h...hh..h...................C..hh..h.t.thp.h..t..s....t.t.l..tuhtC......t....h.t....................lsttph.h.........LGsLsC....sh.sst.l......t....us.....lL..tL.tC....ls.sQt.uhp..hl.ttts.hG.Ps.Ws.tslptLt.L..h........ht..........h...h.th.........p.........th........hh......h.h......t......t.............t.................t.....h.........................t........h..............h..p........t.t......tt.............h......s.............s..t..l...pp.p...h..h.h...Ypp......p.hcs........ClssthLtsp.hs.l...th.sh.shpp.plhKt..KLsphY..Pp..G.hPEs.lphls.hhhhh.o..p-ItpWslTS.-TlhuLLt.s....s.......p...............spstsllt+a..lttt.G.p.lstshh....h..h.s.t.hLC.hs.pplpsl...sp.hh.h.st.s..clssCs..pphslLas..KAc.A...F......t.....s.......s..t..s.....t.Ya.hhpsaL................G................G.As...........h.....p-LptLuptNlsM...DhsTFhpLp.p.l...LsltpVptLLG.pl.sLpt.cpps.lptWh.p..pptLst.LGls...................................
........................................................ 0 4 14 26 +5889 PF06061 Baculo_ME53 Baculoviridae ME53 Finn RD anon Pfam-B_8086 (release 9.0) Family ME53 is one of the major early-transcribed genes. The ME53 protein is reported to contain a putative zinc finger motif [1]. 25.00 25.00 121.70 66.10 23.40 20.20 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.36 0.70 -5.45 21 63 2009-01-15 18:05:59 2003-05-28 12:06:23 6 2 54 0 0 62 0 311.00 28 87.23 CHANGED +DLRh+FLSpps+plhpull+FAoNYVpGhhpspshh.hsp.t.hphc......hsppopCs.tCtt+F+csschh....Lasllcp.h...s............ssp.s+FchsCpcCtppht.....phssaplYPplsLtslccLsctGFhppYlFPlchchppppcphhhhst....cshacslcpIlpc.KcsNEpIhpIsLp..ThGtllhpEs.psshlc+hps.h.t...ss-ls..hhsssSphhphlpspt.ps..hTYFhEVht+sapph.hs..alsahs+sstth.....CphC+.sKhY..cps....PVLaCS+CGFTsshaFp.....p...hsslhap.csVp..spp...hpsptlhYYDlshat ......................pDLRh+FLSppppplhpuhlcFAssYlpGhhp.pph..hth.t....p.......hhppopCp.pCptpF+pspp.........hLasllcp.h................sspss+F+hsCtcCtpphph........hslhELYPslsLtsVc+LsphGFlp+YlFslshshphpp+phhlhsh....pclapslppIlpp.KpsNEpIppIpLp..ThucllhcEshpshhlcptpp.......ss.lp..hh.tsSphhphlpstp..s..hTYahcVhh+happh.hs...alsaashsspt......CphCp.sKhY..psp.....PVLaCSpCGFTsshaFp.........h.pphtYh.ppVp....hpp.....h+sp..tlhYYDhphh................................ 0 0 0 0 +5890 PF06062 UPF0231 Uncharacterised protein family (UPF0231) Finn RD anon Pfam-B_9027 (release 9.0) Family Family of uncharacterised Proteobacteria proteins. 
25.00 25.00 25.00 25.00 24.50 24.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.54 0.71 -4.23 38 786 2009-01-15 18:05:59 2003-05-28 12:11:05 6 3 779 0 93 332 7 116.70 58 95.91 CHANGED M-YEFp+s.lsGphhAchSMsHEslGpWhsEElupshpplsplhstIpplcsuptt..-hpLhGpEhoLhlss-ElhVpAN.....sLth-..p-.EhE-shphYDpESh.AhCGLEDFpphLpuWpsFlp ............MDYEFLRD.lTGsV+VRhSMGHEVVGHWFNEEVKcN.LuLLDEVEpAA+slKGSER...sWQpsGHEYTLWhDGEEVMV.RAN........QL-Fu...GD...EM..EE.G.MsYY...DEESL...SLCGlEDFLpVlsAYRsFl.................... 0 8 29 63 +5892 PF06064 Gam Host-nuclease inhibitor protein Gam Finn RD anon Pfam-B_9063 (release 9.0) Family The Gam protein inhibits RecBCD nuclease and is found in both bacteria and bacteriophage [1]. 25.00 25.00 34.10 34.10 20.20 18.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.33 0.72 -3.89 2 349 2009-01-15 18:05:59 2003-05-28 12:30:04 6 1 243 4 3 150 1 94.40 91 97.43 CHANGED MNAYhh.DRlEAQsWsRHYQQlAREEKEuELADDhEKGLs.HhhESLChDcL.RHGAsKKuIoRAFDDDVEFQERhuEalRYMsEThu+HQlDI-SE. .........MNAYYIQDRLEAQSWARHYQQIAREEKEAELADDMEKGLPQHLFESLCIDHLQRHGASKKAITRAFDDDVEFQERMAEHIRYMVETIAHHQVDIDSEV............... 0 0 0 1 +5894 PF06066 SepZ SepZ Finn RD anon Pfam-B_9064 (release 9.0) Family SepZ is a component of the type III secretion system use in bacteria. SepZ is a gene within the enterocyte effacement locus. SepZ mutants exhibit reduced invasion efficiency and lack of tyrosine phosphorylation of Hp90 [1]. 
22.70 22.70 22.80 72.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.19 0.72 -4.00 4 76 2009-01-15 18:05:59 2003-05-28 13:21:08 6 1 53 0 1 38 0 96.70 75 99.39 CHANGED M-AANLSPSGsVLPLAsoINGNssVDEsTGVMpPENGssRslRlhAGlALusoALAAlGsGIAuhCs-suSpp.hLGLGIAuGVLGGlTulGGuLAMKYA .M-AANLSPSGuVLPLAATINGNsSVDEpTGVMpPENGsSRslRIlAGlALuTTALAALGTGIAhACopsuSop.aLGLGIAoGVLGGVTAlGGGLAMKYA 0 0 0 1 +5895 PF06067 DUF932 Domain of unknown function (DUF932) Finn RD, Pollington J, Bateman A anon Pfam-B_9083 (release 9.0) & Pfam-B_002957 (release 23.0) Family Family of prokaryotic proteins with unknown function. Contains a number of highly conserved polar residues that could suggest an enzymatic activity. 20.10 20.10 20.40 20.10 20.00 19.60 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.48 0.70 -5.06 43 1425 2009-01-15 18:05:59 2003-05-28 13:23:55 6 4 644 0 189 1175 404 205.70 51 80.09 CHANGED sLushSsRYphlpspplL.....psLpccu.t...shhpotutl+s.GR+has..hhRhtcsspl.......ssthss.llLhsSHDGosuhphhhsshRlVCsNTLshu..usssu..sl+V.H..psssshclt-utpplt.h..pta-phptphcthtphplsps-tpsahpsslshp..................s.tpstpssstshsplhphhcscsp........psohWushNuVsEals+tths.Rss...............cp+tspultGh..sspLpptta ...............................................................HpSRSc.+YsYIPTIslL........csLpcEG.FQ.....PFFACQoRVRD....GRRtaoK.HMLRLR.R.supI.........sGpclPEIILLNSHDGoSSYQMlPGhFRhVCpN...GhVC.....GpshG........ElRVPH..+Gsl..V...t.c..VI.E.G..AYEVl.............u.....lFD+lp-pt-AMp.....slhL..ssst..ppshApAALshR...............................Ys......-c+pPV..Tssp.IL..o.P.RRpEDh.............tpDLWosaQplQENh.lKG.Glo..GRSA.........pG........++h+TRAlpuIDsDl+LN+ALW............................................................................................................... 
0 37 102 151 +5896 PF06068 TIP49 TIP49 C-terminus Moxon SJ anon Pfam-B_9170 (release 9.0) Family This family consists of the C-terminal region of several eukaryotic and archaeal RuvB-like 1 (Pontin or TIP49a) and RuvB-like 2 (Reptin or TIP49b) proteins. The N-terminal domain contains the Pfam:PF00004 domain. In zebrafish, the liebeskummer (lik) mutation, causes development of hyperplastic embryonic hearts. lik encodes Reptin, a component of a DNA-stimulated ATPase complex. Beta-catenin and Pontin, a DNA-stimulated ATPase that is often part of complexes with Reptin, are in the same genetic pathways. The Reptin/Pontin ratio serves to regulate heart growth during development, at least in part via the beta-catenin pathway [1]. TBP-interacting protein 49 (TIP49) was originally identified as a TBP-binding protein, and two related proteins are encoded by individual genes, tip49a and b. Although the function of this gene family has not been elucidated, they are supposed to play a critical role in nuclear events because they interact with various kinds of nuclear factors and have DNA helicase activities.TIP49a has been suggested to act as an autoantigen in some patients with autoimmune diseases [2]. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.24 0.70 -5.75 16 859 2012-10-05 12:31:09 2003-05-28 13:25:51 8 13 383 10 602 1792 868 352.50 52 84.15 CHANGED RIusHSHI+GLG......LD-sh................................................................pscpsup.........GhVGQhpARcAAGVIlchI+ptKhuGRulLlAGsPGTGKTAlAluIucELG.pcsPFsslsGSElYSsEhpKTEsLhQAFR+uIGlRIKEppEVhEGEVV-lph....-pPhs...spshptsplsLKTschppphcLusclhEsLpKE+VpsGDVIhI-psoGplp+lGRS.stup-aDl..psscaV.hPcGElpK+KElVpsVTLHDlDV.hNuRs...QshlulF....pscpsEIpsclRppINchVscalEpGpAEIVPGVLFIDEVHMLDIECFoaLNRALESshuPIllhATNRGhspIRGT.DhhSPHGIPhDLLDRlLIIsThPYsccEI+pILcIRspEEclpls--Ah-hLscIGpcoSLRYulpLlssApll .....................................................................................................................RluuHSHI+GLGLc.p.s.h............................................................................................p..sc...s.up.......................Gh.VGQ..pA.RcAu.Gll.l.....c.hI......+p.......tK.......h...A..G.......R...A...l...L...lAGsPG......TGKTAlAhuluQp...LG...scsPFsshsGSEl.aShEhpK....................TE.s.Lh.ps.FR+uI.GlRIKEp.pElh....EGEVsElp..............-p.....shs.........spsh.ppshl..sL.KT..s.chpphhcLssphh-ulpKE+VpsGDVIhI-tsoGtlp+lGR.S...s..h..u..p-..aDh...............tsp.c.aV.hPcG-lpK+KElV.psVTLH-lDV.h.NuRs................Qsh.luhh....pscpsEI.psclRppINp....h..Vscah-pGhAElVPG....VLF.IDEVHMLDIE...CFo..aLNRALES.sh...AP.I.V..Ih.A..oNRGhspI...RG............T.....s............h..pSPHGIPlD.LLDRllI.Ip..T.sYstcElc.pIlp...........IRupp.Eslplsp-.A.lshLs.clGt...coSLRYAlQLlosApl.l............................................................. 0 228 347 506 +5897 PF06069 PerC PerC transcriptional activator Finn RD anon Pfam-B_9117 (release 9.0) Family PerC is a transcriptional activator of EaeA/BfpA expression in enteropathogenic bacteria [1]. 
22.50 22.50 22.80 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.02 0.72 -3.95 4 532 2009-01-15 18:05:59 2003-05-28 13:31:51 6 3 323 0 17 287 0 72.20 37 68.03 CHANGED M.l+D+lAcpLEtKGhYRRAA-RWucVMl.lp.oDtcR+hss.+Rs.ClpKup+sPls.s.Nho-l+pAVs+sapcMGlshsscclFRpYpss ...........h.....phApcLEu+GhaRRAAshWhtshtpsc.o-spRp.hhtRRppCLpKuu.......+......shs.......................................s..................... 0 1 1 8 +5898 PF06070 Herpes_UL32 Herpesvirus large structural phosphoprotein UL32 Finn RD anon Pfam-B_9125 (release 9.0) Family The large phosphorylated protein (UL32-like) of herpes viruses is the polypeptide most frequently reactive in immuno-blotting analyses with antisera when compared with other viral proteins [1]. 25.00 25.00 55.60 55.40 19.40 19.00 hmmbuild -o /dev/null HMM SEED 839 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.42 0.70 -13.57 0.70 -6.08 9 59 2009-01-15 18:05:59 2003-05-28 14:01:11 6 2 25 0 0 58 0 674.30 31 98.07 CHANGED 
Ms.....LsFltLs++sVspLspFLcsL.pcssVDLcpHP+llpcC...stcpLpRcosLaNpLlLWLtYYcpLph+pPDhpsLhc-hchptsslschspthshhp.spuhscLsslspssFts-hpscscll.pshhs...LA+h...tttp.lshG......hsFlNL+sc-spclccNLpssppNMhphpslcls..pptNssLVsslNKLlYLG+lllslppSWpcLpctCLs+Ipphp+pLl+pl+pshuFsusYspNlLc+sV.-GpospslLchLtEDasIacsu...............LEasD................................................................................................................................shpSpp-cc--.sssp..shh.................................................................................s.ptssh.sp-Esssccc.t.tu.tst....sppp..soh..Dlpsuspsst.sss.tsspothh.E...........................Pttustss.s.ph.u.....s.p.Phhuss..ssphsP.p..........s...thshtsshh..........hss.s.hs.s..shstshsshsshchpspsth.sp.p.PsspcsosssophssspR....tsps......p.sspspEshss..pctpsoDspsVhphtph..t..h.......sspssp+ucpphhht.phssstssss......spspPshs.h..sssssshsphhp.s.....h.ss..ph+shtsh.spshss.sosl+P.shssts...sssptlssspsssssttsthPuutus.........................p..p..............st.thpss.utph.....................hpssshscl.tthsthshtssssus..hp...........s.hopthsuss..........................................stsshsthtsspss--sltsllp+hptctpp........................pc 
.........................Mu.LtFltLp+psVstlhpFLppL.ph.slsLptHP+lltcC...ttppLpRcohLaNchhLWLtYaccLph+pPD...hpslLpchc.ptstlsphs..hshP.hs..thcuhsclslh-cs...atsDhh.cs-l.l.+ul.s...LA+h...ph.p.hspuh.....huFlpL+sp-Vpclt-NlcsstpNhhhhcslclp.shp.ssshV.shN+LlYhG+LhhslppSWppLpchCLsRIpphp+hLltth+.p..uFspsYspNlLc+sV.sGsoA.sLLchL.EDFtlahsu..................LcasD.t.......................................................................................................................hhhpSt.-scs.Dss-..............................................................................................ttssh..t-..Esssptt..........................D.tss.tph...........................................P.hu.tssss..t.ssh.s.sstppshssst..shphpP..................h..ssh...........hss.t.................phphsspsps.sp.p.Psp.pssss.ou.psstt.....shu....tsctsspcpEt......c.s.puoDstpVhphtc..............ssphspp....up.phhhthphsp..Ttss......sppcP.ht.hh.ss.ssshsphhtss..............php.tp.h.sss.ssssohl...shssth...sp.p.l.ssttsps.t.st.Pputsp.........................p..p.s.....p.......ss..hhsp.usph..........................hpsh..shpth...psstts...............s.hotthtut.............................................................................t............................................................................................................. 0 0 0 0 +5899 PF06071 YchF-GTPase_C DUF933; Protein of unknown function (DUF933) Moxon SJ, Studholme DJ anon Pfam-B_10000 (release 9.0) Domain This domain is found at the C terminus of the YchF GTP-binding protein (Swiss:O13998) and is possibly related to the ubiquitin-like and MoaD/ThiS superfamilies. 
20.10 20.10 20.70 20.50 19.70 19.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.65 0.72 -4.19 44 5012 2012-10-03 10:59:06 2003-05-28 14:04:36 8 16 4777 7 1296 3379 2253 82.60 63 22.65 CHANGED LtTaFTsGscEVRAWTI+pGspAPpAAGlIHoDFE+GFI+AElluY-Dhlph.s...oEstuKEsGKhRhEGK-YlVpDGDllpF+FN ..........LtTYFTAGsc.EVRAWTI+pGspAPQAA.GlIHTDFE.+GFIRAEslu.Y........-Dhlp.h....s......uEpu..............A..KEAGKhRhEGK-YlVpDGDlh.pFRFN.................. 0 463 849 1106 +5900 PF06072 Herpes_US9 Alphaherpesvirus tegument protein US9 Moxon SJ anon Pfam-B_7164 (release 9.0) Family This family consists of several US9 and related proteins from the Alphaherpesviruses. The function of the US9 protein is unknown although in Bovine herpesvirus 5 Us9 is essential for the anterograde spread of the virus from the olfactory mucosa to the bulb [1]. 22.40 22.40 22.40 44.00 22.20 22.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.01 0.72 -4.14 10 46 2009-01-15 18:05:59 2003-05-28 14:28:17 6 1 28 0 0 46 0 57.90 49 47.07 CHANGED CYYSESDNETAs-FL+RlG++Q.spR+..RRRpChlsluhlhsslllCslSuhLGulluhhl CYYSESDsETAs-FLhRhGRpQ.shR+..RRRRshhsluhlhsslllsslSuhlGulluhhh.. 0 0 0 0 +5901 PF06073 DUF934 Bacterial protein of unknown function (DUF934) Moxon SJ anon Pfam-B_8947 (release 9.0) Family This family consists of several bacterial proteins of unknown function. One of the members of this family Swiss:Q8YEW3 is thought to be an oxidoreductase. 
25.00 25.00 25.20 25.10 23.70 23.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.45 0.72 -4.36 75 595 2009-01-15 18:05:59 2003-05-28 15:08:50 7 3 527 0 191 481 128 109.70 38 66.70 CHANGED GVhlss---sc.pL....ts..pLspl..sllAlsFPsFsDGRuaSpApLLRcRhGapGELRAlGDVLhDQlhaMpRsGFDuFslcs....stshpsAhc.shscFosh....YQsuss.....pstsha..pRp .....................uVhlssc--sp...sL....ts......cLspl..slIulpFPsFsDGRuaShAcLLRpRhGapGE..LRAhGDVLhDQlhahpRsGFDuaslcs....cpshc....c....shc....sh....scassh....YQsust....t..s............................. 0 42 107 148 +5902 PF06074 DUF935 Protein of unknown function (DUF935) Moxon SJ anon Pfam-B_10021 (release 9.0) Family This family consists of several bacterial proteins of unknown function as well as the Bacteriophage Mu gp29 protein Swiss:Q9T1W5. 20.30 20.30 22.30 21.70 19.90 20.10 hmmbuild -o /dev/null HMM SEED 516 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.43 0.70 -5.79 54 588 2009-01-15 18:05:59 2003-05-28 15:19:29 7 5 459 0 91 595 77 428.60 21 92.12 CHANGED M.sp...........s.ht.....t.ttphpsph..tthshhtt.h...psupGlo.sschssILcsstpGs.lps.t-Lh..h.t+DsHltuplppRKtAl.....hul-Wplpssss.sstcp+hA-hlc-hlpchs...........hccllh-hhDAlhhGaushEl.hW..tpps...shahspslph+s.caFphsppst..........cl+hpsss.......Gp...L.shpalh+pp+ucss..shttGLh+hlhWsahFKphul+.aspFhEhYGhPhtlGKYss.uAspp-+ss....LLcAltslspsAuulIPcsMplEhhcAus....uusssapphlcas-pplSKAlLGQThT..op....sssuspAtuplHs-VRcDlhpuDsc.lspTlsptLltslhtlNh...tsstphPphph.ppEstDhpt.......hu-shppLssh.Ghchst...pahp-chsls.sptspsshshsstss............stth.........................................stttt.t................pstlDshhsph............hsphpts.hcshlpslhshlppusoh--hhppLh.phh.sphsssphtchlucuhhsAclhGphss 
..................................................................................................................................................................................................................................................................................................hh....p...hhpt.tts..........plh.....p...DspltuthtpR+tul.....hshphpl.ssps...........t.tchlpchh..pph...................hp.ch.lhphh.-ulhhGaushEl...a..............thps.........th.hh.ttlhh+s....p...h.F...t...hs..p.s.t...................h.hphp.ss....................G..........l..s...hta.lhth.hp.upts.....t.uLhphhha.hhhKphulc.ahpFhEh..a..G..h..Phh...lGKhss...ss.....spp-hpp..............L...hpu...ltp...h..s.ps..u..u.ul.......lPp.............s..............p.............l..-....hhpsss......sssssa..p..ph.hcat-pplu+slLGpsl..T..op....spsuohAhupsHp-.....Vpp-lhpuDsc.lttslsptll............h.......hhtlNh.........sspthP.ph.htt...p...-s.tD.....hpt............hupth.p.L.s.s.h.G.h.p.lst.....pal....pcph.slststtt.p.thht....s......................................................................................................................tp..hpthhtth...........htt..ht.hht.h......th...ht....t.ts..tth.......t......tl.............t.h..phs.tth.......p.ht.hh..uph.G.................................................................................................................................................. 0 39 71 83 +5903 PF06075 DUF936 Plant protein of unknown function (DUF936) Moxon SJ anon Pfam-B_10047 (release 9.0) Family This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
20.10 20.10 21.60 20.80 19.30 19.20 hmmbuild -o /dev/null HMM SEED 580 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -13.03 0.70 -5.59 13 173 2009-09-11 12:47:55 2003-05-28 15:22:28 7 4 21 0 122 161 0 420.20 24 96.01 CHANGED LsPGVLhKL.Lp..sMsos.lK..ssGEHRSu......LLQVhsIVPA.Luuu...-LaPspG.FalKlSDSoHSsYVSLss.-csDLILSsKlQL.GQFIaV-+...L-suoPVPll+GlRPlP.GR+Ps.VGsPcDLhusc..s.hpt.........................................................shppp+tssssRphshssss+p......................shsuSPssht.h.hshp..................phsptpsuhhhsup..hptsSPsstppsss........t...................Guuph+Kpssshst............stpsps+s.stppssttpsssspsPs+sp............ptth.tthpchshsssspssspospppss...t...................p.hphsstslsWsSLPssLs+LGK........EshcpR-sAthuAlcALcEAoAsEsll+sLptFoELop..suKt-sPtssl-pFLphappltpsphh.hpSlststsssstsp.......................pppsuhp.lpu...AlsTshsshslh...........................................ppp.sshpph..sth.pptppspsstpt..............ptsGls-oscLAppLppEupsWFLcFlEcuL-pG.hctp.............ptpssuclut......hLSQLKpVscWLDplsssppp..................-pl-pLR+KlYthLLpHVposAuulss 
....................................................................l.sGlL.+l.lp...t.pss..p......sphRss......lLQVhtllPh.lstt.......p.h...tp.G.FhlplSDu.+uhYsp.s..tpt-hll.ssp.pl.Gphhalp+...l..p....u.PVPhhhsh+sls.tRt...h..hGs..Ptshh.........................................................................................................................................................................................................................................................................................h................t.............................................................................................................................................................................................................................................................t.........slss..l...up.....................thhp.+p.A..huhpu.p-A.hsp.llp.hp.auplp...ssp.tss..hhp.tFhth.t.h.p..h................................................................t......tt.t.....................................................................................................................................................................................................tthtt.hpLt..l..-.ttWFhtalEthL-.t..th......................ttpht.............hhps.pWlp..................................................................s............................................................... 0 14 78 104 +5904 PF06076 Orthopox_F14 Orthopoxvirus F14 protein Moxon SJ anon Pfam-B_10072 (release 9.0) Family This family consists of several short Orthopoxvirus F14 proteins. The function of this protein is unknown. 
21.90 21.90 22.40 125.40 21.00 17.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.50 0.72 -3.92 2 33 2009-01-15 18:05:59 2003-05-28 15:26:25 7 1 19 \N 0 20 0 72.90 86 100.00 CHANGED MKH+lYSEGLpISsDhNSIIuQ.ST.DhDIEIDEDsIhELLNILTELGCDVDFDEsFSDIsDDlLESLhEQDh MKHRlYSEGLuISsDLNSIIuQQS....ThDhDIEIDEDDIMELLNILTELGCDVDFDENFSDIADDILESLIEQDl 0 0 0 0 +5906 PF06078 DUF937 Bacterial protein of unknown function (DUF937) Moxon SJ anon Pfam-B_7321 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 24.40 24.40 24.40 24.80 24.30 23.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.90 0.71 -3.94 89 984 2009-09-10 15:56:24 2003-05-28 15:43:14 6 6 888 1 262 664 43 125.00 30 71.04 CHANGED l-plhu....t..htt............................hsthlGh.....................................................ssspsssuh..ssh..lssll.......uuh......ttt.sts..................................sGhsu........................Lhstlppssh......................................ssthtshlu..su.s.sh.............usshlsplhsu....psshsplupp...oGlst..spltphLshhhPhllshLssp .................................................h.........................................................................................................................................................hstl..l.........Guh.....tt........t..ts...................................GGlps........................LlppL.ps.u..G.L....................................Gshl...s...o..Wlu....ptsNpsl...............uscp.Lpssl.G.....ssslssLup.+.........hGlss.spssshLuphLPpllDtLoPp............ 0 53 152 207 +5907 PF06079 Apyrase SHAPY; Apyrase Moxon SJ anon Pfam-B_7593 (release 9.0) Family This family consists of several eukaryotic apyrase proteins (EC:3.6.1.5). 
The salivary apyrases of blood-feeding arthropods are nucleotide hydrolysing enzymes implicated in the inhibition of host platelet aggregation through the hydrolysis of extracellular adenosine diphosphate. [1]. 20.30 20.30 20.50 35.50 19.70 19.40 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.87 0.70 -5.26 20 238 2009-01-15 18:05:59 2003-05-28 15:48:53 6 4 129 8 104 237 2 249.00 43 77.24 CHANGED LDpsStspcct..soWtShl++GpLphsssppslolcW-pt.pltlpS+huhKGRGMELSELlsFNGKLYolDDRTGllYcIpcs..cllPWVILsDGDGsssK...............GFKuEWsTVK-ppLYVGShGKEWTTosGphhNpNPhWVKsIshcG-VpphNWhspYcplR.sAhGIpsP.GYlIHEussWSsptppWaFLPRRsSpEpYsEptDE+pGsNhllousEsFpclcslcls..shsPs+GFSSFKFlPsTcDplIlALKopE.....csGp.sATYlosFsl-G+lLLsEp+I.sDsKYEGlEFl ................................................DptSt..t.p.tp..poahShl+hG.Lphs.......s...............s.t.plslch-.t...phhlpo.....phu.pGRGMELSELlhFNGKLYohDD+TGllac..l..ct......s.........c.........hlPWVILs-GDGs.s.p.K.....................................GFKuEWh...TVK.--cLYVGuh.....G+EWs.sspGphlspsshWVKhl.shpG.cVpphNWsspYptl+.p.AhGIp.P.GY.hhHEus.WSthhppWhFLPR+sSpp..Ys-t.DEp.h.GsN.hll.ss.t.sFtp.lp.s.pls.....h..st+GFSuF...........KFlPs....opDplIlALKo.E..........tsp..htoalhshsl.p.G.......phlh.-........p.l.ts.KaEGltFh.......... 1 48 57 86 +5908 PF06080 DUF938 Protein of unknown function (DUF938) Moxon SJ anon Pfam-B_8833 (release 9.0) Family This family consists of several hypothetical proteins from both prokaryotes and eukaryotes. The function of this family is unknown. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.24 0.71 -4.78 6 529 2012-10-10 17:06:42 2003-05-28 15:51:31 7 5 465 0 233 728 387 188.80 39 90.05 CHANGED +.aAPAspRN+-PILuVLpphlssssp...lLEIASGTGQHAsaFAsthPshpWQsSD.spphht.SItA....atcptulsNltsPlpLDlots.hsspt.h..........ssslDulhshNhlHISPWsss.GLFsGAGclLsptulLhlYGPYspsGchTusSNtsFDpoLRsRsspWGlRDlc-lhuLAtppGLpLpchlsMPANN+sllFp+ ..................................husAs-RN+pP..ILp......V.L...pp...h..l....sss.sp..........VLEluSGTGQHusaF..At.th..........P......t..l....p.WQ.P.S.D.hstpt....ht...........SItA.............ah..t.p......s.....s.l..s.N.....l.t.s.P.l..t.L...D..ls.ts....a.sht..................................................shDulhsh..N.h.l.HIs..sas.s...s.pu.LFtuA..u............p.hLt.s........s..Gh.L........hl.YGPaphs.GphT.ut.SNttFD.t.pL+p+.s.P.t.h..Gl..RDhc.s.lttLAttpGLt......LtchltMPA..NNhhllap+........................................................................................ 0 62 122 184 +5909 PF06081 DUF939 Bacterial protein of unknown function (DUF939) Moxon SJ anon Pfam-B_8873 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.76 0.71 -4.31 33 2302 2012-10-02 19:04:43 2003-05-28 15:53:42 6 4 1115 0 242 2189 63 142.20 28 47.03 CHANGED h+IGhRTlKTulussLAlhlAphL..sLs.hssAGIlsILslQsTppcSlpsuhpRhhuslluhlhuslhhtlhGa.sslshGlhlllhIslslpL+lp.cGlssusVlllplhsppshsahh.hhschhLlhlGlslAhLlNlaM ...............................plGhRTlKTulAshLu.h....l....l....s.t.h.L.............sl.s.....h....h.h....Au....l.s.A.l.l.u.l....p..s..oh.p..p.S..l.p.hu.hsRlhush...lG..shlu...l....l.....h......h...h.....l............h.......G........p.......s.......h....h.......s....h......u.....l....h...........l.....h......l......h....I.....s...l......s......h.......t.......h.......p........l........p........t...........G........l..........s..........s..........u..........s.....l....h........l....l..........t...........l.....h..........s..........t..........p..........s......t..........s..........h..........h....h......h....hsc.h.lh.hhIGlhh.AhllNhhh.................................................................. 1 87 155 210 +5910 PF06082 DUF940 Bacterial putative lipoprotein (DUF940) Moxon SJ anon Pfam-B_9128 (release 9.0) Family This family consists of hypothetical bacterial proteins several of which are described as putative lipoproteins. 
21.10 21.10 25.80 24.90 20.20 20.20 hmmbuild -o /dev/null HMM SEED 658 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.83 0.70 -5.97 38 1147 2009-09-11 20:50:12 2003-05-28 16:06:17 6 5 706 0 120 707 920 573.90 47 92.49 CHANGED sS.ssaGss.GLlpMPoARMts-Gphuhshsts.........sp..........Yp+asholQlhPWlEsohRYopl......psh.as.........s.h..SGsppapD+uhDhKhRLh..cEota.lP.plulGlcDhuGTG............lFsGEYllAoKph...........Gsh-hTlGlGWGhlGspsslsssh......h..spRs.t...htspGGplshspaF+Gssuh.FGGlEYQsPaps.LpLKlEY-ussYpp-hssp.....shppcSs.aNaGshY+hschhclpLuYp+G............sTlshuholptNh.....sshpss.hhsssssshpsp......ssp..............................................................tshpss...........................hpphhpphhtpsGaphtplt.hcss..slslthc..p.s+YcspscAhtRsApllusshPss.lcpa+ll.pptshPhsps..............plct.tthpshtphphh..s.sphtstsphsps...h.sps......thhppth.ca....sauluPhlsQShGsP-shahaslGlpssusYphssshhlsGslthslh..sNaDchph.............................sus.sLPRVRT.hRpYsppssspLspLpLsahtphupshYspshuGYLEpMauGVGuElLY+P.hsusaAlGl-hNal+QRDh...-shhGh.............t-hcs..........................hT.......G...HsosYap.s.....pshhhplssG+YLAsDhGsTlslu+pFcsGlhlGAaAThT...DlSu-EaGEGSF.sKGhalSIPhDhhhscPopspushshtPloRDGGQhLspphpLY-hTcsp 
...........................................................SQSDFGGs.G.LLQhPoARMA.EGEh.SlNY+DN...............DQ.................YRaYSsSVpLFPWLEsTlRY..T..DV...RT+pYS........phEsF...SGDQoYKDKuFDhKLRLW...EE...u...YW...lP.QVAlGtRD.I...uGTG.....................LFD.uE....YlVASKth...........GPF.DF.T..LGhuW.GYhGsuGN.lpNPhCp.....hSD+aC.Rs............pp.AGslsh....SphF+GP.A.Sl.FG...GlEYQ.....T....PWpP...LRLKLEY-GNsYppDFA.G.........+L.QtS.+..FNVGAlYR.s.s.sW.ADlNL.SYERG............N.ThMFGhTL...RTNF.....NDLRPs..hpDss+PtYpPt.....Ppsth.......................................................................phos......VAsQLThLKYNAG..htsPpIQ..h+s...c.....TLYhoGpQ..hKY.RDo+Eul.RA.NRI.lhNsLPp..........G.l...........cpIplT.pp.R..s.hs.VTT.....................ETDV.ASL+p...pLsGps....t...........hp.tp.p.h.s.......puht...............................pGahIccsRa.....saphsPs..LsQSl.GGPEsFYMaQLGlhuoAchWhTDHLLh..sGulFsNlh..NN.YDKFp...o.........................PtDS.pLP.RVRT+lR-YV..pNDVYlNNLQANYFtcLG...N.GFYGQVYGGYLETMauGsGuElLYRP.lDusWAhGlDsN.YVK.QRDW...csM.M+F..............T.DYSs..........................hT..........G...alTA...Y.WsPs.....hhpsVLhKhSVGQYLAtDKGuTl-luKRFDSGVsVGsaAslo...sl..St--..Y.GE.Gs.F.o..KGhYlSlPhDLho.sPsRsRAsluWoPLTRDGGQ.LuR.KapLYsMTu-........................................................................................... 0 24 69 97 +5911 PF06083 IL17 Interleukin-17 Finn RD anon Pfam-B_9152 (release 9.0) Family IL-17 is a potent proinflammatory cytokine produced by activated memory T cells [1]. The IL-17 family is thought to represent a distinct signaling system that appears to have been highly conserved across vertebrate evolution [1]. 
21.00 21.00 21.70 22.70 20.90 20.30 hmmbuild --amino -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.09 0.72 -3.92 20 312 2009-01-15 18:05:59 2003-05-28 16:46:01 6 4 67 10 176 271 0 81.80 39 48.52 CHANGED pRSlS..PWsYpls.......cDssRYPphlsEAcCLCpGClsup.GcEshsh.sSVPlhppllVLRRp............spssstuachchhpVsVGCTC.Vh ......pRSlSPWsY.pls.......pDs.sRaPp.ls-ApCh.CpG..C..l.......s....s......Gp...c.s..tsh..sSlPlhpphhVLRRp.............spssthsachchhtVsVGCTC.V................ 0 39 51 81 +5912 PF06084 Cytomega_TRL10 Cytomegalovirus TRL10 protein Moxon SJ anon Pfam-B_8875 (release 9.0) Family This family consists of several Cytomegalovirus TRL10 proteins. TRL10 represents a structural component of the virus particle and like the other HCMV envelope glycoproteins, is present in a disulfide-linked complex [1]. 20.20 20.20 21.20 260.60 19.20 19.20 hmmbuild --amino -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.96 0.71 -4.41 5 22 2009-09-11 05:08:22 2003-05-28 16:48:09 6 1 6 0 0 21 0 149.10 91 87.75 CHANGED VCAETTVATNCLVKTENTHLTCKCSPN.......sTSsTGNGSKCHAhCKC.RVTEP...ITMLGAYSAWGAGSFVATLIVLLVVFFVIYAREEEKNN.TGTEVDQCLAYRSLTRKKLEQHAAKKQNIYERIPYRPSRQ.KDNSPLIEPTGTDDEEDEDDsV .VCAEsTVsTNCLVKoENTHLTCKCsPN.......soSNTsNGSKCHAMCKC.RVTEP...ITMLGAYSAWGAGSFVATLIVLLVVFFVIYAREEEKNN.TGTEVDQCLAYRSLTRKKLEQHAAKKQNIYERIPYRPSRQ.pDNSPLIEPTGTDDEEDEDDDV 1 0 0 0 +5913 PF06085 Rz1 Lipoprotein Rz1 precursor Moxon SJ anon Pfam-B_8925 (release 9.0) Family This family consists of several bacteria and phage lipoprotein Rz1 precursors. Rz1 is a proline-rich lipoprotein from bacteriophage lambda which is known to have fusogenic properties. Rz1-induced liposome fusion is thought to be mediated primarily by the generation of local perturbation in the bilayer lipid membrane and to a lesser extent by electrostatic forces [1]. 
This family Rz1 and the Rz protein Rz (Pfam:PF03245) represent a unique example of two genes located in different reading frames in the same nucleotide sequence, which encode different proteins that are both required in the same physiological pathway [2]. 27.90 27.90 28.50 32.30 26.60 27.80 hmmbuild --amino -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.23 0.72 -4.54 7 97 2009-01-15 18:05:59 2003-05-28 17:12:01 6 2 60 0 3 60 0 40.80 57 61.59 CHANGED CsSpPss...C...lKPPsPPAWhMpPssDhpp.LNtIhSsScpp ...........CtSsPPV......C...scPPsPPAWhMpPssDhQp.LNuIISsSEst... 0 2 2 3 +5914 PF06086 Pox_A30L_A26L Orthopoxvirus A26L/A30L protein Moxon SJ anon Pfam-B_8938 (release 9.0) Family This family consists of several Orthopoxvirus A26L and A30L proteins. The Vaccinia A30L gene is regulated by a late promoter and encodes a protein of approximately 9 kDa. It is thought that the A30L protein is needed for vaccinia virus morphogenesis, specifically the association of the dense viroplasm with viral membranes [1]. 21.00 21.00 21.00 22.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.75 0.70 -4.88 3 96 2009-01-15 18:05:59 2003-05-28 17:20:21 7 2 32 0 0 88 0 208.20 60 42.94 CHANGED MANIINLWNGIVPTVQDVNVASITAFKSMIDETWDKKIEANTCISRKHRNIIHEVIRDFMKAYPKMDENRKSPLGAPMQWLTQYYILKNEYYKTMLAYDNGSLNTKFKTLNIYMITNVGQYILYIVFCIISGKNHDGTPYIYDSEITSNDKNLINDRIKYACKQILHGQLTMALRIRNKFMFIGSPMYLWFNVNGSHVYHEIYDGNVGFHNKEIGRLLYA ...........MANIINLWN...GI.VPhVQDVNVASITAFKSMIDETWDKKIEANTCISRKHRNIIHEVIRDFMKAYP.KM.DEN.........+KSP.LGAPMQWLTQYYILKNEYaKTMLAYDN.....G...SL.NT....KFKTL.NI.Y......M..ITNVGQYILYIV.FCIIS.GK.NHDGTPY...I...YD...SE.ITSN.DKNLIN-RIKYACKQILHGQLThALRIRNKFMFIGSPMYLWFNVNGSpVYH-IYDtNsGFHN+EIGRLLYA........................................ 
1 0 0 0 +5915 PF06087 Tyr-DNA_phospho Tyrosyl-DNA phosphodiesterase Finn RD anon Pfam-B_8155 (release 9.0) Family Covalent intermediates between topoisomerase I and DNA can become dead-end complexes that lead to cell death. Tyrosyl-DNA phosphodiesterase can hydrolyse the bond between topoisomerase I and DNA [1]. 22.00 22.00 23.00 22.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.78 0.70 -5.86 27 520 2012-10-02 13:01:53 2003-05-29 09:34:23 7 27 273 43 378 517 1 347.80 22 66.87 CHANGED Phplhhoslhshpsp.......tsssslsLc-lLus...plppsh.a....sFhhDlsaLlsphs.phhp...plhhlpG...ppt.phhphttt.....th.NlphltsthstsF...GsHHoKMhlhhac-s....h...+lVl.osNhh.tDWs.....hsQshWhSshl........spsssst.........stscF+p-LhpYLppYt......tsl....hpplccaDFSsl...sstLlsSsPGpap.........ph..p..aGaspLtplLpcpss.s......tpspphsl.lsQsSSIGolsss.t.ahpsphhssLhhss...............................tt.ppppsphpllaPoscpl+sShs.GasuGuslha+hp....p+ppp.hl+..shhp+Wtup............sptRspshPHlKsYh+hs..........pphpslpWhllTSANLSpsAWGs......htptssphplpuaEhGVLh......Ppths..................h.shshpts..t.....................hth.hPac.lPhpsYs 
............................................................................................................p..slplp.-llt.....l..sh......sahhDhtalhtth....th..h.............lhh.lh..s.........t...t.t..th..t.......................h..plphh.s.h......h...........ushHoKhhllh....htpt..........h...+lll.ouNhh........DWtp..............hpps..........lahpsh...Phh.............tttt................................p.stFpt...cL.htaL.tht........t.h.................hp.l.p.p.h.Dauts...pshhltSsPGhat.....................................p......hGhhtLtphlpphs............................p.....h..l.......hQh...SSlGs.....h............tthhp..sh..............................................................................hpllaPo.pplpp.Sht..G...h.....u...u.s.s.l...........hp..........p.............t.......hlc.......h........h..t.................................................ttRptsh..s.HhK.....hhhths.............................................t...pph.sWhhls..SuNLSpsAW.Gt................................p.ttsph.lpsaEhGVlh........Pt.ht......................................................................................................................h.hPh..hs............................................................................. 0 116 204 316 +5916 PF06088 TLP-20 Nucleopolyhedrovirus telokin-like protein-20 (TLP20) Moxon SJ anon Pfam-B_7657 (release 9.0) Family This family consists of several Nucleopolyhedrovirus telokin-like protein-20 (TLP20) sequences. The function of this family is unknown but TLP20 is known to shares some antigenic similarities to the smooth muscle protein telokin although the amino acid sequence shows no homologies to telokin [1]. 
23.50 23.50 23.60 26.50 22.30 23.40 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.04 0.71 -4.58 21 50 2009-01-15 18:05:59 2003-05-29 09:38:37 6 1 47 1 0 53 0 192.40 31 97.19 CHANGED MAossssTsD.IsVpsplsc-.tccslLsFhlc-EY+LpKhulGAaslpllsop.LssLtpstp..sslssG-YsllaNhsps.spplsslLhslKssslKKusslF+lhhhs................................................ophhhsuss............sppsspsps---s..................sss-shshsts................ssssssK+QKLDcptps ......MAossssTsD.IuVaspl-cE...tscs..lLoFlVc-EYHLKKLuVGAYslsIl-oph.LssLtp.ptp......ssluCGsYllhaNhscp......sstlNsILFNh+.sshLKKusslF+lhahs.............................................................................................tptpsphhtss.s...........sppsspsps---s.......................psss-ssshsts.........................t.stssssK+QKlDcs...................................................................................... 0 0 0 0 +5917 PF06089 Asparaginase_II L-asparaginase II Moxon SJ anon Pfam-B_7673 (release 9.0) Family This family consists of several bacterial L-asparaginase II proteins. L-asparaginase (EC:3.5.1.1) catalyses the hydrolysis of L-asparagine to L-aspartate and ammonium. Rhizobium etli possesses two asparaginases: asparaginase I, which is thermostable and constitutive, and asparaginase II, which is thermolabile, induced by asparagine and repressed by the carbon source [1]. 
25.00 25.00 34.20 34.10 24.50 17.90 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.82 0.70 -5.75 87 514 2009-01-15 18:05:59 2003-05-29 09:45:46 7 4 476 0 211 516 948 313.30 37 94.11 CHANGED hlclhRGu...................llEShHpGpsVVsDscGp......llhthGsssps.sasRSuhKPhQALsllpoGAspth.........................................slsscc.LALsCASHsGpshHsctstshLtchGLs-ssL......pCuschPhst.st.thhtsstpPstlppNCSGKHAGhLssspthGhshcsYlpscHPlQptltpslp-lsuhss..sph..uhDGCusPsaAhsLpslApuaAphAsu...t.t...s.t.pttsht+lhcAMttaPphVAGpGRhsTpLMcshsGplluKsGAEGVhssul..........sstGlGlAlKlpDG.usRAttssssplLtplsh..........l....sss.hstltphhtsslhst.pGh.sGpl+ss .................VclhRGs...................hlEShHpGpssVlDs.cGp......llhshGDscps.sasRSuhKPhQALsllco....Gsspth.........................................shssp-LAlsCASHsGE.tHlptstshLtcs..G.....LspssL......pCsschPhpt.sh...thhps.stp...PoslppNCSGKHAGhLAs.stt...........t...........G...........h......s......h...............csYlp.sHPlQ....ptltpsltclsGhsh..sth...GhDGCusPsaAlsLpsLA+uaAphAsu...........t..s........s.t.tttshpRlhcAMtsaPthVAGs.sRhsTtLMctssGtlluKsGAEGVhshul..............sc...G...hGlAlKlpDG..u.sRAttsssstlLpp.Lsh..........h...s.s...t.ttLtpht..p..s.....hhst....tGh.lGplp.s................................................ 0 69 144 187 +5918 PF06090 Ins_P5_2-kin DUF941; Inositol-pentakisphosphate 2-kinase Finn RD anon Pfam-B_9098 (release 9.0) Family This is a family of inositol-pentakisphosphate 2-kinases (EC 2.7.1.158) (also known as inositol 1,3,4,5,6-pentakisphosphate 2-kinase, Ins(1,3,4,5,6)P5 2-kinase) and InsP5 2-kinase).\ This enzyme phosphorylates Ins(1,3,4,5,6)P5 to form Ins(1,2,3,4,5,6)P6 (also known as InsP6 or phytate). InsP6 is involved in many processes such as mRNA export, nonhomologous end-joining, endocytosis and ion channel regulation. 
22.70 22.70 22.80 23.30 21.70 21.30 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.34 0.70 -5.39 38 330 2009-09-11 15:15:53 2003-05-29 09:47:06 7 11 270 17 238 346 0 342.40 19 82.54 CHANGED YhuEGsA...Nllhphssss..............................hht+lLRlcKhtts................................ttpppchhcahc..phl+..PLhu....p.lsstplVtls..pphlppl..spplp.........................Rst.R.th...hlssscshuLLlsDlsshst....................................................sslslEIKPKWha...............................p....hpsphCRhCshpthK..................t.tthsptppaCPL...DLh.....usshpclhpAlcs.hsp......spssh+h.htt..........................................................lschLhp...............................................sslLp+LhphQc........................................................ts.pthshpth.shspphtluMTh+DCohhIphp.pt............................................h.hpsplhDLDhK..shpKhppah.ch-pcL ...........................................................................................................................................ahuEGssNllh...thtt..t.......................................................hlLRhhKh..s................................................p..pahp....phhp....hhs..................phlhs..p..lltls......hphlptl..st.lt................................R...+hth..........hs.htpshuhhlsshs..hs...............................................................................................................................................tsshslElK..PKhhh........................................................................................t..t.hptphCRhChh...phhK......................tt..tt...hpp..ppaCPL-Lh.....................uss.hpchhtAlpthhtp.......................spssh+h..tt..............................................................................................hthltphLhp..................................................................................................................................................
............................................sslLtcLhp.h.Qt.......................................................................................................................................................................................................t...t.p..ths.h.tt...p..h......pp..hhluhTh+DColhlphp.tt..............................................................................................................................................hpsplhDLDhK..shtph.hah.ph-tp...................................................................................................................... 0 82 128 193 +5920 PF06092 DUF943 Enterobacterial putative membrane protein (DUF943) Moxon SJ anon Pfam-B_7843 (release 9.0) Family This family consists of several hypothetical putative membrane proteins from Escherichia coli, Yersinia pestis and Salmonella typhi. 25.00 25.00 25.10 28.50 23.20 24.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.11 0.71 -4.93 16 442 2009-01-15 18:05:59 2003-05-29 09:58:29 7 2 256 0 34 201 0 148.80 46 96.19 CHANGED MKp+pptllh...ullLhuslh..YhhW.thRPVEI................lsVHpcss......stIlV+sFPlTD+GKIsWWhcNKshLKpKYsIPcss.cGhaolhhWDFG-GYc...........cpupsDhhCF-DhK.opcNCI-K.Nhlhslcphp.ssphhFshssspY.hp.........csGclsK..htp- .................................MKtc.pKllh...sLhLlssl....Y.hW.phRPVpI................lhsapDss.....sshllVcchPhTDpsKINWaLcppsclKspaslPcsu.csaasI...DhGsGap...........EtshpDLhCFsshK.os-NCIsK.NhLMsIpch..cNshha.hh....DtspYQLs.........ppsKIp+shp.s............................ 0 5 16 23 +5921 PF06093 Spt4 Spt4/RpoE2 zinc finger Moxon SJ, Kerrison ND, Finn RD, Bateman A anon Pfam-B_7906 (release 9.0) & COG2093 Domain This family consists of several eukaryotic transcription elongation Spt4 proteins as well as archaebacterial RpoE2 [2]. 
Three transcription-elongation factors Spt4, Spt5, and Spt6 are conserved among eukaryotes and are essential for transcription via the modulation of chromatin structure. Spt4 and Spt5 are tightly associated in a complex, while the physical association of the Spt4-Spt5 complex with Spt6 is considerably weaker. It has been demonstrated that Spt4, Spt5, and Spt6 play roles in transcription elongation in both yeast and humans including a role in activation by Tat. It is known that Spt4, Spt5, and Spt6 are general transcription-elongation factors, controlling transcription both positively and negatively in important regulatory and developmental roles [1]. RpoE2 is one of 13 subunits in the archaeal RNA polymerase. These proteins contain a C4-type zinc finger, and the structure has been solved in [3]. The structure reveals that Spt4-Spt5 binding is governed by an acid-dipole interaction between Spt5 and Spt4, and the complex binds to and travels along the elongating RNA polymerase. The Spt4-Spt5 complex is likely to be an ancient, core component of the transcription elongation machinery. 25.00 25.00 27.10 30.20 24.40 23.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.85 0.72 -4.10 37 482 2012-10-03 10:42:43 2003-05-29 10:20:38 8 1 446 10 332 435 72 70.90 37 68.11 CHANGED hRAChhCpllpohspF..pGCsNC..hhtht.ts.pplt-sTSpsFcGllulhcPpc.SWlA+a.c.lsphhPGhYAlpVs ...........RAChhCpllp..ohsp....F.....p.....GCsNC..hh.t.....ts.t.....s.csTSssFcGllsl..hcPpc.SWVA+a.p....l....sthhsGhYAlpVp.............. 0 105 189 271 +5922 PF06094 AIG2 AIG2-like family Finn RD anon Pfam-B_9771 (release 9.0) Family AIG2 is an Arabidopsis proteins that exhibit RPS2- and avrRpt2-dependent induction early after infection with Pseudomonas syringae pv maculicola strain ES4326 carrying avrRpt2 [1]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.73 0.72 -3.52 90 2098 2012-10-02 16:39:48 2003-05-29 10:21:23 7 27 1701 10 779 2120 898 101.30 28 64.81 CHANGED lFVY...GTLpps.psppthlt.........tsphhspsspht.......hhhhthtsYPsll............ssst................................tVpGp.lapl...sp....pt..LptLDphEt.......pss.hYcRtplplt.............hssstt............pAalYhhp ..................................................................lFVY...GoL..ppt...pss.p.phht....................ssph.h.u..p.h..t..hht..........h..h.h..h..s...h.s.t..Y...P..uhls.......................sss........................................................pVpGE..V..Y.p.l.......ss..............sp......LspLDthcs.................psstYpR.p..h.lp.s.........................................hs.......................pAahYlh........................................................................... 0 261 470 649 +5924 PF06096 Baculo_8kDa Baculo_8Kda; Baculoviridae 8.2 KDa protein Finn RD anon Pfam-B_8370 (release 9.0) Family Family of proteins from various Baculoviruses with undetermined function. 25.00 25.00 36.30 35.80 19.70 19.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.29 0.72 -4.09 15 32 2009-01-15 18:05:59 2003-05-29 10:31:12 6 2 30 0 0 25 0 64.50 44 78.56 CHANGED ssYslppFYNssR+PLKsTTLHsGNIspssYEsVhhhR..+LhC+Esh..Puc+-hpFps.+saNKEN ......s.slppFYNssRpsLKsTTLHDGNlstpsYcsVh.sR...+LhCpEsh..suc+-hKhpp.psaNKEN........................... 0 0 0 0 +5925 PF06097 DUF945 Bacterial protein of unknown function (DUF945) Moxon SJ anon Pfam-B_9171 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
22.40 22.40 22.50 24.10 22.30 22.30 hmmbuild -o /dev/null HMM SEED 460 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.37 0.70 -5.71 50 1334 2009-09-11 15:55:27 2003-05-29 11:00:00 6 4 964 0 145 797 18 449.70 33 95.70 CHANGED MKK...hhhGlllulsslhsuusaasGpphEsphpptlsphNt.........sssl..plphhsacRGhFSSpsphplpssssthpt.............shplshssplpHGPhshsp...lpphphhsthh.sp.......sthhp.hhthhtspsPhphssplsasGssssslslsshsh....ptssssls...auGsphphshs.pshpphshpsshsslplsh...........sttplplsslphs.sstptsphs......hhlGspshplcplslssp...............................spphhplpshsht.psphsp........ss...splsuphshslsslphssp.slGssplshplsplDupuhpphhphYpspst..................s...hpptthpplhsslstLLpssPplslssLohcs.spGchshsls..lsLsss..sptttts.......tplhp..shphssphslstshlschht.......................................ph.t..ts.........................t.uppplpthsshhhh.plhs.cssslpsshphssGplsl.NGpch...Pltph ..........................................................MpK....sAsGVIlALull..WsG.GsWYT.GpplE.splpch....lpphNtth+.........hst.psslploYpNacRG....h..FoSphQhhl..p..sut.s..lc..............................sGQpllhspsVcHGPhPlst...Lt+hN.....llPuhAuhpspL.........sNN-hspsLF.ths..K.s.cs.Phphss..Rh.u....a..u.G.ss.so........slslt..PhpY......pptstKlu.....F.usup.hphsuD...pshp.s...l...s.lpGcs.pshhlshs....................................ptsplohpslphD.upuphspht.......hhlG.pp.chplpp.hsIssc...............................Gcchs..l-uhp..hpscoch..sp...........Dt....ctlNspLsYpLs..s..Lph.....p.........s....p.....p....L......GS....Gcl....oLtssplDupA...h+QF.ppYsttspt.h....................tsthups..hhQpt.s....sp.t.......hhstLPhLhcucPslslsPluWKN..upG.E......shNLs..l.ltDP.....upsptss...............sp.hc.....lKoLshcl..plPh.shsoEhhpQhs..............................................................................h..EGh.pt-pAp................................+hAcpQlp.u...h...shGQM..hpLhT..l...p..sN..slshp.L.......pYss..Gplsh.NGQcM...s.E-F........................................................................................
........... 0 32 63 112 +5926 PF06098 Radial_spoke_3 Radial spoke protein 3 Moxon SJ anon Pfam-B_9453 (release 9.0) Family This family consists of several radial spoke protein 3 (RSP3) sequences. Eukaryotic cilia and flagella present in diverse types of cells perform motile, sensory, and developmental functions in organisms from protists to humans. They are centred by precisely organised, microtubule-based structures, the axonemes. The axoneme consists of two central singlet microtubules, called the central pair, and nine outer doublet microtubules. These structures are well-conserved during evolution. The outer doublet microtubules, each composed of A and B sub-fibres, are connected to each other by nexin links, while the central pair is held at the centre of the axoneme by radial spokes. The radial spokes are T-shaped structures extending from the A-tubule of each outer doublet microtubule to the centre of the axoneme. Radial spoke protein 3 (RSP3), is present at the proximal end of the spoke stalk and helps in anchoring the radial spoke to the outer doublet. It is thought that radial spokes regulate the activity of inner arm dynein through protein phosphorylation and dephosphorylation [1]. 
23.80 23.80 23.80 24.10 22.50 23.70 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.73 0.70 -5.41 14 179 2009-01-15 18:05:59 2003-05-29 11:14:06 6 7 117 0 124 177 8 265.80 39 61.68 CHANGED hsY........uNIMaDRRVhRGNTYutp...h.up.-.........pppppuppcuhs+K+t+phhss+.........oPsss-GRcphsVQTE.YLEELpD+l...Es-h-sQTDsaL-RPsoPhaVPtKoGtDstTQI..-.....GELFDFDhEVcPIL-VLVGKTlEQALlEVhcEEELtplRppQcpa-phRsAELsEsQRLEtpE+RppcEKERRhpQc+pthcc-+EspcKlsARsFApsYLu-llssVhssLcspGaahD.lcp-lEpsFhPWLhpEVtppltpp....h.u+slLDplIc-lsp.ph.thhptpp .........................................................................................................htNlMaD+RVhRGsTau.....h.....s..p...p.h.....................ppp.pptp++shs+..Kptpp...h...p.p.........TP.PlcGRpHhslQTE.hY..LE..ElsD+h...Eh-h..-sQTD.sFL.D+PsTPh........FlP.........sKoGhDsuTQI...-.....G-.LFDFD.hEVc.PlLEVLVGKTlEQuLlEVhEEEELssL+tpQptacphRssELsEsQRLEtp.E+Rpp-EKERRhpQphphhcpcpEsppKlsApshAptY.lusLlssVhssLc.spGaFhDslcc..-lEhsFhPWLhp-Vtpphppt....hhuRp....lL.......-.llpplhppp........t................................................ 0 65 76 101 +5927 PF06099 Phenol_hyd_sub Phenol hydroxylase subunit Moxon SJ anon Pfam-B_10062 (release 9.0) Family This family consists of several bacterial phenol hydroxylase subunit proteins which are part of a multicomponent phenol hydroxylase. Some bacteria can utilise phenol or some of its methylated derivatives as their sole source of carbon and energy. The first step in this process is the conversion of phenol into catechol. Catechol is then further metabolised via the meta-cleavage pathway into TCA cycle intermediates [1]. 
25.00 25.00 32.80 32.30 17.90 15.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.70 0.72 -4.14 23 83 2009-01-15 18:05:59 2003-05-29 11:20:04 6 1 69 0 24 79 5 58.60 42 70.33 CHANGED pstP..sh-hp.p+YVRVpucpssuFVEF-FuI.GpPELhVELlLPpsAFctFCtsNpVhcLss ......p..s..h-.h.p+aVRVpucpsssFVEF-FuI.GcPELhVELlLPpsAFcpFCptppVhphs........ 0 1 13 22 +5928 PF06100 Strep_67kDa_ant Streptococcal 67 kDa myosin-cross-reactive antigen like family Finn RD anon Pfam-B_9995 (release 9.0) Family Members of this family are thought to have structural features in common with the beta chain of the class II antigens, as well as myosin, and may play an important role in the pathogenesis[1]. 23.00 23.00 23.00 23.00 22.90 22.80 hmmbuild -o /dev/null HMM SEED 500 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.63 0.70 -6.15 8 1128 2012-10-10 17:06:42 2003-05-29 11:20:18 6 6 924 0 207 793 40 447.70 47 85.31 CHANGED -sKSAYlIGoGLAuLAAAsFLIRDGQMsGc+IHILEELslsGGSLDGhphschGYVsRGGREM.ENHFECLWDLFRSIPSLEhsssSVLDEFYWLNK-DPNaSpCRlIcKpGpRlDsDGcFTLopKuhKEIhcLhhTsEEcLsDhpIc-lFS--FFsSNFWlYWpTMFAFEsWHSAhEMRRYlMRFIHHIuGLsDFSALKFTKYNQYESLVLPhlpYLKsHuVsFcYsspVpNIpVDhosuKKlA+pIhhp.+sGcccoIcLT.sDLVFVTNGSITESSTYGssspPAPhTcEL..GGSWpLW+NLApQSs-FG+P-KFCpclPppsWhhSATlTscsc+I.sYIE+ls+RDPhuGKsVTGGIlTlpDSsWLhSaTIsRQPpFKcQscN-lllWlYuLYSDpcGDYIKKPhcECTGpEIspEWLYHLGVPp-cI--LA.pcssNTIPVYMPYIToYFMPRshGDRPhVVPcsSsNLAFIGNFAET....sRDTVFTTEYSVRTAMEAVY 
....................p+pAYllGuGlAuLAAAsaLIRDupMtGppIpI..l..E.ch.......s..hs....GGS.h.....D.....G....t...............t...............................s.............p..cGalsRGGREM...-.pHF..EshWDLa+oIPS...LEhs.s.hSVLDEa..hh..l..s....c...c....D...P..s..h.u...p..sR...................ll....c..p..p..G...p...c.....h...s...s..s.uphsLspps.h-lhcLhhss...E...-...cLtshpIp-hF..s.-..-..FFpoNFWhhWpT.MFAFE.WpSAhEh+RYhhRFlaal..sGlschouL+FT+YNQYEShllPhlpaLcspGVpFpass....pVpslp..h..c..h..s...s..s.....p......K......l.....A.p....p...I.hhp....p...s.......G.........p...p..............c...pIpLs.sD..L..VFlTNGSlTEuoohG.Dp.so.......P.....A.....s...............s......p..-h.....uuuWpLW+plAp.....p..s......s..p..FGpP-hFsp.....shscpsW..hShThTs..p...s.p.p.l.h.shI.cplsppcP...hsG...p.hsTGGIlThpDS..sWhhShslsRQPpF+sQs.c.sphssWhYuLascp.Gsal+Kshh-CTGpEIhpEhLYHL.......Gl....s.......p.......p...p......h......cclu..t.......p.tssslPshMPYITuhFMsRshGDRP.VlPcsShNLAFlGpFsEo.....cDsVFTsEYSVRTAM.AVY............................................................ 0 53 115 164 +5929 PF06101 DUF946 Plant protein of unknown function (DUF946) Moxon SJ anon Pfam-B_10066 (release 9.0) Family This family consists of several hypothetical proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
19.80 19.80 20.00 19.80 19.50 19.70 hmmbuild -o /dev/null HMM SEED 536 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.84 0.70 -6.51 15 441 2009-01-15 18:05:59 2003-05-29 11:22:46 6 38 192 0 316 436 14 234.70 18 27.46 CHANGED s.spsFsLPusLPsWPpGpG.FAsGpIsL.GcLEVspl....osFp+VWsstputscstGsTFacPs.ulPcGFasLGaYsQPNs+PLpGaVLVA+Dluus................ssL+pPlDYTLVhsossht.spps.........sGYFWLP.PP-GY+AlGhlVTssspKPsL..DcVRCVRuDLT-pCEspshI..hs...ssu............hslassRPscRGhtupGVtVGTFhCpsts.ssc...sssIuCLKNlcss.LpuMPNlcQIcALIcpYuPhlYFHPcEpYLPSSVsWFFpNGALLa+KGcps...s.PI-ssGSNLPpGGsNDG..pYWlDLPsD-ps.+EpVK+GsLcSuclYVHVKPshGGTFTDIsMWlFhPFNGPAplKluhh.slsLu+IGEHVGDWEHFTLRISNFoGELWplYFSQHSGGpWVDAsDLEFhp........suNKPVVYSS+HGHASFP+PGhaLQGss..GlGIRNDsA+SchhlDooh+YcIVAAEYLGsulV..EPsWLpYMRcWGPplsYsscsElcplhphLPh.hLRhsh-sllpplPhELhGEEGPTGPKEKsNWpGDE ............................................................................................................................h.h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hGpHlGDWEH.hlRh........u................hahSt..H..t...t.Gt..h....h......t..h....p....h.............................+shhasu.tsHu.as..G..................................................................................................
..................................................................................................................................................................................................................... 2 86 186 267 +5930 PF06102 DUF947 Domain of unknown function (DUF947) Finn RD anon Pfam-B_9959 (release 9.0) Family Family of eukaryotic proteins with unknown function. 23.00 23.00 24.60 24.40 22.50 22.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.33 0.71 -4.43 30 317 2009-01-15 18:05:59 2003-05-29 11:27:36 7 9 285 0 227 311 3 168.00 32 54.68 CHANGED ++psK+uPhEhSSK+.VsphRplhts..........p+s..hhRDPRF-....shuGp.............hs.shh++sYsFL.-DhRppElpclccpLccsK.....ss.......................cE+-cL+pplpshcschcshpp.+cpEpcllp-a++pE+-tlp.pGKp..PaaL..KKS.....-h+Khlhtc..Kacph..............cs+plcK.....hlE++RKKhsuKE+K ..................................+tsKctPhEhSoK+...Vsp..hRplhts............ppp.....hhRDPRFD.........shsGp.............hs..p...h.hcKsYsFL.s-h.RppEhpp...L.+cpL+cp+.........ss...........................................cc+cclcptlpphcsphpspcp.+cpcpclhpch++cc+ct..hc..pGKp..PaaLKKS..........Eh++hhLtp....+acph..............p.stpl-+hl-++RKKpsuK-++........................... 1 78 128 189 +5931 PF06103 DUF948 Bacterial protein of unknown function (DUF948) Moxon SJ anon Pfam-B_10104 (release 9.0) Family This family consists of bacterial sequences several of which are thought to be general stress proteins. 
28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.80 0.72 -4.12 42 1305 2009-01-15 18:05:59 2003-05-29 11:30:03 6 7 1253 0 178 574 36 89.90 36 61.21 CHANGED IuslIhAlAFllLVlalhhsLtpl....pps...lppsspTlpslppplssltppospLLsKsNpLh-DlstKspslsPlhpAluDlupSVpcLNsu ....................lAulIsAlAFllLllall....hsLhpl..................p+s.......lDcsscTlc.......sls.......spV.......sslh...ccoscLLsKsNtLh...-D....lss.....KlsslsslhsAVsslupSVpsLNs.................................... 0 49 111 147 +5933 PF06105 Aph-1 Aph-1 protein Moxon SJ anon Pfam-B_7927 (release 9.0) Family This family consists of several eukaryotic Aph-1 proteins.Gamma-secretase catalyses the intramembrane proteolysis of Notch, beta-amyloid precursor protein, and other substrates as part of a new signaling paradigm and as a key step in the pathogenesis of Alzheimer's disease. It is thought that the presenilin heterodimer comprises the catalytic site and that a highly glycosylated form of nicastrin associates with it. Aph-1 and Pen-2, two membrane proteins genetically linked to gamma-secretase, associate directly with presenilin and nicastrin in the active protease complex. Co-expression of all four proteins leads to marked increases in presenilin heterodimers, full glycosylation of nicastrin, and enhanced gamma-secretase activity [1]. 
25.00 25.00 26.90 26.00 20.50 23.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.64 0.70 -5.12 14 233 2009-09-11 12:57:32 2003-05-29 11:43:55 7 3 130 0 130 218 1 207.00 40 91.12 CHANGED TlssFFGCsFlAFGPulALFhhTIApDPlRVIlLlAuuFFWLVSLLlSSllWa.lhhslpsp.........lhFGlhhSVlhQEsFRahaa+LL+...............Ks-cGLpul......scpsphs...................................................................hsp+hlAYVu.GLGaGlhSGsFuhlNlLuDuhGPGTlGlcu...........sSphaFlsSAhhoLshlLLHTFWuVlaFsuC-pppahplshV.....VsoHLhlSsLTLLN..shapsoLlssYhlhllhulhAahssGGsltsh........p+hlssc ....................hhhhaGCshlAFGPshuLFhholAtcPl+.lIhLluuu...FFWLlSLLluSllWh.hhh.lpsp............lhh...uhhhSVhhQEhFRh.haa+ll+.............................Ksp......cGLt.sl.....stp.t.p..s..........................................................................shc.hAYVu.GLGFGlhS.....ulF.shl.NlL.....ucuhG..PGshGl..cu...............ss..aFLsSAhhohsllLLHsFWullhFsuh....-c...pp.....a.........htlhhV..........lhsHLhsShhThl.N......hhtssll..s.ahlhl.huhhAhhhsGushtsh.......................... 0 44 63 93 +5934 PF06106 DUF950 Staphylococcus protein of unknown function (DUF950) Moxon SJ anon Pfam-B_8992 (release 9.0) Family This family consists of several hypothetical proteins from different Staphylococcus species. The function of this family is unknown. 25.00 25.00 42.70 42.20 22.20 18.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.40 0.72 -3.99 3 163 2009-01-15 18:05:59 2003-05-29 11:48:11 6 1 123 1 4 53 0 108.80 74 96.00 CHANGED MTLoQQLKpYITcLFQLs+sETWcCEulEElAEDILPE+YIcsSPLuHKILpTaTYYNDELHElSIYPFLhYhsKQLIAIGYLD.NFDMDFLYLsDT+psIIDERYLLpcGG ...KThTQELKQYIT+LFQLSNNEsWECEALEEAAENI.LPERFINcSPLsHLTLETYTYYNDELHELSIYPFLMYANNQLISlGYLD.HFDMDFLYLTDTpNsIIDERHLL+pGG.... 
0 0 0 4 +5935 PF06107 DUF951 Bacterial protein of unknown function (DUF951) Moxon SJ, Eberhardt R anon Pfam-B_8994 (release 9.0) Family This family consists of several short hypothetical bacterial proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 25.20 38.50 21.90 24.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.75 0.72 -4.45 42 1332 2009-01-15 18:05:59 2003-05-29 11:51:50 6 4 1322 0 177 594 6 58.70 54 86.09 CHANGED caplGDIVcMKKsHPCGo.......scWEllRhGADI+IK.ChsCu+hVMlsRtcFEK+lKKllp .........YplsDlVEMKKsHsCGs.......N+WcIlRhGADI+IK.CpsCsH.VMhsRp-F-+KlKKll......... 0 75 122 152 +5936 PF06108 DUF952 Protein of unknown function (DUF952) Moxon SJ anon Pfam-B_8995 (release 9.0) Family This family consists of several hypothetical bacterial and plant proteins of unknown function. 21.60 21.60 23.00 21.80 20.40 20.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.10 0.72 -4.12 102 618 2009-01-15 18:05:59 2003-05-29 11:55:22 7 18 597 3 250 535 577 93.40 34 69.98 CHANGED IsstspWptuptsGpatsuslDht...-GFIHhSTspQ......lt.pTAs+aFuu...pssLlLL......slDsstLus..s..............L+WEs.................uc.uGtLFP........HLYG......sLslsAVhtsps ...................hstppWptApt..s.Gphtsusl-h.....-GFIH.h.S.o.s.p.Q..................................lt.tsA.s+aFsu.........ps-LlLL......tlDsstLss..s..................l+aEs............................uc.suthFP...............HLYG......sLslsAVlts..s........................ 0 78 169 211 +5937 PF06109 HlyE Haemolysin E (HlyE) Moxon SJ anon Pfam-B_9001 (release 9.0) Family This family consists of several enterobacterial haemolysin (HlyE) proteins.Hemolysin E (HlyE) is a novel pore-forming toxin of Escherichia coli, Salmonella typhi, and Shigella flexneri. HlyE is unrelated to the well characterised pore-forming E. 
coli hemolysins of the RTX family, haemolysin A (HlyA), and the enterohaemolysin encoded by the plasmid borne ehxA gene of E. coli 0157. However, it is evident that expression of HlyE in the absence of the RTX toxins is sufficient to give a hemolytic phenotype in E. coli. HlyE is a protein of 34 kDa that is expressed during anaerobic growth of E. coli. Anaerobic expression is controlled by the transcription factor, FNR, such that, upon ingestion and entry into the anaerobic mammalian intestine, HlyE is produced and may then contribute to the colonisation of the host [1]. 25.00 25.00 26.40 26.20 22.20 21.70 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.91 0.70 -5.41 2 433 2009-01-15 18:05:59 2003-05-29 12:19:47 8 1 323 25 1 123 0 198.90 81 97.36 CHANGED hsADpTVEsVKsAI-TADtALDLYNKhLDQVIPWpTFs-TlKELSRFKpEYSQuASsLVG-IKoLLMsSQD+YFEATQsVYEWCGVsTQLLsAYl.LFsEYsEKKASAQKsILIKVLDDGIhKLpcAQpSLhsSSQSFNsASGKLlALDSQLsNDFsEKSsYFQuQVDKIRKEAYAGAAAGVVutPFGLIISYSIAAGVVEGKLIPtLKpKLKSVpsFFpoLusTVKpANpDIDtAK.KLpsEIusIG-lKTETETTRFaVDYDDLMLp.Lp-uApKhI.oCNEYQKRHGKKs...l.. ..hA-pTVEVVKsAIETADGALDLYNKYLDQVIPWpTFDETIKELSRFKQEYSQAASVLVGsIKsLLMDSQDKYFEATQTVYEWCGVsTQLLuAYIhLFDEYNEKKASAQKDILI+lLDDGlsKLNEAQKSLLsSSQSFNNASGKLLALDSQLTNDFSEKSSa.............................................hpN+LKusQsFFTTLSNTVKQANKDIDAAKLKLTTEIAAIGEIKTETETTRFYVDYDDLMLSLLKtAAKKMINTCNEYQpRHGKKTLhEVP............... 0 1 1 1 +5938 PF06110 DUF953 Eukaryotic protein of unknown function (DUF953) Moxon SJ anon Pfam-B_9087 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
23.10 23.10 23.10 23.50 23.00 22.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.57 0.71 -4.66 6 278 2012-10-03 14:45:55 2003-05-29 12:22:26 6 11 184 2 189 321 7 113.10 28 71.30 CHANGED sspGa-EFpcslcp..........tppsKslashFoGuKD.ssGcSWCPDCVpAEPVIc-uLKc.sst...-shFlhspVG-RshW+DPsssFRpssphKlTulPTLL+asss.p+..Ls-cpstpssLVEhhFsE ........................................................t......th.phhpt.............pp.s.p..s..l..alhFh.usc.D..ssGp.SWCPDCVpAEPs....lpp.s...h.....pp....hsp.......sshhlhsp.V..G.p+.s.h.W.+.c....s..s.NsFRp...phplpslPTLl+a...p..s.............s....t+.........L.tt.ph.t.phlt..................................................... 0 69 105 157 +5940 PF06112 Herpes_capsid Gammaherpesvirus capsid protein Moxon SJ anon Pfam-B_9200 (release 9.0) Family This family consists of several Gammaherpesvirus capsid proteins. The exact function of this family is unknown. 25.00 25.00 25.80 25.30 21.10 20.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.93 0.71 -4.16 8 23 2009-01-15 18:05:59 2003-05-29 12:31:16 6 1 19 0 1 19 0 156.30 31 92.09 CHANGED MuptRl+cPhVQGRLEc-aPsc.LlschssLsQsNMoss-Yshs+RsYLVFLIApapYDpYlcpppGIpRK+H.......LpuL+upt...............pssstpsSuhSussuuuouls..................lSuoSso.uhoSuPuSLsuu....................suhosSsusussossssppKK ..Ms.hRl+cPhlQGRLEpDaPspPLlschpsLsQsNhosspYthspRsYLVFLhAQapY-tYlppptGlpR+cH.............lpshRspt.........................t..hsuuhSussuussusss.su..............lusouho..ohoSussSLtuu....................ootssususutsp....t.................................................................. 0 0 1 1 +5941 PF06113 BRE Brain and reproductive organ-expressed protein (BRE) Moxon SJ anon Pfam-B_9280 (release 9.0) Family This family consists of several eukaryotic brain and reproductive organ-expressed (BRE) proteins. 
BRE is a putative stress-modulating gene, found able to down-regulate TNF-alpha-induced-NF-kappaB activation upon over expression. A total of six isoforms are produced by alternative splicing predominantly at either end of the gene.Compared to normal cells, immortalised human cell lines uniformly express higher levels of BRE. Peripheral blood monocytes respond to LPS by down-regulating the expression of all the BRE isoforms.It is thought that the function of BRE and its isoforms is to regulate peroxisomal activities [1]. 20.70 20.70 24.70 24.50 18.60 17.40 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.17 0.70 -5.40 2 136 2009-01-15 18:05:59 2003-05-29 12:45:02 7 3 79 0 78 128 0 247.10 45 81.89 CHANGED MSPEVALpRISP.LpPhlsSVVhNG+VGLDuTNCLRlTDLKoGCTSLTPGPsCDRFKLHIPYAGETLKWDIIFNApYPELPPDFIFGEDA-FLP-PStL.pLspWssuNsECLL.lVKELlQQYHpaQCpRLRESSRLhFEYpoLLE-PpYGcNMEIYAGKKNsWTGEFSARFLLKLPVDFSNIPsYLLK...............................................................................................Vlpaps-hhp+lslh................................F ...........................................................................................................................sDRFpLhIPY..sh-.tl+WDlIFsuphPphsPDFIF.G.......-.D......sc.FhP........-......s......s...t......l..t...s.....LspW...........ssssPcsLL..hllpELl...ppY+paQppRl.t..E..s..s..RL...hFEhpTLlpc.th.s...phcl.hs..shpps....ht.s.hl.lphs...lD..hs...p.l.P....h..h....c......p...sttsh..sh...l.s.sa.pssp.ss..ts.hP..cL.hL..SPp.l-.cuLhusssl+lPsas.s..G.hCLh-Yl.PplpphLpppV.p.slpshchRRcaItAhhshaGp.s.lEhDs..hhpKhohLh...tsFs..FLVH.ltlP.....h..FP+pQPslhhQSs.H................................................................ 0 26 38 59 +5942 PF06114 DUF955 Domain of unknown function (DUF955) Finn RD, Yeats C anon ADDA_4938 Family Family of bacterial and viral proteins with undetermined function. 
A conserved H-E-X-X-H motif is suggestive of a catalytic active site and shows similarity to Pfam:PF01435. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.45 0.71 -4.39 77 5058 2012-10-03 04:41:15 2003-05-29 13:02:17 8 46 2529 3 1113 3932 890 119.90 16 37.22 CHANGED scphGlplhhhs............hsttsttthth.t.......................sthlhlssp.hsttcptasluHELuHhhhpspt........................ht.tttpthEhpspth....AsthLlPpphhhtthtpthp.hp...........................lsptaplohphhthclp ..............................................................................h...........................................hh.hh.........................pthI.h.l.s....p....p....h....s....t..s.....p....p....t..a..sluHEL..u.Hhh.hp...ptt.....................................................t..t..t..p..p...p....t....h..E..h.....p....A....s.tF............AsthL..h..P..p........h..h....t....h....h.........t....h.h....th.................................................ltphh.tlohthh..t...................................................................... 1 408 776 964 +5943 PF06115 DUF956 Domain of unknown function (DUF956) Finn RD anon Pfam-B_9146 (release 9.0) Family Family of bacterial sequences with undetermined function. 25.00 25.00 49.50 49.30 24.70 18.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.50 0.71 -4.49 22 736 2009-01-15 18:05:59 2003-05-29 13:05:51 6 1 702 0 71 320 4 116.70 50 94.62 CHANGED MsQSlNTKV-LsssGTuYhGhu.cYGKIhlGDcuFEF.YN-pNVccaIQIPWpplshVhAsVh..htGK.hIsR.FpIhTc+tGpFhFuSK-otclL+hlRcalss-+lV+u.olhpsI++tF+ ..............MsQSlNppV-LsssuTSahG..lu..chGKhhlGDpuhEF.Ys-pNVccaIQIPWsclstlhAsVh......G+.hIsR.FplhTc.K.GpFhFASKDstplL+hhRcalssD+lV+..o.hhpsIppth................. 
0 19 38 53 +5944 PF06116 RinB Transcriptional activator RinB Moxon SJ anon Pfam-B_9294 (release 9.0) Family This family consists of several Staphylococcus aureus bacteriophage RinB proteins and related sequences from their host. The int gene of staphylococcal bacteriophage phi 11 is the only viral gene responsible for the integrative recombination of phi 11. rinA and rinB, are both required to activate expression of the int gene [1]. 21.80 21.80 26.00 25.70 21.70 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.52 0.72 -4.35 6 399 2009-01-15 18:05:59 2003-05-29 13:10:11 7 2 218 0 7 163 0 48.60 80 91.72 CHANGED MIK+ILKIhFhluMYEluKYlTcELhlhLTuNDDVE.sPpDFs..sDHhHLN..th. ...........MIKQILRLLFLLAMYELGKYVTEQVYIMMTANDDVE.APSDa.............lhtt. 0 3 3 7 +5945 PF06117 DUF957 Enterobacterial protein of unknown function (DUF957) Moxon SJ anon Pfam-B_9300 (release 9.0) Family This family consists of several hypothetical proteins from Escherichia coli, Salmonella typhi, Shigella flexneri and Proteus vulgaris. The function of this family is unknown. 25.00 25.00 27.50 27.00 24.30 23.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.07 0.72 -4.43 5 548 2009-09-11 11:38:57 2003-05-29 13:13:47 6 2 255 0 4 177 0 63.60 84 81.52 CHANGED MphLTshoALDVLIuWLpDNIDpGS-IIFDNDEDNTDSAuLLPslE+ARpslRcLcuLu..pl+usR ........MKSLTTETALDILIAWLQDNIDCESGIIFDNDEDKTDSAALLPCIEQAREDlRTLRpLQ..LLcQNR..................... 0 0 0 2 +5947 PF06119 NIDO DUF958; Nidogen-like Yeats C, Myerscough N anon Pfam-B_1159 (release 8.0) Family This is a nidogen-like domain (NIDO) domain and is an extracellular domain found in nidogen and hypothetical proteins of unknown function [1]. 
25.00 25.00 25.00 25.60 24.80 24.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.30 0.72 -3.28 27 552 2009-01-15 18:05:59 2003-05-29 13:30:38 9 172 103 0 395 611 5 85.80 32 7.07 CHANGED sNTFQslLs...oDuspoasl.FhYsp..lpWsssp.....t...t...th.ApsGFsuG-s........sphasl.Pusc..tslhsLhps.oNsGhsGhWhF+l.sst ...............NTFQsVLs......oc.us.....t....oasl.F.Ysp..l.....pWsssp.........t..........t.h.s......A....psG.Fss.Gsu..................sphasl.PuSpp......psl......h....s.....ltps....oN...s...u..hs.GhWhF+lst.t............................................... 0 136 197 302 +5948 PF06120 Phage_HK97_TLTM Tail length tape measure protein Moxon SJ anon Pfam-B_10088 (release 9.0) Family This family consists of the tail length tape measure protein from bacteriophage HK97 and related sequences from Escherichia coli O157:H7. 23.80 23.80 23.80 28.00 23.50 23.70 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.95 0.70 -5.08 2 304 2009-01-15 18:05:59 2003-05-29 14:09:55 6 6 199 0 11 409 2 245.40 55 27.07 CHANGED QpsLNSsTuVhshlhsGAhG...LlGGlPGllMLGAGAWYshYQpQEQARpSAhpYAsTIEplRsph+pMS.sphosN.upsp.uLctQNphIppQcpKltplpstl..YpthLAssuso.ssahhN......-AspchAs.sstLAVEptRLpQM.sKppphQpllpslpcptl....ppssE.stshpSLh.M......asRL....Npl..uRQuhAs.P.+.....lPt...sppQppAlEKupRpLELStLpG.sKthtphtasAsDLsLsss......RpthlshtlEohRp.pA..Npsp+KGs ...............................QptLNssTuV...GoRLhoGALG......LVGGlPGLl....ML....G..A.u.AW.YTL..Y...QNQEQARESARQYAhTIDEIspKssuMSLPEhoDNEu+TRtALsEQNRLI-EQtS+l+uLpcK...It..s.Yp...h.sLss.su.hss.s.Ghh..l...s...s...tsVT-sLApATcp.LA...VEQsRL....sQMQp..KupS.IQ-lL..A.GLE-RRV.A....LI...RQQA.AEQN...+..sYQShLl.M..N.GQaT.E.....FNRLLGL.GNELLQQ..RQ.GLVNVPLR..................................LPQAT....LD..DKQQoALspoERE.LAL.SRLKGE.tKERsRLGYAADDLGF.VG.-....sYQpARQpYIsNuL-AWRNNps..NKPKu.................................................................................. 
0 0 1 9 +5949 PF06121 DUF959 Domain of Unknown Function (DUF959) Yeats C anon Pfam-B_25471 (release 8.0) Domain This N-terminal domain is not expressed in the 'Short' isoform of Collagen A [1]. 25.00 25.00 27.00 27.00 24.90 24.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.57 0.71 -4.67 3 37 2009-01-15 18:05:59 2003-05-29 14:10:13 9 19 24 0 16 34 0 173.50 48 14.16 CHANGED LLpCpLsuAcA-ssSLSs.p..sWLWhPpT-sS.sAoolucPQuSoPVQSTE....sTTTHVVPRsGpTEpuTTPASSE.PsEl.....lE-GcQpss.GssssTPTVs.shhssAuSPDh..........sEENIAGVGAEILNVAcGIRSFVQLW.EDoVsscS...ApT.VPDTsl.PhVLAoP..lSSsPQsssTTLW.SSuIPSSPuApTTEAGT...LuuPTpLP .........LL.CsLsuApAchhsLs......WLW.sppsss.htsslscPpss.sVQsTt....ssTTHVsPps..G.TE.tTss.uSsc.P.E.......Etupt.........oPo.......sAtSPDh..........pEENIAGVGAKILNVAQGIRSFVQLW...cDoss.scS...ApT.sssoss.PhsLPsP......SSsPQpssTTLh.SpshsSSPsspTTEAGT...LssPT.................................. 0 1 2 7 +5950 PF06122 TraH Conjugative relaxosome accessory transposon protein Moxon SJ, Coggill P anon Pfam-B_10166 (release 9.0) Family The TraH protein is thought to be a relaxosome accessory component, also necessary for transfer but not for H-pilus synthesis within the conjugative transposon [2] [3]. 
25.00 25.00 28.50 28.40 24.40 24.30 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.28 0.70 -5.64 41 471 2009-01-15 18:05:59 2003-05-29 14:14:35 6 3 341 0 63 366 37 338.70 34 76.51 CHANGED hsshF.sshsh.uNsTsPuuapuQstGaaoGGulhsRssh..cshpLhslshPshpAGCGGIDhFhGuFSFINu-plVphh+sIsuNAs....uaAFpLALpslsPplspshppLpphsptlNphshsSCphApulVsshh..spt..sss.ppphsps...hu..spsuhhsDahsu.pptstsssppss...............hstsssssttpp.hhhstNlsWpulp+sshh...........tsspphtEhlMSlsGTlIh.......sssussh.hst.hssssshlss......LlsGss...t.......hclapCs...ss.....spCl.......s.pshslstt.............pulpsplpphl..............puIhsKlts.c.....ssLos....scpsFl.ss..oslPlhchlphtssh.thshusp.hpphu-hlAh-lhhphLpph ....................MsphF.sph...tuNsTpPusapuQstGh.hsGGSlhsRspl..ps..hpLlShshPshsAG...CGG.IDhahGSFSFINu-Qlhphl+pIhuNAs....GYhFpLALpshsPchpssh-hLQchspplNphshsSCphAQulVsshh......spt....pss.pppssps...lu.......spsshhuD..asuu.pptss.suuppss......................hspAssp..pccthhhshNlhWpALp+sphh................................tuspcLtEhlMoloGollh.................sssuphs.h.ss.hs.ss..ps...hlps....................hhc.GGs.............................sclYpCs..ss..........spCL.......sssssolspt......................................puLpsplpphL.........................suI.sKhss..c.....ssL.ss....pEKuFl.ss..oslP....lhphl..hss.....ususshh.phs-hIuh-hhhpalp-............................................................................................... 0 18 33 50 +5951 PF06123 CreD Inner membrane protein CreD Moxon SJ anon Pfam-B_10187 (release 9.0) Family This family consists of several bacterial CreD or Cet inner membrane proteins. Dominant mutations of the cet gene of Escherichia coli result in tolerance to colicin E2 and increased amounts of an inner membrane protein with an Mr of 42,000. The cet gene is shown to be in the same operon as the phoM gene, which is required in a phoR background for expression of the structural gene for alkaline phosphatase, phoA. 
Although the Cet protein is not required for phoA expression, it has been suggested that the Cet protein has an enhancing effect on the transcription of phoA [1]. 19.70 19.70 20.00 19.80 18.10 19.40 hmmbuild -o /dev/null HMM SEED 430 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.13 0.70 -5.94 48 880 2009-01-15 18:05:59 2003-05-29 14:19:36 7 2 798 0 101 509 60 380.70 47 94.10 CHANGED pslhhKhhhluhLhLLLlIPLhhlpslIpERpphpppslscIupShuupQplsGPllslPap.cphpppp....stpthpphpp.....................hhhlLP-pLslsuphpsc.RpRGIYps.VYpupsplpupFshs...t.p.phhstpplh.........hspshLsluloDh+GIpsssplplsG.p.slsh.........pPGs.............thsthsp..Gl+sslsthshpps.........tlsashsLpLpGoppLullPlGcsoplsLpSsWPHPSFsGsaLP..spRplossGFpApWpsothupshsphh..........ttss.sshsttu.............huVshlpPV-tYpps-RAsKYulLFIsLTFhuFFlhEllpphtlHPlQYlLVGLALslFYLLLLSlSEHlGFshAYlluususlhLluhYlsslL+uh+puhsauuhLssLYulLYslLptEDaALLhGSllLFhlLuslMhlTR+lDWYs ...............................................................................s.sLhaKhssLhshhlLLLIPlhhlcplIsERucYRscV.ssIppSoSGsQ+llGPllslPhoEhhpsp-......ppKpsppp+sh....................hhahLPEsLhVcGs.ss.Et.R+hGIYpupVapu-hsl+AcFcls.........ph..p-l.s..t..sslt.........hucPalVluluDsRGIsslps..plsG......p.sL.sl............................EPGs.......................................ultpsspGl......HhP..Lsps.shtpp.............................sLplsh.sLsLsGTuslSlVPsG+sochsLsSNWPHPSFhGsFLP...scRclo..ts....GFpApWpoShh...AsN.hsppa......................sss..pph.sh...shsu......................FoVulhsPsDpYQlo-RAsKYAILhIsLTFhuFFlFEsLosp...RlHPhQYLLVGLuLVh..FYLLLLuLSEHlGFshAallASLhsslh.ulYLpuVL+uh+puhlF...shuLlhL.GlhauL.LpStDsALLlGoslLhlsLuuhMalTRplDWYt.......................................... 0 29 52 73 +5952 PF06124 DUF960 Staphylococcal protein of unknown function (DUF960) Moxon SJ anon Pfam-B_10198 (release 9.0) Family This family consists of several hypothetical proteins from several species of Staphylococcus. The function of this family is unknown. 
25.00 25.00 25.80 25.70 24.10 23.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.94 0.72 -3.74 11 633 2009-09-11 15:24:04 2003-05-29 14:22:46 6 1 565 4 34 211 0 93.80 41 90.95 CHANGED sRYAShGlVoSLPs-lIDshWhII....DppLKsVhsL-sllpFpLlNspGplol+FSQcpssshlshDhshhasPha..Pp+VallDpss+ET....IlLPcE .......tRaAShGlsoSLPs-lIDohWhII....Dc.hLKsVhcL-plLpFpL.h..s.s.pG.....p....l...o.h+FSppp.sst..l-....aDasp..a..cspa..Pt+VhVlDpDspET......ILLPEE........ 0 8 15 25 +5953 PF06125 DUF961 Bacterial protein of unknown function (DUF961) Moxon SJ anon Pfam-B_10221 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 26.20 26.00 24.40 23.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.18 0.72 -3.84 12 903 2009-01-15 18:05:59 2003-05-29 14:24:23 6 1 354 1 46 274 8 100.40 42 88.16 CHANGED .hchlVs-hpcTFGsLcFuu.sc.lhtps.sGphs....hpRoYsLhSslQ.uc.I.VolPApAG.KcFs.ptcVcLlNPhlsshuhthhp.G..hssahlcADDlVh ..................hphlV.Dh-pTFGpLcFuu.cc.Vh.ps........tsGssos...hpRoYsLhSssQ.uc.I.VslPAp..ss.K..cF....sYptcVcLlNPhhs.shuhthhp.G.....ssahl+ADDlVh..................................... 0 22 33 35 +5954 PF06126 Herpes_LAMP2 Herpesvirus Latent membrane protein 2 Finn RD anon Pfam-B_9147 (release 9.0) Family Family of Kaposi's sarcoma-associated herpesvirus (HHV8) latent membrane protein. 
20.80 20.80 21.20 139.90 19.90 20.70 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.76 0.70 -6.09 2 12 2009-01-15 18:05:59 2003-05-29 14:30:16 6 1 3 0 0 12 0 457.10 70 99.80 CHANGED ph+hahWshWhhALlsChhClolh.ClhT....NoI.hhusl.Vhh.LFsohhNhYsQ...Spp.+a.uS..lG...IluCIsht.WshoTpsolohsClhhhulLSllTAhLuLhtphpssKhpl-pGlLChshhhVLIhsMhlph.NsWpps.hFhPl.hhL.l.FlahFATspssslKLsusV.hICuGllhuhPs.hChoHoChushhuhslSsIalGhTGlhhTh++pWhssp+GlhoFLLLQGGVLsT.ThThtlLhIpppp.sN.cGp.hLLhsChhhLaChasWQSFpKASLssGhLaLhhAWopsGsCVpLVhLhssGhTpGlhohlICl.slhSThQulLVhYLY+Ep+lVuhNsh.ppRh.IYT.cps.Htps.......NHLuppl...PPLPsh.h..sRl.SpsTD......RspsopshsplEhQplpp-..pshsYASILssss..spcsSsp.sQSGhS..pVsssushplD...sshQPsD-lYEEVLFPps ....hhhFFWNLWLWALLVCFWCITLV.CVTT....NSIDTMASLLVMCILFVSAINKYTQAISSNNPKWPSSWHLG...IIACIVLKLWNLSTTNSVTYACLITTAILSLVTAFLoLIKHCTACKLQLEHGILhTSTFAVLhTNMLVHMSNTWQSSWIFFPISFTLSLPFLYAFATVKTGNIKLVSSVSFICAGLVMGYPVSCCKTHTCTATAAGLSLSSIYLGFTGIISTLHKSWAPPKRGILTFLLLQGGVLTTQTLTTELLAITSTT.GNIKGHEILLLVCLIFLWCLYVWQSFNKASLVTGhLHLIAAWSHTGGCVQLVMLLPSGLTRGILTMIICISTLFSTLQGLLVFYLYKEKKVVAVNSYRQRRRRIYTRDQNLHHND.......NHLGNNVISPPPLPPFFRQPVRLPSHVTD......RGRGSQPLNEVELQEVNRDPPNVFGYASILVSGAEESREPSPQPDQSGMSILRVDGGSAFRIDTAQAATQPTDDLYEEVLFPRN............. 0 0 0 0 +5955 PF06127 DUF962 Protein of unknown function (DUF962) Moxon SJ anon Pfam-B_10320 (release 9.0) Family This family consists of several eukaryotic and prokaryotic proteins of unknown function. The yeast protein Swiss:P25338 has been found to be non-essential for cell growth. 
24.00 24.00 24.00 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.21 0.72 -4.09 66 1112 2009-01-15 18:05:59 2003-05-29 14:30:38 6 4 665 0 460 999 358 125.90 25 82.98 CHANGED apoFt-FaPFYLspHpssssRtLHalGoslllhhlhhs.lhssp......................................................................h...hhllsh.lsGYuFAWlGHFhaEKN+PATFcaPLaSlhuDahMahphlsG+l ......................................................................................................................................................tshtcahshY....hs.Hps.hNh.tlHhlulshll...h...slhhh....lsph.s................................................................................................................................................h.hhsl.u..lhl...lGa..s...h.Qa..l.....GHh.aEtp+PAh.hcs.l...uLhhs.hhlht.lhh..h.............................................. 0 141 254 370 +5956 PF06128 Shigella_OspC Shigella flexneri OspC protein Moxon SJ anon Pfam-B_10333 (release 9.0) Family This family consists of the Shigella flexneri specific protein OspC. The function of this family is unknown but it is thought that Osp proteins may be involved in post invasion events related to virulence. Since bacterial pathogens adapt to multiple environments during the course of infecting a host, it has been proposed that Shigella evolved a mechanism to take advantage of a unique intracellular cue, which is mediated through MxiE, to express proteins when the organism reaches the eukaryotic cytosol [1]. 
23.60 23.60 23.60 35.20 23.50 23.30 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.81 0.70 -4.91 3 72 2009-01-15 18:05:59 2003-05-29 14:34:55 6 1 24 0 2 67 1 245.20 77 66.94 CHANGED VEISNH.QcK+PLN+KHHTVDFGANAYIIDHDSP...YGYMTLTDHFDNAIPPVFYHEHQS.FLDsFsEVscEVSRYVHGuKG+pDVPIFNTKDMKLGlGLHLIDFIRKSKDQuFKEFCYsKNlsP.VuLDRIINFVFQsEYHIPRMlST-NFKKlKLR-ISLEEAVcASNYEEINspVTsKKhAlQALaaSIsNpK-DVALYLLSNFcFT+QDVAShc+.......sLY.DlEYLLScHGASsKVLEYFIN+GLVDVNsKFcKsNSGDTMLDNAlKYcNuEMIchLLKaGAh.D++a .......VEISsH.QcphPLNppHHTVDFGANAYIIDHDSP...YGYMTLTDHFDN......AIPPVFYHEHQS.FLDpFpEVs-EVSRYVHGspGppDVPIFNTKDM+LGlGLaLIDFIRKScDQuF+EFCYsKNlsP.VsLDRIINFVFQ.EYHIPRMlST-NFKKl+lR-ISLE-AlpASNYEEINppVTsKKhAhQALhhSl..sNtKtDhALYlLSpFphT+QDVhphc+.......pLY.DlEYLLStcsushKVLEYFINpGLVDVNp+FpKsNSGDsMLDNAhK.csucMIchLLK.GAh.sp+a............................................. 0 2 2 2 +5957 PF06129 Chordopox_G3 Chordopoxvirus G3 protein Moxon SJ anon Pfam-B_10417 (release 9.0) Family This family consists of several Chordopoxvirus specific G3 proteins. The function of this family is unknown. 22.30 22.30 22.40 22.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.26 0.72 -3.91 12 50 2009-01-15 18:05:59 2003-05-29 14:37:30 7 1 42 0 0 33 2 107.80 52 98.97 CHANGED usLl.lhFFllFLllsYahsahPTNKhpluVpphs.-ttlhKptssshss.hhsThlFscs-phlssplpshYcupputVslhpsscKhsFpLshccDVRsLLPILLLSK .u.sLLYLllFllFlsluYYFoYYPTNKLQhAVhEpscENAII+QRN--l..Po.oL-TsIFTcssolsSupIpLYYNSshG+llhuhN.uKK+TFNLhcDsDIRTLLPILLLSK.. 0 0 0 0 +5958 PF06130 PduL Propanediol utilisation protein PduL Moxon SJ, Bateman A, Finn RD anon Pfam-B_10447 (release 9.0) Domain This family consists of several bacterial propanediol utilisation protein (PduL) sequences. The exact role of this protein in propanediol utilisation is unknown. Sequences containing this domain usually have two tandem copies (Bateman A, pers. obs.). 
25.00 25.00 26.90 30.00 19.20 18.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.26 0.72 -4.16 125 1579 2009-01-15 18:05:59 2003-05-29 14:40:30 7 10 646 \N 291 1010 41 75.90 35 69.09 CHANGED phsVhlusRHlHhopcDscthhs.............psGphsshcs...sGs+.shhcsVhlhuss+hpsphclsts-ApuhGlp .........s..slhVupRHIHho.pDsctLhs.............psGphsshch...sGs+.shhcsVhlhss.s+htsphcIsts-ApshGl..... 0 156 224 254 +5959 PF06131 DUF963 Schizosaccharomyces pombe repeat of unknown function (DUF963) Moxon SJ anon Pfam-B_10581 (release 9.0) Repeat This family consists of a series of repeated sequences from one hypothetical protein (Swiss:Q96WV6) found in Schizosaccharomyces pombe. The function of this family is unknown. 21.00 21.00 21.30 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.47 0.72 -4.68 8 150 2009-01-15 18:05:59 2003-05-29 14:45:17 6 5 8 0 146 151 2 35.90 79 70.47 CHANGED ITSSoslNSSTPITSSTslNSSTPIsSSSlLNoSTP .....ITSSoVLNSSTPITSSTVlNoSTPITSSoVLNSSTP....... 0 144 144 146 +5961 PF06133 DUF964 Protein of unknown function (DUF964) Moxon SJ anon Pfam-B_10600 (release 9.0) Family This family consists of several relatively short bacterial and archaeal hypothetical sequences. The function of this family is unknown. 22.10 22.10 22.40 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.31 0.72 -4.03 233 2444 2009-01-15 18:05:59 2003-05-29 14:52:23 6 3 1220 12 316 1032 15 107.70 22 85.69 CHANGED slhDpAppLsctlppo-EapshcpucptlpsspcupplhpcFpphQpph.pphpphGc..s..ccsppchpphtpplptpshlppahpuppplsplls-lsphIspsls-h .........sIhDpAppLsctlpps-phpshcpucptl.psspcsp..plhpcapphQpph..pphp.phGc.hs.....ccstpchpphtpplptpsllppahpsppphspllp-lsphItpsls-................. 
0 99 196 262 +5962 PF06134 RhaA L-rhamnose isomerase (RhaA) Moxon SJ anon Pfam-B_10641 (release 9.0) Family This family consists of several bacterial L-rhamnose isomerase proteins (EC:5.3.1.14). 19.60 19.60 19.60 19.80 19.40 19.40 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.41 0.70 -5.57 5 861 2012-10-03 05:58:16 2003-05-29 14:57:12 6 2 844 20 102 524 40 402.70 65 97.99 CHANGED MslKpsYEpAKpcYppWGIDVEEAL+pLKQlPISIHCWQGDDVsGFElscGELSGGIDVTGNYPGKApTPEELRpDLEKALSLIPGKHRVNLHAIYAETDtEsVERDEIEP+HFENWV+WAKcpGLGLDFNPTLFSHPKAcDGLTLAHPDc-IR-FWI-HCIASRKIuEYFGKELGTPuLTNIWIPDGYKDIPSDRLTPRKRLcESLDcIFuEEIDEpYNlDAVESKLFGIGSESYVVGSHEFYMGYALoNcKLCLLDTGHFHPTEsVSNKISSMLLYoDcLALHVSRPVRWDSDHVVlLDDELREIALEIVRNDALD+VpIGLDFFDASINRIAAWTIGTRNMIKALLaALLpPsucLKcLQEEGDYTcRLAlhEEhKTYPFGAIW-hYCEpMsVPVGE-WLcEVKtYEKEVLLKR ..............................................................................................plEQAaElAKpRaAAlGlDVEcAL+pL-+lPVSMHCWQGDDVsGFEN.P-.GuL.T.G..GIQATGNYPGKARNusELRuDLEpAh.............p.LIPGsKRL.NLHAIYL...E.oD...p.s..VsRDpIcPEHFcsWVEWAKtNpL.GL...D...F...NPoCFSHPh.Su.D.GF...TL...SHsDcpIRQFWI-HCKASRRlSAYFGEpLGTPSVMNIWIPDGMKDI..T.VDRLA.P.R.Q.RL.ls.ALDEVlSE.K.lcPAHpIDAVESKLFGIG...AES..YTV..G..S..NEFYM..GY....A....sS....R......p.s...s.L..CLDAGHFHPTEVISDKISAshLYVPpL.L.LHVSRPVRW.DSD..HV..V..L...LDD...ETQAIAsEIV...R.c.....c.L..h...D.........R...V...H...IG..L..D.FF.DASIN...RIAAWVIGTRNMpKALLRALLEPTspLRcLEssGDY.T.u.RL.AL.LEEpKSLPWpAVW-hYCQRpcsPsGu-WL-sVRsYEKclLSpR................................................................................................. 1 34 68 84 +5963 PF06135 DUF965 Bacterial protein of unknown function (DUF965) Moxon SJ anon Pfam-B_10661 (release 9.0) Family This family consists of several hypothetical bacterial proteins. The function of the family is unknown. 
25.00 25.00 27.30 39.10 24.40 17.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.63 0.72 -3.93 34 1369 2009-01-15 18:05:59 2003-05-29 15:32:34 7 1 1357 0 173 496 5 78.80 59 90.15 CHANGED hDcTMpFchscpc.pppl+-lLpsVYpALcEKGYNPINQIVGYLLSGDPAYIspap-ARsLIR+lERDEIlEELV+sYLp ................DcTh+Fc.h.c-sp..cp.clp..-sLpsVYpuLpEK.GYNPINQIVGYLLSGDPAYIPRaNsARN.IR+hERDEIlEELV+hYLc.. 0 76 119 147 +5964 PF06136 DUF966 Domain of unknown function (DUF966) Finn RD anon Pfam-B_8637 (release 9.0) Family Family of plant proteins with unknown function. 22.40 22.40 22.70 42.50 20.70 20.10 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.35 0.70 -5.02 15 135 2009-01-15 18:05:59 2003-05-29 15:43:41 8 4 19 0 91 127 0 321.00 29 72.73 CHANGED VsVVYYLoR.NGp..LEHPHFlEVhluSpsG.......LYLR........DVIsRLssLRG+GMAuhYSWSsKRSYK..NGFVWaDLu-.DDlIhPs...sspEYVLKGS...............Ellcss.p....................s..sp.thps......p...spp.p....ssshsshs++pstsh...........................................uShshsEY+lhKsp-.tt.......tt..huuDASTQT--shptpp.h...................................sspscsspls+p-h.SPssos......tuss-oLEsLh+A-uphhpu.phhppppht..................+h+AoulLMQLISCGuhSsKc.........tpuhthspph+sphspu.hsp...tts................ph....tphphE-KEYFSGSLlE..TKppp........thsuLKRSSSYNs-Rusph 
....................VtVVYYLsR.sup..LE.HPHahEVshsu..p.s...........LhLR...................................DVhpRLssLRG+GMsshY......SWSsK...RsYK..NGaVWpDLs-.-DlIhPs....pu.s.EYVLKGS...............Elhptsss..................................................................p.t...pp........t.......t.t........tss.ss...p.t.pptts..............................................................................................................................................................ss.s...tchps..cspt..................hstssuTpTccptpt.t.t.........................................................ttptptsplspcph...ssssss.......tsps....pshc.slhpscs...p.hh.ts.p..p..p...t.........t......................+h+sus.lLh.QLIo.CG.uhuspp..........tshh.h....hp.ph..............................................h....ht.th.cp-hFSGullp.pt..t...........s.L+pSsuhs..h................................................................................................................................................................................................................................................................................................................. 0 17 54 74 +5966 PF06138 Chordopox_E11 Chordopoxvirus E11 protein Moxon SJ anon Pfam-B_10685 (release 9.0) Family This family consists of several Chordopoxvirus E11 proteins. The E11 gene of vaccinia virus encodes a 15-kDa polypeptide. Mutations in the E11 gene makes the virus temperature-sensitive due to either the fact that virus infectivity requires a threshold level of active E11 protein or that E11 function is conditionally essential [1]. 
21.00 21.00 22.80 51.10 19.20 18.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.54 0.71 -4.05 8 48 2009-01-15 18:05:59 2003-05-29 16:57:42 7 1 34 0 0 33 0 127.00 59 97.10 CHANGED MELVNIFLESDsGRVKLth-.ssptCtpp.tsphh+AlchFlslL+KYIcV-cSTFYLVIKDpDIFYFKhDKGplo.l-NEFaTFscsLhFlc.sa..scITGIpFllTDTMslpIhP+sshtVlupSsNp+aY MELVNIFLETDuGRVKhsIcNs-chCsop.......hIs+FlElLucYI+l-pSpFYLVVKD.KDIFYFKCDRGSISlVsNEFYV.FDEsLLFVc.Da..opVTGVEFIVT-TMPs+IlPKssaAVISsssN+KFY...... 1 0 0 0 +5967 PF06139 BphX BphX-like Finn RD anon Pfam-B_8664 (release 9.0) Family Family of bacterial proteins located in the phenyl dioxygenase (bph) operon. The function of this family is unknown. 24.70 24.70 24.90 177.70 23.50 24.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.80 0.71 -4.58 7 27 2009-01-15 18:05:59 2003-05-29 16:59:10 7 2 26 0 4 18 1 135.70 63 97.89 CHANGED MK..........psRsFLlAlGlFYLhNLlGTLPFtshuLhshMYPGVthpsutPhFpLLpDAWhVVGLQLuAIGlVALWGARDPhRYh.AllPVVIsTElVsGlWDhYSlsWu+.AlhFuLsTLllHslWIsWuLhshRuspppt.......t ....MKpuRlFLIAlGlFYlhNLlGTLPFuohGLFuhMYPGV-LcsGsPlFsLLpDAWAVVGLQLGAIGsVALWGARDPhRYh.AVlPVVIATEVVDGLWDFYSIlWSHEAhWFGLsTLlIHslWIsWuLaAWRAhtpp.sh. 0 1 3 4 +5968 PF06140 Ifi-6-16 Interferon-induced 6-16 family Finn RD anon Pfam-B_9299 (release 9.0) Family \N 25.20 25.20 25.20 25.30 24.80 25.10 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -4.20 27 261 2009-01-15 18:05:59 2003-05-29 17:08:26 8 6 81 1 139 237 0 77.60 48 56.27 CHANGED AslG.Gsh...AVsAsPllLuAlGFTuuGIAAuSlAApMMSuAAlANGGGVAAGSlVAsLQSsGAAG..LSssusslluusGuslGuhl .....................hhG.ush...sV...sus....Ph....s....Lu.....AlGFTuuGIAAuSlAApMMSsuAlA.NGGGVuAGSLVAsLQSlGAsG.....luh.s.u...p.h.hlussGushsh..h...................... 
0 52 68 93 +5969 PF06141 Phage_tail_U Phage minor tail protein U Finn RD anon Pfam-B_9209 (release 9.0) Family Tail fibre component U of bacteriophage. 25.00 25.00 26.90 26.50 20.40 19.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.62 0.71 -4.13 8 798 2009-01-15 18:05:59 2003-05-29 17:25:50 6 2 357 23 15 274 3 127.30 64 99.43 CHANGED MpKHopIRpAVLsALcpphusssoaFDG+PuFl-.Ep-LPAVAVaLTDApYTGphlDpDsWQAsLHltVFL+AQAPDSELDhWMEp+IaPALp-VsGLssLIsTMsspGYDYQRDDEMAhWuuADLoYpITYsh ........M.KHT-IRAAVL-ALc.cp.....cs.u......ATh.FDGRPulh.........D....EpDhP.AVAVYLT.-ApY.TGEplDs.DTWpApLHIEVFL.A.Qs.PDSELDtW.......ME..S+I...YPAhs...sIPALS..sL..IsoMls..pGY-YRRDD-huhWuSAD.LTYsITYEM............ 0 0 3 8 +5971 PF06143 Baculo_11_kDa Baculovirus 11 kDa family Finn RD anon Pfam-B_9424 (release 9.0) Family Family of uncharacterised Baculovirus proteins that are all about 11 kDa in size. 22.20 22.20 22.20 22.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.88 0.72 -4.62 16 47 2009-01-15 18:05:59 2003-05-29 17:40:57 6 1 45 0 0 42 0 85.70 38 88.72 CHANGED Mppssuplsst.s...............sSll-tDQLsQlVoRNRoFl+DFlLVICuhlVFVhIlLFllLlhsI.psh-h.ptp+.phppshLuNhDh+ ...............Mp.pstthhs...s...............sSshstDQLpQlV..sRN+oFl+-FlL.VlCuh.llFVhlllFlhLl..hsI.pshEh.....psp+.phppshLtNhDhR...................... 0 0 0 0 +5972 PF06144 DNA_pol3_delta DNA polymerase III, delta subunit Finn RD anon Pfam-B_9452 (release 9.0) Family DNA polymerase III, delta subunit (EC 2.7.7.7) is required for, along with delta' subunit, the assembly of the processivity factor beta(2) onto primed DNA in the DNA polymerase III holoenzyme-catalysed reaction [1]. The delta subunit is also known as HolA. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.62 0.71 -4.75 24 4182 2012-10-05 12:31:09 2003-05-29 17:51:43 8 10 4065 11 910 3675 1752 168.00 20 49.90 CHANGED YllhGp-.hLlpcsppplhptshppshp-hshhph-hsps.hp..lhpphpohshFus++llhlp.s.........psthstp.hppLpphlpphsscslLllhus..KLscchc...hhKhLpp...puphlcsps.....ctpplhpalpphspphslplspcuhphLhthhpsshttltpplp+Ltl ........................................................................llhGs-.hL..hpctt.s...t...lh.pt.....h......h......t....p.....s..h......p............-....h.p.....h..h...p..h...-......h......p...p...s.........h..p.............l....h.....s.......p.s.ps.h...s.h...F....u...s..c......+...llllc.s..........................ppt.spp...t...h....c..t....L.h.p....h..h....p....p.........s.....s....s...sl...L...ll..ht...s....+....lsc.ppc......................hhctLp..p.........pu.h..h..l.p...s......t.p...........ct.p.pl....pa.lp......pc.h....p.p.t....s...l.....p.....l.....s.....s.....s.....A.......h.p.......hL....h......p.......t....h.......s.......s.......s....lhtltpplp+Lt........................................................................................... 1 308 602 775 +5973 PF06145 Corona_NS1 Coronavirus nonstructural protein NS1 Finn RD anon Pfam-B_9242 (release 9.0) Family Bovine coronavirus NS1 encodes a 4.9 kDa protein [1]. 25.00 25.00 29.10 29.00 18.20 16.90 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.90 0.72 -7.33 0.72 -4.64 2 37 2009-09-11 06:11:25 2003-05-29 17:53:54 6 2 34 0 0 20 0 28.80 90 77.53 CHANGED MphKFVFDLLsPDDILHP.NHVp.IIRPI MpTKFVFDLLAPDDILHPSNHVNLI.IRPI.. 0 0 0 0 +5974 PF06146 PsiE Phosphate-starvation-inducible E Finn RD anon Pfam-B_8639 (release 9.0) Family Phosphate-starvation-inducible E (PsiE) expression is under direct positive and negative control by PhoB and cAMP-CRP, respectively [1]. The function of PsiE remains to be determined. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.88 0.72 -3.74 96 1842 2009-01-15 18:05:59 2003-05-29 17:55:37 7 5 1500 0 409 1034 1208 68.20 32 49.00 CHANGED lsslLhlllhlElhphlhtYhcpp.cl..tlphllhhAlhAlsRtlIl.htsthps..h.....htlussllhLuh ..............thlhhFlhFEhluh.l.l.p.Y.hpss...Hh..PlRahlhIuIoAllRhlI.ls.cc.s.shss.................lhhuuulLlLs................................. 0 88 221 323 +5975 PF06147 DUF968 Protein of unknown function (DUF968) Finn RD anon Pfam-B_9463 (release 9.0) Family Family of uncharacterised prophage proteins that are also found in bacteria and eukaryotes. 30.10 30.10 30.20 30.20 29.80 30.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.32 0.71 -4.41 23 1469 2009-09-11 20:49:19 2003-05-29 18:00:05 6 7 701 0 60 1205 78 190.80 45 67.20 CHANGED sspcLp.shsppplssh...slptplphsDs+plohtphph.aAltNDl......tphh...PpsshR+hhph.hphhpuh.....-sclshup.s.so...pphtpclh..................chllshsh-s-sPhsahppsthpp...cpalha.ltspsCslCG....+tssDhHHl....IG+Gps+hthctaDhallsLCRcHHpElHp.sspsF-cKYthph..lhlpcpls+tLsls ...............................................................................s.s-pLp.phsppNhssWhlsllccshshspp+clolsELsW.WA.lpNpl.................sDsL..............PEushR+uL....t...l.t..c.tlpohh....RESDIlPu.c.psA.T...sIl.pp+sK.....................cpVlultVDPESPt........Sahp+.......sKh+.Rh.........ccYTRW..VKoQsCss.C.....G.......KP.AD.......DPHHl...........I.GHG..p.GGMG.T.....Ku.aDlFsL.PLCR....cHHs......E......LHA..s...shuFE..-KaGSQl...LlhRFls+Ahsh.......................................... 0 11 27 47 +5976 PF06148 COG2 COG (conserved oligomeric Golgi) complex component, COG2 Finn RD anon Pfam-B_9559 (release 9.0) Family The COG complex comprises eight proteins COG1-8. The COG complex plays critical roles in Golgi structure and function [1]. 
The proposed function of the complex is to mediate the initial physical contact between transport vesicles and their membrane targets. A comparable role in tethering vesicles has been suggested for at least six additional large multisubunit complexes, including the exocyst, a complex that mediates trafficking to the plasma membrane. COG2 structure reveals a six-helix bundle with few conserved surface features but a general resemblance to recently determined crystal structures of four different exocyst subunits. These bundles inCOG2 may act as platforms for interaction with other trafficing proteins including SNAREs (soluble N-ethylmaleimide factor attachment protein receptors) and Rabs [2]. 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.47 0.71 -4.28 19 316 2012-10-03 17:31:52 2003-05-30 09:15:08 6 10 269 1 222 448 1 134.30 27 23.60 CHANGED LsFscspFht................................................ssFssspFlpp.ppptssL-pLpp-LctY.phLcppllcLlNpDYtD.FVsLSssLhGh-cplppl.................cssLtphpcclpuh+splptthpclpsplpc+pplcpp+....phLchLlphtpsls+lEcll ...............................................tpttFht..................................................ssF.D.s...-pFls..........p...s..........c.....+..ts..sL-s...L...+...s...-...Lcth...h....phLpspll-LINcDYsD.Fls.......LSs...sLsG..h.-.....c....t.....lppl..................ps..sL...t...p...h...+.c..c...l..ts.l..+s.p.....l.ppthptlpptlpcpcplcppc.......thlptllpl.hcplpclEph................................................................................................................................ 0 73 126 185 +5977 PF06149 DUF969 Protein of unknown function (DUF969) Finn RD anon Pfam-B_9723 (release 9.0) Family Family of uncharacterised bacterial membrane proteins. 
24.20 24.20 24.30 29.80 24.10 24.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.23 0.70 -5.09 29 934 2009-01-15 18:05:59 2003-05-30 09:23:58 7 2 917 0 119 446 13 214.90 46 92.60 CHANGED hhsLlGlsllllGFhL+aNslLVVhlAullTGLsAGhshhclLsslGpuFlssRhlolhhLl.LPVIGLLERaGL+E+ApshIs+l+uATsGRlLhlYLllRploAAlGL.uLGGHsQhVRPLlAPMAEuAAcsphG....cLscpp+-+l+AhuAAsDNlGhFFGpslFlAhGulLLhpuhLcph.Ghp.l-shplulauIPTAlsAhlIauhRhhhhD+pLt+ ....hlhLlGIsllVlGFhL+hsslLlVhlAGllTuLluGhuhs......clLphlGcsFlssRslslFllh.LPllGLLERaGLK-pAtsLIpKlKuhTsG+lLhlYhhlRplsAAhul.slGGHsQhVRPLls..PMAEuAAc.sphG................cLscc.c-clKAhAAAs-NhGhFFGpslFlAsGulLLlpuhlcph....Gh.c..lp....shplAlhuIPsAlhAllltulphhLhD++Lt+.......... 0 26 59 93 +5978 PF06150 ChaB ChaB Bateman A anon Pfam-B_7743 (release 9.0) Domain This family of proteins contain a conserved 60 residue region. This protein is known as ChaB in E. coli and is found next to ChaA which is a cation transporter protein. ChaB may be regulate ChaA function in some way. 20.60 20.60 22.50 22.20 19.50 18.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.76 0.72 -4.17 56 786 2009-01-15 18:05:59 2003-05-30 09:37:16 7 2 707 1 126 393 17 63.10 55 67.80 CHANGED LPtslp.pLPt+ApcIahcsFspuhcpa..ts...........EpsAt+lAWsAV++cYhK.hssc...W..ls+s ..........LP-oV+pVLPuHAQDIY+EAFNSAW-QY..KDttcRR.scsSREETAHKVAWA.AVK+-YtK..s-DDK...W+KKp............. 1 32 76 101 +5979 PF06151 Trehalose_recp Trehalose receptor Finn RD anon Pfam-B_9846 (release 9.0) Family In Drosophila, taste is perceived by gustatory neurons located in sensilla distributed on several different appendages throughout the body of the animal. This family represents the taste receptor sensitive to trehalose [1,2]. 
20.30 20.30 20.40 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.33 0.70 -6.12 8 405 2012-10-01 21:54:26 2003-05-30 09:38:08 8 5 31 0 147 529 0 255.80 24 92.58 CHANGED Mppot..hcK.........................................pahp.soFHcAluPVLhlAQhFulMPVsGlpup.cPccl+FpWpSlphhaoLlhhlhshschuhuhphVhssulshcolssLlFhlssllsalsFlpLARpWPpIhRpWstVEphhhpssYp.ht+tshu++lpllullllssuLsEHhLhhsSuhh.sshphppCc..hs..shpsYhhpppsplFhlhsYosahshhhcahNsshTFlWNFhDIFlMhluhGLutRFpQLspRlcpht+psMspsaWpclRpcalsLscLlchlDcAlSsllLlShuNNlYFICsQlL+SFpshs.shhctlYFWFSLlaLluRThhlhLsASSIsDEu+csLpsLRpVPocuWCsEVpRFuEpltoDpVALSGh+FFhLTR+LlhuMAGTllTYELVLlQhppsscltp....C .........................................................................................................................................................................................................................hhhuphhshhPl.sl......ttl.pF.thhs..hhhsh.......h.h....hh........h.h.h.h....h.h..h.p......shph........p............sh.h.h.hhh..h.hh.hu.pW.s.lhh.htth-...h.......ht.......ththtpp...l.hhhhhh....h.hu.........................................................................................................................................................................htph...h.......................................................................h.......+..h.th..hh..hp..t.ht..h..............................................................................................................h.........h.a......h...h....s..h...hhl.R..........h........h...h...s.u.l.......t..................h.........h........................a....t...........p........h......h........h...h.........hsu.t.a.hp...h........................................................ 0 46 60 120 +5980 PF06152 Phage_min_cap2 Phage minor capsid protein 2 Finn RD anon Pfam-B_9879 (release 9.0) Family Family of related phage minor capsid proteins. 
21.30 21.30 21.60 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.08 0.70 -5.47 9 238 2009-01-15 18:05:59 2003-05-30 09:44:04 6 7 205 0 35 224 6 309.60 23 78.56 CHANGED M.....tl..sspph.h.st.hsDlYstLpt-lhsphlcplKsptsh......upshhpWQhpKLsclthLsppslchlu+hoGhspctlpphlcssGhpshpsh-ptlscs.................lppt.........hpshtshl.phLsuYtpQshscL.NhlNpTlLpo......s.tsYpsIIpcT.................sstllsGhKT.ppAlcpsltcahcpGlsu.hlDKuG++WohEuYsRsVl+TTstpsaNchcpc+hc-YGlclshhSpHsuAR.tCuPlQG+ll..ssputsppp.ssK...Y.ul.s....huYGsPuGhhGlNCpHphhPaIsGVsp.....spp.phD.c-stcshphppcQRhhERpI+phKcphhhtcphsDcchhthtpptVRphQstl+thlpss ...........................................................................................................................................................h.phapthp.plh..hhcplht................tp...Wph.phpphthhpp..p.hphlsph.s.stptlpphhp...pthhphhpp..hpp.h...t.........................................httt.....................t......t..pphl...psh....p...ps.hpsh...p.hspsh.tp...............htphYpph.lpcs.................h.hp.h.s.Ghh..oh.ppA.lppsl.hp......hhppGl.s..hhD.p...u.....G.....+...paph-s...Yschslposstpshsphptpthp-h...Gh-..hh.lStHssAR........s........t.........C......u.......hQGclh.........hht.............s.c....a.s.l.s...........hthu...p....ssG.........hhGhNC+Hhhhsah..Glsp...............pp...hs.......c......p......s.....p.............hp...hpppQ..Rh....hERpIRptKcchh....h.tc.th....t.c.......tp................hpthpttlp..thptthpphlpt........................................................ 0 15 29 32 +5981 PF06153 DUF970 Protein of unknown function (DUF970) Finn RD anon Pfam-B_9915 (release 9.0) Family Family of uncharacterised bacterial proteins. 
21.10 21.10 21.10 21.10 21.00 20.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.30 0.72 -4.09 11 826 2012-10-01 21:59:08 2003-05-30 09:47:45 6 1 766 3 149 414 5 106.90 52 98.97 CHANGED MKLlIAIVQDpDuscLhcuLs-psFcsTKLAoTGGFLKuGNTThlIGlED-+V-cllolIK-sCpsR-plVsshuPhusssDsYlPaPVEVpVGGATVFVlsVEpFhph ................MKlIlAIVQDpDSscLucpLlcss.a.RATKLATTG....GF....L+uGN........TTF.llGl-D-RVD-lLslIcp.sCpsRcQ..hV.o.s......ss.h...s...s.o..s-...u.a....l..PY.PV.EVpVGGATVFVhPV-tFcph.......................... 0 68 112 135 +5982 PF06154 YagB_YeeU_YfjZ YagB/YeeU/YfjZ family Bateman A anon Pfam-B_7771 (release 9.0) Family This family of proteins includes three proteins from E. coli YagB, YeeU and YfjZ. The function of these proteins is unknown. They are about 120 amino acids in length. 25.00 25.00 27.00 27.00 24.10 23.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.23 0.72 -3.80 10 822 2009-09-10 17:55:17 2003-05-30 09:48:58 6 2 339 6 28 413 9 90.00 81 87.89 CHANGED sssPpWGLpRslTPpFGARLVQEGNRLHYLADRAGlsGpFS-spsp+L-cAFPphlKQLEhMLtSGELsPRpQHCVTLYpsGLTCEADTLGSaGYVYIAIYPTpt .....................PphsARLVQEGN+LHYLADRAGIRGtFSDADAYHLDQAFPLLMKQLELMLTSGELNPRHQHTVTLYAKGLTCEADTLGSCGYVYLAVYPTPc................... 0 5 7 15 +5983 PF06155 DUF971 Protein of unknown function (DUF971) Moxon SJ anon Pfam-B_10230 (release 9.0) Family This family consists of several short bacterial proteins and one sequence (Swiss:Q8RZ62) from Oryza sativa. The function of this family is unknown. 
21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.14 0.72 -3.57 109 1053 2009-09-11 11:45:35 2003-05-30 09:50:28 7 18 818 6 442 927 860 84.50 30 39.78 CHANGED oplplpp.ts.+hLplsasDGp...................p...........hplssEhLRVtSPSAEsp..............GHtsspp..............hh.suKpsVsIpsl-PVGsYAl+lsFsDGH-oGlYoWsYLh ..........................................................................l.hpp..tp.+hLplpass..Gp...................p..hplstchLRshsPuA-sp..............s+ss..spc.....................hh..ss+.psVp..lt..s.l...c...s.l.G.p...YA..l.plsF....s..D...G.Hso.......GlasWsaLh............................................ 0 150 254 350 +5984 PF06156 DUF972 Protein of unknown function (DUF972) Moxon SJ anon Pfam-B_10235 (release 9.0) Family This family consists of several hypothetical bacterial sequences. The function of this family is unknown. 30.00 30.00 30.00 30.30 29.80 28.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.72 0.72 -3.70 43 1035 2009-01-15 18:05:59 2003-05-30 09:54:09 8 1 1029 0 114 334 1 105.50 41 94.11 CHANGED M-Kp-la-plsphEppltphhpplspLKppltpllEENspLclENp+LRc+Lpchpt.........................ppppsppt........hucuh-NLt+LYpEGFHICs..haYGp+R.p-E-.ClFCLs ................................MDK+-lF-slsphcppltphhp-lpplKppltpLlEENssLclENpcLRc+Lscl-t...................................tcs.hc..............htcu+-NLtplYpEGFHlCs...aYGp+R..p-E-ChFClp.................... 0 36 66 90 +5985 PF06157 DUF973 Protein of unknown function (DUF973) Moxon SJ anon Pfam-B_7947 (release 9.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 
21.30 21.30 21.40 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.91 0.70 -5.44 8 109 2009-01-15 18:05:59 2003-05-30 09:57:02 6 3 37 0 58 97 7 227.70 21 91.95 CHANGED sElpGlpKLRsGsLahlluslluhIshIlhls......................s.h.hhs.h.shltsulhllIlsll...lslluhl+l+SGFsILs.usu+DlGhGtTGshLlllGhllllIGsl...lslh..................lhuh.llaIGsILslIGtILlGluhY+lGchYsssllKlGGILllIsI............lsFIGaILsYlGLscVhsph...............tPhsshps.usplpQVGhGsL+uNGhAplTlYSphpssIlSApI-GTNhpso..pIsPthLpsGpNsIpIpFs.sssshlsGohYhlsLsls...Nu.slplsllYQP ................................................................h.ult.l+pG.lhhhlh.ll.hlh..hhhh..................................................hh..h...h...hhh.hthl......lhl...ls.h.h.h..h.ppGF..Lt.pht...shthG.....hus.l.hl.luhlhhl..huhl...hsl.......................................h......lh...hlu..llh..hlGhlhl.s.h.shhplGpha....p.pshhphuGIlhhlsh.......................................lshlGhI.lhYh...ulspl..ht................................................................................................o.....tl.p..l...sh...h.s..........s..l..G.N.l.h.h........h..tt.Y.l.l..t...s.....h.h.h............................................ 1 14 23 48 +5987 PF06159 DUF974 Protein of unknown function (DUF974) Finn RD anon Pfam-B_9042 (release 9.0) Family Family of uncharacterised eukaryotic proteins. 
25.00 25.00 25.80 26.20 24.00 23.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.76 0.70 -4.92 19 285 2009-01-15 18:05:59 2003-05-30 10:02:37 8 7 233 0 202 302 2 228.30 33 55.68 CHANGED LsLPpuFGslYLGETFushlsssN......................sospsVpsVslcAEhQTsop......................+lsLtsssss..ssst.........................................lpsspslppllpa-lKE.GsHlLsCoVsYpps.........uGc....p+tFRKhapF.s.psPLsV+TKhtshts..........................chhLEAQlENho..psslhL-pVpL.-ssptapssslsh-sshss..ssph.t..........................................sh.LpPs-.scQhlFhlp.cst................sppstthcspsslGpLsIsWRoshG-+GpLpTupLtpp ..................................................LsLPtsFGslalGE..TFushlslpN.......................ssspsl.pslhl.cA-lQTsop......................+lsLs..ss.s.ss.......ssp.........................................Lpssss.lscllca-lK.......EhGsHlL....sssVoYsst..........................sGc..........................phhFRKhapF...s.hpP.L.sV+TKhhshpst....................................................chaLEAQlpNho..sushh..h-.................pVp..L.-ss..a..pssplN.....sptsp...s..p...............................................sh..L..p.Pt-..spQalaplc.ctt...........................htps.hhhcuh.s.sl.G.cLsIsW+o.shGE+GpLpTupLtp.h.............................................................................................. 0 69 106 160 +5988 PF06160 EzrA Septation ring formation regulator, EzrA Finn RD anon Pfam-B_9703 (release 9.0) Family During the bacterial cell cycle, the tubulin-like cell-division protein FtsZ polymerises into a ring structure that establishes the location of the nascent division site. EzrA modulates the frequency and position of FtsZ ring formation [1]. 
32.70 32.70 32.80 32.70 32.40 32.60 hmmbuild -o /dev/null HMM SEED 560 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.75 0.70 -6.33 37 1051 2009-09-13 07:39:20 2003-05-30 10:10:28 7 3 1032 0 101 562 3 545.60 34 97.35 CHANGED lllulllllllhhhsshhh+++hhcpIccLEpcKpclhshslp-Elpcl+pLpLsGpopppFccWcpcas-lsssphsclEchlh-AEshscpa+Fh+A+ptlsplcshlsthEpplcpIpptlscLhps-ccNptplpphcctYcpl+KslLspsapaGsuhstlEcpLppl-pcFppFspLspsGDalcAcclLpplccchtpLpphhccIPsllpclpsshPsQLp-LcsGYcchhppsYhhsc.hslcpclpplppplpps.stL.ppL-l-cspppsppIpccIDpLY-lhE+ElpA+phVcpptpplsphlp+spppsppLttEl-+lppsYtLscsElpps+phppclpplppphpphtcplspppssYStlp-phcph.cpLcpIccpQtclpcplpsLcc-EtpA+cplppacpclpplcRhlc+pNLPGlPpsalphhhpsssclcplsccLsph.lNh-plsctLp.spsslcpLcccop-llpsAsLsEpllQYuNRYRspppplppuhpcApplFcp.acYppul-hlupALEplEPGuhcRlpcsY.p ..............................................................................................hlluIllll.l.l..shslth..h.l..R....++.pppp.l-tLE-RKpE.lhsLPls-ElcplKph.p.Lh.Gp.o.pst.FccWppcWs-lospph...uclEppla-AEsh.scpF+Fh+Apptlsphpptls.hEpslppIhpsLs-Lhcp-ccNstclpcsh-hYc-hp+pVhsspcpaGpAhsplEcpLcslpscFspF.sLsssGD.lcAppllsshccchhtLpphh-cIPsLlpchppplPsQLpDLc.t.GY+cLhc.ps..Yphs.c.hcl-pclpplppplcpspttl.ppL.-L-pAptcstplp-cIDslYDlhE+ElcA+chV-pppshlschLp+h+cpNppLtpElpclppsY.hLs-..s-hppl.R..php..s-lpplppsh....pclt.pphpcp.shsYS.lp-pLcpl.cplpsIcccQhcl.......pcpLpplcc-EhpAccplpphpsclcpl+Rhhc+pNLPGlPpsalp.hhhpss...pplccl.ppLppp.INl.cplschlphsssshpsL-ccopcllpsAsLsEpL.lQYuNRYRppcp.....plpcuhscA.clFcp.acYctul-hhupALEplEPGlsp+ltppa............................................................. 0 24 53 77 +5989 PF06161 DUF975 Protein of unknown function (DUF975) Finn RD anon Pfam-B_8494 (release 9.0) Family Family of uncharacterised bacterial proteins. 
29.20 29.20 29.30 29.40 29.10 29.10 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.88 0.70 -4.63 12 1830 2009-01-15 18:05:59 2003-05-30 10:32:24 6 8 936 0 225 1227 109 171.80 22 89.60 CHANGED M........oppElKphA+ppL+spWGsslLlhllhhl...........hhhshhlshhtshs.p.h..............llhhllsshlphushhshlcls+p..ppsphppshssF...cpFhphlls.lLhslhhhLaull.hh.h.l.h...................................hhhh..hl.llh.ullhs..htYS.s.alLh-p.c....hGshcAlscStphMKGhKWchFlLpLSFlGW.............................hlLshloh....................Glsh........laLl.........PYhpTsphhFYcsltttp .............................................................................................................h......th+..t...h...p................hh......h........................................................................................................................................................................hh..h..h....h....h..u.........hh..lth...ct....tp...t....h..t.........h...hh......t.h.........hh.h.....l.h..h.h.h.h..h......................................................................................................................l....h.....h....hh.hhhh.....htat.....s.a.lh...hp.t.p.........hs.....phhptShthMpG.+hph.hhL..Lphls..W.................................................hhLs..h..h....sh..................................u.ls.........................................hhl........................PYh.....hs.h.aYttl............................................................. 0 77 151 182 +5990 PF06162 DUF976 Caenorhabditis elegans protein of unknown function (DUF976) Moxon SJ anon Pfam-B_10032 (release 9.0) Family This family consists of several hypothetical Caenorhabditis elegans proteins of unknown function. 
24.40 24.40 24.40 24.40 23.30 22.90 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.94 0.71 -4.73 6 29 2012-10-01 19:48:29 2003-05-30 10:42:21 7 3 6 0 27 54 0 151.70 37 68.88 CHANGED MLSMCN+aPhF-ssc..RhVh.phacllTVFDss.puppPSsAVIVa-ELsKus.ssphLsh.KMEpSYtKVDplspchsspph+aAIHLuSHS.KNsIQIhpoAaSsGYTpcDKcGplPEGsKVKCsGsETsh+TpVsCEcVVK-VNEah-ps+pKFG-Lclpshpc .................................................hCp.....................sl.VTuFs..s..s.h.Et..p..sPS.ssV.l......DELhKps....ssphl.h..K..h.phuY-cVsc+l....PEL.h.p.p..p.s.chslHLu.sHslcN..sIhhpppAFusGYsppDh.sG.h.lPE.GN.+..s.....p.s.....s.....sp-.....p...sh+oplcC-pLVccVsE+hsLDGp.K.aGGLpVcpS-............................... 0 12 13 27 +5991 PF06163 DUF977 Bacterial protein of unknown function (DUF977) Moxon SJ anon Pfam-B_10135 (release 9.0) Family This family consists of several hypothetical bacterial proteins from Escherichia coli and Salmonella typhi. The function of this family is unknown. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.73 0.71 -4.39 3 656 2012-10-04 14:01:12 2003-05-30 10:44:53 6 3 360 0 19 337 25 123.10 59 90.83 CHANGED MAKPFTQEEREcIKuRIIGLVRcsGRhTlsQLEstTGAsRaosc+hLR-lLAsGDlYpsG+hGlFsSEQAh+sWppAtcKh..........sD.sLIhp.PDGEIRRYDSpQNI...ICsECRKSEVMQ....RVLAFYQGNFQ ..................MAKsFTpEEREcIKupllELVRpSGRcTlRQL..EsKTGATRahhphLuR-LVASGDVYp.SGh..G.lFPSEQAh+DW..ppARcKh...........sDPsLIhpLPDGEIRRYDR+.NI....ICpECRcSEsMQ....RVLAFYpGsh....................... 0 3 9 16 +5993 PF06165 Glyco_transf_36 Glycosyltransferase family 36 Finn RD anon Pfam-B_9110 (release 9.0) Family The glycosyltransferase family 36 includes cellobiose phosphorylase (EC:2.4.1.20), cellodextrin phosphorylase (EC:2.4.1.49), chitobiose phosphorylase (EC:2.4.1.-). Many members of this family contain two copies of this domain. 
20.60 20.60 20.90 20.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.03 0.72 -4.21 116 1154 2010-01-08 16:32:43 2003-05-30 10:48:30 6 29 637 23 350 1064 71 109.20 27 9.38 CHANGED DssGpahYlRD.poGchWSsohpPs....p.sss...tpYp..spaG.uhupFpppp......sslpsplphh.Vsh-c....ssElp+lpLpNpuscsRplplToYsEh.VLusstsDpup.thophhspo ............................................DpsGcalYl..R...Dtp..........o...G.........c...hWSsohpPs...................p..pss....t.pYc.........sca....GhuaopFppp.t......sslpsphphh.Vs.hcc.....ssElpclplpN...pos..csRp.lplhuYsEh.sL.u.s.h.p.s-ssphhhsph............................ 0 112 228 276 +5994 PF06166 DUF979 Protein of unknown function (DUF979) Moxon SJ anon Pfam-B_10323 (release 9.0) Family This family consists of several putative bacterial membrane proteins. The function of this family is unclear. 25.00 25.00 63.00 63.00 21.50 21.30 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -11.96 0.70 -5.46 30 939 2009-01-15 18:05:59 2003-05-30 10:50:52 7 3 918 0 123 458 15 303.60 46 97.51 CHANGED L-hlYhLhGllhlhhAhhohpD+sp..P+RaGoulFWulhulsFlhGs.................hlPshssGslVlshullAuhttlshGphppsssp...ptcptAp+LGN+LFlPALhlPllollsuhh.hspl................uoLlulGlGsllAlllAhhhT+sp.stpslpEucRLl-ulGWAulLPQhLAsLGslFssAGVGcslupllushlPtssthluVlsYslGMALFThIMGNAFAAFsVhTAGIGlPhllsphGGNPAlhuAlGMhuGaCGTLhTPMAANFNlVPAALLEl+D.+.uVIKsQlPsAlsLLllsIhLMYaLsF ...............-hhYhlhGllhlhsAhhshcD+ss..PpRhGouhFWslhulsFlhGs.................hlPs...hshGhlVllhullAhhptVphGphc.phsc...p...ctptpAp+LtNKlFlPALhlsllsllhuhh.hspl.................................sollululusllAhlsuhhh..T..+sp.PppslpEusRhlpplGhuuILPQlLAsLGslFssAGVGcllu+llusllPs-shFluVhsYslGMsLFThIMGNAFAAFsVlTAGlGlPhllth.GuNPAlhuAluMhuGYCGTLhTPMAANFNlVPAALLEh+D.cNGVIKAQhPsALslLlhpIhLMYhLsF................................ 
0 26 61 94 +5995 PF06167 Peptidase_M90 DUF980; MtfA; Glucose-regulated metallo-peptidase M90 Finn RD anon Pfam-B_8651 (release 9.0) Family MtfA (earlier known as YeeI) is a transcription factor A that binds Mlc (make large colonies), itself a repressor of glucose and hence a protein important in regulation of the phosphoenolpyruvate:glucose-phosphotransferase (ptsG) system, the major glucose transporter in E.coli. Mlc is a repressor of ptsG, and MtfA is found to bind and inactivate Mlc with high affinity [1]. The membrane-bound protein EIICBGlc encoded by the ptsG gene is the major glucose transporter in Escherichia coli. MtfA is found to be a glucose-regulated peptidase [3], whose activity is regulated by binding to Mlc available in the cytoplasm, which in turn has been released from EIICBGlc during times when no glucose is taken up. A physiologically relevant target for this peptidase is not yet known. 20.90 20.90 21.30 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.39 0.70 -5.00 74 1002 2012-10-04 10:46:43 2003-05-30 10:52:23 7 4 957 2 233 631 88 237.40 45 88.21 CHANGED Mh.......h..............htht+cppltppshs....shWpphlpp.lPhhptLsss-ptcL+phsplFLscKpapGspGlplTDph+lsIAAQACL.lLpls.....hsaYsshppIllYPssFlsppptt.....Dcs..GlVHchcpshsGEuWpp.GPVlLSWpslhtus............st-G....aNVVIHEFAHKLDhhsGs.AsGhPsL....tss.h.......shppWspshppsacphppphptspp...................shlDsYuAssPAEFFAVsoEsFFppPptltppaPplYptLspFY+ .................................................hhhh..................phpcthp..p.pslP......WpcsLs..lPlLssLotpEpt+L.hslAptFLppK+ls.s.....lpGhE.LsshhpspIAhhhCLPlLcLG.....l-Wh-GF+EVLlYPusFVVcccac.....D-hGlVH...stchlpSGpSW.pQ.GPllLsW.DlQcoh............ssuG....aNLlIHEhAHKLDhc..NGc.AsGlP.h.l.........sh.p........-lsuWc+.sLc.s.Ahsslpccl-hss-t.................tusIDsYAAocPAEhFAVlSEYF......FoAP...-LhtscFPuLapphspFYp......................... 
1 73 138 190 +5996 PF06168 DUF981 Protein of unknown function (DUF981) Finn RD anon Pfam-B_8691 (release 9.0) Family Family of uncharacterised proteins found in bacteria and archaea. 28.50 28.50 29.00 29.80 28.30 28.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.34 0.71 -4.52 13 68 2009-01-15 18:05:59 2003-05-30 10:55:04 6 1 57 0 29 64 5 187.60 30 95.22 CHANGED Mu.........FIDsLslhLhsLuhshllhAhhhlpshlshc.......cculcsuhhPhh.sLGlhhhloGlauphTW......PLPuSY...NILFhDsahlhGlhlluhululapshc...............LcshuhhuLhlGLhsIhYGssshtasL.Tt-P...............l.AhLuLYhlsGLAulL..ssslhlDphctpt.............lhhllthlhLlluullAhaIGhpAlhuHL..s.sas ..........M...FlDsLslhLhhlshshllhAhhhhtshhsht...............................cpshscshhPhh.slGlhhllhGlauphTW......PLPu...SY....NILFhDsahlhGlslluhululhh.s..hc..........................Lpshuhh..ulhhGLhsIhhGsuhhsauL.TpcP................h.uhLuhahhhGLAulh..ssslhhpp.p.t..............hhhhlhhlhLhluulluhhhGhtuhhuHlst..s....... 0 13 18 24 +5997 PF06169 DUF982 Protein of unknown function (DUF982) Moxon SJ, Eberhardt R anon Pfam-B_10431 (release 9.0) Family This family consists of several hypothetical proteins from Rhizobium meliloti, Rhizobium loti and Agrobacterium tumefaciens. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 22.20 22.20 22.20 25.10 21.30 21.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.49 0.72 -4.24 51 308 2009-01-15 18:05:59 2003-05-30 10:56:52 7 2 50 1 157 327 1 74.20 29 77.43 CHANGED ppPVtlthss.tthp.tlpost-AhchL.pcWPh.p..cG.tapsAl+sCtsAlsGphssppARcAFlsAAccAsl.hh.s .......t.pPVtlthss..s.thp.plpost-AhchL..pcW.Pt..p..cG.tapsAlcsCtsAlsG..ptsspsARcAFlsAAccAslhh..t.......... 0 7 49 64 +5998 PF06170 DUF983 Protein of unknown function (DUF983) Moxon SJ anon Pfam-B_10629 (release 9.0) Family This family consists of several bacterial proteins of unknown function. 
21.30 21.30 21.30 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -9.95 0.72 -3.87 62 469 2009-01-15 18:05:59 2003-05-30 10:58:27 7 4 239 0 180 411 137 84.30 38 61.56 CHANGED FcuaLKlsspCssCG.-hsatcusDGPAahsIlllGalllshhlhl-hsapPshWlphsl..ahPlsllhsLhLLpslKGhllulQas .....FcuFLplsspCssCG.Dasatc....u.sDG.PAahlIl..ll.GallVshhl...hlEhsh.s.s.shWlahsl..ahPhsllh.oLhlLpslKGhlluhQa............ 0 50 108 132 +6000 PF06172 Cupin_5 DUF985; Cupin superfamily (DUF985) Finn RD, Bateman A anon Pfam-B_9217 (release 9.0) Family Family of uncharacterised proteins found in bacteria and eukaryotes that belongs to the Cupin superfamily. 25.00 25.00 25.60 25.20 24.60 24.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.87 0.71 -4.27 19 1200 2012-10-10 13:59:34 2003-05-30 11:06:00 6 9 1116 34 350 843 354 132.00 40 81.62 CHANGED ApplIcpLsLpsHP.EGGaa+ETaRussphs.s..............RshsTuIYFLLsp..sshSpaHRl.cu-EhWHaauGssLplhhhsssGphps.....hpLGhDl.ttGpps......QhlVPsGsWhuu....................ps.ssaoLVGCsVAPGFcFcsFELhc ...................................pphIcpLpLtsHP.EGGaa+cTh+ustphst..............................................RuhhTuIYFL.Lps.........ssh............S+..aHRl.su.DElWa....aa.uGs.sLplahl.s......s......-...G..p...h.ps............................hpLGhDl...tpGp.p.......................QhsVPtGshhuu..............................................pssssau...LVuCh........VuPGF-FccFELh............................ 0 101 206 283 +6001 PF06173 DUF986 Protein of unknown function (DUF986) Moxon SJ anon Pfam-B_10711 (release 9.0) Family This family consists of several bacterial putative membrane proteins of unknown function. 
22.20 22.20 22.30 22.50 21.40 22.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.99 0.71 -4.39 12 608 2009-01-15 18:05:59 2003-05-30 11:09:38 7 1 599 0 47 234 2 144.70 70 97.43 CHANGED TslhLllhIshhLhYAhYDpFlMsh.hKGKThLpVpLK++s+lDulIFlsLIuILl.YpNhts..pGs.lToaLLssLsLlulYluaIRtP+llFKppGFFauNlFIpYu+I+pMNLSEDGlLVI-L.pp+RLLlplpplcDLEKIhphhsphp ....TDLVLILFIAALLAaAIYDQFIMPR.RNGPTLLuIsLLRRGRlDSVIFVGLIs...ILI.YNNVTs..HGAhlTTWLLSALALMGF.YIFWI..RsPKIIFKQ+GFFFANVWIEYuRIKsMNLSE....DG...VLVMQL.EQRRLLIRVRNIDDLEKIYKLLlss........................ 0 4 14 32 +6002 PF06174 DUF987 Protein of unknown function (DUF987) Finn RD anon Pfam-B_9329 (release 9.0) Family Family of bacterial proteins that are related to the hypothetical protein yeeT. 25.00 25.00 36.80 36.70 24.50 23.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.15 0.72 -4.46 3 583 2009-01-15 18:05:59 2003-05-30 11:12:23 6 2 284 0 11 152 0 65.30 91 89.63 CHANGED M+IIo+tcAMcIaRQHPuSRLFRYCTGKYQWHGSAsHYTGRDVsDIoGVLAVYAERR+DusGPYlc ...............MKIITRGEAMRIHpQ...HPASRL...FPFCTGKYRWHGSAEAYTGREVQDI.PGVLAVFAERRKDSFGPYVR..... 0 4 4 8 +6003 PF06175 MiaE tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE) Moxon SJ anon Pfam-B_10761 (release 9.0) Family This family consists of several bacterial tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE) proteins. The modified nucleoside 2-methylthio-N-6-isopentenyl adenosine (ms2i6A) is present at position 37 (3' of the anticodon) of tRNAs that read codons beginning with U except tRNA(I,V Ser) in Escherichia coli. Salmonella typhimurium 2-methylthio-cis-ribozeatin (ms2io6A) is found in tRNA, probably in the corresponding species that have ms2i6A in E. coli. The miaE gene is absent in E. coli, a finding consistent with the absence of the hydroxylated derivative of ms2i6A in this species [1]. 
20.70 20.70 21.20 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.73 0.70 -5.00 6 1063 2012-10-01 21:25:29 2003-05-30 11:15:23 6 3 665 4 298 910 594 146.00 32 95.80 CHANGED phLsPlhpFLpCsTPpsWlEpAht.tsLDllLLDHptCEhKAAtoAh.Llt+Ys..................................................PhspcLlschstLh+EEL+HFcQVhplhctRsIshsslsAuRYApuLhutVRTpEPppLlD+LlVGAaIEARSCERFAtLAPaL.DsELAKFYsSLL+SEARHapDYLpLApphuuc.DlScRlphhutlEAELIpoPDsEFRFHS ...................................L....Ts.tWl..s...tp...hL.DHh.CE.KAutsAh.hhhpa.................................................................tL.t.h..lhpEEh.Ha..Vhthh.t+sh.h.......ts.Ystthht.h..p....p...........p..........hh.hD.Llhuuhl.EARSpERhthls..............h.....-......pl.tpaYhtLh.SEupHat.ah.hA.ph.........t..........................ltt+ht.hh.hEtp....ll....t.......t.......phH........................... 0 81 175 256 +6004 PF06176 WaaY Lipopolysaccharide core biosynthesis protein (WaaY) Moxon SJ anon Pfam-B_10767 (release 9.0) Family This family consists of several bacterial lipopolysaccharide core biosynthesis proteins (WaaY or RfaY). The waaY, waaQ, and waaP genes are located in the central operon of the waa (formerly rfa) locus on the chromosome of Escherichia coli. This locus contains genes whose products are involved in the assembly of the core region of the lipopolysaccharide molecule. WaaY is the enzyme that phosphorylates HepII in this system [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.50 0.70 -5.14 4 512 2012-10-02 22:05:25 2003-05-30 11:21:56 6 5 478 0 34 372 53 198.00 52 81.44 CHANGED MIhpp+I+shsVFhK-NssKYhplhsDFLoYshpslKVFRsIDDTKVhLIDTcYG+hlLKVFuPKsKpsERFhKShlKGDYYEpLhhpTDRVRsEGlpulNDFYLLAERKTLpas+sYIMLIEYIEGVELsDhP-IsE-lKscIppSIcpLHpHGMVSGDPH+GNFIlppstlRIIDLSGK+soApRKAKDRIDLERHYGIKNElKDhGYYhLIY+KKlRphlR+lKGK .................................................................................................................................MI.p.ph.pshphahccss.hY.plhp-hLshphphlKVhRs.I-.DTKV.L..IsT..th.G.hlhKVauP.Kh.Kh.sERF..h.KS....h....l.K.t...DYYc....pL.hhp.TD.RV...Rs.....EGh.pslN.Da.a.L...LAE.+.K....T....L..p...a....s+h..Y.......lMlIE.........YIEGlcL..s-...h...................-......I.......s......-......-.......l....K......s........c.......l......p.....p.S.....I.....c.......c.....L.....HpH.....GM.....VSGDPH+G.N.F.I.l...p..p..s..t.....lR......lID..L......S..GK..+..s..o..t....h..K......A+..D.R.l..sh..E..R..H.h..sItNp.....l....+D.h.GahhlIa+.pK.l+p..h..l+clKsK.............................................................. 1 8 19 25 +6005 PF06177 QueT DUF988; QueT transporter Moxon SJ, Rodionov D, Bateman A anon Pfam-B_10800 (release 9.0) Family This family includes the queT gene encoding a hypothetical integral membrane protein with 5 predicted transmembrane regions. The queT genes in Firmicutes are often preceded by the PreQ1 (7-aminomethyl-7-deazaguanine) riboswitches of two distinct classes [1-2], suggesting involvement of the QueT transporters in uptake of a queuosine biosynthetic intermediate. 
21.30 21.30 21.40 21.30 21.00 21.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.86 0.71 -4.24 45 1266 2012-10-03 02:46:00 2003-05-30 11:23:59 6 1 1098 0 152 637 4 152.20 33 91.44 CHANGED slstsAllAAlYllLTlh..lsslSaGslQFRlSEhh.NhLsh...as+.+YIhGlslGshIuNlh.Ss.h....Gll..DllhGshuTLlsshlshhltchhpph.hh.......phhlsslhhslsh.hhIAh.pLshhhp.......l....PF...........hhT....a.holulGEhlsh.llGssllhhlsKplc ..................lsphAll.AAlYllLTlh....lsslS..a.G.shQFRlSEhh..N.hL..sh...ash...KYlhGlslGshluNla....os..h.......Ghl.DllhGshsTllslsls..hhltt...phtt...h..............phhhsulhholsh.h.h..l.Ah...Lsh.hhp...................l......P..F.............ah..o....a....hosulGEhhsh.llGs.lhhhlsK+l................................ 0 69 110 132 +6006 PF06178 KdgM Oligogalacturonate-specific porin protein (KdgM) Moxon SJ anon Pfam-B_10852 (release 9.0) Family This family consists of several bacterial proteins which are homologous to the oligogalacturonate-specific porin protein KdgM (Swiss:Q934G3) from Erwinia chrysanthemi. The phytopathogenic Gram-negative bacteria Erwinia chrysanthemi secretes pectinases, which are able to degrade the pectic polymers of plant cell walls, and uses the degradation products as a carbon source for growth. KdgM is a major outer membrane protein, whose synthesis is strongly induced in the presence of pectic derivatives. KdgM behaves like a voltage-dependent porin that is slightly selective for anions and that exhibits fast block in the presence of trigalacturonate. In contrast to most porins, KdgM seems to be monomeric [1]. 
20.40 20.40 20.50 21.20 20.20 19.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.56 0.70 -4.84 7 1133 2012-10-03 17:14:37 2003-05-30 11:29:46 8 1 545 2 73 394 5 209.70 33 93.83 CHANGED lusss.A.shl-hRctYppsoct..cshltlupphspGhGh.l-s......sss.pspt.s-httshNEls.hsh.aKssDphslpPGh.lpstsssSsYpPYL+spYphssshslslRYRapapphous.hcs-pss.ct.chssahsYplhc...paphsap.p.ahcsspap...ussccppaEhssphtY+h.sppWpPYlEls.lshsss....sscR.QsphRVGlpYpF ........................................................................sh.s.A..shl-.Rct....Y....t........sS..ct....cshlt...lu...ph.s.GhGh.l-u.........................sssh.p.sp..+.h..s.-httsssElp.hsYhaKh..s.D.pholpPGh.l.h............c.................ss.Ss..uo.tYt...PYl+lsashs..s..shshulRYRYsa...psh....s.....s...s...s...h............s............u............-.........h...........ss...p....ssa..ch........ssYhsYplss.....cF.sasapsp.h.ahc..s..s.cap...............huNscKa.sa.EhshshpY..+h....s....p...p....apPYhEl.s.....lsppss............sD..cR...Qspa..RlGlpY.F................................................................ 0 3 17 46 +6007 PF06179 Med22 SURF5; Surfeit locus protein 5 subunit 22 of Mediator complex Moxon SJ anon Pfam-B_10889 (release 9.0) Family This family consists of several eukaryotic Surfeit locus protein 5 (SURF5) sequences. The human Surfeit locus has been mapped on chromosome 9q34.1. The locus includes six tightly clustered housekeeping genes (Surf1-6), and the gene organisation is similar in human, mouse and chicken Surfeit locus. The Med22 subunit of Mediator complex is part of the essential core head region [2] [3]. 
21.20 21.20 21.20 21.20 20.30 20.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.44 0.72 -4.03 21 283 2009-09-11 07:34:40 2003-05-30 11:36:54 7 11 233 15 196 266 1 107.50 27 61.35 CHANGED LL........p+lcpslpslhspFpcllchAp.............................spssphstssp-shthpscssplVRAs-sLLpLscslKEhhIL................ssh.slscshtppppphctcp.p.sp.ltpLh-chhsslhph ..................................................................p+lcsslpslhpsFppllphAp.............................spssphspspp-saphcsc..ussl...lRAuEsLhpLspclKphhlL................schsslsEs.hpp..pppphctpp..tphpp.l.plhcch..phh......................................... 1 61 104 155 +6008 PF06180 CbiK Cobalt chelatase (CbiK) Moxon SJ anon Pfam-B_10975 (release 9.0) Family This family consists of several bacterial cobalt chelatase (CbiK) proteins (EC:4.99.1.-). 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.52 0.70 -5.30 8 718 2012-10-01 23:23:09 2003-05-30 11:41:05 6 9 587 5 138 730 108 247.40 37 87.61 CHANGED KAILlVoFGToYp-s+chTI-pIE+clscpFPDY-lh+AFTSshI..I+KLKcR-s............lplsTPtpALp+LtcpGYc-VIVQsLHlIPGcEYEclhchV.........pcapssFccl..clGRPlLth.......s-DYcphlcsl+cphs.h..+sEslVaMGHGosHtususYupLsplhcs.psasslaVusVEuYPsl-sVlcclccpshcc.....VpLhPLMlVAGDHApNDMAuD-p-SWKsthcssGhcV.ps......hlcGLGEhcchpsIalcHI+sslp 
............................................................pAlLlVSFGToap-opctsIsuhEcclttsa.P-........hclhcAFTSphI...lc+L.cp+cs...............l.p...l.ssPhpALpcLtppGYpc..VhlQsLHlIsGtE.Y-.c.lh.p.p.V......................p.t...h.....c...s..t.....F.p......c.l....plGp.PL.Ls................p.tp.Da.........pplhpA.l................p.........p....p.........hs.....................h............t...p..........scslVhMGHGo.....p...H...........A...........u..s....Y....u.s.L...c...p.hhps......t.s.h...............p...sa...l....GsV..........E..u...a.P..p...l.....-.p.ll...p.pL..+....p....p......s..l..c.p..................VpLhPhMlVAGDHApND......M............A.........u....-...........-..........t.D..................SWK..st..h..ptt.G..h..p.s...p.s....................hL.p.G.LGE.sslpphalpHlcpAh.t................................................................ 0 71 113 129 +6009 PF06181 DUF989 Protein of unknown function (DUF989) Moxon SJ anon Pfam-B_11062 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. The haem-binding domain towards the C-terminus has been merged into Cytochrome_C, Pfam:PF00034. 
22.40 22.40 24.10 23.10 22.30 22.30 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.09 0.70 -5.34 60 485 2009-01-15 18:05:59 2003-05-30 14:07:51 6 4 442 0 164 459 1793 262.90 44 72.00 CHANGED Mtu....al...h-WlsLhlRWlHVIsuIAWIGuSFYFlhLDtuLpcsss....hpcGlpGEhWtVHGGGFYHlpKYhlAPspMP.Ec.LHWFKWEuYsTWLSGhALLsllYahsAplYLI...DPs.....hh-L.oshtAlululusLslGWllYDhLC+..SsLGc.......psslLullLallllssuauhsplFoGRAAaLHlGAhhuTIMsuNVFhlIhPsQRtlVAslcuGcsPDPp...hGtpAK.RSsHNNYhTLPVlFlMlSNHYPhsauspaNW.lIlullhlhGshIRHaFNt+Hss.....p.ppshashsusshlhslhhhluss ...............................uhhh-Wlshhl.RWlHVlsuIAWIGuSFYFlhLD..u.Lpp.ss....h.pGs.G-.WtVHGGGFYphpKYhlAPspMP..-cLpWFKWEuYsTWlSGhsLLhllYhhssshYLlD.s.....h.h.sh....u.sh..Altlulu.LshGWllYshlCc........LGp...........psthLhlll.hhhllshuashs.p.....l..FoG.R.AA..a..LtlGAhhuTIMsuNVFhlIhPsQ+tlVu....s...lhs.....GcpPDPt......hG...ctuK.R...ShHNsYhTLPVlFhM.....l.S.N.HYs.hsauspaNW.l.lhsllhlhGshIRpaFsh+..Htt.....p.t..hhshhs.sshhhhhh.hhht.s............................................. 0 34 89 125 +6010 PF06182 ABC2_membrane_6 DUF990; ABC-2 family transporter protein Moxon SJ, Eberhardt R anon Pfam-B_11079 (release 9.0) Family This family acts as the transmembrane domain (TMD) of ABC transporters [1,2]. The family includes proteins responsible for the transport of herbicides [1]. 
22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.43 0.70 -4.81 12 2005 2012-10-03 10:13:34 2003-05-30 14:10:04 6 3 813 0 438 1383 277 223.40 23 87.05 CHANGED llhhssshhhhhllapp..sssluGWsht-hlhlhGh....hhlhpslhshhhtsshtclscpVccGshDhhLLKPlshhh.lhhcphs.htlhcl.......lhGhhLlsYsshhlslshTshpllhhllsllsGslIhhulhhhhushuFWhhcs.thh.tlhhul...hshupaPhshasthl+hhhsFllPhshhshhPAphhLG+hsh....hhhlhs.hlhullhhslothhWptGlcpYpSsG ......................................................h....hhshhhhtslapp......hs.lp....G......aohpphhhhhhh.........hlsph.l.tp.h.h.h...s...h.h.hhsc..l.+....cGplshhLlRPls........h....hphl...h....pp...h....s.........p...h....l.h.h.l..............hlu..h..l..l.l..h..h..h..h..........t.......h.......s...l..s......h.....s..h...h...p....h.......l..l....a.l....l..sl...hhu..h.l.l.hhtl...plhhu..........hhu.Fah.pp...h...t...u...h...h...h...l...h...hsh...hs.h.uh...h..Plsha.....s.....p....h....lp..hl..l..sa...lPFuh.hsahPsthh...l..s.c.h.ss................hh.s..l.h.h.....hh...hsllh.hslu.hhlWcpulcpaputG..................................................... 0 170 299 381 +6011 PF06183 DinI DinI-like family Bateman A anon Pfam-B_3085 (release 9.0) Family This family of short proteins includes DNA-damage-inducible protein I (DinI) and related proteins. The SOS response, a set of cellular phenomena exhibited by eubacteria, is initiated by various causes that include DNA damage-induced replication arrest, and is positively regulated by the co- protease activity of RecA. Escherichia coli DinI, a LexA-regulated SOS gene product, shuts off the initiation of the SOS response when overexpressed in vivo. Biochemical and genetic studies indicated that DinI physically interacts with RecA to inhibit its co-protease activity [1]. The structure of DinI is known [2]. 
20.30 20.30 20.70 20.40 19.90 18.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.06 0.72 -3.95 11 1722 2009-09-11 10:20:43 2003-05-30 14:12:56 8 2 573 1 94 517 4 64.00 40 80.24 CHANGED lPsGul-ALpsELp+RlpppaP-..stVpV+tuSusuLSlsGsscc-..KchlpplLpEhWEs.DsWhs ..................hssGAl-ALtsELo.+RlpttaP-..spV.pV+.h.s.uu.ssLolssspc--..KpplpchLpEsWEsADpWhh........ 1 1 15 53 +6012 PF06184 Potex_coat Potexvirus coat protein Moxon SJ anon Pfam-B_11093 (release 9.0) Family This family consists of several Potexvirus coat proteins. 25.00 25.00 164.00 163.80 19.80 17.20 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.98 0.71 -4.85 2 45 2009-01-15 18:05:59 2003-05-30 14:13:12 6 1 3 5 0 45 0 148.20 92 83.07 CHANGED hstRpNRRttSRsutho...DhhYsoLT.tuTToapRpsFPsLtsMGDRsFQVluhpI.ssSAuPhhYpARLYsPtDsDsVtuTGlQh.GTsPRThRhpshsGQNsWF.GNTppspsIlAIDGLhoppusssPpNsVhVphsaRlAPSELQSu MVGRRNRRQRSRVSQMT...DIMYGSLTLGSTTTWTRKNFPGLANMGDRPFQVISAKIVVSSASPMLYQARLYSPHDDDNVGSTGLQMSGTTPRTHRMRALPGQNTWFSGNTSSTQVIVAIDGLKTKToDVTPQNAVAVQISYRVAPSELQSA.... 0 0 0 0 +6013 PF06185 YecM DUF991; YecM protein Moxon SJ, Bateman A anon Pfam-B_11108 (release 9.0) Family This family consists of several bacterial YecM proteins of unknown function. 
25.00 25.00 44.80 43.70 18.50 18.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.33 0.71 -5.07 6 775 2012-10-02 15:00:03 2003-05-30 14:16:58 7 2 766 1 89 341 6 181.20 59 95.31 CHANGED spp.shpplhpcLssF.pKIppLuchLsLDLothphDHIALRVNspQoAchhppha.cpGcllS-NhINGRsIhlIcL-pPLphusapI-slELPaPup.KpYPpEGWEHIElVlPupsps...hspchhthhstls.papt.ppsslpVKlSSPKuEGERLsNPTIAhpa.....sssCIKlHPauIKcIlcSE ..........................p.h-ELpDluuDLPRFppAlccLupRLGL......sl.osLpADHISLRCHQNsTAERWR+GFpQCG.ELLS.ENhINGRPICLFKLcEPV.........pVu.......HWp.........hsllELPWPGE.KRYPHEGWEHIEIVLPG..-PET....LsAR..A.LA..LL......oD.............-....G...........L...................o..PGIsVKTSSPKGE+ERLPNPTLAVTD......G+sTIKFHPaSIcpIVASE......................................... 0 11 29 61 +6014 PF06186 DUF992 Protein of unknown function (DUF992) Moxon SJ anon Pfam-B_11128 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.20 21.20 21.60 34.60 21.10 21.10 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.75 0.71 -4.86 29 199 2009-01-15 18:05:59 2003-05-30 14:18:40 6 1 134 0 76 154 10 145.90 43 86.49 CHANGED sussuhssssssssspst.lclGsLpCps..suusGallGSsppLsCsFpsp..utss-tYsGslp.+hGLDlGhTppothsWuVhAP..ssphspGsLuGsYsGsuAsAolGlGlGANlLVG.GospuluLQPlSlpuQsGlNlAsGlsplpLp .........ss.hhshssss.s.usp.spst.sclGhLsCcl..usulGhllGSspclsClF...+spt.sss.-pYsGsIp.KlGlDlGhTspupLuWuVaAP..ssshstGuLsGpYsGAoAsAulGlGhGuNlLsG.GStpoIuLQPlSVQGQpGLNlAsGlssLpLp....... 0 11 33 47 +6015 PF06187 DUF993 Protein of unknown function (DUF993) Moxon SJ anon Pfam-B_11260 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
24.00 24.00 26.20 27.00 21.40 23.70 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.30 0.70 -5.93 21 227 2012-10-03 05:58:16 2003-05-30 14:21:26 6 2 203 1 82 200 28 375.10 58 98.45 CHANGED ltLPssDGslcsYpLtusshh...tsssuPh.....huRlAaAAAHVVADPhusscPh.ussulDW-uTLAFR+aLWsLGLGVAEAMDTAQRGMGLDWssApELIcRShuEApuh........uup.....lAsGsGTDHLs....PusstsL-sVlsAYcEQlssVEtsGG+lILMASRALAtsA+uPDDYtcVYu+lLspsccPVILHWLG-MFDPALpGYWGSs-hcsAh-TsLslItspusKVDGIKlSLLDps+ElshRcRLPcGVRhYTGDDFNYsELItGDsptaScALLGIFDAIAPAAStALpsLspGDsspa+slL-PTVPLSR+IFcAPTpaYKTGlVFLAWLNGaQsHFpMlGGhpSuRulsHhA-lFRLADpAGLLsDP-lAstRM+tlLult.Gl .........................lpLPsscuslpsYpLtus.ss...ts...t..ssPt.....hsRlAaAAAHVVuDPhsDssPh.sssAlDW-AThAFR+HLWuLGLGVAEAMDTAQRGMGLsWsuApELIRRShAEA+ss......GAc...........lAsGsGTDHLs......................suts...tsL--VlcAYEEQluhlEptGGRhILMASRALApsA+uPDDYh+VYu+lLuQsccPVlLHWLG-MFDPuLcGYWG.Sp..ch-sAhcTslslIpspssKVDGIKISLLDtc+E...lslRpRLP.......c....GV+hYTGDDFNYsELIt......GDs......p............taSHALLGIFDAlAPtASsAlssLssGDsppF+ull-PTVPLSR+IFcAPTpaYKTGlVFLAWLNGHQsHFsMluGhQSsRulhHhA-lFRLADpAslLscP-LAspRM+pLLAlaGl............. 0 21 52 68 +6016 PF06188 HrpE HrpE/YscL/FliH and V-type ATPase subunit E Moxon SJ anon Pfam-B_11055 (release 9.0) Family This is a prokaryotic family that contains proteins of the FliH and HrpE/YscL family.\ These proteins are involved in type III secretion, which is the process that drives flagellar biosynthesis and mediates bacterial-eukaryotic interactions [1-2]. This family also V-type ATPase subunit E. This subunit appears to form a tight interaction with subunit G in the F0 complex [3]. Subunits E and G may act together as stators to prevent certain subunits from rotating with the central rotary element [3]. Pfam:PF01991 also contains V-type ATPase subunit E proteins. 
23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.19 0.71 -4.55 10 187 2012-10-02 21:03:42 2003-05-30 14:28:14 7 1 172 0 31 328 31 164.80 25 79.76 CHANGED MLs+RpIsLs.ususL.pPllRREpLAsshpAcslLp-ARpQA-plLspAcpcA-thpp....pApApFWppAsuhLpslQpQREplpppsloss-pLLspA.LppLLDETspupRhpALLRQLlsuQhsEusATLhCHPsptssVAcWLsspu.phtWpLpsDsoLssDoL+LsTApGsFslsWsshpctLl .......................................................................................s...llptt.Ls.p.phpApsl.LppA.+p...QApp...llppAcpc..A.ptlhp......pu..t...p....pA....tp.h....h.......p...............p.....ss...s.h....Lt...t....hp..tp.t....-s.L....p.p....tl........h.p....t.spc....llp..p.u.L...p...plL.c.c.s...s.......t.t......p......h..p.t.L...l.pp...L.....h....tt...................h....l...h..t..s...............................................................................................h............................................................................................................... 0 10 19 26 +6017 PF06189 5-nucleotidase 5'-nucleotidase Moxon SJ anon Pfam-B_10008 (release 9.0) Family This family consists of both eukaryotic and prokaryotic 5'-nucleotidase sequences (EC:3.1.3.5). 
20.20 20.20 20.20 30.30 19.00 20.10 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.69 0.70 -5.58 43 426 2009-01-15 18:05:59 2003-05-30 14:40:48 7 3 318 0 174 428 360 259.30 45 76.67 CHANGED cYQh-+EspsLpPGsAFPhV++LLulNpthtp.........p.hVEVVLLSRNss-TGLRVFsSIpHYGLs.IoRAsFouGcSPapYlsAasssLFLSustpDVppAlcsGhsAApllsssspssp..............ssp.LRI......................AFDGDAVLFSDEoEplappp.G...LcAFpcpEpppsppPLssGPa+sFLtsLpplQpp...........h....ss..hpsPIRTALVTARuAPAH.ERVI+TLRsWslclDEAhFLGGlsKuphLcsapscIFFDDQpsHl-uAu..ptlsouHVP.aGluNp ........................................pYQhp+Esp.LpPGsAFshV+..................tLLslNpp..htp...........p.hV..EVVLlS..RN.sspoG.l..RlhNSIpHYuLs..IsRusFTuGcuPhs.YlcAatssLFLSussc-VpcAlpt.GhuAAs..lh.ssst...p.t.stt..............ssp.LRl......................AFDGDAVLFSDEoEplappp.G.........LcsFhpaEptptspPLstGPh+sFLpsLscLQpp...h..........sp....psPIRTuLVTARSAPuc.tRsl+TLRpWGlclD.EAhFLGGhsKushLcthpPcIFFDDQhhHl-uAp..ttssuuHVPaGlsp................. 0 31 73 120 +6019 PF06191 DUF995 Protein of unknown function (DUF995) Finn RD anon Pfam-B_11307 (release 9.0) Family Family of uncharacterised Proteobacteria proteins. 19.50 19.50 19.60 19.80 19.40 18.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.36 0.71 -4.82 14 170 2009-01-15 18:05:59 2003-05-30 14:50:04 7 4 93 0 32 83 2 145.20 43 80.92 CHANGED sshlLssulsususstusssss....sApshousElaplYpsKoWhWcs...GAGYFsscsRpFpAWopcssutSaupGRWhlossGpLChcAsW+spsGsusupT.....CFsHRpts.GsIYQ+R-PDGsWYVF++ssspssDEatKLhcuDhVu .............................................sslLLhussuhAsss.tus...sspct..huspsMoAhcLhpLYts+oWpWsD...GuG..Y...aosK..........c+...pFsAhscpssu+SaupGRWhlTDsG+lChKAtW..psttGs.....s....s.Ap.T.....CFtHRht.D.GslYQ....+.+ps..s.GpWYVFRHsPhQcsDEApKLlpuDhl............................................... 
1 2 14 19 +6021 PF06193 Orthopox_A5L Orthopoxvirus A5L protein-like Moxon SJ anon Pfam-B_10342 (release 9.0) Family This family includes several Orthopoxvirus A5L proteins. The vaccinia virus WR A5L open reading frame (corresponding to open reading frame A4L in vaccinia virus Copenhagen) encodes an immunodominant late protein found in the core of the vaccinia virion. The A5 protein appears to be required for the immature virion to form the brick-shaped intracellular mature virion [1]. 22.40 22.40 22.60 22.40 21.30 22.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.23 0.71 -4.31 13 77 2009-01-15 18:05:59 2003-05-30 14:53:24 6 1 37 0 0 67 3 234.50 51 91.58 CHANGED MDFhsKaSptLspoupsKss...hhhp-Ehssss++spsl-htLKSpEphYQ+QLREQLA+cNMhtssctsI...............................................Ppp.tTNooSshsNl....puDss.spsoSll...p..sulpDIlpcas......................................................................................SVcc-hppLQsEos-LVosltsAREhThcAIspIh.pLsKtFp..phpc ......................MDFFNKFSQGLAESSTPKSS...IYYSEEKD.sDpKKDEAIEIGLKSQESYYQRQLREQLARD.NMhsASRQPIQPLQPTIHITP....................................L.ssosAPTPKPRQ.QTNTSSDMSNLFDWLSsDss.s.sSSLLPALTP.NuVQDIISKFNKDQKTTTsPSTQPSQTLPTTTCTQQSDGsISCTTPTVTP.QP...shsssssTPTssssssssptp.NPGAsSQQNLDsMuLKDLMSsVE+DM+QLQAETNDLVTNVhD.AREY...TRRAIDQIL.QLVKGFE..RFQK....................................... 0 0 0 0 +6022 PF06194 Phage_Orf51 Phage Conserved Open Reading Frame 51 Finn RD anon Pfam-B_11352 (release 9.0) Family Family of conserved bacteriophage open reading frames. 
21.10 21.10 21.10 50.20 21.00 20.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.97 0.72 -3.88 3 378 2009-01-15 18:05:59 2003-05-30 14:54:32 6 1 199 0 3 179 0 75.10 89 98.57 CHANGED MTD.SssKEYLNQFFGSKRYLYQDNERVAHIHVVNGTYYFHGHIVPGWQGVKKTFDTAEELEsYlKp.HGL+aEEuKQLoLF ....MTD.sARKEYLNQFFGSKRYLYQDNERVAHIHVVNGTYYFHGHIVPGWQGVKKTFDTAEELE.hYIKQ.HGLEYEEQKQLTLF.. 1 3 3 3 +6023 PF06195 DUF996 Protein of unknown function (DUF996) Finn RD anon Pfam-B_11375 (release 9.0) Family Family of uncharacterised bacterial and archaeal proteins. 26.00 26.00 26.30 26.90 25.30 25.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.86 0.71 -4.18 16 81 2009-01-15 18:05:59 2003-05-30 14:56:10 8 2 55 0 50 89 2 129.20 25 69.88 CHANGED llullGhVLlLlulttlScths-cclFppaLhuhIhslluhllhhshlh.....huhhhhps.hsshhs.h.hhsh..lhshLushlllallhlluu.......YFh++AactluphoGlshFcpAuhlhaIGAlLhllh.lGhlIllluhI ................lullGhlLlLlulttlucthscpclFpphLhuhIlsll.uhllhhlhhh.................hthhhh.h......h.....h.........h.sh..............hsshlsshllha....lhhllus..........hah++uachluphoGhshFpsuuhhhhhGAlLhlll.lGhllhhluhI................ 0 15 24 34 +6024 PF06196 DUF997 Protein of unknown function (DUF997) Finn RD anon Pfam-B_11382 (release 9.0) Family Family of predicted bacterial membrane protein with unknown function. 20.70 20.70 21.00 22.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.92 0.72 -4.47 24 837 2009-01-15 18:05:59 2003-05-30 14:58:27 7 1 834 0 88 313 6 75.50 55 89.98 CHANGED pQApKEAhaAluLsllYhlhWhlhAYh.susss.......hhGFPhWF.hSCIhhPllFhllsahhVKhlF+DlsL-csp.ps ....l.QAHKEARWA..LuLTL.lYLAsWllsAYLP.G.s.u.sG............hTGhPcWFEhACILsPLlFIhLCWhMVKFIa.RDIsLED-D..ut........ 1 16 39 63 +6025 PF06197 DUF998 Protein of unknown function (DUF998) Finn RD anon Pfam-B_11425 (release 9.0) Family Family of conserved archaeal proteins. 
27.00 27.00 27.00 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.46 0.71 -4.98 111 925 2012-10-01 21:22:51 2003-05-30 15:01:43 8 6 680 0 301 768 89 184.90 17 80.56 CHANGED llusslhhhs..hlhsthhhsua.shhppslS-Lus..........sstsalh..ssuhlhhulhhls..hulhhhh.....ttshttphushlls...lhulu.hhhsG..lastsss.....................hH.hhsuhlsalthslusl....lhu..................hhhthhphhtsluhhhhhsshhhhhhs..................hhGlhpRhhshsh.hsWlhhhuht ...................................h.hsshhhhhs...hhhtt.hh..ts..s.h..s.....hppslStLusht...................ushthha..p.hshllsGh.hhlh..........hshhlhtt................ttph...h.tthss..sh..ls...................hhGlu..hlhsG....lash.sss........................thlH..shs.u....h...l....sh...hshhlshl...........lhs.................h.htt..ht.h..hph.hh.h.lshlhhhhsslhhhst...........................h.h.hGlhpRl.shhhh..hhahhhhs..h............................................ 0 104 188 251 +6026 PF06198 DUF999 Protein of unknown function (DUF999) Finn RD anon Pfam-B_11426 (release 9.0) Family Family of conserved Schizosaccharomyces pombe proteins with unknown function. 25.00 25.00 258.50 257.50 19.30 18.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.88 0.71 -4.41 3 9 2009-01-15 18:05:59 2003-05-30 15:35:05 6 1 1 0 9 8 0 141.20 83 55.62 CHANGED MVEDVCNVDLEQGLDLCKPEKVNKQSQRSRQSRQSLFTNTIKPQKDKMNIKTNKIKEFLNDLFTEFSKFHNSYYPDGRISTRSKhRWPLLIIWSIIIVFAIDKKFEVK-FLSIW.....INENRFYSEIWGPIAIYVCLFVLLLLuLI MVcDspNVDLEpGL-LCKPEKVNK.........QsLFTNhIKPQKDKhNIKTsKIK.FLNsLFTEFSKFHsShYPDGRISTRSphRWPLLIIWsIlIVFAlDKpFEVpcFLSIW.....INENRFYSEIWsPIAIYlCLhVLhLLuLI 0 9 9 9 +6027 PF06199 Phage_tail_2 Phage major tail protein 2 Finn RD anon Pfam-B_11427 (release 9.0) Family Characterised members are major tail proteins from various phage, including lactococcal temperate bacteriophage TP901-1. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.45 0.71 -4.10 63 727 2009-01-15 18:05:59 2003-05-30 15:44:01 6 8 629 0 118 479 169 135.50 20 78.71 CHANGED stpG+Dllltlphsss...s....tatplushpspslshsu-sl-so...op-......s.sthtphlsssGsppsolSusGl.ht.s..s...ssppplcptahsuphsc........apl.................h...hsshu.............p..hpGsah...loul-hu.ustsspsoaslshsusGtlsh ....................................................................pGcDhllhhphsst..t......shhphuh.p.spp.ls.hs.t-ossss....opD.............................thltssGs.hps.o.lSssul..hp.s....c.........stsptlccshhssthh-.........acl.........................t...hssts.................p....hpu.ah.......lsuhphs.ust-shsphphphsssGthp..................................................................... 0 35 76 96 +6028 PF06200 tify Zim; tify domain Bateman A anon Pfam-B_3326 (release 9.0) Domain This short possible domain is found in a variety of plant transcription factors that contain GATA domains as well as other motifs. Although previously known as the Zim domain this is now called the tify domain after its most conserved amino acids. TIFY proteins can be further classified into two groups depending on the presence (group I) or absence (group II) of a C2C2-GATA domain. Functional annotation of these proteins is still poor, but several screens revealed a link between TIFY proteins of group II and jasmonic acid-related stress response. 20.90 20.90 22.90 21.50 20.00 20.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.17 0.72 -7.26 0.72 -4.69 48 482 2009-09-14 12:07:42 2003-05-30 15:52:11 9 12 39 0 250 488 0 35.50 41 13.34 CHANGED ssssssQLTIFYuGpVhVF-slss-..KAptlhplAup ...........stssQLTIFYsGpVhVF.Dslss-..KA.pslhhLAu............... 3 42 154 208 +6029 PF06201 PITH DUF1000; Thioredox_dimer; PITH domain Yeats C anon ADDA_10869 Domain This family was formerly known as DUF1000. 
The full-length, Txnl1, protein which is a probable component of the 26S proteasome, uses its C-terminal, PITH, domain to associate specifically with the 26S proteasome. PITH derives from proteasome-interacting thioredoxin domain. 23.70 23.70 25.60 25.00 22.10 22.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.96 0.71 -4.44 56 613 2012-10-03 19:46:52 2003-05-30 16:00:39 8 18 298 2 434 619 10 146.30 31 56.41 CHANGED sLhspIDhsplpsLNp.....sss....susttslhcshppc.t...............hlpS.Ds.......D.-QLllpIP..Fss.slKl+Slhl.p..u....................ssspsPpplKlFhN..ppslsF-ssp.shpssQslc.................Lsp.......shpshhphs....L+hs+FpsVpsLolal..........psN.u...s-pT+ItaIslhGps ......................................LhshI-hsplpsL..Nc.......................ssp..........tuht...slhcshp.p......................alcS..D..s........D.EQLllplP..Fss.sVKl+.Slhlpus.........................ssspsP+plKlFh..N..tpshsF-sup...shp....Ps...Qslp..............................Lsp..............shpssh.phs....L+.hs.+Fp.sVps......Lo.lFh................psN.G........t-pT+ltalslhGp................................... 0 153 247 362 +6030 PF06202 GDE_C Amylo-alpha-1,6-glucosidase Bateman A anon Pfam-B_3607 (release 9.0) Family This family includes human glycogen branching enzyme Swiss:P35573. This enzyme contains a number of distinct catalytic activities. It has been shown for the yeast homologue Swiss:O93808 that mutations in this region disrupt the enzymes Amylo-alpha-1,6-glucosidase (EC:3.2.1.33). 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.53 0.70 -5.74 14 1196 2012-10-03 02:33:51 2003-05-30 16:24:27 9 30 973 0 546 1693 78 369.30 22 41.39 CHANGED huospFlscp..........................t.ts.ollAGa.WFu....c.WGRDohIuLsGlhLlsGRa--A+sllhsFuphh+.......+GLIPNthss.sspsh....YNosDAoLWalpulpcYhchssD.hthL....pchaP......................slppIlpsahpGsca....................sltlD.pcuLlhsGs...shTWMD.......AtlsGh.slTPRsGpslEINALWYsAL....+hhpphup.hltc.............tpppYpplApplpssFc+tF...hs.................................................................................scpshlhDslsssp.....pDhplRPN.lhAloL.....s...slhss-pt.thlchspcpLLsPaGL+TLsPcD.sYpGhYsG.sp.sRDtA......YHpGTlWsWllGhalcAhh+hspp...ppt...........ls.hpslLp.h.tal.pushuslsEla-....u-ssasspGChsQAWSVuEllR ...............................................................................................................................................................h.................................hhAGhPaFs.....p..aGRD.s.h.I....u.h.......u..l..h..Lh...s..t......p.....p.............u.c.t.h.l.....h......t.h.s.t..h...t..........................G.h...l.......sp..h..t........t........sp.........t..st...................Y......s....s....s..D..ushaalhs..ltpY...hch....s.s...-....hphl.......pch.h.P..................................................................................................h.lp.p.h...l...p....h...h.......h.....p.....u.....h.p.............................................................p.h.thc.......p.s.....u..h.....l....h....t....s....s....t.....h.........s....ts..W...h...D..................................st....p.s.h....shs.P....c..t..s....t......s....l.E.......l..pu.h.hY.s.A.l.......ph.hu..p.l.s.p..t.h.h.t.t........................................................hstp...h..p..phApplc.p...sF..pc.tF....hs.................................................................................tc..t.s.hh.t...p...h..l.cst.p...........t
s.h...p.l.R...s...N.......h.....h.....s....h..s.....l...........................s.........sl.h..s.......c...p.s...t...p...s...l....c..h...h.....p...p...p...L..h..s...s.....h....Gl....R....T..L..ss.................p..................p..h..s..................Y.p...s........................hu......................YHpGshWs...a.huhhhpuhh+.hsh...............................................................hp..h.t..t.hht......h........t..h......t.t......t.........ht...tls.E...l...hs.........ts.s.......s.....h.......h.......s..hus...sQAWusuth.............................................................................................................................................................................................. 0 183 334 452 +6031 PF06203 CCT CCT motif Bateman A anon Pfam-B_314 (release 9.0) Motif This short motif is found in a number of plant proteins. It is rich in basic amino acids and has been called a CCT motif after Co, Col and Toc1 [1]. The CCT motif is about 45 amino acids long and contains a putative nuclear localisation signal within the second half of the CCT motif [1]. Toc1 mutants have been identified in this region. 20.60 20.60 20.60 20.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.14 0.72 -4.33 42 1411 2012-10-01 19:54:00 2003-05-30 16:45:03 9 19 143 0 549 1397 13 44.30 54 10.78 CHANGED Rcspl..hRY+-KR+pRpFsKpIRYtsRKthA-pRsRlKGRFs+psp .............RcApl..hRYREKRKp...R........p..F.....c..K..+....I.....R........Yt....oRKt........hAEpRPRl+.GRFs+ps.s....... 
0 133 347 463 +6032 PF06204 CBM_X Putative carbohydrate binding domain Finn RD anon Pfam-B_9110 (release 9.0) Family \N 21.10 21.10 21.10 21.40 20.20 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -9.20 0.72 -4.49 57 1056 2009-01-15 18:05:59 2003-05-30 17:12:43 6 25 595 23 316 973 50 63.10 32 5.20 CHANGED sshGsFssss+EYl........hhlhss.pT.....PsPWlNlluNspaGhhlSpsGuGYoahpsu+phclT.WpsDsl ........................thGhFsppspEYl................h.sp.ssT.................PtPWsNlLuNs.paushlSssGuG...Yo.a.tss+phplT+acsDss....... 0 99 204 247 +6033 PF06205 GT36_AF Glycosyltransferase 36 associated family Finn RD anon Pfam-B_9110 (release 9.0) Family \N 21.00 21.00 21.00 21.50 20.80 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.82 0.72 -4.24 111 1114 2009-09-11 09:10:52 2003-06-02 09:55:43 6 25 623 23 331 1025 61 89.50 26 7.62 CHANGED upstas...phspthAah.sss.....tstsuassDRscFlGRstslssPtAl....tpstLSsosGsslDPsuulptplpLtPGcptclsFhlGhupspcp .......................................p..p.thtp.hsah..sss...............tpssua-sDRppFlGp..s..psh...........ss......PtAl................ttspLos.os..us...shDPsuulp.t.p.l.p.LtPGcptclsFllGhupspp.t..... 0 103 213 259 +6034 PF06206 CpeT DUF1001; CpeT/CpcT family (DUF1001) Moxon SJ, Bateman A anon Pfam-B_11004 (release 9.0) Family This family consists of proteins of proteins belonging to the CpeT/CpcT family. These proteins are around 200 amino acids in length. The proteins contain a conserved motif PYR in the amino terminal half of the protein that may be functionally important. The species distribution of the family is interesting. So far it is restricted to cyanobacteria, cryptomonads and plants. It has been shown that CpcT encodes a bilin lyase responsible for attachment of phycocyanobilin to the beta subunit of phycocyanin [1]. 
20.30 20.30 20.40 20.80 19.90 20.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.94 0.71 -4.61 51 207 2009-01-15 18:05:59 2003-06-02 10:29:07 6 2 125 0 80 226 183 162.10 27 83.45 CHANGED hphschLuGcaoNppQAhcs.......PshaA+IplhaR..PLs.....h.hhpuhuhasEQsYshsstpPYRp+lh+lh.p....sstlhlcNaslc-spcatGAupc.sc..hLpplss-sLphh.pGCs..hhhpppss.t....apGplEPGppChl.R.cGptTYLsSphcl...spp.phhShDRGaDscTccplWGShsG.PacFp+ .......htlsphhsGcasNppQAhps.........P..sh..aspl.clhhc..Pls............h..s......shshahEQ.saths..ppPYR.Rlhclh.s......pspl.lpsYtlcc..p..pahusspc..p........hhptls..p.....plt.h...sCs..hhh.phpsp..s.............apGpspPGptChs.p...pGptoalsophpl....ppp..phhohD+GhD.psspplWGuhtG.sacFp+.......................................... 2 18 50 71 +6035 PF06207 DUF1002 Protein of unknown function (DUF1002) Moxon SJ, Bateman A anon Pfam-B_10868 (release 9.0) Family This protein family has no known function. Its members are about 300 amino acids in length. It has so far been detected in Firmicute bacteria and some archaebacteria. 23.80 23.80 25.30 24.90 23.60 22.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.30 0.70 -5.03 47 783 2009-01-15 18:05:59 2003-06-02 10:30:15 6 2 723 0 105 508 7 227.60 38 70.76 CHANGED LGtsLoppQ..KpphLchh.....s.stsssphlpVTsp-.pcYLsshssp.tphuopshSSuhlphpspG.......pGlpVcs......pNIThVTspMYtNAhlTAGlpcAplhVsAPhtVoGouALTGlhKAaE...us.GpplspcpKpsAs-Elsssupl.......s-shup-csssllsclKc-lscpp..p.....ocp-lppIVpphhpphsls....Lossphspllslhtclpshslsh...sphps....QLsp.lp .................................LGsuLo-sQ..KppsLctL.....sssc..st...plhTlTssshsKhhs.sss....ss....uhphaSSuhIpphsss.......sGlpVcI.s....cNITpVTpsMYpNAAlTuGlccAcIpVuA...Phs..VoGpuALsGlYtuhE....ss.GtplsppsKclApEELpshSslsttp.......pscss.h.s.s.sKLssAlA-IKptlAKtp.ps.....hTc-Dl+..pIVcc...s....lcshsls......lTssQls.Ilshhhp.lppssl.p....ssshsppLscl.................................... 
0 32 63 82 +6036 PF06208 BDV_G Borna disease virus G protein Moxon SJ anon Pfam-B_10516 (release 9.0) Family This family consists of Borna disease virus G glycoprotein sequences. Borna disease virus (BDV) infection produces a variety of clinical diseases, from behavioural illnesses to classical fatal encephalitis [1]. G protein is important for viral entry into the host cell [2,3]. 25.00 25.00 149.00 148.90 20.50 19.90 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.66 0.70 -5.58 2 36 2009-01-15 18:05:59 2003-06-02 11:26:44 6 1 6 0 0 36 0 431.60 79 99.96 CHANGED MQ.SMSFLhGFGTLVLALSApTFDLQGLSCNTDSTPGLIDLEIRRLCHTPTENVlSCEVSYLNHTTISLPAVHTSCLKYHCKTYWGFFGSYSADRlINRYTGTV+GCLNNSAPEDPFECNWFYCCSAITsEICRCSIpNVTVAVQTFPPFMYCSFADCSTVSQQELESGpAhLSDGSTLTYsPY..pSEsVNpTLNGTILCNSSSKIlSFDEFRRSYSLANGSYQSsSINlTChNYTSSCRPRLhRRRRDTQQIEYLlHKLRPTLKDAWEDCEILQSLLLGVFGTGIASASQFLRGWLNHPsIIGYIVNGVGVVWQCHRVNlTFMsWNESTYYPPVDYNGRKYaLNDEGRLQTNTPEARPGLKRVMWFGRYFLGTVGSGVKP+RIRYNKTSHDYHLEEFEASLNMTPQhsIASGHETDPINHAYGTQADLLPYTRSSNhTSTDTGSGWVHIGLPSFAFLNPLGWLRDLLAWAAWLGGVLYLIsLCVSLPASFARRRRLGRWQE .........MphSMS.LIuFGTLVLuLSApTaslQuLpCNTDSTPuLIDLEIRRLCHssTENVISCEVSYhNHTohpLsAsHsSChKYHCKTYWGFFGSYSsD+lIsRaTGssctClNsS.sEDPF.CNWaYCCSAIss-lCRCSIoNspVuVpoFPPFMYCSFADCSTVSpp-LpsGpAhLSDGShLhasPY.Lpp-VVNpThNGTIhCNuSSKlVSFDtFRRSYuLtNsSYpspSlNlTCsNhoSpCpsR.hRRRRDhpplpYLsHKLRPhLtDAWEDCEILQSLlLGsFGoGhuuASQFLRtWLNHsDIlGYIVNGlGVlWQCpRVNVoFhsWNESTYYPPVDhNGp+haLNDEuRLQTsoPEAhPGLKRVMWaGRhaLGTVsSG.+P+Rl+YN+oSHDYHL-EF-hShNhTPphulAoGHETsPINHAaGTQusLLPYsRSSNlTSTDTGSGWVHIGLPSFAFLNPhGWLRDlhuWAAWLGGlLYLlpLshSLPs.hARRRRLGRWpE. 0 0 0 0 +6037 PF06209 COBRA1 Cofactor of BRCA1 (COBRA1) Moxon SJ anon Pfam-B_11228 (release 9.0) Family This family consists of several cofactor of BRCA1 (COBRA1) like proteins. It is thought that COBRA1 along with BRCA1 is involved in chromatin unfolding. 
COBRA1 is recruited to the chromosome site by the first BRCT repeat of BRCA1, and is itself sufficient to induce chromatin unfolding. BRCA1 mutations that enhance chromatin unfolding also increase its affinity for, and recruitment of, COBRA1. It is thought that that reorganisation of higher levels of chromatin structure is an important regulated step in BRCA1-mediated nuclear functions [1]. 18.70 18.70 19.20 19.70 17.70 18.60 hmmbuild -o /dev/null HMM SEED 475 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.56 0.70 -5.89 6 144 2009-01-15 18:05:59 2003-06-02 11:29:37 8 8 89 0 96 136 2 349.50 39 69.02 CHANGED MPSlQPVVMsVhKHLPKVPEKKLKlVMuDKELY+sCAVEVKRQIWQDNQALFGDEVSPLLKQYIlEKEssLFSo-lSlLHsFFSPSPKTRRQGEVVQKLTpMIGKNVKLYDMVLQFLRTLFLRTRNVHYCTLRAELLMSLHDLDlsEICoVDPCHKFTWCLDACIREKFVDuKRARELQGFLDGVKKGQEQVLGDLSMILCDPFAINTLAhSslRpLQELlup-uLPRDSsDLhLLLRhLuLGpGAWDMIDSQVFKEP+lD...sEllT+FLPhlMSlhVDDpsFNl-sKLPs-EKpsshh....PssLP-sFp...................................+FlpENRVACElGLYYsLHIsKQRNKNALhRLLPuLV-shsDhAFsDIFLHLLTupLsLLuDEFusEDFCoulFDsFLLTuhopKENVHRHsLRLLhHLHpKVsPs+lEsLhKuLEPspQpsEsl+ELYspLt-Klps...p+ssPssp.-ssuh-.sLtuVsssssh 
.......................plpPVVMtlh+ph...c...ls-+hL......chlhtDc-LYpsssh-VKRQIWpsNpuLFuDEVuPLLppYIhE.......KEthL.hs....phs...p..........FF.t..s...PKsR.R..Q..up..l.l.+LspMlG.psl+LYDhlLQF....LR...TLFl+T+NlH....YCTLRAELLMuLHDh-lt....-IhshDPCHK.........FsWCLDACIR-+.VD.KRsRELQuFL-.sl.K+Gp.EpVLGDLSMlLsDPaAhNhLshSsl+.Lpc..Ll.s.p-.sLPR-s...Lh.LLLRhLuLG...uA..WpMI.-oQsF.KE.P+h-...hpllspFLPhhhuhh...l-D.h.hp.l.pt....+L..ss-c+tsh.h....ssshs.-....sh....................................................tal.....pppthup..luhaYhl+h......h......p.+.....sp.....s..l..+lL.shLsp..shs.c..hu.h.t.D..FL..H..lhs.Lh.....h..s-...ch...s....cp.hs.p.lh-tF.hhts.s.ppssp+H......hL+hlhhla.+..l....shhtt.l.ct.hpP.tt.....t........p..t.h.thh......................................h...................................................................................................................... 0 47 56 76 +6038 PF06210 DUF1003 Protein of unknown function (DUF1003) Moxon SJ anon Pfam-B_10814 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 24.60 24.60 27.40 24.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.48 0.72 -4.13 71 827 2009-01-15 18:05:59 2003-06-02 11:31:43 6 4 766 0 249 630 135 107.60 40 53.07 CHANGED sGShtFllhasshlslWlslNshhhht..ht.................................FDPYP..FILLNLhLSh.AAhtAPlIhhuQNRQss+DRlpucpDhplNl+uEhElphLtcclstlttp.tphhtctpphpt ............hGohpFllhhslllssWlllNh..hshht....hp.................................FDPYP..FILLN...LhhSs.AAhtAPlIhhuQNRQ-cR.D..RlpsppDhcl....shpuct-hchLtcclstlthp.t-hhsc...hp.p................................. 0 81 162 212 +6039 PF06211 BAMBI BMP and activin membrane-bound inhibitor (BAMBI) N-terminal domain Moxon SJ anon Pfam-B_11792 (release 9.0) Domain This family consists of several eukaryotic BMP and activin membrane-bound inhibitor (BAMBI) proteins. 
Members of the transforming growth factor-beta (TGF-beta) superfamily, including TGF-beta, bone morphogenetic proteins (BMPs), activins and nodals, are vital for regulating growth and differentiation. BAMBI is related to TGF-beta-family type I receptors but lacks an intracellular kinase domain. BAMBI is co-expressed with the ventralising morphogen BMP4 during Xenopus embryogenesis and requires BMP signalling for its expression. The protein stably associates with TGF-beta-family receptors and inhibits BMP and activin as well as TGF-beta signalling [1]. 19.00 19.00 20.20 19.70 18.30 17.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.62 0.72 -4.05 4 68 2012-10-03 01:43:02 2003-06-02 12:43:19 7 2 50 0 36 72 0 100.80 68 38.57 CHANGED puSLlSlWLQLELCAMAlLLTKGEIRCYCDAPHCVATGYMCKSELNACFT+lLDPQNoNSPLTHGClDSLhsos-sCpucsscs+otsosP...LECCHEDMCNYRGLHD ..............sshh.lWLQLELCAMAl.LL..TK.GE...IRCY.CDAsHCVATG...YMCKSELs.ACFSRLL..DP..Q.N....oNS.P...LT.HGCLDSl....AS....T..u.DlCpA+Q..ApNHS...G....o.shP.........oLECCHEDMCNYRGLHD...... 0 7 11 20 +6040 PF06212 GRIM-19 GRIM-19 protein Moxon SJ anon Pfam-B_10760 (release 9.0) Family This family consists of several eukaryotic gene associated with retinoic-interferon-induced mortality 19 (GRIM-19) proteins. GRIM-19, was reported to encode a small protein primarily distributed in the nucleus and was able to promote cell death induced by IFN-ß and RA. A bovine homologue of GRIM-19 was co-purified with mitochondrial NADH:ubiquinone oxidoreductase (complex I) in bovine heart. Therefore, its exact cellular localisation and function are unclear. It has now been discovered that GRIM-19 is a specific interacting protein which negatively regulates Stat3 activity [1]. 
25.00 25.00 25.20 27.90 20.70 24.60 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.63 0.71 -4.44 11 296 2009-01-15 18:05:59 2003-06-02 13:27:30 7 4 250 0 189 281 4 119.70 37 86.33 CHANGED suu.sK.QDhPPPGGYtslsa+RshPKpshSGauMFAshhGhhsaGhathhpts+Ec+R.pIE.hsARhAlhPlLpAE+DRcaL+.L++Nh-EEAclMKDVPGWKV....GEsVF+T...-+WVsPhhpEhYshpsh- .................s.....h.QDhPPsGGY.tslpY+Rs.lP.p.+G.hs.........uhshhsshhuhhsaGaaplhps.s+cp.c.c......L.thEchtuRlsLhPlLQAEcDRc.hl.+phppshpcEtp.l..McsV...sWcs........................sp....pVapo...s+ah.Ph...........t....................................... 1 68 106 154 +6041 PF06213 CobT Cobalamin biosynthesis protein CobT Moxon SJ anon Pfam-B_10956 (release 9.0) Family This family consists of several bacterial cobalamin biosynthesis (CobT) proteins. CobT is involved in the transformation of precorrin-3 into cobyrinic acid [1]. 23.00 23.00 23.10 23.70 22.90 22.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.76 0.70 -5.27 6 469 2009-09-11 12:48:28 2003-06-02 13:36:36 7 7 445 0 122 386 1322 275.30 25 44.87 CHANGED +soss.sssEPFKRAlssClRuIAGst-lEVuFus-RPuLsusR......ARLP-lP++sopp-hAlsRGLuDSMAL+hApHDP+lHu+htPpGt.sARAlF-AVEQARVEAIGApsMsGVAcNL...ouML-DKYpKushscl..ss+uDAPlEEALuLhVRE+LTGcssPpu...........AcplV-LWRsalEpKAutDlcpLuusl-DQpAFARlVRDMLuu.....................h-hAEEhGD-pspsDpE-s..D.-DsPpp-pp--pus-EppGs-s...u.uc-...u-uos--tEpu-hEuu-uos-DhsD-sDt 
.......................................................................................................t.........pshKpAlstss+AlutchplcVsa.....ss....s....ps...t..h....supp.........hcL..P....p..h.s.cc.hs..tp.ch..s..hs..RGhuDuhAL+ltapDsplHtpht.Ppss....ARsl......a-AlEpsRlEulGucths.GsupNL...sshl..p...p..c.hpcs.p.hs..ph..pscs.s..usL.ttu...l..uhh...l...R....p+loGps.s.Pss.....................stpll-....L....hRph.....l..pp....puusp....Ls.sLt.......ps..l.p......DQt.......u...FA.......c.hs...+c..hlss....................................h..p.h..s.....p..c....u..p.c..p..p...s...-....p..pps......p...ppps.p....p.p.......p...p.pp.t...u..s.p..p.p...ps.ssp..........s.s.pp.........spss..p.p.....p..t....p..p...tt..pt..s..p.t.t....................................................................................... 1 33 78 95 +6042 PF06214 SLAM Signaling lymphocytic activation molecule (SLAM) protein Moxon SJ anon Pfam-B_11112 (release 9.0) Family This family consists of several mammalian signaling lymphocytic activation molecule (SLAM) proteins. Optimal T cell activation and expansion require engagement of the TCR plus co-stimulatory signals delivered through accessory molecules. SLAM, a 70-kDa co-stimulatory molecule belonging to the Ig superfamily, is defined as a human cell surface molecule that mediates CD28-independent proliferation of human T cells and IFN-gamma production by human Th1 and Th2 clones [1]. SLAM has also been recognised as a receptor for measles virus [2]. 25.00 25.00 25.30 79.80 23.00 21.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.65 0.71 -4.19 4 58 2009-01-15 18:05:59 2003-06-02 13:48:11 6 1 36 6 14 52 0 122.10 64 38.47 CHANGED MDPKGhLShphLLhLSLAhcLShGTGtulMsCPc.ILtpLGSslhLPLssE.pIsKSMNKSIHILVTMAcSPtsolcKKIVSLDLscGusPRaLpDGY+FaLENLoL+ILcSR+EsEGWYhhoLEENV ....................MDPKG.LLSLshlLFLSLAFchShGTGtshMNCPc.IltpLGSslLLPLTpE.tIsKSMNKSIHIlVTMAcS.tsSVc+KIVSLD.sEuGsPRYLcDtY+FaLENLoLtILESRKEcEGWYhMTLEcNl.. 
0 1 1 1 +6043 PF06215 ISAV_HA Infectious salmon anaemia virus haemagglutinin Moxon SJ anon Pfam-B_11275 (release 9.0) Family This family consists of several infectious salmon anaemia virus haemagglutinin proteins. Infectious salmon anaemia virus (ISAV), an orthomyxovirus-like virus, is an important fish pathogen in marine aquaculture [1]. 19.60 19.60 20.80 22.90 18.90 18.60 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.43 0.70 -6.16 3 464 2009-01-15 18:05:59 2003-06-02 13:54:06 6 2 3 0 0 388 0 318.40 88 99.80 CHANGED MARFIILFLLLAPVYSRLCLRNYPDTTWIGDSRSDQSRVNPQSLDLVTEFKGVLQAKNGNGLLKQMSGRFPSDWYTPTTKYRILYLGTNDCTDGPTDMIIPTSMTLDNAARELYLGACRGDVRVTPTFVGAAIVGLVGRTDAlTGFSVKVLTFSSPTIVVVGLNGMSGIYKVCIAATSGNVGGVsLINGCGYFNTPLRFDNFQGQIYVSDTFEVRGTKNKCVLLRSSS-sPLCSHIMRNVELDEYVDTPNTGGVYPSDGFDSLHGSASVRTFLTDALTCPDIDWSRIDAASCEYDSCPKMVKDFDQTSLGNTDTLIMREVALHKEMIuKLQRNITDVKsRVDAIPP...NIFISMGVAGFGIALFLAGWKACIWIAAFMYKSRGRIPPSNLSVA .......................RLCLRNaPDTTWlGDSRSDQSRVNPQSLDLVTEFKGVLQAKNGNGLLKQMSGRFPSDWYTPTTKYRILYL....GTNDCTDGPTDMIIPTSMTLDNAARELYLGACRGDVRVTPTFVGAAIVGLVGRTDAITGFSVKVLTFsSPTIVVVGLNGMSGIYKVCIAATSGNVG..G..VTLINGCGYF...NTPLRFDNFQGQIYVSDTFEVRGTKNKCVLLRSSSDTPLCSHIMRN.VELDEYVDTPNT.GGVYPSDGF.DS.LHGSAS...VRTF..LTDAL...TCPDIDWSRIDAASCEYDSCPKMVKDFDQTSLGNTDTLIMREVALHKEMISKLQRNITDVKh....R.V..-Ah.s....................NIF.....IS.MGVAGFGIALFLAGWKACIWIAAFMYKSRGR........................ 0 0 0 0 +6044 PF06216 RTBV_P46 Rice tungro bacilliform virus P46 protein Moxon SJ anon Pfam-B_11281 (release 9.0) Family This family consists of several Rice tungro bacilliform virus P46 proteins. The function of this family is unknown. 
25.60 25.60 25.60 793.10 24.80 25.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.33 0.70 -5.62 2 12 2009-01-15 18:05:59 2003-06-02 13:59:52 6 1 2 0 0 13 0 388.60 77 100.00 CHANGED hNhEhPYSIHhlsKN+VPIYcQGNLFHoEhouRLSHlSpGLlDHLFTF.SDNTERVRKLHllu-alY.hE.EhpoYp.Eh.pLp-QVSpLp.pspp.+tpltp.+-hlEGLREPlKKPIYTTQDKEtLRsFFCcERSMEYIYaHIKRLAppSYYSHLNKLQKDhE.hRGhYhSFLTNVKFLVLhEsGhWTs.sIEsp.hspS.L.loQccGEc.hpKGll.hN.E.EuG.Y.LostFluDLYAHGFIKQINFTsKlPEGlP.lItEKl.pYKFPGuNTlLIE+EIP+WsFs.MKRpTphRTplYIhpsaRsFYGaSPl+sYEPITPEEFGh-YYSWEphhE-Dct-VVY.ShsT+..KVs+E..aAWPpcDuDhMSCoTShtEEahHRht.A hNhEhPYSIHhlsKN+VPIYcQGNLFHoEhouRLSHlSpGLlDHLFTF.SDNTERVRKLHllu-aLY.hE.EhpoYp.Eh.pLp-QVShLp+pspp.Rtpltp.K-hIEGLREPlKKPIYTTQDKEtLRsFFCcERSMEYIYaHIKRLAppSYYSHLNKLQKDhE.hRGhYhSFLTNVKFLVLhEsGhWTs.sIEop.hspS.L.loQccGEc.lpKGll.hN.E.EuG.Y.LostFluDLYAHGFIKQINFTsKlPEGlP.lItEKl.pYKFPGuNTlLIEREIP+WsFs.MKRpTphRTplYIhpsaRsFYGaSPl+sYEPITPEEFGh-YYSWEphh-EDEu-VVY.ShsT+I.KVsRE..aAWPpcDuDhMSCTTShE-EahHRh-... 0 0 0 0 +6045 PF06217 GAGA_bind DUF1004; GAGA binding protein-like family Moxon SJ, Bateman A anon Pfam-B_10604 (release 9.0) Domain This family includes gbp a protein from Soybean that binds to GAGA element dinucleotide repeat DNA [1]. It seems likely that the this domain mediates DNA binding. This putative domain contains several conserved cysteines and a histidine suggesting this may be a zinc-binding DNA interaction domain. 
26.10 26.10 26.20 34.20 23.80 26.00 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.28 0.70 -4.45 10 153 2009-09-12 21:59:08 2003-06-02 14:01:58 7 3 38 0 65 172 2 273.70 39 97.75 CHANGED MDscGh....Rphu.aac..............Ps..sshK.phuhQLMSols..-RDst............+-pNl.httppuuh.........tpRD.........................Mua..pDsWlsp.........+Dsthhsh..hpss............s..hthlsso+shcphQ...pp......pht-sshs.h.c.c...s.PlspssssspcstpspttppspsPKssKsc..KsK+tsssspcpsstss....c...............................sKss+Ksh-l.sINtVuhDhSslPsPlCSCTGsspQCYKWGsGGWQSACCTTslShYPLPhsTpRRGARluGRKMStGAFpKlLp+LAuEG.aDLSNPlDLKsHWAKHGTNKFVTIR ..........................................................MDsss.......Rphs.....ac...............................hh.......hplMu..hs..-Rsst..............pcts..h..h.ttptuh.........ttRD.............................hsh..R-shlsp............pcpthhsh..hpps........................................ht.h.....ts.pph.pt.p..................................ht.tsshs.......c..p.......t.s..l.p.p...ss..t.tp...s.t..s.K...p.p....ppsp.psK.s.psp....+sp+....sthspcpsp.ps....c...................................................s+ssh..Ks...-...l.sl..Nt....lshD..SshPsPVCSCTGsspQCY+WGsGGWQSACCTTslShYPLPhssp+RtARluGRKMStGAFpKlLp+LAuEG.aslusPlDLKsaWAKHGTN+alTI+............ 0 8 43 52 +6046 PF06218 NPR2 Nitrogen permease regulator 2 Finn RD anon Pfam-B_11335 (release 9.0) Family This family of regulators are involved in post-translational control of nitrogen permease. 
27.50 27.50 29.50 28.60 26.90 27.30 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.37 0.70 -5.78 16 363 2012-10-01 20:21:22 2003-06-02 14:04:55 6 7 244 0 279 366 2 293.80 27 83.34 CHANGED GF.P.IpsIFYohFHPTpGoKlhaQsPsssl.................tsshhsFDol+sYlIPKPpLCNKLlTlp..hscY+llGYPVsI.psscYuRNuF.FNhsFVFsh-.scossYEssl+RLG+hhplLEEQophLSct-p..s.h....................................................................st.pIpsLlpplapDLNNYuEChIPlDsuNoIsIKLFPlhPsPss.lpsapVPIhslpLpsllD.sNWD.TMhKIlPaINGlNSlt+IucLADs-hpLs+pCIQHLlYYsCltllDIFQFSNIYAPTsplpsFlpss.shucECQsYVhssssp.............................st.t..s......................................................................plPopssLFpLYpSLsQGpTlK-Whtpphpt....l..ssIDlRRhIpFGll+ulIYRlapYPl....hsshs.......sptcchhph.tp......................hDclssctshs.tslpct..............pslhc .......................................................h.....l.slhhs.F...................G...shl.hp.P.t.l......................................................................................................Fsshp.alIs+.thptp..h..lphp.............thpl..l..uhP..l.h..t..p....p.......Yt..RN.....t........h.F..NhshVht.p....p...............s.............h.....c......hlp+.huthhh..hE.p.sthlSp......t...........................................................................................................................................t......l..l....h..lhp-L.....N..t.........t....shl.l......sp....s..slplKlh........h.........sPs...h...p......pVPl....h..p...h..t.......th..t....pWD.Th.pllsaIsGhppltpIu...upsp.tLsh.s..lppLhaYtsl...........l...hshh.aush.....Yh..p.....h.....tp.h...h.t........s................h.p...tC.tal.....................................................................................................................................................................................................................................l..hthh.tth..p.............u......shtphh................lD.R+hl.FGl.cthlhph.p
ash...................................................................................................................................................................................... 0 108 156 235 +6047 PF06219 DUF1005 Protein of unknown function (DUF1005) Finn RD anon Pfam-B_11366 (release 9.0) Family Family of plant proteins with undetermined function. 20.40 20.40 32.60 24.80 19.70 19.40 hmmbuild -o /dev/null HMM SEED 460 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.60 0.70 -5.82 9 119 2009-01-15 18:05:59 2003-06-02 14:07:22 7 3 18 0 76 127 0 316.70 42 96.87 CHANGED MDPCsFVRlhVGsLuL+h.......ssssspsssussc.ps...sChCcI+l+sFPhQhsslPLlshs-ups.......ssspolAAsFaLscu-lctlhp+spah..........usLclsVYs........GR+GssCGV..suut+LlG+hplslDLcuu...-uKsslhHNGWlslGppptpp.ts....sAELHLpV+sEPDPRFVFQFDGEPECSPQVlQlQGshKQPlFoCKFupcpsu..ps.p............t.W..ouhso-p..-ptu+ERKGWplTlHDLSGSPVAhASMlTPFVsSPGoDRVoRSNPGAWLILRPsGst...........oWcPWGRLEAWRERG....hpDoLGYRF-Ll.-G....usulshAEooIS.uc+GGcFsIDh.tt......ssh.s.ttphphuuhustuuus..tss.t....................GFVMSupVpGEGKsSKPhVcVuspHVTCsEDAAhFVALAAAVDLShDACRLFs++LR+ELpp ...................................MDPpsFlRl.lstLul+hP..............................t..t...s...sC..Cpl+l.th....P......p......s.hPhh...t.............ts.ussF.Lststlphh.t..s...................Lpl.las........G..u......t..t.....CGh......ttphlGhhpl.ls..ths....ucs..hhhasGWhsluptt............tsphpl.V+hEsDPRalFpFss-sthuP.lhQl.....p......G......s.....h+QPhFoCK...Fuhcp.s................................t.....tt.t+E.RKGW.lhlHDLSGSsVAhA.MsTPFVsSs.GsspVuRSNPGuWLIlRP.uss.............oWpPWGRLEsWREc.G...........D.slsh+hcLl...s.......sssl.hu-ssls.sppGGpFsIDh..............................................................t..GFVMustV.pGEu.+sS..+.Ph.VpluhtHVsChtDAAhFhALuAAlDLSh-AC+.Fpp+hR+chp......................... 
0 10 47 64 +6048 PF06220 zf-U1 zf-U1; U1_C; U1 zinc finger Moxon SJ anon Pfam-B_10606 (release 9.0) Domain This family consists of several U1 small nuclear ribonucleoprotein C (U1-C) proteins. The U1 small nuclear ribonucleoprotein (U1 snRNP) binds to the pre-mRNA 5' splice site (ss) at early stages of spliceosome assembly. Recruitment of U1 to a class of weak 5' ss is promoted by binding of the protein TIA-1 to uridine-rich sequences immediately downstream from the 5' ss. Binding of TIA-1 in the vicinity of a 5' ss helps to stabilise U1 snRNP recruitment, at least in part, via a direct interaction with U1-C, thus providing one molecular mechanism for the function of this splicing regulator [1]. This domain is probably a zinc-binding. It is found in multiple copies in some members of the family. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.08 0.72 -4.39 11 661 2012-10-03 11:22:52 2003-06-02 14:07:45 7 17 296 1 456 687 10 36.70 41 15.13 CHANGED MP+YYCDYCcsaLTHDo.SVRKtHsuG+pHtpslpcYY .........+.aaCDYCcsalscDosSlRKpHpsGppHpcNVpca...... 0 152 235 353 +6049 PF06221 zf-C2HC5 Zf-C2HC5; Putative zinc finger motif, C2HC5-type Finn RD anon Pfam-B_11300 (release 9.0) Domain This zinc finger appears to be common in activating signal cointegrator 1/thyroid receptor interacting protein 4. 24.30 24.30 25.20 25.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.67 0.72 -4.21 33 300 2009-01-15 18:05:59 2003-06-02 14:20:43 8 6 263 0 212 293 1 53.90 44 10.63 CHANGED s+ptCsCtAppHsL......ssNCLsCGKIlCspEG.......sPCsFCG.sslhs.....sppppplhpt .....s.p+.CsC.ApcH.L......hsNCLsCG+IlCppEG.......GPCh.FCG..s.lho.....pp-.p.h...p............ 
0 72 119 178 +6050 PF06222 Phage_TAC Phage tail assembly chaperone Finn RD anon Pfam-B_11379 (release 9.0) Family \N 25.00 25.00 39.10 39.10 22.10 19.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.56 0.71 -4.59 6 219 2009-01-15 18:05:59 2003-06-02 14:27:42 6 2 157 2 7 135 0 121.60 75 96.51 CHANGED hspsLRslALsshuGaRHKT.VsVPEW-GspVsLREPSuEAWhhWpEll+...G.ucDD-s.loVsEKhpR.....NhpADVpLFlDVLhDpshQ...RVFS-DDppQVttlYGPVHuRLLppALEL....lsssE-A+K. .......MtKDLKTLALARLSGFRHKT.VKVPEWtNVSVVLREPSAEAWYLWQEVLN.........G.DGE.DDDT.LSVVAKTRR.....NLEADVTLFCDVLCDTDLQ...RVFoPDDREQVLAVYGPVHARLLRQALEL....IADAESARKK................................... 0 0 1 5 +6051 PF06223 Phage_tail_T Minor tail protein T Finn RD anon Pfam-B_8084 (release 9.0) Family Minor tail protein T is located at the distal end and is involved in the assembly of the initiator complex for tail polymerisation [1]. 25.00 25.00 26.20 25.00 24.10 23.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.37 0.72 -3.61 4 777 2009-01-15 18:05:59 2003-06-02 14:39:43 7 2 329 0 11 264 0 96.40 61 80.47 CHANGED +LAREFRRPDWRpMLuEMSATELGEWu-aFsppSFSDsLLDAcFAoLKuLlsuLVoGsscsA.tDFSLLscpEuh.E+TD-ELMpLGEGIoGGVRYGPDSpPG ..........htRsDWRtMLAGMoSTEYADW++FYpTHYFpDs.LDMHFS...GLpYsVLSLFF.u..DPDM.HP....uDF..SLL..s...c.c..E....u.....p.E....h..E....D..-h..LMQKAAGLAGGVRF.GsDG............. 0 0 1 5 +6052 PF06224 HTH_42 DUF1006; Winged helix DNA-binding domain Finn RD, Bateman A, Eberhardt R anon Pfam-B_8424 (release 9.0) Family This family contains two copies of a winged helix domain. 
24.00 24.00 24.00 24.40 23.80 23.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -12.14 0.70 -5.10 119 1653 2012-10-04 14:01:12 \N 7 6 1193 0 494 1415 196 321.90 24 76.36 CHANGED lccLsslQhss....spstahsLauRls.sa...psstLpph.......hpc+plhchashcuo..ll.shc-h.hhphthtshhttt....t...............hphhstphpph.hstlhptl..tspuPloss-lttthttp...............tht....sspthhp.hlhhpG.lshsshp.uh.........phashs-ch...........................................................................................................................................ppps.....hctLlpchlpuhG.sAoss-lttaht..ls.t.....ps+tslt..thhtttcL.hthplc.....t.t....hashs...........s.....sthsssss.......h........plLssFDshlhs+pc..ppclh.s..hta+hcla.sss.sph.........shslLhsGpllGphchcsc..cps......usLplps...h...hscs.......................tts...slttphpchupa ..............................................................................................................lp+huhlQhss....s+usalsLauRls..sa........s.p..hL-cu..........htct..clhch.Wupcus..hh.Ptp.Dahh.hc.t..thhs.tphs......hhh...........................tshh.p.ptpt..htp...llptl...psc.G...P...........l..pus-hppthttt.....................th.h.php.........st+p.........h.....L-..tL........h..ts.........G.cl.........hsst.+.p..shp.........................+sYcL..sc+l...............h....Pp........tps..............................................................................pscu.........hthhlcphhcuhG..ss.......t.p....plssa..hp....Lphs.......shps.h..hp.....thht.t.t.pl..lsVp.l-.................t.ht....hahps..................................................c............tth.stt.ss.........s.......tlLuPFDsl.lhs+c+....tppLa.s.hpa+hEs.a.sssspp.............shslLhpGpllGph-h+hc..+pt..........shLplhs........hhhpt.....................htt...sltpthpphAta.................................................................................................. 
1 183 350 445 +6054 PF06226 DUF1007 Protein of unknown function (DUF1007) Finn RD anon Pfam-B_8585 (release 9.0) Family Family of conserved bacterial proteins with unknown function. 21.00 21.00 22.00 21.90 20.80 20.20 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.44 0.70 -4.59 46 668 2009-01-15 18:05:59 2003-06-02 14:57:21 8 5 643 0 141 463 41 197.50 28 83.76 CHANGED pthtlhhuslssh.........sAtAHPHsal-hpspllhssss..lsul+htWpFD-haSuhll...thcpspcuphsp..pcLpplupsshpsltphpYFThlh.ssGcpl+as.p.....................Ps........chthshcs....s........plhltFslsLsp.Phshp.tt......lslpsYDPoYalshsasp.c..sslplssts.tsCphplhpPsssss...........tpuLshspts......ssshGthFAp+lplpC ..................................tt...hhhshhshh.......uhAHPHsFlchpspllhcspt..lsulchpWphD-hhSuslLh........cs.tsut.ss.cthpcLutpl.h.ssl.hs..p+Y...FTplh....t..s...Gp..p..lKhp..p.....................Ps..............phthshcs.....p................................plllpFsl.sLsc.P.sls..sps......hshssaDPoYYlshpYsp...c.......sslshs..s......ttCphplhpPpsstt.h.........tht...toL.sptt.ts.......s.slGp.FApplplpC............................................... 0 29 75 108 +6055 PF06227 Poxvirus Orthopox_N1; dsDNA Poxvirus Moxon SJ anon Pfam-B_10619 (release 9.0) Family This is a family of dsDNA viruses, with no RNA stage, Poxvirus proteins. 
21.10 21.10 21.20 21.80 20.00 21.00 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.21 0.71 -4.71 37 475 2009-01-15 18:05:59 2003-06-02 15:01:29 7 1 40 23 1 377 0 156.50 20 92.13 CHANGED M...............................................sp.s.............................................................hphsahhtsphs.tpcslpsllpcYhhaRsh....hhtppphG+lFpcLhpaDp.Ahpha...G.slp.....phhpshh..s.......hpshphhhphphshp.......hhpsptlIGlhuhluchhs.........thhslh.hhhphls.pphpp .............................h......................................................................................................pashtcphs...p.sslpsllp-Ylhapsp.....t.p.shGpla+plhoacp.Ahcha...u.slp...............plhpph..................hpshs.al+hhhshs.......h.pspshIGlsuhlschhs.........phhpshhhhhph.l.s...h..t........................ 0 0 0 1 +6056 PF06228 ChuX_HutX DUF1008; Haem utilisation ChuX/HutX Moxon SJ, Bateman A, Eberhardt R anon Pfam-B_10620 (release 9.0) Domain This family is found within haem utilisation operons. It has a similar structure to that of Pfam:PF05171. Pfam:PF05171 usually occurs as a duplicated domain, but this domain occurs as a single domain and forms a dimer. The organisation of the dimer is very similar to that of the duplicated Pfam:PF05171 domains [1,2]. It binds haem via conserved histidines [2]. 
25.00 25.00 25.00 25.90 24.60 24.20 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.47 0.71 -4.80 38 458 2012-10-01 19:49:10 2003-06-02 15:03:01 8 6 445 9 79 1005 54 137.40 45 67.28 CHANGED lApcLsloEh-VlpALPtchsshhsu..sphpplLpslspWGp.VTsIl+stsuIaEh+ushPpGc.hu+GYaNL......htppst..LcGHL+h-shstluhVs+PFh.Gpc...otSltFFsppGcslFKlYLGRDc.pRpLhspQlptFcsLt ..........................lAcphsso.h-VVc.sLP.pp..........h.s.h.s.sG........s+hsslh.-.s.l.s.c.WGs..VTTlV+os.ssIhEhpGt.lPp....G......p......h......u....+.......G....Ya...NL.........hu+cG....lpGHlKh-NsspIALlp+sFM..Gh-............otSltFFsppGp.shFKIaL.GR..D.-...+RpLhs-QVstF+sL........................................ 0 14 41 60 +6057 PF06229 FRG1 FRG1-like family Finn RD anon Pfam-B_8590 (release 9.0) Family The human FRG1 gene maps to human chromosome 4q35 and has been identified as a candidate for facioscapulohumeral muscular dystrophy. Currently, the function of FRG1 is unknown [1]. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.13 0.71 -4.84 13 253 2012-10-02 19:42:32 2003-06-02 15:04:21 7 11 189 1 166 289 1 170.60 35 65.39 CHANGED uSYlcAhssGhhThGtPHcs.spsPsPcElFsAs+l.u-p+luhKoGaGKYLuls..scGlloupu-AIuspEQaps...........lFpss+sAhhu..............ssssFlSssp.tp..h.upS+psupsEhlclRucssp.......shpsphclthpsRhcs+.pt.p.lKthQ....chpc+cL......RLsc--s+cLK+ARc-GshHEslLDhRsKhKsD+Y 
...................................................lpshspG..shutPHp............................p.asAs+.l...u.sp...p...lu.hKo.u..a..G+YLu..hs..pp..G..l..lsupo-AlushEpaps.....................................lh.p....s....s..p..hAhhu.............................................ssssFlsssp..st....h.upuc...su....up....p-..hlclRusApc..................phppp..cl...p.s.+....t....ps..+tp..php.s..Kthp..........phpc+cL...................+l..scc-s..+hLK+A++-GphHEtLLDhRsK.hKtD+a........................................................ 0 58 83 127 +6058 PF06230 DUF1009 Protein of unknown function (DUF1009) Finn RD anon Pfam-B_8705 (release 9.0) Family Family of uncharacterised bacterial proteins. 25.00 25.00 57.40 31.60 21.60 24.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.07 0.70 -11.13 0.70 -4.97 83 491 2009-01-15 18:05:59 2003-06-02 15:08:55 6 4 486 0 172 434 1229 213.30 35 75.92 CHANGED lGpllchL+ppGlscllhsGsl.pRPth.sslc......Dhpslpllsthh..uh.ptGDDuLL+ulhphhEp.pGhplluuc-lhscLLs.spGsLsptpPspppppDlthGhplspslGshDlGQusVVpsGtVlAlEuhEGTDshLtR..................supLst...........ttsuVLlKssKPsQDhRhDLPslGspTlcssscAGlsGlulEAGpslllDpptslptA-ctGlhlhu ..............Gcllchh+ppslscllhhGsV.pR.P.th..ps...l.p.......Dhtshplls.h....tl..spGDDsLLpullshlEp.pGhcVlusp-lhssLls..s.sGsloctpPssp-ppDIphGhchAptLu.tlDlGQusVVpsGtVlAlEu.hEGTDthlpR...................................sucLtp................puuVLVKhsKPtQDhRhDlPsIG.pTlcshtcuGhsGlAlEAG+slll-.p.pcslthAsctGlFlh........ 0 74 121 142 +6059 PF06231 DUF1010 Protein of unknown function (DUF1010) Finn RD anon Pfam-B_11467 (release 9.0) Family Family of plasmid encoded proteins with unknown function. 
25.00 25.00 27.80 28.00 16.80 15.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.90 0.72 -3.98 8 110 2009-01-15 18:05:59 2003-06-02 15:14:07 6 3 46 0 6 54 0 88.70 68 92.27 CHANGED MpAtoYSSAhPCS..................+GFpsFLASSACsASATSYaSCSAAPLpWPSAFSWAAPhhKAGRSlLAFGSN...sAlK.....P......TRlhRuAYLuR .........................QTAFSFSuslQphhChFSG......LRLhuLRtFsVFLASSPCVASASSYtFCSAVPPRW+SsFSpLAPVAKh+LSVLASGSN...ISVK.....P......TRILRSAYLuR................................ 0 0 4 5 +6060 PF06232 ATS3 Embryo-specific protein 3, (ATS3) Finn RD anon Pfam-B_11504 (release 9.0) Family Family of plant seed-specific proteins. 20.90 20.90 21.00 21.10 20.70 20.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.82 0.71 -4.54 15 103 2012-10-02 11:40:54 2003-06-02 15:18:41 6 3 23 0 59 169 0 120.20 37 65.84 CHANGED shptsppscptus...CsYsVpIcTSCsSPthTpDpISlsFGDAhGNpVYssRL............cDPsu..u.......sF-+CooDoFp.lpGsCht.pIChLYLhRsGsDGWhPEoVpl...Ys....ps.pPV..TFhaNphlPc.sVWYGaNhC ...........h.h.........tstp..CsYslhIpTSCsSst.hTpDpl.Slt.F.GDu..GNplass+L................csPss....t.......sFEcCusDoFp.l..pGsCh....tIC..alYL...h..R..s...G...p...D...GWhP-hVpl.....as.......tsp......p.ss......sF.as.p..lPp.s.sWYGashC............................................ 0 4 43 53 +6061 PF06233 Usg Usg-like family Finn RD anon Pfam-B_11528 (release 9.0) Family Family of bacterial proteins, referred to as Usg. Usg is found in the same operon as trpF, trpB, and trpA and is expressed in a coupled transcription-translation system [1]. 
20.70 20.70 20.90 24.80 20.00 19.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.74 0.72 -4.12 27 204 2009-01-15 18:05:59 2003-06-02 15:31:43 7 1 184 0 72 150 47 81.30 60 86.78 CHANGED h.LpGYGLTTAEIhY+hPDHPplLQoaVWQ-YDLAPcFPtLhcFL-FWpcElEGsLHSVRhsHp+LIpPuEaRsVsG.hpLH .........h.QLtGYGLTTAcIhY+MPDHPpLLQoaVWQDYDLAP-FPtLpcFl-FWQcclEGPLHSVpasHp+LIusuEWRsVpGEhhLH..... 0 14 39 48 +6062 PF06234 TmoB Toluene-4-monooxygenase system protein B (TmoB) Moxon SJ, Iyer LM, Burroughs AM, Aravind L anon Pfam-B_10626 (release 9.0) Family This family consists of several Toluene-4-monooxygenase system protein B (TmoB) sequences. Pseudomonas mendocina KR1 metabolises toluene as a carbon source. The initial step of the pathway is hydroxylation of toluene to form p-cresol by a multicomponent toluene-4-monooxygenase (T4MO) system [1]. TmoB adopts a ubiquitin fold [2]. Although TmoB is a component of the T4MO system, its precise role remains unclear. 25.00 25.00 43.20 43.00 21.50 20.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.77 0.72 -4.12 10 40 2012-10-03 10:59:06 2003-06-02 15:32:22 7 1 34 35 21 39 1 84.00 43 95.51 CHANGED MALFPlhusFEtDFVlQLVuVDT-cTMDpVAttsAHH...........SVGRRVss...pPGpllRVR+pGssphhPRssplu-uGlpPhEslEllF-c ...MAlFPlhusFptDFVlpLVsVDocsTMDpVAstsAaH...........oVGRRVts...pPGtslRVR+pGst...phhPRshplA-uGlpPh-slElhap.s.. 0 10 13 18 +6063 PF06235 NAD4L NADH dehydrogenase subunit 4L (NAD4L) Moxon SJ anon Pfam-B_10667 (release 9.0) Family This family consists of NADH dehydrogenase subunit 4L (NAD4L) proteins from the mitochondria of several parasitic flatworms. 
22.20 22.20 22.40 28.20 22.00 22.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -9.98 0.72 -4.20 12 62 2009-01-15 18:05:59 2003-06-02 15:37:12 6 1 44 0 0 69 0 85.30 41 99.60 CHANGED MlolhLlhssllllShhLshs+hhNhLIlLENaNVLlLLhCLlhoht-s.+hlFIshMsl.slEl.lhLlVlspshcpuShh.hluh ....MlslhLlh.ssllllShhLshtRhlNhLIlLEsFNVLlLLhsL.Lh.oht-s.+hlFlslMslhTlElslhLlVlsplhcpushh..ssh... 0 0 0 0 +6064 PF06236 MelC1 Tyrosinase co-factor MelC1 Moxon SJ anon Pfam-B_10673 (release 9.0) Family This family consists of several tyrosinase co-factor MELC1 proteins from a number of Streptomyces species. The melanin operon (melC) of Streptomyces antibioticus contains two genes, melC1 and melC2 (apotyrosinase). It is thought that MelC1 forms a transient binary complex with the downstream apotyrosinase MelC2 to facilitate the incorporation of copper ion and the secretion of tyrosinase indicating that MelC1 is a chaperone for the apotyrosinase MelC2 [1]. 21.40 21.40 25.70 38.40 20.70 20.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.70 0.71 -4.14 9 48 2009-01-15 18:05:59 2003-06-02 15:43:53 6 1 34 23 15 62 1 126.10 37 87.36 CHANGED lssAAslA.AuushuusspAssAsstuttst..ss.s.........sshssuFDEsY+GRRIpGtssuuGtt..............tauGuatVhlDGh.LHlM+sADGoWlSlVsHYcssPTPhAAARAAVDELp.GA+LhPhPss ..............................................h.hh............h.shhs..h.ssuusstustss.tsssst........sss.sssF.DEsY+GRRIpGsss....s...uustt.............ttuss..a....pVplDGR.LHVMRp....ADG....oWlSsVsHY-sa..sTP......LtAARAAVDELt..GApLtsh...s.... 0 3 9 15 +6065 PF06237 DUF1011 Protein of unknown function (DUF1011) Finn RD anon Pfam-B_11463 (release 9.0) Family Family of uncharacterised eukaryotic proteins. 
25.00 25.00 35.70 29.30 24.00 22.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.29 0.72 -4.13 17 198 2009-01-15 18:05:59 2003-06-02 15:47:15 7 3 89 0 126 188 1 100.00 46 22.74 CHANGED phsssphsalhhlluhlNuhoNGlLPSVQSYSCLPYGshAYHLussLSslANPlACFlAhFl.hRSlshlusLol.lush..FuuYlhshAuhSPsP.LhssshG ...........................h..tsphsaLhsLlAhsNALTNGVLPSVQSYSCLPYGphAYHLussLuuhANPLACFLAM.hl....sR..SLshLusLol.lGsh..huuYlMshAslSPCP.LhspssG......................... 0 35 46 86 +6066 PF06238 Borrelia_lipo_2 Borrelia_lipopr; Borrelia burgdorferi BBR25 lipoprotein Moxon SJ anon Pfam-B_15000 (release 9.0) Family This family consists of a number of lipoproteins from the Lyme disease spirochete Borrelia burgdorferi [1]. 25.00 25.00 26.80 26.60 22.40 21.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.62 0.71 -4.02 2 99 2009-09-11 01:58:06 2003-06-02 15:48:34 6 2 26 0 6 66 0 103.50 81 97.31 CHANGED MQNNTIGLGLNLLSSLTNIAKTDTNIDHNYINTFSKVIDFFYKTYISTLKSMETAESTKIFEEIpDILKYNIEIIEAIShDKsK+IITSLKApRNKIMKEYIKhLKRuENA .....................................MQNNTIGLGLNLLSSLTNIAKTDTNID..HNYINTFSKVIDFFYKTYISTLKSMETAESTKIFEEIQDILKYNIEIIEA....ISTDKSKRIITSLKApRNKIMKEYIKILKRGENA............................ 0 4 4 4 +6067 PF06239 ECSIT Evolutionarily conserved signalling intermediate in Toll pathway Finn RD anon Pfam-B_9306 (release 9.0) Family Activation of NF-kappaB as a consequence of signaling through the Toll and IL-1 receptors is a major element of innate immune responses. ECSIT plays an important role in signalling to NF-kappaB, functioning as the intermediate in the signaling pathways between TRAF-6 and MEKK-1 [1]. 
24.30 24.30 24.30 35.60 24.20 24.20 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.64 0.70 -5.49 11 101 2009-01-15 18:05:59 2003-06-02 15:57:31 6 5 84 0 66 104 0 212.70 39 51.63 CHANGED psptpssusppscps...hhsps.pstpp.hpt...lsshcslFcps..pp+sKssFlchlchFpc+s.V+RRGHVEFIYsAL++MpEaGVE+DLpVYptLL-VFPKthFhPpNlFQphFhHYPRQQpCuIclLEQMEsaGVhPstEschlLlplFG++SaPhpKahRMhYWhPKFKplNPaPlPpclPs....Dsl...-LApluLcRMss.DLpuclTVaQ.hs.spsu.s..c..csaIVuh ....................................p................................tp..p.tps...Ls.hcphFpps..tp+sKssFlpslchFtp+s.l++RGHV-FIYsAL++MtEaGVE+DLtVYptLLDVFP.K.thFh........PpNhhQphFh....HYP+QQpCulslLEQMEsa...G........VhPspEhchlLlplFGc+uaPhpKahRhhYWhP+FKN..lNPaPlP.cslPp....Dsl...-LAphuLc+hss.Dlpuclolap.hs.tsc-shs.....psaIVu........................ 0 24 29 49 +6068 PF06240 COXG CoxG; Carbon monoxide dehydrogenase subunit G (CoxG) Finn RD anon Pfam-B_9339 (release 9.0) Family The CO dehydrogenase structural genes coxMSL are flanked by nine accessory genes arranged as the cox gene cluster. The cox genes are specifically and coordinately transcribed under chemolithoautotrophic conditions in the presence of CO as carbon and energy source [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.47 0.71 -4.51 15 764 2012-10-02 19:24:03 2003-06-02 16:18:11 8 6 531 3 309 754 612 139.90 26 60.58 CHANGED Gphclsss.ppVachLsDP-plssClPGspSlpttu.sEacsplplplGsl+usa....suclphsplcsssphh.lpscGpGttAGhuhssshhhhspssst.oclsasscsclGGhlAplGu+llcussc+llschhpslusclu ...............................p.plssshppVWssL...sD.sch.ltsClP....G.s.p.p......l....p..t....t.....u...ts...p.....a.p..u....s.....lp.l.cl..G..P...l..pupa................pGpl..pls..c...h...c....ss..p....p..hs...lsuc.G.pu..u..t.u.u.......h..st....u.....s....s.hs..h..h..h........p..s....s........s.....s...T.......plsass...csp....l.u........G.+.l...AplG..s..+.l..l.sssu....c.+lhspFhsshstth.t...................................................................... 0 93 183 250 +6069 PF06241 DUF1012 Protein of unknown function (DUF1012) Finn RD anon Pfam-B_9320 (release 9.0) Family Family of uncharacterised proteins found in both eukaryotes and bacteria. 32.90 32.90 32.90 33.00 32.20 32.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.31 0.71 -4.73 2 84 2009-09-11 06:58:58 2003-06-02 16:22:51 7 5 32 0 50 86 2 169.20 58 22.88 CHANGED ARAIIILPTKGDRYEVDTDAFLSVLALpPIQKMESIPTIVEVSSsNhYDLLKSISGLKVEPVENsTSKLFVQCSRQKDLIKIYRHLLNYSKNVFNLCSFPNLsGhKYRQLRLGFQEVVVCGlLRDGKVNFHPNDsEELMETDKLLFIAPL...KKphLYTDMKhENhTVsp..DTRKQVaEcK+SRLpKIIhRPpKSLSKGSDShKGP .........ARAIIVLAp.ctNADQSDARALRsVLSLsGVKEGL.+GHlVVEhSDlDNEsLVKLVGG-lVET..VVAHDVIGRLMIQCARQPGLAQIWEDILGF-NsEFYlKRWPpLDGhpFcDVLlSFPDAlPCGl...K...p...s.G.KIllNP-DsYV.Lp.....E......GDElLVIAE................................................................................................................DDDo....................................................... 
0 6 32 41 +6070 PF06242 DUF1013 Protein of unknown function (DUF1013) Finn RD anon Pfam-B_9390 (release 9.0) Family Family of uncharacterised proteins found in Proteobacteria. 20.60 20.60 20.70 106.40 20.20 19.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.65 0.71 -4.19 35 348 2009-01-15 18:05:59 2003-06-02 16:32:47 6 1 348 0 115 277 1365 141.00 57 63.33 CHANGED LEVpuIADG-VApGIpGhDPlssGQLTp-EI-+scpDPsh+L+lt.sp..shhscpK....KGsRYTPVS+RQDRPDAIhWll+NHPElsDuQIsKLlGTTKsTIpuIR-RoHWNusNIcPpDPVsLGLCoQh-LDttVp+..Aucct ..LEV+uIADGEsApGIKGhDPlssGQLTpEEIp+uEcDPsa+LKLp.sp...stl.s..c.pK.+..KGPRYTPlS+RQDRPsAIhWLl+NHPELpDuQIu+LlGTTKsTIppIRsRTHWNsuNlpPhDPVsLGLCSQlDLDhtVp+Aucpp..... 0 32 69 82 +6071 PF06243 PaaB Phenylacetic acid degradation B Finn RD anon Pfam-B_9426 (release 9.0) Family Phenylacetic acid degradation protein B (PaaB) is thought to be part of a multicomponent oxygenase involved in phenylacetyl-CoA hydroxylation [1]. 20.70 20.70 21.00 21.90 19.30 19.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.12 0.72 -3.85 43 641 2009-01-15 18:05:59 2003-06-02 16:40:32 6 3 601 2 212 450 72 92.20 56 92.12 CHANGED ppWPlaEVFlRucpGLsHpHsGSLHAsDschALptARDlYTRRpEGlSIWVVtuspI................................................................................sASsPs-..............+s.hFcPupDKsYRHsoaY.....clP..........................-s......lp+h .......................WPLaEVFVRuKpGLsH...+...HVGSLHAuDscMALcsARDlYTRRs.EGsS.IWVVtuupI.............................................................................sAS..s..P-E+uthF-PApsKlYRHPTFY......plP...DtltH................................... 0 56 132 180 +6072 PF06244 DUF1014 Protein of unknown function (DUF1014) Moxon SJ, Coggill PC anon Pfam-B_11009 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
21.30 21.30 22.60 22.50 21.20 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.66 0.71 -3.90 14 313 2012-10-02 14:16:02 2003-06-02 16:51:30 7 4 282 0 212 300 4 148.50 32 66.20 CHANGED T+AQItcphpc-pptpct.pptppt.......-h.hEc..shNhph.-pspsE.............................................ARol--AIusLSlsc.....shD+HPE+RhKAAapAaEEspLPcLKpEpPshRLSQhKQlLhKEWpKSP-NPhNQ+ ........................................................................................................................t............t...t..t...t.pttp..t.ttth.....t..hp...c....ht...s.....p....t.....t....tt..-...............................................................................................Aps.lDsAls....sL.olssp.............thDRHPERRh.KAAYpAFEEpcLPclKp..E..p..P..uLRh.....sQhKphlhKcapKSP-NPhNQ........ 0 75 120 176 +6073 PF06245 DUF1015 Protein of unknown function (DUF1015) Finn RD anon Pfam-B_9451 (release 9.0) Family Family of proteins with unknown function found in archaea and bacteria. 
19.70 19.70 19.80 20.20 19.60 19.50 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.30 0.70 -5.97 97 830 2009-01-15 18:05:59 2003-06-02 16:53:50 6 6 778 0 268 769 822 388.30 33 97.45 CHANGED plpPF+ulRsstchstpluu.....PYD.VlsspEtcphhptsPaohl+lphPchshs.ths..pp..YppAtcphpcahppGlLhpDppPshYlYcpphs.s.........ps...ppGllussslc-Yp.....pshI++HEtThts+cpDRlphlcsssANsuPlFlhYpc..spplspllpphh.p....ppPhh-assp.sGlpHplWhlsDtphlpplpphF.tthp................slYIA..DGHHRhsoAhphtpcpcppss.ths.upE.saNahhshL..hscspLpIhshpRlV+sL.suhs.cpalpplpph.Fplpphs.....................................................t.htssppppauhY.lss.paYtLph+p....t...t.p..ss.........lppLDVslLpchlLpslLGI..ps.Rs-p+IsaltGhcslh.....h.ctVcsGp.hplAFhlhPsslcplhsluDsGplMPsKSTaFtPKLtSG..Lhl+pl ...................................................................l+PF+ulR.Pst..p..h.st.......plA.u.....PYD.VlsscEs.cphhp.ssshShl+l.psEhshs..shs.hsp..............Yp+AscshppahpcGhlhpDpcsshYlYc.phs.u................+sQpGlVssssl--Yt.............ssh..I++HEhTht-+EtDRhchlcsssAphsPlFlsYcs.....ppt....l...p.pllppht.p..........ppPhY-Fss.....DGhpH.phWhls-p..phlpplsptF.t.p.h.s...................slYIA...DGHHRsAoAsp....lttc+cpp......t............s....t....ht..sp...............c....taN.............aFLull..hscspLpIhsYNRVV+..cL.....pshs.cpF.L.pp.lppt..F..p.lcphs.........................................................p.hp..P.s..p..hpsauhY.lss..p.aY..pLss+t...t..th.sp.........cs............................................lpsLDVsl.Lpsh..lLs.lLuI.....pD.Rs..-..pRI-FluGl+ultt............ctVssup.hplAFslaPsshcplh.slADsGplMPPKSTaFEPKLcSGLhl+ph.............................................. 1 143 226 256 +6074 PF06246 Isy1 Isy1-like splicing family Finn RD anon Pfam-B_9462 (release 9.0) Family Isy1 protein is important in the optimisation of splicing [1]. 
20.20 20.20 20.30 21.10 19.50 19.50 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.73 0.70 -4.98 31 342 2009-01-15 18:05:59 2003-06-02 17:00:10 7 8 291 1 245 340 3 234.50 38 85.85 CHANGED MARNsEKApSMLsRapptppsphthhct....+RPphsscsssLspAE+WRppll+EIo+KVscIQssuLu-apIRDLNDEINKLh+EKppWE.+Ip.pLGGPsYp+hus.p..hhDscGp..clsG....scGY+YFGtAK-LPGV+ELFE.ppppt.........tps++sRtc........hh+plcssYY.Ga.tDE......-DshLlthEtphEcphhpp.h.p......................t..thtttp.pphht.h.t..t.................................................................th.lPopp-lpp.llc+++pcLLp......Kh ............................................MARNsEKA.ohLsR.apptptt-tth.................hc.........RRPhhsocspslspsE+WRt.pll+.EIo+KVspIQs.............suL......u.-apIRDLNDEINKLhREKtpWEhpIc.pLGGPsY....t+.hus+...hhD...p.-G+...ElPG..............s+GY+YFGtAK-LPGV+ELFEpps............................ccs+tc...............h.h+pl.Ds.pYY.GY..hD-.......-DuhLl.h.Et.phEcphhtph..h................................................................................tttp...........p...h.....t..tt.......................................................................................................hhth.lPo.p-lpp.llc+++.cLlpc......................................................................................................................................... 2 89 138 204 +6075 PF06247 Plasmod_Pvs28 Plasmodium ookinete surface protein Pvs28 Moxon SJ anon Pfam-B_11346 (release 9.0) Family This family consists of several ookinete surface protein (Pvs28) from several species of Plasmodium. Pvs25 and Pvs28 are expressed on the surface of ookinetes. These proteins are potential candidates for vaccine and induce antibodies that block the infectivity of Plasmodium vivax in immunised animals [1]. 
26.00 26.00 26.20 27.30 25.00 25.90 hmmbuild --amino -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.65 0.71 -12.07 0.71 -4.73 11 164 2009-01-15 18:05:59 2003-06-02 17:02:55 6 2 22 5 12 166 0 197.10 58 88.67 CHANGED KVTVDTlCKNGhLIQMSNHaECKCspGaVLhsENTCEcpV.cCcc.-slsKsCG-YuhChspus.sp.psh+CsChsGYhlspslChPscCpshsCG.sGKCIlDssNssps.sCSCsIGKl....pNsKCTtsGcTcCsLKC.KtsEcCKhsstaYcClscsuustsusutsp................................................sss.......SlhsGhSlhsILsLhllal...hl ...KVTs-T.CKNGalVQMSNHFECKCN-GaVhh.sENTCEEKp.-Csp.pslNKsCG-YuhCssst.ssppcsh+CGCI.GYTlhp-..lCsPs+Cps...l.CG.pGKCILDssNsNso.hCSCsIGps...--sKKCsKsGcTcCoLKC.KsNEcCKpspsaYKClsKsuustsuuutss..................................................s.us........SlhNG.uVhsIL.h...hah..s............................................. 0 3 5 10 +6076 PF06248 Zw10 Centromere/kinetochore Zw10 Finn RD anon Pfam-B_9476 (release 9.0) Family Zw10 and rough deal proteins are both required for correct metaphase check-pointing during mitosis [1,2]. These proteins bind to the centromere/kinetochore [2]. 
19.30 19.30 19.60 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 593 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.86 0.70 -6.36 12 279 2012-10-03 17:31:52 2003-06-02 17:10:26 8 11 205 0 185 344 1 435.20 19 61.30 CHANGED h......................tpsstlph-.Dhptpls+lpp+lcchpscVpshIsccYs..-FhsshpusppllppsctLsc-l.DlLps.lps-lhtsLpsuss-hsphpppLccsthslpllppL..chcsth.....cphspALpsKsYltsAchLpch+shLctlps.cspp.............LchhpuLphEhplptppLl.pLsccap+Ll.aphsop.......pshhpspLpLsptpsp.........LssVl.Ahsh.GpLppplcthsphLlc.llcPLIspPshhsss-pppc.s.....lhLsappp.......pophc+sssppVap+lhhllcsLpphhhslssppp..........lhpllGchlhcclschlIccCLshuIP..ssuschpp.appllppsp-FEptLK-htals.s-s.osL.cYAcslssaFssK+CpcVlssARpLMpp-hashVh..........lsPp........................................................................t..t.cstppluptohshPsChlScSsp.cLMcLhhpsLpEs.ssusttsuspLhholcslhpLahssVPphHcc.LtplPQhuAlaHNNCMYluHaLhshuaphph.....h...hsoFsDlVsshRphGscsFhsQhphQ+upLh-tLsutp.Fpph.ssccphos .................................................................................h..t.....phptpl................p.ht....p.....ph........ph...........hpp...s.......pth...htp....spthtpcl...........th..h.pt....h....t...p................................h.tt..plpss...scht.lpp..plp..p.p.hhph...lp.p....l.....tphpphl............p.ph..ptshtp...t.phhtuh..p......hlpphp.thl..p......................p......................thhp.lp.c.....ht..pppl..tLtp.apphl.h.p.......................phphtt.t.t.tt...............lp.phl.uht.h.t.l..t....th..tp...htp.lhp.hlpP.hl........s...h..............t....................l...ph..............................tt.t..s.ttlhtpl...h.lhph.lpp.l.................................................h.p.huphhh.......p..h.phllpphL..ulP...ps.pphtp.ap.pl.lp.pstp.Fppt.Ltp..........hthh............t.................p...........tL.paspph..hahp+hp.phl.ts.Rplh..t......t.............................................................................................................................................
................................................................................................................................................................................................t...................................................................h...............hh.ha...............t.....sh....h..Nsh.ah...h...............................................................................................h......hp.....t...h..............h........................................................................................................................................................................................ 0 64 100 147 +6077 PF06249 EutQ Ethanolamine utilisation protein EutQ Moxon SJ anon Pfam-B_11530 (release 9.0) Family The eut operon of Salmonella typhimurium encodes proteins involved in the cobalamin-dependent degradation of ethanolamine. The role of EutQ in this process is unclear [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.69 0.71 -4.67 4 785 2012-10-10 13:59:34 2003-06-02 17:11:56 7 4 744 3 116 676 368 139.80 48 71.45 CHANGED hoKELlEpLl+pVlcEKhu.tpst.p.s.h+phD.SGIhulKL.sspsscpsRhDTGNP+DVVhTpDLhTL-ESsRLGAGlMEMK-TTFsWTLsYDElDYVIEG+L-llIDGRplsAcAG-lIhIPKGSpIcFSVss.A+FlYVsYPAsWpS ................................................................................................hscpLltpLhcK.VhpEK.uhp.........t..p...s.h.pphs...GhuulKll-usol+hs.RFDs.up.Pc...sVuhT.DL..VT..t.-..-...u...s.s..MuAG.FM.ph.....-...s..u...h..F..P..W..T..L..s..Y......D...E..IDh..VL..E..G..-..L...cV..c..c.-...G.....c..T...h.l..A+..AGDVhFIPKGS..o..I..cF..u..T..s..u..s..s.+..FlY.VsaPANWQS........................ 0 41 76 99 +6078 PF06250 DUF1016 Protein of unknown function (DUF1016) Finn RD anon Pfam-B_9571 (release 9.0) Family Family of uncharacterised proteins found in viruses, archaea and bacteria. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.79 0.70 -5.46 7 1895 2012-10-11 20:44:44 2003-06-02 17:15:21 6 10 943 0 343 1531 191 214.50 28 90.17 CHANGED lht-I+phI-puRpcshpuVNupLshhYWpIG+hIlEtpptGp.+AtYGtpllppLu+cLsp-as..+GFospNL+pMRpFhhhasc..hhtsLhtpLoW.....oahplL.plcst.pRsFYhcpshcptWSsRpLptQIsohhYERphlS+c.pcshpp....h.stpphpsc.sl+DPYlL-FLuLt-p..a.E+-LEpullp+LppFLLELGsGFuFVuRQ+RlplDsccFaIDLLFYphtL+Chlhl-LKlGcFphp.hGQMphYLpah-ch.+pPs-sPsIGlILCssKscs.lcYsLptpppslhsuEYKh.LsscEpLtpcL.pt ...............................................................................h...h.ttp.thh.thN..hh.hha..lGp.l.t........pttaGtthlttlu..tlt.th....pG..hs.psl..hh.ah..a.......................................................h.......h......p............t....h.....................t...........p...................l..........s...........h....................p.h......h...t....h.t.p.....tt...a.Y...s.htttWo.t..L...htt..hat+.....s...........................................................s....hh+s.h.h-hht...h.....t........h...Etphcttlhtph.pahhEhGtsF.a.spp..h..h.t.t......p.hhhDh........lhaph.hpthlh.h-lK.t.ht.t.huphthYhthhpt........t.-.t.slGlllCtttt....hph.h.......t.hhhupa.........h.............t.h.......t.......................................................................... 0 95 224 306 +6079 PF06251 Caps_synth_GfcC DUF1017; Capsule biosynthesis GfcC Finn RD, Sammut SJ, Bateman A, Eberhardt R anon Pfam-B_9574 (release 9.0) Family Many bacteria are covered in a layer of surface-associated polysaccharide called the capsule. These capsules can be divided into four groups depending upon the organisation of genes responsible for capsule assembly, the assembly pathway and regulation [1]. This family plays a role in group 4 capsule biosynthesis [2]. These proteins have a beta-grasp fold [3]. Two beta-grasp domains, D2 and D3, are arranged in tandem. 
There is a C-terminal amphipathic helix which packs against D3. A helical hairpin insert in D2 binds to D3 and constrains its position, a conserved arginine residue at the end of this hairpin is essential for structural integrity [4]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.48 0.70 -5.19 17 1054 2012-10-03 10:59:06 2003-06-02 17:25:22 6 19 714 4 140 613 338 216.40 40 55.92 CHANGED tsshsVplhtssp...phplsassshRLtplVtps.............pshshaW.uAsLh-..tpupscpppQplLtpLuphup.....pssschtushppLtpQLptlplstRlhhsLD.DhlRlscptNP.L.pGpYhLhlsP+sspVolhGhV..p.......psGshsapsttsspsYLpptsLlssA-pS.salIpPDGpspptPlAYWNcpah-hsPGuhlaVsF..t.osh.pphs..sLNppIlpLLpNRhsh ..........................................................................................................................h.stssVpla..Gp...ppsholsssEplhpLVsQP...................hhsp...WWP.uA.l.ls..-p......A......p......u..t.......Ah..p.......pp.....Q....tlh.s.p...LA.p..htA...................-uss..D..s..A..A......s.I.......ps......l.....Rp.....Q.....lps..Lc....l......T......G...R.......h..l..p.L.D.P....D..hVR..V...s..E....p....u....N.P.PL...p.............GsYTLals..spP.s.TV...TLhGhl..S.........psGp.P.ap.sGR.sVssYL....p....s....p.s...h.L.u.G.AD.+s.........h.s.aVlhP-Gc.o..p..p........A..P..V...A..h....W...N..K...R..H..l..E.....P...PGShlalGh..usplh....stp.s..sLNspIlpsLTQRlP.t............................................ 0 33 78 110 +6080 PF06252 DUF1018 Protein of unknown function (DUF1018) Moxon SJ anon Pfam-B_11125 (release 9.0) Family This family consists of several bacterial and phage proteins of unknown function. 
27.00 27.00 28.90 27.70 26.40 26.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.61 0.71 -3.59 54 451 2009-01-15 18:05:59 2003-06-02 17:38:09 7 3 363 0 82 413 9 116.80 26 75.27 CHANGED hDD-sYRshLtp.....hsG+pSspchs.spLppVlcthcp.pG..F............hppstt...................p.ts...t..hs.......pss.hcKIcAlWhphtptsshtp.sct.ALssal+Rhh...........tlsslpaL..p.sppsppllpuLKp...Wpp.Rt ......................................hDD-sYRshLtp.....h.s.GKs..Ss+chshspLp...pVlpthcp..pG..F...................p....pttt...................p.ss.............t.shs...........pts.hp...KIpAlWtphstth...........s..p.....uLs....tal++hh..............sls.p.....lpaL..s..sppsppllpsLKp...hppR................................................................ 0 30 58 73 +6081 PF06253 MTTB Trimethylamine methyltransferase (MTTB) Moxon SJ anon Pfam-B_11132 (release 9.0) Family This family consists of several trimethylamine methyltransferase (MTTB) (EC:2.1.1.-) proteins from numerous Rhizobium and Methanosarcina species. 
25.00 25.00 25.80 25.70 19.40 17.60 hmmbuild -o /dev/null HMM SEED 505 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.52 0.70 -6.74 8 451 2009-09-10 22:07:33 2003-06-02 17:43:21 6 2 119 2 177 465 1554 462.60 30 97.03 CHANGED tRpuGRtuR+A.Russuspphsh.plspshsshplLs--plEtIHcsuh+lLEElGI-shs-.pAl-lacpAGAcVt.cstRVRh-+ulltEhlposPusFTlHARNPp+slplGGpslsFussuGuPpVpDl-+GR.R.GsLtDapNhs+LApthsslHhhGsslCEPhDlsss.pRHL-slauplphSDKsFhsospu+tRApDulEMhtIstGt..-phcssP.sshoIINsNSPhhaDtsMsculhshActGQsllloPFsLuGAMuPVTlAGALsQpsAEALAGlALsQLlRPGAPVlYGsFsSsVDM+SGAPuFGTPEsupushsuGQLARRYsLPaRTuuu.suSphsDAQAAYEoshuhauAlLuGuNllhHuAGWLEGGLssSYEKhllDh-hLtthhcalpulslsE-sLuhDAItEV...GPGGHFhGspHThc+YpTAFYpPhluDapsFEpWsEpGupDttpRAsclWKchLA-YEtPhlDsAlcEsLc....-alA+R+schG ...........................h...............................................hphLsc-plptI...apsuhclLE-lGlphhss...pAlclh+puG.A.p...V............ps......p..pV...+hscsllcchl.t.pAPp..pFTh.puRss...c..c.slplG..G..p..p.hhau..ss.hus.........P.hl.hD.h.c.s....s+...R........uoh....pDhtshs+Ls.phhsslchs.u.s.ss.stP..pD..ls.....ss..ppcl.s..h..hhs..p..l.phocKs........hh.....ss......s......s..s.....pps......pc.....s.......l..-M........sp.....lhhGt.......-...hc..pps.....hh...h.s.h.hs.ss..SPLpaspshhpslhthuctsQssllss.hshuGAhuPsolAGslsQs.AEsLAGlshsQllpPGsPsla.Gsasosh.DM.+..o.G.usshGoPE.shhshusuQl.A.+.h.a.sLP.h+suuu.hssuKhsDsQAuaEsshohhhuhhu.GsNhlhHuuGhL-uhlssShEKhlhDs-hlshhp..+hh.cG..l..ph..s.........-..c..s..l..uhD.sI.c.c.V.......G..P.s.GHaLuptHT...ppaps.s.a.ahP.tlh.Dpp..shcp...Wp.tt.G.t.hshhp+AptthcchLss....a...p......P...l....DsslcctLp....thl....................................................................... 0 84 140 150 +6082 PF06254 DUF1019 Protein of unknown function (DUF1019) Finn RD anon Pfam-B_9681 (release 9.0) Family Family of uncharacterised proteins found in Proteobacteria. 
19.60 19.60 21.50 20.70 19.20 18.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.17 0.72 -3.82 12 911 2009-01-15 18:05:59 2003-06-02 17:45:42 6 1 343 4 26 400 0 86.30 43 56.64 CHANGED phs-........slspNRQKlF.RWLcs..DopttR-plptLhPAIhusL...PhEh+sRlps.sshh.hlspht.KEhs-Appulhh......hh+-ls-tIssh .............................hPc..ALupNpQKIF.RWlcp..DT.DthEKhpALlPAI.cAh..............P.h..Lhs+...hpstpShha........+Els-s+-pl...........l+-sD-hlts................................................................................ 0 2 7 12 +6083 PF06255 DUF1020 Protein of unknown function (DUF1020) Moxon SJ anon Pfam-B_11136 (release 9.0) Family This family consists of several MafB proteins from Neisseria meningitidis and Neisseria gonorrhoeae. The function of this family is unknown. 25.00 25.00 46.30 41.60 24.20 24.10 hmmbuild --amino -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.92 0.70 -5.23 15 288 2009-01-15 18:05:59 2003-06-02 17:46:46 8 13 49 0 16 255 0 227.60 44 62.34 CHANGED DuFIRDshQRQHYEPGGKYHLFGssRGSVu-RoGpIslhpspoHplGNLLI...QQAuIcGsIGYpsRFSGHGHEVHSPFDNHAS+SsSDEuGuVD-GFTVYRLHW-GaEHHPADGYDGPQGGsYPtPsGARDIYSYalKGTApsIKLNhTDsRShuQRlhDcasNAGSsFosRADEAsK+haEasPcLDRhGNuMEslNGlAsGAlNPFluAGEulGlGDIlpGsspulshAA.......M+sLGsLSsEuKhAsIssLuDsAthcKsutsAl+cWAspNPNAApTlEAllNVh.AAhslhth.................K+spLsssAhPhGKuAVSssFutu....YpsP.tuRs ............DshlppshptpHaEPsGKYHLFGs.RGpltpRshtlthhp..s..huslh....ppsshpG.lGY.s+FSsHGaE.HuPFDpHsucSsSc.tGssstG..FolYplphpG.EhHPtDuYDGPpGusYP.PtGARD.YoY.lpGsupph+hN...........ssptshtpRh.cp.tshuu.h.ptss-u.+hhhc.ssphsh.uNtM-.lpGls.tGAhNPhlsuhphlGhGshh.uh..s.s.ss.......hpslu.LssEsphAhhs.Lts.A.h.cpshpuscpWhptpPNhstTspsshslh.AA........................KhspLs.sths......sKsss....................h............................................ 
0 5 5 5 +6084 PF06256 Nucleo_LEF-12 Nucleopolyhedrovirus LEF-12 protein Moxon SJ anon Pfam-B_11198 (release 9.0) Family This family consists of several Nucleopolyhedrovirus late expression factor-12 (LEF-12) proteins. The function of this family is unknown [1,2]. 25.00 25.00 141.00 121.10 20.30 20.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.15 0.71 -4.51 12 26 2009-09-11 05:10:50 2003-06-02 17:51:17 6 1 25 0 0 23 0 176.70 38 87.59 CHANGED spcpFppRLsYVssIsshMpcTLchMshpGthocsDAsTLClADDTAAWlCGRscsss.FsSFRl+hut..F.ppsspsLp+FhFEESLtQ+h.....tshs+YTYMNYsl.hcs.lAIKLsVhht..chcsD........sLsYFVshtsupspshs.........................csssshphpLs.......hEchlEEhlspussp ........pppFspRLpaVssIsshh+csl-hMsppsthopsDAsoLClADDTAAWlCGRlssss.FVSFRl+hst..F..c+sspsLc+FtFEESLtQph.....pshs+YsYMNYol..pshlAlKLlVahc..shpss........sLsYFlpststpspppsp.h.+....................cpss.hphsLsh......hEphlE-shsptlV........................ 0 0 0 0 +6085 PF06257 DUF1021 Protein of unknown function (DUF1021) Moxon SJ anon Pfam-B_11556 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 26.30 30.20 20.80 19.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.51 0.72 -4.29 44 1201 2009-01-15 18:05:59 2003-06-03 09:26:38 6 1 1190 2 152 402 3 76.60 46 88.55 CHANGED hspsLssIKpsl-s+lGc+lpl+AssGRKKhhc+pGlLccTYPulFVVc..LDpp......psshcRVSYSYoDlLTcsVplpa ...............sppltcIKppl-sHlGpplhlpApsGRK+p..hpRpGlLtETYPSlFlV-..L-ps......psshp+VSYoYoDlLTEslplpa.... 0 56 98 125 +6086 PF06258 Mito_fiss_Elm1 DUF1022; Mitochondrial fission ELM1 Moxon SJ, Eberhardt R anon Pfam-B_10595 (release 9.0) Family In plants, this family is involved in mitochondrial fission. It binds to dynamin-related proteins and plays a role in their relocation from the cytosol to mitochondrial fission sites [1]. Its function in bacteria is unknown. 
21.80 21.80 21.90 23.40 21.20 20.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.77 0.70 -5.48 31 342 2012-10-03 16:42:30 2003-06-03 09:28:41 6 2 271 0 131 321 1802 296.30 25 87.47 CHANGED sGpcsQshGLucALuh................................shphpplthpp...................................h.th..h.ssshs...shPcLlIuuG+pTthhshhl++htsst.hsVtlhcP+hs.hppFDLllsPpHD......................pl.sssNV.....lhohGulptlssttltpstt.....ht...+splslLlGG.soppaphssp.htplssplpslhppts.h.plhlTsSRRTPssstshLpphht.....hstl.la..stss....sNPahshLuhADslhVTsDSsSMlsEAsuoG.sPVhlhsls..s.pps.+hschhspLhppGhl+sasuh..hpt..ht.htPLs-s.....s+sApplhpchth ......................................................................................................sGppsQshuLsptLsh..................................................................................................hp..h..hplphpt............................................................thh........s.hh......sht...t....h...th.htt..hpt.....PslllusGpps.t.hshhl+....+....hh..ts.hhslplh.....c......P.phs....hstFDhllsPpHD.............................................th.tssNVlhhhGulptlssttlttst.th.....t...h.....h.................psh.l.slllGG.ssp..p.apaspp.htplhptl.pplhpsps..hslhlosSRRTPtps.tshlpptht........t..s..pl.la......stss.....N.P.ah.u.hLuhA-tlllTsDSlSMloEAsuoG.pPVhlh..shs.....t...pps...+hpp.hhppLhpp.thhp..h.s.t...h.t.........h.s..lp-stchAt.lht....t.................................................................... 0 45 94 109 +6087 PF06259 Abhydrolase_8 DUF1023; Alpha/beta hydrolase Finn RD, Bateman A anon Pfam-B_9720 (release 9.0) Family Members of this family are predicted to have an alpha/beta hydrolase fold. They contain a predicted Ser-His-Asp catalytic triad, in which the serine is likely to act as a nucleophile [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.30 0.71 -4.78 6 556 2012-10-03 11:45:05 2003-06-03 09:31:10 7 6 239 0 146 545 20 174.30 31 36.72 CHANGED DPtspshAApssGDl-sA-+VSVsVPGlsoss...ppohsshhttstthttcshstpptAu.ss..ssAsIAWhGYpsP........ussuhsssTsspAcsGAscLsthlcsLpAso..sstcloLFsHSYGSlVsuLAhcs.....ss.VsDlVlhGSPGhcAsssApLGs-.u+sasMpus-DaIppl.Pplus .....................................................................................tpshAslulG.ss.D.s.A.cc.VuVh.VPGssosl...................pssh..s..s...h.p.p.s.ts...h..t..........t....p............h.....hp..tp.t..s.u....ss...........ssAsIAWhGYcs.P.......................ss.s.h.hp..s.s.ss.s.h.A.....csG.AspL....sp.Fhpu...L................p.....u............s.......p.....ss.......s...s..c.............lTllGHSYG.S.l.lsu.h.A.hpp.................s.h.t.ss-.l.V.h..h..G...S.P..G..h..............s....................s...s........s..........s......u.......p......L.sh..s...............u.+s..as.hp.u..s..s.D.lt.......s.............................................. 0 60 119 141 +6088 PF06260 DUF1024 Protein of unknown function (DUF1024) Moxon SJ anon Pfam-B_11584 (release 9.0) Family This family consists of several hypothetical Staphylococcus aureus and Staphylococcus aureus phage phi proteins. The function of this family is unknown. 21.20 21.20 22.30 22.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.91 0.72 -3.83 3 370 2009-09-11 11:00:23 2003-06-03 09:32:37 7 1 210 0 4 184 9 79.50 82 96.81 CHANGED MNNREQIEQSVISASAYNGNDTEGLLKEIEDVYKKAQAFDEILEGlsNAhQ-ulKEsItLDEAlGIMsuQVlYEYEEEQENE .......MNNREQIEQSVISASAYNGNDTEGLLKEIEDVYKKAQAFDEILEGlsNAhQculKEsItLDEAVGIMsGQVVYKYEEEQEN-................................ 
0 3 3 4 +6089 PF06261 LktC Actinobacillus actinomycetemcomitans leukotoxin activator LktC Moxon SJ anon Pfam-B_11552 (release 9.0) Family This family consists of several Actinobacillus actinomycetemcomitans leukotoxin activator (LktC) proteins. Actinobacillus actinomycetemcomitans is a Gram-negative bacterium that has been implicated in the etiology of several forms of periodontitis, especially localised juvenile periodontitis. LktC along with LktB and LktD are thought to be required for activation and localisation of the leukotoxin [1]. 25.00 25.00 27.10 140.60 22.50 20.80 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.99 0.71 -4.50 2 18 2009-01-15 18:05:59 2003-06-03 09:42:17 6 1 11 0 0 10 0 139.60 91 99.45 CHANGED MSuTEYAPFYLRFIQFPsNEV.LhEYWpLVQN.VpKlp+ITVhhAQIlGhhuEKsIWKYQuTFNDGhLEsEVAK.-VSLTLRNSALLVASAIVIHFKSNFTNLLILSQITQYppahPh.KKSKY.PLYhSCLLRRRLTEFKITLLPLPWG MSGTEYAPFYLRFIQFPSNEVLLYEYWKLVQNFVQKVSKITVRLAQIVGILGEKTIWKYQSTFNDGMLEGEsAKQEVSLTLRNSALLVASAIVIHFKSNFTNLLILSQITQYCRHRPKPKKSKYFPLYLSCLLRRRLTEFKITLLPLPWG...... 0 0 0 0 +6090 PF06262 DUF1025 Possibl zinc metallo-peptidase Finn RD anon Pfam-B_9726 (release 9.0) Family This is possibly a family of bacterial zinc metallo-peptidases. Although they carry the HExxHxxGxxD motif, they are missing a final methionine which would class them as Met-zincins. 20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.09 0.72 -4.20 78 911 2009-10-19 15:30:34 2003-06-03 09:47:56 6 14 671 2 290 712 69 87.30 28 61.27 CHANGED hssVslhV-Dh..Pstt.hsphtht...pLLGLYcGlsLo-+ush...uthP..DpIhLaRcPlLchh......s-cs-..Ls-.VpcsllHEIuHHFGlSD--lct..lsh ..................................................tl.h.ltt...s.....................................lhG.ap.Ghslsp...ps........ssthP..s+IslYRcPl.phs.........ssc--..Ls-..hVpcsllHEluHaaG...ls--clctht......... 
0 93 203 253 +6093 PF06265 DUF1027 Protein of unknown function (DUF1027) Moxon SJ anon Pfam-B_11526 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 81.90 81.60 23.40 22.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.93 0.72 -4.11 32 1012 2009-01-15 18:05:59 2003-06-03 10:12:50 6 1 1008 1 97 433 0 85.50 54 52.69 CHANGED lplsshpacllcNh+-uF-tEthtpRaS-lLsKYDYIVGDWGh-QLRL+GFYcDsspps.hDp+IosLpDYLhEYCNFGCAYFVLc .....lplsshtacLlcNa+-uFc.EpFtpRaS-l.LsKYDYIVGDWGh-QLRL+GFYcDsspcs...hsp+ISsLpDYLhEYCNFGCAYFVLc......... 0 20 49 73 +6094 PF06266 HrpF HrpF protein Moxon SJ anon Pfam-B_11646 (release 9.0) Family The species Pseudomonas syringae encompasses plant pathogens with differing host specificities and corresponding pathovar designations. P. syringae requires the Hrp (type III protein secretion) system, encoded by a 25-kb cluster of hrp and hrc genes, in order to elicit the hypersensitive response (HR) in nonhosts or to be pathogenic in hosts. The exact function of HrpF is unknown but the protein is needed for pathogenicity [1]. 21.70 21.70 22.60 23.50 21.30 20.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.39 0.72 -3.85 7 64 2009-01-15 18:05:59 2003-06-03 10:41:15 7 1 59 0 13 36 0 73.40 46 98.66 CHANGED M.ShsuLQRRLDsshpcApsplD-AshpuuEu.ho.sD.aAFhEAuhphusAShAssQpLpsKHGLsKAlIstIp .....MhShpuLQppLDoshsRAposlDDhsLsuupG.hot-DhpAFsEASpphusAS.AAsQphpstHtlsKslI-uhp.. 0 0 3 8 +6095 PF06267 DUF1028 Family of unknown function (DUF1028) Finn RD, Bateman A anon Pfam-B_9747 (release 9.0) Family Family of bacterial and archaeal proteins with unknown function. Some members are associated with a C-terminal peptidoglycan binding domain. So perhaps this could be an enzyme involved in peptidoglycan metabolism. 
25.20 25.20 40.70 32.70 22.70 22.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.13 0.71 -4.87 78 406 2009-01-15 18:05:59 2003-06-03 10:49:56 7 10 357 2 175 408 353 190.90 39 75.84 CHANGED TFSIlA+sspTGphGlAluSs.lAV....Guh.lsas+AGVGAVATQu.hssPth.GspuLchLppG.hsAppslpplhssD..sttchRQlsllDspG....psAsaTGspshsh...tuphsGps..hustGNhLssspVlpAMspuFpsss..................st...Lu-RLlsALcAGpsAGGDtR....u..hpSAALhVss....ptsa..........shlDLRVDhpsc..P...ls-L ...................TFSIlA+..ss.......c...oG.......p..hGlAluSp.hAV.....GAh.sPascAGVGAVATQs.h...ssPsh..Gs...puL..chLc.p......G.....hssppuLctllssD.............sttphRQlsllDspG...............psAsaTGpcs...hs..h.............tuth..sG..c....s.................hsstGNhLuutpVlpAMspuFpsss.....................ut....Lu-RLlsALpA...G.sAG.G-tp......s...hpSAA..LhVls...ptsa...........sh.lDLRVD.pps...PlscL................. 0 57 107 144 +6096 PF06268 Fascin Fascin domain Moxon SJ, Bateman A, Finn RD anon Pfam-B_11660 (release 9.0) Domain This family consists of several eukaryotic fascin or singed proteins. The fascins are a structurally unique and evolutionarily conserved group of actin cross-linking proteins. Fascins function in the organisation of two major forms of actin-based structures: dynamic, cortical cell protrusions and cytoplasmic microfilament bundles. The cortical structures, which include filopodia, spikes, lamellipodial ribs, oocyte microvilli and the dendrites of dendritic cells, have roles in cell-matrix adhesion, cell interactions and cell migration, whereas the cytoplasmic actin bundles appear to participate in cell architecture [1].\ Dictyostelium hisactophilin, another actin-binding protein, is a submembranous pH sensor that signals slight changes of the H+ concentration to actin by inducing actin polymerisation and binding to microfilaments only at pH values below seven [1]. Members of this family are histidine rich, typically contain the repeated motif of HHXH [3]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.38 0.72 -3.97 23 817 2012-10-02 19:42:32 2003-06-03 12:37:00 8 24 166 30 411 843 24 107.60 21 63.95 CHANGED ssta.ssc+hsscLsAstts.psh...........plapLphpss...shs..L+.opsG+YLshtscGpltspsp....tssstFplchp....u+hshhptspG+YLs..ussupLpu.pusssupsELasl ............................................................................................thh.sspp..t.h.p.lsustt..p.tph...........ph.apLp..hssp..................shth...l.....+..o.p..sG+..Ylsh.ts.cG.s..ltssss.........ssssh...F.p.lphp...........sph...s..h..h..ptssG+YLs...hss.sG...pLt.u.pupssu..tsEhath......................... 0 103 156 257 +6097 PF06269 DUF1029 Protein of unknown function (DUF1029) Moxon SJ anon Pfam-B_11672 (release 9.0) Family This family consists of several short Chordopoxvirus proteins of unknown function. 25.00 25.00 63.20 62.70 17.20 16.10 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.75 0.72 -3.96 11 43 2009-01-15 18:05:59 2003-06-03 12:41:20 7 1 37 0 0 21 0 53.00 63 99.48 CHANGED MITNYEPlILlsIlshullsNhplSpKsKlDIIFhlpollFhWFlFHFlHSVh MISNYEPLlLLsIhCssLLhNFslSSKTKIDIIFslQoIVFhWFIFHFVHSsI.......... 0 0 0 0 +6098 PF06270 DUF1030 Protein of unknown function (DUF1030) Moxon SJ anon Pfam-B_11673 (release 9.0) Family This family consists of several short Circovirus proteins of unknown function. 
25.00 25.00 128.20 127.60 16.20 14.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.85 0.72 -4.43 2 10 2009-09-11 20:47:49 2003-06-03 12:43:03 6 1 6 0 0 7 0 53.00 95 88.33 CHANGED MVFIIHLGFKWGVFKIKFSELYIHGYTDIVVLVVaTVFERSAEAYhVaISpuL MVFIIHLGFKWGVFKIKFSELYIHGYTDIVVLVVFTVFERSAEAYVVHISRGL 0 0 0 0 +6099 PF06271 RDD RDD family Bateman A anon Pfam-B_1111 (release 9.0) Family This family of proteins contain three highly conserved amino acids: one arginine and two aspartates, hence the name of RDD family. This region contains two predicted transmembrane regions. The arginine occurs at the N terminus of the first helix and the first aspartate occurs in the middle of this helix. The molecular function of this region is unknown. However this region may be involved in transport of an as yet unknown set of ligands (Bateman A pers. obs.). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.86 0.71 -4.28 114 4557 2009-09-14 12:06:15 2003-06-03 12:48:13 7 44 2499 0 1260 3598 868 135.70 17 61.94 CHANGED hushhpR.hhAhll.Dhllhhhlhhlhshhhhh.............................................hhhhhhhhhhhhl.........hhhhahhhhhth..tGpT.lGKhlhsl+lls..............psupth....sht..pshlRthh....................................................h.hhhhhhhhlshlhhhh......spc+..p..slHDhlu.s .............................................................sshhpR.hhAhll.Dhhl..lh.h.l.h.h.h.h...hhhhhh..................................................................................................................h.h.h.h.h.h..h..h.h.h...h.hl............h.h.h.hah..h..h..hhth......tGpTlG+thhtl+lls............................tsu.p.p.l.......shh.......p.sl...l.Rthh.........................................................h.hh..h.h..h..h.h..h..l..sh..lhshh...........s.tc+.....p.shtDhhu.................................................................... 
0 414 844 1088 +6101 PF06273 eIF-4B Plant_eIF4B; Plant specific eukaryotic initiation factor 4B Moxon SJ anon Pfam-B_11679 (release 9.0) Family This family consists of several plant specific eukaryotic initiation factor 4B proteins. 28.00 28.00 28.40 28.20 27.90 27.90 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -13.00 0.70 -5.74 6 148 2009-01-15 18:05:59 2003-06-03 12:55:18 6 5 33 0 89 145 2 231.40 23 78.73 CHANGED MS...KPWGGl...GAWAh-AERAE-EptppA......sA.......tAsAsotsFPSL+EAAsupu....p+KhKphoLSEFphssYsts.........potGLTpp-hh.LPTGPRQRSt-Ehp.uRLGGGFpSY...t.........................GGGRpsYuGGFDsDpRs.tsRsSDhspsS..................................RAD-lDDWu+sKK..PhPSh-ptcpu.RYsu.......................hGGGGGG............uht.......Ghts.stusDsDpWuRuc..ss.........hscsG..................cERPRLlLpP.Kt-ssuo.s.Pss..tpS+PuPFGAARPRE-lLAEKGLDW+Kh-o-IEtK.....TSRPoSupSSRPuSApSspstSsh.p....Gs-sss+sRPKVNPFGDAKPREVlLpE+GKDWRKIDhELEH...RcV-RP...ETEEEK.LKEEIppL+tcLccppuh..p.ttstp-.......ptls-hIpphE+pL-hLshELDsKVRF........G.+.hpRPGSGAuRsusaspps.scupsh-sp..ht.M-RPRSRGssDshs+PsD-.RpuFQGS+..ERG 
......................................................................................................................................................................................................................................................................................t....................................pu-..tpW...+............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 19 56 75 +6103 PF06275 DUF1031 Protein of unknown function (DUF1031) Moxon SJ anon Pfam-B_11618 (release 9.0) Family This family consists of several Lactococcus lactis bacteriophage and Lactococcus lactis proteins of unknown function. 19.90 19.90 19.90 119.10 19.30 19.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.66 0.72 -3.94 2 29 2009-01-15 18:05:59 2003-06-03 13:04:35 6 1 24 0 3 21 0 79.00 63 97.37 CHANGED Msthph.shhKL.sLAhpsphs.satp.SVhIosRTKtsHELsplYhDIpshaNpuKpMpWspLaphh.KpLTcsh.l.. .MlKTNFlTLKKLaGLA+NNNasss+K-LSVKISGRTKaNHELSQLYLDICNKYNHSKQMKWp-LYKIL.cELTcshtI-.. 1 1 1 1 +6104 PF06276 FhuF Ferric iron reductase FhuF-like transporter Moxon SJ, Bateman A anon Pfam-B_11690 (release 9.0) Family This family consists of several bacterial ferric iron reductase protein (FhuF) sequences.\ FhuF is involved in the reduction of ferric iron in cytoplasmic ferrioxamine B [1]. This family also includes the IucA and IucC proteins. 
20.80 20.80 20.90 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.83 0.71 -4.28 156 3190 2009-01-15 18:05:59 2003-06-03 13:08:12 7 11 1404 17 525 1984 20 162.10 18 31.46 CHANGED ptWhppYhphhlsPllthh..hpaGlsh.......psHhpNhllhhpst.GhPsp..hhh+Dhtsthhh.st.......h.............t.tt..hh.....p.hhpphhhslhhsplt.lhstLsp........stlscpthWphltphlpthhpphsphttt............hhtthhhtsslstcs.......................Rhtstsptts ....................................................................................tWhppYhphhl..Pllthh...hpaGluL..........-sHh.QNhllth.......cp.........G.....hssp....hhl.+Dhpst.hh.hhpt......................................................................t.ps...hhht....p.hhpphhtsl.....l.spLttlltslsp.......t..ssls...EphlWphltphlpphhsp.hsphssp....................hthhc.h.h.t..p.hshct....................th.......t.................................................................... 0 119 287 418 +6105 PF06277 EutA Ethanolamine utilisation protein EutA Moxon SJ anon Pfam-B_11716 (release 9.0) Family This family consists of several bacterial EutA ethanolamine utilisation proteins. The EutA protein is thought to protect the lyase (EutBC) from inhibition by CNB12 [1]. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 473 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.58 0.70 -5.89 19 699 2012-10-02 23:34:14 2003-06-03 13:13:58 6 2 662 0 49 1989 1083 442.30 60 97.76 CHANGED cp.lhSVGIDIGToTTQLlFS+LplcNhAushsVPRlpIl-KEllY+SslhFTPLhspstIDsctl+pllcppYcpAGIs.psIcTGAVIITGETARKENAppllpsLSuhAGDFVVATAGPcLESIIAG+GuGAtphSc-+pssVlNlDIGGGToNlAlFcpGclhsTuCLclGGRLI+hDs.stclsYIuPthcpllcchulp..lplGspsshpplpplsctMsphLhpslshp.hoshhphLhhsc............slss..stplcs.....loFSGGVA-tlYpptpt....-.FcaGDlGhlLGpAl+psshhp..phplhpssETIRATVlGAGsHTsplSGSTIhhs.p-.hLPl+NlPVl+hst--ct.s..pslspulppplshFcl..csppptlAlAhsGptsssatplpslApulhpuhpphhppphPLllVl-pDhuKVLGpsLhshLshptsllCIDulplpsGDYIDIGpPlhsGpVlPVllKTLlFs ..................................................................................................................................+.pLLSVGIDIGTTTTQVIFS+..LELs.N......cAuVSpVPRaEhIcR-IsapSPVaF..T...PlccQutlcpscLKsLIL..c..QYp.uAGI.sP.EuVDSGA.lIITGESAKp..+..NARssVhALSpSLGDFV..VASAG.P.H.L...ES.V.I.A..G.+GA.G..AQ.o...L..S..E..Q..R..h..C.R.VLNIDIGGGTuNYALFD.AGKlSGTACLNVGG.....R.....L..........LE.....TD......u....QGR....V.......V...Y.....A.....H..p..P..G..Q...h...I....l.....D..E.....s...F...G...u.......G....T...-....u....+....u....L...o....u.u....Q..L......s......Q..V........s....R..R.....M......A.....p.....L........IV.....EVIsG...s.......L...S....PL....AQ...u.L...M..Q..T.s............LLPA....sls...P.El.....ITL.S.GGVGECYRpQPA.......DPFCFuDIGPLLATALH-HPR..LR......EMNVQFPAQTVRATVIGAGAHTL..SLSGSTIWLE..sV.pLPLRNLPVslPh--.......................pDLssAWpQALh...Q.LDL..-PcTDsYVLALPAoL.PVRYAAlLTVIsALlcFVA.Ra..PNP+.PLLVVAtQDFGKALGMLLRPQL.......Q.L.PLAVIDEVhVRuGDYIDIGTPLFGGSVVPVTVKSLAFP.......................................................................................................................................................................... 
1 24 36 43 +6106 PF06278 DUF1032 Protein of unknown function (DUF1032) Moxon SJ anon Pfam-B_11729 (release 9.0) Family This family consists of several conserved eukaryotic proteins of unknown function. 23.70 23.70 25.10 24.10 22.30 23.30 hmmbuild -o /dev/null HMM SEED 565 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.85 0.70 -6.03 4 230 2009-09-11 13:33:15 2003-06-03 13:15:43 6 10 116 0 163 219 1 251.70 20 56.13 CHANGED MNFhEAALLIQGSAClYSKKVEYLYSLVYQALDFISsK++s+QhousspDGspcsss..APppsEp-FhuLDDlssscs....NVDLKcDphssslhIlPLhP.ALVsP-EhEKpssPLhSppGElLASRKDFRhNTChPpPpGsFhL-.lGhs.hpslp.h........httsQ+-.scuEppPMEl..S.susPs.s.phSpE...tsEussssGGh....-E-A-sGAE........thtlEssE.hppcpushpsRtYhLR-R.ptssEssupLpETPDPWpsLDPF-Sh.EsKPF+KGKsYoVPsGl-E.hsupKRKRKGssKLQDFtpWa.sAah-ts-ucRuRRKGPTFADMEVLYWKHlKEpLcs.RphpRRcusp.....paLsctEptlhPlEE-RlEDslE....s.GsADDa.-sE.....-hsE.stus.spEsA..Dl-s.sh.spLpYEELV+RNV-LFlssS..........QKasQETtLSpRlR-WEDsIpPhLQEQEpHssFDIHpYGDplsupaSplspWpsFApLVAGp.saEVCR.MLASLQLANDYTVEloQpPGL-suVDTMuLRLLTpQRAHpRFpTYsAPShsQ. 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................lttWp..h...L.......p..p..apht..sp.hht.h.........................................................................h......................................hsp.h.s.L.h.................................................................................................................................................................................................... 0 66 97 134 +6107 PF06279 DUF1033 Protein of unknown function (DUF1033) Moxon SJ anon Pfam-B_11732 (release 9.0) Family This family consists of several hypothetical bacterial proteins. Many of the sequences in this family are annotated as putative DNA binding proteins but the function of this family is unknown. 
25.00 25.00 62.50 62.30 23.20 21.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.84 0.71 -4.29 16 683 2009-01-15 18:05:59 2003-06-03 13:18:02 6 1 679 0 40 200 0 112.80 45 97.96 CHANGED MYpVlchhGDaEPWWFh-sWp-DIlppppF-sap-AhpaYpphW.chcppaspapo+pshhsAFWNpsDpcWCE-C-EsLQpY+SlhLLpspcsh.s..phh.thtppst....t..Cplpt ..MapVlcMaGDaEPWWFh-sWpEDIVppppF-sa.DAhKaYpphW.chcppas.Ypo+pshhssFWsspDp+WCE-C-E.LQpY+SLhLLpsp.pVl.s.ppht.hhphpst.....t...Cph..h............... 0 6 15 30 +6108 PF06280 DUF1034 Fn3-like domain (DUF1034) Yeats C, Moxon SJ anon ADDA_1269 Domain This family consists of several domains of unknown function which are present in several bacterial and plant peptidases. This domain is found in conjunction with Pfam:PF00082, Pfam:PF02225 and is often found with Pfam:PF00746. This domain has a structure similar to an Fn3 domain [1]. 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.68 0.71 -3.66 47 863 2012-10-03 16:25:20 2003-06-03 13:19:24 7 76 562 3 234 844 9 117.40 23 8.11 CHANGED lsLpph.ssp..hsFslsl+NhuspshoYplsst.....shTch.....s..ppGhhhtstts.t....................shshsssslTVsAsuotslslslss.sthtt....................pshalEGalphpsscss..............plolPYh .....................................................lsLtpl.scp...hpaslpl+N.hus...p..s..hTaplssp........shT-t.............s...pps.p.h..stp..t.hs..................................shshs.tsslTlsAsuo.tslssslsssp.s.hsp.......................sh.alE..G.FlpFpuspst..............slolPah...................................................... 0 57 142 195 +6109 PF06281 DUF1035 Protein of unknown function (DUF1035) Moxon SJ anon Pfam-B_11733 (release 9.0) Family This family consists of several Sulfolobus and Sulfolobus virus proteins of unknown function. 
25.00 25.00 46.10 43.70 21.30 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.67 0.72 -4.19 8 29 2009-09-11 12:40:51 2003-06-03 13:21:24 7 1 15 0 1 33 0 63.20 65 61.94 CHANGED VLFsPIlSpVN.LTTstoaTTh...SGTl..TposhlsNPQYVGSSNATlVsLVPlFYLLVLIIVPAVluY+lYKpE VLhssIhSpVNsLT...........SGTs..........PphsGo.NATLlsLVPLFYlLVLIIVPAVlAYKIYK..c.. 0 0 0 1 +6110 PF06282 DUF1036 Protein of unknown function (DUF1036) Moxon SJ anon Pfam-B_11760 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.10 21.10 21.50 21.40 20.70 18.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.72 0.71 -3.77 23 269 2009-01-15 18:05:59 2003-06-03 13:24:19 6 4 205 0 101 224 15 113.00 37 65.94 CHANGED sApA-aclCNpTsshVslAlGY+sss..uWsoEGWWplssssCpsl.lpGsLssRaYYlYApDss....tts....pWsGchphCsp-.scFsI..tGsp-ChsRGapcsGFtElDTup.ppsWTlpLT- ...........................ApADh+lCNsTpshVulAlGY..+s.ts......uWsTEGWWplss.usCcTl...lcGs.L.s..uRYYYlYAcDsp.............tuu........pWsGp..s.phClt....-...pcF.pI...pGsp...D...ChuRGap+sGFtEhDTuc.ptsWhlpLo............................................ 0 25 55 70 +6111 PF06283 ThuA DUF1037; Trehalose utilisation Moxon SJ anon Pfam-B_11803 (release 9.0) Family This family consists of several bacterial ThuA like proteins. ThuA appears to be involved in utilisation of trehalose [1]. The thuA and thuB genes form part of the trehalose/sucrose transport operon thuEFGKAB, which is located on the pSymB megaplasmid. The thuA and thuB genes are induced in vitro by trehalose but not by sucrose and the extent of its induction depends on the concentration of trehalose available in the medium [2]. 
31.90 31.90 32.10 31.90 30.90 31.80 hmmbuild -o /dev/null --hand HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.44 0.70 -4.63 91 1197 2012-10-03 00:28:14 2003-06-03 13:27:10 6 87 601 10 469 1171 683 219.50 23 52.37 CHANGED +lllascs.tthpp....h..h..............shtssltphhpt....stthpssshp-..tpshsc.....pp..........LsphDlllhhsst...t..............sc.phtpslpcaVt.sG.tGllslHsuthsth............p..........apc.lhG................................................................................................................................................uphphHs...........stpphplh..lhsssHPlspGl...s.pt.....................aph.p-EhYshph.hPpss....pVLhoh..............................tsGcsashuashph..............................GtGRlFYpshGH......p..........sh.sa.pssshpphlhsulpWAs ....................................................................................hlhtt....tt..pt....p...h..............shtttltp.hhp...............ttsh..p..s..s..s..t...p..s....p..s...hsp............................pp..........L.s..p.hDl.llhhsst........tpt.........l.........................ss..pth....pt.....l....tph....lp.sG.tGllslH.u.s....hs..sh............................p..................app..lhG..................................................................................................................................................u..t.h....pt....Hs...............shpphplh........lh...s..s.....s..HPlsp..Gl.......pp.................................aph..p-E.hYs..hph...sPp...t..............hpl..Lhshp..................................................tsup.s..aP.....huWsh.ph..............................GtGRlFYsshGH.........................p................st..sa.p.ssshpphlhsulpWu................................................................ 0 213 376 431 +6112 PF06284 Cytomega_UL84 Cytomegalo_UL84; Cytomegalovirus UL84 protein Moxon SJ anon Pfam-B_11948 (release 9.0) Family This family consists of several Cytomegalovirus UL84 proteins. 
The open reading frame UL84 of human cytomegalovirus encodes a multifunctional regulatory protein which is required for viral DNA replication and binds with high affinity to the immediate-early transactivator IE2-p86 [1]. 25.00 25.00 74.40 32.90 18.10 17.80 hmmbuild -o /dev/null HMM SEED 530 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.70 0.70 -6.09 4 46 2012-10-03 01:18:03 2003-06-03 13:30:03 6 2 15 0 0 42 0 419.40 53 97.45 CHANGED MPRAD.TLRNtsR.uRsRKus.............................................................................h.sD-TILTLTDpHclKpPLs++GTY+LIQLHlDhpPp-LQHPFQILLSsPLQL.....................................Es.ss+pDscERGlLCsVuNtDSDIhPshSLFPApsG.CpIlRulIDpQLTQMuIVRLSLNlFAL+IhsPLh+pLPLRRKsspHsALHDCluLHLP-LTFps...............Dts.sscss.TPpLTlpV+pALCWHpsEGGISGPRGLTSRISVRLSEuThpshGPulFGpL.hDs.-SP.DLsLSSLhLYQDslLRFNVThcsuppphPSsP.VSFRLRLRRcTVpRPFFSDsPLPYFlPR+ps..ssGL-VplPY-LoLKsSHhlRIYRRFYGPaLGLFlPHNRQsLpMPVTlWLPRTWLEIoVlssssH...................................lP+soVLGpLYFISSKHTLNRGpLSALTaQVKSuLHspP.......pQLSlLGASluLQDLlPMRlusP..-PpsppQQpssTppspPVTlAMVCsch ......................MPRsD.sLRNtsR.sRsR+s.............................................................................h.tsspTILTLTDpHcl+pPhs+cGTY+LIQLHlDhpPp-LpcPFQILLSTsLQL.....................................Es.ss+pDscERGlLCsVuNpDSDlhPuhSlFPApsG.CpI.lRulIDpQLTcMuIVRLSLNlFAL+llsPLl+pLPLRRK..sscH..TALHDCluLHLP-LTFcs....................s-ts.TspLTlp.V+p.ALCWH..psEGGISGPRGLTSRISsRLS-sThpsLGP....ulFGpL.hDs.-SP.DLsLSSLhLYQDshLRFNV.....T..ssppphPusP.VuFRLRL.RRpTVpRPFFSDsPLPYFlPs+ps..stGL-VplPY-L..oL..KsSHpLRIYRRFYGPaLGlFVPHNR.QsLpMPVTlWLPRoWLEloVlssspp..................................phPRsslLGcLYFISSKHoLNRGpLSAhsHQVKSo.LHups.......p.QLSlLGASIuLpDLlPMRlusP..pPps..pp.ppssTp.spPVTlAMVCsp.h................................................ 
0 0 0 0 +6114 PF06286 Coleoptericin Coleoptericin Moxon SJ anon Pfam-B_11996 (release 9.0) Family This family consists of several insect Coleoptericin, Acaloleptin, Holotricin and Rhinocerosin proteins which are all known to be antibacterial proteins [1]. 25.00 25.00 30.70 30.60 23.50 18.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.90 0.71 -4.29 3 16 2009-09-10 22:13:49 2003-06-03 13:36:34 6 2 8 0 2 17 0 111.00 48 95.23 CHANGED MMKLYIlFGLIAlSAAYVVPEtYYEP.YP-sAua-utRlEP..lSPAEL+aspslsDEsEl-.P.hYIRERRSLQPGAPNFPMPGSQLPTSITSNVEKQGPNTAATINAQHKTDRYDVGATWSKVIRGPGRSKPNWSIGGTYRW ......................................................................................................................................RRSLQPGAPNhs..spp.P.h.plossluR.p.s.s.NTpusINlQ+KG-saDhsAGWSKVlRGPsKuKPTWHVGGTYRW. 0 2 2 2 +6115 PF06287 DUF1039 Protein of unknown function (DUF1039) Moxon SJ anon Pfam-B_12079 (release 9.0) Family This family consists of several hypothetical bacterial proteins from Escherichia coli and Citrobacter rodentium. The function of this family is unknown. 25.00 25.00 26.70 26.90 20.10 18.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.07 0.72 -4.08 5 254 2009-01-15 18:05:59 2003-06-03 13:44:08 6 1 241 0 11 57 0 65.90 55 72.96 CHANGED lAAVNHuLhupAHAILNALPplIPDccsptlCEAlLLFGLN-pscAs+tLAs.hssE-AQsLRuLls ..hAGVNHuLhups+AhLsALs.IlPDKKlpLlC.AlLLhGLN-.hcAsKlLuD.hD..-A.hLphLh....... 0 2 5 8 +6116 PF06288 DUF1040 Protein of unknown function (DUF1040) Moxon SJ anon Pfam-B_12122 (release 9.0) Family This family consists of several bacterial YihD proteins of unknown function [1]. 
20.80 20.80 21.20 21.30 19.10 20.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.66 0.72 -3.75 15 736 2009-09-11 13:49:48 2003-06-03 13:47:46 8 2 731 1 66 206 2 85.40 78 96.16 CHANGED MKCHRlNELlELLpPtWp.K-P-LsLlphLpKLApEuGapGsLs-LTDDVLIYHLKMRso-KsEhIPGLKKDaEDDFKTALL+ARGl ........MKCKRLNEVIELLQPAWQ.KEPDLNLlQFLQKLAKESGFDGcLsDLTDDILIYHLKMRDSAKDAsIPGLQKDYEEDF..KTALLRARGV......... 0 5 21 45 +6117 PF06289 FlbD Flagellar protein (FlbD) Moxon SJ anon Pfam-B_12586 (release 9.0) Family This family consists of several bacterial FlbD flagellar proteins. The exact function of this family is unknown [1]. 20.60 20.60 20.60 21.40 20.50 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.66 0.72 -4.60 55 409 2009-01-15 18:05:59 2003-06-03 15:07:04 6 1 404 0 147 323 3 59.70 43 81.93 CHANGED MIpLTRLNGc..pFhLNu-hIEpIEssPDTsITLssG+KalVpEss-EVlc+llpa+pcIht ....MIplT+L..NGp.....tFhLNschIEplEp..sPD.TsITLssG+Khl.........VcEos-EVlc+lhpa++clh.h...... 0 75 120 133 +6118 PF06290 PsiB Plasmid SOS inhibition protein (PsiB) Moxon SJ anon Pfam-B_12789 (release 9.0) Family This family consists of several plasmid SOS inhibition protein (PsiB) sequences [1]. 20.60 20.60 21.80 26.90 19.50 18.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.74 0.71 -4.77 5 311 2009-01-15 18:05:59 2003-06-03 15:14:08 6 3 206 4 8 165 0 137.20 79 96.56 CHANGED MKTlLTLsVLpTMoupEhEDaRsAGEDhRRELSHAVMRDLTsPuGWsVNAEYRSEFGGFFPVQlRFTPuH.-+FcLAVCSPGDLSPhWllVFlosGGQPFSVVpshcsasPEtIoHTLuLlAsLDAsGYShASIIolLutEGuQ .....MKTELTLNVLQTMNAQEYEDIRAAGSD.RRELTHAVMRELDAPsNWsMNGEYGSEFGGFFPVQVRFTPAH.ERFHLALCSPGDVSQlWh.LVL.......VNuGGcPFAVVQVQ+tFuPEAlSHoLALAAoLDAQGYSVNDIIHILMAEGGQ............... 
0 1 1 5 +6119 PF06291 Lambda_Bor Bor protein Moxon SJ anon Pfam-B_12850 (release 9.0) Family This family consists of several Bacteriophage lambda Bor and Escherichia coli Iss proteins. Expression of bor significantly increases the survival of the Escherichia coli host cell in animal serum. This property is a well known bacterial virulence determinant indeed, bor and its adjacent sequences are highly homologous to the iss serum resistance locus of the plasmid ColV2-K94, which confers virulence in animals. It has been suggested that lysogeny may generally have a role in bacterial survival in animal hosts, and perhaps in pathogenesis [1]. 21.90 21.90 23.10 22.20 21.00 20.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.97 0.72 -10.15 0.72 -4.06 2 335 2012-10-01 23:27:00 2003-06-03 15:22:13 6 3 252 0 24 139 10 94.10 75 96.65 CHANGED MKKMLhusALAhLITGCAQQTFTVtNK.TAVsPKETITHHFFVSsIGQ+Khl.tsKhsGGsENVVKTETQQTFVNuL.GFIThGIYTPhEsRVYCSQ ....................................................MKKMLhus.AL.A.h.LI.TGCAQQTFT..Vt....N..K.PT...A...V..s..PK...E.T..I..TH.HFF..V..S..GI.GQ.cKTVDAAKICGG.A.E.N.VVK.TETQQTFV.NGLLGFI..T..LG.IYTPLEARVYCSQ........... 0 5 10 14 +6120 PF06292 DUF1041 Domain of Unknown Function (DUF1041) Yeats C, Moxon SJ anon ADDA_1192 Domain This family consists of several eukaryotic domains of unknown function. Members of this family are often found in tandem repeats and co-occur with Pfam:PF00168, Pfam:PF00130 and Pfam:PF00169 domains. 
20.70 20.70 20.70 21.00 20.20 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.36 0.72 -3.87 13 481 2009-01-15 18:05:59 2003-06-03 16:54:33 12 20 88 0 229 460 0 98.60 43 7.23 CHANGED psVV+cClcsshpssYphlhspsp..hsp..t.....................tphs..ppssss+pL-.hh+LhpLslsllppsccpYu.shpt.Fs..hslhp.pAEshWoLFusDMctALEtp .....................................ppVV+cClctsh.ssYphlhp.s+..hspt.t..........................t..c.hs......cp.sssh+pL-.h.+LhpLh..lpllpps....cppas..............s..........h.........pt.Fs...slht.pAEshWsLFA.DMchALEtp.............. 0 50 65 143 +6121 PF06293 Kdo Lipopolysaccharide kinase (Kdo/WaaP) family Studholme DJ anon Krupa A, Srinivasan N Family These lipopolysaccharide kinases are related to protein kinases Pfam:PF00069. This family includes waaP (rfaP) gene product is required for the addition of phosphate to O-4 of the first heptose residue of the lipopolysaccharide (LPS) inner core region. It has previously been shown that WaaP is necessary for resistance to hydrophobic and polycationic antimicrobials in E. coli and that it is required for virulence in invasive strains of S. enterica [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.23 0.70 -5.26 14 2201 2012-10-02 22:05:25 2003-06-03 18:59:37 9 20 1403 0 513 8874 747 187.20 22 64.55 CHANGED spaWttpucslspspGRtssWFs.tph....pssh++YaRGshhu+ls+D.atas.hspspuhtEFpllccLcctGlsVP+slAushh+huht..apA.lLoEclssspDLsshltp.......tphppthhctlGphItchHpsslsHsDLss+sILlctpt....t..chhlIDh-+sthph..ss+WppcsLspLhRSh........slthscpDattlhpuY ..................................................................................................................................................t.................h.ht..hhhtt..h....t....phhp.s...h...h......h....s......h.........t.......sh..+Eap.hl..cc...L.p...c.....h...G....l...s..s.....s....p..s....l..u....h...t....t...t.....t....h.....s....s.........h....p...u....h...l....l..T..E.......c.l.......s....s.....s....h...s...lps.h..h.t..p........................psp.h..p...p..........t......l...l........c....t...l.u....p..h....l....t.ch...H.t.s...s.lpHs...Dhhl..pplL.l.+tst.............................thchh...lI..Dh...c..+...up......h.....+..h........t....spc...h..p....p...c..D.L..h.tL.h...................h...t..t...h..h....................................................................... 0 135 275 414 +6122 PF06294 DUF1042 Domain of Unknown Function (DUF1042) Yeats C, Coggill PC anon ADDA_2448 Family Spef is a region of sperm flagellar proteins. It probably exerts a role in spermatogenesis in that the protein is expressed predominantly in adult tissue. It is present in the tails of developing and epididymal sperm internal to the fibrous sheath and around the dense outer fibres of the sperm flagellum [1]. The amino-terminal domain (residues 1-110) shows a possible calponin homology (CH) domain; however Spef does not bind actin directly under in vitro conditions, so the function of the amino-terminal calponin-like domain is unclear [1]. 
Transcription aberrations leading to a truncated protein result in immotile sperm [2]. 25.00 25.00 25.50 25.80 24.70 24.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.90 0.71 -4.64 6 395 2012-10-03 10:10:54 2003-06-04 11:42:13 6 13 119 1 270 374 4 139.10 24 23.64 CHANGED LspWLpp...lspolss+shsRsFSsGhLlAEll++aaPp.lDhpsassusSlphKLsNWupl.pKsLsKLshplscslhcplhsspPGssEhLLapLhthL...p+pcppulsshptpshp.posh+h.slpppshp-ulhphhpp....u..p.lcpstslt ..............................lhpWlpp....lsloh...+sh.p....R.D.F.SsG.hLl.AEllp.pa.h.Pphl-hcsass.u.sShptKlsNWspL.................c.............+s.......Lp+...L...s..h.p..ls.p.....p.hhcsl..h.psp.s.Gsspt....lL.hpLhttl..........ppp...p...t.t....t......................................................................ttt..................................................................... 0 107 137 206 +6123 PF06295 DUF1043 Protein of unknown function (DUF1043) Moxon SJ anon Pfam-B_12007 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 28.80 28.80 29.10 28.90 27.80 28.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.71 0.71 -4.50 5 884 2010-01-13 14:53:39 2003-06-04 14:20:45 7 2 879 0 118 373 38 126.10 54 92.06 CHANGED YAlIGLlVGlIIGhlIsRLTssplKpQpslcKELEsuKtEL-pQRpELpDHFAcSAELL-TLu+DYpKLYQHMAKoSssLLPpLstpsNPFspptt.ps-cSNspE.....lppQPRDYupGAoGLl+spcKc ........................YALIGLVV.GI.IIGA.V...A.MR.F......G.NRK.....L......RQ....Q..p....uLQ.hELE.....K.sKsEL-EYRpELVoHFA+SAELLDTMA+DYRQLYQHMAKSSosL.L..P...E..l.s.s........c.....s.NP.F+.sRLs......E.....S...EA..S....NDQu............PV.ph.PRDYSEG.AS.GLLRsttK.c........................................................... 0 20 48 87 +6124 PF06296 DUF1044 Protein of unknown function (DUF1044) Moxon SJ anon Pfam-B_12045 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.46 0.71 -4.27 36 879 2012-10-03 00:18:00 2003-06-04 14:22:33 7 2 693 0 167 538 38 103.50 27 90.35 CHANGED MRlFKs+hFs+aA++ct.lsDpsLtcAlcchtpGhlDADLGGGVhKKRlucs.GpGKpGGaRollha+t..uc+hFFlaGFAKs-p.u....NIoscEltsh+plApthhshocppLspLlpsttlhEl ............................................................................p..t....h.t....h.......t.h.........t.s...h....t.......s....L...G..G..G.....lhK..hR..l..s..ps..tp........GKpGGhR....sI......h.h.h....ps.....s.p.p.ha.a..h.h.hYuKs-h.s.....sIospEhtsh+phuphh.......................................... 0 34 101 130 +6125 PF06297 PET PET Domain Yeats C, Moxon SJ anon ADDA_3209 Domain This domain is suggested to be involved in protein-protein interactions [1]. The family is found in conjunction with Pfam:PF00412. 25.00 25.00 25.80 25.40 24.50 24.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.37 0.72 -4.38 18 570 2009-01-15 18:05:59 2003-06-04 14:23:54 9 11 111 0 287 491 0 98.40 44 17.57 CHANGED phshpssssoccDssssp..pYsWsPPGL.s.chlppYhphLP--KhPhlGStGt+aRp+QLlaQLPsaDp-sphCcuL.s-cEt+phc.FspppKcEuL.GhGsV+hhs ...............h...hpp.shuccsssh.sh...pYtWsPPGl..p.c....sppYhp.h.L....Pc-KhPhssS.Gt....pa..R..h+QLhpQLPsHDp-sphC.ps.L...s.......-c.E.t.+Ehc.Fsp.ppKcEAL.GhGsV+...s....................... 0 56 76 170 +6126 PF06298 PsbY Photosystem II protein Y (PsbY) Moxon SJ anon Pfam-B_12212 (release 9.0) Family This family consists of several bacterial and plant photosystem II protein Y (PsbY) sequences. PsbY is a manganese-binding protein that has an L-arginine metabolising enzyme activity [1]. 
20.40 20.40 23.30 20.90 20.00 20.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.59 0.72 -4.43 22 176 2009-01-15 18:05:59 2003-06-04 14:34:32 6 4 112 0 72 159 18 35.00 45 49.48 CHANGED MDhRlllVlhPIllAuuWAsFNIG+sAlpQlpp.hhp ........D.RlllVlhPll.AhuWsLFNIhpsALsQlp+.h..p..... 0 16 49 63 +6127 PF06299 DUF1045 Protein of unknown function (DUF1045) Moxon SJ anon Pfam-B_12127 (release 9.0) Family This family consists of several hypothetical proteins from Agrobacterium, Rhizobium and Brucella species. The function of this family is unknown. 24.30 22.60 25.60 23.80 24.20 22.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.87 0.71 -4.56 43 281 2012-10-03 21:31:48 2003-06-04 14:48:08 7 3 253 0 93 273 102 158.20 40 65.60 CHANGED TLKAPFpLAsGtstspLhsAspsFAtphsPhslstLtlsplGsFlA..LlPssss..ssLppL...AussVppFDsF...RAPhots-luRR+sstLospQcphLpcWGYPYVh--FRFHMTLTGpL...stsppstlhshLpstasslh..psltlsslALFsEsssGssFpl ..............TlKAPF+LAssto..pscLhAAhtpFAustsPhslstLplpp...l........u....uFhA....LsPspss......stLppLAsssVpsFDtFRAPho.-.A.E.l..ARRcsptLotcQcp.LtcWGYPYVh--FRFHMTLTG.l...stpcps...t..lppsLcphass.h.ls.psltlsslALFsEsptGuPFp.............................. 0 13 48 65 +6128 PF06300 Tsp45I Tsp45I type II restriction enzyme Moxon SJ anon Pfam-B_12151 (release 9.0) Family This family consists of several type II restriction enzymes. 
25.00 25.00 25.30 25.00 21.40 20.60 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.78 0.70 -4.94 3 36 2012-10-11 20:44:44 2003-06-04 14:53:36 7 2 22 0 3 35 5 222.00 67 99.39 CHANGED Ms..NhWTcLSIEYANQRSYLDDLFQVYsTIP-SlRoIsEKlWSNVEcAFK....RKDN....LuLIcELLN....LDLFPIKDSYlA...YLK+DKo.....ALE........RNPRTINRICGRLYEMGLNKIaEKsSEPKETNRQIGPMFKcWh+cKSLG................VEP....VDLusFIANE-DAIL..cASDclMtDFAKEaLGYpHc.....KGLDFlARFN+o.....YlIGEAKFLTDFGGHQNAQFNDAIuTIEAc.sl+AIKVAILDGVLYIcupNKMaKulsohYK-aNIMSALVLR-F...LYQ.......L ...................................................M....NhWTchSIEhANQRsYLDcLFpVYPhIP-ulREIDuKlWSNlEYHFK....pKDN....LuLIsELLN....LDLFPIKDSYhA...YLKRDKS.....ALE........RNPRTINRICGRLYEMGLNKIFEKCSEPKETNRQIGPMFKcWlNNKSLG................VEP....VDLNDFIANEsDAIL..+ASDslMuEFAKoHLNY+Hc.....KGLDFlARFNKK.....YIIGEAKFLTDFGGHQNAQFNDAISTI-sP....NlKAIK.VA.ILDGVLYIcSNNKMpKhLs.TpY+NapIMSALVLR-F...LYQI.............................. 0 2 2 3 +6129 PF06301 Lambda_Kil Bacteriophage lambda Kil protein Moxon SJ anon Pfam-B_12201 (release 9.0) Family This family consists of several Bacteriophage lambda Kil protein like sequences from both phages and bacteria. Induction of a lambda prophage causes the death of the host cell even in the absence of phage replication and lytic functions due to expression of the lambda kil gene [1]. 23.70 23.70 23.90 23.80 23.40 21.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -7.95 0.72 -4.53 3 292 2009-01-15 18:05:59 2003-06-04 15:00:54 6 2 219 0 6 103 1 42.10 66 73.92 CHANGED MDQoLMAIQoKFsIAsFIGDcKMaREAV-AaKcWI.h.hhRpK ....MDQsLMAIQoKFsIAsFIGDEKMaREAV-AaKcWI.l.hhRpp........ 0 0 0 1 +6131 PF06303 MatP DUF1047; Organiser of macrodomain of Terminus of chromosome Moxon SJ anon Pfam-B_12243 (release 9.0) Family This family, many of whose members are YcbG, organises the macrodomain Ter of the chromosome of bacteria such as E coli. 
In these bacteria, insulated macrodomains influence the segregation of sister chromatids and the mobility of chromosomal DNA. Organisation of the Terminus region (Ter) into a macrodomain relies on the presence of a 13 bp motif called matS repeated 23 times in the 800-kb-long domain. MatS sites are the main targets in the E. coli chromosome of YcbG or MatP (macrodomain Ter protein). MatP accumulates in the cell as a discrete focus that co-localises with the Ter macrodomain. The effects of MatP inactivation reveal its role as the main organiser of the Ter macrodomain: in the absence of MatP, DNA is less compacted, the mobility of markers is increased, and segregation of the Ter macrodomain occurs early in the cell cycle. A specific organisational system is required in the Terminus region for bacterial chromosome management during the cell cycle. 20.00 20.00 21.70 21.10 19.50 19.00 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.02 0.71 -4.29 13 737 2009-01-15 18:05:59 2003-06-04 15:36:37 7 1 719 0 67 231 1 145.90 71 97.72 CHANGED MKYQQLENLEsGWKWpYLlKKHREGEsIT+alEpStA-suVppLlplEppPscV.cWIcpcMsPcLcN+hKQuIRA+RKRHFNAE+QHTRKKSIDL-YtVWpRLSthupchGsTLSETIshLl-EAE+KEpYucphSuLKpDL+cLLs .................................MKYQQLENLESGWKWKYLVKKHREGELITRYIEuSAAQEAVchLLuL.....ENEPVhVssWI-cHMNP-LlNRMKQTIRARRKRHFNA.EHQHTRKKSID..LEFhVWQRLAGLAQRRGpTLSETIVQLIEDAEpKEKYAspMSSLKQDLQALL.G.............. 0 5 19 47 +6132 PF06304 DUF1048 Protein of unknown function (DUF1048) Moxon SJ anon Pfam-B_12247 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
29.60 29.60 29.60 30.10 29.10 29.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.21 0.72 -3.71 16 339 2012-10-01 22:34:14 2003-06-04 15:38:59 6 1 278 4 47 204 1 101.10 38 82.38 CHANGED ppsl-pK+Ea+tacsRscsLPcDYptsaccIp+YlaphG.sschtshhtIhssll-LFEcuAA-G+sVt-llG-DVAuFsDpLls-sts..atDKaR-pLNcsls ...................t.Khls-KKEY+thh.tRl.tALPpDYphlaccIppYhWp.Fu..stsGtsM....hplhp-llDLFEpuAA-Gpplh-lsG-DVusFADpLlssst...s...as.cK.+ccLNcpl............... 0 25 39 44 +6133 PF06305 DUF1049 Protein of unknown function (DUF1049) Moxon SJ anon Pfam-B_12262 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild --amino -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.13 0.72 -4.50 84 2233 2009-09-13 05:33:55 2003-06-04 15:41:11 6 3 2122 0 510 1349 240 65.60 24 60.14 CHANGED hpNsphVslsalhupsp..hsLulllhhsFshGhllGh.Lhshhhh..h+h+hphpphp+......plpphppplsp.p ...........................pNs.p.Vshsa..l.h......s...p.sp....h.........sLs...l..l..lh..sshs..hGhllGh.llshhhh.......h+.h+h....pl.tchp+.......clpphppp.....t................................................... 0 138 316 417 +6134 PF06306 CgtA Beta-1,4-N-acetylgalactosaminyltransferase (CgtA) Moxon SJ anon Pfam-B_12320 (release 9.0) Family This family consists of several beta-1,4-N-acetylgalactosaminyltransferase proteins from Campylobacter jejuni [1]. 
20.20 20.20 20.30 21.00 19.30 20.00 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.27 0.70 -5.23 2 149 2012-10-03 05:28:31 2003-06-04 15:48:29 6 3 96 0 12 116 1 256.00 46 86.71 CHANGED MLFQSYFVKIICLFIPFRKIRHKIKKTFLLKNIQRDKIDSYLPKKTLlQINKYNNEDLIKLNKAIIGtGHKGYFNYDEKSKDPKSPLNPWAFIRVKNEAITLKASLESILPAIQRGVIGYNDCTDGSEEIILEFCKQYPSFIPIKYPYEIQIQNPKSEENKLYSYYNYVASFIPKDEWLIKIDVDHhYDAKKLYKSFYIP+ppYcVlSYSRlDhha.p-pFalh+sppGpILKtPGDCLhIpNhNLhWKElLI-..sssaKhsTtps..pNh+ShE.LKh+pRIhFpTELNNYHFPFlKpaRtpDhhphNWlSl--FhK.YhpplpppI-.pMlphcTLKKlahpLh ......................................................................................................................t..........HpGaFsaDpcupssKSPLNPWAFIRVKNEulTLcsSLcShLPAIQRGVIGYNDC....sDGS.cEl...ILEFCKpYPS......FIPl.p.Y.....P...........Y...............El.....p.l..pN.....s.p.......S...-......c.N..p.LYs.......YYNYs..hSFIP......psEWlIKIDsDHhYDAKK..L..YK..SFYhs.cpph..csl.YsRlsFhh.stp...h..al.pstp.ht..hh.t..GDphll.s.s..a.E...................psh..shE.Lt.h+p.hhhh..p.sEL.paHFPhhK..Rptsh.ph.shlsl-..-F.hK..atc....ppIp.pMlpcchl.phh.p..h................................................................................................................... 0 6 8 9 +6135 PF06307 Herpes_IR6 Herpesvirus IR6 protein Moxon SJ anon Pfam-B_12322 (release 9.0) Family This family consists of several Herpesvirus IR6 proteins. The equine herpesvirus 1 (EHV-1) IR6 protein forms typical rod-like structures in infected cells, influences virus growth at elevated temperatures, and determines the virulence of EHV-1 Rac strains [1]. 
25.00 25.00 105.90 105.80 24.10 17.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.50 0.70 -5.00 4 38 2009-01-15 18:05:59 2003-06-04 15:55:52 7 1 12 0 0 18 0 133.80 66 83.56 CHANGED spustchh.Ns.sthlGhsYFRRCRpEhNEGaaApVPsGYFPVtPSphPshV.Vcu......hsGEsluFpslPsP+h-hRFaKQLpDGTFVRLPF.YP-EhYEsE.pPhtsRaYlpADscssSS....ssPSsLhEphFscVPsult.EthspWsGPK+lPlPscRYVLKhGaE..hp.pVTEDAFp.lsTphLRL-htst..................ss...sAt.s.ptspVpts .ssssu.....sTGPGCEGGLGGWRLFKACRHEQEDGLYAMLPPDYFPVVPSSKPLLVKVPAPGASPDRTGGAVHFECVPAPRRPLQFFRQLYDGT........................................................................................................................................................ 0 0 0 0 +6136 PF06308 ErmC 23S rRNA methylase leader peptide (ErmC) Moxon SJ anon Pfam-B_12332 (release 9.0) Family This family consists of several very short bacterial 23S rRNA methylase leader peptide (ErmC) sequences. ermC confers resistance to macrolide-lincosamide streptogramin B antibiotics by specifying a ribosomal RNA methylase, which results in decreased ribosomal affinity for these antibiotics. ermC expression is induced by exposure to erythromycin [1]. 25.00 25.00 53.40 53.40 17.10 17.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.96 0.72 -7.11 0.72 -3.90 3 48 2009-01-15 18:05:59 2003-06-04 16:01:11 6 1 29 0 4 15 0 27.50 96 98.65 CHANGED MLVFQMR....NVDKTSTVLKQTKNSDYADK MLVFQMR....NVDKTSTVLKQTKNSDYADK 0 1 1 1 +6137 PF06311 NumbF NUMB domain Yeats C, Bateman A anon ADDA_13736 Family This presumed domain is found in the Numb family of proteins adjacent to the PTB domain.. 
19.90 19.90 20.60 20.60 19.70 19.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.25 0.72 -3.88 8 177 2009-01-15 18:05:59 2003-06-05 14:09:23 7 4 74 0 80 195 0 86.40 63 15.01 CHANGED sPaAIPRRHAPs-.LhRQGSFRGFsulupp.......SPFKRQLSLRlN-LPSThQRpp................-hpspsPlsEhEst..uts......D.SISuLCsQIosuFS ................PaAIPRRHAPlEQ..LsRQGSFRGFPALSQK.......SPFKRQLS....LRlNELPSThQR+T.........................DF.lKss.V..P..ElE......G-u............-.SISuLCoQIosuFu............................................ 0 16 22 47 +6138 PF06312 Neurexophilin Neurexophilin Moxon SJ anon Pfam-B_12369 (release 9.0) Family This family consists of mammalian neurexophilin proteins. Mammalian brains contain four different neurexophilin proteins. Neurexophilins form a family of related glycoproteins that are proteolytically processed after synthesis and bind to alpha-neurexins. The structure and characteristics of neurexophilins indicate that they function as neuropeptides that may signal via alpha-neurexins [1]. 
20.70 20.70 21.00 20.90 20.20 20.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.63 0.70 -5.10 6 508 2012-10-03 16:25:20 2003-06-05 14:41:06 7 8 43 0 338 454 0 177.90 29 47.06 CHANGED Rlht+cssssssshtp.oshtss.........p-aWDhLus.o-lpt........s.sRsKR+Pll.pTGKhKKhFGWGDF.aSNIKTVKLNLLITGKIVDHGNGTFSVYFRHNSTGpGNVSVSLVPPSKlVEF-lpQQhhlEsK-SKs.FNCRlEaEKlD+upKTuLCsaDPSKlCaQ-QTQS+VSWhCSKPFKllC.lYIuFYSsDY..+LVQKVCPDYNYHS-TPYhsSG .....................................................................thht............................................................................................................t...Kha..G.GDF.pup.l.p.o.....s....p.....L....p.....h....h..ss..G+l.s.D.asNGTapV.Fph.hsGps.sl..SVpLV.PScuVph....................................p........ph......h...c.t.p...s+s....Fps.p.h..c...........t.p.....s.p..c.s...p..c..s....s......l...s...................s.......s......s.......p......h...Cphppsps...ptsWhC.KP..h..+..l...C...hl.s..ah.p.st....................................................................................................... 0 100 120 202 +6139 PF06309 Torsin Torsin Moxon SJ anon Pfam-B_12047 (release 9.0) Family This family consists of several eukaryotic torsin proteins. Torsion dystonia is an autosomal dominant movement disorder characterised by involuntary, repetitive muscle contractions and twisted postures. The most severe early-onset form of dystonia has been linked to mutations in the human DYT1 (TOR1A) gene encoding a protein termed torsinA. While causative genetic alterations have been identified, the function of torsin proteins and the molecular mechanism underlying dystonia remain unknown. Phylogenetic analysis of the torsin protein family indicates these proteins share distant sequence similarity with the large and diverse family of (Pfam:PF00004) proteins. 
It has been suggested that torsins play a role in effectively managing protein folding and that possible breakdown in a neuroprotective mechanism that is, in part, mediated by torsins may be responsible for the neuronal dysfunction associated with dystonia [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.65 0.71 -4.25 8 383 2012-10-05 12:31:09 2003-06-05 14:44:56 6 6 102 0 205 556 182 119.00 39 36.68 CHANGED ChFt-CCs...-scIshshpuL-pDLsspLaGQHLspclVV+olKualpss.pPpKPLVLSFHGaoGTGKNaVucIIAcNhYRsGL+SsaV+hFlAThcFP+sppl-pYKhcLpsplppohptCpRSlFIF .........................................C.h.th.Ct......pp.....s..hp...........uLcp-Lpp+LaGQHLApc........l..l........lpAl.p.u.a.......l...s.s.......p.PcK..P.L.s.L.S.hHGaoGTGK....N....aVupllA..csl...a.p.p.G.......h.c.SsaV+...hFlu.s.......hH....FP.Ht..p..plp..h..Y.......+............cpLpphlpsslstCsp.SlFlF.................................................. 0 48 64 114 +6140 PF06313 ACP53EA Drosophila ACP53EA protein Moxon SJ anon Pfam-B_12718 (release 9.0) Family This family consists of several Drosophila ACP53EA accessory gland (seminal) proteins. 25.00 25.00 34.90 30.30 19.40 22.50 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.30 0.72 -4.05 6 87 2009-09-10 23:46:03 2003-06-05 14:46:14 6 2 13 0 18 93 0 90.20 30 76.51 CHANGED asKhhcCscVus-ulupLlcpsIPsVppLtpCsDY...pPtpsKspShltal+lsYpFhKKhlhccPcCLlhhlspussLl+Pahcpl-oLpClsE ....hp+hLcCscluhcususLhpcsIPsl+pLtpCssF...pP..pst..sh.shhthlclsYpFl++.lh.sp.cCLlsslpchhshlpPalppl.shpCh..... 0 4 4 11 +6141 PF06314 ADC Acetoacetate decarboxylase (ADC) Moxon SJ anon Pfam-B_12720 (release 9.0) Family This family consists of several acetoacetate decarboxylase (ADC) proteins (EC:4.1.1.4). 
29.80 29.80 29.80 29.80 29.60 29.60 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.49 0.70 -5.02 72 670 2012-10-01 19:21:02 2003-06-05 14:46:38 6 12 525 20 280 644 110 225.10 20 74.15 CHANGED shPl.hss....shs............st-hlhlpYcTDscslppllPt.sLclsp.......slVth.hhchssso.uhh..........sYpEsu.sl.Vp.ap................Gp....p.GsYshhhalD...scsslsuGRElhGaPKKlup.plpp.............s.ssslsGslch.suhplspsshshctps.s..tshtthpt...........s.hhh+hlPs.hsu.tsp...lscLVphthsshsht......tsasGs.usLpL.tsp.shssls..........pLsl.hclluuhahhsshs......lshsphlhc............L .....................................................................h...............................stphhh..hh..apss.ct...l..p...pl.l......P.........t....s.....Lchsp...............slVt...h..hh..c..hs.css..u..................sYsEhuh...s.l.....Vp..ap.........................sc...............t.usa..shthhlc...sp.slsuGREl.aGaPKphAs.hs.lpp.........................................pssphtusl...sh...su.....htlsphshshtt...s......h..htt.....................hhhchlsp....ss....................hsphl.h...hpsh.th.................thh.Gs..uplpl.......t..............t.h.t.ls..........pLsl..hch.l...tu.hh..................................................................................................................................... 0 83 180 237 +6142 PF06315 AceK Isocitrate dehydrogenase kinase/phosphatase (AceK) Moxon SJ anon Pfam-B_13100 (release 9.0) Family This family consists of several bacterial isocitrate dehydrogenase kinase/phosphatase (AceK) proteins (EC:2.7.1.116) [1]. 
20.80 20.80 20.80 27.90 20.60 20.70 hmmbuild -o /dev/null HMM SEED 562 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.86 0.70 -6.23 39 830 2009-01-15 18:05:59 2003-06-05 14:47:17 6 3 792 6 133 519 192 541.90 62 96.03 CHANGED lAcsILpGF-paathFpplTttAcpRFEpuDWpulQpAs+cRIphYDc+VppssppLcp....hsspths..............tshWtplKptYhsLlhs+.p.ElAETFFNSVhs+lac+phhps-hlFVpsshs....csc..sh.pshh+sYhss.....ssLppslpcllp-ashphPa-DLpRDlphlhptlppphss.......hthssshplclLpslFYRNKuAYlVG+lhs..ssp.....hPFll......PlLpscsG...tLhlDslLhspsclullFSFuRuYFMV-sphPuuhVpFL+sLhPpKspuELYoulGhtKpGKT.FYR-hlpHL.cpS..sDpFllAPGIKGMVMhVFTLPSasaVFKlIKD+FussKpho+....tpVpsKYpLVKpHDRVGRMADT.EasslsFP+pRFSs-LLpELpphsPSplc...hpGD....pllI+HLYlERRMsPLNlYLcpAs-p.........p....lcpAlpE....YGpAIKpLAAANIFPGDMLhKNFGVTRHGRVVFYDYDEIpYlT-sNF.RplPtPcp.E-E...........huuEsWYSVuPpDV..F.PEc.FshFlhussplRchFhchHu-Lh-ssaWpshQppI+pGpltDVaPYcpptR ......................IAQTILQGFDAQYGRFLEVTuGAQQRFEQADWHAVQQAMKsRIHLYDHHVGLVVEQLRsI...Ts.u.p.s.sD............................ssFLhRVKcHYTcLLPDYPRFEIAESFFNSVYCRLFcHRsLoP-+LFIFSSQP.......ERRaRol.PRPLu+cFaPc.....cGhcshLhRlloDLPLRL.WpNpuRDIcYIlppLpEsLGsc........pLscs+lQVANELFYRNKAAWLVGKLls....ssu..s....LPFLLPI......Hps...-........cG...................cLFlDTCLTosAEASIVFGFARSYFMVYAPLPAALVEWLREILPGKoTAELYhAIGCQKHuKTEsYREYLsaL.p.ts...............sEpF..ItAPGIRGMVMLVFTLPGFDRVFKlIKD+FAPQKEhot....A+V+ACYQLVKEHDRVGRMADTQEFENFVL-K++IuPALht.LhpEuspKIsDLG-......pIVIRHLYIERRMlPLNIaLEQs-sQ.................Q....LRDAIEE....YGNAI+QLAAANIFPGDMLFKNFGVTRHGRVVFYDYDEICYMTEVNF.RDIPPPRYPEDE............LASEPWYSVuPsDV..F.PEE.FRH.aLCuDPRltsLFEEMHADLFcA-YWRuLQsRI+-GHVEDVYAYRR+QR............................................ 0 19 60 96 +6143 PF06316 Ail_Lom Enterobacterial Ail/Lom protein Moxon SJ anon Pfam-B_12325 (release 9.0) Family This family consists of several bacterial and phage Ail/Lom-like proteins. The Yersinia enterocolitica Ail protein is a known virulence factor. 
Proteins in this family are predicted to consist of eight transmembrane beta-sheets and four cell surface-exposed loops. It is thought that Ail directly promotes invasion and loop 2 contains an active site, perhaps a receptor-binding domain [1]. The phage protein Lom is expressed during lysogeny, and encode host-cell envelope proteins. Lom is found in the bacterial outer membrane, and is homologous to virulence proteins of two other enterobacterial genera. It has been suggested that lysogeny may generally have a role in bacterial survival in animal hosts, and perhaps in pathogenesis [2]. 20.80 20.80 21.70 20.80 20.00 20.70 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.13 0.71 -4.80 2 1349 2012-10-03 17:14:37 2003-06-05 14:47:45 6 3 430 0 22 1298 2 177.20 65 97.07 CHANGED MRKlCAsILSAslhLssuGsPA.AuEHQSTLSAGYLpspTDhPGSDDLNGINVKYRYEFTDTLGLITSFSYANAEsEQKTHYsDTRWHED.VRNRWFSVMAGPSVRVNEWFSAYAhAGVuYuRVSoFuGDYhplTssctKpp-hLTtSDsuRhSpTuLsauAGVQhNPTEslsVDlAYEuSGpGDWRT-uFIVGsGY+F .....................................MRKlhAsILSA.s.lh.L.ss.u.G...s..P.A..h..A.u..E..c..Q..uTLSAGYL..psp........T...s...s.......P.......G.......S......D......s.........L....p....GINVKYRYE.F.T.D....s..LGhlTSFSY.A....ssc........s...c..Q..h.T....+.Y............S.....D........T..R.......W.......H.E....D.SVR.N...R......WFSVMAGPSVRVNEWFSAYAM..AGVAY..SRVSTFSGDYLRVTDNKGKTHDVLTGSDDu..R..HSNT..SLAWGA.GVQFNPT.ESVsIDlAYEGS.......G...S....G....D.....W.R.T...D....GFIVGVGYKF.................................................. 1 2 6 10 +6144 PF06317 Arena_RNA_pol Arenavirus RNA polymerase Moxon SJ anon Pfam-B_12490 (release 9.0) Family This family consists of several Arenavirus RNA polymerase proteins (EC:2.7.7.48) [1]. 
32.40 32.40 32.60 39.20 31.70 32.30 hmmbuild -o /dev/null HMM SEED 2207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -14.22 0.69 -15.07 0.69 -7.99 14 233 2009-01-15 18:05:59 2003-06-05 14:48:14 6 1 46 2 0 207 0 1086.60 42 99.88 CHANGED M-EplsElKDLlpKals-c-cLucQKhsFLuQscs+hlLhEGLKLLShhlElDSCctsuCpHNocpKoV-sILp-pGllCPoLPhVlPDGaKLsGssLILLEsFVRssPpsFEpKacpDhsKLhsLKcDLppsGITLlPllDGRosY.schhPDWssERhRaLLhcLLchuppssch.hEEpEYpRLscSLSsspsps.GlEslpsLK-sphsa.-+Lhc.hatGlNsclstscl+tclhclaphFRpcl.cpGhhcspFh+o-+ppLlppLsp..hh....hstttpsslsph.hshs+hhsllt.Lhhp.lcptpptpp.ssp.ph......p.ahplLShhNKlKuLKVhNTRRpsLLhLDlIhLshhhclhctpstthpp.....hhlGpthhuVNDRLlsl.tsh+.lcKhlppp...................spahphsp+hLp+uppsL.hsslohhhhtsD..h-hhhplu.phh.c....th+PshpYppspppthssstpphc..........t-php.L+tLSSlSLuLVNSMKTSuos+lh.NEtshtRY.tsVRC+EsYhQcF.hhcuhthhLlYQKTGEpS+CYSl.hsc......hsEhsSFYuDPKRFFLPIFSp-VLhphs-pMlSWL-hss-....lpslcsLL+hLlLsILssPSKRsQpaLQGLRYFlMAaVs-hHHh-LhsKL+.-hlTsoEhllh+LhssLhphlLspuscs...hop+FKFlLNlSYLCHLITKETPDRLTDQIKCFEKFlEPKlcFsslhVNPspsu..TtEpE-thlcuhc+FhSKchpsps-...hKpPGVS+-llShhlShFNsGpLhhpu-LKp...DPhpsShouTALDLuSNKSVVlsKhs-.GphlhpYDapKLVSssVsplsEpFppKGKYpLs.c-h-atI.+sLosLV.spppspssp-chs...........-pLsE.....-tschFcpl+psVphsls+hcpspphp...st...tpppshss...Lcplhssc....thh+hI+uElShH.VcDFDsslLst-hYcElCsuVY.sssh+spYFhpcshc.CPLshLhKNLoT+sYp-pEaFpCFKhlLlQhshsphhG+YcHhp+ppLsFp.-sh+Lc--VRISpRESNSEAlu+tLshohhTsAuL+NLCFYS-ESPTpaTSlussoG+LKFuLSYKEQVGuNRELYlGDLpTKhhTRLlEDYhEAlspphchoCLNsEKEFE+AIhsMKhsVs.GpluhShDHSKWGPhMSPhLFLshLpsLchc..cstsclc.p-sV.slLpWHlHKlVEVPFNVlpAhhpualKRpLGLMspsupoloE-FFaphFp.pGhVPSHISSVLDMGQGILHNoSDhYGLIoEpFINYsLchlasshssuYTSSDDQIoLhcpshhch.-...p-s-EaLplLEFHpaLSspLNKFVSPKSVhGoFsAEFKSRFFVWG-EVPLLTKFVAAALHNlKCKpPpQLAETIDTIhDQsVANGVslclVstIpcRT.pLlcYSsYP.sPFLhhpcpDVKDWVDGoRGYRlQRpIEslhs-s..sphIRsssRKlas+lKpGclaEEahlshlupsss-uLpshhphhus-p.phc-.LpapWLNLsuatsLRhVLRpKlh.oushsL-cEclPTLlKTlQSKLSpNFTRGApKlLuEAINKSAFQSSlASGFIGhCKohGSKCVRssptt.lYIKclhsplpsppt..s.hss.thslhhCppsLcp..
.......lspa.RPlLWDYFslsLoNAhELGsWVhupsph.....sphshhhsNPs.aaslKPpss+.LED+lGhNHllaSlRR.YPplFE-HLhPFMSDLsss+hpWssRlKFLDLCVALDhssEsLullSHllKhKR-EhYIVhps-LuhuHsRpspslspEhslSspclCpNFhhQlhF-ShlpPhVhTTpsLtSFhWFschhpLP-.-uttcLG.hosFl.clhppslpRsMahcDLthGYsh.shchsphhls.ssplhs.sLsspcphpslp-hhthhus.cshslclslolcap+pSpchchcRphshphphps.hphssl............hplcslshhhSGulssHhlLDshpLlhspPhFpGcps..hslsshhhcs.lsts.sptshhpplclshssahh-L.scasacllGP-sp.sPLVlccGtlhcssc+lushpsplpspslhhhlssL..-cp-tltshLssLa.ahtsss.p..t..lchsppshthhh-tacphLscsLcshs-Wh-FtsaulCaSKShsslMIpsspGshRLKGhhCR.LhtsspshE .............l..h+.hh.p.h.p..th..Q+..hLsQ.p...lhhEGhKLLS.hlEl-Ss.tpsC.hN..ph.lp.lL.c.tlhsPsLs.lhPDGaph.spsllLLEsFVRss..pFEpKa.tDh.KL.plptDl...sl.LlPllDGRssa.sp.hs-Ws..+hR.hlhplhp..p...t..hE-.EY.RLh.SLt.h.sp..Gh-sh..l.c.t..a..pl.t.hh.sl.sphp...hh.tl.p.h..ap..l..tt.....h...p...hlt.h.............................h..p..p...hh..l......................................h..hhS.hNKlKu.+lhNTRR..hh.hDhlhh...h..................hh.t.h..sNDRhlsh..h.p.h.phh.tp......................h...h..hh.p.................l.......p...t.......................P.h.Yp..t...................................hphLSplsLuhhNSMKTu.ss+hh.Np.t..pa.t.V.hpEsa.Q.h....t....LhYQKTGEpo+CYul...................SFYsDPKRaF.slhSt.Vl.thh..MhsWlt...........ht..hhhlhl.hls.PoKR.Qh.lQshRYhlMAhssph...pL.pKl....ho.s-..h..h...lh..lht..t.p....hsthFKahLNlSYhCHhlTKETPD.......................................................................................................................................................................................................................................................................................................................................................................................................................................EuluctLsho.hhsuhL+NLCFYSp-SPppasuhusssGpLKFuLSYKEQVGuNRELYlGDLpTKhhTRLlEDa.Euhs..hphoCLNs-pEFppAlhsMp.sVp.u.hshSh................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +6146 PF06319 DUF1052 Protein of unknown function (DUF1052) Moxon SJ anon Pfam-B_12539 (release 9.0) Family This family consists of several bacterial proteins of unknown function. 22.00 22.00 22.70 22.30 21.40 20.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.96 7 250 2012-10-11 20:44:44 2003-06-05 14:49:30 7 1 247 1 81 215 98 150.30 46 88.32 CHANGED MsIlp.hs.sPLhDGRQSEpAhhVRRGsQRLLhphtausLPELsLASGRRADLluLotKGEIWIlEIKSSIEDFRVDRKWP-YRhHCDRLFFATHssVPt-IFPEECGLlLSDGYGAchlREAPEH+lAsATRKulhLpFuRsAAtRLhhAEhsshcs ..............................................+.stpAh....hltRGVpRLhh.s.hG.huslsElsLs..sG..R.RADllAlutK....GEI....WIVEIKSSlpDa.+uD...+..KWs-YRsaCDRhFF.As...cs...s.l...P...p......-......l...h......P-.-..sGLI..........lu..........D.....u....Y.....G.......A.........-..IlR-APEH+.LAuApRKulhL+FARsAAtRLhthp.....ss............................................. 
1 22 51 61 +6147 PF06320 GCN5L1 GCN5-like protein 1 (GCN5L1) Moxon SJ anon Pfam-B_12526 (release 9.0) Family This family consists of several eukaryotic GCN5-like protein 1 (GCN5L1) sequences. The function of this family is unknown [1,2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.65 0.71 -4.28 10 239 2009-09-11 00:23:16 2003-06-05 14:49:52 8 9 196 0 159 223 4 113.00 30 62.23 CHANGED hlKEHpp+Qupp+phQE+h++EAIsuAssLTpuLVDslNsGVAQAYlNQK+L-sEAKpLpssuusFuKQTsQWLpllEsFNsuLKEIGDVENWuRsIEsDM+sIsssLEhsYcspp.sps....S ......................................................pt.p...-ct+p-Al.tus.s....s.lspsLs....c.p.L....sstVsp...ha...hNp++L-pEs+p.Lpt.pssphsKQ.osp.Wlp.hs-shspuLK......El.GDlpNWAc.lEhDhpslsp...sLchs.ctt......s......................... 0 44 78 122 +6148 PF06321 P_gingi_FimA Major fimbrial subunit protein (FimA) Moxon SJ anon Pfam-B_13339 (release 9.0) Family This family consists of several Porphyromonas gingivalis major fimbrial subunit protein (FimA) sequences. Fimbriae of Porphyromonas gingivalis, a periodontopathogen, play an important role in its adhesion to and invasion of host cells. The fimA genes encoding fimbrillin (FimA), a subunit protein of fimbriae, have been classified into five types, types I to V, based on nucleotide sequences. It has been found that type II FimA can bind to epithelial cells most efficiently through specific host receptors [1]. Human dental plaque is a multispecies microbial biofilm that is associated with two common oral diseases, dental caries and periodontal disease. There is an inter-species contact-dependent communication system between P. gingivalis and S. cristatus that involces the Arc-A enzyme [2]. 
30.50 30.20 30.50 30.20 30.40 30.10 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -12.17 0.70 -5.20 32 270 2012-10-02 11:46:57 2003-06-05 14:50:11 6 5 62 6 21 280 1 299.40 14 54.52 CHANGED hsuCop--sst.ssss..............Aplolsl.......tpstssssssssp....sEstIpsLslhlFs..tssth.h........scspsshpss.............h............tst....sG.stplhllAN...................................t.h.hsspshs...cht..shhtplsts.sts....tssLsMouthtss..shhsup..................ph..h.............................sssts..............p..t.ssslpLpRssA+lslsthp..........hphssp.pphtshhh..............hpplalhps...........+pp.s.h......................................................phaGs.shsp...........hssst.......................ahhu........phsshs...sthsss.s.........hsstshh......sssh .............................hhuCsp-p.ps.s..sssst..........t.....splolsl..........tsstssp.sssssp.................sEs..p....IsslplhVFs.....tss..phh..h..................s.psspshtss...........hht...............pss.....sG.ppplhllAN..........................ts.h.ssspsls......chp.....shhsphssp...stp.............t.s...slhMouthtss.....shhssp...........................................................................................p..ttsss..lplpRssA+l..sl....hp...........................h.hs...t..ts..hhshh................................ppl.ahhth................ppp.t.h..............................................................................t.hss.th............................t..............................a..t........................................................................................................................................................................................................................................................................... 0 2 16 21 +6149 PF06322 Phage_NinH Phage NinH protein Moxon SJ anon Pfam-B_12973 (release 9.0) Family This family consists of several phage NinH proteins. The function of this family is unknown. 
26.00 26.00 26.50 27.40 24.50 25.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.03 0.72 -4.35 4 231 2009-01-15 18:05:59 2003-06-05 14:50:30 6 2 200 0 2 53 0 62.60 66 96.08 CHANGED MTaoVKTIPDMLlEsYGNQTEVARhLsCsRsTVRKYstDKEGKhHAIVNGVLMVHRGWsctpDt ..MTaTlKTIPDMLlEsYGNQTEVARhLsCsRsTVRKYhtDKEu+tHAIVNGVLMV.HpGhpt.hD.h................. 0 0 0 1 +6150 PF06323 Phage_antiter_Q Phage antitermination protein Q Moxon SJ anon Pfam-B_12730 (release 9.0) Family This family consists of several phage antitermination protein Q and related bacterial sequences. Phage 82 gene Q encodes a phage-specific positive regulator of late gene expression, thought, by analogy to the corresponding gene of phage lambda, to be a transcription antiterminator [1]. 25.00 25.00 29.00 28.90 19.40 18.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.52 0.70 -4.97 9 404 2009-01-15 18:05:59 2003-06-05 14:50:50 6 1 228 0 15 190 0 214.30 70 99.44 CHANGED Mssp..LpalRpQLhsATADLSGuTKGQL.AahEpAph.DTspYpRK...+lRltDcsTGchITlsNPPlsGKQShAKGuuIsLVp.VEaSTuSWRRAVhSL-EcppAWLLWsYS-shsacaQVsIspWuWpcF...psphss++luuKThERL+pLlWLAAQsVK....sELtGc-h..YphpE.LAsLVGVopcNWScsast+WhsM+chFhpLDppALLsss+sRSpQKss..ppslAcls ..........................MNsQY.LQaVREQLhlATADLSGATKGQLEAWQEpA.F.DTGpY+RK.....K.RhhD...EVTG.....K....MIThDNPPIsGKQShAKGoSIsLVSPVEFSTSSWRRAVLSL-EHpKAWLLWCYStSlpWEaQlsITQWAWsEF...psp.usRKIAGKT.-RLKpLIWLAAQsVK....uE.LhGtEs..YEYQ-.LA.LlGVToKNWScTFTc+WVAMKHIFhpLDS-ALLhVhRTRSKQKAsF.pQslAKl............. 0 0 3 10 +6151 PF06324 Pigment_DH Pigment-dispersing hormone (PDH) Moxon SJ anon Pfam-B_12230 (release 9.0) Family This family consists of several eukaryotic pigment-dispersing hormone (PDH) proteins. The pigment-dispersing hormone (PDH) is produced in the eyestalks of Crustacea where it induces light-adapting movements of pigment in the compound eye and regulates the pigment dispersion in the chromatophores [1]. 
20.30 20.30 21.30 22.30 20.10 20.00 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.90 0.72 -6.13 0.72 -4.59 7 54 2009-01-15 18:05:59 2003-06-05 14:51:14 6 1 44 0 18 66 0 17.80 76 21.14 CHANGED NSELINSLLGlPKlMN-A NSELINSLLGLPKsMN-A.. 0 3 5 15 +6152 PF06325 PrmA Ribosomal protein L11 methyltransferase (PrmA) Moxon SJ anon Pfam-B_12272 (release 9.0) Family This family consists of several Ribosomal protein L11 methyltransferase (EC:2.1.1.-) sequences. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.03 0.70 -5.29 31 4673 2012-10-10 17:06:42 2003-06-05 14:51:37 8 31 3837 31 1275 19899 6047 251.40 29 79.81 CHANGED sWhElplpssp-stEs....lsshL....h-.h....G.uhuVsl.......p.Dup..........t.-psl..a..E.......sh....spt..l..pt............stlh.Ah.......astcs..-.htt.......lls.....t.....lp.t.....ht......hhh.....pts.......phpl.....ppls-pDWspsWpcha+Ph+huc+lhIsPoW.c-h...Pp....sstlslcLDPGhAFGTGTHPTTphCLphLEph.............lpsG..c....sV..l.DlGsGSGILuIAAh+LGApclhuhDlDshAVcuup-NhchN......plss..pltl...h...L.........t-...hs........p.........tph...DVlVANILAsslhpLssplhthlKssGhllhSGIlpcptctV.t.c.sh.p.p.uFpl.shhpcp-Wsslsup+p 
...........................................................................................................................................................................................h......h......t.......s.....s.h...h........p.............................................................................t........................................l...u.h.......h.......t...t...............t................hh.t.........l.t..t.............................t..th.....................................t.h.p.h.......ptlt-.pDW.ptW.h.p...a.c......Ph..ph........s.....c..p...h.....h.I.h.PoW..pph.....t.............sth..l.lDP.Gh...A.F.G........TGp..Hs.T.T.t....hs..l..p...h....L.....-..p.h............................................h..s...u.....c.........sV...l...D..l..G..s..G.S..G..I......L..u....I...A........A.......t.........+.......L..............G.....A.........p........c..........l.....h.....u......h....D.....l......D....s............h.......A.......l.....p....s......u.........c.....-......N.....s.....c....h.....N............................s..l.ss.......pl...p.l.........h...........h...........................t...-.....h.................................p..............tph.D......l....l......l......A......N........I.......L........A........s.................l......h.............h.........h......................h..........t....h...h.......t.........s....G.h...h.h..h.......S...G......l...l...t...p...p........h...p.......l..h..p..th....t.t...th....t.l.........h....h..t.p...tt.Wstlhh.............................................................................................................. 0 424 747 1006 +6153 PF06326 Vesiculo_matrix Vesiculovirus matrix protein Moxon SJ anon Pfam-B_13088 (release 9.0) Family This family consists of several Vesiculovirus matrix proteins. 
The matrix (M) protein of vesicular stomatitis virus (VSV) expressed in the absence of other viral components causes many of the cytopathic effects of VSV, including an inhibition of host gene expression and the induction of cell rounding. It has been shown that M protein also induces apoptosis in the absence of other viral components. It is thought that the activation of apoptotic pathways causes the inhibition of host gene expression and cell rounding by M protein [1]. 20.80 20.80 21.40 39.40 20.30 18.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.41 0.70 -4.90 11 133 2009-01-15 18:05:59 2003-06-05 14:51:58 7 1 41 2 0 94 0 202.50 45 96.55 CHANGED M.ppLpKhht.tt.t..s+pppsp......h...tPPuYp-.............PoAPl....hGh..-th-ph-..thph.phhhcsplplpoppPhcohp-shpslstW.cpYsG.hsp+PFa+hlhlhsuppL+tss...sssspscYsuphpGpshlhHplu.hPPhhph.csaspsaphtttptslslshplphssopcpssslhchh.....shpcpschpphhphaGL.hpppssspalh .................................................................................ap....htEPTAPo............LGIIQsKCKRAD........WLIKSHLTITTNYEIKEWuTWDRAISDILDLYDGNPVFKPILLFVYYVLAYNARKIP....GPoNGVRYGAYFDELTTV..WHAI...PELMNQEsDYSYNHRVlHRKIQYVISFKIQMSSTKRRTSPIESFIE..VTsEGLKHTPQFTTILDRARFVYSLTGGRYVIH.................. 1 0 0 0 +6154 PF06327 DUF1053 Domain of Unknown Function (DUF1053) Yeats C anon ADDA_1330 Family This domain is found in Adenylate cyclases. 
21.10 21.10 21.10 21.10 20.90 20.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.41 0.72 -3.67 12 508 2009-01-15 18:05:59 2003-06-05 15:02:07 9 11 76 0 281 378 0 96.70 28 8.88 CHANGED schhpsWss-+PFtplpptsp.stt.t.............................................ulshtpsph........ppstppsts.--El...D-hltpsI-thSspch...+p-clppahLhFp-suhE+cYspht-shF ................................................................pWs.st.pPF.ph.tt.pp.hs..pt.p.h..................................................................sls.tppp.............cstppp.tps--.El...s-hlhpuI....DupSsc+h...........+u-clpp..hsLhFpc.psLE+c...Yppts.shF.................................................. 0 40 60 162 +6155 PF06328 Lep_receptor_Ig Ig-like C2-type domain Yeats C anon ADDA_13736 Domain This domain is a ligand-binding immunoglobulin-like domain [1]. The two cysteine residues form a disulphide bridge. 20.30 20.30 20.30 21.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.97 0.72 -4.11 12 304 2012-10-03 02:52:13 2003-06-05 15:36:22 6 14 53 4 105 260 0 86.50 28 10.89 CHANGED hsc.sshhhsss.llplGSshohsCllK..ppChp..psssphIlW+hp...h.Pppphphlscps..SplThsshtshps.......sLhsChlspuspcph ...........h....h..hhPsp.ll.plGSslohpCllK..ppst......ssup...pIl.Wphsht.tlPpppYphlscps..SpVThss.hsttps.......slhs..Ch.t.tp..p...................... 0 6 14 37 +6157 PF06330 TRI5 Trichodiene synthase (TRI5) Moxon SJ anon Pfam-B_13220 (release 9.0) Family This family consists of several fungal trichodiene synthase proteins (EC:4.2.3.6). TRI5 encodes the enzyme trichodiene synthase, which has been shown to catalyse the first step in the trichothecene pathways of Fusarium and Trichothecium species [1,2]. 
23.80 23.80 24.40 24.60 22.30 23.70 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.26 0.70 -5.81 3 159 2009-09-10 20:52:56 2003-06-05 17:43:25 6 2 67 40 56 165 0 252.40 45 91.90 CHANGED M-sFPTEYFLsTuVRLLEsI+YRDSNYTREERIENL+YAaNKAApHFAQPRQQpILK.VsPKRLpASLRTIVGMVVYSWuKVSKEsMADLSIHYTYTLVLDDS.SDDPHssMLoFFDDLQAGREQKHPWWsLVNEHFPNVLRHFGPFCSLNLIRSThDFFEGCWIEQYNFpGFPGSDDYPuFLRRMNGLGHCVGGSLWPKELFDEQKpFLEIToAVAQMENWMVWVNDLMSFYKEFDD.RDQsSLVKNaVTCDEITLDQALEKLTsDTLHSScQMl-VFuDKDPclM-TIECFMHGYlTWHLCDaRYRL+EIYE+sccQETcDAtKFRKFF-QAAcVGAV-sEEWAaPsVAaclEVRAspcu-VK-uQ.pAlLoSlEss ....................................................................................................................................................................as+lo+.EhhsslSIahTYsllLDDS...p-DP..h..ssMhsYa.sDLpuGc.pQ.t.H.P.WW.tLVNEHFP.NVLRHFGPFCSLNLIRSTlDFFpGC........W....I..E.........Q...a....N...F..tG..a...P...G.......Sp...D..YPpFLRRhsGLGcshusslWPKc.FsEp...ph....FlE......h.s......sAls.phpsahsalNDlhSFYKE...s.tp-phshl.pshshspthoh.puLc.cl.sp-slpu...s.c.phhtl..h...s.ptss.phhtshptahpGahhhHh..t.RY+h.-h.......................................................................................................... 0 17 28 48 +6158 PF06331 Tbf5 REX1; Transcription factor TFIIH complex subunit Tfb5 Studholme DJ, Wood V anon SWISS-PROT Family This family is a component of the general transcription and DNA repair factor IIH. TFB5 has been shown to be required for efficient recruitment of TFIIH to a promoter [3]. 20.30 20.30 20.80 30.30 20.00 19.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.13 0.72 -4.20 30 254 2009-01-15 18:05:59 2003-06-05 18:43:11 7 1 225 6 186 224 3 67.20 35 85.84 CHANGED MspAh+G.lLlcCDPulKplllplDs.pphs....aIlE-LD.-THLllp...sshlphlKpclschhspspass.p ..............MspshK..G.VLlpC.DPuhKphlLplDpppshs..........FIlc..DL..D.-sHlhlp...sphlphLcp+lschh-pssa....t.............. 
0 57 94 151 +6160 PF06333 Med13_C TRAP240; TRAP_240kDa; Med13; Mediator complex subunit 13 C-terminal Studholme DJ, Wood V anon [1] Family Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med13 is part of the ancillary kinase module, together with Med12, CDK8 and CycC, which in yeast is implicated in transcriptional repression, though most of this activity is likely attributable to the CDK8 kinase. The large Med12 and Med13 proteins are required for specific developmental processes in Drosophila, zebrafish, and Caenorhabditis elegans but their biochemical functions are not understood [4]. 
19.10 19.10 19.80 19.10 18.60 18.60 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.60 0.70 -5.94 23 356 2009-01-15 18:05:59 2003-06-06 10:13:06 7 8 224 0 227 344 0 402.80 26 25.44 CHANGED Vlallsshspss..........uhlshsttFhsL.....pthpspsph.ts-lhLpIlPhchlhss....pphhl.s.schtsLuhslYspC....Ps.tt.tph......sss.hshlscshPcpIsF+lhss..........................pssshpcsshLHlAYs+ShDpcWlsAAWSDspGphppTpoasls.pspshpp........shpplps-IWphohsL...........hspstu+hplllsRlss.lsc-ELtpWppLssp.t.......................spplsLsllsVssssclh...........................................sss.s...s......htspspooPssssts...........shhossphtsusss.tss...................................s.................tss.t.ts-thlh....Dhsp-saullhspsLspssslpch..AltSGYLl+ps...................sss..cshhphcVNllpss..........psshcp..............lL+plLppYRsLusLspshGlpctpt..........................uhlPWHlsslt+ ...............................................................................llahl.s.s..s.................t..p..shhhhtLhpta.phh.p.t.h.....t..tsph.lQllPhp.hlhp.....pp.hh...pphtplAhpsaspC....................s.t.t...pshh............hsh.hhh....hs...ph.Pp.l.......plhss........................................sps.p.c...ssLaluYs....h..S.hDp.RWlhAuhTD.hGchhcTthhs.lshs.sp............ttpp.....................shp...shpclWc.hshsl............hphs..sh.WRlVIsRl.........G.....t.lscsElc..sWshLlspts...........................................................................................ss.loh...slls.h-spsshhh...........................................................................................p.ss.s...hst...............ssthsTPtss.s.ps..............hshsoss.h.tsssss..s.............................................................................................................................................s.ts.......hs..s..s....t....h..........t..tp....ps...t..hh...p...ct...pp.............t..lhp.....sLu.GYhlpps.........................................................................
..................ssh..cs.h..thp..ls....hpss.................t.h.p..................................lL+.lLptYpsLuhLshs...s.ht.ctp............................ss.lPhHh.sh.................................................................................................................................................................................................................................................... 2 55 103 176 +6161 PF06334 Orthopox_A47 Orthopoxvirus A47 protein Moxon SJ anon Pfam-B_13263 (release 9.0) Family This family consists of several Orthopoxvirus A47 proteins. The function of this family is unknown. 25.00 25.00 81.80 81.70 19.90 24.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.80 0.70 -5.27 2 47 2009-01-15 18:05:59 2003-06-06 10:35:14 6 1 18 0 0 40 1 220.20 90 98.68 CHANGED MGNKNIKPSKENRLSIL.KD+MDSFKRGS.....FREKS+ATIQRFSSLRREHlKVDHPDKFLELKRtIYtIIQKSSSIDVDKRTKLMSNIKTMMINPFMIEGLMTSLEsLDPDNKMSYSSVMILGEFDIINISDNcAAFEFINSLLKSLLL.........EYSISNDLLYsHINALEYIIKNTFNVPERQLILRsQYLTPIFSDLLKYAGLTIKSNILMWNKpFIKPVSDLYTShpLLHCVT. ..........................................MGNKNIKPSKENRLSIL.KD+MDSFKRGSWATSSFREKS+ATIQRFSSLRR...EHlKVDHPDKFLELKREIYtIIQKSSSIDVDKRTKLMSNIKTMMINPFMIEGLMTSLEsLDPDNKMSYSSVMILG.EFDIINISDNEAAFEFINSLLKSLLLLNTpQLKLLEYSISNDLLYsHINALEYIIKNTFNVPERQLILRGQYLTPIFSDLLKYAGLTI.KSNILMWNKpFIKPVSDLYTShRLLHCVTV.... 0 0 0 0 +6162 PF06335 DUF1054 Protein of unknown function (DUF1054) Moxon SJ anon Pfam-B_13269 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
21.10 21.10 21.40 22.20 20.80 20.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.27 0.71 -4.64 22 540 2009-01-15 18:05:59 2003-06-06 10:37:23 7 2 532 2 56 275 3 191.60 48 95.26 CHANGED FopcDFcsFplcGL-sRMpslpppIcPKhptlGpphushLos.sG.cEhasHlAKHARRoVNPPsDTWlAFussKRGYKhhPHFQlGLW-s+lFlWhAlIaEs...pKtphupthhcphshlpp.lPscahlShDHscscsh.hsphs...LpphhcRhpsVKKuEhLlG+pls+c-slh.hss-chhphlcpsacpLLPlY .....FpsKDFcsFsV-GLDtRMpAlpp.I+PpLcpLGEhFusahospT.G.-sFasHVAKHARRoVNPPcDTWVAFussKRGYKMLPHFQIGlaccplFlhFulhaEsc..sKsphA+s.F.cch.pt.lpp....LPcDahl...SlDHhKPsthhl+.-hspcpLpcslpRhtsVKKuEFhlu+tIsPp-tph.posctFlAhlEpTacphLPhY........................................................................... 0 21 35 48 +6163 PF06336 Corona_5a Coronavirus 5a protein Moxon SJ anon Pfam-B_13277 (release 9.0) Family This family consists of several Coronavirus 5a proteins. The function of this family is unknown [1]. 25.00 25.00 28.10 28.10 21.60 20.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.14 0.72 -4.10 3 85 2009-01-15 18:05:59 2003-06-06 10:42:25 6 1 12 0 0 84 0 63.90 84 99.93 CHANGED MKWLTSFGRAVISCYKuLLLTQLRVLDRLILDHGP+RoLTCARRVLLVQLDLVYRLAYTPTQSLV .MKWLTSFGRAhISCYKuLLLTQLRVLDRLIL-HGP+RsLTCuRRVLLhQLDLVYRLAYTPTQSLV. 0 0 0 0 +6164 PF06337 DUSP DUF1055; DUSP domain Yeats C, Bateman A anon ADDA_8548 Domain The DUSP (domain present in ubiquitin-specific protease) domain is found at the N-terminus of Ubiquitin-specific proteases. The structure of this domain has been solved [1]. Its tripod-like structure consists of a 3-fold alpha-helical bundle supporting a triple-stranded anti-parallel beta-sheet [1]. 
29.10 29.10 29.20 29.10 28.60 29.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.54 0.72 -3.60 139 806 2009-09-10 16:33:45 2003-06-06 10:51:54 7 46 214 12 544 743 3 98.80 29 10.54 CHANGED spthYllst.p.........Whppappalpts...ptt.....................................................................................................................s...........sI...s.....Nsslhpspt............................plc....pltpstD.ahll.........sp.psWphlhph.Y.u...uustlhppsht ..............................................................................................t.hYllst...p.........Wacp.Wcpa.V...thp..s.s.................................................................................................................................................................PG...........PI...D.....NosLhpsp.s.s...............................pL+.t....pLhpstD.Yhll............PpcsWphLhsh.Y.G.....Gs.slsR.h..p.................................... 0 219 288 414 +6165 PF06338 ComK ComK protein Moxon SJ anon Pfam-B_13324 (release 9.0) Family This family consists of several bacterial ComK proteins. The ComK protein of Bacillus subtilis positively regulates the transcription of several late competence genes as well as comK itself. It has been found that ClpX plays an important role in the regulation of ComK at the post-transcriptional level [1]. 
19.10 19.10 19.10 19.30 18.70 18.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.78 0.71 -4.82 19 719 2009-09-11 05:13:32 2003-06-06 10:58:32 6 2 443 0 58 336 0 150.30 29 87.47 CHANGED M......ppph..c...........YhIsptTMslhPhhpstpph..ocllEh-ssahs..h+PhcIIc+SC.+aaGSSYtGRKcuTpclhGlopKsPIhlsPppslYaFPTpSsppt-ChWluapaIcphcphctpcs.hlhFtNGpolpLslShpShcsQhtRouhL+hchpcchctptp .......................................................................p.h.lptts.MhlhPhh..st.th..ocllchcpshh...hps.clIc+SC.+haGsoa...G.++ttopclhs...I..spK.sPIhl..sPh......h.hhFPTc...S...p.....p.sps...lWlshpa.Icshc...t...l..cs..ppp....hlpF.N.s..po.lplcl..ShpslppQhpcohhlhh.hp.p.p...st....................................... 0 15 32 49 +6166 PF06339 Ectoine_synth Ectoine synthase Moxon SJ anon Pfam-B_14542 (release 9.0) Family This family consists of several bacterial ectoine synthase proteins. The ectABC genes encode the diaminobutyric acid acetyltransferase (EctA), the diaminobutyric acid aminotransferase (EctB), and the ectoine synthase (EctC). Together these proteins constitute the ectoine biosynthetic pathway [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.50 0.71 -4.21 60 443 2012-10-10 13:59:34 2003-06-06 11:03:08 7 3 411 0 130 434 87 124.80 50 95.48 CHANGED MIVRsL.p-ltsT-Rc..Vpup..sWpSpRlLLccDsMGFSFH.ThIhAGoEpchaYpNHlEuVYClpGcGclEslssGcsatIcPGshYsLscHD+HhLRApp..-h+hsCVFNPPlTGpEVHDEsGsYsh.s ..............................MIVRsl.c-hppT..-Rc..Vsup.......sWcSpRhlLtcD.sh..GFSFH.TsIaAGT-oc....lH.Y.pN.H.l...E.uVYClpGEGElE..s...l..........t.....s....G....c..sasIp.P.GolYlLDpHDcHhLR..utp..-hphsCVF..NPPlTGpElH.DpsGsYsl.t.............. 
0 41 87 113 +6167 PF06340 TcpF Vibrio cholerae toxin co-regulated pilus biosynthesis protein F Moxon SJ anon Pfam-B_13058 (release 9.0) Family This family consists of several Vibrio cholerae toxin co-regulated pilus biosynthesis protein F (TcpF) sequences. TcpF is known to be a secreted virulence protein but its exact function is unknown [1]. 25.00 25.00 432.60 432.40 18.40 18.20 hmmbuild --amino -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.97 0.70 -5.31 5 44 2009-01-15 18:05:59 2003-06-06 11:25:21 6 1 39 2 1 18 0 316.70 87 93.87 CHANGED FNDNYSSTSTVYATSsEA.......TDSRGSE..HLRYPYLECIKLGMSRDaL-N.CVTVSFPTsS-hFYGAYPucEE...GKpRTKEDFQARLLSGDYsuLEKLYlDaYLAQTTYDWcIPTRDQIEoLVNLAK-GKLoosLNsEYlcGRFLTKsD.GcYsIVsVGsV.ADsoPVKLPAlVSKRGLMGToslVNAlPsEIaPNIK.....lYpsTlSpLcPGssFcAshEYDssELpKHGYSs....lsDVpucVLVGVPuuc.GVIYsPVYpEos+sYahSsNhPGKclYslSTNDlHNGapWSssMFSNuKY+TQlVLTKuDGSGV+LYSKAYSEpF FNDNYSSTSTVYATSNEA.......TDSRGSE..HLRYPYLECIKIGMSRDYLEN.CVKVSFPTSQDMFYDAYPSTESD.GAKTRTKEDFSARLLAGDYDSLQKLYIDFYLAQTTFDWEIPTRDQIETLVNYANEGKLSTALNQEYITGRFLTKEN.GRYDIVNVGGV.PDNTPVKLPAIVSKRGLMGTTSVVNAIPNEIYPHIK.....VYEGTLSRLKPGGAMIAVLEYDVSELSKHGYTN....LWDVQFKVLVGVPHAETGVIYDPVYEETVKPYQPSGNLTGKKLYNVSTNDMHNGYKWSNTMFSNSNYKTQILLTKGDGSGVKLYSKAYSENF 0 1 1 1 +6168 PF06341 DUF1056 Protein of unknown function (DUF1056) Moxon SJ anon Pfam-B_13260 (release 9.0) Family This family consists of several putative head-tail joining bacteriophage proteins. 21.80 21.80 23.20 23.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.07 0.72 -3.94 6 36 2009-09-11 05:07:47 2003-06-06 11:28:42 6 1 35 0 6 37 0 63.10 37 99.65 CHANGED M.......IFKphFshIWclFDVLhFILAhIslslTsFhhshshGsIololshhLhGhlSElloc..pKGGD ............M.IhKphFphIWp.hFD.lLhFILuhIslslssFhhshshGslululshlLsGhlSEhlsp...Kt......... 
1 2 3 5 +6169 PF06342 DUF1057 Alpha/beta hydrolase of unknown function (DUF1057) Moxon SJ, Bateman A anon Pfam-B_13294 (release 9.0) Family This family consists of several Caenorhabditis elegans specific proteins of unknown function. Members of this family have an alpha/beta hydrolase fold. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.87 0.70 -5.60 6 46 2012-10-03 11:45:05 2003-06-06 11:38:02 7 2 10 0 44 763 581 261.50 31 89.71 CHANGED EPKhhpKLVKFpscpt+hV-lpAVYEDohsSGSshGTVVuFHGSPGSHNDFKYlRsph-chsIRFIGlNYPGFc.TsuY.sQpasNhERpsYocALL-cL-lc..GKlIhhGHSRGCENALQTAssh......ssHGlVMlNPsGhRhHKGIpPh.RhpolsalaphLPp..hlusuh......hathhculGhKVpcGEEAsuAhRuhhshuLEcQlt.I-KLpcps.sKphIsFGGKDaLlEEEIVhEsLc+apGLsHFsh.ccpIoEc-htKIhcSFsssQpGsSVFlAcDsHFQNKopA-LlAEss+uhFD ....................................................................................................hph......tp.hplp.As.YpDs..hs...s..G..os..h....G...T...V.l.u.h.HG.o.P...G.S..H.p.D.F.K.Y..lp...s..h..L....-.p..h...s..lR.hIs...lNaPGF....t......o.s...............s.h....p................p..p...a.....s..N....ER...p..s.a.s.....p...u...l....lp.pL.s..lp.....s+.l..lhlGHSRGsE.NA...Lph..u..sth...................s..h..G...hs....hl.N...s...sG....h+...H..+..u...Ip.P..h.....+.h..t.h.......ls.h...lhp...h..l.p....hhhsth......................hht.hh.chl..Gh..+...l..p...s..G...c.....A......hsuh+s..h.ph..shcpp.h....l..cchpcps..h...........+hhlsauupDaLlEccI..Ehht..ta..p.shpHa..h.ppp..s-...c.p...h....c...Ihpsa.st.s.pptsol.htp-sHa.pKppAchlucsstthh.......................................................................................................................................................................... 0 21 26 44 +6171 PF06344 Parecho_VpG VPG_P3B; Parechovirus Genome-linked protein Yeats C anon ADDA_6741 Domain This family is of the Parechovirus genome-linked protein Vpg type P3B. 
25.00 25.00 37.30 36.00 17.80 16.50 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.28 0.72 -6.56 0.72 -3.98 4 49 2009-01-15 18:05:59 2003-06-06 12:02:01 6 2 11 0 0 50 0 20.00 88 0.92 CHANGED RAYNPTLPVAKPKGTFPVSQ RAYNPTLPVsKPKGTFPVoQ 0 0 0 0 +6172 PF06345 Drf_DAD DRF Autoregulatory Domain Yeats C anon ADDA_2536 Motif This motif is found in Diaphanous-related formins. It binds the N-terminal GTPase-binding domain; this link is broken when GTP-bound Rho binds to the GBD and activates the protein. The addition of DAD to mammalian cells induces actin filament formation, stabilises microtubules, and activates serum-response mediated transcription ([1]). 22.10 22.10 23.30 22.70 19.80 19.10 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.16 0.73 -6.14 0.73 -3.85 3 216 2009-01-15 18:05:59 2003-06-06 13:25:51 6 12 65 11 89 181 0 15.00 91 1.47 CHANGED GVMDSLLEALQSGAA GVMDSLLEALQSGAA 0 13 19 50 +6173 PF06346 Drf_FH1 Formin Homology Region 1 Yeats C anon ADDA_2536 Family This region is found in some of the Diaphanous related formins (Drfs) ([1]). It consists of low complexity repeats of around 12 residues. 40.00 40.00 40.50 40.40 39.90 38.60 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -12.57 0.71 -12.86 0.71 -4.72 4 113 2009-01-15 18:05:59 2003-06-06 13:37:54 7 15 26 0 46 81 1 139.50 65 14.81 CHANGED PPPPPLP..uGssIPPsP...................PLPsGssIPPPPPLPGGssIPPPPPLPGsAu.IPPPPPLP.GssuIPPPPPLP.GusuIPPPPPLP.GussI.PPPPPLPGGuGI.PPPPPLPGtsul.PPPPPLPsGsGl.PPPPPhPG..AsGIPPPP..PGhGhPPPPP.FGhGVPssPsLP ............................................PP.sPPL.s..GssIP.sP...PPLP...GsuI.PP.PPP....LPG.su.IPP.PPPLP.Gs..uIPPPPPLP.G..s.uIPP.PPPLP.G.s.uI.PP.PPPLPGus..GI...P...P.P....PPLPGts.Gl..PPP....PPLPsGsGl...P.P.P..PPhPG..usG.IPPPP...PshGhPPP.P....t.......................................... 
0 6 6 10 +6174 PF06347 SH3_4 Bacterial SH3 domain Moxon SJ, Bateman A anon Pfam-B_13248 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. These are composed of SH3-like domains. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.55 0.72 -4.42 29 1011 2012-10-02 18:48:24 2003-06-06 13:57:49 8 42 560 0 333 3517 2117 55.40 27 37.89 CHANGED cssplslRpuPsssupVs.shlcsGlssplt.pspspWp+lp.ssGtpGWltpstLhG .............................sstlslRsuPu.tsutlh..sh....l..c.t.G.h..s.l.p.l......t....p.....p.....p.....s.......s.....W....pcl.....c...s.....s....G.....h......p.....GWltpshL.G.................. 1 110 232 271 +6175 PF06348 DUF1059 Protein of unknown function (DUF1059) Moxon SJ anon Pfam-B_13303 (release 9.0) Family This family consists of several short hypothetical archaeal proteins of unknown function. 20.50 20.50 20.80 20.50 20.20 20.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.71 0.72 -4.15 17 231 2009-01-15 18:05:59 2003-06-06 14:01:29 6 2 177 0 99 216 25 56.20 26 83.59 CHANGED hK.pFpCuslGhsCuachp.AsoE-EllctlttHA+hsHGhsclPp-hlscl+psI+cs ..................h.Ctt..h..s..hs..Cshtlp.Aco-sEllctsspHAtssH.s..hs..s.s.-hhppl+t.h....................... 0 21 46 78 +6177 PF06350 HSL_N HSL; Hormone-sensitive lipase (HSL) N-terminus Moxon SJ anon Pfam-B_13329 (release 9.0) Family This family consists of several mammalian hormone-sensitive lipase (HSL) proteins (EC:3.1.1.-). Hormone-sensitive lipase, a key enzyme in fatty acid mobilisation, overall energy homeostasis, and possibly steroidogenesis, is acutely controlled through reversible phosphorylation by catecholamines and insulin [1]. 
25.00 25.00 25.10 29.80 17.70 24.40 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.84 0.70 -5.34 6 166 2009-01-15 18:05:59 2003-06-06 14:21:03 7 6 104 0 86 151 0 251.30 43 37.71 CHANGED hDpptlFsoLhslCc-NhuaFu.....uspsEsupRLpGsasulp-HhcsscPLVppltohhHcaDFDppTPGNGYRShlaVVcsCltHhl+huRYlsuNRsoIFFRpua.ltElEAYsphLspLpshLhasQpLhphs.ssGcLF.tsEu+ssEchlpchsTlpphCFYGRCLGFQFssSIRPhLphluIGMsuFuEsYhppp..usIuhAsSShaTSGKYhlDPELRupch.pIoQNhcVcFhKuFW.lsEoElhpoLsShsuopVKVNRllolPPEslpLPhspsscho................VsIssPpAHhGsG.PVpsRLIStshRcG..u .................................................................hhtsL..htlsp-NhsaFs...........ts.schupRl.sshhtl.p-phhtlcshltplh.thsp...haDh....D.pT..PuNGYRSll.psspsClhHhlp.hs.+hlts.sRpolFFR..t.sa.htElEAY.hthLsp.Lp.........shh....hs..ppLh.ths....psGt...L..F....s-p..........thstphlpchsolcpsCFYGRCLGF..Q.Fssul+shLphluluhsoauEp..Ytppp....suluhsspSlhooG+ahlsPELRutch.cIhQshclcFhKuFWslsE....plh..........pplsshsusslpVschlplPsp.shphPhts....s.phs.......................VsIssP..AHhGsu.PV.hRLlShchRpG..u............................................. 0 32 39 65 +6178 PF06351 Allene_ox_cyc Allene oxide cyclase Moxon SJ anon Pfam-B_13374 (release 9.0) Family This family consists of several plant specific allene oxide cyclase proteins (EC:5.3.99.6). The allene oxide cyclase (AOC)-catalysed step in jasmonate (JA) biosynthesis is important in the wound response of tomato [1]. 
21.60 21.60 22.20 22.20 18.90 21.00 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.04 0.71 -4.82 10 96 2009-09-12 22:21:42 2003-06-06 14:28:10 6 2 48 19 35 100 1 168.90 62 71.60 CHANGED -su+PuKVQELpVYEINERDRGSPAYLRLSpKsV.......NSLGDLVPFSNKLYoGsLcKRlGITAGIClLIQHpsEKKGDRYEAIYSFYFGDYGHISVQGsYLTY.EDTYLAVTGGSGIFEGsYGQVKLpQlVFPFKLFYTFYLKGIp.DLPpELLussVsPSPoVEPoPAAKACEPHAVlsNF ..............................................................................p.upPsKVQELpVYEINERDRsSPAYL+.L.Sp.Kps...........NuLGDLVPFoNK............LYoGsL...pKRlGITAGlClLIQHhPEK...........pG...........DRYE.AIYSFYFGDYGHISVQGsYLTY..EDo..YL..AVTGGSGIFEGsYGQVKLpQlVFPFKLFYTFYLKGIs.DLPtELlspsV.PSPsVEPsPtAKAsEPpAslsNF................... 0 10 25 30 +6180 PF06353 DUF1062 Protein of unknown function (DUF1062) Moxon SJ anon Pfam-B_13377 (release 9.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.80 21.80 22.00 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -3.97 15 241 2009-01-15 18:05:59 2003-06-09 11:21:01 7 1 234 0 34 162 2 140.90 45 72.15 CHANGED lNAp+KhLDlWLIYKCspCDpTWNhslFpRhslccIsPtLL-uLpsNDtshlctaAashtsL+RNstcls.t.s-acltcchhsssssphs...lclplphshPhsl+LspLLtppLtLSRoclcpLh-pGhIpssstt...sctL+pc .....................VNuQKKlLDVW.IYKCo+CDhTWNloLFSRlsVucIs+-LasRLhuNDsuslphaAaDsulLKRNsAELu.upPD..F+Ip-R.hll.ul.s.uapp......lsVsVRlupsFpVpLLSlLK+QLhLSsAElKRhlEsGpIoGlohK..hu+KL+..s....................... 1 14 20 25 +6182 PF06355 Aegerolysin Aegerolysin Moxon SJ, Bateman A, Macek P anon Pfam-B_13415 (release 9.0) Family This family consists of several bacterial and eukaryotic Aegerolysin-like proteins. It has been found that aegerolysin and ostreolysin are expressed during formation of primordia and fruiting bodies. It has been suggested that these haemolysins play an important role in initial phase of fungal fruiting. 
The bacterial members of this family are expressed during sporulation [1]. Ostreolysin was found cytolytic to various erythrocytes and tumour cells [2]. It forms transmembrane pores 4 nm in diameter. The activity is inhibited by total membrane lipids, and modulated by lysophosphatides. The potential use of aegerolysins is reviewed [8] with special emphasis on their properties which would allow thier use in therapeutics. 20.60 20.60 21.10 21.80 20.10 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.63 0.71 -4.28 18 123 2009-01-15 18:05:59 2003-06-09 11:40:34 8 1 67 0 67 114 0 120.00 26 84.40 CHANGED uQWVplcIhsphsptslpI+NspLshG+aacsss+cpploss-lsthhIts.ssptplsusGRpss.oGTEGshDlh...Du..-p+IsplaWDsPauu.+sNpaphpcpsscYtlEhushs.psuGslGsVslcl....s+p .......................................phlt..hplhs.phs.tt.sltlcNspLph.....GKaa...p.ss.......cspclossplsthshts.ssph...plsusGRpsssoGTpGshDlh........ss..s..p+l..spl..hWDs.Pasu..sNpaph...ps...ts.....sp...Ytlthsuhs..p.sshGplslpl...t........................ 0 11 37 56 +6183 PF06356 DUF1064 Protein of unknown function (DUF1064) Moxon SJ anon Pfam-B_13437 (release 9.0) Family This family consists of several phage and bacterial proteins of unknown function. 24.60 24.60 25.00 24.80 24.10 24.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.66 0.71 -4.10 5 417 2012-10-11 20:44:44 2003-06-09 11:42:11 6 1 361 0 39 303 62 112.80 39 85.21 CHANGED +SKYNAKKVEaDGIVFDSKsEspYYQpLcsshsusph-+l-lQPKFELhsK.FRKsGpl.RsIcYlADFslRp-u.cllEVIDVKGMlT...c-a+IKAKLFch+Yhp......hKs.KhpGpsahps ..................................hsKYsuKK.lp.h.cGIsFDSKsEscYYph.Lcp....p..pss....p.h..s...cl-lQP+FEL.s............p....h...p...........p.p....................R...sIpYlADFslap.ps...t............hl....c.VlDVKG...hhT.....cshplKtKlFchp....Y.p......h.h.h..p........................................ 
0 13 26 29 +6184 PF06357 Omega-toxin Omega-atracotox; Omega-atracotoxin Moxon SJ anon Pfam-B_14633 (release 9.0) Family This family consists of several Hadronyche versuta (Blue mountains funnel-web spider) specific omega-atracotoxin proteins. Omega-Atracotoxin-Hv1a is an insect-specific neurotoxin whose phylogenetic specificity derives from its ability to antagonise insect, but not vertebrate, voltage-gated calcium channels. Two spatially proximal residues, Asn(27) and Arg(35), form a contiguous molecular surface that is essential for toxin activity. It has been proposed that this surface of the beta-hairpin is a key site for interaction of the toxin with insect calcium channels [1]. 21.70 21.70 23.10 22.20 20.80 20.50 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.12 0.72 -4.37 2 17 2012-10-01 22:06:18 2003-06-09 11:58:20 6 1 5 2 0 20 0 37.10 79 71.75 CHANGED SssCIPSGQPCPYsc.CCStSCTaKpNENGNoVpRCD ..SsTCIPSGQPC.PYNENCCSpSCTaKp.NENGNTVKRCD. 0 0 0 0 +6185 PF06358 DUF1065 Protein of unknown function (DUF1065) Moxon SJ anon Pfam-B_14830 (release 9.0) Family This family consists of several Benyvirus proteins of unknown function. 25.00 25.00 50.00 49.90 20.30 18.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.58 0.72 -4.33 2 32 2009-01-15 18:05:59 2003-06-09 12:33:53 6 1 5 0 0 18 0 99.80 96 94.41 CHANGED MVLVVKVDhSsIVLYIVAGsVVVShLYSPFFSN-VKAuuYAGAlF.suGCIMDRNSFAQFGuCDIPKaVA-SIo+VAhKEhDADIhschssVss+hVsLhEslh.lh.RlF MVLVVKVDLSNIVLYIVAGCVVVSMLYSPFFSNDVKASSYAGAVFKGSGCIMDRNSFAQFGSCDIPKHVAESITKVATKEHDADIMVKRG-VTVRVVTLTETIFIILSRLF 0 0 0 0 +6187 PF06360 E_raikovi_mat Euplotes raikovi mating pheromone Moxon SJ anon Pfam-B_14708 (release 9.0) Family This family consists of several Euplotes raikovi mating pheromone proteins. 
Diffusible polypeptide pheromones, which distinguish otherwise morphologically identical vegetative cell types from one another, are produced by some species of ciliates. In the marine sand-dwelling protozoan ciliate Euplotes raikovi, pheromone molecules promote the vegetative reproduction (mitogenic proliferation or growth) of the same cells from which they originate. As, understandably, such autocrine pheromone activity is primary to that of targeting and inducing a foreign cell to mate (paracrine functions), this finding provides an example of how the original function of a molecule can be obscured during evolution by the acquisition of a new one [1]. 25.00 25.00 51.10 50.70 17.60 17.20 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.55 0.72 -3.74 6 6 2009-01-15 18:05:59 2003-06-09 13:02:30 6 1 1 6 0 12 0 35.30 37 62.72 CHANGED DhC-pAsAQCshThCp.hCt...sp.hC.lsVhss..Cs DhC-pAsAQCshThCp.hCt...sp.hC.lsVhss..Cs 0 0 0 0 +6188 PF06361 RTBV_P12 Rice tungro bacilliform virus P12 protein Moxon SJ anon Pfam-B_14960 (release 9.0) Family This family consists of several Rice tungro bacilliform virus P12 proteins. The function of this family is unknown [1]. 25.00 25.00 25.50 67.40 23.60 18.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.42 0.72 -3.82 3 11 2009-01-15 18:05:59 2003-06-09 13:05:48 6 1 2 0 0 11 0 100.10 90 100.00 CHANGED MSADYPTFKEALEKFKNLESDTAAKDKFNWVFTLENIKTTADVNLASKGLVQLYALQEIDKKINNLTAQVSKLPTTSGSSS.AGAIVPAGSNTQGQYKAPPKKGIKRKYPA MSADYPTFKEALEKFKNLESDTAuKDKFNWVFTLENIKTTADVNLASKGLVQLYALQEIDKKINNLTAQVSKLPTTSGSSS.AGAIVPAGSNTQGQYKAPPKKGIKRKYPA 0 0 0 0 +6189 PF06362 DUF1067 Protein of unknown function (DUF1067) Moxon SJ anon Pfam-B_15074 (release 9.0) Family This family consists of several hypothetical Mycobacterium leprae specific proteins. The function of this family is unknown. 
25.00 25.00 25.40 213.20 23.90 22.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.32 0.72 -3.43 2 7 2009-01-15 18:05:59 2003-06-09 13:07:25 6 1 1 0 0 7 0 97.00 97 76.46 CHANGED MTTPTPQGHDMHTKTPLPRGANNYPHTHACIDIAFSTAQVPSPWHHQHVDQAASTTDMLTCAALIVSTAAKHTKPHRKQAVSHPsTKTPQHSppR.p MTTPTPQGHDMHTKTPLPRGANNYPHTHACIDIAFSTAQVPSPWHHQHVDQAAsTTDMLTCAALIVSTAAKHTKPHRKQAVSHPPTKTPQHSKTRQQ 0 0 0 0 +6190 PF06363 Picorna_P3A Picornaviridae P3A protein Yeats C anon ADDA_6741 Family This family consists of the P3A protein of picornaviridae. P3A has been identified as a genome-linked protein (VPg) and is involved in replication ([1]). 21.10 21.10 21.10 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.36 0.72 -4.01 4 57 2009-01-15 18:05:59 2003-06-09 14:59:20 6 4 18 0 0 59 0 99.70 76 4.63 CHANGED phhs.lcE.hphphspLIEthEshhtPpsSsFtCFAsph.s.K.+.pAscKVhsWsps+hpphhsFV.RNKuWhTlhSsloShlSILhLVhhhaKKEcpc-E ..........DAIPYIDEYLNIEMSTLIEQMEAFIEPRPSVFKCFAoKl.usps+.KAuKEVV-WFSsKIKSMLSFVERNKAWLTVVSAVTSAISILLLVTKIFKKE-SKDE......... 0 0 0 0 +6191 PF06364 DUF1068 Protein of unknown function (DUF1068) Moxon SJ anon Pfam-B_14602 (release 9.0) Family This family consists of several hypothetical plant proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 
25.00 25.00 35.60 33.60 21.80 21.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.29 0.71 -4.66 3 84 2009-01-15 18:05:59 2003-06-09 15:51:09 7 5 22 0 54 84 0 152.60 49 70.17 CHANGED MA+HTAALKlGLALLGLSMAGYILGPPLYWHLTEALAtVSAoSCPACsCDCSohPLLTIPptLSNuSFsDCAK+DPEV.....NEDTEKNYAELLTEELKQREAEShEKHKRADsGLLEAKKlTSSYQKEADKCNSGMETCEEAREKAEcALVEQKKLTShWEERARQKGWK-GuTKPsVK ..............................hhlhuls.shhlhGPsLYW+............h..........tp......sh...s........................t.........ts.......s.....CssC.....CDC.....s.s..sl.hpls........ph..pDCu+pDP-l.....scEhEKsas-LLsEELKLpEt.ApEppc+ss.hsLlEAK+lASQYQKEA-KCNuGhETCEEARE+AEusLscp+KLTuLWEpRARQhGWccs.......t......... 0 6 37 46 +6192 PF06365 CD34_antigen CD34/Podocalyxin family Moxon SJ anon Pfam-B_14609 (release 9.0) & Pfam-B_17463 (release 8.0) Family This family consists of several mammalian CD34 antigen proteins. The CD34 antigen is a human leukocyte membrane protein expressed specifically by lymphohematopoietic progenitor cells. CD34 is a phosphoprotein. Activation of protein kinase C (PKC) has been found to enhance CD34 phosphorylation [1]. This family contains several eukaryotic podocalyxin proteins. Podocalyxin is a major membrane protein of the glomerular epithelium and is thought to be involved in maintenance of the architecture of the foot processes and filtration slits characteristic of this unique epithelium by virtue of its high negative charge. Podocalyxin functions as an anti-adhesin that maintains an open filtration pathway between neighbouring foot processes in the glomerular epithelium by charge repulsion [2]. 
24.40 24.40 24.60 24.70 24.30 24.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.41 0.71 -4.67 8 185 2009-01-15 18:05:59 2003-06-09 16:06:09 7 2 43 0 85 157 0 179.50 35 45.08 CHANGED lLsLsEsosCEcatpsscEpLlc.llCcshpushs..tstColpLApspssppsllhslsscsplssp...hphL+c+ps-LcchGIp-hshpcpusccshpc+..hs.hlIslVshuu.lLLlhslhshYssapRcS.+hspQRLsEELphVENGhHDNPTL-Vh.EsuSEMQEK+.ssLNGEhs-....SWhsP...hss.sKcDl.-....EEDTHL ................................................................hLphscs..C..t.hppp.pstp.Lhp.llCpt.ttphss...tts.Cpl.Lupsp.p.ppllh...slsscptlssp....hphLccp.ppcL.c.clGIpsh..shts.sstpshpp+......hlIsLVs.G..hLLslhhhshYhhh.pRRS.p.stpRL...sE-h.h.sE.NG.psssslpsh..tspsEhQcK..sslNtth.t-.........p...hhs......sp.h+pch.......tDTcL................ 0 3 7 29 +6193 PF06366 FlhE Flagellar protein FlhE Moxon SJ anon Pfam-B_14631 (release 9.0) Family This family consists of several Enterobacterial FlhE flagellar proteins. The exact function of this family is unknown [1]. 25.00 25.00 31.20 30.90 24.80 23.00 hmmbuild --amino -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.23 0.72 -4.18 13 533 2009-01-15 18:05:59 2003-06-09 16:11:06 8 1 520 0 49 181 1 97.60 72 81.20 CHANGED GssLspRG.sssSsPLpssssls..sGhlToVsWRYplsussPsGLpl+LCsto..RCVtLsG.usGpTcuFsG.sAspshpFlaclsGpGpL.PsLpVpSNQVlVNY...R ...............GlTLsaRG.ShSSuPLosp.pPs.......u.G.lMTLVAWRYQLhG.P...TPuGLRVRLCSQS..RCVEL-G.QSGTTsAFuGlsAsEPLRFlWEVPGGGR.L.IPsLKVppNpVIVNYR.......... 0 3 15 29 +6194 PF06367 Drf_FH3 Diaphanous FH3 Domain Yeats C anon ADDA_2536 Domain This region is found in the Formin-like and and diaphanous proteins [1,2]. 
20.00 20.00 20.10 20.00 19.90 19.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.06 0.71 -4.91 37 1087 2012-10-11 20:01:00 2003-06-09 16:19:41 11 51 230 17 649 1003 4 187.70 26 17.15 CHANGED suappllcAhhshtthtt..psRF.psllsshcp...................pspplchpsusMthINtllsss...p.-.........lphRlHLRsEhhthGLpcllschcph....cs-pLppplphFcppctcDhp-lh..........p+hc........shpl-hccssplhchl...hpplpcopupshh.lSlLQHlLLh.......psctttt.phhcll-phlsplslppph.t.........ppctslphslppL.......lc ..........................................uachlLpAhsphtphpt..ppRF..psllpthcp........................................................ppsslphtlAsMphINsllpus......-..-..............lpaRlHLR.EF....hphGL.....p....p.hLp..c.L.+ph................cs-p.Lp....hQlp.hap-p.tttD.tpLh..............................................cchc.........................shph...s.h.....c.......ps..p...p...l...hphl...........hpp.l.pc.o.ps.pshh...lShLp+hLhh...............pc...ttt.phhpl.l-phlpplsh.pph.p...........p.php..ht............................................................................................................................................................ 1 194 270 470 +6195 PF06368 Met_asp_mut_E Methylaspartate mutase E chain (MutE) Moxon SJ anon Pfam-B_14693 (release 9.0) Family This family consists of several methylaspartate mutase E chain proteins (EC:5.4.99.1). Glutamate mutase catalyses the first step in the fermentation of glutamate by Clostridium tetanomorphum. This is an unusual isomerisation in which L-glutamate is converted to threo-beta-methyl L-aspartate [1]. 
25.00 25.00 72.00 44.00 21.70 21.00 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.49 0.70 -6.30 5 223 2009-09-11 06:01:28 2003-06-09 16:20:39 6 3 212 6 51 172 5 425.00 53 90.69 CHANGED lPs+KRFu+tLc+Asp-GKlLsQPRAGVuLLDEHIcLLcsLpcEGuADLLPoTIDSYTRpN+YEcAtlGLc+St-uspShLNGFPlVNHGVcuCR+llcuV...-hPlQVRHGTPDARLLAElslAuGFoSaEGGGISYNIPYoKuVsL-col-sWQYCDRLlGhYEEpGl+INREPFGP.LTGTLVPPSlSpAluIlEGLLAsEQGVKsITVGYGQsGNlsQDIAAl+AL+ElupEYLssauasDlsloTVFHQWMGGFPcDEuKAaulIuhuuslAuhSGsTKVIVKoPcEAsGIPTusuNAsGL+sT+phLsMl-cQ+ls.h-sl-pEpulIKcEs+uILcKlFELGcGDlA+GTV+AFEsGVLDIPFuPScsNAGKhhPsRDssGslRlL-hGsVPlsc-lcphH+c+lcERAchEGRElSFQMVlDDIhAVScGRLIGRP .............................lPcpKpFuhtLhcAcpcG+TLsQPRAGVALh-EHIcLLcsLp-E..sDLLPoTIDuYTRlNRY-EAtsGIccSl-uGpShLNGhPlVNHGVsuCR+lsEsl...ptPlQlRHGTPDARLLAEIuhAuGFTSaEGGGISYNIPYuKcVoLE+SIccWQYsDRLhGh.YE.Ep.GlcINREPFGP.LTGTLlPPhISpulAIIEGLLAlEQGVKSITVGYGQsGsLsQDlAAIpuLRELucEYhppaGa...sDhcloTVFHQWMG..GFPcDEuKAFulISaGAulAuhuGATKVIsKoPHEAhGIPTstANhpGL+so+Qh.L.s.Mls-.QchP...s.ssl.-hE.h.ElIKpEsRAlLsKVaELG..sG.DlA+GTVhAFEAGVLDVPFAPuttNA.GKllPsRDNsGAIRlL-sGslPlsc-Ih-hH+chlt.ERA+hEGRpsoFQMVlDDI.AlS+u+LlGRP......................... 0 21 37 46 +6196 PF06369 Anemone_cytotox Sea anemone cytotoxic protein Moxon SJ anon Pfam-B_14701 (release 9.0) Family Sea anemones are a rich source of cytotoxic proteins. Cytolysins comprise a group of more than 30 highly basic proteins with molecular masses of about 20 kDa. Cytolysins isolated from the sea anemone, Heteractis magnifica, include magnificalysin I (HMg I), magnificalysin II (HMg II) and Heteractis magnifica toxin (HMgtxn). These are highly homologous at their N-terminals. HMg I and II have molecular masses of approximately 19 kDa, and pI values of 9.4 and 10.0, respectively. 
Cytolysins isolated from other sea anemones Actinia tenebrosa (Tenebrosin-C, TN-C), Actinia equina (Equinatoxin, EqT) and Stichodactyla helianthus (ShC) exhibit pore-forming, haemolytic, cytotoxic, and heart stimulatory activities [1]. 25.00 25.00 25.00 25.20 20.90 24.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.21 0.71 -4.86 3 46 2009-01-15 18:05:59 2003-06-09 16:28:07 7 1 24 19 21 55 0 153.10 36 87.33 CHANGED -VAGAVIDGASLoF-ILcKVL-ELGKVcRKIAVGVDNESGtTWTALNTYFRSGTSDVlLPacVPNoKALLYoGRKS+GPVATGAVGVLAYhMSsGNTLAVMFSVPFDYNWYSNWWNVKIYcGKRRADQ+MYEELYYN.NPaRGDNGWapRNLGY.GLKhRGFMTSuG-AKLpIHISK ...............................................................................hpRplslthpN.oshph.hs.t...sYh.SGtsp..ls..l...tpshhhshpKspGs.sATGuVGVlsYt..h......ss......u......p......T......lAlMFSVPaDYNLYSNWasVtl..ap.sp+..p..sDpphYcphY.s.........s..s.....h....cs.c.s.sht..pt..........uh.....ul.......c.......hcuhMss.GpAhlplcl............................................ 0 3 8 17 +6197 PF06370 DUF1069 Protein of unknown function (DUF1069) Moxon SJ anon Pfam-B_14815 (release 9.0) Family This family consists of several Maize streak virus 21.7 kDa proteins. The function of this family is unknown. 
25.00 25.00 28.20 204.90 20.50 18.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.24 0.70 -4.96 2 13 2009-12-01 13:51:58 2003-06-09 16:30:04 6 1 13 0 0 7 0 197.60 99 100.00 CHANGED MGSKCKAIAGGNDHVQSSSLDGTNSSVRYILPLRSHSQPTRELVEVDVALARLPTsIRWRNVRTNPPVGLHVEQPTPFHHEAMTQLPAHLPCRRPGFEGVRVGKYSLRGRSLSAGSGVIHQPHHTGSGVGVPAGCRVSNEVVVDGNLVRQSLAGVAVAlVRSSGIGVDEVTYASGGDRYHGGPGMLECLDLEGWPIGLAASPLQPS MGSKCKAIAGGNDHVQSSSLDGTNSSVRYILPLRSHSQPTRELVEVDVALARLPTCIRWRNVRTNPPVGLHVEQPTPFHHEAMTQLPAHLPCRRPGFEGVRVGKYSLRGRSLSAGSGVIHQPHHTGSGVGVPAGCRVSNEVVVDGNLVRQSLAGVAVALVRSSGIGVDEVTYASGGDRYHGGPGMLECLDLEGWPIGLAASPLQPS 0 0 0 0 +6198 PF06371 Drf_GBD Diaphanous GTPase-binding Domain Yeats C anon ADDA_2536 Domain This domain is bound to by GTP-attached Rho proteins, leading to activation of the Drf protein. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.08 0.71 -4.94 20 1367 2012-10-11 20:01:00 2003-06-09 16:33:03 8 48 240 17 827 1229 2 151.50 21 17.18 CHANGED htps.s-cplpphFtcllc-h...sLsccc+...cshhshshpcKWphlhpcppsphpt.............................pppspptSPcaYlcpLpssshsp..............ppLcSLpVuLpopPluWVppFhphpGhssLhslLpphppcpsps............p.shcpcacll+CLKAlMNNpaGhcpsLs..pppslhhlupSLsosp.pTpphsh-lLosLCl 
.............................................................................................................................................th............sls...p.t......t...h...t.h..s..hK.hphh.p...................................................................................................................................tt.......st.hh.p.h.t............................................phlppL..c...ht......L......p...........s......p....h.........s.........WlppF...........t..........p..Gh....shLhp....hL.tt.......h............t.........................................................................p....h.hlhC.l.+AlM.......N...p.p..................G.h.......p...h..h.ht............p.p.s.......l....ls.u.l.....s.....tp.........t...hhh...s..h..clLshlC........................................... 0 248 348 591 +6199 PF06372 Gemin6 Gemin6 protein Moxon SJ anon Pfam-B_14816 (release 9.0) Family This family consists of several mammalian Gemin6 proteins. The exact function of Gemin6 is unknown but it has been found to form part of the Pfam:PF06003 complex. The SMN complex plays a key role in the biogenesis of spliceosomal small nuclear ribonucleoproteins (snRNPs) and other ribonucleoprotein particles [1]. 25.50 25.50 25.90 25.80 25.10 25.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.24 0.71 -4.93 2 70 2009-01-15 18:05:59 2003-06-09 16:41:22 7 1 63 2 51 78 0 152.90 40 95.14 CHANGED hspWhchS..papsas.K.V+llhs-.+phcGWlhssDPVStsllLsp.hE.GthSsoslhGHulpphEh.pEh-aph...EKL.asatph-spGa...DLEc++oslh+WLEKN+lsVT.p...pchlCVhGVLsI-PPYs.EsCpSSN.IILpRIQcLIQsh.sspp ............................tW..hsPhpapsYlhKpV+Vpss.-tpcacGWlhTsDPVS......usl.VLl.....shhE.c.u.p..h..o.l.pslhGHAVpsVEsls..-.u...-.....p.....ps+....E+L...chFh..st...-.s..p.u..hS......E-...LccRKs.....sL+cWLccN+IPlsE....pucstpoL.sVA.GVLTI-PPYsPEsCsSoNpIILuRlQsLIpsh................ 
0 15 18 34 +6200 PF06373 CART Cocaine and amphetamine regulated transcript protein (CART) Moxon SJ anon Pfam-B_15325 (release 9.0) Family This family consists of several cocaine and amphetamine regulated transcript type I protein (CART) sequences. Cocaine and amphetamine regulated transcript (CART) peptide has been shown to be an anorectic peptide that inhibits both normal and starvation-induced feeding and completely blocks the feeding response induced by neuropeptide Y and regulated by leptin in the hypothalamus. The C-terminal part containing the three disulfide bridges is the biologically active part of the molecule affecting food intake. The solution structure of the active part of CART has a fold equivalent to other functionally distinct small proteins. CART consists mainly of turns and loops spanned by a compact framework composed by a few small stretches of antiparallel beta-sheet common to cystine knots [1]. 21.10 21.10 21.60 22.40 19.80 18.20 hmmbuild --amino -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.56 0.72 -4.02 9 78 2009-09-10 15:42:36 2003-06-09 16:46:50 6 2 44 1 33 82 0 70.40 66 65.26 CHANGED sss.spEK-L...ltALpEVLEKLQsKRIs.hEKKhGpVPhCDlGEQCAlRKGuRIGKLCDCPRGosCNaFLLKCL ......................p..p.ppEKpL...l-ALQEVLcK.....LcoKRI..PlaEKKaGQVPhCDsGEQCAVRKGARIGKLCDCPRGosCN.FLLKCL.......... 1 1 7 15 +6201 PF06374 NDUF_C2 NDUFC2; NADH-ubiquinone oxidoreductase subunit b14.5b (NDUFC2) Moxon SJ anon Pfam-B_15334 (release 9.0) Family This family consists of several NADH-ubiquinone oxidoreductase subunit b14.5b proteins (EC:1.6.5.3). 
21.70 21.70 21.90 24.10 21.30 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.68 0.71 -4.09 5 104 2009-01-15 18:05:59 2003-06-09 16:51:15 6 2 82 0 56 110 0 105.50 39 95.41 CHANGED uRsss-PLcFLPD....EARSLPPPKLsDPRLlasGlLGYCoGLhDNhlRRRPVhhAGLHRQLLFlTuFVFAGYYalKRcNYhYAVRD+-MFuYIKLHPEDFPEKDKKTYGEVLEsFHPVR ....................................s.........hLss....csp.LPP..Ptlhs.st..lahGh.hGassu..l..lsNhl....p....RRPhh.uGlHRplLasoshhhhGYalsKhpshh....aAt+DpphhpYlcLHPEDFs...c...p.-+..Kpau..-lhE.ahPlR........ 0 14 19 38 +6202 PF06375 BLVR Bovine leukaemia virus receptor (BLVR) Moxon SJ anon Pfam-B_14559 (release 9.0) Family This family consists of several bovine specific leukaemia virus receptors which are thought to function as transmembrane proteins, although their exact function is unknown [1]. 26.40 26.40 26.90 26.90 25.00 25.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.33 0.71 -4.39 16 148 2009-01-15 18:05:59 2003-06-10 10:48:18 6 9 89 2 77 143 0 137.40 48 14.42 CHANGED .opEElc+RREARphEQsNNPaYLKuuspspsutss.........htsh-cIPlspI-Lsl....PLc.....lsG.......hhtSDKYlptppppp..........pcKscK........KcKK++++.....pKcupph....sss---D......tPlchVNpshtE.MPEGAp.SD...pcccstcssDPHRALDIDL- ....................................................h.sEEELtRRREAR+pEQANNPaYlKSSPospKphpss..............sslE+IP..VspIDLoV....PLK..............VPG.......hshSDpYlKh--cR+.........tp.cKcK+..........K+K+pccc.....cKttp+hpts.toES-EDh.....sPsp.Vshss.tE.MPEsAhsSD.....-...-cpDPNDPa+ALDIDLD.................. 0 26 31 58 +6203 PF06376 DUF1070 Protein of unknown function (DUF1070) Moxon SJ anon Pfam-B_14060 (release 9.0) Family This family consists of several short hypothetical plant proteins of unknown function. 
25.00 25.00 30.30 30.30 21.20 15.90 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.51 0.72 -4.09 15 109 2009-01-15 18:05:59 2003-06-10 10:50:05 7 2 18 0 64 105 0 34.80 61 50.00 CHANGED Asus...t..s.DGpslDQGIAYlLMhsALslTYLhH ............APAP.....AP.oSDGpuIDQGIAYlLMllALlLTYLlH.. 0 4 35 50 +6204 PF06377 Adipokin_hormo Adipokinetic hormone Moxon SJ anon Pfam-B_14600 (release 9.0) Family This family consists of several insect adipokinetic hormone as well as the related crustacean red pigment concentrating hormone. Flight activity of insects comprises one of the most intense biochemical processes known in nature, and therefore provides an attractive model system to study the hormonal regulation of metabolism during physical exercise. In long-distance flying insects, such as the migratory locust, both carbohydrate and lipid reserves are utilised as fuels for sustained flight activity. The mobilization of these energy stores in Locusta migratoria is mediated by three structurally related adipokinetic hormones (AKHs), which are all capable of stimulating the release of both carbohydrates and lipids from the fat body [1]. 22.90 22.90 22.90 24.20 22.50 22.00 hmmbuild --amino -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.44 0.72 -3.82 15 60 2009-01-15 18:05:59 2003-06-10 11:50:57 6 4 44 0 22 63 0 52.90 42 44.60 CHANGED QLNFoPsW..GKRuususu................................tssC+s.ss-slhhIY+lIQsEAp+llpCpc .QLsFSPsW..GKRSssssu................................tssC+s...ss...-sLhtIY+hlQsEAp+hlpCpp........ 0 8 10 19 +6205 PF06378 DUF1071 Protein of unknown function (DUF1071) Moxon SJ anon Pfam-B_14587 (release 9.0) Family This family consists of several hypothetical bacterial and phage proteins of unknown function. 
20.50 20.50 26.80 27.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.05 0.71 -5.10 11 191 2009-01-15 18:05:59 2003-06-10 12:28:58 6 1 183 0 11 110 95 162.20 42 77.30 CHANGED pohFEpLusIcVsc+lEK....KssLoYLSWAaAWsplKKthPsATapl+cFst.th.t.sh.........sahcophGahVpVsVTl....cslocpphLPVhDaRNKsl.................tK..PosFDINpolhRCLVKAlAhaGLGLYIYuGEDLP.........ch.pp.pp.pshppp.tpppsthsp ...................................olFEpLsshsVN-+hEp..............K...s..sLsYLSWuaAapElKKlpPssohcltEas..s.p.s.sY..........ahsstpGa.hVpVuVTV....csh..TcspaLP..VhDaRNK.ul...............................tK..sTsFDINKuhpRChVKA.lAh.H.GLGLYIYsG.EDL.Ppss...p.tclp-c.pph.s.pQphtc.tsh..ppt.......................... 0 3 8 9 +6206 PF06379 RhaT L-rhamnose-proton symport protein (RhaT) Moxon SJ anon Pfam-B_14617 (release 9.0) Family This family consists of several bacterial L-rhamnose-proton symport protein (RhaT) sequences [1,2]. 19.00 19.00 19.70 19.70 18.90 17.70 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.36 0.70 -5.46 2 639 2012-10-02 19:55:49 2003-06-10 12:32:38 7 2 582 0 70 330 44 313.70 70 99.30 CHANGED MsNAIhhGIhWHLlGAASAACFYAPFKQVKpWSWETMWSlGGlVSWLILPWTlShLLLPDFWtYYGpFsluTLLPVFLFGAMWGIGNINYGLTMRYLGMSMGIGIAIGITLIlGTLMTPIlpGpFDVLltT.GGRMTLLGVFVALIGVuIVohAG.LKERtMGIpAEEFNLKKGLlLAVMCGIFSAGMSFAMsAAKPMHEAAuALGlssLYVALPSYVlIMGGGAllNLuaCFIRLAplpNLSlKADFSlA+PLlIoNILhSALuGLMWYLQFFFYAWGHA+IPtQYDYMSWMLHMSFYVLCGGlVGLlLKEWKsus++PVAVLslGClVIIlAANIVGLGMAu 
.....................................................MspAIhhGIhWHLIGAASAACFYAPFK+VKcWSWETMWSV.GGIVSWlILPWsISALL.LP......s...FWA...YYu.pFsl.S...TLLPV...FLFGAMWGIGNINYGLTMRYLGMSMGIGIAIGITLIVGTLMTPI......I........N.......G........N..F...D.V.L.Is..T.E.G..G..R...M.TLLGVhVALIGVGI.VTRA..GQLK.....E...R.K.M.........G.....I.....K.......A......E......E......F.....NLK......K.....GLlLAVMCGIFSAGMSFAMNAAKP.M....HE.AA.A....A.L.......G.V.........D....P.....LYVALPSYVlIMGGGAllNL...GFC..FIRLAKVK..........sL.....S..l...KADFS.L.....AK....P....LI..IpNlLLSALGGLMWYLQFFFYAWGHARIP..AQ..YDYh..S..WMLHMSFYVLCGGlVGLVLKEWpNAGRRPVsVL..SLGCVV..IIlAANIVGlGMA........................................................................ 0 23 47 59 +6207 PF06380 DUF1072 Protein of unknown function (DUF1072) Moxon SJ anon Pfam-B_14592 (release 9.0) Family This family consists of several Barley yellow dwarf virus proteins of unknown function. 25.00 25.00 35.80 35.60 19.70 19.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.17 0.72 -4.29 8 42 2009-01-15 18:05:59 2003-06-10 12:58:32 6 1 7 0 0 41 0 38.70 54 93.28 CHANGED MDDLHVIAVClLAhTVLoG....lGAVhGCChGChpss.sssps MDDLHVIAVCl..LAhTVLoG....luAVlGCChGChps..su....... 0 0 0 0 +6208 PF06381 DUF1073 Protein of unknown function (DUF1073) Moxon SJ anon Pfam-B_14928 (release 9.0) Family This family consists of several hypothetical bacterial proteins. The function of this family is unknown. 
26.40 26.40 26.40 28.10 25.00 24.50 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.80 0.70 -6.00 25 555 2009-01-15 18:05:59 2003-06-10 13:00:25 6 4 357 0 63 498 42 335.50 22 77.28 CHANGED shsphphtuhYpsshls+phl-hsA-DhsRpshplputst....................cphpplcsphcclplppplp-slpasRLaGuuhllhtl..........cspphspPl..s.-plttuuhctlsslcthhlpss...hlspsshussaGcPphapl.s.st..............stcIHpSRllhh..........................ththPh..pht+.s.phaGpSllpp.lh-tlpshDsotsusupL.......lactplssl+ssshpplhsssp...splhcclshhpphcu.pulhllDs.......p-chpphssshu.GLc-llsphhptluusucIPhs+LhGpsPsGhNuoG-uDhcsYYDplpuhQ-pclpsslcpLhchlhhSthu........spslpa..cFsPLhpho-p-+A-ltpppu-uspthlss .................................................................................................................h........thY.p.shhhtphl-..u-Dhh+psh..lputt.....................cphsthp.t.thpcl...p..lpptlhpslh....t...R..haGh.uhlhlhs..........pttshppPl.......p.c..th...hthchl.h.h...ph.h....lsssh..hhp.csh...u........sshs...p.sp.a.l.........................uhcIHcSRllhh..........................th.h.h.....p.s...hh...G.shlpp..lh-tL.pshssshsu.ssph....................lhpttlp..hhpsp........p.....hpphhs..tt..........t...................psltp.pl.phlpphp.sNp.G.hhhhss..........p-ph-shp.h......s.hu.sLc-h.l....s.httpluussphPhs+Laupp.spGhsusGcuDh..pNYYDhlpu....lQE....p.....hhps..sh....p+...lhchht..hs..............th.h..cFssLh.hoppppsphthphsphhpth...h............................................................................... 0 13 33 46 +6209 PF06382 DUF1074 Protein of unknown function (DUF1074) Moxon SJ anon Pfam-B_14962 (release 9.0) Family This family consists of several proteins which appear to be specific to Drosophila melanogaster. The function of this family is unknown. 
21.30 21.30 21.40 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.46 0.71 -4.31 3 76 2012-10-02 14:16:02 2003-06-10 13:06:12 6 4 15 0 41 84 4 103.50 23 66.99 CHANGED Mussplptsp.hhst.lSluup...D.sspthsphhsh.IR.Rs.phstshhphtp.++htthGRKRGRKEYCPPIYKRQKVARVTNNGYLNFMTEYKKRFYGLSPQDMVHYAAKQWTQLSMAEKEAFKSKKPSTITLKSPAQYVACEMKSDVAGGQQSSCQRQSPSARLRESERRSSRSKTLCRSA ................................................t................................................................................................................s.................+p..p..p...u.....l..o.s....s.uYlNFlRpa....++....+....a..s....s..Lps.p-llppAA+tWspLoptcKpta...cp.................................................................hh...t............................................ 0 7 7 30 +6211 PF06384 ICAT Beta-catenin-interacting protein ICAT Moxon SJ anon Pfam-B_15027 (release 9.0) Family This family consists of several eukaryotic beta-catenin-interacting (ICAT) proteins. Beta-catenin is a multifunctional protein involved in both cell adhesion and transcriptional activation. Transcription mediated by the beta-catenin/Tcf complex is involved in embryological development and is upregulated in various cancers. ICAT selectively inhibits beta-catenin/Tcf binding in vivo, without disrupting beta-catenin/cadherin interactions [1]. 25.00 25.00 28.30 26.60 24.50 20.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.51 0.72 -4.20 8 132 2009-01-15 18:05:59 2003-06-10 13:28:22 6 4 77 3 83 108 0 76.50 47 44.90 CHANGED MsRDltlGKlucDlYsQQKVEILhAL+KLGEpLoP-EEAFLsspAuAshSQFEKVop.slGuGcK..lhAhAuSplEsspK .............MsR-hhsGKhsc-hYhQQKVElLhALRKLGppLTssEEtFLps...A..Gssh..SQ.hpplsp..s...lspGsc..lhAhup.pscctp.................... 0 22 28 52 +6212 PF06385 Baculo_LEF-11 Baculovirus LEF-11 protein Moxon SJ anon Pfam-B_15073 (release 9.0) Family This family consists of several Baculovirus LEF-11 proteins. 
The exact function of this family is unknown although it has been shown that LEF-11 is required for viral DNA replication during the infection cycle [1]. 21.20 21.20 27.30 27.00 21.10 18.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.05 0.72 -3.89 24 60 2009-01-15 18:05:59 2003-06-10 13:31:41 7 1 58 0 0 54 1 94.50 40 79.17 CHANGED CLTRS-VYAllREsINp+K+sh-scNVsAHl.-.ssFss.ppYIRssls+hhIhpucppp...+plshHtpRlpplFsLp.poLcpEYpts.hs+h.tspp CLTRSEVYALlREsINp+K+shpscN.VsAHl.-..ssFps.ppYIRtNls+.hhllsucppp...+plshHtcRlsplFsLp..p...oLcpEYpps.ls+httt.p...... 0 0 0 0 +6213 PF06386 GvpL_GvpF Gas vesicle synthesis protein GvpL/GvpF Moxon SJ anon Pfam-B_15376 (release 9.0) Family This family consists of several bacterial and archaeal gas vesicle synthesis protein (GvpL/GvpF) sequences. The exact function of this family is unknown. 23.60 23.60 24.20 24.20 22.70 22.70 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.62 0.70 -4.79 68 377 2009-01-15 18:05:59 2003-06-10 13:37:45 6 6 145 0 171 417 19 222.00 22 91.36 CHANGED h.sp.ulYlYuIl...sssssh............t...............hsh.Glssss..Vhs....l..phssluAlVSssstsp...pstccsl...........hsHppVlcplhcp.s..slLPh+FGslh.ps.......tcs..lpphL.psttcphpptLsclpGphEhulKlhhsp.t..hhpplhtps.plpthptphttt..........s..stsh..pph...phsphlpptlppcppphspplhctLpslutpsp.pcs....................................hs-chllNsAaLVspsctspFsptlcplspphss...lslchoGPhPPYsFs..shph 
......................................hYlYulh.........tt...................................th..Glssts...lhh...........l.......thssluAlVu.p.ss.tt....pstccpl................hsHppVlptlhtt..s...slLPh+FGslh..ts........pcs.lpphL.ptptpphtptLpplpG+sEhslKshhsp......tth..tts.th....tt..ttth.ttt..................s...sts.a.tph....phtphhtpthttptpphspplhptLpshu.psthpts.............................................spp.lhshuaLlsps......pt......s.t.Ftptlppltpphs...t......hplchsGPhsPYsFst............................. 0 67 130 164 +6214 PF06387 Calcyon D1 dopamine receptor-interacting protein (calcyon) Moxon SJ anon Pfam-B_15400 (release 9.0) Family This family consists of several D1 dopamine receptor-interacting (calcyon) proteins. D1/D5 dopamine receptors in the basal ganglia, hippocampus, and cerebral cortex modulate motor, reward, and cognitive behaviour. D1-like dopamine receptors likely modulate neocortical and hippocampal neuronal excitability and synaptic function via Ca(2+) as well as cAMP-dependent signaling [1]. Defective calcyon proteins have been implicated in both attention-deficit/hyperactivity disorder (ADHD) [2] and schizophrenia [3]. 25.00 25.00 38.30 38.20 22.10 21.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.03 0.71 -4.92 3 132 2009-01-15 18:05:59 2003-06-10 13:45:12 6 2 41 0 72 117 0 169.40 55 98.17 CHANGED MVKLGsNFuEKGuKpPulEDG..FDTVPLITPLDVNQLQhusPDKVVVKT+TEYQP-p+sKGKtRsPpIAEFTVs.p.t.scRh.solLVuFALAFLuCVVFLVVYKAapYD+.oCPDGFVLKHKpCIPtoLEuYYoEQDPSuRc+FYTVIuHYsLAKQSsTRuluPWhSuhuttK................................sh+EsEsPcKuu ............MVKLGsNhu-KssKtPss.....EDG..FpTlPLITPL-VspLQhPsP-..KVl.V.K.T+TEYps-pK.KGKhRsPKIAEFTls...t.sc.+h.solLlhhALAFLsClVFLVVYKsapYD+.uCP-GFVhK......pppCIPtuL-uYYopQ....D.ssuR....p+....FYTVIs..HYslAKQohoRuluPWhos.hut.p................................s.pEscsspp..u................................... 
0 6 9 25 +6215 PF06388 DUF1075 Protein of unknown function (DUF1075) Moxon SJ anon Pfam-B_14186 (release 9.0) Family This family consists of several eukaryotic proteins of unknown function. 25.00 25.00 25.20 27.90 20.40 24.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.86 0.71 -4.54 12 137 2009-01-15 18:05:59 2003-06-11 11:03:00 6 1 80 0 83 122 0 130.30 34 90.73 CHANGED thGphhR.h......Ruhuos.phpc..............tt.stpsPtus...pph.ppsst.++Psph-K+hLlWoGRaKo.-EIPphVS.EMl-suRNKhRVKlsYlMIuLTlluChh.MllSGK+AscR+ESlsphNL-h+s+apEt........uhpupuc ...................................................................t..................................................tp......tsts....spt.h.......pclsh...++Posa-KKlLlWoGRFKpt--IPph..lS...EMl-sA+NKhR...VKlsYlMIuLTllGChh.hlhpGK+AspRp..E...olsphNlc++tch+..Ect..............t....................................... 0 17 23 49 +6216 PF06389 Filo_VP24 Filovirus_VP24; Filovirus membrane-associated protein VP24 Moxon SJ anon Pfam-B_15734 (release 9.0) Family This family consists of several membrane-associated protein VP24 sequences from a variety of Ebola and Marburg viruses. The VP24 protein of Ebola virus is believed to be a secondary matrix protein and minor component of virions. VP24 possesses structural features commonly associated with viral matrix proteins and that VP24 may have a role in virus assembly and budding [1]. 
25.00 25.00 333.70 333.60 20.20 19.30 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.75 0.70 -5.16 3 32 2009-01-15 18:05:59 2003-06-11 11:17:39 6 1 20 4 0 28 0 252.00 62 100.00 CHANGED MAKATGRYNLVsPK+-hEKGVl..FSDLCNFLlTPTVQGWKVYWAGlEFDVsQKGMTLLsRLKoNDFAPAWAMTRNLFPHLFKNPpSsIQoPIWALRVILAAGLpDQLLDHSLIEPLoGALsLISDWLLTToToHFNlRTppVKDQLShRMLSLIRSNIlNFINKL-TLHVVNY+GLLSSIEIGTooasIIITRTNMGFLVEVQEPDKSAM..-o++PGPVKFSLLHESsLKPaopscpSuhpSLIMEFNSpLAI MAchosRYNL..PppshEKuls..Lsshspalhp.oltGWpVhWush.FclsppGMsLLH+LKoN.hsPtWp.TRNLFsHLFpNPpSTI.pPhhALRllLusuLpDQ.LpQSLI.shpuhlphlS-WLLhpsTothplpsphlt..Lo.cMhpllhuslhpFhNKL.sLHVVN.pGh.SSIEIthosppIIITRsNMGFLVEVpc.Dhpsh..pohhstsVhFuLltEusL+taoQhppup..sLh..hNSplAI 1 0 0 0 +6217 PF06390 NESP55 Neuroendocrine-specific golgi protein P55 (NESP55) Moxon SJ anon Pfam-B_16185 (release 9.0) Family This family consists of several mammalian neuroendocrine-specific golgi protein P55 (NESP55) sequences. NESP55 is a novel member of the chromogranin family and is a soluble, acidic, heat-stable secretory protein that is expressed exclusively in endocrine and nervous tissues, although less widely than chromogranins [1]. 
22.60 22.60 22.80 22.70 22.30 22.50 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.00 2 34 2009-01-15 18:05:59 2003-06-11 11:26:26 7 6 21 0 16 42 3 183.70 46 67.10 CHANGED MDRRSRsQ.hRRARHNYNDLCPPIGRRAATALLWLSCSIALLRALAoSssRAQQR.AAQRRoFLNAHHRS.....AAQVhPEs..sESDHEcp-hEPpL..PEC.EYpp--a-hE..SETEsES-IESETEh....ETEs-TAPTTEPETEPEDE.G...P+tsTFpQSLTpRLpAL+LpSsDASPpRA.PoTQEsESsppGEEPp....DpDPRDPEEp.E.+cEEppQ.+RCKs++Ps.RRD.SPESPs++GsIPIRRH ....................................................................................................................................................s.sp...spSDp-pp..phc.pL..sEs..-ap.p-h-hE..oETEsES-lEoET-..h....ETEs-Tt.PtTEPETE..PED-pG....P...+.tsTF......sQ..SLTpRLpAL+lp.SsDsp.pps.PosQpspssppGEE.p..P....pD.+sPEEp..-.+.p.....pQ.+RCKs+.+.s..RRD.SPESPs++GPIPIRRH........................................................................... 1 1 4 7 +6219 PF06391 MAT1 CDK-activating kinase assembly factor MAT1 Finn RD anon Pfam-B_16773 (release 9.0) Family MAT1 is an assembly/targeting factor for cyclin-dependent kinase-activating kinase (CAK), which interacts with the transcription factor TFIIH [1]. The domain found to the N-terminal side of this domain is a C3HC4 RING finger [1]. 
25.00 25.00 27.00 27.00 19.90 19.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.40 0.71 -4.73 6 318 2009-01-15 18:05:59 2003-07-03 16:31:22 8 7 278 1 239 314 1 168.20 31 55.13 CHANGED LRKspFRsQlFEDspl-KEV-IRK+lhKIaNKpp-DFs.uL+EYNDYLEElEpIlaNLspslDl-pTccKlctYcKpNK-sIt+N+t+hssEQc.LEptLchE+cpcpc+R.thpp-Epppcht.KcpsKppllD-LpoSshssshlls.p+Ksps.hphEtthcK.cph+psshssuh+pu...phshsslp+lcEthasapPlp ...............LR+spF+hQhF-D.tV-KEV-IR+RVhpl.aNKp--DF....s.oLc.-.YNDYLE..c.......l..E-I..laN.Lss...s..l..D...l..t...p..T...ctclp..pYpp-Np.p.I.t.pNp.t.+.h..........pp.-ppt........hp.ph.ph.Epp.t...p..p+..+...tp...tptpph..t.tt+pthlspLt..........t.u.....p..........s...t.hlt...t.ttt.....t.p.................................................................................h............................................................ 0 76 130 197 +6220 PF06392 Asr Acid shock protein repeat Finn RD anon Pfam-B_20230 (release 9.0) Repeat The Asr protein is synthesised as a precursor and the cleavage is essential for moderate to high acid tolerance [1]. 21.00 21.00 21.10 21.00 20.10 20.90 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.33 0.72 -6.55 0.72 -3.57 8 470 2009-01-15 18:05:59 2003-07-03 16:34:35 6 3 450 0 18 119 1 19.30 85 18.79 CHANGED HHKKQH....KsAsEQKAQAAKK ...HH.KKQH......KAAPAQKAQAAKK. 0 1 3 8 +6221 PF06393 BID BH3 interacting domain (BID) Finn RD anon Pfam-B_16321 (release 9.0) Domain BID is a member of the BCL-2 superfamily of proteins are key regulators of programmed cell death, hence this family is related to Pfam:PF00452 . BID is a pro-apoptotic member of the Bcl-2 superfamily and as such posses the ability to target intracellular membranes and contains the BH3 death domain. 
The activity of BID is regulated by a Caspase 8-mediated cleavage event, exposing the BH3 domain and significantly changing the surface charge and hydrophobicity, which causes a change of cellular localisation [1]. 25.00 25.00 27.20 48.50 24.30 19.80 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.28 0.71 -4.95 7 65 2009-01-15 18:05:59 2003-07-03 16:42:05 6 2 36 5 27 64 0 163.70 51 94.68 CHANGED M-.cV.sNGs....-phpphLlhuFLp...psscspFpcELpsLupE..LPs...th.tp..sELQTDGNRsu+..hpsthEs.DuEspE-llRpIAtpLAphGDcL-+p..I+PtlVsuLssphhNpoLsEE-hppsLAsslppLhpohPsDhEpEKshLllsMLLsKKlAspsPSLL+cVF+TTVsFIsQNhhsYlcpLsRp..- ......................ts-phTsLLlauFLp...ssssspFpcELcsLGpE.......Lss......hpt......-ELQTDGNRsS+.....hhtchEs.DSEsQ.......E-..llppIAtpLAplGDph-+u..I.PtLVssLAhQhhNsshSEEDRpppLAsAlEplh..Qsh..Pt..DhEpEKshLlLuhLLAKKVAsHoPSLLRcVF+TTVNFINQNLhsYlRsLsRN................... 0 1 2 7 +6222 PF06394 Pepsin-I3 Pepsin inhibitor-3-like repeated domain Finn RD anon Pfam-B_13438 (release 9.0) Domain Pepsin inhibitor-3 consisting of two domains, each comprising an antiparallel beta-sheet flanked by an alpha-helix. In the enzyme-inhibitor complex, the N-terminal beta-strand of PI-3 pairs with one strand of the active site flap region of pepsin [1]. The two domains are tandem repeats of sequence, and has therefore been termed repeated domain. 20.60 20.60 21.00 21.20 20.40 20.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.49 0.72 -4.32 13 63 2009-09-11 00:18:44 2003-07-03 16:51:00 8 2 18 2 35 64 0 70.30 29 53.54 CHANGED P+ss+p.oFsThssT.sthhhsGChVpsN+lYlsstalRDLTssEtpELpsFcpchssYpstlppplppphpsLhus ....................p..sFss.hsso.s.hhhsGChVpsN+lYssuhhlR-LTspEhpELppappchstYpp.lppthppphpsh............................ 0 14 18 35 +6223 PF06395 CDC24 CDC24 Calponin Wood V, Studholme D anon Pfam-B_32837 (release 8.0) Domain Is a calponin homology domain. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.67 0.72 -3.92 17 130 2012-10-03 10:10:54 2003-07-03 16:58:00 6 10 124 0 99 201 1 89.40 42 9.83 CHANGED DPVTpLWphFppGsPLChlFNslpP..cp.LsV.......ssss-h+hCKtulYcFlhusKpcLsass--hFsISDlaussTsshlKVlpVlsplL .........DPVspLWphhRpGhPLhhlaNulp.P.....p.p...L.s.l........................ssss-t.+..tsKtu.lacFl.AChp....c...L.s....a.ssp......-hFhIoDL.a...u.s..s.ToGFVKVlpsVsplL................. 0 26 55 87 +6224 PF06396 AGTRAP Angiotensin II, type I receptor-associated protein (AGTRAP) Moxon SJ anon Pfam-B_15509 (release 9.0) Family This family consists of several angiotensin II, type I receptor-associated protein (AGTRAP) sequences. AGTRAP is known to interact specifically with the carboxyl-terminal cytoplasmic region of the angiotensin II type 1 (AT(1)) receptor to regulate different aspects of AT(1) receptor physiology [1,2]. The function of this family is unclear. 25.00 25.00 26.50 26.00 22.90 22.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.10 0.71 -4.55 6 84 2009-01-15 18:05:59 2003-07-03 17:24:07 6 4 70 0 53 93 0 144.00 36 83.59 CHANGED MELPAVNLKlIlhVHWLLToauCls..hsGuYuasNFsILAhGVWAlAQRDSIDAIpMFLsGLluTIFhDllhIuIaaspss.......ltDhhRFSuGMAIlsLlLKPlSChhlYHMaRERGGph.htpGFlGsSp-R.SSYQsIDu.-sPADPhss.ps.u.ss.RGY .......................t.PhlplKsIhhlHalLsoWuhl...s....hsuuYtasNFslLshulWAltp+..DS.l-Alph.hLs.hhshoIhhDIlplulaa.sp.hs............h..tsh.+FusuhuIlsLlL+.PlSshhlY+hapcR....GGph.......h....s.h..t...ssppp.puYpsIDp........P.tt................................................................... 0 17 20 34 +6225 PF06397 Desulfoferrod_N Desulfoferrodoxin, N-terminal domain Finn RD anon Pfam-B_11142 (release 9.0) Domain Most members of this family are small (approximately 36 amino acids) proteins that from homodimeric complexes. 
Each subunit contains a high-spin iron atom tetrahedrally bound to four cysteinyl sulphur atoms This family has a similar fold to the rubredoxin metal binding domain [1]. It is also found as the N-terminal domain of desulfoferrodoxin, see (Pfam:PF01880). 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.95 0.72 -4.56 8 428 2012-10-03 19:45:42 2003-07-04 09:46:30 7 6 397 31 132 306 13 35.50 48 24.00 CHANGED scphpVYKCplCGNIVEVLcsGGGpLVCCGcPMcLh .......h...hchYKCppCGNl.V.E.VhpsG.GG.p..LsCCGc.Mch..... 0 65 116 129 +6226 PF06398 Pex24p Integral peroxisomal membrane peroxin Wood V, Studholme DJ anon Pfam-B_56111 (release 8.0) Family Peroxisomes play diverse roles in the cell, compartmentalising many activities related to lipid metabolism and functioning in the decomposition of toxic hydrogen peroxide. Sequence similarity was identified between two hypothetical proteins and the peroxin integral membrane protein Pex24p [1]. 
24.50 24.50 24.60 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.55 0.70 -5.66 54 690 2012-10-01 21:10:52 2003-07-04 09:52:51 6 64 241 0 464 811 3 261.70 22 35.45 CHANGED s.hosslLsssssplsppLuhhasal...scllpllTWossshhhohLllhsahslh.Yh..............thl.h.hshllhslhh.uhlaha....ts.h..............................................................................tp.pppPoLc......................................................-llhsLp...............slps+sshllpshstls.hh.........hpspsloshlFhhlhloslhlhls........hhllsh..+hlhlhsG.shlloaHsphpcshtphh...............hlsuhchp.............................................................hp....phh.shhpp..chh......h.ps.ps.p......................hph.laE.QR+h.luh...uWos.hhos-chshs.......sc...thsp.Ps..........................h-chp.Pp.....................sWcWs-..........pcWclDhsspthlp............sppc................GWlY..........................................ss...appsph.pDuhscasRRRRWhR ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hph.tlaENQR....h.....h...s...........sW.s..s........t....hh...s...-.ch.....sao..............st...ttpp.................................................................hcsh.p..h.Pp......................tWcW.s.............
.................cWplDh...s..t.hh..............................................................................ptpt......................................GWhYs...............................................................sh..........t..p.p....t...tcph.p.....ph.sRRR+WhR.................................................................................................... 0 129 242 373 +6227 PF06399 GFRP GTP cyclohydrolase I feedback regulatory protein (GFRP) Finn RD anon Pfam-B_63435 (release 9.0) Domain Tetrahydrobiopterin, the cofactor required for hydroxylation of aromatic amino acids regulates its own synthesis in via feedback inhibition of GTP cyclohydrolase I. This mechanism is mediated by the regulatory subunit called GTP cyclohydrolase I feedback regulatory protein (GFRP) [1]. 25.00 25.00 28.20 32.70 23.90 23.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.80 0.72 -4.39 4 69 2009-01-15 18:05:59 2003-07-04 10:35:58 8 1 53 35 39 61 0 80.00 67 93.65 CHANGED PYlLISTQIRhEsGPThVGDEaSDPpLMshLsAcKhpsLGNNFpEYaVc-PPRlVLsKL-+lGYRVlSMTGVGQTLVWCLHKE ......PYlLISTQIRhEsGPTMVGDEpSDPcLMpaLGApKpssLGNNF.EYaVsDPPRlVLDKLE+pGFRVlSMTGVGQ..TLVWCLHKE......... 0 10 13 22 +6228 PF06400 Alpha-2-MRAP_N Alpha-2-macroglobulin RAP, N-terminal domain Finn RD anon Pfam-B_44514 (release 9.0) Domain The alpha-2-macroglobulin receptor-associated protein (RAP) is a intracellular glycoprotein that binds to the 2-macroglobulin receptor and other members of the low density lipoprotein receptor family. The protein inhibits binding of all currently known ligands of these receptors [1]. The N-terminal domain is predominately alpha helical [1]. Two different studies have provided conflicted domain boundaries [2,3]. 
27.30 27.30 27.40 44.40 26.40 27.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.51 0.71 -4.44 10 108 2009-01-15 18:05:59 2003-07-04 12:32:49 6 3 73 7 51 104 0 113.90 50 33.65 CHANGED lsLl.hlllhhsshssuspuuKYSREsN.........EshssscRcsscEFRMsKLNQlWEKAQRL+LoslKLs-LHoDLKIQEKDELsWKK.LKs-GhDcDGEKEA+LRRsLslIho+YGL....DG+KDscplc .....................h....h.hhhh..sstttusKYS+EtN..........p...s.ssh+.p.....tp.FRMtKLNQlWEKApR.h....+..L.osV+Lu.-LHuDLKIQE+DElsWKK.LKs-GLDc....DGEKEA+LpRsLs...VILu+YGL....DG++Dsp.l.p.............. 0 12 15 32 +6229 PF06401 Alpha-2-MRAP_C Alpha-2-macroglobulin RAP, C-terminal domain Finn RD anon Pfam-B_44514 (release 9.0) Domain The alpha-2-macroglobulin receptor-associated protein (RAP) is a intracellular glycoprotein that binds to the 2-macroglobulin receptor and other members of the low density lipoprotein receptor family. The protein inhibits binding of all currently known ligands of these receptors [1]. Two different studies have provided conflicted domain boundaries [2,3]. 21.80 21.80 22.00 21.90 21.20 21.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.78 0.70 -4.48 11 128 2009-09-10 22:25:31 2003-07-04 12:54:44 6 3 87 4 66 123 1 200.30 44 62.32 CHANGED -hhcDP+L-KLWpKA+TSGKFScEEL-sLhREFpHHK-KIcEYpsLL-olu+sE..................................EhacNlIsPp-.sshKppsLps+Hs-LK-+hRsIspGaDRLR+loHpGasotpEFpEPRV.-LW-hApsuNFTccEL-Sh+EEL+HFEsKlEKHpHYQcQLElSHpKLKHhtthGDp-Hls+spE+ashLE-+hKEhGYKVKKHhQDLouRIS..th+HNEL ................................hhcD.+LpKLW.pKAcsSGKFSsEELcpLhcEF.HHc-KlcEYpsLL-slu+s-...............................-hpc.NsIs.sp........-....sthKt.pslps+...cs..-L.K-+hRsIppGhDRL.+.+.lopp.Ga.s.stpEFpEPRVh-LWclA.p.su.N.FTpcEL-.Sh.+.....EEL+HFEuKl-KHpHapcpLclu+pKh+....tthGDtE+lscppE+ashLE-+hK..cl.uhKV+KhhpDLps+I...hh+HsEL.................................. 
0 16 21 44 +6231 PF06403 Lamprin Lamprin Moxon SJ anon Pfam-B_15493 (release 9.0) Family This family consists of several lamprin proteins from the Sea lamprey Petromyzon marinus. Lamprin, an insoluble non-collagen, non-elastin protein, is the major connective tissue component of the fibrillar extracellular matrix of lamprey annular cartilage. Although not generally homologous to any other protein, soluble lamprins contain a tandemly repeated peptide sequence (GGLGY) which is present in both silkmoth chorion proteins and spider dragline silk. Strong homologies to this repeat sequence are also present in several mammalian and avian elastins. It is thought that these proteins share a structural motif which promotes self-aggregation and fibril formation in proteins through interdigitation of hydrophobic side chains in beta-sheet/beta-turn structures, a motif that has been preserved in recognisable form over several hundred million years of evolution [1]. 25.00 25.00 27.60 27.40 23.40 22.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.23 0.71 -4.43 2 8 2009-01-15 18:05:59 2003-07-07 09:25:55 6 1 2 0 1 7 0 122.90 78 98.60 CHANGED MAAshQALLVlALLHLATATPVlsKppVSThSTGaLGHPVGGLGYGGLGYGGLGhuGLGVAGLGYGGLGYPGAALGGsYTHHAA...................LGGLGYPLGIGAGVVAPHVVpuKlAAPLAPVVAAI ...........................MAAshQALLVlALLHLATATPVlsKppVSThSTGaLGHPV.GGLGYGGLGYG..GLGhuGLGVAGL.GYGGLGYPGAALGGsYTHHAA...................LGGLGYPLGIGAGVVAPHVVpuKlAAPLAPVVAAI......... 0 1 1 1 +6232 PF06404 PSK Phytosulfokine precursor protein (PSK) Moxon SJ anon Pfam-B_16071 (release 9.0) Family This family consists of several plant specific phytosulfokine precursor proteins. Phytosulfokines, are active as either a pentapeptide or a C-terminally truncated tetrapeptide. These compounds were first isolated because of their ability to stimulate cell division in somatic embryo cultures of Asparagus officinalis [1]. 
25.00 25.00 38.10 34.70 21.10 19.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.46 0.72 -3.21 39 143 2009-01-15 18:05:59 2003-07-07 09:27:33 7 2 32 0 69 132 0 80.10 35 86.39 CHANGED lLLL.h.s.sh...stAARs....tPs.tspp.tt..........stsppscttt...................................sCc...t.ts-EE.C.LhRRsLs.AHhDYIYTQc+ps ...........................................h.LlL.h...ph....stAAR........Ps.tttp..t....................ttttsptt...................................thh..ttE.............................sC-...t.ts-.EE...C.L.hRRsLs.AHlDYIYTQc+p.... 0 8 37 52 +6233 PF06405 RCC_reductase Red chlorophyll catabolite reductase (RCC reductase) Moxon SJ anon Pfam-B_15577 (release 9.0) Family This family consists of several red chlorophyll catabolite reductase (RCC reductase) proteins. Red chlorophyll catabolite (RCC) reductase (RCCR) and pheophorbide (Pheide) a oxygenase (PaO) catalyse the key reaction of chlorophyll catabolism, porphyrin macrocycle cleavage of Pheide a to a primary fluorescent catabolite (pFCC) [1]. 
24.60 24.60 25.30 25.30 24.10 24.50 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.55 0.70 -5.44 8 86 2012-09-25 10:41:40 2003-07-07 09:28:14 6 2 40 10 34 91 13 202.40 42 84.35 CHANGED AHRElMlsLsuplEsRLGup..LLPsslPsDVp.FcN..tuGsApGSLclRSGspuSs.....................................IDFhLtSWlHCclP..tGGAlNITSLslaLNuSTDAPHFlhEhIQuSPTSLlllLDLlPRKDLsLHPDYLccYYEsTtLD.+pRppltcL.PcspPYhSsSLalRulhSPTAlhsoI-ssp..utttplEEIlpsclussAK-VLplWL-pCs..ssscE...ls-sERpthtKRDpllRpKoIElDLsuslP.RhFGt-VAsRVltsIRcAF .................................t.h..h.t...tpht....hLPsslPsDVp.aps...s..Gs...AtuSLclRsGttuS......................................IDFhltSWlHsclP..tuuulsITol.saLNuSTcAPpFlhEhIQuossSLlllLDL.sRKDLsLpP-YLccYYpsTtLD...ppRppl.cl..PpspPYhSsSLalRSshSPTAlhhplts.tt.......tth-pllp..ltshu.thh.thWlpths..tttt......h.t.tpt..h.tRD..hppt.hEhs.s.phs.phFs.thsspllt.h.t......................................... 0 4 18 25 +6234 PF06406 StbA StbA protein Moxon SJ anon Pfam-B_12747 (release 9.0) Family This family consists of several bacterial StbA plasmid stability proteins [1]. 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.76 0.70 -5.49 7 732 2012-10-02 23:34:14 2003-07-07 09:29:06 6 2 457 34 70 604 104 269.10 33 89.60 CHANGED M+lhlDDGSTNIKLAWh.E.cGcl+shlSsNSFK.-Wsssh...sst...pshNY.lDG.c+YoaD.lSssulhTTcspYQYSDVNllAlHHALhpSGLtPQsV-lsVTLPloEaaDpssQsNhtNIpRKKsNlhR.lp..lps.u-sFsI+pVsVhPESlPAuFpsLtt..lsphES.LIlDLGGTTLDluplhGphsGISclasssplGVSllTcuVhpsLsh.ssocsSpahADclIppRpDpsaLpphIpstschstlhpslpct.cpLtp+VhpslupFsshs+.VhlVGGGApLltsAlKpthth.st+hhhsssPQFsLV.uhhth .......................................................................................................................................l.hDDGopshKl.h..p..ttt.hh...lo.N....S.Fp.t.at......h....t.....tshNY...l.s..s.....pasa..c.h...o.....p.......l.Ts..ph..t..a...QYsp..hNhl..AlpHALh.p..o.Gl.....t.s.p....Vs..lsVT.LPlo...-a.hs...p.ps.Q.s.tp.I..pR...KptNlh...+..lp.........hpt.....up.....s.F...s...I...c...sV...pVh..PE...Sl.PA..s..ap.h.Ltp...........hs.....t....h....-....p.....h...L.....I.......lDlGGTTLD..l..u..h..l....t..u..p..hs..s.....l.o...p..s...h..s..ss...p...lG........V.S.hhsculhps.h........tt....s....s.............h...c...s..S.....p...hh...A.spl.............Ip..p.R......p-....t...........s...a..l..p...p.h.....Ip...s...t.......s.p...hs...........tlh.s.s...I....p....pt.....cpLtp...c.Vhp....s....l......s.....p....F.....p...s.......h.s+....V.......hllGGGA..tl..l.....ts.ul.+pthth....ttp.hhh.tssQhsLs.uhh............................................................................................................................................................ 3 21 40 57 +6235 PF06407 BDV_P40 Borna disease virus P40 protein Moxon SJ anon Pfam-B_15995 (release 9.0) Family This family consists of several Borna disease virus P40 proteins. Borna disease (BD) is a persistent viral infection of the central nervous system caused by the single-negative-strand, nonsegmented RNA Borna disease virus (BDV). 
P40 is known to be a nucleoprotein [1]. 25.00 25.00 53.20 53.20 18.70 17.70 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.29 0.70 -5.60 3 171 2009-09-10 21:38:57 2003-07-07 09:32:44 6 1 17 2 2 111 0 184.30 67 98.87 CHANGED MPPKRRLVDDADAMEDQDLYEPPASLPKLPGKFLQYTVGGSDPHPGIGHEKDIRQNAVALLDQSRRDMFHTVTPSLVFLCLLIPGLHAAFVHGGVPRESYLSTPVTRGEQTVVKTAKFYGEKTTQRDLTELEISSIFSHCCSLLIGVVIGSSSKIKAGAEQIKKRFKTMMAALNRPSHGETATLLQMFNPHEAIDWINGQPWVGSFVLSLLTTDFESPGKEFMDQIKLVASYAQMTTYTTIKEYLAECMDATLTIPVVAYEIRDFLEVSAKLKEEHADLFPFLGAIRHPDAIKLAPRSFPNLASAAFYWSKKENPTMAGYRASTIQPGASVKETQLARYRRREISRGEDGAELSGEISAIMKMIGVTGLN ..........................................................................................................................................................................................................................PHEAIDWINuQPWVGSFVLuLLTTDFESPGKEFMDQIKLVAuaAQMTTYTTIKEYLsECMDATLTIPsVAhEI+-FL-sosKLKtEHuDhF.aLGAIRHsDAIKLAPRsFPNLASAAFYWSKK............................................................................................. 0 2 2 2 +6237 PF06409 NPIP Nuclear pore complex interacting protein (NPIP) Moxon SJ anon Pfam-B_16418 (release 9.0) Family This family consists of a series of primate specific nuclear pore complex interacting protein (NPIP) sequences. The function of this family is unknown but is well conserved from African apes to humans [1]. 
27.00 27.00 27.30 27.40 26.20 26.20 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.83 0.70 -5.65 5 342 2009-09-13 05:25:39 2003-07-07 09:37:51 6 5 26 0 83 271 0 107.00 34 36.57 CHANGED VINTLuDHcHsGscFpGs......PWlhIIIsFLRpYKhsIhLCTohLsVSFLKTIFhScNGHDGSTDVQQRAWRSNRpRQcG.................NKIGLKDVITLWRHVETKVRAKI+KhKVTTKIN+HDKINGKRKTAKcH..LRKLSMKECEHAEKERQVSEAEENGKLDMKEIHTYKKMFQRAQELRRRAEDYHKCKIPPSARKPLCNWVRMAAA..EHRHSSGLPYWPYLTAETLKNRMGHQPPPPTQQHSIsDNSLSLKTPP..ECLLpPL....PPSsDDNLKssP .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 69 69 71 +6239 PF06411 HdeA HdeA/HdeB family Finn RD, Bateman A anon Pfam-B_63431 (release 9.0) Domain HdeA (hns-dependent expression protein A) is a single domain alpha-helical protein localised in the periplasmic space. HdeA is involved in acid resistance essential for infectivity of enteric bacterial pathogens. Functional studies demonstrate that HdeA is activated by a dimer-to-monomer transition at acidic pH, leading to suppression of aggregation by acid-denatured proteins. The gene encoding HdeA was initially identified as part of an operon regulated by the nucleoid protein H-NS [1,2]. This family also contains HdeB [3]. 
21.90 21.90 21.90 22.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.26 0.72 -4.02 33 942 2009-09-11 09:58:26 2003-07-07 12:09:41 6 2 576 13 43 189 4 94.30 39 85.95 CHANGED htlshhhhshsssshs.........ssu.ss.spss..........ssppMTCcEFlsLsspthssVshWhhshsp.phKssDhV..Dhppl-sl.sPpllchCKcsPppplh-h ...............................................hht...uhIhhGhlhhssl...........sNAtsA.tAs-..........sspshTCp-Fl....s....LssphhssVshWshs.ss.phKstD.sV..Dlpth-ss.sPtll-hC+psPQsslhc................... 0 8 19 28 +6240 PF06412 TraD Conjugal transfer protein TraD Moxon SJ anon Pfam-B_11863 (release 9.0) Family This family contains bacterial TraD conjugal transfer proteins [1]. Mutations in the TraD gene result in loss of transfer [2]. 21.90 21.90 22.30 22.00 21.60 21.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.93 0.72 -4.38 53 460 2009-01-15 18:05:59 2003-07-07 13:19:58 6 4 282 0 117 388 14 64.10 29 63.06 CHANGED hpsttR+ccTRchIpLGGLVlKAG.......Lst.-+uhlhGALl.hucphcsstt......pphapttGpphFptc .................p..pR+tcTRchItLGuLVsKAs.......lcsh-+phlhGhLlshuchhptspt.......ttapttGpphhpt................ 0 18 57 85 +6241 PF06413 Neugrin Neugrin Moxon SJ anon Pfam-B_11274 (release 9.0) Family This family consists of several mouse and human neugrin proteins. Neugrin and m-neugrin are mainly expressed in neurons in the nervous system, and are thought to play an important role in the process of neuronal differentiation [1]. 
21.80 21.80 22.60 21.90 21.70 21.40 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.45 0.70 -4.53 6 219 2012-10-04 14:01:12 2003-07-07 13:23:26 6 6 192 0 152 221 0 144.00 29 49.44 CHANGED MEssGAP.RpLTW-AhEQIRYL+cEFPEpWoVPRLAEGFsVSTDVIRRVLKSKFlPolEpKLKQDtKV...........hK+hu.s..h..L.usususKhLsuGp...........SlSsuLLhPGcEsuS.tspsHShALKshcpp.ppsssstppps+sKplQsLc-h.shVssssshGc.tp....ssoDspttccthsGsL.SDpcLEELptsEhGc...ss+VVQRGREFFDSNGNFLYRI ...............tth.....sP.+.+Lo.-sh-tIRhL+pp.P-paosspLA-pFplSPEsIR.RILKS.KWp.P.o.......t...-....p.c......c..ptt................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 40 74 118 +6242 PF06414 Zeta_toxin Zeta toxin Moxon SJ anon Pfam-B_12374 (release 9.0) Family This family consists of several bacterial zeta toxin proteins. Zeta toxin is thought to be part of a postregulational killing system in bacteria. It relies on antitoxin/toxin systems that secure stable inheritance of low and medium copy number plasmids during cell division and kill cells that have lost the plasmid [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.02 0.71 -5.22 40 1247 2012-10-05 12:31:09 2003-07-07 13:29:24 7 17 949 8 252 6029 2517 168.70 21 56.57 CHANGED hsphhss........pt...sp.cpPhullluGQPGAGKopltcthhpph......psshlpl-sD-hRpha................PpYpth.p...t..s.psuphsptt....ushhs-plhspAhcp+hslll-uThpss-hspc.hhcpL+.....c.tGYplplhhlsss.chShttstpRappp...t...............G.Rhlstctactsa.....psh.cslpplcppt..hhsthh.lh...........spss.splYcsphs ..........................................p................ppPh.h.hl.l.uGtsGAGK.oolh....p..h.h.hp.p.h.............tsshl....h...I.........s.....u.....D..p.....h......+.p.....h..............................................................s.p..a.....t......................p.....s..p....h...s..p........................u..s.....p....h..s....p...p....h...l....p...p......h.....h...p...p.....t...h......s....h.l....h..E....s....T.....h....p......s..h....s.......h.........p......p..hhphh+.................................p...tG...Y.p...l.p...l..h.h...l.s..s........p......t...lu.h....p....s.h.Rhtp...............................................................u.+hs.sp..p.thp...hh.....................tt.h..psh.t.th............p.h...lh...........p............t................................................................................................................................................ 0 67 159 208 +6243 PF06415 iPGM_N BPG-independent PGAM N-terminus (iPGM_N) Moxon SJ anon Pfam-B_1338 (release 10.0) Domain This family represents the N-terminal region of the 2,3-bisphosphoglycerate-independent phosphoglycerate mutase (or phosphoglyceromutase or BPG-independent PGAM) protein (EC:5.4.2.1). The family is found in conjunction with Pfam:PF01676 (located in the C-terminal region of the protein). 
21.20 21.20 21.20 21.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.45 0.70 -5.01 35 2910 2012-10-03 05:58:16 2003-07-07 13:45:27 8 9 2783 9 666 2087 892 216.40 43 44.36 CHANGED IspuIc-GcFapN.slhpshscscpsssslHlhGLlSsGGVHSHhcHLhALlclAtc+Glc+ValHsFhDGRDVsPpSutsalcclpshhpc.huhG...cIATluGRYY.AMDR.DpRW-RlcpAYcshs...Gcup.phpsulpslpsu...YspshoDEFlhPolIss...t.suslcDsDuVIFaNFRsDRARQlocshsppc.FcsFpRpph...pl.paVshTpY-sslss.slAF ......................IspuIc-GpFh.p.N.tsLhsAlcps+p.....s.s.p..s.lHlhGLlSsGGV.......HSH.cHlhAh.l-lAtcp.G..s..c..+.lYlHAFLDGRDssP+SAtshlcchppthsc......lGhG......clAolsGRYY.AMDR.DpR....W.-RVc..cAYchls..............u.........c.G..p......................p.s.s.o............Alpulpsu...............Y....s......c.......s................s.......DEFVpPsllts....................susl.....pD.GDulIFhNFRsDRA..Rplocshss.........t...........-...F............c.......s............Fp...............Rpph..................sl...pa.l.shTpYs.sslps..hha............................... 0 260 470 585 +6244 PF06416 DUF1076 Protein of unknown function (DUF1076) Moxon SJ anon Pfam-B_2653 (release 9.0) Family This family consists of several hypothetical bacterial proteins exclusive to Escherichia coli and Salmonella typhi. The function of this family is unknown. 25.30 25.30 25.50 39.00 22.20 25.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.57 0.71 -4.38 12 614 2009-01-15 18:05:59 2003-07-07 14:06:50 7 1 106 2 3 296 0 106.30 47 63.59 CHANGED M.pspupspppc...............s....lpsKIspssFsVsspchpCspptlpCPITLshPEcGVFV+NutsSplCoLaDpsAhocLlpcsuhHPLSREPlossMIlu+-cChFDts+tsFsIh ..........................p...pp...............tt.p.L.sKIppCsFsVs.pchpCspphlpCPITLs.PEcGVFl+NSpsSplCoLYDpsAhscLlccshsHPLSREPIosSMIVp+-pChFDsp+tsFllh........ 
0 0 0 3 +6245 PF06417 DUF1077 Protein of unknown function (DUF1077) Moxon SJ anon Pfam-B_6645 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 25.00 25.00 43.10 38.10 22.10 21.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.61 0.71 -4.80 30 340 2009-01-15 18:05:59 2003-07-07 15:38:58 7 7 291 0 233 300 6 118.90 39 66.13 CHANGED sppspppppppLthKKAW-lAluPhKslPMNlFMhYMoGNSlpIFPI.MhlhMhlhsPlKulho.ssssFcslcss....................plhhs+llYllhplhshul.........ulaKhpsMGLLPsssSDW.....Luappspph ..............ts...ptpppptLhhKKuW.-lAluPhK.plPMNhFMMYM.oG.NolpIFsI.MM.VhMhhhpPlpulhu.ssssFchhcss............s.............................phhh.+llYlhhplhslul.........ulaKhpuMGLLPTptSDWLua.t...t............................ 0 85 134 195 +6246 PF06418 CTP_synth_N CTP synthase N-terminus Moxon SJ anon Pfam-B_226 (release 10.0) Family This family consists of the N-terminal region of the CTP synthase protein (EC:6.3.4.2). This family is found in conjunction with Pfam:PF00117 located in the C-terminal region of the protein. CTP synthase catalyses the synthesis of CTP from UTP by amination of the pyrimidine ring at the 4-position [1]. 
22.30 22.30 24.40 23.50 21.80 21.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.89 0.70 -5.31 98 5282 2012-10-05 12:31:09 2003-07-07 15:48:15 9 16 4818 13 1383 3730 3009 264.10 58 50.53 CHANGED sKaIFVTGGVlSSLGKGlsuAS.lGtLLcsRGhcVohhKlDPYlNVDPGTMSPaQHGEVFVT-DG.............uETDLDLGHYERFlshsho..+psNlToG+.......IYpsVIpKER+G-YLGpTVQVIPHlTsEIKppIp.....ps.sp.............t...sDllIlEIGGTVGDIEShPFlEAlRQhph-h.GpcNshalHlTLVPalpsuGEhKTKPTQHSV+-LRslGIQPDlllsRo-cs.lscsh+cKIALFCsVspcsVIsshDlp.sIYcVPLhLccpGlsphlhccL.......pLs...tps.sh..scW ...........................................TKYIFVTGGVVSSLGKGIsAAS..........LutLLcsRG...Lp............VT.h.K....l....................D....PYINVDP...GTMS......P...a......QHGEVFVT-DG.............AETDLDLGHYERFl.c.hs.....ho..+psNlTTG+..........IYp..........s.V..l.cKERRG.D.YLGs..............T.V..Q.......VI.PHITspIK-+lhcsuct...............................t...sDVlIsE.........I.........GGTV.............GDIESLPFLEAlRQhph-l.........G.+..-sshalHlTLVPYl....t..uuGEh.......KTKPTQH.SVKE..............LRu.....lGIQPDl....LlsR..o..-.c.s....ls.p.sh+pKIAL.FCsVs.t.cuVIpshDsc.oIYclPhhL.ppQGlDphlsc+LpLs....s.psshspW.............................. 0 458 871 1151 +6247 PF06419 COG6 Conserved oligomeric complex COG6 Wood V, Studholme D anon Pfam-B_10345 (release 8.0) Domain COG6 is a component of the conserved oligomeric golgi complex, which is composed of eight different subunits and is required for normal golgi morphology and localisation. 
22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 618 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -13.04 0.70 -6.39 29 369 2012-10-03 17:31:52 2003-07-07 15:53:51 6 10 276 0 265 379 2 501.50 27 82.22 CHANGED DuLptLsphshpNThpsRRpLRh-lc+cllcsNsphlc-FtpVscpLcclssslsplspsscphppplssspppTpsllp-sssLhpp.ccplchKpplLpuFps+FhLsptEhthLssstp......sls-cFFplLs+scpI+cDCchLL..us-..spphGLclM-psspplpsAhp+La+ah...Qcca+sL.....sh-ssph...sssl+cAlphLt-RPsLFpssLDpaspuRcpsLs-sFhsALTtsss.............ust.....+PIEhsAHDPlRYlGDMLAWlHSssVsE+EsLcuLFthpscph.cs.p..hp....................ctls-lls+.lpuluRsL+.RlEpllps.....cccslhhYclsNLLpFYpshFs+l.lsssS.....LlpslpsLpchuhppahshhpsplsslpss........ht.sssD..LtPPcaLt-hLppLpslhcs..hpsShss.............sscp............sphphllppsl-PhlphsppsuttLsshpp..............slahlNsLhhhcoslssasasp........c+lp.lpsplsphtspLhphQhphllppoGLsslhshlp.h.........................p.p.htslssh.hhp.psL.tpssppLscFLPoA.LhDhpssL.ppLpSsphspslsccusctFsc.aphlcthlhsstpt.......................hh.+sstclt.slL 
..................................................t.Lst.hhtsohpsRRpLRtpl-cc.lt.stphlptFttlt...c......................p................Lcpltpplpthspsspp............hppplp.ss....p....tpTts.lltpsspLptp...ppplph+pplhpsFhpcFp.....LotpEhthLpss...................sl....sppFFpsLt+sppI+pcsc.hLL..ttp.....................ppphGl-l.M-phshh....ptuhp+Lh+Wh..........................ptchcsl...................s.-ssph.................sshlppuhp.hL.pc.R.P.sLa.p.sl-phupsRcpsl.ctFhpALTtuss..........................................st.............+PIEh..pu.HDPlRYlG..........DMLAWlHpshssE+EhlpsLh.....h.tst.p...................................................................................pthppllsc.hpuls+sl+.RlEpll.s................................p....s.lhhaplssLLtFYp.h....hpth..lttps...............L..hpsltphp..s.phahs.hp......phttl.tp.................................ss.D..L.Ps...l.phlt.Lhtlhts...hpsohh....................ttpp.......................................tph..llpthl-Phlphsp.t..u..tt...hts.p...................................................thahlNsh.hhpsslt.aphsp.............phhp.lpttlpt.hppLhp.php.llppsGLt.hhphlp.........................................tshsp...hp..tl..tth..phstaLs.ts.....hh..pl.thl.ssthhpplhppshp.hst.at.l.thlht..p....................................hh.hs..plt.h.............................................................................. 1 104 152 220 +6248 PF06420 Mgm101p Mitochondrial genome maintenance MGM101 Wood V, Studholme DJ anon Pfam-B_35151 (release 8.0) Family The mgm101 gene was identified as essential for maintenance of the mitochondrial genome in Saccharomyces cerevisiae [1]. Based on its DNA-binding activity, and experimental work with a temperature-sensitive mgm101 mutant, it has been proposed that the mgm101 gene product performs an essential function in the repair of oxidatively damaged mitochondrial DNA [2]. 
27.00 27.00 36.30 35.80 18.80 18.70 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.17 0.71 -4.93 27 148 2009-01-15 18:05:59 2003-07-07 15:57:53 7 2 140 0 116 150 1 168.10 61 59.67 CHANGED lDWspSaaGLuppPFocEsuclLhsPlsspDlEIKPDGllYLPEIKYRRILN+AFGPGGWGLAPRG-olVs.....s+hVoREYALlspGRLVSlARGEQsY.F.sscuIPTAoEGCKSNALMRCCKDLGIASELWDPpFIRpFKpcaspEsasEHl...sTKKKKKlWh+K...ppphsYPaK .....................................lDWopSaaGLuspPFscEssclLltPlsscDlEI.KPDGllYLPEIKYRRILN+AFGPGGWGLsPRuEolV.o......sKhVTREYALlspGR..........LVSlARGEQ-YF...u.cG...IPT..AoEGCKSNALMRCCKDLGIASELWDPpFIRcFKtpaspEsFVEHV...sTK+K+KlWhRK...-cplpYPaK............................ 0 40 69 100 +6249 PF06421 LepA_C GTP-binding protein LepA C-terminus Moxon SJ anon Pfam-B_425 (release 10.0) Family This family consists of the C-terminal region of several pro- and eukaryotic GTP-binding LepA proteins [1]. 22.00 22.00 23.00 23.70 21.70 21.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.35 0.72 -4.14 45 4879 2009-01-15 18:05:59 2003-07-07 15:58:45 7 24 4651 11 1211 3357 2420 106.40 65 17.86 CHANGED lNG-hVDALShIVH+spAhp+GRplspKLK-lIPRQ.F-lsIQAuIGu..KIIARETIKAlRKs....VlAKCYGGDloRK+KLLEKQK-GKKRMKplGsVElPQEAFlulL+h ................lNG-pVDALShIVH+DpA.pRG+tls-KLK-LIPRQ.F-lsIQAAIGs...+IIARp.....TlKAlRKs....VL.....A.....K....CY..GGD.l.SRK+KLLEKQKcGKK..R..M..K..p..l.Gs...VElPQEAFlAlL+....................... 0 417 786 1029 +6250 PF06422 PDR_CDR CDR ABC transporter Gauthier C, Studholme DJ anon Pfam-B_1005 (release 8.0) Family Corresponds to a region of the PDR/CDR subgroup of ABC transporters comprising extracellular loop 3, transmembrane segment 6 and linker region. 
20.50 20.50 20.60 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.33 0.72 -4.36 167 1227 2012-10-03 10:13:34 2003-07-07 15:59:49 7 44 184 0 900 1245 0 97.70 26 7.84 CHANGED shlPs.Gs..sYt.shs.ssppsCu.ssGuhsGps.....hVsG-s.YlptuasYphuHhWR.....NaGIllAFhlhFhslhllssEhhpsspupG-sLlFpRuph.pt......................tttt.....sDtE ..................................hlPt..G..s...sYt...shs.....tppsCs..hsGuhs...Gp..s......hVsGc.s.YlptsasY............phuH..hWR......................NaGIlhuFhlh.Fhhhh.lls.s..Ehh....p...h....tts...t.u.p.h.lla.+sph.th....................ptt............................. 0 239 480 779 +6251 PF06423 GWT1 GWT1 Wood V, Studholme DJ anon Pfam-B_15982 (release 8.0) Family Glycosylphosphatidylinositol (GPI) is a conserved post-translational modification to anchor cell surface proteins to plasma membrane in eukaryotes. GWT1 is involved in GPI anchor biosynthesis; it is required for inositol acylation in yeast [1-2]. 29.60 29.60 29.60 29.60 29.50 29.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.07 42 299 2009-01-15 18:05:59 2003-07-07 16:01:32 7 11 251 0 222 304 3 145.70 30 28.81 CHANGED tNREGlhShh.GYlAIFLhGpsh.Ghhllspphsh.......................................ttthhphlhpLhhhshlhhhlhhls.......thshslSRRlANhsYVlWVsuhNsthLhhasll-phhhsshh.....................................sspllpAhNpNGLslFLlANlLT..GhVN .........................................................tNREGlhShh.GYluIaLhGh...sh.Ghhlhspptp...........................................................................................................pphhphh..hpLhh.huhhhhhhhhlsp........shs.slSRRhANhsYll.WlhAhshh...hLhh...hh.lh-t...l...hh.sh.................................................................................sstllpAhNpNsLhhFLlANllT..GlVN................................. 
0 72 114 179 +6252 PF06424 PRP1_N PRP1 splicing factor, N-terminal Wood V, Studholme DJ anon Pfam-B_6467 (release 8.0) Domain This domain is specific to the N-terminal part of the prp1 splicing factor, which is involved in mRNA splicing (and possibly also poly(A)+ RNA nuclear export and cell cycle progression). This domain is specific to the N terminus of the RNA splicing factor encoded by prp1 [1]. It is involved in mRNA splicing and possibly also poly(A)and RNA nuclear export and cell cycle progression. 20.90 20.90 20.90 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.50 0.71 -3.83 33 327 2009-01-15 18:05:59 2003-07-07 16:04:43 7 42 271 0 240 327 4 141.10 45 16.43 CHANGED sPsGYVuGlGRGATGFoTRuDhGsu+t.........................................pppp-cDsschs-............stst.uLFupst.h..Dc-DcEADcIYppIDc+MccRR+pc+Ep+pcp-hpchctpp.......PplppQFuDLKRsLusVo---WtsIPEsGDhTtK ..................................AP.sYVsGlGRGATGFTTRSDl..GPAR-uss..t.t...............................................tttppp.ppp-c-.-cchp-...............s.st..uLF.uss...Y...DcDD-EADtIYp.tlDc+..MDcRRKc+REtRp+pEhEcactpp..........PKIppQFuDLK.............RpLusVo--EWtsIPElGDhpt+............................ 0 85 135 200 +6254 PF06426 SATase_N Serine acetyltransferase, N-terminal Wilbrey A, Studholme DJ anon Pfam-B_1192 (release 8.0) Domain The N-terminal domain of serine acetyltransferase has a sequence that is conserved in plants [2] and bacteria [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.29 0.72 -3.78 111 1759 2009-01-15 18:05:59 2003-07-07 16:08:33 9 11 1568 37 424 1080 487 92.10 42 34.01 CHANGED lWppl+pEAcpssppEPlLuualauoILpHsolcsALuapLusKLuss.phs..s.hLp-lhpcAhts.cP............plspusp..sDlhAlh-RDPACpp.ahpPlLaaKGFpAlQu ...........................................lWppI+tEActhsppEPhLuuFhauol.LpHpsLtsALuahLAs+....Lsss......h...s...u.......htlc-lhcc.shts...cP.................phhtsst..sDIpAVhpRDPAscp.aspPLLYhKGFHAlQs................................ 0 116 251 340 +6255 PF06427 UDP-g_GGTase UDP-glucose:Glycoprotein Glucosyltransferase Studholme D, Wood V anon Pfam-B_4648 (release 8.0) Family The N-terminal region of this group of proteins is required for correct folding of the ER UDP-Glc: glucosyltransferase. 21.20 21.20 22.60 22.50 20.90 20.60 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.20 0.70 -5.03 20 379 2009-01-15 18:05:59 2003-07-07 16:09:48 6 11 262 0 258 355 5 194.50 35 14.04 CHANGED SDhhhphsuhh...ush.ppcs.....Rhsh.phls.spposlpl..s.tppcs........shhclsAllDPlo+puQKlssllphLschl..slsl+lahNPps....phs-lPlKsFYRaVlp.sp.pF.sssGthss.PtAhFsslPsspLLThsl-sP-uWlVpshcu..paDLDNIhLpclsss..........................VsApYcLEplLlEGaspDhssssPP.RGLQLpLs..opssshhs..........DTIVMAN ....................................................................t...........Rhph.phhp...spao.slpl..s.....ps.cs.............hhcllAllDPlocp.AQ+hsslLh.lLpplh..sspl................+...lahN.sps.......cls.-..hP..l.K.p........FYRaVL-.sp..pF.ss.s...s....phs..t....PhApFh.slPpssLLTlshpsPpuWhVp.....shco..saDL.DNIhLpplpss............................VpA.YELEalLlEGHshD.......h....ss.s...p......P....P.RGLQhhLG...Tp..pp.Phhs..............DTIVMAN.............................. 
0 86 135 206 +6256 PF06428 Sec2p GDP/GTP exchange factor Sec2p Wood V, Studholme D anon Pfam-B_10665 (release 8.0) Family In Saccharomyces cerevisiae, Sec2p is a GDP/GTP exchange factor for Sec4p, which is required for vesicular transport at the post-Golgi stage of yeast secretion [1]. 22.30 22.30 22.60 22.90 22.10 22.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.33 0.72 -4.16 20 379 2009-01-15 18:05:59 2003-07-07 16:11:44 6 8 194 24 259 345 1 101.60 31 20.52 CHANGED scltccccp+tpsEpppspLppElE-LTASLF-EANcMV....usA+tEppshchKNcpLccQL+Ep-sll-sLQtQLpsLKplhhs.....................hpspp.....spppptsst ........................p.pLp.ccppppchpph+pplppELE-LTAuLF.............EEApcMV....tpAp............h+p...sp.......hE+pLcEscscl-sLQtplttLKplh.p.....................................pt........................................................... 0 71 121 196 +6257 PF06429 Flg_bbr_C DUF1078; Flagellar basal body rod FlgEFG protein C-terminal Moxon SJ anon Pfam-B_807 (release 10.0) Domain This family consists of a number of C-terminal domains of unknown function. This domain seems to be specific to flagellar basal-body rod and flagellar hook proteins in which Pfam:PF00460 is often present at the extreme N terminus. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.41 0.72 -4.06 463 11734 2009-01-15 18:05:59 2003-07-07 16:15:12 8 19 2105 7 2780 7802 1705 79.40 24 23.35 CHANGED ssLp...phussL.a...ttss.............Gs..s...hs.s........ssss..............p...l..tpGhL..EsSNVsslcEhsshIsspRsYEhssKhlpsuDphhpp.sspl .........................................................................................t...........h........................t..........................s.ss.....................tl....tpGhl....E....tSN.V.sl.spEhsshIpsQRuYpu.Ns+sls.o.s.cphhpphlp....................... 
0 883 1749 2237 +6258 PF06430 L_lactis_RepB_C Lactococcus lactis RepB C-terminus Moxon SJ anon Pfam-B_717 (release 10.0) Family This family consists of the C-terminal region of RepB proteins from Lactococcus lactis (See Pfam:PF01051). 20.30 20.30 22.60 47.70 19.90 18.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.65 0.71 -4.06 13 139 2009-01-15 18:05:59 2003-07-07 16:23:41 7 3 51 0 9 140 2 119.00 64 33.07 CHANGED KRhADDNSYKL-..DcsY.EDKspKEpsEcpLhhcAMcS.YTKLLhEpFLLSshDhTDsulMAGLQKNVYPLYDELK-LRGLNGVK-HLSYVuSKQEsYSKRNlAKYLKKAIEQYLPTVKRQDL ...............KRhADDNSYKL-..DpsY.csKtpKpcsEctLhh.pAMcS+YT+LLhEshLLSsh-hpDTslMuGLQcpVYPLYDELK-LRGLNGVKDHLSYVuSKpEsY..S..K+N.lAKYLKKAIEQYLPTVKRQDL....... 0 1 1 7 +6259 PF06431 Polyoma_lg_T_C Polyomavirus large T antigen C-terminus Moxon SJ anon Pfam-B_214 (release 10.0) Family \N 22.40 22.40 23.00 24.00 21.10 22.30 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.37 0.70 -5.82 3 1776 2012-10-05 12:31:09 2003-07-07 16:44:43 6 5 48 37 0 501 0 116.30 75 64.60 CHANGED TKQVSWKLVTEYAlETKCEDVFLLLGMYLEFQYNsEpCKKCpKKDQPsHFKYHEKHYANAtIFADSKNQKSICQQAVDTVLAKKRVDSLHMTREEMLTERFNaLLDKMDLIFGAHGNAVLEQYMAGVAWLHCLLPKMDSVIYDFLKCIVaNIPKKRYWLFKGPIDSGKTTLAAALLDLCGGKALNVNLPLERLNFELGVAIDQFMVVFEDVKGTGAESRDLPSGHGINNLDsLRDYLDGSVKVNLEKKHLNKRTQIFPPGIVTMNEYSVPKTLQARFVRQIDFRPKsYLRKSLpsSEFLLEKRILQSGMTLLLLLIWFRPVADFAsuIQSRIVEWKERLDpEISMYTFS+MKaNVuMG+sILDasREEDSEsEDSGHGSSTESQSQCsSQVS..-sSGsDoQSQsS......aHlCKGFQCFc+P 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................SSTESQSQC.SQVS..EASGADTQEHCT....................YHICKGFQCFKKP..................................... 0 0 0 0 +6260 PF06432 GPI2 Phosphatidylinositol N-acetylglucosaminyltransferase Wood V, Studholme DJ anon Pfam-B_33496 (release 8.0) Family Glycosylphosphatidylinositol (GPI) represents an important anchoring molecule for cell surface proteins. The first step in its synthesis is the transfer of N-acetylglucosamine (GlcNAc) from UDP-N-acetylglucosamine to phosphatidylinositol (PI). This step involves products of three or four genes in both yeast (GPI1, GPI2 and GPI3) and mammals (GPI1, PIG A, PIG H and PIG C), respectively. 
23.90 23.90 32.70 24.00 23.80 23.50 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.95 0.70 -5.06 37 324 2009-01-15 18:05:59 2003-07-07 17:06:08 6 9 277 0 237 310 4 271.90 29 80.42 CHANGED W+KlLY..hcQsYPDNYo..D.poFLppLppNssltpa.........sahpllh-shslsQplssVslhhllFshlhppp..............................................lss.slhhhssshshhualh...hthht.t.s...................................................................................................................hhpsh+ohlllhhhlhhLSPlL+oLTcohSoDoIauloshlhlhalhhaDY..u.............htsh.stt...................htsslShNhulhuullLASRLsoshpVFshllhulplF...sLhPhhppp.l+ths.hhphhhshhhshhshhsh.............hhhshhhhlhahhhllhlshlsPh.......ahltlQpa.............KspIpGPWD ..............................................................................................WcKlLa...+Q.sa..PDNYs..D.tsFLcpLpcNhphp.Y.........................sah.tllhcssslsQplsslslahhha.hhhhptt........................................................................................l.sPh..lhhhsshh.shlGalh...hthh..t.t......................................................................................................................................................h.phhtsl+.osl...lh..hhhhhsLS..PlL...+oLTcohSoDoIaAho.hhhhlhp....lh.haDYu...........................................ht...st............................sslShNsAl.huossLASRLs..........oshcsFshhhhu.lplF...sLhPhhppp.l+...th....s....t.ht.hh..lshhh.sh.hshhsl....................h.hshhhhhh.hh..h..h..h..h..hh...hh..sPh.......hhlt.hQpa.............KppltGPWD................................... 0 79 127 193 +6261 PF06433 Me-amine-dh_H Me-amine-deh_H; Methylamine dehydrogenase heavy chain (MADH) Finn RD anon Pfam-B_20644 (release 9.0) Domain Methylamine dehydrogenase (EC:1.4.99.3) a periplasmic quinoprotein found in several methyltrophic bacteria. 
Induced when grown on methylamine as a carbon source MADH catalyses the oxidative deamination of amines to there corresponding aldehydes. MADH is a hetero- tetramer, comprised of two heavy chains (H) and two light chains (L). The H-chain forms a beta-propeller like structure [1]. 19.50 19.50 19.50 19.50 19.40 19.20 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.11 0.70 -5.81 5 83 2012-10-05 17:30:42 2003-07-08 09:40:06 6 2 76 105 32 104 19 332.60 33 86.65 CHANGED DARRVYVhDPuHFAAlTQhYsIDGcouRlLGMTDGGFLPNPVVASDGSFFApASTVYSRIARGKRTDYVEVlDPpTapPIADIELP-uPRFLVGTYsWMsuLTPDNKsLLFYQFSPuPAVGVVDLEGKuFcRMlDVPDCYHIFPouNsoFFMHCRDGSLt+Vuaus-GsoK.hKsTEVFHsEDEYLINHPAYSs+SGRLVWPTYTGKIFQADLSupcAcFLsPIEAFTEAEKADsWRPGGWQQVAYHRApDRIYLLuDQR-cW+HKsASRFVFVlDAcTGKRLsKIELGHEIDSIuVSQDAKPhLYALSsGsKTLaIFDAsTGKELuSVDQLGRGPQlIhTuD ............................................tt....hhl.D....t.h....s.+.laV...hD..uDs....t+..l..LG.lsuuasss.hs.l.o.s..Dt+phYlAsTaasRss+GpRTDllphaDssTLphsuEI.lPst.+t...s.s.s.p.tsh.uhosDG+hhaVhNhTPusSVoVlDhsu+KV.l.p.pl-sPGCshl..Y..P..s..u..s..s.s.Fo.uLCtDGshhsVsLDssGK.s.s...pp.p..o..s..hF...s.sc.D.s.lFppssh...st..hsuphhF.soYsGpVhsschous.s...s.....p....h....t..t.shp......l........h.s-u.c.+............t...........t......sWRPGGaQ.hAlpt.....spp+hY.V.L.M.H.pG.s.p...toHKDsGTpVWVhDhpo+cRlsRhsL.t.p...t....ss...SlsV..op.D-.cPllaulss.ss........uslhVhDAt.o..G.c.h+shc.tlGps........hh.................................. 0 7 17 24 +6262 PF06434 Aconitase_2_N Aconitate hydratase 2 N-terminus Moxon SJ anon Pfam-B_2605 (release 10.0) Family This family represents the N-terminal region of several bacterial Aconitate hydratase 2 proteins and is found in conjunction with Pfam:PF00330. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.27 0.71 -4.91 94 1563 2012-10-01 19:37:30 2003-07-08 12:39:19 8 6 1505 2 323 1212 908 208.10 67 25.23 CHANGED loVFKV.sGETNTDDLSPAs-AaSRPDIPLHAhAMLcschss............slppItcLK.pKGhP....lAYVGDVVGTGSSRKSAsNSV.LWahGcDIPaVPNKRsGGlllGGp.IAPIFFNTsEDSGAL........................PIE.sDVsplphGD..VIsIh..PacGclpc.....tsG...................................................................................................................c...llupFpLpPsslhDEVRAGGRIPLIIGRuLTsKARpsLGL...ssSslFtpP......ppPss.su+Ga ...................................................................................lTVFKV.sGETNTDDLSPAP.DAWS..RPDIPLHALAMLKNsR-GItP-...sG.hGPl+pIEtLp.pKGaP....lAYV.GDVVGTGSSRK.SATNSV.LWFhG-DI.PaVPNKRuG..Gl..sLGGK.IAPIFFNTMEDAG..A..L........................PI..E...l..D..V.Ss.LsMGD..VI..D..lY..PYcGclpp.....tsG.........................................................................................................................E....llAsFcLKo-VLl.DEV.RAGGRIPLIIGRGLTsKAREuLGL...st..S..-lFRpscs.sA-.Ss+Ga........................................................... 1 74 178 263 +6263 PF06435 DUF1079 Repeat of unknown function (DUF1079) Moxon SJ anon Pfam-B_1911 (release 10.0) Repeat This family consists of several repeats of 31 residues in length and seems to be exclusive to Moraxella catarrhalis UspA proteins. The UspA1 and UspA2 proteins of Moraxella catarrhalis are structurally related and are exposed on the bacterial cell surface where can function adhesins [1]. This family is commonly found with the Pfam:PF03895 family. 21.00 21.00 23.40 21.80 20.50 19.40 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.45 0.72 -3.94 4 220 2009-01-15 18:05:59 2003-07-08 12:51:55 6 19 12 4 9 218 0 29.60 52 16.20 CHANGED lushhc.tcsascpQoEsIcuLsKASutNTs lssh.ELtpQ.sp+po-.IcsLsKASutNTp.... 
0 0 9 9 +6264 PF06436 Pneumovirus_M2 Pneumovirus matrix protein 2 (M2) Moxon SJ anon Pfam-B_2318 (release 10.0) Family This family consists of several Pneumovirus matrix glycoprotein M2 sequences. This family functions as a transcription processivity factor that is essential for virus replication [1]. 25.00 25.00 134.20 134.00 22.00 17.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.90 0.71 -5.00 4 75 2009-01-15 18:05:59 2003-07-08 13:06:16 6 2 17 1 0 72 0 156.70 58 82.92 CHANGED pWPsHsLLVRpNaMLNplL+shDRo.DsLS.ISGAuc.DRTp-asLGsssVlpsYIsu.sNITKpSAChuh.pllppLppsDlKphRDpcVssS.+ltlaNhVlSYI-.s.KNstphIp.LKRLPt-hLKKhhKhhI-lpsulphpNssssslpDttNs pWPs+sLLlRpNaMLNplL+shD+S.DsLS.ISGAuc.DRTp-asLGsssVlpuYIss.sNITKpSAChuhppllppLppsDl+phRDNc.ssS.+VtlaNhVlSYlE.s.KsstphIp.LKRLPt-hLKKhhK.hlDlptuls.pNsppsslpDos..s... 1 0 0 0 +6265 PF06437 ISN1 IMP-specific 5'-nucleotidase Studholme D, Wood V anon Pfam-B_43910 (release 8.0) Family The Saccharomyces cerevisiae ISN1 (YOR155c) gene encodes an IMP-specific 5'-nucleotidase, which catalyses degradation of IMP to inosine as part of the purine salvage pathway. 
25.00 25.00 84.50 84.40 23.60 19.20 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.37 0.70 -5.76 18 157 2009-01-15 18:05:59 2003-07-08 13:32:53 6 4 137 0 127 156 1 373.80 47 89.08 CHANGED MoSRYRVEY......tLKsHR+DpFI-W.............IKGLLAV.PFVLHut..t...................t....s.tthspcs+cRYuEIFpDVEpLIsc+Ithspp.sp.................spSRL+hLVPSIGsFFT.LPLp-AFhhpDc+RsISpRRhVuPSFNDlRhILNTAQlhuLsctt....................pLcLlTFDGDVTLY-DGtSLssssPVIsRLl.................cLLppslplGIVTAAGYsc...Ap+YhpRLpGLLcAlpsss.sLsssQKpNLlVMGGESNYLF+assss...stsLp.lscccWlLPcMpsWsppDIsplLDhAEpsLpchhppLsLP....ssIlRK-RAVGIlP..........pssh+lhR.EpLEEhVLslQppLEp...........................stsu..pcl.FCAFNGGsDVWVDIGDKuLGVpsLQ+aa...........t..ssIpsocTLHVGDQFhSsG.ANDFKARLuusTsWIAuPpETVplL ........................................................................p.....................pst..................................................lcuLLAs.PFVLau..t..........................p...hstcscppYs-IhcDlEthIp-+.Ithppt.st...................................t.S+LphLVPolG.sFFT.LPLpcAFhhpDppRhISpRRaVsPSFNDlRhILNoAQlhulsc.t.............................tLcLlTFDGDVTLY-DGtsLp..sssPlIs+ll.................cLLcpshplGIVTAAGYsp...As+Y.pRL+GLL-Alpss....sLsspQKpsLhlMGGESNYLFcasts...........htLp.......lsc...pcW.....h......hs...cMtsW.....s.c.....p..DIpplLDhAEpsLpphhppLsLP....ssIlRKcRAVGIlP...............................ssh+h.R.EpLEEhVLslQppLEh..............................................st.su..pclPFCAFNGGs...........DVaVDIGsKuhGVpshQpaa..............ssIpsscTLHVGDQFhssG.uNDFKAR..............susThWIAsPtETVplL......................... 0 35 73 108 +6266 PF06438 HasA Heme-binding protein A (HasA) Finn RD anon Pfam-B_27216 (release 9.0) Domain Free iron is limited in vertebrate hosts, thus an alternative to siderophores has been developed by pathogenic bacteria to access host iron bound in protein complexes. 
HasA is a secreted hemophore that has the ability to obtain iron from hemoglobin. Once bound to HasA, the heme is shuttled to the receptor HasR, which releases the heme into the bacterium [1]. 26.30 26.30 27.30 32.80 25.50 26.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.36 0.71 -4.92 7 96 2009-01-15 18:05:59 2003-07-08 15:17:58 7 2 67 21 10 66 4 204.00 39 99.08 CHANGED MohSlsYsusauuaolusYLs-WuusFGDlsHtsups..sssNTGGFssGs..hsGoQYAlpSosss.tAFIAsGs....L+Yoh..sPu........HTLaGpLDoluhGcsL...uGGsu.us..asLss.-VoFssLsLsu.hupG+s......G.sVHpVlYGLMpGsouuLtsslsslLt..shGlSlNsTFDpluAAsss.s...husss.VshVGVQ-sspDhhLAA ..........................MosoIpY.sopausaoloSYLpcWusp....FGD..lsptsups...........ps....pGuFssus..........FsG..o..QYAl..s...Sop.......u..s.s..tu.h.......I....ApGD.......L+Yoh....Pp..............HThaG.p..l.DoLphGcsL...........s.su.su.sG....hpL-phclsFs.sLDlsu-h-su+shhp.........shpG.shHKulYGLM+GNssP.hL-hl........c....................ApGIslsTshKDluIASQhtsss..huDA..P.hlDTVGVh-.s.p-hLLAA......................... 0 1 3 5 +6267 PF06439 DUF1080 Domain of Unknown Function (DUF1080) Yeats C, Eberhardt R anon ADDA_10682 Family This family has structural similarity to an endo-1,3-1,4-beta glucanase belonging to glycoside hydrolase family 16. However, the structure surrounding the active site differs from that of the endo-1,3-1,4-beta glucanase. 
26.30 26.30 26.30 26.40 26.20 26.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.51 0.71 -4.40 189 1312 2012-10-02 19:29:29 2003-07-08 16:01:14 6 119 323 12 583 1442 1367 194.00 21 46.62 CHANGED sh...tsLF..s........Gc..sL.sG....Wp...sh..................................................................h.D.G.........................hlh.........................ssptsshlhoc...p.pas.sFpLcl-a.+.................ss..ss..............NSGlhh+sp........................................h..uh.EhQlhs......sttt..........................ptsGulY............................................................st...spsshssscWNphcI.hspusp............lp.h.hlNGhhlhchsphpst.t..............................ppGhIuLQ.sHu..........s............lp..FRNIpl+ .................................................................................................................................................hh.LF.s.Gp..sh.su....Wp...thttt...............................................................................................................lp-.G.........................hlh.........................tssssu..lh...o.....p.....c...pa.s.....sF.pLcl-a.+h...............st..su.............................NS.G....lhhpsp......................................h..uh.EhQlhs.....sttt.........................ttsGu.lY.................................................................st..spsshs...sG.cWNp....hcI..hs.p.usp..............lp..h.hl..N.G..h..hl..hchsphss.h................................................tpGhIuLQ..sHu............s.............tlt...FRNlplc....................................... 0 323 505 562 +6268 PF06440 DNA_pol3_theta DNA polymerase III, theta subunit Finn RD anon Pfam-B_27631 (release 9.0) Domain DNA polymerase III (EC 2.7.7.7) is comprised of three tightly associated subunits, alpha, epsilon and theta. This family contains the theta subunit. 
The structure of the theta subunit shows that the N-terminal two thirds is comprised of three helices while the C-terminal third is disordered [1]. The function of the theta subunit is poorly understood, but the interaction of the theta subunit with the epsilon subunit is thought to enhance the 3' to 5' exonucleolytic proofreading activity of epsilon [2]. 32.30 32.30 32.40 36.10 30.60 32.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.41 0.72 -4.39 11 627 2009-09-13 11:27:20 2003-07-08 16:11:56 6 1 540 7 57 179 3 74.00 76 95.14 CHANGED M..saNLApLScE-hDKlNVDLAASGVAaKERhNMPVlsE.VEREQPEHLRpYFRcRLtaYRptSppLs+hs.....-P+h .......pNLAcL-QsEMDKVNVDLAAAGVA...FKERYNMPVIAE.sVEREQPEHLRuWFRERLIAHRLASVsLSRLPYEPK........... 0 2 12 37 +6269 PF06441 EHN Epoxide hydrolase N terminus Yeats C anon ADDA_11591 Family This family represents the N-terminal region of the eukaryotic epoxide hydrolase protein. Epoxide hydrolases (EC:3.3.2.3) comprise a group of functionally related enzymes that catalyse the addition of water to oxirane compounds (epoxides), thereby usually generating vicinal trans-diols. EHs have been found in all types of living organisms, including mammals, invertebrates, plants, fungi and bacteria. In animals, the major interest in EH is directed towards their detoxification capacity for epoxides since they are important safeguards against the cytotoxic and genotoxic potential of oxirane derivatives that are often reactive electrophiles because of the high tension of the three-membered ring system and the strong polarization of the C--O bonds. This is of significant relevance because epoxides are frequent intermediary metabolites which arise during the biotransformation of foreign compounds [1]. This family is often found in conjunction with Pfam:PF00561. 
22.50 22.50 22.80 22.80 22.40 22.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.64 0.71 -4.15 143 1051 2012-10-03 11:45:05 2003-07-08 17:03:29 7 12 454 6 597 1123 575 110.20 35 27.04 CHANGED pPF....plplscspLs-L+pRLptoRa...Pspts.sss............hp.hGsshshlccLscYW............................ts.sa..D................WRptEscLN...paPpFpTpI........-..................GlcIHFlHh+S......pps.sAl.PLllsHGWPGShhEFhclI ...................................pPFclplsppplsDL+pRLptoRh.....sst.....sss................ap.hG.hshshlcclscYW...................................ps..sY..D..................WRptE..ttLN...p.aPpFpTpI.......-..........................GlsIHFlHh+o.............................tps.sAl..P...L.l.lsHGW......PG..ShhEFhclI................................................................................ 0 184 337 490 +6270 PF06442 DHFR_2 DHFR; R67 dihydrofolate reductase Finn RD anon Pfam-B_27527 (release 9.0) Family R67 dihydrofolate reductase is a plasmid encoded enzyme that provides resistance to the antibacterial drug trimethoprim. The R67 dihydrofolate reductase does not share significant similarity to the chromosomal encoded dihydrofolate reductase [1]. 25.00 25.00 27.20 92.50 22.80 22.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.64 0.72 -4.35 3 33 2009-01-15 18:05:59 2003-07-08 17:07:36 6 1 13 8 0 18 0 78.00 81 96.15 CHANGED MscSSNEVSsPVAGpFAFPSNATFGLGDRVRKKSGAAWQGQIVGWYCTsLTPEGYAVESESHPGSVQIYPVAALERVA ....................M-pusNEVSsPVAGpFAFPSNATFGhGDRVRKKSGAAWQGQIVGWYCTpLTPEGYAVESEuHPGSVQIYPVAALERls.. 0 0 0 0 +6271 PF06443 SEF14_adhesin SEF14-like adhesin Finn RD anon Pfam-B_36358 (release 9.0) Family Family of enterotoxigenic bacterial adhesins. 
21.90 21.90 22.10 22.70 21.60 21.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.03 0.71 -4.60 2 40 2009-01-15 18:05:59 2003-07-08 17:32:33 6 1 19 7 0 37 0 129.40 63 98.20 CHANGED +St.ulhlhALhsCGuApAAshVGshAsVpAslshuAQNThsAsWoQDsuhoGsuVsAhQKlGTLsIphTGsHsuV.luGcssu.SGGlhTlPFhsstGQslFRGRhput.hps.ssshlst.u.GW+lsuoQ-shNlslpsF.stsslPAGpaTATFYlQQYQs ................................................ssTluAQNTTSANWSQDPGFTGPAVAAGQKVGTLSIT.ATGPHNSVSIAGK.GASVSGGVATVPFVDGQGQPVFR.GRIQGANIN..DQANTGIDGLA.GWRVASSQEThNlsl.............................. 0 0 0 0 +6272 PF06444 NADH_dehy_S2_C NADH dehydrogenase subunit 2 C-terminus Moxon SJ anon Pfam-B_1662 (release 10.0) Family This family consists of the C-terminal region specific to the eukaryotic NADH dehydrogenase subunit 2 protein and is found in conjunction with Pfam:PF00361. 21.50 21.50 21.60 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.06 0.72 -3.66 136 25512 2009-09-10 16:36:32 2003-07-08 17:37:30 6 3 12030 0 41 25653 0 54.00 43 16.22 CHANGED LaFYLRlsYshsLThsPsssssth.pW..Rhps.pp....shhls.hhhsholhhL.PlsPhlh.s .....LFFYLRLuYssTlTlsPs.ossphp.pW....+hpp..sp.......shh.lu...hh...sshShhLL.Pl.o.Phlh..................................... 0 5 8 18 +6273 PF06445 GyrI-like AraC_E_bind; GyrI-like small molecule binding domain Finn RD, Anantharaman V anon Pfam-B_36124 (release 9.0) Domain This family contains the small molecule binding domain of a number of different bacterial transcription activators [1].\ This family also contains DNA gyrase inhibitors. The GyrI superfamily contains a diad of the SHS2 module, adapted for small-molecule binding [3]. The GyrI superfamily includes a family of secreted forms that is found only in animals and the bacterial pathogen Leptospira [3]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.75 0.71 -4.19 88 6066 2012-10-02 11:08:51 2003-07-08 18:33:54 10 24 2274 26 1074 4279 124 155.40 16 64.70 CHANGED hphclhphsshplhulcppsshsp............lsphh.pplhthttppth.................tssshhulhhspst......spspphchthshthst........................ptstshphhplsuu.paAshpap.Gsh......ssltpshpplastalspssh....thtsssshEhYhss.........ttpphhs.-lhlPlp ......................................................................................................................................phplhph.s.php.lh...u....h.....p.t..p...h.s.p.t...............lsphh..p..ph..ht.hhhppth......................ssth.hul..h.h.s.ps.........t..t..sp....p...h..p.h..p..h..s.h.ss.sp........................................psspshp..h.pl.s..u.G...pYA.sh.p.hp...Gsh...........psht.c..shtt.lap..alsp.p.sh......................phtss.s...p....h....E..h..Y..hs..s.t..........pstphph..-lalPl.................................................................. 0 387 718 907 +6274 PF06446 Hepcidin Hepcidin Finn RD anon Pfam-B_41732 (release 9.0) Domain Hepcidin is a antibacterial and antifungal protein expressed in the liver and is also a signaling molecule in iron metabolism. The hepcidin protein is cysteine-rich and forms a distorted beta-sheet with an unusual disulphide bond found at the turn of the hairpin [1]. 21.90 21.90 26.30 25.80 19.90 19.70 hmmbuild --amino -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.71 0.72 -3.56 20 197 2009-01-15 18:05:59 2003-07-09 10:43:59 7 1 91 5 26 181 0 55.70 42 64.69 CHANGED chEEhtspDsssAttpptss-shth...hRpKRps.phshCtFCCsCC..+hpG..CGhCC+F .....................pLEEsh.upDs.ssA.stpEhsh-Shhhs.t.Rp..KR.p......t..hC+.FCCsCC..phps..CGhCC+F......... 0 1 3 13 +6276 PF06448 DUF1081 Domain of Unknown Function (DUF1081) Yeats C anon ADDA_12201 Family This region is found in Apolipophorin proteins. 
25.00 25.00 25.00 25.20 21.90 23.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.51 0.71 -4.21 22 170 2009-01-15 18:05:59 2003-07-09 11:20:46 6 14 99 0 78 163 0 116.10 27 3.77 CHANGED sschppp...sCh.shlc.hhGlphChsh..shssl.ps......puh..........PLusPshhcl.lcts-.phctYphpushctpp....ss.....cpl+htlcs.Gupss+-upsslpas+cpcsh............................louclptsshsuc .............+.shssCp..shhs....GLphCssh..sas..sssss.................sus.........shaPLoGssphpl.Lc.ss.phcpYshphsachpc....ts........................cshchshps..G.........sp..pcsshshp..hspppts.............................hp.pl....h...t.................................................................... 0 21 28 55 +6277 PF06449 DUF1082 Mitochondrial domain of unknown function (DUF1082) Moxon SJ anon Pfam-B_2173 (release 10.0) Family This family consists of the C-terminal region of several plant mitochondria specific proteins. The function of this family is unknown. This family is found in conjunction with Pfam:PF02326. 25.00 25.00 44.80 43.80 23.00 20.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.51 0.72 -4.08 5 109 2009-01-15 18:05:59 2003-07-09 11:38:49 6 3 77 0 7 82 0 49.70 77 30.50 CHANGED DLLGK+RKIThISCFGEISGSRGMERNILYLISKSSYST.ss..oGWtIT...C+N DLLGKR.R.KITLISCFGEISGSRGMERNIhYLISKSSYST..........sGhtITC+N........ 0 1 4 6 +6278 PF06450 NhaB Bacterial Na+/H+ antiporter B (NhaB) Moxon SJ anon Pfam-B_5993 (release 10.0) Family This family consists of several bacterial Na+/H+ antiporter B (NhaB) proteins. The exact function of this family is unknown [1,2]. 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 515 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -12.95 0.70 -5.88 4 838 2012-10-02 15:12:49 2003-07-09 11:48:14 7 2 810 0 104 725 503 497.90 75 99.01 CHANGED M.hohupAFh+NFLGpSP-WYKlAIlsFLIINPIlFFhISPFVAGWLLVAEFIFTLAMALKCYPLQPGGLLAIEAlhIGMTSAppV+cElhANlEVLLLLlFMVAGIYFMKQLLLFlFTKlLlpIRSKhLLSLuFChAAAFLSAFLDALTVlAVlISVAVGFYuIYHKVASGpsh.tDpDhosDs+IpE....hp+s.LEpFRuFLRSLhMHAGVGTALGGVhTMVGEPQNLIIAcQAsWpFGEFhlRMSPVTlPVFICGLLTChLVEKh+lFGYGtpLPDpV+cILsDaDcppccpRTpQDKhKLhVQAlIuVWLIsuLALHLAuVGLIGLSVIILATSFTGVTDEHulGKAFpEuLPFTALLsVFFSVVAVIIDQpLFuPlIpaVLssE-p.TQLuLFYlhNGLLS.lSDNVFVGTVYINEsKAALhsGhIThcQF-LLAVAINTGTNLPSVATPNGQAAFLFLLTSALAPLIRLSYGRMVaMALPYTlVLoIVGLhuIpFh.LtssTtaFhshGhIhs ......................................MclShGpAhh+NFLGpSPDWYKLAlllFLIlNPllF.hl.uPFlAGWLLVAEFIFTLAMALKCYPL.PGGLLAIEAVhIGMTSs....cH.................V+EElAANLEVLLLLMFMVAGIYFMKQLLLFIFT+LLLuIRSKhlLSLuFClAAAFLSAFLDALTVVAVVISVAVGFYGIYH+VASu+s....-.-..sDhpDDS.+I..-.c....c.h+ssLEQFRuFLRS...LMMHAGVGTALGGVMTMVGEPQNLIIAK.sAG......Wc..FG-F..FL.RMu.P.VT.V..P..V..L.I..C..G..L.L.TChLVEK..h..R..hF.G..YGp.pL..PE..+.V.R.cV..Lp.pFDcpu.R+.p.R.T..+..Q.D...K..l.+L.......I.V..Q..A..l..I..GV...W.L.V...s....A.L.A.LH....L......A..E.VGL....I.G.LS..V....I.IL.ATu...hT..G.V.TD.EHA.I.........GKA.F.......p.....Eu.L.PF.T..A.L..L..TV..FF.S.VVAVIIDQpL.....Fu.PII..Q.FVL.Q.....A...S..-.H....uQ..L....o...L..FY.l......F..NG..L...LSSISDNV.FVGT.l...Y..I...NEAKAAhEs.........G....sI.ohc......Q.a.EL......LAVAINTGTNLPSVATPNGQAAFLF..LLTSAL...APLIRLSYGRMVW..M..ALPYTlVLTLVGLLsVEFh.LsPsTEWhhphGWIu.o..................................................................................................................................................... 0 21 42 77 +6279 PF06451 Moricin Moricin Finn RD anon Pfam-B_56760 (release 9.0) Domain Moricin is a antibacterial peptide that is highly basic. 
The structure of moricin reveals that it is comprised of a long alpha-helix. The N-terminus of the helix is amphipathic, and the C-terminus of the helix is predominately hydrophobic. The amphipathic N-terminal segment of the alpha- helix is mainly responsible for the increase in permeability of the bacterial membrane which kills the bacteria [1]. 25.00 25.00 25.50 25.10 19.30 19.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.84 0.72 -4.46 2 23 2009-09-10 20:06:58 2003-07-09 11:52:21 6 1 10 3 6 26 0 39.20 59 54.24 CHANGED uKIPItAIKpsGKAlGKGLRAlNIASTApDVasFhKPKKR+ ..uKIPltAIKKuGKAIGKGLRAINIAuTAHDVaoahKPKK++.... 0 3 6 6 +6280 PF06452 DUF1083 Domain of unknown function (DUF1083) Moxon SJ anon Pfam-B_2203 (release 10.0) Domain This family consists of several domains of unknown function exclusively found in bacterial xylanase proteins (usually at the C-terminus) although it is tandemly repeated in a number of family members such as Swiss:P38535. This family is always found in conjunction with Pfam:PF00331 and usually with either Pfam:PF02018 or Pfam:PF00395. The function of this family is unknown. 
20.80 20.80 20.90 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.34 0.71 -4.41 35 615 2009-01-15 18:05:59 2003-07-09 11:58:29 6 91 276 3 254 667 374 179.60 17 25.09 CHANGED I.DGps.DssWssAp...lshsphh....tus.....sssoush+sLWD.-csLYlLscVsDsl.lscsssss....a-pDulEIFlDpsNsK...sstYpssDhQY+lsasNpso....hssss...........hssshpousphss.sGY.....llEstIsh............psl.....sssssph...lGFDltlND.ssssGsRpuhhsWsDsosss......apssusFGslpLttp ................................................lDG.h..-t.W.ppst....hs.....h.h................ts.t..........ssh..psp..s+.hha..D..c....psLYlh..s.......p....l...p....Ds.....p.....h.....p.....p......tttss.............ha...p...s.Ds.lE.l.a.l...D....ssssp.......psta.....p.......h......p.....s......s....t..h.........t....h.....t.h..s...s..h..t.s........hphtt..................................................hs.sth..p...s..t.sp.........h.........s.........s...sua..........hhEhtIPh.....................psl........t..t.ts.ph.....huhs...h..........h.p...............................................................t........................................................................................................................................ 0 130 216 243 +6281 PF06453 LT-IIB Type II heat-labile enterotoxin , B subunit (LT-IIB) Finn RD anon Pfam-B_61882 (release 9.0) Domain Family of B subunits from the type II heat-labile enterotoxin. The B subunits form a pentameric ring, which interacts with one A subunit. Thus, the structural arrangement of type I and type II heat-labile enterotoxins are very similar [1]. 
25.00 25.00 139.10 139.00 20.60 19.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.54 0.71 -4.44 2 7 2009-01-15 18:05:59 2003-07-09 12:17:58 6 1 2 15 0 7 0 121.70 64 99.77 CHANGED MS.KKIItAFVlMsullSsQsaAGsSpaF+s.CNpTTAslVtGVpLpKYIuDlNsNTcGhYVVSsTGGVWhIstu+DYPDNhhouEhRKhAMAAlLSsh+VNhCApsuSSPNhIWAhEL-tE MsFKK.IuhhhlhhsIsSl.sYAGVScpFKDpCspTTAclVpuVQLsKhhSDlNpso+GIYlsSSTGtsWaIPGGp.YPDNaLSsEMRKIAMAAVLSssRVNlCASpA.oPNHlWAIELttE.. 0 0 0 0 +6282 PF06454 DUF1084 Protein of unknown function (DUF1084) Moxon SJ anon Pfam-B_12888 (release 10.0) Family This family consists of several hypothetical plant specific proteins of unknown function. 25.00 25.00 25.20 25.20 24.90 24.20 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.83 0.70 -5.37 10 304 2009-01-15 18:05:59 2003-07-09 12:39:16 6 6 72 0 206 287 1 218.40 26 53.16 CHANGED husosssult....ssWWDplNE.SshWQDsIFauLuulYGLVSsVALlQLIRIph.RVPEYG..WTTQKVFHLhNFlVNGVRAl.lFuF+.....+pV.plpPcllp..tlLLDlPGLsFFoTYsLLVLFWAEIYaQARu...LsTD+LRsuFaolNuVlYhIQIsIWlhlWhpPss....sl.hluKhFhAslShhAALGFLLYGGRLFlMLRRFPIESKGR+KKL+EVGhVTsICFoCFLIRClh....hslsAFDscAcLDVLsHPlLNhlYYllVEILPSuLVLFILRKLPPKRspsQYHPIp 
....................................................................................hhh.........................................h.lshhhhhluhhuhhQ.l.h.......ph.......h....+.........p.hs......hs...hp+lhhhh.......h...l..s......s......h................h.Rsh...hFhhh.....................ph.....ht.........t..hhp......hlLhshPsh...ha.aosasll.llaW...sclha.....p.u.ps........h..t...p..t....l...p.........hhhl.Nsh..lY...h.h....tl..hl...al...h......h..h.hp...t.....................hh.hh..th......h.h....u....sl.....h...h.h.uh....u.FhhYGhpLahh..l.........p............p...........h.........p......t..p.t.....p......p..p...c..h............c..lshlshl....s.hsFhh+shh.......hh......h......s.....h.................t....h...t..........h.....t.....l...h...........hhhahhsEhlPs.h.lLahh............................................................. 0 75 157 185 +6283 PF06455 NADH5_C NADH dehydrogenase subunit 5 C-terminus Moxon SJ anon Pfam-B_3060 (release 10.0) Family This family represents the C-terminal region of several NADH dehydrogenase subunit 5 proteins and is found in conjunction with Pfam:PF00361 and Pfam:PF00662. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.01 0.71 -4.90 78 9824 2009-01-15 18:05:59 2003-07-09 12:48:11 6 10 5550 0 166 9403 1036 165.20 32 35.29 CHANGED YShRllaashhGpsphsshsslNEss.hllsslhtLuhGSIhuGhhlohhlhP..psshhshPhhlKhhALhVollGllhuhplsshohp.hp.s......shppF.shhhFhP.lspphhsphsLhhGpplspp.hDpuWhEhhGspGlhph.hshophhpshQpu..lKhYLhhFllslllhhhhh ...........................................................................YShRlh...a..ashhG.p.....p...a.....o....l..p...s....l..N...-.p....s....h.h..lp..sh.htLhhhSIh.............uG...........................h...........l...............s..................................I...............h.................P......p................s......................h............h................s...................h.................P............h............................L...K......h......h.AL....h..V....o....l....l.G.h..h.l......u.............h.......-...........l...s....p...h....o.......................p.....h....h.....................p.....p.................t..................s...............h.hF..shh....hahPsl.ph.hhshh.s.Lhh.u..plhpp..hD.sW...E.h..h.Gsp.u.h..ph..h..p..o.hh.hp...Q.p.s..hK...h..Y...hh.hhlhhhhhhh........................................... 0 50 91 119 +6284 PF06456 Arfaptin Arfaptin-like domain Finn RD anon Pfam-B_5314 (release 7.5) Domain Arfaptin interacts with ARF1, a small GTPase involved in vesicle budding at the Golgi complex and immature secretory granules. The structure of arfaptin shows that upon binding to a small GTPase, arfaptin forms an elongated, crescent-shaped dimer of three-helix coiled-coils [1]. The N-terminal region of ICA69 is similar to arfaptin [2]. 
32.00 32.00 33.60 33.40 31.20 31.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.31 0.70 -5.01 11 535 2012-10-03 12:17:00 2003-07-09 12:49:09 8 12 97 10 285 472 0 209.60 33 55.43 CHANGED lhpKlc.......ppa+po+QlhpcphG+tpcp+h.ssDsEL-splElL+sspcpYtsllchscshpptlhplspsp+tLGchFppluh..Kspphscthstsu-sh+hluKpt.sLhsslphhlsclsTahs+sIsDThhTlcphEssRhEYcuhphclK-hspELsPpsstphshaRpsQsph.........ppsKc+a-KL+sDVh.Kl-LL-ps+spslsppLttapsslutaappsuctL ..........................................p+hp.......ppahpT+Qhh.cphG+tpc......ssD..s-.L-sp..lE.........lh+sspcphpsllch..sct...hppp.hhplsppp.ptLGchhpshu....co.thtcthshsucs.phhs+p.t...sLh.ss.lshhhp.slsThhp+sIpDThhTlpphEp..s.RhEYc.uhhh.hc-.h...s...hsPps........t..tp..h.+h..ctsQtph.........................pt.t+.p.pa-Kh+tDVh.KlchLtts+sphhpppLhha...p.ssl.taattstp.h...................................... 0 83 103 191 +6285 PF06457 Ectatomin Ectatomin Finn RD anon Pfam-B_63420 (release 9.0) Domain Ectatomin is a toxic component from the Ectatomma tuberculatum ant venom. It is comprised of two subunits, A and B, which are homologous. The structure of ectatomin reveals that each subunit is comprised of two helices and a connecting hinge region, the forms a hairpin structure that is stabilised by disulphide bridges. The two hinges are connected by a disulphide bond [1]. 25.00 25.00 82.10 82.00 18.30 17.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.84 0.72 -4.25 2 2 2009-09-11 06:57:38 2003-07-09 13:43:24 6 1 1 2 0 2 0 34.00 47 95.77 CHANGED hsphlh.TlCPTlcshAKKCpGsIAThIK+cCsK hsphlh.TlCPTlcshAKKCpGsIAThIK+cCsK 0 0 0 0 +6286 PF06458 MucBP DUF1085; MucBP domain Galperin M, Moxon SJ, Bateman A anon Pfam-B_4243 (release 10.0) & Galperin M Domain The MucBP (MUCin-Binding Protein) domain is found in a wide variety of bacterial proteins. 
The domain is found in bacterial peptidoglycan bound proteins and is often found in conjunction with Pfam:PF00746 and Pfam:PF00560. 22.60 22.60 22.60 22.60 22.40 22.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.82 0.72 -3.39 115 1918 2012-10-02 15:23:12 2003-07-09 13:51:03 7 136 317 12 279 1856 8 103.00 26 27.90 CHANGED hspssshTlpYlct.s......Gspls.ss...........................-hlT..G..hh...........................................scsa...ssss...sI..sGY...............................sht..sh..sss.ss.hshssss.sV ..................................................h.sKslTRTIpYhts.s..............Gpp.....s......t....t.....s.....h.....s.Q.s..l........sao...................Ro.............ssh.DpVT........Gplsh..............................ssWss...............tsssa....stl...ssP......sl......sGY................ssshsssstpsV..................................ss.ssp...sh....phsVs..Yptpst...................................................... 0 49 98 163 +6287 PF06459 RR_TM4-6 Ryanodine Receptor TM 4-6 Yeats C anon ADDA_12111 Family This region covers TM regions 4-6 of the ryanodine receptor 1 family. 
25.10 25.10 26.30 26.30 25.00 24.70 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.99 0.70 -4.50 21 301 2009-01-15 18:05:59 2003-07-09 14:40:33 7 42 92 0 144 243 0 247.80 36 6.13 CHANGED sMPDPTQstl+G.....-h.ptccsth.ptssstp.h.......ustcEschhs-.....hhGlthcKEGu..h...hssssGLuDhuph.s.tssospsssht+p.........................ttttpsps-sphsDhcsGEK.....pp.sc.ppp...ppsh.t.httppp+spctcc..thhhphatt.............lphhppKhlNYLARNFYNLRaLALFlAFAINFILLFYKVosp.ss---c.t.........................................tsthss..----..-csh.hahLp......EooGYM.tPsLphLAllHTlISFhClIGYYCLKVPLVI ...............................................................................................................................hP-PTt.tl+s...........ph.pt...t...t..h..pt....t.t.h.......t.tt-t-hhs-.........hhGht.......c+EGu..p......htspsGLuDh...p..s...t.s..s..ttsshtct.........................................................t.ttptc..ps..cscc....A..-hEsGEK......ttt..ctp..t.............sp..p.p....tptpsp+h....tcs..ps.h.sphatt..................lp.happK..hLs..YLARNFYNLRaLALFlAFAINFILLFYK.Vosp.sstc.tpt.t............................................s.sshsp...-c--...--shlaahLp......ESo.GYM.pPsLRhLAllHTlISFhClIGYYCLK.....VPLVI.................. 0 20 31 85 +6288 PF06460 NSP13 Coronavirus NSP13 Yeats C anon ADDA_12678 Family This family covers the NSP13 region of the coronavirus polyprotein. This protein has the predicted function of an mRNA cap-1 methyltransferase function ([1]). 
20.10 20.10 20.10 20.30 20.00 19.90 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.66 0.70 -5.55 8 426 2012-10-10 17:06:42 2003-07-09 15:53:49 7 37 250 4 0 450 56 280.30 61 5.15 CHANGED uu-WpsGYuMPsLYKhQchsLE+CNLaNYGtslsLPsGIMhNVAKYTQLCQYLNoTTLsVP+NMRVLHLGAGSDKGVAPGoAVLRpWL..........PpssILVDNDlsDYVSDAchSlhGDCsTlhh-sKaDLlISDMY....DsppKslst-NsSK-GFFoYl.suhI+-KLALGGSlAIKlTEaSW.NtcLYcLhp+FuaWThFCTuVNsSSSEuFLIGlNYLGc.ss+spIDGssMHANYlFWRNoshhphSt.SlhDhSKFshKhpuTsVVsLKpppls-hVhuLlcpGKLLlRsssphlhhusphVsss ....................................................ussWpPGYuMPsLYKhQphsLEhCsL.NYGtslsLPsGIMMNVAKYTQLCQYL.NTs.TLsVPaNMRVlHhGAGSDKGVAPGo..sVL+QWL............PsGolL.VDNDlsDaVSDAcsolhGDCsolhhpsKaDLlISD.MY....DspTK.plsttNsS.K-GFFTYl.sshI+pKLALGGSlAlKIT...EaSW.NtcL.Y.cLht.c.FuaWThFC.T.sV.N.AS...SS.EuFLIGlNYLGc..sK.pIDGpsMHANYIFWRNoshhphSuYSlFDhuKFs.LKL+uTsVlsLK-sQlNDhVhuLLc+G+LLlR-ss+hlssuD.LVN........................................... 1 0 0 0 +6289 PF06461 DUF1086 Domain of Unknown Function (DUF1086) Yeats C anon ADDA_2403 Domain This family consists of several eukaryotic domains of unknown function which are present in chromodomain helicase DNA binding proteins. This domain is often found in conjunction with Pfam:PF00176, Pfam:PF00271, Pfam:PF06465, Pfam:PF00385 and Pfam:PF00628. 25.00 25.00 30.70 29.20 19.80 19.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.89 0.71 -4.41 4 315 2009-01-15 18:05:59 2003-07-09 16:22:00 6 49 98 0 143 249 0 148.60 65 9.03 CHANGED .ssup........pctspsspRP.R++tR-sp-c..sPLhtt.GtplcVLGFNtpQRchFlpslMRaGh...s.hp.p..V.cL+tKo.c.FKtYu.LFh+HlsEsssDNSsoF........uDGVP+EGLsppcVLsRIulM.Ll+cKVQ.hEcasGc.shPphh. 
.........tsssE-s--p.......cs..t..sRR..sR+.p..L.R..s-+DKP...LPPLLARVGG.NIE.VLGFNsRQRKAFLNAlMRaGMP..............PQDAF.soQWL......VRDLRGKSEKEF.K..AYVSLFMRHLCEPG...A...DGuETF........ADGVPRE....GLSRQ.HVLTRIGVMSLlRKKVQEFEHlNGcaShP-L..h................ 0 29 51 89 +6290 PF06462 Hyd_WA Propeller Yeats C anon ADDA_3361 Family Probable beta-propeller. 20.70 20.70 20.70 20.90 20.60 20.30 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.05 0.72 -7.38 0.72 -4.30 73 1060 2009-01-15 18:05:59 2003-07-10 09:19:19 7 43 91 0 606 1024 6 32.70 33 16.01 CHANGED stVWAls.pc.........GplhhRp.......GlopssPpGss..Wppls ........tVWAls.pp........................GplhhRp.......Glos.p.s.PpGss..Wppl........ 0 133 188 369 +6291 PF06463 Mob_synth_C Molybdenum Cofactor Synthesis C Yeats C anon ADDA_4938 Domain This region contains two iron-sulphur (3Fe-4S) binding sites. Mutations in this region of Swiss:O14940 cause MOCOD (Molybdenum Co-Factor Deficiency) type A. 24.00 24.00 24.60 24.10 23.60 23.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.86 0.71 -4.45 66 3853 2009-01-15 18:05:59 2003-07-10 14:48:39 8 19 3317 8 1051 2921 1184 127.60 32 37.19 CHANGED hplRFIEhMshGpspph......cphlshpplhstlpppath..hsst.tps...susAchapl...........ssst.up.....................lGhIsshops.FCssCNRlRLTu-GpL+sCLatpps.h-LRshL+sss.stt....LtphlppultpKtttath ..............................................plRFIEhMshG.p......s.s.....th...tt...........pphlotpplh..sp.l...p.p...c.h...l......tt.hp...tpp.............suPAphaph................s.sht...uc.............................lGlIsshocs.FCuoCNRlRlou-GpLh.hC......LFupp....s..hsLRshL....Rss.......s....pppt.............Ltpt.lpps.l.ppK.ppa..h.............................................. 0 333 659 885 +6292 PF06464 DMAP_binding DMAP1-binding Domain Yeats C anon ADDA_4672 Domain This domain binds DMAP1, a transcriptional co-repressor. 
27.00 27.00 27.00 27.70 26.80 26.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.26 0.72 -3.47 20 421 2009-01-15 18:05:59 2003-07-10 16:02:57 6 20 154 0 223 376 0 97.80 28 7.19 CHANGED ssssLPt-VRc+Lp-L-h-hspGslTpKGYpKK+s+LLppFL.................................tscsppphtsppsth+cc.chpp-lapps.VpAhLsKptppchuhshsscccssh ...................s.......clpt.pLtpL-.-.hp.pGDITpKGYpK++spLLt...tal......................................................................t........t...........tst.......p.ph.phtst.psp..php..........pcha.htt.lpuhls+...p.c.........p................................................................................................... 0 35 62 128 +6293 PF06465 DUF1087 Domain of Unknown Function (DUF1087) Yeats C, Sammut SJ anon ADDA_2403 Domain Members of this family are found in various chromatin remodelling factors and transposases. Their exact function is, as yet, unknown. 20.80 20.80 20.80 21.10 20.40 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.18 0.72 -4.05 14 335 2009-01-15 18:05:59 2003-07-10 16:07:05 8 54 109 0 159 267 0 64.50 60 3.87 CHANGED pthttcp-ps.s-shtt..ps.cssaW.cLL+++YEpppt-ctppLGKtKRsRKQVsYs-t.sslps .....................c..Etcc.EElEREIIKQE..E.s..V.D...PDYWEKLL...........RHHYE..QQQEDLARsLG.KGKRlRKQV..N...YNDu.uQED............ 0 34 65 106 +6294 PF06466 PCAF_N PCAF (P300/CBP-associated factor) N-terminal domain Yeats C anon ADDA_4771 Domain This region is spliced out of Swiss:Q92830 isoform 2. It is predicted to be of a mixed alpha/beta fold - though predominantly helical. 
19.70 19.70 43.40 23.00 19.20 18.00 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.75 0.70 -5.05 3 179 2009-09-11 14:37:09 2003-07-14 11:28:16 6 8 84 0 111 162 0 215.60 57 31.42 CHANGED RIuQRKAQVRuLPRAKKLEKLGVYSACKAEEoCKCNGWKNPpPsss.PRtDLQQs.sssLoEpCR..SCcHuLAuHVSHLENVSE-EMNRLLGhVlDVENLFMSVHKEEDsDTKQVYFYLFKLLRKCILQRs+PVVEGSLG.cPPFEKPsIEQGVLNFVQYKFSHLuo+ERQTMhELAKMFLNpLNYW+LEoPSQRRtRSssEDlSsYKlNYTRWLCYCHVPQFCDSLPRYETT+VFGRTLLRSVFTlsRRQLLEKs ...........................t.s..pKl.KLuhaSuCp..upp.CKCsGWK....sPpssst.....s+..............hDlp..p....h.....sshs-.CR..oCpHsL.s.sHluHL.-slSE-EhNRLLGhVlDVEpLFhsVH...KEE..D.s.D.TKQVYFYLF.K.LLRKsILp.hs+PVlEGsLt....pPPFE+PsIp..p..uV.NFV.YKFSHL.s.s+EpQTh.hELuKMFL.plNaW+LEsPoph...RtR..s...s.-.Dh....usYKlNYTRWLCYCpVPphCDSLP+YETopVFGRoLL+SlFsshRRQLL-p........................... 3 27 34 74 +6295 PF06467 zf-FCS zf_MYM; zf-MYM; MYM-type Zinc finger with FCS sequence motif Yeats C anon ADDA_4806, Iyer L Domain MYM-type zinc fingers were identified in MYM family proteins [1]. Human protein Swiss:Q14202 is involved in a chromosomal translocation and may be responsible for X-linked retardation in XQ13.1 [2]. Swiss:Q9UBW7 is also involved in disease. In myeloproliferative disorders it is fused to FGF receptor 1 [3]; in atypical myeloproliferative disorders it is rearranged [4]. Members of the family generally are involved in development. This Zn-finger domain functions as a transcriptional trans-activator of late vaccinia viral genes, and orthologues are also found in all nucleocytoplasmic large DNA viruses, NCLDV. This domain is also found fused to the C termini of recombinases from certain prokaryotic transposons [5]. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.75 0.72 -4.32 56 1780 2012-10-03 05:12:49 2003-07-14 11:43:49 9 35 200 2 719 1712 47 41.30 25 16.45 CHANGED ttssspspCphCpp.h..stpt......hphp......uphppFCSpsChspa ..........t...hhthpCshCpp.h.tptpp.........hphp......GphcpFCSpsChspa...... 0 60 95 259 +6296 PF06468 Spond_N Spondin_N Yeats C anon ADDA_5023 Family This conserved region is found at the in the N-terminal half of several Spondin proteins. Spondins are involved in patterning axonal growth trajectory through either inhibiting or promoting adhesion of embryonic nerve cells ([1]). 20.90 20.90 20.90 21.60 20.60 20.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.17 0.71 -4.62 17 420 2009-01-15 18:05:59 2003-07-14 12:07:36 8 35 159 3 258 396 23 167.40 30 35.50 CHANGED tA+YclsFpGhWSppoHPKcaPhh..ss+aSsllGuoHossYphWp.GphASsGl+phAEtGsshtL-pElct..tup+l.....................................polhpshu...hsuh.sssGpssuphcVDs.pHphlShlshlsPSPDWhVGVsul-LCpss.sWh-phsl-LaPaDAGTDSGhTapSsNtsTlPt-hlppITop.PscPtuPFYsPcupphsP .....................................Yph.h.t.Ws.t.aspp.aP.................spaSsll....G.suHsss.a.p.haphG.........ph..A.SsGl+phAEtGsst.tLt...p.-hpt..........t.sp..t..h.........................................................olhpshu............t.h....s.s.s.u..p....s...ps..ph...p.l-p...p+.ph.lShh...shlsPSPDWhV...G...l...s...u...hcLCp....ts..sW..hpp.hs...hsL...hP..aDAGTDuG.o....a...uss..p.P.t.h....hp..s.....p..ssah....................................................... 0 79 111 198 +6297 PF06469 DUF1088 Domain of Unknown Function (DUF1088) Yeats C anon ADDA_5036 Family This family is found in the neurobeachins. The function of this region is not known. 
20.80 20.80 20.80 20.80 20.70 19.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.18 0.71 -4.84 6 183 2009-01-15 18:05:59 2003-07-14 13:31:26 6 14 83 0 96 151 0 165.00 61 7.11 CHANGED EGRLLuHAMKDHlVRVANEAEFILNRQRAEDVHKHA-FESpCAQYsADRREEE+MCDHLIsAAKaRDHVTAsQLlQKIlNILTsKHGAWGs.u.sSpsp-FWRLDYWEDDLRRRRRFVRNPaGSoHsEATLKAuhEa...........sssE-plhph+cshpSQshsspN..scsEL.....lL-uD ..............EGRLLsHAMKDHlVRVANEAEFILNRQRAEDVHKHAEFE..............Sp..CAQYuAD+REEEK..MCDHLIoA....AK+RDHVTAsQ.Lh.QK........IlNILTNKHGAW............G...s...........s......u......t.............S..........p..........h.....+-FWRLDYWEDDLRRRRRFVRNPhGSTHsEAoLKu.AlEa................................uss.E.-tl.hKuKpsh+SQs..lssQN..sEsElhL-u............................................ 0 26 35 61 +6298 PF06470 SMC_hinge SMC proteins Flexible Hinge Domain Yeats C anon ADDA_5420 Domain This family represents the hinge region of the SMC (Structural Maintenance of Chromosomes) family of proteins. The hinge region is responsible for formation of the DNA interacting dimer. It is also possible that the precise structure of it is an essential determinant of the specificity of the DNA-protein interaction ([1]). 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.20 0.71 -4.14 102 3948 2009-01-15 18:05:59 2003-07-14 13:45:41 8 39 2700 16 1605 3541 451 116.80 25 9.96 CHANGED hpu.lhGhls-llpl..sppac..t.AlpssLGsplp.sllVcstpsApphlphlc.pt........p.....hG+s.shlsLsplpspthp......................................sst.llphlph.s...phptsl..phllussllscsl-pA..tpls .................................................h..GlhGtlu-Llpl...cp...cap....t..AlEsuLG.s..........s.hp.pll.V..-..s..p..c..s..Appslph.L+.pp................................p......hGRs..TFl...PL.s..p..l.psps.h.s..tt.tt.........................................................................shssh.hh.-.ll.p....h..-.........tphps.sh....p....lLG.ssllscslcpAppl.............................................................................. 0 578 968 1353 +6299 PF06471 NSP11 NSP11 Yeats C anon ADDA_6050 Family This region of coronavirus polyproteins encodes the NSP11 protein. 
25.00 25.00 37.30 36.70 18.30 17.80 hmmbuild -o /dev/null HMM SEED 594 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.02 0.70 -6.32 11 410 2009-01-15 18:05:59 2003-07-14 13:59:22 7 36 245 13 0 420 0 562.60 60 10.13 CHANGED scLQu..p..ssGLFKDCuKs.pslpPAaAsTalSlsDcaKss-sLAVplss..sssloYp+lIShMGF+hDlslsGYpsLFlTRDtAlRpVRuWlGFDVEGAHAstsNlGTNlPLQlGFSTGVsFVVpPpGhlsTcpGsshcsVsAKAPPGEQF+HLlPLM+KGpPWsVVR+RIVQMluDhLssLSDhllFVhWAtGhELTThRYFVKlGhEppCp.Cu+RATCYsSssss...YuCa...+HulGsDYVYNPahlDIQQWGYsGsLosNHDthCsVH+sAHVASuDAIMTRCLAIHDCFsKsV-WslpYPhIuNEppINpuCRhVQphll+AAlpsh+ssslaDIGNPKGI+CVsp.-scWpaYDppPlsp...sVKpLcYsYtsHtp.FtDGLChFWNCNVDpYPpNulVCRFDTRshSpLNL.GCNGGSLYVNKHAFHTPAaD+pAFt+LKPhPFFYYsDosCEshp.......cpV..sYVPL+ossCITRCNlGGAVCpKHAs.Y+pYlEuYNhhssAGFolWVs+sFDsYNLWpTFop...LQSLENlAYNVVKpGpFsGlsGELPVAIlNDKVal+ssssDshlFsNpToLPTNVAFELaAKRplphpPsls ........t.LQs.....psTGLFKDCSKphs.GlHPuaAsTahulsspaKss-s.LAVslss.hspshTYp+LIShMGFKhsh.....slsGYpshFITR-EAIRpVRuWlGFDVEGsHAst-slGTNlPLQlGFSTGlsFVVpspGhVsTcpGspFphVsAKuPPGEQFpHLIPLMp+GpPWpVVRhRIVQMluDpLpsLSDpVVFVhWAtG.hELTohRYFVKIG.EpsCs.Cs+RATCasSposs...YuC..W...+H....ul...G...hDYlYNPhhlDlQQWGYoGsLp.NHD.aCsVHtsAHVASuDAIMTRCLAla-CFsKcVsWslpYPhIuNE.plNsuCRhlQ+hhl+AAlhs.+hsllaDIGNPKuI+CVtp.-lpa+FYDtpPlss...sV+pL.YsYtsH+DpFtDGLChFWNCNVDpYPsNulVCRFDTRsLSsLNLPGCNGGSLYVNKHAFHTPsFD+sAFppLKshPFFYYsDSPCEhhs......sspl..DYVPL+SssCITRCNlGGAVC+KHAp.YRpYl-uYNhhsoAGFolWlh+sFDsYNLWpTFop...LQSLENVAYNlVppGHFsGhsGEhPsuIls-+Vhs+ssshDstIFpNpToLPTNVAFELaAKRsI+.hPpl...... 0 0 0 0 +6300 PF06472 ABC_membrane_2 Ald_N; ABC transporter transmembrane region 2 Yeats C anon ADDA_6479 Family This domain covers the transmembrane of a small family of ABC transporters and shares sequence similarity with Pfam:PF00664. Mutations in this domain in Swiss:P28288 are believed responsible for Zellweger Syndrome-2 [1]; mutations in Swiss:P33897 are responsible for recessive X-linked adrenoleukodystrophy [2]. 
A Saccharomyces cerevisiae homolog is involved in the import of long-chain fatty acids [3]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.51 0.70 -5.28 26 2221 2012-10-02 13:23:42 2003-07-14 14:08:31 10 15 1284 0 895 2380 1271 271.60 24 45.53 CHANGED hphsssFhpplhpLl+lhl.PphhspcshhLhslshhLlhRThlslhluphsuplhpsllp..............pshctFhhtlh.pahhlulssohlsshlcYhpscLtLpaRppLocalaspYLpsp.saY+huslDs....plsNsDQplTpDlppFs...........s.ssusLaoslsKPllDlhlasapL....hpssGttus..............hhlhsYlhhuus.ll+tlssPhucLssccp+hEGcaRahHsRLlsNuEEIAFYpGpchE+pplpppapsLlpphptllph+hhhshh-shlhKYhhsslGallsulPhF .................................................h........phh..l.hp.hh..s..p.t..hpt.............hh.h.....l..............h..h..l..hh..h........h.l...h..ts.h........l.s....lhls..p.hs.....s.phh...s.u.L....p....................................................................p.sh..ptFh..p..hlh....h............a.......h...........h.............lhh...hh.....s..h.h.s................s.....h..............h.......pa........l.p....p.h.L....tl.pWR....phLTcth...hsp.ah...............s............s....p...s........YY+l..p...h..hsp.........................t.h.-.N.P................DQ....RIspDlpths............s.sshsLh........h........s...l.lpsllsll...hFshh.L....aph..uG.shsh.......................................hhhh.l...ashhu..sh...l.h.......p............h.lu..p...................lhpLshppp............+hEusa.........R...hshs+lpcp.......uEpIAh........ap..G...c...p.h...............Ec...p...............pl...pppFps.lhpsh.....pp.......l.......hptp..hh.......h.......shhpshh.....phhhshhshllhus.h................................................................................... 0 255 477 704 +6301 PF06473 FGF-BP1 FGF binding protein 1 (FGF-BP1) Moxon SJ anon Pfam-B_14221 (release 10.0) Family This family consists of several mammalian FGF binding protein 1. 
Fibroblast growth factors (FGFs) play important roles during fetal and embryonic development [1]. Fibroblast growth factor-binding protein (FGF-BP) 1 is a secreted protein that can bind fibroblast growth factors (FGFs) 1 and 2 [2]. 25.00 25.00 41.80 25.60 20.00 21.60 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.86 0.70 -4.76 9 118 2009-01-15 18:05:59 2003-07-14 14:09:56 7 2 40 0 70 108 0 192.60 24 96.26 CHANGED lphLohL.LLlsthhhspstK..psKstptussppt.p...s...............pG+FsTK-pssCoWtl...hpts.pslsL+VcCpptsps.....asCtasGpPppC.tapscsptYWKQlsppLR+p+phCpssp.lLKoRlC+Kssspuph.................+hlopsh.ssptscpchppssspcpstsptpss.cpp........t.ttshssphPch.s.....psss.tppp+hAhEaCsEoWpSLCsFFlshhpG ..........................................shl.hLhs.hhhsthtp......t.cttttpt....stt..............................pG+F.op.c....p.t....sCshth.............pt.t.tthpLplpCp....p.....spp.......a.C.atGpPp.C.tatsp.phYW+QlhttL++.+.phCpsst.sL+splCp.+.ts...puph...................phhspsh.st...t....sppp.......tt...t...st.....t...........t................t.............t...t...hh.s.......s.ph...............pss.s...p..ptchs.paChcpapSlCsFFlshhps............................... 0 3 11 31 +6302 PF06474 MLTD_N MltD lipid attachment motif Yeats C, Bateman A anon ADDA_7289 Motif This short motif is a lipid attachment site. 25.00 25.00 25.10 25.30 23.40 24.20 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.45 0.72 -3.76 11 113 2012-10-01 23:27:00 2003-07-14 14:17:28 7 3 100 0 15 67 2 26.30 54 5.12 CHANGED Mp...................hsphuhlhh.sLLsGCQo ......................htVphShVhA.hLLVGCQS 0 3 3 9 +6303 PF06475 Glycolipid_bind DUF1089; Putative glycolipid-binding Moxon SJ anon Pfam-B_14397 (release 10.0) Family This family has a novel fold known as a spiral beta-roll, consisting of a 15-stranded beta sheet wrapped around a single alpha helix. It forms dimers. 
It has some structural similarity to the E. coli lipoprotein localisation factors LolA, Swiss:P61316 and LolB, Swiss:P61320. Its structure suggests that it may have a role in glycolipid binding. Its genomic context supports a role in glycolipid metabolism [1]. 25.00 25.00 28.10 26.80 22.00 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.00 0.71 -5.13 30 291 2009-01-15 18:05:59 2003-07-14 14:32:26 6 2 259 2 90 250 11 175.50 34 93.19 CHANGED tslpWcsh...-ssGhEplpl....ppsusultssuhlh.sppsups.hulpYclpsDssWpo+phplsshhutt.tplpltp-tcGpWhh.sGp...shssl-GslDlDluhoPFTNTLPIRRLsLst..GpstplsssalphPshp.lshspQpYopl..ss....phY+Ycusstu.......AclsVDccGhVlDYPsLacRl .........................t..lpWpsh....-.hsthEplpl....ph.ssptlpsputll..utpssps.auhpYclt.sDtshts+chslpshhutt..ppLpltpD..tcG...t.....Whs..tssp...............thsshs.GslDlDlshoPFsNsLPIRRLGLtp......ucst..slsVlYVslP.....-.....hs.VssspQsYosh.ss..............pth+acossss.......sslsVDs-GhVlDYPsLhcRh........ 0 24 50 68 +6304 PF06476 DUF1090 Protein of unknown function (DUF1090) Moxon SJ anon Pfam-B_14862 (release 10.0) Family This family consists of several bacterial proteins of unknown function and is known as YqjC in E. coli. 21.10 21.10 21.10 21.20 21.00 20.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.50 0.71 -4.42 40 769 2009-09-11 09:53:20 2003-07-14 14:36:26 7 2 736 0 83 307 8 111.90 51 89.98 CHANGED slsshlsssshus...ttt..hsGCssKtpsIppQIphA+taGNpp+lsGLcpALppVpscCoDsuLtp-+ppKltc.tcpcVsERpp-LpcAppc.......Gcs.-KIsK+pcK..LuEAppEL...pcA ..............lu..AlsLhsl.S.uuohA.....sohCQcKEQsIp.KEISYAcKHpNQsRI-GLpKALSEVRANCoD...opL+A-HpKKIA.......c.pK-.......EVAERQpDLsEAKpK.......GDA.DKIsKRc+K..LAEAQ-ELKK.l...................................... 0 4 19 50 +6305 PF06477 DUF1091 Protein of unknown function (DUF1091) Moxon SJ, Bateman A anon Pfam-B_14929 (release 10.0) Domain This is a family of uncharacterised proteins. 
Based on its distant similarity to Pfam:PF02221 and conserved pattern of cysteine residues it is possible that these domains are also lipid binding. 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.23 0.72 -3.55 534 1212 2012-10-01 19:31:57 2003-07-14 14:38:56 8 13 19 0 714 1209 0 83.10 21 48.20 CHANGED lphplhp+t.....ssacs.h.lashs.hDhCca...l...................p........hhphhaphhp..phS.Nh.scs....CPa...............stp....h..hlc.s.......hhhst......phlP.........l..P.pGp..Yhl .....................................hhhp........ssac.h..las.hs.h-hCch...l..pp.....p.........p...s......hhphh..aphhp.....ph.o..N....h..scs.......CPh.........................pss..h....hlc.s.....hhhst..................phlP....h.PpGpYhh.................................... 0 114 175 487 +6306 PF06478 Corona_RPol_N Coronavirus RPol N-terminus Yeats C anon ADDA_7507 Family This family covers the N-terminal region of the coronavirus RNA-directed RNA Polymerase. 
25.00 25.00 84.40 84.40 18.70 18.00 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.25 0.70 -5.76 11 440 2009-01-15 18:05:59 2003-07-14 15:04:52 8 32 218 0 0 416 0 326.60 64 6.25 CHANGED G.SSAARLEPC.NGT-sDhVhRAFDI....NKcVAslGKaLKsNCsRFppl.Dcc.......DuaFVVKRsTcSsh-HEQohYshLKsCsAVAcHDFFsa+cs+shhsNluRpcLTKYTMMDLsYALRpFDEpNC-lLKEILVhhGsCcpsY...F..-sKsWYDPVEN.DIaRVYApLGplVspAhLKsVtFCDsMVcpGlVGVLTLDNQDLNGsFYDFGDFlpshPGhGVPlssSYYSYMMPlhuMTNCLAuEsFhcuDl.upsaKsaDLLcYDFTEaKhsLFsKYFKYWs.pYHPNCsDCtDDpCllHCANFNsLFSTTIPsTuFGPLsRKlFlDGVPhVsTsGYHFKpLGlVaNpDVshHssRLS .............SusARL.PCusGsssDVshRAFDIhN..ppsAGhupaLKsNCsRFQc......l....Dc.....c.....ss.L...DuaFVVKRsThosYp+EpshYphl....K........s........s........ssVApHDFFpFchstshlspIsRpcLTKYTMhDLsYALRHFDcpsC-sLKEILVpYus...Cc-sY....F.ppKDWYD.VENPcIhpVYt+LG.lVppALLpsVpFsDshV-tGlVGVLTLDNQDLNGpaYDFGDFlpsAPGsGVslsDSYYSYhMPllsMTcsLssEpahc.Dl.tpsa+paDLLpYDFT-cKhpLFsKYFKYWs.psYHPNCh-C.DDRCIlHCANFNlLFShllPsTuFGPLVRKlFVDGVPFVVosGYHaKELGVVhN.DVshHp.RLS.. 0 0 0 0 +6307 PF06479 Ribonuc_2-5A Ribonuclease 2-5A Yeats C anon ADDA_8069 Family This domain is a endoribonuclease [1]. Specifically it cleaves an intron from Hac1 mRNA in humans, which causes it to be much more efficiently translated. 
22.50 22.50 22.50 22.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.90 0.71 -4.29 32 447 2009-01-15 18:05:59 2003-07-14 15:15:02 7 36 256 47 292 416 4 126.20 36 13.94 CHANGED c+LpFLpDVSD+hEhcsRcs.SshLphLEs.sutpVl..p.scWpp+lspshhssLs+a.R+..............YpssoltDLLRslRNKpcHYcEh.sccl+chlGslP-sahpYFspRFPcLLltsYpslt...phppcchFppYaps ...................................................................pcLpFlp.DVSD+hEhE.s...p................c..............u....s.......l.l.p...tLEp..s.......uptVl.......t..scWpppl.sts.l.h..s.sL.t....+.a...Rp.........................Ypuso.ltDLLRAlRNKpcHYcEl....P.........tcl..........p.p..............t............l...........Gs...l....P-..s.......a..hp...........YFssRFPpLLhp.sYpshp....thpp.-phFp.Ya................................ 0 100 156 224 +6308 PF06480 FtsH_ext FtsH Extracellular Yeats C anon ADDA_8169 Family This domain is found in the FtsH family of proteins. FtsH is the only membrane-bound ATP-dependent protease universally conserved in prokaryotes ([1]). It only efficiently degrades proteins that have a low thermodynamic stability - e.g. it lacks robust unfoldase activity. This feature may be key and implies that this could be a criterion for degrading a protein. In Oenococcus oeni FtsH is involved in protection against environmental stress ([2]), and shows increased expression under heat or osmotic stress. These two lines of evidence suggest that it is a fundamental prokaryotic self-protection mechanism that checks if proteins are correctly folded (personal obs: Yeats C). The precise function of this N-terminal region is unclear. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.29 0.72 -3.88 164 4553 2009-01-15 18:05:59 2003-07-14 15:26:04 10 9 3836 1 1188 3236 1943 99.30 18 15.13 CHANGED hhlahllhlllhhlhhhh....................ttt....................pspplsa..opF.........h.phlp...psplppltlp...........tpph....................................................h.s.ttpsspthtshhhss....sth.pp.............ltpthtpt.slphssp.t ..................................................hlalllhlllhslhp.h.................................sssps.....................................sspplsY..SpF....................l.pplp.......pGpV.......c.plplp...........spphs...............................................................................................................s..pppss.......sp.....h.....ps....h...h.s.ss...........t.....pp..............L.sthhpt.slp............................................................................................. 0 356 719 981 +6309 PF06481 COX_ARM COX Aromatic Rich Motif Yeats C anon ADDA_8118 Motif COX2 (Cytochrome O ubiquinol OXidase 2) is a major component of the respiratory complex during vegetative growth. It transfers electrons from a quinol to the binuclear centre of the catalytic subunit 1. The function of this region is not known. 20.90 20.90 21.40 20.90 20.50 20.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.97 0.72 -4.29 98 1360 2009-01-15 18:05:59 2003-07-14 15:39:44 9 4 1276 4 243 775 21 47.10 36 14.66 CHANGED K....so.spsLs.tssYtpLu...pPSEppPVpaauoVpssLFpsllspahtttp ........KtS.spsLs.hssapcLA.........tP..SE.....h.......s....VpYFSsVcPsLFpslls+ahstt...... 0 42 105 169 +6310 PF06482 Endostatin Coll_NC10; Collagenase NC10 and Endostatin Yeats C anon ADDA_828 Domain NC10 stands for Non-helical region 10 and is taken from Swiss:P39059. 
A mutation in this region in Swiss:P39060 is associated with an increased risk of prostrate cancer. This domain is cleaved from the precursor and forms endostatin. Endostatin is a key tumour suppressor and has been used highly successfully to treat cancer. It is a potent angiogenesis inhibitor ([1]). Endostatin also binds a zinc ion near the N-terminus; this is likely to be of structural rather than functional importance according to ([2]). 23.20 23.20 23.70 23.90 23.10 22.80 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.15 0.70 -5.03 11 375 2009-01-15 18:05:59 2003-07-14 15:52:32 6 57 98 17 154 356 3 193.10 29 28.00 CHANGED GsusGssshpohpsMlspu+phPEGsLlalh-cp-LYlRVRpGa+plhLsshsPlssss................................PPsshhh........................................s...t.hhspP.phsphp...................ts.hhppsPtPssssss...cscpp.tPsLHLlALNsPhSGsMRGIRGADFpCFQQARssGLtGTFRAFLSSRLQDLYSIVRRADRtslPIVNL+D-VLFsSW-ulFoGutu.hpsusRIaSFDGRDVLpDssWPQKhVWHGSsscG+Rhs-oYCEsWRTsspusTG.ASSLt.uG+LL-QputSCpssaIVLCIENSFMTptpK .............................................................................s.............................................................................................................................................................................................................................................................L+hhALNtP..s.G.sh......p.............AD....h.CapQucthGh..hs....T.a+AFLSS......+lQsL.plVp.s-R..thPllNh+splLF.sWpshFs.s.p.......t.u..h.....p..t..laSFsG+slhtcstWP.K...lWHGS.s...tG...R........ppaCcsW+stt....h.u.uu.l.............s............t..hl.t.Q..p.........t..CttthhlLClE............................................................. 0 36 49 101 +6311 PF06483 ChiC Chitinase C Yeats C anon ADDA_8334 Family This ~170 aa region is found at the C-terminus of Pfam:PF00704. 
20.50 20.50 22.50 20.50 18.90 20.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.05 0.71 -4.54 20 239 2009-01-15 18:05:59 2003-07-14 16:00:39 6 10 178 0 35 173 3 178.60 51 18.04 CHANGED EYhMGsThTohhY-KFssA.oPYG.sp.tushshPspulDlslshosFtlGDsNYPIsPclplTNNSssslPGGochpFDlsTSsssphpD....QSGhGlpVlsSGps.sGsNlGGL-s-FHRVuhoL.....PuWpoLAPGuoh-lshsYYLPlSs.PSNasVshsGppYulpt-aPpLP...hss.suuGs......G ..EYthGsTMTphhY-KFpsA.oPYG.s+...hussshPspslDlsVslsuFplGDpN.YPINPKlsFTNNoslsIPGG..ocFpFDlPsSusDssKD....QSGuGLKVIsSGHT.pusNlG.GLcGshHRVAhoL.....PuWcoLPAGuoY-lDMVYYLPlSG.PuNaoVplssppYuhph-..PsLPss-.hsssGss.ss........... 1 10 16 28 +6312 PF06484 Ten_N Teneurin Intracellular Region Yeats C anon ADDA_8545 Family This family is found in the intracellular N-terminal region of the Teneurin family of proteins. These proteins are 'pair-rule' genes and are involved in tissue patterning, specifically probably neural patterning. The intracellular domain is cleaved in response to homophilic interaction of the extracellular domain, and translocates to the nucleus. Here it probably carries out to some transcriptional regulatory activity ([1]). The length of this region and the conservation suggests that there may be two structural domains here (personal obs:C Yeats). 
20.20 20.20 27.90 38.40 19.40 18.80 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.21 0.70 -5.34 4 409 2009-09-11 06:34:01 2003-07-14 16:08:25 7 28 37 0 154 368 0 172.60 36 11.44 CHANGED RSLTp.RpDTE+RYTSSSADSEDuKls..KSYSSSETLKAaDpDSRhsYGsRVKDhVH+EsDEFSRQGssFoL+-LGhGEssPsHhusYRoDMGLPHpsYSlSsuSDADTETDGlMSPEHAVRLWGRS.TKSGRSSCLSSRANSNLTLTDTEHENTEN..........................GPPLHCSSASS.SPl-QhP....PPPS.AANpsQttLLGsS.....uApsupDS-SE-EFuPNSFLVKosSGNlhsPttAsup..ssaQNHSRLRTPPLPLsHsHoPS..HHsASINSLNRuNYTpR.SN...PSPAPTDpSsssEsPsu.Q-SlpsQDNWLLNSNlPLETR.................................HFLFKPG.GTSPLaCTTSPGYPLTSSTVYSPPPRPLPRNTFSRPAFsLKKPYKaCNWK ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................shp.t............p....................st.........s.hppsWlLsSNhsLEo..R..........................................................pFLFK.u.Gooshhssss......sY.shsosoVYosPsR.LPRsohs.R.hFphpKs.+hCsW+................................... 1 7 22 53 +6313 PF06485 DUF1092 Protein of unknown function (DUF1092) Moxon SJ anon Pfam-B_14522 (release 10.0) Family This family consists of several hypothetical proteins of unknown function all from photosynthetic organisms including plants and cyanobacteria. 
20.10 20.10 20.50 31.90 18.10 18.00 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.74 0.70 -5.44 37 156 2009-01-15 18:05:59 2003-07-14 16:19:01 6 2 107 0 74 169 167 260.20 36 87.46 CHANGED WELDFYSRPll-pcGKKhWELLIssssps........FcaschCPuscVNShWLpsALpcAltt........uhttPp+lRsaRspMpohlp+AspplGlpshPSRRTauLhcWLpcRpcplYPpp.GY.s..hssss.sh.tssPhPLP-ulpGDp...WsauuLshuslp-ht.-W...slsFss....lhPls....hsLss-........t.lPGlhlFSppRuLsLAuWluGLEPspLphpss......pLlLEuG.sD+Wlluslp.sspspttupsapps+ppupGLpFlulQssPpspsFsGFWlLc- ...........WELDFYSRPllDtpGKKhWELlIC-ssts........hpasphCPssplNShWLppAlppshtp........uh.hPpplRhFRspMpshIp+Aspcl.....ulpshsSRRThuLhpWLp-R.pplYspp.Gapt.....ts.ssls..hsp.ssPhsLP-sLhG-p...Ws...FlsLshuslp...-...h....-h...sls.Fup....hhPls....hsLsss........shIPGlhlaSs.RuhsLAuWhuGLE.stLphpss.............tLlLEsGhs-+Wllushp..sspstt....tApsaEpsKptupGLHFLulQs.sssupsasGFWLLpp........... 0 17 55 70 +6314 PF06486 DUF1093 Protein of unknown function (DUF1093) Moxon SJ anon Pfam-B_15034 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.50 21.50 21.50 21.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.50 0.72 -3.62 59 1369 2009-01-15 18:05:59 2003-07-14 16:22:07 6 2 510 4 84 672 2 79.20 26 65.84 CHANGED sshhttcshYspIsss...ucppspp.............tpYpYphpuaccsGcc+plpa...sus.....+pL+psuYLKlphpspp..........VpsacEVpcc- .............................shhttcshYsplsps........upchsp..............pthpYphsuasccGcccplpasus........ppL+.pssYLKlhh....ps+c............Vpsa-Elpcc.................................. 1 23 44 63 +6315 PF06487 SAP18 Sin3 associated polypeptide p18 (SAP18) Moxon SJ anon Pfam-B_15078 (release 10.0) Family This family consists of several eukaryotic Sin3 associated polypeptide p18 (SAP18) sequences. 
SAP18 is known to be a component of the Sin3-containing complex which is responsible for the repression of transcription via the modification of histone polypeptides [1]. SAP18 is also present in the ASAP complex which is thought to be involved in the regulation of splicing during the execution of programmed cell death [2]. 20.80 20.80 20.90 21.60 19.50 18.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.62 0.71 -4.31 26 277 2009-01-15 18:05:59 2003-07-14 16:37:32 7 8 228 5 202 279 0 126.60 42 57.63 CHANGED t.t.lDRcpTCPhLLRlF.hppspaasls-as............................splP.tsELQIYTWhssTLRELspLl+-.....s..ssR++GTphsFtllaPDt+p.........upYls+-lGoshs..Gt+ss..................................D-sKTLpst+FpIGDalDluI ...............................s...lDREKTCPh..LLRVF.hp.sGpHHph..s.-Fs.....................................t....uslP.psELQIYTW.......hDuTL+ELosLl+-............shstsR++GT+hsFslVaPDh+p..................stahh+-lGsshs....Gpcss........................................................................D-s+TLtsh+FpIGDYlDlAI.................................. 1 70 107 158 +6316 PF06488 L_lac_phage_MSP Lactococcus lactis bacteriophage major structural protein Moxon SJ anon Pfam-B_13945 (release 10.0) Family This family consists of several Lactococcus lactis bacteriophage major structural proteins. 
20.20 20.20 20.20 31.10 19.70 20.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.74 0.70 -5.36 2 78 2009-01-15 18:05:59 2003-07-14 17:14:50 6 6 55 0 7 54 0 214.50 62 72.90 CHANGED MKLDYNSREIFFGNEALIVADMsKGSsGKP.FoNHKIVTGLVSVGSMEDQAETNSYPADDVPDHGVKKGATLLQGEMVFIQTDQALKEDILGQQRTtNGLGWSPTGNWKTKCVQYLIKGRKRDKlTGEFVDGYRVVVYPHLTPTAEATKESETDSVDGVDPIQWTLAVQAT-SDIYLNGDKKVPuIEYEIWGEQAKDFsKKMESGLFIhQPDT.LAGtlTLVAPslsNVpTtTKGNNDGTIVlPsTLKsSKGpsIKVTuVIKDs+GpVATNspLAPsVYIVTFSA-GYtDVpAGVuVTs+s .........................................................................................................................................................................................................................................................................................................LAGsl..TLVAPshs.s.sTTus.KGssDuThslPsTLKDScGusVtVTSVIpsupGpssTN.GpLusGsYhVTaSA-GY-DVTtulsVTD..... 0 2 2 2 +6317 PF06489 Orthopox_A49R Orthopoxvirus A49R protein Moxon SJ anon Pfam-B_14072 (release 10.0) Family This family consists of several Orthopoxvirus A49R proteins. The function of this family is unknown. 25.00 25.00 79.10 78.80 19.20 18.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.21 0.71 -4.26 2 38 2009-09-11 15:19:44 2003-07-14 17:19:49 6 1 18 0 0 29 0 147.10 88 99.93 CHANGED MDEuYYSGNLESVLGYVSDMHTcLASIoQLVIuKIETIsNDILNNsIVNFIMCRSNLNN........lYhh.c.pIY.......................aY+..................................................SpDlpERh. .....................MDEAYYSGNLESVLGYVSDMHTELASISQLVIAKIETIDNDILNpDIVNFIMCRSNLDNPFISFLDTVYTIIDQEIYQsELINSLDDNEIIDCIVNKFMSFYKDNLENIVDAIITLKYIMNNPDFKTTYAEVLGSRIADIDIKQVIRcNILQLSNDIRERYL........... 
0 0 0 0 +6318 PF06490 FleQ Flagellar regulatory protein FleQ Studholme DJ anon Pfam-B_13480 (release 9.0) Domain This domain is found at the N terminus of a subset of sigma54-dependent transcriptional activators that are involved in regulation of flagellar motility e.g. FleQ in Pseudomonas aeruginosa. It is clearly related to Pfam:PF00072, but lacks the conserved aspartate residue that undergoes phosphorylation in the classic two-component system response regulator (Pfam:PF00072). 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.16 0.72 -3.90 47 334 2012-10-01 22:20:39 2003-07-14 17:20:29 6 6 327 0 79 856 142 112.00 30 23.81 CHANGED +lLll-ssspRppsLssILcFlGEp....sphhsssph.....tstthpsphpuhslhhssss......ptlpsltpthsthPlLllGcpsts....phss......llGpl-hPlsYspLs-hL++sQh ..............+lLlI.-DD.upRRpsLssIL...c.FlGEp....spsh.s.u.p.ph.....sphshs.ssh.ps.h...h..l..h.st.ssp......thtphLp.pl..h..s...t..s..s..a.lPlLlhscps.s......chss..........................hlG.p...L-hPhsYspLp-.uL++sp................................................. 0 17 34 59 +6319 PF06491 Disulph_isomer DUF1094; Disulphide isomerase Moxon SJ anon Pfam-B_14101 (release 10.0) Family This family of proteins has disulphide isomerase activity, EC:5.3.4.1. It has a similar fold to thioredoxin, with an alpha-beta-alpha-beta-alpha-beta-beta-alpha topology. It has a conserved CGC motif in the loop immediately downstream of the first beta strand. This motif is essential for activity [1]. 
25.00 25.00 31.10 31.10 19.30 17.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.67 0.71 -4.32 28 820 2009-01-15 18:05:59 2003-07-14 17:22:22 6 1 476 4 121 346 200 135.90 57 94.58 CHANGED Y.cplVpPMRpELopuGFcELpTsE-V-pshp...p-GTTLVllNSVCGCAAGlARPAAstAlpp.-++PD+LVTVFAGQDKEATs+sR-.YF.hsaPPSSPShALhKDG-lVahIcR+cIEG+ssptIsppLtsAF-caC .....YMpplVpQhRsElspuGappLpTuEsVcchhp...pcGTTLVhlNSVCGCAuGlARPAAspulph..-+pP-+LVTVFAGQDKEATs+hRE.YF.tshsPSSPSaALlKssclVchlcRHpIEG+-h.slhtpLpssF-c.C............ 0 44 86 107 +6321 PF06493 DUF1096 Protein of unknown function (DUF1096) Vella Briffa B anon Pfam-B_15011 (release 10.0) Family This family represents the N-terminal region of several proteins found in C. elegans. The family is often found with Pfam:PF02363. 25.00 25.00 26.20 26.20 18.20 18.20 hmmbuild --amino -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.74 0.72 -4.01 10 23 2009-01-15 18:05:59 2003-07-15 10:07:56 6 4 5 0 23 16 0 53.70 49 14.66 CHANGED pusslR-KRQ.uCGCA..ssQPoCuCQpusps.............QpSCSC.ps.QP.........ouCuCA ..t.usslR-KRQsCuCAP.spQPpCuCQ.psshs..s............tQ.sCoC.psssP.Q.......suCsCA................ 0 6 12 23 +6323 PF06495 Transformer Fruit fly transformer protein Moxon SJ anon Pfam-B_13780 (release 10.0) Family This family consists of transformer proteins from several Drosophila species and also from Ceratitis capitata (Mediterranean fruit fly). The transformer locus (tra) produces an RNA processing protein that alternatively splices the doublesex pre-mRNA in the sex determination hierarchy of Drosophila melanogaster [1]. 
26.10 26.10 26.60 26.10 25.20 26.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.69 0.71 -4.40 4 112 2009-01-15 18:05:59 2003-07-15 14:07:55 6 5 25 0 9 111 0 154.90 62 79.37 CHANGED MDADSSupp.RDoR..................RcuRpKE.KlPYFADElREpDRlRpLRpRtpppTR..............RoRSRSRSpSu-RssppRR+RpRSpsRp+Sto.........Rp+osSS....pRRRRpRS.pR.hs.sP+IIsh.V.VPstDa....YG.........hSuM.tuhsYth.PRP....PPa......PPhPFRYRt.sPFhstPRF .............MDADSS.stp.R.D.TR.........................RRPRQRE....KMPYFADELRERDRVRNL.RKLKTT..QKR.TP..TPPPRERRSRpARoRSRSRTHSsEQSRCpRR.RSR.....SYV.....R.QRS..GS.........RH.Qo.SS.Ss.....sRRR+SRSRSR...RSRTPRIITVPVPVPAAEY.uYAYs..........h......................................................................................................... 0 2 3 6 +6324 PF06496 DUF1097 Protein of unknown function (DUF1097) Moxon SJ anon Pfam-B_15055 (release 10.0) Family This family consists of several bacterial putative membrane proteins. 22.30 22.30 22.30 23.20 22.20 22.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.94 0.71 -4.38 53 1004 2009-11-03 13:31:05 2003-07-15 14:16:43 6 2 730 0 102 428 24 140.50 53 85.64 CHANGED AlosGlLuulWshl...AsshsLs.sW.....sGFlussoaFAt.tsGhpGhhtohsoshoGlhWAhlhltuuuhh.sh......shhuhlhsulsshhMshtAph.phL.uFlPGsFlGssuTFAs..................ssshhhllsuLllGslhGashphhuthLhp ...................................AlTTGILSGlWuWV.....AsuLG...Ll.oW.....AGFLGCTuYFAsPpGG...hKGLhhShsT.hSGhVWAhlIIhuS..uhhspl........plluYll.TullAF..lMClQ...A....+p...h....LL.SF...lP.GTFIGsCATFAu..........................................tGsW...p..lVLPSL....hlG.hlFGYhMcsoGlaLu.t............ 1 23 50 72 +6325 PF06497 DUF1098 Protein of unknown function (DUF1098) Moxon SJ anon Pfam-B_15446 (release 10.0) Family This family consists of several hypothetical Baculovirus proteins of unknown function. 
21.30 21.30 21.50 40.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.19 0.72 -3.93 22 51 2009-01-15 18:05:59 2003-07-15 14:19:53 6 1 51 0 0 48 0 97.70 30 85.45 CHANGED ppp......ppsppsspsss............sssplLpuLsp...posAphIlsDsStsKpssLppLuppStsAK+llcuIps.sp-slpl.ss.cslslLcllsDIasNph ..............p......ppstps.ss.sp...........hssschLpsLNp...pTsAshIlsDso.sKppuLphLuppSssAKplL...-slps.ssssl+L.sshcslslLcllusIaDNph.. 0 0 0 0 +6328 PF06500 DUF1100 Alpha/beta hydrolase of unknown function (DUF1100) Moxon SJ, Bateman A anon Pfam-B_15719 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. Members of this family have an alpha/beta hydrolase fold. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 411 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.38 0.70 -5.90 5 738 2012-10-03 11:45:05 2003-07-15 15:59:26 6 3 717 10 104 985 155 396.10 61 97.14 CHANGED tsSKNLSETLFpsHKQAKETSoLTQYMPoSp..slLDsl-pcoupuWYRpLRRLQWlWQGlDPlEhE-VLARIASSKHSRTcD-WLDTVMGYRSGNWsYEWs+lGMhHQ++AsEcssE.sAu-phFsAALhYSIAGYPHLKuDNLAlQAQVLAN+AYpEAAK+osYTlKQLEFPapc.uKIoGaLHLPs.TDuPaPVVLVSAGLDSLQTDMWRLFRDYLAP+DIAMLTIDMPSVGaSS+WPLTEDSSpLHQAVLNpLsslPWVDHaRVGLlGFRFGGNAMVRLuFLEs-KlKACVsLGAPVHDLFoSPcKLQpMPKMYLDVLASRLGKusVDlcSLuGQMsAWSLKVQGFLSGRRTKTPILAhuLEGDPVSPYSDNQLVAhFSssGKAKKIsSKTIocGYEQSLDLAIcWLEDELp 
..................................................................................................MoQANLSETLFKPRF..KHsETSTLV...RR...h..s+..Gup..sslQ..S..A...LDG.c.olsHWYRMINRLMWIWRGlDPpEIL-VQARIVMSDAERTDD-LaDTVIGYRG...GN...WI.YEW..A....pQA....M.s...W....Q...Q...KA....s...t....E....p....D....s....hoGR.a.W.LH.AAsLYsIA..A..Y......P...H.....L.....K.....G.....D...-....L....A....E....Q..A....Q....A....L.....u.....N.......R...A....Y.E....E...AA....Q..R..L....P.Go...h..Rp..h.EF.s.....l.........P......G.....Gu......P..........I..T..G....F......L......H.....M...P........K.......G...............D.....G......P...F....P....T.V.L.M..C.GG....L.....D...u..h.....Q....T...D.......Y.......Y......s...L....Y....E...+...Y...FA.PRGIAMLT..ID.........M.........P....S...........V..G......F.......S........S.....K.......W....K.....L.....T.....Q.....D....S....S..l..L.....H.....Q.....+.....V.....L...+....A....L....P....N..V......P.......W......V...DH...TR..VAA.FGF.R.F.GA.NV.A.V....R.LA..YL.E.u.s...R......L...KAV.A..C......L...GP...V...V...H...s..L...LS....D.........p....pQ.....p....p.....V.....P...E.....M.....Y....L.DV...L..ASRL.GM.H...D..A..S......D-.A.......L..R.VE.L.N...RY.SLKVQGLL..G.RR...CPTPM.LSGaWKNDP.FSPEED.SRLIToS.SuDGKLlEIPFsP..V.Y+..NFD.+uLpEIocWIE+RLC.................................................................................................................................. 1 16 42 73 +6329 PF06501 Herpes_U55 Human herpesvirus U55 protein Moxon SJ anon Pfam-B_15779 (release 10.0) Family This family consists of several human herpesvirus U55 proteins. The function of this family is unknown. 
20.40 20.40 20.40 21.40 20.10 20.20 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.53 0.70 -5.91 3 7 2012-10-03 01:18:03 2003-07-15 16:04:15 6 1 5 0 0 51 0 426.60 42 95.40 CHANGED oS-Ls+llQlslDaN-.lQuc.Nao.lhlcCllS+T+PVLLLITDGTEScsEDVhFsu+sLcccpsIKI+lhPlsRoI.Ps+VplFulPIapIoSSLlIpD.shh..KENhDPpcaEQaGlhspsIuTTNlIs.lpsVpNcslElolTlFNIsWccSsYQsplccosG++LpTlaoVFSlNTspCPYWpshF+..SshPlC+VpMISEPsVSVYKIEFssPlLpVFLRshsLsspNsRFsVspEChLRLsF.ScPshsoVoLNlsMPYFKICuDtKslEVFFP-cMoLssNcsKcIsLRGTFpNhsAVGLFIPcposVlp.aPFlWpPpEoF+l+VoC-RospVTEHDIIG+VYFIo++lFR+sF+PsusuDhKSclEtspNoscsFclaFLGNcFFussLP-LTLHPhhshcYEclQusuNIpp.s.NcsPShpRlRl .............................................................................ospLs+llQlshDhNc.l.sp.sap.lhlcChlSpT+PslLhITDGTEScsEDVhFssphLcpppsIKI+lhPlsRol.Ps+lplFulPIah.osuL.IpD.shh..KENhDPhhaEQaGlhshsIuTTNlIs.hpsVpNcslEholhlhNlsWpcSsYQsphhcp.G++LhhlaolFSlsTppCsYWpshFc..ShhPlC+VpMISEPslSlYKIEFssPhLplFLRshsLsppppRFslspEChLRLsF.S.PshsoVoLNhsMPYFK.IC.uDtK.slEVFFPscMoLs.NpsKcIsLRGpFpNhph.VGlFIP.spopVhp.aPFlW.spEsh+l+loC..-+.ospVTEH..DhlG+laFlo.++lFR+.sF..+shu.suDhKShlpt..spNo.scsFclaFhGNsF.u...L.P-LTLH.Ph.....cYEch.upuN.ph.s.scp.Shh+hRl..................... 0 0 0 0 +6330 PF06502 Equine_IAV_S2 Equine infectious anaemia virus S2 protein Moxon SJ anon Pfam-B_15780 (release 10.0) Family This family consists of several equine infectious anaemia virus S2 proteins. The function of this family is unknown. 18.90 18.90 20.80 22.30 17.00 15.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.31 0.72 -4.11 2 41 2009-01-15 18:05:59 2003-07-15 16:07:45 6 2 2 0 0 41 0 65.30 75 83.19 CHANGED MGlFGKGVTWSA.HShG.SQGE.QPL.PNpQpp.ohR+p.ha..N.IVIhhsl+ptWQppcpQ-TKK .....MGLFGKGVTWSALHSMGV.SQGEYQPLSPNKQNQQTH+KtIhWYINPIVIMhAIKpKWQRQETQDTKK.... 
0 0 0 0 +6331 PF06503 DUF1101 Protein of unknown function (DUF1101) Moxon SJ anon Pfam-B_15836 (release 10.0) Family This family consists of several hypothetical Fijivirus proteins of unknown function. 25.00 25.00 77.70 77.60 18.50 18.00 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.07 0.70 -5.64 5 109 2009-01-15 18:05:59 2003-07-15 16:10:46 6 1 9 0 0 40 0 317.00 82 99.67 CHANGED M-RuoREHsKFSKANT+sEs+pMRhYKDDSsD-lsYSEIsVGlooooP+MuLSDYFSuVSloF-sEtRl-ElcPhlYuDLpF.p-pYspDVDLNLLlWQLLSuNQDS+ALCVNlLRMlsTluhGNAaIsc.GpY+Y.spsTs-pTss-DlDuLRlluRlAKIlIKsshsKsD.L+ssQcpLIpYapG+ua+SloLoWDSKSlLsolHGYS.TSEslLDaYIRpKL.DLFKuLpssNLVYGGNYpLVYQlLFYYYIlTNGRaSoGFosR+-S..IKoYslPNDsPusCNso.PRKPoLSLMaIRAlLlIsLIKDYSPlKplPlYlppLElEcPhpNoshlTDuGIRoEs-shssosslsts..LPsFSSsuu ..........................pEMRIYKDDTAsGLCFSEINVGCTSooPKMuLSDYFSSVSCSFDGEMRhPDlPL+hYGDLHF.H-QFTNDVDLDLLCWQLLSSNQDSRALCVNILRMlTuLSLGNAFISE.GRYHY.AlDTTEpTSAEDsDALRhLuRlAKIVIKNslcppD.lshAQQsLIYYYFGsSapGIHLNWDS+SSQ.SlHGYS.TSEsCLDHYIRMKI.DLFpGlRsKN.VYGGNYQLVYQALFYYYllTNGRFSSGFsVRKDS..IpSYFlPN-sPSsCNVo.PR.KPSLSLMFIRAlLlhhLIKDYSs.......................................................s.................................... 0 0 0 0 +6332 PF06504 RepC Replication protein C (RepC) Moxon SJ anon Pfam-B_15903 (release 10.0) Family This family consists of several bacterial replication protein C (RepC) sequences. 
22.80 22.80 23.30 23.50 22.50 22.70 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.98 0.70 -5.46 5 105 2012-10-04 14:01:12 2003-07-15 16:17:02 6 1 87 0 15 72 6 265.80 61 93.70 CHANGED tscsAMsaDLTHARHDPAHCLAPGLFRSLKRGERKRLKLDVTYsYG-DcplRFsGPEPLGADDMRlLQGLVAlAG.....P+GIlLoPEPcSEuG+QLRLFLEs+WDAlEpDAMVVKGSaRpLASElGYATDGGusFKAIRESIERLWsVSVIVp+GuKRQGFRLLSEYASDEs-GRLFVALNPRIAEAIlGcRPHTRI-MAEVRALQTDPARLlHQRLCGWIDPGKSGRVELDTLCGYVWPD-ANuEAMKKRRQTARKALsELAAVGWTVNEYAKGKWEIoRPKPsu ..................................................................................................h.....pasLoHsRHDPAHCLAPGLFRuL...KR.....GER...K.RsKLDVT.Y.cYG-G.ccIEFsGPEPLGADDLRILQGLVAMAG......PsGLVLuPE.PpTEuG+.QLRLFL.....EP..K.WEAVst..D.AMVV.KGSYRALA+EIGhts.DuGssh..KtIp-CIERLWcVSI...IA..Q.....sG.R..K..RQ.....GFR...LLSEYASDEs.DG..RLYVALNPLIApAVMGG..uQHVRIsMcEVRALco-sARLlHQRLCGWID.PGKo....G+sslDTLCGYVWP.sE.A.su...uTMR...K....RR.Qp.lRcALs.ELs........A........L.GWTVsEaAtGKa-IsRPKssu.............. 0 4 9 12 +6333 PF06505 XylR_N Activator of aromatic catabolism Studholme D anon Pfam-B_2890 (release 9.0) Family This domain is found at the N terminus of a subset of sigma54-dependent transcriptional activators in several proteobacteria, including activators of phenol degradation such as XylR. It is found adjacent to Pfam:PF02830. 20.80 20.80 21.00 21.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.09 0.72 -4.60 16 223 2012-10-02 19:02:47 2003-07-15 17:16:43 6 10 157 0 87 229 15 101.30 39 18.53 CHANGED DLspplpFusp-GcIWLs-QRMLLlHsuuLuuLR+ELlpslGh-+ARGhhhRhGatuGtRDAclsRchRssusthshFhAGPQLHsLEGhV+Vpslph-....hDhp .........DLpppLpFsspsGpIWLs-pRMlLl+ssuhu.s.LR+ELIcsLGh-pAR.......GlhhRhGYtu....Gh+DAcls.+chh.ss...s...s.hphFhsGPpLHsLcGhV.+VpslphchD..p......................... 
1 17 50 76 +6334 PF06506 PrpR_N Propionate catabolism activator Studholme D anon Pfam-B_10794 (release 9.0) Domain This domain is found at the N terminus of several sigma54- dependent transcriptional activators including PrpR, which activates catabolism of propionate. 23.10 23.10 23.10 23.10 23.00 22.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.60 0.71 -4.87 47 943 2009-01-15 18:05:59 2003-07-15 18:17:36 6 25 691 6 118 633 15 172.70 37 30.19 CHANGED plstchst.hclp..lhpsshp-ulphhcph.tt.ctsDlllutGu.sushl+ppl.slPVV.lpsouaDllpALtpA+....chs.....sclulVsappsh..u....hpphpphl.sl..slpthsapstc-scstltplpppG..hplllGsulsschA.pphGlpulLlhS.cpolcpAhccAhclsphtcpc...tp+ ..........................s.l.hca-p.ssls...l.phsh-.cAls......hlcct..ts..ccsDsIIusGu.suuhLKs...+l..s.....l...PVl.I+sSGaDl....LpALsc.At.....c..hs........usIGlV.sa..ppsl..su....lhsapcth..sl.....clcpts.a...h....o.....c...E.-......uc....splp.cL+....usG..h-slVGuu.l.l...s.D.lA...cc..t...Ghs..u.lhl..hS..ssoVRpAhp-Alchschpppt....t.............. 0 31 67 92 +6335 PF06507 Auxin_resp Auxin response factor Studholme D anon Pfam-B_2015 (release 9.0) Family A conserved region of auxin-responsive transcription factors. 21.10 21.10 21.70 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.66 0.72 -4.12 32 687 2009-01-15 18:05:59 2003-07-15 18:33:44 8 14 62 0 312 710 0 77.80 45 10.75 CHANGED AupuhsstohFplhYpPRsosS-FlVshp+ahcuhp.p.aslGMRFKMtFEsEDusc++a...tGslsGls-hDPh+WssScWRsLp ...........................Au+AsussohFplaYpP....R........s.S.s.uEFllshs+ahc.uht..ppholGMRF+M..t.F.Es...E-.u.uc..+...........Ra..........hGTIsGlsD...h...D.P.h+WssScWRsLp.......................... 0 40 196 261 +6336 PF06508 QueC ExsB; Queuosine biosynthesis protein QueC Studholme D, Eberhardt R anon Pfam-B_715 (release 9.0) Domain This family of proteins participate in the biosynthesis of 7-carboxy-7-deazaguanine. 
They catalyse the conversion of 7-deaza-7-carboxyguanine to preQ0 [1-3]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.56 0.70 -5.05 33 3226 2012-10-02 18:00:56 2003-07-15 18:54:36 8 14 2983 7 768 3867 2443 199.30 40 86.40 CHANGED +AlVlhSGG.DSTTsLhhApcph..hEVhsloFsYGQR.HptEl-sAcclucths.........l.c+cllDlshLpplu...sSuLTcssht.lsc....ph.t.....cslPsTaVPuRNhlFLSlAuuaA-slGupsIhhGVsppDaSGYPDCRsEFlcuhppslsLu......tsptlpI.psPLh.LsKu-IhcLutcLG......lshchThSCYpG......tcu.uCGcCsuChLRpcGapchs ........................................................................................................................................+AlVlaSGG.D.ST....T....C.L....h..h....A....h......p...p..a.......p..c....V..p.......sl..o....F.....s...Y....G...Q...R.....Hc..........t.......E...l...-...s.........A....c...p...l.....Apc.l.G.............l........p....H..+..l.....l....D..l..s....h...L.....s.p.....l.u...........s.suL.....T.c....cs.h.t...lsp...............tt.................ssl..P.s.Ta....V.P..u.RNhl...F....LohA.u.sh......A....ph....su...c......t......l...h..h...G.....V....s...p.....s..D.....a..S........G........Y...P.......D..C..R.....sp.......F..l....c..uh...p...t.sl.s.Lu............hspsh..p.l...cTPL..ha..l.sKA-hh...tLu.p...p.h.G..............hsh.pp...ThoC..YpG....................utuCGcCsuCpLRtpGhpphh.................................................................................................................................................... 0 218 457 625 +6338 PF06510 DUF1102 Protein of unknown function (DUF1102) Moxon SJ anon Pfam-B_16043 (release 10.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 
22.20 22.20 22.60 24.40 21.70 22.10 hmmbuild --amino -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.95 0.71 -4.39 9 41 2009-01-15 18:05:59 2003-07-16 09:38:25 6 2 17 0 35 48 0 144.00 39 74.21 CHANGED sIVoDDsELIDLTPlQPYuYl..ssGKLslDISssNP.NYPGY.GcGlSPsSpYsF-EhFsVSNcLWEs...shPIsVpIp.Spsstlphauu-h-s..ussG..........sslsFoVthsssVslGM.Fsssscs.G.s.ptplsIcAhthusE ......................sIVsDDsELIDL.sPlQPYAYl..ssGcLsIDlSssNP.NY.P.G...............Y............GpGlSPsSpYsF-EVFpVSNcLWEs............h.sI.....sVpIs.Ss...ssplphausshss..ssss...........sslsFsl..G-sVplGM.hsssGt.s.G.shptplsIpAhthtsp................................................ 0 5 14 25 +6339 PF06511 IpaD Invasion plasmid antigen IpaD Moxon SJ anon Pfam-B_16150 (release 10.0) Family This family consists of several invasion plasmid antigen IpaD proteins. Entry of Shigella flexneri into epithelial cells and lysis of the phagosome involve the IpaB, IpaC, and IpaD proteins, which are secreted by type III secretion machinery. 
25.00 25.00 26.00 25.60 24.60 24.20 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.21 0.70 -5.44 3 264 2009-01-15 18:05:59 2003-07-16 15:13:00 6 2 198 33 19 191 0 261.50 40 88.15 CHANGED tNhouSssPulsAuRupssosGpuAEpVpAsVcoTTu..........a-TpcsIppSsAAhKtppuQQTLpcTPstEl-EsssppTluspphsusLNuLAKSGaulSAEQ+EsL+Ssh.....SAPspAchuGuPM.....AtstpsISDuELWDMISssIucIsDsYLGVYENVVusYTDFYQAFSDILSpMAGWISPGG.DGNoVKLNVDSLKuALooLKKcYo......NKcslLFPAQo.suGhpTuSEuEAcKWlKELGLPD......SCV.KAusGGYVVlVDMTPIssMlsDLsuLGSGoELELDNAKYQAWQSGFKAQEENLKNTLQTLTQKYSNANSLFDNLVKVLSSTISSCLETAKSFL ......................................................................................................................................................................................................h...p..tp.Lshlu+..pt..s..lstptp.p..Lp..............SAPp...p.s..hss..h..............lSctElWshlupsIssIs-..sYLtVYEslVusYTpaYQsFS.-.lLSp.h.uGWloPGc.DGNolKLs.VsSLKstlppL..lsKYs...............sp.La.....Pups.......h....s..sopt-A..p.pWlpELshs...............upl.ptpsuG......YVV......hlshs...PlppMlpsl....sul......G...u...s.......u....t..l........hss...Ac.YQAWpuuFpuQc-NhpsslQoLspKYSpANShaDNLlKVLSusISo.h-oAKsaL................................ 0 3 6 11 +6340 PF06512 Na_trans_assoc Sodium ion transport-associated Vella Briffa B anon Pfam-B_16808 (release 10.0) Family Members of this family contain a region found exclusively in eukaryotic sodium channels or their subunits, many of which are voltage-gated. Members very often also contain between one and four copies of Pfam:PF00520 and, less often, one copy of Pfam:PF00612. 
23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.87 0.70 -4.39 62 1005 2009-01-15 18:05:59 2003-07-16 16:03:13 8 31 158 0 361 1059 0 206.90 31 13.21 CHANGED DsEhNNLQlAlsRIpRuhsalKpsltshhp.thht+h....pphst...........................cttsp.hph.hsphhtst...................pth.t........h.tsuhpph.....pppp.tshhh...Nsshsl........sVPI..........AssESDh-......p............---tsSppS..................................t--scc......chpt.....................sSpSEsSTlDhcss.....tEt............-th..sc.t--.h-....P-cCFs-sCh++aPsht.lDhspshhphWWsLR+TCapIVEHsaFETFIIFMI ..........................................................................................................................................................................................................................................DsEhNNLQlAlsRIp+ulsalKpplh....phhp....thht.p.p......tthtt.......................................p..pp..p...htp...h.tst..................................................p.ptt..................tht.suhp..t.h...................pp.p..hshht.....ssshsl....................sVPI..........A.suESDhEs.p......................................s--hsSpss............................................................................................................-tspp...............................................................ssSsS-......sSTl..Dht.s...t-..................................................................p.h......-.tp....-.........P-sCFs.-.............sC....h...........p......+a...C..tp.ls..h.ppuhG+.hWWsLR+TCapIVEHsWFEoFIlFMI....................................................... 0 36 56 189 +6341 PF06513 DUF1103 Repeat of unknown function (DUF1103) Moxon SJ anon Pfam-B_16075 (release 10.0) Repeat This family consists of several repeats of around 30 residues in length which are found specifically in mature-parasite-infected erythrocyte surface antigen proteins from Plasmodium falciparum. 
This family often found in conjunction with Pfam:PF00226. 25.20 25.20 26.50 25.30 24.70 25.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.38 0.70 -4.94 2 22 2009-01-15 18:05:59 2003-07-16 16:04:55 6 6 2 0 3 23 1 120.30 56 36.58 CHANGED KVLGEGDKEDVKEKNDGKKDKVIGSEKTQKEIKEKVEKRVKcKCKKKVKKGIKENDTEGNDKVKGPEIIIEEVKEEIKKQVEDGIKENDTEGNDKVKGPEIITEEVKEEIKKQVE-GIKENDTEGNDKVKG.EIITEEVKEEIKKQVEEGIKENDTEupDKlhG.EIITEEVK........EGlKENDTEsKDKVIGQEIITEEVKKEIEpQEEK ..........................................................................................................................................cl.....-p.pc...........hcc..pc..slKEpDTEsKDKVIGQEIIhEEVKcEh.....ctl+c.....NcsEsKDcVIsQEIlsE-Vpct.......t........ 0 3 3 3 +6342 PF06514 PsbU Photosystem II 12 kDa extrinsic protein (PsbU) Moxon SJ, Bateman A anon Pfam-B_13782 (release 10.0) Family This family consists of several photosystem II 12 kDa extrinsic protein (PsbU) proteins from cyanobacteria and algae. PsbU is an extrinsic protein of the photosystem II complex of cyanobacteria and red algae. PsbU is known to stabilise the oxygen-evolving machinery of the photosystem II complex against heat-induced inactivation [1]. This family appears to be related to the Helix-hairpin-helix domain. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.15 0.72 -4.14 24 89 2012-10-03 02:11:09 2003-07-16 16:26:02 6 2 78 17 32 170 35 91.40 44 59.54 CHANGED stsDtchst.hG..pKlDLNNusVRsFpphPGMYPTlAuKIlpsuP...YcsV-DlLslssLo-cQKpllccah-sFsVscPpssh.tG.DRlNsGlY ................sDtKlsschG...pKlDLNNuslRsFpphPGhYP............oLAuKIl....p....s...u...P....YcsV-DVLslsGLo-pQKpllcp.hc..pFsVTsPp.th..pG.DRlNsGhY.......................... 
0 10 23 30 +6343 PF06515 BDV_P10 Borna disease virus P10 protein Moxon SJ anon Pfam-B_16237 (release 10.0) Family This family consists of several Borna disease virus P10 (or X) proteins. Borna disease virus (BDV) is unique among the non-segmented negative-strand RNA viruses of animals and man because it transcribes and replicates its genome in the nucleus of the infected cell. It has been suggested that the p10 protein plays a role in viral RNA synthesis or ribonucleoprotein transport [1]. 25.00 25.00 27.40 61.40 23.90 17.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -9.98 0.72 -4.15 3 40 2009-09-11 07:51:39 2003-07-16 16:31:46 6 1 7 0 0 39 0 81.20 67 99.97 CHANGED MSSDLRLTLLELVRRLNGNATIESGRLPGGRRRSPDTTTGTVGVTKTTEDPKECTDPTSRPAPEGPQEEPLHDLRPRPANRKGAAIE ...MSSDLcLTLLELlRRLNGsuTlESGRLsGGRRRSPDTTTGoIGVTKTpEssKEChDPTuRsAPcusQEEPhHDLRPRstsRKGAslE 0 0 0 0 +6344 PF06516 NUP Purine nucleoside permease (NUP) Moxon SJ anon Pfam-B_15961 (release 10.0) Family This family consists of several purine nucleoside permease from both bacteria and fungi [1]. 
21.10 21.10 21.10 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.80 0.70 -5.60 31 328 2012-10-01 20:25:13 2003-07-16 16:35:58 6 4 268 0 136 310 52 309.80 42 85.63 CHANGED lssKVhIloMFE.........sEhpsWh-t.....sh.........s+sIslPGhs..assl+Cssc.ulCtlsTG.GphNAAuolsALshsspFDLo+TYaLlAGIAGlsPptuTlGSssWA+YsVpssLpaEIDsREl.....PssWsoGYhshsspp..PsphPsssYs..........TEVFpLNstLpshAhpLo+ssp..LsD.ossspshRtpYssts........AppsPtVlpsDshoucsaapGshLs-hhpsasclhTsGsGsYsoTupEDsAThcALtRhupsGhlDhsRlhlLRTuSNFspPssGto..uhcpLhpts....tGGhssAl-NlYpsGsslVcsIlspWsp.acsGlss ...................................ssKVhlloMFt............sEupsWl-p........hth.........scp.l.slPGLSs.YPslcCssp.slChlsTGhG.sNAAuolhALshSs+FDLp+T..........YFLlAGIAG.l-PppGTlGSAsWA.+.YsV-huLpa..-l...DsR.Eh.....Pt....sW..s.sGah..u...lss.......p.p..Ps..ppPs..h.s.at...............TEVFpLNspLtshAhuLo+s.lp..LtD.ospupAhRt+Ystts..........AspsPpVhpCDThousTaapGshLu-thpsWoKlhTcGpGsYCoTtpEDNAThpALpRuApsu+lDhsRlhlLRTuSsFDRP.hsGpo..uh-sLhshs.......GGFs.Ah-NhYpsGsslVpsIlspWsp.acpul.s........................................................................... 0 35 73 113 +6345 PF06517 Orthopox_A43R Orthopoxvirus A43R protein Moxon SJ anon Pfam-B_16577 (release 10.0) Family This family consists of several Orthopoxvirus A43R proteins. The function of this family is unknown. 20.90 20.90 21.70 53.10 18.20 20.10 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.45 0.71 -4.75 2 47 2009-01-15 18:05:59 2003-07-16 16:39:31 6 1 19 0 0 35 0 194.30 91 99.60 CHANGED MMhKW.ISILThSIhPVLuYoSSIFRhH.sp-lELCYG+L.FD+l.N.VNIKY.P.aIPYRYNFINpTLTVDEhcc.NVhFT+u.FLKa+YuS.LssuLlVuLuspLKYNDlpC.VNVSChIKNLhTpTSTILTSKHhTYSL+RSpC.hIIGYDSIIWYKD.IsD+......YNGIYDFTAICMLIASTlIVhlYhhK+IKMN. 
......MMMKWIISILTMSIMPVLAYSSSIFRFH..SEDVELCYGpLYFDRIYN.VVNIKY.P..HIPYRYNFINRThoVDELDD.NVFFTHGYFLKHKYGS.LNPSLIVSLSGNLKYNDIQCSVNVSCLIKNLATSTSTILTS.KHKTYSL.HRSpCIsIIGYDSIIWYKD.INDK......YNDIYDFTAICMLIASTLIVTIYVFKKIKMNS....... 1 0 0 0 +6346 PF06518 DUF1104 Protein of unknown function (DUF1104) Moxon SJ anon Pfam-B_16082 (release 10.0) Family This family consists of several hypothetical proteins of unknown function which appear to be found largely in Helicobacter pylori. 25.00 25.00 27.50 28.70 24.80 24.10 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.06 0.72 -3.85 12 166 2009-01-15 18:05:59 2003-07-16 16:41:54 6 1 63 1 18 153 0 89.50 44 61.91 CHANGED ADFSKposcELhphuuslssp-hsDhthElcKRhtchpht-u+pa+pph+pthpcphsphohc-tpca+ptl+cthpc.+l-shSscEtcchGl ....pDFSphsDc-LlchAGpV.ssp-llDY+hElpKRhctMst--pK.p.F+tph+chAcKNlupMS.cDac+h+c-l+cslcc.+hKsho.cEh+t.GL............... 0 4 10 18 +6347 PF06519 TolA TolA C-terminal Moxon SJ anon Pfam-B_16081 (release 10.0) Family This family consists of several bacterial TolA proteins as well as two eukaryotic proteins of unknown function. Tol proteins are involved in the translocation of group A colicins. Colicins are bacterial protein toxins, which are active against Escherichia coli and other related species (See Pfam:PF01024). TolA is anchored to the cytoplasmic membrane by a single membrane spanning segment near the N-terminus, leaving most of the protein exposed to the periplasm [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.19 0.72 -4.01 9 916 2012-10-03 21:09:15 2003-07-17 14:43:38 6 1 772 6 109 684 85 92.70 45 28.50 CHANGED ssstpuGssGs-lspYuu.IpptIQp+hhcsssatGKsCslcI+LuPDGh...lhslps.uGDsslCpAAluA.A+ssKlPhsPos.sVYEphKshsLsh ....................................sstpsussu.u-l.ssYAupIpsAI..p..u....+....h.h.-....s....s.s.a.t.....G.K...p..CsL..+I+..L..A..PD.Gh....L..l....s...l...p...s..c....G...G...DP..A....LCpAAluA...AKhAK....lP..+P..P....Sp..sVYEh.aKsusLcF.................... 0 13 34 72 +6349 PF06521 PAR1 PAR1 protein Moxon SJ anon Pfam-B_16232 (release 10.0) Family This family consists of several plant specific PAR1 proteins from Nicotiana tabacum and Arabidopsis thaliana. The function of this family is unknown. 25.00 25.00 26.00 64.70 22.40 20.90 hmmbuild --amino -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.18 0.71 -4.65 4 68 2009-09-11 00:11:32 2003-07-17 16:32:16 6 1 14 0 38 70 0 144.60 49 87.47 CHANGED sIsCENLscsoCuFAISSoGKRCVLEKph+RSGEEsYTCRTSEIEA-KlpNaIETDECIpACGVDRpsLGISSDuLLEupFTpKLCSstChcaCPNIVDLYFNLAAGEGVYLP+LC-pQtGpuRRuMuEI+SSGl...............lAPuPtSpl...cssNhhhsPAhuPh ...lhCEpLst-sCAFuVSSSGpRCVLE+thhpsGpht.YpCpTSElh.s-..+.lppaIETDpClcACGVDRtoVG..I..SSDuLh.Es..pFopKLCSspCappCPNIVDLYhNLAAGEGlaLPcLCpspcsss.RRt.h.h-...hhSsu................hAsts.................................................................... 0 3 32 35 +6350 PF06522 B12D NADH-ubiquinone reductase complex 1 MLRQ subunit Moxon SJ anon Pfam-B_16238 (release 10.0) Family The MLRQ subunit of mitochondrial NADH-ubiquinone reductase complex I is nuclear [1] and is found in plants [2], insects, fungi and higher metazoans [3]. It appears to act within the membrane and, in mammals, is highly expressed in muscle and neural tissue, indicative of a role in ATP generation [3]. 
21.50 21.50 21.50 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.32 0.72 -4.28 55 447 2009-01-15 18:05:59 2003-07-17 16:38:21 6 4 210 0 280 434 1 69.80 27 72.13 CHANGED hp+P.plhPLhsslususshssh.lsR.hhtNP-VphsKcsp...........s-.hpcas.pp.pKahpspt.......-hhshhppt.sch ....................hcP.plhPLhs...hlususshAshhhhRhhhtsP-Vp.hs+psp....................s-shpcht.pt.hKhht.p........ph....p............................................. 0 82 142 207 +6351 PF06523 DUF1106 Protein of unknown function (DUF1106) Moxon SJ anon Pfam-B_16281 (release 10.0) Family This family consists of several hypothetical bacterial proteins found in Escherichia coli and Citrobacter rodentium. The function of this family is unknown. 21.10 21.10 21.10 80.30 21.00 17.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.02 0.72 -4.16 2 25 2009-09-10 17:00:18 2003-07-17 16:40:58 6 1 18 0 1 16 0 89.10 89 70.05 CHANGED MASLWK+LFY.sGpRRRYFEptEHSFSIlCGRLRGIVlThKCSpGIIYLSIKVsPNNppHlhLYpKK-YlFDKLKElFPDEAIEFoIEYEN MASLWKRLFYSSGRRRRYFEEGEHSFSILCGRLRGIVLTIKCSNGIIYLSIKVSPNNRNHVFLYHKKDYVFDKLKEIFPDEAIEFTIEYEN 0 0 0 1 +6352 PF06524 NOA36 NOA36 protein Moxon SJ anon Pfam-B_16330 (release 10.0) Family This family consists of several NOA36 proteins which contain 29 highly conserved cysteine residues. The function of this protein is unknown. 
25.00 25.00 39.60 39.50 23.00 22.60 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.43 0.70 -5.17 3 242 2009-01-15 18:05:59 2003-07-17 16:49:12 7 2 207 0 71 226 0 192.60 70 97.66 CHANGED MPKKKTGsRKKAEKQRERcKEIRuS..sVDLA+HPCNAsMECDKCpR+QKoRAFCYFCNSVQKLPICAQCGKpKCMMKoGDCVlKHAGVYoTGLuMVGAICDFCEAWVCHGRKCLoTHACsCPLpsApClECERGVWEHGGRIFpCSFCpsFLCEDDQFEHQASCQVLEuENFKChSCNRLGQYSCLRCKsCaC-DHVRRKGFKYDKs.KslPCPKCGY-ToETKDLSMSTRSaKFGRQTpGtcS.DD-pGYGuYapNhuSscYGDstussYGYcuDDDE--.SusDYDEEpDtDDDDsE-sD-..TopNcG-c-sDstA ................................................................................p..+..+QKsRAFCYFCpulQRLPhCApC..GKsKCMhKo.GDCVlKHsGVaoTG...............L.uMVGAICDFCEAWVCHGRKCLooHACoCPLtDA........sClECERGVW-HGGRlF+CuFCpsFL............................................................................................................................................................................................................................................................... 0 24 28 49 +6353 PF06525 SoxE Sulfocyanin (SoxE) Moxon SJ anon Pfam-B_16349 (release 10.0) Family This family consists of several archaeal sulfocyanin (or blue copper protein) sequences from a number of Sulfolobus species. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.56 0.71 -5.03 6 66 2012-10-02 17:41:00 2003-07-17 16:56:28 6 2 39 0 29 130 19 176.80 30 82.35 CHANGED IssllVlIlllGlulY............................s.sphshlSosooosoToooos...............PutlsLPYsSsNKTVFIhLsVpoous..tFNaNGTSsGphKIYIPAGWsVhVpFhNpESLPHNLslVQNsTshPNssslSuDGKIlhhVGsosSNYtssGISSGpoAsGlhssluAGhYWlACGIsGHAcSGMWssLlVSsNVTsPYslh ..........................................................................................................llhllhhuh.hh...............................s..t.p..o.s..sosps.os.tsot..................................sst.....hs.sss...p.+TV.lhlh.shss.us........hNasGoup...G....ph.....plhl..PA......GhsV......hlphhN.p.p.u.l..s.....Hshhl..l....s...s..s.t.h.P...s.....ss...slu.t...G+..I.....l.h..h.l.Gs...o.s...us..h..s...p.G.l....u.........Gpss...s.s.h...h.....sls...AGhYhlsCsIs.GHAtsGMWs.llVSsshp.P....t.......................................................................... 0 7 14 26 +6354 PF06526 DUF1107 Protein of unknown function (DUF1107) Moxon SJ anon Pfam-B_16434 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 25.00 25.00 47.30 47.20 19.90 16.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.83 0.72 -4.16 21 718 2009-01-15 18:05:59 2003-07-17 16:58:32 7 1 677 1 82 194 5 63.70 70 91.41 CHANGED hRhFcpYpPpplA+aVKsLF+GplaIpGlGtFcFDpG+lLl..Ppps-ppphpshpEVNppIppLp .MKIFQRYNPLQVAKYVKILFRGRLYIKDVGAFEFDKGKILI..PKV+DKhHLSVMSEVNRQVMRLQ... 0 6 23 51 +6355 PF06527 TniQ TniQ Moxon SJ anon Pfam-B_16755 (release 10.0) Family This family consists of several bacterial TniQ proteins. TniQ along with TniA and B is involved in the transposition of the mercury-resistance transposon Tn5053 which carries the mer operon. 
It has been suggested that the tni genes are involved in the dissemination of integrons [1]. 25.10 25.10 25.10 25.10 24.90 25.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.10 0.71 -3.96 43 576 2009-01-15 18:05:59 2003-07-17 17:04:04 6 10 398 0 177 503 23 138.90 19 29.75 CHANGED hLshp.lsh.hssEshsSahuRLAttp..shssh...................psahtc.hG..hshpslhpsp........sslstlAphuGhss...spLtthsh.................................spstspt....hpl..tuchlspphl...ptsp.hRhCPtCL.tpDhtt.......stttshhRhtWplsslpsCspHpshL ......................................................................................................................hh....h.sEsltSaltRhAhtp.......thts.h.............................t.phhtp.....hs......hs...ht..t.hhtst................t.ltt.l..u..t....h.....s..s.h.s.s.....spL.p.ph..sh.......................................hphtpt...............hph.......htth.h.sp..phh...........t.t...h+....h....CPhCL...............ptsah+.htWpl..s..hht.sCspHpshL................................................................................................................................... 0 27 90 129 +6356 PF06528 Phage_P2_GpE Phage P2 GpE Moxon SJ anon Pfam-B_15359 (release 10.0) Family This family consists of several phage and bacterial proteins which are closely related to the GpE tail protein from Phage P2. 20.90 20.90 20.90 22.30 20.50 20.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.97 0.72 -4.51 17 525 2009-01-15 18:05:59 2003-07-17 17:09:05 7 2 406 0 55 217 1 39.10 54 85.46 CHANGED MADIAslFHWsPu-htsMoLsELhcWRE+AhhRSG.sss- ...MADIAsIFHWsPu-h.sMolsEllsWR-+..AhtRSGsscp......... 0 7 24 42 +6357 PF06529 Vert_IL3-reg_TF Vertebrate interleukin-3 regulated transcription factor Vella Briffa B anon Pfam-B_16154 (release 10.0) Family This family includes vertebrate transcription factors, some of which are regulated by IL-3/adenovirus E4 promoter binding protein [1]. 
Others were found to strongly repress transcription in a DNA-binding-site-dependent manner [2]. 25.00 25.00 84.80 69.30 22.60 20.10 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.06 0.70 -5.47 2 48 2009-01-15 18:05:59 2003-07-17 17:13:27 6 2 37 0 24 43 0 293.00 70 72.99 CHANGED SSsuYAQEIQKLSsSTsVYFQDYQoSKsslsSFVDEHEPuhVuuSCISVIKHSPQSShSDhSEhsSVEHTQtS.hQusCRSPENKF.lIKQEPlELE...REsR--RGoYpsSIY.sYMGooFshYSHSPPLLQVptSoSNSPRTSEsD-GVVGKSSDGEDEQQVPKGPIHSPVEhppVHAT.VKVPEVNsSALPHKLRIKAKAMQVKVEAhDs-a-uhQKLSSPhDh.uKRHF-LEKHGspshsHSShsPFSVQVTNIQDWSLKsEhWHpKELssKhQsuhKTGVVElKDssYpVSEuENLYLKQGIANLSAEVsSLKRhIsTQ.ISASDSt ......................................SSsAYAQEIQKLSsSTAVYFQDYpoSKus.h.suFlDEHEPohVuSSCISVIKHSPQSSLSDlSEsS.SlEHoQtS..shQusCRSPEsKFQhIKQEPhELEsasREsRDDRGoYpuSIYQNYhGsoF.s.GYSHSPPLLQVNRSSSNSPRTSETDDGVVGKSSDGEDEQQVPKGPIHSPVELKpVHATlVKVPEVNS..SALPHKLRIKAKAMQIKVEAhDsEa-uTQKLSSPlDMouKRHF-LEKHsssshVHSS.LoPFSVQVTNIQDWSLKsEHWH.pKE...LsuKhQsuh........KT.GV..........VEhKDsuYpVS-sENLYLKQGIANLSAEVsSLKR.LIsTQ.ISASDS.... 0 1 3 6 +6358 PF06530 Phage_antitermQ Phage antitermination protein Q Moxon SJ anon Pfam-B_3979 (release 10.0) Family This family consists of several phage antitermination protein Q and related bacterial sequences. Antiterminator proteins control gene expression by recognising control signals near the promoter and preventing transcriptional termination which would otherwise occur at sites that may be a long way downstream [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.71 0.71 -4.56 4 887 2012-10-04 14:01:12 2003-07-17 17:17:27 7 1 490 0 56 475 2 118.00 44 91.35 CHANGED hRDIp.V..........LERWGAWAtssptslpaSPIAAGF.KullPh+spsRP.CsDDDGhIIssshspLpp.psschhsLLlsYYl+G.St+AIA++pthScspItK+Lp+AEGhI-GsLSlhsVRL-hDthlctt ...................................................hRDIp.V..........LERWGAWAs..s...s.p...p.c.lsa.Ss......IAAGF.....KG.L.l.....P.....p...+....s.K..o.R..P.p.C..s.DDD...uh.l.IsuChAR.L.p.+....psp...ch.a-....LLlsY....YV.h.GhohhulAc+......+.....psS-.shItK..cLp+AEGhl-G.hL.hhLsl+LEMD..h..p...................................... 0 7 18 33 +6359 PF06531 DUF1108 Protein of unknown function (DUF1108) Moxon SJ anon Pfam-B_16830 (release 10.0) Family This family consists of several bacterial proteins from Staphylococcus aureus as well as a number of phage proteins. The function of this family is unknown. 25.00 25.00 32.70 32.70 22.50 22.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.83 0.72 -3.83 4 313 2009-01-15 18:05:59 2003-07-18 11:23:49 6 1 192 0 4 97 0 84.90 65 99.43 CHANGED MYYKIG-lppKlIsVsGFDFKLtVhKpchuIpIpVhDhpsssIcuhhVsDENDLYhAhDlhpQuI.EWIEpNTDEQD+LINLVM+W .MYYchG-lppKlIsVsGFDFKLtlhKpchuIpIpVhDhpsssIcuhhVsDENDLYhAhDlhpQuI.EWIEENTDEQDRLINLVMKW.... 0 2 2 4 +6360 PF06532 DUF1109 Protein of unknown function (DUF1109) Moxon SJ anon Pfam-B_17952 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
21.60 21.60 21.90 21.90 21.30 21.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.82 0.71 -11.38 0.71 -4.90 54 293 2009-01-15 18:05:59 2003-07-18 11:29:26 6 3 227 0 122 289 32 192.70 31 93.87 CHANGED LusshpPlpthshspphuhuhhsuhssuhllhhhh..hGlRsDlspshtsshFahKhuhshsLulsAhhssh+LuRPs.uptttthhhlslshshlhlsuhhplhss.sssthh.shlhGpshh..hChhsIsllulPshsuhhhulRp.hAPoc.thAGAsuGLsAGuluAhsYuhaCs-sussFlulWYsluhhlsuhlGAllGsRlLRW .............................................LssphpPVpphshtphhhhshlhuhs..sussl.hhh..h..h.G.h.R.s...Dlsts.h.t.s.shF......hhKhshs..hhlussuhhshhtLuRPt..ptshh.hh.hl.sls..hsslhh.uuhhphhts....sst.sh..shlhG.tshh.......hC.hhIsllSlPsh.suhl.hulRp.hAPs+PslAGhsAGLsuGuhushl.Yu.haCs-suhsFlulWYslulshssslGAlhG.hhLRW......................... 0 29 59 86 +6361 PF06533 DUF1110 Protein of unknown function (DUF1110) Moxon SJ anon Pfam-B_18243 (release 10.0) Family This family consists of hypothetical proteins specific to Oryza sativa. One sequence (Swiss:Q943P1) appears to be tandemly repeated. 24.10 24.10 24.20 24.40 24.00 24.00 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.21 0.71 -4.48 6 38 2009-01-15 18:05:59 2003-07-18 11:34:57 7 3 6 0 21 23 0 156.90 35 84.03 CHANGED MAAE..AWRuRFRcRVsEAAp...RhEsVcEsLAsAhsHL...susMlAuD..tAAAARsRIQLAhGtLt-ASpsLA.AMSlMpuAcLLsh+.Gs.hs..h.httIupLGDp....YLAE+sAshKL+tAtcDAc-AastVDtCRGHLDAlLLLLDH.s+LPuVsshI-pERh.AAsuDLpAAIGpschGsEhAVsARQDVSG ...............c...sW+shF.pRVs.sst...phctlpt.Ltss..tl.....s.h.s..........Asssts+h....ttL.pASppLu.AhuhMtuAcLLAh+..Gsu.....sstt.hs....slspL....sDp....hh....s.ppAhh+LptAtpcAccAastl-tCRGHLsAlhhLLc+....thssVpshlptEhh.sAss.LpsA............................................................. 0 0 1 13 +6362 PF06534 RGM_C Repulsive guidance molecule (RGM) C-terminus Moxon SJ anon Pfam-B_18263 (release 10.0) Family This family consists of several mammalian and one bird sequence from Gallus gallus (Chicken). 
This family represents the C-terminal region of several sequences but in others it represents the full protein. All of the mammalian proteins are hypothetical and have no known function but Swiss:Q8JG54 from the chicken is annotated as being a repulsive guidance molecule (RGM). RGM is a GPI-linked axon guidance molecule of the retinotectal system. RGM is repulsive for a subset of axons, those from the temporal half of the retina. Temporal retinal axons invade the anterior optic tectum in a superficial layer, and encounter RGM expressed in a gradient with increasing concentration along the anterior-posterior axis. Temporal axons are able to receive posterior-dependent information by sensing gradients or concentrations of guidance cues. Thus, RGM is likely to provide positional information for temporal axons invading the optic tectum in the stratum opticum [1]. 21.60 21.60 21.60 21.60 21.10 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.40 0.71 -5.09 23 234 2009-01-15 18:05:59 2003-07-18 12:07:50 8 6 64 0 115 193 1 170.90 49 43.56 CHANGED Cs-QKlYpApoD-..LPuAFsDGopsGG-............psspoL...........cIhEpssG...pHVEIpA+YIGTTIlVRQlGpYLThAlRhPE-lspshp.-ppDlp..LClpGCPtsppIDhpphhtps.....................t.st......shshc....sAps+C+-p........hsVpDhYFpSCVFDLLTTGD...ssFThAAhsAhcDsctLcPspp..+h+l .................CsDQKVYQApsD-..LPAAFsDGSpsGGD...........ttsusSL...........pIsE+ssG...pHVEIpA+YIGTTIlVRQlG+YLTFAlRMPE-lspuh-...-ppsLp......LClpGCPhsppI-hpthptps.......................s......s..sash-....oAps+C+Ep........LPVEDlYFQSCVFDLLTTGD...sNFThAAasAL.EDl+hL+sspc+hH........................................................... 1 17 28 67 +6363 PF06535 RGM_N Repulsive guidance molecule (RGM) N-terminus Moxon SJ anon Pfam-B_18263 (release 10.0) Family This family consists of the N-terminal region of several mammalian and one bird sequence from Gallus gallus (Chicken). 
All of the mammalian proteins are hypothetical and have no known function but Swiss:Q8JG54 from the chicken is annotated as being a repulsive guidance molecule (RGM). RGM is a GPI-linked axon guidance molecule of the retinotectal system. RGM is repulsive for a subset of axons, those from the temporal half of the retina. Temporal retinal axons invade the anterior optic tectum in a superficial layer, and encounter RGM expressed in a gradient with increasing concentration along the anterior-posterior axis. Temporal axons are able to receive posterior-dependent information by sensing gradients or concentrations of guidance cues. Thus, RGM is likely to provide positional information for temporal axons invading the optic tectum in the stratum opticum [1]. 24.00 24.00 24.30 24.00 21.50 23.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.31 0.71 -4.23 14 210 2009-01-15 18:05:59 2003-07-18 12:35:16 7 4 61 0 111 164 0 164.30 57 41.22 CHANGED C+lp.+Csupaspsp.p.s..htt................sstaCpuLRsYshChpRTARsCRGs..LsYHSulptlpcLhppaNCScsusTsps....................s.tsss.csCsY....................sp......apaCGLFGDPHLRTFpccFQTC+VpGAWPLIDNpYLsVQVTNsPVs.GSs....ATAToKlTlIhKs ...............................................C+I..+Csu-ahusTut.ps.hh.s.s......................ss-aCpALRuYAhCT+RT.A.RsCR.Gc..LsYHSAVhGIcDLMsQ+NCS+-GPTSps.......................................s..ptpsss-.C..pYcpp.......................httcsssPs......YhHCGLFGDPHLRTFpDcFQTCK......VpGAWPLIDNsYLsVQVTNsPV.lsGSu....ATATsK...lTIIFKs....... 0 14 25 63 +6364 PF06536 Av_adeno_fibre Avian adenovirus fibre Vella Briffa B anon Pfam-B_16053 (release 10.0) Family This family contains avian adenovirus fibre proteins, which have been linked to variations in virulence [1]. Avian adenoviruses possess penton capsomers that consist of a pentameric base associated with two fibres [2]. 
24.90 24.90 25.00 82.70 18.50 24.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.38 0.72 -4.29 6 42 2009-01-15 18:05:59 2003-07-18 14:02:22 6 1 6 6 0 36 0 96.10 66 20.54 CHANGED YhsSGsusLppaTA+u.NSSs.uFsCAYYLQQW.opGLlhoSLYLKLDpsphushPostsspNA+aFTFWVuua.pphN.StIpssTloPSTsphss TFVSGSsSLsoYNAshVNSSupsFSCAYYLQQWNlQGLLhTSLYLKLDSsTMGsRPGDhsSsNAKWFTFWVSAYLQQCNPSGIQAGTVSPSTATLo........... 0 0 0 0 +6365 PF06537 DUF1111 Protein of unknown function (DUF1111) Moxon SJ anon Pfam-B_16636 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 499 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.50 0.70 -6.22 6 817 2012-10-03 10:02:11 2003-07-18 14:21:28 6 16 490 0 304 843 222 260.40 28 76.15 CHANGED YTcloA.GGcTT.TFDASsSG.HGFSTPAsNLssspLA.HLpGDtpFETuFTTAPNuEH......PELDGLGPVFNNADCNSCHQRDGRNSTPpluuGpsRVKLGS-....AGIFLRIStAsspsChpG...oAsNNYCAPIsVPsFGuQLFH......RGVLpARsDWQpN.FhGQADVYLSYEhpoVoYs..........DGopVoLKKPlFpVENPYDAPGEoctSsNlTSsLLQsDVL......................MGWRNGMPVFGLGLLEAIuEAsILAhVDEsDoNpDGISGRANaVFDAlKAQuGDspPVSLGRFGWKANTPSVRVQSLGALRGDhGITNPLF..P-ESItGTuLHDSYLTRTGFVDTGss.sGtPEASAEFSDs....VVFYAETLAVPARRNVssspVREGA+LFsQlNCouCHsPoFsTKsSG-..lGGhPMs-uLKGQTIYPFoDMLLHDMGEGLADuRPDFLAoGsEWRTRPLWGIGLTQTVNPQAGFLHDGRAATLEEAILWHGGEAptSppsFMuLops-RuQLlsFLMSL 
............................................................................t...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...............h.h..a.t.l...usP..t.c................t................h................t................t.....t.s......GtthF..p......htC..CHhsp.h.T......................................................................p............l.P.aoDhllHDhG........t..h..........sD...........t.............h..............p...................s.....p....ut...........a.RTsPLWG....hG....h..............................t.....................s...t...t....h.L.HDGRA.csh.EA..lhWH.......u.....G.............-ut......utp.h.t.hs.tpRttlltFlpu....................................................................................... 0 99 179 252 +6368 PF06540 GMAP Galanin message associated peptide (GMAP) Moxon SJ anon Pfam-B_16759 (release 10.0) Family This family consists of several galanin message associated peptides. In rat preprogalanin, galanin is C-terminally flanked by a 60 amino acid long peptide: galanin message-associated peptide (GMAP). GMAP sequences in different species show high degree of homology, but the biological function of this family is unknown [1]. 
20.40 20.40 20.40 35.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.01 0.72 -4.25 6 53 2009-01-15 18:05:59 2003-07-18 14:33:41 6 4 37 0 24 47 0 56.90 53 47.48 CHANGED GKRELpPE.--h+PGuhDRsluEsNlVRTIlEFLoFLHLKEAGALDpLPslP.AtSuEDspcS ...GKRELpsE.--h+sG....uhc.Rsls.-sNIVRTIIEFLoaLHLK.Es...GAL-pLP....sh..SsE-ht.................. 1 1 3 7 +6369 PF06541 DUF1113 Protein of unknown function (DUF1113) Moxon SJ anon Pfam-B_17933 (release 10.0) Family This family consists of several bacterial proteins of unknown function. 19.70 19.70 20.10 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.08 0.71 -4.47 52 996 2009-01-15 18:05:59 2003-07-18 14:35:25 6 5 523 0 214 836 24 148.40 27 59.94 CHANGED hhhFhlYSFlGWlhEsl...asu.lpc..++al.NRGFLhGPhCPlYGhGulhll...hhLtthpp..........shhhlFlhuhllsoslEYlsuallEplFpt+.aWDYSchhhN.........lpGRlCLhhSlhWGlhullhlchlpPhltphlphlPhhht.hhlshllhlhhlhDhlholh ..........................................................hhFhlauhlGWhhEsh....a...ss....lpp.......tc.ah....tcGh..Lh.G.P....hs...P...lY.GhGslh..lh.......hh..lp.hpp.....................shhhl.alhu...hllso...slEYlouhhlEt...hF...p.hp...hWDYSs.h...hN.........lpG..p...lsL.hulhW.Glhull....h...lphlpPhl..ph.h.t...h.l....h.hh...hhshhhhhhhhhDhhhoh.h........................ 2 87 144 175 +6370 PF06542 PHA-1 DUF1114; Regulator protein PHA-1 Vella Briffa B, Sammut SJ, Pollington J anon Pfam-B_16084 (release 10.0) Family This family represents the protein product of the gene pha-1 which coordinates with lin-35 Rb during animal development. The protein is expressed during embryonic development and functions in the cytoplasm. PHA-1 acts in a parallel pathway with UBC-18 to regulate the activity of a common cellular target [1]. 
21.50 21.50 23.00 22.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.75 0.70 -5.75 8 118 2009-01-15 18:05:59 2003-07-18 15:08:16 6 3 5 0 117 123 0 344.80 28 74.95 CHANGED VN+oFNhullcpIRp-acpVll+s....hcp..Dhtpc.........pp.spulhlNsp+lphpphp...pahRFLKpss+I+Vp+lhlc-h.....cphshps-l..HpsIhcsLlsssh..pplcEFhGhssIC.pGChtCtcIutsCpsYGPlQhsh...hhttc+HF-tLpls-hhlthlh..........lps.p.s.......thh-phIssploCD+LplhLsshh..pthsh......lsR-llDhlltpWpVKoVcl........tah............hptphshlphppchhs..hpTpPtupa.sL-pV-Islphu...spshtp.h+ppt....Sl.sF-NhIuNV+RIFPTpclplcLPppl..h.hs.ssh-cFlpsllcMs.+-sp....RNS+Ish+La....sppl..ph...........lsshhpcspsh+...........................p.l..h..........p.ph.pupsaph ................................................lNKshN..hLphlRppapphplch..............................ps..hlaI..Nh++lp.ppl..........................sa...F...FLspsstV+V....cclhhcpl.......................t..htpph....HchIhppLIG.sst..................pplpplIGh-...-l.C..tG....C..pC....plApc..Ch.-.YGPlphps..hpphp..sppa+cLploDtLhcpIAp..........lppspppp......shppLsphIhs..sISCDpLslhl..s.E....p..h............pps..........................hPREVl-hll+KWsVKSlcl............phhpp............tWhphs...hFTplc...h.sc.......h........ppppsch....+hp+VpVsls.S..hsp.sh.hp....ht........ts.apNhIsNlRRhF.scplohp.hs+hh.....h...slcchhpsllphhph-pp....ppLpl.shpha................................................................................................hp....h....h........................................................................................................ 0 15 16 117 +6371 PF06543 Lac_bphage_repr Lactococcus bacteriophage repressor Vella Briffa B anon Pfam-B_16088 (release 10.0) Family This family represents the C-terminus of Lactococcus bacteriophage repressor proteins. 
20.10 20.10 20.10 20.40 19.50 20.00 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.43 0.72 -4.43 4 31 2009-09-11 15:56:52 2003-07-18 15:42:07 7 3 23 0 5 22 0 49.90 55 35.06 CHANGED cpppcslDLADlV........DDsKVDWDcWVSFDG+PLoDEVKcAMKthhGKcLpD .....ppEslDLAcLV........DDsK.VDWD+WVSFDG+PLTD-VKpAhKhlhGK+LpD... 0 2 2 4 +6372 PF06544 DUF1115 Protein of unknown function (DUF1115) Vella Briffa B, Sammut SJ anon Pfam-B_16104 (release 10.0) Family This family represents the C-terminus of hypothetical eukaryotic proteins of unknown function. 27.60 27.60 27.80 27.80 27.50 27.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.61 0.71 -4.46 37 568 2009-01-15 18:05:59 2003-07-18 15:54:08 7 14 302 0 389 542 2 126.60 26 28.30 CHANGED hhsa+I....ts.p+Rh+lhpsA+phpLoGhsl.hs..psullsVEGspcshccah+h.l++lpW......tt.tchptppppsp.h................................p...hhhps..tp+pF.pta.p.t..............s.s.sphhchLpc.......ps.stphaphsh ..............................................hsa+l...h.t.s.s..p+RhKlppsAppLtLTGhsl.h..........t......c......ss............lllVEG..spcu...hccat+Lhl+RlpW...................pp.ptppp-s.tp.t...........................................................h.hhhc..u..t.....p..pR..tF...ppa..p.p.................p.s.sthpchLpc........ps.hpchaphh.................................................... 0 135 200 296 +6373 PF06545 DUF1116 Protein of unknown function (DUF1116) Vella Briffa B anon Pfam-B_16143 (release 10.0) Family This family contains hypothetical bacterial proteins of unknown function. 
20.10 20.10 21.00 20.50 18.90 18.00 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.28 0.70 -5.25 21 940 2009-01-15 18:05:59 2003-07-18 16:05:16 6 5 543 4 68 423 39 213.70 60 44.22 CHANGED lphtPCHcasAVGPMAGlhSsSMPlalVcs.pstGscuassLNEGlG.KlLRaGAhsp-VlpRLcWhccsLuPsLppAlt.....tpGsl-LpslhApALpMGDEsHsRNtAuosLLl+pLsPhll....p.sshspppht-VhpFlussDtFFLNLsMAAsKshh-AAp...slssSolVTsMuRNGscFGIRluGh.GscWFTuPAssspGhhFsGaut-DAssDlGDSA ...........lsF.PCHchsAVGPMuGlTSASM.MhlVcN....tT.GNcAYsNhsEGlG.KVlRFGA.spsVlsRLpWMRDVLuPhLpsAlt.....httsIDLpsMhAQulpMGDEhHpRNhAuosLLhpALsPtIh.........p..hsasp.....pphtEVh-FlusoD.FFLslhMAhCKAAMDAut...tIctuolVTsMsRNGspFGlRVSGL.G.spWFTuPsp.p.s.pGhhF.sG.as.EDussDhGDSA........................... 1 30 46 57 +6374 PF06546 Vert_HS_TF Vertebrate heat shock transcription factor Vella Briffa B anon Pfam-B_16244 (release 10.0) Family This family represents the C-terminal region of vertebrate heat shock transcription factors. Heat shock transcription factors regulate the expression of heat shock proteins - a set of proteins that protect the cell from damage caused by stress and aid the cell's recovery after the removal of stress [1]. This C-terminal region is found with the N-terminal Pfam:PF00447, and may contain a three-stranded coiled-coil trimerisation domain and a CE2 regulatory region, the latter of which is involved in sustained heat shock response [1]. 
20.50 20.50 21.00 20.70 20.00 19.90 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -12.22 0.70 -4.99 6 211 2009-01-15 18:05:59 2003-07-18 17:17:15 6 3 60 0 80 199 0 220.80 29 52.29 CHANGED uaSuoshhuPDssspsGPIISDlT......ELspuSPssosssSl-sp...sS.PllhIKE.....EPsSPupSPc.sE..P.t.phssGssh.ssT.hSPsshls..SILpEs-Ps............................s.phCLSVACLDK..................................sELpDHLDoIDssL-sLQsMLSu+uFSlDoosLhDLFSsSlsh..sDhslPDhcsSLASIpchLSstc.....stspEupsupscosKQLlpYTA.PLhhh............suus-s.uSsDhPh.h.ELt-uShhop..t..E.PT.uLLs.p.pPhsc-PshS ............................................................................................................................sthht.ct..hpss.II.DlT.........................E..h...s.P....ss....sh......-..p.............t..P.lh..lhE.....................E.sss.....pu....sp.pp...........s................sps....sp....s.h...s.sss.ls..S.Lppp.psss...s.........................................hs..slshL..s+...............................................................................................................sEL.DaLDuhDssL-shQshLsuctaolDss.hL.hDlFssuh........h..............sshtu.pp.hhps.........h..p...s...p..p......s....sppLlpYT.u.P.hhh....................s.s.....ss...s-hPh.h.EL......tss.h..p..............Po.uhL..............t.................................................................. 0 6 11 38 +6375 PF06547 DUF1117 Protein of unknown function (DUF1117) Vella Briffa B anon Pfam-B_16251 (release 10.0) Family This family represents the C-terminus of a number of hypothetical plant proteins. 
25.00 25.00 33.40 33.20 21.60 19.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -11.02 0.71 -3.99 7 55 2009-01-15 18:05:59 2003-07-21 17:18:57 7 5 15 0 37 55 0 107.60 53 28.91 CHANGED sVG.LTIWRLPGGGFAVGRF..uGGpRuGERpLPVVYTEM.DGGhNss...suPRRISW....uSR.....ssRSpER.tG.ltRhFRNhhuhFuphpsupSsSut.tStssp..p.....pspp..olhSpSsRRR ..sVG.LTIWRLPGGGFAVGRF..uGGtRsuERc..L..PVVYTEM.DGGFNsu.........uuPRR..I.SW.....ssp............su+upps...tGhltRhFRNhFusFGR..hpsss.S.s.Sup..t.ut........................................................... 0 3 22 31 +6376 PF06548 Kinesin-related Kinesin-related Vella Briffa B anon Pfam-B_16517 (release 10.0) Family This family represents a region within kinesin-related proteins from higher plants. Many family members also contain the Pfam:PF00225 domain. Kinesins are ATP-driven microtubule motor proteins that produce directed force [1]. Some family members are associated with the phragmoplast, a structure composed mainly of microtubules that executes cytokinesis in higher plants [2]. 
24.00 24.00 24.60 24.40 22.50 21.90 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.78 0.70 -5.94 10 95 2012-10-05 12:31:09 2003-07-22 10:00:52 6 7 24 0 51 91 0 320.50 36 43.48 CHANGED uPT-SLAASLQRGLcII-.HppssAsRRSoVuhSFcphshpPs.tsssKlsuuVQo.Pc-ptspt...s.hLCssC+pptsssus.p-.ssh.p..h..hsss.............KuspKVlttAl+REppLEphCs-QAuKIpQL.............oullsQh+csp..c.pp......spt...............lL+pt.p.scsEslppphEsKclpEEhc.shpph.h-luE+EsLLcEIp-LKsQLp...hssotoosphpuSLLthohQlRp......sh..ttsscstpcsL-cER.+WTEuEScWISLTEELRl-LEusRphAEKhchELcpEK+CoEELcDALpRAMhGHARhlEQYsELQEKYs-LLp+HRplh-GIsDVKKAAAKAGhKG.sGoRFAsALAAELSALRlEREKERchLKcENKuL+hQLRDTAEAVQAAGELLVRLREAEEAsolAcERsstsEpEs-KLKKQlEKLK+KH-pElsThKQ...hLAES+LP .........................................................................t..p.pLAuSlp+Glplh-.apts..h.ppu.hthsht.ht.p.........phssth.................hh...sp......t................................................................pt...h.t.s..cph.hct.C.cQsscI.pL..............................................................................pth.pc......tph...hs-+E.L.tEItpL+tpLp.............p.t.t...........h....p.p.............................ps.....t......h.E.EScWlsLs--LRh-l-sp+.hhtc.p.ELp.EKhsstElp-AhphAh.GHuRhlEpYs-LpEha.tLhthHphh.tGlt-VKptAu+A.uh+G.tt.tFhtuhusElosl+hp..+E.......ppNctLp.QLpDTAEAVpAAGELLVRL+EAEcAhs.upcphh.hppEspchhpph-cLK++ac.Eh.shpphL.-uph............... 0 7 26 35 +6377 PF06549 DUF1118 Protein of unknown function (DUF1118) Moxon SJ anon Pfam-B_17963 (release 10.0) Family This family consists of several hypothetical plant proteins of unknown function. 
22.50 22.50 22.60 22.90 22.40 22.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.54 0.71 -3.95 5 63 2009-01-15 18:05:59 2003-07-22 11:33:40 7 3 28 0 33 64 1 105.80 48 53.76 CHANGED +LEK+KVLSsVEKoGLLS+AEcLGlTLSSlE+LGLLSKAEDLGLLSLlENsAuhSPuALASlALPLLlAAIAAVVLVPDDSssLVAlQAVlAuALVlGGouLFVGSVVLuGLQESD ..................plEchKlLopsEKAGLLStAEchGloLSslE+.LGLLSKAE-L..GlLShs..ps.u....oPus..Lho..lA.LsLLlAussslhlVP-Dssh.VslQsV.lAhshslGuuuhasuSsllusLQpu........ 0 13 24 29 +6378 PF06550 DUF1119 Protein of unknown function (DUF1119) Moxon SJ anon Pfam-B_17985 (release 10.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.18 0.70 -5.27 5 64 2012-10-02 13:41:03 2003-07-22 11:36:36 6 2 60 0 45 154 100 273.30 36 93.22 CHANGED suLFVlVQlLALLLusPhpsuEhQ..AaEsPTpslNSIa.YIVhlLlFThFVLIsIK+cKKWlIptlIYlslVu.VlaYVFhlLLoll.slAGhlNllSlllAIuLshLLYtYPEWYVIDlAGshlAsGlSALaGISFGVlPAVVLLIlLAVYDAISVYKTKHMIoLAcGVMDL+LPILFVIPcspsYSFls-oF-su--tEA..........tssaFIGLGDAVMPSILVVSAAlFl-o..l.uGlpYssLsAMuGTLlGhlVLhhhVhRGRPpAGLPaLNoGAIAGFLIGsLhuGl ....................................................................h..hhlhlQlhALhLs.....shtt.sshp..shEsPpsssNslh.Ylshl.Llh.TuhhLhhh+..hshphllphll....hhshhh..hhhYVa..tsll.s...hh..............h...h..................hhuhlhu.l...u.ls.sh.L...h....h.YP.E....WYV.ID.hsGllhu..A.G.su.ulFGISh.u.ll.P....s.llLLslLAVYDAISVYtT....cH........MlsLAcG.....Vh.-h....+lPllhVlPpphsY..Sahc.....t.....s.....htt.tt.pt.t.t...........................................................tsAaFlGlGDslhPolLVs........SAsh..F.....h..s.s.....s......l.............h...l...s....l..P..u...L.s.AhlG...ol.hGh.h.l.L.h.h.h.V.h.+.G+spAGLPhLNuGAIhGallGslhs........................................................................... 
0 10 37 42 +6379 PF06551 DUF1120 Protein of unknown function (DUF1120) Moxon SJ anon Pfam-B_17948 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild --amino -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.42 0.71 -3.78 7 319 2012-10-02 17:35:21 2003-07-22 11:42:21 7 1 164 0 52 239 31 98.50 32 43.39 CHANGED LKVTGplssuACTPpLoGGGsVDaGphpsssLpss....chspLGhKsholsIoCsusstlAhsupDsRtsT.......t..tsshsh....shs..Gtt..s.ssttFGLGpossGtKIGAau .......LKVpGpls.suuCTsp..l.s.s.GG.llDaGpls...supL.ps.T......p.s.s.pL...u.p.+.s.hol.oIsC.s.u..sTh...luaossDNRssS.......................................GlG.................................................................................... 1 7 17 40 +6380 PF06552 TOM20_plant Plant specific mitochondrial import receptor subunit TOM20 Moxon SJ anon Pfam-B_17991 (release 10.0) Family This family consists of several plant specific mitochondrial import receptor subunit TOM20 (translocase of outer membrane 20 kDa subunit) proteins. Most mitochondrial proteins are encoded by the nuclear genome, and are synthesised in the cytosol. TOM20 is a general import receptor that binds to mitochondrial pre-sequences in the early step of protein import into the mitochondria [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.21 0.71 -4.70 6 100 2012-10-11 20:01:00 2003-07-22 11:48:36 7 11 46 1 51 128 5 147.30 31 55.26 CHANGED FDRLlhFEHsRKsAEsTYtpNPLDADNLTRWGGALLELSQFQsls-uKpMIpDAISKLEEALlIsPtKH-AlWClGNAaTSauFLosD.sEA+.pF-hAopaFQ.AhsppPsNplY+KSLEhssKAPcLHhthhppt.hp..hGst........ssussSsKohKpKKoS-hKYDlhGWVILAsGVVAWluFAK ..............................................hF-.spctuptthtpsP.D..s-s...LspWGtALlELup...h....pps.....-u....h.p.hlp-AlsKhccALtlsPppa-AlaslGsA........houhu.......h.hp....s-....p....tpA....pphFcc....AsphFppAhs.cP...ssphYppuLph...................................................................................................................................................... 0 16 36 43 +6381 PF06553 BNIP3 BNIP3 Moxon SJ anon Pfam-B_18014 (release 10.0) Family This family consists of several mammalian specific BCL2/adenovirus E1B 19-kDa protein-interacting protein 3 or BNIP3 sequences. BNIP3 belongs to the Bcl-2 homology 3 (BH3)-only family, a Bcl-2-related family possessing an atypical Bcl-2 homology 3 (BH3) domain, which regulates PCD from mitochondrial sites by selective Bcl-2/Bcl-XL interactions. BNIP3 family members contain a C-terminal transmembrane domain that is required for their mitochondrial localisation, homodimerisation, as well as regulation of their pro-apoptotic activities. BNIP3-mediated apoptosis has been reported to be independent of caspase activation and cytochrome c release and is characterised by early plasma membrane and mitochondrial damage, prior to the appearance of chromatin condensation or DNA fragmentation [1]. 
26.00 26.00 55.60 29.30 25.90 25.90 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.48 0.71 -4.93 14 210 2009-01-15 18:05:59 2003-07-22 11:56:35 7 2 85 6 101 187 0 181.60 45 95.99 CHANGED MSssttss................EsuLpsSWVELphsssssspss...............................st.p.lPssSp..sG-hE+lLL-AQ+Epup.SSR....sSSps-SP.ps.oP..pssth.hssp..spps...ppp-cshct..t......KssDWlhDWSSRPENlPPK-FhFcH........PK+.........osshShRpotshKc.ulFSs-hLhlhlPollLSHlLulGlGlaIGKRLsh.spsoh ..................................................ttLpuSWVELphssssss.ss.......................................................................................................s...t.p.lPuosu.hsGD.hEKILLDAQHESup.SS+.......sSS+CD..S..........P.ts.ps..pp..sst...hssc..psp+s..soQSEE-shEtc+-h-.........hpKsuDWlhDWSSR..P..ENlPP.K..EF.hF+H.....................P..KR.........osoLSMRpous..M...KKGGlFSuEFLKlFlPSLlLSHl.LA.lGLGlYIG+RLssso.s..h............................ 0 17 28 61 +6382 PF06554 Olfactory_mark Olfactory marker protein Moxon SJ anon Pfam-B_18049 (release 10.0) Family This family consists of several olfactory marker proteins. Expression of the olfactory marker protein (OMP) is highly restricted to mature olfactory receptor neurons in virtually all vertebrate species from fish to man. 25.00 25.00 76.60 76.50 19.10 17.70 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.74 0.71 -4.68 4 40 2009-01-15 18:05:59 2003-07-22 12:01:40 7 1 27 7 9 43 0 138.60 65 94.58 CHANGED hELsFs.DhQLTchMRLRVQSLQQ+GpK+QDGE+LL+ssEpVYRLDF.ScQcLpFspWNVsLpuPG+lsITGTSQhWTPDLTpLMTRQLLEPsulFWRps...-s-slcC.EADAQEFGERIAELAKlRKVMYFLIsFt-GssPtslcCSlsFpp ........h-hshs.D.pLTc.MRLRVpSLpQRGpKRQDGE+LLpPsEuVYRLDF.ppQ.+.LpFpRWsVsLcpPGKVTITGTSQhWTPDLTNLMTRQLL-PsAlFWRc-...-s-shchpEADA.EFGERluELAKlRKVMYFLhsFt-GscPsslcsSlsFp... 
0 1 4 8 +6384 PF06556 ASFV_p27 IAP-like protein p27 C-terminus Moxon SJ anon Pfam-B_18169 (release 10.0) Family This family represents the C-terminal region of the African swine fever virus IAP-like protein p27. This family is found in conjunction with Pfam:PF00653. It has been suggested that the family may be a host range gene involved in aspects of infection in the arthropod host, ticks of the genus Ornithodoros [1]. 25.00 25.00 26.80 26.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.89 0.71 -4.25 2 20 2009-01-15 18:05:59 2003-07-22 12:41:11 6 2 16 0 0 9 0 129.00 90 57.50 CHANGED ATLGIIGLKKMIDSYNDYasNEV.VKHKNRVYTHKRLEDMGFSKsFMpFILANAFhPPYRKYIHKIILN-RYFTFKFsAaLLSFHKVNLDNQhTYCMTCGIE.IpKDENFCNACKsLNYKHYKhLNFSVKL ...........ATLGIIGLKKMIDSYNDYYNNEVFVKHKNRVYTH.KRLEDMGFSKPFMRFILANAFIPPYRKYIHKIILNERYFTFKFAAHLLSFHKVNLDNQTTYCMTCGIEPIKKDENFCNACKTLNYKHYKTLNFSVKL........... 0 0 0 0 +6385 PF06557 DUF1122 Protein of unknown function (DUF1122) Moxon SJ anon Pfam-B_18183 (release 10.0) Family This family consists of several hypothetical archaeal and bacterial proteins of unknown function. 25.00 25.00 33.90 65.80 24.60 23.60 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.16 0.71 -5.10 6 37 2009-01-15 18:05:59 2003-07-22 12:43:05 6 1 37 3 20 29 0 155.50 43 86.76 CHANGED lG.pl...pLcu.clhQs+hpE.psFplhl...ss+..pLspshhFpGRt.YYhPWIEI.shsP.LR........ptslEscLacFlhshLssuG+lFVpYhcD+EThptL.+GhsPAsTcLGFpLLKtGFTWFKsWYaPEGh.EGG.KlQApKPLoc-ccpRpLppLhcElK.phltph.sStlt ....hhhht.......s.sl+ps+hpE.hshpLhl....ssc..plspsphFpGRs.YYsPWlEl.sh.Phhp.........spslEcphacllhpaLsPGu+LFVpYlcDpETtptL.+GhsPs-T.LGhpLLpsGFTWFKDWYFPEGG.EGs.KLQusKPlst..th+pLp.lht-lc..p.h.........t.......... 
0 7 10 16 +6386 PF06558 SecM Secretion monitor precursor protein (SecM) Moxon SJ anon Pfam-B_18197 (release 10.0) Family This family consists of several bacterial Secretion monitor precursor (SecM) proteins. SecM is known to regulate SecA expression. The eubacterial protein secretion machinery consists of a number of soluble and membrane associated components. One critical element is SecA ATPase, which acts as a molecular motor to promote protein secretion at translocation sites that consist of SecYE, the SecA receptor, and SecG and SecDFyajC proteins, which regulate SecA membrane cycling [1]. 25.00 25.00 28.50 28.50 24.80 24.70 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.94 0.71 -4.53 10 529 2009-01-15 18:05:59 2003-07-22 12:49:01 7 1 522 0 41 182 1 145.30 72 84.14 CHANGED MVAASLGLPsLssuupsss..Pupuoops+psSspssFssLALLcsssRRP..oFoVDYWHQHAIRTVIRHLSFAh.APQslPsupEs.......sPLQsQHLALLDTLsALLTQEupPPsllR...phspssFhP...usaplulWIuQlQGIRAGPptLS ..............MVAASLGLPALS.NAAEsNA...PAKATopNHpsS.........AKVNFuQLA.....LLEAssRRP.......Na.SVDYWHQHAIRTVIRHLSFAM...AP..QTLP..VAEEo........hPLQ.AQHLAL...LDTLSALLTQEG..TPsptsh....Rl-aAaFsP..QAp.FSoPlW.ISQAQGIRAGPQRL.... 0 1 9 24 +6387 PF06559 DCD 2'-deoxycytidine 5'-triphosphate deaminase (DCD) Moxon SJ anon Pfam-B_18211 (release 10.0) Family This family consists of several bacterial 2'-deoxycytidine 5'-triphosphate deaminase proteins (EC:3.5.4.13). 
19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.94 0.70 -6.07 17 532 2012-10-03 01:18:03 2003-07-22 12:52:18 6 3 521 4 109 933 819 232.00 47 87.95 CHANGED hstsh.....sGILP-csItshhusGtIssshshsssQIQPASLDLRLGspAaRVRASFLPGsuRoVs-RL.--hthHclDLocGAVLETGCVYlVPLhEuLuLPsslsAsANPKSSTGRLDlFTRlITDputtFDplsAGYsGPLYsEISPRTFslLVRsGsRLsQlRFRpGpshLs-s-LttlHtppsLssu..tsh.ssGlulSVDLtuptss.LVGYRAK+HoGVlDlD+hGsacst-FWEPlhspsup...LILDPGtFYILsS+EAVplPPshAAEMsPasshVGEFRVHYAGFFDPGFGpssAGGsGSRuVLEVRuHEsPFlLEHGQsVGRLVYE+MsstPssLYGtsluSNYQuQGLKLSKHFtu ..............................................................................p...........LsDpDIc.Ahl-.pG..t.luh...s.s.t.s...s...-pIpsAolDlRLGs..+...s..a.R.s....+....Au....F......L..s.....G.P.p.c.....p.V.us.t.L..-+..l....h..........c...E......I...s....L..s...-..G....t..s..L..c.s......Gplh.lss.......hLESlsLP.u-lsuhhsu+SShuRLsLhs+VhAc..............................................................................................................................................................................................................................................................................................................................h............................................... 0 27 64 85 +6388 PF06560 GPI Glucose-6-phosphate isomerase (GPI) Moxon SJ anon Pfam-B_18250 (release 10.0) Family This family consists of several bacterial and archaeal glucose-6-phosphate isomerase (GPI) proteins (EC:5.3.1.9). 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.26 0.71 -5.26 6 215 2012-10-10 13:59:34 2003-07-22 12:56:42 6 3 164 25 77 297 86 161.90 26 66.57 CHANGED VR+hpDhtulhhDEpsapth.E.hsc-ssY-Vp-htps-c-u-......LpaslThl.PGplGcEahhTpGHaHsp.....ss+PElY.slcGpushLLQp.P-s..........-ltVltscttsslhVPPhauHpolNsG-ssLlhsshhsuDsupcY-sIs+tuGhphhllcsGt..h.ppssss.c .......................................................h.tt..thh.s.............t.sphhYp.l.h.thht.ppctp......L.aslThl.hPGplGp.Eh...hhTpG.HaHsh.........hsps....ElY.slpGcuhhl.L.Qc...-s............csh..s..lpsptGshlhl..PPsauHp....oINs.G.cp.....s.Ls..hushhsps..huhc..Ytslpptthht.hh..h.h.ppGt............................................. 0 22 51 67 +6391 PF06563 DUF1125 Protein of unknown function (DUF1125) Moxon SJ anon Pfam-B_18065 (release 10.0) Family This family consists of several short Lactococcus lactis and bacteriophage proteins. The function of this family is unknown. 25.00 25.00 28.70 28.30 24.10 23.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.84 0.72 -4.27 2 19 2009-01-15 18:05:59 2003-07-22 13:02:46 6 1 16 0 3 7 0 54.60 66 81.59 CHANGED MTV.lKsphsphIlpFchGsDIEsFSsuFLa+KIKhhcIKNt.-L.hhLEDTKND ..MTV.lKsphsphIlpFchGsDIEsFSsuFLa+KIKhhcIKNt.-L.hhLEDTKND 0 1 1 1 +6392 PF06564 YhjQ YhjQ protein Moxon SJ anon Pfam-B_18260 (release 10.0) Family This family consists of several bacterial YhjQ proteins. The function of this family is unknown. However, the family does contain a P-loop sequence motif suggesting a nucleotide binding function. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.51 0.70 -5.01 9 638 2012-10-05 12:31:09 2003-07-22 13:44:36 7 3 570 0 80 5749 2006 228.30 57 95.90 CHANGED MslLuLQGlRGGlGTTSlsAALuWALQhLGEsVLVIDhSPDNLLRlpFNlDacppcGWARA.LDGpsWp-uuhRYsstLDLLPFGpLsssEhEs.pt.hpsshutaspslptLpttupYcWlLlDLPt...GhSslTRphlphsDpsLsllpsDANCHlRLHQQsLPsusclLlNchphuSQLQpDLaQLWL.QoppcLLPl.llHRDEAhuEsLAuKQPLGEYRsDSLAAEElhTLANWCLlpht......puss .................................................................................MAlLGLQGV.RGGVGTT..T...lTA.A.L..A....W.u.L..Q.h....L..G.......E....N.V..LV..l..D.....A.......s...P.....D.....N......L.........L.........R....L........S......F........N.......V........D.......F.......s..........H.....p.......p........G........W.......A........R..........u.....h......L.........D.......G......p.......D...........W.......R..........D.........A.....G......L.........R.........Y.......T......S...........Q....................L.....D.....L....L........P...............F.....G........Q..........L....o.......h......p.....E......p.........E.......N..............s........Qp...........W....Q.......p......c.......L.......u.......-........I.........s...o....u....L.........Q.....p..L........K......A.............S.......G...R......Y..p..W..I..Ll..D.L.Pt........s.uS...l.T...+.Q.L..........l..S.L.C...DH.oL..A....l...l.......p............V............D..............A.............N............C.............H............I..........R.........L............H...........Q............Q..........A.......L...........P..........s..........G.........A.........H..........I........L.....I........N.....s...........h.......R.......I....G.........S....Q....l...........Q.D...D......l.....Y....Q....l....W..L....Q...S.......Q.....R....R.........L......L.....P.....h......l.......I.H......RDE.A.M.A.ECLA.u.K.Q.Pl..GEYRSD.uL..AAEElLTLANWCLLp.uG..hKo.......................................................................
................................................................................................................................ 0 5 16 51 +6393 PF06565 DUF1126 Repeat of unknown function (DUF1126) Moxon SJ anon Pfam-B_18695 (release 10.0) Family This family consists of several eukaryote specific repeats of around 35 residues in length. The function of this family is unknown. 21.20 21.20 21.90 21.20 18.70 19.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.06 0.72 -4.47 100 901 2009-01-15 18:05:59 2003-07-22 13:48:32 7 10 125 2 576 907 19 33.10 40 13.93 CHANGED llpYaLsDDTlplhE.shscNS.Gh.stGpaLcRp+ .......hlpYaLpDDTlplhE.sh..hcNS..Gh.stGph.l+Rp+.... 0 273 335 466 +6394 PF06566 Chon_Sulph_att Chondroitin sulphate attachment domain Vella Briffa B anon Pfam-B_16515 (release 10.0) Domain This family represents the chondroitin sulphate attachment domain of vertebrate neural transmembrane proteoglycans that contain EGF modules. Evidence has been accumulated to support the idea that neural proteoglycans are involved in various cellular events including mitogenesis, differentiation, axonal outgrowth and synaptogenesis [1]. This domain contains several potential sites of chondroitin sulphate attachment, as well as potential sites of N-linked glycosylation [2]. 
25.00 25.00 71.60 70.70 22.40 21.40 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.84 0.70 -4.74 3 37 2009-09-11 06:23:37 2003-07-22 13:53:27 6 4 24 0 17 45 0 225.00 74 43.01 CHANGED lPsPEAGSAl..........EAEssV+SVPAWEsRANDTREcAGsPAAG-DET....ShpEsGSEpAsVGPGVGPEEuLEASAAVTuTAWLEA-SPG..............LGGVTAEAG.SGDoQuLPATLPTPDEALGoSosSsAlPEATEA........SsP......PSPuPGDKPSLlPELPKESPlEVWLNLGGSTPDPp.............uPEPTaPhQGTLEPpPASDIIDIDYFEGLDGEGRGsDhG+FPGSPGTSE+HPDsGGETPSWSLLDLYDDFTPFDESDFYPTTSFY....DDLEE ............................................-suSAh..........EA-t.h+us.shEs+ANsTp-tsu.PsAG---s....ohpt.G..up....u...........hGPEEs.LpASAAVTuTAWLEs-oPG..............LGGsT.sEsG.SGDsQuLPATL.sPcEsLspSshPPAhPEATEA........SsP......PSPTPG.....DK.SP.....usELP....KESPLEVWLNLGGSTPDPp.............GPEPTaPFQGTLEPQPASDIIDIDYFEGLDGEGRGADLGSFPGSPGTSEpHPD..T-GETPSWSLLDLYDDFTPFDESDFYPTTSFY....DDL-E........ 0 1 1 3 +6395 PF06567 Neural_ProG_Cyt Neural chondroitin sulphate proteoglycan cytoplasmic domain Vella Briffa B anon Pfam-B_16515 (release 10.0) Family This family represents the C-terminal cytoplasmic domain of vertebrate neural chondroitin sulphate proteoglycans that contain EGF modules. Evidence has been accumulated to support the idea that neural proteoglycans are involved in various cellular events including mitogenesis, differentiation, axonal outgrowth and synaptogenesis [1]. This domain contains a number of potential sites of phosphorylation by protein kinase C [2]. 
29.40 29.40 36.20 37.20 21.80 29.30 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.78 0.71 -3.98 2 68 2009-01-15 18:05:59 2003-07-22 13:57:31 6 5 29 0 34 83 0 77.70 59 21.61 CHANGED LYLLKTENoKLR+T.KaRTPSELHNDNFSLSTIAEGSHPN...........................DDPuAPHKlQDPLKstLK-EEshsI..ShuPc.EGuKG-.sshGVsCLpNNLs ...............LalLKTENsKL.R+psKaRs.sSEhHNDNFSLSTIAEGSHPN.................................pD-ssh...................p.php-shK.......s...K--tshsl..ShsPc.Est+.................................................. 0 1 4 10 +6396 PF06568 DUF1127 Domain of unknown function (DUF1127) Moxon SJ anon Pfam-B_18606 (release 10.0) Domain This family is found in several hypothetical bacterial proteins. In some cases it represents it represents the C-terminal region whereas in others it represents the whole sequence. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -7.85 0.72 -4.54 170 1368 2009-01-15 18:05:59 2003-07-22 13:57:49 6 3 640 0 307 914 205 40.10 32 58.60 CHANGED ts..lhpthtpa....pptR....pop.ppLspLo-+pLpDlGl.sR..u-lpp ..........................h.tlhpshpp.W...........pct+..............pst.p....tLpphoDcpLcDIGl.sR...p-l............ 0 40 125 194 +6397 PF06569 DUF1128 Protein of unknown function (DUF1128) Moxon SJ anon Pfam-B_18651 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 20.60 20.60 21.40 21.40 20.10 19.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.27 0.72 -4.25 15 417 2009-01-15 18:05:59 2003-07-22 14:07:06 6 1 417 0 40 132 0 68.40 50 95.73 CHANGED hSppopEslphMI-pI+cKLplVNhullcs-cFssspa-DLc-IY-hVM+K-oFSPSEMQAIs-ELGsLRK ........ht..spE....MVtuI+EKLphVNtGllcs-cacsss.E-LpDIY-aV.p.REphSPSEhpAIA-cLGpLR+.... 
1 11 23 32 +6398 PF06570 DUF1129 Protein of unknown function (DUF1129) Moxon SJ anon Pfam-B_18737 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 29.10 29.10 29.20 29.10 28.90 29.00 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.24 0.70 -5.01 23 957 2012-10-01 22:34:14 2003-07-22 14:10:58 6 1 920 0 81 444 0 203.50 30 88.26 CHANGED cLTKKNp-alaphp+pL.hpsuho---hpthLp-hlPcll-sQKpGhTARpLaG.sPophspplhps.pptscssp............hhhh.................LDsuLhhlulhslhhGlhshFuptstssh.....GllTLllsuhsuGhshhhhh+alh......hc+scRsshhKthhhlshshllWhslashsuh.LPsslNPsLsshlhllIGulAhulRaal++KYsIpuu .......................Lop+Nppalh.h..ppph..htssh..o-p-.hctllpcllspllpsQp+GhoApsLaG.sPsphAcuhtpptt.h.pcpsc...........s.hh.hh..............................hDssLh...l...h.....u.l...h...u....llp.u.l....h....s..aF.sp...t.supsh........Gl..l...ol.lh.hu....l.l.uG.hsh.h.h..h.ahalhth.......shspSp+..s..sha..+tl.hlll.h..shh.ha.h.l.lFh.s...suh...L.s..ss.lN..s.h..L..s.s..hshh.Ilu...slhhsl.+.aal+++hsl.s....................................................................................................... 0 15 37 60 +6400 PF06572 DUF1131 Protein of unknown function (DUF1131) Moxon SJ anon Pfam-B_18811 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
20.60 20.60 23.00 21.10 18.20 17.00 hmmbuild --amino -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.93 0.71 -4.78 8 552 2009-09-14 12:02:21 2003-07-22 14:28:26 7 2 547 2 43 181 4 169.20 76 88.35 CHANGED SuhsWSuhuPaNWF...GS.ShEVT-QGVGsITAuTPLsEsAIscALsGDYRLRSGMcTssGplVpaFpAhKscpltlVIsG-.cGsVsRIDVhDscIsTssGVKIGTPFSDLYSKAFGsCppusuD.-ssuVECtAspSpHlSYlFoGcWuGPEsLMPsDDsLKsWcVSKIIW+p ................SSVNWSAANPWNWF...G..S..STcVSEQGVGcLTASTPLpEpAIADALDGDYRLRSG.MKTsNGN.VVRF.FEsM..KG.D..N..VAM.V.ING-..QGT.lSRIDVLDSDIPussGVKIGTP.FSDLYSK.A.F.G.NCQKAcG.D.Ds.puVECKAEGSQHISY.FS.G.EWSGPEGLMPSDDTLKN.W.KVSKIIWRR................... 0 4 12 26 +6401 PF06573 Churchill Churchill protein Moxon SJ, Bateman A anon Pfam-B_19061 (release 10.0) Family This family consists of several eukaryotic Churchill proteins. This protein contains a novel zinc binding region that mediates FGF signaling during neural development (unpublished obs Sheng G and Stern C). 25.00 25.00 36.20 35.90 18.20 17.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.71 0.71 -4.07 7 78 2009-01-15 18:05:59 2003-07-22 14:37:27 6 3 49 1 36 61 1 99.20 68 61.12 CHANGED MCssCVpcEYPDRGshCLEsGSaLhNFVGCupCspRDFVLIsN+.ssp--DsEEIlTY-HhCKNCcHVIAcHEYTFoVsDDaQEYTMLCLLCG+AEDolSlLPDDPRQ.sPLF ....MCGsCVcKEYPsRGNTCLENGSFLLNFsGCAsCsKRDFhLIoNK.Slc.EED..GEEIVTYD.HlCKNCHHVIARHEYTFSlhDEaQEYsMhChLCGp.u-DohSlhP-DPRp.t.L........................ 0 10 12 19 +6402 PF06574 FAD_syn Flavokinase; FAD synthetase Moxon SJ, Mistry J, Eddy S anon Pfam-B_18632 (release 10.0) Family This family corresponds to the N terminal domain of the bifunctional enzyme riboflavin kinase / FAD synthetase. These enzymes have both ATP:riboflavin 5'-phospho transferase and ATP:FMN-adenylyltransferase activity [1]. They catalyse the 5'-phosphorylation of riboflavin to FMN and the adenylylation of FMN to FAD [1]. 
This domain is thought to have the flavin mononucleotide (FMN) adenylyltransferase activity [2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.74 0.71 -4.60 62 4652 2012-10-02 18:00:56 2003-07-22 15:00:27 7 15 4320 17 995 3311 2386 148.80 31 49.28 CHANGED h.tt.pssllslGsFDGVHhGHppllspshphA....pppsl.ssVhTF-P+Ppphhp............pptshc.LoshccKhchlpphG.lDhlhhlsFsppFuplouccFlpphLlppLps+tlllGhDFpFG+pppGshphLpphupths.apVphlssh..phsstclSSTt ....................................pssslsl.GsFDGVHhGHptllpphpc.hA.....................c..p..p..s..l............s.s..VhTFcPpPtp..hht.......................ppsshp....Lsshc-Khchl....tp..h..G......lD......hlhh.l..pFs...ppF...u...p...los.ppFlpphLh.tpL.p.s.+.hlllG.DFpFGp...p.+.p..Gs.hphLpph..ut.....t.....h....s..Fp.lph...ss.ph.....p...p.s.....+.l.SST........................................................................................................ 0 342 672 856 +6403 PF06575 DUF1132 Protein of unknown function (DUF1132) Moxon SJ anon Pfam-B_19091 (release 10.0) Family This family consists of several hypothetical proteins from Neisseria meningitidis. The function of this family is unknown. 25.00 25.00 83.70 83.50 22.20 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.39 0.72 -3.70 4 69 2009-01-15 18:05:59 2003-07-22 15:24:27 7 1 39 0 3 29 0 94.10 64 89.32 CHANGED hALYKYQPSSKYFGQSMAlIAppEF.EFs+ssKpasllEsFSaFLN+RlsHNIWKIYFSDESshaI+.....cS.csG+pl+cF.asEhsDs.ssFsslFs LALYKYQPSSKYFGQSMAlIAQpEF.EFs+.sKp..sVI-CFSaFhN+RlpHsIWhI.FSDpSphhI+......cS.csG+placF.asEhsDs.ssFsslF......... 0 3 3 3 +6404 PF06576 DUF1133 Protein of unknown function (DUF1133) Moxon SJ anon Pfam-B_19184 (release 10.0) Family This family consists of a number of hypothetical proteins from Escherichia coli O157:H7 and Salmonella typhi. The function of this family is unknown. 
21.70 21.70 21.70 21.70 21.60 21.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.19 0.71 -5.09 4 284 2012-10-04 14:01:12 2003-07-22 15:29:15 6 3 180 0 2 194 0 164.10 76 95.54 CHANGED MI.PopsGKSGEhlRLpTLESlWIQGKLRMWGRWSYIGGGpuGNMFNpLLuStKlTKTAlNEALRRMKKuGIcKPELEAFhREhlsuKpKShLAaCoDsEuLpIDtVIupVLsc..asGLhullhpRYchR.hSKRpMAEpLpspHP-WshhTCcRRI-sWLulAEphLYsPMsDuFG .......................MIYPp.TGKS.GEHLRLpTLEuVWIQGKLRMWGRW....SYIG.GG....KoG.NMFNQLLsSKKLT.K...TAINEALRRMKKuGl-KPELEAFLR-MIsGKQK.SW..LuHCT.D.uEALhIDRV....IuEV.LAc..aPGLlslL+Q............RYcGRGMoK.............RKMAELL.N-..............s.H.P.............EW............shp............TCE+RIttWLtVAEahLYhPMh.uF.................................... 0 0 0 1 +6405 PF06577 DUF1134 Protein of unknown function (DUF1134) Moxon SJ anon Pfam-B_19217 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 27.80 27.80 28.00 27.80 27.30 27.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.96 0.71 -4.74 26 198 2009-01-15 18:05:59 2003-07-22 15:31:14 7 1 193 0 77 155 20 160.70 59 75.36 CHANGED ssoYsp-EllsuucsFFGpsotGLApllEcsFpchGpPNGYIlGcEuSGAhlsGLRYGEGsLapKssGc.++lYWQGPSlGaDhGGsuuRshhLVYNLssspsLYpRFsGlsGSAYlVuGhGhshLppssllLlPIRoGVGhRLGlNlGYLKaTpp.TWsPF .................ssoYot-ElVcuGHcFFGssStGLAsslE+AFppaG.h.PNGYILGEEGSGAFluGLpY.GEGTLYTKNA.G.c....++laWQGPSLGaDaGGpGoRsMhLVYNLsslpsLYpRauGVsGSAYlluGlGhssL+pssIVLVPIRo.GlGARLGlNlGYLKhotpPTWNPF............ 0 20 46 58 +6406 PF06578 YscK YOP proteins translocation protein K (YscK) Moxon SJ anon Pfam-B_19248 (release 10.0) Family This family consists of several YscK proteins. The function of this protein is unknown but it belongs to an operon involved in the secretion of Yop proteins across bacterial membranes. 
20.60 20.60 20.60 20.90 20.50 20.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.59 0.71 -4.93 9 84 2009-09-10 17:11:24 2003-07-22 15:37:43 7 1 78 \N 13 49 2 201.50 43 94.20 CHANGED sulTsaQhRFCPAuYlccualPsphhhlLs.LPpWRspPslNthLL--huL....-suachPssLGuLALhPputLppLLthLGulLHGpAlRpslLupslpplhsllGp-GtRhLlpQh-lLIGsWPsGWQ+PLPpplc-shhEpsuLpF...WL.....uAsp-hs.sWtpRLsLRLs.ssssssW.lupppRsLApsLChKIAKQVsPQChHLLK ........................................hlTsaQLRFCPAtYlH.spLPuhh......h.p..lLshLPpWRc...s..u..tLNuhLL-cau.L....Dssac..pPpsL..GuLsLhPQutLE..tLLshLGhlLHG.pAlRpslLusslp.p.LhsllGp-shR.ll.Qt-LLIG.sW...PstW.QRsLPsph-pth....h.psGLsF...WL.....AAhpstspsWs+RLsL..RLshsss.stsW.l....sEppRsLApsLChKlsKQVhPpC.HLhK............................................................................... 1 3 5 6 +6407 PF06579 Ly-6_related Caenorhabditis elegans ly-6-related protein Moxon SJ anon Pfam-B_19267 (release 10.0) Family This family consists of several Caenorhabditis elegans specific ly-6-related HOT and ODR proteins. These proteins are involved in the olfactory system. Odr-2 mutants are known to be defective in the ability to chemotax to odorants that are recognised by the two AWC olfactory neurons. Odr-2 encodes a membrane-associated protein related to the Ly-6 superfamily of GPI-linked signaling proteins [1]. 21.00 21.00 22.20 21.30 20.20 19.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.94 0.71 -3.93 4 68 2009-01-15 18:05:59 2003-07-22 15:46:10 7 1 9 0 59 59 0 113.00 30 62.49 CHANGED CMS.hYpshapa..hp+hYpcPtsFospCcDsph-so.h.os.C..pohCVTlhps.cVhuG.hht+tYhRGChsslhh+GaNpo..+Tluhhpp.p.C+shotopLF.sstpc-..t.uclplCSChGshCNhS ........................CMS..Ypsha.....lpphYhhP+sFTcpCpc.pht.tu.hsss.C..pohCVolhEs.sl..............h.ut....h........htptaIRGChsclhhpGhNps..tThth.hpppp.Cpphptp..pLa...th.tp....pplplCoChsshCN................. 
2 21 30 59 +6408 PF06580 His_kinase Histidine kinase Vella Briffa B anon Pfam-B_794 (release 10.0) Family This family represents a region within bacterial histidine kinase enzymes. Two-component signal transduction systems such as those mediated by histidine kinase are integral parts of bacterial cellular regulatory processes, and are used to regulate the expression of genes involved in virulence [1]. Members of this family often contain Pfam:PF02518 and/or Pfam:PF00672. 21.30 21.30 21.60 22.70 20.80 20.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.71 0.72 -4.06 82 7029 2009-01-15 18:05:59 2003-07-22 15:49:28 8 139 2576 0 1632 5611 368 82.50 32 15.92 CHANGED clchLpuQlsPHFLaNoLssIpthsphs..sppstphlhpLuchlR..hsl...psppphloLp-ElphlcsYltlpph..Rascclph..ph ...................clcsLpuQINPHFLFNoLssI..p..t..hh..ph..s...s......cc..spphlhpLSphhR..............hsL......pps............p...........ch.....lsLpcElphlcsYltlp.ph..Ras.c.+lphp.................... 0 783 1309 1483 +6409 PF06581 p31comet DUF1135; Mad1 and Cdc20-bound-Mad2 binding Moxon SJ anon Pfam-B_19462 (release 10.0) Family This family is involved in the cell-cycle surveillance mechanism called the spindle checkpoint. This mechanism monitors the proper bipolar attachment of sister chromatids to spindle microtubules and ensures the fidelity of chromosome segregation during mitosis. A key player in mitosis is Mad2, and Mad2 exhibits an unusual two-state behaviour. A Mad1-Mad2 core complex recruits cytosolic Mad2 to kinetochores through Mad2 dimerisation and converts Mad2 to a conformer amenable to Cdc20 binding. p31comet inactivates the checkpoint by binding to Mad1- or Cdc20-bound Mad2 in such a way as to stop Mad2 activation and to promote the dissociation of the Mad2-Cdc20 complex [1]. 
24.10 24.10 24.20 25.50 23.80 24.00 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.64 0.70 -5.52 3 64 2009-12-03 16:30:54 2003-07-22 16:00:29 7 2 48 2 39 60 0 238.90 56 86.91 CHANGED SsAAAPsL-WYEKPEETHAsEV-.LETVhPPAQEPSNsuEPFCPRD.LVPVVFPGPVSQEsCCQFTCELLKHILYQRpQLPLPYEQLKHFYRK.VPQAEDss+KKsWhATEARNRKCQQALAELESVLSHLcDFFARTLVP+VLILLGGNALSPKEFYELDLSRLAPFGVDQGLNTAACLRRLFRAIFLADPFSELQTPPLMGTIVMVQGHRDCGEDWFQPKLNYRVPSRGHKLTVTLSCGRPSVPAMASEDYIWFQAPVTLKGFHE ................................................Lchh-cst-spsspl-.L-p...sspcs.ps.E.hpPcD.hVPVVFPGPVSQEGCCpFTCELLKHIhYQRQQLPLPYEQLKaFYRK...oP.....QA..E-hh+KK..spsssEso....sRKCQQsLuELESVL...SHL.E.shFAR.TL.VPRVLILLGGsALSPKEFYELDL..S.pLuPhS...h...-pSL.oTuAC........LR+LFRAlFhADsFSEL.QuPPLMGT...lVMsQ.GHRDCGEDWFRPKLNYRVPoRGHKLTVTL..S.....C....G...c.Pu.....l...............s......h.......A..h..EDYIWFQAPVTLKGF+E.............................. 0 7 9 20 +6410 PF06582 DUF1136 Repeat of unknown function (DUF1136) Moxon SJ anon Pfam-B_19544 (release 10.0) Repeat This family consists of several eukaryote specific repeats of unknown function. This repeat seems to always be found with Pfam:PF00047. 20.30 20.30 20.70 20.30 19.70 20.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -7.48 0.72 -4.17 78 617 2009-01-15 18:05:59 2003-07-22 16:05:55 7 56 46 0 370 869 0 29.10 36 5.14 CHANGED lhh-oQcPp.....uL..c+IppLEsspp...+...pps ....llh-oQHPp.....uL...c+IppLEsspt...t+....-........... 0 135 170 323 +6411 PF06583 Neogenin_C Neogenin_C-term; Neogenin C-terminus Vella Briffa B anon Pfam-B_16188 (release 10.0) Family This family represents the C-terminus of eukaryotic neogenin precursor proteins, which contains several potential phosphorylation sites [1]. 
Neogenin is a member of the N-CAM family of cell adhesion molecules (and therefore contains multiple copies of Pfam:PF00047 and Pfam:PF00041) and is closely related to the DCC tumour suppressor gene product - these proteins may play an integral role in regulating differentiation programmes and/or cell migration events within many adult and embryonic tissues [2]. 25.00 25.00 29.10 27.50 20.30 21.60 hmmbuild --amino -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.40 0.70 -4.86 10 213 2009-01-15 18:05:59 2003-07-22 16:44:25 7 31 76 1 91 194 0 242.30 41 21.17 CHANGED KDLKPPDLWIHHEchELKslDKSscssPsh.TDTPI.posQDlsPlssohpopsp..s+psSYpGtEoE-uhS.......SLAuRRuhRsK..MMhPhDoQPsps.VlSAhPl.oLDsspa....G.LsSPspuY..HtphShhshshssho...shst..pspusposssTPpsshlPsusupst......................................................................................................................s.ssEEssupSlPTA+lRP......oHPLKSFAVPulPsss..u.hpPtlP.oPlhsQ....................................suso.hstppVKTASlGphG+sRSP.hP...VoVPoAP-lhEsu.+hhEDo-s.......................................sYEsDELScEMAsLEGLMKDLNAITuu ...............................................................................................................................................................................................KDl+PPDLWIHHEchEhKsh-Kus.ssss.h....p-oPl..ps.s.QDl.ssss.pu.pop.p..p+ps...S.....apGt-sE-s..S........oLuuRRuhRsK..h......MhPhDuQs.s....ps...h......t.sh.sh..-pspa.....................Ptpta..h...s.......s....hho.....ht.............sp.ph.ss.pss..hhssspsp....................................................................................................................................stppssupolsouphRP......sHPL+SFusP...hlPssh...s..tstl...oPhhsp........................................tu.s.h.hp.VKTASlG.hhG...+.sRsP.hs....VsVPsAP-l..E..ps...+hh-Dsts.........................................saps--LopEMApLEGLMKpLNAITss....................... 
0 20 28 56 +6412 PF06584 DIRP DIRP Studholme DJ, Guo, JH anon Pfam-B_2017 (release 9.0) Family DIRP (Domain in Rb-related Pathway) is postulated to be involved in the Rb-related pathway, which is encoded by multiple eukaryotic genomes and is present in proteins including lin-9 of Caenorhabditis elegans, aly of fruit fly and mustard weed. Studies of lin-9 and aly of fruit fly proteins containing DIRP suggest that this domain might be involved in development. Aly, lin-9, act in parallel to, or downstream of, activation of MAPK by the RTK-Ras signalling pathway. 21.90 21.90 22.20 37.30 20.70 19.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.40 0.72 -4.19 14 225 2009-01-15 18:05:59 2003-07-22 17:01:08 8 4 120 0 127 239 1 104.50 44 15.67 CHANGED FYSslD+shFp.cs-FtphL.cphs.h+sp+LTRsEWphIRpphGKPRRhSttFlpEERpcLpphRcplRph...pthp.pt...phshhcsL.....Pp-..lsh.PLslGp+VhAhh.t ..............FYSsID+....PhFp..c...s-Ft.hLtEp................h...P.L+.op...+LTRsEWshIRR.M....GK.....P.RR.hSsuFhcEERpcLcppRpplRtL.......Qpp+hss..........su.hcsL..................Pcc..lPh.PLslGs+VoApL+.s................................. 0 44 67 99 +6413 PF06585 JHBP Haemolymph juvenile hormone binding protein (JHBP) Moxon SJ anon Pfam-B_19686 (release 10.0) Family This family consists of several insect-specific haemolymph juvenile hormone binding proteins (JHBP). Juvenile hormone regulates embryogenesis, maintains the status quo of larval development and stimulates reproductive maturation in the adult insect. JH is transported from the sites of its synthesis to target tissues by a haemolymph carrier called juvenile hormone-binding protein (JHBP). JHBP protects the JH molecules from hydrolysis by non-specific esterases present in the insect haemolymph [1]. The crystal structure of the JHBP from Galleria mellonella shows an unusual fold consisting of a long alpha-helix wrapped in a much curved antiparallel beta-sheet. 
The folding pattern for this structure closely resembles that found in some tandem-repeat mammalian lipid-binding and bactericidal permeability-increasing proteins, with a similar organisation of the major cavity and a disulfide bond linking the long helix and the beta-sheet. It would appear that JHBP forms two cavities, only one of which, the one near the N- and C-termini, binds the hormone; binding induces a conformational change, of unknown significance [1]. This family now includes DUF233, Pfam:PF03027. 27.40 27.40 27.70 27.50 26.40 27.30 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.35 0.70 -5.56 92 924 2009-01-15 18:05:59 2003-07-23 12:02:26 6 11 65 12 627 1020 0 226.30 17 89.17 CHANGED Mphhhhhh.........hlhhhhhshtttt....................................st.hptCphs.........hspCltpshpp.hh.phss.G..lPch...slssl-Plplsplplpt.....tshshphshpshplhGhss....hplpch....phchp.phphphphthP.plphpG.cYphpGp.llh.......lslpGpGphphshpshphphphphphht..t...ssp..pahpl..pphchphc.l.sphphphpNLFss.sptLspshsphlNpshptlhpphps...slpcshsphhhshhsclhpph.............Phcclh ...............................................................................hhhhh.h....................................................s..ht...Ctht...............hspClhpshpt.hh..htp.G........lPph..slssl-P.hh...l........s.p...hphp......ttsshthphsh.pshplh.Gh.ss............hplpph.............phshp..ph...p.....h..p..h..phphP..p.lph.p...u.p...Y.phpGp..lhh............lslp.upGp.hphsh.ps.hphphp.hphphhp...t.....................ts..pahpl.....pph.phphp..l.tph.p..hph..p.sl.hs.t...sp...l.s.p..hhsphlN....pshptl....h....pphps.....ltpthsp.h.hhs.hhs.phhpphshpph........................................................................ 0 184 250 520 +6414 PF06586 TraK TraK protein Moxon SJ anon Pfam-B_19687 (release 10.0) Family This family consists of several TraK proteins from Escherichia coli, Salmonella typhi and Salmonella typhimurium. 
TraK is known to be essential for pilus assembly but its exact role in this process is unknown [1]. 26.00 26.00 26.00 26.10 24.50 25.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.35 0.70 -5.23 26 481 2009-01-15 18:05:59 2003-07-23 12:08:45 6 7 366 0 71 334 15 227.20 29 80.82 CHANGED M...........hhsllhusslhussh..........tstuspslshss............suphslslSssshNRlslssD..+Is.........slsssputh.....p.pc....psssGslhlp.......shsstshThalpTccGhs..aSltlsPcsssu.colhls..............shstsppcs...tsaEssssY.pptlsplspuhhs....Gphss.Gaphtslspps.......................h......hstsLphp.ttsasGsphpuhhac..lcNtsspslslpEpcFhp.....sust....Alhlspt..pLtPGppsplal .............................................................................h.hhs.........h.....................huspslshss.......................................GsphslslSsss.NhhslP.s-..clh..........................................slss..s.sth.......t.pp.........psss.G.sllls..........ossppP.hohhlp..sc..p..G..h..s...hSlphlP.+.c.ssu.+slpLs...........................ssht.sssccA......ssW..E.s.u.....s.PY...pshlhsl.pultt.......GclPs.Gat.h.ss.pcs..........................ht.........ssssl..shh..tsthas..Gsc.hphhhat...VcNts.pslplpEpsFap.....sush......AVh.hpp..tpLhsGtchclYV.................................................................. 0 19 37 55 +6415 PF06587 DUF1137 Protein of unknown function (DUF1137) Moxon SJ anon Pfam-B_20097 (release 10.0) Family This family consists of several hypothetical proteins specific to Chlamydia species. The function of this family is unknown. 
20.10 20.10 21.60 179.40 17.00 19.40 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.87 0.71 -4.57 3 37 2009-01-15 18:05:59 2003-07-23 12:11:45 6 1 36 0 4 15 4 159.60 70 98.01 CHANGED MTKFLFaGLFCSLuLLllACsThVAIIKVDsICDVSCMNK.HFpcAPPFLKIKKLGV+KQIsSPE+QFFaC+IDKSCMELHFSsoSYuCKElLS+LSGHIpTQshEKtMpFRGNGGLLNYQDsSLsVYDCRFpVDPl..asoPDAE-Eh....AsGGMKTLSLSLL MTKFLFHGIWClVVLlLCAClTALAVVKMGcFTNPTLVHQDssTPAPPFLKIKKLGVRKRIISPEKQhFYCTIDKSCMELHFSNTSLHCRELLSHLTGsLQTETsERAMFFRGTGGLLNYKDYSLSVYNCCFSINs....ssP-uscEh....AEGGMKVLSLSLL. 0 1 1 3 +6416 PF06588 Muskelin_N Muskelin N-terminus Moxon SJ anon Pfam-B_20299 (release 10.0) Family This family represents the N-terminal region of muskelin and is found in conjunction with several Pfam:PF01344 repeats. Muskelin is an intracellular, kelch repeat protein that is needed in cell-spreading responses to the matrix adhesion molecule, thrombospondin-1 [1]. 19.60 19.60 19.70 19.70 19.40 19.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.37 0.71 -4.63 4 139 2012-10-03 19:46:52 2003-07-23 12:22:06 6 16 96 0 85 129 2 172.00 54 26.47 CHANGED .pchLsYpIapaSSYSusYlPpNILlDsPpDtsSRWSspT......Ns..QYllLKLc+sAlVpsITFGKacKsHVCNlKKF+VaGGh-pcpMh.LLpuGLKNDsshETFsLphKp.p...p.hsspYlKIVPLhuWGssFNaSIWYVEL+GlDDs.hlpsph+.hshhhEtcul+hCL+aFRppGah-hapsLpcpoplplEHs ..................................phLsatlaphS.SaSss.YlP.c.......NILVDpPsDQoSR.WSopo...........................N...PP.QaLlLKLc+PAIVpsITFGKYEKoHVCNlKKFKVaGGh..s.-..ENM.............sELL.puGLKND..stETFsLKHcl-..............p.....phFPsRaIKI...VPLh.....SWGP.SFNFSIWYVELpGI-DPc.lVp....sslphYspaREpEAI.RL....CLKHFRQ.psYh-AFcuLQ+pTpltLEHP......................................... 
2 40 44 63 +6417 PF06589 CRA Circumsporozoite-related antigen (CRA) Moxon SJ anon Pfam-B_19386 (release 10.0) Family This family consists of several circumsporozoite-related antigen (CRA) or exported protein-1 (EXP1) sequences found specifically in Plasmodium species. The function of this family is unknown. 22.20 22.20 22.90 25.00 21.40 21.80 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.81 0.71 -4.91 3 21 2009-01-15 18:05:59 2003-07-23 12:28:07 6 1 8 0 7 21 1 134.10 69 98.50 CHANGED MKILSllFL.LFslILsN-ALG-N...sNGchGSGNspKKpsKptSGEPLIDVHDLISDMVRKEEELVclTK+KSsYKLAToVLAoALGVVSAVLLGGAGLVhYNTEKGRHPFpIGuSKsGDuA.scssS.sssEPoP...uP....p-ssPsAophcDssLsSGsEu ....MKILSVFFLALFF.IIFNKESLAEK......TNKGTGSGVSSKK.....KNKKGSGEPLIDV....HD.....LISDMIKKEEELVEVNKRKSKYKLATSVLAGLLGVVSTVLLGGVGLVLYNTEKGRHPFKIGSSDPAtst..................................................... 0 2 3 6 +6418 PF06590 PerB PerB protein Moxon SJ anon Pfam-B_19494 (release 10.0) Family This family consists of several PerB or BfpV proteins found specifically in Escherichia coli. PerB is thought to play a role in regulating the expression of BfpA [1]. 25.00 25.00 127.20 127.10 21.90 19.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.89 0.71 -3.85 2 14 2009-09-11 14:43:35 2003-07-23 12:35:24 6 1 4 0 0 12 0 102.70 94 100.00 CHANGED MKNNLREEKEVVhDGChNVLSLPStWKAITPKKNNsTSEIIVFFIPPKASYHIILKYspTKHCELFFSDHITGEQDlIYSQsAFFSHVINHIIALVDVLNKKSYASNVIKFLITMEGGGDILSESKRAP MKNNLREEKEVVFDGCMNVLSLPSGWKAITPKKNNsTSEIIVLFIPPKASYHIILKYNKTKHCELFFS................................................................ 0 0 0 0 +6419 PF06591 Phage_T4_Ndd T4-like phage nuclear disruption protein (Ndd) Moxon SJ anon Pfam-B_19553 (release 10.0) Family This family consists of several nuclear disruption (Ndd) proteins from T4-like phages. 
Early in a bacteriophage T4 infection, the phage ndd gene causes the rapid destruction of the structure of the Escherichia coli nucleoid. The targets of Ndd action may be the chromosomal sequences that determine the structure of the nucleoid [1]. 25.00 25.00 26.10 25.30 21.00 20.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.00 0.71 -4.64 2 31 2009-01-15 18:05:59 2003-07-23 13:29:45 6 1 28 0 0 26 0 148.50 58 98.82 CHANGED .KYhThpDL.ssGupVlusl+sGEa..Go..pK-hhS+.GFYFhV.up.DhR......VuARFaVGpQRSKQGhsAlLSHIRQtRSQLARThusNNl.YsVhalsAppMKPLTTGaGKGQLALAFTRNHpSEYQTLpEMNRhLADNF+FlLQuY ..............................pYMTlpDLpsAGATsIGslK........sGEa.hhGsP.pKD.....I.LocPGFYFlV.uchsu.....ssVuARFYVG.NQRSKQGFsuVLSHIRpRRSQLARTIA..sNs..lsY.sVaYlPASKMKPLTTGFG..KGQLALAFTRNHpS-YQTLEEMNRMLADNFKFlLQAY.... 0 0 0 0 +6420 PF06592 DUF1138 Protein of unknown function (DUF1138) Moxon SJ anon Pfam-B_19518 (release 10.0) Family This family consists of several hypothetical short plant proteins from Arabidopsis thaliana and Oryza sativa. The function of this family is unknown. 25.00 25.00 25.50 25.50 20.30 19.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.38 0.72 -3.97 5 64 2009-01-15 18:05:59 2003-07-23 13:30:54 8 1 30 0 35 50 0 72.60 62 80.72 CHANGED uKYIIGuLsGSFAlAYVCD+aIADcKlFG..............GTTP+TVosKEWGpATDEKFQAWPRTAGPPVVMNPISRQNFI.VK ...........sKYIluuLlGSFAlAYlCDhhlu-KKlFG..............GTTP+TVosKEWapsTDc.KFQ.AWPRT.AGP..PVVMNPISRQNFI.VK........ 0 4 18 27 +6421 PF06593 RBDV_coat Raspberry bushy dwarf virus coat protein Moxon SJ anon Pfam-B_19630 (release 10.0) Family This family consists of several Raspberry bushy dwarf virus coat proteins. 
25.00 25.00 25.20 405.60 23.80 17.10 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.60 0.70 -5.17 2 18 2009-01-15 18:05:59 2003-07-23 13:33:28 6 1 3 0 0 17 0 272.80 94 100.00 CHANGED MuKKAVPP.VKAQYELYNRKLNRAIKVSGsQKKLDASFVGFSEuSNPtTGKPHADMSMSAKVpRVNTWLKNFDREYW-NQFASKPlPRPAKQVLKGSSSKSQQRDEGEVVFTRKDSQKSVRTVSYWVCTPEKSMKPLKYKEDENVVEVTFNDLsAQKAGDKLVSILLEINVVGGAVDDKGRVAVLEKDAAVTVDYLLGSPYEAINLVSGLNKINFRSMTDVVDSIPSLLNERKVCVFQNDDSSSFYIRKWANFLQEVSAVLPVGTGKSSTIVLT MSKKAVPPIVKAQYELYNRKLNRAIKVSGsQKKLDASFVGFSESSNPETGKPHADMSMSAKVKRVNTWLKNFDREYWDNQFASKPlPRPAKQVLKGSSSK......SQQRDEGEVVFTRKDSQKSVRTVSYWVCTPEKSMKPLKYKEDENVVEVTFNDLsAQKAGDKLVSILLEINVVGGAVDDKGRVAVLEKDAAVTVDYLLGSPYEAINLVSGLNKINFRSMTDVVDSIPSLLNERKVCVFQNDDSSSFYIRKWANFLQEVSAVLPVGTGKSSTIVLT. 0 0 0 0 +6422 PF06594 HCBP_related HCBP_repeat; Haemolysin-type calcium binding protein related domain Moxon SJ anon Pfam-B_20041 (release 10.0) Domain This family consists of a number of bacteria specific domains which are found in haemolysin-type calcium binding proteins. This family is found in conjunction with Pfam:PF00353 and is often found in multiple copies. 21.40 21.40 21.60 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.93 0.72 -4.32 116 953 2009-01-15 18:05:59 2003-07-23 13:37:28 6 179 153 0 316 1046 112 43.60 28 9.86 CHANGED D..Lllplsso......sDplplpshasss........shtl-plpFADGTsWstssl ......................DLllplt..ss........sDplplpsaFpss...............shpl-plpF.uD.G..oshshsp.............. 0 121 261 274 +6423 PF06595 BDV_P24 Borna disease virus P24 protein Moxon SJ anon Pfam-B_20092 (release 10.0) Family This family consists of several Borna disease virus (BDV) P24 proteins. The function of this family is unknown. 
25.00 25.00 36.00 36.00 19.70 19.50 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.41 0.71 -4.55 3 112 2009-01-15 18:05:59 2003-07-23 13:42:16 6 1 11 0 0 86 0 143.90 73 99.99 CHANGED MATRPuSLV-SLEDEEDPQTLRRERSGSP.RPRK.VPRNALTQPVDQLL+DLRKNPSMISDPDQR.TGREQLSNDELIKKLVTEL...AENSMIEAEEVRGTLGDISARIEAGFESLSALQVETIQTAQRCDHSDSIRILGENIKILDRSMKTMMETMKLMMEKVDLLYASTAVGTS.APMLPSHPAPPRIYPpLPuuQssD...phDIIP ..................................................LLcpl+KNPSMISD.DQR.TGREQLSNDELIKpLVTEL...AENSMIEAEplRGoLuDIuARlEuGFESLSuLQVETIQssQ+sDaSDSI+lLGENIKILDRSMKTMMETMKLMMEKlDLLYuosAlGss.APMhPSHPuPs+lYPpL........................... 0 0 0 0 +6424 PF06596 PsbX Photosystem II reaction centre X protein (PsbX) Moxon SJ anon Pfam-B_20149 (release 10.0) Family This family consists of several photosystem II reaction centre X protein (PsbX) sequences from both prokaryotes and eukaryotes. 25.00 25.00 31.00 31.00 20.30 19.70 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.75 0.72 -4.28 22 156 2009-01-15 18:05:59 2003-07-23 13:45:41 6 2 118 15 61 149 93 38.30 48 46.53 CHANGED MTPSLu.NFl.SLlhGuhlVVlPlssALlhlSQpD+lpRs ....hTPSLp.NFLhSlluGu.lVllsIssAllhVSphD.VcRp. 0 17 43 56 +6425 PF06597 Clostridium_P47 Clostridium P-47 protein Moxon SJ anon Pfam-B_20156 (release 10.0) Family This family consists of several P-47 proteins from various Clostridium species as well as two related sequences from Pseudomonas putida. The function of this family is unknown. 
19.30 19.30 19.30 19.30 18.30 17.50 hmmbuild -o /dev/null HMM SEED 456 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.56 0.70 -5.97 12 113 2010-01-04 16:41:40 2003-07-23 13:50:17 6 2 34 0 16 91 2 408.90 26 80.93 CHANGED ThGWDhVauhshphVNct.hphppts.spF..S..hucttplpGsFssWpIhsG......Gsup.lplthPltp...Gsht.hpssshslsGhssslplcLsah.....P.....t.s..otsssphpl.s..hpss.tpps.............t.....tss..........h.pshhptllhphl......pNhcphsalFuolNls..h.pt.phpWhpPs.hpYAYts.sss.psuhLGlLuhsssRsh...u.pLppplDsshlsssupsGhLIScplFlcsllLPsLspsF.ssustssFclhspssp.......hpltsstplsltslpsuuhhYpPhlspFslp.lpsshlphphps+s-ls.Glshhhpshshpshphhhpsssppplsap.spsP...phopcsahsshslIsthlhuslsthlhts................Itutltstls...................s.hs.hlphsss.lpWss...shhslosssLssshphpG .............................................................ThGWDhVhusshcslNct..hch...ts.......tcF.....sh..pspthph....p..GpFssWplhsG......GsGpplRlKhPIKs.....uhhp......h.....ps....sshshs..s.s..s....h..lplc.Lsah.....................s....p..p.s.....s....ts....s...phph.h.h.psppsp.......pcs.....................t...............p.hhptlhtphh........pNhpphshIFusl.ls...t...chpWhpPsthpYuas......s..ss...........s...........s......ssuhLulLshlss+sh...o.p.ppslDss.hl....s.p....so..p.s.....uhlIScphFlcphlLPs.L.spth.pthssssFplhsp.sp.......htlpNsp....pl...sht..slp...s...s...s...h.hh...P...hl...s..p..hplp..lps..shlhlphp.......st.s..cls...Ghpthhphhp..p...hph.htsssppphsac...hpps....phsppst.s.hphl..th.lhuslsthhhts....................ht..s.h.h.l...............................s..h......l.as....p..php.h.h.s..................................................................................................................... 0 1 7 12 +6426 PF06598 Chlorovi_GP_rpt Chlorovirus glycoprotein repeat Moxon SJ anon Pfam-B_19883 (release 10.0) Repeat This family consists of s number of repeats found in Chlorovirus glycoproteins. The function of this family is unknown. 
21.00 21.00 24.40 22.00 20.40 20.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.64 0.72 -4.10 36 321 2009-01-15 18:05:59 2003-07-23 13:53:39 6 11 7 0 0 321 19 34.10 45 25.53 CHANGED aFlGNGohLoulsss....lsustshDIhGNVsus.uNV .aFhGNGotLTGlsss....lsushshD...I.h.GNVhus.uNV.... 0 0 0 0 +6427 PF06599 DUF1139 Protein of unknown function (DUF1139) Moxon SJ anon Pfam-B_20355 (release 10.0) Family This family consists of several hypothetical Fijivirus proteins of unknown function. 25.00 25.00 26.10 25.90 21.20 21.20 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.05 0.70 -5.25 3 19 2009-01-15 18:05:59 2003-07-23 13:56:02 6 1 7 0 1 19 0 275.40 53 99.24 CHANGED M-psps-EYuSY.pStslEFDPpcPpINLVNsDFDEsDYsDL-VlLLosDasclpslAl.RIKNAP-YTsEIFEclDslssFs-LFDoEIlEcWsDhDsFhDLRls-sEs-FEhlSShLT+HhQollsspPslLWoolspLAKpSVIQus-D.FsllNYWchMNRRWEhIs-ELRlsFlFRAFcLKusQhtpVS+ILSsSLhFPGLNLIGK+ShIPMhoNaSIPEYLDHWFPTDDYcSDNYLpFIRFsElssscWKKIVVQaYLRpVFS..+VRTKl.......LIAssDVDaWYoLFMRTLIF+SMl+TKsLIKslLNa ...................................MsYshuscYuSh.+SssLEFDPpDPElNLlNQ-FDEsDYpDL-VN.LS-DLSslNLlAT.RIKNuP-YTsEIF-ShDsP.PFApLl-pEIu-EWCDhsNFhDLRlV-sEs-FEFVSSHITcHLLIlLNSNPNlLWTussLLsKlSLlQcssN.F-llNYWcAMsRRW-lIsD-LKhGFVFRAFsLKuNQFElloKLLSDSLhasGIslIGK.SMlPMlTlHSIs-YlDHWF.T-sapSDNFhSFI+htpITVPKWKKlVVQFYLRQlFS..RsRTpV.......LhApsDlDaWYslFM+TLlFKSMh+TKphlKplLN........ 0 0 0 1 +6428 PF06600 DUF1140 Protein of unknown function (DUF1140) Moxon SJ anon Pfam-B_20379 (release 10.0) Family This family consists of several short, hypothetical phage and bacterial proteins. The function of this family is unknown. 
25.00 25.00 25.30 74.20 22.00 21.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.29 0.72 -3.91 3 56 2009-01-15 18:05:59 2003-07-23 13:57:53 6 1 51 0 5 32 0 100.70 57 95.67 CHANGED MTsEDIVpsYpclILKcIFKEIpp.hKoKERA-ls+pKlAEoGhuVRTSRHWKAsuNlEFYI+EhpctLcQLtELDRpa+WScKLHQDRapFVoKYscVLEEY..RptN ......-llppYpshlLKhIhhcIpp.hKpKE+A-lst.chAEsGs..sVRTStaWKusuNhEFYhpEh.cthsthtElDR.hpWSc+LHQ-phKFVpKY.clhEcY..RpuN..... 0 2 4 4 +6429 PF06601 Orthopox_F6 Orthopoxvirus F6 protein Moxon SJ anon Pfam-B_20433 (release 10.0) Family This family consists of several Orthopoxvirus F6L proteins the function of which are unknown. 21.30 21.30 21.30 38.70 21.00 19.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.53 0.72 -4.05 2 33 2009-01-15 18:05:59 2003-07-23 14:00:38 7 2 20 0 0 17 0 71.80 88 96.03 CHANGED MSKILTFVKNKIIDLIpNDQIKYSRVIhIEESDSLLsVDEVaANHGFDCVEMIDENI.NENlEQYKT-SFhp MSKILTFVKNKIIDLI.....NND.QIKYSRVIMIEESDSLLPVDEVHANHGFDCVEMIDENI.NENlEQYKT-SFhp...... 0 0 0 0 +6430 PF06602 Myotub-related Myotubularin-like phosphatase domain Vella Briffa B, Bateman A anon Pfam-B_795 (release 10.0) Domain This family represents the phosphatase domain within eukaryotic myotubularin-related proteins. Myotubularin is a dual-specific lipid phosphatase that dephosphorylates phosphatidylinositol 3-phosphate and phosphatidylinositol (3,5)-bi-phosphate [1]. Mutations in gene encoding myotubularin-related proteins have been associated with disease [2]. 
21.40 21.40 21.40 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.10 0.70 -5.68 24 1877 2012-10-02 20:12:17 2003-07-23 14:09:10 9 82 275 6 1093 1689 23 293.90 29 40.75 CHANGED pLaAFtapsttst.t.p......ua........................phhchhpEacRh..........................................................Gls..spt........WRloslNcsYplCsoYPttllVPpsloDp...tLtpsupFRuppRlPVloahH.cstAsIsRsSQP....LlGh..spRstc............DE+ll.psl..................................................htspspscchhIhD.........................sRsphsAhAN+ApGuGhEspstY..................pschhFhsI...tNIHshRpShp+l...................hcss.....hptss.spalosL-sotWLpHlpslLpuushlsptl...c.cpsSVLVHCSDGWDRTsQlsSLApLhLDPYYRTlcGFpsLlEKEWluFGH+FucRsGHh................................................................tspsspphSPlFlQFLDCVaQlhcQFPsuFEFsEtFLlplhcHhaSCpFGTFLsN.sE+ERtcp ...........................................................................................................................................................................................................................................................th...............................p..t-hp.R...................................................................shs.....................W..+l....o.th...Np...p.a....p..............l...........s.oYPth...........lhVPt..th...s....-p........l...t..s.uthh...............pttRhP....................s..ls.........a...................h......p....t..s.t.....s.sl..h.R.....s..u...p.............su......t.ps.p.............................................--ph.l....thh.................................................................................................................................................................................................p...hl...hD........................................................................................t+s....h.t.t..............t....h.h.u..c...............h.................................................
......................................hp...h...ht.h...........sl...+.....hp....p....ohp.pl...................................................hphs......................s.......tp.ah.....p..tlc.........po..t....W.L..p.h.....lp........hLptuh.l..sp.hl...............p...tt...sVl.lp........p....p.-G.hD.h..T...s.p.l...su.LsplhL......D.P.a...a........RT....l..cGF.............s......L...l-+-WlshGH.FtpR..hs.ph...........................................................................................................................................................................................................................t.t..p.pp.uP.l.......F........h.FlD.sVaQ...............l..h.pQ.......aP.ttFE....Fsp.ha........L..hh.lh.ph...........hus.auoFLhs.sttpR...h............................................................................................................... 0 353 476 753 +6431 PF06603 UpxZ DUF1141; UpxZ family of transcription anti-terminator antagonists Moxon SJ anon Pfam-B_19606 (release 10.0) Family The UpxZ family of proteins acts to inhibit transcription of heterologous capsular polysaccharide loci in Bacteroides species by interfering with the action of the UpxY family of transcription anti-terminators. As antagonists of polysaccharide locus-specific UpxY transcription anti-terminators, the UpxZ proteins exert a hierarchical level of regulation, insuring that only one of the multiple phase-variable capsular polysaccharide loci per cell characteristic of this genus is transcribed at a time. 
21.20 21.20 22.40 21.80 20.30 20.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.29 0.72 -4.02 13 133 2009-01-15 18:05:59 2003-07-23 14:15:26 6 1 25 0 19 121 0 104.90 51 69.86 CHANGED plpsLQpsAHELLYLGhDGuPIYoDcFspLNpEVhppsssLasp..+GsTsEEEAsLCLALLMGYNATlYs.pGDKEp+hQslL-RsasVL-pLPASLLKspLLThCYG ..............hpsLpphAH-LlYLGhDGuPIYoDchspLNpEVacpsssLYsp..+GsTsEEEAslCLALLMGYNAohYs.pG-KEp+lQplLcRsaslL-pLPASLLKspLLThCYG....... 0 5 11 19 +6433 PF06605 Prophage_tail DUF1142; Prophage endopeptidase tail Vella Briffa B anon Pfam-B_16284 (release 10.0) Family This family is of prophage tail proteins that are probably acting as endopeptidases. 27.70 27.70 28.00 27.70 27.40 27.60 hmmbuild -o /dev/null --hand HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.75 0.70 -5.51 23 808 2009-09-16 13:04:28 2003-07-23 14:19:08 6 22 565 1 80 610 5 344.80 18 55.62 CHANGED hs.plNsptsloFolhpscpNtpsashlsp..cphlha.....sspcYhIpphstps.Gpsht.hplpApH.lh.-lpsphlhpph..........sGs.........................holcshhc..hshpsoG..aoYpltss.hsshphp...shGs.pstL-lhppllcpa.GsEapsss+plp.lh..cplG.ppsshhlRataNhsslshphDssslhTtI+GYGc......................................................................................................................................................................................................................................................................................................................tppsstsphh.....h.hcYpSP.A.ch..aGh......+husslpD-RhTstssLcpthKpplp..csPchSlslsh.phpp...hhth.pcstlGDhlhlls-s...lGlshclRlVuhpp.hshssph...clshush......ppshsclhsplspsh+thp 
......................................................................................................................................h...t.Nttt.lshsh....st..p.........phl.p....t.hl.h.......ptp.p......Y..hI.h.hs.p...tu.pt....lplpApc..hh.c.hp.p.p.hh.....sph.................sss........................................................hohpphhp......h..s...p...tos.....h.p.a.p.hh..ss...hp.p.h.p.hp.........phsp..pstl..-hh......p......phlppa....s..t.-....h.........h......h.........s.........s.....p..........p....l.......t....hh..p.p.....h....u.pc......ss..h..h..lphthNh..pslp.hphsspplhTtI..h..sa...G.p...........................................................................................................................................................................................................................................................................................................................................................................h..s.s.t..p..st......s.tht............h.h...p.......a.p.s....s.s...ch....aGh.............................h.u............s..h...p.-.pc...h...p..s...t...c..s.......Lh....ch..s.cp...plp........sp....Pp..hoh...sls..hh.l.t...........hsh.pphphGDsVh.lhpcp....hG..hs.s..plRllchpp.....pshhst....h....pplslush.................pcshh.p.h.p.thtp......p................................................................................................................................................................................................................. 0 36 62 77 +6435 PF06607 Prokineticin Prokineticin Moxon SJ anon Pfam-B_19802 (release 10.0) Family This family consists of several prokineticin proteins and related BM8 sequences. The suprachiasmatic nucleus (SCN) controls the circadian rhythm of physiological and behavioural processes in mammals. It has been shown that prokineticin 2 (PK2), a cysteine-rich secreted protein, functions as an output molecule from the SCN circadian clock. 
PK2 messenger RNA is rhythmically expressed in the SCN, and the phase of PK2 rhythm is responsive to light entrainment. Molecular and genetic studies have revealed that PK2 is a gene that is controlled by a circadian clock [1]. 21.30 21.30 21.50 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.47 0.72 -3.89 7 187 2009-01-15 18:05:59 2003-07-23 14:30:20 6 5 59 2 84 172 0 94.80 39 70.82 CHANGED M+.phhphslLL....Lll........ssucuAVITGAC-+DhQCGsGhCCAVSlWlRulRhCTPhGppG--CHPhSHK.....................VPa.GKRhHHTCPCLPsLsCs+hsss+Y+Ch .................................................................hhh....hhh...........s.....hh.l....oGuC-.+DspC....Gs.G....h.CCA...h..S.hWh+.u..l..phCsPhGppG-pCH..Pho+K........................l..P..a.....s.......c..R.ha..ppCPChPsLhCpph..................... 0 26 29 41 +6436 PF06608 DUF1143 Protein of unknown function (DUF1143) Moxon SJ anon Pfam-B_19953 (release 10.0) Family This family consists of several hypothetical mammalian proteins (from mouse and human). The function of this family is unknown. 20.40 20.40 20.40 20.40 20.20 18.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.21 2 50 2009-01-15 18:05:59 2003-07-23 14:32:26 6 2 35 0 31 49 0 132.60 55 82.63 CHANGED hAspC.hspphhLQRQNLsCaLpNPHhGSlI.ADGHGEVWTDWNsMSKFhQYGWRCTTNENuYSNRTLhGNWNQERYDL+NIVpPKPLPSQFGHhFETTYDssYspKhP.STHRFKREPHhFPGHQPELDPP+YKCTtKSTYMssYScs .................................h.hahtNPph.GSllpAsGHGEVWTDaNshSKFhQYGWRCTTNEssY.SN+TLhGNWNQERYDl+slV.QPKPLP..SQ.FuHYFETTYD..oSY.N....s.K...hP.S..........T..H...RF......K....REPHhFPGHQPELDP..PpaKs..T..t.+SshM.sYs........................................................ 0 10 13 17 +6437 PF06609 TRI12 Fungal trichothecene efflux pump (TRI12) Moxon SJ anon Pfam-B_19969 (release 10.0) Family This family consists of several fungal specific trichothecene efflux pump proteins. 
Many of the genes involved in trichothecene toxin biosynthesis in Fusarium sporotrichioides are present within a gene cluster.It has been suggested that TRI12 may play a role in F. sporotrichioides self-protection against trichothecenes [1]. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 599 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.97 0.70 -6.33 2 214 2012-10-03 03:33:39 2003-07-23 14:37:16 8 8 86 0 151 7876 315 457.40 22 86.41 CHANGED MTssV.EcGlDLESQPDDRhRApALAToAsELP-GYYpSPRllASFAuFShNVsATYFVLQASASALPNILQDlGQSENpuLFSTLWThGQAVSILhMGRLTDRFGRRPFVIhTHIlGLVGAIVGCTAsKFNTLLAAMThLGVAAGPAGuSPLFlGELMSNKTKFLGLLhVShPslshs.huPYhGQRLuIQGsWRWIFYIYIIhSsIAVhLIllWYaPPSFtQLHGKKspKR-ELAKlDWIGIhLV.sGsSLFLLGVSWGGpPNsPWNSuKlIGLhoSGhGoLVlFALYEVaGKP.pPhlPPuLFKDTRGFVCILlISSIMGuMpLsLsIhYPQQVlNIFGSSLKNWpETAWMoATASFGThAGVhlLGslFHLlRHIRWQILVGAhWLTAFLGAMSSlNRDNKNuAIALShhoGFVVuWAQDITMLhVQFITTDEsLGVAFuVVAAuRPFhGSIFTAAFISlYoNpYP+EluoHLoSAhRGTshPQuSFsSLLEAApoGRh-AVpALPGMTspIuuVVSpAMADSYTASYANVYYFAMALGVIPIIASLCMRDhDpYLTDHVPHQlYDRKpAcKDVL-GsS-o.sSPhIhShs-hc. 
......................................................................................................................................h...............................................................................................................................................................................................................................................................................................................h.....h.....s......h..........h..................h.....s...........u...h.........h..............h....h....G.p.......l..o..D..h..h.G.R.+............h..h....l...h..s....t....h...l..h.l..l..G...........I...l...s..s..s......A........p...s.........h..........s.....t....h...l..u...u.......s...h............h.......G...h........u....s......G.......h......t..........h....h..........h........h......h.......u.....-.....h...........h...........s................+......h......+...........h.........h.......s......h.....h........h......h.........h.....s...................l.....h..................................h.....u.....s......h.......h.........u.........p..........h.........h.....s.........h.....t.........s.........s..........W.....R..a........h......h.....h..h...h....h..........l.....h...s.......s......l...u.....h....l....h......h......h..............h..........a..........Y..............a............P..............P..........s.....................h...................p..........h............p..........s........p.......t..........h.......p.........p.............t................p.....................l.......t............c........l........D.....a........l........G...h..h...L......h..h..s...G.l.s....L..FL.l....G....l....................s......a..G..G..............t......................s......W..s.S...u.+.l..l..u...........l.l.....h....G..h.......s..h..L..l.s..F....s..l...a....E.......h....a.......s...t.................p....p.............P.....h......h.....P.......p...L...h.p.......p............R....s......h...s....h....h....l....l.....l......s....h..
.l.......G......h..........h...h..s....h.......h....h...a..P....p....t.h.....h...s...l..a..s.....p...s...............................h....s..h...h.......s...h.....s...........h....s.h......u....h.h..h.u..s....h....l..............h....h......l....h.....p.......h....h....t...c...h......+......h...........h...l.......h..u...s......h...h..h...s...s...h...h..G...s....h....u...s..h..p.....p..s........t...h...h....h....u.h........h...h.....s....s.........h..s...h...u.h....h...............s........h....h......s....p...h......h....s....s......t.....p.................l..u...h..s.......u..l..h....h.......s..........R.........h....h....G....u....l....h.h.s....h....a...........s..l.h........s.......t.h...............h...s....t.........l....s.............s...h...h................s...h.s...........t...........h............h...h..t......h.......h.............s....................h........l........s.u.h.s.......t.......h...t...hh.......u....h.t........uas.u...h.......s..ahh..s.hs.h.uhl..shh.sshhh.....s...t...............................................hh............................................................................................................................................................................................................................................................. 0 41 82 125 +6438 PF06610 DUF1144 Protein of unknown function (DUF1144) Moxon SJ anon Pfam-B_20026 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 
25.00 25.00 32.00 32.00 21.40 19.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.76 0.71 -4.43 14 617 2009-09-10 16:34:19 2003-07-23 14:39:04 8 1 613 0 40 182 3 138.80 78 95.31 CHANGED Mh..pss.phRsAsADTFAMVVaCFlsGMhIElhlSGMoFEQSLuSRLLSIPVNIhIAWPYGhaRDahlRput+luss+.ah+sluDllAYVoFQSPVYAsILhsVGAshcQIlTAVoSNhVVShhMGlsYGYFLDhCRRhFRVs ....MF...SPQSRLR.HAVADTFAMVVYCSVVNMhIElFLSGMSFEQShhSRLVAIPVNILIAWPYGhYRDLhMRsARKlSPo.G.WhKNLADlLAYVTFQSPVYlA.ILLsVGADWHQIhAAVSSNIVVSMLMGAVYGYFLDYCRRLFKVS............ 0 4 10 25 +6439 PF06611 DUF1145 Protein of unknown function (DUF1145) Moxon SJ anon Pfam-B_20029 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 20.60 20.60 21.60 20.90 18.50 18.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.76 0.72 -4.34 33 759 2009-01-15 18:05:59 2003-07-23 14:41:03 7 1 737 0 88 256 38 60.30 54 67.24 CHANGED hllsLGKhlhLhhWhhllhNLlhPFssslslhlslhhshhllMHslQlllhpush.tc..s ......MLINlGRLLMLsVWuF.LILNL..VpPFP+PLN..IFVNVALlFhlLMHGhQLALLKSTlPKDu......... 0 8 25 56 +6440 PF06612 DUF1146 Protein of unknown function (DUF1146) Moxon SJ anon Pfam-B_20141 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 28.30 27.60 24.20 23.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.15 0.72 -4.05 27 920 2009-01-15 18:05:59 2003-07-23 14:47:08 6 1 916 0 83 368 1 47.40 37 64.26 CHANGED LhFIsluaWALpul.+h-phh+ps..+shQsplLhlhloIulGhhVSsFFL ......llhIhluaauLpul.+h-phhKps..pstpl+lLhlFluIslGahVSsFhl.......... 0 18 43 62 +6441 PF06613 KorB_C KorB C-terminal beta-barrel domain Bateman A anon Pfam-B_20369 (release 10.0) Domain This family consists of several KorB transcriptional repressor proteins. 
The korB gene is a major regulatory element in the replication and maintenance of broad host-range plasmid RK2. It negatively controls the replication gene trfA, the host-lethal determinants kilA and kilB, and the korA-korB operon [1]. This beta-barrel domain is found at the C-terminus of KorB. 25.00 25.00 26.50 25.50 24.40 23.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.74 0.72 -4.52 4 67 2012-10-01 19:11:18 2003-07-23 15:26:40 6 3 57 6 10 53 6 59.80 61 17.01 CHANGED uDPD+hKKsll.VEHDsRsAtllLNRRPsstGhhal+YEDsGt-hEs-hGslKlshLhEu ....sDPDKLKKAIlQVcHD...sR.PARLlLNRRPsu-GaAWLKYEDDGpEFEADLusVp.LVALlEG. 0 1 5 8 +6442 PF06614 Neuromodulin Neuromodulin Moxon SJ anon Pfam-B_20438 (release 10.0) Family This family consists of several neuromodulin (Axonal membrane protein GAP-43) sequences and is found in conjunction with Pfam:PF00612. GAP-43 is a neuronal calmodulin-binding phosphoprotein that is concentrated in growth cones and pre-synaptic terminals [1]. 25.00 25.00 51.40 51.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.32 0.71 -4.33 3 57 2009-01-15 18:05:59 2003-07-23 16:13:59 6 3 36 0 20 47 0 160.20 64 70.86 CHANGED sKKDEuPuu-uVENKcGEAoTATEAosA-ouKsDEPoKDG.........SoPoEEKKGpGuuDsuoEQPAPQAsss...SEEKsASA.ETESATKASTDNSPS.KAD-APsKEEsKKADVP.hlTshAsTTPAAEDATAKAssQPppETuESSQsEEKpDAVEETKPoESAQQEEuKEEEuKADQENA ...........................pKKDEuP.s.A-GVEKK.GEGsss.TEAAPAsGsKs-E.s.uKAG.........EoPSEEK.KGEG...DsuoEQsAPQAsAs...SEEKA..GSA...ETESATKASTDNSPSSKAEDAPAKEEPKQADVP.AVTsA..AATTPAAEDAAAKATAQPPTETuESSQuEEch.-AV-ETKPpESApQ-EsKtEEscADQEpA................... 0 1 1 5 +6443 PF06615 DUF1147 Protein of unknown function (DUF1147) Moxon SJ anon Pfam-B_20563 (release 10.0) Family This family consists of several short Circovirus proteins of unknown function. 
25.00 25.00 32.70 53.20 20.20 19.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.99 0.72 -4.14 3 8 2009-01-15 18:05:59 2003-07-23 16:21:12 6 1 6 0 0 6 0 54.50 98 100.00 CHANGED MYTSLWGHLGVVKANGLLILQTRKPHTGNHLETSGGMVTMVKKWLLLMTFMAGCRGMIY MYTSLWGHLGVVKANGLLILQTRKPHTGNHLETSGGMVTMVKKWLLLMTFMAGCRGMIY 0 0 0 0 +6444 PF06616 BsuBI_PstI_RE BsuBI/PstI restriction endonuclease C-terminus Vella Briffa B anon Pfam-B_16289 (release 10.0) Family This family represents the C-terminus of bacterial enzymes similar to type II restriction endonucleases BsuBI and PstI (EC:3.1.21.4). The enzymes of the BsuBI restriction/modification (R/M) system recognise the target sequence 5'CTGCAG and are functionally identical with those of the PstI R/M system [1]. 25.00 25.00 30.50 30.40 23.20 22.80 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.64 0.70 -5.62 22 135 2012-10-11 20:44:44 2003-07-23 16:25:22 6 3 130 2 34 117 7 280.90 40 82.08 CHANGED EutpIhhp.uhstp.ps....EpSALs..............s-upp.hhshsshhshtcpthu+.YAsNTREolRcpTl+phl.ushAl........s-psshPssSs+ssYplpsphhsL.........LcuhsoshhpcpLsthhhpRhsLlpphA.spttspIsVphPsGcphpLuPG.pS.LhKsllEpFAPRFhss.stllalu-o......GsKhshhDcpLhpp.LGlslcuctchPDllLh..tcsah......lhlEsVsScGPlstcR+ppLtpLhpsu...pssllaVTAF.sRut..hp+hls-lAWcT.VWhAspPsHLI+h.sGtphltPap .....................Atpllhpluhsttpps....-puAlshLAlhslp.tptW....ssA......ps.....hlshp.s.lhsahccta.........s.+..........YAsNoREThR+pohHQFhtAslsl.ss-cPs+ss.NSPpssYplpsthlsl.........L+sas......ost....ac..ppLs..saltp+tsLhppaApp+ptsplPVplssGpphpLSPGtpspLh+ulIE-FAPRFsss.uhllYlGDT......upKhshh-tthLtt.LGlslss.HsK...hPDVVLa.ps+sWL......lLlEuVTScGPVss+R+tELtcLhtsu...suGll.aVTAF.sRst...h...p+alu-lAWETEVWlA-tPsHhIHa.NGs+FLGP.......... 
0 9 24 27 +6445 PF06617 M-inducer_phosp M-phase inducer phosphatase Vella Briffa B anon Pfam-B_16267 (release 10.0) Family This family represents a region within eukaryotic M-phase inducer phosphatases (EC:3.1.3.48), which also contain the Pfam:PF00581 domain. These proteins are involved in the control of mitosis [1]. 21.70 21.70 22.70 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.95 0.70 -5.32 8 226 2009-01-15 18:05:59 2003-07-23 16:31:08 8 5 44 1 76 243 0 186.80 32 45.42 CHANGED LDSPushDsptspE.................hslpphpShsQ+LLGsSPAh+p.osSsuLDp..hp.tD..th.sssENKENEua.FKhP.h+.h....hpuhh.htpt+-hhsQRpsSAP.hMh.Ss.p+h-l...E-ushhhLuoPpo..ssspsss....cED......DGFlDhL-uEpLc--.tsPsu........MtsLhoAPLVhphpc.ptt...hhsRs.tLaRSPShPsSlsRPhLKRlERspDc-sPspsKRR+Ssos..ptc.tcsppP+p...pl...hp.t.SLCchsIEsl .........................hDpsu.hDsp...p.....................htt.psh..cLhtpSPs.h..opspu.ct........s.....th..s..spsKEN...-s..hK...........................pp.hhpp..ut..........o.s......p..p.....tt.p.h.hh..p.....ssstt.p........................tED......pGF..h-hhps-.l....cs.-....st.........h..LhssPhh.............t.....................s....+s....tLacSPSh.sp....sh...Rs...hLKR..-+.p-ps..P.psK++ps...s.............p....ps.p.p....h............S.sp.pIpph............................................................. 0 4 7 25 +6446 PF06618 DUF1148 Protein of unknown function (DUF1148) Moxon SJ anon Pfam-B_20595 (release 10.0) Family This family consists of several Maize streak virus proteins of unknown function. 
25.00 25.00 248.80 248.60 21.10 17.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.68 0.71 -4.13 2 13 2009-01-15 18:05:59 2003-07-23 16:45:23 6 1 13 0 0 6 0 114.00 99 100.00 CHANGED MRSHTPPEGTVIMVVPtCWSVWIWRDGLSALLPALFSPAEEGFFLETRLFQFESSPRFLLDVDMADCLSRAWNKWPGIALTWWCWYIRDRVAPIHSLSTKYpLLRGSALASLQN MRSHTPPEGTVIMVVPACWSVWIWRDGLSALLPALFSPAEEGFFLETRLFQFESSPRFLLDVDMADCLSRAWNKWPGIALTWWCWYIRDRVAPIHSLSTKYQLLRGSALASLQN. 0 0 0 0 +6447 PF06619 DUF1149 Protein of unknown function (DUF1149) Moxon SJ anon Pfam-B_20513 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 32.30 32.00 21.00 17.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.73 0.71 -4.21 12 561 2009-01-15 18:05:59 2003-07-23 16:47:41 6 1 557 5 40 153 0 123.10 52 97.03 CHANGED MplhR-pEFVspYHaDsRNhtWEcENGsPETslcVsFQLlcp-cttp.......sTsllslLpFhIVhDc..FVISGhISQhs+lhsRllscPsEhsQ-ElcpLutPLl-hlcRLTYEVTEIALDcPGlsLEF ............................Mpl+R-.pEFVspYHaDsRNhtWEpENGsPETcl-VsFQLlp..c..-pENp.......sTullllLsFhIVFDc...FVISGsISQlNHIpsRlls-sSElsQEEVEpLuRPhlshlpRLTYEVTEIALDhPGlNLEF........ 0 5 14 24 +6448 PF06620 DUF1150 Protein of unknown function (DUF1150) Moxon SJ anon Pfam-B_20612 (release 10.0) Family This family consists of several hypothetical bacterial proteins of unknown function. 25.00 25.00 26.10 36.90 24.30 17.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.35 0.72 -4.16 48 249 2009-01-15 18:05:59 2003-07-23 17:04:49 6 1 246 0 83 175 52 74.80 38 89.73 CHANGED sphs.t...thussplsYVRslpss-Lspch........splsssppLaAlHuA-GpplALscDRchAFshAppp-hpPVoVH ........h..ho.pphApLGtGclAYVRplcs--lspphPt.....hs.lss.u.hpLaALauAsGpPIsLsDsRpsAhtsAtpc-LpsVolH... 
0 21 51 61 +6449 PF06621 SIM_C Single-minded protein C-terminus Moxon SJ anon Pfam-B_21144 (release 10.0) Family This family represents the C-terminal region of the eukaryotic single-minded (SIM) protein. Drosophila single-minded acts as a positive master gene regulator in central nervous system midline formation. There are two homologues in mammals: SIM1 and SIM2, which are members of the basic-helix-loop-helix PAS family of transcription factors. SIM1 and SIM2 are novel heterodimerisation partners for ARNT in vitro, and they may function both as positive and negative transcriptional regulators in vivo, during embryogenesis and in the adult organism [1]. SIM2 is thought to contribute to some specific Down syndrome phenotypes [2]. This family is found in conjunction with a Pfam:PF00989 domain and associated Pfam:PF00785 motif. 21.50 21.50 21.50 21.50 21.40 21.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.76 0.70 -4.83 5 112 2009-09-10 21:39:22 2003-07-24 10:21:55 7 9 43 0 54 89 0 243.70 41 40.29 CHANGED SPosTu.......sEsRKuuKSRso+sKoKsRTSPYP..QYSuFpsDRSESDQDSsWGGSPLTDoASPQLh-ss-csuo....SCsYRpYSDPtSLCYG.FPL..DcHsLScc+sHhHocsC-...uusCEuuRYFLGTPQuGREsWWcsARSlLPLsKSSPENtcua.ElousHtA.lHslcuRGHWDEDSAVSSsPDu..uGSsS-SGDRa+s-pa+SSPpEPSKhETLIRATQQMIK......EEEsRLQh+KhPs-hsLAspsuLuKuap.......spasQushsusVCRusu.s.......................s..spSPssLSRLSS..PpP..............................Dclo+Ss 
.................oshsT........p-sRKhsKs+.op.K..oK.R....Ts.PYP..QYSuFph-+..Es.s.p.upWtuS..PhsssAuPQ..p.t...scsus......hhh.paS...ShpYG.FsL..Dpphhsp.c+..hh.sp....hsp...Gu.CEsuRaFLuT..su.tEs.Wt.upshlP..op.usscsh.t...sshs+hs..s.phcu...uth.t...cc..s.s..s..Ps.....ssut.cptstht.t.http..........p..s.t......shhts....phht...........sph...h.................................................................................................................................................................................................................................................................. 0 2 6 20 +6450 PF06622 SepQ SepQ protein Moxon SJ anon Pfam-B_21060 (release 10.0) Family This family consists of several enterobacterial SepQ proteins from Escherichia coli and Citrobacter rodentium. The function of this family is unclear. 22.70 22.70 22.90 23.70 22.40 22.60 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.06 0.70 -5.53 2 115 2009-01-15 18:05:59 2003-07-24 10:27:28 6 2 105 0 1 46 0 275.20 84 98.26 CHANGED Mp.LNSQ.NMKINDFYLPLLsVIGhGRLYITsEGHACHAYFREVSGNGhRFTLshSGYEGpFWlSEEQhhQWCpELFPYS-SRLIPEDhIKLMlLWshpssLPEsDsSVDDVQFThLNKDlYPVIENNNGcNRLNVIILchTVQSLQYLIN-NWQhVPHSNTlFFDGYIAPGWTDYPlTcLolGDSLRLYHVDDSpERcCWLVINNPLATVKLsDNNL.lsDVQAAD.LChISNEsVMsRIYCsIGTIpVDIHhLRNhKKDDhIsSsGYHLFGGsRLIRNNThIAYGoIlKINEDFY.slSlVCD .................Mp.LsSQLNMKINDFYLPLLPVIGTGRLYITScGHACHAYFREVSGHGIRFTLTYSGYEGRFWISEEQFIQWCQELFPYSESRLIPEDhIKLMILWV...MQTALPE.GDVSVDD...VQFTMLNKDlYPVIENNNGENRLNVIILETTVQSLQYLINDNW......QFVPHSNsLFFDGYIsPGWTD...YPVTEL...pVGDSLRLYHVDDSpERcCWlV.INsPLATVpLsDNN.L..lsDV..AADLLsslSNEsVMsRIYCsIGTlHlDIHMLRNsKKDDIIsSsGYHLFGGCpLIRNNTTIAYGSIVKINEDFYFTVSlVCD..................................................... 1 0 0 1 +6451 PF06623 MHC_I_C MHC_I C-terminus Moxon SJ anon Pfam-B_21327 (release 10.0) Family This family represents the C-terminal region of the MHC class I antigen. 
The family is found in conjunction with Pfam:PF00129 and Pfam:PF00047. 20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.72 0.72 -6.86 0.72 -4.47 14 3480 2009-01-15 18:05:59 2003-07-24 10:35:40 6 6 87 0 199 3309 0 27.80 76 7.99 CHANGED NoGGKGGsYs.AsupDSuQuSDVSLsssK .........S.SGtKG....GSYSQ........A.ASSDSAQGSDVSLTA............. 0 71 71 72 +6452 PF06624 RAMP4 Ribosome associated membrane protein RAMP4 Moxon SJ anon Pfam-B_20959 (release 10.0) Family This family consists of several ribosome associated membrane protein RAMP4 (or SERP1) sequences. Stabilisation of membrane proteins in response to stress involves the concerted action of a rescue unit in the ER membrane comprised of SERP1/RAMP4, other components of the translocon, and molecular chaperones in the ER [1]. 21.00 21.00 21.00 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.85 0.72 -4.43 16 291 2009-01-15 18:05:59 2003-07-24 10:42:24 7 7 191 0 175 245 1 60.90 43 70.61 CHANGED MuusQRM+htNcpaspNlspRGNVPKSh+sp.-cKaPVGPhLLuLFlFVVCGSAlFQIIppIph ..............................sppRh.+.h.uNcK.a.s.K..N.l.T...p.R...GsVscop...+p.....p...p...p...chPV.GPhLLu...lFlFVVsGS...ulFQIIpshp............ 0 57 96 143 +6453 PF06625 DUF1151 Protein of unknown function (DUF1151) Moxon SJ anon Pfam-B_21020 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
25.00 25.00 31.80 31.00 19.70 19.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.79 0.71 -4.38 9 174 2009-01-15 18:05:59 2003-07-24 10:44:28 6 3 74 0 98 151 0 114.10 50 72.76 CHANGED hucPshl-ssssELIhP+KLlNPstsSts+Q-LHRELLhNpKR.GLslppKPELQ+VhEKRKccpllcppc..EEpptp+.....o-LEpELh+RpQ+L...EphEhcptp.pEEpcp.....PEFl+V+upLR+ .................AcP-Yh-tsss.ELI+P.+KLlNPVKsSRsHQ-LHRELLMNpKR...GLuhpsKPELQ+VhE+R+RsQl..lKp+c...EEtpt++.........osLEtE.Lh+RQQ+L...-QLE.pptt.p..EEpcs........PEFl+V+tNLRR................... 0 15 29 60 +6454 PF06626 DUF1152 Protein of unknown function (DUF1152) Moxon SJ anon Pfam-B_21029 (release 10.0) Family This family consists of several hypothetical archaeal proteins of unknown function. 20.60 20.60 20.70 21.80 20.50 20.30 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.90 0.70 -5.30 14 77 2009-01-15 18:05:59 2003-07-24 10:46:02 7 3 71 0 40 74 6 273.70 29 79.54 CHANGED LllulGGGGDVVuAhslsthltp...G.csslGslsWERhVhDPhPGPlshsplpp.sptlsctlhlssscoashRGGRhhhsQsspsucsLspp.V..ahlD.hcGsptlscuLpchhth.thDtllGVDsGGDlLApGsE-sLtSPLADulsLAsLs+lc....puhLtVhG.GuDGELsh-YllpRlu-lA+pGGhluhhGlscpssclLccllchssTEAStlslt.Ah+GcaG.hpIRsGsRpVhlsshuslhFhhDPpslhphsphA+ll.csotol-EAsctLp.phGlhTElchEcsL..Apt .......................................hhhuhGGGGDlhsuhhhhthh.t...t....pshluslsW-Rhh..DPhPG...Ph.hpphpt..h..lsttlhhls.pshs.t...ssh.hhsphsp..hhp..hht.....hhhlsspsGs.....psltculcphhpphthDtllhVDsGGDlLucGsEpsLtS...PL...tDuloLAuls.plpt......sshlsl.hG.GsD..G..E..LstshlLppluclspt..sGhLuhhul..sp.........p..s......s......p......h......l......cp....ll.......p...p.sso....E.A....Stl...s.......ht...Ah+Gh.aGph.....pl..Rsus..p..plhlsslsulhahhD.pt.l.hpts.hhphl..psotslc-Apphlp.thsh.oEhphE..h...t..................... 
1 19 28 35 +6455 PF06627 DUF1153 Protein of unknown function (DUF1153) Moxon SJ anon Pfam-B_21038 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 23.50 23.50 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.98 0.72 -4.12 22 282 2012-10-04 14:01:12 2003-07-24 10:47:34 6 1 259 2 92 204 26 84.70 58 93.15 CHANGED Mah+KhpGP+tVsLPDGoshTpADLPPssT+RWVApRKAuVVcAVttGLlot-EAhcRYsLS-EEFtuWtpAlscHG.suL+sT.plQcYR ...................psp.+hVIGPDGoPLTlADLPPssT+RWVlRRKAEVVAAVRGGLLSl-EACpRYsLTsEEF.hSWQpuI-cHGhsGLRsTRIQpYR.............. 0 24 58 71 +6456 PF06628 Catalase-rel Catalase-related immune-responsive Vella Briffa B, Coggill P anon Pfam-B_16304 (release 10.0) Family This family represents a small conserved region within catalase enzymes (EC:1.11.1.6). All members also contain the Catalase family, Pfam:PF00199 domain. Catalase decomposes hydrogen peroxide into water and oxygen, serving to protect cells from its toxic effects [1]. This domain carries the immune-responsive amphipathic octa-peptide that is recognised by T cells [2]. 20.90 20.90 20.90 21.50 20.50 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.86 0.72 -4.11 343 4316 2009-01-15 18:05:59 2003-07-24 10:55:15 7 11 2870 287 1207 3539 63 67.50 25 12.26 CHANGED chctsp..D.pasQssthap.sh..sssE+p+llsshstpL....upsspt....lppRhls.hhtpsDsshuppVA.cuLs ..................p.tpt.-.pasQstthap.sh....sst....-....+p+.l.....hsshutpL....spls.ct......IppR.hls.hhtcs.DsshuptVAcsLt........... 0 326 674 986 +6457 PF06629 MipA MltA-interacting protein MipA Moxon SJ anon Pfam-B_8359 (release 9.0) Family This family consists of several bacterial MltA-interacting protein (MipA) like sequences. 
As well as interacting with the membrane-bound lytic transglycosylase MltA, MipA is known to bind to PBP1B, a bifunctional murein transglycosylase/transpeptidase. MipA is considered to be a structural protein mediating the assembly of MltA to PBP1B into a complex [1]. 21.00 21.00 21.00 22.80 20.90 20.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.07 0.70 -4.94 16 1671 2012-10-03 17:14:37 2003-07-24 11:34:39 7 3 1070 0 339 1085 89 223.70 28 84.76 CHANGED phslGs...uushssc..h.uu.cchpltshPhlshptts....hhhcssshuhtlhsssshpluhssthshstt.....csssptt........ptlss.................+cushssGlthth.hs.hplpsphp..tsl.ssscGhpsslshshshplu..phplssuhulsatsppasptYaGVssppuAtSsLsp.YsusuG.hsssluhsspYhls-phshtstsshoRLtusstcSPlVccchphshhhu.......hsYpF ........................................................................................................................................t.holGu...G..s.uhspp....acs....c..p.s..s.h...s..l..P..l..l.s..Ycu-s...............Fah.c.s.......h.....s........h.....G..a..h.L....h.....p..s..s.....s.....s..p.....lul..s.uh....a....ssh.th...........................cs.sc..ss.ctth..........................ptL-c.........................................Rc.u..oh..h.uG.l.u...h......t...a.....h...p......p.....a.G....h....l.c..s.s..hs.........sDs..h....s...s..u...s......G.......h....s.......h.......c.......h.u.hh...a.p.h.p....hu............sh......s...l.oPulGl..pasocs..hscYYYG.VoppE.u...s......+.........S........G.....l.....tu...Y.....s...s...s.s..u...as...sh..lp.lous.Y....s..hs....tc.....W.....s.....lhusupYo..+L.s.s-.l.s.D.SPh...V-...+....sh...sh...hh...ssG...loYcF.................................... 0 55 144 243 +6458 PF06630 Exonuc_VIII Enterobacterial exodeoxyribonuclease VIII Moxon SJ anon Pfam-B_11449 (release 9.0) Family This family consists of several Enterobacterial exodeoxyribonuclease VIII proteins. 
25.00 25.00 28.90 28.40 24.60 24.60 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.14 0.71 -4.95 4 316 2009-01-15 18:05:59 2003-07-24 11:35:34 6 4 227 0 12 263 0 174.40 44 23.76 CHANGED huhh.hhhscKA+Kp.G..suhhWsoschEuss.ApLshhllcuGhc.sDahKsVtsNhPVVN-LPPEGphDhTFCp+YpLu.cDuhTahhIPGs..PuosAtDps....sssssTp...sGEDhTE....EEN..h.lS........stpLPlRahs.H.....hTasupDth.hhHlsRApc..tlTsLthspcsShlpsL .......................................................shhs.hhscKs+Kp.G.KsuhhWsosch.Ssu.uphshhhscuGhc.....cDahKsVtsNhPVVNDLPPEG.hDhpFCsRYphu.cDuhThhhIs.hs..ss.s.ss.t-ps.......................ssss.NTs....hsuEDhoE............hE-s...h.lS.....................stpLPlRahs.H.....hT.asupDth.thHlspspc.ssVTALthspcsShLpsL................................................................................................ 0 1 2 7 +6459 PF06631 DUF1154 Protein of unknown function (DUF1154) Vella Briffa B anon Pfam-B_16329 (release 10.0) Family This family represents a small conserved region of unknown function within eukaryotic phospholipase C (EC:3.1.4.3). All members also contain Pfam:PF00387 and Pfam:PF00388. 19.80 19.80 19.90 19.90 19.30 19.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.33 0.72 -4.30 11 207 2009-01-15 18:05:59 2003-07-24 11:38:13 6 15 89 0 109 225 1 45.30 44 4.24 CHANGED +c..shsPlslEsL+ppKuahKlhKKQQKEL-oLKKKHsKE+suhQK ........h......ltshslE-LKQpKualKlh...KKQpKELcsL+KK.HtKcpsshp....... 0 23 31 68 +6460 PF06632 XRCC4 DNA double-strand break repair and V(D)J recombination protein XRCC4 Moxon SJ anon Pfam-B_21077 (release 10.0) Family This family consists of several eukaryotic DNA double-strand break repair and V(D)J recombination protein XRCC4 sequences. In the non-homologous end joining pathway of DNA double-strand break repair, the ligation step is catalysed by a complex of XRCC4 and DNA ligase IV. 
It is thought that XRCC4 and ligase IV are essential for alignment-based gap filling, as well as for final ligation of the breaks [1]. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.14 0.70 -5.59 5 165 2010-01-05 15:47:52 2003-07-24 11:41:38 7 2 131 30 88 172 0 255.40 26 82.27 CHANGED MERKVSRIsLoSEPslsaFLQVuWEKTLGSGFVITLTDGHSAWTGcVSESDISQEADDMAME+-KYVDELRKALVuuAGPA.....ssYsFsF.....SKEopaFSaEKsLKDVSFRLGSFcL-KVsNPAEVIRELICYCLDTIAEpQAKNEHLQKENERLLRDWNDlQGRFEKCVsuKEALEsDLYpRFILVLNEKKAKIRSL+.chLsElQEhEK..slKpKpETo.......ssSDposDcDulYD...GSTDEEsEuPsss.........................StpsPAsls+DDSLlSS.DlsDIAPSRKRRQRMQKNLGTEPKhAoQEpQhQEKEKsssusPposKK-tsSAEsMSLETLRNS.SP-DLFD ...............................................................................................................................................h.-s.s..sa.sphp.t.lt......ttt.t....tp...aht.htphh...t.ss............apht...............sp.p...ph.h.hc...K.ph..p..s.l....ph....RL..G.s.h.pLppstss.sp.hlp.........-.lhsas...lsshsc.ps..csp...c...L..pp....cs...-...+...L........p...-h...s...clp...sphE+hlssKcthEs-Lap+FlhVLNEKKsKIRsLp.ch...L.....sp.....sp...p..pc.....shppc..t.pss.................hps..c.sss.cc....s............D......t.o...s.c-c.p.pt......................................................................................................................................................................................................................................................................................... 0 20 38 62 +6461 PF06633 DUF1155 Protein of unknown function (DUF1155) Moxon SJ anon Pfam-B_21101 (release 10.0) Family This family consists of several Cucumber mosaic virus ORF IIB proteins. The function of this family is unknown. 
25.00 25.00 86.00 85.80 20.70 17.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.28 0.72 -4.69 2 5 2009-09-11 08:20:59 2003-07-24 11:44:03 6 1 1 0 0 8 0 41.00 79 91.52 CHANGED MSATLSTTLSFEPPLSLLAEPGTWFADTMDFc+pp.lthhpp MSATLSTTLSFEPPLSLLAEPGTWFADTMDFRKKHSVRWYpp. 0 0 0 0 +6462 PF06634 DUF1156 Protein of unknown function (DUF1156) Vella Briffa B, Eberhardt R anon Pfam-B_16387 (release 10.0) Family This family represents a conserved region within hypothetical prokaryotic and archaeal proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 21.20 21.20 24.20 22.70 20.50 19.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.25 0.72 -4.22 43 252 2009-01-15 18:05:59 2003-07-24 11:47:17 7 11 214 0 122 263 47 69.50 31 7.82 CHANGED sslPlcpIsttut+EKph..ppG.phosLHhWW.ARRPLuusRAllhAuLls.....................t.th-tpphhpll .......shPlptlsttut+E+ph..ptu.p.psLHhWW.ARRPLussRAllhAsLls................................t.chp.h....................................... 2 53 89 106 +6463 PF06635 NolV Nodulation protein NolV Moxon SJ anon Pfam-B_21143 (release 10.0) Family This family consists of several nodulation protein NolV sequences from different Rhizobium species [1]. The function of this family is unclear. 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.41 0.71 -5.16 2 48 2012-10-02 21:03:42 2003-07-24 11:47:37 7 1 43 0 13 247 7 191.10 32 93.17 CHANGED MTADh.ssPAAPphRsLGPLIPAupLEIWcsAhpAhAAAERH.Q+VRuWARhAYpREhApG+sEGhpAGAEEMAtLIuQAssElApRKAVLEppLPQLVhEIlp-LLGAFDsGchLV.sVRHAIEppYpssEVCLHV.PhpsDhLApEFtsaDG.-GRP+lRIcsDPsLSscpCVLWSEaGNVDLGLsAQhRALRLGFG.LoEcuE. 
..................................................t..h....................ll......As.pht.hh-Ah.thlsAA.+ccA..ppl+......spA..p.p......t.......aE...p.p.R..A....c....Gac-G.h...p.p...G........sc........c....h.......A..t.......L....l.......A..p....s.......s.....u....c....s.......s........c..h.h.......A.......s...L.......E...p....p....lsp.......L..V...lph....lRpl....L.G...t..h..D.....s..p...E..hll.+.s.lppAl...st.h.tps.p.p.lsL+VsP.s.c.V....-...hL....p.p.ph....s..p..h..s...s........s.h....p.l.pI...sDspLusspClLtophGsV-hGL-sQLcALRhuh..........t....................................................... 0 3 6 9 +6464 PF06636 DUF1157 Protein of unknown function (DUF1157) Moxon SJ anon Pfam-B_21250 (release 10.0) Family This family consists of several uncharacterised proteins from Melanoplus sanguinipes entomopoxvirus (MsEPV). The function of this family is unknown. 22.70 22.70 23.20 475.70 18.10 22.60 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.27 0.70 -5.86 5 5 2009-01-15 18:05:59 2003-07-24 11:49:41 6 1 1 0 0 5 0 367.40 38 96.28 CHANGED IYFV..IVGIIFlIaaIIasl....NpYlshhpNs.NaNhos.hhppls......lspc.hl-FhIIpGSh.-NlIlsRshDGlQslKlIPTFVFLoNalps-lIsYGGF.NsoLsFPsNNIhN+S.sWIYGGWFIluNGNGDaICV+SNN.sNI+RaNcscsllKaL..sYNsucsN.....DsIIloNIlYKhc.EIlLINIscIussNs.ss-YYsILGNILlYIKsNaINDKpFIIsGpsGLNSKYIpLAI-TIF.lDslISSCYDGuITYVcNN.lasQSSFIlIDKcLCPYGVRFGlRY.LENshspaNLlLYATIYNcNpcspplEYFNDcLSINIFESIKSpsNYsDcYVNW-cIDISsLssNFE.sslsLlDDss.oYo+cpL-sIlNK IYFV..IVGIIFlIaaIIasl....NpYlshhpNs.NaNhos.hhppls......lspc.hl-FhIIpGSh.-NlIlsRshDGlQslKlIPTFVFLoNalps-lIsYGGF.NsoLsFPsNNIhN+S.sWIYGGWFIluNGNGDaICV+SNN.sNI+RaNcscsllKaL..sYNsucsN.....DsIIloNIlYKhc.EIlLINIscIussNs.ss-YYsILGNILlYIKsNaINDKpFIIsGpsGLNSKYIpLAI-TIF.lDslISSCYDGuITYVcNN.lasQSSFIlIDKcLCPYGVRFGlRY.LENshspaNLlLYATIYNcNpcspplEYFNDcLSINIFESIKSpsNYsDcYVNW-cIDISsLssNFE.sslsLlDDss.oYo+cpL-sIlNK... 
0 0 0 0 +6465 PF06637 PV-1 PV-1 protein (PLVAP) Moxon SJ anon Pfam-B_21397 (release 10.0) Family This family consists of several PV-1 (PLVAP) proteins which seem to be specific to mammals. PV-1 is a novel protein component of the endothelial fenestral and stomatal diaphragms [1]. The function of this family is unknown. 22.10 22.10 22.30 22.10 20.70 21.70 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.58 0.70 -5.91 2 56 2009-01-15 18:05:59 2003-07-24 11:54:25 6 2 31 0 29 44 0 320.80 46 79.83 CHANGED MGLuM-+.usYARsGsppRGCWYYLRYFFLFVSLIQFLIILGLVLFMlYGNVHsoTESsLpATEhRA-uLYSQllGLoASQuNLoKpLNhohhsK-slMQ.hLssRR-h-RINASFRQCQGDhlhYhN.pRahAAIILSEKQCp-QhK-hNKoC-ALLFhLspKVKTLEhElAKEKslCoKDKESlLhsKR.sEEQL.tCsKsREhQpQEpQlscEpLpKVQuLClPLDp-KFphDlhshWRDSlI.RoL-sLsY..aa.LhsEhASlRRsC-phPulMooKlEELARuLRAsIERVsRENu-LpRQKLEhppuhpAuQEA+t+stpEAQAREspLpAECuRQTQLALEEKAsLRtpRDNLt+ELEt+KREhEQLRhElslR.SALDTClKsKS.PhhPs.Rs.GPsPNP.PIDPASLEEFK++ILESQR.Pss.PsA.sSG .......................M-+.usY.R....sG.....s...ps+GCWYYLRYFFLFVSLIQFLIILGLVLFMVYGNs.HsoT.E.upLps..TEpRA-sLhupllsLpuppsNLoK-LNlTspuK-ul.QhllssRR.......DL-RINASF+Q.CQ...s-hh.a.sp.+ahsAIlhSEctCtp..php-hNpoCpuhhhhLtpKsK..sl..EhEh.ppKhlCsK-K-u.hhhsKp.s.EpQhttC.sctpt.p.QEhQlsptpLppVpshChslDppKh..ph.plWR-Sll.+sLssh.....sa..ahs..s-.....h.tlp+.CcphPtlMtsKlppLAptL+hsItpVstENtclpRQK.thppshtuspctttt..pEsttp......phpt-ss+QspLALEEK.ssLpcc+-tL.+pLEt+c+phtthphplslp.tsL-sCl+sK.......................................................................................................................................................................................................................................................... 0 1 5 12 +6466 PF06638 Strabismus Strabismus protein Moxon SJ anon Pfam-B_9849 (release 9.0) Family This family consists of several strabismus (STB) or Van Gogh-like (VANGL) proteins 1 and 2. The exact function of this family is unknown. 
It is thought, however that STB1 gene and STB2 may be potent tumour suppressor gene candidates [1]. 25.00 25.00 28.40 25.90 21.10 20.80 hmmbuild -o /dev/null HMM SEED 505 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.80 0.70 -6.19 10 386 2009-01-15 18:05:59 2003-07-24 11:55:16 6 3 160 0 106 275 0 335.00 52 94.50 CHANGED H+-R.pR-RH+s.pS+.....cspSRu-KSVsIss....P.spPhhsus.............sstpht.-sQDDNWGETTTAVTG.TSEcSISpEDlsplsK-hEDss.shsCcRYLGhulushLGLLAhlTPlAFllLPplhW..............R-cLcsCGssCEGLalSLAFKLLILLIuoWALFhRps+AslPRlFlFRALLllLlhLhs.hSYWLFYGVRlL.cu+........-csY+GIVpYAVSLVDALLFIHYLAlVLLElRHLQPtFslKVlRSTDGESRaYslGpLSIQRAAVWlLEpYY+DFPVYNPsL.sls.....KpRtuK+huGFKVYsV...DGs.ssss.suQSRAhlAAAARRRDsSHNEhYYEEAEaERRVRKR+ARLVsAVEEAFTHIKRh.........psE-p.....t.Psp.MDP+EAAQAIFPSMARALQKYLRoTRQQshHTMESILpHLAFCITHsMTPKAFLEpYLssGPTlQYs+-pttscpWTLVSEEsVTsuL+cGssFpL+psDFSLlVTV++IPal+LoEEaVDPKSHKFVL+LQSETSV .....................................................................................................................................................TTA.........VT.G..TSE.tS.ShED.lu...p.hsh-.............pDo...s...G...h...s.C...pRYhGsTV.A....h.sL.uh..luFloPlAh....llLPpl.h.a..................h.p.L.sCsstCcGlhlSlAFKLllLhIGhWAl......F..h.R.....p.p.uslPRlFVFRAhhLVllhlhs.hSYWLFYhVpI....-up................sh.sYp....ul.V........pYssShsDsLlFIHYluVlLhEl.Rp....L..pPhahlKlVRSsDGESR.YslGpLSIQRAAlalLpp.YYp-Fsla................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 31 38 66 +6467 PF06639 BAP Basal layer antifungal peptide (BAP) Moxon SJ anon Pfam-B_21444 (release 10.0) Family This family consists of several basal layer antifungal peptide (BAP) sequences specific to Zea mays. The BAP2 peptide exhibits potent broad-range activity against a range of filamentous fungi, including several plant pathogens [1]. 25.00 25.00 26.10 25.80 20.90 20.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.54 0.72 -4.27 3 15 2009-01-15 18:05:59 2003-07-24 13:29:27 6 1 4 0 4 14 0 74.00 36 76.60 CHANGED MlLLASFVuHA+IISGETKEsSNTRSMTMT.TRuuuplIluDNKsuLCYLDuthLEYlC++Tp+CY+oLKcCLEaC .............................MlLLAS..hVhHAphlsGps+EsoNstShTMT..T..puusphllu.....-scsulCYL....cuh.hhYsCc+Tp+CY+sls-CLt+C 0 0 2 3 +6468 PF06640 P_C P_prot_C-term; P protein C-terminus Vella Briffa B anon Pfam-B_16385 (release 10.0) Family This family represents the C-terminus of plant P proteins. The maize P gene is a transcriptional regulator of genes encoding enzymes for flavonoid biosynthesis in the pathway leading to the production of a red phlobaphene pigment [1], and P proteins are homologous to the DNA-binding domain of myb-like transcription factors [2]. All members of this family contain the Pfam:PF00249 domain. 
20.00 20.00 20.00 28.60 18.80 19.80 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.74 0.70 -4.34 5 26 2009-01-15 18:05:59 2003-07-24 13:58:00 6 4 9 0 7 24 0 179.70 52 62.13 CHANGED AIAIDMSKLQSA-+RRGGRTPGRSPKoSu............o+oKQsDsDpPGsEAtuss.............uAASSPR..HSDtAR..SsVVDP-..PNQPN.SSSGSTGss.EtssSuEDATGPWlLDPIELGDL.W.EAES...EMDALMsIGssupDuAslEGL-Al..sspAQVDDLF..DMDWDGFAAHLWGGPEQp.DHuA..plQQAAEPpssAu..........................tAuAAuAsuCoPDE+cLEAFEoWLLSDSF ............AlsIDhSKLQSA-+RRGGRTP..GpsPKuus............p+sKpsDsspPttcAt.uss.............uuASSPR..pSD.......VVsPs..sNQPN.SSSGSsGss.-tssSpEDAoGPWsL.-Pl.E.hGDL.W.EA-S...........EMDALhshGssu.DusslpGltsl..sspAQsDDLh..DMDWDGFAAcLWGsP..t...p+uu...lppAuEP.....................................sshusssD-..LEuFtoWLLSDSh........ 1 0 4 7 +6471 PF06643 DUF1158 Protein of unknown function (DUF1158) Moxon SJ anon Pfam-B_21508 (release 10.0) Family This family consists of several enterobacterial YbdJ proteins. The function of this family is unknown 25.00 25.00 31.80 83.20 18.30 18.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.92 0.72 -3.66 3 460 2009-01-15 18:05:59 2003-07-24 14:09:29 6 1 452 0 20 95 2 79.10 88 99.95 CHANGED MKHPLETLlTAAGILLLALLSCLLLPAPSLGLTLAQKLVshFHLMDLNQLYTlLFCLWFLLLGAlEYYVIRFVWRRWFSLER .....MKHPLETLhTAAGILLMAFLSCLLLPAPALGLTLAQKLVohFHLMDLSQLYTLLFCLWFLVLGAIEYFVLRFIWRRWFSLAD... 0 1 1 11 +6472 PF06644 ATP11 ATP11 protein Moxon SJ, Hammonds G anon Pfam-B_21093 (release 10.0) Family This family consists of several eukaryotic ATP11 proteins. In Saccharomyces cerevisiae, expression of functional F1-ATPase requires two proteins encoded by the ATP11 and ATP12 genes [1]. Atp11p is a molecular chaperone of the mitochondrial matrix that participates in the biogenesis pathway to form F1, the catalytic unit of the ATP synthase [2]. 
22.00 22.00 22.40 23.10 20.20 21.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.85 0.70 -4.90 22 313 2009-01-15 18:05:59 2003-07-24 14:22:25 6 7 259 1 214 313 1 231.70 28 77.06 CHANGED hptppsshhc+YcsKlpphupppshpslp....cLcpthcp.............................ppt.tshsppspphpshct..s..s......st.ts...........KsLsShlclE+lcshstcE..lctlWcthat.....pcsslsAslP.h-pYcthhspA+psPhFlLPLPRp.................pGhEhaalQWp......................ssphlFTsLtpYKl+t-hAtPahslpaah-Lsp-KslVLMpGplpscst.............losp-AQhLh.slQpFY.............sttspp+hpLLcsFs+ts.pcFchpcllp.h ...............................................................t....t.h.t+YtpKl.phtpp..s......p....tlct.....................................................................................t...t..t.........t...................................s.....................................KsLsslhclchlcp.h.ss..cE...lptlWpthat......spps.lsAsls.sppapthhspAppsPhFllPLPRp............................pGhEhahhQat.....................................ssplhFTsLtpYp...h+s.-hAtsphslpaas..-.Lt..............c............pK.........GlVLMpGph...ssh..................lssp-AphLs.plQhFY....................tptsppphpLlcpFspts.ppFchppllpp.h...................................... 0 68 118 178 +6473 PF06645 SPC12 Microsomal signal peptidase 12 kDa subunit (SPC12) Moxon SJ anon Pfam-B_21331 (release 10.0) Family This family consists of several microsomal signal peptidase 12 kDa subunit proteins. Translocation of polypeptide chains across the endoplasmic reticulum (ER) membrane is triggered by signal sequences. Subsequently, signal recognition particle interacts with its membrane receptor and the ribosome-bound nascent chain is targeted to the ER where it is transferred into a protein-conducting channel. At some point, a second signal sequence recognition event takes place in the membrane and translocation of the nascent chain through the membrane occurs. 
The signal sequence of most secretory and membrane proteins is cleaved off at this stage. Cleavage occurs by the signal peptidase complex (SPC) as soon as the lumenal domain of the translocating polypeptide is large enough to expose its cleavage site to the enzyme. The signal peptidase complex is possibly also involved in proteolytic events in the ER membrane other than the processing of the signal sequence, for example the further digestion of the cleaved signal peptide or the degradation of membrane proteins. Mammalian signal peptidase is as a complex of five different polypeptide chains. This family represents the 12 kDa subunit (SPC12). 21.40 21.40 21.40 22.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.54 0.72 -4.17 30 303 2009-01-15 18:05:59 2003-07-24 14:34:31 8 4 253 0 216 311 3 76.70 32 70.93 CHANGED MDFpGQ+hAEplhplllslsulluhllGYhhQphuholalhhsGhslosllslPsWPh.Yp+pPlpWtpstsp.........sscp ..........hDapGQ+hAEplhphllhhs.............ul........l...uFlhGYhhpshphslalhhuGhslosLl..slPsWPh.Y.p..+pPlcWhsst.t......sptt........... 0 73 122 179 +6474 PF06646 Mycoplasma_p37 High affinity transport system protein p37 Moxon SJ anon Pfam-B_21350 (release 10.0) Family This family consists of several high affinity transport system protein p37 sequences which are specific to Mycoplasma species. The p37 gene is part of an operon encoding two additional proteins which are highly similar to components of the periplasmic binding-protein-dependent transport systems of Gram-negative bacteria.It has been suggested that p37 is part of a homologous, high-affinity transport system in M. hyorhinis, a Gram-positive bacterium [1]. 
20.10 20.10 20.20 22.00 19.60 17.70 hmmbuild --amino -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.08 0.70 -5.41 6 52 2012-10-03 15:33:52 2003-07-24 15:07:28 6 1 48 5 17 60 2 353.50 34 84.94 CHANGED sKFhcsaSpphs.KLccs........cVslohsh.lDDutoplsslposps..DFAFlsSpulso...Ns.pchsshlQTLTsuFKFDpshD.aYsDGs....LcphAcchsplFscs....P.YpsWsDEsQ............cWsGs+YpFlY..-P.scLlsFYRGMILIsGo-pplspIKcAWNpKNWssFpNaGIh.pGposSuG+ahL.-pLl+KHFs.tp.s..sLtpDhsssPsKYpssp...GR-IGpsssh+IsFDDtsSFAWTcNcpsups...YsPp.............pNsKlEILohT-PhlYDIGlFsKslspchtshIuEsFIpLAKsspDhYGPslGYNGY+hIsDFsKEVlclhpKAhG ............................................tFhc.hpppFN.cL.Kptp.tp+ph.cVphslps.spDp.pshlspL.psscs.....Dluhssssphls..........pspppsls..tl......QTt.ThtFpassss..sthYpDGs.p.pDsLRp......hAcctsclasch....s...YssWp..-pp...p............pasGs+Yp.hY....c.s...sc....lsp.aYRGhIhIs..G...s-..psppcIhKAW-sKcW-sFhpaGIl.au.cssSuGKY+hp.sLl++HFs...pphs......slpp....Dhp.pp.....spYhstt...........uspl..Gp..p..s....c.........h.......+......IuFDDE.G.S.a..u..WTcscpsupp.....apssc...................................................p.NsplchLTlTsPhsYD...lslh...c.p...u...lscpplcLloculhsLupsppso.YGshsGYN..tYp..hIps.p...hhp.htt................................ 0 9 15 16 +6476 PF06648 DUF1160 Protein of unknown function (DUF1160) Moxon SJ anon Pfam-B_21501 (release 10.0) Family This family consists of several hypothetical Baculovirus proteins of unknown function. 
21.40 21.40 21.40 22.60 19.90 21.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.56 0.71 -4.34 14 51 2009-01-15 18:05:59 2003-07-24 15:13:58 6 1 49 0 0 44 1 123.50 30 93.34 CHANGED FlsplhpshshssKVAhVstpLcpaLp-ht..pD-pFppKhhpllcMFlspcIsl-slhsllsuVDuh.cLTcpQI-YLssplatNppllpIlpsFl-tp+Ls---Is-lupFLVpElssAhhYp ..................................FhsplhpohshssKVuhVphplppaLp-hp...pDcsFppKhppllpMFlppclss-slhslhsssDsl.cLocpQl-YLhsplhpN..........splhpIlpp..al......c........tpc........L........sc--.lstlupFLVpchspA....t............................................ 0 0 0 0 +6477 PF06649 DUF1161 Protein of unknown function (DUF1161) Moxon SJ anon Pfam-B_21545 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of unknown function. 25.00 25.00 37.10 43.10 18.70 17.70 hmmbuild --amino -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.46 0.72 -4.04 24 623 2009-09-11 10:51:00 2003-07-24 15:16:02 7 1 568 0 68 233 4 53.30 70 55.37 CHANGED CEclKu-IptKI.ANGV..ouaTLEIVsN-pss..........ssupVVGpC-ssT+KIlYpR .CERlpSDIsQRIINNGVPtouFTLoIVPNDQVD.Q................PDSQVVGHCANDTHKILYTR. 0 5 17 44 +6478 PF06650 DUF1162 Protein of unknown function (DUF1162) Vella Briffa B anon Pfam-B_16458 (release 10.0) Family This family represents a conserved region within several hypothetical eukaryotic proteins. Family members might be vacuolar protein sorting related-proteins. 
20.30 20.30 20.30 20.30 18.70 20.20 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.96 0.70 -5.19 39 689 2009-01-15 18:05:59 2003-07-24 15:23:28 7 58 271 0 493 739 4 250.30 20 8.17 CHANGED hplslauPYhllN+TuhsLthcscshttpstsp.s.............................chttPhhFSFsc.s...tpps+shl+lsc.op........WSp.....shShDulGsstplth.sssp.p...........................................tth....lGlslppGpGcYth...TKlVTluPRallpN+hs.slpltE.ss.........p.......ph....................h.pltssp..................................hhPhahh.ppstp.ppLs....l..ph.t.....ssp....WSusFtlscl.ushal+l..........................................hp..t...sstpthl+l-lhhcsuThFlphsstppp.hPaplcNh...o..............................cpp.hhaaQpss...p.................thp.hthplsPpshhsYAWD.Psutp .............................................................................................................................................................................................................l.lhsPYhhlNcT.s..h...L.hp.t.p..t............t.........p..................................................................p...shha.......satt..p...........ptsts.lpl....sp.ut...........................................WSp..........h.uh......-....shu.......s.h.t.tlhh......t.tpt.......................................................................lG.lslp...upt....p....h........T+lVoh...s......P+allp.Nc..s....s.h.pl.phtp.ss............p......ph.......................................................h.plts.sp...............................................................................................p.h..sh.ah.......pt.s.tp....ph.lp..........l.....ph.t.......spp........................WS.ts.Ftls.c....ss.....h..hlpl........................................................................................................................................................................................................................................................................................
.........tpp.t...h...lpl-l.t...h.....p.....t................s.................o....h................h......l.php....p..p..p..t..........P.hhl.Nh...o..............................p...l.ahQt..........................................................h.l.Ptp....h.asWs.Phh..t.............................................................................................................. 0 190 271 399 +6479 PF06651 DUF1163 Protein of unknown function (DUF1163) Vella Briffa B anon Pfam-B_16509 (release 10.0) Family This family represents the C-terminus of hypothetical Arabidopsis thaliana proteins of unknown function. 19.40 19.40 22.40 75.80 18.60 17.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.34 0.72 -4.43 6 20 2009-01-15 18:05:59 2003-07-24 15:30:57 6 1 2 0 17 20 1 68.50 55 39.95 CHANGED hsGlIsKsIMcDIKERp-lRFGSRLhLPDCRcsTsGpMsYsCDEspLRFEPGSppKAThFsc.aPpChhl .hsGlIsKcIMcDIKE++EV+FGSRlhLTDCRcsToGsMsYsCDEssLRFEPGSEhKAT.hFGs.aPpCh.h....... 0 6 6 6 +6480 PF06652 Methuselah_N Methuselah N-terminus Moxon SJ anon Pfam-B_3337 (release 10.0) Domain This family represents the N-terminal region of the Drosophila specific Methuselah protein. Drosophila Methuselah (Mth) mutants have a 35% increase in average lifespan and increased resistance to several forms of stress, including heat, starvation, and oxidative damage. The protein affected by this mutation is related to G protein-coupled receptors of the secretin receptor family. Mth, like secretin receptor family members, has a large N-terminal ectodomain, which may constitute the ligand binding site [1]. This family is found in conjunction with Pfam:PF00002. 
21.50 21.50 21.60 21.70 21.30 20.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.64 0.71 -4.72 10 146 2009-01-15 18:05:59 2003-07-24 15:38:27 7 8 23 6 75 182 0 156.10 28 34.30 CHANGED CsaaDTVsIScup+L.NGSYlYEsllIPA+LTGcYDF+llsDGs+hpVp.cHlRGCVCKLKPCIRFCCP+cplhss..upChsshs-..pLschc...Pal.slTLsDGSlsph.....+hps-hlVpp-..pP.C-cMasLsc.pphhDcaoL...........FENGolhR+hD.phhLsKpEYCLpPhpa...sssSl..cIhP+sC .......................................CsahDTVslot....th..........suSY.apslllPsph..h..u...Ysa.hh...ts..p..s..pH.l.RGC....sCpl+sClRhCC.P.tphhst.....spC.sshpp....hhthp..shl.slT.h.s.sso..s.ph.....chhpchh.lppc...hP.Cp..ch..a....h...lsp........pasL...........a.ENGolh+p.D....p.thlspp-YClt.h.h.....sp...sh....lhsh.C......................................................... 0 14 17 58 +6481 PF06653 Claudin_3 DUF1164; Tight junction protein, Claudin-like Moxon SJ, Coggill P anon Pfam-B_21553 (release 10.0) Family This is a family of probable membrane tight junction, Claudin-like, proteins. 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.11 0.71 -4.52 24 100 2012-10-03 00:20:40 2003-07-24 15:41:17 6 4 7 0 96 107 0 156.80 19 88.16 CHANGED Mth...................lhhlshh..lhluhlhshlulFo.sWhs.psh.................shGllPa.s.....s......ahshuuhhhalohshhhhhh..lhhhhshtplp+pGastphRphFhhIuhhshllslLplsAhlLhulshsphttph...s.................tLGYSua.lslsoAllshsss...uLuhtl................ucpp.h...........p ............................lh.lshh..lhhuhl.LshlulF.o.....ssWhs.p..................................thGl.l.Pa.s...tt.s................Whss.suhhhhl...shshhhhhl...lhhhhhhhplhppshst.p...h.+..hhhhhlshhuhlhslhhhl.uhhlhu..ssh..sph.t..h.....................tlGa.ShW.lslsu...sl.lshhsh...hluhhh...........t...................................... 
1 31 34 96 +6484 PF06656 Tenui_PVC2 Tenuivirus PVC2 protein Moxon SJ anon Pfam-B_15006 (release 10.0) Family This family consists of several Tenuivirus PVC2 proteins from Rice grassy stunt virus, Maize stripe virus and Rice hoja blanca virus. The function of this family is unknown. 25.00 25.00 1054.90 1054.70 16.90 16.60 hmmbuild -o /dev/null HMM SEED 785 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.32 0.70 -6.75 5 33 2009-01-15 18:05:59 2003-07-24 16:25:15 6 1 6 0 0 34 0 784.10 74 94.19 CHANGED lplSCstsssPC+psphLNGYYIEcGuICYN+AsINLaET..CFoGKYDp+lPVH..FSc...FGGpRYlcCDDEIlscso.LVGFpQscaTSKsLPINscNu-LVSYspuc.cuFhGlVYVEslcYC...spsscscpIlNcusuLph.slCpDGlLhuusSEC-lsVuETEFclPSCussKLPlYDDpIcVCpNssC+NVoCTsSslClsYDRMDFlhRlKNY-CS+oY+YalYllILllIIllChssIsIINILlhLKP...VFWLlKKllaulsGLCHR+PslpcsElDMupVRVV-Ds--GLLlsE-S+APNSNVs-plhpKAR+h-NGLIYIPYILhhLsLlhoous..CpDLVSSLSNIEVCsupsCcasSKlpLTLhNTPQDFCFKopSDVYKIRlspIoV+CLSRPLYYTNSYKRsIu+-DWKCFEG.cCosDsSpSIWDKS-S.LHYDYC.VsDFHlFSYC..PFYH.YNWKRIpYcPTS+LACoV+KCs-sQFEIsGYlo.KNGpVl+ElSGaoSsY-usIloIoLLSYNot+LP+EYVECDuKAYcRsuNDLGSFDKELhGSIQCPT+-DAhpLTsKCpTKlpul.EDpssIpY-EsDGlspLscTtoEPL+sVlVSpsGISLDThDlaPVTLoIpopccIoSIlTS+IShNsTsCcIKGVERKlKKTlI+VcosoKllLSDlLsCcDLAsCSLTFNs-....c+uECaTTSY+ssuoGssIpC+FlYSGDo...IhCKYsVSPl-IsVVSPplDloSF-uVKcSoQNWssFlh-hIRDNPKLTIVASILPIGhlLKTlKu 
.VKVPCSARAPPCKLTYELNGYFIENGLICYNRASVNYFET..CYTGNYDYKLPLHPSFSK...FGGHVYLSCDDAILQNVS.LVGIQQTEYTSSPLLITNSNSEKISYSNLK.TGFLGIVYAVETRACIQPDQAKKPEEIINHGVAI..KPSCTDGVLYYINSACEVNVSDQTFSIPSCESVKLPTYDDTIEVCDKGGCQNVTCHPGEICDKYERMDMIMRIKNYQCSHIYRYSLYSIILFFVIVIVFTLITIMNILFFLKP...AFWLLKKVLYSMVGLCHRRPVVDEVSVDMSTVRVVDEAEEGLLVVEDSIAPNTNVSDKVKRKGRKVENGLIFIPYlLMILLLVCSAES..CQDLVSSISNIERCTNNSCDFISKMKLTLLNTPQDFCFKTSTDVYKIRFNSVRVMCLSVPLYYTNSFKRVISREEWKCFEGEGCRTDGTHSIWGESTS.LSFDYC.VTDFHIFSYC..PAYH.YNWKRIEYEPTSSRACTIMKCMDTKFEIVGYIQ.KNGHVLKELGGITSKYDSPLVSISLSNYNSARMPREYAECDGKAYLRTANDLGSFDKELLGNIQCPTKEDAVVLSSKCKTKILSN.EDLPVIRYlERDGVDMLEHVKSEPLKDVLVSSSGISLGTLDLFPVELNLQFKEAITSIITSKISLNGTSCKITGIERKFKKTTVSIESSNKVYLSDILACEGLAVCPMILNNI....KKGTCITTTYYSVTVGSMIKCKFIYSGDT...LMCKYDVSPLEITVISPSLDVSSFEAVKTSTTNWMELLAGIVKDNPKLSLVASIIPIGLILKTIR.S 0 0 0 0 +6485 PF06657 Cep57_MT_bd DUF1167; Centrosome microtubule-binding domain of Cep57 Moxon SJ anon Pfam-B_9878 (release 9.0) Domain This C-terminal region of Cep57 binds, nucleates and bundles microtubules. The N-terminal part, family Cep57_CLD, Pfam:PF14073, is the centrosome localisation domain Cep57 [1]. 24.40 24.40 25.10 31.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.41 0.72 -4.03 29 210 2009-01-15 18:05:59 2003-07-24 16:32:35 8 7 119 0 127 209 0 76.60 34 11.72 CHANGED pcsohpsutsstpuL...upllptLpDEhsHhphchpcLtt.hpplDsohspcpRcpLtpclcpLlcclEtKu-QIhpLhcl .....................s.psSpuss-sL...u-lLpsLQDEhspMsh..-HppL...h+plpc...o..spch+csLcpELEpLlc+MEtKucQIsKL++h..... 0 27 44 77 +6486 PF06658 DUF1168 Protein of unknown function (DUF1168) Moxon SJ anon Pfam-B_9807 (release 9.0) Family This family consists of several hypothetical eukaryotic proteins of unknown function. 
22.10 22.10 24.30 24.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.20 0.71 -4.67 36 257 2009-01-15 18:05:59 2003-07-24 16:56:40 7 5 228 0 194 254 3 141.50 35 69.22 CHANGED Pc+thtlPctscphs.t.....ssPEhVpNV.GSSAGAGSGEFHVY+ptRRREYpRlchh-cpsc+-ppscpappKpcEpcctsEEKTtK+RtKRpK+KpKpppt+ptcpts................ttppspppspssssssccs...........t...tsthtsc ...............................p+.h.lPpt.pp.hs.p......ssPEhVpNV.GSSAGAGSGEFHVYRphRRREYpR.chM-ppsc+cctcpEFpp+hccpcptsEE+TsKpRtKRpK+Kp++ttt+ptptpp..............................tttt.tttttpt.ppttpttptt.......................ssttt.t................................................................................... 0 68 105 155 +6489 PF06661 VirE3 VirE3 Vella Briffa B anon Pfam-B_16550 (release 10.0) Family This family represents a conserved region within Agrobacterium tumefaciens VirE3. Agrobacterium tumefaciens (a plant pathogen) has a tumour-inducing (Ti) plasmid of which part, the transfer (T)-region, is transferred to plant cells during the infection process. Vir proteins mediate the processing of the T-region and the transfer of a single-stranded (ss) DNA copy of this region, the T-strand, into the recipient cells. VirE3 is a translocated effector protein, but its specific role has not been established [1]. 
19.50 19.50 22.90 24.00 18.50 17.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.95 0.70 -5.31 4 15 2009-01-15 18:05:59 2003-07-25 11:30:11 6 1 9 0 3 14 0 270.50 55 49.60 CHANGED psRLEsP.+K+KYsuDhpllsKLDsGFRGEIuYKlhGNcpLRlDpspELT+E+Gll+KTKpVLKRsssTGplYLuhHERKoWsSVooHpYupDGoLRtKHVKYKDGRFEE+WERDEsGsLlRTRYhNRGRLssthh+PlSEEhuAPYcSGs-pRLYRcLTRQsGS++ETFERDDKGNLELlupKRhGFS+pohKutDRpTSpTpIRKLGGAFSKSYRSLLDKEGNElGRDllSHRRLaNKRSAlYD-uTGQLKShKHTFGKIYKuEupYLsAclKcVSKKILGVTVtR+LssLSEcEhpAp+LRshEsutH+QAWQc ...........ARLEsPppK+KYsuDMpllDKL-sGFRGEIuYKhhGNcpLRlDsspELT+E+Gll+KT+cVLK.RsspTGsVYLuhaE+KoWtSV..oSH.YupDGoLRsKHVKYKDGRFEEKWERDEsGtLhRTpYhNR.sRL....FpPlSEclusPYcSGs-NRLaRcLTRppGS+pETFERD-KGNLELIGpKRhGFS+sSsKu.DRpTSpTpIRKLGGAFSKSYRSLLDpEGNElGRDI.SHRRLhNKRSAlYD-uoGQLpSsKHTFGKIYKSEotYLsAclKcVSKKILGVTVtR+LssLScpEh-AQ+LRstEustH+pAWQc......... 1 0 2 2 +6490 PF06662 C5-epim_C C5-epim_C-term; D-glucuronyl C5-epimerase C-terminus Vella Briffa B anon Pfam-B_16571 (release 10.0) Family This family represents the C-terminus of D-glucuronyl C5-epimerase (EC:5.1.3.-). Glucuronyl C5-epimerases catalyse the conversion of D-glucuronic acid (GlcUA) to L-iduronic acid (IdceA) units during the biosynthesis of glycosaminoglycans [1]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.19 0.71 -5.01 18 182 2012-10-03 02:33:51 2003-07-25 13:05:58 8 7 150 0 112 191 17 180.00 42 36.94 CHANGED Q-.ppGuWshshshphhps.t..LssGWhSAMuQGpAlSlLsRAYphTp.DpcYLsuAt+Als.aplsspcGGlhsshhsh......hsWYEEYs.....ToPsoaVLNGFIaoLlGLYDhs.............stphsscApplFppGlcSLKthLshaDs.GshohYDLpahs.hspsPsluthpYHshHlp.LphLhsIs.s..-.happhhc+Wpu ................................D.ppGGWsh.VpRplh.psht..LpsGWhSAMAQG.........pAlSsLs.RAYhhTp.DcpYLpuAh+Ahtsa+hs...upp..sGVhuh.Fhsp.............asWYEEYP.....TsPsSaVLNGFhYSLlGLYDLpp..........sstphuc-A.ppLappGhcSLKthL.PLYDT..G....SsohYD...LRHhh......lshuPNlAphcYHshHlpQL.hL.sls.p...s.hh..tphsc+Wp......................................... 1 36 52 86 +6491 PF06663 DUF1170 Protein of unknown function (DUF1170) Vella Briffa B anon Pfam-B_16582 (release 10.0) Family This family represents a conserved region of unknown function within MAGUIN, a neuronal membrane-associated guanylate kinase-interacting protein. This region is situated between the Pfam:PF00595 and Pfam:PF00169 domains [1]. All family members also contain an N-terminal Pfam:PF00536 domain. 
21.90 21.90 23.60 24.40 17.10 16.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.12 0.71 -4.37 6 166 2009-01-15 18:05:59 2003-07-25 15:36:33 8 13 39 0 95 132 0 172.60 47 23.89 CHANGED huss..oK+-ssAlpDLYIPPPPutPYoPRDEpGshss-sts+.ph.shsVsKGSESPNSFLDQEsR+R.FslsEpDplsashph-psh.hss+hR-sTPTYG+LRPISMPsEhNWhu-s-D.uKh+RpuR.pEsSLhRYhS....s-+Is..sEE.......ahhuRsusccphhR............................u++KScpusosu...chSLLsSh ............................h.sossK+-psAl.DLYIPPPPu..PY......PR.DEpGshsspshpp..ph.shPstKGSESPNSFLDQE...R+R.Fsl.s-.pDpl..sYph-psh..stth.REpTPoY...G.K.R..PlSMPs-hNWhGth-s.s+.+t.cuR...t...EsuLhRYhS....NE+Is..tEE.......a.hhpRsspccphh+................................................sKKKutpuso.o...c.SLLsS.............................................................................................................. 0 2 8 50 +6492 PF06664 MIG-14_Wnt-bd DUF1171; MIG-14_Ce; Wnt-binding factor required for Wnt secretion Vella Briffa B, Pollington JE anon Pfam-B_16593 (release 10.0) Family MIG-14 is a Wnt-binding factor. Newly synthesised EGL-20/Wnt binds to MIG-14 in the Golgi, targetting the Wnt to the cell membrane for secretion. AP-2-mediated endocytosis and retromer retrieval at the sorting endosome would recycle MIG-14 to the Golgi, where it can bind to EGL-20/Wnt for next cycle of secretion [1]. 
20.90 20.90 21.10 21.10 20.60 20.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.80 0.70 -5.59 15 334 2009-09-13 15:31:17 2003-07-25 16:08:38 7 4 113 0 203 313 0 260.00 30 52.85 CHANGED tpC-plslhclGhLcaspYllslphss.p.h.............plp.slphphhphNssFTpl.lhh+hlFhlhohlhhshFsppl+plshpsWhlEQKhhshLhslLllhNsPhahhslhhsssahllLsslhQulFhshLhhFWLlhhcplhl...pspRcsl+s.YhsKllhlsllhlshllhshhp+sppLcDPhaShhssshshpshhsahhlsslh...YlLaLlahIl+sasplcspth..h.....................................................Ra+alhlhslhshshsllhhhhs...........hsph.thtpo..SApFhshYulhNlYlasltalYuPScp .......................................pCs.l.hhclG.lsap.Yhlslph.s.pph...................plp..slp...h..h...hhp..NsuFop.l.lhh+hhhhsh.s.hllh.....sh.....a..h.+..pl..............p.hhs.h.s...hhlE..Khh.hLh.hhhhhN.....Ph..h.ohhh....s.....h.h.....hh..h.sslhQuhF.hshLL.FWl.hhht.....thhh............ptp+.p.plp.......h..Yh......p.....lhhlshh.hshhhhshh....p....pss.p.LpsP.hYphhss.shsh.p.hh.....hhFhhlsshh......Yhla..Lhahlhpshpplpth.....l............................................................................RhKFLhhhohh..shsholhhhhhp..h.th.....................t..hs.......h.sh..s........uu.FhuhYGhhNhYlaslhhlYuPut..................................................... 0 98 111 160 +6494 PF06666 DUF1173 Protein of unknown function (DUF1173) Vella Briffa B anon Pfam-B_16904 (release 10.0) Family This family contains a group of hypothetical bacterial proteins that contain three conserved cysteine residues towards the N-terminal. The function of these proteins is unknown. 
25.00 25.00 31.70 30.70 18.30 23.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.20 0.70 -5.63 17 141 2009-01-15 18:05:59 2003-07-28 10:16:29 6 2 109 0 50 143 15 350.90 28 91.03 CHANGED patlsspshst........sssplQshLspAappttR...PhCLC..p..tssltMYlA+hs.sp.allKRMPtoGtcHss..uCsSYEss.phSGLGtlhGsAlp-.scsGpssL+lsFuLo+tssRss......Psupsst..ss.slcsssp+loLRuLLHaLW-cAtLscWsPshsGKRsWuhVRphLLpAAcphps+uhsLu-hLaVPEsFps-pKstlupR...RpstLushth..t.Gpp+hhlllGElKphssuR....Gc+lsl+Hh.PshP..hhlcschacRlt+pFssELchWcuspst.....+LlsIAphs....hsssGhspl--luLMhVoEpWIPhEostEt.LlctLssp+RRFlKsLRashssstPhssslLhDspspssuhalVsutsspca..ctslcchl.scsthss..WhWcsup ....................................tt.s.th.pthLtpAa.t..ppt.p......shChC.....p..pss.....h.hhl..tph.......s...sp...ahltRhPsoGtp...H..ss..sCs.ac......sssch.SGhu..........t.hhusslpt.spsGphsl+LshuLscts.spts............stststt....t..ss.pss..tpphoLhulLchLWppAtLspWtPshs.G...+R.shsh.V+ttLlpAApphtsp.tt....s.LschLal.t.aps....pp+ptlttc............ptthluphth....ts.pchhlllu.lpchsss+h.....upp...lsl+ph.ss...hs........hhh..spshac+lp+RFstE...LthWpssp............+llslut.hs............hstsuhspltcluLMhVo.cp.WIPh-osaEhhlhcpLstppRpFlKsLRashsts.........tshsshhLhDst..t......s.sshh.V.u.hs.s..pa...cpthcphh...spphhss..W.hWcst.t.................. 0 8 21 39 +6495 PF06667 PspB Phage shock protein B Moxon SJ anon Pfam-B_21806 (release 10.0) Family This family consists of several bacterial phage shock protein B (PspB) sequences. The phage shock protein (psp) operon is induced in response to heat, ethanol, osmotic shock and infection by filamentous bacteriophages [1]. Expression of the operon requires the alternative sigma factor sigma54 and the transcriptional activator PspF. In addition, PspA plays a negative regulatory role, and the integral-membrane proteins PspB and PspC play a positive one [2]. 
24.30 24.30 24.40 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.59 0.72 -4.04 36 768 2009-01-15 18:05:59 2003-07-28 11:43:46 7 3 748 0 106 251 47 74.20 63 97.85 CHANGED Ms...h.hlhsPlllFhlhVAPlWLlLHY+oKppsupGLopc......-pppLppLhppA-+Mp-RlpTLEpILDAEsPsWRp+ .....................................MS..ALFLAIPLTIFVLFVLPIWLWLHYS...N.R.u.s.p...up.L..S.Qu......E..Q..QRLtQLsD-AcRMRE..RIQALEsIL.DAEHPNWR-.............. 1 20 44 74 +6496 PF06668 ITI_HC_C ITI_HC_C-term; Inter-alpha-trypsin inhibitor heavy chain C-terminus Vella Briffa B anon Pfam-B_16674 (release 10.0) Family This family represents the C-terminal region of inter-alpha-trypsin inhibitor heavy chains. Inter-alpha-trypsin inhibitors are glycoproteins with a high inhibitory activity against trypsin, built up from different combinations of four polypeptides: bikunin and the three heavy chains that belong to this family (HC1, HC2, HC3). The heavy chains do not have any protease inhibitory properties but have the capacity to interact in vitro and in vivo with hyaluronic acid, which promotes the stability of the extra-cellular matrix [1]. All family members contain the Pfam:PF00092 domain. 
21.50 21.50 22.50 21.50 21.20 20.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.07 0.71 -4.88 29 404 2009-01-15 18:05:59 2003-07-28 12:08:42 7 13 48 0 193 292 0 177.50 33 20.30 CHANGED soGlsVNGQlIGsct..s.tp+pcTYFupluIshpp.chp.lElTspplsl.cuppp.shsWp-osslspsulplols+ppslsVolsssloFsllLH.phhKtpPhppDaLGFYlhsoc+hSupsHGLLGQFhpt.-hclhchhsGsDspK...............s-AohpVKucplsVTRthpKDYppc....GppVsCWFV+N .........................................................oGlpVNGpllGs.t....tppppTYFuplslhhpp........c..hp...lElo..scp.Isl....p.G....spp..shsWpcosh.l..t.p.....ss...lplo...ls.....+pps..lsl...olscslsFsl.llH.phh+.p............sh..........pp.-aLGhYlhso.cphSspsHGLL.G...........Q..........F...hpt.chclhs..sutssp+...............scA..shhVKupplsVT......Rt.p+DYpps.....Gp..plsCWFl+N........................................ 0 19 28 87 +6497 PF06669 X_fast-SP_rel Xylella fastidiosa surface protein related Moxon SJ anon Pfam-B_21796 (release 10.0) Repeat This family consists of several Xylella fastidiosa surface protein specific repeats which are found in found in conjunction with Pfam:PF05662, Pfam:PF05658 and Pfam:PF03895. 22.60 22.60 23.60 22.80 21.70 22.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.36 0.72 -4.04 2 73 2009-01-15 18:05:59 2003-07-28 12:12:23 6 31 27 0 9 81 0 68.30 60 6.35 CHANGED NAVQsQASQPVTFoGNEGuVKRoLGQuVVISGESSTAGTYSGGNLKSVVDEAAGtIHLQLADSPKFGNVl .........NAVQsQASQPVTFoGNEG.o.VKRoLGQuVVISGESST.A.G.TYSGGNLKSVVDEAAGuIHLQLADSPKFGNVl................ 0 3 8 8 +6498 PF06670 Etmic-2 Microneme protein Etmic-2 Moxon SJ anon Pfam-B_22009 (release 10.0) Family This family consists of several Microneme protein Etmic-2 sequences from Eimeria tenella. Etmic-2 is a 50 kDa acidic protein, which is found within the microneme organelles of Eimeria tenella sporozoites and merozoites [1]. 
21.90 21.90 238.40 64.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.40 0.70 -5.75 2 8 2009-01-15 18:05:59 2003-07-28 12:17:37 6 2 2 0 0 10 0 302.50 73 99.96 CHANGED MARALSLVALGLLFSLPPSSAVRTRVPGEDSFSPESGVLSGTDAPERRP..............lVPGLsEGNCGRLTVRNGLSVDETIKVTSAGWTKSERDFIVSLVADETRKVVQLRESEGASGASGPGPAPAEKPPSGQGSAEEAPKGEGGQEKPSVPLIAVRIHGSGGDKGESAPQSAVLLYGNDESEPTEVPLETAAGPTTPLMVLITQQNPKEVEVRVLAWIST.......................DATTGKGSWKENSVVVGSSLSGRDLTVNLSDCGPSSLRVYGSASADLVTVKEGMCEADDPELIALTRPHTSAASPLPAEEGDVAQDAQQSAGAQQEAEsQEVGEPQQEAAAAEQGSSAAESDTQQSS ...MARALSLVALGLLFSLPPSSAVRTRVPGEDSFSPESGVLSGTDAPERRP..............lVPGLVEGNCGRLTVRNGLSVDETIKVTSAGWTKSERDFIVSLVADETRKVVQLRESEGASGASGPGPAPAEKPPSGQGSAEEAPKGEGG.QEKPSVPLIAVRIHGSGGDKGESAPQSAVLLYGNDESEPTEVPLETAAGPTTPLMVLITQQNPKEVEVRVLAWIST.......................DATTGKGSWKENSVVVGSSLSGRDLTVNLSDCGPSSLRVYGSASADLVTVKEGMCEADDPELIALTRPHTSAASPLPAEEGDVAQDAQQSAGAQQEAEAQEVGEPQQEAsAAEQGSSAAESDTQQSS......... 0 0 0 0 +6499 PF06671 DUF1174 Repeat of unknown function (DUF1174) Moxon SJ anon Pfam-B_22180 (release 10.0) Repeat This family consists of a number of Caenorhabditis elegans specific repeats of around 36 residues in length which are found in two hypothetical proteins. This family is found in conjunction with Pfam:PF00024. 19.70 10.10 220.50 10.10 15.30 10.00 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.33 0.72 -4.31 36 528 2009-01-15 18:05:59 2003-07-28 12:22:07 6 7 5 0 394 512 0 23.10 67 46.28 CHANGED SGEETTTuAVTEASG...EETTTu...AVTEu .....sctsTs..uA.......VTEASG...EE.oTTu...AsTEu........ 0 67 130 394 +6500 PF06672 DUF1175 Protein of unknown function (DUF1175) Moxon SJ anon Pfam-B_21722 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 210 residues in length. The function of this family is unknown. 
20.60 20.60 20.60 20.60 20.50 20.30 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.42 0.70 -5.00 13 358 2012-10-10 12:56:15 2003-07-28 13:11:22 6 3 342 0 32 141 11 193.80 79 93.81 CHANGED .hhhthpspssstcucs...thLssppStsFRsWFVRIAppQlRps.ss+W..cp+DCAGLVRFAspEAL+tHDucWhcssGhs........s+Yh.PchsL.usphhhtpphp.....pssGp..husassAhsLlppNo+Flu+D...lspApPGDL.lFFcQtDsp...HLMIahGc.........allYHTGshsc.s-stlRsVolppLhpa.DscWpPhssNPsFlGlYRhsFLs ............................tlLhLlsalsCsV.............AHS...EMLNVEQSGLFRAWFVRIAQEQLRQG.PSPRW..YQQDCAGLVRFAANEs.LKVHDSKWLKSNGhS..................SQYLPPEMTLTPpQRQLAQNWN.....QGNGK..TGPYVTAINLIQYNSQFIGQD...INQALPGDM.IFFD.QGDAQ....HL.MVWMGR.........YVIYHTGS........A...TK.T..........DNGM.RAVSLQQLMTWKDTRWIPNDSNPNFIGIYRLNFL.A...................... 0 10 15 25 +6501 PF06673 L_lactis_ph-MCP Lactococcus lactis bacteriophage major capsid protein Moxon SJ anon Pfam-B_21754 (release 10.0) Family This family consists of several Lactococcus lactis bacteriophage major capsid proteins. 
19.40 19.40 20.10 19.60 18.00 18.80 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.11 0.70 -5.67 2 7 2012-10-03 06:22:39 2003-07-28 13:15:30 6 1 7 0 0 9 0 346.70 95 81.31 CHANGED MGsNETQEhMKQAIEAGVKVRELEsKVEELNKEREELKKEREAsIPSEKPpDsERKFMRELGsKMsEMPEQGFLREFANuusLNVVNSLGSITSKYARKSGIYDGAMKARFQGLTLAEDGVDDTFIpGTFKAGTDKNKuQTAoKRSLRPQMAEAYLQMDKATVRGVNDSGALSEYVMSEMVNRVIQKVEaNMILGSsDGSNGFYGLKTATDGWTKQIEYTDLF-GITDAVAECSISDAITIVMSPQTFAELRKAKGTDGHSRFNELATKEQIAQSFGAVNLETRVWMPKDEVAVYNHDEYVLIGDLNVENYNDFDLRYNVEQWLSETLVGGSIRGKNRSAYLpKKuS ................................MGANETQEIMKQAIEAGVKVRELEAKVEELNKEREELKK.EREASIPSEKPQDsERKFMRELGDKMsEMPEQGFLREFANuuDLNVVNS..LGSITSKYARKSGIYDGAMKARFQGLTLAEDGVDDTFIpGT.FKAGTDKNKuQ.TATKRSLRPQMAEAYLQ..MDKATVRG..VNDSG.ALSEYVMSEMVNRVIQKVEYNMILGSADGSNGFYGLKTATDGWTKQIEYTDLFEGITD..AVAECSISDAITIVMSPQTFAELRKhKGoDGHuRFNELATKEQIAQSFGAVNLETRVWMPKDEVAVYNHDEYVLIGDLNhENYNDFDLRYNVEQWLSETLVGGSIRGKNRSAYLKKKuS.......... 0 0 0 0 +6502 PF06674 DUF1176 Protein of unknown function (DUF1176) Moxon SJ anon Pfam-B_21791 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 340 residues in length. Members of this family contain six highly conserved cysteine residues. The function of this family is unknown. 
34.80 34.80 35.00 35.10 34.50 34.70 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.28 0.70 -5.33 26 714 2009-09-11 20:41:29 2003-07-28 13:20:13 6 5 643 0 80 389 14 321.60 47 92.18 CHANGED lshsshsss.....shthpacDWpVsCDNspsCpAsuhsscp.s.........lslhlpRpAGssssl.phclph.up.tts.........stthpLhlDGcshuhhssphptt.t..................phsssplsAlLpslppupplslt..sust.......hplShsGhsAuLLhhD-hQuRlGTssAllcK..GspPssuVssAsshPsl..ssssssst.....shstpptpthtpssh..........hpsspsp..tttssh..pst................lstLssspsLlhhsC.hsGAYN.ps.shWllscp.............tshpsphlshp..................sssas..suplsshtKG...RGluD.CGstspWha....DGpp..FhLsptsppupCcshtuus.....sW.hsahsp ............................................................hs..hphlWAA......PuQ+sFuDWQVTCNNQNFCVARNsG-HpG..........LVMTLSRSAGA+TDA.lLRI-t...GGltsPcAp.....cutIAPRLL.LDGcPLu.h..sus+W+loPahLh...............TsDsATITAFLQhIQ-.up.AITL+..sGsQ........TlSLuGLKAALLFIDAQQKRVGSETAWI+K..GsEPPLSVPPAPALKpV..Al.lNPTPs.....PLS.E.ERsDLLDYus............WRhNGhc..CS..LDPLRREsp.................VoALTDDKA..LhhlsC..EAG...AYN.TIDLAWlVSRK.............Ksh.so.RsVRLRL.......PFssutEos-........hELMNAsFDEKoRE..L..V..TLAKG...RGLuD.CGIQsRWRa....DGpR....F.RLVRYAsEPoCDsWHGPD....AWPTLWIT.R....................................... 0 9 26 58 +6503 PF06675 DUF1177 Protein of unknown function (DUF1177) Moxon SJ anon Pfam-B_21818 (release 10.0) Family This family consists of several hypothetical archaeal and and bacterial proteins of around 300 residues in length. The function of this family is unknown. 
25.00 25.00 63.10 62.90 19.80 19.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.76 0.70 -5.81 22 133 2009-01-15 18:05:59 2003-07-28 13:23:24 6 2 116 0 50 126 4 262.00 54 88.23 CHANGED clpplss......pGsTDFl+lhlsGptG+..GGsAPTlGIlGRLGGIGARPpphGLVSDADGAlsAlAsALKLhcMpc+GDhLsGDVllsTHICssAsshPHcPVsFMsSPVshspMN+pEVcPph-AILSIDTTKGNRllpa+GFAIoPTlKEGYIL+lS--LlcIhphsTG+.shshPlTpQDITPYsNslYHlNSIlQPs.lsTsuPVVGVAITucssVPGsuTGAoc.sDl-pAuRFslEVAKtaspGpscFYDppEatcLpphYGshspLQphGcp ...................p.hpplpup.....pGsTDFl+lhIPGppG+.tGGsAPTLGIlGRLGGIGARPphlGhVSDuDGAlsAlAsAhKLhcMpp+GDhLsGDVllsTHICPcAPTpPHcPVsFMsSPl-htsMNcpEVs.sphDAILSlDTTKGNRllN++GaAlSPTVK-GYIL+lS--LLclhphsTGc.shsaPlTpQDITPYGNGlaHlNSIlQPu.sATsAPVVGVAITopssVPGCuTGAoH.sDlstAsRFslElAKtFGpGpspFYDppEappLhphYGshs+LQshGp.t.... 0 12 25 39 +6504 PF06676 DUF1178 Protein of unknown function (DUF1178) Moxon SJ anon Pfam-B_21872 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. 26.50 26.50 26.90 30.90 24.20 26.40 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.99 0.71 -4.00 79 438 2009-09-13 03:22:43 2003-07-28 13:25:38 6 3 420 0 154 372 1244 143.00 40 98.30 CHANGED MIhasLpCs.psHpFEuWFtSussF-sQtspGLVsCPhCGSspVpKslMAPplssuppt.....................tssPssssssts............................pttltclpccltpso-.VGccFAcEARchHh.GEsscRsIaGcAos-EActLlEEGIsVhPL..P.....hsscpp...s ...........MIhasLpCs.pu.HpFEGWFtSus-F-pQtpptLVsCPsCGospVpKtlMAPplstupsptt...............................................s.sh.t.t....................................................ttl.tthtpht+pVhpss-.VG-+FA-EAR+IHa.GE.s..tRuIhGpAos-EscsLhEEGI-lhPL..P...h.st...t.................... 
0 37 91 118 +6505 PF06677 Auto_anti-p27 Sjogren's syndrome/scleroderma autoantigen 1 (Autoantigen p27) Moxon SJ anon Pfam-B_21881 (release 10.0) Family This family consists of several Sjogren's syndrome/scleroderma autoantigen 1 (Autoantigen p27) sequences. It is thought that the potential association of anti-p27 with anti-centromere antibodies suggests that autoantigen p27 might play a role in mitosis [1]. 23.20 23.20 23.20 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.23 0.72 -4.19 14 282 2012-10-03 10:42:43 2003-07-28 13:32:39 7 6 238 0 193 271 18 40.90 37 20.80 CHANGED cl.cphuchLhpGApMLscpCspC.GsPLFc..KsGclaCPsCp ....t.h.phhuchLLpGhpMLscpCs..p..C....G..sPLh.......c.......+.p.G.c.h....h...CssCp....... 0 83 124 160 +6506 PF06678 DUF1179 Protein of unknown function (DUF1179) Moxon SJ anon Pfam-B_21899 (release 10.0) Family This family consists of several hypothetical Caenorhabditis elegans proteins of around 106 residues in length. The function of the family is unknown. 25.00 25.00 27.10 26.00 24.00 23.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.54 0.72 -3.72 4 16 2009-09-10 23:27:31 2003-07-28 13:37:01 6 1 4 0 15 15 0 97.00 34 92.33 CHANGED MaSFshIhcsIsL.FGs.FLLlssllpCp..SKKKF+sDt+scLV.....uPhppssscpspt.tsPsuQsPPs+hPhEpospht.....-DTLANV+SlPPcpp.t.ct.chKpKp .......................h.hlhpplhL.....hlLhhsll.pCp..pKKKFcsDt+ApLV.....sPss.sssc....ssps.psPsuQ.........sPPs+pPlEposp.t.....EDTLANV+SlPPEKS...pp.chKh..t....... 0 7 7 15 +6507 PF06679 DUF1180 Protein of unknown function (DUF1180) Moxon SJ anon Pfam-B_21907 (release 10.0) Family This family consists of several hypothetical mammalian proteins of around 190 residues in length. The function of this family is unknown. 
24.80 24.80 24.80 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.14 0.71 -4.35 7 117 2009-01-15 18:05:59 2003-07-28 13:39:38 7 1 66 0 80 126 0 144.30 30 90.58 CHANGED M......uhhs.h.ssshLLhLLs.L...sulhpcAssAss.....pPtspp.s..PPuPosu.......................Psuh..hts.tGssussSsGusLss..sss..spPhhpRALhVhsshSAhlllYFllRshRhR+Rp+KTRKYGVlcTshtshEhsPL.EpDDEDDD.TlFDhp..RR .......................................................................................................h.t......................................................................................ssst.......................................sst..................tstt...s.p...s.s.s.ssslhsp.s..s.......h......-h...s.h.hp...pul..hVlhuhouhlllhhll....R.shRh+....+.+t.......+K..oR........+YGllsss...-shEhs..PL...-pD...DE--DpTlFDsph........................................................ 0 15 19 43 +6508 PF06680 DUF1181 Protein of unknown function (DUF1181) Moxon SJ anon Pfam-B_21912 (release 10.0) Family This family consists of several hypothetical proteins of around 120 residues in length which are found specifically in Trypanosoma brucei. The function of this family is unknown. 25.00 25.00 167.70 167.00 17.70 16.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.69 0.71 -4.43 2 14 2009-01-15 18:05:59 2003-07-28 13:41:54 6 1 3 0 10 14 0 112.40 87 82.70 CHANGED MFQLVSssEVNKAYVSYPGGVSMCLRFPFCHCVWAHAMTLIEISGHYHRWVARGtSEDWDYSNSFVVVCsVLLENIAssEREGKCHLTFHAATSMH+sYMLVALpGKsVKAKVSFRF+EV ....MFQLVSsAEVNKAaVSYPGGVSMCLRFPFCHCVWAHAMTLIEISGHYH+WVARGESEDWDYSNSFVVVCsVLLENIAsNEREGKCHLTFHAATSMH+sYMLVALpGKVVKAKVSFRFKEV. 0 0 10 10 +6509 PF06681 DUF1182 Protein of unknown function (DUF1182) Moxon SJ anon Pfam-B_21992 (release 10.0) Family This family consists of several hypothetical proteins of around 360 residues in length and seems to be specific to Caenorhabditis elegans. The function of this family is unknown. 
25.00 25.00 26.60 26.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.66 0.70 -5.14 2 21 2012-10-03 04:04:29 2003-07-28 13:44:35 8 1 6 0 20 18 0 203.70 36 63.80 CHANGED h.N.......................NhsLsslTsTPhTYRDRIhhEF.lpuTshlFshhLsIFhhhR.tlhhohKsTIhFVTLGoFlLslPLhhhQhahVh.L.uhhpPhYTlhVCohlKshsSSTTSshQVLPhAVulYRYhhVVhpt+.ssWFVlsVH.IlohIFhlhAhLNaPhGE.ppND.ChsLRFSpuMEhVRI..TLhhNhhAlhlNhsIhpFVK+a- .................h...................................s.ThTshpaRDRIhhEahlpssshlhshhLsIF.lhh+.plhhphK..............sTIhFVTlGoFlLslPLhlhQsahVhhL.ss+pPpY.o.lhlCohlKshsSuTT.SstQVLPhs................VulYRYhlVVhct+hs.saFVlsVHsllohlFhlhAhLNaPhG-.ppNDhChhLRFSpuMEhVRI......TLhhN...hhAlllNh..sIhpFVK+a-...................................................................... 0 9 10 20 +6510 PF06682 DUF1183 Protein of unknown function (DUF1183) Moxon SJ anon Pfam-B_22014 (release 10.0) Family This family consists of several eukaryotic proteins of around 360 residues in length. The function of this family is unknown. 
30.80 30.50 40.90 36.50 29.70 30.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.27 0.70 -4.82 14 202 2009-01-15 18:05:59 2003-07-28 13:50:05 7 6 157 0 140 197 2 268.30 35 85.91 CHANGED h.hLlaLLlh.......stssps..hsspc.....tlLLpcVpsLTLhtsRhTouRRsuPlPQLKClGGSu.tCptapPcV..lQCtNpG..aDG.DVQWpCcA.sLspsa+hGps-VsCEGYcts-DtYlL+GSCGlEYpLpLTEtGccKhspth.pph.sthps.........................ps.ppt.t.....sshlhhlhhlhllAahlYthhhps.t.pttshsspsGhss...................tsss.tsts..uPPPPh.p........sshsss.s.sstssss.u...........tstts......tPGFW..TGhusGu..hhGYLhGppps..........pphstshss...........ta....ss.t..sptoupussusssou...T+ouSGaGuT+RR .......................h........................t...........ts.....tlLLpcVpsLTLp.t.s.+hTouRRssslPQLpCl........Gs..ot.hC...s..h...apP.cV..hpCpNpG...............aDs...DVQWpCpA......s......Ls...tpa+h..G..p.ssVsCEGYcts-DsYlL+GSCGlEYp.......LthTchGt.p+htp.h.t.t.th........................................................stt.............hshlhhl.hhlhslsahl...Yphhhts..t.t..........s.ttsshst.......................................hhtt..s..s..s..sPP..PPh............................t.ss.......st.............................tt.ts......tPGFW..oGhusGu....hhGYhhGpppt.................................t...s..s...............h...........st.....................s..s......s.t..ustsss.....p+puoGaGuTpRR...................................................................... 0 55 76 110 +6511 PF06683 DUF1184 Protein of unknown function (DUF1184) Vella Briffa B anon Pfam-B_16725 (release 10.0) Family This family contains a number of hypothetical proteins of unknown function from Arabidopsis thaliana. 
25.00 25.00 26.90 39.90 22.70 23.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.26 0.71 -4.67 4 36 2009-01-15 18:05:59 2003-07-28 14:53:55 6 2 2 0 29 36 0 122.20 37 85.12 CHANGED R..pNK+.LppSRYSPY.....luThcp........EpQKEEAIRLGVELSLFVAEAMFLLSDsL.................................aVhETYIKPKNGVY..................FshGlt.LspIV.IL.....pssplVKspsFc+hNQELKKLEE+LRSsK-sSEANGFsREtI+SsILpLWKSLFEso..hh.s.KshsLEhF ................................................................................pp.K-EslpLGV-LSlalAEuMhlLsDsl.........................................................................................................h.thhpp..hppl-pplts.p.h.......R-.hc.ph..hWcp.h.................................................. 0 9 9 9 +6512 PF06684 AA_synth DUF1185; Amino acid synthesis Moxon SJ, Eberhardt R anon Pfam-B_22398 (release 10.0) Family This family of proteins is structurally similar to proteins with the Bacillus chorismate mutase-like (BCM-like) fold. This structure, combined with its genomic context, suggest that it has a role in amino acid synthesis [1]. 20.00 20.00 24.00 35.60 19.80 17.10 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.00 0.71 -4.91 55 331 2009-01-15 18:05:59 2003-07-28 15:01:51 6 3 227 4 131 326 890 173.60 46 89.66 CHANGED IRKhlshlE-hht.EGGtss......spPl+psussAVlc.NPaAGR.aVEDLpP.lhchut.LGthLspcllssLGs.scplEuYGKuAlVGssGElEHu.ALhHs.hhGtshR-sl...............................ssupullPuop+hGusGstlslPltHpssualRSHFsohchslsDAPpsDElllsLuhosGGRsHs......RlG ......................IRKhlshlE-ohh.EGG.+ss......spP.l.+hsussAVl+.NPaAG+...aVEDLpP.lhshus.LGphLsccllsslGs.uctlEuYGKAAlVGhsGElEHAuAllHs.+.....hGsthRcAl................................s...uKuhls.sspRGusGsslplPlhaKssthhRSHa.olphplsDAPtsDEIlVslususGGRsHsRlG.................. 
0 20 56 86 +6513 PF06685 DUF1186 Protein of unknown function (DUF1186) Moxon SJ anon Pfam-B_22662 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 250 residues in length and is found in several Chlamydia and Anabaena species. The function of this family is unknown. 19.80 19.80 19.90 20.10 19.60 18.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.33 0.70 -5.21 7 84 2009-01-15 18:05:59 2003-07-28 15:06:29 6 3 79 0 27 61 0 224.90 33 79.09 CHANGED M..ptll.ph.h.pthh.c.tlcthl.p+pthhP.Ll.hLcph.pph.pllsctsa.hHlYAhaLLAQFREppAaPLIl+hhShst-..lhhclsGDllTEDLuRILASVssGclphIppLIEspsls.YVRuAAIpuLlsLVuptplSREplIpYFtpLhpt+Lc+csSa..lWsSLVsssssLhPtELh.pIp+AapssLl-shFIshEDVEp.lshpp.-psLpcLhps..apLIpDslt-hEpWhp.....shEs ..................................hppllppLhh.pp.shlPc-AlctAl..pppplsP.LLplL-cAhcc...ss....-lhs-ssh.....psh...l.aAhaLLAQF.REoRAhPLll+Lh.uhpp-...h.cslhG.DslTEDLu+ILASVs-.-hshlpcL...I-ssplssYV+sAAluuLstLlshpplsR-pslpaatpLl...shtL.ccp..sh......shssLl.s....shssLhstELh..IpcsFp...tsLl..-...h.l.sh.........-cV.p.ht.p...-t.ht.h..t......lp.................s.................................................................................................. 1 8 18 21 +6514 PF06686 SpoIIIAC Stage III sporulation protein AC/AD protein family Moxon SJ, TIGRFAMs, Coggill P, Bateman A anon Pfam-B_22771 (release 10.0), TIGRFAMs Family This family consists of several bacterial stage III sporulation protein AC (SpoIIIAC) and SpoIIIAD sequences. The exact function of this family is unknown. SpoIIIAD is the an uncharacterised protein which is part of the spoIIIA operon that acts at sporulation stage III as part of a cascade of events leading to endospore formation. The operon is regulated by sigmaG [1]. 
21.00 21.00 21.20 21.50 19.00 20.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.60 0.72 -4.17 87 1045 2009-01-15 18:05:59 2003-07-28 15:12:29 6 3 398 0 251 634 10 57.50 29 83.45 CHANGED hIhKIsGlullsthlstllKcus.sphAhhlslsutllllhhslshlsslhpslpplh ....hlhKIsGluhlsthhsplhK..csG.ppshAthlpLsGtllILhhslshlsslhcslpslh....................... 0 135 210 225 +6515 PF06687 SUR7 SUR7/PalI family Moxon SJ, Coggill PC, Bateman A anon Pfam-B_22775 (release 10.0) Family This family consists of several fungal-specific SUR7 proteins. Its activity regulates expression of RVS161, a homologue of human endophilin, suggesting a function for both in endocytosis [1,2]. The protein carries four transmembrane domains and is thus likely to act as an anchoring protein for the eisosome to the plasma membrane. Eisosomes are the immobile protein complexes, that include the proteins Pil1 and Lsp1, which co-localise with sites of protein and lipid endocytosis at the plasma membrane. SUR7 protein may play a role in sporulation [2]. This family also includes PalI which is part of a pH signal transduction cascade. Based on the similarity of PalI to the yeast Rim9 meiotic signal transduction component it has been suggested that PalI might be a membrane sensor for ambient pH [4]. 
24.10 24.10 24.10 24.30 24.00 24.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.57 0.70 -4.77 99 777 2012-10-03 00:20:40 2003-07-28 15:22:48 7 5 142 0 570 851 0 230.30 18 62.52 CHANGED hthlshll...hhsuhlhhlhssluss..t.........lpphahhph.sstth...................................................................................................................................t........................hpauhaGaCts.......................................t.pstpCop.spsuashssh..p.ht.p......................................thslPsshps......t......h...hphhhhhhlluhh...hohlshlh.hl.h...........................................uhhtt............................................................hhshlshlhshluh..lhshlusslssshalh..hht.hpp...........husphhshsWhslussllshlh .......................................................................................................................................................h..hhshhhhhsuhlhhl.....l.shluss......s.......lpshhhhph..shtt.h................................................................................................................................................................................................................................................................................................................................................................................................................................t.h..ph..................hphulauaCts.........................................t.tt.ss.C.....op..spss.a.s.hssh......p.http.......................................thsl...P.s..shps............p...h..h.....hph.....hh........shhhluhh....hshl...shlh.sh..h................................shhhp........................................................................hhsh...h...s..........hlhshluh.lhshlushls....ss.h.as.h...httthppt...............htsphGsp..h..h.sh..hWhuh...shsllshh.h..................................... 
0 135 305 487 +6516 PF06688 DUF1187 Protein of unknown function (DUF1187) Moxon SJ anon Pfam-B_22781 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of around 62 residues in length. Members of this family are found in Escherichia coli and Salmonella typhi. The function of this family is unknown. 20.70 20.70 22.20 40.90 19.00 18.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.16 0.72 -3.85 2 125 2009-09-11 12:17:23 2003-07-28 15:25:53 6 1 113 0 2 55 0 58.80 68 86.50 CHANGED YpIsATIcK.GGsPspWT+YSc..hTtpECcK.hStcKEAGhshp.cV+l.sF.Cccl.sp .YKITATIEKEGGTPTNWTRYSKSKLTKSECEKMLSGKKEAGVS.REQK.VKLINFNCEKL.SS............. 0 1 1 1 +6517 PF06689 zf-C4_ClpX ClpX C4-type zinc finger Bateman A anon Pfam-B_465 (release 10.0) Domain The ClpX heat shock protein of Escherichia coli is a member of the universally conserved Hsp100 family of proteins, and possesses a putative zinc finger motif of the C4 type. This presumed zinc binding domain is found at the N-terminus of the ClpX protein. ClpX is an ATPase which functions both as a substrate specificity component of the ClpXP protease and as a molecular chaperone. The molecular function of this domain is now known. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.41 0.72 -4.44 130 4410 2009-01-15 18:05:59 2003-07-28 15:29:21 8 8 4219 9 955 2727 1965 40.10 59 9.68 CHANGED phpCSFCGKspscVc+LI.AGs.s...saICcEClclspcIlpc- .........h.lhCSFCGKoQc...-...V...+KL......I.....A..G.P.u.........VaIC....DECl-LCs-IIcEE....... 0 326 634 816 +6518 PF06690 DUF1188 Protein of unknown function (DUF1188) Moxon SJ anon Pfam-B_22902 (release 10.0) Family This family consists of several hypothetical archaeal proteins of around 260 residues in length which seem to be specific to Methanobacterium, Methanococcus and Methanopyrus species. The function of this family is unknown. 
26.60 26.60 27.20 60.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.64 0.70 -5.24 8 32 2009-01-15 18:05:59 2003-07-28 15:31:43 6 2 30 0 21 29 2 244.00 40 96.65 CHANGED hchGITEoVKTlpS+l+ltDIlp-IucKKAsAIptaLEsEcFc..pAlIFGuYLoGualApsLsKcs.EVhlVDIpPaL+cll..spsI+Fh.......chhsshss.s.DLlVDLTGLGGlsP-hLS+LsPcVLIVEDPpGsh.DtsIpphsNTpERLs...pusK+GlL+TactuhsoKTSGTMTLoV-slp-uss-lpElDGVLYAlPsL+aaEtlLF+.E+DhcpFLsclspPAlTVSSLc-...h-sDElLpcNluhIpShVcE ................chGITEpVKThcSch+lhDIlpcIscKKupAlp.aL-utchc.......psllhGuYLoGsalAptLpc.t.pcVhllDIpPal+pll..spsl..cFh.........chhps..ph..s.DLllDhTGlGGl..ss-hLuchs.scVhIVEDPpush.DptIhchsNT.cRls....sutc+GlL+Ta..uhhSKTSGTMTLTl-slhcusp-lpcl-GVLYulssLcaaEtllF+.E+DhcKFlsplstPAlTlSolsc......-s-cllppNlscIpShl....................................... 0 6 11 17 +6519 PF06691 DUF1189 Protein of unknown function (DUF1189) Moxon SJ anon Pfam-B_22923 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 260 residues in length. The function of this family is unknown. 
20.70 20.70 20.80 21.00 20.50 20.30 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.49 0.70 -5.06 12 358 2009-01-15 18:05:59 2003-07-28 15:44:05 6 2 333 0 41 223 1 238.10 27 93.78 CHANGED MNlFpphhKSlYSP+DIAthRFQuIGKoIlYlhLLollsslPssYahsoslppuhsshppslpcDlPsFpIpsGcL....ps-tppsIphppsshsIlFDsosohpscp....ltsppsululLK-chll.sssGpsQphsYshhss.slsKcDlhshlsphculh..llslhslllalhssAhpFIpVolLAhIGhllpshtp+pLsY+plWploAYSlTLsTVFFsIMcsLphsVPsshhlsahVshIlLaLslKElP ...............................phhp.hhh.uh.a.sspchtph+..ht.thhKsIlYlhlLshlhslP........hshhhh....sslp........pslphspptIsc......clP.D.FpIc.NGpL.......p.s.c.tp.ps..h..h..p.ps...........sh....l...hs....FD.P.s..up.hsppp............l...s....pt.p....u....l...hhL...p.Dchll...hs..........s.......G.....t.o...p.o........h.........sYs.........p...hh...h...........slsp...psl.p....s...hl.s..t..h.....csh.....h........h.hlh.l.hh.llla...lh.plhhhF...l....s....l..h.l...lA.h.l..G...t.h.....h.u.th........p..p....pl.sa..tp..shplssYuhTlPslhhhIhph......h..p.....hh..hssuhhlh..hhlshlhha.lsh+p...................................................... 1 16 31 35 +6520 PF06692 MNSV_P7B Melon necrotic spot virus P7B protein Moxon SJ anon Pfam-B_22334 (release 10.0) Family This family consists of several Melon necrotic spot virus (MNSV) P7B proteins. The function of this family is unknown. 20.80 20.80 22.60 22.10 19.40 19.40 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -8.99 0.72 -4.19 2 21 2009-01-15 18:05:59 2003-07-28 15:45:34 6 2 3 0 0 21 0 60.50 83 73.55 CHANGED MACY+CDSSPGDYSGALLILFISFVhFhITSLSPQGNTYVHHFDNSSlKTQYVGISTNGDG .........MAChRCDSSPGDYSGALLILFISFVFFYITSLSPQGNTYVHHFDNSSVKTQYVGISTNGDG... 
0 0 0 0 +6521 PF06693 DUF1190 Protein of unknown function (DUF1190) Moxon SJ anon Pfam-B_22972 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 212 residues in length and is known as YjfM in Escherichia coli. The function of this family is unknown. 21.80 21.80 22.40 22.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.35 0.71 -4.19 26 1138 2009-01-15 18:05:59 2003-07-28 16:26:18 6 2 707 0 103 463 18 162.00 45 76.05 CHANGED cscps...sslapsss-Chpsssshst.....pCpsAYppAhscttcsAP+YsoptsCcu-FGtspCst.......................spsppu...............Gt..ahPhMuGahhuph...hsssst.h....ssQPlapots.....t....s.tspahsusGpshssuhpss...pshssscs........shtspssso+sl..SRGGFG..posuupouhGu ................................sDtssohYpsssDC.ssssst.us...........................CssuaNNAhpEt.psAPKasTp-sC.ucFG.tpC...............................................pspppS...................WhPlhuG.ahhuRl.............hts..s.t..........spQslasSts.su............suhshapssoGc..hstutsG.........+p...h.sscT........................uhus..tss.....T..s.....oTV....oRGGFG.cSsutpupht............................................................. 0 15 40 66 +6522 PF06694 Plant_NMP1 Plant nuclear matrix protein 1 (NMP1) Moxon SJ anon Pfam-B_22388 (release 10.0) Family This family consists of several plant specific nuclear matrix protein 1 (NMP1) sequences. Nuclear Matrix Protein 1 is a ubiquitously expressed 36 kDa protein, which has no homologues in animals and fungi, but is highly conserved among flowering and non-flowering plants. NMP1 is located both in the cytoplasm and nucleus and that the nuclear fraction is associated with the nuclear matrix. NMP1 is a candidate for a plant-specific structural protein with a function both in the nucleus and cytoplasm [1]. 
25.00 25.00 25.50 25.50 20.90 24.80 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -11.89 0.70 -5.50 2 45 2009-01-15 18:05:59 2003-07-28 16:31:52 6 4 27 0 22 47 0 238.30 52 89.86 CHANGED MAuKQMEEIQ+KLuhLpYPRANAPAQSLLFAGhERYtLLEWLFF+LLGD+SPFoQQNhQGDulDRDEEssRIQaLAEIApFLGITsosDsEAIQGRGSYE-RhEhL+LIVDLVEAShYADNPEWSVDcQltKDlQLlDuIAEKQuQIFSEECKLFPADVQIQSIYPLPDIu-LEhpLS-.op+h.sLQpMVp-LASKasYNPsE-Ys-sEhKLRtaLpSFL-Ts+oFNTIYTKEI+PWTHMMEVPQLHGFGPAANRLLEAYphLhKFLGNL+sLRDSasAhAAGS..osusEPSSVT+IIS-CEoALThLN+sLuILSsSlAREp ...........................................M-.lppKL..LtYsRupssuQpLLaA.GhERYtLL-WLFF+LLGD+SP.FoQQshQG-.uh.RDEEssRlQhL..hcIAph............LGlssphD.-sIpGcsoacpphthlp.I......l-lV...-ushhu-N...EaS..........lD-QhtKDlpLlDuIu..E+Qu...lFS-..ECKLFPsD.V..Q.I.............Q.....S..h..slPDls-LE.phuc.s+h.......h.....sLQphVp-LAuKas.Y.NP.sE-as.EsE.p.L+tpLpoFL-os+sFNhIYoKEI+PWTHhMElPQLHGhGPAANRLLEuYphLLKFLuNL+sLRDSasAhuhGS...........s......ustPSSVo+IlS-CEsALThLNcsLuILSsSlAR-..................................... 0 6 17 20 +6523 PF06695 Sm_multidrug_ex sm_multidrug_ex; Putative small multi-drug export protein Vella Briffa B anon Pfam-B_16747 (release 10.0) Family This family contains a small number of putative small multi-drug export proteins. 26.10 26.10 26.20 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.52 0.71 -3.95 36 377 2009-01-15 18:05:59 2003-07-28 16:38:41 6 1 337 0 147 343 71 120.40 33 70.77 CHANGED ELRGAIPhul.shGls.hpuhllullGN.lLPl..PhlLhhlchlhpahpc.hthhpt....lhshlhc+scc.pspplc+a...u..alGLhlFVAIPLPGTGsWTGuLsAhlLslch+cuhhAlhlGlllAull ....................ELRGAlPhul..sh....G.....ls....h.tuhllu...l.lGN...hlPl.............Phlhhhhcplh..pa........htc....p..h..htt..........hhs.h...hhc+u...cc...tsppl..c+h...u....ahGLhLFVuIPL.PGTGAWTGsLhAslLshch+puhhAlhlGlllAulI........................................................ 
0 64 112 132 +6524 PF06696 Strep_SA_rep Streptococcal surface antigen repeat Moxon SJ anon Pfam-B_22674 (release 10.0) Repeat This family consists of a number of ~25 residue long repeats found commonly in Streptococcal surface antigens although one copy is present in the HPSR2-heavy chain potential motor protein of Giardia lamblia (Swiss:Q24984). This family is often found in conjunction with Pfam:PF00746. 21.70 21.70 21.70 21.70 21.40 21.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.60 0.72 -6.88 0.72 -4.10 32 550 2009-01-15 18:05:59 2003-07-28 16:42:00 6 17 71 12 39 523 0 24.90 51 8.42 CHANGED AsYpAtLupYps-LAplQKsNu-tc ..AsYpApLupYps-LA+VQKsNA-sc... 0 3 11 25 +6525 PF06697 DUF1191 Protein of unknown function (DUF1191) Vella Briffa B anon Pfam-B_16754 (release 10.0) Family This family contains hypothetical plant proteins of unknown function. 26.40 26.40 26.40 35.50 25.90 26.30 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.99 0.70 -5.59 11 98 2009-01-15 18:05:59 2003-07-28 16:46:46 7 4 18 0 70 84 0 258.40 34 85.36 CHANGED po..pttslpsuphLDshlpDauh...Ruhp.+h+TGllYpl.sLPuNL..SGIchussRlRuGSLRRpGs.cas...-FslPsGlsVpPhscRlllVhpNLG.NaSs.lY...Ysls.G..YcllSPVLGLLsYsAsspusssp......pls.lhus.tsPIplsF..s.hsssstss.......usshCssFs.sG...sss.hss..ss.sCtsp.c.pG...HaulVl.ssps.tss..st..................ttp.ttW+.....hhsuhssGshlLGlL..llstls+hK+...+p+hc-MERcA.psE..sLc.suhVG+oRA.PsAssoRTpP .....................t.......tsscsLDthlp-hAh......+uh.....p..+TGhlass...plP.uNL......oGl..clu..slRlRuGSLhppGs...pat....pFtlPstlhs.......pPhs.c.RlslVapsLG...NaSs.hY...Y.sls..G..YphluPVlGLhsYsusshssss.......pls.lhus..tsPIhlpF..sshpssst..s........ssspClsFshsG......psp...hp..shsssssChsp.p.pG..HaulVV.psps..ss.sss..........................................ptp.phWh.....hls....u....h....ssGhh.h...LslL...s..hl.lh.hhs..+h++.cp+hppMERpA.psEsLp.hshVGco+A.PsAssTRTpP.......... 
0 8 45 56 +6526 PF06698 DUF1192 Protein of unknown function (DUF1192) Moxon SJ anon Pfam-B_22780 (release 10.0) Family This family consists of several short, hypothetical, bacterial proteins of around 60 residues in length. The function of this family is unknown. 24.30 24.30 24.40 24.40 23.80 24.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.86 0.72 -4.16 27 170 2009-01-15 18:05:59 2003-07-28 16:48:47 6 1 169 0 63 127 143 58.50 47 83.82 CHANGED DD.-hP+p+ss...p.lupD.LohLSV-ELppRIuhLpuEIsRlcsthspKpAs+sAA-ulF+ ...................D-.Dhs+.+su.......t.lupD.LulLSVsELcpRIALLpuEItRLcA-ht+KssoRuAAEALF+.... 0 17 37 47 +6527 PF06699 PIG-F GPI biosynthesis protein family Pig-F Wood V, Studholme DJ, Finn RD anon Pfam-B_8602 (release 8.0) Family PIG-F is involved in glycosylphosphatidylinositol (GPI) anchor biosynthesis [1-3]. 21.70 21.70 21.80 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.67 0.71 -4.36 25 274 2009-01-15 18:05:59 2003-07-28 17:00:18 6 13 240 0 190 266 2 180.90 27 70.52 CHANGED sallshhhlhlhhh..hhhh.hht...sshssLth.slhshslhpllYhhlp...hsssspp...............spp.................pshhhtlhull...lslllosPhhhhlllLaGAPl.lpplhcTaLhulHhShLshhPlhslhusshps..WpclFshpt.hs...hht.....sslssllGuWlGulPIPLDWDRPWQpWPIollsGAalGhhlGh.hlushh ...................................................................................................................h.........h.h.......h....ht....s.h.hhh....h..hhh..hha.hhlp....sststt.......................................................................................thh.thhphh...lhhlhus.hhthlhlLaGAPl..h.ph..hhc....Thlh.....uhhhohLsh..h.Ph.....hh.....lhGs...........s.hps..................Whpl.h....sh..t...t.shs...............hht................sslusllGA.WlGAhPIPLDWDRsWQtWPlshshGuhhGhhlGhhlu...h............................. 
0 53 97 153 +6529 PF06701 MIB_HERC2 Mib_herc2 Artzt K, Studholme DJ anon Pfam-B_6026 (release 8.0) Domain Named "mib/herc2 domain" in [1]. Usually the protein also contains an E3 ligase domain (either Ring or Hect). 19.40 19.40 19.50 19.80 19.30 19.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.46 0.72 -4.01 9 640 2009-01-15 18:05:59 2003-07-28 18:16:04 8 155 98 2 410 595 5 64.20 47 5.03 CHANGED GsRVVRGsDW+Ws-QDGs.uthGpVhp.......tupcuhlsVpWDsGspNsYRhGhcGpaDL+ls-ss .....GsRVVRGsDW.....c..W..sc.QDGGpG...p......hGpVsp.......................................t..s..sc........uhlsVpW.D.sG.s.p.ss..YRhGhc.Gt..aDL+lhs..................... 1 148 173 300 +6530 PF06702 DUF1193 Protein of unknown function (DUF1193) Vella Briffa B anon Pfam-B_16766 (release 10.0) Family This family represents the C-terminus of several hypothetical eukaryotic proteins of unknown function. Family members contain two conserved motifs: DRHHYE and QCC, as well as a number of conserved cysteine residues. 
23.60 23.60 23.60 23.60 23.50 23.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.56 0.70 -4.93 19 305 2009-01-15 18:05:59 2003-07-29 09:31:20 7 7 92 0 209 297 0 201.40 42 44.38 CHANGED Fhhssu..............sNhCFaGcC.YYCcs-aAlCG..ps-hlEGSlshaLPs..hhshppaRsPWpRoYpcscp.AcWEssssYCss.VKpp.PYDpGtRLLDlIDhulFD...................FLIu.........Nh.......DRHHYEoFpchsst.......shl.............................................laLDNu+uFG+ss+DEhSIL..APLhQC.ChlR+STh.RLpllp..s.ttLocshcEuhtpD....lsPlLscsHlpAl-RRLthlhpslcpChcppG..tpVl.pD ............................................................hosusNhCFaucC..YYCpppcslCG..ps..c..hEGSlshaLPs....hh.htphRpPWtRoYpc.tph.A....pWEh.....s...sYCpt...V+ph.P...Y..s..pu..RLLDlh...DhslFD...................F.LhG....................NhDRHHYE..sF..pt..s.st................shl...................................................................laLDNu+G.F...Gp.spD..EhSI.L...uP.....L.QC.C............h...........IR+STh.+LphLtt...s.....htLopl..hccuh.t.pD......lsP...l...L...s-....sHLpALDRRLthlLpsVcpChcp......G...ttVh............................... 0 51 67 138 +6531 PF06703 SPC25 Microsomal signal peptidase 25 kDa subunit (SPC25) Moxon SJ anon Pfam-B_22374 (release 10.0) Family This family consists of several microsomal signal peptidase 25 kDa subunit proteins. Translocation of polypeptide chains across the endoplasmic reticulum (ER) membrane is triggered by signal sequences. Subsequently, signal recognition particle interacts with its membrane receptor and the ribosome-bound nascent chain is targeted to the ER where it is transferred into a protein-conducting channel. At some point, a second signal sequence recognition event takes place in the membrane and translocation of the nascent chain through the membrane occurs. The signal sequence of most secretory and membrane proteins is cleaved off at this stage. 
Cleavage occurs by the signal peptidase complex (SPC) as soon as the lumenal domain of the translocating polypeptide is large enough to expose its cleavage site to the enzyme. The signal peptidase complex is possibly also involved in proteolytic events in the ER membrane other than the processing of the signal sequence, for example the further digestion of the cleaved signal peptide or the degradation of membrane proteins. Mammalian signal peptidase is as a complex of five different polypeptide chains. This family represents the 25 kDa subunit (SPC25). 22.80 22.80 23.30 23.40 22.70 22.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.98 0.71 -4.66 32 365 2009-09-10 23:37:10 2003-07-29 10:05:20 6 7 290 0 231 339 3 149.90 27 76.60 CHANGED slKpshD-ulsshltph....G..........appsapLhDs+LhlGhsulslAshuhhhDh..hhsa.po+.shhhhsVshYhlLsslLhhashhhEKshlahuppct.....sscplpls...........ophpK.....asPhYpl..plshpcspstt.....p..hphpts.hscaFstsGhhhtshapphlsp.lssl ..............................tlKpslDDulpphLhpt.....t.....................atEsatLhDsRLhlshhushlAhhuhhaDh..hhsFspo+....shlhhsVh............s...........Y...Fl...h.ulLslasha.h..E.Kshhhhuhpct.............ssct.....hplu.............Sphc.+.............asshYpL..plshtsspstt..........pp..hphsps.hspa...........FcpsGhlh.shapp.lsphhp..h....................................... 0 84 131 191 +6533 PF06705 SF-assemblin SF-assemblin/beta giardin Moxon SJ anon Pfam-B_22934 (release 10.0) Family This family consists of several eukaryotic SF-assemblin and related beta giardin proteins. During mitosis the SF-assemblin-based cytoskeleton is reorganised; it divides in prophase and is reduced to two dot-like structures at each spindle pole in metaphase. During anaphase, the two dots present at each pole are connected again. In telophase there is an asymmetrical outgrowth of new fibres. 
It has been suggested that SF-assemblin is involved in re-establishing the microtubular root system characteristic of interphase cells after mitosis [1]. 30.00 30.00 30.30 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.61 0.70 -5.23 6 409 2009-01-15 18:05:59 2003-07-29 10:19:12 6 3 47 0 84 409 2 191.20 61 89.87 CHANGED ssousKLEHVsE+Fuuhas-lEpEKQsRRlsEuoRhpllpEuls+LEKulEAElKRRAESDKQlQsHFEuEl+sLpERptpQlsDhpsulKsAl-uLup+lpDLHoll+-ER-pRRsDIEHLAsSLVsKVNECVuAlDEERspRhpcpshhhK+luEDlhplpp+lDTEKssR-u-lSuLRoE.lH-sluNRNlsDEQFcshVLDElsulKuALshEREERlAEDDEIVQAlNDYT+ALQ-GL+lVss .........................................................................hcsR...RVDDDTRVKMIKDAIAHLD....RLIQTESRKRQuSFEDIRE...EVKKSADNMY.LTIKEEIDT.MAANFR.KSLAE.......MGDTLNNVETNLQNQIAI..HNDAI...AALRKEALK.SLNDLETGIATENAERKKMYDQLNEKVAEGFARISAAIE...KETIARERAVSAAT..TE.Ls.ssh............................................................................................. 0 59 69 80 +6534 PF06706 CTV_P6 Citrus tristeza virus 6-kDa protein Moxon SJ anon Pfam-B_22973 (release 10.0) Family This family consists of several Citrus tristeza virus (CTV) 6-kDa, 51 residue long hydrophobic (P6) proteins. The function of this family is unknown. 25.00 25.00 30.10 30.00 20.00 18.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.69 0.72 -4.48 2 15 2009-09-14 12:01:29 2003-07-29 10:22:59 6 1 2 \N 0 16 0 50.70 86 98.70 CHANGED MDCVIQGFLTFLVGIAVFsAFAtLIIIVITIYRCThKPVRsASPYGTHATl MDCVIQGFLTFLVGIAVF..SAFAGLIIIVITIYRCTTKPVRNsSPYGTHATV................. 0 0 0 0 +6535 PF06707 DUF1194 Protein of unknown function (DUF1194) Moxon SJ anon Pfam-B_22986 (release 10.0) Family This family consists of several hypothetical Rhizobiales specific proteins of around 270 residues in length. The function of this family is unknown. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.22 0.70 -5.35 35 226 2012-10-10 16:07:06 2003-07-29 10:26:08 6 6 125 0 81 566 155 199.90 36 75.78 CHANGED ss.DltLlLAVDVStSlDtpEhplQR-GaAsALssP-VhpAlhu.GshGclAlshhEWuGsspQplllsWolIcsspsApshAspltssPpttspt.TuIusAlsa.usshhspsshtuhRRVIDlSGDGssN....pG.sPs..tAR-tshstGl.sINGLsIhssss.s..........sLssYYpssVIuGPGAFVlsspsac-FscAlRRKLlhEluuhs ...........................................................................................s.DltLlLAlDVStSlDt...sEh..pl..Q..pcGh.A...t.A.L.p.s..sp.lh.pA.l.hs...u....G.plAlshhEWuu.spQ........plllsWphIss...tsupshAspl..tst.......s.....p.t.....tsph..TulusAlth.us.s.l.h..s.....ps.s....h.......uhR..+....VIDlSGDG..sN..........pG...sPs......tuRctsh..s.....p.G..l...sIN..GL...sIhspss...........................sLs.t.YYpspVIuGP......G.....AFVhsspshp.-FscAlRcKLlhElus.s.................................................. 1 15 38 51 +6536 PF06708 DUF1195 Protein of unknown function (DUF1195) Moxon SJ anon Pfam-B_22828 (release 10.0) Family This family consists of several plant specific hypothetical proteins of around 160 residues in length. The function of this family is unknown. 25.00 25.00 78.30 78.00 21.70 22.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.11 0.71 -4.80 7 56 2009-01-15 18:05:59 2003-07-29 10:29:54 6 2 19 0 25 53 0 141.80 53 85.58 CHANGED M+tsct...hPsoTsosssp....hsupppsuttu..hhG+GRYKhWALAAIhLLAhWSMhTGoVoLRWSuGsl...sphs-DlshPhhDDLDVLEMEEREKlV++MWDVYTpot..pl+LPRFWQEAFEAAYEELsSDsssVp-AAlSEIA+MS.lRplpl-sss.cSs .................................s..............thts....utsu..lhGKGRYKhWALAAIhLLAhWSMhsuoVoLRWS.u..GsL...sphssDlssPlh.DDLDsLEMEEREKlV++MWDVYTpop...clRLPRFWQEAFEAAYEELsuDs.ssVR-AAlSEIA+MS.l+plpl-.ss.p..p................. 
0 3 14 20 +6537 PF06709 DUF1196 Protein of unknown function (DUF1196) Moxon SJ anon Pfam-B_23162 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 51 residues in length which seem to be specific to Vibrio cholerae. The function of this family is unknown. 21.70 21.70 24.90 24.70 19.40 18.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.76 0.72 -4.03 2 41 2009-01-15 18:05:59 2003-07-29 13:17:12 6 1 18 0 4 14 0 48.50 92 96.46 CHANGED MTVPLEAFVMCVFLMPTLPFKGVAKGIYAKQHSIKsaHIHKTKMLHlDIFR MTVPLEAFVMCVFLMPTLPFKGVA.KGIYAKQHSIKsHHIHKTKMLHIDIFR. 0 4 4 4 +6539 PF06711 DUF1198 Protein of unknown function (DUF1198) Moxon SJ anon Pfam-B_23016 (release 10.0) Family This family consists of several bacterial proteins of around 150 residues in length which are specific to Escherichia coli, Salmonella species and Yersinia pestis. The function of this family is unknown. 25.00 25.00 42.00 41.80 20.80 19.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.85 0.71 -4.39 5 511 2009-01-15 18:05:59 2003-07-29 14:21:16 6 1 506 0 25 130 1 147.00 77 97.65 CHANGED MlWIILATLlVVFIVGFRVLTSDTRRAI+RLSERLsIcPVPIESMIDQMGKTAGsEFIRYLcRPsEuHLQNAAQVLLIWQssIVDuSDcNlphW+RLLpKARLAAPLT-sQlRLALGFhRELDPDAaELssFQpRYNphFpPE-GVaW ...MIWIMLATLAVVFVVGFRVLTSGuRKAIRRLS-RLNIDVVPVESMlDQMGKoAGcEFLRYLHRPDESHLQNAAQVLLIWQlVIVDGSEQNLhpWHRlLQKARLAAPITDAQVRLALGFLREhEP-MQ-INAFQhRYNAFFQPtEGVHW................. 1 1 5 14 +6540 PF06712 DUF1199 Protein of unknown function (DUF1199) Moxon SJ anon Pfam-B_23160 (release 10.0) Family This family consists of several hypothetical Feline immunodeficiency virus (FIV) proteins. Members of this family are typically around 67 residues long and are often annotated as ORF3 proteins. The function of this family is unknown. 
25.00 25.00 111.00 110.70 21.60 18.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.53 0.72 -4.15 5 6 2009-01-15 18:05:59 2003-07-29 14:25:47 6 1 2 0 0 4 0 51.80 73 79.54 CHANGED MLaRNScsVPAuIYRSNNIFsNNQuSGSMETSTISSPSRRIRNNFLGLLGTR ML.RNuchVPs.IYRsNNIFssNQsSGSMETSTISSPSRRIRNNFLGLLGTR 0 0 0 0 +6541 PF06713 bPH_4 DUF1200; Bacterial PH domain Moxon SJ, Bateman A anon Pfam-B_23245 (release 10.0) Domain This family consists of several hypothetical proteins specific to Oceanobacillus and Bacillus species. Members of this family are typically around 130 residues in length. The function of this family is unknown. Members of this family have a PH domain like structure [1]. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.36 0.72 -4.11 46 441 2012-10-04 00:02:25 2003-07-29 14:29:16 6 2 256 0 64 351 8 72.70 29 53.89 CHANGED TpYpl..psspLhlpsG.hh+pc.IslpcIppIp.sps.hs....usAhohcplpIpYs..........thsp.lhISPcccccFlptLpcps .............+Ypl..psssLhl+tG.hh+pc...Islc-I+pI..cpstp.h........u.ths.hcpLEIp.Ys..........tacs.hhl..PpccccFlshlccps............................................ 0 24 53 57 +6542 PF06714 Gp5_OB Gp5 N-terminal OB domain Bateman A, Mesyanzhinov VV anon [1] Domain This domain is found at the N terminus of the Gp5 baseplate protein of bacteriophage T4. This domain binds to the Gp27 protein [1]. This domain has the common OB fold [1]. 
25.00 25.00 27.10 25.80 24.70 23.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.17 8 60 2009-09-11 05:11:09 2003-07-29 14:46:33 6 7 59 3 0 62 590 132.00 37 21.49 CHANGED GlHPsQKs+u-shGlsTE-LLWMoshpslTSAAlSGIGpSPTGlVEGTaVaGaFLDKapQsGllLGTYsGIYp-KPssscGFsDPsGpYP...RYlGNDVNlLARGGhp.clu.sp..soshI...........QDtNoslAlsPD-pPhsEI.sDssPc ........G.Hs..+.ps-spGlPTEcLPWholl.PsooAuhS.G.lGtSsTGll.pGotVhGaaLD.ch.pps.slllGoh...sGhhpppssppcGFsDPsGpYP...hhlGsDsshLstGs.....phh.st..ps..h...........pstNhshuh.sst.s.st...sspP.......................................................................... 0 0 0 0 +6543 PF06715 Gp5_C Gp5 C-terminal repeat (3 copies) Bateman A, Mesyanzhinov VV anon [1] Repeat This repeat composes the C-terminal part of the bacteriophage T4 baseplate protein Gp5. This region of the protein forms a needle like projection from the baseplate that is presumed to puncture the bacterial cell membrane. Structurally three copies of the repeated region trimerise to form a beta solenoid type structure [1]. This family also includes repeats from bacterial Vgr proteins. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.33 0.74 -7.26 0.74 -3.66 89 445 2009-01-15 18:05:59 2003-07-29 16:39:43 7 22 205 27 84 435 64 24.00 35 5.73 CHANGED usto.hsVtuNpohsVsu.spopsls .......tspopsVtsNRTpsVss.NcT.ols.......... 0 16 36 60 +6544 PF06716 DUF1201 Protein of unknown function (DUF1201) Moxon SJ anon Pfam-B_23269 (release 10.0) Family This family consists of several Sugar beet yellow virus (SBYV) putative membrane-binding proteins of around 54 residues in length. The function of this family is unknown. 
20.90 20.90 22.30 22.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.85 0.72 -4.15 2 5 2009-01-15 18:05:59 2003-07-29 16:55:40 6 1 3 0 0 5 0 51.60 68 96.63 CHANGED MDCVLRSYLLLAFGFhICLFLFCLVVFIWFVYKQILFRsTs.SNEARaN+STVV MDCVLRSYLLLAFGFLICLFLFCLVVFIWFVYKQILFRNTPPSNEARFNRSTVV......... 2 0 0 0 +6545 PF06717 DUF1202 Protein of unknown function (DUF1202) Moxon SJ anon Pfam-B_23300 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 335 residues in length. Members of this family are found exclusively in Escherichia coli and Salmonella species and are often referred to as YggM proteins. The function of this family is unknown. 21.10 21.10 21.30 22.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.83 0.70 -5.15 4 434 2009-01-15 18:05:59 2003-07-29 16:59:04 6 1 410 0 5 129 3 303.60 78 92.48 CHANGED hLhoG.shADspsPTENILK-QFpKQYHGILKLDuITLKNLDucGNQATWSAEGDlSSu-DLYThVGpLADY.llEpTWTKDKPVKFSAMLTSKGTPASGWoVsFYShQhAASD+GRslDDIKTNsKYLIVNS-DFNYRFuplcuuhssQKsSIsuLcc-lpALDKphlsApKtADAYWGKsAsGKphTRt-AFKKlpppRD-FNKpNDSpsFAhKY-KEVYQPAlsAC+KQSEcCYEls....IQQKRDhDIpEQRRQsFLKSpcLsRKlQsDWITLEKGQYPLshKVpclppppssIhMKIcDINpu...aK+ ..........................................MLMTGNuWADGEPPTENILKDQFKKQYHGILKLDuITLKNLDAKGNQATWSAEGDVSSSDDLYTWVGQLADYELLEQTWTKDKPVKFSAMLTSKGTPASGWoVNFYSFQAAASDRGRVVDDIKTNN.KYLIVNSEDFNYRFSQLEoALNsQKNSIPALEK-VKALDKQMVAAQKAADAYWGKDANGK...QMTR..E-AFKKI.HQQRD-FNKQNDSEAFAVKYDKEVYQPAIAA.CHKQSEECYEVP........IQQKRDFDINEQRRQTFLQSQKLSRKLQDDWlTLEKGQYPLTMKVSEINSKKVAILMKIDDINQANERWK.K............................................................... 0 2 2 3 +6546 PF06718 DUF1203 Protein of unknown function (DUF1203) Moxon SJ anon Pfam-B_23313 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 155 residues in length. 
Family members are present in Rhizobium, Agrobacterium and Streptomyces species. 20.60 20.60 21.40 21.00 20.50 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.45 0.71 -4.28 20 173 2009-01-15 18:05:59 2003-07-29 17:06:05 6 1 171 0 60 169 7 117.00 36 73.61 CHANGED GhPCRpsL+cuctG-plLLLuYpPa........ssssPYsEsGPIFlptpsspttsu.-slP...........sl.sutshslRGYsu-s+Il..sucllsss-.......hsuthcclLscs-VAalHVRsAcpGCYtsRl-R ..GhPCRhsL+cupsG.Ep.llL.lsYp.h........ssssPYptsGPIFl+ttssssh....s..s....t....lP...........hl.psch...hslRuYsucshlV..sucVspuss.............lcptlc.clFs.ss..-V..sYlHl+sAc.GCatsclcR............ 0 10 36 47 +6547 PF06719 AraC_N AraC_N-term; AraC-type transcriptional regulator N-terminus Vella Briffa B anon Pfam-B_16798 (release 10.0) Family This family represents the N-terminus of bacterial ARAC-type transcriptional regulators. In E. coli, these regulate the L-arabinose operon through sensing the presence of arabinose, and when the sugar is present, transmitting this information from the arabinose-binding domains to the protein's DNA-binding domains [1]. This family might represent the N-terminal arm of the protein, which binds to the C-terminal DNA binding domains to hold them in a state where the protein prefers to loop and remain non-activating [2]. All family members contain the Pfam:PF00165 domain. 
24.20 24.20 24.30 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.65 0.71 -4.92 170 1635 2012-10-10 13:59:34 2003-07-29 17:09:29 8 4 1088 0 429 1324 58 152.60 33 50.71 CHANGED Ts..lsulplhR.sspsst.h.sslYcPulsllsQGpKpshlG.ccsapYcstcYLlsolslPhpsplhpASs-pPhLultlclDhshls-Llhchs.ts.tstsstt.................ulsssphsssLh-AhhRLlcLLcp.P.p-hslLuPhlhREIhYRLLpGspGstL ....................................lsslplhp.sp.ps.h.s.hs..ss.h.Y.cPul.sllhQGpK.hsh..l..u..c.c.s..a....p..YDsscYLlloVsLPhpscs...Aos..-tPhhulpLslDht.lt-Llhchspstt.hps.shst.................Glssuslsp..pllsAs.RLL.c..l.h..c..p.P..hD..t.p.l.Lus.IhREIlY+lLpGspGstL........................... 0 101 207 325 +6548 PF06720 Phi-29_GP16_7 Bacteriophage phi-29 early protein GP16.7 Moxon SJ anon Pfam-B_23362 (release 10.0) Family This family consists of several bacteriophage phi-29 early protein GP16.7 sequences of around 130 residues in length. The function of this family is unknown. 25.00 25.00 47.60 46.80 21.40 20.60 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.70 0.71 -4.48 2 6 2009-09-10 16:38:04 2003-07-29 17:11:43 6 1 5 10 0 7 0 129.30 60 99.11 CHANGED MEAILMIGVlsLCVIFLLSGRNNKKhQEARELEDYLEDLNpRlsQRTQILSELNEVIoNRSlDKoVNhSACElAVLDLYEQSNIRIPSDIIEDhVNQRLQoEQ-VLNYIETQRTYWKLENQKKLYRGSLK .......................MEAILMIGVlsLCVIFLLSGRNNKKKQEsRELEDY..LEDLNpRlsQRTQILSELNEVIoNRSlDKoVNhSACElAVLDLYEQSNIRIPSDIIEDlV.NQRLQoEQEVLNYIETQRTYWKLENQKKLYRGSLK.. 0 0 0 0 +6549 PF06721 DUF1204 Protein of unknown function (DUF1204) Vella Briffa B anon Pfam-B_16832 (release 10.0) Family This family represents the C-terminus of a number of Arabidopsis thaliana hypothetical proteins of unknown function. Family members contain a conserved DFD motif. 
21.00 21.00 21.00 25.10 20.50 20.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.41 0.70 -5.11 2 9 2009-09-11 06:55:55 2003-07-29 17:14:31 6 2 2 0 4 16 0 169.10 40 38.05 CHANGED c+.s.Lhc+.lcShShEAsphKsthtsLAhp+phhup...-hsRsp.Dhcc.+cKhsELtsRhhSEhKRLRpRR.cYAp..pppAL.+hAs.Fpu..........DhlthpPKFh-aNQVsGNlthL-tLVEuGElEhKSs-hM.RLlADt-tLcAEVcuFtIT-l.csDFDVhTLFtcl..-p+.....sus.s.TEuEc.t-spsEstuQtt.chsG.h.scsthAss.- ...................................................................................................................p.KhscLtsRhhuE.KRLRppR.chAc..p..pptL.+htp.hptchptl+hcl.DcphthpPKFh-h........NQVsGslthL-tLl-sGphEhKSs-hM.RLlADt-tLcAEVcuFtIT-l.csDFDVhTLFtcl..-p+.....sus.s.TcuEc.t-spsEstuQtt.chsG.h.scsthAss.-.................. 1 4 4 4 +6550 PF06722 DUF1205 Protein of unknown function (DUF1205) Vella Briffa B anon Pfam-B_16780 (release 10.0) Family This family represents a conserved region of unknown function within bacterial glycosyl transferases. Many family members contain Pfam:PF03033. 20.70 20.70 20.70 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.38 0.72 -3.75 33 513 2012-10-03 16:42:30 2003-07-29 17:22:06 7 8 340 18 67 391 0 95.10 41 24.87 CHANGED lDssPsShch...s...su..hssl..shRYVPYNG...uVlPs..WLhps....ssRs.RVslTLGsottp.h.tsst.sslscllsulucL.DsElVsTlsspptttL.uslPsNVR .................................IDssPPShpl........s..........su.....pPsl...sMRYV....PYN..Gu....AVh.s.Whccs.........scR.t.R.lhloLGTspsh...st.........Ghc.......................hluhlh-..u..su....-l.DAEl.llpls.ss.sps.sL..psLPsNVR............. 0 21 47 59 +6551 PF06723 MreB_Mbl MreB/Mbl protein Moxon SJ anon Pfam-B_471 (release 10.0) Family This family consists of bacterial MreB and Mbl proteins as well as two related archaeal sequences. MreB is known to be a rod shape-determining protein in bacteria and goes to make up the bacterial cytoskeleton. 
Genes coding for MreB/Mbl are only found in elongated bacteria, not in coccoid forms. It has been speculated that constituents of the eukaryotic cytoskeleton (tubulin, actin) may have evolved from prokaryotic precursor proteins closely related to today's bacterial proteins FtsZ and MreB/Mbl [1]. 56.40 56.40 56.40 56.40 56.30 56.30 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.76 0.70 -5.84 31 4314 2012-10-02 23:34:14 2003-07-30 09:48:34 8 4 3147 5 973 7141 4347 321.20 54 95.81 CHANGED usDluIDLGTANTLV..YV+G+GIVLsEPSVVAlcps.....sppllAVGpEAKpMlG+TPusItAlRPh+DGVIADa-hsEpMLKaFIp+lps..ppth.hpP+lllClPoGlTsVE+RAlp-uuppAGA+cVhllEEPhAAAIGAGLPVpEPsGsMVlDIGGGTTElAVISLGGIVhSpSlRVAGDchDEuIlpYlR+pasLlIGEpTAEpIKhEIGoAh.spcsc...ph-l+GRDlloGLP+slplsupElp-ALp-slstIl-u.l+psLEpTPPELuuDIh-+GIlLTGGGALL+GLDchlpccTslPVhlA--PLsCVAhGoGcsL-phctlpp .............................................................................................................s.pDluIDLG.TA.N..T..L..l....YV...+...G...+.........G...I........V.........L...........s.........E.............P.......SV..VA.lcpc............................sppl..h....AV..G.c......-.....A......K....p....M....l....G.....R....T................P..G....N....I................t.......A.......I......R.........P..............h...............K.............D.......G.......V..........I..............A........D..........F..............p.........l.......T........E.......p..........M....L..p..........a......F.I....c.p.lps..........pp...h.h......s......pP...Rl...llCVPsGsTpVE.+R.AlcE...uA..h..t..A..G..A..R..-.Va.LIEEP..h..AA..A....I..G...A.G....L.....P........V....p....E.....P....s....G....s.....M..VV...DIGGG.TT.EVA..V..I.....S.....L...........G...........G.............l....V.....h.....S.....p.....S.....l.....R......l....uGD......+.h......D....-....A....I.....l....s....Y....l.............R....+....p......Y.............s...........l.........L.......I......G...............E.............p.............T.
............A.........E.......c...I........K...h..........c.....I...G.....o......A..h...s...s.....s....c..sc..............ph-...V.....R............G............R............s..........L............s............s............G.............l............P...........+...........s.......h...........s...........l....s....S........s....E.........l...........h....E.A....L.........p.E.sl..s.........t.........I....V.s.A....V.+.s.sLE..p...s......P.........P.....E.....L......A......u.....D.I..h.-.........+........G.........l........VL.TGG.GALL+......sLDclL....t-E.T.....u.......l.....P.V.h.l.A-..-.PLsCVAhGsGcuL-.h-h..pt................................................................................................................................. 0 366 693 848 +6552 PF06724 DUF1206 Domain of Unknown Function (DUF1206) Yeats C anon Yeats C Family This region consists of two a pair of transmembrane helices and occurs three times in each of the family member proteins. 22.30 22.30 23.20 22.90 22.20 22.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.23 0.72 -4.22 149 831 2009-01-15 18:05:59 2003-07-30 09:49:12 6 3 248 0 368 915 30 72.50 25 76.68 CHANGED Rh......GhsupGllahhlGhhs........ltsAh..st......supssutsuultpLhsp.PaGthLLsllulGlluaulaphhcAhhcph ............RhGhsupGllahhluhhs..........lth...Aht...s.......supssstsuultpL....hsp.P.hGphLLhllulGlluhulaphhpAhhtt.......... 0 107 231 321 +6553 PF06725 3D 3D domain Bateman A anon Bateman A Domain This short presumed domain contains three conserved aspartate residues, hence the name 3D. It has been shown to be part of the catalytic double psi beta barrel domain of MltA [1]. 
23.80 23.80 24.00 24.20 23.70 23.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.45 0.72 -4.00 41 3329 2012-10-01 21:39:58 2003-07-30 11:02:56 6 38 2021 13 572 2394 113 70.00 40 19.85 CHANGED polAVDsslIPlGohlalpu.................hthslAtDTGuAIKGs.+lDlahuosscAsp.hG..p+psplalLt .................h.slAVD.plIP..lGohl..alps........................................ht..hhl.AtDTG.G..AI...K....Gs..+lDlah.Ghs...s........cAsp....h.G...++ps.c.ValL............................. 0 203 382 471 +6554 PF06726 BC10 Bladder cancer-related protein BC10 Wood V, Moxon SJ, Coggill PC anon Wood V Family This family consists of a series of short proteins of around 90 residues in length. The human protein Swiss:O60629 or BC10 has been implicated in bladder cancer where the transcription of the gene coding for this protein is nearly completely abolished in highly invasive transitional cell carcinomas (TCCs) [1]. The protein is a small globular protein containing two transmembrane helices, and it is a multiply edited transcript. All the editing sites are found in either the 5'-UTR or the N-terminal section of the protein, which is predicted to be outside the membrane. The three coding edits are all non-synonymous and predicted to encode exposed residues [2]. The function of this family is unknown. 19.60 19.60 20.20 19.80 18.60 17.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.15 0.72 -3.95 11 187 2009-01-15 18:05:59 2003-07-30 12:38:41 7 1 168 0 112 155 0 61.80 41 52.91 CHANGED MaCL+ahlPlLllPh...s.uhhpAsshFhh.laLluhhlcppPCsaCullhhhhhhs.s..psshssh .....MaCLp.hhL.PlL.lIPh...p..hh.upshFhh...hall.....uhhl.++PCshCullhLhhhhh.s....ssWus........... 
2 23 43 82 +6555 PF06727 DUF1207 Protein of unknown function (DUF1207) Moxon SJ anon Pfam-B_23454 (release 10.0) Family This family consists of a number of hypothetical bacterial proteins of around 410 residues in length which seem to be specific to Chlamydia species. The function of this family is unknown. 20.30 20.30 25.20 22.50 15.80 18.40 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.18 0.70 -5.63 8 77 2009-09-11 12:14:12 2003-07-30 13:56:56 6 1 75 0 30 57 13 311.30 36 83.55 CHANGED +hlcssh.sll.csss..loLPsDplhups..spIsphlp-lPhlTuVEIsE...............pp..sshsptsstsss..cpps.sscs.sshtslWLPpGp.LFpPLlADPRtsp.SAuaR..ascplsGs...+lGuVuFG-slPhhR.sslu+apssh-lGLQuGVFSsFDLDsPSosLlNoDFhVuhhhShtsupaShRhRlYH.SSHLGDEFLLp+..sshsRhNLShEulDLhlSachtP..lRVYGGsGaIhp+-so.sl+PahhchGlELR.uPatl....s.LhucPlFAhch+saEppcashD.ShhhGhEau+FpshG...RKlphllEYapGaS.pGQFhREpscYhGhthpYtF ....................................................................................................................................................................p.t..sh...shhpslaLPp.ss.LFsPLlADPRpsp.SAuhR.....hs-ps.hGp................pluussFGuchhhhR..h-h..upaps..sh-lG....lQG.uVFSsFsL-p.....spu...........sh.VNo.DFhlus.hh.sas.........h....s...........ca...ShRhRlaHlSSHLGD........EF.....l.......Lsp..........Ps.h.p...R........h.N.hScEulDhhsShch........sPp....lRlYGGhGYll...s+...-.o.stcPhhhpuGlElR.....Pash.........ssLcupPlaAh.c...h+....haE-pcashD.o..hhsGhEau.....+hps.lG...RKlRshlEYapGaSpcGQFh+-pssYhGhtlpYsF......... 1 12 16 27 +6556 PF06728 PIG-U GPI transamidase subunit PIG-U Wood V, Moxon SJ anon Pfam-B_7677 (release 9.0) Family Many eukaryotic proteins are anchored to the cell surface via glycosylphosphatidylinositol (GPI), which is posttranslationally attached to the carboxyl-terminus by GPI transamidase. The mammalian GPI transamidase is a complex of at least four subunits, GPI8, GAA1, PIG-S, and PIG-T. 
PIG-U is thought to represent a fifth subunit in this complex and may be involved in the recognition of either the GPI attachment signal or the lipid portion of GPI [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.54 0.70 -5.60 64 358 2012-10-03 03:08:05 2003-07-30 14:05:33 8 7 283 0 247 623 14 321.40 29 81.80 CHANGED hsuhhlRlhl...hhsshs.phLss+lEl...................STPlsSa+plpE.GhaLhppu.l...........sPYsGul.hH..........psPLlLhlhshl..........................h.............hsllahlhDllsAhhLhth...........................................................thppstpph.tt..t........................hsshhlushYLhNPhslhoslutSTsshsNhhlhhslhsss.p.........sp...............hhhuslslAhuoa.lohYshhLlhPllhhhhp........................................................t.phh.hhhp...hhhlahsslhsLhh.hSah.lsu.......................sas...FlpssYshhlph...p-Ls...PNl..GLaWYFFsEhF-pF+sFFlhlFplhs.hhallP.LolRLpcp...........PhhhhhlhlulhulFK..sYPolu...............DsuhaLuLlslapp.l......................hthh+ash..lss.sshlhshlLuPlhaaLWIhtGoGNANF.FauloLla 
.....................................................................................h..shhhRhhl....hhssh.....lstpsEh...................soPlsS..a+p.............l........E.GhhLhp.s...h...........sPYsGsh.hH.........................psPll.l.hhthl............................................................htllahhhDhlsAhhLhhh................................t............................................ptht.....t.........................................................shhluhhYLhNPhsl.h.oC.lu.tS.osshsshhlhhhlhtsh.p...............sp................................shh.uhhhl.uhuoa.hShYPhhLhsPhlhhhhp............................................................................................t.hh...h..hht............hhhhh.h.s.slh.........hlhh...hShh..hhs.......................sap......al.tsYGhhl.h......DLs......PNl...G...La....W...Y....F....F.h....E.....hF....-.p....F....p.........F..F.....lhl...Ft.lph....hha.hhP....Lsl...+h..+cp..................Ph.h.h..hhhhlulhulh...K..sYPolu.....................D...suh...a...huhlslatp...l.........................h..h.h.+..h.....hhh...hhhlh..s.hLhPhhaaLWIhtGouNuNF.aauhoLsa......................................................................................................................................................... 0 91 140 206 +6557 PF06729 CENP-R NRIF3; Kinetochore component, CENP-R Moxon SJ anon Pfam-B_23665 (release 10.0) Family This family consists of mammalian kinetochore sub-complex proteins CENP-R, also referred to as nuclear receptor co-activator NRIF3 proteins. NRIF3 exhibits a distinct receptor specificity in interacting with and potentiating the activity of only TRs and RXRs but not other examined nuclear receptors. NRIF3 as a co-regulator that possesses both transactivation and transrepression domains and/or functions. Collectively, the NRIF3 family of co-regulators may play dual roles in mediating both positive and negative regulatory effects on gene expression [1]. 
CENP-R is one of the 15 components that make up the constitutive centromere associated complex (CCAN) part of the kinetochore. A sub-complex of CCAN, consisting of CENP-P/O/R/Q/U self-assembles on kinetochores with varying stoichiometry and undergoes a pre-mitotic maturation step. Kinetochore assembly is a cell cycle regulated multi-step process. The initial step occurs during interphase and involves loading of the 15-subunit constitutive centromere associated complex (CCAN). Kinetochores are multi-protein megadalton assemblies that are required for attachment of microtubules to centromeres and, in turn, the segregation of chromosomes in mitosis [2,3]. 27.00 27.00 32.30 32.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.65 0.71 -4.51 11 37 2012-10-08 12:26:51 2003-07-30 14:33:12 7 3 28 0 15 54 0 117.50 55 78.39 CHANGED +K+slssYSPTTGTpQhSPaSSPTSsptQ-h+NGPSNGp...cp.sc.slscRtpspspD.DtFMsLhScV-pS.EcIhElhpNLoSlQALEGSRELENLIGlSpuSChLppEhQKT+cLMoKspK.cLhcKpputlPs+-h ....+KKslhsYSPTTGTpQhS.FuSPoSsccQcHRNG.SNtp..pcp.sc.sLocptpspTc-.DtFhhLhSKVEp..EcIhchhpNLoSlQALEGSRELENlIGlSpsSp.LptEhpKTptLMops.c.pLhc+.pstlPt+t...... 0 1 1 5 +6558 PF06730 FAM92 DUF1208; FAM92 protein Moxon SJ, Eberhardt R anon Pfam-B_23546 (release 10.0) Family This family of proteins has a role in embryogenesis. During embryogenesis it is essential for ectoderm and axial mesoderm development [1]. It may regulate cell proliferation and apoptosis [2]. 
23.60 23.60 23.60 23.90 23.40 23.20 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.59 0.70 -5.01 3 180 2012-10-03 12:17:00 2003-07-30 14:52:42 6 5 78 0 107 155 0 185.00 52 69.77 CHANGED MhRRs....L-sRDAQTKQLQ-AVoNVEKHFGELCQIFAAYVRKTARLRDKADLLVNEINuYAuTETPNLKL..GLKsFADEFAKLQDYRQAEVERLEAKVVEPLKsYGTIVKMKRDDLKATLTARNREAKQLoQLERTRQRNPSDRHVISQAETELQRAsMDAoRTSRHLEETIsNFE+QKlKDIKsIlSEFITIEMLFHGKALEVaTAAYQNIQNIDEDEDLE ..................................R-sQs+.lpsslsNsEKHFG-hCplFAu.YsRKTARLRDKuD.......LVppl.tsYAsoEs.P.pL+h..GL+sFA-...chAKl.QDYRQAE........VE.RLEuKV.VpPLKtYG..sllK.pR--lKtshpARs+EhKQhppLE+hRQ+sPS.D.Rp.h...I.s..p...AEo-Lp+AshDsoRoo+pLEEs.......lssFE+QKl+DlKpIhu-FlpIEMhFHuKALEVhosAaQslpplD.-cDL.............................. 0 23 29 51 +6560 PF06732 Pescadillo_N Pescadillo N-terminus Moxon SJ, Wood V anon Pfam-B_77615 (release 9.0) Family This family represents the N-terminal region of Pescadillo. Pescadillo protein localises to distinct substructures of the interphase nucleus including nucleoli, the site of ribosome biogenesis. During mitosis pescadillo closely associates with the periphery of metaphase chromosomes and by late anaphase is associated with nucleolus-derived foci and prenucleolar bodies. Blastomeres in mouse embryos lacking pescadillo arrest at morula stages of development, the nucleoli fail to differentiate and accumulation of ribosomes is inhibited. It has been proposed that in mammalian cells pescadillo is essential for ribosome biogenesis and nucleologenesis and that disruption to its function results in cell cycle arrest [1]. This family is often found in conjunction with a Pfam:PF00533 domain. 
25.00 25.00 32.10 32.10 23.40 21.60 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.89 0.70 -5.15 9 361 2009-01-15 18:05:59 2003-07-30 15:27:31 6 7 309 0 244 367 12 258.90 46 46.38 CHANGED KpKhcpGsAspYlTRppAl+KLQLoLsDFRRLCIlKGIYP+EPK+.KKKsNKGSot.+saYhtKDIpaLhHEPIlpKFR-YKlFh+Klp+Ah.u+t-hpssK+Lhss..+PsYpLDHIIKERYPTFhDALRDlDDALSMlFLFSshPsop+..........lpsphlppC++LssEahpYVhco+uLRKsFlSIKGlYYQAElhGpclTWllPa..tFspshsoDVDF+VMhTFlEFYpsLLuFVNFKLYpsLsLhYPP+l-sphcppttst.uhcht.susphshths.stpttcsh ..........KKhppGpApsYITRspAl+KLQlSLsDFRRLCIhK...........GIYP.REP+p......KKK.ss.KuuosspTaYahKDIpaLhHEPllpKFR-hKsah+Kl.p+Ah.u+s-hssscRLccN........+PpapLDHIlKERYPTFlDALRDLDDsLohlFLFushPs.ssp...................l.sph.lphCpRLshEa.tY.lhts+uLRK....sFlSIKGlYYQApl.......Gp.....p..lhWlsPa..pF..spp.hs..sDVDaRl..MhTFlEFYpT..LLuFVNF+LYps...l..sLh.Y.PPpl-tptpptt..t.thtsh.......h..........................ttt.t............................................. 0 91 139 204 +6561 PF06733 DEAD_2 DEAD_2 Vella Briffa B anon Pfam-B_1713 (release 10.0) Family This represents a conserved region within a number of RAD3-like DNA-binding helicases that are seemingly ubiquitous - members include proteins of eukaryotic, bacterial and archaeal origin. RAD3 is involved in nucleotide excision repair, and forms part of the transcription factor TFIIH in yeast [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.63 0.71 -4.86 33 3488 2012-10-05 12:31:09 2003-07-30 16:01:32 10 48 1979 5 1199 2928 287 131.00 24 17.76 CHANGED YsSRTHoQlpQslcEL++l.thh................hpsls.LuSRcplClpsclpc.tt..tpsls...thCpphtpph...........................C.aapsh.....ph.t....pphhhsslhslE-Lhc......hGcptphCPYass.RchlppA-lllhsYpYLlssphRpth.......lphcsslVIlDEAHNl.csstp...hhShclopppL ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s......h.t..........C........Pa...........h.......h......u....Rc....t......h..............p...AD........VV..lss...ap.h.l.....h.s..s.h..h..t..ctsh....................s.c.s...p......l.l.......l.........hDEAH..pLs-hupp..hhuhsls.................................................... 1 423 690 993 +6562 PF06734 UL97 UL97 Vella Briffa B anon Pfam-B_1747 (release 10.0) Family This family represents a conserved region within viral UL97 phosphotransferases. UL97 participates in the phosphorylation of the nucleoside analog ganciclovir (GCV) to produce GCV-monophosphate [1]. 
25.00 25.00 43.90 42.60 19.40 19.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.24 0.71 -4.92 8 208 2012-10-02 22:05:25 2003-07-30 16:47:50 7 1 30 0 0 154 0 170.10 60 38.99 CHANGED NlLIsss.cssppIh+AVLCDYSLSEsHPp.aNcRCVVVFQpT+TlRhlPsSpa+Ls-hYHPAFRPlsLQKllll-P+ApFPss.uspRaCss-LCALGpVlsFCLlRlLDcRGhccVRtssEstLFphAspACcAhtpcclsuhuDAChhlLAcQLuYhusLLG.--ss-hhs+hhcFlcsps-ps .NVLIcVNPHsPSEIlRAALCDYSLSEPYP-.YN-RCVsVFQETGTARRIPNCoHRLRECYHPAFRPhPLQKLlls.-.P+upFPss.uhpRaChS-LsALGpVluFCLhRlLD.......+RGhcEVRhsoEshLFthAstACRALtptplscsuDAChLlLAtQhuYsAsLLG.-cssslhu+hhcFlEs+hsp......... 0 0 0 0 +6564 PF06736 DUF1211 Protein of unknown function (DUF1211) Vella Briffa B anon Pfam-B_2594 (release 10.0) Family This family represents a conserved region within a number of hypothetical proteins of unknown function found in eukaryotes, bacteria and archaea. These may possibly be integral membrane proteins. 25.00 25.00 29.00 25.60 24.90 24.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.05 0.72 -3.84 95 961 2009-01-15 18:05:59 2003-07-30 17:11:10 6 3 572 0 286 777 47 86.70 33 41.94 CHANGED pRl-AFoDuVhAIhhTllVL.............-l...........................phPp...........sthhpsLh.shhsphhsYllSFlhluhhWhsHHphaphlp..plsptllhhNhhhLhhls..LlP ............................pRl-AFoDuVlAIlhTllVL.............-l...........................phPp...................ssshpuLh..phhsp.hhsYllSFlhluhhWhsHHpl.Fp.hlc..clstpl.hhhNhh..hLhhlohlP................................... 0 97 171 217 +6565 PF06737 Transglycosylas Transglycosylase-like domain Bateman A anon Bateman A Domain This family of proteins are very likely to act as transglycosylase enzymes related to Pfam:PF00062 and Pfam:PF01464. These other families are weakly matched by this family, and include the known active site residues. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.69 0.72 -3.68 18 1273 2012-10-03 00:09:25 2003-07-30 17:12:09 9 31 590 2 302 1024 36 75.60 45 27.18 CHANGED sAssssWDtlApCESGGNW.ulNTGNGaYGGL.QFstuTWpuhGGspYus.sAstAoRppQIulA-+lhssQGhuAWPsC ..............s..ssssWDt..lApCESGGN.......W.ul.......N.....T.G.....NG.a.....a.GGl.QFstuTWpuhG.G..st.a.t..s....pss....tA........o+........p....pQ....IslAp+lh.......s.s.QG...hG...AWPsC............................................ 0 77 214 286 +6566 PF06738 DUF1212 Protein of unknown function (DUF1212) Vella Briffa B anon Pfam-B_1646 (release 10.0) Family This family represents a conserved region within a number of hypothetical membrane proteins of unknown function found in eukaryotes, bacteria and archaea. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.05 0.71 -4.84 181 3852 2012-10-02 11:53:07 2003-07-30 17:20:57 7 11 2765 0 655 2941 60 188.90 23 54.74 CHANGED hGchLlpsGupshRVEcshpclu.pshGl..p.spshhsssulhloh.....ttsspshophppl.pspslshp+lsplpplscpltp..splsl.c-s...ppcLcpIpppt.....Yshhhhs.luhuluuu..uhshL.hGGs..hhchhhuhlsuhl......shhlphh.hs+tphsthhhp.hhuuhlsuh...........................luhhhh........th..shsh.shshh.hhuulhhLlPG ....................................................sGphLLpsGucshRV--shpRlu....p.s.h....G.h....sp..spshlsssuIhhoh...................sspsh.ophpcl..p..s.......pslshp+lsplpplscp..htp.............pp..l..sl..cps.......pp...cLppIptt..p.....Y.s.h.h.hh.s..lusu.lu.s.u..sFshL...h..G..G..s.....hhs.slhshluusl........................uhhlpth.l..s.+h..t..h.h...h.ls.....hl........uuhlssh...........................lu.hhhh......................ph..sh...s....t..s...s.hh...hhuulhhLVPG........................................................................................................... 
0 207 396 545 +6567 PF06739 SBBP Beta-propeller repeat Yeats C anon Yeats C Repeat This family is related to Pfam:PF00400 and is likely to also form a beta-propeller. SBBP stands for Seven Bladed Beta Propeller. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -7.54 0.72 -4.17 52 656 2012-10-05 17:30:42 2003-07-31 09:37:18 6 61 93 0 325 796 561 36.30 30 14.83 CHANGED tahp..lG.......ssssps..spuIuhDppGN.lYlsGpTsu..sat ...............asp..lG............ss.ss..s......upu....lulD.ss.GN.lYlsGtTsu..sh......... 0 218 271 288 +6568 PF06740 DUF1213 Protein of unknown function (DUF1213) Vella Briffa B anon Pfam-B_2177 (release 10.0) Family This family represents a short conserved repeat within Drosophila melanogaster proteins of unknown function. Approximately 50 copies of this repeat are present in each protein. 21.30 21.30 22.20 21.30 19.90 21.20 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.32 0.72 -3.84 29 411 2009-01-15 18:05:59 2003-07-31 11:07:59 7 12 12 0 314 459 0 29.70 64 20.54 CHANGED KEsSRPESVsESlKD-s........tcs..cSRhESl ...KEsSRPtSVAESVKDEs............EKo.ttSRRESl.................. 0 53 53 203 +6569 PF06741 LsmAD Ataxin-2-like; Ataxin-2_N; LsmAD domain Vella Briffa B, Bateman A anon Pfam-B_2543 (release 10.0) Domain This domain is found associated with Lsm domain [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.41 0.72 -3.88 26 396 2009-01-15 18:05:59 2003-07-31 13:05:57 8 12 263 0 244 409 5 70.30 42 7.87 CHANGED FG.VpSTaDEcL..YTT+L-+uss.pa+ppppcA-+lA+EIEsp.sotshHlt-ERuh..h.s-sshDEE-pYSuVpRs ....aGVpoTYDpsL..YTs.L-+sss.pa+cREtcAsclA+EI...Euo..............sp.Rst.....htscsshs..EE-KYSuVpRp............. 
0 77 130 192 +6570 PF06742 DUF1214 Protein of unknown function (DUF1214) Vella Briffa B, Finn RD anon Pfam-B_2721 (release 10.0) Domain This family represents the C-terminal region of several hypothetical proteins of unknown function. Family members are mostly bacterial, but a few are also found in eukaryotes and archaea. 25.00 15.00 25.00 15.10 24.90 14.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -11.29 0.72 -3.56 331 1606 2012-10-01 20:00:45 2003-07-31 13:11:51 6 13 655 9 542 1441 270 108.30 21 30.86 CHANGED ss.t-uhY.hh....st.hDssuph.....lsGst....t.apl......phs.s.thPss........t.FWSloh..Y....stt.......hhts....s....schs.............ls..s..............................ssDG.ohslhlu.s......ss.s.........................N..WL.................t.s................ahlhhRhYt..P.pp .............................................................ss....tshY..h....s..hDssGp......LsGsp.......t.htl.....phs.s.thPss...........t.FWol..o.h....Ys.pt.h.....hhts.......s.........hsp.hs......lss.....................h..ssDG..olslhlusps..Pss.s...........N.Wl.....h.s.s.......s..hhlhhRhYh..s...tt............................. 0 109 283 416 +6571 PF06743 FAST_1 FAST_Leu-rich; FAST kinase-like protein, subdomain 1 Vella Briffa B, Fenech M anon Pfam-B_2858 (release 10.0) Family This family represents a conserved region of eukaryotic Fas-activated serine/threonine (FAST) kinases (EC:2.7.1.-) that contains several conserved leucine residues. FAST kinase is rapidly activated during Fas-mediated apoptosis, when it phosphorylates TIA-1, a nuclear RNA-binding protein that has been implicated as an effector of apoptosis [1]. Note that many family members are hypothetical proteins. This region is often found immediately N-terminal to the FAST kinase-like protein, subdomain 2. 
24.80 24.80 24.80 25.30 24.70 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.09 0.72 -4.26 23 389 2009-09-10 21:09:11 2003-07-31 14:39:19 10 9 79 0 206 364 2 70.70 27 10.78 CHANGED lssllhsFupLN..YcPsst-pFaspltptLpsphsths..PptllsllaSLshlppaPhshlsplhsssFlscL .............ltpllhsFupLN..Y....p...P...s.pp.....c.p.Fh.....spltptLtpc..hsths......Ptt....llsllaSLshlpphP..phlptlhsspFlpph..... 0 32 49 104 +6572 PF06744 DUF1215 Protein of unknown function (DUF1215) Vella Briffa B anon Pfam-B_2952 (release 10.0) Family This family represents a conserved region situated towards the C-terminal end of several hypothetical bacterial proteins of unknown function. A few members resemble the ImcF protein, which has been proposed [1] to be involved in Vibrio cholerae cell surface reorganisation that results in increased adherence to epithelial cells line and increased conjugation frequency. 21.00 21.00 21.20 21.00 20.90 20.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.34 0.71 -4.48 111 1351 2009-01-15 18:05:59 2003-07-31 14:55:03 7 9 894 0 251 1140 28 123.70 27 10.91 CHANGED lpphss........phPsslpshhsplsspuhphlhpsuts.pLsppWpspVhp.appsluGRYPFs.suspDlulsDFschFuss.GhlcpFhpppLtshlc.ssss.Wphcshss..sh.....slssshLpthppAppI ........................................................up........phPsPlpshlsp....lssp........uhphlht.tutp..lspp...Wpss..Vspsap..psluuRYPFs..sospDsols-FpcaFuss.GhlDsFapppLtshl-..ssss..ah.csssst...sh.....slpss.hLsthppAppI...................................... 0 35 96 172 +6573 PF06745 KaiC KaiC Vella Briffa B anon Pfam-B_2234 (release 10.0) Family This family represents a conserved region within bacterial and archaeal proteins, most of which are hypothetical. More than one copy is sometimes found in each protein. 
This family includes KaiC, which is one of the Kai proteins among which direct protein-protein association may be a critical process in the generation of circadian rhythms in cyanobacteria [1]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.26 0.70 -5.24 26 1953 2012-10-05 12:31:09 2003-07-31 15:32:50 8 20 758 154 980 16013 6155 206.50 22 73.85 CHANGED lpTGI.GhD-lh.p.............GGlPcspslLloGssGTGKTlhuhQFLhpGhhchGEsGlaVolEEss....psl+cshc.phGWDlpchEccGhlsllDuhsssht............ptslcpLhppLppsl+chsA..+RllIDSlosL..hhppssh.sRphlhpLhphl.cthGsTulhsophs......stpshus.GVEc.slDGlIhLchcchst............chhRolhIhKMRuTsHshpcaPF-Ioc.pGIhlhsttt ......................................................oGl.tlDphl..t............................GG..l...ts..phhL.l....t.G.ss...GoG.K......T..hh....u......h.............p.........F.....l.........h...........p...............u.....h........p........p.............................u.......-............t..............s......l......a...l....s..h.c.....E..s........................tp..l.......h...p.....p..........h.........p.......s..h.......G.......h.........s........h........p.......t........h...................t..........p......t.............l......h....h.h..p.....h..st..............................................ph.p.t..h....h....p..t......l....t..p....t....l....c....p...h....p.s...........p..h.....l..l......l..D.....S....l........ss.........l............................h..........h............t.............s....................t...........t.........h.......+........p..............l........h.....p.......l.......h.....p....h.l.......p......p....h....s......h......o......s.......l...h..s.s...p.h.t................t.t..t..h..s..t..t..l......p.p.......l..s..Ds.l.lhLc....h.....tt..............................phtR....h.lplhKhR.s......s.t..t.t.t......t...h.h.aplsp..tGl.lh...h..........................................................................................
................................................................................................................. 0 262 574 792 +6574 PF06746 DUF1216 Protein of unknown function (DUF1216) Vella Briffa B anon Pfam-B_3048 (release 10.0) Family This family represents a conserved region, within Arabidopsis thaliana proteins, of unknown function. Family members sometimes contain more than one copy.It has been reported that this domain will be found in other Brassicaceae. 19.70 19.70 20.40 19.80 19.30 19.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.93 0.71 -4.39 14 50 2009-01-15 18:05:59 2003-07-31 15:42:00 6 3 5 0 38 49 0 129.60 28 39.42 CHANGED FFEpLKuhMph.ssh....t.pssKshps-MKupAstLhcAMuuhp....stSuchppplppph.phhKTLp.hp.h.tphh.t..tctcpptohphThpQppElcpslsKWppllopFVcosspspup..ShspSS.........ssssssSS ...........................FFppLKuaMphhsss....t.ssuK.sh.sch+upustLhcAMuuhp.uph.upSuc.ppplppshhphhKTl..h...h.t.h.....hc.cppustphThpQppElhpshsKWppslspFVcosspspup..S.s...uS...t...........s.t......tt........................................................................................................................ 0 21 21 21 +6575 PF06747 CHCH CHCH domain Westerman BA, Poutsma A, Steegers E, Oudejans CBM, Bateman A anon Westerman BA, Poutsma A, Steegers E, Oudejans CBM Domain we have identified a conserved motif in the LOC118487 protein that we have called the CHCH motif. Alignment of this protein with related members showed the presence of three subgroups of proteins, which are called the S (Small), N (N-terminal extended) and C (C-terminal extended) subgroups. All three sub-groups of proteins have in common that they contain a predicted conserved [coiled coil 1]-[helix 1]-[coiled coil 2]-[helix 2] domain (CHCH domain). Within each helix of the CHCH domain, there are two cysteines present in a C-X9-C motif. The N-group contains an additional double helix domain, and each helix contains the C-X9-C motif. 
This family contains a number of characterised proteins: Cox19 protein - a nuclear gene of Saccharomyces cerevisiae, codes for an 11-kDa protein (Cox19p) required for expression of cytochrome oxidase. Because cox19 mutants are able to synthesise the mitochondrial and nuclear gene products of cytochrome oxidase, Cox19p probably functions post-translationally during assembly of the enzyme. Cox19p is present in the cytoplasm and mitochondria, where it exists as a soluble intermembrane protein. This dual location is similar to what was previously reported for Cox17p, a low molecular weight copper protein thought to be required for maturation of the CuA centre of subunit 2 of cytochrome oxidase. Cox19p have four conserved potential metal ligands, these are three cysteines and one histidine. Mrp10 - belongs to the class of yeast mitochondrial ribosomal proteins that are essential for translation [2]. Eukaryotic NADH-ubiquinone oxidoreductase 19 kDa (NDUFA8) subunit [3]. The CHCH domain was previously called DUF657 [4]. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.01 0.72 -3.96 53 994 2012-10-02 15:44:21 2003-07-31 16:10:01 8 11 296 4 623 930 5 35.40 25 22.76 CHANGED Ctt-hpphhpChppssp...p..hspCpphhpthpp.Chpp ..........CspchcphhsChppssp....p...hppCpct.pshpp.Chh..... 0 169 274 457 +6576 PF06748 DUF1217 Protein of unknown function (DUF1217) Vella Briffa B anon Pfam-B_3199 (release 10.0) Family This family represents a conserved region that is found within bacterial proteins, most of which are hypothetical. Some members contain multiple copies. 
22.80 22.80 24.60 23.00 21.20 22.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.89 0.71 -4.14 53 429 2009-01-15 18:05:59 2003-07-31 16:22:50 7 6 178 2 178 422 24 141.70 27 69.03 CHANGED AaGL--.hhs+Ahl+KlLps..shs-ssoaAN+Ls.........Dp.RYpchusAFsFss.t....Gsh................................................sspssstt......................................................................................................................psssspYhppph-ppsGspspulRlALYFpRps.........sslso..................................shclLuss.............sLpphhpTuhGLs.sshushc...l-tQhthlcctl.......phschsD.Pppht ..................................................AaGL-s.thscuhl+KlLps..shsD......s....uhsNpLs.........Dp.RYtphApuFsFssp.Gsh.........................................................................................................................ss..Qopsth.....................................................................................................................pshhstYh........p....p.....s....h..-.p....psus......p.......sp....ss.c...........A.....h.....Yapcph...............sslss..................................s.-lLuDp.............tLhshhhsuhGls.sphst.t....................................p.............................................. 0 36 78 108 +6577 PF06749 DUF1218 Protein of unknown function (DUF1218) Vella Briffa B anon Pfam-B_3286 (release 10.0) Family This family contains hypothetical plant proteins of unknown function. Family members contain a number of conserved cysteine residues. 
21.70 21.70 21.70 22.60 21.60 21.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.12 0.72 -10.31 0.72 -3.53 39 580 2009-01-15 18:05:59 2003-07-31 16:26:16 7 5 148 0 209 417 0 76.60 39 56.25 CHANGED sAhlhLhhuQllssssstC....hCs...ups.......hhs.ttsp+shulhhhlloWlshslAhshLlsGustNuhpth........................s.sChhl+cGlFuuuAs.......lsLhsslh ...........................................psllhulo+C....hCh.....G+s..............lsP....uusRuhsllhFlsSW.lTF.........llAEuCllAGutp.NAhHs.............................................................................................. 0 27 118 166 +6578 PF06750 DiS_P_DiS Bacterial Peptidase A24 N-terminal domain Yeats C anon Yeats C Domain This family is found at the N-terminus of the pre-pilin peptidases (Pfam:PF01478). It's function has not been specifically determined; however some of the family have been characterised as bifunctional ([2]), and this domain may contain the N-methylation activity (EC:2.1.1.-). It consists of an intracellular region between a pair of transmembrane. This region contains an invariant proline and two almost fully conserved disulphide bridges - hence the name DiS-P-DiS. The cysteines have been shown to be essential to the overall function of the enzyme in [1], but their role was incorrectly ascribed. 23.00 23.00 23.20 23.80 21.90 22.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.27 0.72 -4.19 156 2675 2009-01-15 18:05:59 2003-08-01 09:33:18 8 8 2521 0 532 1905 721 94.00 36 36.27 CHANGED llG...hhlGSFLNVVlaRlP......tth.......................................slhhP............t.SpCPpCppplpha-sIPllSaLhL+G+.C+tCpstISh+YPllEllsullh...hhshhthuh ............................lhGhhlGSFLsVlIhRlP.......ht.h..............................................................slh.h.P.p.S+CspCpppl+hh-.IPllSa.L.h.L+G+.C......RpCp.spIuhpYsLlElls.u.llh.lhhhhhh..h.................... 
1 185 349 455 +6579 PF06751 EutB Ethanolamine ammonia lyase large subunit (EutB) Moxon SJ anon Pfam-B_6325 (release 10.0) Family This family consists of several bacterial ethanolamine ammonia lyase large subunit (EutB) proteins (EC:4.3.1.7). Ethanolamine ammonia-lyase is a bacterial enzyme that catalyses the adenosylcobalamin-dependent conversion of certain vicinal amino alcohols to oxo compounds and ammonia. The enzyme is a heterodimer composed of subunits of Mr approximately 55,000 (EutB) and 35,000 (EutC) [1]. 25.00 25.00 31.30 31.10 21.50 21.50 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.43 0.70 -6.15 12 1141 2009-01-15 18:05:59 2003-08-01 09:56:23 6 3 1070 18 217 709 31 435.60 63 95.67 CHANGED sYpFcsL+-VhAKAs.t+SGDpLAGlAAsSupERVAA+hlLuclsLpchhpssllPYEpDEVTRlI.Dshstttas.lpphTVu-hR-WLLscpss...ssplptlu.GLTsEMlAAVoKLMpN.DLIhsApKhcVss+hpoTIGlpGphSsRLQPNHPTDDPpGIsASlL-GLhaGsGDAVIGlNPssDossulsclLchh-clhp+acIPTQsCVLuHVTTphEAIcpGsPlDLlFQSIAGoEtuNpuFGlslAlLcEApphuLshpp.ssGsNVMYFETGQGStLSusAHaG.....VDQ.ThEARsYulA++acPhLVNTVVGFIGPEYLYDuKQlIRAGLEDHFhGKLhGlPMGsDlCYTNHhcADQsDh-NLhsLLssAGsNalMGlPsuDDVMLNYQTTuaHDshhlRclhGL+PhsEFEpWLpchGhhtt.sG+Ls.tuGssuhF .......sYpFcslK-VLAKAs.hRSGDhLAGVAAsSupERVAAK.VLu-hs..............ls-............lhNsPVIsY..E..-DpVTRLI.DshscsAas..p..I+paTlu-LREalLS-c.To.....s.sc..lthlpcGLTsEhVAAVuKlMsNtDLIhuA+Kh.V..lp+hpTTIGl....PGshSsRLQPNcspDDlpuIAAplh-GLsaGsGDAVIGlNPVTDslcsl...s+lLchltslIp+asIP..T..Q..u..CVLAHV..TTQ..IEAI.cR.G.A.PssLlFQSIuGoEKuN.csFGlsL.............A.hL.cEA+tsuhphs.R..............h.......sGp.....NshYFETGQGSALSAsApaG.....sDQ.ThEARsYGlAR.......+a-...............P..............FL.............VNTVVGFIGPEYLYss+QIIRAGLEDHFhGKLhGlsMGCDsCYTNHA-ADQND.-NLhhLLusAGCNaIMGlPhuDDl.........MLNYQTTuFHDshslRplLsLRPuPEFEcWLEpMGIh...s..s.G.+LoppuG-ss................ 
0 60 124 172 +6580 PF06752 E_Pc_C E_Pc_C-term; Enhancer of Polycomb C-terminus Vella Briffa B anon Pfam-B_16811 (release 10.0) Family This family represents the C-terminus of eukaryotic enhancer of polycomb proteins, which have roles in heterochromatin formation [1]. This family contains several conserved motifs. 19.70 19.70 19.80 20.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.56 0.70 -4.85 5 124 2009-09-11 20:40:39 2003-08-01 10:02:32 7 4 42 0 55 110 0 215.70 53 29.54 CHANGED uFTAEQYQQHQQQLVLMQKQQLtQLQQpQQsssSosssss......................pulVSKTLDSASAQFAASALVTo-.QLLuhKSKE-sVhtsGVNGVVsuSGsYKuLHl................TsSAlsph..s..SuuSossuPosslsoSsupoossHtlsAhu...........sAosQsLhGNNlpLoVPosluTVssVoPlssRHlsRTLusVPsSALKLAAsANs..pVPKV.TuSSolD.luRENHEsEKPALNuLAENTVAMEVT .......................uhTtEQaQpHQQQLs.MQ+QQ..LAQlQQp....Qts....ppSSppTp.p..............................shpshhSKTLDSASApFAASAlVou...hhu.thK-pssht..slNGVl.sS.G.......s.KsLa.................TshAL..............uSo.h.sus.h.posuHoshsH.l.sAhs...........ouosQsL.hNN..pLTsssplssVssluPlNs+h.sRT.u.s.s.P.oALKLA..ss..Ass...pVPKV..ssSSu..lsolsRENHEsE+.uLNsIA-sTVAMEVT...................................... 0 3 8 22 +6581 PF06753 Bradykinin Bradykinin Moxon SJ anon Pfam-B_7085 (release 10.0) Family This family consists of several bradykinin sequences. The skins of anuran amphibians, in addition to mucus glands, contain highly specialised poison glands, which, in reaction to stress or attack, exude a complex noxious cocktail of biologically active molecules. These secretions often contain a plethora of peptides among which bradykinin or structural variants have been identified [1]. 
20.70 20.70 20.90 20.90 20.40 20.40 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.55 0.72 -6.57 0.72 -4.36 2 35 2009-12-03 16:03:04 2003-08-01 10:06:19 7 5 8 0 0 37 0 16.50 85 34.95 CHANGED LQRRPsGFTPFRGKFHSQS s...RPPGFSPFRGKFHSQS.. 0 0 0 0 +6582 PF06754 PhnG Phosphonate metabolism protein PhnG Moxon SJ anon Pfam-B_6667 (release 10.0) Family This family consists of several bacterial phosphonate metabolism protein PhnG sequences. In Escherichia coli, the phn operon encodes proteins responsible for the uptake and breakdown of phosphonates. The exact function of PhnG is unknown, however it is thought likely that along with six other proteins PhnG makes up the the C-P (carbon-phosphorus) lyase [1]. 25.00 25.00 26.20 26.20 23.70 22.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.69 0.71 -4.58 50 665 2009-01-15 18:05:59 2003-08-01 10:41:11 7 3 645 0 125 413 78 145.30 51 96.70 CHANGED sshssRptWMulLA+A.....ssscLsshhsslsh....PsaphLRsPEsGhVMVRGRhGGoGusFNLGEhTVTRssV+Lss.....Gp..lGauYlhGRD+p+AElAAlhDALhQpsstt.......splpppllsPLttttsspctp+AAcsAATKVDFFTM..VRGED ...........................s.pAsRQ+WMuVLA+o.....pss....ELsA+hpA.L.sl....sssYclIRAsEoGLV.lpuRMGGTGcp.FhhG-sTlTRAuVRLsD..............................GT....l..GYSaVLGRDKpHAEtsALlD.ALhQps...c.ph.......psLpcsL..IsPLcAcphsRhAs...RpAE...lsAoRVDFFTM..VRG-s.................... 0 22 66 91 +6583 PF06755 DUF1219 Protein of unknown function (DUF1219) Moxon SJ anon Pfam-B_4928 (release 10.0) Family This family consists of several hypothetical proteins which seem to be specific to the Enterobacteria Escherichia coli and Shigella flexneri. Family members are often known as YeeV proteins and are around 125 residues in length. The function of this family is unknown. 
25.00 25.00 28.30 25.80 23.60 23.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.38 0.71 -4.57 8 829 2009-01-15 18:05:59 2003-08-01 10:48:13 7 4 345 0 38 369 4 110.90 74 90.28 CHANGED MpT.PssssptApsCPSPVsIWQpLLoaLL-QHYGLTLNDTPFuDEsVIpEHI-AGISLsDAVNFLVEKYsLVRIDRcGFSsppQSPhloulDILRAR+ATGLhppps..sl.........tt+ ..................MKTLPsohsRtASpCPSPV.sIWQpLLoRLLDQHYGLTLNDTPFADERVIEQHIEAGISLCDAVNFLVEKYALVRTDQPGFSsss.pSQLINSIDILRARRATGLMTRcNYRTVN.sIThG+ast..................... 0 3 8 21 +6584 PF06756 S19 S19_C-term; Chorion protein S19 C-terminal Vella Briffa B anon Pfam-B_16839 (release 10.0) Family This family represents the C-terminal region of eukaryotic chorion protein S19. In Drosophilidae, the S19 gene is known to form part of an autosomal cluster that also contains s16, s15 and s18 [1]. Note that members of this family contain a conserved PVA motif, and many contain Pfam:PF03964. 25.00 25.00 110.80 110.00 21.50 19.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.76 0.72 -4.12 6 18 2009-01-15 18:05:59 2003-08-01 11:00:52 6 2 14 0 8 21 0 76.80 66 38.58 CHANGED uuss.uGsYGGt....huPRaTVQPAGsTLLYPGQNSY+sYsSPsEYSKVlLPVRAAuPVAKLYlPEN...sYGupsGY ..............tuh..uGNYGtR.tGhhuPRWTVQPAGATLLYPGQNsYRsYVSPPEYSKVlLPVRsAuPVAKLYlPEN...pYGsQhs.......................... 0 1 1 4 +6585 PF06757 Ins_allergen_rp Insect allergen related repeat, nitrile-specifier detoxification Moxon SJ anon Pfam-B_5947 (release 10.0) Family This family exemplifies a case of novel gene evolution. The case in point is the arms-race between plants and their infective insective herbivores in the area of the glucosinolate-myrosinase system. Brassicas have developed the glucosinolate-myrosinase system as chemical defence mechanism against the insects, and consequently the insects have adapted to produce a detoxifying molecule, nitrile-specifier protein (NSP). 
NSP is present in the small white butterfly Pieris rapae. NSP is structurally different from and has no amino acid homology to any known detoxifying enzymes, and it appears to have arisen by a process of domain and gene duplication of a sequence of unknown function that is widespread in insect species and referred to as insect-allergen-repeat protein. Thus this family is found either as a single domain or as a multiple repeat-domain [3]. 21.60 21.60 21.60 22.70 21.00 21.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.95 0.71 -4.79 38 335 2009-01-15 18:05:59 2003-08-01 11:05:02 8 5 49 0 131 369 0 174.00 23 79.51 CHANGED stsLpsDhp-Fl.sLlPhcp....ltplstcYhhsDschppslpYLpos-Fpplapplhs.hs-lpshlpalc..spGlslhs.......hlNplsshl...sls.l..........pPp.hh............pstulsuhlc-llulLPhcclpALap-Khps.os-Fpthactlpo.-Fpplhppht....ssp-lppllpcL+.cpGl...Dlcpllchl ....................................shptchp-Fh.slls..hcp....l.plhtpYhhpDschppslpal.pss.p...F..tphhpplts.hP-hp.slhsalp...ppslslst...........hl..cplpph.l.....th.ph......................s.s..hp......................pspshpsals-slslLP..hsplpuL..apcK.hps.sstFpphhcslpop-acplhpslh....pscphpslhppLp.ppul...clchlhp..h.................... 0 31 38 108 +6586 PF06758 DUF1220 Repeat of unknown function (DUF1220) Moxon SJ anon Pfam-B_6292 (release 10.0) Domain \N 20.30 20.30 20.60 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -9.07 0.72 -4.17 15 961 2009-10-27 09:18:29 2003-08-01 11:14:26 8 17 26 0 206 961 0 64.40 48 40.55 CHANGED EE..-QpPssPRLs.....tEL.-sEE.EVLQDSLDcChoTsSshh-ls-SsQPYcSsFaohEEpcVuhALsVD ...................EE..-QsPssPRLS....................pELh-scEs.EVLQD...SLDcCYSTPSshhEhsDSsQP.YpSs.ha.o...LEEpcVuhuLDls................ 
0 156 156 156 +6588 PF06760 DUF1221 Protein of unknown function (DUF1221) Vella Briffa B anon Pfam-B_16837 (release 10.0) Family This is a family of plant proteins, most of which are hypothetical and of unknown function. All members contain the Pfam:PF00069 domain, suggesting that they may possess kinase activity. 25.00 25.00 91.50 32.80 24.10 23.30 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.45 0.70 -5.09 5 29 2009-01-15 18:05:59 2003-08-01 13:54:25 6 3 13 0 22 30 0 205.70 50 30.18 CHANGED +DDIp.INQRQCsLLLDlauLAacoVAsElRuNLRF-EKpTKWKsLEQPLRELHRVFREGEAYVRpC..LDPK.cWWAKAIshapNTDCVEaHIHNLLsCluVVlEAIEsAGElSGhDPDEluR+RLVaSRKYDK-WpDPKLFpWRFGKpYLVo+DlCuRhDpAW+EDRWlLlptLQEKKsSuSs...sLoKpE+RLADLLhKsL.ss.p..sGKLaPSSlLLGSKD .......pD-lp.IN.RQCsLLh-hhshAapslutElRtpL+h-E+t.TKW+sLEpPLRELaRlh+-GEhYVRpC..L.-s+..s......WWu+Ahshppsp-CVEaHlHNLLhChslVlEAIEsAG.El.oG..D.-Eht.R.+RLlhucKYD.+-h.DP+LFp.a+FGKpYLloc-lssRh-tAW+EDRWlLhphlcE++tsuu........loKpE+RLADlLhtth..............GK.LaPuSlLlso....... 0 2 11 17 +6589 PF06761 IcmF-related ImcF-related; Intracellular multiplication and human macrophage-killing Vella Briffa B anon Pfam-B_3476 (release 10.0) Family This family represents a conserved region within several bacterial proteins that resemble IcmF, which has been proposed [1] to be involved in Vibrio cholerae cell surface reorganisation, resulting in increased adherence to epithelial cells and increased conjugation frequency. Note that many family members are hypothetical proteins. 
20.20 20.20 20.30 21.30 19.50 20.10 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.89 0.70 -5.13 95 1440 2009-09-11 20:39:52 2003-08-01 16:13:50 7 11 892 0 268 1260 33 297.20 22 27.04 CHANGED llssLsslpphstths....sssshhh.chGLapusplssssppsYpptLpphllPtlhpplpppLpssh.....................psscthYpsL+sYLMLs.cts...c.hcsshlpsahtp....pWp.pphsss....ht....pptLttHLstlh.............pts.tshshsssLlppARphLsph....shspRlYpplcpph...spthschsLsctsGss..........sstlF...........pppsspshtt.s..lPGlaTtpGacphFhsplsphspphtp-p.WVL.Gpt....t...sthsssshp........pLtpclpphYhsDYsspWcshLsslclt......shss........lspuhphLpsLuus.sSPLhpLlpslscpTpL .............................................................hLs.lpp.h.t.tht..h..ctsshhh.chGLYpuppltststpsYhphL...pph...hLPslhppltptLpss.....................................................................ssscpthssL+sYlMLs...c......p........p..hs...s.thlp...pahtp..............pWp..p..phssp..............ht....pptlhtHlpthh..............................pts.....h.p.sp...plltps+p..Ltph....shtpRlYpp.lttp.h......st.............t.s....shsLtchl.Gss..........hstlF..............tptss.............lPshaT+pGapphh.tphpp.hsp.tttt-s.WVLsptt.............p...sh.sps.shp.................plppplpphYhsDYsspWpshlsslplp........shss........lspshp.hLphlsss.suPlhpLhpsltcpTp..................................................................... 0 40 100 180 +6590 PF06762 LMF1 DUF1222; Lipase maturation factor Vella Briffa B, Eberhardt R anon Pfam-B_3454 (release 10.0) Family This family of transmembrane proteins includes the lipase maturation factor, LMF1. Lipoprotein lipase and hepatic lipase require LMF1 to fold into their active states [1,2]. The precise role of LMF1 in lipase folding has yet to be determined [3]. 
29.30 29.30 29.90 32.70 28.90 29.20 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.42 0.70 -5.67 25 402 2012-10-02 13:32:46 2003-08-01 16:39:26 9 7 240 0 234 358 72 338.00 35 69.50 CHANGED auFtW-uLLLEsGFLulFLuPs................psssstlslaLhRWLlFRlhFGuGllKl.R........ssssW+-LTuLpYHaETQPhPsPlSaahHpLPchh++hpssusahsElllPaLlFsP.p.lphhuuslhlshQlhllloGNauFhNaLTllL...................shuslD.......DshLshl.hst......................................................................................................................................p.ss.t..........................................................................sss.sshhhhhlslhlslhlhsLSh........hslhshlstc......Qh..............................................................................................................................................................................Mppsh.ssa+lVNoYGuFuslT..+pR.ElllEGot-st.......ssWc.....................................EYEF+sKPGslpR........hPphluP.YH.RLDWhMWFAAhsstp...............................................tssWhhsFlt+LLps-ps.......sLsLLtcs....PF.sspP.......P.palRAphY..cY+Fosh.pchtpsGt........WWpRphlt.pahPslsLss 
..................................................................................................................................................................h.FtW-shLLEsGFLslhl.us........................ths...ss.hlshh.hhRWLlFRl.hu.....u.GllKh.p........ts.sWh..sLTsh....aHaETQPhPsPhuaahH.p.h...Ph..h++hpshus...ahhpl..slPhh.hFhP..............p..ht..........hhuhhh.llhQl..hllhoGNasahNhLTlll...................shuhhD.......st.h...thh..hs..................................................................................................................................................................................................................................................................................................................................................................................................................................................h...s.hhht.hls.hhhthh..lhhlsh............ss..hshhu.p................p...................................................................................................................................................................................................................................................................h..ptsh.s.hplsNoYGh.Ftphs..................ptR.ElllEGotss..t................s.Wp.....................................-YEFhhKPG.s.pR........hP....huP.ap.RLD.W.MWFAAh.ssh........................................................ttsWhhthh.+LLp...sc.t.......sl.pLltp..s.....PF..p.pP..........P.palRuphYpYpFop....tt...tptt.......................WWhRphh.t.ahPsht...p....................................................................................................... 0 97 133 191 +6591 PF06763 Minor_tail_Z Prophage minor tail protein Z (GPZ) Moxon SJ anon Pfam-B_6085 (release 10.0) Family This family consists of several prophage minor tail protein Z like sequences from Escherichia coli, Salmonella typhimurium and Lambda-like bacteriophages. 
20.00 20.00 20.10 20.00 19.80 19.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.25 0.71 -4.65 8 914 2012-10-01 22:58:23 2003-08-04 11:55:40 6 3 408 0 33 448 7 176.00 56 97.65 CHANGED lKGLEpAIcNLsulD+phVPpASAhAlNRVAppAlStosppVA+ET........ulPhKLV+pRsRLp+Aosc.KspA+I+VNRGNLPAIKLGoApVRLS+R.....G.tu.h.htGSVL+lGpaRFpcAFIQQLsNGRWHVMcRlsG..............KsRYPIDVVKIPlAuPLTpAF-pphcRlhcp-hPKpLtaALppQLRLhLpR ...................................................................................lKGL-pAlcNLspls..+pAVPtAoAhAINRVAssAIupousQVARET........tV..RKLV+cRsR..L...p.+Aosc....pspAR.IhV.N..RG...sLPsI.K..LGps.....hhs.pR..............G......ptss.Sl..Lh.l.Gp+.R.h.sAFI.Q..p...L.t.NG....R.WHVMQRlsG......................KNRYP.I.DV....V.K...IP.hAs..P..L.spA..Fcpsh-...RI...h+EcLPKpLuYALppQLRhslKR....................................................................................................... 0 6 13 21 +6592 PF06764 DUF1223 Protein of unknown function (DUF1223) Moxon SJ anon Pfam-B_6655 (release 10.0) Family This family consists of several hypothetical proteins of around 250 residues in length which are found in both plants and bacteria. The function of this family is unknown. 
26.80 26.80 27.00 30.40 25.70 26.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.31 0.71 -4.50 72 362 2009-01-15 18:05:59 2003-08-04 12:34:50 6 3 322 1 146 334 61 196.70 33 76.90 CHANGED VVELFTSQGCSSCPPADphLuc...Lupc.............s.sVlsLuhHVDYWDYlGW+DsFupstaTpRQ+uYuptt..stptlYTPQhllsGppphsGsctspltstlptttst....htlslphs..ts.t.plplpssssssht.........hplhlshass.ttssplp+GENtG+plsasNlVpshptl...utWsG.ts.shphsh....shsstts............tssVhlQp ...................VVELaTSQGCoSCP.PADphluclusc..............s.sVlsLuaHVDYWDY.lGW+DsaAsppaTpRQpuYs....p..th..stctlYTPQhllsGpsp........hsGsstssltstlsssttp.....sslslphp..tps...s..plslslssst.tst................tshclhlshaps.shssp.lpcGENtG+slsapplVpshppl...uhWsG....tshph.p.lsh..s...tt.ts...........usslhlQ.......................................... 0 36 85 111 +6594 PF06766 Hydrophobin_2 Fungal hydrophobin Vella Briffa B anon Pfam-B_3587 (release 10.0) Family This is a family of fungal hydrophobins that seems to be restricted to ascomycetes. These are small, moderately hydrophobic extracellular proteins that have eight cysteine residues arranged in a strictly conserved motif. Hydrophobins are generally found on the outer surface of conidia and of the hyphal wall, and may be involved in mediating contact and communication between the fungus and its environment [1]. Note that some family members contain multiple copies. 21.20 21.20 21.40 26.10 20.90 20.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.96 0.72 -4.29 45 136 2009-01-15 18:05:59 2003-08-04 13:34:33 6 3 55 24 75 116 0 64.80 45 56.32 CHANGED hCP.uG.Laus.PQCCuosVLGlAsLDCpsPsssssssssFpshCAut.GppspCCslP.lsGQulLCpsP ..............Cs.uG..Laus.PQCCuTDVLG.lAsLDCtsPspssssussFpshCAsh.Gp.p.ApCCslP.lh....GQulLCpss............. 
1 9 33 59 +6595 PF06767 Sif Sif protein Moxon SJ anon Pfam-B_7884 (release 10.0) Family This family consists of several SifA and SifB and SseJ proteins which seem to be specific to the Salmonella species. SifA, SifB and SseJ have been demonstrated to localise to the Salmonella-containing vacuole (SCV) and to Salmonella-induced filaments (Sifs). Trafficking of SseJ and SifB away from the SCV requires the SPI-2 effector SifA. SseJ trafficking away from the SCV along Sifs is unnecessary for its virulence function [1]. 27.60 27.60 28.20 28.40 26.90 27.50 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.16 0.70 -5.67 4 347 2009-01-15 18:05:59 2003-08-04 13:38:12 6 5 122 2 7 127 0 249.90 46 72.77 CHANGED MPITIGNGaLKSEIhIsPPupT+EsWWKlLWE+lKDaFFSTG+AKADShlHEMLFuDsPPTRERLs-IFFELKALACASH+DRFQVYNPHEsDuTIIaRIhDENtcsELLRITQsTDTFSCclMGcsYFLh+-pPslLKSaPQMThTINKRYSElV-.sLPSTLCLpLAGsPhLSVPLcNI.tYLYSEhcKsNLDEWKsQEKssYLAsKIpSGIEKsh+hLpHANISESTQQpAFLETMoMCGLKslETsPP.THIPI.KhVcEVLLADKtFpsFLsoDsssSQSMLAEIIEsISDpVF+ALFRhDPQAIQKMAEEQLTTLHlRuppQpGs.LCCFL ...................MPITIG.pGaLKSEIh...opssp...soKES...a...a...pl.LWEKIK...DFFFoTt+AcADpCI+ELh.a.spp.sPTspRL..p-lFhcL+ELASsSCR-pFpl.s.casDspI..Ihp..h.D.pN.sEN...LpIh.ppDta.h-lMsphhhhhcs.psshLK.a....sphshhhpphhs....E.lsh.hPppL.Ls.uGu.h...hsVsLcNIcthLhs.hcKGpLstWKtQE+hshluu+Ip.GItps......ssIs-uhppph.hhcsht.ssLKphth.ssasp.slpphV.psLhtschh.shL.pss...Sts..hLs-lhEhlu.pVapulFp.s.p...................................pp....................... 0 0 0 3 +6597 PF06769 Plasmid_Txe DUF1224; Plasmid encoded toxin Txe Moxon SJ, Mistry J anon Pfam-B_7662 (release 10.0) Family The plasmid encoded Axe-Txe proteins in Enterococcus faecium act as an antitoxin-toxin pair. When the plasmid is lost, the antitoxin (Axe) is degraded relatively quickly by host enzymes. This allows the toxin to interact with its intracellular target, thus killing the cell or impeding cell growth [1]. 
This family contains many hypothetical proteins. This domain forms complexes with Axe antitoxins containing Pfam:PF02604. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.69 0.72 -4.16 13 1544 2012-10-03 00:18:00 2003-08-04 13:57:46 8 6 1142 20 262 983 101 78.40 41 89.94 CHANGED FTp-uW-DYhaWQpsD+Khlc+IN+LIc-spRs.PFcGIGKPEPLKtcLoGaWSRRIscEHRLVYhls....DsplsllusRYHY ...................................as.puapD.Yh.aW..p.p.p.D.+.+.h.l.K+Isc....LIc-.lp...R.s......P..a.p......G...h..G.KPEsL...K.t.s.Ls..G.h...WS.RRIs.cc.HRLVY.p..V..s.....-..c...p...l..h..I..h..us+h.HY..................................... 0 87 185 230 +6598 PF06770 Arif-1 Actin-rearrangement-inducing factor (Arif-1) Moxon SJ anon Pfam-B_6086 (release 10.0) Family This family consists of several Nucleopolyhedrovirus actin-rearrangement-inducing factor (Arif-1) proteins. In response to Autographa californica multicapsid nuclear polyhedrosis virus (AcMNPV) infection, a sequential rearrangement of the actin cytoskeleton occurs this is induced by Arif-1 [1]. Arif-1 is tyrosine phosphorylated and is located at the plasma membrane as a component of the actin rearrangement-inducing complex [2]. 
21.10 21.10 22.30 22.00 20.70 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.80 0.71 -11.43 0.71 -4.94 17 37 2009-01-15 18:05:59 2003-08-04 14:04:58 6 1 34 0 1 39 0 193.90 30 63.07 CHANGED hsplhhhV...........hulhGslsp+aAlLl-hE.sspuVhNhShl.shlaGshlhhssshshhthh......tchp.........pshYhpshlsllshhshlphhlFlsh.s.lhc.GHlPsLDshhRcYDpcSlCWsGls......htDsNul..........sp..NChhh........shh..hhCltCRhEhhpcEPThhpppphslhhhslhllllpsashal.hcchpp .........hhhh......h....hshhshlhulhGhhsscaAlLl-hE.sspuVhNhSsh.hhsaGhhlhhsshhsl..hh...............sphp......pshYhthhhhlhsslshlphhlalsh.shlhc.GHlPsLDVhhRcYDh-uhCWsGIV.....hhDsNsI.........hsp..NChhh........shhhhCstCRt.hhpsEsThhpppphslhhhlhhlhslpsasLah.hpchh.t.......... 2 1 1 1 +6599 PF06771 Desmo_N 1111; Desmo_N-term; Viral Desmoplakin N-terminus Vella Briffa B anon Pfam-B_3693 (release 10.0) Family This family represents the N-terminus of viral desmoplakin. Desmoplakin is a component of mature desmosomes, which are the main adhesive junctions in epithelia and cardiac muscle. Desmoplakin is also essential for the maturation of adherens junctions [1]. Note that many family members are hypothetical. 25.00 25.00 74.70 73.50 23.40 18.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.99 0.72 -4.06 20 59 2009-01-15 18:05:59 2003-08-04 14:20:02 6 1 52 0 0 60 0 86.30 44 11.83 CHANGED +Y+GsDVs....spTVpNLL+TIsohSpps+s..ssss-hlp+IRsIIlhaRPsLpp.p.scLs.sscLllcuLps..sss.......+pI...THNaNYKYDYN ....+YtssDVs....spTVpsLLpTINoMSpRsKs..sss-chLp+IRsIIlhaRPpLps.+.hDLp.lPpLlhpuLts..sss.......ppI...THNaNYKYDYN... 0 0 0 0 +6600 PF06772 LtrA Bacterial low temperature requirement A protein (LtrA) Moxon SJ anon Pfam-B_8368 (release 10.0) Family This family consists of several bacteria specific low temperature requirement A (LtrA) protein sequences which have been found to be essential for growth at low temperatures in Listeria monocytogenes [1]. 
21.60 21.60 21.70 21.90 21.30 21.50 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.01 0.70 -5.54 9 969 2009-01-15 18:05:59 2003-08-04 14:56:32 6 11 713 0 309 814 19 321.40 22 81.25 CHANGED pc+so.hELFFDLlFVhAlsQlo+tLhpshshstus.....phllhhhulWWsWhppoahsNhhss-ph.sRlhhhs.MhhsllLusulstsF.....psps.hhFsluYshMplupul.aLhAs+.ttshusppshhplushlhluulhWlsuuLh...tstRhhlahlulsl-hhsPh.......hstpshcph.lchtHlAERhuLhsIIsLGEol....lslssshhphshshtshhhhh.uFlsslshWhlYFtpstptss++tpssu.......hhasYuHl.IsuuIhlhuVu.-hslppshscshhtthh.....huusllFLhGhhhhptshtttthss+hlshshL......hLhsshLsshhlpshssllhlsVAhh.hss ....................................................................................pps.shhELFFDLlFV..h....Al....s....p..hsph...lhpt.st.h.h...........pallhhhhlWhhWhtpohasNta..sp.s.....s...h...h..hh...h...h...hhp...M...hh..hl...hh....u...s...u......l.....s.........s..a................................ppph......hh.F...h....l.........s....h..s..h.h..pls.h.sh.p.a......l...h.t....h...p......hs..s.t..c....t...s..h.....h..t....hs..h..h.h...h.l.s..s.h..h....h..h........hu..h....h...h..s.........tth....p.......h....h...h..h...h..l...u..l...h...l.p...h..h.s.sh.....................................ht..p..h..h....t...h.h......s...l...p.h....Hls..ERhuLhsIIhh....G...Esl...................l....u.ls...s....s.....h....h.......t........h.....s.....h...............h.....h.....h.....h.....h.......th.....h.....l.....hhs......lahh...Y.a.....s.....p...p.h..h.s..c..p.h.p.p..ts...............hhh.hY.s.H.h.lh.........hu.l..hh...hs.su.h.t...h...h...........t.......t........h......t.....t....h............h............h...h........................hhu.hh...l.a..hh....u..hh...hhh....h.......h..t...h...........p..hh....hh...........hhhh...h.l..............h......h...........................h.................................................................................................................... 
0 85 174 256 +6601 PF06773 Bim_N Bim protein N-terminus Moxon SJ anon Pfam-B_8427 (release 10.0) Family This family represents the N-terminal region of several mammal specific Bim proteins. The Bim protein is one of the BH3-only proteins, members of the Bcl-2 family that have only one of the Bcl-2 homology regions, BH3. BH3-only proteins are essential initiators of apoptotic cell death [1]. 19.10 19.10 19.90 44.30 18.40 16.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.97 0.72 -4.21 2 63 2009-01-15 18:05:59 2003-08-04 15:02:05 6 2 30 0 17 67 0 37.00 90 23.84 CHANGED QPSslSs-Csp.EGGQLQsspR..phRP...GAPTSLpo.. QPSDVSSECDR.EGtQLQPAE...R..PPQLRP...GAPTSLQTEP. 0 1 1 4 +6603 PF06775 Seipin DUF1226; Putative adipose-regulatory protein (Seipin) Moxon SJ, Coggill P anon Pfam-B_8703 (release 10.0) Family Seipin is a protein of approximately 400 residues, in humans, which is the product of a gene homologous to the murine guanine nucleotide-binding protein (G protein) gamma-3 linked gene. This gene is implicated in the regulation of body fat distribution and insulin resistance and particularly in the auto-immune disease Berardinelli-Seip congenital lipodystrophy type 2. Seipin has no similarity with other known proteins or consensus motifs that might predict its function, but it is predicted to contain two transmembrane domains at residues 28-49 and 237-258, in human, and a third transmembrane domain might be present at residues 155-173. Seipin may also be implicated in Silver spastic paraplegia syndrome and distal hereditary motor neuropathy type V [1]. 
19.20 19.20 19.70 19.20 19.10 18.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.08 0.71 -4.80 30 347 2009-01-15 18:05:59 2003-08-04 15:05:20 9 9 225 0 212 337 0 188.80 26 50.23 CHANGED shhhllhlu....lhhY..........ssaYYsalPp..lshpcslahpassss...................PhAplsls.........................tphlhs....sQsYclslcLchP-SshNh..plGhFhVslsh...............................hopsups..................ls.............posRsshLpYcSshlchhpolhhhPhalhGhp.......cEpQplpVphhp..ca.cs............tsPssu.................................................lplplps+p........hQlYsA.pLplpAchsGlRalhYsa.lhShlVhsssha .................................................................s..hhlhhhu....hhha.h.hY.htahPp...lth.pslahp....appsp..............................PhApssls......................................................................................................................tphlhh........sQsY..clslpLph.PcSshNt......plGh...Fhl.shph...........................................................................hstssph..........................................ls............posRssh........LpYc..Sshlphhps.hhh...s.....hhlhGht.......pppp.hlplphhp....ph...p...t..........................ssss.................................................hhlplpspt................hpl.YsA.plplpAchsGl.R........hlhapa.hhoh.hlhh...................................................................................................... 1 72 124 175 +6604 PF06776 IalB Invasion associated locus B (IalB) protein Moxon SJ anon Pfam-B_3703 (release 10.0) Family This family consists of several invasion associated locus B (IalB) proteins and related sequences. IalB is known to be a major virulence factor in Bartonella bacilliformis where it was shown to have a direct role in human erythrocyte parasitism. IalB is upregulated in response to environmental cues signaling vector-to-host transmission. 
Such environmental cues would include, but not be limited to, temperature, pH, oxidative stress, and haemin limitation. It is also thought that IalB would aide B. bacilliformis survival under stress-inducing environmental conditions [1]. The role of this protein in other bacterial species is unknown. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.30 84 752 2009-01-15 18:05:59 2003-08-04 15:18:53 7 2 278 12 212 542 263 127.70 25 66.93 CHANGED ausWsltCtpstt..spp............CplhQ.ltsp.psphlhphslhphss.....t.sssh..hplhlPhGhhLssGlslplD......ssp.stp..hsashChs.....sGChApls.lssshlsth+...pGspsslshhssss........ps.lslslo.L....pGFssAhcsls ............................................................................................tsWphhCsptt.....spp...............CthhQplhs....p......p.s...ph..s.hs..h.slhphts................t.tphhhhl.hhPhGhhlssGltlplD.................stp.hhp.....htas.hChs.....suChApss.lss.cllssl+...pG.p.s.hslphhssss........ps..lshsls.L......pG.FutAhssh................ 0 35 108 144 +6605 PF06777 DUF1227 Protein of unknown function (DUF1227) Vella Briffa B anon Pfam-B_3660 (release 10.0) Family This family represents a conserved region within a number of eukaryotic DNA repair helicases (EC:3.6.1.-). 
20.30 20.30 21.20 22.70 19.10 18.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.68 0.71 -4.50 30 339 2009-01-15 18:05:59 2003-08-04 15:21:32 6 14 283 0 228 329 6 141.90 50 19.11 CHANGED pplpclKpsDpp+Lp-EYc+LVcGLcpus..scp.t-.hhss....PVLP--...lL.........cEAV.PGNIR+AEHFluFL+RhlEYLKsRl.......+lp+VlsE.......................oPhoFLp+lcc.................pstI-pKsLRFCuERLpsLlpTLcIsclc-assLphlAsFATLVuTYpcG ..............................p.plpchKpsDtp+LpsEYp+LV-GL......+pAs....s...Rp....pDthhuN..............PlLP-D...lL.............pE.AV.PGNIRpAEHFluFL+RhlEYLKs.Rh.........+VpHVlpE..................................oPsuFLpclpp.................hshI-+KPLRFCuERLpSLl+TLE.....ls....-.lpD.assLp.lAsFATLVuTYp+G................................................................................ 0 80 129 194 +6606 PF06778 Chlor_dismutase Chlorite dismutase Vella Briffa B anon Pfam-B_3770 (release 10.0) Family This family contains chlorite dismutase enzymes of bacterial and archaeal origin. This enzyme catalyses the disproportionation of chlorite into chloride and oxygen [1]. Note that many family members are hypothetical proteins. 
25.00 25.00 26.10 25.80 23.90 23.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.27 0.71 -4.60 15 1117 2012-10-02 00:20:33 2003-08-04 15:31:48 7 6 979 83 232 766 77 187.80 35 69.25 CHANGED --RpuhlsEhpsh.hpph...pcptcus.tslYslsGh+AD..lhlahhppsh-cLpplpscFp+op.luchshssaShlSlhc.SsYhs..............psclpupLhsclP...t+YlshYPMsKsh....sWYhLPhE-RcclMc-HGhhu+sassc..V+phhosShGlsDaEWsVsa-ucDltpapclVpchRhsEApp+hsc.tsPFhlG ........................................................................................pRtthhp-htphhpph.........tpp.ppss.puhYslsG.+AD..lhlahht......shcp.Lpph.scF.p+op....lu..ca.h.P..saShVulhc.upYht..................................sal..ApLhs.chP.....sca..lshYPhsKpt.....sWYhLs.EERp+hht...-...H...GhhG.Rp.Yss......V+p.hsuShuh..s...D...aE...Whlsh.u..-..-.lh..p..hhclhh-hRhsEspt.+.ht-..ssFasG................. 0 88 169 216 +6607 PF06779 DUF1228 Protein of unknown function (DUF1228) Vella Briffa B anon Pfam-B_5646 (release 10.0) Family This family represents the N-terminus of several putative bacterial membrane proteins, which may be sugar transporters. Note that many family members are hypothetical proteins. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.84 0.72 -3.96 12 952 2012-10-03 03:33:39 2003-08-04 16:37:05 9 2 879 0 193 2042 466 85.10 42 22.01 CHANGED sluMGlGRFsaTPlLPsMht-tthohusuualAoANYhGYLsGAlhAuhuhtt..up.RhhlhuGhhAoslLhhuMuhhsuhhshhl ................................luMGlGRFhYT.....P..hLPlMh....A.....-....s.....t..h.....o..h..sphua.lASuNY.sGYLsG.u.Ll.hu.hu...thc.........sc.h.+...h.L...h...s...uh...l.A...o..u..l..L.h..Lu.M.A.h.h.sshhlh...................................... 
0 27 80 135 +6608 PF06780 Erp_C Erp_C-term; Erp protein C-terminus Vella Briffa B anon Pfam-B_4561 (release 10.0) Family This family represents the C-terminus of bacterial Erp proteins that seem to be specific to Borrelia burgdorferi (a causative agent of Lyme disease). Borrelia Erp proteins are particularly heterogeneous, which might enable them to interact with a wide variety of host components [1]. 22.80 22.80 22.90 28.60 22.50 22.70 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.10 0.71 -4.35 12 100 2009-12-15 09:40:32 2003-08-04 17:05:26 6 2 23 0 5 82 0 136.90 47 40.69 CHANGED I+sLscKIDcINpDIDuIpspoo.........Vuuc-VhDKITGPlYDcFTDss...suIYpsW..ssLEp-.E-ptLt+LLcELp-sRssLRoKLNEuNp.h....htpsEPpLK-sVsVS-IKpDLEKLKSpLE-VKcYLcspsNFEEIKtYlpsS ......IcslscKIDcIsp-IDuIptpos............VuucEVcDKlTGPlYDchTDus......sSIYssW....ssLE.-.E-ptL..tcLl.cELp-sRssLRoKlpEu...h.....hpscsphK.-sV...pVu-IKpDLEKLKStLEcVKcYLcspsNFE-IKthlps...... 0 5 5 5 +6609 PF06781 UPF0233 Uncharacterised protein family (UPF0233) Moxon SJ anon SWISS-PROT Family \N 22.10 22.10 22.20 23.40 22.00 22.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.39 0.72 -4.36 23 435 2009-01-15 18:05:59 2003-08-05 10:43:42 7 2 428 0 115 274 249 89.00 38 89.48 CHANGED MP+SKsR.p+ssp....sspssscsssphp..tssPsWasslMhuLMllGLlWllVaYlossp........hslssLG..sWNlslGFGlhllGhLMThRWR .................................................................................MPcSKlR+pssh........psss...s..pR.ss.+.hps...sssssWasslhluLhLlGLlWLlVaYluusp............l..hhssLG...sW.NhsIGFulhhsGhlhohtW+...... 
0 36 85 106 +6610 PF06782 UPF0236 Uncharacterised protein family (UPF0236) Moxon SJ anon SWISS-PROT Family \N 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 470 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.49 0.70 -6.07 4 519 2012-10-03 01:22:09 2003-08-05 10:45:30 6 3 191 0 142 510 9 310.10 18 93.00 CHANGED MK.....pll...s.s.htp.h.h.EpLhcsshphpEL.pplhcplsKlhtElltthLEElDphltEs.RcKp+ahlccKc.p+sLhThhGsloFcRsYYhs+...E-G+a..saLLDcsLthscppRlocslchcsVEtAVp.sSYR+uucpltp.ss..hlS+pAl+phlhEss........ph..p.spp++VRhLYIEADt.ahuhQ...............ctputps+LlhIHEG...cp.susc.cLlNp+aahs...EuscDhWtclpcal.ppYch-shcc.lhINGDuAsWI+EGhp.ahs+upa.LDRFHLs+.lh+shS+pP+h+Ecsp....+tlpcsDccGlhtllpchhsstc-EpccccIpchh+hIcs..puI..R.YRc.........pGlpuhuAttplschaSuRlpphshuWScpGL+tMh+lhshphsGsshtclchsc+p....s.hthspcpItpApc+lppphs-plppth.slppG+hs.Iapsh.uL+.uthl .........................................................................................................................................................................................................................h............................................................hh..hlpth...Dt..l.h........pp.............c...t...tt.aph......ppp...........+sl...hhhG.plpap.R..p..h.a..hpt.....ptt..p..........a..lDp.hth.t.h.tphs...h....hhp..u......sac.pss...h.............lot.tlhphl..ht.......................................t..h...........l...hlE...sDt.hl.........................t....t....t..................t...h......hh..p................................t..h.t............h................t............th......th.t.hh..pta.t.....t......lhh.uDuu..........h.....h.p.....t.h.....h.t...........h..........pt........a.....h.D.aHh.p.l........phh..t.................t....h.....t.hh......phl........p.....t.h...h.p.p....h...t................t...........t.....t.........p.h..t.hhphl.p..........h..................................h...hu.....t.....p...p..h.h..s...Rhptt.h.Wp.tGhp.h.thh...h....p..t....................................
............................................................hh.............................................................. 0 43 92 104 +6611 PF06783 UPF0239 Uncharacterised protein family (UPF0239) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 47.30 46.50 24.60 24.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.76 0.72 -4.06 2 75 2009-09-10 19:00:48 2003-08-05 10:46:39 6 1 65 0 48 74 0 82.80 48 94.00 CHANGED .u.shshS.schs-.shhp.LlRYGLalGAlFQhlCl.Ahll......ps.s.sSpPcouEVTc+.ttsV.oh.++.+K.pKKKR. ..........MuuplshSPPElPEPTahEsLLRYGLFlGAIFQLlClLAIIl..P........h.sK.Sc...c.s...-...s.....p.sS...-s...+...ou.Ess+.K...sK...s...s.ssphp.++sKKEsKKKR............. 0 7 11 30 +6612 PF06784 UPF0240 Uncharacterised protein family (UPF0240) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 25.70 25.30 24.70 24.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.07 0.71 -4.59 8 106 2009-01-15 18:05:59 2003-08-05 10:47:43 6 1 87 0 68 102 0 162.80 34 90.81 CHANGED MG...uhluRtlRpFNlENRAc+hIS+pKPsPAPKHPSTpcsLccplopaP-lhcEls+KDspLLopLK-VYVsSpDshs.......pscuspspspsK.h+Ls+....-apashh-scslPKGKlolhEALpLlNNH+hpP-pWTAcKIApEY+LcppsVssLL+YFhTFEV+I.Pscs..K+slpo+ .........................................................MG...uhlsRhh+sFNlE...sRAcR.I.u+.K.....P..psAP+asos..p.h....pcp.hp.pPchtccls.pKDs.pL.hsh...LKcV.YVsSpDPss...........t.t..s.css.t..sp..p.p..+.hpl.s+................shpashh..p...pplP+...G+lolhpALp.......lLs..sHp......hpPppWT..A-KIApEY....pLc.....ccVpslLcYF.hsFpVhl...Pspp...pp................................ 
0 20 25 45 +6613 PF06785 UPF0242 Uncharacterised protein family (UPF0242) Moxon SJ anon SWISS-PROT Family \N 24.30 24.30 24.60 25.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.34 0.70 -5.79 3 41 2009-01-15 18:05:59 2003-08-05 10:49:24 6 1 40 0 7 21 0 372.60 60 90.66 CHANGED MLhVKKhlHsC.uRYapYLhPVVAlLLPLVCaPFLSpSQKhYGYFVFolISSLGWFFAIGRRE+QLKTAAGQLLQTKIRKLTEpDEGLRpIRESlEERQpESsQL+lQNQKLlNQLhHlRGVFlKTKG-hQKLEsLltHL+EENQCLQlQLDALlQECsEKpEEsQELNRELAETLAYQQsLNDEYQATFoEQHNMLDKRQlYIGKLEuKVQDLMCEIRNLLQLESsIsENLPu+sl.AsSp-lstQLlSELKKIAFKsENIEAASSLTASRYlRTDoSVHNYSLECRQLFDuLREENLGMLFVYAPQSQRAVFANALFKTWTGYGlEDFLKh-SDVVISGhsQWccDL+o.SRpERSGKIVIKTKu+GplPFYYCLssLNKGPLspHVLGVLYPl+t-sLpu ..........................hhhsKphhH.h.uRYapYL.PllAlLLPlssaPFLS-pQhhYu.FlFsllSSLGWhFAIGhRE+QL+sAAGQLLpsKIRKlTEpDEGL+pIREolEcRQpEoppL+lpNpKLlpQLhpsRp.VFhpsKG+hp+hEpLs++L+EENQpLQlQL-Ahs+EpsEK..EsQEL.pELpEsLAYQQpLpDEYQATFsEQHsMLDKRQsYIGpLEuKVQDLMCElRNLLQLE....utpp.NLPucss.....uSp-Vs.uQLl..Eh+KIVF+VEshEAAsSLTA.RYhRoDs......SsHNY.....SLsCRQLFDuLREENLGMLFlYAPhuQRlLFANuLFpsWTGYGlEDFLpc-SDVVlEGhuQWEcDLh.op.S.RsERSGKIVIKTKsaGshPFYYClssLsKGPFspHlLGVLYPA+hshhp............................ 0 3 4 6 +6614 PF06786 UPF0253 Uncharacterised protein family (UPF0253) Moxon SJ anon SWISS-PROT Family \N 21.60 21.60 21.70 46.10 21.40 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.12 0.72 -3.86 8 619 2009-01-15 18:05:59 2003-08-05 10:50:18 7 1 618 0 44 123 2 66.00 80 98.68 CHANGED MphY..CEhlRchYupIGSGDtGYlPcAIsCAlKsLN-IAuD-uLPpcVREcAAaAAANLLlSDacDc ....MEKY..C.ELlRKRYAEIASGDLGYVPDALGCVLKVLNEhAAD-ALSEuVREKAAYAAANLLVSDYVNE.. 
1 3 11 26 +6615 PF06787 UPF0254 Uncharacterised protein family (UPF0254) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 109.70 109.30 22.50 20.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.92 0.71 -4.71 11 29 2009-01-15 18:05:59 2003-08-05 10:51:15 6 1 29 0 20 29 1 161.60 46 94.29 CHANGED MIpVATAECFTHGKIGpcIHthApGY............chh.cashsh.p....hsVhVlAuhFIPolpGlcoLLshc.sPcP..chshc.hsKsYsEEpDhcVAphMAcAlKchhssDIuIGTTAGIG+GuIsIlo-cpphlhoSDlauDLhps..EpIhpRQcsGIp+ulchFhplLp .MIoVATAECFTHG+IGhcIHthAsGY.....................ch...ch.hs..t.........tslpVluuhFIPolpulcolLslc..PEP..Dhshc.hsKsYsE-pDhcVAhlMAculKchhssDIuIGTTAGlGRGuIsIlT-csphlhTSDVau.DLlps..-sIhcRQcsGIcKulchhlclLp. 1 5 10 16 +6616 PF06788 UPF0257 Uncharacterised protein family (UPF0257) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 26.00 25.10 24.50 22.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.59 0.70 -5.13 5 467 2009-01-15 18:05:59 2003-08-05 10:53:10 8 1 451 0 19 170 1 225.00 80 98.48 CHANGED VKKhlLlshLsllLsGCDNscuLlSFTPEMASFSNEFDFDPLRGPVKDFTQTLMNEpGEVoKRVoGTLSQEGCFDTLELHDLENNTtLALVLDANYYRDAEThEKRVRLQGKCQLAELPSAGVoWETDDNGFVVSAoGKEMcVpYRYDuEGYPLGKTTKosDpTLSVsATPSsDPRKKLDYTAVSLLN-+plGNVKQSCEYDsYANPVsCpLlIVDESVKPAVpR+YTIKNoIDYY ..........................hKh.LL.sLLshhLsGCDpscsh.SFTPEMASFSNEFDFDPLRGPVKDFTQTLMDEQGEVTKRVSGTLSEEGCFDSLELLDLEN...NT...l....VA.....LVL.DAN..YY..RDAETL.EKRVRL..QGKCQ.L.A.E.LPS.AG....V.S.W.ETDD.NGFV...IK...A..SSKQ...MQ.M...E...YRYDD.QGYPLGKT..TKSND..K..T.L..S.V.S.A..TP.STDPI.KKLDYTAVTLLNNQRVGN....VKQSC.EYDuHANPVDCQLIIV.D.EGVKPAVERVYTIKNTIDYY................. 
0 1 2 10 +6617 PF06789 UPF0258 Uncharacterised protein family (UPF0258) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 65.00 64.80 21.00 18.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.92 0.71 -4.18 3 82 2009-01-15 18:05:59 2003-08-05 10:54:07 7 1 37 0 53 66 0 142.50 52 26.69 CHANGED KDW......p+ccKcupsQtDpPGosusshpsKuusLVEQVFssHlaPQ.......uLoS+hKlNPLass.RhsEht-sGRGRPSWTl-DY...........A+spGccuRlTPL.DLQTQESL.......NPNNLEYWMEDIYTPGYDALLRRKEApLRRs+VCKlhALIsAAVsTlILVlVIPICTl+S ..........................................................Qh-....tt..hp......pps..h.l-pshssc.hPs.........SLpotMKsNPLYsDhRLsEhhE.++spPSWTIEEY.....................s+pu.pps+.sth.sLpsp.p........NPNsLcaWh-DlYTPGYDoLLK+KEsch+RuKlC+lhuLIhhussslILVllVsIsTh........ 0 2 6 19 +6618 PF06790 UPF0259 Uncharacterised protein family (UPF0259) Moxon SJ anon SWISS-PROT Family \N 27.90 27.90 27.90 28.20 27.80 27.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.69 0.70 -4.93 6 639 2009-09-11 20:38:19 2003-08-05 10:55:36 6 2 623 0 78 301 78 229.20 59 95.61 CHANGED MsITspsLh+DTapFhpspIhsIlhlSlLuAFIollIshlhpPsstpLhslhp...shchpuopS.lh-llpsMol-QpplLL+hSlsppFSuLIGsshLlGulITlIshlSptK+h.SlhpuIshsh.hlP+LhlLhFlTThllQlGhhLhllPGIhluIlLSLSPIILshc+hslhsSI+hShpIoapNl+lluPullhWlssKhlLlhlhSpFslls.pluhlIhNhhhN.llouILIIYLFRhYML....LRs .....................................................MSITApSVYRDoGNFFRNQFh....TIL..LlSLLCAFIT..VVLGH.s.FSPS..DAQlAQLs-....G.sl..u..GS..uG....LF-.L....VQNMoPEQQQILLpASAAS....TFSGLIG...NAILAGGlILl..IQLVSAG....pRV..SALRAIGA.SAP..l.LPKLFILIFLTTLLVQIGIMLlVVPGIIMAIlLALAPV.MLVp-.KM.Gl.FAuMRSSMRLs....W.A.N.MR.L.V.A.....PAV..l.....u.WLLAKTLL.L.LFA.o..S.FA.s...L..T..Pp...l.G..A.V.LANTLSN.LISAlLL.IYLFRLYML..IR.Q.......................................................... 
0 10 28 53 +6619 PF06791 TMP_2 Prophage tail length tape measure protein Vella Briffa B anon Pfam-B_3868 (release 10.0) Family This family represents a conserved region located towards the N-terminal end of prophage tail length tape measure protein (TMP). TMP is important for assembly of phage tails and involved in tail length determination. Mutated forms TMP cause tail fibres to be shortened [1]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.17 0.70 -4.71 30 1212 2009-01-15 18:05:59 2003-08-05 10:57:24 8 8 525 0 82 1164 33 176.50 41 24.61 CHANGED pptttshp+uGhSscphstAhRtlPAQhTDIssuLuuGpsshhlhhQQGGQlKDhFGGhGsAh+ulsshlhullsPhsl...uAA........usuuLuhAaYpGupEscpascALllT......GstAGsTuupLt...........shAcplus.ssuThusAupsLspL.....suoGphsupphptlupAssphp.csTGpul--hlppFs+LucDPlcAshcLscphpFLTsupYpQIpu ..........................................................................................................s....hthppAGhSstphthshp.......h....lstQhsDlsspLAsGps.hhlhhQQGuQ.......lts..ua....G....G..........h.........h.....htsh......uGhl...sh....Ms....sh.s.......hss................................AsuuLu..h.A...a.Y.p.G.......p....pp....h....ptFN+sLhLo..............G.s.uGh....Tu.sphh......................................shucuhpu..sGhT......p.psutsL.....u..tl..................................VtuGsh.sut.pht..lspusspht..pusG...p.Vscs.hptFt+L.psDPssuhhuhspph+.lTApQlt.lt.p.............................................................................................................................. 
0 6 36 56 +6620 PF06792 UPF0261 Uncharacterised protein family (UPF0261) Moxon SJ anon SWISS-PROT Family \N 18.70 18.70 19.10 19.10 18.00 17.50 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.16 0.70 -6.00 49 512 2009-09-11 06:27:46 2003-08-05 11:40:03 6 9 446 0 153 391 143 339.40 39 90.09 CHANGED tpplhllGThDTKscELpalpshlcs..........tGhsslhlDlu..shsssshss.....-lotp-VApstssshpslhss...tD.RspAhpsMucuhsthltphhcptplsGllulGGosGTuLsssuM+.uLPlGlPKlhVSTlA.SG..slssYlGssDIsMhhSVsD.luGLNpIo+plLuNAAsAlsGMs.shptsp.........................spKPhlGlTMFGVTT.ssVstlpstL-sp...GaEslVFHATGsGG+AMEcLscpGhlsuVlDlTTTElsDhlhGG.lhsAGs-...RhsAAu+sGlP.VlSsGAlDMVN..FGuhcTlPc+aps.....RhhatHNsplTLhRTos-Estpluchlup+LNp...spGsVthllPhtGlSslDt.GtsFaDs-ucsALhssLcpslps..shplhclstHINDstFAcssltthtchhs...p .............................plhlhuThDTKspEhhaltph.ltt...........tGhpshhlDluhh.....t.......ts.........slstppVAtttst..sh...ttlhst........D.+spAhthMu.uhpthl.ph.p...........lsulluhGGosGTslhssshp..tLPlG.hPKlhVSThA..SG..ssssYl..GssDlsMhhSVsD.luG...LNplSRtVLuNAusAluGhst.t.h..p..pst...........................ppKP.hlGlTMFGlTT.PslpthpttLppp.....sY-slVFHATG.s.GG+uMEpLh.pp.thhsuVlD....hTToElsD.lhGG..Vhssssc...RhpshucstlP.lhusGAlDMVs..Fs.s.po...l.Ptpats...............R.hatHN..tlolhRoos-EstthuchlutKLst.........spu...lthhlP.tGhSthDt.Gtsaa-s....cA.tAhhp..tlctsl.t..........phplhchshpINcstFApthht.h.phh.p......................... 
0 39 99 125 +6621 PF06793 UPF0262 Uncharacterised protein family (UPF0262) Moxon SJ anon SWISS-PROT Family \N 25.00 25.00 26.20 26.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.84 0.71 -4.51 24 243 2009-01-15 18:05:59 2003-08-05 11:41:08 7 2 240 0 86 194 178 157.40 58 97.24 CHANGED hspsp.tR....LhclcLDEtolupssPDlEHERsVAIaDLlE-NpFs.Ps...Gst....sGPYpL+Lult-sRLlF-IpsEsss.lhsalLSLsPFRRllKDYFhIC-SYYpAIRouoPspIEAIDMGRRGlHNEGSplLp-RLpGKl-lDaDTARRLFTLICsLHh+G ...........................................................................................s...psRLscVpL.DE.oIuRuoPDl.EHERuVAIFDLlEEN.sFp.PsGcpt.......sG...PY+L+LSlh-sRLlFsIpp...EsG....ssl.ssHlLSLoPFRRlV+DYFhICESYYpAIRoA.oPSpIEAIDMGRRGlHNEGSpsLp-RLcGKl-lDFDTARRLFTLlCVLHW+G... 0 25 55 66 +6622 PF06794 UPF0270 Uncharacterised protein family (UPF0270) Moxon SJ anon SWISS-PROT Family \N 20.10 20.10 20.70 20.90 19.70 19.00 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.03 0.72 -4.22 53 891 2009-01-15 18:05:59 2003-08-05 11:42:09 7 2 881 1 129 338 38 69.50 64 94.00 CHANGED MI.IPap...pLss-TLpNLIEcFl...LREGTDYG-pEhSLppKlpplpppLcpGcsVlVaSELcE.....olsIhsKpph ..........Ml.IPWQ....-LuPETL-NLIESFV...LREGTDYGE+.Eco...LEQKVA-V++QLpsGEAVLVWSELHE.....TVNIMP+spF............. 0 19 51 95 +6623 PF06795 Erythrovirus_X Erythrovirus X protein Moxon SJ anon Pfam-B_9167 (release 10.0) Family This family consists of several Erythrovirus X proteins which seem to be found exclusively in human parvovirus and human erythrovirus. The function of this family is unknown. 
21.00 21.00 21.10 182.00 20.50 20.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.69 0.72 -3.96 8 19 2009-01-15 18:05:59 2003-08-05 12:00:56 6 1 4 0 0 16 0 80.90 95 98.78 CHANGED MDSYLTTPMPYHPVAVhQNLEEKMQYYLVKTYTSLGKLAYNYPVLTMLGLAMSYKLGPRKVLhTVLQGFMTLGIANWLSWE MDSYLTTPMPYHPVAVhQNLEEKMQYYLVKTYTSLGKLAYNYPVLTMLGLAMSYKLGPRKVLhTVLQGFMTLGIANWLSWE 0 0 0 0 +6624 PF06796 NapE Periplasmic nitrate reductase protein NapE Moxon SJ anon Pfam-B_9066 (release 10.0) Family This family consists of several bacterial periplasmic nitrate reductase NapE proteins. Seven genes, napKEFDABC, encoding the periplasmic nitrate reductase system were cloned from the denitrifying phototrophic bacterium Rhodobacter sphaeroides f. sp. denitrificans IL106. NapE is thought to be a transmembrane protein [1]. 25.00 25.00 30.30 29.70 20.70 18.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.69 0.72 -4.66 42 268 2009-01-15 18:05:59 2003-08-05 12:06:26 6 1 240 0 73 165 7 54.70 49 93.40 CHANGED Mupssts......tpp+ppEl+sFlFLsshLaPlLoVshVuuYGFhlWhhQl.lhGP.PGt ..............Msp.s..ps....spc+uhEh+uFLFlsVlLFPlLSVuhVGGYGFlVWhhQh.lhGPPG....... 0 13 25 52 +6625 PF06797 DUF1229 Protein of unknown function (DUF1229) Moxon SJ anon Pfam-B_9402 (release 10.0) Family This family consists of several hypothetical proteins of around 415 residues in length which seem to be specific to the bacterium Leptospira interrogans. 
25.00 25.00 400.70 400.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.67 0.70 -5.63 2 57 2009-01-15 18:05:59 2003-08-05 12:32:44 6 1 38 0 1 51 0 393.60 94 99.59 CHANGED MSTHFSLKSASVITDYLFKFRIFSLPAICWICSTLIGFGTVNGRLSLFVIGLSFIISIFLLKNIKWNISSTFSFLLVISFLLAYFFFYKTPNMPQHLDGKLNPILYVFKAFPTLFSFFIIhALPSLKQKKLFFIGIALGMFVFAIINSIATLVYLEPPYYGKAYHFFYKMEYNSPGITILASMLPIVLFCFNGYLLKIDKKLKWQNlFFIFVFLISLFISFLFSARTLFFLIIANIIILVLIRLWKIYSIPNKGIYYKFIIGFLILFVSCSSIYFFLKETYIGQRIMNGIYSEKLNHHVDYWNTlKKDFFIYPKITIGSEYTFWYHNIFFDSHKTSGPITALILYIYSVFhFLIuLKKSLKRDYRSFRYFHFYICFIPYLMTTIPWESSESQMVALFAGLGALITTVDDQTPEM ....MSTHFSLKSA.SVITDYLFKFRIFSLPAICWICSTLIGFGTVNGRLSLFVIGLSFIISIFLLKNIKWNISSTFSFLLVISFLLAYFFFYKTPNMPQHLDGKLNPILYVFKAFPTLFSFFIIFALPSLKQKKLFFIGIALGMFVFAIINSIATLVYLEPPYYGKAYHFFYKMEYNSPGITILASMLPIVLFCFNGYLLKIDKKLpWQNVFFIFVFLISLFISFLFSARTFFFLIIANIIILVLIRLWKIYSIPNKGIYYKFIIGFLILFVSCSSIYFFLKETYIGQRIMNGIYSEKLNHHVDYWNTIKKDFFIYPKITIGSEYTFWYHNIFFDSHKTSGPITALILYIYSVFIFLIALKKSLKRDYRSFRYFHFYICFIPYLMTTIPWESSESQMVALFAGLGALITTVDDQTPEM............... 0 1 1 1 +6626 PF06798 PrkA PrkA serine protein kinase C-terminal domain Vella Briffa B, Bateman A anon Pfam-B_3917 (release 10.0) Family This is a family of PrkA bacterial and archaeal serine kinases approximately 630 residues long. This family corresponds to the C-terminal domain [1]. 
25.00 25.00 25.50 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.61 0.70 -5.09 43 1353 2009-01-15 18:05:59 2003-08-05 12:42:19 7 5 1291 0 335 779 130 252.60 58 39.39 CHANGED lpKh+lYsGEsl..hs.psc.slpEl+cpusp...........EGMsGlSsRalhctlupslsp...psppsslNPlplhppLEpslcp....tplspEs.cc+Ylc...hlc.hl+pEYpEhltcElp+Aah..uhcEthppLhspYl-plcAalpcp.......+l+Ds.TGc.h-PD..Echh+uIEEplGI.spptpcsFRpElhsal..uphsppG.cphsa.....ssaE+L+....cslE+KLaushc-hlplhohsup..hsDcEppc+hcphlpRh.pchGYs-psApcllca ................................................................hSKMRVYDGEoLK..DTDPKAK..ShQEYRDhAGV.......D...............EGMsGLS...TRFAFKILS+VFNF........D+s....EVAANPVHLhYVLEQQIER....EQFPpEp.tERYL-.....FlKsaLhP+YsEFIGKEIQTAYL.ESYSEYGQNIFDRYVsYADF.WIQDQ.......EYRDP.-TGplhDR-u...........LNsEL.EKIE..K.........P.......A.........G.........I...SN...........P....K.........DFRNE.IVNFVL.........RARAsNsG...+NPsW.......TSYEKLR...........sVIEKKM...F.S.N.T.E-.LLP.VI....S....FNAK...........sS....s--QKKH-DFVsRM..hE+GYTcKQVRLLsEW...................................................................................................................... 0 95 195 270 +6627 PF06799 DUF1230 Protein of unknown function (DUF1230) Moxon SJ anon Pfam-B_9232 (release 10.0) Family This family consists of several hypothetical plant and photosynthetic bacterial proteins of around 160 residues in length. The function of this family is unknown although looking at the species distribution the protein may play a part in photosynthesis. 
25.00 25.00 28.90 53.10 20.90 20.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.92 0.71 -4.33 30 130 2009-01-15 18:05:59 2003-08-05 12:58:07 6 1 106 0 65 129 126 141.00 42 70.98 CHANGED CPVP.-QpPlNEYppLpsSWhFoWsshsttsah+pLhhhWllshhlss.slAuuSashpcsshchllsusluuhllshLlllRlYLGWsYVtcRLhSpsVpYEESGWYDGQhW.KP.-hlt+D+LlupYpV+PlLsRLpp...ohshhs .....CPVPhEQpPlNEYppLppShhFuWsshs..hhsa..ht+L...........shl.W.h.hs.hhlsu....Pl.Au.uSas..spppsl+hlLuussuuhhllsLlllRlYLGWsYVtcRLhStsV.YEESGWYDGQhWhKPsElLsRDRLluoYpV+PlLpRLppThsh.s........ 0 17 44 58 +6628 PF06800 Sugar_transport Sugar transport protein Vella Briffa B anon Pfam-B_4126 (release 10.0) Family This is a family of bacterial sugar transporters approximately 300 residues long. Members include glucose uptake proteins [1], ribose transport proteins, and several putative and hypothetical membrane proteins probably involved in sugar transport across bacterial membranes. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.90 0.70 -5.31 17 1288 2012-10-02 19:55:49 2003-08-05 13:02:27 7 5 732 0 128 848 304 260.80 40 91.36 CHANGED hsllssKl.GGpPhpQhhGhTlGALlFulllhlhppsshs...phhlhullSGhhWulGQhhQF+uhchlGVS+sMPISTGhQLVGsoLhGVlsFtEWssshphllGhlAlllIllGhhLTuhpccpct..pp..pshp+ulhhLllSolGYhhYsll.sph......hslsGhsslLPQAIGMlluullhsh........chhhpKhohhNllsGlhWuhGNlhhlhSst..hsGlATuFSlSQhuVllSTlGGIhhLsEKKT++EhlhlhhGllLllluulhh 
.............................................................................sllssKh.GGpPhpQhlGsTlGA.L.lFu...l...l...l.h..l...h.....s..p.....s..s.....h...s...........sslllu....llS.Gh.h.Wu.hGQhtQh.+uh.......p...hlGV..SpsMPl.ST.G..hQL...Vu..soLaGVlhh...t..E...Ws..s.h.......t.....p....h....l.l.Ghl.A.l.l.l..l.l.l.Gs.h.h.T.u.hp-cppt...........ts.......stsh.++u.l.h....h.L....l.l.S....ol.....GY..h...s.Ysll.sph..................hs...l.....s..........G........h.s.A.lL.P...Q..u.l..G.........M..l...l.u.......u...llhsh.................pch..h.h...p....K...h...o...h.h...N...ll...s.Glh.Wu...h.GN....L.h....h.llStt...tsGl..A.Tu..F.sLSQh.u.Vl.lSTLGGIhhLsE.....+..K.....T.....+...+...p...h....h...h...l....l...l...Gl..l..LIll.uull.h........................................................................ 1 41 76 103 +6630 PF06802 DUF1231 Protein of unknown function (DUF1231) Moxon SJ anon Pfam-B_9856 (release 10.0) Family This family consists of several Orthopoxvirus specific proteins predominantly of around 340 residues in length. This family contains both B17 and B15 proteins, the function of which are unknown. 
25.00 25.00 105.70 105.60 18.30 17.60 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.28 0.70 -5.56 3 51 2009-01-15 18:05:59 2003-08-05 13:11:15 6 1 18 0 0 39 0 300.00 94 99.99 CHANGED MSRKFMQVYEYDREQYLDEFIEDRYNDSFITSPEYYSAEKYMCRYTTLNHNCVNVRRCALDSKLLHDIITNCKIYNNIELVRATKFVYYLDLIKCNWVSKVGDSVLYPVIFITHTSTRNLDKVSVKTYKGVKVKKLNRCADHAIVINPFVKFKLTLPNKTSHAKVLVTFCKLKTDITPVEAPLPGNVLVYTFPDINKRIPGYIHVNIEGCIDGMIYINSSKFuCVLKLHRSMYRIPPFPIDICSCCSQYTNDDIEIPIHDLIKDVsIFKNKETVYYLKLNNKTIARFTYFNNIDTAITQEH-YVKIALGIVCKLMINNMHSIVGVNHSNTFVNCLLEDNV ..MSRKFMQVYEYDREQYLDEFIEDRYNDSFITSPEYYSAEKYMCRYTTLNHNCVNVRRCALDSKLLHDIITNCKIYNNIELVRATKFVYYLDLIKCNWVSKVGDSVLYPVIFITHTSTRNLDKVSVKTYKGVKVKKLNRCADHAIVINPFVKFKLTLPNKTSHAKVLVTFCKLRTDIT.IEAPLPGNVLVYTFPDINKRIPGYIHlNIEGCIDGMIYINSSKFACVLKLHRSMYRIPPFPIDICSCCSQYTNDDIEIPIHDLIKDVsIFKNKEhVYYLKLNNKTIARFTYFNNIDTAITQEHEYVKIA.LGIVCKLMINNMHSIVGVNHSNTFVNCLLEDNV. 0 0 0 0 +6631 PF06803 DUF1232 Protein of unknown function (DUF1232) Vella Briffa B anon Pfam-B_4265 (release 10.0) Family This family represents a conserved region of approximately 60 residues within a number of hypothetical bacterial and archaeal proteins of unknown function. 20.80 20.80 20.80 20.80 20.70 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.68 0.72 -4.51 172 1698 2009-01-15 18:05:59 2003-08-05 13:26:27 7 20 1207 0 565 1384 202 40.10 37 27.25 CHANGED htst.hh.lhuuLhYhlsPlDlIPD..hlsslGhlDDhsllshs .............h.hs+hh.lhuuLsYhl.P....lDlIPD..hlsslGalDDlsllsh........ 0 189 354 460 +6632 PF06804 Lipoprotein_18 NlpB/DapX lipoprotein Moxon SJ anon Pfam-B_10405 (release 10.0) Family This family consists of a number of bacterial lipoproteins often known as NlpB or DapX. This lipoprotein is detected in outer membrane vesicles in Escherichia coli and appears to be nonessential [1]. 
20.00 20.00 20.20 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.65 0.70 -5.67 6 1056 2009-01-15 18:05:59 2003-08-05 14:05:53 6 3 1001 10 191 616 78 267.20 36 77.58 CHANGED sYLcossLcshpsP....tsuhPph.ssYsIPQsshpGulGKpVDIRPPpQsLpLIsGARs-p..sGEsoLhLlcp-cts.clWppVtchLp-+pIPlpppsDut..lpTDWVsWsptDE-hphtuRYpIShhEsssppuhpVsLlsh+csstlpssshos+pRYNshMhNllouphDpshR-pApcpApchVppIslshGoDcoGhPllIsRuPYNVlWQRLPslLPKhGFsIc-RspSQGTlpsKYtussDc.WpplGsKs....-LKoGsYph.lGDLGNRoSlshTDssG+slspu.hcuLVsVLpAlls .....................................................................................................................................................sYLpu.ssht.lp.sP...sGhh.h.Ph.......tss.........sYsI..P...s...s...p..s...p...Gu...lG...c.sl..D..I...R...P.P.......t..Q......s..LsL..l...s...Gu...R.sph.....sG-..su...l...l....l..c.....s.....s.....p.....s..s.....plW....spVhph....l....pt....+..s.hsl..pp..cs.Dus.....lpT.............D.......W..............l...........p.........W.s..............c........h.........D.............E................D.................p.......h.............c.u..R..Y..p..I....o...ht......p..shp..t..ulp..V....pLl....s.hc....p.........u..t.t........s....s...s...ss.....s..h...QR.....Y..............s...s...t...MhN.h.lsst...LDpst...s.c..s..........s..............ss.....t.......t.....s....c.......t..s...............s..s.....h..........sl....p...........u....u..D...c.....o......G....h......P.h...Ll.lRuPFshlWpRLPsAL-+lG.hcVsDpsRSQGshtV....p..Y........ps....h........s.......-.......u.........t......W......p.c........L..G...u.pc................ss.L..s..u.G..c........Y+l.pV..G...D...L....s..N..R..o.S.l.ph.lDscG+sLopupscslVuVhpAsh............................................................................ 
0 31 84 141 +6633 PF06805 Lambda_tail_I Bacteriophage lambda tail assembly protein I Moxon SJ, Iyer LM, Burroughs AM, Aravind L anon Pfam-B_7725 (release 10.0) Family This family consists of tail assembly proteins from lambdoid and T1 phages and related prophages, e.g. the tail assembly protein I (TAPI). Members of this family contain a core ubiquitin fold domain [1]. The exact function of TAPI is not clear but it is not incorporated into the mature tail. Gene neighborhoods reveal that TAPI co-occurs with genes encoding the host-specificity protein TapJ, and TapK, which contains a JAB metallopeptidase fused to an NlpC/P60 peptidase. It is proposed that the TAPI protein is processed by the peptidase domains of TapK [1]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.60 0.72 -4.16 15 1136 2012-10-03 10:59:06 2003-08-05 14:19:07 7 4 504 0 59 868 33 71.30 56 35.56 CHANGED shsplpLtGslt......p+aG+htthsVpThuEulpALusplsuacphh.pth.....atlhhst+N.uccsl.t....ttshsGssI+IlPhstGu ...............................h.................................p..T..u..A..EAIRALu.hQl.PuFRppls..............................-GWYQVRIuG....cDsutstL.ApLp..Es..LssGuVIHIVPRlAGA.................................. 0 6 22 42 +6634 PF06806 DUF1233 Putative excisionase (DUF1233) Moxon SJ anon Pfam-B_9240 (release 10.0) Family This family consists of several putative phage excisionase proteins of around 80 residues in length. 21.30 21.30 21.40 21.60 21.10 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.33 0.72 -4.43 13 472 2009-01-15 18:05:59 2003-08-05 14:24:08 7 1 322 1 19 108 0 65.40 48 88.75 CHANGED VI.....ls.PscWVsEclLhAlTGLptusIp+ARcpuWhpG+EY++VuPDGpPcss.upshYNhtpIspWIcp....pP .........l..sscWlsEp.LhshpGl..pttsl.p.+h.Rcp.saMpG+EYKHlu.s-GpPtcp..S.ChYNhccIspWIEpQt..su............... 
0 1 5 12 +6635 PF06807 Clp1 Pre-mRNA cleavage complex II protein Clp1 Moxon SJ anon Pfam-B_9787 (release 10.0) Family This family consists of several pre-mRNA cleavage complex II Clp1 (or HeaB) proteins. Six different protein factors are required in vitro for 3' end formation of mammalian pre-mRNAs by endonucleolytic cleavage and polyadenylation. Clp1 is a subunit of cleavage complex IIA, which is required for cleavage, but not for polyadenylation of pre-mRNA [1]. 28.90 28.90 28.90 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.34 0.71 -4.54 16 510 2009-09-11 15:53:51 2003-08-05 14:32:10 9 17 273 2 375 523 8 194.00 21 39.16 CHANGED GWVpGhGhclLlchlcsasss....hVlVl..sp.E+LhscLppthhsp......................lpllclsKutGVs...........................pRspchR+phR-ppIhpYFh.G.hpssh...hPashsspFsDl................pha+l.....hssssL.lshpspsssh+l......thslhppllAlSh............tpts-sll.......ssslsGFhhlptVDlp+chhslLoPsstp.L.PsshLlhus.lpa.cs ................................................................................................................................GWhps.GhphLhchlpthpss....hllhl.....s....p....c+..hh.s.pLp..p.ph..spp..............................................lp.llt..l.scs..sus.s......................................................................pp.s..pp..h.t...pp.hRc.plp..pYFa....u.........h...p...ts..h....................Phs...ts.s..h..ssl..................................................................p.acl........shssss.....l.....h.....h.p.t....p....s...s..tcl...............................sl.t.p.llulsh................................................................st..t.t.p..s..hh......................p.ssshGashlpsl...............D......h..p..+p...hhpl.LsPh..sh.....L.....s.p...hLl.hst.h...................................................................................... 
0 144 216 312 +6636 PF06808 DctM DctM-like transporters Vella Briffa B, Bateman A anon Pfam-B_4075 (release 10.0) Family This family contains a diverse range of predicted transporter proteins. Including the DctM subunit of the bacterial and archaeal TRAP C4-dicarboxylate transport (Dct) system permease. In general, C4-dicarboxylate transport systems allow C4-dicarboxylates like succinate, fumarate, and malate to be taken up. TRAP C4-dicarboxylate carriers are secondary carriers that use an electrochemical H+ gradient as the driving force for transport. DctM is an integral membrane protein that is one of the constituents of TRAP carriers [1]. Note that many family members are hypothetical proteins. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.27 0.70 -5.92 18 7152 2012-10-02 15:12:49 2003-08-05 14:32:48 7 20 1871 0 2023 7864 11933 404.70 26 85.53 CHANGED lshhhlLhhhuhPluhulhlsul..hhhhhhsss...........sh..lhspphhs.......u....lsshsLhAlPhFlLhGslhppsGhuccllchssullG+h+GGhuhsslhuuslhuulSGSulAsssslGuhhlPhM++pGYssshAuAlhAuuushu.llPPshshllauh..............hsslSluslFhAGllPGll....................................hhlslhhsshhhAp.+psh.s............................tscsphtphhpuhhpuh...........................................uLhlsllllhulhhGh.....aTsTEAuuhulshu..Lhlullh..........h+chshcclhcs........Lhpsucss....usVhhllAuuulhuahlohsslshtlushlhulss..................st..hhhLllsslhhl..llGhhl-ssAsllIlsPlhhPlhtphGlDP................lahGlhllhshslGhhoPPVGhslaluuuIu..............p.sshhpsh+tllPalhshhssLlLlshlPtl 
..................................................................................................................................................................hhhlhh.h..h....hu.h..P.l.u.h...u.l.shsuh.......hh..hh..h..h..ssh..............................................................sh...h.h..spp.h.hs.............................s....h.s.s.h.s...L.h.A....lP..hF...l...L...h.....G.....tl...hppuGlucclls.hs.tt.h.........hG..+....h.......G....G..Lu.....hs....s..l.l...u...shlh...uu.l.S...G....SusAsssuhG....sl....hlP....hM........h.+.t...G.Ystp.h.u.sulh...suuu..sl.G.....l..IP.....PS.hs..h.ll.auh............................hss..hS....l......u......pL........F....h....A..G...l....l...P....G..l..l..................................................................................................................................................h...s..h...s...h...h....h....h....s...h....h....h.....up......+.t...s...h..s.s................................................................................................t....p..h....s....h.....t........p....h......h......p....s.....h...h....p....u....h............................................................................................................................................................................................................s.L...h..l....l....l..l...l..G....u..l....h.s....Gl..........................hT.P...T...E....A.....u....u........l....u......s...h.hu........l.h..l.uh.h................................................................h+p.h..s...h....p..p...l..h.ps.................................................................lhp.s....s.pso........uh.l.....hhl..lus.............us....h.h.u.h....h.h..s..h.t.t.l..s.t.h.l...s....p.h.l.h....s.l.s..s.............................................s.........hh.h.L..l..l..l....l..h....h.l....llG.h..hh.....-h...s....s.....h..hl.l..h.s....P..lh.hP.l.h.....h....p....h......G.......l.D.s................................................................................
........la.F..Glhhhh.s....ht.lu.h....l....TP...PlG.hs.l.F.s.s.s.u..lu.....................................p..hpht.p.lh.+.s....l.l.....P..a...h.h...s...h.h.h..s.Lhl.ls.hhPt..................................................................................................................................................... 0 616 1374 1743 +6637 PF06809 NPDC1 Neural proliferation differentiation control-1 protein (NPDC1) Moxon SJ anon Pfam-B_10407 (release 10.0) Family This family consists of several neural proliferation differentiation control-1 (NPDC1) proteins. NPDC1 plays a role in the control of neural cell proliferation and differentiation. It has been suggested that NPDC1 may be involved in the development of several secretion glands. This family also contains the C-terminal region of the C. elegans protein CAB-1 (Swiss:Q93249) which is known to interact with AEX-3 [2]. 19.20 19.20 19.30 27.80 18.70 19.10 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.26 0.70 -5.65 2 114 2009-01-15 18:05:59 2003-08-05 15:07:05 6 6 79 0 62 116 0 192.70 37 65.07 CHANGED MATPlPPPSPRHLRLLRLLLSGLlLGAALpGAsAt+PDsssCPGSLDCALKRRA+CPPGAHACGPCLQsFQEDQpGhCVPR..hppsPu.....sphEscI...........shLhQELA..cKEuGHS...s.PL.cstQ+L.EP.ATLGFSthGQtLE.GLPST.GTsoPhPHTSLuS.sSSsPVpMSPLEPpGtpGsGLsLVLILAFClAuuAALuVAuLCWCRLQREIRLTQKADY.ATAKuPsSPusPRISPGDQRLApSAEMYHYQHQRQQMLCLERHKEPPKEL-oASSDEENEDGDFTVYECPGLAPTGEMEVRNPLFDHusLSAPlPuPpS.PsL. 
...................................................................................................................................................................................................................................................................................................................................................................................................................................h....hlhshhh.s.ss..h.Al.h....s.lshh....p.h..ppp.+.hstc.sDa...s.shus.s..uss.t.s.t......s.s.G.Dp+LApSApMYHYQHQ+QQhlshE.......ppp...p............sp..t..hp..s.s.pSD--NE-....GDaTVYECPGLAP....T..GEMEV+NPLF..cts........................................................ 0 21 27 45 +6638 PF06810 Phage_GP20 Phage minor structural protein GP20 Moxon SJ anon Pfam-B_8431 (release 10.0) Family This family consists of several phage minor structural protein GP20 sequences of around 180 residues in length. The function of this family is unknown. 31.00 31.00 31.50 31.20 30.40 30.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.97 0.71 -4.78 21 296 2009-01-15 18:05:59 2003-08-05 17:04:18 6 1 261 0 41 246 4 154.40 25 79.64 CHANGED pcplcplht.cupslt....Ks+hscs...pp....chcsl+pQlspRDpQlp-Lpc.pspDs--Lppcl-cLcppNc....-appclpphphssAlchALschsu+sscslhuhlspDplcLcc-s.lhGL--QIcsL+EScsYLFsppp....pPssuh.......sssssu .........................................................................................p.hptl.......t.tspplp......K...s...chsph.......pp.......phcslcpp.......lppt-cplccL+c.....p.....stss--L....ppplccLppcs...cp....ttchppchpphph..ssAlchALps..h.....pA...p...s...s...cs.lh.uh...l.D..h-...plclsc-u..lpGL--tlcsLK..cSc..s..aLFtppp...t..pPt.t...........sss............................................ 
0 23 36 39 +6640 PF06812 ImpA-rel_N ImpA-rel_N-term; ImpA-related N-terminal Vella Briffa B anon Pfam-B_4308 (release 10.0) Family This family represents a conserved region located towards the N-terminal end of ImpA and related proteins. ImpA is an inner membrane protein, which has been suggested to be involved with proteins that are exported and associated with colony variations in Actinobacillus actinomycetemcomitans [1]. Note that many family members are hypothetical proteins. 20.60 20.60 21.00 20.60 19.80 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.68 0.72 -4.20 143 1962 2009-01-15 18:05:59 2003-08-05 17:19:59 7 7 932 0 298 1333 37 61.00 28 14.43 CHANGED DWstlpchutpLLtpcoKDL+lssaLspAhh.ctpGhsGlspGlpLLssllppaW.-slaPths ..........sWstlpchuhpLLtspuKDl+lssahhhuht.+ptGhsGhspGlpLLsthlpp.aW.ss.haPp..h................. 0 43 112 200 +6641 PF06813 Nodulin-like Nodulin-like Vella Briffa B anon Pfam-B_4440 (release 10.0) Family This family represents a conserved region within plant nodulin-like proteins. 
24.70 24.70 24.80 24.90 24.60 24.60 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.63 0.70 -5.12 15 586 2012-10-03 03:33:39 2003-08-05 17:21:06 8 13 122 0 360 786 6 218.90 26 41.90 CHANGED +WhsllAulWlQusuGssasFu.sYSusLKSsLGhsQppLNhLuVApDLGcslGhlSGlssphhPsWsVLhlGAshshlGYGlhWLsVopplstLPhWhlhlhlsluuNSpsWhNTAsLVoCl+NFPtsRGlVlGlLKGYsGLSuAlaTplYpulhss....cssshLLllAllPslVsLsslahlRsp......spssppps-sthFhhhhhlulhlAsYLlshsllpphhshopuhhhshsulhl.lLLlsPLslPl ......................................................Wh.h.hhuuhhlt.hhsG.ss.YhFu........hYSstlK.s.t.h.s......h.sQppls..h.luh....hp.s.lG.t.s..h.G....h.h...u...Ghl......h.p......h......h.............s................s..hhllhlGu.h.hshh.G.Y...h...h...h..........a..L..s..........l...s......t......t...........h......s...................s........h.h.h........h.s...l.h.h.h...l.u..ss..u....t.s..ah...................s.Tu...sll.......osh....pNF..P..p..s..R..G.sslulh+uah.G.L....SuAlhop.lhp.s..hats...........s.s.ss...h...l.L...hlAhh....ss.h.....l....s.l..h.h.h.......h......h.l..+...h.............................ttt....tpt...hh....h......h........h.h...h...slh.hu..h.aLhhh..hht.t.........h.sp...h..h..hhh.h..hhlh.Phhls................................................................................................... 0 73 240 310 +6642 PF06814 Lung_7-TM_R Lung seven transmembrane receptor Vella Briffa B anon Pfam-B_4367 (release 10.0) Family This family represents a conserved region with eukaryotic lung seven transmembrane receptors and related proteins. 
25.20 25.20 25.20 25.30 25.10 24.90 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.75 0.70 -5.46 15 836 2012-10-03 04:04:29 2003-08-05 17:23:36 8 8 290 0 523 853 12 251.00 26 55.43 CHANGED lscoGhYslhhh.sp.p......t.psshshpsplsa+NspGYLsAp-hPLh.hYshMslsYslhullWhahhh+ah+DllplQhaIsullhluhsEhsFaah-YthhNspGhs...spshslas.lhuuhKtolstlLlLllShGYGlV+PpLGshhp+lhhlulhhhlhuslhhlhppsuths-tp....thlhhhlPl..uhl.hhhlhWIFpSLsc.ThcpL+h.+RshsKLplYR+Ftsslshullhohsahhhp.lhh...pthsshpptW+.tWlhs.saW.cllshslLllIshLWRPopNs. ......................................................s.Y.lh..h..h.................................s..h....p...h.ph.p.h....p.h...ps........s.....h..u......a.Lsus....phP.h.............hYhh..hslhYh...lh.u.h................hWhh.h.h........h..p...h..h..p.............s......l..h.plphh.h...su...l...lh...hthlphh...hh.hh.pa...p..hs..............p.pG............................sps....hhlhh.lhphh+tshhhhllllluhGauh.lK.s..h......L...u.......p........t.............p....+...l.hh.lsh....h.lhusl....h.........l..........l...........h..........................s......................s..s.t...................................h.lhhhlsl............shh...hh....hh.W..............h....p..p.............L...pp...oh.p...p.h....+..................h....tp...p............hs..KLp.....la...............R+.ah.h.ll.h.hlh.....h..ohh...hhhh..h......................h...pWp....W.h....shh.ph.hhhhh....hh.hhh......hha..RPstss............................................................................................................................. 0 155 276 408 +6643 PF06815 RVT_connect rvt_connect; Reverse transcriptase connection domain Bateman A anon Bateman A Domain This domain is known as the connection domain. This domain lies between the thumb and palm domains [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.26 0.72 -4.14 47 20194 2009-01-15 18:05:59 2003-08-05 17:58:29 8 56 614 372 0 19630 0 90.20 85 13.88 CHANGED sYYcPpKsLhAclpKhGpsQWsYplhQ..pp+..sLKsGKau+t+ssHsNshcpLApslpKIucEuIVIWG+lP.pFcLPlp+Es..WEpWW.s-YWQsoWIP-hEFlsT ..............................................................VYYDPSKDL.....IAEIQKQGQGQWTYQIYQ...EPFK...NLKT.GKYARhRuAHTNDVKQLTEAVQKIuTESIVIWGK.TP.KF+LPIQKET..WE....................................... 2 0 0 0 +6644 PF06816 NOD NOD; NOD1; NOTCH protein Guo J, Studholme DJ anon Guo J Family NOTCH signalling plays a fundamental role during a great number of developmental processes in multicellular animals [1-2]. NOD and NODP represent a region present in many NOTCH proteins and NOTCH homologs in multiple species such as NOTCH2 and NOTCH3, LIN12, SC1 and TAN1. Role of NOD domain remains to be elucidated. 19.40 19.40 19.60 19.40 19.00 19.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.64 0.72 -4.88 26 289 2009-09-11 05:54:01 2003-08-06 11:46:38 8 241 92 8 132 222 0 56.10 43 2.77 CHANGED PtpLApGsLlllVh.lsP-ph..ppsussFLRcLSplL+TsVph+pDspGpsMlaPahGp ....PtpLApGsLVlVVL.hsP-pL..hpsut.sFLRp.LSplL+Tslph+pDupGp.MlaPYaG...... 0 23 35 81 +6645 PF06817 RVT_thumb rvt_thumb; Reverse transcriptase thumb domain Bateman A anon Bateman A Domain This domain is known as the thumb domain. It is composed of a four helix bundle [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.30 0.72 -4.40 41 66982 2009-01-15 18:05:59 2003-08-06 12:13:26 9 77 671 372 39 66594 2 64.00 90 13.78 CHANGED KWplQpIpLPcp-..phTVNDIQKLVGcLNWAu.Ql.YsG.I+sKpLCKLlRG.sKsLs-hVthTcEAchElpcN+ .......................KWTVQPIhLPEKD......SW.TVNDIQKLVGKLNWA......S.QI..YsG..IK..V+..QLCKLLRG.sK.ALTEVlP.LTcEAELELAEN................. 0 13 13 14 +6646 PF06818 Fez1 Fez1 Vella Briffa B anon Pfam-B_4593 (release 10.0) Family This family represents the eukaryotic Fez1 protein. Fez1 contains a leucine-zipper region with similarity to the DNA-binding domain of the cAMP-responsive activating-transcription factor 5 [1]. There is evidence that Fez1 inhibits cancer cell growth through regulation of mitosis, and that its alterations result in abnormal cell growth [2]. Note that some family members contain more than one copy of this region. 26.40 26.40 26.90 27.60 26.30 26.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.61 0.71 -4.30 8 300 2009-01-15 18:05:59 2003-08-06 14:17:55 10 3 66 0 170 232 1 152.80 38 31.58 CHANGED TKWEVCQKSGEISLLKQQLKESQAELuQKuuEIluLRuQLREuRupLpspEtphpcLc-uh+T+sLELElCEsELQR++sEAELLREKss+l-tElscL+-Ahuup..........t..s..h...............................ESDE...AKupcts......................tsshpsLRppl-RLRAELptERp+tEcQusuFEcER+lWQEEKEKVIRYQKQLQpsYlpMYpRNppLE+cLpp 
...............................................................................................................................opWEVC...QKuGE..ISLLKQQL+-uQsElspKhuElluL+s..p.L+..-sRuphptp-tph.tL....pt.uh..........ps.ct.phc..tptph..................................p.......................................................................................................................tpt...st..t............................................................................................h.th...p.tphppLpsEL....ERpttcp.t.sFptERhsWptEK-+VlpYQ+pL.Q.sYlpMYpRNptLEptlp....................................................... 0 17 33 82 +6647 PF06819 Arc_PepC Archaeal Peptidase A24 C-terminal Domain Yeats C anon Yeats C Domain This region is of unknown function but is found in some archaeal Pfam:PF01478. It is predicted to be of mixed alpha/beta secondary structure by JPred. 21.20 21.20 21.20 62.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.18 0.72 -4.14 10 35 2009-01-15 18:05:59 2003-08-06 14:21:48 6 2 35 0 27 37 0 111.70 35 30.52 CHANGED Vh.phllhlhplpLIhtllpALTsttls.ccKpV-ELKEGDILt-hIhlpssG.Vhh-pushhcRhKphLcsEpscslp....ccllsssuEGLocEpIEcLKKLssEGKlpsEhpV .hl..phhlhlhtlpLl.hllhuLpsttls.-c+pl-ELKEGDILt-hIhhcsst.Vhh-psshhcRlKphlpstphps......tchllhssuEGLocEsIEhLK+LhpEGKlpsch.V..... 0 4 7 18 +6648 PF06820 Phage_fiber_C Tail_fib_C-term; Putative prophage tail fibre C-terminus Vella Briffa B anon Pfam-B_5030 (release 10.0) Family This family represents the C-terminus of a prophage tail fibre protein found mostly in E. coli. All family members contain a conserved RLGP motif. 
25.00 25.00 26.60 25.90 22.70 21.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.07 0.72 -3.84 2 1275 2009-01-15 18:05:59 2003-08-06 14:42:52 7 14 118 0 2 871 0 62.30 64 34.03 CHANGED hphRLGPAsIlEosppGhhPtpD.tlITtlshL.stDppplQshhp.LQlhhuDG.WpsltGhc .........hphRL.GPAsIlEospNGhhP-pD.tlITtlshL.stDtpQlQslhR.LQlhhuDGsWpsltGhc................. 0 0 0 2 +6649 PF06821 Ser_hydrolase DUF1234; Serine hydrolase Vella Briffa B, Eberhardt R anon Pfam-B_4941 (release 10.0) Domain Members of this family have serine hydrolase activity. They contain a conserved serine hydrolase motif, GXSXG/A, where the serine is a putative nucleophile [1].\ This family has an alpha-beta hydrolase fold [2,3]. Eukaryotic members of this family have a conserved LXCXE motif, which binds to retinoblastomas. This motif is absent from prokaryotic members of this family [3]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.11 0.71 -4.66 24 1243 2012-10-03 11:45:05 2003-08-06 14:56:12 8 3 948 5 341 1631 220 168.30 26 89.49 CHANGED hLIVPGapsSus.sHWQoaWpcphssu.pRVcQt-W...ppPshs-WlstLppslsttsts.......sllVAHSLGClsss+hs.....hpttc..pVtGALLVAPsDs.............p...tstthtsFuslPpp.LsFPo.....llVAScNDPasshppApphApsWGuthlslGpuGHINs-SGaGsWPpGhhlLsp 
......................................................................................................................................hlIl.G..h..p..u..s..ss...sH..W...sh.h....pc.p.l..st....t.......h........c...V.....p...........h.......sa...........pp.P...s....h....s....p........W.....h.........t..t....lp...p.......tl..s....t...t....p...p.s..........s.hlVuHSLGslssl+ah..............ttpp...pl.t.G.h.l..LVA.s.h.c..t............................................................t.....t..s.....h.h..p..s.h...s.....s....h..s...p...p..t..h..s.h.ss....................hl.....l..u.S.cNDsa.ss..h..p.p.u.p....pl..A.p.sh..s.....u.p..hhtl..spuG.Hhss.psGassastshth...p.......................................................................................................................................... 2 59 167 254 +6650 PF06822 DUF1235 Protein of unknown function (DUF1235) Vella Briffa B anon Pfam-B_4988 (release 10.0) Family This family contains a number of viral proteins of unknown function. 25.00 25.00 39.30 39.20 20.70 18.30 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.75 0.70 -5.42 11 73 2009-01-15 18:05:59 2003-08-06 15:05:26 7 1 38 0 0 65 0 226.30 54 91.54 CHANGED hIPaFs.s+I......+splhcLss.s.......hhpYht..HsphIlpEIc+al...s-plhssshlslphacpppshp...pspspshS+lllClpuAp+GGslllpsphss.p+pllp.sssphllLSPLucYsVopVp+GshlllslclsIPSMc....lhhhstpsl+asNslplLhPhpts-lsFsl+plhDhpsscllCEQllINpcWYTlls..sssp+lhlPSh..ChGpoh..chsaspss.cc-hlpp..llshpsPF-.hlhPp+sIYsuh.lt...E+llYG+l ...............hhPhFshSKI......pshl...sNss.......pphYhcsEHp+IIocEIsRpM...DEsVLLTN.ILSVEVVNcNEMY....HLIPHRLSpIILCISSl..GGCVISIDNDlN.sKNILTFPIDHAVIISPLuchsV..VsKGsshllllcsDIPShR....llsoh.sssIhYsNsLsLls.lshS..VFlIRploDhh.s++lC-QIFhNs+WYolIs....hssKpasIPSs..ClGhos...spYlssoh-pDhlh+..lhNl-hPFD.hla.+hpsYsSlsl+...EpILYG+I. 
0 0 0 0 +6651 PF06823 DUF1236 Protein of unknown function (DUF1236) Vella Briffa B anon Pfam-B_5056 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function. Some family members contain more than one copy of the region represented by this family. 20.60 20.60 20.60 21.70 20.30 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.19 0.72 -4.25 57 317 2009-01-15 18:05:59 2003-08-06 15:12:02 7 9 104 0 164 322 7 64.40 33 37.89 CHANGED YVppp.l.....tshsh..ptclsVGsslPcs..Vplpsl..Psp.....s...sYpYslVN-.chVlV-PpoRcIVpllc ..............................hlppp.s...shth...phclsVGsslPcs...Vp..lpsl...Psp......s......sYpYslVs-.chVlV-PpT+clVpVlp... 0 22 74 102 +6652 PF06824 DUF1237 Protein of unknown function (DUF1237) Vella Briffa B anon Pfam-B_4981 (release 10.0) Domain This family contains a number of hypothetical proteins of about 450 residues in length. Their function is unknown, and most are bacterial. However, structurally this family is part of the 6 hairpin glycosidase superfamily, suggesting a glycosyl hydrolase function. 
20.10 20.10 20.70 20.60 19.30 19.70 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.29 0.70 -5.87 51 1064 2012-10-03 02:33:51 2003-08-06 15:25:53 6 9 860 15 263 760 58 389.20 45 85.81 CHANGED D.-LupLF.cNsaPNTL.........DTTlca...............................................ttspspTFVlTGDIsAhWLRDSosQltsYlslsccDt...........pLppLltGsIspQschllhsPYsNAFpss.......sstput.t.sDps.........p..t................s.hVaEpKYElDSLsh.lpLutpaacpT.G-s........shhssp.WhpAlcpllclhcpppp.................................tpsh.ssYsFpRpTshuo-TLshsGhGsPls..TGLlpSuFRPSDDAolasahIP.uNhahsstLcphucl...ht.tthtsc...................LuppspphupclcpuIpcaGllp..........H.scaGclaAaEVDGaGuthhMDDANlPSLLuLP.hLGals.hcDs........................................................................................lYpNTR+hlLSpp.NPYahcGpthcGIGGPHlGhphsWPMSlllpuhTo.............sD-pEItpsLphlhsosuGhGlhHEShcssss..pcaTRsWFAWANohFuchllc ...........................................t..chsphF.ppsassTL.........-TTVch..................................................c-ssTFVlTGD.IPAMWLRDSoAQl.pPYL......hlAccDs....................pLpphItGllpRQhphlhhDPYANAFN.s.........sht....G.........ap..s.......DcT..............chs..........................s.hlWERKYElDSLCYPlpLAYhhW+pT...Gco......s.Fsp.....p..ahpAhcpILclaphEQc......................................t...t.p.SPYpF...R.....sTs.....p.......tpDTLspsGhGs....sssh..TG.MsWSuFRPSDDACpYuYLlP.SNhFAVVsLthlpEI....hps.lt....h.p.c..p.....................................lstcspcLtcEIppGIcpauhsp.............p..sph.......tcl.YAaEVDGhGs..t..l.MDDu.N.VP.SLL.uhP.YLGass..h-D.........................................................................................l..YQsTR+hlLSpc.......NPYaap..Gp.....hupGIGSsH....................os......h...pYl.WPluLuhp...GhTo.............pD....csEhcphLchLlsoDu.GTGhMHESFc..sssP.....spaTRpW.................FuWANhhFsELllc.................................................................................... 
2 95 173 226 +6653 PF06825 HSBP1 Heat shock factor binding protein 1 Wood V, Studholme D anon Pfam-B_20266 (release 10.0) Family Heat shock factor binding protein 1 (HSBP1) appears to be a negative regulator of the heat shock response [1]. 24.00 24.00 24.00 24.00 23.90 23.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.72 0.72 -4.39 20 305 2009-01-15 18:05:59 2003-08-06 17:00:52 7 4 201 2 193 288 2 50.70 47 56.10 CHANGED sp-LosaVpsLLpQhQs+FpsMScpIls+ID-M.......................upRID-LEpols-L..hspsGs- ..........h.p-LTshVpsLLpQhQs+FQsMS-p...IlsR......lD-M.......................usRID-LE+sIsDLhspsGh.......... 0 60 94 143 +6654 PF06826 Asp-Al_Ex Predicted Permease Membrane Region Yeats C anon Yeats C Family This family represents five transmembrane helices that are normally found flanking (five either side) a pair of Pfam:PF02080 domains. This suggests that the paired regions form a ten helical structure, probably forming the pore, whereas the Pfam:PF02080) binds a ligand for export or regulation of the pore. Swiss:Q8L3K8 is described as a aspartate-alanine antiporter ([1]). In conjunction with Swiss:Q8L3K9 it forms a 'proton motive metabolic cycle catalysed by an aspartate-alanine exchange'. The general conservation of domain architecture in this family suggests that they are functional orthologues. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.21 0.71 -4.76 34 4439 2012-10-02 17:06:44 2003-08-06 17:46:47 7 8 1308 0 585 2491 77 168.70 31 61.72 CHANGED slslslslGhhlGplph......hulsLGsusGsLlsuLllGpht........hshshsthlpslGLslFlhslGlpuGssFhsulpps.Glphhhhulllsllshllshllu+..lhchshshssGhhuGuhTsosulusus-t.sp.shss............luYussYshGhlhhllhs.tllhh ....................lhlslslGlhlGpl.pl..........tulpLGhuhG.....sL.....hsulllGp....h.....t..h.hh..................hs.sh..p...h.lpp...hGlhLFlhsVGlpuGs.sFhs.s.ltps..G....h....phh.hhulllsh.lshlls.hll.u...+...lh........c....h....s....h....s..hhhG..hhuGuh.Tss.PuLuhusst........sp...s...s.hsu................................................................luYAhsYsluhlhhllsspllh.h................................. 0 157 335 485 +6655 PF06827 zf-FPG_IleRS Zinc finger found in FPG and IleRS Bateman A anon Bateman A Domain This zinc binding domain is found at the C-terminus of isoleucyl tRNA synthetase and the enzyme Formamidopyrimidine-DNA glycosylase EC:3.2.2.23. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.82 0.72 -4.26 143 6847 2012-10-03 10:42:43 2003-08-06 17:58:27 9 22 3732 62 1464 4839 1582 29.50 34 5.63 CHANGED uphC.R..Chph.hpchth..stctshhCs+Cpph ..........Gc.CtR..Ct..s.....h..lpclsh.....ss+sshhCscCQp..... 0 441 915 1228 +6658 PF06830 Root_cap Root cap Vella Briffa B anon Pfam-B_5867 (release 10.0) Family The cells at the periphery of the root cap are continuously sloughed off from the root into the mucilage, and are thought to be programmed to die [1].This family represents a conserved region approximately 60 residues in length within plant root cap proteins, which may be involved in the process. 
20.80 20.80 22.50 22.40 18.60 18.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.73 0.72 -4.13 18 153 2009-09-10 23:47:36 2003-08-07 11:46:05 6 5 23 0 81 151 0 55.20 54 17.43 CHANGED uFKFasLSscVcGVLGQTYRsDYVNp.lcluusMPVMGGsccYtTSuLFusDCsVuRF ......sFKFa.sL..ospVcGVLGQTYRssY.ls....lcluss...MPlM.GGtscYts.SsLFusDCtVuRF...... 0 9 43 70 +6659 PF06831 H2TH Formamidopyrimidine-DNA glycosylase H2TH domain Bateman A anon Prosite Domain Formamidopyrimidine-DNA glycosylase (Fpg) is a DNA repair enzyme that excises oxidised purines from damaged DNA. This family is the central domain containing the DNA-binding helix-two turn-helix domain [1]. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.00 0.72 -4.23 19 5357 2012-10-02 21:21:44 2003-08-07 11:51:56 9 25 3675 70 1277 3734 2235 91.20 32 32.59 CHANGED LGP-PL.........p..sh...............t.htptltc...p....+.....+slKshLLDQplluGlGNIYsDEsLFtutlcPpphAssLstpchphlhpslpplLpcAlphstsshph.s ...............................................LGP-PL.............sssh.ss.......................p.h..l..tp.t.l.tp......+......p.................ps.I..K.shLL..DQ.p.ll.....u...GlGNI.......YssEsLap...Atlp.Pp+.....ussL......o......t......t............c......h......p......t......LhpsltplltpulptGGoo....th...................................... 0 400 799 1061 +6660 PF06832 BiPBP_C Penicillin-Binding Protein C-terminus Family Yeats C anon Yeats C Family This conserved region of approximately 90 residues is found in a sub-group of bacterial Penicillin-Binding Proteins (PBPs). A variable length loop region separates this region from the transpeptidase unit (Pfam:PF00905). It is predicted by PROF to be an all beta fold. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.98 0.72 -4.06 111 1158 2012-10-03 16:25:20 2003-08-07 12:26:00 7 11 1137 0 250 947 46 88.70 30 12.10 CHANGED ststts..tts.ss.pIhhPscGuhltls.s..........t........ttplslcstuu.ps.........hhWhl.......sGp.l.spstptcphthts....tsGhapLol..lDssGpssp..lph ..............................ss.s.....ssp.hsLplsusp..-.Gu.hlphhsst.........s................pssLs.lpssGG.su........chaWaL...................NGcsl........s..pp.....s.c.slshph....tptGc..a....pLhV..hD-sGphss..Vph............... 0 69 147 199 +6661 PF06833 MdcE Malonate decarboxylase gamma subunit (MdcE) Moxon SJ anon Pfam-B_10907 (release 10.0) Family This family consists of several bacterial malonate decarboxylase gamma subunit proteins. Malonate decarboxylase of Klebsiella pneumoniae consists of four different subunits and catalyses the conversion of malonate plus H+ to acetate and CO2. The catalysis proceeds via acetyl and malonyl thioester residues with the phosphribosyl-dephospho-CoA prosthetic group of the acyl carrier protein (ACP) subunit. MdcD and E together probably function as malonyl-S-ACP decarboxylase [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.39 0.70 -5.16 9 323 2012-10-02 13:07:06 2003-08-07 12:47:24 6 2 311 0 94 1349 480 245.50 37 82.37 CHANGED shhsuLhss...tpuhsssstVlcGphst.sst.lsVlussN.......putlGl.EuhuLAtsV.-sl....ppts+pPIlsllDssSQthuRR-EhLGIppsLAthupuhshARhuGHslIGLlhGcAhSGAFLAaGhtAscLlALs..Gs.l+sMshsuhARVTphoVEtLcsLAtosPshA.sl-NYsphGhlpthhssppsps.uss.hs.lpp..tt..tDt........pRhsGtpRtupphspclhccsh .....................................................................hhsuL...h.st.......hpuh..s.s..ss...p....lh..sG..p..h..st...ss+hluVV...ss...sss.h..................RstpGEVG..L.......E..G..as..L..A.......ps..V..-s.l....stcpss..KRPIlsllDVsSQAYGRREEhLGIapALAsuucuYApA.Rh.AG..H..PlIGLlsG+Ah...SG...A.F...L...A...H...G...h.Q...AsRLlALss..Gshl+sMuKtuA.A......R.V..........T.h..R.........o..........V..........-.......pL......E.....p....L....Ap.o.lPshAYs.lcNY........sp.L........GhlpplLs..s......s..s.c.s.s.s.s..st..l.s..h.lppslsp.shtDh................pchtutpRtupthspchhpt..................................................................................... 0 12 40 65 +6662 PF06834 TraU TraU protein Moxon SJ anon Pfam-B_10708 (release 10.0) Family This family consists of several bacterial TraU proteins. TraU appears to be more essential to conjugal DNA transfer than to assembly of pilus filaments [1]. 
25.00 25.00 29.50 25.80 23.40 24.10 hmmbuild --amino -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.02 0.70 -5.29 58 720 2009-01-15 18:05:59 2003-08-07 12:50:46 6 3 485 0 138 610 39 276.00 32 88.84 CHANGED uphlss..hsssshsCl.shplsGhsh.....................hhhC.ss.sshh+sulpluaapPstlVsshpssts.Phltstplshu.....................shsp.tttsssppspssh....sFhpschhuaPhhhhh.shlss...........................................................................................................hsC.ssssh....halSplDsh.Wpssh..thl.....PEAll.Gt.htl..uussuss...................saGslYPhsGh.sspssshpuuullupRhsshlp...R..pshlat..shus................................uhshh..hstPlhc...pppa+aQhlhPhssss......Ctsasposhh.t...spphssss.ssauahlWRhhsCCtt ......................u.thhsslsshthsCl.shpluulph....................hhhC..ss...s.hhh+hultluaapP.tllsshpsPGs.s.lt..hthhshs.............................tstt.s.ssptppcpssh....sFhpschataPh.h.h..hl.shhss.............................................................................................................................hsC.tsush...........lhYLSElDPh.Wpss...h....sthlt............PEAllhus..hs.uAssu-s....................................................has.ts.Gs..hYPhsGa..s.....spssshpuusllupRhsthlp....R...Qshlhp..shut.s......................................uhCh....hssPlh.....+ppa+aQhltPhssss..............................C.pshs.c...oshhht.........sps.s.....s...s.....t....psauahlWRhhsCC........................................................... 0 30 71 111 +6663 PF06835 LptC DUF1239; Lipopolysaccharide-assembly, LptC-related Moxon SJ anon Pfam-B_11065 (release 10.0) Family This family consists of several related groups of proteins one of which is the LptC family. LptC is involved in lipopolysaccharide-assembly on the outer membrane of Gram-negative organisms. 
The lipopolysaccharide component of the outer bacterial membrane is transported form its source of origin to the outer membrane by a set of proteins constituting a transport machinery that is made up of LptA, LptB, LptC, LptD, LptE. LptC is located on the inner membrane side of the intermembrane space. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.79 0.71 -4.87 64 2204 2012-10-01 21:43:16 2003-08-07 12:52:59 8 16 2061 1 556 1696 1443 166.70 19 80.55 CHANGED hllhhllsuhh.hhhhptpsst.........sthpspps-ahspshpsppaspsGp.hpaclpuschc+as.ssst.........shhspPslhhapssp....................WplpuppuplsptsphlpLhssVtlpphtspsp..............hpplpTsphplphpsphhpo-psVplpsssh..thsGhGhcuslcspphpLhspV+...sp.a- ..........................................................................................h..hhh.lhsh...hhh...tt.................ptpp.ssh...h.......pp..hph.h........h.s....p.Gt...h.....pap.lhu.p...c.h.p.hhs...sps.h..........sh.h.p.p...P...h...h..pha.spsp..............................ssh.p.lpAccu..p.lss.....s.p..hlh...L.hu.s.Vplpsh...ttp..............................htplpTc.p...hplshp...s..p...p.l..p..o...-..c......Vp..l..h...s...ss..h......ph......s.G.h.t.hc..ssh..ps..p.phplh.p.pVps.............................................................................................. 1 161 349 464 +6664 PF06836 DUF1240 Protein of unknown function (DUF1240) Moxon SJ anon Pfam-B_11130 (release 10.0) Family This family consists of a number of hypothetical putative membrane proteins which seem to be specific to Yersinia pestis. The function of this family is unknown. 
22.00 22.00 22.00 22.00 21.50 21.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.17 0.72 -3.77 16 192 2009-01-15 18:05:59 2003-08-07 12:54:50 7 2 47 0 51 115 0 92.90 45 71.30 CHANGED DcIpFSusVhIhhFSsPLlhYhhshulashIpN+hPKapcphs+hLshlAlhShllSFPlSFYVcYKLKupuYlVCsRISWMSPNsYVKDlpLCc .............DhIhFShtlsl.hlhusPLLhYh.hhuh.ahhI........hN+.........s........KhN...s.hlsphLs...hlAl.luhllSh.hShYlsYpL+ptGYlsC.sRh...S.WMS..P....NpYVKDlpLCc............ 1 0 22 26 +6665 PF06837 Fijivirus_P9-2 Fijivirus P9-2 protein Moxon SJ anon Pfam-B_11357 (release 10.0) Family This family consists of several Fijivirus specific P9-2 proteins from Rice black streaked dwarf virus (RBSDV) and Fiji disease virus. The function of this family is unknown. 25.00 25.00 30.60 189.80 24.20 22.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.39 0.70 -5.20 3 20 2009-01-15 18:05:59 2003-08-07 12:58:00 6 1 6 0 0 20 0 206.50 68 99.93 CHANGED MDs.ppSV.hDoYTFpCPFEL...AKIclcuhpNsMp-VoNFsslF-hshSDSElDD+VDsLElcVEcsssPLl+RtYGKVG+IlahIlSFLFFGIFKLVLKhFYHLF+CVhCNPLoRsllSIIFTIlFYhhLhVsIYLLaaFFGDsIIpslNsLNp.cSsNFlNSTQshsuKVEEpVtKIIQsspLlFuppDpNslQpN-KussVsNGsTsNaTLFN MNP.QSSVNlDTYTFNCPFEL...AKIQIEShKPIMQDFSNFD-IFERsLSDSEIDDRVEpLElDVEuKVDPlVRR+YGKlGHIIlMIISFVFFGIFKLTLKMFYHLFRCVCCNPLIRGIhSIlFTILFYhLlhVsIYhVYaFFGDQI.tsYNoLsphcsSshINSTp.....VEEKVcNIIH-GSLFFGohDpsTGplpElEpQVsNGGTVNYTLFN. 0 0 0 0 +6666 PF06838 Met_gamma_lyase Alum_res; Methionine gamma-lyase Vella Briffa B, Haft D anon Pfam-B_5307 (release 10.0) Family This is a putative pyridoxal 5'-phosphate-dependent methionine gamma-lyase enzyme involved in methionine catabolism. 
24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 405 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.14 0.70 -6.17 14 999 2012-10-02 18:26:03 2003-08-07 13:00:36 6 5 922 20 186 1314 419 385.50 52 96.17 CHANGED h.plhcpAEtslhshFcplDchschNQh+VLpAFpccRlS-pHFssoTGYGYDDlGRDTL-pVYAcVFGAEuALVRPQhVSGTHAIusALFGlLRPGDELLhlsGpPYDTLEEVIGl+Gp.stGSL+-FGIsY+pVsLptpGclDapslppulp..spTKlltIQRSpGYuhRPShoIs-ItchIphVKplNPslIlFVDNCYGEFlEppEPscVGADLhAGSLIKNPGGGIApsGGYlAGKp-hlEtuuYRLTuPGIGpEsGAohsphp.haQGLFLAPHVVuEAlKGAhhsAtlh-clGasspPpasspRoDlIQulpFsscEKhIAFCpuIQtuSPIsualpP.PusMPGYEDcVIMAAGTFIQGSSIELSADGPlRtPYhuYlQGGLTasHVKlAlhtAlsplh ....................................................................................................h.tlhpclEppltshaccl-ch.s.hNQhKVLpAFpcp+lo..-.pcht.s.o.T.G.YG.Ys...Dh...G...R...Dp....LEcl...YAp..lFtu.E.s.AL.VRPQllSGTHAIuhALhulL...RP...GDELlh...lT...G.pPYD.TL..pEVIG.l............p.Gp..uh....GS.L.t.E.aG.lsYcp..lsLp..-t..Gpl.Dh-sltpsl.p..p.p.T.KllsIQRS+GYu.pRsShsls.............cIcchIs..hl..K.plp...P....s.lllFVDNCYGE.FlEppEPscl.G.AD....LhAGSLIKNP.GG.Gl.A..s.GG.Y.I.s..G.+.c.-.Ll.Et.s...u..Y.RLTuPGIGpEsG..A..oL.s..s.h..p..p..h..YQ........Gh....FLAP....+.V...VupAlKGAlFs..A..t..h..hE.chGh.p.ss...Ppast...R.oDlIQsVpFpstEphIuFCpuIQtuSPlsuahsP.PshMPGYED-VIMAAGTFIQGSSIELSADGPIRsPYtsYlQG.GLTYpHsKlAlhpAlpphh................................................................................................................... 0 77 135 161 +6667 PF06839 zf-GRF GRF zinc finger Bateman A anon Bateman A Domain This presumed zinc binding domain is found in a variety of DNA-binding proteins. It seems likely that this domain is involved in nucleic acid binding. It is named GRF after three conserved residues in the centre of the alignment of the domain. This zinc finger may be related to Pfam:PF01396. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.49 0.72 -4.03 14 1335 2012-10-03 10:42:43 2003-08-07 13:01:28 7 90 280 0 771 1259 16 45.30 32 10.05 CHANGED shCs.CG.phshhhssp.sG.NpGRpFYpCPhsc......tCsFFpWsDps ............hCp.C....u..p..s..h..h.hs.sp.p.s..u..sNp....GRp.........FasC..stspt..................ttC..s.FFpWt-........................ 0 155 290 523 +6668 PF06840 DUF1241 Protein of unknown function (DUF1241) Moxon SJ anon Pfam-B_11380 (release 10.0) Family This family consists of several programmed cell death 10 protein (PDCD10 or TFAR15) sequences. The function of this family is unknown. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.92 0.71 -4.97 6 159 2009-01-15 18:05:59 2003-08-07 13:03:01 6 3 106 19 95 131 2 130.10 42 55.91 CHANGED hs--ss.lsuh........sL.llhcPlhscLE+.cchs.....AsQpL+sAhhKuEppsPGhohDllssIlc+t..-lsVNhsEolLRhtutss-....EYphsRpEscFpELs+KAtsLKpILS+IPDEIsDR+sFLETIK-IASAIKcLLDAVNpVachlPs....hosKpAlEc .............s.......hsSh........sL.slhhPlFscLE+..hshu......AAQoLRuAhhKAEppsPGlTpDllhpIlc+t..slplNhsEolL.Rhtusts-....Eahl.pRsE.tFQ-Lsc+uhuLKpILS+IPDEIsDRhpFLpTIK-IASAIKcLLDsVNplhp...hh.......ps+pAlEp................. 0 34 42 70 +6669 PF06841 Phage_T4_gp19 T4_Gp19; T4-like virus tail tube protein gp19 Moxon SJ anon Pfam-B_11507 (release 10.0) Family This family consists of several tail tube protein gp19 sequences from the T4-like viruses [1,2]. This famiyl also contains bacterial members which suggest lateral transfer of genes. 
19.20 19.20 19.30 19.20 18.70 19.10 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.57 0.71 -4.53 97 689 2009-01-15 18:05:59 2003-08-07 13:07:47 7 4 303 0 272 645 2092 139.90 19 81.40 CHANGED ssFpFtVp.h..............ssht..................suFpcloGLshchpshpa+c..Guss......hhh....+hPGthcassloL+RGls...p......spp..lhpWhp......pshpsth........................++slslpLhscsu.p...................l.h.sWplhpAaPs+h...........su.sslsA....susplAlEolcLsa-thp ...........................................................h.F.lp.h..................ss.........................hth..p.pssuhp......hp......hps.hphtp...Gsts............hhh......phs.G...p.hp.a.s.s.l.oLcpslsp........spt.hh.pWhp.............pstssth..p.....................................ccshslpl....h.spss..p.........................................sl.h..paplhsuaP.sch...........ss.ssLsu....sssp.l.shpslplsacth................ 1 94 202 251 +6670 PF06842 DUF1242 Protein of unknown function (DUF1242) Moxon SJ anon Pfam-B_11544 (release 10.0) Family This family consists of a number of eukaryotic proteins of around 72 residues in length. The function of this family is unknown. 25.00 25.00 25.60 28.20 24.70 23.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.70 0.72 -4.45 32 317 2009-01-15 18:05:59 2003-08-07 13:10:49 7 9 230 0 201 288 0 35.50 49 38.14 CHANGED LLhVlLLlICTCoYl+thhPullDp....s+.sGhhGhFW .....LLsVlLLlICTCsYl+phhPullDp..............s+.sGhhGlFW.......... 0 63 105 161 +6672 PF06844 DUF1244 Protein of unknown function (DUF1244) Moxon SJ anon Pfam-B_11743 (release 10.0) Family This family consists of several short bacterial proteins of around 100 residues in length. The function of this family is unknown. 
21.20 21.20 22.70 21.80 20.00 18.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.31 0.72 -4.15 69 402 2009-01-15 18:05:59 2003-08-07 13:15:05 6 6 397 4 128 343 734 67.20 61 61.82 CHANGED VQNIDLMNLAGFCRNCLS+WYp-AAp-pGlplsc-pAREhlYGMPYs-WKupaQscAosEQhAuFcts ...........VQNIDLMsLAGFCRNCLS+WYpsAA-cpG.l-lshD-AREtVYGMPYsEWKupaQscAosEQhAAFct.p............................ 0 37 73 98 +6675 PF06847 Arc_PepC_II Archaeal Peptidase A24 C-terminus Type II Yeats C anon Yeats C Domain This region is of unknown function but is found in some archaeal Pfam:PF01478. It is predicted to be of mixed alpha/beta secondary structure by Prof. 26.20 26.20 28.10 26.90 26.00 19.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.23 0.72 -3.56 21 99 2009-01-15 18:05:59 2003-08-07 13:22:34 6 2 90 2 63 95 11 90.10 24 34.07 CHANGED hhKchlhhhhGh+hclp................p....hc.cpthhLsptp-.tt.h...hh.hpsh.psssphptplpcatcc....cp..lWVTPGlPFllsIshGalluhlhGDh.l .............................................................................h......h.h.u..hpht.................t....ht.t.hh.hhptct......................h...h.htshtp.sspphpcthpphscc........cp..lWVoPGlPallslhlGallullhGDhh..... 0 15 39 53 +6676 PF06848 Disaggr_repeat Disaggregatase related repeat Moxon SJ, Mistry J, Adindla S anon Pfam-B_11958 (release 10.0) Repeat This family consists of several repeats which seem to be specific to the Methanosarcina archaea species and are often found in multiple copies in disaggregatase proteins. Members of this family are also found in single copies in several hypothetical proteins. This repeat is also known as DNRLRE repeat and is predicted form a mainly beta-strand structure with two alpha-helices [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. It is found in some cell surface proteins. 
27.60 27.60 28.30 27.70 27.30 27.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.13 0.71 -4.86 16 49 2009-01-15 18:05:59 2003-08-07 13:26:16 6 15 13 0 31 50 4 168.90 48 32.12 CHANGED DNRLREuSP-sVap-osaIDlGuhsssG+YRDlhhF-LScYsssspIss....ATLSLYWYYPsspsRPEDTVlElYRPAuuWNPsYVSWNpRDcslsWpNsGGDWYDKNGVLQGSTPYATlTlKGSsLPDNRYYELDVTDLVKEYVSGKYENTGFLIKARoEssNYIAFYSs-sssEsQcPKLsl ....................DNRLREuSP-sVap.sosaIDVGuhsss.+Y...RDlhhFDLSpas..s.ssclss....AsLSLYWYYPs..up..sRscDTllElYRPAou.....WsssYVoWNp+DpslAWpNsGGDWYD+sGlhQGsTPYAolTl+GSpLP.D.N+YYElDVT-LVpEYlSG+YEN...TGFLIKuRsE.ss.NYIA........FYSs-sssEsQhP+Lpl............................................................................... 0 13 14 14 +6677 PF06849 DUF1246 Protein of unknown function (DUF1246) Vella Briffa B anon Pfam-B_5448 (release 10.0) Family This family represents the N-terminus of a number of hypothetical archaeal proteins of unknown function. 25.00 25.00 113.00 112.20 21.60 20.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.55 0.71 -4.35 43 187 2009-01-15 18:05:59 2003-08-07 13:58:34 7 1 109 19 117 182 116 124.90 42 35.14 CHANGED IuTluSHSALpIhcGAKcEGF+TlslCpcuR.-phYpcFs.............hsD-hlll-cap-lh..chtpcLhcpNuIllPHuSalsYlGh-plEs.htVPhFGNRplLRWEs-Rstcpp...LLccAGI+hP+tac .IuTluSHSALpIhcGAKcEGFcTlslsp.+sR.cthYpcF..............hhD-hlll-pas-ll..chtccLhcpNuIllPptSFlsYl....Gh-plEs.hpVPhFGNRplLRWE.-Rstcpp...LLccAGlchP+ha... 0 29 58 87 +6678 PF06850 PHB_depo_C PHB_depo_C-term; PHB de-polymerase C-terminus Vella Briffa B anon Pfam-B_5697 (release 10.0) Family This family represents the C-terminus of bacterial poly(3-hydroxybutyrate) (PHB) de-polymerase. This degrades PHB granules to oligomers and monomers of 3-hydroxy-butyric acid. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.16 0.71 -5.10 11 544 2012-10-03 11:45:05 2003-08-07 14:31:15 6 2 381 0 215 658 140 200.80 49 46.95 CHANGED GGPIDsRtsPTsVNpLApc+sh-WFcpNlIhpVPhsYPGtGRcVYPGFLQLuGFlSMNhDRHlpAHhDhatcLV+GDG-pA-+HpcFYDEYLAVMDhoAEFYLpTVcpVFpcatLPpGchtpcGphVDhssIpcsALhTVEGENDDISGlGQTpAApcLCsuIPcs+KtaahQsuVGHYGVFsGpRaRppIhPplt-FIcchs ....................................................................GGPI.DsRtsPTsVNpLAp...p+shsWFcpNlIppVPhsaPGtGR+V..YPGFLQhsuFhuMN.-RH.h.puHh-happL.lcGDs.s.sA-tHRcFYDEYh.A.VhDhsAE...aYL-TlcpVFQcatLsp.Gphph...c.....G......c.....h.....VcPssI.+csALhTlEGEpDDIoGsGQTcAApcLCoulPssp+p+ahtsssGHYG.lFsGpR.WRppIhPtlccFIpp.s.............................. 0 42 108 152 +6679 PF06851 DUF1247 Protein of unknown function (DUF1247) Vella Briffa B anon Pfam-B_5762 (release 10.0) Family This family contains a number of hypothetical viral proteins of unknown function approximately 200 residues long. 25.00 25.00 29.90 194.50 19.10 18.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.89 0.71 -4.55 17 44 2009-01-15 18:05:59 2003-08-07 14:34:31 6 1 39 0 0 43 0 148.50 51 72.01 CHANGED LGDVlQpMGRp.shLLtc..KKD--FcIspph-LS-ps+-YLNhLQpEKLapCRLCYp+sDph..RC-FH++YlFspstchphD-YVpFLNS-MGllSaVELYYsYLus...ss.W+hsAphhL+cLTsFpSlp-LLsaYNYphspDsDsssaEhMD LGDVlQpMGRp.phLLpc..KKD.--FcIsEph-LS-ps+-YLNhLQpEKLapCRLCYp+scsh..RC-FHKKYlFccshchtsD-YVpFLNS-MGlISaVELYYoYLus....ss.W+hsAphhL+-LTuFpSlpcLLsaYNYshssDsDpssaEhMD. 0 0 0 0 +6680 PF06852 DUF1248 Protein of unknown function (DUF1248) Vella Briffa B anon Pfam-B_5811 (release 10.0) Family This family represents a conserved region within a number of proteins of unknown function that seem to be specific to C. elegans. Note that some family members contain more than one copy of this region. 
21.80 21.80 21.90 22.70 21.40 21.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.10 0.71 -5.00 10 63 2012-10-02 22:59:21 2003-08-07 14:38:07 7 2 5 0 60 63 1 176.10 26 53.84 CHANGED p-slDllhNPs-chlDsaMKhaGNpRhsFK+EDIupW+cSFsDtY+htlhsLKGTs+lItosHslpF+PLsss.c.psahalGhuWIcP-YRG+ssh+lh-shsppct+s.psDNhlApssphupsFW+KhpG...+sDhGH...plhYlSaYchpDhplP-cL-hsG...IsVKNAREVPc+DIlcYDpol ..........................h.tsl-llhNPspchhDpah+...hh.Gp..p.R.hsF+psDlphWppuFcc.YphhhsshKs...............T..........s.....cllussphhpapslp...s.c.tshhhhGhhahsP-YRupsh.h.+.lhsphhhc...hpp.s.ssN.ssu.p.sss...phtp..happhhG...tpchuc....hhYhShYchs-lhlPcs.LshsG...lhlKss.p-VstcDllpYDpsl................................................................. 0 18 22 60 +6681 PF06853 DUF1249 Protein of unknown function (DUF1249) Moxon SJ anon Pfam-B_11475 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. 25.00 25.00 31.30 31.00 19.20 19.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.54 0.71 -4.48 50 853 2009-01-15 18:05:59 2003-08-07 15:20:06 7 1 849 0 120 359 264 119.90 59 83.45 CHANGED h+Lh+LLPstcp..sp...shphphs......................shph......plcllEso+YToh.............................................lclspptsts..........hhssPplpVRlYHDA+hAEVlsspphp+lcshYsYPNtpMaphDEKhQlNtFLu-WLpaCLcpG.....pph.tsl .......................QLRRLLP+sDu..sGE.....oVuYQVu............................sspY....RLTIlESTRYTTL.............................................VpIc...QTsPulo.........aWSLPSMoVRLYHDAhVAEVCSSQQ.Ia.RFKARYDY.P.NKKLHQRDEKHQINQFLADWLRaCLAHGAhA..lss....... 0 23 49 86 +6682 PF06854 Phage_Gp15 Bacteriophage Gp15 protein Moxon SJ anon Pfam-B_11759 (release 10.0) Family This family consists of bacteriophage Gp15 proteins and related bacterial sequences. 
The function of this family is unknown 19.90 19.90 20.90 20.70 18.70 19.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.38 0.71 -4.75 9 188 2009-01-15 18:05:59 2003-08-07 15:23:09 6 1 166 0 19 164 6 160.00 27 87.85 CHANGED pLs-slhchapacsp-YplDLSFcsVL+la-lhcDcplosspKsplslclLh...........ttphhph.cctsplhlcIhppaIsh-p..c-tlphDlcGN.MP.......ptp..pc+hhsappDA-aIaASFhQsYpIsLlcppsK......................LpWhcFpALLsuLs-sThhppIIpIRphEhsp..ptstcERpplhKLKstYpL ..................................................................................................L...h.p.h.htsttatlshsFppslphhplhp.Dp.plst..+h.hhlphhh.......................sh..tp.t...hhh.....hhp...phl.ht....t......c...ss.h.....................t....ppthhshp.Duch..IYuuFhptY.sIcL.hcp.p..sc......................hpWhcFpALhpuLs.-c...T.hppIltIRsh-.sp....ttspc...c+cphpchpphYtL.......................................................... 1 12 17 19 +6683 PF06855 DUF1250 Protein of unknown function (DUF1250) Moxon SJ anon Pfam-B_11942 (release 10.0) Family This family consists of several short hypothetical bacterial proteins of around 70 residues in length. Members of this family seem to all belong to the order Bacillales or Lactobacillales. The function of this family is unknown. 21.10 21.10 21.50 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.19 0.72 -4.44 47 1637 2009-01-15 18:05:59 2003-08-07 15:27:40 7 2 984 4 102 381 1 46.20 35 64.48 CHANGED hAspsapDpsFPKpspcacclSsYLEh.susah.shssFDcuaptY. ..LA-hhhcDtuFPKpscc.....acplpsYlcp.ss.....shp.shsshDchaphY.................... 0 20 45 79 +6684 PF06856 DUF1251 Protein of unknown function (DUF1251) Moxon SJ anon Pfam-B_12000 (release 10.0) Family This family consists of the N-terminal region of several hypothetical Nucleopolyhedrovirus proteins of unknown function. 
19.80 19.80 20.30 23.70 18.00 17.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.72 0.71 -4.61 20 50 2009-09-10 22:02:51 2003-08-07 15:30:53 6 1 41 0 0 50 1 118.40 34 54.34 CHANGED chslssKs+sshh+a.................KVplplss..sscalQATFsstpcpVslVN.psppc.IlFDGFsc..-DEutTsPFlV.GsLhul....pspp.hthpVRchscAhEs.pTlL+lFlNEAhl..psshss ................hslss+s+pshh+a.h...............+Vhlpl-u...sspalQATFpst..pcpVslVN.psppp.lhFDGFsc...-DEupThP..Fll.usLpsl.....pssp..shcV+-hscAhEp.sThL+lFlNEAhl..tsph.s............. 1 0 0 0 +6685 PF06857 ACP MdcD; Malonate decarboxylase delta subunit (MdcD) Moxon SJ anon Pfam-B_12010 (release 10.0) Family This family consists of several bacterial malonate decarboxylase delta subunit (MdcD) proteins. Malonate decarboxylase of Klebsiella pneumoniae consists of four different subunits and catalyses the conversion of malonate plus H+ to acetate and CO2. The catalysis proceeds via acetyl and malonyl thioester residues with the phosphribosyl-dephospho-CoA prosthetic group of the acyl carrier protein (ACP) subunit. MdcC is the (apo) ACP subunit [1]. The family also contains the CitD family of citrate lyase acyl carrier proteins. 25.10 25.10 26.20 26.00 22.90 22.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.80 0.72 -4.07 40 1378 2009-01-15 18:05:59 2003-08-07 15:34:27 6 3 1149 0 198 624 11 86.00 40 84.14 CHANGED cIpcsAlAGTLESSDl.VplsPu.s.sslplplpSs..Vt+QFGppIcpllpcsLpphuVpssplplsDKGALDCVl+ARlpsAltRAsc .........cIppsAlAGTLESuDlhIpltPhps..p......s...........l..clplsSS..VpcQFGctI+pslh-sLs..+hsVpusplslcDKGALDCll+ARlpsultRAu.t.......... 0 44 96 143 +6686 PF06858 NOG1 Nucleolar GTP-binding protein 1 (NOG1) Vella Briffa B anon Pfam-B_5853 (release 10.0) Family This family represents a conserved region of approximately 60 residues in length within nucleolar GTP-binding protein 1 (NOG1). In S. 
cerevisiae, the NOG1 gene has been shown to be essential for cell viability, suggesting that NOG1 may play an important role in nucleolar functions [1]. Family members include eukaryotic, bacterial and archaeal proteins. 20.90 20.90 20.90 22.30 20.70 20.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.76 0.72 -4.09 25 503 2012-10-05 12:31:09 2003-08-07 15:51:30 9 15 386 1 340 554 80 57.60 51 10.12 CHANGED sIEhQAIsALsHLpuslLFlhDhScpCGY...olcpQhcLacpI+slF.spPlllVlNKhD ............sIEMQuITAL.AHLRus.lLYhhDlSEQCGa...olcpQl.pL.Fcs.I+PLF....s....N.K...P....lllVhNKsD........... 0 115 195 278 +6687 PF06859 Bin3 Bicoid-interacting protein 3 (Bin3) Vella Briffa B anon Pfam-B_5564 (release 10.0) Family This family represents a conserved region of approximately 120 residues within eukaryotic Bicoid-interacting protein 3 (Bin3). Bin3, which shows similarity to a number of protein methyltransferases that modify RNA-binding proteins, interacts with Bicoid, which itself directs pattern formation in the early Drosophila embryo. The interaction might allow Bicoid to switch between its dual roles in transcription and translation [1]. Note that family members contain a conserved HLN motif. 21.00 21.00 21.20 21.10 20.90 20.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.38 0.72 -4.07 5 294 2012-10-10 17:06:42 2003-08-07 16:17:37 7 11 155 6 192 292 6 98.80 41 22.77 CHANGED cFDVILCLSVTKWVHLNWGD-GL++hFRRIYppL+PGGlLILEPQuWDSYcKR+KlSEslppNYpsIcl+PDcFpcaLlsscVGFpohE.LsulssusSKGF.cRPIhlFpK ............paDllLCLSlTKWlHLNaGDpGLpphFp+laphL+s...........G..GhLllEPQsWpsYp...+t....c....p........h...s............c.......p...h....h....pp....a.plp......h...hPp...p..F.t....phLht....l.GFt................ht................s.....h.pR.l.hh............................................................ 
0 71 95 146 +6689 PF06861 BALF1 BALF1 protein Moxon SJ anon Pfam-B_12069 (release 10.0) Family This family consists of several BALF1 proteins which seem to be specific to the Lymphocryptoviruses. BALF1, inhibits the antiapoptotic activity of EBV BHRF1 and of KSBcl-2 [1]. 25.00 25.00 27.50 27.10 21.10 20.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.05 0.71 -4.86 2 16 2009-01-15 18:05:59 2003-08-07 16:34:37 6 1 13 0 0 16 0 171.80 39 88.56 CHANGED M.....TDsVF.cs......APs.ss-DchscushLhhRsMhAsahpD..pGLsh.thlhhRLIKt.hKK-cKhaA-lsspsus.sshHuHlphlhohhRAlY-DHhD.W.RlRslhshsVsaAhRNh.sDpEsAuhlLsuhAcaLsLYRRhWhuRhGGh.huLRRtFPlpWhhhtls..hhp. .............................................clucoS.lhh+shaAVho.pD..c-.LslsttVLscLlKtSl++sh+las-.Lssps..uchuuccs+lphlhsllRtsYsDphD.apRLpssLsYsslahshsh.sDpcssuhVhsslA+ahshaRphWhuRlGGhspuLR+pFPspWshspLp.aLpp.h...... 0 0 0 0 +6690 PF06862 DUF1253 Protein of unknown function (DUF1253) Vella Briffa B anon Pfam-B_6227 (release 10.0) Family This family represents the C-terminal portion (approximately 500 residues) of several hypothetical eukaryotic proteins of unknown function. 
19.10 19.10 19.10 19.10 19.00 19.00 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.50 0.70 -6.08 36 368 2012-10-05 12:31:09 2003-08-08 10:01:07 7 4 289 0 255 543 41 379.60 36 61.37 CHANGED KoRs+llKNsp+Lpcptp................t-.-hRDQGFTRPKVLIllPhRssAhchVph.Llplhss...............pphcN+p+FpcpF.......ppptss............ppK.....PcD....................applFpGNsD.DhF+l.....GlKh..o+Ksl+LY......SsFYsSDIIlASPLGLchllps...........................................................sp......KKc-hDFLSSIEllllDpschl.MQNWpHltplhcalNphPpc.t+s.sDFSRlRhWhlsspA+haRQTllhosa.sPphNSlhsppstNhpG+l+hpshhpt.....................................uslsp.........lslpl+QhFpR..hc........usSlhsssDsRFcaFsssllPplh........sshtsssLIalPSYhDalRlRNYh+..........ppsloFusIsEYosppclsRuRphFhpG+tplLLhTERhHaa+RYplKGl+pllFYu.Pp.PpFYsEll.phlstost...............hc.s.ssspslYSKaDuhpLERIVGscRAsphlpup.pc...sapF ..........................................................................shll+sstpltt.t.t.....................p.-hRDQGhTRPKV.LlllPhRpsshchVph.lhp..lh.s.......................p.ps+pRF.ppas...........tts...............................+.....PpD..........................apt.lF..t.G..N...D...DhF+l.....Glph............s++o.......l+la......utF..YsSDIllASPLGL+hhltt................................................................ttt...........+cp-hDF.L.S....SIEllllDpA-hh.hMQ.NW-Hl.....hlh.p.plNh.Ppc.....tHs..sDhuRl.R..WhLss.u+ahRQTllhosh..s.sp.hsulhs.p....h...sh.....NhtGplphps..t........................................................usltp.........lhl.pl....QhFpR..hc.........sps.hh.p..s.D..s..RFpaF...sp.....pl..l.Pphh...........ss..stsLIa..lPSYhDaVR...lRNa.hp..........ppplsFstIsE......YoptpplsRARphFhpG.c..tphLLhTERhHaa+RYp.l+...G..l+pllaYthPp.PtFYsElh.sh.lttstt.....................................tthsspsla.o+aDshpLpplVGspRstphhpsp..tssa.F............................................................ 
2 101 152 218 +6691 PF06863 DUF1254 Protein of unknown function (DUF1254) Vella Briffa B anon Pfam-B_5911 (release 10.0) Domain This family represents a conserved region about 130 residues long within hypothetical proteins of unknown function. Family members include eukaryotic, bacterial and archaeal proteins. 22.20 22.20 22.20 22.30 22.10 21.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.93 0.71 -4.31 340 950 2012-10-01 20:00:45 2003-08-08 10:59:30 7 11 469 9 332 860 184 127.90 25 28.84 CHANGED hNphhphp.thhs.tp..pslls...sNsDTlYohuhlDL.pssPlllplP.s.h.s.Rah.shtlhDhappsh.....................h.....st........ststuGp.ah..l.ssPsa...........................................ts.p.......................hh...cusTshshllhRshs......ps.ssD..hss.l.....pt.lQcthp..lts ..............................sph.hhp.th.sssp...pslss.sNsDTlYohualDL.sssPlllplP.s..h.t.RahshthhDhappsh................sshst..........stspuGp..al.l.ssPsa........................pG.h...........hh.hhcssTphshlhhRshh......ps.spD...hts.s.pt.lpcthplh................................................................ 0 80 165 252 +6692 PF06864 PAP_PilO Pilin accessory protein (PilO) Moxon SJ anon Pfam-B_12430 (release 10.0) Family This family consists of several enterobacterial PilO proteins. The function of PilO is unknown although it has been suggested that it is a cytoplasmic protein in the absence of other Pil proteins, but PilO protein is translocated to the outer membrane in the presence of other Pil proteins. Alternatively, PilO protein may form a complex with other Pil protein(s). PilO has been predicted to function as a component of the pilin transport apparatus and thin-pilus basal body [1]. This family does not seem to be related to Pfam:PF04350. 
22.50 22.50 22.70 25.40 20.70 22.40 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.40 0.70 -6.12 9 257 2009-01-15 18:05:59 2003-08-08 11:01:29 7 1 201 0 33 208 3 375.10 26 91.58 CHANGED salAGLcWpshsp.tp+ssRphu+s.........tsAshhluhphtspptpuphphsuhls....h.httpphaSLAhhhLshhtssGYGIapLspt....pahFlAsssGl.ushuDlVGotsplhpAhppFLshN.sPpp............uWpshusspassshpohstsLSs.....pthRts+LstVtps.phhhhullhlLhuhhh.ua.happ..scsthhststphtActph..ptss.s.....ls.PWAshP.hssFLptChslhpslPVolAGWRhshucCss-G....lRhpYpthsGuTlscFupRlp-la..sppPsFsLs-GuppGslhlPhshp.sppshpsEslPssusQl.+hsSahQphplplslsElpss.shst.Dtps....hP...W+EYsFphpTplsPctlhs..phs-sGlRhsSlshpLps.GpFpYphcGpl..YAp ................................................................................................................ahssLpWp.hst....t....p......pp.....t+.t..........shphhh........ttp..tphhh.suhlt....................thpth..aSLAhhhts...hp...s.....h..hu.........l..apl...s-t.......chhalAsh.sG..t....sh..u..D.lsGs.pp-l.t..pth....ph.ahshs...t.s.t..............tW.p.lh......p....s...c.hs..s..s...p....p..h...t.t..Lss.....................tp..+hs.....pLs.....l....t.p...ptph..hhs..ul....l...h.h..l..s...s..s.u.hs.s.a...a..p.p.p..t...c...s..h..h...s...t...stph.t.Actph.t....pp.s..p.s.s........ls..P.Wss.PshssFl.cuCss...h...h.pt.P...lult.G.W+hstupCssps.....hphhYpp..tsu...uTh..t..sF.t....pscpla......sshPshsl.sG.p..usl.shsl.....s.shs.ht.D-sls.ssp..l.clhoh...hQphpl..p......t.l..s.....E...h....s.........s......s....hss.stps.............P..pW+paphsh.p........osls.P.p.tlh...hhp....t.sGlRlpplthplpt..uphpap.pGplYup.......................................................................................................................... 0 10 18 24 +6693 PF06865 DUF1255 Protein of unknown function (DUF1255) Moxon SJ anon Pfam-B_12498 (release 10.0) Domain This family consists of several conserved hypothetical bacterial proteins of around 95 residues in length. 
The function of this family is unknown 25.00 25.00 38.90 38.10 21.90 21.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.04 0.72 -3.72 85 1071 2012-10-10 13:59:34 2003-08-08 11:06:28 6 2 1050 2 221 522 128 93.60 55 95.56 CHANGED hhcsNhYFDGpVpShuhphs-hscpTlGVMhPG-.....YpFuTutsEhMplluGshpVpLPGps.....cWpsasuG-sFpVsANSsFpl+Vp...psosYlCpYh .....MlpuNpYFsGKVKSIGFspussG+ASVGVMs.GE.....YTFuTupPEcMTVlSGALpVhLP.sss.....-WpsapAGpsFsVPGpScFcLpVs...E.sTuYLC+Yl................ 0 53 122 175 +6694 PF06866 DUF1256 Protein of unknown function (DUF1256) Moxon SJ anon Pfam-B_12377 (release 10.0) Family This family consists of several uncharacterised bacterial proteins which seem to be specific to the orders Clostridia and Bacillales. Family members are typically around 180 residues in length. The function of this family is unknown. These proteins are related to peptidase family M63 and so may be peptidases. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.94 0.71 -5.05 13 439 2012-10-01 20:58:29 2003-08-08 11:11:13 6 1 326 0 117 354 6 163.40 45 84.31 CHANGED hlphs-tpAhhpltshLppal.......scclVllCIGTDRssGDuLGPLVGphLKphphs.hpVYGTLc-PVHAlNlccplccIcKcapsPaIIAIDACLGcs..pslGcI.lcstPlcPGpuVsKpLPsVGDlSIsGIVNlos.hEah...lLpssRLphVhchAcsIupu .......................h..hpp..u.tplssh...Lhshl..h........ppsl.lllC..IGTDRSTGDuLGP......LV.Gop.L.......cp.....h...t.h...s..p...h...pV.aGTL...-c.....P....VH..........A..........h..........N...Lc.............-...plppIpp....pas.ssaIIAlDA.CLGch..ps..l..G.p.IplupGPl+PGuuVsKcLPsVGDlpIsGIVNls..Gh.hE..ah...VLQNTRLslVMcMA-lIup.u......................... 1 58 95 104 +6696 PF06868 DUF1257 Protein of unknown function (DUF1257) Vella Briffa B anon Pfam-B_5975 (release 10.0) Family This family contains hypothetical proteins of unknown function that are approximately 120 residues long. 
Family members include eukaryotic and bacterial proteins. 21.00 21.00 21.90 21.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.24 0.72 -4.14 40 175 2009-01-15 18:05:59 2003-08-08 11:32:11 6 1 109 0 65 168 172 102.10 35 80.65 CHANGED pALpDLGhphcp.sp.ptVRGY..cGQT.hpA-lsl...p.ssuhDIGFpWNusp..YELVsDLphWp..Qslsl-cFLsploQ+YAhpolLspospp.....GFplsEpppspDGSIcLV .........................uLpDLGl...shcp...tp..tsVRGY..pGQp..hpA-lll....p.s....ssYDlGFphNuss..Y-Llu.D..hWp....pphshppFl..splsQ+YAhpplLscsppp.....GapssppppttsGshpl................................ 0 19 47 63 +6697 PF06869 DUF1258 Protein of unknown function (DUF1258) Vella Briffa B anon Pfam-B_6065 (release 10.0) Family This family represents a conserved region approximately 260 residues long within a number of hypothetical proteins of unknown function that seem to be specific to C. elegans. Note that this family contains a number of conserved cysteine and histidine residues. 26.50 26.50 27.30 26.90 26.30 25.40 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.70 0.70 -5.30 3 40 2009-09-13 14:59:23 2003-08-08 11:37:06 7 5 6 0 40 38 0 189.30 29 31.56 CHANGED AEEARKLWpYsKNNFSTHsYCNtCGK.VLuspcKCNhCssuPVATFVRIGuFSQI+ELVEoYlD-ILEIREQLKsGRNl-HNLuSPFFS+aWcsESpNHL+LSTVlSIDGV+ISGNKKKLWPVSLlLVDLPoGLMQKSTNlILEGIVECSENPSTsLWNALIPhIhSDVEuHoGRV+NITFoC+ITTCSADQPAKRAFFGF+uHSSuhSCFFCLSPETLYK+GGssRKEpRPGaLTIlDScNGcNGFosKsSKIV.HVl ............................................................t.Ch....tp.t.t.......s..............s....t............t.hhpa..hRhshhtQl.pllptah.cIhpl+cpLppupphpHsLsu.ahp.chht..p.E..pt.ptL.plohl.hulDGlpl...tG.s.p....pKlWPlohhllDLPsu.MQ+ssslllpulhEsppsPSThlWNtlhshlhsDhptt.ttlts.hphphhIho.hsuDQ.....P................A.....+R..shauh+uHpup.SC.ashs.tThhKhts................................................................... 
0 4 13 40 +6698 PF06870 RNA_pol_I_A49 A49-like RNA polymerase I associated factor Finn RD anon Pfam-B_20222 (release 10.0) Family Saccharomyces cerevisiae A49 is a specific subunit associated with RNA polymerase I (Pol I) in eukaryotes. Pol I maintains transcription activities in A49 deletion mutants. However, such mutants are deficient in transcription activity at low temperatures. Deletion analysis of the fusion yeast homolog indicate that only the C-terminal two thirds are required for function. Transcript analysis has demonstrated that A49 is maximising transcription of ribosomal DNA [1]. 20.10 20.10 20.20 22.00 19.00 20.00 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.15 0.70 -5.89 24 337 2009-01-15 18:05:59 2003-08-08 12:05:22 7 10 258 10 242 330 1 321.60 20 82.87 CHANGED psspsscFsLYKcc..........p..pschllpGE.s-+LcYsGps.ssss.ps......cYhVGlaD.popplclapA.hhsh...........cssscsccshct.thcp.sss+shppR.suLGEAFGT+K......uK+AlsshccN+lc........u-pLpcsthcls-sltpsstshsspp-h.p.sssss..+PhP.sNl-AsslE-lYsl-sIIspcEashlc.lsshltppcspccLp.hhP..sp.upaVtc+L............................................tpLtptpsh.....c+lplLaYlSlLlslhpp.........RplppKpsLhp+ht.....sP-lllsslLc+FThsp.....pstphphhI..ssppc-KLlsYllslhLHlc.sFhV-lssLu+-Lslcss+lhpla+slGsplKt.s.sphpthulscussssaKlAoL+lPh..+hPchpcct...+R 
.............................................t..............................ptphlltup.ptplpahupp.t..t.t...............pah...lulhs.pstphp.lh.s...thh.h........................................t...ttp....t..........t.....t.ttp.htt+....ptLhpsF.GopK......t+.+sl...ps.hp..Nt....ls...............................sp....t....h.....p.....t.....s....t..thh........p....s...h............p.s...p...t..s.......s..........pph.t......t.pts.....p.lP.hshsAs..psp-lY.hcs................ll..s..t....t.....h.p..h..lt........t..h......tt.tt...tptl..........t...spalh....ppl...............................................pt.l.........t...........tp......t...........pp......hph...lhalph..Llthhtt.......................................+ph..+pt..hhtt..............................hPchl.ptlhppFss........................p....hh...stp.tshlhsahhslsLhl............s....sa...t.s...-...hssLtp..-L..p..hp...pphhphh+tlGsplpt..................................tt.tthc..hupLtlPh....hPt..tptt................................................................... 2 81 127 193 +6699 PF06871 TraH_2 DUF1259; TraH_2 Moxon SJ anon Pfam-B_13298 (release 10.0) Domain This family consists of several TraH proteins which seem to be specific to Agrobacterium and Rhizobium species. This protein is thought to be involved in conjugal transfer but its function is unknown. This family does not appear to be related to Pfam:PF06122. 
26.30 26.30 27.30 31.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.22 0.70 -4.88 8 33 2012-10-03 00:09:25 2003-08-08 12:48:21 6 1 21 0 11 38 0 194.50 55 98.47 CHANGED MLDAALIKcCADPSLKPAIVEQFlsuAGSsDPLAVTV+SGsRllLVPKsTTsDEAMAlIRQalGpslVRVGLTQaPAGVGVp-Au-LKPDLVDACENlRhGTAhFAKVhRIVsKWYGNPTucDVhPQlF-DAlhAWKTG.FEGluVFpA-DPGsussspssstss-cs.pscsouc.ssustssspsups.s-ss+AGIRIDLStIGu .MlDAALIcpCADPuLKPAIVEQFlttAGSsDPLAVTV+SGsRllLVPKspTsDEAhuLlRp.lG+s.lVRVGlTQaPAG.lGlh-AupLK.sDLVDsCcNlRhGTALFAKVhRIVsKWYGNPTsp-VhPQlF-DAIhAWpTGhFEGsuVF+A-DPGsss....hspssstpptps.tppsssp.tsus..s..sstsups.sDsspAGIRIDLStIGu......................................................... 0 1 4 8 +6700 PF06872 EspG EspG protein Moxon SJ anon Pfam-B_13549 (release 10.0) Family This family consists of several EspG like proteins from Citrobacter rodentium and Escherichia coli. EspG is secreted by the type III secretory system and is translocated into host epithelial cells. EspG is homologous with Shigella flexneri protein VirA and can rescue invasion in a Shigella virA mutant, indicating that these proteins are functionally equivalent in Shigella. EspG plays an accessory but as yet undefined role in EPEC virulence that may involve intestinal colonisation [1]. 
25.00 25.00 69.10 69.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.04 0.70 -6.12 4 134 2009-01-15 18:05:59 2003-08-08 12:57:48 6 1 111 10 1 57 0 370.00 72 98.16 CHANGED cShhhNuhpNsSA.hsL-uslcVsSshcpsWs-hohAE+LLKlLohGlasPcYotsERpphpcLLolLcPlhPtsNEhG+VtApFSDGSSLRISVTpSE.IEsplpTP-s-Kl.olhLEuNEQNpLLpSL.PlshHMPYIpsH+uLsph-lsstpuM+pLlsFsuKLSTolIPasspTcPLSGsTPFsSlahDThRGL.....GNoKlSlNGV-IPtcAQtLLpstLGLKDTtSSPspNlIppGIshcpAppIlpcSpssp-.....QKAhlsshLCpPEhsoAICSAFYQSFsVPAhhLpH.RIphASpa.upRSLshP..............NAsl...sIuISpSSsGulaVoSpsGshIMAPc-RsNtlGhhT.RTSYEVP.Gs+Cp.sEhsRslpP..+YuuSEsY.pN .......MINGLNN..sSASLVLDAAh+VNSsFKKsWs-MSCAEKLLKVLSFGLWNPTYoRSERQoFQELLTVLEPVhPhPNELGRVpApFSDGSSLRISVTNSELlEAEIRTs-NEKI.olLLESNEQNRLLQSL.PIshHMPYIQVHRALSEMDLTDssSMRNLLuFTSKLSTTLI.PHNsQTDPLSGPTPFSSIFMDThRGL.....GNAKLSLNGVDIPssAQKLLRDALGLKDTHSSPsRNVIspGISRHcAEQIARESSGSDc.....QKAEVVEFLCHPEAATAICSAFYQSFNVPALoLTHERISpASEYNuERSLDsP..............NACI...NISISQ.SSDGsIYVoSHTGlLIMAPEDRPNEhGMLTNRTSYEVPQGVKCpIDEMVpsLQP..RYuASETYLpN........ 0 0 0 1 +6701 PF06873 SerH Cell surface immobilisation antigen SerH Moxon SJ anon Pfam-B_13151 (release 10.0) Family This family consists of several cell surface immobilisation antigen SerH proteins which seem to be specific to Tetrahymena thermophila. The SerH locus of Tetrahymena thermophila is one of several paralogous loci with genes encoding variants of the major cell surface protein known as the immobilisation antigen (i-ag) [1]. 
22.00 22.00 23.60 22.10 21.50 21.20 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.07 0.70 -13.87 0.70 -5.85 11 200 2009-01-15 18:05:59 2003-08-08 13:01:24 6 9 2 0 192 207 0 156.00 17 99.89 CHANGED MpsKsLlICLll.......pphhlSVhSshsGssVsCss..ssssCssosssssPshtG.....CSW...........sGsssssCtIsDCsClsss...ssoGLTDhFCpSCtuss......ssaANsAGoACVuoSuSCssspps.....sWssuDCsLCsPsTPAhsut.......uCsACSu.....hoSuaTDusCsACuo...............suoshspslFANoAGouCVAuSASCsSsSRuss...AWTsuDCthCsPso......Phhsus.p.sssoSCsusossToGhTDupCNuCu...................ssuSssspslFANsAGSuCVAoSAoCsouspuss...sWTsuDChhCsPsTPuh...hu..ssooClA......Csuhoos.WTDAsCsuCh.sASsssps...............lFAsusGSuCVAuohSCNtosRuSNpWTDuDCALCNG............................Tuss.uNQYASuDGSSCQuTp......sSuThSuphhlShLLlhSuLLI ....................s..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 192 192 192 +6702 PF06874 FBPase_2 Firmicut_FBPase; Firmicute fructose-1,6-bisphosphatase Moxon SJ anon Pfam-B_13194 (release 10.0) Family This family consists of several bacterial fructose-1,6-bisphosphatase proteins (EC:3.1.3.11) which seem to be specific to phylum Firmicutes. Fructose-1,6-bisphosphatase (FBPase) is a well known enzyme involved in gluconeogenesis [1]. This family does not seem to be structurally related to Pfam:PF00316. 
21.50 21.50 21.60 21.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 641 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.95 0.70 -6.48 33 868 2012-10-02 19:15:56 2003-08-08 13:13:26 6 1 816 0 91 645 36 621.00 51 98.14 CHANGED h+YLcLLScpaPTlspsuTEIINLpAILNLPKGTEHFlSDlHGEYEAFpHVL+NGSGsl+cKIcDlFu.ssLscsEKppLATLIYYPc-KL-ll+p..ppc.slc-WY+hT.....LhRL....Icls+hsuSKYTRSKVRKALPc-FuYIIEEL.Lacpsctss.KcpYYspIIpoIIclscA--FIlAluplIQRLVVDHLHIVGDIYDRGPts-hIMDpLh...pYHSlDIQWGNHDlLWMGAAuGscsClANVlRIusRYuNLshlEDuYGINLhPLAsFAhchYp.csPC.sFpPKh...st.phsppEhphls+hHKAIuIIQFKLEuplI+R+PEFcM-cRhLLc+Isa-cuTIsL.sG+pYtLpDTsFPTlDPpsPYcLTpEEc-ll-+LhtSFhsSEKLp+HhcFLhsKGuhYLhYNuNLLaHGCIPLsEDGshcphpI..tGcpYpG+pLLDhh-phlRcuah.pcspppcchupDhhWYLWsGtsSPLFGKccMTTFERYFIpDKpTHpEpKNPYYpLR-cEclCcpILcEFGLss....cpuHIINGHsPVKsppGEsPIKANGKllVIDGGFSKAYQppTGIAGYTLlYNSYGhpLVoHpPFpSpccAIpctpDIlSsphllEpsspR+pVpDTDlGpcLppQIpDLctLLtAY+pG ................................+YLcLLuppasohpchuTEIINLEAILpLPKGTEHFlSDlHGEYEAFpHVLRNGSGsl+pKIp-lFt.spLsppEhs-LssLlYYPE-KLpLlcp....ppp..p..h.psWYhhTlp+L....Iclh+hsSSKYTRSKVRKALPcpasYIIEEL.Lacss.c.h..ps.Kc.sYYppIlpplIplppA--a...IluLuhhIQRLllDHLHlVGDIYDRGPtPDpIMDpLh...sY.H..S..lDIQW..GNHDlLWhGAhuGScsClANllRIsARYsNL-llEDuY.GINLRPLhsaAtchYp...s-s...tFpPKht..tpp..phsppE.p.ls+hHpAIulIQFKLEstlIcRRP-FcM-cRllL-KIsa-pssIs.l.pGp.......pYsLp....DosF.Tl.....s.....s....csPhcLstEEc-lhs+LhhSFppSEKLp+HhpFLhpKGShYLsYNuNLLhHGCIPlsEsGphcshpl..p.G.c.p.Y.sG+pLLDhh-hhlRcua.....s...pt........p....p....pc....chusDhlWYLWsGchSsLFGKctMoTFERYFIs..D......KtoH+EcKNPYYp.L..R..-.ct..p......hsc+ILcEFGLss....-puHIINGHTPVKphcGEsPIKAsGKhlVIDGGFSKAYQppTGIAGYTLlYNSaGhQLVuHpsFsupEcslpps.tDhhSh+pll-p.phpRphl+DTshGccLppQIptLchLhcsh..t...................................................................... 
0 39 65 82 +6703 PF06875 PRF Plethodontid receptivity factor PRF Moxon SJ anon Pfam-B_13241 (release 10.0) Family This family consists of several plethodontid receptivity factor (PRF) proteins which seem to be specific to Plethodon jordani (Jordan's salamander). PRF is a courtship pheromone produced by males increase female receptivity [1]. 19.40 19.40 19.50 19.70 19.20 18.90 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.30 0.70 -5.12 2 260 2012-10-02 01:28:15 2003-08-08 13:18:57 6 1 58 0 45 239 0 204.80 59 94.64 CHANGED RSTSLLTFLVVSLSTATSLAMAEINDVADLSSDTIVhFSEVQKFAEDIQSSADSLLPTYLSFQGAPLSDPDYQLPHIKVsNLPTAAMDYDTFh+QTDETRLpNNLYFYSAIVEFLKEAMTEQEDLNPAELuLKAKFEEAMANSNTLISKISDIMTQMGMSVTITLPKPLVVPFcGSAYFpKKLRGGVVCKEYKERVhLTKRDF.hLAcKYQG.L ............................................RSTuLLTFLVVSlSoATSLsM......A......-hs......DVA-LSpDTIVLFSEsQKFAEclQSsADSLLsTYLSFQGAPLSDPDYp....LP+.IK.V.s.NLPTAsM.DYDTFhpQTDEsRLpNNLYFYSAIVEFL+tAMTEQE.DLNPAELuLKAKFEEAMANSNTLISKISsIMTQMGMS...V.....TITLP..c..PL.V..VP..F...c.G.SAhFpKKLRGGVVCKEYKERVhLTKRDFphLAcKYQG................................................................. 2 1 4 13 +6704 PF06876 SCRL Plant self-incompatibility response (SCRL) protein Moxon SJ anon Pfam-B_13253 (release 10.0) Family This family consists of several Plant self-incompatibility response (SCRL) proteins. The male component of the self-incompatibility response in Brassica has been shown to be encoded by the S locus cysteine-rich gene (SCR). SCR is related, at the sequence level, to the pollen coat protein (PCP) gene family whose members encode small, cysteine-rich proteins located in the proteo-lipidic surface layer (tryphine) of Brassica pollen grains [1]. 
21.80 21.80 21.80 22.00 21.50 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.14 0.72 -4.01 43 154 2012-10-01 23:31:40 2003-08-08 13:22:27 7 1 12 1 49 160 0 65.60 27 86.40 CHANGED Fll....SHsQ.-VEAshh........C..tppsasG.pCussGscp............Chpphpp.....hcpcshpCpCs......tthpsp+hCsC..ph ......................hp.-VEA.shhpt......C...tp..tt.hs.G.pCu..ssGs.p.p............Chpthpp.......hpppshpCpCp.........t.pppphCpCp.h.............. 0 25 26 28 +6705 PF06877 RraB DUF1260; Regulator of ribonuclease activity B Moxon SJ, Bateman A, Eberhardt R anon Pfam-B_13601 (release 10.0) Domain This family of proteins regulate mRNA abundance by binding to RNaseE and inhibiting its endonucleolytic activity [1-2]. A subset of these proteins are predicted to function as immunity proteins [3]. 23.30 23.30 23.30 24.80 23.10 22.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.79 0.72 -3.38 67 1287 2012-09-25 12:41:39 2003-08-08 13:24:37 6 5 1101 1 189 635 25 103.70 42 67.55 CHANGED h.pppspcllptLhcsGs.-.st.a.lEaahhhsc.cphcchutchhptGac.lps.tp.............-c.ss..shathphstphhhshctIsptpppl.plApchssp.YDGWGs .............Qc-ETR.IIcpLLEDGS.DP-uLYsIEHHlss.cDh-sLEKAAV....-AFKhGYE.Vs-sEEh.........EsE-Gc....hlhssD.lhs.Ess...Lss-hIDuQV-pLhsLAEKassp....YDGWGT................... 0 32 79 135 +6706 PF06878 Pkip-1 Pkip-1 protein Moxon SJ anon Pfam-B_13784 (release 10.0) Family This family consists of several Pkip-1 proteins which seem to be specific to Nucleopolyhedroviruses. The function of this family is unknown although it has been found that Pkip-1 is not essential for virus replication in cell culture or by in vivo intrahaemocoelic injection [1]. 
25.00 25.00 116.10 115.90 24.30 19.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.69 0.71 -4.36 19 40 2009-09-14 12:00:35 2003-08-08 13:28:33 6 1 39 0 0 37 0 162.40 33 93.32 CHANGED stIcphpsK.NslhcQa-pKVhsahpKss.s................-sttp-hhhLuAplaG.pEQL.uLpp..ssscpc+.h-FlsDl.s-L.Dhssp-lcphhptsc.s......ahhpKYpssph.pthppsac.p..pFlKhhcpFlsKRps.h.............hp........sss....sshL-ELVhLKsslIKHLCshEpLs ....pIcplpsKcsslpcpY-pKVhsah+Kss..................-stts-hhhluAplaGhcEQLhuLpp..stsccc+.l-FlsDl.s-L.Dhss--l-plhttps.s......hlspKYpssp.l.....scslppsa-pptppFlKllcpFlsKRss.a.............++........ssssplL-ELVhLKsslIKHLCshEpLh. 0 0 0 0 +6708 PF06880 DUF1262 Protein of unknown function (DUF1262) Vella Briffa B anon Pfam-B_6733 (release 10.0) Family This family represents a conserved region within a number of proteins of unknown function that seem to be specific to Arabidopsis thaliana. Note that some family members contain more than one copy of this region. 19.80 19.80 20.60 41.10 18.60 18.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.33 0.72 -3.69 9 75 2009-01-15 18:05:59 2003-08-11 13:15:06 6 3 13 0 52 73 0 99.20 45 26.52 CHANGED EGPsSGlLVlp....DEcutsc..pCaGhCh..csplpGLPFPQNptLoVca.........psGpGcsphsa..tDsVVFlPVlsQPLSSNRYYsl+tsG+HuGcssAso.+EEDtVoCCFC ..............EGPNSGhLVIp....DE-upsp...s.....C..aGhsh..c.s.pl.c.sLPFPQNppLsVpa.............................ptstsppp.sh..h-sVlFIPVLsQPLSSNRYYslct.pGKH.pGpspssu..+E-Dhss..CCF................. 0 10 25 36 +6709 PF06881 Elongin_A RNA polymerase II transcription factor SIII (Elongin) subunit A Vella Briffa B anon Pfam-B_6598 (release 10.0) Family This family represents a conserved region within RNA polymerase II transcription factor SIII (Elongin) subunit A. 
In mammals, the Elongin complex activates elongation by RNA polymerase II by suppressing transient pausing of the polymerase at many sites within transcription units. Elongin is a heterotrimer composed of A, B, and C subunits of 110, 18, and 15 kilodaltons, respectively. Subunit A has been shown to function as the transcriptionally active component of Elongin [1]. 25.30 25.30 25.30 25.50 23.90 25.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.16 0.72 -3.67 21 318 2009-01-15 18:05:59 2003-08-11 13:51:23 6 5 229 0 221 299 0 106.30 27 22.28 CHANGED lp-lGslPaplLcPlLc+s.os-QLhclEcts..PtLhp-oD-LWpcahpRDF....................ccph.p-.-.......sWR-hYh+hp-cpcp+h....cpLppplppspsp+spt.+p.....hhlpsh ..................lt-lGs.lPaplLc.PlLp+s...os-QLhclEc..ps.....stLhp-...oD...cLWppahp+DF...........................................cp.p..t.spc.h-........................oW+-hYh+hpcpp-pcl........ptLppshpstptpK.pt.pt...h.....h....................... 0 66 111 176 +6710 PF06882 DUF1263 Protein of unknown function (DUF1263) Vella Briffa B anon Pfam-B_6668 (release 10.0) Family This family represents a conserved region located towards the C-terminus of a number proteins of unknown function that seem to be specific to Oryza sativa. 21.60 21.60 24.30 23.80 21.10 18.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.36 0.72 -4.43 8 155 2009-09-10 17:20:14 2003-08-11 14:02:11 7 12 3 0 132 154 0 86.80 58 53.41 CHANGED MG+..sp.cSAGSRCNV..tutLSADDhTGVRPVTDRSFLSTTRQFShLHVCPSSYNDFLAMVuMKPGhYLsGTDVPoPGssTPAPARDECLEALIlPTGhGEACsRPPlAT .....................................................................................................o.hsDFLAMluMKPGM.LsGTDlPTPGVsTPAPARDEs.EALIIPTGRGEACsRPPVAT........................... 
0 0 0 0 +6711 PF06883 RNA_pol_Rpa2_4 RNA polymerase I, Rpa2 specific domain Finn RD anon Pfam-B_4721 (release 10.0) Domain This domain is found between domain 3 (Pfam:PF04565) and domain 5 (Pfam:PF04565), but shows no homology to domain 4 of Rpb2. The external domains in multisubunit RNA polymerase (those most distant from the active site) are known to demonstrate more sequence variability [1]. 21.30 21.30 22.40 26.50 21.20 20.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.91 0.72 -4.01 31 319 2009-01-15 18:05:59 2003-08-11 14:20:06 7 28 270 0 214 329 3 58.50 41 5.23 CHANGED lGalssctApplsssLRhhKlt..........s.ppplP..............pLEIuaVPso...ps..GpYP.GLYlFossuRMhR ...........lGass...c.tuttlucsLRhhK..Vp...................u..ppplP..............pLEIuaVP.o.....ps..GpYP.GLYLFossuRMhR............ 0 77 123 181 +6712 PF06884 DUF1264 Protein of unknown function (DUF1264) Vella Briffa B anon Pfam-B_6839 (release 10.0) Family This family contains a number of bacterial and eukaryotic proteins of unknown function that are approximately 200 residues long. Some family members are annotated as putative lipoproteins. 
25.00 25.00 34.70 27.90 18.80 19.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.12 0.71 -4.81 28 339 2009-01-15 18:05:59 2003-08-11 15:00:49 6 7 284 0 133 261 2 167.10 51 67.11 CHANGED shsPlcplstHLsu..........FHhYucDPsR.plEApHYCs.+lsE.......DhtQCllYDussssA+LlGlEYlISt+LFpTLPs-E++LWHoHsaEV+SG.Llh............................Ps.....lPps....AE+stMcclhshYGKTaHhWQs....DRGDsLPLG.PpLM..huFop-uplc........tLlcpRDc+h.Glss...ct+RcpRt-.lpts ...........................p..sPlctIssaLsu..........FHhYusD......hst....QhEAHHYso.hLNE...............DlhQslIYDus.sps.ARL........hGVEYIISE+LFcT.LP.sEEKKLWHSHpYEV.......KSG.Lls.................................PG.........lPps........s-+shMpcllsTYGKTWHTWps.....DRsc.......sLPhGhPtLM..MuFT..sD..GQlcs.......sL.lt-RDcRh...GlDTpth..+cpRp-.ls......................... 0 41 76 110 +6714 PF06886 TPX2 Targeting protein for Xklp2 (TPX2) Vella Briffa B anon Pfam-B_6863 (release 10.0) Family This family represents a conserved region approximately 60 residues long within the eukaryotic targeting protein for Xklp2 (TPX2). Xklp2 is a kinesin-like protein localised on centrosomes throughout the cell cycle and on spindle pole microtubules during metaphase. In Xenopus, it has been shown that Xklp2 protein is required for centrosome separation and maintenance of spindle bi-polarity [1]. TPX2 is a microtubule-associated protein that mediates the binding of the C-terminal domain of Xklp2 to microtubules. It is phosphorylated during mitosis in a microtubule-dependent way [2]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.73 0.72 -3.90 20 364 2009-01-15 18:05:59 2003-08-11 16:21:19 6 9 95 0 225 353 0 56.10 38 11.62 CHANGED h+o-cRAccRpEF.pKlcEKppthctppppsEtppcpEEEptl+QLRKpLV..aKApPh ........h+s-cRAccRp..E.....F.p+lcEKppthE..tp+pptctppc..............EppEp-.l+pLR.K.p.Ls..aKApPh...... 
0 58 137 180 +6715 PF06887 DUF1265 Protein of unknown function (DUF1265) Vella Briffa B, Pollington JE anon Pfam-B_7101 (release 10.0) Family This family represents a conserved region approximately 50 residues long within a number of proteins of unknown function that seem to be restricted to C. elegans. The GO annotation for this protein indicate that its a protein involved in nematode larval development and has a positive regulation on growth rate. 25.00 25.00 59.40 58.40 18.10 18.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.35 0.72 -4.30 7 20 2009-09-11 00:32:59 2003-08-11 17:07:06 9 1 4 0 17 22 0 47.90 52 15.25 CHANGED EELhpNhEDlhYVppLLllAcsu+hpslpssC.ATll.aHhpDFhR.h .EELhKNaEDlhYVCNhLIlA-Du+FsslpsCClATllhYHFsDFhR.... 0 10 13 17 +6716 PF06888 Put_Phosphatase Putative Phosphatase Vella Briffa B anon Pfam-B_7115 (release 10.0) Family This family contains a number of putative eukaryotic acid phosphatases. Some family members represent the products of the PSI14 phosphatase family in Lycopersicon esculentum (Tomato) [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.64 0.70 -5.20 9 337 2012-10-03 04:19:28 2003-08-11 17:25:44 7 8 166 0 194 898 85 208.20 31 78.97 CHANGED hlllFDFD+TIIDtsSDshVVcthssppl.ppLpsoh....WNphMsRhhp.LpsQG.hs.t-l+psl+slPlsPthlchl+th..ps.th-LhIlSDANpFFI-phLcttulpshFsc.IhTNPushDu.pGpLpltPYHs.....Ho.Cs..hCPsNhCKGhVl-chhspt..pcGhthcRllYlGDGssDaCPsl+LppsDhshPR+GaPhachlsc....ssthl+ApVh.WssGt-lpchLhtll ........................................................................lllF.DF...DpTIl-..p.....s..........S..D.........s..h.........l......l.....p.......t..h.........s..t.....p.........p.....h...........p...p.....L...t....t...p.....h....p.....................W..s..p..h..M...........s....c.hhp.............L...........t.p..p.......u.....h....p............p.......c........l..t....p.....s.........l..................p.p.....lP....h...s..s..th....h.p.hl..ph.h..............t...t.........s.............h-..lh..llSDu.NshaI-phL....ctt.u...........l..p.p.....h..Fs..c..lhoNP.u.t.h.......s..t.....pG......p.....L....p....l....p......P...aHs.............................Hs..Cs.....hC....P....s..N...h...CK..t.......tl..l.p.....c.hhpp........................t..u.h................hp.+..llYlGDGssDh....CPshp.LtttD.hsh.sR......+..s......a....s..h..chltp...........p...hp..u.plh.Wpsu.-l.p.h................................................................................................... 3 60 104 151 +6717 PF06889 DUF1266 Protein of unknown function (DUF1266) Moxon SJ anon Pfam-B_13878 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 235 residues in length. Members of this family seem to be found exclusively in the Enterobacteria Salmonella typhimurium and Escherichia coli. The function of this family is unknown. 
21.80 21.80 22.20 21.90 20.70 21.60 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.90 0.71 -4.46 66 1230 2009-01-15 18:05:59 2003-08-12 09:14:11 6 6 645 0 86 576 3 174.40 29 66.70 CHANGED pptLppsWGIsc+c....shhpplptlh..spGcp...............................................................hthhpphtp.htthttttspphththphhpth..................htttuhhAWDhuRhshlsRh...uhhsGalsccEshphhhpsuppspptasSWc-ahtuYhhGpthWtssssppphtttth...............pLhpssp.........uPat.................t.ls........Wp .............................................................................................................h..sLpspWGIpsp-shhphl.phh......sssHu........................................................sphts.htphhcps.....p.hpchhsths.-p.sp.....thsp..alutp..............................suttsIhAWDhsRMuaLoRh...ushNsalsEE-uhal..psth+A..pchacoWccYhsuYhhGRhYW.....ps.sps.c..pphhht...chht.h................plhtssc....paat...........sLPWp.................................................................. 1 32 58 75 +6718 PF06890 Phage_Mu_Gp45 Bacteriophage Mu Gp45 protein Moxon SJ anon Pfam-B_10848 (release 10.0) Family This family consists of Bacteriophage Mu Gp45 related proteins from both phages and bacteria. The function of this family is unknown although it has been suggested that family members may be involved in baseplate assembly. 
30.10 30.10 30.30 30.10 30.00 30.00 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.98 0.71 -4.79 8 376 2009-09-13 01:58:40 2003-08-12 09:25:51 7 2 328 0 52 300 4 162.70 32 86.85 CHANGED uRAVVoulssspKpQslQl+lhAGEspDcVE+LpsYGaoSsP.sGAEAllshsGGcRSHuVsVVVsDRRaR.pGLpsG-VuLYccEGcplpLT+sGclI.sus.......Kolplp.......uuppspF-oP.sphTGslcsssD.u..............Gsshus.cpstshsh.GHtH+-susGus ...........................RuslshlssutphQsl.Qlp.h.husEstsslE+hp.YGF.oS.s.s...sGuEulllhlG....GcRS+uVllslpc.ccYRhpG.LpsGEsAlYs.c.pG.p..p.lpLp.+.tGhl.lcsss..............................cs...lplp...............AsspsphpsP...lps.oGp.lpspss.s...............................utshss....t.h....shs.........sHpHppsstt..s........................................... 0 12 26 42 +6719 PF06891 P2_Phage_GpR P2 phage tail completion protein R (GpR) Moxon SJ anon Pfam-B_10918 (release 10.0) Family This family consists of P2 phage tail completion protein R (GpR) like sequences. GpR is thought to be a tail completion protein which is essential for stable head joining [1]. 25.00 25.00 27.00 26.80 24.40 24.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.44 0.71 -4.37 48 720 2009-01-15 18:05:59 2003-08-12 09:29:35 6 5 527 0 86 497 3 130.00 37 85.41 CHANGED M..........hK.ppL+shLhssl..sphtsss-p....lcsal-sGplhsssp..u.uh.........chpYshslhlpcass...csshlhsslhsWLpspps-...p.c..tcppshpFcl-l.scsss......DlpIplp.LsEclhltcct......sGtlph....pttt.s ..........M...KspSLRpALscul...shhpsNP-p....LplFV-sGslhsTut..ohSa................-aRYslslhlpDasG...-.shLhsPlLsWLc-NQPDlh..ssp..h+ppshsFps-I..cs-ss......DlslsL..LTERVllsps.......pGs..thpth.................... 
0 8 34 60 +6720 PF06892 Phage_CP76 Phage regulatory protein CII (CP76) Moxon SJ anon Pfam-B_13444 (release 10.0) Family This family consists of several phage regulatory protein CII (CP76) sequences which are thought to be DNA binding proteins which are involved in the establishment of lysogeny [1]. 21.30 21.30 21.70 21.50 20.60 20.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.70 0.71 -4.92 15 350 2009-01-15 18:05:59 2003-08-12 09:37:11 6 2 313 0 50 250 8 153.50 44 91.22 CHANGED -apsSKps....+FDpACpsFAspaNlscLAccsGhss.QhLRNKLNPpQPHpLTssELltITchT.sDpTllsuhLhplsslsss.Pssps...uc....sl.ppslcsoupsG-luptAlph.uscRlTpspKcpllppApuuIppLuLlhtslEsRhQ.AsPshuhssDslsu .....................DaphSKHs....HFDpACRuFAlcH..N..h..s...pL..Ac+sG..Mss....QsLRNK..L..N..P.p.Q.P.H.p.L.TssElhhLTDlT..EDuTL......lDGhLAQIp..CLP.s.V....P...lNEs.....uc....tpLs.chVhsAT....A-lGclAus...AV...S..s-.hhTsutR...+shlsslNushRhhuLhAhslpuRlQ..usPshsuuVDslo.u....................................................... 0 7 19 38 +6722 PF06894 Phage_lambd_GpG Bacteriophage lambda minor tail protein (GpG) Moxon SJ anon Pfam-B_11957 (release 10.0) Family This family consists of Bacteriophage lambda minor tail protein G and related sequences. The role of GpG in tail assembly is not known [1]. 19.20 19.20 20.40 20.20 19.00 18.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.77 0.71 -4.51 7 773 2009-01-15 18:05:59 2003-08-12 09:47:44 6 2 336 0 10 332 0 122.70 54 90.90 CHANGED MFLKoEpFpasGsSVsLSELSALQRIEaLpalppcstph-sp......uccpssh.phslchsAaLVuhSLWcu.........s.sp-spplpppVhtsWsh-AluputphVLhLSGM......psssppsspss ...........MFLKTEpFEYNGVSVTLSELSALQRIEaLAhlppcAEQ.t...Eos.........uscpl...sl.....cc...hlcTuAaLVAMSLWHsHs.Ks.p.sShsEsVppIcQEVlTTWPs-Alup..ApssVLpLSGM..hshpsssssppstc..t.................................................................................................. 
0 0 1 4 +6724 PF06896 DUF1268 Protein of unknown function (DUF1268) Moxon SJ anon Pfam-B_11819 (release 10.0) Family This family consists of several bacterial and phage proteins of around 115 residues in length. The function of this family is unknown. 21.90 21.90 22.40 22.10 21.70 21.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.44 0.71 -3.74 4 65 2009-01-15 18:05:59 2003-08-12 09:56:36 6 1 61 0 12 57 1 118.20 23 90.05 CHANGED Mc...IKlsclpc+oaEVKTS.+NlcKMacaQLshActp-pIusups.p.sclshsh.......LcshltFloslLsLsKcEh-KL.t-LEhpchhclsshlVthh.GhoD-pI-pu.sccsDs..uc ...............................................l.h....p..l....p+shpVpsospslc+hpch....Qlthhctp...s.plp..csp.t.h....phhch...phph.............lcchhcFlpplLsLsccph-Kl.-c.l.-hpchtchsshlsh+lp..GhsDcplchs.tpp.....p................ 0 3 6 10 +6725 PF06897 DUF1269 Protein of unknown function (DUF1269) Moxon SJ anon Pfam-B_14034 (release 10.0) Family This family consists of several bacterial and archaeal proteins of around 200 residues in length. The function of this family is unknown. The family carries a repeated glycine-zipper sequence- motif, GxxxGxxxG, where the x following the G is frequently found to be an alanine. As glycine-zippers occur in membrane proteins, this family is likely to be found spanning a membrane. 22.40 22.40 22.40 22.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.01 0.72 -3.99 40 415 2012-10-03 03:18:43 2003-08-12 10:27:11 7 3 359 0 124 303 40 101.70 31 38.74 CHANGED AluGuhWGhLlGllFhsPLlGh....AlGAuuGAluGu.Lo..DhGIsDsFl+-lucsLpPGooALFlLl+csssDKVlpclptas..GcllpTSLSc-cEppLpcALspu .......................................htGuhhGhLlG.llh..hs..slhGh.......................AlGAusGAluGs...hs...D...hG...Ic...D...s...h...h...cclupsLss.Gouulhlls.cc.ts.-cVhs.s.lp.shs..upllppsls..tt..l.t.....t.............................. 
0 53 82 106 +6726 PF06898 YqfD Putative stage IV sporulation protein YqfD Moxon SJ anon Pfam-B_13823 (release 10.0) Family This family consists of several putative bacterial stage IV sporulation (SpoIV) proteins. YqfD of Bacillus subtilis (Swiss:P54469) is known to be essential for efficient sporulation although its exact function is unknown [1]. 25.00 25.00 26.40 29.80 24.60 23.50 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.05 0.70 -5.76 8 428 2012-10-01 23:48:22 2003-08-12 12:30:40 6 2 409 0 89 349 7 353.90 27 95.03 CHANGED KhpappFh+GhVplclpGhsIE+FLNhshppsI.lhsl++hsspslshplsLpDhKKl+plsK+spCKlphlcR+GhPFllhRh+++hulllGhllFllllahLSshlWcIDIpsscshsEa-lRppLs-hGVKpGshpFsl-.lpKlp+cLhpshssIhWlGVclcGTol+lcVVEKppP...-.tpps-PpNlVAKKcGhIpRlaVpcGpslVKlsDhVKKGDlLVSG.lGpEspcpt............VsAcG-VhAcTWYEsplcV.LpsphpshTGcshssaalphtuhplsl...shpcpEFccacp.pcp+sh.hh.aphPhphs+pphYEspppptchsK-EAVccutKhuccclpcplucsuclhscKVh+cclEsGKl+LplhhpV.EsIu ...............................................hh.Ghlplclp.G.hshE+FlN.shpptlhlas.l........p+hs.ps.hhhph.lp-h+cl+slh++sps+lpllp+hG.hPFhht+hh+.ppsh.hlGhllFhhhlhhhSshlWpI...-I..p...Gs..p....s..ophtlhptL...c.c.h.Gl+.Ghhphpls.spclpcplpp.ph.s.s.lsWlulclcGTphplcls...E...+p.pP.................phtp.pppP...........p...........slVApKculIpchhsppGpslVphsDhVcKGplLlS..G.h.h......s..p..c..tp...t........................V..p....AcGpVhuc..T..aYpt.ps..plslppp...hp...hh.TGpphpp.hhlphsstpl..hl.........s..h..tp....ppac.ph.c.p.p...pp.ph.p.h.h.t.h.h...l.P...lthtpphhhEhcphptph...o....c...cpAhphutchspc.plppp.l....s.pp.s.pIls.......c.......p...l.h......p..h...c..s..splchplhhps.EsI............................................... 0 47 74 79 +6727 PF06899 WzyE WzyE protein Moxon SJ anon Pfam-B_13849 (release 10.0) Family This family consists of several WzyE proteins which appear to be specific to Enterobacteria. Members of this family are described as putative ECA polymerases this has been found to be incorrect [1]. 
The function of this family is unknown. 25.00 25.00 29.40 28.60 22.90 22.70 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.74 0.70 -5.79 3 583 2009-09-11 10:39:43 2003-08-12 12:41:03 6 2 559 0 43 231 0 431.40 85 99.60 CHANGED MTLuQFGGLFVVYLLullFIATLTYpEFRRVRFNFNVFFSLLYLLTFYFGFPLTClLVFRFGVuVVPVEhLLpALLSATuFYGIYYVTYKTRLRpRsus.PRsPlFTMNRVETNLTWVLLALIALuTVGIFFMQNGFLLFRLcSYSQIFSSDVSGVALKRFFYFFIPAMLVVYFL+QDpRAWlFFLlSTVAFGILTYlIVGGTRANIIIAFALFLFIGIlRGWITLWMLAAAGVlGIVGMFWLALKRYGLNVSGDEAFYTFLYLTRDTFSPWENLALLLQNYD+IDFQGLAPIVRDFYVFIPSWLWP-RPSLVLNTANYFTWEVLsNHSGLAISPTLIGSLVVMGGVLFIPLGAIVVGLIIKWFDWLYEpGKAEoNRYKAAILQSFCFGAVFNMIVLAREGLDSFVSRVVFFCVIFGACLllAKLLYWLF-oAGLI++...RTsuLshsNAts ............MSLhQFSGLhVVWLLsTLFIATLTWFEFRRVRFNFNVFFSL.LFLLTFFFGFPLTSV...LVFRFDVGVAPPEILLQALLSAuCFYA.VYYVTYKTRL...R..............KR.V........u.......D...VP.....R..RPL.F..TMNRVETNLTWVILMGIALVSVGIFFM.HN.GFLLFRLsSYSQIFSSE.VSGVALKRFFYFFIPAMLVVYFLRQ...DSKA.WLFFLVSTVAFGLLTYMIVGGTRANIIIAFAIFLFIGIIRGWISLWMLAAAGVLGIVGMFWLALKRYGh...NVSGDEAFYT.....FLYLTRDTFSPW....ENLALLLQNY..DN..IDFQGLAPIVRDFYVFIPSWLWPGRPShVLNSANYFTWEVLNNHSGLA.............I..SPTLIGSLVVMGGALFIPLGAIVVGLIIKWFDWLYELGNREsNRYKAAILHSFCFGAIFNMIVLAREGLDS..FVSRVV.FFlVVFGACLhlAKLLYWLF-SAGLIHK...RTpS..psQVEG............... 0 1 11 27 +6728 PF06900 DUF1270 Protein of unknown function (DUF1270) Moxon SJ anon Pfam-B_13907 (release 10.0) Family This family consists of several hypothetical Staphylococcus aureus and phage proteins of 53 residues in length. The function of this family is unknown. 20.60 20.60 20.60 23.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.89 0.72 -3.82 2 250 2009-01-15 18:05:59 2003-08-12 12:43:48 6 1 175 0 2 69 0 52.40 89 98.91 CHANGED MSNIYKSYLVAVLCFTVLAIVLMPFLYFTTAWSlAGFASIAhhIFaKEYFYtc .MSshYKSYLlAVLCFTVLAIVL.M.PF.LYFTTAWSIAGFASIATFIFYKEYFYEE... 
1 2 2 2 +6729 PF06901 FrpC RTX iron-regulated protein FrpC Moxon SJ anon Pfam-B_14005 (release 10.0) Family This family consists of several RTX iron-regulated FrpC proteins which appear to be found exclusively in Neisseria meningitidis. FrpC has been shown to be related to the RTX family of bacterial cytotoxins. FrpC is found in the meningococcal outer membrane. The function of this family is unknown although it is thought to be a virulence factor [1]. 25.00 25.00 162.40 162.10 19.50 19.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.83 0.70 -4.88 2 63 2009-01-15 18:05:59 2003-08-12 12:52:41 7 1 29 0 4 51 0 244.80 90 99.31 CHANGED MRPYATTIYQLFILFIGSVFTMTSCEPVN..............EQTSFNNPEPMTGFEHTVTFDFQGTKMVIPYGYLARYTQDNATKWLSDTPGQDAYSINLIEISVYYKKTDQGWVLEPYNQQNKAHFIQFLRDGLDSVDDIVIRKDACSLSTTMGERLLTYGVKKMPSAYPEYEAYEDKRHIPENPYFHEFYYIKKGENPAIITHhNNRlNQsEEDsYSTSVGSCINGFTVQYYPFIREKQQLTQQELVGYHQQVEQLVQSFVNNSsKK ....................................MTSCEPVN..............EQTSFNNPEPMTGFEHTVTFDFQGTKMVIPYGYLARYTQDNATKWLSDTPGQDAYSINLIEISVYYKKTDQGWVLEPYNQQNKAHFIQFLRDGLDSVDDIVIRKDA.CSLSTT.MGERLLTYGVKKMPSAYPEYEAYEDKRHIPENPYFHEFYYIKKGENPAIITHRN..+....phtEssYSTSVGSCINGFTVcYYPFIRE......K......QQLTQQELVGYHQQVEQLVQSFVNNsSKK.... 0 4 4 4 +6730 PF06902 Fer4_19 DUF1271; Divergent 4Fe-4S mono-cluster Moxon SJ anon Pfam-B_13906 (release 10.0) Domain Members of this family contain three highly conserved cysteine residues. This family includes proteins containing divergent domains which are most likely to bind to iron-sulfur clusters. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.32 0.72 -4.15 22 670 2012-10-03 08:56:42 2003-08-12 13:05:51 6 12 636 0 116 1472 82 63.60 50 63.97 CHANGED YpGtplslhhstslCtHuupCl+...stPpVFc.tc+.PWl.....pP-suss....cplhphlspCPSGALsahcc ...........YoG-cIDVaaN.ssIC.p.HSGNCVR...Gss.c..lFsl....c...R...K....P....W...I.................hP..D...p.s.sl..............s.s...l.l.cV..I.-.o.CPSGAL+YpcK............................................ 1 45 79 101 +6731 PF06903 VirK VirK protein Moxon SJ anon Pfam-B_13955 (release 10.0) Family This family consists of several bacterial VirK proteins of around 145 residues in length. The function of this family is unknown [1]. 20.50 20.50 30.20 39.70 19.70 16.40 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.11 0.72 -4.26 14 134 2009-01-15 18:05:59 2003-08-12 13:13:00 7 1 124 0 27 101 2 97.70 37 70.18 CHANGED spsLsshsplhp.ALssGKsVslslDLspCpsp...sssssssps+GGhpIcuahIssDsoluFuDpHFTlssc...G+PlhpFlRYplps-Gssphssh....shshPsap .........s.sLsshsslhpALssGcsVslslDLupCpsc....tss.ssps+GGhplcuahIpsDsoluFSDsHFTlssc...scPIppFhRYplpssGsspFsshhhshssh........... 1 6 10 17 +6732 PF06904 Extensin-like_C Extensin-like protein C-terminus Vella Briffa B anon Pfam-B_6925 (release 10.0) Family This family represents the C-terminus (approx. 120 residues) of a number of bacterial extensin-like proteins. Extensins are cell wall glycoproteins normally associated with plants, where they strengthen the cell wall in response to mechanical stress [1]. Note that many family members of this family are hypothetical. 
23.50 23.50 23.60 24.90 22.80 23.40 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.40 0.71 -4.50 81 626 2009-01-15 18:05:59 2003-08-12 13:17:00 7 3 464 0 171 507 50 171.40 35 62.79 CHANGED sshChssLsttG....sph..sslssts....tsuCultssV+lpt.....suslslsssshh..sCshAtuhstWhcpslpPAApphhGp..slsplcphuoYuCRshsst...........u..u+lSEHApusAlDluuFpLsDGpcIoVhcsWp..s......spctpaL+pl+cuAC.thFsTVLGPch.sshHpDHFHhDh......uGht....hCR ............................................s..Ch.th.Lp...tts....h.h..pp.....h.sshp...ssupCslspslclps.......husltls.s..u.h.hh..sCshAhshAhalcpslpPsApphh.tp..plspIcphGoYuCRsh.pp.............s...u+hSEHApu..............sAlDluuFpLuD..G......+c.IsVh.cs.W.t...p...............tcpt...s....aL+...slps.......uuC.....phFssVLG.Psh....Nu.sHt...sHFHlDh...........suhthC+.................. 0 33 86 125 +6733 PF06905 FAIM1 FAIM; Fas apoptotic inhibitory molecule (FAIM1) Moxon SJ anon Pfam-B_13985 (release 10.0) Family This family consists of several fas apoptotic inhibitory molecule (FAIM1) proteins. FAIM expression is upregulated in B cells by anti-Ig treatment that induces Fas-resistance, and overexpression of FAIM diminishes sensitivity to Fas-mediated apoptosis of B and non-B cell lines. FAIM1 is highly evolutionarily conserved and is widely expressed in murine tissues, suggesting that FAIM plays an important role in cellular physiology [1]. 
20.70 20.70 21.30 20.80 20.30 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.25 0.71 -5.10 7 135 2009-09-11 08:59:28 2003-08-12 13:19:24 8 6 84 3 95 143 0 165.10 43 80.59 CHANGED Mo...................DlVAhWDVsLuDGlH+IpFEHGTTSGKRVlaVsG+EllR+-WMFKLVGcETF.lGstpsKATIpI-AlSG..FuYEYoL-IsGKSLcKahEsRsKso+sWlhplDG.....t-hRlVL-KDTM-Va.....sNGpph-TtGEFs-sGo-T+FohusapChIpuhSSGpKRpGIlHpLllDGhc......lsps ..........................-lVAhWsVsLs.DslH+IEFEHGTToGKRllhVDG+.EhlR..+....-.WMF.KL.V...GcETF..plG...ps........+ssI...pIDAl.....uG.....Fs.YEYoLclsGKSLc+.......ahEspoK.phpoW.l.hpl.s.G................p.chRlVL-K..-T.h-l...W.........................sNGpph.Eo.su...EFV-sGT-.T+Fplu..sp.s.shIpAh.SSGp++pGIlHsLhlssptls.h.................................... 0 22 27 74 +6734 PF06906 DUF1272 Protein of unknown function (DUF1272) Moxon SJ anon Pfam-B_14128 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 80 residues in length. This family contains a number of conserved cysteine residues and its function is unknown. 23.10 23.10 23.20 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.32 0.72 -4.12 35 286 2009-01-15 18:05:59 2003-08-12 13:22:27 6 1 282 0 95 231 22 55.90 62 72.70 CHANGED M.LELRPNCEsCD+DLPP-u.-AhICoFECTFCssCs-shh.pslCPNCGGpLltRPhR ...M.LELRPNCEpCDpcLPsDS.-AhICo.aECTFCAsCs-ph..pshCPNCGGELVcRPhR.. 0 14 45 64 +6735 PF06907 Latexin Latexin Moxon SJ anon Pfam-B_14203 (release 10.0) Family This family consists of several animal specific latexin proteins. Latexin is a carboxypeptidase A inhibitor and is expressed in a cell type-specific manner in both central and peripheral nervous systems in the rat [1]. 
25.00 25.00 26.10 26.10 18.90 18.10 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.50 0.70 -5.18 4 94 2009-01-15 18:05:59 2003-08-12 13:28:15 7 2 41 3 42 84 0 188.10 41 88.21 CHANGED slsss+thhphAApsALHahNa+pGSPStLhlLtpVQcupuhh.PpcGpKhalhFSTEpY.......cGE.phGpCoAcVha..hsppPtPsVNsTCs+hhsKpphQEEDaphYcphppLKpPLcu.sIPDsaGpIsssh+.lWcLAalGSSYVMWcpoTpsoaYhLsQloSV+QhppsDDsI-FDaTVLLHEluTQEIIPC+haLVWaPG+PlKVKYpC..-ppu.EE ......................................hsssph.htpAAtss.phhNhptGoPptlhhlttVppup.hp........PtpGpKaplpFosEch..........p.tp.hhspCoAcVha....tspcs..tP.t.lphThpt.ht.Kp..pc-D.thYpph+p.h+pPLpu.sIP.D..saGplsPphpPlhcLAhlusuY..lhWppoTEpphYhhsplpoV+Qh.pps.D.DhI-hDYslLLH-h.soQ.EIIPhphpllWhPthshK.VKaps...hps.............. 0 2 5 17 +6736 PF06908 DUF1273 Protein of unknown function (DUF1273) Moxon SJ anon Pfam-B_14270 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 21.60 21.60 22.10 22.60 21.00 20.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.02 0.71 -4.61 41 1168 2012-10-01 21:16:48 2003-08-12 13:44:34 6 5 1090 1 113 625 5 170.30 37 95.22 CHANGED M...pplhVTGY+uaELGI.Fp-cDPclplIKpAlc+pLhphlE..-.G.l-WlIhuGpLGhEhWusEVsh-L+p-.YPpl+lAllhPFpspuppWsEsNQt+hpslhppsDFscslsc.psYp.........sPtQh+phspFhlc+TDuslllYDpEpcGpsKYhhchhcchpppp.sYslphlsh--Lp-hs...p- .....................................M.pslhVTGY+uFE.Lu.l.Fp-.c.cP.clp.hIKpsl++cL...pphl.-..-..G..l-.WllhsG.pLG.hEhWssEVshEL..+..p-..Y.s....lpl..AslhPF..psau.ppWNE.s...NQt+hsphhppsDaVc.lhp.p.Yp............sPtQh+phspFhL-....po-sslLhYD......cE..p.c......up..s.cY.hhpthppht......sY.hchloF-cLpphhp.t.................................................................. 
0 35 66 91 +6738 PF06910 MEA1 Male enhanced antigen 1 (MEA1) Moxon SJ anon Pfam-B_14358 (release 10.0) Family This family consists of several mammalian male enhanced antigen 1 (MEA1) proteins. The Mea-1 gene is found to be localised in primary and secondary spermatocytes and spermatids, but the protein products are detected only in spermatids. Intensive transcription of Mea-1 gene and specific localisation of the gene product suggest that Mea-1 may play a important role in the late stage of spermatogenesis [1]. 19.00 19.00 19.40 19.40 18.80 18.30 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -4.60 2 97 2009-01-15 18:05:59 2003-08-12 13:52:09 6 2 84 0 51 84 1 122.60 50 88.78 CHANGED MAsVVLGGDTMGPERIFPNQTE-LG.HQGPoEGTGDWSSEEPEEEQEETGuGPAGYSYQPLNQDPEQEEVELAPVG-G.DssADIQDRIQALGLHLPDPPLESEDEDEEGAsALssHSSIPMDPEHVELVKRTMAGVSLPAPGVPAWApEISDAQWEDVVQKALQARQASPAWK .....................................................................................................................................................................................Y.PLs.............t.t.............................t.......t............t.t.....hth..h.h....sp...ss...ps..tDE.-E....E...GAsAhs...s..+S..SIPMDPE.HV..E.l+psMAslsLPs.ulPsWAptls-tpW.cc..l.ptlptpp..................... 1 10 15 30 +6739 PF06911 Senescence Senescence-associated protein Vella Briffa B anon Pfam-B_7525 (release 10.0) Family This family contains a number of plant senescence-associated proteins of approximately 450 residues in length. In Hemerocallis, petals have a genetically based program that leads to senescence and cell death approximately 24 hours after the flower opens, and it is believed that senescence proteins produced around that time have a role in this program [1]. This family extends to the higher vertebrates where the full-length protein is often a Spartin, associated with mitochondrial membranes and transportation along microtubules [2]. 
21.60 21.60 21.80 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.68 0.71 -4.58 59 334 2009-01-15 18:05:59 2003-08-12 13:54:44 7 9 197 0 227 312 3 172.70 26 36.16 CHANGED slspG......uspls+GlhtsushsuptlppGuphhpp+.............h...ps.....spp.......s..............h.....plsPps...............ppplc....cl+phopsstplo....pthlssVspsustlusslspphst..............................tthtshhs..lhhuolpuhuplh....DulEpuu+slhpssussosphVpH+YGppAGpls..pcshsssusssh..sshsstpltp+Alh .............................IhsGuuhlupGllpsu-.hsuptlppGush.hpp+............................h.......pP.......ppc.......P...................................sphsPss...............pcplc.................ts+phottusplo....pt.h..lssVsps.usshutplusphtt.............................................................t.....tp..t.hpsths....lhsuSlpu...Fuslh....sulEpAu+plhsssossssphVpH+YGppAupso..ppshsustsssh...sshsl.tlth+Al...................................... 0 86 137 184 +6740 PF06912 DUF1275 Protein of unknown function (DUF1275) Moxon SJ anon Pfam-B_13896 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown although a few members are thought to be membrane proteins. 
25.40 25.40 25.50 25.50 25.20 25.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.30 0.70 -5.13 176 2258 2009-01-15 18:05:59 2003-08-12 13:56:48 6 14 1641 0 642 1872 52 202.00 21 85.04 CHANGED htluhhLu.hsuGhlDAhualsh.tthFsut.hTGNhshluhslu..tssht.shth....lhslhuFlhGshhushl............httttthtpththsLhlpsslLhhsuh...lshthss.........................hhshhhluhshGhQsushp..plssssl.sTThhTGsls....shuhsls...phlhtp...st....................hpphhhhhshlsuFhhGullGuhhhphhu.h..hulhhssslhhhlsh.hh..h ...............................h..hshhLs.hsuGh.....lD.Ahsa....lsh.......t.............hasut..T..G..........Nlshluhsls....pssh.t...t....shth.........................lhslhsFhhGshhsshl............................hpphp..h..tph.hhhslhhp..sll.lhh.suh...lstths.............................................hhslhhluashGlQssshp...pl.ss.h.......s.h.sosh.hTGslpphs..hsls..........phhhsp.......pp.......................hpc.hhhhhsllhuFhlGu.lhG.u.hh...h..t..h.hs..h...ts.lhhsshhhhhhhh...h...................... 0 157 349 499 +6744 PF06916 DUF1279 Protein of unknown function (DUF1279) Vella Briffa B anon Pfam-B_7316 (release 10.0) Family This family represents the C-terminus (approx. 120 residues) of a number of eukaryotic proteins of unknown function. 
21.80 21.80 22.20 22.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.12 0.72 -3.68 31 454 2009-01-15 18:05:59 2003-08-12 15:35:11 8 12 287 0 318 442 4 100.30 29 39.87 CHANGED p+hKplh+cYGhlulusalslShhshuhhYlhVpu....GlD.lsshlpplshs............................................................pshsp.h.ts..shuphslAYul+Khh.tPlRlslTluhTPh ................................................phKphh+cYGhsslslalslShlshshhY......hhlps.....Gl.-...lsshhphlshs......................................................................pshsp.t...h.tsu...thuphslAYulaKhh.hPlRhslTluhTP.................................................................................... 0 108 170 257 +6745 PF06917 Pectate_lyase_2 Periplasmic pectate lyase Moxon SJ anon Pfam-B_14500 (release 10.0) Family This family consists of several Enterobacterial periplasmic pectate lyase proteins (EC:4.2.2.2). A major virulence determinant of the plant-pathogenic enterobacterium Erwinia chrysanthemi is the production of pectate lyase enzymes that degrade plant cell walls [1]. 
21.30 21.30 21.50 36.40 20.70 20.90 hmmbuild -o /dev/null HMM SEED 557 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.86 0.70 -6.26 5 122 2009-01-15 18:05:59 2003-08-12 15:38:44 7 1 69 3 19 88 5 490.50 53 98.49 CHANGED AolhusuupAQDs-RLouVKpYlDNVLsKAuDpY..t+PTPLLADGVDPRTGKQlEWIFPDGRsAVLSNFSAQQNLMRVLlGLSsLTG-s+Y+pRAEcIVRYaFs+YQD-o.GLLYWGGHRFVDLKTLQPEGPSEKEMVHELKNAYPYYDLMFuVDs-ATARFI+GFWNAHVYDW+sLETSRHG-YGK...........sMGuLWQScFEQQPPFFATKGLSFLNAGNDLIYSAS...LLY+aspDsGALsWuKRLAcQYVLPRDsKTGLGVYQFTQPLKR-.EPsDDuDTHSKYGDRAQRQFGPEFGssALEGNMLLKGRTSTLYSENALMQLpLuKcLGssGs-LLcWTlDGLKAFA+YAYNsssNTFRPMlANGpDLSsYsLsRDGYYGKKGTVLKPY.PAsNEFLLSYARAYsLssDs-LW+VARGIApsQGLG-lGsssGu-lKlNMpTsNs-PYALFALlDLYQAoQss-YLcLA-+lGDNllcpRalcGFFlAssc+pYAcVDsIEPYALLALEAAlRNKPsuVAPFLNGAGFTEGuYRhsDGosRVSTRDNELFhLpsGEhLpPNsKK ....................h..........h.hshLosl+pYVD.V.shupsph...pPoPLLADGhDshTtQ.h.W.FPD..G.+csslSNFuuQQNhhRshsuLS.lotDPpYpppAc..spYahQp.h..cpS.GLhYWGGHRFlsLcTLpsEGPupK-pVHELKpthPYYDLhholDp-tThpFlpGFWpAHV.DWchL-huRHGpYuK.......................hssl......uph.p.P...tTKGLoFlNAGsDLIYuA....hhhcYptD..................tuAhsWuK+LhcQYVLsRsscTGLsVYQFops.pRp..PsDDsp...TpShaGDRApRQFGPEF..G.hAhEuNhhh+.chps......Lh.-NsLh.Lplh+p..t.ss-lLpWslDGLKsah+aAYs.psNThRPhhssGQDhSsYsLsRDGYYGtKGoVlpPa..hss-aLlshsRAat....l.spD..Lhclhtslh..c.tLu-ls.........ps.t+t......hph.phppssusPYhLhALl-LhptsQssphhoLA.plGDslh+p+Yh.cGhFhtSsp+pYhclDs..PhALLsL.AAhpNK.tAls.FlssuGahcGsY+hs.GpuRh.hc.s.la........................ 0 1 8 12 +6746 PF06918 DUF1280 Protein of unknown function (DUF1280) Vella Briffa B anon Pfam-B_7457 (release 10.0) Family This family represents a conserved region approximately 200 residues long within a number of proteins of unknown function that seem to be specific to C. elegans. 
25.00 25.00 25.40 27.30 24.80 24.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.25 0.70 -5.15 7 110 2009-01-15 18:05:59 2003-08-12 15:40:44 9 4 9 0 110 87 0 176.60 23 29.56 CHANGED phhs+pshpp.Rh-phLcthpp...sss-hspFlpphl+hlsppsp.sa+..hoscEThhhht+hcLScsph+phKphh+phlGh-lLsShcplppL+ppLus.spYKlop.hssp....G+hlphhplh....cl.csltcRlEpLstpspLl.Ds..tscIhlslhuDpGutpTKlClsItNsppPNSshpllhluhasssDststlpcahsslh..Lschppl ..............................................................................................................h.......h...ph..hht.htp................t....p.p.hh.thh.phhtpp........php..ho..pshh.......hh.phphopt.hp.hKphhpp....hhh-.h.....sshpphtphpp.hss.p....aphp..........spth.h.....th.h.....slpphltpRLppLttpspLh..hcp....tspIhlsluGDKGuspsKlsl.ltNlppPNsspsllhluhapusDshpslpp.htslhpphNpl............................ 0 27 56 110 +6747 PF06919 Phage_T4_Gp30_7 Phage Gp30.7 protein Moxon SJ anon Pfam-B_14625 (release 10.0) Family This family consists of several phage Gp30.7 proteins of 121 residues in length. Family members seem to be exclusively from the T4-like viruses. The function of this family is unknown. 29.60 29.60 30.20 69.90 22.80 29.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.67 0.71 -4.12 2 33 2009-01-15 18:05:59 2003-08-12 15:45:57 6 1 32 0 0 18 0 121.30 86 99.55 CHANGED MNYINFERKYVSNGIAGS.-sICLWKHpNGoVCEIEQhMTPNYVYMRFENGITVSITMcGSNFKIALDDDFRQRDLGTHPCWNGsNRKLLVKTWIRHILSNRAKPEHLEAIFDVVLNEFDI ....MNYINFERKYVSN...GIAGSIDTICLWKHQNGSVCEIEQYM..TPNYVYMRFENGITVSITMEGSNFKIALDDDFRQRDLGTHPCWNGsNRKLLVKTWIRHILSNRAKPEHLEAIFDVVLNEFDI.. 0 0 0 0 +6748 PF06920 Ded_cyto Dedicator of cytokinesis Vella Briffa B anon Pfam-B_7154 (release 10.0) Family This family represents a conserved region approximately 200 residues long within a number of eukaryotic dedicator of cytokinesis proteins. 
These are potential guanine nucleotide exchange factors, which activate some small GTPases by exchanging bound GDP for free GTP. 20.60 20.60 20.60 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.90 0.71 -4.97 25 1240 2009-01-15 18:05:59 2003-08-12 15:48:52 8 27 253 8 667 1086 2 172.80 32 10.33 CHANGED T.F-+spslp+Fha-sP..Fohs..G+.sp..G....slccQa+R+TILossssFPhl++RlpV..hpcppl-l............oPIEVAI-plppKstELstthspp...........shphLQhhLQGslsssVNtGPhchAcsFLsp..h....sth.tcphp+L+pshc.........cFhptsppALclpcpLhttc.tp..hpccL-psa.pphpppLps ...........................................F.+s.plp+FhappP........Fp.hs.........s+...tc..........s..................phpc.ahc+TlLT..Ts.p..s...........FPhlh+....RhpV......hp.p..p.p.h.pl....................................oPlEsAI-phppKsp...-...L...pp.h..h....pptt...................................sshp......LphhLpGsV......ss.s...V.N....u.......G.......s....hpY...........tc..s.....Fhscp.h.....................c..stc..+lp...pL+phhp.......................p.h...t..hhtpuLpl.pcc.h.l.t.t.-...pt........hpccLpppa.pphtpplp.t........................................................................................ 0 199 276 456 +6750 PF06922 CTV_P13 Citrus tristeza virus P13 protein Moxon SJ anon Pfam-B_14711 (release 10.0) Family This family consists of several Citrus tristeza virus (CTV) P13 13-kDa proteins. Citrus tristeza virus (CTV), a member of the closterovirus group, is one of the more complex single-stranded RNA viruses [1]. The function of this family is unknown. 
25.00 25.00 184.90 184.70 20.00 18.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.56 0.71 -4.24 3 58 2009-09-11 02:40:01 2003-08-12 16:01:31 6 1 1 0 0 53 0 114.90 92 100.00 CHANGED MSIRRVWLKVMAVITVLWYGKEPSISEGYNALMNDDFKFIDTHFSNVSYAKKCYDLANFDLDFLRIVIIPLSGGTVNESRADRTNVSEIVESHVSDRDRM+ILLRNKRIQIPSLLPCDN MSIRRVWLKVhAVITVLWYGKEPSISEGYNALMNDDFKFIDTHFSNVSYAKKCY-LANFDLDFLRIVIIPLSGGTVNESRADRsNVSEIVESHlSDRDRMsILLRNKRIQIPSLLPC.... 0 0 0 0 +6751 PF06923 GutM Glucitol operon activator protein (GutM) Moxon SJ anon Pfam-B_14714 (release 10.0) Family This family consists of several glucitol operon activator (GutM) proteins. Expression of the glucitol (gut) operon in Escherichia coli is regulated by an unusual, complex system which consists of an activator (encoded by the gutM gene) and a repressor (encoded by the gutR gene) in addition to the cAMP-CRP complex (CRP, cAMP receptor protein). Synthesis of the mRNA, which initiates at the promoter specific to the gutR gene, occurs within the gutM gene. Expressional control of the gut operon appears to occur as a consequence of the antagonistic action of the products of the autogenously regulated gutM and gutR genes [1]. 25.00 25.00 30.20 29.90 19.40 19.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.12 0.72 -4.41 26 859 2009-01-15 18:05:59 2003-08-12 16:06:25 6 1 822 0 71 301 39 109.20 43 81.79 CHANGED hhhL..IlhhshAallQhlLGahQl+pFN+sapplp.....ppG..+VulG+ppG+h+sGsllllulD.cpspllcuphMpGlTVFARh+slsthpGhsltplp...shlhtps+hsppAl .........................l..sLlslAlIAas..sQluLGhaQIp+FN+sacpLp.....QpG..+VulG..Rs.uG...RF..K.s.pslVhlAlD.-ppRls-shhM+GlTVFAR.pclsshsGhclt-lp....scllhscD.LsQpAl........................... 
0 23 39 56 +6752 PF06924 DUF1281 Protein of unknown function (DUF1281) Moxon SJ anon Pfam-B_14730 (release 10.0) Family This family consists of several hypothetical enterobacterial proteins of around 170 residues in length. Members of this family are found in Escherichia coli, Salmonella typhimurium and Shigella species. The function of this family is unknown. 25.00 25.00 35.20 35.10 23.20 22.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.61 0.71 -4.76 4 277 2009-01-15 18:05:59 2003-08-12 16:09:33 6 1 198 1 17 220 2 126.30 69 46.55 CHANGED LhITGpss.lDtlppWssGchlPhYR+AlpQSI+LFLAGCAGlLpP..TcshcYs.aPsLlutGsGtsSspNlAFp+WLsLLppsVsLDtpssp.I-RLYhQSGluphKWEslPssA+cphsslhspphuDWFGls ...........MaFSGEPAQIAEIKRLASG.AVTPhYRRATNEGIQLFLAGSAGLLQs..TEslcaEPCPGLTAA.G......R...G.sVSPENI.AFTRWLTHLQsGVLLDEQNCLMLHE...LWLQSGTGpRRWEsLPDDsR-oITshFTsKRGDWCshW..................... 0 0 4 13 +6753 PF06925 MGDG_synth Monogalactosyldiacylglycerol (MGDG) synthase Vella Briffa B anon Pfam-B_8075 (release 10.0) Family This family represents a conserved region of approximately 180 residues within plant and bacterial monogalactosyldiacylglycerol (MGDG) synthase (EC:2.4.1.46). In Arabidopsis, there are two types of MGDG synthase which differ in their N-terminal portion: type A and type B [1]. 
20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.30 0.71 -4.35 12 972 2012-10-03 16:42:30 2003-08-12 16:37:42 6 10 725 0 253 763 37 160.40 26 40.98 CHANGED HppuAculp........pthptt.h.pphplhhhDshp.hssh.hphhhps.Yhhhl+h.....u...slathhahhsp.ch.....pshhsthtthhsp.cltshlpchpPDllIsTHPh.tth.l.hL.+p+phh.pshhs.sllTDats.H.hWh+.tlDthalssp-htcchlp.pGlspsplhshGIPl ......................................................................................HhpsApulh........pthpp...........sp.....hplhhhDh..ht.t.t.p.P.h..h..ss.lhpph..Y.hp..h.ch...................h.....ph.at.h.hah..h.s..p.......c.hh........ppph.h.......h....h....p...h....hhh..p....+...lh.p...ll.........p.c......tcPD..l..Il....sTaP.h.........h.l.t...l...+.......p.......c...th....h........s.......l.P.ls...sVlT.D....a.......h.H..phW..lp.st..s.-.tYa.Vuoc.......cs.+p.phlp..hG.lssspltsoGIPl........................................................ 0 109 193 226 +6754 PF06926 Rep_Org_C Putative replisome organiser protein C-terminus Vella Briffa B anon Pfam-B_6517 (release 10.0) Family This family represents the C-terminus (approximately 100 residues) of a putative replisome organiser protein in Lactococcus bacteriophages [1]. 21.40 21.40 22.30 47.80 20.30 20.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.12 0.72 -3.89 3 25 2009-01-15 18:05:59 2003-08-12 16:56:25 6 3 20 0 5 17 0 91.90 62 35.97 CHANGED NSLLS+FLDoFINFSSKNISKRAMApsEFlKLPSFQK-QAVIGAcNYIQsYKNEHPDDcTG+YSVNuYsFL-NsMFMsYQEKVKADTGYDEDLGF ..NSLLS-YLDoFIpFSSKNIuK.RAMApsEFhKLsSEpKcQAVIGAcNYhpWYKpEsP-DcTtKFSlNuYsFL..-sshFcsaQpKVKsc...cEsLGh........... 0 3 3 3 +6757 PF06929 Rotavirus_VP3 Rotavirus VP3 protein Moxon SJ anon Pfam-B_14798 (release 10.0) Family This family consists of several Rotavirus specific VP3 proteins. 
VP3 is known to be a viral guanylyltransferase and is thought to posses methyltransferase activity and therefore VP3 is a predicted multifunctional capping enzyme [1]. 18.10 18.10 52.30 52.30 17.00 16.20 hmmbuild -o /dev/null HMM SEED 684 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.49 0.70 -13.31 0.70 -6.64 3 582 2009-01-15 18:05:59 2003-08-13 11:02:47 6 3 240 0 0 463 0 551.40 85 83.42 CHANGED MKVLAL.RRulspsYADTQVYsHDssKDYYENAFLISN.lTTHNILYLD.YSI....KslEILNKSGIAALIhIshD+LtILI+SNFTYDYphcIVYLHDYSYYsNNE.IRTDQaWLTpTNIEEYLLPGWKLTYVGpsGpETRGHYsYSFhCQNTATDDDIIY-YIYS.....NulDF.........pNFlLptlpcRMTTAVPFcRLSNRIFR-+LFS...K..KssINIGPRNESMFThLc..YPpIpNYSANualVSDLI+LsQE+WlGK+ISQFDIGQFKNMhNVLTsIYYYYNhYKuKPpIYMIGSAPSYWIYDl++Y.sFsIETWDPLDTPaSocHHKchFsIsDlcKLKDNSILYIDIRSDRssADWRcWRKpVEEETIpNLEIuYKYLcsGKs+lCClKMTAMDlELPhouhLLHaPTTcI+SEaYLLlDpahLpcpKRFVPKGlhYSFINNIhTDNVFISssYKVKspN-.aVVALYuLSN-hNsR-KVIcasNsQKpolITVRlNNTFcsEh+lsFKosYDaTFLPSDFosc.sTIlTSYDGYlGlFGLSISL-SKuTGNNHLFIlsusccYsplDoFuoHhGISRRSHSlRFSESATThSGYIFRDloNGKFNLIsTNlENuVSGHVYNALlYYRYNYoFDLlRWIpLHupDcVcIcGG+YYEHAPsELlYACcSAhVFApLQsDLTll+YSNclpcYIpsp ...........................MKVLAL.RHSVAQVYADTQsYlHDDSKDEYENAFLISN.LTTHNILYLN.YSl.....KTLcILNKSGIAAVElQS.DELFs.LIRCNFTYDYENNIlYLHDYSYYTNNE.IRTDQHWITKTDI.DYLLPGWKLTYVGYNGKsTRGHYNFSFhCQNAATDDDIIIEYIYS..............NELDF.........QNFLLRKIKERMTTSLPIARLSNRVFRDKLFPSlVshacKVIN.VGPRNESMFTFLN...FP.TIKQFSN..GAYlVKH.TIKLKQE+WLGKRVSQFDIGQYKNMLNVlTTIYYYYNLYaSKPIIYMLGSAPSYWIaDlKQY.SDFpFETWDPL..DTPYSo.h......HHKELFa.pD.VsKLKDNSlLYIDIRTDRtNhDWKEWRKlVEpQTlsNLNIAYKYLuTGKAKVCCVKhTAMDLELPIoAKLLHHPTTElRSEFYhIlDlWDhhs.I.KRFIPKGVhYuaINNlhTENVFIQ....PF...KLKs.p.s-...YIVALYALSNDFNsR...pDVIpLINpQKpuLI....TVR........hNNTFKDEPKVsFKNIY......DWTFLPTDFEhp.-SIITSYDGCLGhFGLSISLuSKPTGNNHLFIlsGTDKY.KLDQaANHMuISRRSHQIRFSESATSYSGYIFRDLSNN.NF..NLIGTNVENSVSGHV......YNALIYYRYNYsFDLKRWIYLH.ShGKsulEGGRYYEHAPIELIYACRSA+EFAhLQDDLTVLRYuNEIEtYINK.V........................................................ 
2 0 0 0 +6758 PF06930 DUF1282 Protein of unknown function (DUF1282) Moxon SJ anon Pfam-B_14843 (release 10.0) Family This family consists of several hypothetical proteins of around 200 residues in length. The function of this family is unknown although a number of family members are thought to be putative membrane proteins. 23.40 23.40 23.60 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.39 0.71 -4.60 19 937 2012-10-01 22:34:14 2003-08-13 11:06:06 7 8 874 0 113 703 106 178.10 39 84.19 CHANGED llspPupp.applcpcccshthhhlhhllhhhlhs.ssshhhssh..hhthhst.............s.phohts....uhhhulhshhhh..hhhlhuhh...hphhschas..upss...hppshhhuuYsssPhhluuhsshh.slhhsh.............................shhlhhsaslhLlahGl.hhh..sl.cppcuhhhssh.....hllhsl .................................................................LaoHPscE.hpsIp..p.Es.E.ol.u+.a....Ys.a..HV..LLhA...u....IP...VlC.....Aa..I..G.T..T.QlGW.s.hG-G.s......................l.pLohho...........ulslAVlhYslhL.....AGVAlMGtl.....I.aW.M.A.RsYs..p+PS.....ls.+ChlFAGYs...AT...PL.F...L.u.GL..V.AL..YPllWLsu..........................................................llGslAL.h.YosYLLYLGlPoFh....sI....s+.-EGl.FSSSsl....ulGl.....Llltlhhh.h................................. 0 19 46 85 +6759 PF06931 Adeno_E4_ORF3 Mastadenovirus E4 ORF3 protein Moxon SJ anon Pfam-B_14868 (release 10.0) Family This family consists of several Mastadenovirus E4 ORF3 proteins. Early proteins E4 ORF3 and E4 ORF6 have complementary functions during viral infection. Both proteins facilitate efficient viral DNA replication, late protein expression, and prevention of concatenation of viral genomes. A unique function of E4 ORF3 is the reorganisation of nuclear structures known as PML oncogenic domains (PODs). 
The function of these domains is unclear, but PODs have been implicated in a number of important cellular processes, including transcriptional regulation, apoptosis, transformation, and response to interferon [1]. 25.00 25.00 37.60 37.60 17.70 17.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.53 0.71 -4.02 7 84 2009-09-11 07:31:22 2003-08-13 11:13:02 6 1 67 0 0 45 0 111.80 67 97.19 CHANGED CLRMsVEGALpcLFsMpGhsLpp.hhcIIptW+sENYLGhVQsCuhMIEEh-ss.uFulllFL-VRV.sLLEAsVpHLENRlhFDLAVhFHQcSGG-RCHLRDL+FplLtsRLE .....CLRMpVEGALpELFpMsGlDLppphscIIQGWKNENYLGMVQ-CshM.I..-El-su.uFsllLaLDVRVEsLLEATVEHLENRlsFDLAVhFHQHSGG-RCHLRDLHFpVLRDRLE................ 0 0 0 0 +6760 PF06932 DUF1283 Protein of unknown function (DUF1283) Moxon SJ anon Pfam-B_15035 (release 10.0) Family This family consists of several hypothetical proteins of around 115 residues in length which seem to be specific to Enterobacteria. The function of the family is unknown. 25.00 25.00 94.50 31.80 22.40 17.80 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.88 0.72 -3.96 9 515 2009-01-15 18:05:59 2003-08-13 11:31:22 6 2 510 0 40 138 1 84.80 85 74.80 CHANGED sTs+lVl-SGDoAhS+psAp.sKEQWNDT+uLRpKVNpRsEKEaDKhDtAhDu+DpCppSsNlNAYWEPNT.RCLDRRTGRs.IsP ............ETsKLVIESG.DSAQSRQcAAMEKEQWNDTRsLRQKVNKRsEK.......EWDKADAAFDNRDKCEQSANINAYWE....PNTLRCLDRRTGRV.ITP...... 0 1 8 23 +6761 PF06933 SSP160 Special lobe-specific silk protein SSP160 Moxon SJ anon Pfam-B_14947 (release 10.0) Family This family consists of several special lobe-specific silk protein SSP160 sequences which appear to be specific to Chironomus (Midge) species [1,2]. 
25.00 25.00 25.80 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 756 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.58 0.70 -6.55 2 5 2009-01-15 18:05:59 2003-08-13 11:37:41 6 2 3 0 1 5 0 618.40 65 77.63 CHANGED MNIKVILVCALVAIFFAQVAEGGPIANFVGFlISLLFSLFEVMlSVVFDVKSFTSLSssTsNuThPuFAopVGGGRFosIhpuoFNhIAMISANlQAIQSGSGSsSusSSSSssSTosSsoTTSssoTTooNSTT.......SSNSTSSGLTouASVVSLIDThAWVYQDSSVGIAYLMVSILALFYGQSVSAPPYsDLGIPALPANsSGsGVPQSVQI+AAIsYlNlTI.FIshTGQQFEDLQGPVsTDCGCPNTTSVAPLVhEWEAIhAALpAhAsuoASuNSTosSsSTosoTTsSsSTTTTNSTToTNSTSSSNSoTIAGsIDIuANhTlALQsLQALLhQEuTCAPsLAANAKKSGVR-huPCKsu..uSuCA+SGp+KVKRKARLEKMRAKsRRAVGNR.KGSMKKRVRSRAKKFGcAAKSGVRRYRKNIKaVYIPPVMASLNAYAALMASLS-SIS.QS-SALNSTDpACNSTussTDtAVIsATssVTDMFsNFTAMVlNNTVAaPNCTQaADMAL.MlSQIN-QIIuCGSQSDsApSSIYhNVTIsIVAMAQEYNNFASMSDKCTRSFANSWLWMYIKWVFYRMGMTSGlPNFLACQTKApSSLTAhLASFNATlsATIuAASANsSpVQSSEAuCIESSLu-AusILtMaEsAYQNCssPGSV..PstssTTTooTTTTTTTTTTAAPTTTTTKAANAPFTYPLCsL.MossCshGhssCTYPhISSAGCCPSGKTLNTGLGGRGCCK .........MNIKVILVCALVAIFFAQVAEGGPIANFVGFlISLLFSLFEVMlSVVFDVKSFTSLSsPTsNuThPuFAopVGGGRFosIhpuoFNhIAMISANlQAIQSGSGSsSusSSSSssSTosSsoTTSssoTTooNSTT.......SSNSTSSGLTouASVVSLIDThAWVYQDSSVGIAYLMVSILALFYGQSVSAPPYsDLGIPALPANsSGsGVPQSVQI+AAIsYlNlTI.FIshTGQQFEDLQGPVsTDCGCPNTTSVAPLVhEWEAIhAALpAhAsuoASuNSTosSsSTosoTTsSsSTTTTNSTToTNSTSSSNSoTIAGsIDIuANLTlALQNLQALLhQEuTCAPsLAANAKKSGVR-huPCKsu..uSuCA+SGp+KVKRKARLEKMRAKCRRAVGNR.KGSMKKRVRSRAKKFGKAAKSGVRRYRKNIKaVYIPPVMASLNAYAALMASLS-SIS.QS-SALNSTDpACNSTussTDtAVIsATssVTDMFsNFTAMVlNNTVAaPNCTQaADMAL.MlSQIN-QIIuCGSQSDsApSSIYhNVTIsIVAMAQEYNNFASMSDKCTRSFANSWLWMYIKWVFYRMGMTSGlPNFLACQTKApSSLTAFLASFNATlsATIuAASANsSSVQSSEAuCIESSLuDAAsILtMaEsAYQNCoAPGSV..Ps-T.sTTTTTTTTTTTTTTTAAPTTTTTKAANAPFTYPLCsL.MossCshGhssCTYPhISSAGCCPSGKTLNTGLGGRGCCK....... 
0 1 1 1 +6762 PF06934 CTI Fatty acid cis/trans isomerase (CTI) Moxon SJ anon Pfam-B_14967 (release 10.0) Family This family consists of several fatty acid cis/trans isomerase proteins which appear to be found exclusively in bacteria of the orders Vibrionales and Pseudomonadales. Cis/trans isomerase (CTI) catalyses the cis-trans isomerisation of esterified fatty acids in phospholipids, mainly cis-oleic acid (C(16:1,9)) and cis-vaccenic acid (C(18:1,11)), in response to solvents. The CTI protein has been shown to be involved in solvent resistance in Pseudomonas putida [1]. 25.00 25.00 26.10 34.10 23.60 23.40 hmmbuild -o /dev/null HMM SEED 694 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.53 0.70 -13.15 0.70 -6.28 33 247 2009-01-15 18:05:59 2003-08-13 11:44:16 6 2 227 0 54 225 50 647.60 51 88.45 CHANGED AssPTRLF.lDApssppWRp+GFasVLs...ps.tushpAuLhh+MLpLKpppPLsssshL.s-shDhulsRsppCPs..cEassYtpspPptGMPaGhPuLossEassLtpWLppGA.....shstps.s.ospEtppIspWEsFLNpsuhKppLsARYlYEHLFLAHLYF..........tssp...sscFFcLVRSpTPsGpPlchIuTRRPaDDPGsc...........hYYRLhPlpuoIVcKTHhsYsLssp+LpRhppLFhss-a.pVspLPuYpsphuANPFpoFpsIPscARYpFMLDpApahlhsFIKGPVCRGQlALNVIpD+FWVhFhcP-ps.hsssssahpppsshLtLPupt-ssss.hhs.WhpYuc...ppscYhcs+pphhschh.ts....hshshlWsGs.s.NcNAhLTlFRHFDSASVhKGLlGphPKTsWllDYPLLERIaYhLVAGFDVYGNVuHQL.TRLYMDhLRhEGEsNFLpLLPtcpRcshhssWYpssu.plpp.alpts..hshspPoulpapos.sPKpEhhpplhp.+hsslts.sp.sls...sst..............hppscph...Lsplsuhtus.ulsh...LPE....lohlhl................csssG..cpp.lYSLl+NpAHsNluhLhuEchchpPppDoLTlh.GllGSYPNhhapl.pps-lssFVstlpshcsppD.a.ppll-+aGlRRosPpFWpa.Dplpphh+pspPlEuGlLDhNRaENc 
...................................................................................................................AssPTRLa..DApsTppWRptGFaPVLs...ps.tuNhpAulhARhL..KcppPLPtpspL..cshDhSlsR.pppCPo..pEh-patpspPphGMPaGhPsLospEYsTLhpWLppGA......shspth....shospEts.lspaEshLNpsutKppLsARYlYEHLFLuHLYF..............s.tp.ps+FFpLVRSpTPPGp..............PlchIsTRRPaD.DPGs-..........+lYYRLhP.QsTIVcKTHhPasLspp+ltph+phFlssDY.pV.spLPuYpPphuANPhpsFhslPscARapFhLDNAp.hlhuFIKGPVCRGQlALNVIpD+FWVhFhDP-+sph.spsspFhtppsstLpLPupp-sssh.slo.W.l..pYut...ppscYhcA+s-hhsphhtsG....lohshlWcGs...s.NsN.AsLTlFRHFDSASVlpGLlG-sPKTsWlhDYsLLERIHYLLVAGFDVYGNhGHQLhTRhaMDaLRhEGEsNFltLLPtDhRcp.husWYQspu.phptal.pts.shsh-pPTulpa.oc.sPKpELhtpLtc.pltsl.u.sc.sIs...pst.............hhtpsEts...Lpplsphtut...GLhs.......lPp.......lsMLhl................cspp.G....ctp...laTLl+NpAHoNlutLhsEptpt...pPtpDsLTll.GVlGSYPshhhsl.pp.splsphsptltshcs-.pDY..tLl-+aulRRSsPpFWsauDtlppah+pspPlEhGlLDhNRaEN......... 0 11 26 42 +6763 PF06935 DUF1284 Protein of unknown function (DUF1284) Moxon SJ anon Pfam-B_14822 (release 10.0) Family This family consists of several hypothetical bacterial and archaeal proteins of around 130 residues in length. The function of this family is unknown, although it is thought that they may be iron-sulphur binding proteins. 25.00 25.00 25.30 26.60 24.00 22.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.83 0.72 -3.85 30 215 2009-01-15 18:05:59 2003-08-13 11:48:14 6 2 214 0 76 160 6 105.20 33 74.01 CHANGED GYSssFlcNhscIhpcl.t...ppstpIplVsssDcICtsCPsptcst.Cp....sps...pVpchDc....pVlchL...slcsGphhshschhppltcphps.sclcclCpsCcWhphsh ......GYSssFVcNhctIst.+L.....spspsIplVsusD-ICsshsst.s.csp...Cp............s-s......VpthDc......tVhchL......uLc........sGc...hph.s.p.hht.p.h.hcchhs.sslcchCpsCpW.sls......... 
0 25 52 62 +6764 PF06936 Selenoprotein_S Selenoprotein S (SelS) Moxon SJ anon Pfam-B_15061 (release 10.0) Family This family consists of several mammalian selenoprotein S (SelS) sequences. SelS is a plasma membrane protein and is present in a variety of tissues and cell types [1]. The function of this family is unknown. 21.30 21.30 21.50 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.48 0.71 -4.76 5 99 2009-01-15 18:05:59 2003-08-13 12:01:14 6 4 75 1 58 110 0 156.90 34 90.36 CHANGED M-.hcD-sshsNc.lPshEspuL..sFLpsoVGphLSpYGWYLLlGCVllYlLIQ+Lst+h.RulpsRpoptshospDP-pVVRRQEAlEAARhRMQE-LDA+AscFKEKQ+QLEEEKRRQKIEhWDRMQEGKSYK.sSu+lsp-uosEsSoSSul.KPKo...EKKPLRuoGYNPLTGsGGGSCSWRPGRRGPSSGG ..............................h...........................h..sls.hLusYGWYll.hsslllYllhp+.lptc.h...+......shp.p...pp....p..t.s...t..s.s....h-P-sVs++QEAlt....AARh+.MQEELsAp..s..c....ca....+...EK....+phEEEK......R+pcI......chW-shpp..G+u.h+..st.p.....tppss..tsu......suosh.K.K....cp+sL.RsusY..NPLsGp.uGussuaRPs+Rs.sstGG..................................................... 1 25 28 42 +6765 PF06937 EURL EURL protein Moxon SJ anon Pfam-B_14914 (release 10.0) Family This family consists of several animal EURL proteins. EURL is preferentially expressed in chick retinal precursor cells as well as in the anterior epithelial cells of the lens at early stages of development. EURL transcripts are found primarily in the peripheral dorsal retina, i.e., the most undifferentiated part of the dorsal retina. EURL transcripts are also detected in the lens at stage 18 and remain abundant in the proliferating epithelial cells of the lens until at least day 11. The distribution pattern of EURL in the developing retina and lens suggest a role before the events leading to cell determination and differentiation [1]. 
22.00 22.00 22.20 23.80 20.60 21.90 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.99 0.70 -4.98 6 69 2009-01-15 18:05:59 2003-08-13 12:32:25 6 2 45 0 39 56 0 244.50 59 93.19 CHANGED MpEEEQFVsIDLNDDNICSVCKLGTDp-TLSFCHlCFELsIEGVP+SsLLHTKSLRGH+DCFEKaHLIANQDCsRSKhS+SsYEtVKsIlSKKINWIVQYAQNKDlso-oEsSKsoQH.LhsaRHQs-+KLLPQ.DSQVPRYSuKWh.EuNuuuhSshuQplLEp+csp-FsLGhLppssst......LWs.sp.spsQKpEpssSus.sp.QRpasHaSREEL.NpMoltEL+QLstpLLpQIQ-VFEELottVQEKDSLuSELHVRHlAIEQLLKNCSKLPCLQhGRAGhKu ...............................................................................MNEEEQFVNIDLNDDNICSVCKLGTD+ETLSFC.HlCFELNIE...GVPKSsLLHT+SLRGHKDCFEKaHLIANQcCP............RSKLSKST.YEEVKoILSKKINWIVQYAQNKDlDSDSECSK...ssQH.pL...hsFRH+s-cKLLPQFDSQVP+YSAKWl.-GssuulSsps.QpIL...EppcssDFtLuhLpsssus..........LWs.sp.s.psQ+pEcs.us..stsssppp.p.+YSREEL.ssMolsElcQLsscLhpQIQ.-VFE-LsttVQEKDSLuSpLHVRHlAIEQLlKNhSKLP....pLQh.GRs.Gh+.................................................................... 0 7 9 19 +6766 PF06938 DUF1285 Protein of unknown function (DUF1285) Moxon SJ anon Pfam-B_15060 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. The structures revealed a conserved core with domain duplication and a superficial similarity for the C-terminal domain to pleckstrin homology-like folds. The conservation of the domain- interface indicates a potential binding site that is likely to involve a nucleotide-based ligand, with genome-context and gene-fusion analyses additionally supporting a role for this family in signal transduction, possibly during oxidative stress. 
25.00 25.00 32.20 31.20 24.90 24.30 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.93 0.71 -4.23 81 468 2009-01-15 18:05:59 2003-08-13 12:35:09 6 2 462 3 150 424 462 143.40 43 74.16 CHANGED hcItpDGoWaYpGoPIsR.sLV+LFSolL++-s-....caaLVTPVEKlsIpVEDAPFlAV-hchp.t.puptp.sLpFpTNluDhlhsG.s-HPLRlp...............................hssps..sEs.sPYlpVRss......L-AhlsRslaYcLl-luppps...stshhulhSuGthFs ...hcIptDGoWaY.GoPIsRtsLV+LFo.olL.++-sD.....caaLVTP.VEKlsIcV-DAPFlAV-hchp.s.ps....psp.s.LpFpT.NlsDhlpsss-H.PLRht...............................hpsts.....sp..pPYltVRss......LcAhlsRslaYcLVph.upttc..........pG.thhulhSuGthF...................................................................... 0 44 85 116 +6767 PF06939 DUF1286 Protein of unknown function (DUF1286) Moxon SJ anon Pfam-B_15105 (release 10.0) Family This family consists of several hypothetical archaeal proteins of around 120 residues in length. All members of this family seem to be Sulfolobus species specific. The function of this family is unknown. 25.00 25.00 32.50 45.10 24.30 23.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.83 0.71 -3.90 8 53 2009-01-15 18:05:59 2003-08-13 12:41:15 6 1 24 0 15 48 0 109.80 56 66.48 CHANGED MKLpTHYVFohGLLTLlsShhh.......sFYhSLllSullSllGNoLIDRLGHpEIpspYGh.IssRTPLTHThPRSlhWGLlsulP.lIhhLtahYGa...........phlhhlLlsGllsGPSHMLL ...........M+L+THYlFSsGLLTLLsShlh.....p.FYhuLllSullSVlGNoLIDRlGH+EItT+YGh.IPVRTPLTHTlPRSVlWGlloslP.hIlLLhaaYGh...........ppllLllLLsGVlVGPSHMLL. 0 4 6 14 +6768 PF06940 DUF1287 Domain of unknown function (DUF1287) Moxon SJ anon Pfam-B_15116 (release 10.0) Domain This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. This family is related to Pfam:PF00877. 
25.00 25.00 38.30 38.20 22.00 19.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.24 0.71 -4.83 25 504 2012-10-10 12:56:15 2003-08-13 12:43:34 6 2 497 0 44 231 32 160.30 69 78.88 CHANGED llsuAcpplsssltYDsuYhplsYPsGDVPtspGVCTDVVlRAhRp.hslDLQchVHEDM+pNFusYP..+hWGLp+PDsNIDHRRVPNLpsaFpRputsLslotss.......p-YpsGDIVoWtL.sss.LsHIGIVSD+cpssGh.PhllHNIGsGsp..EEDhLFpa....cI...sGHYRa ............IADGARpQIGsTLFYDPAYhpLoYPGGDVPpERGVCSDVVIRALRS.QcVDLQKLVHEDMAKNFApYP..QKWpLKRPDSNIDHRRVPNLETWFoRHsKT.....+PhSKNs.......SDYQAGDIVSWRLDN....G....LAHIGVVSDs.hA..R.D.GT.PLVIHNIGAGAQ...EEDVLFuW....+M...VGHYRY...... 0 17 30 38 +6769 PF06941 NT5C 5' nucleotidase, deoxy (Pyrimidine), cytosolic type C protein (NT5C) Moxon SJ anon Pfam-B_14848 (release 10.0) Family This family consists of several 5' nucleotidase, deoxy (Pyrimidine), cytosolic type C (NT5C) proteins. 5'(3')-Deoxyribonucleotidase is a ubiquitous enzyme in mammalian cells whose physiological function is not known [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.28 0.71 -4.79 6 770 2012-10-03 04:19:28 2003-08-13 13:06:10 7 7 677 18 215 585 488 168.80 25 85.77 CHANGED R.lhlhVDhDGVhADFhGthlphasccFtcs..lh.E-hpu.ashcpaG.hpPspt-plapa.hpp.uFFtsLEPlPGAh-sL+clusht.c.VhIsssshh+Y.H..ult-KhpWl-pHhshl.htsllls+-KslVtuDlhIDDpPcNltphps.sh.pILFsssaN+Hlc...sphRssSWp-sachI..lpuKth .....................................................................p....lslDhDtVLsDh....hst....hl....ch...hs..........at....t..p........l..p.........h.c..t..h....p...s.....h......h...c.t.......................-..ht.....t.h......h..hph...h.cp..s.s..a...F..p.......p...l...c.s...h..P.s.A....E...sl.+cLs.c...p.........h...........l.a.I.so.sshch.p..............sh.p..-........K...h.c..W.Ltc.a....hs........h..........l....s.............p.....c..h...l.....h..........s.....t.........c.......K.......s..........l.........l.........p......u...........D........h....L........IDD..pPcp..h.......t................h..t..s..t.s.......lha.ss......saN..p...p...........h.RlpsW..p-h.t..........ph.................................................................................... 0 84 130 168 +6770 PF06942 GlpM GlpM protein Moxon SJ anon Pfam-B_15323 (release 10.0) Family This family consists of several bacterial GlpM membrane proteins. GlpM is a hydrophobic protein containing 109 amino acids. It is thought that GlpM may play a role in alginate biosynthesis in Pseudomonas aeruginosa [1]. 
25.00 25.00 25.10 27.50 22.20 24.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.27 0.72 -3.79 14 669 2012-10-01 21:19:26 2003-08-13 13:30:26 7 1 664 0 57 200 0 106.30 68 95.42 CHANGED lKsLlGAhsVLlIulLSKoKsaYIAGLVPLFPTFALIAHYIVGoERuh-sLRsThlFGlaSlIPYhsYLhulYYFohhhpLstsLssAslsWllsAslLlhsWs+hp ....IKAALGALVVLLIGlLuKTKNYYIAGLIPLFPTFALIAHYIVASERGIEALRsTIlFSMWSIIPYFlYLloLWYFoGhMRLPsAhlGuVsCWGlSAWlLIhCWIKhH......... 1 7 19 38 +6771 PF06943 zf-LSD1 LSD1 zinc finger Moxon SJ anon Pfam-B_15249 (release 10.0) Domain This family consists of several plant specific LSD1 zinc finger domains. Arabidopsis lsd1 mutants are hyper-responsive to cell death initiators and fail to limit the extent of cell death. Superoxide is a necessary and sufficient signal for cell death propagation. LSD1 monitors a superoxide-dependent signal and negatively regulates a plant cell death pathway. LSD1 protein contains three zinc finger domains, defined by CxxCxRxxLMYxxGASxVxCxxC. It has been suggested that LSD1 defines a zinc finger protein subclass and that LSD1 regulates transcription, via either repression of a pro-death pathway or activation of an anti-death pathway, in response to signals emanating from cells undergoing pathogen-induced hypersensitive cell death [1]. 20.20 20.20 20.30 20.20 19.70 20.00 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.87 0.72 -6.91 0.72 -4.33 23 365 2009-09-10 17:13:47 2003-08-13 14:14:43 7 6 52 0 186 373 2 25.00 54 21.25 CHANGED Cs....GCRslLtYPhGAsuVRCuhCpsV .......Cu....uCRslLhYPhGAs..SV+CulCpsV.. 0 32 115 160 +6773 PF06945 DUF1289 Protein of unknown function (DUF1289) Moxon SJ, Eberhardt R anon Pfam-B_15170 (release 10.0) Family This family consists of a number of hypothetical bacterial proteins. The aligned region spans around 56 residues and contains 4 highly conserved cysteine residues towards the N-terminus. The function of this family is unknown. 
Structural modelling suggests this domain may bind nucleic acids [1]. 20.20 20.20 20.30 20.50 20.00 19.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.81 0.72 -4.49 164 1514 2009-01-15 18:05:59 2003-08-13 14:58:21 8 8 1024 0 413 1099 1383 49.30 37 55.23 CHANGED pSPClslCphst.sslChGChRoh-EItpWpphsssc..+ptlhp..plstRtst .............SPClu..lCph-t.........pshCpGChRot-ElhsWpphocsc..+ptVhp.hhppRh..h.................. 0 68 178 296 +6774 PF06946 Phage_holin_5 Phage holin Moxon SJ anon Pfam-B_15309 (release 10.0) Family This family consists of several Listeria bacteriophage holin proteins and related bacterial sequences. Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. It is thought that the temporal precision of holin-mediated lysis may occur through the build up of a holin oligomer which causes the lysis [1]. 22.40 22.40 22.90 22.60 22.30 22.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.13 0.72 -3.82 2 44 2009-09-10 23:38:34 2003-08-13 15:04:36 6 1 41 0 4 35 2 88.70 41 94.32 CHANGED MEFGKELLVYMTFLVVVTPVFVQAIKKTELlPSKWLPTVSILlGAILGALAT.LDGSGSLATMIWAGALAGAGGTGLFEQFTNRuKKYGcDD. .............................plLhahohLsllssl..h..VQuIKK.T.cl..lPsKalPsVSlllGslLGAlAs.lss.ss.sLsshlWA...Gu....lAGhuuTGLFEthTp..R.......................... 0 1 1 4 +6775 PF06947 DUF1290 Protein of unknown function (DUF1290) Moxon SJ anon Pfam-B_15248 (release 10.0) Family This family consists of several bacterial small basic proteins of around 100 residues in length. The function of this family is unknown. 
24.40 24.40 39.60 39.60 24.30 24.30 hmmbuild --amino -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.95 0.72 -4.08 36 498 2009-01-15 18:05:59 2003-08-13 15:07:14 7 1 497 0 137 265 5 88.80 52 78.09 CHANGED slPstausYLulAllAALDolFGulRAtl-cpFDsclFloGFFuNslLAuhLsaLGDpLGV.s.LahAuVlVFGlRlFsNhAhIRRhLlc .......lPsshpPYLsIAllAALDAlFGulRAhLccpFcs+VFVsuFhuNVllAALlValGDpLGV..s.L.sAslVVhGlRIFsNsAuIRRhlh.t....... 0 58 106 124 +6777 PF06949 DUF1292 Protein of unknown function (DUF1292) Moxon SJ anon Pfam-B_15310 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown. 21.30 21.30 21.30 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.77 0.72 -3.63 79 1823 2009-01-15 18:05:59 2003-08-13 15:12:29 6 4 1459 0 278 887 96 76.20 32 73.45 CHANGED EpGsEhphpllhsh-s.-..s.ppYlllhPht......pp.-p....t...-lhhhph.tp...--s...spttLhsl..EsD.-Ea-hVtcshpshh.sE .............EpGNEp..ha-llh.sh-s.E.....s.KpYllLhPss..............tp----.............ph...El.shph.ts...--s.....sp...u.cLtsI...EsD..-EW-hlpElasohh-E....................... 0 123 207 245 +6778 PF06950 DUF1293 Protein of unknown function (DUF1293) Moxon SJ anon Pfam-B_15399 (release 10.0) Family This family consists of several bacterial and phage proteins of around 115 residues in length. The function of this family is unknown. 25.00 25.00 25.00 36.00 24.50 15.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.31 0.71 -4.19 4 44 2009-01-15 18:05:59 2003-08-13 15:18:40 6 1 39 0 3 27 0 108.10 62 98.04 CHANGED MA+s.VFVLGIsIhWNstpG-p.ApLNlSRPL+pVNuEKFKRRTlGEsGDVNPpaDQPLhID+cYAh+LE+oGAhVPRREY-lcltlNP-DPLAGuIVsELIPVDsElKKHFpASLK .........MAp....lhGhsIph.stpG....so.ApLNV.RPlcpVNsEKFtphslG.ssDl.P..cQPLhI-.sYAhhL.cTtAhVPpREYplRht.NP-DPLt.slssELIPhDp-lKKaFptohK.......... 
0 0 0 1 +6779 PF06951 PLA2G12 Group XII secretory phospholipase A2 precursor (PLA2G12) Moxon SJ anon Pfam-B_15422 (release 10.0) Family This family consists of several group XII secretory phospholipase A2 precursor (PLA2G12) (EC:3.1.1.4) proteins. Group XII and group V PLA(2)s are thought to participate in helper T cell immune response through release of immediate second signals and generation of downstream eicosanoids [1]. 32.50 32.50 32.90 37.40 28.00 32.30 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.55 0.71 -4.65 8 176 2009-01-15 18:05:59 2003-08-13 15:23:26 6 5 97 0 98 157 1 169.50 40 81.12 CHANGED sLuL.hL........ssutusc............EscssDWhhsLpoIRsGl+plcsYhsuAL-LlGGcDGlCQYKCpsGps.....PlPRPGY+sP.PNGCGS.hFGl+....hDl.GIPu..MTKCCNQHDhCYDTCGpsKpcCDpcFphCLcsICsDlpKoLGhspsVc.ACEosVchLFsoVhpLGCKPYL-SQRuAClCphEEKsEL .............................hhhhhhh.................................pt.hsDh..sltslRsuhcplptYhsuhl-Lh.G...G.psGhC.p.Y.+....Cp.......Gpt.....PhP..R....uY+sstPNGCG...S.hhGlp....hsh.GIP..u..hTKCCNpHDhCYDTC........Gs.sKtcCDtcFphCLppIC..s..cl...p+.o..L......G..............hsppVp.......A.C-osschlFssVh.p.LGC+Pa..hsSQ.RsAChCt.E-+t-L........................... 0 25 39 62 +6780 PF06952 PsiA PsiA protein Moxon SJ anon Pfam-B_15432 (release 10.0) Family This family consists of several Enterobacterial PsiA proteins. The function of PsiA is unknown although it is thought that it may affect the generation of an SOS signal in Escherichia coli [1]. 
25.00 25.00 27.70 27.70 23.30 23.30 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.62 0.70 -5.30 7 330 2009-01-15 18:05:59 2003-08-13 15:30:40 6 2 215 0 9 171 0 217.40 69 97.79 CHANGED M.spSpALVsLpstpQAAhpAlhhsEppRcpGspLsuhPas+sFhRlL.sGSuRIssoshRpIsuhhhsscp+hssL.Qh.tALDhLIpStGEhCPLPLohDVtucLFPpVhatcs-RRhp+pslshpRp.RpcuRcsEQphhhhQNLLuQA.sELsFpSPETVsoWYsRWSDc.h-tp-LtshFWpWpsRFsSLsuh-hhphps-PLatVMaEl.hhsREssctlRthERW.VPNKLts .........MSARSpALVPLSsEQQAAhpAVApTEcRR+QGsTLu..taPYAuAFFRCL.NGSRRISLoDLRFFhPuLTsEEhHGNRL.QWLhAlDhLIEopGEVClLPLPuDAucRLFPSVhFRhpERpR+KosLsh..QKYSRQpsREAEQ+thtYQsLlAQApIELAFHSPETVGSWauRWSDc.Vs.....EHDLETlFWpWspRFPSLuuh-RapWQ-.PhWpVIhEAuhAuR-AsctVRphERWMVPNKLc.p.................................................... 0 1 1 6 +6781 PF06953 ArsD Arsenical resistance operon trans-acting repressor ArsD Moxon SJ anon Pfam-B_15383 (release 10.0) Domain This family consists of several bacterial arsenical resistance operon trans-acting repressor ArsD proteins. ArsD is a trans-acting repressor of the arsRDABC operon that confers resistance to arsenicals and antimonials in Escherichia coli. It possesses two-pairs of vicinal cysteine residues, Cys(12)-Cys(13) and Cys(112)-Cys(113), that potentially form separate binding sites for the metalloids that trigger dissociation of ArsD from the operon. However, as a homodimer it has four vicinal cysteine pairs [1]. 
20.70 20.70 20.70 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.58 0.71 -3.95 38 439 2012-10-03 14:45:55 2003-08-13 15:32:12 6 7 386 8 103 319 11 115.90 37 90.33 CHANGED Mp+..lplFDPAhCCSTGVCGs-VDppLlpFuADlcWl.KppGlplcRaNLuppPhuFscNssVpshLcpuGt-sLPllLVDGpllhsGcYPoRpEL.....spahGlshspstts...........................ssuC.Cuusst....CC ..........................................................MpplplF-PAhCCSTGVCGsplD.tLlphusshphl...+.p..p.........Glp.lpRaNLuppP.tFspNptVpphLpppGt-sLPlhLVDGclshsGpYPopcEl.....ucahulsh.p.........................................sssC....Cu.st.......CC........................................ 0 45 72 86 +6782 PF06954 Resistin Resistin Moxon SJ anon Pfam-B_15476 (release 10.0) Family This family consists of several mammalian resistin proteins. Resistin is a 12.5-kDa cysteine-rich secreted polypeptide first reported from rodent adipocytes. It belongs to a multigene family termed RELMs or FIZZ proteins. Plasma resistin levels are significantly increased in both genetically susceptible and high-fat-diet-induced obese mice. Immunoneutralisation of resistin improves hyperglycemia and insulin resistance in high-fat-diet-induced obese mice, while administration of recombinant resistin impairs glucose tolerance and insulin action in normal mice. It has been demonstrated that increases in circulating resistin levels markedly stimulate glucose production in the presence of fixed physiological insulin levels, whereas insulin suppressed resistin expression. It has been suggested that resistin could be a link between obesity and type 2 diabetes [1]. 
25.00 25.00 25.30 25.90 23.70 24.80 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.20 0.72 -10.72 0.72 -3.71 11 72 2009-01-15 18:05:59 2003-08-13 16:39:31 6 2 28 12 30 67 0 82.10 54 76.16 CHANGED ssopCol-pll-cKIK-sLuphp..Pssl.....pKp.LSCsSVpopG+LASCPuGhsVTGCuCGauCGSWDIppcsTCHC.QCu.sl..DWTsARCCpL ..................sto.ssl-phlpcKIp-hhssl......psl........pph.LpCpSVpSpGcLAoCPsGhsVTGCuCG.uCGSWDlR.s.-TTCHC..QCu....sh..DWTsARCC+l... 0 6 6 7 +6783 PF06955 XET_C Xyloglucan endo-transglycosylase (XET) C-terminus Vella Briffa B anon Pfam-B_20045 (release 10.0) Family This family represents the C-terminus (approximately 60 residues) of plant xyloglucan endo-transglycosylase (XET). Xyloglucan is the predominant hemicellulose in the cell walls of most dicotyledons. With cellulose, it forms a network that strengthens the cell wall. XET catalyses the splitting of xyloglucan chains and the linking of the newly generated reducing end to the non-reducing end of another xyloglucan chain, thereby loosening the cell wall [1]. Note that all family members contain the Pfam:PF00722 domain. 20.50 20.50 22.20 21.10 19.20 17.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.04 0.72 -4.26 134 896 2009-01-15 18:05:59 2003-08-13 17:05:34 7 5 108 13 411 924 0 52.80 34 18.85 CHANGED C................sssssW.hs.pp......Ls..spptpph+WVpppaMlYsYCsDppRaPts..hPs.EC ..............................ttsttW.hs..pp........Ls.sppppphcWVpppaMlYsYCsDpp.R.aPt......s...hPs.EC........ 0 61 251 337 +6784 PF06956 RtcR Regulator of RNA terminal phosphate cyclase Studholme D anon Pfam-B_17814 (release 10.0) Family RtcR is a sigma54-dependent enhancer binding protein [1] that activates transcription of the rtcBA operon. The product of the rtcA gene is an RNA 3'-terminal phosphate cyclase [2]. This domain is found at the N terminus of the RtcR sequence. 
RtcR, and other sigma54-dependent activators, contain Pfam:PF00158 in the central region of the protein sequence. 22.70 22.70 22.80 23.00 21.20 22.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.19 0.71 -4.95 17 470 2009-01-15 18:05:59 2003-08-13 17:23:00 6 3 455 0 52 277 17 182.20 74 34.98 CHANGED h++pVslGFLGosLD.....tGpuspRWp+WRPoVuLCQp.D.L.lDRLELLH..ss.................RtpsLA-pltpDIspVSPcTEVchp.hplcsPWDFcEVYusLaDFscuYsFDsEpE-YLlHITTGTHVAQIChFLLsEuRalPA+LlQTSPsRc...ptppssGohslIDLDLSRYDpIAsRFspcpp-sluFLKSG .......................MRKTVAFGFVGTVLD....YAGRGSQRWSKWRPTLCLCQQ.ES.LVIDRLELLH..Ds.................RSRSLFETLKRDIASV.SPETEVVuVEIELHNPWDFEEV...YACLHDFARGYp..FQPEKEDYLIHITTGTHVAQ.ICWFLLAEARYLPARLIQSSPPRK.......KEp.P.+....usG-VTIIDLDLSRYNAIASRFAEERQQTLDFLKSG............... 0 14 22 37 +6785 PF06957 COPI_C Coatomer (COPI) alpha subunit C-terminus Vella Briffa B anon Pfam-B_20121 (release 10.0) Family This family represents the C-terminus (approximately 500 residues) of the eukaryotic coatomer alpha subunit. Coatomer (COPI) is a large cytosolic protein complex which forms a coat around vesicles budding from the Golgi apparatus. Such coatomer-coated vesicles have been proposed to play a role in many distinct steps of intracellular transport [1]. Note that many family members also contain the Pfam:PF04053 domain. 
19.70 19.70 21.30 20.60 19.50 19.10 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.43 0.70 -5.94 5 402 2009-01-15 18:05:59 2003-08-13 18:19:08 6 18 285 10 268 405 8 362.60 35 33.49 CHANGED KGhFEGuLhohuttuAsu+tsh.lss-csA.hs.tshus-DWG.EDsD.lslDt....DuhspucDGLu-s--GE.sc-s-EEGGWDV.E-DLsLPPELDsPK.suGsA-SuhFVsPspGhuVSQpWsNsSsLAuDHlAAGSF-TAMRLLHDQLGVlNFuPaKsLFLcsYAuSRooaRAhusL.Pu..lslaPpRNWSEou.KN..usPAluaKLsQLscRLQuuYQhTTsGKFsEAVEKF+SILlSIPLlVVDoKpEVAEAcQLIsIsREYllGLpMElcRK-LPK-sL-QQKR.sELAAYFTHCcLQPVHhILsLRSAlNlFFKhKNaKTAAoFARRLLELuP+P-VApQsRKVLQACE+NsTDusQLNYDh+NPFVVCGuoYVPIYRG+P-VoCPYCGAsFsPshEGplCTVC-VApIGKDulGLRISPLQsR .....................................................................................................................................................................................................................................h....s.........p........t....tt...p.....s..........t.tt.....s............t......t.t--...s..u..G.W..Dh.......--.......D..lpls...-.........h...ph....s.t.....................s...s.t......p.s..s.hss....sstG.......s......sp..h...Wsp.N.S.tLssDHltAGuF-oAhpL.LpcQlGllsFtPhK.hFhphatsu+ohh.uhssh.Ps....lh.s.h.pRshp..-s..s....pp.....shPslsh.plsp.Lh.p.c...Lp.t.u.Yph.hssuKhp-AlctF+sILholsll.s.Vcs...cpE..hsEsppllplsREY..llu.Lph..ElcR+p..l..s.......p.....s...sh.......c.......p..........+.....R.hE..LAAYFT+spLQssHhhLsLpsAhshhaKhKNatoA............usFAc...RLL....-.....h...........u.....s......p......s......c.....h.............A.p...p.....sRKlhs.ts.E.+..s..s..p..D..shpl.s......a..D............a......s.......s...F......slCuu.oas...P..I..Y.c......Gp.ss.......hpCPhsGupYp.s.pa+..GplCplsplsplGtss.GL+l................................................................... 1 98 152 226 +6786 PF06958 Pyocin_S S-type Pyocin Vella Briffa B anon Pfam-B_20020 (release 10.0) Family This family represents a conserved region approximately 180 residues long within bacterial S-type pyocins. 
Pyocins are polypeptide toxins produced by, and active against, bacteria. S-type pyocins cause cell death by DNA breakdown due to endonuclease activity [1]. 22.50 22.50 23.00 22.50 21.70 21.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.85 0.71 -3.82 39 258 2012-10-01 19:36:59 2003-08-14 10:59:20 7 25 167 8 54 270 1 134.20 31 24.52 CHANGED hpphttstsslshsVRhph...pstsGph..p........lhsl+T..s.tsssspVsVhps.thsppsshap........sssssshslhaoPssss.....tssssosss........................sss.sssshsssslststsp...spshPs.ptt-hcD...hIlsFP..tsSGltPlYlh ...........................phAtstuossh.VRhph...ps.sGp...p........lhul+T..utss.shspV.Vtth.phstpostYc......hh.scs.sss.slhWTPssss.....hpssspTuss........................sss.....hsshhstslsssssp...stshPhs-.c-acD...aIlla.P..hsSGl.PlYlh................. 0 3 12 30 +6787 PF06959 RecQ5 RecQ helicase protein-like 5 (RecQ5) Vella Briffa B anon Pfam-B_20083 (release 10.0) Family This family represents a conserved region approximately 200 residues long within eukaryotic RecQ helicase protein-like 5 (RecQ5). The RecQ helicases have been implicated in DNA repair and recombination, and RecQ5 may have an important role in DNA metabolism [1]. 25.00 25.00 26.00 26.30 19.60 23.70 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.18 0.70 -4.69 2 54 2009-01-15 18:05:59 2003-08-14 11:51:20 6 4 30 0 22 45 0 185.70 53 21.94 CHANGED KSCuAtAE.sEPs-YDIPPsSHVYSLKPKRlGAGFsKGsCsFQTATELhtpo+.pcQAPps.htGtpEPPuhsCsL.DEDtScPhPG.ptcs.GuSsphGsPSPEKKsKuSStGS..AKuRASKKQQLLATAA+KDSQsIsRFhCpRsESPsL.ASsPcuEsAsPSCtsV......PtKhT.pEsGAtGH.sAs.QTEt..RERP ..................SCuApucssEPsEYD.IPPASpVYSLKPKRVGAGFPKGSssFQTATELhcpop..p-QAPps..psGcpEPPSpsCsL.DEDpScPLPGPp..sEsPGuSApsGsPSPEKK.......sKuSosGSslAKuRASKKQQLLAsAA+KDSQsIuRFFC.p.R.s.cS.Psh.s.SuPcA.Es.As..PS.Ct.GVpus..hsPEKho.cED.GA..tG+.sAssQTEt..REt.................................. 
0 1 1 6 +6789 PF06961 DUF1294 Protein of unknown function (DUF1294) Vella Briffa B anon Pfam-B_3405 (release 10.0) Family This family includes a number of hypothetical bacterial and archaeal proteins of unknown function. 25.00 25.00 27.20 25.50 24.50 23.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.64 0.72 -4.01 144 1488 2009-01-15 18:05:59 2003-08-14 13:39:17 8 8 1458 0 268 969 50 54.40 44 47.80 CHANGED sllsFhlYuhDKptAp....cspW........RlsEpoLhlluLl.GGhsGAhlutphhRHKTpKttF ....NllsFhlYuhDKppAc.....+.ptW....................RlPEpsLhlhulh.GGshGAhlutphF+HKTpKhhF......... 0 88 162 221 +6790 PF06962 rRNA_methylase Putative rRNA methylase Vella Briffa B anon Pfam-B_3461 (release 10.0) Family This family contains a number of putative rRNA methylases. Note that many family members are hypothetical proteins. 20.70 20.70 20.70 21.20 20.60 20.60 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.81 0.71 -4.48 17 1285 2012-10-10 17:06:42 2003-08-14 13:50:07 7 7 1268 14 179 706 33 135.50 43 72.68 CHANGED +VYuFDIQcpAlppTpp+lcphh...p.....lcLlhsuHEplpcal.p....sl+uulFNLGYLPuuDKpIsTpscoTlpAIpclLsllp.sGlIslVlYaGH-tGchEKcAVhpFsppLsQpcapVhpaphlNQhNsPPhllhIEK ..............pVYuFDIQcpAlcpTpp+Lcptt...h.s.....................spLlhsGHE.s.lc...p...alsc.................lcA...uIFNLGYLP.......u..u..D..Ks.....llT+.PpTT.lpAlppllpl...LshsGhIslllYaGHpsGphE+-AVLcasppL.sQppap.V.hpYphlNQhN.sPPallhIEK....................... 1 74 125 155 +6791 PF06963 FPN1 Ferroportin1 (FPN1) Vella Briffa B anon Pfam-B_3588 (release 10.0) Family This family represents a conserved region approximately 100 residues long within eukaryotic Ferroportin1 (FPN1), a protein that may play a role in iron export from the cell [1]. This family may represent a number of transmembrane regions in Ferroportin1. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.28 0.70 -6.10 19 342 2012-10-03 03:33:39 2003-08-14 15:26:02 7 7 156 0 257 446 7 355.50 26 83.29 CHANGED ppslhhLYluHhLusWssRMacFussLalsplassoLLhsuVYuLVcohSslllushlGphlD+.sRl+..slRpsllhQplshslusuhhhhlhhhsp.h...............shlhhslasssslluslspLAssAsolslERDWlVVlu.st.sssLsphNushRpIDLhsclluPllsuhlhuhhShphushhlhshNlsshhlEahhlhpVYptsPtLtcppctppscsptspp...............................tt.t.phthhpphtphhcps.hssWptYhpQsVFLAuhuLuLLYhT.VLSFGshMssaLhpp.GhsshhluhhRususlsGlsGTashPhltp+lGhlRoGlaulhhQhsCLhssVsuhhhssss.................................................................stlsshhLhsGVAhSRlGLWuFDLuVppllQ-...................slsEucRshluusp.uLQslh-LLpashsIhhspPcpFthhshISausl ....................................................................t.....hla.h.uh.huth......ssRhapFuhslh.hhtl.a..s......t...sLh.sulhuhh.shshhhhushlGphlD.p..s.Rht...shph.l....................hhpphshhh.......sshhhhhh..h.h...............................th.hh..h...hhhh.hhhhusltpLsshs.tl....slpRDW.................l...........lsls....t....t........p.....tttL.st.....hNuhh+pl................D.h.splhuPhhhu..lhs...h.........u..........................h............h..shhhh.shshhShhhchhhh.tl.Yp.ss.t....Lt.ptt.t.tt.......................................................................................................hht.th..t....htth......htsa......Yhp.ps.sh.....h.....suhuhu....hLYhT..VL......u...........a.ss.hhssa....hhpp...Ghss.hluhh...huhuulhulh.uThhhshl..p.+h.G..h.l.+sGh....hu.hhhQ.....h.hLhhslh.sh...hh.s..................................................................................hh.sh..hhhh.ul.h.huR..............h.............GLW.aDLsh....tplh...Qp...................tl...tspRshhsuspsuhp.hh-lhthhh..sl.hhspPptFthhhhlSh.hV......................................................................................... 
0 80 137 210 +6792 PF06964 Alpha-L-AF_C Alpha-L-arabinofuranosidase C-terminus Vella Briffa B anon Pfam-B_3625 (release 10.0) Family This family represents the C-terminus (approximately 200 residues) of bacterial and eukaryotic alpha-L-arabinofuranosidase (EC:3.2.1.55). This catalyses the hydrolysis of nonreducing terminal alpha-L-arabinofuranosidic linkages in L-arabinose-containing polysaccharides [1]. 28.50 28.50 28.70 29.00 28.30 28.20 hmmbuild -o /dev/null --hand HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.15 0.71 -4.50 145 1287 2009-01-15 18:05:59 2003-08-14 16:17:07 7 28 613 70 462 1241 79 201.20 22 33.87 CHANGED DEWusWh...........................tss..thhtpt.ohcDALstushLssht+puDpV+hAshAQllNlls...............................slhhpss.t.phhhsssYashphaupats.ussLssplpss................................................................................................................................................................................tstsshshl..sssAsh...spsstp....lhltllNh.s.t.tshslplslp...............................Ghps......pssptphL...suss.hpspNoh....ppsppVtPp............pt....p....ssph.shsLPsh......S ............................................................................................................................sEassh.............................ts.....thhtpt.shpsALstushLhsht+p.uD...h...VchAsh..A.Llssls....................................sllhhss..t..phhtssoYashphaupp..ts.upsLssplps.............................................................................................................................................................................................................tthpssshlsss.Ash......spcstp.....lhl...t..l.lNhssp...........shslslslp...............................Ghps......pssptphL...s...u..s..s....hpsp.Noh....ppPpp.........VtPp..............ppth.p.ht..s.sph.phplsshS........................................... 
0 167 325 398 +6793 PF06965 Na_H_antiport_1 Na+/H+ antiporter 1 Vella Briffa B anon Pfam-B_1828 (release 10.0) Family This family contains a number of bacterial Na+/H+ antiporter 1 proteins. These are integral membrane proteins that catalyse the exchange of H+ for Na+ in a manner that is highly dependent on the pH [1]. 19.80 19.80 20.50 20.40 19.10 19.10 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.40 0.70 -5.68 17 2392 2012-10-02 17:06:44 2003-08-14 16:48:31 7 9 1869 3 527 1704 1782 354.40 44 91.60 CHANGED lpcFlcsEuuGGllLhhuAslAlllANS.sLupsYhuhlcs.htht...h.hsholtpWINDGLMAlFFLllGLElKREhltGpLushppshLPslAAlGGMllPALIYhhlNhssPtshpGWAIPsATDIAFALGVLuLLGpRVPsSLKlFLhuLAIlDDLGAllIIAlFYossLshssLhhAhhslhlLhsLNRhsVppLhsYlllGllLWhslhpSGVHATlAGVhluhhIPhcttpu.....................................p..pSPlc+LEHtL+PWVuFhllPlFAFANAGVSluGhsls.l.ssPlsLGlslGLhlGKslGlFhhualAl+LsLAcLPcGhsWsplhuVulLCGIGFTMSlFIupLAFss....thhs.sKlGlLhGSlhuAllGhhhL .....................h.pcahpp-ssGGllLlhuullAhlhANS..s...........h.u.t.hY.pshh......c...h..............l......t......h....t............h......s......t........h.t......l......s...hslhh.WINDuLMAlFFhllGLElKRE..lh..p.GpLssh.cpAshPhlAAlGGMllP.....ALlYh..............h..............h..............N...h.......u..s.............s.............................s....h.pGWAIPsATDIAFA.LGlL.u.L.LG.s+lPhuLKlFLhuLAIlDDLGAIllIAlFY...T..s...s...L...u...h.hsLsl.Ahhs....ls..l.L..........h.l............L........N..........h........h........t...........l........p........p........h...hh....Y.l.ll..G.l.l.L.....WhslLcSGVHATLAGVlluhhIPhctp..p.s...........................................................................................puP.....h.c+lEHsL+PaVAalI..lP..lFAFANAGVslp.u.h...s...l.................s.sl......ss.............lsL....GIhhGLhlGKslGIhhh.saLul+ht.l...ApL.PpGhsap.plhululLsGIGFTMSlFIusLAFss........tst..h..hs.huKlGILh..GShhuAllGahhL.................................. 
1 161 342 452 +6794 PF06966 DUF1295 Protein of unknown function (DUF1295) Vella Briffa B anon Pfam-B_3514 (release 10.0) Family This family contains a number of bacterial and eukaryotic proteins of unknown function that are approximately 300 residues long. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.79 0.70 -5.07 12 924 2012-10-01 22:51:20 2003-08-14 16:53:11 7 9 605 0 506 2016 1927 217.80 24 75.13 CHANGED hallsthptpsshlDhhWusshslluhhshhh......stu.hshRphllhhLsslWulRLuhalhhR.shptG..EDhRasphRcphstt.........phhh.hahhQulhlahluLPlhlstustss......sh.tshshlGhslalhGhhhEshuDtQL.sF.................KtsPtN+G+.hhssGLWpaoRHPNYFGEhlhWWulalluhss..hpsh..hslhuPlhhThLLlaVSGlPlhEtph.+phttt.ta+pYpcpT ......................................................................................................................................ahht.h.tp....t.hh.Dhhhuh...s...s.h.hshhh.hhh.......................tts....s.hp.ph...l...l...h.hh..s...hlWuhRLsh...a.hh..h.R...h.......h....t.........s.....-D..Ra........t.....th+..p..p..hs.........................h.thhh..ha..h..hQ..u.....l.h.l..h...h..l.u...l.Ph....h..h....s.........s.........t.....ss...................sh....s..h...h.s....h.l..u..h...s..l.......a...l....h.....G..h...h....h.....E..sl...u.....D......h....Q...h...h.p.F................................................+..p.....s....s...t.....s.............c...........G...........c.....hh..........s.....s.........G.L.W.pa..oRHP....N...YFG......Eh..hh..W...h....u...l......h..l.h.u..h....s.s...................ht......s..........h.........................h..s..l...........h....u...P.l.....h.....h.hh.l...L.h..hh..o...G.hsh.hEp....h..c.ph...............s..tYppYppps.................................................. 
1 210 340 440 +6795 PF06967 Mo-nitro_C Mo-dependent nitrogenase C-terminus Vella Briffa B anon Pfam-B_6998 (release 10.0) Family This family represents the C-terminus (approximately 80 residues) of a number of bacterial Mo-dependent nitrogenases. These are involved in nitrogen fixation in cyanobacteria [1]. Note that many family members are hypothetical proteins. 19.30 19.30 20.30 47.80 16.70 15.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.92 0.72 -3.83 13 139 2009-01-15 18:05:59 2003-08-14 17:19:46 6 5 53 0 56 140 8 83.00 57 56.74 CHANGED -lLpPlRpWLDulcIpst+lAHhlC+LIPsQCPFERDlsLFGRhlhHIPPLCKLNPLYEplVuLRFRALsYLADECGEDIopYs ........lLpPlRpWLDsl-lcssclA+h..lC+lIPuQCPFERDIpLFG+plhHIPPLCKLNPLY-plVuLRFRALsYLAD-CGEDlotYh........ 0 5 41 56 +6796 PF06968 BATS Biotin and Thiamin Synthesis associated domain Finn RD anon Pfam-B_5417 (release 10.0) Domain Biotin synthase (BioB), EC:2.8.1.6 , catalyses the last step of the biotin biosynthetic pathway. The reaction consists in the introduction of a sulphur atom into dethiobiotin. BioB functions as a homodimer [1]. Thiamin synthesis if a complex process involving at least six gene products (ThiFSGH, ThiI and ThiJ). Two of the proteins required for the biosynthesis of the thiazole moiety of thiamine (vitamin B(1)) are ThiG and ThiH (this family) and form a heterodimer[2]. Both of these reactions are thought of involve the binding of co-factors, and both function as dimers [1,2]. This domain therefore may be involved in co-factor binding or dimerisation (Finn, RD personal observation). 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.92 0.72 -4.14 127 5028 2009-01-15 18:05:59 2003-08-15 13:00:51 8 10 3397 6 1177 3436 382 97.40 30 26.94 CHANGED slNh..LpPh.pG......os.h.........ts.....t.lssp-hl+hlAhhRlhhPp.stlpluuG+pthhcc.h.s...lshhuuusuhhsGsY..........................hssss.cshscshphlpchG.h......ps ...................PINh....Lhss.pG.....Tsh............ts..spsls.sh-hl+hlA.shR.lhhPp.s.tlRlSuGR.p.hh.cp..hts.....hshhu.uuNu...l...h...hGsh........................................................................Lhosspps.cpDhphlpchGhp................................................................ 0 421 778 1014 +6797 PF06969 HemN_C HemN C-terminal domain Finn RD, Bateman A anon Pfam-B_833 (release 10.0) Domain Members of this family are all oxygen-independent coproporphyrinogen-III oxidases (HemN). This enzyme catalyses the oxygen-independent conversion of coproporphyrinogen-III to protoporphyrinogen-IX [1], one of the last steps in haem biosynthesis. The function of this domain is unclear, but comparison to other proteins containing a radical SAM domain (Pfam:PF04055) suggest it may be a substrate binding domain. 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.95 0.72 -3.91 87 5381 2012-10-04 14:01:12 2003-08-15 13:44:13 11 4 3710 1 1096 3872 632 66.00 23 16.25 CHANGED lstp-thpEhlhhsLRh..ptGlshpphppphs.t.....h.t.h.tthlpphtpp..GLl....phss..stlplTppGhh ..........................stc-phcE.hhhsLRh..ppulshsphp.ppa.Ghs..........htphh...tp....lp...p...hhpp......GLl...............p.h.s.s.....stlplTpcGhh............................. 
0 314 660 891 +6798 PF06970 RepA_N Replication initiator protein A (RepA) N-terminus Vella Briffa B anon Pfam-B_1808 (release 10.0) Family This of family of predicted proteins represents the N-terminus (approximately 80 residues) of replication initiator protein A (RepA), a DNA replication initiator in plasmids [1]. Most proteins in this family are bacterial, but archaeal and eukaryotic members are also included [2]. 30.00 30.00 32.70 31.90 29.90 29.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.60 0.72 -4.19 55 1268 2012-10-04 14:01:12 2003-08-15 15:21:35 6 6 685 0 75 856 25 73.30 38 29.74 CHANGED ppFaplPKsLhpsppY.cpLSs-AKlhYulLhDRhpLSl........+Ns....WlDcc.GplYhlaosc-Ltp..hLssuc.pKll+lKKEL ..................paaplPK.hLhp.s............pp.a..cplSs-AKlhYulLhDRhpLSh............+Ns......W..lDc-..GplYlla.ospcLhc..hLsp.uc.pKlh+lhcEL............. 0 29 43 58 +6799 PF06971 Put_DNA-bind_N Put_DNA-bind_C; Putative DNA-binding protein N-terminus Vella Briffa B anon Pfam-B_3389 (release 10.0) Family This family represents the N-terminus (approximately 50 residues) of a number of putative bacterial DNA-binding proteins. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.25 0.72 -4.20 17 1817 2012-10-04 14:01:12 2003-08-15 15:45:00 8 3 1640 24 349 1010 155 49.40 47 22.82 CHANGED ppIPcAshKRLPhYaRhLctLhppslcRlSScpLu-tlslsuupIR+DFS ........pIP.c.A.ThKRLPlYYRhlppLps..pG.l.c+lSSpplu-AltlcSAoIRRDFS. 0 159 268 317 +6800 PF06972 DUF1296 Protein of unknown function (DUF1296) Vella Briffa B, Eberhardt R anon Pfam-B_4035 (release 10.0) Family This family represents a conserved region approximately 60 residues long within a number of plant proteins of unknown function. Structural modelling suggests this domain may bind nucleic acids [1]. 
21.60 21.60 21.70 26.00 21.40 21.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.06 0.72 -4.01 7 106 2012-10-01 23:03:33 2003-08-15 16:05:18 6 3 19 0 70 100 0 57.30 55 7.87 CHANGED lPsth+phlQsl+ElVss..S-t-IauhL+EssMDPsEssp+LLpQDsFHEVKSKR-KKKE ....lPsusRKhlQulKEIVss.ho-t-IYAsL+ECsMDPNEssp+LLsQ.D.sFHEVKpKR-KKKE........... 0 13 40 54 +6801 PF06973 DUF1297 Domain of unknown function (DUF1297) Vella Briffa B anon Pfam-B_3819 (release 10.0) Domain This family represents the C-terminus (approximately 200 residues) of a number of archaeal proteins of unknown function. One member is annotated as being a possible carboligase enzyme. 23.80 23.80 24.90 31.30 23.50 23.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.09 0.71 -4.98 40 191 2012-10-10 13:17:03 2003-08-15 16:23:52 7 2 111 19 120 188 129 185.00 43 52.81 CHANGED EapcKhcplhcpGlIspcslcpspIEEYllGshashpaFYSPlpcc........l...EllGlDpRhEoNlDGhhRlPAppQl.ch....slpPpallsGphPsslRESLL.pVF-hG-+aVcAo+clhsPGlIGPFsLQoll...sc-...L-hlVF-lSsRIsGGTNlahs.GSPYStLhascshShGRRIA.......hEI+pAlcpscL-cllT ...............-a.cKhpphhcpGllppcslppshIpEYllGs.h.hpaFYSPlpsc.................l...ElhGhDpRaEoslDGlhR.lPAppQl.cl....plpPpallsGphPsslRESLL.cla-hG-+aVcuscclhs..P.GlIGPFsLpoll......sccL-llVF-lSsRIsuGTNhahs.GSPYShLhascPhShGRRIA.......hEI+pAlcpspL-cllT.............. 0 30 60 89 +6802 PF06974 DUF1298 Protein of unknown function (DUF1298) Vella Briffa B anon Pfam-B_4362 (release 10.0) Family This family represents the C-terminus (approximately 170 residues) of a number of hypothetical plant proteins of unknown function. 
20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.68 0.71 -4.42 14 691 2009-01-15 18:05:59 2003-08-15 16:32:11 8 13 255 0 271 581 59 145.90 22 32.32 CHANGED KsSKCR..WGNahGhlllPFsl.uLcsDPL-Yl+pAKuhhDRKKpShEAhhoYhhh+hllKhFGhKsusslhpRhhsNTThsaSNllGPsEEISFhGHPIsYlAsSsYGpPpALsIHa.SYssKhhIsluVDsslIPDPH+LCDDhEESL+hhKsA ............................................................................s.........GN.hu..h..h..h..h..s.h......h....t......s...sP....l.phlpt.......h..p..t..th...p...t......t.+...p.....p...h...p......t......h.........h...h..........h.....t....h...h....h....h..h...h...u.......t...h...h....t....t...l.....h....t...t...h...............s......h........s.l..s...lSNV.PGPp-.....lhhsG.u......l..s..t..h.hs...s.s.h.s..t.s.p...........u....L..sl..olh..SYssp.lshulhssp..ssls...-spclssthtcuhppl.................... 1 71 176 236 +6803 PF06975 DUF1299 Protein of unknown function (DUF1299) Vella Briffa B anon Pfam-B_3488 (release 10.0) Family This family represents a conserved region approximately 50 residues long within a number of proteins of unknown function that seem to be specific to Arabidopsis thaliana. Note that many family members contain multiple copies of this region. 25.00 25.00 45.20 45.20 19.90 17.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.39 0.72 -4.10 4 21 2009-01-15 18:05:59 2003-08-15 16:40:27 6 4 1 0 0 21 0 46.50 84 17.30 CHANGED HDEas-stDQEAYlILSDDEsNGTAPTEKESQPpKEETTEVPKEEsV HDEHDETEDQEAYVILSDDEDNGTAPTEKESQPQKEETTEVP+Epph 0 0 0 0 +6805 PF06977 SdiA-regulated SdiA-regulated Vella Briffa B anon Pfam-B_2520 (release 10.0) Family This family represents a conserved region approximately within a number of hypothetical bacterial proteins that may be regulated by SdiA, a member of the LuxR family of transcriptional regulators [1]. Some family members contain the Pfam:PF01436 repeat. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.59 0.70 -5.43 24 899 2012-10-05 17:30:42 2003-08-15 17:25:23 6 9 524 1 91 519 74 223.90 40 78.35 CHANGED stulhLssY+ssl-.u+slsGlccs.lSuLTYsPppcoLFuVsNp.sspLlELShcG-lLR+lPLtG.FsDsEulpalusGphslsDERpppLhhlplsspTpslshs-...hpphsLGhs..tssNKGFEGlAaDstspRLhluKERsPhtlaclpu.......ss.sltsssptsh.t..pplhl+D...LSuLpaDtpoGHLLlLSDESRhllElD.tpGc.lShhoLhtGhsGLccslPQAEGlAhDccGsLYlVSE.PNLFYhFc ..............................................................................................................................................................................p.sh.hpsYcssls.tK.lsGlps.s.lSuLTass....pspp.LFushNp..P.........u..........t..l..V..ch....o.p.p.G.c.l........l.+p.hP.L........sh........h.........p........DhEsIEa.l.GsN.p...as.l.u.....-..ER..c.......h.....t..lhhhpl......................o.......t..s..p..l...t.......hh.cl.p.lshp....pspNpGFEGLAastpscpha.h.h......K..E...+.....pP.....lp....lhcspt.................................ssspLphucstslp........tthplp.D.......l......SGhp..a..s.tp.pspLllL.ScES+hl..hEls..pGc.hls.....hsLs+G..p..GLpc..sI.Q.uEGIAhD.s.p.s.slYIVS.E.PN.hFY+Fo.......................................................... 0 20 42 75 +6806 PF06978 POP1 Ribonucleases P/MRP protein subunit POP1 Vella Briffa B anon Pfam-B_7848 (release 10.0) Family This family represents a conserved region approximately 150 residues long located towards the N-terminus of the POP1 subunit that is common to both the RNase MRP and RNase P ribonucleoproteins (EC:3.1.26.5) [1]. These RNA-containing enzymes generate mature tRNA molecules by cleaving their 5' ends. 
22.20 22.20 24.00 24.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.44 0.71 -4.63 38 354 2009-01-15 18:05:59 2003-08-18 13:52:12 6 10 265 0 265 358 4 151.60 31 21.10 CHANGED FlpuRphEIcslppuhppuKtstsoRsFQplPRpLRRRsASHNl+RlP+RLRsRAt+EMhpsss.....................ttphhh+hcst++l.ph.tp...............................................................................................ltp.s.spt+ap+RQcp......p..pWL.THlWHAKRh+Msc...............hWGaplPh..sPTpKsaRssaR.....tspcsslsaDhSYhss....ltlpGs ...............................................................................................................................................................................................................................................FhtuRthElpth.tuhpppp................sp..hsF.Qp..lPRphRRRshSHNs+RlP+.RhRthshpch.pstt.......................................h.p.c.ttt.................................................................................................................................................................................ht...p....ca..t+Rp.pp........p....hWL.THlWHAKRh+Mhc...............hWGatls...ps..stKsaRssaR..................t.pptslh.DhSYhssltlps.t..................................................................................................................................... 0 104 157 222 +6807 PF06979 DUF1301 Protein of unknown function (DUF1301) Vella Briffa B anon Pfam-B_8295 (release 10.0) Family This family contains a number of eukaryotic proteins of unknown function that are approximately 160 residues long. 
21.70 21.70 23.90 23.70 20.20 20.00 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.76 0.71 -4.01 7 128 2009-01-15 18:05:59 2003-08-18 14:35:39 7 4 104 0 91 127 1 128.50 34 59.20 CHANGED uVKhFShSTSlsulsh.P.lLLcpuhthsu..hshtlhhhGlhGFFTFlTPlLLHhlTKtYVhcLaa-ssp-pYTAsThshhLpcppThF+.cDVslP-lsthFToFhscsKuLhVsPsLFPsPpcY.+lMGYDK .......................................VKhFShSTShhulshhPhl...hhpss..ths.u....lsh..p.shhsuh.luhFohh...TPhlLH.alT..KsYVh+L..aa.p...ss............o.D..........oYpAhThs.hlhpcppp...........sF+.sDV....p....lP.-.....ss.+hF...TTFhAcs+shhVssthF.ss.pca.+lMGY-............................ 1 28 43 70 +6808 PF06980 DUF1302 Protein of unknown function (DUF1302) Vella Briffa B anon Pfam-B_7023 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function that are approximately 600 residues long. Most family members seem to be from Pseudomonas. 19.90 19.90 20.70 20.10 19.50 19.70 hmmbuild --amino -o /dev/null HMM SEED 554 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.66 0.70 -5.94 64 345 2012-10-03 17:14:37 2003-08-18 15:53:48 6 3 208 0 139 351 210 566.70 31 94.05 CHANGED 
hsFshG.-...lpssaDoolShGsuhRspstspshh...........................................tssstssssDDGsLNac+.G-shSphlKuhp-LpL+assh.......GhFlRGphaYDhthp-sstp....................................s.ss.pscpttspsscLLDAaVatsasl..GcpP.sslRlGcQVlsWGEShF...ltsG...I.NshNPlDls.thptPGuElKEsllPsshl.suhuLocsLSlEuaYQ..acWcpotlsssGTaFSssD.hhucGsps..h............................h.hhss.ss.sh...............................shhh.css-tcuccsGQaGlslRahsttL..-TEaGhYahNYHs+hPhlussssshsshs......................................................................spYhhpY.sEDIcLaGhSFsTslG......ssuluGElSYRsstPl.tlsssplh...........huuhssts...................................shss...Gt.hp.......G...Ytct-shQhploslphhst..........shGAspholluEluhsalhul...ststhphs.t.ssht..............................................t..tstspshsTpsuaGY+lthphsYsslas.GlsLsPslsauHDlpG..hoPs...sssF.sEGp+ululGlshsYp...spapsslsYssFaG.............Gshs......................shsDRDaluholshoF ............................................................................pFshG..-...lpupaDoolohGsuhRspstspsll................................ststs.stssDDGslNFc+..G-shSphhKulp-LcLpatsh.......GhFlRGphaYDhthpDpspp.....................................hhs...s.ss.ppct.hpupsscLLDAFVatsasl..GstP.hslRlGcQVlsWGEShF...hts...G...I.suhNPlDls...thptPG.uEl.KEsl.................lPlshl.huhsLT-sLSlE...uaYQ..hcWcpohlsssGTaFSssD.....hh.u.cGspthhh................................................hss..tt......................................shhh.c..us-p.cscc..u..GQaGlsl+ahsptL..-TEaGhYahNYHu+..tPh...luspsushsshs..s..................t................................................................spahhpY.PED.I+LaGhSFsT..s..lu......ss.uluGElSYR.stPl.tlssspll...............hushssts...........t........................ss.s.........GtthpGahRt-shQhphohh.phas...........shGA-pholluEluhs+ltsl...shsphphs..tsss.ht........s..s........................................shsp..tspssshsTpsuaGYclthphsYssVas.GlslpPslsau+DlpG.hoPs.....h.ssF...tEGpKulolGlshsYp...spapsslsYssFaG.......uphs..................
.................shsDRDaluhohphsF................................ 0 33 60 105 +6811 PF06983 3-dmu-9_3-mt 3-demethylubiquinone-9 3-methyltransferase Vella Briffa B anon Pfam-B_6583 (release 10.0) Family This family represents a conserved region approximately 100 residues long within a number of bacterial and archaeal 3-demethylubiquinone-9 3-methyltransferases (EC:2.1.1.64). Note that some family members contain more than one copy of this region, and that many members are hypothetical proteins. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.46 0.71 -3.75 25 2984 2012-10-02 15:00:03 2003-08-18 16:55:30 8 7 1713 10 686 1992 166 128.00 27 87.56 CHANGED sKIossLWF...D.spAEEAspFYsSlFsNScltslschspst.su.....tGsVhsspFslsGpsFhuLNGG..PpFpFscAlSFhlsCcsQcElD+hWstLstsGu........ppu.tCGWl+D+aGlSWQll ..........................................................sPaLhF....s....up...ucEAhsaYpp.....s.F.............s........s........u.....p..l.....h.....p.h.......p........p....h......s....p......s.....t.....st................ps.....p.....l...h.....au.....p......h......p.l..s.G.p...s...h..h....s............D.......u..s............s.s...h.....p.......h..........s.......s....u.......h.....S.......l....h...l..s..............s.........c..........s..p.....p.....E......s....c......c...h...aspLsssGpl.h.............pthhts.taG.hlpD+FGlsW.l.......................... 0 219 460 593 +6812 PF06984 MRP-L47 Mitochondrial 39-S ribosomal protein L47 (MRP-L47) Vella Briffa B anon Pfam-B_6890 (release 10.0) Family This family represents the N-terminal region (approximately 8 residues) of the eukaryotic mitochondrial 39-S ribosomal protein L47 (MRP-L47). Mitochondrial ribosomal proteins (MRPs) are the counterparts of the cytoplasmic ribosomal proteins, in that they fulfil similar functions in protein biosynthesis. However, they are distinct in number, features and primary structure [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.02 0.72 -4.08 8 327 2012-10-02 11:59:50 2003-08-18 17:11:16 8 5 288 0 229 380 11 87.70 41 34.85 CHANGED LhEFF-s.....c+sh..sEpc.s.KsG...+uWssppLRpKShsDLHpLWYshLKE+NMLhThcpchpcpphs...hPuPERlcKV+pSMcslcpVlpER .....................................L.pFFss.............pps.h....s.p..p.c...sp.tG.......RuWss.pELRtKSh-DLHpLWaVhLKE.RNhLhTh...cpctp...+ppht..............hss.s-.R.h....c.+...Vc.........poMppIcpVlpER...................... 0 77 126 190 +6813 PF06985 HET Heterokaryon incompatibility protein (HET) Vella Briffa B anon Pfam-B_8200 (release 10.0) Family This family represents a conserved region approximately 150 residues long within various heterokaryon incompatibility proteins that seem to be restricted to ascomycete fungi. Genetic differences in specific het genes prevent a viable heterokaryotic fungal cell from being formed by the fusion of filaments from two different wild-type strains [1]. Many family members also contain the Pfam:PF00400 repeat and the Pfam:PF05729 domain. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.32 0.71 -4.02 164 2623 2009-09-13 05:28:11 2003-08-19 10:54:52 6 149 90 0 2311 2726 2 139.30 21 21.73 CHANGED YtsLSYsW....Gsst.........................tttl......................hplsps.lpsAlphs.+.p.........h.....uh.cY.....lWlDslCIsQ....s.ss......-hspplspMtplYppAthslshhusss.............................pt.....................h.shp.h.....pthtt.tht.............................................................................t..hhpp....ltp......s.WasRuWslQE .....................................................................YhsLSasW.........Gssp................................................................................p...t.h...h..t...........................h.t.l...s...p....s.......l.p.s.Alphh...+.p...............h..............sh..ca..............................lWlDs......l...C.I....s.Q................s....st.............t.......-.........hs.ppls.t...M.tp..lYpp......Athsls.hhu..sss................................................................s..tt...................hh.p...............t..............................................................................................................................................t............h..p.......t..h..h....pRhWshQE................................................................................................................. 1 357 1215 1922 +6814 PF06986 TraN Type-1V conjugative transfer system mating pair stabilisation Vella Briffa B anon Pfam-B_8400 (release 10.0) Family TraN is a large cysteine-rich outer membrane protein involved in the mating-pair stabilisation (adhesin) component of the F-type conjugative plasmid transfer system. TraN is believed to interact with the core type IV secretion system apparatus through the TraV protein [1][2][3]. 
21.60 17.10 23.00 17.90 20.30 16.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.24 0.70 -5.02 17 647 2009-01-15 18:05:59 2003-08-19 13:20:02 6 8 342 0 93 486 42 199.50 26 41.21 CHANGED stCphspc.pCh...Essts+h..lsGlsl...........sh..sCWctptsYpCss...............................ss..TCstlpsp.......sCphspppCh.p...pptuhChptphsapC.pps..ssss...phhCusssaClsGsChpsppctss..-FscusutLuAlspAsp-hsss..........shplFpGputpCp+thhG.hssCCss.pGaG......slu...............ltp...CsstEcsLuptcppshsh.lGoYC..ucK..llusChp+KcsYCsFpSKLu+llQpQG+p.QLGhuaGosKpssCpu............lTs .........................................................................h.................................h...............................th.....Ch.t..p..p..h.s.......................................s..sCt.h.tp.......tCt..h..p.ppCh.................tttu.Chh...hYp...Ct.p..............tt........p......t.....s.ss..t..t.....t.............pt..thtp.hs.hthh.t.ht...thts.................h.hFtGp...........C..p.h.....h..h.s....h........sCCpp..tsh.s.........t........................................................................lht.....CsppEhtlsttc.tt....ph.sh.lGpaC.sp+.....hlu...sCl.pc..+cs.aCpFpSpLupIlppps+......Q....L....t....h.....s....aGss.p....ps....pCpuho.s................................................................................. 0 21 47 72 +6816 PF06988 NifT NifT/FixU protein Moxon SJ anon Pfam-B_5696 (release 10.0) Family This family consists of several NifT and FixU bacterial proteins. The function of NifT is unknown although it is thought that the protein may be involved in biosynthesis of the FeMo cofactor of nitrogenase although perturbation of nifT expression in K. pneumoniae has only a limited effect on nitrogen fixation [1]. 
20.80 20.80 20.90 21.90 19.80 17.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.03 0.72 -4.50 36 178 2009-01-15 18:05:59 2003-08-19 13:33:27 6 3 167 1 86 157 12 63.60 47 82.34 CHANGED M.KVMlRcssp.ssLssYVPKKDLEEslVsh-..pschWGGhlsLuNGW..pLtlPshsssspLPlTV-A M.KVMlR+sss..sLosYVPKKDLEEslVp.hE..psshWGGhlpLuNGW..pLtlPphsscspLPlTl-A... 1 22 52 68 +6817 PF06989 BAALC_N BAALC N-terminus Moxon SJ anon Pfam-B_5793 (release 10.0) Family This family represents the N-terminal region of the mammalian BAALC proteins.\ BAALC (brain and acute leukaemia, cytoplasmic), that is highly conserved among mammals but evidently absent from lower organisms. Two isoforms are specifically expressed in neuroectoderm-derived tissues, but not in tumours or cancer cell lines of non-neural tissue origin. It has been shown that blasts from a subset of patients with acute leukaemia greatly overexpress eight different BAALC transcripts, resulting in five protein isoforms. Among patients with acute myeloid leukaemia, those overexpressing BAALC show distinctly poor prognosis, pointing to a key role of the BAALC products in leukaemia. It has been suggested that BAALC is a gene implicated in both neuroectodermal and hematopoietic cell functions [1]. 25.00 25.00 51.00 51.00 20.00 19.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.52 0.72 -3.64 2 32 2009-01-15 18:05:59 2003-08-19 13:40:07 7 1 21 0 15 28 0 51.80 76 40.27 CHANGED MGCGGSRADAIEPRYYESWTRETESTWLTYTDSDA.PSsAAsDSGPEAGGLpA MGCGGSRADA.IEPRYYESWTRETESTWLThTDSDuP..PSu.A..As.........DSGsEAGG................. 0 1 3 7 +6818 PF06990 Gal-3-0_sulfotr Galactose-3-O-sulfotransferase Moxon SJ anon Pfam-B_6301 (release 10.0) Family This family consists of several mammalian galactose-3-O-sulfotransferase proteins. Gal-3-O-sulfotransferase is thought to play a critical role in 3'-sulfation of N-acetyllactosamine in both O- and N-glycans [1]. 
20.20 20.20 20.20 20.30 20.10 20.00 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.61 0.70 -5.81 3 598 2012-10-05 12:31:09 2003-08-19 13:49:40 6 17 61 0 434 689 22 279.80 24 77.98 CHANGED MhPhLttLQ+ts+hhht+tLlLslLhhooFLLLlaphA..shHucLt.sTP.FuuSCsP.t..............EuPPRsN...IsFLKTHKTASSTVLNILFRFAE+HNLoVALP.sGsRscFsYPphFuA+aVc.....ussupp.FNIlCNHLRFchsEVc+LMPssTlYlTILR-PAthFESSFpYYssYsPAFR+lPuus.L-AFLpuP-sYYcsucH.pAhYA+NsLaFDLGhDN-hsPps-puYVpu+ItEVERRF+LVLIAEYFDESLVLLRRLLsW-LDDVLYFKLNARuASsVuRLSuEstu...RARoWNALDu+LY-HFNATFWR+V.AchGRcRlcREVccLRcARcRLtolClsDGsAL+sAAQIRDcpLQPWQPSGKsDILGYNL+sGluspssplCpRLlMPElQYhscLhApQ .............................................................................................................................................................................................................................................................................................................................................................................s....h..pp.........lhFl..KoH.KouooT..l..sl....l.h.R..............au..p..cpL.ph..s.........hP.....t.......t.........t..h....t.a.......P............h..t..t.p....h...l..........................t.............as...l....h.s.p......H..h..........c.......a..p..........h...t..t...l.t.pl.h....s.s..s....s...h...........a.hoIl....Rc.Ph.t........hcS.F..t..a..a.........t............................................h...........h....h............t........h..................s.......................ltt....F..l..p.....p..P.............t....a....h......p..............t......t.....................t...........h.........h........h...+......N.......h.....h....a....D.h.G...............................................t........t.............t................h.p......t...h.lt.tlp.p....tFpLVhlhEaa-ESl..lLL+chhs...W......phcDll........h...h....t.......h....s...................p....t............t...p..............................t.......h..s........t...t..................ph
..p...tas.h...Dhh...LYpaFNt.........ohWp+l...tt......G...t......h.tpht....hpth.tt..h..t.C....................................................................................................................................................................... 0 290 302 342 +6819 PF06991 Prp19_bind MFAP1_C; Splicing factor, Prp19-binding domain Vella Briffa B anon Pfam-B_8343 (release 10.0) Family This family represents the C-terminus (approximately 300 residues) of proteins that are involved as binding partners for Prp19 as part of the nuclear pore complex.\ The family in Drosophila is necessary for pre-mRNA splicing, and the human protein has been found in purifications of the spliceosome. In the past this family was thought, erroneously, to be associated with microfibrillin. 20.90 20.90 21.50 21.30 19.90 19.90 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.94 0.70 -5.12 5 330 2009-01-15 18:05:59 2003-08-19 13:57:47 6 8 269 \N 245 315 2 231.40 35 59.81 CHANGED EtElhptE-EccupEEEEE.ESEpEE-TDSEDDME.....PRLKPlFlRK+DRs...TlQE.+E+EttKpctL.EtEAKRtAEERK+pTtKIVEEslKKELEtcKs......p.cAsls..sV-TDDE.N-ppEYEAWKLRELKRlKRDREEREshEREKtEIE+hRNMTEEERRspLRpNPKVlTNKssKGKYKFLQKYYHRGAFFh.DE--E........VhKRDFSpATtED+FDKTILPKVMQVKN..FGRuGRTKYTHLV-EDTTcasSsWsusss.NpKF...s++AGGhRssF-+PsuKKR 
..........................................................tct.p...p..tp..p.p.-p...psp..E..p.......o..-sE--...................hl.+P.lFl.pKppR.............o.htp...ppt.t.........c..p...pp.h...c.t..p....t....c...c.h....h.cp..R..+...p.s.p.....llc-ph.c+-httppt................t.........ss........hp......sss.T.Ds....ssc........t.........EYpAWKlRELcR...l.KR-R-..t.......h.-..th-+E+tElERhRshT-..EERpt.......-.......c..t..t..s.c..............p..p.c...t.....KG...Kh.....tFh.QKYaH+..GAFa...Dp.tc.p..........l.h+R..D...hs.s.s.p.h.-Dp.h.s+phLPchhQV+s...hG+pGR.T.KYpcLhspDTs.....pa..s......s.....h........................................................................................ 1 81 132 197 +6820 PF06992 Phage_lambda_P Replication protein P Moxon SJ anon Pfam-B_6611 (release 10.0) Family This family consists of several Bacteriophage lambda replication protein P like proteins. The bacteriophage lambda P protein promoters replication of the phage chromosome by recruiting a key component of the cellular replication machinery to the viral origin. Specifically, P protein delivers one or more molecules of Escherichia coli DnaB helicase to a nucleoprotein structure formed by the lambda O initiator at the lambda replication origin [1]. 
21.10 21.10 21.10 21.50 20.60 19.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.42 0.70 -5.08 2 541 2009-01-15 18:05:59 2003-08-19 13:59:08 6 2 340 0 27 376 18 200.50 54 94.62 CHANGED MKNIAAQMVNFDREQMRRIsNNMPEQYDEKPQVQQVAQIINGVFSQLLATFPASLANRDQNElNEIRRQWVLAFRENGIToMEQVNAGMRVARRQNRPFLPSPGQFVAWCREEASVhAGLPNVSELVDMVYEYCRKRGLYPDAESYPWKSNAHYWLVTNLYQNMRANALTDAELRRKAADELspMTARINRGEsIPEPVKQLPVMGGRPLNRsQALAKIAEIKAKFGLKGASV ....................................MKNIAAQMlNFDpEQMRRIANshPEQY.D-.+s...QV..p.Q.V....A..p.IINuVFSQLhAsFPA.S.l..AN..R-.QsE.l.NE..l..RRQWVLAFpENGITTMEQVsAGMRVA..RRQ..sRPFLPSPGQFVAWC+E-Au.V.hsGLPsluELlDMlaE...YCR+RG...L...Y.P.Du.E.sYPW..........+S.NA.aYWLVTNLYp.sMRAsuLTDuELRR+AuDELspMss.RIsRG...EsI.P.EPV.KQLP.shu.s.RPl..spsp.AL.u+IAEl+AKhGLKGu......................................................................................................... 0 3 8 15 +6821 PF06993 DUF1304 Protein of unknown function (DUF1304) Moxon SJ anon Pfam-B_7246 (release 10.0) Domain This family consists of several hypothetical bacterial proteins of around 120 residues in length. The function of this family is unknown. 23.80 23.80 25.20 25.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.63 0.71 -4.36 73 1298 2012-10-01 21:33:42 2003-08-19 14:01:57 7 7 1246 0 231 804 18 108.50 39 89.68 CHANGED ssLVAl.hHlYIhhLEhFhaspstst+.sFshs..-hhptsps..LutNQGlYNGFLAsGLlhGlhhsss......shtlhhhhLhsVllAulaGuhTu.s+.+I........hhlQulPAllALlhlhl ..................hlVAl.EHhYIhhLEhhthsS..ct.ss+.sF.shsh.-phpppslp.LhtNQGlYNGhLulhL...lau...l..ahups..........hplshhhllhllsAAlYGul.Tu..s+..pI..........lhpQGh.sAlLALlull................................ 
0 65 139 189 +6822 PF06994 Involucrin2 Involucrin Vella Briffa B anon Pfam-B_8443 (release 10.0) Family This family represents a conserved region approximately 60 residues long, multiple copies of which are found within eukaryotic involucrin, and which is rich in glutamine and glutamic acid residues. Involucrin forms part of the insoluble cornified cell envelope (a specialised protective barrier) of stratified squamous epithelia [1]. Members of this family seem to be restricted to mammals. 20.90 20.90 25.30 20.90 19.00 20.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.13 0.72 -4.00 38 508 2009-01-15 18:05:59 2003-08-19 14:22:41 6 23 16 0 38 498 0 41.10 53 104.60 CHANGED Q-..spE.ELHLG+QQp.......csQE.....E..LaLGc..p.pQcspE.ELH..LGc ....................QE..s.EPELpLGKQQp......Qcs..E....PE..LpLGK..QQQQEspEPELpLsc..... 0 0 0 0 +6823 PF06995 Phage_P2_GpU Phage P2 GpU Moxon SJ anon Pfam-B_7670 (release 10.0) Family This family consists of several bacterial and phage proteins of around 130 residues in length which seem to be related to the bacteriophage P2 GpU protein (Swiss:O64315) which is thought to be involved in tail assembly [1]. 25.00 25.00 25.00 25.00 24.60 24.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.26 0.71 -4.72 55 1047 2009-01-15 18:05:59 2003-08-19 14:26:17 6 5 735 0 148 714 14 118.70 37 69.33 CHANGED G.FhFsl.pTssapphpRptsaRasspsRlGt+PuhQalGsup-pIoLsGllaPchpG........GthsLptL+thAspGpshsLlpG...sG.pl.hGhaVIpsls-TpohahssGssR+l-FolsLc+hs-s ..............................GhaVFtl...pT...lsa..QplppppsaRassssRlsc+..su.hQalGssp-pIsLuGlL..hP.El.s.G...........G.choLp.tLcphAc.pG.+u.asLl...-.....G.....sG...pI...aG..h..aVIpslspTp.opFhtsGts++I-FoLoLcRsD-................ 
0 28 71 113 +6824 PF06996 DUF1305 Protein of unknown function (DUF1305) Moxon SJ anon Pfam-B_9388 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 300 residues in length. The function of this family is unknown although one member (Swiss:Q93IT4) from Salmonella enterica is thought to be involved in virulence [1]. 20.30 20.30 20.60 20.30 19.50 19.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.71 0.70 -5.39 121 1497 2009-09-11 01:13:09 2003-08-19 14:44:15 6 5 955 0 274 1048 48 297.70 28 87.63 CHANGED h...tpstpa...sFaphl+.ll-..............p..htspt....s......hG....psttstp-slRhpspsuLuFssu-l...ssl.....................tt..psss....hp..lpsshhGLhGssu..PLP.haaoEhl..hpR.............ppp-sshtsFlDlFpHRhhsLaYRuWpchphs.sshcp..ss.........s.......-tasphLhuLsG.......hu...t...........htt.............................................ptshs......sst.hLtauGhlsppsRospuLp....slLsphFs..ssVplcpahspWlplstsppspLG............tt...sspLGtsshlGpclhDtpu+hclpluPlshppatpaLP...........sG.p.....thtp.....LtpllchalGtphsa-lpLhlptcplssspL..Gsst......pLGhsuaLup ..........................................................................................t..pshpasFaphhc.hLc................p....htstp...............hu..tttpstp-tlRhpsssuluFsss-ltsl.................................................tp...pssst.........hclpstahGLhGspuPLP..ha.YhEhl..hp+.............tcpcsshtsFhDlFsHRhhshha+sWpchp.hh.hsacp....ss............p..................-tauthlhuLsG.......ls..tp......pt.....................................................ch.sls........ppt...hLuauuhLspt..sR..oscslp....slluphF......s...sslpl..ppahh+hlslssspp..spLG............tt...sshL.GtssllGpplhDhps+hplplssLshppahpFLP................sG.p........ph.s..Ltthlchals.phsa-lpL.tl.t..pp..ss.s.hpL..........Gssp................pLGaouaLG.......................... 
0 44 112 192 +6826 PF06998 DUF1307 Protein of unknown function (DUF1307) Moxon SJ anon Pfam-B_10058 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. Some family members are described as putative lipoproteins but the function of the family is unknown. 21.70 21.70 21.70 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.56 0.71 -4.08 14 1049 2009-01-15 18:05:59 2003-08-19 15:49:10 6 3 820 1 25 350 1 121.10 40 78.69 CHANGED EocTY..pus.hsGh.clplTYoY.KG.DcVlKQoscsplpYssLGlspcEt.....A.....cchlps.scpYpslcGlp.clcYpDctshEplslDYpKsDhccl.ppLsGht.s...s.csuKsIShccotchLcppGaKc ...................................................................EoKpa...sus.ls.Gp.-ltlT..YsY..KG.DKVLKQoocspIp.Y..ssl.G.ssscEp........A...........tKhLc.s..l...us...p.Y.KsI..tG..VccKlsYp.Ds..h..A..E.plsIDhpKlDhcsL......ptluGh...phs.......s..cssK..sI....SMtph.ppllctsGaKE.......................... 0 7 9 18 +6827 PF06999 Suc_Fer-like Sucrase/ferredoxin-like Vella Briffa B anon Pfam-B_8856 (release 10.0) Family This family contains a number of bacterial and eukaryotic proteins approximately 400 residues long that resemble ferredoxin and appear to have sucrolytic activity [1]. 
29.50 29.50 29.50 29.70 29.40 29.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.82 0.70 -4.34 28 603 2009-09-13 18:23:08 2003-08-19 16:01:51 7 12 371 0 360 589 30 219.30 22 64.87 CHANGED sLhGTupshppalllpp.........stsWsp..pt..tt.......slhpt......lsshlppptpshthth.lshpsscps................tspssp................................lllhsthlhht.....hcthpp-hltslLshshs.......................t...th..httspc..ps.hlhVCoHusRD+pCulhG.slhpchcpphsspsh.p...................................Vh.sSHlGGH+FAssllla..................spGhaaG+lsPcplssllcp....hscsphlhchhRGht .......................................................................................................................L.Gssts.htpal.lltp...........tsWsp....chhptp..........th.tt........................lt.t.h.hp.t..t.h.t...t.h..t.........t.h...hhh...psst.....................................................t.t.p...sp...............................................................................................................shlhsthhhh..h..........htt.h....p.h.h..p.t.hls..hsht.............................................................................t.............t.stt...psshlLlCoHup....R..D....t+..CGl.t.G.s..ltpphpcpltt....t....sh...........................................................................................lh.sSHlG...GH+aAuNllla................................................................................spGhaaG+lpP...c.s.sp.sllct..........hhpG.cl..l......hhRG.............................................. 0 98 225 312 +6828 PF07000 DUF1308 Protein of unknown function (DUF1308) Moxon SJ anon Pfam-B_13288 (release 10.0) Family This family consists of several hypothetical eukaryotic sequences of around 400 residues in length. The function of this family is unknown. 
25.00 25.00 26.30 25.60 19.30 24.50 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.13 0.70 -5.81 24 258 2009-01-15 18:05:59 2003-08-19 16:33:44 6 6 206 0 175 260 1 339.80 24 77.84 CHANGED +LpsplpuEL+hLp+lpsthssh.....................................................h.pplpSoNLsaaculhpthcp.ppslsult+sF......................shpc.sts.........plhVDlVussGpp..WVKV.stsscpLhhphttps..............................p.sshsllc.ApshlpAupts.sta+p.Pclhhlh........................................hss..ssl.hcpl+phG.....lsl...............................hhtstststss.....h.......tsshtph.s........s..htshssplNlDsTsLlAhVSslSa.sssph.............hpc+hltcQhchE.+pcPlLsp.L.sthcu+cL.........................................................................................................................lsscpAscchp-Il-TlGGssE+pRuppLhtpl.hl.s.....................................Pst...phppLht.ttlphpslplFuhssthchshhTsNtth.lcuApppGhhhsVhp+ss+sLo .......................................................................................................................................h.p.lttEhp.Lpph.t....................................................tth.oo...Nhsahpslhp....hpp...........pslsul.p.F................................................thtt..............tlhVDlVs..s..sG..tp..WlKshshps.ctLh...hhtpu............................................................p.s..phs.llcpApchlpAup........tp....h..tapp.....P+..lhhhh...................................................................................................ssh.s.h..hcplc.phG........lp.....................................................................t.ptst.....................t.s..p..t..h..t...s....................p...hsphspp...l..NLDhTsLlshVSsloa.sssph....................................................hppchlp..cQh.....p..E........pp.....p..............lhsp.L.s..hhp....s..+.pL.........................................................................................................................
hssppAhpphppIlphlGsssE+pRsp.Lht.p.......s..................................................................ph.pl........p.p....ph..a.shs.thph.hhss....s...t....th....lpts..p..uh.....hh...s................................................................................................................................................ 0 49 91 138 +6829 PF07001 BAT2_N BAT2 N-terminus Vella Briffa B anon Pfam-B_9101 (release 10.0) Family This family represents the N-terminus (approximately 200 residues) of the proline-rich protein BAT2. BAT2 is similar to other proteins with large proline-rich domains, such as some nuclear proteins, collagens, elastin, and synapsin [1]. 25.00 25.00 25.20 25.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.38 0.71 -3.95 10 243 2009-01-15 18:05:59 2003-08-19 16:36:34 6 1 87 \N 112 219 0 174.00 54 10.12 CHANGED MS-+uGQssKuKDGK.KYuoLSLFcTY...KGKS.LEsQK..ssVssRHGLQSLGK.VuluRRMPPPANLPSLKAENKGNDPNVslVPKDGTGWASKQ-Qs-scsoss.sspsPEopPsss.ttsss.....pP.ss..sss.ls.uus+SWA.sSsppGupG-Gspt..plssap+cEFPoLQ.AAG-QDKusK-psssDpu ...........................................................................MS-+uGpssKuKDGK.KYuoLsLFspY....KGKS..lE..s..QK..ssVss.R.H.GLQSLGK.....VuhuRR.MPPPANLPSLKAE..NKGND.PNVsl.VPK..D.G.T.GW..AoKQ..-...Qp...-s....c..s...s.....s....s...t..ss...psPEs.P.s..t.ttssss.................ps.sust..s.sspVs..susKS.....WA...ssp...tu...up...s.s...G..h..t...lspa......ppEFPoLp.....AAG-Q-KssKEpts....s...................................................... 0 25 38 65 +6830 PF07002 Copine Copine Vella Briffa B anon Pfam-B_9705 (release 10.0) Family This family represents a conserved region approximately 180 residues long within eukaryotic copines. Copines are Ca(2+)-dependent phospholipid-binding proteins that are thought to be involved in membrane-trafficking, and may also be involved in cell division and growth [1]. 
20.70 20.70 20.70 21.40 20.60 20.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.90 0.71 -4.45 18 1157 2012-10-10 16:07:06 2003-08-19 16:56:31 11 19 130 0 676 958 9 139.10 46 27.26 CHANGED SLHaIusp.psNsYppAlptlGpslpsYDsDchhP...AaGFGu..phs..-hsVoHs...Fslshssps.spCsGlpGllssY+pslPplpLhGPTsFuPIIstsschApt....psuupYaVLLIloDG..lT............shppTh-AIVpAScLPLSIllVGVG ..........................................SLHalsPh...t....NtY......pAlhuVG.pllQcY..Do...D.....Kh......FP.................A.aGF..GA....pl..Ps...c.h.........p.V.SHp..............Fsl..............Nh...s...sp.s.....s...C.p..G.l......pG.l...l.puYppsLs.p....lpL......h........GP.T........s.......Fu.PlIscsAch.A......t................p...s...u......s.....p....Y.al.................LLIlTDGs...lo...........................Dh...p..pT.pcAIVp....AS.p..L.PhSIIIVGVG............................................................... 0 224 337 496 +6832 PF07004 SHIPPO-rpt DUF1309; Sperm-tail PG-rich repeat Vella Briffa B anon Pfam-B_8734 (release 10.0) Repeat This family represents a short conserved region carrying a PGP motif that is repeated in eukaryotic proteins of sperm-tails. Shippo orthologues from some species may include up to 40 Pro-Gly-Pro repeats. 20.00 3.00 20.00 3.50 19.90 -999999.99 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.70 0.73 -8.34 0.73 -2.80 323 5278 2009-01-15 18:05:59 2003-08-19 17:13:05 7 51 129 0 3778 5557 23 31.00 24 51.20 CHANGED sPGPG..s.Y...sspp...................................ttssh...shus+pptt ......................................................sPGPG....s.Y...........sspp....................................................................................t.sth......sht.t....h.................................................................... 
2 2124 2616 3195 +6833 PF07005 DUF1537 Hop; PF07005; Protein of unknown function, DUF1537 Vella Briffa B anon Pfam-B_8609 (release 10.0) Family This conserved region is found in proteins of unknown function in a range of Proteobacteria as well as the Gram-positive Oceanobacillus iheyensis. 20.90 20.90 20.90 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.29 0.70 -4.67 131 1723 2009-01-15 18:05:59 2003-08-20 09:55:19 6 19 1153 4 415 1360 256 211.80 26 49.09 CHANGED Qoptp.....luhlshssl....tG..st..slpsplsphtspuh.phslsDAhsspDLttlupAstphs..........LhsGuuG..luhuLst.....thtttshhst.tst.s.......................................ss...sllluGSsSssTppQlsth...psshss....lplDsttlh.....t.......ts.h.....ppshshstsphsps...ss.llassssspsspsspt.phthtttu.h............lppsLuplstpl.h...........pt.u..lppllluGG-TSusVsps.Lulsulclss .......................................................Qst..ss.lshpsl......tpG..st....slpttLspl....p.....pp..Gh..phsVlDAls-pcLphhu.pAltp.ts..............LlsGuuG..luhuL.up.....p.hsptstttt...t..............................................sstsllluG...SsSphTppQlsth....c..p..t..s..ss...........hplD..stphl.......................p...htshh.......ppl..hphshsthspt....ts...llhso..t..s..s..p....s.htslpp.....th.t.....t.....p..thupt...........................lpphhuplstpl.h...........tp.s......lst.h.llsGG-TSu.sVsps.LGlpuhclt....................................... 1 108 236 339 +6834 PF07006 DUF1310 Protein of unknown function (DUF1310) Moxon SJ anon Pfam-B_10153 (release 10.0) Family This family consists of several hypothetical proteins of around 125 residues in length. Members of this family seem to be specific to Listeria and Streptococcus species. The function of this family is unknown. 
30.10 30.10 30.20 30.30 29.50 30.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.40 0.71 -4.60 18 373 2009-01-15 18:05:59 2003-08-20 10:41:32 6 2 106 0 22 303 0 116.40 36 85.44 CHANGED K..+WllllhhhLlslhhlGuthYhcccpc....pc..EMhpllpS-EAKcVaEctLKslDscAhTccGlI+oYcIDcpSIcpNPMGGI.VcLlINcDscLhlpasLp+ss.pGcLcuuuhshSscLscLL .....................hlhhhhlslhsh.h.hl.G.st.hhhs.p...cp........pp...EMhpllpScEsK.plhEctL+plDscAhTc...c...GhIpSYcIDcpSlcpNPMG.G.I.lpllIN.sD.clhlphslp+.....ps.....Gc.lpssu..hs..hStcLscLL.................................. 0 5 6 17 +6835 PF07007 DUF1311 Protein of unknown function (DUF1311) Moxon SJ anon Pfam-B_10506 (release 10.0) Family This family consists of several bacterial proteins of around 120 residues in length. Members of this family contain four highly conserved cysteine residues. The function of this family is unknown. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.54 0.72 -3.89 182 2203 2009-09-11 12:01:10 2003-08-20 10:44:19 7 31 1231 2 507 1711 82 90.10 22 47.59 CHANGED sC....psst..sssphth.Cs..stp..hpthDpcLspsY...pphhpphs.............tttt........ptLc...p....uQ+sWlph.R-tpCthttt..........t...........tsCh..hphsppRhtpL ..........................................................C....ttst..st.hphhh..Ct...s.tp..hpthDtcLspsY...pphhpphp............t.tp....................stLp.p......uQ..psWlph..R-t.pCt.h...t.............................hsChhphsppRhttL............................................ 0 105 233 375 +6837 PF07009 DUF1312 Protein of unknown function (DUF1312) Moxon SJ anon Pfam-B_10829 (release 10.0) Family This family consists of several bacterial proteins of around 120 residues in length. The function of this family is unknown. 
29.60 29.60 31.60 30.10 29.30 28.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.37 0.71 -4.26 40 812 2009-01-15 18:05:59 2003-08-20 10:52:32 6 4 615 16 142 554 21 112.80 29 80.84 CHANGED ll.llllllluhhslhhh.............t.psspsthAlIpl-G+lhpplsLsc.ptspphpl....psspthNllclcsspl+hp-us...CPDplCV+pGhIs+sGps.IlCLPp+llIcIp...uspp.pp ............................................................................lIhhlllhu.hhslhhh......................tttppsssthAl.l.pl.....-G..c..hc.phsLsp..t..ppphpl...................sspsthN.hl-.l.cssclRlp-us...CPDplCV+pGaIs+.sG.......po..IlCLPc+lllclcuspt..s.............. 0 74 124 133 +6838 PF07010 Endomucin Endomucin Moxon SJ anon Pfam-B_10834 (release 10.0) Family This family consists of several mammalian endomucin proteins. Endomucin is an early endothelial-specific antigen that is also expressed on putative hematopoietic progenitor cells. 20.90 20.90 20.90 23.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.73 0.70 -5.05 3 39 2009-01-15 18:05:59 2003-08-20 11:05:11 7 2 26 0 17 52 0 160.40 53 87.87 CHANGED MRLLQVTlLFFLLSNSLC+SEsuK-stNsSLs...ETSTTKASlTTPshVSl.pNsNKPTsGTPPKGTTsS-lsKTSLMoTlsSLTTPKHEltTTTcGVlKNESSThKlTVsNsTlSNAVSTLsSSQNKTENQSSIRTTEI.ssTslL.sDApPKpTuT.oSASLTTA+ThSQlQ..DTEDGKIAoToSTTPSYSSIILPVVIALIVITLLVFTLVGLYRICWKRDPGTPENGNDQPQSDKESVKLLTVKTISHESGEHSAQGKTKN .........................................................................................................................................................................................................s.....................................s.s...........t.tp.sss.t.s......ShSu.II.LPVVIALIVITLSVFlLVGLYRhCWKsDPG.s.EsGs.-QPQSDKESVKLLTVKTISHEoG....................... 
0 1 1 4 +6839 PF07011 DUF1313 Protein of unknown function (DUF1313) Moxon SJ anon Pfam-B_10989 (release 10.0) Family This family consists of several hypothetical plant proteins of around 100 residues in length. The function of this family is unknown. 20.40 20.40 20.50 60.40 20.30 18.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.98 0.72 -4.32 12 107 2009-01-15 18:05:59 2003-08-20 11:07:00 6 2 42 0 55 103 1 87.10 52 69.78 CHANGED stutDuclhpshppSFsQVQulLDQNRhLIpclNpNHpSRhsDsLsRNVuLIRELNsNIp+VlslYuDLSssFupsh..ctupp....ussosss ...........s.tsDsKlhpsFp+SFsQVQslLDQNRlLIsEINQNHES+hPDNLoRNVuLIRELNsNIpRVVsLYuDLSsoFs.+oh.csssp....upss..s................ 0 9 37 47 +6840 PF07012 Curlin_rpt Curlin associated repeat Moxon SJ anon Pfam-B_10299 (release 10.0) Repeat This family consists of several bacterial repeats of around 30 residues in length. These repeats are often found in multiple copies in the curlin proteins CsgA and CsgB. Curli fibres are thin aggregative surface fibres, connected with adhesion, which bind laminin, fibronectin, plasminogen, human contact phase proteins, and major histocompatibility complex (MHC) class I molecules. Curli fibres are coded for by the csg gene cluster, which is comprised of two divergently transcribed operons. One operon encodes the csgB, csgA, and csgC genes, while the other encodes csgD, csgE, csgF, and csgG. The assembly of the fibres is unique and involves extracellular self-assembly of the curlin subunit (CsgA), dependent on a specific nucleator protein (CsgB). CsgD is a transcriptional activator essential for expression of the two curli fibre operons, and CsgG is an outer membrane lipoprotein involved in extracellular stabilisation of CsgA and CsgB [1]. 
20.70 20.70 21.10 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.46 0.72 -4.23 22 2923 2009-01-15 18:05:59 2003-08-20 11:21:28 7 10 529 0 240 818 103 32.30 40 50.36 CHANGED GssNssslp.QtGssN......tuhlsQh.GssNpsplsQtG ..............sssp.hplp.Q.t.GssN............sAh.ls..Qp..Gs.ss.cssloQhG.......... 0 40 67 153 +6841 PF07013 DUF1314 Protein of unknown function (DUF1314) Moxon SJ anon Pfam-B_10999 (release 10.0) Family This family consists of several Alphaherpesvirus proteins of around 200 residues in length. The function of this family is unknown. 25.00 25.00 90.20 89.90 22.40 16.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.14 0.71 -4.88 5 24 2009-01-15 18:05:59 2003-08-20 11:24:38 6 1 18 0 0 20 0 170.40 41 75.45 CHANGED c+sLLcuLuGRslDLPGGG-.LtIssssGps...hs+FspuGouRsshl+aIGRAhTsGssRcFlIatu+-..GsVYGYEsuTGLHhLAcSLHDFLsp+GLSQRDLhVlcGshhcuclssLsh..phh+soS-ssplssp.oschstsTsssuctspSoo+Rs...oLuslss....shpsp+lls+GS scsLLcsLuGRslDLPGGs-.ltIssssGps...ht+appsGsp+hshs+hIGRAhs.GssRcFllhhs+s..uslaGYEsuTGLHhLApSLH-FLpppGLSpRDLhlhcushhshphpslsh....hpsoops.plshp.sss..hsTtsssptppooocRs...sLsslss.....hpspphls+GS............................. 0 0 0 0 +6842 PF07014 Hs1pro-1_C Hs1pro-1; Hs1pro-1 protein C-terminus Moxon SJ, Vella Briffa B anon Pfam-B_11205 (release 10.0) Family This family represents the C-terminus (approximately 270 residues) of a number of plant Hs1pro-1 proteins, which are believed to confer nematode resistance [1]. 
25.00 25.00 68.20 63.10 23.00 22.60 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.88 0.70 -5.06 5 41 2009-09-11 08:44:19 2003-08-20 12:04:16 7 3 20 0 24 44 8 248.10 56 59.96 CHANGED MpRCsYTLGLGEPNLAGKPsLcYDAVCRPsELHALK.......csPYsD+I-NpENQsLaTIHQILESWIauSspLLsRIssRIEcc+FEKAAsDCYlLERIWKLLAEIEDLHLLMDP-DFL+LKcQLpIKSou.cs-AFCFRSKGLVEVsKMSKDLR+KVPsVLuVEVDPsGGPRlQEAAMKLY..cRKoEaE...KIHLLQAMQAVEuAsKRFFFGY+QLVAAMMGSAEANANRslh..SsESsDSLoQlFLEPTYFPSLDAAKTFLG-FWs+c ......MptCsaTLGLGEPNLAGKPsLcYDtVC+PpElHuLK.......posa..hh..c.NpENcsLaThHQIlESWlpsuppLLpRlscpIcs+saEtAApDCallERlWKLLs-lEDLHLlMDPDDFLRLKpQL........ul....+......oss..p...s........u.....uFCFRS+tLlcls+hs+DL+++VPplLuVEVDPpGGPRlQEAAM+LY........pc+sthE...............KlaLLQAhQAlEsAh+pFFauY+QllssVMGouEupustshh..ss-uhDsLoQhFLEPsYFPSLDAAKTFLupaWsp.p............... 0 8 17 20 +6843 PF07015 VirC1 VirC1 protein Moxon SJ anon Pfam-B_11309 (release 10.0) Family This family consists of several bacterial VirC1 proteins. In Agrobacterium tumefaciens, a cis-active 24-base-pair sequence adjacent to the right border of the T-DNA, called overdrive, stimulates tumour formation by increasing the level of T-DNA processing. It is thought that the virC operon which enhances T-DNA processing probably does so because the VirC1 protein interacts with overdrive. It has now been shown that the virC1 gene product binds to overdrive but not to the right border of T-DNA [1]. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.35 0.70 -5.28 4 74 2012-10-05 12:31:09 2003-08-20 12:08:44 6 2 60 0 21 1997 816 212.80 28 93.83 CHANGED MpLlThCSFKGGAGKTTALMGLCuuLAucG+RVALFEADENRPLo+W+ENAhppusWDstCElasADELPLLEsAYEpAEspGFDYALADT+GGuSELNNTIIASSsLLLIPTMLTPLDlDEALuTYRYllELL.luEsLsIPsAILRQRVPssRLToSQphh.-MLEpLPlhDsPMaERDAFAAMK-RGMLHlsltNhupsPoMRLhhRNlctAh--lshluchlpcsLEu ........................M.llshsS.KGGuGKTT..u.h..hhLu..s.tl....A.....p.....p..G.....p....p..Vs.....llDAD....s..N.........p....s...l.s.p....W.....t..p..t....s.......t..t...s.......s...h......s......s........h...p......l....h..s..s..s...c......s...h..l...c..th....h...p....p...s...p..t..p.s.h...s.al.l.lDh...c.G....s..u...o...t..h..s.s...h...s..Iu.p..u.D.lllI...Psthoth..Dh...c....pAhp.sh.phlt..c.......h...psh..p..h...p....I...P..h....s.V..l.h...o..+..s...s....s....t....t..h.pp...t.p..c.h.h.p.-h.....h......p...p......l.....P.......l...h...p...s.......hh-.R....sAa..pu..h.hp.h.G..h.............................................................tt................................................................................................................................................... 0 7 17 19 +6844 PF07016 CRAM_rpt Cysteine-rich acidic integral membrane protein precursor Moxon SJ anon Pfam-B_11042 (release 10.0) Repeat This family consists of several 24 residue repeats from the Trypanosoma brucei cysteine-rich, acidic integral membrane protein precursor (CRAM). CRAM is concentrated in the flagellar pocket, an invagination of the cell surface of the trypanosome where endocytosis has been documented [1]. 19.10 19.10 27.40 19.60 16.00 15.40 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -7.89 0.72 -4.60 2 132 2009-01-15 18:05:59 2003-08-20 12:15:35 6 4 4 0 60 150 0 23.90 95 108.27 CHANGED .sDDCNITGDCNETDDCsITGDCN .ETDDCNITGDCNETDDCNITGDCN.... 
0 2 60 60 +6845 PF07017 PagP Antimicrobial peptide resistance and lipid A acylation protein PagP Moxon SJ anon Pfam-B_11014 (release 10.0) Family This family consists of several bacterial antimicrobial peptide resistance and lipid A acylation (PagP) proteins. The bacterial outer membrane enzyme PagP transfers a palmitate chain from a phospholipid to lipid A. In a number of pathogenic Gram-negative bacteria, PagP confers resistance to certain cationic antimicrobial peptides produced during the host innate immune response. 21.90 21.90 22.90 24.40 21.40 18.60 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.96 0.71 -4.89 10 610 2009-09-10 16:46:51 2003-08-20 13:10:46 6 3 581 4 62 268 7 143.80 76 77.22 CHANGED pNVupTWsps.spDLYlPslTWHNRasYDcEKIDsYNE+PWGuGYGhSRYDEcGsWHGLYhMAFpDSHN+aEPIsGYGapKhWpPsp..pDa+hGlGaTAulTARc-a.sYIPIPslLPLASluY+plohQuTYIPG....TYNNGNVLFuWhR ..........................NlApTWpQPEHYDLYlPAITWHARFAYDKEKT..DRYNERPWGuGFGhSRWDEKGNWHGLY..AMAFKDSaNKWEPIuGYGWEpTWRPL.s.D.-NF+LGLGFTAGV..TAR....DN.W.NYIPlPVLLPLASlGYGPsTFQMT...YIPG....TYN.NGNVYFAWMR................................ 0 4 23 43 +6847 PF07019 Rab5ip Rab5-interacting protein (Rab5ip) Moxon SJ anon Pfam-B_11031 (release 10.0) Family This family consists of several Rab5-interacting protein (RIP5 or Rab5ip ) sequences. The ras-related GTPase rab5 is rate-limiting for homotypic early endosome fusion. Rab5ip represents a novel rab5 interacting protein that may function on endocytic vesicles as a receptor for rab5-GDP and participate in the activation of rab5 [1]. 
21.40 21.40 21.60 21.50 21.10 21.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.67 0.72 -3.85 38 446 2009-01-15 18:05:59 2003-08-20 13:25:37 7 12 273 0 301 409 2 82.00 27 60.80 CHANGED pslhasRphh.........ullhGlsuGlLsLpGhh.Ghlhahlsshhhshhhhs..phtpss.........cpaatshpclh.hpGlhsulss..........Fl....lsWshh..........Y ............sl.ahRphh.........ullhGlsuGlLsLpGhh.GFlha..hlhshhlsh.lhhs...pht.p.s.................................cpha.t.u.hhplh.ppG..hhs..uhhs........................Fl....lhWhhhY............................................... 0 96 159 244 +6848 PF07020 Orthopox_C10L Orthopoxvirus C10L protein Moxon SJ anon Pfam-B_12732 (release 10.0) Family This family consists of several Orthopoxvirus C10L proteins. C10L viral protein can play an important role in vaccinia virus evasion of the host immune system. It may consist in the blockade of IL-1 receptors by the C10L protein, a homologue of the IL-1 Ra [1]. 25.00 25.00 34.80 32.80 22.10 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.31 0.72 -3.45 4 34 2009-01-15 18:05:59 2003-08-20 13:30:38 6 1 19 \N 0 28 0 62.90 80 98.48 CHANGED MSucGt....SpGGhapsFhps.sGusKts+ohoSGGG........uMWGG......................GuSuGlpuGVpGGVNGGVNuGsuKI ...MSSKGG.....SGGMWSVFIHGHDGSNKGSKTYTSGGG...........GMWGG.......................SSS..GVKSGVpGGVNGGVKSGTGKI.... 0 0 0 0 +6849 PF07021 MetW Methionine biosynthesis protein MetW Moxon SJ anon Pfam-B_11086 (release 10.0) Family This family consists of several bacterial and one archaeal methionine biosynthesis MetW proteins. Biosynthesis of methionine from homoserine in Pseudomonas putida takes place in three steps. The first step is the acylation of homoserine to yield an acyl-L-homoserine. 
This reaction is catalysed by the products of the metXW genes and is equivalent to the first step in enterobacteria, gram-positive bacteria and fungi, except that in these microorganisms the reaction is catalysed by a single polypeptide (the product of the metA gene in Escherichia coli and the met5 gene product in Neurospora crassa). In Pseudomonas putida, as in gram-positive bacteria and certain fungi, the second and third steps are a direct sulfhydrylation that converts the O-acyl-L-homoserine into homocysteine and further methylation to yield methionine. The latter reaction can be mediated by either of the two methionine synthetases present in the cells [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.11 0.71 -4.93 6 667 2012-10-10 17:06:42 2003-08-20 13:50:55 7 7 656 0 204 5525 4101 186.90 43 86.71 CHANGED RsDachItchI.PGSRVLDlGCGDGsLLpLLp-pKpVcGpGlElspsGVscClA+GLsVlQGDhDpsLspasD+uFDaVlLSQTLQATRNPcpVL-EhLRIG++sIVSFPNFGHW+sRhpLhs+GRMPVTcsLPYsWYsTPNIHFCTltDFhpLCc-lshpl-cpsAlcttut.h...hspah.NahGchulFhl 
.............................................................R.Dh.p..l..Itc.h..l......s...s...S...R...V...L.DL..G.C.G........D.G....p....L....L....p....h.......L.....t........c......p...........+.........p......s........p....G...........h....G.....l.........E....l..........D.........p........s.......s........l..........h.........s......s.........l......u..........+..........G.........l......s........V.............I....p..........t......D........L......-.........c.....G.............L.......s......p.................F..........s......D.....p......S...F...D...h.V.l..L...S....Q.......T...L....Q...A...l....c.....p...P....-.....t......l.....L.....c....E....h....h....R...l.......G....+.....p.....s..I.....V.....o.F.....P..N......F.....G...a........W...+.....s..R.h.....p....l..h..........p......G.+.M...P......V.....o..cp....L......P.Yp.WYs..T............P..N...I.+.......h...s.TlcDFE.tLstch.s..l..cl.l..c...R.hs.l.stpp..............hsphhPNLhuphAlahl.................................................................................................................................... 0 67 135 169 +6850 PF07022 Phage_CI_repr Bacteriophage CI repressor helix-turn-helix domain Moxon SJ, Bateman A anon Pfam-B_11145 (release 10.0) Domain This family consists of several phage CI repressor proteins and related bacterial sequences. The CI repressor is known to function as a transcriptional switch, determining whether transcription is lytic or lysogenic [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -9.03 0.72 -3.89 14 693 2012-10-04 14:01:12 2003-08-20 13:57:29 8 9 476 2 136 1419 82 63.90 28 33.13 CHANGED sslcRlhcAYGFpscppLusaLulu+SThu......................................................................................................shhtR-phP.u-hllpCuLETGsSLpWLssGcGphh .......................hlcRlhpsh...sh..p..s......p..p..pLuch.LGlupuTlu......................................................................sh.h.t..R...s..s..h..P...s-.h.ll..p.h.u.h...c..s..G.lslpW.LhsGpGt................................................... 0 33 82 113 +6851 PF07023 DUF1315 Protein of unknown function (DUF1315) Moxon SJ anon Pfam-B_11170 (release 10.0) Family This family consists of several bacterial proteins of around 90 residues in length. The function of this family is unknown. 25.00 25.00 32.80 32.80 20.30 19.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.27 0.72 -4.10 26 846 2012-10-03 10:59:06 2003-08-20 14:00:57 7 1 840 0 114 362 43 88.00 57 95.55 CHANGED h-pllpshTPElYppLppAVElGKWPDGpsLTsEQ+-sshQAVMhYps+Hshssccho....GplshKoKpphpt.......................pslthhp ....l.--lIsuMTPEVYQRLsTAVELGKWPDGVALTpEQKENsLQlVMLWQARHNs-AQHMTIsTsGpMVMKSKQpLKccFuhs........................scshshhc..................... 0 17 40 80 +6852 PF07024 ImpE ImpE protein Moxon SJ anon Pfam-B_11208 (release 10.0) Family This family consists of several bacterial proteins including ImpE (Swiss:Q93EC9) from Rhizobium leguminosarum. It has been suggested that the imp locus is involved in the secretion to the environment of proteins, including periplasmic RbsB protein, that cause blocking of infection specifically in pea plants [1]. The exact function of this family is unknown. 
25.00 25.00 38.40 77.50 23.60 18.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.53 0.71 -4.16 36 372 2009-09-10 16:41:21 2003-08-20 14:24:06 8 4 347 1 71 256 11 123.30 43 45.34 CHANGED sussFsWlsDuDsRLGPshEll.ssGpYhWlPaupIpslchpsPusLhDllWpPspls....ltsGststualPsRYsuotps........................ssAh+LGRcTcWp-.hups.shhGhGQ+hahssss-huLh-lpplph ........sssFsWluDSDoRLGPVhEll.suGsYhWlPFupIpSLchspPspLpDLlWpPspls....LhsGssttualPsRYsuScpu..................................sDulRLuRcTsWp.........-....sucs.....sshulGQKsWlT.s...pG-huLhDltphpF...... 0 13 30 52 +6854 PF07026 DUF1317 Protein of unknown function (DUF1317) Moxon SJ anon Pfam-B_12646 (release 10.0) Family This family consists of several hypothetical bacterial and phage proteins of around 60 residues in length. The function of this family is unknown. 20.60 20.60 20.90 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.92 0.72 -4.09 2 125 2009-09-11 12:57:57 2003-08-20 15:10:15 6 1 114 0 4 77 0 57.50 79 96.27 CHANGED MKHsHDsIpVGtlhhsYSsh+pGWlhPGhplh+NPhpA.phAEEhNNhhtulpsc...Ls ...........MpHPHDNIRVGuITFVYSVTKRGWVFPGLSVIRNPLKAQRLAEcINNKRsAVCTKHL.LS.................. 0 1 2 3 +6855 PF07027 DUF1318 Protein of unknown function (DUF1318) Moxon SJ anon Pfam-B_11321 (release 10.0) Family This family consists of several bacterial proteins of around 100 residues in length and is often known as YdbL. The function of this family is unknown. 
20.80 20.80 21.00 20.80 20.70 20.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.04 0.72 -3.75 54 739 2009-01-15 18:05:59 2003-08-20 15:21:49 7 2 731 0 142 388 33 94.30 54 81.96 CHANGED hhuhsu......hAh.sLspA+upGlVGEphsGYLulVt..sss-s.psLVpsINtpR+ApYpclApcN.slol-pVuphAupKhl.p+stsGpalps.sGpWh.+K .....................................L.hosss.hAL..TL-EARoQGRVGEThsGYL......s.....s......l+.............s......D......AE.....T..ppLVp-INttR+AuYQQLAc.........p......N.sl...o....l..--...lAK...lAGQ.KLV.tRA+PGEYVQGINGKWlRK......... 0 43 80 112 +6856 PF07028 DUF1319 Protein of unknown function (DUF1319) Vella Briffa B anon Pfam-B_10434 (release 10.0) Family This family contains a number of viral proteins of unknown function approximately 200 residues long. Family members seem to be restricted to badnaviruses. 28.10 28.10 28.20 59.30 26.30 28.00 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.64 0.71 -3.94 11 50 2009-01-15 18:05:59 2003-08-20 15:23:18 6 1 29 0 0 50 0 127.20 41 80.65 CHANGED Spps...pL-YLDLAopsKsS..sp-LAHNLplsscRlsLts+V...........................plpplppI.+p.ctpLcclcpclcpLpc-LpsLcp-alpR+PLSKp-Vc-LVlcIoEQPKhIEcQoEtLocELpccV-cl ............t...pL-YLDL.tots...Klo..Np-LuHNLplsh.RhsLss+V...................................plcphppI.cp....lcchp.pclctLppcLpsLpp-alc++PLoKp-VccLVlcIuEQPKhIEcQs.tLoc-Lppclccl................. 0 0 0 0 +6857 PF07029 CryBP1 CryBP1 protein Moxon SJ anon Pfam-B_11415 (release 10.0) Family This family consists of several CryBP1 like proteins from Bacillus thuringiensis and Paenibacillus popilliae. Members of this family are thought to be involved in the overall toxicity of the bacteria to their hosts [1,2]. 
19.50 19.50 20.80 20.30 19.20 18.10 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.91 0.71 -4.49 5 45 2009-01-15 18:05:59 2003-08-20 15:29:38 6 1 21 0 4 42 0 155.90 37 85.48 CHANGED pElPcplo.ctppKlPFCCVVSIP+GFchVusscPKLVYsLssLSllKETCRKsVpVD-CGpAEVDLHVLKVKGCIPFIuNlEVcPIsscpsCoosPHccpISLSCp-oVCVDHVLKCSVssLPDacIDCcaVsVsDLplpPl+EssCQFVKIoGpFsFaYl ........................thp.ctpp+lPFCCllSlPpGFplhss....s.p.KlVYslssLuhlKETC+KslpV--CGpsElDLpVLKlKGCIsF.lsNh.plc..Phsppphsoo.s....s.........+sppIsLSCpsoVsVDplL..KhSVs..p..LPchpIsspplplpDlplpsh....pEssp...phl+loGtFpFh........................................................... 0 3 4 4 +6858 PF07030 DUF1320 Protein of unknown function (DUF1320) Moxon SJ anon Pfam-B_13638 (release 10.0) Family This family consists of both hypothetical bacterial and phage proteins of around 145 residues in length. The function of this family is unknown. 20.70 20.70 21.20 20.80 20.60 20.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.61 0.71 -4.41 48 429 2009-01-15 18:05:59 2003-08-20 15:31:54 7 1 362 0 75 343 19 130.70 24 89.87 CHANGED YsThsDLhsphuppplhp.................L..sscs.........ssths.shlppAls-AsstIDuaLtuR.Ys...LPL..ssVPslLpphssslAcYhLhspcst.....s-t......lpccYc.pAl+hLcplusGclsLGlss.........ssssssssststhpuspp ................................Yso.pD.lh.tphspp...tlhp..........................L.....ospp...........ssthsps.hlppAls-.Asu.IDuYLtsR....Yp...............l.P.l..............sss.Psl..LpphssslAh.Yp.L.tspps.........s-p........lcccYc.pAlchLcplusGcl..sLulss........ttts..tt.s...h.....tt.................................... 0 32 58 67 +6860 PF07032 DUF1322 Protein of unknown function (DUF1322) Moxon SJ anon Pfam-B_13233 (release 10.0) Family This family consists of several hypothetical 9.4 kDa Borrelia burgdorferi (Lyme disease spirochete) proteins of around 78 residues in length. The function of this family is unknown. 
19.60 19.60 20.00 21.40 19.10 19.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.73 0.72 -3.78 2 124 2009-01-15 18:05:59 2003-08-20 15:53:41 6 2 29 0 8 73 1 72.70 69 85.59 CHANGED MsK............sEhtppYFphlD.l..cssKYYFPllhsICoYcDVKKh.YcELLEVNRlAslKLpKEhYEhhLuh ..............MpKhN+DIDKAIASLNEoRKKYFNLLDEI..KNDK..YYFPVIMNICSYcsVKKLPYDELLEVNRlA-lKLEKE.LYELILSK............... 3 5 5 5 +6861 PF07033 Orthopox_B11R Orthopoxvirus B11R protein Moxon SJ anon Pfam-B_13245 (release 10.0) Family This family consists of several Orthopoxvirus B11R proteins of around 70 residues in length. The function of this family is unknown. 25.00 25.00 141.50 141.50 20.80 19.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.36 0.72 -4.50 2 31 2009-09-11 15:28:16 2003-08-20 15:58:21 6 1 12 0 0 30 0 71.10 94 81.57 CHANGED DTsssNVEDIMNEIDREKEEILKNVEhENNKNIpKNHPsEYIREALVINTSSsSDSIDKEVIECISpsVGI ..DTDV.TNVEDIhNEIDREKEEILKNVEIENNKNINKNHPSEYIREALVINTSSNSDSIDKEVIECISHDVGI 0 0 0 0 +6862 PF07034 ORC3_N Origin recognition complex (ORC) subunit 3 N-terminus Vella Briffa B anon Pfam-B_10452 (release 10.0) Family This family represents the N-terminus (approximately 300 residues) of subunit 3 of the eukaryotic origin recognition complex (ORC). Origin recognition complex (ORC) is composed of six subunits that are essential for cell viability. They collectively bind to the autonomously replicating sequence (ARS) in a sequence-specific manner and lead to the chromatin loading of other replication factors that are essential for initiation of DNA replication [1]. 
22.30 22.30 22.30 22.80 20.60 22.20 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.95 0.70 -5.47 13 289 2009-01-15 18:05:59 2003-08-20 16:27:21 6 7 223 0 199 294 2 270.20 24 40.30 CHANGED K..R............Kputspcsh..hpssspsscchhsphRacsapphWpplcsph-clQcphNt+lh-sLlcFlccstpstptp...h.tp.............pclPTAALls...............GlNhsDH.hpFpsLoppL+sssssaVshLpS+DCsu........lKthlpplltQL......hcsssslctc-p-...........ht.p+sphslssLsoWYp..................pppphsssp.pp................................PlVVIlcDhEsFsspVLQDFIlIhSpalpclPllLVFGlATossulHplLPapVSShLslclFQo.ussppLssVlDclLLosphPF+LSuKVhplLpsIFLYHDFSlpuFI+Gl............KhuhlEHFhspPLSsL ..................................................................................................................t.p..................................p.thphhp.hWtt.pt.ph..p.........pl.pphptphhppl.pFlppt..t...........................................tplPsuhlhh......................GsNh.ssp.hhhppL..tp..tl.p...p.....p........s........s.....h...l...s.L.p.ut-sss.........................................l.+thlppllpph....................................hs.p..s..p.t..p.....................................t..ph...th....s...hp.Lt..saat........................................p..pht..................................................................................................................................................................lVllhcDhE.uFs.splLpDhlhl...h.....S..p..a........h..p........c...........l...........Phh....llhGl..A..T..os..ph..hc...phLsps.sss..h.......Lph....p..h....F...p......sspphlsplhcpllh....s....s.p....h....s..........h...p..l..utplhp.hLhs.hah...pshSlpsFlpul............phshhpHFaspPLShL.................................... 
0 59 105 159 +6863 PF07035 Mic1 Colon cancer-associated protein Mic1-like Vella Briffa B anon Pfam-B_10041 (release 10.0) Family This family represents the C-terminus (approximately 160 residues) of a number of proteins that resemble colon cancer-associated protein Mic1. 20.80 20.80 21.20 23.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.69 0.71 -4.89 5 183 2009-01-15 18:05:59 2003-08-20 17:08:55 7 2 131 0 123 177 0 150.30 34 25.00 CHANGED chhtccsYlluIlMEYLRSLsctpIssptpLacMlIcpLApuscFscLppFVoY+lLp-SKsLAhhLLShuspsosl+QLGlDML+RluhAHDhIlEVLLpcGplL-ALRaA+chtslssVpusKFLEAApposDsQphaAIhRFFoE+p.+h.hh.uFssshcTsE ............................................h..pallullhEYlRSLsp...hp.Is....sp.ch.LacllIptLlppp.t.ahhLcphlpY+VlsDSKslA..phL..LShp..........s..h............a..........ss.s..pQLuLDML+RLs.up-tIlElLLsctplLtALRah+sh.....tt...hss.........l...ss..p+.........FL-AAhpspDs.lhaslh+FFpp..........F..................................... 0 47 65 97 +6865 PF07037 DUF1323 Putative transcription regulator (DUF1323) Moxon SJ, Bateman A anon Pfam-B_12705 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 120 residues in length. This family appears to have an HTH domain and is therefore likely to act as a transcriptional regulator. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.52 0.71 -3.94 10 925 2012-10-04 14:01:12 2003-08-21 10:48:22 6 1 468 0 36 265 4 117.00 62 94.57 CHANGED MTPEELAchoGYoRQTIN+WVRKcGW+TpPhPGVpGG+ARLlHlsppVREFIpsss..........RlsEssAtY.sssscusl.s.llsslcpMTssEQcQLssLLsREGIsGLLpRLGIR-s ......................MTsEELAchhG..h..uR..QTlN+Wl..Rc.cGWp...Tp.hPGV..pG.G.+.AR...Ll..H.VspQVREaIpNs...........cs.EshAEh..Asuu.cAPh.t.llohscpMTssEQcphsphLsREGI..pGhLpRLGIc-S.................................................................................... 
0 3 3 21 +6866 PF07038 DUF1324 Protein of unknown function (DUF1324) Moxon SJ anon Pfam-B_12735 (release 10.0) Family This family consists of several Circovirus proteins of around 60 residues in length. The function of this family is unknown. 25.00 25.00 140.50 140.30 19.30 18.10 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.11 0.72 -3.94 2 9 2009-01-15 18:05:59 2003-08-21 11:20:11 6 1 6 0 0 7 0 59.00 97 94.82 CHANGED MpCTLVFQSRFCIFPLTFKSSASPRKFLTNVTGCC.ATVTRlPLSNKVLTAVDRSLRCP MTCTLVFQSRFCIFPLTFKSSASPRKFLTNVTGCCSATVTRLPLSNKVLTAVDRSLRCP 0 0 0 0 +6867 PF07039 DUF1325 SGF29 tudor-like domain Moxon SJ, Bateman A anon Pfam-B_13304 (release 10.0) Domain This domain is found in the yeast protein SAGA-associated factor 29. This domain is related to members of the Tudor domain superfamily such as Pfam:PF05641. The SAGA complex is involved in RNA polymerase II-dependent transcriptional regulation. The membership of the tudor domain superfamily suggests this domain may bind to RNA. 25.30 25.30 25.40 30.20 25.10 25.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.91 0.71 -4.53 20 328 2012-10-02 16:56:36 2003-08-21 11:28:18 6 4 260 14 233 311 5 129.80 33 39.46 CHANGED phG-pVAAphp.......psstc.........p-WIlucVlphsscs..p+YEVhDsDs-...ppt...taphot+clIPLPp.......s.cssstspFPpGopVLAlYPp.....TTsFY+AhVhss.................sp....ptsspYpLcF-D-p.sst....hpVsp+aVlshsp ....................................h..GspVAh+sp..........ttstc................ppWIlscVlp.hstps...p+......YEVpD.-s-..............ppp.......paphotpplIPl.Pp.......t.pssshs.F.stGppVLAlYPp.......TTsFY+AhVhus.............................................t........pt.p..s...s...YplpF-s-ptsst.....hpVspRaVlsh..p.......................... 
0 86 129 193 +6868 PF07040 DUF1326 Protein of unknown function (DUF1326) Moxon SJ anon Pfam-B_11597 (release 10.0) Family This family consists of several hypothetical bacterial proteins which seem to be found exclusively in Rhizobium and Ralstonia species. Members of this family are typically around 210 residues in length and contain 5 highly conserved cysteine residues at their N-terminus. The function of this family is unknown. 25.00 25.00 29.30 29.30 24.80 24.70 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.14 0.71 -4.52 34 220 2009-01-15 18:05:59 2003-08-21 11:31:31 6 2 187 0 80 216 231 181.30 31 86.66 CHANGED htsCsCshhCPC.hs......t.PTp..GtCcuhhuhcI-cGpasslcLsGLsluhlhchPGs.ht-G.....shcssh....alD-cAo-tQhcALhpIhoGps....Gu.hushusllu.chhusthsPIcacscspt......ps..lplsshhphth-shhpPhsGsstcs.hslssssa..........hphucsspsph..pshGh.sachss..ppu ....................h.tsCsCshhCPC.hu......tsPTp..G....pCcuhhua+I-cGpas-lcLsGLsluhls....ph.P....Gs....lt...cG.....shcssh....alD-RAossQtcALhpIaoGps....GGhhuhFuph.lu.chhuschAPIca-scscp......ts....lcls.sh.hp....s.hsPhsGsstcs.hslssssap..h.st...hthucss.psph...cshGh.shshsspp............................................................................................ 0 23 49 63 +6869 PF07041 DUF1327 Protein of unknown function (DUF1327) Moxon SJ anon Pfam-B_11630 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 115 residues in length which seem to be specific to Escherichia coli. The function of this family is unknown. 
25.00 25.00 25.50 25.30 24.30 23.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.48 0.71 -4.17 3 518 2009-01-15 18:05:59 2003-08-21 11:33:54 6 1 251 0 5 248 0 105.10 69 80.18 CHANGED MTQDYELVVKGVRNFENKVTVTVALcDKc+FDGEIFDLDISLDRVEGAALEFYEAAARRSlRQVFLDVAAGLCEGDEpLPEKRPlILEAQsVhITY+GKLPGhITGSLKTPPs .....MppcYELlVKGIpNa.sKlTVTVALc.tth.sh.l.DlsISLDRsEGAsLEFYEAtA++ps+QhFhDVAAGLCEGDt.LPEKRPlILEAQsVhITY+GKLPGhITGSLKhPP.......................... 0 1 1 1 +6870 PF07042 TrfA TrfA protein Moxon SJ anon Pfam-B_12321 (release 10.0) Family This family consists of several bacterial TrfA proteins. The trfA operon of broad-host-range IncP plasmids is essential to activate the origin of vegetative replication in diverse species. The trfA operon encodes two ORFs. The first ORF is highly conserved and encodes a putative single-stranded DNA binding protein (Ssb). The second, trfA, contains two translational starts as in the IncP alpha plasmids, generating related polypeptides of 406 (TrfA1) and 282 (TrfA2) amino acids. TrfA2 is very similar to the IncP alpha product, whereas the N-terminal region of TrfA1 shows very little similarity to the equivalent region of IncP alpha TrfA1. This region has been implicated in the ability of IncP alpha plasmids to replicate efficiently in Pseudomonas aeruginosa [1]. 
20.20 20.20 20.60 20.20 20.10 19.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.93 0.70 -5.59 2 183 2012-10-04 14:01:12 2003-08-21 11:42:26 6 2 103 0 39 169 15 194.10 48 81.52 CHANGED K++TAGt-LAcpVSEAKQsALLKHTKQQIK-MQLSLFDlAPWssohtAhPNDhuRSAlFTsRNKKlPR.ALQspsIaHVNKDVcITYTGlELRADDDELVatQVLEYAptTslGEPloFTFYELCQDLsWSINGRYYT+AEECLoRLQAoAMtFoSpRlG+LESVSLl+RFRVLDRGK+TSRCQV.IDtEIVVLFAGDHYTKFlWEKYRKLSPs.stRMFDYFuoH+EPYPLKLETFRLMCGSDSTRsKKWREQVGEAC-ELRtsGLVEpAWVNDDLVHCKR ......................................................................................................................s..hhRsslFs..........................................th.l.apG.pL......pt........la.....t.h..h.p...tpt..hs.....t.....h.h...s.hthhptlsht.......st...t.ht.h.tsltR..ht.s.th...t....h.....pt.t..t.t.......................hh.t........................p....t............t............h....h..ht.p.hh.hat.t.ho....h...ht.hR+Lo..P.s.....A..RR.h..F..DY...auoH...+..-.P...Y.PlKLETFRLMCGS-S.s..R.sKKWREQ.l.ucAC-EL+csG.L.V-s.AW....l.s..s........................................................................................................................................... 1 7 19 31 +6871 PF07043 DUF1328 Protein of unknown function (DUF1328) Moxon SJ anon Pfam-B_12535 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 50 residues in length. The function of this family is unknown. 21.00 21.00 21.70 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.05 0.72 -3.97 105 1343 2009-01-15 18:05:59 2003-08-21 11:48:34 8 1 1124 0 357 687 48 39.40 58 70.82 CHANGED llFhlIAllAulhGFuGlAuuAAuIAKILFalFLllhll .....lIFLVIALIAAsLGFGGl.A.GsAAGhAKIlFhVhllLFlV.................. 
0 81 194 276 +6872 PF07044 DUF1329 Protein of unknown function (DUF1329) Moxon SJ anon Pfam-B_12608 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 475 residues in length. The majority of family members are from Pseudomonas species but the family also contains sequences from Shewanella oneidensis and Thauera aromatica. 23.10 23.10 23.20 23.10 22.90 23.00 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.11 0.70 -5.52 104 761 2009-01-15 18:05:59 2003-08-21 11:53:27 6 4 350 2 275 746 454 269.20 22 78.32 CHANGED sIoApN...hspYp..........cpLosGQhAhlKpYPs.a+hsVYsT+RosshPptlhcss+pNAspscLsss..Gsu.lp...shh.su..lPFP.l.....P..psGhEllWNahhRa+...Gt.shppthsphsspssGshs..hsphppphhhhhhh.stsstp..........sshhhhhh.phpuPu+luGsslLl+-slDp.htp.sRpAWhYssG....QRRVRRAPshAYDsPt.sus-GhtTsDshchFNGusDRY.-WKLlGK+..EhalPYNsYcl.........tsssh+as-llpssHlNP-hhRYELH...RVWVVEATLKsGtRHlYuKRsaYlDEDoWphs.hsDtYDs+GpLWRsspsah........h.hY-ssssh.ssspshaDLpuG.RYhs.shss...cpst.shph......s.ph.pp....spFossAL.Rp ...............................................................................................................................................................................sas.............ttu.......phh.p..h.t.......................h.....sGp...........t....thh......t.t......................tshh.hhh.hhptPscltGsshLshphhsp.....p.pptW.hY.su.....RRVRRhsshs..hcsPhhus.ssh.....hDsh..ph.apst.p.c..a.paphhGpp...hh............................................................php.+...csahlpup.p.....pch...as++hhalDp......Dsa.hh.hh-.YDtp.GpL..a+h..h..........h...p....h.h..h...th.hhhsl.st.+hhh.thtt....tt......h...........h.......t.apstsl.t............................... 0 91 161 228 +6873 PF07045 DUF1330 Protein of unknown function (DUF1330) Moxon SJ anon Pfam-B_12466 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. 
The function of this family is unknown. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.76 0.72 -4.11 136 1006 2012-10-02 00:20:33 2003-08-21 11:56:04 6 4 649 38 383 984 2554 64.20 30 62.05 CHANGED sptap.cYtptsssshtpaGGc.hLs+...uGphpslEG..hp.schVllcFPohcsApsaYpSs-YQ.tht ....................................ptat.pYtptsss.s.h.p.p..a..GG.c.....hl..s+....G...u..ph........csl...E...G.......t.sh...s+s..VllEFPohpsAhshYpSs-YQts...... 0 92 214 298 +6874 PF07046 CRA_rpt Cytoplasmic repetitive antigen (CRA) like repeat Moxon SJ anon Pfam-B_12798 (release 10.0) Repeat This family consists of several repeats of around 42 residues in length. These repeated sequences are found in multiple copies in Trypanosoma cruzi antigens, Swiss:Q26907 contains 23 copies of this repeat. 21.40 21.40 22.60 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.60 0.72 -3.97 5 164 2009-01-15 18:05:59 2003-08-21 13:07:06 6 8 12 0 5 162 0 41.10 84 82.01 CHANGED EAEKRKAAEAAKsAEAEKQRAAEATKVAEAEKQKAAEATKVA .....EAEKQKAAEATKVAEAEKQKAAEATK..VAEAEKQKAAEATKVA..... 0 0 0 5 +6875 PF07047 OPA3 Optic atrophy 3 protein (OPA3) Moxon SJ anon Pfam-B_12863 (release 10.0) Family This family consists of several optic atrophy 3 (OPA3) proteins. OPA3 deficiency causes type III 3-methylglutaconic aciduria (MGA) in humans. This disease manifests with early bilateral optic atrophy, spasticity, extrapyramidal dysfunction, ataxia, and cognitive deficits, but normal longevity [1]. 
25.70 25.70 26.70 26.70 24.60 23.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.53 0.71 -4.38 19 343 2009-01-15 18:05:59 2003-08-21 13:16:55 7 6 247 0 243 347 2 139.30 29 65.35 CHANGED PlhKLusLul+plSKPlAshlKppApps.hFRsh.......lhls.AQhhHhh-hphph+hhGh.tpss.......................................pIpPLNE-tAlplGu-LLGEhhIFs.....................VuuuhllhEapRpupp...-s+KEEthppclppLcpchpcLphtl.-phppp ...................................PhhKLssLhl+plSKPl.....A........st.l.....KppA+.........p.......p.h.F.Rph........h.lshAQhhH..........th-h+h+.h.th.hs..h..t.psh...........................................................................................pl.+PLsEppAl-h.........GAcllu.EhhlFs.....................VuuulllhE.hhRpppp....ptp+c..-....php....p....chppL...ppphpplt.t.p.....t................................................................. 0 76 126 198 +6876 PF07048 DUF1331 Protein of unknown function (DUF1331) Moxon SJ anon Pfam-B_12865 (release 10.0) Family This family consists of several Circovirus proteins of around 35 residues in length. Members of this family are described as ORF-10 proteins and their function is unknown. 25.00 25.00 93.80 93.80 24.60 17.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.82 0.72 -4.49 3 11 2009-01-15 18:05:59 2003-08-21 13:19:41 6 1 6 0 0 8 0 35.00 96 95.77 CHANGED MSTAQEGVLTVVALTVYPKVRERRVLKMPFFLLQR MSTAQEGVLTVVALTVYPKVRERRVLKMPFFLLQR 0 0 0 0 +6879 PF07051 OCIA Ovarian carcinoma immunoreactive antigen (OCIA) Moxon SJ anon Pfam-B_13720 (release 10.0) Family This family consists of several ovarian carcinoma immunoreactive antigen (OCIA) and related eukaryotic sequences. The function of this family is unknown [1,2]. 
25.00 25.00 25.70 26.20 24.40 24.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.38 0.72 -4.66 13 172 2009-01-15 18:05:59 2003-08-21 13:37:10 6 3 78 0 83 165 0 101.90 43 49.93 CHANGED usu..ptsstpts.ptss.........tsuhsYhhop-Et+Vh+ECNcESFahRuLPlusluhllTtulVppGaLpsssRFGulPKVslAsllGYhlGKlSYhpsCtEKhhcL.NS.LG- ......................tt.s...........................huhtah.opEEp+lhpECpcESFWhRulPhussSMLlTQuLlpp.GhLuupP+aGSl.PKlhhAslhGYhsGKlSYhpsCQEKF.cpLc.N.S.LGp............................ 0 20 28 45 +6880 PF07052 Hep_59 Hepatocellular carcinoma-associated antigen 59 Vella Briffa B anon Pfam-B_10544 (release 10.0) Family This family represents a conserved region approximately 100 residues long within mammalian hepatocellular carcinoma-associated antigen 59 and similar proteins. Family members are found in a variety of eukaryotes, mainly as hypothetical proteins. 22.70 22.70 23.10 23.10 19.00 18.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.47 0.72 -3.66 46 244 2009-09-14 11:59:59 2003-08-21 14:00:37 6 3 211 0 181 244 2 100.10 35 33.04 CHANGED cpFs..spTspps.....Dp+M...hpYIEpcLt+........R+t..............ttppssstspstsp.ps..t.............Lhc.hPspLp.............tptt..ttphh..uuIsE...VDLGh-s....Kl+NIEcTEcA++cLh .......................pFsscTspp-....cDtc...M........hcYIEsELtK...................R+shs..................tppp.pppsp.p.stpc.............tLac.lP-plc....................tppspp.hSsphh.....uGIsE...VDLGl-u......Kl+.NItpTEcAKtcL........................ 0 62 94 139 +6882 PF07054 Pericardin_rpt Pericardin like repeat Moxon SJ anon Pfam-B_13724 (release 10.0) Repeat This family consists of several repeated sequences of around 34 residues in length. This repeat is found in multiple copies in the Drosophila pericardin and other extracellular matrix proteins. 
21.20 21.20 22.50 21.20 20.00 20.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -7.95 0.72 -4.05 5 240 2009-01-15 18:05:59 2003-08-21 14:29:02 6 13 10 0 103 238 0 33.30 73 44.13 CHANGED QPGYGSQPGVGuQTGAGQPGYGoQPGIGGQTGAG .....QPGYG..oQPGl.G.GQ.T..G....u..G..QPGYG..oQPGlGGQoGhG.... 1 40 40 83 +6883 PF07055 Eno-Rase_FAD_bd scADH; Enoyl reductase FAD binding domain Vella Briffa B, Coggill P anon Pfam-B_10602 (release 10.0) Family This family carries the region of the enzyme trans-2-enoyl-CoA reductase, at the very C-terminus, that binds to FAD. The activity was characterised in Euglena where an unusual fatty acid synthesis path-way in mitochondria performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. The full enzyme catalyses the reduction of enoyl-CoA to acyl-CoA. The conserved region is seen as the motif FGFxxxxxDY [1]. 25.00 25.00 27.10 33.60 23.00 22.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.93 0.72 -4.03 65 659 2012-10-10 17:06:42 2003-08-21 15:13:17 7 3 532 5 160 538 83 64.90 50 16.50 CHANGED tlD-csRlRhDDhELcs-VQspVppLWsplTs-Nl.h-loDapGYKpEFLpLFGFsl-GVDY-ADV ......lDscsRlRhDDaELcsDlQpplpcLWsQlTs-Nl..ppLoDasGYKpEFlpLFGFsl-GVDY-ADV............. 0 41 82 120 +6884 PF07056 DUF1335 Protein of unknown function (DUF1335) Vella Briffa B anon Pfam-B_10636 (release 10.0) Family This family represents a conserved region approximately 130 residues long within a number of proteins of unknown function that seem to be specific to the white spot syndrome virus (WSSV). 
25.00 25.00 142.30 141.60 19.40 18.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.64 0.71 -4.41 5 17 2009-01-15 18:05:59 2003-08-21 15:24:03 6 2 2 0 0 12 0 128.90 46 28.59 CHANGED DKsFpFSPLYRhIoc+LSsAsh+cc-phIVoTDFLlGLGFSscNVo+pLKuMEpsh.ppGhppohVPVsDICHRppYKGchIsNPIsuSaSspCLIVPLshLGtlFSpssaPSutsl-sYhusLssAVllY .DKsFpFSPLYRhlpcpLSpAsh+cp-hhIloTDFLlGhGaospsVs+pL+sMEphh.ppshtpohVsVh-ICHRhpYKGthIsNPIhtSaSspCLIVPlshLGhlFupssaPSutslcsYhusLh.AlllY 0 0 0 0 +6885 PF07057 TraI DNA helicase TraI Vella Briffa B anon Pfam-B_10638 (release 10.0) Family This family represents a conserved region approximately 130 residues long within the bacterial DNA helicase TraI (EC:3.6.1.-). TraI is a bifunctional protein that catalyses the unwinding of duplex DNA as well as acts as a sequence-specific DNA trans-esterase, providing the site- and strand-specific nick required to initiate DNA transfer [1]. 20.60 20.60 20.60 22.90 20.30 19.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.45 0.71 -4.13 8 282 2009-01-15 18:05:59 2003-08-21 15:48:57 6 11 224 2 13 255 4 121.40 78 9.48 CHANGED VuLSRhKpHVQVYTDN+psWssAlsp..usptuTAHDlLEP+sDRpVtsAsRLhuTAppLccTAhGRAVL+puGLtpspohA+FIuPG+KYPpPaVALPsaD+NGKpAGlhLssLsscDss.hRsltGE ...VALSRMKQHVQVYTDNRQGWTD.AINN..AVQKGTAHDVhEPKsDREVMNA-RLFSTARELRDVAAGRAVLRQAGLAGGDSPARFIAPGRKYPQPYVALPAFDRNGKSAGIWLNPLTTDDGNGLRGFSGE............ 0 1 1 9 +6886 PF07058 Myosin_HC-like Myosin II heavy chain-like Vella Briffa B anon Pfam-B_10658 (release 10.0) Family This family represents a conserved region within a number of myosin II heavy chain-like proteins that seem to be specific to Arabidopsis thaliana. 
19.60 19.60 37.30 20.70 19.30 19.10 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.01 0.70 -5.36 3 95 2009-01-15 18:05:59 2003-08-21 15:55:57 6 5 21 0 62 100 1 292.60 55 56.63 CHANGED MVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLoQSVRELEEAVLAGGAAANAVRDYQRKFQEMNEERKTLERELARAKVoANRVATVVANEWKDuNDKVMPVKQWLEERRFLQGEMQQLRDKLAIoDRAAKSEAQLKEKFpLRLKVLEEoLKGPoSSuoRsTo.uRSpSNGPoRRQSLGGAEs..KFTSNGuLSKKsPSSQLRtSLTusuooVLKHAKGTS+SFDGGTRSlDRSKlLlNGP.uNasLNcKuoEsopcuE.....p.sSE+KsEpEDscAsoEDSVPGVLYDLLQKEVIoLRKAucEKDQSL+DKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKE ...........................hVDDlQN+NQELhKQIEICQEENKILDKhHRQKVuEVEKLoQTV+ELEEAVLAGGAAANAVRDYQRphpEhNEE++TL-RELARAKVoANRVAs.VVAN..EWKDsNDK.......VM....PVKQWLEE...RRh..............LQGEMQpLRDKLAluERsA+uEAQLK...........-KhpLRLKVLE-uL+ssss.s.s.sps.....s......s+o.....s.........su...s.RR....pS.......lGus...-shsph.s.N..Ghh.++ps..s.p.RsSl.s....sss.....l.l..+.puc.........t..s..S.....+ShDG...u..s+ul-.......p.t....K.hh..hsG..............h.....h.p.........p.tsctspttp................................p.ssp.p.t.....p.....phss....s.....pspD.VsGhLYDhLQKEVlsLRKusc-KDpoL+DKD-AIEMLuKKV-TLTKAMEVEuKKhRRElAuhEKE.............................................................................. 0 14 40 51 +6887 PF07059 DUF1336 Protein of unknown function (DUF1336) Vella Briffa B anon Pfam-B_10173 (release 10.0) Family This family represents the C-terminus (approximately 250 residues) of a number of hypothetical plant proteins of unknown function. 
20.70 20.70 22.70 21.00 20.30 19.00 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.49 0.70 -4.94 20 403 2009-09-14 23:05:42 2003-08-21 16:08:14 7 20 43 0 274 397 12 194.50 33 37.25 CHANGED Wo.ssssssFplRGcoYhp.DKpKhPAssh.hhp.lGlDhFpusc.+............hccIupphst.stpstpp...tls.......alhllNlQlPs..............u-shSlVlY.....Fthpcshpc.........tsLlcRFlctD.....DuF+.......ppRlKllssllp.G....sWhl+psV..hspKPsLlGptsppsah....cGsN......YhEIDlDlus.ShlA+sllchhhuhlsslllDluhhIQupstEELPEplLsslRLNcl- ....................................Ws..sssssFhlR.GpsYhp.....c...........+p..............Khs.usp..hhp.lusDhhpssp...+..................................hcclstp.........t........s...thhtp................th.........hhhllNlQ.l.Ps...............................spthShVhY.............ahhpp.h.t.........................pllp+Flp..sD........csF+..............................spRhKllst..lsp.u............................shhl+.phV....sppssLl.s.p..thp.pp.ah............pGss........................Yh.ElDlDlts..ShlAptsl.phhhs.hlpslslDhuhllpu...ppt-ELPEp..lLGssRlspl-........................ 1 101 204 245 +6889 PF07061 Swi5 DUF1337; Swi5 Wood V, Studholme DJ anon Wood V Family Swi5 is involved in meiotic DNA repair synthesis and meiotic joint molecule formation [1][2][3]. It is known to interact with Swi2, Rhp51 and Swi6 [1]. 20.60 20.60 20.90 32.70 20.50 18.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.87 0.72 -4.20 20 177 2009-01-15 18:05:59 2003-08-21 16:39:21 6 8 155 0 119 162 1 83.60 31 40.18 CHANGED phpshpsphssLcp.pht.....phcpplu-hpup............psPpppVc+HI+LLHEYN-lKDlGQtLlGhIADtRGl+tt-lacEaGlshs- ................................................................p.......phtplcp.p.t............thpp..plpph.tsc..............................t..tstptlccHIphLHEYN-lKDlGQtLlGhlA.sh+GVphp-lap-..aslshp.................... 
0 39 62 92 +6890 PF07062 Clc-like Clc-like Vella Briffa B anon Pfam-B_11218 (release 10.0) Family This family contains a number of Clc-like proteins that are approximately 250 residues long. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.46 0.70 -5.08 5 78 2012-10-03 00:20:40 2003-08-21 16:52:45 7 3 13 0 70 76 0 179.30 26 62.97 CHANGED plllhsSLlLolVuhsLouAAlhTPSWQVVDlREaRuhHQHGLWLDCsRtpp...Hllp..s.hs-sPLHCsYKFDhDuh.sYppsl-shD....psussGEsc+HpFaGWH+ulLhlhlhShlhAuLSlhSGlCAsCssupA....lhaoIhlslAuLluslu-uIFFlsAaRVDsRFl.pGlVGTYEQ+lGYAFYLplsGshlallAllhAsLsoYhoFlsuc .........................................t....hhshlhhlluhhL.hhuhhoPuWQ.l....sphp-hpt..hppGLWhsChp..p...................................t..........hpChY+Fs....ss...ht....t..c..............tssttsc.p.pcpF........a.....s.W+huslhhhhhuhh..huhlul...hh...uhCu.sh.shu....lhhslhh.hhsslhuhhu.hlFhhhuph....h.......-s+al....uhsspYE......pc..hGhuaYlthhush.h.hhuhlhuhhhsh......p................... 0 26 34 69 +6891 PF07063 DUF1338 Domain of unknown function (DUF1338) Vella Briffa B, Bateman A anon Pfam-B_10864 (release 10.0) Domain This domain is found in a variety of bacterial and fungal hypothetical proteins of unknown function. The structure of this domain has been solved by structural genomics. The structure implies a zinc-binding function, so it is a putative metal hydrolase (information derived from TOPSAN for PDB:3iuz). 
20.90 20.90 22.30 21.60 20.70 20.50 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.84 0.70 -5.36 94 1224 2009-11-20 14:21:51 2003-08-21 16:56:52 8 5 1137 6 261 849 170 336.20 41 87.14 CHANGED tpphhpuL.phYhp.csPpstslh.cLlsp....sspplhp..........................DHhAlRoh........sssshslssls+hFtshGatssGh........Ych.sstp...lpuptFc.s...............csst........s+lFlScLcl-plo...sphpphlppll..........hsttshphl............pth.........................thlsthLts..................hpW..pt...sshssYppLtpESchsAWlss.pGhphNHhTspl.............................-Icplppthpp.....pGhshp....ut............................................................................................lcGsPp.....hhLcQoShhA.p.slpF............t-s.....sptphsusFhEhtpR....................................................................................................st........t....p.hha-sFlssSAstIFcSTh ................................................................ppFupAhSsMYppE.VPpYuTLl.-LVuc.......lNhsllptp.thtt..................sthtRls.ERHGAIR..........lGostELusLpRhFAhhGMhPVuY........YDL.spAus...PV+STAFRPl...............DsssLth.....sPFRV.FT.ShL+lEh...lp..pttLRppstclL.....tpRplFospshpLl............cph-tp.............GG.hsttpAppFltps.Lcs..................F+W..cpps.VsppsYcsLppEpclhADVlsh.GsH.INHLTPRs............................LDIDpVpshMsc.....pGlp.K.....sh............................................................................................IEGPPcpp.sslLLRQTSF+ALcEsVhF.............ssp.......hpGshsuRFGElEpRGhALT.cGRthY-thht...........................................................................h.........t......tshsps........hhttGhh.tpPlsYEDFLPsSAAGIFpSNL.......................................................................... 0 52 116 189 +6892 PF07064 RIC1 DUF1339; RIC1 Vella Briffa B, Wood V anon Pfam-B_11581 (release 10.0) Family RIC1 has been identified in yeast as a Golgi protein involved in retrograde transport to the cis-Golgi network. 
It forms a heterodimer with Rgp1 and functions as a guanyl-nucleotide exchange factor [1]. 20.90 20.90 24.10 22.50 18.40 16.50 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.61 0.70 -4.94 21 304 2009-01-15 18:05:59 2003-08-21 17:06:16 8 12 258 0 221 323 0 244.90 30 21.67 CHANGED sLc-uLWhasG.pthp.........sWhsh.s.......t......tp.....lsc.l.ls..l-hYPLulLhsculllGsps-hh............pppsssashhchphcoplaL..lLpthL...........hps...........hstpAhplspphppLsYFsHsLElLLHpVL--Esss..............s.psslLPsVluFLpp...aPpaL-lVVpCsRKTElppWphLFshlGsPp-LFccCLppppLcTAuuYLllL...................pshE.stus......spppsl+LlphAlcppcW-Lst-LsRFLtulDsststh.p ...............................................................................................................l.psLWh.tG.tthh................lWh.sh.............................t.t..p................h..t.l.ls....hphYPLulLhppu..l..llGspschh................................php...hth.....shhphphp.o....plaLpplLcphL....................tps...........hs.pAltlu...pp...h.....p.p..LsaF.sHsLElLL.H.p.VL--.Esssp..................................................................sh.cslL...splhphlpp................F.s...a............LclVVpCsRKTE....hthWp.h.LFs.hl.u.s.Pp...-LFE.......cCLp.tppLc.TAuuYLllL............................................pshE...ssss..............sp...ppus.pLlpt....A........hp.......p........sc........W-Lst-LhRFLtulspstp....p.................................... 1 73 119 183 +6893 PF07065 D123 D123 Vella Briffa B anon Pfam-B_10915 (release 10.0) Family This family contains a number of eukaryotic D123 proteins approximately 330 residues long. It has been shown that mutated variants of D123 exhibit temperature-dependent differences in their degradation rate [1]. D123 proteins are regulators of eIF2, the central regulator of translational initiation [2]. 
19.30 19.30 19.30 19.30 18.70 18.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -12.13 0.70 -5.34 23 384 2009-01-15 18:05:59 2003-08-26 10:47:36 9 6 285 0 263 392 8 253.20 27 73.76 CHANGED WYsh...F+ph.Th+ohlI.PLPc..shlcYLtpDG..lhlspppss...........psssssphs-..pt.sDspstp..........ssscFPEhp..pplp-AIppLGGtVhPKLNWSAP+DApWIsssso...lpCpsssDlaLLLKuS-alsHDLsc..satt.s.D............cpsttt........saELVLRcWh.clpPuhEFRCFV......KspcLluloQRDh.sYY-aL..tc....c-plpstIppFa....c-pl...h.pF....sscsaVhDVYlsp.....s+lhlIDhNPauc.pTDsLLFoWsELpshtt..........ptpsp.-hRll..........sstsphts.ppasp.plPhDhl-sops ....................................................................................................................................................W......att..s.pshhl..ls......thhpYL.tDs..hhls...t.........................................pttt.p..p....t...tppttt.................................pas-h......p..tplppsI.pp...hG...Gt.......V....hPKLNWSuP+DAhWh....s.so.......................hpCpshs-laLLL+uSs.hls+DL..pp...satt.s.s...................................................t....h..............th.LlLRcah..sl..p..PuhEFRCFV................+pppLl.uloQR-h...p...aa....s.al....t...........ppplhptIpp...Fa.................pppl...........t..pa.......psalhDl.Yhsp......................t+lhllDhNPau...h....T..csLLFsWpE.l.t.t........................................................h.ph...........................................s............................................................................. 1 101 163 227 +6894 PF07066 DUF3882 Phage_Lacto_M3; Lactococcus phage M3 protein Moxon SJ anon Pfam-B_13997 (release 10.0) Family This family consists of several Lactococcus phage middle-3 (M3) proteins of around 160 residues in length. The function of this family is unknown. 
22.10 22.10 22.20 22.30 21.80 22.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.10 0.71 -4.38 2 63 2012-10-03 01:22:09 2003-08-26 11:41:59 6 1 61 0 2 51 62 148.30 62 96.44 CHANGED MpKhLAIDFSTus.....TGYAFRp..ssphhVGSI.Ahs.pKsshERsh.IssuIp-lI-casLhsYalhIEpPIhshppKtsIoLhpsNG.FlGshcshhN.GYs.lsNSKWCuYHLIpGKpt.RK.pSlElLKuhslVssNshsDshADAasILhYsEp .............................MSKALAIDFSTSN.....TGYAFRNP.LTNE..YVV.GSI.A.GGKSKDPL.E.RAKlIADGITEl....IEHYNLFD...YF.....IYIE.E.PIITFKSKGNISLIRANGSF.LGV..M.RNR...HN.IGYVDls.NSKWCGYHL...IKG.KSthRKs...QS.IE.IL.KS.a.sIVP..cc.....cIN.DD.ADAFCILLYVES.................................................................. 0 1 2 2 +6895 PF07067 DUF1340 Protein of unknown function (DUF1340) Moxon SJ anon Pfam-B_14083 (release 10.0) Family This family consists of several hypothetical Streptococcus thermophilus bacteriophage proteins of around 235 residues in length. The function of this family is unknown. 25.00 25.00 263.00 262.80 24.40 23.50 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.42 0.70 -5.20 3 13 2009-01-15 18:05:59 2003-08-26 11:44:03 6 1 13 0 0 14 0 235.10 71 100.00 CHANGED MSGKYcYAGLTKELHQRLVlEFsALKEcHsRTFTKHIMETKQCNRppARKYFQRFDNVIKERSKLSPuTLDDMREYLT-GLVNDLQEYLSEHYSAsSsSCKPDTsKTNAGLTEELF+QYRcEIcsLRAAHPNsFAsYIMEVKGCSpQQA+TIpTAINTIYTEIGILTPRKVIQLEGLLSRELFGKIAKYVFNKYEWPESLDSEVDRIYLEYRTKGDLGLEKESVKRALYKAIYMGL .MK.+YEYAGLTKELHQRLTlEFDAL+EcH+R.TLTKYIMETKpCsRhpARKYFQRFDNVlKERSKLSPuTLDDMREYLTDGLsNDLQEYL.KpYosRsspC+PDsDKsNAGLscELFhQYRcEIppLRAsHPNshssYIM-VKGCopQpAsoIpTAINTlYTElGILTPRKVIQLEGLLSRELFGKIAKYVFNKYEWPESLDSEVDRIYLEYRTKG-LGp-KESVKRsLYKAIuMGL 0 0 0 0 +6896 PF07068 Gp23 Major capsid protein Gp23 Vella Briffa B anon Pfam-B_12245 (release 10.0) Family This family contains a number of major capsid Gp23 proteins approximately 500 residues long, from T4-like bacteriophages. 
19.20 19.20 20.40 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 493 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.80 0.70 -5.97 4 1586 2009-01-15 18:05:59 2003-08-26 11:48:55 6 2 183 1 0 1357 2718 119.50 36 80.33 CHANGED Ll-KWKELLEGEG..LPEIAN.SKQAIIAKIFENQEKDFpsuPEYKD-KIAQAFGSFLTEAEIGGDHGYsApNIAAGQTSGAVTQIGPuVMGMVRRAIPNLIAFDICGVQPMNSPTGQVFALRAVYGKDPlAAsAKEAFHPMYuPDAMFSGQGAA.KtFsslsAostTssGDIYp.HFFpETGosY...LQAohtVTIsutAssuthhDAEIhKQMEAGsLVEIAEGMATSIAELQEGFNGSTDNPWNEMGFRIDKQVIEAKSRQLKAAYSIELAQDLRAVHGMDADAELSGILATEIMLEINREVVDWINYSAQVGKSGMTphsGSKAGVFDFQDPIDIRGARWAGESFKALLFQIDKEAVEIARQTGRGEGNFIIASRNVVNVLASVDTGISYAAQGLAoG.FsTDTTKSVFAGVLGGKYRVYIDQYAKQDYFTVGYKGuNEMDAGIYYAPYVALTPLRGSDPKNFQPVMGFKTRYGIGVNPFAESohQAPuuRIQSGMP ........................................................................................................................tt.........................................................................................................................................................................................................................................................................................................................................................................................................................................th.......s........s....s....s..s...s.a.....scMuFoI-KhoVsA+oRAL.KAEYohE..LAQDLKAlHGLDA.EsELuNILo................................................................................................................................................................................................................................................................................................................................................................................................ 
2 0 0 0 +6897 PF07069 PRRSV_2b Porcine reproductive and respiratory syndrome virus 2b Moxon SJ anon Pfam-B_13261 (release 10.0) Family This family consists of several Porcine reproductive and respiratory syndrome virus (PRRSV) ORF2b proteins. The function of this family is unknown however it is known that large amounts of 2b protein are present in the virion and it is thought that this protein may be an integral component of the virion [1]. 25.00 25.00 113.80 113.70 22.30 22.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.60 0.72 -3.69 2 55 2009-01-15 18:05:59 2003-08-26 11:57:46 6 1 6 0 0 55 0 72.30 84 100.00 CHANGED MGSMQSLFDKIGQLFVDAFTEFLVSIVDIIIFLAILhGFTIAGWLVVFCI+LVCSAlLRsRPslHsEQLQKI. .MGuMQSLF-KIGQLFVDAFTEFLVSIVDIIIFLAILFGFTIAGWLVVFCIRLVCSAlLRuRsAlHsEQLQKIL. 0 0 0 0 +6898 PF07070 Spo0M SpoOM protein Moxon SJ anon Pfam-B_13263 (release 10.0) Family This family consists of several bacterial SpoOM proteins which are thought to control sporulation in Bacillus subtilis.Spo0M exerts certain negative effects on sporulation and its gene expression is controlled by sigmaH [1]. 20.40 20.40 20.50 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.36 0.70 -5.00 9 513 2012-10-02 22:29:00 2003-08-26 12:03:17 6 1 383 0 138 438 6 209.40 36 81.77 CHANGED FKKhLAShGIGuAKVDTlLppsslhPGtslpGpV+lhGGsssQ-I-pIpLcLss+Y.s.EscDsc...........sppshshspaplstuFsIpsGEc+shPFslslPapTPlTh.....t.chpVhlcTsLDIAtAlDssDhDhlsVcPpPhh-ulLpAl-pLGh+l+ps-sEps+h...GtpLPFVQEhEasPs.G.Y+t.hcElEllFhtssssL-llhElD++tpG.pGhLu- .......................hKKhLAShGlGuApVDTlLpp.....tshhsGpplpGpl+lpGGsssQ.pI-pIplpLssphhh..E.s..s..Dpc.....................ht.p.s.hsltphplst.s.FslpsuE.p+plPFphplPh-.TPlTh.......s.ttpValcTsL...DIstAl.DPsDcDhlpVpP.PhhpsllpAlp.p.LGF+h+p..s-sEpu.h....htpp.l.P.F.hQEhEFhPs.utatGthcElElshl...h.s.s.c.tlc.llhElDR+......sGhh..................... 
0 31 86 118 +6899 PF07071 DUF1341 Protein of unknown function (DUF1341) Moxon SJ anon Pfam-B_14024 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 220 residues in length. The function of this family is unknown. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.50 0.70 -4.81 13 679 2012-10-03 05:58:16 2003-08-26 12:09:50 6 3 511 18 56 391 4 207.80 59 88.40 CHANGED Y+sRVCLNVLAuSl-NAc-IY-AAEGHVLlGVLSKNYsoV-sAlsDM+cYsphl-NAlSVGL.GAGDPpQStMVucIScplQPQHlNQVFTGVGsSRALL...GQs-TllNGLVSPTGpsGhVcISTGPlSSp.ptcuIVPlETAIAhL+DMGGoSlKFFPMGGLppc-EYpAVAc.ACAccuFaL.EPTGGIDL-NFEpIlpIAL-AGVc+VIPHlYSSIIDpt ..................................................................Y+sRVsLNVLAtshpNA+-IY-AAEGHlllGVLSKsYsoV-pAVs-MKcYtAtls.u.lSVG.L.GAGDPsQ.thVucIutphpPpHVNQVFTGsG...h..oRuhL....Gtp-T..hlNuLVSPTGpsGhVpISTGPhSSp..ussuhVslETAIthlpDMGusSlKaFPMsGLpph-EapAVAp.ACA+psFhL.EPTGGIDL-NFtpILp...IAL-AGVp+lIPHlYSSIIDp................. 1 9 21 39 +6900 PF07072 DUF1342 Protein of unknown function (DUF1342) Moxon SJ anon Pfam-B_14075 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 250 residues in length. Members of this family are often known as YacF after the Escherichia coli protein Swiss:P36680. The function of this family is unknown. 
20.80 20.80 21.10 23.50 20.60 19.80 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.19 0.70 -4.68 63 930 2009-01-15 18:05:59 2003-08-26 12:13:27 6 1 924 2 168 466 123 206.70 50 83.59 CHANGED LRLEtLhpplpthhstsct..hppchshpsLF-ll-lh-RsDl+o-LlK-LE+QpppLpsapshPslDpptLppllpclcpstssLhsss.+hGptL+-scaLsulRQRhuIPGGsCsFDLPuhHhWLpps.cpRpp-lppWhssltPLtpulsllLcLlRpouphpp.hApsGhaQpsh..pu...pLLRlcls.s..shaPpISGpKhthuIR ..............LRlEaLlpQLshslshsDp..ssslpFFRsl.-LLDVhE.Ru-.lRoELLKEL-RQppcLp.sWhsVPGVDQspl-uLlppLcssuusLhoAP.RlG.QhLREDRlluhVRQRLSIPGGCCSFDLPoLHhWLH.L.P.psQRcsplpsWluoLsPLspALohlLcLIRpSusFR.+Q.suhNGFYQcNus.-A...cLLRLpLsL..-..pplYPpISGHKsRFAIR...................... 0 29 82 126 +6901 PF07073 ROF Modulator of Rho-dependent transcription termination (ROF) Moxon SJ anon Pfam-B_13280 (release 10.0) Family This family consists of several bacterial modulator of Rho-dependent transcription termination (ROF) proteins. ROF binds transcription termination factor Rho and inhibits Rho-dependent termination in vivo [1]. 20.80 20.80 20.90 22.10 19.50 20.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.46 0.72 -3.97 31 733 2009-09-11 04:47:18 2003-08-26 12:43:39 7 1 727 1 80 248 6 74.20 59 89.79 CHANGED .IsCcpYDYlElACh++h.lpLpL+sG-plpGpAh-htppt.....cKpEaLh.....................lcpp.uspptlcLDpIsuhsshp.sPcFuplhl ..PINCDDYDsLELAC.HHLhLTLpLKD...G.EpLQAK...AsDLlpR......KNVEYLl.....................lEs.u.....GEopELRLDKIsSFS....HPEIGTVVV....................... 1 9 26 52 +6902 PF07074 TRAP-gamma Translocon-associated protein, gamma subunit (TRAP-gamma) Moxon SJ anon Pfam-B_13437 (release 10.0) Family This family consists of several eukaryotic translocon-associated protein, gamma subunit (TRAP-gamma) sequences. 
The translocation site (translocon), at which nascent polypeptides pass through the endoplasmic reticulum membrane, contains a component previously called 'signal sequence receptor' that is now renamed as 'translocon-associated protein' (TRAP). The TRAP complex is comprised of four membrane proteins alpha, beta, gamma and delta which are present in a stoichiometric relation, and are genuine neighbours in intact microsomes. The gamma subunit is predicted to span the membrane four times [1]. 21.00 21.00 21.40 21.80 20.80 20.60 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.04 0.71 -4.73 9 147 2009-01-15 18:05:59 2003-08-26 12:49:58 7 3 109 0 80 132 1 154.60 62 87.79 CHANGED EEELLLQDFSRNVSTKSSALFYGNAFIVSAIPIWLFWRlHpM-lhsShllFslhThsSTaLlAhAYKNsKF.LKHKIAh+RE-AVoREVspcL..uDDKKhoRKEKDERILWKKNEVADYEATTFSIFYNNALFLslllhuSFalL+ohsPohNYllShusAuGLlALLSTG ...................EE-LLLQDFSRNlSsK..SoALFa.GNAhIV.SAlPIWLaWRIapMDLh...p..S.ullaslhTLlSTYLlAFAYKNlKFlLKHKlAtKREDAVo+EVoRKL...ADs+K.MSR.KEKDE.............R...............ILWKKNEVADYEATTFSIFYNNsLFLslVIluSFFlLKNFsPsVNYIlSlusuSGllALLSTu................ 0 30 36 60 +6903 PF07075 DUF1343 Protein of unknown function (DUF1343) Moxon SJ anon Pfam-B_13635 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 400 residues in length. The function of this family is unknown. 
25.00 25.00 25.10 25.00 19.30 19.20 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.30 0.70 -5.61 109 613 2009-01-15 18:05:59 2003-08-26 12:52:12 6 11 514 0 245 629 524 355.00 36 82.47 CHANGED VGLlsNpouls................t............shpp.........................slDhLhpt.sl...plptlFuPEHGh+Gs...spAGt.plssshDscTGlPlhSLY...................Gps...+.......................+PosphLc..s......lDlllFDIQDVGsRaYTYIsThthsMEAs.....................................uctshphlVLDRPNPhsGthl-GPlL-.ta............cSFVGhaP.lPhtHGhTlGELAphhsschhl....................................s....scLpVlshcsWpR.........shhasp.ssh.al.PSPNhPssposhlYPuhslhEG.T...........slStGRGTspPFp....lhGAPa..lc.s......tlhtthpt.t...l.GshapshtFpPp......hp+..apGchCtGlplp.l......s.pthcshpsuhhhlphlpc.hY............spphth.t..........th.......h.h-hLsGsspl+ptI.........ps.Gtshcc....ItppappclppFpphRpp.YLL..Y ...................lGLloN.Tuls.................p....p.h.pp.......................hl.DhL.hp....p......sl.....plsslFuPEHGhRGs..............spAGt...plss.thDs...p..TGlPlhSLY......................Gcs...+...........................+PsschLp..s........lDlllFDIQDVGsRaYTYIsThthsMEAs.....................................Ac..tsh.....p......hlVLDRPNP.sG..t..h..l-GPl.L.-..pa............pSFVGhhs.lPhhHGhTlGELAphhNsE.hhl..............................................................s....scLpVlshp.........sWpR.............sh.hast.tth..a....l.PSPNlPs.poshlYPuhsl.hEG..T...........sl.S.GRGTshPFp....llGAPh..lc...s.............thhtth.pt.t............lsGhtF.ps.htFpPt.........hs+....apuphCtGlplp.l...........s.pthp.s.h.c.hsh.h.hlphltc...hY..............spphtht....................th........thhDhLsGssplRptl.........pt..Gtsh..pc....lpptWppslppFpp.hRppYLLY...................................................................... 
0 119 188 224 +6904 PF07076 DUF1344 Protein of unknown function (DUF1344) Moxon SJ anon Pfam-B_13761 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of around 80 residues in length. Members of this family are found in Rhizobium, Agrobacterium and Brucella species. The function of this family is unknown. 23.10 23.10 23.10 24.90 23.00 23.00 hmmbuild --amino -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.68 0.72 -4.40 9 104 2009-01-15 18:05:59 2003-08-26 12:55:15 6 1 91 0 25 57 3 61.00 56 67.66 CHANGED .As-sEGpIpplDpcuholTLDDGKTYpLPtEhch-uLcsGhKVlVhYspss.GcphlsDlp ....hApDsEGpITcIsKDocTITLDDGcTYKLPuEFD...luAlsPGMKVLIhYDlVD.tpRhITDIQ.................. 0 1 11 14 +6905 PF07077 DUF1345 Protein of unknown function (DUF1345) Moxon SJ anon Pfam-B_13768 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 230 residues in length. The function of this family is unknown. 24.00 24.00 24.10 24.20 23.80 23.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.08 0.71 -4.76 56 475 2009-01-15 18:05:59 2003-08-26 12:57:27 6 1 441 0 171 432 34 171.60 32 81.43 CHANGED hshshs..hstplLlGWsssshlYLslshhhhhp.tsspclRcpApppDcstsllhhlsslus.......huSlsAIshtLssu+p.t...spshchslshsolhhoWhhlpshFulHYA+hYY...........ttstptsGLpFP...s............pppPcYaDFlYFSFsIGhTsQsSDVslso+phRRlsLhHullSFhFNoslLAl ..............................................................................hh....t..hthlluWsshhhlYLlhhhhhhhp.h.ss.ppl+phAtppDpst...hhlhhlshhAs.......lsSlsulhh.Ls...ssp..p.......sphhphslshh...olhhu.WhhltshFulHYA+hYY...........ttstttsuLpFP....t..................ptpPsYhDFlYFSasluh...ssQToDVslsopphR+ssLhpullSFhFNssllAh................... 
0 40 93 132 +6906 PF07078 FYTT DUF1346; Forty-two-three protein Moxon SJ anon Pfam-B_13991 (release 10.0) Family This family consists of several mammalian proteins of around 320 residues in length called 40-2-3 proteins. The function of this family is unknown. 19.50 19.50 20.00 20.50 18.50 19.40 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.09 0.70 -5.12 2 70 2009-09-14 11:59:21 2003-08-26 13:00:14 6 4 40 0 32 87 0 230.00 53 92.94 CHANGED RFuTRLhGATATPPP.PPKARSNENLDKIDMSLDDIIKLNRKEGKKQNFPRLNRRLQQSusRQFRMRVRWGIQQNSGFGKsSLSRRGRVhPGKRRPYGVITGLAARKATGIRKGISPMNRPPLSDKNIERYFPALKRKssLLRQNEVQRK.VAsLKRPNQLNRKNNIPsNFTRsGNKLSHQKDTRQATFLFRRGLKVQsQLNoEQLlDDVVAKRTRQWRTSTTNGGILTVSIDNPGAVQCPVTQKPRLTRTAVPSFLTKR-QSDlKKVPKGVPLQFDINSVGKQTGMTLNERFGILKEQRAsLTFsKGGSRFVTVG ...................................................................thshoLDDIIKLN+KE...t..p......+.........p.phsp..hp..R+.h.ppss....sp.......php...h.R.s.RW.GlQQ.pu.G.h..G+..stl.s..R....Rs+h..hP...GKRRs.GVITGLAARK.ssul+KGlSPhNRsPLSc.K.....shp..p.h....PhlpR...pss..R...ps-hQR...+............hs.lp+stthpR.p...............h....R.s........h..ptQ+-sR..QApFLF+R.G........LK.VQsQlp.tphhs...ss.pR...TRQW..RoSsssuGILTVSIDNPsAhp.....p..P.s.+s.RL...sRsshPsFlhK+-ps-.K.KlPKGVPLQFDI..NSVGK.QTuMTLNERFtILK-QRsshs..s+.GSRFVTVG................................ 0 1 3 14 +6907 PF07079 DUF1347 Protein of unknown function (DUF1347) Moxon SJ anon Pfam-B_14317 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 610 residues in length. Members of this family are highly conserved and seem to be specific to Chlamydia species. The function of this family is unknown. 
24.00 24.00 24.30 75.50 23.30 23.90 hmmbuild -o /dev/null HMM SEED 550 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -12.73 0.70 -6.30 6 40 2009-09-14 11:58:54 2003-08-26 13:26:39 6 2 37 0 7 25 0 547.60 57 85.61 CHANGED spQQ+.tLLCFQGFlLQKQ+phpQSEcIFSKIYcEhpsu.FlhKEElLGGRILNAFFL-NI-hM-hllspLcQpsupSsYLsLFcsLlsYKQKpacpAlpsLShWpsplcpocssLLDlNIppLhSDFlL-sIpAHSLIEhGcFuEGRsILNRIIcKlLKREssWsu-sYDphVLMLSRSYFLELppSppscIYPDYYEMILFYpKKl+ulDQpuYccFlPp-ELhShlM-HlFVVPc-+LsPLMQllchWE+aYhsPNYsLVlcsLlc+Fhosspplt+lCpuIss.c.IEpLKc+LI-sFuclLStpVpplpTscApQsLuLLKILDsslShSEKLllSscsLpcIlupDDtpaTpL+cYLsLWEpIQSYDlDRQQLVHYLhhuAKpLW+pGssD-KALNLL+LILpFTsYDIECENlVhLFlKQsY+QsLStHuhsRLLKLEDFIo-sGLTsIsluEtEIANFLADAEaLaupG-Y+KCYlYShWLTKlAPSs.sYRLLGLCLhENKpYpEAW-hLppLP.NcchaDSKVQKALlLCQKHlsKDhtso ........pQpphYlLCsQGFoLQhQ+KFQESEcIFSRIaccpsSuPFlLpcELLpGRILNAYFLNNLsLMscplsELE+hsG.spsaLhhFKALaAY+sKpYchAl-sLSpWhu+VcpTcshhLDTNl.cLFSsalLEcIuA-SLIts+RauEGRlllNplhNKlFsREasWssDhYNRlVLMLGpSYLLELpEuspuDLhP-YYEhILFYpKph+uhDttAYcpFhPEs.LVsTIMQHlFVlPEspLPhaMphLhMW.ENpYVHPcYSLVlEphcstllp.Du.csp+ICpAIAcS.K.Ic+LKE+LI-hFu-.LshsVpQspTlpAcQYLALLKhLDPcsS.hu+KL..LLSpKplhNhVCpDDuQYo+LKDYLhLWEEt-.tDVDRQQLVHYLhauAK+LW+tGQ.-EtsLcLLKtILhFopp-h..tC.NpshchVKahYsQALuh+shT+LlhlENFl-EVGLPphlsS-AE.IANpLADApYLFu+GDY+hChlYSSWLs+VAP.SscAhpLLGLsLhEpK-YsEALEshpcLP..sE-hasSpVpKA.lLC.Kalu+ppcpp............... 0 1 2 6 +6908 PF07080 DUF1348 Protein of unknown function (DUF1348) Moxon SJ anon Pfam-B_14137 (release 10.0) Family This family consists of several highly conserved hypothetical proteins of around 150 residues in length. The function of this family is unknown. 
21.10 21.10 21.70 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.74 0.71 -4.39 7 617 2012-10-03 02:27:23 2003-08-26 13:50:36 6 5 563 4 278 595 44 138.20 62 88.57 CHANGED RPPLPPFTtETAhpKVRhAEDuWNSRDPs+VuLAYTpDShWRNRuEFhpGR-tI.tFLoRKWp+Eh-YRLIKELWAFsGNRIAVRFtYEW+DDuGpWaRSYGNENWEFDEpGLMp+RcASINDlPIuEu-R+a+W.sLGcRPsD ................................RPPlPPF.T.t.EoAhpKVRhAE.DuWNoRDPp+VuLAYTsDSt.W.R....NRuEF.lsGRpcIhsFLpRKWp+El-YRLIKELWAFs..sNR.............IAVRFAYE.W....H......D...D.u.G....p.WFRSYGNENWEFs.c.s....GLMp+RaASINDlPIpEu-RhF+W...P.Gt.RPDD........................... 0 67 157 220 +6909 PF07081 DUF1349 Protein of unknown function (DUF1349) Moxon SJ anon Pfam-B_14150 (release 10.0) Family This family consists of several hypothetical bacterial proteins but contains one sequence (Swiss:P40893) from Saccharomyces cerevisiae. Members of this family are typically around 200 residues in length. The function of this family is unknown. 
28.00 28.00 28.80 28.40 27.90 27.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.08 0.71 -4.93 37 701 2009-01-15 18:05:59 2003-08-26 13:54:08 6 18 553 4 274 639 31 174.40 25 72.45 CHANGED pWhNcPtpaphss.ctlplsTssp.TDFWppTaYGF..ppcsuphlhhpspsc.F.ohpl+lps.sapshaDQuGLhlhlD-c......sWlKsulEa.sDGhspluoVVTp.shSDWuss.h...ssscphahRloRpssshplph.ShDGppaphlRlsh.a.................sspshplGhhuCSPp.psGhpspFs-hplsss..psh ...........................................hh.p.P.t.hphp......p...ptlplps.tsp.TDhWpc.....o..h..Y.u..F.........ptp.s......u.........h..h......h.....hp.h.....t..........s...........s..F..phplplph...sh..p...p..........haD..QuGLhlhh...sp.p...............................pWlKs..ulEa.....s-uhs...p.lu..oV.l.Ts.....s...hSD......Wu.sssh.................sss..p.....p..h..h.h...RlsR....p....t..s..s.......h....tl...h...u.h...-.............u...........p.p.ap..h...R.lsh..ht.........................tsp..shp...lGhhsss.P.......p......ppG......hp...sp..Fpphplp.......h......................................................................................... 0 73 157 225 +6910 PF07082 DUF1350 Protein of unknown function (DUF1350) Moxon SJ anon Pfam-B_14167 (release 10.0) Family This family consists of several hypothetical proteins from both cyanobacteria and plants. Members of this family are typically around 250 residues in length. The function of this family is unknown but the species distribution indicates that the family may be involved in photosynthesis. 
21.70 21.70 21.80 24.50 20.40 21.60 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.63 0.70 -5.31 3 176 2012-10-03 11:45:05 2003-08-26 13:57:17 6 3 104 0 94 183 162 222.70 31 76.11 CHANGED EW+EIRGNWVLVPppPlGlIHFLGGAFVATAPpLTYRWLLEcLGcAGYVVIATPFVNTFDHtAIApSVLN+FEhsLERLp+pGulssuhLPlYGLGHSMGCKLHLLIGSLY-VERAGNILlAFNNYPAKQAIPa........lDpFsTul.............ulEFTPSPpETN+LIQEsYsVRRNLLIKFsNDDIDQTAuLRsILps+FuDMVTApsLPGNHLTPLGQDlKWQTGuEFSPLDALGQWlKQSLapDLspLp+slLcWLNP .................................tts.ahhhP.......p....Ph...ullcFlGGuFlus.s.PploY+hhLEpLu.p.p.G.a.hllAsPa.ss..s.....FDH.thAppl...ht.p....Fc.p.......s...h.p..tL.....p........p.........................h................t........L...P......h......a.......u............lGHShGshlHLL..l..........s.s...........h...h....ss.....p......+...s.......us..lhhuFNN..hssppulPh...........................hp..ph..s........h......................................................................t...EFsPoPpE.....T.pl.lpp...Y...t...hpps..LLl..+FpsDpl.D.po..L..ph.Lp..t.......c......s......s..h.......h..p......hh.Ls.GsHhtP........htt..................................................................................................................................................................................................................................... 0 29 65 86 +6911 PF07083 DUF1351 Protein of unknown function (DUF1351) Moxon SJ anon Pfam-B_14178 (release 10.0) Family This family consists of several bacterial and phage proteins of around 230 residues in length. The function of this family is unknown. 
21.60 21.60 21.60 22.50 21.30 21.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.18 0.70 -4.71 14 177 2009-09-10 23:43:51 2003-08-26 14:05:05 6 2 167 0 23 155 3 211.50 23 68.34 CHANGED p-lp..lphpPApIp.hshEphcpplsplspcY.pshlVTs-shppcKpphAcLpKltKslsccRhclc+phspPhs-F-thhK.plhp.lcsslspIspslK-h--+p+p.+hcpl+thlschsschtl-tp.hcph.....hpssahssshshKK.lhcplcphlptchpchpphpsscpsIppt.........stp.....slssssYlchLcp.psls-lhsphcsD....h-hp+pp ....................php..lphpsutIp..sh-plcptlsphlscY..psh..h...sT...s..-slp-sKp...sRAcLN....Kltptl-spRKclK+phscPhc-F-pplK.c.hh...t..lc.pshspIspslKpaE-pp+ptRhcplpthlschstpht..lsh.p.hc.h.........hpspahsc.....sh......shKK..hhcc....lss...hh....tt...hpchpphcpspphlpph.............stt.............hshss.sal.phlcp...shs-lht.hcps....h.h................................................................... 0 7 16 21 +6912 PF07084 Spot_14 Thyroid hormone-inducible hepatic protein Spot 14 Moxon SJ anon Pfam-B_14186 (release 10.0) Family This family consists of several thyroid hormone-inducible hepatic protein (Spot 14 or S14) sequences. Mainly expressed in tissues that synthesise triglycerides, the mRNA coding for Spot 14 has been shown to be increased in rat liver by insulin, dietary carbohydrates, glucose in hepatocyte culture medium, as well as thyroid hormone. In contrast, dietary fats and polyunsaturated fatty acids, have been shown to decrease the amount of Spot 14 mRNA, while an elevated level of cAMP acts as a dominant negative factor. In addition, liver-specific factors or chromatin organisation of the gene have been shown to contribute to the regulation of its expression [1]. Spot 14 protein is thought to be required for induction of hepatic lipogenesis [2]. 
19.80 19.80 25.40 21.00 18.80 18.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.01 0.71 -4.36 10 191 2009-01-15 18:05:59 2003-08-26 14:17:21 7 4 83 1 106 186 0 134.70 30 90.42 CHANGED MQls.DohspKpSLhNAMNRFluAVNNMDQTVMVPSLLRDVP....Lsppstc...............phpssss......ts.h.stpsDMYsaYlLLKSIRNDlEWGlL.+p......u..Esst+cc...ssssutsscEs...s-tDLEpQFHYHLpGLHoVLSKLT+KAspLTNRYKcEIGhushup .........................................p....KpslhssMpRahusVpsM-pTlMlPSLL.RDl.l.tptt.p......................................................t.ttDhYphY.hLKu.I+s-l-aG..............lh....pt.......................t...p.t.t..ppp..................p...s...tt.pt...ttp.-L....E...tpF+hHlpGLaplLscLTppAp.LTp+Ypp.hG......t............................................... 0 22 32 69 +6913 PF07085 DRTGG DRTGG domain Bateman A anon Bateman A Domain This presumed domain is about 120 amino acids in length. It is found associated with CBS domains Pfam:PF00571, as well as the CbiA domain Pfam:PF01656. The function of this domain is unknown. It is named the DRTGG domain after some of the most conserved residues. This domain may be very distantly related to a pair of CBS domains. There are no significant sequence similarities, but its length and association with CBS domains supports this idea (Bateman A, pers. obs.). 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.00 0.72 -4.43 171 2967 2012-10-03 03:17:47 2003-08-26 14:50:23 7 34 2600 6 655 1966 228 107.60 30 20.44 CHANGED -lsctLsucllsusptttcp.lpchhluAMshpshlphlct.....sslllssGDR..sDl.luAltss.......huulllT.Guhpsssplhclscptt....lPllssshDTassuphlsph ................................................................-lschlsupllsss.-th...p+c..lpphsl.u..Ahs...l.p...s...h..lc..a...hcs.............GsLlls.uDR....s-.l.hlsAhhss..............huulLlT....G...G...ac....s....s...sclhcLscctt..............lPllpss...hsTapsuthlpp.................................. 0 237 457 587 +6914 PF07086 DUF1352 Protein of unknown function (DUF1352) Moxon SJ anon Pfam-B_14369 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 190 residues in length. The function of this family is unknown. 22.20 22.20 23.00 22.70 22.10 22.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.35 0.71 -4.66 10 145 2009-01-15 18:05:59 2003-08-26 14:54:03 7 3 112 0 88 133 0 171.20 36 94.47 CHANGED MASRuGPRAuGTDGSDFpHRERVAuHYQMS.....sshKSElKKLIhlHhLIWlLlsApluVupL........tLlS+cpVuhPYQWEYPYLLSllPSllGLhuhP+NpISYLVlSMIStGLFuluPLlYGshthFPtupcLa+HGKAhchhhshoAls.............lMYllhVlAlQVHuWQlYYSKKLLDuWhssTpcKK++ ...........MASRu.G.sRsuGTDGSDFpaRp+VAs+YQhS..........sphKuclKhhl.....hhHhllahlhhA+.lssshL................hh.lpc.pls.Ph.W.E...Y..s.ahl..SllsohlGlhuh.RNplshLhh.h.l.uhhlhul...hPllau.h...hhsts.phhppscu.ch.hs.shs..............lhYhhhhl...uhQlHsa.plY.auhpLlpuWhststtc....................................................................... 0 29 41 64 +6915 PF07087 DUF1353 Protein of unknown function (DUF1353) Moxon SJ anon Pfam-B_14433 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 100 residues in length. The function of this family is unknown. 
22.50 22.50 22.50 22.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.30 0.72 -4.52 35 353 2009-01-15 18:05:59 2003-08-26 14:55:52 6 3 263 0 55 251 186 91.10 36 62.82 CHANGED pacLhps..........hhhpssu.....hhhhVPtG..ap.TDhASIP+hhhslh.sPa.ucYhpAAllHDaLhspt........................ths+cpADplFhcuhtshG..Vsth+thhhahAV ..........................................aclhps..........Yh.p.ps....hl.VPtG.....Fh.TDhAS....lPRl......F.W......s.lh..PPh....GcYhpAullHDaLhcps..........................hps++pADhlFh-uMphhG..Vs+aKthlhYhAV..................................... 0 14 33 45 +6916 PF07088 GvpD GvpD gas vesicle protein Moxon SJ anon Pfam-B_14302 (release 10.0) Family This family consists of several archaeal GvpD gas vesicle proteins. GvpD is thought to be involved in the regulation of gas vesicle formation [1,2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 484 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.87 0.70 -12.68 0.70 -6.18 5 21 2012-10-05 12:31:09 2003-08-26 15:10:38 6 1 15 0 11 437 87 367.10 29 79.27 CHANGED lcRFFsG-sG+TLLINGAPGTGKTLFTIRGLDVLcR-uDVLYVSTRVDQ-TVaEMY......FcsHuSLD..KTulL.............DLhQDPFtLPhDVDVPFEpLsL-SLLEWVDtIsAsuc+LTIAFDSWcLlYEYLAsRHDsPPD..IcTVTNQLVsLARsuGlRLlLVoETAssSsLEYIVDGVVTL..pVK-D-RGRTRRsLRLEKLRGVRIGNRLQP...FTLADGQFpuITPVELlTscTssspuTWEPts.NssA+FSTGIGDLD..+ILSGGaNR...GSVVHLDLGsDLSRDAWSVLsLPAIRNFLupEMGVAVVPP+EGSPGLLHNDL...NsVLo+uVFDTaCHVFETYAGPo+ut........stpaLsshhTso.SDAlsP.......................stlssp-........asoPlEGGpLcYDPYhEhlEplRcQS-GPLLHVISMDTAapAFETRLGDFANYVA.....LHNDusILITKsGTtLRTRADRVADMHFRLEp.SG-AIsLYGENPLTPLLGIGlccSpsIPKIpLTEMV 
..............................................................ppFFptcsGpoLLI.pGtPGTGKThF..o..lc.h.L...s...s...l.p..c..c..t.s..s.h.Yl.S.....T.R.V.s.p.-.sl.a.cha...............h...ct.t..ls............ht..ll...........................c.h.p..p....D.....h..t.....h.........h...t....h...s....l.....Ph.............h...s......l....-.s....l...h...p.a.l....c.p...l.s.s....t...s..c.p...s.....hl.s.lDS...W.....h..lh-h.L.u.s...c.a.ssspc.......l...cslp..s...pLs...........h.....h...c.....c.t.s.....s.+..L...l...l....V......h....E....s....s...c......p.......p.....s........L.........-.....Y.ls.DGVVoL...........phc......ps...p..G.R.hh...R.LpL-KLRGlpIpppha....FTL.t..s.G..pFp..s..hss.s.p.h.h.spp....h.hcPhs...ss.pscaSTGhtDhD..pIhsGGhph...GShlhL-hspsls.pha.hls.h.shtNhlp.thtshll...................................................................................................................................................................................................................................................................................................................................................................................................................... 0 6 9 10 +6918 PF07090 DUF1355 Protein of unknown function (DUF1355) Moxon SJ anon Pfam-B_14563 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 250 residues in length. The function of this family is unknown. THe structure of this domain was solved by the Midwest Center for Structural Genomics (MCSG). The structure has been classified as part of the Class-I Glutamine amidotransferase superfamily. 
24.40 24.40 24.50 25.00 24.20 24.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.27 0.71 -4.90 13 830 2012-10-03 00:28:14 2003-08-26 15:19:49 6 10 622 13 194 554 141 168.00 35 34.26 CHANGED +VLllG.ESWstphpHhKGFDpFsol+acpGAchLLpslcsushclcaMPAHp.utpsFPhsh...EpLssYDsllLSDIGuNThLL.stsahc.p.sPstLcLl+-YVspGGuLlMlGGYhSFpGIpu+Apa+pTPlt-VLPVs..........sLshDDR.......VEhPpGhtspshu..cHPlspGLus......-W...P.lLGaN...cl ............................................................+VLhlu.tp.H.t.hp.+.s..h.............K.-.tu.ssh.lh.slhp.s.s...c..D..hhPscp...s.hsFPpsh.....-pL..spaDsIllscluusshL...................sstLc.hI.t.cY.V.p.p.G.Gu.LLMlGG..SF......upusappTs.Lu.-VLPVp.............h.s.ss..cp............lE...pP.....tsp..hps.h..u...cHPlspshs........................ca...s.hhth........................................................................ 0 79 129 155 +6919 PF07091 FmrO Ribosomal RNA methyltransferase (FmrO) Moxon SJ anon Pfam-B_14605 (release 10.0) Family This family consists of several bacterial ribosomal RNA methyltransferase (aminoglycoside-resistance methyltransferase) proteins [1,2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.60 0.70 -5.39 14 272 2012-10-10 17:06:42 2003-08-26 15:26:17 6 1 225 6 8 986 235 174.10 50 65.63 CHANGED uutKYRslsPssVcRlspcth.spptshtpshKtsKp+LHplhGAal..tts.htthL+pltp.....sssssD.puhpsh.t............t....hhuhHuST+ERLPh..LccFYsplFutlss.PsoVhDlACGLNPLAlPWhshssss.sYhu.DIDpshh-hlsshLshlssttpsplp..Dllss.ssssssDlsLlLKslPsLEpQctGuuhcLlctlsu.hlVVSFPT+oLGGRs+GMtpsYstthEuhsstcsap.hpchphusE.Llal ...............................................................................pphsshcp.cccuphh..........................................................KuLQhl...oApCVKQVEVIRA..R....................R....LLcGpAST...o.....GY.F-N........IEH..C.I....DE....E..FG..p...s..o..l...N..D...K...LLLlGS..G....A..Y..PMTLIQV..A.K.E.T..GA..SV....IGIDIDsQA.V.D.LGR..R..I.....VN.V...L...A........P...N...E.......D..I.sIo........DQc.V.u.....p..L..K.D..I.pD.V.T...a........................................................................................................llhppshsh......................................................... 0 6 7 8 +6920 PF07092 DUF1356 Protein of unknown function (DUF1356) Moxon SJ anon Pfam-B_14617 (release 10.0) Family This family consists of several hypothetical mammalian proteins of around 250 residues in length. The function of this family is unknown. 
22.00 22.00 22.00 22.00 21.80 21.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.18 8 192 2009-09-13 19:50:27 2003-08-26 15:34:03 7 5 60 0 95 173 0 197.30 43 86.84 CHANGED pED..............ps-chLsccspptsIuQFPYVEFTGRDSlTCPTCQGTGcIPptQ.NpLVALIPYSDQRL+PpRTKLYVhLSVlLCLLsSGLVlFFLFPRSVhVsssGlKSVpVsFscpsphVhLslTuoLNIoNsNFYsVpVsslouQVQahKsVIGptphoNlohItPLup+QlsaTVpsclusp.oYhYhaCTlsoIKVHNIVlaMQsoVphSYhuHspQsolEoYcYVDCGuNoT ..............................t...................t.t..spttppt.huphPh.chsG.p.sul...TCPTCQGoGcIPp..tpcspLVALIPauDQRL+PpRTKLYV.hhu....VhlCLLhuuLs..l......FFLFPRSl.Vp.s..G..lpss..hVs..as...........tpp....l.hLslTshLNIoNsNaYsltVps...lospV......a......p.sVlGphph.ssh...Is.Ph.s.p..h.asl.s..h...ttt.....o..ha...hCTh.pIpVHplllhhp..solph.oYh.uH.pQ.s.ppapYVDCttNso......................................................................... 0 19 29 50 +6921 PF07093 SGT1 SGT1 protein Moxon SJ anon Pfam-B_14698 (release 10.0) Family This family consists of several eukaryotic SGT1 proteins. Human SGT1 or hSGT1 is known to suppress GCR2 and is highly expressed in the muscle and heart. The function of this family is unknown although it has been speculated that SGT1 may be functionally analogous to the Gcr2p protein of Saccharomyces cerevisiae which is known to be a regulatory factor of glycolytic gene expression [1]. 
27.00 27.00 28.50 27.10 26.80 26.80 hmmbuild -o /dev/null --hand HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.94 0.70 -6.13 33 327 2009-01-15 18:05:59 2003-08-26 15:46:34 6 13 251 0 233 314 3 469.20 23 84.08 CHANGED sVcYpla............t.tsspsphpplpphhtclhscls.hh.....ps.YIWQ.+-sFpLphpptp....................sssaltGpTpa.GDsl-D.EWaIVaLLhclo+pa..sslhs+VhD.sDGEFLLIEAAchLPcWLsP-supNR.VaIppGcLpIIs............t.spt.spslolppAlphlp.spsp.thhtSsplpsslppR.lcsY..Pcp.lppsl...HcAplhlPtplAtlL+p..cPpLlusAVpAFYhR..DPl.sl+.....sspshphF.....................Pcsh.VpsoVpFT+s.L..YAQLhpQpFs.Psph........................hthPssscs..tactt-.......LGhKL............................usGFEhLhspsctptpssp...........ss.thpshLcsLpcsGhhp.s.l.sspchpphhp.s............................................................................t...t...spDDDsWLs.ls.--L-p.Lpp....+ttpppthp.spppp.....s.....................................sLpclspphpsFlsc.tusa-Gs......-h.................................................................................................................................................................................................spsspcpslsh........D....t-pFhc.hhcchLshsss-.ss..s..............................sshtc.csp-s-sps--.............t.p............stpplpphhppM-pELttoshhp.....................stttpt.pstsp............................tptsstsp.t.....pslDlD 
..........................................................................t.lpp.h.phht....h.............tYlWp.ppshp..lphh.tp........................................h...hlh..G.hhpa.GD.slcD.EWhlValLhplocpa....splhh.+...l...D.sDGE..FLLIEA..Aph.LP..pWl.p..P.-.s..........s.pNR.V.alppGpLtlls................................................sps..sl.pAlph..lt..spst..thht.........sttlptth.tR.lpt..a.....Ppp...hpp.sh.........HpshshlP.tt.l.u.tlLcp..pPphlu.AlpuF.h.R..Dsh..sh+......................tspt.h..hF........................................................s.p.ph.lhsslphT+s.h..YApLh..pp..pa....ss.....................................hs..tp......thtthp.................lGhKl...........................................................spGhEhlhp...p.t.pptt...........................................s..ht.hhppL.pps..t....l.sst.ph.phhp....................................................................................................pDs-pWhp.hs.pph.-p.Lpt.............t.tt.t..t.tt..t....................................................thtphspphptFlpp....us...hcGs.......c..h...........................................................................................................................................................................................................p.p.pt.hph...................................-.......ppF.p..hhcph.hs.......tt....tt...........................................................................t..t....p.tt............................................................................................................p.tthpph.tth-tELtttth.p............................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................... 0 80 131 195 +6922 PF07094 DUF1357 Protein of unknown function (DUF1357) Moxon SJ anon Pfam-B_14833 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 225 residues in length. Members of this family appear to be specific Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 21.90 21.90 22.20 40.60 21.70 21.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.43 0.70 -4.90 3 130 2010-01-12 09:55:36 2003-08-26 16:05:37 6 3 27 0 10 107 0 192.30 66 94.63 CHANGED MpsKE-pEDSpclsSpsspV..psDsllISApEFEEY++aK-psNscSKtoH+DLSINERlo+ELAEVpEREulpcpLLhEApRINEIDTLAscaLSNHFNKEsLLAKGYSLKEIlpAQuRELIRKYVssEQIKAIAKVssl-HIDGchLEQLlcLAKVNIKQRKNAEsNa+olsplRsNh.VKhcE+sSLpNSNFhPIN+TELscAM.Npacph+IQFYpNpK+ ................hhEcEEKEDLpsQsK--pQl..KuDTKVISspEFEEYh+hKEQu.N......s.K.....s.K.EosRDLSINERITKELAEVEERERlEKQLLLEAERINEIDTLAKAHLSsHFNKEsLLAKGYTLKD.IMQAQRRELVRKFVPIEQIKAIAKsSDISHIDGEILEQLVSLAKVNIKLRKNAsSsSSSVDuIKGNIhhKSEERsSLLDSNFVPINFTEFVQAISNTYKQRRhQFYENLKR........... 0 5 5 5 +6923 PF07095 IgaA Intracellular growth attenuator protein IgaA Moxon SJ anon Pfam-B_14923 (release 10.0) Family This family consists of several bacterial intracellular growth attenuator (IgaA) proteins. IgaA is involved in negative control of bacterial proliferation within fibroblasts. IgaA is homologous to the E. coli YrfF and P. mirabilis UmoB proteins. Whereas the biological function of YrfF is currently unknown, UmoB has been shown elsewhere to act as a positive regulator of FlhDC, the master regulator of flagella and swarming. FlhDC has been shown to repress cell division during P. mirabilis swarming, suggesting that UmoB could repress cell division via FlhDC. This biological function, if maintained in S. 
enterica, could sustain a putative negative control of cell division and growth exerted by IgaA in intracellular bacteria [1]. 25.00 25.00 26.10 31.00 23.70 21.20 hmmbuild -o /dev/null HMM SEED 706 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.52 0.70 -13.02 0.70 -6.78 10 600 2009-09-10 22:06:59 2003-08-26 16:21:33 6 1 546 0 41 315 4 641.80 75 99.32 CHANGED MSTIVllLAhlLAClllsGhhlth+tR.R+.tlshh.uFucsTsRKLTu-ERuAIEsYL.pp.sp.......hhsoPssouuoussspLsLsspSDsVYslT+AITRYGlooD-PNKWRYYLDSlEVHLPPFWEQYIs--NsVELI+TsSlPLVISLNGHoLp-ahp-ts.u.sLppsuss.....pASIRpEESEplELLNIRKET.EE+ALs+PsGl+EAsLIsAuFLLaFFuLloPsVhlPWLsusAlLLluhGLWsLFpPPucssLRElHCLRGTPKRWGLFGEssQGQIsNISLGIIDLIYPsHWQPYIApDLGpKTDIDIYLNRQVVRQGRaLSLHDEVKNFPLQ+WhRNhllsAGSLLVLlLLlhaVPLslPlKLoloWL+GAQTlElToVspLEcAtLRlGDTL+spGTGMCYl.sPsp..hSsppsosFhPFDCSuIYWNsAsPLPlPES-Tl-KAoALlpoVN+QLHPp.-s-s+VNPsLuSAIQKSGMlLLDDFuDIVLKTQDLCss-.sDClRLKNALVNLGNsKDWsoLVKRAcuGKL-GlNVLLRPVSAEoLEsLVsooTusFlhRETs+AApuLNSPPPGGFLItSDEGKQLVspPhPsl......sLaDYsuhEQWpELQRLusMLLHTPFpAEGIlTslsTDANGTpHIsLHS.PDpsoLWRYlGTTLLLLshlsshlaNuVhslRRhp+sRpRhp-IQ+YY-sCFNspLhPss....c 
...................MSTIlIFLAALLACSLLAGWhh+V+SR.R+.pLPWssAFsDAQTRKLTPEERSAVENYL..-.sLoQ........lhQVPG...P..TG..A..SA.APISLs.LNAESNNVhhLTHAITRYGIoTDD.PNKWRYYLDSVEVHLPPFWEQYINDENsVELIhTDoLPLVISLNGHT.L.Q..EYM.QEoR..GYALQs.ssST.........................QASIRGEE.SEQIELLNIRKETHEEYALSRPpGLR..EALLIVASFLhFFFCLIT.PDV..FVPWL.sGGAlLLLuAGLWGLFA.PPu..K..............SuLREIHCLRGTPRRWGL.FGENsQEQIN.NISLG.IIDLlYPAHWQPYIuQDLGQQTDIDIYLD.R.HVVRQGRaLSLHDEVKNFPLQHWLRSTlIA.uGSLLVLFM..LLFWIPLDMPLKFTLSWMKGAQTIEATSVKQLscAGVRVGDTL+lSGTGMCNI+ouuT..W.S..u.po..NSPFhPFDCSQIIWN....DApuLPLPES-LVNKAsALopAVNRQLHPK.PED...-....SRV......S..ASLR......SAIQKSG.MVLLDDFGDIVLKTADLC.......SAc.....DDCVRLKNA.....LV.N.L....G.NSKD....WsALVK..RANAGKLD..GV..NVLLRPVSAESL-NLVsTSTAPFIo+ETARAAQSLNSPAPGGFLIsSDEGS-hVDQPWPSs......sLYD.YPsQEQWsAFQ+LAQMLMc...TPFsAEGIVTpIaTDANGTQHIuLHsIPD+SGLWRYLuTTLLLLsMlsSAlYNGlQAaRRYQRHRTRMhcIQtYYESCLNPpLhss.p................... 0 1 9 24 +6924 PF07096 DUF1358 Protein of unknown function (DUF1358) Moxon SJ anon Pfam-B_14731 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 125 residues in length. The function of this family is unknown. 20.90 20.90 20.90 21.50 20.50 20.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.51 0.71 -4.44 5 99 2009-01-15 18:05:59 2003-08-26 16:24:23 6 4 80 0 65 112 0 110.40 41 68.73 CHANGED MtothspsGpossu..psssu-cERKFRl+GGAFLGsVAusuAlAGFS+TLuhAKKuDPcaF.sKGlpuolAL.EoGoSLALRALGWGTLYAaLGTGsICFGlWKLoGA+sMpEFRpKMGoIFPRI .............................................tststcc+hFhlpuuh..FLGsVuusuhLuGFspTLuhAKKpsPcaF.s+Gsh..ustuL.EoGuoLALRALGWGoLYAhsGsGllsaulWKhhGl+shp-FR.KMtshhPtl............... 0 23 29 44 +6925 PF07097 DUF1359 Protein of unknown function (DUF1359) Moxon SJ anon Pfam-B_14784 (release 10.0) Family This family consists of several hypothetical bacterial and phage proteins of around 100 residues in length. 
Members of this family seem to be found exclusively in Lactococcus lactis and the bacteriophages that infect this species. The function of this family is unknown. 25.00 25.00 133.60 133.50 20.50 19.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.30 0.72 -3.97 2 9 2009-09-11 08:58:17 2003-08-26 16:27:01 6 1 6 0 3 4 0 98.10 68 94.24 CHANGED M.pEhpl..pcpItphppKhsRLpplIHtl+pQ....cll.DchpsscIppssKFthpLs...uhhcsshpIsVGTLIsLLcpNIEsNTslhsELstcLGI-lc MsQEITlDFSEQIAKsQTKIsRLKchIHcVRcQ....KIVLDDlKNNHhs+DTKhELNLG...GVLKCSVKINVGTLIPLLEQNIEDNTsLIpELAKELGIDIK 0 3 3 3 +6926 PF07098 DUF1360 Protein of unknown function (DUF1360) Moxon SJ anon Pfam-B_14863 (release 10.0) Family This family consists of several bacterial proteins of around 115 residues in length. Members of this family are found in Bacillus species and Streptomyces coelicolor, the function of the family is unknown. 20.90 20.90 21.10 21.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.37 0.72 -4.31 21 265 2009-01-15 18:05:59 2003-08-26 16:44:47 6 1 226 0 60 183 34 103.80 37 75.40 CHANGED llslAoaRLTRLIVaDpITsalRpPFhcptcph..p.GpsEshhss+u..stlRphlGELLSCYWCTGlWsAshlhhualhhPchsp.llhlLuIAGuAul.lEshlu+ ...........................lhsLAsaRLTRLIVaD+I.TuhLRpPFhcc.......hc.......hs.......-........s...Gsspshpt.s+u....pulR.phlGEL..LoCaWCsGlWlushlh..ss..h...sal..P....phupsl..lhlLAlAGuuul.lEshhu.h...................... 0 16 41 49 +6927 PF07099 DUF1361 Protein of unknown function (DUF1361) Moxon SJ anon Pfam-B_14870 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown although some members are annotated as being putative integral membrane proteins. 
25.00 25.00 26.30 26.30 21.30 23.70 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.28 0.71 -4.43 31 569 2009-01-15 18:05:59 2003-08-26 16:47:59 6 3 546 0 84 337 14 166.00 36 79.45 CHANGED lWNlFLAhlPhtluhhlphh+sp........hlhhlhsllWLlFhPNA.YllTDllHLp................hhtshsht.ahthhhlhsushhulhhGhhShhhlhphhpph........h.thhlhhslhhLsuhGIYlGRFhRhNSW-llspPpsllpplls.l.....ptcthhFllhhshl.lhlh .................................hhNlFLAYIPhELu...lLL....phhK.p....................h.lahlh.uhlallhhPNs.YhlTDLlHLp............htashhh.shshspWhhFsh..Llhulhhulhlshhsh..hpl....hp..h...hppp...........hh...phlllssLhaLsuhGIYIGRF.........hRLpShalhs.pPhpllpclhpsl.....shc.ph...hFlhhhshhQhhl.h.......................... 0 25 59 75 +6928 PF07100 ASRT DUF1362; Anabaena sensory rhodopsin transducer Moxon SJ anon Pfam-B_14972 (release 10.0) Family The family of bacterial Anabaena sensory rhodopsin transducers are likely to bind sugars or related metabolites. The entire protein is comprised of a single globular domain with an eight-stranded beta-sandwich fold. There are a few characteristics which define this beta-sandwich fold as being distinct from other so-named folds, and these are: 1) a well conserved tryptophan, usually following a polar residue, present at the start of the first strand; this tryptophan appears to be central to a hydrophobic interaction required to hold the two beta-sheets of the sandwich together, and 2) a nearly absolutely conserved asparagine located at the end of the second beta-strand, that hydrogen bonds with the backbone carbonyls of the residues 2 and 4 positions downstream from it, thereby stabilising the characteristic tight turn between strands 2 and 3 of the structure. 
25.00 25.00 28.30 30.40 20.70 20.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.55 0.71 -4.37 25 78 2009-09-25 12:58:53 2003-08-26 16:50:45 6 2 67 25 33 77 31 117.60 44 90.17 CHANGED slGc+pWsIs-GYIPspSsGsp.thsSHEslClLNsuDc-A+lplTlYFsDR-PlGPaclsVsuRRThHlRhN..-L.-PpsIPpsssYAsllES-VPVVVQaoRLDoR.QuphALhoTlAYss ..t.lGcppWsIs-GYIPspSsGs.............hsSHEslClLNsuDpsAclclolaFpDR-PlssaclsVsARRTpHlRhs..-LtcsEslPpsssYAhllcSDlPlVVQaoRLDop.QuphALhoThAYs.s..... 0 12 23 28 +6929 PF07101 DUF1363 Protein of unknown function (DUF1363) Moxon SJ anon Pfam-B_14992 (release 10.0) Family This family consists of several Trypanosoma brucei putative variant specific antigen proteins of around 80 residues in length. 19.50 19.50 23.20 85.50 17.70 17.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.05 2 14 2009-01-15 18:05:59 2003-08-27 09:26:29 6 1 4 0 0 11 1 102.00 73 95.07 CHANGED MSRHGNIDIGCGAGNTMDATFRSCTPHESFYYLSINHDLKAREAQNNNTNSDTICFSTHLHKRSNRRLDRRCEYIFGICSIKGNSAARRKKFLpTPLCQRYlNNCLKYMHSICHYQTRPGRTSS ..................MS.cGNIDIGCGAGNTMDAsFRSCT.HESaYYLSINHDLKAREAQNNNTNSDThpFSTpLHKRSNR+LDRRCEYIFGhCSIKGNSAARRKKFLKTPLCQRYLNN................................ 2 0 0 0 +6930 PF07102 DUF1364 Protein of unknown function (DUF1364) Moxon SJ anon Pfam-B_14821 (release 10.0) Family This family consists of several bacterial and phage proteins of around 95 residues in length. The function of this family is unknown. 
29.50 29.50 29.50 29.50 29.10 29.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.38 0.72 -4.11 13 531 2009-01-15 18:05:59 2003-08-27 09:33:10 7 4 387 1 29 247 9 91.50 59 93.90 CHANGED pscLRcAARGcpCplRIPGlCNtNPETTVLAH...hRhsuhpGsGhKscDhhusaACSuCHDtIDuRs+t...hspEphchhthcGlhRTtthLhccGhl ........................MusLRKtARGRECQVRIsGl.....CNG...NP..EToVL.AH.......hRhAG.hCGT..G..hKPsDLluAhACSuCHDEIDRRT+h.......lDs..c-s+hhtlEGlhRTQsIhlKEGhl............................ 0 3 9 19 +6931 PF07103 DUF1365 Protein of unknown function (DUF1365) Moxon SJ anon Pfam-B_14846 (release 10.0) Family This family consists of several bacterial and plant proteins of around 250 residues in length. The function of this family is unknown. 25.00 25.00 26.80 26.50 24.50 24.10 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.76 0.70 -5.33 104 812 2009-01-15 18:05:59 2003-08-27 09:37:11 6 11 766 0 319 782 1520 234.30 30 80.99 CHANGED uslahGpVhHpRhpPhpHpFpYplhhhhlDLD..-ls.................th...phst.hauhs+h.shhuF+cpDa.........................ushpts......ltshlpph...tGhp.s..GclhLLspsRhhGasFNPlShYaCacpssp.....L.psllAEVsNT.as-RHsYllsh...............tstphpscKsFHVSPFhshchpYca+hs.s......sc.cltl.....plp.p......................pt.................hhsAolshpRpP......LosssLhcshlphPhhol+llsuIaWQAL+L.alKtlPhhs.+Pss.pp 
......................................................................tlh.GplhHpRh....t..P...h..p..HpFpY..phhhhhlDLD........cls...............................................................pl......th.h..hhu.....p....t..h...sh...hp..FcppD.ah..................................................................s.sh.pts.....ltsp.lpph...........pGhp.s...GclhhLspsRhh.Ga.h.FNPlohaasa........cp.ssp.....................L.phllAEVsNT.asERHsYllss.................................tthphphsK.sFHVSPF.sh.p.t.pYp...a+hp.s................sc..pltl.........plphp....................................pt..................tthhsAols.....hp.tps........lo.s.t....s...l.....hp.....hhhphPhhs.h+lhh.....tIaWpA..l+L.ah.K..t.s.Phhs+Pt...p............................................................... 0 91 197 265 +6932 PF07104 DUF1366 Protein of unknown function (DUF1366) Moxon SJ, Sammut SJ anon Pfam-B_14849 (release 10.0) Family This family consists of several hypothetical Streptococcus thermophilus bacteriophage proteins of around 130 residues in length. One of the sequences in this family, from phage Sfi11 (Swiss:O80186) is known as Gp149. The function of this family is unknown. 21.60 21.60 21.60 22.00 21.40 21.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.45 0.71 -4.47 7 78 2009-01-15 18:05:59 2003-08-27 09:44:18 6 1 46 0 9 64 0 113.10 35 67.89 CHANGED M.hEatup..ph.sstustusKVlLcspD...Gu.lPlhLPsEhhDhopsElLpps.clIYQc.aPp+AEsEKFs-Ls.......t..th.ptp.sKhEphhphuosTL.slIsphhtccshsD-sl ..............h...plsuKYPphDuoGulsuT+VIlss-D...GuhIs.h..l.pDhhspssoEllctsLEpahcppas-hAhuEthpKlD..............-..cKh.pppssK...stcs...spsA......................shh............................. 0 0 6 8 +6933 PF07105 DUF1367 Protein of unknown function (DUF1367) Moxon SJ anon Pfam-B_14892 (release 10.0) Family This family consists of several highly conserved, hypothetical phage proteins of around 200 residues in length. The function of this family is unknown. 
Some proteins are annotated as IrsA (intracellular response to stress). 25.00 25.00 27.30 25.20 18.40 24.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.24 0.71 -4.90 9 383 2009-01-15 18:05:59 2003-08-27 09:48:02 6 3 265 0 27 249 3 156.50 44 98.30 CHANGED MA.phphIKpusGhLlPATP-sp-hLp.+hKhGsVlhu-F....KpVRNPtFHRKFFALLsLGFEYWEPsGGslospEpcLlpGaspaLstasGpc..ssLt-hAspYLtplutcR.ssslulpKSF-AFRcWVTVEAGaYclhphPDGolt+cP+SISFAuMD-hEFppLY+usLsVLWpaIL..sRpFsopp-sEsAAsQLhsFs ...................MA.c.lphlKp.usGlLlPATscst-h.......Lp.plKlGt.hlh.A-.F....+ps..RN...AFH++FF.t.LLpLGF-YWpPsGGslostEpcLlpGaspaLst...sGpc....ssL.....psA-pYLpplAppR..st..s..h..ulhKSF-AaRtWVTlpAGaYs.th.hP.DGoht++s+SIuFusMD.-sEFpplYKusLsVLWpaIL...RpFpo.ppsENsAuQLhpFA.............................. 0 0 7 17 +6934 PF07106 TBPIP Tat binding protein 1(TBP-1)-interacting protein (TBPIP) Moxon SJ anon Pfam-B_14830 (release 10.0) Family This family consists of several eukaryotic TBP-1 interacting protein (TBPIP) sequences. TBP-1 has been demonstrated to interact with the human immunodeficiency virus type 1 (HIV-1) viral protein Tat, then modulate the essential replication process of HIV. In addition, TBP-1 has been shown to be a component of the 26S proteasome, a basic multiprotein complex that degrades ubiquitinated proteins in an ATP-dependent fashion. Human TBPIP interacts with human TBP-1 then modulates the inhibitory action of human TBP-1 on HIV-Tat-mediated transactivation [1]. 
30.00 30.00 30.20 30.00 29.90 29.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.19 0.71 -4.71 12 266 2012-10-04 14:01:12 2003-08-27 11:17:55 8 8 222 0 184 281 2 151.20 26 63.87 CHANGED -ApshlhcYlpcpNRPaSsp-lhsNLpp..tluKssVpKsL-pLspps+IhpK.YGKt.KIYhssQpphc.ssst-lpclchc.lppLppclptlppphpplppcl+pLppsLsst-lhcplppl+cclpphcc+Lcslcp.shp...lo.--hpplhpppphhpptapKRK+hhp ...............................thlhpahpppNRPasspslhsNLpp...tlsK.stl...KsL-pLspp......sclhtK...h.....GKp..+lY..hspQs........ph.p............ss........st-.lpthctp....lt....pLppplpplppphpphc.scLppLpuphost-.hppplppLcp.-....hpphpp+Lpplc......s...sssh........ls.s..--tp......pl....cphpphpppapcRK+hh............................................ 0 74 108 153 +6935 PF07107 WI12 Wound-induced protein WI12 Moxon SJ anon Pfam-B_15477 (release 10.0) Domain This family consists of several plant wound-induced protein sequences related to WI12 from Mesembryanthemum crystallinum (Swiss:Q9XES3). Wounding, methyl jasmonate, and pathogen infection is known to induce local WI12 expression. WI12 expression is also thought to be developmentally controlled in the placenta and developing seeds. WI12 preferentially accumulates in the cell wall and it has been suggested that it plays a role in the reinforcement of cell wall composition after wounding and during plant development [1]. This family seems partly related to the NTF2-like superfamily. 
21.50 21.50 21.50 22.30 21.40 20.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.66 0.72 -4.17 7 77 2012-10-03 02:27:23 2003-08-27 11:26:04 6 4 22 0 43 83 0 103.80 44 66.72 CHANGED MRLLTG.us.usuSFpFpPpSVsuF...GssVlAEG.sDsspplhWVHAWTVs...sG..lITplREYhNTsLTVTRlu.sssuptst...................pspslWpSphssRAtKslPuLlLAl ....................................M+lLTG...ts.........p.....p..s.......u..........FpF.Ppulsuh.......Gs.sVlAEG......t.p..stp.........h...YWVHAWTVs...sG..lITQlREYFNTsLTVTcls..s.t.......................................ttstslWpSph.....s..-.ht+SlPGLVLAI........................ 1 5 30 38 +6936 PF07108 PipA PipA protein Moxon SJ anon Pfam-B_15507 (release 10.0) Family This family consists of several Salmonella PipA (pathogenicity island-encoded protein A) and related phage sequences. PipA is thought to contribute to enteric but not to systemic salmonellosis [1]. The family carries a highly conserved HEXXH sequence motif along with several highly conserved glutamic acid residues which might be indicative of the family being a metallo-peptidase. 25.00 25.00 28.80 109.90 24.30 23.20 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.27 0.71 -4.94 4 164 2009-01-15 18:05:59 2003-08-27 11:31:57 6 1 116 0 4 64 0 174.20 86 93.04 CHANGED sssFPDlPEHussPS.lRLuaDplATNS-h+LcP.EplsEYhISGsGGIDPDhEIDDDhYsECapcLSpILpsAYTQStTFRRLMNYAYDpELaDlEpRWLLGAGEsFuTTVTsE-LpsSpGRKVIsLNLD-ssD.sshPEpYESs-GP.p.FDTpRSFhHEIVHALTpLpDcE-NHPRGPVVEYTNIILKEMGppSPPRItY ........................................phVEYLISGAGGIDPDTEIDDDTYDECYDELSSVLQNAYTQSETFRRLMNYAYEKELHDVEQRWLLGAGEAFETTVAQEHFKLSEGRKVICLNLDDSD..DSYTEHYESNEG..QLFDTKRSFIHEVVHALTHLQDKEENHPRGPVVEYTNIILKEMGHPSPPRMsY.. 
0 0 0 3 +6937 PF07109 Mg-por_mtran_C Magnesium-protoporphyrin IX methyltransferase C-terminus Vella Briffa B anon Pfam-B_12015 (release 10.0) Family This family represents the C-terminus (approximately 100 residues) of bacterial and eukaryotic Magnesium-protoporphyrin IX methyltransferase (EC:2.1.1.11). This converts magnesium-protoporphyrin IX to magnesium-protoporphyrin IX methylester using S-adenosyl-L-methionine as a cofactor [1]. 19.70 19.70 23.30 22.40 17.40 15.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.11 0.72 -3.94 15 218 2012-10-10 17:06:42 2003-08-27 11:38:22 6 12 204 0 98 221 233 97.50 38 40.22 CHANGED shDsLIHYsspDssthLu+LuShspppllhoFAP+TshLshh+pIGcLFPtusRosthh.Hu.pslp+tlss....tGapls+pthlostFYhSphLEhs. ..ChDVLIHYPppcssphls+LuuhscppllhoFAP+TshLslh+tlGclFPtss+s..Tphh.hpEpsltctl..tp.....sG..a..pltRpphssssFYhSpllEhh.................. 0 26 59 80 +6938 PF07110 EthD EthD domain Moxon SJ, Bateman A anon Pfam-B_15539 (release 10.0) Domain This family consists of several bacterial sequences which are related to the EthD protein of Rhodococcus ruber (Swiss:Q93EX2). In Rhodococcus ruber, EthD is thought to be involved in the degradation of ethyl tert-butyl ether (ETBE). EthD synthesis is induced by ETBE but it's exact function is unknown, it is however thought to be essential to the ETBE degradation system. 
20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.11 0.72 -3.05 57 827 2012-10-02 00:20:33 2003-08-27 11:42:09 6 16 516 4 432 846 223 86.60 19 56.80 CHANGED suho.ccFpcaapphHuslstplhs.......h.ta....................h..ttthhhhtsstsshsaDuhsphha...p..shcshhshhpsPthtt....ltsDc.tpFsDts .............................................hs.ttat.caapppHs.s.l.stph.u.................................................h.hhttths.t.s..hsss...sssa....cuhschhF......c...shcsh.t...s....u...hs...ss...phpt...........hhsD...pFh...t......................... 0 94 256 355 +6939 PF07111 HCR Alpha helical coiled-coil rod protein (HCR) Moxon SJ anon Pfam-B_15548 (release 10.0) Family This family consists of several mammalian alpha helical coiled-coil rod HCR proteins. The function of HCR is unknown but it has been implicated in psoriasis in humans and is thought to affect keratinocyte proliferation [1]. 20.70 20.70 20.80 20.80 19.40 20.60 hmmbuild -o /dev/null HMM SEED 739 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.47 0.70 -13.35 0.70 -6.53 3 174 2009-01-15 18:05:59 2003-08-27 11:50:30 7 5 41 0 36 159 0 342.80 49 90.71 CHANGED MAPTWASDlPLVQuPupQDVLERRLDsQRspVTMWGpDsu.uDuQ-PGRRGRSh-LEtSQALSQQAELISRQLQELRRLEEEVR.LRETSLQQKMRLEAQAhELEALAhAEKAGRAEAEGLRAALAGAEhVRKNLEEGuQ+ELEElQRLHQEQLSSLTQAHpEALSSLsSKAEGLEKSLsSLETRRAGEAKpLAtAQ+EADhLRcQLSKTQEELEAQVTLVEsLRKYVGEQVluEs+SQsWELERpELL-TlKHLQEDRAuLQATVELLQVRVQSLTHILALQEEELTRKlQPlDsLEPEFsRKCRSLLsRWREKVFALMVQLKAQELpHuDSTsQL+tQVAELQEcVTSQSQEQAILQRSLQDKsAEVEVERMuoKuLQhELSRAQEARRRhQQQlASAEEQL+LVVsAVsSSQttLpoTMA+VEpAlAQLPSLSNRLSYAVRKVHTI+GLlARKVALAQLRQESSPP...APu.ssDLSlELcQLREERNRLDAELQLSARLIQQEVGRAREQGEsERQRLuEVAQQLERELQcoQESLASlGtQLEAARpGQQESTEEAASLRQELTQQQElYGQALQEKVAEVETRLREQLSDTERRLNEARREHAKAVVSLRQIQR+AAQEKERNQELRRLQEEARKEEGQRLoRRLQELERDKNLML..............QRLLsVLPSulsKK....sSPRPlEsSuStSlPAAsPsRESlKGSLTVLLDsLQGLSEAIS+-EslC.tDN.cs+oSsNPPsoP 
.......................................................................................oht...htsSpu...LSQQAElIsRQLQElp.LEtEl...LREsS.lp.Qph+LEsQA.....EL-..tLt...t-p...sup....sE..sEtL+ttlutut..+............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 9 14 20 +6940 PF07112 DUF1368 Protein of unknown function (DUF1368) Moxon SJ anon Pfam-B_14994 (release 10.0) Family This family consists of several proteins with seem to be specific to red algae plasmids. Members of this family are typically around 415 residues in length. The function of this family is unknown. 
25.00 25.00 33.20 33.00 18.90 18.60 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.40 0.70 -5.87 5 9 2009-01-15 18:05:59 2003-08-27 11:53:47 6 1 4 0 0 10 0 326.00 37 95.41 CHANGED EVcR..IoclllcS.chSKQYFLYKScslV.SsSsSEDDWpYCLLILKFlsPpsslKsIopILEaFL+cDRYRDKFsccauYLsTTIpKVIlpSSppNLlGssYspso...+-lIQ.sulDShaspss.I+sS.CosssVLKIsNFhShFaLcpsKYSsplhDaclshososNsolKISMhGGLLNYFDLMlFLuILYsY....KlsopLssssI.......lcINFSs.l-hMLpsNGo.sR+KYlNSLcKLSKVHL-s..p.h.shlsshpsspcphhsFSGsLLoFEpLSssp.Tpssl.hLSpPll..+hhcS.ssYSlVNWsSFlsLssoplRLlYFYFCLNVKsSpY.FTpFolcpLlccLYsusshuSohRhh+SclRKhLhhlh-sppshlDF-FpLVhs.spSpplISuIKVRRs+lhl.R .................clhp..I.p.l.pS.p..+pYhlYh.pshh.p.SsSEDDWpaClLlLpalpP.sshchhsplLchFL+pDRaRsKhppp.sYLppTItKsI..u.p.pllGs.h..p....tp.lQ..ulsp.hsp.s.lp.S.sphpsVLKl.NhhShhaLppsphSs.h.chthshs.spssslKlS..hGlLNaFDLhlFluILYta........K..s.h..ssl.......lsINhSs...hshhhpssGp..RtKalNSL.KLS+Vplcs.......h.shh.tsptphhphoupLLsFptlphpp.T.hpl.hLSpPll..+hhp.hsNYSlVNWtSFs.LsssplRLlYFYFCLpVKsSpY.FTpF.lcpllpcLYhusshpoohR.hp.phRp.L..h.p....h.-hphpl.hp..pp..hIp.IKVRR.phh..h.. 1 0 0 0 +6942 PF07114 DUF1370 Protein of unknown function (DUF1370) Moxon SJ anon Pfam-B_15274 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 200 residues in length. Members of this family seem to be specific to mammals and their function is unknown. 
25.00 25.00 25.50 25.30 24.00 23.60 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.14 0.71 -5.08 6 118 2009-01-15 18:05:59 2003-08-27 12:38:11 6 1 72 0 70 120 0 175.20 33 88.12 CHANGED su+h+cshIIEhIs+KFcpLPEs-RsLhpaGolYlGhNAuhuGllANSLFRRlL+VoputlsouLPMAsLPFLTTsloYpshVosPL.oGDLsCETCslhRuuLlGlVsGGlYPlhLAlPVNGGLAARYposPLPpKGNIlpaWhslSpPVhKKMuFPLlLQshFGsalGS+HYplh.KsLphs.P- ..........................................hh.hlt+php.L.ct.-..pph..aGsshlGssAuhsGlhuNslFR+tLpV.p.p.u.tlpohLPh.uslPFLoTs...lsY+hhVsp.P.Lho...usl.s.CtsChhhRuuLlGllhGslYPshLAhshNutLAs...+..Ypos......LP....pK...G.....p....ll....haWhplspPlh+tMhhsllhQshFuhalu.....ppatlh.chhph...................................................... 0 10 14 35 +6943 PF07115 DUF1371 Protein of unknown function (DUF1371) Moxon SJ anon Pfam-B_15275 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 110 residues in length. The function of this family is unknown but members seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). 24.90 24.90 25.10 25.00 24.50 24.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.51 0.72 -3.90 3 117 2009-01-15 18:05:59 2003-08-27 12:40:45 6 1 31 0 10 64 2 106.60 68 99.47 CHANGED MDLRIGNNFELVFNNDFSLVDGIEEQKQRLFIFLKTLRGSLSYAPNWGLDYhLLLKLLKINNL-AVKNYFaEISKELNLDLINlSsoIQD+KlHISFFFsG.DVLNMEFcL ...........MDLRlGNNFELV.FNNDLSL.VDGI-EQKQRhhIFLKTLRGSLSYAPpWGLDYhLLLKLLKINNLcAVKNYFaE.ISKEL.N.LDLINISsoIQD...pKspISFFFoG.DlLNMEFsL............. 0 6 6 6 +6944 PF07116 DUF1372 Protein of unknown function (DUF1372) Moxon SJ anon Pfam-B_15278 (release 10.0) Family This family consists of several Streptococcus bacteriophage sequences and related proteins from Streptococcus species. Members of this family are typically around 100 residues in length and their function is unknown. 
25.00 25.00 36.00 35.80 18.50 17.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.53 0.72 -4.07 7 50 2009-01-15 18:05:59 2003-08-27 12:44:28 6 1 46 0 3 42 0 88.30 43 95.42 CHANGED Mp........c....s.htlAshlllluhlhshshlhhhhs...s.+.spsllIapVDNuss.MaGKlTsKphltthYTlsstAYGKFLVTKEQYcsIpVGDDIPsYLKG ..........................................hh...................hhshhllluhshshhthhhhht.......c..phcsllIYKsDNsGuElaGKVs-KphlGcLYTlThpsYGhFlVTKEQY-plcVGD-l........ 0 0 0 0 +6945 PF07117 DUF1373 Protein of unknown function (DUF1373) Moxon SJ anon Pfam-B_15084 (release 10.0) Family This family consists of several hypothetical proteins which seem to be specific to Oryzias latipes (Japanese ricefish). Members of this family are typically around 200 residues in length. The function of this family is unknown. 25.00 25.00 25.30 28.00 22.50 24.00 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.64 0.70 -4.60 3 35 2009-01-15 18:05:59 2003-08-27 12:58:46 6 1 1 0 19 10 0 190.70 68 97.25 CHANGED M.RVLWIsCLLIGSIoCLPQGGss..ssu....hPPYoGQ..ScPSYE+PSGQ.SGYSosPGYYSuGTpTuGGS..GSPPMWYSASYPEQEPAKPTYQRPAQSSGYGSYGuVDSSYSGSGSQQSGSQGAQSGAPGSQHQVEQESWSSSSDDEDEPEFTPVSEEDQVYASKTRSRYNQKRLLFSQFRYTPTEPRVPQEPVFPYPSKSHQGKGSAKGSR ......................RhLWlSCLLIGoIoChP.QtGhs...s...........hh....YoGQ...tPSYE+PStQuS...G...YSS..GhYSuuT...NTAGuS..uosPMWYSASYPEQEPAKPTYQRPA.QSSGYGS.ss...stt.SYSGSGSQQSGSQGuQSGsPGSQHQVEQESWSSSS..D..DEEEPEFTPVSEEDQVYA.KSRSRYNQKRLLFSQFRYTPTEP.RVPQEPVFPYPSK.SHQGKGSAKGSR..... 0 0 0 19 +6946 PF07118 DUF1374 Protein of unknown function (DUF1374) Moxon SJ anon Pfam-B_15191 (release 10.0) Family This family consists of several hypothetical Sulfolobus virus proteins of around 100 residues in length. The function of this family is unknown. 
20.70 20.70 21.40 21.30 20.00 19.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.94 0.72 -3.88 15 27 2009-09-11 20:35:31 2003-08-27 13:01:18 6 1 6 7 0 32 0 87.10 29 91.45 CHANGED cFc-LK.sllchFFc-pplpElsLcFpcpl.lTEpEacELIpsschhpphpsp....sIhsDhYpYhEspNphhKL.IcYa+cs-.KIhIhEIchWR- .....hcplc.pllphFFcspplpEhslpFcp.lclsEp-acpLlt..s.hpthtst......tlhh-.a.Yhp.ssthlKl.lpYh+css.KIhlhEIphaRc. 0 0 0 0 +6947 PF07119 DUF1375 Protein of unknown function (DUF1375) Moxon SJ anon Pfam-B_15247 (release 10.0) Family This family consists of several hypothetical, putative lipoproteins of around 80 residues in length. Members of this family seem to be specific to the Class Gammaproteobacteria. The function of this family is unknown. 20.30 20.30 22.60 22.30 19.50 17.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.65 0.72 -4.15 41 1264 2009-01-15 18:05:59 2003-08-27 13:05:19 7 3 710 0 129 455 12 66.60 48 72.28 CHANGED L..lssh....hLsGCuolhohs.ssppsh................hYsGsphshphhpt.................shhhhslshlDLPhShllDTLLLPashh ..................................................h.h.....h...hLuGCuSlhS+ThstpGp.....................YPGsphssp.hu.....................................+.lsILDlPFShVhDTLLLPhDl.a.... 0 10 27 78 +6948 PF07120 DUF1376 Protein of unknown function (DUF1376) Moxon SJ anon Pfam-B_15380 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 95 residues in length. The function of this family is unknown. 
21.20 21.20 21.30 21.20 20.90 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.77 0.72 -3.85 24 221 2009-01-15 18:05:59 2003-08-27 13:08:51 6 4 156 0 60 198 23 86.00 31 30.39 CHANGED sa.aphaluDalp-..TtaLostEcGsYhhLLstha.....ps.ppsls.sDcthLsRlstspscc.tsshshllspF...ptpcspatpcRh-cEltph .............sa.hphaIuDal.s-..Th+L.S..s.pE.+GsYhhL.hhpYa.....ps..scPlP..ppspLA+lsph..oscca...tsshs.lhc.F...htpsstahp.tRh-c-lst.h......................................... 0 9 29 42 +6950 PF07122 VLPT Variable length PCR target protein (VLPT) Moxon SJ anon Pfam-B_15500 (release 10.0) Repeat This family consists of a number of 29 residue repeats which seem to be specific to the Ehrlichia chaffeensis variable length PCR target (VLPT) protein. Ehrlichia chaffeensis is a tick-transmitted rickettsial agent and is responsible for human monocytic ehrlichiosis (HME). The function of this family is unknown [1]. 20.20 20.20 23.80 24.30 19.90 20.00 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.11 0.72 -4.28 4 120 2009-01-15 18:05:59 2003-08-27 13:17:43 6 4 3 0 4 113 3 29.60 65 67.36 CHANGED DLQQSSsSDLHts.pVEL.sPSKEtVQLEs .....DLQQSSsSDLHtS.pVEL.sPSKEtVQLEs 0 4 4 4 +6951 PF07123 PsbW PsbW_2; Photosystem II reaction centre W protein (PsbW) Moxon SJ anon Pfam-B_15117 (release 10.0) Family This family consists of several plant specific photosystem II reaction centre W (PsbW) proteins. PsbW is a nuclear-encoded protein located in the thylakoid membrane of the chloroplast. PsbW is a core component of photosystem II but not photosystem I [1]. This family does not appear to be related to Pfam:PF03912. 
25.40 25.40 26.20 29.20 22.70 25.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.90 0.71 -4.11 9 66 2009-09-11 00:41:56 2003-08-27 13:39:30 7 1 36 0 30 63 0 117.30 50 97.19 CHANGED MsuluAsus....splsu+uhstssthhssh.s.sshslPuhps++...hhsshppc..hhpsh.....s.tsuhhussuu.hs....utPAhALVD-Rhss-GTGLPLGlssshLGaILhGVFshIWuLYhlhs+s....lp..cD-DS.GLuL ...............................................................shGLPthtttt..tlpCshppc.t..t......huh....uuuhhAAsuuhhs....usP.AhALVDERhSTEGTGLsLGLSNNLLGWILlGVFGLIWuLYhlYoSs....L-....ED--S.GLSL........... 0 7 21 28 +6952 PF07124 Phytoreo_P8 Phytoreovirus outer capsid protein P8 Moxon SJ anon Pfam-B_15606 (release 10.0) Family This family consists of several Phytoreovirus outer capsid protein P8 sequences [1]. 21.20 21.20 556.10 556.00 20.40 19.80 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.40 0.70 -5.82 4 19 2009-01-15 18:05:59 2003-08-27 13:57:44 6 1 8 13 0 19 0 423.20 64 100.00 CHANGED MSRQsWlETSALlEsISEYlV+s.GDTFpGLThsDloTLSNLhsQLSlusVGFLND.RTPLQsMSNpFVsFISTTDRCuaMLp.sWFDSDltPsV.TDNFIssYIKsRhSsPlSDslRQlNNLSLpPpts.KlhSpQNAlhKALD.PYuoPl-PpcLhRuoAst.sGNhspRRuLsTsLstGAps.sFhVuE+c+IlFGpRS.NslsAupYpINVPsaaSsLsVTsARlYFTNSFlGsTIsNVpVNA.NGsssVAsIpVPTDsNshsVDSDulVSFSLuGGsINVsTuVshTGFAIAIEG-FphQMNRsQSYYThsSITh.sslsIDDFGlosaLEsFR.RLhACGQsEIFS-uMNpLT.sLIsNYhssstssshlAFsSPWYRhSERhtTILoF.ptslsLppR+LhVRHLWVIhSFIAVFGRYYssN MSRQAWl-TSALlEsISEYss+CoaDTFpGLThsDhssLSNLhsQlSVuSVGalsDPRsPLQsMSspFVsFISTsDRpuYMLpKsWFsSDltPsV.oDsFIATYIKPRhphshSDVLRQlNNhALQP.tsPKLIsRQ.uVhKuhDIPYSTPIpPpDlhRSsAss.sGNVuphthLuTP.l..AQNsTFhVuEpc+IlFGhRShssIssGNaQIsVPPWhSsLsVssARlYFTNSFhGsTItsVpssAVsGsDsssTlTVPTDsNshlVsSDSVVSLSLuGGsINVThuVshTGasIAIEGcFsM.hNtS.uYYTLoSlTh.sss.IDDFGLSAFLpPFhhpLRAsGQsEIFSpuMNsLTpsLIppYMsAstAss.IAFsSPWaRFSERARTILsh.tsllshssRKLIIRHLWVIhShIAVFGRYYpsN. 
1 0 0 0 +6953 PF07125 DUF1378 Protein of unknown function (DUF1378) Moxon SJ anon Pfam-B_15650 (release 10.0) Family This family consists of hypothetical bacterial and phage proteins of around 59 residues in length. Bacterial members of this family seem to be specific to Enterobacteria. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 25.90 44.70 21.50 17.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.86 0.72 -4.02 3 166 2009-01-15 18:05:59 2003-08-27 14:01:00 6 1 103 0 3 53 0 58.30 75 98.88 CHANGED MTFVpplLLYFCTVVCsLYLVSGGYKVIRNYIRRKIDcAAAEKISASQSAGSKPEEPLI .MTFlpplhLYFCTVVCsLYLlSGGY+AhRDaWRRQIDKRAAEKI.SASQSAGSKPEEPLI.................................. 0 0 0 3 +6954 PF07126 DUF1379 Protein of unknown function (DUF1379) Moxon SJ anon Pfam-B_15837 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 25.00 25.00 31.00 30.90 18.70 18.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.63 0.71 -4.84 33 709 2009-01-15 18:05:59 2003-08-27 14:03:06 7 1 704 0 77 265 9 154.00 63 85.88 CHANGED hLcLuscMlFposaptKhLlssAhspstFsV-DAuhYhpahEpl.sp.lslocsppsplsLNAsAApRFhKPhMPKSWaFpsp..stsh.PppGclhp..LpsstppuphlVlEsu-pAoLChLls.pphtLsssKslt.h-sIKVMpDRltPhpst...hph .....MLDLANGMLFRSRFuRKMLTPDA..FsPsGFCVDDAALYFSFEEKC.RD.hsLSKEQ+A.ELVLNALVAIRaLKPQMPKSWHFVuH...GchWsPhsGDAAsVaLSDTtEQVNLLVVEs..GENAALCLLAQ.PsVVlAG.RsMQLGDAIKIMNDRLKPQls.....hssh.. 0 6 22 48 +6955 PF07127 Nodulin_late Late nodulin protein Moxon SJ anon Pfam-B_15657 (release 10.0) Family This family consists of several plant specific late nodulin sequences which are homologous to the Pisum sativum (Garden pea) ENOD3 protein. 
ENOD3 is expressed in the late stages of root nodule formation and contains two pairs of cysteine residues towards the C-terminus which may be involved in metal-binding [1]. 29.90 29.90 30.10 30.00 29.80 29.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.15 0.72 -3.84 92 434 2009-01-15 18:05:59 2003-08-27 14:53:18 6 9 8 0 2 491 0 55.00 33 72.20 CHANGED MucllK..FVYsh....IlFlSLFLl.stsss.........hhtCpsDsDCPph.......hs.hhh+Cl.sthCp ...............MscllK..F.VYsh....Il..FlSL.FL.l.sp.ssp.................hhtCpsDs.DCPph.........h.hs..hhh+Cl.p.thC..................... 0 1 1 1 +6956 PF07128 DUF1380 Protein of unknown function (DUF1380) Moxon SJ anon Pfam-B_15699 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 140 residues in length. Members of this family seem to be specific to Enterobacteria. The function of this family is unknown. 21.40 21.40 21.70 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.76 0.71 -4.36 3 549 2009-09-11 02:05:09 2003-08-27 14:58:00 7 2 244 0 12 281 0 130.40 49 93.64 CHANGED MYGTCETLCRhLsEQYPAETPLNLIIWSPADIEALADGMEYSlSEHDlRAVLARMDsIPEEQRLESGVSAGAVM-LI-QVKENsptVTVPADLLETLLpTAEQALW+REWTARD+NLPVPESVTRRLADsAKVRALLKs ..........MYsTscplhR.t.LsschPsspslhhVlho.t-lptl.Ap...DhSLoDcElcTVhtRL-Dh.E...ctts-suI.sp...ssVh.Elhpp.hp...c.s.......RQVTVPA.hLtplhthAtp.h........................................................................... 0 1 3 9 +6957 PF07129 DUF1381 Protein of unknown function (DUF1381) Moxon SJ anon Pfam-B_15743 (release 10.0) Family This family consists of several hypothetical Staphylococcus aureus and Staphylococcus aureus bacteriophage proteins of around 65 residues in length. The function of this family is unknown. 
25.00 25.00 34.70 42.70 24.90 19.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.95 0.72 -4.56 6 367 2009-01-15 18:05:59 2003-08-27 15:00:33 6 2 205 0 9 129 0 43.80 83 63.12 CHANGED pQYLITpFpDSTGpsHscls+AR-NpohTlVEAESKEEAlcKYc .TQYLVTTFKDSTGRpHTHIT+AKsNQpFTVVEAESKEEAKEKYE..... 0 3 3 9 +6958 PF07130 YebG YebG protein Moxon SJ anon Pfam-B_15760 (release 10.0) Family This family consists of several bacterial YebG proteins of around 75 residues in length. The exact function of this protein is unknown but it is thought to be involved in the SOS response. The induction of the yebG gene occurs as cell enter into the stationary growth phase and is dependent on is dependent on cyclic AMP and H-NS [1]. 27.50 27.50 28.10 40.00 27.20 27.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.47 0.72 -4.29 32 761 2009-01-15 18:05:59 2003-08-27 15:05:55 7 1 755 5 92 274 16 74.10 64 75.84 CHANGED MAVhspYVV.R-GhE.......KMTFsSKKEADAYDKMLDlADsLsshLppusltl-EsptEpLuhaLAppK-sltphLKus .....MAVEVKYVV.l.R-GEE.......KMoFTSKKEADAYDKMLDhAD......lLssWLs..pSPl..th.--pQREuLSLaLAEpK-lLusILKs.u.......... 0 16 31 64 +6959 PF07131 DUF1382 Protein of unknown function (DUF1382) Moxon SJ, Eberhardt R anon Pfam-B_15770 (release 10.0) Family This family consists of several hypothetical Escherichia coli and bacteriophage lambda-like proteins of around 60 residues in length. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 34.00 33.80 22.60 22.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.82 0.72 -3.87 2 320 2009-01-15 18:05:59 2003-08-27 15:09:15 6 2 199 0 0 80 1 48.90 76 93.40 CHANGED Mp+ASPs-LRpslEhAp.LAp.GlRFVPIPs.TDtEFtTLus.hupKlE.hAAcAEhpEpp ....................................PIPVETDEEFHTLAsSLSQKLEMMsAKAEA-ERD.......... 
0 0 0 0 +6961 PF07133 Merozoite_SPAM Merozoite surface protein (SPAM) Moxon SJ anon Pfam-B_15860 (release 10.0) Family This family consists of several Plasmodium falciparum SPAM (secreted polymorphic antigen associated with merozoites) proteins. Variation among SPAM alleles is the result of deletions and amino acid substitutions in non-repetitive sequences within and flanking the alanine heptad-repeat domain. Heptad repeats in which the a and d position contain hydrophobic residues generate amphipathic alpha-helices which give rise to helical bundles or coiled-coil structures in proteins. SPAM is an example of a P. falciparum antigen in which a repetitive sequence has features characteristic of a well-defined structural element [1,2]. 24.30 24.30 24.30 24.30 24.20 23.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.24 0.71 -4.37 4 426 2009-09-10 23:03:07 2003-08-27 15:43:46 6 3 6 0 17 335 0 156.00 42 29.44 CHANGED GWEFGGGs..sp.tssEcKKpc.lLEplploSWDKEsIsKENEDVh-EhpE-s---EEc.p.............EElEEsE-.-sEpEllE-c.pEEE...c-E-sscpc-.EKcspN-lss.........p.pD.pAQsLISpp.KcN-cs.KKoAEsllpsLhuLlpspNplDuTl+cLhpEhhcaFsNp ................................pKcc.hLEhlploScDcEsIsKcNED.V+EEhEEptE-p.EE.--p...................E.ElEpp...p-EE.T-E.EssEEcp.EEpE.....p...cc-.EE..sc.c....c..c.....p.p..EK.c..s.Ncpps................p.t....o.cpD.cAppLI.........Sps.KsNs-s.KcsAEoIVKoLhuLhpttst...s................... 0 6 6 15 +6962 PF07134 DUF1383 Protein of unknown function (DUF1383) Moxon SJ anon Pfam-B_15868 (release 10.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 375 residues in length. The function of this family is unknown. 
25.00 25.00 48.50 48.90 18.00 17.00 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.24 0.70 -5.52 18 40 2009-01-15 18:05:59 2003-08-27 15:58:01 6 1 36 0 0 42 0 313.60 31 85.79 CHANGED -plhpQlap.tslPYIoKKtlNDpLpsplhtp............................sstpFaccsacsVhssh..sshhVlpGGAAhAsHlsst..psh...L...psLDh-hYht......................sshpp..alpLspLpccLp.ssspshhcslspllpslch..h..........................psslllhKsY.NtAlch.s....ssVphcLsc+...lKsshophN...--ahLVRFShNVcM..hS..tsu.lc.Yps.p.hlpphshhsFsVaFlslplh.....+pPh..s-......hphhslFsss.....VhVpslcpllsDQlpCLLaslF.s+sphKlcpRhspIpuLhsphspp.shssshpscc...hhtl.+ppspshohpplKclLhhhGPtLGs+tL .......................................................................-plhppLa..tslPYIoKK.INDpLpcplltp............................sstpFa+cshcsshtsh......sshhVlpGGAAlAsHlspp.pst......LpslDh-hY..............................sshpp..hltLppLp.cpLp.pssppaasplcplhtslph.............................psplllhKsY.NtAhchs....splphcLNc+...lKsshoplN.......--ahLVRauhNVcM..pS....tsshh.aps.pshhpshshhsaslaFlslplh.....+pshshsc.....shphhslFGhs.....VhVpslcpllsDQlcCLLhslF.Np.paKlcpRls+lpsLhs.hsp..shso.shpppp...hhpl.+pps.pphohpplKplLhhhGPtLGs+tl....... 0 0 0 0 +6964 PF07136 DUF1385 Protein of unknown function (DUF1385) Vella Briffa B anon Pfam-B_12671 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 300 residues in length. Some family members are predicted to be metal-dependent. 
22.10 22.10 26.10 24.40 19.60 18.70 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.67 0.70 -5.29 57 654 2009-01-15 18:05:59 2003-08-28 10:03:17 6 3 631 0 173 542 224 241.30 41 75.66 CHANGED lPhlRGhhsLh-ohslGh+sLsaSAshhtt-......t................................p-c..hsphthhhol....llShshulsLFhllPshlus...hh..p.h....h...sphh.hsllEGllRlslFl...sYlhhIShh.cDIcRVFpYHGAEHKsIpsYEsGt.LTVE...NVp+aSplHPRCGTSFLhhVhllSIll....Fshlsh...........tshhhRll..RllLlPlVuGloYEll+hsu+pps..ls+lluhP....GLhLQp.LTT+EPDDsQlEVAItulctsl.s.cp .........................................................................lPFlRGlsull-uhshGhKpLsaSuphhtt-.....pp....................................................................c-c.hsph..thslsl.........llShlhuhslFhllPshluphh.....p.h........hs...sphhhsllEGll+lh.lhl...sY...Ih.hIShh..-I+RVFpYHGAEHKsIpsYE.sshtL.TVE...NVpK.oRLH.R...CGTS.Fl.lhlh..llulhl....a.h..l..lsh...............ssl...hhRllsRllLlPVVsGlSYEl.l+hsu+hcs..sll+lLuhP....GLhLQh.L.TT+EPcDcQlEVAIsuhctllt..p........ 0 90 146 160 +6965 PF07137 VDE Violaxanthin de-epoxidase (VDE) Vella Briffa B anon Pfam-B_12679 (release 10.0) Family This family represents a conserved region approximately 150 residues long within plant violaxanthin de-epoxidase (VDE). In higher plants, violaxanthin de-epoxidase forms part of a conserved system that dissipates excess energy as heat in the light-harvesting complexes of photosystem II (PSII), thus protecting them from photo-inhibitory damage [1]. 
21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.45 0.71 -4.47 7 98 2012-10-03 08:47:39 2003-08-29 13:30:32 6 4 50 4 43 95 4 180.50 60 51.01 CHANGED EFNECAVSRKKCVPpKSDVGEFPVPDPSsLVpNFNMtDFsGKWaIoSGLNPTFDAFDCQLHEFHhEss.KLVGNLoWRI+TPDuGFFTRoAVQ+FVQDPsQPulLYNHDNEYLHYQDDWYILSSKIENKPDDYIFVYYRGRNDAWDGYGGAVlYTRSssLPESIlPELp+AApSVGRDFssFIRTDNTCGPEPPLVERl .................................................EFN-CAVS+KKCV......PpKuDl..G-FPVPsPssLVcsFNhsDF..s..GKWYIoSGLNPTFDs.FDCQLHEF+.s.E.s.s...K..LluNLoWRI.c.....TP...D...uGFFTRoAlQ.+FVQ..DP.s.pPuI.......LY........NH..DNE..a...LHYQDDWY....IlS.SKl....-Nc.DD...YlFVYYRG+N.DAWDGYGGAVlYTRStsl.PcSIlPELc+AAKpVGhDFsp.FlcTDN...TCGPEPsLhtRl............................ 0 20 35 40 +6966 PF07138 DUF1386 Protein of unknown function (DUF1386) Moxon SJ anon Pfam-B_16196 (release 10.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 350 residues in length. The function of this family is unknown. 
21.40 21.40 23.00 95.40 20.20 19.90 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.09 0.70 -5.31 8 20 2009-01-15 18:05:59 2003-08-29 13:55:16 6 1 18 0 0 19 0 319.90 47 97.50 CHANGED MSLSSKLLVYAYYG.sYNhsHc+YGESYHLYRIVcE+Los.....oYVsssSCVRRDIsTARpL..ssGtlsF--ARphLDls-sAspLosWYpsG-.ooGlCu-VQpVLspIDpasPL-+RVppGu.........sIauLDs.hs-Isp-hsssLQslIGR.FhHFsRsssLsHVA-VFDPs..l+ssGWWYpKFCVLTYMHRlhssuVPsELhsRLpcAVsKaI+Ps...........DcuNCA.AlAsVYGRFCGIGR-HFu+HKsssh+ILFQYMRuchT.ssERassFuVIKDFGRpCKETYpDL+spADsLYIpuoTD+pKNALFDLLCCsNAs-IDsDCYDYIV...spFYs ...........................................................MSLuuKLlVYsYYu.pYNtsHchYGESYHLYRIVpEaLoc.....SYVsshSClcRDlssARRL..psGshsFD-AhphlDss-oscpLSpWasT.G-..opGlsssVppVLppIDshsPlshRVppG.t.........pIFuL-s.........pEIsp-h....sDsLQhllGR.FhaFhRsspLh+lAsVFsPs..tcs...sGWWYsKFCVlTYhHRIhtpuVP...sELh.sRLpcAVpKaI+.p..........t-phNCscslA-lYGRFCGIG+EHFu+HKhsCh+ILFQYlR.GcsT.p--cFsCapVIKDFGRpCK-sYcsL+shhDhLahauhoDKcKNuLFDLLCshsspEIDlDCa.YIh...cpFh.... 0 0 0 0 +6967 PF07139 DUF1387 Protein of unknown function (DUF1387) Vella Briffa B anon Pfam-B_10471 (release 10.0) Family This family represents a conserved region approximately 300 residues long within a number of hypothetical proteins of unknown function that seem to be restricted to mammals. 
25.00 25.00 25.60 25.20 24.90 24.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.10 0.70 -4.81 6 164 2009-01-15 18:05:59 2003-08-29 15:07:34 6 4 51 0 64 117 0 236.20 51 55.81 CHANGED WoVTGKK.KNtKKKKsKsKscusspsA....t+..shs.ppssPss-.cs.sNGaHsNGuth.DoESlDSLSEtL-ohSLDA+E...scssh.-hsspsssshpsG.sch.ppK..hpssPcuopss.......p.stt..sp.pp..t.ss.hhpp............hu+KhusNIE+SVKDLQRCTsSLuRYRVllKEEMDSSIKKhKQTFAELpSCLMDREVALLAEMDKVKuEAM-IL-sRQK+AEELK+LTDhAupMoEEQLsELRADIKHFVSERKYDE-LG+AsRFosDlEsLKpSIpoaGpVsHPKssYSsRot.CS ....................................................................................................................WshTGKK.KN.K+K+sKsK.tttspsu....p+..sh..p.tss..p......NG....+hNGs.p.DopSsD..ShsEt....ul.sRE......c...uh...-....spsh...p.hlppu...t.h....p.+..h......tt....ps.pt...............p.s..tt.t.s.p..s.....thss.h.ps..........................................thsKKh....G.sNIEKSVKDLQRCTVSLsRYRVhlKEEhDuSlKKhKtuFAELpsClhD+EVuLhAEMDKVKtEAMEILhuRQKKAE.LK+hTDlAspMuE.QLsELRA-IKHFVSERKYDE-LG+sARFoCDlEpLKtpI..hGpl..oHPKNsYSsRo..C.................................................. 0 9 14 26 +6968 PF07140 IFNGR1 Interferon gamma receptor (IFNGR1) Moxon SJ anon Pfam-B_15930 (release 10.0) Family This family consists of several eukaryotic and viral interferon gamma receptor proteins. Molecular interactions among cytokines and cytokine receptors in eukaryotes form the basis of many cell-signaling pathways relevant to immune function. Human interferon-gamma (IFN-gamma) signals through a multimeric receptor complex consisting of two different but structurally related transmembrane chains: the high-affinity receptor-binding subunit (IFN-gammaRalpha) and a species specific accessory factor (AF-1 or IFN-gammaRbeta). The vaccinia viral interferon gamma receptor has been shown to be secreted from infected cells during early infection [2]. 
The structure has been halved such that the N-terminus of this family is now represented by Tissue_fac Pfam:PF01108. 20.70 20.70 25.70 24.80 19.30 18.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.26 0.71 -4.45 12 134 2012-10-03 16:25:20 2003-09-03 09:36:26 6 2 72 6 18 111 0 134.40 36 41.50 CHANGED YsptshCshhhhYsshhpascopph...pYslc-p.CspstCplshsop...pplCVos.Gspps..h...pT-tSp-VCls.h.....................ssth.VsshhlKphsDlpphpptls+slpp+hcppop.t+..phY.shlsshhtthlp..............................- .......................YccpshC.hhhlYss.hphscScEsh......pa.slc.-..DCspTtCplshsso...pchCVoApGsocsah....hT.hS.pEVCl...........................s.phsVhsCh..I+phpslpp.hp.ths+sl.hphhpp....top..........................tthh........................................................... 0 1 1 3 +6969 PF07141 Phage_term_sma Putative bacteriophage terminase small subunit Moxon SJ anon Pfam-B_15957 (release 10.0) Family This family consists of several putative Lactococcus bacteriophage terminase small subunit proteins. The exact function of this family is unknown. 22.80 22.80 23.10 28.50 19.60 22.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.07 0.71 -4.71 2 45 2009-01-15 18:05:59 2003-09-03 09:51:03 6 1 44 0 0 19 0 172.70 91 98.00 CHANGED MQTQpGGRPTILPKMYEEPLFSQIIDKIESGCNDREIYTSLHCSAKTFRKWRDDNIKAYDEAKGIARGNLLELAESALASKLTVRTLKETETIYDADGNVEKVKVKEKELDKDSLVAMMVAKAGNPELYNPTEWRRLQQEESSAHDLKAKIEELDDYKLSKYcTPcIEVPcGFE ...MQTQNGGRPTILPKMYEEPLFSQIIDKIESGC.ND.REIYTSLHCSAKTFRKWRDDN...IKAYDEAKuIARGNLLELAESALASKLTVRTLKETETIYDAD.GN.....VEKVK.VKEKELDKDSLVAMMVAKAGNPELYNPTEWRRLQQEESSAHDLKAKIEELDDYKLSKYETPKIcsPEGFE................................................ 0 0 0 0 +6970 PF07142 DUF1388 Repeat of unknown function (DUF1388) Moxon SJ anon Pfam-B_16000 (release 10.0) Repeat This family consists of several repeats of around 29 residues in length. 
Members of this family are found in the variable surface lipoproteins in Mycoplasma bovis and in mammalian neurofilament triplet H (NefH or NF-H) proteins. This repeat contains several Lys-Ser-Pro (KSP) motifs and in NefH these are thought to function as the main target for neurofilament directed protein kinases in vivo [1]. 25.30 25.30 25.30 25.30 25.20 25.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.43 0.72 -4.23 28 466 2009-01-15 18:05:59 2003-09-03 10:27:34 7 28 45 0 52 575 2 29.10 50 24.82 CHANGED PspsKoPuEsKoPs-sKoP..uEsKoPscpK ...............PscsKSPscsKSPscsKSP..scsKSPsctK...... 0 5 5 7 +6971 PF07143 CrtC Hydroxyneurosporene synthase (CrtC) Moxon SJ anon Pfam-B_16004 (release 10.0) Family This family consists of several purple photosynthetic bacterial hydroxyneurosporene synthase (CrtC) proteins. The enzyme catalyses the conversion of various acyclic carotenes including 1-hydroxy derivatives. This broad substrate specificity reflects the participation of CrtC in 1'-HO-spheroidene and in spirilloxanthin biosynthesis [1].\ This family also contains the members of the old Pfam family DUF2006. Structural characterisation of DUF2006 family member Swiss:Q82US3 has revealed a lipocalin-like fold with domain duplication. 
26.20 26.20 26.50 26.30 25.60 26.10 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.18 0.70 -5.13 69 733 2009-01-15 18:05:59 2003-09-03 10:38:50 6 6 601 2 267 685 396 285.70 23 77.79 CHANGED hEWWYlsAphps....GpthssphshhtsshsP...................t.sstphhhspsAlh..........ssppphhhcchuRuu..lup........sssshpsh.......h-saphpups..................hslplstsutpht.........lclplpspss.hlh.sptGhoh+s.sts.....p.Auahau..tPhhplpGslsh.sut..php.........lp.......GpuWhD+pauo.......psLsps.suWDWhsh.....plss..Gps....LhhaplRppss.........................satsushhhss.Gps....h....s.ttlphps..hthsss.suhphPhtWplch.....ssps....lslphpslhpsu.h....ssshsYa......EGsltlptshs..........uhuahch.pGh ................................................................................................................................EWWYhsu.lp........stt....G.p.huhQhshhR.sht.....................................t.ssa.ts.plhh....uphul.s..........sttt...hhh..tp+huRsu.........hu.....Ash...........sspshphh.......lc...s.Wphputs.........................hshplph....ssp..shs...........................................lpLpl...ps...ptP....lhpGppG.hshKs..s.s........t.ASaaao........hPhhp.l...p...Gslsl..sup.......shp..........................Vp..................G.p.u.WhD+...EW.......uo..................phL..s...sst...p.GW.DWhsl.....pLss....Gpt......Lhha.p.lRppss.........................sh.htushh...ss..s....Gps......l.....tspplphps.........h.....phs..........s...........sst.p..hPh........t..Wplpl....sshs.........lslplpsl..tst.h........shth..sYW........EGs...lt.lp..G.sh.p..............uhGahch.oGY............................................................................................................ 0 67 142 211 +6973 PF07145 PAM2 Ataxin-2; Ataxin-2_C; Ataxin-2 C-terminal region Albrecht M, Studholme DJ, Vella Briffa B anon [1] Motif The PABP-interacting motif PAM2 has been identified in various eukaryotic proteins as an important binding site for Pfam:PF00658. 
It has been found in a wide range of eukaryotic proteins [1]. Strikingly, this motif appears to occur solely outside of globular domains [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.47 0.73 -6.31 0.73 -4.08 53 834 2009-01-15 18:05:59 2003-09-03 11:18:39 10 35 156 12 410 759 0 17.60 49 3.57 CHANGED stpSpLNPsApEFVPsst .....hpSpLNPNApEFlPss.... 0 62 129 229 +6974 PF07146 DUF1389 Protein of unknown function (DUF1389) Moxon SJ anon Pfam-B_16027 (release 10.0) Family This family consists of several hypothetical bacterial proteins which seem to be specific to Chlamydia pneumoniae. Members of this family are typically around 400 residues in length. The function of this family is unknown. 21.50 21.50 21.50 30.10 20.90 21.40 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.86 0.70 -5.37 15 75 2009-01-15 18:05:59 2003-09-03 11:21:53 6 2 13 0 20 46 1 296.70 27 70.81 CHANGED shslslslSlllsuluhslhlhshhpthh.............ptpsIPpG...........................FpplI+cpYPcslaclVhpppLolpElRhllsuL...........ppshsphspcL+pKlcsF.Gl-pLpsshcstcL.............ssL-clLlcNCPLYWLp+F..IclGs+phs.........pssp........hpsssYWlu+lGh.......usstsTIFs.psalluplloc-EYphLhsHA+NsTWspsc..VpslppRl.hshlspthtttpcpspsplsp.....hhpcth.shLLtLCpHGlSW-QLQLlcplsscphsFLstl...-pup.hsstlpphshuhhs.t........alsEss.p.F-PsluLhTacEa+ ........................lshhluhhhssluhslh.hshht.hh...............p.IPpu...........................applIpppaPpslhshlhpppLolpElRhllpsl.............psshsp....hsppL+p+lpsF.GlcplpsshpuhsL.............ssL-slLlppCPLYalp+F..IphGs+pl..........ptpt........hp.ssYWlschGh.......ssstsTIFp.hsalluphlSccEY.hLhp+Acss...s...Wsptp..VsulppRl..t.htphhh...ptcpshsphhs.......hhpc..tphLLhlCpHGlSWEQLQLl+pls.hcshtaLstl...-pus.hsttlpphhhshhs...........ahsppspp.a-spluLhTasEh........ 
0 0 0 18 +6975 PF07147 PDCD9 Mitochondrial 28S ribosomal protein S30 (PDCD9) Moxon SJ anon Pfam-B_16045 (release 10.0) Family This family consists of several eukaryotic mitochondrial 28S ribosomal protein S30 (or programmed cell death protein 9 PDCD9) sequences. The exact function of this family is unknown although it is known to be a component of the mitochondrial ribosome and a component in cellular apoptotic signaling pathways [1]. 25.00 25.00 28.20 25.70 24.20 24.00 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.37 0.70 -5.70 14 222 2009-01-15 18:05:59 2003-09-03 11:37:19 7 2 90 0 141 204 0 322.20 25 75.44 CHANGED Mushpt...hp.l.............ttt....s.p.pp.ss.....sssshYPPIh.opshps+us+p+phcpa...................httlptssslcEKl.hhsthph................+ahsa......PpshshNu.caaQahTKTsh.suLP...s.t...............................................ptp..shschhhutlcshhpthhLQcpaappccc..........cppchsst.hLspLspslhshLupp.stLspsplDhsPplphaWsRGtph.s.........+shp+th.cshcaQlDD.............+PthplRhpppLs.ahs.-tp......hstclPshc.........acPptLsLapcpacsth.ssuhhs...ssssaGhspFphhschhpR...........cp....s.tsQhEs.h+ApuIhShFAWhsAQA.hYpGFhstsDlTcPhsoQsVITDG+haSFaCYQLNTLuLsspsst.sNsRpNhsWGTpuh.LY-p...............l.pcsclhGaN-.....sslppllpFhlNps 
.......................................................................................................................................................................................................................................................................................................................................................................................p....hps...h.hhT+Thh........t.L.P.............................................................t...t.hp.h........h...t.................................p......hltpl...hht.h....p...........s.Ltt.t..h.s.p....hthhW.htt...................................hQhps....................pst.tlphtp.Ls.hhs.p............hsh.l..sh..............................s..shls.l.ppp.h.p...h..suhh...........s....hsas.as..ahhh..s.t.....hR...................................................tt.ht..s.h+ApsIhtsFuhshA....pA..th..G...........hps..ps...ls.pPhlsQuV.hTDG+hFpFhsaQLNTlsLss.....ss....s....hKNlsWss.p..sh.LYpp....................................l..pss...t...hhshps.....cshhphlthhh............................................................. 0 45 57 101 +6976 PF07148 MalM Maltose operon periplasmic protein precursor (MalM) Moxon SJ anon Pfam-B_16111 (release 10.0) Family This family consists of several maltose operon periplasmic protein precursor (MalM) sequences. The function of this family is unknown [1]. 19.50 19.50 20.60 19.60 18.10 19.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.33 0.71 -4.71 36 702 2009-01-15 18:05:59 2003-09-03 11:41:22 7 2 687 0 64 315 6 135.60 62 45.04 CHANGED sstpShhAAaplP..uspGshplpLoShl..spslFsPsVllLDpphpssphhssspFpYp.sshhsssRlpuplpls....s.uppphYlllYToppDLstoTpl.cP..AKshAcuhGpshPtlsDssstHossGplclplp ..............................sGIoGPVAAYSVP..ANI..GELTL...TLT.....SEV....NKQs....SVFAPNVLILDQNMTPSAFFPSSYF..TYQcPGVM.SADRLEGVMRLT...PALGQQKLYVLVFTTEKDLQQTTpLLD.P..AKAYAK.GVGN...ulPDIPDPVARHToDGllKLKVK.............................. 
1 8 22 46 +6977 PF07149 Pes-10 Pes-10 Moxon SJ anon Pfam-B_16134 (release 10.0) Family This family consists of several Caenorhabditis elegans pes-10 and related proteins. Members of this family are typically around 400 residues in length. The function of this family is unknown. 21.10 21.10 22.50 22.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.19 0.70 -5.69 5 17 2009-01-15 18:05:59 2003-09-03 13:21:49 6 2 5 0 17 15 0 278.90 21 92.35 CHANGED MNcTEYphHLLARTI+TGcDEhITPlIpQLs-hpVSMDILEKHNlPsLVscaAPaNpsApSLu+slLsWKN-clppEKPplLKcFsctst-c+aP-pFV....L+LLTuLMsF-DL-LV+ssFcILsph-LoL--aEcaGIaE+AtpFQtQhcEADELIsKl-..hLpsEhLEEsEpEpptsclhssh-cc...................u-sESGlFT--E..........cccspcp.llhEICMlaLApCIKoGNs-VISuAIpFsGsacaPLALYRKYDIQhLIYsaGs+s-DAcpLhDaIEclEclElssE+lEAFK+Fl+..sshEp.csVTDSVMolLpGFL........E-sDDahVcsTLcFFLsMPISL-QFcps+VEssLcNLEptssupLullLKhKIp-L+p ...........................Msthph.h.hLA.hltst..p.t.Io....hhp.......L.s.h....hchh-ptNhPhllt.....p.ss....A.ph...hh.hK..phtpE...hhptFht....hh........tt.h...............lpLhh.h........t..p..hhp.sh.hl.t..h.ht.h....h...h.....ph.-t..lh.p..............t.......-..............................................................................................-l.hh.hht.lpstspthls..............hulph...htt.phsLplhpKY-IttLlh.ths.p.pptA.p....Lh.pIpphpp.tht.pp.phh..hhp........t...sshht.h..ah..................................................................................................................... 0 6 7 17 +6978 PF07150 DUF1390 Protein of unknown function (DUF1390) Moxon SJ anon Pfam-B_16182 (release 10.0) Family This family consists of several Paramecium bursaria chlorella virus 1 (PBCV-1) proteins of around 250 residues in length. The function of this family is unknown. 
20.00 20.00 20.20 20.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.53 0.70 -4.99 25 54 2009-01-15 18:05:59 2003-09-03 13:27:10 6 1 10 0 3 52 2 190.20 31 82.66 CHANGED csaphslYpCuCGYcTsssuNAoKHKKT.uCsHchpscpc.cFVLcEDh...phtsspshsstshsssss.ssshsphlsp.psIsIsLslP-poshssIh-slps.phhpElcsu.-PppIPAlLF+aTRGht.....utpphI+Y-sDKslVpcKDP.sG+-sspcLKKY+scYls-sssla-cshclsahPpplpcsh+-hppPphssG+KKsc.lsuA-slKhsAoGsHhhYKhPtE ...chaphplYhC.s..CGYcThspusAs+HpKo..pCtcc.hhpppp.pFlhcc-h...t...stsh......t.sshtt...s.pp.lp...phshsLhlP-tshhpslhchlps.ph.p-lpst.pspphPullachT+u.t.....tt.thlphcsDK..lhctc..sGp.htpslpKhtpcah.cssshhppsh.l.h.sp.hpp.hc-hppsphs.G+K+sp.lsuu-sl+.hAsGsHhhYKhPhE................................ 0 1 1 3 +6979 PF07151 DUF1391 Protein of unknown function (DUF1391) Moxon SJ anon Pfam-B_16216 (release 10.0) Family This family consists of several Enterobacterial proteins of around 50 residues in length. Members of this family are found in Escherichia coli and Salmonella typhi where they are often known as YdfA. The function of this family is unknown. 25.00 25.00 32.20 76.00 22.70 21.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.54 0.72 -4.47 4 588 2009-01-15 18:05:59 2003-09-03 13:43:20 7 1 261 0 3 201 0 48.70 87 78.29 CHANGED pplDLGNNESlVhGVFPNpDGTFTAMTYT+SKTFKTEuGA+RWLtRpss ..DTIDLGNNESLVCGVFPNQDGTFTAMTYTKSKTFKTEsGARRWLtRNo.s........ 0 2 2 3 +6980 PF07152 YaeQ YaeQ protein Moxon SJ anon Pfam-B_16245 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length which are often known as YaeQ. YaeQ is homologous to RfaH, a specialised transcription elongation protein. YaeQ is known to compensate for loss of RfaH function [1]. 
25.00 25.00 28.00 26.10 22.10 21.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.87 0.71 -4.94 64 1039 2012-10-11 20:44:44 2003-09-03 13:49:14 7 2 1018 5 215 584 86 173.00 52 96.12 CHANGED MAL+uTIYKsplpluDhDRsaYpshpLTlApHPSET-ERMMlRLLAaALpAs..E.p....LpFTK.GLS.ssDEPDLWpKsLs.scIplWIElGpPDEcRl+KAss+AccVhlasY.upssplWWpptps+lsphcNLsVhplspsthpsLupLspRsMpLploIp-Gplalosspp..s.lpls.ph ...........MALKATIYKAslNlADhDRsha.hDtuLTLARHPSETpERMMlRLLAahhaAc.....ER.........LpFT+.GLs.s-DEP-hWh+s.c.huI-LWIELGlPDE+RI+K..ACs...pA..tc...Vs.LasY..suRAAplWWpQspuKhspasN....LoVahLD--plupLushA-RTMsLQsTIQ.DGslWLSDscs.s.lElphps................. 0 39 107 166 +6981 PF07153 Marek_SORF3 Marek's disease-like virus SORF3 protein Moxon SJ anon Pfam-B_16263 (release 10.0) Family This family consists of several SORF3 proteins from the Marek's disease-like viruses. Members of this family are around 350 residues in length. The function of this family is unknown. 21.90 21.90 22.40 28.80 15.70 21.80 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.12 0.70 -5.77 3 18 2009-01-15 18:05:59 2003-09-03 13:53:59 6 2 8 0 0 14 0 263.20 39 91.13 CHANGED MSRusAslFDDMDIPRGRFGKPPRKITclNFWslLsDELTsGIVQCMESRERLALlHTsSsN-Go.hSFDIHKDMWCQMVLWSAYRFFSTM-+MFSI-oIoNFs-TDVsEoGpW+IaCRTWDlRDAsKMKhlGPFLPALFSFHLENWTTMLSIGIsKGYDRHNsRshFMshPShKNVLlGAlEVARaAVVLoLPICEYRTPhGLPDDpIGNAIKLCCAQMQANRLEcTGIopDutpKsNDuSEEELYYRslH-LVKouREHCcSsE......sshDlsPTI+cpp.psIphphsSsh.hGhtshuR.hNsGch+Yp+.plhRNhPlRVPRSRLuNSKILQTFRcshsRSsha...l ............sRs.AhhFsshDhPRGRFhpP.h+.sshsaW.hhsDEhspGIhQChEuRERlu.lp..p.spG...paD.phDMWsQhslWSsY+hhthhp+.FSlcplh.hscpslstsu.ath.hpsWDlRDusKh+hlGPhL.AhFShHlENWTshLSIuhstG.asppsphs.hMsh.us+.sslhsuhEVAR.hlVLsLPls-YRsP.GLPDDshGNAI+sCCApMQtpRLpcsths.D....hpsssEEEhYYRslpc....lIpstRcas.ssp.......................................................................................h.............................. 
0 0 0 0 +6982 PF07154 DUF1392 Protein of unknown function (DUF1392) Moxon SJ anon Pfam-B_16270 (release 10.0) Family This family consists of several hypothetical cyanobacterial proteins of around 150 residues in length which seem to be specific to Anabaena species. The function of this family is unknown. 25.00 25.00 172.30 172.20 17.70 17.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.88 0.71 -4.35 7 15 2009-01-15 18:05:59 2003-09-03 13:56:02 6 1 3 0 11 15 0 152.00 43 98.45 CHANGED Mss.IspLEpCWYlSPPWGpphsPltlsLhE+VYLpos+ohGYCCGVpWppDtW.Ysl.scpshlpsscppIIupGphpshslpKPtFtLG-hV.hcFsscusKpRlILGltLlpsuWhYhVEhhSPsL..............stttsh.pRhuhVp-cDLVpV MhstIssLEoCWYlSPPWGpplPPltlsLlE+VYLpos+ohGYCCGVpWpc-sWhYsIhsss-IlasocspIIuTGplpsholpKPsFtLG-hV.lcFts-uPKpRlILGl.LlcpsWhYtVEhtSPsL..............opsssh.sRhuhVs-tDLVcV...................... 0 0 6 11 +6983 PF07155 ECF-ribofla_trS DUF1393; ECF-type riboflavin transporter, S component Moxon SJ, Eberhardt R anon Pfam-B_16301 (release 10.0) Family This family is the substrate-binding component (S component) of the energy coupling-factor (ECF)-type riboflavin transporter. It is a transmembrane protein which binds riboflavin, and is responsible for riboflavin-uptake by cells [1,2]. 
25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.18 0.71 -4.53 62 2634 2012-10-03 02:46:00 2003-09-03 14:01:05 7 8 1367 0 389 2679 75 161.70 23 88.03 CHANGED p..ss+plVhhulhsAlhhllsh.h..lpIPss..so....lplspuhlhlhAhlFGshhGhlsGhlGtsLhDhhsG.Ys.hWhsa.llscGlhGhlsGhhspph..php.......hphhs..llhhslhhlhuhhluhsll..................uhtshltulhuslh.pslsuhlluhhLh..tsht+ .............................t..ps+plshhulhs..Alsl.l.lsh....h.......hp...I.PhP................lp..l.s.s.u.h..l..h..lhu..hla..G...s.h..hGh.......l.......l.......Gh.......l.......u...t.h...lt.....D.....h............h....s.G.....Ys....hW...h....s.......a.......p......lls.....p....G.........l..h.....G.....h....l.........s.........G.....h.....h..t.p.p..h.......ph............................p...h.hs.........hl....h....h.s....l....h..t..l....l.u....h..l...s..tslh..........................................t.ss..h.t...s.....h.h...t.....s......l....h.u.....s....lh....ps.l...s....shl....luhlLh..sht......................................................................... 0 159 250 313 +6984 PF07156 Prenylcys_lyase Prenylcysteine lyase Vella Briffa B anon Pfam-B_12448 (release 10.0) Family This family contains prenylcysteine lyases (EC:1.8.3.5) that are approximately 500 residues long. Prenylcysteine lyase is a FAD-dependent thioether oxidase that degrades a variety of prenylcysteines, producing free cysteine, an isoprenoid aldehyde and hydrogen peroxide as products of the reaction [1]. It has been noted that this enzyme has considerable homology with ClP55, a 55 kDa protein that is associated with chloride ion pumps [2]. 
21.30 21.30 21.30 21.40 19.80 21.10 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.08 0.70 -5.57 5 325 2012-10-10 17:06:42 2003-09-03 14:06:43 9 14 182 0 225 329 7 298.00 29 68.63 CHANGED FEEosWalINllKLVWRYGlSsLRM+MWVE-VLDKFMRIY+YQuHcYAFSuVEcLL+ulGtsDalshlppTLpEsLpcAGlSppFlNEhVusVhRlNYGQSsDINAFAGAVSLuuAsuuLWSVEGGNKlVCSGLLptSKuNL.IsuoVpSI.....T+aosNspchYpVsYKssstspSDFYDIVVVATPL..DcshSNIT.FtNFcPPI--hpspYQpTVTTlV+GcLNooYFGs+PhDpFsLusILTTDDuslFhssluIl.....sShRcK.....sscGstVWKlFSRcsLo+sQLccLFpSYshsV+KsW.tAYP+YcsPpppPsFILHD.+LYYLNuIEsAASsMEhSAIAA+NlALLAYcRWNscpDh..IDQcsLhc+LKTEL ...................................................hhshh.+lhW+Y..G.h.t...h+.................hp.hhc...phlpc..Fh.+.l...Y..c..............t...............h.sFps.l.pphh.sh...........G........hhthhptohtc....L.....p.s.G...h.......s.p...pFhp.-lltsshRlNYG.Q.s...s.lsuh.............s..Ghho.hs..s.u.t.s.uhhuVcG.GN.h.l...hpthlp.....t.op........u........sl...l.p.s.p.Vtpl.......................p.ss...s..st........h.tl...........h....p.........s......t.......t.....t......t......t.t..p........h..a....Dh....VllAsPh...p.t.....lt.........s..hp....s.h...p....................a.phhsT.h.h..p...u......l...s..s...phFsh...s..p..hs....t...slhTs..........t.......s.........p........h............h...s.........shh..........................p......................pt.thlaKlFS.p.lp...pp...............lppl.............F......................t...........h........................................s....................p.......h.W............u.......YP..h..h..t...s...........p...h....................s.hhL...ts.....t............l.....aYhsuhE........hh.SsMEhsuluucN.sAh.Lhh.pph...................................................................... 
1 73 112 170 +6985 PF07157 DNA_circ_N DNA circularisation protein N-terminus Vella Briffa B anon Pfam-B_12343 (release 10.0) Family This family represents the N-terminus (approximately 100 residues) of a number of phage DNA circularisation proteins. 19.90 19.90 19.90 20.10 19.10 19.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.94 0.72 -3.85 9 365 2012-10-10 17:06:42 2003-09-03 14:09:03 7 3 318 0 54 329 4 93.30 41 21.62 CHANGED h+cthtcASFRGVPFhlEc-p.tssGRRl.hHEYPhRDpshsEDhG+phpphplouhllGcDhhspR-+...Ll-AL-psGsGpLVHPhaGphpVpl ...................................WpcpLtcASF...RGVPFhV.psp..sssGRRlthHEYPhR..Dp.sasEDLG+tspphslsAhll.......Gc.......D.......hhspR-.c...LlsALc........psGsGpLVHPhaGchpVtl................. 0 13 25 43 +6986 PF07158 MatC_N Dicarboxylate carrier protein MatC N-terminus Moxon SJ anon Pfam-B_16346 (release 10.0) Family This family represents the N-terminal region of the bacterial dicarboxylate carrier protein MatC. The MatC protein is an integral membrane protein that could function as a malonate carrier [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.64 0.71 -4.39 5 578 2012-10-02 15:12:49 2003-09-03 14:11:11 6 4 502 0 89 495 47 146.70 43 38.33 CHANGED MssELloILlLllsFlIATspsINMGALAFAuAFllGolllGLcscElLAGFPuDLFLTLVuVTYLFAIApsNGTlDWLV+sAVRtVRGRluAIPWVMFllAuLLTAhGAluPAAVAIlAPVALoFAs+YRIcPLhMGLMVlHGAQAGG .........................................................h...pllhl.Al.sluIuIG.a..h.pKIN.IG...llA..Is..Fu.Y...lIush.h..M.........G.l.sP.........K..-..l..l....p.......h..W...P..s..u..l..F...FsIhu..V..SL..F....as.h...A.psN.GTL-hLAp+llYRs.Rs.+..Ps.h.L....P.hll...alhusll.oALGAG..aas..s..hA.l...hs....P..lAlhlCpKh..s.hsP..L...lGAhsl.saGApuGu......................... 
1 27 59 69 +6987 PF07159 DUF1394 Protein of unknown function (DUF1394) Moxon SJ anon Pfam-B_16260 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 320 residues in length. The function of this family is unknown. 20.10 20.10 21.10 20.20 19.30 18.40 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.84 0.70 -5.30 7 319 2009-01-15 18:05:59 2003-09-03 14:55:21 7 7 109 0 197 239 1 249.30 46 56.05 CHANGED tsphFLDFENAQPTEsE+ElappVstVLpcu-ulLt-LpuY+GAupEIR-AIpNPss.phQEcAWpuVsPLVsKLKcFYEFSlcLEpsL.pLLtuLs......uss.sPppHLE+cQALAKQFAEILHFTL+FDELKMpNPAIQNDFSYYRRTlSRpRlsNh..-sEscVsNEhANRMSLFYAcsTPMLKsLScAToKFVSENKoLPlENTTDCLSTMAsVC+sMLEsPcapSRFps.EETlhFChRVMVGVIILYDHVHPVGAFsKoSpIDhKuCIKVLK-QPssosEGLLNALRYTTKHLND-oTsKpI+ ..............................t.phFlDFE.....sApP.o-t.Epplaspl.shVLp-.upslLt...................cLpsY.+.GA.uppI............R..c....A..I..p..s..P..sc..........plQ..E+AWsuVsPLVsK..LKcFYcFSh.+...L.-pslpsLLttLs......ss.h.s.s..ppaLE.cpQALsK....phAEILcFsLpFDELK...MpsPAIQNDFSYYRRsl...S..R....+...l.....s..........s..................-....-.spl.....ssEhAN+...MS.LFa..ApuTPM.LK..sLSpAT...cFVs.-..sp.s.sl-ss.o-sLuThs.s.VCthMlEsscahsphps.p-oh.hshRVMVGhlIL..YDalcs.GAFs+t.u.plch+....ssl+lL...p.t.ps...................tspsLLssL+..aoopHhs.p.sTsttl............................................................................................... 1 66 85 147 +6988 PF07160 DUF1395 Protein of unknown function (DUF1395) Moxon SJ anon Pfam-B_16376 (release 10.0) Family This family consists of several hypothetical eukaryotic proteins of around 250 residues in length. The function of this family is unknown. 
25.40 25.40 26.30 36.90 25.10 25.30 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.54 0.70 -4.87 6 100 2009-01-15 18:05:59 2003-09-03 14:57:27 7 3 84 10 70 109 0 220.70 30 87.04 CHANGED +ls.l+Ell.hRshh.sPshcsTLSsIssslsulchLLpchcp.lpppcps.sptKcLhpsohcpppch.tlctpsPsphs.csp.ssppssspsLlpcst....t.........cEpu.sppP.+ps...+.h.aITs-EFcSlPsYM+GRLTL-plNthlschssalsuKs+IlupspKpLocssRElhpcaR-lEsKsst+..G+aFFlEsDIKshssLKLDpoFtshlssLRHlpRlpEsRsGsLT .........................................................................................hpc.l.............ss.hps..Lptlspp...lhslpphLpphchplp.pc.p.t.sphKcLhpshppp...c.cl..pclppp.lP...spl.....P...................ts....ps.s..psh.h.p..t.st.ts.pt.....................hpsp.p.t...spcs.+....ct..c........lt......ph.a......ITs-EFsulPpYM+GRLT...h-plNssl.p.-l.Nps.lhuKY+ILppP.+...K..s..hs.ss.s+p.......hhpca.h.-p.E..s..K-op...G.paFhlEsDI+ch.s......slK.lD.+p..h+.slLslLRHhpRlpElRsu................... 0 18 30 50 +6989 PF07161 DUF1396 Protein of unknown function (DUF1396) Moxon SJ anon Pfam-B_16343 (release 10.0) Family This family consists of several putative lipoproteins from Mycobacterium species. The function of this family is unknown. 
24.00 24.00 24.00 24.00 23.90 23.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.03 0.71 -4.64 17 288 2009-01-15 18:05:59 2003-09-03 14:59:51 8 1 134 7 53 132 1 191.50 36 78.19 CHANGED Dsstl..lpcuspsT+slpSsHlsloVsGp.....lssL.slpol-GDlTssPtss..ApGssplhht..uttl.sscFllsDuslYssls..ssasshGsu..tslYDsuhl..LsP-pGlusVLu.shssspspGcEslsGpsTs+l....oGslsAssVstIsPs..hsus.tslPsTlWIs--..................sstpLspstl.chssGss....VplTlScWscsVsV ...................................................................................................-At.L..lcpuscAT+slpSs..HlslsVsGc..............lssL.....slpo...l..-uDlo.s..sPt.s...ApGssplhht......sssh...sscFlVh.s.splYsc...Ls........ss........aosh..Gsu........tsl...Yssuhl......LDPs.pG...........L.........uslLu.slp.ssps.pG..p-sID.Gl...sT.s.+l...............oGsls.Ass............lst..ls...Ps...............hsp......s...t.lPsTVWIsps..................Gs.pp.LVphpl...-hspGs...........VplThScWGc.Vsl.............................................. 0 18 43 50 +6990 PF07162 B9-C2 B9; Ciliary basal body-associated, B9 protein Vella Briffa B, Coggill P anon Pfam-B_12595 (release 10.0) Family The B9-C2 domain is found in proteins associated with the ciliary basal body. B9 domains were identified as a specific family of C2 domains [1]. There are three sub-families represented by this family, notably, Mks1-Xbx7, Stumpy-Tza1 and Tza2 groups of proteins. Mutations in human Mks1 result in the developmental disorder Mechler-Gruber syndrome [2]; mutations in mouse Stumpy lead to perinatal hydrocephalus and severe polycystic kidney disease [3]. All the three distinct types of B9-C2 proteins cooperatively localise to the basal body or centrosome of cilia. 
20.80 20.80 20.90 21.10 20.70 20.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.95 0.71 -4.47 45 444 2012-10-10 12:23:49 2003-09-03 15:04:25 6 10 128 0 283 416 10 153.90 27 49.58 CHANGED clhlhGpI.uAp...sapps.sL..............as+atlhtGs.sWp...........hlsGhp...................pGtTQhshsss...schsh...............asaPh-lphpsps.pG..................................WP....pLhlplau................hDshG+.ppltGYGhstl......Php.sG.pHplplshW+.P.......hsohhppLpphhlGusPpLpc..shlts..........spsRhtl+oco.sGpVplplsllh .............................................................lhl.Gpl.sAp...thp..s..sL..............as+atl.hGt.....s.......Wp............hsG.p.....................pGhoQhsps....pt......s.phsh...................asaPl-lphtsps..G..................................WP......plhhpVhu.....................D..a.sp.phltGYGhshl......P.......h.......s..PG......p..........Hph........ph...s......hapP..............................ss..h..hpphtp.....hhhGttsphtc..phltt....................t.sR.h.hp.sts..tG..lphphphh.h....................................................................... 0 122 153 228 +6991 PF07163 Pex26 Pex26 protein Moxon SJ anon Pfam-B_16379 (release 10.0) Family This family consists of Pex26 and related mammalian proteins. Pex26 is a type II peroxisomal membrane protein which recruits Pex6-Pex1 complexes to peroxisomes [1]. Mutations in Pex26 can lead to human disorders [2]. 
25.00 25.00 41.60 29.00 22.30 20.50 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.93 0.70 -5.52 3 66 2009-09-11 08:51:28 2003-09-03 15:14:33 7 2 40 0 27 66 0 266.40 54 93.40 CHANGED MKSDSSTSAAPLRGLuGPLRSSEPsLAlPAsuPAVcLLEEAuDLLVVHLDFHAALETCERAWQSLssaA.AEEP.uGTplEVKCSLCVVGIQALAEMDRWREVLSWVLQYYQVPEKLPPKVLELCILLYSKMpEPGAVLDVVuAWLQDPuNQulP-YGoLAELHVhRlLLPhG+LSEAEELl...VGSAAFoEEQRl-ALQAIHsARQQpspppoQEHSsSp.......EsQKlspEGSlSpKLLSLlMLLRRLWuSAVSHlhSlPFRKuLLAALILCLLIlRFDPAuPSSLPFLY+LsQLFpph.pAshGRLYhLAsRS ...................................M+SDsSsSuAsh+GhsGsLRSSEPstssPshusAV...sLLEEAADLLVVHLDF+AAL-TCERuhpSLss.s.sEEs.suoslEVKCSLCVVGIQALAEMsRWpEVLSWVLQYYQVPEKLPPKVLELCILLYSKhpEPuAhL-VsuuWLpDPuNQsLPE.YsuLAEhHl.+VLLPLGploEAEELl...VGSsAFsEEpRhssLpslcpuRQQ......ppppcs..uo-.............Espc.sp-...G...uhspK.h..Lul.hLlpphhssuso...+hhShPF++uhLAALlLsLLllRhDP.AuPSSlsal.+Lh...pLhp.h.tuh.u.h.............................................. 0 4 6 13 +6993 PF07165 DUF1397 Protein of unknown function (DUF1397) Moxon SJ anon Pfam-B_16395 (release 10.0) Family This family consists of several insect specific proteins. Swiss:Q25513 is annotated as being a haemolymph glycoprotein precursor. The function of this family is unknown [1]. 
24.90 24.90 24.90 27.40 22.10 24.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.74 0.70 -5.14 17 111 2009-01-15 18:05:59 2003-09-03 15:42:10 6 3 35 0 77 116 0 191.90 28 72.67 CHANGED -l..p.s..php..-lpchlcspCpcss.....GsDp...happlEpusps.hscClpGllDhsslppEIccApPsG-LDpVFsKYCt+tspuhpClsshpstlpPCL-p--ppphsshhshhppLLsFlCaK-GDpIALFIAEpGPEChpppp-sltpChspshsua.......sshp.up.hshsc..hu.pQCs-hsphpsChlccLEpCpssTPuNlsEShF+alR+toPC ........................................t..phtphpphhppcC.css........usst....hhpplppuhhp.hspClpshhs..hsthptEhpp.spPpGs.LDsVFpK.........YC..+hs.p.uhpClpsFsstlpsCLsp.-Eppt...s..hhhplhpplLsFlCh+sGDpIAl..FlucpGsEC.....hppp.pcslt.pChs..pshsth................psh.phs..p........thsp......hs.cpCs-ltphcsCllpc.LE.p.Css.s.suNlh-uhFchltptosC.................. 0 23 31 67 +6994 PF07166 DUF1398 Protein of unknown function (DUF1398) Moxon SJ anon Pfam-B_16404 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 130 residues in length. Members of this family seem to be found exclusively in Escherichia coli and Salmonella species. The function of this family is unknown. 20.80 20.80 21.00 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.59 0.71 -4.23 6 734 2009-01-15 18:05:59 2003-09-03 15:46:05 6 1 607 1 25 177 1 121.40 41 91.93 CHANGED QlllFpchF-pVRpDhNaphFYSELKRHNVSHYIYYLAT-Nl+IVhcNDssVLlKGt+pllpV+ho+NppLIcsost+ahStEloFpcYpcsLAsAGVFRWITslcEpKRaYYohDNoLLapEsl ...........................................hhhpchF-pVRp-.NFshFa...p-LKcpsluaYIYalATsNl+Ilpcs-sslll+us+s..llpVssopNpshIcpshttHhsGcpoFcpYCssLApAGVF+WlsDlppppRpYa.shDNpLLahEsl........... 0 11 15 22 +6995 PF07167 PhaC_N Poly-beta-hydroxybutyrate polymerase (PhaC) N-terminus Moxon SJ anon Pfam-B_16456 (release 10.0) Family This family represents the N-terminal region of the bacterial poly-beta-hydroxybutyrate polymerase (PhaC). 
Polyhydroxyalkanoic acids (PHAs) are carbon and energy reserve polymers produced in some bacteria when carbon sources are plentiful and another nutrient, such as nitrogen, phosphate, oxygen, or sulfur, becomes limiting. PHAs composed of monomeric units ranging from 3 to 14 carbons exist in nature. When the carbon source is exhausted, PHA is utilised by the bacterium. PhaC links D-(-)-3-hydroxybutyrl-CoA to an existing PHA molecule by the formation of an ester bond [1]. This family appears to be a partial segment of an alpha/beta hydrolase domain. 20.50 17.00 20.50 19.00 20.40 16.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -10.98 0.71 -4.83 37 1217 2012-10-03 11:45:05 2003-09-03 16:00:08 8 8 704 0 326 1286 296 145.90 45 28.33 CHANGED .pD+RFusssWppNPhachltQsYLlsu+hlpphl-ps-..lDscs+pRscFhscQhssAhuPSNFlhoNPpsl+cslposGpSLlcGhppLhcDltputu..plppoDpsuFpVG+NlAsTsGpVVacN-LhpLIQYcPtTEpVac+PLLlVPPhINKaYILDLpPpNShlcas .......................................tD+RFts.sWptpPhaphhhptYLh.tchhtphhpshp...ls.ppcp+htFhhpph.hsAhuPo.NhhhhNPt.hhcphhpotGpsLhpGhtphhpD...l...tps.t......h.p..ss.t.psFpl..G...c.....sl..A...s...TsG.pVVacNclhpLIQYp.P...h..T.......c....p....V..h.t.p..PlLIVPPhINKaYILDLpPcNShl+ah................... 1 81 187 258 +6996 PF07168 Ureide_permease FAE_3-kCoA_syn1; Ureide_perm; Ureide permease Vella Briffa B anon Pfam-B_11634 (release 10.0) Family Heterocyclic nitrogen compounds may serve as nitrogen sources or nitrogen transport compounds in plants that are not able to fix nitrogen. This family represents ureide permease, a transporter of a wide spectrum of oxo derivatives of heterocyclic nitrogen compounds, including allantoin, uric acid and xanthine; it has 10 putative transmembrane domains with a large cytosolic central domain containing a 'Walker A' motif. Ureide permease is likely to transport other purine degradation products when nitrogen sources are low. 
Transport is dependent on glucose and a proton gradient [1]. The family is found in bacteria, plants and yeast. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.11 0.70 -5.76 4 165 2012-10-02 19:55:49 2003-09-03 16:15:55 6 4 88 0 57 194 24 229.70 39 66.75 CHANGED MlLuLhFLGTWPAlLTLLERRGRLPQHTYLDYolTNLLAAllIAhohGEIGtspPscPsFhTQLsQDNWPSVLFAMAGGllLSlGNLuoQYAaAFVGLSVTEVITASITVVIGTTlNYFLDs+IN+AEILFPGVuCFLIAVFLGSAVHuSNAuDsKpKL......pshps.pp.tolpshEhcsh.....ppp-LEpGc..s-pscsGTAsFllELEc+RAIKVFGKSphIGLslThhAGlCFSLFSPAFNLATNDQW+TLc+GVP+LsVYTAFFYFSISsFllulILNlpFLY+PllGLP+SSlKAYlsD.sGRtWAlLAGhLCGFGNGLQFMGGQAAGYAAADAVQ .............................................................hlohlhhGoWsshhpLhp++tRh.ph.hYhDYols.lLsullhAhThGphG....ps.sFhspL..s..Q..sshsSlhhAhhGGllhsluNlhhphAhAhsGhSVs.slu..sulslVl.GsslNYhh...s.sph..scs....lLFsGVushllAl.hl.s..uh.sat............................................................................................................................................................................................................................................................................................................................................................................. 0 18 39 47 +6999 PF07171 MlrC_C MlrC C-terminus Vella Briffa B anon Pfam-B_6316 (release 10.0) Family This family represents the C-terminus (approximately 200 residues) of the product of a bacterial gene cluster that is involved in the degradation of the cyanobacterial toxin microcystin LR. Many members of this family are hypothetical proteins. 
25.00 25.00 30.90 30.80 20.10 19.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.84 0.71 -4.71 15 567 2009-01-15 18:05:59 2003-09-03 17:08:38 7 6 358 1 215 590 412 175.50 31 35.98 CHANGED llADstDNPGGGusGDsTtlLcullc.Rshpss......ulusIa.DP.AVptstAAGtGAslsL+lGGKhustuupPlcscspVp+Lscsshps....htss.hslGssAsl+lt..G..lclIlsosRsQsa-.shFpslGl-PtspclLVVKSusHF+AsFtPlAppllhstuPuuhssDsspLsa++l ........lluDstDNPGuGusuDsThlLctLl.c...p.s..h..pss...............shuslh.DP.t...Asp..tshsA....G...h..........GApl.slplGu...+.....s....s....s....h....s....u....t...Pl..c.spspVtt..lscsthhs.........hts..t.h.p.hGssA.slcls......G.....lc....lllsop+p.phh-.shFpthGl.-Ppp.tc..llVlKSs.aa+u......sFts..lA..s....t.ll.hstuP..Gshs.p.Dhsplsap+................. 0 35 109 166 +7000 PF07172 GRP Glycine rich protein family Bateman A anon Pfam-B_15819 (release 10.0) Family This family of proteins includes several glycine rich proteins as well as two nodulins 16 and 24. The family also contains proteins that are induced in response to various stresses. 30.00 30.00 30.50 30.20 29.80 29.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -11.19 0.72 -3.51 15 154 2009-09-11 20:33:22 2003-09-04 11:05:36 6 3 52 0 60 154 0 95.50 33 73.37 CHANGED MA.SKA.hlLLGLhhsssLLluSEVuAtp.......usp.KsEscssVpssph.....................ttGGYsGGGGsthsGGGh.sGGGsass........................GttGh.tt....GGGYpGGG ...........Mu.SKs..hlLLuL.hhAslLLluS-VuAtc..........ttt..ps.p.sp..ss.....Vpssphhu..t.t....................ttGG.....as..GuG..Gt...t..........G.....G...G.....h.........ss.s.G..u.asG..............u.G..tGh..st.......G...tGh....t................................................. 
0 12 34 47 +7001 PF07173 DUF1399 Protein of unknown function (DUF1399) Vella Briffa B anon Pfam-B_13062 (release 10.0) Family This family represents a conserved region approximately 150 residues long within a number of hypothetical plant proteins of unknown function. 21.20 21.20 21.20 21.20 21.10 20.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.83 0.71 -3.88 16 332 2009-01-15 18:05:59 2003-09-04 13:51:26 7 9 120 0 256 357 6 114.40 26 21.11 CHANGED pYpp.Ccphas+l.l...sptultsphppp.uh.pscclWpphaPpEsachphsstos.......tsstplspDLlusVhRQppFst+hs.......oPahp-s.aLppAhtRYptFlpllpp.tspc...hhLVPThDIDLhWHTHQLashsYhsD .........................................................................................................................................................................hshDLssulhRQtt.F.htphs..................................pshht.p..s..lpcultRYp.t......F.ltlhpp....psp............thlVPTLDlDLhWHTHQL.sP.htYhp.................... 1 86 161 234 +7002 PF07174 FAP Fibronectin-attachment protein (FAP) Vella Briffa B anon Pfam-B_16585 (release 10.0) Family This family contains bacterial fibronectin-attachment proteins (FAP). Family members are rich in alanine and proline, are approximately 300 long, and seem to be restricted to mycobacteria. These proteins contain a fibronectin-binding motif that allows mycobacteria to bind to fibronectin in the extracellular matrix [1]. 
22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.20 0.70 -5.12 8 90 2009-01-15 18:05:59 2003-09-04 14:53:17 6 4 83 0 15 77 0 271.90 62 85.80 CHANGED McQsDspupRR+GL.soLAlAAVoGAoA....sulALPAsAsADPs.PsPs........sPPususAsPAPssP..sPuPsssPs................PuDPNAssP........P..ssADPNAPsPPPsDPNAPsP........ssPpsGRlDNAsGGFSYVVPAGWh.SDAopLsYGpALLSKhssEss.PspsPP..NDTpVlLGRLD.KLaAuAEsDNsKAAsRLuSDMGEFFMPaPGTRlNQpTlPLc.ANGhsGsASYYEVKFoDsNKPNGQIWsGVVGsPsAsusscG..sPpRWFVVWLGTANNPVDKuAAhsLApSIRPWsPPP.PPPsssPus ...................................McQVDss.TRRKGhhAsLAIA.AhuuASh....VTlAl...P.A.T.A.sADPE....PsPs.............ssPPS.TAsAP.......P.A.P.As.PsAPP.sPuAss.sPt............spPuDPNAAPP..........P.....AD.PNAPPP.PslsPNAPt.P..............sRIDNsVGGF.SFsLPAGWV...ESD.AuHLDY....GSALLSKsTG-PPhPsQPPPVANDTRIVLGRLDQKLYASAEAssoKAAsRLGSDMGEFaMPYPGTRINQETlsLD.ANGsoGSASYYEV...KFSDsSKPNGQ.IWTGVIGSPsAsussuG.....sPQRWFVVWLGTANNPVDKGAAKALAESIRPhssPP.PsPAPAPut.................................................. 0 2 8 12 +7003 PF07175 Osteoregulin Osteoregulin Vella Briffa B anon Pfam-B_16589 (release 10.0) Family This family represents a conserved region approximately 180 residues long within osteoregulin, a bone-remodelling protein expressed highly in osteocytes within trabecular and cortical bone. A conserved RGD motif is found towards the C-terminal end of this region, and this is potentially involved in integrin recognition [1]. 
22.10 22.10 22.60 38.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.87 0.71 -4.42 3 78 2009-01-15 18:05:59 2003-09-04 16:19:44 6 1 41 0 14 72 0 174.70 55 34.65 CHANGED csNED+SSu..................GNQ-NIHpsLtASVYP-PTVsKGTEDGcDAlLHLhDQ-RYGAALLRNITQPVKSLVTGsELpuEcNKEK+PQSVLSVIPADVNsAKsaSKDpKNQQRDLLsQsSPVKS...KHT+RsRRSTHYLTHLPQIKKIPSDFEGSGSPDLLVRGDNDVPPFS .........................................p..sEsptSs..................oNKENsHssL+MSIYPcSTGN+GsEDGDDAlSKLHDQEEYGAALIRNNMQH.lM.uPVTshc...L..LGEENKEsKPRNVLsKIP..Auh..NYAK.ApSKDK.Ks.QRDuQuQpsPVKS...KST...H+..hQHNhDYLKpLsKVKKIPSDFEGSG..YsDLQpRGDNDlSPFS............. 0 1 1 1 +7004 PF07176 DUF1400 Alpha/beta hydrolase of unknown function (DUF1400) Vella Briffa B, Bateman A anon Pfam-B_16606 (release 10.0) Family This family contains a number of hypothetical proteins of unknown function that seem to be specific to cyanobacteria. Members of this family have an alpha/beta hydrolase fold. 23.00 23.00 23.20 24.10 22.40 22.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.45 0.71 -4.22 10 255 2012-10-03 11:45:05 2003-09-04 16:53:21 6 4 69 0 99 279 187 126.00 27 30.55 CHANGED AAEplsLpYuPFpc.olsVp-LppFAcTGplsssL+hahphl..sspphppLRthLspRhplsPVtlupll....sSPlGcplLsplupllpssscpssthALRuAllpuAssPs.GlollsllpsYPopslcls ...............AAEplhlpa.us.hph..o...lsl.......psLcpaAc.s.......G.p....h.s......s...p..Lphahphh...stpp..hpplRphLppplpl..........ssshhsphL....po.hGctlLpp.l.upl...l...p...st.s...s.......p..s...uh....AlRu..All.pu.At............p..sp..slollshLcsaPspslpls............ 0 6 65 95 +7005 PF07177 Neuralized Neuralized Vella Briffa B anon Pfam-B_16611 (release 10.0) Family This family contains a conserved region approximately 60 residues long within eukaryotic neuralized and neuralized-like proteins. 
Neuralized belongs to a group of ubiquitin ligases and is required in a subset of Notch pathway-mediated cell fate decisions during development of the Drosophila nervous system [1]. Some family members contain multiple copies of this region. 20.50 20.50 20.70 20.60 18.80 20.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.18 0.72 -4.10 6 930 2009-01-15 18:05:59 2003-09-04 17:04:25 7 17 92 2 581 865 1 68.60 35 23.45 CHANGED PLp.FHs.s+GuNlplscstplA+R.cuSFCculsFSsRPltIsEpltl+lt+hppsWsGuLRlGhTspDP ............t.hp.FH..h+Gpplp.lss.ss.p....sAp....R......p...s..s.Fsp...G.lVaSsRPL+ssEhhpl+l..sch....s.s.p..Wu....G.ulclGlTshsP.................. 1 163 205 374 +7006 PF07178 TraL TraL protein Moxon SJ anon Pfam-B_16378 (release 10.0) Family This family consists of several bacterial TraL proteins. TraL is a predicted peripheral membrane protein which is thought to be involved in bacterial sex pilus assembly [1]. The exact function of this family is unclear. 20.80 20.80 20.80 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.11 0.72 -3.97 22 417 2009-01-15 18:05:59 2003-09-05 09:50:48 6 2 336 0 58 226 12 91.80 35 93.44 CHANGED cpYphPcpLscsp+lhhaslDEhls.hllhashGhhss+...hlhGlhlushhahul++hKtGcussalhchhYWaLPsshht..h+tsPsSphRhals .........achPcpLssts+hhhhslDEllP.shlslshG.lho.u+.......hL..hGl..ss..ulllahsh++.hKcG+usuaLhchlYWahPsshht....h+slPsSphRpal.................................... 1 15 31 46 +7007 PF07179 SseB SseB protein N-terminal domain Moxon SJ, Bateman A anon Pfam-B_16678 (release 10.0) Domain This family consists of several SseB proteins which appear to be found exclusively in Enterobacteria. SseB is known to enhance serine-sensitivity in Escherichia coli [1] and is part of the Salmonella pathogenicity island 2 (SPI-2) translocon [2]. This entry contains the presumed N-terminal domain of SseB. 
19.90 19.90 20.30 20.40 19.80 19.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.52 0.71 -4.30 133 1525 2009-01-15 18:05:59 2003-09-05 10:51:25 7 20 1195 0 280 1063 109 116.10 25 48.96 CHANGED Lcpslp...th..tps........ttstpthhptL.....hp.....uclhlPl.spsss.................................tt...tsphphh.hlp....tt-Gpp.....hlssFTohcplpphts.........pttsh.hshshpslhphh.......tps.tulllNPtss.t....thhlstptlttl .................................................thLtts.tsc.......tpp.thhcsL........hc.....uplaVPsssstsp...................................p...ssslsl..phc.........pp-.Gps.....hlPhFToh-tLpphsp................cppsh..hsh...sscsLhphhh.......ts.p...slhLNstss.s........sh.h.sctlt............................ 1 77 194 247 +7008 PF07180 DUF1401 Protein of unknown function (DUF1401) Moxon SJ anon Pfam-B_16789 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 135 residues in length. Members of this family appear to be found exclusively in the Enterobacteria Escherichia coli, Citrobacter rodentium and Salmonella typhi. The function of this family is unknown. 25.00 25.00 25.00 26.40 24.10 24.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.96 0.71 -4.43 2 589 2009-01-15 18:05:59 2003-09-05 10:56:52 6 1 423 0 13 126 0 109.00 50 76.67 CHANGED hsupsppsshhIPsSV+pYsGEPLYIlVuhWChLQppWlpRNpIAEAFtIshRRASalIsYlpp+pp+ls.hsRa.ohsN.+h+RhEIalhcV..pssP.pp..psGP....KR...RVGNG.hu.uN.lWNch........Ih++Kc-p ...............h.t..................Ys-+PLYLLIA-WMMAENRWVhAREIShpFDIEHsKAlNTLoYIL.S.E.V..sEIsCEVKM.....................................................................................................sp..uttspppphlhhVs................................. 
0 1 4 10 +7009 PF07181 VirC2 VirC2 protein Moxon SJ anon Pfam-B_16860 (release 10.0) Family This family consists of several VirC2 proteins which seem to be found exclusively in Agrobacterium species and Rhizobium etli. VirC2 is known to be involved in virulence in Agrobacterium species but its exact function is unclear [1,2]. 25.00 25.00 57.70 57.50 19.60 15.20 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.28 0.71 -4.91 5 16 2009-01-15 18:05:59 2003-09-05 11:12:19 6 1 9 1 4 13 0 179.10 54 99.72 CHANGED MGIRKPALSVuEARRLAAARPEIVHPsssluSQ-sAsspLPE+AG+EDRpssPssAKRpcSsDRQSMLTVDALSSosuPEKlQVFLSARPPAPEVSKIYDNLILQYSsSKSLQMILRRALuDFEsMLADGSFSsAPKSYPIPpss.EKsVIVQTSRMFPVSLLEVARNHFDPLGLETARAFGHKLATAALASFFAcEKso...+sp MGIRKPALSVuEAR.RLAuARPE...Il+Ps.slso.psssssphPEcucpc-RpstPssAcRptssDpQshL.....TVDALSossuPEKlQVFLSARPPAPtVSchYDsLlhQYSsSKSLQMILRRALsDFEsMLtDGSFptAspSYPIsp.s.pK.llVQTSRMFPVsLlElARsHFDPLGLETuRAFG+KLATAALASFFstEKss...t... 0 1 3 3 +7010 PF07182 DUF1402 Protein of unknown function (DUF1402) Moxon SJ anon Pfam-B_16561 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 310 residues in length. Members of this family seem to be found exclusively in Agrobacterium, Rhizobium and Brucella species. The function of this family is unknown. 
20.10 20.10 21.00 24.80 17.80 17.50 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.80 0.70 -5.54 9 105 2009-01-15 18:05:59 2003-09-05 11:14:38 6 2 93 0 23 69 1 300.30 69 93.33 CHANGED shhsssssApAhplVPsGNRsAEQPsIPGASuRRT+AspToa-tKYcKVhsLLpsD+sLhuKI+psAuAYGIDPIHIVGAIVGEHTYNVDAYDRLQoYYVKAhSYuupsFpFuYcGEslspFlpRPpFupCtt...hp-SYsLWSCREsVWsssFRGKoVsGpuaPssRFuAsFFQPFYAGQTFGLGQlNPLTALploDhVsRlSGa.KLstscspuVY+sIMDPDlSLsYlAAhIRcSIDAY+pIAshDIStNPGlTATLYNVGsPcpRAssLtspNp....uGtt.hLPpENYYGWLVNDKL-EL+uLL ......uh.hluSpuAcAlTVVPPGNRNAEQPsIPGASA+RT+thSTTYE+KY.QKIYsLL++DuSLRuKIRSTAAAYGIDPIHIIGAIVGEHTYNVDsYDRLQTYYVKAhSYlspulSFGYsGESIuQFlcRPEFAsCh+...hKDSYSLWoCREsVWNu-FRGKoVGGKAYPNNRFSAVFFQPFYAGQTFGLGQINPLTALQMSDMV.NRlSGLPKLDADDuNAVYKTIMDPDLTLPYIAAoL+pSIssYRpIADFDISKNPGITATLYNsGGopARAcsLAsENu+R+AAGpEPhLPpENYYGWLVNsKLDELKALF.. 1 3 10 14 +7011 PF07183 DUF1403 Protein of unknown function (DUF1403) Moxon SJ anon Pfam-B_16581 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 320 residues in length. Members of this family are mainly found in Rhizobium and Agrobacterium species. The function of this family is unknown. 
21.80 21.80 21.80 24.40 20.00 21.40 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.88 0.70 -5.05 19 66 2009-09-10 21:27:44 2003-09-05 11:24:43 6 2 36 0 25 69 7 256.30 41 97.24 CHANGED phs...ssusss.sh.PplPuWltstss-............s.p-sAFtuGAALssLDsllR..ppssWtGsWRpRLALcuAssss+htGRsE-EsuLRDAlhLppsGD..cPGPAGclahAWRcLsppsss...s...lhtlsshLG.....culuslsspltsh....hpuspsuPl.suAtlhuslhshtPpAElluhhLADslLAppLsWs+slPLLusph......t.shRsh.sstsclR.scsshtpAlphAlssuAspAlR.AsElsRRAs+LhAVAPKLRuKGAssslchlLs-DAlsu..........S.....shstLSchAARRLF-RLhsLGAVRELSGRsoFRlYGL ..........s.............s.hPuWshstttp..............s.pDsAFhuGuALssLc.llp.....tp.s.tuhhRpRLALpuAtsssth.GRsEctssLRDAhhLhtsGD...sGPAGphhhuaRphstpsss...........ht.ls......culsslssth.th....hpsttsssh.tuA.hhtthht...s...pu.........-hhuhhLADhsLAptLsWs+.VPLLusth..................ttsclp.tttsh.hAsphAlhpushpAlc.us-lsRRAt+LhAVAPKLRuKGAstslphhLscDAlss.........u.....shs.hschAARRLh-RLhpLGAVRELoGRsoFRlYGl.............. 0 1 16 22 +7012 PF07184 CTV_P33 Citrus tristeza virus P33 protein Moxon SJ anon Pfam-B_16614 (release 10.0) Family This family consists of several Citrus tristeza virus (CTV) P33 proteins. The function of P33 is unclear although it is known that the protein is not needed for virion formation [1]. 
25.00 25.00 247.30 247.20 17.40 16.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.00 0.70 -5.59 2 230 2009-09-10 15:37:21 2003-09-05 11:29:21 6 1 1 0 0 186 0 201.30 88 100.00 CHANGED MFAFASENQDlLEEKIFRRRThHRKYhDDVVRDFTIDlGYDhVDRDPTVLADYhSLYFFLLNND.lGPLAASIhVSPPVsGTHKIRAHlDNQPNpEGNVTYlKTVDKSRFsIhIKAhPAsMRGaYShRAFLuuDVsStpsEFICSFVGSRFLCCsTQTISENLsKVCSSSF.FRsVScTAhNEFSVshDDVEDVKYVRKRAEGF.HCADPYPPRCYRssNLGDsSGVQSRTIEEEGYRTDTsGNVAVolPNTPLVNAVSPYVAEYNENsRSRISLIRRVCCYAVCVLVVSLLIMSGLLsIIhI MFAFASENQDlLEEKIFRRRThHRKYhDDVV+DFTIDlGYDssDRDPTVLADYFSLYFFLLNNDNlGPLAASIlVSPPVsGTaKIRAHVDNQPNpEGNVTYVKTlDKSRFsI+IKAlPAsMRGYYSFRAFLouDVASERSEFICSFVGSRFLCCCTQTISENLuKVCSSSFFFRAVSETATNEFSVsTDDVEDVK................................................................................................................ 0 0 0 0 +7013 PF07185 DUF1404 Protein of unknown function (DUF1404) Moxon SJ anon Pfam-B_16616 (release 10.0) Family This family consists of several archaeal proteins of around 180 residues in length. Members of this family seem to be found exclusively in Sulfolobus tokodaii and Sulfolobus solfataricus. The function of this family is unknown. 27.00 27.00 27.50 27.50 26.90 26.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.02 0.71 -4.68 14 66 2009-01-15 18:05:59 2003-09-05 11:40:19 6 1 19 0 23 53 0 153.20 32 84.69 CHANGED hhslhLllhslNPaoEph.hhsshlaMhuHYuLahuGhLlGhhhh+t.....shhh..hl..............lGhhhslhWHlPhaFsluus.h.hRllp-loLllGGlLhGuShpthshhhK.lsLhuLWMhuDohLuIlhllusshYosh.h.aSPYssppLshsGlhMFlhMsllhsallh+h.l.Ksl ...........................................hhhhhh.lNPhs..h.....h.hMhsHY.lhhuGhhluhhhh+t...............s.l...hh................lGhh.shhWHhPhhFslusthhhhRllpclohhlGGlllGSulptlphshK.lhLhuLWMhGDolLullLhlusshYos....h.sYsspph.hsuhhMFlhMslhhhhllhch.hpt..................................... 
0 3 4 18 +7015 PF07187 DUF1405 Protein of unknown function (DUF1405) Moxon SJ anon Pfam-B_16845 (release 10.0) Family This family consists of several bacterial and related archaeal protein of around 180 residues in length. The function of this family is unknown. 25.00 25.00 41.90 41.90 21.70 21.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.09 0.71 -4.52 31 478 2009-01-15 18:05:59 2003-09-05 13:01:54 6 1 459 0 77 287 1 164.10 48 83.05 CHANGED TlaG.ahWY............t....QLspTPhhha.FVPDSPsAoLFhslsLluhL...............hs+sh.....shlcALAhVsLlKYGlWslshplhhhht..s..sshshhthhLllSHhuMulpuhLah.ah.+hphhtlslAslWhhhNDslDYhhh..hPhhsh.ht.a.hspluhhs.............hhlulhslhlshaLshp ....hIYG.YIWY............u...pQL....scT....P....h.....hFhlFVPDSPTAhLFhllulhhhL...............hpKp..........ulI-ALAaVTLhKYGlWAVlMNllhhhp......pGsIs.hpGhhLhhSHshMAVQAlhahPha..+hshhtlsVAhlWshhNDhIDYhahQ.hPhYsh...lppa..lhpIG..hho..................hhLSl.huLhlhh.hs.+.......................................... 0 20 52 69 +7016 PF07188 KSHV_K8 Kaposi's sarcoma-associated herpesvirus (KSHV) K8 protein Moxon SJ anon Pfam-B_16868 (release 10.0) Family This family consists of Kaposi's sarcoma-associated herpesvirus (KSHV) K8 proteins. KSHV is a human Gammaherpesvirus related to Epstein-Barr virus (EBV) and herpesvirus saimiri. KSHV open reading frame K8 encodes a basic region-leucine zipper protein of 237 aa that homodimerises. K8 interacts and co-localises with human Pfam:PF04855, a cellular chromatin-remodelling factor, both in vivo and in vitro. K8 is thought to function as a transcriptional activator under specific conditions and its transactivation activity requires its interaction with the cellular chromatin remodelling factor hSNF5 [1]. 
25.00 25.00 440.70 60.80 18.00 17.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.69 0.70 -5.13 2 10 2009-01-15 18:05:59 2003-09-05 13:08:26 6 2 3 0 0 9 0 181.40 89 86.96 CHANGED MPRMKDIPTKSSPGTDNSEKDEAVIEEDLSLNGQPFFTDNTDGGENEVSWTSSLLSTYVGCQPPAIPVCETVIDLTAPSQSGAPGDEHLPCSLNAETKFHIPDPSWTLSHTPPRGPHISQQLPTRRSKRRLHRKFEEERLCTKAKQGAGRPVPASVVK.................................................AEVCDQSHSPTRKQGRYGRVSSKAYTRQLQQ .MPRMKDIPTKSSPGTDNSEKDEAVIEEDLSLNGQPFFTDNTDGGENEVSWTSSLLSTYVGCQPPAIPVCETVIDLTAPSQSGAPGDEHLPCSLNAETKFHIPDPSWTLSHTPPRGPHISQQLPTRRSKRRLHRKFEEERLCTKAKQGAGRPVPASVVKVGNITPHYGEELTRGDAVPAAPITPP.PRVQRPAQPTHVLFSPVFVSLKAEVCDQSHSPTRKQGRYGRVSSKAYTRQLQQ. 0 0 0 0 +7017 PF07189 SF3b10 Splicing factor 3B subunit 10 (SF3b10) Moxon SJ anon Pfam-B_16870 (release 10.0) Family This family consists of several eukaryotic splicing factor 3B subunit 10 (SF3b10) proteins. SF3b10 is a 10 kDa subunit of the splicing factor SF3b. SF3b associates with the splicing factor SF3a and a 12S RNA unit to form the U2 small nuclear ribonucleoproteins complex. SF3b10 and SF3b14b are also thought to facilitate the interaction of U2 with the branch site [1]. 18.50 18.50 20.10 20.70 18.30 17.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.50 0.72 -4.15 19 296 2009-01-15 18:05:59 2003-09-05 13:40:43 6 5 270 0 214 245 2 77.40 49 82.45 CHANGED D+hphpsQLEpLQu+YlGoGHsDTT+.aEWhsNlpRDohuSalGH.shLsYhulu.cNEshu+lRhphL.........................p+MltPssss...Pt .......D+hphpsQLE+LQuKYlGTGHAD.TTK.aEWhsN.+RDoauSahGH.slLsYhAlA...EN..E.u.huR.l.Raphh.........................cKMlpPsGsPP............... 0 75 116 176 +7018 PF07190 DUF1406 Protein of unknown function (DUF1406) Moxon SJ anon Pfam-B_16883 (release 10.0) Family This family consists of several Orthopoxvirus proteins of around 185 resides in length. Members of this family seem to be exclusive to Vaccinia, Camelpox and Cowpox viruses. 
Some family members are annotated as being C8 proteins but their function is unknown. 20.20 20.20 20.40 20.50 19.80 20.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.40 0.71 -4.55 4 194 2009-01-15 18:05:59 2003-09-05 13:49:14 6 3 31 3 0 180 0 168.20 33 67.41 CHANGED M.sI+hIshLhlIhs...lushp.ss.YpP............FNKLsIsL-IYsh-sl.ssYTssN......hl.hpchhIol.op.Csshhso.-l.sspDhp.lthhhhDp.phQpps+hCplshchpCph.pcP..lt..p....phShpu-.....hpChpslch.Ishlp....DpphL+.paTL+Iss...............thsl ......................................................P............FNhlsVclslYsV.N..sShTps.s.......sps.s.sIsTpEhTITl.pcs..CsPsFcs..sh.aolhsplshsuaFps-.sc.hQspsKhCTlshcl+Csp....psp.....s......lpphpt...c...spphs.puE.....GcCLsulcl.I.Y.N...Tssp.hc.p.p.l.........t.hhst.h........... 2 0 0 0 +7019 PF07191 zinc-ribbons_6 DUF1407; zinc-ribbons Moxon SJ anon Pfam-B_16889 (release 10.0) Family This family consists of several short, hypothetical bacterial proteins of around 70 residues in length. Members of this family have 8 highly conserved cysteine residues, which form two zinc ribbon domains. 25.00 25.00 25.00 25.00 24.70 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.94 0.72 -4.19 20 619 2012-10-03 10:42:43 2003-09-05 13:53:40 7 1 612 2 48 200 3 69.30 60 95.42 CHANGED -hhCPsCpppL-hsu...tphHCspCppcaphpAhCP-CpppLEcLpACGAssYFCp.pCNpLhSKpRVcFphp .............ElpCPpCpps.L-pss............spA+CsoCsc.....s.....hphpAlCPDC+QPLQVLKACGAVDYFCQ..pGHGLISKKRVcFVl.u.... 0 8 17 32 +7020 PF07192 SNURF SNURF/RPN4 protein Moxon SJ anon Pfam-B_16890 (release 10.0) Family This family consists of several mammalian SNRPN upstream reading frame (SNURF) proteins. SNURF or RPF4 is a RING-finger protein and a coregulator of androgen receptor-dependent transcription. It has been suggested that SNURF is involved in the regulation of processes required for late steps of spermatid maturation [1,2]. 
25.00 25.00 74.40 74.10 23.10 16.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.37 0.72 -4.12 2 30 2009-01-15 18:05:59 2003-09-05 14:02:14 6 1 21 0 11 31 0 67.40 87 89.83 CHANGED MERuRDRLHLRRTTEQHVPElEVQVKRRRTASLsNQECHlY.RRSQQQQlPVVDFQAELhQAFLAETPRGG .........RDRLHLRRTTEQHVPEVEVQVKRRRTASLSNQECQLYPRRSQQ..QQVPVVDFQAELRQAFLAETPRGG 0 2 2 2 +7021 PF07193 DUF1408 Protein of unknown function (DUF1408) Moxon SJ anon Pfam-B_16879 (release 10.0) Family This family consists of several hypothetical Lactococcus lactis and related phage proteins of around 75 residues in length. The function of this family is unknown. 21.50 21.50 21.90 144.50 19.70 20.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.57 0.72 -4.45 2 18 2009-01-15 18:05:59 2003-09-05 15:35:39 6 1 18 0 1 7 1 73.90 76 97.15 CHANGED McTpIhNGRcVhhlPTslG.haaDL.KREshG.Vl.pTacR.DGohYhhpRp+scpE....KAAMLNpClSDWth METTIINGRKVRllPTsVGQIYHDLIKRENRGVVVFETWcRPDGSLYMTSRKKNKQELAADKAAMLNECISDWKK 0 1 1 1 +7022 PF07194 P2 P2 response regulator binding domain Finn RD anon Pfam-B_7970 (release 10.0) Family The response regulators for CheA bind to the P2 domain, which is found between Pfam:PF01627 and Pfam:PF02895 as either one or two copies. Highly flexible linkers connect P2 to the rest of CheA and impart remarkable mobility to the P2 domain. This feature is thought to enhance the inter CheA dimer phosphotransfer reactions within the signalling complex, thereby amplifying the phosphorylation signal [1]. 
21.30 21.30 22.00 21.50 21.20 21.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.50 0.72 -4.13 80 704 2009-01-15 18:05:59 2003-09-08 09:47:26 6 18 549 1 219 650 12 84.00 26 13.32 CHANGED psYplcVplscsshlKusRAahlhcsLc-hG.-llcohPshE-l.Es-p....F..cpphplhlloppst-clcphlt.sluEl......cpVplpphp .......................haclplplpc.sshhKulRAhhlhcsLcc..hG.cll+ohP...s..hEcl..Es-p........F..tppFplhhho.p.p..s..t..-clcphl...pluEl......ccVplpp..s........................ 0 88 155 181 +7023 PF07195 FliD_C Flagellar hook-associated protein 2 C-terminus Yeats C anon Yeats C Family The flagellar hook-associated protein 2 (HAP2 or FliD) forms the distal end of the flagella, and plays a role in mucin specific adhesion of the bacteria [2]. This alignment covers the C-terminal region of this family of proteins. 25.40 25.40 25.40 25.50 24.80 25.20 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.66 0.70 -5.07 35 2213 2009-09-13 17:36:45 2003-09-08 11:04:28 7 11 1857 0 507 1693 494 236.90 25 45.96 CHANGED ApsAplslsG....h.slpssoN.plssslsGVolsL................ppsopssp.......slsl.spDssshppslpsFVcuYNplhsplsshos.....................t.tt.pppspsGsLtGDusl+slpspL+shhts....t.sss...........hpsLsclGIosspt.p...........GpLpl...DcsKLcpslp..pssssltph.....................................................................FsG.........................................ssGlspp..............lpstlpshs.....pss.Ghlp...................scpssLscphpplspphpshsc+hcshpp+hppp..Fsth-phhsphss 
................................................................................................................................................ApsApls..l.sG...............l..pl.pp.soN..plsss...l.pG.l.T.lsL............................pp.s....s.ttst...........slsl...spDs.s....s.sppslpsaVs.uYNsLlsshsshop................................................t..pps..tpsGsL.hG.D.u.s.lps.l...p.splcshlss..........s.t..sss..................hpoLsplG.Iohsps...............................GpLp.l.....................Ds..sKLpp..A..Lp..p..s..s.su.ltph...........................................................................................................................................................................................................................................................................................................................................Fs.u.....................................................................................................................................p.sGl.ssp.......................ltshlss..hh......................sss...Gh..lp.....................................stps...u...lspplpplscphpshspp.hcsh.s+..hptp.....Fsth-shhsphp........................................................................................................... 0 175 326 420 +7024 PF07196 Flagellin_IN Flagellin hook IN motif Yeats C anon Yeats C Motif The function of this region is not clear, but it is found in many flagellar hook proteins, including FliD homologues ([1]). It is normally repeated, but is also apparently seen as a singleton. A conserved IN is seen at the centre of the motif. The diversity of these motifs makes it likely that some members of the family are not identified. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.66 0.72 -3.98 132 2915 2009-09-13 07:46:12 2003-09-08 15:29:45 8 27 1152 5 475 2258 345 56.80 25 13.44 CHANGED slslphssttt..........lshsss..sshss.lsstIN...........sspsGVpAoh.......ssp.......upLsl...puss ..................................lslptssttts................l.shpss..sslts.l....sstIN............ss..ss.G.V.pAol.............sps..........upL.lput....................... 0 140 264 375 +7025 PF07197 DUF1409 Protein of unknown function (DUF1409) Vella Briffa B anon Pfam-B_16557 (release 10.0) Family This family represents a short conserved region (approximately 50 residues long), sometimes repeated, within a number of hypothetical Oryza sativa proteins of unknown function. 21.10 21.10 21.20 21.20 20.50 21.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.47 0.72 -3.90 51 427 2009-01-15 18:05:59 2003-09-09 17:00:33 7 14 7 0 308 422 0 46.30 47 12.38 CHANGED RLEsppl-.L........AcLcEh...pApls.Eh......tclhs.sthlE...pp+....hK.Lcp ..........................RLEApplD.L..........ApLc.Es.............pApls.Eh.....................tclhs.sthlEppp....h+Lcp............. 0 1 1 7 +7026 PF07198 DUF1410 Protein of unknown function (DUF1410) Vella Briffa B, Bateman A anon Pfam-B_13132 (release 10.0) Domain This family represents a conserved domain approximately 100 residues long, multiple copies of which are found within hypothetical Ureaplasma parvum proteins of unknown function, as well as related species. 
21.00 15.50 21.10 15.50 20.90 15.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.27 0.72 -3.66 67 980 2009-01-15 18:05:59 2003-09-09 17:11:28 6 16 37 0 55 835 4 68.20 20 15.61 CHANGED spphtstplpl........phpDpsppphpl.....................phphsppppslt.....hssLpss.....p..p..Yplsclshsspphphs ...................................................hhNpplph........hhpDps.sppapl...............................pspl.spssphhh.....h..hss...Lsss.....p.....pYplsclhhsspp................................... 0 51 55 55 +7027 PF07199 DUF1411 Protein of unknown function (DUF1411) Vella Briffa B anon Pfam-B_16764 (release 10.0) Family This family represents a conserved region approximately 150 residues long that is sometimes repeated within some Babesia bovis proteins of unknown function. 20.70 20.70 20.90 21.20 19.80 19.20 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.10 0.71 -4.86 6 33 2009-01-15 18:05:59 2003-09-09 17:23:38 6 3 15 0 12 26 0 151.80 31 67.59 CHANGED MSGHLVCKSGFGLGKVAKLMLASVVVLSAFSsNVWA.....scAEsspcPh.tph-GGhhot...............h--hpKL..pshsDlspT-NTEKTsLuDlSVKLGPhc+sM..hlKcVDVDMphLN+RIollLp+lTsaGP-SPsFGlSENIVKsLNK+GoIElPKcLAQpLCphcoGKhppYEWppFpDtFuoalA-hlss .......................................h..............................................sp.s..tphpshh.oh...............h.pss...--hpKh..p.h..lpp.sc.T...EKo.l.h.lh.lp.ts.hcpshphplp.sssVp.h.hspccLoolLt.tlsphsPto.hhulScNllchLsKpp.p...-h.pclAptLsplcothhth.E..................h................... 0 12 12 12 +7028 PF07200 Mod_r Modifier of rudimentary (Mod(r)) protein Vella Briffa B, Wood V, Mistry J anon Pfam-B_16631 (release 10.0) Domain This family represents a conserved region approximately 150 residues long within a number of eukaryotic proteins that show homology with Drosophila melanogaster Modifier of rudimentary (Mod(r)) proteins. 
The N-terminal half of Mod(r) proteins is acidic, whereas the C-terminal half is basic [1], and both of these regions are represented in this family. Members of this family include the Vps37 subunit of the endosomal sorting complex ESCRT-I, a complex involved in recruiting transport machinery for protein sorting at the multivesicular body (MVB). The yeast ESCRT-I complex consists of three proteins (Vps23, Vps28 and Vps37). The mammalian homologue of Vps37 interacts with Tsg101 (Pfam: PF05743) through its mod(r) domain and its function is essential for lysosomal sorting of EGF receptors [2]. 27.00 27.00 27.00 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.97 0.71 -4.29 17 469 2009-01-15 18:05:59 2003-09-10 11:27:18 8 9 245 5 303 465 1 139.30 27 52.13 CHANGED lpphShs-LpcLlpDc-thpc.hhtphsp..lpslppp+-phhspscpLAccNLthpspLcph+splp.pchppLpplppphpphhpchcch.upphSspslhphLQhsspcs-E-oEph..sccFLcGchsl-sFlppahphRphhHhR+hp.EKL ..................................pthohppLpclhp.sp...ph...lpp.hh...p....hsp....lpp.h....ptp..p-thls.sNcpL..A...............c..pNL..th.p..spLpp.hR...splp.pphpplpplppphpppp.p.ch..cc........h..pshosps.Lhth..LpststctEc.....-o-sh......s-pF..L.-...........G........c........h....s........l..p.sF.....lppa.phRphhHhR+.p.-+...................................... 0 82 131 214 +7029 PF07201 HrpJ HrpJ-like domain Vella Briffa B, Moxon SJ, Bashton M, Bateman A anon Pfam-B_16649 & Pfam-B_11026(release 10.0) & Pfam-B_1285(release 5.4) Domain This family represents a conserved region approximately 200 residues long within a number of bacterial hypersensitivity response secretion protein HrpJ and similar proteins. HrpJ forms part of a type III secretion system through which, in phytopathogenic bacterial species, virulence factors are thought to be delivered to plant cells [1]. This family also includes the InvE invasion protein from Salmonella. 
This protein is involved in host parasite interactions and mutations in the InvE gene render Salmonella typhimurium non-invasive [2]. InvE S. typhimurium mutants fail to elicit a rapid Ca2+ increase in cultured cells, an important event in the infection procedure and internalisation of S. typhimurium into epithelial cells [2]. This family includes bacterial SepL and SsaL proteins. SepL plays an essential role in the infection process of enterohemorrhagic Escherichia coli and is thought to be responsible for the secretion of EspA, EspD, and EspB [3]. SsaL of Salmonella typhimurium is thought to be a component of the type III secretion system [4]. 22.30 22.30 25.40 23.00 22.20 22.20 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.85 0.71 -4.23 36 767 2009-01-15 18:05:59 2003-09-10 11:43:52 6 4 513 10 60 405 7 160.10 23 46.32 CHANGED s...........hhpphEctutsLspRch...........tptppppphpphhchl-ch.-tpt-p...phcpLpptLtt.....hssssplhshlpthhsDsucthlsL....pthlpp....ps.spthtptlppslppLpp..cputplpuGlNs...Ahtuttaut.tsstphptLRsLYppsl...tsp.tshsshhpslhppa .......................................................tt.h.psh-chuhuLsphtt....................+tp.tcp.pphtpthp+llcpptps...........chcplhthh................tss.tph.s.hpthhs-suptllsL....pthLpp......cslpppl+cplpphLpcltt...pcshpl..Ghp......AlhutLhut...s......stshptLRphYcphl....psp.tshsphapclhsp.................. 0 16 26 41 +7030 PF07202 Tcp10_C T-complex protein 10 C-terminus Vella Briffa B anon Pfam-B_13039 (release 10.0) Family This family represents the C-terminus (approximately 180 residues) of eukaryotic T-complex protein 10. The T-complex is involved in spermatogenesis in mice [1]. 
21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.42 0.71 -4.73 8 224 2009-01-15 18:05:59 2003-09-10 11:56:30 8 9 126 0 134 206 9 150.70 39 21.12 CHANGED KlEplhssGpRlIhFPNGT+KEISADGcoVpVpFFNGDIKcs.hs-tpVlYYYu-sQTTHTTYPDGLEVLpFPNsQpEK+asDGoKEIpFPDsTlKhLpscGp...EEslaPDGThlplp+sG-KhIpFsNGQ+EIHTspaKRREYPDGTVKTVYssGpQET+YosGRlRlKDc-GplIhDoc ..............................................................Ehh.htsGpp.lhasNGshKcl...t..s......Dt...ts.......shlpF.hNGDlKph..hs-...t.p.l..lYaYA..s.sp..ThHTTa..P.............-.Gl.EllpF.ss.sQ...........hE+..+aP.DG..p.....K.EIhFPDsol...K.l.scGp...E.EohaPDGTh.hphp.h..sGs.Khl..h.sN...G.p.c...El.+T.tt.hK.+............+ca.PDGThKhlas.s.GppEoph.sGph+hc...G............................................ 0 63 74 99 +7031 PF07203 DUF1412 Protein of unknown function (DUF1412) Moxon SJ anon Pfam-B_16907 (release 10.0) Family This family consists of several Caenorhabditis elegans proteins of around 70-75 residues in length. The function of this family is unknown. 25.00 25.00 25.80 34.00 20.80 16.00 hmmbuild --amino -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.70 0.72 -4.05 7 36 2009-01-15 18:05:59 2003-09-10 12:39:10 6 1 5 0 36 28 0 51.90 69 68.96 CHANGED GII.R-RR..psYassNGVVNNhVoDshhGGPTSLGWAQVPHlhSPMFSPVFG+ ................GlI.RcRR..psYWhoNG.VV.NNhVSDNlsGGPTSLGWAQVPHhaSPMFSPVFGK.. 0 9 17 36 +7032 PF07204 Orthoreo_P10 Orthoreovirus membrane fusion protein p10 Moxon SJ anon Pfam-B_16940 (release 10.0) Family This family consists of several Orthoreovirus membrane fusion protein p10 sequences. p10 is thought to be a multifunctional protein that plays a key role in virus-host interaction [1]. 
21.20 21.20 21.30 21.30 20.80 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.43 0.72 -4.25 3 41 2009-01-15 18:05:59 2003-09-10 12:45:47 6 3 23 0 3 35 1 88.50 50 66.91 CHANGED MhpMsSGSCNGATSVFGNVHCQAAQNTAGGDLQATSSlIAYWPYLAtGGGlIlIIII.luLlYCC+AKVKsDAsRSVF+RELlALoSGKsNAsPPuYD.V ..........................sGsCsGhsulFGsVHCQuupNoAGGDLQATSslhs.YWPYLAuG..G.GhlLIlIl.luLlYCC+uKhKscusRssa+RELVALopuphpshssshp..................... 0 1 1 3 +7033 PF07205 DUF1413 Domain of unknown function (DUF1413) Moxon SJ, Bateman A anon Pfam-B_16942 (release 10.0) Domain This family consists of several hypothetical bacterial proteins which seem to be specific to firmicute species. Members of this family are typically around 100 residues in length. The function of this family is unknown. 25.00 25.00 25.10 26.30 23.70 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.09 0.72 -4.44 28 386 2009-11-04 10:27:19 2003-09-10 12:53:35 6 2 261 0 18 117 1 70.20 40 75.51 CHANGED hccthptlcplsssssFph+..DLasc.......pWsphspsp+p.plG+hFhphVppsstht..h.hh.s+cssspphYpK ........h+ctlhtlps.sctosFsF+..DLFS+.......pWlshShA-RQ.cst+tFta.VKphscV...h.hs.ut.csu..thplYp............ 0 8 13 18 +7034 PF07206 Baculo_LEF-10 Baculovirus late expression factor 10 (LEF-10) Moxon SJ anon Pfam-B_16893 (release 10.0) Family This family consists of several Baculovirus specific late expression factor 10 (LEF-10) sequences. LEF-10 is thought to be a late expressed structural protein although its exact function is unknown [1]. 
19.70 19.70 28.70 62.20 19.60 19.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.35 0.72 -4.33 21 48 2009-01-15 18:05:59 2003-09-10 13:04:46 6 1 47 0 0 45 0 71.10 41 93.58 CHANGED opsh.ts..DllssIL+cNLpLlDNsYlILNVlDpcss..p.......lcshClGEIsuhQs.cpssp.cuhSsoSsoSELpS .....s..s..ts.sDllssILKcNLpLlsNsYIILNVlDpcss...p.........l+shClGEIsuhQT.csssp.-shSsSSsoSELpS... 0 0 0 0 +7035 PF07207 Lir1 Light regulated protein Lir1 Moxon SJ anon Pfam-B_16937 (release 10.0) Family This family consists of several plant specific light regulated Lir1 proteins.\ Lir1 mRNA accumulates in the light, reaching maximum and minimum steady-state levels at the end of the light and dark period, respectively. Plants germinated in the dark have very low levels of lir1 mRNA, whereas plants germinated in continuous light express lir1 at an intermediate but constant level. It is thought that lir1 expression is controlled by light and a circadian clock. The exact function of this family is unclear [1]. 25.00 25.00 39.60 39.50 20.40 19.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.12 0.71 -3.96 5 52 2009-01-15 18:05:59 2003-09-10 13:11:41 6 1 31 0 16 46 0 122.90 41 93.23 CHANGED MQsAuShslolsus.soosoupShshhhPpphps.pAsRpsphRl+Auuuuss.DsuTVDYsSsh.SVFPAEACDTlGGEACsu-MYPEVKLcs-ussss..sAsoEsVDREYLEY.s-PKTVFPAEACDDLGGEFC-P- .........................................................s..p.....h.h............t.pthphts...u.s.sss-suTVDYs.Ssh.SVFPAEAC-slGGEACp.u.-MYPEsKLpspu.tsss......psssEsl-REYLpY.s-PKTVF.sEACDDLGG-FC-s......... 0 1 9 13 +7036 PF07208 DUF1414 Protein of unknown function (DUF1414) Moxon SJ anon Pfam-B_16906 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 70 residues in length. Members of this family are often referred to as YejL. The function of this family is unknown. 
25.00 25.00 48.30 47.80 20.30 19.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.81 0.72 -4.25 27 785 2009-01-15 18:05:59 2003-09-10 13:16:00 6 1 783 13 90 230 8 44.00 72 58.77 CHANGED HcAPsDLSLMlLGNhlTNllsppV.sssQRtslA-pFucALtpSl ..HKAPTDLSLMVLGNMVTNLINTSl.APAQRQAIApSFAcALQSSI. 0 8 28 61 +7037 PF07209 DUF1415 Protein of unknown function (DUF1415) Moxon SJ anon Pfam-B_16932 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 20.90 20.90 23.20 22.60 19.50 19.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.19 0.71 -4.53 56 495 2009-01-15 18:05:59 2003-09-10 13:18:37 7 3 477 0 142 398 77 170.90 45 88.00 CHANGED pptlls.pTcpWlccsVlGLNLCPFA+p.ht+spIRasVo-AsshcslLpsLhpELphLttssst-l-.TTLllhPps..LpDFhDYpDhlDhA-sLLtphsh-GlhQlASFHP-YpFsGsssDDspNaTNRSPYPhLHLlREsSl-+AltsaP-.sEsIPERNIphhccLGt-....tappL .........................p..tlltpTcpWLpcsVIGLNLCPFA+tshh.+....pp...........lRhhVS-Aps.-s...lLp..-LhpELptL.tt...ssspcl-.TTLllh.Pph....hpD.FhDYND.hl.-hA.-sll..pp..s..hEGl.....hQlAoFHP-YpFs...Go...-....sD.DhpNaTNRSPYPhLHLlRE-Sl-+Alps...a.P.D....s-sI.E+NIsplccLGt-thpp.h.................. 0 41 83 116 +7038 PF07210 DUF1416 Protein of unknown function (DUF1416) Moxon SJ anon Pfam-B_16939 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 100 residues in length. Members of this family appear to be Actinomycete specific. The function of this family is unknown. 
22.10 22.10 22.10 22.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.84 0.72 -4.37 9 209 2012-10-02 19:08:27 2003-09-10 13:24:12 7 3 183 0 68 190 15 85.90 65 81.51 CHANGED sulDltKEsVIpGpVh.psGpPVssAYVRLLDusGEFTAEVsoSAoGpFRFFAAPGoWTVRALsssusu.-tpVsAcsuslppV-lsV .........AuVDlEKETVITG+Vl..suDGpsVGGAFVRLLDSosEF.TA.EVVASATGDFR....FFA.AP..GoWTLRALSsuGNG...Dus.VpPsGuGlHE.VDlpl........................ 0 23 53 63 +7040 PF07212 Hyaluronidase_1 Hyaluronidase; Hyaluronidase protein (HylP) Moxon SJ anon Pfam-B_16578 (release 10.0) Family This family consists of several phage associated hyaluronidase proteins (EC:3.2.1.35) which seem to be specific to Streptococcus pyogenes and Streptococcus pyogenes bacteriophages. The substrate of hyaluronidase is hyaluronic acid, a sugar polymer composed of alternating N-acetylglucosamine and glucuronic acid residues. Hyaluronic acid is found in the ground substance of human connective tissue and the vitreous of the eye and also is the sole component of the capsule of group A streptococci. The capsule has been shown to be an important virulence factor of this organism by virtue of its ability to resist phagocytosis. Production by S. pyogenes of both a hyaluronic acid capsule and hyaluronidase enzymatic activity capable of destroying the capsule is an interesting, yet-unexplained, phenomenon [1]. 
25.00 25.00 37.30 56.60 19.60 19.60 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.75 0.70 -5.40 2 54 2009-01-15 18:05:59 2003-09-10 13:38:03 6 2 22 8 6 52 2 239.80 70 77.07 CHANGED NKPslsthApK.ETspKlpp....KADKpsVYhKAESK.ELDKKLsLpGGlhTGQL+hKPsuslthSSSTGGAlNIDhSpS+GAuhVhYoNpDToDGPLM.LRosK-TFsQSs.FVDY+GpTNAVNIsMRQPoTPNFSSALNITSuNEsGSAMQlRGsEcALGTLKITHENPSlcAsYDKNAAALSIDIVKK..osGtGTAAQGIYINSTSGTTGKLLRIRNhNcDKFYVpPDGGFauhtsS.lDGNLplKsPhuN-HAATKtYVDtcltcLKtLlstK .......................................NKPslsuhApKpETssKIscL.S.KADKssVYhKAESKhELDKKLsLpGGlhTGQLpFKPN.SsIc.SSSsGGAINIDMSKScGAuhVhYoNpDToDGPLM.LRosK-TFsQSA.FVDYpGpTNAVNIsMRQPoTPNFSSALNITSuNEsGSAMQlRGlEKALGTLKITHENPslcApYDcNAAALSIDIVKK...psGpGTAAQGIYIN.STSG.TT.GKLLRIRNh.s-.DKFYVpsDGGFauttsSplDGNLplKsPTuscHAATKsYVDpclpcLKtLl.cK............................. 0 1 5 6 +7041 PF07213 DAP10 DAP10 membrane protein Moxon SJ anon Pfam-B_16910 (release 10.0) Family This family consists of several mammalian DAP10 membrane proteins. In activated mouse natural killer (NK) cells, the NKG2D receptor associates with two intracellular adaptors, DAP10 and DAP12, which trigger phosphatidyl inositol 3 kinase (PI3K) and Syk family protein tyrosine kinases, respectively. It has been suggested that the DAP10-PI3K pathway is sufficient to initiate NKG2D-mediated killing of target cells [1]. 20.80 20.80 20.90 21.50 20.60 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.76 0.72 -4.23 3 40 2009-01-15 18:05:59 2003-09-10 13:48:29 6 2 25 0 16 39 0 71.20 48 94.25 CHANGED MlPPGHILFLLLLPVAAAQTTPGSCSGCGPLSLPLLAGLVAADAVsSLLIVGVVFVCARLRSRPAQEDGKVYINMPGRG ...............M...GtlLhL.L.L...LsV...uA..sQ..............sos..............G..SCSGCGsLSLPlLAGLVAADAVhoLLIVssVFhCAR...RppPsp.ccs+VYlNMPsR............... 
0 2 3 6 +7042 PF07214 DUF1418 Protein of unknown function (DUF1418) Moxon SJ anon Pfam-B_16971 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 100 residues in length. Members of this family are often described as YbjC. In E. coli the ybjC gene is located downstream of nfsA (which encodes the major oxygen-insensitive nitroreductase). It is thought that nfsA and ybjC form an operon an its promoter is a class I SoxS-dependent promoter [1]. The function of this family is unknown. 20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.39 0.72 -4.36 7 515 2009-01-15 18:05:59 2003-09-10 13:56:04 7 1 507 0 26 140 0 94.00 67 98.88 CHANGED MRolGsLP+sVLILEhLGMlLLslAhLSlNpYLoLPushuoPpAullMIFlGlsLMlPAAlslhWRlAphhuP.L.....hs+PPp.sp....otc-KpsDusH .....MRuIGcL..PKuVLILEaIGMhLLAVAL.LS.ls-.L.SLPcPFupPp..............lt............ILMIFLGVLLMLPAAVlllhpVAKtLAPQL.....MsRPPphSc......S-REKcNDuNH................ 0 1 5 16 +7043 PF07215 DUF1419 Protein of unknown function (DUF1419) Moxon SJ anon Pfam-B_16972 (release 10.0) Family This family consists of several bacterial proteins of around 110 residues in length. Members of this family seem to be specific to Agrobacterium species and to Rhizobium loti. The function of this family is unknown. 21.60 21.60 21.70 22.30 21.50 21.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.60 0.72 -4.20 15 46 2009-01-15 18:05:59 2003-09-10 13:58:33 6 1 32 0 27 48 1 105.70 53 56.29 CHANGED Ms..sPlRKVFpGVAcRcQMFRhFDRHuQRPsRacuDsusLYsGEWFEIspspHDYMhEILPPLWMRu-MFAMREFLTGslTSVFFsL+IDGRhRaFHGYCDLSD+sSPEcM .................p..sshRKlapGVAsRpQMFchFDRHsQcPsphcsDsusLYsGEWFEIucspHDYMhEILPPLWhR....G....sMFAMREFLTGolTSVFFsLpI.DGphRaFHGYCDLuD+uSsEcM.............. 
0 2 15 20 +7044 PF07216 LcrG LcrG protein Moxon SJ anon Pfam-B_16974 (release 10.0) Family This family consists of several bacterial LcrG proteins. Yersiniae are equipped with the Yop virulon, an apparatus that allows extracellular bacteria to deliver toxic Yop proteins inside the host cell cytosol in order to sabotage the communication networks of the host cell or even to cause cell death. LcrG is a component of the Yop virulon involved in the regulation of secretion of the Yops [1]. 21.70 21.70 23.00 24.80 21.20 19.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.10 0.72 -3.96 4 75 2009-01-15 18:05:59 2003-09-10 14:19:17 7 1 70 0 9 31 0 95.30 52 99.54 CHANGED MKps..s-aoc.....TlppAELAItDSDcRscLLpEMhtGLGluspAsplLFtu...ssE.hpsAEpELLsElpRpRppQPp.QshpG+RsRRPThMRGhlI .....................MKss..s-asc.....TlcpAELAItDS-cR.ucLLpEMhtuL.GLsPpAsplLFuG..tssE..h+sAEcELL-El+RpRppQPQ...ps..tcG+RPRRPTMMRGhlI 0 1 3 4 +7045 PF07217 Het-C Heterokaryon incompatibility protein Het-C Moxon SJ anon Pfam-B_16951 (release 10.0) Family In filamentous fungi, het loci (for heterokaryon incompatibility) are believed to regulate self/nonself-recognition during vegetative growth. As filamentous fungi grow, hyphal fusion occurs within an individual colony to form a network. Hyphal fusion can occur also between different individuals to form a heterokaryon, in which genetically distinct nuclei occupy a common cytoplasm. However, heterokaryotic cells are viable only if the individuals involved have identical alleles at all het loci [1]. 
20.60 20.60 21.60 21.40 20.30 19.80 hmmbuild -o /dev/null HMM SEED 606 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.11 0.70 -6.32 20 311 2009-01-15 18:05:59 2003-09-10 14:28:57 6 5 127 0 157 385 0 398.00 44 72.79 CHANGED Mushh....ssLLlhhll.LllLPspstAFGAGNIASISpVEG+NWRHGDIEDhLtTlAhh+.......G+KaoshhVKRVYFGNWLRDYSQAlDVGTLK.uVsA-TIRILVWVLuFhoFGYATuEFEVTsERLGsYRPEEHIDNPKsYADspDARcYDsRLRGPVcp.tEL-IDPcTGMKNYIANE....sGGW.uTSAuYlRhohuRsIHaGRhYo.....cpGp-pDLpEALRhLGQuLHsLED.FuAHoNYsELALhEhGa+s......VFPHsGssTplpl.pG++..VYPLVTGTFGuVDFlHSVLGEAoDHhTQSElsEl-tALssAcssssss......................s.uslpuLLuKlP..........G.....sGDshsscA--LcppSpAQptpNpp..t............................................t.s...lsshpP-h.......ssh...cshtQIYPILEFRDclV+uIsshI...EKIPGLpsLlEpIoEsLTlFVhoLLAPFl+PIIsploppLpsGSSuVIsoStpcQaEsWssspCoDPTHSMLSKDHFSNlLNEPAG+VASsIlpYVVPRllhAW-ssslsscpVl--hLp.VFHHPAlRs.....psEh+RpMFcsVcpWhpspscctpc.LpchLSuEGV+sG+N+......psGspspuHupGpshshsts..s................uputsuuuhhpphppp 
...........................................................................t................................................................................................a.......................................................................................................................................................................thcs..l.......................hthD..ouhKtYl.....t.........t.......h..TSsshl+h.ht+sIchGRhYs.....tpspctDLhEALRhLGpuLHsLE.D.FsAHSNYsELuLh-hGp+p.......VF.PHVGssTplpl......p.....G+p................VaP....LVTGTFGuVDFlHSVhGE..hsD+h...o.....QsElpphp.tl.tupt.tt.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 47 88 130 +7046 PF07218 RAP1 Rhoptry-associated protein 1 (RAP-1) Moxon SJ anon Pfam-B_16981 (release 10.0) Family This family consists of several rhoptry-associated protein 1 (RAP-1) sequences which appear to be specific to Plasmodium falciparum [1]. 
20.50 20.50 251.30 106.30 20.40 20.40 hmmbuild -o /dev/null HMM SEED 782 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.51 0.70 -6.44 2 109 2009-01-15 18:05:59 2003-09-10 14:37:53 6 2 16 0 7 110 1 637.70 65 99.73 CHANGED M.hhluSLlllFasLapNVusGIslNG-pphspph.sp-FN.DDhN.Wh.lsctpFLNoapcphSs.SFlEsKuSh-sG.......SpsscuSppGpG+...............DShsDhpFhss.s.s.KsupPpuspspu.tSSSsupopuSu.SssKSuS.aGtSs.S-.SshcSuptSu.....VGucE.t...........pEhY......FshpashpphpcphsILKN-hshVtpcEth.hDEphcchcpc+Kcth.KhhtEuD.E..hhsEp.pFlcpphp-ppltGuFocFhSpLNPFKK-.h..KpElShhTa..Is.plssKE..h+sLGlutpYp.Y.pohLYsCPNss.hFDohEsLptcl.c.+c+Euhhsphh-ppKECLKNhGlhDhELsDspsKhGssIGShGEhHlRLYEhENDLhKapPslDYhTLADsYKL.KNcl.pLppVNFCLLNPKTLE-FLKKKEIh-LM.G-D.ItYcEpFspaMp.SIsCHlESLIY-Dl-uSQDhthVLK.sKSKLallpsGLoYKu+KLl.KlasEIQKNP-.laEKLTWIY-NhYhlKR.aThhAhcsVC.pYlpHs..phYTpLp.hhshIl-.sRhYuuCFKNlhlYNAlISGIHEphKphhKLhPRpshL.DhHFpulhcKE.K..KhhpTsalh..aDPoVtuYAhhpl-RhsMVolINsaFEAKKKtLohhlupMKhDhholpNE-.KIPNDKuANSKLss+LhphaKAEI+caFKEMR.pYshLIphRY+uHhKKNYhhaKRL- .............................SLlllFasLapNVusGIslNG-pphupph.sp-FNhDDhN.Wh.lsccpFLNoapcphSs.SFLEsKSSh-sG.......SssscuSppGpG+...............DS+sDhpFhAsss.s.KTSpPpuspsuu.+SSSsup...o+uSS.SN........s....KSuS.aGcSspS-.SshcSuppSu.....VGucE.s...........cEhYs.....FsYKashsphpcpIsILKNEhshVtpcEth.hDEphcchcpc+Kcth.Khht-oD..E..hhsEp..pFlcpphp-ppltGuFocFhSpLNPFKK-.h..KpElShhTa..Is.plspKE..h+sLGluhpYp.Y.pohLYsCPNss.hFDohEsLptcl.c.cc+Euhhsphh-p.pKECLKNhGlhDhELsDspsKhGssIGShGEaHlRLYEhENDLhKapPslDYlTLADsYKL.KNcl.pLppVNFCLLNPKTLE-FLKKKEIh-LM.G-D.ItYcEpFspaMp.SIsCHlES...LIY-Dl-u..........SQDht...h....V...L.K.sKSKLallpsGLoYKu+KLl.KlasEIQKNP-.laEKLTWIY-NhYhlKR.YThaAhcsVC.pYlpHs..phYopLp.hhshIl-.sRhYuuCFKNlhlYNAlISGIHEphKphhKLhPRpshL.DhHFpulhcKE.K..KhhpTsalh..aDPoVtuYAhhpL-RhsMVolINsaFEAKKKtLohhlupMKhDhhSLpNE-.KIPNDKuANSKLss+LhphaKAEI+caFKEMR.pYshLIphRY+uHhKKNYhhaKRL..... 
0 1 2 5 +7047 PF07219 HemY_N HemY protein N-terminus Vella Briffa B anon Pfam-B_16745 (release 10.0) Family This family represents the N-terminus (approximately 150 residues) of bacterial HemY porphyrin biosynthesis proteins. This is a membrane protein involved in a late step of protoheme IX synthesis [1]. 28.20 28.20 28.80 30.00 27.00 26.60 hmmbuild --amino -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.08 0.72 -4.50 19 1471 2012-10-11 20:01:00 2003-09-10 15:05:05 8 28 1455 0 312 955 302 108.30 35 26.03 CHANGED GhVhIphsuhph-sSlhtslhhllhhhsslhl.......lhhllpplhpsstpsptahcpp+RcRuppulppGlhshupGchstAc+hht+u.uch..s-pssLthLhuApAAptpGc ...............GYVlIp.hss.a.pI..Es.Slssh.llhlllshlllah.......l.hll+plhpsss+spsa.assRKc+RAcpthppuLlcLuEGDa....ppuEKhhs+...s...Acp.............u-p...P.........slshL.lAAcAApppGD...................................... 0 64 156 231 +7048 PF07220 DUF1420 Protein of unknown function (DUF1420) Moxon SJ anon Pfam-B_17056 (release 10.0) Family This family consists of several hypothetical putative lipoproteins which seem to be found specifically in the bacterium Leptospira interrogans. Members of this family are typically around 670 resides in length and their function is unknown. 
18.10 18.10 18.40 18.40 17.80 17.30 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.19 0.70 -6.16 2 13 2012-10-03 03:08:05 2003-09-10 15:58:07 6 1 9 0 3 11 68 573.50 57 99.04 CHANGED MKFGLDANlsYPPLSVlYSIhLIhGCDFLGFYILKLFEspLGclKNTWhRWQAPLhGALLLSV.LYPLAL.uhTsRhhMcSsAhhLslLGhhNlshFlKphppNhs.lsh.p.Nl.+Slpap..h-h....cp.K+sF.LFK.hh.hhsKFsLIshNchLNlFIhLLhluYGhLALCPlTNADSLDYHIGVAIEILNQGKMPsF.GWFHGRLAGSGEVLNALGLAIGAEQFGSLLQFsGLLuIYGILuFYSFhEK..tsDG.VWRcIIIIAFLSSPVLVFLVSSsKPQLLQlGMTSFAlsLLLEIhSKhKTDKNKL.hF.LICILIMSsTQAKFSFFLSAFLIGLhSlh.LGSIRLahYGlLIulFFhlLI.FPulFWKIKNasSohIDsllhPLPG.saPGVppFEssLRNYpDSsL.FPLSLlhPNpFGVlTTlIG.GLFLlIFVKPIss.psFlLShhIIlFVILG.LhGQpASRFFLEPFlWhLISLIslsshth.Nl+Fs+psluhhllLQAshThsIl.lGIYQLhPGVFSIp.REKVMsQYuNGYsLMKWsG.sLPK-AVLLSQHRSlALSERKTLSLDWh.FVsFsShlASPYL+pIKDENVTHILhhG-hSKsoPF.GCIGNsIGKTKSp.shRNPhs.pDaFTsILsEFp.spLsQCuN.lL ..........................MKFGLDANlsYPPLSVlYSIhLIhGCDFLGFYILKLFEspLGclKNTWhRWQAPLlGALLLSVlLYPLAL.uhTsRhhM+SsAhhLslLGhANIshFlKphppNhsSlsYhp..Nlh+SlpapS.h-h....cp.K+sF.LFK.hh.hhsKFsLIspNchLNlFIhLLhluYGhLALCPITNADSLDYHIGVAIEILNQGKMPsF.GWFHGRLAGSGEVLNALGLAIGAEQFGSLLQFsGLLuIYGILuFYSFAEKF.psDG.VWRcIIIIAFLSSPVLVFLVSSPKPQLLQlGMTSFAlsLLLEIFSKhKTDKNKLhhFS.LICILIMSsTQAKFSFFLSAFLIGLhSlhpLGSIRLFFYGlLIulFFFlLI.FPulFWKIKNasSoaIDsllpPLPG.saPGVppFEssLRNYpDSsL.FPLSLlaPNpFGVlTTlIG.GLFLlIFVK.PIVsp+sFlLSlhIIlFVILG.LhGQKASRFFLEPFlWhLISLIsLsShup.Nl+Fs+psluhhllLQAshThsII.lGIYQLFPGVFSIplREKVMSQYANGYoLMKWsGhsLPK-A.VLLSQHRSIALSERKTLSLDWh..FVDFsStlASPYLKpIKDENVTHILhaG-TSKsoPFpGCIGNsIG..KTKSppVsRNPFN+pDaFTsILsEFp.DpLPQCuN.lL................ 2 1 2 2 +7049 PF07221 GlcNAc_2-epim N-acylglucosamine 2-epimerase (GlcNAc 2-epimerase) Vella Briffa B anon Pfam-B_17012 (release 10.0) Family This family contains a number of eukaryotic and bacterial N-acylglucosamine 2-epimerase (GlcNAc 2-epimerase) enzymes (EC:5.3.1.8) approximately 500 residues long. 
This converts N-acyl-D-glucosamine to N-acyl-D-mannosamine. 19.90 9.10 19.90 9.10 19.80 9.00 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.40 0.70 -5.43 20 1825 2012-10-03 02:33:51 2003-09-10 16:14:30 6 21 1288 23 552 1700 239 297.20 21 69.11 CHANGED Gh.Fp...p.LctcGpshsssc..tchhspsRplasFAhAtt.h.G.hsG.hh.s.h...ssHGlcaLppshR..DsppG...Gaahsl..s.ssu.s.hDssccsYsaA...FllhAhus.A...htuGts...cA..cshh...s....cshsll......-p+hhctctsh.ht-phstsashh...........G....pNsp..MHhhEAh.LAhh-sos..-tp..aL........spAtplschhhpchhs.....tsshhlhEaFchpWss..shst-...t.hcshhhpPGHthEWuhLl....lp....l.t......ttptsschl.spAcpLattulspuhshctu.GlsYh.hshpupsss....ps+h..hWspsEsltAA...stLtpt.oG-..pc...YhphhpphhshhtpahhD..t.hGtWacplstcsp..lspsl.uuposhYHhhsA ..................................................................................................................h.at.h.hs.t.p.G.p..h.t...t....thhhpsR..............lasau...huht.....h........s......p...st....hh...p..h...............scp.uh.c....hh.p.t.t.h.t.....Dt..phG..............G..a..a.htl..............s.stt...s......hc...t.......p.....+..p......hY.....sps..............F.....s...lh..uhup...s..........h.t.s.G.p.s....ps..p..ph..h...p...pshphl...................pchh.hs...t.t...t....s.h...hh.pp.h.s.t.shp..............................ps........................tN.s.t.....M.Hhh..E.uh...l.....h....h.h.c....s..pt.............-tt.......ah............................pph...pl....hph...hh....p.phhp...........ttp.h...h.l..Ehh....t..........ps...thp.............................s..h............hp..P........G.H.h...E...huhLh..........hp....ht.....................t.tt.....thh....tt....A.......t.l.......h....t...h...s..h...p...t...u..h..........s....p........t...u...Gh....h....hh....h..s.h...p...s.p..shs...................pp+h....hWs.s....Esh..hsh.............h.h..h...h....ph........o...uc........tp.............h..ph.hpph...hp.a.hh.p.ah..h...........D....p............G.t.W.a.t...l...stpsp...st.t...t.........s......
......................................................................................................................... 0 172 349 448 +7050 PF07222 PBP_sp32 Proacrosin binding protein sp32 Moxon SJ anon Pfam-B_17278 (release 10.0) Family This family consists of several mammalian specific proacrosin binding protein sp32 sequences. sp32 is a sperm specific protein which is known to bind with with 55- and 53-kDa proacrosins and the 49-kDa acrosin intermediate. The exact function of sp32 is unclear, it is thought however that the binding of sp32 to proacrosin may be involved in packaging the acrosin zymogen into the acrosomal matrix [1]. 20.70 20.70 98.50 21.10 18.80 19.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.79 0.70 -5.19 3 56 2009-01-15 18:05:59 2003-09-10 16:20:49 7 3 28 0 25 44 0 194.90 54 48.61 CHANGED Mh+sAAGFLLMLLcVLLLPLoPsoAE-STPASTPGSPLSsTEYERFFALLTPTWKAETTCRLRATHGCRNPTLVQLDQYENHGLVPDGAVCSNLPYASWFESFCQFA+YRCSNHVYYAKRVhCSQPVSILSPNTLKElEuSAEVsPTTMToPIusHsTlTE+QoFQPWPERLsNNVEELLQSSLSLGGp-Qssu...t+PKQEQhlp++QE.lQEHKpEEuQcQEEQEEEE..cEEEAKQEEGQGT-- .........................................p.Asu.l..LLcVLLLshs.PssAp-o...u.sTPGSPLSsoEYERFFuLLTPTWKAETTCRLRATHGCRNPTLVQLDQYENHGLVPDGAVCSsLPYASWFESFCQFsQYRCSNHlYYAKR..VhCSQ..PlSlLSPsoLKE....l-sssEVs.soo.hT.sP.hss+hpsTE.pQs.psWs-RLpsNlc...cLLpuuhSLsGpcp..t..........tpc.....+tQt.http..pttp..p-Qtc............................................................. 0 1 1 11 +7051 PF07223 DUF1421 Protein of unknown function (DUF1421) Vella Briffa B anon Pfam-B_17006 (release 10.0) Family This family represents a conserved region approximately 350 residues long within a number of plant proteins of unknown function. 
29.00 29.00 29.30 29.30 26.90 28.40 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.77 0.70 -4.67 9 104 2009-12-15 14:25:35 2003-09-10 16:29:41 6 4 24 0 56 102 0 285.30 31 67.71 CHANGED L+EVQsuVQlL+DKQEIsETQhpLuKLQls+tp..ppscspu.ps-sptpsssP..ht.p..t.hP..phshAhP......u.........lPss.us.Ps.p.psQ..........Psp.Qh..........................................................P.phPsp.lp.....s.P...........ttpPaasPP...u.QsptsPsQQ.....hp..PP.........................Q.tPsPtsPppppY....QsPPQhsQhpQ..............sPsPQ...hP.sssh.Pc....PY....tuhPPs...Psshs.p.P.usssP...hasss...sthYsssuuhPso.s..pt..PPP..t...............YshSGsPsp.u........usssatPptsup...utusuY...PphssAp....LPQuhshuSuP.shtt.....SspShpptPlsDll-KVsoMGFsRDQVcuslp+lTEsGQsVDhNslL ............lpEVptuVQlLpDKQElsEsphpLuKLQhsptp...pptp..ts.......h.p..ss.s..............h.........s.h.s...........s..............ss.........sp...................................t.p...........................................................s.p.s........................................hhs.s....t..p.......ssQQ......p...s..............t..t.s.P....s..pph...........Q..s..sQ.......Q......................pss...p...h...ss..............P.......Pa......s.sss......s.s.........s.s...s.....h..t.s.........h.ts.....sh.................sst....t................s........................a..su....s......s....s.......................hts.t.st.....shtss.Y....sp...ssAp....hPpt.....ssss.................ustshpphshs-hl-KlssMGas+-pV.puslp+hsEsGQslDhNslL.......... 0 8 34 46 +7052 PF07224 Chlorophyllase Chlorophyllase Moxon SJ anon Pfam-B_17130 (release 10.0) Family This family consists of several plant specific Chlorophyllase proteins (EC:3.1.1.14). Chlorophyllase (Chlase) is the first enzyme involved in chlorophyll (Chl) degradation and catalyses the hydrolysis of ester bond to yield chlorophyllide and phytol [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.96 0.70 -5.63 4 48 2012-10-03 11:45:05 2003-09-10 16:40:43 6 3 28 0 29 968 169 233.40 35 75.56 CHANGED sSssohssFpcGpapssplsl-............ssp.ssssPPKPLlIsoPsEtGsYPVlLFlHGhhLpNp.YSplhsHIASHGFIVVAPQLaplhP...PuspsElcsAApVhsWhs.sLpthLP...ssVpuNlphhAlsGHSRGGKTAFAlAL...Gau.....plpFSAlIGlDPVAGTuKsppTsPplLTYcPsSFsLs.IPVsVIGoGLG..thsshhssCAPsslNHcEFapECK.uspuHFVAsDYGHMDMLDDsh.uhhuh.huthhCKNGpc.+ssMRRhlGGIVVAFLpsahcsDsp-hhtIlts.ShuPhpL..usEhcsssshh .................................................................h......ssFp.G.ht..sp.hplt.............................ss...sP+......sL..h.l...h..sP...s..........t..G...s..a..PV.llFhH......G......h....h.....l.......h....N.....p......a.....Y...s.....p....l...ht....H....l...u.S.......H.G...aI.l..l...A........P.........Q........l....h........p....h....hs..................s...s..s................p.......-.....l.....c.........t........s...u..............p...........l......h...........s..........W......l.....s.....t......s..........L....t...t........h..L.P..........................t...V..p....s......s........l...p..+........l...slsGHS+GG.+.sA.F..A.lA...L.......shu....................plp.h...u...s..L.l.G.lD..P....V....sG.....hs.K.....t...p.h......P.l.......L.s....h...sp.SF..sh..s....hP........s.hV..I..GoG..L..G..........h....s.s.......CAP...tthNH..tpFa.cCp....st.a...Fl.stcYGHhDhLDD.....s.....h.u.....hhC.p..p.u.p..t...+p.MRchsuGlhVuFLps.h..sp....t.th..Ih....t........................th..................................................................................................................................................................... 0 4 20 26 +7053 PF07225 NDUF_B4 NDUFB4; NADH-ubiquinone oxidoreductase B15 subunit (NDUFB4) Moxon SJ anon Pfam-B_17132 (release 10.0) Family This family consists of several NADH-ubiquinone oxidoreductase B15 subunit proteins (EC:1.6.5.3). 
20.70 20.70 20.90 21.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.49 0.71 -4.32 10 149 2009-01-15 18:05:59 2003-09-10 16:45:59 7 1 114 0 79 140 1 106.10 34 93.10 CHANGED cYKsuPLuoLPpTLDPsEY.slSPEpRRApsERLAlRApLKRcYLLQhNDP+....R+ulIED.......PALsRWsaARshNlYPsFRPTPKTSLLGllaGhGPLlFWYaVFKTDRD++-chIpcG+L....DRsFslSh ...................................a.thS.-tpch..pRhthRspL+ppYhhphssPp.....ct..ull..D.......PALhRa.aAps.h.shY.pFRPTP...Ksu.hlG.h.h.h.uls.Pl..hhhhhshKo-RDtpcp.hppGph...c........................................................... 0 20 32 55 +7054 PF07226 DUF1422 Protein of unknown function (DUF1422) Moxon SJ anon Pfam-B_17087 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 120 residues in length. The function of this family is unknown. 20.80 20.80 20.80 23.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.37 0.71 -4.36 14 660 2009-01-15 18:05:59 2003-09-10 16:54:00 6 2 654 0 56 195 3 115.90 70 97.14 CHANGED Mspps.....ppE+tTLlLALlAGLshNushusLhoS.VsFSlFPlIALVLulYsLaQcYLppshspshPhlusAsFhlGlhhYSAllRApYPplGSNFhPhllulsLlFWIGhKLthhpppt .........M..KQu.sQD+GTLLLALlAGLSINGTFA...ALFSSIV.PFSVFPlISLVLTVYCLHQRYLNRTMPVGLPGLAAACFILGVLLYSTVVRAEY.PDIGSNFFPAVLSVIhVFWIGtKhRNRKQ.-s................ 0 3 15 34 +7055 PF07227 DUF1423 Protein of unknown function (DUF1423) Vella Briffa B anon Pfam-B_17028 (release 10.0) Family This family represents a conserved region approximately 500 residues long within a number of Arabidopsis thaliana proteins of unknown function. 
25.20 25.20 25.40 25.40 24.70 25.10 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.80 0.70 -5.82 10 189 2009-01-15 18:05:59 2003-09-10 16:56:15 6 7 34 0 96 177 0 301.30 29 51.54 CHANGED AREpV-llA-+M+chP-EaLEcLKpsLRuILEG..sup++-EhhhLQ+hVQoRSDLTscTLs+AHRVQLEILVAI+TGIQsFLHsslolSQssLlEIFLYKRCRNIACpStLPAD-CcCcICoN+cGFCsLCMClICsKFDFuVNTCRWIGCDlCSHWTHTDCAIR-u.IssGsSsK.uusGsuEMhF+CpACs+TSELhGWVKDVFQpCAPsWctEoLl+ELDaVs+IF+uSEDsRG+pLFhKs-ELl-KLKuplA-s.sAs+hILtFFQEl-.Duu+ShEss.-suRhI..APQ-As..N+IApVVpEulcpMphVu-EKhRhhKKARhul-sC-+ELc-KA+clupLchERp+KK.QlDELEoIVRLKQAEA-MFQLKAsEAR+EAERLpRIslAKo-KsEEEYASpYLKh+LsEAEAEKpaLaEKIKhQpcSp+s.pu.....GcssphhMhSKI+-LL .........................................................h......................................................................................................C+Nh..sCps..l.sspth.Cp.lCsppsuFCptChCslC.p+.F.D.sh.s.....ssp.WlsC........D........h......CuHhsHs-CAl+cthht.G.shp....ushu......h-h.FpChuCs+so-l...h.G.aVKclhphs.t.shph-sLh+cLphsp+lhpuScc.+...u+pLh.hscphht+Lc.st............................................................................................................................................................................................................................................................................................................................................................................................................................ 0 14 63 81 +7056 PF07228 SpoIIE Stage II sporulation protein E (SpoIIE) Vella Briffa B anon Pfam-B_17063 (release 10.0) Family This family contains a number of bacterial stage II sporulation E proteins (EC:3.1.3.16). These are required for formation of a normal polar septum during sporulation. The N-terminal region is hydrophobic and is expected to contain up to 12 membrane-spanning segments [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.18 0.71 -4.60 173 6344 2012-10-03 01:39:20 2003-09-11 09:29:08 7 400 2137 23 2480 8434 1928 192.30 20 35.38 CHANGED sssphslsluDVhG....+GlsA..Ahhhshlpssl+shs.tpshs..............................Pspllpclsphl.tphpss........................hasTshhsh........h-.tstplphssAGHss.sllhps.ts........sshhhh.tssGhs..lGl.ss..................hshpstphpLtsGshLllaTDGlh..........Est.....shssth....phl..phhtt............spphsctlhpplhp.....................hs..phpDDhsllll+hp .................................................................................................................................................................t..sphtlsluD.sh....G.........+G...l...s..A....A..h.h.h.....s..h...l.....p........s....h.....h.....c...s..h....s...p..p..s..hs...............................................................................ssp.ll...p.p....l.s.......ph.l.......t.p.hpps..................................................................hh..s.T...h..hhsh......................h-........p....s........t.......p........l....p..h.....s..s.AG.H..ss..s.h.l..h.cs...t............................ps.h.hh.h....t.s....s..u...hs...........l.G..l.....ss.....................................................hph..p..p..t...p....h...p....l...p..s...G....-.......h.l..llhTDGlh.........................................Esh.................s.ptp.h....h..t...p..phh..................phltt...............t.s......spp..h..hpt.l.h.ptltp.........................................htt.t...t.hpDDhsllhhch.t..................................................................................................................... 0 977 1887 2293 +7057 PF07229 VirE2 VirE2 Moxon SJ anon Pfam-B_17380 (release 10.0) Family This family consists of several VirE2 proteins which seem to be specific to Agrobacterium tumefaciens and Rhizobium etli. 
VirE2 is known to interact, via its C terminus, with VirD4. Agrobacterium tumefaciens transfers oncogenic DNA and effector proteins to plant cells during the course of infection. Substrate translocation across the bacterial cell envelope is mediated by a type IV secretion (TFS) system composed of the VirB proteins, as well as VirD4, a member of a large family of inner membrane proteins implicated in the coupling of DNA transfer intermediates to the secretion machine. VirE2 is therefore thought to be a protein substrate of a type IV secretion system which is recruited to a member of the coupling protein superfamily [1]. 25.00 25.00 35.30 34.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.89 0.70 -6.11 4 10 2009-01-15 18:05:59 2003-09-11 09:47:11 7 1 7 1 4 10 0 533.60 58 97.82 CHANGED MDPpupssutN.s.ph.tshtsts.ss.Ktp++cslsSoohs-IpMTsupEThE....GoPs+T-sluspLDt-.lDSpSStsSuSscGNQuElppELSuLFupMs.LPGpDRRsDEYILVRQTGpDtFsGTsKGsL-HLPTKAEFNAuCRLYRDGAGNYYPPPLAF-RIsVPspLpspht.hEs+-psK.pFQYKL-VWN+AHAEMGITGTcIFYQTDKNIKLD+NYKLRPEDRYlQT.+YGRRElQKRYpHpFQAGSLLPDIhIKTPcNDlHFVYR.AGDpYANKpFpEFE+hIKp+YGS-TEIKLKSKSGIMHDSKYLESWERGSADIRFAEFAGENRAHN+phPtATVNMGpQPDGQGGhTRDRaVSV-aLhQshPNSPWuQALK+GELWDRVQlLARDGNRYhoPpRLEYSDPEHFsplMsRVGLPtSMGRQSaAsSlKFEpFssQAAVIVAsGspLRDI+DLSsEcL.QQloppcVLlADRNEpGQRTGTYTSlsEYcRLhh+LPsDAApLLuEPsD+YSRDFVRP-Psh.PIsDSR..RsYEsRsRuQoVNuL 
...............................................................................................MDPpu.ssucNls.stttshpsss.scsKRpK+cslsSoThsDIpMTsupEThE...pGSPT+TEslus+LDts.lDSpSStsSuSsHGNQuElppELSuLFuNMu.LPGpDRRsDEYILVRQTGQDtFsGTsKGNL-HLPTKAEFNAuCRLYRDGAGNYYPPPLAF-+IsVPtQLpspht.hEsKEpsK.RFQYKL-VWN+AHAEMGITGTcIFYQTDKNIKLDRNYKLRPEDRYlQTEKYGRREIQKRYcHpFQAGSLLPDILIKTPcNDIHFVYRFAGDsYANKpFpEFE+sIKp+YGS-TEIKLKSKSGIMHDSKYLESWE.RGSADIRFAEFAGENRAHN+QFPsATVNMGpQPDGQGGhTRDRHVSVDaLhQsAPNSPWuQALK+GELWD....RVQlLARDGNRYhSPsRLEYSDPEHFTpLMNRVGLPsSMGRQSHAsSlKFEpFDuQAAVIVAsGPsLRDI+DLSsEKL.QQLop....KDVLlADRNEKGQRTGTYTSVAEYERLhh+LPsDAApLLuEPoDKYSRDFVRPEPAh.PISDSR..RsYESRPRuQoVNSL........................................................................ 0 1 3 3 +7058 PF07230 Peptidase_S80 Phage_T4_Gp20; Bacteriophage T4-like capsid assembly protein (Gp20) Moxon SJ anon Pfam-B_17388 (release 10.0) Family This family consists of several bacteriophage T4-like capsid assembly (or portal) proteins. The exact mechanism by which the double-stranded (ds) DNA bacteriophages incorporate the portal protein at a unique vertex of the icosahedral capsid is unknown. In phage T4, there is evidence that this vertex, constituted by 12 subunits of gp20, acts as an initiator for the assembly of the major capsid protein and the scaffolding proteins into a prolate icosahedron of precise dimensions. The regulation of portal protein gene expression is an important regulator of prohead assembly in bacteriophage T4 [1]. This family represents the protease responsible for the proteolysis of head proteins, a critical step in the morphogenesis of many tailed phages, Cleavage facilitates the conversion of the prohead to the mature capsid. All these cleavages are carried out by action at consensus S/A/G-X-E recognition sequences at 39 cleavage sites. 
Evidence of multiple processing sites in nine phiKZ proteins appears to represent a built-in mechanism by which the phage ensures that the majority of the propeptide regions are removed, and emphasizes the essential nature of processing in phiKZ-head morphogenesis [2]. The family is classified by MEROPS as a serine peptidase. 25.00 25.00 49.00 48.90 18.50 17.60 hmmbuild -o /dev/null HMM SEED 501 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.58 0.70 -6.18 11 1179 2009-01-15 18:05:59 2003-09-11 09:57:38 6 2 283 0 0 965 2471 203.10 65 98.50 CHANGED M........lcLFuhhhc.-p.phpcchpscstShssPcssDGAp-lps......uuasuhh.ph.Gs...thpspp-LIppYRplstpPEVDsAls-IVNEAIV.-.tsccsVpl-L.ssochSpslKc+IpEEFccll.clLsF-++upchFR+WYVDuRIaaHKlIDhcpPpcGItELRhlDPpplchlREhhpp..s..sGsplh+uhc.............EaalYssps.phs...hsuth.tuss.tl..KIs+DAIsYuHSGLhD..tscphlluYLH+AIKPsNQL+MlEDAhVIYRloRAPERRlFYIDVGNhPppKApQYlpsVMpphKN+lVYDusTGclKNpppthSMhEDYWL.RR-GtpsTEVoTLPGuQshG-h-DVcYFp+KLYcALplPhSRhss-s..uGhslGt.usEITRDEL+FoKFltpLpp+FpplFpD.LKTpLILKsIIT.-EW--.t-pIphsFppDsYFsElK-hEILppRlNhlspl-P..alGKYaSp-Ylh+cIL+hTD--Icp.ccQIEpE ..........................................................................................................................................................................................................................................................................................................................................................................................................................NLPKsKAEQYLRDVMsRYRNKLVYDAsTGE...lR.DD+KaMoMLEDFWLPRR.EGGRGTEIoTLPGGQNLGElsDl-YFpKKLY+uLNVP.SRl-s..-s...G..F.N..L.GR.S..oEIhRDElKFsKFVuRLRKRFSplFsDhLKTQLlLKsllT.sEDW-p.M.p.-.HIQYDFlhDNaFsELKEsElhsERLshlst......................................................... 
0 0 0 0 +7059 PF07231 Hs1pro-1_N Nematode_res_N; Hs1pro-1 N-terminus Vella Briffa B anon Pfam-B_17124 (release 10.0) Family This family represents the N-terminus (approximately 180 residues) of plant Hs1pro-1, which is believed to confer resistance to nematodes [1]. 25.00 25.00 105.10 104.40 22.10 21.20 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.27 0.71 -4.69 5 36 2009-01-15 18:05:59 2003-09-11 10:30:34 7 2 19 0 22 37 0 176.50 48 40.19 CHANGED MVDLDWKpKMVsS..DhPsKSPKLShpc.....slSlPp.hplPslSo-ISsAAPulCSAYEhYLRLPELRpLWSSp-FPpWsNEPILKPALQALEITFRFISsVLSDsRPYINRREWNRRLESLsTcQIEIIAsLCEDEEp...hRGouPsAsLSuG......................s.o.sppaSEhSLLPRLATWpKSE-VAQRILhoVE ..........................................................................hss.sl.p..ssssshstAYE.YLRLPELppLWpupsFPsWssEsllKPALQALEITFRhlSsVLSDPRPYhsRREWsRRLESLAspQlEllAhLCEs--p.....ttpAPhAcLSuu......................s.shspphSEsSLLPRLAoWpKScslAp+IhauIE. 0 8 16 19 +7060 PF07232 DUF1424 Putative rep protein (DUF1424) Moxon SJ, Bateman A anon Pfam-B_17284 (release 10.0) Family This family consists of several archaeal proteins of around 320 residues in length. Members of this family seem to be found exclusively in Halobacterium and Haloferax species. The function of this family is unknown. This protein is probably a rep protein due to conservation of functional motifs. 
20.20 20.20 23.00 22.10 17.70 16.90 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.03 0.70 -5.68 2 13 2012-10-02 18:54:06 2003-09-11 10:49:46 6 1 10 0 1 12 0 291.40 36 69.10 CHANGED MApps....G.+LRcElThDTSRuV+AsShupAl-pFpuWYsDQRsTQhVVEpt.hGEpVGFchPNRFTPEYREMLYAKAQSLERGLREcWGsLLHTuMVTLTASoT---Gt.RP.l-HhcDLLpSWpAVhcALt+sLEsREaEYLAIlEPpp......uGYsHIHLGVFV+GPVVAEQFpsVLDAHl+Ns.sAGc-AHp.Vh--stDEsAVplRRSu+PsRpsGlENLGAYLAAYMAGEYGsEs.tMPtpVRAFYAsMWAoGpQWFRPSNGAQchMQPEp.D-t-SlEEWEMVGIAPEGDLtD.IIEVDPppPRsDPYRRLRTPPPGG ............................................................t.+AsoWucAlspFpsaasDtRssphshEsp..G....-psuhphP..sRFsP-YpchhYAKupuLER....GL....R..cc.......aG....phL......HTuMVTLTASSssps..G+.tPPl-H.......hc-Llc..SWcA......VR+AL.tRV......L-......u+......EW-YhsIhEPHp................uGYsHlHlGVaVcGs.....V.....VAEpFpPVlDuHVcNC.sAGc-AHp.h..h--..sts-csVpV+Rsup.sts..sGVE.......NLGuYLAAYMAG.YssEshEhPhphpAahAshWAouRQhhc.SsuApEhMps-.............................................................t.............. 0 0 0 1 +7061 PF07233 DUF1425 Protein of unknown function (DUF1425) Moxon SJ anon Pfam-B_17314 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 125 residues in length. Several members of this family are described as putative lipoproteins and are often known as YcfL. The function of this family is unknown. 25.70 25.70 25.70 26.80 24.40 25.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.96 0.72 -4.42 37 877 2009-01-15 18:05:59 2003-09-11 10:56:45 7 1 872 2 101 375 20 92.40 42 74.46 CHANGED lhhssssLusplslsslpssps.sshhpussplpsphpsshplpYRFhWYDspGhplpsp.ssW+slhlpGppphplpulAssspAsca+lhl+p .....................l.Vh-solLuAGlosEpPslo.sS..DhpssAoupL.h...NcppcPls.VpYRFYWYDscGL....E..h....+...P.L........E.t...sR.o..lsIPAcssVoLhusAshhsA+csRLYLa.L. 
0 11 35 69 +7062 PF07234 DUF1426 Protein of unknown function (DUF1426) Moxon SJ anon Pfam-B_17431 (release 10.0) Family This family consists of several Banana bunchy top virus proteins of around 120 residues in length. Swiss:Q9IGU4 is annotated a movement protein whereas most other family members are hypothetical. The function of this family is unknown. 25.00 25.00 189.00 188.90 20.20 19.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.56 0.71 -4.36 3 31 2009-01-15 18:05:59 2003-09-11 11:09:30 6 1 3 0 0 31 0 116.50 82 98.88 CHANGED MALTTERVKLFFEWFLFFGAIFIAITILYILLVLLFEVPKYIKpLVRYLVEYLTRRRVWMQRTQLTEATGDuElsRulVEDRRDQQPuVlPasP+VIPP.QsR...R-DQAhRusAGPMF MALTTERVKLFFEWFLFhuAIFIAITILYILLVLLFEVPKYIKpLVRYLVEYLTRRRVWMQRTQLoEATGDVElGRGIVEDRRDQQPAVIPpuSQV.PS.QsR...RDDQGRRGNsGPMF..... 0 0 0 0 +7063 PF07235 DUF1427 Protein of unknown function (DUF1427) Moxon SJ anon Pfam-B_17474 (release 10.0) Family This family consists of several bacterial proteins of around 100 residues in length. The function of this family is unknown. 21.10 21.10 21.10 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.17 0.72 -3.91 32 341 2009-01-15 18:05:59 2003-09-11 11:15:13 6 2 240 0 124 290 13 72.20 43 92.05 CHANGED KsYllSLuAGlLVGllYuLlsVRSPAPPlIALVGLLGhLsGEQllPls+phlsutthtst.httth..ph........ttttt.............ttppp.s .....h.YllSLusGlLVGllYullpV+SPAPPll.A.LlGLLGhLlGEQllPhs+phhtt....................................t........................... 0 19 46 82 +7064 PF07236 Phytoreo_S7 Phytoreovirus S7 protein Moxon SJ anon Pfam-B_17475 (release 10.0) Family This family consists of several Phytoreovirus S7 proteins which are thought to be viral core proteins [1]. 
19.80 19.80 20.30 20.80 17.10 19.70 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.56 0.70 -6.18 4 20 2009-01-15 18:05:59 2003-09-11 11:20:55 6 2 11 1 0 21 3 470.80 42 87.70 CHANGED hsAIVCluLLoE+sVLoRsLsDpsKcFYpulpGphh.tscsl..Tc+huppSl.lR+lVPosTlILDaKsEsFI+pN.shSL.DIsGSPSNTAPKTsFpSIMPSLSsLFssPFlQGAFRHslISoh.GptTsLLILVVGPPSGF.cp.sVuSuuSsV-ssoNAcIDLsclVulNosMhppTpLVSAuulpAMulsDVhl+CsSLDplLlshAlKYF+sYVsshpsuuhspATpIhLNosFcELFslpsscpthlts-shtsspsphRGlVLPhGHGp..ssLpp+HP-lFI-.-ulFoc-E+ucLsch+hp..-sDsaEE...hFsphVpKalppGcYGN+VIluH+sspLsssGlpIlGhaplss.pslcpplp...ShKsclDhs+pNWctlpspshVsssTlppLHctIL.Dhhssospull.ss...uscspEtIslpFhNGFPpcKaullpLEKsGIpVVu..phsDss.lVlsNsPshVSRutKuptp......psRu+hDAtplslDTDclSpsphIphl+oh .........MoAIVsVsLLSEpsVLsRsLsDhVKshYptlpGssssst-sI..pt+hospol.lp+hlPhsTVhl-ahDpcaIptNPshShM-IlGSsuNTAPKTTFQSlhPSLSALFGVsFIQGAFhHpVISop.GsphShLlLVlGPPusFh+psSVuuuSSlVpV-SsucIDLsDsVtINuhMlpsTKLVSASulpAhuls-V..KCsSLDshlIhpAlpYF+pYs.ttphGohDsATplhLNhPhcElFSsc..oupuc.Lpsc..sa+-.scupltGlVLPsGHGK..oTLsScYP-lFlplpt.hsc-cppth.....s...suDshEp...happhIp.pplscGcYus+V.lLucssccL.ssGl+llGsa+lss...spVccpls...shKschDphcpsWcshSG.shVoscslshLHDKlL.DllospsKulhhscp..........ph+scppIslhFKNGFP.t+aShspLpKpGIsVVp..-Ls-ss.lllsNtPppssRps.tpsK......sphpphcus.+VslDTDslop-phIsKlKs.L........................................ 2 0 0 0 +7065 PF07237 DUF1428 Protein of unknown function (DUF1428) Moxon SJ anon Pfam-B_17402 (release 10.0) Family This family consists of several hypothetical bacterial and one archaeal sequence of around 120 residues in length. The function of this family is unknown. 
21.00 21.00 21.20 21.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.23 0.72 -4.12 72 547 2009-01-15 18:05:59 2003-09-11 11:24:00 6 3 516 2 124 281 36 102.90 66 86.44 CHANGED sYlDGFVlAVPsuN+-sYpchAppAutla+.EaGAlchVEsWGD................DVPpG.........clTsF.pAVpscssEsVVFSWlhWPsKtsRDsuhp+hM....sDPRhps..s..sMP.FDGK ...pYVDGFVVAVPA-KKDAYREMAuKA.APLFK.EFGAL..RlV..ECWAs................DVPDG.........KVT.DFRMAVKAEEsEpVVFSWIEYPSKEVRDAANpK....MM....u.DPRMKE..hG-..sMP..FDGK............. 0 37 76 97 +7066 PF07238 PilZ PilZ domain Bateman A, Galperin M anon Pfam-B_17421 (release 10.0) Domain PilZ is a c-di-GMP binding domain [3] which is found C terminal to Pfam:PF07317. Proteins which contain PilZ are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias [5]. This domain forms a beta barrel structure. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.29 0.72 -4.00 191 5848 2009-01-15 18:05:59 2003-09-11 11:30:13 9 113 1860 15 1946 4774 350 102.40 16 32.82 CHANGED ppRpt..........Rhphphss..................hhttsspth.........p.....sthhDlShuGh..tlph.....tp..thphsp...........l........plph.thsstth................................htupVhphptp..............thGl...pF.......ths.pptppltphlht ................................................................hRp........R.ht.ht.h.s......................................hhht.s.s..phh..............................p..........spl....hDlS.tuGh..tlth....sp..........thphGp..............tl............plpl....phsptsh.............................................h.........htu.pl..h...phptp........................tlGl..pF.......ths...t..t..l.th...h.............................................................. 
0 680 1242 1622 +7067 PF07239 OpcA Outer membrane protein OpcA Moxon SJ anon Pfam-B_17433 (release 10.0) Family This family consists of several Neisseria species specific OpcA outer membrane proteins. Opc (formerly called 5C) is one of the major outer membrane proteins and has been shown to play an important role in meningococcal adhesion and invasion of both epithelial and endothelial cells [1]. 25.00 25.00 25.20 25.00 23.90 24.90 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.73 0.70 -5.46 2 56 2012-10-03 17:14:37 2003-09-11 11:44:44 6 1 36 2 2 42 0 202.30 53 89.45 CHANGED QELQTANEFTVHTDLSSISSTRAFLKEKHKAAKHISVRADIPFDANQGIRLEAGFGRoKKNIINLETDENKLGKTKNVKLPTGVPENRIDLYTGYTYTQTLSDSLNFRVGAGLGhpSSKDSIKTTKHTLHSSRQSWLAKVHADLLSQLGNGWYINPWSEVKFDLNSRYKLNTGVTNLKKDINQKTNGWGFGLGANIGKKLGESASIEAGPFYKQRTYKESGEFSVTTKSGDVSLTIPKTSIREY .....................................................sEaTV+TDlSp.o.ppA.LKEKHKstKpIuhRADhPFD.hppGhRhEsuauRsKKshhslpp.spp.hG........psV.EpRhDlYsGYTYTQsLS-uhphRsGhGLGaEp.KDu.ltspKtT.l.....popRpuahsKsHADLhSpLGsGWYlNPWuEVKhDLsu+hKhNssVsslppDINtK.TpGWGhGlGANIGKpLs-SsuIEAGPFYKpRsaKcSGEF.slst................................. 0 1 1 1 +7068 PF07240 Turandot Stress-inducible humoral factor Turandot Moxon SJ anon Pfam-B_17438 (release 10.0) Family This family consists of several Drosophila species specific Turandot proteins. The Turandot A (TotA) gene encodes a humoral factor, which is secreted from the fat body and accumulates in the body fluids. TotA is strongly induced upon bacterial challenge, as well as by other types of stress such as high temperature, mechanical pressure, dehydration, UV irradiation, and oxidative agents. It is also up-regulated during metamorphosis and at high age. Flies that over-express TotA show prolonged survival and retain normal activity at otherwise lethal temperatures. 
Although TotA is only induced by severe stress, it responds to a much wider range of stimuli than heat shock genes such as hsp70 or immune genes such as Cecropin A1 [1]. 20.70 20.70 24.50 52.70 20.60 17.90 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.88 0.72 -4.49 9 49 2009-01-15 18:05:59 2003-09-11 11:50:11 6 1 7 0 12 57 0 82.40 32 60.05 CHANGED susDcsoKh+sh.pLlshYc+assph.LsspcRtphcchlpcacscp....hVDGVPuQGGhhstlhttll..hu.tlssuhhcthhE ...usD-sTKhcsl.cLlshYc+assph.Lssp-+ppl-chlpcaccpp....lVDGVPuQGGhhtplltphls.suptlssshhp..................... 0 8 8 12 +7070 PF07242 DUF1430 Protein of unknown function (DUF1430) Vella Briffa B anon Pfam-B_17033 (release 10.0) Family This family represents the C-terminus (approximately 120 residues) of a number of hypothetical bacterial proteins of unknown function. These are possibly membrane proteins involved in immunity. 28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.35 0.72 -3.94 19 961 2012-10-03 05:18:07 2003-09-11 13:29:22 6 2 507 0 38 409 3 99.70 36 16.07 CHANGED Ilhlhhs..lhlhhphshhYFcpa++clhIK+laGhshhcpappalhhplhshlhshshhhlhpp..s.llhhhslllhlhphllhlhhp.+hpp+thhpll.KG .................Ilsluou..ILLF.ohNhLYFEpFRRplhIKRluGhphhEhHtp..YL...luQhulh.lh.Ghllohllp+......-hh.luhLslhlFhhpuLLpLhhQ.pKEp+hshtllKG................. 0 13 21 31 +7071 PF07243 Phlebovirus_G1 Phlebovirus glycoprotein G1 Moxon SJ anon Pfam-B_17508 (release 10.0) Family This family consists of several Phlebovirus glycoprotein G1 sequences. Members of the Bunyaviridae family acquire an envelope by budding through the lipid bilayer of the Golgi complex. The budding compartment is thought to be determined by the accumulation of the two heterodimeric membrane glycoproteins G1 and G2 in the Golgi [1]. 
25.00 25.00 44.80 44.00 21.40 21.10 hmmbuild -o /dev/null HMM SEED 526 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -13.14 0.70 -6.35 4 348 2009-01-15 18:05:59 2003-09-11 13:41:35 6 4 57 0 0 288 0 338.10 54 53.03 CHANGED sHlpNRPGpGtat..hssp-DusC+.lsYGssCpuFDahLchs+aPFFpSahcH+ohLEAhtDsI.Itppss.oCsl..sot+sstCh+EtthhKt+CPsshsusaYlsspGclssVKCcpNhELoEDCs.CRphptpu......pKh.hPLQDhhCQpsps-hossKphhKGlCcIGlhshKcCc.phoosaEsVsFhlhKs+tKlYl-phcl+sc-slpp-sFlCY....th.ss-ssuss+ttLKpVcVspCKtVssSpsKhCoGDpsFCS+YsCpsphs-saC.hAsGSGslplplsGVWlpPhClGYERVlVcREh.ts..s.pcsCDTCloEChcptllV+STGFcIouAVACSHGuChSspQpPSTplhlPYPGhupSlGGDIGlHhoH-ssssSsHhVsHCPP+DsChsHuCllCsHGllNYQCHTsLSAhllshllh.hlhhhLhllppsLhll+lhPthLlsPlsWlshFhtWhh+ph+h+stssIsRlNccIGWh-puths.pcscchp++..tPhsR.psshLhlL.LlhossS .............................................................................................................................................................................MASVKCP.P.K.YELTEDCNFCRQMTGAS.....LKKGSYPLQDLFCQSSEDDGSKLKTKMKGVCEVGVQALKKCDGQLSTAHEVVPFAVFKNSKKVYLDKLDLKTEENLLPDSFVCFEHKGQYKGT..M.D..S..GQTKRELKSFDISQCPKIGGHGSKKCTGDAAFCSAYECT....AQ.spshC.hhptsu.l.lphsGhhhhPhChGapchhsphp..t...pp.t.CssChhcC..ttlhlhohs.cl..AssCupt.C...................................................................................................................................................................................................................................... 0 0 0 0 +7072 PF07244 Surf_Ag_VNR Surface antigen variable number repeat Yeats C, Fenech M anon Yeats C Family This family is found primarily in bacterial surface antigens, normally as variable number repeats at the N-terminus. The C-terminus of these proteins is normally represented by Pfam:PF01103. The alignment centres on a -GY- or -GF- motif. Some members of this family are found in the mitochondria. It is predicted to have a mixed alpha/beta secondary structure. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.81 0.72 -3.56 395 14038 2012-10-01 23:48:22 2003-09-11 13:48:50 10 36 2604 32 3439 10633 6054 78.00 20 34.08 CHANGED hhlp..plph.p.G.........t..........p.thpsp...tlp....p..h.....p.....h....ptG.............shasppplpp....shppLp.......spGa.....hspV.......ps..psp....pt....tt.......h..slshplpEs ................................................hlpplph..p..G..........s........................p.th.psp........hlp.........p..l....t..............l...........ctG......................................................chasp.pp.lsp...........shc.pLp..........spGa..............h.s.sV..........ps....php.......s..............sp............l..slhhplpEt.................................. 0 1092 2111 2811 +7073 PF07245 Phlebovirus_G2 Phlebovirus glycoprotein G2 Moxon SJ anon Pfam-B_17508 (release 10.0) Family This family consists of several Phlebovirus glycoprotein G2 sequences. Members of the Bunyaviridae family acquire an envelope by budding through the lipid bilayer of the Golgi complex. The budding compartment is thought to be determined by the accumulation of the two heterodimeric membrane glycoproteins G1 and G2 in the Golgi [1]. 
25.00 25.00 27.30 25.60 24.10 24.50 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.62 0.70 -6.23 4 259 2009-01-15 18:05:59 2003-09-11 13:53:40 6 21 64 0 61 265 0 373.20 38 33.86 CHANGED CS-pllAsSKlspCsscGSsThCplSGTlhl+AGsIGuEoClhLKGsc-sppKFloIKTISSELsCREGpSaWTo.YsPpCLSSRRCHLVGEChus+C.pW+sspsStEFoGlscsplMpEN+CFEQCGGlGCGCFNVNPSCLaVHuhL+Ss++EAl+VFsClDWsHRlsLElTsasGchphVsLsu.oTpFhsWGSloLuLDuEGIoGTNSaSFlcSuuGuFAllDEsaS..PRcGFLGEIRCsSEuuAloAHcSClhAPsLIcYKPMhDplECTTsLIDPFAlF.RGSLPQTRNGpTFouSh-K+TVQAFTsGtV+AsloLshDsaEVsF.sptssCsAoFlNlTGCYSCstGARVClplpustssshascscDpSlsllhslpssspD.CpVLHFopP.Vc.-hhYSCsup.+shlIKGTLlAhsPaDDR++pGGsSsVVNPKsGsWsh.sWhSGLhsWLGGPLKThLhILGalALuIlhhllllllstpuVsph...slKKKs .........................................................................CSE.l...AsS+lhpCsp.....pGsp..s..pCphoGosll+hGslGuEuCL.h.lKG....p....-s.p.pp..h......lpIK.TlSSE..L..sC+EGpSaWTspasspChSoRRC+hh...G-Chss+C.papsNphS.sEashhtcs.pshttstCh-pCGGhuC.G.CFsssPSCLFh+tahpssp...tphhclapC.sW..s.....phh..l..p..l...T......s.....t...p...ht.......h.....h.....h..s.ssp......hphs.o.l...s.l........s....t.l.....s.s.....ssh.s...hp................t.........th.....ulhsts.s........sh.GhlG.ElpCsocpssh..p..sCh.hs.slhthp..hsth...C...s...phhs..shh...h.s..t..lP.th..h..ss..h.p..attphtppsh...psh.s.st.lpsthpl..h.c.s.hpl.h.t...pCput.h.h.ploGCYSC.tGup...h...phphpu..pt..p..s...h...h..pt.ttph.........s..ssps.........h.h..s..s.....lp.ph.hsCs.t...p..h.lpG.L...h..t................t.....p.................s..t.htth...h..t...........h.h....lh...h.hhhh.................................................................................................................................. 0 3 13 61 +7074 PF07246 Phlebovirus_NSM Phlebovirus nonstructural protein NS-M Moxon SJ anon Pfam-B_17508 (release 10.0) Family This family consists of several Phlebovirus nonstructural NS-M proteins which represent the N-terminal region of the M polyprotein precursor. 
The function of this family is unknown. 23.90 23.90 24.00 30.70 22.50 23.80 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.72 0.70 -5.23 3 151 2009-01-15 18:05:59 2003-09-11 13:59:48 6 2 22 0 0 160 1 102.90 40 14.73 CHANGED IasLLsVL.TlulLVsuslpLcophSSSRs-TCFSsoTsPEhIEtYWth-StKc-.MPph-L+CRhs.-uDhKhhop.shIStI+EVpTSssEL+hSCGsps+SLGtlITsDGLNN.hhGshIIsCsTs.a.psIssG.t.sRl..DapplKppA...-EK-thIph.+TK-sE-.....D.+sQlhhpEIpQlKNplpK+RN-....lhRGQE+RDAKplscEoMARlusL+pc.h.LT..........DElpphKsphp.hRp+.h..pTVlPA.........LhosALLS..suPlsA ..............................................................................................................................................................................................................................................................................................................................................................................................................................hpshss...................................... 0 0 0 0 +7075 PF07247 AATase Alcohol acetyltransferase Vella Briffa B anon Pfam-B_17349 (release 10.0) Family This family contains a number of alcohol acetyltransferase (EC:2.3.1.84) enzymes approximately 500 residues long found in both bacteria and metazoa. These catalyse the esterification of isoamyl alcohol by acetyl coenzyme A [1]. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.56 0.70 -5.95 9 349 2012-10-02 12:01:53 2003-09-11 14:19:53 7 8 173 0 229 370 1 388.30 14 80.46 CHANGED +hGhlEpYahtcstpphaoNFsVhuphNptls+p...LshALRplll+pshLspslh.th..p..thhsussahspP.........l.cplchsDVl.................catc.s.phsshh.phlsphhhshssss.PhW+Llllsstpst.....ppal.alssHshhDGlSuspFacDLhcpLsphcsssh..................-hlhcY.....scDhs....KLPtPIpptl-h..hsshhslshhhhss.lhpth.hpch..............ppshhsshsssptutpshp.hlphossclppI+spsKpp...csTlTsalpusahluLtch.h.......shhp.sphshchtlPhssR+alP-ctE...papYGshVuuspa..hIpshshppsp.pph.......WsLsphappplspuhpstcphpshGs...ph.llchhNlcchhpsch.ppp..RusshlSNlGhhs......pssptaplpDhhFuQssu.ht.sFsLsVlSTssGGhNhslohhccs.pppsp.......ccFhshhcphl .............................................................................................................................................................................................................h..sht.hh.p....h................................................l....lphpt...hl.....................................t........t............t.........h...p...h...p...t...h.........h.....h.................p....P.W+.lhl..l...pt..............................h.l..hhhpHshhDGhSuhhFacshh.ptLp.t.h.pt............................p......lhp...................ts...................l....P.....s..hp.hhth...........hh........h.............hh...p.....h......h...htp........................................sh...h....tht..h..t......s...p..s.hhh.h..l...pls..s...p..p..hpplhpts+pp....tsTlTuhlpuhhhhultp...........hhtt.tt.h.t...hpht....h.shsh.Rpa..h..sp.t............t...................h.s.s..hl........s..t.....p......h............h......th.p..h..................pth.............................Wphspphp.p.plp..p..t.h.p.p..t.......t....hh..............l.....h.p..h.h..s..h............t.hh..t.ph.....t.p.......ttsh.lSNl.Ghhp.................................ttt.h............ltph.hFsps.......ht...h.
.hshh.s.h..t.h.h.hs................................h............................................................................................................................................................................ 0 50 118 198 +7076 PF07248 DUF1431 Protein of unknown function (DUF1431) Vella Briffa B anon Pfam-B_17470 (release 10.0) Family This family contains a number of Drosophila melanogaster proteins of unknown function. These contain several conserved cysteine residues. 21.10 21.10 21.20 21.20 20.90 20.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.00 0.71 -11.84 0.71 -4.25 17 156 2009-01-15 18:05:59 2003-09-11 14:24:22 7 2 17 0 90 156 0 139.20 29 58.24 CHANGED Cpps..Cst.h..phDhpaY+PSDKttRcYQpTWsEC...s.hhhK.K+lCt.t.hhsPphtRRptptpstss..t...............s.h.ph....tt.CP+lphP.sC+suRtPPpCchs.Rt.osC.pKhps.YPSFSECp+ttlsphs..PhECpCLctsshCphapth++p ...............................................C.....h..phD.haY+sSDKttRcYQpTWsEC....s.hh..h+.KchCs.tthh....s.h...RRp..t..p.h.tss.....................t....h.........pt.Cs.+.hths.sC+su.+hsspCphh.+t.sp.C.pKhts.aPSFSECp+..h....h...shECtChp.hs.shCphht.ht......................................... 0 15 16 57 +7077 PF07249 Cerato-platanin Cerato-platanin Vella Briffa B anon Pfam-B_17594 (release 10.0) Family This family contains a number of fungal cerato-platanin phytotoxic proteins approximately 150 residues long. Cerato-platanin contains four cysteine residues that form two disulphide bonds [1]. 
21.30 21.30 21.50 21.30 21.20 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.53 0.71 -4.10 5 199 2012-10-01 21:39:58 2003-09-11 16:37:23 7 4 96 2 128 194 0 112.80 36 70.21 CHANGED lSVSYDsGYDDAsRSMssVSCSDGsNGLlTKY.WPTQGuVPGFPRIGGlsuIAGWNSPsCGTCaKlTY.NGNTIaVlAIDuAuuGFNIupoAMscLTNGpAspLGRVD.AsYpQVusSsC..Gl ......................................ploYDssYD.su...st.SlssVuCScGsNGL..hs+h..apT.........GplPsF.......PhIGGs....slsGW.N..S.s.s..CG.s..CacloY.....s......G.....c..oIhllAlDp.........u.s.u.....G...FNl...uhsAhstL.TsG.p.....A.....s.....p.....h.....Gp..ls......sshppVssopCu............................ 0 40 73 109 +7078 PF07250 Glyoxal_oxid_N Glyoxal oxidase N-terminus Vella Briffa B anon Pfam-B_17519 (release 10.0) Family This family represents the N-terminus (approximately 300 residues) of a number of plant and fungal glyoxal oxidase enzymes. Glyoxal oxidase catalyses the oxidation of aldehydes to carboxylic acids, coupled with reduction of dioxygen to hydrogen peroxide. It is an essential component of the extracellular lignin degradation pathways of the wood-rot fungus Phanerochaete chrysosporium [1]. 
24.30 24.30 24.30 24.30 23.90 24.10 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.93 0.70 -5.30 8 414 2009-09-11 10:08:30 2003-09-11 16:40:41 6 32 162 0 296 444 14 222.20 27 32.88 CHANGED MHhtLl.+ss+VlhaDRTsaGsSpluLPsGt.CR.ssPp.DtstKhDC.oAHSlLaDVsTNslRPLslpTDTWCSSGulpssGoLV.......QTGG.t-...G-+slRhFoPCs....ssoCDWlEhsss..Lus+RWYuTNpILPDG+hIllGGRc..........sFNYEFaPc.sp.sspsssl.FLt-TcD.spENNLYPFVaLLPD...GNLFIFANsRSIlaDacpN..pVVKEaPpIPGG.sR..NYPSoGSSlLLPL.....hpssslssEVLVCGG .............................................................................................................................................hh.u..as........s.s.th.c....s...l.t....l...h....o.ssaCuuG.s.hl.s.s..G.phl............................psG....G.......t.s......................Gt.p.s.l...R.h....hs.P..ss........t.ss..s.WhE.sst............Lpps..RWYsosthLs.D..G.p.l.lllG.....Gpp.................................sss..h.E.hh.Pp........t......sst...s..hth....h..ht...h....pc......st..........tNLYPalalLPs...GplF.....lhus....p....p.uhlh.D.tst....ph.hpp..hP...s...lP.......u......s...s......R...sYP.sGuushLPl.............s..t.....splllCGG............................................................. 0 131 220 266 +7080 PF07252 DUF1433 Protein of unknown function (DUF1433) Vella Briffa B anon Pfam-B_17690 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 100 residues in length. 
25.70 25.70 25.90 25.90 25.60 25.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.17 0.72 -3.78 17 1163 2009-01-15 18:05:59 2003-09-11 17:04:15 6 2 257 1 16 477 0 88.50 47 68.38 CHANGED aacpQcpRIslYlKYNlpc...hKSlpFTphcpoPMGs.hIcGYINssKchcFpAhIustcs...pFpsshshstE.Ls+LlK..c..tK...SVsEI..Kc ...................YaccQpcRIsLYhK.a.Nhpc...hKol..+F.T.s.h.c.puPMGu.hsIcGYINcN.K.c.cFsAahsspcN..aQF.ss.sl.h.hocc..LucLlK...cp...hK....Ss--I.Kcc............................... 0 5 6 13 +7081 PF07253 Gypsy Gypsy protein Moxon SJ anon Pfam-B_17444 (release 10.0) Family This family consists of several Gypsy/Env proteins from Drosophila and Ceratitis fruit fly species. Gypsy is an endogenous retrovirus of Drosophila melanogaster. Phylogenetic studies suggest that occasional horizontal transfer events of gypsy occur between Drosophila species. Gypsy possesses infective properties associated with the products of the envelope gene that might be at the origin of these interspecies transfers [1]. This family contains many members with full-length matches; however, it also includes a number of very short sequences and short matches of sequences with other unrelated domains on them, which cannot be excluded. These matches may represent remnants of once-functional genes. 
20.30 20.30 20.70 20.50 19.20 20.00 hmmbuild -o /dev/null HMM SEED 472 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.66 0.70 -6.17 4 136 2009-11-18 11:40:33 2003-09-12 09:50:16 6 8 26 0 22 137 1 259.90 27 82.28 CHANGED s+ITDaS+ANYIPVhDGcVLVa-ppshl+HSuNlSEYtshID.ET-plp-SF.PpSHM+KLLpVDssHLRshLslLplHH..RlARSLDFLGTALKVVAGTPDssDh.+I+hTEspLV-uNspQIhINocTQpQIN+LTDTINKllpupKuD.lDTsHLaEsL.sRNRllspEIQNLhLTITLAKhsIVNPsILDHsDLcollcp....sTPI...hpllEsucI+VLQSpNhIHhlItaP+lphpC.pKVslhsVsHpcTlLRLc-sslAECc+-hauVps.CohTs+sTFCcpu.....pp-oCsppLHAGssApCH.sQsSHL+tIp.VD-GlllIN-tsApVpsDsssclhlpGTaLITF.pcSATINtopFhNhcpslsKtPGllpSPlLNI.u+c.lLSlPhLHRhs.pNLhpI+shpc-Vsutt.sphhhshGsllssGlhhuhslhLshRt++uotphp+slcphs.sE-GppLctGlVpN ....................................................................................................................................................................................................................................................................................................................................................................................................................htu..upCp..h..tp....h..lptu.hh.hhst..h.h...tt..p..ltGoallpa.ppph.lstp.a.s...............................sh..lp..p.t................................................................................................................................. 0 0 0 14 +7082 PF07254 DUF1434 Protein of unknown function (DUF1434) Moxon SJ anon Pfam-B_17586 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 135 residues in length. Members of this family all appear to be Enterobacterial proteins. The function of this family is unknown. 
20.70 20.70 21.10 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.78 0.71 -4.34 9 557 2009-01-15 18:05:59 2003-09-12 09:52:45 7 2 555 0 49 179 2 124.40 72 96.04 CHANGED MsLWpsDLRVSWRoQhhSLLlHGllshllLLsPWP...u.YsslWLlLLoLVVF-ClRSQRRIpupQGElpLLsDspLpWptpEWpIl+pPWhl+.GhlLpLppsss.p+Rp+LWLAADSMspsEWRcLRpLLL....pp ..................MVLWQSDLRVSWRAQWlSLLIH.G.L.V.AA.V..I..LLMPW.P....LS..YT..PLWM.l....L...LSLVVFD....CVRSQ.R.R....I.N..uRQGEI+LLMD...GRLRW..Q.G...QE.......WoIVK...sPW.hlKSGMhLRLR.s-u.G.........+.........R......Q....HLWLAADSMDEAEWR-LRRILLQQ........ 2 4 14 30 +7083 PF07255 Benyvirus_14KDa Benyvirus 14KDa protein Moxon SJ anon Pfam-B_17595 (release 10.0) Family This family consists of several Benyvirus specific 14KDa proteins of around 125 residues in length. Members of this family contain 9 conserved cysteine residues. The function of this family is unknown. 29.30 29.30 30.10 213.00 27.00 29.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.80 0.71 -4.25 2 7 2009-01-15 18:05:59 2003-09-12 09:58:37 6 1 3 0 0 7 0 122.60 80 96.84 CHANGED MthssSlsVaVtcsIT......psschFSlKhupWpLFTstVaVpYRths-cEpslKDssRLHFphpCVpCspKlphKtpN+sH.calppGalRhsRNFSIlGsCucC...h-Shs.tDEhcpslV MGMVDSLCVFVGRVITEGSESVEGVERFSIKFSEWKLFTTAVYVEYRQLGEKECSLKDVGRLHFNMSCVKCCpKLKCKKQNKNHSKHVQNGYLRKVRNFSILGVCGDC...CESFTLADEKHHVIV 0 0 0 0 +7084 PF07256 DUF1435 Protein of unknown function (DUF1435) Moxon SJ anon Pfam-B_17631 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 80 residues in length. The function of this family is unknown. 
22.10 22.10 22.40 24.10 22.00 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.58 0.72 -4.00 7 529 2009-01-15 18:05:59 2003-09-12 10:16:13 7 1 520 0 37 208 0 77.10 68 86.83 CHANGED MLpRsLuSGWGVLLPGsllssLuahsLSh-thRlLIVluhLhTssMLYHppLRHalLLPSClALluGlhLhhhshp.G .....MLQRTLGSG.WGVLLPGlLIsGLhaADLSsDQWRIlIlhGLlLTslMLYHKQLRHYlLLPSCLALIuGlMLhlMNlNQG...... 0 1 9 21 +7086 PF07258 HCaRG HCaRG protein Moxon SJ anon Pfam-B_17801 (release 10.0) Family This family consists of several mammalian HCaRG(hypertension-related, calcium-regulated gene) proteins. HCaRG is negatively regulated by extracellular calcium concentration, and its basal mRNA levels are higher in hypertensive animals. HCaRG is a nuclear protein potentially involved in the control of cell proliferation [1]. 20.70 20.70 20.80 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.74 0.71 -4.73 74 1021 2009-01-15 18:05:59 2003-09-12 10:30:12 9 19 136 1 629 1011 16 157.10 16 82.18 CHANGED lshlsplspp.t.......htplhp....hshptlpttpstp................................htphtp.ht.....hstp..................phpt.lhpulthllppuspt...shsppphpppL.ppht......lsp-.ptpslsphatppppplpptltppsht.hsp.Lpsl..........pWRlclpluSsph..pc...htpPhhhl.pLplpss.......tp.pslshEhshsplppllpplcphppphpsh .................................................................................h........s............h..hhp.hhp..l.h..t.t.t.p.................................h..thtt..ht.......h.s..t......................phpt.hltshth.lhpp..s.spt........shst..pphp.ppL..t.p..ht....................h.sp..-.tt.......phl.s.p.h.at...........p.p.p.pp.lp.pt.h....h.p..pt.......ht...hsp..ltsh..........pW..+ls.hp.h.u.o.sph...pp...........htpPhhhl.pLplpps.......................tp.p.th.thphsh..tphpphhtplpphtt.hpt.......................................................... 
0 238 284 439 +7087 PF07259 ProSAAS ProSAAS precursor Moxon SJ anon Pfam-B_17813 (release 10.0) Family This family consists of several mammalian proSAAS precursor proteins. ProSAAS mRNA is expressed primarily in brain and other neuroendocrine tissues (pituitary, adrenal, pancreas); within brain, the mRNA is broadly distributed among neurons. ProSAAS is thought to be an endogenous inhibitor of prohormone convertase 1 [1] may function as a neuropeptide [2]. N-terminal fragments of proSAAS in intracellular Pick Bodies (PBs) may cause a functional disturbance of neurons in Pick's disease [3]. 21.70 21.70 21.80 82.20 20.90 21.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.28 0.71 -4.76 2 25 2009-01-15 18:05:59 2003-09-12 11:08:19 7 2 17 0 12 20 0 165.00 77 74.38 CHANGED MAGSPLLCGPRAGGVGILVLLLLGLLRLPPTLSARPVKEPRSLSAtSAPLVETSTPLRLRRAVPRGEAAGAV.thu............tRututsttucDppARVL.tphLR.....uWtS.........PRsp..PLAPDDDPDAPAAQLARALLRARLDPAALuAQLVPAPAsAPRPRPPVYDDGPphP..cctGscp.Dh ......MAGSPLLtGPRAGGVGLLVLLLLGLhR.PPsLsARPVKEPRuLSAASsPLAETusPRRFRRAVPRGEA.AGAVQELARALAHLLEAERQERARAEAQEAEDQQARVL.AQLLR......sWGu.........PRsSDPsLu..DDDPDAPAAQLARALL.RARLDPAALAAQLVPAP..A..A..Ah..R...PRPPVYDDGPsGPDsE-AGDETPDV.. 0 1 1 2 +7088 PF07260 ANKH Progressive ankylosis protein (ANKH) Moxon SJ anon Pfam-B_17517 (release 10.0) Family This family consists of several progressive ankylosis protein (ANK or ANKH) sequences. The ANK protein spans the outer cell membrane and shuttles inorganic pyrophosphate (PPi), a major inhibitor of physiologic and pathologic calcification, bone mineralisation and bone resorption [1]. Mutations in ANK are thought to give rise to Craniometaphyseal dysplasia (CMD) which is a rare skeletal disorder characterised by progressive thickening and increased mineral density of craniofacial bones and abnormally developed metaphyses in long bones [2]. 
20.70 20.70 21.10 20.70 20.20 20.60 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.25 0.70 -5.45 2 82 2009-01-15 18:05:59 2003-09-12 11:17:30 6 2 49 0 46 68 9 277.50 65 67.24 CHANGED Mhch.S.TsYWPLIRFLlPLuITNIAIDFGEQALNRGIAuVKEDtlEMLASYGLAYSLMKFFTGPMSDFKNVGLVFVNSK+-t.KAlLCMsVAGlIAhlhHsLIAYoDLGYYIINKLHHVD-SVGuKTRKAFLYLAhFPhhDAMAWhHAGILLKHKYShLVGsASISDVlAQlVFVAILLpSpLEChEPLLIPILuLYhGALVRhTslsLGYYpNlHDhhPESous-.GGDATIKKMLSFWWPLALILATQRISRPIVNLFVSRDLtGSoAuTEu.................hT-hpuh.PhF..................TtIcphphsshuLSLoL.....Wo ...........................llpFhlPLuloslshshucQALNRGIAuVKEDAVEMLASYGLAYSLMKFFTGPMSDFKNVGLVFVNS.K..RD.RsKAVLCM.V..VAGslAuVhHsLI.AYoDLGYYIINKLH.HVD-SVGsKTR+AFLYLAAFPhhDAMAWhHAGILL.KH.KYShLVGsASISDVlAQ.....lVFV.AILLHSpL.......ECtEPLLIPI..LSLYMGALVRhThls.L..GYYpNIHDhIP-pSGsEhGGDATI+KMLSFWWPLALILATQRISRPIVNLFVSRDLtGSSAATEAVAl.LTATYPVGHMPYGWLTElRAVYPAFDKNNPSNKlhsssssVTtsHIK+FTFsChALSLoLCFlhFWT............................................................................. 1 12 17 25 +7089 PF07261 DnaB_2 Replication initiation and membrane attachment Moxon SJ anon Pfam-B_17543 (release 10.0) Family This family consists of several bacterial replication initiation and membrane attachment (DnaB) proteins, as well as DnaD which is a component of the PriA primosome. The PriA primosome functions to recruit the replication fork helicase onto the DNA [2]. The DnaB protein is essential for both replication initiation and membrane attachment of the origin region of the chromosome and plasmid pUB110 in Bacillus subtilis. It is known that there are two different classes (DnaBI and DnaBII) in the DnaB mutants; DnaBI is essential for both chromosome and pUB110 replication, whereas DnaBII is necessary only for chromosome replication [1]. DnaD has been merged into this family. 
This family also includes Ftn6, a cyanobacterial-specific divisome component possibly playing a role at the interface between DNA replication and cell division [3]. Ftn6 possesses a conserved domain localised within the N-terminus of the proteins. This domain, named FND, exhibits sequence and structure similarities with the DnaD-like domains Pfam:PF04271 now merged into Pfam:PF07261. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.39 0.72 -4.38 110 3374 2010-01-07 15:44:22 2003-09-12 11:29:14 6 17 1459 3 391 2143 21 75.80 23 26.80 CHANGED lhphhcpp..hup.LSsh-hcplppalpp...phss-llphAlcpAl.tp..schs..h+YlcpILtsWpcpslpTlcpspphtcp .........................hphhppp....ht..t...los.h-h....cplpphlpp........phss-....llphAlchuh...p.......schs.........hpYlp.p.ILpsWpc.p.s..lpThcpspth...t............... 2 151 271 328 +7090 PF07262 DUF1436 Protein of unknown function (DUF1436) Moxon SJ anon Pfam-B_17809 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 160 residues in length. The function of this family is unknown. 25.00 25.00 25.10 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.63 0.71 -4.75 9 103 2009-09-11 13:43:21 2003-09-12 11:34:26 6 1 77 1 17 82 1 145.10 44 86.54 CHANGED pch.sthphTscFlSlpoYSGhshhshDshtssplLsPDlsspslGcsILpALScSRhl..s.--.t.sFhDhEptp.pYcsWlpplMtpYuY+o++uLFKsMh.suIpllNs.IpIpPo+H-KLEuWosst.psu-hVhl..slDsSPEElGuuL+.AFp+C ........................h.s.h.hstchh.lpohuthshhs.D..hsspl...Ls.DsDspsLGpslLQALusSRTh............cs.Epp.-FFcpEch+p.cYc-WlspLhsphGYKT+RALFKNMhsssIh..lpNuslcIoPS+HsKLEAWsuh....-uDsVlL..slD..sSPEEIGAuL+LALSRC.... 0 3 8 12 +7091 PF07263 DMP1 Dentin matrix protein 1 (DMP1) Moxon SJ anon Pfam-B_17812 (release 10.0) Family This family consists of several mammalian dentin matrix protein 1 (DMP1) sequences. 
The dentin matrix acidic phosphoprotein 1 (DMP1) gene has been mapped to human chromosome 4q21 [1]. DMP1 is a bone and teeth specific protein initially identified from mineralised dentin. DMP1 is primarily localised in the nuclear compartment of undifferentiated osteoblasts. In the nucleus, DMP1 acts as a transcriptional component for activation of osteoblast-specific genes like osteocalcin. During the early phase of osteoblast maturation, Ca(2+) surges into the nucleus from the cytoplasm, triggering the phosphorylation of DMP1 by a nuclear isoform of casein kinase II. This phosphorylated DMP1 is then exported out into the extracellular matrix, where it regulates nucleation of hydroxyapatite. DMP1 is a unique molecule that initiates osteoblast differentiation by transcription in the nucleus and orchestrates mineralised matrix formation extracellularly, at later stages of osteoblast maturation [2]. The DMP1 gene has been found to be ectopically expressed in lung cancer although the reason for this is unknown [3]. 
19.20 19.20 19.70 19.60 18.60 19.00 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.11 0.70 -5.77 4 486 2009-01-15 18:05:59 2003-09-12 11:45:14 6 2 401 0 21 468 0 337.60 57 98.59 CHANGED MKToILLMFLWGLSCALPVARYQNTEScSSEEhKGcLAQoPsPPLESSESSEESKlSSEEQANEDPSDSTESEEtLGhDcpQY.aRPAGGLS+SsGptG-DKDDDED-SGDDTFGDDDuGPGPEEtp.GGsSRLsSDEDSsDTTQSpEDSsPQG-sSAQDTTSESR-LDsEDEsDSRPEGGDSTtDSESEEpWVGGGSEGESSHGDGSEFDDEGMQSDDPtohRSERGNSRhSoAGlKS+ESKGcDcEpASTQDSs-SpultasSRKhFRKSRlSEEDsRGEL-DsNoh.EVpSDSTEs.....stLoQS+EcS+uESQp-ScENpS.E-SQcVQDPSSESSQEssLPSQENSSESQEEsluESRGDNPDNsTSau.EDQEDS-SSEEDSLcp.SsSESpSpEEQADSESpESLp.SEESPESsE-pNSSSQE.GLQupSuSsESpSQ......-ShSEEDD.SDSQDSSRSKE-SNSTESsSSSEE-uQsKNhElESRKLTVDAYHNKPIGDQDDNDCQDGY .................................................................................................................................................................tt.oRhts.........s-DSuDsTpSp...E-Sssp.E-......us.......p....D..s......s..S......-......S+sh.-pE....................S-SEEaWVGGGS.E.G.-.SS.HG.D..............GSEF...DD..EGMQSDDPD....ol+S..ERupSRMs..S..Au.l..+SKE...S..psp.s.c.c.p.ussQD.S.s-.SQSV-psuR.KhFRKSRIS..t..EDDpu-hssSsoh..E.spSDSTEso............ppus.u.....Q...S+Es.SKS..-SQEDSp..E.s.QSQED..SQs.pDs...SSES.S....QEsDh.PSQE.....sS..SESQE..E..lh.ucSRGDNP..D.s...s...o.s...p...u......ED.Q-D..S.-...SS...EEDSLspsSsS.ESpScEEQAD.SE...SsES..L.p.SEES.ESoE..-ENSS.SQE.GLQSpSuS.s.E..SQ..S...........p......................p.....S.S.E....E.-..s..ps......Q-...s.s....c...S..........ts....S.sp-......p...-h-s.................................. 0 1 1 5 +7092 PF07264 EI24 Etoposide-induced protein 2.4 (EI24) Vella Briffa B anon Pfam-B_17540 (release 10.0) Family This family contains a number of eukaryotic etoposide-induced 2.4 (EI24) proteins approximately 350 residues long as well as bacterial CysZ proteins (formerly known as DUF540). In cells treated with the cytotoxic drug etoposide, EI24 is induced by p53 [1]. 
It has been suggested to play an important role in negative cell growth control [2]. 31.50 31.50 31.50 31.50 31.40 31.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.81 0.70 -4.91 191 1863 2009-01-15 18:05:59 2003-09-12 11:56:43 6 6 1751 2 510 1310 173 215.70 27 82.70 CHANGED suhttulp...tht................psphhthllhslhlsll....lhsshhhh..hhthltshh................................hshl.sh...............................hhhthhhhlhhhll...hhhl...hsslsshlsuhF.......hshluctl................................ptc.phsp.......hsthtshspul...hpplttlhhhlhhh..lhsl..hL...hh.lP..sl...s.hh..llahhlsu.....................ahhuh-ahshs.........h......shpctc.thhcpptsthhuhGhhhsh..............hhhlPllNllh.Pl .............................................................................................................................................................uhhhhhpshthh...............pPtltph.lllPlllsll....Lhsu....hhah....hh.stlssa...ls.slhshhs.........................sWlphh...........................allh.lsslshlll..........hsah...hss..lushI.uusF.......ss.hlAEpl.........p.............................................sst..phss........shhshhtsls...+hh...tc.ph..p.p....hhh.h.l...sts....llhl....lL....ha.lP....sl...Gthlssllaalhsu.................................ahlulp.Yt-as............hspp.........hshcch+.shl+.p.p+h.t..shtF.G.slsuL........................hshlPllNLhh.Pl................................................ 0 150 292 412 +7093 PF07265 TAP35_44 Tapetum specific protein TAP35/TAP44 Moxon SJ anon Pfam-B_17587 (release 10.0) Family This family consists of several plant tapetum specific proteins. Members of this family are found in Arabidopsis thaliana, Brassica napus and Sinapis alba. Members of this family may be involved in sporopollenin formation and/or deposition [1]. 
25.00 25.00 163.30 163.10 18.70 17.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.68 0.71 -4.34 5 15 2009-01-15 18:05:59 2003-09-12 11:59:58 6 1 7 0 5 15 0 121.30 65 99.95 CHANGED MSpISKVSSLCLLLLshFFLSSQPALSLRuPKhQ.SEPsS.................PEolhsDSSSsMsKI.DpssAKSMIAGFFSHKFPLpGWPF.KYPPFoM...VNPNIP..TNPS.....GAQEEoEKLPSSPSKGNKDGGNA .MS.IS+VSSLCLLLLV.hFFLoSpPALSLRuPK.Q.SEsAS.................PQThhDDSSs.Ms+I.D..pAKSMIAGFFSHKFPlpGWPFPKYPPFoM...VNPNlP..TsPS.....GAQEESEKlPSSPSKsN+DGtNA 0 2 2 2 +7095 PF07267 Nucleo_P87 Nucleopolyhedrovirus capsid protein P87 Moxon SJ anon Pfam-B_17842 (release 10.0) Family This family consists of several Nucleopolyhedrovirus capsid protein P87 sequences. P87 is expressed late in infection and concentrated in infected cell nuclei [1]. 25.00 25.00 29.20 29.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 654 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.36 0.70 -13.24 0.70 -5.84 6 50 2009-01-15 18:05:59 2003-09-12 13:14:04 6 4 38 0 0 51 0 343.30 28 61.12 CHANGED MD-p.NSLLIA+LAGpILTRDhsAVssIhHoPE+SLsQKLDsLpshlpuhssus.s.ss......suclphNShllsQshllRapsLphAVsFL+Ruspl..slspphLspIQssLppYcsYVspsshDpsl.l-saLNpAEssYtEIR...pscIh.FlK...................phpt.t.ppscups.sspsssu.....sPh......................................+.ssphhpthtN...pouhochspplpuu...hh.hth.h.th.p.pDpchphha.....................................tsh.pps.....EhPlIsNEpDFD+hsI-QLs...DYI++Nhss.phsas.sHsS...VcDVRpFAKslWRtps................ssspTPh..........ph.oPtpTPh.h.....s-p..uhQoP..sPt............................s.ps...hph.p................s.hPspssssss.ss.p+++RRRsssL..hs.....................S-E-E..............s-.cEsDaEp-RKRRREEDKNFLRLKALELSKYAGVNERMEKIVQVT+AMQcTYDYCNCKNTIsGTPsAssFspLL+RLNTYNLuHVEMTVNFYELLYPLTLYNDESN.......RIluYIFAAuNYFQNCAKNFspMRs-FNpaGPFsQIDSMVMFVIKFNFLCDLQsFFGpIDsLPsLuQPNhplHsVLlMRDKIVKLAFNuLQYsTssKo-NRRDPKHLQRLIMLMNADFNII 
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sst..satshlpplpphNLohlp.sVpFYcLL.PLshYs...s...sp...s.......pllh...aIhtussYF.NsAcNFs.hRtshps.t.htphDphshFhI+aNFLh.hRpFhspl......s.shss..N.+IhNVLhhhDplVpptasplpaph...........................t.......................... 0 0 0 0 +7096 PF07268 EppA_BapA Exported protein precursor (EppA/BapA) Moxon SJ anon Pfam-B_17945 (release 10.0) Family This family consists of a number of exported protein precursor (EppA and BapA) sequences which seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). bapA gene sequences are quite stable but the encoded proteins do not provoke a strong immune response in most individuals. Conversely, EppA proteins are much more antigenic but are more variable in sequence. It is thought that BapA and EppA play important roles during the Borrelia burgdorferi infectious cycle [1]. 
24.30 24.30 24.80 25.70 24.20 24.20 hmmbuild --amino -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.58 0.71 -4.50 7 89 2009-01-15 18:05:59 2003-09-12 13:19:13 6 2 21 0 2 84 0 119.70 57 82.41 CHANGED sIccNYs+AKKsFSKEDasLIpKRLDNYsFpsEYsKShhFuh.APcIRGsLRKIGIKEpuVhLDALDllGYLIKsKlhtphhhl....h.sI.pLIpGhPsuIFNaL.lQLsSDKIDYsEKYG-cAR-pFcpSYpKsKIssVK .......IccNYscsK+uFScEDFsLIppRLDNYsFc..sEY-KShhhuc.sPcIRGpLRKItIKEpuVhLDALDlltYLIK.Khhp...lhh.........sI.pLItGYPsu.....IFsYL.IQ.LsSDKIDYAEKYG-cA..Rp..pFccuYp+DKhssVK............................ 0 2 2 2 +7098 PF07270 DUF1438 Protein of unknown function (DUF1438) Moxon SJ anon Pfam-B_18024 (release 10.0) Family This family consists of several hypothetical proteins of around 170 residues in length which appear to be mouse specific. The function of this family is unknown. 25.00 25.00 115.70 50.70 21.80 16.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.06 0.71 -4.33 5 19 2009-01-15 18:05:59 2003-09-12 13:30:23 6 2 3 0 4 16 0 126.80 58 89.06 CHANGED MhpSFsGFVKDTSDTEEHALPSAQsLPALSTRCSKSETLCFuKEpSHCSEDGWIlDWDLY.h.CVFESVDYLRSY+RLNCAMKKGTEVFQSESQR-PQVSPGDVDNspDKDTEEPDQPhPSLLREKGLELETCDGGDCPDQDPASDSsRHLG ...hp.h.uFV+soScottHALPoAQVLPA.sstCshspsLpFutE.SHC.EDGWIspWs.ash.CV.tShDYLR.hppLNsAMK+GTElFQStoQRcPph..G.h.....psstcPDQP.PuL..LRcKGL-LtTC-GGDCPDQDPuSDSsRpLG. 0 0 0 1 +7099 PF07271 Cytadhesin_P30 Cytadhesin P30/P32 Moxon SJ anon Pfam-B_18052 (release 10.0) Family This family consists of several Mycoplasma species specific Cytadhesin P32 and P30 proteins. P30 has been found to be membrane associated and localised on the tip organelle. It is thought that it is important in cytadherence and virulence [1]. 
20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.35 0.70 -5.13 3 109 2009-01-15 18:05:59 2003-09-12 13:35:59 6 8 24 0 13 105 16 133.40 46 68.80 CHANGED McLss.hRhKKLFllutLLhsolLFSuLIlLATul.LVpcNsoE........psLsVVLHpsED.TssIQGRsITEQPWFIPTVAGuFGFSALAIILGLAIGLPIVKRKEKRLLEEKERQEQlAEQLQRIS.-QpEQQAlE.pPstEspsQupsQPAs.sVsssP.....QPQVQ.sFGPp.QQRls..RsGFP.QPN.Mu.RPGFN....QMPPH....PGMsPN....RPGFNP.P....GMsPRPGF........PH....PsMsPNMp..RPGF.pPQP...........GtFs+PGsPhsPNMt.RPGFsPN.GMsPsPuMtsPRuGhPP .......................................................................................................................................................................................................................................................................................................PNpMGMRPGFN.....QMPPQ.....MGGMP..PNph..hRPGF.NQMPPp.........sGM.PPRP...NF..............PNQ..M..PNMp..+PGh.pspP...........Gh.................................................st..t........................ 0 3 6 12 +7100 PF07272 Orthoreo_P17 Orthoreovirus P17 protein Moxon SJ anon Pfam-B_18125 (release 10.0) Family This family consists of several Orthoreovirus P17 proteins. P17 is specified be ORF2 of the S1 gene and represents a nonstructural protein which associate with cell membranes [1]. 25.00 25.00 53.90 53.90 23.20 22.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.94 0.71 -4.40 6 41 2009-01-15 18:05:59 2003-09-12 13:42:50 6 1 25 0 0 29 0 135.80 57 98.51 CHANGED MpthR+poFsVpcF-...FsP.hlpphhpPshoAlstsDsstYhNI-lspoHPhhssLssLLup..PssVHVpLlRRauL.SoLssICEaDCsLltlssh.h..sosspsS+lVVHaDstoQSsAAKRSRslDslhDFEhEYK.WRFus.h MQhLRHTTFcVpRFs...FsPlslpEhAhPSFTAITusDPopYFNIELPpoHPLhSpLPsLLSp..PCcVHVpLIRRFALaSTLSSICEYDCALLhsspAIh.LssusppSplllHWDGtSQSlAAKRuRphDTllDFE+-YK.WRFsu..l.. 
0 0 0 0 +7101 PF07273 DUF1439 Protein of unknown function (DUF1439) Moxon SJ anon Pfam-B_18280 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 190 residues in length. Several members of this family are annotated as being putative lipoproteins and are often known as YceB. The function of this family is unknown. 20.70 20.70 21.10 20.70 20.50 20.30 hmmbuild --amino -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.69 0.71 -4.49 38 831 2009-01-15 18:05:59 2003-09-12 13:49:14 7 2 797 2 138 426 24 148.20 48 78.82 CHANGED loEpElppa....Lscch.phpcphGhsGlhssplplsslslpIG.pp.ssclslsusupsplssh.hsshssplplplcutPhYDp-csAlYL+slcLlchslpspphpsslps.lhs.lhphlsphLsspPVYpLcpsc.hspulh+phspslcVcsGc ......................................IoEQEINQu.....LsK+N.NFsK.cIGl.....P....GlsDA+IsLoNLsSQIGR-E..PNKVo..LTGDAplDhsSL.FGsQcAshKLpLKAlPsFD+....EKGAIaL.....+-MEVVDsoVp..PE....KM.....QoV.....hQo.LlPYLNQu.L.+.sYFNQpPAYVL+EDsSpuEAhAKKLAKGIEVKPGc.................................. 0 19 54 94 +7102 PF07274 DUF1440 Protein of unknown function (DUF1440) Vella Briffa B anon Pfam-B_17831 (release 10.0) Family This family contains a number of bacterial proteins of unknown function approximately 180 residues long. These are possibly integral membrane proteins. 30.90 30.90 31.10 30.90 30.40 30.70 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.92 0.71 -4.17 27 965 2009-01-15 18:05:59 2003-09-12 13:51:36 7 1 896 0 73 342 0 139.50 50 77.98 CHANGED llSuhVKhGWEs.hPPRTP...............................pRstsNPPt....thL.p.lGlssp.spssYhausp.plsasuhllHauFSlsFA..lhYs.lluchaPplphhpGssaG.lhlalshHhllhPhhthsPss.........hc.PhpEHhSEhlGHIlWhWoI-.....ll .....................llSuhVKhGhEVshPPRoP.................................pRsthNPP.....hhL.-.LGlss...spssYTFusp.hhsaVu.lsHhhFSIVFA..lsYC.llAEhaPKlpLWQGhlhG.lllhlhhHhIhhPlhshsPsl.........a-hPatEalSElhGHlVWhWoIEll.............. 
0 11 30 56 +7103 PF07275 ArdA Antirestriction protein (ArdA) Moxon SJ anon Pfam-B_17857 (release 10.0) Family This family consists of several bacterial antirestriction (ArdA) proteins.\ ArdA functions in bacterial conjugation to allow an unmodified plasmid to evade restriction in the recipient bacterium and yet acquire cognate modification [1]. 21.00 21.00 21.10 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.27 0.71 -4.00 38 992 2009-01-15 18:05:59 2003-09-12 13:53:08 6 11 631 4 85 617 101 159.80 30 62.10 CHANGED clYVAsLupYNpGpLhGpW..lssstDt--ltt.hcclhhs......tp.........................................EEaAIHDaEshs...thplsEYssl-clsclsph.l...........p-hs.-st.tth.thh....saaus.p-hsp..cslcsh.ahuphcs.p......DhApthl--ss......hptlP.pp..L...ppY..hDYEAauRD....lplssshh..hsppGhhpst ...................................................................tlYlushu+Y.N.pG..plsGtW...hohP.h...Dh..--hp...c.clsls.......-p.........................................cE..ah...Ip..Da...E.hP.........hpl.s......Ea..s.slt.clN..clhch..l..............p-ls..cthtpt.hpthl......................shhu..s...h...p-h.p...h..csltph....h.......s-s.c.s.p....................DlAphhl-..-sG.......hl.s-lP....p.s....l...psY....hDY-AYGRD....Lthsuphh..hspcGhh...h........................... 0 37 63 74 +7104 PF07276 PSGP Apopolysialoglycoprotein (PSGP) Moxon SJ anon Pfam-B_17916 (release 10.0) Repeat This family represents a series of 13 reside repeats found in the apopolysialoglycoprotein of Oncorhynchus mykiss (Rainbow trout) and Oncorhynchus masou (Cherry salmon). Polysialoglycoprotein (PSGP) of unfertilised eggs of rainbow trout consists of tandem repeats of a glycotridecapeptide, Asp-Asp-Ala-Thr*-Ser*-Glu-Ala-Ala-Thr*-Gly-Pro-Ser- Gly (* denotes the attachment site of a polysialoglycan chain). In response to egg activation, PSGP is discharged by exocytosis into the space between the vitelline envelope and the plasma membrane, i.e. 
the perivitelline space, where the 200-kDa PSGP molecules undergo rapid and dramatic depolymerisation by proteolysis into glycotridecapeptides [1]. 17.30 17.30 18.20 22.10 14.70 17.00 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.96 0.74 -6.04 0.74 -3.39 3 74 2009-01-15 18:05:59 2003-09-12 14:03:55 6 3 4 0 0 72 0 13.00 96 81.03 CHANGED DDATSEAATGPSG DDATSEAATGPSu 0 0 0 0 +7105 PF07277 SapC SapC Vella Briffa B anon Pfam-B_17881 (release 10.0) Family This family contains a number of bacterial SapC proteins approximately 250 residues long. In Campylobacter fetus, SapC forms part of a paracrystalline surface layer (S-layer) that confers serum resistance [1]. 25.00 25.00 35.10 34.80 19.70 17.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.20 0.70 -5.04 83 327 2009-09-11 20:32:24 2003-09-12 15:17:17 6 4 209 0 133 355 145 216.40 28 86.79 CHANGED L.sspcHtsl+lpstps.hsaupshphlPlhssEFtpAsspaPIlFs.csspst...ahslAlhGlcpscNLFls.-GpWpus..YlPthlRRaPFhl..............tpspsssphslslDtsustl..s....ps-G..psLF.....spsGpsopaLpphhphLpphppstptTptFhptLtchsLLpshslslsh......tsupppslsGhaslsEc+LppLss-s................lhcLp+sGhLthIYspLhSLsplppL ........................Lss.pHtsl+lp..stps.htausshphlP....lhssEaspsstpaPIlFt.....csspt.........atslAlhGlpts.cN..L.Fl......s..sGpWpus..YlPshlcRaPFhl...............ts..tppsphslslD.sustl......s...........ps...cG....psLF.....tppG.psopaLpphhphLpphppshptTptFhptLtchsLLpshplslph........tsGpptplsGhaslsEc+Lpt.Lssps................lhpLpc....pGhLthlYstLhSltplppL................ 0 36 85 107 +7106 PF07278 DUF1441 Protein of unknown function (DUF1441) Moxon SJ anon Pfam-B_17966 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 160 residues in length. The function of this family is unknown. However, it appears to be distantly related to other HTH families so may act as a transcriptional regulator. 
26.50 26.50 27.60 27.80 25.50 26.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.77 0.71 -4.57 9 388 2012-10-04 14:01:12 2003-09-12 15:26:14 6 4 271 0 19 155 8 132.20 50 88.03 CHANGED cLNIsQlAtloGLHRQTVssRLtsls.PAsGospphKLYtLsDllsshhst.hsssssp.hDPs.....-RKAWaQSEp-RLKhE+EptpLlPssEVtppauslsKAlVQVLETlPDlLERDsuLoPstLscVQpllD-lR-plsptlh-sss- .........................................................................LNIsQLAAloGlHRQTVsuRLpsls.sAsGptpphKLYhLsDllsthht..hP..s.ssst..tc..hDPp.....-RKAWYQSE+ERLKhEpEptQLlPso-VcppaulhsKAlVQVLETlPDhLERDpGLpsstlscVQpllD-lR-plthtlh-sss-......................... 0 1 7 12 +7107 PF07279 DUF1442 Protein of unknown function (DUF1442) Moxon SJ anon Pfam-B_18012 (release 10.0) Family This family consists of several hypothetical Arabidopsis thaliana proteins of around 225 residues in length. The function of this family is unknown. 20.80 20.80 20.90 21.30 20.10 20.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.56 0.70 -5.24 4 79 2012-10-10 17:06:42 2003-09-12 15:28:21 6 3 17 0 57 104 0 202.80 33 86.19 CHANGED MKLVWSPETAScAYIDTVKSCcshcssusAEhluAhAAGWNs+LIVETWScGDsIAoSVGLsVAupHTsGRHlCIVPspcScotYltAMpttsoo.shsEslV....uEpsEcsMcclpGVDFLVVD.+p+EFs.ssL+.Athus+GAVlVC+Nuhp+uhssFpWpclL.Rsp+VVRoVhLPVuuGLEIsHVuAssuGsuts....+pRWI+HlDpRSGEEHlF++ ..................................................................................WSs-sA.cAYlpTlc..................s.....p.............p...h....p..............p....s..s.....s...........A...........EhlSAhAAG.NA+LlVpshup..u.........u..s...sToluLAsAA+..p...TsGRh..lCl.lPspps.hpthtpshtph.u.h.p....phsEhl...l..........G-t...scplh..tp.h.p.s.lDFllVDs+...p.c-ah..clL+....h..sc.hu..t..p.G.....A.VlVspN...u....hp..p..t.h.s.s.h..p.a.p.......tsh......pt.pph.V...+oshLPlG.pGl-ls+lusst......s.......spt..........tp.t....................p+Wlh+lDppoG-.ahh+........................ 
0 9 38 50 +7108 PF07280 DUF1443 Protein of unknown function (DUF1443) Moxon SJ anon Pfam-B_18027 (release 10.0) Family This family consists of several Baculovirus proteins of around 55 residues in length. The function of this family is unknown. 25.00 25.00 56.40 56.20 22.20 19.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.00 0.72 -4.39 23 52 2009-01-15 18:05:59 2003-09-12 15:30:27 6 2 52 0 0 46 0 43.00 38 74.87 CHANGED llllllFllslhlLhhL+LN+tQhpchLaYQYpYIPcsLlshV .....hhllllFlhslhhLhhL+lN+tplpchLaYQYpYIPcsLlshV 0 0 0 0 +7109 PF07281 INSIG Insulin-induced protein (INSIG) Vella Briffa B anon Pfam-B_17905 (release 10.0) Family This family contains a number of eukaryotic Insulin-induced proteins (INSIG-1 and INSIG-2) approximately 200 residues long. INSIG-1 and INSIG-2 are found in the endoplasmic reticulum and bind the sterol-sensing domain of SREBP cleavage-activating protein (SCAP), preventing it from escorting SREBPs to the Golgi. Their combined action permits feedback regulation of cholesterol synthesis over a wide range of sterol concentrations [1,2]. 
25.20 25.20 25.50 25.30 24.80 25.10 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.75 0.71 -11.36 0.71 -4.97 9 298 2009-01-15 18:05:59 2003-09-12 15:33:33 7 4 190 0 172 256 0 182.60 36 62.95 CHANGED psl+uslLFuFGVlaullshpL.......+cNahlhPVcLcsp+a.suW.h.uhaGhuusslG.lhPhlDoa..hGcs.................cchshpWsuVsRsVuAFVGIuaAh++LsapSoTQuSlTLAhssslLWYLhDRo+sGFhLSTlluluGssllhsLts.................ssh...hGlhphhoQ-.hhVRsWlsulLFsAsVsFGNIGRQLA .....................................................................h...pshlLF.h.GhhhuhllphL..................ppp.h..h.l.....h.s..........c....l.....s...h...h...ss..sW.........h.h...s.......h..h...G..s..u..usllGhLhPhlDph....hs-s.............................................................pp.h....pppWsuV.hRslusF.....V......GIsaA....t.........Kl.......s.........ass..s....l...QlS........LTLAhLslsL...WahFDR.S+o....GhhLu.lsl.uhlusl..hh.hls....................................................................tu.lh.phso..-h.lhlt...sWlsslhFsuslshGNIGRpLA.......................................................... 0 35 71 119 +7110 PF07282 OrfB_Zn_ribbon Transposase_35; Putative transposase DNA-binding domain Bateman A anon Pfam-B_4755 (release 10.0) Family This putative domain is found at the C-terminus of a large number of transposase proteins. This domain contains four conserved cysteines suggestive of a zinc binding domain. Given the need for transposases to bind DNA as well as the large number of DNA-binding zinc fingers we hypothesise this domain is DNA-binding. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.25 0.72 -4.32 72 6607 2012-10-03 10:42:43 2003-09-12 16:00:36 6 25 1414 0 1912 6323 252 69.10 32 19.70 CHANGED atphtphlcYKAphh.GhplhhV...ssta.TSppCstCGphtpp.........hss+hapCsp..CGhp.hcRDhNAuhNIhpc .....................ahphpphLpYKsp........h.h.G.t.p.l.l..t.V........s.ta...o..Sp.....p........C.......s......s.....CG..phtpp.....................tls.pR.p.a.....p.....Csp........C...G...hp....tc........RDh........NA.A.hNIht............................... 0 559 1281 1646 +7111 PF07283 TrbH Conjugal transfer protein TrbH Vella Briffa B anon Pfam-B_17942 (release 10.0) Family This family contains TrbH, a bacterial conjugal transfer protein approximately 150 residues long. This contains a putative membrane lipoprotein lipid attachment site [1]. 20.40 20.40 20.50 23.50 20.30 19.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.46 0.71 -4.25 14 125 2009-09-10 15:49:04 2003-09-12 16:08:24 6 2 104 0 26 94 9 114.40 33 73.22 CHANGED ussssptlAsDsVpQLstlYPPApTplpL.....pQtssDsFGtALlpsLRt+GYAVhEhssssttspu....................slsLpYVlDphssosLYRLTl.lGsQoloRsY.lspsushhPAusWs.RKE .......s...stptlAsDhlppLsphYsP.A.+..T.phpL.....p.p.p.sss.sFGpALlpsLRpcGYAllp.ssssstst................................l.LpYll-phss.ps..LhRlo..lsspplsRsY.hspssshhPuu.hs.h................................................... 0 3 13 21 +7112 PF07284 BCHF 2-vinyl bacteriochlorophyllide hydratase (BCHF) Vella Briffa B anon Pfam-B_17961 (release 10.0) Family This family contains the bacterial enzyme 2-vinyl bacteriochlorophyllide hydratase (EC:4.2.1.-) (approximately 150 residues long). This is involved in the light-independent bacteriochlorophyll biosynthesis pathway by adding water across the 2-vinyl group [1]. 
25.00 25.00 25.70 45.20 21.70 21.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.92 0.71 -4.33 26 115 2009-01-15 18:05:59 2003-09-12 16:40:04 6 1 97 0 54 112 133 138.10 55 81.21 CHANGED YTPEQhuRRsuSlWThVQuILAPlQFllFLlSlsLVlhYhssG..pGahhsolSIlhKTlhLhhIhlTGAlWEK.VFG+alFAstFFWEDVsShlshAhHshYhlhha.sthsspshhhlAlsAYsoYVlNAhQFLl+.lphu .YTPEp+tRRDuosWThVQGILAPlQFlVFLlSLsLVlpYLsTG..pGahhAThSlllKThlLasIMlTGuIWEKsVFGpYLFAPuFFWEDVhShlVlALHouYlsslh.shhsspt.MhlALAAYuoYllNAuQFLlK.LRhA..... 0 10 20 37 +7113 PF07285 DUF1444 Protein of unknown function (DUF1444) Vella Briffa B anon Pfam-B_18053 (release 10.0) Family This family contains several hypothetical bacterial proteins of unknown function that are approximately 250 residues long. 22.40 22.40 22.40 22.40 22.20 22.30 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.47 0.70 -5.06 18 530 2009-01-15 18:05:59 2003-09-12 16:42:35 6 4 507 0 70 307 1 250.70 46 91.43 CHANGED Msscphp-hLccRLs.psshsapaDRcc-sLRIcc+pstKGlTlsLssllAKaccpt-cAl-EllYYVpEulpsM..ppcsp.hpsp-ppIaPVIRuTSFPpco+-Gp..sLlac-HTAET+IYYALDLG+oYRLIDEphLcc.shTcccI+EhAhFNlRsLssshKpDcVAG...NhFYFlsoNDGYDASRILNcuhLp-hpppspGchsluVPHQDVLI.lADIpN-sGYDVLAQhoMcFFspGpVPITuLSFlY-pGcLEPIFILuKs+.+c ............................................................................................................................MsshpM+-cLcc+Lp..+.chcFpas+.....Ec.-sLRI.pp..c.sp.KG.ls..lp..LsullAK.YEsp...K....E..c.h....l-E.ls..........Y.YVpEAlttM...tccs......p.sp....spIhPVIR...uT.....S......Fsc....p..s..........+......pGh..............sF.l.h.c-.....HTAE.TtlYYAlDh..G.K.o.........Y.RLIDEphLpch.pLT.c.p.p.lREhulFNl.Rp....L....ss.p..h...p...p.DpV.pG.........NhFY..FlNoNDG.Y.DAS..RIL.N.suh....Lpc.hct..p.....h..p.G.-.M..l.VAV.PHQDVLI.I.ADI...cNcsGYDlhA...ph...TMcFF...sc...GhVPITSLSFsYcpG+lEPIFILuKNphp.................................................................. 
0 24 45 61 +7114 PF07286 DUF1445 Protein of unknown function (DUF1445) Vella Briffa B anon Pfam-B_18180 (release 10.0) Family This family represents a conserved region approximately 150 residues long within a number of hypothetical bacterial and eukaryotic proteins of unknown function. 25.00 25.00 26.20 28.30 21.20 24.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.74 0.71 -4.53 77 578 2009-01-15 18:05:59 2003-09-12 16:55:20 7 5 524 3 217 535 202 139.80 52 46.66 CHANGED VuFllGCSFoFEpALlpuG..lslRHl-pspNVPMY+TNlsspPAGhFsGs.hVVSMRPhsssclh+AlpITuRaPss.HGAPVHlG.DP.ptlGIpDlspPDaGDsV.slcsGE............lPVFWACGVTPQsslhsu+...sshsITHuP...GaMllTD ...................VuFllGCSFSFEpALhcsG..lslRHlp..p.spNVPMY+TslsCpsAGh.Fp.....Gs.hVVSMRPlss.spshcAhpITu+aPsV.HGAPVHlG..cP..p.....tl..GIp...Dls+P...DaGDsV.pl..c.sG.E.............................lPVFWACGVTPQuslhsu+.....sshsITHAP...GaMhlTD................ 0 58 114 173 +7115 PF07287 DUF1446 Protein of unknown function (DUF1446) Moxon SJ anon Pfam-B_17949 (release 10.0) Family This family consists of several bacterial and plant proteins of around 400 residues in length. The function of this family is unknown. 
20.10 20.10 20.10 20.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.86 0.70 -5.98 79 939 2009-01-15 18:05:59 2003-09-15 10:38:20 6 15 655 0 436 933 748 308.70 30 63.05 CHANGED IGsuuGahG...DRhpsshcls....p......tGslDaLsh-hL....................AEhThulht.......ps+hccPs.tG................Yssthhp..phcssLshshc......+Gl+lloNtGuhNPpu.....sActltplApchGLs.l+VAhVp.....GDDlhspltt.....tst..p.hpsut.sh.t...t...hl............SANAYLGAhuIscALctG.ADlVlTGRVsDsulhluPhhacFGWs.hsDaDcLAtuslAGHLlECGuQsTGG.Fuc..ac-l....s-.htpl..........GFPIAElssDGsslITKhs.GoGGhVohsTVpEQLLYElpcPtsYlsPDVsuDasslplp.....phGsDR..................V+V..sGspGps....Pss.hKVslshhsGahspsphshsG.ssht+Aclstchlpc+lttt.........sclch-LlG ...................................................................................................lussuuh.u...Dp..sh.thh.....p...........s..lshlhh.-hL....................uEhsh.hht..........tp...t.pst.....G........................Yt.......hhp..thc.....hLshs....hc......p.sl+l.lsN.....s.....G...uh..sP..tu........hsp.tlpplupc..hGls...h+lAhlt...GDslhshhtphh..........t.......h...s....h.........p.............h..........................sssAY...lGu.sIhcALct....G..A..D..lVlsGRssDsulhhu.s.hh.htasWt.....hs..D......aD.pL..utuhhsGHllECusps.oG.G.ast.........ht.h...............t.h.p.GaPl.uEltsc....GphlloKh.....s..oGGhlo.sTlstQhLYElpsP.tYh.sPD.......Vsschsplp.hp.....ph....u.s.c.R..................Vpl..sGsc....G.p..s.s.....P.s.s...hK.lsh..shhsGapsts.hshsG.t...h.t+u.chhtc.lpptltth.............................................................. 0 119 255 370 +7116 PF07288 DUF1447 Protein of unknown function (DUF1447) Moxon SJ anon Pfam-B_18163 (release 10.0) Family This family consists of several bacterial proteins of around 70 residues in length. The function of this family is unknown. 
25.00 25.00 25.50 25.30 24.90 24.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.34 0.72 -4.19 33 1005 2009-01-15 18:05:59 2003-09-15 11:09:40 6 1 979 0 87 267 1 70.70 51 96.82 CHANGED IYKVaYQcsppcsPhREpTcoLYl......EA-ochcsRphl-cp..sYNIEFIp.LsGpaL-YEKpsssFclsEa .....IaKVFYQEs+ccsPhREsTcoLYl......Eu-uchpsRphlc-p..sYNIEFIphLsspaL-YEK.E.s..u.s.FplsEh...... 0 18 42 65 +7117 PF07289 DUF1448 Protein of unknown function (DUF1448) Moxon SJ anon Pfam-B_18223 (release 10.0) Family This family consists of several eukaryotic proteins of around 375 residues in length. The function of this family is unknown. It appears that this family includes a divergent GRAM domain. 20.40 20.40 30.00 27.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.04 0.70 -5.52 13 172 2012-10-04 00:02:25 2003-09-15 11:11:37 6 9 121 0 110 157 4 282.90 47 87.44 CHANGED thWpDR-l+FD.lsspplphp..sGEtllshlssVEDTKGNsGcpGpLhVTNLRlIWaupsps+hNLSIGassItslss+.ps..pS+lRG.sopuLaIhu+..ssssRFEFlFT.............................ssstpssp........lFsolhsVa+sYpooplYR-LKLRuAllp.sGpLplLPpEplhs+lsGVhNlSu-QGsL.GoFhlTNlRlVWaAshN-sFNlSlPYLQIpulRlR-S.KaGhALVIETsppuGuYVLGFRlDPh-+LschhKElsSL+psastpPlaGlphphc....-ssttt.............psp..................s.p.pp--sE.l-schcsD...thsuYhAs..sspt.....sp-Rc....PlYssELGLAlE+l+sGhoLp-LWslh ...........s.hWpDR-l+FD..lstpthphR..sGEhll-plsslEDTKGNsG.....-pGpLhVTNLRllWaSh.shs+.hNLSlGasslhslss+.hs....pSK.........lRG.topA......LYIhs+........hs..soRFEFlFT.............................slsstss..........lFsolhsVa.+AYcoo+hYR-lKLRu.A.llp.stpLhlLPpEplas+lpGVWNLSo..D...............QGNL.GoFhlTNlRlVWaAshN-sFNlSlPYLQlp..ul+lR-S.KFG.ALVlcosp.puG.GYVLGF+lDPhE+LpphhKElsSLHpsastpPlFGVpaphc....-t..t.h.......ct................................s.p.h.--hE.l-p.p.pp-......shsuYaAs..ssp........ttpRt.......Psas.cLGlAhEtlhpGholpsLWpl.................................... 
1 50 59 90 +7118 PF07290 DUF1449 Protein of unknown function (DUF1449) Moxon SJ anon Pfam-B_18269 (release 10.0) Family This family consists of several bacterial proteins of around 210 residues in length. The function of this family is unknown. 21.60 21.60 22.10 22.10 21.00 20.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.25 0.71 -4.98 3 565 2012-10-02 01:11:51 2003-09-15 11:14:22 6 1 525 0 47 238 9 190.70 56 95.09 CHANGED FA-YNoPYLFAIAFVllIGlLElloLIsGHhLSGALDAHLDHYDulSoGsluQALHYLNIGRVPALVVLCLLAGaFGLhGILIQHuuIMVWQuPLSNLllVPlSlllSVhAVHYouKIlAPWIPRDESSAlsEEEaIGuMAlITGHsAsuGsPCEGKlTDpFGQIHYLLLEPE.cGK.FcKGDKVLIVCR....LSATRYLAEpsPa ..............................................................FA-YNoPYLFAIuFVlh..IGl..LEhhuLIhGHh.......LS.......GA...L.........D....A....H..L........D...H.........Y.........D...u......l......o...o..G..........h...u...Q.....A......L.H....YLNIGRlPALVVL...CL...LA...GaF.G.LhG..ILlQH...u.s.lhl...WQ...uP...LSNL....h......l.....V....Pl.Sll..h..olhAVHYoGKllAPWl.PR..Dc..SS...A...loEEEaIGuM.AlI...TG.Hp...A...suGsP.CEGKlTDpFGQIHYLLL.E.PE..cG.K..F.pKGDKVLIlCR.....LSATRYLAEps.a........................................... 1 10 23 35 +7119 PF07291 MauE Methylamine utilisation protein MauE Moxon SJ anon Pfam-B_18306 (release 10.0) Family This family consists of several bacterial methylamine utilisation MauE proteins. Synthesis of enzymes involved in methylamine oxidation via methylamine dehydrogenase (MADH) is encoded by genes present in the mau cluster. MauE and MauD are specifically involved in the processing, transport, and/or maturation of the beta-subunit and that the absence of each of these proteins leads to production of a non-functional beta-subunit which becomes rapidly degraded [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.17 0.71 -4.66 5 457 2012-10-02 13:32:46 2003-09-15 11:20:18 6 7 374 0 184 485 112 150.00 21 71.32 CHANGED pslLs-PsVshhlRlFLALLLAAAAIPKLRHsEEFaGVVRNFRLLP-aLuRPVAhVLPllELulAVGLllPsLAslAAslAAALhlVFulAIAINVuRGRTpIDCGCFRNGhK.QRISWhhVhRNluLAuhALulAAlLPsAssuSlp-uATGLAAAuhLhLLYhuASLLuGLPAu+susclsKG ...................................................hhh..........hhhphhlu.hl.hlhuuhtK...lt...sh...t....s...F...t.p..t.l...t...s..Y.p...l..L..s....p...t.h..s...t....h.A....hhlPhhElhs.Gl...hLl.h.......u..h...h.........t...h.h..u...u..h..l.uh..h..lh..h..l.Fs..s.ul.....s.h.s.l.h.p..s..t.h.pl...c...CGC.....h........G.........s..s........st............l.u.....h.....h.........l.h..c.sh...h...h.h.hh...s....h....................................................................................................hhhh.................................................... 1 82 146 168 +7120 PF07292 NID Nmi/IFP 35 domain (NID) Vella Briffa B anon Pfam-B_17864 (release 10.0) Domain This family represents a domain of approximately 90 residues that is tandemly repeated within interferon-induced 35 kDa protein (IFP 35) and the homologous N-myc-interactor (Nmi). This domain mediates Nmi-Nmi protein interactions and subcellular localisation [1]. 25.00 25.00 25.10 25.90 24.90 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.97 0.72 -3.94 4 209 2009-01-15 18:05:59 2003-09-15 11:26:57 8 4 39 0 112 182 0 86.40 37 52.28 CHANGED ALITFEc.cVAppVlp.pcHpVphE-spL+VpspPl.LssssphQVpsplSph+l.VTGhPs..cLSEEphhDKLEl.FuKoRNGGG-V- ..............AlITFtcpt...VAppllphpcapltl.........p.psp.hpVplpPh..sphpchQl..psplSp..+p.lL..VoslPs..tlsE-pl.p.DhLElaFpKspN..GGGEVE.......... 
0 4 13 43 +7121 PF07293 DUF1450 Protein of unknown function (DUF1450) Moxon SJ anon Pfam-B_18439 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 80 residues in length. Members of this family contain four highly conserved cysteine residues. The function of this family is unknown. 20.90 20.90 20.90 21.50 20.70 20.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.65 0.72 -4.00 14 826 2009-01-15 18:05:59 2003-09-15 11:43:57 6 1 437 0 89 264 0 73.70 40 85.85 CHANGED MpPllEFClSNLApGoptAhEtLE+DPNlDVlEYGCLoYCGhCupuhFALVNGEhVpG-oPE-LVcNIYpaIEENs...hF ...............................................p.h...hpcL.E..p.D..Ps.s..-llEhGC.oYCG.spcpsFAhVN..scsVtu.T.EELlpplhpplcc............... 0 29 56 73 +7122 PF07294 Fibroin_P25 Fibroin P25 Moxon SJ anon Pfam-B_18451 (release 10.0) Family This family consists of several insect fibroin P25 proteins. Silk fibroin produced by the silkworm Bombyx mori consists of a heavy chain, a light chain, and a glycoprotein, P25. The heavy and light chains are linked by a disulfide bond, and P25 associates with disulfide-linked heavy and light chains by non-covalent interactions. P25 is plays an important role in maintaining integrity of the complex [1]. 25.00 25.00 45.70 39.70 19.40 19.10 hmmbuild --amino -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.05 0.71 -4.83 5 11 2009-01-15 18:05:59 2003-09-15 11:48:57 6 1 8 0 5 8 0 184.30 46 86.22 CHANGED NIVRPCaLDDLKCIuDNLAANS+CpsNVRGpIPSpYsIPsF+FEsPFFNATYIDaNLIsRN+DpCRVSEFFFNl+ocsuVLolDCPNLsaESsRTllQHtSLpEDoVYSYaINGTYPLIRLTTNLssusclNLCSuaTFADVs.uLPIF+IDPND+pTANaLSRDLoLLNIYERETFaaRustLARaFINSLICDFGCp .pIhRPChLsDhcCIpDpLAANSpCpss.tGplPopYpIPhFpF-sPaFNATYlDaNLlsRNpDpC+VSEFahNh+ocpsVLolDCP.LsaESsRTlhQHtS.hpEDshhSaaIpGoYPLIRLTTsh.pusphsLCSuaTFADls.uLPIF+IsPpDp.TApaLS+DLoLLpIYEREphhh+ts.LhR.FlsphlCDFGCp................. 
0 4 5 5 +7123 PF07295 DUF1451 Protein of unknown function (DUF1451) Moxon SJ anon Pfam-B_18524 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 160 residues in length. Members of this family contain four highly conserved cysteine resides toward the C-terminal region of the protein. The function of this family is unknown. 24.10 24.10 24.50 24.20 23.80 23.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.00 0.71 -4.42 31 784 2009-01-15 18:05:59 2003-09-15 11:51:52 6 4 781 0 106 305 19 140.40 57 90.58 CHANGED lhppl....p-s.pps.cpLpchlcpucchhptts-hTc-ElsLlspaL+RDLccatcphp-.......t.hpushhthlcpoLWptLupITD+TplEWtELhpDhcHpG..hYpoGEllGhGtLsCppCG+phphppsshlssCscCspptFpRpsh ...............................L.VASL....oE.RL+NGERDIDsLVEpARcRVhpsGELTRsEl-plocAVRRDLEEFAhSYEE................pppDSVFhRVIKESLWQELADITDKTQLEW....REVFQDLsHHG.....VYHSGEVV.GLGNLVCEKCHaHLsl.Y.T.P.-.V.LshCPKCGHDQFpRRP.F....... 0 23 49 77 +7124 PF07296 TraP TraP protein Moxon SJ anon Pfam-B_18635 (release 10.0) Family This family consists of several bacterial conjugative transfer TraP proteins from Escherichia coli and Salmonella typhimurium. TraP appears to play a minor role in conjugation and may interact with TraB, which varies in sequence along with TraP, in order to stabilise the proposed transmembrane complex formed by the tra operon products [1]. 
25.00 25.00 52.10 53.30 21.50 20.70 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.34 0.71 -5.35 3 150 2009-01-15 18:05:59 2003-09-15 11:56:11 6 2 115 0 4 90 0 182.40 72 98.69 CHANGED MANNhlop.ts+AshYsVAuVLRWLaWsVKYAVIaPLATMALlVlLVFRTGsTTPGQpLVKEIspVRQpAPuG.FPVpDCssP....usoVsSPlPspLQEsCsh+ITDAA-YAA-IDQSLoQslhhLWuhLALlYTulAVlhG+pPVR+Ghl+p.tVVsuDosLss.thI.tcsEVhT.csshsGQI++s.....sD+pu+sEGDKNEHT .........MuNNhSuRQAsHAsRYVVApVLRhLFWCLKYsVILPLATMALMALFVLWKDNTTPGKLLVKEIsFVRQTAPAGQFPVuECh.............hsoSD......S...ss.u...plp.-IC+YRAADAADYVRETDRSLMQLVTALWATLALMYsSlAAhTGKYPV.RPGKMK.ClRVVTADE+L.........KEVYTEDASLPGKIRKC.VYhPDDRTNRNNGDKNEHA........................ 0 0 0 2 +7125 PF07297 DPM2 Dolichol phosphate-mannose biosynthesis regulatory protein (DPM2) Moxon SJ anon Pfam-B_18649 (release 10.0) Family This family consists of several eukaryotic dolichol phosphate-mannose biosynthesis regulatory (DPM2) proteins. Biosynthesis of glycosylphosphatidylinositol and N-glycan precursor is dependent upon a mannosyl donor, dolichol phosphate-mannose (DPM). DPM2, an 84 amino acid membrane protein expressed in the endoplasmic reticulum (ER), makes a complex with DPM1 that is essential for the ER localisation and stable expression of DPM1. Moreover, DPM2 enhances binding of dolichol phosphate, a substrate of DPM synthase. Biosynthesis of DPM in mammalian cells is regulated by DPM2 [1]. 25.00 25.00 28.20 28.00 21.00 20.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.87 0.72 -3.70 12 227 2009-09-13 06:50:04 2003-09-15 12:00:32 7 4 200 0 152 198 0 74.20 39 80.80 CHANGED tsD+lVGhuhlshShhlFsYYThWlllLPFlDssHslHpaFLPRsaAlhlPllhsllhhhhlGsFluhVMlKop+..KKu .........h..DphlGhhhlhsuhhlFhYYThWsl.lhPF..l....D..s...s..H.slppaF.P..RtaAlhIPlhhhllhhshVGsFluhVhl+spp..Kc..................... 
0 36 75 120 +7126 PF07298 NnrU NnrU protein Moxon SJ anon Pfam-B_18406 (release 10.0) Family This family consists of several plant and bacterial NnrU proteins. NnrU is thought to be involved in the reduction of nitric oxide. The exact function of NnrU is unclear. It is thought however that NnrU and perhaps NnrT are required for expression of both nirK and nor [1]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.58 0.71 -4.92 102 551 2012-10-01 22:51:20 2003-09-15 12:54:25 6 2 458 0 223 592 305 193.30 25 85.69 CHANGED lLlhulhlFhusHhh.h.ssshRsthh.utlGctsa+uhaullSlsulslllhuattAchs....LWsss..shhttlsslLhhlAhlllsuuhh.sp..................IhthsRHPhLhuhtlWAlAHLLsNG-lssllLFGuhhsaAlhshhhhc+R........sts.hs.hhsssuhhs..................shhslshGlllahslhh.hHshL.hGlsP ........................................................................................................hllh.LhlFhs.tHSlhh..tsshR.thhh..thl..G....t..t..t...a+uhY.ullSlsu.lsl..l..lht..atttphs......lWss.......shhphls.hh.Lhh..lu.al..hlhsuhh.hpt.......................................lhphsRHPh.h.lG..h..h..l..W.....A.....h.....uH.hlss.Gs....h....hsl.lhhsuh.hhatlhshh..hcp+.........ttt..ht.......sshh.......................hhhhhhulhhhhhhhh..hH.hl..t............................................................. 0 59 136 172 +7127 PF07299 FBP Fibronectin-binding protein (FBP) Moxon SJ anon Pfam-B_18450 (release 10.0) Family This family consists of several bacterial fibronectin-binding proteins which are thought to be involved in virulence in Listeria species [1,2]. 
22.70 22.70 22.80 23.30 21.70 22.60 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.37 0.70 -5.00 15 400 2009-01-15 18:05:59 2003-09-15 13:05:05 6 1 377 6 70 289 14 187.30 36 93.28 CHANGED FIcsaQYNaIKpplppLlsuasosNDcsslpsl+uhst-KIhslFsclssEpcpllcslhslp.s+pcA-+aLpcLp.YVlPFppsospplpKLF+KsKKLKlPshcphDh+phoYLGWNDtGop+KaIlhhp.csKLhGlpGslss..pspKGlCoICs...pco-VuLFhupsK.uss.sGTYs++GNYICtDSppCNcplTslspLccFlccl+ ................................hIps.QaNhIKpph..LhpsatosNDttslpslpuhs.-KI.tlF.p.t.............t...p.t.llsplhshp.sptcAEphL.plp.hVhPF.tsos.plpKlF...........KsKK.....LKl.P.s.hpphD.h+choYlGWpD.h.u.o.p.R.K.allshp..cs......+hlGl....p....Gohss.......ph....pKu...l....ColCp...sp...s...pVu..hFhuppK....usu....sG.s.asppGNYICpDu.tCNpphpphphLppFhc.lp................. 0 26 55 66 +7129 PF07301 DUF1453 Protein of unknown function (DUF1453) Moxon SJ anon Pfam-B_18607 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. Members of this family seem to be found exclusively in the Order Bacillales. 21.60 21.60 21.60 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.00 0.71 -4.34 20 613 2009-01-15 18:05:59 2003-09-15 13:44:42 6 1 432 0 59 304 1 143.10 50 92.91 CHANGED M........hslhSolhAlhMuhhlhhl.RhKAuc+Ps..osKKIILPPl.....F.MSTGALMFl..hPhFRlosh-llEAlhlGhl.FSlhLItTS+FElR.sscIYlKRSKAFsaILlGLLllRlshKhhluspIDhupLuGMFalLAFuMIVPWRIAMalpY+Kl ............h...hlhS.llAlhMushlhhl.RhKA..tp..pPl..stKK.IlLPP.h......F.MuTGA.LMal.......hP.hFRL.Ts.h..EhLEAhhlGLl.FShhLIhTS+FEl+..sscIYhKRSKAFshILIuLL.llRhshKhalS.....s..p..lD.GpLuGMFFLLAFsMIVPWRlAMhhpa+KL............................. 0 17 34 45 +7130 PF07302 AroM AroM protein Moxon SJ anon Pfam-B_18608 (release 10.0) Family This family consists of several bacterial and archaeal AroM proteins. 
In Escherichia coli the aroM gene is cotranscribed with aroL [1]. The function of this family is unknown. 23.10 23.10 23.20 29.60 22.90 22.70 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.31 0.70 -5.01 15 531 2012-10-03 04:39:14 2003-09-15 13:50:37 6 2 525 0 48 185 10 220.50 70 98.21 CHANGED +lAllTIGQuPRs-VlPplpcals-shclschGhLDuLScpEI.schAPpss-.csLlThLsDGppVplS+pKlpctLQpsIptL-ppGh-lIlLhCTGpFs.sLss+ps.LLEPp+IlssLVsullss.pplGllVPlt-Qhs..hptpKWphltpsshhssAoPah.uopsclhpAuppLttpGADllVLDClGYsppp+chlpcthslPVlLupsLlARl....suELls .........................................S.LAILTIGlVPMpEVLPLLTEYIDE.-.pIoHHSLLGK.....L...S...REEV..MAE...YAPEsGE.DslLT.LLNDN...QLAH....VSR+KVERDLQuVVEVLDNQGYDVIlLMSTAsIu.SMTARNoIhLEPsRILPPLVuSI..V-c..HQVGVIVPVEEhLs...sQAQKWQl.....L.Q.+sPVaSLuN.PlH....sS....E...Q....clIDAG+ELLscGADVIMLDCLGFHQ.RHRDlLQKpLDVPVLLSNVLIARLAAELL.V........... 0 6 18 33 +7131 PF07303 Occludin_ELL Occludin homology domain Vella Briffa B, Bateman A anon Pfam-B_18556 (release 10.0) Domain This domain represents a conserved region approximately 100 residues long within eukaryotic occludin proteins and the RNA polymerase II elongation factor ELL. Occludin is an integral membrane protein that localises to tight junctions [1], while ELL is an elongation factor that can increase the catalytic rate of RNA polymerase II transcription by suppressing transient pausing by polymerase at multiple sites along the DNA [2]. This shared domain is thought to mediate protein interactions [3]. 
25.30 25.30 25.80 26.10 25.20 25.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.51 0.72 -3.28 18 504 2009-01-15 18:05:59 2003-09-15 14:23:59 8 8 105 3 247 398 4 98.70 38 19.03 CHANGED YPP.IsSscpRpcYKp-Fss-apEY+pLpAEl-slsc.+hspL-ppL.cpLsc...sSpcYpslt....cEYp+.lK..KpsssYppKKpRCcYL+sKLsHIKphls-YDc .................YssIpSt-QRppYKp-Fss-YsEY+pLpAclpslsc.+FpcL-spl.cpLsp.........uopEYp...............plp.plhpEYp+h+............KpsPsYppcKpRCcYL+sKLuHIKphI.t-YDp............... 0 29 55 137 +7132 PF07304 SRA1 Steroid receptor RNA activator (SRA1) Moxon SJ anon Pfam-B_18506 (release 10.0) Family This family consists of several hypothetical mammalian steroid receptor RNA activator proteins. SRA-RNAs likely to encode stable proteins are widely expressed in breast cancer cell lines. SRA-RNA is a steroid receptor co-activator which acts as a functional RNA and is classified as belonging to the growing family of functional non-coding RNAs. 20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.97 0.71 -4.50 4 239 2009-01-15 18:05:59 2003-09-15 14:28:43 6 16 207 1 163 255 0 157.30 24 18.02 CHANGED hsPPs.........PsSstsssPP.GpG...PsSup.......lpPssscP-....s-tshcsshhsLcpslpsspsShcpplssDIoRRLplLp-.WtGGKLShsV++RMshLspELpstpWDuADcIHhSLMVDHVsEVSQWMVGVKRLIAEsRpLs.c.ht..pputu............QsS .....................................................................................................................ssssss..............P........t..s...s..ss...s.t...s.............s....Ps.s.s..................................ht.s..s...st...s.p............spt....l....p..ll.p.s....L.pp....sh.p.......t........s....s....s.s.....h....pK...p........h.......c...Dsp+RLslLF-+.L.s.s.s.c.Lo.ps.s.h.cph.tpLspuLps+c.acsAppIHhslh.s..s..+s...s..EsupWhs..........GVKRLIsht...................ss.............. 
0 41 81 130 +7133 PF07305 DUF1454 Protein of unknown function (DUF1454) Moxon SJ anon Pfam-B_18833 (release 10.0) Family This family consists of several Enterobacterial sequences of around 200 residues in length which are often known as YiiQ proteins. The function of this family is unknown. 25.60 25.60 25.70 75.50 24.30 25.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.31 0.71 -5.22 4 549 2009-09-10 15:53:58 2003-09-15 15:29:10 7 1 541 0 42 161 0 197.90 76 99.52 CHANGED M...K.hshLahhhhhuLslsssA.cApT.s-T.TssAPYLLsGAPoFD.SISQFRE+FNusNPoLPLNEFRuIsopsD+sNLTRAASKINENLYASTALERGTLKIKSlQhTWLPIQG.PEQKAA+sKA.EYMuAllRsFsPphSpsQSpp+LppLLssGKsKRYaocsEGAlRYlVADsGEKGLTFAlEPIKLALSEoLEG.NK ..............................MKPGCTLFhLLsSAL..Tlo...s.T.A...H.AQ..o....s-o.sTTAPYLLAGAPTFDLSISQFREcFNsQNPoLPLNEFRAIDSSsDKAN.LTRAASKINENLYASTALERGTLKIKSlQhTWLP...IQG....PE.Q...............KA............A........KAKA.EY......MAAlIRTlsPLhTKs...QS...QK...KLQsLLTA.GKsKRYYsET.EGAlRYVVADNGEKGLTFAVEPIKLALSEoLEGhN........ 0 1 10 25 +7134 PF07306 DUF1455 Protein of unknown function (DUF1455) Moxon SJ anon Pfam-B_19038 (release 10.0) Family This family consists of several hypothetical putative outer membrane proteins which appear to be specific to Anaplasma marginale and Anaplasma ovis. 25.00 25.00 241.90 241.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.57 0.71 -4.09 2 8 2009-01-15 18:05:59 2003-09-15 15:31:47 6 1 4 0 1 7 0 130.00 89 99.62 CHANGED MSCRAF.L+GlLhFsLLPGSLAT.ARP.LLtVsstsuhtQTsGGG..hsttASsThG+LssAsAsSAspS.tl.uSsVhEsGh.sphhSPAQ..ELhusALQPS.TPSSWVFGRTAhSGVRuFLERTVFLVF MSCRAFSLKGLLAFTLLPGSLATAARPSLLRVGGEASGQQTSGGGFHAAGGASATRGRLTSASAVSAPQSFGVLGSTVWEDGFLPSVFSPAQ..ELLSAALQPSPTPSSWVFGRTAISGVRGFLERTVFLVF. 
0 0 0 1 +7135 PF07307 HEPPP_synt_1 Heptaprenyl diphosphate synthase (HEPPP synthase) subunit 1 Vella Briffa B anon Pfam-B_18592 (release 10.0) Family This family contains subunit 1 of bacterial heptaprenyl diphosphate synthase (HEPPP synthase) (EC:2.5.1.30) (approximately 230 residues long). The enzyme consists of two subunits, both of which are required for catalysis of heptaprenyl diphosphate synthesis [1]. 29.40 29.40 29.70 29.40 29.30 29.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.18 0.70 -4.88 15 387 2009-09-13 14:07:00 2003-09-15 15:36:15 6 2 383 0 41 188 0 175.40 39 77.92 CHANGED HsYLtcaIstPhlDEDKLhLLashhs-us..lpppct-cYllTsMLVQlALDTH-cVos.tst.sspspKsRQLTVLAGDYYSGLYYpLLScscDIslIRsLApuI+EINEpKIpLYp+pspsl-plhpSVspIESALlp+lu-+Fthsp.WpphsscaLlh+RL.pEpcha.pttsS.lhcsltph....cstsshcslhp-thcclpcthpphlcp .............................................................................................................................olLhGDhhSuhaYpLLAEhsDlsh.ptlucAIhEINEhK.sL.a....pp.A.h...s...s...h...E.I.pulVpIEohh.hhT...h...sHFtl...................................................................................s........................................................... 0 13 27 33 +7136 PF07308 DUF1456 Protein of unknown function (DUF1456) Moxon SJ, Yeats C anon Yeats C Domain This family consists of several hypothetical bacterial proteins of around 150 residues in length. The function of this family is unknown. 21.70 21.70 22.00 21.80 21.20 21.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.99 0.72 -3.94 117 1896 2009-01-15 18:05:59 2003-09-15 15:37:37 8 2 957 0 262 965 68 68.40 41 85.24 CHANGED NNslL++LRhALslpcschlclhthus..hplo+s-luuhh..+K.........c-cc......sYp.pCsDphLpsFLsGLhhppRG ....NN.lL++lRhAhslps..s..DllcILshss..hclott-lsuhhRK.................c-cc...............sap.cCsDphLptFLpGLhhcpRG........... 
0 60 125 200 +7137 PF07309 FlaF Flagellar protein FlaF Moxon SJ anon Pfam-B_19331 (release 10.0) Family This family consists of several bacterial FlaF flagellar proteins. FlaF and FlaG are trans-acting, regulatory factors that modulate flagellin synthesis during flagellum biogenesis [1]. 20.60 20.60 21.40 21.60 20.40 19.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.48 0.71 -4.21 50 272 2009-01-15 18:05:59 2003-09-15 15:41:24 6 2 230 0 103 219 28 112.40 32 93.77 CHANGED M........stpAYupstp.sstosRphEtpsLs+ssppLppup.....spsstshpth......cALhhNR+LWohhtsDlups-NsLPpcLRAsllsluhFlh+copclh..ttp.s..slpsLI-INpsIhsGL ...................ht.tYtcshp..sstss+-pEttlhs+uhshLpsAp......tpsspshpsl......-Alhasp+LWohlhsDLs.ss-NsLPp-LRAsllSlulaVh+cspclh..psps..pshpsLI-IspsItcGL................... 1 30 63 77 +7138 PF07310 PAS_5 DUF1457; PAS domain Vella Briffa B anon Pfam-B_18761 (release 10.0) Domain This family contains a number of hypothetical bacterial proteins of unknown function approximately 200 residues long. This region is is distantly similar to other PAS domains. 20.60 20.60 20.60 21.40 20.50 20.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.50 0.71 -4.62 10 302 2012-10-04 01:10:46 2003-09-15 15:59:54 8 1 218 0 104 274 26 135.90 26 68.99 CHANGED M+psSotplFuYWsclRsucs....sP+RuDI-Pucl+sLLuDsFlLps-usGshsFRLAGTRLCsLhGc-L+spsFsuLas.ssRpclscllssVhccsssslsslsutspsGss.lchElLLLPLpscsssssphLGlLs .............................................shhtlhsYWpp.l.pt..s.cs..........hPtRps.lDPt.cl.t.p...hLssl..F...lL..E......p..p.....s.t.....u..p...h.ph.RL....A..GT+lssla....Gp.-l+...Ghph.ss...l..astp..s.........p.t..t.l.t..chlps.Vhppt.ssshhths......u.h....shsu....pt..lph-h...lLLP.Lt.sssst..phlGsh.s............................................................. 
0 44 69 80 +7139 PF07311 Dodecin DUF1458; Dodecin Moxon SJ, Anantharaman V anon Pfam-B_18876 (release 10.0) Family Dodecin is a flavin-binding protein [1],found in several bacteria and few archaea and represents a stand-alone version of the SHS2 domain [2]. It most closely resembles the SHS2 domains of FtsA and Rpb7p, and represents a single domain small-molecule binding form[1]. 21.00 21.00 21.10 27.40 20.00 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.73 0.72 -3.94 80 611 2012-10-02 11:08:51 2003-09-15 16:03:34 7 2 529 137 255 516 39 65.40 41 90.96 CHANGED +lYKhlEllGoSspSh--AlpsAlscAu+Tl+slcWhEVtEh+uclcs.G+VscaQVslKVuFcl-s ......plYKllEllGoSspSh--AIpsAls+Au.....cTl+slcWFEVs-hRGclcs.GclsaaQVslKVGF+l-........ 0 69 170 227 +7140 PF07312 DUF1459 Protein of unknown function (DUF1459) Moxon SJ anon Pfam-B_18877 (release 10.0) Family This family consists of several hypothetical Caenorhabditis elegans proteins of around 85 residues in length. The function of this family is unknown. 20.80 20.80 21.00 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.06 0.72 -4.06 2 33 2009-01-15 18:05:59 2003-09-15 16:05:31 6 1 5 0 33 26 0 81.00 47 98.45 CHANGED MFQKohIVhhhALFCISSsQVlYoPEVVuSPYYYuuuPVA.SAYPYAYAYGAsAYPTAaYGWGSNKGQQA.uSA.PTQKLTNNQ .........MFpKshh.sl..hls.h.F.sl.uSsQllao.PphVsu..PYYYAu.ussu..AYP.sYu..Y.u..AAAYPos.auWGSNKsp.p..u..s.u.sA...PTpp..LsNN.................. 0 8 16 33 +7141 PF07313 DUF1460 Protein of unknown function (DUF1460) Moxon SJ anon Pfam-B_18925 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 260 residues in length. The function of this family is unknown. 
21.30 21.30 21.40 21.40 21.10 20.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.34 0.70 -5.14 25 430 2012-10-10 12:56:15 2003-09-15 16:07:39 7 5 420 3 68 330 60 209.60 41 72.81 CHANGED tlSptFLGTPYtAspLhGussss..EpLVlcFcGlDCFTalDYVtALp+uss......pssFlcsLhchRYtsGcl.sFhsR+HFFo.DWstssspt..scDlTsplSs...........thlohsKpLNp............Ks-GspalsGLslhcRsl.........sYIPustl........sppVlspL+oGDaIGIYoph........sGLDVTHsGlhltsssu....shhRNASStpss.pVVDsPFh-YlpspP...GI .....................................................................p.lSptFLGTPYpA....sTLhtsssts..EtLVlNFsGlDCFTalDYV.ALupuss...........ppsFhcsLtphRYts.Gcl...uYhsR+HFFo.DWhsssspp..AcDlT.s...plSs..............phlshsKpLNp..................................................................K.s-GuEalsuLulh.Rpl.............sYIPu..ctI.........................spp..V.h..sp..L+sGDhIGlYosh.................................................sGL.DV...oHlGIs..lp.ccsp.........lhhRNASSlttp...+.VV..D..pP..Fh.-Yh+spP.....GIl....................... 0 15 36 58 +7142 PF07314 DUF1461 Protein of unknown function (DUF1461) Vella Briffa B anon Pfam-B_18854 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 200 residues long. These are possibly integral membrane proteins. 
24.00 24.00 24.20 24.70 23.60 23.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.36 0.71 -4.54 40 875 2009-01-15 18:05:59 2003-09-15 16:09:30 6 1 849 0 104 563 21 180.00 35 82.83 CHANGED h+.......p.h....hslhhhlhlluhulhhslhht..h.hY.hplchhplschss............hshsplhpNaspLhsYLssshppt.Lphssh.sSssG..........hhHFt-VKpLFhhshhlh.lhsllhhhhhhhhhhpcpphhhhh.pshhl..hh......llPlllhhhh.hlsF-paFshFHplhF.sNshWlFDPspDPlIphLPEpFF ............................................................................................hhht........hhhhs.hlhll.ululhlTIhht..a.h.Y.h-IpaLsls.p..h.l.h..........................ls.ps.lhpNaph.L..hsYLssPapph.LphPcF..sSssG..........................ltHFt-VKpLFhls.hVh...l.l...sl...sh..h..ha..l..p..hl..hK...+.ph.....l...th.hh......+.....shhh..hh.............llPl..h.lulhh..hlsF-pFFslFHpllF.ssDsWLFDPspDPlIhlLPEpFF............ 0 37 66 84 +7143 PF07315 DUF1462 Protein of unknown function (DUF1462) Moxon SJ anon Pfam-B_19094 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 100 residues in length. The function of this family is unknown. 25.00 25.00 32.90 109.50 24.00 23.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.14 0.72 -4.02 18 399 2009-01-15 18:05:59 2003-09-15 16:17:21 6 1 399 1 35 144 0 94.40 59 87.33 CHANGED sVYGA-slCASCVNhPSSK-TaEWLpAALpRKYPsp..sFchpYIDIp.pPs-s..-cpp.caup+Ih-DEhFYPLVllsDElVuEGNPpLKsIapphE .hVYGA-VICASCVNAPoSK-TY-WLQshLtRKYPs...sFpasYIDIpc-s-s..-.cchpFhERI.pDELFYPLlshNDEhVA-G..phKpIhchI-. 0 9 20 29 +7144 PF07316 DUF1463 Protein of unknown function (DUF1463) Moxon SJ anon Pfam-B_19113 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 140 residues in length. Members of this family seem to be found exclusively in Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 
25.00 25.00 25.50 57.20 18.90 17.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -4.59 2 115 2009-09-10 14:48:10 2003-09-15 16:19:18 6 1 27 0 9 71 0 135.40 73 98.69 CHANGED hpaYsLc.laFShsss.lcoGpLEhooEPsshAhhSoED+shPl.ShRDP+TlsalFslEVohGShDYhLLTcLSsEQFYp.sV.KpcKhhcLsFNDphuhKIISN.AhFsE.PoRpYSA-s-pVpFpI+AINCphpKss .....MQFYDLREVYFSIGG.sQLHSGKLELTSEPTTRAVlSoEDKGhPVISLRDPKTITYlFNIEVTLGSaDYILLTELSDEQFYNMDVpKpDKMLDLsFNDRIATKIISNYAIFTEEPSRSYSAEAEKVoFEIRAINCQKoKPN..... 0 5 5 5 +7145 PF07317 YcgR Flagellar regulator YcgR Moxon SJ anon Pfam-B_19142 (release 10.0) Domain This domain is found N terminal to Pfam:PF07238. Proteins which contain YcgR domains are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias [3]. 21.10 21.10 21.30 21.10 20.80 20.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.02 0.72 -4.30 7 656 2009-01-15 18:05:59 2003-09-15 16:24:08 7 2 640 2 95 297 19 105.70 50 43.27 CHANGED pFlhps.htIsshLR-LpKpps.lplp.tspGp..hlS+ILslsPppt.hlhDaGutEp-NptsLputplshlspspGsKlEFsssplppscapshPAFpstlPppLahl ..........QFLKpNPLAlLGVLRDLpKsslPLRlS.Ws.sGQ...hISKILslsP-K..Lll...DaGSQsc-Nh..AVL+Apc.lsIsA.ETQGAKVEFTl.p.Q.L.pp.u.EY.p.LPAFIT.s.PsoLWFV........... 0 9 42 68 +7146 PF07318 DUF1464 Protein of unknown function (DUF1464) Moxon SJ anon Pfam-B_19143 (release 10.0) Family This family consists of several hypothetical archaeal proteins of around 350 residues in length. The function of this family is unknown. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.07 0.70 -5.63 11 65 2012-10-02 23:34:14 2003-09-15 16:32:55 7 1 62 0 43 226 2 317.30 32 93.36 CHANGED GIDPGTsSaDlsul--..GpVhhctslPTspVtcsPthllchlp-s.....ss-llAuPSGYGLPlh+sp-ls-.....c-IhLhTLsssuctu.....hG......LRshlp.h.uu+sl....ssahlPGVIHLsoVPsaRKlN+IDhGTADKlAosshulhp.sc.....Ycsh....sFILVElGtuFoAslAVpsGpIVDGhGGTh.hsG..ahuuGhhDGElAYLhu....plsKphlFpGGht.hss.......................sapthhE.lhKslsshhuoh.csc.....IIlSGRhtphs-htcclct+htch...........h..t..t.sKEuApGuAlIAsuluGGha+cll-hLtl.cSuGTslDal+L ..............................................................................................................................................................GlDPGTc..Shslhhl--..Gplhh..h.......plsop.V....t.cs..shh.l......lchlpch...............ps-hlshPSGaGl.P..ltp.h.pclsc.......c-lh.l..hTlhcs.tphs........hG......Lpchlp.h..tppph....s..sahI.PuVI..c.LsoVP.t...aRKhNpID...hGTA.DKlAs..sshuh.t..pthtl.apps......sFIllEhGhsasuslsVcsG+IVD...G...h..G...G.T.h......h....s......G...h...h...s.G......s..l...D....u....E...l..........A.Yhl...s..............chs..K...ppl......F.....p.....u.....G.h..hst.................t.............t...t..........shcthhE.ll+tVs...sh.h.s.s.h...chp.....Il.l.SG+h.t..p.....-htcchcpphtth..........h............sKE.AA..GuAhIAsulsGGha+chlphl...cupGosl-alp.............................................................................................................................. 1 13 21 34 +7147 PF07319 DnaI_N Primosomal protein DnaI N-terminus Vella Briffa B anon Pfam-B_18931 (release 10.0) Family This family represents the N-terminus (approximately 120 residues) of bacterial primosomal DnaI proteins, although one family member appears to be of viral origin. DnaI is one of the components of the Bacillus subtilis replication restart primosome, and is required for the DnaB75-dependent loading of the DnaC helicase [1]. 
20.20 20.20 20.30 21.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.23 0.72 -3.74 37 1017 2009-01-15 18:05:59 2003-09-15 16:51:55 6 3 1013 1 99 468 1 91.90 35 30.28 CHANGED MEslscslpc.hhpppshppchpplhppllsDs-VpuFlpp+ptcLopchIp+uhsKLaEalppppchpt....tsssslhpGYpPpLslspthIDVsY ........................MEsltphlpc...ps..p.c.....a.....t.....pchpc.l.h.pclhpDPDVpsFlppct.cLTsp.Ip+SlsKl.EYlsp+c+att....sDss.lscGYpPpLslspshlDlpY............... 0 22 49 74 +7149 PF07321 YscO Type III secretion protein YscO Vella Briffa B anon Pfam-B_19036 (release 10.0) Family This family contains the bacterial type III secretion protein YscO, which is approximately 150 residues long. YscO has been shown to be required for high-level expression and secretion of the anti-host proteins V antigen and Yops in Yersinia pestis [1]. 27.50 27.50 29.10 28.80 27.40 27.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.92 0.71 -4.39 12 104 2009-09-13 06:09:03 2003-09-15 17:16:14 7 1 98 0 19 64 2 149.40 38 69.50 CHANGED M....lppLhcIKphRt-cApptltpQp.tlssA+tcpppAppshpDa+.WRhpEEpRLauphptphlth+-l-chp....ppluhLR-ppApLtpplscttpplctEpptLppppptlppsp+ppEKhsELtcppps-ttt.pphpEEhE.EEFtp.......ph .....M.lcpLhclKplRh-RAE+AlppQphplpsAttcppcApps.pDY+.WRhcEEpRLFsptpspslsp+-LEpap....ppluhLRE+EApLEpcsAchtcpLcpERccLppspctlppA++pppKFhELtcppps-ptsppchpEEtEtEEFhphp....... 0 7 9 14 +7150 PF07322 Seadorna_Vp10 Seadornavirus Vp10 Moxon SJ anon Pfam-B_18930 (release 10.0) Family This family consists of several Seadornavirus Vp10 proteins found in the Banna and Kadipiro viruses. Members of this family are typically around 240 residues in length. The function of this family is unknown. 
25.00 25.00 42.20 41.80 20.90 20.60 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.74 0.70 -5.11 2 12 2009-09-21 14:23:59 2003-09-16 09:38:08 6 1 4 0 0 12 0 228.30 59 92.13 CHANGED MDVLSKuSLKELLAHLE+TPLEEAlSY+IGTlPYQNVLIsRNEYYNQ.YPDsTSLIDGVuREGQRNVNGLIMSIISYVVSGSGHYIPNIGahLLRRSILDILTKHDTGLsTNNlNYslIARNLTVSKMNCEQRKRMLICFKLLAYKDGN.NDYEhYLNQNIsLKQIAPNFIPGDMRTVhpNpDpLuIVGIPAYRLTQSTELSIRDDNAKSYKlGYVDWYNSNSFLRERs-FNLIpLKDRD. ......LSKSSLKELLAHLE+TPLEEAISYKIG.TlPYQNVLISRsEaYNQLYPDsTSLIDGVuREGpRNl.GLIMSIISYVVSGSGHYIPNlGhhLLRRSILDlLT++DTGLsTNNlNYslIARsLTVsKMNCEQRKRMLICFKLLAYKDGN.sDY-sYLNQNloLKQIAPsFIPsDMRTVlSNsDpLSIVGIPsYRLTQSTELSIRDDNAKSYKlGYVDWYNSNuFLRERN-FNLhpLKDRs.h...................... 0 0 0 0 +7151 PF07323 DUF1465 Protein of unknown function (DUF1465) Moxon SJ anon Pfam-B_19346 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. The function of this family is unknown. 25.00 25.00 28.70 72.80 22.20 20.50 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.81 0.71 -4.66 26 190 2009-01-15 18:05:59 2003-09-16 09:55:55 7 1 189 2 72 161 18 157.20 49 90.36 CHANGED lphucphstSphFpsLapEGMsLVEETAuYLD.....GtGRspu+sLsRpu....ulsYAsESMRLTTRLMQlASWLLlpRAspcGEMotpQsppEKs+lphssss.....sssshscLP.thpcLltRSpcLptRltRLDcplhst.ssssttt......sPVssQlshLcs.....AF .......h.phuc+hshSssFcslYtEGMsLVEEsAuYLD.........GcGRp-A+sLs.RsA............uhhYAuESMRLTTRLMQlASWLLLQRAs+pGEMTcsQssuEKs+V+Lcsss..hspsusuas-LP.sht-LlcRShRLQsRVp+lDcclast.s.shpts.....tNPVstQlshL+sAF......... 0 20 42 52 +7152 PF07324 DGCR6 DiGeorge syndrome critical region 6 (DGCR6) protein Vella Briffa B anon Pfam-B_19101 (release 10.0) Family This family contains DiGeorge syndrome critical region 6 (DGCR6) proteins (approximately 200 residues long) of a number of vertebrates. 
DGCR6 is a candidate for involvement in the DiGeorge syndrome pathology by playing a role in neural crest cell migration into the third and fourth pharyngeal pouches, the structures from which derive the organs affected in DiGeorge syndrome [1]. Also found in this family is the Drosophila melanogaster gonadal protein gdl. 23.30 23.30 23.90 24.90 23.20 23.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.26 0.71 -4.83 8 130 2009-01-15 18:05:59 2003-09-16 10:42:51 6 3 87 0 75 129 1 160.30 45 79.11 CHANGED Mpca.usspt.........hs-pspQ.......QERHYaLLS-LQsLVK-LPSuaQQRlSYosLoDLAhALLDGTVFEIVQGLLEIQHLTEKNLYNQRhKLcsEH+sL+Q-Lt+KHK-ApQ..sC+sHNLulLKssQp+EhEulEpRl+-EQphMDcKIVLELDQKVhDQQSTLEKAGVPGFYlTsNPQElplQMNLLELIhKLQQhp..ssK ..................................................................................Qp+hY.Llp.tLpphhpcL.PpphQpRl.uYshLosLA.sLlss.olF-IVpuLhElQHlTE+pLhppRhplpscap..........h.tpth.hpKhp-spp......t..sH..tLsllpttpp+chc...t...hp.phc-E.p.hDpKIllELDp..KVsDQQSTLEKAGVsGFYVTsNPpElplQMpLL-hI.h+Lppht.................... 0 26 32 53 +7153 PF07325 Curto_V2 Curtovirus V2 protein Moxon SJ anon Pfam-B_19350 (release 10.0) Family This family consists of several Curtovirus V2 proteins. The exact function of V2 is unclear but it is known that the protein is required for a successful host infection process [1]. 25.00 25.00 147.90 147.80 21.90 20.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.48 2 21 2009-01-15 18:05:59 2003-09-16 10:53:44 6 1 17 \N 0 27 0 122.70 76 100.00 CHANGED MGPFpVsQFPcNYPAhLAVSTSCFhRYNKWCILGI+pElEuLTLEEGEsFLtFQKEVKKLL+hKssFpRKCpLYEcIYKKYl.ssPEcKGp.spshsEEEED.ataEcIPMEEsCspcpssElcDV MGPFRVDQFPDNYPAFLAVSTSCFLRYN+WCILGIHQEI.EsLTLEEGEVFLQFQKEVKKLLRhKVNF+RKCuLYEEIYKKYVtNVsEKKGE.SSKCVA.EEEE.DhY-..aEEIPMEEsCsKcQc.EVcDV.. 
0 0 0 0 +7154 PF07326 DUF1466 Protein of unknown function (DUF1466) Moxon SJ anon Pfam-B_19433 (release 10.0) Family This family consists of several hypothetical mammalian proteins of around 240 residues in length. 25.00 25.00 31.50 31.00 18.80 18.70 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.57 0.70 -5.06 2 48 2009-01-15 18:05:59 2003-09-16 11:36:18 6 2 29 0 22 46 0 208.10 54 95.22 CHANGED MASpWQsMtTSVRRRSL.+pEQLEcpc.hpsssuH.ET..GsLGSLCRQFQRRLPLRAVsLsLtsGPSWKRLEoPEPtQQGL..AARSAKSALGAhSQRIQESCQuGTKWLhETQVKsRR.KRGAQKspGSPs.SLSQKsTRLs.....+ustDsh.ttHhRLSspMGsHsH.hpR.RREAAhRSPhSSTEPLCSPSESDSDLEPsGAGIQHLQKLSQcLDcAIhAEEptph.....hp ................................MASpWQuhtsSV..+RRSLpcpEQLE-pcthpPs..suH.ETSsGALGSLCRQFQRRLPLRAVsLN.LssGPSWKRLEoPEPtQQGLQAAARSAKsALGAhSQRIQESCQ.SGTKWLVE...T..QVK..ARR...R.+R..GAQKsuuoPs+S....L........Sp+...ST..RLou..s..sss...p...ust.....s.....shppp......t+pLSs....hG.scApP..hRRSRR.-AAh.RSPYSS....oEPLC......SP.p...ESDSDLEPVGuGIQ+LQKLSQcLD-AIhsEEptph.s............................. 0 1 2 6 +7155 PF07327 Neuroparsin Neuroparsin Moxon SJ anon Pfam-B_19487 (release 10.0) Family This family consists of several locust specific neuroparsin proteins. Neuroparsins are produced by the A1 type of protocerebral median neurosecretory cells of the PI-CC system and display pleiotropic activities: inhibition of the effect of juvenile hormone, stimulation of fluid reabsorption of isolated recta, induction of an increase in hemolymph lipid and trehalose levels, and neurotrophic effects [1]. 
21.40 21.40 22.40 26.20 20.50 20.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.59 0.72 -3.79 4 14 2009-09-11 14:17:53 2003-09-16 11:54:31 6 1 11 0 5 23 0 102.50 37 87.08 CHANGED MKPAAALAAATLLIAVILFHRAEANPISRSCEGANCVVDLTRCEYGEVTDFFGRKVCAKGPG-+Css....atpCGsGhcCpsthCoGCSl+TLQCa.h-uhs.Spp .........................................hhhlhhhlhLh.pps.tt..p....Rpsc..sstCs...s...Dh.s+CcYG..Vp.DhCGpcsCAKGPG-+CGG.....phaGhCG-GLhC.sCs..+CsGCSlcolpCa........................... 0 2 3 5 +7156 PF07328 VirD1 T-DNA border endonuclease VirD1 Moxon SJ anon Pfam-B_19558 (release 10.0) Family This family consists of several T-DNA border endonuclease VirD1 proteins which appear to be found exclusively in Agrobacterium species. Agrobacterium, a plant pathogen, is capable to stably transform the plant cell with a segment of its own DNA called T-DNA (transferred DNA). This process depends, among others, on the specialised bacterial virulence proteins VirD1 and VirD2 that excise the T-DNA from its adjacent sequences. VirD1 is thought to interact with VirD2 in this process [1]. 20.30 20.30 20.90 48.20 19.70 20.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -4.40 2 14 2009-01-15 18:05:59 2003-09-16 11:58:56 6 1 8 0 5 13 1 144.40 61 98.06 CHANGED MSptoRsTSS-hAlNQ+cslpVEGFKVVSsRLRSAEYEoFSaQARLLGLSDSMAIRVAVRRIGGFLEIDAcTRc+MEAILQSIGhLSSNluhLLSAYAEsPp.DLEAlRsERIAFGcuFAsLDGLLRSILSVSRRRIDGCSLLKsAL ........MSpts+sTSSDh.lsp+cuspl...EGFKVVSsRLRSAEYEoFScQARLLGLSDSMAIRVAVRRIGGFLEIDA-TRpcMEAILpSIGsLSoNIusLLsAYAEsPpsDLEAlpAERhAFGcuFAcLDGLLRSILSVSRRRIDGCShL+-AL.. 1 1 4 4 +7158 PF07330 DUF1467 Protein of unknown function (DUF1467) Moxon SJ anon Pfam-B_19588 (release 10.0) Family This family consists of several bacterial proteins of around 90 residues in length. The function of this family is unknown. 
23.00 23.00 23.40 33.30 22.40 22.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.86 0.72 -4.20 55 237 2009-09-11 13:34:12 2003-09-16 12:52:07 7 1 236 0 85 195 673 83.90 36 91.74 CHANGED MulsoulslYhllWahshFslLPhtl.+oQsEs.ucll.....sGTcsGAPsphpht+KslhTTllusllaulhhhlhhsGhlolccls....phh .....MslhoulAlYhllWWhsLFslLPhGl.RTQsEp.s-ls......GTssuAPsps+ltRthlhTTllusllaslhhhlhhsuhlslccls...h......... 0 22 51 62 +7159 PF07331 TctB DUF1468; Tripartite tricarboxylate transporter TctB family Moxon SJ anon Pfam-B_19347 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. This family was formerly known as DUF1468. 27.00 27.00 27.00 27.20 26.90 26.70 hmmbuild --amino -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.82 0.71 -4.53 199 1745 2009-10-20 16:26:54 2003-09-16 13:02:42 6 4 965 0 571 1522 1240 141.90 21 86.17 CHANGED tt-hhsullh..hslGhhhhhtu.hshshssst.....thGPuhFPhhluhlLhllGhhlhlpuhht.............................ttstthhsths........h+........slhhllsul..lhaslll....................p..........slGhllush............lhh.hhsshshs.........pt....phhtsl...llul......slsshs..ahlF...shhLslsL.P ........................................................................t..phhhuh.l.h...lhlu.hh.hh.htu....hph....ththsh........shG.Pt.h.aPh.hluslh.hl.h....ul.h.ll.lp.shh............................................tpssshhtp.hs...................h.p..............p.l.hhhl...shh......lhas.h.h.h....................p.........hlG..Fhluos................lhh.hsh.h.h.h.hu......................tp..........ph.hhsh.....lhuh....shslhh...ahlF...sthLslsLP........... 0 158 355 476 +7160 PF07332 DUF1469 Protein of unknown function (DUF1469) Moxon SJ, Finn RD, Sammut SJ, Bateman A anon Pfam-B_19352 (release 10.0) & COG5393 Domain This family consists of several hypothetical bacterial proteins of around 140 residues in length. 
The function of this family is unknown. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.46 0.71 -4.38 97 1760 2009-01-15 18:05:59 2003-09-16 13:06:22 6 4 1530 0 487 1143 145 115.40 24 80.40 CHANGED lspLlschssplspLlcsElcLA+sElpccsppsutusuhlsuAullshhulhhLhhslshuL..shh...........s.h...hAhLl....VuslhhllAullshhGhpclc...tshsPpcThcplccDhphlcpp .................................h....hpclss.hsphlcsclcLAtsE...lpc...cttp.hhp...hlhhhuhshlhuhhuLhs....L.hhhl.h.h.ul..phh..h....................sAhlh.ssslhlll.Ahlhulhshpp.h+...psph.ppThcpltpDhphlct.t........................................... 0 139 307 413 +7161 PF07333 SLR1-BP S locus-related glycoprotein 1 binding pollen coat protein (SLR1-BP) Moxon SJ anon Pfam-B_19392 (release 10.0) Family This family consists of a number of cysteine rich SLR1 binding pollen coat like proteins. Adhesion of pollen grains to the stigmatic surface is a critical step during sexual reproduction in plants. In Brassica, S locus-related glycoprotein 1 (SLR1), a stigma-specific protein belonging to the S gene family of proteins, has been shown to be involved in this step. SLR1-BP specifically binds SLR1 with high affinity. The SLR1-BP gene is specifically expressed in pollen at late stages of development and is a member of the class A pollen coat protein (PCP) family, which includes PCP-A1, an SLG (S locus glycoprotein)-binding protein [1]. 22.70 22.70 22.80 22.90 22.60 22.60 hmmbuild --amino -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.59 0.72 -3.74 37 134 2012-10-01 23:31:40 2003-09-16 13:35:05 7 1 17 0 112 138 0 55.50 27 69.59 CHANGED sppppupp.....Cpphl.........sspCssspCpshChpp....h+Gs...GpChs........ttphtChCtY.C ........t.....ttth....Cpphl.........sssCs..hspCpstCtpc....apGs...GpChs...............ttthpChCpY.C....... 
0 43 57 62 +7162 PF07334 IFP_35_N Interferon-induced 35 kDa protein (IFP 35) N-terminus Vella Briffa B anon Pfam-B_17864 (release 10.0) Family This family represents the N-terminus of interferon-induced 35 kDa protein (IFP 35) (approximately 80 residues long), which contains a leucine zipper motif in an alpha helical configuration [1]. This family also includes N-myc-interactor (Nmi), a homologous interferon-induced protein. 22.90 22.90 23.00 25.10 22.80 22.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.45 0.72 -4.01 6 78 2009-01-15 18:05:59 2003-09-16 13:47:53 8 3 29 0 36 73 0 71.60 41 25.89 CHANGED LltEIpcENhpLKcEIQKLEsELQpssRE.QI+EDlPcsKlKFoSsEsP....EsssQhuslSpShQss.KlsYELQKGQ .........lptlpcEphpLKpclQ........ELQp....tp+...-..l+c......DlP..csKlpFsssEsP....tpspQhpslupShpsss+lsY.LpcGp..... 0 2 3 9 +7163 PF07335 Glyco_hydro_75 Chitosanase; Fungal chitosanase of glycosyl hydrolase group 75 Moxon SJ anon Pfam-B_19431 (release 10.0) Family This family consists of several fungal chitosanase proteins. Chitin, xylan, 6-O-sulphated chitosan and O-carboxymethyl chitin are indigestible by chitosanase [1]. EC:3.2.1.132. The mechanism is likely to be inverting, and the probable catalytic neutrophile base is Asp, with the probable catalytic proton donor being Glu. (see the Chitosanase web-page from CAZY). 
21.00 21.00 29.00 26.50 20.80 20.50 hmmbuild --amino -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.23 0.71 -4.34 43 185 2009-01-15 18:05:59 2003-09-16 13:51:45 6 12 121 0 107 196 0 160.70 34 51.35 CHANGED MDlDCDGss........................hpC................p..sss.shQspTuFp...............................................h....shpsLsApthPYlVls................sshas.pspG..lpstslsAVls..ss+lhYGlhGDo..........sus....shlGEASlulAcsh..sp.......shsGssG..psss...DVh.......................Y.................IsFsGsc..ss.Pstth....spshpshppsl..pshG-cLlspl .......................................................................................MDlDCDGts........................................................sspC....................................s..sss.shQspTuFp...................................................t.shpsLsAshhPYVVhss...................sshasspptG..lcshSVsAVVs....ss+l.............hYGl..hGDT..........NGs.............shhGEASluhApsC.....sp.........sh.sGssG.....psss....DVl.......................Y...................IsFsGsc...uV.Putst....spshtp.tpol..tthGsphlt..h................................................................. 0 19 51 85 +7164 PF07336 DUF1470 Protein of unknown function (DUF1470) Moxon SJ anon Pfam-B_19432 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 180 residues in length. Members of this family are found in Streptomyces, Rhizobium, Ralstonia, Agrobacterium and Bradyrhizobium species. The function of this family is unknown. 
23.40 23.40 23.40 23.50 23.20 23.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.68 0.71 -3.82 101 865 2009-01-15 18:05:59 2003-09-16 13:55:16 6 4 373 1 337 872 14 127.80 18 66.25 CHANGED usphuLDhlNTsshts.......stsh-tLsss..ssltsWlttpshhsssssstttt...............httst...sLR-...slpplhput...tsspt...........slstlNphltpss.stspLst........t..ththphttsssss..tshlss...lAt.shspllsssph ...............................................phsl-hlNThh..h.t...........tt.hDhLsss........pslttWh.....p....tt..s.hsss..ttssttth..................................................tth+.......plR-...slctlhput..........sstt.................slsh.lNphltpss..stsplst......................ht.ht.....h.h.ssss..sh...........sth..lus...lAt..shhpllst...h........................................ 0 130 251 303 +7165 PF07337 CagY_M DC-EC Repeat Yeats C anon Yeats C Repeat This repeat is found in the CagY proteins - part of the CAG pathogenicity island - and involved in delivery of the protein CagA into host cells ([1]). It forms part of a surface needle structure, and this repeat may form an alpha-helical rod structure ([1]). A conserved -DC- and -EC- can be seen in regularly spaced in the alignment. 20.70 20.70 21.50 20.70 20.10 20.00 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.49 0.72 -4.46 13 2220 2009-01-15 18:05:59 2003-09-16 13:58:32 6 49 41 0 30 2236 1 32.50 51 48.99 CHANGED +AahDClspA+sEpE+p...EC.KLLocpt+chLpc ..+AYhDClspA+sEpE++...EC.KLLosEt+KhLpt......... 0 29 30 30 +7166 PF07338 DUF1471 Protein of unknown function (DUF1471) Moxon SJ anon Pfam-B_19452 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 90 residues in length. Some members of this family are annotated as ydgH precursors and contain two copies of this region, one at the N-terminus and the other at the C-terminus. The function of this family is unknown. 
21.40 21.40 21.70 21.90 21.30 21.00 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.39 0.72 -4.32 81 6384 2009-01-15 18:05:59 2003-09-16 14:09:38 8 3 551 5 450 1630 15 55.80 31 59.41 CHANGED slpshGslossut....so.s-lpptluc+A-ppGAphYhIhptps....ssphcuoAtlY+ ..............hp.hG.slSsout....uo.s-hcptluc+AccpGAssYpIsphpp.......sss.....hcuoA.lYK.......... 1 15 79 263 +7167 PF07339 DUF1472 Protein of unknown function (DUF1472) Moxon SJ anon Pfam-B_19493 (release 10.0) Family This family consists of several Enterobacterial proteins of around 125 residues in length and contains 6 highly conserved cysteine residues. The function of this family is unknown. 19.60 19.60 19.90 20.30 18.90 18.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.56 0.72 -3.70 2 74 2009-01-15 18:05:59 2003-09-16 14:16:46 7 3 48 0 0 59 0 78.20 61 47.89 CHANGED AWPCGFSshWP.pRVRAVPCLHLSRAGtDARVRFAAAVTRSLLPVCRDFPVVHPLRFRGLTLQLPsAVCVRLRLPLRPVHPRLIARLLWRHGTARCRthC- .....................................AWPCGFSVMWP.pRVRAVPCLHLSRAGhDARVRFAAAVTRSLLPVCR..DFPVV+PLRFRGLTLQLPsAVCVRLRLPLR.....PhhP.tL........................................... 0 0 0 0 +7168 PF07340 Herpes_IE1 Cytomegalovirus IE1 protein Bateman A anon Pfam-B_22587 (release 10.0) Family Expression from a human cytomegalovirus early promoter (E1.7) has been shown to be activated in trans by the IE2 gene product. Although the IE1 gene product alone had no effect on this early viral promoter, maximal early promoter activity was detected when both IE1 and IE2 gene products were present [1]. The IE1 protein from cytomegalovirus is also known as UL123. 
19.60 19.60 19.60 19.60 19.30 18.60 hmmbuild -o /dev/null HMM SEED 392 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.20 0.70 -6.02 2 150 2009-01-15 18:05:59 2003-09-16 14:24:06 6 2 14 0 0 143 0 212.90 60 59.56 CHANGED MESSu.KRKMDssNPDEGPSSKlPRPETPVoKAssFLpoMlpKEVNSQLsLGDPLFP-luE-sLKoFEcVTc-CsENPtKDlLtELVKQIKVRVDhVRp+lKpHMLpKYTQh-EKFTuAFN.MGGCLQsALDILDKVpEPFE-MKCIGlTMQsMYENYlVsE-pR-hWhtClK-LHDVuKsAAsKLGsALpAKApAKK-ELpRKMhYhsh+plEFFTKNSAFPKTTNGsStAhAALQsh.QCSP-ElhsaAQ+IhKhLDEERDKVLhHIDpIFMDILTTCVETMsNEYKVTSDAsMMTMYGuISLLoEFCRVLSCYlLEEoSVMlA+pP.ITK.-llSsMpRRIpEICM+VFAQYlLGsDPLRVCSPSV-DLRAIAEESDE-EAIsAashAT .................................ssPt.GsS.s........K.s+.-s..h..p.pAstaLpphLttEhp..s.lsLGDPLF...s.hs.pp..hcohEplhppt.pss..........................................................................TMQsMYENYIVPEDK..REMWMACIKELasVoKGAANKLGGALpAKARAKKDELcRKMhYMCYRNlEFFTKNSAFPKTTNGCSQAMAALQNLPQ.C.S.P.DEIMs........................................................................................................................................................................................... 2 0 0 0 +7169 PF07341 DUF1473 Protein of unknown function (DUF1473) Moxon SJ anon Pfam-B_19856 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. Members of this family seem to be found exclusively in Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 
25.00 25.00 38.10 38.00 21.20 18.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.99 0.71 -4.69 2 128 2009-01-15 18:05:59 2003-09-16 14:28:22 6 1 27 0 8 89 0 145.10 72 98.82 CHANGED MRYKhKILT+sKTacYsLKslPhYEWDpVLGF.sppDt.l.KLN-lphL+EITpLMIS.tFLDEFY.ILsppRca.phYK.hLshIlahsQashFph.pshKKPuLVYlppapspsGDalpaDYIsEpapY-hlhTS.pS.s.Np............pEhVsc ...MRYKMKILTKNKTYEYPL+VLPVYEWD+VLGF.NQSDA.lhKLNEVKYLREITSLMISPKFLDEFYlI.LDpNREFISYYKDYLVAIIYTAQFNTFHlDNDLKKPALVYLSEYENNVGDFVsFDYIN.ENF-YEKVsTSLoSso.NSp-................Lhsh......... 0 5 5 5 +7170 PF07342 DUF1474 Protein of unknown function (DUF1474) Moxon SJ anon Pfam-B_19882 (release 10.0) Family This family consists of several bacterial proteins of around 100 residues in length. Members of this family seem to be found exclusively in Staphylococcus aureus. The function of this family is unknown. 21.40 21.40 21.50 56.10 21.30 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.40 0.72 -3.78 5 167 2009-01-15 18:05:59 2003-09-16 15:10:26 6 1 114 0 3 77 1 96.30 55 95.33 CHANGED MNWEIKNLhsDLEVLKEKFEDLKDsHGWHFEEhYsHEPNHsLNKDEhI+EGsSYHERRIHN-QMhDLhHlYhcpFDcIlcKFcEIEKASS-.sFG-cSDDA .MNWEIKsLMCDlEllK-KlpDlsspHuWFsE-hFs...Ncl.oKcEhIsauhSYhEHRIpN-phh-LhplYLK-FspLIpKF+EIEKASS-..sFu-.SDDA................ 0 0 0 3 +7171 PF07343 DUF1475 Protein of unknown function (DUF1475) Moxon SJ anon Pfam-B_19887 (release 10.0) Family This family consists of several hypothetical plant proteins of around 250 residues in length. Members of this family seem to be found exclusively in Arabidopsis thaliana. The function of this family is unknown. 
21.10 21.10 21.50 21.20 20.30 19.00 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.06 0.70 -5.03 2 48 2009-01-15 18:05:59 2003-09-16 15:13:07 6 2 25 0 21 46 101 187.80 39 93.19 CHANGED MAssu.lssh+sVhsshhsLMLuTLVYThlTDG.Ph..R.-lhTPWhVsTllDFYlNlssIusWllYKEssW.uShhWslLLhhFGSlsTCsYlhlpLhclpsptsSpDPh.hLhLR....pGsh.ccKsShVlhuRhlFuhLGshMhusllYTshT.G.PF+h-LLhPWMsshLlsFYIsVhslSVWVsaKESshI.shlWlsLLIshGSlsTsuhIVlQLFplS.hDPlYhVL...Lps+sK.lNu..Gph. ........................................................h.hh+slhsshhhlMlu.sLlYThhT...DG.Ph..R.-lh..T..PWhssTllDFYlNlhslusWlhYKEssa.uuhhWhlhLhhhGShsTssYlh.phhclpsttsspsPh.hlhlR....pss..ppKp...VhhuRhlFuhLGhlMhuslsYTsh..TsG.PF+...h-.....LL..sPWMssoLlDFYIsVhslSVWVsaKESohIsshlWlsLLIshGSlsTsuY.IllQLhplS.tDPlhhVL....................tt..................... 1 9 15 18 +7172 PF07344 Amastin Amastin surface glycoprotein Vella Briffa B anon Pfam-B_19245 (release 10.0) Family This family contains the eukaryotic surface glycoprotein amastin (approximately 180 residues long).In Trypanosoma cruzi, amastin is particularly abundant during the amastigote stage. 
23.60 23.60 23.80 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.28 0.71 -4.61 76 430 2012-10-03 00:20:40 2003-09-16 15:13:54 6 2 19 0 67 443 0 156.20 30 75.97 CHANGED FlAFl........hVLVuT.PlDM..F.Rhpst.......s..psClTLWG..hKpsCpsspYshpssth.atsC....ssRhppF+sApAFAlISIhlauAAhlhGhl....hLhCC..............................................shhRhlCLsLNllGslTlslVWAsMsssYth........-sthC.s..thp..................pth.paGu..................GFuLhlsAWlL-llNIlhL ..............................................hlAFlFVlluT.Pls........F.+sctp.........ss........psClTLWG........hK........pcC.p.s.sp.Y..phshsph.at......pC........s..shhphFRh..ApAFuIISIhlhhuAhlhGhh.....hhh.s.h..............................................psh+..h..hs..hh...LslluhlTs..slVWssMsshYp.............ss.C.s.......thp..........................pth.paGs....................GFsLhVhu...WsLphlshhh..................................................... 0 54 59 67 +7173 PF07345 DUF1476 Domain of unknown function (DUF1476) Moxon SJ anon Pfam-B_19680 (release 10.0) Domain This family consists of several hypothetical bacterial proteins of around 100 residues in length. Members of this family are found in Bradyrhizobium, Rhizobium, Brucella and Caulobacter species. The function of this family is unknown. 25.00 25.00 25.70 25.60 23.20 22.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.29 0.72 -3.78 53 246 2009-01-15 18:05:59 2003-09-16 15:17:52 6 2 219 2 87 209 311 101.80 49 93.76 CHANGED M.ToFDDREcAFEsKFAHDpEhpFKApARRNKLLGLWAA-hLGhoGs-A-AYAppVVpADFEEuGD-DVhRKltuDLss.tuht.s-spIRs+MschhspA.ctQl ........................M.TshDDREcAFEpKFAhDpEhcFKAcARRNKLLGLWAA.Ep.LGhsss-A.-AYA+-VVtADFEE....AG....DEDVhRKVpuDLsu.tGlsho-ppIRt+MtphhtpAhpQl.............. 
0 28 56 65 +7174 PF07346 DUF1477 Protein of unknown function (DUF1477) Moxon SJ anon Pfam-B_19762 (release 10.0) Family This family consists of several hypothetical Nucleopolyhedrovirus proteins of around 100 resides in length. The function of this family is unknown. 21.60 21.60 84.20 83.90 21.20 19.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.71 0.71 -4.06 19 41 2009-01-15 18:05:59 2003-09-16 15:20:19 6 1 40 0 0 37 0 112.00 31 96.21 CHANGED M.......................pt.t.phpsutsp...........sltshss......su+phFYphshs....alpph.sssphshsTltslhDtIIphEpslFs+ShVLNhllsFLlspSDGss.lQstl.splLsaLLpKYh ...................h...........th.pstsp...........slsslhs.....psu+phFYplshu....alpphh.hsspssl.TlpshhDtIIphEcslFs+ShlLNhlVsFLlspSDGss.lQstl.splLsaLLpKY................ 0 0 0 0 +7175 PF07347 CI-B14_5a NADH:ubiquinone oxidoreductase subunit B14.5a (Complex I-B14.5a) Vella Briffa B anon Pfam-B_19436 (release 10.0) Family This family contains the eukaryotic NADH:ubiquinone oxidoreductase subunit B14.5a (Complex I-B14.5a) (EC:1.6.5.3). This is approximately 100 residues long, and forms part of a multiprotein complex that resides on the inner mitochondrial membrane. The main function of the complex is the transport of electrons from NADH to ubiquinone, accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space [1]. 25.00 25.00 27.60 27.30 19.70 17.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.30 0.72 -4.22 9 115 2009-09-11 15:22:33 2003-09-16 15:42:13 7 1 89 0 77 113 0 95.00 41 80.28 CHANGED o.hlQ+lRsFLLGR..caphsLRapDtlucRTQPsPpLPcGPuHKLSuNYYspRDuRREVsPPlsl.hpspKtLhAtpsuuht......p+hssPG.psasW- ...........o.hlpplRsah.G+...ptph..tLRap-..lupR..TQPPPpLPsGPuH..KLSsNYYhTRDuRREshPPhll..hss...p.Kt.Ls..us...ps....s.....tp..tss...s...p+ssoPu..hhtW.......................................... 
0 25 31 55 +7176 PF07348 Syd Syd protein (SUKH-2) Vella Briffa B, Zhang D, Aravind L anon Pfam-B_19909 (release 10.0) Family This family contains a number of bacterial Syd proteins approximately 180 residues long. It has been suggested that Syd is loosely associated with the cytoplasmic surface of the cytoplasmic membrane, and that interaction with SecY may be involved in this membrane association [1]. Operon analysis showed that Syd protein may function as immunity protein in bacterial toxin systems [2]. 25.00 25.00 26.00 25.50 24.60 24.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.71 0.71 -4.94 43 745 2012-10-01 20:46:44 2003-09-16 16:03:31 7 2 729 2 81 305 25 173.90 60 95.43 CHANGED lppALpsFhppYhptappppuphPtsth..s....hsSPCl...........psp-stlhWpPl..........+s.sschsslEcAL-lpLHssIpsFasshauuchtupap......stplpLLQlWsc-DFp+LQcNllGHLlMp++LKpssTlFIussss.-tpllolsNhoGpVhLEphGppp+chLAssLspFLspLpP .......T.AQAL+sFTsRYCDAWpEcHtSaPlSEELYG....VPSPCIl..........uoo-DAVaWQPQ.........PF.s.u.E.pNlNAVERAhDIslQPsIHsFYTT....QFAGD.MpAQFu......Dh+LTLLQsWSEDDFcRVQENLIGHLVTQKRLKLsPTLFIAThEp.EL-VISVCNLSGEVhpETL.......GT+pRThLAusLAEFLsQLcP.......... 0 9 25 51 +7177 PF07349 DUF1478 Protein of unknown function (DUF1478) Moxon SJ anon Pfam-B_20105 (release 10.0) Family This family consists of several hypothetical Sapovirus proteins of around 165 residues in length. The function of this family is unknown. 25.00 25.00 25.10 25.10 20.20 18.40 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -10.94 0.71 -4.86 9 46 2009-01-15 18:05:59 2003-09-16 16:04:07 6 2 46 0 0 37 0 144.80 55 98.42 CHANGED MA.Ps.pspcplsth.hhLT......RLspLsRPHPhLLhLIRNNPMGP+sAW+WLL.LsQSN.MSLpQYuTALQShVLLLGhhsCPRELhhDLa+FIPTLh+Th.IsptCG.uGsssLRssapSLuLssLLusSLLLShHpGLh.R.SshpsCCLTLpLMLA.hsQFh. 
.........................MAPs.pspphhsth.hhLT......RLAQhlRPHPhLLhLIRsNPMGPHsAWpWLL.LVQSN.MSL+QYATALQSFVLLLGTTsCPRELh.hDLaRFIPTLT+T.hhshtCG.GGsssLRsshpSLsLushLusSLLLS...thh...ssh.....h........................... 0 0 0 0 +7178 PF07350 DUF1479 Protein of unknown function (DUF1479) Moxon SJ anon Pfam-B_20226 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins, of around 420 residues in length. Members of this family are often known as YbiU. The function of this family is unknown. 19.60 19.60 19.70 19.80 19.50 19.50 hmmbuild -o /dev/null HMM SEED 416 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.38 0.70 -5.99 29 709 2012-10-10 13:59:34 2003-09-16 16:07:15 7 6 609 3 194 467 337 392.90 54 91.46 CHANGED ssL.Ps....papplKppLh...sp.....ctlhtoWc+lltslpcclcplpptGss..slPplsFsDItssphs........sphttpl+cRGssVIRuVhsccpAhpWpp-ltcYlcpN...........sp.htshs..sscPplYplYWSpsQlcARtHPphhtsppFhs.pLWp........st..sspshhshcpslsYADRlRhR.PGss.........................................................................phuLusHlDuGSlERWp-csY..pplYcplFc..GcWE.caDPa-A...spRssupppha.......tususCShFRoFQGWhALSshtPspGTLplhPlsc.uhAYhlLRPhFssss........................................................................sschsGAhPGpu.phssph.HPcLpL.cshlsIPclpsGDhVaWHCDllHuV-stHpGpssSsV......................hYIPusPhstpNstYltcQRcuFlpGpsPPDFstts.................EssatGRss.pc..lsph.....GhpuhGl 
............................c....AAIRphKpuLRAplG.D....sQtlFspLscsIAspVsE...Issl...+ApGps...................VhPhlsauDI..t.sGp.lo........spp+tpIKRRGCAVI+GhFPREQALu..WcpshlDYLD+N+FDEh...............h+ustDs.aFGoLu..AS+PpIYslYWSpAQhpARQSccM.s.t.AQsFLN.R.LWp..................hE...pDGKpa.........FsPDhsllYsDRIRR.RPP.GoT.........................................................................SpGLGAHsDSG.uLERWLhPuY...p+VassVFs...Gs..hp....p............YDPWcA...AHRT-VEEash...............csospCSVFRTFQ..................GWTALSD....h.hP........G......pGhLHVlPIPc..AM.AYlLLRP...LhDDVP.........................................................................-D-LCGsAPG....Rs....L....sl....SE.p....W..HPL..Lh.cALoSIPpLEAGDoVWWHCD................VIH....S.Vs......sVc.NtpG.auNV.........................................................MYIPAA.PhCEK.NlAYs++.+sAhpcGsSPsDFPp-D....a.............EosacGRh.TltD..Lshc.....GKRALGh............................................... 0 48 99 165 +7179 PF07351 DUF1480 Protein of unknown function (DUF1480) Moxon SJ anon Pfam-B_20253 (release 10.0) Family This family consists of several hypothetical Enterobacterial proteins of around 80 residues in length. The function of this family is unknown. 25.00 25.00 43.00 42.80 18.70 18.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.65 0.72 -3.99 6 520 2009-09-11 08:41:35 2003-09-16 16:08:45 8 2 509 0 27 99 0 76.60 83 99.24 CHANGED MsKTsV+IuuFEVDDApLSSss.cs.-pTlSIPCKSDPDLCMQLDGWDEpTSIPAlLDGKcpLLY+pHYD+ppDAWVMRls .........KTSVRIGAFEIDDuELHGES.PG..-RTLTIPCKSD...PDLCMQLDAWDAETSIPAlLNGEHSVLYRT+YDQQSDAWIMRL.A... 0 1 6 17 +7180 PF07352 Phage_Mu_Gam Bacteriophage Mu Gam like protein Moxon SJ anon Pfam-B_19455 (release 10.0) Family This family consists of bacterial and phage Gam proteins. The gam gene of bacteriophage Mu encodes a protein which protects linear double stranded DNA from exonuclease degradation in vitro and in vivo [1]. 
24.70 24.70 25.20 28.10 23.90 24.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.71 0.71 -4.68 27 345 2009-01-15 18:05:59 2003-09-16 16:14:02 7 1 306 2 48 280 5 146.00 27 85.80 CHANGED sh-plshsl+clu-lppchpclcsthscclscIc-thpsph...........cslpsclphlpptlpsascsp+sEhs...ctKohphshGplpaRtppsssth....sh-sllctL.+ph....Gh.pca...........I+scE-lsKcslhppsc.............sspslsGlplppt.-sFtlcs ..................................sh-plshsl+clu-lppcht+lps.......th.......scplscIpcphssph...........csLppclchlppslpsascsp+cEhs...........cpKohshshGclsaRhpssssph.p......sh-sllchL.+ph...........GL...pca............I+s.KE..ElsK-Alhpp.c..............sstslsG.lplhpt.-sFhlcs................................................................................... 0 27 44 47 +7181 PF07353 Uroplakin_II Uroplakin II Vella Briffa B anon Pfam-B_19993 (release 10.0) Family This family contains uroplakin II, which is approximately 180 residues long and seems to be restricted to mammals. Uroplakin II is an integral membrane protein, and is one of the components of the apical plaques of mammalian urothelium formed by the asymmetric unit membrane - this is believed to play a role in strengthening the urothelial apical surface to prevent the cells from rupturing during bladder distension [1]. 
20.40 21.30 20.40 21.30 19.30 21.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.31 0.71 -4.94 4 38 2009-01-15 18:05:59 2003-09-16 16:33:11 7 1 28 0 18 34 0 160.30 65 91.72 CHANGED MASsLPVpTLPLILILLAVLuPGAA.DFNISSLSGLLSPALTESLLVALPPCHLTGGNATLMVRRANDSKVV+SSFVVPPCRGRRELVSVVDSGSGFTVTRLSAYQVTNLsPGTKYYISYLVpKGsSTESSREIPMSTLPR+NMESIGLGMARTGGMVVITVLLSVAMFLLVlGhIlALALGARK ..................................Mss.hPl.TLP.hhLlLLslLuPGuA....-FNISSL...SGLLSPALTESLLVALPPCHLTGGNATLhVRR..ANDSKVVpSuFVVPPCRGRRELVSVVDSGuGFTVTRLSAYQVTNLsPGTKYYlSYhVpKG...suTESSp....ElsMSTLPR+phEoIsL.GM.ARTGGMVVITVLLSVAMFLLVlGhIlALALGs+K.................................................. 0 1 2 4 +7182 PF07354 Sp38 Zona-pellucida-binding protein (Sp38) Vella Briffa B anon Pfam-B_19996 (release 10.0) Family This family contains a number of zona-pellucida-binding proteins that seem to be restricted to mammals. These are sperm proteins that bind to the 90-kDa family of zona pellucida glycoproteins in a calcium-dependent manner [1]. These represent some of the specific molecules that mediate the first steps of gamete interaction, allowing fertilisation to occur [2]. 
21.20 21.20 21.20 23.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -11.82 0.70 -5.75 5 94 2009-01-15 18:05:59 2003-09-16 16:59:07 7 5 32 0 42 97 0 231.60 49 65.01 CHANGED MDh+LuscElVDPsYpWpGPsG+sLoGNSplNITsTGpLlLpcFpESLSGlYTCTLSYKllcApTQEEssl+cpY+FhVYAYREPcYsYQhoVRFTA+uCsutYNssFhRtLKKIL-sLISDLSCcVptPSY+CHSVKhP++GL.sELFlsFQVNPFAPGWcshCsu.StDCEDoTN+plpKA+DRIE-FFRpQsYIL+HpFps.LPsIHYVEGSLQVVhIDsCRPGFG+NplpHssCAuCCVVCSPGTYSPDsuloCpsCsSuLl..YGAKoCP .............hs.cLpppEllDPoa.WhGPptKhlo........tNsphpITpTGp.LhhpsF.EshSGlYTChLpYKs....T.EEhhhphphcah..laAYREPcY.YQhssRapst.sCtuhaN..F.+hLhpILspLl.DLSCcl...p.cCH.pVch.+tGL.pELFhsF..pVs.shssthtst.sst.shsCE....s.pp..l.p.A+shIEcFFppQs.....lht+php...lPthaal-t...ohQhVhlspChPGaGhN.hhHspCspCC.VlCSPuoasPcsslpC.pCs..osh...h..YGAKsC............ 0 4 5 12 +7183 PF07355 GRDB Glycine/sarcosine/betaine reductase selenoprotein B (GRDB) Vella Briffa B anon Pfam-B_19711 (release 10.0) Family This family represents a conserved region approximately 350 residues long within the selenoprotein B component of the bacterial glycine, sarcosine and betaine reductase complexes. 
19.40 19.40 20.00 19.70 19.20 19.30 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.07 0.70 -5.52 12 440 2009-01-15 18:05:59 2003-09-16 17:22:43 7 2 262 0 72 365 148 241.10 30 90.78 CHANGED Mt.hKlVHYINQFFAGlGGE-KADhtPclt-u..sGsuhtLsthlcscAElltTVICGDSYas..ENh-cAppplLchlcphpPDlhlAGPAFNAGRYGsACGsIsKhVp-cLsIPulTuMYhENPGs-haKKslYllpTusSAAuMRculPthAcLAhKlhKGE.cIGsPpcEGYhsRGIRhNaFtE-R..GucRAV-MLlKKLpGEpFpTEaPMPsFDRVsPssAl+DlSKAKIALVTSGGIVPKGNPD+IESSSAS+YGcYDIsGhccLostsaETAHGGYDPsaANtDPsRVlPVDVLR-hEKEGhIGcLHchFYoTVGNGTuVAsuKpaupEhstcLhpsGVDAVILTST ...................................................................+hlhhlsp..uthGu--pAph..thcpt..hGsuh.h....h.t.t.....hclhsTlhCGDpah...pp.-p.s.tphhthhpphpsDhhlsGPuhphspaG.hsutlst...pthslPsl..suM..E.N.s.u..h.phahp..p..h.Ilp.h.tps.uhshppsh.thst.hs.thhp.tc.thh.....................................................................h..t.htphplAlhTsuG.l.h.....h.t...t.......cp.....s.st.pat.h.h.......t..............t.ph.s.Hu..GaD.s.s.s.tD.shhhPlDhh+chtpcGhI.ttlh.hah..shh.Gs..G.s.s.....t.p.t.p.ph.u.tlst.LhtttVDuVlhsu.............. 0 38 58 65 +7184 PF07356 DUF1481 Protein of unknown function (DUF1481) Moxon SJ anon Pfam-B_20042 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 230 residues in length. Members of this family are often referred to as YjaH and are found in the Orders Vibrionales and Enterobacteriales. The function of this family is unknown. 
25.00 25.00 25.80 25.30 19.40 19.40 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.25 0.71 -4.85 17 658 2009-01-15 18:05:59 2003-09-17 11:01:20 7 3 648 0 52 244 1 185.70 59 83.59 CHANGED uuGhstscusshhWhp-ctsps..thhusasshtssuphpocYRWppspL+plpRps.......stsP.pl+lRFspcG-ssa.QhclsGch..LosDQlshYphcAcpllptocsLcpsplhLhQG+Wp..spolpTCpG.phhp.-h-pph.sal.pR...uohcs.lAaLtuscs........ppLLLlsss-a.ChhpPp ...................ASGFAtDpGAVRIWRKDosDpV..HLLuVFSPW+SG.sTTTpEYRWQGDsLoLIplNlY......SKPPhsIRARFDc+G-LSFMQREssGcKQQLSNDQIsLYRYRA-QIRQhSDALRpGRVlLRQGRWH.h-pTVTTCEG.pTlKPDLDSpAIuHIERRQs+SSV-VSVAWLEAPEG........SQLLLVANSDF.C+WQPp...... 0 3 12 32 +7185 PF07357 DRAT Dinitrogenase reductase ADP-ribosyltransferase (DRAT) Moxon SJ anon Pfam-B_20108 (release 10.0) Family This family consists of several bacterial dinitrogenase reductase ADP-ribosyltransferase (DRAT) proteins. Members of this family seem to be specific to Rhodospirillum, Rhodobacter and Azospirillum species. Dinitrogenase reductase ADP-ribosyl transferase (DRAT) carries out the transfer of the ADP-ribose from NAD to the Arg-101 residue of one subunit of the dinitrogenase reductase homodimer, resulting in inactivation of that enzyme. Dinitrogenase reductase-activating glycohydrolase (DRAG) removes the ADP-ribose group attached to dinitrogenase reductase, thus restoring nitrogenase activity. The DRAT-DRAG system negatively regulates nitrogenase activity in response to exogenous NH4+ or energy limitation in the form of a shift to darkness or to anaerobic conditions [1]. 
25.00 25.00 69.20 41.30 21.50 16.00 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.73 0.70 -5.20 13 92 2009-01-15 18:05:59 2003-09-17 11:15:03 6 3 79 0 43 88 13 252.50 44 92.94 CHANGED GHSTNLVGlPs-hLuSssFN-cP.hPLHIuGVREMNssLFEMLupApsLt-AG-AFhsYMsAhFGlDsEQptsc.....sssG+..RRFRuSaLRLL+GWGaDSNGsEGAVLKGWVESRFGLFPTFHKpsIs+huosuWtsYVEEKMuSRFHNNuIalQLDLLYEFCQWALsR....assPGc.oHlsLYRGVNsFcEHplltRlD+RpsVlRLNNLsSFSSDR-lAsCFGDpILTs+VPlsKVlFFNsLLPuaPLKGEGEYLVIGG-YRVssSh.l .......thNhsslPshlluShtFNcpP...hsLcIsGVRchpssLFchLsttss.t-tupsFpcYMsshFsLt..p.pt.........stsuc...+Rh+..uSYLRLL+GWsaDSNusEGAVLKGWVESRFGLhPoFH+t.lsphsopAahpYhp-+hsupa+sNuI.sQLDLLYEaCQatltR.....th..Pup..pHlpLYRGsNchsEHpllt.....ch.scR.ptllRLNNLsSFoo-R-hAspFGDhlLcspVPlsKllFFssLLPs.hL+GEuEaLVIGGcYcVcht.h.... 0 16 31 35 +7186 PF07358 DUF1482 Protein of unknown function (DUF1482) Moxon SJ anon Pfam-B_20128 (release 10.0) Family This family consists of several Enterobacterial proteins of around 60 residues in length. The function of this family is unknown. 25.00 25.00 28.10 27.70 23.50 16.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.75 0.72 -4.38 12 929 2009-09-11 12:49:17 2003-09-17 11:20:43 6 1 454 0 29 221 0 57.20 57 86.88 CHANGED hFALVLhVshlsGsspDlllulYsopppChsAtsEQ+lp.GsCaPl-chIc..s..hPAu ..hFALVLhVhhlsGtspDIlVs.VYsTcQpClhuhs-Q+Ip..GsCaPl-chIc...s...hPAt.... 0 2 5 16 +7187 PF07359 LEAP-2 Liver-expressed antimicrobial peptide 2 precursor (LEAP-2) Moxon SJ anon Pfam-B_20235 (release 10.0) Family This family consists of several mammalian liver-expressed antimicrobial peptide 2 (LEAP-2) sequences. LEAP-2 is a cysteine-rich, and cationic protein. LEAP-2 contains a core structure with two disulfide bonds formed by cysteine residues in relative 1-3 and 2-4 positions. LEAP-2 is synthesised as a 77-residue precursor, which is predominantly expressed in the liver and highly conserved among mammals. 
The largest native LEAP-2 form of 40 amino acid residues is generated from the precursor at a putative cleavage site for a furin-like endoprotease. In contrast to smaller LEAP-2 variants, this peptide exhibits dose-dependent antimicrobial activity against selected microbial model organisms [1]. The exact function of this family is unclear. 21.20 21.20 24.50 23.20 19.60 19.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.85 0.72 -4.32 2 67 2009-01-15 18:05:59 2003-09-17 11:26:34 6 2 48 1 28 57 0 75.00 52 66.92 CHANGED MhpLKLFAVLhhCLLLLuQVsuSPlPp.SSAKRp.RRMTPFWRuVSLRPIGASCRDDSECITRLCRKRRCSLSVAQE ........................lhAhLh.lhLLLls.Ql.suS..P.l........P.......-...........l..........SS.........u.........+...RR.+...RMTPFWRuVSLRPlGASCRDsSEClT+LCR+t+CShs............................................ 0 1 2 8 +7189 PF07361 Cytochrom_B562 Cytochrome_b562; Cytochrome b562 Vella Briffa B anon Pfam-B_18074 (release 10.0) Family This family contains the bacterial cytochrome b562. This forms a four-helix bundle that non-covalently binds a single heme prosthetic group. [1]. 27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.37 0.72 -3.61 25 795 2009-01-15 18:05:59 2003-09-17 11:35:06 6 2 709 109 71 320 3 102.50 48 80.57 CHANGED s-LcssMpphstshppstcAssspphcpulschcsts.cupptp.Pschcs....p.thpsYpcGhcpLlsplDpApthsppGcLc-AKpuhpclpslRpcYHcKa+ ..........tDL--sM-sLscNh....Kll...p...K.A.DsAsplKsA...Lo+MRuA..AlDAQKus.PPKLEsK....usDSPE....M.KDFRHGFDlLlGQIDcALK.LAsEGclc.EAp...AAAcpLKsTRNsYHcKYR.............. 0 6 22 46 +7190 PF07362 CcdA Post-segregation antitoxin CcdA Moxon SJ anon Pfam-B_20349 (release 10.0) Family This family consists of several Enterobacterial post-segregation antitoxin CcdA proteins. The F plasmid-carried bacterial toxin, the CcdB protein, is known to act on DNA gyrase in two different ways. 
CcdB poisons the gyrase-DNA complex, blocking the passage of polymerases and leading to double-strand breakage of the DNA. Alternatively, in cells that overexpress CcdB, the A subunit of DNA gyrase (GyrA) has been found as an inactive complex with CcdB. Both poisoning and inactivation can be prevented and reversed in the presence of the F plasmid-encoded antidote, the CcdA protein [1]. 21.40 21.40 21.40 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.24 0.72 -4.11 25 640 2012-10-02 18:44:02 2003-09-17 11:44:48 7 4 509 9 114 324 32 67.90 35 86.53 CHANGED sKKssNlolsu-LlppA+shsINlStslEpuLpcpl+c.......pcscpWpcENccAIcshNchl-ppGhFuD-aRpF ...................ppploVTl-s-LhppA+s....h....s..l.NlSuhlssulppEl+c........pt.s..c..R..W.p..t..E.Np..cu.httlsphh-tpG.auD-.Rsa....................................... 0 27 64 88 +7191 PF07363 DUF1484 Protein of unknown function (DUF1484) Moxon SJ anon Pfam-B_20389 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 110 residues in length. Members of this family appear to be found exclusively in Ralstonia solanacearum. The function of this family is unknown. 21.20 21.20 23.10 22.40 20.90 19.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.32 0.72 -3.71 9 55 2009-01-15 18:05:59 2003-09-17 11:47:25 6 1 19 0 24 52 0 101.50 40 86.61 CHANGED cpppuPQhLAhupp+pLlAQLApphuphs+RstsphAusltpLcssuppIccsTE-uCucLLsVSAGLtGILpLLDLpSDRus-scuLHCLLsPLKpQLDsALsclQcML .................................t..p.sPphLAhsppppLhupLstpss..s++stt.httslpQLcssut.Ipposc-uCApLLsVSuGLsGILpLL-lpS-+uh.-.C+sLHCLLsPLKtpLDpALs-lQcML...... 0 4 13 16 +7192 PF07364 DUF1485 Protein of unknown function (DUF1485) Moxon SJ anon Pfam-B_20495 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 300 residues in length. 
Members of this family all appear to be in the Phylum Proteobacteria. The function of this family is unknown. 22.00 22.00 23.00 23.00 21.80 20.00 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.80 0.70 -5.22 60 567 2009-01-15 18:05:59 2003-09-17 11:50:56 7 6 358 1 215 587 505 283.50 31 58.09 CHANGED MRlhlAultpETNTFuPs.oshssFp........thhpGsshhpshps.ssssluualchAct...............pGh.-ll....sslhAtApPuGhVscsAaEplps-ILssl+uu....hshDulhLsLHGAMVu-shDDsEG-LLpRlRsllGsclPlussLDLHuNlTppMlppADsllua+pYPHlDhhEsGpcsscllhchl..pGch+PshuhtchPhlhsh...hsTsppPh+shhcthtphEsc......llulSlhhGFshADhs-sGssllshu-.....DtstAcpsAcclupplashRscahs..phhsh-pulscAhs ........................................M+IhlAuhtpETNTFuPs..sshssFt.........httGsch...ht..t....ph....hssshsuhhchs.t.t..................................p.G.h..pll....sslhAtAtPuG....hVscpAaEplpscllstl...css....h..lDulhLsLHGAMls-sh-..Ds..EG-..LLpRlRtllGs....slPlusshDhHuNlotchlppsDllsua+paPHsDh.hEouc+....us.clL.hchL.......c.......u.......c......h.......+..........P.s.huhhclPhlhsh.t..hsTs.t-Ph.+uhhs....tlttlEtc..G...llusSl....hhGFs..h..AD..hPcsGssllls..ss..........Dt.stupthAccLupthhstRppath...shhshcpulspAh.t............................................ 0 35 109 165 +7193 PF07365 Toxin_8 Alpha conotoxin precursor Moxon SJ anon Pfam-B_20562 (release 10.0) Family This family consists of several alpha conotoxin precursor proteins from a number of Conus species. The alpha-conotoxins are small peptide neurotoxins from the venom of fish-hunting cone snails which block nicotinic acetylcholine receptors (nAChRs) [1]. 
21.60 21.60 21.60 22.80 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.86 0.72 -4.00 22 253 2009-01-15 18:05:59 2003-09-17 12:37:11 7 1 43 19 0 244 0 41.20 39 81.76 CHANGED TTVVSFTSD.RASDGRNAAAppKsScLluhs.h+.tCCup......PsCtsppsthC.G ......................s....stpsAApp+sscLhshs..h..p..sC..Cup......PsCtssps.hC............ 0 0 0 0 +7194 PF07366 SnoaL DUF1486; SnoaL-like polyketide cyclase Moxon SJ, Bateman A anon Pfam-B_20348 (release 10.0), Pfam-B_4335 (release 18.0) Domain This family includes SnoaL [1] a polyketide cyclase involved in nogalamycin biosynthesis. This family was formerly known as DUF1486. The proteins in this family adopt a distorted alpha-beta barrel fold [1]. Structural data together with site-directed mutagenesis experiments have shown that SnoaL has a different mechanism to that of the classical aldolase for catalysing intramolecular aldol condensation [1]. 28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.19 0.71 -4.55 29 1134 2012-10-03 02:27:23 2003-09-17 12:44:11 7 18 662 23 389 2430 1653 122.50 20 66.56 CHANGED thhhphatshh....sptch....cthschlsschhtps........sshhGhcuhtthhpthhp.uhPDlph...plcphls-u.D+VssRhphpGoap.GthhG.....hss..oG+plphpthslh+lpc.GKIsEpWshhDhhulhpQL ...........................................................................t...hphh.thh....s...pt.ph.....s.h.h.s.p....h.h.s....s....c..h..h.......p...............................sth.G...h.....p..u...h.t....p.h.h........p.....t....h..h..p.....s.......h......P.....D....hph............pl..c..........p...h...l......s.........p.....u.....-........p..........V.....s......s.....+..h....p.....h.pG..oa.....p....G.....h....h....s....................h.s.s..oG+.plph...p...t.hsh..h+..h..c.s....G..+Is..-pWt.hhDthshhtQl....................................... 
0 95 233 320 +7195 PF07367 FB_lectin Fungal fruit body lectin Moxon SJ anon Pfam-B_20370 (release 10.0) Family This family consists of several fungal fruit body lectin proteins. Fruit body lectins are thought to have insecticidal activity [1,2] and may also function in capturing nematodes [3]. 25.00 25.00 36.50 42.20 19.40 23.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.86 0.71 -4.57 14 44 2009-01-15 18:05:59 2003-09-17 12:52:52 6 4 31 32 22 53 0 129.80 40 82.04 CHANGED MSYTI.....plRlhpss..sshhplVE+TsWhYANGGTWo-sc..Gth.lLoMGGSGTSGhLRFp.ssuG-hFhVslGVHNYKhWCDllssLps.-sTulplpPcYY........susRhpsp.tQhushpspstcG+slplpaphs-GNsLpAsls .................MsYsIplclhpsp..sshhplVEpssWpaupGGTWo-ts..Gth.sLoMGGSGTSGhLRFp.sssGEtFhVslGVHNYKpWCDIlssLss.stTultlhPcYY........s..sstRhp.hp.pQhsphphp..s...pGcslthpYpht-GssL.ssl.h............................. 0 7 13 20 +7196 PF07368 DUF1487 Protein of unknown function (DUF1487) Moxon SJ anon Pfam-B_20425 (release 10.0) Family This family consists of several uncharacterised proteins from Drosophila melanogaster. The function of this family is unknown. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.26 0.70 -5.11 7 111 2012-10-02 17:28:28 2003-09-17 12:55:20 6 3 19 0 51 291 82 213.40 36 71.11 CHANGED sWpuPphMllhc-GDlssAhahLlculppPFAsssVAolhVpEoIt-EhlcRl+sph+PLspclupHPsYlpslpclcp..hpschIhu...........psh.ssASPllVh.DhsHpahGs.tPTGllThHTFRshpEssplht+E.sLsFsuVslWsE+lussY-Llstls.sshahlNChsssLp.Ihp.atsppspVllt+saHYEoLhlsGchKhIVaPlushh ...........W.uPpLMllF-sGDlso.Ah.c.h..Ll..p.S..L...p...s..P.F...u..ss..uVA..o..VLlpESIt-pFlpplts...ch........+......P.......L........s......p........p...........V...u....p....H.....P...s.....Y...l....+....o..L......p.....p..lcp.......Lps.csltu....................c.s.h..sp..u...S.....PllVh..D.h...s...H......p...a..L.G....s....GP...T..G.....VlTl.HTFRTspEAsplt.p+E..sLsas.u.VS.lWsE+lussY-LlstLs.sshahlNChsssLsPIhp.atsppspVhltcsYHYEoLhlssch+lIVFPlus....................................................................................................................................... 0 9 10 32 +7197 PF07369 DUF1488 Protein of unknown function (DUF1488) Moxon SJ anon Pfam-B_20604 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 85 residues in length. The function of this family is unknown. 21.20 21.20 21.50 21.20 21.10 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.62 0.72 -4.14 70 961 2009-01-15 18:05:59 2003-09-17 13:14:29 6 1 762 1 202 509 12 78.20 37 90.60 CHANGED sIpFssp...saDssppslpFsuhssGtplpCtlospsLpch.st...tssp.....ppphlssFcptRtcIEchAcphlpp....sststlhLp .................tItFPsp..tpastsppslhFsAhlsGhplsCAIos-uLt.+F.......suss.....scp..hLuuFcpaRaDlEEtAEsLIpc.p..scpGhlhL......................... 
0 18 60 126 +7198 PF07370 DUF1489 Protein of unknown function (DUF1489) Moxon SJ anon Pfam-B_20654 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 150 residues in length. Members of this family seem to be founds exclusively in the Class Alphaproteobacteria. The function of this family is unknown. 25.00 25.00 32.20 31.80 18.30 17.50 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.79 0.71 -4.65 54 250 2009-01-15 18:05:59 2003-09-17 13:18:14 6 1 248 0 84 197 185 137.40 50 96.12 CHANGED LlKLsVGs-Sl--LpsW.sp+....ttts.ss.shHlTRMhPKRtsElLs.GGSlYWVIKGhltsRQpllsl-phsssDGIpRCslVL-PcllcspspP+RPFQGWRYLpspDAPsDLs.tucsst.ssLPscLpppLs-LGll ..........LlKLsVGs-Sl-DLtsWhspR........pst...uh..s..spth.HsTRMhPKRt-ElLs.GGSLYWVIKGplpsRQ+llcIcshscu-GIsRCpLVL-Pcll.spspP+RsFQGWRYLpsp-APtDLs....sucuut...ssLPscL+pELscLGLL... 0 23 54 64 +7199 PF07371 DUF1490 Protein of unknown function (DUF1490) Moxon SJ anon Pfam-B_20678 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. Members of the family seem to be found exclusively in Mycobacterium species. The function of this family is unknown. 21.00 21.00 21.30 21.70 20.80 20.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.85 0.72 -4.03 8 169 2009-01-15 18:05:59 2003-09-17 13:37:13 7 1 75 \N 23 55 1 90.00 60 95.79 CHANGED MVhaGLLAKAusoVVTGlVGVuAYEsLRKAluKAPLRpsAVoustLGLRGoRKAE....EAAESARLKlADVMAEARERIGEEsPsPAluDs.c- ........................MshashLAKAssTVlTGLVGVsAYEsLRKAluKAPLRpuuVossAhGLRGTR+AE....EAAESARLpVADVlAEAR..ERIGEEuPsPAlucsc........... 0 8 13 19 +7200 PF07372 DUF1491 Protein of unknown function (DUF1491) Moxon SJ anon Pfam-B_20742 (release 10.0) Family This family consists of several bacterial proteins of around 115 residues in length. 
Members of this family seem to be found exclusively in the Class Alphaproteobacteria. The function of this family is unknown. 25.00 25.00 42.00 42.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.27 0.72 -4.11 53 260 2009-01-15 18:05:59 2003-09-17 13:39:21 7 2 258 1 89 215 166 106.20 37 92.50 CHANGED LsuchWVsAhlRRlpstGhsAhVhp+GDcsAGAVlVK.lssh-GpApLappuhph........sGsRtWhhh....ssssEs-lDstlsRppcFDPDLWllElED.+pGc+hL-ps ......LpochWVuAhlRRlpssGs..Ahlt++Gss-AGAlhlK.lss..h.sGpssLau.usps............sG-Rhahph......pssc-tsscstlpRch+FDPDLWlVElED.cpupchh..t....... 0 25 58 68 +7201 PF07373 CAMP_factor CAMP factor (Cfa) Moxon SJ anon Pfam-B_20762 (release 10.0) Family This family consists of several bacterial CAMP factor (Cfa) proteins which seem to be specific to Streptococcus species. The CAMP reaction is a synergistic lysis of erythrocytes by the interaction of an extracellular protein (CAMP factor) produced by some streptococcal species with the Staphylococcus aureus sphingomyelinase C (beta-toxin) [1]. 23.30 23.30 24.20 24.10 23.00 21.30 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.55 0.70 -4.85 9 517 2009-09-11 06:24:37 2003-09-17 13:44:42 6 3 137 \N 15 141 1 226.20 41 81.27 CHANGED spsos.ts...pp..tstptcphhptLNtchspLpsh.pcslpGo-.....htcplschlcssccLKsulcs.sc...shYDhsSIssRVEhlsssl-sIphuTpsLpsKVppAHl-hGhuITKhlIhllsPhuoscplcsplsslKth.tKV.sYPDLpPTDtATlYsKsKLsKtIWpsRhsRDppVLshKshcVYptLNKAIT+AsGVphNPpsTVtpVDptlpsLpsAhQTALc ............................................................................ss...........tpshsss-AcptlptlNuRIspLpcs.QKss.uSp............ht-pIpcLLc.s.....AhcL+ssl-sls+...G..sl...shhDh.soIssRVcLlssssDTIpsAspTLQsKVpsAHs-lGLpIs+AlllhlsPsSTssQLp-EhAAlKstls+lpsYPDLpPsDsATlYsKspLs+tIhQlR.....hsRsppl...lshKstsshctLN+AIo+AsuVphNstsTVupVDpAlppLcAAYQsALp................................ 
0 13 14 15 +7202 PF07374 DUF1492 Protein of unknown function (DUF1492) Moxon SJ anon Pfam-B_20776 (release 10.0) Family This family consists of several hypothetical, highly conserved Streptococcal and related phage proteins of around 100 residues in length. The function of this family is unknown. It appears to be distantly related to Pfam:PF08281. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.28 0.72 -3.90 6 341 2012-10-04 14:01:12 2003-09-17 13:47:25 6 2 248 0 28 243 4 92.40 22 68.36 CHANGED LLSSPKWpsDKVQGGp++KhDDVYl-Llsh....KEsIEpcTsEAIp++lELp+hIspLcNscSRolLpMVYIsKhssaplhDcLshS+oTYY+hh+.Ah+ELs ...........................................h.t.............................t....h.hh.h.....p......h.p..ph..tphhp....pp.h...c..lp...p.hIs.c.Lt......s..spp...R..p..lLp.hhYl..s..c..hs..h....p..lsccls.hS.csohYcl+pcAlpcL...................... 0 11 19 22 +7204 PF07376 Prosystemin Prosystemin Moxon SJ anon Pfam-B_20835 (release 10.0) Family This family consists of several plant specific prosystemin proteins. Prosystemin is the precursor protein of the 18 amino acid wound signal systemin which activates systemic defence in plant leaves against insect herbivores [1]. 
25.00 25.00 49.00 283.70 24.40 18.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.91 0.71 -11.58 0.71 -4.92 3 8 2009-01-15 18:05:59 2003-09-17 13:55:29 6 1 7 0 0 9 0 197.50 82 99.87 CHANGED METPSYDIKNKGDDVQE..KTKL+HEKGGDE+tKIIEpETPSQDIpNKs........DDAQphPKVEHEEGGstKEK.lEKETlSpCIIKhEGDDAQEKlpVEYEEEEh.KEKIVEKETPSQDIuNKGDDAQEKPK.......................................................................VEHEEDGDEKETPSQDI.KIEGEDAQEIPKVECEERE...KIVIRVDLAVHSTPPSKRDPPKMQTDNNKL .......MtTPSYDI.KNKGDDhQEE.KVKLHHEKGGDEKEKIIEKETPSQDINNKDTISSYVLRDDsQEIPKhEHEEGG.sKEKIVEKETISQhIIKIEGD.DAQEKLKVEYEEEEYEKEKIVEKETPSQDINNKGDDAQEKPKVEHEE.GD-KETPSQDIIKhEGEGALEITKVVCE......KIIVRtDLAVpSpPPSKRDPPKMQTDNNKL 0 0 0 0 +7205 PF07377 DUF1493 Protein of unknown function (DUF1493) Moxon SJ anon Pfam-B_20460 (release 10.0) Domain This family consists of several bacterial proteins of around 115 residues in length. Members of this family seem to be found exclusively in Salmonella and Yersinia species and several have been described as being putative cytoplasmic proteins. The function of this family is unknown. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.53 0.72 -3.97 18 511 2012-10-02 01:16:24 2003-09-17 13:59:56 7 2 255 0 69 887 434 104.30 38 92.53 CHANGED M..sslppclh-hl+cchGshh.....tchpLT.so-LcpDLplstsDsc-LhpcFhccFsV-husFphppYF.P..pssh..hhs.hc+p......csc.l..PlTluMLhcSA+AG+WLYD .......................................-slppplhcLlRpph.s..h..Y.lh....KphpLo..-oDLpp.D.Lplst--sp-LMscFFccF.....NV-tusFphpsYF.P.........p...Ph........h..p....F++p.............-...s...shTIuMLl-SA+AG+WLY............................................................... 0 5 17 48 +7206 PF07378 FlbT Flagellar protein FlbT Moxon SJ anon Pfam-B_20574 (release 10.0) Family This family consists of several FlbT proteins. FlbT is a post-transcriptional regulator of flagellin. 
FlbT is associated with the 5' untranslated region (UTR) of fljK (25 kDa flagellin) mRNA and that this association requires a predicted loop structure in the transcript. Mutations within this loop abolish FlbT association and result in increased mRNA stability. It is therefore thought that FlbT promotes the degradation of flagellin mRNA by associating with the 5' UTR [1]. 19.50 19.50 20.80 25.10 19.30 19.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.38 0.71 -4.28 49 266 2009-01-15 18:05:59 2003-09-17 14:07:26 6 2 220 0 105 208 18 123.60 35 80.56 CHANGED L+lpL+PtERlllNGAVlcNuDR+spl.l.ss.sslLRp+Dllps--AsTPl+clYaslQhhhhs.sss.pphps.hhptlppLh.hhssscststltpssctlhsuchYcALKtlRsLlshEpcllu .......L+lsL+ssERlhINGAVlc.u.D.R.+ssLplhNc.AshLhEpclLpPE-AsTPlRplYFssQhMLlt.ssttcp.stshahphlptlhthhpss-hhstLctlsphVtsGchacALKslRsLhshEtclh.s................. 0 27 57 72 +7207 PF07379 DUF1494 Protein of unknown function (DUF1494) Moxon SJ anon Pfam-B_20601 (release 10.0) Family This family consists of several bacterial proteins of around 175 residues in length. Members of this family seem to be found exclusively in Chlamydia species. The function of this family is unknown. 23.00 23.00 24.30 24.30 22.70 22.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.11 0.71 -4.34 6 41 2009-01-15 18:05:59 2003-09-17 14:09:44 6 1 37 0 8 18 0 165.00 56 93.80 CHANGED pKRuFLLhElLVShTLlALLhGsLGFWpR+hasSpKccE+lY+TFLpEshAYKpLRTlF.hoTSpIE-hPGhLhShlFDRGVYRDP-LAGtVtGSLaYcppptRL-L.IpS.Rpcu+.ETh.LhcpV.pV-hVshR.....ppt.uchP-RVhhsl+R+sst.s.RsLoY.FAlG+ ..pKRuFL.L.ELLIuhsLIuLLLGoLGaWpR+IahSpKcKE+VY+hFLpES+sY+hLRss................F....hSThuh....Esp.t.hlFS..FDRGVYsDP+LAGsVpGoLHYDshsQclpLhlsS.R-cu+.Eph.LhoHVhph-hpshR......pu.sEhP-RlhLTlsR+ssAhPPRTLoY.FAVG+.............................. 
0 2 3 7 +7208 PF07380 Pneumo_M2 Pneumovirus M2 protein Moxon SJ anon Pfam-B_20478 (release 10.0) Family This family consists of several Pneumovirus M2 proteins. The M2-1 protein of respiratory syncytial virus (RSV) is a transcription processivity factor that is essential for virus replication [1]. 25.00 25.00 73.20 72.80 22.90 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.11 0.72 -3.99 3 21 2009-01-15 18:05:59 2003-09-17 14:14:51 6 1 7 0 0 21 0 84.50 67 96.47 CHANGED MTpPKIMILPDKYPCSISSILIoSEscVssaNHKNlLpFNQNphsNHMYS.NphFDEIHWTSQELIDslQpFLQHLGIsEDIYTIYILV MshPKIMILPDKYPCSIoSILIoScscVshaNpKNsL.FNQNp.sNHhYs.Np.FsEIHWTSQ-LIDssQpFLQHLGIs-DIYTIYILV........ 0 0 0 0 +7209 PF07381 DUF1495 Winged helix DNA-binding domain (DUF1495) Moxon SJ, Bateman A anon Pfam-B_20509 (release 10.0) Domain This family consists of several hypothetical archaeal proteins of around 110 residues in length. The structure of this domain possesses a winged helix DNA-binding domain suggesting these proteins are bacterial transcription factors. 21.60 21.60 21.60 22.90 21.40 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.84 0.72 -4.31 11 58 2012-10-04 14:01:12 2003-09-17 14:17:45 6 2 37 0 41 59 0 86.40 40 74.89 CHANGED Ih+SLp+SclR+KILhaLhchhPpshYls-loRcV+ossoNVpGuLcGhGpRYsu-pSLlpLGLV.....pptcsGhK..hYplTc....hG+pls-hL ...............llRSLp+SclR++lLhaLhch...aPpssYluEIuRtltuDsoNVhGuLtGhG.sRYsucpSLltLGLV....pphpps.GhK...hY+lT-....hG+pls-h................ 0 11 13 30 +7210 PF07382 HC2 Histone H1-like nucleoprotein HC2 Vella Briffa B anon Pfam-B_20179 (release 10.0) Family This family contains the bacterial histone H1-like nucleoprotein HC2 (approximately 200 residues long), which seems to be found mostly in Chlamydia. HC2 functions in DNA condensation, although it has been suggested that it also has other roles [1]. 
40.00 40.00 40.10 40.00 39.90 39.90 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.98 0.71 -12.70 0.71 -4.44 8 284 2009-01-15 18:05:59 2003-09-17 14:20:42 6 26 180 0 92 272 95 148.50 44 66.11 CHANGED MlssQK..K+SuKKTAuK..AVRKP.......AKKAAAKKss....sRKsAAKKssARKTssKKsVAtKpsutKtss......+KssAp+hsAtK....................hss+KsVAKKssAKKssAKKssA+KTVAKK.ssA+KsAAKKAs.AKKAsARKsA.A+KAlAK+Asu.............C+KpH+HoAACKRVsoSuAst.tsGuKo+lps.....AHuWRpQLhKhsoR ............................................................tc...s.s...s+.ss..s+K.ss.......AK.K..s.s..s.KKss.......s+K.s.A.sKK..ss...A+Ks.s.s.tKts...A.t..+.p....s..s.sKK.s.....s......AKK...........s.s....s..+K.ss.A.KK.s.s.A.+.K...........ssAKKs.s.A.K.K.s.s.AKK.su..AK.........Ks.s.A+K.s..sAKK.......s.s.A.+Kss.AK....Kss..A.+K.s.s.A.+....K..ss.s.cK..s.s...Ahcss..............................hc.s..s.................................................................................................................. 0 19 45 71 +7211 PF07383 DUF1496 Protein of unknown function (DUF1496) Moxon SJ anon Pfam-B_20554 (release 10.0) Family This family consists of several bacterial proteins of around 90 residues in length. Members of this family seem to be found exclusively in the Orders Vibrionales and Enterobacteriales. The function of this family is unknown. 20.20 20.20 20.30 22.10 20.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.95 0.72 -4.71 21 610 2009-01-15 18:05:59 2003-09-17 14:33:02 7 1 598 0 48 194 3 57.70 63 62.58 CHANGED pshssuspsspp.......RsCaYpDpsYSpGAVIcl-GllLpCst-schpoNusLhWhplpp ...................EVaSouGQSuQPC.....hQCCVYQDQNYSEGAVIKsE.GlLLQ.CQR......D-KTlSTNPLlWRRVK.P............. 0 3 12 29 +7212 PF07384 DUF1497 Protein of unknown function (DUF1497) Moxon SJ anon Pfam-B_20585 (release 10.0) Family This family consists of several phage and bacterial proteins of around 59 residues in length. 
Members of this family seem to be found exclusively in Lactococcus lactis and the bacteriophages that infect this organism. The function of this family is unknown. 20.00 20.00 22.50 107.80 19.90 19.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.70 0.72 -3.80 2 15 2009-01-15 18:05:59 2003-09-17 14:35:51 6 2 14 0 1 8 0 59.10 92 83.98 CHANGED MGYYDT+NEARRISKLASQNISSEQsKKEFELDpQSKFNQEMQAEFHE+IKKLGtKNGS MGYYDTKNEARRISKLASQNISSEQNKKEFELDSQSKFNQEMQAEFHERIKKLGEKNGS... 1 1 1 1 +7213 PF07385 DUF1498 Protein of unknown function (DUF1498) Moxon SJ anon Pfam-B_20610 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 225 residues in length. The function of this family is unknown. 24.40 24.40 24.40 25.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.38 0.70 -5.21 5 356 2012-10-10 13:59:34 2003-09-17 14:43:15 7 1 341 4 50 222 100 221.20 51 98.34 CHANGED MKRSpIN-IIcEu+AFhpoFsahLPPFAYaSP--WKQR-ssuhpEVhDuRLGWDITDFGpscFAchGLTLFTlRNGuucsh..uhsKsYAEKIMhlRcsQlTPMHaHs+KtEDIINRGGGsLVlELapuDc.sstlD-co-ITVuVDGpppThsAGupLKLcPGQSICLsPGLYHuFWAEuGs..VLlGEVSoVNDDLTDNhFLEPlGRFSsIEEDEPPlHLLCSDYcpa ........................MKRSpINpllccscthhppaphpLPPFAhaoPpcWpp.hst..s.....shpElhDhpLGWDITDFGpscFsphGLoLFTlRNGs..hpsh..sYsKsYAEKlhhl+csQloPMHFH......hp.K.....tEDIINRGG.GsLllcLasus.....ssphs............p...c......oslTVslDGpppThsAGspL+LpPGESIsLsPGlYHsFW.u..E..s..Gh.........VLlGEVSpVNDDppDNhFh.c.P.luRFssIEEDEsshhLLCs-Ysp.............. 0 10 26 40 +7214 PF07386 DUF1499 Protein of unknown function (DUF1499) Moxon SJ anon Pfam-B_20643 (release 10.0) Family This family consists of several hypothetical bacterial and plant proteins of around 125 residues in length. The function of this family is unknown. 
22.60 22.60 22.80 23.50 21.80 22.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.51 0.71 -3.93 120 649 2009-01-15 18:05:59 2003-09-17 14:46:02 6 5 562 0 243 562 331 121.10 25 61.46 CHANGED tclssssssPsshps.ths............................................pt................tlpslt...............s.sspps........hstlhpll.......pphst.......shl.....ptsps..hlcApspotlhGFsDDlplpls.....ss.ts....hlclRSsSRlGtSDLGsNtcRlcpltstLp ....................................................................................................................h..hsss.tpPsph.s.t.......................................................pt..tltshh...........hp.sssps........hptlpph.h...........pshsh......................shl......h...ppsss......hlcspspStlh..uFs.....DDl..p...l.......plp.........sstt...........hlclRStSR.......h.....G.........h.u.DhGsNtcRlcphhptL.............................. 0 79 162 206 +7215 PF07387 Seadorna_VP7 Seadornavirus VP7 Moxon SJ anon Pfam-B_20657 (release 10.0) Family This family consists of several Seadornavirus specific VP7 proteins of around 305 residues in length. The function of this family is unknown. However, it appears to be distantly related to protein kinases. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.02 0.70 -5.59 5 11 2012-10-02 22:05:25 2003-09-17 14:48:06 6 1 4 0 0 107 14 302.00 56 97.28 CHANGED hscGQsplh+.tssFcVRFRpcGKlaclPLPNuoT+c......sIlGTIKYFTElMGLslVsNuh+LshsNVuDhp+asGNuTLstlKocl.GsLFLKKlsSLPlslssshY.aNK...YcVFARlHGIl+L+....ND.NNYcYGIILE+CYcIRlhssNhIlAGLKuLMDhHpEssssLHGDCNPpNLMCDKhGhLKLVDPusLlopsVsalN.ppYcuLTs-uEVsuFVhSCLplVucLRslcp--I...FIpchYLsLcosssDsNlpsGsRLTuL.stL-VsS.....DhlStlsMhPFluLLNslcYYslsDllShlpp-lDsEs-V ......................................................................hppGphsIhR...Gs.FclRhRphs+sasMPLPNATosc......NFLDCIKFITEpVGFDYVSsGFKLs.ANVsDFQHLNGNSTLllGKTcI.GPLILKKlRSLPCC..NDsLF.+NK...aRILARM..HGILRLK....NDhNuaKYGVI..LE+C.YKP.pI..N..FSNFl.T.A...I.sD.Lps..FHSu.s.sa.hLHGDsNP-NIMSDusGYLKLVDPV..C.L....L...E.NQVN.MVN.I-YEuLTQ.-AEKKV..F..lKSLLpLVE+QLSAsh-EI........YVsLcEsNPSFN.LpsGh+LTDLLsslDlaNsspWK.h.lNH+PMhPplulLNDLTYYchsDVpshlT-sLsDEDDV............................................................................ 1 0 0 0 +7216 PF07388 A-2_8-polyST Alpha-2,8-polysialyltransferase (POLYST) Vella Briffa B anon Pfam-B_20294 (release 10.0) Family This family contains the bacterial enzyme alpha-2,8-polysialyltransferase (EC:2.4.99.-) (approximately 500 residues long). This catalyses the polycondensation of alpha-2,8-linked sialic acid required for the synthesis of polysialic acid (PSA) [1]. 
22.70 22.70 23.10 26.90 22.40 22.60 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.65 0.70 -6.12 3 54 2009-01-15 18:05:59 2003-09-17 14:48:52 6 1 43 0 2 27 0 403.30 53 97.46 CHANGED M.....LKKIRKALFpPKKFFQDS.WFsTS.......LFsLpsP+sNLFIISsLGQLNQAcSLIKhQKL+NNLLVILYToKNhKMPKLIpcSsNKcLFSShYLFELPRcPNslSPKKlLYIYRuYKKILpsIQPAHLYhMSFAGHYShLISLAKKpNITTHLIEEGTGTY.....APLLESFoY+PTKhEphaIGNNLplKGYhD.KFDILHVsFPEYAKKIFNAKKYaRFFAHuGGISoSssIANLQ+KYRIS+NDYIFVSQRYPISDELYYKoIVETLNpISLpIcGKIFIKLHPKEMtNKYIMSLFLNMVcINPR.LVVINEPPFLIEPLIYLTsPKGIIGLASSSLVYTPLLSPoTQCLSIGpIIIcLIp+pupQENTshIcEHLEIlKpFDFIKILSDlpssIsss.FKTEEThEhLLKSAEaAYKsKNaaQAIFYWQLASpNslolLGaKuLWYYNAL.pVKQ.YKMcY.-I.YIDpISlsFHDKDKhhWppIKp.aa...KRIhpsc .......LKKl+KhhhpPhtFFpDS.WhhsS..........Lp..ppNlFlISNLGQLppspShlph.phpsNhLllLhTpKNhcMPKllhpphNKpLFpShhlh.lPppPNsho.KKllahYpsYKhll...pstchYhhSastHYuhhl.lhKKpNIpspLI-EGTGTY............sPllp...Yp...........hIhNsl.l.htY.D.+F-.lassFPphhKKhFNApKasc.FttAsulp.sspIsNl.+KYpIo+sDhIassQ+Y.IpcsLahcSllpIL.pIsh..pu+IFIK.HPKE..pp.l.ul.hshhc..hpsR.lllIsEPsFLIEPlIhhsp.KtlIGLsSSSLlYsPLlS.psQshSIu.LhIpLhpp.p....cshphlp.Hh.-IlKpFs.lpILsD...sIos................................................................................................................................... 0 1 1 1 +7217 PF07389 DUF1500 Protein of unknown function (DUF1500) Moxon SJ anon Pfam-B_20659 (release 10.0) Family This family consists of several Orthopoxvirus specific proteins of around 100 residues in length. The function of this family is unknown. 
25.00 25.00 35.50 34.80 17.20 15.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.45 0.72 -3.99 2 38 2009-01-15 18:05:59 2003-09-17 14:49:50 7 1 17 0 0 32 0 86.80 73 58.82 CHANGED MSSSVDVDIYDAVRsFLLRaYYsKRFIVYGhSNAILHNIYRLFTRCAVIPFD.IVRhMPNESpVpQWVhDTLNGIhMNE+DVuVsVGTGlLFMEMFF..h ......................MSSSVDVDIYDAVRsFLLRHYYsKRFIVYGRSNAILHNIYRLFTRCAVIPFDDIVRT.MPNES....RVKQWVhDTLNGIMMNE+DloVsVGTGlhFMEMFF...DY.. 0 0 0 0 +7218 PF07390 P30 Mycoplasma P30 protein Moxon SJ anon Pfam-B_20617 (release 10.0) Family This family consists of several P30 proteins which seem to be specific to Mycoplasma agalactiae. P30 is a 30-kDa immunodominant antigen and is known to be a transmembrane protein [1]. 18.70 18.70 19.20 19.90 18.30 18.10 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.59 0.70 -4.88 2 12 2009-01-15 18:05:59 2003-09-17 14:56:20 6 2 6 0 1 14 0 189.00 57 98.69 CHANGED MKLKQLLNLGTALTATFSIPFVAAKCSEDDKKEKVTKPKNEPTKPVDNSKTNDNSNEMVGETNLSNSINSSNSSTQNHFGAETNAKESPALNDLYSENPATPI.p.EKGIKESSEGSKNEGDKVIAGKEAIYKDIDFDISKVKITIDKKDLKDEDLISPKKGSHKQLFFNTYKDKTKVSGKLEKDQKPWGGIAIGSVTGLPKNYSIANADSPLYISKKDKKGTAKPNGFVNVEKDGGNLKIKFRFFKFNKGsNSTVSTKVYEAIIS .....MKhK.LLsLGTsLTATFSIPFVAAKCuEsDKKEKhpKPhpEPsKPs-NocTsDNosEhs.G.psN.s.sS.N.SsNut.......................Nssss..p.EKtIKEoS-u.Kp-G-KV.stpcshYKDIDFDhSKlKIhIsKKDlKDEDLIssKpGspKQlFFsThp.tTplpGKh..tphPWtGltIGoVTGLPcsYSIuss-sPla...+s+KGphpssGFVNVEK-GspLKIKFRFFKaNKGsssTVSTpVYEAIIS................................. 0 0 1 1 +7219 PF07391 NPR NPR nonapeptide repeat (2 copies) Bateman A anon Marshall M Repeat This nine residue repeat which I have called NPR after NonaPeptide Repeat. It is found in two malarial proteins and has the consensus EEhhEEhhP where h stands for a hydrophobic amino acid. 
25.00 0.00 115.20 0.60 17.30 -999999.99 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.70 0.73 -6.93 0.73 -3.28 73 570 2009-09-16 13:04:07 2003-09-17 15:05:43 6 2 2 0 555 770 0 16.90 67 98.77 CHANGED EllEEllPEEllEEVlP ........ElVEEVlPEElVEEVlP... 0 555 555 555 +7220 PF07392 P19Arf_N Cyclin-dependent kinase inhibitor 2a p19Arf N-terminus Vella Briffa B anon Pfam-B_20449 (release 10.0) Family This family represents the N-terminus (approximately 50 residues) of cyclin-dependent kinase inhibitor 2a p19Arf, which seems to be restricted to mammals. This is a tumour-suppressor protein that has been shown to inhibit the growth of human tumour cells lacking functional p53 by inducing a transient G2 arrest and subsequently apoptosis [1]. 25.00 25.00 45.60 45.60 19.00 18.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.45 0.72 -3.80 6 33 2009-01-15 18:05:59 2003-09-17 15:41:38 7 1 25 1 7 29 0 50.20 71 51.22 CHANGED RFlVTVRI..RRAsRPP+VRlFVVphsRuupctoAssspAsVAhVLhLhRppR ..RFLVTlRI..RRACGPPRVRVFVVHIPRhAGEWAAPGA.AAVALVLMLlRSQR 0 1 1 1 +7221 PF07393 Sec10 Exocyst complex component Sec10 Vella Briffa B anon Pfam-B_20545 (release 10.0) Family This family contains the Sec10 component (approximately 650 residues long) of the eukaryotic exocyst complex, which specifically affects the synthesis and delivery of secretory and basolateral plasma membrane proteins [1]. 
21.80 21.80 22.10 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 711 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.36 0.70 -12.95 0.70 -6.35 26 605 2012-10-02 15:56:29 2003-09-17 16:31:31 6 17 278 0 437 610 1 516.80 19 80.29 CHANGED lppsh-sFspLDpplss................luptssclGcpLcphspp+pp........tlcut.Llpaapchhspup..............................t.LppL.cp.ssspthhcsAplhppLhtlupcl..........................................sssttppspppI-pas-phEpcLLcpFssuY+cp-...hppMpchApl...LppFNGGs....sslphFlsp+phFh.pppph......p.sh......h..l.s..sspt..hh.pshpslhsclpsslppEupIIpcVF....sssppVhphFlp+lappplpphlphlLccspsh...............................SpLuYLRsLpshauhstpLsccLpsa.hts.p...........................................hsshL-pp..hpclF.sal...spYh-pEp+sLcphhtshltcFs.ph....ptp..................htph.st..t.h.........................ssp.t.........spsp.sphsphtpsphp.sppp..........................................................t....phpttcshholshshshl+tstEulsRshpLsp.....sscsscsshslhslLLphlhcpYlcsuL-.ss.........................hthsstc.tppss.ssLh.aLpslphsspIlpLhsthhps.ll.Phlsssssh+pphhptppphhpphEtplsthlpcslcslhsplphlLu.cQKKsDFpPpssshs.......................h..psTpsCtplsshL.spltpthstsls..up........................................................................NLcsFLoElGhclaphLlcHh++apVss.sGGlhlppDlspYpshlcsa....plsplsppFchL+pLusLalV.pP-sLpplsptsth...........................sthshp.lppalppRsDapphthssth 
................................................................................................................................................................................................h.........................................................................p.t...............t..Lh.hh.th.t......................................................h......tt...........ptu.hh...p..pL...h.u...pp...................................................................th....h..t.ltth.p.hEpthlppFpt.......t........................ptt..............p....tth.tphuth.....L.thp.tht...............t.s.hth....alp..pp...............................................................................................p.t.hhpth..h.hpp..t..ltthF..............s..........lh.h.hhpp.lht........lt.hhp..hhp....p.t.................................................................................................s.......a..lp.l.......h...t........h...t.h..ppl.............................................................................................t..hpp.......h.pha.....ah....t.Yht.E.t.hpp..t..h.pa............................................................................................................................................................................................................................................................................................................................................................................t.pt.h.s.p.hshphlp.ht...tuhtRsh.h..................................tph..tth.tlh...........hllp.h..tal.t.uh-.....................................t.tt........t..t...t.t...sh...ah.hlp.ss.lhthhp.........hpp.lh...h.h.....sp.ph..............h.......ttp......pth.p.h-.thsthlp...t.............slsshht..hph.hht.tQt..tpD..apP..ttt...........................................................................o.ss.tlsthl.p.t....hp..hhtshs....tp..........................................................................................s.hp.sh.tE.......lu.phhphlh.cHh.pp.th.ss
..Guhh.hh..........pD...h...st...Yhphh.p.p.h.......................t.......l...haphLptlsplhll...ts.p.thpphh.p...t................................t.hp.p.hhtahphRtDh........t........................................................................... 0 164 262 375 +7222 PF07394 DUF1501 Protein of unknown function (DUF1501) Vella Briffa B, Studholme DJ anon Pfam-B_20578 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 400 residues long. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 392 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.32 0.70 -5.63 51 1229 2012-10-03 20:55:17 2003-09-17 16:41:31 7 11 455 0 491 1424 2193 366.90 22 81.41 CHANGED AKplIhLahsGGPSpl-hFD.KPphpphpspsh......................................thttpGpsGh.lu-hhPch..uphsDc....lsll+Shhscths...Hs.AthhhpTGtt................shPohGuWhsauLGsts..psLPsalshst.......tth.tsuspsausGFLsutapuhsh.psssss..............lhslsssss.lstppppptlshlsphspphh......ppsscsphts+ltpaEhAh+....Mpspss-shDlssEsppshchYGhss...t..tt...................FuppCLhARRLlEcGVRFlplhps.......G..........WDpHs.....slppshspts...tplDpshAALlpDLcp+GhL--TLVlasuEFGRTPh.........pss.........tGRDHpspuFohahAGGGl+uGhsaGpTD.....-huhssscss..hplpDlaAThL+hhGlDcpclsathtGc.h+Lss...pucllc-ll 
.......................................................................................................................................................................................................................................+tllhlahtGGssthshhs..ps.p..h..hh.ttpsh....................................................u....h.h.s.p...h..hsth..........sphscc..............hslh.u.h.....s..phhs..........cs..u.th.hhts..G.......................h..tsh..h...h..t...h.sstp...............shPshhhht.......................t..sh..ssp..ht....s..G.a...Lsp..hh....ps...h.h..tstsss.............................................hhsl.ss.st...s...h.s...h....t...t...t.tpt..h.s.h..h..t...t....h...st...............tt.ts...s.p.h...t..s.h.ht..t...h.p..h.uhp......................hp......s...t....h.p...s.h..c.l.s.p...-.s..p...t..h..h.p.h.h.G.st.....................................................................hu.t..p..s..l...h...A......p.c......h....h...c..p..G....s..+..h..l.plt..hs..............G..............WDs..Hs.................s.hs......t.....h...sp.hh..................tplDpuluALh....pD...Lp......p...........p......G......h............h.......-.......c......T......lVlhhuEFGRTst........................tN.us....................tG...p.....D......H.spuh....s....h.......h....l....sG...G.......ul....+............G.......G.h.shG..p.s.s........p.h....u...h....t..........t..........................c...lhss.hh.h..hshs.p.t.h.h.h.t.h.up.htl.......st........................................................................................................................... 1 299 398 453 +7223 PF07395 Mig-14 Mig-14 Vella Briffa B anon Pfam-B_20642 (release 10.0) Family This family contains a number of bacterial mig-14 proteins (approximately 270 residues long). In Salmonella, mig-14 contributes to resistance to antimicrobial peptides, although the mechanism is not fully understood [1]. 
19.70 19.70 20.20 19.90 19.50 19.60 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.55 0.70 -5.38 5 242 2012-10-02 22:59:21 2003-09-17 16:48:01 6 3 223 0 34 157 10 250.40 50 88.21 CHANGED HPLVV-pLu-huuIPVRYLuWEpuGElKAAIssWGRpLALSKDsLKRtGKKuLFDLGNAEIILPsAuDs+ssLRH+sRYLSALNcspIssL+...AQKEQLAhARsPEDFSKKFRYNQRRElRLlEEAGGsVRslu-FSuuElAulYl-LFpRRWG..FPATGA-chAcVlEpLR-LLhGSVLaLNspPIAIQlVYRuEAPpWVSlEYlNGGV..DPETRcFSPGSVLSaLNTQuAWEDARuhsKsLRFSFG..RADREYKDRWCsPsPVaQ..s ..........................................................................................................................................................HPDlV-ahhc+ash+F+F.F.+.acccscIKGA.......YFlsNDppl...uh.hsRRsFPLusDEILlPh...APDl..R..saLPcRTp+LSAL+pspIpNAha...+lspK+QsC...llK.......E...s..FSsKFc+sRRpEhp+FlcpGGoV+sV..u-hS.ucE...LspIa...l-LFc.....pRaGt..p.sshs.A-pLAs.FFSpL+c....LL.....FGalLal-shPCAhDlVLKuESphsVYaDssNGul..cs..EsRsLSPGSlLMWLNlscA+caCpcppKp.LhFShG..+P..-..hEYKchWssPh.sGp.s............................ 0 1 6 22 +7224 PF07396 Porin_O_P Phosphate-selective porin O and P Vella Briffa B anon Pfam-B_20808 (release 10.0) Family This family represents a conserved region approximately 400 residues long within the bacterial phosphate-selective porins O and P. These are anion-specific porins, the binding site of which has a higher affinity for phosphate than chloride ions. Porin O has a higher affinity for polyphosphates, while porin P has a higher affinity for orthophosphate [1]. In P. aeruginosa, porin O was found to be expressed only under phosphate-starvation conditions during the stationary growth phase [2]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.16 0.70 -5.59 34 1073 2012-10-03 17:14:37 2003-09-17 17:09:15 6 6 497 3 306 1050 285 310.00 14 76.92 CHANGED hplthp.hpush.phs...tps.tptpsusF..ph+ch+LplpGchsc.......chsYphp.clspssssts..s.ht..............h-hsalsach...scp..hslphG+.ts.aGthEh.sssslthhchu.....-hhs............hh..sshhtGlsssaphssspp.lphplh............ss..pssshsctassp...............thttstsshshshsasushh.sphhphchuhshtppActpssp.hlshGsthshs.......phth.hchhhu..slptphhhsphhp...........ttthpshpapuhhsphsYphsschpha.....................spGhaEsssthcthshttt.............hthp+shsahsGlpaaPh..tpsL+halsYltpchchpptsp..s....ssstlslth.Yph ..............................................................................................h...............ht.......tt...t..t....p....p.s..s.h......ph+c.h+lthp.G..p...hh.p........h.tap..h..p..hc...h......sps..s..s..............................................................lp-Aalpa..ph.........pp.....hplphGphphs..a.....uhp.pht.osp....h.h..h..pps..................h.p..........................................t.h........s..h...t.h..ths...h..t...h..p..h..t..tt...t..h.......h....t..h.shh.............................ss....t..s...s.st.s.....t..h.s..s..p....................................................hth.t.h.....th..............t...............................................................t....ht..........h...t.......t.......t.s.t.....t....t..................h........s........t.t......................hhshph..hhht....ht..h..p.schh..hhthtt..............................h.....s..h....p..h......u.h..asphsahh..h..s....p.....h..........................................h.......h.c...hhh..ths..hphtt................................................ttp.p...th.thGhpaa........th.....ch.hsa.....h....t..p.........................t.....h...h...................................................................................................................................
......................................................................................................... 0 111 228 276 +7225 PF07397 DUF1502 Repeat of unknown function (DUF1502) Moxon SJ anon Pfam-B_20836 (release 10.0) Repeat This family consists of a number of repeats of around 34 residues in length. Members of this family seem to be found exclusively in three hypothetical Murid herpesvirus 4 proteins. The function of this family is unknown. 19.00 19.00 19.90 19.40 18.10 17.10 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.02 0.72 -4.32 2 25 2009-01-15 18:05:59 2003-09-18 10:17:58 6 3 3 0 1 25 0 32.90 84 31.78 CHANGED RSEGAGCPARGAGPPAWGAts.ppsutsRssut. ..RSEGAGCPARGAG.PPAWG..AGPPRRDGGNRGDGAP....... 0 0 1 1 +7226 PF07398 MDMPI_C MDMPI C-terminal domain Moxon SJ, Bateman A anon Pfam-B_20685 (release 10.0) Domain This domain is found at the C-terminus of the mycothiol maleylpyruvate isomerase enzyme (MDMPI). The structure of this protein has been solved [1]. This domain appears weakly similar to Pfam:PF08608. 27.80 27.80 27.80 27.80 27.70 27.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -10.04 0.72 -3.41 68 478 2012-10-02 14:08:01 2003-09-18 11:25:45 6 2 187 0 169 384 19 92.60 21 37.19 CHANGED chsscsls...chlsthssphht...........thsh.hps.tshphpsss..........................................ssVpGssssllhh..hsGRhssss......lpssG-tshhst ..........................................lssDtls.....chLthh.sspht.....................hhhthpsstslpl+so-ss.....................s...st............................sssslpGsutsLlhhhsGRhsssst......lpssGDtslht.t.............. 0 61 131 160 +7227 PF07399 DUF1504 Protein of unknown function (DUF1504) Moxon SJ anon Pfam-B_20945 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 440 residues in length. The function of this family is unknown. 
25.00 25.00 58.90 58.80 18.90 18.80 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.95 0.70 -12.56 0.70 -5.82 4 138 2012-10-02 15:12:49 2003-09-18 11:28:01 6 2 121 0 52 121 122 416.10 45 97.30 CHANGED Mhhss...ulplsuAlLFhsAllHTFhT.ahpRLscop.++..............utLa+lLucVElVFshWAlsLhhhhhhTEGhphuhtYhDSRNas.shFlhAIhllhtoRPIlaFuttVlphlA+.lshsushuaaaTlhhhsPLLusFlpEsuAMhlAAhhLpcphas.osS.+htYAThGlLFsNISIGGhhosFuu.slLhlhsthpW-pSFhLoHFuWKAlLAIhlssshhhhLFRKEh++hPcT.ss.......tssus-+VPshIIslpllh.VGsVlhu+tsslFhG.LFLFaLGFt+hh.hYQD.lhLpcshhVuLFhAGLVlhGs.QpWWl.slhhGMos.uhhhsuhhLohFhDNAhlsYLsp.lsshoDsa+YhlVAGuhouGGLTlluNhPN.sGhhILRspF.suoIH.ltLhLuALsPolIuhhsFhhL ...............................slplhuAlLFslAllHTFhs.aFp+Lu....+c....t.c........+..................................................................uuLaHLLuEVElVFGhWAhsLhhhhhh.....h..pG.hpt...........uhsYh-.S.R.NaTEPLFVhslMllAuSRPILphspphlptlA+.....lsh....psshuhaaslhshsPLhGSFITEPAAMTlAAllLscphaph.ssss+hpYuTLGlLFVNlSIGGsLTuaAAPPVLMVAspapWssuFMhspFGWKAslAlhlssslhhhlhR+c.h..t.p....h..s...ps..hts............t.hpcp.....lP.hhlhhlHl.lhLlG.l.Vl.h.A.HaPsl.FlG.LFLFFLGFspAhttYQs..LhL+cuLhVGFFLAGLVlhGGLQpWWLpslltuhsshslahGAhsLTAhsDNAAlTYLuS.L.lsuho....................Ds....a+YhlVAGAVoGGGLTVIANAPNPAGhuIL+spFscsulushtLhLuALsPTlluhhsFhhh........... 0 18 38 46 +7228 PF07400 IL11 Interleukin 11 Vella Briffa B anon Pfam-B_20854 (release 10.0) Family This family contains interleukin 11 (approximately 200 residues long). This is a secreted protein that stimulates megakaryocytopoiesis, resulting in increased production of platelets, as well as activating osteoclasts, inhibiting epithelial cell proliferation and apoptosis, and inhibiting macrophage mediator production. These functions may be particularly important in mediating the hematopoietic, osseous and mucosal protective effects of interleukin 11 [1]. Family members seem to be restricted to mammals. 
25.00 25.00 27.70 27.00 20.30 19.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.28 0.71 -4.78 3 55 2012-10-02 01:28:15 2003-09-18 11:38:20 6 2 31 0 23 43 0 175.50 46 79.67 CHANGED MNCVCRLVLVVLSLWPDTAVAPGPPPGSPRVSPDPRAELDSTVLLTRSLLADTRQLAAQLRDKFPADGDHNLDSLPTLAMSAGALGALQLPGVLTRLRADLLSYLRHVQWLRRAGGSSLKTLEPELGTLQARLDRLLRRLQLLMSRLALPQsPPDPPAPPLAPPSSAWGGIRAAHAILGGLHLTLDWAVRGLLLLKTRL .................................................................................h-hDphs..T+pLLtsT+pLstph..+D.+.FsuD.u.-...Hs.LDSLPsLu.hS.AusLuu......LQLsusLopL+uDLhSYh+HVpWL+RAuss.SL+oh-PELGslpu+Lc+LL++LQLL.MuRL..sLPQssPssPusPLsPPuSs.WsslpuuHtILttL+LhhDWAsRuLlhLKs+........................ 0 1 4 11 +7229 PF07401 Lenti_VIF_2 Bovine Lentivirus VIF protein Moxon SJ anon Pfam-B_21067 (release 10.0) Family This family consists of several Lentivirus viral infectivity factor (VIF) proteins. VIF is known to be essential for ability of cell-free virus preparation to infect cells [1]. Members of this family are specific to Bovine immunodeficiency virus (BIV) and Jembrana disease virus which also infects cattle. 22.00 22.00 22.60 23.20 21.90 21.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.38 0.71 -5.18 2 22 2009-01-15 18:05:59 2003-09-18 11:45:31 7 1 15 0 0 22 0 133.50 40 59.32 CHANGED MERTlQSshGRRRGSSsRt+tpsslISsPuYAlaPsPpaRYPRWEFVhps.YS.TAplpKtclllTYpYAlWtR.WpIpTGFhD.uhhMTPAGTHTTtElscLDLFWlRYs.C.Hc.P.Wh-hLhGphs.+hSCRRshQAuVlopT..HoLQRLAuLpLssNtsLCWYPlGplscsoPLWhpFooGKEPTIQQLSGHP ...................................t.t.p...hhhh.hhtt.t.hctcch.ah.........................................................................................................aa....ltlCSCpKc+acI..REFhlG+HR.WDLCKSChQGEIV+pTcP+SLQRLALL+lscsHVFQlMPLWRARRs......................sh.h.h.ppsh......... 
0 0 0 0 +7230 PF07402 Herpes_U26 Human herpesvirus U26 protein Moxon SJ anon Pfam-B_21150 (release 10.0) Family This family consists of several Human herpesvirus U26 proteins of around 300 residues in length. The function of this family is unknown. 25.00 25.00 502.10 501.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.95 0.70 -5.12 2 6 2009-01-15 18:05:59 2003-09-18 11:51:38 6 1 6 0 0 7 0 293.00 61 99.15 CHANGED Mhplh.SF.hGLshGsVIPG..sF+hh.t+o.htQlsllhhhuhuhLLTa+Rhs.h.YpPhsDhKIlhLSLhs.phss.h.hVlh.hLhFSEhRLRhIlsRChhIhPoaS.AsahGhhlulhhK.p.hscYplLlTsh.lhPhsspYtahlpSptFhhsLQ+Y+PIhKssu.hphslKsllhahlpFLhLhhllWhGKhaLsh.pspHLFFLsVlpsshFhhplaphshCullslLuGlhhphChapllFEhFlGLGaSulhhplSpsltc+shasGDLLNhFaC.sshshaF MRRLTDSFILGLAKGAVIPGLYsFRMTEGRSsLtQIGVlITVAISFLLTFKRFDPRFYKPIGDFKIVFLSLMAsKLPShLSAVVMICLIFSEMRLRMILSRCVhIMPSYSPAVFTGhMVSLFFKSQMFDDYSVLlTsA.LLPhTlRYGWMIRSSGFLluLQKYRPILKSTSFREVDLKsLVKFTVEFLLLFTlLWIGKhFLSMPKSNHLFFLTVVNNVFFKLNVFKAAACAlVAILSGLMMNVCLYRIIFEAFlGLGFSSIMLsLSSDLKDRSFYAGDLLNGFFCLVVCCMYF 0 0 0 0 +7231 PF07403 DUF1505 Protein of unknown function (DUF1505) Moxon SJ anon Pfam-B_21179 (release 10.0) Family This family consists of several uncharacterised Caenorhabditis elegans proteins of around 115 resides in length. Members of this family contain 6 highly conserved cysteine residues. The function of this family is unknown. 25.00 25.00 69.40 67.10 19.40 19.20 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.08 0.71 -4.14 7 20 2009-01-15 18:05:59 2003-09-18 11:54:03 6 1 4 0 20 20 0 113.80 34 82.02 CHANGED Ms.hh.oVllhuh....slAhsssssSp..opsaspsppCh....Sscspsh.....pCphosshphppsSsCshSpYhhppsp...tssp..ss.NtVsQCopTPCsus-KlssDCtsAFu.-+lupIp .......Mp.hh.sVLlluh....hlAhsss..ssSphpospa.sp.ucpCt..p..sssspsh.....pCphsuTWchppsssCslSpYhKKshs......ssss....-PhNGVAQCTKTPCsuoEplssDCssAFu.-+lupI..... 
0 7 7 20 +7232 PF07404 TEBP_beta Telomere-binding protein beta subunit (TEBP beta) Moxon SJ anon Pfam-B_20928 (release 10.0) Family This family consists of several telomere-binding protein beta subunits which appear to be specific to the family Oxytrichidae. Telomeres are specialised protein-DNA complexes that compose the ends of eukaryotic chromosomes. Telomeres protect chromosome termini from degradation and recombination and act together with telomerase to ensure complete genome replication. TEBP beta forms a complex with TEBP alpha and this complex is able to recognise and bind ssDNA to form a sequence-specific, telomeric nucleoprotein complex that caps the very 3' ends of chromosomes [1]. 20.70 20.70 20.90 424.90 18.50 20.60 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.33 0.70 -5.33 3 15 2012-10-03 20:18:03 2003-09-18 12:45:53 6 1 5 14 0 19 0 279.10 75 97.26 CHANGED MSKGQpA.QQQSAFKQLYTELFNNGGDFSKVSsNLKKPLKCYVKESYPHFLVTDGYFFVsPYFTKEAVsEFHuKFPNVNIVDLHDKVIVINNWSLELRRVNSAEVFTSYANLEARLIVHSFKPNLQERLNPTRYPVNLFRDDEFKTTIQHFRHQALQQSIAKNlKQENLPDISKVoGADAAGKKuKVDAGIVKASASKGDEFSDFSFKEGNTATLKIQDIFVQEKGKDALsKluDcsDVt......KVKGGAKGKuKAAoKSA.+GKKsSAKKuDouuSADVRKSVDKIVKYTPNKPSSRKETPQKSQSAPAAGKSSAKKTTTGSKKslPANPSPSGKKSTKTTDQMTMAQFKKYLDWHEKKKGGKTSSGGKVLGKRSAGKASATSG MSKuQ.s.QQQSAFKQLaTEhFN.GGDFuKVSpsLKKPLKsYVKESYPHFLVTDGYFFVpPaFTKEAVsEFHpKFPNVNIVDLHDKVIVINsWSLELRRVNSAEVFTSYANLEARLlVHSFKPNLQERLNPTRYPVNLaRDDEFKTTIQHFRHQALQQSlAKNlKQENlPDIuKVoGuD...KKuKVDAGIVRASsSKGDEFuDFSFKEGsTAsl+IQDIFVQEKGKDALp+ltsttps.......KV+GGA+GKtKAAoKSA..sKKssAtKt.s.tuADVRKSVDKIVKYTPNKPSSRKETPQKSQSsPA.GKSSAK+ThTGuKpplPANPSPSGKKSTKTTDQMTMAQFK+YL-WHEKKpsGKTSSGGKVLGKRSAGKASATSG. 0 0 0 0 +7233 PF07405 DUF1506 Protein of unknown function (DUF1506) Moxon SJ anon Pfam-B_20962 (release 10.0) Family This family consists of several bacterial proteins of around 130 residues in length. 
Members of this family seem to be specific to Borrelia burgdorferi (Lyme disease spirochete). The function of this family is unknown. 25.00 25.00 27.50 27.00 23.40 23.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.71 0.71 -4.15 2 112 2009-01-15 18:05:59 2003-09-18 12:48:34 6 1 27 0 8 63 0 123.40 63 98.52 CHANGED MNGVRKRLSDMSFRMINVFKDPpPL+FYKGTVVKLENDSSYQRVFDKNKYTEFAGVIIDI+PQELAlLYDSDMSDIQGYSKLYTYQDLNYELKDRISIuDLlYFEIFSIDSSIGYFTLVLKEFIWTN ...................................................MsGVRKRLuDMShRMINVFK.D.PpPL+FYKGsVVKLENDuSYQRlFDKNKYTEFtGVIIDI+PQELAhLYDSDhSDIQGYSKLYTYQDLNYELKDRISIuDL..VYFEI...F...SIDSS....IGYFTLVLKEFIWT......................... 0 4 5 5 +7234 PF07406 NICE-3 NICE-3 protein Moxon SJ anon Pfam-B_21003 (release 10.0) Family This family consists of several eukaryotic NICE-3 and related proteins. The gene coding for NICE-3 is part of the epidermal differentiation complex (EDC) which comprises a large number of genes that are of crucial importance for the maturation of the human epidermis [1]. The function of NICE-3 is unknown. 20.20 20.20 20.60 22.30 19.10 20.10 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.05 0.71 -4.79 6 138 2009-01-15 18:05:59 2003-09-18 12:56:57 6 5 89 0 77 153 0 165.80 46 65.51 CHANGED huSphp.LSGVslVLlhuaGsLlhllLFIFsKRQIMRFsl+SRRGPHVPlGHsAPKsL+cEIEtpLs+lQ+IpaEP+LLu......sDcs+alp...tssps..hhsYhYRMKAlDsl+-h-h.lpchutu.ophsGcslRuaLh.Lpspps.hpGscptLIDplhDhYE+ARHssstFGcsEYh+YpphLpcLsss .............................................p.LSGVsllllhuhGsLsh.....llLFIFsKRQIMRFsh+.S.RRGPH.VPlGps.AsKs.L+cE.I-hRLsclQc.Ip.aEPpLLs......s.-.c.s+hh.....ps..ppp....t.sYhY..RM.KAlDsl+.s.E.hshpptspp.pphhGcshRuaLhpL+s....p.u.sPhcGs..ppsLIcplhDsYEpARaGss..sFGpsEYhcYpctLpcLss.s..................................... 
1 23 28 50 +7235 PF07407 Seadorna_VP6 Seadornavirus VP6 protein Moxon SJ anon Pfam-B_21021 (release 10.0) Family This family consists of several VP6 proteins from the Banna virus as well as a related protein VP5 from the Kadipiro virus. Members of this family are typically of around 420 residues in length. The function of this family is unknown. 19.60 19.60 19.60 19.70 19.50 19.20 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.36 0.70 -5.72 2 17 2009-01-15 18:05:59 2003-09-18 13:01:11 6 1 9 0 2 13 0 319.60 46 81.38 CHANGED MMIALIQMKNMKCTLKVEETAENHELEGVSFDELsALREENAKLKpENEhLKTKlHRLESDWTTSDIVEKlELMDsQF-RIGKIMDKMREPMLFKRD-IELHGDLLARVEGLLRIKNERSEVEF-KDIQCIVGRYFSDEsKQRNLEKMIKoFEYDDIADTIALRLTaFIQDPGLRSIVYAMCKAAVLNQNYLNIEVQEIVDVTRQKYTHNARDDIDFYPMFTFDANVPEGVFDHIYKKHYLsPQSAALVHTLSHLDVNVDGsGIAhYHIGSATRFAECSVVYVDGRAYKPIRVMAEYAIFPTLPHEYKGRVEGLLLLHGGLAPITLVRVYHDVNVGGLVTGSIAASVSTLlRNCMLYSFDlYFTPNGlCINAVGNNNhVNIIDIsCCGRAFGKAPLDQGsWNRNKFMGHKHG+GSKCKQY .....................................................................phsALRpENu+LKpENEtL+sKlcRL..Eu-hppScllEKhpL.h.-s..pa-+lsKIMDK..h......Rc..shL......Fc+D-lcLcusLLARl-sLhRlK.cpsp........psllttYFs.DtscpcsL.chl+o.pas.hhcsht.Rlhh.IpsssLhshshthCthhhh.s.Nh.NlpspplhclstpKYpHsh.ss...Y.hhs..hplspthhDHIYtKH.ls.pshslhHhhSHLsVNVsGsslAhYHlGsusRhsEsSVl.l....-s..RhaKPl+s.u.YAIF.pLPpEhchRVpGLL.hHhGlsPITLVRsht-lp....shhloGslusSlosLh+shhL.ph-lhFs.pGlhlpssGspshsph...sCCtpAFu..................................................... 3 0 1 2 +7236 PF07408 DUF1507 Protein of unknown function (DUF1507) Moxon SJ anon Pfam-B_21047 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 90 residues in length. The function of this family is unknown. 
25.00 25.00 63.00 62.80 18.70 17.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.97 0.72 -4.24 14 527 2009-01-15 18:05:59 2003-09-18 13:03:12 6 1 527 4 46 155 0 89.70 58 95.66 CHANGED sc.ths.pccAhtLLpuDA-KIh+LI+VQhDsLThPQCPLYEEVLDTQMFGLSRElDFAVRLGLI-cc-GKplLscLE+ELStLH-A.hpc ......p...hsh+ptAlp.LpcDA-+IL+LIKVQhDNLTlPpCPLYEEVLDTQMFGLo+EVDFAV+LGLl-cE-GKplhhcLE+ELSpLHEAFTp....... 0 12 29 38 +7237 PF07409 GP46 Phage protein GP46 Vella Briffa B anon Pfam-B_20855 (release 10.0) Family This family contains GP46 phage proteins (approximately 120 residues long). 22.50 22.50 22.70 23.70 22.00 22.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.53 0.71 -4.77 25 364 2009-01-15 18:05:59 2003-09-18 13:04:22 7 1 322 0 50 265 9 111.60 37 81.26 CHANGED ssLpsAVlISLFTDRRA.............tssD...s.sss-pRGWWGDoasss.....plGSRLWLLpRpKLTsc..shpcAcsYAcEALpWhl-DG....hApulsVpup...psphspLsLpVplshs......DGshhshsasstW ..................................sLppAVlISLFT.RcA..............tssD..............ss....s..hG.WWGDoa.s...........plGSRL.WLLp.RpK.hTsp...lhppActYAcEAL...pWll-DG....hsspIsVpsp.....pst....spLsLslslhps......cGs.h.h.h.pat............................... 0 11 23 39 +7238 PF07410 Phage_Gp111 Streptococcus thermophilus bacteriophage Gp111 protein Moxon SJ anon Pfam-B_20904 (release 10.0) Family This family consists of several Streptococcus thermophilus bacteriophage Gp111 proteins of around 110 residues in length. The function of this family is unknown. 
25.00 25.00 26.40 25.50 23.60 23.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.43 0.72 -3.89 2 14 2009-01-15 18:05:59 2003-09-18 13:07:44 6 1 14 0 1 15 0 98.30 43 76.44 CHANGED MKpAWpIAKEAstKhGhKAhEahuEuLKMAWo.AK......tchpslcE+hhchG..cYo......hh.sLD.....pFNEl..hthphs.Yt.chhADhDs.thYhhhpKshcst ..MppAWcIAK-AscKFGGKAhEYluEuLKMAWutAKst...ssolschpslcp+h.pKsG..cYo......hh.s.LsauK-hp.............................................................................................................. 0 0 0 1 +7239 PF07411 DUF1508 Domain of unknown function (DUF1508) Moxon SJ anon Pfam-B_20853 (release 10.0) Domain This family represents a series of bacterial domains of unknown function of around 50 residues in length. Members of this family are often found as tandem repeats and in some cases represent the whole protein. All member proteins are described as being hypothetical. 20.00 20.00 20.00 20.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.23 0.72 -4.53 103 1558 2009-09-14 11:58:01 2003-09-18 13:16:35 7 9 909 14 301 816 131 48.60 40 69.96 CHANGED spsGcahFpLKAsNGplIu.oSEsYso+suscsGIpSV+pNusssshh-h ...........ussupahFs.LK.A.uNsplIs..oSEh.Yso+sSscpGIsSV+sNusppphh-.............. 0 71 186 255 +7240 PF07412 Geminin Geminin Vella Briffa B anon Pfam-B_20861 (release 10.0) Family This family contains the eukaryotic protein geminin (approximately 200 residues long). Geminin inhibits DNA replication by preventing the incorporation of MCM complex into prereplication complex, and is degraded during the mitotic phase of the cell cycle. It has been proposed that geminin inhibits DNA replication during S, G2, and M phases and that geminin destruction at the metaphase-anaphase transition permits replication in the succeeding cell cycle [1]. 
21.10 21.10 21.20 21.50 20.70 21.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.43 0.71 -4.63 7 153 2009-01-15 18:05:59 2003-09-18 13:18:27 7 2 84 11 87 154 1 156.00 30 67.08 CHANGED MssSMKQpp..EpspENlKs........ussPRRTLKhIQPSAsGsLVGRpsE.sKulsKRKhWsspLsSpsspsts.Vt.Eps..ENcs..tslopEAaDLMIKEsPoupYWKEVAEcRRKALYEsLpENEKLHKEIEtKDpEIARLKpENcELtElApHVQYMA-MIERLoGcs.DNLEsLcs.ph......-sEptts-.p..--o- ..................................................................................................................................................................................................+pp..hs.s............t.....t....t.......pp....t...........p......tt...........pptp...........tth..pu....DLh.pp......pPsp......pYWK-lAEcRR+ALh-uLpENc+..LHc...clE...pKppE..Iu...pLK...cENccLtE...l.ApcspahAphl-cL...t...................................................t.................................... 0 21 27 52 +7241 PF07413 Herpes_UL37_2 Herpes_UL37; Betaherpesvirus immediate-early glycoprotein UL37 Moxon SJ anon Pfam-B_21151 (release 10.0) Family This family consists of several Betaherpesvirus immediate-early glycoprotein UL37 sequences. The human cytomegalovirus (HCMV) UL37 immediate-early regulatory protein is a type I integral membrane N-glycoprotein which traffics through the ER and the Golgi network [1]. 
25.00 25.00 34.60 34.50 18.90 17.10 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.88 0.70 -5.61 6 101 2009-01-15 18:05:59 2003-09-18 13:33:48 6 1 24 0 0 73 0 276.10 58 79.43 CHANGED sshssslcCpYchshhpc+psps..tChhsChhNtsLlasGsClus+ssl.lNhsh.spuh+c..........t+sshL+lGlpYYhcGhhlRsllucst......psNsoplsGsltC....W.csssoGGplTLNhosp......RhlhsscsthptspWcustpcos.........sVhplLscpsphDhhFLpchCP+Lscc....h.+h+sspsp.pssVpscopsspslhssWspuWusWoKYuEht.h..hhsthshhhpscsRshuch....shlGlFhlsuGohslLsLFCsLShhpRRcll+Dh ..............GhGVSVRCTYHGTDlNlTSNsTSMNCpLNCTpNpTQIYNGPCsGsEu+LPLNVTF.pQSRRp..........WHSVMLpFGFQYHLEGWFPLRlLNESR......-hNVTEVpGElAC...FpNDTNlTsGQLsLNhTG+S..YVLRAlA+TS..PFESpV+W.EE.T.NsTsssos.....pNTsTVMphLspYAESDYIFLQDMCPRFL+R.....olKLoKNppppNsTFT...Gs...N.hToLPhWT.scCcGWpYWTTLShM.....W+NRRSALLRAKSRALGHW....ALLSICTVAAGSIALLSLFCILLIGLRRDLLEDF.... 0 0 0 0 +7243 PF07415 Herpes_LMP2 Gammaherpesvirus latent membrane protein (LMP2) protein Moxon SJ anon Pfam-B_21212 (release 10.0) Family This family consists of several Gammaherpesvirus latent membrane protein (LMP2) proteins. Epstein-Barr virus is a human Gammaherpesvirus that infects and establishes latency in B lymphocytes in vivo. The latent membrane protein 2 (LMP2) gene is expressed in latently infected B cells and encodes two protein isoforms, LMP2A and LMP2B, that are identical except for an additional N-terminal 119 aa cytoplasmic domain which is present in the LMP2A isoform. LMP2A is thought to play a key role in either the establishment or the maintenance of latency and/or the reactivation of productive infection from the latent state. The significance of LMP2B and its role in pathogenesis remain unclear [1]. 
25.00 25.00 88.20 87.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.99 0.70 -5.90 3 276 2009-01-15 18:05:59 2003-09-18 13:47:27 6 1 6 2 0 98 0 230.90 53 99.99 CHANGED MuSLEMpPLG.AGuPuosGGPDGsEGuuNspYPSuFGSSssoPGP..PssEDh-us-csPPPY-us...sGsctGsYQPLGsQDP.SLYAGLGpsGGsG.LPPPPYSPRc-oSpHlYEEs+.cuoMsPsWLPVIsAPYLFWLAGIAASCFSASVSAlVsoTGLALSLLLLAALsNSYAAQpRKLLTKLTlLVAVVTFFAILLTWlVpPsPpNuIVFALLsAAuGLQuIYlLVMLLhLLhAYRRRWRRLoVCsGLLFLACsLLLIVDAIaQLSPLLGAMTVVALTLLLLAFlLWLSSPaGlGALGAALLTLAAALALLASLILG-LNLATMFLLMLLWTLVIILI...CSoFPtothLLoRWLLYALALLLLASALLAGGSILQT..huusoTEFFPsLFCMLLLIVAGILFILAILTEWGSGSKTYGPVFlCLSGLLTMsAGLVWLTLMpKVLLSAWILTAGCLIFFIGFhLFGVIRFCRhCCFCCLpLESs-RPsTsYcNsV ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +7244 PF07416 Crinivirus_P26 Crinivirus P26 protein Moxon SJ anon Pfam-B_21324 (release 10.0) Family This family consists of several Crinivirus P26 proteins which seem to be found exclusively in the Lettuce infectious yellows virus. The function of this family is unknown. 
25.00 25.00 518.10 518.00 18.10 17.30 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.67 0.70 -4.84 2 5 2009-01-15 18:05:59 2003-09-18 13:50:21 6 1 2 0 0 5 0 227.00 99 100.00 CHANGED MNNFPEIFDDESTCDYDKEIDHQELSDTFWCLMDFISSKHGKSVADINSGMNTLINIRKsLNGSGKVVSITDSYNKTYFHSQRGLTNVDSRINIDILKhDFISIIDDLQIIFRGLIYKDKGFLDSADLLDLDKKTTTRKFQEYFNILKIKIIEKIGMTKTFHFNIDFRsTISPLDKQRKCSISSSHKKTNRLNDLNNYITYLNDNIVLTFRWKGVGFGGLSLNDIKI MNNFPEIFDDESTCDYDKEIDHQELSDTFWCLMDFISSKHGKSVADINSGMNTLINIRKSLNGSGKVVSITDSYNKTYFHSQRGLTNVDSRINIDILKIDFISIIDDLQIIFRGLIYKDKGFLDSADLLDLDKKTTTRKFQEYFNILKIKIIEKIGMTKTFHFNIDFRNTISPLDKQRKCSISSSHKKTNRLNDLNNYITYLNDNIVLTFRWKGVGFGGLSLNDIKI 0 0 0 0 +7245 PF07417 Crl Transcriptional regulator Crl Vella Briffa B anon Pfam-B_20900 (release 10.0) Family This family contains the bacterial transcriptional regulator Crl (approximately 130 residues long). This is a transcriptional regulator of the csgA curlin subunit gene for curli fibres that are found on the surface of certain bacteria [1]. 25.00 25.00 27.80 41.40 22.80 19.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.86 0.71 -4.42 17 658 2009-01-15 18:05:59 2003-09-18 13:59:10 7 1 648 2 53 209 5 124.10 67 97.53 CHANGED slsps.s+uR.LhpphsALGPYlREtQsc-spFFFDCLuVCVsschuPEcREFWGWWh-Lcsp-stFoYpYphGhasppGsWhstsl..pcstcclppThcsFHt+LtphLpp.hpLpLpsss-......schs ........TLPSGHPKSRLIKKFTALGPYIREGpCEDNRFFFDCLAVCVNVKPAPEhREFWGWWMELEAQ.EsRFTYsYQFGLFDKsG-WpuVslp-sEVVERLE+TLREFHEKLR-LLso.hpLcLEPADD....FpDEPVKL.. 0 4 13 33 +7246 PF07418 PCEMA1 Acidic phosphoprotein precursor PCEMA1 Moxon SJ anon Pfam-B_20971 (release 10.0) Family This family consists of several acidic phosphoprotein precursor PCEMA1 sequences which appear to be found exclusively in Plasmodium chabaudi. PCEMA1 is an antigen that is associated with the membrane of the infected erythrocyte throughout the entire intraerythrocytic cycle [1]. 
The exact function of this family is unclear. 21.40 12.50 31.70 12.80 18.30 12.40 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.05 0.70 -5.04 8 26 2009-01-15 18:05:59 2003-09-18 14:04:48 6 2 5 0 18 27 0 235.50 36 79.28 CHANGED MKlISLGLISSIIFSIVLAKNSSsSsSTTGCFsFsRKKsKKlHpsssts....VKup-..-aDPDLPNLKFIDEF-PIsLEssKuRhScLD-sFlSETDGhIIDKVTGF.RRENDSslSGWYlRPYEEDYEcMIplNFIPLp...-YYQ+ppNssHKQusuPsPlsh......hPcKQEhslppp.o........................TlpE-DussLaEDct..................................ElDsEssshh......s-GEs...............cspcEh-pcsls.L........................p-tEcspp...................................hcpplpc.Ycs................hpt-tt..ttpppc.-................c.p..c.--ptsEcs-sccpspc .......................................tpt.tSt..hspFshhpKKsK.c.hth........hc..c...aDPcLPNlKFI-EFpPlhlEssKtp..cLs-sFlSETDGhIlDKVTGF.RREN-SshpGWYIRPYEEsYEcMIphpFhPLp....p.app.psps.KQ.ssssPlsp.......PcKpcls..pp.l.........................plpEcDt.hlpE-ct.......s.........................plstt..p.h......s-t-t...................ps.sEh-tt.ht.h.........................pt-p.p.p...................................h.ptch.p.Ypt..................ht...pt...tttt...........................................ph............................................... 0 0 3 18 +7247 PF07419 PilM PilM Vella Briffa B anon Pfam-B_20906 (release 10.0) Family This family contains the bacterial protein PilM (approximately 150 residues long). PilM is an inner membrane protein that has been predicted to function as a component of the pilin transport apparatus and thin-pilus basal body [1]. 
25.00 25.00 25.40 32.50 22.60 22.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.45 0.71 -4.67 13 197 2009-01-15 18:05:59 2003-09-18 14:10:46 7 1 167 4 19 125 0 135.90 31 92.57 CHANGED hVhlllsu......th.tppsspsppptpsupsshlAtphLhatsulNDatYppsspc...GslssspLGLPs..h.-stlpplIstsRlaVWhs-pP..GLsuALpcpSpsSsLlGplpsGpLlsssGsshuloLPuuIs-GslValN ..............................hhhhhlss.......h.ppp.spstpttphsssshh.AsphlhhtsslNDatYspstpc.....Gs.l..ssspL..u..LPs....sscst.lcphlppGRlaVW..h.sppP.....GLsss...L....ppp.ocs.S..uLlGhh..p.sGpLshh.u...G.ss.....s...ulslPA..uIstGulVhhN.......................... 0 7 11 13 +7248 PF07420 DUF1509 Protein of unknown function (DUF1509) Moxon SJ anon Pfam-B_21379 (release 10.0) Family This family consists of several uncharacterised viral proteins from the Marek's disease-like viruses. Members of this family are typically around 400 residues in length. The function of this family is unknown. 20.30 20.30 20.40 22.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.63 0.70 -5.18 3 13 2009-01-15 18:05:59 2003-09-18 14:17:13 6 1 8 0 1 12 1 286.70 43 71.00 CHANGED MFsGtATIELhERLATSWLTAIRLILoWHPlaA.sRppEP...LE+lCR-GREYIlMLSGTIpssHATWPFWQlMpKCLDWCCSFHsPDD+SCEHGSPRIGIRLEGENphFAPuLGLYSuVMTWTP..IPCasEhPlh.......PRPupSS-s-ssPSTSptps..VARVRPpVlQ++VsKTRPlDsElHRPtPlAhsN......PS.asDEPDhthp.cPQPGPSGQN.........RuPRTPT..L-sVRVtDpPVopsRu.coPSPP.csDpsDsD.slutPsRsh.RTPsspPSSP-oElsEEl.AQPDPWGTpt.hhsNRuuTPDDsSuIo-DSAcGSctshRRPsHSsoGERRloRRNRSE..uRSRSRSRSG-....RRaRRsRlRuhPGRRSsS...Rp.TVLVsSSE--- 
.........MFsGsuTltLIERLATSWLTAlRhIluWhPhaA..pppEP...L-pLsREuRpYIhhlSGoVpssaATWPFWpVMR+CLDWCCuFHhPcDpSCpaGAPRlGIp.cGpN.hFsP.LsLYSslMsWoP..hsCahp...........sp..psstsps.PSsup.....luRV+Phshpphs.KppP.ch-.Hpshshuh........Po..hc-s-......spsuPStps.........ptspoPT..lpsVhsh-tssshphs.tsPsPP.p.s..s.-.s.u.......hoP.....Ss.s.....h.AQP-sWsh......spptoPsDposh.-Dpschp..th.pP.+Stss-.RhsRcsppc..sRSRSRSRSt-....RRht..chR..PGcppsu...cp.sVls.Ssptp........................................................... 0 1 1 1 +7249 PF07421 Pro-NT_NN Neurotensin/neuromedin N precursor Vella Briffa B anon Pfam-B_20947 (release 10.0) Family This family contains the precursor of bacterial neurotensin/neuromedin N (approximately 170 residues long). This the common precursor of two biologically active related peptides, neurotensin and neuromedin N. It undergoes tissue-specific processing leading to the formation in some tissues and cancer cell lines of large peptides ending with the neurotensin or neuromedin N sequence [1]. 25.00 25.00 59.90 59.80 19.30 18.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.15 0.71 -4.76 3 47 2009-01-15 18:05:59 2003-09-18 14:29:15 6 2 33 1 22 43 0 151.40 68 98.12 CHANGED MtGMNLQLVCLTLLAFSSWSLCSDSEEDVRALEADLLTNMHTSKISKASPPSWKMTLLNVCSLINNLNSsAEEAGEMHDDDLVuKRKLPLVLDGFSLEAMLTIFQLQKICRSRAFQHWEIIQEDILDsGNDKNEKEEVIKRKIPYILKRQLYENKPRRPYILKRGSYYY ...................tM+lQLVChhLLAFoShSLCSDSEEEMKALEADLLTNMaTSKI..oK...AplP.WKMTLLNVCSLlNNLNo.AEET..GEhcEE.-LlsRRphPtsLDGFSLEAMLTIYQLpKICHS...RA..FQH.WELlQ..EDlLDsGNcK.sEKEEVIKRKhPYILKRQL.apNKsRRPYILKRsSYYY............ 0 1 2 5 +7250 PF07422 s48_45 Sexual stage antigen s48/45 domain Vella Briffa B, Bateman A anon Pfam-B_21037 (release 10.0) and Pfam-B_4621 (release 14.0) Domain This family contains sexual stage s48/45 antigens from Plasmodium (approximately 450 residues long). 
These are surface proteins expressed by Plasmodium male and female gametes that have been shown to play a conserved and important role in fertilisation [1]. This domain contains 6 conserved cysteines suggesting 3 disulphide bridges. 20.80 20.80 20.90 21.20 20.50 20.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.37 0.71 -3.99 83 1042 2009-01-15 18:05:59 2003-09-18 14:56:31 8 8 16 1 195 1050 1 122.40 24 32.58 CHANGED plcG....CDF..........................sssp.....................tpphhspshphs...........psphCplps...pss-l.lGhtCPpp......................................................hplpPssCFcpV....h.........................sppphplpsll.........ssphh.........................tppppphpahtlP.p.hpp........phphtCpCpptp ........................................................................lpG.CDF..............................................ospt...................pphhppshs.s............ttschCslph................tss-l.lGhhCPss.......................................................plpPpsCFpplY............................tsppthclppl.lt.................ts.hh......................................................hsppptphsahplP.pphpc........phpFpCpCpp..t........................ 0 44 75 162 +7251 PF07423 DUF1510 Protein of unknown function (DUF1510) Moxon SJ anon Pfam-B_21355 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 200 residues in length. The function of this family is unknown. 
42.00 42.00 42.50 43.60 40.70 41.50 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.55 0.70 -4.97 16 275 2009-09-11 00:04:00 2003-09-18 15:34:46 6 2 186 0 28 185 0 178.30 36 98.23 CHANGED SRapp+pK+R+tNhlLNIhIulVllLIllVuspLhh.sssppps.tp..........................c.ptptsscpcpsstEcscsEsK-susss-pcc..............pcccc-spcpcsppp.p......psppstpcsspsssssVccshssssWcPlGTpQoupHsssa.cpuStDWpEMhcAlSYATGlsp-phhlhalGNN..Gs..scAhGslpcKsssp+.YcVpIpWVDscGWKPsKVppL ......................SRhpp+pp+++pNhlLNlhIslV..lhlhllsaplhh.sp.pppts.tp.................................ppsppppstp.pcptK..pcsK-tsp.p-pcp...................tppp..pppp.c.pt.cp.p.pctcc....psptspp.s.tcpstspspcshTpssWKPlGT-....Q..sup.shsa.cpuosDWpEMppAlShAh-lP.pphhhhhlGps..Gp..scAhGslpsKpssc+.YhV.IsWV-scGWKPshVppL...... 0 9 19 21 +7252 PF07424 TrbM TrbM Vella Briffa B anon Pfam-B_21098 (release 10.0) Family This family contains the bacterial protein TrbM (approximately 180 residues long). In Comamonas testosteroni T-2, TrbM is derived from the IncP1beta plasmid pTSA, which encodes the widespread genes for p-toluenesulfonate (TSA) degradation [1]. 20.90 20.90 20.90 20.90 20.80 20.50 hmmbuild --amino -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.21 0.71 -4.27 16 280 2009-01-15 18:05:59 2003-09-18 15:52:46 6 1 214 0 27 201 11 115.20 33 74.28 CHANGED lLTGDTRLACEAlLCLuS..us+PuECuPSLsRYFSIct+KhpDTlptR+sFLNLCPVssp......sPEMtsLpss.lsphsGc.CsssuLNsplcphp.....................thcchthpIssplss.CphhsupsYTDap.....s+Yssssc...cts.Wscuh-hccs.tEh.ppl......pc ..................sCEulLC....L....uu....us...ts...oECpsu.pcaF.s.I.p......t..K.....K......h.s......cThcAR+sFLN.CPssst................s..........................................................................................................................................sh............................. 
0 3 16 22 +7253 PF07425 Pardaxin Pardaxin Moxon SJ anon Pfam-B_21422 (release 10.0) Family This family consists of several Pardaxin proteins. Pardaxin, a 33-amino-acid pore-forming polypeptide toxin isolated from the Red Sea Moses sole Pardachirus marmoratus, has a helix-hinge-helix structure. This is a common structural motif found both in antibacterial peptides that can act selectively on bacterial membranes (e.g., cecropin), and in cytotoxic peptides that can lyse both mammalian and bacterial cells (e.g., melittin). Pardaxin possesses a high antibacterial activity with a significantly reduced haemolytic activity towards human red blood cells compared with melittin [1]. Pardaxin has also been found to have a shark repellent action [3]. 25.00 25.00 86.60 86.50 19.20 17.00 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.39 0.72 -4.55 3 5 2009-01-15 18:05:59 2003-09-18 16:03:39 6 1 2 2 0 5 0 33.00 95 100.00 CHANGED GFFALIPKIISSPLFKTLLSAVGSALSSSG-QE GFFALIPKIISSPLFKTLLSAVGSALSSSG-QE 0 0 0 0 +7254 PF07426 Dynactin_p22 Dynactin subunit p22 Vella Briffa B anon Pfam-B_21336 (release 10.0) Family This family contains p22, the smallest subunit of dynactin, a complex that binds to cytoplasmic dynein and is a required activator for cytoplasmic dynein-mediated vesicular transport. Dynactin localises to the cleavage furrow and to the midbodies of dividing cells, suggesting that it may function in cytokinesis [1]. Family members are approximately 170 residues long. 
21.50 21.50 22.00 21.70 20.90 21.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.05 0.71 -4.98 4 123 2009-12-07 10:30:11 2003-09-18 16:29:35 6 4 86 0 67 115 0 154.70 37 86.32 CHANGED Msc...sLcpLchRLQ.LEpRlYG..tutsucPsKsA-uLs+lQsALuNhAsKRERVKILaKKIEDLlKYLDPQahD+IAlPDuMKLEFILAEEphlsu...........QAALLEQlpshpPlLDSsaIpAVPEpAoKLQRLSQIHIpQQDQsEphosEVK+LhE-YNKMhhLLSKQFsQWDEsL .......................................................tlp.LptRlptLEphl.......h.G......t..s.ts....t....t...s..p...p...l.sD.uLs.......clpstL...ush..usKRE+l+...h...LaK+..l--L.hK.YLDPp...a.lDcl.slPcu..KLpaILApEp.lhs...........psthLEplppLpPhL-Stt.l.+slPpps.s+LppLuQlalpQp-ps.tlspcsptLhppYNphh..lScpFh.ast................................... 0 22 26 44 +7256 PF07428 Tri3 15-O-acetyltransferase Tri3 Vella Briffa B anon Pfam-B_21449 (release 10.0) Family This family represents a conserved region approximately 400 residues long within 15-O-acetyltransferase (Tri3), which seems to be restricted to ascomycete fungi. In Fusarium sporotrichioides, this is required for acetylation of the C-15 hydroxyl group of trichothecenes in the biosynthesis of T-2 toxin [1]. 
20.10 20.10 20.60 23.10 19.70 18.20 hmmbuild -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.45 0.70 -5.76 3 46 2009-01-15 18:05:59 2003-09-18 16:55:56 6 2 24 2 6 46 0 385.60 77 81.01 CHANGED ALPPLVPALYRWESTGu..RcVQRRCVGAEAIVGLEEKNRRSLYDLFIATSLRNIAPASTLLTLRNLKDMFELALVEGRFEHPECACTVSWDDQVAAIISYESPESDESARDWARGCVHVQPTAKSAIDLWTEMEEGRAAAKDNTPSKPIELFLLSDVPTDSTPIPQGATVDILFHSNHLFWDGIGCRKFIGDLFRLVGNHIGLSDStEThKMQWGQEIKNLSPPVVDSLKLDISTLGTEFDDKCTEYTSALVANYKSRGMKFRPGLALPRCuIYKLSADDSIAIIKAVKTRLGPGYTISQLTQAAIILALLDHLKPTDLSDDEFFISPTSVDGRKWLREDIASNYYAMCQTAAVVRVENLKSIAVSHKDEKEIQVRALEKACRDIKKSYDQWLGNPFLEALGLRVHNFEASYLN ........................................................................................ALPPLVPALYRWESTGs....RpVQRRCVGAEAIVGLEEKNRRSLYDLFIATSLRNV.APASToLTLRNLKDMFELALVEGRFEHPEsACTV....SWDD.QVAAIISYESPESDESARDWARGCVHVQPTAKSAIDLWTEhEEGRAA.AKDN.sPSKPIELFLLSDVPTDSTPIPQGATV-ILFHSNHLFWDGIGCRKFIGDLFRLVGNHIGLS..DSA-T.KhQWGQEIKNLSPPVVDSLKLDVSTLGTEFDDKCTEYTSAL...VANYK...SRGMKFpPGLu.LPRC.uIYKLSA-.DSIAIlKAVKTRL.GP.GYTISQLTQAAIILALLDHL.K.PTD..LSDDEFFlSPTSVDGRKWLR..E.DIASNYYAMCQTAAVVRVENLKSIAVS.HcDEKElQV+ALEKA.CRDIKKsYDQWLGNPFLEALGLRVHNFEAuYLp..................... 0 1 2 6 +7257 PF07429 Glyco_transf_56 Fuc4NAc_transf; 4-alpha-L-fucosyltransferase glycosyl transferase group 56 Vella Briffa B anon Pfam-B_21451 (release 10.0) Family This family contains the bacterial enzyme 4-alpha-L-fucosyltransferase (Fuc4NAc transferase) (EC 2.4.1.-) (approximately 360 residues long). This catalyses the synthesis of Fuc4NAc-ManNAcA-GlcNAc-PP-Und (lipid III) as part of the biosynthetic pathway of enterobacterial common antigen (ECA), a polysaccharide comprised of the trisaccharide repeat unit Fuc4NAc-ManNAcA-GlcNAc [1]. 
25.00 25.00 28.40 25.20 23.30 24.60 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.18 0.70 -5.50 11 636 2012-10-03 16:42:30 2003-09-18 17:16:37 6 2 599 0 55 333 10 331.10 69 97.20 CHANGED MTsLIHVLGSDIPHHNpTVLRFFNDpLss.hsspQtR+FMlVup-suL.ssaPALclppFsuKKuLAcAVIApA+usRspRFFhHGQFNssLWLALLoGtI+PsQhhWHIWGADLYEsSpuLKF+LFY.LRRlAQtRVG+VFATRGDLsaatp+HPpVssp..LLYFPTRMcsuLsthssscscsu.phTILVGNSGDpSNcHlsAL+AIHQQF.GssV+lIlPMGYPuNNpuYIppV+psuhtLFsscNLQILoEKLsFD-YLsLLRpCDLGYFlFsRQQGIGTLCLLIQhGlPhVLsRcNPFWQDhsEQplPVLFssDsLstshlREAQRQLtulDKspIAFFsPNYlpGWppALtlAuGEss ...............................................................................MTVLIHVLGSDIPHHN+TVLRFFNDsLAA..TSEHAREFMVsGcDs.Gho-....SCPA....LS.l.pFasu..KKuL...AcAVIA....KAKANRpQRFFFHGQF.NsoLWLAL.LSGGIKPuQFaWHIWGADLY...E....l...S...p.....G......L+.a+LF..Y...P........L...RRlAQtRV....Gs.V...F.A.T.R.GDLSaFA+pH..P.+VRG.E........L.L..Y.....FPTR.M..DP......S.L..NoM.As-.R...QRtG.KhTILVGNSGDRSNcHIAAL+..AVH.Q.QF..G..D.T.V+.VVVPMGYPsN.N-AYI-E.VRQAGLpLFSpENLQlLSEKLEFDAYLsLLRQCDLGYFIFARQQGIGTLCLLIQAGIPCVLNR-NP.FWQDMsEQ.HLPVL..FTTDDLNEslVREAQRQLASVDKssIA..FFSPNYLQGWppALsIAAGEss................................................................ 0 5 20 38 +7258 PF07430 PP1 Phloem filament protein PP1 Vella Briffa B anon Pfam-B_20843 (release 10.0) Family This family represents a conserved region approximately 200 residues long, four copies of which are found within the plant phloem filament protein PP1. This is one of the constituents of the proteinaceous filaments found in the sieve elements of Cucurbita phloem [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.40 0.71 -4.72 3 26 2012-10-01 19:28:07 2003-09-18 17:18:52 6 3 6 0 14 73 0 125.60 24 88.55 CHANGED CGQlsssPKWIQIPDVKEhGlQVVIKFIVEQlKIpaGDSLKF-SIYEGWYFELCPNSLKYRLHIKAIDFLGRSLpYEIIIIEEK.hL.RIaKL-SIIVILSPGHhVGPVDPPQVEKWIKIPNLQVPFVQEVSKFAlDEaN.KuGDGLKYlEIYDGWYMEMGQDNIKFRLHLKAKDCLGRlRNYEAlVLVKQFLSKRIKILESF ..............................thh......................................................................................................................sttppWhpI..t..slp..shlQ-LucaAVpEaN..K.......s.s.s..sL+FpcVhpGh.h..clspss.hc.a+.LplhAtct......up.tpYcAhVh............................................. 1 0 8 14 +7259 PF07431 DUF1512 Protein of unknown function (DUF1512) Moxon SJ anon Pfam-B_21354 (release 10.0) Family This family consists of several archaeal proteins of around 370 residues in length. The function of this family is unknown. 25.00 25.00 62.80 62.80 24.00 23.20 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.06 0.70 -5.71 5 62 2009-01-15 18:05:59 2003-09-19 10:25:02 7 1 60 0 35 60 28 352.10 40 96.29 CHANGED ssslaaIlohlLFalLIhLL......pclphhhlhRsIEGpLuhLEchtN-Apppllchhcsccs+-s.....EohlcRhu-FFVIsPVslDPsGIIcRh++LLcpu-D+a+chlcRhsPptDsVpRuslpsAlEllsulshIaKaVRHhLloA+KTsNhhLLlQLQMlLPhlh+lu-oYp-AsNuFlKGVPIGDSAGPLVAupLlucusp...+Ec.p+-TVsuEsElEGR+LhIVKA+GPGSoVGRhGcAVEcllc+h.u...+sccIITIDAALKLEGE+TGuVAEGlGVAMGDsGsEKapIEplAlKYGIsLcAVlIKMShEEAlosMsc-IlsAVccAlERVK-IIpEsscPGDoVlVVGVGNTVGVuQ 
........h.....llthllah.lhlhlh........tp+lphh.hhtpslcstLshlcphl....s-ups....chhphLccts.....h..p-s.......cshlcR.hh-aFlI-PVsl-PssIls+h+pllcot--ph+chlp.thsPph..Dshp..hsplpsulEllsuLNhIYKVVRHYhlhA+KhsshhLlhQLQhllPhlhchu-AhpcAhssFhcGhPlGDuhGPLVAtplhhsssp....hps.s+DTVhuEs-a-GR+lhllKAcGPuuTVGRPG-AVcpll-chts....+lshIITlDAALKLEGEcTGslAEGhGVAhGssGsEKhsIEchAs+YsIPlpAlllKMShcEAITtMpKEIhpAspcshchVpclIh-pocPGssVlVlGVGNTsGVuQ... 0 13 19 26 +7260 PF07432 Hc1 Histone H1-like protein Hc1 Moxon SJ anon Pfam-B_21362 (release 10.0) Family This family consists of several bacterial histone H1-like Hc1 proteins. In Chlamydia, Hc1 is expressed in the late stages of the life cycle, concomitant with the reorganisation of chlamydial reticulate bodies into elementary bodies. This suggests that Hc1 protein plays a role in the condensation of chromatin during intracellular differentiation [1]. 20.50 20.50 20.60 20.50 19.80 20.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.91 0.71 -3.94 4 147 2009-12-07 10:22:16 2003-09-19 10:31:06 8 1 117 0 26 101 24 74.90 46 96.62 CHANGED ALKDTAKKM+DLL-SIQpDLtKAE+GNKAAAQRVRT-SIKLEKlAKlYRKESIKAEKpGLhp+K..PApKA.tAAKs.tpAuKsssKKAsAtssppuKAs.KuKPtoKKTusK.KsKpsShpRuhtK .........................cchhpKlp-LhtshppD.h........t........p......h......E+GNKAAGTRARKsSL-LEKlhKpFRKtS..lcAuK.................................................................................................. 0 14 23 25 +7261 PF07433 DUF1513 Protein of unknown function (DUF1513) Moxon SJ anon Pfam-B_21436 (release 10.0) Family This family consists of several bacterial proteins of around 360 residues in length. The function of this family is unknown. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.82 0.70 -5.53 37 313 2012-10-05 17:30:42 2003-09-19 10:33:19 6 4 305 0 85 300 24 293.50 40 81.71 CHANGED hslPsRuHulsscP............tpscuVshuRRPGpashlhDs.psGp.ht..tltusssRHFaGHGsFSsDGphLYsTENDh.ps.....ucGlIGVaDsp..p..sap+luEasotGIGPHElhhhsDG.....poLlVANGGI.cTcs-tGRtcLNL-oMcPSLsh.lsptsGpLlpphtLs.ph+phSlRHLAlsss..........GpVhhutQapGp.p-tsPLluhactGps.hphhthsppp...htshssYsGSlAsssstthlAloSP+Gsphtla.....ctsoGphltttslsDssGlushssG.................FlsoSGt.....Gp....hhphs.ttht.h...t.......sshtWDNHlhtl ...............h.slPsRuHuluh+P..................shs.pA.VsFARRPGpahhlhDh..psup........lt..hh.su.sssRHFYGHGVaStDGc....hLYATEs........-h.cs........uRGlIGVYDss..p...pap+luEassaGIG.PH-lh..h..hs..DG.......sLVVuNGGl.+Tc.t.GRt.L.....NL-oMpPSLsh....l.stsGplL-phsLstt.+pLSlRHLAhsuD......................GoVhsGpQap...Gpsc..-hs....s.LlAh+ptGts.hp....htst.-p...htthspYluSlAss.....s-..hlssTSPcGsphhlW.....stsoGpllt.ssLsDsuGVsshtsu.................FhloSGp.....Gc..............lhhtss.thp.......ph.................sslhWDNHhst.l.............................. 0 17 39 64 +7262 PF07434 CblD CblD like pilus biogenesis initiator Moxon SJ anon Pfam-B_21452 (release 10.0) Family This family consists of several minor pilin proteins including CblD from Burkholderia cepacia which is known to CblD be the initiator of pilus biogenesis [1]. The family also contains a variety of Enterobacterial minor pilin proteins. 
25.00 25.00 41.20 41.10 18.40 18.10 hmmbuild -o /dev/null HMM SEED 360 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.30 0.70 -5.75 5 262 2009-01-15 18:05:59 2003-09-19 10:39:02 6 1 240 3 7 118 1 303.90 54 91.10 CHANGED IlFIh..phllsStlhuhulpAssssGslscopolu.tDRusuoPl..a.IFssaluGYssSHsL.aDRhoFLCpSSoNsssGACPTscsstspthsGpTNI+LpFTEK+SLA++sLsLcGsK+alacsssCsp.............cMsLNSssspC.spstssGssLsLYIPAGELsKLPoGGlWcATLcLclKRau-sos..aGTYTlNITVDLTDK..GNIQVWLPpF+S.sPRVDLNLRPh.Guu+YSGoNsLDMCLYDGYSTpSsSl-l+FQDD...NpousGcYsLl....KoGus-KcLsYAlSLLhGGKplsPsNGpuFoINssS.L.lNWNRl+uVoLPpISlPVLCWPusLTLso...clssP-AGcYSGpLsITFTPSSpoL ....................................h...t.p.sho............................................................................................................................s.sG.shS..slplhhTEp+SGh+hsltLpGa.+suh..ssst.....h.ss...............h..h....s.ust..Sh........................t...tphlpLal.toEh+pLPIGGlWKu+lKL+.........uss...s................htsYhAsITLNshD...spIslaFPpF.upATPRVpLsLHPh....sNuSp....hu....t.c.s.LDMCLYDGasuNAhShplhlcD-.....ss+..psG.FSlahp.tspos....spscRIDYpVpMh.....sG.t..pIs...VpNspshshss.....lN.hp+l.RPVVLPGIRhAVhCVPsPLTLssptFsVh-KpAGhYhGpLoVhFTPSh.......................... 0 0 3 5 +7263 PF07435 YycH YycH protein Vella Briffa B, Szurmant H, Mistry J anon Pfam-B_21457 (release 10.0) Family This family contains the bacterial protein YycH which is approximately 450 residues long. YycH plays a role in signal transduction and is found immediately downstream of the essential histidine kinase YycG. YycG forms a two component system together with its cognate response regulator YycF. PhoA fusion studies have shown that YycH is transported across the cytoplasmic protein. It is postulated that YycH functions as an antagonist to YycG [2]. The molecule is made up of three domains, and has a novel three-dimensional structure. 
The N-terminal domain features a calcium binding site and the central domain contains two conserved loop regions [3]. 20.60 20.60 20.80 21.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 438 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.50 0.70 -5.77 6 596 2012-10-01 23:24:42 2003-09-19 11:16:32 6 2 586 1 74 392 0 421.40 30 97.23 CHANGED EshKSllLslLVlhSlVLTahlWsFQPDaus.....lsspcsc+s......ppshTtphspslpPhpllh.u+--psphshsssslhcchhssLpphclpslcclpccashs..h...usstI.......pl.FspslPhslF.splhpl-scs..hsphsFsRllIsh-tscsslt.lahlsc-+cpllchssospsh-plhcshcshpuchp.hsplIpsp.....ppchalPscspcLpshchlhsolsspphsphLFsDoohV...R..uSposssoYssuouVhshsscschhcY....pp.hs.-psoShpttclIpsSacFINuHGGaTs-..achash.scosploYphFlsGaPVaNppup.....spIpVTaGscslh-Y+RPhhclD.ss.hccscKpLPouEpVhsuLupps-lch-clssIslGYcMscsossst.....hVpLEPpWahKYcGcW......ahhp-uAtpulE .................................EthKollLslLVlhSllLTah.lW.....s..hs.Pch.ss..........l.s.s.sc.s.pps.................sps.h..s...tch.ss.s...l.pPhpllh.....+p-p..sp......h.t.........h..s..s...s..s.......l....s....cl.h..ps....L...c.s.t...c...l...c...s.l.p.c....lp...p...cp.shh........hs...sshl............lsFstslPls....s..a.tpl..h.s....hs....scs......ss.p.h.p....FNR.l.l..lDhst.s.cp.....l.Yhl..sc.-.p...c...p.h..hchp.h....o.s...p....s.....cc........lhctlsp...hp...p..c..h..p.....sa...p...chhhNp..................ppclah..P..s.csp..cLp..sh..phlh..ss..lsl.....-.p....h..ps.hLF..s...D.....s..s...h..V...c..ps..c.u.ss.s..s..Y..s....s..s.s.t.....s.h.p.h.s.s.c.sc....h.hcY....ps....s.sptss......sch.tphl.toF-FlNs.HG.Ghhs........p............aR...h..a..sh....t..........t.p....s......u...p......ls....a....ph.F.l....s.....G...aP..lF..N.p.c..uh.........spIpl.....s.h....G.pcs....V.hcYpR.....sLhphs..ss..h..s..s.p..p.p..hoLPsscs...VhssLtp.sss..ls.h..c.c.lpsIsIGYchppss.....pp..p.........sscL.PpWYVcY.c..sp..W...................thp....................................................................................................................... 
0 23 47 61 +7264 PF07436 Curto_V3 Curtovirus V3 protein Moxon SJ anon Pfam-B_21777 (release 10.0) Family This family consists of several Curtovirus V3 proteins of around 90 residues in length. The function of this family is unknown. 25.00 25.00 82.50 82.40 22.50 22.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.04 0.72 -3.85 3 24 2009-01-15 18:05:59 2003-09-19 11:25:43 6 1 17 0 0 28 0 79.30 84 97.79 CHANGED MVCLPDWLFLLFIFSILLQAGTNFYGTFQSGSISRKLSsLuSRFDELFlKlQQVVYTRYPSR-RTVDsRRRRGLSAIPEGuEEsoEA MVCLPDWLFLLFIFSILLQSGTNFYGTFQSGSISRQLSSLuSphDpLFLKlQQVVYoR..sSR-RusDsRRRRGLSuIPEGsEEssE.. 0 0 0 0 +7265 PF07437 YfaZ YfaZ_precursor; YfaZ precursor Vella Briffa B anon Pfam-B_21552 (release 10.0) Family This family contains the precursor of the bacterial protein YfaZ (approximately 180 residues long). Many members of this family are hypothetical proteins. 21.40 21.40 21.50 21.40 20.80 21.30 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.99 0.71 -4.85 3 659 2012-10-03 17:14:37 2003-09-19 11:26:33 6 1 610 0 85 306 5 178.50 63 95.94 CHANGED MsKatVAulAuLLLVAuSVNAsSFNupsG+-YTNlGlGLGTNTsGLAlSGNWs+SDDDGDVAGlGLGhNLPlGPhpATVGAKGlYhsP--GssGuAVAVGGGLuasIGsSFSLaGEuYYAP-sLSSG.lcSYpEANuGVRYNIhRPlol-VGYRYINhEGKDGsRDNslADGsYIGushsF ...............................................................................MKKhhL.A.G..h.AGMLh...V...SASs.sA.h.SISGQAGc-YTNIGV...G...F.G.TES....TG...LAL.S....G.NWsHN.....D..D.......DG.D............sA.G...V..GL.G...L..N.....l....P....l..G..P....L....hATVG..G..K.GlYTNP....p...t...G..D.E...G.YA.A...AV.GG..GL....QW...c..I..G..s..S....F...RLF..GEYY...YSPDS.LS........SG.Ic..SYEE.....AN...A.GARaT..IMRPlSIEAGYRYLNLu.G.K.D.GNR..D..N.Al.ADGPYVGVNASF....................... 
0 13 32 58 +7266 PF07438 DUF1514 Protein of unknown function (DUF1514) Moxon SJ, Eberhardt R anon Pfam-B_21857 (release 10.0) Family This family consists of several Staphylococcus aureus and related bacteriophage proteins of around 65 residues in length. The function of this family is unknown. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 29.20 29.20 24.70 23.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.16 0.72 -4.06 3 268 2009-01-15 18:05:59 2003-09-19 11:30:45 6 1 176 0 3 46 0 64.60 67 98.32 CHANGED MWIsISIVLAIhLLIsLSSNSc+t+EI-AL+YhNsYLFcclVKspGhpGLE-YchEl-RI+s-lKc MWIshoIVhAIlLLlslSlNS-+t+EIpAL+YMNDYLhDclVKspGasGLE-YchElcRhss-lKc......... 0 1 1 3 +7267 PF07439 DUF1515 Protein of unknown function (DUF1515) Moxon SJ anon Pfam-B_21875 (release 10.0) Family This family consists of several hypothetical bacterial proteins of around 130 residues in length. Members of this family seem to be found exclusively in Rhizobium species. The function of this family is unknown. 22.20 22.20 22.20 22.70 22.10 22.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.57 0.72 -4.25 3 22 2009-01-15 18:05:59 2003-09-19 11:41:41 6 4 15 0 12 21 0 107.50 46 88.61 CHANGED MIDAGVHQQLGTLlAEVKNLREDlR...........RSEDRSDAuRASMaRRMDELVERVsTLEGSsstlpuDITuMKPVT-DV+KWKLMGlGALGVIGIGGAALu..VT..FADVsKRsL.hlLRGG ..........Mh.usltppltsL.t-lcplR-Dh+...........cSED+SssuRssMpRRhDELVERltsL..E.....uuhthlcsDlupMKPVT-DV+..+WKLMGhGALG..VIGlGuAALG..VT..FADlh+Rhh..lh................ 0 1 6 7 +7268 PF07440 Caerin_1 Caerin 1 protein Moxon SJ anon Pfam-B_22039 (release 10.0) Family This family consists of several caerin 1 proteins from Litoria species. The caerin 1 peptides are among the most powerful of the broad-spectrum antibiotic amphibian peptides [1]. 
25.00 25.00 28.50 27.50 19.90 19.10 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -6.88 0.72 -4.02 2 28 2009-01-15 18:05:59 2003-09-19 11:44:48 7 2 9 0 0 22 0 23.90 85 64.33 CHANGED GLhpVLGSVAKHlLPHVsPVIAE+ .GLhSVLG.SVAKHVLPHVVPVIAE+ 0 0 0 0 +7269 PF07441 BofA SigmaK-factor processing regulatory protein BofA Vella Briffa B anon Pfam-B_21773 (release 10.0) Family This family contains the sigmaK-factor processing regulatory protein BofA (Bypass-of-forespore protein A) (approximately 80 residues long). During sporulation in Bacillus subtilis, transcription is controlled in the developing sporangium by a cascade of sporulation-specific transcription factors (sigma factors). Following engulfment, processing of sigmaK is inhibited by BofA. It has been suggested that this effect is exerted by alteration of the level of the SpoIVFA protein [1]. 22.30 22.30 22.40 22.30 22.10 22.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.40 0.72 -3.99 32 390 2009-01-15 18:05:59 2003-09-19 11:48:48 6 1 376 0 106 283 8 78.00 31 90.69 CHANGED llu....llllhllstshhpPl...+hlh+lslphllGslhLahlNhhGuhhGl....plsINhlTshluGlLGlPGllhLlllc.hl.l ......................h.lu.llhlhllhtl..h.tsl.......+hlhchlhpsllGsllLallN..lhG.s..hhsh.......aIsINhhTuhlsGlLGlPGVshLlllphh..h........ 0 42 82 93 +7270 PF07442 Ponericin Ponericin Vella Briffa B anon Pfam-B_21790 (release 10.0) Family This family contains a number of ponericin peptides (approximately 30 residues long) from the venom of the predatory ant Pachycondyla goeldii. These peptides exhibit antibacterial and insecticidal properties, and may adopt an amphipathic alpha-helical structure in polar environments such as cell membranes [1]. 
25.00 25.00 27.40 58.60 18.00 16.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.59 0.72 -4.13 3 5 2009-01-15 18:05:59 2003-09-19 13:47:52 6 1 1 0 0 5 0 29.00 71 97.32 CHANGED GWKDWhKKAGEWLKKKGPGIhKAALKAAT GWKDWlKKAGEWLKKKGPGIlKAALpAAT 0 0 0 0 +7272 PF07444 Ycf66_N Ycf66 protein N-terminus Vella Briffa B anon Pfam-B_21840 (release 10.0) Family This family represents the N-terminus (approximately 80 residues) of Ycf66, a protein that seems to be restricted to eukaryotes that contain chloroplasts and to cyanobacteria. 20.70 20.70 21.10 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.81 0.72 -4.26 16 155 2009-01-15 18:05:59 2003-09-19 16:08:48 6 2 108 0 55 167 131 82.00 43 29.81 CHANGED lNhshGPuolLGlhLsluGllLYhlRsh+PEluRDaDlFFuolGLLsGGILlFQGWRLDPILLFuQhLlsGTslFFuhEoIRLR ...............................................................lphshs.uslLGIhLul.uuhsLY...h.l.....Rph+PcluRDtD.lFF.....uulGL..LsGhILhFQ.GWRL..DPIL.FGQhLlsusslaFuhEolRLR............................ 2 14 41 53 +7273 PF07445 priB_priC Primosomal replication protein priB and priC Vella Briffa B anon Pfam-B_22037 (release 10.0) Family This family contains the bacterial primosomal replication proteins priB and priC (approximately 180 residues long). In Escherichia coli, these function in the assembly of the primosome [1]. 
20.50 20.50 20.60 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.21 0.71 -4.79 21 862 2009-01-15 18:05:59 2003-09-19 16:34:29 7 3 792 0 84 387 9 162.00 41 94.83 CHANGED LppLcsplspLtppsuslscpt.........phpshFDcpLFpsRuphLpsYlpEscpsLstLpptsppsph..tpssaLsE+LsuQIpAlpREluTpslRcp-sts...................pshsclYpcLupHQ-aERRLhsMlp-RchpLspsssh.cpQplp+ElhAhEuRLsRCRpALt+IEcpIsppE+ ..................................LppLctpLssLcpcsAslspht.................shpARF.D.....+pLFps+uspLpshL-EAtsNLsuL+psVcpppL............QVAaL...AE+LsAQ....lpAl.sREh...u.....sh.......s.......LRph-sss....................................plsc..h..p....c+RhQH.Q-aE.RRLtpMlt-....R....+tpLu...p....sos......h.sEQQsL..p+.E.lps.hEuRLsRCRcALcKIEppls+hpR...................... 1 7 24 57 +7275 PF07447 VP40 Matrix protein VP40 Vella Briffa B anon Pfam-B_22295 (release 10.0) Family This family contains viral VP40 matrix proteins that seem to be restricted to the Filoviridae. These play an important role in the assembly process of virus particles by interacting with cellular factors, cellular membranes, and the ribonuclearprotein particle complex. It has been shown that the N-terminal region of VP40 folds into a mixture of hexameric and octameric states - these may have distinct roles [1]. 
25.00 25.00 307.40 307.00 17.30 16.40 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.84 0.70 -5.36 2 35 2009-01-15 18:05:59 2003-09-19 17:02:16 7 1 20 4 0 33 0 287.30 60 91.60 CHANGED M......susssYNshh..hs..Phtspshsp.h.uD.Lus.tu.sPs.s......sslshssph.usVspAFhLEAhlsl.uhschhhKtlPhWLPLGlhsph.YshspTsAAlhhuSYTlTpFs+.up.hVRVNRLGsGIPsHPLRhLR.GNQAFlQphVlP.s..sp.FTasLTsLhL.sQ.LPsssWpsppsthhsNsh+PulSlHPpL.PIlLPshhtpth+tpc.ssss.l.sI.shl.pL+l..ls.hpslhtIplPt.hhphhpGhh.p.tpspPll.h.hPc.hsL ................h.......susssYNshh..lsssPhus+GANphIsuDQLus.pG.TPs.s......ssLshDsphtGsVspAFhLEAhlsl.uhNchhlKtVPhWLPLGIhuphpYshspTsAALLhuSYTITpFs+sup.hVRVNRLGsGIPsHPLRhLR.GNQAFlQphVlP.s..sp.FTasLTsLhLssQhLPsssWpsSpsplhuNsh+PulSlHPpL.PIlLPshptpth+QHc.ssss.lhAIuslL+pL+l.plstppSlatIplPtshFphhpGhhppptpGSPll.h.sPc.hsL.. 0 0 0 0 +7276 PF07448 Spp-24 Secreted phosphoprotein 24 (Spp-24) Vella Briffa B anon Pfam-B_22318 (release 10.0) Family This family represents a conserved region approximately 140 residues long within secreted phosphoprotein 24 (Spp-24), which seems to be restricted to vertebrates. This is a non-collagenous protein found in bone that is related in sequence to the cystatin family of thiol protease inhibitors. This suggests that Spp-24 could function to modulate the thiol protease activities known to be involved in bone turnover. It is also possible that the intact form of Spp-24 found in bone could be a precursor to a biologically active peptide that coordinates an aspect of bone turnover [1]. 
25.00 25.00 28.00 27.00 21.00 18.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.84 0.71 -4.22 3 49 2012-10-01 19:28:07 2003-09-19 17:20:33 6 1 38 0 21 46 0 115.00 49 61.56 CHANGED VLDEDoLVMNLEFoIQETTCRRESGtDPATCAFQRGYaVPTAVCRSTV+MSApQVQsVWARC+WSSSTSESsSSEEMIFGDMLGSH+pRNsYLLGLlsDES+GEQFYDRSlcIMRRsaPPGNRRYsNhp+RARVNoGFE .............................lspsslsMsL-FuIRETsCp+-SGcDPsTCuFpRGaaVPoAsCRSoVclSupQVQsVa.s+CpWuo.oSESpS.SEE..Mhasshhtspp.R.pp..hshh.p...t......................................................... 0 1 2 5 +7277 PF07449 HyaE Hydrogenase-1 expression protein HyaE Vella Briffa B anon Pfam-B_21851 (release 10.0) Family This family contains bacterial hydrogenase-1 expression proteins approximately 120 residues long. This includes the E. coli protein HyaE, and the homologous proteins HoxO of R. eutropha and HupG of R. leguminosarum. Deletion of the hoxO gene in R. eutropha led to complete loss of the uptake [NiFe] hydrogenase activity, suggesting that it has a critical role in hydrogenase assembly [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.39 0.72 -4.15 9 623 2012-10-03 14:45:55 2003-09-25 11:36:21 6 4 503 19 47 938 96 106.10 58 75.57 CHANGED LhpRLhsphGhshVD.ssslDcalss....tssuVLhluGDPsRhPEssDsAVlLPELhpsFssthhtullu..ttpE-tLusRFslh+hPoLlhhpcGpalGsLutlpDWsEY .............................................LWQRhLsR.GWpPVs.tspLDDW.lsp.....sP.D...GV..VLLSS..DP..+..RT..PE.VSDN.P.V...MIuELL...R..E....FP.s.a..o......W...Q.VA.lA...DLEQSEAIGDRFsVhRFPATLVFTs......GpaRGsL.sGIHPWAEL........................ 0 12 26 35 +7278 PF07450 HycH Formate hydrogenlyase maturation protein HycH Vella Briffa B anon Pfam-B_22086 (release 10.0) Family This family contains the bacterial formate hydrogenlyase maturation protein HycH, which is approximately 140 residues long. 
This may be required for the conversion of a precursor form of the large subunit of hydrogenlyase 3 into a mature form [1]. 25.00 25.00 28.80 28.80 19.60 19.30 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.85 0.71 -4.05 19 787 2009-09-12 21:51:00 2003-09-25 12:51:46 6 2 548 0 50 216 2 129.60 66 95.41 CHANGED scVlFapLs+KFVDpscs...sP-cApQVhYYSLAIGHHlGVlDCLcstLpCPhcpYppWlstLspG.EA+RKhpGlhpFGEIsIspsHsshLApA..........hstlststp.stppph..oppLlchLtsIppEPAIYLMV.R+p ........cVVF.pLs+KFlD.psDs...sPtcAQQVhYYSLAIGH.HlGVIDCLcsALsCPhsEY.sWlAsLEtG.-ARRKMtGV.KaGEIVIDhsHlshLApA..........FDcspsstT...SppQp..W..SphhlshLc-IpQEsAIYLMVRR.l.................... 0 6 16 32 +7279 PF07451 SpoVAD Stage V sporulation protein AD (SpoVAD) Vella Briffa B anon Pfam-B_22130 (release 10.0) Domain This family contains the bacterial stage V sporulation protein AD (SpoVAD), which is approximately 340 residues long. This is one of six proteins encoded by the spoVA operon, which is transcribed exclusively in the forespore at about the time of dipicolinic acid (DPA) synthesis in the mother cell. The functions of the proteins encoded by the spoVA operon are unknown, but it has been suggested they are involved in DPA transport during sporulation [1]. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.06 0.70 -5.99 41 555 2012-10-02 12:25:54 2003-09-25 13:15:53 6 4 414 6 107 452 34 322.00 51 97.04 CHANGED G+QThhFps.PhIlusuolVGP+EGcGPLuchFDhlhpD.hhGc-SaEKAEpphhc-AsptAlpKuslppc..-lcallAGDLLNQlhuooFAARslsIPalGLYGACST.sEuLuLuuhll-uGaAchllsuTSSHauoAE+QFRaPhEYGsQ+PsTAQWTVTGAGAsllu....ppGs................s.PplTtsTlGKVlDhGlpDs.NMGuAMAPAAsDTIhpHFcDhspsPscYDLIlTGDLGplG+pIst-LL.cccGhclspp.apDCGlhIacp-.QsstuGGSGCGCSAlVhsGalhcphpcGchKRlLlVuTGALLSssShpQsESIPuIAHAVsIE ........................G+QThhF.ps.pPhIhuouolsGPcEucGPLuc.FDhla.cD.hhhGpco...aEpAEppLhp-AhppAlpKuslcps..-...Icahl....AG....DLlN......Qh.s......s...o......s...Fu......A.R...p.....l.......s...l...P..a....lGla...G..ACSTuhEuLAluuhhlsuGhAchllsusSSHpuoAE+QFRaPsEYGuQ+PsTApWTVTGAGAsllu......pp.s..s...................................................u..s+lTuATlG+VlDhGlsDPhNMGuAMAPAAsDTIppHhcDhphssspYDLIlTGDLGpVGppIsh-LLp.ccGhc....l.spt..apDCGlhIYc..ps.Q.s.VhAGGSGCuCSAsVshGalLpchpcGchp+lLlVATGALLSPhohQQtEoIPsIAHAVslE.................................................................................. 0 54 86 92 +7280 PF07452 CHRD CHRD domain Hyvonen M anon Hyvonen M Domain CHRD (after SWISS-PROT abbreviation for chordin) is a novel domain identified in chordin, an inhibitor of bone morphogenetic proteins. This family includes bacterial homologues. It is anticipated to have an immunoglobulin-like beta-barrel structure based on limited similarity to superoxide dismutases but, as yet, no clear functional prediction can be made. Its most conserved feature is a GE[I/L]RCG[V/I/L] motif towards its C-terminal end Most bacterial proteins in this family have only one CHRD domain, whereas it is found repeated in many eukaryotic proteins such as human chordin (Swiss:Q9H2X0) and Drosophila SOG (Swiss:Q24025). [1]. 
24.10 24.10 24.10 24.20 23.90 23.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.62 0.71 -3.49 101 936 2009-01-15 18:05:59 2003-09-25 15:53:44 7 50 447 0 346 766 59 117.80 24 54.16 CHANGED pphsuhLsGsptss.sst...os...........usGpAhhslssss.........sLpa...........plp.hsGL.................ssssts+l.........s.tsppusllhsh................tsst....ssushp...s..lsstphpt.................LhsG....phalslpTpspss.GE..lRGplp .................................h..h.u.Lsu..tpths..ssp.......op...............usG...pA.hhp...hssst..............pLpa...........plh..hpsl.....................................stsssscl..........ch........s.hsppus..llhtl..........................tsst.hsGsh...p.......t...lsstphpt...................................LhpG.....phalslpTpspPs..GE...lRGpl....................................................... 0 104 190 262 +7281 PF07453 NUMOD1 NUMOD1 domain Pietrokovski S anon Pietrokovski S Domain This domain probably represents a DNA-binding helix-turn-helix based on its similarity to other families (Bateman A pers obs). 22.80 13.50 22.80 13.60 22.70 13.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.18 0.73 -7.71 0.73 -3.95 63 326 2012-10-04 14:01:12 2003-09-25 17:24:02 8 37 165 0 53 316 76 36.40 24 17.45 CHANGED tslhlashssph....l...tpFsSlpcAActLslspss.Isphl .................lh.hshssph....l....ppFpShpcAuctlslspss.Ipph............. 0 18 39 43 +7282 PF07454 SpoIIP Stage II sporulation protein P (SpoIIP) Vella Briffa B anon Pfam-B_21650 (release 10.0) Family This family contains the bacterial stage II sporulation protein P (SpoIIP) (approximately 350 residues long). It has been shown that a block in polar cytokinesis in Bacillus subtilis is mediated partly by transcription of spoIID, spoIIM and spoIIP. This inhibition of polar division is involved in the locking in of asymmetry after the formation of a polar septum during sporulation [1]. 
Engulfment in Bacillus subtilis is mediated by two complementary systems: the first includes the proteins SpoIID, SpoIIM and SpoIIP (DMP) which carry out the engulfment, and the second includes the SpoIIQ-SpoIIIAGH (Q-AH) zipper, that recruits other proteins to the septum in a second-phase of the engulfment. The course of events follows as the incorporation firstly of SpoIIB into the septum during division to serve directly or indirectly as a landmark for localising SpoIIM and then SpoIIP and SpoIID to the septum. SpoIIP and SpoIID interact together to form part of the DMP complex [3]. SpoIIP itself has been identified as an autolysin with peptidoglycan hydrolase activity [2]. 20.10 20.10 20.50 20.40 19.70 18.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.86 0.70 -5.23 49 584 2012-10-02 19:46:12 2003-09-26 10:34:07 6 5 411 0 125 491 7 270.30 29 71.69 CHANGED lphpD.....h.shhsp-lshhsthpsphhhstpts..sph..psssshp...pttpt.......................................sspcshVhIYHTHosESYh.P.....t..sssst.....p.slstVGctLsptL.cptGIsshpDcT..ha-h.....saspuYppSRpslpchLpppsshphllDlHRDu....................hp.ppth...sspIsGcshA+lhFVlGpp............sspacpNhphApplpshh-chYPG......Ls+Glhhts.t.....pYNQDLsspAlLlElGu.sNoh-EApposchlA-.lluc .........................................................................................................................................th.....h.......t....t.tt.....pt.....p.....p...tp.p...................t.....ptt..........p...........................................................sssc.tlhIYHTHotESYh...P.h..........t.sssst.........phNlshVGchLpcpL.cppGIs.shp..DcT.......hpch.......saspSYptS+pslp......c......hLtpstslphhlDlHRDu....................hp...+.phs....oppIsGK.shA+lhFVlGpp................NssaccNhphApplpphhscpYPG......loRGlhhKs..t.....thYNQDLospulLIElGussNTh-EhpposcsLAcshu.p......... 
1 67 105 113 +7283 PF07455 Psu Phage polarity suppression protein (Psu) Vella Briffa B anon Pfam-B_21666 (release 10.0) Family This family contains a number of phage polarity suppression proteins (Psu) (approximately 190 residues long). The Psu protein of bacteriophage P4 causes suppression of transcriptional polarity in Escherichia coli by overcoming Rho termination factor activity [1]. 21.20 21.20 21.20 21.20 20.10 21.00 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.10 0.71 -4.82 5 269 2009-01-15 18:05:59 2003-09-26 11:05:58 6 1 159 \N 19 169 0 163.10 41 85.17 CHANGED MTT.VTLQQAF-uCQNNKsAWLsRKsELAAAEQEYcELLhuG-css..sRRLQpLR-lIDVKKWElNQAAGRYIRSHEpVQ+ISIRDRLsDFMQQHGAELAAALAPELMGYppQsshl+psAlQ+SVDYLREALoVWLA......AGEKINYSAQDsDILTsIGFRPDAASRDDNREKFTPAQNhIYoRRRApLAup ...........................................................................................................................................................spphp..Rphl-VhcWplNpAAG+YIpoHctl.cIsh+stLpDFMQpHGsALsuALA..PpLM..G.pp.suhh..hsp..ulpcusshLR-ALhpaLs......pGs....hNYuh.-pD.ILsthGhtPDssuhpDspppaTPAQ..phha....pp+ps.lst............................................... 1 0 3 6 +7284 PF07456 Hpre_diP_synt_I Heptaprenyl diphosphate synthase component I Vella Briffa B anon Pfam-B_22032 (release 10.0) Family This family contains component I of bacterial heptaprenyl diphosphate synthase (EC:2.5.1.30) (approximately 170 residues long). This is one of the two dissociable subunits that form the enzyme, both of which are required for the catalysis of the biosynthesis of the side chain of menaquinone-7 [1]. 
30.00 30.00 30.30 30.30 29.60 29.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.62 0.71 -4.37 47 636 2012-10-03 02:46:00 2003-09-26 13:39:27 6 4 611 0 123 444 27 143.80 39 79.07 CHANGED LsuhAlsltllEshIPhPh.h..PGsKLGLANllslluLhh.hu.h+pulhlsllRllluulhtGsh.o.sFhhShAGullShlsMhll.pph.......tcthShlGlSlhGAlhHNlGQLhlAuh.l.....lpshslhhYLPlLhlsGlloGhhhGlhuph....l ...LsA.ulllullEs.hIPhP.hsh..P.GsKLGLANllolluLah...hs.h+-uhhlhhlRl.lLssllsGsl.o.sFhaShuGulLShhsMhll.pph.h.........cp.lSllGlSssGuhhHNlGQLllAuh.l......hpshsl.h.h.YLPlLhhhGlloGlhlGlsush........................... 0 68 110 116 +7285 PF07457 DUF1516 Protein of unknown function (DUF1516) Vella Briffa B anon Pfam-B_22136 (release 10.0) Family This family contains a number of hypothetical bacterial proteins of unknown function approximately 120 residues long. 29.90 29.90 30.10 31.50 29.70 29.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.67 0.72 -3.97 22 454 2009-01-15 18:05:59 2003-09-26 14:02:50 6 1 435 0 48 197 0 111.10 44 89.66 CHANGED M...hH.....hHIhoWllhlILFhlAhhhasp...ts+tsKhlHMllRLhYllhlloGhhL...hlpt.......hsshthhhhlKhlhGlhlIuhhEhhls+ppK...t+sopshahhhllslllThhLG .......M.lHlHIhSWVLulILFlssY.h.hSp...ts.hhKslHMlLRLFhlLsllSGFhl...llpph.....ssuusHMLhsLKMLsGlhVlGlMEhsluK+K+....pc.tops.hahlhI.sllllThhLG......... 0 11 30 41 +7286 PF07458 SPAN-X Sperm protein associated with nucleus, mapped to X chromosome Vella Briffa B anon Pfam-B_22197 (release 10.0) Family This family contains human sperm proteins associated with the nucleus and mapped to the X chromosome (SPAN-X) (approximately 100 residues long). SPAN-X proteins are cancer-testis antigens (CTAs), and thus represent potential targets for cancer immunotherapy because they are widely distributed in tumours but not in normal tissues, except testes. They are highly insoluble, acidic, and polymorphic [1]. 
21.90 21.90 23.60 22.40 20.40 20.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.32 0.72 -3.76 9 92 2009-01-15 18:05:59 2003-09-26 15:59:36 7 2 12 0 10 83 1 92.30 49 79.27 CHANGED M-p.pSSssG.KRpoPC-SNp.tN-pM.ETPstD.sPcPu.KKhKTSE.ST.llVlpYR+phKI...sSspL.NDpSpENsINPlQ.EE-Ehh-hpsEospp ................................M-p.pSSssG..KRp.sPC-SNp.tN.......-......p......M..ETPsts.sP.p.u.KKhKTSE.sT.llVhpYR+shKh...ssspL.NDp.upENpINPlQ.EE-Ehh-...t.st.c....................................................................................................................... 0 10 10 10 +7287 PF07459 CTX_RstB CTX phage RstB protein Vella Briffa B anon Pfam-B_22203 (release 10.0) Family This family contains a number of RstB proteins approximately 120 residues long, including RstB1 and RstB2, from the Vibrio cholerae phage CTX. Functional analyses indicate that rstB2 is required for integration of the CTXphi phage into the V. cholerae chromosome [1]. 20.30 20.30 20.90 23.30 18.50 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.59 0.71 -4.23 2 78 2009-01-15 18:05:59 2003-09-26 16:48:33 6 1 49 0 7 34 0 109.50 79 94.27 CHANGED MKl.........VsFGhSpS.GluppsstPY.I.sLFVGKPIRQWKsDKG.s.s.Ghpp.El.F.SsDthhpKlcphAFPsLVphcsEP.PEDPo+NllIshpVlCoLaDsVPtsK. .............MKSRFVVFGASHSEGVS.KTG..APYLIPVL.FVGKPIRQWKNDKGQCLTFGLQHQEV..KFVS.SDAMTRKLE..Q..T..A..FPVLVTFDNEPDPEDPSRNLVIDYQVVCSLFDNVPGGKP.................... 0 3 4 5 +7288 PF07460 NUMOD3 NUMOD3 motif (2 copies) Pietrokovski S anon Pietrokovski S Motif NUMOD3 is a DNA-binding motif found in homing endonucleases and related proteins. It occurs on its own or in tandem repeats in GIY-YIG (Pfam:PF01541) and HTH proteins. It constitutes a beta-turn-loop-helix subregion of the the DNA-binding domain of I-TevI homing endonuclease (Swiss:P13299) [1]. 
20.40 9.30 20.40 9.30 20.30 9.20 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.98 0.72 -4.06 25 485 2009-12-15 13:13:21 2003-09-27 14:40:31 6 26 140 2 73 456 321 32.80 29 23.99 CHANGED hGtK+oEEoKpKhSp........h.ttshhGKp+o-Eo.KpKlSc ................................h.hoc-s+p+hSp.................t.tps.hhGKp.t...otEs.+thhp................ 0 37 62 68 +7289 PF07461 NADase_NGA Nicotine adenine dinucleotide glycohydrolase (NADase) Moxon SJ anon Pfam-B_21586 (release 10.0) Family This family consists of several bacterial nicotine adenine dinucleotide glycohydrolase (NGA) proteins which appear to be specific to Streptococcus pyogenes. NAD glycohydrolase (NADase) is a potential virulence factor. Streptococcal NADase may contribute to virulence by its ability to cleave beta-NAD at the ribose-nicotinamide bond, depleting intracellular NAD pools and producing the potent vasoactive compound nicotinamide [1]. 28.50 28.50 29.00 31.80 25.60 28.40 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.43 0.70 -5.80 2 89 2009-01-15 18:05:59 2003-09-29 10:54:18 6 1 32 2 4 78 0 423.80 87 95.30 CHANGED MRNKKVTLAHIVAKTSVAIALAGAMGSSLLANSTTYAVSGKENKKSDVKYETTKVMEANATSSKEDNHVMHTLDGSMSTVWEENSPGGGVGEVLSYKFsSPM+IGRILIVNGDTSSKENYYKKNRIAKADVKYYNtNKLVLFpKIELGDTYTKKPHHIEIDKKLDVDRIDIEVTEVHQGQNKDILALSEVTFGNhERDlFEKKFKEIKDKWVTDKQADEFIETADKYADKAlQMSAVASRAEYYRMYVSRKYHYKKEFVEKLKQVYKESGASHVTSKKDLMLAFDDAK+KSTIGRQENGLFVTSFAEDMALLFTDQGKLKSADQIENIKGVDSGKYSDGVYQYEYDSELTKNIDKLGYIRTASGDTPGANSLNIPGCQTWSGKHIENSESELIFPSISVKDLKSKAVLAEIDAKGYFEIIDPTIIAPNGDHKKVTGRFKIKKMQDR 
..........................MRNKKVTLAHIVAKTSVAIALAGAMGSSLLANSTTYAVSGKENKKS..DVKY.........ET.TKV..MEANATSSKEDNHVM...HTLDGSMSTVWEENSPGG.GVGEVLSYKFASPM+IGRILIVNGDTSSKENYYKKNRIAKADVKYYNGNKLVLFQKIELGDTYTKKPHHIEIDKKLDVDRIDIEVTEVHQGQNKDILALSEVTFGNhERDlFEKKFKEI...KDKWVTDKQADEFIETADKYADKAVQMSAVASRAEYYRMYVSRKYHYKKEFVEKLKQVYKESGASHVTSKKDLM.LAFDDAK+KSTIGR..Q...ENGLFVTSFAEDMALLFTDQGKLKSADQIENIKGVDSGKYSDGVYQYEYDSELTKNIDKLGYIRTASGDTPGANSLNIPGCQTWSGKHIENSESELIFPSISVKDLKSKAVLAEIDAKGYFEIIDPTIIAPNGDH.KKVTGRFKIKKMQDR..................................................... 0 1 2 2 +7290 PF07462 MSP1_C Merozoite surface protein 1 (MSP1) C-terminus Moxon SJ anon Pfam-B_21542 (release 10.0) Family This family represents the C-terminal region of merozoite surface protein 1 (MSP1) which are found in a number of Plasmodium species. MSP-1 is a 200-kDa protein expressed on the surface of the P. vivax merozoite. MSP-1 of Plasmodium species is synthesised as a high-molecular-weight precursor and then processed into several fragments. At the time of red cell invasion by the merozoite, only the 19-kDa C-terminal fragment (MSP-119), which contains two epidermal growth factor-like domains, remains on the surface. Antibodies against MSP-119 inhibit merozoite entry into red cells, and immunisation with MSP-119 protects monkeys from challenging infections. Hence, MSP-119 is considered a promising vaccine candidate [1]. 
20.40 20.40 20.50 20.40 19.80 20.30 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.96 0.70 -6.12 4 835 2009-01-15 18:05:59 2003-09-29 11:07:15 6 6 34 0 10 628 1 341.40 56 39.29 CHANGED Shss+uESE--h.ssshEl-phYcsaLpplDs..NspFhpFlKSKK-lIsuLT.pKhNtLhhEIt+LK-h.ph.aD+YhKYKLKLERLapK+cpIpsuppQIKcLolLKs+L.+Rpp.lNssa.VLpsassFFNK+REAEKp.V-NsLKNT-hLLKYYKARsKYasuEusPLKTLocsSlp+EsNYLplEKFRshSRLEhRLpKNIpLGKE+ISYlSuGLHHVhpEhKEllKNKsYTGposs-NsscVpcAhcpYcELLPKssTtpAssss.ssTPsssssussstusssuuuuGuuuGpssssTstpl...G.utss......................Vlsh.ussDDDD--hDQlsoGpupstp.csILsAFcsE.-hlYhKsLussYKSlKKph.+chsshppslsshLNs+LcKRN.FL-VLsp-Ls.FKclSoNcYlI+sPYpLLDs-KKcK.lhshKYhtcuVscDIcTAsDGIpaaNKMlELYKspLsAVpcQIctltst.ss..c-cKK......................KYlPhhpsLcsLYEollsps--Yh-sLpp+lsshplEKsEh- ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................A..o...s..-..llssh.s-Y-VlY.LKPLAGMYKoIKKQLENHVsAFNTNITDMLDSRLKKRNYFL-VLsSDLNPFKYSS.SGEYIIKDPYKLLDLEKKKKLlGSYKYIusSIDhDlsTANDGluYYNKMt-LYKscLsuVpppIKclEsph.t..-ch..cKh.........spsspss.p.sp.hAcKtE.cKYLPFLNSlpKEYEoLVsKlsoYopN.LKKhIsNCQlEK+EAE............................................ 2 1 2 7 +7291 PF07463 NUMOD4 NUMOD4 motif Pietrokovski S anon Pietrokovski S Motif NUMOD4 is a putative DNA-binding motif found in homing endonucleases and related proteins [1]. 
23.40 23.40 23.50 23.50 23.00 23.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.47 0.72 -4.08 55 364 2009-09-11 07:03:11 2003-09-29 11:41:44 6 21 293 1 46 303 100 52.20 30 27.43 CHANGED EhWKsI...G.acs...YpVSshGRV+S..................hpps+lLp.ths...sGY..hhVsLt.ps .......EhW+sIt.....u..a-s.....Ypl..SshG.+V+o..................htps+lL+sphs..t..sGYhhVsLh........................................................... 0 13 30 37 +7292 PF07464 ApoLp-III Apolipophorin-III precursor (apoLp-III) Moxon SJ anon Pfam-B_31170 (release 10.0) Family This family consists of several insect apolipoprotein-III sequences. Exchangeable apolipoproteins constitute a functionally important family of proteins that play critical roles in lipid transport and lipoprotein metabolism. Apolipophorin III (apoLp-III) is a prototypical exchangeable apolipoprotein found in many insect species that functions in transport of diacylglycerol (DAG) from the fat body lipid storage depot to flight muscles in the adult life stage [1]. 31.00 31.00 31.00 31.50 30.90 30.90 hmmbuild --amino -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.16 0.71 -4.20 8 45 2009-01-15 18:05:59 2003-09-29 14:54:15 6 2 36 1 18 50 0 143.30 27 77.83 CHANGED lEKHAtEFQKTFSEQhNulsNS..KssQ-VNKAlK-GSDSVLQQLssluoSLQuAlsDANGKAKEALEQsRpNlE+TAEELRKAHPDVE+pAspLRD+LQAAVQuTlQEoQKLAKEVuuNh-ETN-KLAPpIKpAYDD...FVKpsEEVQKKlHEAAoKQ ...............................pht-h.pshppphpphhs.......pssp-lscslK-socshlpplpshtsslpstlpc.ssschcpslcphppplpcTsccLpc..s.pP-lpcpAspLpp+lQsulQshspEspKluKclupssppss-cLsstlKpsaDs...hscsspclpcclppAsp.............. 0 4 8 17 +7293 PF07465 PsaM Photosystem I protein M (PsaM) Moxon SJ anon Pfam-B_21548 (release 10.0) Family This family consists of several plant and cyanobacterial photosystem I protein M (PsaM) sequences. PsaM forms part of the photosystem I complex and its binding is stabilised by PsaI [1]. 
20.80 20.80 20.90 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.52 0.72 -7.03 0.72 -4.34 40 300 2009-01-15 18:05:59 2003-09-29 15:43:26 8 1 285 2 33 181 1 28.70 48 91.33 CHANGED IoDoQIalALllALlsulLAlRLGppLY+ ............l.-sQlllAL..hlAhhsulLAl+LGpsLYp.. 0 12 24 31 +7294 PF07466 DUF1517 Protein of unknown function (DUF1517) Moxon SJ anon Pfam-B_22018 (release 10.0) Family This family consists of several hypothetical glycine rich plant and bacterial proteins of around 300 residues in length. The function of this family is unknown. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.98 0.70 -5.42 27 295 2009-01-15 18:05:59 2003-09-29 15:46:33 6 6 196 0 143 297 48 193.10 20 68.03 CHANGED tAtSGGRIGGGSFp.uP.......SsPpo.ss.suuhh..................GGGhGaPFllPh......aGhGG.G..GLFuhLlhhulsshllpshRssttt...........ssttst.hssstVolsplQVGLLApA+sLQpDLpplAtpADTsossGLppVLQEsoLALLRpP-hWVYusscss.pssh....suAEu....pFNpLolpERSKhspEohSNlssppppsstst.ss.s.t.........-sssEYIlVTlLVAucupl.pLPt.lsous-LRpALptLGulsuscLlAlEVlWoPpscGDsLot-ELlssYPpLphL .....................................................................................h....uGt..GGt.uFt..ts.............s.ss.pt..t.tsss......................t......................us...hhs...hs.hhsh...............................hGhuh....t....t...h...hs.h.l....lhhhlshhlh.thhpt.............................................................................................................................................................................................................................................................................................................................................................................................. 
1 31 88 132 +7295 PF07467 BLIP Beta-lactamase inhibitor (BLIP) Finn RD anon Pfam-B_41444 (release 10.0) Family The structure of BLIP reveals two structural domains, which form a polar, concave surface that docks onto a predominantly polar, convex protrusion on beta-lactamase. The ability of BLIP to adapt to a variety of class A beta-lactamases is thought to be due to flexibility between these two domains [1]. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.16 0.71 -4.63 3 21 2012-10-01 23:09:26 2003-09-29 16:55:43 6 1 15 29 4 43 0 132.40 29 91.48 CHANGED VK.RluRouluLsAAAGAVLATAoSApAsoGFTuEKYsQIQFGMTRspVW-IAGAEtuC-TGGshGDSIlCasc.uGDYuPYGsFuFTuA...uKLhSKRQEcLasAssPTl+LA+YN+TslGMTEAQlWAAVPpDSCoohuEpYPNWPATsGspccYsC.....sSuTGhFsPSAaFaFTDGVLTuRpQpsL .................h.....................hll..A..sshuhs.AsothTsEpYppIphGMspsEVhsllG..ut.ss...C.s-..ss..h.................................................................................................................................................................................................. 1 1 1 3 +7296 PF07468 Agglutinin Agglutinin Finn RD anon Pfam-B_57133 (release 10.0) Family \N 25.00 25.00 27.50 25.80 23.20 17.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.00 0.71 -4.23 2 39 2012-10-02 19:42:32 2003-10-03 15:43:28 6 5 9 8 23 50 0 139.20 29 55.88 CHANGED hthPhhhshhpsNNtKYLth.o..IpQhshLQFuhDpl.DPhstaph.s..T.sGhlpIKSpYhNKahhh.osN..WIhssuN-PcE..oN.AssLF+s...-.tshphlpLLphQhsaahcpaTsGts.Flshh.Atop.lDpsu..lhclI- ..................................................hlhhpssNscYLphhs....hp..thshLpFuu--lsDPhstapl.s..s..tcG..hV+I+ssahsKaW+.c.....o...sN........WIhAsus-scp..os.ssTLFcPlhl.-.tphp.hlthpphttspaspphs......hsCL.Ass..tp.p............................................... 
0 0 11 23 +7297 PF07469 DUF1518 Domain of unknown function (DUF1518) Finn RD anon Pfam-B_1525 (release 10.0) Domain This domain, which is usually found tandemly repeated, is found various receptor co-activating proteins. 25.00 25.00 28.90 28.90 24.20 23.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.85 0.72 -3.82 17 247 2009-01-15 18:05:59 2003-10-06 13:27:18 7 13 39 0 91 216 0 57.30 44 5.65 CHANGED uhsuthussthPQussQQFPasPNYGhuQQs-PuFssuhSP.SshMSsphu.oQSsMh ...uhsuphusPthPQussQQFPYPPNYG..h..up..Qs-PuFsssh.SP.oshhssphssoQssMh........... 0 4 9 29 +7298 PF07470 Glyco_hydro_88 Glycosyl Hydrolase Family 88 Finn RD anon Pfam-B_10896 (release 10.0) Family Unsaturated glucuronyl hydrolase catalyses the hydrolytic release of unsaturated glucuronic acids from oligosaccharides (EC:3.2.1.-) produced by the reactions of polysaccharide lyases [1]. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.22 0.70 -5.40 19 2447 2012-10-03 02:33:51 2003-10-06 14:06:07 8 26 1025 25 672 2054 111 336.70 21 81.37 CHANGED hhc+hspthh.c.s..........h......c........WspGlhhtG.hh.tsa-hTtccc....Yhphsps..........ascphhccssp..................hplDphshGhhh....hh....lYchst.Dpchhtsshpht-.lltphs+htcGshh+.....tph..hphW.............lDsLaMusPhhhph....uphhs-...cah-cshppahhptcp.................hhDscsGLaa+uac....pspp.hus........hWuRupGWhhhulschlch......LPp.pcss+p.lhplhpshs.ps.lt+hQc.psGhWapllccss...shhEoSuoAhhlYulh+ul............ppGh....l.scpYtpssp+uacullcp...........lpc-Gphhlppsssssuhst............YtshshtpspsaG.GhhlhAhhEhh+hh 
...........................................................................................................h......................................tt...p..............Ws.pGhhhhG...hh..hha....c....ho....t....c....pp..............hh.ph..s.pp.........................hh.p.p.h..h..t..phsp.............................phpl.s.p...h..s.h.G.h..h.h.............hh............hY....c.....h.........s........s....s........t........ph..tp.h...h..h.phA-......hhp...p..h.........p.........t.....p.G...s..h..hp...............................tp.t..p.tpha.............................lDsl..hM..sl....h..L.h.hh...................uphs....s..c.t......ca.h.-..Ahpphh..hp..hpp..................................hh.D.s...c...o.....Ghh..h.+..uhs........t..p.u..h......p..s.us................................hWuRGpuWshhuh..sthhch...............l.......p....p........p......s......t........t...p.......h.......h.......h.......p......h.hpt....hh..pt..l.......h..c......h...............c....p........s..u.....h...W.....p........h............h....h........s.....c.......s...........s.......................s.........................h-oSAoA.hh...shu....l...h..p..tl...................................pp.th..............h.......t.....p...p....Y.....t...p....s....s...pc....t...hpulhpp...................hs.t....s....G..p............l..t...t.s.hhh.pu...t.................................p.sh...s...h..s.s................aG.shhal.Ahhch....h.............................................................................................................................................................................. 0 272 506 604 +7299 PF07471 Phage_Nu1 Phage DNA packaging protein Nu1 Finn RD anon Pfam-B_11430 (release 10.0) Family Terminase, the DNA packaging enzyme of bacteriophage lambda, is a heteromultimer composed of subunits Nu1 and A. The smaller Nu1 terminase subunit has a low-affinity ATPase stimulated by non-specific DNA [1]. 
30.00 30.00 30.30 30.20 29.90 29.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.96 0.71 -4.77 11 762 2010-01-12 09:52:22 2003-10-06 14:22:59 7 4 374 2 31 345 35 138.20 50 83.26 CHANGED McVNKKpLA-IFGsulRTIppWQcQGhPVl+GGGKGsEslaDoAuVI+WYA-R-AEIENEKLR+EVEELRtAuEsDLpPGTI-YER+RLTRAQADAQELKNA+-suEVVETAFCTFVLSRIAuEIuSILDGIPLSlQRRF..PELENRHlDFLKpDIIKAMNKAAA .................................................................lNK+phAc.hsss.pshppW.t....p....Ps.p.......K.G..EshaDsAssIp...hpR.cs..p..c..s..p+Lp.c-.ht-...lct..s.s.pt....D..ps..spl.h..tRaRL.TcAQA-AQE..LKNs.+-pucVl-TsFChFsLS+lAtpIuSILD.u.lPLohQRpF..P-LpsRHlDhLKp-IhKAhNpuA......................................................................... 0 8 18 28 +7300 PF07472 PA-IIL Fucose-binding lectin II (PA-IIL) Finn RD anon Pfam-B_17609 (release 10.0) Family In Pseudomonas aeruginosa the fucose-binding lectin II (PA-IIL) contributes to the pathogenic virulence of the bacterium. PA-IIL functions as a tetramer when binding fucose. Each monomer is comprised of a nine-stranded, antiparallel beta-sandwich arrangement and contains two calcium cations that mediate the binding of fucose in a recognition mode unique among carbohydrate-protein interactions [1]. 20.90 20.90 20.90 22.40 20.70 20.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.15 0.72 -3.83 12 85 2009-01-15 18:05:59 2003-10-06 15:00:50 6 5 53 100 25 63 2 108.60 48 54.85 CHANGED FpLPsshcFGlTAasNou.tpQTlcValDDp.t..sTasGpGssss.huTpslsSG.sG+Vplpl.usGKPScLtup.ssLss........KsshullGuEsGsDsD..YNDulVlLNWPL ...........FsLPsNhpFGlTAasNou.spQTIcValssssp..ATFpGsuoss.....s........luTplLNSG..sGKVplpVosNG..KPScLsSpQsslss........cssFu..lVGSEDGoDsD..YNDulVVLNWPL.... 0 2 9 15 +7301 PF07473 Toxin_11 Spasmodic peptide gm9a Moxon SJ anon Pfam-B_90829 (release 10.0) Family This family consists of several spasmodic peptide gm9a sequences. 
Conotoxin gm9a is a putative 27-residue polypeptide encoded by Conus gloriamaris and is known to be a homologue of the "spasmodic peptide", tx9a, isolated from the venom of the mollusk-hunting cone shell Conus textile [1]. Upon injection of this venom component, normal mice are converted into behavioural phenocopies of a well-known mutant, the spasmodic mouse [2]. 25.00 25.00 33.40 33.40 21.50 20.30 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.67 0.72 -4.02 2 4 2012-10-01 22:06:18 2003-10-06 16:07:17 6 1 4 1 0 5 0 27.50 46 37.54 CHANGED .CspuCpS.scCsp+ChCp.ctpsshpt ..CNNSCQpHScCsSHClCphctCssVNt 0 0 0 0 +7302 PF07474 G2F G2F domain Bateman A anon Bateman A Domain Nidogen, an invariant component of basement membranes, is a multifunctional protein that interacts with most other major basement membrane proteins. The G2 fragment or (G2F domain) contains binding sites for collagen IV and perlecan. The structure is composed of an 11-stranded beta-barrel with a central helix. This domain is structurally related to that of green fluorescent protein Pfam:PF01353. A large surface patch on the beta-barrel is conserved in all metazoan nidogens. 
23.50 23.50 23.60 25.40 21.10 23.40 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.16 0.71 -4.93 5 296 2012-10-01 21:32:51 2003-10-27 12:53:42 7 164 81 2 160 235 0 182.50 35 9.46 CHANGED cGuPQRlNGpVpGpIpVls.c...hslsFsssDLHuYlVos-GRuaTAIS.slPpsLGpuLpPlssIGGllGWLFAcEp.ssuhNGFoLTGGpFsRcspVcF..csG..clpIcQpacGlDs-.....saLsl+hslcGpVPpIsssuoVpIcDYoEhYppotsGllTSpSTRsaTVcs.....uu.spTasYolDQTITF-pCpc+.scls ......................................................GsPpRspGpltGplpss...p....hslsh.ssslps.sssssuph.aouIo.....p.lPpslG..uhhsLsslhuslhWhhAhE..ssshNGFo....lT...G..utFp+pspVpF...tsG...EhLplsQphcGLDsc.......shLhlcs.lpGpV.Pplsssuslplp.sYpE.Yh.o.su........lhutSoRpas.l.st...........tshsYphppsIpYppsttt....s................................ 0 26 38 83 +7303 PF07475 Hpr_kinase_C HPr Serine kinase C-terminal domain Bashton M, Bateman A, Moxon SJ anon COGs Domain This family represents the C terminal kinase domain of Hpr Serine/threonine kinase PtsK. This kinase is the sensor in a multicomponent phosphorelay system in control of carbon catabolic repression in bacteria [1]. This kinase in unusual in that it recognises the tertiary structure of its target and is a member of a novel family unrelated to any previously described protein phosphorylating enzymes [1]. X-ray analysis of the full-length crystalline enzyme from Staphylococcus xylosus at a resolution of 1.95 A shows the enzyme to consist of two clearly separated domains that are assembled in a hexameric structure resembling a three-bladed propeller [2]. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.79 0.71 -4.95 81 2208 2012-10-02 15:24:17 2003-10-28 16:53:30 7 7 2083 29 470 1375 236 159.20 43 53.76 CHANGED ppLAsppslHGVLlDVaGlGVLIpGcSGlGKSEsALELlcRGH.RLVADDsV-lpchsppt.LhGp.uPcllcchlElRGlGIIslpsLFGhsul+spppIpLllpLppWcpp..ppaDRLGh-.ppptcILslclPplplPVcsGRNlAlIIEsAAhNa+hKphG.asuscpFpcRl ...................t.pLA.psolHG.VLlDla.Gh....G....VLIpG-SGlGKSEsAL........E........L....l.......c......R......G......H.......R......LVADD.........tV.-l...h.........p...h.sc.p............p...Lh.Gp........s...P..c...l...l.ca....L.l...E.I..RGlGIIsVhsLaGsuul+sppplpLslpLcpappp........ctaDRLG...p.p..p..p.hclh.s.splsplplPV+sGRNlulllEsAAhNaRhp..hG.hsssppFtcR................................................................... 0 158 314 385 +7304 PF07476 MAAL_C Methylaspartate ammonia-lyase C-terminus Bateman A, Moxon SJ anon COG3799 Family Methylaspartate ammonia-lyase EC:4.3.1.2 catalyses the second step of fermentation of glutamate. It is a homodimer. This family represents the C-terminal region of Methylaspartate ammonia-lyase and contains a TIM barrel fold similar to the Pfam:PF01188. This family represents the catalytic domain and contains a metal binding site [2]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.55 0.70 -5.45 14 185 2012-10-02 01:07:48 2003-10-28 16:58:51 6 4 176 12 41 2518 572 245.40 60 60.15 CHANGED slshcslPlFuQoGD-RYhNsDKhIlKcs-VLPHuLhNsV-.KlG.cGEcLh-YlpWLppRlhpL.uscsYpPhlHlDVYGTlGthFss..Dhc+hs-YLupLccuAtPatLpIEGPhDsGs+ttQIctLstLRptLccpGlsVclVADEWCNThEDI+tFsDupAucMVQIKTPDLGGlsNol-AlLYC+c+GlGAYlGGoCNETDhSAclssHlALAspPsQhLuKPGMGlDEGlMIlpNEMpRslAlhcp .....................................................................................................s..sEsIPlFGQSGDDRYlsVDKMILKGlDVLPHALINNVE-KLGh+GEKLtEYVcWLu-RIlsh.RsuscY.+.PsLHI.D.V.....YGT...IGllF-h..Ds......hRsApYIAoL.EcpApshs.L.h.I.EG.PlDAG.sK.s.cQIchLsuls.ccLs.+hGos.....V+IVA.DE.W.C.NT.a.p.D....I.h.-.F.sDAsusHMVQIKTPDLG.uIcNllDAVLYCpp+uhpAY.pGGTCNETDlSARsCVHVALAsRPhpMLsKPGMGFDEGl.IVaNEMNRTlALLp.s........................................... 2 16 26 35 +7305 PF07477 Glyco_hydro_67C Glycosyl hydrolase family 67 C-terminus Finn RD, Moxon SJ anon CAZY Domain Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the C terminal region of alpha-glucuronidase which is mainly alpha-helical. It wraps around the catalytic domain (Pfam:PF07488), making additional interactions both with the N-terminal domain (Pfam:PF03648) of its parent monomer and also forming the majority of the dimer-surface with the equivalent C-terminal domain of the other monomer of the dimer [1]. 
20.30 20.30 20.80 28.80 19.30 19.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.52 0.70 -4.95 38 293 2010-01-08 16:36:07 2003-10-29 13:35:56 7 7 257 17 125 306 20 220.20 42 31.06 CHANGED oGphhAQANhYuaGRLAWsPshsocpIscEWlRhTF.......us.cpcVlcslsp............hhhpSapAa.sY.ssPLGlpahsshs....a.HYGPsPtsp-tss......Wus..YH+ADcsGIGhDRT.poGoG...hsuQYss.lAcha-slcTsP--LLLWFHHVPasa+L+SG+TllQclYcp+acGscpVpshhpsWcuLcutlDscRapcVtt+LphQtpcAhhWRDuhssYFhphSG..IP-thsRst.a ..................................sGp.hAQANhYAaGRLAWsPslsucpIscEWl+hTF...............us.-.tpl.....lcslpp................hhhpShcsh.sY.psPLGltahhs.s.....+..HYGPsPts.c.hst....................Wss......YH+ADppGIGhDRT.uo..G..os...hsuQYhs.lspha-slposP-cLLLaFHHVPasa+L+S..G+T...lhpclhspahcGlcpspphhppWp.s.Lcs..h..l...D.s..........c..........RapcVtp+LphQtccAh.WRDshssYF.phSG..IPsthtR.............. 0 42 84 108 +7306 PF07478 Dala_Dala_lig_C D-ala D-ala ligase C-terminus Bateman A, Moxon SJ anon PSI-BLAST 2dln Family This family represents the C-terminal, catalytic domain of the D-alanine--D-alanine ligase enzyme EC:6.3.2.4. D-Alanine is one of the central molecules of the cross-linking step of peptidoglycan assembly. There are three enzymes involved in the D-alanine branch of peptidoglycan biosynthesis: the pyridoxal phosphate-dependent D-alanine racemase (Alr), the ATP-dependent D-alanine:D-alanine ligase (Ddl), and the ATP-dependent D-alanine:D-alanine-adding enzyme (MurF) [3]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.90 0.71 -4.93 18 6597 2012-10-10 13:17:03 2003-10-29 13:51:30 8 33 4445 68 1275 21237 8280 190.70 33 59.00 CHANGED LhpssGlssssahslp+pchppts......htph.tpLGaPlFVKPAptGSSlGloKVpst-ELpsAlcpAhpYDpcVllEcsl.sGcElpsulLG..Nts.hplussscIthss......uFYDYcsKY....s.ssuphhlPAsLstphppplpchAhcsY+sLus+GhARlDFFL.spcGplhLNElNThPGFTshShaPphhtssGlsascLlspLlp ............................................................................................................hpstGl.sh.ss..a..h...s.l.pp.sp.....p.t....................t.h.....p..p....L..s.h.....P.l.h.VK..P..u..p....t..G..S........S..l.....G.....l.....o....K.....V.......p....s.......p.......p.......p......L.......p.......t........A......l........c..........h.........A...........h..........p............a...........D.........p..............c.............V......l......l..Ep.hl.....s.G...........c......El...p...s...u....l..L.G...........p....tt......sp...s...h......s...s...s..E...I..h.h..ss...........................sF...Y..D...Y...-..u.K...Y...........................................s.....s.....p...........s.....p...h.........h.......h......P.........A.....p.................l.......s......s.........c.........h..t...p...p.....l..pp..hAhp.Aa..p.....s.....L.......s......s....p.......G....h...u..R.l...D..................h.....h........l.......s................t............c.........s........p.....h......h..l.E..lN.....T.hP.....G..h.....T.........s.....h.........S.h.........h.Ph.........h.........h.p.s.s.G.lsaspLlpcll...................................................................................................................... 
0 456 861 1094 +7307 PF07479 NAD_Gly3P_dh_C NAD-dependent glycerol-3-phosphate dehydrogenase C-terminus Finn RD, Bateman A, Moxon SJ anon Prosite Domain NAD-dependent glycerol-3-phosphate dehydrogenase (GPDH) catalyses the interconversion of dihydroxyacetone phosphate and L-glycerol-3-phosphate. This family represents the C-terminal substrate-binding domain [2]. 25.00 25.00 27.20 26.30 24.30 24.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.81 0.71 -4.49 27 5512 2012-10-02 19:36:47 2003-10-29 14:12:16 9 20 4754 22 1400 3895 2018 143.50 41 42.43 CHANGED sDlhGVElsGALKNVlAlAsGhsDG......luhG.....sNsKuullphGLhEhp+huthhssssp....Taht.sGluDLlsTChu..uRNp+hGptlu+s...pshcplpcph.......GthlpGstTscplhplhpppsl...............chPlhpslYpllhpthpspchlppL .............sDllGVEluGAlK.............NllAluAGhs.cG...........l.G..hG..........sNu+.......AALlTRGL......sEhsRlG........s...s...h...Gupst.................TF.h.G.LuG..lGDLllTCou.....S..R............Nh+hGhtLGpG...............pslcph.pph.....................GtVlEGh....tosc...sl....h.p..lAc.c..h.sl...............cMPIspslap.lLap.stssccsh...h........................... 0 469 881 1177 +7309 PF07481 DUF1521 Domain of Unknown Function (DUF1521) Yeats C anon Yeats C Family This family of unknown function is found in a limited set of Bradyrhizobium proteins. There appears to be a periodic -DG- motif in it. 
21.00 21.00 22.40 22.20 20.60 19.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.22 0.71 -4.55 5 27 2009-01-15 18:05:59 2003-11-03 12:43:51 6 2 16 0 10 27 0 155.60 39 56.14 CHANGED WSH.EV+DGKATIcLGDKYoIslDEKDGThplRNsQTG+lT+I+GDPHVDANGDGKcDFDFKKsMTFQLDDGTKITVDTVsYGp..GcTlASKLTITNGDNAMVVEGLGDccDGKNNL+VTQSNAGRTLDpLTsDGAQTIaEtoGpGWVDs.oG+tVTQAsIstsEsssssushp ....................ppupupIphsDtYoIpss-psushplpNsp.TGcsT.plaGDPHVDssGDGcssFDFK+shTFpL-DGTKITVsTssaGs..GtTloS+LTITsGcs.uhhVpGlscsc..pssLplppu.ssGhhlD.hssDG.phh.p..sstGal..s...tGt...ssQt.hs.sc.........t...................................................... 0 5 6 8 +7310 PF07482 DUF1522 Domain of Unknown Function (DUF1522) Yeats C anon Yeats C Family \N 25.00 25.00 25.00 111.30 20.60 19.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.40 0.72 -3.90 20 54 2009-01-15 18:05:59 2003-11-03 16:03:44 6 2 9 0 40 58 0 111.70 54 27.03 CHANGED oTLsVNGKTITFKsussPsuu...slssGSGls..GNllTDGsGNSTVYLss......ATVsDlLsAIDLAoGVpoATl.usGsATlussu.....GssuS...hsuuGsLsLsTuTGuDLSIo.Gsush .oTLsVNGKTITFKsussPsus...slsoGoGls..GNlhTDGsGNSTVYLts......uTVsDVLpAIDLAoGV+oAol.usGsATluTus.....GsssS...ssuuGtlpLsououADLolTGsush..... 0 8 14 20 +7311 PF07483 W_rich_C Tryptophan-rich Synechocystis species C-terminal domain Yeats C anon Yeats C Family This domain is found at the C-terminus, normally between 2-3 copies, of a range of Synechocystis membrane proteins. This domain is fairly tryptophan rich as well. 
20.70 20.70 21.20 20.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.52 0.72 -4.37 14 123 2009-01-15 18:05:59 2003-11-03 16:47:07 6 23 22 0 33 93 105 104.40 32 17.49 CHANGED s.sTSLlttsssYahs.hssGosstLKYsGtshssGpFGshsPluA.pTusG.apVsWpssuTspaohWsTDuNGNahSp.oshVsGsShps.shEssFtpDlNGDGslG ............................thulltt..shahs.husssssslKat.G.ps.hssuphGsWpsl..uA.ET.ssGh.QVLWp..sss...ssphsVWsoDuNhNalu..Ssshss.sSsphhshEssFphDlNsDuhlG................... 0 13 20 20 +7312 PF07484 Collar Phage Tail Collar Domain Yeats C anon Yeats C Domain This region is occasionally found in conjunction with Pfam:PF03335. Most of the family appear to be phage tail proteins; however some appear to be involved in other processes. For instance Swiss:Q03314 from Rhizobium leguminosarum may be involved in plant-microbe interactions ([1]). A related protein Swiss:Q9L3N1 is involved in the pathogenicity of Microcystis aeruginosa. The finding of this family in a structural component of the phage tail fibre baseplate (Swiss:P10930) suggests that its function is structural rather than enzymatic. Structural studies show this region consists of a helix and a loop ([2]) and three beta-strands. This alignment does not catch the third strand as it is separated from the rest of the structure by around 100 residues. This strand is conserved in homologues but the intervening sequence is not. Much of the function of Swiss:P10930 appears to reside in this intervening region. In the tertiary structure of the phage baseplate this domain forms part of the 'collar'. The domain may bind SO4, however the residues accredited with this vary between the PDB file and the Swiss-Prot entry. The long unconserved region maybe due to domain swapping in and out of a loop or reflective of rapid evolution. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.77 0.72 -4.18 181 1934 2009-01-15 18:05:59 2003-11-03 17:20:07 7 82 865 5 447 1768 405 51.40 41 13.56 CHANGED GpIhhauhs......hs........P.........pG.....WhhCsGphlshsp...ssLau...llGshaG.Gs..GtsoFsLPDLRGchs ..........................GsslsaP.us......ss.........P...................................sG............ahhssGp...sa.st.sp...YPpLhs........................................u.........hso.......h..sLPDhRGhh...................... 0 134 266 347 +7313 PF07485 DUF1529 DUF1259; Domain of Unknown Function (DUF1259) Yeats C anon Yeats C Family This family is the lppY/lpqO homologue family. 25.00 25.00 27.20 27.10 24.10 24.10 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.50 0.71 -4.38 43 461 2009-01-15 18:05:59 2003-11-04 10:36:12 6 3 198 0 107 299 12 120.00 35 80.42 CHANGED ss.plsplLGpKGshs..sGVa+lshsRtDlplshpGhslss.s..hulsshhuFps.sss.spAhhhGDhVLlpcEVsPVhpsLppsGIpVTAlHNHhLt-pP+lhahHhtuhscsscLApsl+sALcth ...........ttlsphLG.ppGphs..sulh+hslsRpDhplphpGhplss..s..hulsohhuFps...sss.GcAhlhGDhVlhpcElssVhpulpstGIploAlHNHhLp-pPplaahHhtuhs.DsssLA+sl+sAL-t....... 0 45 78 91 +7314 PF07486 Hydrolase_2 Cell Wall Hydrolase Yeats C anon Yeats C Family These enzymes have been implicated in cell wall hydrolysis, most extensively in Bacillus subtilis. For instance Swiss:P50739 is expressed during sporulation as an inactive form and then deposited on the cell outer cortex. During germination the the enzyme is activated and hydrolyses the cortex([1]). A similar role is carried out by the partially redundant Swiss:P42249 ([2]). It is not clear whether these enzymes are amidases or peptidases. 
21.70 21.70 21.70 21.90 21.50 21.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.71 0.72 -3.53 170 1705 2009-01-15 18:05:59 2003-11-05 10:54:44 7 29 750 0 448 1369 387 105.20 32 42.38 CHANGED cGEshpGplAVAsVllNRV.c......sspFPs..olsuVlaQs.........QFohs.ss.......................Gph.p.h........ssp....puhc...sApps.L..sGt...sssssAhaa..as.sts..s.s.p...Wh...shphsspIGsHhFac ...........tGEshpGplAVusVllNRV..c..........ss.p.FPs.......olssVlaQs.......stFsss.ss..............................................................Gphth..........sp.....pshp....sAccs.l........sGt.................s.....ss.s...........sAlaa..assts......s...s..p...........Wh.....st.hh...tplGpHhFhp.............................................................. 0 191 328 375 +7315 PF07487 SopE_GEF SopE GEF domain Finn RD, Moxon SJ anon Pfam-B_18665 (release 7.8) Domain This family represents the C-terminal guanine nucleotide exchange factor (GEF) domain of SopE. Salmonella typhimurium employs a type III secretion system to inject bacterial toxins into the host cell cytosol. These toxins transiently activate Rho family GTP-binding protein-dependent signaling cascades to induce cytoskeletal rearrangements. SopE, can activate Cdc42, an essential component of the host cellular signaling cascade, in a Dbl-like fashion despite its lack of sequence similarity to Dbl-like proteins, the Rho-specific eukaryotic guanine nucleotide exchange factors [1]. 
19.60 19.60 20.80 22.60 18.70 16.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -10.86 0.71 -4.28 2 188 2009-01-15 18:05:59 2003-11-05 11:19:39 8 3 146 6 6 80 0 160.40 71 70.50 CHANGED AVLTsKsVKDFMLQpLNslDI+GsASKDPAYApQTpEAILSAVYSpNKDQCCpLLISKGlsIsPFLpEIGEAApNAGLPGphKNsVFTPuGAGANPFlsPLIuSAs.KYP+MFINppQQsSFKhYAEKIlMpEVsPLFNcssMPTPQQFQLhlENIANKYlQNss ........................AVLTsKsVKDFMLQpLNslDI+GNASKDPAYARQTCEAILuAVYSNNKDQCCKLLISKGlSITP.FLKEIGEAAQNAGLPGE.hKNGVFTPGGAGANPFVlPLIAuASlKYP.HMFINH..NQQVSFKAaAEKIlMKEVsPLFNcsoMPTPQQFQLTlENIANKYLQNAS............................................................ 0 3 3 4 +7316 PF07488 Glyco_hydro_67M Glycosyl hydrolase family 67 middle domain Finn RD, Moxon SJ anon CAZY Domain Alpha-glucuronidases, components of an ensemble of enzymes central to the recycling of photosynthetic biomass, remove the alpha-1,2 linked 4-O-methyl glucuronic acid from xylans. This family represents the central catalytic domain of alpha-glucuronidase [1]. 
20.30 20.30 21.30 21.30 17.90 19.60 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.83 0.70 -5.54 37 290 2010-01-08 16:36:33 2003-11-10 10:33:56 7 7 257 17 125 302 12 316.00 52 44.22 CHANGED L+hltput....slssls.........hhpsPpstlRhlNcW....DNlD......GolERGYAGtSIFahs...................shsths.sRhtpYARhLASlGINulVlNNVNAps......plLospalpcluplADlFRPYGI+laLSlNFASPhp....lGG...LsTuDPLD.cVhsWWpppsscIY.phIPDFGGFLVKAsSEGQPGPhs.YGRTHA-GANMLAcALpPaGGlVhWRAFVYsp...p.p.-hpsDRA+uAY-pFpPLDGpFcDNVllQIKNGPIDFQVREPsSPLFGuh.+TshhlEhQITQEYLGQpsHLsYLsPhWc...ElLcaDsascGcs.SpVtcllsG..ph....aspphuGhAGVuNlGsDpNW ..................t.ttshppls.htpsPphtlRhlNHW....DN.hD......GolERGYAG...t..S..lahhs....................sh....th...sRhpcYARh.ASlGINusV...lNNVNups................phLospaLp.clu.tlA-lFRPYGI+laLSlNFuuPhp...............l....GG........L..sTuDPLD.pVtpWWcppsccIY.ph.IPDFGGFLVKA.sSEGQ..PGPhs.YG..RoHADGANMLAcAL....tP..a.G...GlVhWRAFVYs.......p-p...............psDRA+tAYccFpPLD.........GpFcDNVllQlKNGPlDFQsREPhSPLF.Guh.+TshhlEhQITQEYLGQppHLsYLuPhW+.EhLcaDTh.s..cG......c......s......S.....pVtc.llsGph..........hs....p.t.....hs..GhAuVuNlGsDtsW...................... 0 42 84 108 +7317 PF07489 Tir_receptor_C Translocated intimin receptor (Tir) C-terminus Moxon SJ anon PRINTS Domain Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. 
The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation [1]. This family represents the Tir C-terminal domain which has been reported to bind uninfected host cells and beta-1 integrins although the role of intimin binding to integrins is unclear. This intimin C-terminal domain has also been shown to be sufficient for Tir recognition [2]. 20.50 20.50 21.00 97.30 19.70 20.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.38 0.70 -4.59 6 133 2009-01-15 18:05:59 2003-11-11 10:58:32 6 2 104 5 1 76 1 210.30 70 40.11 CHANGED lESNAQAQpRYD-QpAKRQpELslSSGsGYGLSGALILGGGIGAGVTAALHRRNQPsEQpTTTTTpTs......................lpNpsusNTsAQG...NsDToGsE-.outoRRsSpuSsAS.phSDTSSs-TV.NPYA-VshopNssph..ctsEEsIYDEVAAD.PsYSsIQ+huGssPso.uRLlGsPGpGIQSTYALLA.SGGLRhGMGGLTGGupSAsSosNssPsPGstRFV .IEsNAQAQpKYDEQpAKRQEELplSSGsGYGLSGALILGGGIGsuVTAALHRKNQPsEQ.TTTTTp.Ts.......................V-NpPuNNTPAQG...NsDTsGuE-.o.tSRRsS.ASsuS.saSDTSShGTVpNPYADVttuhpDS.s..ppSppsl.s.sssD..ssYSsIQH.stsosss.uRLlGsPutGIQSTYAhLA..SGGLRhsMGGLTGGup.SAVsTuN..ssPsPGstRFV............. 0 0 0 1 +7318 PF07490 Tir_receptor_N Translocated intimin receptor (Tir) N-terminus Griffiths-Jones SR, Moxon SJ anon PRINTS Family Intimin and its translocated intimin receptor (Tir) are bacterial proteins that mediate adhesion between mammalian cells and attaching and effacing (A/E) pathogens. A unique and essential feature of A/E bacterial pathogens is the formation of actin-rich pedestals beneath the intimately adherent bacteria and localised destruction of the intestinal brush border. The bacterial outer membrane adhesin, intimin, is necessary for the production of the A/E lesion and diarrhoea. The A/E bacteria translocate their own receptor for intimin, Tir, into the membrane of mammalian cells using the type III secretion system. 
The translocated Tir triggers additional host signalling events and actin nucleation, which are essential for lesion formation [1]. This family represents the Tir N-terminal domain which is involved in Tir stability and Tir secretion [2]. 25.00 25.00 31.00 31.00 21.70 19.80 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.66 0.70 -4.77 4 135 2009-01-15 18:05:59 2003-11-11 11:08:53 6 3 104 0 1 78 0 256.50 76 48.76 CHANGED MPIGNLGHNsNVss.IPPAPPLPSQTDG.AtGupGQLIsSoGsLGSRhLFTPlRNSVADusDuRuSDlPGLPsNPhRlAA..SEloLpsGFEVLHD+GsLDTLNptIGSSsFRVETQ-DGoHlAIGQKNGlETSVlLS-QEauSLQuIDPEGKs+FVFTGGRGGAGHuMVTVASDIsEARQRIl-KLEPKsot..........................csopSGusNSuEsp...ssopopTSTSTSSLRSDPKLWLSLGoIAAGLIGhAATGIAQAlALTPEPDDPTTTD .MPIGNLGHNPNVNs.IPPAPPLPSQTDG.A.GGRGQLINSTGPLGSRhLFTPlRNShADSuD.sRASDlPGLPsNPhRLAA..SEloLpsGFEVLHD+GPLDTLNpQIGSSlFRVETQ-DGpHlAlGQ+NGlETSVVLS-QEaupLQSIDPEGKsKFVFTGGRGGAGHAMVTVASDIsEARQRIL-hLEPKsTGtppsts....................c-opSGu.sosps.....hoETpTSTSTSSLRSDPKLWLuLGTlAsGLIGLAATGIsQALALTPEPDsPTTTD.... 0 0 0 1 +7319 PF07491 PPI_Ypi1 Protein phosphatase inhibitor Wood V, Studholme DJ anon Pfam-B_11090 (release 10.0) Family These proteins include Ypi1, (Swiss:P43587), a novel Saccharomyces cerevisiae type 1 protein phosphatase inhibitor [1] and ppp1r11/hcgv (Swiss:O60927), annotated as having protein phosphatase inhibitor activity [2-3]. 19.50 19.50 19.60 25.50 18.50 18.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.15 0.72 -4.34 38 331 2009-01-15 18:05:59 2003-11-11 14:35:32 6 8 258 0 231 305 1 66.50 40 41.30 CHANGED lL+LRssp............................ppc++VpWsEsVVDNEphsKKKSKlCCIaHts.+sasE...........SSo-......ssuss.- ...............................................pLp......................................psc++VpWspssVD.NEths+KpSK.....s.........C...CI.Y+Ks.+sas..E....................SS.o.-s.ss-p.pp................................ 
0 78 125 191 +7320 PF07492 Trehalase_Ca-bi Neutral trehalase Ca2+ binding domain Finn RD, Bateman A, Wood V, Studholme DJ, Moxon SJ anon Prosite Domain Neutral trehalases mobilise trehalose accumulated by fungal cells as a protective and storage carbohydrate. This family represents a calcium-binding domain similar to EF hand. Residues 97 and 108 in Swiss:O42893 have been implicated in this interaction. It is thought that this domain may provide a general mechanism for regulating neutral trehalase activity in yeasts and filamentous fungi [1]. 25.00 25.00 25.90 25.00 24.30 16.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.36 0.72 -4.62 19 181 2010-01-08 16:37:03 2003-11-11 16:42:46 6 3 147 0 122 182 1 30.00 62 4.12 CHANGED LcpLLppEDTDpNhQITIEDpGPKVlsLGT ..LcpLLppEDTDpNhQITIEDsGPKVlslGT.... 0 37 70 105 +7322 PF07494 Reg_prop Two component regulator propeller Yeats C anon Yeats C Repeat A large group of two component regulator proteins appear to have the same N-terminal structure of 14 tandem repeats. These repeats show homology to Pfam:PF01011 and Pfam:PF00400 indicating that they are likely to form a beta-propeller. This family has been built with artificially high cut-offs in order to avoid overlaps with other beta-propeller families. The fourteen repeats are likely to form two propellers; it is not clear if these structures are likely to recruit other proteins or interact with DNA. 20.40 15.10 20.40 15.10 20.30 15.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -7.26 0.75 -7.61 0.75 -3.21 668 7311 2012-10-05 17:30:42 2003-11-17 13:40:01 6 214 468 96 1696 7326 915 23.90 33 6.00 CHANGED slsss.tlh..slhp...DppG.plWluops ..............Lsss.plp...slhp...DppG.plWlGTt..... 
1 834 1511 1628 +7323 PF07495 Y_Y_Y Y_Y_Y domain Yeats C, Bateman A anon Yeats C Domain This domain is mostly found at the end of the beta propellers (Pfam:PF07494) in a family of two component regulators. However they are also found tandemly repeated in Swiss:Q891H4 without other signal conduction domains being present. It's named after the conserved tyrosines found in the alignment. The exact function is not known. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.15 0.72 -4.22 79 2976 2012-10-03 16:25:20 2003-11-17 14:52:01 8 220 527 18 642 2956 292 64.40 23 6.25 CHANGED shssspphpYpYhlpuh-.spWhphss.s...........phsassLssGpYplplpspsptsphs.s..tplslplt ..........................tspphpYpYpLcG..h..-..pp....Whp.h.sstp..............phsaosL..ssG.pYphpV+.up.sps.u.thstp....tplphhl................... 0 299 555 608 +7324 PF07496 zf-CW CW-type Zinc Finger Yeats C anon [1] Domain This domain appears to be a zinc finger. The alignment shows four conserved cysteine residues and a conserved tryptophan. It was first identified by [1], and is predicted to be a "highly specialised mononuclear four-cysteine zinc finger...that plays a role in DNA binding and/or promoting protein-protein interactions in complicated eukaryotic processes including ...chromatin methylation status and early embryonic development." Weak homology to Pfam:PF00628 further evidences these predictions (personal obs: C Yeats). Twelve different CW-domain-containing protein subfamilies are described, with different subfamilies being characteristic of vertebrates, higher plants and other animals in which these domain is found [1]. 
23.20 23.20 23.50 23.60 22.90 23.00 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.38 0.72 -4.26 52 682 2009-01-15 18:05:59 2003-11-20 14:53:00 10 54 121 3 435 667 21 48.90 35 6.09 CHANGED thWVQC-..pChKWRpLP..tphs...pplsc..tWhCphNs.....ssth...ssCsssE-ht ........hWVQC-..pCh.KWRpLP....tphs......pphsc..pWhCshNs.....ssph......spCss.PE-................... 0 120 218 310 +7325 PF07497 Rho_RNA_bind Rho termination factor, RNA-binding domain Finn, RD anon Pfam-B_1610 (release 11.0) Domain The Rho termination factor disengages newly transcribed RNA from its DNA template at certain, specific transcripts. It it thought that two copies of Rho bind to RNA and that Rho functions as a hexamer of protomers [1]. 21.00 21.00 21.10 21.40 20.50 20.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.42 0.72 -4.34 16 3771 2012-10-03 20:18:03 2003-11-21 13:57:14 7 13 3677 47 859 2307 2356 77.10 55 16.60 CHANGED lau-GlLE.lLsDGFGFLRSs-ssYLsGsDDIYVSPSQIRRFsLRTGDolpGpIRsPKEuERYFALl+V-slNh-sPEp ..................hscGlLE..IL...........D.....G.aGFLRosp.sYLsGs.-DIYVSsSQI.RRFsLRTGDslsGplR.........s.......P..............K.-........G.............E..............R............Y...........FALL+Vsp..VNhcsPE............................... 0 304 582 738 +7326 PF07498 Rho_N Rho termination factor, N-terminal domain Finn, RD anon Pfam-B_1610 (release 11.0) Family The Rho termination factor disengages newly transcribed RNA from its DNA template at certain, specific transcripts. It it thought that two copies of Rho bind to RNA and that Rho functions as a hexamer of protomers [1]. This domain is found to the N-terminus of the RNA binding domain (Pfam:PF07497). 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.76 0.72 -4.19 139 3972 2012-10-03 03:04:30 2003-11-21 14:02:28 7 30 3568 44 1023 2585 1952 41.90 35 9.71 CHANGED -L.cphsls-LhplAc.clsl.c....shuphRKp-LIFsILcspucpss ........pL.cshslsELhplAc.ph.sl.c.....s.huc.....h+.Kp-LlaAIl+tpscps............ 0 349 705 889 +7327 PF07499 RuvA_C RuvA, C-terminal domain Finn RD, Bateman A anon Pfam-B_1373 (release 11.0) Domain Homologous recombination is a crucial process in all living organisms. In bacteria, this process the RuvA, RuvB, and RuvC proteins are involved. More specifically the proteins process the Holliday junction DNA. RuvA is comprised of three distinct domains. The domain represents the C-terminal domain and plays a significant role in the ATP-dependent branch migration of the hetero-duplex through direct contact with RuvB [1]. Within the Holliday junction, the C-terminal domain makes no interaction with DNA [1]. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.28 0.72 -3.79 100 4234 2012-10-01 23:03:33 2003-11-21 15:47:17 8 7 4202 23 886 2640 702 46.70 32 23.40 CHANGED shhs-ulpALhuLGYppp-Apcslppl.....ssshs....s-p.hI+tAL+hlt ...............pEAlpALluLGYptpEup+....slpplt........tssto.............s-p..hI+pAL+th.h............... 0 290 583 752 +7328 PF07500 TFIIS_M Transcription factor S-II (TFIIS), central domain Finn RD anon Pfam-B_1373 (release 11.0) Domain Transcription elongation by RNA polymerase II is regulated by the general elongation factor TFIIS. This factor stimulates RNA polymerase II to transcribe through regions of DNA that promote the formation of stalled ternary complexes. TFIIS is composed of three structural domains, termed I, II, and III. The two C-terminal domains (II and III), this domain and Pfam:PF01096 are required for transcription activity [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.45 0.71 -4.02 84 1033 2009-01-15 18:05:59 2003-11-21 17:01:31 9 29 337 6 638 962 110 113.30 28 17.99 CHANGED pscplRspsh.chLhsuLttt..tpp.................ssppstplAtplEp.............................tlaphh....ts...ss...pcY+p+hRSlhhNLKc.+NssL+p+llsGplospp...........LspMospEhAS-ch+ptc.cphpccslppsph .............................................................................................................pthRppsh.chLhtsLhtt.......pt...t...........................................tppstp.lAtpIEp.......................................................................................................t.lap.h................ps....ss..................tcY+s+hRSlhhNL.KD.+Ns..sL+cpVL...sGp..loPpp...........ls.pMos.-EhAScEl..pphc.cphpccslpph.............................. 0 179 295 458 +7329 PF07501 G5 G5 domain Bateman A anon Bateman A Domain This domain is found in a wide range of extracellular proteins. It is found tandemly repeated in up to 8 copies. It is found in the N-terminus of peptidases belonging to the M26 family which cleave human IgA. The domain is also found in proteins involved in metabolism of bacterial cell walls suggesting this domain may have an adhesive function. 20.90 20.90 20.90 20.90 20.10 20.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.40 0.72 -4.03 140 4313 2009-01-15 18:05:59 2003-11-21 17:13:06 7 239 1219 7 576 3447 28 77.50 25 10.80 CHANGED clpppppsppcp..lsapsppppDssL.pGp.pcVhppGpsGh+phshplshp..sGpcls+p.llspplhp.tPhscllthGTcp ......................................h....p.psp.pcp..ls.F.ps.pppcsssL....t......G......p...pp......V....hp.......p.......GpsGp..+p..h..h..hplhh.....sG..p...c..........s....p.....cp.h.......l..sp.p.l....sp...ts.hsclVclGTc.................................... 
0 260 400 487 +7330 PF07502 MANEC MANSC; MANEC domain Mitter R, Fitzgerald S, Guo J, Studholme DJ anon Guo J Domain This region of similarity, comprising 8 conserved cysteines, is found in the N-terminal region of several membrane-associated and extracellular proteins [1]. Although formerly called MANSC (for motif at N terminus with seven cysteines) it has now been renamed by MANEC (motif at N terminus with eight cysteines) by Richard Mitter and Stephen Fitzgerald after the discovery of an eighth conserved cysteine. It is postulated that this domain may play a role in the formation of protein complexes involving various protease activators and inhibitors [1]. 23.90 23.90 24.20 25.00 23.80 23.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.67 0.72 -3.99 14 245 2009-01-15 18:05:59 2003-11-25 16:33:57 9 11 78 0 128 211 0 89.60 31 20.16 CHANGED ssspsChsp.....Fps.htshlIcoptSlstGApFLcus.pVtohc-ClpACCossp......CslAlh-tptsss.ss......CaLFsCh..sp.sCpFssppGasoY ..................s..t.C.tt..........h.shllcsptSlp...t.GApaLps...plpstccClpuCCppps......CNlAlh-tptsts.tss..............CaLhpCh....p...p.sCpFssppGahsY..................... 0 24 32 69 +7331 PF07503 zf-HYPF HypF finger Yeats C anon Yeats C Domain The HypF family of proteins are involved in the maturation and regulation of hydrogenase ([1]). In the N-terminus they appear to have two Zinc finger domains, as modelled by this family. 23.00 23.00 23.10 23.10 21.80 22.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.76 0.72 -4.53 185 2924 2009-01-15 18:05:59 2003-12-02 16:07:56 7 13 1412 12 778 2421 117 34.90 45 9.25 CHANGED hCspCtc.Eap-Pt..s.RRa+tphhuCspCGP+hslhp ...hCssChp.EhpDPt..c.RRa+h..lsCscCGP+hpll....... 
0 258 529 666 +7332 PF07504 FTP Fungalysin/Thermolysin Propeptide Motif Bateman A, Yeats C, Rawlings N anon Yeats C Motif This motif is found in both the bacterial M4 peptidase propeptide and the fungal M36 propeptide. Its exact function is not clear, but it is likely to either inhibit the peptidase, so as to prevent its premature activation, or has a chaperone activity. Both of these roles have been ascribed to the M4 and M36 propeptides ([1], [2]). 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.11 0.72 -4.49 142 1811 2009-01-15 18:05:59 2003-12-03 12:00:01 8 74 862 2 304 1431 39 50.00 25 8.45 CHANGED shph..hcspps.spGt.s+sRapQpapGls.VhGspls.lphspsGplpshsGshh ................tapl.hptpps.tpGh.s+l+hppshpGls.Vhuspls.l+.....hs.csGplhtlsGsh.t..................... 1 102 178 249 +7333 PF07505 Gp37_Gp68 Phage protein Gp37/Gp68 Studholme D anon Manual Family Homologues of phage proteins Gp37 and Gp68 are found in several bacteria. 
24.10 24.10 24.40 25.40 23.10 24.00 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.95 0.70 -5.22 10 525 2009-01-15 18:05:59 2003-12-03 20:35:13 6 5 427 0 163 511 57 235.60 33 93.07 CHANGED scTsIEWTDATWNPlsGCTKl...SPGCsNCYApphAcRhcuhuscpYp.suscppt.hu.hpt.lplctcpLphPtsWc+PRRlFVNSMSDLFHsc..VP--aIpcVFsVMcpsPpHsaQlLTKRssRhtclhsphth..............................sNVWLGsSlEsps.u.hRI-cLRcVPAulRF....lShEPLLGsls.......................................sssLssIHWlIVGGESGssARP.McP-WVRcIR-pCpsuGlPFFFKQWG......................................sh..t.hctuKKAsGRpL-GR...saDEhP ............................................t.................sWNPhtGCp.......+l.......SsGC.....ppCYA...ph.sp.+hth........t....t....t.at.ts..........................................h.........p....h........p..pp...................h...p......h..php.................psphlas.suh.S.......DlF..ttc......hss.pahpcla...p.hhc.....pssp.hpa.lLTKRspRhtphhsp......h................................................................sNlWhGsoVE.spp...s.hR........lsh...L.p.p..lP....A....t..h.+....a........lShEPLLssl.s....................................................................t.h.sh.ss.I.-WVlsGGE....S....Gsp...u...Rs.....h..c.....-..Wlhsl+-QCtt.s......slsFaFKQhG....................................................................................................t...t................................................................................. 0 54 118 148 +7334 PF07506 RepB ParB; RepB plasmid partitioning protein Studholme D anon Blast Family This family includes proteins with sequence similarity to the RepB partitioning protein of the large Ti (tumour-inducing) plasmids of Agrobacterium tumefaciens[1-2]. 
30.00 30.00 30.60 30.70 29.90 29.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.97 0.71 -4.42 15 449 2009-09-10 17:01:13 2003-12-03 20:37:47 6 4 222 0 133 437 33 167.50 21 53.66 CHANGED ARssLohI-ctthhs+LhppGhscspItsALuhstspltph....cslsptlp.t..thltth.h.lsshuhsppluRsR.lElucLh.........hsh..p.hpss.hhcph.sss+hstLhphl..+sp+tcphospshu..p....tsc.hs.ths...pulpshhcppG.ptpLtlcsspuh........phs+aLscphschhspF ..............................RpcLSaIEcshas......tcLpppGa......s..c..ctIt..sALul.ctstl.sc..h........pslsptl..............ltsIGsAhplG..Rs..RWh..-..lschl....................hsh...p.h.pss....h.....p..uh.........ss..p+hptlhphl.......p.tpt..p..p.ss.p.t.hu..t.......tpp..ht..s.p....tth.hhh.pptu..p.pltltttps..............th.hp.al.tp............................................................................... 0 18 64 92 +7335 PF07507 WavE WavE lipopolysaccharide synthesis Studholme D anon Manual Family These proteins are encoded by putative wav gene clusters, which are responsible for the synthesis of the core oligosaccharide (OS) region of Vibrio cholerae lipopolysaccharide [1]. 
20.10 20.10 21.30 26.80 19.60 18.60 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.91 0.70 -5.54 9 166 2009-01-15 18:05:59 2003-12-03 20:40:37 6 2 116 0 15 97 30 294.30 36 92.33 CHANGED IoVVlQGPV.s..sR.ppptuITppClsSlRsHLPGupIIlSTW.spsluGL-hDpllhs-DPGuslh.a..Dstsp.phN.NRQlVSThsGL+pVpTPYAlKLRuDNhLTustFlslhcpasp..RsspaphFcp+llsossahRc.p+GhsVhFHhSDhFpFGhTpDLLtlW-.sLhp-.phspss.....htp....tshsshphssEQhh...Wlpsh++hs.phcLtphp.-huhpphph.-phhusNlllssPcplGLslsp+Fhtpsphspp......apph-..............WlpLYpphC..s...ht.tsthphhhshhh.....+.+hl+hhhp+lcph ......................IThVlQGPl...........ssITppslpplRphFPtupIIl.....STWcG.......p...s..h.p.sl-......h..D.pll...cD..PGus.hhh......Y......cspsh...lNhNRQlhSThtG...L+tV.K...T....YAsKLRsDNlLssc....p....h....lElaEp.ast......Rtps.a.phL.s........pRllsSshFhhs.c..h.G.hsV.FahSDhFpFGhspDLLplWs....s.chhs.-hc..F.p.p................tY.....scp.....s..sE.Qhl...Whssh...Ls.thclcscp.Dasuhttsh..pFhtNNLlhsss-plGLD...lsc...R.h..hc...psshsh-.............ashpc...................WhhL.p.p.l............hhhhh.........t.ptlt.hh+phh.h......................................................................................................................... 1 8 10 13 +7336 PF07508 Recombinase Recombinase Studholme D anon Domain Family This domain is usually found associated with Pfam:PF00239 in putative integrases/recombinases of mobile genetic elements of diverse bacteria and phages. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.08 0.72 -3.80 73 4989 2009-01-15 18:05:59 2003-12-03 20:46:19 8 48 1838 0 981 4422 747 105.20 23 21.67 CHANGED sccutll+p.lachh.......h..puhuhpplsphLsppsl....s.pstt..........................W...stssl.......pplLpNshY..h............................Ghhhhsppt.......................t.hhhhps...........ta.slloc-haptspphhppp ..........................................cpAtlV+tIFchh................h.....pG......h....u.....hppIuct.L.s.p..c..ul...........s...pstt..........................................W......stssl.........ppILpN...h.Y.h...............................................................Gph...h..ht+pp..........................t...........p..p..h.h.h..c.s..................tH..p..sII.s.p-happsQphht........................................................................................... 1 429 755 881 +7337 PF07509 DUF1523 Protein of unknown function (DUF1523) Studholme D anon Manual Family \N 22.30 22.30 22.70 24.50 21.90 22.20 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.24 0.71 -5.04 20 220 2009-01-15 18:05:59 2003-12-03 20:50:56 6 1 218 0 38 192 35 165.30 42 92.02 CHANGED M+h.lK...hsllhlhalllhuhlaYsLPpaDlsRIoGsEVKRh..sc.sslhhusPssG......ssRDVaFIpT..........hc.....sst+shVYRNEDTsWuaP.YFKFsSAsLQAcApsh..sps.sphVtIcaYGWRlshhohFPNAlSI+sVsss-s.tshPahshllaslLhhhhahshphhRpah ......................................hhh....h+...hhllhshalhlhhhlpYshPcY-lshlTGsEVKRh..scssslstssPssG.......ssRDVYF.IpT...........pc.....cst+lhVYRNEDTtWGaPaYFKFsSAslQAcAQuh......s..s..sphVpl+YYGWRIshhs.FPNslsl+slsps-s..shPlFuaIhaslLhhhhahshphlRth................ 0 5 19 30 +7338 PF07510 DUF1524 Protein of unknown function (DUF1524) Studholme D, Bateman A anon COGs (COG3472) & PSI2 target BIG_246 Domain This family of uncharacterised proteins contain a conserved HXXP motif. 
A similar motif is seen in protein families in the His-Me finger endonuclease superfamily which suggests this family of proteins may also act as endonucleases. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.14 0.71 -4.33 240 2469 2012-10-05 18:28:12 2003-12-03 20:53:01 6 22 1617 0 608 2165 312 139.90 16 30.04 CHANGED hspspphtpphtthshhstttt...thhhllt.php.p.h.......stt............................l-HlhPps.st.p...........................tht.t...ttppthhss.l...uNLshls.tphN....sphu...s......psa...........htKpp......ta.t.....................................pphhhsp..lt....................hppp...ash..phlppRpptLhch ......................................................................................................................h.t.t........h..hllh.....php...p.h..........pht.....ph.................................I-HlhPps.sh..pt................................................................tht.t..ttpp+pphhss..l...uN...Lhhls..tptN............uphu...s..................tsa.........................h.cpp..........ta.t............................................pshh.hsp...lt...................................ppp....a.s...t..hp...tcppthh..h................................................................................ 
0 200 420 542 +7339 PF07511 DUF1525 Protein of unknown function (DUF1525) Studholme D anon Manual Family \N 21.60 21.60 21.60 21.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.52 0.71 -4.09 32 218 2012-10-03 14:45:55 2003-12-03 20:54:55 6 1 164 0 63 210 8 108.30 39 79.60 CHANGED tAs.....hhVhT-sp+Plp.......sssss..pVhhLDtsc+lptpLuspLPucPppAtt.spph..........lpuss.pphppplspAapGlscAWphGlpKlPAVVhD....pchVVYGpsDVspAlthhpta+pp .....................................................................t.AtshlhTDppH.Plp.......ssssspll.LDtspplcspLsstLP..u.sPppAtt.hspph..........lpSsshpphQpcL...s...pAYQulscAW.u.lGlpKlPAVVhD....ccaVVYGpsDVspAhthhtthpt.t.............. 0 8 30 51 +7342 PF07514 TraI_2 Putative helicase Studholme D anon Manual Family Some members of this family have been annotated as helicases. 23.90 23.90 24.00 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.78 0.70 -5.67 21 472 2012-10-01 20:28:14 2003-12-03 21:24:15 6 5 346 0 109 489 32 271.90 31 46.79 CHANGED Gahss.su.pLLuo.hcpcLlpplhppsuhscpp.......Fcphahtslc+YAchVQhLPASEsHHHutsGGhLcHuLElshaul+lppualLPssusPEcputppcuWpsAshhuALlHDlGK.lsDlclphp.....cGs.pWtsatusLsp.h+h+....Yh.......+sR.thphHsshuuhlhspllspssLsWLup.hPc.lhssLlhsluG..ph-c...........ussLuElVppADptSlspsL.u.ussstuhtssspslp+pLlsAlRtLltpchcls.spssucsW...lsp-.uLaLVtKssuDplpuhLhspGls.ulPss..sssLh-hLtspGllpsss.-...scul 
................................................Gh..s.ss.pLLus..ppphlpplhp.p.....s.u.hs.pp........Fpphhh.sl..c..c..hAthlQ.l..PASEsHHHutsGGhlcHuLElshaAh+lppuh.l.hss....su....sPE-pstppttWpsAshhuAL...hHDlGK.lsDlplp.t......sGp...tWtP.h.h.ss....lsp....h+h+....Yh........ptc......HtthusllhppllstpslsWLup.hPp.lhtsLlhsl....uG.....phpt.............sslLuclVhpADptSlsppL.t.tp.sht.........t...........t...sh.c.lhtuh+.ll.tp..ph...s..p..ucsa..............h.pp....tlal..h.p.hsc.lhthl.t.p.....th..........hPtp..s..hhs.h.p.thh..s.............................................................. 0 14 49 84 +7343 PF07515 DUF1528 Protein of unknown function (DUF1528) Studholme D anon Manual Family \N 21.50 21.50 22.70 21.50 20.10 20.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.41 0.72 -4.00 40 328 2009-01-15 18:05:59 2003-12-03 21:28:01 6 4 247 2 80 310 7 98.90 36 19.65 CHANGED lhlN-scAhVHhVssssFLVoPulFpcYhpcp............uptp..ptpsaphlQ+pFE+LplH++p........sGhNlWpCcVhGsRKs.p....cLpGYLlpssph..hhsp.s.PssNPaLpl ..........lhlNcscAhlHhVssssaLVoPulFppYhppp............uttp..s.psaphlQ+pFEcL..plH++p........sGhNlapsplhGsc+s.p.............pLpGYLlpssph...hhsp.h..P.sNshLp............................... 0 9 37 59 +7344 PF07516 SecA_SW SecA Wing and Scaffold domain Finn RD, Bateman A anon Finn RD Family SecA protein binds to the plasma membrane where it interacts with proOmpA to support translocation of proOmpA through the membrane. SecA protein achieves this translocation, in association with SecY protein, in an ATP dependent manner. This family is composed of two C-terminal alpha helical subdomains: the wing and scaffold subdomains [1]. 
26.70 26.70 28.30 26.90 26.20 26.50 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.57 0.70 -4.57 171 5380 2009-01-15 18:05:59 2003-12-04 14:10:21 8 25 4467 32 1115 3923 3227 206.70 32 24.32 CHANGED sIcp.....phlo+ulEpAQKKVEupNF-hRKpLL-YDDVhNcQRclIYppRpclLpu...........................c..c..lpchlh.phhccslpph.....lppah....stp..........ppas...hpsLpptlpphh.s.h..phslsph..............pths............................................tcp..lpcplhcthpptY............p...p.Kppphs..................................................t..chhcplE+hlhLpslDppWp-HLpsMDpLRpuIsLRuYuQ+DPlhEYKcEuaphFppMlpslcpcslphlh...+lpl ................................................................................................................................sIEpphlo+ulEsAQ++VEupNF-hRKplLcYDDVhNc...QRclIYppRpc.l.Lc..s..........................................................................................................................c..c..lpp.p.lt.shhccslpph.lsta.h.s.sp...........cpac..........lps.L.h.p.hlppth..s..h.....chsl.p..ph....................pth.s.......................................................................................................................................t-p....lp-tlhpps.ctY.....pp....+pp.t.ls........................................................................................................................................t..-hh.c.phE..+h..l..hLpslDstWp-HLssMDpLRpuI.tLRuYu....Q+sPl.EY+pEuaphFppMlpslcp-ssphlh+sp.h............................................................................................................................................... 0 382 745 956 +7345 PF07517 SecA_DEAD SecA DEAD-like domain Finn RD anon Manual Domain SecA protein binds to the plasma membrane where it interacts with proOmpA to support translocation of proOmpA through the membrane. SecA protein achieves this translocation, in association with SecY protein, in an ATP dependent manner [1,2]. 
This domain represents the N-terminal ATP-dependent helicase domain, which is related to the Pfam:PF00270 [3]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null --hand HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.72 0.70 -5.10 90 5907 2012-10-05 12:31:09 2003-12-04 16:33:08 9 48 4809 34 1211 5542 4671 373.00 47 44.91 CHANGED l.pphh....tsts....c+pl++h..............pp.hlppIsshcsphpsLoDc-Lcpcopch+p+l.......ppG......................................c.......oL-p..............................lLsE...AFAllREAupR..............................................slGhctaDVQllGGlsL.........+pGpIAEMpTGEGKTLsAoLPuYLNALsG+GVHlVTVNDYLAcRDu-.hMu.lacaLGLoV..Gllhsshs..................................................s.ppR+p...............................................................uYtsD..ITYuTNsEhGF....DYLRD.Nhshstppt.....................Vp.R...shpaAIVDElDSlLIDEARTPLIISGssppssphahthsphstphppppptphppppptthhpppsttptcphhthtthhtt.........................................................................................................................................................tphphhchlppuL+AptLac+cpcYlVp-scVhIVDEaTGRlMtGRRao-GLHQAIEAKEsVpIpsEspTlAoITaQNaFRhYpK.LuGMTGT..ApTEspEFt 
..............................................................................................................................................................h....h..s.ts.c+pl+c.h..............pc.hlp.pls....sh.E.sphpt.L.........oD..-p....L+s...KTtE...F+pRl..........tp.G............................................................................-........oL.D.s...................................lL.PE...AFA..l.V.R...EA..u.+R...........................................................................Vl..G..h..c..aD.VQl.h.G...G.h..lL...........................Hp.G.p..IAEM......+......TGEGKTLsA.T.hPs..Y...L.N..A.L..........s..............G.....+.......G..V.H.....VV.T.V.NDYLA.pRDu..-........Mu.la.p.F..LG..L..o.....V..G..l.......l....s..s.h.s...................................................s..tp.++..p...............................................................AY......s......s......D....ITYuTNN....................EhGF.....D.Y.LRD..NM.sh.p...h...--h.....................................V.Q...R...s.....hp.....a..AIVDE.......VD.SI..L.....IDE.ART.PL..IISGssccsophYhphsphh.pLhppcc.p.cppppthhhsppshpptpphhthtthhtt..........................................................................................................................................................tsh.lh+alstAL+Achlhp+Dh-YlVp-uEVlIVD-aTGRhMtGRRaS-GLHQAlEAKEGVpIpsEsp.Th.A.o...ITa.QNaFRh...Yc..........K....LuGMTGT..AcTEttEF...................................................................................................................................................................................................................................................... 0 452 817 1053 +7347 PF07519 Tannase Tannase and feruloyl esterase Studholme D anon Manual Family This family includes fungal tannase [1] and feruloyl esterase [2-3]. It also includes several bacterial homologues of unknown function. 
20.00 20.00 20.10 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 474 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.63 0.70 -5.78 8 801 2012-10-03 11:45:05 2003-12-04 17:38:35 6 15 346 0 504 870 96 395.40 25 79.35 CHANGED CcVpG.....hpsscspt.pItFplpLPs.sWNGRalQsGsGGasGslsssst.s......hs.sLspG.aATsuTDuGHcAsss......sssFu.hss-shhsFAYpAl+cssssuKtLlpsaYs+sscp...sYFhGsSsGGREGLhAAQRaP--YDGIlAuuPuhsa.lptp...hatuh.u+sshss.....sshlsssclpllspAslspCDslDGssDGlVsDPctC+hshs......psLhCpush.........ssc....................sCLTssQlpslsplas.Ghp.....usGpshYsGas..s.....hGs.ssshspWhhss...............ts.hsstssphhp.hl..............hpssNaDsso.hch...tthccp....htplsssIsATsPDLosFRs+GGKLIhapGhsDPsloPtuolpYa-uVhA+hsts............pscsFhRhahlPGhsHsGuGsusssh...............DhLsshssWVEsGpAPspllAsctssss.........................hsRoR.LCpYPphshacGpGsscsAsSapCs .................................................................hts....tt..t....tlthph..hL.Pt...s............Ws...s...R...hh.h...Gs.GGh.s...u...sh...s.s...................................hs.s.lt..G..aAshuoDsGa......s...us............................h....u......hs..p......t......h.h......sa.u.apulpths.huK.tlhpthY..u..p.s......s.p.h...................oYa.GCSsGGR..puhh...tsp+aPppaDGllAusPu...hp...h..sp.............hhth..h............h....ht.................s.h.ss.s.p.h.p.h.ltp.ts.lttCD.s.l..DG..h..tD.......Gllps..st...hCp.hp.p............shhC.tss..............ts..............................s.s.lostph.....tshp....p.las....s.hs.........ssGph....ha...sh..............s..................s...........h.t............................................s..th.ht.....hh..........................................s.s.h..s..t.hs...............t.....t...tt.h......h.hs..s...sp...sD.L.osap..p..p....G...GKllhaHG................h.......u.....D.hlsstsohtaappltpt.h..t.t.............................thp..pFhRhahlPGhsHCs....s...u..s..s..ss...h............................................s.hltslhpWVE..p.G..huP.pt.l..usths....sst............................
...............thpR.lC.aPhhsh.at......u..G..s...pts...tsa................................................................ 0 92 240 394 +7348 PF07520 SrfB Virulence factor SrfB Studholme D anon Manual Family This family includes homologues of SsrAB is a two-component regulatory system encoded within the Salmonella pathogenicity island SPI-2. Among the products of genes activated by SsrAB within epithelial and macrophage cells is Swiss:Q9KIJ9 [1]. Homologues are found in several other proteobacteria. 19.10 19.10 19.60 20.60 18.10 19.00 hmmbuild -o /dev/null HMM SEED 1002 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.76 0.70 -13.73 0.70 -7.08 12 283 2009-01-15 18:05:59 2003-12-04 17:59:07 6 2 255 0 53 223 26 878.30 58 97.93 CHANGED M.LspLscapp.ploLl.cSGIQFLDFuLp.phssc......sucFlcpsusGslhRL..ctsss+a..hlP...............spsGh.sclscs-h.olshcpSLclhpslWLPlPFhRhpPscs......FspGPsNWARlpllpL.spPDpcGNTHRlsLAFDTplhscpts.htYLAPscsDlpsGssFuLAa+sc-lu.FL-..............psWVssWL+ElFpp.s.tp.ctRstc-........lpctLtthEapAHYLNlLslLG...........splplPcl+llssshps....sIsVDLlLDVGNS+TCGlLIE-Hsp-ss.GLppshcLpLRDLSpPchlYs-.FESRlEFApApFG+pcaSlcSGRsDAFhWPoIsRVGsEAsRLAhpRpGTEGuTGlSSP+RYLWD-pshtpGWRF.....stshspoppEPLATAsPltphlN-pGcsLapL...........st--RlPVFpPpYSRSSLMTFMLuElLsQALhQINSsApRh+hspsssPRpLRslILTlPsAMPKsERcIFRpRhppAluLVWKuhGWa.tDsDh...p.tsppp....spsPlPtlphcWDEAoCGQhVYLYsEstspaGG+scpFFssh.t.....RP-p....t..s..t+sLRlASIDIGGGTTDLsIopYtLDcG.....hGsNVpIhPc.hFREGFKVAGDDILLDlIpchVLPAlptuLpptGhss..sculhucLFGs-..ut.stptlLRQQhsLQlFhPlGhulLpsYEsaDPhsspupl.phoFu-LLt..............................phP.oppVlsYlspslc+...susssFslhslPLtlcLspl+pthl...ssphslspslcuLCEllphYsCDVLLLTGRPSRLPGlQALhRphpPlPssRllPlcsY+sssWYPFpcpG..RIDsPKoTAAVGAMLChLu.s.RLssFhF+sschtsYSTlRYlGhLDsN.NtlpDpslaY+DI...........DLDsPshpLs.ctpFphRGphpLGFRQLss-RWsAoPLYpLsls..ssclA+plsG.......cusLpVcLp.......ptus........pcsshEpFtIs-A.h.psGspls...........pplpLpLsThssptusss.YWlDoGSlhhc 
........................MLssLsDYKQ..plTLItNSGlQFLDFuLoPp.tss...c.......u+F.............VR..K...oA..N...GP.....LLR..Ls..ac...psG+Y..sLs.....................................stsGutPEl..VKPE..o.sLc.SLclLsslWLPLPFL.RFsPPRT......FlpGPDNWARlQlhpL...spPD.psGNTHRlTLAFDoplhtph.....s...hLAPsENDlhsGspFALAa+s-Elu-FLD..............pTWlDGWLREsFhphA..uphEpRstpt...........IppuLRpFE.YQAHaLNLLsLLG...........pQLslPElKhsopTLpp...PAlsVDLILDVGNoHTCGlLlEDHuDtss...GL+Q....T.hELQlRsLScPpaL....s.LFpSRVEFupA+FGKQpFSVESGR-D.AFlWPSIsRVGcEAptLAhQ.....Rl..G..T....EGSoGI.....SSPRRYLWDEpshhtsWRF.....sphts.p...sp.....cEPLATAhPLhpLhND-GpPLapL............Ph-ERLPVFSPpYSRSoLMTaMLsElLAQALhQINSsApRL+h...sassuPRQLRolILTLPSAMPK.EREIFRpRMhEAlALV..WKuMG...........WHPtD-..DFs.........ospp+tK........ShVPVP.-lQMEW..DEAoCGQlVaLYNEs.spauGRo-u.FFsuh.A.....RPD+p.p..su.ss..G+sLRlASIDIGGGTTDhAIspYpLDD....G............sGsNVKIoP+LLFREG.FKVAGDDlLLDlIQphVL......Pu.......LQsuLp+AGVss..usuLhupLFGss..GRhDsQulLRQQssL...........QlFMPlG+AlLpAaEp.D..DshAt.....l.cAoFG-LLh..............................ppP.TppVhsYIppsIp+.tLPuu.ussFDIhsVPLplphSpLppthL...uspholTpsL+AlCEslShYpCDlLLlTGRPopLPGlQALhRHLQPlPsNRIl.hDtYpsp-....WYPFsppG..RIsNPKSTAAVGAMLC.LALD.L.RLspF.FKAuDh..t..sYSTlRYLGhLDso..ssLp-ENlaY+-I...............DLDpsshs...L..s..sch+F....lRGs.loLGFRQLsNsRWPAoPLYsLSIs...ssELA+plAG...................Du.V..Ls..V+L+................lpsus..................+csuPE..pFhLu-AhL..pDGos.Vsh..........ctLpLKLNTLAsptpu.uoHYWIDSGSVah.c........................................................................... 0 4 17 37 +7349 PF07521 RMMBL RNA-metabolising metallo-beta-lactamase Finn RD anon Pfam-B_760 (release 11.0) Motif The metallo-beta-lactamase fold contains five sequence motifs. The first four motifs are found in Pfam:PF00753 and are common to all metallo-beta-lactamases. The fifth motif appears to be specific to function.\ This entry represents the fifth motif from metallo-beta-lactamases involved in RNA metabolism [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.99 0.72 -4.14 138 6209 2012-10-01 19:09:44 2003-12-05 14:54:50 7 36 3653 52 1894 4689 1569 40.50 33 7.13 CHANGED chhtlchplpplc.hSuHAcpp-Lhphlpth.ps...cplhlVHGE.p ...........t.........ppl+..sSGHuspp-lphhlsh...l..+P.........+hhhPVHGEh.................... 0 666 1222 1605 +7350 PF07522 DRMBL DNA repair metallo-beta-lactamase Finn RD anon Mannual Domain The metallo-beta-lactamase fold contains five sequence motifs. The first four motifs are found in Pfam:PF00753 and are common to all metallo-beta-lactamases. The fifth motif appears to be specific to function.\ This entry represents the fifth motif from metallo-beta-lactamases involved in DNA repair [1]. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.69 0.72 -4.02 43 592 2012-10-01 19:09:44 2003-12-05 17:30:10 9 30 300 0 406 592 9 106.50 25 15.45 CHANGED spltshlTs-.tpsplHll.shs......plp....hpsLtsahpthttp......asplluh+PTG..Wshps.htt..s..t.............................................ttphslaslPYSEHSSapELccFVphl+Pp..clIPTVssss ....................................................................thhTss......t..tsplHhh..h..........php.........hppl..phhpt....t...h................hpph..luhpPou......Wshptt.h..t.p...................................................................................ttt.tha..tlPYS-HSSapELppF..lphl+Pp..pllP.sVs...s......................................... 0 126 211 313 +7351 PF07523 Big_3 Bacterial Ig-like domain (group 3) Bateman A anon Bateman A Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.09 0.72 -3.90 40 2232 2012-10-03 16:25:20 2004-01-06 12:05:57 7 198 845 2 141 1725 26 68.80 28 14.33 CHANGED lps+DSTIYlGDsWsuc.DNFsSAssKsGps........lsas-lpVsG.......sVDospsGsYplTYoasG......sopolsVTV ...........................h..spcsshhhG...-s..a.ssp..ssh.lsApsc.s.Gss.............ls.hsc..lp.VsG...................plDos..K....s...G.p.Y.pl.oYp.aps.............hptshpVpV........................ 0 69 91 112 +7352 PF07524 Bromo_TP Bromodomain associated Studholme DJ anon [1] Domain This domain is predicted to bind DNA [1] and is often found associated with Pfam:PF00439 and in transcription factors. It has a histone-like fold. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.42 0.72 -4.25 12 679 2012-10-10 12:36:46 2004-01-06 13:32:37 8 18 266 0 482 767 3 74.20 24 13.90 CHANGED pchspslhp.hsluplhppsGa-ssppusLEoLothhtpYlpclucphppau.ptssRspss.htDlh.hsLtchul.sVsp ......................................tcplLphuVup.lh.pp...sGF..-ssp.uu.l-sLT-l.hpc..Y....lpplucssppas..-...t......ts..R..s....t.s......s....htDlh...sh.tph.uh.....t..................................... 0 133 242 379 +7353 PF07525 SOCS_box Clip; SOCS_Clip; SOCS box Studholme DJ anon [1] Domain The SOCS box acts as a bridge between specific substrate- binding domains and more generic proteins that comprise a large family of E3 ubiquitin protein ligases. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.11 0.72 -4.09 141 2186 2009-01-15 18:05:59 2004-01-06 13:59:50 11 135 116 6 1208 1938 0 39.00 35 9.64 CHANGED stoLpcLCRhsIRpp.lsppt........lppLPLPptLcsYLt.ap ..........shoLpcLCRhsIRpt.lstpt...............lspLP.LPspL+pYLp.a.......... 
0 198 297 634 +7354 PF07526 POX Associated with HOX Studholme DJ anon [1] Domain The function of this domain is unknown [1]. It is often found in plant proteins associated with Pfam:PF00046. 21.60 21.60 21.80 23.30 21.50 21.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.81 0.71 -3.95 39 330 2009-01-15 18:05:59 2004-01-06 14:12:16 6 5 36 0 166 329 0 129.90 38 22.83 CHANGED phltsS+YL+sAQcLL-Ehss.Vup.....................ht.tptptsttptpssstssssss..sstsssssspt.ssplo...........ss-ptEhQhKKsKLluML-E..................V-+RY+pYpcQMQhVluSFEsVAGhGuApsYTuLAL+shSRHFRCL+....DAIsu ..............................t.hlhsS+aLpsAQcLL-Ehss.Vst.............................................t...tttt..t..st....t..s...sts.s..s.s.tss....ssps.ttss.s..p.s.ssplu.......................................sspttEhQh.KKsKLluML-E.....................................................................V-.+RY+pYhcQMQhVsuSF-s.......V...A......Gh..GuA.tsYTuLAL+shS+HFRCL+DuIs.s........... 0 16 100 135 +7355 PF07527 Hairy_orange Hairy Orange Studholme DJ anon [1] Domain The Orange domain is found in the Drosophila proteins Hesr-1, Hairy, and Enhancer of Split [1,2]. The Orange domain is proposed to mediate specific protein-protein interaction between Hairy and Scute[2]. 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.99 0.72 -4.21 79 952 2009-01-15 18:05:59 2004-01-06 14:24:01 8 6 111 2 510 910 1 42.90 31 15.11 CHANGED spa+uGapcChpEVs+aL.sshpuh........cs...thpp+LlsHLtpshsth .......tapsGapcChpEVs+aL.ushcuh...................ss.......slps+LlsHLpphhs..h................. 0 101 143 313 +7356 PF07528 DZF DZF domain Studholme DJ anon [1] Domain The function of this domain is unknown [1]. It is often found associated with Pfam:PF00098 or Pfam:PF00035. This domain has been predicted to belong to the nucleotidyltransferase superfamily [2]. 
20.40 20.40 21.10 21.00 19.60 20.20 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.52 0.70 -4.96 4 544 2012-10-02 22:47:23 2004-01-06 15:10:16 9 21 114 0 254 492 0 210.30 43 35.06 CHANGED pVGSFtKGThhpGsssuDlVllLKoLPTp-sl-tLu+KVttsLctuhcs.........Eshph........hEhGhcIosshs+VRhLIshlPpshpKLEP.hHLDpKhh.upLAulRHs+WFpppApc.......ohplLIRlLKDLspRassFpPLssWhl-LLAahuIhNsPuRQshslshAFRRVFplLAuGlFlPsSuGIhDPsE.uphRltsshTL.QpDssChoAQTLlRlhAaG..GY++ILGhcsssos .....................................................................................RVG.lAKGLll.+.G-.sl-LVl.lspp.hPTt....sLLpplup...pLs.pLtt.hs.c..........................t.....-sh.........hh................t................hph..s......hp..lo.....Ss...h.h..+....c...........h....t.............s......s..p..s....h.t....h..D...P...........p.h.............LDpphCh.sA.L.Aul...RH..A+WFp.s...+Asslp.....Ss.hllIRlLRDLspRh.Ps.Wts...LpsW.slEL.Lsc.......+uls.....os...ppP...hu.su...cA.h.......RRlhEsluoGllL..........s....s....usGlhDPCE........cps..........hcshshhohpQp...............-slT.oAQ...p.hLRlhAFt..ph+KlL.Gh-s.s...................................... 0 60 80 160 +7357 PF07529 HSA HSA Studholme DJ anon [1] Domain This domain is predicted to bind DNA [1] and is often found associated with helicases. 20.80 20.80 21.00 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.26 0.72 -4.19 50 836 2009-01-15 18:05:59 2004-01-06 15:46:46 8 46 264 0 522 792 6 70.80 25 4.41 CHANGED p+htctp...+p+s+actlLpphthhupDFppt++h+.....hstsp+luchltpaapptpppcp+ctc.......tcpchptltp ...........................+htp.p...+p+p+as.h..Lpphh.hupDFppt++.p.......tupspKls+tltpaHtppccc.pc+ppc.........pcp+h+tlh..................................... 0 155 262 412 +7358 PF07530 PRE_C2HC Associated with zinc fingers Studholme DJ anon [1] Domain This function of this domain is unknown [1] and is often found associated with Pfam:PF00096. 
22.10 22.10 22.80 22.50 21.40 21.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -8.95 0.72 -4.07 10 61 2009-01-15 18:05:59 2004-01-06 16:12:08 6 8 16 0 30 76 1 64.50 22 11.32 CHANGED sppIcppLpcpGapsppl+shpps................ss+sPhNMFhVpLssss-..pcc....ILplKpLGph+.VsVERtp++c-s ................................................IhppLpp.Gassh..plhshppt.................pcps....h.shahlcltss.s..pcc.....lhpl+plst.h.VplEt.phpt............................. 0 13 16 28 +7359 PF07531 TAFH NHR1 homology to TAF Studholme DJ anon [1] Family This corresponds to the region NHR1 that is conserved between the product of the nervy gene in Drosophila and the human mtg8b protein [1], which is hypothesised to be a transcription factor. 21.30 21.30 21.50 21.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.15 0.72 -4.35 14 474 2009-01-15 18:05:59 2004-01-06 16:26:22 9 11 109 4 227 399 0 92.60 49 15.33 CHANGED hcslpKC+pFLssLlcLuss...pus-huppVRsLVpsLlsuplpsEEFsp+L.ptLNusPQPaLVPFLKpoLPsLRphh.sspthlpQsth.hh..ssss .....scplpKhKpF.LoTL.phusc.....SPElucpVRsLVhsLlsuslph.EE...Fps+LpcthN.s..p..............Pall.PFLK............t.s.........LPhLpptLh.csAphhpQsstQh.s...p.......................... 1 38 55 123 +7360 PF07532 Big_4 Bacterial Ig-like domain (group 4) Bateman A anon Bateman A Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -9.12 0.72 -4.45 98 1839 2012-10-03 16:25:20 2004-01-06 16:50:15 6 189 521 0 210 1443 6 61.30 29 9.61 CHANGED hpshshs.st.........h..Gss.........................pLPppVpshh.s-Gos...p.......p...hsVp.W...........s........stphspsGs.aplpGp..lp..Gh .................................................h.t.hplh..st........................l..ups....P...........................pLPppVssha...scGpp...p........p...hsVs.W...........s........thsspphspsGs.apVpGp.lp.G............................. 0 96 157 179 +7361 PF07533 BRK TCH; BRK domain Studholme DJ anon [1] Domain The function of this domain is unknown [1]. It is often found associated with helicases and transcription factors. 20.90 20.90 20.90 21.10 20.60 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.94 0.72 -4.49 20 582 2009-01-15 18:05:59 2004-01-06 17:04:04 11 35 89 5 303 552 2 46.10 40 2.68 CHANGED slss-pRVsVlspcsG+pLsGscAPpt+cLppWLptsPsatls.....Pct .....p.su-p.V.VlppcsG+h.LsGs-APptppLp.pWLctsPsYtVs.....Pc............. 0 54 76 170 +7362 PF07534 TLD TLD Studholme DJ, Eberhardt R anon [1] Domain This domain is predicted to be an enzyme [1] and is often found associated with Pfam:PF01476. It's structure consists of a beta-sandwich surrounded by two helices and two one-turn helices [2]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -11.13 0.71 -4.19 112 1684 2009-01-15 18:05:59 2004-01-06 17:16:33 11 56 317 1 1140 1627 32 133.50 22 28.56 CHANGED ll.assp.pcGtuhps..hhpphp.......pp.ushllll.............ps.sc...h.....lFGuasspta......p.t.......ahGs...spo.FlFpl..........psphtsa+h.....ssps.........thahhssp......p.........luh.Gss..................phuLhl-s.shpp.uhs..p.hs.............soas..ss..hs..........tpppFplpslElWuht .................................................................................lasst.pcGhS.....hps...hhppht.............hp.sssllll........................................cs.pps....t.............lF...Guass.psh..................................p.spp.......ahGs......scsF.l.Fph................................ps.p.h...p.haph............ssts.......................................t.hh.hhss..............pt.................................lshGus...............................................thuLhlcs......sh.p...uhs.p...s...............toat...s...Ls........................................ttpF.l.tlElath........................................................................................ 0 519 685 941 +7363 PF07535 zf-DBF DBF zinc finger Studholme DJ anon [1] Domain This domain is predicted to bind metal ions [1] and is often found associated with Pfam:PF00533 and Pfam:PF02178. It was first identified in the Drosophila chiffon gene product [2], and is associated with initiation of DNA replication. 24.80 24.80 24.80 25.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.50 0.72 -4.28 29 271 2009-09-11 05:51:30 2004-01-06 17:34:58 7 7 203 0 185 277 0 47.60 39 6.36 CHANGED pcpcsGYCENC+hKY-sh-p.................HltSc+HRpFApscpNatslDsLItpLpp .......cp+sGYCEsCppKa-..-.h-p.................Hl..hSccHRpFAtpspNattlDplltpL............................ 
0 50 87 141 +7364 PF07536 HWE_HK HWE histidine kinase Studholme DJ anon [1] Domain Two-component systems, consisting of a histidine kinase and a cognate response regulator protein, represent the best-known apparatus for transducing external cues into a physiological response in bacteria. The HWE domain is found in a subset of two-component system kinases, belonging to the same superfamily as Pfam:PF00512 [1]. The family was defined by [1] the presence of a highly conserved H residue in the kinase domain and a WxE motif in a C-terminal ATPase domain that is related to Pfam:PF02518. These proteins are found in a variety of alpha- and gamma-proteobacteria, with significant enrichment in the rhizobia. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.64 0.72 -3.55 44 1003 2012-10-11 19:05:54 2004-01-07 14:13:07 9 153 270 0 408 2339 212 82.20 32 14.48 CHANGED ELsHRVKNhLAslQSlspQTh+pssshschhpphpuRLtALupuHsl....LocspWtuusLp-LlptpLpPats......sp+lplsGPs ..ELsHRlKNhLAlVpuls...p..p....T..h....+..p.....s...s.....s....h....c...p....h..t..p..t...h....p...u...R..lpAL.....upu.a.s.l........L.s.c.....s...s.....W.....p.....u.....s.....s.....l....p....p....Llps....plt.sass...t......ssRl.plpGP.............................................. 0 81 201 261 +7365 PF07537 CamS CamS sex pheromone cAM373 precursor Studholme D, Williams W anon Pfam-B_18913 (release 11.0) Family This family includes CamS (Swiss:Q8L313), from which Staphylococcus aureus sex pheromone staph-cAM373 is processed. 
25.00 25.00 25.40 25.20 23.60 23.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.98 0.70 -5.40 7 766 2009-01-15 18:05:59 2004-01-21 15:50:20 6 2 576 4 74 428 0 278.80 36 84.01 CHANGED PaKtupuRGlssssl..ssRhDl-EhEsGLhclup-pFss-sahaQ-GQYlDccplppalt+cp............cs......s.GLNPsh..tssshcphp.ppPhYLopILEpDYhscpDssshpLsGlsIGLAMNSV..appcpstsph...ppsIs-pchhppGcchAppllpclRpp-....shcslPIshAIY+Qts+sSlsPGNFlutssVptsssslssWpsIsEK.hlaPS.spsscpttpDssphppFppplpsaFs.N..asulVGcuhYcccphpcLplDIPlpa.GKuElluhTQalsshl.chaPK.h.clplpIpoupp.EAlIhRpss-ccPhV .................................PactupuRGlh...s.sh...ssphsht.-aEsGLhpluKc.FsTcpYlaQ-GQaLsppTl.pt......a.Ls.K..................c..p...........s.G...LNPs.......sp.....t..s..c.....phs......sPh.YLspIlEQDahsptsspsh..pltGhsIGLAMNSV.YYp.ccp.ssp......pppls....s...pchhtp...G.+phAscllpplRppc....ph.cs.lPIphAlY+Q.usp.s.SlssGpals.uss.pps..ps.plspWcsIsEKshlhP....S....sstsp.....cp....s........sspFppFpsplpsaFs...s.a....otssupspahcpphpplslslshpaaGpsEhhuhTQalsp.s.cahsp.s.phplpIps.ss...p...pAlI.+ptssccs........................... 0 19 41 56 +7366 PF07538 ChW Clostridial hydrophobic W Studholme DJ anon [1] Repeat A novel extracellular macromolecular system has been proposed based on the proteins containing ChW repeats [1]. ChW stands for Clostridial hydrophobic with conserved W (tryptophan). This repeat was originally described in Clostridium acetobutylicum but is also found in other Gram-positive bacteria including Enterococcus faecalis, Streptococcus agalactiae and Streptomyces coelicolor. 20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.17 0.72 -7.56 0.72 -4.43 194 1324 2009-01-15 18:05:59 2004-01-21 17:12:02 6 86 159 0 292 1109 45 35.70 41 26.52 CHANGED HVps..hGWp.s.h.spsGphuGTsGpuhRlEAlclpLssp ..............HVQshGW.......p..s...a.sp...sGp....h......u.GTsGpuhRlEAlcIpLss......... 
0 235 272 292 +7367 PF07539 DRIM Down-regulated in metastasis Wood V, Studholme DJ anon Pfam-B_10642 (release 11.0) Family These eukaryotic proteins include DRIM (Down-Regulated In Metastasis) (Swiss:O75691), which is differentially expressed in metastatic and non-metastatic human breast carcinoma cells [1]. It is believed to be involved in processing of non-coding RNA [2]. 24.80 24.80 25.80 26.30 24.70 24.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.60 0.71 -4.77 29 267 2009-01-15 18:05:59 2004-01-22 13:23:18 7 4 244 0 207 278 1 140.10 31 5.54 CHANGED plhuKFpN.Ksla+usclaph.hhpLLsspssclQKhALcslhsa+ss.sls.Y+-NLpNLLDDspF+DElssFh..hssppps................IpspcRstlhPlllRILaG+hpspssusspp.....uR+tuVlphL.sshppp-lt.Flcluhs.l ....................lFupFtNP+slappsclaph.h.h.pLLsps-tplQ+hALcslhs..aKp..s..tl.hs.Y.c.-sLppLl-.....-..p.......pF+-ELspF..hspppt.................................lcs.pHRspLhPlllRlLYG+hhs+suupstt................tsR+tslLphL.ush..pspElthFlplhhts............................. 0 68 111 170 +7368 PF07540 NOC3p Nucleolar complex-associated protein Studholme D anon Pfam-B_8562 (release 11.0) Family Nucleolar complex-associated protein (Noc3p, Swiss:Q07896) is conserved in eukaryotes and has essential roles in replication and rRNA processing in Saccharomyces cerevisiae [1]. 
20.70 20.70 20.70 20.70 20.10 20.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -10.17 0.72 -3.72 30 305 2009-01-15 18:05:59 2004-01-22 13:51:56 6 4 269 0 214 288 3 96.10 38 13.28 CHANGED lhctKEcIAclustlhEDPEENltsLccLtchspsp........phslpKLulloLlsVFKDlIPGYRIRPLoEpEtppKVSKEVp+LRsFEpuLlpsYKtY ..................................................l.ptKpcIAplustlhp..-PE..p..s..lt..........t..........L....+c.Ltphsppp....................................shslpKL...ullolhsVFKDIlPuYRIR.........s...........L.........o.E....p.E................t................p....p.....K.........lsKElp+LRpFEpuLlptY+tY...... 0 74 120 178 +7369 PF07541 EIF_2_alpha Eukaryotic translation initiation factor 2 alpha subunit Wood V, Studholme DJ anon Pfam-B_5125 (release 11.0) Family These proteins share a region of similarity that falls towards the C terminus from Pfam:PF00575. 21.70 21.70 24.20 23.60 21.30 20.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.30 0.71 -4.36 78 521 2009-01-15 18:05:59 2004-01-22 14:09:57 7 4 460 17 353 497 94 115.20 34 38.02 CHANGED -.hacpluasL.ccaG....csY-.AFc.ssspss.plLschp..ls..............................cchtcsLhphhpcplpsptV+lpu.l-lpshs.-Gl-tIK.cAL..psup..phs....scphp........lclphluuPpY..tlpspu.Dh .........................LappluWs..Lp..c..KYG..............cua-.AF.K.slsc........s........s..l..l.csls..ls..................................c.c.h.t-sLhp.Ip++loPpslKlRADlEls.Caua.-GI-ulK.cAL.+s.up...stu.............scph.................lKlpLlusPhYllospshDh.......................................... 0 119 206 294 +7370 PF07542 ATP12 ATP12 chaperone protein Wood V, Studholme DJ anon Pfam-B_6737 (release 11.0) Family Mitochondrial F1-ATPase is an oligomeric enzyme composed of five distinct subunit polypeptides. The alpha and beta subunits make up the bulk of protein mass of F1. 
In Saccharomyces cerevisiae both subunits are synthesised as precursors with amino-terminal targeting signals that are removed upon translocation of the proteins to the matrix compartment [3]. These proteins include examples from eukaryotes and bacteria and may have chaperone activity, being involved in F1 ATPase complex assembly. 20.50 20.50 20.70 22.80 20.20 19.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.67 0.71 -4.16 100 585 2009-01-15 18:05:59 2004-01-22 14:27:18 6 9 548 7 318 554 186 126.20 35 43.45 CHANGED KRFaKcsslt.p.........sssG.asltLDGRsl+TP.u+psLslPocs..LApulAsEW...sAQ..pctIcPtoMPlTpLussAlDtlss...p...............pstlhptlhpYhsoDhLhYRAss...........Ppp.........LspcQsctW-PlLcWspp...ph ......................KRFacpsslt..p...........................scsG..atltLDuRs.l+TP.scphLhlPocs..LAps..lAsEW.....suQ..pc.tIcstsMPlTpLsssAlDtssp...s.........................................................................pptlhcslhcahsTDhlhaRusp.........................spp...............L...hptQpcpW-PllcWhppt.h....................................................... 1 106 183 258 +7371 PF07543 PGA2 DUF1531; Protein trafficking PGA2 Wood V, Studholme D anon Pfam-B_46790 (release 11.0) Family A Saccharomyces cerevisiae member of this family (PGA2) is an ER protein which has been implicated in protein trafficking [2]. 
24.00 24.00 24.40 24.20 22.60 23.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.19 0.71 -4.38 6 125 2009-01-15 18:05:59 2004-01-22 14:39:42 7 2 105 0 89 97 0 112.90 28 84.02 CHANGED htpNltsoFup...hohc+WIRlIllVGuYlLIRPY....FhKLusK....spt+cpEKEpAEscstp.......u+lSsNuLRGutstup....h.sE-TD-E.......Esstp.ussSusscWGKsARKRQ++hh+cL.cptEcppcc...t--DcDIpEhL-c ...............sh.tphss...hshpcalRllhIlGuYhllRsa....hh.chh.t+.....tthcphcc-p..tctctt............t..tspth+s..t..t................cp-.......cst....ttsou...tWG+psR+..+.+p........c........h.p...t.-phptp....................ppDpDIt-hLc........................................... 0 13 38 70 +7372 PF07544 Med9 CSE2; RNA polymerase II transcription mediator complex subunit 9 Wood V, Studholme DJ anon Pfam-B_45625 (release 11.0) Family This family of Med9 proteins is conserved in yeasts. It forms part of the middle region of Mediator [4]. Med9 has two functional domains. The species-specific amino-terminal half (aa 1-63) plays a regulatory role in transcriptional regulation, whereas this well-conserved carboxy-terminal half (aa 64-149) has a more fundamental function involved in direct binding to the amino-terminal portions of Med4 and Med7 and the assembly of Med9 into the Middle module. Also, some unidentified factor(s) in med9 extracts may impact the binding of TFIID to the promoter [5]. 26.10 26.10 26.20 26.40 26.00 26.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.61 0.72 -4.25 43 187 2012-10-04 14:01:12 2004-01-22 16:04:03 8 1 175 0 129 174 0 82.80 25 51.36 CHANGED ssphhPtlhshhpphtp.......sshssc-ltstsusl+h+lpcs+shlpplss....ls+oscEQppcIcpLcpplppppplLpcapppst ........................................t..phlPhlhthlpp..............hshcspDlppphssl+p+lpcsRphlpphPs....lcpoh--QppplcpLcpplppKpplLpca+phh............... 
0 23 52 96 +7373 PF07545 Vg_Tdu Vestigial/Tondu family Bateman A anon [1] Family The mammalian TEF and the Drosophila scalloped genes belong to a conserved family of transcriptional factors that possesses a TEA/ATTS DNA-binding domain. Transcriptional activation by these proteins likely requires interactions with specific coactivators. In Drosophila, Scalloped (Sd) interacts with Vestigial (Vg) to form a complex, which binds DNA through the Sd TEA/ATTS domain. The Sd-Vg heterodimer is a key regulator of wing development, which directly controls several target genes and is able to induce wing outgrowth when ectopically expressed. This short conserved region is needed for interaction with Sd [1]. 19.40 19.40 19.70 28.80 19.30 18.40 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.35 0.72 -4.51 3 189 2009-01-15 18:05:59 2004-02-04 14:20:38 9 1 74 2 108 146 0 32.50 69 11.85 CHANGED pYlNAuCVlFTYFpGDIuShVDEHFSRALsass ...pYlsSRCVLFTYFQGDIuSVVDEHFSRALup.................... 0 15 23 56 +7374 PF07546 EMI EMI domain Bateman A, Doliana R anon [2] Domain The Pfam alignment is truncated at the C-terminus and does not include the final cysteine defined in Callebaut et al [2]. This is to stop the family overlapping with other domains. 21.80 21.80 21.80 21.80 21.50 19.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.38 0.72 -3.52 43 529 2009-01-15 18:05:59 2004-02-04 17:48:52 8 46 72 0 259 472 0 70.40 32 10.79 CHANGED spNhCuhhhppslsh.....spstspshhphhhpsC....hst.tCs..............................paRsha+s....sY+hshchh..ophtacCCPGapt.s .........................+shCuahs.s+slshh...spstspshhps..hp..C....hst.tCs..............................pYRshhRstY+hshKsl.....ophcW+CCPGapG..p.................................... 0 28 49 116 +7375 PF07547 RSD-2 RSD-2 N-terminal domain Bateman A anon Bateman A Domain This domain is found in three copies in the N-terminus of the C. 
elegans RSD-2 protein. RSD-2 (RNAi spreading defective) is involved in systemic RNAi [1]. Mutations in the rsd-2 gene do not effect somatic genes but only germline expressed genes [1]. 25.00 25.00 25.80 25.20 19.60 21.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.77 0.72 -3.84 7 25 2009-01-15 18:05:59 2004-02-05 14:53:25 8 7 5 0 22 24 0 86.50 29 13.64 CHANGED FSssspHhTYsRpluaADhYGhV-sup..ts.ptsssaps.Isls.lsYppstpsLFKssph.hch.ssc-ss-hhpphhpFE-ph ................FSSsspHhTYs+pluauDtYGhV-ls....c...tshcpssVYpspIslss.h..............phcsstpslFKlssh..hc.ptp...s-sspc.ahpphhpaE-p..................... 0 5 6 22 +7376 PF07548 ChlamPMP_M Chlamydia polymorphic membrane protein middle domain Yeats C anon Yeats C Family This family contains several Chlamydia polymorphic membrane proteins. Chlamydia pneumoniae is an obligate intracellular bacterium and a common human pathogen causing infection of the upper and lower respiratory tract. This domain is found between the beta-helical repeats (Pfam:PF02415) and the C-terminal Pfam:PF03797. This domain is excised subsequent to secretion [2]. 
25.00 25.00 35.80 34.20 23.00 23.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.93 0.71 -4.49 55 594 2009-01-15 18:05:59 2004-02-19 17:21:38 6 15 39 0 64 433 0 177.10 26 16.15 CHANGED GsLsLccsApLpstu..FoQpsGo.lhhssGosLt........................................................sssssslslssLslsLsSlh..................................................................................sspssshtssssssslo.....loGslsLl-ss.sshY-shsLspsh.phsllplssss..........sstspshssshstssss+YGYQGsWohsWpp.................ssspppslhhsWss ........GsLlLc-sApLpshs..hoQ.ss.Gu.lhhssGosLp..................................................................sssssslslssLslsLsSlh..................................................................................s.s...sssss.....h.....ssssssslo.....l.s.G.slsLlDss.sshY-sh.tLsss...plsllplssst..........pstspstphshs.ts.ss+YGYQGsWohs.Wpp..................stpspoLhhsWp.................................................................. 0 9 9 50 +7377 PF07549 Sec_GG SecD/SecF GG Motif Yeats C anon Yeats C Motif This family consists of various prokaryotic SecD and SecF protein export membrane proteins.\ This SecD and SecF proteins are part of the multimeric protein export complex comprising SecA, D, E, F, G, Y, and YajC [1]. SecD and SecF are required to maintain a proton motive force [2]. This alignment encompasses a -GG- motif typically found in N-terminal half of the SecD/SecF proteins . 20.30 12.00 20.30 12.10 20.20 11.90 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.36 0.73 -7.54 0.73 -4.42 145 7147 2009-01-15 18:05:59 2004-02-20 10:11:56 9 16 3512 9 1648 5001 3208 30.00 30 6.74 CHANGED hhhhhspslshGlDhpGGspltlpsppsssh ............hhhhpshsLGLDhpGGspl.lpsc.st..h.......... 
0 506 1052 1384 +7378 PF07550 DUF1533 Protein of unknown function (DUF1533) Yeats C anon Yeats C Family This family consists of several hypothetical bacterial proteins and is around 60 residues in length. It's function is not known. 20.80 20.80 20.80 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.10 0.72 -3.98 25 199 2009-09-11 11:31:05 2004-02-23 11:44:10 6 30 76 0 36 185 3 65.90 24 12.90 CHANGED spsahpsIsclpl..NGsph.........ptscatlttsst.lplssssFscsGpppIsIKApGYpDsslph ...............................ppahppIscVpV..Nssha....................th.pspspashshss........lpl..s...sssF...p.....s.....G...c....ppIpIpu.cGa.p-hphph...... 0 21 32 34 +7379 PF07551 DUF1534 Protein of unknown function (DUF1534) Yeats C anon Yeats C Family This family is found in a group of small bacterial proteins. Its function is not known. 25.00 25.00 26.60 25.20 24.60 23.40 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.44 0.72 -4.43 3 44 2009-01-15 18:05:59 2004-02-23 11:54:40 6 6 10 0 6 46 0 39.00 56 65.19 CHANGED LSFRTLQRGNAVuDAPRHRSAPRRAFKIGRGASRsAsshsA.spohhR ......LSFhTLQRGNAltDA.RH+SsPRRhhKhGRtAS.+suh...........t............... 0 0 0 0 +7380 PF07552 Coat_X Spore Coat Protein X and V domain Yeats C anon Yeats C Domain This family is found in the Bacilliales coat protein X as a tandem repeat and also in coat protein V. The proteins are found in the insoluble fraction [1]. 20.00 20.00 62.40 22.20 19.70 17.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -8.88 0.72 -4.19 22 264 2009-01-15 18:05:59 2004-02-23 14:24:19 6 2 76 0 29 192 0 60.10 42 71.12 CHANGED pscplsp-hhQhsthcQls...cpplhIcsSpsVsVoToDTpsAlslQshlQshlsl.llplsI .......p.ssplsp-hhQpophcQls...cpplhIhsSpsVsVTTTDTchAlsIQshLQshlsl.llplsI....... 
0 7 19 23 +7381 PF07553 Lipoprotein_Ltp DUF1535; Host cell surface-exposed lipoprotein Yeats C anon Yeats C Domain This is a family of lipoproteins that is involved in superinfection exclusion. Proteins in this family have been shown to act at the stage of DNA release from the phage head into the cell [1]. 20.10 20.10 20.70 20.20 19.70 19.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.24 0.72 -4.09 18 773 2009-01-15 18:05:59 2004-02-23 15:58:35 6 18 375 0 116 573 23 47.10 43 32.24 CHANGED shc.psALcKAKsYucohpMS+pulY-QLsS-......aGEKFTpEpAQYAlDsL .......c.psALppAKsYt.p.h.hp.MSKpulY-QLoS-.......hu-+Fos-pAQYAlDpL...... 0 31 69 104 +7382 PF07554 FIVAR Uncharacterised Sugar-binding Domain Yeats C anon Yeats C Domain This domain is found in a wide variety of contexts, but mostly occurring in cell wall associated proteins. A lack of conserved catalytic residues suggests that it is a binding domain. From context, possible substrates are hyaluronate or fibronectin (personal obs: C Yeats). This is further evidenced by [1]. Possibly the exact substrate is N-acetyl glucosamine. Finding it in the same protein as Pfam:PF05089 further supports this proposal. It is found in the C-terminal part of Swiss:O82833, which is removed during maturation ([2]). Some of the proteins it is found in (e.g. Swiss:Q9RL69) are involved in methicillin resistance ([3]). The name FIVAR derives from Found In Various Architectures. 21.30 10.80 21.30 10.80 21.20 10.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.73 0.72 -3.65 225 12786 2009-09-13 16:22:53 2004-02-25 16:52:28 8 504 697 8 341 12670 23 49.90 26 19.54 CHANGED l....spssshptttph.....hshssstpsshspAlstApsllsp.ss.....sstpplspA ........................................l.tspsssptspsY...................hsAsssppsAYssAlssAcsllsp.ss.sss.hstspVspA.................. 
1 168 208 297 +7383 PF07555 NAGidase Hyaluronidase_2; beta-N-acetylglucosaminidase Moxon SJ, Bateman A anon Pfam-B_4394 (release 12.0) Family This family has previously been described as a hyaluronidase [1,2]. However, more recently it has been shown that this family has beta-N-acetylglucosaminidase activity [3]. 25.00 25.00 26.50 26.00 21.20 20.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.04 0.70 -5.51 30 535 2012-10-03 05:44:19 2004-02-26 11:24:06 8 42 380 54 170 546 259 293.30 33 30.05 CHANGED RGsIEGFYGpPWSp--Rh-hhcFhGchKhN...TYIYAPKDDPYHRp.pWR-.YPs-El.sclp-LlcsAscs+lcFsaAluPGhsls.....ao...spcDhpsLhsKhcplhchGVRsFulLhDDIs.t.................sssupsQscLlNclpccahps+tsshs.....LlhsPTEYssstsss.......YlpsLscpLsssIplhWTGssVl.sspIohpshcph.......ssshtRsshlW.NaPVNDa..sps+LhhGPhts.pcssl..s..plsGhVoNPMppucASKlAlau....lADYsWNtpsa..DspcuWppuhchlss..............sss-slthFucp. ..............................................RGhlEGFY........Gp...PWopcpRhchhcahuchchN....oYl..YAPKDDshHRt...pWR-hYs......t-...........c..h.....pp...........lppLlpsupcsclcFlaAluPGhs.hs.........as........tpp-hpsLhpKhcQlh....p.h.GlRpFulL....hDD....Is.......................pshupsQsplhNcl.pphs....p.........c..................hlhsPT..cYssshsss.................................YLpslucpLs.s.slplhWT.GspVh.ssplo.ps.lpph...........sphh.p..RsshlW.NaPsNDa...........spp+....Lh.LGPhp......s....p.........sssl...s........plpG...llsNPMp.ph-s.....Sclulas....hAsa.Ws.tsh...ssp.pshpt.shp.h........t.t.th............................................................................................. 0 53 84 131 +7384 PF07556 DUF1538 Protein of unknown function (DUF1538) Yeats C anon Yeats C Family This family contains several conserved glycines and phenylalanines. 
20.70 20.70 22.10 22.30 19.90 20.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.31 0.70 -5.25 72 585 2009-01-15 18:05:59 2004-02-26 17:09:14 6 2 271 0 199 528 359 210.10 35 83.88 CHANGED IllllhhFQhlll.+tshsshhpllhGhlhVllGLslFLhGlchGlhPlGcthuptlsp.....................hss....hhhllhFuFhlGFusTlAEPALhslutpsppVou..GtIs.........thslhhsVAlGVululuLGhhRIlh.GhPltahllsGYlllllhohFuP.....pthlulAaDSGGVTTuslTVPllhALGlGlAss.lp......GRssllDGFGLIAhASlsPllsVhlhGhlh ..........Ihhllhhhphhll....phshtp...hhpllhGhlhlhlG.LslFLhGlchuhhPlGcthGptlsp............................................................................hsp......hhhll.hhuFhlGFssTlAEPulhsluppspplos..GtIs...........ttslhhslulGVululsluhhRIlh.GhslhahllsGYlllllloh..asP......p.hlulAaDSGGVTTGslTVPhlhALulGlAus.lp...............u+.ss.hhDGFGllAhsSlsPlluVhlhGll.h..... 1 97 169 191 +7385 PF07557 Shugoshin_C Shugoshin C terminus Wood V, Studholme DJ, Watanabe Y anon [1] Family Shugoshin-like proteins contain this conserved sequence at the C terminus, which is rich in basic amino-acids. Shugoshin (Sgo1) protects Rec8 at centromeres during anaphase I (during meiosis) so that sister chromatids remain tethered [1]. Sgo2 is a paralogue of Sgo1 and is involved in correctly orienting sister-centromeres [1]. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -6.83 0.72 -4.46 25 261 2009-01-15 18:05:59 2004-02-26 17:34:01 6 9 200 0 173 252 0 25.90 48 4.60 CHANGED s.uRspR.Rsshsl.sYpEPoLpsKhRRs .............sRspR..Rspts.V.sYpEPsLpsKhRRs.. 0 42 82 135 +7386 PF07558 Shugoshin_N Shugoshin N-terminal coiled-coil region Coggill P anon Manual Domain The Shugoshin protein is found to have this conserved N-terminal coiled-coil region and a highly conserved C-terminal basic region, family Shugoshin_C Pfam:PF07557. 
Shugoshin is a crucial target of Bub1 kinase function at kinetochores, necessary for both meiotic and mitotic localisation of shugoshin to the kinetochore [1]. Human shugoshin is diffusible and mediates kinetochore-driven formation of kinetochore-microtubules during bipolar spindle assembly [2]. Further, the primary role of shugoshin is to ensure bipolar attachment of kinetochores, and its role in protecting cohesion has co-developed to facilitate this process [3]. 30.00 30.00 30.00 31.90 29.90 29.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.98 0.72 -4.25 24 155 2009-01-15 18:05:59 2004-02-26 18:24:25 6 3 123 1 92 154 0 45.80 30 8.09 CHANGED l+ppahpQNpplu+tNShlph+lpplcsclocLlsENhsLRpph.t ....lKc+ah+pNpElu+pNSh.sh+l..pplcscsopLhsEphslRppsl..... 0 15 37 67 +7387 PF07559 FlaE Flagellar basal body protein FlaE Yeats C anon Yeats C Family This family consists of several bacterial FlaE flagellar proteins. These proteins are part of the flageller basal body rod complex. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.13 0.71 -3.48 180 2146 2009-09-11 11:36:52 2004-02-27 10:17:02 9 15 1789 5 472 1518 458 131.10 26 27.96 CHANGED hss...ssssoas..sooh.slYDSLGss.........HslshaFs.K...................................sss.....ssW..plhssss.............................................................................................................................ss.s.ssslsFs.s.......sG...p....l.........................................................................sssssshsssssssuuss.p...............lslsh.........s.............................ssTQau....us..t..ssshs...Q.DGa ........................................ss.p.o.as....h.sso.l.slYDS.....hGs..s.........HslslYFsK....................................................................sus........Np..W.....pshs.p.ss.........................................................................................................................................................................................................................................................................................................................s.s.ss.s..shs.ssslpFs..s.........sG......sL...............................................................................................................................................................................s...ss..s.......s...s.....s...hs....s..s.....s.h....s..G.As.sts............hslsh.....................s..........................................................uh.T.Qhs......us...s.....lsshs.....p.sGY.................................................................................................................................................................................................................................................................................... 
0 138 279 374 +7388 PF07560 DUF1539 Domain of Unknown Function (DUF1539) Yeats C anon Yeats C Family \N 21.10 21.10 21.80 24.40 20.40 19.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.50 0.71 -4.26 13 46 2009-01-15 18:05:59 2004-02-27 12:06:41 6 2 13 0 9 26 0 130.90 32 29.47 CHANGED APpsssFLcSLhpsosppWpshpc........LcspIp......................cLsss..lcssWtpILchlsst......spsphsuc.spslhhuhha+LhphLp.....sPsIsp-+KpplLpaIuSYus.sCsPTWlEshhpElptlaNpp-susshl ..............sPtss.FLpSLhpsssppWshhap........LcspIp......................plpss..hcssWtsIlphlsst......ppsp.psc.sushhhuhha+lhthLp.........ssslop-+KhphLs.IuSYss.tCsPTWlEshhpElptIaNpp-pshs.l.... 1 0 0 7 +7389 PF07561 DUF1540 Domain of Unknown Function (DUF1540) Yeats C anon Yeats C Family This family has four conserved cysteines, which is suggestive of a metal binding function. 22.10 1.00 22.10 5.70 21.90 -999999.99 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.82 0.72 -3.92 185 1099 2009-11-12 09:41:43 2004-02-27 12:20:09 6 2 553 0 243 664 14 41.40 32 75.43 CHANGED lpCpVssCta.Npspp.CsAspIpV.....u....................spu....ppspp.Ts..CtTF ...ltCpVssCsa..N.p..sst.Cs.Aps.IpVt....u.....................tpu.tpstpTs..CtTF................................................. 0 125 200 227 +7390 PF07562 NCD3G ANF_assoc; Nine Cysteines Domain of family 3 GPCR Liu XH, He Q, Studholme DJ anon [1] Family This conserved sequence contains several highly-conserved Cys residues that are predicted to form disulphide bridges. It is predicted to lie outside the cell membrane, tethered to the Pfam:PF00003 in several receptor proteins. 
25.40 25.40 25.50 25.70 25.20 25.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.97 0.72 -4.25 119 2732 2009-09-14 23:23:15 2004-02-27 13:11:15 9 24 148 9 1676 2202 0 52.60 41 6.74 CHANGED PhSsCSpsC.sGptK...th.pGpshCCacChsCsssphssts.Dth.pChtCs.p...Wu ..............PpSlCS.psC...t..PG.hRK................th.cG..ps.....CCacChsC.s-sc...hs.....sp......s......Dt......pCh...p..Csts.pas.......................... 0 120 276 1069 +7391 PF07563 DUF1541 Protein of unknown function (DUF1541) Yeats C anon Yeats C Family This family consists of several hypothetical bacterial and occurs as a tandem repeat. 25.00 25.00 52.70 35.20 21.80 17.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.80 0.72 -4.29 25 328 2009-01-15 18:05:59 2004-03-01 10:17:26 6 3 151 8 58 283 0 52.60 52 56.99 CHANGED psGspVllsAsHMcGMcGAcAsIcuAhcTTVYhVsYsPTsGGcpVcNHKWVsc ....sGspVhlpAsHMsGMKGApAsIcuAhc.TTlYsVsYpPTsGGcpVpNHKWVsp....... 0 16 43 51 +7392 PF07564 DUF1542 Domain of Unknown Function (DUF1542) Yeats C anon Yeats C Domain This domain is found in several cell surface proteins. Some are involved in antibiotic resistance (e.g Swiss:Q9RL69 and Swiss:Q9LCJ9) [1] and/or cellular adhesion (e.g. Swiss:Q931R6) [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.37 0.72 -3.96 112 9196 2009-01-15 18:05:59 2004-03-01 17:55:50 6 178 465 0 319 6747 7 70.10 27 29.11 CHANGED pppstIssssssTsEEKptAhsplspthppAhssIspAposspVsps......pspulssIp....sl...pss..spt....KssApp ..................phstIss..sssuTsEEKpsAhsplspth.spAhpsIs..s..A..s...T...........ssp...........Vsps...........cspulssIp......sl.....pss...sph......KtsAp..................................... 
0 58 119 228 +7393 PF07565 Band_3_cyto Band 3 cytoplasmic domain Bateman A anon Pfam-B_1004 (release 3.0) Domain This family contains the cytoplasmic domain of the Band 3 anion exchange proteins that exchange Cl-/HCO3-. Band 3 constitutes the most abundant polypeptide in the red blood cell membrane, comprising 25% of the total membrane protein. The cytoplasmic domain of band 3 functions primarily as an anchoring site for other membrane-associated proteins. Included among the protein ligands of cdb3 are ankyrin, protein 4.2, protein 4.1, glyceraldehyde-3-phosphate dehydrogenase (GAPDH), phosphofructokinase, aldolase, hemoglobin, hemichromes, and the protein tyrosine kinase (p72syk). [1] 23.20 23.20 23.20 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.51 0.70 -4.87 32 1074 2012-10-02 23:31:29 2004-03-02 15:25:32 8 13 122 4 398 964 12 235.00 42 26.14 CHANGED -l-csscRWu+PHVusLoa+SLhELR+sltpGsVLLDLctsoLsslActll-phlhpsplcspsRcplhcsLLh+Hp.......H.s-.........t.t..hshhhshushsp..t.................................t.sps.hs.spshtsp..tpttph....................................................................................phpphp.phhcKIPpsuEAosVLV.........GpV-FL-pPslAFVRLppuVh....LpulhEVPlPsRFlFlLLGP.sssshsYHElGRuhATLMoDclF+psAYtAcsRcDLLsuIc-FLDsulVLPPu-hssp 
..............................................................................................................................................................cVEcsu-RWuKPaVAoLSh+SL....hELRp....sl.p...GslhLDhctso..........L.tl.................s.c...ll-p..l...ssplpsp...R.tpVhpsLLh+Hp.......Htsc......................hshhhS..hushsp...u.p.p.................................................................t..............t....s.......h..............tt...t..................................................................................................................................................t.phpphp.phhc.KI.P.tsAEAos.VLV................................GpV-FL-p..PhhA.F.VRLppA.Vh....L.suls.........E............VP..l.....Ps...RFLFlLLGP..tups.pY.H.E.IGRuhATLMoDc.........l.FH-sAYpAccR...........pDLl.uuI-EFLDpshVLPPGEh-s.s.................................................................. 0 68 97 223 +7394 PF07566 DUF1543 Domain of Unknown Function (DUF1543) Yeats C anon Yeats C Domain This domain is found as 1-2 copies in a small family of proteins of unknown function. 21.70 21.70 21.70 22.10 21.30 21.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.66 0.72 -4.45 44 345 2009-01-15 18:05:59 2004-03-03 13:14:42 7 2 198 16 80 277 231 53.00 32 53.00 CHANGED splEhHDltaVVupslcsshspl+ppW....hGstpuLHlDuatplcpl....DGaclp......L ...t.hElHDhthsVusshp-Ah.ph+psW....hsst.pplHhDshpsVcss......tuhpl..................... 0 21 36 60 +7396 PF07568 HisKA_2 Histidine kinase Studholme DJ anon BLAST Domain This is the dimerisation and phosphoacceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. It is usually found adjacent to a C-terminal ATPase domain (Pfam:PF02518). This domain is found in a wide range of Bacteria and also several Archaea. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.61 0.72 -4.12 91 1811 2012-10-11 19:05:54 2004-03-03 17:01:18 7 326 1027 0 823 2166 242 75.40 34 13.93 CHANGED EIHHRlKNNLQlISSLLsLQucphpsccs......hcsh+ESQsRVhShAllHEcLYcup...shcslsFusYlpcLspsLhpoY ......ElHHRVKNNLQsluSLLpLQ.u..cp.s.....p.s...s.c..s.......+psLp.-.utsRlpulAhlH-.t.Lh...ps.t......c.....p.........p...lshs.p.hlpplhppl....h................................... 0 296 586 667 +7397 PF07569 Hira TUP1-like enhancer of split Studholme DJ, Wood V anon Pfam-B_7106 (release 12.0) Family The Hira proteins are found in a range of eukaryotes and are implicated in the assembly of repressive chromatin. These proteins also contain Pfam:PF00400. 20.30 20.30 20.30 21.60 20.20 19.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.40 0.70 -5.13 34 350 2009-01-15 18:05:59 2004-03-04 09:39:21 6 28 250 0 246 360 0 188.80 25 21.31 CHANGED sG+RlhPsllLsusssh..LEspupa.LhslTusGhLalWNlpptpshhss..SltslLs.s....t.....spusslotsplo.ppGhPllTLSNGcuYsasssltsW.hlo-sW..WuhuSpYWssh..............ssstst......................................ssssuhlshLEp+Tssphhhp...Gphh.hp+h........h+shl.ccGhEshE....psloluHLEN+ltsuhhLtupcEa+taLhhYs++Lupp..........Ghcs+lcElhppL ........................................................sG+RlhsslhLsu....hsh..lc.s...p...spa..lhslTssGhhhlW......s..lpp......t....ps......hhts.............S......lts.l.Ls...........................sssssl..spst.l.s..pp.G....h.P.l.l.s..L........o.......s..G.c.u......Yhas.shtsW.hlu-t....hh.supahssh.......stt..................................................ptu.lshlptpsppth....................................tshh.hps.pthp....p.holuaLEspltsuhhL.tutpE...a+haLhhYs+hL.stp..........G..c.+....lc-lhptL................................................................ 
0 78 140 211 +7399 PF07571 DUF1546 Protein of unknown function (DUF1546) Studholme DJ, Wood V anon Pfam-B_3691 (release 12.0) Family Associated with Pfam:PF02969 in Transcription initiation factor TFIID subunit 6 (TAF6). 20.30 20.30 20.50 20.70 20.20 18.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.00 0.72 -3.87 29 394 2009-01-15 18:05:59 2004-03-04 11:00:50 8 7 260 0 270 386 3 93.80 35 17.62 CHANGED lLTCllu+pL...sspss......................hcppauLR-hAAsLLuhIs++ausshssLpP..Rls+ThlKshlDs........s+.shuoaYGAlhGLpsl.Gs-sl+hlllPpLpsa ...........................................llTCllu+pL..sspss..................................................hcs.HasLRDhAApLlup.Is..+p..auss.....hssLps...R....ls+ohhKshhDs.......................p+..shsopYGAlhGLttL..G..csl+hlllPpLp................... 0 93 145 215 +7400 PF07572 BCNT Bucentaur or craniofacial development Studholme D anon Pfam-B_10149 (release 12.0) Family Bucentaur or craniofacial development protein 1 (BCNT) in ruminents has a different domain architecture to that in mouse and human. For this reason it has been used as a model for molecular evolution [1-3]. Both bovine and human BCNTs are phosphorylated by casein kinase II in vitro [4]. 22.20 22.20 25.30 24.20 21.50 21.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.01 0.72 -4.04 37 295 2009-01-15 18:05:59 2004-03-04 11:32:09 7 6 263 0 214 299 2 78.80 36 26.19 CHANGED K+......shlsplhutht......KcpKLsTLEKS+hDWsuas-cc.GIp....-ELphHs+.K.......cGYLs+p-FLsRs-s+p.Ep.+phRhpph ..................tp......shhsplluphs......KptK....loTLEKS+lDWssah-cc...GIp..........-E.Lph..+s+uK................-GYL-+psFLpRs-t+p.Etc+phRhpt.h............. 
0 71 117 177 +7401 PF07573 AreA_N Nitrogen regulatory protein AreA N terminus Studholme DJ, Wood V anon Pfam-B_11486 (release 11.0) Family The AreA nitrogen regulatory protein proteins (which are GATA type transcription factors) share a highly conserved N terminus and Pfam:PF00320 at the C terminus. 21.50 21.50 21.60 22.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.23 0.72 -3.42 7 19 2009-01-15 18:05:59 2004-03-04 13:21:54 6 2 18 0 9 21 0 82.60 56 9.91 CHANGED MSGloh.GGGssusRPTtsAs.h.o..ssADADR................osQLSDDFShsSPhSssDSupspDGLLpDSLFPEW+sGAPRsGh-SPDE ........MSGloh.GGG.s..GusRPTpsAshh.oh.s.ADADR.Sssp..p........oSQLSDDFShGSPlSPsDSSpApDuLLpDSLFPEW+sGsPRs.GhDuPDE 0 1 2 6 +7402 PF07574 SMC_Nse1 Nse1 non-SMC component of SMC5-6 complex Studholme DJ, Wood V anon Pfam-B_24547 (release 11.0) Family S. cerevisiae Nse1 (Swiss:Q07913) forms part of a complex with SMC5-SMC6 This non-structural maintenance of chromosomes (SMC) complex plays an essential role in genomic stability, being involved in DNA repair and DNA metabolism [1,2]. It is conserved in eukaryotes from yeast to human. 
22.00 22.00 22.30 22.40 21.60 21.90 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.14 0.71 -4.69 9 296 2009-01-15 18:05:59 2004-03-04 13:40:24 8 7 238 1 199 289 2 168.00 25 61.36 CHANGED -s+RthLQslhs.+Ghlpps.ltphhsslsspps..s.h.ppt........hLssaVsplNpclpsLshclctspa............................ssutpaYValNssssstschuTsaossElpahKthl-tlspppshhtt........lshlstsscltupt.pp...............................................lt.ocsppLLpchsp.tWh.hohcschsLshRsLlEh .......................cRthLQhhhs.+us.hp.pp....hptlh...t...th...hphpp....t.....t.............................ptlp.salssINstl.psht...hcI+pshp................................................p.suphhaul..VNhsss...s.h.o.p..hA.T.....sastsE........lshh+phl-tlhp..s.ts...s...tpt........s.hls.hptlphtp..tp.....................................................................pcsEphLpphlpptWh.h..p..pp.G..asLssRslhEh........................................................................................................ 1 61 104 158 +7403 PF07575 Nucleopor_Nup85 Nuceloporin_Nup85; Nucelopor_Nup85; Nup85 Nucleoporin Studholme DJ, Wood V anon Pfam-B_55990 (release 11.0) Family A family of nucleoporins conserved from yeast to human. THe nuclear pore complex is a large assembly composed of two essential complexes: the heptameric Nup84 complex and the heteromeric Nic96-containing complex. The Nup84 complex is composed of one copy each of Nup84, Nup85, Nup120, Nup133, Nup145C, Sec13, and Seh1. The structure of a complex of Nup85 and Seh1 was solved [3]. The N-terminus of Nup85 is inserted and forms a three-stranded blade that completes the Seh1 6-bladed beta-propeller in trans. Following its N-terminal insertion blade, Nup85 forms a compact cuboid structure composed of 20 helices, with two distinct modules, referred to as crown and trunk [3]. 
19.70 19.70 19.80 19.80 19.20 19.50 hmmbuild -o /dev/null HMM SEED 566 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.93 0.70 -6.30 20 368 2009-01-15 18:05:59 2004-03-04 14:56:35 8 8 253 16 255 381 1 430.50 18 69.60 CHANGED sshlYhlppstt.hposphRpllsEsaplFhsLQphc.......t.spspps......pal.plspsYRSllpsshpplpph.th...hhpc.tphpp......plolLhslcslWp....LhEhLah-ssstuslltpLL-Wl.Rhcspss-phsp.........-lLtppc.....slpcpssaWclVssLlLpGhh-pAhphL.tpcuph.........sspshhcshtsLLpphPhhp..tt.........shp-hcppWccW+spspc.......plpssshsspspLEsllpllsGsccslhp....hsssWYEhhsuhLLYhpP....osc.hE.LptYAppslsta..............s.ssspsh-plhlslhptclhpVltphpph.sshWhsAHlsDLl....-+sGlLp......spp.phu..............sshREaLLh-YApsLhS..c+oLWQlulsYlshssptG....+shlEhllsRlPlpTscct.+hLplCcphpLs-lsppIhKlhup+sLcps+hGsALsWhh+ApDhshlshlophlh.....cchspcGshhs.-llssluss.....................hlu.spLoFLupYp-Fa+hhpp.......ccatcAscLLlsLhpsphsPppaWhsLLsDslsLLp...ccsh.hssppThpllcsLEcht .................................................................................s............................................................................................................................................................................................................................l....L.hpW..h.p...c.............tp..t.............phh..t..............tp..taW.p.h.l..hlh.G..hp.shthL...ttt.............................thhp.h....tlhpphP..t......................................................................th...th..p..hp...h......tp...........................pt.....h.t.......tlp.lhplh...hGp....t.th.t................p.sWhchh.shhha...s....................................p....hht..hp.h..........................................s..p.h.c.hh.hshh.phshtthlt.....h..........p....h...s.hh...ssthh-lh....................pht..thlt.................s............................shc.c.h...lhpYut.Lhs.............ppth..........Wplulthh......s....h..s....p.s.........thhlp.hl..+h..sh.....p.....o.....p...........p............p.......h....
..chl................pl...............st.pht.h..p.........pplhchhu.p............h....h.p..p.t.p......hG...pAl.hh.......h............p...........ups............h...........hhp.............l.....sp.h.hh.....................pph...ps....................t.hl..t.lhst..............................................hht...tLsh....h....s....phhcFhp......hhtp............................tph......tu......ph..Ll...lh................................................................................t.............................................................................................................. 0 86 137 210 +7404 PF07576 BRAP2 BRCA1-associated protein 2 Studholme DJ, Wood V anon Pfam-B_5419 (release 11.0) Family These proteins include BRCA1-associated protein 2 (BRAP2), which binds nuclear localisation signals (NLSs) in vitro and in yeast two-hybrid screening [1]. These proteins share a region of sequence similarity at their N terminus. They also have Pfam:PF02148 at the C terminus. 20.30 20.30 21.60 20.60 20.20 19.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.27 0.72 -4.48 8 297 2012-10-02 20:46:34 2004-03-04 15:41:04 7 12 249 0 215 297 3 104.60 28 17.78 CHANGED ppssppphpsss.sthhslhsVsshhssc.lhphsuhppp.Icpl+ll+DusPNpaMVLI+F+sppsAhsFYppFNG+sFNslEs-s.CHllaVpcVEhspp....ssssss ..................s..........pp.ot.hhsl.hssshhhssc.L.h..hh.s.s..hppslp.p.h+l...l+..........c.up...s.NpYMVLlKFcspps.AcpFhppaNG+.F.NSh.Es.-s.C+llaVpplphpps..........s..................... 1 62 116 175 +7405 PF07577 DUF1547 Domain of Unknown Function (DUF1547) Yeats C anon Yeats C Family This family appears to be found only in a small family of Chlamydia species. 
25.00 25.00 48.60 25.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.73 0.72 -4.30 17 329 2009-09-10 18:55:26 2004-03-08 13:41:42 6 4 37 0 10 241 0 60.10 42 19.82 CHANGED cILspVRpHLDsVYsupsst.stt....NQsLGslI+shE.ssTupsTllsshpssssshuop DlLusVRtHLDhVYPu-ssssops....NQsLG-llpchEspGTupcTllos.puusspss............ 0 3 3 7 +7406 PF07578 LAB_N Lipid A Biosynthesis N-terminal domain Yeats C anon Yeats C Family This family is found at the N-terminus of a group of Chlamydial Lipid A biosynthesis proteins. It is also found by itself in a family of proteins of unknown function. 20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.38 0.72 -4.06 10 310 2012-10-03 12:15:12 2004-03-08 16:42:16 6 3 227 0 109 288 189 71.50 40 47.27 CHANGED lGhluphFFosRFslQWhhSE+ppcSslPtsFWhhSllGusLhLlYulh..puDsVhlLsauhsLllYlRNLpl ..lGhhuQhhFuhRFllQ.Wl.h.S.E+.t.p+SllPhsFWhhSl.hGuhhhLsYulh..+pDs.VhlLupuhulhlYhRNLh............ 0 33 74 91 +7407 PF07579 DUF1548 Domain of Unknown Function (DUF1548) Yeats C anon Yeats C Family This family appears to be found only in a small family of Chlamydia proteins. 22.20 22.20 23.70 44.80 20.70 19.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.65 0.71 -4.05 10 36 2009-01-15 18:05:59 2004-03-09 10:28:22 6 2 13 0 8 22 0 133.10 39 23.45 CHANGED Rspc....EWHpIsuFKHh+GcpLGL.hDsLuc.LsshTlphTohphhpp+spYphlhppFlssY+sSsssLlpalhsQhlsSSp-lpsulpsaLL-sl.sslslPEs.c+sslls-lFY.D-s.YEhspEGIlYLLlh.GII .............ppcEWHhhsuhKah+G+pLGLsh-pLsp.LsshTlp.TuhphhpppppYphlhspFlssYpsSsssLlshlhpphhsuos-hpsslpsalL-pl.cslslPEs.t+sslhpslFa.D-p..Y-hsppuIsYLLhh.sII..... 0 0 0 6 +7408 PF07580 Peptidase_M26_C M26 IgA1-specific Metallo-endopeptidase C-terminal region Studholme DJ, Yeats C anon Merops Family These peptidases, which cleave mammalian IgA, are found in Gram-positive bacteria. 
Often found associated with Pfam:PF00746, they may be attached to the cell wall. 21.50 21.50 21.90 25.30 21.30 21.40 hmmbuild -o /dev/null HMM SEED 737 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.43 0.70 -6.23 18 836 2009-01-15 18:05:59 2004-03-09 12:20:28 9 67 284 0 22 447 2 617.60 40 40.45 CHANGED spltEYslssh.sllYTPNtlh+D.pssLlssVhscLpuVpLpSs..sl+plL...s....hss-s.....s.......lc-LYL-ESFscVKssLschl+KLLps-sphhpss.psspphllcKIccNKsAlhLGLTYLNRaYshKYschslK-LhhFKPDFaGK.ssSsLDpLIclGpSu.sNL+ucpslpsYsphlusshGps.cLasaLch.R+LFsspps.N-WFKcsoK..AYlVEppSsl.EhpsKp..........htlY.....Dplsss..pappMlLPLLTLp.cpplFlISshsTluFuuaE.....+Ytpss.......cEtpphhccplccsAccQRsahDaWYRlhs-ss+-KLh+S.l.....VaDuashsssshh.c+hutsstcsshssl+EFF....GPsG+aathp.uh.GAYAsGp..........sVYahshchlsca.GsSsaTHEhTHssDphIYLGGaG+RpGhGsEsaApGLLQoPssssss....sLGlNhsac+psDss....plashDP.s+hpoccDlcpYM+Nh.DslhhLDYLEupullsphsss.pspWF+Kl-pcahcs........sspsathstVRsLTsEEtp..pLsSl-DLlDNsllopRshsuNt.......ca..csuYholshhusIYuuhsoSpGuPGslhF++sAFclhuhhGYccGFlsYlSNpYcspApppGpsh...........loDchllcKV.sGpaso.hp-FKKAha+EhhsKtp..slsslTls..........spTIsoas-LpsLhccAVpcDhtt....lpssssspp....stsppLKptlaKAhLcpTDsF..+oSIF 
............................................................................pltEYslssh..sllYT.PNhh..ps..hspllpplhspLppVph.St.....tlpphl...s....hptps......p.......................................hpc.LYL-EpFspsKtpLpp.lppll.p.s-.tt.h..ss.pss.thlhcKl..cpNK..ttlhhuLTYLpRaYshpasp..hshKcLhha+..DFa..Gc.ssssLDp.lIplG..............Su......pNLhuppshpsYt..lutthspt.sLhshLch.hclFhspps.N-WFhptsK..sYlsE.p..Ss..-htscp..........htla..............Dtlsss....h....at.p......M..lLPLLsL...ptplalISshsohuauua-.....+Ytppp.........pp.tp.hc.pphccsActppsahDaW.+lh.sps+p+Lh+s.......la.Ds.hp...h...sst.t.....h....pth.h..sph.p.....p.s......ssh+pha....GPhsph...h.hp.th.GAhA.sh.........tVhahshchl.sc..GhssaTHEhTHssDp.lYhGGat+RpGhssEh....aApGhLQsPspssss........slslN.hhctppsss................ph.shsP.pchpstt-lppYh+sh.DslhhL-aLEupullt.....p.ptt.p.phh+Kl-pphhps.............tsht.hshlRpLst-Ehp..pLsShssLl-pshhop+t...ss............sa..psuYhslp..hhssIYuu.ssppGsPGsl.h++.sachhuhhGYccGFlsYlSNpYcppAcp.pGcsh...........loDchllcKl..s.....spapo.htsFKKshapEhl-Khp...slpslT.ls............spoIs.shpcLppLhscAVpcDht...........hhp..t...p........ptshpLKptlaKthLppTssF..psSIF..................................................................................................... 0 3 9 12 +7409 PF07581 Glug The GLUG motif Yeats C anon Yeats C Family This family is found in the IgA1 (M26) peptidases, which attached to the cell wall peptidoglycan by an amide bond ([1]). IgA1 protease selectively cleaves human IgA1 and is likely to be a pathogenicity factor in some pathogens ([2]). This family is also found in various other contexts, including with Pfam:PF05860. It is named GLUG after the mostly conserved G-L-any-G motif. 
20.50 18.00 20.50 18.00 20.40 17.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.22 0.73 -7.63 0.73 -3.40 110 564 2009-01-15 18:05:59 2004-03-09 12:56:16 7 70 210 0 192 551 145 27.50 37 4.06 CHANGED sstslGGllGtstt.........uslpsssusu..sls ......sttlGGLVGhsht............usIpNuhAoG..sVs...... 0 77 158 178 +7410 PF07582 AP_endonuc_2_N AP endonuclease family 2 C terminus Studholme DJ anon [1] Family This highly-conserved sequence is found at the C terminus of several apurinic/apyrimidinic (AP) endonucleases. in a range of Gram-positive and Gram-negative bacteria. See also Pfam:PF01261. 20.60 20.60 20.60 20.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.51 0.72 -4.42 56 878 2009-01-15 18:05:59 2004-03-10 15:21:13 7 3 742 7 289 741 345 53.10 34 16.20 CHANGED -FcsIFSpLsphGYsGhsslEWEsslhctppGApEussFl+ch..lIpssstuFDsh .....atslhSpLtthGYDGhlSlEaE.....D.....sl.....hs.....s-...c.....Ghpcusphl+sh...I.spstsh..h........................... 0 96 195 254 +7411 PF07583 PSCyt2 DUF1549; PSC2; Protein of unknown function (DUF1549) Studholme DJ anon Blast single linkage clustering Family A family of paralogues in the planctomyces. 
29.80 29.80 30.00 30.40 29.50 29.70 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.39 0.70 -4.83 176 580 2009-01-15 18:05:59 2004-03-10 15:58:43 6 38 66 0 247 688 957 204.70 35 22.91 CHANGED ssIDpFlhp+L.cpt.slpPustA-.cpsllRRlshDLsGLPPT...scElcsF.....ls...D...p..u...ss..u.acc....lVD+....LL......sSP.c....YGE....+WupcWLDlsRau-opGh...p.c.......tp.ssatYRDaVlcuhNcshPYDpFlpEQLA.........GD.l...................t....................................pth........h.........................AouFht.t....................hptchh...sDtssssupsFLGloltCApCHcHKa.DPlopcDYYphtAaFs.sspttstt ....................................sIDpalhs.+Lc.pp...s..lpP..u....stAcc.psllRRlshDLsGLPPT..scElcsFls........D...s..s....ss...u.hcclVD+....LL......uSP.c....YGE....+WuppWhDlsRau-opGh.....p.s........p.....statYRDaVlcu.hNpshPaDpFlpEQlA......GD.l..s.......st...........................pph......h..........AouFhphs.............t............php.h...s-tssssupsFLGlo.ltCApCHDHKa.DP.l...........spcDYYphtAhFsssp....th....................................... 0 211 242 246 +7412 PF07584 BatA DUF1550; Aerotolerance regulator N-terminal Studholme DJ anon Blast clustering of Pirellula proteome Domain These proteins share a highly-conserved sequence at their N-terminus. They include several proteins from Rhodopirellula baltica and also several from proteobacteria. The proteins are produced by the Batl operon which appears to be important in pathogenicity and aerotolerance. This family is the conserved N-terminus, but the full length proteins carry multiple membrane-spanning domains [1]. BatA ensures bacterial survival in the early stages of the infection process, when the infected sites are aerobic, and is produced under conditions of oxidative stress [2]. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.79 0.72 -3.79 193 1151 2009-09-10 21:46:21 2004-03-10 16:07:12 6 14 818 0 405 993 385 77.20 25 14.72 CHANGED M..............Fh..sPh.hL................huLl.hlsl..lllahhhht+.pph..tasulphL.tphtppppp......phpphhLLlLRlLhluhlllAlApP ....................tFtsPh.hL.......hhLl.hlsl.....h..h...l...h..h...h...h..t...p+tpph....pFssh...plL....tplhspppp.......hp....a....l..hhh.LplLslshlllAlApP.................................. 0 168 307 362 +7413 PF07585 DUF1551 Protein of unknown function (DUF1551) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins identified in Rhodopirellula baltica. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.99 0.70 -5.57 14 57 2009-01-15 18:05:59 2004-03-10 16:13:21 6 2 20 0 35 65 47 311.20 17 65.17 CHANGED pAEhlhhhpcGss.ssLlo.ss......s.sssss.s..sspsLhusphs.c..hpuGhRlphGhhhss.sshulEhpaaslts.suuhss...ssuphshhutPh.p.Tshss...ssuphl..h.s...........sthslpssschtuhEhNh.....Rhthss.........tts.phshLsGaRahpLc-tLphsps.sshst............shs..sso.l...s.p.shsscNphaGsQlGhphphpp.sthohsuhhKsulsss+tstsspsstshs.s...................s..sssshspsphuhlsEhslshtaplspshslplGYphlahssVshAs...cpIspshsssss............spssss.spsohhhpGlshGhpapa 
..............................................................................................................................................sh.ss..s.p..hpsGhRhphuh..hts....slphph.....hhh..p.ptshts.....sstththht.tsh...s.hsh............tts.h.........................................sthphphpschh...uh-hsh...........ph.h......................tthphphlhGhRahplc-plshsssspt.st..............................................................s..sss.h......................p.shpspNph..a..Gs..QlGhchphph.spasls.shsKsul..hsNctphshp.spsshsts..............................ts......sstspsphuhss...-hslsh............tap..l...spphshphGYphlhhssVshu....pplstshssss................sth..ppsshhhpGhshGhph.a....................................... 0 28 32 32 +7414 PF07586 HXXSHH DUF1552; Protein of unknown function (DUF1552) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins identified in Rhodopirellula baltica. 19.80 19.80 19.80 20.20 19.70 19.40 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.11 0.70 -5.49 102 250 2009-01-15 18:05:59 2004-03-10 16:19:24 6 2 43 0 119 277 568 302.40 23 67.78 CHANGED Rhlhlhhs.Glh..........stpa..hPp.........psGt..................sa............phsts..LpPLp...sh+cchsllsGlsp.tst......suHtsss.saLou.....h...tstst...hpss.lSlDQlhAppl....Gp..pTRasSLplusps..............tt.hss.slSas................psGp.....Plss..psPpplFc+LF.usssss........tttppp..hppcpSlLDtlhpcu+sl...pppLupsD..+pKLDpYhsolR-l..Epcl..........pptpthh........sh..t..Pthsht.s.................ss.shhpch.chhhDLhslAhpsD.TRVsThhhsss.........hhht.........tl.Gl..............stshH.slSHp...sss.ptht...phtphcpahspphuh 
..................................................................RhshhhhstGsh.....................ppa...hPp...............ssGp...............................sh.............p.hsth...LpPLp...sh+schsllsGlsp.tst.............ssHtsss.shLTu......s...tsss...........thpsu.hSlDQhlAppl......up....pTphs.SLplusps..............st..hts.slSas................sssp.....Plss..psPpthFc+LF.usssss.............tttppp.............hppcpSlLDhVtp-scpL...pppLupsD..+pKLDpYhsulR-lEp+l.............pptptht.......st...h......Pphsht.s.......................tstshhpch.+lhhDlhslAhpsDhTRVsThh..huss................tth.hhs..............tl..Gl................................spsaH.slSHc........sss..ptht......phtplspahhpthu............................. 1 102 108 114 +7415 PF07587 PSD1 DUF1553; Protein of unknown function (DUF1553) Studholme DJ anon Blast clustering of Pirellula genome Family A family of proteins found in Rhodopirellula baltica. 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.68 0.70 -4.99 41 593 2009-01-15 18:05:59 2004-03-10 16:47:29 6 37 66 0 253 700 966 244.70 30 28.00 CHANGED ssRhsLApWlsss-N...PLTARVhVNRlWpphFGp........Gl.Vcos-DFGhtGssPoHPELLDaLAtcFl-.sG.....WslKpLhRpIlhScTYppuSpss......scs........th.D....PsNchhuRhsh+RLsAEhlRDshLsloG...Ls.p.htGssh....s...........t........h.ts.stpphRRulYthhpRsh..............sshhpsFDtsststssspRppossP.hQALsLhNsshhhptup....thApplhpp............................................................................................tsss........ppplsthFttshuRtPospEhpthtshlsp 
...................................................................................................t.pRhtLAcW.ls.ssc.N...PLs.ARVhVNRlWpphFGp........Gl.Vpos-DF.....Gh.....G......p.........P.o..H..PELLDaLAscFhc..ps............WslKpLhRhIlhSpsYppoSpss.......spt........................th..D....spN....phhuRhshpRLsA...E.lRDshLssoG.....Ls..p..ht..G.s.h...ts..............................................tt..h.RRul..Y....th...hpRsh....................sshhtsFDtss.t.p..s.s.spRpp.osss.hQALhlhNss.hhhctup....thup.p..hhpp..............................................................................................................................................................................................tt...........pp..tlpthahhshuRtPsspEhphhhphh..p....................................................................................................... 0 217 249 252 +7416 PF07588 DUF1554 Protein of unknown function (DUF1554) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of proteins identified in Leptospira interrogans. 21.20 21.20 22.00 31.10 21.00 18.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.20 0.71 -4.71 15 119 2009-01-15 18:05:59 2004-03-10 16:55:52 6 6 18 0 20 123 4 148.20 36 48.04 CHANGED sssssHNG..NaGGIoGADAaCp..uplPo.sLsusG..sYKAMLVD.s.....ssRhATossPNSosG..QhDWVhpPNppYpRuDsss.h.lhTTNusGlFsFs....LpNuFss....shpsshWTGLs.......ssWpThs........stsCss....Wso...................us.sshhGtaGsusp.psust ....................s..s.hHsG..shGGIuGADuaCp..uphPu...slsusG..sYKAMLVD.u.....ss..RhAsosssNussG....QhDWVhpPNppYpRu-.sss..h.lhTTNusGlFsFs......LpNsFss........hputhWTGLs.......ssWTThs........s.sCss..........Wss...................us.hshhGhhG.usthpsth.................................... 1 13 18 18 +7417 PF07589 VPEP DUF1555; PEP-CTERM motif Studholme DJ, Bateman A anon Blast clustering of Pirellula genome Motif This motif has been identified in a wide range of bacteria at their C-terminus. 
It has been suggested that this is a protein sorting signal. Based on phylogenetic profiling it has been suggested that the EpsH family of proteins mediate this function [1]. 20.70 18.00 20.70 18.50 20.60 17.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.59 0.73 -7.37 0.73 -3.88 163 1616 2009-09-11 20:23:54 2004-03-10 17:07:42 6 71 212 0 766 1794 370 25.50 40 9.66 CHANGED sVPEPuo..hsLhulG..lhulsh.....hpR+ ...sVPEPuo..huLluhG...Lsuluh.....htRR+........ 1 297 677 745 +7418 PF07590 DUF1556 Protein of unknown function (DUF1556) Studholme DJ anon Blast clustering of Pirellula proteome Family \N 25.00 25.00 31.10 115.10 20.70 17.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.32 0.72 -3.75 3 3 2009-01-15 18:05:59 2004-03-10 17:11:44 6 1 1 0 3 3 0 93.70 37 69.21 CHANGED MNQGSETTERIRTSSEGRMGAICRDGNLKLKTHRAhSVNPLPsNSSPupVV+SKRPtssSsAScPTtLpVRERsR....GulPpucFtpuoShpPtss MNQGSETTERIRTSSEGRMGAICRDGNLKLKTHRAhSVNPLPsNSSPupVV+SKRPtssSsAScPTtLpVRERsR....GulPpucFtpuoShpPts.. 0 3 3 3 +7419 PF07591 PT-HINT DUF1557; Pretoxin HINT domain Studholme D, Zhang D, Iyer LM, Aravind, L anon Blast clustering of Leptospira proteome. Family A member of the HINT superfamily of proteases that is usually found N-terminal to the toxin module in polymorphic toxin systems. The domain is predicted to function in releasing the toxin domain by autoproteolysis [1]. 
25.60 25.60 25.60 25.60 25.40 25.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.62 0.71 -4.27 4 642 2012-10-03 10:25:13 2004-03-10 17:20:07 6 125 235 2 152 664 82 116.00 33 14.87 CHANGED LVpTssGhpAIs+IpsGD+Vlups.pohcsuYKsVptpYsp.hpEhlalplsD.....psLlosc.HPFYsp.stalcApcLphGDcLlspsGshpsVpsIhlc..scPhKsYNlpVsDaHTYFVt.....TpGlWVHNu ...............................................tG.h.h.s.Itplps.G.D.Vhu..ts..t..o....sp...h.t....+.V..ht...h.a...s....p.........p.c...h...l.......l..pl.s.s..............................psl....h.s.st.HP......Fa.....s..p....................................t................t...W..........lpA..........t.cLp.sGsp..L........h.......s.p.s.......G............p..h..........s.V.p.s..h.th...c..............s..p..s.hpsYNLsVs.chHTYa..Vt.....sp...u....V..hV..HNt............................. 0 72 126 136 +7420 PF07592 DDE_Tnp_ISAZ013 Transposase_36; Rhodopirellula transposase DDE domain Studholme D anon Blast clustering of Pirellula proteome Domain These transposases are found in the planctomycete Rhodopirellula baltica, the cyanobacterium Nostoc, and the Gram-positive bacterium Streptomyces. 
22.50 22.50 22.50 23.80 22.30 22.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.82 0.70 -5.68 11 318 2012-10-03 01:22:09 2004-03-10 17:32:17 6 4 69 0 66 395 81 192.00 30 76.56 CHANGED lpcLcuLl-ssTtGDPp..SsL+WTpKSspplucpL.pppGap....lutpsVucLLp.chGYSLQustKTppGspHPDR-sQFchINcplpphpssspPV.ISVDTKKKEllGsa+NsG+pWpppups.cVtsHDF.s.shGcssPYGlYDlssNpGaVsVGss+DTu-FAV-Sl+pWWpphG+c+YPcApcLlIsADsGGSNG.RsRhWKhcL.QcLusphGLsIpVCHaPPGsSKWNKIEHRhFSaISpNW+GpPLsoaEsllNLIuuTTTppGLpVpspLDcptYpsGlKVoDcphcslp..IpRsshHs-WNYpIpPp ........................................................................................................h.....................................................................................................................h..pht..lG...p.G..h............p................uhh......t......hs.s..s...s.hhsttlt.hW.......h.t.ppl.l.hDsG.spNs.h.p.ah.th..t.hutp.th.lplhahPPhpSKaN.lE.+has.lp.p.WpGp.L..shpshlth..tT..hT..pGlt..stl.pt.Y.hGhpl..sp..p..thtth.....h.ht..h.tWsh.l.......................... 0 10 46 63 +7421 PF07593 UnbV_ASPIC ASPIC and UnbV Studholme DJ anon Blast clustering of Pirellula proteome Family This conserved sequence is found associated with Pfam:PF00515 in several paralogous proteins in Rhodopirellula baltica. It is also found associated with Pfam:PF01839 in several eukaryotic integrin-like proteins (e.g. human ASPIC Swiss:Q9NQ78) and in several other bacterial proteins (e.g. Swiss:Q84HN1 [1]). 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.31 0.72 -4.30 79 553 2009-01-15 18:05:59 2004-03-10 17:57:57 7 89 240 0 278 593 823 70.90 27 9.06 CHANGED sAlGApVplp.ss...uppphppls....uGuGahu.psshpl+FGLGs..sssssplplpW..P.sGphpphpsl.s....sspphhll ...................AlGAcVpl...h..ss............uttp....hppls....sus.G.ahu.ps.pshlHFGLGp....sspssplclpW..P..sGp...h.pp..hpth..t.....ssphh...h.................. 
0 144 226 255 +7423 PF07595 Planc_extracel Planctomycete extracellular Studholme DJ anon Blast clustering of Pirellula proteome Motif This motif is conserved as the N terminus of several Rhodopirellula baltica proteins predicted to be extracellular. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.94 0.73 -7.26 0.73 -4.01 27 37 2009-09-16 17:53:18 2004-03-11 10:45:04 7 22 4 0 24 38 0 24.90 37 1.14 CHANGED ppppppppsp+RRLthEsLEsRpLL ...pppppppsp+RRLthEsLEsRpLL 0 24 24 24 +7424 PF07596 SBP_bac_10 DUF1559; Protein of unknown function (DUF1559) Studholme DJ anon Blast clustering of Pirellula proteome Family A large family of paralogous proteins apparently unique to planctomycetes. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.77 0.70 -4.37 147 1532 2009-01-15 18:05:59 2004-03-11 10:51:16 6 21 22 0 391 1685 425 209.30 20 68.30 CHANGED QQAREAARRhpCsNNLKQlGLAlHNYH..DTatt.h...Psushssssssst.........................hs..........Wts...h..lLPalEQssl.a-ph....................shstshtsssssssstt................................ls...........sa......h.C.PScs............................................t.....ssssttsstsssssssstsssstsstssssGhhhhss..................thphpDl..pDGsSN.TlhluEpththsstststhh......................................................................sthststttssshshssssshsssssssssssttshsShHsG.Gsphh...huDGSV+F..lo-sl-hs 
...............................................................................tAREAAR.RspCtNNLKQlGLA.hpsY.p...s.s.h.s.t....h.......P.sst.hss..sss.s....................................................................hs.......ahh.............lhP.a.h....-..pt.s.h..hp.t...h....................................p...t................................................................h.hh..C..Pop............................................................................................................................................................................................................................................t.t.th...DG.sp..shhh.E.........................................................................................................................................................................................................................t.p....t............-ut...........s..................................................................................................................................................................................... 0 391 391 391 +7425 PF07597 DUF1560 Protein of unknown function (DUF1560) Studholme DJ anon Blast clustering of Pirellula genome Family Small family of short hypothetical proteins in Rhodopirellula baltica. 25.00 25.00 25.90 37.10 20.40 17.60 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.34 0.72 -3.96 2 4 2009-01-15 18:05:59 2004-03-11 11:09:06 6 1 2 0 3 4 0 38.50 75 69.06 CHANGED C..shGhL+Ps.hs.Ro................ISVRSAcISVPLAALAEH CVGNTGVLKPNIhsP+o................ISVRSAEISVPLAALAEH 0 3 3 3 +7426 PF07598 DUF1561 Protein of unknown function (DUF1561) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of paralogous proteins in Leptospira interrogans. 
25.00 25.00 385.70 385.50 18.90 18.50 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.19 0.70 -6.57 23 62 2009-01-15 18:05:59 2004-03-11 11:15:06 6 1 10 0 31 64 0 608.70 54 98.82 CHANGED Kl....Ll..hhhSlhl.....s.pTshsA.....lsspllQKPsDpP+DKsI+V.pVHsGtcYCYuPsFosGEuYIhIppCsp.pVhpARYDVFQRIuaNINsTWLChTAPpoVs....cuppsWDYlhLRPCsINDP.QRWIIK.....cNuFaTADt+aRlKDhsWYuYISKNuuDhYsHTLs.soMscWlpTlATPGNISl+T.IAWsh.os.G........ppYaIpssGSsp.ssTP..LYYNPENGHlAQY.PsSGslhCMhSp.ssspsWNWVpWthC..oD.....sl....SKcssuaWNl.hhssctGhIh.DYpGNhLRVT+YGosWGVsYTAKPsYLcpDTo..NSPTShFllspDl.cWsRYssuNLGcT.pYCPA.GpKcsls.....+pRlKR..oLPPDFpLT-pWl+RLa-IApSoo.....ssupppl.GhCGsChLpohQMLAELQEaHspsPLQsG.GYFFsTA.spDPFISFRQRaPtLsphLpss.shas.....ss.p.hspsppluhuuAhshLPQY-Wps.SshhpTcsEhhSclpsLlsuPsGolWhsllhcppsDG.shsGHA.PILRTspGLVlIPTNs.sshoL-paRpuLsPT....pDPppllsphhptus.psLt..sLsThQ.hstlhcsPhshhlSpRNCTGEG-cRRGoGchPpoohlNQCu...S....GRCu .....h.hhhlhhl.lhh.......s..s.lsh....tls.pllQKPTD.P+DKsI+l.hlHsGupaCYuPsFosGESYIhI-pChp.pVhsARYDVFQRISYNINNTWLCITAPEoVl....+ucpsWDYVpLRPCTINDPLQRWIlK.....-NuFWTADt+YRLKDhsWYuYIS+NSGDpYsHTLs.sSMscWlpTlATPGNISIpTSIAWshto..u........pRYFI+ssGSsK.NTTP..LYYNPEsGHlAQYsPhSGsLhCMYSph.ss.pWNWVpWthC......oDt..sl.SKcN...PuFWNV..hts-cGGhIh.DYpGNhLRVTRYGsNWGVAYssKPsYlcpDTs..pSPTSLFllc+sLLcWsRYTsuNLGKT-QYCP.A.GpKEshh.....ppRlKR...oLPPDFQLTE-Wl+RLY-IApSss.....sssp.pph.GlCGlChLpoFQMLAELQEYHSpsPLQuG.GYFFDTAPspDPFISFRQRYPpLsphLpslsphas.........hspsphlshuoAhsMLPQYpWps.SsphsTRsEhhSHIpSLIsSPsGSlWLulhtppcsDG...ohsGHAlPILRTSQGLVVIsTNs.sshoL-pYRpsLsPT....pDPpplIss.hcpsspsL.t..pLsTlQ.LsphYpNsFDhhlSsRNCTGEG-DRRGoGpYPsSo.lNQCu..u....GRCs.... 0 12 14 28 +7427 PF07599 DUF1563 Protein of unknown function (DUF1563) Studholme DJ anon Blast clustering of Leptospira proteome Family A small family of short hypothetical proteins in Leptospira interrogans. 
20.40 20.40 22.00 20.80 20.30 17.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.07 0.72 -4.08 2 16 2009-01-15 18:05:59 2004-03-11 13:20:55 6 2 6 0 7 10 1 36.70 74 15.61 CHANGED MNIILIsFFLLETLENLYsTYVEh.LKQhFLDphQKI.KssRK ..MNIILIGFFLLETLENLYuTYVEh.LKQhFLDphQKI.KhspK.......... 0 4 4 5 +7428 PF07600 DUF1564 Protein of unknown function (DUF1564) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of paralogous proteins in Leptospira interrogans. Several (e.g. Swiss:Q8F4V2) have been annotated as possible CopG-like transcriptional regulators (see Pfam:PF01402). 25.00 25.00 36.10 35.10 21.10 21.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.99 0.71 -4.92 14 111 2009-01-15 18:05:59 2004-03-11 13:28:11 6 1 7 0 32 122 0 159.80 35 92.35 CHANGED McllhLsospclpSth.......tcppssssoLLIPcshh.pLscp..cpKsLppcls.LLK+YsKhlhopchltpcssKshYQps.h..cLK+hshRssstsWshLGlLAtsHGVSRChLFshLLhL-...shscSIspshstGsPsFpts...aphhhclshtpNplo+clpht.ps....hhh.l .....................MthlhhssspclpSsh........cstsussoLLlPcshap+hstp......c+KsLpp+LPhLL++YsKhlsSh..cRLpp.+.As........KhpYpcssG.....chK+holRVpousWupLGsLAsAHGVSRCYLFNYLLhL-...t.ts.hlpThstGVPsFHhs...Yphh.hclshppN.loRcLphcPpsh...h............................................ 0 24 24 24 +7430 PF07602 DUF1565 Protein of unknown function (DUF1565) Studholme DJ anon Blast clustering of Leptospira proteome Family These proteins share a region of homology in their N termini, and are found in several phylogenetically diverse bacteria and in the archaeon Methanosarcina acetivorans. Some of these proteins also contain characterised domains such as Pfam:PF00395 (e.g. Swiss:Q8YWJ6) and Pfam:PF03422 (e.g. Swiss:Q9FBS2). 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.92 0.70 -5.23 8 240 2012-10-02 14:50:22 2004-03-11 14:07:11 6 32 169 0 75 498 272 161.50 28 24.64 CHANGED lsGND.sssGo.cusPa+TIT+Alhhu+uss..lIplAPGsYssuoGEpFPlhlP-GVsLhGDEsuKGh....tslhhs.sh.hs+su..hIpGuu....sDls....hp.sTIlssNposIAGhsITNP....................s.....h+usulalpus.uspI+NNThou.shtcG..........Ips.........ssat.ssusuG.....Nl.......IoGNp......lt...........sphs.....GIuIsstpsu..h.....sKl.EN................N...lIsp.........Nth......G...Vs.sslu.hDLGsuSpsosGsNphSsNucpD.Iph..sspsspsL...hAhNNphD+.PPTl .......................................................................tGsD.sssGo.pp....sPa..+T..I..spAl.p.h.A.tsGs.......slhl.tsGs.Y..st......p......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.t................................................................................................................................................... 
0 39 58 69 +7431 PF07603 DUF1566 Protein of unknown function (DUF1566) Studholme DJ anon Blast clustering of the Leptospira proteome Family These proteins of unknown function are found in Leptospira interrogans and in several gamma proteobacteria. 27.50 27.50 27.50 27.50 27.40 27.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.15 0.71 -4.39 70 852 2009-01-15 18:05:59 2004-03-11 17:18:20 6 68 343 0 265 776 364 132.00 20 39.54 CHANGED sss.VpDppTGLhWp+Cs.G.th................tGsstthsWpsA.....lshs......sslsts...............shss.WRLPslcELtSl.....l....-hssh....sP.......ls..........spsFss......s............sshaWoSTs........tssst.....t..uWs.....lsass...Gtshh.......spssthhshhVRs .............................................................s....lhD.hTsLhW...t..p.................................................tspsthh.sa.ppA........................hphs............pshsts..........................................GhssW..R......LPshp.......ELt.sL.......h..................chspt.......tst.................hs..............................sshFss.........s.....................sshaWou....os.........pstst.................t...uah...hsh.s........Gth..........thttt..hhh.l..s.................................................................................... 0 112 193 238 +7434 PF07606 DUF1569 Protein of unknown function (DUF1569) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of hypothetical proteins identified in Rhodopirellula baltica. 
20.80 20.80 20.80 21.00 20.50 20.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.79 0.71 -4.30 13 113 2012-10-02 14:44:17 2004-03-15 11:23:25 6 3 83 0 55 261 38 144.50 23 82.67 CHANGED hsphtpLpassLpsAls-lppLppsuhpps..GsWsLuQlhpHLAtuh-hSl-GaPthhstlFt..+slh+hhahshht+GcMp...sLctshPuuh..ssslD-stslc+Ltpolp+FpsasGchsPH..AaGpLs+spachhHshHhtpHLpFlpPp .......................................................................................thh............pphhsclp.pLp...p.s..ph......Gch...sluQhltHs.......s...ts.h...c........h....u...h....p.....s....h..s.h........t..s....h.ht.............phl..h.+..h.h..h..p........hh.h...p...sc..........pp..st..s..ts.s.t..h.....h..h....ss.....s....h.....-....h....c..p...p....h..p.cL..hp..t....lp...pa..p.......p...t....p.........s...s...h........t.........s........H...shFGcLopccWsphthpHlsHHLpQF...s.... 0 30 48 53 +7435 PF07607 DUF1570 Protein of unknown function (DUF1570) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of hypothetical proteins in Rhodopirellula baltica. This family carries a highly conserved HExxH sequence motif characteristic of members of the Peptidase clan MA. 21.70 21.70 21.70 22.10 21.60 20.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.75 0.71 -4.14 9 34 2012-10-03 04:41:15 2004-03-15 11:53:20 6 4 13 0 18 107 21 125.70 27 27.92 CHANGED slATlsHEAsHQlAaNsGlpsRhuc.PhWloEGLApaFEsschtsttuW+GlG.tlNphRlpp.acchhs.ccsuss.t...chIssDptFptsps...s.sAYupAWAlsaaLhcpchccaspalpplut+hPhp ....................................tTlhHEusHQl.saNhGltpRh..u.s....h.P..hWlsEGlApaFEsssh..p....tsth.pu..lG..tlNph+l....tt.h...c....phhs.tp..s...stsht............phl.hs.D.p..t.......F.ttspt.....stsu..YA..p..u..WuLsaaLh..p....p....c.......p..c...a.scaLppluph.sh...................................... 
0 16 17 17 +7436 PF07608 DUF1571 Protein of unknown function (DUF1571) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of paralogous proteins in Rhodopirellula baltica. 25.70 25.70 25.90 25.70 25.40 25.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.03 0.70 -5.21 4 69 2009-01-15 18:05:59 2004-03-15 11:58:36 6 1 57 0 23 72 16 195.40 40 60.37 CHANGED hlK+Ep.lsGpLpEsphMslKlRs+.psttt.spPhSVYLpa.sPcohKGREVlYlEspN-GphlV+cGGhtGphl.Tlpl-PpGhLAMctpRYPIT-lGlcslhp+LIEhtc+Dl-ps....sscsshhcst+hcG+ssThlQl..spPo+css.sF+pAplaID-EhclPIphcuasWPs........sEs-pspLIEpYsYtDLplNsuLsss-FDsTN.cYpFc .......................hlRQER.IuGch.u.s.P-pMhlKlp+p.........PLRVYAKWLPsGA+uGQElIYDsocRsDEMYGHLGGlLG+ls...hhhssDGsLARAQSNHpV+DLGsEalsshaLsEu+K....h..hEAG....sh+sTplEu+Tl.cGlRVlALTa..EoPsG+PQhhhpKcplh..lD.hc..sh.h..plEuY............................ss-.GclhE+lVa-cIs.p.s.L--osFDPcNPDY+F........................................................................ 0 13 14 17 +7437 PF07609 DUF1572 Protein of unknown function (DUF1572) Studholme D anon Blast search with Q7UW06 Motif These proteins, from several diverse bacteria, share a short conserved sequence towards their N termini. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.15 0.71 -4.69 15 202 2012-10-02 14:44:17 2004-03-15 12:06:42 6 1 177 0 65 278 44 157.90 45 91.07 CHANGED shup..sYLpsspppFcshKphG-+uluQLo-EpLphshs--sNSIAlIVKHluGNMhSRWTDFLT.....oDGEKssRNRDuEF-sshpo+pEllttW-cGWphlFsALssLss-DLpcTlhIRGEuHoVhpAIpRQlAHYuYHlGQlValuKhl+uscWpoLSIP+Gc ..........................................................................s..sppYlpsshppFcthKp.GE+sLuQLo....E.p.lpW.s.s.p.E.-.oNSIAlIlKHL...p...G...N...M...p...S...RW.T.D.F.L...T.........oD..G..E..K..h..s.R.N.R.....D...u...E.F-.s..s.h.p.o...K....c....E...l.....LtsWpc...GWp....hlF.ps..h.ss.Lss-cLh....p...s..V..h..I.....R....G.E....u.H....TV..h..p....A..IpRQ.....lu..HYuhHlGQIlYluKhLKps-WcsLSIP+Gp....................................... 2 30 53 61 +7438 PF07610 DUF1573 Protein of unknown function (DUF1573) Studholme DJ anon Blast clustering of Pirellula proteome Family These hypothetical proteins, from bacteria such as Rhodopirellula baltica, Bacteroides thetaiotaomicron, and Porphyromonas gingivalis, share a region of conserved sequence towards their N-termini. 20.50 20.50 20.70 20.90 20.40 20.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -7.98 0.72 -4.38 99 698 2009-09-13 15:23:25 2004-03-15 12:54:42 6 12 236 0 193 676 292 45.00 38 20.45 CHANGED sF.phpNsGcsPLlI..splpuSCGCTssphs+.cs......ItPGcp.uplpVpa .....FphpNsGctPLlI..splpsSCGCTss.p.as..+.cP.....................ltPGcp.uplplsa............... 0 105 174 193 +7439 PF07611 DUF1574 Protein of unknown function (DUF1574) Studholme DJ anon [1] Family A family of hypothetical proteins in Leptospira interrogans. 
25.00 25.00 25.00 26.60 24.90 24.70 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.13 0.70 -5.58 4 48 2009-01-15 18:05:59 2004-03-15 12:59:46 6 2 10 0 17 40 4 313.40 24 92.76 CHANGED Mh+KhaLhhPlLhFlhsFslDKlho.phhcsYhptuhshlaYchKcpLhpcLl.....c.pppsc-pKKlhlhFGsSRhh.FpssslcpK.hsDWhlYNFSuPsusPsYaLYaLE+lhssGlKPDhllh-.sP..Fsssoshhhc.sLtYuhDs.FlL+YhsphShpDhstahhs+LFtssh.pPchp.hhtRhK-tsh..h..uh.pshhltNLKpscusAhosu.s.V...spss-+LcccAh+hhs.hhssashushQhtFhtphLplsccpsl+shhlhPplhcshcchhpphch..........hc.WhsIlcpltEppuss.hsMsc.c..htCppa.DstHhSssCYpsahcFIlc+lsh ...................................................................................hpp.hlhhPhllhhhhFhlDKlhhlthlcs..hh.p..tths.hhYc.+.cplhpcLh..................cpp..tp..ttcp+KlhlhhGsSRsh.F............st.phl...c......cp..hs-....a.lYNFSsPsusPsYahYah..E+lhp.sGl.+PDhllh-hsP..Fspssshhhc.sLthuhs..Flhpahs................huhpc..........hphahhp+lFtsth.pPphp.hhtRh...p...stt...h.........th..pphhh.....sLpptct......sthssh......................ps.-p.Lp..ppu.p.h...hp.hhssaphu.hphtFhcphlphhccpsltshlhhPtl..shpchhcphph............hc.W.hshhppl.t...cph.shs.....hlshsp.c.th..pCptasDsuHhSssCa.t.hchlh.ph..h........... 0 5 13 13 +7441 PF07613 DUF1576 Protein of unknown function (DUF1576) Yeats C anon Yeats C Family This small family is found in several undescribed proteins. The alignment is distinguished by the frequent occurrence of conserved glycine and aromatic residues. 
25.00 25.00 34.30 26.70 19.50 18.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.25 0.71 -4.76 17 337 2009-01-15 18:05:59 2004-03-15 14:44:55 6 3 162 0 52 279 0 176.20 35 82.51 CHANGED hlh.u...spp.hpGhhpIhppsuhLlTDahtlsGhGuoalNhGlLsll.hhhlhlhpsplNGPsluulhTlsGFuhFGKplhNlhPlllGlhlhulhpp.pshss..hllsuLFGTsLAPlsuph..............G.lhGIlsGFlh.slstshuhlHtGhNLYNsGFouGhlAhhlhsllcsa .......................h.........pphhpGhhpIlpssuhLhTDahthsGhGushlNhGllsllshhhlhh.h.tsplNGPsluulholsGFShFGKplhNhhPIllGlhLhuhh...pp...pshsp..hllsuLFGTuLAPlsu.h..................GhhhGllhGalhsslstphthhHtGhNLYNsGFouGllAhhllsllc.......... 0 34 48 52 +7442 PF07614 DUF1577 Protein of unknown function (DUF1577) Studholme DJ anon Blast clustering of Leptospira proteome Family A family of hypothetical proteins in Leptospira interrogans. 25.00 25.00 56.60 56.00 22.00 21.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.60 0.70 -5.48 6 39 2009-01-15 18:05:59 2004-03-15 15:06:33 6 2 9 0 14 43 0 259.80 29 66.28 CHANGED p+cHlIpKYLlpp-LhhKhss.ccchs.IhcllE-GpKIlhtsus.psls.sscllLYcILAKYlpLECsllcKh-sslhhlpVscluIA+ppRsssRhsV..-shalTNVloSKTl......I-ANhFsIP....T.VKVsFpDacs+LKpcp.shlplDl....FtsslscKFElV...............KKTcKhLaIcNTp..........DppSYtu.ss.shIchccEl..---lcspl+cYKcpcIh....SELIlPIlYlNcpcppI.PlGYlhlpo+Ep...slopppl.clppL ..............................................phh+lltcaLhtppl.hchs.ptpphh.lhchhc-uphIlh.s.pt.pshs.psclsLa+lLu+alpl-spllc.chcsplhhhplscltIApppRpssRlsl..s.shahsNllssKol......Ists.hsIs....TtV.cVpFpDhcp+lKpch.shhhIsl....atstlsccaElV...................K+ocKhlaIpsTs..........sppSYss.sppshlchtcEl..--tlcphhpca+spKI+....S.LIhPIlYhsctcptI.PlGYlhlpocpp...pIstppl..ltpL....... 
0 5 9 9 +7443 PF07615 Ykof YKOF-related Family Yeats C anon Yeats C Family \N 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.68 0.72 -3.82 13 198 2012-10-01 20:40:01 2004-03-15 15:41:33 6 4 133 16 68 262 49 78.90 25 77.88 CHANGED .uusQFSLYPM.ssDal-hIhusl-hhc..puslhscocchuTpLsGcstslFssLcslFhpsupsssHlslssTlStssPuc ....................ssphSLYPh.........s........s.....c...a.h.s.h.Ih.sslctlc......psslhhp.oss.huTtlpGchppVasslpshattutp.p....HhshphshShtsPs........................ 0 21 47 56 +7445 PF07617 DUF1579 Protein of unknown function (DUF1579) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of paralogous hypothetical proteins identified in Rhodopirellula baltica that also has members in Gloeobacter violaceus, Sinorhizobium meliloti and Agrobacterium tumefaciens. 20.70 20.70 20.80 21.90 20.50 19.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.95 0.71 -4.58 17 94 2009-01-15 18:05:59 2004-03-16 15:14:49 6 1 86 0 51 110 25 150.90 25 89.46 CHANGED hs.cPppEHpWLpcllGpWsh-s....-s.MsP-pPspcspuhppVRp.lGGhWl.s-upGcMs.....t.GsshsolhTLGYDss+pcaVGTWlGSMMothWlYcGpLDpss+sLsLpuEGPshss.....sG+hupYRDVlphhDsscRshoSthhsp-GpWhphMsupY+Rp..c ...............h..psttpHphLppl...lGcWssps....chhh...sPsps.......st.psp.hptss+..l.sG.hallp-sp......Gphs........Gps..hpslhslGYDstppcaluoWlsS..Mhst..hhhhcGp..hsts.sp.slsLpspsssht.......sG.h.hpa+chhphh.sssphshp..h.t..t..sGphhphhphpapR..h.................................................................. 0 23 32 41 +7446 PF07618 DUF1580 Protein of unknown function (DUF1580) Studholme DJ anon Blast clustering of Pirelllula proteome Family A family of short hypothetical proteins found in Rhodopirellula baltica. 
22.90 22.90 23.60 24.70 22.30 22.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.48 0.72 -4.60 5 10 2009-01-15 18:05:59 2004-03-16 15:20:36 6 1 3 0 6 14 0 53.60 34 55.43 CHANGED PLLcAlcLEoGhRPusSTlhRWuh+PNRHGNhLcoWhlGGR.RhTSVpAV+RYlcAsT .........shhphhc.psG.RsHsSTshRWsh+ss+pGshLcohhlGGR.RhTSVpAVpRalcsss... 0 6 6 6 +7447 PF07619 DUF1581 Protein of unknown function (DUF1581) Studholme DJ anon Blast clustering of Pirellula proteome Family Several Rhodopirellula baltica proteins share this probable domain. Most of these proteins are predicted to be secreted or membrane-associated. 20.50 20.50 21.00 29.10 20.20 19.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.89 0.72 -4.00 7 15 2009-01-15 18:05:59 2004-03-16 15:26:38 6 4 2 0 8 16 0 83.90 35 7.17 CHANGED Rssl+Dspppshl..NGR.lppcslhssusPWlulRu.hpsputhRNL+IsGsPplPsplshlsutpLhGW.ssYasts....tsp.tGph ..Rssl+Dspspshl..NGR.lpp-sltssusPWlulRuphpsputhRNL+ITGsPpIPcplshlsuspLpGW.ssYasts....tsc..sp......... 0 8 8 8 +7448 PF07620 SLEI_Leptospira SLEI Studholme DJ anon Blast clustering of Leptospira proteome Motif This highly conserved sequence motif is found at the C terminus of several short hypothetical proteins in Leptospira spp and related organisms. 16.20 16.20 16.50 17.80 16.10 16.10 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.85 0.74 -6.21 0.74 -3.76 10 31 2009-01-15 18:05:59 2004-03-16 15:31:40 6 4 30 0 4 38 0 15.30 65 4.13 CHANGED stphLRDNSLEISNph I.IMEE.GNSLEISRQM... 2 2 2 3 +7449 PF07621 DUF1582 Protein of unknown function (DUF1582) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of hypothetical proteins in Rhodopirellula baltica. 
25.00 25.00 26.60 40.60 23.80 18.90 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.84 0.73 -7.06 0.73 -3.70 5 9 2009-09-11 06:28:21 2004-03-16 15:42:10 6 2 1 0 9 9 0 28.90 38 19.74 CHANGED RuLPSPcsLs-pRlscsssCsscG.tsLA ..RsLPSPc..hstRlss.ssCspcG.hsLu. 0 9 9 9 +7450 PF07622 DUF1583 Protein of unknown function (DUF1583) Studholme DJ anon Blast clustering of Pirellula proteome Family Most of these Rhodopirellula baltica hypothetical proteins also match Pfam:PF07619. 22.80 22.80 22.80 35.40 22.70 22.70 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.24 0.70 -6.01 6 21 2009-01-15 18:05:59 2004-03-16 15:48:41 6 5 2 0 11 21 4 306.00 25 36.07 CHANGED tltGSphESLLpYhRPlh-sspIEYEFFYDsGpsusHPAlsRhAhLIcs-GVu.H.hTDGtappo.LRPDNthhh.p.tpcpuslPL+sspWN+scLphtG-clpLpLNGpsIaEpsl-sps.sRsFGLFHFuDpopl+VRsLpLSGDWPppL..hspQpLAsshsscLctssscLspsFpHDFcc.shsschF.tpst..scshhss--Gl+hstuo.usacphshsPphplcGDFDlTAcFsthQsss.Ess.huulthplsL-sstpcpl...tsspthscppGp+lhsshphphssGpt.ppthst+psp-uTSG+LRlsRRG-plaaLFApsDSspF+LltpEpsscuslssuGIpL.sslssssGosSupWsslolRAEcl...ssusPDhpssLu..plc.s+st ...............tLL.YhRPhhcttplpYpFaat.spstsaPslsRhshhlp.sGht.p.hp.s...t.s.h.ssNh.h......t.sshs.psptWNphcl.h.sspl.lpLNtp.lhpt.lps.t.s.pFGLa+.tcppthhhpshhhpGcWPttl.....p.hs....tpLttthptLspsapaDFpc.t.sschF.thst..stsh.ts-cGlphptsu.s.apthsls.phtlcGDFDlshpFs.hph.s.tss...stlthplphssstpspl....hhhthsttts.phhtthphthssGp..phhhshthp.s.usushLRlsRRG-plahLhuptssp.hplltppphsptsl..hslph.h.h.s.stsspsss.hhpplpl+Acpl........................s.......... 0 11 11 11 +7451 PF07623 PEGSRP DUF1584; Protein of unknown function (DUF1584) Studholme D anon Blast clustering of Pirellula proteome Motif This sequence motif is highly conserved in several short hypothetical proteins in Rhodopirellula baltica. It also is associated with Pfam:PF07621 in Swiss:Q7UJJ9. 
25.00 25.00 29.60 28.80 19.80 17.80 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.91 0.72 -7.04 0.72 -4.64 6 15 2009-01-15 18:05:59 2004-03-16 15:59:05 6 2 2 0 14 15 0 26.50 76 24.92 CHANGED LAVhRKPPGEEPEGSRPSATSLVVHVV .LAVhRKPPGccPEGSRPSATSLVVHVV 0 14 14 14 +7452 PF07624 PSD2 DUF1585; Protein of unknown function (DUF1585) Studholme DJ anon Blast clustering of Pirellula proteome Family A conserved sequence region at the C terminus of several cytochrome-like proteins in Rhodopirellula baltica. 21.40 21.40 21.40 21.70 21.20 21.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.26 0.72 -4.32 63 199 2009-01-15 18:05:59 2004-03-16 16:05:40 6 17 35 0 86 222 297 75.30 25 9.62 CHANGED ssGppFpshtpl+phLhpc.p-pFscslsc+LlsYALGRslphsDcstl-pIhsphcpssaphpsllptlVpSc.Fp ................sGppFssht-L+phLhpp.tcphscshsc+llsYAlGRslp.htDcstlcplhpphppssaphpsLltslVpSp.Fp.... 0 79 80 83 +7453 PF07625 DUF1586 Protein of unknown function (DUF1586) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of short hypothetical proteins in Rhodopirellula baltica. 20.80 20.80 21.50 22.30 18.60 19.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.62 0.72 -6.59 0.72 -4.12 3 11 2009-01-15 18:05:59 2004-03-16 16:08:24 6 1 1 0 11 11 0 21.80 73 40.82 CHANGED SRTALAAVSQTPTGANAcWHLL SRTALAAVSQTPTGANAaRhll 0 11 11 11 +7454 PF07626 PSD3 DUF1587; Protein of unknown function (DUF1587) Studholme DJ anon Blast clustering of Pirellula proteome Family A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07624. 
21.10 21.10 21.10 21.40 20.60 20.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.00 0.72 -3.97 66 193 2009-01-15 18:05:59 2004-03-16 16:12:43 6 25 37 0 90 219 315 67.80 31 8.22 CHANGED RRLs+pEYpNTl+DLLGlc...hshsp....thPsD.ssspGFcNsGpsLthSshphEpYhpsAcphlcpAhhpss ...........RRLs+tEYpNTl+DLlGls....hshsp.............thPsD.ssstG..F-NsussL.s..l.Sshhh-pYhpuAcplhspAhh..s................. 0 78 83 87 +7455 PF07627 PSCyt3 DUF1588; PSC1; Protein of unknown function (DUF1588) Studholme DJ anon Blast clustering of Pirellula proteome Family A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07626 and Pfam:PF07624. 21.40 21.40 21.40 21.50 20.80 20.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.39 0.72 -4.16 92 252 2009-01-15 18:05:59 2004-03-16 16:17:41 6 38 43 0 118 280 421 101.80 36 12.41 CHANGED pRGGlLspuulLshsosuscoSP..lpRGsWlh-plLssssPsPPs.sVs....slpsstsst.........TlR-pl.thH.ppsssCuuCHp.clDPlGauhEsFDslGpaRsppsst ....................RuGlLopuulLshsussscoSP..lpRGhWlhcplLs.pssPPPPs.sVs.......sltsstsss...............ThR-pl..ptH....ppss..sC.u.uCHp.plDPlGauhEsFDslGpaRsp-t.t..... 0 101 107 114 +7456 PF07628 DUF1589 Protein of unknown function (DUF1589) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of short hypothetical proteins in Rhodopirellula baltica. 
20.60 20.60 21.50 21.40 19.80 19.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.01 0.71 -4.33 2 15 2009-01-15 18:05:59 2004-03-16 16:24:54 6 1 2 0 13 15 0 82.70 31 79.55 CHANGED MLWN+AuRPAGHARLSDVRPFT.PGTTWPT.LSQPDASARDTSEFWRSHQRR.GHYLAPSRpFGTKRAVQPVTQsSAMSVHSPRQVQPGLHhCPNPTRQRcTRLNoGEATNVGQVITWHPADALEQStPSSRSRKTQRCPSIHPARYNLsYI.VPTRRVSEtHV .......................................................................................+..R.sahhtPSRpFtTKRuV................................................................................................................. 0 13 13 13 +7457 PF07629 DUF1590 Protein of unknown function (DUF1590) Studholme DJ anon Blast clustering of Pirellula proteome Family These hypothetical proteins in Rhodopirellula baltica have a conserved C terminal region. 25.00 25.00 78.70 78.10 18.80 18.10 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.51 0.72 -4.35 2 5 2009-09-11 20:23:00 2004-03-16 16:28:30 6 1 1 0 5 5 0 32.00 88 26.02 CHANGED MtsGAchPPPEISLNAhFPTPPAApAtFSRlh MENGADCPPPEISLNARFPTPPAARAGFSRRY 0 5 5 5 +7459 PF07631 PSD4 DUF1592; Protein of unknown function (DUF1592) Studholme DJ anon Blast clustering of Pirellula proteome Family A region of similarity shared by several Rhodopirellula baltica cytochrome-like proteins that are predicted to be secreted. These proteins also match Pfam:PF07627, Pfam:PF07626, and Pfam:PF07624. 
22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.48 0.71 -3.77 85 245 2009-01-15 18:05:59 2004-03-16 16:52:12 6 37 41 0 113 273 427 126.60 31 15.60 CHANGED LssaELAoRLSYFLWuShPDpc.LhphAppucL.p...ssphLcpQlcRMLsDs+u..cphspsFssQW.Lshc.plsphs..ctpha.s.pa......ssp.........lcpuhppEshpFhppl.lccs..tslpcLlsuDao.alNppLAca.YGl ...........................................................................................LssaElAoRLSYaLWuShPDcp.LhphAppGc.Lp....sspslptQscRMLs.D.s.+u....cthspsFstQW.Lplc......pls....p...hs....Dtpha..spa................ssp...........................lppshppE...sptah.ppl..lccs..ts.ltcL.lsu-as.alNppLApa.YGl............ 0 99 105 110 +7460 PF07632 DUF1593 Protein of unknown function (DUF1593) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins in Rhodopirellula baltica that are predicted to be secreted. Also, a member has been identified in Caulobacter crescentus (Swiss:Q9AAT9). These proteins mat be related to Pfam:PF01156. 
42.90 42.90 50.40 43.20 42.80 40.80 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.91 0.70 -5.08 29 158 2009-01-15 18:05:59 2004-03-16 17:00:58 6 7 98 1 97 173 8 254.30 37 53.54 CHANGED RllVhTD..lts.EPDDtpSLVRhLLYuNch-lEGllsooS....hahpsp..............................spsc.lpc.l...lcAYucVhPNLtpHsss..............YPos-hL+SllphGt.s..................uh.shGpsps.osGSphIlptlpc.s-s..cPLal.sWGGsNsLApALhplcpphstpp.....hpplhsKlRVYsIuDQD.........-s.usWIcppaPc.lhYItu..hpsh.ththssWsshssph..........p.phhopsW..lpppIpp.hGPLGuhYPs........................................hpahs........EGDTPuFLaLlssGLssP-pPsaGuW.GGRa ...............................RlhlhTD..lts...EPDDtpShlRhLlYuNph-lEGlluooS....hahtsp..........................................stsp.lpc.l...lc.uYtclhsNLppHsps..............YP...os-hLpullppGt.s....................................s.htslGpsps.opGSphllptlp...........c..sss..cPLWl.sWGGsNsLAQALhplcpphsstp.....htchhsKlRlYsIuD...QD.........-s.usWIcppaPc.lhaIts...hhshtt..hhhtsWsshssph..............p.phhsppW..lcpsIpt.tGPLGs.t.YPs.........................................h..ta...h.h........EGDTPoFLhLlpN...GLss..s-cPsaGuW.GGRa.......................................... 0 34 63 88 +7462 PF07634 RtxA RtxA repeat Yeats C anon Yeats C Repeat This short repeat is found in the RtxA toxin family [1]. 20.80 5.00 28.70 5.00 18.20 4.90 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.46 0.74 -6.50 0.74 -3.24 99 3103 2009-01-15 18:05:59 2004-03-16 17:20:41 6 42 64 0 556 2748 47 18.50 35 20.18 CHANGED utGsuNlls+s......Gsssslsh .....hhGtuNllT+l.......GsGsslt.......... 0 42 271 437 +7463 PF07635 PSCyt1 Cytochrom_C_p; PSC3; Planctomycete cytochrome C Studholme DJ anon Blast clustering of Pirellula proteome Family These proteins share a region of homology at their N-terminus that contains the C-{CPWHF}-{CPWR}-C-H-{CFYW} motif typical of cytochromes C, or CxxCH. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.27 0.72 -3.65 141 688 2012-10-03 10:02:11 2004-03-16 17:24:33 6 75 104 0 304 822 1079 58.40 31 7.34 CHANGED CapCHGscpp..cusLcLDstsshht....sspsG......sullPGcscpS.L.hpplt.spcpshpMPPsc .ChpCHusspp........cusLcL-shpshhp.....................sspsG........ssllP.Gcs.cpS.L.hpp.lt....sp...s.........s.s..p....MPP........... 0 232 290 300 +7464 PF07636 PSRT PSRT Studholme DJ anon Blast clustering of Pirellula proteome Motif This motif is found at the N terminus of several short hypothetical proteins in Rhodopirellula baltica and the predicted Arylsulfatase B (EC:3.1.6.12) Swiss:Q7UX97. 20.20 20.20 25.50 24.50 18.10 18.10 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.40 0.72 -4.48 4 7 2009-01-15 18:05:59 2004-03-16 17:31:57 6 2 1 0 7 7 0 30.60 59 19.12 CHANGED cGNptpaHARPSPSRTPERRRSo.PQTphRER .......ushhp.HARPsPSRTPERRRSoSPQTphRER 0 7 7 7 +7465 PF07637 PSD5 DUF1595; Protein of unknown function (DUF1595) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of proteins in Rhodopirellula baltica, associated with Pfam:PF07635, Pfam:PF07626, Pfam:PF07631, Pfam:PF07627, and Pfam:PF07624. 21.20 21.20 21.20 22.00 20.90 19.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -9.09 0.72 -3.93 81 218 2009-01-15 18:05:59 2004-03-16 17:41:47 6 29 39 0 105 247 304 63.60 29 7.82 CHANGED tpu+..phlppFup+AaRRPlsss-.lstlhslacpttpp....upsapp.......ulctsltslLsSPpFLY.hsEt ......pucphlppFupRAaRRPlsssE..lpphhslappstpt......stshpp.......ulchslpulLsSPpFLYhsE..... 
0 91 97 102 +7466 PF07638 Sigma70_ECF ECF sigma factor Studholme DJ anon Blast clustering of Pirellula proteome Family These proteins are probably RNA polymerase sigma factors belonging to the extra-cytoplasmic function (ECF) subfamily [1] and show sequence similarity to Pfam:PF04542 and Pfam:PF04545. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.04 0.71 -4.62 7 258 2012-10-04 14:01:12 2004-03-16 17:52:40 6 7 143 0 120 8136 1123 167.30 21 87.84 CHANGED .pcloplLpplcsGDstAsstLh.hlYp-LRRhAtsph.psE+hspsLpsoAL.Va-AalRhlstpch.chsuRsahauhuscsMRRILl-pARRcpppKRGGchhRtpLsp..ss.......hc.................sss-.lLsL--uLppLhs.hsPcpt+lVELRhFsGLohcEhAphLslShRTlcRpWthAR.sWhtpchps ..............................................................................................................................................h.l.tt.h...p.t..G.s..t.A.h.p..t....L.h.hh.a..pL.+.p..h.A.t.t.......h.......h....p...t...t..t.................s...t..s...................p.....s.........s.s..L...l.p..-.u.a..l...+...h......h...p............t.............p......h...p....s..........c....t...c....a....h.......t....h.....h.s....p.....h....h.....R...p.....h....l....l..D...t..t...R....p........p......t........s.......t.......K...........R.........t.........s......t....h.........h...h...p....l....t..t......t.............................t.t.................................................t.p......c....p....l....l.....t.......l.....c......c.....u.....L.....p.....p.......L.....t....p.....hs..s....c....p....t....c....h.l.p...L.p....a....h..s........G.....h..o..hpE....lAphL..s....l....S.....p....o....l....c....+....c....h....p.....hu....R..shlttt...t................................................................................... 
0 83 100 112 +7467 PF07639 YTV YTV Studholme DJ anon Blast clustering of Pirellula proteome Repeat These hypothetical proteins in Rhodopirellula baltica contain several repeats of a sequence whose core is the residues YTV. 21.00 21.00 22.30 21.00 20.80 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.64 0.72 -4.23 18 108 2009-01-15 18:05:59 2004-03-16 18:00:16 6 10 10 0 52 113 196 43.40 40 33.91 CHANGED oYTVphPVhETcp+shpYoVp+PVaETpsp..sYTVpVPshET+sp ......................YTVp+PVhETpp+shpYTVp+PVhETppc..sYTVp..+PVhETh......... 0 52 52 52 +7468 PF07640 QPP QPP Studholme D anon Blast clustering of Pirellula proteome Motif These Rhodopirellula baltica proteins share a highly conserved sequence, centred around an invariant QPP motif, at their N termini. This motif may represent an export signal. 25.00 25.00 57.60 57.60 17.20 16.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -7.87 0.72 -4.28 9 11 2009-01-15 18:05:59 2004-03-16 18:04:46 6 4 2 0 9 11 0 36.00 49 13.76 CHANGED MASsQPPLTRCGShNQQPspshpssttspph.s.ss MASsQPPLTRCGShNQQPspuhpssthspph.shs.. 0 9 9 9 +7470 PF07642 DUF1597 Outer membrane protein family (DUF1597) Studholme DJ, Bateman A anon Blast clustering of Pirellula proteome Family This family of proteins are likely to be outer membrane beta barrel proteins. Possibly acting as porins. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.28 0.70 -5.42 89 396 2012-10-03 17:14:37 2004-03-16 18:17:17 6 1 220 0 175 591 194 345.70 18 84.83 CHANGED slpluGhl-shaphshsps.tst.....................hssps..sphsls.s.lhhptsssp......hshthslhaGstut.s.hs................................tsthhhsl.pAYssht....hscslslssGpasThlGaEsh.shsNhsao+uhhhp.s.PahcoGl+ssYssss.phshhhulhsG.....h................p.h...........ssss....t.shssplsa..tsstphslshshhhGsps.....t............................spssphh...hslsssaplss........phplshshsashtptt......................sssssssaaGssh...................................ahp..Yshs-...phuluhRsEaapsps...............................shsssssspshuhTlsssap................tsslhlRsEhRhs......p....stst........h..Fhps.ssps.......................spsthshssshtF ...........................................................................t.hphtGhlpshh.hs..sp...tst...................................hssps.........sphplst..h.lhhppssst..................hshhhphhhGstuths.hh...................................psththsl.pAalpht.....ht.p.s..lslphGphhohhGh.Esh...s...hs...N.h.a...o...t...u....hh.....hph..t.P..ah..psGlhs.sa..t.hss....phsh..hhulssG...............h.....................................................................psh.........................psss.............t.shhstl.sa...t.ssph..s..lt..hsshh..Gsps.....st.....................................................ststphh........hslhh.saph...s...c..............................p.hslsh..phs.ahtppt.............................s.stsss.haGssh...................................................................ahp........Ys..hsc.......phsls.......h...Rs.Ehapcsst.............................................................hsh.h.ss.ssss...h.h..shT.lsssap.........................................hsshhlRsEhRhc............p.......utss........s....hs.s.sst.........................sthhhshshhh.F.........................................................
.......................... 0 76 146 161 +7471 PF07643 DUF1598 Protein of unknown function (DUF1598) Studholme DJ anon Blast clustering of Pirellula proteome Family A family of Rhodopirellula baltica hypothetical proteins of about 500 amino acids in length. 25.00 25.00 34.60 77.50 24.30 19.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.80 0.72 -3.95 9 18 2009-01-15 18:05:59 2004-03-17 09:12:34 6 1 9 0 8 18 20 83.70 38 16.70 CHANGED CSIDPssEGLt+hppalpplsst....uthsstt...hhpshcpsLGhQ-lpVhGlssso+aARlLVEADY+MKRIuhGh-psslt.l.SY CSIDPs.EGLp+hppalpplssp....sshsstt...hhssLcpsLGhQ-lpVhGlssco+hARlLVEADY+MKhIuhGl-psslt.lsSY. 0 8 8 8 +7472 PF07644 PGAMP Planctomycete PGAMP Studholme DJ anon Blast clustering of Pirellula proteome Family This conserved sequence is centred around an invariant motif of PGAMP in several short hypothetical proteins from the planctomycete Rhodopirellula baltica. The motif also occurs twice in Swiss Q7UVK9. 20.30 20.30 43.70 43.40 19.90 18.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.85 0.72 -4.21 4 8 2009-01-15 18:05:59 2004-03-17 09:31:05 6 3 2 0 5 8 0 34.80 52 15.43 CHANGED SlplPGAMP.AohphAhGQpsRhtpuQpppSphus .SlplPGAMP.AoMphAVGQpsRhtKuQtQpSphus 0 5 5 5 +7473 PF07645 EGF_CA Calcium-binding EGF domain Bateman A anon Pfam-B_330 (release 10.0) Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.08 0.72 -3.96 18 24503 2012-10-03 09:47:55 2004-03-17 16:01:14 10 2919 264 49 14019 30190 160 41.60 36 11.43 CHANGED DlDECss.ss.psC...ttsshChNshGSFpCh....C.sGap......stssspsC ..................DlDE.C......t.........p........s........s.....t................C.............t.t...u..pCh...N...o.........G..S...a....p......Cp.....................C.....s..G.ap...............tt...................................................... 
0 4262 5341 9307 +7474 PF07646 Kelch_2 Kelch motif Finn RD anon Context Domains Repeat The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase [1] for which a structure has been solved [2]. The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.22 0.73 -8.44 0.73 -4.00 42 698 2012-10-05 17:30:42 2004-03-17 16:09:08 10 211 235 0 334 6336 157 51.00 22 11.00 CHANGED sphstsss.sh.......ssclhlhGGh...................s....phhhhcsppppWpphsshs ...................pssauss..sh............ss+l.hlaGG..h................................................................ttt.t.s..p...h..........s.-..l...h...hhD..h..pphpWpphp........................ 0 102 166 239 +7475 PF07647 SAM_2 SAM domain (Sterile alpha motif) Finn RD anon Manual Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.10 0.72 -4.13 49 3625 2012-10-02 20:42:54 2004-03-17 16:20:05 12 358 360 47 2049 7757 90 64.90 20 8.61 CHANGED sphhshpslspWLp...sl..sh..tpYp-tFpppslss.phl..hphstccL...t.clGlpphscpp+llpplpphp ...................................hs.pplt.....p.W.Lp...........sl.........sl.....p....pYs.s......t....h.......p........p...p.......s.lsu.ph.l.........hph.s..p.p...p...L.................p..plu..l..ss.h.tp..Rp+lhpthpth............................................. 0 524 798 1345 +7476 PF07648 Kazal_2 Kazal-type serine protease inhibitor domain Finn RD anon Manual Domain Usually indicative of serine protease inhibitors. However, kazal-like domains are also seen in the extracellular part of agrins, which are not known to be protease inhibitors. 
Kazal domains often occur in tandem arrays. Small alpha+beta fold containing three disulphides. 20.30 11.00 20.30 11.00 20.20 10.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.48 0.72 -4.02 73 4760 2012-10-02 00:52:43 2004-03-17 16:59:37 10 287 248 41 2600 6229 320 42.20 27 13.16 CHANGED Cspppt..P..VC......GsDGp...oYss..tCthpstshpt...........ptp..hphhppss.C ................Cspt.....P.....VC..............G..s.D...Gp...........TYs...s.......tCth..pp.ts.stt.....................ttp.....hph.hh.s.C................................ 0 733 989 1694 +7477 PF07649 C1_3 C1-like domain Finn RD anon Context Domains Domain This short domain is rich in cysteines and histidines. The pattern of conservation is similar to that found in Pfam:PF00130. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.89 0.72 -3.91 194 800 2012-10-02 13:15:50 2004-03-17 17:46:33 7 64 28 1 533 923 2 29.60 35 11.59 CHANGED htCps.Cshshtt....shYt..C..h.pC.cFhl.HpsCs ......hCss..Cs..hshpt.........hYt....C..h.pC.cF.hl.HppCs. 0 367 463 489 +7478 PF07650 KH_2 KH domain Finn RD anon Context_Domains Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.58 0.72 -4.42 165 9989 2012-10-02 00:34:43 2004-03-17 17:57:18 12 35 5164 213 2428 7426 3812 76.60 27 28.69 CHANGED cl.phlptpl.p.....tss.......huplpI..........ptshlsl+suc.GllIG+pGpplcplppplccpht.h.spp..............Vtlplh..cVccs.t ...........................................cl.phls..hcl...p............tss.............hupl.cI................ptshls....+..su+.Gl..VIGK..pGp..c...lcc..lt...pphcc.h.......spp..........................Vplplh..cV+ps............................................................. 
0 805 1574 2054 +7479 PF07652 Flavi_DEAD Flavivirus DEAD domain Finn RD anon Pfam-B_199 (release 3.0) Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.84 0.71 -4.45 19 6300 2012-10-05 12:31:09 2004-03-18 10:02:46 9 76 281 64 1 7229 89 121.90 49 4.66 CHANGED h+KtplTlLDhHPGAGKTR+lLPpll+cslc+RLRTllLAPTRVVhuEMtEAL+GhslRapTsAl.sp+sGspIVDlMCHATaTpRhLp.Ps+hsNaplhIMDEAHFhDPuSIAARGalsopschscsAslaMTATPPGos-PFPcSNu .......................................................................h......................................................................................................................................................................................................................sa.u...T.as..h.h...L...hs.....s.....s.....p..s..s.s.YslIIhDEsH....h..TDss.....S..I..huh.G.h.l...s..p..s.E.h.u.t..A.t.s.l.h.h..TAT.PPGSs.shPpup........................................ 0 1 1 1 +7480 PF07651 ANTH ANTH domain Finn RD, Bateman A, McMahon H anon [1] Domain AP180 is an endocytotic accessory proteins that has been implicated in the formation of clathrin-coated pits.\ The domain is involved in phosphatidylinositol 4,5-bisphosphate binding and is a universal adaptor for nucleation of clathrin coats [1,2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.80 0.70 -5.70 29 1195 2012-10-02 18:21:09 2004-03-18 10:47:20 11 20 303 13 707 1532 5 243.60 25 36.38 CHANGED .sslphulhKATsp.p-sPsKc+asctIlhhops....tphsthhtslscRlspopsWslshKsLlllH+lLp-Gc.shhp-hhptppphsphtphps................shsaushlRpYstaLpc+lphatpttht...........hphttpt....................tp..tt........phsh.pll-pl.plQphl.phlphp.pssshp..spllltAhh.llp-shtlYptlschlhsLlp....phh-..hth.pscpshslhc+htpQhccLppFaphs+slthhps...IPpl.chssshlpsh-phhpps ............................t..ththslhKATst....ss.Kp.+ah.p.......l..h...h..h.s........pt..........h......p..h.......s....thh..sl.hcR..h.......s..t..s.....s....hl.........lsaKsLlshH+lh.p.c.......G....p...........p..hh.p..........ph.h...p..t.p.s..h..h..p...t..hst.....hsp......................................shshu..s...hlR....pYspaL..pp+.htha...pphsh...........................hph.cpt..................................................................tpss.t............phsh..pLLc.ph.hlQ.............p..l.s.hlp.h...p...t........ss.....ths....stsh.suhh.....l.hp-shplathhschlhsLh..p.............................phh-..h...................scphhtl.hc.+ahpphpcltc.Fhph..scp.l.t.....h.......pt....lPpL......p.....sPsshlps.-ph....t....................................................................................................................................... 0 180 352 538 +7481 PF07653 SH3_2 Variant SH3 domain Finn RD anon Manual Domain SH3 (Src homology 3) domains are often indicative of a protein involved in signal transduction related to cytoskeletal organisation. First described in the Src cytoplasmic tyrosine kinase Swiss:P12931. The structure is a partly opened beta barrel. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.60 0.72 -4.47 35 4617 2012-10-02 18:48:24 2004-03-18 11:11:45 12 450 402 69 2334 20355 74 59.20 23 7.44 CHANGED hhpshpcass..s.ssss.................LshpcG-...llplh..........................tp..css.........sa.ahs.pp..s.......Gct......GhlPpshlp.h ..................................hpshhcass......p..ssst....................Lshp.c.G.D...............ll..p.lh...........................................pp...pss.............sa..WpG..ph.....s...................................G...ct.................G..h..hPsshlp...................................... 0 522 751 1381 +7482 PF07654 C1-set Immunoglobulin C1-set domain Bateman A anon Bateman A Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.91 0.72 -3.98 128 16594 2012-10-03 02:52:13 2004-03-18 16:21:17 10 70 510 2328 2190 24856 1 82.80 38 31.24 CHANGED s..pss.ph...sptshLsChVssFaP..sslpVpW.h+NGpplspsspsst.h..s....sDhoaphhotLphs...P.pps-..tYoCpVpHtuLp..psht ...............................................scpsoLtChs.h.G..F.Y.P....sc..I.s.l..oW............+.....s......G......c.........-......t.....o......p..c....s...p..h.....s..p......shs....s.............GDs...T..F..Q...p..hut..l.sls............s..sc..pp...pY.o...CcV..pHpuLs.pPl.s................................... 0 174 356 861 +7483 PF00008 EGF EGF-like domain Bateman A, Sonnhammer ELL anon Swissprot_feature_table Domain There is no clear separation between noise and signal. Pfam:PF00053 is very similar, but has 8 instead of 6 conserved cysteines. Includes some cytokine receptors. The EGF domain misses the N-terminus regions of the Ca2+ binding EGF domains (this is the main reason of discrepancy between swiss-prot domain start/end and Pfam). The family is hard to model due to many similar but different sub-types of EGF domains. 
Pfam certainly misses a number of EGF domains. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.34 0.72 -3.96 71 24995 2012-10-03 09:47:55 2004-03-18 17:40:01 22 2546 284 90 15241 25121 202 32.00 40 10.56 CHANGED Cspp....sCpst..GpChps.........ssapCpCs.G.....asGpp ................Ctss........PC.pNG....G..s..C...h..cts.......................ss..ap.CpC.ssG.......asG................... 1 6681 7484 11311 +7484 PF07655 Secretin_N_2 Secretin N-terminal domain Yeats C anon Yeats C Domain This is a short domain found in bacterial type II/III secretory system proteins. The architecture of these proteins suggest that this family may be functionally analogous to Pfam:PF03958. 22.00 22.00 22.20 22.00 21.90 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.12 0.72 -3.60 60 476 2009-01-15 18:05:59 2004-03-19 11:19:25 8 5 427 0 107 379 37 91.00 28 16.70 CHANGED TcTFslsYLthcRtGtSpToVsSGslSssss...sssssssssss.................sssssususSu....sussGopIpopscoDFWs-LcpslpullG.susGRtVlssPQA .....................TcTaslsalphpp...pSto..olso......Gshossss........s................................................ssuusususou......susuusphpssscsDhasslcpslpshL..sstGphslsssp........................... 0 24 71 88 +7485 PF07443 HARP HepA-related protein (HARP) Vella Briffa B anon Pfam-B_21761 (release 10.0) Family This family represents a conserved region approximately 60 residues long within eukaryotic HepA-related protein (HARP). This exhibits single-stranded DNA-dependent ATPase activity, and is ubiquitously expressed in human and mouse tissues [1]. Family members may contain more than one copy of this region. 
25.00 25.00 26.20 27.90 23.30 22.10 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.63 0.72 -4.59 15 154 2009-01-15 18:05:59 2004-04-01 10:52:49 8 9 78 0 90 142 0 54.50 44 11.43 CHANGED FpVcl.GYppcLIAlFKslPS+sYDssT+pWsFuLcDYptLMcplpcL.spVsLcPL .............FpVcl.GYst-LIulFKphPS+sY..Dsp..T+pWsFpLpDYstL.h..ptspsL..spVpLpPL............ 0 18 25 53 +7487 PF07657 MNNL N terminus of Notch ligand Liu XH, He QY, Studholme DJ anon Liu XH Family This entry represents a region of conserved sequence at the N terminus of several Notch ligand proteins. 22.50 22.50 23.60 22.70 21.80 21.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -9.86 0.72 -4.05 24 383 2009-01-15 18:05:59 2004-04-06 17:22:07 8 106 95 0 183 325 1 75.10 44 9.34 CHANGED uSGhFELclpsapNtpG...tsGpCCsGs.......stthshspCcTaFRVCLKHYQuplsssu...PCTaGsssTPVLGuNohslp ...........sSGhFELplp.phpNhpG.L.tsGpCCsus............ttt..s..stCcTaFRVCLK........cY.Q.u.p...Vospu..........................PCoaGsusTPVLGsNoFsl......... 0 35 54 113 +7489 PF07659 DUF1599 Domain of Unknown Function (DUF1599) Yeats C anon Yeats C Family \N 25.00 25.00 26.60 28.10 23.70 23.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -9.02 0.72 -3.91 14 429 2009-01-15 18:05:59 2004-04-07 15:17:06 6 2 231 0 116 393 387 61.80 51 66.07 CHANGED Kh+DYGsA....WRlhRlsSlTDQIaIKApRIRslpp.pupohVsEGIcuEaIulINYulhuLIQL .......KsHDYGpA....W....R.........h....R...lo....SlTD.IhhKhpRl+pIE.....s..pGp.olV...s.E.GIcupahsllNYulhuLIpL..... 0 49 100 112 +7490 PF07660 STN Secretin and TonB N terminus short domain Yeats C anon Yeats C Family This is a short domain found at the N-terminus of the Secretins of the bacterial type II/III secretory system as well as the TonB-dependent receptor proteins. These proteins are involved in TonB-dependent active uptake of selective substrates. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.20 0.72 -4.40 154 4169 2009-01-15 18:05:59 2004-04-07 17:23:43 9 51 1462 20 1089 3747 285 50.20 21 6.20 CHANGED hslhhs.t..ptlps....ppsul...p.ss.h......slppuLpplLpssslphph.ssspl.hlttp ......................................phhhs.....stlpu...........tssul.......phss.h......shcpuLpplL.pu..s.GLshph..psshl.hlt............... 0 353 719 943 +7491 PF07661 MORN_2 MORN repeat variant Yeats C anon Yeats C Repeat This family represents an apparent variant of the Pfam:PF02493 repeat (personal obs:C Yeats). 24.40 9.20 24.50 9.20 24.30 9.10 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.95 0.75 -7.19 0.75 -3.26 354 7333 2012-10-01 20:24:03 2004-04-08 11:38:36 8 45 575 0 1184 6471 5210 21.70 28 29.73 CHANGED hpGhhptYapsGplppct.pacs .............pG.hppYa.c.s.Gplppct.pYps............ 0 627 928 1065 +7492 PF07662 Nucleos_tra2_C Na+ dependent nucleoside transporter C-terminus Bashton M, Bateman A, Yeats C anon Yeats C Family This family consists of nucleoside transport proteins. Swiss:Q62773 is a purine-specific Na+-nucleoside cotransporter localised to the bile canalicular membrane [1]. Swiss:Q62674 is a a Na+-dependent nucleoside transporter selective for pyrimidine nucleosides and adenosine it also transports the anti-viral nucleoside analogues AZT and ddC [2]. This alignment covers the C-terminus of this family of transporters. 
25.30 25.30 27.20 27.10 25.10 24.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.29 0.70 -4.86 161 4136 2009-01-15 18:05:59 2004-04-08 14:27:54 8 11 1902 1 688 2281 695 208.40 39 50.08 CHANGED aLluAShMuAPuuLlhAKllhPp.....ocp.sp.st........tpt..............................sNll-AhupGAhsGhplAlsVuAMLluFlALlAllNulL.uhl...G.........h............Gh............l............oLphILGalFuPlAallGV.P.a.sEshhAGullGpKlllNEFVAahsLs..phhss...............LSt+otsIloaALCGFANhoSluIhlGuluuls.PpR+sslAc.hGl+AlluGoLuslhSAsIAGlhl ...........................................................................YlluAsl..hsh.uulllupllsPhs.psppsh..........................................thtp..........cp............cspshh-hhup.ushsGh.........KlAlsVuAMLluFlAL.IAhlNull.u.sl.......sshh............Gh..............l..........................ohpt.....IlGYlFuPlA......alhG........l........P....h........s.......-A.hhuGolhupKLlhNEFVAhhsls..phhss...................................LSs+otuIlohhLsuFANFSSIGIllGulpu.ls.sc+s.st.lu+.hGl+hlhuuTLsslhSAsIAGlhl.......................... 0 182 343 518 +7493 PF07663 EIIBC-GUT_C Sorbitol phosphotransferase enzyme II C-terminus TIGRFAMs, Griffiths-Jones SR, Yeats C anon Yeats C Family \N 25.00 25.00 41.50 41.50 18.90 18.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.03 0.72 -3.86 9 882 2009-01-15 18:05:59 2004-04-08 14:30:57 6 5 806 0 77 379 42 92.40 78 28.81 CHANGED LSPlLGPGAVIAQVIGV.LIGsQIGtGsIPPphALPALFAINsQsGCDFVPVGLuLuEAKPETVclGVPAVLhSRhlTGsluVlIAWhhShhla .......LSPhLGPGAVIAQVIGV.LIGVQIGhGNIPPpLALPALFAINAQAACDFIPVGLSLAEA+t-TVcVGVPSVLhSRFLTGsPsVLIAWhsShhlY............ 0 24 39 62 +7494 PF07664 FeoB_C Ferrous iron transport protein B C terminus Bateman A, Yeats C anon Yeats C Family Escherichia coli has an iron(II) transport system (feo) which may make an important contribution to the iron supply of the cell under anaerobic conditions [1]. 
FeoB has been identified as part of this transport system. FeoB is a large 700-800 amino acid integral membrane protein. The N-terminus has been previously erroneously described as being ATP-binding [1]. Recent work shows that it is similar to eukaryotic G-proteins and that it is a GTPase [2]. 26.40 26.40 26.70 26.70 25.60 26.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.74 0.72 -4.54 217 3526 2009-01-15 18:05:59 2004-04-08 14:37:58 7 16 2806 0 775 2731 228 54.20 34 7.91 CHANGED llhulY.llGllsullsuhllp..pplhpup.sss...FlhELPsYRhPph.+slhhpsWp+ .......slhulY.llG..I.lh.All.suhlL+....pT.l...h+G..c....sss....FlMELPsY+lP....pl.+slhhpsWp+............. 0 304 565 683 +7496 PF07666 MpPF26 M penetrans paralogue family 26 Studholme DJ anon [1] Family These proteins include those ascribed to M penetrans paralogue family 26 in [1]. 21.30 21.30 21.80 21.40 21.10 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.85 0.71 -4.25 11 22 2009-01-15 18:05:59 2004-04-13 10:35:31 6 1 12 0 13 21 3 104.00 25 67.62 CHANGED hlphl+pl++luhhhllhahlssh.....llhlsllsalshhhp.p.hsp......s...h.hhhsllhlhlssl...hhIsphllpIlLhlKlsthKscpscapchphahhLhIlGIhl.slhulIssFhLs.KhpK ....................................................................t.hslGlL.Ihhssl...hhlhthILsIlhhlhsuphppcp.phtc..hphlLsIIGIll.slhslIsslhL............ 0 11 12 12 +7497 PF07667 DUF1600 Protein of unknown function (DUF1600) Studholme DJ anon BLAST clustering of M. penetrans proteome Family These proteins appear to be specific to Mycoplasma species. 
25.00 25.00 48.60 48.60 22.70 22.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.78 0.71 -3.98 12 25 2009-01-15 18:05:59 2004-04-13 12:35:20 6 1 9 0 12 21 0 116.40 30 43.30 CHANGED thhSslahHlhtPhh.Flhhhalhhshs.hhshpphhKhLlhshIYPhlYs.lYlhslPal...ahhtss..ssYoVYuhhTssp.s.hh...................uhslhhshhhlahslShlh.lhhphhthpchh .....t.lhuslalHllsPls.FllsGalhhsas.tpsh+chtKaLlhshIYPhIYu.lYlholPal.................ahhssG...ssYSVYGhhTNsppNshh...................AhsllhshhFlaaPlSahl.lhhpahthpc+........ 0 10 10 10 +7498 PF07668 MpPF1 M penetrans paralogue family 1 Studholme DJ anon [1] Family This family of paralogous proteins identified in Mycoplasma penetrans includes homologues of p35 [1]. 20.30 20.30 20.30 20.30 20.20 19.80 hmmbuild --amino -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.21 0.70 -5.26 30 71 2009-01-15 18:05:59 2004-04-13 12:59:48 6 3 5 0 55 69 0 290.00 34 77.48 CHANGED TPcLKosVsLuGuLocIYDosss.....csTNsLIAc-IKsN.-sh..FsNGs-...hcslpshslTVcGsF....osSoW..sGpsYs.stps....W.........sshssssKllYsosusQlsIuSLsDLKopLsc.....psplKphLcsAssohss...sosaoV..pNpLGhT........s..sDLLHVNVpusp.......sssspNaDLQIPVSslNLplosLsloV......oGsNltsssctTTsFsYNIGI.cssssaspssssssssp.....s-sssusclLpcLGYss.t....................sssssLsNDplupuLGlYNspFo........t.tssssssssssspsYTlTLsAoP..spsYVW.DDG.oossKslSFsVslsls ..........TPplK.sslsLsGuLocIYDosss............csTN..pLIuc.-IKsN..psh..FsNGpc...hpslpshsloVsGsF....spS..sW.sG..t...s.Ys.s....W..............shsshsplhYsssusQls....IsSLsDLK.opLsp......tsplpphlptu.shshss...sssapl..p..NplGhT..........ssDLlHVNVtssp..........sssshshDLQIPsSslNLpls..sLploV......sGsN..ltt..s..sphoTsasaNIGI.csss...paspsss...tss.hsp.......sptssssplLtpLGass..t..........................ssshsLss-tlutuLGlYNspFo.............ssssspsssttYslolpAoP....spsYlW.-DG..osssK.ploFssplp..s................... 
1 55 55 55 +7499 PF07669 Eco57I Eco57I restriction-modification methylase Studholme DJ anon BLAST Domain Homologues of the Escherichia coli Eco57I restriction-modification methylase are found in several phylogenetically diverse bacteria. The structure of TaqI has been solved [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.53 0.72 -3.68 29 913 2012-10-10 17:06:42 2004-04-13 13:56:19 6 28 738 0 199 3032 536 109.80 25 13.17 CHANGED MK...FDVlIGNPPYQhss...tst..tss..............spPIYphFl-hAhclt...s+alshIhP.uRWh.tuGcshcpaRpphLpDp+lpplh.a.supclFssssIcGGlshhhh-.psp ............................................................................h....FDhlIGNPPYhct..p...p.......t..t...h.t.....htp................................................................tssslY...t.....h..F.h.-....t.....u.h..p..lhp........sGh.l.s.a....I..s..P...spah...pup...h..s...c.p.h.Rp.....h.l.l.p.c........s.p..l....pp.l.hp.h.s.s.........t.....p..........l.F...p...s....s..s..l...psslhhh.+t...t............................................................. 0 84 151 182 +7500 PF07670 Gate Nucleoside recognition Yeats C anon Yeats C Domain This region in the nucleoside transporter proteins are responsible for determining nucleoside specificity in the human CNT1 and CNT2 proteins (e.g Swiss:O00337) [1]. In the FeoB proteins (e.g. Swiss:O25396), which are believed to be Fe2+ transporters, it includes the membrane pore region, so the function of this region is likely to be more general than just nucleoside specificity [2]. This family may represent the pore and gate, with a wide potential range of specificity. Hence its name 'Gate'. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.36 0.72 -4.15 238 13008 2009-01-15 18:05:59 2004-04-13 14:15:06 9 26 3306 1 2612 8940 1060 117.90 18 34.76 CHANGED hhphhshllhhhhllslLpph................................................Ghhshluphlssl.hp.l.Gh...sspssls.hlss....hhstpsulshhtphh..............................................................................t...hstpct.hshhh...hshh.hs..........shutl.s..shh...thhuhh ...........................................................................................................h.hphhshllhhhh...llslLpph..............................................................................................................................................................................................Ghhshlutlh.ssl.h.p.l..Gl......ssp..ss....ls...hlsu....h.h.s.s...t.s.s.ls.s.s.t.plh............................................................................................t.....hs...t...p...p...t...h...s...h...hh.......h.shh....hs.........shuhhs....hhh......h........................................................................................................................ 0 936 1758 2209 +7501 PF07671 DUF1601 Protein of unknown function (DUF1601) Yeats C anon Yeats C Family This repeat is found in a small number of proteins and is apparently limited to Coxiella and related species. 19.00 4.00 19.10 4.00 17.90 3.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.05 0.72 -7.52 0.72 -4.31 28 131 2009-09-12 22:43:26 2004-04-13 16:55:19 6 24 19 0 48 143 226 32.00 31 18.14 CHANGED MGh+WpcLctQcLosRLLsAl+.....+Ns-pFNsQsIANTL ............................................LhpAlt..........ppspcFNsQpIANsL.... 
0 32 43 48 +7502 PF07672 MFS_Mycoplasma MFS_Mycoplamsa; Mycoplasma MFS transporter Studholme DJ anon BLAST clustering of Mycoplasma proteome Family These proteins share some similarity with members of the Major Facilitator Superfamily (MFS). 20.80 20.80 20.90 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.03 0.70 -5.22 7 58 2012-10-03 03:33:39 2004-04-13 18:09:49 8 1 45 0 19 92 2 252.00 30 55.53 CHANGED p+pKohlSshNhaGFNlGhhlshs.Flhs.slppsuops..WhhIhoshILllhshLllahhFtpch..hhsp..KQoK.p.shssc......sohhslLKpKpTaKhhshaGlhLlslV.shTsshhN.l.ls.SP...hsl.............................suGhhhhshshhhIhaVhGahhGhhsloPFNKThacRK+althhhshshlhlllhllh.uhhlGhssshGhsh......hhIhoFluGsFhWulQushLhlPaEhKthp.scVulhFGhlWGhGYlhYThhDIhhSshhp .................................+pKullSphs.ahaslGsllsllP.F..lhs...ps..s..p..thpss..Wphlhslhu.LLshIPLllahlh..GpcFDhhtsp....ppst....p.t.ts...........hslhshLKpKsTatWlllYGuWL...llsV.a..sho.s..hh...p.h...thSs....................................ssphs.thhpha..Ih.Fl.h.uhhlG..hslGhas+hphcR+halsh....hhshGllhh...ll...uhlh....sh.thGhs...ps.uhth....................hhIhuF..LsGhhh....WGIQuVhL.lPHEYKss.sPpplGhhFulIWGhGYhhaTlshIllSsh.................................... 1 11 16 17 +7503 PF07673 DUF1602 Protein of unknown function (DUF1602) Studholme DJ anon Clustering of A. pernix proteome Family \N 24.20 24.20 25.10 24.70 24.10 24.10 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.70 0.72 -4.49 43 115 2009-01-15 18:05:59 2004-04-13 18:16:47 9 3 81 0 21 116 7218 37.50 42 17.33 CHANGED GSStsphtG.hssARAhATRCCpPP-SssGhhhut.u...spP ......GSStshhtGthspARAhATRCphPP-sshGhhhtt.u..pP............ 0 5 12 17 +7505 PF07675 Cleaved_Adhesin Cleaved Adhesin Domain Yeats C, Collyer C anon Yeats C Domain This is a family of bacterial protein modules thought to function in various roles including cell adhesion, cell lysis and carbohydrate binding [1]. 
The beta-sandwich jelly-roll topology of these modules is known as the galactose-binding domain-like superfamily, clan CL0202. A tandem repeat of these modules (either two or three repeats) constitute the haemagglutinin/adhesin (HA) regions of the gingipains, RgpA, Swiss:Q51816 and Kgp, Swiss:P72194 and Swiss:P72197 [3] expressed by Porphyromonas gingivalis (Bacteroides gingivalis) [2]. They form components of the major extracellular virulence complex RgpA-Kgp - a mixture of proteinases and adhesin domains [4]. The adhesin domains in this complex are found in proteinase-cleaved forms when isolated from the cell surface [5]. Haemagglutinin genes of P. gingivalis [6] (hagA1 HAGA1_PORGI - Swiss:P59915 - and hagA2 HAGA2_PORGI - Swiss:Q51845) suggest that such proteins are composed of eight to ten tandem repeats of these adhesin modules [1]. Genomic data predicts that homologous protein modules are also expressed by a number of other bacteria and form part of putative multi-domain proteins, eg. Swiss:Q26BR9 and Swiss:B0VGL6. These domains may be acting in concert with other adhesion modules thought to be part of these multi-domain proteins such as fibronectin type III, Pfam:PF00041, and Meprin, A5, mu (MAM), Pfam:PF00629, domains. 
20.50 7.00 20.80 7.10 20.10 6.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.25 0.71 -4.66 9 203 2012-10-03 19:46:52 2004-04-14 13:37:02 6 31 36 6 40 218 69 157.90 28 23.51 CHANGED stlL.EuFEsG.lPsuWpsIDADGDGtsWtphsssh.......hsGHsustCshStSa.lshhGs...LTPDNYLITPcl....pGut+lpYWVssQD.ssasuEHYAVhsSoTGssAuDFs.lLaEET....hTuKs.............pGsWhpRTlsLPAGTK...YlAFRHasCTDha..alhLDDVsl .................................................................h....Es.p..p.s...h.s.stWphI.......Duc.u........D..u.p.sWhhhs..s.s...........................hsu+su.st.s.sh.S.h....u...h...s.h..tt.......hs.s.DN..aLI..oPpl.....sG.s.p..p...lpaass...s...ps....t..s..a.s...s...EcY....s...VhhS.s..T...G..s........ss..usFs...hlh--s....hssts..............tusWhppolsL...P.sGT+....YlA.aR..H..h.s.s..o..D....h...h...h.l.hlDDlp..................................................................... 0 29 40 40 +7506 PF07676 PD40 WD40-like Beta Propeller Repeat Yeats C, Mistry J, Adindla S anon Yeats C Repeat This family appears to be related to the Pfam:PF00400 repeat This This repeat corresponds to the RIVW repeat identified in cell surface proteins [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. 20.60 10.90 20.60 10.90 20.50 10.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.99 0.72 -4.23 127 15082 2012-10-05 17:30:43 2004-04-15 11:18:12 7 349 2793 58 5424 14558 9482 34.90 24 15.47 CHANGED hpplssssu............stsPsaSPDGpplhFsSscs......Gs.p.....lah .................................................h........t...............................ptsP.saSP.DGppls....aso.p.ps..........u.................................... 3 2058 3763 4721 +7507 PF07677 A2M_recep A2M2; A-macroglobulin receptor Studholme DJ anon Prosite Domain This family includes the receptor domain region of the alpha-2-macroglobulin family. 
21.70 21.70 22.00 21.70 21.30 21.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.86 0.72 -3.80 92 1246 2009-01-15 18:05:59 2004-04-16 09:26:22 9 72 186 60 533 1171 2 88.00 30 6.72 CHANGED osMsll-lslhSGFtsspssLcpLppstphh....lp+hEh.....psspVllYh-plsppp.hClsFphpp.phtVuth.pPAsVpVYDYYps....sppssphY .................................SsMull-lslhSGFhsspsslp.pLp..p.s.sphh........+hEh......psspVllYh....-...pls.pp.p............pslsh.pspp.phtVut.h..pPAsVplYDYYp.s....pcpssthY............. 1 90 147 332 +7508 PF07678 A2M_comp A2M3; A-macroglobulin complement component Studholme DJ, Sammut SJ anon Prosite Domain This family includes the complement components region of the alpha-2-macroglobulin family. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.86 0.70 -5.14 43 1721 2012-10-03 02:33:51 2004-04-16 09:40:48 9 109 288 67 626 1697 23 213.90 31 19.09 CHANGED pGYp..RpLsYK.+sDG..SYSsF......pst....suSTWLTAFVlKsFu............pA+pa..IhlDppplppulpWLlpp.Qpss.GsFppsu.lhpptMpG..Gl.........-scloLTAalsIALhEst...........hsps..............................psuls+AhthL.cst......ssshspsYshAlsAYAhsLss.cpsptpphhppLcppAhpc............................sss...haWppsppsp..sptt..................ssuhpVEhTAYsLLs.hlst...........p-lshAptIlpWLspQpNstGGFpSTQDTVVALpALocYut 
.......................................................................................................GYp..ppLpa+....+t..D.........G...Sausa.....tpt................suSs....W..LTAaVh.+.s.ht............tApph....h..l...-....phl.ppuhpWLhpp..Qp.s.s..G.pF.p.....-...sG...l.h.pp..sh..pG..Gh.......................................ppsluLTua...lllALhE.st....................hsph.....................................psslp+u.hpaL.ppp.........htph.pssYs.h....Al...su.YA..htLs...pp.......tt....pthh.p.pLt..th.u..hpp............................ss........haWtpst................................hplE.hTuYsL..Lu.hl.t...........pcht.tu.....lhpWLspQp.p...G..GFs..S.....T.Q...D..T..hluLpALspa..h.................................................................................................................................................... 0 122 188 400 +7509 PF07679 I-set Immunoglobulin I-set domain Bateman A anon Bateman A Domain \N 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.92 0.72 -4.13 48 59728 2012-10-03 02:52:13 2004-04-20 17:44:00 11 3030 1617 375 30033 71682 78 88.50 21 29.33 CHANGED PpFsph..pshplptGpssphpCplpGs.PsPplsWh+s..sp.lps............spchplptps......sptoLpIpssphsDsGpYsCpApNp.sGpspssspLpV .................................................................................ps..h..p...s....h....t....G...p.....s...s...p...l...p....C........p.......s.......p..........G...........p.......P.....t...........P..........p.........l........p.......W..h..+s.........s.p........l..p.s.....................................................s.p...c...h..p....l..p.t.ps........................t..p.L...p...I....p.....s......s......p......h......p......D.......s.......G...p...Y...p.C..h....A....p....Nt....h..G...p..s...p.t.p.h.tl................................................... 
0 6694 9324 18688 +7510 PF07680 DoxA TQO small subunit DoxA Kletzin A, Studholme DJ anon [1] Family Thiosulphate:quinone oxidoreductase (TQO) is one of the early steps in elemental sulphur oxidation. A novel TQO enzyme was purified from the thermo-acidophilic archaeon Acidianus ambivalens and shown to consist of a large subunit (DoxD) and a smaller subunit (DoxA). The DoxD- and DoxA-like two subunits are fused together in a single polypeptide in Swiss:Q8AAF0. 20.70 20.70 20.70 23.90 20.40 20.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.55 0.71 -4.53 9 80 2009-01-15 18:05:59 2004-04-20 18:14:37 6 2 72 0 25 79 2 135.30 31 48.50 CHANGED hhassllGsLhNhSKhPclc.lshhphpsss....LhhplsclsGPDsYsuhlshlplhsusucllLppss......clsphPhsphKs-Ylshhu..........ulhlsLGucAplpLplP..hcLs..GTYplKLassu..Gtssttthpa ...................................hp.tshGsLpNhShtPthp.lus...t.hpsss....LphplaRspGsDsYGuFlltlpLhs.s.s.Gcl.lhphsu.....ppLuplPtssIcNcY.s.s+.hs......sutaulhlPLGucATlplp.hs......hpls.pGs.YpLpLhDlu..Gtsapt....h..... 0 8 17 22 +7511 PF07681 DoxX DoxX Studholme DJ anon BLAST Family These proteins appear to have some sequence similarity with Pfam:PF04173 but their function is unknown [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.74 0.72 -3.56 103 6635 2012-10-02 13:32:46 2004-04-21 11:57:59 7 10 2739 0 1741 5119 2239 87.30 23 51.92 CHANGED ulLlhRlhluhlFl.hpGhpKlh..................shsu..........ssthhs....shsl..................sshhuhhushsElsuulhlllGlhTR....huAhhlsshhlsAh....hhsHt ..................................................hhlhRlhlu....h.l....F....l....htG.h.t.Klh.............................shss.........................................ht.t.hht..................sh.u.l.................................................ss.h...h....s....h...l.s...s....h...sEls.uGl.hll...l.......G.......h.......h..TR.......h.u..A.h....h..hs.h.h.h.l.ssh.....hhh........................................................ 0 475 1065 1446 +7512 PF07682 SOR Sulphur oxygenase reductase Kletzin A, Studholme DJ anon [4] Family The sulphur oxygenase/reductase (SOR) of the thermo-acidophilic archaeon Acidianus ambivalens is an unusual enzyme consisting of 24 identical subunits arranged in a perfectly symmetrical hollow sphere and containing a mononuclear non-heme iron centre (personal communication: A. Kletzin). At 85 degrees C in vitro, elemental sulphur is oxidised to sulphite, thiosulphate and hydrogen sulphide with no external cofactors needed. The proposed equation is: 4S + O2 + 4 H2O ---> 2 HSO3- + 2 H2S + 2 H+. 
25.00 25.00 191.60 191.40 22.80 16.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -12.12 0.70 -5.83 3 21 2012-10-02 00:20:33 2004-04-21 13:41:55 6 1 16 25 7 26 2 295.40 54 97.85 CHANGED PKPYVAINMA-V+N-PKThELFupVGPKVCMVTARHPGFVGFQNHVQIGVlPLGsRFGGAKM-MocEhc.............oLcLhQYThWKcWKDHEEMH+QNWupLFRLCluCAoQMVWGPaEPIYEI+YANMPlNTEMTDFTsVVGKKFApGc..sluIPsISQPYGKRVVAFGEHpVKEGhEcQFEEuAIKTLEhF++.APGFLGuMILKEIGVSPlGSFQlsuKGFHQlLESsGul-PcsssTI...YpsPEF+....s+PpcYIVHsEWScs-ALMFGhGRVLlsPElRcVHD.KVLDTLlY.GPYIRVlNPlMEGTaWRE .....hlAlN.sclhNt.pohphh.pVGPKVCMVTApHPGFVGFQNHlQlGllPhGsRYGGAKMDMscE.s.............sltlhQYThWKDWKDHEEMH+QNauhlFRLChSChuphl.GPWEPlYEIlhAsMPhNs-MTDFsuslGccFApGc..PhslPsIS.PYGpRVVAhuEHoVhPG+EKpFE-uIl+TLEhhK+.APGFLGhMlLKcIGVSuIGShQhsscGhHQlL..EosGuh.Pcssssh....aps..PEA+.....sTPppYlVHhEWuss-uhhFGhGRVllpPElRplHD.cVL-TLlh.GPYIRlhNPMMEsThWRE. 0 3 5 7 +7513 PF07683 CobW_C Cobalamin synthesis protein cobW C-terminal domain Bateman A anon Pfam-B_1247 (release 5.4) Domain This is a large and diverse family of putative metal chaperones that can be separated into up to 15 subgroups. In addition to known roles in cobalamin biosynthesis [1] and the activation of the Fe-type nitrile hydratase, this family is also known to be involved in the response to zinc limitation. The CobW subgroup involved in cobalamin synthesis represents only a small sub-fraction of the family [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.18 0.72 -4.27 158 5707 2009-10-26 17:34:02 2004-04-27 12:18:52 9 25 2706 1 1476 4245 1150 96.90 22 27.04 CHANGED lpohshptcpshc.....ppltphlpp.......hs........ts..llRsKGhltls....sp.st.hhhhptlsthhpht.ht..hts.............................ts+psclVhI...Gp..sl-...pptlpptLpssl .....................................................pohshp.t.ctshc.....pph..p.hlpp................hs......................................ss...llRhK.Ghl..h.ls.................sp...s....p...hh...hhp........tstt.h.hphp.....h....s..........hts..............................................................tstpsclVhI.........Gp....sl-....pst....lpptlpt.................................................................................. 0 401 896 1201 +7514 PF07684 NODP NOD; NOD1; NOTCH protein Guo J, Studholme DJ anon Guo J Family NOTCH signalling plays a fundamental role during a great number of developmental processes in multicellular animals [1-2]. NOD and NODP represent a region present in many NOTCH proteins and NOTCH homologs in multiple species such as NOTCH2 and NOTCH3, LIN12, SC1 and TAN1. The role of the NOD and NODP domains remains to be elucidated. 21.90 21.90 27.00 23.80 21.40 20.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.10 0.72 -4.37 28 312 2009-01-15 18:05:59 2004-04-27 13:03:59 7 249 96 8 140 249 0 61.70 36 3.09 CHANGED cltGolVaLElDNRpC...ppsscC.FpsAssAAsaLuAhusp.psLph.saPIpsVpups.psssss ............hGSlVaLEIDNRpC.............psuspC.FpsssssAsaLuAhAup.ssLp...sYPl.sVtucs.ts....s.............. 
0 26 38 81 +7515 PF07685 GATase_3 CobB/CobQ-like glutamine amidotransferase domain Bateman A anon [1] Domain \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.64 88 5307 2012-10-03 00:28:14 2004-04-28 09:26:43 9 29 2967 0 1396 9246 3546 155.60 25 37.14 CHANGED ssph.s.ssDhlllPGo+ssht-Lthhc.ppuhccslpphhppGt..llGICGGaQhLGcplt.....ct.....shss...hpGLGlLshpTthpp.pKtlspspspsht..........pshtlpGaEhHhGpohh.sst..hhhh...............tsthc.....ushs...psslhGTYlHGhatsssh ....................................s.....s.ssD.h.lh.l.sG..u....p...s......p...h......p......s......h......t.....h......tc......tp...s......h.p...p...s.l..p...p...t....h...p.....p......G...t......s..llulCG.G.aQ.hL.Gcp..lt...............................sss.u....hp.G.lGlL.s.h.p.T..t..........h...p........s.....p+t..lu.thpsphtt..................................stslpG.......a.......E...hH...h.Gpohhsss.st.shhth...................................................pttsth-.........Gsh..........ss.lhGoYhHGhh.sp.................................................................................................................. 1 449 934 1202 +7516 PF07686 V-set Immunoglobulin V-set domain Bateman A anon Bateman A Domain This domain is found in antibodies as well as neural protein P0 and CTL4 amongst others. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.70 0.71 -4.19 113 19537 2012-10-03 02:52:13 2004-04-28 13:40:02 12 592 276 695 9493 120210 5 107.80 17 38.44 CHANGED s..lpp..sp................................hpst.GpslpLpCphp...........t...tltWh+..pps........spt.phhh.........ht....thttpapsphph.tsss............ttphsLplps.lphsDsGsYhCshhs...............hhhhspsspLpV ....................................................................................................t....................................h.s.t.G.p..s.l.p...LsCphs...........................sth...t.l..t.W..hp......pps................................sp..s.....phlh...............................t..t...t.t....t..t...h....s...s.....p.....h..p..s..R.h.p...h......t.s.ts.....................................tt..s.h..s.L.p.l.ps..l.p.h..p...D.s.G..h.Y...hCth.t..................................................................................... 0 892 1878 3948 +7518 PF07688 KaiA KaiA domain Bateman A anon Bateman A Domain The cyanobacterial clock proteins KaiA and KaiB are proposed as regulators of the circadian rhythm in cyanobacteria. The overall fold of the KaiA monomer is that of a four-helix bundle, which forms a dimer in the known structure [1]. 
30.00 30.00 47.90 47.70 27.20 26.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.84 0.70 -5.12 3 66 2009-01-15 18:05:59 2004-04-28 17:16:04 7 1 65 14 22 79 31 249.20 44 92.72 CHANGED LoIClaV.SsAIuQ-LpRlaouDRapLssFpSt--FCsYLEs+R-pIDCLILhaussShp.VlspLaapGsLLPAILltPpsS..AcPs-Psss.lYHsAElHLspcQL-QLPpQVDcAIAcFL+LuPlCTlPsHlhhh.ssLh-uSsps.LlsQQRRLAQKLKERLGYLGVYYKRDPu+FaRNhSPtEKQKLL--LpSpYREIVLSYFSs-usVN-pIDpFVNpAFFADluVSQVLEIHMELMDEFSK+LKLEGRSEDILLDYRLTLIDVIAHLCEMYRRSIPRE ..............................................................................h.hh.s.tl.p.htp.hL.s.p+Y.......l.......hp.s.pp..hhthlptppcplDslll..p......hhppLhppuhllPs.ll.l.t...........................................tsph.......YHssElpl..pplpplth.lDtAIspFLphu..ssthsp...ps................t.pt.........l.hpQpRLup+L+ERLGYLGVYYKR...........sPppFhRsLsssE+pcLLcpLcpsYRcIlLsYFss-tslNptIDpFVNpAFFuDlsVopllEIHMELhDEFSpQLKLEGRS--hLLDYRLsLIDllAHLCEMYRRSIP+.... 0 2 14 20 +7519 PF07689 KaiB KaiB domain Bateman A anon Bateman A Domain The cyanobacterial clock proteins KaiA and KaiB are proposed as regulators of the circadian rhythm in cyanobacteria. Mutations in both proteins have been reported to alter or abolish circadian rhythmicity. KaiB adopts an alpha-beta meander motif and is found to be a dimer [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.60 0.72 -4.51 16 352 2012-10-03 14:45:55 2004-04-28 17:24:08 7 6 163 18 129 572 244 80.00 41 45.47 CHANGED hLaVuscshsptpshpslpplppcphshpapLpllDltcQPpLsEtc+llATPsLlKlhPtPhphlsGs...pphhhhh.ch ....................+LaVuG.possShpAlpsLcp.lh-..p..c.h.t..s...Y.sLcV..I..DlhcpPpLAE.pccIlATPT.L.l.K.l.h.P.sPl+.+.IIGDLSspc+VLhsLc.h........................ 
0 31 86 113 +7520 PF07690 MFS_1 Major Facilitator Superfamily Bateman A anon Pfam-B_5 (Release 13.0) Family \N 32.60 32.60 32.60 32.60 32.50 32.50 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.78 0.70 -5.74 195 181668 2012-10-03 03:33:39 2004-04-30 10:30:46 11 489 5747 5 55719 183465 25371 295.20 13 82.52 CHANGED lhhsthhshhshth.............hhshh.......s..hh..............tpp..........................................hs........................................hs.st.....huh....lhsh...hhl..sh...slsthhh.Ghl..s-....+.h.....G..p+p.hhhhuhlhhulu..hhhh.......h.......ss......hh.h......lhhhph...ltGhst.uhhhssshshlsphhst.p.....cputsh..uhhts...shslGs.hlG.shluu....hls.thhu............aphs....Fhlhuhlslls...hl..h..hhh..............................htptstttttttttt...........................................h.hshht.........hthhhpp.........h......................hhhshhhh.....hhshhs....hhhhhsh......h.........tpthuhs.....................hhss..hslsshhshhh...thhhuhls.c+.......hst......thhhhhshhhhhhsshshhhhshs....................hhhhh...........shhl....hGh.uhuhh....hsshhshssphh....s..........pttu.....pshuhhs.hhts.lusslus 
................................................................................................................................................................h.....hh..h.h.s....h.................h...s..hh..................s............hh.................................................hp.p...................................................................................h..s..................................................................................h.s.....s.t..............................h..u.h......................hhs.h..................h.hl........sh....................sl...s...t......h.........h.......h...G....h..l..........s..-...............................+..h...................G..............p..+......t.....h........h.....h.....h........u.....h....h...h......hs...ls.........hhhs.......h..............s................ss....................................................hh....h................hh..h...h...th................l.h..G......h..........u.......t........u............s........h.............h........s..........s.............s........h......s...........h.....l...........s...........p......h......h.....s....t....p...........................p..p....u.......t........s......h.........u......h.....h..t..s....................s.....h...s....l......u.........s.....h........l......u.......s..........h....l....u...u.....................h..l....h....p...h...h.u............................................................a..p...h.s......................a..h...........l......h......u..s.......l....s.l.l..s..................hl..........h.........h.hh............................................................................h..............t...........................................................................................................................................................................................................................................................................h...h...t...................................................
..........................................hhhhh..hh..........................................h.h.h......................................h....h.h.s..................................................................h...t.hs.................................................................................................h....h................................h.........h...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................h................................................................................................................................................................................................. 0 15082 30526 45126 +7521 PF07691 PA14 PA14 domain Rigden DJ, Mello LV, Galperin MY anon Rigden DJ, Mello LV, Galperin MY Domain This domain forms an insert in bacterial beta-glucosidases and is found in other glycosidases, glycosyltransferases, proteases, amidases, yeast adhesins, and bacterial toxins, including anthrax protective antigen (PA). The domain also occurs in a Dictyostelium prespore-cell-inducing factor Psi and in fibrocystin, the mammalian protein whose mutation leads to polycystic kidney and hepatic disease. The crystal structure of PA shows that this domain (named PA14 after its location in the PA20 pro-peptide) has a beta-barrel structure. The PA14 domain sequence suggests a binding function, rather than a catalytic role. The PA14 domain distribution is compatible with carbohydrate binding. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.49 0.71 -4.55 34 1994 2012-10-02 01:24:23 2004-04-30 11:44:52 7 314 720 31 924 2134 500 135.80 15 13.85 CHANGED tpsGl.tthahpssth.s...h.................pss..sshh.stthss.shs.sphssphsGhlpsspsGpYpF..tlt......uDssscla......lsuphll-ps...stp.......................phppslthhuuphYsl+l-ahptsssthh........pltWsssssstpshstss .........................................................................t........................................................................................................t.................................t..h...t............s.....p..a..s.h......c..hp....Gh.l.p...s.....s.p....s....G.......p......Y....p..........F..tht..........u-s..s..s...c.la.................l..s.........s...p....h..l....l-p.t............stpt.......................................................t...pt..p.ss.......l..t...l...p.sG......p..ha..tl..clpahp.ttstt...................h.h.............s.............................................................................................. 0 402 643 808 +7522 PF07687 M20_dimer Peptidase_M20; Peptidase dimerisation domain Bateman A, Studholme DJ anon Pfam-B_253 (release 4.0) Domain This domain consists of 4 beta strands and two alpha helices which make up the dimerisation surface of members of the M20 family of peptidases [1]. This family includes a range of zinc metallopeptidases belonging to several families in the peptidase classification [2]. Family M20 are Glutamate carboxypeptidases. Peptidase family M25 contains X-His dipeptidases. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.27 0.71 -4.33 178 25350 2009-09-11 20:10:22 2004-04-30 16:35:36 9 57 4852 83 6606 19881 6380 112.40 18 27.08 CHANGED hup+Ghhshclp..........spGpsuHuu.hsthshN..Alp.......hhschlspLppp.................................................................sohplstlpuGt....stN....slPspspsphshR...h.stps..hcplhpplpphlppthhptp .....................................................................................................s.ht..h..plp..........lp.G..p..s..u..H..uu...h..P..p...........t......u...h.s.....Alt...............hh.s....ph...l..s....p....l..p..s.h.....p.h...................................................................................................h.........s...s....l...s...l...s..p..l..p.u.Gs....................shN..........ll...Ps...psph.p..hslR.......sh..s..t...p.s.....hc.....tlh...p.plpphhpt.....th.......................................................................................................... 1 1986 4003 5480 +7523 PF07692 Fea1 HCR1; Low iron-inducible periplasmic protein Merchant S, Studholme DJ anon Pfam-B_60541 (release 13.0) Family In Chlamydomonas reinhardtii, the gene encoding Swiss:Q9LD42 is induced by iron deficiency [1]. In green algae, this protein is periplasmic. The two paralogues FEA1 and FEA2 are the major proteins secreted by iron-deficient Chlamydomonas reinhardtii, and both are up-regulated in response to iron deficiency. FEA1 but not FEA2 is up-regulated by high CO2 concentration. Both FEA1 and FEA2 are secreted into the periplasmic space and genetic evidence confirms that their association with the cell is required for growth in low iron [2]. 
25.00 25.00 27.30 26.60 24.20 20.30 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.24 0.70 -5.63 3 23 2009-01-15 18:05:59 2004-05-04 12:51:18 6 7 9 0 20 23 61 329.40 25 55.92 CHANGED SVLhLVlAAGAlslAsAQ.sTTGsthGGFpaAGNVIGYVNMThDYCDIQAALuAGDFTEALsIYssGKNSp+GsAp....psFh+FAoYh-ANtoGpsFaDAl.................LhsS-TahsDshlGAA.GDGD..........ATlAtGVIsVuTLKYthHEhDoAluKp....ADGNu......uDASGAPHNlDEAWAhaaGGssusCGshSuWtaS....LstAh..pGpsahGuSsuNThMlpoFlNuhcAotosuThpIpuhsAARsNphRlLsLpuLp..ushtYT-cAtsuYsccsupsshupphIAVsWshhc.MLphpsasGuuVN..slthlDFhhcpTEupLSSp...AVcsALKsVlpsLGh-sA-l..GulcucIhD-ohshcCuucTshulp ....................................................hh..............s..t..t..hs.hsGap.sosVltasphshDht-hp..A..u...lput.s.assAhphYopGtNShpupup......cThps.huoaspu..phsuEshaDsh...........................hhGp.sshhtshh.tAs.usts........................h..tslKtuhhpsshh.YshHEl--Alscs......tssss.......usssGuPHshDEuWAFYsGShp...Gssusuo.u.tth.............lstth......ttssh..sGs.o....s..ssshhhs.hpt.h..us..h.psst...th.....spspsshspllthhslshlQ..uhl..+..YthtAssu.tpsuspttt..t.....W.sa.t..shL...h...p...s.....ustst........cl.pt.s..t.uht......tlcttlcslhSshGhphupl..Gs........................hsss.shh............................ 0 16 19 20 +7524 PF07693 KAP_NTPase KAP family P-loop domain Aravind L anon Aravind L Domain The KAP (after Kidins220/ARMS and PifA) family of predicted NTPases are sporadically distributed across a wide phylogenetic range in bacteria and in animals. Many of the prokaryotic KAP NTPases are encoded in plasmids and tend to undergo disruption to form pseudogenes. A unique feature of all eukaryotic and certain bacterial KAP NTPases is the presence of two or four transmembrane helices inserted into the P-loop NTPase domain. These transmembrane helices anchor KAP NTPases in the membrane such that the P-loop domain is located on the intracellular side [1]. 
20.00 18.00 20.00 18.00 19.90 17.90 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.87 0.70 -5.44 22 1164 2012-10-05 12:31:09 2004-05-14 10:44:01 9 77 813 0 383 1481 411 290.10 15 42.16 CHANGED hs.hupplschlspss............tsshslulpGtWGsGKophlphlppthppp................................................................................................................tphhlsphssahhsst.cshhtth...hsslhpsltpchpt.ttphhhthp...........hhshhhthththhhtsshhhhh.h...............................................................................pp.ppphpthhpchtppLcp.......ptclllllD-LDRh..cspphhpllcsl+hlhph...pshsalLshDpchl...tpulpptht........supcal-Kllphshplsssshpp.lcphhtpt...hpshhpthsppth.................................................................shphshhhss.s.ts.hsPRpl+Rhlsslphhhthh .........................................................................................................................................................t.h.p.hlt.p...............t.sh.slu..l.u..tWGsGKo.h.l.p...h...h.t..pp.h....ppt.......................................................................................................................................................................................................................................................................................................................p.hhhh....hss.h.....t......p....t........p...p...hh.th.......ht....p..l.....h........p..t..h..t..t..t........t.....................h.h..h...........................................................................h...............h...h...............................................................................................................................................................................................................................................................................................................................t.....................t...p.h.p......................h......ht...p..h...
p...l.pt...................p.tlllhlDp...LD......ph.......t......p...hhphh.ph...lc.h.hhsh...................h..hh...lhs..h..c.p..l.................tsltt..t..........................s...........tahpphhph...h.l.............t......h.....................................................................................................................................................................................................................................................................h............................................................................................................................................................................................................................................................................. 2 116 190 297 +7525 PF07694 5TM-5TMR_LYT 5TMR of 5TMR-LYT Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the transmembrane region of the 5TM-LYT (5TM Receptors of the LytS-YhcK type) [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.03 0.71 -4.92 106 2704 2012-10-03 02:46:00 2004-05-14 13:35:26 7 88 1472 0 397 1873 31 168.50 27 32.17 CHANGED s.hFcpl.h.ppph.phtp.clhlhllFuhhuIhusahGlpl.................................s..ulsNsRslslhluGlhGGPllGlhsGlluGlaRhh..lGG..hsuhssuluolltGlluGlltthhpp......phhshhhuhhhuhhsEhhp...........hhlll......lhspPhstuhpl.......lphIulPMllsNslGsslhhhllpsh 
....................................h..hFh.l.h.p.ph...ph.p.+.hh.l..hllFol.h.s.l.hu.s..a..h....Gl...lc........................s....uls.Ns...Rhl..slhhuGl.lGGP.hVG.hhs.Gll....u.G.l.a..R.ah........hGG........hs..u.l.s....s..h.....I..o..o........I..l.hG....l.l.u..G.h...l.thhhhp................chhs.hhhu...h...hl...s...h.l.sE..hlp............................Mll.l..l...................hhup...s.h.t....p...u.l.pl.............l.phI......u.h..PM...h...lss.o.lusulhhhll...p........................................................................... 0 119 232 308 +7526 PF07695 7TMR-DISM_7TM 7TM diverse intracellular signalling Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the transmembrane region of the 7TM-DISM (7TM Receptors with Diverse Intracellular Signalling Modules) [1]. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.86 0.71 -4.65 179 1345 2009-01-15 18:05:59 2004-05-14 13:37:09 6 102 600 0 524 1395 170 202.00 15 27.75 CHANGED pph.hhuh.hhGhlls.hhlYNLhlahth.+-psalaYshal..hshhlhhh....s..hsGhshpal....hs......s.hhtphhhhhhhhlsh.hhsh.Fspp.a....Lph.p.t........plhphhhhhhhh..hhhhh................hh.....hthhhhhhh.....hhshlhhlhhhhhulhth..hcshpsA+aal..luahhhhlushl...hh.h........hhu..llshs.hhs..hshtl...Ghhl....chllhohALu.....+ .....................................................h..hh.uhhhGhhhh.hslas..lhlahhh..+c...p...sa...l.aauhhs..hshh.l.hhh.......s....hpG..hh..ht...hh........s...ths....h....t..p.thh....hh..hhh....hshh.hhhl..Fhtp.h.....lphpp.t............thhph.h.h.h.hh...hh..h..h.lhhlh.........................h.s.hh.........hs.hhh..hhht.............hhhhl.hh..hhhhh.hshhth......hcs..h...........ps.A..t..h.h...l.....luhh..h.hhl.s.slh.....th.hh..........hhu........hls.s.....hh....s.....hhh.h......uhhl....thlhhuhslu........................................................ 
0 178 351 447 +7527 PF07696 7TMR-DISMED2 7TMR-DISM extracellular 2 Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents one of two distinct types of extracellular domain found in the 7TM-DISM (7TM Receptors with Diverse Intracellular Signalling Modules) bacterial transmembrane proteins [1]. It is possible that this domain adopts a jelly roll fold and acts as a receptor for carbohydrates and their derivatives [1]. 21.30 21.30 22.00 22.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.46 0.71 -4.75 114 945 2009-01-15 18:05:59 2004-05-14 13:41:02 6 52 497 4 371 965 86 135.70 17 17.93 CHANGED hsl..s.tthphhcDsssphslspl..t.....t.pspFpsh........psphs.hGhspusaWl+hsL...tsssstpt.......phlLplshshL-p..lclYh......tss......thh.hhptGcp..hsasp.Rthtppshlasls.L..sssps.hshalRlpS.psshphP......lplhss .....................................h......hphht.D.ssphs.l.spl..........ps.tapth......................psphs.huh.....s.p..s.s.....hWl+hsl.........tss.ss.p.ph.....................p.hlLpls..h...sh.lc.p..lsl...ah......tss...................s.h.p.hhphGsp...hs....hsp...Rt...ht.t....p....s...hla..ls..l........sssps..hshhl+lpo.p.t....hp.hs......l.lhp..................................................... 0 115 220 299 +7528 PF07697 7TMR-HDED 7TM-HD extracellular Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the extracellular domain of the 7TM-HD (7TM Receptors with HD hydrolase) [1]. 
22.60 22.60 22.80 22.70 22.40 22.00 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.20 0.70 -4.61 62 847 2009-01-15 18:05:59 2004-05-14 13:44:56 6 5 814 0 235 690 293 217.30 18 31.07 CHANGED LpsGplA.hsIhAP+slp..DptsT-p++ppAtspl...VYshctp..hspphhpp.lpphh....splpplp...ppspt..................................................pcltph.hss...............ls-pthpp...........................................................................hh.ptsppphpphc......sslhpsl...schhs.........pslss..sthtpttp.....cs.ptpl.phtsh...sss....hptssphlsphh.lpsNhhhDtptTpphppcshpplpslh..lcpGplIV+cG-hlopcthchLchlGLlpp ...............................................................................................................................................hplu.psIhushph.......s.p...t.Tcpc+ptAtptl...s.Yt.hppp..htpp.hpt.l.p.pha.......t..l..pphp....pp.pt............................................................................tplhph.h................hh.p...lspp.htp................................................................................................................................................lh.ph...stp..php.thp...................sthhph.l..............sphhp.............................ptlpp......pphtphpp................................ch.tppl...p..sl...sst......hpp.hhpt.l..hp.h..ltsNhhhDpptTcpt+p...pshpsV..pssh..lhpGplIVccG-hlspcthphLchlthhp.p..................................................................... 0 107 187 219 +7529 PF07698 7TM-7TMR_HD 7TM receptor with intracellular HD hydrolase Anantharaman V, Aravind L, Studholme DJ anon [1] Domain These bacterial 7TM receptor proteins have an intracellular Pfam:PF01966. This entry corresponds to the 7 helix transmembrane domain. These proteins also contain an N-terminal extracellular domain. 
27.50 27.50 27.60 27.60 27.20 27.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.27 0.71 -4.86 99 879 2012-10-03 04:04:29 2004-05-14 13:47:25 6 4 869 0 252 722 397 193.80 20 27.36 CHANGED hhshlGhhlhlhlhhhhhhhahc+h.p.plhppp..ptlhhh...hlhllhlhLsph...lhh.hs.............hsalhPlsssshLlshhlssclul.....hhsslhullhuhh..........shshthhlhtlluuhsushhlp.+hppRsplhtuulhlulssh.hhhhsltl..lpsss.............h.....t..plhhpshhshhsullsu...llshGllPhhEshFsllT ............................................h...hhGhhl.hlhlhlhhhhh.ahcp.....h...p...p....phhpcp....t..lh..hh.h..hlhhlslllhph.......hhh.hp.....................hsallPhAhsslllpl.hls.p.+hul.....hs.sl.lhslhsuhh.h........shsh.phhlhtlluuhsush.hl+.c........h........ppR.sp.lhtuuh.hlulhss..lhhhslhl......lpsss......................h....s...phhhphhhuhh.sullus....llshGllPhhEphFslho............ 0 121 201 235 +7530 PF07699 GCC2_GCC3 GCC2 and GCC3 He QY, Liu XH, Studholme DJ anon He QY, Liu XH Family \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.24 0.72 -4.38 41 2150 2009-09-13 19:08:02 2004-05-14 13:53:13 8 312 132 0 1375 1889 98 47.60 31 7.28 CHANGED Gpahs.....sst.....t.CpsCPtGoY.......p...spsups...pChtCPss....psT.ht..Gupshs-C .....................G.a.t......sst........ptCt...sCPhGoY........p......sptGps......pCh.sCPss.....stT...pt...tGupshppC..................... 0 558 661 1027 +7531 PF07700 HNOB Heme NO binding Iyer LM, Anantharaman V, Aravind L, Studholme DJ anon [1] Domain The HNOB (Heme NO Binding) domain, is a predominantly alpha-helical domain and binds heme via a covalent linkage to histidine. The HNOB domain is predicted to function as a heme-dependent sensor for gaseous ligands, and transduce diverse downstream signals, in both bacteria and animals. 
21.10 21.10 21.10 21.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.91 0.71 -4.73 16 853 2012-10-02 19:02:47 2004-05-14 13:56:09 10 23 422 52 440 781 58 159.00 24 32.10 CHANGED MaGhlhpslpchlpcpaGp-VW-plhcpsuh-.ppsapstp.YsDshhhcLlssluplhshshcplhchaGchhlpahhcpGaschltshucshpsFlpslDs.lHhplsphY..Pph+sPuFpspst.scssllLcYpSpR.GhhchllGll+slA+pFtp..-lplphlcppp- ..........................................MhGhl.pth.t.hl.p.p.a.G..ph.hpp.h.........hpps....t..hp......t.t..a....sht.Y.....s-.p.....hhplltsh....u.p..h.h..s.....lsh....pp..lhchaG.chhh..ph...htppt..c.hl.......psh..Gsshp-FLp.....................slDs.l.....H.p.plp..ph..a.....ss...hc......sPsFpspph...sp....s............sl..h.lcY...h...S..p..R......shtths..h..............GllcusAcphhp..clplphht....p.................................. 0 159 223 352 +7532 PF07701 HNOBA Heme NO binding associated Iyer LM, Anantharaman V, Aravind L, Studholme DJ anon [1] Domain The HNOBA domain is found associated with the HNOB domain and Pfam:PF00211 in soluble cyclases and signalling proteins. The HNOB domain is predicted to function as a heme-dependent sensor for gaseous ligands, and transduce diverse downstream signals, in both bacteria and animals. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.35 0.70 -5.13 33 958 2009-09-11 06:19:49 2004-05-14 13:58:40 9 50 126 16 637 851 5 138.40 30 19.84 CHANGED shspchplsspsFCcsFPFHllascchplhQhGpulp+lh.........sphhhspp.......pls-hFcllRPp.lphoFcsIls+lNolFllpo.....+t............p.sp.t....p...........................................................................................................................LcLKGQMlalsEsssllFLsSPhVssL--LpppGLYlSDIPlHDAoRDllLluEQtcAphpLppchE..phpcpLcpshptL-pEKc+T-cLLYullPtsVApcL ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tlh..............pphp.......phpspL....c.......cp....p....p....tLcpE.K+KT-pLLaphlPtsVAppL....................... 0 228 292 501 +7533 PF07702 UTRA UTRA domain Anantharaman V, Aravind L, Studholme DJ anon [1] Domain The UbiC transcription regulator-associated (UTRA) domain is a conserved ligand-binding domain that has a similar fold to Pfam:PF04345 [1]. It is believed to modulate activity of bacterial transcription factors in response to binding small molecules [1]. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.38 0.71 -4.58 168 12427 2012-10-01 19:33:20 2004-05-14 14:00:43 8 13 3138 54 2128 7191 599 138.40 19 57.50 CHANGED spGtp.spsc....llphphhsu..sspluptLsl.sssst.VhclpRlRhh...-spPlsl-psalstp.hhssht.....pp.ph......ppS..laphLcp..hsh...tlsp..uppplp.Aths.sscpuphLsl...sss..sslL..hlc+hsasps..upsl-aspshapu-cac ......................................................Ghpspop..llp.hph.h.....s....s..t..plup...t...L...s.....l...p.....s.p...p......l..h.pl.p..Rl..Rhh..........-s...p....P....h..h....l.......-..ps..al...s.....h...p...h..h..s..s..ls...........pp..th............ppS...l..a.ph.lcp....hsh...pls.p......up.p...p...lp..sh...h.s....ss.....c.....u.p....h....L..p.l......sts....p.P..lL...hlc.ph.s...a...s..p.s....upsl-aspshaps-ta............................................... 0 564 1212 1689 +7534 PF07703 A2M_N_2 Alpha-2-macroglobulin family N-terminal region Studholme DJ anon Prosite Family This family includes a region of the alpha-2-macroglobulin family. 
21.50 21.50 21.50 21.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.86 0.71 -4.37 184 2785 2009-01-15 18:05:59 2004-05-14 15:54:42 9 133 1371 58 856 2463 101 141.90 22 9.28 CHANGED lpl...phsc..pshpsG-shplplp..sshs................up.hhhhl.h.pspllptt............................................................thp.hpssps.......................................slplslspchsP.sshlssh.hl.............................................s....hshsss.hlsVpp.sp...phplplps.s....p.phpPspphplplp..s.....................suhlslsAVDpulLtL .....................................................................................................................................................plthsp.sthpsG-s..hplplpsshs................sp.hhhhl....s.ps.pll.h.hp............................................................................hhc...ls..spth...............................................................slplsls.p..s..h..sP..shhls..sh..hl............................................h...sphshssh.tl.slp..s...sh...ph.s.l.p.lps..s.............s..ph....pPs....p.p........ls..lclp.s.................s.suhlsl.suVDpulL.l............................. 0 194 349 599 +7535 PF07704 PSK_trans_fac Rv0623-like transcription factor Anantharaman V, Aravind L, Studholme DJ anon [1] Domain This entry represents the Rv0623-like (Swiss:P96913) family of transcription factors associated with the PSK operon [1]. 28.30 28.30 28.50 28.40 27.80 27.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.61 0.72 -3.30 29 347 2012-10-02 18:44:02 2004-05-17 11:25:38 6 1 139 0 81 156 29 81.60 36 96.75 CHANGED Mu.LsIKsscscpLA+clAchTGp.ohTpAlppAlcccLp......ccppptshh..-cltsl.....hpttthshhss.psscpphhaDEhshss .........Mu..LsIKsscscpLsccLA.tcsGp.ohTpAVtsAl...pcc.Lu...........ttppcpssLt..-cLtsl.....hcptuhssscs.ttscphh...hYD-pGLPt............................... 
0 20 42 63 +7536 PF07705 CARDB DUF1604; APHP; CARDB He QY, Liu XH, Studholme DJ anon He QY, Liu XH Domain Cell adhesion related domain found in bacteria. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.20 0.72 -3.96 72 1259 2012-10-03 16:25:20 2004-05-17 11:57:42 6 225 338 5 749 1464 732 105.20 19 17.15 CHANGED tP...DLhl.........thssssshsspp.slss.s.lpNpGsss.usshplp.........lYlsus.h...sshsl...........suLsuGsspslshshhs....ps........Gshs.lpsslDssNtlsE.sN...EsNNphot ...................................sDLhl.........t.hhssss..s..t.s.G..p.s..hsl..ss...s..V.pN..p.G.....s.ss.....u..s..s.....h.p....l.p........................l..h...l...s.ssts..................sstsl................................ssL...s.s.G..p.......o.....t......s.....l....s..hshsss............ss....................................G.s.ap..lt..s...h.s...D.s.......t....s.........t......l.......E....s.....c..sNN....s........................................................................ 0 244 406 657 +7537 PF07706 TAT_ubiq Aminotransferase ubiquitination site Hargrove J, Studholme DJ anon Pfam-B_15367 (release 13.0) Motif This segment contains a probable site of ubiquitination that ensures rapid degradation of tyrosine aminotransferase in rats. The half life of the enzyme in vivo is about 2-4 hours. In addition, unpublished information identifies at least 2 phosphorylation sites including CAPK at Ser29 and, at the other end of the protein, a casein kinase II site at S*QEECDK. This region of TAT is probably primarily related to regulatory events. Most other transaminases are much more stable and are not phosphorylated. 
18.90 18.90 18.90 22.20 18.40 17.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.31 0.72 -7.87 0.72 -4.02 7 51 2009-01-15 18:05:59 2004-05-24 12:14:22 7 3 35 0 25 44 0 41.20 55 9.12 CHANGED hcsYlIQMsususL.......PolLDsHVNlsGhsoV........uKhKuRKsR .MDsYVIQMsusGsL.......PSlLDVHVNluGRSSV.......sGKhKGRKAR.. 0 1 1 7 +7538 PF07707 BACK BTB And C-terminal Kelch Stogios PJ, Studholme DJ, Finn RD anon Stogios PJ Domain This domain is found associated with Pfam:PF00651 and Pfam:PF01344 [1]. The BACK domain is found juxtaposed to the BTB domain; they are separated by as little as two residues [1]. This family appears to be closely related to the BTB domain (Finn RD, personal observation). 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.15 0.72 -4.18 81 5713 2012-10-02 01:20:04 2004-05-27 16:12:29 10 234 772 7 3017 5073 29 98.70 24 18.96 CHANGED ClulhpaA-ta.sspcLhphuppaltppFhpVhps-.-Fhp.LshcplhpllssDcLslpsEcpVapAlhpWlpa-hppRppthsclL.ppVRl.sLlsspaLhphlp ......................................Clthhthu.c.ha....st.p.cLt..p.tshp.....a..l...hpp.F...p....l......h.....p.......s....-......-..........F....hp....L...s....h....ppl...h..p.l...Ls..p.-p......L.....p..l........p.....s......E.....pp.............l.ac..Asl...p.W.........l....p..............t..........c..............h...........p..........p............R.........p.........p.........h...........h........s.............p.............l....l....pt..lRh..shls.thLhp............................................... 0 922 1157 1963 +7539 PF07708 Tash_PEST Tash protein PEST motif Yeats C anon Yeats C Motif This motif is found in the Tash AT-hook proteins of Theileria annulata. These proteins are transported to the hosts nucleus and are likely to be involved in pathogenesis [1]. It is also often found in conjunction with Pfam:PF04385. 
It is suggested that they may be 'part of PEST motifs' (a signal for rapid proteolytic degradation) in [2], though this is not definite. This motif is also found in other T. annulata proteins, which have no other known domains in (unpublished data: C Yeats). 22.40 22.40 22.40 22.40 22.30 22.20 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.33 0.73 -6.32 0.73 -3.83 11 113 2009-01-15 18:05:59 2004-05-28 10:38:50 6 10 2 0 52 114 0 18.70 62 4.55 CHANGED E-L-PETIplElpSD-E-p ...pLcPETIPVEluSDEE-... 0 0 47 47 +7540 PF07709 SRR Seven Residue Repeat Yeats C anon Yeats C Repeat Associated with Pfam:PF02969 in This repeat is found in some Plasmodium and Theileria proteins. 19.50 2.30 19.70 2.90 18.70 -999999.99 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -6.43 0.77 -6.36 0.77 -2.75 14 54 2009-09-16 13:37:02 2004-05-28 11:45:45 6 9 8 0 38 51 9 13.60 30 9.06 CHANGED pphccVcssYppLp ..ppacpVpssYpplp... 0 12 31 34 +7541 PF07710 P53_tetramer P53 tetramerisation motif Bateman A anon Pfam-B_782 (release 3.0) Motif \N 20.70 20.70 20.90 21.10 20.20 20.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.89 0.72 -4.86 28 335 2009-09-11 15:49:44 2004-06-01 14:29:59 6 11 99 129 105 383 0 42.20 46 9.30 CHANGED KK++s....tDcEhFTLplRGREpYEhLpKlp-uLELt-hh.sppps .............K++s............D-E.h.....ahL..plRGREp...aEhLhKlpEuLELh-hl.sppp.h........ 1 12 18 42 +7542 PF07711 RabGGT_insert Rab geranylgeranyl transferase alpha-subunit, insert domain Finn RD, Bateman A anon Pfam-B_20675 (release 13.0) Domain Rab geranylgeranyl transferase (RabGGT) catalyses the addition of two geranylgeranyl groups to the C-terminal cysteine residues of Rab proteins, which is crucial for membrane association and function of these proteins in intracellular vesicular trafficking [1]. This domain is inserted between Pfam:PF01239 repeats. 
This domain adopts an Ig-like fold and is thought to be involved in protein-protein interactions and might be involved in the recognition and binding of REP [1]. 21.10 21.10 21.40 22.30 20.40 20.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.45 0.72 -4.19 4 58 2009-01-15 18:05:59 2004-06-02 17:26:25 6 16 37 3 24 51 0 98.50 59 19.56 CHANGED -hIpCVaVSR-EupVoVuFS+PVts..tpssLlLhLDspP.tVpWRoscsR.++S.lWlCDLPssuIsDpsspHshpVhWT-tcs++-CsLYsGppEsWCRDSA .............-uLRCLHVSR-EACLoVuFSRPllVu.sts-sLLLhVD-SPLhVEWRTPDGRNRPS.HV..WLCDLPAuSLNDphPQH.oFpVlWTuu-spKECVLhpGRpEuWCRDS.................... 0 2 4 10 +7543 PF07712 SURNod19 Stress up-regulated Nod 19 Studholme DJ anon Pfam-B032880 release 13.0 Domain \N 25.00 25.00 27.90 29.40 21.30 23.20 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.28 0.70 -6.02 3 106 2009-01-15 18:05:59 2004-06-03 14:42:03 7 3 29 0 66 84 1 309.50 44 88.67 CHANGED FLKTEc+IKSAVFaSPKlELuPGSVSN+aYYDIDFPRGHIuLKuFsAEVVDEAG+PIPLHETYLHHWlVVRYHQsKsScpsppphF....................IaVRNuGlCQusoLGQYFGLGSETRGTuTYIPDPYGIEIGNPEEIP-GYEEKWLlNIHAIDTRGVEDKpGCIECKCDLYNVTlDEYGRuIRPsYKGGLhCCYDpTQCKLKcGF.pGPK.RSLYLKYTV+WVDWDcYllPVKIYIFDVTDTLKlSD+SKGsss-HsCKlEYEVEPCsTNshpcsGCVDVKKsSLPMQsGGYVIYGVAHQHSGGIGSTLYGQDGRVICoSIPKYGNGcEAGNEsGYIVGMSTCYP.PGSVKIhDGETLTLESNYSNslcHTGVMGLFYlLVAEQLP.Pcl 
................................................................................................................................................................lpotsFhSsth.hsPGplss+hhhslcFP+GHlulKuFsuElV..D...p..tG..s....slPLaETYLHHWhh.t..Ya...h...s......p.....................t...h......................hhh+NpGhCptthl.pa.aGhGuEoRtTso.lPsPaulEhGNPtths..p.GY-E+WllNlhsIDTRG.s...D+..huChEC+C.......D..............h.......YNV.ThD..............sp..............lp....................sY+GGLhCC.DthpCph.pp...GF......pu....p....Rplh.L+YslpWl-Ws.p.h..lPl.+..lYIhDsT...Dp.......ps.p................psCp.......hEYp...l..t.sssst....s..s.csppsp.hsh..................pGG..................llYGsuH.HsG........shsuoLaG.p.................DG.RhlC.....sShP......pYG.....sG.pEuGNEtGYlVGMSsCYPp...PG...ol.+ltcGEhLsl.opYss.p..t.pTGVMGhFYlhlA-......t................. 0 7 58 64 +7544 PF07713 DUF1604 Protein of unknown function (DUF1604) Yeats C anon Yeats C Family This family is found at the N-terminus of several eukaryotic RNA processing proteins (e.g Swiss:Q8N3B7). 25.00 25.00 25.30 25.30 20.00 19.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -9.96 0.72 -4.19 20 248 2009-01-15 18:05:59 2004-06-03 14:42:41 8 7 214 0 185 248 1 86.30 55 10.82 CHANGED pchlPlacQpVpDEcGR.+RFHGAFTGGFSAGYaNTVGSKEG....WsPpTFpSSRppRus....sp..sppsEDFMDEEDlu-ht.tspplpoppcF..s ............p.p.lPlhcQpV+DEcGR.+RFHGAFTGGFSAG.........YFNTVGSKEG...............WTPoTFsSSRpsRAc............pp......pp+PEDFMDEEDluEht...ts+plpTsspF............ 
0 70 105 150 +7545 PF07714 Pkinase_Tyr Protein tyrosine kinase Studhome DJ anon Unknown Domain \N 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.54 0.70 -5.32 145 24806 2012-10-02 22:05:25 2004-06-11 14:25:16 12 1823 1048 846 13827 124495 4199 232.30 26 33.84 CHANGED lpht.cpLGpGsFGpVacGphp...................ttsphpVAVKsL..+...p..sssppt...hpcFlp.EuplMppl.pH.plVpLhGlss...........ptpP.....hhllhEahptG...sLhsaL+pp..................................................................................................ptplshtp.LlphuhQlApGMpYLps+p..hlHRDLAARNlLls.....c.............st.hlKIuDFGLuRtl.........pps.Y..ptpssuc..hPl+Wh...APEulptsp..........aospSDVWSFGVlLWElhohGp.pP..............Yssh.ssp-........lhphl.cpGhR..hsp.PptCPsclY.pl.MtpCWptcPppRPoFppl...hptL ...............................................................................................................................................................................................................................p.lG..p....G....t.F..G....t...V..h..t...u....h.................................................t.h........V.......A.......l.........K..........h..........l.........p...................t....................t...t............t............t..........t................................................p...........p...................F.........h.................p...........E.................h...........p...........h..............h.................p.............p................l................p...................H.............................N...............l...............l...............p...................l............h.................G.....h....sh.................pt.t.......................................h.h..l...l......h........E........a........h........t....t....G.................s...L....t......p......a......L....p...p..p..............................................................................................
.........................................................................................................................................................t..t.........l....s.....h.....t....p.......h....l........p.....h.....s.......h....p.........l...........A..........p..........G....h........t...........Y..........L.........c.................p.................p..........p...................h...........l.............H...............R................D.........L..............t...........u..............p...........N...........l......L......l...s....p........................................................sh....h....s.....K......l......u.......D.......F.....G.........l...u......+..h...........................................t....t......h...................h....t......t.......t...........s........h.........h.............s........h................t...............a......h..................A.....P.........E........h....h.........t..t.h.........................................h....o.....t......p......o.........D.....V.........a.................S..............F.............G...............l....l.........l..........h......E..........l.............h......o..................s.....t........P.....................................................a............t........h......p.......t.....t....p...........................h....h.....t........h........l..........p..........p.........s............h........p....................h...................h...........s................................t...........s...........s.............................t..............h..........h.............p........l..........h..............h........t.......C.............h.......p.............p..s...p...p...R.P..s.htplht....................................................................................................................................................................... 
1 4020 7259 10608 +7546 PF07715 Plug TonB-dependent Receptor Plug Domain Yeats C anon Yeats C Domain The Plug domain has been shown to be an independently folding subunit of the TonB-dependent receptors ([1]). It acts as the channel gate, blocking the pore until the channel is bound by ligand. At this point it under goes conformational changes opens the channel. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.23 0.72 -3.72 742 36469 2012-10-03 10:59:06 2004-06-15 15:23:27 10 123 2367 66 9642 33866 11001 109.70 22 13.45 CHANGED shpssstulss......lstcp.lppp..s.sssls-sLp......t..hP......Glsh.........................sssssss...s.....s..sh...........slRGh.....ss....................spshlhlD.Gh.h..............................tts...........shs..t..l.s.....s.....t..sl-plEVl+.Gssos.lYGs.suhGGl ...................................................................................................t...phstslss.....l.s.t.c..p...l.ppp....s...ss..s...l...s..-..s.Lp...................t...hP...........Glsh.................................ssss.sss....s...........s....pl.................plR...Gh.....ss...................................sps.hlhlD...G.l.h...............................................tts.................shs....t..l.....s........s.......s..sl.-..p.l-Vl+..G...s.s..u.s..lY.G.s...s.uhuGs............................................ 
0 3204 6758 8441 +7547 PF07716 bZIP_2 Basic region leucine zipper Studholme DJ anon PfamB-200; Release 14.0; Family \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.57 0.72 -4.14 48 1966 2012-10-02 13:17:30 2004-06-16 18:28:29 10 24 376 25 1196 6475 28 53.70 29 17.49 CHANGED pssphpp+Rc.+NNtAu++SR-+++t+ppphpp+ltpLpccNppL...cpclppLcpE .................pphtt+.R....c..+N.s...A..A++SR-+++...h+p..p.......p...l.......p...p.......c.......l....tp.......Lp...p-.Np.pL..........cpclttLpt.......................................... 0 353 569 934 +7548 PF07717 OB_NTP_bind DUF1605; Oligonucleotide/oligosaccharide-binding (OB)-fold Yeats C anon Yeats C Domain This family is found towards the C-terminus of the DEAD-box helicases (Pfam:PF00270). In these helicases it is apparently always found in association with Pfam:PF04408. There do seem to be a couple of instances where it occurs by itself - e.g. Swiss:Q84VZ2. The structure PDB:3i4u adopts an OB-fold. helicases (Pfam:PF00270). In these helicases it is apparently always found in association with Pfam:PF04408. This C-terminal domain of the yeast helicase contains an oligonucleotide/oligosaccharide-binding (OB)-fold which seems to be placed at the entrance of the putative nucleic acid cavity. It also constitutes the binding site for the G-patch-containing domain of Pfa1p. When found on DEAH/RHA helicases, this domain is central to the regulation of the helicase activity through its binding of both RNA and G-patch domain proteins [1]. 
21.40 21.40 21.50 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.61 0.71 -4.25 118 5209 2012-10-03 20:18:03 2004-06-21 13:22:17 11 143 2032 5 2747 4906 191 134.10 22 12.53 CHANGED pQLtplh...................................................pch...phph..........p.........................................................pp.phl+pslsuGha.plA.....................................thp............pp.....sp..Ypsh................................tpsp.hlhlHP.uSs...l...........hppps.c.....................allapEllp.............T........s+..ah+s........................sotl.pspWLhth.u...sph ...........................................................................................................................................t.QL.thh.................................tphth...p.....................................................................................................................................sshptl+pulhu.Gh.hs.plu...................................................php...........................pp...tp...Yhss.............................................csp....phhlHP..u..Ss.....L..........................hcp..t.P.c....................................................a..llatElVp.......................T..............................o+...ha..h..R.s.............................sstl..cPpWl..l..usp....................................................................... 0 873 1456 2200 +7549 PF07718 Coatamer_beta_C DUF1606; Coatomer beta C-terminal region Yeats C anon Yeats C Domain This family is found at the C-terminus of the coatamer beta subunit proteins (Beta-coat proteins). This C-terminal domain probably adapts the function of the N-terminal Pfam:PF01602 domain. 
25.00 25.00 36.20 30.30 23.60 22.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.55 0.71 -4.53 25 351 2012-10-03 16:25:20 2004-06-21 13:22:31 7 9 301 0 255 354 8 138.40 49 15.21 CHANGED QsDDhIsF+QLpu+putsssp...sh-sDLs+AT.G.sssspp.chs...p.....+Ls+VhQLTGFSDPVYAEAYVsV+QYDIlLDVLlVNpTs-TLQNLolELATlGDLKLVERPpshTLuPtsFpsI+AsIKVSSTETGVIFGsIVY-ssu ..................................................................QsDDsIsFtQLstcssh..s..stc.............thp.sLtt.Ah.G......ssppp.p.shs..............SKLs+.V..sQLTGFSDPVYAEAYVpVpQaDI................VLDVLlVNQTs-TLQNlolEhAT.LGD..LK.lVE+P.....s.shs.LuP+-Ftslc.................AslKVoST-sGVIFGNIVYDsss........... 0 90 147 213 +7550 PF07719 TPR_2 Tetratricopeptide repeat Studholme DJ anon Context matches from Pfam 14.0 Repeat This Pfam entry includes outlying Tetratricopeptide-like repeats (TPR) that are not matched by Pfam:PF00515. 27.00 13.00 27.00 13.00 26.90 12.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.73 0.74 -7.75 0.74 -3.60 369 18395 2012-10-11 20:01:00 2004-06-21 18:10:22 12 4257 3733 24 7965 120483 33152 32.50 19 6.62 CHANGED spshhthutshhptsch.pcAhptapcAlplssss .........................hhhthGt.h.h.h..p.t..s..c...h..p...cAhp...h.appAlpht.................. 0 3026 4971 6601 +7551 PF07720 TPR_3 Tetratricopeptide repeat Studholme DJ anon [1] Repeat This Pfam entry includes tetratricopeptide-like repeats found in the LcrH/SycD-like chaperones [1]. 20.70 11.00 20.70 11.00 20.60 10.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.19 0.72 -7.60 0.72 -4.11 18 1102 2012-10-11 20:01:00 2004-06-22 13:19:31 7 26 538 52 97 1589 293 33.40 29 26.08 CHANGED h-hlauh...AtshatptcappAhph....hhshlptps ................h-hlhua.............Ahshpp.p.tchpcAtsh..at..hhhhhsh.s.............. 
0 34 50 73 +7552 PF07721 TPR_4 Tetratricopeptide repeat Studholme DJ anon Manual Repeat This Pfam entry includes tetratricopeptide-like repeats not detected by the Pfam:PF00515, Pfam:PF07719 and Pfam:PF07720 models. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.82 0.74 -7.35 0.74 -2.96 143 472 2012-10-11 20:01:00 2004-06-22 16:56:25 9 256 297 0 244 3990 1150 25.00 25 5.32 CHANGED ststhshApshhttGchspAtshlcp ..........hhhsLAtshhtpGchspApphhc.......... 0 84 180 210 +7553 PF07722 Peptidase_C26 Peptidase C26 Studholme DJ anon MEROPS Domain These peptidases have gamma-glutamyl hydrolase activity; that is they catalyse the cleavage of the gamma-glutamyl bond in poly-gamma-glutamyl substrates. They are structurally related to Pfam:PF00117, but contain extensions in four loops and at the C terminus [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.30 0.70 -4.90 45 3232 2012-10-03 00:28:14 2004-06-23 11:09:18 8 15 2292 12 825 18376 10620 205.10 31 78.01 CHANGED PlIGlsusht..htttshtshst.pYltuth.pulptAGGlPlllPht..sspshpphlstlDGllloGGt.sVcPphYutpsp.tpss.hsssRDthclsLlctAlppshPlLGICRGhQhlNVAlGGoLaQclpt....t..thht........p......htssHslplp........uohLuplh.....s..sc...hpVNShH+Qulcc........LussLpVpApus.DGllEAlps.stp.hFslGVQWHPE 
................................................................................................llG.lss.s.............................................t.......t....h.........pa.ls..tthhphl.t.t.u..GG.l.Pl...l...l..........P.......h........s.................s...........p.......h.......h....p.....p...hl...p..h..l..D..GllL...sG...Gt.sV....p...P..p...h...Y........G.................p..p..................................t........................p................t........s................s......h.....s.......t........R.......Dth...-...l.s.L...l.c...t....A...l.c...p.......p.......hP...lh..uICRG.hQ..l...........l...N........V.....A.......h...G...G..o..L...a..Q..c....lp..p...........t......h.....t...p.hp.p.......................................p.................hs..s..H.p...l...p.l.p..............................................ts...o...h...L.t..p.lh.......................................s....pp.....h.h.V..N...S...h..H..+...Q..u..l..c.p.........................l.u..s..s..L...c...l...p...A...h............u..........s......D.....G......l......l......E...A.....l.....p.....s.......h....s.....t....t......hhl.GVQWHPE.................................................................................................................... 0 299 509 675 +7554 PF07723 LRR_2 Leucine Rich Repeat Studholme DJ anon PfamB-152 (release 14.0) Repeat This Pfam entry includes some LRRs that fail to be detected with the Pfam:PF00560 model. 20.90 9.50 20.90 9.60 20.80 9.40 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.64 0.73 -7.26 0.73 -3.24 161 389 2012-10-02 21:32:02 2004-06-23 16:09:48 8 33 40 0 269 479 1 25.70 36 6.48 CHANGED sLKsLpL.pp...lta....ssctslppLlSuCP ...........sLKoLpL.pp...Vpa.......ssctslppLlSuCP........ 0 142 155 160 +7555 PF07724 AAA_2 AAA domain (Cdc48 subfamily) Studholme DJ anon PfamB-40 (Release 14.0) Domain This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.94 0.71 -4.35 139 20963 2012-10-05 12:31:09 2004-06-23 17:05:00 9 82 5116 83 4876 26856 10382 171.50 36 26.68 CHANGED cPhushlhhGPTGsGKTp.........Lu+sLAphLh..............spppshhphDMSEahE........c+slu+LlGussGYVGh-puG.Lo-tl....tppPasllLlDEIEKAcs...............sV.phLLQll-suplo.........-spG.ppVch+NsllIhToNhuupthtp........................................hpt.h.t.lpthahP.EFlsRls.h .........................................................................................................................................hushl.hlGPTG..s.G.KTc...........................LAc.sL.AchLh....................................................................................................................................t.tpsh..hp..h.......D..h....o....E.ahE...................................c....p...s.......l.......s.....+...L...l...G...u.......s.........s.....G.......h......V.......u...h.......-..c...sG...................................................tllllDE..I-...Kups........................................s....V........p......h.LL...p.ll...-....s....u...p..lo.........................................................................D.sp.G....hpV...D..h.....c..N....s..l.h.I.h.Tu.N..huutp..h.p....h...........................................................................................................t.t...hp.ts...p.....l.p....h.h.....h.....P.........EFlsRls............................................................................................................................................................ 2 1633 3130 4111 +7556 PF07726 AAA_3 ATPase family associated with various cellular activities (AAA) Studholme DJ anon PfamB-40 (Release 14.0) Family This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.70 0.71 -4.54 55 4128 2012-10-05 12:31:09 2004-06-24 10:26:01 6 8 2112 1 1431 6260 2158 130.60 58 39.23 CHANGED HlLlEGVPGluKThls+sLApslshsFpRIQ.FTPDLLPuDlhGo.plactcs....ppFcFctGPlFusllLADEINRAPPKTQSALLEAMpE.+QVTlsGpTasLPcPFhVlATQNPlEpEGTYPLPEAQLDRFlh ....................................................................................HlLLEsVPGluKThh...s+...sL...A....p.s...l.....s.......h.............s..............F...p....R....I...Q....F.....T...P.....D..L.........L...P...u........D.....l....s..G........s......p......l....a...s..t..ps...................t.p..F......c......F........c..........G....P.......l......F............s........N...........l...........l.....L..A.........DEINR.....A.....s....P.....K....T.....Q.....S...A...L...L...E..A..M.....p.....E..+.....Q.....V......T.....l......s.....G....p.....T.....a.....s...L...P.....p.....P...F.....h.V...l.A.T.Q.N.P..l.......E.......p.......E........G.T.Y......P.LPEA..QLDRFlh..................................................................................... 0 512 1017 1260 +7557 PF07727 RVT_2 Reverse transcriptase (RNA-dependent DNA polymerase) Studholme DJ, Eddy SR anon PfamB-40 (Release 14.0) Family A reverse transcriptase gene is usually indicative of a mobile element such as a retrotransposon or retrovirus. Reverse transcriptases occur in a variety of mobile elements, including retrotransposons, retroviruses, group II introns, bacterial msDNAs, hepadnaviruses, and caulimoviruses. This Pfam entry includes reverse transcriptases not recognised by the Pfam:PF00078 model. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.61 0.70 -5.41 99 7027 2012-10-02 12:54:00 2004-06-24 14:06:22 9 498 350 0 1834 6948 486 136.70 27 23.93 CHANGED NpTWcLVshPpsp.+slGsKWVF+hKhsscG.slpRaKARLVA+GasQppGlDYp..ETFuPVs+hsolRhlLulAuppsaplcQhDV+oAFLpG-LcE-..lYMpQP.GF.spsp......sspVC+LcKuLYGLKQAPRtWap+hsshLhphGFpputsDssla..h+ppsssh.lhlllYVDDlllsusspphlpphKppLspcFcM+DLGthpaaLGlElp+..sspGlhLoQppYspcl.LpchsMpss+..PssoPhsss ........................................................................................................................................................................................................................................................................s.salp..u.....l...p..-..p....l..ah.....t.......P..uh.................................t..hh.h..h.L.......+u.lYG......L+.Q.......u.s....t....W.....p.h...t..t.h.h.....t....th..s......p........s.h....a....h..............t....t................h..hl.hl......aVDD.h........................................................................................................................................................................................................................................................................................................... 1 611 910 1250 +7558 PF07728 AAA_5 AAA domain (dynein-related subfamily) Studholme DJ anon PfamB-136 (Release 14.0) Domain This Pfam entry includes some of the AAA proteins not detected by the Pfam:PF00004 model. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.54 0.71 -4.45 34 10563 2012-10-05 12:31:09 2004-06-29 09:05:52 9 237 3591 15 4659 65399 18925 134.50 18 14.25 CHANGED sllLlGsPGsuKohlsppluttl.....spshhhl.hspshots-lhtthph.......ssttthhsuslhpAsp..........cuplshl-Els+sp.sslhssLhslLsc+phhh.pssthhtst...........................hpllusuNshs.......hthsphssALtpRF ..................................................................lhlhG....sGs....GKo...t.....l...s..c......t..l....u..thh............................st..........h......h....h.......l..............h.......s........t..........t..........h.......s................p......c....l.........h...G...t...h....s...............................t..p..h..t....h...p....s...u.......l...h..p..s.hp...................................................................................ps..h....l....l....h....l..D.E.l..........s...........h.............s.............t.....s..........p..............h..........h.......s......s..........L......s....l....l.......c....p....p....p.....h....h....h.......t...t.....s....t......h...t..h.................................................................................................hhl..l.s.s.h.N.sht..................h..t..uhhpR...................................................................................................................................................... 1 1765 2764 3880 +7559 PF07729 FCD FCD domain Bateman A anon Pfam-B_117 (release 14.0) Domain This domain is the C-terminal ligand binding domain of many members of the GntR family. This domain probably binds to a range of effector molecules that regulate the transcription of genes through the action of the N-terminal DNA-binding domain Pfam:PF00392. This domain is found in Swiss:P45427 and Swiss:P31460 that are regulators of sugar biosynthesis operons. 
It is also in the known structure of FadR where it binds to acyl-coA, the domain is alpha helical [1]. This family has been named as FCD for (FadR C-terminal Domain). 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.55 0.71 -3.80 317 20783 2012-10-01 22:11:53 2004-06-29 14:06:45 7 44 3128 16 5347 14681 1573 125.20 20 52.31 CHANGED -...l...h...ph...RthLE.....stss.p..hAs..pphss...........pplpplpphh.......pphppshp.....tsch.t.............hhphctpFHttlhptusNthlt...........p..hhppltttht.h..hhthth................stpphppshpc..Hp.pllcAlts.pDsctAtph........hppH.lpt ........................................................................plh.ph...RthlE......stss...p....hAA....tp..t.os.........................pp.l.t.p..l.pphh.............p.p...h..p..p..s.hp...........................ttc.htt...............................................hhp..h.-..hpFHt.tlh.p.s.ut..N..h.Lh................p...hh.p.p.l.hshhp..t.......hhthhht............................ptpt.h.p.p...h..hpp......Hp.tIlcA.l..t.p.p.D.s...c.t..A.pps...........hppHlt.t.................................................... 0 1367 3132 4302 +7560 PF07730 HisKA_3 Histidine kinase Studholme DJ anon BLAST Domain This is the dimerisation and phosphoacceptor domain of a sub-family of histidine kinases. It shares sequence similarity with Pfam:PF00512 and Pfam:PF07536. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.27 0.72 -3.63 191 12598 2012-10-11 19:05:54 2004-06-29 14:17:48 8 320 3309 15 3406 10118 555 67.60 26 14.71 CHANGED ERpRIARELHDsluppLsslthphphhpthh.....ppsst...........psppplpplpphsppuls-lRplltsLR..sssh .............................ERpRlARELHDslup..pLssl.p.hp.h...p..h.hpphh.......ppsst.............................psp.ptlpplpphsppuh...pcl.RplltsLR.s..................................................... 
0 1258 2513 3067 +7561 PF07731 Cu-oxidase_2 Multicopper oxidase Studholme DJ anon PfamB-49 (Release 14.0) Domain This entry contains many divergent copper oxidase-like domains that are not recognised by the Pfam:PF00394 model. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.81 0.71 -4.67 127 6906 2012-10-02 17:41:00 2004-06-29 16:37:37 9 88 2541 150 2712 7112 437 134.00 20 24.40 CHANGED ths.pssshhph................................t...hhssspshslphsshlclhh.s........s.............................sHPhHLHGasFhllspssssh............................sh.ssspRDTltlss....suhssltaps...D.NPGsWhhHCHlt..hHhstGhhhhhh.tsts .........................................................................................................................th....................................................t.t....h.sstph..h..t.l..p....h....s...p...h.h.c.h..h..h..s...........s.h...................................sHP.hHl...HG..h.p...F.p..l..l..sp.....ss.s..................................................................................p..s..s.h.+D..T..l..t......l....s...................ssh...s..tlthph......................-..ss.........G....s.Whh......H......C.................H.....lh........H..hs.......t..Ghhh.hhh....t.................................... 0 653 1565 2232 +7562 PF07732 Cu-oxidase_3 Multicopper oxidase Studholme DJ anon PfamB-43 (Release 14.0) Domain This entry contains many divergent copper oxidase-like domains that are not recognised by the Pfam:PF00394 model. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.50 0.71 -4.37 121 9918 2012-10-02 17:41:00 2004-06-30 09:10:05 10 110 3033 407 3010 9791 561 100.50 30 22.68 CHANGED plphsshshhsts...ppshslNGphPGPslpsppGDplhlpVhNpl...sps.solHWHGlpphtssh.DGssulTpCPItP.GpsasYpFplpp..tGThWYHuHhsh...p...sGlaGslllpspts ...................................................h..................hhhh.s.....s...t.....h.........G....P......h....l......php...............G........-t.l.............lph....h.....N..............ph..........s..................p....................s.......h............o....l..............HW.................H........G.....l............h...............h...........s............s.............................D...........G..........s........s..................t....................h............s...........p............s..........s..............I.......t.......P..........G.........p.........s.........a....s....Y..p...F........p.....s........p.........p......................s.........G.........T............a..........W.........YHuH..t...............p.h..tGl.hG..hllps................................................................... 
0 695 1662 2404 +7563 PF07733 DNA_pol3_alpha Bacterial DNA polymerase III alpha subunit Studholme DJ anon Pfam-B111 (Release 14.0) Family \N 22.00 22.00 23.70 22.40 21.20 20.60 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.23 0.70 -6.04 89 7657 2009-01-15 18:05:59 2004-07-01 10:32:47 7 67 4508 9 1594 6277 5227 405.60 35 36.47 CHANGED phLpchshpGl..ccRh.................................h.pRLchELslItph......GFssYFLIVtDhlpaA+p....pGI.VGsGRGSuAGSLVAYsLsIT-lDPlp...........................................asLLFERFLs.ERsshPDIDlDFspc+RpcVIpYVtc+YGc......-pVAplsTauThtuKuAlRDVuRsh.......shs.s.hstlsKhls.........sh............ptshp.p........phpphht.p..........phpclhclAppl...-GlsRpsupHAuGlVI.usp.sLs-hsPlhhssps..........................tthlTQa-hcs.lEshGLlKhDhLGLcsLThIcpshchlp.................tp.......tth............h.....s............l..tsIP..h-Dt.psac.hlspu................cThGlFQlESpuhpshLpcl+PssFpDllshsALhRPGPhp.......t.lpsalpR+pG......ptlpa.ps.........lc.lLppTaGlllYQEQlMplu..phhAGaohGcADhLRRAMuK.Kc.p.htc.+p.....pFhpGhhcpu 
................................................................t..LtchshpGh..cc+h...........st......tt...................................................hpcRL-hELslItph........GFs.sYFLIVh-hlpau+s...........................p.Gl...V.G.sGRG.SuAGSlVAYsLtIT-.lDPlp.....................................................................................................a.sLL.F..ERFLNsE......R....s......shP.DIDlDF.s.t..........c.p.....RscVIpYV.tcpY...G..c........-.pV..uplhThGTh.tu.......K.ssl+.DVu+sh...............sh.s.t.hstl..s+h.ls.t...............sl...................tthht.p.........................................ph..t..p..h..h..t...t.p...........................................cspclhchAptl...-.G.lsRpsuhHu........uGlll..ssp.s.......lh-hsPl.hssps.............................th.lTQ.a.-hps..lE......t.hGLlKhDhLGLcsLThIchshchlp............................................................tp....................t...........h.....s.....................l...pp..I..P..........h-.Dt...p.......s...ap..hhppu.....................pThGlF..QhE.S.puh+phLc..c.h...........+...Ps...........s..........Fp....Dllsl....s...uLhRPGPhp.......s..lps....al.cR+pu...............p........l......p.h.ps.................lc.lLt.....p....ThGlllYQEQlMplu..t..hhuGaohGtAD.hLRRAMuK..K....p.....p...h.tc.....+p...tFhpGhhpp..................................................................................... 1 562 1068 1361 +7564 PF07734 FBA_1 DUF1607; F-box associated Studholme DJ anon Pfam-B128 (Release 14.0) Family Most of these proteins contain Pfam:PF00646 at the N terminus, suggesting that they are effectors linked with ubiquitination. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.44 0.71 -4.55 99 950 2012-10-01 23:07:08 2004-07-01 11:05:26 8 29 37 0 484 1107 0 142.20 20 39.04 CHANGED uVSLKGNTYWhAp.........ccppt.......................a.llsFDFosE+...Ft.shlsL..Paptt..............sshsLS.sVR.-E.+LulL..hQ.......ptpo.p......hEIWlTs.K..I-ss......sVSW..spF.Lpls.hpsh......sth.............ssSFF.lDE....-K.Klsls..hc...pptppt..............shhYIl.G.cs...thhpcls.......................hhPhhh..s.YVPS ..............................................................l.lpGshYWhup.........cppp....................................................h.llsFDh.s.s.Ep...F..t.h..l...L..P.hptp...................thphhsL....h.ht....p-..pLs.hh....hp............p.p..s..p.......h-IWlhs..c....hcss......psSW.....oKh..h...sls..h..h......................................h................................................................................................................................................................................................................... 1 160 208 232 +7565 PF07735 FBA_2 F-box associated Studholme DJ anon Pfam-B128 (Release 14.0) Family Most of these proteins contain Pfam:PF00646 at the N terminus, suggesting that they are effectors linked with ubiquitination. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.54 0.72 -4.12 141 1899 2009-01-15 18:05:59 2004-07-01 17:02:22 12 25 5 0 1889 1923 0 70.40 21 21.44 CHANGED tpt.phppll...spNh....pplph.........t.phsLs-LL..hhNssplpl..tp.s.....lo.sc-lNlFLKH..WlpG.usscLchhplp .....................................................ph.........pplplt.............psphlsl.-.cL.L...th..s.sp.p.l.p.l.....pp..sp......lo..sc-l.NpFlKp..W.h..p...G...sp.s.pLchlpl.t................... 
0 176 183 1889 +7566 PF07736 CM_1 Chorismate mutase type I Bateman A, Griffiths-Jones SR anon Bateman A Domain Chorismate mutase EC:5.4.99.5 catalyses the conversion of chorismate to prephenate in the pathway of tyrosine and phenylalanine biosynthesis. This enzyme is negatively regulated by tyrosine, tryptophan and phenylalanine [2,3]. 21.20 21.20 21.20 35.40 21.10 17.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.31 0.71 -10.46 0.71 -4.01 57 434 2012-10-01 19:40:00 2004-07-07 16:22:31 6 2 424 49 151 378 287 116.20 43 92.42 CHANGED lRulRGATTlspNosctIhpAspELlppllpcNslps.--llSlhFosTpDLsAsFPAtsARph..GW.psVPLlsspEhsV.GuLs+CIRVLlahpospsp..tclpHVYL+sAtsLRPDls ....hRuIRGATTl-pNst-pIhpATpELlpcllppN.plps.-DllSlhFTsTpDLsAsFPAtAsRph.......G.a.ptVPLhsspEhsVsGuLs+CIRlhlpssocp.sp..pcIpHVYL+sAtsLRPDL.s..... 0 67 120 140 +7567 PF07737 ATLF Anthrax toxin lethal factor, N- and C-terminal domain Andreeva A, Bateman A anon Pfam-B_23800 (release 14.0) Domain The C-terminal domain is the catalytically active domain whereas the N-terminal domain is likely to be inactive. 21.40 21.40 23.10 22.90 20.40 19.80 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.60 0.70 -4.96 4 70 2009-09-11 08:46:23 2004-07-07 16:59:27 6 7 32 77 5 78 0 187.20 36 41.50 CHANGED hK.-hhpplV+l-lps-phsKtptspcLlcKlPp-VLEhY.pslGGcIYlsDt-lspHhpLpslScccphlhsupGcph.htp+FVFsKtshpPhLlIps.cDYs.sp.pShpVYYElGKuI.hDhhS.pthcLtsP..cFlshlsplsst..........DpDupDLL.....FspphKE....cspslslsalcpN.sEFQcsFAcAFuhYapPcH+-sLclY..APchFpYMschpchthp .........................................K.-.h.p.sphph.spphsKh.th.chhchls.sVhphY.ptlstphYhh..-hhpphp.p.lpc.pp..hs..G.......+aVatc.t..shh.....pD.h.pp.puhtlYh..u+.l.hch.S....pl.ps..tFlp.htph.s...........DpsupDLl.....FpphhKE....cspsl..s.l.s....alcpN.sEF.chFAcAFuYY.a.sP..D........H+shLcla..APcsFpYhschpK..hp...... 
1 1 4 4 +7568 PF07738 Sad1_UNC Sad1 / UNC-like C-terminal Wood V, Finn RD anon Pfam-B_5052 (release 14.0) Family The C. elegans UNC-84 protein is a nuclear envelope protein that is involved in nuclear anchoring and migration during development. The S. pombe Sad1 protein localises at the spindle pole body. UNC-84 and and Sad1 share a common C-terminal region, that is often termed the SUN (Sad1 and UNC) domain [1-2]. In mammals, the SUN domain is present in two proteins, Sun1 and Sun2 [1]. The SUN domain of Sun2 has been demonstrated to be in the periplasm [2]. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.70 0.71 -4.37 25 1140 2012-10-03 19:46:52 2004-07-08 10:09:58 8 33 300 4 758 1131 30 126.30 24 15.87 CHANGED hshtupsspsllpps...hhsusCauhpuspsalslcLuctIhlpslsltHhpp.......h.SSAPK-FpVhuppphsptp.......hhhLGpFph.stsspshQoFplpss..t.......hhchV+lclhSpa....GsstasslhtlRVaGps ..............................................................................................h....ttsspsll.p.s...hh.ssC.h.s.....h...p.......s......p......p.....s...a.l......s.lcLsp.I.hssslslpH.hth...........h.sSuP+cF.plh.....u..................p.p..sspp.........................hh.hL..G.p.....aph...s....p..s..s....p......s...l.....Q..s.F.t.l...p...pt..........................hhphlclc.lhSpa.....Gp..ta..ssl...t.hRVaGp............................................... 0 233 353 597 +7569 PF07725 LRR_3 Leucine Rich Repeat Studholme DJ anon PfamB-184 (release 14.0) Repeat This Pfam entry includes some LRRs that fail to be detected by the Pfam:PF00560 model. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.38 0.73 -6.54 0.73 -3.89 74 609 2012-10-02 21:32:02 2004-07-26 15:33:50 7 122 16 0 284 634 0 19.90 51 2.28 CHANGED pLVcLphttS.pL.c+LW-Gsp ....LVELshppS.pL.EKLWcGsp... 
0 96 177 191 +7570 PF07739 TipAS TipAS antibiotic-recognition domain Bateman A, Finn RD anon Pfam-B_8785 (release 14.0) Domain This domain is found at the C-terminus of some MerR family transcription factors. The domain has an alpha-helical globin-like fold [1]. The family includes Mta a central regulator of multidrug resistance in Bacillus subtilis. 23.70 23.70 23.70 24.00 23.40 23.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.49 0.71 -3.78 164 1938 2009-10-27 17:45:06 2004-07-27 13:56:40 8 15 1353 3 329 1455 73 113.30 19 48.65 CHANGED paspEspp+aGp...pthpt.ttp............thsc.pchpphppp....hpplhppltphhpps..sssupcspplscpapphl.s....pahs.....hstp........hhtsL...uphY...hsD.t+Fpphhc.ph.....st.Ghu.palpcAlptas ........................................................atpEh.pcaGp...p.htp..pc...................sp..pc.pphppp........hsplhpplsphhppt...sssupcsQphspcahphlp....phhs.........hs.c.........hhst..l...uphY..............hs-...sc.Fpp.....h..s..ph.............ss..uhu..pals-Altha.......................................... 0 107 213 277 +7571 PF07740 Toxin_12 Ion channel inhibitory toxin Bateman A, Finn RD anon Pfam-B_20319 (release 14.0) Domain This is a family of potent toxins that function as ion-channel inhibitors for several different ions. Omega-Grammotoxin SIA is a VSCC antagonist that inhibits neuronal N- and P-type VSCC responses [1]. Huwentoxin-IV, from the Chinese bird spider, is a highly potent neurotoxin that specifically inhibits the neuronal tetrodotoxin-sensitive voltage-gated sodium channel in rat dorsal root ganglion neurons [2]. Hainantoxin-4, from the venom of spider Selenocosmia hainana, adopts an inhibitor cystine knot structural motif like huwentoin-IV, and is a potent antagonist that acts at site 1 on tetrodotoxin-sensitive (TTX-S) sodium channels [3]. Study of the molecular nature of toxin-receptor interactions has helped elucidate the functioning of many ion-channels [4]. 
21.00 21.00 21.00 21.40 20.60 20.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.46 0.72 -3.46 31 254 2012-10-01 22:06:18 2004-07-27 13:59:12 7 1 24 18 1 274 0 30.60 39 40.83 CHANGED -CtthattCssss-.CC..tshsCppc....h..pa..Cta ....-CttahspCs.sps.CC..sthsCpsc....a....+W..Csh....... 0 0 0 1 +7572 PF07741 BRF1 Brf1-like TBP-binding domain Bateman A, Finn RD anon Pfam-B_18050 (release 14.0) Family This region covers both the Brf homology II and III regions [1]. This region is involved in binding TATA binding protein [1]. 25.00 25.00 27.40 26.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.38 0.72 -3.77 42 348 2009-09-11 07:42:02 2004-07-27 14:01:28 8 11 267 4 234 338 3 99.70 33 16.01 CHANGED D-.E..lcthl.LsEcEschKpplWhphNc-aLhcpcpKp.........................h+tctcp...........psstpcc+p++tpcpp..................................sssspTAuEuscphLpc+s..hSpKINY-sLcsLa ................................D-.E.lc.phl.Ls-pEs.clKpplWh..p.Nt-YLc...cQppKc..........................h+t.ttcpt.........................tthp.c++....++sp.cpct.......................................ss.usTAuEAhcpML.cc+p....hSpKINYcsLcsL............................. 0 79 129 192 +7573 PF07742 BTG BTG family Bateman A anon Pfam-B_9208 (release 14.0) Family \N 23.00 23.00 24.10 25.70 22.50 21.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.61 0.71 -4.59 31 473 2009-01-15 18:05:59 2004-07-27 14:07:20 7 7 125 8 259 429 1 113.10 44 43.74 CHANGED MptElssuVsFls+hlpp+th...LsccplphFupcLpphLhc+YpsH.WaP-pPpKGpuaRCI..RIN...cphDslltpA...............spcSGLshpcLh..LPpElTlWlDPhEVshRhGEcu.shsVhhhp .........................Mp.EItsAlsFlhphLhs+......Lscc...plphFuppLpclL.pc............+Y.c.sH.WaP-+PtKGSua.R.CI.....+Is........cph....DPlltpA................upcuGlshp-lt.t....L.P...........p............E..L........olWlDPhEVSaRlGEcG.shpVhh.p................... 
0 61 82 156 +7574 PF07743 HSCB_C HSCB C-terminal oligomerisation domain Bateman A, Finn RD anon Pfam-B_6234 (release 14.0) Domain This domain is the HSCB C-terminal oligomerisation domain and is found on co-chaperone proteins. 21.60 21.60 21.60 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.46 0.72 -3.52 143 1500 2009-01-15 18:05:59 2004-07-27 14:15:24 8 10 1465 13 424 955 122 77.40 34 40.93 CHANGED htDs..tFLMppMEhREpL--httt....pst.spLpplppclppphcphhpplspth...s........ppshppAsctlp+L.......+Fhp+.lppclcp .....................s.pDssFLMcQhElREcL--lptt...........cs-.spLcshhpclcphhcsthpphsppL........-...............spsassAA-pVRKL..........+FlcK.Lpsplcp......................... 0 114 216 329 +7575 PF07744 SPOC SPOC domain Bateman A anon Bateman A Domain The SPOC (Spen paralogue and orthologue C-terminal) domain is involved in developmental signalling [1]. 21.30 21.30 21.40 21.40 21.00 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.72 0.71 -3.63 58 707 2009-01-15 18:05:59 2004-07-27 14:20:39 8 40 232 1 436 686 1 115.20 24 8.36 CHANGED WpGh.lshpshsphsspuphlsGp...ppls.ptl.........................................................PtplshpuRlchppltchhpplptst......plhllsl................ssppspts.........hpp.hhsYhps+pR.h......GVhplss.................stshYlh.P ..............................................................................WpGh..lshp....s.....s....p....hssphahlsGs.....tplh.pp.tL.........................................................s.pIs..phRls..splpchhpclpsus.st.......thhllsl................t.s..ss.tpppps...............hps.lhsYLpp+pt..s.GVh.sl.ss...tt..............stslalhP................................ 0 106 174 299 +7576 PF07745 Glyco_hydro_53 Glycosyl hydrolase family 53 Bateman A, Finn RD anon Pfam-B_5764 (release 14.0) Domain This domain belongs to family 53 of the glycosyl hydrolase classification [1]. 
These enzymes are enzymes are endo-1,4- beta-galactanases (EC:3.2.1.89). The structure of this domain is known [2] and has a TIM barrel fold. 20.70 20.70 20.80 21.10 20.50 20.30 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.22 0.70 -5.65 16 584 2012-10-03 05:44:19 2004-07-27 14:21:50 8 37 453 23 213 550 107 322.00 33 69.58 CHANGED h+GsDlS.lhp.........................................hEpsGss..apssNGpppDhhplL+s.sGlNhlRlRVWssPhD..............G.sDlccslphAKRAcstGM+lh.lDFHYSDpWADPu+QphPpAWssh.sh-pLpps..lYsaThssLssh.pcsGlpsshVQVGNEhssGhLWs.Gp........sssasphupLlpuGhpAV+-..........lssssKlhlHLssGts.sp.phaacpltpp..G....sDaDlhGlSYYP...aWpu..........oL.....ssLpssLpshtspYsKsVhVsETuaPao........s..sp.t....h.shPhospGQsshlpslhplVpsl........scGlGlFYWEPAWlsss.....................................h.tGsshssp.shFD..hsupsLsSlssF ..............................................................................................h+GuDlShl.p.........................................hEp.p.Gsp..ahs..t..s...G.........t...p...p.D...hhplL+s.pGlNhlRlRl.WssPts............................................G....ss..hs.pslthA+..RA+stGh+lh.lDFH.YSD..hWADPu+QhhP.....pAWtsh........shspLp.ps............lasaTtsslsth..cptGl.......P-hVQlGNEhssGh..L......WstGp...............................................sssa.sp..hAtLlpuGhpAV+c..................sssp.s+l..hlHls.s........Gt.....s.s..s.......t.......h........p...a...aacpl.ppp.....s..........scaDlIGh.SaYP...........aWpu.................sh.......ssL..p..sshs...sls.p+.Y.s..K.c.lhVsEsuasas.t..s.t....s.hst.t.............sh.shospGQtphlpc...lhpt..ltsl.t......spGhGlFYWEPsWhs.s...................................................h...Gssh..tst...shFD...pGp.s.h..uht............................................................................................................... 
1 77 147 182 +7577 PF07746 LigA Aromatic-ring-opening dioxygenase LigAB, LigA subunit Bateman A, Finn RD anon Pfam-B_18522 (release 14.0) Family This is a family of aromatic ring opening dioxygenases which catalyse the ring-opening reaction of protocatechuate and related compounds [1]. 23.40 23.40 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.48 0.72 -3.98 40 275 2009-01-15 18:05:59 2004-07-27 14:23:53 6 3 207 4 91 247 101 83.40 30 44.05 CHANGED LNpashoL.hcspsRccFhuDccAhh-casLocpp+pAlhsRDhtthlphGuNhahLtKluulhGlsh.phsusMsG..hoh--apphhhs ........lNcahhpL.hcst.RpcFhs...D.cAhhccauLoc-p+pAlhsRDhtsLlphGsshahLtKh..uus.Ghs..phhu..shpG...oh-patthh..s............................ 0 18 46 75 +7578 PF07747 MTH865 MTH865-like family Bateman A, Finn RD anon Pfam-B_25445 (release 14.0) Family This domain has an EF-hand like fold. 25.00 25.00 35.20 34.90 21.90 20.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.53 0.72 -4.35 6 69 2009-01-15 18:05:59 2004-07-27 14:27:15 6 1 56 1 52 72 0 75.50 37 86.51 CHANGED --l+tpll-hhKsA-.FPIps..ELhuALPsGhsToCp.Gs.ElpAuEhs.KLlTcsDFPaKsu-plADpll....c+suL ..p-l+tQlh-uhc..sAc..FPIpo.h-LlsALPsGssTsCpsGs..hpl.TAtEhs.KllsssDFPacsA-slA-sll....pcs........... 0 12 35 46 +7579 PF07748 Glyco_hydro_38C Glycosyl hydrolases family 38 C-terminal domain Finn RD, Bateman A anon Pfam-B_731 (release 3.0) Domain Glycosyl hydrolases are key enzymes of carbohydrate metabolism. 
21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.64 0.70 -5.68 45 2571 2012-10-02 23:57:29 2004-07-27 14:36:28 8 30 1251 65 913 2233 104 416.20 15 45.85 CHANGED tslhlaNshsappsphVplssstsshthtstp..........................................................hhh.hsplPsluhtsh.ht.spt.p.tt...........................................h.l.....cNshl+lphsss.GtlpolhDcpss+chh.........hspphthacs...................hptt.p.t...h...usuh.hh.....ppsshpstl.h.ht................................tsshlsphlplhsss................ppl-hcspVc.hpppcp......lpspFssslpu......................spshY-sphGhhhc........shphs...s.h-tsha...........plssptalp.uc...........slulLs-staGsss............tssplcLoLhRs.........................................................cstsD...pupthh............saslhscssshhtht...................ssptuh.............plptPhhh....................hptht..s..h.hsls..sl.................hh.thK.scst..................pshlL+hh-htsspsctplth.shs.ttsph..pslh-t............................................................................................................t.pspphplplpPhEItThp 
......................................................................................................................................hhlaNs.s..h..p..t..h......l.....p....h.........h.......................t..............h...t.................................t..........................................................................................................................................h.h...........p....lPsh.u.h...p.sh......ht............t.t......................................................................................................................................................................hhl..........pNt..h...hpl.ph..s..t.p....G..h....l.p..pl...h....c...p..p..s...s.pp..h...................................hssph.hhaps...........................................hptt...................st..s....h..hh...................p..ss.hpst.h..hphph..................................................................t....p...........t........l....p..p..h..l.p.l..htss........................ppl-hc..s....p.lc...htppcp.......................L.+l.th.ss..s..l.p.s......................pps.h.h.p.t.t.h.GhhtR..........................................shp.p...........pshpt..sh.h......................h...s.....t.....t.t.....a...h..p...h..sc......p.............................slul.hspshh.uhs.s...........................ss.p..l.pl..o..L..hRs.........................................................pshs.D..........ps.pthh........................................................paul.h.sps...t...h......ht..............................................shppuh............................................th.p.t.Ph.hh.............................................................hp..h.............................s.h...p..h........s..l................................................................hpshK.up....st............................................................................tthllR.hhp.....t.t..t.s.p.s.t.h............s...........sp......h......slhpt...............................
...........................................................................................h...h...t...shplhoh................................................................................................................................................................................................................. 0 333 530 740 +7580 PF07749 ERp29 Endoplasmic reticulum protein ERp29, C-terminal domain Finn RD anon Pfam-B_5062 (release 14.0) Domain ERp29 is a ubiquitously expressed endoplasmic reticulum protein found in mammals. ERp29 is comprised of two domains. This domain, the C-terminal domain, has an all helical fold [1]. ERp29 is thought to form part of the thyroglobulin folding complex [2]. 21.70 21.70 22.40 23.30 20.10 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.07 0.72 -3.36 36 300 2009-09-11 09:36:09 2004-07-27 17:35:15 7 10 234 12 194 303 6 91.90 30 27.52 CHANGED GsltsLDplltcassus...tptppllscscctsppLp.....st...+aupaYl+lhcKlhpps.......p-YspcEhpRLc+lLp......GulussKtDElhh+tNILppFt ..................................................GpltslDslstcahsus.t..pppp.....pllpcscctspplp...............tttt..chAchYlKlhcKlhcpu.......p-asppE.hpRLp+lLp.......sslus.sKtD-lpt+.hNILpsF.......... 1 70 115 159 +7581 PF07750 GcrA GcrA cell cycle regulator Thanbichler M, Finn RD anon Pfam-B_23428 (release 14.0) Family GcrA is a master cell cycle regulator that, together with CtrA (see Pfam:PF00072 and Pfam:PF00486), is involved in controlling cell cycle progression and asymmetric polar morphogenesis [1]. During this process, there are temporal and spatial variations in the concentrations of GcrA and CtrA. The variation in concentration produces time and space dependent transcriptional regulation of modular functions that implement cell-cycle processes [1]. More specifically, GcrA acts as an activator of components of the replisome and the segregation machinery [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.14 0.71 -4.04 8 480 2012-10-04 14:01:12 2004-07-27 17:35:51 6 7 310 0 157 401 1392 140.20 30 85.47 CHANGED MsWTDERVEtLKKLWp-GLSASQIAAQLG.....GVSRNAVIGKVHRLuLoG....Ru+s.t.osAssRs++ss.s.t.........................ssutpuLphchss-lsscsshtsstssVsshsRplpLLELuE+oC+WPIG.DPtsE-FsFCGs.cus-uuPYCshHuRlAaQPsuc+RRsc ...........................................................................pWT-E+lp..hL+clW.tcGhSuupIAtpL.G......loRNAVl...GKsH.R..L.t..Lss..........+sp..s................t.....t.s.s.....p...t..tst...................................................................................................tt.h......t............t..................................t.............tt.h..tl.h...pL.....sc..p.....pC+WPl.......G...-P....s.....cFh.FCGt.....ts...tu....PYCt.H.s.tlAapshttp....t............................................................................... 0 40 96 114 +7582 PF07751 Abi_2 Abi-like protein Fenech M anon Pfam-B_8740 (release 14.0) Family This family, found in various bacterial species, contains sequences that are similar to the Abi group of proteins, which are involved in bacteriophage resistance mediated by abortive infection in Lactococcus species [1,2]. The proteins are thought to have helix-turn-helix motifs, found in many DNA-binding proteins, allowing them to perform their function [3]. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.43 0.71 -4.12 111 1544 2009-01-15 18:05:59 2004-07-28 09:55:11 6 5 1091 0 266 1249 75 178.60 18 63.71 CHANGED pphLpp.luYaRl.ssYhhsh.t.t.....................htts.spFpplhpLYhaDpcLRthlhcslpclElslRstlstt..lup.............t..ha.p.tphhptt.............................................hhpphp......pplpcspt....p....hpaatpph...........tthPhWhhh.....ElhshG....................phsphap.h.tp.............ttplspp...............hs...............hp.........tphlpshlcslphlRNhsAHasplasp ...................................................................Lpp.hs.Y.a.ph.psYhh.h...............................hh.s.hphpplhsL..YhhDpcL+thhhphlttlEhsl+stlsth..lsp...........................t....h..ahp......phhpt.t.........................................................................hhpphp.......ptlpcsp..............t...........hpphh.pph.........................................tphPhWhhh........chhshG......................phs.phap.h...t.................t.thh.pp................................................ht.......................................hp........hp.ht.s..h....lptlp..lRNhsAHpp.lht........................................................................................................ 0 72 163 220 +7583 PF07752 S-layer DUF1608; S-layer protein Fenech M, Eberhardt RY anon Pfam-B_2293 (release 14.0) Family Archaeal S-layer proteins consist of two copies of this domain [1-2]. 
30.00 30.00 30.70 30.30 29.60 29.70 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.46 0.70 -5.18 14 125 2009-01-15 18:05:59 2004-07-28 10:41:59 6 15 13 0 123 125 2 236.20 27 60.47 CHANGED lRGslaD.........s.stssshsWsspsFuGFYYslccs..................lsoEshslhthss..scsIs-GsLVYsTpsh.spaEht..............sW..ssYpllGFhuccYhAs.sDss.............stLuplllD..u-DKcsl.hoG-sh-Lt..............cGYSLplspVDVsGs+VWlpLpK-G-hlDsshlsssss.......alacs-lu-s...-DlshhhlalspVFtGspsohlhlcula.Is.DshlcIpsGDpFGchElsphSpssIphcN-DohoLspsss..hplhsshhFpsuD ..............................................h..........................h.Wss......satuFaYslsps................htsEphth.........sp.ltc.....s..s...l.Ypo..p......s...p...acht..............sh..tpY.h..luahu-cYhshhssss........................spLu+lLlD.........s-cctol.psGpsLs.Lt..............-GYsLplppl...Dl-Gs+ValpLpKDGphVDspllssus..s.............asYcp.-lsst...pDlslltl+lc.plFpGsp...sshshl-GlaQIu.-shhplppGDpaGphclppls.....t...s..t..I.hpNp.s.s.hsLppsps...tlhtth..htsus.................................. 0 26 83 92 +7584 PF07753 DUF1609 Protein of unknown function (DUF1609) Fenech M anon Pfam-B_2149 (release 14.0) Family This region is found in a number of hypothetical proteins thought to be expressed by the eukaryote Encephalitozoon cuniculi, an obligate intracellular microsporidial parasite. It is approximately 200 residues long. 
25.00 25.00 56.80 56.70 16.70 15.80 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.52 0.70 -5.03 9 42 2009-01-15 18:05:59 2004-07-28 10:43:16 6 2 6 0 26 42 0 199.40 58 45.79 CHANGED EsEputEAEssh.Eh....AtsK+KsutcKScGtc+pYKlH+RVLRWpKSsE+IKcELDcGpEE+W+G+SlEEI+EQKhlHDIsEVhcLLRSc-.sD+FFhcTG+YMKGGSERW+MVAlGlLEpGGcK+lGsVEVGLFKsc.sGpsVlYHLMF+PTshEctGcVsssuFu+uDDV-cI--s..-spDhuGFpYPpGVRsEhV+ssspF+IVWRNP+sTSEVLRoLTlhphPpl ............EsEtutEsEhs..Eh....uss+cK....su.K.....KScGsc+pYKIH+RVLRWpKSPEKIKcEhD+GSEE+W+GRSlEEIKEQKhlHDIstVlcLLRScD.AD+FFhcTGcYhKGGSERtRMVAIGlLEsGGc.+hsGVVEVGhFKDs.sGssVVYHLMF+sTthtphGtsht...sthschss.lttlcct..c.pD.ttF.YP.slp.EhspttstFpI.ats.psTu.llppLhl.phs.................. 0 26 26 26 +7585 PF07754 DUF1610 Domain of unknown function (DUF1610) Fenech M anon Pfam-B_8731 (release 14.0) Domain This zinc ribbon domain is found in archaeal species. It is likely to bind zinc via its four well-conserved cysteine residues. 20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.78 0.72 -7.06 0.72 -3.81 21 205 2012-10-03 10:42:43 2004-07-28 10:44:10 6 2 117 0 143 189 34 20.80 46 59.03 CHANGED CpsC......RchussasCPpCGh ...CppC.......Rcp..ustahCPsCGh. 0 38 81 115 +7586 PF07755 DUF1611 Protein of unknown function (DUF1611) Fenech M anon Pfam-B_8752 (release 14.0) Family This region is found in a number of hypothetical bacterial and archaeal proteins. The region is approximately 350 residues long. A member of this family (Swiss:Q6M063) is thought to associate with another subunit to form an H+-transporting ATPase, but no evidence has been found to support this. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.71 0.70 -5.77 79 333 2012-10-05 12:31:09 2004-07-28 10:48:20 6 4 302 6 140 392 459 289.80 34 83.25 CHANGED h-..hchsGpsss........tslPlhsshpts.....ttsscslllGlAstGGhls.tsWppslhpAlctGhslsSGLHs.hL......s-cPclsthApp..tGtplhDlRpP.stsh.tlusGpt+...........t.sspRlLsVGTDCulGKhsTuLtlpcuhppcGlcusFhATGQTGIhI.uGpGlslDAVsuDFsuGAlEtll.csst.....pp.chhllEGQGSLh...HPuauu.....lohuLL+GupPDullLsHcssRpphcshsp...hsl.Pslpphlplhptlushs........spVlGlu.lNTpsls..-pp.utphltchppch......uLPssDPlRp..G.ss..pll .................................phsGppss.........hslP.hs.hpss........stsscsLllGlAstGGhls..ptWhpslhpAl.ctGhslsoGLHp.hL......s-.sclst.hApp..pGt..plaDVRps.s.sh...slusGpt+.............sucRlLsVGTDCulGKhhTuLtLccshcc+Ghc...u...sFhATGQTGI....hI..sGpG.lslD.....A...VluDFhuG...AlEtls.css......cc.ch.llEGQGSLh......HPuauG.......VohuLl+GuQPDullLsHcssRpphcshsp...hsl..Pslpphlchh.thuphs.......stspllGlu..lNTptl....s..-p.c.Atthhtchppch......ulPssDPlRh.Gsstl................................................................................... 0 35 86 119 +7587 PF07756 DUF1612 Protein of unknown function (DUF1612) Fenech M anon Pfam-B_8688 (release 14.0) Family This family includes sequences of largely unknown function but which share a number of features in common. They are expressed by bacterial species, and in many cases these bacteria are known to associate symbiotically with plants. Moreover, the majority are coded for by plasmids, which in many cases are known to confer on the organism the ability to interact symbiotically with leguminous plants. An example of such a plasmid is NGR234, which encodes Y4CF, a protein of unknown function that is a member of this family [1]. Other members of this family are expressed by organisms with a documented genomic similarity to plant symbionts [2]. 
30.10 30.10 30.60 49.50 27.90 30.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.72 0.71 -4.05 18 141 2009-01-15 18:05:59 2004-07-28 10:54:13 7 2 98 0 42 113 1 126.70 55 35.37 CHANGED sLlaDsDWDE-sRLsEWRuVlspscsL..PPlLpAAlhhDAWppLEsLQ+usWLGRLLsAuhLRpcGhss.uHLsulshGLKslPh-RRRpRsRtoRLhuhLcultsAAptGhKEHDRLslARp.MpR+LcG ..............tLlYDsDWDEDuRLpEWRss.hststsL..PPlLpAAlhaDAW.pLEVlQRusWlGRLLsAuaLRpuGlsu.sHLsAlsLGL+shts-cRRupsRssRLtsFLtAlptAAEssMKEHDRLhLARcQMpRKL+G....... 0 4 19 29 +7588 PF07757 AdoMet_MTase DUF1613; Predicted AdoMet-dependent methyltransferase Fenech M anon Pfam-B_8934 (release 14.0) Family Proteins in this family have been predicted to function as AdoMet-dependent methyltransferases [1]. 20.20 20.20 20.20 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.45 0.71 -3.95 3 246 2012-10-10 17:06:42 2004-07-28 10:55:27 8 4 214 0 186 308 53 106.20 44 20.68 CHANGED LV.NlEKYNDLYsELKQKYuQpLV-pW...sEsTDPtKFVFEDLAIAAYLIhLW+QTpSu...pp.pSFVDlGCGNGLLVYlLsuEGY+GYGaDlRKRKLW-hYPs-sQVcLhEKslVP .......................................................l.s..papshYtpLKpKYuppllcsW.......sE..sTDPpKaVaEDlA..IAAaLl.LW..p.p....h.sp........................pth.....u.........FVDlGCGNGLLVaIL.pEGa.pG.h.GhDsRcRK.Wsha.s...spsp..L................................... 0 54 91 149 +7589 PF07758 DUF1614 Protein of unknown function (DUF1614) Fenech M anon Pfam-B_8844 (release 14.0) Family This is a family of sequences coming from hypothetical proteins found in both bacterial and archaeal species. 
22.40 22.40 22.60 22.50 21.10 21.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.20 0.71 -4.54 48 186 2009-01-15 18:05:59 2004-07-28 10:56:30 6 2 156 0 112 170 2 169.60 31 72.00 CHANGED llGShI.....NIPltp..tt............................hhGh.h....Ph..h..tpshlAlNVGGAlIPlhlulY..Llhp....h.......tsllusllsullsahhu+sl..........sGlGIssPshlsPlsAulsAh....lhuttht..............sAYluGslGoLlGADllpL...t...clhphs.ssh..l.......SIGGAGsFDGIFloGllAlLL ..............................................lhsS.hNlslhp...t..............................hhsh.h.h...s.........pphlAlNlGGAlIPlhlulY...Llhc..........t.hltsllusllsullsahhu+sl.........sGlGI.hhP...salsPllAu.l.hAh...........lhuht.hc.................sAYluGslGsLlGADllpl.........t....plhphu...ssh...l..............SIGGAGsFDGIalo...GllAlLL........... 0 46 80 98 +7590 PF07759 DUF1615 Protein of unknown function (DUF1615) Fenech M anon Pfam-B_8943 (release 14.0) Family This is a family of proteins of unknown function expressed by various bacterial species. Some members of this family (e.g. Swiss:Q8Z8Z7, Swiss:Q8ZRF4) are thought to be lipoproteins. Another member of this family (Swiss:Q93SV8) is thought to be involved in photosynthesis [1]. 
25.00 25.00 26.30 26.20 20.90 18.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.94 0.70 -5.67 22 632 2009-01-15 18:05:59 2004-07-28 10:58:44 7 1 622 0 68 326 17 315.30 72 86.98 CHANGED uslpDRpGWApDItsAhps.tlssoscNlCuVLAVsEQESsFpsDPsVPGLu+IuhpEI-pRAs+h...........tlPthllcsALphpSP.sG+oYpcRlculRTE+-Lstla-Dhl...........stlPhGppLFus.....hNPV+TGGPMQVSItFAEp+u+...tYPasss.uolRcEVFoRRGGlaFGhA+LLsYPssYspslYRFADFNAGaYASRNAAFQsAlSRloGhtLALDGDLlpYss.......stsusTEhAlhsL..upcL...shscspIRcsLcptcs.sFpcTtLYcpVasLA-+psG+shPRAhLPtIpLcSPKI.oRpLTTAWFAcRVDpRap+CMpR ....A.SVKDR-AWAKDlATTFcSQGLAPTlENlCSVLAVAQQESsYQADPsVPGLSKIAWQEIDRRAERh...........HIPuFLVHTALKI+SP.NGKSYSERLDoVRTEKQLSAIFDDhI.............uMV.PMGQTLFGS.....LNPV+TGGPMQVSIAFAEQHsK...GYPWKMD.GTVRQEVFSRRGGLWFGTYHLLNYPAsYSAPlYRFADFNAGWYASRNAAFQNAVSKASGVKLALDGDLIRYsS.......KE.PGKTELAsRKL..Au+L...GMS-uEIRRQLEKGDShuFE-TALYKKVYpLAEAK.TGKoLPREMLPGIQLESPKI.TRsLTTAWFAKRVDERRARCMp....... 0 11 22 43 +7591 PF07760 DUF1616 Protein of unknown function (DUF1616) Fenech M anon Pfam-B_8886 (release 14.0) Family This is a family of sequences from hypothetical archaeal proteins. The region in question is approximately 330 amino acid residues long. 
31.60 31.60 33.10 32.70 31.40 31.20 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.80 0.70 -5.27 38 205 2009-09-11 00:12:54 2004-07-28 11:00:14 6 5 101 0 137 214 7 244.00 22 72.95 CHANGED Lhlllhhsllshlhlh.hssht...pohlRslLGlsh..lLFlPGYsLlusLaP................p+psLss.....lERh.sLShGLSIullsLlGLsLNaos.......huIchhPlll.oLshhollhshlAhhRRtphPssctath.hpthh.t................hhhtsssph-phLsllLllullssls.slsaslhhPcpsEpFTEFYlLG.pt....hAtsYPsphhhGpptslhlG.................lsN+EhpshsYslclhLpspphs.......p.ph............slss...p......pohppshphps..hhups.clpaLLapcs...................ppshsY+sl+LalsV ..................................................................................................hhhh.hhhhhhhh..h...........lR...hhhuh.h....lhF.hPGYshlthLaP....................................tttpl.s........l-Rh.slShGl...SlulsshlGlhlshs........hslphhslhh.slshhslhhshluhhR+hph.............h.................................................................h..p.phls.hllhl.llhshh..hhshhhh.s.....tt..s.....ctaothhlls.tt.....hutsYssphh.up...lhlt.................lhNpEhpshtYhh.hhltp..ht.........................th......h......tlst.........s........................tshp..hph..t.........stp...pl.h..hLaptt.........................t.t.shhthhlhlp................................................................................................... 0 19 77 107 +7592 PF07761 DUF1617 Protein of unknown function (DUF1617) Fenech M anon Pfam-B_8981 (release 14.0) Family This is a family of sequences from hypothetical bacterial and bacteriophage proteins. The region in question is approximately 150 residues long and is highly conserved throughout the family. 
21.10 21.10 21.40 22.40 20.70 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.92 0.71 -4.14 2 86 2009-01-15 18:05:59 2004-07-28 11:01:05 7 1 53 0 8 64 1 137.70 43 98.75 CHANGED MDLTLKNK-LNTLYpVLDKIKlTNMRANRGRAKLLAKV.sKlsEYAKDEhDlID.YsAKscDDKalhD-+tN.KltDsuKLsELNDhLsELAsE.IVIKGGEYSKRFIDFLpaL.EsEDEFTSpEIlLIDNILEQFEES.KGE ......................MplTl+N+-LsslasVL-cIKlpsh..RAsRGRAKLLuKVhcKlcEYAKDEsDLIchYAtKDcDGK.all.D-+.tNh....KL.........s.D.PsKl.....c.....EhNchLsELusEcIsIcGsEYS+RFhDhlpaLt-sEDEhoup-hll...hDplLEpaEtu................................ 0 2 5 7 +7593 PF07762 DUF1618 Protein of unknown function (DUF1618) Fenech M anon Pfam-B_8857 (release 14.0) Family The members of this family are mainly hypothetical proteins expressed by Oryza sativa. 21.00 21.00 22.00 21.40 19.30 20.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -11.16 0.71 -3.95 61 623 2009-01-15 18:05:59 2004-07-28 11:02:59 9 14 11 0 353 498 0 130.80 21 29.62 CHANGED W.V..DLs.tGlLhCDsh...s..............t......s....clcalsLP.ss.hssttt....................pt.......hptaRs.lul.....ssG....pl+aVplstt.t...............................................ssshhlssWoL..............ssht..Wph-s.plshsclWstcs............apt.t........l.Pp.......h..h..Phl..uh.cs....slla ......................................W.V.D....Lh...t.G.l.l.h.C..D.sh.....s.....................cp.........splcalsLP..sshh.sttp.....................pts.phhR.s..lss.......spG.....pl+aVp.lptt......................................................tshhlssWsL.................sttpWph..-t..pl.s...hsclhttts..................................h.t.t......l..sp...............hh..h..Phls..ps....thh........................................................ 1 0 66 213 +7594 PF07763 FEZ FEZ-like protein Fenech M anon Pfam-B_8854 (release 14.0) Family This is a family of eukaryotic proteins thought to be involved in axonal outgrowth and fasciculation [1]. 
The N-terminal regions of these sequences are less conserved than the C-terminal regions, and are highly acidic [1]. The C. elegans homolog, UNC-76 (Swiss:Q7JNU9), may play structural and signalling roles in the control of axonal extension and adhesion (particularly in the presence of adjacent neuronal cells [2]) and these roles have also been postulated for other FEZ family proteins [1]. Certain homologs have been definitively found to interact with the N-terminal variable region (V1) of PKC-zeta, and this interaction causes cytoplasmic translocation of the FEZ family protein in mammalian neuronal cells [2]. The C-terminal region probably participates in the association with the regulatory domain of PKC-zeta [2]. The members of this family are predicted to form coiled-coil structures [2,3], which may interact with members of the RhoA family of signalling proteins [2], but are not thought to contain other characteristic protein motifs [3]. Certain members of this family are expressed almost exclusively in the brain, whereas others (such as FEZ2, Swiss:Q76LN0) are expressed in other tissues, and are thought to perform similar but unknown functions in these tissues [3]. 
28.10 28.10 28.40 29.50 26.30 28.00 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.55 0.70 -4.66 10 222 2009-01-15 18:05:59 2004-07-28 11:03:54 8 5 82 0 108 181 0 207.80 46 62.63 CHANGED ShEDLVNpFDEKLolCF+NYsssTEslAP...V+....spoEEEhLpD--lWsALTDNYGNlhPlDW+sSasRsLHhssLN...Lt.......................................pussssspl...cpSDDEELpEphDMHolIluslp.......EEPLhTADQVIEEIEEMMQESPDPE---.........ssspS-slShLppElpuLppussss..............sapEcL+pLSsScLsElL-ElEsAIR-YSEELlpQLAlRDELEFEKEVKNSFISlLI-VQNKQKEa+ElhKKK+KhKususp ..................................................h.sLspph-EKlshCFp..s.t.tst...h.As...Vp......ppcphhptpplW.s.LTsNass.h.hshDWcsopscsL.h.hl....h.........................................p..u...s...s.s...h...cts.--EElpEphDhHShI.st..hs.......-EPLhTA-QVIEE.I-EMMQpSPD.PE--.c........................sss.pSc.thohl....p-hp..sh.ps....s.s...............................tE......pl+pLSsocLsElLcElEssI+-aSEELVpQLAhRDELEFEKEVKNSFISlLl-VQNKQKEp+Eh...hKK++K.+shs.............................................................. 0 31 41 69 +7595 PF07764 Omega_Repress Omega Transcriptional Repressor Finn RD anon Pfam-B_63922 (release 14.0) Family The omega transcriptional repressor regulates expression of involved in copy number control and stable maintenance of plasmids. The omega protein belongs to the structural superfamily of MetJ/Arc repressors featuring a ribbon-helix-helix DNA-binding motif with the beta-ribbon located in and recognising the major groove of operator DNA [1]. 25.00 25.00 25.90 29.30 22.70 22.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.31 0.72 -3.61 2 125 2012-10-02 18:44:02 2004-07-28 11:04:54 6 3 97 14 2 49 0 63.30 79 46.87 CHANGED MIVGNLGAQKtKRNDTPISAKKDIMGDKTVRVRADLHHIIKIETAKNGGNVKEVM-.tLcphl+phL.s+h ................hGNLGAQKtKRNDTPISAKKDIMG.DKTVRVRADLHHIIKIETAKNGGNVKEVM-htLcphl+phL.s+h............. 
0 1 1 2 +7596 PF07765 KIP1 KIP1-like protein Fenech M anon Pfam-B_2332 (release 14.0) Family This is a family of sequences found exclusively in plants. They are similar to kinase interacting protein 1 (KIP1), which has been found to interact with the kinase domain of PRK1, a receptor-like kinase [1]. This particular region contains two coiled-coils, which are described as motifs involved in protein-protein interactions [1]. It has also been suggested that the protein's coiled- coils allow it to dimerise in vivo [1]. 20.50 20.50 21.50 21.90 20.00 20.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.44 0.72 -4.14 13 269 2009-01-15 18:05:59 2004-07-28 11:06:16 7 8 37 0 153 254 0 66.70 53 8.18 CHANGED SWWWs..SHhssKsSKWLppNLpEMDp+VKpMLKLl-E...-ADSFA+RAEMYY++RPELIshVEEhYRuYRALAERYD ..........................................ppspWLtps..Lp-M-ppVKtM.lKLI..--.........-.....u......DSFA++AEMY..Yc+RPpLlshVE-hYRsYRALAERYD............... 0 16 88 125 +7597 PF07766 LETM1 LETM1-like protein Fenech M, Wood V, Mistry J anon Pfam-B_2202 (release 14.0) Family Members of this family are inner mitochondrial membrane proteins which play a role in potassium and hydrogen ion exchange [3]. Deletion of LETM1 is thought to be involved in the development of Wolf-Hirschhorn syndrome in humans [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.64 0.70 -5.36 13 760 2009-01-15 18:05:59 2004-07-28 11:08:40 8 13 347 1 502 777 35 218.00 28 44.32 CHANGED KcsLhs+lhcElpHYacGh+LLhh-h+..lSsKllh+lhsGtp....LoRREppQLhRTsuDlFRLV.PFusFlIVPFhEhLLPlhlKLFP.sMLPSTFpopsc+p-....Kh+pplpsRh-hu+FLQcTlc-huhpscsphppttp....cFssFhp+lpsssc..sS....s-EIlchuKlFcD-h.sL..DsLoRsQLsuLC+hhpLpshGTsshLRaQL+h+l+pl+pDD+tIstE.GV-uLostELppACtuRGh+uhGlScEpL+-pLppWL-LpLppplPsoLLlLS .................................................h....h.chhp.hpha..h....pGh.+LLhh...-..h+....hut...+lhh..+.h..h....t..G...t.................LoRR.Eppp..........L...h+................p.............htDlh+ll.Ph...hhlllP.F...h....ph....LLP.lh.l.+.h...FP..shL.PSTFpsppp...cp.c...................thp..p..hhth+h.....p..........h.t.p.h.l....pp.ph....ppht............t...t....t........t..........phtphh.....p.pl....ptstp......s...............pplh.t...htp.h..Fpsp....sL...cp...ls..p.........p.Lhshs+.......hh..........l.p....s.....h............s..s..s...h........L....Raplph+hp.lpt-D....ph.IthE.....G...l..p.s.......L...s......tELp.AChtRG.....h.psh..s.....h.......s..t.p..hcp.Lp.....................Wlplphp.ths...Llh...................................................................... 0 164 274 405 +7598 PF07767 Nop53 P60; Nop53 (60S ribosomal biogenesis) Fenech M, Wood V, Mistry J anon Pfam-B_8778 (release 14.0) Family This nucleolar family of proteins are involved in 60S ribosomal biogenesis. They are specifically involved in the processing beyond the 27S stage of 25S rRNA maturation [3]. This family contains sequences that bear similarity to the glioma tumour suppressor candidate region gene 2 protein (p60) [1]. This protein has been found to interact with herpes simplex type 1 regulatory proteins [1]. 
24.90 24.90 25.30 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.38 0.70 -5.47 25 375 2009-01-15 18:05:59 2004-07-28 11:11:04 6 8 272 0 244 369 3 306.90 24 83.25 CHANGED SRKuKKAWRKNlDls...-V-psL-chR.-EcIpGGs..luEKss--LFslDopuss.......pltptppp+hpKPLK..scpIL...ps+Splsulss++ppssps.............p+.+pp.lscc-htRLtplspcppstpsph.ssp................aDlWucpss.p...................thshsphshtpss.pslK.tPhplttss....t..lP.ulchscuGtSYNPohE-appLLppttccElctEK+cpchc+ppppthpths........h...t...psh.Esp....-s-scscs...---st..t.-u-h-....sh.stpc.sc+KT+sQRN+p++cKctc+ptc...hp+phKpcppplt+l+uItcclsp+ppphscppcpcp....pcsccpth+p++L.GKhch.-tslEVhLsDELssSLRpLKPEGNLLcDRF+oLQ .......................................................................................................s+ptK+sWRp.hp.....-lpphhcp.p.pp.h.....hG...htph.sp.pLFhlD.t.pt.............................hh.h.h...+.L+...sp.ll...tppot...l.s....t.ht.t........................................p.t....pp..h.p...h.....th....tt..........pt...t........................................hDhWsptt..t..........................................t....h............h...t.......thp....s.phttps............................h..slphstsGtSYNPshpsapphlt.p.t...hph.....Ehpt.c.pp.phpc.......h....hht......................................................ttp.................ptttt.t............tppp.t......p.sthp..........h.t...ht...sc+...KT.ctpRp+.tct+....p.ttttt......t..pt..h+..tp........ppl..p.lctlttpl.tpct...tthttht.tpt............tptpttt......p.......ppL....G+h.........ph..t..sl-l.Ls-ELs......sS.LRpL.KP.pu.slhtDRh+sh................................................................................... 0 86 136 203 +7599 PF07768 PVL_ORF50 PVL ORF-50-like family Fenech M anon Pfam-B_8834 (release 14.0) Family This is a family of sequences found in both bacteria and bacteriophages. 
This region is approximately 130 residues long and in some cases is found as part of the PVL (Panton-Valentine leukocidin) group of genes, which encode a member of the leukocidin group of bacterial toxins that kill leukocytes by creation of pores in the cell membrane [1]. PVL appears to be a virulence factor associated with a number of human diseases [2]. 21.60 21.60 21.60 21.80 21.50 21.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.90 0.71 -4.10 6 423 2009-01-15 18:05:59 2004-07-28 11:12:33 6 1 235 0 5 197 0 113.60 54 91.85 CHANGED IlplpsK.PhKaspaE...tElhcKpGIosGlVppRV+sGWchpEAl-APhGh+LsEY+EhphpctlcptphEREhtRcR..........R+EAEL+RK...KPHLFN.VPQKHPRG+Yssah..h.NphF.+Khpc ........................................IVpIpsK.PY+FocaE....ELIEpaGITsGMVuKRVK.c.GWcLcEAhcAPcGhRL..uEY+Ehhphc...hl...Ep.tchERchtR...cR.............++EA.ELRRK...KPHLFN.VPQKHsRs.Y..ah..h.NphF.hKhpc............................ 1 3 3 5 +7600 PF07769 PsiF_repeat psiF repeat Fenech M anon Pfam-B_8872 (release 14.0) Repeat This region is approximately 35 residues long. It is found repeated in a number of putative phosphate starvation- inducible proteins expressed by various bacterial species. psiF (Swiss:Q7AH28) is known to be an example of such phosphate starvation-inducible proteins [1]. 20.70 20.70 20.70 20.70 20.20 20.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.57 0.72 -4.41 35 1450 2009-01-15 18:05:59 2004-07-28 11:13:55 9 3 719 \N 184 570 34 34.50 64 64.33 CHANGED A.ssQQpKMpsCNupAssKsLKGD-RKsFMSsCLp ............s.LTPQQQKMpsCNpQATsQuL....KGD-RKsFMSsCLK.. 0 17 67 127 +7602 PF07771 TSGP1 Tick salivary peptide group 1 Fenech M anon Pfam-B_2333 (release 14.0) Family This contains a group of peptides derived from a salivary gland cDNA library of the tick Ixodes scapularis [1]. Also present are peptides from a related tick species, Ixodes ricinus. 
They are characterised by a putative signal peptide indicative of secretion and conserved cysteine residues. 20.00 20.00 20.70 20.30 19.60 18.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.95 0.71 -4.23 8 125 2009-09-11 03:26:28 2004-07-28 11:18:23 6 2 12 0 13 126 0 108.80 37 87.82 CHANGED MthTslTLVLVSLAFFuoAAAcsCpNGTRPAS-pNREGCDYYCWNssTsuWDcaFFtDGEsCFYNsGscGsCpsGcCHLsT..sSGsPocssDasspPo.......ptPKpKKKKspKsKKPK+poc.KD .........................................thh.hhhsL.V.sl...AF...hs.......s..uAu.c....s.....Cpsus..R..P.uu.cpsR...-GCcYYCa.Ns.t..T.....suW-p.h.FF....s...sGEpCaYs.s...Gp.c..GhC....ps.....G...t....C...Hhss...........suusPs..-s.s.p.ss..sp.........p.............................................................................. 1 13 13 13 +7604 PF07773 DUF1619 Protein of unknown function (DUF1619) Fenech M anon Pfam-B_8790 (release 14.0) Family This is a family of sequences derived from hypothetical eukaryotic proteins. The region in question is approximately 330 residues long and has a cysteine rich amino-terminus. 
20.90 20.90 22.10 21.20 20.20 20.50 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.56 0.70 -4.61 16 255 2009-01-15 18:05:59 2004-07-28 12:10:45 6 8 95 0 139 257 1 242.40 26 46.65 CHANGED CsCDLohuhCDlNCCCDpDCsss.stphF.thCh.tshsss.....pphCsp.ppsppopshp..........hsphssshhClhpoNpp.............ssh.shhtpsssss........shsthsps.stsssssthYchGDPlhh..........t.pssuhhplPsshss.utCs....spsVtFLpshs.osCs......hsthp....th.hsphsshhllsssp...........sss.h.hslphshtpshsthlss........sphhs..spsptCsssVlplchphhassss.l.thplhhhtuplshps...tshltpcFplp...Fhpts.ts.ssshSGNsGYlsGpP ...............................................................................................CsCDLpsstCDlpCCCD..DCs......hpl....F...p..........C.sssht.ss..................p.hCst..phh.hs..s..................t.s......hhClp.ss.p...........................sh.t...hh....p.s.ss.shp...................s...h...t..t......t....ssshY.c.Gssl.s............................uhhp....hPtsh.hs.u.Cs..p...........ssPsu.F.......L........s.s..spCsh......t..............sLp.h...t...h....s.....lhth.t..................t.h..pl........hpphs..t.hht.............................................................t.phstsll.phpY.h.hs.....s...s........t.ltphslphh.spl.............s.t..............................lpQ..pF..lp............F...p...s.......................hSGsPGY.hGhP.............................................................................. 1 43 51 85 +7605 PF07774 DUF1620 Protein of unknown function (DUF1620) Fenech M anon Pfam-B_8944 (release 14.0) Family These sequences are mainly derived from predicted eukaryotic proteins. The region in question lies towards the C-terminus of these large proteins and is approximately 300 amino acid residues long. 
20.70 20.70 25.40 23.40 19.60 19.20 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.37 0.70 -5.08 35 334 2009-01-15 18:05:59 2004-07-28 12:12:07 8 9 278 0 229 324 4 207.20 36 23.50 CHANGED hsENWllYoYass.........shhphplsllELaEsppsssptsh..............tphoSh........ss.shstllspsalhs.p.tIpphulTpTcpGITsKplLhth.sosplstlP+.lLssRR......sppssss-.pcEth.hsYpshlshss.phhloHppplhshc....................pllosPotLESTollhuaGh..DlFhTR.lsPStsFDlLscsFsK.htLlhTlluLhlsshlspshsppKplpttW ............................................uENWll...YpYass..........................ps+c.clsllELYEusp.t.tspssh..........................SSh......................sps.hP.pVhpQ.oYlhP..p..sIsshtsTtTcpGITo+plLlsh.tostIlulP+..t.lLDPRR..........P..ptsot...........pp..pEEsl.lPYsP.l.lps..ct.hl..saspsV.tl+............................sIhouPo.tLESTsLVhAaGl..DlFhTR.lsPSppFDlLp................csFsh.hhlhhslhuLhhushlsp.hsppKtlpptW................. 0 81 130 191 +7606 PF07775 PaRep2b PaRep2b protein Fenech M anon Pfam-B_4098 (release 14.0) Family This is a family of proteins, expressed in the crenarchaeon Pyrobaculum aerophilum, whose members are variable in length and level of conservation. The presence of numerous frameshifts and internal stop codons in multiple alignments are thought to indicate that most family members are no longer functional [1]. 
19.20 19.20 20.10 20.00 19.10 18.70 hmmbuild -o /dev/null HMM SEED 512 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.70 0.70 -6.29 15 107 2009-01-15 18:05:59 2004-07-28 12:13:36 6 2 6 0 61 113 0 199.00 21 57.96 CHANGED LcAlsPtLPtLacLRDALsEFADAF+sVTtEsl++KaGl-huYDVRNEpFFKKL-EllsMsE-YVY+NlsVERsPLDsSGphPKsVIRFKLs.GEElAHIslYWTGpcLpApFsGSREpAERLASII+ALGGcAEVKch.GstWhVpLTTDGIsAIRHsuWLsAVRuFV-ELac......KGLIuc-RY-pLl+-IEAGPNsVKaAGVEFSVhYc....t...s+pIhlcYpPpSEsoKNAAVsALKA+GL+EGVHFTVpc.Gu..YEIRVstE.YsKAlEsLspsGL+cGEaYul.s++RhIpVKs-pKDsllNALKsAGLtEG+cFss+.sG.YhIhITYDGLREIQRMALsGDhEAERFIRcLEDVL+RRaGpsAlpKLhEVLpPAREEGTlD.LPLsVhD-+GNllARVVDLKYEFVc........sspPVspCAGE-CRLRllVEYEs.uGER+QFKhEWYWtc.pcc+GpsTlTYYaEhAtsslKs-VEAAVLKALTG....KuKRGpVhLhAcpL-ALpRFKuL+DA.lDpWRsu+P .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 46 47 47 +7607 PF07776 zf-AD Zinc-finger associated domain (zf-AD) Guo J, Finn RD anon Pfam-B_14442 (release 14.0) Domain The zf-AD domain, also known as ZAD, forms an atypical treble-cleft-like zinc co-ordinating fold. The zf-AD domain is thought to be involved in mediating dimer formation, but does not bind to DNA [1]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.03 0.72 -3.98 98 2455 2012-10-03 11:22:52 2004-08-16 09:21:17 10 977 74 1 1809 2536 0 74.80 20 13.71 CHANGED hCRlCh......................pppsphhslhppt..........pst........plsphlpphhslplp........ts-shsp..h.lCpsChpplpphhpFcppspps.pphhpph .......................................hCRlCh...................................pptph..hs.l..aptt............................................ttt.....................plsphl.p.p.h.s.s.lplp......................ps-..slPp......t.lCppChpp.lpph...h....p....F+ppshpspphlt..h..................... 0 336 502 1534 +7608 PF07777 MFMR G-box binding protein MFMR Fenech M, Bateman A anon Pfam-B_5000 (release 14.0) Family This region is found to the N-terminus of the Pfam:PF00170 transcription factor domain. It is between 150 and 200 amino acids in length. The N-terminal half is rather rich in proline residues and has been termed the PRD (proline rich domain) [2], whereas the C-terminal half is more polar and has been called the MFMR (multifunctional mosaic region). It has been suggested that this family is composed of three sub-families called A, B and C [1], classified according to motif composition. It has been suggested that some of these motifs may be involved in mediating protein-protein interactions [1]. The MFMR region contains a nuclear localisation signal in bZIP opaque and GBF-2 [2]. The MFMR also contains a transregulatory activity in TAF-1. The MFMR in CPRF-2 contains cytoplasmic retention signals [2]. 
20.50 20.50 21.70 20.80 17.60 18.10 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.75 0.71 -4.37 23 187 2009-01-15 18:05:59 2004-08-16 09:23:41 6 6 41 0 63 170 0 162.00 36 47.29 CHANGED MGs.uE-spss.c....spPss...p.....s............psssssY.PDWuu.MQAYYus......s.PsaF.ssl.Auu.s.PHPYMWG.sQ.........MMPPY..GT.P..YsAhYP..GGlYA.HPuhP.us.s.s.h.........stss...sssshoh..Es..sKuopsK-+sshK+.KG.s.....sh.hottsspssKssu.usspshSp..........u-Sus..-GSS.pGSDuN.opsss ......................................................MGs.s-.sps.K..........pcssss..p................s......tsss..sa.PDWus.hQAY.s...................ss.saa.ssh..Auu.t.sHPYMWG..p.....................hhP.PY.Gs..P..Ys.AhYP...GulYA.HPuhs.s.......................s.........sssshuh..Es..uKsstsp-ps.hK+.Ks.......sh.hsttsspssKssusoussthSp.............................spSss..-uoS.-uSDtNopt..t............................................ 0 6 35 50 +7609 PF07778 CENP-I Mis6; Mis6 Wood V, Finn RD anon Pfam-B_17323 (release 13.0) Family Mis6 is an essential centromere connector protein acting during G1-S phase of the cell cycle. Mis6 is thought to be required for recruiting CENP-A, the centromere- specific histone H3 variant, an important event for centromere function and chromosome segregation during mitosis [1,2]. 
20.00 20.00 21.60 21.60 17.90 18.80 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.72 0.70 -6.31 5 194 2009-09-11 09:41:59 2004-08-16 09:25:22 6 6 139 \N 135 181 0 357.00 25 61.93 CHANGED MSs.cts+Nopt..R..QsppS.QTsL.sW+httpssspKsh..cupu..LuDpcHssDQ...DuLppAlsYFEKusc+sSpsKcolLcKHLcTlEsVAacsGLsPEuIDlLLDVALSGK..FusAlsTRILKCLIPsotISEDSVVKuVSWLCVG..KCSusIQlLFLRWLIsMFDFID+KcQlspLYGlFFshLsD-sLCPYlCHLLYLLTKKENVKPFRlR+LLDLQuKMGM..QPHLQALLSLYKhFsP-LISlSLPsRKKsaFKNuDs.W+AALpAV+QRNpussP-Pp.+LhLGssss+SpKRKWNppSlIPAlsos+..h.sttK.MSh.D.husssSaPLEQLpSFPQLLpNIH+LEFPSQMuSVLsssLLLHYlNCs+DEslLLRLsYWLoQTLQEECsWhsssNsQcEcEFpsFLDhll+upCFLQEGF.SCEsFLYKSLPLWDGhsCRSQhLpLVoWIPhSoFSElKslLLDcLAQLFFTSolYFKCSlLpsLK-LLQNW.LpWLS.-lplpShosSPh ...................................................................................................................................................................pt...ht...h..ht.ltphAhppGl.sp.lp.Llplh..hp.p....hsp...ss.s.pll.K.sh.hP.s.p..h..lsppslhpslshLs.s.............KsS.shpshhh+WLlhh..a..c..hl.-....pp..p.lp.thY..uhhF.s.Lp..sl..p..lC+L......L.l..l............T+.+cp..................V....+s.......aRlptl....hpL......tph..G.....p..tL.uLLplaK.ahPpll...........sh...s..h...s.....th.t..h.F+p.c..Wptt.h.tlp....pp....p..........t...........s.......ht..........t..........s.tt...+..p.......h.lPshpo.p.......................................s.ppt.h..slEplpsh.phlpplcplE...hPsQhsuhLtsslh...+hl.hh...sp.h....Rh..Wltthhp-.................tt........t...p.tpp.h................hLphlhph.phh..pph..sh.tFhh.p.L.hWsG.p.ps.hltllpahPh.....s.pphh..hht.Ltthh.hsss.....phtllp.hptllppa..h.................h............................................................................................................................ 
0 27 59 102 +7610 PF07779 Cas1_AcylT 10 TM Acyl Transferase domain found in Cas1p Anantharaman V anon Manual Domain Cas1p protein of Cryptococcus neoformans is required for the synthesis of O-acetylated glucuronoxylomannans, a consitutent of the capsule, and is critical for its virulence [1]. The multi TM domain of the Cas1p was unified with the 10 TM Sugar Acyltransferase superfamily [2]. This superfamily is comprised of members from the OatA, MdoC, OpgC, NolL and GumG families in addition to the Cas1p family [2]. The Cas1p protein has a N terminal PC-Esterase domain with the opposing Acyl esterase activity [2]. 27.30 27.30 28.90 31.10 24.90 23.70 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.78 0.70 -6.03 8 235 2012-10-02 17:00:17 2004-08-17 10:03:26 7 11 127 0 174 248 4 389.60 33 62.91 CHANGED -hhph............sDGTCCpshcssTslQtlshshlhlsh.hhhh...hhhhphs.thpspshhsshp..pp.ttt..tp............suhpslhsuLspLGLIMAYFYLCDRTshFMKENKaYochsFalPllYlhVLGLFasEso+.-TKVLNRDQTDEWKGWMQLVILIYHhoGAS+lLPIYMaIRVLVAuYLFhTGYGHFoaaWp+GDFG..hhRlsQVLFRLNFLoVlLChsMsRPYQFYYFVPLVSaWahVlYsTLAlsPplsupsspsN.h.ahhLLlKlsshhshITlLhhSpshFEclFslhPLctlFsl.cssl+EWWFRWpLDRYlVaaGMlaAhhaLthQ+tplhD-sptssLhSpp.ht..hlllullullsYhsashsCcsKh....pCNElHsYluhlPIluFllLRNIoGhLRSpYSoaFAWFG+ISLELFIsQYHIWLAADT+GlLVLIPGsPsL..........NllloTaIFVClSHEVuplTspLsphhVPpDtppsh+phh..hshFsus 
..................................................................................................h..............................................................................................................................t.hh.uhsphuhlhhYaYhCD...................RsshahcppK.Ysc......Fh.h....h.h.h..lh....s...h.u.h............h.h.c..............p..........p..................p......p...............t..t..........h........LNRcQT-EWKGWMQ..........llhLhYHhh...uAsp.....lY.tIRl.hlAuYlahTGaGpFoaaah+.tD...Fu....................lhR..........hsphhaRLNFhsshhCl.shspsY.hYYhsPhhohah....hhlY.s.slulhsph...sptt................hhhhKlhshhhh........lhhl.h....s.........hF-hl....a......h....phhh........t.......h....p.......s.........p.........lpE.....WhFR.tLDRa.hhhhGMlhAh.........ha.t.......h.....p..c...h.................c..t.........tt................ht......................t....h...........hhhhu.l.hs...h....h.h.Yhh.a.t.........h...p.sKh....phNp.hHPah.ShlPIhs.........alhlRNhst.hRsh...shFA...WhG+loLEhal...QaHIW.....L.............t......u......s..s......+h..lLsl........l.P..s...shl.................Nhhlsohlalhluacl.phTs.Lt.hhls.p.p...................hhhh......................... 0 79 106 146 +7611 PF07780 Spb1_C Spb1 C-terminal domain Fenech M, Bateman A anon Pfam-B_5001 (release 14.0) Domain This presumed domain is found at the C-terminus of a family of FtsJ-like methyltransferases. Members of this family are involved in 60S ribosomal biogenesis, for example Swiss:P25582 [1]. 
25.00 25.00 27.40 25.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.54 0.70 -4.85 35 314 2009-01-15 18:05:59 2004-08-17 10:07:12 7 9 273 0 235 323 2 238.50 34 30.26 CHANGED pssscc-s-hE.................h..ttt.tpttc...................................chh...........................sAEshuLuppls.sKKo+cDLID-uaNRauFpD.....cp...sLPcWFl-DEp+HsK.phPlTKEtstthKp+h+tlNARPIKKVtEAKuRKKhRuh+RLEKh+KKAssls-ss-hoE+-KucpIp+LhcKss+ppp+ppp......plVVA++s........spshsuRPpGs+G+aKhVDsRhKKDhRAhKRht...K....K ....................................................................................................................................................................................ttptpps.......t.-.................tttt.t.p.t...t.ptttt...tt...c..........................................phchh.......................................................ssEuhALuppls.ucKs+c.DllD.suaNR.YsF.pD..c-........sLP-WFl-DEp+H...p+.phP.lTKctstth+c+h+tlNARPIKKVtEAKARKKh+shp+LEKh+KKuphls-sschoE+-Kspplp+lh+K.As.pp.cp+pth......phVVA+ts........spt.hstRPpG..l+.G..+aKhVD..sRMKKDhRAt++ht+.t.............................. 0 87 135 197 +7612 PF07781 Reovirus_Mu2 Reovirus minor core protein Mu-2 Moxon SJ anon Pfam-B_9308 (release 14.0) Family This family represents the Reovirus core protein Mu-2. Mu-2 is a microtubule associated protein and is thought to play a key role in the formation and structural organisation of reovirus inclusion bodies [1]. 
25.00 25.00 280.80 280.50 16.90 16.50 hmmbuild -o /dev/null HMM SEED 735 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.18 0.70 -13.19 0.70 -6.56 5 53 2009-01-15 18:05:59 2004-08-17 10:08:56 6 1 24 0 0 56 0 726.00 44 99.09 CHANGED MAYIAV.PsltVuSRsTsLlsoIDuhsscsst-tpDVushDPoalLRQLEhhSuGhosuDllcALlH+cWh+poshsLLPs++pLL-YLLSNPSAsPDslDRopLKuhhhpK+hssD.........F+IpDaauPLIosSTuluThoRpLNAuplVYosTsKVlGARL+LaAPAKYYuuoLSsppppuIlPSoccltssP+uRlsVosFPSloTs+CFVLouVDt.ssPsh..sVcaIQLhYpRshuV+ARhLsD..PlAluchlscpuLKsphssPoDA..RAARLstlRsputosPsGlNsStlsVVDLphph.ssscuLtsVsRsh+LTlHuVPSCLLphL-ITluDc.YPIRpEoGMFssWFLlLTLhSDclTDuRT+puVhLpPuSsuscslsaVplsusVSsRspSVtussusalcuVGLsLPKGSFKSTMI+sLsuLpIsGspVM.ussVlDSD-VGDSLcPTFETA...lY-tLtuLDP..lDDllKlALuTDLlsp-ssloplasoFLcLssELLTPtARchYspposEGRoLTFAHADSEhLNANasu+LlRstIPYacEVNILlRPNRVGGsLFQVlLSYCYKMaATSssosPhGtLLKpLFsPWLcussLlusLsPscoSAsluWaIPuchhsssGWChC-D.taloas.IRuhPL-lSVLctacWuRa+AsIlV-co.....LVplGuDhRssthuVhassatPsVcLlophAsFTLss+YclpL...........sCusooGRsasAcNs+LsloosG ..........................MAYlAl.Psl.VsSRposhlshl-uhs.lcsth-h.sDVthhD.shlLcQl-hhssGhpssDlhcuLlH+sWh+pSlhsLLPs+ppLLEYhhSNP.S.uhPDslDRphLKth.......Kch.pD.........F+lpDaauPLIosoTShhThsphLNst.IVYoTTc+VlGARlpLauPtKYYsho..uhhpphsIl..scclhsVPpuRhhVGsFPShuTspC.VLouh-h.stPsh....hchIpLhYp+.hp.VpAphLsD..P.lsuhhlsp+pl+st..sPs-t..RAAR.htlphp..stsssptlssthlpV.VDlhhph.sstcuLhsspR.h+LThhuVPsClLphLslp.luDt.hsIRpEsGMFs.WFLlLThhSDtlpDsRsp.tlhlpPSSssspsl.plplTuhlst+s.slhsshhsh.lcsIGlshPKGSFKSThh+hLsulpIsGsp.lM.pssVlDSD-VGDsl-PTFEpA...lYcult.ulDs...-DlhKhshsoDLlsp-.hh.uplassFLtLsp-LLsPhARchYspphsEuRsLTFAHADSEhLNAsasu+LhRshIsYhpEhNlllRpsRlGGsLFQllLShCYKMaATSssspPhu.hL+tLhsPWLcst.LluphsPspoSt.luWaIPsphhhpsGWCsC-c..talsas.IRu..hslp.LphhsWupa+Apl.Vssp.....LV.lGtshRssthuVhhssphP.VclhsphAhFT.ht+YchpL...........pCspusGRs.tAhNh+Lhhpo.................................................................. 
0 0 0 0 +7613 PF07782 DC_STAMP DC-STAMP-like protein Fenech M anon Pfam-B_9122 (release 14.0) Family This is a family of sequences which are similar to a region of the dendritic cell-specific transmembrane protein (DC-STAMP, Swiss:Q9H295). This is thought to be a novel receptor protein that shares no identity with other multimembrane-spanning proteins [1]. It is thought to have seven putative transmembrane regions [1], two of which are found in the region featured in this family. DC-STAMP is also described as having potential N-linked glycosylation sites and a potential phosphorylation site for PKC [1], but these are not conserved throughout the family. 25.00 25.00 26.90 26.90 24.50 23.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.94 0.71 -4.68 19 281 2009-01-15 18:05:59 2004-08-17 10:10:02 8 6 81 0 182 260 0 173.30 25 29.66 CHANGED F-NhYlTctFhph-pcccchsttslLPLpppEcpphlplsphphotp..Ehhplhhphh.lhlphlthshhlhlDahlapllshlpp..........h...phps.ttlplpl.sGsuhhuclh............cphhpsFpshp...phshphss..pcC.lPpPph.shtshlhlslLhLhshhllhlpsYshRlR+lIsu.FYPpcE+cRlhaLa ......................................a-NlYITp.FhphDtpcpphtp.s.lLPLpptE..p.p..phl..spht.l.stp..Eh.hp.h.hhthh.hhhhhhhhhlhshlDahlahllphhpp.............................................php.s..hlplpl...tGtuhhuplh.............+phh.t.shsh.p...phs.hphss...pC.LspP..p..stpta.hhl..sllhhlhhhlslhpsashRLR+.llsu.a....aPppEc.cRlhaLa............................................................... 0 45 59 112 +7615 PF07784 DUF1622 Protein of unknown function (DUF1622) Fenech M anon Pfam-B_9062 (release 14.0) Family This is a family of 14 highly conserved sequences, from hypothetical proteins expressed by both bacterial and archaeal species. 
19.90 19.90 20.50 20.50 18.60 19.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.37 0.72 -4.33 51 422 2009-01-15 18:05:59 2004-08-17 10:21:47 6 2 358 0 127 289 25 80.30 35 64.72 CHANGED Gslhuhh.......phlttthtpp.......stsa.....p.plRhplGphllLGLEFhlAADIlpTsl.sPohp-lhhLusIllIRThLuYFLs+El ..........................Ghhhuhhphlp..hppp.......spth.....p.tlRthLGsalLLuLEhLluADIlcTll..pP.Thp-lhhLuslllIRTlLSaFLppEI...... 0 44 91 112 +7616 PF07785 DUF1623 Protein of unknown function (DUF1623) Fenech M anon Pfam-B_9100 (release 14.0) Family The members of this family are all derived from relatively short hypothetical proteins thought to be expressed by various Nucleopolyhedroviruses. 25.00 25.00 61.90 61.60 23.70 23.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.17 0.72 -4.03 13 31 2009-01-15 18:05:59 2004-08-17 10:24:15 6 1 30 0 0 26 0 87.60 39 88.90 CHANGED LoAaVLhVsNs.sh...p.p.hYhthLc+asVlDulMCs.NGDCLAVslossshhsp...............p.ppLcll-...pas.psl-hLh-KIYsIV-hYN .LhsaVlhlsNs..ch...p.p.IY.pYLp+asVlDulMCs.NGDCLAVsVossshlpp.....................psLcll-...pts.psl-hLp-KlasIlEhYs 0 0 0 0 +7617 PF07786 DUF1624 Protein of unknown function (DUF1624) Fenech M anon Pfam-B_9180 (release 14.0) Family These sequences are found in hypothetical proteins of unknown function expressed by bacterial and archaeal species. The region in question is approximately 230 residues long. 
20.60 6.20 20.60 6.20 20.50 6.10 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.62 0.70 -5.01 49 1276 2012-10-02 17:00:17 2004-08-17 10:26:21 7 9 944 0 477 1471 482 201.10 17 57.55 CHANGED RhhtlDhhRGlAllhMlhaHFsaDLpaFGhhshsht.sshathhup.hlAshFlhluGlSLsluhspshph..........ppah+RhhplhuhAhlIossTa....l..hhP.pual.hFG..................ILHhIu....luslluhhF..h+hshhshhhhuhhhlhh........shhhtt...................hhsss.....hLhWlG....lhs...tsh..ho.DYhPlhPWhGlhLhGlshuphhh.p...tpht...hst.hhhtsLsh.l..GR+SLhl..YLlHQP ...........................................................................RlhslDhhRGls.llhMll......s...p....h......h.....h.................h..........t......................h..h.....h...............h.........................t.....h............................................h.......h....h..h....h............sp....h...........s....h....sh..Fl....hls..G..h..ohh..l...h..ht.p.t.th................................................tth.h..p....R..s..l.....h...l.....h...h...h..u..h..h.....l.s.h.h.sh.................h......h.hs......ph....l....hh...s...................................................................................................l.Lpt.l..u....l.s....h..l...l.s.h....h....h............hp.h......h....h....h....h.h..h.h....u..h.h.h..hh.h..........................hh.....................................................................hh.t.................h.......hh........h.......................................s..............h........sh.h.......s...hh.....sh.hhhGhhhu..h.h................................................................................................................h..h.................................................................................................................................................................................................................................. 
0 161 323 402 +7618 PF07787 DUF1625 Protein of unknown function (DUF1625) Fenech M anon Pfam-B_8907 (release 14.0) Family Sequences making up this family are derived from hypothetical proteins expressed by both prokaryotic and eukaryotic species. The region in question is approximately 250 residues long. 20.30 20.30 20.30 21.00 20.00 19.80 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.49 0.70 -5.13 22 173 2009-09-11 05:28:20 2004-08-17 10:32:38 7 4 130 0 114 183 5 235.90 27 61.61 CHANGED MYQWVEppppcp.pp.ss..p....oYpYsppWpsphlsSppF.t.pGHpNPss.h.lpupshhs.pV+lGsahLupslhpplss.hcslshss................pssttphphpsshhYhsps.stpPpl.....................GDlRlpFphs............ss.sollucQpss......plhPapopss............................................ppl..hlh.GphoscclFp...pttppsshhTWhhR..hsGalLhFlG..hhhhhs.lthLsshlPllts.............lsshshhlhu..hhluhslsLhsIAhuWlhYRP ...........................................................................MYQWlEppppcphpp.stt.p...........ppYpYsp-Wpschls..SppF..ppthGHp............NPs.....t..hslcSh..shhAstVplGt.ahLusslhcchss..hptls.h..sp...........................................ps.pssl.ph....psshhYaups...stpPpl.............................................................GDlRlpFphu...............ss.ph.soVl..u..cQ.p..us........plhP.ap..opsG.....................................................pplh.ll..t..Gp...h..oscchFp...ppp...pssshhTWh.hR...hhGahhhFh.G..hthhh....ph....lhhlh...s...hhPh.hts................lsshshhh....hs...hhhuhslslhsluhuWlhaRP............................................................................................................................ 0 49 66 89 +7619 PF07788 DUF1626 Protein of unknown function (DUF1626) Fenech M, Sammut SJ anon Pfam-B_9705 (release 14.0) Family This is a family consisting of sequences from hypothetical proteins of unknown function expressed by certain species of archaebacteria. 
One member (Swiss:Q9YCN7) is thought to be similar to tropomyosin [1]. 21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.14 0.72 -4.19 18 128 2012-10-11 20:44:44 2004-08-17 10:41:17 6 6 56 0 61 129 12 70.70 35 27.66 CHANGED ScVElDlllKDGplIllEIpSSlpRGDlhhlcRKs-LYE+scuhKhs+llllTPaI--R....s+thAcclGIc ....pplElDl.ll+sGhhlllElKSulc+uDl..h....htRKschYE+.hps++ss+l.llloPhlcc+....uhthAccLGI-.............. 0 26 36 45 +7620 PF07789 DUF1627 Protein of unknown function (DUF1627) Fenech M anon Pfam-B_9837 (release 14.0) Family This is a group of sequences found in hypothetical proteins predicted to be expressed in a number of bacterial species. The region in question is approximately 150 amino acid residues long. 25.00 25.00 27.80 27.10 24.10 23.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -4.68 4 472 2009-01-15 18:05:59 2004-08-17 12:03:29 7 1 171 0 4 267 0 143.60 65 64.12 CHANGED IEQ+GPQTADELAshFGsTSRKVASTLAMAISKGRLIRVNQsGKFRYCIPGsNLPAEPKAASVocsDGKAFPQPsGsALPVtEAATQE-IKTEoVAslVQs.PSFTcppsDtLlhPSL+hANhtLRRAKspVQKWERVCAALRELNKpRDIlRpI ...........................................ItQcGspTs-ELAshFGlo..oRKVASoLAhshupGRLhRVNQsGKFRYCh...P...G...ssLPAEP.KAAsVs..EoDGKAFPQPA.GsALPVpEAAT.QE-IKTEoVA-hVQshPSFT..cs.ps..DsLILPSLHhANRELRRAKupVQKWERVCAALRELNKHRDIlRpI....................................... 0 1 1 2 +7621 PF07790 DUF1628 Protein of unknown function (DUF1628) Fenech M anon Pfam-B_5107 (release 14.0) Family The sequences making up this family are derived from hypothetical proteins of unknown function expressed by various archaeal species. The region in question is approximately 160 residues long. 
24.20 24.20 24.30 24.60 23.90 24.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.96 0.72 -3.23 48 295 2009-01-15 18:05:59 2004-08-17 13:02:07 6 14 62 0 237 307 97 81.90 25 31.71 CHANGED cAVSPVIGVlLMlulTVIlAAlluuFlhuh....ssspssPp.ssl...........p.p...hssssst...............lplpHpGG-slss...pslplh...lsssss .........puVSPVlGVlLM...lulTVIl.AAllus.as.huh.....sss.sppsPp..ssl.......................................php.........hssssst....................ltlpa.tG.G.-.s.lsh...pplplhh.....s.................................................................................... 0 40 200 223 +7622 PF07791 DUF1629 Protein of unknown function (DUF1629) Fenech M anon Pfam-B_9012 (release 14.0) Family This family consists of sequences from hypothetical proteins thought to be expressed by two members of the Xanthomonas genus. The region in question is 125 amino acid residues long. 22.10 22.10 23.50 24.80 21.40 20.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.58 0.71 -4.24 13 56 2009-01-15 18:05:59 2004-08-17 13:46:31 6 1 29 0 16 57 1 119.60 48 57.33 CHANGED ctGEFahLcsDhcusGsspGVVFENc+pLLoPPRLILRPc-GGFPsLREpP+LsY-PspGs.PcDLEuGFSGYWLVSERL+pVhtuVDP-AFAFA-sDaRLADGopGPRaaLCDVVRpLDALDE .....................................GpFahlpssh.tst.spGV.FpN.cpLlsPsRlILpP.c.pGGFP..sL+EpP+LlYcPscGs.P.cDLEsGFSGYWLVSERL+pVhpsVDPcAFAFA-sDaRLAD.G.o.tGPcaaLCDVVRpl.DALDE. 0 2 5 8 +7623 PF07792 Afi1 DUF1630; Docking domain of Afi1 for Arf3 in vesicle trafficking Fenech M anon Pfam-B_9160 (release 14.0) Domain This domain occurs at the N-terminal of Afi1, an Arf3p-interacting protein, is a protein necessary for vesicle trafficking in yeast. This domain is the interacting region of the protein which binds to Arf3, the highly conserved small GTPases (ADP-ribosylation factors). 
Afi1 is distributed asymmetrically at the plasma membrane and is required for polarized distribution of Arf3 but not of an Arf3 guanine nucleotide-exchange factor, Yel1p. However, Afi1 is not required for targeting of Arf3 or Yel1p to the plasma membrane. Afi1 functions as an Arf3 polarization-specific adapter and participates in development of polarity. Although Arf3 is the homologue of human Arf6 it does not function in the same way, not being necessary for endocytosis or for mating factor receptor internalization. In the S phase, however, it is concentrated at the plasma membrane of the emerging bud. Because of its polarized localisation and its critical function in the normal budding pattern of yeast, Arf3 is probably a regulator of vesicle trafficking, which is important for polarized growth. 25.50 25.50 26.00 25.50 24.90 25.20 hmmbuild --amino -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.76 0.71 -4.18 31 156 2012-10-02 14:18:06 2004-08-17 15:51:17 7 6 144 0 117 210 0 149.80 36 19.68 CHANGED pV-YILlApFDlD+GPlhcHQYPssl.sG..............sEphLAcLMLPDpsHsR.sp...DaTlFhLacss.spt.p...........ttt.......................pt.t......................................p.....tt....s.hahlsllpsptDpshcRGuhlKuhAlsTphshlaha+..sllhhsL- ............................................pVEYILlApFDlD+GPhhcHQYPssIsG...................................DEphLAE.......LMLPDpsHhRsp.....DWTlFa.La+sssspptp.............................t............................................p..................................................................................................sppptpsscs.....sshhYllNlVs.sppDpos+RGAhlKuhAIsTphsahplaKPlLllsL............................................................................................................... 
0 42 72 103 +7624 PF07793 DUF1631 Protein of unknown function (DUF1631) Fenech M anon Pfam-B_9170 (release 14.0) Family The members of this family are sequences derived from a group of hypothetical proteins expressed by certain bacterial species. The region concerned is approximately 440 amino acid residues in length. 19.70 19.70 19.70 19.80 17.50 19.30 hmmbuild -o /dev/null HMM SEED 730 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.13 0.70 -6.40 36 372 2009-01-15 18:05:59 2004-08-18 12:44:36 6 9 187 0 125 418 96 617.90 21 89.36 CHANGED ssptpsst.....sphPslLhpl+-pstppLtphLpshF-ssDDsLF-hA-+Atss.p-QshaF-AMR-LRh+RcslppsFhppltpuFss.Lspspssss..shs...hs.ssLoLVppc-LEcplAlcsMls+spschstsLtpLstRLshLh.sshplppcsNPLuPptLCpsFhcAs.psLslsl+s+LlllKLFERhVlsplsplYscANplLlpsGVLPcLp.s....................................hppspsss.........sssptsssssssssspsuss.ttttt.......................................pshFs....tLppLLtpsRssttsst.ssssss............................tslssp-LhphLupLQ...........t.ssshsssp.......hslpp.lppLLpphp..spsGpppslupsD-DlINLVuMLF-aIL-DcsLsssl+ALIuRLQIPlLKVAlhD+sFFs+ssHPAR+LLNplApAuhGWsspss.htcDsLhp+lpplVpRlLs-FscDsulFs-Lhp-FptFhpp-cRRs-llEpRsp-AEcG+s+tctA+pplpptLpp+ltsps.LPpsVhplLppuWocVLhLshL+cGcpS.cWppulplhDcLlaslp.pp.pspsppplhphlPsLLcsLRpGLppl.uacshpssphhppLcphthpshps........................................................................................t.st.stpssstspss-clsssttppsttttt....................stppphlpplcpLclGoWlEhtccc.tchhRsKLuuhl.csos+alFVNRpGhKVtEtotpsLAhthppGpl+lL..Dcu.hLFDRAL-uVlssLR 
..................................................................................................s..................h...t.hht.hh..ht.hh.thhtth........tpt....Lhp.upputss...pt.h...hts....h....ptl.c.....pt....tth..t.thhtth.pthtt....h.t.t....t................t.ttL......sLltpppl-pplhlpthstth.pp.t..L.t.Lp.tRlshlh.......thttpp.PhtPthlsthhhpsh.tth.s.lt.p..sphhlhchhcp.lhtt.hsthYtphsphLhttGl.hP.t.h...................................................................................t....t.t..................t...t.....t.......s..............................................................................................t.hht.....tlpthht....t...htt............................................................................h.s.tttlhp.h.Ls.tlQ...............s...tt....................h.tt..htp.llpt..........tp.t..tthls..-pss.....ls...........lluhlFp.lhp-tplsssh+shlu+LplPh.l.+.l.AlhD.pF.FsptpHPARcLLsplupsshuhss.pss...t.p...ptLhtc..lpphVpp...lhp.pap.p.s..slFt.p...h.t-ht.t.a...h....p..........p....t.tpc...sph.hpp+hhc.stc.upt+hph....Ac.....p....tstttlpp.....hh.t.st.........s....lPph...l.phLppsWtcllhh.sh.l.+..p....G......tpu....thpt..h............h..t.hhspLlhshp.tt.......tttt.tl.t.....h.stLhpt..lppultph...u.hs..tttthhppLpp..hhhtshtt.....................................................................................................................................................................................................................................................t.........t...hh.pp.hs....ttt..................................................tpthh..t.h.cplthGsWl.-h.....ppp..tphhps+Lshhs...ssphlF...ls.ppG..h+h.h.h...shttLAh.hpt..Gthphh....ppt...lh-cAhpssltt............................................................................................................ 
1 33 80 108 +7625 PF07794 DUF1633 Protein of unknown function (DUF1633) Fenech M anon Pfam-B_9750 (release 14.0) Family This family contains sequences derived from a group of hypothetical proteins expressed by Arabidopsis thaliana. These sequences are highly similar and the region concerned is about 100 residues long. 20.40 20.40 34.20 29.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 790 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.40 0.70 -6.58 2 28 2009-01-15 18:05:59 2004-08-18 14:02:52 6 5 2 0 0 28 1 198.90 20 84.20 CHANGED MpRLFSAPHLTsQIRRGEEISsISKIVVMMA.LNNFP.LH.hSRQLTpLs............................................SDh-DSQNKSNHSYGDssSSS..Rst.......+RVITLGGMGPIRpPSTRsDKPKKKKAsKQtGuhPMTNsDL.................Vs.cRtstVslpl+.hLpc.................P.L+L.pNVS......sG..shYsshhtcptlhhPhPAu..........Rs+-V...............sLSthcLt..hLLhpG......................................pschuTFsRDR.VsALslsPAsPsIs.PsAQ.DPlEEVPpl..hPpA.h.h...ls.sSsSTSp........Cso..........pARAsDLSAsVsAARsoLAsSpspASsSHPSLPtsN.......................................susstAAVul...................................uAsN+.......................LlthhctR.sQVP..............oupphp.GcphhRc.sspltssp-REVp.tAc.....h.RLKhELSTSKDLEKGYAEKIthME.EFtGLpADKQhARsQIHRLpp++-ELSK+VhDLTS.AQGspKAVHDAKVELAAuY.KLLuGIK-KWVsKKEaTVLEuQAAEVESNLALIDQIsKAAlDLTlEKPRhQAElDDlEARCp.KEVSDFTLSKLDlPcVSEh.VVRPhsVDEQGTPIGLDEFGSNKDsFPtGLt-ssGTVFAhPAGtS+E 
......................................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +7626 PF07795 DUF1635 Protein of unknown function (DUF1635) Fenech M anon Pfam-B_9707 (release 14.0) Family The members of this family include sequences that are parts of hypothetical proteins expressed by plant species. The region in question is about 170 amino acids long. 
21.40 21.40 22.20 22.20 20.40 20.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.27 0.70 -11.66 0.70 -4.96 5 89 2009-01-15 18:05:59 2004-08-18 14:04:57 6 7 23 0 64 90 0 190.00 30 60.51 CHANGED cThEELRQoLlYTThELEQTKMhAsEElRp+DEQlhpLcDLLsKTlKERDEApE+hp+LLhcs..L.Qpp......................pDEQlpPNp.pLspsNSFSSSDsE...ESIsSSp.psh................-PsspppLcclsss-hLh..ll.-KsLPEKGKLLQAVlKAGPLLQTLLLAGPLPQWRHPPP.LcS..FEIPPVo.....ls.tCP.hossGCG.NFN+KRVa.luDtShsETKYQR.LLc ........................................................h-EL+ppLhhsohEL-thp..Ap-Eh++pcpplhpLhcLLptshpERDEApcphppLh..htt..h.t.t.................................................pspp.......tp....pp...u..hs..s.Sssp.....pshsSs....h........................................t..st.t.......t............t...phh....hs..t+sLPpKG+LLpAVhcAGPLLQTLLlAG...........PLPpWRpPPP.hp..s..hplPsh......................................................................................................t................................................... 0 12 47 56 +7627 PF07796 DUF1638 Protein of unknown function (DUF1638) Fenech M anon Pfam-B_6091 (release 14.0) & Pfam-B_3149 (release 23.0) Family This family contains sequences covering an approximately 270 amino acid stretch of a group of hypothetical proteins.\ These proteins are expressed by archaeal species of the Methanosarcina genus. 
20.40 20.40 22.10 21.40 19.50 18.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.06 0.71 -4.61 55 232 2009-09-11 07:42:39 2004-08-18 14:25:11 6 8 168 0 105 231 180 166.10 23 67.84 CHANGED phLsu.tLHstP-clpstlppslpchp...........sth-pIhlsYus..CGsu.....Gtlppph..schG.........lhh........htuscChuhhh...Gssphttch....schssFYLTshhscpa-s..............hhhcshGhDcpspLc....phhaup.Yp+llals..........psc..............-.thppcscchAsplGLsapch.sshGsLptslpsh .............................hLsstLHsp.P..-clpptlpptlcphp............th-p...IllsYGt..CGsu.....Ghlspthp..t.........lhh............psscChshhh...Gspphttp......pchssaa..LTs....hhhcpacs...................hhhcthGh-ct.splh.....chhaspYpcllhls........ppc......................-..htppscchAchhslshphh.sshs.Lpphlt................................................................................................. 1 44 83 89 +7628 PF07797 DUF1639 Protein of unknown function (DUF1639) Fenech M anon Pfam-B_6036 (release 14.0) Family This approximately 50 residue region is found in a number of sequences derived from hypothetical plant proteins. This region features a highly basic 5 amino-acid stretch towards its centre. 22.20 22.20 22.20 23.30 20.90 21.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.55 0.72 -4.57 20 205 2009-01-15 18:05:59 2004-08-18 14:57:55 9 6 19 0 117 191 0 49.90 50 19.35 CHANGED LS+cEIEEDFhAMpGs+PPRRPKKRsKsVQKpLD...............hl.PGhhL..sclos-pY .LSp+EhEEDFhAhpG.sK.PpRPKKRsKhVQ+pLs........................hlhPGhWL..s-lot-pY................................... 0 14 66 95 +7629 PF07798 DUF1640 Protein of unknown function (DUF1640) Fenech M anon Pfam-B_6194 (release 14.0) Family This family consists of sequences derived from hypothetical eukaryotic proteins. A region approximately 100 residues in length is featured. 
24.20 24.20 24.20 24.40 24.10 24.10 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.04 0.71 -4.23 24 513 2009-09-11 19:38:48 2004-08-18 16:04:56 6 13 255 0 324 490 8 162.70 26 59.06 CHANGED hFDTpthVp.pL.Ecs...............GFsppQAEslspslscllpsulpplspshVo+tphpp.........sthpQcschucl+s-lh.......................p...............h-+oEFsslpsEpE+l+sDLc+L+s+L+-Els+spAuh+LDLNLEKu............RlR-Ehupp-hK..................lp-hss+IDpElusL+splEosKhpslpaLhGsssushAlhLuahRlhh ..................................................FDThthVp.pL.c.pt.....................................G.FsppQApslhpslpp...llpss.....l.pt...l.....t.p.s.....h.ls.+.tc.hcp......................tthp.psshuc..l+s-l.........................................................................ccsch...sth+sppc.+lpt-lpplp....p...cLpp.E.l.sc.hps...sh+.LDhNhc+u..................cl+-..stp-hc...........................l.p-hpp+I....-p........-.l.s.s....l+tt....lE....s......hKhp....sh.p.a.h....h..u.h.hs.hhslhhuhhh.......................................... 0 78 163 247 +7630 PF07799 DUF1643 Protein of unknown function (DUF1643) Fenech M anon Pfam-B_9851 (release 14.0) Family The members of this family are all sequences found within hypothetical proteins expressed by various bacterial species. The region concerned is approximately 150 residues long. 
20.20 20.20 20.40 20.50 19.80 20.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.87 0.71 -4.47 57 556 2009-01-15 18:05:59 2004-08-18 16:06:40 7 5 475 0 91 385 369 133.70 33 76.28 CHANGED RYhLpRpW.........s..tsttpllFlhLNPSpAsttpsDPTlcRh.phA.+sh.GaGuhhlsNlFAhRuTsPpsLpp............ssDPlG..sc.NDthlhchsp.hu....spllsAW.GspGt....hhsRs....ppVhchLpst.............lhpLGlo+sGp.PcHPL ...................RYhLp+pW.........s....tpc.sshhIsh.P...shs.....s....s.....hp..DhTsth.lhsh.............h.....ps.....................saG...ulhllNLFu.hp.T.Pc.sLc.c...............hp.c.Phs.....pc..sDhplhc.sls..cu.............-pVlhAW.Gs....huc.........hhpRs......ppV.h.chLcsp...................tttlhp.l.h........s.tssc..hHPL....................................................... 0 21 51 76 +7631 PF07800 DUF1644 Protein of unknown function (DUF1644) Fenech M anon Pfam-B_5078 (release 14.0) Family This family consists of sequences found in a number of hypothetical plant proteins of unknown function. The region of interest contains nine highly conserved cysteine residues and is approximately 160 amino acids in length, and is probably a zinc-binding domain. 
25.00 25.00 26.00 31.70 24.50 23.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.89 0.71 -11.18 0.71 -4.38 18 198 2009-01-15 18:05:59 2004-08-18 16:09:58 7 4 23 0 117 186 0 146.10 44 52.08 CHANGED -DspCPlChEaPHNAVLLhCSSacKGCRPYMCsTStRHSNCL-QF++uhsctts.sp......................................................................................t.pppc..cLsCPLCRGcVpGWpVVcp.ARpaLNpK+RoC.p-sCsFsGoYp-L+KHs+pcHPsu+Ps-lDPscppcWcpLEpcp-htDllS ................................................cspCslCh-hPHNAVLLhCSSacKGCRPahCsTshp+SNCL-..pa+puhsp.t..........................................................................................................................pppp..pLtCPLCRGpVpGWhll.c..ARpaLNtKpRsC.p-sCsFsGsYpEL+KHs+pcHPpu+PpclDPs+ptcWcphEppp-htDllS................................................... 0 18 64 91 +7632 PF07801 DUF1647 Protein of unknown function (DUF1647) Fenech M, Pollington J anon Pfam-B_5249 (release 14.0) Family The sequences making up this family are all derived from hypothetical proteins expressed by C. elegans. The region in question is approximately 160 amino acids long. The GO annotation for this protein indicates the protein to be involved in nematode larval development and to have a positive regulation on growth rate. 20.80 20.80 20.80 21.60 20.70 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.66 0.71 -4.69 15 96 2009-01-15 18:05:59 2004-08-18 16:22:08 6 6 28 0 90 84 29 119.60 29 31.81 CHANGED CtChSspoGKoYsFCYpsPpNssSIGKKFsCuhLsTLEcLsLlspss.phlsLssshcNpsslVFVSATS-DHhs.uhpShpSlR+aYPppKaILYuLsLocs.IppLscp.pNlEhRtFNTotYPcYVsNWhcY+FKPLllA ..........................................................Y...ps.t.hGtp..FsC.hlthh-pht..............lh...........t..t....h.ph....pp..p.spppls.hVSssSssHhtthhp.hpslppahPsp+hllYuLslspt.lpp..lt..p................p.sshchRpFshotYPpaVp...shhpYpaKslllA........................................... 
1 34 41 88 +7633 PF07802 GCK GCK domain Fenech M anon Pfam-B_8992 (release 14.0) Domain This domain is found in proteins carrying other domains known to be involved in intracellular signalling pathways (such as Pfam:PF00071) indicating that it might also be involved in these pathways. It has 4 highly conserved cysteine residues, suggesting that it can bind zinc ions. Moreover, it is found repeated in some members of this family (such as Swiss:Q9LMF3); this may indicate that these domains are able to interact with one another, raising the possibility that this domain mediates heterodimerisation. 23.40 23.40 23.40 23.40 22.10 23.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.78 0.72 -3.85 8 81 2009-01-15 18:05:59 2004-08-18 16:43:26 6 6 28 0 64 77 3 69.50 36 36.18 CHANGED GECtFCpFMKGGuCKEuFlAW.EcCs.-tAccsccpDhVT+CtElpuphK+CMcsHuDYYpPlLAuEKsucs+hcKEL ................-ptashFMKuGuCK-sFhAW.-cCs.pts...pt.ptp..shhpcCt-shstLc+CMc.AHuDY..YpPlLsspcshtpph.tt........................ 0 16 26 34 +7634 PF07803 GSG-1 GSG1-like protein Fenech M anon Pfam-B_9727 (release 14.0) Family This family contains sequences bearing similarity to a region of GSG1 (Swiss:Q9Z1H7), a protein specifically expressed in testicular germ cells [1]. It is possible that overexpression of the human homolog may be involved in tumourigenesis of human testicular germ cell tumours [1]. The region in question has four highly-conserved cysteine residues. 
19.50 19.50 19.90 19.90 19.40 19.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.67 0.71 -4.50 8 150 2012-10-03 00:20:40 2004-08-18 16:56:19 6 3 42 0 77 136 0 118.40 46 41.34 CHANGED +psR+sRuLLSlsLNhLALhFSsoAhlToYWCpGTQKVPKPhC.ot...s+ppNChshssssss...................sP...............ssVpYsWETGDDRFlFRpFHTGlWhSCEEslassuc+CR........SFI...cLuPsSp+G ..........ptpRshLolhLshLALshSsoAllooYWC.GTQKVPKPLC..up...sttspChchsss.su...t.ss................sp..........................psVpYsWETGDDRFhFRtFHoGhWhSCEEsl..c..p.sG..E+CR........SFl...-LsPstcp.u...................................... 0 3 12 41 +7635 PF07804 HipA_C HipA-like C-terminal domain Fenech M anon Pfam-B_8632 (release 14.0) Domain The members of this family are similar to a region close to the C-terminus of the HipA protein expressed by various bacterial species (for example Swiss:P23874). This protein is known to be involved in high-frequency persistence to the lethal effects of inhibition of either DNA or peptidoglycan synthesis [1]. When expressed alone, it is toxic to bacterial cells [1], but it is usually tightly associated with HipB [2], and the HipA-HipB complex may be involved in autoregulation of the hip operon. The hip proteins may be involved in cell division control and may interact with cell division genes or their products [2]. 
24.00 24.00 24.00 24.10 23.80 23.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.01 0.72 -3.86 175 2174 2009-09-13 11:39:15 2004-08-18 17:00:02 7 16 1315 18 624 1916 261 82.00 26 20.24 CHANGED Rlthpshsslhshsstp......................suat.plhphl............pp..hst............ttshpchhcphlFNhlluNsDsHsKNauhlhs....ss.s...h.pLuPhYD ........................................................Rl..pshsph..hshsst......p..................suht.pl.hphl....................tt...hs.t.s..................htch.tp.h.h.c.t.h.lFshLluNsDsHsKNaShhhp.....ss.s..................a..cLuPhYD........... 0 159 359 505 +7636 PF07805 HipA_N HipA-like N-terminal domain Fenech M anon Pfam-B_8632 (release 14.0) Domain The members of this family are similar to a region close to the N-terminus of the HipA protein expressed by various bacterial species (for example Swiss:P23874). This protein is known to be involved in high-frequency persistence to the lethal effects of inhibition of either DNA or peptidoglycan synthesis [1]. When expressed alone, it is toxic to bacterial cells [1], but it is usually tightly associated with HipB [2], and the HipA-HipB complex may be involved in autoregulation of the hip operon. The hip proteins may be involved in cell division control and may interact with cell division genes or their products [2]. 21.00 21.00 21.30 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.68 0.72 -3.76 170 2004 2009-01-15 18:05:59 2004-08-18 17:04:41 7 15 1218 8 598 1762 241 83.10 28 20.04 CHANGED SluGspsKhslhh.............................ssssspaIlKhss.sp............h.shstsEhhshp.lAp.thGl.sss..pspl....hphss................ptshhlcRFDR ........................SluGsp.Khslhh.t...........................tth.h...s...s..s.s.sspaIlKhsh.sp.............s.htshspsEahshp.lAp...th..Gl...sss...pspl...hp.hss.................tpslhlcRFDR................... 
1 152 346 484 +7637 PF07806 Nod_GRP Nodule-specific GRP repeat Fenech M anon Pfam-B_8942 (release 14.0) Repeat The region featured in this family is found repeated in a number of plant proteins, some of which are expressed specifically in nodules formed during symbiotic interactions with certain bacterial species [1]. Some of these proteins are also termed glycine-rich proteins (GRPs), due to the presence of a glycine-rich C-terminal region in their structures [1]. Bacterial infection is required for the induction of nodule-specific GRP genes, and it is thought that nodule-specific GRPs may play non-redundant roles required at specific stages of nodule development [1]. Members of this group of proteins may be cytosolic, whereas others are thought to be membrane-associated [2]. 25.00 25.00 49.20 35.60 18.50 18.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.09 0.72 -4.34 10 36 2009-01-15 18:05:59 2004-08-18 17:16:57 6 5 5 0 0 36 0 36.80 65 52.23 CHANGED Gss+ESKTKhGhDGWRDWGGSFWp.DscENNGGG-KEGG GVs+ESKTKlGhDGWRDWGGSFW-stcENNGGucKEGG. 0 0 0 0 +7638 PF07807 RED_C RED-like protein C-terminal region Fenech M anon Pfam-B_9789 (release 14.0) Family This family contains sequences that are similar to the C-terminal region of Red protein (Swiss:Q13123). This and related proteins are thought to be localised to the nucleus, and contain a RED repeat which consists of a number of RE and RD sequence elements [1]. The region in question has several conserved NLS sequences [1]. The function of Red protein is unknown, but efficient sequestration to nuclear bodies suggests that its expression may be tightly regulated or that the protein self-aggregates extremely efficiently [1]. 
21.50 21.50 25.20 24.60 20.50 16.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.70 0.71 -3.91 7 152 2009-01-15 18:05:59 2004-08-18 17:18:46 6 11 110 0 95 143 0 113.50 59 20.48 CHANGED YuECYPGh.E..cthsDSD-EsDaSKMDhGsK.KGsltRWDF-TpEEYucYMpsKEALPKAAFQaGVKMp-.GRKTR+.pt.+s-KtcLDR-hp+IspIlp++K.htcDGu..........sssK+sKa .....................YAECYPuh...-.....D.hhsDSD-E...VDYS...KMD....G...N.......K..KGPLGRWDFDTpEEYS-YMssKEALPKAAFQ..................YGlKMu-.GR.K.T....R+....hp..E.......p.......N-KA.....ELDRpW+KIssIlpKRK..h-t.D..Gs..................p.K.......................... 0 30 44 71 +7639 PF07808 RED_N RED-like protein N-terminal region Fenech M anon Pfam-B_9780 (release 14.0) Family This family contains sequences that are similar to the N-terminal region of Red protein (Swiss:Q13123). This and related proteins contain a RED repeat which consists of a number of RE and RD sequence elements [1]. The region in question has several conserved NLS sequences and a putative trimeric coiled-coil region [1], suggesting that these proteins are expressed in the nucleus [1]. The function of Red protein is unknown, but efficient sequestration to nuclear bodies suggests that its expression may be tightly regulated or that the protein self-aggregates extremely efficiently [1]. 
20.40 20.40 20.40 21.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.67 0.70 -5.15 8 299 2009-09-10 21:25:46 2004-08-18 17:21:04 8 16 226 0 212 283 4 188.50 30 37.54 CHANGED KKK+h.stLh+p-csc.cclspKYRDRA+ERRcGtNcDhcsssl.......ssa+AVssshtss.puu-pc+psIpESKFLGGDhEHTHLVKGLDauLLpKVRuElhsKpspE-E.......tsclststpt.h.tttp..ttcpt........p.pschpFpsplu+sI.............F+hh..hcp..............ppl.p.NEhFts.....GRMsYlh-L-sEh.-sDIPTTllRSKsDlPstctth.TlssNshlls+LuplhS.....ahRtuspspcsKKKcK ....................................................t..c.p..chsp..pYRDRA..+ERRcs.s....pDhp.p.sch.................tsh+Alt...s..h...p...h...s.....p.....s.t-.pc+.p.hIpc.SK.a..LGGDh-HTHL..VKGLDauLL..pKVR.......s....-........l.t....s.cppcc-c........................................................p.t..........................................p.pp..thtt...tp.l.............ht.....................................t.................h................................................................................................................................................................................................................................................................................................................. 0 74 115 170 +7640 PF07809 RTP801_C RTP801 C-terminal region Fenech M anon Pfam-B_5179 (release 14.0) Family The members of this family are sequences similar to the C-terminal region of RTP801, the protein product of a hypoxia-inducible factor 1 (HIF-1)- responsive gene [1]. Two members of this family expressed by Drosophila melanogaster, Scylla (Swiss:Q9NHN4) and Charybde (Swiss:Q9NHN5), are designated by the GenBank as Hox targets [1]. RTP801 is thought to be involved in various cellular processes [1]. Its overexpression caused the apoptosis- resistant phenotype in cycling cells, and apoptosis sensitivity in growth arrested cells [1]. 
Moreover, the protein product of the mouse homolog of RTP801 (dig2 (Swiss:Q9D3F7)) is thought to be induced by diverse apoptotic signals, and also by dexamethasone treatment [2]. 20.50 20.50 23.70 26.70 18.90 18.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.35 0.71 -4.43 9 137 2009-01-15 18:05:59 2004-08-19 09:24:11 6 1 74 2 82 122 0 115.20 45 52.81 CHANGED LppAKpppLtCoclLlPscLhsplupEll+lStpEPCGlRGshl.lphEs-..pss+plAplpsDPssVsTFELhLsL+.Dpc.sWsplhsh...........Fhs.....s.uhspolpl...SssF+llKpKLYS ..............LppAK.ps..pLtCoclLlPpcLspclAp-lLRLupsEPCGLRGsllclslEpp......p.sC+cl..up.......lss.....D...P.............slVPTF..ELoLVL+.Dsp.sWsplpsh...........Fhs.......s.uhppolhL...SsuFRllKKKLYS................ 0 14 19 48 +7641 PF07810 TMC TMC domain Fenech M anon Pfam-B_5063 (release 14.0) Domain These sequences are similar to a region conserved amongst various protein products of the transmembrane channel-like (TMC) gene family, such as Transmembrane channel-like protein 3 (Swiss:Q7TN63) and EVIN2 (Swiss:Q8IU68) - this region is termed the TMC domain [1]. Mutations in these genes are implicated in a number of human conditions, such as deafness and epidermodysplasia verruciformis [1]. TMC proteins are thought to have important cellular roles, and may be modifiers of ion channels or transporters [2]. 25.00 25.00 25.40 28.80 20.80 23.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.30 0.72 -3.64 26 598 2012-10-02 00:51:22 2004-08-19 09:39:48 8 5 96 0 316 501 1 108.80 41 14.30 CHANGED CWEThVGQEhY+LllhDFlhollsslhs-F.R+lhschh........hhhphhuhtEFsIspNVLcLlYuQTlsWhGsaFuPLLPslsslKLhllFYlKKhoLhtsspPsp+saRAS .......................................................CWEThVGQ.Ehh+LhlhDhlhol.hs.hLls-FhR.tlhl.chh.........sh.hph...uhsEFcIucNVLpLl....YsQshhWhGsFFuPhLPs...lssl+Lhlhh.Yl+ph..ulhpsshP..tp+sFRAS.............. 
0 69 95 195 +7642 PF07811 TadE TadE-like protein Fenech M anon Pfam-B_9054 (release 14.0) Family The members of this family are similar to a region of the protein product of the bacterial tadE locus (Swiss:Q9S4A6). In various bacterial species, the tad locus is closely linked to flp-like genes, which encode proteins required for the production of pili involved in adherence to surfaces [1]. It is thought that the tad loci encode proteins that act to assemble or export an Flp pilus in various bacteria [1]. All tad loci but TadA have putative transmembrane regions [1], and in fact the region in question in this family has a high proportion of hydrophobic amino acid residues. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.76 0.72 -4.07 180 2004 2012-10-01 21:13:59 2004-08-19 09:47:00 7 12 920 0 798 1915 145 42.30 26 24.50 CHANGED GssslEaul.lhPlhl.hllh...uhlchuhhhhspptlppAsppu...AR .......GssslEFul.lhPlll.hllh....ullphuhhhhs....ppslspAuppuAR................... 0 265 502 654 +7643 PF07812 TfuA TfuA-like protein Fenech M anon Pfam-B_9826 (release 14.0) Family This family consists of a group of sequences that are similar to a region of TfuA protein (Swiss:Q52872). This protein is involved in the production of trifolitoxin (TFX), a gene-encoded, post-translationally modified peptide antibiotic [1]. The role of TfuA in TFX synthesis is unknown, and it may be involved in other cellular processes [1]. 
25.00 25.00 25.30 38.70 21.70 19.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.56 0.71 -4.66 32 184 2009-01-15 18:05:59 2004-08-19 09:53:57 7 2 149 0 90 162 10 118.40 46 33.95 CHANGED DGlFtpssuVtH+EILtAlppGltVhGuuSMGALRAAELssaGMhGlGpIachY+cGtl......tsDDEVAlhasssph..tsLo.PLVNlRtTLptAhpssllsppttppLlssAcslaascRT ......DGhFhppsuVhHKElLt.AlspGVpVhGuuSMGALRAAELcsFGMhGlGtlFctYRcGhl.............psDDEVAVsHuss-.G..a.slopsLVNlRtTLptAhtsGllsspttcpllpsA+uhaaspRT.............................. 0 24 50 66 +7644 PF07813 LTXXQ LTXXQ motif family protein Bateman A, Fenech M anon Pfam-B_6101 (release 14.0) Family This protein family includes two copies of a five-residue motif that is found in a number of bacterial proteins bearing similarity to the protein CpxP (Swiss:P32158). This is a periplasmic protein that aids in combating extracytoplasmic protein-mediated toxicity, and may also be involved in the response to alkaline pH [1]. Another member of this family, Spy (Swiss:P77754) is also a periplasmic protein that may be involved in the response to stress [2]. The homology between CpxP and Spy may indicate that these two proteins are functionally related [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.52 0.72 -3.55 46 1977 2012-10-02 12:34:46 2004-08-19 10:47:52 7 6 1152 10 392 1261 72 99.30 31 59.45 CHANGED thttthstlhppLpLT-pQcsphpsltpshcsptpsh..........ppphtshtt................stssspclt..pphhstphct....hpthtpshpphhshLosEQ+pphcplt ...................th....tpshhacsL..sL..T-pQ+....pQh+-lh....pp....tR....cphpts...............shp-.h.c.sh+chh.....................sucsFDc..sts....cs..p..h.......p....c.....ht.ppptsp.....tl.t.hhcspsphYplLTPEQ+pphstp.h................... 
0 95 201 302 +7645 PF07814 WAPL Wings apart-like protein regulation of heterochromatin Fenech M anon Pfam-B_9039 (release 14.0) Family This family contains sequences expressed in eukaryotic organisms bearing high similarity to the WAPL conserved region of D. melanogaster wings apart-like protein. This protein is involved in the regulation of heterochromatin structure [1]. hWAPL (Swiss:Q7Z5K2), the human homologue, is found to play a role in the development of cervical carcinogenesis, and is thought to have similar functions to Drosophila wapl protein [2]. Malfunction of the hWAPL pathway is thought to activate an apoptotic pathway that consequently leads to cell death [2]. 23.90 23.90 24.00 24.00 23.60 23.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.97 0.70 -5.73 13 255 2009-09-14 15:20:58 2004-08-19 10:52:24 8 8 180 0 167 258 0 313.30 27 34.01 CHANGED spsVKshpElpEhGEppEacD-lEYlLsuLc.sssshusRCLShlsLsoKChssuFRhplRA+Gh.sp+lh+sltDspcc...........................shuLssuslhalLopDthshch..DppslclhlpLLc.............................tsspcps...t.spptls+spptlhchscphc..s.....sp+hclcslosutLAhEoh.........hShosp+sGt.FK-cLR.LGuL-+llchlt-sht.........spst..ctpss.hpplhtlp+CL+lLEssolhsspNQsallpappuhhspptsphhpphptphhph...............................hhshl+lllNLTpsssp...chuosthspp.shlsssh.phhphsshlspcssa-hplLuLulLINLsEpSppsR 
.....................................................................................................................l+php-lhchGEppcapD-l-alls..sl...p..s..s....p....sh.sh.RphS.hlpLssKhh..spF+hph.......Rup.Gh.s....tplhcsltstt.pc...........................shulss.u.sl....hhlLspst....shcl........cps..s...l...clhlpLLc......................................pt.....spstc............pppphsK...h.p.pt...l.hphs.cpl.p......................sp.......ph.c.l.p.shohuhLshEsl.................hsho.ppsst.h.+pplh.hGhL-pl....lchlhp..t...................p.s....tt.ptthhtshh.hhtp.sL.pl...LE..s.....sT..h...hs.....p...NQtal...ls..h..p.p...s...hh.....t.s..h.hhpt..stt...h.ph..............................................................................................................................................................................................................................hhhhltlllNlTpss.....p.hu.sp.t.h........spt...t.hl..s.sh....hhh..p..s..t.h..s..p..p...ph......h-hhlLhLshhlNhs..s..................................................................................................................................................................................................................................................................................................................................... 0 48 80 126 +7646 PF07815 Abi_HHR Abl-interactor HHR Fenech M anon Pfam-B_9732 (release 14.0) Family The region featured in this family is found towards the N-terminus of a number of adaptor proteins that interact with Abl-family tyrosine kinases [1]. More specifically, it is termed the homeo-domain homologous region (HHR), as it is similar to the DNA-binding region of homeo-domain proteins [2]. Other homeo-domain proteins have been implicated in specifying positional information during embryonic development, and in the regulation of the expression of cell-type specific genes [2]. 
The Abl-interactor proteins are thought to coordinate the cytoplasmic and nuclear functions of the Abl-family kinases, and seem to be involved in cytoskeletal reorganisation, but their precise role remains unclear [1]. 19.60 19.60 20.00 19.70 18.40 18.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.80 0.72 -3.87 11 332 2009-01-15 18:05:59 2004-08-20 13:21:50 9 4 91 1 135 371 0 76.80 62 17.42 CHANGED IuQsV-lHKEKVARREIGsLTssKpssRspKIluPus.....E.hh+YpRpPIsaosLDslGHGl+.st.....sspttp.pGol ...................ISQTVDIHKEKVARREIGILTTNKN.TSRTH.KIIAPAN....hERPVRYIRKPIDYTlLDDlGH..GVKh.h..p.tt................h................... 2 27 40 81 +7647 PF07816 DUF1645 Protein of unknown function (DUF1645) Fenech M anon Pfam-B_8798 (release 14.0) Family These sequences are derived from a number of hypothetical plant proteins. The region in question is approximately 270 amino acids long. Some members of this family are annotated as yeast pheromone receptor proteins AR781 but no literature was found to support this. 
21.60 21.60 21.60 21.60 21.00 21.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.90 0.71 -3.84 19 218 2009-01-15 18:05:59 2004-08-20 13:23:27 6 2 22 0 123 199 0 182.70 20 62.30 CHANGED ADELFpsGpIRPlp............Plhsh...............................................................Rut.hphtspss.t.s......hRcshRsLSP.cst.................s.stpssppsp.pussPsshs.................sosssuss...pSsSsuuS++WR.L+DLl..LhRSpS-G+cs..........u+cshhphs..st.....................p.hh.su.cspts..................tsossts+pcspt.soAH-hhYssp.R.A....tuE-h+RR....TaLP .....................................................................................................ADElFtsGpI+Phh.......................Phht......................................................................ptt.hp.tsts.....................h+t.h.+ph....ctt........................................................tttpssttsp.psssst..s.hs............................ssssspsp.....pS.sSstu..s...++W+..lpDhh...lh.RS.pS-G+pp..................s.p.....t.....................................................t..t.t.t.......................t..tttp....huscc..h..Y..t.t..t......ttt.t++....ohhP........................................................................................................................................... 0 10 65 99 +7648 PF07817 GLE1 GLE1-like protein Fenech M, Wood V, Finn RD anon Pfam-B_9182 (release 14.0) Family The members of this family are sequences that are similar to the human protein GLE1 (Swiss:O75458). This protein is localised at the nuclear pore complexes and functions in poly(A)+ RNA export to the cytoplasm [1,2]. 
20.30 20.30 20.40 21.20 20.20 20.00 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.57 0.70 -5.45 22 279 2009-01-15 18:05:59 2004-08-20 13:27:05 8 5 230 4 208 288 4 240.30 25 41.92 CHANGED htspIpph+pslht.lp.pcsplKchhspt+RpINsphGQlosop.ppltclhpclhph..........lstsps.s.L....uhpallshlAKtlVpQAEoE.lts+s..puAhPlAtlsh.hlhppaP-ht-hLhA+hhKKCPall....uaspuh..ssE-h+pphGa+cs.sss.hEcpssY.cRhuGhhpLaAAlsphph.tsphs.............................saulppu..W+aLARhlNh.st..........sssphsllusah-sAutphhptYupQhhKlLplltpc ................................................................................................thh.th...hp..ps.phKp.hhhphp+ths......h.l..uQl.....o....s.....s....pplpchhpc.ltph............................................t....hp...shs....pst.....................shta...hh.hhA...cthlpQs-sE..lssp......puAhPluh.lss...t.....lhp......t......h......P......c....h..t.......-llhA..+hh.+pCPahl.....shpp..s........spE..ch...........pc.hlG..........a........p....hp......s..........s...........t..hEpppsah...cRMsGhhplYAAlh.phphshsptp..............................pPaslspu..WpaLApllNh.Ph...........ssspsslLhshLcssutthhptY..tt.QhhKhl.hl...t......................................................... 0 69 110 165 +7649 PF07818 HCNGP HCNGP-like protein Fenech M anon Pfam-B_9462 (release 14.0) Family This family comprises sequences bearing significant similarity to the mouse transcriptional regulator protein HCNGP (Swiss:Q02614). This protein is localised to the nucleus and is thought to be involved in the regulation of beta-2-microglobulin genes. 
21.30 21.30 21.40 21.50 20.50 19.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.08 0.72 -3.95 22 256 2009-01-15 18:05:59 2004-08-20 13:28:23 8 6 213 0 187 254 3 94.20 34 31.46 CHANGED IPPpPs.ucssstLppKlp+hhclKc..puhchNppltssppa+NPulhcKlhcahsID-...huTsaPt-lasPps.asp.sYh...-tLscsQ+ch..tchpp..K ...............................................lPPpPs.Gp.ss......s......p...LppKlp+.hhphKp.......pG.hchNpplppp+paRNPulhcKLlpassI.DE..............hGTsaPt-.laDPp.......s...asppuYh...-tLtcsQ+tt.hc+h-pt.p............................ 0 64 102 147 +7650 PF07819 PGAP1 PGAP1-like protein Fenech M anon Pfam-B_9244 (release 14.0) Family The sequences found in this family are similar to PGAP1 (Swiss:Q765A7). This is an endoplasmic reticulum membrane protein with a catalytic serine containing motif that is conserved in a number of lipases. PGAP1 functions as a GPI inositol-deacylase; this deacylation is important for the efficient transport of GPI-anchored proteins from the endoplasmic reticulum to the Golgi body [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.42 0.70 -4.76 20 1253 2012-10-03 11:45:05 2004-08-20 13:29:28 8 39 770 0 638 5035 1134 153.30 17 24.56 CHANGED clsGlPVLFIPG...NAGSa+..........QlRSlAuss..............pcs..ptsshp..............................................................hDaFolDFsE-hoAhaGpolh.-Qs-YlscAI+hILshY............tss.tsPpSVlllGHSMGGlVARshlshssahs....sslsoIlTLu.oPHstsPlshDsslhchYpplsphWpp.................tht.t.t...........LpslsllS.....lsGGhpD....hhlsu-aoslcs....hls.osuhpshoouIspVW.......hshDHhAllWCpQLhhtlu+sLhph ..................................................................................................................................................................t......................................................................................................................................................................................................................................................................................................................................................................................................h...t.......h......h.........p...t.....l...h.p.hh............................................................ts.pp....l..h...lluHSM.G.G..l..l.............u...+....t.h....l......h........h.......s.......t.t.......................p.....l.....p.....t....l...l...s.......lu..........o..P.....H.t...u....s......s......h..............................................................................................................................................................................................................................................................................................................................h................................................................................................................................................ 
0 197 363 534 +7651 PF07820 TraC TraC-like protein Fenech M anon Pfam-B_9690 (release 14.0) Family The members of this family are sequences that are similar to TraC (Swiss:Q84HT8). The gene encoding this protein is one of a group of genes found on plasmid p42a of Rhizobium etli CFN42 that are thought to be involved in the process of plasmid self-transmission. Mobilisation of plasmid p42a is of importance as it is required for transfer of plasmid p42a, which is also known as plasmid pSym as it carries most of the genes required for nodulation and nitrogen fixation by the symbiotic bacterium. The predicted protein products of p42a are similar to known transfer proteins of Agrobacterium tumefaciens plasmid pTiC58 [1]. 20.80 20.80 20.90 23.30 20.40 19.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.90 0.72 -3.79 15 82 2009-01-15 18:05:59 2004-08-20 13:31:27 7 1 62 0 22 74 1 82.60 40 94.92 CHANGED KKPouKIR-EIAKLQEQLKpAETREAERIGRlALKAGLGEIEI-EuELQuAFEElApRFRuGctsssG.........uuusuopssussuoGAuAGusuEA ...............Kps.tclcsEIt+Lp-pL+ph-s+pAERIGRlAlKuGLu-lEIs-sclpttFE-lAtRFRpGtttt.t.................................t............................... 0 1 8 14 +7652 PF07821 Alpha-amyl_C2 Alpha-amylase C-terminal beta-sheet domain Fenech M anon Pfam-B_1278 (release 14.0) Domain This domain is organised as a five-stranded anti-parallel beta-sheet [1,2]. It is the probable result of a decay of the common-fold. 20.10 20.10 20.40 22.40 19.60 17.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.81 0.72 -4.10 33 290 2009-01-15 18:05:59 2004-08-23 10:31:22 7 8 64 15 116 300 10 61.20 48 13.67 CHANGED pRsuIpsp.SslcIltA-uDLYsAhIDs..................KlhhKIGsc.....shhP....s...saplsssGp-YAVWEK ........pRsGIpup.Ssl+IL..tA-...uDhYlApI.Ds..................KVlsKIGs+hD.hssllP....s.......sa.p.hus.pGpDYAVWEK............. 
0 30 72 97 +7653 PF07822 Toxin_13 Neurotoxin B-IV-like protein Fenech M anon Pfam-B_66513 (release 14.0) Domain The members of this family resemble neurotoxin B-IV (Swiss:P01525), which is a crustacean-selective neurotoxin produced by the marine worm Cerebratulus lacteus. This highly cationic peptide is approximately 55 residues and is arranged to form two antiparallel helices connected by a well-defined loop in a hairpin structure. The branches of the hairpin are linked by four disulphide bonds. Three residues identified as being important for activity, namely Arg-17, -25 and -34, are found on the same face of the molecule, while another residue important for activity, Trp30, is on the opposite side. The protein's mode of action is not entirely understood, but it may act on voltage-gated sodium channels, possibly by binding to an as yet uncharacterised site on these proteins. Its site of interaction may also be less specific, for example it may interact with negatively charged membrane lipids [1]. 20.90 20.90 26.70 129.30 18.10 16.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.17 0.72 -4.05 2 2 2009-01-15 18:05:59 2004-08-23 10:33:18 6 1 1 1 0 4 0 55.00 75 100.00 CHANGED ASuTWGuuYPACENNCRKpYD.CI+CQGKWAGKRGKCAAHChlQpssCpsKCKKc ASuTWGuuYPACENNCRKpYD.CI+CQGKWAGKRGKCAAHChlQpssCpsKCKKc 0 0 0 0 +7654 PF07823 CPDase Cyclic phosphodiesterase-like protein Fenech M anon Pfam-B_73368 (release 14.0) Domain Cyclic phosphodiesterase (CPDase, Swiss:O04147) is involved in the tRNA splicing pathway. This protein exhibits a bilobal arrangement of two alpha-beta modules. Two antiparallel helices are found on the outer side of each lobe and frame an antiparallel beta-sheet that is wrapped around an accessible cleft. Moreover, the beta-strands of each lobe interact with the other lobe. The central water-filled cavity houses the enzyme's active site [1]. 
20.40 20.40 21.00 20.40 20.10 20.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.50 0.71 -4.82 15 138 2012-10-03 21:31:48 2004-08-23 10:36:00 6 4 118 6 92 200 152 191.60 23 91.61 CHANGED M.......................hu..........lWhhPst..s.h.phppLl.puLpslFs......sp..........PsFEPHlTlsuslslc......sps-lpclLpu.ussulcul..t..................hlplssVssGcpYFc+lalplptsstLhulAplh+phFs...st.t................................................t.spaspptatPHlSLlYuDlp.h-p..sphptltpclccsh...............................suhuWs...hspltLVpC-Gs...Vc-WpllushsL ......................................................................hulWhhPst..shh.....pplptlh..tslpshas.....s.................PhFpPHlTlsu..s.lphp.......pts..ss..p..ph..Lpu..sssu.hpsh............................hlphsslss...u.c.p.a..a..pplalt.lp.s..t.lh...shsphh+phh................................................................tthspptahPHlSLl.Yu..Dl...cp....t.htth..tp.p..lpst......................................t.hsap....sphtlh...s...ps......lppWphltth.h............................................................................... 0 35 63 83 +7655 PF07824 Chaperone_III Type III secretion chaperone domain Fenech M anon Pfam-B_32938 (release 14.0) Domain Type III secretion chaperones are involved in delivering virulence effector proteins from bacterial pathogens directly into eukaryotic cells. The chaperones may prevent aggregation and degradation of their substrates, may target the effector to the secretion apparatus, and may ensure a secretion-component unfolded confirmation of their specific substrate. One member of this family, SigE (Swiss:O30917) forms homodimers in crystal. The monomers have a novel fold with an alpha-beta(3)-alpha-beta(2)-alpha topology [1]. 
25.00 25.00 25.40 38.40 20.40 20.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.45 0.71 -4.16 4 132 2012-10-01 22:01:34 2004-08-23 10:39:55 7 2 125 2 3 37 0 106.40 88 98.71 CHANGED csll.pLYcALGL-.hshD-PAllIDDDlpIYFsEut-uLEMsCPhhsLP-slppLQphLpLNYASsVsLAsDA-sosLlALhRLPtpSstEEhhsGhphaIopV+pL+pchA .......ESLLNRLYDALGLD.APEDEPLLIIDDGIQVYFNESDHTLEMCCPFMPLPDDILTLQHFLRLNYTSAVTI....GA...DADNTALVALYRLPQTSTEEEALTGFELFISNVKQLKEHYA.................. 0 0 1 2 +7656 PF07825 Exc Excisionase-like protein Fenech M anon Pfam-B_46296 (release 14.0) Domain The phage-encoded excisionase protein (Xis, Swiss:P03699) is involved in excisive recombination by regulating the assembly of the excisive intasome and by inhibiting viral integration. It adopts an unusual 'winged'-helix structure in which two alpha helices are packed against two extended strands. Also present in the structure is a two-stranded anti-parallel beta-sheet, whose strands are connected by a four-residue 'wing'. During interaction with DNA, helix alpha2 is thought to insert into the major groove, while the wing contacts the adjacent minor groove or phosphodiester backbone. The C-terminal region of Xis is involved in interaction with phage-encoded integrase (Int), and a putative C-terminal alpha helix may fold upon interaction with Int and/or DNA [1]. 21.40 21.40 21.40 21.60 21.30 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.62 0.72 -4.21 2 289 2012-10-04 14:01:12 2004-08-23 10:57:07 6 2 219 9 20 95 3 71.50 39 84.80 CHANGED hhlTLpEWstcp.RpP.S.pTlRRWsREshIhPsPVKcGRpYhhctsAshh-.p.....PVsusLlpRItsu+hutp .......hl..lTLpEW.ss..ccF.u.t.P..o.sTLpKYu+tGhIhP.P.KlGRcWhlDcpAhFV.G...s.s.......................................................pssh......................... 
0 2 10 13 +7657 PF07826 IMP_cyclohyd IMP cyclohydrolase-like protein Fenech M anon Pfam-B_50235 (release 14.0) Domain This enzyme (Swiss:O27099) is may catalyse the cyclization of 5-formylamidoimidazole-4-carboxamide ribonucleotide to inosine monophosphate (IMP), a reaction which is important in de novo purine biosynthesis in archaeal species. This single domain protein is arranged to form an overall fold that consists of a four-layered alpha-beta-beta-alpha core structure. The two antiparallel beta-sheets pack against each other and are covered by alpha-helices on one face of the molecule. The protein is structurally similar to members of the N-terminal nucleophile (NTN) hydrolase superfamily. A deep pocket was in fact found on the surface of IMP cyclohydrolase in a position equivalent to that of active sites of NTN-hydrolases, but an N-terminal nucleophile could not be found. Therefore, it is thought that this enzyme is structurally but not functionally similar to members of the NTN-hydrolase family [1]. 25.00 25.00 27.50 37.40 18.10 18.00 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.33 0.71 -4.97 6 187 2009-01-15 18:05:59 2004-08-23 11:01:02 6 2 187 13 63 183 27 212.30 40 92.54 CHANGED MYlGRFLllG+sppG.shssYRVSSRSFPNRpshphs-ssssllPcDspE..hhcNPYIoYNClRlVs-ssVVoNGSHTDsIA-KlchGhs.RDALs.uLhsMDYEKD-YN.TPRIAullst-c.ualGhVss-cl..........hh+hsElcsGcuahLosYpt...ststhhshcucos.....E-sschsh...phtsFEHsVsuAsshhcs.........-GaclA.spsh ......................YPGRGIllG+o.t-G.pslsAYaI.......MGRS....sSRN.R.l..h.l..c-.......s..p...s.h.s..p..sh..Dsuc..lpDss.LIIYsPVRll.......G....s..psIVTNGDQTDTIhE....GhstptoFppuLpoR-aEPDuPNaTPRISGll...........c....h-s......s.h...hSIl.Ks.ssss.t..............s.RhtatapsshsGEGaaIpTYpp......Dus.PL....PSF-GEPt.h.......s-hsphs.....ps..Ls.-N+VSLh.s+hIDh.........tth.......thst...................................... 
0 22 44 57 +7658 PF07827 KNTase_C KNTase C-terminal domain Fenech M anon Pfam-B_29524 (release 14.0) Domain Kanamycin nucleotidyltransferase (KNTase) is involved in conferring resistance to aminoglycoside antibiotics and catalyses the transfer of a nucleoside monophosphate group from a nucleotide to kanamycin. This enzyme is dimeric with each subunit being composed of two domains. The C-terminal domain contains five alpha helices, four of which are organised into an up-and-down alpha helical bundle. Residues found in this domain may contribute to this enzyme's active site [1]. 26.40 26.40 26.50 27.00 22.90 26.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.80 0.71 -4.44 2 78 2012-10-01 22:14:54 2004-08-23 11:06:00 6 2 70 2 9 39 0 137.40 69 57.06 CHANGED DstGahp+lh.sAcSsptpsF+pAIptllVtEhaEYsGKhRNlphpGPoTaLPSLslphAhhGAMLIGLHpphhaoTuA.VLsEAlK.schPpGaDHlsphsMSGpLupstKllpuhEsFWpGl.tWstcHsYllc.SKRIPF ................DSGGYLEKVYQTAKSVEAQTFHDAICALIVEELFEYAGKWRNIRVQGPTTFLP.SLTVQVAMAGAMLIGLHHRICYTTSASVLTEAVKQSDLPSGYD.HLCQFVMSGQLSDSEKLLESLENFWNGIQEWTERHGYIVDVSKRIPF.......... 0 3 7 8 +7659 PF07828 PA-IL PA-IL-like protein Fenech M anon Pfam-B_99281 (release 14.0) Family The members of this family are similar to the galactophilic lectin-1 expressed by P. aeruginosa ((PA-IL, Swiss:Q05097). Lectins recognising specific carbohydrates found on the surface of host cells are known to be involved in the initiation of infections by this organism. The protein is thought to be organised into an extensive network of beta-sheets, as is the case with many other lectins [1]. 
20.80 20.80 21.60 46.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.63 0.71 -4.32 2 27 2012-10-03 19:46:52 2004-08-23 11:09:01 7 2 21 15 9 19 1 113.30 56 91.42 CHANGED sWpGpV.ANsEsGpsTulIhp.GDsIolVAtGWspYG.sph.hstsDt.hPsp...spsu.husLVhKIuNpthh.sNssLa+hVs...VpGtlhLlaNDVPGTaGsNSGpFpVplhh-pp .sWpGpV.ANsEsGQsTulIhpsGDVIoIVAuGWspYG..sppahstsptchP.....cphhhspsshssuLlhKIGNpuhh.sNsGLa+hVs...VpGtloLlaND..VPGoYGNNSGuFSVNlth-pp.... 0 1 2 7 +7660 PF07829 Toxin_14 Alpha-A conotoxin PIVA-like protein Fenech M anon Pfam-B_46690 (release 14.0) Domain Alpha-A conotoxin PIVA (Swiss:P55963) is the major paralytic toxin found in the venom produced by the piscivorous snail Conus purpurascens. This peptide acts by blocking the acetylcholine binding site of the nicotinic acetylcholine receptor at the neuromuscular junction [1]. The overall shape of the peptide is described as an "iron" with a highly charged hydrophilic loop of 15S-19R forming the "handle" domain that is exposed to the exterior of the protein. The stability of the conotoxin is primarily governed by three disulphide bonds. A triangular structural motif formed by residues 19R, 12H and 6Y is thought to constitute a "binding core" that is important in binding to the acetylcholine receptor [2]. 25.00 25.00 30.70 57.60 22.10 21.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.29 0.72 -4.21 2 4 2009-01-15 18:05:59 2004-08-23 11:14:11 6 1 2 2 0 6 0 25.50 81 66.67 CHANGED GCCGpYPNAACHPCuCp.sRPsYCsp GCCGsYPNAACHPCuCK.sRPsYCsp 0 0 0 0 +7661 PF07830 PP2C_C Protein serine/threonine phosphatase 2C, C-terminal domain Fenech M anon Pfam-B_5253 (release 14.0) Domain Protein phosphatase 2C (PP2C) is involved in regulating cellular responses to stress in various eukaryotes. It consists of two domains: an N-terminal catalytic domain and a C-terminal domain characteristic of mammalian PP2Cs. 
This domain consists of three antiparallel alpha helices, one of which packs against two corresponding alpha-helices of the N-terminal domain. The C-terminal domain does not seem to play a role in catalysis, but it may provide protein substrate specificity due to the cleft that is created between it and the catalytic domain [1]. 22.70 22.70 22.80 23.90 20.80 22.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.72 0.72 -3.87 15 272 2009-01-15 18:05:59 2004-08-23 11:23:01 8 3 85 8 121 231 0 75.90 49 20.16 CHANGED SllLlCFPGAP+VSEEAl++EtcL-chLEs+VEEllcc.sucpphPDLhpVh+sLuuEs.IPsLPPGGGLsSK+slIpsVYp+ .....................SllLlCFPsAPKVStEAV++EsELDKhLEsRVc.El....hpc...tttcshP...DL..spVh+h.Lu..uEs.....IPsLPPGGGLsu.....K..+.slIEtsYpc............ 0 26 37 69 +7662 PF07831 PYNP_C Pyrimidine nucleoside phosphorylase C-terminal domain Fenech M anon Pfam-B_1661 (release 14.0) Domain This domain is found at the C-terminal end of the large alpha/beta domain making up various pyrimidine nucleoside phosphorylases [1,2]. It has slightly different conformations in different members of this family. For example, in pyrimidine nucleoside phosphorylase (PYNP, Swiss:P77826) there is an added three-stranded anti-parallel beta sheet as compared to other members of the family, such as E. coli thymidine phosphorylase (TP, Swiss:P07650) [1]. The domain contains an alpha/ beta hammerhead fold and residues in this domain seem to be important in formation of the homodimer [1]. 20.50 20.50 20.50 21.40 20.40 20.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.21 0.72 -4.52 54 2815 2012-10-02 20:27:15 2004-08-23 11:28:27 8 10 2677 20 573 1892 417 73.80 36 16.87 CHANGED alsplsspplGhsuhtLGAGRtpppD.IDhusGlhlp+KlG-pVcpG-slhslauscct.hcpshstlppshtIu .......................................hloplsupslGhAuhtLGAGRtptpD.sIDhuVGlhhpt+lGDpVc..pG.c.sLssl.ausccs....hp.pshptlppulpl................... 
0 197 372 477 +7663 PF07832 Bse634I Cfr10I/Bse634I restriction endonuclease Fenech M anon Pfam-B_46671 (release 14.0) Domain Cfr10I (Swiss:P56200) and Bse634I (Swiss:Q8RT53) are two Type II restriction endonucleases. They exhibit a conserved tetrameric architecture that is of functional importance, wherein two dimers are arranged 'back-to-back' with their putative DNA-binding clefts facing opposite directions. These clefts are formed between two monomers that interact, mainly via hydrophobic interactions supported by a few hydrogen bonds, to form a U-shaped dimer. Each monomer is folded to form a compact alpha-beta structure, whose core is made up of a five-stranded mixed beta-sheet.The monomer may be split into separate N-terminal and C-terminal subdomains at a hinge located in helix alpha3 [1]. 20.70 20.70 22.80 21.60 20.40 20.20 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.54 0.70 -5.28 4 13 2012-10-11 20:44:44 2004-08-23 11:38:48 6 2 11 37 2 16 0 267.20 23 86.22 CHANGED hssIpthsEs..GKhplN.+..assluphl-Ntlsps.phpc.LDthRs.ssctActtG...ppsusuuhspssGsW.ElMlu.......ph.chhLp.s.pp.l...VlpMPNsp..........SFDahslacsEhpEhI.phcupL..pKssltL.TSsPDlulI......E-.Ks..c-hhpp.Ist.T+ss.sh..sLYpphps+sphcclpuululKTShRPDRRhQ.laEuslhKuL.salphphW........KYahssop.lusADssuhpT...sAsHulsps+Shsp+AVD-lahhsohtDlsphlsphlpc ............................................th..hp.t....sh.p.h...hs.t.ph.c....hpshhtt.scttt...th.ssuAhsNssGsW.Ehhhu.........ltshphhhp..ssp..p...l...llphPN.p..........oaDhhpLacsphpptlpsL....cppL.......ptsplpLhoSsPDllIl........pthps..pshhh....psIsphocs.shsh.....slYpphps+sch.cslhuhlulKTShRPDRRhQ.laEusllKuL..salppptW.................hKYYutuop.lusADspuhpT...sAsHolspspshPp+AVDclaphsohtDlsphlpphl.............. 
1 1 1 1 +7664 PF07833 Cu_amine_oxidN1 Copper amine oxidase N-terminal domain Fenech M anon Pfam-B_46519 (release 14.0) Domain Copper amine oxidases catalyse the oxidative deamination of primary amines to the corresponding aldehydes, while reducing molecular oxygen to hydrogen peroxide. These enzymes are dimers of identical subunits, each comprising four domains. The N-terminal domain, which is absent in some amine oxidases, consists of a five-stranded antiparallel beta sheet twisted around an alpha helix. The D1 domains from the two subunits comprise the 'stalk' of the mushroom-shaped dimer, and interact with each other but do not pack tightly against each other [1,2]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.42 0.72 -3.65 134 2772 2009-09-11 19:31:59 2004-08-23 13:46:59 6 269 442 32 1082 2618 19 86.20 21 23.12 CHANGED tssRshVPlRhlu..-uL..G..s...pVpW.sspspslsl..pp......sspp.....lplp.l................G.....sptshl........N.......G.......pphthc..sss.h....lt...t....uR..ThVPlRaluEshG.hp...........VpW.-sp.....spslhl ...........................................................................................hhlPhc.h.........p.t.h.....s....h.......pl.ta....ptt...t...p.t....hhh...p.............................sst.t..........lp..hp.h.....................................................................s.......s.p.phh.l............................N..............G............p..p..h..phs......ssshl..........hs......uc..shVPl+.hl.u-.shG...hp...........lpa.stt........................................................................................................................... 0 649 941 984 +7665 PF07834 RanGAP1_C RanGAP1 C-terminal domain Fenech M anon Pfam-B_23411 (release 14.0) Domain Ran-GTPase activating protein 1 (RanGAP1, Swiss:P46061) is a GTPase activator for the nuclear Ras-related regulatory protein Ran, converting it to the putatively inactive GDP-bound state. 
Its C-terminal domain is required for RanGAP1 localisation at the vertebrate nuclear pore complex, and is sumoylated by the small ubiquitin-related modifier protein (SUMO-1, Swiss:Q93068). This domain is composed almost entirely of helical substructures that are organised into an alpha-alpha superhelix fold, with the exception of the peptide containing the lysine residue required for SUMO-1 conjugation [1]. 25.00 25.00 27.10 26.40 24.20 24.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.89 0.71 -4.85 3 102 2009-01-15 18:05:59 2004-08-25 09:31:02 6 11 54 14 43 98 0 173.50 54 32.44 CHANGED GsDuEssu+stpcPsluEsAPs.sPPhPuDlSTFLuFPSPEKLlRLGPKRSsLIAQQVDVoDsEKVVpAFLKVSSVYKDEuEVKtAVpETlDALM+KAFsNSuF.QSNoFITSLLVpMGLLKSED.KVKsIusLsGPLLTLNHMVQQ-YFPKuLAulLLAFVSKPNuVLESCuSARHoLLpTLaK ...........................tcstssppch.csss.uEss.Ps....hss...ss.PsDlSTFLuFPSPEKLLRL.GPK.sSh.L......I......sQQT..D.T.....SD..s.EK.......V....Vs........AFL........KVS...SVa..+.............D.-..u..s..V....+..oAVt-ulDALMKKAFSouoF..NSssFl..T.pLLlHMGLLKSED...Kl.K.A.I.s.sLaGPLM.sLNHh..VQQDYFPKuLAPlLlAFlTKPN....tALEoCSh.ARHsLLQTLap................... 0 10 13 26 +7666 PF07835 COX4_pro_2 Bacterial aa3 type cytochrome c oxidase subunit IV Fenech M anon Pfam-B_86185 (release 14.0) Domain Bacterial cytochrome c oxidase is found bound to the to the cell membrane, where it is involved in the generation of the transmembrane proton electrochemical gradient. It is composed of four subunits. Subunit IV consists of one transmembrane helix that does not interact directly with the other subunits, but maintains its position by indirect contacts via phospholipid molecules found in the structure. The function of subunit IV is as yet unknown [1]. 
20.30 20.30 20.40 20.60 20.10 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -7.95 0.72 -3.98 36 225 2009-01-15 18:05:59 2004-08-25 15:22:17 7 1 213 5 76 177 38 44.90 37 68.34 CHANGED Hcc........utMDhstpE+TacuFl+hsphuslsllsl...LlhhAlhhs ..................Hpp......huusMDYspHE+TYsGFlthsKauolsllsl...llhMAhhhh......... 1 16 40 56 +7667 PF07836 DmpG_comm DmpG-like communication domain Fenech M anon Pfam-B_1675 (release 14.0) Domain This domain is found towards the C-terminal region of various aldolase enzymes. It consists of five alpha-helices, four of which form an antiparallel helical bundle that plugs the C-terminus of the N-terminal TIM barrel domain [1]. The communication domain is thought to play an important role in the heterodimerisation of the enzyme [1]. 21.00 21.00 21.10 24.10 19.50 20.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.97 0.72 -4.57 37 789 2009-01-15 18:05:59 2004-08-25 15:24:35 6 8 628 4 196 566 103 65.60 60 19.32 CHANGED pssplDRpoLslGYAGVYSSFLhHAcRAAp+aGVDsR-ILlELGRR+hVGGQEDhIlDlAh-Lupp ............PlRVDR-oLsLGYAGVYSSFL+HsEpAAt+Y.GlsAhDILVELG+R+hVGGQEDMIlDlAL-Ltp.t.................. 0 45 122 167 +7668 PF07837 FTCD_N Formiminotransferase domain, N-terminal subdomain Fenech M anon Pfam-B_4434 (release 14.0) Domain The formiminotransferase (FT) domain of formiminotransferase- cyclodeaminase (FTCD) forms a homodimer, and each protomer comprises two subdomains. The N-terminal subdomain is made up of a six-stranded mixed beta-pleated sheet and five alpha helices, which are arranged on the external surface of the beta sheet. This, in turn, faces the beta-sheet of the C-terminal subdomain to form a double beta-sheet layer. The two subdomains are separated by a short linker sequence, which is not thought to be any more flexible than the remainder of the molecule. 
The substrate is predicted to form a number of contacts with residues found in both the N-terminal and C-terminal subdomains [1]. 19.20 19.20 19.80 23.20 18.10 16.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.86 0.71 -4.76 37 493 2009-01-15 18:05:59 2004-08-25 15:26:05 7 7 380 6 191 419 213 172.50 42 49.65 CHANGED KlVECVPNFSEGRcp-hl-tIhsshp.shpGVpLLDhssDssHNRoVlThVG-P...........-slhcAshpus+hAs-LIDhppHcGp.HPRhGAsDVlPFlPlpssTMEEClplA+plGccluccLtlPVYLYpcuAspPcRcsLssIR+G..paEuht-Kl.KpscW...........cPDaGPsphp...PouGsTslGAR. .......................pllEClPNFSEGR..spphI-...tIsps.....h......+......s.....h.....s.........G....V..pLL.DhssDtsaNRoVhT.l.V.Gc.P.......................cs.l..t-AshphschAschI....Dhsp.HpG.p........HPRhGAsDVlPFlPl.p..s.s..T..........h-EClplA+.p.....l.ucclu.c.c...l.s.lPVaLYEpuA..s.p.PcRcsLsslR+G..paEuh.tE..K..l....p.ps...c.W...................tP.Da..Gs.t...php...PosGsTslGAR......................................... 0 81 121 155 +7670 PF07839 CaM_binding Plant calmodulin-binding domain Fenech M anon Pfam-B_9279 (release 14.0) Domain The sequences featured in this family are found repeated in a number of plant calmodulin-binding proteins (such as Swiss:Q8W235, Swiss:Q84ZT8 and Swiss:Q8H6X1), and are thought to constitute the calmodulin-binding domains [1,2]. Binding of the proteins to calmodulin depends on the presence of calcium ions [1,2]. These proteins are thought to be involved in various processes, such as plant defence responses [1] and stolonisation or tuberization [2]. 
21.00 21.00 22.40 21.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.55 0.71 -3.87 19 140 2009-01-15 18:05:59 2004-08-25 15:30:43 6 6 22 0 90 126 0 105.70 30 21.15 CHANGED pppptp.ppsphhcspspppps+phpph+csll.hp+hs....pshccsh+hsshtsphls.ps-.s-sE+V.sLRH.Qcsp-..+Kcu-chMlDaAlccslSKLsssRKpKVchLVpAFETVls ....................................................................................h................t.tt.p...t....pp..hp...hcthh....tp......pt.cphtphp....t.....ph..hs.psp.....csEcV..LRH.Qpsp-..+Kpuc.hhhspslccssoKLs..sRK.pKVchLVtAFETVls... 0 12 52 72 +7671 PF07840 FadR_C FadR C-terminal domain Fenech M anon Pfam-B_11411 (release 14.0) Family This family contains sequences that are similar to the fatty acid metabolism regulator protein (FadR, Swiss:P09371). This functions as a dimer, with each monomer being composed of an N-terminal DNA-binding domain and a regulatory C-terminal domain. A linker comprising two short alpha helices joins the two domains. In the C-terminal domain, an antiparallel array of six alpha helices forms a barrel-like structure, while a seventh alpha helix forms a 'lid' at the end closest to the N-terminal domain. This structure was found to be similar to that of the C-terminal domain of the Tet repressor. Long-chain acyl-CoA thioesters interact directly and reversibly with the C-terminal domain, and this interaction affects the structure and therefore the DNA binding properties of the N-terminal domain [1]. 
23.00 23.00 23.20 23.30 22.90 22.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.88 0.71 -4.62 29 828 2012-10-01 22:11:53 2004-08-25 15:31:18 7 3 818 8 104 339 14 167.80 62 68.98 CHANGED NNFWETSGLNILETLs+L.DtsthPpLl-sLLSARTNlSsIalRtAl+tNP-cs..............................hElLsphcpl--s..............AcAaspaDYpLa+pLAFsSGNPlYsLILNGhKGLYoRVGpaYFusscuRpLAhsFYcpLhplscpppa-plhthlRpYGhpSGtIWpph+ssl..Ppshs ...................................................NNFWETSGLNILETLARL.D.HESVPQLIDNLLSVRTNISo....I.FI.R.TA.hRpHP...-K.A...................................................................pEVLAsA..pE....V....sD+........................................A-AFA-LDYsIFRGLAFASGN.PIYGLILNGh..K.G.LYT.RIG.RaYFuN.PEARpLALsF....Y++LuuLCppG.s+DQVhEsVR+YG+-SG-IWH+MQcsLPuDL.A.......................................................... 0 15 38 73 +7672 PF07841 DM4_12 DM4/DM12 family Fenech M anon Pfam-B_5243 (release 14.0) Family This family contains sequences derived from hypothetical proteins expressed by two insect species, D. melanogaster and A. gambiae. The region in question is approximately 115 amino acid residues long and contains four highly- conserved cysteine residues. 21.70 21.70 21.80 21.90 21.60 21.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -10.08 0.72 -3.65 45 504 2009-01-15 18:05:59 2004-08-25 15:54:11 8 5 28 0 367 546 0 82.60 26 33.78 CHANGED RpplYchlEphl.sphGh...sG..+sClLRsICEsuph.h...ctsGllu-ll+llFoPspscst.........pYhpActhGptt...s-Cppha..t.pC ..........................Rh.lYphlEphh.p...p.h..Gh.......sG+sClLRsICEs.u.phsh..........pp........pu.l.....l.u-.......ll..+llhosspsp.t................................tpYhpApphGptt..............tsCtphat.pC...................................................... 
0 91 117 287 +7673 PF07842 GCFC GC-rich sequence DNA-binding factor-like protein Fenech M, Mistry J, Wood V anon Pfam-B_9357 (release 14.0) & Pfam-B_9894 (release 19.0) Family Sequences found in this family are similar to a region of a human GC-rich sequence DNA-binding factor homolog (Swiss:Q9Y5B6). This is thought to be a protein involved in transcriptional regulation due to partial homologies to a transcription repressor and histone-interacting protein [1]. This family also contains tuftelin interacting protein 11 which has been identified as both a nuclear and cytoplasmic protein, and has been implicated in the secretory pathway. Sip1, a septin interacting protein [2] is also a member of this family. 20.20 20.20 20.60 20.60 20.00 19.90 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.90 0.70 -5.04 36 631 2009-01-15 18:05:59 2004-08-25 15:56:45 7 15 282 0 392 539 6 222.60 22 31.13 CHANGED LppltptFcphppca.pcYc...phtLuplAsuhltPLl+pthhsWsPLcsss...hlt.htph+.lL................................t..ssYcshlWpshlsthp.sshspWpst.sssshlsllptW.slLP.hhhpsll-plllP+Lp..ptVpp.WcPhocsh.....sh.p....sWlaPWLPhLs.p+hcs....................lhssI+pKlppsLpsWp.p.tush.hLp.W.pclass...........tpasshlhppllP+LtttLp...ph.lsP.pp..Qs..lchhptlhpWpsllssphhspL..l.ppFFsc...WhpsL 
.................................................................................................................................................................pthhphFpphpppa.ppYp...ph.lu.hhst.hltPll+.plh....s.Ws.P...........L...............ppss.....................................................................psa.c.ph..hW..hhh.hh...t.t.....t.php.c.....ps..............ss..h...plls....................................s.ll-.p........lllP+Lp..thlcp..Wc............Pho.ss...............sh.p...........hlhshh..sh.h..t.....t.p.hcs....................lhpslht+h.ppsl........p.p.......................................slahs............................a.t....h..h.p.phhsthth.hhp...........tp.....ps.......h.chht.lhtW...p..shls...ph.............hhtl..h.t.phhs+...a...................................................................... 2 116 174 305 +7674 PF07843 DUF1634 Protein of unknown function (DUF1634) Fenech M anon Pfam-B_9594 (release 14.0) Family This family contains many hypothetical bacterial and archaeal proteins. A few members of this family are annotated as being putative transmembrane proteins, and the region in question in fact contains many hydrophobic residues. 21.00 21.00 21.10 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.20 0.72 -4.40 45 351 2009-01-15 18:05:59 2004-08-25 16:01:49 6 4 335 0 99 238 4 98.50 34 79.49 CHANGED IutlLRhGVlluusllhlGhllhhlpssu...............h...ts..hs.stlhpuhhthcuhslI..hhGLhlLIhTPlhRVllulhsFhpE+DhlYssIoh.....lVLhhLhhul...hl ..............................................................IuplL+hGVhluuhlIhhGllhhhlpsts...................t...ht..tlh.puhht......hpshsll..hhGlhlLIlTPVLRVslulhsF.hpE+DhhYVh.ITslVLhIlhhuhl........... 0 39 67 87 +7676 PF07845 DUF1636 Protein of unknown function (DUF1636) Fenech M anon Pfam-B_9608 (release 14.0) Family The sequences featured in this family are derived from a number of hypothetical prokaryotic proteins. 
The region in question is approximately 130 amino acids long. 28.10 28.10 28.10 28.50 28.00 27.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.56 0.71 -3.85 52 189 2009-01-15 18:05:59 2004-08-25 16:05:15 6 1 150 0 66 191 92 110.70 33 83.77 CHANGED lhVCsoC+tt.......tss.psGttLhstLpst..t.s..tt...lplpsVpCLsuCs+..uCsVAlp.uss+hoYlaGcl....sst...sssssllshAthYtsoscGhlPa+cRPptl+cphlARIPP ........................lhVCsoC+ts..........tss.ps...GttLhstLpst..........s......ts...lplcsVpCLuuCs+..sColAlp.usu+hoYlaGcl....ss......ssupsllshAphYtsos-GhlPapcRPpsl+cthlARlPP............... 0 12 42 48 +7677 PF07846 Metallothio_Cad Metallothio_7; Metallothionein family Fenech M anon Pfam-B_9622 (release 14.0) Family The sequences making up Metallothio_Cad are found repeated in metallothionein proteins expressed by several different Tetrahymena species. Metallothioneins are low molecular mass, cysteine-rich metal-binding proteins that are thought to be involved in the regulation of levels of trace metals, and detoxification of these metals when present in excess [1]. Some of the metallothioneins found in this family (for example, Swiss:Q8T6B3) are known to be induced by cadmium and are thought to be involved in the cellular sequestration of toxic metal ions. The high proportion of cysteine residues allows the metal ions to be bound by the formation of clusters of metal-thiolate complexes [1]. Tetrahymena spp. metallothioneins differ from other eukaryotic metallothioneins mainly in the length of their sequences and in the cysteine-containing motifs they exhibit. 19.50 19.50 24.70 23.70 16.80 15.80 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.37 0.72 -4.02 4 42 2012-10-04 14:35:37 2004-08-25 16:06:25 6 4 9 0 0 48 0 20.80 83 35.79 CHANGED CCCGsNAKPCCTDPNSGCCCV CCCG-pAKsCCTDPNSGCCCs. 
0 0 0 0 +7678 PF07847 DUF1637 Protein of unknown function (DUF1637) Fenech M anon Pfam-B_6051 (release 14.0) Family This family contains many eukaryotic hypothetical proteins. The region featured in this family is approximately 120 residues long. According to InterPro annotation, some members of this family may belong to the cupin superfamily. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.52 0.71 -4.95 32 328 2012-10-10 13:59:34 2004-08-25 16:08:44 7 5 139 0 188 457 26 187.40 32 75.01 CHANGED llcplpspDlslpsphth......................................stp...hssloYhcl.aEs-.sFShulFhLssuusIPLHDHPGMsVhpKlLaGpl+lcSaDhlc........s.ssstt.p............s+hAplh.sssp....hosssssslL...hPpp.........sNlHphsAls...PsAhLDlLuPPYs...sstG.RcCsYYcthshss...........t........................................hsaLpEh...s.Pcsah.hts..Y.GP.pl ...................................................................hsphpstDltlt.t....................................................t..sslsYhc..l..a..Es..c....sFS...hu.....lF.h.L.P.s..uu.sIPLHsHPuMsV......hoKlLa.Gsl+lcSYDhlc................s...s........tt......................................................h+hAplh.h-s..........housss.sslL...hPpp...............GNlHphsAls......ssAhLDlL.......uP..PYs...........t...tG....RcCsYYc.h..sp..........................................................t.shL.ch.....Ppshh.htt..Y.GP................................................................................................. 0 43 102 152 +7679 PF07848 PaaX PaaX-like protein Fenech M anon Pfam-B_9563 (release 14.0) Family This family contains proteins that are similar to the product of the paaX gene of Escherichia coli (Swiss:P76086). This protein is involved in the regulation of expression of a group of proteins known to participate in the metabolism of phenylacetic acid [1]. 
In fact, some members of this family are annotated by InterPro as containing a winged helix DNA-binding domain (Interpro:IPR009058). 22.70 22.70 22.70 22.80 22.60 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.04 0.72 -3.98 13 652 2012-10-04 14:01:12 2004-08-25 16:08:49 7 2 571 5 164 630 53 68.10 38 23.66 CHANGED pApSlIlTLaGDhlts+GGslhlusLIpLhtshGlsEpslRsAloRhsppGhLssp+sGp.puhYpLS-+u .................................upSLIholaGD..lt.t.+.G....u.p..lhluuLlt.Ll.p.s.h...G..hsEphVRoALhRLs+cGhL....ss.s...+hG..R.puhYpLo-p................ 0 45 102 139 +7680 PF07849 DUF1641 Protein of unknown function (DUF1641) Fenech M anon Pfam-B_9217 (release 14.0) Family Archaeal and bacterial hypothetical proteins are found in this family, with the region in question being approximately 40 residues long. 20.30 20.30 20.40 20.70 20.20 19.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.29 0.72 -7.74 0.72 -4.34 60 843 2009-01-15 18:05:59 2004-08-25 16:10:27 6 1 521 0 181 438 10 40.50 33 23.65 CHANGED shpshcssp......lulhsLl+tL+DPDlp+uLGhhlshLKslG+s ..................s..pphpsscc.....sulhuLl+.s.LKDPDhpRulsahlshLKuhup......... 0 52 107 147 +7681 PF07850 Renin_r Renin receptor-like protein Fenech M anon Pfam-B_9266 (release 14.0) Family The sequences featured in this family are similar to a region of the human renin receptor (Swiss:Q8NG15) that bears a putative transmembrane spanning segment [1]. The renin receptor is involved in intracellular signal transduction by the activation of the ERK1/ERK2 pathway, and it also serves to increase the efficiency of angiotensinogen cleavage by receptor-bound renin, therefore facilitating angiotensin II generation and action on a cell surface [1]. 
21.30 21.30 21.70 21.60 21.20 19.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.25 0.72 -3.65 8 137 2009-01-15 18:05:59 2004-08-25 16:10:47 9 4 93 4 83 131 0 95.90 43 27.64 CHANGED shYGGsAVVEllT..scoh-ssLsRcsRsIlpocs..............hpssssPYNLAYpYsh-YuVIFNIlLWlhlsLuLAVIsISYslWsMDPGhDSIIYRMTsQ+I+hD ......................................................................t.hYsusslVphls....csh-ss.hhRpsRs..llp..scp...................................psss..sPYNLA.hp..Ysh-.Y...sVl..FNIlLWhMlsLuLullslsYslh.N.MDPGhDSIIYRMTsp+l+hD........ 0 27 34 59 +7682 PF07851 TMPIT TMPIT-like protein Fenech M anon Pfam-B_9674 (release 14.0) Family A number of members of this family are annotated as being transmembrane proteins induced by tumour necrosis factor alpha, but no literature was found to support this. 22.90 22.90 25.40 23.50 22.80 22.80 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.09 0.70 -5.36 14 274 2009-01-15 18:05:59 2004-08-25 16:11:51 8 8 130 0 167 257 5 265.40 39 89.51 CHANGED cshpEWp-Lpc-appLp-TH+hYppKLE-lspLQppCossIs+Q+++L+plppsL+phpts.......lss..E-hctlpclcppl+cRpstha-ME..uaLPcK.NGlYLsLlLG.sVNVoLLs+psKFtYKDEYEKFKLhlTlIhllhuhhCpalhsY.......RshDtlFsFLLVWYYCTLTIRESILhsNGSRIKGWWlhHHYlSThhuGVhLTW.....PpG.haQhFRsQFl.FshY.ShVQaLQahYQuGCLYRL+ALGERHpMDlTlE....GFpSWMWRGLoFLLPFLFhGahaQhYNuhTLFphuppspsp.EWQVhhhuhhFLlLFlGNhhTTLtVVhpKhppp...p 
............................................................................tlpptt.p..ah.phpphtthpp.psttlpppph....p....h.pplt.slp..pht.................t.phhpplptp...hpctpshhh......-h-......shL.Pp+.sGh.aLplhLG..sVNVphhppps+htaK-................EYEpFKhhhsllhllhshhhhhlhph......................phhsthaphhLlaaYsTLslREsILhsNGS+I+uWWl.HHYlSshhusVhLTW.....P.s.s.ha.........QhFpppF...L......tauhhQ..uhVQhLQ.hYQpGpLYphhALGc.tpphDlo...p....G.pu.hhhtt.....Lh.hLhPh.LFhhph.aQhY....suht.Lh...........ph......u..............sp...pW.............Q.......V.h...huhhh...llhhhGNhhsTl.slhtKhp.p....t....................................................... 0 49 72 121 +7683 PF07852 DUF1642 Protein of unknown function (DUF1642) Fenech M anon Pfam-B_9838 (release 14.0) Family The sequences making up this family are derived from various hypothetical phage and prophage proteins. The region in question is approximately 140 amino acids long. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.16 0.71 -3.83 54 426 2009-01-15 18:05:59 2004-08-25 16:12:14 6 4 292 0 26 304 0 123.50 24 69.65 CHANGED pthshVP...phVA-aIEppK..tt.........hpshphh.h.tp.........................................chhpW...........ss.chhhpAals..G............YpVEK......EpLYhVcl.ps.....................h.hhppth..t...................hphTcpEI.cp.s..chtWtat......lcVc ...................................................................thshVP...phVA-Wl-ps+........pp.................phh.t.....h.pp.............sp.....................ch.pW.............h...hppshch.....hs.pAals..G...............YEVEK......Eph.YhVcl.t.t........................hh.h.tpph.......................hphTcpEl.cp.s..ph.W.at......h.l.................................................. 
0 9 16 20 +7684 PF07853 DUF1648 Protein of unknown function (DUF1648) Fenech M anon Pfam-B_9801 (release 14.0) Family Members of this family are hypothetical proteins expressed by either bacterial or archaeal species. Some of these are annotated as being transmembrane proteins, and in fact many of these sequences contain a high proportion of hydrophobic residues. 28.70 28.70 28.70 28.70 28.40 28.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.32 0.72 -4.45 86 1279 2009-01-15 18:05:59 2004-08-25 16:13:32 6 12 929 0 232 913 44 52.80 25 26.36 CHANGED hllhlhshlhsh..hhaspLPcplssHashsGpsDsassKh.hshhhhPl.......lhlh ......................hlhlhhhhhsl....hhY...s....p.LPsp..lPhHash.sGp.sDsassKh..hslh.hhPh..hhh.h..................... 0 95 171 201 +7685 PF07854 DUF1646 Protein of unknown function (DUF1646) Fenech M anon Pfam-B_9337 (release 14.0) Family Some of the members of this family are hypothetical bacterial and archaeal proteins, but others are annotated as being cation transporters expressed by the archaebacterium Methanosarcina mazei (Swiss:Q8PXG5, Swiss:Q8PXG7 and Swiss:Q8PXG8). 
20.30 20.30 20.40 20.30 20.20 19.70 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.00 0.70 -5.32 6 83 2012-10-02 15:12:49 2004-08-25 16:13:47 7 2 52 0 45 108 23 323.20 43 93.85 CHANGED lAuLlVILlLlLlLPFhl+hlEcNLEhFhhsM...GlAushlSGl.........aSh-LlhcAFcsPLtlhp..lh.IPlGIsQsVLlsGLlFaha+c+lhphltpsh-+lul+VFuFllIslLGLhSSlISAIlAulILsEllshhPLsRKsKlchsVIAsFuIGhGAALTPlGEPLSTIAlSKL.....NtcFhYLhclLGhYIIPulhshGlhusahlp+hsh+p.hlEh..V-YsEsl+-......VllRAhKVFVFIhALpLLGpGFKPlIhhYlsclsSclLYWlNhlSAllDNATLAAAEIuPphTpEQIRuhLMGLLISGGMLIPGNIPNIluAG+L+Is.pEWARlGlPLGlIhhllYFIllaVL ....................................................h..hLllIllllLlLPFhh+tlE+NLEhFLhlM...Glh...AshlSth.....................hshcLl.cshps.lhhh..........Is.sVLlu.GLlFhh.h+s+lpphl.pt.lhctl...s.lclhlFlllllLGLhSSlITAIlAullLVEl.l.phhPL.c.RpsKl.clsVluCFuIG.hGAALTPlGEPLSTIslSKL.....p.A...-......F....h.YL.hchlGhhI.ls.ullhhulluhhhl.t+..t....ph....ps........hpt..........hp.t.p......Esl+c.................VhlRAhKlalFlhALpLLGsGFKPlIctYllclsstlLYWlN.h.lSAlLDNATLA..AAEIoPt..Ms.tQl+AlLhGLLISGGM.LIPGNIPNIIuAuK.....Lp.IpS+EWA+lGlPlGllhhllYalllFh.......................................................... 0 13 31 32 +7686 PF07855 DUF1649 Protein of unknown function (DUF1649) Fenech M anon Pfam-B_9402 (release 14.0) Family This family is made up of sequences derived from hypothetical eukaryotic proteins of unknown function. 
20.50 20.50 24.00 20.70 19.30 20.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.95 0.71 -4.57 11 256 2009-01-15 18:05:59 2004-08-25 16:18:47 7 7 211 0 187 241 2 158.50 30 73.51 CHANGED lElhs-sppV+DlVcuILHTIhFHR..............huoltPp..opDhlDhThstlsssEL-phl-p+lsshlcplcsspsp.................................spGQIulpFa-K++++.............sW.F..........hp.................................u-EpVsWE.WTlcVslspscoEt-+tpsRcuhpc..................pLpcslhcIlplsN+c.DaIPPIsTpsps .....................-l.h-.pplc-slt.u...........lLHTIhFHR................h.us.ltsp..sp-hl.-hTas......t.s.ss.s-L-phlcpclsphhctl.c...pss.ss.............................................hpGQl...slpFap....K....++p+.................W..F...............................................................s-Epl...sWE.Wslplplh....p..........p..s........c...p....-.........+ths+cthtp............................tLpctlhpIlphhN.cc.....-alP.hsopt......................................................................... 0 63 101 154 +7687 PF07856 Orai-1 DUF1650; Orai-1_Ce; Mediator of CRAC channel activity Fenech M, Pollington JE anon Pfam-B_9685 (release 14.0) Family ORAI-1 is a protein homologue of Drosophila Orai and human Orai1, Orai2 and Orai3. ORAI-1 GFP reporters are co- expressed with STIM-1 (ER CA(2+) sensors) in the gonad and intestine. The protein has four predicted transmembrane domains with a highly conserved region between TM2 ad TM3. This conserved domain is thought to function in channel regulation. ORAI1- related proteins are required for the production of the calcium channel, CRAC, along with STIM1-related proteins [1]. 
25.00 25.00 33.60 28.70 21.60 20.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.29 0.71 -4.70 15 268 2009-10-24 14:23:42 2004-08-25 16:20:09 7 6 107 0 171 238 1 183.50 52 64.56 CHANGED p+aplpslppR+L-.uR..................tQLKAoSppSALLuGFAMVAhVElQhscspph....spuLLlAFussTollVuVcLhAlhloThlLssI..................-sss.htshps..sspoPHcphcha....hEssWtauhph...GlhLFLlplullsWlKFass................................................................hsAAhssTuIhlsssllalhaohphatshlt++tscps ............................s..as.psLSWR+L.LSR................................................................AKLKASS+TSALLSGFAMV....AMVEVQL-sspph.....P.sLLlAFosC.TTlL.VAVHLFALMlSTCILPsI.............................................EAVSNlHslsu.....lp-SPHcRhHha................IELAWuFSTsl.........GhhLFLsElsLlsWVKFhsl...................................................................u.tAAhsoTsIhlPssllFl.sFslHFYRSLVsHKp-p.t.................................... 0 54 76 121 +7688 PF07857 DUF1632 CEO family (DUF1632) Fenech M, Bateman A anon Pfam-B_9654 (release 14.0) Family These sequences are found in hypothetical eukaryotic proteins of unknown function. The region concerned is approximately 280 residues long. This family has been termed the CEO family for C. elegans ORF [1]. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.76 0.70 -5.13 6 180 2012-10-02 19:55:49 2004-08-26 09:34:15 7 7 72 0 127 161 9 207.00 32 68.02 CHANGED GllAChlSslhFGShFVPlK+acouDGhFlQWlhShulhLVGllVauspGFPtFaPlAMLGGhhWusGNuhuVPIhssIGLulGhLlWsTssClsGWAsuRFGLFG.lssphPpsshLNYlGllllVVGGslFh.IKsp...spscscsoshphE..sshppc..sspcsSsh...c.....h.p..+p......RllshlhAlluGhhYG.hhsPlhYIpspsp....lYPsusppslsYlFSaahGIFlTSTllFlsYsIhp+NsPhlssp .....................................................GhhushluhlhFGS.aVPlK..+hs..su....D..............G.h..............ahQWhhshulhl...........suh....l........h.h.h.................h.............t........s.....t.....F.......hPh.A.MlGGhlWus.....GNhhs.VPllptlGLulGhLlWuohshlsG..WssuR..FGh..FG..hp...p...s.t.....p...s.hL.NhhGhslsllu...shha.hh.....l..+sp........t...........p......p...p............t...........sh........t...................p..tpp....................t.....s..........................................p...................................................+h.h..uh.hulhsGhhaG..hsPh.hh.pp............hts......slsal..auah.Glhhsuohh...ahhYshhh+...t...h...t.......................................... 0 58 75 113 +7689 PF07858 LEH Limonene-1,2-epoxide hydrolase catalytic domain Fenech M anon Pfam-B_15033 (release 14.0) Domain Epoxide hydrolases catalyse the hydrolysis of epoxides to corresponding diols, which is important in detoxification, synthesis of signal molecules, or metabolism. Limonene-1,2- epoxide hydrolase (LEH) differs from many other epoxide hydrolases in its structure and its novel one-step catalytic mechanism. Its main fold consists of a six-stranded mixed beta-sheet, with three N-terminal alpha helices packed to one side to create a pocket that extends into the protein core. A fourth helix lies in such a way that it acts as a rim to this pocket. 
Although mainly lined by hydrophobic residues, this pocket features a cluster of polar groups that lie at its deepest point and constitute the enzyme's active site [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.53 0.71 -4.13 7 167 2012-10-03 02:27:23 2004-08-26 09:46:28 7 2 124 7 43 472 122 120.10 39 83.09 CHANGED ssshcsVpsFhsAhpcsDhcssltchhsssphapN.GlsshpGhscshshlcth....shsuhEhcIh+IAADGutVLTERsDthhh..Gs.hhphhVhGlFEVpss+IshWRDYFDl.shhctssc .........................................................tshcsVpsFls.Ah..p..s..t...D..h.-...s...s.......s...s.lhs...-.................V....Y....p...N......V..u......h......s......s..l....+.........G.....t...c...t.s.t.p.h.lc.th.........pst..sG...F-.l.+.....I...H...+...I..u..A....D....G.s.s..........V.....L.......T......E........R.o............D...slh..h..........G.s.l.....+....lp..FWV..CG.V..F.E..V...c....D.G......+ITl.WRDYF.Dhhchh+u...ht................................... 1 8 30 36 +7690 PF07859 Abhydrolase_3 alpha/beta hydrolase fold Bateman A anon Pfam-B_100 (release 15.0) Domain This catalytic domain is found in a very wide range of enzymes. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.45 0.70 -4.76 130 13075 2012-10-03 11:45:05 2004-08-29 17:28:14 8 147 3378 94 4994 21100 3681 191.50 20 57.99 CHANGED llYhHGGGal...lGsh.sop..cshspplAptssshVlSV-YR...LA.PEa.aPAAh-DuhsuhpWlhpps.........................................t.s..hD...ss.+lsluGDSAGGNLAtslu..ltsccp........h.............lsutlLlaP...shs.............pt..tsh....t..tpt.h.l....s..tsthp.hhh.ch...Yl...s..sssts.cP.....hsuPl....hu....s...lss..LP.P.......shlhsuphDsLpD-uttYAc+Lp.t..sGVpVphh.phtGhhH.uFhh ..................................................................................llahH....G.G.......G...a.h.......h.u...s...h...pst................pth..h....p.....t......l............s..............t............t..............t.............s..............h......h......l........l....s............l...s......Y....+....................L........u...........P..............c.............t............................a...........P...........s.............s...........l........p.............D......s............h......s...........u.h.p....a.l...hpph....................................................................th.t....hc.......sp...p..l.s.l...s........Gc.S........AG..G..p..Lu.h.s.hs..........hhhp..pt......t.....t....................................................................t.s.h.l.l...h..h.P.....................hh.sh.....................t........p.h..........................t.p..............h.....h...........................s.......t......h.t..........h.....h....h....ph...............hh....s.........ttthp....p..................................h.h...s..P.h.......t.........s..................htt....h..P....s................................h.h..l..h...s.....u..t....h...D.......h...h.......s......p...s.......h...h...............h..p...t.....l.t..t...........ts.....h...............s..p.hh...h.........s...h...H.sh..t.........................................................
............................................................................................................................... 1 1372 2820 4031 +7691 PF07860 CCD WisP family C-Terminal Region Yeats C anon Yeats C Family This family is found at the C-terminus of the Tropheryma whipplei WisP family proteins ([1]). 25.00 25.00 110.20 108.90 18.80 18.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.09 0.71 -4.71 2 5 2009-01-15 18:05:59 2004-09-01 16:41:52 6 2 2 0 2 5 0 110.40 83 6.06 CHANGED hQpAK.........Pp.psh.E+...TPTESKGGGFWSKVGSGIAAPFKWIWHGITWPFRKLFGSRSEA.........................phP........phlssAVspF....................LpFL.. PS........HTQSAKPTEKPKEEK...TPTESKGGGFWSKVGSGIAAPFKWIWHGITWPFRKLFGSRSEAPSSTTNATGNTsGKTRVKRDT.ppPPEHPLKSVN-QIppVTsAVNNFQKSVLTSLKsFFTYLTDTA+LpFLp............ 0 2 2 2 +7692 PF07861 WND WisP family N-Terminal Region Yeats C anon Yeats C Family This family is found at the N-terminus of the Tropheryma whipplei WisP family proteins ([1]). 25.00 25.00 362.20 362.20 19.80 16.80 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.88 0.70 -5.36 2 12 2009-09-11 19:29:28 2004-09-02 08:46:53 6 7 2 0 4 12 0 250.50 77 31.64 CHANGED AhhL..SS.........psPPuLSLLSSVSpsSl.pSso+aopVS.sssp.sClosssssshhlDPlT..usotQT.oCssuhSs...pPtot....las.Yo-TsSYlYVPYIpss.l.LYY...KtsPSS.lshshoDhtTsa..........us-+VlShohososlhsLLTstNlaFa.pl.so.psplTVsl+hphcsshl.SshPuLRsS.aTaSLopPstslplDt.TGtlphS....olsspslTAhAlphsTuThlT..YhhDo.h. LT..LLLLSSLQYETAFARQTPPALSLLSSVSSTSV.SSNTKYTRVSNTNTQEVCVTTNTNVSLLIDPVT..SSTKQTLSCTPSLSP...QPQTH....IYVPYTDTSSYLYVPYITNTHISLYYTDKKADPSSFLTFPHTDIATPY..........GDEKVlSITKTTTNLIALLTTRNIFFF.DIHVTEKPKITVPIHKQIDNTYL.SDIPSLRNSRYTFSLTHPNKDITIDRYTGQIHLS....SLPTSPITAIAIN+DToTHIT..YAlDo...ss... 
0 4 4 4 +7693 PF07862 Nif11 Nitrogen fixation protein of unknown function Yeats C anon Yeats C Family This domain is found in the Cyanobacteria, and may be involved in nitrogen fixation, but no role has been assigned ([1]). 22.90 22.90 23.10 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.17 0.72 -4.02 116 396 2009-09-14 15:19:10 2004-09-02 09:07:36 6 4 112 0 200 413 401 48.80 26 53.68 CHANGED MStcp....LcsFLpcl.p.........sDssLppplpu..st.st-plltlApptGaphotc-lt .............MS.cp....lctFlpcl.p..........sDssLpcplps......sp..ssccl....ls....l..A....pptGashotc-l.............. 0 34 83 179 +7694 PF07863 CtnDOT_TraJ Homologues of TraJ from Bacteroides conjugative transposon Yeats C anon Yeats C Family Members of this family have been implicated in as being involved in an unusual form of DNA transfer (conjugation) in Bacteroides ([1]). The family has been named CtnDOT_TraJ to avoid confusion with other conjugative transfer systems. 25.00 25.00 25.00 25.00 24.40 24.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.38 0.72 -3.51 15 330 2009-01-15 18:05:59 2004-09-02 09:22:06 6 2 117 0 39 273 4 62.30 53 19.65 CHANGED FMIIGIlGYFTlPTVuuWIIpAGGu.GuYu+sVNptut+uGssA.......uusuGAAhGssuG+lp..............u+ ....FhlIGIlGYFTlPTVAGWIIQAG.Gs.GuYuR..NVNpsAs+uushA........uusAGAssGNhuGRl...th....................... 0 20 36 39 +7695 PF07864 DUF1651 Protein of unknown function (DUF1651) Yeats C anon Yeats C Family This is a family containing bacterial proteins of unknown function. 
20.40 20.40 20.80 20.70 19.70 20.10 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.40 0.72 -3.88 45 133 2009-01-15 18:05:59 2004-09-02 09:34:00 6 1 30 0 51 136 674 70.90 25 80.29 CHANGED pGWLlssppphlhpFc...-ppohpt.s.tlhlcphchhs...upPslhcs+c+l.phcpAhctWccLlpsGWpcs.ps.hs ................sWLlssp...pphlhpFp.....spp.s.p..s.hlhlcphphhs......spPslh+spcch.ph-pAhchWppLhppGWpps.p..h........ 0 2 20 42 +7696 PF07865 DUF1652 Protein of unknown function (DUF1652) Yeats C anon Yeats C Family This is a family containing hypothetical bacterial proteins. 21.30 21.30 21.30 25.20 21.20 19.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.04 0.72 -4.37 16 126 2009-01-15 18:05:59 2004-09-02 09:42:41 6 1 42 0 31 125 0 67.00 32 78.74 CHANGED hh-lRpllEpuFLPhpCpCohssstshTl+lh.-ssSGclpLhloGluhppLposRDlspLlspL+t-hs ........phppllEtuFhPhtCpsohssstoMsl+Lh.D.sSGcp.lslsGlshuplhoucDlscLltpl+p-h..... 0 0 0 20 +7697 PF07866 DUF1653 Protein of unknown function (DUF1653) Yeats C anon Yeats C Family This is a family of hypothetical bacterial proteins of unknown function. 21.80 21.80 22.10 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.26 0.72 -3.79 54 591 2009-01-15 18:05:59 2004-09-02 10:42:42 6 8 565 2 154 512 59 62.70 44 59.86 CHANGED sGhYRHYK.........G.s.YcVlulA+HSETcE..lVlYcsLYG-h..ulWVRPhsMFhEsVpl.-GpplP......RFphl ............................................shYRHa.K............G..shYpVlsl.Ac...HSETpE..pl..VlYpsLY.....u-h...ulWVRPhsMFhEp.V.h..Du..pphP......RFph................... 0 48 101 131 +7698 PF07867 DUF1654 Protein of unknown function (DUF1654) Yeats C anon Yeats C Family This family consists of proteins from the Pseudomonadaceae. 
20.90 20.90 21.10 23.50 20.60 17.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.47 0.72 -4.61 12 129 2009-01-15 18:05:59 2004-09-02 10:55:33 6 1 57 0 18 103 2 71.10 37 83.43 CHANGED s.ouhE+LGhRlpchINuPsAQcpRhslIaRL-s-up--WEpllptlsEs-slsLsap.DDGuVpltWph.p-- ..s..puY-tLuhRlQ+.lIsuPsAQpp+tsllhRhssEspccWsplL-pIuEs-slplshp.-DGo.VpltWph......... 0 1 1 12 +7699 PF07868 DUF1655 Protein of unknown function (DUF1655) Yeats C anon Yeats C Family This protein is found in some prophages found in Lactobacillales lactis ([1]). 19.20 19.20 19.90 110.70 18.20 17.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.73 0.72 -3.98 2 18 2009-01-15 18:05:59 2004-09-02 11:17:53 6 1 12 0 3 13 0 54.70 80 86.09 CHANGED aI..Dphth.hh-hL.A+ARHaIcls-hs.+LFDGpSVsTFThhtNshQVEh.cu YILDDSIAFELMDLLKAKARHFIQLNEYVYRLFDGQSVVTFTTLENDIQVEMVKG. 1 2 2 2 +7700 PF07869 DUF1656 Protein of unknown function (DUF1656) Yeats C anon Yeats C Family This family contains bacterial proteins, many of which are hypothetical. Some proteins in this family are putative membrane proteins. 21.30 21.30 21.30 21.50 21.20 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.83 0.72 -4.18 72 1584 2009-01-15 18:05:59 2004-09-02 11:23:22 7 4 929 0 230 607 18 58.50 37 80.14 CHANGED pElslhGlahPslllhhllAhslshllctlLschshaRhlWH.uLFcl...uLalhllus.l .........lslhGlhF.PPlFhthlluh.slahll+RlLtshthhch.lWHPuLFsh....uLashlhsL............. 0 34 89 154 +7701 PF07870 DUF1657 Protein of unknown function (DUF1657) Yeats C anon Yeats C Family This domain appears to be restricted to the Bacillales. 
24.00 24.00 24.70 27.40 23.70 23.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.07 0.72 -4.28 27 523 2009-09-11 09:19:45 2004-09-02 11:26:35 6 3 182 0 90 296 1 49.90 36 36.63 CHANGED pl+psl.AuLKuspAsLcsauLpTcsppAKphappsupphcpllpslpsRl .........l+psL.AuLKusp...AsLEsFuLsTcsppAKphappsApphppIlspLpsRl..... 0 30 66 74 +7702 PF07871 DUF1658 Protein of unknown function (DUF1658) Yeats C anon Yeats C Family This family of small proteins seems to be found in several places in the Coxiella genome. 25.00 25.00 25.20 34.20 22.70 24.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.92 0.73 -7.26 0.73 -4.18 7 73 2009-01-15 18:05:59 2004-09-02 11:31:08 6 2 6 0 19 34 0 30.10 42 65.46 CHANGED sslKshcs.uhsp+PhtGssstahshhhh. sslKshcs.uhsp+PstGNsssahshhhh..... 0 19 19 19 +7703 PF07872 DUF1659 Protein of unknown function (DUF1659) Yeats C anon Yeats C Family This family consists of hypothetical bacterial proteins of unknown function. 20.70 20.70 20.80 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -8.03 0.72 -4.33 20 335 2009-01-15 18:05:59 2004-09-02 11:35:22 6 1 251 0 81 255 0 46.30 31 62.86 CHANGED spshsosLhLchpsGls.ssGcPlh+s+oaspVKssAscpDlYsVApA .................s...hstsLtLhhpsG.lD.psGcslh+s+sappVKssAs.-plasVApA.............. 0 38 70 76 +7704 PF07873 YabP YabP family Yeats C anon Yeats C Family This family of proteins is involved in spore coat assembly during the process of sporulation [1]. 21.40 21.40 21.40 23.10 21.20 20.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.79 0.72 -4.41 73 814 2009-01-15 18:05:59 2004-09-02 11:39:05 6 1 421 6 177 539 9 66.30 30 70.00 CHANGED hsplolpu+pclhlpsh+slhsFsspclhlpTphGhLhI+GcsLplpplss-p..ltlpGpIpslpY. ...............clslhG+ppl.Ipsh+slhsFsscclhLp.TphGhLtI+GpsLplcplssEc..ltlcG.pIpsltY...... 
0 93 147 157 +7705 PF07874 DUF1660 Prophage protein (DUF1660) Yeats C anon Yeats C Family This protein is found in Lactobacillae prophages. 21.40 21.40 21.60 21.60 20.90 20.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.75 0.72 -3.69 9 38 2009-01-15 18:05:59 2004-09-02 11:54:19 6 1 30 0 7 29 0 60.00 40 93.52 CHANGED MKLhCKLFGHKW-....hsPhsphcs.aCtRC.........................saNRSDLDESENVFP...EKWLDKHMD ..MKLhCKL.FGHKWp.........ssshpphhc.tCpRC............htp..pt.....h.hNRSDLDES-sha....c............................. 0 2 4 6 +7706 PF07875 Coat_F Coat F domain Yeats C anon Yeats C Domain The Coat F proteins, which contribute to the Bacillales spore coat. It occurs multiple times in the genomes it is found in. 23.80 23.80 23.90 23.80 23.60 23.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.58 0.72 -3.84 65 780 2012-10-01 21:25:29 2004-09-02 11:54:34 7 2 357 0 179 551 2 63.60 24 50.09 CHANGED sDcsl.sschLsstKsssssYusAhsEsssPpLRpsltptlspshphphplaphhhpKGaYps.p ......sDpsh.ss-hLsshKtsspsYupslsEssssplRpsL.pphhsps.phptplaphhhpKGaYts............... 0 81 147 156 +7707 PF07876 Dabb Stress responsive A/B Barrel Domain Yeats C anon Yeats C Domain The function of this family is unknown, but it is upregulated in response to salt stress in Populus balsamifera ([1]). It is also found at the C-terminus of an fructose 1,6-bisphosphate aldolase from Hydrogenophilus thermoluteolus (Swiss:Q9ZA13; [2]). Swiss:Q93NG5 is found in the pA01 plasmid, which encodes genes for molybdopterin uptake and degradation of plant alkaloid nicotine. The structure of one has been solved (Swiss:Q9LUV2) and the domain forms an a/b barrel dimer ([3]). Although there is a clear duplication within the domain it is not obviously detectable in the sequence. 
21.20 21.20 21.40 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.10 0.72 -3.54 51 1481 2012-10-02 00:20:33 2004-09-02 12:33:13 7 15 921 48 686 1398 234 96.20 21 80.22 CHANGED lcHlVha+h+csssspph..tph....hpshpsL........splsslpphp..hGtshsspp.....ttu.ashshhspFcshpsh.csYts.cP..........tHhphs.phhpshhp....phhshDaps .........................lpHlVha+h+..psss....tpph......tph.......hpth..psL....................spl..s.t.l..t..p...hc....sG..hshs.sps...................pu.ashslhspFpstcsh.pt.Yts..HP..........t.Hht..ht..phh...ts...h.h..p.....phh.shDa..................... 0 173 439 571 +7708 PF07877 DUF1661 Protein of unknown function (DUF1661) Yeats C anon Yeats C Domain This is a family containing bacterial proteins of unknown function. Many of the proteins in this family are hypothetical. 25.00 25.00 25.60 25.60 23.10 22.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.87 0.72 -7.22 0.72 -4.45 9 52 2009-09-11 10:00:21 2004-09-02 13:59:39 6 2 3 0 9 48 0 29.80 45 32.42 CHANGED LsR-hhssRA+TKKFSR+hh...RphsspotpFR LsREhKpSRApTKKFS++hh...+php.p.t.hh............ 0 9 9 9 +7709 PF07878 DUF1662 Protein of unknown function (DUF1662) Yeats C anon Yeats C Domain This family contains bacterial proteins of unknown function. This domain belongs to the Ribbon-helix-helix superfamily suggesting these may be DNA-binding proteins. 22.20 22.20 22.30 22.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.52 0.72 -4.23 27 70 2012-10-02 18:44:02 2004-09-02 14:12:51 6 1 32 0 30 72 224 51.30 31 59.07 CHANGED usps+SsRlpVlLPppls-cLpthAcpEuRosSs......hAclLIpcGlpRhtst .....stscsshIpslLPcplsccLpthApp-uRosus......hAclLlpculpRtht..... 
0 2 10 24 +7710 PF07879 PHB_acc_N PHB/PHA accumulation regulator DNA-binding domain Yeats C anon Yeats C Domain This domain is found at the N-terminus of the Polyhydroxyalkanoate (PHA) synthesis regulators. These regulators have been shown to directly bind DNA and PHA ([1]). The invariant nature of this domain compared to the C-terminal Pfam:PF05233 domain(s) suggests that it contains the DNA-binding function. 25.00 25.00 25.70 36.80 21.60 20.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.97 0.72 -4.24 47 465 2009-01-15 18:05:59 2004-09-03 09:55:21 6 3 457 0 198 441 191 63.80 56 33.74 CHANGED plIKKYsNRRLYDTpoSpYlTL-cltphVhpup-FpVlDAKTG-DlTRslLhQIIhEpEs..tGp.s ...lIKKYsNRRLYcTpTSoYlTL-DltphVhcuE-FpVhDAKoG-DLTRolLhQIIhEcEs.tG..s..... 0 60 127 160 +7711 PF07880 T4_gp9_10 Bacteriophage T4 gp9/10-like protein Fenech M anon Pfam-B_73396 (release 14.0) Family The members of this family are similar to gene products 9 (gp9) and 10 (gp10) of bacteriophage T4. Both proteins are components of the viral baseplate [1]. Gp9 (Swiss:P10927) connects the long tail fibres of the virus to the baseplate and triggers tail contraction after viral attachment to a host cell. The protein is active as a trimer, with each monomer being composed of three domains. The N-terminal domain consists of an extended polypeptide chain and two alpha helices. The alpha1 helix from each of the three monomers in the trimer interacts with its counterparts to form a coiled-coil structure. The middle domain is a seven-stranded beta-sandwich that is thought to be a novel protein fold. The C-terminal domain is thought to be essential for gp9 trimerisation and is organised into an eight- stranded antiparallel beta-barrel, which was found to resemble the 'jelly roll' fold found in many viral capsid proteins. 
The long flexible region between the N-terminal and middle domains may be required for the function of gp9 to transmit signals from the long tail fibres [2]. Together with gp11, gp10 (Swiss:P10928) initiates the assembly of wedges that then go on to associate with a hub to form the viral baseplate [1]. 25.00 25.00 25.30 25.20 24.60 24.50 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.74 0.70 -5.34 14 97 2009-01-15 18:05:59 2004-09-03 14:03:50 6 3 54 22 1 90 685 229.60 30 52.13 CHANGED KphIclGslsssuTGDhLhcGGsKlNsNhsplYsthGDtc.................l.HAsGhap+aups.............hshsl.hGstasIDTo...sGs.lsVsLP+.....G...chGcslchhsssGShu....sNPlslhstuGDoIcG.ssss..hplspsasclpLhhsusu.......pW-Y....pl-slhssshssVscphh.IsssspTD.lslFs.pspYNsspLhVhtcss......sshhhpssp.hh.ls.sps...scVhss-h.ssl+ht...tsssDplh.lsah..sG..lhshhSS.tspsl+l ....................KphIslGphss-uTGDhLhcGGpKINsNFsplYsthGDsc..................h.ausGhapphsts.............................hssthGp.aslsTo...sut.lsl..plsc.....G....chscslchhss.uohs....spsltl..tssDolt...G.ssss.....h.lspsa.clplhhhuss.......hWpY....plpphhssc.ssltpp...ltststss.lsl...ht.tspYshhpl.Vhtpss......s...hptsp.hh..s.t.s......splhsh-h.tsh+ht....p.tDp.l..hsa...ss..l.thhps..t.th................................................................................................... 0 0 0 1 +7712 PF07881 Fucose_iso_N1 L-fucose isomerase, first N-terminal domain Fenech M anon Pfam-B_11456 (release 14.0) Family The members of this family are similar to L-fucose isomerase expressed by E. coli (Swiss:P11552, EC:5.3.1.3). This enzyme corresponds to glucose-6-phosphate isomerase in glycolysis, and converts an aldo-hexose to a ketose to prepare it for aldol cleavage. The enzyme is a hexamer, with each subunit being wedge-shaped and composed of three domains. Both domains 1 and 2 contain central parallel beta-sheets with surrounding alpha helices. Domain 1 demonstrates the beta-alpha-beta-alpha- beta Rossman fold. 
The active centre is shared between pairs of subunits related along the molecular three-fold axis, with domains 2 and 3 from one subunit providing most of the substrate-contacting residues, and domain 1 from the adjacent subunit contributing some other residues [1]. 25.00 25.00 29.00 28.90 24.90 23.80 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.84 0.71 -4.73 17 960 2009-01-15 18:05:59 2004-09-03 16:01:50 7 3 940 15 59 415 11 163.90 70 29.68 CHANGED tshP+IGIRPsIDGRchG..VRESLEtpTMpMA+ulAcLlpssl+assGssVECVIADssIGusuEAAtsuc+FpppsVslTlTVTPCWCYGoEThDMsPhhPKAlWGFNGT..ERPGAVYLAAshAuHsQ+GlPAFuIYG+-VQDscDpsIPpDVpEKlLRFARAulAVupM+G .....s..hPKIGIRPsIDGRRhG..VRESLE-QTMNMAKusAsLloppL+assGssVECVIuDosIuGhAEAAACcEKFpppNVGlTITVTPCWCYGSET..lDMDPshPKAIWGFNGT..ERPG...AVYLAAALAAHuQKGIPAFuIYGHDVQDADD.T.oIPsD..VcEKLLRFARAuLAVAoM+G................ 0 22 41 49 +7713 PF07882 Fucose_iso_N2 L-fucose isomerase, second N-terminal domain Fenech M anon Pfam-B_11456 (release 14.0) Family The members of this family are similar to L-fucose isomerase expressed by E. coli (Swiss:P11552, EC:5.3.1.3). This enzyme corresponds to glucose-6-phosphate isomerase in glycolysis, and converts an aldo-hexose to a ketose to prepare it for aldol cleavage. The enzyme is a hexamer, with each subunit being wedge-shaped and composed of three domains. Both domains 1 and 2 contain central parallel beta- sheets with surrounding alpha helices. The active centre is shared between pairs of subunits related along the molecular three-fold axis, with domains 2 and 3 from one subunit providing most of the substrate-contacting residues [1]. 
21.30 21.30 21.50 21.40 20.90 21.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.03 0.71 -4.63 21 1038 2009-09-11 15:21:47 2004-09-03 16:04:42 7 4 952 15 63 438 21 167.00 60 30.95 CHANGED +oYLshGuVSMGIuGSllss-FFpcYLGM+sEtVDhoEllRRh-ptIYDp-Ehc+AhtWlccps+..cGcDpN..Pcchpps.cpp....ct.ac.llKMhhIhRDlMtGN.+LtphGatEEuhG+NAlsuGFQGQRpWTDaaPNGDhsEAhLNosFDWNGhREPalhATENDoLNuluMLFs+LLTss ..........................KSYLShGuVSMGIAGSIVspsFFppaLGM+spuVDMTElpRRlDptIYDc.tE...hEhALuWsccsh+....G.DpN...............scphpcstE.pp....ct.hc.slhMsMhhRDhMpGNs+LA-hGhhEEulGaNAIAAGFQGQRpWTDpa.PNGDhhEAlLNoSFDWNGlRcPFVlATENDSLNGVuMLhG+.LTsT...................................................... 0 24 43 52 +7714 PF07883 Cupin_2 Cupin domain Bateman A anon Pfam-B_81 (release 15.0) Domain This family represents the conserved barrel domain of the 'cupin' superfamily [1] ('cupa' is the Latin term for a small barrel). 21.50 21.60 21.50 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.47 0.72 -4.47 156 20335 2012-10-10 13:59:34 2004-09-06 15:00:03 6 127 4001 229 6298 26569 7119 70.00 18 38.76 CHANGED hhhpltPGs.sss....Hh.H.ssp...EhhaVlpGc.hplp.ls...up...phhlpsGDshhhss.s..h.Hph.tN....susp.ssph...lslh ............................................h....h.sGt..ths.........+t...H..st.......p......ch.h..h....V...l...p..G..p...h..p....l....p...l.s.................sp.............pht..l..p..s...G..D...s........l..h...h.s.s..s.............h..Hth...ts.........s....pt...h.h...h...h.................................. 0 1857 3867 5218 +7715 PF07884 VKOR Vitamin K epoxide reductase family Bateman A anon [1] Domain Vitamin K epoxide reductase (VKOR) recycles reduced vitamin K, which is used subsequently as a co-factor in the gamma-carboxylation of glutamic acid residues in blood coagulation enzymes. 
VKORC1 is a member of a large family of predicted enzymes that are present in vertebrates, Drosophila, plants, bacteria and archaea [1]. Four cysteine residues and one residue, which is either serine or threonine, are identified as likely active-site residues [1]. In some plant and bacterial homologues the VKORC1 homologous domain is fused with domains of the thioredoxin family of oxidoreductases [1]. 21.00 21.00 21.30 21.20 20.90 20.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.10 0.71 -4.32 38 864 2009-01-15 18:05:59 2004-09-06 15:44:38 9 18 670 1 380 845 319 134.20 23 51.63 CHANGED tthhhhlhuslGllsShhlshpchshhp....hhshCsls...ssluCu..pVhsS.husl..FG.............hPsullGllsassll.shulhhhh...........phschhh.hslhhsshsuslhshaLhal.h..ahIp.uhChaChssaslolslhllshhts ...........................................t..hhhhhhuhlGlhhohhls.h...chphhp................h.shCs.ls........shh..uCspV.hs.S..uph....aG...............hPsuh..hGlhhaslhl.shuh.h...hh........................phsphhh...hhlhhu..s.h.hus..s.h..sh..aLha..h......ah.lt.shC.aChsshshshslhhhsh..t................................................ 1 129 240 313 +7716 PF07885 Ion_trans_2 Ion channel Bateman A anon Pfam-B_55 (release 15.0) Domain This family includes the two membrane helix type ion channels found in bacteria. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.87 0.72 -4.26 123 9163 2012-10-03 11:11:44 2004-09-08 16:19:51 11 158 2837 170 4782 12148 1623 81.40 21 22.30 CHANGED hhlhhhhhhushhhhh..............t..h....shhs.ulYashsoloTlGYGD.l...sPt.sss.....t.......+lhs.hhhhhhGlshhshhlshhsphhhp ...............................................................hh....hhhh..uhhhh.h................................................t..t.h..s.hh....s...uh.Ya............s.hlTho..Tl...G..........Y......G..D..l.....sPt..oth..........u..................................+lhs...hhh....h.l.h.....G.l.s..hhshh.h.shlsphh..t....................... 2 1860 2647 3883 +7717 PF07886 BA14K BA14K-like protein Fenech M anon Pfam-B_4068 (release 14.0) Family The sequences found in this family are similar to the BA14K proteins expressed by Brucella abortus (Swiss:Q44701) and by Brucella suis (Swiss:Q8FVU0). BA14K was found to be strongly immunoreactive; it induces both humoral and cellular responses in hosts throughout the infective process [1]. 21.70 21.70 21.80 23.10 21.30 21.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.50 0.72 -4.27 58 605 2009-01-15 18:05:59 2004-09-09 13:46:19 6 5 152 0 174 430 7 30.90 47 18.05 CHANGED sahpaCspRYRSYcPsssTY.sas.G.R+.C. ....HlpaChsRYRSYcspDsTYQPas.GsR+tCh....... 0 13 69 96 +7718 PF07887 Calmodulin_bind Calmodulin binding protein-like Fenech M anon Pfam-B_4579 (release 14.0) Family The members of this family are putative or actual calmodulin binding proteins expressed by various plant species. Some members (for example, Swiss:Q8H6T7), are known to be involved in the induction of plant defence responses [1]. However, their precise function in this regards is as yet unknown. 
21.00 21.00 21.50 22.10 18.70 18.20 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.87 0.70 -5.37 11 283 2009-01-15 18:05:59 2004-09-09 13:48:24 6 6 26 0 157 262 0 252.30 37 49.84 CHANGED plpLtFhspL..phPlFTGuKlEucsGsslclsLlDssTs.....lssGst......SshKlEllsLcGDFsstpc.-pWTsEEFppp.IV+pREGKRPLLsGslpVsLps.Glusl.u-lsFTDNSSWhRSRKFRLGsRVsss...sGlRlpEAho.EuFsV+DHRGE.....................................LYKKHaPPhLpDEVWRLEKIGK-GAaH++LssptIsTVc-FL+hhshDss+LR.pllGssMSs+hWcshlcHA+oClLus.clalYp....spssslhFNslaEllGlhhsspahs..hcsLsphppshl...........cpLlppAYpphspl.ph-schhhshs ............................................................................................hpLpFhspl....sl.FTss+lpu-pusslclsL.hDssst.....hlssGs.......ushKl-llVL-G...DF....ss...cp....p...ps..W...T...t..--F...psp...lV..pt....RpGKcPLL..........s.........G.........-..l..lpLcp...Ghusl..s-lpFTDNS.S.a.h.RS.....RK...FRLGs+.ls....s....s.....tuh...Rl...pE....Aho.-sFsV+D+RG.E.....................................hhKK+aPPtL..pD-VaRL-+IuKc...............G........s.hH.+.........cLp................p......t...............tI.oVp-FLph.h..h...h..c.............p..cL..R....p..............lLG.....thS.s+hW-shlcHA+.sCslss...c.ha..hYh........ttpphslhFNslhphhGh.....h.s.tp.hhs...pplsp.pp..s..............pthhppAYpph.pl.tht....................................................................................................... 0 20 89 126 +7719 PF07888 CALCOCO1 CoCoA; Calcium binding and coiled-coil domain (CALCOCO1) like Fenech M, Mistry J anon Pfam-B_4504 (release 14.0) Family Proteins found in this family are similar to the coiled-coil transcriptional coactivator protein coexpressed by Mus musculus (CoCoA/CALCOCO1, Swiss:Q8CGU1). This protein binds to a highly conserved N-terminal domain of p160 coactivators, such as GRIP1 (Swiss:Q61026), and thus enhances transcriptional activation by a number of nuclear receptors. 
CALCOCO1 has a central coiled-coil region with three leucine zipper motifs, which is required for its interaction with GRIP1 and may regulate the autonomous transcriptional activation activity of the C-terminal region [1]. 30.00 30.00 30.00 30.00 29.90 29.70 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.79 0.70 -6.22 3 312 2009-01-15 18:05:59 2004-09-09 13:50:00 6 6 55 0 115 266 0 340.20 34 63.29 CHANGED spp+tGVsFLNVA+TYlPNTKVECHYTIPsGpcsSoRDWIGIFKVssuoVRDYaTFVWAlsPEuss-GSsuHCSVQFQASYLP+PGsQpYQFRYVDppGsVCGpSssFsFuEPRPMDELVTLE-....EDuGsDlLLVVPKATlLQNQLEtuQ+ERNDLM+t+LALEG-V..................o-hRSRIppLEsALcpScccspELcEQaK-lsoScpphouERNlLssQcAEpppRILELEsDIQTlocKh..................pEp-+lLtthp-IcA-LEppKuELp...........................QRLK-pTlQh+Dp.spt.chQLElEsL+EcLRouQEhLuSSQQKAsLLGEELAShuSlRD+TIAELH+SRLEoA-lul+LS-LuL+LKEGpuQWuQERsuLpQSsEA-KD+IlKLSAElL+LEcuLQEERSQRcsLcsELupEKDuspVQLSEs+RELoELRSALRVLQKEKEQLQEEKQELL-YlR+LEtRLDKlADEKWsEDAs..lsEDccuulshosslDLoDS-DESPtDEu.SpcLuPsuLDEcscstSoPssP+Espc.VVIsQPAPIuPaLSGlAEDssS- 
.............................hu.....tV.F.sVs+.YlPpsplpC+YTLs.hhhPpspDWlGIFK..........V........GW....sos.RDYaTFlWu.sh........P..-s..hspsoshpp.pVtFp....................u.YLP.p.s....ss..p.hYQF.sYV.spp.Gp.lpGsSsPFQ.....FR..s..p.s...-..L....l..shpp.................t.t.pDh.Llls...+....s.t.....h...p.........plc..c........pEpp-.....Lh...p......Lptph...........................tp.hppphtph....ptthtptpp...p........htpL.t.t..p..th........p........t..t.....h.......p.....Epp.............h...........p..........tc.........tch....hp.Lc....pp......thppp................................................t..............t.........p..................phpt.....t.....................................................pht.............p...hp................t......................................p.......................................................................................................................................................................................................................................................................................................................................................................................................................................ttt................................................................................ 0 16 26 55 +7720 PF07889 DUF1664 Protein of unknown function (DUF1664) Fenech M anon Pfam-B_4797 (release 14.0) Family The members of this family are hypothetical plant proteins of unknown function. The region featured in this family is approximately 100 amino acids long. 
30.00 30.00 30.00 30.40 29.80 28.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.67 0.71 -4.33 14 118 2009-09-11 16:08:29 2004-09-09 13:51:16 7 2 27 0 61 110 0 120.00 38 37.07 CHANGED uoG.hshhll.ssslGAlGYGYhWWK............GhphsDlMaVT++shusAssslsppL-plp-slsusK+HLopRIpplDc+LDppp-ltcpspc-VstlppslssIpcDlcslppslpsLcuKlsplEtpQ ..........s..sshs.hllsssslGAl..GYuYhW.WK............GhphoDlMaVT+RshusAssslsKpL-pVssuluusK+HLo...pRIpplDp+l-cpt-lspthcc-V..stl+tslspItpDlpslpphVpsL-sKl.splEtpQ........... 0 14 37 50 +7721 PF07890 Rrp15p DUF1665; Rrp15p Fenech M, Mistry J, Wood V anon Pfam-B_9434 (release 14.0) Family Rrp15p is required for the formation of 60S ribosomal subunits [1]. 21.60 21.60 22.60 22.10 20.40 19.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.04 0.71 -4.10 32 310 2009-01-15 18:05:59 2004-09-09 13:52:49 7 4 276 0 216 307 1 139.90 30 50.85 CHANGED uausuhspILu...........o+htsssps..PILu+sKchh...cphps-cLEpKA................................................ct.hhpEK+phhp+sRV+s..........ls...............................phh-pE+pL+Kl...........Ap+GVV+LFNAVpssQhpsccthpct.ptpphh............................cchl...sc...lSKcpFLDll...............pu ...............................................................................uausuhscILs............pchspspps..slLu+sKphtt....cc....hp..p-c.lEp+s.......................................................................pt.h.tpcK+ph.ppsRl+ssls..................................................................pst-pE+pL++l...........Ap....+GVVpLFNAVpptQtps.p.c.t.h.c.cs.t.t.p.tppt.................................................pchl.....sp..lSKpsFL-hlp.u......................................... 0 72 115 177 +7722 PF07891 DUF1666 Protein of unknown function (DUF1666) Fenech M anon Pfam-B_9387 (release 14.0) Family These sequences are derived from hypothetical plant proteins of unknown function. 
The region in question is approximately 250 residues long. 25.00 25.00 33.00 32.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.81 0.70 -4.96 6 77 2009-09-10 15:06:31 2004-09-09 13:54:16 7 2 16 0 50 75 0 227.60 41 40.08 CHANGED ppDLEhVYVGQlCLSWEhLaWQYcKAh-lhE...cD.ash+pYNpVAGEFQQFQVLLQRFVENEPFQ.GPRVpsYlRpRClh+shLQVPsIR.pDctKsKKssRcE....ppD.sIoospLsEIlcEohplFWcFLRADKccusl.slKuhptoQhp.....DssDlELhhplKpchQK................KE++L+-lhRSssCIlKKFpKpcpcp...sps.hFhutl-LKLVoRVLsMs+lsp-cLtWCpcKLs.........................KIshssR+..lphEsSFsLFPs .....p-LEtsYVuQlCLoWEhLpWpYpphpchhp...........sss..p..ssp.pasplAtcFQQFQVLLQRFlENEPFE.G........RschYsRtRshh.pLLQVPph+.t-p...p-cpp.tpp......t.c.hIsuspllcIhEEoIpsFhtFl+sDKcpss....hhpshhpp..p..hp.....ssh-.pLltp.lppshpK.................Kch+LK-lh+stpshh++hhptp.......pchphhhu.lDlKlVoRVLpMscloc-QLhWCccKhs.........................Klph..h.pt+..lph-.sS..lhFP........................ 0 6 33 42 +7723 PF07892 DUF1667 Protein of unknown function (DUF1667) Fenech M anon Pfam-B_9631 (release 14.0) Family Hypothetical archaeal and bacterial proteins make up this family. A few proteins are annotated as being potential metal-binding proteins, and in fact the members of this family have four highly conserved cysteine residues, but no further literature evidence was found in this regard. 20.20 20.20 20.30 20.60 20.00 19.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.58 0.72 -4.29 35 405 2009-01-15 18:05:59 2004-09-09 13:57:04 6 5 308 1 112 341 7 80.60 40 59.44 CHANGED sRGhcYAcpElpsPpRllTooV+lpsGc.hshlsVKTccPIPKchIh-sM+tLpplpVpAPV+lGDVllcNlhsTGlcllAT+ .............RGttYucpElssPpRllTooV.+.l..c..........uGp.hshlsVKTpcsIPKpplh-shctLpp...lpV...pAPV+hGDVllcslhsTGVDllAT+............... 
1 63 88 101 +7724 PF07893 DUF1668 Protein of unknown function (DUF1668) Fenech M anon Pfam-B_5066 (release 14.0) Family The hypothetical proteins found in this family are expressed by Oryza sativa and are of unknown function. 21.30 21.30 21.50 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.58 0.70 -5.32 43 450 2012-10-05 17:30:43 2004-09-09 14:01:57 8 10 10 0 256 374 0 265.40 19 82.32 CHANGED RRalaLllcstt.ts.....asl++lD...........................................s.stssssssthpthpLP.sshhph...................tshpFhsl.....sss+llus.....-ts.........sp...shlYDssspsVt.................shPs..Ltp.PKhhP..lulsl.................Gss.LY......lh-pssps..ss..........pFEsl....shp...............tttcsWsWps.....L......PP.PP.......Fststp..............lsuYAVV.....sG....spIhlooss.........................G........TYuFDTssp..p.....Wp+h...G-W.tLPFpGpAcYVs..EhsLWhGlussstt.....hl.sAsDlsusst........pP.hh..hthh........chtsP.....................hsupLlpL.G.uG+FCls+hhpst.................................t.stppttspphhshlTulcl...........................stspsspLchlp++up.pa 
...............................................................................................h.phs.............................................................................................................hP.....hph....................hphh.h...........tstllsh........s.t....................tt...shlaD..sttt.....thh............................hPt.......h.t..sp..h...s...h.h.h...........................................................ssp..la......hhpt....................Fcsh...ht..........................ththp............lPs..P.P...a.h...............................ltuaslh......s....pp.lhlShtt...............................G........Tash...D......s..pt...t.....Wpph....GpW..LP.F..G...pu.ass..ch.t..hahGlstts............l.suhslss........................h.....................th...........................ts.lh.h.G...su.....paChhchh........................................................hh.h.hhth....................................................................................................................................................... 1 0 55 167 +7725 PF07894 DUF1669 Protein of unknown function (DUF1669) Fenech M anon Pfam-B_9335 (release 14.0) Family This family is composed of sequences derived from hypothetical eukaryotic proteins of unknown function. Some members of this family are annotated as being potential phospholipases but no literature was found to support this. 
36.60 36.60 36.70 37.60 36.40 36.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.84 0.70 -5.21 23 379 2012-10-02 13:01:53 2004-09-09 14:15:35 7 3 44 0 217 312 0 254.00 40 43.34 CHANGED L--spsp.csstu....psphhYsEptRLAl-sLLpsG.cAapphLppEplhcFLSspElptlhpssph.pssscp...........................tt..sttpstssst-su.....SsTYaPttS.Dt-sP.sLDLGWPs..sssa+GsT+lslahQP..sh.cspssIKEllR+hIppApcVIAlVMDlFTDlDIhpDLl-Aup+RpVsVYlLLDpsslstFLcMCp+hplshpcl+...........NlRVRoVsGssahs+oGp+hpGplpcKFlLlDsc+VhoGSYSFoWosu+lcRs....hlplhoGplVEtFDcEFRpLYApSpsl ................................s.....t.hasEttRLAl-sLlptG.cAahphLppEph...hsFLSt.Elphlhpphpt.....t..s.st.t..................................t..t..s.t....s..tstsss........ShTYaP.tS.DhpsP.tL-LGWPt...psshpGhTcsslahQP...s....spt.sIK-hlRphIppA.ppV.lAlVMDhFTDlDIhpD.ll-Au.+.RtV.sVYlLLDptslphFLcMspchp...lp.t+..l..c...........NhRVRolsGssahs+oGp+.hpGplppKFlllDscpVhsGSY.S.FoWo.su+hcps....hlhlhoG.plVE.FDcEFRpLYAtSps.............................. 0 8 29 86 +7726 PF07895 DUF1673 Protein of unknown function (DUF1673) Fenech M anon Pfam-B_4746 (release 14.0) Family This family contains hypothetical proteins of unknown function expressed by two archaeal species. 
22.90 22.90 23.10 23.50 22.80 22.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.56 0.71 -4.79 23 47 2009-09-14 08:49:41 2004-09-09 14:24:15 6 4 14 0 41 45 6 189.10 18 76.51 CHANGED hhhh-pIKKLMGWCPNA+shEs.ppphs.psF-usstscuc.tpssshcssuhap+t....psphLlhshhhThhhh.lhls.hGlsh......hhLLhGhhlu...Lhhhlhs..................WKtphppa-ultcp.lhcsssKpph.....lhhlhhhlhhhhhhh..hh.t.h.tthshpsllSF...........................luGhhlh.hWlhYhQllYWEKKN+phlahcppps....pchYllsE+p .......................................................c.I++hMGWCPsspt.ps.tpphs.t.hcht..ptst..psts.hs...thhp+h....psphLlhshhhohhhh..lhl..hulsh................hhllhGhhhs...Lhhhlhs....................a+p.hppac..shtph.lhpsp.+pph.....hh.hlhhhlhhhhhhh.......h...hs.phhhuh.............................hsuhhlh..hahha..hphhhacc..+pphhlhhppp.t.....t.hhh.tc........................................................................................................... 0 15 21 21 +7727 PF07896 DUF1674 Protein of unknown function (DUF1674) Fenech M anon Pfam-B_4326 (release 14.0) Family The members of this family are sequences derived from hypothetical eukaryotic and bacterial proteins. The region in question is approximately 60 residues long. 23.50 23.50 23.50 25.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.51 0.72 -3.75 75 599 2009-01-15 18:05:59 2004-09-09 14:35:58 7 4 563 1 312 536 163 65.50 37 65.31 CHANGED Eu-pp+tt...tt.................................P.....tElG...G.scG.EPsRY......GDWEpcGRssDF ......................................................................tttt..t.......................................tpstP........tElG...G.s.cG.....EP.s..RY......GDWEhKGRssDF.......... 0 89 167 248 +7728 PF07897 DUF1675 Protein of unknown function (DUF1675) Fenech M anon Pfam-B_4280 (release 14.0) Family The members of this family are sequences derived from hypothetical plant proteins of unknown function. 
One member of this family (Swiss:Q9SFV5) is annotated as a putative RNA-binding protein, but no evidence was found to support this. 20.50 20.50 20.60 20.50 19.60 20.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.09 0.70 -4.54 6 138 2009-01-15 18:05:59 2004-09-09 14:43:05 6 7 27 0 84 134 0 171.40 27 53.82 CHANGED EIELsLGLSLGGpFGsD....h++s.+LsRSSSlssssshscs.....puushts.ph............sL.RToSLPsEoEEEh+KR+EhQoL+RhEAKR+RsE+...hsusts..............stush.pRccpsusshspssspsstuustssoSSuhsEhssts.stt.spstt.push..h.h.spts.tts..Pssspsppsspspsspcstcs.p.cpsust-sspshhpDMPCVoT+GDGPNGK+V-GFLY+Y.tptEEVRIlCVCHGoFLSPAEFVKHAGGscVs........+PLRHIV .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.phP.hV.ops...Ps....G+plpGhhY+Y..pt.ppVpIVCsCHGsahoPsEFVcHAGu....ps........................................................................... 2 10 49 71 +7729 PF07898 DUF1676 Protein of unknown function (DUF1676) Fenech M anon Pfam-B_4779 (release 14.0) Family This family contains sequences derived from proteins of unknown function expressed by Drosophila melanogaster and Anopheles gambiae. 
24.30 24.30 24.50 24.30 24.10 24.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.33 0.72 -3.74 64 512 2009-01-15 18:05:59 2004-09-09 14:45:18 8 6 30 0 390 577 0 109.70 18 36.55 CHANGED +hlphlschh....pp...........sp.lplssGlplV+ssst....sttspstphtpt.....................hsphlhc+ltpalpoHsLplphsp.ttpt.h.............................t.htEuR.................t+Kh++....hhhPhlh .................................hhphhschh....pp...........sp..lpls..culpll+ssss.....tttspshp.ptt.................................t.hsphlhc+ltpalpoHslplp.lsptttpttt.............................tshtEuR.......................h+Khc+.......hhhPhlh.............................................. 0 103 147 331 +7730 PF07899 Frigida Frigida-like protein Fenech M anon Pfam-B_4728 (release 14.0) Family This family is composed of plant proteins that are similar to FRIGIDA protein expressed by Arabidopsis thaliana (Swiss:Q9FDW0). This protein is probably nuclear and is required for the regulation of flowering time in the late-flowering phenotype. It is known to increase RNA levels of flowering locus C. Allelic variation at the FRIGIDA locus is a major determinant of natural variation in flowering time [1]. 
21.30 21.30 21.50 21.90 20.00 20.70 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.69 0.70 -5.65 22 700 2009-01-15 18:05:59 2004-09-09 14:49:35 6 7 74 0 143 485 0 188.80 38 53.50 CHANGED sspsssspsssts.......pLppLCtpMDucGLhpFl.pphcphs..slppElssAL+tusDPApLVL-ulpsha.sspts...tpphphhshcpsCllLLEsLh.............hps.lss.......sl+-cA+plAhcWKspl.....sspsspsh-uhuFLpLLusFulsupFcp--lhcLlhhsut++Q...sscLspuLGLs.-+h.............................................................................................................sshIcpLlspGcplcAlph...hathsLs-pFsPlslLKsaLpc.s+csst.hhptss.......................s...putspstc+-lsAL+uVlKClE-p+L-tcas.-s..Lpccl.tpLcctctp+++sspssp ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hstlhh...h.......h...............................................................................................................IVE.hlppGhpIEAlph...sasFGh.DKFs...sss.lLpuaL+..uKcs.pth.....................................................................................t.p...h..h.phhpt.p........................................ts........................................................... 0 11 76 106 +7731 PF07900 DUF1670 Protein of unknown function (DUF1670) Fenech M anon Pfam-B_9559 (release 14.0) Family The hypothetical eukaryotic proteins found in this family are of unknown function. 
25.00 25.00 25.00 26.20 24.90 24.80 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.33 0.70 -5.12 6 52 2012-10-04 14:01:12 2004-09-09 14:52:00 6 3 10 0 21 55 62 189.40 31 74.16 CHANGED lhs-apFhsG..Vs+hlscplhphhcp.a..ssplcsGQllahulSs-Ess..GKslp-ppLlsVhLTLls.EDh-shpc.hthsElhcp+llRlspEAa-QsALLTpsDlAhLLshSpsTIS+clcpYcpc.GcllPTRGhl+DIGsuloHKphIlcLYLKGhpToEIARpTpHS.Eul-RYIKDasRVphLhpKGhss-EIphhsGlScplVcEYhEL .....................................h..h.p.h.pthhphhpp..a.....pphp.GQhhahslstppss..GKshpphphhslhLolhs..-D.c.....hppth.t.pphpppplhRlhpEAacQsuhLTptDlA.hlLshSspTlp+plcphpcc.s.llPsRGph+.DlG.suloHKphl.lch.alc.s.hphs-IuRpTpHS.pAlpRYlpsFp+Vhhh.hpcshpspElthlsshoppLlp-Y.t.............................. 0 16 19 19 +7732 PF07901 DUF1672 Protein of unknown function (DUF1672) Fenech M anon Pfam-B_9698 (release 14.0) Family This family is composed of hypothetical bacterial proteins of unknown function. 25.00 25.00 25.20 25.30 22.60 24.90 hmmbuild --amino -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.92 0.70 -5.41 7 690 2009-01-15 18:05:59 2004-09-09 14:53:00 6 1 200 0 7 359 0 239.30 62 91.09 CHANGED ETKSVPEEM-ASKYVGQGFQPPAEKDAIEFAKKH+-chtKhGEQFFhDNFGLKVKATNVVGSGDGVEVaVHCDDHDIVFNASIPFDKshIcp-uShRSpDpGDDMSshVGTVLSGFEYRAQKEKYDNLhKFFK-NEcKYQYTGFTKEAIsKTQNsGYpNEYFYIos.shsLpEYRKYaEPLI+KsDcpFKcGMcpu+Kplshpups-lsTTLFSTKcNFTKcssh--lIchS-cl++h+phPcshplolQlucspINTp+s.YssppslEhtVapp....E ..................ETKSVPEEMEASKYVGQGFQ..PPA...EKDAIEFAKKH+.cchtKhGEQFFhDNFGLKVKATNVVGpsDGVEVaVHC-.DHsIVF.NASlP....h.KshI......cpcu..ShR..S..pDpGDDMShhVGTVLSGFEYRAQKEKYDNLYKFhK-NEcKYQYTGFTKEAINKTQNsGYpNEYFYITh.s.hsLpE.YRKYaEPLI+KsDcpF+cGMppu.+Kpls.hpu.ps.pssoTLFSppcNaoKcppl-s.VI-hS-plcch+.shPp.p.plolQlupphIsT.p+s.YscppslchtV...................................................................................... 
0 4 4 7 +7733 PF07902 Gp58 gp58-like protein Fenech M anon Pfam-B_4289 (release 14.0) Family Sequences found in this family are derived from a number of bacteriophage and prophage proteins. They are similar to gp58 (Swiss:Q38355), a minor structural protein of Lactococcus delbrueckii bacteriophage LL-H [1]. 30.00 30.00 30.10 30.10 29.70 29.80 hmmbuild -o /dev/null HMM SEED 601 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.77 0.70 -6.30 10 178 2009-01-15 18:05:59 2004-09-09 14:56:42 6 11 111 0 20 176 0 345.80 24 41.91 CHANGED MSRDPTLTlDE.......SNLoIGuDGRsYYTFTA-sDspoV+LAsN.ClGTTRFNQLMIERGsKsTsYVAPVVVEGoGpsTGLFKs.......LKElNLELTDTcNSpLWuKIKLTNpGMLpEYacssIKoEIlpoAcGItpRIS-DT-+KL............................................ALIN-oIsGIRR-YQDADRcLS....uSYQAGI-GLK........ATMANDKlGLpAEIpsoAQ....GLSQ+YDsEl+pLSAKI....TTTSSGTTEAYESKLsGLRAEFTRSNQGMRsELE.........................ScISG..LpusQQoTApQISQEI+NRpGAVSRVQQsLDSYQRRLQsAE......cNYSSLTQTVpGLQScVSuPssplpSRhTQLts.I-Q+VTRscVpSlIspSGDSIhLAI.Kuth.pSKMSusEIISAINLNuYG.V+IuGcpIALDGNTTVNGAF..uAKIGEhIKL+ADQIIuGTIDAsKIpVINLNASSIV.......GLDAN.......FIKA+ItYAITuL......LEGKVIKARNGAMhIDLsSuphsFNpsApINFNos-NALhRpD.uTpTtFl+FoNussh....uapGus.YAthsIoS......suDGl-sssSGpFuGlRhaRhAouYppTusVDpsEIYGDsVlItcshNhshGaphp.s+sspllDhN.....h.A.u.hh..............hT 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................l.t........................................h..t.....t..th.....o.......lp.t....s...sp.....h.sl.........psthsus-llStINLss..tG...l.pIs...Gcp.ltls.GsThl..ss..sh............sutl.up..................lpAspIhsGolsAsclpshsl..sAspIs.....................tlsAs...............hhp.thh...t............................................................................................................................................................................................................................................................s....................................................................................................... 0 3 11 17 +7734 PF07903 PaRep2a PaRep2a protein Fenech M anon Pfam-B_4102 (release 14.0) Family This is a family of proteins expressed by the crenarchaeon Pyrobaculum aerophilum. The members are highly variable in length and level of conservation. The presence of numerous frameshifts and internal stop codons in multiple alignments are thought to indicate that most family members are no longer functional [1]. 
25.00 25.00 32.90 32.80 21.10 19.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.68 0.71 -4.46 8 34 2009-01-15 18:05:59 2004-09-09 14:58:44 6 1 1 0 34 34 0 80.20 53 72.37 CHANGED hsAlspWYhpCFGcs...Gp.tKVVKRLEEYauMCQMtpAsKREaG.+.splhhSEaALRRAFWW-GEWpGKPhSCFVTE+cAVCKVGcpphcFhVhssscGVYl+PEh.Lhp-WIKVAHRGsD ...................................................htEhG......h..-huL+tuF..ctt.hstshSCFVTE++AlCKVG-+hAsFYVFDTPcGVYL+PEIKLlp-WIKVAaRGs....... 0 34 34 34 +7735 PF07904 Eaf7 CT20; Chromatin modification-related protein EAF7 Wood V, Bateman A anon Wood V Family The S. cerevisiae member of this family Swiss:P53911 is part of NuA4, the only essential histone acetyltransferase complex in Saccharomyces cerevisiae involved in global histone acetylation [1]. 20.80 20.80 22.00 22.00 20.60 20.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.27 0.72 -4.01 19 246 2009-01-15 18:05:59 2004-09-09 15:29:00 8 3 222 0 178 256 0 90.30 31 35.27 CHANGED EpEhpLF+ultp.aKPV...Glp+HFtMhsIh-clspt..............hscphsupslWcKLsphYsLctLD-hE.....pshshsscpc..........................-FsLPps-auc ..........................EhpLhcuhht.a+Ps............Ghp+HFpMlsIt-+hppp.........................................hscphsscsIWc+LsohYsLptL--pE...............h..shssppc..............................................................pFsLPpp.ht.t............................................................................. 0 52 92 145 +7736 PF07905 PucR Purine catabolism regulatory protein-like family Fenech M anon Pfam-B_4388 (release 14.0) Family The bacterial proteins found in this family are similar to the purine catabolism regulatory protein expressed by Bacillus subtilis (PucR, Swiss:O32138). PucR is thought to be a transcriptional activator involved in the induction of the purine degradation pathway, and may contain a LysR-like DNA-binding domain. 
It is similar to LysR-type regulators in that it represses its own expression [1]. The other members of this family are also annotated as being putative regulatory proteins. 28.90 28.90 28.90 29.80 27.20 28.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.22 0.71 -4.54 113 1282 2009-09-13 12:56:03 2004-09-09 15:29:15 6 5 619 0 359 1123 18 122.10 25 24.49 CHANGED p-lLphsslpts..plluGp.....psLc+slcWVpls...Ehs.Dhssa..........LpGGELlLTTGhsltp.ssp...hppa...lcpLscsusuu..Lslths.hhp.p.lP..psllphAcphslPllplst...plsFsslsptltptlh ................................-lLphstLptt...pllu...Gc.....ps.L.s.p.s.Vphlplh...Ess...Dh...s.sa..........lpuGEL.lLToGhshpc.stp...........hppa...lcpLtp.tss..uuLslph.s.t.hht..plP...ppll...phAsphslPllplPt....shsasplhptlhp.h..................... 0 152 276 325 +7737 PF07906 Toxin_15 ShET2 enterotoxin, N-terminal region Fenech M anon Pfam-B_4512 (release 14.0) Family The members of this family are are sequences that are similar to the N-terminal half of the ShET2 enterotoxin produced by Shigella flexneri (Swiss:Q47635) and Escherichia coli (Swiss:Q47634). This protein was found to confer toxigenicity in the Ussing chamber, and the N-terminal region was found to be important for the protein's enterotoxic effect. It is thought to be a hydrophobic protein that forms inclusion bodies within the bacterial cell, and may be secreted by the Mxi system [1]. Most members of this family are annotated as putative enterotoxins, but one member (Swiss:Q8X606) is a regulator of acetyl CoA synthetase, and another two members (Swiss:P76205 and Swiss:P23325) are annotated as ankyrin-like regulatory proteins and contain Ank repeats (Pfam:PF00023). 
25.00 25.00 29.70 29.40 23.50 20.10 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.92 0.70 -4.94 8 670 2009-01-15 18:05:59 2004-09-09 15:30:05 8 1 306 0 7 369 0 250.50 38 50.79 CHANGED o...hph........pphlppcphosps.pchlptcpK..PYhSc.KppsplNLNGps..s-psuchIhCRHLAuQYhhDhh..pssGKVsh.ccaSSs-pIpp+lslpcccphpsl..p.PuslYalsNscFGpslsshFppMcppucsspolhlposNHuMAlRL+IKcoppGp.+.aVVshYDPNtTssplRs...ssstsclpphSlccFls....ppthpsYu.pcsspphhs.psphPcppslh..t.......scls.sPLsssslhhhMstGhsctIpplhcplchhscpc..sphhplLsA+s ..............................h.....h...........pp.hs.......hp.spK......c.p..spl.pLNGpshF...s..tpP..p...stIhCRHluhQYh.D.h..pspG+lsh.ccYSSsEp..ltpHlshppcp.....ph.....hsLhpp.PtGppVlAstDFGhslphaFs+MpsNs....lShMuAILhssN.............HshuVRLRIKp..ospGphc.YVVSlYDPNsTNsplRh...spsppshthhSL.cFhs...ss.sh.pWuscplh..spsluIl..PhLPcpp.lhlhssh.......schs.sPlpPush.LlMuhG.spplhplh.plph....hschc....hhphlsAhN.................... 0 2 5 6 +7738 PF07907 YibE_F YibE/F-like protein Fenech M anon Pfam-B_4781 (release 14.0) Family The sequences featured in this family are similar to two proteins expressed by Lactococcus lactis, YibE (Swiss:Q9CHC5) and YibF (Swiss:Q9CHC4). Most of the members of this family are annotated as being putative membrane proteins, and in fact the sequences contain a high proportion of hydrophobic residues. 
27.20 27.20 28.10 28.10 26.00 25.90 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.36 0.70 -5.19 89 1478 2009-01-15 18:05:59 2004-09-09 15:37:41 6 2 897 0 279 1002 29 239.00 28 70.23 CHANGED ss.lhhlshlahhlllllGtppGlpul.luLhlshhllhhhl.lshhhtGhsslllsllhshlhhhloLhllsGhsp+ohsAhluTlhulhlshllshhhhphsplsGhs.EpsphLthhstt...lph.tlhhuullIuuLGAlhDluhoIuSulhElhppsP....ploh+pLhpSGhslG+DlhGThsNTLlhAahGuulshlllah..thshshspllNtchhuhEllpslsGuIGllLslPlTshl.suhh ...............lhllshlhlhlllllGtcpG.ltul.loLhlNhhllhhhl...l.hh.tGhshhllshlhsllhshlsLhllsGh.sh.+Th.sAhluTllush.lshhlshlhhthsphpG...hs..h...Ep...h...p.....lth...hshs....lph..tlhluslllusLGAlhDluloIuSulaElhpps.P....ploh+cLhpS.G.hplG+-lhGThsNTLlhuahGusLs..h..hlh.ah..p...sh..ohsphls.p.huhEllpslhGuIGllLolPlTshlsuh.h............. 0 119 208 254 +7739 PF07908 D-aminoacyl_C D-aminoacylase, C-terminal region Fenech M anon Pfam-B_13711 (release 14.0) Family D-aminoacylase (Swiss:Q9AGH8, EC:3.5.1.81) hydrolyses a wide variety of N-acyl derivatives of neutral D-amino acids, in a zinc-dependent manner. The enzyme is composed of a small beta-barrel domain and a larger catalytic alpha/beta-barrel. The C-terminal region featured in this family forms part of the beta-barrel domain, together with a short N-terminal segment. The beta-strands of both barrels were found to superimpose well. The small beta-barrel domain does not seem to contribute to the substrate-binding site or to be involved in the catalytic process [1]. 22.40 22.40 22.40 22.40 22.30 22.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.14 0.72 -4.65 150 859 2009-09-11 00:09:33 2004-09-14 13:31:24 7 23 530 12 324 878 733 47.40 33 9.08 CHANGED Ppp.lpDtuoas..cPhphAsGlctVlVNGtlsh.c.......sGp....sTG......spsG+llRps ........stp.ltDhuoa...p...cPpphupGIctVhVNGthsh.c..........cGp....hTu......spsG+hl+t.t............ 
0 114 222 284 +7740 PF07909 DUF1663 Protein of unknown function (DUF1663) Fenech M anon Pfam-B_4106 (release 14.0) Family The members of this family are hypothetical proteins expressed by Trypanosoma cruzi, a eukaryotic parasite that causes Chagas' disease in humans. This region is found as multiple copies per protein. 25.00 25.00 53.80 51.70 18.00 17.40 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.87 0.70 -5.97 2 85 2009-01-15 18:05:59 2004-09-14 13:33:30 6 9 6 0 4 85 0 202.10 76 97.85 CHANGED htlpRcAVspcE......................RusRchlEtucAAh.DE..............LG.....-.hhssTHE+................uspshsAcEsttRGp.VGpE.............RcshsGhH...cpslcuEER...hsR+hLphtEstuhsplhp..........chR.hppp+t.phhtA.pDAths.httEER.......RshlsScE+hsRRhlEtG.hut.D...phGcEhusA......saphAlpthuhpEsAARtp.h.E.....YuhpRDul.Sp.psshpsLc+GpustVs..............ELhpEYcpsscchhsEtlhu..D........AsRtplVhEE.....sLH+ssl.oE....p+AVR+...............................slERsEttAhD.l..........................GEp.hSsshpR...........................slEsLttE ..........................................................................................................................................................................RcshhG.LH...RDAVDSEER...AVRRpLE+GEAAAVDELGE..........EYRSATHERsVEALAAEEDAARGQLVGEEREDshGLHRDAVDSEERAVRRCLERGEAAAVD....ELGE..EYtSA......THERsVEALAAEEDAARGQLVGEER............EDsaGLHRDAVDSEERAVRRCLERGEAAA..VD................E....LGEEYtSATHERsVEALAAEED........AsRsplVhEE..................................................................................................................................................................... 0 1 2 4 +7741 PF07910 Peptidase_C78 DUF1671; Peptidase family C78 Fenech M, Bateman A anon Pfam-B_9699 (release 14.0) Family This family formerly known as DUF1671 has been shown to be a cysteine peptidase called (Ufm1)-specific protease [1]. 
29.10 29.10 30.50 30.30 25.70 28.50 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.77 0.70 -5.22 38 453 2012-10-10 12:56:15 2004-09-14 13:38:51 8 18 236 3 315 461 3 187.60 30 42.79 CHANGED upthhlhushshaHYtp.uhsDcGWGCuYRslQhllSWhht.......phh....s.....slPoIhcIQphL.chh.DK..ss........shhGo+cWIGosEsthllspL..slps+lhpht...............................................psu-l...sphhptltpaFp..sp.u....................sPlhltt..s........................shuhTIlGl-hp.......psuphphLlhDPHasus.ssh+hlhpcshsua...............+sssh..hp+...sphYpls .............................s...hh.sshthhHa......hp..thsDp....G.....WGCu.........YRslQhlhSahh.t..........thh.................s........slP.ohhclQp..hl.psh.Dc..ss..........shhGo+pWIGohEst....hlLspl..sl......ps+llphp..................................................................................pssph...sphhc.lh........pa..Fp..st....s................................sPlhltt....s........................shu+TIlGlphp..................tss..p...hphLlhDPpasus...p.hp........hlhpp..shssh..........................+s..ssh..h.p...pt.Yplh......................................... 0 110 167 249 +7742 PF07911 DUF1677 Protein of unknown function (DUF1677) Fenech M anon Pfam-B_4922 (release 14.0) Family The sequences found in this family are all derived from hypothetical plant proteins of unknown function. The region features a number of highly conserved cysteine residues. 25.00 25.00 27.30 26.80 23.00 22.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.33 0.72 -3.80 26 199 2009-01-15 18:05:59 2004-09-14 13:44:12 8 4 19 0 134 186 0 88.20 45 57.50 CHANGED ElE.ss.+CECCGhpEECTspYItpVRscasG+WlCGLCuEAVp-Eht...R........psshslEEAlctHhuhCccFsuts...sPshp...lssuMRclLR+p .......lEss+CECCGhpEECTstYIspVRs+atG+WlCGLCuEAV+-Eht...R.........ps.shshEEAlptHhuhCpcFps...s.....sP..slp...lspuMRplLR+p............. 
0 12 72 108 +7743 PF07912 ERp29_N ERp29, N-terminal domain Fenech M anon Pfam-B_28781 (release 14.0) Domain ERp29 (Swiss:P52555) is a ubiquitously expressed endoplasmic reticulum protein, and is involved in the processes of protein maturation and protein secretion in this organelle [1,2]. The protein exists as a homodimer, with each monomer being composed of two domains. The N-terminal domain featured in this family is organised into a thioredoxin-like fold that resembles the a domain of human protein disulphide isomerase (PDI) [2]. However, this domain lacks the C-X-X-C motif required for the redox function of PDI; it is therefore thought that ERp29's function is similar to the chaperone function of PDI [2]. The N-terminal domain is exclusively responsible for the homodimerisation of the protein, without covalent linkages or additional contacts with other domains [2]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.50 0.71 -4.20 3 92 2012-10-03 14:45:55 2004-09-14 13:52:51 8 4 73 13 52 132 2 117.60 51 47.87 CHANGED lTs+GCVDLDolTF-KVlcKF+YSLVKFDTAYPYGEKHEAFTuLAhEssuoT-ELLlApVGIKDYGEp-Nt-LG-RYKlDKEsYPVIaLF+.GDu-sPVpa.PucG-VTlDuL++FLKupTuLYIGhP ........................................................spGslsLDploF.Kl.I.sK..Ka.l.LVKFDstYP.Y.G.EK..p...D...E..Fp...+lA.E.susuoc.DLLVAEV......GIp.D.YG-K..N.h-LuE+YKl-K-.saPVh.h.LFp.Gs...h-sPl.a.....sGslps.sslp+alKup..u..lYlGhP......................................... 1 15 18 33 +7744 PF07913 DUF1678 Protein of unknown function (DUF1678) Fenech M anon Pfam-B_4886 (release 14.0) Family This family is composed of uncharacterized proteins expressed by Methanopyrus kandleri, a hyperthermophilic archaebacterium. 
25.00 25.00 35.50 141.40 19.10 18.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.35 0.71 -4.76 5 16 2009-01-15 18:05:59 2004-09-14 14:21:22 6 1 1 0 16 16 0 197.30 70 97.59 CHANGED ss.+htlP...PcDPVEplRsLRVLhEshRRGphPhLtsoYRoVNGpshGPYY.ARWR.sSRaERGRTLYLGKSENESVpFlE.WLVSLsRtEVLELARH......LMRNLRSVLKoLLscVSuLPYK+...ARRVLAR.............GL..ALAFD.....ARPuuSP+.IRDVLEELPDRLESFllRTLGGWPAaYSS+Lc+llRuRRuSp-t+R.............tcVPDs..plcRhK.Rc ....p....HlP..IPpDPVERIRALRVLREVaRRG+KPSLEVTYRTVsGSTCGPYYVARWRRDSRa+HGRTLYLGKPENESVpFVE.WLVSLDRpEVLELARH......LMRNLRSVLKTLLTEVSsLPYK+...ARRVLAR.............GL..ALsFD.....ARPSpSPR.IRDlLEELPDRLESFhlRTLGGWPAHYSS+LcKlIRuRR+SLDG+H..............ElPDVtLELERWKLR.+.......... 0 16 16 16 +7745 PF07914 DUF1679 Protein of unknown function (DUF1679) Fenech M anon Pfam-B_4694 (release 14.0) Family The region featured in this family is found in a number of C. elegans proteins, in one case (Swiss:Q19034) as a repeat. In many of the family members, this region is associated with the CHK region described by SMART as being found in ZnF_C4 and HLH domain-containing kinases. In fact, one member of this family (Swiss:Q9GUC1) is annotated as being a member of the nuclear hormone receptor family, and contains regions typical of such proteins (Interpro:IPR000536, Interpro:IPR008946, and Interpro:IPR001628). 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.44 0.70 -5.84 20 190 2012-10-02 22:05:25 2004-09-14 14:22:31 6 7 8 0 178 1879 311 303.80 22 85.78 CHANGED Lh-suDGLLpTHVTWcDVpcslpcphsTcAchGcs+phssIuDhpGFMS+luLlEs-Ws...t.psscpLPcKFsLKIoSpLslhslschhc.pssssascEc....hcthpphs+chHNREVshYKlLpKhN.pssl.hsKVYhhKpF-...--NcLKGalhhEalsslcshpha-slss--Lhsll+uIAshpAhuhpLo...--Eppph.tGt-a.c.hhpphhs-cslcshacphRphh.s...sE+lcEhlchhcpYh.plh.hcphsplschlGh.+sVLsHGDLWsuNlLaspps-GphplctlIDaQslphusPA.DLsRLhlosLSspDRRp+hpclL-paY-sFlcsL..ts.ctPYohcpLc-SYpLYFPhhulhlLPhlushl-sp...shspcct-ph+chshcKhtuLhEDlhchHc.sh .....................................................................................................................................................................................................................................................................................Gh..S...hhhlt.pa.........t..hPtphhlK................hs................t.....h..h.p...........h..t...........t.h...h........hHN...pEsthY.p.....h.h.t.php............th.......hs..+....................h...Y.....hh..pthp.......pt....t.....h...t...G...hl..hh.-..h..h..t..s..h.....h.....h.....t..h.....ht....sh...s........pp..l...h..l.................hcs.........luthpuh.u.h.......p...............t.c..p........h...p........h.....................t..h.........t........h...h......t..p.......h....h....t...................................h....p..t....h.....h.....t..................h........t............................................t....p.h.......t......p........h.....h..........p.h..h....t...h..................................................................h.....p..........h........p.....p.....h........p....p........h..h........G...............h......................V........l...sHuDlhtsNhla.......p.....p....t........p....p....................................p..l......t...........u...................l....I..D...........a...............
.Qh................sp...hGsss.D....lh..+.l.h.h.........s....h..........o..s.p...............p..R+.....t.th..ptlL...c.a....a...p...t....h...h.p....h.........................................p.........s........a...o....h....p.....p..l......p......p.....t.......Y...p......h.....h....h..s...h....h....s.....h...h..h.........h...s...h.....h........h..h..................................................t..p....thht-h.........h........................................................................................... 0 85 98 178 +7746 PF07915 PRKCSH Glucosidase II beta subunit-like protein Fenech M anon Pfam-B_9407 (release 14.0) Family The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing [1]. The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum [1]. Mutations in the gene coding for PRKCSH have been found to be involved in the development of autosomal dominant polycystic liver disease (ADPLD), but the precise role the protein has in the pathogenesis of this disease is unknown [2]. This family also includes an ER sensor for misfolded glycoproteins and is therefore likely to be a generic sugar binding domain. 
21.80 21.80 21.80 21.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.14 0.72 -3.18 104 586 2012-10-02 14:19:21 2004-09-14 14:25:08 8 29 264 2 365 860 7 84.40 27 19.03 CHANGED upChh...ht.su.......haoYchCah.....cplpQhc............................tctsspphhLGpa..................................t..t.p.p...............tp..ttt...a.............hshtassGs.hC ............Chh...tt..pu.......WWoYEaCaG........+clcQaHt.....................................spchptpphh..LGpa................................................................................ptt.ph.tpp.ssp..............................t.t.....tp.....tt.t..tha..................................hsphhssGohC................................................................................................................................... 0 128 189 295 +7747 PF07916 TraG_N TraG-like protein, N-terminal region Fenech M, Mistry J, Coggill P anon Pfam-B_4841 (release 14.0) & Pfam-B_689 (release 23.0) Family The bacterial sequences found in this family are similar to the N-terminal region of the TraG protein (Swiss:P33790). This is a membrane-spanning protein, with three predicted transmembrane segments and two periplasmic regions [1]. TraG protein is known to be essential for DNA transfer in the process of conjugation, with the N-terminal portion being required for F pilus assembly [1,2]. The protein is thought to interact with the periplasmic domain of TraN (Swiss:P24082) to stabilise mating-cell interactions [2]. 
21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.73 0.70 -6.05 62 860 2009-01-15 18:05:59 2004-09-14 15:13:40 6 7 549 0 140 792 47 399.90 21 55.85 CHANGED Elash.hsG.hlpssl.ulhshoGhhs....hs.shlhls...hhlulhhtuhshshptsh..tph.phhhsshllhhh..hhsP......psslphp-pp...............................sshstpsssVPlhhAhhtulooslstuhspshpssh.s.......................h+hhptphptphh.sP..............tltpplpcFhppChhhshth..tp.p.pshh........ss...sc.hshls..................pupsPhpshhhssspsss....................sh.TCpphas......sL+scl.hspss...........shhhhspthhsth.tsttt...shlpshl.spshphhht.u.pshsshhppshhhsshpsuhsp.....t...........husstuhtptshshtsttphstpsLPhhpshlhhhhhshhPlllhluh..h...hshuhshhthahhshha.LutWhshhhllshhsthhht..................sttshshh.shhshstsp.thshlhGhhhlslP.hhstslshuGspshuslss.hhususssuppu ...............................................................phahh.h.u.hlppsh.ul.hhhhuhhs.............hs..shhhhs...hh.l.tsh.h...uh.........s.ptsh..............h.h.th...hhsh...h...ll.hhh...hhhs.......hsslpl.st...................................sshs.ttsspVPlhhuh.hulsstl.stuhstshtphh..s.........................................hp.hp.h..ph...p....hhhh..ss...................tltpphtsahppChhhshhh....tp.t..pphh.........ss........s.hshlh.................................................................pp.psh.pth..sss......................................sh.oCpphhs.............tL+stl...tphp........................thhhhhp.thhsh...h...s.........................shhhth...h..s.pshph...........u..........tshpphhtps.hhssh.tp.uhsp.........................hssst..u.htttphsh..hshhphhhphl....Phh.shlphhhhhhhPllhhhuh.....h....phshthhthahhshha.lthW...hhlhshhhph.h.t.........................stsshsh........th.p.t..t.......ht.s...hGhh....hh...hlP.hhshh...hsh.u...u.htsh..sshhs...hhsss.tsst......................................................................................................................................................
....................................... 0 28 72 112 +7749 PF07918 CAP160 CAP160 repeat Fenech M anon Pfam-B_9359 (release 14.0) Repeat This region featured in this family is repeated in spinach cold acclimation protein CAP160 (Swiss:O50054) CAP160 is induced during periods of drought stress; its precise function is unknown but it has been implicated in the stabilisation of membranes, cytoskeletal elements, and ribosomes. By acting as a compatible solute, it may reduce the toxic effects of cellular solutes that accumulate at high concentration during dehydration; it may also function as an enzyme that produces such a solute [1]. Other members of this family are also induced by water stress, abscisic acid, and/or low temperature, such as desiccation-responsive protein 29B (Swiss:Q04980) and CDet11-24 protein (Swiss:O23764). 25.00 25.00 28.80 28.10 18.40 23.30 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.79 0.72 -6.76 0.72 -4.32 7 30 2009-01-15 18:05:59 2004-09-16 15:00:16 6 3 9 0 15 40 0 26.40 67 7.17 CHANGED ISsAsuslADKAsuAKNsVASKLGYGs ..ISSATSAIsDKAluAKNsVASKLGYGs. 0 2 11 12 +7750 PF07919 Gryzun DUF1683; Gryzun, putative trafficking through Golgi Fenech M, Pollington J anon Pfam-B_9179 (release 14.0) Family The proteins featured in this family are all eukaryotic, and many of them are annotated as being Gryzun. Gryzun is distantly related to, but distinct from, the Trs130 subunit of the TRAPP complex but is absent from S. cerevisiae. RNAi of human Gryzun (Swiss:Q7Z392) blocks Golgi exit. Thus the family is likely to be involved with trafficking of proteins through membranes, perhaps as part of the TRAPP complex. 
26.20 26.20 26.30 27.10 26.00 26.10 hmmbuild -o /dev/null HMM SEED 554 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.70 0.70 -6.19 51 282 2012-10-04 00:47:01 2004-09-16 15:05:11 7 8 231 0 202 293 2 497.10 19 45.31 CHANGED lplch.cchhsh..............................lpsshsFpps.phpsspshphplslpSss...ssPlclsplpltFssphpshhhptsp............................................th.s.tsltLhPspsphhshthhscp.....uGchclsulphplt.......................ppthphshshpsp..stttshhhhptppsst..phhlt......................tpstsplhsp...........................sPplplph................shpsshhssEhhsls........lpIpNpcctstpsshththh.........................s...t..sttsps.h.....t..pt.tt.hphslu.lptusptshplhlp..........shpssphtLplphhYpl...............................sssspsslhpsts...................hplshhpPF.phsa.....chhs+h+spsh..................s.............hhhpppahLpsphhshu..........sLhl.ph...............................slplhstssshhs.htppt......................psstlpspp.tt.ph..................................................................................................................................................................thtph.hsscpsssh.sslpIpW+R......................sps....ts.....shssoslslPcl..l.ss.hplhsph......sspstlss.lplchplcN.sotphhshplsh................-sScs..FhFSGs+......phslp.llP.....hSc+plpasLhshps.............Ghh..........................hlP.plplhs 
...............................................................................................................................................................................................................h..lt..tph.sh.......................lpsphhFpp...phpsspshphplhlpups........stslphsplplthss..t.......s.hh.hpt..........................................................sttshpl.ssp.hth..hphth.scp.....stchchsslpl.lt........................ppthphhhshp....t..ttttt..hhpttps.t..ph.hh.................................tt.hh..lhsp.....................................splplph.......................................tht.s.hhssEhhsls........lplpspc.ct..hps.hh.hhhh..........................................t..s.tspsh.............s.tp....t...hths..lupltsspphphh.lhlp............s.pssph.l...lphtY.l..................................tpp..s.h.h.psts.........................hplthh..PF...t............chh.phhsp.h............................................................hhhp.....ahLhsphhshu...............................sL.l.s..........................................plplh.st.s....shtp..tp.s.................................thhltstp.tttth...................................................................................................................................................p.shtst.ps.ssh.sphhlpW+R..............................ss.ts.........................hsposlslPcl..l.......t..hpl.sph...........ss.shl....tts.....htlphplpN..oshh.shpl.s.h................................-sS-s......FhFSG.K...................................p.hslp.llP.....hocpphhapl...hPhhs..........................Gh............................hP.plpl.......................................................................... 0 66 109 164 +7751 PF07920 DUF1684 Protein of unknown function (DUF1684) Fenech M anon Pfam-B_9328 (release 14.0) Family The sequences featured in this family are found in hypothetical archaeal and bacterial proteins of unknown function. The region in question is approximately 200 amino acids long. 
25.00 25.00 26.20 25.90 24.70 19.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.98 0.71 -4.34 59 312 2009-01-15 18:05:59 2004-09-16 15:12:52 6 2 229 4 161 351 127 142.30 35 56.67 CHANGED h.psFpGLsaashDssaplpA...paps.........hsss.....................cshthtoss..GtttphhphGplpFpl....sG.pphpLpsap........thsstLFlsFsDtTsGp..poYuuGRYLphphsssp.u.........p.....lsLDFNpAYNP.CAYsstaoCPlPPtpNcLslslpAGEKp ..........ppFpGlshashDspaplpA...paps.........h.s.ss.....................cslslssss..Gt.ppph..tsGplpFph.......sG..pphpLpsht..........ttsssLalsFsDtToGp....poYuuG....RaLphshsssp.G................plslDFN+AaNPsCAaos.thsCPlPPspNpLslsVpAGEK...... 0 54 121 154 +7752 PF07921 Fibritin_C Fibritin C-terminal region Fenech M anon Pfam-B_31175 (release 14.0) Family This family features sequences bearing similarity to the C-terminal portion of the bacteriophage T4 protein fibritin (Swiss:P10104). This protein is responsible for attachment of long tail fibres to virus particle, and forms the 'whiskers' or fibres on the neck of the virion. The region seen in this family contains an N-terminal coiled-coil portion and the C-terminal globular foldon domain (residues 457-486), which is essential for fibritin trimerisation and folding [1]. This domain consists of a beta-hairpin; three such hairpins come together in a beta-propeller-like arrangement in the trimer, which is stabilised by hydrogen bonds, salt bridges and hydrophobic interactions [1]. 
20.40 20.40 20.40 21.10 19.70 19.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.75 0.72 -4.07 9 39 2009-01-15 18:05:59 2004-09-16 15:16:02 7 3 37 32 1 56 4 96.60 34 16.66 CHANGED hsphspslpD.lpsclGsssSGlpusslphsstl.Gssssss.slpccGlppolKs.psslt..........................G.......hls-ssssG......phYhcccGtWsplssh ..................sthssslQs.lQs-IGNssoGlpGpllpLss.ltGsNssus.sV-c+GlpsolKs.psshs....................................G.......hls-sPpDG......phYsR+cGtWs.lss................. 0 0 1 1 +7753 PF07922 Glyco_transf_52 Glycosyltransferase family 52 Fenech M anon Pfam-B_2778 (release 14.0) Family This family features glycosyltransferases belonging to glycosyltransferase family 52 [1], which have alpha-2,3- sialyltransferase (EC:4.2.99.4) and alpha-glucosyltransferase (EC 2.4.1.-) activity. For example, beta-galactoside alpha-2,3- sialyltransferase expressed by Neisseria meningitidis (Swiss:P72097) is a member of this family and is involved in a step of lipooligosaccharide biosynthesis requiring sialic acid transfer; these lipooligosaccharides are thought to be important in the process of pathogenesis [2]. This family includes several bacterial lipooligosaccharide sialyltransferases similar to the Haemophilus ducreyi LST protein. Haemophilus ducreyi is the cause of the sexually transmitted disease chancroid and produces a lipooligosaccharide (LOS) containing a terminal sialyl N-acetyllactosamine trisaccharide [3]. 
21.00 21.00 21.40 21.40 20.80 20.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.92 0.70 -5.36 16 318 2010-01-08 16:41:35 2004-09-16 15:19:39 6 3 241 4 28 224 7 254.60 24 82.76 CHANGED ppphhhlhhsssts.chshhhcplppps.pshhhppsppp.ph......hhhhhlc.phhp..tchhcplahuN.splhhphhLsth....phhsh-DGhsNhpppshhhtpp.thh..............t++lhphhlsschsl.chhp.pKh+as.hKs...pcNhh....ppsphlslhp...pplpshpsss......slLlsQ...Plhp.D............ccEp....Ichhcsllpcashp....hhhPHPR-shchs..hhhlsscllhEsY.hcllph..hpthplhThFSosslshhspss.hclshl+sph .................................................................................................................h.pph.hlhh..sps.s.K..phahp..plt..h..s.p..c..s...hh.tstphpphh.h........hhhhhK..plhp.........pphc.pla..hA.shch.hhphh.LSph..t.c....lp..TFDDGosN..l..ppsh.h..ht.pp...hht........................tp+lhp...hhhs.p.c..h.sls...chh...phspt+aola.s...hpNlh....cptphl...L.hc........t......t.lssppss.........hplLLup....P.h.p..s...................cccp.....lph.hpchlcp.hshc........hhhPHP...Rps...hchp..t.lhhl.s.sph....l..hE-hhhc.h..lcp.......hhphclashhSosthsh...shss..hcshtlps.................. 0 5 9 16 +7754 PF07923 N1221 N1221-like protein Fenech M anon Pfam-B_9309 (release 14.0) Family The sequences featured in this family are similar to a hypothetical protein product of ORF N1221 in the CPT1-SPC98 intergenic region of the yeast genome (Swiss:P53917). This encodes an acidic polypeptide with several possible transmembrane regions [1]. 
21.20 21.20 22.60 22.10 19.80 19.10 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.84 0.70 -5.57 26 320 2009-01-15 18:05:59 2004-09-16 15:25:12 8 8 225 0 215 308 0 276.60 28 32.65 CHANGED pPshcFpYuDspshscElsEWFsYs..-hs.hhts...+ssF-ccap.a.............pWpctshtpc+sahtpllssLpp.s.thttR..hcsLpslhYluhGsaupss..............................ScspQlphh+pNshLLhphushtsLhplLsphhc.............pstsspssssshsths..........pp-ltllLTlhYlhlpssppp..pspsppphhlcshltsLtcP......sLhshLhchIs..+hRWssssshPl+......................pllLLlWKslLLtFGGpcpLccsKptlctthslp..........t.tpsppsshlTsSPLDYphFRp-Isu+YPsassP..pshl......Ph-h-N .............................................................................................s.ssh-FpYsDss.shttEls.EhasYs....-h...phhhs....ppsFEppap.....t................................pW....chsp..pppcs.......ahh..pllc..tLc...s.s....stptR........hcshcsllYlspGs.au-ss..............................oc.sc.hphh+hNshLLhp..hGshsslh-lLpht.h.-.............................................pstssss..s.hchss.hs.....................................hscss-LpllLolh....Y.lhlEsh+pc..........tt.st.tth.hpshhtp..Ltps................................shshhLhshls..+a.p......s.puspaPh+......................+llLLLWKslL..hsh..G....G.hc.pLpphKtthcthhuls............................................t..pu.sss...sshsss.l-.p..................................t.................................................... 1 55 96 163 +7755 PF07924 NuiA Nuclease A inhibitor-like protein Fenech M anon Pfam-B_43172 (release 14.0) Domain This family consists of protein sequences that are similar to the nuclease A inhibitor expressed by bacteria of the genus Anabaena ((NuiA, Swiss:Q44296). This sequence is organised to form an alpha-beta-alpha sandwich fold, which is similar to the PR-1-like fold. 
NuiA interacts with nuclease A by means of residues located at one end of the molecule, including residues making up the loop between helices III and IV and the loop between strands C and D. The mechanism of inhibition of nuclease A by NuiA is as yet incompletely understood [1]. 20.50 20.50 20.70 38.60 20.00 19.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.38 0.71 -4.25 6 25 2009-01-15 18:05:59 2004-09-16 15:30:56 6 1 21 3 12 30 0 128.40 35 95.48 CHANGED TpsspshLcpLcpAocGLLFMSEoDaPFpsFhW.sshhsloPEpVLptsscs.DssVpslslDsFFusATT.p-WasscEpsTlppFQpLl-TLKssLp-lpVYRlG-lplDVYIlGcsssGsLAGLpT+VVET .....................pphhcpLcpAocGLLahSES-YPFElh..hW..tsttslo.spplhphsGhs.sssVcplslDpFFpsssspp-Was-tppsssp+FQpLlpsLKssL..schpVYRl.GplplDVYIlGcsssGslAGlpTKVlET........ 0 4 9 12 +7756 PF07925 RdRP_5 Reovirus RNA-dependent RNA polymerase lambda 3 Fenech M anon Pfam-B_9372 (release 14.0) Family The sequences in this family are similar to the reoviral minor core protein lambda 3 (Swiss:P17378), which functions as a RNA-dependent RNA polymerase within the protein capsid.\ It is organised into 3 domains. N- and C-terminal domains create a 'cage' that encloses a conserved central catalytic domain within a hollow centre; this catalytic domain is arranged to form 'fingers', 'palm' and 'thumb' subdomains. Unlike other RNA polymerases, like HIV reverse transcriptase and T7 RNA polymerase, lambda 3 protein binds template and substrate with only localised rearrangements, and catalytic activity can occur with little structural change. However, the structure of the catalytic complex is similar to that of other polymerase catalytic complexes with known structure [1]. 
17.30 17.30 17.40 17.30 15.50 15.40 hmmbuild -o /dev/null HMM SEED 1271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.26 0.70 -14.17 0.70 -7.18 3 118 2009-09-10 15:58:44 2004-09-16 15:38:12 6 1 33 6 1 75 0 466.00 63 99.55 CHANGED LFNALPspLQpLSLALSGppPLTD+IFEsAAcAWHVpPRSpsYKLLDHIPFSosVV..IPPSIYsuhsWScYaAlssspVlRVsThsuPDDVYVPNSsIuPLLoPL+TIP-YGpLHPAIENDAo-hGsspARhASTFaKIASSQARQVKlDPpRFLtFLLVopAuPRVPSGVloDQPshhDPopSPALaAIWQIMQ+YKlsGsYYAPALVVsoGAlWWIPPPG+RNsVoVQaLLTDLINLAILAasTcLSPoLEhsGVplYL+AASSsSYAasLLchKSIFPsLSLHSMYRstEFGGKCPsIEWTEPRScYKF+WhGVTQLa-GLRPpoPSpDsKALElhcKYGLsDVu+lII+cR+sHPRHsaDSVRFVRDVMALTSGMYLVRsPTMSVLREYSQoP-IK-PIPPcsWTGPVGNlRYLpDospGPARHLYcTWhtAARQlAtDPpTHDPLsQAIMRoQYVTARGGSSAALKpALtsosVsLPDFcGosVK+SSKIYQAAQLA+luFppLIsAIhA-VTMGIRNQVQRRARSIMPLNVPQQsVSAPHTLVANYINKHMNLSTTSGSAVpDKVIPLlLYASTPPNTVINVDIKACDASITYsaFLSVICGAMHEGF-lGssutPFMGVPSSIVsDRRssuAPYuRPISGLQTMlQHLA+LYpAGFSY+VsDuFSSGNcFoFPTTTFPSGSTATSTEHTANNSTMMEaFLsVauPpHsKSuoLKRIlKDMTIQRNYVCQGDDGlLIIPcpuAuKISuEDlsELLELLcKYGctFGWsYDIDaSDTAEYLKLYALFGCRIPNlSRHPPVGKEYAuPpT-EIWPSLlDIlMGpFaNGVTDsLpWR-WLRFSWAFACauSRGua+shpGpSVu.AQYPhWSFVYhGLPPILLPGQTPFIaSsYMPsGDQGMFSILstWRDaLTu+AooshPPL+RsHPVWGLADVPSLLS-lGVYpGYaAAQlsRRPcPSP-sAss-SlEQhTuALS-YLhpDPsLKuRVlRGRssWERLosoauuslsSRVPSLFDVPsKWltAGRDA-KPsPSuVA-MhcoLpRAtR+Sp+SFSRLLELYL+VHV+LGEulPLAVDP-VPpVAGADPLNDDHWaKhTSLGPIsQSTRKYFupTLFVGKTVSGLDVEAVDATLLRL+ILGA-PEAapAhLsGIGMSDSEAHpIAG+ISLADAQlVQLARVVNLAVPSSWMSLDFDohI+HHuYshpPGIocsSTclR-RuuWlsuILRLLCAohAMTssGPVssuhVo-IDGGusALuGsLRsWMRDV 
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................GltuscPISGhQsMhQaLuKLYpRGFpYpVsDsFSPGN.FTHhTTTFPSGSTATSTEHTANNSTMMEsFLTVWGPEHTDDPDVLRLMKSLTIQRNYVCQGDDGLMIIDGNTAGKVsSETI.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 1 0 1 1 +7757 PF07926 TPR_MLP1_2 TPR/MLP1/MLP2-like protein Fenech M anon Pfam-B_9285 (release 14.0) Family The sequences featured in this family are similar to a region of human TPR protein (Swiss:P12270) and to yeast myosin-like proteins 1 (MLP1, Swiss:Q02455) and 2 (MLP2, Swiss:P40457). These proteins share a number of features; for example, they all have coiled-coil regions and all three are associated with nuclear pores [1,2,3]. TPR is thought to be a component of nuclear pore complex- attached intra-nuclear filaments [1], and is implicated in nuclear protein import [2]. 
Moreover, its N-terminal region is involved in the activation of oncogenic kinases, possibly by mediating the dimerisation of kinase domains or by targeting these kinases to the nuclear pore complex [2]. MLP1 and MLP2 are involved in the process of telomere length regulation, where they are thought to interact with proteins such as Tel1p and modulate their activity [4]. 30.00 30.00 30.00 30.10 29.90 29.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.87 0.71 -4.41 29 251 2009-09-11 18:57:06 2004-09-16 15:44:10 7 8 210 0 170 250 3 131.80 28 6.62 CHANGED hppphsplps-lpphppptpphpp...phpphppDlcpQschsppAQpp....YEpElhpHucs.sppLppl+pphpphppphspLcppscpspspL.................................................pppcpsWppp+ptlcpElsphppRh--LppQNpLLHsQlEsls .................h.ppphsplpsElpchhpctpptts...ptpptppDlppQuclApcAQpc....YEpELhhHAps.sctLpth+pp.......hsp.......hp....pphppLcppspsAcspL.........................................................................................tppcto...WpEpcptLccElschppRp--Lp..pQNpLLHsQlEsl.................... 0 52 89 138 +7758 PF07927 YcfA YcfA-like protein Fenech M anon Pfam-B_2914 (release 14.0) Family The viral, archaeal and bacterial proteins making up this family are similar to the YcfA protein expressed by E. coli (Swiss:Q9F561). Most of these proteins are hypothetical proteins of unknown function. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.97 0.72 -4.05 255 2133 2012-10-01 21:18:35 2004-09-16 15:49:37 7 4 1365 1 556 1700 149 54.70 24 71.59 CHANGED hc-lh+hLp.ph.G........a..thh..ct.p.GSHhtap+ssppt..........hsls...H..sppc.lthstl+...pl.l+.ps ..................p-lh+hLp.pt.G........ahhh...ct.p.GSH.h..pappssppt..............hslP....H..stpc.lptttl+......pIh+........................ 
0 167 380 484 +7759 PF07928 Vps54 Vps54-like protein Fenech M, Finn RD anon Pfam-B_9294 (release 14.0) Family This family contains various proteins that are homologs of the yeast Vps54 protein, such as the rat homolog (Swiss:Q9JMK8), the human homolog (Swiss:Q86YF7), and the mouse homolog (Swiss:Q8R3X1). In yeast, Vps54 associates with Vps52 and Vps53 proteins to form a trimolecular complex that is involved in protein transport between Golgi, endosomal, and vacuolar compartments [1]. All Vps54 homologs contain a coiled coil region (not found in the region featured in this family) and multiple dileucine motifs [1]. 20.90 20.90 20.90 22.10 20.00 20.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.64 0.71 -4.03 17 311 2009-01-15 18:05:59 2004-09-16 15:52:26 7 10 251 4 226 328 4 128.50 38 12.82 CHANGED cspsahlssssLhllphlp-YhphhsphPslss-hhspll-h..........................L+hFNSRssQLlLGAGAhcsuG.LKsITo+pLALuSQsLphllthlPhl+thhpth.......t..hhtpac+lppDappHpsEItsKLluIhs-Rh .................................................t.tppahlss...oslhllchlt-Yhphh.s..slPs...h...ss-hhsplh-h..........................L.+hFNSRssQLlLGAGAhp...s.u.G.LKsIToKpL...............A.LuSpsLphlht.h....IPh.................lRphhctp....................s.p.p...........s..h..........h.tcFD+..lp+sYp-H.spIpsKLlsIMssh.h................. 0 79 126 185 +7760 PF07929 PRiA4_ORF3 Plasmid pRiA4b ORF-3-like protein Fenech M anon Pfam-B_4929 (release 14.0) Family Members of this family are similar to the protein product of ORF-3 (Swiss:Q44206) found on plasmid pRiA4 in the bacterium Agrobacterium rhizogenes. This plasmid is responsible for tumourigenesis at wound sites of plants infected by this bacterium, but the ORF-3 product does not seem to be involved in the pathogenetic process [1]. Other proteins found in this family are annotated as being putative TnpR resolvases (Swiss:Q9LCU7, Swiss:Q50439), but no further evidence was found to back this. 
Moreover, another member of this family is described as a probable lexA repressor (Swiss:Q7UEI4) and in fact carries a LexA DNA binding domain (Pfam:PF01726), but no references were found to expand on this. 26.60 26.60 26.80 26.60 26.40 26.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.08 0.71 -4.87 18 780 2009-01-15 18:05:59 2004-09-17 17:08:24 6 17 653 2 251 715 156 155.20 21 60.93 CHANGED ssplapL+lsLpsspPsIWRRltVPtsloLspLHpVlQssMGWpssHLHcFphs.sppYsh...........s....................s.s.ss.s..hspsphpLspllt..ttscshpYhYDFGDsWpHpIplE+hlsttsssphPh.ClsGptA.sPPEDsGGhsGYpchL-hlscPc+........cap-hhcWhGc.........F-sptFshccls ..................................................h.hpl.l...........t.s.l..aRc.l..lssshshtpLH.t.slptuh.s.a......p......s......s....Hhap.Fhhs......stpast....s.......................................................................................h...s....s......s.t.s........h.t..pps...pLsplh...........pttp...c..............hhYhYDFGDpWpaplp..................lp..c..........h........t.........p............t.........s.......t.......hsh..h..lput..s.......s...sP.p..s..s....................................................................................t............................................................ 0 89 173 216 +7761 PF07930 DAP_B D-aminopeptidase, domain B Fenech M anon Pfam-B_29283 (release 14.0) Domain D-aminopeptidase (Swiss:Q9ZBA9) is a dimeric enzyme with each monomer being composed of three domains. Domain B is organised to form a beta barrel made up of eight antiparallel beta strands. It is connected to domain A, the catalytic domain, by an eight-residue sequence, and also interacts with both domains A and C via non-covalent bonds. Domain B probably functions in maintaining domain C in a good position to interact with domain A [1]. 
21.40 21.40 21.40 23.20 21.30 20.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.90 0.72 -4.25 2 66 2012-10-02 21:13:33 2004-09-17 17:09:08 7 2 65 1 6 26 0 87.40 75 17.05 CHANGED sucssRspAssuWFGSWLssETGLVLSLEDAGtGRMKARFGTuPEhMDlsutNEApSuhTTlRRDG-hIcLsRtsENL+LuM+RlKGE ........PAKPVRAQANPGWFGSWLNPETGLVLSLEDAGGGRMKARFGTGPEIMDISGENEAQSSMTTLRRDGDMIHLARKDENLHLAMHRLKGE........ 0 1 3 6 +7762 PF07931 CPT Chloramphenicol phosphotransferase-like protein Fenech M anon Pfam-B_29509 (release 14.0) Domain The members of this family are all similar to chloramphenicol 3-O phosphotransferase (CPT, Swiss:Q56148) expressed by Streptomyces venezuelae. Chloramphenicol (Cm) is a metabolite produced by this bacterium that can inhibit ribosomal peptidyl transferase activity and therefore protein production. By transferring a phosphate group to the C-3 hydroxyl group of Cm, CPT inactivates this potentially lethal metabolite [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.08 0.71 -4.64 7 191 2012-10-05 12:31:09 2004-09-17 17:09:32 7 3 165 6 65 615 54 167.20 29 83.68 CHANGED uclIlLNGsSSuGKSolucslQslhssPWhphusDsah-t.h.schps...............sssGlcats.h...........tP.hchhhtuhhculsAhAcsGsslIlD....-lhhstch.h.-th.clLtshcVhhVGV+Csht.lhctREhtRGD.....RhsG.............huthQhchVHpss.YDlElDTotpsshECAttItpphp 
.......................................................................plIlLNGsSSuGKoolA.psl.Q.s...h.h.s...p....s..a....h..p..l.ul...D...h...ahtt...h.s...s.pt.h.p...................ts..tGlp...h..hs......................ssh.hphh.htu..h..h.........cu..l..tu..h..hcsGhs..V..lsD......cV...hh..s..t....th....h.-th..c...hh..p.....u..h..p..V..h....hV..G...V...+..s.....s..-...ht..c..R..E.h..t...R...G..D...............R..h..sG...............h..st..t.p..t..c.h...s....H......t..s.s.....t..YD......h......c.l.DTot.....hssp-sAcpltpt.................................................................................................................. 0 15 47 56 +7763 PF07932 DAP_C D-aminopeptidase, domain C Fenech M anon Pfam-B_29283 (release 14.0) Domain D-aminopeptidase (Swiss:Q9ZBA9) is a dimeric enzyme with each monomer being composed of three domains. Domain C is organised to form a beta barrel made up of eight antiparallel beta strands. It is connected to domain B by a short linker sequence, and interacts extensively with the domain A, the catalytic domain. The gamma loop of domain C forms part of the wall of the catalytic pocket; domain C is in fact thought to confer substrate and inhibitor specificity to the enzyme [1]. 21.10 21.10 21.70 102.50 19.30 18.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.17 0.72 -3.86 5 78 2009-09-11 05:52:52 2004-09-17 17:10:48 7 3 77 1 15 36 1 96.80 72 18.63 CHANGED c+sDIAGRY+ucELEA-hplsucGGulYGuFEGaLGpGPhEhM+slGcDVWhLuspRuMDAPAPG-WTLVFpRc-sGcVTGVTlGCWLAR+VsYsKs ...A.RQDIAGRYRSDELE.ADLLLVS...EGGAIYGAFEGFLGKSDMYPLYAAGPDVWLLPVQRSMDAPSPGEWKLVFHRDAAGRITGVTVGCWLARGVEYKR.L 0 2 8 13 +7764 PF07933 DUF1681 Protein of unknown function (DUF1681) Fenech M anon Pfam-B_4989 (release 14.0) Family This family is composed of sequences derived from a number of hypothetical eukaryotic proteins of unknown function. 
28.90 28.90 29.10 29.00 28.40 28.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.14 0.71 -4.67 23 357 2009-01-15 18:05:59 2004-09-17 17:14:00 9 8 219 1 230 343 4 159.60 47 61.76 CHANGED saEplLhl.pscValY+IP..PpsSs+.GY+AuDW.sh-........p.hWTGRlRll.........................................scucps..pIpLEDssoG-LFA..........................psPhsphs..suVEsVhDSSRYFVlRlpss.sG....+pAalGlGFpDRu-AFDFNVALQDahKh.............scppsphspppp.................sppP..chDhuLKEGpTIpINlus .....................hEplLhl.ps-VaVY.+IP..PR.so.s+.GY..RAu-W.pls..........p.hWoGRLRls.......................................................u..+..Gc....t..s....hI...+.........LED.p.....so........G...ELFA..........................p..s...Pl...cph......shuVEsVsDSSRYFVlRlpDs..s....G....R.+AFIGlGFs-Ru.DAFDFNVALQDHhKa...............................................lcpppph.tcptp......................................t.pttPplDhuhKEGpTIplsls.s......................................... 0 83 126 176 +7765 PF07934 OGG_N 8-oxoguanine DNA glycosylase, N-terminal domain Fenech M anon Pfam-B_29151 (release 14.0) Family The presence of 8-oxoguanine residues in DNA can give rise to G-C to T-A transversion mutations. This enzyme is found in archaeal, bacterial and eukaryotic species, and is specifically responsible for the process which leads to the removal of 8-oxoguanine residues. It has DNA glycosylase activity (EC:3.2.2.23) and DNA lyase activity (EC:4.2.99.18) [1]. The region featured in this family is the N-terminal domain, which is organised into a single copy of a TBP-like fold. The domain contributes residues to the 8-oxoguanine binding pocket [2]. 
20.10 20.10 20.10 20.30 19.50 18.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.67 0.71 -4.13 95 767 2012-10-02 11:58:57 2004-09-17 17:15:28 7 13 710 31 324 723 56 113.40 26 34.56 CHANGED tl.hsttp..hsLptsL..psGQsF+..Wc.......cp.pss.........ahsll........t.s.....cllpl+.p....ps.....................splhhphht.............................................................pcshpphlpcYFsLch..s................Lssla.pph.....spt........Ds..........hhp.........................pthpph..pGlRl.L+QDPaEsL .............................................................tp....hplp.sh....tsGQsFR...Wp........................pt...pst...................t....ahslh....................t.s....................pllp.lp..p....ss...............................................sp..lhhpsht.......................................................................................................tpshtphlpcYFsLsh..c.......................Lsplh.pph..........spt........Ds................hhp......................................ps..h..p..th....tGlRl.LpQDsaEsL........................................................................................................ 1 121 203 278 +7766 PF07935 SSV1_ORF_D-335 ORF D-335-like protein Fenech M anon Pfam-B_4933 (release 14.0) Family The sequences featured in this family are similar to a probable integrase (Swiss:P20214) expressed by the SSV1 virus of the archaebacterium Sulfolobus shibatae. This protein may be necessary for the integration of the virus into the host genome by a process of site-specific recombination [1]. 
21.60 21.60 22.50 21.70 21.20 18.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.71 0.72 -4.24 13 65 2009-01-15 18:05:59 2004-09-17 17:16:16 6 2 27 0 16 65 0 67.10 42 47.79 CHANGED +tY...paGDhhIRERKG+YYVYKLEp..NGcVKEpYVGPLsDVVEoYlKlK...GGltssP.shshs......GhEPGooss .......phaphuchhlREhKG+YYVYhlEp.psGch+-pYVGPLscVVEpYlchh.....uGl..ss..sP....ps.s........shpPss...h...................................... 0 7 7 16 +7767 PF07936 Defensin_4 BDS_I_II; Potassium-channel blocking toxin Fenech M anon Pfam-B_56105 (release 14.0) Domain This family features the antihypertensive and antiviral proteins BDS-I (Swiss:P11494) and BDS-II (Swiss:P59084) expressed by Anemonia sulcata. BDS-I is organised into a triple-stranded antiparallel beta-sheet, with an additional small antiparallel beta-sheet at the N-terminus [1]. Both peptides are known to specifically block the Kv3.4 potassium channel, and thus bring about a decrease in blood pressure [2]. Moreover, they inhibit the cytopathic effects of mouse hepatitis virus strain MHV-A59 on mouse liver cells, by an unknown mechanism [1]. 20.60 20.60 23.10 43.00 18.10 16.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -7.95 0.72 -4.28 10 17 2012-10-01 20:50:19 2004-09-20 09:42:44 7 1 6 4 0 17 0 34.20 50 68.19 CHANGED ssCpCuuKp..GsYWhs.husCPuG+GYTGsCtYhlG ssChCGsph..GlYWas.hssCPuGaGYTGpCsaahG 0 0 0 0 +7768 PF07937 DUF1686 Protein of unknown function (DUF1686) Fenech M anon Pfam-B_5313 (release 14.0) Family The members of this family are all hypothetical proteins of unknown function expressed by the eukaryotic parasite Encephalitozoon cuniculi GB-M1. The region in question is approximately 250 amino acids long. 
25.00 25.00 37.70 36.90 21.30 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.44 0.71 -4.66 8 27 2009-09-11 08:48:26 2004-09-20 09:49:39 6 2 2 0 23 27 0 149.30 37 33.28 CHANGED osstluWNTuLYSLAVAGsIsYQlWlLApp...hRccGsusMlRpsWoAlGCMsPMhsuL......hHuGlIRSusYSlTVAG.......luAVhlYVQsuspc..GMsh+QhCslsuGNllLuuAChuGssKh.hsuststsphlssGlVsFuulLLLVshsccutsGcKspu..pG..ltshlhVhohhVsshsS........hVCGRD ......tsthsWNTulYSlAVsGsIhaQhWlLstt....h.ptGh.thhpptWsslushsPMhsul.......psGhhpSshYuhshsG.......huhshhhsQshhpp..GMshpphCshssGN..llLushChuts.t.....s.thshhlshGhlVhsulLlll.hh..tttGccs.s......pG....h.phlhlhohhVsshsS........hVsuRs.......... 0 23 23 23 +7769 PF07938 Fungal_lectin Fungal fucose-specific lectin Fenech M anon Pfam-B_48600 (release 14.0) Domain Lectins are involved in many recognition events at the molecular or cellular level. These fungal lectins, such as Aleuria aurantia lectin (AAL, Swiss:P18891), specifically recognise fucosylated glycans. AAL is a dimeric protein, with each monomer being organised into a six-bladed beta-propeller fold and a small antiparallel two-stranded beta-sheet. The beta-propeller fold is important in fucose recognition; five binding pockets are found between the propeller blades. The small beta-sheet, on the other hand, is involved in the dimerisation process [1]. 
20.80 20.80 21.20 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.04 0.70 -5.14 3 66 2009-01-15 18:05:59 2004-09-20 09:51:25 7 5 46 25 47 63 1 208.30 21 64.82 CHANGED AQEVLFRTGIAAVN....SsN+LRVYFQDlaGSIRESLY..EuuWANGTupNVIGKAKLGSPLAAT...SKELpNIRVYslscDNlLoEssYDG.sSGW.lsGuLuusslsVuPsS+luulahuGoso..PplRIYtQKSssusuTIpEYhWsGssWcsGoshGVolPGTGIGsTsaRYTDYsGPS.IRIWFQTDDLKLVQRAYDP+oGWY.pLsTIFDKAPPRsAIAATSFssGpSSIYMRIYFVNSDNTIWQV....CWDHGpGYHDsRTITPVIQGSEIAIISWGsh..pGPDL+LYFQNGTYVSAVS..EWsWspuHGSQLG+pALPPA .....................................................s.............................p..........t.h.............t...........h.....................u...t.............oslAsh...............t..........la.h.t...tt.....htphh...t....ttW...Gt...h.t.....th...s..Stlssh...s..............hlhh.....s.......h...t........................t.....W......t.............h......s.......sss.huh..........sh..........p..s..s......p...lRlaa.pss..shpl..hptsa..-...sp.p...W......s.s.h..h.s.p.s.......s.t.ssluAs..sa....t..ss..hclahhstt..s.hhph.........hhpt.t.......................................................................................................................................... 0 10 29 40 +7770 PF07939 DUF1685 Protein of unknown function (DUF1685) Fenech M anon Pfam-B_5502 (release 14.0) Family The members of this family are hypothetical eukaryotic proteins of unknown function. The region in question is approximately 100 amino acid residues long. 20.30 20.30 20.80 20.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.25 0.72 -4.33 14 160 2009-01-15 18:05:59 2004-09-20 09:54:41 6 4 21 0 97 154 0 61.50 41 29.40 CHANGED LTD-DL-ELKGCh-LGFGF..-E..t.s........scLCsTLPAL-LYaulspph.st.....ptSsssssssssuo ......lTD-DL-ELKGClDLGFGF...sc..p..s.............scLssTLPAL-Lhauhspph.s......................s.s................................... 
0 14 60 76 +7771 PF07940 Hepar_II_III Heparinase II/III-like protein Fenech M anon Pfam-B_5577 (release 14.0) Family This family features sequences that are similar to a region of the Flavobacterium heparinum proteins heparinase II (Swiss:Q46080) and heparinase III (Swiss:Q59289). The former is known to degrade heparin and heparan sulphate, whereas the latter predominantly degrades heparan sulphate. Both are secreted into the periplasmic space upon induction with heparin [1]. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.90 0.71 -4.34 158 1644 2009-01-15 18:05:59 2004-09-20 10:00:19 8 23 1018 12 395 1466 1366 143.20 18 21.37 CHANGED psGathlps......sps.hllhc...sus.ss........ssHuHuDsLuF-lts.suptlllssGss...........tt............sch+ph.hR.uTsuHsTlsls.spsps..phht...................thhh.....................ththpt.........t.....shhhsst+suYht..........................hshtHpRplh.ls.........sptlhs.D.................pl ...........................sGhh.hhcs..........psh.hl.h.h..c...sGsh.s.......................ssHuHsDphuFp..lhh.pGp..lhhDsGths...........Ytp..................tphpth..h+..sotuHNTlh...ls....sps.pp..thst.......................shth...................................t.......................ttth.......thhh.s...s..hps.u.Ys.............................s.h.h.a.pRplh.ls......tphhhlhDp.............................................................................................................. 0 167 309 348 +7772 PF07941 K_channel_TID Potassium channel Kv1.4 tandem inactivation domain Fenech M anon Pfam-B_7603 (release 14.0) Family This family features the tandem inactivation domain found at the N-terminus of the Kv1.4 potassium channel. It is composed of two subdomains. Inactivation domain 1 (ID1, residues 1-38) consists of a flexible N-terminus anchored at a 5-turn helix, and is thought to work by occluding the ion pathway, as is the case with a classical ball domain. 
Inactivation domain 2 (ID2, residues 40-50) is a 2.5 turn helix with a high proportion of hydrophobic residues that probably serves to attach ID1 to the cytoplasmic face of the channel. In this way, it can promote rapid access of ID1 to the receptor site in the open channel. ID1 and ID2 function together to being about fast inactivation of the Kv1.4 channel, which is important for the channel's role in short-term plasticity [1]. 25.00 25.00 38.40 36.70 21.90 20.00 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.91 0.72 -3.75 2 44 2009-01-15 18:05:59 2004-09-20 10:06:49 6 2 32 2 20 45 0 72.40 77 11.62 CHANGED MEVAMVSA-SSGCssHhPYGYA.QARARERER.AHSRAAAAAAsu.......GpGGs.GGGsthpt....ppts.ppp- MEVAMVSAESSGCNSHMPYGYAAQARARERER....L....A.....HSRAAAAAAVAAATA.AVEGuGGu..GGG..u..HHHH...QoRGAsoSH-............................... 0 1 1 8 +7773 PF07942 N2227 N2227-like protein Fenech M anon Pfam-B_5433 (release 14.0) Family This family features sequences that are similar to a region of hypothetical yeast gene product N2227 (Swiss:P53934). This is thought to be expressed during meiosis and may be involved in the defence response to stressful conditions [1]. 
20.10 20.10 20.10 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.79 0.70 -5.44 12 457 2012-10-10 17:06:42 2004-09-20 10:09:33 7 12 292 0 334 464 12 246.30 33 63.30 CHANGED ss..hhsshhchsKlpssl+plsRDWSsEuptER-tsapPllppLsphhP.t.h-pppsplLVPGuGLGRLsa-luph..GatspGNEFSYaMLlsSpFlLNhspppspaplYPalHsaSNphspsDQLRslphPDhpshs.hs.......s.sshShsAGDFl-lYsps.......................psaDsVlTs..FFIDTAcNll-YlcTItplLKsGG..lWlNlGPLLYHFp................sh...ssphS.......lELoh--lhtlhpp.hGFpl.pccp.I.....cssYs..sN.cSMhpshYtssaass++ss ................................................tt.....s...chsKlpssl+phsRDWSs..-G..t.t...ERcss.apPllpp..........lp....phh..s............................t.....p............t................p...ph..p.....lLVPGuGLGRLsaEluth..........Ga.......t.s.p......GN.................EhSaaM..LlsSpal.L...N..t.........s.......p.............t................p.....papla..Pal.p.p.hS.Nphpp.psp.l+sl.thPDlpP.ts......hs.............................sshShsAGDFhp...l.Y.sp.p...................................................................ttaD...s..lsTs..FFIDT.Ap.NllcYlcsIhph...L..+.....s..........GG...........hWlNl..GPLLaHat................................................................s..........s.........................lELo.h--lhtlhpp.hGFph....t........pp...t...h......................ssYh....ts.pShhp....hYps.hassp+.t.............................................................................................................. 0 128 204 285 +7774 PF07943 PBP5_C Penicillin-binding protein 5, C-terminal domain Fenech M anon Pfam-B_1086 (release 14.0) Domain Penicillin-binding protein 5 expressed by E. coli (Swiss:P04287) functions as a D-alanyl-D-alanine carboxypeptidase. It is composed of two domains that are oriented at approximately right angles to each other. The N-terminal domain (Pfam:PF00768) is the catalytic domain. 
The C-terminal domain featured in this family is organised into a sandwich of two anti-parallel beta-sheets, and has a relatively hydrophobic surface as compared to the N-terminal domain. Its precise function is unknown; it may mediate interactions with other cell wall-synthesising enzymes, thus allowing the protein to be recruited to areas of active cell wall synthesis. It may also function as a linker domain that positions the active site in the catalytic domain closer to the peptidoglycan layer, to allow it to interact with cell wall peptides [1]. 21.20 21.20 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.85 0.72 -3.97 74 4610 2009-01-15 18:05:59 2004-09-20 10:14:53 8 6 2602 30 731 2984 1285 93.00 27 23.36 CHANGED F-shplhppsp.l...tphpVhtGpcc.....pltlsstcDl...hlsl.s.....+sptcplpsphph..ppslp......APlp+GphlGpl......ph.hhsschltp............h.sLlstpsVcc ..............................................Fcshphhptupth....sstpVh.h.G.cpc........plsl.s.s...pcsh...hlsl.P.......+..u..p..h...p.p...l......+.s.p.hsl.....pppLp...................APlp+GpsVGpl........ph..phc.s..c..p..ltp....................h.sLlshpsVtc......................... 1 223 436 575 +7775 PF07944 DUF1680 Putative glycosyl hydrolase of unknown function (DUF1680) Fenech M anon Pfam-B_4918 (release 14.0) Family The members of this family are sequences derived from hypothetical bacterial and eukaryotic proteins of unknown function. One members of this family is annotated as a possible arabinosidase, but no references were found to back this. These proteins are related to a large family of glycosyl hydrolases. 
24.90 24.90 24.90 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 520 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.74 0.70 -6.23 20 1388 2012-10-03 02:33:51 2004-09-20 10:18:10 7 35 764 0 436 1283 178 495.00 25 73.60 CHANGED sV+lps.uhatpptcsspphll.....ltsD+........lhcshh............hpuhlsstt........shGsaE.........................h.uhshpssslG.....+aLpulAhhhAspsDspLcp+hcplls.tlucsQp.......sDGYLsuhhptt...........................shscsWss............hYsht+lhtGhlshYphoGppphLclsp+hADah....hsshus-phpchhhs...pHsthp..ulscLY..............phTG-p+YLcLA+hFhcpcshsP...................................hshspDclsshctsTtlu+sV.......GhAclaphoGDsshhp.......sucphWpslss++hYlsGGsusp....................EtFssshclsspssh...sETCuolshlhhsc+hhphs........s-.upYsDhhE+sLYNslLuuhs.Du..stahYhsPL...tsssp+.p..........hppcapuhhCCsssshcshsphucalYsps......cc..slaVNLahsSpschchpu...lpLcQcTs..aPapuplplTl.........psstssphsLtLRlPuW.................uuu...splplNGcs........sspttssuYlplsRcWpcGDplpLpLPMslchhtu....p-ssspVAlhpGPlVhs 
.................................................................................................................................................................................................................lpl.t.s.shht.p.h.phshphhl......h..cp............h.ts...........................ps.......l...s.h..t..........s.h.s.....p.....................................s.h.h.p.spshs.......+alcuhu.h.h.s..t.....p.....s.........D.........s.............p.......L....c....p....p....h....c....phlp....hl.ttsQp..............sDG..Ylsshhphp............................................................s..p.ppWss..........................hYsht+hhp......u..h....lsha...p......h..T....G......p....p....p...........h....Ls....lsp+h.AD....al..............................ss.......h....us.t.p.....h....p.....h....h..h....hs................t+.s...t..........hp......u.....lhc....LY.....................ph..T...s..c.......t.......+.......Y.LpLA....p....hFhc.pp.sh.ps...........................................................................................................................................hthpps.c.lsh..h...c.t.p.....s...t.lu..+sV.......................Gh...uc...lh..ph.....o...s........D.....p..t.....hhp................sspphWp.s.hsp.+.chY.lTGGhuup.............................................EtF...s..s..s......h.p....l.s.sp..hsh..............sE.o.Cu..slshhh.hs...........cchhphp........s..-...upYADhhE+............sLYNsllu.u....h.............s..............-s..............t...ta.......hY..h...sPL...........pstspp....................................hppta...h.u.....h.s.CCss..Nhscshsph.s.p.h..l..Ys...p.................................ss......sla..lsl.Y.h.s.sp..hp....h.ph.......t......s..........l..p..lppp.os............YP...a...p...t.....p.......l.plsl............................p.s.s.p.s.s.p..a.sLtlRlPsW.....................spt....sp...lp.l.......N...Gpt.....................htts.hts.u..Ylpl.p.R.p..W.p.pG.D.p..l.plp.lP.M.t...l+hh..hs.......ps.tsplAlt+GPlVhs......................................
........................... 0 156 320 388 +7776 PF07945 Toxin_16 Janus-atracotoxin Fenech M anon Pfam-B_50381 (release 14.0) Domain This family includes three peptides secreted by the spider Hadronyche versuta (Swiss:P82226, Swiss:P82227, Swiss:P82228). These are insect-selective, excitatory neurotoxins that may function by antagonising muscle acetylcholine receptors, or acetylcholine receptor subtypes present in other invertebrate neurons [1]. Janus atracotoxin-Hv1c (J-ACTX-Hv1c, Swiss:P82228) is organised into a disulphide-rich globular core (residues 3-19) and a beta-hairpin (residues 20-34). There are 4 disulphide bridges, one of which is a vicinal disulphide bridge; this is known to be unimportant in the maintenance of structure but critical for insecticidal activity [1]. 25.00 25.00 84.30 84.20 19.90 16.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.25 0.72 -4.28 2 4 2012-10-01 22:06:18 2004-09-20 10:20:42 6 1 2 1 0 4 0 35.80 87 97.95 CHANGED sICTGADRPCAACCPCCPGTSCpu.EsNGVSYCRpD ..sICTGADRPCAACCPCCPGTSCpGPEsNGVSYCRND. 0 0 0 0 +7777 PF07946 DUF1682 Protein of unknown function (DUF1682) Fenech M anon Pfam-B_4955 (release 14.0) Family The members of this family are all hypothetical eukaryotic proteins of unknown function. One member (Swiss:Q920S6) is described as being an adipocyte-specific protein, but no evidence of this was found. 
23.80 23.80 23.80 25.00 21.50 23.70 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.96 0.70 -5.58 39 327 2009-01-15 18:05:59 2004-09-20 10:21:26 9 6 273 0 236 331 0 316.70 27 76.34 CHANGED hpsahhEhh.slshlllhlhhahh..GpppNcphAppa...hpsppshhppp..FuhlGhstst...............................hlhccu.spashYATGRpssstlhlslcLhpRpshhshl.hphlhsh............pDphplpl.............................spsshDs............hlaAlVpKpshpph+c-ph.-L................Sh......spop-ptpLPpphslhoEusElsstlls.sp...........lhpsls.ps.sch...lcaltlTDQshtps.p.p.............cts...................................c+plhlshslPpsss.....ht..phttLlshhlphhDpls........shclps-shcKscpsRppthcchhKttcpt+tEt..htpc+tctK+pc+cphhsphosEcQ+Kh-.........cKcpc+pt++ .........................psahh.Ehh.hlshllhhhh.ahh..GpppNpphAptW...................hps..p....ps...lLppp..FuhVG.ssstp.........................................pshLhccstp.ash..asoGRpsspshhlplc..........hhpRpsh.lshl.hchhhsh............pDplplpl............................t.hspps.hDs..............aVaA..lspKcshpphp..c-...hh.DL.....................Sh.............spstsp.....t...LPsp....hslhSE..sElss.s..hls..sc............hhpslp..ph...uch..............lchlthoDQ.ss......p.....psp............................................s++plhhs.hs..lPssss..................hp.shhsLhphhhh.hhDplt.............ph+hs..t-s..hpKs..cpsRpch.cphhKhpctt+tEt..t.tpc+ccp++tc+-chhpp.ss-cQ++h-.........c+cpc+p.++......... 0 83 130 194 +7778 PF07947 YhhN YhhN-like protein Fenech M anon Pfam-B_5325 (release 14.0) Family The members of this family are similar to the hypothetical protein yhhN expressed by E. coli (Swiss:P37616). Many of the members of this family are annotated as being possible transmembrane proteins, and in fact they all have a high proportion of hydrophobic residues. 
25.00 25.00 25.60 25.00 24.70 24.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.43 0.71 -4.92 119 1531 2009-01-15 18:05:59 2004-09-20 10:23:20 9 5 1341 0 371 954 58 182.60 31 82.28 CHANGED h.hphlhKshshlhLhhhsht......................sshshhl.hsuLhhShlGDhhLh...................pphFlhGlsuFhlAHlsYlhs.Fh.............thhtshphhhshshhshus...................shhhh...lhspl..............st..hphsVssYshllssMsht.Aht....................hhtshsthhsslGAllFllSDsll...AhscFh.......hshshsphhl.hsoYasAQhLI.shuh ..................................................phlhKslsllLLhlhshp.......................ts.hsshshhl..lsGLsh.ShlGDsLhh.............................................hsppphhhuluuFhLuHLhYshhFs...................tphshs..h...hh..s.ls.ls..l...h.ll.us...............................................................l.hhsl.......lhscL...............................tc...hphPVss.ahshlhsMshh..Asthh.............................................hhtsss.suh.shhGA.hLahlSshl.l.hhs+ap....................pphpts.s.hll...husYauuphLIstS................. 0 101 189 287 +7779 PF07948 Nairovirus_M Nairovirus M polyprotein-like Fenech M anon Pfam-B_5426 (release 14.0) Family The sequences in this family are similar to the Dugbe virus M polyprotein precursor (Swiss:Q02004), which includes glycoproteins G1 and G2. Both are thought to be inserted in the membrane of the Golgi complex of the infected host cell, and G1 is known to have a role in infection of vertebrate hosts [1]. 
22.00 22.00 22.90 35.40 20.60 21.90 hmmbuild -o /dev/null HMM SEED 645 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.36 0.70 -13.20 0.70 -6.40 6 90 2009-01-15 18:05:59 2004-09-20 11:34:11 6 2 7 1 0 87 0 473.40 69 41.74 CHANGED PTNRSKRs.pscIILThsQGLKKYYoKILKLLcLT.EEDoEGLLEWCsRhLupsCDDsaFpcRIpEFFl..TGcGaFNEVL.FK..S.o.usThsPs+su.....usss-PF+SaaAKGhlphDSGYFSAKCYs+ASNSGhQLINlTpHshKlssTPGPKhoNLKolNCINLKsSsDK-HsElEINVLlPQVAVNLSNC+Vll+SHVCDYSLDsDGsI+LPplp.HpG....oFIPGTYKIlIDKKNKtNDRCsLhTNCVIKGRElRKGQSsLRQY+TEI+lGpspuGoRRLLS.Esuss-ClSRTQLI+TEoAElHsDcYGGPG-KITICNGSTlVDQRLGSELGCYTINRIKSaKLCENSAouKuCEIDSsPVpC+QGaCLKIoQEGRGHVKLSRGSEIlLDsCDoSCEIMIPKGoGDILVDCSGGQQHFLpsNLlDLGCPNlPLLGKMAIYICRMSNHPKTTMAFLFWFSFGYVITCIhCKllFYlLIllGTLuK+lKQYRELKPQTCslCEssPVNAIDAEMHDLNCSYNICPYCASRLTSEGLsRHVsQCPKRKEKlEETELYLNLERIPWhVR+LLQVSESTGlALKRSSWllVLLlLLsVSlSPVQS....APlupu+sVpsYpsREsYsuICLFIhGSVLhAlShLh+GLlDSlG ..................................hhph.h.t+lLphhths.-.ss-tL.pWCpc.ht.sCssshhppRIpEFFh..supuhFN-VLph+h.s.s.tso..ss..st......sss.shhp.hu+t.Lph.SshhpspChsts.ssu.Qhls...Hss+lhpTsGPKhpsl+slpClNlcsph.K-pp.l.lsVLhsplsVsLpsC+s.Ipu+.C.YshshDG.l+lPphh.+pG........hhh.GsYpIslDhpsp.NcpCpLhTsCVlKG+El+KGQS.L+tYpTEl+lsps.sGpR+LLu.ctsss-ChStTQLh+TEssElHsDsYGGPG-KITICNGSTlVDQRLGSELGCYTINRV+oaKLCENSAosKsCEIDShPVKC+QGaCL+ITQEGRGHVKLSRGSEVVLDsCDoSCElMIPKGTGDILVDCSGGQQHFLcDNLIDLGCPpIPLLGKMAIYICRMSNHP+TTMAFLFWFSFGYVITCIhCKslFY.LIllGTLGK+hKQYRELKPQTCTICETsPVNAIDAEMHDLNCSYNICPYCASRLTSDGLuRHVsQCPKRKEKlEETELYLNhpRlPhhlRpLLplS.usGhsLKRusWhhsLLlLhhlolSPVQu....APlGcGKTIEsYpsREsaTSICLFlLGSlLFlVShLhKGLVDSlu............ 0 0 0 0 +7780 PF07949 YbbR YbbR-like protein Fenech M anon Pfam-B_4990 (release 14.0) Family The members of this family are are all hypothetical bacterial proteins of unknown function, and are similar to the YbbR protein expressed by Bacillus subtilis (Swiss:O34659, Swiss:O87088). 
One member (Swiss:Q97EN2) is annotated as an uncharacterized secreted protein, whereas another member (Swiss:P43521) is described as a hypothetical protein in the 5'region of the def gene of Thermus thermophilus, which encodes a deformylase [1], but no further information was found in either case. This region is found repeated up to four times in many members of this family. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.74 0.72 -3.86 190 4025 2009-01-15 18:05:59 2004-09-20 11:52:03 7 7 1621 2 591 2564 94 83.70 18 58.23 CHANGED lPV........psphspsh..lsshps....pssp.VplpGsp.shl......sslpphps...ps....Dlsslsp......sh.shp.....lsl..hssssssph...........Psplp..Vpl .............................l........h.st.....s.ssh..tl..t.sh.ts..........pstp..V.plsG.sp..shl..........................ppl..p.p.h.ps.....ps.......Dl.ss.lst...................sh.shp..........lpl.......ss.st.ssph..............Ps.plp..lpl................................................................... 0 239 414 504 +7781 PF07950 DUF1691 Protein of unknown function (DUF1691) Wood V, Bateman A anon Wood V Family This family of fungal proteins is uncharacterised. Each protein contains two copies of this region. 21.70 21.70 22.00 21.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.47 0.72 -4.14 38 223 2012-10-03 07:11:12 2004-10-06 09:55:10 6 5 115 0 163 218 1 114.70 21 62.00 CHANGED sshluhHhhsspllPlls..sssuupssLthlptlhtpts...........hthhlLlsssshHlsuGhshhhthp..........................ptpp++pthhhl........................sshstlG.....hulhhhu.thp.h.t.s.............ssa ....sahuhHhhsspllPlhs.....s....ssu.....opssLthlcphhtp.s...........hthhhLlsssshHlsuGlshhhhhp.................................pppt++htthhh...................................t.s.hs.....hshhhhh.h.t.................ht.hhh......................................................... 
0 25 72 131 +7782 PF07951 Toxin_R_bind_C Clostridium neurotoxin, C-terminal receptor binding Finn RD anon Pfam-B_3087 (release 15.0) Domain The Clostridium neurotoxin family is composed of tetanus neurotoxins and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein. The N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains [1]. This domains is the C-terminal receptor binding domain, which adopts a modified beta-trefoil fold with a six stranded beta-barrel and a beta-hairpin triplet capping the domain [1]. The first step in the intoxication process is a binding event between this domains and the pre-synaptic nerve ending [1]. 25.00 25.00 50.90 50.90 21.10 20.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.82 0.70 -4.64 10 143 2012-10-02 19:42:32 2004-10-06 11:27:06 7 3 31 75 3 169 0 187.30 42 17.41 CHANGED YsspssoslLKDFWGNsLpYDKEYYllNlsp.N+Yl.....shp..oc.l...h.htt.p....ph.NhahNtppLYpGhKlIIK+hu...ssssDshVRpsDhlYlNh.shsNpcYp.Lhhps......spst.tpcll.hhclussssshsphhlhphp.hsshssY.Co.lFpsss..sNslGL..lGh+p.....s.....p..hsslVASsWYhsplccp.pp..sGCaWpFIPp-cGWsE ..Ypsps.sshLKDFWGN.LhYsKcYYhlNhhp.NpYIshp..psuh...h.....Rs...ps.shalN.p.LYpGhKhII++hu...pussDslVRcsDhlYlsh.hspspEap.l...h......tpsp.tEKll.hhpIsssstths.hhshp.p..ss.ssh.Cph.Fppsp..sspIGL..lGhHp...........p..hshhltSpWYhpplccps.s..hGC.WpFIsc-cGWtE..... 0 1 3 3 +7783 PF07952 Toxin_trans Clostridium neurotoxin, Translocation domain Finn RD anon Pfam-B_4943 (release 15.0) Domain The Clostridium neurotoxin family is composed of tetanus neurotoxin and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein. The N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains [1]. 
Subsequent to cell surface binding and receptor mediated endocytosis of the neurotoxin, an acid induced conformational change in the neurotoxin translocation domain is believed to allow the domain to penetrate the endosome and from a pore, thereby facilitating the passage of the catalytic domain across the membrane into the cytosol [1]. The structure of the translocation reveals a pair of helices that are 105 Angstroms long and is structurally distinct from other pore forming toxins [1]. 25.00 25.00 27.40 26.10 20.00 18.80 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.97 0.70 -5.53 8 149 2009-01-15 18:05:59 2004-10-06 11:34:15 7 5 32 46 7 144 0 292.30 51 25.87 CHANGED slFaYLaAQKhPcshpsIoLTsSl--ALLsssKVYTFFSS-aIspVNKsVpAuLFlsWlppVlsDFToEuoQKSTlDKIADISlIVPYIG.ALNIGNEstKGNFcsAhElsGAuILLEFlPELhIPllusFoIcSals..pNKNKIIKTIsNALccRpcKWcElYuaIVosWLo+lNTQFsphKEQMYpALpNQssAIKpII-YcYNpYot-EKspIss-aNIpsIcscLNcKlshAMpNIs+FlsEsSlSYLM.Khh..htlcKLp-FDsslKstLLsYIhcNpthLs.splscLpshVNsoLssoIPF....cLSpYTsDpILIphF ...............shFaYLcAQchsps.psIsLTSSh-cALLppsK..lYTFFSo.....-..aIcsl..NK..sVpA.uLFlu....WlpQll.DFTpEssppoThDKIADISlIVPYIG.ALNIGNpstKGNF...pp...AhplhGuuILLEFhPE..lhIPsltsFhl..Sal...pNKsclIcsIsNALpcRscKW.....c.....-hYthIVupWLoplNTQF.pl+EtMYpALp.QspAlKpIIchcYN.Yotc.....E....Ksplp..pash.slps.....cLNppls.AMpNIscFls-sSlSYLMKphhPhtlp+Lp-aDpslKp.LLsYI.cpp.hLh.spspcL.pshVsppLpssIPF....pLSpYTssplLlphF................................................................................................................ 0 1 6 6 +7784 PF07953 Toxin_R_bind_N Clostridium neurotoxin, N-terminal receptor binding Finn RD anon Pfam-B_1058 (release 15.0) Domain The Clostridium neurotoxin family is composed of tetanus neurotoxin and seven serotypes of botulinum neurotoxin. The structure of the botulinum neurotoxin reveals a four domain protein. 
The N-terminal catalytic domain (Pfam:PF01742), the central translocation domains and two receptor binding domains [1]. This domains is the N-terminal receptor binding domain,which is comprised of two seven-stranded beta-sheets sandwiched together to form a jelly role motif [1]. The role of this domain in receptor binding appears to be indirect. 20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.45 0.71 -4.81 13 203 2012-10-02 19:29:29 2004-10-06 11:35:29 7 7 33 77 5 232 29 185.80 43 15.67 CHANGED sNhlhDhSGhsoplphssDltLs..lNsssltLpsssp...sVshsNslhhNuhhsNFSIsFWlRhsphssp........chslIssh.csN....sGWcI.lcsNsllaolhDsNGpp+slah........SDhl.NpWaaITIosDRLpsp.hIaINspLlsscsIcslhNIauSNsIsh.....lscNp.IaIcthsIhsKpL .................................................pNtlhDhS..GYsoplphscsl.tLss...lspN.phtLpus..sp...p.lpls..sNsIlaNuhapsFSlSFWlRIsphpsp........cYTII.ssh..cNN.....SG..W.cIsl...p...s...s...t...llaoLh.DssGppcslh...Fp.t...thSDYI..N+WaFlTITssRLssp...hIYINGpLlsspsIcpl.t.sIauSspIhhthst........s...hsp...sphlaIchFsIFs+pL........................ 1 2 5 5 +7785 PF07954 DUF1689 Protein of unknown function (DUF1689) Groocock L, Finn RD anon Groocock L Family Family of fungal proteins with unknown function. A member of this family has been found to localise in the mitochondria [1]. 
25.00 25.00 35.60 51.40 24.90 20.30 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.87 0.71 -4.35 13 52 2009-01-15 18:05:59 2004-10-06 11:36:28 6 2 43 0 33 49 0 161.10 33 56.82 CHANGED acsulcFYEtDppL-spDRhpLucshpoluhushhuGasuhsuuFhsPhsYhta....pstul+GlssP+.............oFllGlsuhhlusphsu+htas+plpphcs.........................................sspp+phcsh+hhchspsuhashYYhpTupsPphpl.DP+phhpp......LKcs ...acsAlcFYEtDphL-scDRLcLucshpoIuhuphhuGahuhhusFhsPhhYpha....pstul+GVpls+...............sFlLGlhshhhusphsu+hhYs+plsphcss........................................spsp+Qhshh+hh-.s..ssutashYahhThpsPph+l.DP+shhpp.....h+........... 0 5 17 31 +7786 PF07955 DUF1687 Protein of unknown function (DUF1687) Groocock L, Finn RD anon Groocock L Family This is a putative redox protein which is predicted to have a thioredoxin fold containing a single active cysteine [1]. 20.90 20.90 21.10 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.91 0.71 -4.18 9 121 2012-10-03 14:45:55 2004-10-06 11:38:47 6 4 117 1 95 114 0 139.10 28 85.85 CHANGED MShF+TLQ.pP.csIoLFspsttst..upclhphLcpshssp.............Fcl-l.sschPThDQLphhts.h.ps..........pshltsphP....................................chpplhcphscs...tp...pshtsshap.....tPLhVDW-pshlus-......hpslcchL ................................................................a+.h.pp....csITLFHpsstss..Sp+lhslL+ps.suss.ttsss.........................pF-.L.-l...spc.PThDQL+hIl-ahsts.h............sshlpssh...........................................p.pcsl+thpps......................t..s..tFp.....cPlsVDWsNG.p..hust............................... 0 18 51 80 +7787 PF07956 DUF1690 Protein of Unknown function (DUF1690) Groocock L, Finn RD anon Groocock L Family Family of uncharacterised fungal proteins. 
28.40 28.40 28.50 28.60 27.90 28.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.69 0.71 -4.21 7 160 2009-01-15 18:05:59 2004-10-06 11:42:59 6 3 127 0 118 152 0 114.40 28 78.45 CHANGED VasPpoPlsFSpsLluQL-sSsEoDaoRpQhsE+alpc+VupcLppLE.EslKcFcspLpsShhss.cs.psspsslSostlspclcsLpp+Lp...pLpptchpK......t.cchpss.....RuclscCLhcNcsKPLNCa-El-tFKchsh ...................................s.hthStshlppLp...s...s...........EoD...s..Rtp.hEh.lpt+lu...tcLpc...l...pt...p...p.......t...p...h...p...p...h..h.....pt.......t........................t.....tttt.p....tl...........SptplppclptLp.t+Lp....t.plpphp.t..............ppu.......+ptllpCLptNpt..+PLsCacEV-tFKp.V.t....................... 0 25 58 97 +7788 PF07957 DUF3294 Ribosomal_MRP8; Protein of unknown function (DUF3294) Groocock L, Finn RD anon Groocock L Family This family was annotated as mitochondrial Ribosomal protein MRP8, based on the presumed similarity of the S.cerevisiae protein to an E.coli mitochondrial ribosomal protein; however, this similarity is spurious, and the function is not known [Wood, V]. 20.00 20.00 20.00 21.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.44 0.70 -4.81 10 45 2009-01-15 18:05:59 2004-10-06 11:45:09 6 1 44 0 30 36 0 208.10 43 98.55 CHANGED MS...-.....lEpL+KcVscLpslV+KQutLIuKTGcpVl-LQlsppKschsshs.......sppspp..suplDsoDaATNEDLVQLVsELQGQLDsLE-RSIRRhsNSp..tp--c-llAPIPNtDG-hPs.p.......DthhPcTLcEFccl.-clcLh+Lu+FYEllPPs.p-pEchcchLcs..psEshch........sttsDc-lpcclcchoc-ElD-l..Fc-lARYLGLRsRRGossW ...................MSsc....lE.LpcpVs-LpsLVKKQShlIoKTGppVLELQlccpKpclsshs.......sppsss..ssphDsoDaATNE...DLVQLVsELQG....QLshlEERSIRRllNSp...ppcD-D.llAPLPNsDG-lPs.........-slFPpTLcEF+cl.sclcLl+Lu+FYEhlPPs.pEpEch-.phL-s....psEshpl........spss-c-IpcclcpaSc-plD-h..Fs-lARYLGlphRRGophW.................. 
3 7 17 27 +7789 PF07958 DUF1688 Protein of unknown function (DUF1688) Wood V, Finn RD anon Wood V Family A family of uncharacterised proteins. 25.00 25.00 49.70 29.80 22.60 21.90 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.48 0.70 -5.94 32 232 2009-01-15 18:05:59 2004-10-06 11:46:41 6 3 181 0 163 231 29 398.00 45 92.28 CHANGED shLpohpAVR-Rupplh...phAcpschpHFslc.s+hsssAsaVsslI+ccYss....pIP.HuRWpHF-sGG....lsRhspLhsphs.....................-s.EpuRthlDLhlVSVLLDAGAGspW+YpEstssph....auRSEGLAVAShchFhsGhFSu.DsscPhpVDupuLppLohppLupuFQVoss.NPLsGLEGRspLLp+LGcuLtspsplF.....Gsp...uRPGsLlDaL.............tsssss.lshsplhssLhssLsPIWP.uRsplcGh...sLGDsW.pssl.......................sss.ssullPFHKLoQWLsYSLlpPhpp..hGlplssh-hLTGLPEYRNGGLhlDhGVLsL+cssht....................h.sassscshIVEWRALTVuLLDcLtshVpppLs..hssppLsLAplLEuGTWpAGRclAtppRPp.usPPItIhSDGTVF .............................................................aL+SlpAVR-Rsphlh...phA..p.pscLsHFslc..s+hsssAsaVsslI++cYsss......sIPsHuRWpHF-sGG.......hsRlspLhpphs...................t.hDshEcsRphlDLhlVSVLLDAGAGspWpYc.-s.t..oGph.............apRSEGLAVASLcMFpsGhFSu....-s.s.....p.Ph.pVDutuLpplosptLupGhQlots.NPlsGl-GRssLLpRLGcuL...sps-hF.........Gtp............uRPGsh..lDaL...........................ttsussshlshsslWssLhs...sLsPIWP....uR....splsGh..................slGDsW.pssl................................................sst.hpsllPFHKLoQWLsYSLhtPhpc..hslphssh-hLTGLPEYRNGGLhlDhGlLsLKstsht....................hshapss--lIVEWRAlTVulLDcLtshVpppLs..hssppLsLAplLEuGoW+.................uGRclAttpRP....s.......ttPPIhIhSDGTVF...................... 0 46 87 133 +7790 PF07959 Fucokinase L-fucokinase Adamkewicz J, Finn RD anon Pfam-B_121298 (release 15.0) Family In the salvage pathway of GDP-L-fucose, free cytosolic fucose is phosphorylated by L-fucokinase to form L-fucose-L-phosphate, which is then further converted to GDP-L-fucose in the reaction catalysed by GDP-L-fucose pyrophosphorylase [1]. 
27.50 27.50 27.50 28.90 27.20 27.20 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.37 0.70 -5.91 6 272 2009-01-15 18:05:59 2004-10-06 11:47:43 7 11 161 0 110 250 6 326.70 26 42.43 CHANGED uGG.S+RlP.AsshGKlFTsLPh...-pspt....Lss...plLsLp.shhpcF.spu..Gl..hlsouD...lhsss-s.hlshsc.susssluhPsoLslAosHGVaVscppss.tc.shphpllcchLpKPTlE-lhphpAlp+sGphhh......DoGll.hcscss..-sLhthshpsuphhlsah-L.lsshp.EhshYtDhltAhuPu...Ea......sphsocsht.t..hh.hRp+haphhpt.slshl.lssupFhHhGTop.EhLp+hous.ssl....tlsp+phss.Pshpsps.ttossIloShlsuslSlussSl.lppS+LusslpIGupCIVoGl.lhppsuh.u..........L.DhlC..hcVhhsGslphVhhhhGlpDNhKsSl+p....cuhpFhGhsappsLcchsIcsoDlhsSss..pshsLWsA+lFPVhoshc ........................................................uGG.SpRhP.hsshGKhhoslPh............tpsp..h.......l.p....phhpL..shh.ch..tthss.G.l..hlssuD........lh.hs.....t....p...h........p.....h.........sp....s.....shh..s..huh.s.s.shupsHGVal.sp.........p.......s..............................lp.hLpKsohpch...t.sl...........ts.t..h.h....................s.Ghhh.h..sschs.........-.Lhth.p.h...ps.s...h.p......hs.....hh........t..........h....................t.h.....sha.Dhhhshu.........c....................sp.t.pt.h...........................p..t..s..lshh.....l......ss.....upahahsTo....t...E..hlpphht.....th.........................th..ttp.h.........s......................hl.suhl.pst...h..ph.ssssl..lp.spl.ts..s...hpluptslloGh...p.......th................................shhh....p..lh.................s......p.....t...hshh.hGhpDshctt.......tt.tahshsh.phh.t..hslp......-hh..ts.....sL.sAplFPlhp...p............................................................................. 0 40 59 81 +7791 PF07960 CBP4 CBP4 Groocock L, Finn RD anon Groocock L Family The CBP4 in S. cerevisiae is essential for the expression and activity of ubiquinol-cytochrome c reductase [1,2]. This family appears to be fungal specific. 
20.60 20.60 21.60 23.10 20.10 20.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.77 0.71 -4.52 18 129 2009-09-10 15:02:01 2004-10-06 11:49:54 6 2 125 0 95 123 0 115.40 33 85.79 CHANGED p+ssWhhWsKhhhsGusIhsuGshLhpYhoPTDE-Llp+asPEL+pch.cpRchRQpE.p-hhphlKcpSKSDcPIWpsushcS...E.+c..............p+pctt..pt.php+tppEhcchcpppppp+c- ........t....hhhWhKhhss.GuslhsuGshLhpassPT-E-Lhp+asPELp+c..cpR..ctRQpE...pchhpplK-hS...cSD......cPIWtsst.ps...Ecp...pt.pt................ht+.ptcp...p+pphtchtt...............t.............. 0 19 49 80 +7792 PF07961 MBA1 MBA1-like protein Groocock L, Finn RD anon Groocock L Family Mba1 is an inner membrane protein that is part of the mitochondrial protein export machinery [1][2]. It binds to the large subunit of mitochondrial ribosomes and cooperates with the C-terminal ribosome-binding domain of Oxa1, which is a central component of the insertion machinery of the inner membrane. In the absence of both Mba1 and the C-terminus of Oxa1, mitochondrial translation products fail to be properly inserted into the inner membrane and serve as substrates of the matrix chaperone Hsp70 [3]. It is proposed that Mba1 functions as a ribosome receptor that cooperates with Oxa1 in the positioning of the ribosome exit site to the insertion machinery of the inner membrane [3]. 
27.40 27.40 27.40 27.40 27.20 27.20 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.27 0.70 -5.52 7 74 2012-10-03 02:27:23 2004-10-06 11:51:34 6 1 72 0 48 79 0 223.40 28 77.91 CHANGED ppsppc.pshs.+alGlhs-halPsua..h.sshssPhhhhphLhRRhYhhulNThpluhFRhpoGhKPpFh.WKNcAIEsYlpVNcuFAp+sL...........pplcshsolWVpcALptRu+plPsssch-WpLlKFssVPKLluhpsh.lPspshE.hlQllY+FcT+Q+Ll+hs+tssKs-phD+sVl-YlualpDuoTs..-hlLhGSVFESs.scshLPcss.sssplslppM+ssGDIaR.ssu .................................................................................hs.p.lG.lhschalPsth...hss.htp.s.h..hhphl.hR+hhhhshN.......ohplh...ha.+..h...p.........u..h.....+.....p.Fh.WKspAIEhalpsNpuFAptsl.................splcthsu....hhl.cuLtsRs...p.....phP.t..s.s.pLsWpLlKasps...P........KlVS..h...psh.l........s..s...t.s...h.p....hlQlVh+hsT+Q.....+l.l...p............hs....ptp...p..........p......s...pph....-....+..Dls-YlVa.....h..s.shss....-hhlhGplFESp..psh..s...p........stp.h.t.....tuDlaR....t....................................... 0 11 26 43 +7793 PF07962 Swi3 Replication Fork Protection Component Swi3 Wood V, Finn RD anon Pfam-B_9217 (release 15.0) Family Replication fork pausing is required to initiate a recombination events. More specifically, Swi1 is required for recombination near the mat1 locus. Swi3 has been found to co-purify with Swi1 Swi3, together with Swi1, define a fork protection complex that coordinates leading- and lagging-strand synthesis and stabilises stalled replication forks [1]. 
The Swi1-Swi3 complex is required for accurate replication, fork protection and replication checkpoint signalling [1,2] 25.00 25.00 25.20 25.30 24.90 24.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.76 0.72 -4.35 29 279 2009-01-15 18:05:59 2004-10-06 11:52:44 7 7 238 0 196 269 2 84.40 34 27.13 CHANGED KLcsp+LhSp.+GlPtLc+t....................h.cch+hpu+s+E......................hs-LspllphYQhWsHcLFPKupFcDhlshlEpLG+p.+plpshhpchhpcth. ........................................................KLstpRLlup.cGlPtLp+h.............................h..cph..K..h+.G.K.GHE................................hpD..LppLlphYphW.sccLaP+hpFcDhlshlEpLGpp.+plp.shhpchh.c..s................................ 0 58 101 157 +7794 PF07963 N_methyl Prokaryotic N-terminal methylation motif Bateman A anon Pfam-B_6484 (release 14.0) Motif This short motif directs methylation of the conserved phenylalanine residue. It is most often found at the N-terminus of pilins and other proteins involved in secretion, see Pfam:PF00114, Pfam:PF05946, Pfam:PF02501 and Pfam:PF07596. 21.20 20.50 21.20 20.50 21.10 20.40 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.24 0.73 -6.49 0.73 -3.90 72 4741 2012-10-03 10:38:27 2004-10-06 12:39:50 7 22 2129 0 985 9899 3456 19.90 44 11.31 CHANGED pGFTLlElllslsllullhs ...+GFTLlElllVlsIlulLs........... 0 364 650 834 +7795 PF07964 Red1 Rec10 / Red1 Wood V, Finn RD anon Wood V Family Rec10 / Red1 is involved in meiotic recombination and chromosome segregation during homologous chromosome formation. This protein localises to the synaptonemal complex in S. cerevisiae and the analogous structures (linear elements) in S. pombe [1]. This family is currently only found in fungi. 
25.00 25.00 41.40 41.40 24.40 22.60 hmmbuild -o /dev/null HMM SEED 706 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.24 0.70 -6.37 6 28 2009-01-15 18:05:59 2004-10-06 13:14:18 6 2 24 0 18 30 0 598.10 30 82.59 CHANGED hllLclhapssph+plItpLlc...scashplG.LlDN.IpDhQLsNallElLSpsFs++usspstlsphPpL.WpcppKsK.FFppphYPapuKaGsttlhpFlhsp..FushlsNhs+l+plhashust..pspsltsLsppscpt.hahQsIasplYhWsGEssFlclc+.KslcIh+-L+splcl....Klhs.sFccslpospcthht...........TlppsptFpl-Fp-pphschahps.sNlPKISEVQsalsLpa.pp.--p.I-spspc-pso+cpsl.ppcptssss-u............hphutpcspa-p....TP-+s...htpcssl.DL........ppssccpc...cshss.Klp.ppsph.psph.pllppp....pSPlsphQc+KltRssSKoh.........c.L+psh-cpt....lpsppspScpshspshpsssslsspsplpss........tutcltsphsucp.pssssVsshochpssKohKppDlsl....L-sIFupPlsKtp+p...K.p+pKQ.hLpNhhshh..........KoKh.ltsNsppsh+optlpptcosphss..ph..........sppusP.s.tch.tcQssoosshtpss+sshcs+sPptpppKlEshststphssh.........sKpNsts.sp.ss.cpth..tpp.t.sshp.......................shu-STTIss.sup..s........FTspLQ-QIapSIspFSsELppKIuIINpEhNpKIl+ELSEKYpplF+-LppsFpsDspcMhpFVGElK-MhpLPE-pLVptIRs+pF 
......................................................s.hl.plh.phs.p.+phIhpllc...scastplGplhDs.lpDhQ.sNaLlElLSsCF.+psssphtl.phPpL.W.ppcp.Ks.c.FFpsp.YPapuKpGp.pshpFlhpp..Fhspl.s.hshl+plsYssup...p.pslchh...p.tpsc..p..halQsIasplYlWhsE......p..........t.lEhc+.KplcIsK.sLKsplpI....+ltp.shppslpostsphhh...........hhs+s+tFQL-FpDpphs-pFapshsNlPKISEVQsFLsLsah-p.s........-sphp..c-pp.tpss..tpppptpps.sp.................hp.uhspspt-...lhTP-cS...sph+oDtWDl........pssopppp...sshss.Kls.pp.pphppp..p..pl.pc-......................QSPlV.sQhRKhtRpoo+TL.........EhL+p-Fpppt..................lpsppspspp..s..htp..sshlhtpsphhps...................ppp.p.s.pshthschpsspSlpK+DIsl.........LsTIFupPssKtp+p.........K.ppppQppLpNaK.Pllpl..............spsc..lps.ss.tps+shclpphposphsp...............tp.spsttttch.ppp....s.sposs.pthspsscctphsc.stt+.p.tptptphsh.....................sppssts.sp.ss.pp....ths.sps.pssslo.......................shh.-STTlls..hps...s......................ssFTspLQEQIasSIspFSNELh+KIsIINpELNpKIl+ELSEKYQpLFt-LQpsFQsDspcMhpFhGEIK-hhpLPE-pLVphIRs+KF................................................................................ 0 2 8 17 +7796 PF07965 Integrin_B_tail Integrin beta tail domain Bateman A anon Pfam-B_1876 (release 14.0) Domain This is the beta tail domain of the Integrin protein. Integrins are receptors which are involved in cell-cell and cell-extracellular matrix interactions. 20.10 20.10 20.30 20.10 18.50 18.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.32 0.72 -10.88 0.72 -3.55 58 524 2009-01-15 18:05:59 2004-10-06 13:37:43 7 25 119 21 230 460 0 83.70 32 9.93 CHANGED C.spp+cCVpCptFpsG.h..pc....sCstpCpph.l..phVcphppps...........Cph+Dp-.-ChhhFshp...psss...sphhlhVhcch.-CPpuPs ...........................Csp++sCVpCptFspGph....pp.....sCsptC.pth.l...phVcph.....pcpspss...........tCph+Dp-.D...ChhtFsat........tsss......scshlpVlcc......-CPpuPs......... 
0 30 46 122 +7797 PF07966 A1_Propeptide A1 Propeptide Rawlings ND, Finn RD anon Pfam-B_386 (release 15.0) Motif Most eukaryotic endopeptidases (Merops Family A1) are synthesised with signal and propeptides. The animal pepsin-like endopeptidase propeptides form a distinct family of propeptides, which contain a conserved motif approximately 30 residues long. In pepsinogen A, the first 11 residues of the mature pepsin sequence are displaced by residues of the propeptide. The propeptide contains two helices that block the active site cleft, in particular the conserved Asp11 residue, in pepsin, hydrogen bonds to a conserved Arg residues in the propeptide. This hydrogen bond stabilises the propeptide conformation and is probably responsible for triggering the conversion of pepsinogen to pepsin under acidic conditions [1,2]. 20.50 20.50 20.70 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.93 0.73 -7.15 0.73 -4.01 85 802 2009-01-15 18:05:59 2004-10-07 15:04:45 7 5 152 8 295 720 0 28.40 36 7.79 CHANGED ll+lPLcKh+SlRcsLpEpGhhpcaLcpp .....lh+lPL+KhKSlRcsLpEc...GhhpcaLcp........ 1 23 33 100 +7798 PF07967 zf-C3HC C3HC zinc finger-like Wood V, Finn RD anon Wood V Domain This zinc-finger like domain is distributed throughout the eukaryotic kingdom in NIPA (Nuclear interacting partner of ALK) proteins. NIPA is implicate to perform some sort of antiapoptotic role in nucleophosmin-anaplastic lymphoma kinase (ALK) mediated signaling events [1]. The domain is often repeated, with the second domain usually containing a large insert (approximately 90 residues) after the first three cysteine residues. The Schizosaccharomyces pombe the protein containing this domain (Swiss:O94506) is involved in mRNA export from the nucleus [2]. 
20.80 20.80 20.80 20.90 20.40 20.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.15 0.71 -4.40 28 278 2012-10-01 20:49:39 2004-10-07 15:12:26 8 8 216 0 192 304 7 127.90 26 24.50 CHANGED Phs+tsahpRLcTFpshs.W..........ssKPstloslpsA+pGWhs..hs......p-tlpCp.sCpspLshp..........................sphstphhcphsccatttlp.suHcpsCsW+spsss.cthtths..lsts.pshlsshtc.....chssLhph.....spLPhhss ..............................hs+pshhpRlpTF.....p...t...hs...W..........tsKPp.tlsslthA+pGWhs.....ss............t-hl...cCs..sCp..stLshp....................................sthshp...hhpph...s...pch.tp.tls.s......uHpp.C.Wt....spsss.cphhhls.....hsps..thhlpthhp+hppLhph..........t............................................................................................................................. 0 55 103 156 +7799 PF07968 Leukocidin Leukocidin/Hemolysin toxin family Bateman A anon SCOP Domain \N 20.40 20.40 22.20 21.40 17.00 16.10 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.59 0.70 -4.74 19 1628 2009-09-11 18:55:27 2004-10-27 16:53:05 7 6 341 102 22 554 5 237.50 36 70.82 CHANGED pslQFsFlcDtpaDKcslllphsG.pIsSshphsssps......phspthhWs..........tpYslul..ssssssspllc..hhP.KNp.cphpVppThGYslG..Gsl..pl.psGPs....huusuoasaScoIsYsQpsY+oplsppssp.slsWsVctsphsssstt...........shasp.lFhhscppss..supp.FlspschPsLspuGFNPpalsllopcpssspoo.chclsYpRshD.hhshh.tp......hhstpsac..shps+phss...pYcVDWcs ................................................QslpFsFlcDppYsKcsLllKhpG...IsSthph.s.cs.........hsthhWP.................tpYsl..ul...p..sssssssllc..YhP.KNp.-otpVpp.TlGYshG....Gsh.......psu.o......hG.hsuuhsaScTIo..YsQpsYcTpl-.ppssc....sVtWsVcApphhsssht.........pssasp.LFhts+ssuh...tupp.FlspsphPsLlpSGFNPpFlsslS....+-..+..s.s.s..cpS.chcloYp.RshDhhphthspp............hhs.spsap..shhscphos....pY.EVDWcs........................................... 
0 10 12 18 +7800 PF07969 Amidohydro_3 Amidohydrolase family Bateman A anon Pfam-B_751 (release 15.0) Family \N 23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.16 0.70 -5.26 43 5270 2012-10-03 00:45:34 2004-10-27 17:18:53 6 49 2463 42 1791 12984 4622 377.00 18 74.72 CHANGED hlhPGFlDsHhHlhstshth..hhshtssh..shtthhthht......................hshsaspsthtptchs.sttsl..-tsts.stshhhtch.tphshlspt..uLph......................t.t.stshltppst....hhht................th.s.hsshtphtphhct.thtthsptGlssshsss..............s.tphhpshtplsppht.hsh.hhh.h......................................sshphhssGs.s..spsuhhtp..hsssh....phth....sttp..h.pplsptspppththplau.............ststulsphlsshcphhtp....................hhh+sphssspshcthtplshthshts........h...........hh.phhss.phphs....hshtthhssG.....hplshuoDhsh...sshsPh.hshtssshtpsht...t.hhh....................................pplsltculphhTtssAhh....hshpcph.GpltsGt.ADlsl .............................................................................................................................................................................................................................................................................................lhPGhlD..sHhH......h...t.h..t.h..t......h...........................t........h....s.h...t.ssh..............s....h...t..h.t.................................................................h...s........t........h.......s.....t......t......p...h.................s.....h.tpl............c.t...ht.................s....hh....h.......t......................h.hspt......ulph.........................................................................t..tps...p..s...h.l...tpps..........thhht.......................hh.s................s......p..p.....h.t..t.....h...lp......t....hht.h.sp...h..Glsuh...sst.........................................................t......h...p....h..h..h....p...h...........s....t...p......s.......................
......................................................................................................s.u..h....h...h........s...t.t..........shs..u.h...h........t......................h.........s...p....p.........................................................................p.......s.................h..........s.pp...................l..pph..h...th...s.....t.....p..t..u..h...t.h.t...l.au...................................................................t.s...t.u.h.p...p..h...l.s...s...h....pph.hhp................................................................................hlt+.s...p...h....h....t........t...s.......h..t.......t..h....t....p....L....t...h..t...h..s..h.ts............................................................................................h.................c...t...p...t.h..................h....h.....t....p....h....h....p..t.G..................................h.tl.s.h....u...s..D....sh..............sshsP..h....hs..l....t....s....u..s....h.t....p..s.h...p............t.....h...h........................................................................................................................................................................ptl.s..l..p..p..A..l..p.hh..T.t.suAht............ht.hp.......c.t.......GslpsGp.ADhll................................................................................................................................................................................. 0 578 1162 1523 +7801 PF07970 COPIIcoated_ERV DUF1692; Erv41; Erv46; Endoplasmic reticulum vesicle transporter Mistry J, Wood V anon Pfam-B_2028 (release 16.0) Family This family is conserved from plants and fungi to humans. Erv46 works in close conjunction with Erv41 and together they form a complex which cycles between the endoplasmic reticulum and Golgi complex. Erv46-41 interacts strongly with the endoplasmic reticulum glucosidase II. 
Mammalian glucosidase II comprises a catalytic alpha-subunit and a 58 kDa beta subunit, which is required for ER localisation. All proteins identified biochemically as Erv41p-Erv46p interactors are localised to the early secretory pathway and are involved in protein maturation and processing in the ER and/or sorting into COPII vesicles for transport to the Golgi [3]. 21.90 21.90 22.00 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.89 0.70 -4.73 51 979 2009-01-15 18:05:59 2004-10-28 13:59:39 7 20 303 0 653 899 28 187.70 29 53.30 CHANGED CYGAt..............stptCCNTC--VppAYtp+tWshsssp.slcQCpp-....h....ptphs..EGCpltGshpls+lsGshHhAPGp...shp.tthH.h...HDhp.hpp.........phNhoHhIpcLsFG......pp.....hs.t.............pP...LDshph.......t.pp.pt...hhapYalKlVPTpap............cospaSsspap+slpsst.............tuhPGlFFpY-hSPlplh.ppp+..pohtpFlsslsullGGlasluullD ...................................................................................................................................t.....-G..CplhG..lpVNKV.sG.s.FHhs.stp..............sh.................th.t...................................ht..htt.....................phNhoHhIpc...LSFG....pt........hsshh..........................sPLDssph..................tp.p.t............................hhapY..alclVPThY.phttt............................h.popQaSVT.p....pp...cshptt.................................tslPG.laFpY-lSPhhVh..hpEp+...psahpFlsplCuIlGGhhsluuhl...................................... 0 250 385 541 +7802 PF07971 Glyco_hydro_92 Glycosyl hydrolase family 92 Mistry J anon Pfam-B_1199 (release 16.0) Domain Members of this family are alpha-1,2-mannosidases, enzymes which remove alpha-1,2-linked mannose residues from Man(9)(GlcNAc)(2) by hydrolysis. They are critical for the maturation of N-linked oligosaccharides and ER-associated degradation [1]. 
24.50 24.50 24.80 24.60 24.00 24.30 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.66 0.70 -5.86 187 2270 2012-10-02 14:50:22 2004-10-29 16:03:55 7 44 847 40 652 2109 326 474.50 31 62.78 CHANGED u.pph...t.uhlpF...st.....spp.......l...s+lulSalS...h-pApt...NLct....Eh..........s..shs..F-pl..pppApstWscp.Lu+l.pV..p..........u.....ss....psp.....pphFYouLY+shltPphhs-........h.....sup..................Yp.u.....t...........................t.............uht........Yss.a................ulWDTaRuhaPLhsllpPp.hss-hlpShlsha.c..ps..........GaLPcWthsspp.stsMsGspussllADAa...hK..G............lp.shD......hc.pAacAh..h+sAp.............ps....................+sG...........lppah.phGYl..P...........................hsc...u.......sSpTLEYAYsDasluphAc.u...L...........Gcpp...............................-.h.ptahcRupsa+Nlacsp.......................................sGFhps+......p..ts..........G....s......at..........t......s.......asP..h.............th...s.......psah.EGsuW....pYoahV.PHDhsGLl.pLhGGc..................ct...........Fhp+LDphFst....ht......................hGpY....s.hGNEPuaHhsYLYsas.GpPa+TQphlRplhsphYs..sss..sGls.G..N-DsGpMSAWYlaSu.lGhYPlsPG...sstYhluoPhFccssl...pL.........................sG.......+...s.hsI....pA.pssu........tc.s.........hYlpusplNGcshs....+..sa..lsHsc...lh..pGGpLpapMuspP 
...............................................s...tshhpF...t.st...spp.......l.ph+luhSalS...h-p.....Apt.....NLpt.Eh..........s...shs...F-..ph..pp...pupstWpph.Lu+.l.pl...p...................s................ss....psp.............pphFYosLY+shlhPphht.-.h.......ssp..........................Yp..sh...pt...................................pht..s.t...h.....Yos..h......uhWDTaRshaPLhs.Ll..P.c.t.h.pchlpuhlsha.c..ps.................G..a.L.P...cW....hs.....sp..........c.s................shM..........sGstussl...lADAahK.....G.............lp...shD.......hp...puapAh.....hcsAp..........t.............................s+tu...........h.t.t.Yh...chGYlP.........................sphpc........u..........lScTLEYAasDasluphAc.tL...........Gc.pp...............................p.t.pha.hpR.ut.s.Y.+.s.l.acsp.......................................s.Gahps+pss.................................Gs............at..................................s............s..........FsPh..................ph....s........psas.Eu...suWpYoa...........hV.......HDhtGLl.pLh..GGc..................c.s............Fhp+LDphFst....h.t.....................h.h.Gpa..s.huNpPuaHhsYLYsa..s.G..pP.....h+TQthl+p.....l....................h...................p...................p....h...................ap.................sss.........sGh...s..G..............s-DsGpMSAWYlaou.lGhYPls..PG....pspYslGsP.hFc.c.spl..pL........................t.sG....+p..hsIps..tsss..........tc..s.................hYlpphplNGc...........h..s.........p.sa..ls.H.pc....lh....pGu...pLpaphusp............................ 
0 232 458 572 +7803 PF07972 Flavodoxin_NdrI NrdI Flavodoxin like Finn RD anon Pfam-B_1603 (release 16.0) Family \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.55 0.71 -4.21 86 2192 2012-10-03 05:08:30 2004-11-12 15:12:44 6 3 1921 11 257 1337 42 121.30 41 86.07 CHANGED lhasS.hoGNT+RFl.....c+Lsh...t.+hshp........thsp......hplscPalLlsPTYusGt...tsu........VPppVhcFLs..cNcphlpGV..........luSGNpNFGsp.ashAuchlup+hpVPhLhcFELtGTppDlpplpp ...................................lha.S.hSsNTcRFl.......p+Lsh.............u...c.hslp............ttpp.........hplcEP...al.Ll.l..P.TYGsGss....tsu...............VPctV.hcF...Ls.......t...p.N.....+....p...h...l.....+G....V..........luSGNR.NF.G.pt..ashAuch....Iup...+h..sV..Ph..L..h..+FELh.G.TppDlcpV+.......................................... 0 54 135 202 +7804 PF07973 tRNA_SAD Threonyl and Alanyl tRNA synthetase second additional domain Finn, RD anon Pfam-B_270 (release 16.0) Domain The catalytically active from of threonyl/alanyl tRNA synthetase is a dimer. Within the tRNA synthetase class II dimer, the bound tRNA interacts with both monomers making specific interactions with the catalytic domain, the C-terminal domain, and this domain (the second additional domain). The second additional domain is comprised of a pair of perpendicularly orientated antiparallel beta sheets, of four and three strands, respectively, that surround a central alpha helix that forms the core of the domain [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.06 0.72 -4.07 219 11375 2009-01-15 18:05:59 2004-11-15 16:21:20 9 40 4904 28 3159 8949 4659 48.10 36 6.80 CHANGED lRll.....ph.G...s................hsh......-LCuGsHlpsTucI..ttF+lh.......ptsshspslpRIh ...............................lplh.....ph.G...-.................................h.....-LCtGs..HV.ssTucI...th.FKlh........................ptsuhsttlpRI........................................... 0 1065 1968 2631 +7805 PF07974 EGF_2 EGF-like domain Bateman A anon Pfam-B_80 (Release 16.0) Domain This family contains EGF domains found in a variety of extracellular proteins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.44 0.72 -3.78 51 1990 2012-10-03 09:47:55 2004-11-17 13:40:45 8 364 173 0 1055 1784 147 30.20 42 4.71 CHANGED C....s......Css...pGpCs........tthCpCpsu.....atGspC ...............C.t.............Csu....+GpCs...........sGpChCpsG.......apGssC.... 0 399 519 772 +7806 PF07975 C1_4 TFIIH C1-like domain Finn RD anon Pfam-B_10678 (release 16.0) Domain The carboxyl-terminal region of TFIIH is essential for transcription activity. This regions binds three zinc atoms through two independent domain. The first contains a C4 zinc finger motif, whereas the second is characterised by a CX(2)CX(2-4)FCADCD motif. The solution structure of the second C-terminal domain revealed homology with the regulatory domain of protein kinase C (Pfam:PF00130) [1]. 
24.60 24.60 24.60 25.20 24.50 24.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.47 0.72 -3.96 16 268 2012-10-02 13:15:50 2004-11-17 14:18:53 7 7 237 1 201 260 6 52.60 46 12.05 CHANGED hCauCQpph.s...p................phYpCspCcphFClDCDlFIHEoLHsCPGCps ..........................................CauCptphss..............................................ptYpCstCpphFClDCDlFlHEsLHsCPGCt.... 0 62 106 161 +7807 PF07976 Phe_hydrox_dim Phenol hydroxylase, C-terminal dimerisation domain Finn RD anon Pfam-B_19435 (release 16.0) Family Phenol hydroxylase acts a homodimer, to hydroxylates phenol to catechol or similar product. The enzyme is comprised of three domains. The first two domains from the active site. The third domain, this domain, is involved in forming the dimerisation interface. The domain adopts a thioredoxin-like fold [1]. 20.90 20.90 21.40 21.60 20.70 20.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.99 0.71 -4.65 78 504 2009-01-15 18:05:59 2004-11-18 16:03:01 7 14 183 10 351 544 44 164.70 24 25.89 CHANGED FsuGhulcYssS.hLs..........spss.p.................................pt........LAsslhlGpRhhoupVlRhuDupshcLtctlpuDGRaRlhlFuGchpt.s..p..sh....s.plpplsphLs.sssShlp+assts.....................tphs.Sl.l-lhslapss+psl-lt.DlPp.lhpPhptphth...........Dap+lass.D.t..........pscsachh..G.lDccc.G ..................................................FsoGhulcYssu.hls.......tpss..p.................................pt......lussltsGpRh.ssp..VhRhuDupshcLpc......pl.us..Gp.aRlhlFu.Gs.pt.sp...h..........ttlpshs...phLt.t.p...Shlppassts..........................tt.s..Sh.h-lhh.l.......a.......st...+pp.l-lp.s.lPt.hhp.s......h...........sh.c.lass.-..............tsphaphh..G.ls.p.............................................................................. 0 72 165 283 +7808 PF07977 FabA FabA-like domain Bateman A anon Bateman A Domain This enzyme domain has a HotDog fold. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.75 0.71 -4.69 51 6557 2012-10-02 20:54:35 2004-11-19 13:29:09 8 37 3882 226 1494 3917 3933 124.30 34 64.08 CHANGED LPpch.hLhlDRlhpls.sGtp.....lhu.Kslo.s-.FFpsHF.scPVMPGsLhlEAhhQhsGhhhhht..tpspG.........h...ulscs+F+tpVhPss.plphclchp+......p+hs.huhucuhshVDGchlhpAc ....................................LPHRhPhLhlDRlh-hp.....t..........u.pp.......................lhAhKsVoh..N.....E.....F...F....p....G....H....F.Ps.p...Pl.....MPGVLllE.............AhAQ....s....s....G....lh.......hh.....tp...................t.tp.G......................thhhh..hGlcc.s+F+p..V.lP.G..D...pl..h.hclchh+........p+ts..hsh.hcuhu.h..V.DGc.llspA............................................................... 0 441 905 1223 +7809 PF07978 NIPSNAP NIPSNAP Mistry J anon Pfam-B_3436 (release 16.0) Domain Members of this family include many hypothetical proteins. It also includes members of the NIPSNAP family which have putative roles in vesicular transport [1]. This domain is often found in duplicate. 22.30 22.30 22.30 22.30 21.60 22.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.04 0.72 -4.04 42 1376 2012-10-02 00:20:33 2004-11-23 10:06:50 8 10 733 16 670 1283 354 97.80 20 66.90 CHANGED aE..lRsYplcPuphsta.hptatcthhthht.htschlGhahs...............phGs.hsplhtlasasshssRpthRsthhpDstW.ttthptstth..lpp.csplhhPsshSs ....................................E.htpYplc.P.s..p..h..t..p..a....hp...t....a.tc....t..h...h......h....h...p...t....h...s....s..chlGhah.s...............phGs...hspshtla...sap.s.....hpshcp....hRpt.hh.p....c....s....ta....tt..h....h...t..h..s.h..t....h....l.pp....csphhhsh................................................ 0 183 323 489 +7810 PF07979 Intimin_C Intimin C-type lectin domain Bateman A anon Pfam-B_1879 (Release 16.0) Domain This domain is found at the C-terminus of intimin. 
Its structure has been solved and shown to have a C-lectin type of structure [1]. Intimin is a bacterial adhesion molecule involved in intimate attachment of enteropathogenic and enterohemorrhagic Escherichia coli to mammalian host cells. Intimin targets the translocated intimin receptor (Tir), which is exported by the bacteria and integrated into the host cell plasma membrane. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.59 0.72 -3.81 14 389 2012-10-02 16:37:33 2004-11-24 13:34:50 6 10 133 15 2 312 0 94.00 48 12.60 CHANGED Pspl.IsVspps+ssYssApspCp..shuupLsSSpspLps......lYspWGAANKYpaYpupsoIoAWlpQTssDhpsGVuSTYDLVTpNsl.N......VsssssNAYAVCVK .........Psph.ItV-ptp+ssYs-A.shC+..s.su.LPSSpshLus........lYssWGAANKYsaYpu.pSlTAWIpQTss-ppuGVoSTYDLlTpN.l.s......Vslss.NsaAVCVc........................... 0 0 0 1 +7811 PF07980 SusD SusD_RagB; SusD family Mistry J, Bateman A anon Pfam-B_1855 (release 16.0) Domain This family includes several hypothetical proteins. It also contains RagB, Swiss:Q9ZA59, a protein involved in signalling [1] and SusD, Swiss:Q8A1G2, an outer membrane protein involved in nutrient binding [2]. 
20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.83 0.70 -4.87 151 6618 2012-10-11 20:01:00 2004-11-24 15:06:21 6 10 210 38 1597 6145 784 279.40 17 50.72 CHANGED sEhIa......ththtt.ssssssshshthhhtstss............................tshsthts.........opphhsha............................................................................pspDsRhtt.shhh.tttht.....................................................................................htt.ststshsshhhpKahs.............................ttssstssstsh..........lhRhA-lhLhhAE...Ahh............chuss.......sp.....Ah..phlNp....l...R..pRA..................................................................uhsshs.s................................ssthhp...tlhp.ERthELshE.spR.ahDLhRh.................................................................................tthhpphtthttstpththtttt...........................................................................................lhPIPpsplsts............ssLp..Q.N...s........Ga 
.....................................................................................................................................................................................................................................................................................................................................-.hh...................................................................................................................................sh.s...h.h.s.............s...p...h.h...p..a.................................................................................................................................t.s..p...D.....Rhth....s....hh.h....t..h..........................................................................................................................................................................................................................................................t...t.....t...t....s..h....s..s.h..h.....h.t.Kahs...................................................ttsstt.t...ss...t.sh...............................lh...RhAElhLhhAE....Ahs...................................c.hsss........................sp..............Ah.....phl.....N.p......l..........R..p..Ru.............................................................................................................................................................s.h...ss...h.s..s....................................................psth.h.p........tlhp.ERphE.Lsh.E....G....t.....R.ahDL.hRh.................................................................................................................................ttth..t..t..h..t.t.h.h...t..t..h.....t.......t..h.t.h.....t...................................................................................................................................................................hhP..I.P.p....s.p.l.p.hs......................st.lh..........Q..N...sua...........................................................................
................................................................................................................. 1 756 1474 1596 +7812 PF07981 Plasmod_MYXSPDY Plasmodium repeat_MYXSPDY Mistry J anon Pfam-B_3138 (release 16.0) Repeat This repeat is found in two hypothetical Plasmodium proteins. 21.10 21.10 41.10 21.60 15.70 18.60 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.69 0.72 -5.92 0.72 -4.22 24 71 2009-09-11 18:54:15 2004-11-24 15:52:40 6 2 1 0 71 71 0 17.00 79 98.85 CHANGED MYhSPDYTL.hVtLPDT MYFSPDYTLRLVQLPDT 0 71 71 71 +7813 PF07982 Herpes_UL74 Herpes UL74 glycoproteins Mistry J anon Pfam-B_3076 (release 16.0) Family Members of this family are viral glycoproteins that form part of an envelope complex [1]. 25.00 25.00 29.80 29.50 17.30 17.60 hmmbuild --amino -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.54 0.70 -5.79 4 191 2009-09-11 13:31:48 2004-11-24 15:59:50 7 1 15 0 0 100 0 200.00 65 81.24 CHANGED +VLSppGKpt.cKhK.-ILKpLhu.opDhY+FahhPopQ+lhNlslsMccFPpsYILAGPIpNcSITahWFDFYSTQLRKPAKYVYSEYNHTu+KITFRPPSCGTVPSMsCLSEMLNVS+RNsTGEcuCGNFTTFNPMFFNVPRWNTKLYVGSsKVNVDSQTIYFLGLsALLLRYAQRNCTRSFYLVNAMSRNLFRVPKYINGTKLKNTMRKLKRKQAPVKE.spKKSKKSQ.STTTPYSsYTTSTsFNVoTNVTYSsTssspRlsTSTIuYRPDSsFMKSIMTTQLRDLATWVYTTLRYRp-PFC+ssRNRTAVSEFMKNTHVLIRNETPYTIYGTLDMSSLYYNETMsVENETASDNNETTPTSPSTtFQRTFIDPLWDYLDSLLFLscIRNFSLQ.PsYGNLTPPEHRRAVNLSTL 
..........................................pVLSphGK.cLcphKlEILKQL...ppDhYphahp.o+QtlpNlThs.MscFPph.Y.ILAGPIpNpSITYLWFDF..YSTQLRK..P.......A.........KYVYSpY.....NHT.A..+pIT.....FRP...PsCGTVPS............................................................................................................................................................................................................................................................................................................................................................................................................................... 0 0 0 0 +7814 PF07983 X8 X8 domain Bateman A anon Pfam-B_374 (Release 16.0) Domain The X8 domain [2] domain contains at least 6 conserved cysteine residues that presumably form three disulphide bridges. The domain is found in an Olive pollen allergen [1] as well as at the C-terminus of several families of glycosyl hydrolases [2]. This domain may be involved in carbohydrate binding. This domain is characteristic of GPI-anchored domains [4,5]. 25.70 25.70 26.30 25.70 25.40 25.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.15 0.72 -3.49 189 1389 2009-01-15 18:05:59 2004-11-24 17:27:23 8 20 213 4 865 1350 0 72.40 35 18.91 CHANGED hhCVs.p.sss.....sspphtshhsasCu......st..s.-CsuI.sssus..........Css......ps+hSashNpYYppp...sp.s.ssuCsFsGsAs ....................WCVsp....s.ss.....spspLpss....lsaACut.........t....uDCss.Ipsuus............Cap.....shts+uSYAhNsYYQpp......sp..s.....sssCsFsGsA......... 0 131 493 696 +7815 PF07984 DUF1693 Domain of unknown function (DUF1693) Mistry J anon Pfam-B_3630 (release 16.0) Domain This family contains many hypothetical proteins. It also includes four nematode prion-like proteins. This domain has been identified as part of the nucleotidyltransferase superfamily [2]. 
19.80 19.80 19.90 19.80 19.50 19.70 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.07 0.70 -5.27 11 296 2012-10-02 22:47:23 2004-11-25 10:17:43 7 7 97 0 182 265 0 287.70 57 67.88 CHANGED Lsa-QVpRLcslLsEslPIHGRGNFPTL-lp.+plV......................................................psVRu+LE-p......GlpV+DVRLNGSAASHVLspDsGluYKDLDLIFslsLss-ss.......FphlKDVVLssLLDFLPcGVsKEKloshTLKEAYVQKhVKV..ssDsDRWSLISLSNssG.KNVELKFVDSlRRQFEFSVDSFQIhLDSLLhFhcsupsshoEsFaPTVlGEShYGDFpEAhsHLpp+LIATRsPEEIRGGGLLKYCsLLVRsFRPuspp-lKsLERYMCSRFFIDFPDItEQpRKLEuYLpNHF..............hGt-.cs..................KY-YLMpL+cVVsESTVCLMGHERRQTLsLIohL .......................Lsa-QVpRLcplLsEslPIHG....R....G....NFPTLplp.+pIV......................................................psVRs+Lpcp......GItV+D..VRLNGSAASHVLtp..-s.........G..hu.............YKDLDLIFsl..-..L...s..-tp............FphVK-s.VLssLLDFL....PcG.Vs+cK.......I...os......hTL.KEAYVQKhV.KV....s.sDs.......DRWSLISL...S..Nss...G...KNVELKFVDSlRRQFEFSVDSFQIhL..DSLLhFa......ps....u.....p.....s..Ph.o.-.....s.F....aP..TVl..GEShYGDFpE...AhpHLp.p+lIuTRpP...E...EIRGGGLLKYCpLLVRsF+Ps.s.pt.-............l.K.sLpRYMCSRFFIDFsDls-Q......p+KLEsYLpNHF..................sG.-..pp...........................+YpYLhhL+pVVsE..STVCLMGHERRQTLsLIshL................................. 0 45 59 114 +7816 PF07985 SRR1 SRR1 Wood V, Mistry J anon Pfam-B_29119 (release 16.0) Domain SRR1 proteins are signalling proteins involved in regulating the circadian clock in Arabidopsis[1]. 20.70 20.70 20.70 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.75 0.72 -4.04 38 284 2009-01-15 18:05:59 2004-11-25 16:25:20 7 8 229 0 197 286 1 58.00 31 18.80 CHANGED cl.lClGLGshp.........pshsupaQLAhllhlhchh...pl.....splplY..DPlFsps-t...........phlp.slG ......phlChGLGshs................pshsupaQLAhl.lhlh-th.......ph.....................tps...la..DPlFoph-h...........phLppL........... 
0 59 96 156 +7817 PF07986 TBCC Tubulin binding cofactor C Mistry J anon Pfam-B_4111 (release 16.0) Domain Members of this family are involved in the folding pathway of tubulins and form a beta helix structure [2]. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.65 0.71 -4.70 52 603 2012-10-02 17:34:32 2004-11-29 14:13:34 7 18 275 4 417 624 21 115.80 23 26.40 CHANGED spshplpshppCplhl......hsshsslplcsspssplhh.GsVsuSlalcsCpsspllssspQlRh+sspssshalpsso.pPII......EsspslpFus....ashhYstlptphtts.tls..sss.Wsp..ltDF .............................phhlpphppshlhl...................hsshss....lplcsspss.t.llh..G...............s...V...s...sulalcsCcssp..lh.....l.....u..C..p.....Q.............lRl............+ssps.splaLt......s..s..........o.....pP..II.......Ess....ps...l.pFuP............a.sh..hYstlptphtts....sls......sp..Wsp................................................... 0 171 246 344 +7818 PF07987 DUF1775 Bacterial_GLE1; Domain of unkown function (DUF1775) Mistry J anon Pfam-B_12641 (release 16.0) Domain Domain found in bacteria with undetermined function. Its structure has been determined and is an immunoglobulin-like fold. 25.00 25.00 27.60 27.50 24.30 23.90 hmmbuild --amino -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.91 0.71 -4.36 87 551 2009-01-15 18:05:59 2004-11-29 14:53:07 6 4 483 1 162 455 4 130.40 35 57.34 CHANGED HVol...ps..spAssGuat.phsh+VPpts-st.uTsclcVplPps..ltuspspPtPGWslpspcsshttsh....tt.hspsVsplsWouss....lsss..pascFslps.plP...tsssslsFsshQshscG....ss.cWs-.sts....Gt.-s.cp...............PAPslpls ................................................................HVol...ps....spuss.Guat.phsh+VPsEps.s..uTTKlplplPpG..ltht.ps...pPh...sGWphchpKss................sttlp.plsWpuss.....l.ss..papcFshsu.p.P......pctsplsasshQpYpDG.......slVcWs.........Gptcs..cp................PAPhhpl.h....................... 
0 42 102 139 +7819 PF07988 LMSTEN Wos2; LMSTEN motif Wood V, Mistry J, Bateman A anon Pfam-B_4851 (release 16.0) Motif This region of Myb proteins has previously been described as the transcriptional activation domain present in the vertebrate c-Myb and A-Myb, but neither vertebrate B-Myb proteins nor Myb proteins of invertebrates. Because vertebrate B-Myb (but neither A-Myb nor c-Myb) can partially complement Drosophila Myb null mutants, this region appears to have been a relatively recent insertion. 19.30 19.30 23.60 26.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.24 0.72 -4.67 6 191 2009-01-15 18:05:59 2004-11-29 16:03:41 7 19 45 2 59 174 0 46.60 72 7.43 CHANGED csNhlphsQsusAhIQppasD.EDP-KEKRIKELELLLMSTENEl+pKp ..............clNIVNl.PQPuuAAIQRHYsD.EDPEKEKRIKELELLLMSTENEL+GQp.. 0 3 7 23 +7820 PF07989 Microtub_assoc Spindle_assoc; Microtubule associated Mistry J, Wood V anon Pfam-B_45034 (release 16.0) Domain This presumed domain has been identified in two microtubule associated proteins in Schizosaccharomyces pombe, Mto1 and Pcp1. Mto1 has been identified in association with spindle pole body and non-spindle pole body microtubules [1]. The pericentrin homolog Pcp1 is also associated with the fungal centrosome or spindle pole body (SPB) [2]. 21.90 21.90 22.10 22.00 21.80 21.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.40 0.72 -4.15 15 383 2009-09-11 00:31:27 2004-11-29 16:39:38 6 11 174 0 233 391 5 72.10 39 5.41 CHANGED TLRE.EpplscL+KENFsLKL+IaFLEEplp..pcss-sscclhKpNI-LKlEltoLc+-lpcpcchLpcAp+slcs .................sh+-.-pp..lscLcKENFsLKLRIaFLEEphp..............pphc.sstcchh+cNlE.LKV-ltoLp+-lpchcchLpcsp+slc.s........................................ 
0 56 101 170 +7821 PF07990 NABP Nucleic acid binding protein NABP Wood V, Mistry J anon Pfam-B_10222 (release 16.0) Domain Many members of this family are putative nucleic acid binding proteins. One member of this family has been partially characterised [1] and contains two putative phosphorylation sites and a possible dimerisation / leucine zipper domain. 21.60 21.60 21.80 22.40 19.60 21.40 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.51 0.70 -5.19 10 106 2009-01-15 18:05:59 2004-11-30 09:14:21 7 8 21 0 60 101 0 289.90 33 38.22 CHANGED RVPSPCLsPIGs.RVuusDK+ssuusSsFNssoSulsESuDLluALSGMNLSsouulct.psh.pSQhpQDV-shpsYhFslQGGpspsNQ.Huahp+uDpuah.........................................+usssstsu.hpuspuSshssGuulss.aQp.hDusN.sh......suYulNPAluSMMtNQLGsuNhsPha-NsuAuSuhussuhDSR.hGuuhsSus...tusS-scNLsRlGNph..uGouLQSshsDPMYhQYLphsp.uAsusAshsDPSh-RNah.GoSYMDhLtlQKAYLuuLLt.QKpQYGlPh..KSuusssHuYYGsPuFG..GhuYPGSPLuoPslPsS.huPsSPlR+uEhNMRas...SuhRNh.GGlMGSWHhDs.sshD-sFuSSLLEEFKSNKTRuFELS .................................................................................................................................RsPust.sPlGs.+h.s.-.K+s.ss....pt.sos.hs-ss-lhsALSshshS...ssthh...st...tt...uph.p.plpp.pph..hhsh.ssppph.pp.pthhppsp....tt................................................................................................................................tsashsss...suhhhs..phs.ssshss..h-s..s..s.....uuuh.u..sthsSt..Gssh.st.....s.sp..shsthus.....usuhQs..hDPhYhQYhp...s.p.sstt.....hus.tDP.htps.ah.Gsuahsh.t.....hQKAalt.shh....t.QK.......Q..at.......s......pts..s.sYaGs..s.a...shsYsGSPls...sshhssS.hussS.PhRcs-h.shRhs...SshRNh.u..GshG........uWp.-h.t.ph-tshssShLEEFKoNKo+saELu................................. 
0 8 34 48 +7822 PF07991 IlvN Acetohydroxy acid isomeroreductase, catalytic domain Bateman A, Griffiths-Jones SR anon Prodom_2380 (release 99.1) Family Acetohydroxy acid isomeroreductase catalyses the conversion of acetohydroxy acids into dihydroxy valerates. This reaction is the second in the synthetic pathway of the essential branched side chain amino acids valine and isoleucine. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.96 0.71 -4.88 45 4225 2012-10-10 17:06:42 2004-11-30 13:30:19 7 7 3943 20 1058 7052 5249 165.30 49 44.30 CHANGED lL+s.KplAlIGYGSQGHAHAhNLRDSGl.cVllGLRpGu....t.Shc+AcpcGFcVholsEAscpADllMlLlPDEhQspVYcpcIpPNL+cGs..sLsFuHGFNIHas.IpPPcsVDVhMVAPKGPGHhVRcpYpcGtGVPsLlAVa..QDso.GpAh-lALuYAKGIGusRAG ......................................h.lpGKKVAll.G.Y.G.S.Q..G..HA......aAh...NL..R...D........S.......G.l....-......V...s...l..G.L.R.p..G.u...............t....S.a...c....K..A.......c........p........s........G...........F...............c.................V.......h.....o.........l...s...E..........A....s....p...p...A....D...l...l..M...h...L..h..P...D..c..h..Q..u.c..l.a...t..p...c..lp..Pp..L.+pG.s..AL..u...F.u..H.G........F...N....I...H....a........s....t........l...p........s...........p.D.l....DV...hM.......V.AP.K.u.PGHh..V....RcpYp.....c...G.....t.....GVP..sLlAVa..QDs....o.....G....p.......AhslAluYAtulGGsRAG............................................................................................ 0 334 687 899 +7823 PF07992 Pyr_redox_2 Pyridine nucleotide-disulphide oxidoreductase Finn RD anon Manual Domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null --hand HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.67 0.71 -4.44 139 57441 2012-10-10 17:06:42 2004-11-30 13:43:45 9 725 5624 527 15622 68615 26122 259.00 18 57.57 CHANGED cllllGuGsAGhsAAhpht......phshc.lsll.............pp.sthsh.stsslspphhhpstthht.h....t.h.....................................pphh...tp.tslch............hht.ppshpl..shspt.......................t..h..........hphcplllATG.up.sth.....sls.G..................t.h.hh.ptltsupphhphhph............................sp...cllllGuGhlGlEhAthhpphGtcVsllcttsphhsth.sphttthhpphh..................................................hththhttttstpttsssphtththsstpphpsDhlllulGhpPsschh.....cphGlch..........tp..................pGhI..hl-c.....th..pT.......................s...ssslaAsGDsstht. .................................................................................................................................cllllGu......G..s.u...Ghs....sAh.t...hs........................pt.s...h....c....l....s..ll.........................................................pt.....s...............s......h.............h.......s....t....l.....s.....p...t...h...h....h....p..s...h..t.h..h....t.....h..............................................................................................................................................phh.t.t.h..t.......p....hs..h..ph.....................................ht......pp...s...h..pl.........thst.t...........................................................................................................t....t.t....p....h....p.h...c...t...l...l.l....A....s...G....u.......p....s.t.h.....................sls.G..........................................t..t....l.........s...s..t...t...h....h..t.h....h.........................................................................................sp.......pls...l...lGuG.huh-hu.hhtths.pshhh.hht.hh.................................................................................
...........................................................................................................................................................................................h.hh.........tt.t......ttttt..hshhhhhhshhsssp.h....tths.l..p..h..........t.p..........................................................p.G...h...l........h..s......s.p..............h..........p.T......................................................................s......hs..s..laA..hGDsht...t........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 5062 9898 13194 +7824 PF07993 NAD_binding_4 Male sterility protein Griffiths-Jones SR anon Pfam-B_1115 (release 6.4) Family This family represents the C-terminal region of the male sterility protein in a number of arabidopsis and drosophila. A sequence-related jojoba acyl CoA reductase is also included. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.58 0.70 -5.45 102 3739 2012-10-10 17:06:42 2004-11-30 13:44:47 7 268 1310 1 2010 30893 13882 233.90 23 19.78 CHANGED lTGuTGFLGphllccLL..cssssh..clasL.VR.upssp....sutpRl.pphhphslap.h..............tp+lpslsGDlspsp...LGLsspshppL..sppl-lllHsAAsVpast..shpp..hhssNVhGTtpllcLAp.........p.h+ph......shhaVST........uhssstpt.th.c.....................................................htp.thshhtshss.sYshoKhhAEhllpptt...sLP....l.....sIhRPuhlsu...................................-shs.G..hhsshc.hhphlhhsht.tGhh.shhss.t.............hshlPVDhVspul ...............................................................................................lTGuT.GF.L...Gt.h......l....l.p......cL....L....p................p.......s.......s...................h......pl.....h...s..L.....l.R.....s...p..p......t...t............t...s........h...p...R.....l.......p.t..h....p.h.t..h....tth............................................hp+.l.p..s.l.s..G...D..l...s..p.......p.........L...G......L.............s....p....t...p.....h....p.t...L........h........p.................p......l..s........h.............l..l..H......s...A.........A........t.......l.......s.......a.......s.......t..............s.....h.....p...p.............h....h....t....s......N.....l....t....G....T..t....p....l....l....c....l...At.......................p..hpph..........thha...l..So........uh..s....s..s......t..t......t...h..............................................................................................................................................................................t.p....t......t..h..h...t....s..h.s...s....sY.....s.t...o...Kh..h....u....E.......h....l....l.........p......p.......t......t.......................................s........l....s..................s...................sl..h..R........s.uh.lsu.......................................................ps.h.s..G....h.h..s...s...h.
..s.......h....t.h...h.h.......h..s..h.t.....h.....s.....h..h...........t...h....h...s.p........................................hshlPVDhssph....................................................................................................................................................................................................... 1 595 1051 1665 +7825 PF07994 NAD_binding_5 Myo-inositol-1-phosphate synthase Bashton M, Bateman A anon Pfam-B_959 (release 4.1) Family This is a family of myo-inositol-1-phosphate synthases. Inositol-1-phosphate catalyses the conversion of glucose-6- phosphate to inositol-1-phosphate, which is then dephosphorylated to inositol [1]. Inositol phosphates play an important role in signal transduction. 25.00 25.00 28.60 28.40 20.90 20.70 hmmbuild -o /dev/null --hand HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.93 0.70 -4.92 37 1323 2012-10-10 17:06:42 2004-11-30 13:45:19 7 12 949 36 547 1220 416 337.10 33 89.86 CHANGED ltlhlVGhhGNsuoTllsGlhA.+cshshts............G.lsthushclG...............th.PhhsssshVluGaDI...sshplucshpc....Atshs.slpppltschp.....................shlssspup+hDslhs........tstsphcpspphtpDItcFtcpsslD......phllL.susTEp.........h.......shttl.tshp.sttp.lssSphYAhAul..h......tGssalNusPpssh.sP..uhh-hAccpsl..IsGDDhKoGpTth+osLs.hhlttshcspulsoYNhLGNsDGtsLos.pphRSKchoKSsl.ss.............................................p.hhphphsccssHhssIcYlPhlGDsKhAhDchpuclFhGup.plpl+.sscDShlAuPlllDLlhhsclspc.........................................+shtu.Vhsh.SYahKuPh......stthssh.cQttsl-shlR 
.................................................................ltlhllGh.GNsuoolhtGl.h.+ps.s.tt..................lsth..php..hG.....................shhs.s..hhuuaDl...ss...h...pl...upsh....tc.....Ahhhp..s.p.pl.t..ph..........................thlssp...ps.p..h.sslht..........................phpt.pt...DlhchhcpsplD........Vll.hssTtp..................................................hssophYAhAul........tsssF.lNu.Pp.hh.sP.......thhchhpc.tsl..IsGDDhKSt.sthhohhs.hhlhtshtsp..hohph.ssssh..Ls..pp.R.cphphSps.s.................................................ts.h..thttcssHhss.cYlshlsDpKhAhschpuphFhss..slphp.ps.DS..uAslllDhlhhsclutc.............................................cuhtu..lhs.h.uahhKuP..........s..hps....pththlcphh................................................................................................................... 0 192 351 473 +7826 PF07995 GSDH Glucose / Sorbosone dehydrogenase Mistry J, Bateman A anon Pfam-B_1863 (release 16.0) Domain Members of this family are glucose/sorbosone dehydrogenases that possess a beta-propeller fold. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.23 0.70 -5.36 164 3525 2012-10-05 17:30:43 2004-11-30 16:29:24 6 176 1857 34 1319 3362 3764 295.30 22 60.71 CHANGED LcpP...Wul......uFL.P....D.G.ph.LlTER.sGclplls.........sGph..pslsGl........P.pV..........hspG.....QGGLLDlslsPcF..sps........phlYloYu......................tss.....ss.ss...sT.sluRucL..sss.....pLp....shcslact...pPt.hss..stHaGuR.lsFs.....s.....DG.pLalohG-Rtp..........cs......pAQDh...ssphGKllRls.sDGolPsDNPFs........s.........ps..............suts.pIWSYGHRNsQ.GhshcspsGpLWspEHGP+GG......DElNllcsGtNYGWPlloaGhpYs....Gs.................lspt..sspsG.........hppPlh.hWsPo....IAPSGhsaYsG...ct.F.P.p......WcGslhlGuL.p.......sptLhRlpl.c.s...............sp....lsppE+lh.........................tshG.RlRDVppuPD.G.tlYllTD.........tsG...p.........llR ...................................................................................................................................................................h.h.hh.s.......s...t.....t......h.hl....s..p....G.t.lhhht.................ttth..............h.........th...................tl..........................t....s.....p.......t...G....l.h.s..l.s.h....t...P.t....a.....t......p..s.........thh.a.h.hs..........................................................t......ts...tlsph.ph...pts...................p..h...p......s......p....s....l..hp.t...............h.Pt......t..........s.pH...hG.sp...lh.Fs...............s..........-.....G...hLa.l..shG-ssp........................ht..............................tu..ps...h.........p.p......h........t...G...p........l.....l.......R...............l......s.......s...............s...............G..................p............h......s....s......NP.h..............................................................................ts....cl.a..uhGhRNs...............G..huh.......s.......s.......t.......s..........u...p......La...........h..s.....-...h..G......sc.th...
.............................................D...E..l....s...h........l........p......t......G.........t......sY.GW.....Ph..h.t..h....s......p.hs.......................s..........................h.tt.....p.htt.........h......P.....h......h......h.....t.sp.....................u.....s.........G.h...h.....h.h....p...u...........p.........h...s..t.............................ap.tthh....huth..t..................s..plh....hh.h..p.s.........................t............h.h.t....t..hh...............................ttht...R..h....sl....h.......s.....c..G...lhl.ss.............................................................................................................................................................. 0 434 842 1113 +7827 PF07996 T4SS Type IV secretion system proteins Mistry J anon Pfam-B_4497 (release 16.0) Family Members of this family are components of the type IV secretion system. They mediate intracellular transfer of macromolecules via a mechanism ancestrally related to that of bacterial conjugation machineries [1][2]. 
24.70 24.70 24.80 25.00 24.50 24.60 hmmbuild --amino -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.17 0.71 -4.45 50 544 2009-01-15 18:05:59 2004-12-01 12:38:11 6 3 398 1 96 471 20 189.20 20 79.43 CHANGED GIPVhDsus........hhpt....hppltphtpplpp..............hppQlpp.hcp.Ypu....loGscshGslh...sssshppslP.psapslhsth....sshuuluupsptlhpttphhss........tsstppptsptthspsststuhuppsYspsppRlsplppLhppIssspDsKshuDLQsRItsEpshlQs-ps+Lphhphhtpupppltpppppc .............................................GlPVhDsss..............htph..........hpphtphtpp.lpp.................hppQlpp.hc.......pphpu...............hT.Ghp..sh..sshh.........ps..sh.pphh..s.....ps.hps.l..hssh.......tss.uuhusp.hp.ph.......hp.p..phhs.....................h.spsttptspt.ph....ph...sts.h...s.h..spps.....hppsppchpplppLhpplspsp....D.K..thtDLpsplpsEpshlpsp...php....hph.hpthtps.pp.ph.pppt...................................................................................... 0 9 46 67 +7828 PF07997 DUF1694 Protein of unknown function (DUF1694) Mistry J anon Pfam-B_4517 (release 16.0) Family This family contains many hypothetical proteins. 25.00 25.00 26.90 32.50 24.60 21.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.25 0.71 -10.41 0.71 -4.17 33 780 2009-01-15 18:05:59 2004-12-01 14:05:26 6 1 736 2 85 413 0 120.90 33 82.58 CHANGED Mo-p....lccpLppuhaG..ssphcPDE+RpaLGoaRERVllulThsplppppsh..pplpptLpc....hps..hplhlNGplshs.hhspYl+lAsctslpaTlVsspptpo....shGlVls.ucpAlsp-plpl ....................Msc.....lpcpL.cthaG..s.plsPDEQR+YLGTFcERVhltlslspspssplp..pthhphLcs......tps....lplhlssplshs.ttshYlKhApcpssphTIVscpphpo.......shGLllp.uspAVsh-ph-l.............. 0 27 50 65 +7829 PF07998 Peptidase_M54 DUF1695; Peptidase family M54 Mistry J, Bateman A anon Pfam-B_4509 (release 16.0) Family This is a family of metallopeptidases. Two human proteins have been reported to degrade synthetic substrates and peptides [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.41 0.71 -4.54 5 267 2012-10-03 04:41:15 2004-12-01 14:39:37 6 3 198 3 176 401 24 127.40 29 50.09 CHANGED M-cIAFlYMGGcEacWLFFEVYDRVcRal+DVsLsVslVYAGRIKLPPGhLlRVpstsGalsMYsFEAVVEALYGKLVEM+sDVNDDSlTKIFGITTlPIGSRDpYFDIYKKYLGIpVslGNYsVLuLS..IKPFYTE..N+ELFlERVFKGVLHElGHLYGLSHCss.DCVMNPPsDL+DWD+RuPoYCNsCL+cLKR .........................................................................................................................................................................................................................................................................h.lh...-ha..........hp..hhhG.....s......thslh.h............h..........................s.t..p..l..h.....h...pRshKps.sHElGHhhGLpHCpp...pClMph.S..so...lp...-sDp+s.p.h.Cs.Chp+Lt...................... 1 64 100 138 +7830 PF07999 RHSP Retrotransposon hot spot protein Mistry J, Bateman A anon Pfam-B_4567 (release 16.0) Family Members of this family are retrotransposon hot spot proteins. They are associated with polymorphic subtelomeric regions in Trypanosoma. These proteins contain a P-loop motif. 
19.50 19.50 19.50 19.60 19.20 19.40 hmmbuild -o /dev/null HMM SEED 439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.68 0.70 -5.89 3 829 2012-10-05 12:31:09 2004-12-02 10:36:38 6 4 10 0 349 862 1 307.10 29 57.65 CHANGED YDSIYNA+WuYVMSGaNsEPLGMKVa...DGEPPchWTcEEVsVSHTP-DhsEP..LPR+GNLEIAVLTSQ+GWPashFh..pcllscDAsc.t..DaVFs.DVYIRREVhRVWalVKpRLspWLpS+Lp-+psPs..VLlGTPGIGKShuVGSFLLY+LLHYDAELLpIIAYsV+G.KAYVFaKsTss+sGpVTFYpchssulcAVc-LuR.....EslKGYIIYDVGKchcpPuPhsPPsGWusIVLoSPDhssYhEWucp+RAlRIhINCs-EsDLKAhsl..W+Klups.t.sPo-A+..plEsEWQEIcGRIcKVGPLLRaIl-.-uSYKtplcKIcEAlsEhScsccpSYhpVFssuspWcscKsochLARlVRV+sE.NGEpCcN...sPlSAYluQKhLshLRsWlspAphNcasuRpulRusttHAA.pFEKsGIaAFoplNsltsIs++LRtLP .............................................a-SlhpApWpaV.h...p...s........s.....t....................h....G.M.tVh.....t...G...p..............p........Wo.tps..s..............s..............p....t.c..s....p......................t........h..h.hVLoSphGWPat............................................t...c....salppE..RVW.lVcttlstW...h......t...t...............s..h..lll.GTPGIGKShusGShLLapLLHap.......st.L........hVsahh.ts.psalh.ptt.t....pV..hY.tt..shp.lpth.t......................tth..c.GalIhDhstt...........s...........s...t.W.uhlllosPp.ppapta...p.p....t..s....l.hhNC.pt.-h+Ahhs...W.......p.h.t...............................................................t.....................................Wp....l....ctR.hp.VGPl.Ralhs..t.t...a.ttchtthtthlt.h........ht..h..hh.......h..pt...p.lhclVp...t................s..........hs...h........................................................................................................................... 0 183 223 349 +7831 PF08000 bPH_1 DUF1696; Bacterial PH domain Mistry J, Bakolitsa C, Bateman A anon Pfam-B_4657 (release 16.0) Domain This family contains many bacterial hypothetical proteins. The structures of Swiss:A1SD03, PDB:3hsa, and Swiss:A3QB43, PDB:3dcx, show similarities to the PH or pleckstrin homology domain. 
First evidence of PH-like domains in bacteria suggests role in cell envelope stress response [1]. 21.50 21.50 21.50 23.30 21.30 21.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.41 0.71 -4.49 55 796 2012-10-04 00:02:25 2004-12-02 12:40:12 6 3 653 16 178 567 35 123.10 36 88.40 CHANGED MGlhsulhGsuutl..s.sclpcchsslLl...suEplphuaKllRDhhlFTsKRLIllDpQGlTG+Kl-apSlPY+u..IopFSlETA.GpFDLDuELKI.WlSupshPlp........hphp+stslhclppsLAphlh ............................hhpGlhGsuo.h..ssp.p.hpc.cl.t.clLl...csEpl.hu....aKhl....RD....hhlFTs+RLIllDtQGlTGKKspa+S..l..PY+u..IspaSlET....A....G.p.F.D..L..DuELKI.Wluut.p.hslp.........hpF.cp.spslhslppsLsphh.............................................. 0 63 119 152 +7832 PF08001 CMV_US CMV US Mistry J anon Pfam-B_4698 (release 16.0) Family This is a family of unique short (US) cytoplasmic glycoproteins which are expressed in cytomegalovirus [1]. 21.50 21.50 22.90 24.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.75 0.70 -5.31 8 91 2009-01-15 18:05:59 2004-12-02 13:34:17 6 1 12 0 0 75 0 218.80 30 93.92 CHANGED Mpl....hhshlhlsshsAlushpsshcE.uhph...hspssstttsR...........phthPPhP.......pt.pVpSpsupCVlcc.GsLsAlWplRGsF.....hP+....shspatsccocthhcltsPct-lsss.hs..lRa.....pVssclsaVhLplhPCppCppha.cCcPphclPWlPhhoShc.DlcRLaaEpRaLplhaVlhlphhthsLL...shhsApslathplthalR.RHt.hh...........ss.p.hp..tcpt .....................................................hh.hhhhhshhslsh......-.shph....ppsps.ptpc..........hphpLP.hP........pt.pVsSppupCs.lcs.GsLsAsWplcGs...F.....sP+.....shsphhtccust.hh+V.sPphps-.ss.hs..lRh.....pV.s...sclshVhlplhPC.ptCps.a.cCcPphpl.PWlPh.hsS.hp..DlcRLaaEcRaLphhaslhlphshhsLL...hhhlhpshahhhlt.al+.hph..........................s......................... 0 0 0 0 +7833 PF08002 DUF1697 Protein of unknown function (DUF1697) Mistry J anon Pfam-B_4800 (release 16.0) Family This family contains many hypothetical bacterial proteins. 
23.30 23.30 23.30 24.20 21.00 23.20 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.13 88 961 2009-01-15 18:05:59 2004-12-02 15:33:07 6 2 861 4 233 734 24 130.00 31 75.33 CHANGED MspYlALLRGINVGGps+ls.Mu-L+shhpshGassVpTYIpSGNllFp.....sppstsplppplcptlccpaGhsssVllhotpclpplhsssPa..tt...tps....pphhlhFlppshst-.thtpltthpst..Echthss...psl.Y ........MtpYhhLLRGINVGG+N+ls.MA....-L+phL.psl.G.h.ppVc..TYIsSGNllFp.....op.......pstspltpclcshlpppasa.t...hshh.lhohp-hpthlpsh.Ph.hpt........-h.......tchplhFhspshshc.....lt...p.l.ts..h.p....hts..Etlhhspt.h................................ 0 76 156 197 +7834 PF08003 Methyltransf_9 DUF1698; Protein of unknown function (DUF1698) Mistry J anon Pfam-B_4787 (release 16.0) Family This family contains many hypothetical proteins. It also includes two putative methyltransferase proteins, Swiss:Q8EEE6 and Swiss:Q88MX8. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.89 0.70 -5.73 11 1216 2012-10-10 17:06:42 2004-12-02 16:27:41 6 2 1182 0 199 2993 1047 295.80 54 95.83 CHANGED FYQpIAp.ssLpcWLpTLPsQLssWp.pppHGcat+Wh+sLcpLPphpP-plDLpsu.VoscpcpslupGEpcc.LcplLRthMPWRKGPaslaGlHIDTEWRSDWKWDRVLPHlSPLpsRolLDVGCGsGYHMWRMlGEGAphsVGIDPopLFLCQFEAVR+LLGsDpRAaLLPLGIEQLPt.LpAFDTVFSMGVLYHRRSPLDHLhQLKsQLVpGGELlLETLVI-GDEssVLVPs-RYAQM+NVYFlPSAtALpsWLcKsGFsDVRlVDpslTos-EQRpT-WMpsESLsDaLDPpDsoKTlEGYPAP+RAlllA+K 
...............................................................................................hYp.IAp.s.Lp.WLpoL.PtQlst.Wp...pptH.G.htpW...pslch.LP.p...........l.p..P....p.l.D...L..h.p....u.....Vs.scsppsLo..tG..p..h..cc....lcslh+s..L....hPW..R.K....G...PFp.L.....a..G...................lp......I....D..T...E.W....R.....S..D...W...K..W...D.R.V..L..P...........H......l......S...........s....L......s....G...R...o...lLDVGCG...uG....Y....H...h..W...R.M...l..........G...t...G................A..+..h...s...V.G..I..D......P...o....p.......L........F.............L...s...Q...........F..E..........A....l.R...K.........L..L....G.......s......D.......p...........R.......A..+...L..L...P.....L......G....I...E..Q.....L........P.....s.....L..........p.......A......F..D.T.V...F....S..M.G....V...L..Y..H......R.....R..S..P...L.......-..H....L...h....Q.L....K....s.Q.LV..stGE.L.V......L.........E.......T..........L...V....l-............G......D.......c.....................s......s....V......L.....................V....P....s.....D..............R........Y....A...Q......M.......R.......N.....V.......Y......F......I.P.....S.s.A..LKsW...L...cKsGF.h..D..l..Rl.lD...s.s...l....T....os...-....E.QR.+..T..-WM..s..sc.SLsDF...LD...PpD.oKTlEGYPAPpRAlllApK........................................................................................................... 0 46 104 155 +7835 PF08004 DUF1699 Protein of unknown function (DUF1699) Mistry J anon Pfam-B_4896 (release 16.0) Family This family contains many archaeal proteins which have very conserved sequences. 
29.20 29.20 30.40 30.30 28.60 29.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.56 0.71 -4.62 13 59 2009-01-15 18:05:59 2004-12-02 16:44:29 6 1 11 0 59 60 0 127.60 53 95.70 CHANGED MKIRVVSSR-EItsLNsNE+lVHLAFRPSN+DlFpLVcoCP+lElIQlPpSYh+TlSKSIcMFLEMQpIpLlEGDVWGHRKDINEYYsVspsVI-+IpEh+sEGhSsEcIsEKls+EoKLuP-MltYILsp .........M+IRVVSS+-EI.sLNsNE+lVHLAFRPSNpDlFsLVcpCPclcslQlPpSYh+TlS+SIcMFL-MQtIpL.lEGDVWGHRKDINEYYplspsll-+Ic-L+s-GhosE-ItcKls+EoKLsP-hltYIlp.p............ 0 12 35 40 +7836 PF08005 PHR PHR domain Stogios PJ, Finn RD, Mistry J anon Stogios PJ Domain This domain is called PHR as it was original found in the proteins PAM (Swiss:O75592), highwire (Swiss:Q9NB71) and RPM (Swiss:Q17551) [1]. This domain can be duplicated in the highwire, PFAM and PRM sequence.\ The C-terminal region of the protein BTBD1 includes the PHR domain and is known to interact with Topoisomerase I, an enzyme which relaxes DNA supercoils [2]. 20.80 20.80 21.00 21.20 20.70 20.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.94 0.71 -4.05 18 549 2009-01-15 18:05:59 2004-12-02 17:17:51 7 40 122 5 326 477 0 142.80 41 14.32 CHANGED sRFpps...uspWsauup.sDuIpFsVD+c.IhlsGhGLYG.....up...u-YpsplcLh.......................................pcpsclLupscssa.s-u.ssssaclhFccPVplpsshhYsspAplpGsc.SthGssGhppVpssc.......VsFpFpsustusNGTsVpsGQIPEllaYs ............................pRFpps...uspWtYpGp..sDuIp.....F.....uVD+c..IhlsGhG........LYG............ut......u-YpscIclh.........................................................................................cpsssLupspsta...hsDu....suss..atlh..FccPVplpssshYsAs.s.h...l...pG.s.c...Sh.hGpcGhspVpsss.............VsFp...Fp....s.u.st.Ss....N.GTsVps.GQIPpllaY.................................................. 
0 93 117 219 +7837 PF08006 DUF1700 Protein of unknown function (DUF1700) Mistry J anon Pfam-B_5023 (release 16.0) Family This family contains many hypothetical bacterial proteins and two putative membrane proteins (Swiss:Q6GFD0 and Swiss:Q6G806). 28.30 28.30 28.50 28.50 28.20 28.10 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.97 0.71 -4.98 18 1443 2012-10-01 22:34:14 2004-12-06 14:55:25 6 16 1095 0 127 829 10 161.70 25 81.69 CHANGED MsKppFLpcLcppLcplPccE+c-ILt-YEpHFh.GtpcGKoEcEIhccLGsP+pIAKElpAp.sIccs...........cppsohpNlh+AlhuslGLulhNhhlllhPhlhllsl...llulhlsuhsh......lhuPllLlhtulhsGhhphhhs..........lFhuIshsGlGlllsllsahlsKhha+Lhl+YL+WNlpllKG .............................................................Ms+pcaLppLcthL..c.p..L.Pcp.-.p.p-h.hp.YccaFtp.t.t....pG.sE...p-lltpLGsPcp.lAcE.l........h..u...p.....h..t..l.p.pt.....................................p.t..p.....s........p.......p.........h..............p....s..l............h........s....h.........l......u......L.....s.....l...............h..s.............h....h.....h.....l.....h..h..h.....h..h.hh.hh................ll.u.hh.h..h..sh.h.h..............l..h.s.s...h...h.h....l........h..t...s...h...h...h...hh......................lh....h.....u....l..h...h....h..u....hu..h..hhh...h...h..h.............h....hhchh.hhhhhth...h.a...................................................................................................................................... 0 46 82 107 +7838 PF08007 Cupin_4 DUF1701; Cupin superfamily protein Mistry J, Bateman A anon Pfam-B_5011 (release 16.0) Family This family contains many hypothetical proteins that belong to the cupin superfamily. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.29 0.70 -5.19 26 1723 2012-10-10 13:59:34 2004-12-06 16:31:36 7 28 1356 7 547 2097 365 287.10 32 65.62 CHANGED scpFLccaWp+KPLLI+puhs.....thts.hsss-.LttLupccclsu....Rll...spctspWpsppGPhpp......apphspptWoLLlQulspat.sstpLhpsFcFlPp.WRlDDlMlSa..........ussuG..GVGPHaDpYDVFllQspG++RW+lsp......pss.tphtsc.slphls.h..cshh.DhlLcPGDlLYlPsGhsHpGhu.s..pslsaSlG.hRsPsht-l...............................hsphs-hLhpph.sh...................tthpcst.psh.pssstltss.th.stltthlppllp..sssphtphhsphlsps..phph-.lh...s.h...................ptss...ltphh.csGshL.+pssh+ .............................................................................................................................................h..pFlcc.a.W.Q.K.+.Pl.l........l...+..............p.u.hs..........sa.h....s..s......l.....o.s--.....L.....s.s.L...Ah-p.-lco..................RLl.......sp.p......s.s...p.....W.....p...s.p.pG.P.h...p..p.....................asp.L.s....c.....p.....s....W.o.........LL.VQ.ul..spa.........t.s.s...........t......t.Lh.psFc.....h..lPs..WR.lDDlMlSa..................usP...........GG.....GV...G..P.HhD..p........Y..D...V..F...............l.l..Q......upG.+.RR......WRlGp..................................ph.p....p.p.h......p.....s.....p.......s.....s.l....h.......l.c.....sa...............-s.lh.....D........................L.E.......PGD..lLYlPPGasH.pG..huh-.........suh..saS...l...G....a..R..u...P...s...s..p..-..L....................................................l.s..shs.-a..lhppthst...................thapD.s....t...h....st....tp....s.u....pl....s...p....th...sp....l.....p..phhhph.lp....ps.p.p.h..p.p.ahG.phlops..+hphc......lh......h.....................p.tc......l.phh.tpG..l.t..................................................................................................................................................... 
0 189 314 431 +7839 PF08008 Viral_cys_rich Viral cysteine rich Mistry J anon Pfam-B_4965 (release 16.0) Domain Members of this family are polydna viral proteins that contain a cysteine rich motif [1]. Some members of this family have multiple copies of this domain. 25.00 25.00 27.00 27.00 23.70 22.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.05 0.72 -4.41 15 41 2009-01-15 18:05:59 2004-12-06 17:12:16 7 4 10 2 0 37 0 80.50 32 56.19 CHANGED phcPsCIs..NacsChtosKPCC.ctpp.opGthsscEalCtcFsuGlCpPlpsIpNlphahELlccLN-TNFpELcppYap.sltpsts ..............t..pssCIs..shp.C..h..pos+PCC.p.pt.p.sphhsc-alChh.FGpG......lCpPlpslpNlphahpLhcplNpTNatELptpYht.sl....p....... 0 0 0 0 +7840 PF08009 CDP-OH_P_tran_2 TOM13; CDP-alcohol phosphatidyltransferase 2 Mistry J anon Pfam-B_51131 (release 16.0) Domain This domain is found on CDP-alcohol phosphatidyltransferases. These enzymes catalyse the displacement of CMP from a CDP-alcohol by a second alcohol with formation of a phosphodiester bond and concomitant breaking of a phosphoride anhydride bond. 21.50 21.50 21.50 21.90 21.40 21.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.10 0.72 -4.22 14 224 2009-01-15 18:05:59 2004-12-07 09:29:33 6 2 222 0 63 151 42 38.20 40 13.83 CHANGED Pc.LlLslhlhlslhhAhLIshPWhsLolhullYlholP ........lVlPlllhVslhlAhLlsaPWhoLulsulhYlh.LP 0 16 34 43 +7841 PF08010 Phage_30_3 Bacteriophage protein GP30.3 Mistry J anon Pfam-B_5273 (release 16.0) Family Proteins in this family are bacteriophage GP30.3 proteins. Their function is poorly characterised [1][2]. 
28.20 28.20 28.90 52.80 27.30 28.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.69 0.71 -4.71 5 72 2009-09-13 17:33:30 2004-12-07 14:05:45 6 1 71 0 3 50 3 149.50 59 98.28 CHANGED M...DI+SGuuYPSCALSNFAPHsFVhDGVcCASMEGFLQSLKFKNPEMQc+VCuLVGKAAKF+GpKKsWaRsQ...TLYW+GlPhpRpS-AYQpLl-NAYsElu.QNcGFR+ALpAT+sooLTHSMG+s.KpsETVLTEpEFlssLsRLRDpL .......MSEL.EIRSNF.pWPSCA..LSNFApWPFVMDGIQFGGLEGFLQGCKVKNVE.QQR.RIFGL.SGL.AAQQ...sG..Ru..YARAQD...RGTLFWLGlPFSRYSsAWKELYTNAYFEAAlQN+GFRDALpASKGKlLKH.SMASuLTKcDTILTEuEFIDlLNhLRDpL.... 0 0 3 3 +7842 PF08011 DUF1703 Protein of unknown function (DUF1703) Mistry J anon Pfam-B_5377 (release 16.0) Family This family contains many hypothetical bacterial proteins. It has been identified as a member of the PD-(D/E)XK nuclease superfamily through transitive meta profile searches [1]. DUF1703 has the predicted secondary structure pattern of the restriction endonuclease-like fold core and contains an additional beta-strand at the C-terminus [1]. 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.37 0.72 -4.30 45 1376 2012-10-11 20:44:44 2004-12-07 14:49:26 6 10 264 0 217 1284 64 98.50 27 20.59 CHANGED thEsaYpsllYuahu...shsaplhsEsposcGRhDlhlp.......s.tshhalhEFK....tt.pptssp........cuLpQI+c+uYuppapsps...cplhplGlsFsscpcslhph .........................t.tEtaYpshh.hshht....hh..sa..hlpsEhcoupG.R........hDlhlp...........p...pch...sa...l.hEhK..............h..pts.sc....................cAlpQI.ccKpYstthptps..........pplhtlGl.sFsscptpl......................................................... 0 109 189 212 +7843 PF08012 DUF1702 Protein of unknown function (DUF1702) Mistry J anon Pfam-B_5312 (release 16.0) Family This family of proteins contains many bacterial proteins that are encoded by the UnbL gene.\ The function of these proteins is unknown. 
25.00 25.00 123.50 123.20 19.80 19.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.93 0.70 -5.81 23 78 2009-01-15 18:05:59 2004-12-07 14:53:09 6 1 51 0 27 85 2 312.60 42 96.17 CHANGED Ms.shtslRRRlLTPsls....-TphssRGF+hKsssA+cpLEoVGpsFLpGYuaAlEARsss-scptLEplPschRGFAYEGAuMuhulLDuLsssuttR..........lsshLAGc.GstHsYMlaVGlGWAMARLP.RhhWssltss......DPLLRWLsLDGYGFHQAYF+TsRYVcpppRcpsasW..s.spscYssRAlDQGIGRALWFVsGoDP-hVAshlcpFPtsR+uDLauGlGLAATYAGGusssELptLtctAGtaRssLAQGuAFAAEARh+AGhlssHTclAsplLC.GhosppAAslspcsRsssss.sGshPA.YEsWRpcIAsphss ..................httlR+hlLsPsls....psphstRGF........phcss.su.pcpLEslGpsFlpGataAl-up.s.s-spppL-t.ls.schRGFAYEGAuMuhslhDuLss.s..ptpR..........htshLsGt......GptHsYhsYVGlGWAMARLP....RhhWtclhss...................cPLL+WLslDGYGFHpAYF+Ts+aVcppttss.asW.t..s.ssYssRAlDQGIGRALWFltGsDsstVushlppFsssR+uDLauGlGLAATYAGGssts.-LptLtctAGpa..pspLAQGuAFAAcARh+AGhlssHTclAsplLC..GhosppAAplsc-stss....sss.....su.shPA.YEhWRpcItsph.s... 0 11 19 24 +7844 PF08013 Tagatose_6_P_K Tagatose 6 phosphate kinase Mistry J anon Pfam-B_5149 (release 16.0) Family Proteins in this family are tagatose 6 phosphate kinases. 
25.00 25.00 25.00 25.60 24.50 24.30 hmmbuild -o /dev/null HMM SEED 424 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.49 0.70 -6.04 31 1086 2012-10-03 05:58:16 2004-12-07 16:20:22 6 5 720 5 98 655 63 386.40 57 96.98 CHANGED Mps..L..sLlpp+KsGcshGIhSVCSAHPLVlEAAlcpAhpss.ssVLIEATuNQVNQhGGYTGMTPuDF+caVhplAcchGaPp-+llLGGDHLGPNsWpchPA-pAMspA-sLIpAYVtAGFpKIHLDsSMuCAsDP..ssLsDphlAcRAARLsplAEps....h.pphGtpsslYVIGTEVPlPGG..AtEsLs..plpVTsspAAtpTlpsH+pAFtptGL.ppsasRVIulVVQPGVEFcHpsVlcYcsppApsLophlcs.PthVFEAHSTDYQospAhppLVcDHFAILKVGPuLTFALREALFALstIEsELl...sstppusLtpshEplMhcpPp.WppaYpGssspt+ltR+YShSDRIRYYWscPclpsAlcpLhsNLsspslPLsLlSQYLPtQapulppGcLsscPcsLllc+IpplLpsYttACps .......................................hppllt.pHKAG.c........phGIhuVCSAH.PLVlEAAlpaspssp.p.lLIEATSNQVsQ..F...G.GYT.G.M.TPADFRpFVhplADplsFsp-.tlILGGDHLGP.....Np.....W.....Q.....p.......sA.s.....tA.Mtpus-LlKuYVtAGFpKIHLDsSMSC.t-DP....lPLs..s-..hV..AE..RAAhLs.....tsAEps.....s...pch.....tc..t.....pLsYV.IGTEVPVPGG..........tt.......ps............lp..p..ltlTps-sAtsTLcsHp+AFttpGL.sthhsRlIAlVVQPGV.EF.DHoslIcYQPtcApALuphl-.pht..hlaEAHSTDYQT.puhppLVtDHFAILKVGPALTFA.............LREAlFuLAtIEpELl.....ss...c...t...p...StlhpVlEcVMLc..cPpYWppaY+ss...h...ssphL......s.........h....t....YS...hS....DRl.....R........YY.WPcspIcsuhtpLhtNLt.sssIPL.slISQYLPhQa....+lppGcL...p...s...tP+pLIhs+I.DlLttY+huCt.t.................................................................................................................................... 0 21 45 68 +7845 PF08014 DUF1704 Domain of unknown function (DUF1704) Mistry J anon Pfam-B_5490 (release 16.0) Domain This family contains many hypothetical proteins. 
21.80 21.80 22.80 22.10 20.70 20.20 hmmbuild -o /dev/null HMM SEED 349 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.88 0.70 -5.42 35 415 2009-09-10 23:48:06 2004-12-07 16:57:52 6 2 337 0 154 362 93 328.70 31 67.85 CHANGED +ccFhpsphphtPpapY+.......LshDstph+cpLasl.l-plcD....sslppLacchhcphsthlchLpshGo..ccFhatSlclYGsPscphhssAchlLchs.............thtcppscphsAppAschhppphcpa.....thpscVplSsslsAcAhVuusp...lhlspsshFScp-lpsLtpHElGVHllTTlNGppQP.L+hLShGhPssTtTQEGLAlluEahoGuhohpRL+pLAhRVlAl-thhcGtsFh-sFphLpcphshscccAFslTtRVaRGGG......FTKDhlYL+Ghhclhshh+p.hss.....ls.LhsGKhulcclsllp-LhpcGlLssPcalP.........hhps.sshsshhsa..hlsslc .......................................................................................................................ptF.tt..p.....Pththt.......hshss.t...hptph.tl.hch.pp..........ssltphhtp.hppht.hhphLp..sh........Gp...tFh..S.phYG..t.sscthhtss.hlhphs........................t..ppc.phhsuppsh..p..hh.pphppah........h..phpVpho-sllucA..h..suusp......lplss.....psh.ascp-lpsLhpHEhhVHlhTslNG.....ptQP..h...p....h....h....u....hG.......h.........Psss.....sTQEGLA.lLtE.h.l.s.t.s....hpRh+pL....shRVhA.ls.hh......pttsFhplFphlpcp.....sh................stpsuashssRVaRGussp.........s.sFTKDhlYLcGhlpllpahpp..hpp................hshL.h.sGKsohcDlchlcp..LsppGhltsP+ah.........thhc.hptLpshhpa..hht...p........................................... 0 31 54 100 +7846 PF08015 Pheromone Fungal mating-type pheromone Lee SC anon Bateman A Family This family corresponds to mating-type pheromone proteins. The homobasidiomycetes, or mushroom fungi, have arguably the most complex mating system of all known organisms. Many species possess a mating system known as bifactorial incompatibility, where two unlinked loci control the mating -type of an individual incompatibility loci (the A and B mating-type loci). 
Each A mating-type sublocus encodes a pair of divergently transcribed homeodomain transcription factors while the genes responsible for B mating-type activity encode lipopeptide pheromones and G-protein -coupled pheromone receptors [1]. 21.80 21.80 21.90 23.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.50 0.72 -3.15 38 68 2009-01-15 18:05:59 2004-12-08 15:46:22 6 1 9 0 20 59 0 61.10 24 97.63 CHANGED MDs..Fss.lshh.........................................h.ssts.......sssstsssphpshssDtERhssG...shsuaCVl .............MDs..Fso.lshh.................................t......tssss.s.......sssstssssh.slPsstE+hsuu...shsuaCVI........ 0 18 20 20 +7847 PF08016 PKD_channel Polycystin cation channel Bateman A anon Bateman A Family This family contains the cation channel region of PKD1 and PKD2 proteins. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.31 0.70 -6.06 13 1249 2012-10-03 11:11:44 2004-12-08 17:04:05 7 121 184 0 812 4460 432 268.20 19 25.62 CHANGED shuhtslsphpslapahppsLlstl.....puspo.tt.stp....pshlLGsPRLRQlRlcss...hhh.c.hhpphht....Cpsshu.sspDp..a...W....cthspsssthhhYps...........sspL..............shtpaGhlssYs.ouGYhh..Ls.tstppotctlstLpcppWLDppTRAlFl-aohYNsssNLFssloLlhEhPssGsslsshplcohsLhcassshsh.lhlh.llallhhlaashsEhhpltpct.hpYl+.osWNhL-hsllslsslsslltlhRphhssphhpphh.ssstsFhsFcpluphsphhpsltAhLlFLshlKlh+hlpFspshplhopTLpcuhc-lhuhulhlsllhlAYuthuhLlhGophssasshscuhlol.....hphlsssFsas......th.pss+hLGsLhasshhhlhhalLLNlFlulIp-sYspl 
......................................................................................................................................s......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...t....h......h..u.....h...h...h.....h....h...h.l.....p....h...h...c...h.l......p...h.......t...p...h......s.......h....h...s......l...p.t...s...h....p...l..h...t.F.h.h..hh..h..ll..h.....hu.a...s..................h..u.....h..........l....l............h...........G...............s..............................h.............................p...............a...............p.............s.....h...p...s.h.....sh..........................ht.h...h.........u....s.............a.t..............................................th.......t.......h...t...............h....h.....u...........l...h.....h..hsa....h....h.h.h....h....h.....hl.ls.hhlull.tsapt.............................
................................. 1 330 402 599 +7848 PF08017 Fibrinogen_BP Fibrinogen binding protein Mistry J anon Pfam-B_4323 (release 16.0) Domain Proteins in this family bind to fibrinogen. Members of this family includes the fibrinogen receptor, FbsA, (Swiss:Q8GIU3) which mediates platelet aggregation [1]. 27.30 27.30 27.40 27.80 26.90 27.20 hmmbuild -o /dev/null HMM SEED 393 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.92 0.70 -13.56 0.70 -6.26 4 85 2009-01-15 18:05:59 2004-12-09 16:47:47 6 15 47 0 45 87 8 228.40 17 25.45 CHANGED YMGVLGSTIILGSSPVSAMDSVGNQSQGNVLERRQRDAENRSQGNVLERRQRDAENRSQGNVLERRQRDAEN+SQGNVLERRQRDAENRSQGNVLERRQRDsEN+SQGNVLERRQRDsENKSQGNVLERRQRDAENRSQGNVLERRQRDAEN+SQGNVLERRQRDAENRSQGNVLERRQRDsENKSQGNVLERRQRDsEN+SQGNVLERRQRDAENRSQGNVLERRQRDsEN+SQGNVLERRQRDsENKSQGNVLERRQRDAENRSQGNVLERRQRDsEN+SQG................................NVLERRQRDAENKSQVGQLIGKNPLLSKSIISRENNHSSQGDSNKQSFSKKVSQVTNVANRPMLTNNSRTISVINKLPKTGDDQNVIFKLVGFGLILLTSRCGLRRNEN ............................................................................................................................................th.tt.p..tph.....tp..t.ptphttpt....ptph.t+tp.tph.p.t....cs..p....tph.....t.+tptphtt....+tptph......t.+t...ptphttc.s...pt..ph.t+tpp..phptps.......pt........ph.t+tpppspt+sp....tps..t+tpppstppsptp....s.t+tptps.t................ptptph.tptptphttptptph......tptptph.tptpt........ph.tptptth.t.pt...tth....p.th....ptth.....ptth.............................................................................................................................................................................................................................................................................. 0 16 19 38 +7849 PF08018 Antimicrobial_1 Frog antimicrobial peptide Lee SC anon Bateman A Family This family includes antimicrobial peptides secreted from skins of frogs. 
The secretion of antimicrobial peptides from the skins of frogs plays an important role in the self defense of these frogs. Structural characterization of these peptides showed that they belonged to four known families: the brevinin-1 family, the esculentin-2 family, the ranatuerin-2 family and the temporin family [1]. 21.20 21.20 21.30 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.51 0.72 -6.65 0.72 -4.04 26 221 2009-01-15 18:05:59 2004-12-09 17:23:23 6 3 48 0 1 216 0 23.70 54 39.09 CHANGED FLPhlhulAAphlPplhCsIoKKC .....FLPhlAulAAphlP+laCtIoKKC... 0 1 1 1 +7850 PF08019 DUF1705 Domain of unknown function (DUF1705) Mistry J anon Pfam-B_1101 (release 16.0) Domain Some members of this family are putative bacterial membrane proteins. This domain is found immediately N terminal to the sulfatase domain in many sulfatases. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.62 0.71 -4.72 114 2318 2009-01-15 18:05:59 2004-12-10 10:47:59 7 4 1307 0 257 1329 103 149.40 26 27.51 CHANGED hlhshhshl...hslhuhtalhKPlhhllllhuAhssYah.sYGllhDpsMlpNlhpTssuEAtsllohphllalllhGllP.uhllhplcl...phpshh+tlht+hh....hhlsulhllsslshhhapsauSlhRNp+.pl+thlsPsshlhushpahppphhpp ...............................................l.hh.hh.hl...ls.Lhu.h...hhhh+..hl....u.sl...l....l....l....h....SA...uA....p.Y....ah....h....hY....G.ll...Ispuh.ltslhp.TsssE.upp....ll.o.........ph....lLhllh..hul..L...s.sll..l.hhs+l......phs....h..h....+....s....h..h...h...+...lt.......shll...s.lll...lhhh...s...h....h.....psatthhcs.t..ph.......h..h.P.p.lhu.hphh.......t.................................................... 1 40 114 185 +7851 PF08020 DUF1706 Protein of unknown function (DUF1706) Mistry J anon Pfam-B_5540 (release 16.0) Family This family contains many hypothetical proteins from bacteria and yeast. 
20.60 20.60 20.90 20.80 20.50 20.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.92 0.71 -4.75 42 927 2012-10-02 14:44:17 2004-12-10 11:49:08 6 4 794 0 99 520 21 149.80 35 96.98 CHANGED M.spspoKp-LltslppsapKlhs.hsslPcchpppth..t..........ts-+osp-hluYLlGWtpLlLpWhppp..ppGhtVthPspsYKWNpLGtLsppFaccYpph.shpchhthLppshpclhpLI-shSs-ELFsts.htWs......upWslG+alphNTsSPacshppKlR....+apKt .............Mtp.psKpELhtthppsapKhht.hsslscp..tpp.h...............phD+s.t-slsa.htWppL.lLpW.p..sp....ppGh.ps.hPs..ptapW...pp...hGtL.p.Fhppatph.olpphhthLppshpplhthI-shSp-ELFp..phhtWs......ushslhpahh.sTsu.ashhtpKlR+apK.h..................... 1 33 66 80 +7852 PF08021 FAD_binding_9 Siderophore-interacting FAD-binding domain Finn RD anon Manual Domain \N 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.45 0.71 -4.13 40 1962 2012-10-03 00:38:56 2004-12-10 14:18:49 6 12 1325 1 503 1509 90 109.60 34 37.53 CHANGED pVhpspcloPphtRlshsG.ssLssas..ss.ssDpalKLhFPpsuts.s.h.......ths.th........tt.+PhhRsYTlRphDssst....El-lDFVlH...ss........GPAusWAtpApsGDpltlsG.P.tush ..............................................pVhcspplosphhRlsls.G.....p......s......Lss.Fs...........st..shDs.al.KlhF...Pp...sssph...................h..st....t...hh....h.......stt.+Ps.RsYTsRth..Dt..t..tt...........ElslDFsl.H.....ss............................GsAusW.A.tp.Aps.GDplslsG.Ptut.h............................. 
1 122 319 431 +7853 PF08022 FAD_binding_8 FAD-binding domain Finn RD anon Pfam-B_728 (release 4.2) Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.67 0.72 -4.16 19 2729 2012-10-03 00:38:56 2004-12-10 14:19:05 7 89 751 1 1659 2722 95 111.30 22 16.74 CHANGED hhssph.hpltlhs......ssllplphsKPpt....a+apsGpahalph..slo......phHPFoI.sSust........s-tlslhI+spusaTcpLpphhtp...............................................sthsph.+lhl-GPYGssu ........................................ht.......ph..hhs......ss.s..h.c..l.ph..p..p.sp.................hpacs...G.Qal..a.....lph...s..........s.........lu.....................p...........H.......PFT.l......s.S.s..s.p...................................p..s....t..l...sl....h..l..+s........h....G..s..a....Tpp.Lhphh...t.tt................................................................................................................................pl....hl-GPaGt..s..................................................................................................................................................................... 0 436 875 1356 +7854 PF08023 Antimicrobial_2 Frog antimicrobial peptide Lee SC anon Bateman A Family This family consists of the major classes of antimicrobial peptides secreted from the skin of frogs that protect the frogs against invading microbes. They are typically between 10-50 amino acids long and are derived from proteolytic cleavage of larger precursors. Major classes of peptides such esculentin, gaegurin, brevinin, rugosin and ranatuerin are included in this family [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.33 0.72 -3.63 18 473 2009-01-15 18:05:59 2004-12-10 16:58:22 7 2 54 1 0 469 0 32.20 38 49.17 CHANGED GlLsslKthAKssu....KslApshLsplsCKlotpC ..........GlhsslKshu....tssu....Kss....utslLcpluCKloppC... 
0 0 0 0 +7855 PF08024 Antimicrobial_4 Ant antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of the ponericin family of antimicrobial peptides isolated from predatory ant Pachycondyla goeldii. The ponericin peptides may adopt amphipathic alpha-helical structure in polar environments. In the ant colony, these peptides exhibit a defensive role against microbial pathogens arising from prey introduction and/or ingestion [1]. 25.00 25.00 26.90 34.40 19.90 19.90 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.60 0.72 -6.65 0.72 -4.06 5 7 2009-01-15 18:05:59 2004-12-15 11:27:15 6 1 3 0 0 7 0 23.40 51 94.80 CHANGED WGoLlKhGlKLlPSVVGhFpKKKQ WGohhKhuhKLlPuVlGhh.KKKp. 0 0 0 0 +7856 PF08025 Antimicrobial_3 Spider antimicrobial peptide Lee SC anon Short protein clustering Family This family includes antimicrobial peptides isolated from the crude venom of the wolf spider Oxyopes kitabensis. These peptides, known as oxyopinins, are the largest linear cationic amphipathic peptides chemically characterised and exhibit disrupting activities towards biological membranes [1]. 25.00 25.00 88.20 88.10 19.50 15.60 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.69 0.72 -4.23 2 4 2009-01-15 18:05:59 2004-12-15 11:46:53 6 1 1 0 0 4 0 37.00 84 100.00 CHANGED GKhSshuKlLRuIAKhFKGVGKsRKQFKpASDLDKNQ GKFSsFuKILRSIAKhFKGVGKVRKQFKpASDLDKNQ 0 0 0 0 +7857 PF08026 Antimicrobial_5 Bee antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of antimicrobial peptides produced by bees. These peptides have strong antimicrobial and some anti-fungal activity and has homology to abaecin which is the largest proline-rich antimicrobial peptide isolated from European bumblebee Bombus pascuorum [1]. 
25.00 25.00 28.50 27.90 23.40 23.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.20 0.72 -4.10 2 26 2009-01-15 18:05:59 2004-12-15 11:48:51 6 1 20 0 4 21 0 33.30 73 63.28 CHANGED aVPh.NsPpPGp.+PFPoFPGpGPFNPKIpWP......Ga .....Pl.psP.PGt.KPFPTFPGQGPaNPKI+hP.......... 0 1 1 4 +7858 PF08027 Albumin_I Albumin I Finn RD anon Pfam-B_100627 (release 16.0) Domain The albumin I protein, a hormone-like peptide, stimulates kinase activity upon binding a membrane bound 43 kDa receptor. The structure of this domain reveals a knottin like fold, comprise of three beta strands [1]. 20.20 20.20 23.00 22.70 18.20 17.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -11.06 0.71 -4.37 4 79 2012-10-01 22:06:18 2004-12-16 13:53:21 6 3 23 2 3 89 0 96.10 46 81.40 CHANGED LsVFLlAshoL.hFspK.luAsDCsGsCSPFEMPPCtSSsCRCIPlGLlsGaChpPSu.solhKMV-EHPNLCQScADCpKKGSGsFCARYPNPDIEYGWCFuSsSEA.-VFhpl...P..RshhK .................hhlhh..h.h..hp.htA..Cs.s..h...Cosat..h.s.sCt.o.s..s.....CR.C..lPh....sL.h.......sGh.Ch.Po.u....shsKhl-EHPNLCQScs-ChKKGSGNFCARYPNs.lcaGWCFtu.u-u.................................. 0 0 3 3 +7859 PF08028 Acyl-CoA_dh_2 Acyl-CoA dehydrogenase, C-terminal domain Finn RD anon Pfam-B_8101 (release 16.0) Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.40 0.71 -10.41 0.71 -3.91 37 2598 2012-10-01 23:33:27 2004-12-17 10:14:48 6 16 1047 24 864 19514 7084 132.50 23 33.66 CHANGED lshuussLGhA+uALssah-hsp.sRhp.....hsssths-pshsthplAcussclcAAchhl.csspt........scsGpth.s.t.pscspts....puhAschshsussplhpsuGupuhhpssPlQRhaRDl+ssssHshhs. 
........................................................................................h..hsushl..Gl...A...cu...A......hc...t.......sh.p...hs....p..p+sp.............tusst..p..h...s....c....c..P......h..............s..........tp..l..G...c......h.......p.s...p.l...p....u....A...cs...h...l...h...p..s.u..c...t...h..p.t.h...................ht.p...s...p..t.....h......s..........t......t...p.....s......p..s..p..hs.............................ps.h..ss.chu....lp.s........s....s.p....l....ac.....h..sG.up.u.h.t.......t.......s..........p..........s...L...pRa......aRss+sh..Hs....s............................................. 0 165 466 687 +7860 PF08029 HisG_C HisG, C-terminal domain Finn RD anon Pfam-B_1550 (release 16.0) Domain \N 22.80 22.80 22.90 27.60 21.60 22.10 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.30 0.72 -4.04 39 1973 2012-10-01 21:59:08 2004-12-17 12:10:15 6 7 1931 6 566 1247 328 75.80 44 25.62 CHANGED tApphhhlhhNsPpspL-clht......lhPGhcuPTlosL.....scp..salAVpshlsccplhclhscL+plGApsIlVhsIpph ..................pA+cp+hlhhcsPp-+L-clhu.........lLPGhEsPTlhPL.............u-p.....phVAl+hVssEshha-sM-cLKslGApuILVhPIEKh...... 
0 177 364 489 +7861 PF08030 NAD_binding_6 Ferric reductase NAD binding domain Finn RD anon Manual Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null --hand HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.09 0.71 -4.32 21 2341 2012-10-02 19:13:12 2004-12-17 14:53:52 7 96 341 1 1603 3081 161 162.10 21 23.27 CHANGED a-sllLluuGhGloshlSllp-lhpp.p..................................hctpphpFhWlsRc.uslchact.h..sEltphcpp.....plclcsYhTu.hps...............ssspsthhp......hpshp.......................................................................pthps......ht.........................sphphu.RPNhcpllpch.........tpsssplGVhsC....Gssshsccl+phss ..................................................................................................................................................................................................................................acsslLlu..uGhGlTshhullp...s..lh..pphp..............................................................................................tt.t..h.t.h.c..+..lh....h.lW.lsR.....c.tp..p.h....c.W.h.........t..s..h.h...........pplt..p.........tpp...............thl.p...l.p..h.....a..l..Tp......pp..................................................................tsh.ps.........................t.....................................................................................................................................................................................................p.........t...p.......................................................hph.ph.G..R.P.sh..pp.h.h.pph....................................t...p.......t...t....p....l..uV...hhC....GP.shspplcphs.t.................................................................................................................................................................................................... 
0 440 839 1305 +7862 PF08031 BBE Berberine and berberine like Mistry J anon Pfam-B_649 (release 16.0) Domain This domain is found in the berberine bridge and berberine bridge- like enzymes which are involved in the biosynthesis of numerous isoquinoline alkaloids. They catalyse the transformation of the N-methyl group of (S)-reticuline into the C-8 berberine bridge carbon of (S)-scoulerine [1][2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.32 0.72 -4.05 67 2389 2012-10-02 00:48:38 2004-12-20 08:43:13 7 38 719 49 1461 2372 41 47.30 32 9.22 CHANGED uYlNahDhDls..........................asppYat.sN.apRLhplKspa..DPsNhF+ppQSIs ...................................sYlNah.-.h.s.s................................sa...tpt.aaG...sN....as....RLtplKp+a..DPpslF.p.....psl............. 0 342 871 1216 +7863 PF01238 PMI_typeI Phosphomannose isomerase type I Finn RD, Bateman A anon Prosite Family This is a family of Phosphomannose isomerase type I enzymes (EC 5.3.1.8). 
20.20 20.20 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.26 0.70 -5.49 8 4066 2012-10-10 13:59:34 2004-12-31 11:15:28 16 20 2915 8 847 2789 266 273.80 26 82.30 CHANGED LF+LpsuhppYsWG.hGspSAlAchhuho...sPS...lttsKPYAELWMG.THPpuPS+..lhssp.....l+slshsphhAhhsEslu....c+a..uu..pLPFLFKVLSlccsLSIQsHPsKc.uch..........LHstsP+NYPD-NHKPEhuIAlTsFcuhCGF+.htplsp.LtpssElpplIsscsusphppshph.s..ts.-shp.ph.lLpslFuplhsoss-clpppsshLlcptpsps....ushpthD........hsplIpRLpp.aPsDlGlFs..hhLNhhcLpsGEAhFLc..........AssPHAYlpGDhlECMAsSDNsVRAGhTPKahDVssLspMLsYpapss...-ctcht....pc...shhpu........SlLasP.Plt-Fulhps...slc..sGcphl.sh.susSILlsstGsuplhsusp .........................................................................Lps..hp.phhW.......G..............stltp..h.h...u..hp.........Ps................ps...hu.EhW.....hh..AH.Pp.u.s......St.......l.h...sut........................hps.h.....h...h...s..p..h.....h.....s...........h.....t..c.h.h.s.........................p.p.h......sp........hPh...LhK.........lLsAppsLSlQVHPs.......................................................c.......s......Yt........t....H....c.s..Ehs.....................ps.s...ha..................h........p..ht...h.s.p.l.l.............s......h.....t.s..t....p...h....t...........t.h..........h.......t...h...........p.............pt.h........htt............F.....hl....s.s.......s.......c.......t.....t....t.h..hh.l....h....p....t........p...s...p.s............sp..h...h...............................h....h............t.....s.u..h............h.....................s..........sh...........................................h....s....hh.t.....................................................................................................................................................................................................t........................................................................................................................................
........................................................................... 0 291 516 721 +7864 PF08032 SpoU_sub_bind RNA 2'-O ribose methyltransferase substrate binding Finn RD anon Pfam-B_742 (release 16.0) Domain This domain is a RNA 2'-O ribose methyltransferase substrate binding domain. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.37 0.72 -3.81 127 7167 2012-10-10 14:40:03 2005-01-04 15:00:01 7 11 4492 7 1567 4691 2075 74.30 22 27.20 CHANGED hlhGh+sVhcALps...t..p...lpcl..alpps.....t.p.........plhphsppp..sltlthlscpt.Lsplst.sssHQGllAhlp.hphh ..............................................hlhGh+sVppuLps............s..p......lpcl..altcs.......tstp..............h..ppll.phhppp.......tl...t......l.p...h..l....sc..p.t....Ls..p.....h...u.........s...s.......s...s.....HQ.....Glhuhlp.h...t.................... 0 500 998 1314 +7865 PF08033 Sec23_BS Sec23/Sec24 beta-sandwich domain Finn RD anon Manual Domain \N 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.06 0.72 -3.47 141 1441 2009-01-15 18:05:59 2005-01-05 11:01:17 7 32 326 24 954 1393 15 91.90 29 10.61 CHANGED uapushcl+sSpsl+lsshhGshhst........s........................sss..hphsslssspohslhh....ch................................stp..lstt.ptsalQhshhYpss.sG.p+RlRVpTlshshs .........................uFpAshclRsS.p.s......l..+lsshh.Gshhst..............s...........................................sss....hph..ssls.scpshu.l.hch................................sss....lsps..stshh....QhshhYoss..sG.p.RRlRVpTlshsh............................ 0 314 512 772 +7866 PF08034 TES Trematode eggshell synthesis protein Ebersberger I, Finn RD anon Ebersberger I Domain This domain has been identified in a number of distantly related species of trematodes. This protein domain is crucial for eggshell synthesis in trematodes (Ebersberger I). 
21.20 21.20 21.40 22.20 19.60 19.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.18 0.72 -3.90 13 56 2009-01-15 18:05:59 2005-01-05 16:40:20 6 1 7 0 17 90 0 66.20 37 27.38 CHANGED hpucGcFtupGspccGspappsTpF++GGGhDpYG+K+pa.scY-ThGphK+Yusphhps+FDlhGpL ............pu+G+hpupGptchGsp.ppsTpFphtGthspYG++.K.pa.ucacTpG+.KKYuc+hhcs+FDlhGsL.......... 0 17 17 17 +7867 PF08035 Op_neuropeptide Opioids neuropeptide Bateman A, Lee SC anon Prosite Family This family corresponds to the conserved YGG motif that is found in a wide variety of opioid neuropeptides such as enkephalin. 20.40 20.40 20.40 22.50 20.30 19.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.24 0.72 -4.37 14 584 2009-01-15 18:05:59 2005-01-07 15:32:49 6 5 291 0 25 585 0 23.60 87 11.63 CHANGED YGGFM+s..ERuQ.pPLlTLFKNlhhKss.cp ..YGGFMTP..ERSQ.TPLhTLFKNsIlK......... 0 1 4 9 +7868 PF08036 Antimicrobial_6 Diapausin family of antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of diapausin-related antimicrobial peptides. Diapause during periods of environmental adversity is an essential part of the life cycle of many organisms with the molecular basis being different among animals. Diapause-specific peptides provide anti-fungal activity and act as N-type voltage-gated calcium channel blocker [1]. 25.00 25.00 27.90 29.50 20.00 17.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.27 0.72 -4.20 3 10 2009-01-15 18:05:59 2005-01-07 16:23:30 6 1 6 1 5 5 0 39.60 52 63.36 CHANGED VRVGPCDQVCSRIsPEKDECCRAHG+uGHAoCShGGMpC ..VRVtsCDpVCuRIssERDECCRAHGY.pGh..u...CpsGph.C............. 0 5 5 5 +7869 PF08037 Attractin Attractin family Lee SC anon Short protein clustering Family This family consists of the attractin family of water-borne pheromone. 
Mate attraction in Aplysia involves a long-distance water-borne signal in the form of the attractin peptide, that is released during egg laying. These peptides contain 6 conserved cysteines and are folded into 2 antiparallel helices. The second helix contains the IEECKTS sequence conserved in Aplysia attractins [1]. 25.00 25.00 28.80 93.30 18.40 17.80 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.11 0.72 -4.19 2 5 2009-01-15 18:05:59 2005-01-07 16:32:56 6 1 5 1 0 6 0 54.20 59 89.14 CHANGED CDIt.hTSpCpMpapsCt-AsuCsslIEECKTSh.EcC..pphpSstuSTTltPp CDIGNITSQCcMQHQNCuDAsGCsTlIEECKTSMVERCQNQpF-SuSuSTTLGPQ 0 0 0 0 +7870 PF08038 Tom7 TOM7 family Lee SC anon Short protein clustering Family This family consists of TOM7 family of mitochondrial import receptors. TOM7 forms part of the translocase of the outer mitochondrial membrane (TOM) complex and it appears to function as a modulator of the dynamics of the mitochondrial protein transport machinery by promoting the dissociation of subunits of the outer membrane translocase [1]. 21.20 21.20 23.10 23.40 17.50 17.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.98 0.72 -4.50 24 216 2009-01-15 18:05:59 2005-01-07 16:46:53 7 6 183 0 140 201 0 41.70 39 54.60 CHANGED KERlshlhchu+sssHYGaIPLllYLGhspsss...P.......olhpLL ..........K-Rlsplhchu+sshHaGFIPhVlYLGappssss....P.......olhpLL.................. 0 39 70 116 +7871 PF08039 Mit_proteolip Mit_preoteolip; Mitochondrial proteolipid Lee SC anon Short protein clustering Family This family consists of proteins with similarity to the mitochondrial proteolipids. Mitochondrial proteolipid consists of about 60 amino acids residues and is about 6.8 kDa in size [1]. 
25.00 25.00 25.90 25.80 19.20 19.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.94 0.72 -3.99 2 43 2009-01-15 18:05:59 2005-01-07 16:49:16 6 1 26 0 17 44 0 55.50 65 94.05 CHANGED MLQSlIKplWIPMKPYYTpsYQEIWlGhGLMuaIVYKIRuADKRSKALKASusAP..GHH .......MLQSlIKNVWlPMKPYYTQVYQEIWVGMGLMuaIVYKIRSADKRSKALKu.u.sAP..GH.......... 0 2 2 4 +7872 PF08040 NADH_oxidored MNLL subunit Lee SC anon Short protein clustering Family This family consists of the MNLL subunits of NADH-ubiquinone oxidoreductase complex. NADH-ubiquinone oxidoreductase is involved in the transfer of electrons from NADH to the electron transport chain. This oxidation of NADH is coupled to proton transfer across the membrane, generating a proton motive force that is utilised for the synthesis of ATP [1]. MNLL subunit is one of the many subunits found in the complex and it contains a mitochondrial import sequence. However, the role of MNLL subunit is unclear [2]. 20.00 20.00 20.30 33.40 19.90 16.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.81 0.72 -4.35 5 67 2009-01-15 18:05:59 2005-01-07 16:50:39 6 1 53 0 33 69 0 56.50 50 88.18 CHANGED MVNLhthsR-HWValLVPL.GFVlGpYLDRppDERLTAFRNKSALYutRELKPGE-sTWK ..................hshhthhp-+WlallVPh...GF...llGpYLD++pDE+LTsFRNKShLat.R..-LpPsEcsoW..... 0 7 10 21 +7873 PF08041 PetM PetM family of cytochrome b6f complex subunit 7 Lee SC anon Short protein clustering Family This family consists of the PetM family of cytochrome b6f complex subunit IV. The cytochrome b6f complex consists of 7 subunits and contains 2 beta hemes and 1 chlorophyll alpha per cytochrome f. It is highly active in transferring electrons from decylplastoquinol to oxidised plastocyanin [1]. 
23.30 23.30 29.40 28.80 23.10 23.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.13 0.72 -4.46 21 131 2009-01-15 18:05:59 2005-01-07 16:53:32 6 2 113 9 56 116 5 31.40 46 45.33 CHANGED uE..IhssAslshsLlLlGLuhGFhLLKl.Q...G..E ......uE...IFssAslhhsLlLVGLAlGFlLLKl.Q...u...t...... 0 15 41 53 +7874 PF08042 PqqA PqqA family Lee SC anon Short protein clustering Family This family consists of proteins belonging to the coenzyme Pyrroloquinoline quinone A (pqqA) family. PQQ is the non-covalently bounded prosthetic group of many quinoproteins catalysing reactions in the periplasm of Gram-negative bacteria. PQQ is formed by the fusion of glutamate and tyrosine and synthesis of PQQ require the proteins encoded by the pqqABCDEF operon but details of the biosynthetic pathway are unclear [1]. 21.10 21.10 21.40 22.70 19.40 19.20 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -5.97 0.72 -6.22 0.72 -4.20 26 215 2009-01-15 18:05:59 2005-01-07 17:13:00 6 1 181 0 84 150 7 20.90 53 63.51 CHANGED M...........tWspPphs-lplGhElshY ..................M...........tWo+Psas-lRlGhEVThY. 0 19 39 63 +7875 PF08043 Xin Actin_bind_SAA; Xin repeat Wu X, Finn RD anon Wu X Repeat The repeat has the consensus sequence GDV(K/Q/R)(T/S/G)X(R/K/T) WLFETXPLD. This repeat motif is typically found in the N-terminus of the proteins, with a copy number between 2 and 28 repeats. Direct evidence for binding to and stabilising F-actin has been found [1] in the human protein Swiss:Q702N9. The homologues in mouse and chicken localise in the adherens junction complex of the intercalated disc in cardiac muscle and in the myotendon junction of skeletal muscle. mXin may co-localise with Vinculin which is known to attach the actin to the cytoplasmic membrane [1]. 
It has been shown that the amino-terminus of human xin (CMYA1) binds the EVH1 domain of Mena/VASP/EVL, and the carboxy-terminus binds the, for the filamin family unique, domain 20 of filaminC [4]. This confirms the proposed role of xin repeat containing proteins as F-actin-binding adapter proteins. 20.00 20.00 23.60 20.00 19.70 19.90 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.10 0.74 -6.17 0.74 -3.83 60 1090 2009-01-15 18:05:59 2005-01-10 11:10:43 7 19 36 0 561 928 0 16.00 62 7.03 CHANGED GDVpss+alFETpPLD ..GDV+os+WLFETQPLD. 0 22 78 230 +7876 PF08044 DUF1707 Domain of unknown function (DUF1707) Lai X, Finn RD anon Lai X Domain This domain is found in a variety of Actinomycetales proteins. All of the proteins containing this domain are hypothetical and probably membrane bound or associated. Currently, it is unclear to the function of this domain. 26.30 26.30 26.50 26.40 26.10 26.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.39 0.72 -4.21 59 938 2009-09-10 22:21:14 2005-01-10 16:55:33 6 10 341 0 294 719 4 52.70 37 25.50 CHANGED hRsuDsDRppssphLspAhAcGpLshsEa-cRlstAhsAcThu-LssLhsDLP ....hRsuDs-RcpshphLpsAhucGcLshsEa--RlspAhtApThu-LsslhsDLP.... 0 125 230 276 +7877 PF08045 CDC14 Cell division control protein 14, SIN component Wood V, Mistry J anon manual Family Cdc14 is a component of the septation initiation network (SIN) and is required for the localisation and activity of Sid1. Sid1 is a protein kinase that localises asymmetrically to one spindle pole body (SPB) in anaphase disappears prior to cell separation [1] [2]. 
20.40 20.40 20.40 21.00 20.20 20.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.72 0.70 -5.26 13 152 2009-01-15 18:05:59 2005-01-11 12:58:53 6 7 134 0 108 153 0 243.70 30 80.45 CHANGED MEslLupuhDpLsopcsscIppGL+Ql-ulLuplshshsptppssh.............................tsssuhcE.FhpLQ-uFpaNlA..p+Llssl-hlhspss..pu......psshLlhs.lcLlQGlLLLHPsSRslFu.RctsMshlLcLL.............csssssslpsuslpTLlshLLcsPsNhRsFEclsGLtplsphFKhpps............................sp-l....+hKIlEFLhFYLhsEssshs.............................psttstt..t.t.......p+TspEKppLlt+hhsp.l-uLV--Lp-hpsh .......................................................................................................................................h.hthp.L..p.....pl+pGL+pscuhLuplphpt.p..t....t......pt....s..........................p.lt..hp.ps.uh.p-.F.hLQc.sF..paNlt........cLls.s.......L...-..clhupt.s..ph.............psDhl..l...hssLcllQGshLLHPsS+sLFs.+chhhpl.LLcLL......................................ps.s.s..sslQuu.sL.oLlshLlDsPsNpR.sFEphsGL.pVssLhKp+ps............................sccl+hKllEFLhhYLhsE.s..s................................................................................p..hh.............................................................................. 0 29 63 93 +7878 PF08046 IlvGEDA_leader IlvGEDA operon leader peptide Lee SC anon Short protein clustering Family This family consists of the leader peptides of ilvGEDA operon. The expression of the ilvGEDA operon of E coli K-12 is multivalently controlled by the three branched -chain amino acids. Regulation is thought to occur by attenuation of transcription in response to the changing levels of the cognate tRNAs. Transcription of this operon is usually terminated at the end of the leader (regulatory) region [1]. 
25.00 25.00 49.30 49.30 17.90 17.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.31 0.72 -7.59 0.72 -4.80 2 92 2009-09-11 14:17:33 2005-01-11 13:56:14 6 1 92 0 8 16 0 31.90 94 95.19 CHANGED MphllQVIsLVlISVVVIIIPPCGAALGRhKA .MTALLRVISLVVISVVVIIIPPCGAALGRGKA. 0 1 4 6 +7879 PF08047 His_leader Histidine operon leader peptide Lee SC anon Short protein clustering Family This family consists of the leader peptide of the histidine (his) operon. The his operon contains all the genes necessary for histidine biosynthesis. The region corresponding to the untranslated 5' end of the transcript, named the his leader region, displays the typical features of the T box transcriptional attenuation mechanism which is involved in the regulation of many amino acid biosynthetic operons [1]. 25.00 25.00 32.40 32.30 17.00 14.90 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.31 0.72 -6.69 0.72 -4.51 4 51 2009-01-15 18:05:59 2005-01-11 13:57:05 6 1 50 0 5 10 0 15.70 96 100.00 CHANGED MsRVQFKpHHHHHHPD MTRVQFKHHHHHHHPD 0 1 2 4 +7880 PF08048 RepA1_leader Tap RepA1 leader peptide Lee SC, Rossi R anon Short protein clustering Family This family consists of the RepA1 leader peptides. The frequency of replication of IncFII plasmid NR1 during the cell division cycle is regulated by the control of the synthesis of the plasmid-specific replication initiation protein (RepA1). When RepA1 is synthesised, it binds to the plasmid replication origin (ori) and effects the assembly of a replication complex composed of host proteins that mediate the replication of the plasmid [1]. The tap gene encodes a 24-amino acids protein. The translation of tap is required for translation of repA. 
25.00 25.00 31.00 38.80 16.60 15.50 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.84 0.72 -6.96 0.72 -3.89 5 34 2009-01-15 18:05:59 2005-01-11 13:58:01 7 1 28 0 2 20 0 24.40 70 100.00 CHANGED MLRKlQYhFLCHLLLPCNISAGRCD MhtKlQ.hFLppLLL.CIVSAGhCD. 0 0 0 2 +7881 PF08049 IlvB_leader IlvB leader peptide Lee SC anon Short protein clustering Family This family consists of the leader peptides of the ilvB operon. This region encodes a potential leader polypeptide containing 32 amino acids, 12 of which are the regulatory amino acids valine and leucine. A model for the multivalent regulation of this operon by valyl- and leucyl-tRNA is proposed on the basis of the mutually exclusive formation of five strong stem-and-loop structures in the leader mRNA [1]. 25.00 25.00 33.50 31.20 18.10 17.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.66 0.72 -4.20 4 116 2009-01-15 18:05:59 2005-01-11 13:59:13 6 1 113 0 11 28 0 31.90 83 99.14 CHANGED MNsShhNusLLsTA.sAAVVVVRVVVVVGNAP ............MssSMlNApLL.sTAPSAAVVVVRV...V..VVVGNAP 0 1 4 7 +7882 PF08050 Tet_res_leader Tetracycline resistance leader peptide Lee SC anon Short protein clustering Family This family consists of the tetracycline resistance leader peptide. The presence of 3 inverted repeats which can form 2 different conformations of mRNA suggests that the tetracycline resistance (TcR) region is regulated by a translational attenuation mechanism. A Rho-independent transcriptional terminator structure is present immediately after the translational stop codon of the TET protein [1]. 16.30 16.30 16.30 16.30 16.20 16.10 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.11 0.72 -6.36 0.72 -4.32 4 14 2009-01-15 18:05:59 2005-01-11 14:00:00 7 2 12 0 2 9 0 18.50 79 22.44 CHANGED MKCpKMNRVQLKEGSVSMsL MKCNECNRVQLKEGSVShsL. 
2 1 1 2 +7883 PF08051 Ery_res_leader1 Erythromycin resistance leader peptide Lee SC anon Short protein clustering Family This family consists of erythromycin resistance gene leader peptides. These leader peptides are involved in the translational attenuation of erythromycin resistance genes. Interestingly, the consensus sequence of peptides conferring erythromycin resistance is similar to that of the leader peptides, thus indicating that a similar type of interaction between the nascent peptide and antibiotics can occur in both cases [1]. This family also includes a small number of regions from within larger proteins from actinomycetes. 22.80 22.80 23.10 23.10 22.60 21.00 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.79 0.73 -6.06 0.73 -4.12 10 16 2009-01-15 18:05:59 2005-01-11 14:00:36 6 2 13 0 8 11 0 15.00 72 8.77 CHANGED MLISGTAFLRLRTNR hhhSGsAaLRLRTsR 0 1 4 6 +7884 PF08052 PyrBI_leader PyrBI operon leader peptide Lee SC anon Short protein clustering Family This family consists of the pyrBI operon leader peptides. The expression of the pyrBI operon, which encodes the subunits of the pyrimidine biosynthetic enzyme aspartate transcarbamylase. is regulated primarily through a UTP-sensitive transcriptional attenuation control mechanism. In this mechanism, the concentration of UTP determines the extent of coupling between transcription and translation within the pyrBI leader region, hence determining the level of rho-independent transcriptional termination at an attenuator preceding the pyrB gene [1]. 25.00 25.00 34.20 34.20 17.90 15.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.20 0.72 -4.15 2 185 2009-01-15 18:05:59 2005-01-11 14:01:14 6 1 184 0 5 32 0 38.10 90 91.70 CHANGED MVQCVRHFVLPRLKKDAGLPFFFPLITHSQPLNRGAFFC.GVRR MVQCVRH.VLPRLKKDAGLPFFFPL.hT.popPLN.................. 
0 1 1 5 +7885 PF08053 Tna_leader Tryptophanese operon leader peptide Lee SC anon Short protein clustering Family This family consists of the tryptophanese (tna) operon leader peptide. Tna catalyses the degradation of L-tryptophan to indole, pyruvate and ammonia, enabling the bacteria to utilise tryptophan as a source of carbon, nitrogen and energy. The tna operon of E. coli contains two major structural genes, tnaA and tnaB. Preceding tnaA in the tna operon is a 319 -nucleotide transcribed regulatory region that contains the coding region for a 24-residue leader peptide, TnaC. The RNA sequence in the vicinity of the tnaC stop codon is rich in Cytidylate residues which is required for efficient Rho -dependent termination in the leader region of the tna operon [1]. 25.00 25.00 70.80 70.70 17.20 16.70 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.78 0.72 -6.97 0.72 -4.56 2 48 2009-01-15 18:05:59 2005-01-11 14:01:50 6 1 46 0 2 6 0 24.00 99 98.63 CHANGED MNILHlCVTSKWFNIDNKIVDHRP MNILHICVTSKWFNIDNKIVDHRP 0 1 1 1 +7886 PF08054 Leu_leader Leucine operon leader peptide Lee SC anon Short protein clustering Family This family consists of the leucine operon leader peptide. The leucine operon is involved in the control of the biosynthesis of leucine. Four adjacent leucine codons within the leucine leader RNA are critically important in transcription attenuation-mediated control of leucine operon expression in bacteria. The leader RNA contains translational start and stop signals, a cluster of four leucine codons and overlapping regions of dyad symmetry that are capable of forming stem-and-loop structures [1]. 25.00 25.00 26.30 26.30 22.60 19.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.92 0.72 -7.16 0.72 -4.27 4 72 2009-01-15 18:05:59 2005-01-11 14:02:44 6 1 72 0 7 20 0 27.80 79 95.61 CHANGED MhHhsRhhu.LLLNA.llRGh.VuG.Qp .....MoHIVRFlGLLLLNASpLRGRhVuGIQH... 
0 2 3 6 +7887 PF08055 Trp_leader1 Tryptophan leader peptide Lee SC anon Short protein clustering Family This family consists of the tryptophan (trp) leader peptides. Tryptophan accumulation is the principal event resulting in downregulation of transcription of the structural genes of the trp operon. The leader peptide of the trp operon forms mutually exclusive secondary structures that would either result in the termination of transcription of the trp operon when tryptophan is in plentiful supply or vice versa [1]. 25.00 25.00 48.60 48.50 16.40 14.90 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.54 0.72 -6.78 0.72 -4.36 2 2 2009-01-15 18:05:59 2005-01-11 14:03:54 6 1 2 0 0 2 0 18.00 78 100.00 CHANGED MFA.phpNWWWTAHPAAH MFA.phpNWWWTAHPAAH 0 0 0 0 +7888 PF08056 Trp_leader2 Tryptophan operon leader peptide Lee SC anon Short protein clustering Family This family consists of the tryptophan operon leader peptides. The tryptophan operon is regulated by transcription attenuation in response to changes in the level of tryptophan. The transcript of the leader peptide can adopt alternative mutually-exclusive secondary structures that would either result in termination of transcription of the tryptophan structural genes or in transcription of the entire operon [1]. 21.00 21.00 27.00 59.20 20.00 19.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.34 0.72 -3.96 4 77 2009-01-15 18:05:59 2005-01-11 14:04:29 6 1 75 0 5 24 0 37.70 82 87.76 CHANGED MLQEFNpNpKsKlu.h.p.ssuAELAWWRTWTSSWWANVYF MLQEFNPNHKPNFS.........PADA....ELAWWRTWTSSWWAHVYF 0 1 1 3 +7889 PF08057 Ery_res_leader2 Erythromycin resistance leader peptide Lee SC anon Short protein clustering Family This family consists of erythromycin resistance gene leader peptides. 
These leader peptides are involved in the transcriptional attenuation control of the synthesis of the macrolide-lincosamide -streptogramin B resistance protein. It acts as a transcriptional attenuator, in contrast to other inducible erm genes. The mRNA leader sequence can fold in either of two mutually exclusive conformations, one of which is postulated to form in the absence of induction, and to contain two rho factor-independent terminators. [1]. 25.00 25.00 42.70 42.70 19.10 18.50 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.07 0.73 -6.10 0.73 -3.49 2 2 2009-01-15 18:05:59 2005-01-11 14:05:10 6 1 2 0 0 1 0 14.00 100 100.00 CHANGED MTHSMRLRFPTLNQ MTHSMRLRFPTLNQ 0 0 0 0 +7890 PF08058 NPCC Nuclear pore complex component Wood V, Mistry J, Novatchkova M anon manual Domain Proteins containing this domain are components of the nuclear pore complex [1]. One member of this family is Nucleoporin POM34 (Swiss: Q12445) which is thought to have a role in anchoring peripheral Nups into the pore and mediating pore formation [1]. 26.80 26.80 58.40 28.40 21.80 20.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.75 0.71 -4.33 21 117 2009-01-15 18:05:59 2005-01-11 15:00:35 6 5 109 0 89 110 0 130.40 31 39.27 CHANGED GsWcpPhlcElsRRQ...s.p-ppl++llhNshuhlhh.lhsphlp.hh.hhphspth.s..........................................asphhhhllphlhllNIllALasLh+s..pDshuDlPLTspQRcLLGLsss............spTsush.....hlTPP+Yph ..............................GsWcpPhLcEls+Rp...s.p..Eppl++lhhNshslhhh.hhtphhp.hh.hhththph..s..........................................asshlhhllpllhhlNIlhuLh....Lh+...pDchuDlPLTspQRpLLGLcss.st..................ssssssh.....h.p.P+Yp.............................. 0 14 44 75 +7891 PF08059 SEP SEP domain Mistry J, Wood V anon Pfam-B_1894 (release 16.0) Domain The SEP domain is named after Saccharomyces cerevisiae Shp1, Drosophila melanogaster eyes closed gene (eyc), and vertebrate p47. 
In p47, the SEP domain has been shown to bind to and inhibit the cysteine protease cathepsin L [1]. Most SEP domains are succeeded closely by a UBX domain [1]. 21.50 21.50 21.70 21.70 21.20 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.27 0.72 -3.67 44 608 2009-01-15 18:05:59 2005-01-11 17:43:31 8 13 300 3 378 595 4 72.10 36 19.95 CHANGED lphWpsGFol-DG.LRpacDPtNtpFLpslpcGcsPh-Lhsht.t.ppVsVslpc+pcEcYhtPhh..phpsFsGpGp .......lplWcsG.Fol.s.D.Gs....LRsas...DP..s....N.tpFLc.......s.....I.p..cG........chPhELtp.h.....h.t....ppVslclpc+psEsahps.......thpsFsGpGp................... 0 118 184 273 +7892 PF08060 NOSIC NOSIC (NUC001) domain Staub E, Bateman A anon Staub E Domain This is the central domain in Nop56/SIK1-like proteins [1]. 23.70 23.70 24.10 23.70 22.80 23.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.55 0.72 -4.16 101 1203 2009-09-11 13:22:58 2005-01-12 13:32:34 8 20 441 26 811 1191 39 52.30 44 10.32 CHANGED hIlQulsll-clD+-lNhhthRlREWYuh+FPELspllsc.shpYu.+lVthlGs ........hIlQAluLLDpLDK-lNsasMRlREWYuaHFPELs+lls....D....shpYs.+llphlGp.............. 0 287 457 669 +7893 PF08061 P68HR P68HR (NUC004) repeat Staub E, Bateman A anon Staub E Repeat This short region is found in two copies in p68-like RNA helicases [1]. 25.00 25.00 30.50 33.30 23.60 24.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.31 0.72 -4.22 8 120 2009-01-15 18:05:59 2005-01-12 13:38:59 6 6 32 0 40 103 0 33.60 59 11.58 CHANGED SAGhpuGFpohpsptsYppGYuu..pppaGupspN ..SAGhpsuFpT.hpsptsYppGYsS..pppaGupstN. 0 2 2 12 +7894 PF08062 P120R P120R (NUC006) repeat Staub E, Bateman A anon Staub E Repeat This characteristic repeat of proliferating cell nuclear antigen P120 is found in three copies [1]. 
20.30 20.30 21.70 20.30 19.30 20.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.62 0.73 -6.89 0.73 -3.37 6 44 2009-01-15 18:05:59 2005-01-12 13:43:13 6 5 10 0 8 55 0 21.90 49 6.15 CHANGED sGKA+GspKsK.QQLh+Q.tsKt .hGKAKGlcKsK.pph.KQ.tsKh..... 0 3 3 3 +7895 PF08063 PADR1 PADR1 (NUC008) domain Staub E, Bateman A anon Staub E Domain This domain is found in poly(ADP-ribose)-synthetases [1]. The function of this domain is unknown. 21.10 21.10 21.10 23.00 21.00 19.10 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.12 0.72 -4.57 17 187 2009-01-15 18:05:59 2005-01-12 14:18:02 7 27 111 4 131 208 2 54.70 41 6.18 CHANGED ll-RluDshhFGsLt.CspCsG.phhapup..tYhCpG.l..StWoKC..sapspsPpRhp .........ll-RlADshhFGALhsCsp..Csu.....pl..hapus..sYhCoGpl...otWoK.C..shpTpsPsRp........... 0 54 72 108 +7896 PF08064 UME UME (NUC010) domain Staub E, Bateman A anon Staub E Domain This domain is characteristic of UVSB PI-3 kinase, MEI-41 and ESR1 [1]. 21.00 21.00 21.00 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.06 0.72 -4.09 29 246 2009-01-15 18:05:59 2005-01-12 14:25:06 8 14 218 0 176 262 1 105.80 24 4.49 CHANGED lspaLppchLGllshFspslpc...sppshh-KccslpuIt.ll+.hstpplssshsQ......IhssLpouLchp...-LpptuhpsWplhlppLsp...pcltsllsphlshllphasp ...............hspaLpp+lLGllshFsppltc.....s.p.....tsh.-K+pslpultpll+.h.............ss.........pplssshsp.................lhssLpou.Lphc..........-lpph...s..hpsWsshlpsLsp...pcLtsllspslshll.hhp.h.............................. 0 51 92 143 +7897 PF08065 K167R K167R (NUC007) repeat Staub E, Bateman A anon Staub E Repeat This family represents the K167/Chmadrin repeat [1]. The function of this repeat is unknown. 
20.70 20.70 21.10 20.70 20.00 20.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.28 0.71 -3.97 24 437 2009-01-15 18:05:59 2005-01-12 14:29:07 7 18 30 0 146 511 0 104.90 42 46.71 CHANGED sTKlsscSPQP-sscTssSoKppsKpuLpKs-V+EEh.AlpKhopouGcshcT.+.ssu-s+sIcuhhposKQKLDssusloGSKRp.pTPKE+AQsLEDLsG.FpELFQTPu .......................TKhsC+Ss.s-slsTPsSp+p.......p.+pslt..Ks-lcEE.h.Al.p..KhTposGcsscT..+.....Ps.u-c+slcsa.....pcoP+QKL.Dsstsl.oGSKR...psRTP.K-K.A.Q.......sLEDLsG.FKELFQTP........ 1 16 18 28 +7898 PF08066 PMC2NT PMC2NT (NUC016) domain Staub E, Bateman A anon Staub E Domain This domain is found at the N-terminus of 3'-5' exonucleases with HRDC domains, and also in putative exosome components [1]. 22.20 22.20 22.50 22.50 22.00 21.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.82 0.72 -3.69 31 282 2009-01-15 18:05:59 2005-01-12 14:35:30 7 8 228 0 176 278 0 91.50 28 11.48 CHANGED llsss+uuuuLuu..pDlsFY+ohpsshuppl-ppupcLLslhspllp.thsspschhtss.......t.csl-s..pacsls-shDsLhE+sDpsLDchsGh .............llsss+uu.suLsp....c-...hsFa+Sh.Psap...p.h-ppusRLLphhsp.lhp.htssp..s.shpsts...........................p.spl--..pa.c.h.ll-s.DslLE+sshhLDEhsG.l................ 0 54 90 143 +7899 PF08067 ROKNT ROKNT (NUC014) domain Staub E, Bateman A anon Staub E Domain This presumed domain is found at the N-terminus of RNP K-like proteins that also contains KH domains Pfam:PF00013 [1]. 19.90 19.90 23.40 22.80 18.60 18.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.13 0.72 -4.10 4 131 2009-01-15 18:05:59 2005-01-12 14:39:44 6 8 36 0 42 81 0 42.40 79 10.24 CHANGED M-TE.-.Q.EEsoFSNsEoNGKRPAEDh-EEpuFKRSRNoDEMV ...........METE...Q.sEETFsNTETN...Gc..hGK...RPAEDME.EEQAFKRSRNTDEMV.. 
0 1 6 15 +7900 PF08068 DKCLD DKCLD (NUC011) domain Staub E, Bateman A anon Staub E Domain This is a TruB_N/PUA domain associated N-terminal domain of Dyskerin-like proteins [1]. 20.90 20.90 20.90 20.90 20.40 20.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.76 0.72 -4.04 16 540 2009-01-15 18:05:59 2005-01-12 14:44:23 7 12 459 21 349 531 82 54.30 55 12.61 CHANGED csupaPLLLKshc.+LhVRos.caTPhshGssPhcRsIcEYlchGlINLDKPusPSSHEVV ................sSpWPLLLKNa-.c.............L.VRos.HaTPl.stGssPL+RslppYlp.pGlINLDKPuNPSSHEVV........ 0 117 200 290 +7901 PF08069 Ribosomal_S13_N Ribosomal S13/S15 N-terminal domain Staub E, Bateman A anon Staub E Domain This domain is found at the N-terminus of ribosomal S13 and S15 proteins.\ This domain is also identified as NUC021 [1]. 25.00 25.00 27.60 26.50 22.80 21.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.85 0.72 -4.13 64 646 2009-01-15 18:05:59 2005-01-12 14:52:27 7 7 524 8 365 560 71 58.50 55 38.20 CHANGED MuRMHu+t..+..GhSuSshPh....+ppsPpWlp..hos-ElcchIlcLAK.+GhsPSpIGllLRDpaGI .........................MGRMHutG..K..GlSpSAlPY....+RssP....sWlK..hos--Vc-pIhKLAK.KGhTPSQIGVlLRDuHGl... 0 115 203 297 +7902 PF08070 DTHCT DTHCT (NUC029) region Staub E, Bateman A anon Staub E Family The DTCHT region is the C-terminal part of DNA gyrases B / topoisomerase IV / HATPase proteins [1]. This region is composed of quite low complexity sequence. 
19.70 19.70 20.50 20.40 18.30 18.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.39 0.72 -3.34 8 130 2009-09-10 16:42:44 2005-01-12 14:58:57 6 9 47 0 61 99 0 96.90 43 6.92 CHANGED +AAPKusKp.......DS-hsuulsKKPsPsKuKs...p+KRKsSSSD-SDSsFtKtsSKusTSKKuK.......u-sDDFpsDhs....sss...APRs+SGRAKKPlKYLE ........................+tAPK.tpKh..........D.S-t.phu.l.sKKss.s..s...K.uKs......................++++tsuSps.-..uD.s.......t....+tsSK.....sssSKKsKp.............u-sDpF..s.Dhs.....ops.......ss+s+oGR..A.+K.lKYht............ 1 6 9 25 +7903 PF08071 RS4NT RS4NT (NUC023) domain Staub E, Bateman A anon Staub E Domain This is the N-terminal domain of Ribosomal S4 / S4e proteins. This domain is associated with S4 and KOW domains [1]. 20.40 20.40 20.60 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -7.69 0.72 -4.10 34 754 2009-01-15 18:05:59 2005-01-12 15:00:55 7 10 531 4 419 697 74 36.40 62 14.46 CHANGED pGs+KHLKRlsAPppWhls+ps.utaAs+PSsGPHphcc .....RGsKKHLKRLsAP++.WMLDKLs.GsaAPRPSsGPHKLRE..... 0 139 234 326 +7904 PF08072 BDHCT BDHCT (NUC031) domain Staub E, Bateman A anon Staub E Domain This is a C-terminal domain in Bloom's syndrome DEAD helicase subfamily [1]. 25.00 25.00 41.50 40.30 18.10 16.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.06 0.72 -3.85 3 59 2009-01-15 18:05:59 2005-01-12 15:04:31 6 7 34 0 22 45 0 40.30 67 3.39 CHANGED QLlSVME-ICKLVDsIPlcEL+lLSCGpELLQQRplRRKLL .QLl+VMEcICKLVDTIPtDcLKsLcCGsELLQQRslRRKLL.... 0 1 3 7 +7905 PF08073 CHDNT CHDNT (NUC034) domain Staub E, Bateman A anon Staub E Domain The CHDNT domain is found in PHD/RING finger and chromo domain-associated helicases [1]. 
20.80 20.80 21.00 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.54 0.72 -4.28 5 268 2012-10-02 14:16:02 2005-01-13 16:21:08 7 31 81 0 123 210 1 54.50 69 3.24 CHANGED V-l-YoEEDacoLTNYKAFSpaVRPlIsKcNPKIssoKMMMLluAKWREFsssNP ...........l-alFSEEDY+TLTNYKAFSQFl..RPLIA.+KNPKIshSKMMhl......LGAKWREFSsNNP.......... 0 20 32 70 +7906 PF08074 CHDCT2 CHDCT2 (NUC038) domain Staub E, Bateman A anon Staub E Domain The CHDCT2 C-terminal domain is found in PHD/RING finger and chromo domain-associated CHD-like helicases [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.08 0.71 -4.53 4 307 2009-01-15 18:05:59 2005-01-13 16:28:26 6 33 89 0 132 242 0 160.40 78 10.29 CHANGED KhsEIWHRRHDaWLLAGlslHGYARWQ....DItNDspFAIlNEPFKspusc..sNFLEhKNKFLARRFKLLEQALVIEEQLRRAAaLNhpQ-PsHPAMALsARFAElECLAESHQHLSKEShsGN+sANAlLHKVLsQL--LLuDhKsDVsRLPuTlSpltP..VssRLtMSERpILSpL ........................Kh.EIWHRRHDYWLLAGIlp........HGYAR....WQ.................DIQNDsRaAIlNEPF.........KsEhsK.........GNFLEhKNKFLARRF.K..L..LEQALVIEEQLRRAAYLN.hop.DPuH...P...uMALNsRFAEV...ECLAESHQHLSK.EShA.GNKPANAVL.HK....VLsQLEELLSDMKADVTRLPATLuRIPP..VAsRLQMSERsILSRL................... 0 26 37 75 +7907 PF08075 NOPS NOPS (NUC059) domain Staub E, Bateman A anon Staub E Domain This domain is found at the C-terminus of NONA and PSP1 proteins adjacent to 1 or 2 Pfam:PF00076 domains [1]. 20.20 20.20 20.30 24.50 19.90 18.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.50 0.72 -3.56 11 282 2009-01-15 18:05:59 2005-01-13 16:34:28 6 8 89 2 146 244 1 51.10 64 10.43 CHANGED PllVEPhEp.DDpDGLPEK.LspKsspapKEREpsPRFAp.GoFEaEYusRWK .....PVlVEPhEQhDDE.DGLPEK.LspKs.pap...KEREpPPRFAQPGoFEaEYupRWK.... 
0 27 36 78 +7908 PF08076 TetM_leader Tetracycline resistance determinant leader peptide Lee SC anon Short protein clustering Family This family consists of the tetracycline resistance determinant tet(M) leader peptides. A short open reading frame corresponding to a 28 amino acid peptide which contain a number of inverted repeat sequences was found immediately upstream of the tet(M). Transcriptional analyses has found that expression of tet(M) resulted from an extension of a small transcript representing the upstream leader region into the resistance determinant. Thus this leader sequence is responsible for transcriptional attenuation and thus regulation of the transcription of tet(M) [1]. 25.00 25.00 60.40 60.10 23.90 22.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.26 0.72 -4.39 3 31 2009-01-15 18:05:59 2005-01-14 11:01:15 6 2 28 0 1 8 0 27.70 92 72.55 CHANGED MLChPMlMH+FPSDKSIYHWDFhuLFGF MLChPMVMHKNPSDKSIYHWDFYALLGF 0 1 1 1 +7909 PF08077 Cm_res_leader Chloramphenicol resistance gene leader peptide Lee SC anon Short protein clustering Family This family consists of chloramphenicol (Cm) resistance gene leader peptides. Inducible resistance to Cm in both Gram positive and Gram negative bacteria is controlled by translation attenuation. In translation attenuation, the ribosome-binding-site (RBS) for the resistance determinant is sequestered in a secondary structure domain within the mRNA. Preceding the secondary structure is a short, translated ORF termed the leader. Ribosome stalling in the leader causes the destabilization of the downstream secondary structure, allowing initiation of translation of the Cm resistance gene [1]. 
25.00 25.00 48.90 48.80 19.50 18.20 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.15 0.73 -6.44 0.73 -3.94 2 3 2009-01-15 18:05:59 2005-01-14 11:02:43 6 1 3 0 1 1 0 17.00 100 100.00 CHANGED MSGVPGALAVVTRRTIS MSGVPGALAVVTRRTIS 0 0 0 1 +7910 PF08078 PsaX PsaX family Lee SC anon Short protein clustering Family This family consists of the PsaX family of photosystem I (PSI) protein subunits.\ PSI is a large multi-subunit pigment protein complex embedded in the thylakoid membranes of green plants and cyanobacteria. PsaX is one of the 12 protein subunits found in PSI and these subunits are arranged as monomers or trimers within the membrane as shown by the structure of the trimeric complex from Synechococcus elongatus [1]. 25.00 25.00 33.50 33.00 21.50 20.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.72 0.72 -4.03 2 15 2009-01-15 18:05:59 2005-01-14 11:04:10 7 1 15 2 6 15 0 36.10 63 68.65 CHANGED SshAspuAKPsYsFRThWAlLLLAlNFLVAAYYFtIl .........ssKuAKPsYsFRTuWAl.LLLAINFLVAAYYFHII 0 0 5 6 +7911 PF08079 Ribosomal_L30_N Ribosomal L30 N-terminal domain Staub E, Bateman A anon Staub E Domain This presumed domain is found at the N-terminus of Ribosomal L30 proteins and has been termed RL30NT or NUC018 [1]. 23.90 23.90 24.20 24.90 23.80 23.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.49 0.72 -3.96 70 661 2009-01-15 18:05:59 2005-01-14 14:47:42 7 8 345 8 354 608 5 68.80 34 28.50 CHANGED olLKKRKpppph+tpptppthtp+ttppp+RphIa+RAEpYhKEY+ptER-.IRLpRpA+ppGsaYVPuEs ................................olLKKRKp.pp.h+tpptppth.t.pKt......tp+tc..Rc.lIacRA.cpYhKEY+..........ptcRcpIRlpR.A+ptGsaYVPuEs........... 0 109 177 256 +7912 PF08080 zf-RNPHF RNPHF zinc finger Staub E, Bateman A anon Staub E Domain This domain is a putative zinc-binding domain (CHHC motif) in RNP H and F. The domain is often associated with Pfam:PF00076. 
20.20 20.20 20.80 21.10 18.00 19.20 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.96 0.72 -4.45 3 163 2009-01-15 18:05:59 2005-01-14 14:50:25 7 9 41 2 72 105 0 33.70 76 8.58 CHANGED FGSDRFGRDLoYChSGMSDHRYGDGsSTFQSTTGHC ..........FsoD.hFGR.D..LsYChSG...MSD.HRYGDGGSTFQSTTGHC....... 0 4 8 18 +7913 PF08081 RBM1CTR RBM1CTR (NUC064) family Staub E, Bateman A anon Staub E Family This C-terminal region is found in RBM1-like RNA binding hnRNPs [1]. 22.00 22.00 22.60 25.80 21.60 18.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.27 0.72 -4.23 8 128 2009-01-15 18:05:59 2005-01-14 14:57:06 6 3 49 0 49 133 0 45.30 69 12.12 CHANGED SuusMG..GRushSRGRDuYG.GPPRR-shsSRRDsYhuPRDDGYSo+ ...............SSSGMG..GRAPlSRGRDuYG.GPP.RREPlPSRRDVYLSPRDDGYSTK...... 0 9 11 19 +7914 PF08082 PRO8NT PRO8NT (NUC069), PrP8 N-terminal domain Staub E, Bateman A anon Staub E Domain The PRO8NT domain is found at the N-terminus of pre-mRNA splicing factors of PRO8 family [1]. The NLS or nuclear localisation signal for these spliceosome proteins begins at the start and runs for 60 residues. N-terminal to this domain is a highly variable proline-rich region [4]. 25.00 25.00 25.00 31.00 21.00 24.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.99 0.71 -4.57 24 428 2009-01-15 18:05:59 2005-01-14 14:59:30 6 30 362 0 242 401 7 136.00 71 7.54 CHANGED Kt-MPPEHLRKIl+DHGDMSS++apsDKRsaLGALKYlPHAlhKLLENMPhPWEps+pVKVLYHhoGAITFVNElP+VIEPlYhAQWuTMWlhMRREK....RDR+HFKRMRFPPFDDEEPPLDYu-Nl.DlEP.-uIphcLDpp-DssVhDWhYD .....................................Kt-MPPEHlRKIl+................DhGDhop+KappDKRsYLG...ALKahPHAlhKLLENMPMPWE.plR-VcVLYHITGAITFVNEIPhVIEPVYlAQWGoMWIMMRREK....RDRRHFKRMRFPPFDDEEPPLDYuD.NlLDVEPLEsIQh.ELDp-EDss.Vh-WFYD................................ 
0 94 143 205 +7915 PF08083 PROCN PROCN (NUC071) domain Staub E, Bateman A anon Staub E Domain The PROCN domain is the central domain in pre-mRNA splicing factors of PRO8 family [1]. 19.80 19.80 19.80 19.90 19.70 18.90 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.45 0.70 -6.00 12 433 2009-01-15 18:05:59 2005-01-14 15:01:09 6 40 348 0 269 361 13 328.20 62 18.09 CHANGED LhscpTpsuh...tLhaAPhPFN++pGt......h+RAtDlsLlKpWapp+ss...sshPsKV+VSYQKLLKsaVhNpL+p.......pc.psppK............hpLL+sLKsTKFFQpTpIDWVEAGLQlCRQGaNMLNLLIHRKsLsYLHLDYNFNLKPsKTLTTKERKKSRFGNAFHLhREIL+hhKllVDuHVQaRLGNlDAaQLADGltYIhsHlGQLTGlYRYKYKlM+QIRtCKDLKHll.YhRFNs..lGKGPGCGFWtPuWRVWlFFLRGIIPLLERWLGNLLsRQFEGRpSpsl..sKTlTKQRl-SaaDLELRAuVMpDILDMlP-Gl+..psKuRTILQHLSEAWRCWKANlPWcVPGhPtPlcsIIhRYlKuKADhWhssAaYNR-RI+RGAsV-KTlsKKNLGRLTRLWlKsEQERQ+phhK..-GPhl .....................................................sppTssuI...sLhaAPhPFshRSG+......hhRA.D.....lPLlppWY..h.EHsP....s.PVKVRVSYQKLLKhaVLNtL+p.......+..PK...s.p.pK....................p.Lh+shKsT.KFFQpTplDWVEsGLQVCRQGaNMLNL.LIHRKsLsYLHLDYNFNLKPlKTLTTKERKKSRFGNAFHLhRE.lLRLoKLlVDupVQaRLGNlDAaQLADGl.YhFs...HVGQLTGMYRYKY+LM+QIRhCKDLKHlI.YYRFNoGsVGKGPGCGFWAPuWRVWlFFhRGIhPLLERWLGNLLuRQF..E..GRcSKGl..AKTVTKQRVESHaDLELRAuVM+DllDMMP.E.GlK..................QNKsRTILQHLSEAWRCW.........KANIPWK...............VP..GLPhPIENhILRYVKtKADWWhssAHYNRERI+RGATVDKTVsKKNLGRLTRLaLKuEQERQ+sYhK..DGPYl............................. 0 108 159 227 +7916 PF08084 PROCT PROCT (NUC072) domain Staub E, Bateman A anon Staub E Domain The PROCT domain is the C-terminal domain in pre-mRNA splicing factors of PRO8 family [1]. 
19.50 19.50 19.70 20.00 19.30 19.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.76 0.71 -4.42 32 394 2009-01-15 18:05:59 2005-01-14 15:02:50 6 37 344 4 235 386 4 107.70 55 5.71 CHANGED slslolSFoPGSsolsAapLTppGapWGt...pNpDhhsppPpGapssahc+sQlLLSD+hhGaFhVPcsslWNYsFhGspas...shpYslpl-hPhsFYc-lHRPtHFlpFscl....tts-.phpsD.-ssF .......................TlhlTsSFTPGSsSLoAY+LTPsGYEWG+...pNpD........t.u..sp..Pp......Ga..s.o..hhE+sQhLLSD+hhGaFhVPp.pss.WNYsF.M..Gspas....phpaplpls.sPhpFYc-.HRP.HFhpFupl.........-...h.sDppD............................. 2 85 132 194 +7917 PF08085 Entericidin Entericidin EcnA/B family Lee SC anon Short protein clustering Family This family consists of the entericidin antidote/toxin peptides. The entericidin locus is activated in stationary phase under high osmolarity conditions by rho-S and simultaneously repressed by the osmoregulatory EnvZ/OmpR signal transduction pathway. The entericidin locus encodes tandem paralogous genes (ecnAB) and directs the synthesis of two small cell-envelope lipoproteins which can maintain plasmids in bacterial population by means of post-segregational killing [1]. 23.30 23.30 23.30 23.30 23.20 23.10 hmmbuild --amino -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.26 0.73 -6.22 0.73 -4.02 64 1429 2012-10-01 23:27:00 2005-01-14 17:10:06 6 3 892 0 211 521 26 21.00 57 43.49 CHANGED NTspGsGcDlpsuGpAlpcuA .NTsRGhGEDIpcuGsAISpAA..... 0 33 99 154 +7918 PF08086 Toxin_17 Ergtoxin family Lee SC anon Short protein clustering Family This family consists of ergtoxin peptides which are toxins secreted by the scorpions.\ The ergtoxins are capable of blocking the function of K+ channels. More than 100 ergtoxins have been found from scorpion venoms and they have been classified into three subfamilies according to their primary structures [1]. 
25.00 25.00 83.10 83.00 23.30 21.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.46 0.72 -3.89 4 27 2012-10-01 23:31:40 2005-01-14 17:11:46 6 1 6 2 0 27 0 41.00 79 93.81 CHANGED DRDSCVDKS+CuKYGYYtQCp-CCKKAGHsGGTChYFKCKC .DRDSCVDKS+CuKYGYYtQCp-CCKKAGcpuGTChaFKCKC 0 0 0 0 +7919 PF08087 Toxin_18 Conotoxin O-superfamily Lee SC anon Short protein clustering Domain This family consists of members of the conotoxin O-superfamily. The O-superfamily of conotoxins consists of 3 groups of Conus peptides that belong to the same structural group. These 3 groups differ in their pharmacological properties: the w-conotoxins which inhibit calcium channels, the delta-conotoxins which slow down the inactivation rate of voltage -sensitive sodium channels and the muO-conotoxins block the voltage sensitive sodium currents [1]. 24.30 24.30 26.10 26.10 22.10 21.30 hmmbuild --amino -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.62 0.72 -4.17 7 30 2012-10-01 22:06:18 2005-01-14 17:12:28 6 1 24 0 0 32 0 31.10 72 56.38 CHANGED hACsETGtsChaS.ECCS..GACSssFsaCL.p .hACTETGRNCpaS.ECCS..GACSAsFsaCL.R.... 0 0 0 0 +7920 PF08088 Toxin_19 Conotoxin I-superfamily Lee SC anon Short protein clustering Family This family consists of the I-superfamily of conotoxins. This is a new class of peptides in the venom of some Conus species. These toxins are characterised by four disulfide bridges and inhibit of modify ion channels of nerve cells. The I-superfamily conotoxins is found in five or six major clades of cone snails and could possible be found in many more species [1]. 23.20 23.20 23.70 23.40 22.30 23.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.93 0.72 -4.05 8 26 2009-01-15 18:05:59 2005-01-14 17:13:11 7 1 5 3 0 29 0 40.30 66 83.17 CHANGED G.s.CtpDt+sCsYHADCCN..CChtGhCpPSTsWI..GCSTus ...G.s.CtKDt+tCsYHADCCN..CCLo..GICtPSTsWI..GCSTu... 
0 0 0 0 +7921 PF08089 Toxin_20 Huwentoxin-II family Lee SC anon Short protein clustering Family This family consists of the huwentoxin-II (HWTX-II) family of toxins secreted by spiders. These toxins are found in venom that secreted from the bird spider Selenocosmia huwena Wang. The HWTX-II adopts a novel scaffold different from the ICK motif that is found in other huwentoxins. HWTX-II consists of 37 amino acids residues including six cysteines involved in three disulfide bridges [1]. 25.00 25.00 31.80 51.80 21.20 19.70 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.41 0.72 -4.06 3 92 2009-01-15 18:05:59 2005-01-14 17:13:28 6 1 10 2 0 90 0 38.40 81 47.52 CHANGED lFECoFSCDIEKEG.KPCKPKG..........+KKCSGGWKCKlKLCLKl .LFECSlSCEIEKEG.....NKsCK.............KKKC..KGGWKCKFNMCVKV. 0 0 0 0 +7922 PF08090 Enterotoxin_HS1 Enterotoxin_ST; Heat stable E.coli enterotoxin 1 Lee SC anon Short protein clustering Family Heat-stable toxin 1 of entero-aggregative E.coli (EAST1) is a small toxin. It is not, however, solely associated with entero-aggregative E.coli but also with many other diarrhoaeic E. coli families. Some studies have established the role of EAST1 in some human outbreaks of diarrhoea. Isolates from farm animals have been shown to carry the astA gene coding for EAST1. However, the relation between the presence of EAST1 and disease is not conclusive [1]. 25.00 25.00 73.10 73.10 19.70 17.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.93 0.72 -4.20 2 9 2009-01-15 18:05:59 2005-01-14 17:14:06 6 1 4 0 0 8 0 35.30 91 96.07 CHANGED ShphIR+PASShASCIWCsTACuSs.GRTTKPS.AT STQYIRRPASSYASCIWCATACASCHGRTTKPSLAT 0 0 0 0 +7923 PF08091 Toxin_21 Spider insecticidal peptide Lee SC anon Short protein clustering Domain This family consists of insecticidal peptides isolated from venom of spiders of Aptostichus schlingeri and Calisoga sp. 
Nine insecticidal peptides were isolated from the venom of the Aptostichus schlingeri spider and seven of these toxins cause flaccid paralysis to insect larvae within 10 min of injection. However, all nine peptides were lethal within 24 hours [1]. 20.10 20.10 20.20 22.50 19.80 19.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.54 0.72 -4.25 2 5 2012-10-01 22:06:18 2005-01-14 17:14:47 6 1 3 0 0 5 0 38.20 57 69.96 CHANGED C.StthPCoNuc-CCuGpCuh.hWsCh..tssCSKpC.u. CISARYPCSNSKDCCSGNCGs.FWTCalRKDPCSKECLAP. 2 0 0 0 +7924 PF08092 Toxin_22 Magi peptide toxin family Lee SC anon Short protein clustering Family This family consists of Magi peptide toxins (Magi 1, 2 and 5) isolated from the venom of Hexathelidae spider. These insecticidal peptide toxins bind to sodium channels and induce flaccid paralysis when injected into lepidopteran larvae. However, these peptides are not toxic to mice when injected intracranially at 20 pmol/g [1]. 25.00 25.00 26.30 26.10 24.60 18.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.41 0.72 -4.12 3 78 2012-10-01 22:06:18 2005-01-14 17:15:22 6 1 8 0 0 79 0 39.20 84 36.06 CHANGED CMGYDIECNEcLP.CCua..LECV+TSGY.WWYK+pYCRRK+ ....CIGEGVPC.D....ENDP.RCCSG..LVCLKPTLHGIWYKSYYCYKK.... 0 0 0 0 +7925 PF08093 Toxin_23 Magi 5 toxic peptide family Lee SC anon Short protein clustering Family This family consists of toxic peptides (Magi 5) found in the venom of the Hexathelidae spider. Magi 5 is the first spider toxin with binding affinity to site 4 of a mammalian sodium channel and the toxin has an insecticidal effect on larvae, causing paralysis when injected into the larvae [1]. 
19.60 19.60 20.20 49.90 18.90 18.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.85 0.72 -4.01 5 8 2012-10-01 22:06:18 2005-01-14 17:16:09 6 1 5 3 0 9 0 29.20 50 36.68 CHANGED uClLoph+CSSDK-CCGhTPsCshGlClPs sClLophtCSsDK-CCGhTPsCshGlCsPp 0 0 0 0 +7926 PF08094 Toxin_24 Conotoxin TVIIA/GS family Lee SC anon Short protein clustering Family This family consists of conotoxins isolated from the venom of cone snail Conus tulipa and Conus geographus. Conotoxin TVIIA, isolated from Conus tulipa displays little sequence homology with other well-characterised pharmacological classes of peptides, but displays similarity with conotoxin GS, a peptide from Conus geographus. Both these peptides block skeletal muscle sodium channels and also share several biochemical features and represent a distinct subgroup of the four-loop conotoxins [1]. 25.00 25.00 71.00 71.00 17.80 17.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -7.96 0.72 -4.35 2 2 2012-10-01 22:06:18 2005-01-14 17:16:32 6 1 2 2 0 4 0 31.50 63 98.44 CHANGED uCSGRsSRCPP.CCMGLhCuRG...KCluhat- uCSGRsSRCPP.CCMGLhCuRG...KCluhat- 0 0 0 0 +7927 PF08095 Toxin_25 Hefutoxin family Lee SC anon Short protein clustering Family This family consists of the hefutoxins that are found in the venom of the scorpion Heterometrus fulvipes. These toxins, kappa-hefutoxin1 and kappa-hefutoxin2, exhibit no homology to any known toxins. The hefutoxins are potassium channel toxins [1]. 
25.00 25.00 28.30 27.60 17.70 16.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.65 0.72 -6.82 0.72 -4.19 2 4 2009-01-15 18:05:59 2005-01-14 17:17:22 6 1 3 1 0 4 0 22.00 67 67.69 CHANGED GHACYRNCWREGNDEETCKERC GauCYRsCW+tGpDEETCKccC 0 0 0 0 +7928 PF08096 Bombolitin Bombolitin family Lee SC anon Short protein clustering Family This family consists of the bombolitin peptides that are found in the venom of the bumblebee Megabombus pennsylvanicus. Bombolitins are structurally and functionally very similar. They lyse erythrocytes and liposomes, release histamine from rat peritoneal mast cells, and stimulate phospholipase A2 from different sources [1]. 25.00 25.00 26.50 26.50 17.30 16.10 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.39 0.74 -6.37 0.74 -3.51 2 4 2009-09-11 05:23:05 2005-01-14 17:17:58 6 1 1 0 0 4 0 17.00 77 100.00 CHANGED .KIhDILAKLGKVLAHV IKIpDILAKLGKVLAHV 0 0 0 0 +7929 PF08097 Toxin_26 Conotoxin T-superfamily Lee SC anon Short protein clustering Family This family consists of the T-superfamily of conotoxins. Eight different T-superfamily peptides from five Conus species were identified. These peptides share a consensus signal sequence, and a conserved arrangement of cysteine residues. T-superfamily peptides were found expressed in venom ducts of all major feeding types of Conus, suggesting that the T-superfamily is a large and diverse group of peptides, widely distributed in the 500 different Conus species [1]. 25.00 25.00 27.80 27.80 12.90 11.70 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.18 0.73 -6.36 0.73 -3.17 2 2 2009-01-15 18:05:59 2005-01-14 17:18:32 6 1 1 0 0 2 0 11.00 91 100.00 CHANGED FCCPhIRYCCW FCCPhIRYCCW 0 0 0 0 +7930 PF08098 ATX_III Anemonia sulcata toxin III family Lee SC anon Short protein clustering Family This family consists of the Anemonia sulcata toxin III (ATX III) neurotoxin family. 
ATX III is a neurotoxin that is produced by sea anemone; it adopts a compact structure containing four reverse turns and two other chain reversals, but no regular alpha-helix or beta-sheet. A hydrophobic patch found on the surface of the peptide may constitute part of the sodium channel binding surface [1]. 25.00 25.00 25.60 25.20 17.90 16.00 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.61 0.72 -4.12 2 7 2009-01-15 18:05:59 2005-01-14 17:18:57 6 1 3 1 0 7 0 24.30 76 41.98 CHANGED poCCPC....YhuGCPWGQssa.pGCS KSCCPC...hhpGs.CFWGQNCYPEGCS 0 0 0 0 +7931 PF08099 Toxin_27 Scorpion calcine family Lee SC anon Short protein clustering Family This family consists of the calcine family of scorpion toxins. The calcine family consists of Maurocalcine and Imperatoxin. These toxins have been shown to be potent effector of ryanodyne-sensitive calcium channel from skeletal muscles. These toxins are thus useful for dihydropyridine receptor/ryanodyne receptor interaction studies [1,2]. 25.00 25.00 55.50 55.10 21.60 21.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -7.87 0.72 -4.05 2 5 2012-10-01 22:06:18 2005-01-14 17:20:37 6 1 4 1 0 5 0 33.00 83 60.66 CHANGED GDCLPHLKhCKtspDCCuKKCKRRGTNhEKRCR .GDCLPHLKRCKENNDCCSKKCKRRGTNPEKRCR 0 0 0 0 +7932 PF08100 Dimerisation Dimerisation domain Bateman A anon Pfam-B_455 (Release 16.0) Domain This domain is found at the N-terminus of a variety of plant O-methyltransferases. It has been shown to mediate dimerisation of these proteins [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.30 0.72 -4.13 30 919 2012-10-04 14:01:12 2005-01-16 14:21:20 6 7 155 29 350 965 0 50.20 38 14.45 CHANGED MsLKsAlELGlh-lltptG........phlosSElsupLs....hsPcusshlDRlLRlLu ...........MsLKsAl-LGls-hIpptG...........tshoh.s-ls.u...pLs..........hpPs...p.s...s..h.lcR.lhRlLs........... 0 22 202 291 +7933 PF08101 DUF1708 Domain of unknown function (DUF1708) Mistry J, Wood V, Novatchkova M anon manual Domain This is a yeast domain of unknown function. 20.80 20.80 20.90 21.60 20.40 20.60 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.55 0.70 -5.64 19 144 2012-10-03 21:54:49 2005-01-24 15:49:59 6 4 126 0 112 156 0 412.60 32 37.80 CHANGED pphstccV+pllHhhTtELKsRGhchsalhLPFRPcpssppl.sFlpphFshustt...hptctl.phlpps-shTLhpsLKah..WsRLPsu.lluW-sYppFKhtEp-psa.s+cAFlplhPpslsSsuHusIlYDFhDLLsuIuup....uphNthuGRKl.S+MuuhWAFs.spt.s...................su...FpsGhcpWtpuu-AhhHLhlAaLRShsPcs.pss....+LPpoLpsLL.sspYPPp.psshhpscoh.....pVPhlshps...SpsPapLLcRssc....st.t.Fps+-sashLpshF..phcssh...l.ptLocES+RlLcplo....osp........................................................................pshpsuWuc...................................................pphh...pscsshp.tpthSlscl-lcDaFhWsWhSSLusEpssp+KplFGRshllEscl...sG.pKWlVhpEps 
...........................................................................p.pht.cclppll+hhTtELKt...Ru..h.................chPalh....LP..F....RPp......pssst....hcsFlpphF...ststtt..........ps.......ctl..p.c.l....p.h.s-sh...sLssslKah..Wu..RL.................P..........s.....G....l........lsW..-..u.....YphF..+htE....................p...................-upa.s+cAFt.shlPhslsS.su+.spIlaDFFDLlsulAA+.....u+tNuhuGRKL.S+hsuhWAFppscss.........................su...FpsuacsWhpAuDAhpHLhhAaLRS.....hsPcs.tss......pLPhoLpsLlptspY...........P........Pp.psshhpspoh...............pVsh.ls....ss...SssPatLLcRspp.........FphR-s.thLpph....phcssh.....pthocEs+RlLcslo....stp.................................................................................................................................................................................................pshsPuWAc...................................................................................p.th.....-pth.p.tp.h.sslsph-lDD.FhWsWhSSLusEpsst+KthFGRshllEs.l....u.t+WllhpE................................................................................................................................................................................................................................................................................................................................................... 0 32 63 99 +7934 PF08102 Antimicrobial_7 Scorpion antimicrobial peptide Lee SC anon Short protein clustering Family This family consists of antimicrobial peptides secreted by scorpions. Novel antimicrobial peptides have been isolated from scorpions, namely the opistoporin [1] and the pandinin [2]. These peptides form essentially helical structures and demonstrate high antimicrobial activity against Gram-negative and Gram-positive bacteria respectively. 
25.00 25.00 27.30 46.10 24.00 23.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -7.94 0.72 -3.94 4 7 2009-01-15 18:05:59 2005-01-25 14:26:24 6 1 5 0 0 8 0 42.00 60 67.59 CHANGED GKVWDWIKSsAKKlWNS-sVppLKspuLNAAKNaVAEKIGATP .G.lWDhIKshAKKlWNS-ssppLKspALNAAKNaVAEKIGATP 0 0 0 0 +7935 PF08103 Antimicrobial_8 Uperin family Lee SC anon Short protein clustering Family This family consists of the uperin family of antimicrobial peptides. Uperin is a wide-spectrum antibiotic peptide isolated from the Australian toadlet, Uperoleia mjobergii. Being only 17 amino acid residues long, it is smaller than most other wide-spectrum antibiotic peptides isolated from amphibians. Uperin adopts a well-defined amphipathic alpha-helix with distinct hydrophilic and hydrophobic faces [1]. 21.40 21.40 24.10 32.00 20.60 16.60 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.13 0.73 -6.25 0.73 -3.70 4 7 2009-01-15 18:05:59 2005-01-25 14:39:32 6 1 2 0 0 7 0 17.00 59 100.00 CHANGED GVGDhhRKlVosIKNVV GVhDhhRKlsoslKNlV 0 0 0 0 +7936 PF08104 Antimicrobial_9 Ponericin L family Lee SC anon Short protein clustering Family This family consists of the ponericin L family of antimicrobial peptides that are isolated from the venom of the predatory ant Pachycondyla goeldii. Ponericin L family shares similarities with dermaseptins. Ponericin L may adopt an amphipathic alpha-helical structure in polar environments and these peptides exhibit a defensive role against microbial pathogens arising from prey introduction and/or ingestion [1]. 
25.00 25.00 59.50 59.40 20.60 17.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.73 0.72 -6.75 0.72 -4.15 2 2 2009-01-15 18:05:59 2005-01-25 14:40:06 6 1 1 0 0 2 0 24.00 96 100.00 CHANGED LLKELWTKhKGAGKAVLGKIKGLL LLKELWTKhKGAGKAVLGKIKGLL 0 0 0 0 +7937 PF08105 Antimicrobial10 Metchnikowin family Lee SC anon Short protein clustering Family This family consists of the metchnikowin family of antimicrobial peptides from Drosophila.\ metchnikowin is a proline-rich peptide whose expression is immune-inducible. Induction of the metchnikowin gene expression can be mediated either by the TOLL pathway or by the imd gene product. The metchnikowin peptide is unique among the Drosophila antimicrobial peptides in that it is active against both bacteria and fungi [1]. 25.00 25.00 35.30 34.90 21.40 21.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.55 0.72 -4.34 2 13 2009-01-15 18:05:59 2005-01-25 14:40:48 6 1 12 0 7 13 0 51.90 66 99.70 CHANGED MQLNLGAIFLALLGVMATATSVLAEPHRHQGPIFDTRPSPFNPNQPRPGPIY MQLNL.GAI.FLALLGlhAsussl.huEsHR+QGPI.FDTRPSPFNPNQPR.P.GP.Y.. 0 1 1 4 +7938 PF08106 Antimicrobial11 Formaecin family Lee SC anon Short protein clustering Family This family consists of the formaecin family of antimicrobial peptides isolated from the bulldog ant Myrmecia gulosa in response to bacterial infection. Formaecins are inducible peptide antibiotics and are active against growing Escherichia coli but were inactive against other Gram-negative and Gram-positive bacteria. Formaecin peptides are 16 amino acids long, are rich in proline and have N-acetylgalactosamine O-linked to a conserved threonine [1]. 
25.00 25.00 41.80 41.80 14.90 14.30 hmmbuild -o /dev/null HMM SEED 16 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.42 0.73 -6.40 0.73 -3.90 2 2 2009-09-10 15:42:18 2005-01-25 14:41:15 6 1 1 0 0 2 0 16.00 88 100.00 CHANGED GRPNPVNsKPTPaPRL GRPNPVNsKPTPaPRL 0 0 0 0 +7939 PF08107 Antimicrobial12 Pleurocidin family Lee SC anon Short protein clustering Family This family consists of the pleurocidin family of antimicrobial peptides. Pleurocidins are found in the skin mucous secretions of the winter flounder (Pleuronectes americanus) and these peptides exhibit antimicrobial activity against Escherichia coli. Pleurocidin is predicted to assume an amphipathic alpha-helical conformation similar to other linear antimicrobial peptides and may play a role in innate host defense [1]. 21.10 21.10 22.00 21.30 19.20 17.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.17 0.72 -4.41 4 43 2009-01-15 18:05:59 2005-01-25 14:41:37 6 1 23 3 1 50 0 39.40 46 61.12 CHANGED MKhsAhFLVLSLVVLMAEPGEuFltalh+GlhHuGKhIHGhl MKhsAhFLVL.lVVLMAEPGEsha.t.plh+GhhHsG+h.I+th...................... 1 0 0 1 +7940 PF08108 Antimicrobial13 Halocidin family Lee SC anon Short protein clustering Family This family consists of the halocidin family of antimicrobial peptides. Halocidins are isolated from the haemocytes of the tunicate, Halocynthia aurantium. They are dimeric in structures which are found via a disulfide linkage between cysteines of two different- sized monomers. Halocidins have been shown to have strong antimicrobial activities against a wide variety of pathogenic bacteria and could be ideal candidates as peptide antibiotics against multidrug-resistant bacteria [1]. 
25.00 25.00 34.60 33.90 21.30 20.00 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.24 0.73 -6.32 0.73 -3.79 2 3 2009-01-15 18:05:59 2005-01-25 14:42:07 6 1 1 0 0 3 0 15.00 100 34.88 CHANGED ALLHHGLNCAKGVLA ALLHHGLNCAKGVLA 0 0 0 0 +7941 PF08109 Antimicrobial14 Lactocin 705 family Lee SC anon Short protein clustering Family This family consists of lactocin 705 which is a bacteriocin produced by Lactobacillus casei CRL 705. Lactocin 705 is a class IIb bacteriocin, whose activity depends upon the complementation of two peptides (705-alpha and 705-beta) of 33 amino acid residues each. Lactocin 705 is active against several Gram-positive bacteria, including food-borne pathogens and is a good candidate to be used for biopreservation of fermented meats [1]. 21.00 21.00 89.90 89.90 19.50 18.30 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.39 0.72 -4.13 2 2 2009-01-15 18:05:59 2005-01-25 14:42:34 6 2 2 0 0 2 0 31.00 100 72.94 CHANGED GMSGYIQGIPDFLKGYLHGISAANKHKKGRL GMSGYIQGIPDFLKGYLHGISAANKHKKGRL 0 0 0 0 +7942 PF08110 Antimicrobial15 Ocellatin family Lee SC anon Short protein clustering Family This family consists of the ocellatin family of antimicrobial peptides. Ocellatins are produced from the electrical-stimulated skin secretions of the South American frog, Leptodactylus ocellatus. The family consists of three structurally related peptides, ocellatin 1, ocellatin 2 and ocellatin 3. These peptides present hemolytic activity against human erythrocytes and are also active against Escherichia coli [1]. 
22.70 22.70 23.10 36.00 21.00 22.60 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.27 0.73 -6.55 0.73 -3.64 3 5 2009-01-15 18:05:59 2005-01-25 14:43:01 7 1 3 0 0 5 0 19.00 62 81.20 CHANGED VLDILKGAAKDLLAHlAsK VLDILKGAAKDLLAHlAsK 0 0 0 0 +7943 PF08111 Pea-VEAacid Pea-VEAacid family Lee SC anon Short protein clustering Family This family consists of the PEA-VEAacid neuropeptides family. These neuropeptides are isolated from the abdominal perisympathetic organs of the American cockroach. These peptides are found together with Pea-YLS-amide and Pea-SKNacid, giving a unique neuropeptide pattern in abdominal perisympathetic organs. The functions of these neuropeptides are unknown [1]. 25.00 25.00 44.20 44.10 23.00 19.50 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.31 0.74 -6.14 0.74 -3.46 2 2 2009-01-15 18:05:59 2005-01-25 14:45:27 6 1 1 0 0 2 0 15.00 93 93.75 CHANGED LsLTPGSHVDSYVEA LsLTPGSHVDSYVEA 0 0 0 0 +7944 PF08112 ATP-synt_E_2 ATP synthase epsilon subunit Lee SC anon Short protein clustering Family This family consists of epsilon subunits of the ATP synthase. The ATP synthase complex is composed of an oligomeric transmembrane sector (CF0), and a catalytic core (CF1). CF1 is composed of 5 subunits, of which the epsilon subunit functions as a potent inhibitor of ATPase activity in both soluble and bound CF1. 
Only when the epsilon inhibition is disabled is high ATPase activity detected in ATPase [1] 25.00 25.00 25.30 25.30 24.70 21.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.81 0.72 -3.90 2 15 2009-01-15 18:05:59 2005-01-25 14:48:08 6 1 15 0 4 6 0 50.40 71 92.87 CHANGED Msp......DKYlpIL+spL-pKKsElLppINMEYEKhLKpRLspL-clKtplLKE .....MDQ......DKYLQILRSSLEEKKSEILKNVNAEYEKLLKNRLNQLDEVKRKVLKE 0 1 1 3 +7945 PF08113 CoxIIa Cytochrome c oxidase subunit IIa family Lee SC anon Short protein clustering Family This family consists of the cytochrome c oxidase subunit IIa family. The bax-type cytochrome c oxidase from Thermus thermophilus is known as a two subunit enzyme. From its crystal structure, it was discovered that an additional transmembrane helix 'subunit IIa' spans the membrane. This subunit consists of 34 residues forming one helix across the membrane. The presence of this subunit seems to be important for the function of cytochrome c oxidases [1]. 20.70 20.70 23.20 22.60 20.20 20.00 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.39 0.72 -4.23 2 13 2009-01-15 18:05:59 2005-01-25 14:49:03 6 1 13 27 5 11 0 34.30 59 88.49 CHANGED MEEKPKGALAVILVLTLTILVFWLGVYAVFFARG .......MEEK...PpGALuVIhVLTlTILVFWhGVaAlFhARG. 1 1 4 5 +7946 PF08114 PMP1_2 ATPase proteolipid family Lee SC anon Short protein clustering Family This family consists of small proteolipids associated with the plasma membrane H+ ATPase. Two proteolipids (PMP1 and PMP2) are associated with the ATPase and both genes are similarly expressed in the wild-type strain of yeast with no modification of the level of transcription of one PMP gene is detected in a strain deleted of the other. Though both proteolipids show similarity with other small proteolipids associated with other cation -transporting ATPases, their functions remain unclear [1]. 
22.10 22.10 22.30 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.22 0.72 -4.47 2 29 2009-01-15 18:05:59 2005-01-25 14:51:50 6 1 21 0 18 21 0 39.90 63 68.42 CHANGED M...TLPGGVILVFILVGLACIAIIuTIIYRKWQARQRGLQRF ...........LPGGVILVFILVGLAsIAIluTIIYRKWQARQRuLQRF..... 0 3 8 16 +7947 PF08115 Toxin_28 SFI toxin family Lee SC anon Short protein clustering Family This family consists of the SFI family of spider toxins. This family of toxins might share structural, evolutionary and functional relationships with other small, highly structurally constrained spider neurotoxins. These toxins are highly selective agonists/antagonists of different voltage-dependent calcium channels and are extremely valuable reagents in the analysis of neuromuscular function [1]. 25.00 25.00 39.90 39.90 23.40 18.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.96 0.72 -4.29 2 10 2009-01-15 18:05:59 2005-01-25 14:53:16 6 1 1 0 0 10 0 33.30 79 79.10 CHANGED +psMsDEoVCYIpspNsssGpCLp.pusaAcPWEh .KECMsDGTVCYIHNHNDCCGSCLC.NGPlARPWEM. 0 0 0 0 +7948 PF08116 Toxin_29 PhTx neurotoxin family Lee SC anon Short protein clustering Family This family consists of PhTx insecticidal neurotoxins that are found in the venom of Brazilian, Phoneutria nigriventer. The venom of the Phoneutria nigrivente contains numerous neurotoxic polypeptides of 30-140 amino acids which exert a range of biological effects. While some of these neurotoxins are lethal to mice after intracerebroventricular injections, others are extremely toxic to insects of the orders Diptera and Dictyoptera but had much weaker toxic effects on mice [1]. 
25.00 25.00 68.20 68.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.79 0.72 -4.16 4 6 2009-01-15 18:05:59 2005-01-25 14:55:43 6 1 4 0 0 6 0 31.00 72 96.88 CHANGED AFC+aNGQQCTSDGQCCpG+ChTAFhG+ICM sFCRaNGQQCTSDGQCCaG+C+TAFhG+ICM 0 0 0 0 +7949 PF08117 Toxin_30 Ptu family Lee SC anon Short protein clustering Family This family consists of toxic peptides that are isolated from the saliva of assassin bugs. The saliva contains a complex mixture of proteins that are used by the bug either to immobilise the prey or to digest it. One of the proteins (Ptu1) has been purified and shown to block reversibly the N-type calcium channels and to be less specific for the L- and P/Q- type calcium channels expressed in BHK cells [1]. 24.50 24.50 24.90 79.10 24.20 23.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.03 0.72 -3.92 2 3 2012-10-01 22:06:18 2005-01-25 14:56:59 6 1 3 2 0 3 0 34.70 57 99.05 CHANGED A-cDCls.Gu.ChGpsK.CCp.+shC..YAN+C..l AD-DCLPRGSKCLGENKQCCc.+TTCMFYANRCVGl. 0 0 0 0 +7950 PF08118 MDM31_MDM32 Yeast mitochondrial distribution and morphology (MDM) proteins Mistry J, Wood V anon Pfam-B_37122 (release 16.0) Family Proteins in this family are yeast mitochondrial inner membrane proteins MDM31 and MDM32.\ These proteins are required for the maintenance of mitochondrial morphology, and the stability of mitochondrial DNA [1]. 
18.20 18.20 38.10 18.40 17.20 17.70 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.66 0.70 -6.04 5 247 2009-01-15 18:05:59 2005-01-25 16:36:47 6 4 139 0 186 252 0 358.20 32 76.81 CHANGED cRDQLLAQAoNhauRLRIRLKWhLKRS.NRPFNTDDISAFlSWlLVSNsLLlFLuTTTFlSLVIYLhNTVFAQEYVAcKlGNFLTKNSALTVVFESAIVPDWSSGKISF+KVFVSRRPKpscuFoKGSQ+EAsERAKLALSEsLLVscE-FDDGNYTQFDLTIDQV-ISLSLsKWINGKGhlDEVpINGLRGVVDRTHVaWKssDDARNYKNVHQPGDFEISsF+MNDVLFTLYQPuGFRPFpVSIFNCELPQLRKHWLFYDFLNANsMSGoYDNSMFTIHKKa+pcDhsc...psSsSssW+KVTRMRVDSLNIDHLNAGlEGPFGWITSGKVDMIGDVLLP--NtDslsLSELLTlIuDRIlKEA+RYpNhlPtpKs-pPDI..........DlccYFVMDFoLRLNNVRAcVPLFTPELSYINNALIRPIVGYINSKRTYIPI+CRVVKNLsDFuGSWTIYDShLMDDLSAEVYDAFA-YVAD-EpRslRMKRVGFWSLQLLlQLILhSLGAIA .........................................................+tphL..spshhp+hhlph+Wh.h+s..Rsas.--..h..sshhSWhhhuphhhhhluTTTFhuhhhh.h....N.....oh....p-hls...t.h.l.....uphhs......t...hph.hpp.shhst.ht.....pshI.hp.ps.l...................................................................................................................................................................................................................p..hhpa-lplpplsloLSh.pWhpGpGhlpphpl.GlRG...........hl...........shp.................p..h..........t..................s..............h.....t.h...hp.....hp......s.......sa.EhpphphpDhhhplhpst.s.h.ps.....hphSIashclspLRtpalhhDhhsAp.hsGshssShFol.....H.pQ...h.......t.........t..........................................a.pp.........hsRhRlDslplscLp..t.....sh...p.ushsWIhpGpl-hluDlhhPt.p....t..p...................t.....th....h..........h............................................................................................................................p.....hhh.hshplph.sl+A.hP................ths.....hhs.s.....l.....RslluahN............s...p....pp.....l.lpsph...htphts..ph.pshhhp.h..th...................Ytth...l...pt...p..p.c....Ws.p..............tt................................................................
.... 0 51 109 166 +7951 PF08119 Toxin_31 Scorpion acidic alpha-KTx toxin family Lee SC anon Short protein clustering Family This family consists of acidic alpha-KTx short chain scorpion toxins. These toxins named parabutoxins, block voltage-gated K channels and have extremely low pI values. Furthermore, they lack the crucial pore-plugging lysine. In addition, the second important residue of the dyad, the hydrophobic residue (Phe or Tyr) is also missing [1]. 25.00 25.00 90.40 90.30 22.30 21.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -8.05 0.72 -4.42 2 3 2009-01-15 18:05:59 2005-01-25 16:49:08 6 1 2 0 0 3 0 36.70 93 100.00 CHANGED DEEPKEoCSDEMCVIYCKGEEYSTGVCDGPQKCKCSD DEEPKETCSDEMCVIYCKGEEYSTGVCDGPQKCKCSD 0 0 0 0 +7952 PF08120 Toxin_32 Tamulustoxin family Lee SC anon Short protein clustering Family This family consists of the tamulustoxins which are found in the venom of the Indian red scorpion (Mesobuthus tamulus). Tamulustoxin shares no similarity with other scorpion venom toxins, although the positions of its six cysteine residues suggest that it shares the same structural scaffold. Tamulustoxin acts as a potassium channel blocker [1]. 25.00 25.00 97.40 97.30 16.70 16.70 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.97 0.72 -4.35 2 2 2009-01-15 18:05:59 2005-01-25 16:50:44 6 1 1 0 0 2 0 35.00 97 100.00 CHANGED RCHFVlCTTDCRRNSPGTYGECVKKEKGKECVCKS RCHFVlCTTDCRRNSPGTYGECVKKEKGKECVCKS 0 0 0 0 +7953 PF08121 Toxin_33 Waglerin family Lee SC anon Short protein clustering Family This family consists of the lethal peptides (waglerins) that are found in the venom of Trimeresurus wagleri. Waglerins are 22-24 residue lethal peptides and are competitive antagonist of the muscle nicotinic receptor (nAChR). Waglerin-1 possesses a distinctive selectivity for the alpha-epsilon interface binding site of the mouse nAChR [1]. 
25.00 25.00 25.70 57.80 24.10 15.40 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.73 0.72 -6.95 0.72 -4.35 2 2 2009-01-15 18:05:59 2005-01-25 16:51:09 6 1 1 0 0 2 0 22.00 95 91.67 CHANGED GGKPDLRPCaPPCHYIPRPKPR GGKPDLRPCaPPCHYIPRPKPR 0 0 0 0 +7954 PF08122 NDUF_B12 NADH-ubiquinone oxidoreductase B12 subunit family Lee SC anon Short protein clustering Family This family consists of the NADH-ubiquinone oxidoreductase B12 subunit proteins. NADH is the central source of electrons in the mitochondrial and bacterial respiration. NADH-ubiquinone oxidoreductase is involved in the transfer of electrons from NADH to the electron transport chain. This oxidation of NADH is coupled to proton transfer across the membrane, generating a proton motive force that is utilised for the synthesis of ATP. The function of this subunit is unclear [1]. 22.70 22.70 22.70 23.40 22.60 22.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.98 0.72 -4.23 13 205 2009-01-15 18:05:59 2005-01-25 17:17:10 7 3 190 0 146 193 0 56.50 39 58.04 CHANGED LRDPWuRNEAWRY.hssFuppho.hhsshF+GFtaG.FuAFVlslulE.....ahhtssc+sp.H ....+DPWtRsEAWRY..pGsFu.c....hs......hhcshh.....Ghshu.hs.AFs.shluhE......ah.l..p.sp..c+p................. 0 49 73 111 +7955 PF08123 DOT1 Histone methylation protein DOT1 Mistry J, Wood V anon Pfam-B_12064 (release 16.0) Domain The DOT1 domain regulates gene expression by methylating histone H3 [1]. H3 methylation by DOT1 has been shown to be required for the DNA damage checkpoint in yeast [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.18 0.71 -5.02 11 554 2012-10-10 17:06:42 2005-01-25 17:38:26 8 19 328 15 391 1008 656 161.00 26 26.32 CHANGED YsRuV.PcspcL+.cYcuFSspVYGELhPsFloslhpcssLsssclFhDLGSGVGNsVlQAALEhGCchSaGCElM-sASclAEtQhcEhcp+hphaGh+hsplcat.+tSFlsN-clcpllspsDVlLVNNFhFDspLNppl.pchltsLKsGsKIISLKshtsssap.Is.csh-sIhshL+Vpchshscs.SVSWTsps.ssYYISTl ..............................................................sYGEh...............t...hl.sp....l...h....p....p.h....p......l...p.p..........s..s..........l.FlDLGSGlGp...ll............hQsA.hph.s.sc........s..hGlEh.h........pp.h..s.p..h...A.........pt........t..t..h..p....t.........h.h....t......h.h...s....h..t......h.....php....h...ps....s..hh.p....t.........h.................h.......tp...ss..llhhN..s...........h..h......F..s...........p.l.t....tl.....p.h.h.......p.h...t.....G.sp.l.l.o...p...hh........................................................................................................................................................................................................................................................ 1 140 218 354 +7956 PF08124 Lyase_8_N Polysaccharide lyase family 8, N terminal alpha-helical domain Mistry J anon Pfam-B_2438 (release 16.0) Family This family consists of a group of secreted bacterial lyase enzymes EC:4.2.2.1 capable of acting on hyaluronan and chondroitin in the extracellular matrix of host tissues, contributing to the invasive capacity of the pathogen. 
24.00 24.00 24.90 24.20 21.40 23.90 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.96 0.70 -5.56 26 980 2012-10-02 15:11:41 2005-01-31 09:02:22 6 52 764 44 96 695 5 312.20 30 35.54 CHANGED Wtshl......hGsp.hsssssshtshhpphsppup.p.hsshp...psspshLWpcls........tpssoupl..Tssap+LpphApAappPsSshapssslhssIhcuLcahppphYssspp.................phG.NWWcWpIGsPpulsshLlLha-tlo...psclssYssslc+FsP-P.............sht....hpuTGANpsDhupshllcGlLpcDssclppuhcuLss........VashVspG......DGFYpDGSaIQH...........sslsYTGuYGsVLlcGlupLhsllpsosaslss.phsslachlccuahPllhcGtMMDhlRGRuISRtssp.........s+..stGtsllpullhl.uphussspp..tchpuhlKs ................................................................................W.sh.hG..p.ac..pps.phtphhpt..h-p.....c.s...t.....p.....hlpshpp....pss.Rs..h..L.Wpshp...............................ppS.ush....Tpoa+plcchApsh..p....sP.....s....op..h.a..p-....pplhptlh-ul-ah.pcphYs.s..p.s......................hu..NWWDaE.........IGsP+ulssTLhLhp...-.h..ho..............s-.chpp..ao.ss.Icp...FsPDs................tthh..ohs....shtu.......pG..uNhlDhu..+s..hlhpull....p.c.......Ds.p.p......lpp...ulc...ulsp...............V..F..ph...Vs..pu...................-.GFYpDGSaIpH..............ssVsYTG.uYGs.V.L...lcGluplhsllpt.....T....tas..h..s..spp..pslhpal-c..u.FhPll.hp.G.....ch.....h.D.h.s.RGRu.ISRtspp.........u+.....stuh.plhpulhhl.uc.h..sppp..tchpphlK.................................................................................. 
0 54 80 93 +7957 PF08125 Mannitol_dh_C Mannitol dehydrogenase C-terminal domain Bateman A anon Prosite Domain \N 20.60 20.60 20.80 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.61 0.70 -5.20 16 5053 2012-10-02 19:36:47 2005-01-31 18:33:56 8 18 2344 4 791 3180 477 232.20 31 52.45 CHANGED ppVuFPsshVDRIVPt...........thuhcDPhsVssEPFhpWVlEcshhpG.ss...hchsGsphVsclpPY.EhKLhhLNuuHuslAYlGaLtGhphlcEuhpDtplpthlcshhtEphtsllshh...sts-LpsYtspllpRFpNPhIpDplpRluhs.....PhRhLsshcchlt...shhchhshs.tsahphlpGlstshphhp.sDsputplpt..........hhspcpspshLu.hshhtt-h.pssph ..............................................pVsFssohVDRIVPs...........s....t...........h...............h....................p...........t...........h.....G.h.p..D............s............h.........................V............s............s............E......s.F..t..........p............W....VlE..c.s.h..h.....t.G...p..s...............h..c.h.....s..G....sp....h..V...s...D...l...h...PacEh..KL.t.h.LNuuHoh.lA....Yl...Gh....L...u..G...a...p....pIt-shp.D.sth.......cthsc.t...hhh...c.-tt...s.h..l.ph........sss...-......h.psYt.p.....p.l.l.t.RF..pN...P...hlccpshplAhss.pKh..P..h..RhLssh.c.thlpt....t.....s..........h....t....h..L....s.....h.....u....Au.a....h.pa...l..........p......G..........l...........s..........t............t.........t.........p..........h.........h.s..........s..D..s.....h......phtp....................h...s...p...p.t.....sp....s....h....Lu.....shhsts.......h.................................................................. 1 196 439 611 +7958 PF08126 Propeptide_C25 Propeptide_C25 Mistry J anon Rawlings N Motif This is found at the N terminal end of some of the members of the C25 peptidase family (PF01364). Little is known about the function of this motif. 
20.30 20.30 22.30 20.70 18.90 18.00 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.23 0.71 -5.32 4 56 2009-01-15 18:05:59 2005-02-01 17:12:27 6 18 27 0 24 60 111 185.60 21 14.11 CHANGED QsschttsPs....lphltssppSho+lpFc..hsplpFTpVpTpcG..shQssohstusshuEhGpPpLPlLp+.lAVs-..Ttsh+Vclhoochh-ppsl...hlsPopssh.+sEsP-plPY......l.upuYupstFhPuElsslspshhhRsVRhtsVshsPlQYNPVsppL+lhscIpVsVS.su.....ppspshhshhcsSsFssFEssYKphF ......................................................................................................................tts.....hp..hp..h.th.htphp.h.tt..hht.hshst.sh.....hsch.G.PpLPh.hpp.lulPs...s.hp..hpVps.ss.sp.h..hph.psh....hlhP.sps..h.csp.s..Pp...p.lsa.........hpspsYspspaaPsplsplspshhhRsh+stslshtPlQYNPVoppL+l....hsplplplohpu........tpt.t................................................ 0 17 22 23 +7959 PF08127 Propeptide_C1 Peptidase family C1 propeptide Mistry J anon Rawlings N Motif This motif is found at the N terminal of some members of the Peptidase_C1 family (Pfam:PF00112) and is involved in activation of this peptidase [1]. 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.83 0.72 -4.33 60 407 2009-01-15 18:05:59 2005-02-02 09:21:42 8 5 183 5 123 418 2 41.70 37 13.35 CHANGED LS--hIphINp.p.ssTWKAG+N..F.tshohspl+pLhGs.h.sss .....LS--hlshINc.p.ssTW+AG+N...F...shs....hs.h+pLhGs.hhps........... 0 61 73 99 +7961 PF08129 Antimicrobial17 Alpha/beta enterocin family Lee SC anon Short protein clustering Family This family consists of the alpha and beta enterocins and lactococcin G peptides. These peptides have some antimicrobial properties; they inhibit the growth of Enterococcus spp. and a few other gram-positive bacteria. These peptides act as pore- forming toxins that create cell membrane channels through a barrel-stave mechanism and thus produce an ionic imbalance in the cell. 
These family of antimicrobial peptides belong to the class II group of bacteriocin [1]. 25.00 25.00 26.80 26.50 24.20 23.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.73 0.72 -4.19 2 7 2012-10-02 23:56:30 2005-02-22 13:49:07 6 1 4 2 0 8 0 46.40 57 90.78 CHANGED MKQYKVLNEKEMKKsIGGESVFSKIGNAVGPAAYWILKGLGNMSDVNQADRINRKKH .......K.LsEKEh+pslGG....scIGpulG.sAYWlhKuhGNMSDVNQAsRINRKK.t.............. 0 0 0 0 +7962 PF08130 Antimicrobial18 Type A lantibiotic family Lee SC anon Short protein clustering Family This family consists of the type A lantibiotic peptides. Both Pep5 and epicidin-280 are ribosomally-synthesised antimicrobial peptides produced by Gram-positive bacteria that are characterised by the presence of lanthionine and/or methyllanthionine residues. The lantibiotics family has a highly specific activity against multi- drug resistant bacteria and has potential to be utilised in a wide range of medical applications [1,2]. 25.00 25.00 31.80 31.60 24.00 23.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.98 0.72 -4.09 2 6 2009-09-11 06:04:39 2005-02-22 13:49:58 6 1 4 0 0 4 0 53.30 58 100.00 CHANGED McNpKsLFDLEIKK-s.pNssELEsQohGPAI+Aohp.C....KATRhhTVSCK.KssCp M...NKELFDLDINK.pMEsPTEMTsQThGTslKVS+ulCK...puTCIsTISCo...NC.K 0 0 0 0 +7963 PF08131 Defensin_3 Defensin-like peptide family Lee SC anon Short protein clustering Family This family consists of the defensin-like peptides (DLPs) isolated from platypus venom. These DLPs show similar three-dimensional fold to that of beta-defensin-12 and sodium-channel neurotoxin Shl. However the side chains known to be functionally important to beta-defensin-12 and Shl are not conserved in DLPs. This suggests a different biological function. 
Consistent with this contention, DLPs have been shown to possess no anti-microbial properties and have no observable activity on rat dorsal-root-ganglion sodium-channel currents [1]. 17.70 17.70 17.70 18.00 17.50 17.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.20 0.72 -3.98 2 9 2012-10-01 20:50:19 2005-02-22 13:51:53 6 1 3 4 8 11 0 38.40 44 68.38 CHANGED ac.psC.ShsGVCRcKsphNC+.hhhs.C.NcpQKCCch .......ptCpshuGVCRcKss+NC+sIhhs.CcNRNp+CCc... 1 0 0 6 +7964 PF08132 AdoMetDC_leader S-adenosyl-l-methionine decarboxylase leader peptide Lee SC anon Short protein clustering Family This family consists of the S-adenosyl-l-methionine decarboxylase (AdoMetDC) leader peptides. AdoMetDC is a key regulatory enzymes in the biosynthesis of polyamines. All expressed plant AdoMetDC mRNA 5' leader sequences contain a highly conserved pair of overlapping upstream ORFs (uORFs) that overlap by one base. Sequences of the small uORFs are highly conserved between monocot, dicot and gymnosperm AdoMetDC mRNA species, suggesting a translational regulatory mechanism [1]. 25.00 25.00 55.60 55.00 17.20 17.00 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.69 0.72 -4.03 3 44 2009-01-15 18:05:59 2005-02-22 13:56:19 6 3 24 0 23 40 0 50.70 78 40.31 CHANGED MESKGGKKKSSSSSSpsoLFFEAPLGYSIEDVRPNGGIKKFRSAAYSNCo+KPS ........MESKGGKKKSSSSsS...LhYEAPLGYSIEDVRPsGGIKKFRSAAYSNCu++PS..... 0 4 18 23 +7965 PF08133 Nuclease_act Anticodon nuclease activator family Lee SC anon Short protein clustering Family This family consists of the anticodon nuclease activator proteins. Pre-existing host tRNAs are reprocessed during bacteriophage T4 infection of certain Escherichia coli strains. In this pathway, tRNA(Lys) is cleaved 5' by the anticodon nuclease to the wobble base and is later restored in polynucleotide kinase and RNA ligase reactions [1]. 
25.00 25.00 36.50 36.30 18.50 16.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -7.23 0.72 -4.45 2 15 2009-01-15 18:05:59 2005-02-22 13:59:57 6 1 14 0 0 9 0 25.40 82 96.21 CHANGED MSNFHNEHVMQFYRNNLKshGlhGhp .MSNFHNEHVMQFYRNNLKTKGVFGRp.. 0 0 0 0 +7966 PF08134 cIII cIII protein family Lee SC anon Short protein clustering Family This family consists of the cIII family of regulatory proteins. The lambda CIII protein has 54 amino acids and it forms an amphipathic helix within its amino acid sequence. Lambda cIII stabilises the lambda cII protein and the host sigma factor 32, responsible for transcribing genes of the heat shock regulon [1]. 25.00 25.00 64.30 64.20 23.90 23.20 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.28 0.72 -4.36 3 42 2009-01-15 18:05:59 2005-02-22 14:20:08 6 1 42 0 2 15 0 43.80 89 98.29 CHANGED MMHFQLAGSGVMSAFYPHESELSRRVKQLIRAAKKQLEALCAMK MMHFQLAGSGVMSAFYPHESELSRRVKQLIRAAKKQLEALCAMK.... 0 0 0 1 +7967 PF08135 EPV_E5 Major transforming protein E5 family Lee SC anon Short protein clustering Family This family consists of the major transforming proteins (E5) of the bovine papilloma virus (BPV). The equine sarcoid is one of the most common dermatological lesion in equids. It is a benign, locally invasive dermal fibroblastic lesion and studies have shown an association of the lesions with BPV. E5 is a short hydrophobic membrane protein localising to the Golgi apparatus and other intracellular membranes. It binds to and constitutively activates the platelet-derived growth factor-beta in transformed cells. This stimulation activates a receptor signaling cascade which results in an intracellular growth stimulatory signal [1]. 
25.00 25.00 37.10 36.90 20.90 20.30 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.50 0.72 -3.97 3 23 2009-01-15 18:05:59 2005-02-22 14:21:20 6 1 6 0 0 22 0 42.30 82 99.79 CHANGED Msa.GLLLFLGLTFAlQLLLLVFLLFFFLVWWDQFGCRC-Gh.L MPNLWFLLFLGLVAAMQLLLLLFLLLFFLVYWDHFECSCTGLPF. 0 0 0 0 +7968 PF08136 Ribosomal_S22 30S ribosomal protein subunit S22 family Lee SC anon Short protein clustering Family This family consists of the 30S ribosomal proteins subunit S22 polypeptides. This polypeptide is 47 amino acids in length and has a molecular weight of about 5 kDa. The S22 subunit is a component of the stationary-phase-specific ribosomal protein and is assembled in the ribosomal particles in the stationary phase. This subunit along with other stationary-phase-specific ribosomal proteins result in compositional changes of ribosomes during the stationary phase. The significance of this change is not clear as yet [1]. 25.00 25.00 26.00 25.30 24.00 23.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.05 0.72 -3.81 3 402 2009-12-01 13:51:23 2005-02-22 14:28:17 6 1 400 0 14 34 0 44.90 88 97.59 CHANGED MKSNRQARHILGLDYKLSNQRKVVIEGDsEoVVTHATGRKRHA-K ..MKSNRQARHILGLDHKISN.QRKIVTEGDKSSVV..N..N..PTGRKRPAEK............ 0 1 2 8 +7969 PF08137 DVL DVL family Lee SC anon Short protein clustering Family This family consists of the DVL family of proteins. In a gain-of-function genetic screen for genes that influence fruit development in Arabidopsis, DEVIL (DVL) gene was identified. DVL is a small protein and overexpression of the protein results in pleiotropic phenotypes featured by shortened stature, rounder rosette leaves, clustered inflorescences, shortened pedicles, and siliques with pronged tips. DVL family is a novel class of small polypeptides and the overexpression phenotypes suggest that these polypeptides may have a role in plant development [1]. 
19.60 19.60 19.70 22.30 18.50 19.20 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.20 0.72 -6.21 0.72 -4.48 20 182 2009-09-11 09:54:20 2005-02-22 14:32:13 7 4 18 0 129 166 0 19.00 66 23.47 CHANGED KEQRuRhYIlRRClsMLlC .KEQRARhYIlRRCVsMLlC... 0 24 81 101 +7970 PF08138 Sex_peptide Sex peptide (SP) family Lee SC anon Short protein clustering Family This family consists of Sex Peptides (SP) that are found in Drosophila. On mating, Drosophila females decreases her remating rate and increases her egg-laying rate due, in part, to the transfer of SP from the male to the female. SP are found in seminal fluids transferred from the male to the female during mating. The male seminal fluid proteins are referred to as accessory gland proteins (Acps). The SP is one of the most interesting Acps and plays an important role in reproduction [1]. 25.00 25.00 53.90 53.80 19.80 19.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.08 0.72 -4.12 5 23 2009-01-15 18:05:59 2005-02-22 14:41:42 6 1 10 1 2 24 0 52.70 57 99.51 CHANGED M+sPl.LhLllL..LlGlAhuh...hscRsc.....susIhGP+DRcKWCRLNLGPAWGGRsC ....MKs.hhhLlLVh..lLGLs.uhpWPhs++ss.....+hsI.SPpsR-KWCRLNLGPAWGGR.C 0 2 2 2 +7971 PF08139 LPAM_1 VirB; Prokaryotic membrane lipoprotein lipid attachment site Lee SC, Bateman A anon Short protein clustering Motif In prokaryotes, membrane lipoproteins are synthesized with a precursor signal peptide, which is cleaved by a specific lipoprotein signal peptidase (signal peptidase II). The peptidase recognizes a conserved sequence and cuts upstream of a cysteine residue to which a glyceride-fatty acid lipid is attached [1]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.71 0.73 -7.05 0.73 -3.54 71 711 2012-10-01 23:27:00 2005-02-22 14:43:14 7 16 567 0 60 226 22 24.40 62 12.78 CHANGED M...................................++hhhhhhhhh...LuuCuo .....................................................MhKKILFPLlALFh..LAGCAp. 0 6 22 43 +7972 PF08140 Cuticle_1 Crustacean cuticle protein repeat Lee SC anon Short protein clustering Family This family consists of the cuticle proteins from the Cancer pagurus and the Homarus americanus. These proteins are isolated from the calcified regions of the crustacean and they contain two copies of an 18 residue sequence motif, which thus far has been found only in crustacean calcified exoskeletons [1]. 21.40 21.40 21.40 26.00 21.10 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -7.95 0.72 -4.13 38 90 2009-01-15 18:05:59 2005-02-22 14:44:51 6 6 6 0 0 82 0 40.00 45 71.21 CHANGED GsSGllhsDGp.hQhstsh..sl..lLhGPSGhVhusGcslQh .....G.SGllhsDGp.hQhstsh..sl..llhGPSGhVhusGcNlQh...... 0 0 0 0 +7973 PF08141 SspH Small acid-soluble spore protein H family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) of the H type (sspH). SspH are unique to spores of Bacillus subtilis and are expressed only in the forespore compartment during sporulation of this organism. The sspH genes are monocistronic and are recognised by the forespore-specific sigma factor for RNA polymerase - sigma-G. The specific role of this protein is unclear but is thought to play a role in sporulation under conditions different from that of the common laboratory tests of spore properties [1]. 
25.00 25.00 51.10 51.00 21.30 17.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.72 0.72 -3.92 20 329 2009-01-15 18:05:59 2005-02-22 14:48:28 7 1 194 0 59 184 1 57.70 46 94.31 CHANGED M.-spRAKcIlsSsshlsVoYpGsPVaI-cVs.EpscsApla.l.ssspccpcVslssLcE ..MslpRAKpIhsSspphsVoa.pG.hPVaIppVD.EpspsAplYpl.sN.PscchpVsVssLcE....... 0 19 39 43 +7974 PF08142 AARP2CN AARP2CN (NUC121) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is the central domain of AARP2. It is weakly similar to the GTP-binding domain of elongation factor TU [1]. 21.50 21.50 21.50 25.10 21.20 21.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.67 0.72 -4.45 73 707 2009-01-15 18:05:59 2005-02-22 15:35:56 7 33 318 0 470 699 8 82.40 35 9.33 CHANGED -htNLhRhlssh+..+slsWRss+sYllu-chch...hsssp..t................................plslhGalRGss.hss..sphVHIsGhGDFplspI.ptlsDPs ......................................EhpNLhRhlsshK..+.slpW..Rss+sYlLu-chc.....hsssppht............................................tslslhGYlRGps....Lps.....s..........phVHIsGhG..DFplspl.phhsDP............................ 0 170 265 390 +7975 PF08143 CBFNT CBFNT (NUC161) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in proteins of CARG-binding factor A-like proteins [1]. 21.70 21.70 21.80 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.89 0.72 -2.88 16 125 2009-01-15 18:05:59 2005-02-22 15:43:46 6 4 36 2 37 95 0 66.10 44 21.96 CHANGED MS...E-Qhhpss..st.....sGpcuss-ppus...tshtGs.u..............................sussssAEGspI-ASKNEEDsGK .........................MS...EpQ.htss..st.....sG..uss-tpush..ssttussu....................uG...tsusssss...sGssstAEGspIsASKNEEDtGK. 
0 2 7 16 +7976 PF08144 CPL CPL (NUC119) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is fund in Penguin-like proteins associated with Pumilio like repeats [1]. 20.70 20.70 20.70 21.40 20.50 20.30 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.72 0.71 -4.36 8 270 2009-01-15 18:05:59 2005-02-22 15:44:06 6 5 231 0 193 266 1 141.50 26 20.63 CHANGED lssKYGRKVLLYLLuPRDssHFhPEIIclLccGD....sNAaSKKDsplRR+ELLEuISPsLLchls-cup-llhDpussllVuDILssssGDspsshs.AlAplAsp-h.suuh-G-hHIuccPAGHhsLKhLIpQD+chtEsGKEspFu+s .....................................h..spauR+slLY.Lls.s..t......sstah...s......s...p....h...l.p.h.L.pcsc............tsspSKKDspl...RRpELl..c...uho...P...s....LLphl....sp.....p.....s.........p.........pllp.s....sh....us.hls.-........l.Lh..........s......u.s..........G...........-.p.p..s.s.hp....A.lAph.s.....sp..p.........................t..............t..........t..t...p..................+hh.pp.shuthhL.KhLlptsp....................................................................................... 0 59 99 157 +7977 PF08145 BOP1NT BOP1NT (NUC169) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in BOP1-like WD40 proteins [1]. 
25.00 25.00 25.30 25.40 21.80 24.50 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.59 0.70 -4.65 38 353 2009-01-15 18:05:59 2005-02-22 15:44:34 7 9 289 0 249 359 9 244.50 45 35.02 CHANGED Y--hsHIGYDlsGKKIhKst.ppstLDphLcsh-.cPcs..........W...RslhDthsscslpLoc--Lcll+Rlppuchsspsa-sYpshl-aFs..tcttlh..PlossP-P.KR+FlPS.KtEtK+lhKlV+AIRpGcIpspc.pcc........cpphYDlWscc.......tppptpphtclsAPKhsLPGap.ESYNPPsEYL.sccEcppWpph..c.c-RchpFlPp+asuLRpVPuYpchlpERFERCLDLYLsPRs++p+lsl.DPEsLlPKLPsP+-L+PFP ...................Y--hsHIGYDlsGK+Ih+Pt....pt-tLDphL-ph-.sP.c.h..........W...pslhD.toG.pslpLoc..--lcLlc+lp.psc..hscssas...PY...psh.l-aFo..tc.tlh..Pl.os.tP-s.KRpFlPS.+h.Et+..+VhKlV+AI+tGhIhs.+.hcp.............p.phYDLWus-...........p.ts.cphhalPAPKhs.PG.at.ESYNPPsEYL.sccEcptWppt.......-sp-R..........ch.pFlPp+asuLRpVPuYpchlcE......RFERCLDLYLsPRs..R...+....+.lNl.D.P.-sLlP+LPsPc-L+PFP............... 0 96 148 210 +7978 PF08146 BP28CT BP28CT (NUC211) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in BAP28-like nucleolar proteins [1]. 20.50 20.50 20.80 23.40 20.40 18.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.80 0.71 -4.49 29 300 2009-01-15 18:05:59 2005-02-22 15:45:25 7 10 246 0 210 299 0 152.00 29 8.80 CHANGED Kpshsspps.lhchhLpAhDhRppp.........................phstpslsclEsplhss.hlphlhKLs-ssFRPLFhcLhcWAhst...tt........pssphpRhlsFa+hhsplt-sLKSlhosYhuall-sss.plLpph....................sssssss...........................hpLhphlLpsLppsFpaDp-pF .........................pphsppts.lhphhLpAhDhRppp................................p.shpplsclEss....l...hcs.hlshlhKLs-ssFRPLFh+Lh-WAts................................psuptsRhloFYphhsplt-pLK................ulhTsahuallcsss.plLpps...............................................s.pptpp........................................htLhphlLp..sLppsFhaDpppF............................................ 
0 68 114 174 +7979 PF08147 DBP10CT DBP10CT (NUC160) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in the Dbp10p subfamily of hypothetical RNA helicases [1]. 20.60 20.60 22.50 22.30 19.70 18.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.44 0.72 -4.16 23 302 2009-01-15 18:05:59 2005-02-22 15:49:32 7 8 257 0 208 294 1 63.70 37 7.59 CHANGED DLssD-stt...htpp+phh+WD+KKpKaVsh.....uspsspKhI+uESGt+IsA.Sa+SG+a-cWpKp+ ...........DLhsD-stp......hppppphh+WD+K+KKaVst..................suppsp..K..hI+sESGthI.s....A..S...a+.o..u.p....a.ccW+pp.p... 1 67 110 172 +7980 PF08148 DSHCT DSHCT (NUC185) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in DOB1/SK12/helY-like DEAD box helicases [1]. 21.90 21.90 22.70 25.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.96 0.71 -5.04 102 1286 2009-01-15 18:05:59 2005-02-22 15:57:23 7 24 846 4 689 1241 370 174.30 29 17.64 CHANGED hsssshs.pl..+GRlAscIpu...tsELllsEhlhsu.hhscLsPpclsAllSshVa-p+pssp.........ss............plpcshpp.......ltpltpcltplppcppl...........hhp.....p..phsLhplV..apWApGtsasplh..........phT.sl.EGslVRhh+Rlt-lLcQltpA.........hhusspLppphc.pAhphl+R..slVhtsShhl .................................h...tthl.p...cGRlsscI...po...tsE...LllsEhlhsG.hassLsP.t.plAAl.lSshVapp+ssst..............ss.........................pLtpshpp........................................lp.c.l.tpclsplppc.ppl..............t.............hhpp...chslhcll..YpWAp.Gt.sasplh....................pho..-l.EGslVRhh+Rlt-lLcQl....tpA...............................hhu..s..spLtp....phc..pA.hptl+R..slVhhs................................................. 0 256 432 601 +7981 PF08149 BING4CT BING4CT (NUC141) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in the BING4 family of nucleolar WD40 repeat proteins [1]. 
20.90 20.90 22.50 20.90 20.60 18.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.58 0.72 -4.47 31 342 2009-01-15 18:05:59 2005-02-22 15:57:57 6 6 292 0 239 342 11 78.80 53 14.46 CHANGED PYhsH..tsusplpslpFsPaEDlLGlGHspGFoSllVPGuGEsNaDuhE..sNPaETpKQRpEpEV+sLL-KLsPEhIoL-P ..................PYhsa....t.utplpslpFCPaEDVLGlGH.spG............aoSllVP..GAGEsNFDuhE..sNPaEo.pKQRpEtEV+uLL-Kl.PEhIsLDP.................. 0 86 134 202 +7982 PF08150 FerB FerB (NUC096) domain Staub E, Bateman A, Mistry J anon Staub E Domain This is central domain B in proteins of the Ferlin family [1]. 25.00 25.00 29.50 27.30 24.80 23.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.62 0.72 -3.91 5 436 2009-01-15 18:05:59 2005-02-22 16:00:55 7 62 72 0 247 370 0 75.70 48 4.27 CHANGED PQNSlPDIhIWMlpG-KRlAYARIPAHQVLYSpss-ptsGKsCGKlQTlFLKYPt-KssGs...+VPVKlRVpLWLGLS ...PQsSlPDVhIWMlpssK.RlAYARlPA+plLaShs.p.p..tsG+pCGKlQTlFLK..............h..P..t....ccs..G................pls.uclclhlWLGL..................... 0 47 64 150 +7983 PF08151 FerI FerI (NUC094) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is present in proteins of the Ferlin family. It is often located between two C2 domains [1]. 28.00 28.00 28.20 29.20 27.60 27.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.55 0.72 -3.96 16 414 2009-01-15 18:05:59 2005-02-22 16:01:35 7 51 80 0 250 369 4 71.60 46 3.94 CHANGED lDlGhlYcpPsHtFh+KWslL......oDPcDp....puGsKGYlKlolhVlGp.............GDp...sshph.tsssscp-DIEsNLLl.PsGV ...........hDlGolYspP..s.......H..pah+KWhlL.......sDP.-.D.h......ouG.s.KGYlKsslsVlGp.............GDp...s.spp....+st.ss-p-DIEuNLLl.PtGh................. 0 52 73 156 +7984 PF08152 GUCT GUCT (NUC152) domain Staub E, Bateman A, Mistry J anon Staub E Domain This is the C terminal domain found in the RNA helicase II / Gu protein family [1]. 
21.20 21.20 21.20 21.40 20.80 20.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.22 0.72 -4.00 19 241 2009-01-15 18:05:59 2005-02-22 16:02:17 7 10 123 1 143 235 2 96.60 33 14.30 CHANGED Gaop.lcpRSLLouhcGaVTlhLpss.psh.shuasaphL+p.Ls-...ptsspl+slsLhtDs..pGsVFDVPp-.s-chhst.tcs.......uhpLsssppLPs..Lpp ........Ghop.hcpRSLloucpGalThhLpss....ppht..s..hu..aua+pLpc..pLu-....shss..clpphsl.lcsp....hGs.sFDVPsptsccl.pphpcs..............phpLolsscLPcLp.............................................. 0 47 77 102 +7985 PF08153 NGP1NT NGP1NT (NUC091) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in a subfamily of hypothetical nucleolar GTP-binding proteins similar to human NGP1 [1]. 21.40 21.40 21.90 24.50 20.90 21.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.77 0.71 -4.19 37 376 2009-01-15 18:05:59 2005-02-22 16:02:59 7 9 299 0 254 366 3 128.30 43 21.82 CHANGED MY+ph+shRstcGcllpst.hQ..sp.ptssARIcPsR+WFGNTRVIuQcsLppFR-thupptpDPYpVllKpsKLPhSLLp-s.....t+ppcs+l.l-sEsappTFGP.KupRKRP+.L.sssslE-LuppApp.pppsY ..................................MY+p..Ks..hRspcGcll+sstaQ.......sp..sss...sARl-PsR+WFGNTRVI.uQpuLppFR-phupphpDPYpVllKpsKLPMSLLp-p.......................+pp+s+l.l-TEsFpsTFGP.KuQRKRP+.L...sssslc-LsppuppptppY......................... 1 99 146 212 +7986 PF08154 NLE NLE (NUC135) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is located N terminal to WD40 repeats. It is found in the microtubule-associated protein Swiss:Q12024 [1]. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.88 0.72 -3.96 66 603 2009-01-15 18:05:59 2005-02-22 16:03:45 7 15 305 0 433 599 6 64.60 28 13.67 CHANGED plhlpFhocptt...phsssshhlPsslsp.hsLspllNp..LL..............psccslPF-Fllss.p....hl+soLpcal ...............lhlpFhopps...hphss.ssltlP.s....s.lsp.tpLspllNp..LL..........................psccsl..PasFhlps.p.........lpsoLtchh...................... 0 154 243 360 +7987 PF08155 NOGCT NOGCT (NUC087) domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in the NOG subfamily of nucleolar GTP-binding proteins [1]. 20.10 20.10 21.20 23.40 19.80 19.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.37 0.72 -4.28 29 370 2009-01-15 18:05:59 2005-02-22 16:04:31 6 11 298 0 245 355 1 53.40 52 8.46 CHANGED +++Lp+DlEpE..pGGuGVYslDL+KpYhLts-EWKaDhlPEIhsG+NlsDFlDPDI ...............+RKLERDlE.E...uhussYslDL.+KpY.....Ltss-W.KaDhIPEIh-G+NlhDalDPDI.... 0 87 138 203 +7988 PF08156 NOP5NT NOP5NT (NUC127) domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in RNA-binding proteins of the NOP5 family [1]. 21.60 21.60 22.00 21.70 21.50 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.99 0.72 -3.81 69 744 2009-01-15 18:05:59 2005-02-22 16:05:18 8 14 327 0 499 728 15 65.40 36 12.64 CHANGED halLaEo...............uuGYuLFKlttctchhss...pltcphpshpphschVcLpuFp.FcsstpAL-sssslsEG ............halLaEo...............uuGYALF+lt.ctc....hss.......plt....cp.hpshp+hschV+LtuFp.Fp..ssspALc..s..ssulsEG........................... 1 178 282 416 +7989 PF08157 NUC129 NUC129 domain Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in a novel family of hypothetical nucleolar proteins [1]. 
25.00 25.00 31.10 49.00 18.90 18.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.90 0.72 -4.06 4 38 2009-01-15 18:05:59 2005-02-22 16:06:17 6 1 32 0 22 34 0 61.20 66 27.01 CHANGED YhssRLKDpshpsSpQpAAccFIpSpLYGPsosRTTsNchLSLpNKRussKtAAsQFlspsWu .YlAVRLKDQDLRDSRQQAAcsFIpssLYGPGTNRTT.....VNKFLSLsNKRhPVKKAAVQFLNsuWG. 0 1 3 9 +7990 PF08158 NUC130_3NT NUC130/3NT domain Staub E, Bateman A, Mistry J anon Staub E Domain This N terminal domain is found in a novel nucleolar protein family [1]. 25.10 25.10 25.60 25.80 25.00 25.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.34 0.72 -3.68 29 303 2009-01-15 18:05:59 2005-02-22 16:10:13 7 7 261 0 217 296 4 51.70 41 7.26 CHANGED FlupVusCYPc..psssFPppLt-LLppa+s....sLss-LRtpllpuLlLLRNKs.lI ......FlAp.VupCYPc......phssFPpcLt-LLppp+s...............sLcP-LR.plspuLlLLRNKslI........ 0 69 113 171 +7991 PF08159 NUC153 NUC153 domain Staub E, Bateman A, Mistry J anon Staub E Domain This small domain is found in a a novel nucleolar family [1]. 19.60 19.60 21.20 20.30 19.10 19.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.63 0.72 -7.10 0.72 -4.36 39 580 2009-01-15 18:05:59 2005-02-22 16:11:10 7 11 285 0 419 565 4 29.70 40 4.31 CHANGED DsRFtplFEsc-FulD.os.-F+thss.spp .DsRFp.slF-s.p-FslD.ss.cF+thpsht...... 1 134 226 346 +7993 PF08161 NUC173 NUC173 domain Staub E, Bateman A, Mistry J anon Staub E Domain This is the central domain of of novel family of hypothetical nucleolar proteins [1]. 
20.20 20.20 20.30 20.40 19.80 19.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.21 0.71 -4.96 32 340 2009-01-15 18:05:59 2005-02-22 16:12:26 7 5 272 0 238 341 3 186.10 30 15.59 CHANGED lschh-shL.ol+apsuhtplLplluuhF-phthp.usPhhhpsLpsluchRsspp..FshppEh-pslGuAlpuMGPEsVLshLPLNLst..ssst....pssRuWLLPlLRDplpsupLuaFpsphlPLupthpp+htc...ttsccslpsKlapTllpQlWolLPuFCshPhDLtpuF.cthAphLuslLhpps-LRssICpAL+tLl .........................................................................................................................hphhpphL...oh+apt.shtplhpllsshFptht......h........p........u.........t..s...........hhh.phlpslschRtstp.................hshppph-p....slGuAlpuMGP-sVL..p.....hlP.Ls......ls.......tpsh........................p.sRuW....LLPlLR..-..t...l..p................s..s.....p....LuaFpsth.....lPLup......t...htp.+shc..........ht..p.s.tp....sh.psKl...a...c..o...lhtQlWslLPuFCp.h.Ph.Dlt.p.uF..p...shAchL....ush...L.c..p...s...........-LR..sslCpALppLl........................................ 0 77 130 200 +7995 PF08163 NUC194 NUC194 domain Staub E, Bateman A, Mistry J anon Staub E Domain This is domain B in the catalytic subunit of DNA-dependent protein kinases. 
19.30 19.30 21.30 22.50 18.90 18.00 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.30 0.70 -5.61 8 121 2009-09-11 05:38:58 2005-02-22 16:13:54 7 15 83 0 75 118 2 341.00 37 10.01 CHANGED sslRpuhlDRsLlTLLpHCsh-ulhpFFophIsslh-slpu+asK..ssshsa-oQllcKhusY+hlElhYuRLsK--lpScpspINpAaasSsss......cGNELTKsLhKtsasAhoEsM.sGEopLlEhR..RpYHCAAYNshlAlISCohoEhKFYpuFLFsEpsEKNpalaENLIDhcRsY...sFslElEsPhERKcKhluIR+EuR......-uupstpcpPpYLSSp.SYlsDSSLSEEhSQFDFSTGVQs..hsauop-.....ttssptutp+pEpusp........hpLEhDELNpHECMAslsuLIc.HMp+spITPps.....-cGshPt....-LPsWMKFLHsKLuNsuspLNIRLFlAKlIlNscpVF+PYA+hWluPLMQLlV....pssNsucGlHYhVVDllsTlLSWsulupPp....Gss+-El ...................................................h+.shhcRsLlsLh.pCs.sslppFF....s.p.lsphhsh.lpschs+.........sptth-...s...plhcKhu...hachl-lhYsRLsK--lpuppupINpsa.ps.s.s.hs.......cGsELT+sLlKhs.acA.............h.......o...E..sh..s......G-.spL....l..Et+..............R.YHCAAYNC..hlulIssshs-.....K....FYpuFLFs.Ecs-Ks.hlaENlID.hc+pY...sFsl.El...Esshc.c...+..c+h...l..tI.Rccsp................ct.tt..s.....p...p..tspYhu...S....s.....a.hs-SSLSEEh.o...p.F..D..Fosu.Vps..hshssps...........stssphphpcpc.tp.............................hcLEhDE.LNpHECMsshssLlp.HMp+.s......l.Pt.........pp..sshst.................................plPsWMchL+sKLss.stsslNIRLFluKlllN......sp.-VF+PYA+aWLsPLlQl.......ll..........sts..sGucGl..HYhVl-llsslL.SWsslusP.....s................................. 1 36 43 59 +7996 PF08164 TRAUB Apoptosis-antagonizing transcription factor, C-terminal Staub E, Bateman A, Mistry J anon Staub E Domain This C terminal domain is found in traube proteins [1]. This is the domain of the AATF proteins that interacts with BLOS2 or Ceap, that functions as an adaptor in processes such as protein and vesicle processing and transport, and perhaps transcription. 
20.30 20.30 20.50 20.50 20.10 20.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.85 0.72 -3.88 30 299 2009-01-15 18:05:59 2005-02-22 16:22:17 7 6 263 0 214 301 3 82.10 41 16.59 CHANGED YcsLL+-Ll-p+sssuss..............t...sshhhphspppsKhKKsVDTKASKGRKlRYpVp-KLtNFM.AP.psphsWs.......-cth-ElFuoLh .....................YppLL+-LlEp+susssss...............t...spphhthtphcsK..h+K.p.VDpK.ASKGRKlRYpVppKLhNFM.AP...spsshs.............-cshsEhFtoLh................... 0 71 115 175 +7997 PF08165 FerA FerA (NUC095) domain Staub E, Bateman A, Mistry J anon Staub E Domain This is central domain A in proteins of the Ferlin family [1]. 22.20 22.20 22.20 23.10 20.70 22.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.86 0.72 -4.30 12 211 2009-01-15 18:05:59 2005-02-22 16:23:34 6 33 52 0 98 201 0 64.60 36 3.52 CHANGED hulpApssssplsplhlcll-plIpDspp..LPplcsp.sssssLDhplpcLRpppLppIpEtAh+h+ ...............ulpuphstpplsplhhpLlD-lIpD...spp..sLP..slctp.sssThLDpplh+LRsppLppIpEA.Ahph+... 0 18 25 51 +7998 PF08166 NUC202 NUC202 domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found in a novel family of nucleolar proteins [1]. 25.00 25.00 34.20 26.80 24.00 22.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.14 0.72 -4.11 4 96 2009-01-15 18:05:59 2005-02-23 09:50:55 7 4 31 0 52 81 0 70.40 48 12.27 CHANGED PYoSsRC+..lYplL.shlhs..........C.......GtopssLQsutasoEALls..tLlpshsPP...............h.phG ......PYoosRs+..lYsl...L.hhl.s...........C.......GtptssLpsuu.suEALlT..tLhpshsPP......................s.......... 0 11 14 22 +7999 PF08167 RIX1 NUC201; rRNA processing/ribosome biogenesis Staub E, Bateman A, Mistry J anon Staub E Domain Rix1 is a nucleoplasmic particle involved in rRNA processing/ribosome assembly [1,2]. 
It associates with two other proteins, Ipi1 and Ipi3, to form the RIX1 complex that allows Rea1 - the AAA ATPase - to associate with the 60S ribosomal subunit. More than 170 assembly factors are involved in the construction and maturation of yeast ribosomes, and after these factors have completed their function they need to be released from the pre-ribosomes. Rea1 induces the release of the assembly protein complex in a mechanical fashion [3]. This family is usually associated with NUC202, Pfam:PF08166. 25.80 25.40 25.80 25.40 25.70 24.50 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.76 0.71 -4.57 37 221 2009-01-15 18:05:59 2005-02-23 09:51:27 7 9 181 0 158 213 0 166.40 23 20.46 CHANGED h.hllssLppsptlhss...................spstlpclhs+.ltsLLpopsspsRWsGlsLl+shlpts.hplL.ppussalpsLlslLpp................spshtshphsl.slsplhphhpshPoL..oRElhTPpLssh.........Isshlslhp....................phllssLpcLlh.pHPTsFRPFsspl+shLhplls ...................................................h....ht.ltppt.lhss..........................ttstlptLhsp...lssLL..put...psR....a.tGlsLlpshlpts....h.-hh.pps.ssWlpu.lhplLpp.........................scs..s.shc..hslhsLsclh......p.hstphPsL..sR-l.s.ssp.Lssh.........lsshLs..Lhp.....................th.p...ssLpshppllh.haPssh+shtsplpshlhshl.h............................ 0 41 81 124 +8000 PF08168 NUC205 NUC205 domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found in a novel family of nucleolar proteins [1]. 25.00 25.00 28.00 27.00 22.60 21.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.07 0.72 -4.56 4 52 2009-01-15 18:05:59 2005-02-23 09:51:41 6 1 37 0 25 55 0 44.00 54 6.46 CHANGED EpsVhpSFossVspKFISLhSLSSDG.ClYETLIPIpsoDsEcNQ ...EpSlhcSFTASVDpKFISLMSLSSDG.CIYETLIPIpPsDPEKNQ.. 
0 2 2 7 +8001 PF08169 RBB1NT RBB1NT (NUC162) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found N terminal to the ARID/BRIGHT domain in DNA-binding proteins of the Retinoblastoma-binding protein 1 family [1]. 25.00 25.00 25.80 26.80 21.30 24.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.31 0.72 -4.05 7 171 2012-10-02 16:56:36 2005-02-23 09:51:57 6 4 73 1 86 164 0 96.30 50 8.53 CHANGED RRLNDELLGKVVsV....psptccssWasALVVSPSCsDDloVKKDQCLVRSFtDSKFaoVARKDl+Elss.slsKu-hsh+pGhctAhhFhps+tlPcs ........R+.pDEL.LGKVVsV......st..cpc...KspWaPAL..VVu.....P.....oC..sD.-....l..s.VK...KDphLVRSFpD.uK.FaS.......VsRKDl+E.lstp......shPKs-.h.s.hK.uhppAhpFh+o+slPsp.............................. 0 17 22 49 +8002 PF08170 POPLD POPLD (NUC188) domain Staub E, Bateman A, Mistry J anon Staub E Domain This domain is found in POP1-like nucleolar proteins [1]. 25.20 25.20 26.30 25.30 24.60 25.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.02 0.72 -4.14 31 292 2009-01-15 18:05:59 2005-02-23 09:52:11 7 8 256 0 218 296 1 95.00 31 11.29 CHANGED sWslllPata.shshWhtL...............sph......ss+hsGL+phcQlshEpstshFPtDaPt.opuGhthpph.ppcptcpcap++P.u....KRls.applshht.............sFssDW .............................sWslllPhta.shshWhs.l...............hhh.......ssRhuG......L+Etpp.....luhEpt.t.....shFPt.D..aPs.....ot..AGh...............thttp.ptpphppcap..R+P.u....KRss.atpls.hhs.............satssW................................ 0 70 114 178 +8003 PF08171 Mad3_BUB1_II Mad3_like; Mad3/BUB1 homology region 2 Mistry J, Wood V anon Pfam-B_113144 (release 16.0) Domain This domain is found in checkpoint proteins which are involved in cell division. This region has been shown to be necessary and sufficient for the binding of MAD3 to BUB3 in Saccharomyces cerevisiae. This domain is present in BUB1 which also binds BUB3 [1]. 
20.90 20.90 20.90 21.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.38 0.72 -4.08 8 63 2009-01-15 18:05:59 2005-02-23 09:52:36 6 2 48 7 39 66 0 73.40 37 8.76 CHANGED KIslF+Dsh..............ucssPVYKLIcsPG+KsEKIssNFcLLYP-scc......EaClEEILA...luRsl......Y+cppsphK...c- ............KhsIFpDph...........................spssPVYplIpssG+KPE+IssNhcLIYs-sc-......EashEElLA...lSRsl.......Y+p.ppth...pp...................... 1 7 21 35 +8004 PF08172 CASP_C CASP C terminal Mistry J, Wood V anon Pfam-B_7701 (release 16.0) Family This domain is the C-terminal region of the CASP family of proteins. It is a Golgi membrane protein which is thought to have a role in vesicle transport [1]. 28.00 28.00 34.30 31.30 27.90 27.90 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.62 0.70 -5.35 29 261 2009-01-15 18:05:59 2005-02-23 09:53:29 7 5 207 0 179 266 3 229.80 33 35.98 CHANGED hppLppplspsstclpctcpLst+LEsDLtplptt.shstt.ssstuhhshhspphst..........t..ph.uPsssh.s......................ssssSlLPIlosQRDRFRpRNsELEc-L.+ctppplspLcpElssL+tDNhcLYEKhRYlpSYs....................ss.sssthstss....................s.t.............................................pYpppYEppLsPhsuF+t+EppRh.hp+lus....hERhhhShsRhlLus+toRhlFhhYslsLHhlVhhhhhh.huhss.hph ..........................................................................h.tpLptclsphpsphpcpppL.t+LEpDL.plpt...h.tp......spshuh...shh.pphsps....................h..sph.uPsuuh.hst...............................................................usssulLsllouQRDRFRpRNpELEp.........El.pptppplptL..ppElssLptDNlpLYEKhRalpoYst...............................sssssth...........................s.t....................................................................................+YpptY..Epp...lsPFuuFpt+EppRt.hpplSs........................h-+hhhShsRhlLuN+huRslhhhYslhLHhLVFhhLYh.huhsp....h............................... 
0 63 104 152 +8005 PF08173 YbgT_YccB Membrane bound YbgT-like protein Rossi R anon Short protein clustering Family This family contains a set of membrane proteins, typically 33 amino acids long. The family has no known function, but the protein is found in the operon CydAB in E. coli. Members have a consensus motif (MWYFXW) which is rich in aromatic residues. The protein forms a single membrane-spanning helix. This family seems to be restricted to Proteobacteria [1]. 26.10 26.10 26.70 26.60 26.00 26.00 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.21 0.72 -4.12 36 1086 2009-01-15 18:05:59 2005-02-23 11:52:44 6 1 889 0 151 341 16 28.00 61 71.26 CHANGED MWYFuWILGlsLAsuhullNAhWhEhpp MWYFAWILGsLLACuFGlIsAlhLEphp.. 0 23 66 108 +8006 PF08174 Anillin DUF1709; Cell division protein anillin Mistry J, Wood V anon Pfam-B_55293 (release 16.0) Domain Anillin is a protein involved in septin organisation during cell division.\ It is an actin binding protein that is localised to the cleavage furrow, and it maintains the localisation of active myosin, which ensures the spatial control of concerted contraction during cytokinesis [3]. 
21.50 21.50 21.80 21.90 20.40 20.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.89 0.71 -4.13 70 383 2009-01-15 18:05:59 2005-02-23 14:59:18 6 8 201 0 244 378 0 146.60 19 15.15 CHANGED sspstlshs....shplshttct.......hpchF..sstc..............pppthhshh......................chs.......sshcsph..h...sp-.Gshupshlshss..hph................pshupsaplclplasp....hpppsssspppphttt............................pt....................................ssapl..Gplplplhhlsc ......................................................puplslS..sl+lPLhhc.............thF...ps.p.p.................................pphhhhhhh...................................+hu...........c.hcsph.hhs..sps....hshstlsFps.hhh......................ps.sus-F.plclElYut.h...pcptshsss.s.+.+http.........................tt.hs++.......................................................tstapl..uphpLsLt.l.............................................................. 0 55 97 181 +8007 PF08175 SspO Small acid-soluble spore protein O family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) O type (sspO). SspO (originally cotK) are unique to the spores of Bacillus subtilis and are expressed only in the forespore compartment of sporulating cells of this organism. The sspO is the first gene in a likely operon with sspP and transcription of this gene is primarily by RNA polymerase with the forespore-specific sigma factor, sigma-G. Mutation deleting sspO causes the loss of the SspO from the forespore but had no discernible effect on sporulation, spore properties or spore germination [1]. 
20.60 20.60 21.70 21.10 18.20 17.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.60 0.72 -3.64 5 137 2009-01-15 18:05:59 2005-02-23 15:09:59 7 2 133 0 23 61 0 47.80 76 63.65 CHANGED sKRKANHVhPGMNAAKSQGNGAG....YpEE.uQcPLTpAQRQNNKKRKKNQ ....................G.KRKANHsIsGMNAASAQGQGAG....YNEEFANEsLTsAERQNNKKRKKNQ.. 0 3 14 17 +8008 PF08176 SspK Small acid-soluble spore protein K family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) belonging to the K type (sspK). The sspK are unique to the spores of Bacillus subtilis and are expressed only in the forespore compartment of sporulating cells of this organism. The sspK gene is monocistronic and transcription is primarily by the RNA polymerase with the forespore-specific sigma factor, sigma-G. Mutation deleting sspK results in loss of SspK from the spore but had no discernible effect on sporulation, spore properties or spore germination [1]. 25.00 25.00 28.10 27.20 17.50 15.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.32 0.72 -3.96 7 127 2009-01-15 18:05:59 2005-02-23 15:10:38 7 1 127 0 20 41 0 47.40 72 89.68 CHANGED MRNKA+GFP...NppKF-G.EPcA+scaASKRssGohNT+PQERMRASs MG+QAEFWSESKNNSKIDG.QPKAKSRFASKRPNGTINTHPQERMRAAN.. 0 4 12 14 +8009 PF08177 SspN Small acid-soluble spore protein N family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore protein (SASP) N type (sspN). SspN is a 48 residues protein that is expressed only in the forespore compartment of sporulating Bacillus subtilis. The sspN gene is recognised equally by both sigma-G and sigma-F. The role of SspN is still not well-defined [1]. 
25.00 25.00 43.90 62.50 19.40 16.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.38 0.72 -4.23 8 129 2009-01-15 18:05:59 2005-02-23 15:17:28 6 1 129 0 18 37 0 44.90 71 98.57 CHANGED MG.N.KcspsQFsPsHLGTKPhcacuNKGKKMpDKSGcpP.VhQTKG .......MG.NPKKNSKDFAPNHIGTQSKKAGGNKGKQMQDQTGKQPIV..DNG 0 2 10 12 +8010 PF08178 GnsAB GnsA/GnsB family Lee SC anon Short protein clustering Family This family consists of the GnsA/GnsB family. GnsA and GnsB are multicopy suppressors of the secG null mutation. These proteins participate in the synthesis of phospholipids, suggesting the functional relationship between SecG and membrane phospholipids. Overexpression of gnsA and gnsB causes a remarkable increase in the unsaturated fatty acid content. However, the gnsA-gnsB double null mutant exhibits no effect. Both proteins are predicted to possess a helix-turn-helix structure [1]. 25.00 25.00 32.60 30.30 21.80 19.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.62 0.72 -4.31 4 459 2009-01-15 18:05:59 2005-02-23 15:20:45 6 1 376 0 7 49 0 53.40 75 71.87 CHANGED MN.EcLK+pAEp-IusaIoKKlsELpKpTGKEVoEIcFssREpMsG...LESYcVKI ...MNIEELK+pAEsEIA-aIupKIAELpKpTGKEVSEIcFTAR.EKMTG.....LESYDVKI.......... 0 3 3 5 +8011 PF08179 SspP Small acid-soluble spore protein P family Lee SC anon Short protein clustering Family This family consists of the small acid-soluble spore proteins (SASP) P type (sspP). sspP is expressed only in the forespore compartment of the sporulating cell. sspP is also expressed under sigma-G control from the same promoter as sspO. Mutations deleting sspP causes no discernible effect on sporulation, spore properties or spore germination [1]. 
25.00 25.00 25.10 31.50 21.40 24.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.13 0.72 -3.76 8 120 2009-01-15 18:05:59 2005-02-23 15:21:37 7 1 120 0 20 59 0 41.10 67 92.60 CHANGED M.sKNsuKchRpNspKGcssGQP..EPLSGSKKVKNRNHoRQKHssH ....................................+pNppptcp.p.GQP..EPLSGSHKVKNRNHSRQK+pu.... 0 3 11 13 +8012 PF08180 BAGE B melanoma antigen family Lee SC anon Short protein clustering Family This family consists of the B melanoma antigen (BAGE) peptides. The BAGE gene encodes a human tumour antigen that is recognised by a cytolytic T lymphocyte. BAGE genes are expressed in melanomas, bladder and lung carcinomas and in a few tumours of other histological types [1]. 20.60 20.60 22.50 21.70 20.50 18.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -6.93 0.72 -4.19 5 119 2009-01-15 18:05:59 2005-02-23 15:22:01 6 2 112 0 16 78 0 28.70 74 4.85 CHANGED LIAASlWLAA.SAQALEAKLpK-DLPlLA .LIAASLWLAA.SAQALE.....A.....KL+.EDLPlLs.. 0 6 8 13 +8013 PF08181 DegQ DegQ (SacQ) family Lee SC anon Short protein clustering Family This family consists of the DegQ (formerly sacQ) regulatory peptides. The DegQ family of peptides control the rates of synthesis of a class of both secreted and intracellular degradative enzymes in Bacillus subtilis. DegQ is 46 amino acids long and activates the synthesis of degradative enzymes. The expression of this peptide was shown to be subjected both to catabolite repression and DegS-DegU-mediated control. Thus allowing an increase in the rate of synthesis of degQ under conditions of nitrogen starvation [1]. 
20.90 20.90 22.00 94.60 20.80 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.27 0.72 -4.24 3 28 2009-01-15 18:05:59 2005-02-23 15:22:52 6 1 26 0 5 9 0 46.00 86 99.77 CHANGED MEK.cIEELKQLLWRLENEIRETTDSLRNINKSIDQYDKYoY.lKIS MEK.KLEEVKQLLFRLE.DI+ETTDSLRNINKSIDQLDKYsYAMKIS 1 1 2 3 +8014 PF08182 Pedibin Pedibin/Hym-346 family Lee SC anon Short protein clustering Family This family consists of the pedibin and Hym-346 signaling peptides. These two peptides have been isolated from Hydra vulgaris and Hydra magnipapillata. Experiments have indicated that both cause a reduction in the positional value gradient, the principle patterning process governing the maintenance of form in the adult hydra. The peptides cause an increase in the rate of foot regeneration following bisection of the body column. Thus both play important signaling roles in patterning processes in cnidaria and maybe in more complex metazoans [1]. 27.00 27.00 39.50 42.00 24.20 18.20 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.37 0.72 -4.45 4 5 2009-09-11 09:46:32 2005-02-23 15:23:37 6 2 3 0 3 5 0 32.20 49 8.64 CHANGED pLptEIslLQ.hhA-GEDVscpLEpKEKtLpNhcp pLptEIslLQ.hhA-GEDVs+ELEpKEKALuNacc 0 3 3 3 +8015 PF08183 SpoV Stage V sporulation protein family Lee SC anon Short protein clustering Family This family consists of the stage V sporulation (SpoV) proteins of Bacillus subtilis which includes SpoVM. SpoVM is an small, 26 residue-long protein that is produced in the mother cell chamber of the sporangium during the process of sporulation in B. subtilis. SpoVM forms an amphipathic alpha-helix and is recruited to the polar septum shortly after the sporangium undergoes asymmetric division. The function of SpoVM depends on proper subcellular localisation [1]. 
20.60 20.60 21.40 22.90 19.80 19.50 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -6.70 0.72 -4.18 3 45 2009-01-15 18:05:59 2005-02-23 15:24:02 6 1 44 0 19 38 0 25.90 67 15.99 CHANGED MKFYTIKLP+FVGGlV+slLGSFKKD ..MKFYTIKLPKFLGGlVRAhLsSF+K.t 0 6 12 15 +8016 PF08184 Cuticle_2 Cuticle protein 7 isoform family Lee SC anon Short protein clustering Family This family consists of cuticle protein 7 isoforms that are isolated from the carapace cuticle of a juvenile horseshoe crab, Limulus polyphemus. There are 3 isoforms of cuticle protein 7. The 3 isoforms are N-terminally blocked but could be deblocked by treatment with pyroglutaminase, showing that the N-terminal residue is a pyroglutamine residue [1]. 25.00 25.00 139.70 139.60 19.30 18.00 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -4.17 3 3 2009-01-15 18:05:59 2005-02-23 15:24:28 6 1 1 0 0 3 0 59.00 95 100.00 CHANGED QAVRYANGYTYDIETGQVSSPYTGRVYETKGKAPFYGFGFEHPYHYYPGYYHGYPHAFY QAVRYANGYTYDIETGQVSSPYTGRVYETKGKAPFYGFGFEHPYHYYPGYYHGYPHAFY 0 0 0 0 +8018 PF08186 Wound_ind Wound-inducible basic protein family Lee SC anon Short protein clustering Family This family consists of the wound-inducible basic proteins from plants. The metabolic activities of plants are dramatically altered upon mechanical injury or pathogen attack. A large number of proteins accumulates at wound or infection sites, such as the wound-inducible basic proteins. These proteins are small, 47 amino acids in length, has no signal peptides and are hydrophilic and basic [1]. 25.00 25.00 27.00 25.90 23.20 20.20 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.25 0.72 -4.05 4 21 2009-01-15 18:05:59 2005-02-23 15:25:19 6 5 16 \N 13 22 0 43.50 66 43.31 CHANGED MIY-ssSuLFRSFL.ppupssDKRppEsp+.pt.t.KASsNKP...VMsE ..........MIYDVNSPLFRSFLSQKG.u.uuDKRKhEEpKPK-Q+.KAsENKP...VMsE....... 
0 3 10 12 +8019 PF08187 Tetradecapep Myoactive tetradecapeptides family Lee SC anon Short protein clustering Family This family consists of myoactive tetradecapeptides that are isolated from the gut of earthworms, Eisenia foetida and Pheretima vitata. These peptides were termed ETP and PTP respectively. Both peptides showed a potent excitatory action on spontaneous contractions of the anterior gut. These peptides show similarity to Molluscan tetradecapeptides and arthropodan tridecapeptides [1]. 25.00 25.00 37.80 37.80 17.60 15.60 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.49 0.74 -6.07 0.74 -3.72 2 2 2009-09-11 09:59:55 2005-02-23 15:25:42 6 1 2 0 0 2 0 14.00 86 100.00 CHANGED GF+DGuADRISHGF GF+DGuADRISHGF 0 0 0 0 +8020 PF08188 Protamine_3 Spermatozal protamine family Lee SC anon Short protein clustering Family This family consists of the spermatozal protamines. Spermatozal protamines play an important role in remodelling of the sperm chromatin during mammalian spermiogenesis. Nuclear elongation and chromatin condensation are concomitant with modifications in the basic protein complement associated with DNA. Somatic histones are initially replaced by testis -specific histone variants, then by transitional proteins, and ultimately by protamines [1]. 25.00 25.00 112.10 112.00 22.80 17.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.72 0.72 -3.98 2 2 2009-01-15 18:05:59 2005-02-23 15:26:42 6 1 1 0 0 2 0 48.00 98 100.00 CHANGED ARRRHSMKKKRKSVRRRKTRKNQRKRKNSLGRSFKtHGFLKQPPRFRP ARRRHSMKKKRKSVRRRKTRKNQRKRKNSLGRSFKtHGFLKQPPRFRP 0 0 0 0 +8021 PF08189 Meleagrin Meleagrin/Cygnin family Lee SC anon Short protein clustering Family This family consists of meleagrin and cygnin basic peptides that are isolated from turkey and black swan respectively. Both peptides are low in molecular weight and contains three disulphide bonds with high concentrations of aromatic residues. 
These peptides show similarity to transferrins and probably play some vital role in avian eggs but the exact function is still unknown [1]. 25.00 25.00 30.30 30.00 17.90 15.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.34 0.72 -4.48 2 9 2009-01-15 18:05:59 2005-02-23 15:27:09 6 1 4 0 3 11 0 38.90 67 77.43 CHANGED QVhKYCPKlGYCSSKCSKA-VWuhSsDCKhaCClPss.K .......VLKYCPKIGYCSspCSKsplWAhSpsC.KhYCCLPAuWK 0 0 0 1 +8022 PF08190 PIH1 Nop17p; pre-RNA processing PIH1/Nop17 Mistry J, Wood V anon Pfam-B_10462 (release 16.0) Family This domain is involved in pre-rRNA processing [1]. It has has been shown to be required either for nucleolar retention or correct assembly of the box C/D snoRNP in Saccharomyces cerevisiae [1]. The C-terminal region of this family has similarity to the CS domain Pfam:PF04969. 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.88 0.70 -4.99 26 646 2012-10-02 21:54:05 2005-02-23 15:49:18 7 21 191 0 432 665 8 212.10 14 58.79 CHANGED LpD.ppc......cthcp.lpphpppps..p.hhhlpPpPGaClKTphsss.t...KsFINlCpssclstP...pppthht...stt.shpaplPhSlups+tptDpsspsCsVaDVlhNPsslppspcsptF+phlhshAhculcpca...plpLs.csh+hh.phKaKG.shpspplRtpt.tp..tt.t................................ssphttpptstplhpph................ptt...s.t..hpphssst......stpPpY..............phphhpcs......psshPcplllclcLPtlpSsppssLcluccclhl.stp................................tYhLs...l.LPYslcc-pspApFs+pp+tLplphPV 
.........................................................................................................................................................................................................................................................................................hhl.Nh......h...................................................................hP....h..........p....................................................h.hshhhp..h.h.................................................h.....h.ht..ht.............t.....h...........h...........................................................................................................................................................................................................................................................................................................................................................................................s...h.hp...h.h...s........h....t...t.h.lplt.........h.h........................................h....h.hs...l.......t.......s.h.................................................................................................................................................................................... 1 180 235 336 +8023 PF08191 LRR_adjacent LRR adjacent Mistry J, Schubert WD anon Pfam-B_1177 (release 16.0) Family These are small, all beta strand domains, structurally described for the protein Internalin (InlA) and related proteins InlB, InlE, InlH from the pathogenic bacterium Listeria monocytogenes. Their function appears to be mainly structural: They are fused to the C-terminal end of leucine-rich repeats (LRR), significantly stabilising the LRR, and forming a common rigid entity with the LRR. They are themselves not involved in protein-protein-interactions but help to present the adjacent LRR-domain for this purpose. 
These domains belong to the family of Ig-like domains in that they consist of two sandwiched beta sheets that follow the classical connectivity of Ig-domains. The beta strands in one of the sheets is, however, much smaller than in most standard Ig-like domains, making it somewhat of an outlier [1] [2] [3]. 21.80 5.70 21.80 5.70 21.00 5.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.77 0.72 -4.25 29 1133 2009-01-15 18:05:59 2005-02-23 17:17:53 6 181 124 32 13 1027 3 56.90 48 9.22 CHANGED GsllsPssIScNGsYsssslsW.s.LPs..a..hsEVSYsFsp.lplG.pspspFoGpVhQPLpp .............GsLlsPtoISDsGoY......s..p..Ps..lsW.s...LPs..a....hNEVSYsFsQsVslG.ps.....pssFSGTVTQPL+......................... 1 11 11 11 +8024 PF08192 Peptidase_S64 Peptidase family S64 Mistry J, Rawlings N anon Rawlings N Family This family of fungal proteins is involved in the processing of membrane bound transcription factor Stp1 [1]. The processing causes the signalling domain of Stp1 to be passed to the nucleus where several permease genes are induced. The permeases are important for uptake of amino acids, and processing of tp1 only occurs in an amino acid-rich environment. This family is predicted to be distantly related to the trypsin family (MEROPS:S1) and to have a typical trypsin-like catalytic triad [1]. 
19.80 19.80 19.80 19.80 19.60 19.70 hmmbuild -o /dev/null HMM SEED 695 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.26 0.70 -6.19 9 71 2009-01-15 18:05:59 2005-02-24 11:40:50 6 2 57 0 45 71 3 457.00 26 72.60 CHANGED M.hpphFuhs+tpc.t-tpsp...t-tppsps.tsss...........pso.hthcsh.ps..hospsssuspuSuh...........ascuphshssulSsh..p.ssoh.pp.....sutpsssuhhu.tsst+shpsppsSh.s..ssh.ttppssotssucspssssSp.hp.hshsEppppp..ppt..pltcpLppLtpcLshlMsplppslhNlSpAVIssI-hFKcF..............................h.oh.............psphsaplos.ssuslR+IhKIhLHFhDNLLss-sa.p.+hlLl+pa.cFhppLN.psc.......tstslsp.+saAIG..psssLPscDpltpIh-cIspss.S.lp-QsGuFIAPlLRGlopchsILslhFGhPsPpppHachlpsLasLas.DlHhhshKs.IchAusss.sss..hst...................................pph.....h.pFp.PaRlPsDs.pPPhShSlSoEsos+hSGTlGGYlYPpIs.ppps+LpuYAsupFAITCuHVsLspsp....sYPpVusPSsVL.shYKpuLtcphp+as...........-spltasuhhpplpcl...hphp....................scp+FGQllWGERolIs.......................p+LSDhAIIKVNpphcC.pNaLGDDlt.s..DPuLhFcNLYVRKh..lp+hpP..........................GhpVFKhGuTTKYT+GslNGlKLVYWhDGcIpSSEFlVsS..ssshFAuGGDSGuWILTKL-D......phGLGVlGMLHSYDGEhKQFGLFTPhs-IL-RLcpVTsIcWsl 
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...................................................................hGplh....h..up+.h.p.......................tph...DhAllchs...tph.s.tNhl..Gssl....t..sPsh..hhpNh.Vpph.....l.p..hts............................Gh.pVFKhGsoTsaTsGp...lN.u...h...+...L.l...Y.Ws.......D..Gp......l..p..o....o..EF...lVsS.........sssh...............FAsuGDSGualLoKhps.............................t.uLsllGMlauhDtc.tpasl..hoPht.I.pclp.hT.................................................. 0 9 24 39 +8025 PF08193 INO80_Ies4 DUF1711; INO80 complex subunit Ies4 Wood V, Mistry J anon manual Family The INO80 ATPase is a member of the SNF2 family of ATPases and functions as an integral component of a multisubunit ATP-dependent chromatin remodelling complex. This family of proteins corresponds to the fungal Ies4 subunit of INO80. 
20.10 20.10 24.40 23.90 19.30 19.20 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.76 0.70 -4.41 11 94 2009-01-15 18:05:59 2005-03-04 14:12:13 6 4 91 0 75 89 3 211.90 32 78.21 CHANGED Muuuot.ssssssRpppuus.s...........+tlVsLKLoschLpphh.........................uss.lKccpP.....S.SPuuSsst.s..sSusDNASDus.STP..ssssuuscsPpppuhPuP......KsG...sKRussts..u-ops+sRGKPGPKKKsRL........DDGT.-.......ssphsuuHRLGPKANtG............AINAGLRALDRSGpPCR+WERKslpLKSFTGl.WpLPsWRuP.s.psEpssEspp.s.pTGDSsSKsNp...ssSul.SEKSNoG ...................................................s..t.p.s.....................phhlsLplssthLpth..................................t.....s.ct.pps.....................p.oPss...sss..............sss-ssS-...us....sTP.........ss.ssss.t.tts.hssP...........KtG......sKRuhs.t..s......ts..s..........s........hs+sRGKPGP.K.KK..RL........-DGshs..........................sts.ss.ss.p+LGPKANtG............AINAGLRALDRSGKPCRKWp+pshpLKSFTGlhWplP.sWpu......P..cs.ps...ppsspppt.................sspussptp..........pttt...up................................................ 0 16 38 62 +8026 PF08194 DIM DIM protein Rossi R anon Short protein clustering Family Drosophila immune-induced molecules (DIMs) are short proteins induced during the immune response of Drosophila. This family includes DIMs 1 to 4 that have masses below 5 kDa [1]. 20.80 20.80 20.80 20.80 20.30 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.75 0.72 -4.00 7 97 2009-09-10 23:55:24 2005-03-08 14:48:02 7 1 12 0 49 82 1 35.70 50 49.37 CHANGED MKhLol..shslhLLA.LAsAsshs..PGpVhING-C+sCN ...MKahol..shlLuLLA..LAsAssls....PGsVlINGcChsCN... 0 10 10 31 +8027 PF08195 TRI9 TRI9 protein Rossi R anon Short protein clustering Family Putative gene of 129 bp in the Trichothecene gene cluster of Fusarium sporotrichioides and F. graminearum. Encoding a predicted protein of 43 amino acids which function is unknown [1,2]. 
25.00 25.00 25.10 88.00 19.50 17.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.42 0.72 -4.32 3 20 2009-01-15 18:05:59 2005-03-08 14:50:26 6 1 19 0 1 5 0 43.00 93 100.00 CHANGED MLAAAKLIDSYEMDPDVSWLEVFAYSGVSAALCATIWVAAKAC MLAAAKLIDSYEMDPDVSWLEVFAYSGVSAALCATIWVAAKAC 0 0 0 1 +8028 PF08196 UL2 UL2 protein Rossi R anon Short protein clustering Family Orf UL2 of Human cytomegalovirus (HCMV) which is a short protein of unknown function [1] 25.00 25.00 119.90 119.80 18.90 16.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.01 0.72 -4.23 2 13 2009-01-15 18:05:59 2005-03-08 14:53:45 6 1 6 0 0 10 0 59.60 86 100.00 CHANGED MttDuVuILIVED.s..hPSFGohsASHA.YuFRlLRGIFhlTlVlWslhWlKLLRDshh MuEDSVuILIVEDDDDAYPSFGTLPASHAQYGFRLLRGIFLITLVIWTVVWLKLLRDALL 0 0 0 0 +8029 PF08197 TT_ORF2a pORF2a truncated protein Rossi R anon Short protein clustering Family Most isolated ORF2 of TT virus (TTV) encode a 49 amino acids protein (pORF2a) because of an in-frame stop codon. ORF2s isolated from G1 TTV encode 202 amino acids protein (pORF2ab) [1]. 22.00 22.00 24.10 28.30 20.30 19.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.35 0.72 -4.19 2 69 2009-01-15 18:05:59 2005-03-08 14:59:27 6 2 2 0 0 64 0 33.30 77 65.01 CHANGED MAEFShPVRSttATEGh.pVPRAGAtGEFTHRSQGAIRARDWPGYGQGS MAEFSTPVRStpATEGc.RVPRAGAtGEFT...................... 0 0 0 0 +8030 PF08198 Thymopoietin Thymopoietin protein Rossi R anon Short protein clustering Family Short protein of 49 amino acid isolated from bovine spleen cells [1]. Thymopoietins (TMPOs) are a group of ubiquitously expressed nuclear proteins. They are suggested to play an important role in nuclear envelope organisation and cell cycle control [2]. 
25.00 25.00 25.30 36.90 24.60 23.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.09 0.72 -4.44 7 109 2009-01-15 18:05:59 2005-03-08 15:14:41 6 4 36 2 42 92 0 48.90 74 11.69 CHANGED spFLEDPulLTK-KLKSELlApNVsLPsu-p+K-VYVQLYLKpLTspNp ...PEFLEDPSVLTK-KLKSELlANNVoLPuGEQRKDVYVQLYLQHLTu+N+... 0 6 10 23 +8031 PF08199 E2 Bacteriophage E2-like protein Rossi R anon Short protein clustering Family Short conseved protein described in Lactococcus Bacteriophage c2 of 37 amino acids [1]. 25.00 25.00 36.80 36.80 18.10 16.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -8.11 0.72 -4.36 2 7 2009-09-10 18:27:20 2005-03-08 15:17:58 6 1 7 0 0 4 0 31.60 89 94.44 CHANGED ML.RLLY.RFGK.IKRRlLIDNFSNFCaYNFIs.Fh. ML.RLLYSRFGKFIKRRlLIDNFSNFCaYNFIh.Fh. 0 0 0 0 +8032 PF08200 Phage_1_1 Bacteriophage 1.1 Protein Rossi R anon Short protein clustering Family Gene 1.1 in Bacteriophage T7 encodes a 42 amino acid protein, rich in basic amino acids suggesting its interaction with nucleic acids [1]. Many homologs are present in different T7 and T3-like bacteriophage. 19.90 19.90 20.40 20.40 19.00 15.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.52 0.72 -3.77 6 29 2009-01-15 18:05:59 2005-03-09 11:13:14 6 1 24 0 0 18 0 42.50 49 88.58 CHANGED MR.NFEKhTKR.uNR.scp.F-hpEt.c+G+KhNKspRsRupKRs.WE ..............MR.NFEKhTKR.sNh.scc.hEhpEtps+h+KhpKspRspupKRp.Wc........ 0 0 0 0 +8033 PF08201 BssC_TutF BssC/TutF protein Rossi R anon Short protein clustering Family BssC short protein (57 amino acids) has been described as the gamma-subunit of benzylsuccinate synthase from Thauera aromatica strain K172 [1]. TutF has been identified and described as highly similar to BssC in T.aromatica strain T1 [2]. 
25.00 25.00 34.00 33.10 17.50 16.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.94 0.72 -4.48 4 18 2009-01-15 18:05:59 2005-03-09 15:12:35 6 1 14 0 6 17 1 57.30 53 97.91 CHANGED M..TTCKsCuFaFuVPEsAsDaEsGKGDCVppKEDtKGKYWLSKPshcsossCtsF+.Kp ...MoTCK-Cp.FFslPEsADDaEsGKGDCVpE+cDpKGKYWLSKPlh..-su..s..sCpsFp.K.t........ 0 3 4 6 +8034 PF08202 MIS13 Mis12_component; Mis12-Mtw1 protein family Mistry J, Wood V anon Pfam-B_127825 (release 16.0) Family Mis12-Mtw1 is a eukaryotic conserved kinetochore protein that is involved in chromosome segregation [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.99 0.70 -5.35 9 220 2009-01-15 18:05:59 2005-03-15 09:49:21 6 6 165 0 141 229 2 265.80 21 60.35 CHANGED h.Lpss.hh.pppp..htpptpspR+u.hppRGRRhS.lspth.....lssPHp-VstpEaY+plsss.Lucsh+h+QLLhWshp+ulpchc.......................h.tpsp.ss.htls+sIhcphlcDh+tsphslsW.s+c.t-c.s...........sDsplpp......p.........................................................PNspNlpNcpsls.lcpKlsplcpEhppWsphh.cspp.........sshch.tppp.ht....s+lppsp.-ssss......................splhsphEpplDpLptss+pLputs.hhs-hsspplp.+lshhhtp+hhpchcp..s.t.................ohcLLRsLo........upsp 
......................................................................t.......tpttps.RRuSht.pu.RRhS............................h..Hppl..s-hh+pIssp..LsEscRhppLLhWshptultch..............................................................hh+slpcthlcshtscs..hsW.s+c.tpt.s.................................l.t.......................................................................PNspNlpspppltplp..p..plp....clptEcp...pWpthh.phpp................phch...t....t....p....tt....hp..psh.psth......................................................................tth.t..sp.l..s...phchh...lDpLttts+.Lpt......h.c.ssphh.........h.t...p.t.............................................th......................................................................................................................................................... 1 31 64 109 +8035 PF08203 RNA_polI_A14 Yeast RNA polymerase I subunit RPA14 Mistry J, Wood V anon manual Family This is a family of yeast proteins. A14 is one of the final two subunits of Saccharomyces cerevisiae RNA polymerase I and is proposed to play a role in the recruitment of pol I to the promoter [1]. 25.00 25.00 58.30 54.90 21.10 19.70 hmmbuild --amino -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.74 0.72 -3.74 17 47 2009-01-15 18:05:59 2005-03-15 10:13:06 6 1 45 3 29 38 0 79.80 43 51.19 CHANGED sPlsl+htus.ptlsp--s.phLscFIsppEph........s..sstt.s......ssthussssssusLuQLKRlQRDL+GL.............PPhhs .TPlllHssph.pplop-Es.pFLppFIcppEsl...........sh.ssstss.....ss.....ssTuhshcoshooslSQLKRIQRDh+GL.............PPs..s.. 0 4 14 26 +8036 PF08204 V-set_CD47 CD47 immunoglobulin-like domain Mifsud W, Bateman A anon Pfam-B_2739 (release 7.5) Domain This family represents the CD47 leukocyte antigen V-set like Ig domain [1,2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.95 0.71 -4.38 12 126 2012-10-03 02:52:13 2005-03-18 11:48:33 6 10 67 8 29 205 0 121.20 39 39.37 CHANGED hhlsshhss..spLhhsshhoVp.aTsCNsTVslsC.lssl.hpshsplaVcW+FpscsIhhhssspp.o.......................Lphchscsl..sGNYTCEshph.p.phcphIcLph.hhpWFo.pEp .................................h..lt..hhh..s.llhssTKolE..aTsC..N-Tll.IPCh..ls.N.......t...sphY.l+WK...h.....c....s.....+.s......I...h...o....a....sts.p..ppoh.....hsp.a.o...S.A+l......ppL..cu..s.s.S.L.h.h.-.h..p.D.hl..sGsYTCEsht.sp..th.cohlcLp.+hssWFss.p..................... 0 3 5 9 +8037 PF08205 C2-set_2 CD80-like C2-set immunoglobulin domain Bateman A anon Pfam-B_280 (release 17.0) Domain These domains belong to the immunoglobulin superfamily. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -9.86 0.72 -4.08 33 3971 2012-10-03 02:52:13 2005-03-18 11:48:59 7 376 126 29 2150 9868 2 86.30 17 19.89 CHANGED splpss..shlsst.....t.phlhpCs...ossGhPssploWhpsspsh..........hpsppppsspstlholpSpLphsssp.pspspslsCplpasshptpp ......................................................................phplsCp.....ut.s..u..p..P...s..s..p.l..p..W..h.....p..s..s.p.th............................................pp..p..p..t...p....s......p...s...s.....h...h...s....s..p..S...p.l.p..h.p....s.s.t...t...t..pst....p..l.sCps.pp.......t.............................................. 0 644 828 1357 +8038 PF08206 OB_RNB Ribonuclease B OB domain Bateman A anon Pfam-B_484 (release 17.0) Domain This family includes the N-terminal OB domain found in ribonuclease B proteins in one or two copies. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.64 0.72 -4.46 48 5260 2012-10-03 20:18:03 2005-03-18 13:56:51 6 22 3124 6 885 5744 3246 58.60 30 10.43 CHANGED Gplpua.ccGF.GFltsDs.....tppDlFlPtpphppshcGDpVlspltt.tccct+pcupll+l .............Ghh.sp..ccG.F..GFl.h..s.....-s............ppDlF.....I.....P.....s.....s.....p.....h.....p.....t.....u.....h......c....G.DpVl..splpp....sc...+.c..s....c....t..cGcllcl.................................... 0 278 554 744 +8039 PF08207 EFP_N Elongation factor P (EF-P) KOW-like domain Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.50 0.72 -4.10 222 5745 2012-10-01 20:16:17 2005-03-18 14:02:52 7 10 4640 16 1192 3021 2037 57.00 36 30.63 CHANGED sss-l+pGhslph.cGp.ahl.....................l-h.pasK.PGKGsAhsRsKl+NlhoG.shh-coF+u.u-pl ..............ss-h+sGhsl..ph.-.G.p...hhtV.....................l-h..pasK..PG..K.G..s..A.hsRsKl+slhoG.phlEcTF+us-ph.................... 0 397 771 997 +8040 PF08208 RNA_polI_A34 RNA_polI_final; DNA-directed RNA polymerase I subunit RPA34.5 Mistry J, Wood V anon manual Family This is a family of proteins conserved from yeasts to human. Subunit A34.5 of RNA polymerase I is a non-essential subunit which is thought to help Pol I overcome topological constraints imposed on ribosomal DNA during the process of transcription [1]. 
28.10 28.10 28.10 28.10 27.30 28.00 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.40 0.71 -4.64 25 173 2009-01-15 18:05:59 2005-03-21 15:31:00 6 2 161 8 126 170 0 209.50 20 59.28 CHANGED ptcsKplWLIpsPsslslspL.pplslsh.....sthtpup.sslptpsppYplhp-sht........tsstuptolLlssscspthhs........tshshsphapIpEssplPths.......htpshss+.shppscGL+hRahPsGhusss....................pt.stppppsppp..pptphhppscspccEppcK+pcpE.cpp......hpcKKsKKc+..........................pcp+ccKK+KK .............p.hpscplWhIpsPsshshssl.ppltlsh......tt.tpsp...shh.stc..sp..p...Yplhpsp....................sptsphs.lLlPsppssshhh.........tshshspshplpcssplsths...............tpshss.....pssh.p.sps..L..+hRahPhGhsss....................................tt.s.sttpsppp...pptph..p.ptp....h..p.ppc..p..pc.c+pptc....ctp...................................h.tKKpKKpc.....................................................................pp........................................................................................................................................................................................................................................................................... 0 33 64 98 +8041 PF08209 Sgf11 Sgf11 (transcriptional regulation protein) Mistry J, Wood V anon manual Family The Sgf11 family is a SAGA complex subunit in Saccharomyces cerevisiae. The SAGA complex is a multisubunit protein complex involved in transcriptional regulation. SAGA combines proteins involved in interactions with DNA-bound activators and TATA-binding protein (TBP), as well as enzymes for histone acetylation and deubiquitylation [1]. 25.00 25.00 25.20 25.20 24.10 24.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.37 0.72 -4.63 33 238 2012-10-03 11:22:52 2005-03-21 16:24:55 6 7 194 4 166 251 0 32.80 46 10.50 CHANGED ss.hhsC.NCsRpluusRFAsHL-+ChG.hupps ........spCsNCsR.tluAuRFAsHLEKChG.hGpp...... 
0 54 88 134 +8042 PF08210 APOBEC_N APOBEC-like N-terminal domain Finn RD anon Manual Domain A mechanism of generating protein diversity is mRNA editing. Members of this family are C-to-U editing enzymes. The N-terminal domain of APOBEC-1 like proteins is the catalytic domain, while the C-terminal domain is a pseudocatalyitc domain. More specifically, the catalytic domain is a zinc dependent deaminases domain and is essential for cytidine deamination.APOBEC-3 like members contain two copies of this domain. RNA editing by APOBEC-1 requires homodimerisation and this complex interacts with RNA binding proteins to from the editosome [1] (and references therein). This family also includes the functionally homologous activation induced deaminase (AID), which is essential for the development of antibody diversity in B lymphocytes, and the sea lamprey PmCDA1 and PmCDA2, which are predicted to play an AID-like role in the adaptive immune response of jawless vertebrates [2]. Divergent members of this family are present in various eukaryotes such as Nematostella, C. elegans, Micromonas and Emiliania, and prokaryotes such as Wolbachia and Pseudomonas brassicacearum [3]. 
38.10 38.10 43.90 43.60 32.50 31.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.61 0.71 -4.67 22 683 2012-10-02 00:10:39 2005-03-22 09:56:53 6 6 111 12 182 716 0 172.70 33 80.64 CHANGED hphth+ts.hsp..................................tcppohlhhhlctpt......hh+Ghhpscs.....tttHsEphhlptlhshh................pssphaplshasShSPChc..................CActluphLpph.....tslslplhsp..pLa.h.pptt.................pppGlhphhpu...Ghplchhthp...Daphshphhs...ps-...............................cthphWpstp.shthhppp.hth.p ..............................................................................................................s+pcoaLCYpVctppsst........hsh.pGhhps..p...................ttpH.AEhpFLphhpshp................sssppYcVTWahSWSPChs..................CApclscFLppp.....sploLpIFsu..RLYah..ctp...............................hpcGLRpLpps....GspltlMshp...-FpaCWcsFV...ppt.......................................csFpPWcsLppp...............h................................................... 0 17 27 55 +8043 PF08211 dCMP_cyt_deam_2 Cytidine and deoxycytidylate deaminase zinc-binding region Finn RD anon Pfam-B_8221 (release 16.0) Family \N 20.90 20.90 20.90 20.90 20.80 20.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.54 0.71 -4.12 18 900 2012-10-02 00:10:39 2005-03-22 09:57:04 6 6 846 12 148 2195 501 122.40 54 42.26 CHANGED LppYLP-uFGPpDLtl.....sshLh-ppspshslpssDs...............LhppALcAANpSaAPYScs.SGVALhspsGclYsGtYAENAAFNPSLsPlQuALlphshs..GcsassIppAVLVE+psuplSphusupsh ..........................................................Lc-YLPDAFGPKDLpI........coLL.MD.cpD....H.G...asL..s.u..Ds............................................................LsQAAlsA.A.N.+S.HhPYS+.S.P.SGVALE....s.............+.....D..G...+....I...F.......oGuYAENAAFNPoLPPLQuALh.hLsLp.......G.h...-.a..s...D..IpR.AV.....LsE+uc.A.sL.QhssTp..s................................... 
0 35 71 109 +8044 PF08212 Lipocalin_2 Lipocalin-like domain Bateman A anon Pfam-B_2479 (Release 17.0) Domain Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The structure is an eight-stranded beta barrel. 21.10 21.10 21.10 21.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.83 0.71 -4.40 19 1931 2012-10-03 08:47:39 2005-03-23 14:57:04 7 16 1379 7 485 1577 247 138.30 39 74.08 CHANGED lD.LpRYhGpWYElARhPhhFp.+ssscspApYsLp-DG.pIpVtNcChp..hcG...phppspGpAphtssup.su+L+VpFhs.........spu-YaVlhlDs-.YphAlVGsPsRcYLWlLSRoPplscpphppllscA+pp.GYDsscLhhssp ..........................................................................hDhpR.Y.l.Gp.WYE..I........A...R......h....s...p....p..F....E.....+...G...h....p.p....l..o.As.....Yo.L...c...s.....D.G.....s.....l.pVl..N....+shs..........ct..............hpp.s...c.G....c.....Ah.h..s...sss.......s..............putL..K..........Vo.Fas........................shhG.sY.Vlt.....L...........D..........p.....-.......Y.......p.....a.......A.......l......V..........s.........u.......P........s......+.......c..........Y..........L..W....ILSRo.Ps..l.sc...p..h....hp.p.h.ls.hAppp..G.a.D..sscllhs..................................................... 0 154 272 396 +8045 PF08213 DUF1713 Mitochondrial domain of unknown function (DUF1713) Mistry J, Wood V anon manual Domain This domain is found at the C terminal end of mitochondrial proteins of unknown function. 20.10 20.10 20.10 20.10 17.90 19.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.94 0.72 -4.34 23 504 2009-01-15 18:05:59 2005-03-29 14:01:14 6 5 475 0 231 276 2 32.90 55 24.09 CHANGED pspSVhR+RRpKMpKHKaKKLpK+pRsLRR+Lc+ ...hsSVlKKRRK+MsK+K+RKLl++TRhpRR+htc.......... 
0 74 139 193 +8046 PF08214 KAT11 DUF1714; RTT109; Histone acetylation protein Mistry J, Wood V anon manual Domain Histone acetylation is required in many cellular processes including transcription, DNA repair, and chromatin assembly. This family contains the fungal KAT11 protein (previously known as RTT109) which is required for H3K56 acetylation. Loss of KAT11 results in the loss of H3K56 acetylation, both on bulk histone and on chromatin [1]. KAT11 and H3K56 acetylation appear to correlate with actively transcribed genes and associate with the elongating form of Pol II in yeast [1]. This family also incorporates the p300/CBP histone acetyltransferase domain which has different catalytic properties and cofactor regulation to KAT11 [3]. 20.30 20.30 21.30 20.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.36 0.70 -5.37 34 515 2009-01-15 18:05:59 2005-03-29 14:08:11 6 56 258 9 340 491 6 297.80 30 21.94 CHANGED LsshLtch..lPpstplsIRhluSssppspsh.sh.st..t....psphsa+pcthhshpphss.......................-lslFGlcV..Yso............sspRplalShhDSstah+............thcstshpphlluYL..s+pcsaspth................................................................................hhuh.sptuspYlF...PsspcsPc...........scpLhcWah+hL-phl.....................................pstlllssp-hacphthhsspsp..s.............................................................stsplPhFssD.hsphL--LhcE............................lpp.h-phthRpEhphuthsu.hhh......................ts......t.hppt..h+tshptht.....tshsssspts.pplhsc......lhpphp.tp..hhhlphptssptsspshhs 
..........................................................................................p..hstst...plh...lRhlt.sssp.ps..cst.thht...t..h..p..pt...hspphs..Y+s+s...lhs.Fpc...l-G..............................s-VshFGh.aVpEYs.otss...........sNpRpVYlS..YLDSlpah+Pp......................thRT.tlY+.ElLl.u.YLc.a..s+p.h..G.a...spsa................................................................................IW.A.C.PP.p.cG..DDYIFa.sHPssQKhPK....................................sc+LpcWYp+hL-+uh......................................................................................tctllhsh......p.-hac.p.........sppcths.......................................................................................................ssppl.PaF-sDhasphlE-.lcE..........................................................................................lcp..-pht..hpp...p.t..s...t....s.th.................................................................................pt...pt....p...................p.p...t....t......h........................................s.......................................................................................................................................................... 0 96 158 280 +8048 PF08216 CTNNBL DUF1716; Catenin-beta-like, Arm-motif containing nuclear Mistry J, Wood V anon Pfam-B_13045 (release 16.0) Domain CTNNBL is a family of eukaryotic nuclear proteins of the catenin-beta-like 1 type that contain an armadillo motif. A human nuclear protein with this domain (Swiss:Q8WYA6) is thought to have a role in apoptosis [1]. The interaction of CTNNBL1 with its known partners (the Prp19-CDC5L complex and AID) is mediated by recognition of NLS (nuclear localisation signal) motifs. The RNA-splicing factor Prp31 is also an interactor, with recognition also occurring through the NLS. CTNNBL1 uses its central armadillo (ARM) domain to bind NLS-containing partners [2,3]. 
20.50 20.50 20.50 25.10 20.40 18.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.19 0.72 -4.46 12 275 2009-09-11 07:46:25 2005-03-29 14:47:21 6 9 226 0 202 271 0 104.80 41 19.23 CHANGED u-ccclslhchh-pstps.....EsLD-sslKKhlLsFEKRhhcNpEhRlKaPDsPEKFh-SElDLcchIpch+slAshP-LY..hV-LssVpSLlsLLuH-NoDIslsVl ........................t....t.phhphl-pptptt.......EslDtssl++hlLsFEK+hhKNtEhRhKasDsPpK..FM-SElDLcstIpphp....llAs.tP.-.....L.Y..hV.cL..s.sls.....SLluLL.u...H-NT.DIulssl.................. 1 77 112 164 +8049 PF08217 DUF1712 Fungal domain of unknown function (DUF1712) Mistry J, Wood V anon manual Family The function of this family of proteins is unknown. 18.20 18.20 18.30 18.80 18.00 17.80 hmmbuild -o /dev/null HMM SEED 604 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -13.03 0.70 -6.27 14 421 2009-01-15 18:05:59 2005-03-29 15:31:36 6 7 235 0 288 406 0 245.80 15 61.02 CHANGED hhlFsss..hGppEGp..EhcKlLhaHP.......phslspKlpsIGlsEullpFTcsFossc.sCcs.....lcsp+pshlhhcsEssaWhshslp....p.csK-h.t.h..........hhpshL+psYphFphhpGshpuhhp.....t.scptLpshLp-ahhsah..pplp.hspp-hhc.hsol.................................QhLsLs+psaLplp................................uhlss....hssshsslt.hSshLap-pllhus.lusc-......................................................tphLasaslshhhshss.s-sus..hhpso.uhhh..sps..............h..hohsptsssspsph......................hsplal..............phcpppc.htlLhaphps.lslhlhhss.phhspp-hhpcLctplhpshoplhpslt.plsppts.st.tpcspsa..........+Ylh...hsptshth+ool.thhst.sp.s.tls.ssLcll...........................................s-lssttptu..p.tstttstEhh......................................................l+shsssWllt+psst+challLcph...................usol....l-lscplt+hsspah.sshF 
............................................................................................p.....t........tplhhah...............................t....php.lGh.puhhthsp...h...s.t....t....p...........................lp..pp.p....hh.p.E..tha.h.....h.lp....................................................................h.t.l...hthahh..s...................................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 2 100 149 222 +8050 PF08218 Citrate_ly_lig Citrate lyase ligase C-terminal domain Bateman A anon Pfam-B_3588 (release 16.0) Domain This family is composed of the C-terminal domain of citrate lyase ligase EC:6.2.1.22. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.12 0.71 -4.82 14 1051 2012-10-02 18:00:56 2005-04-01 14:24:12 6 8 898 0 105 1842 244 182.00 56 52.98 CHANGED luuIVMNANPFTLGHpYLlEpAuppsDalHLFlVs-DsShFsap-RhsLlppGspcLsNlslHsGucYlISpATFPuYFlK-ps..sVhcspsplDlplF+chIApsLsIT+RYVGpEPhsplTshYNQtMpphLtpst.....Ipll.ltRp..ptsspsISASpVRphlccsshptltpLVPtTThpYl ...............................................IGsIVMNANPFThGHRYLlpQAAupC...D..W..LH..L..F..l..V....+...E...D....s..S....h.....F.s.Y..cDRhsLVhcGs.A..c..l...s...p...lTVH.p.G.S....-...YI....I...S...R...A...T...F..P..s..Y..Fl..KEpu..llsc..saspID..LpIFRpalAPALG.......I.THRFVG.oE.......P.......FCpVTspYN.QsM+hh.Lc.sss.s.ussIclVEItRl...........phpptsISAS+VRpLLscp....-.h.s.........AIusLVPssTLpYL............................................................................................. 1 27 54 77 +8051 PF08219 TOM13 Outer membrane protein TOM13 Mistry J, Wood V anon manual Domain The TOM13 family of proteins are mitochondrial outer membrane proteins that mediate the assembly of beta-barrel proteins [1]. 25.00 25.00 36.80 35.60 17.50 16.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.75 0.72 -4.22 16 121 2009-01-15 18:05:59 2005-04-01 16:42:45 6 2 120 0 92 112 0 81.80 45 43.57 CHANGED lshssDuEststhppps.s.pssusssh......slWull+uuuINLlLPFlNGhMLGFGELlAHEluFRasW......sGs+l.P.pRh ................................................................l...s-SEphs...s.pp...s...sssss..ssssh......................olaull+uuAINLlLPFlNGhMLGFGELhAHE.h.uFRh.GW.......sGsKlaP.pR+................. 
0 22 47 77 +8052 PF08220 HTH_DeoR DeoR-like helix-turn-helix domain Bateman A anon Bateman A Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.51 0.72 -4.43 15 15183 2012-10-04 14:01:12 2005-04-05 17:40:47 7 49 3318 0 2086 11826 840 55.70 33 21.64 CHANGED RpppIlchlpppGplolcELspthsVSstTlRRDLscLpppGl.lpRsHGGAthsssp ...........................RpptIl.p.h.l.....p..p..p...s..t..l..s.l..p.....-..L.u.p..t.....h..s.V..Sp...tTIRRDLs........p....L..pp.....pG....h....l...p...R...s....+G.GAhh............................... 0 573 1175 1649 +8053 PF08221 HTH_9 RNA polymerase III subunit RPC82 helix-turn-helix domain Moxon SJ, Bateman A anon Pfam-B_9884 (release 8.0) Domain This family consists of several DNA-directed RNA polymerase III polypeptides which are related to the Saccharomyces cerevisiae RPC82 protein. RNA polymerase C (III) promotes the transcription of tRNA and 5S RNA genes. In Saccharomyces cerevisiae, the enzyme is composed of 15 subunits, ranging from 160 to about 10 kDa [1]. This region is a probably DNA-binding helix-turn-helix. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.67 0.72 -4.10 18 285 2012-10-04 14:01:12 2005-04-05 17:50:21 6 11 249 2 200 310 6 60.60 30 11.07 CHANGED pLsstllcpaaG-lsupVsstLlppGpholpplscpsp..LshspV+puLssLlQaphVpYhtp .......................LsshllpppFG-l...ssclhstL.lp.p.G...p.s....lppls..p............pop.....ls....hc.pV+puLslLlQpslV.a............................. 0 63 106 160 +8054 PF08222 HTH_CodY CodY helix-turn-helix domain Moxon SJ anon Pfam-B_7573 (release 9.0) Domain This family consists of the C-terminal helix-turn-helix domain found in several bacterial GTP-sensing transcriptional pleiotropic repressor CodY proteins. CodY has been found to repress the dipeptide transport operon (dpp) of Bacillus subtilis in nutrient-rich conditions [1]. 
The CodY protein also has a repressor effect on many genes in Lactococcus lactis during growth in milk [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.02 0.72 -4.63 3 1097 2012-10-04 14:01:12 2005-04-05 17:50:46 6 4 1087 3 131 519 14 60.70 76 23.39 CHANGED pEGRLTASsIADRIGITRSVIVNALRKLESAGIIESRSLGMKGTYLKVLN-pFl+ELcchK ...........EGhLsASpIADRlGITRSVIVNALRKLESAGlIE.S.R....SLGMKG.TYlKVL.ps.chh-ELcK......................................... 0 59 91 113 +8055 PF08223 PaaX_C PaaX-like protein C-terminal domain Fenech M anon Pfam-B_9563 (release 14.0) Family This family contains proteins that are similar to the product of the paaX gene of Escherichia coli (Swiss:P76086). This protein is involved in the regulation of expression of a group of proteins known to participate in the metabolism of phenylacetic acid [1]. 21.00 21.00 21.10 21.10 20.60 20.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.97 0.71 -4.57 63 785 2009-11-26 12:00:55 2005-04-06 08:39:11 6 3 636 5 203 603 67 159.20 26 55.33 CHANGED tsW.......cGpWpllhh...sptptptRcplRcpLphhGFGplusulalpPsshtt...slpthltchshpsps..hhpsh......s....shpphlppsWsLsplsptYcpFlppapshh...............tptpt..hsstpsahtRhLLlHpaR+hLlpDPhLPpcLLPscWsGtsARpLhpslappltss .....................................................................................tWcGpWhLllh...sthcpss+tpl+cp.....LthhGFGsLt....sulahpPsphtt.......................slpp.h...lt...c...h...sls...s...pl...hhhpup.....hsps.........shpshlp.....phWc...LsphsttYcp....Flppapshh...............ttttp....hsstps.hhhphlLl..ct...aRRhlhpDPh.LPt-LLPscWsGtpA.Rphhhshhpplt..h................................................................ 0 55 122 166 +8056 PF08224 DUF1719 Domain of unknown function (DUF1719) Mistry J, Myung-il K anon Pfam-B_27966 (release 16.0) Domain This is a domain of unknown function. It may have a role in ATPase activation. 
25.00 25.00 29.10 26.60 23.20 22.70 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.27 7 104 2009-01-15 18:05:59 2005-04-07 11:11:33 6 3 7 0 74 84 0 200.20 37 46.05 CHANGED +pSuFPRRIAHATKShlSS.lhhsshDt.sopSsVRRFEhhADGAs-FLR.VEhGGo..P+pYhFFDPLIuHLLAGcsLcYchlp.Gsp.+hhhIRPhshtERGlEAhlhFshpsspsPccsFhLGhlLplSESTsllGhlI+CLQ.hhsPHF+SssEsV+pELsQLPTQDhoWl.sas..hc.haWDslHshtTpWaRPNPlCCppHs....t..toushshhtl.sh..psVIpl.lppal ............................psuFP+RlA...+As+ShlSS..hh..t.......t...s.-...p....op.............osVpRFEhFADGAs-FLRhl.E.h..Gus..spp..a.......h.h..hDPLlt+L.LAGctLpYc.h.hp....G....sp..............h.hh.hl..t..P.......h..s.ht-RGl....EAtlhhhhpDt..pts-psFhLshhLplSE.STsllGhhlpCLp...hhs...P..pFc..s.ssEss....+pcLspL.ppch...hh..P....h..........p......ahppl....Hph.ophhRPsPhCCcp.pp.........t...........t...t...t...t...t......th...EslIpl.hpt................................ 0 0 36 58 +8057 PF08225 Antimicrobial19 Pseudin antimicrobial peptide Rossi R anon Short protein clustering Family Pseudins are a subfamily of the FSAP family (Frog Secreted Active Peptides) extracted from the skin of the paradoxical frog Pseudis paradoxa (Pseudidae). The pseudins belong to the class of cationic, amphipathic-helical antimicrobial peptides [1]. 19.40 19.40 21.20 56.60 19.10 17.10 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.45 0.72 -6.81 0.72 -4.47 2 4 2009-01-15 18:05:59 2005-04-08 11:48:24 6 1 1 0 0 4 0 23.00 78 97.87 CHANGED GlNTLKKVhQGLHEsIKLlsNHs GlNTLKKVhQGLHEsIKLlsNHs. 0 0 0 0 +8058 PF08226 DUF1720 Domain of unknown function (DUF1720) Mistry J, Wood V anon Pfam-B_19709 (release 16.0) Domain This domain is found in different combinations with cortical patch components EF hand, SH3 and ENTH and is therefore likely to be involved in cytoskeletal processes. This family contains many hypothetical proteins. 
21.40 21.40 21.40 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.26 0.72 -11.42 0.72 -3.92 66 116 2009-01-15 18:05:59 2005-04-13 14:45:25 6 24 75 0 89 133 0 78.20 31 8.31 CHANGED hQP...............................QtTG............................pts..........sl.....psQsTG............hpP....................Q.TGh........................lpP....QtTGh..............................................t......h.pPQsT..............Ga ..................................................................s.ttp.....tlpsQ....TG...........hpP..........Q.TGa.....s.tt.....................LpPQpTGF........................t.h.pPQ.TG............................................ 0 28 53 79 +8059 PF08227 DASH_Hsk3 DUF1721; DASH complex subunit Hsk3 like Mistry J, Wood V, Finn RD anon manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. This family also includes several higher eukaryotic proteins. However, other DASH subunits do not appear to be conserved in higher eukaryotes. 24.00 24.00 24.70 28.90 23.50 23.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.82 0.72 -3.77 16 165 2009-01-15 18:05:59 2005-04-13 16:15:21 6 2 133 0 96 142 0 45.60 43 20.17 CHANGED pRphupLtuQLspLpsNLuchpphLchsuhQs..pshctLGshpuuh ...sRphspLhhp.ptLstNlAphsp.LchhSlQs..hc.+FLuuhshu...... 
0 16 37 68 +8060 PF08228 RNase_P_pop3 RNase P subunit Pop3 Mistry J, Wood V anon manual Family This family of fungal proteins form a subunit of RNase P, the ribonucleoprotein enzyme that cleaves the leader sequence of precursor tRNAs to generate mature tRNAs.\ The structure of Pop3 has been assigned the L7Ae/L30e fold [1].\ This RNA-binding fold is also present in human RNase P subunit Rpp38, raising the possibility that Pop3p and Rpp38 are functional homologs. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.02 0.71 -4.40 5 88 2012-10-10 14:40:03 2005-04-14 09:40:32 6 1 88 0 67 163 1 156.70 25 65.50 CHANGED KR+QVYKPVL-NPaTNEAchWP+Vc-Q.lllELLpspll+sLl+hpc...Kss.-pclssGaNEIl-lLu..pusSc-V.....hLFVCs+D...PSVLloQlPLLstsushs....VsLVQLP+uupA+Fc-+l.GhS+s...GMLLV+ccAslDpsFsshlpspVEphshPWLcs ........................................................................................................................................................................................................................................p....l............hhu......hNplsphLp........ptss..p.h.................................hlFVs+pD..p.P.sl..LhpphPhLsh.h....Asts......................l+LVtLPKuu.splussL..Gl.s+s.....uhlulp..s.ss.s.hs.ps.LhchlpppVs.....l....p..hPWLp......................................................... 0 18 38 58 +8061 PF08229 SHR3_chaperone ER membrane protein SH3 Mistry J, Wood V anon manual Domain This family of proteins are membrane localised chaperones that are required for correct plasma membrane localisation of amino acid permeases (AAPs) [1]. SH3 prevents AAPs proteins from aggregating and assists in their correct folding. In the absence of SH3, AAPs are retained in the ER. 
25.00 25.00 46.70 38.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.15 0.71 -5.35 17 149 2009-09-11 11:37:31 2005-04-14 11:25:11 6 2 130 0 110 144 0 193.30 34 90.31 CHANGED Yp-hssluTuL.IlsuToFhLGllFushPYDashLWssssT.tptFDhuLpHYphhtsoP.hlhalLahVhhLGllGthIKLYKPst-spLF-YuSLsLYhlulslYlTNl+pGlpsshsGsWG.................-VspppGlsVlAASplhlhllLlGVLlLQuGhWYApac-ppphcpFhpcE.....tttttptppptcsppppspppK ...................................sshuThl.IlssosFhLGllFupaPYDaslLWsssso..ssa.........a.-hh.pHhp.h.las.......o.....Ps...hl.tlLpl...VhhlGllGhhhKLa.KP....sEushhFDGuSLsLYhhulsVY.lsNlhpGlcssss..staG..........................................................l..scps...u....LpV..l..AASNsIlsllLlGVLlLQuGpWY.Acpc-ppchcph.tcc.........tpttttttt...................pt............................. 0 35 64 95 +8062 PF08230 Cpl-7 Cpl-7 lysozyme C-terminal domain Garcia Lopez E, Bateman A anon Garcia Lopez E Domain This domain was originally found in the C-terminal moiety of the Cpl-7 lysozyme encoded by the Streptococcus pneumoniae bacteriophage Cp-7 (Swiss:P19385). It is assumed that these repeats represent cell wall binding motifs although no direct evidence has been obtained so far. 25.00 25.00 39.20 25.30 22.80 23.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.92 0.72 -4.23 14 202 2009-01-15 18:05:59 2005-04-14 13:01:08 6 31 126 0 22 178 7 40.60 50 18.33 CHANGED lcplApEVIpGpaGNGp-R+psLt..GaDhcAVQscVNplLs ..................lDplApEVIpG..caGNGp-R+p+L...GhsYssVQp+VNchL..... 0 8 8 13 +8063 PF08231 SYF2 SYF2 splicing factor Mistry J, Wood V anon Pfam-B_11988 (release 17.0) Domain Proteins in this family are involved in cell cycle progression and pre-mRNA splicing [1] [2]. 
22.40 22.40 22.80 24.20 20.70 22.30 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.22 0.71 -4.09 26 339 2009-01-15 18:05:59 2005-04-14 14:01:54 7 5 283 0 231 317 2 149.80 35 56.19 CHANGED cppGhDh-Rp+.hhshospcsEchcc+ccKKcpp..s.uassaspps.h+tYc+ph+sh..phDh-pYc+pKcphsct.............................FYsssssh..s.....pppPsp-A.l-+llpslc+pp....................cpRpKhpR+Rttc--t...DlsYIN-RN+pFNcKLsRaYsKYTsEI+pNLE ......................................................................t.ttGpDh.-+h+.hhchos--uE+..a-+Khc+Kptc...hGFs.sYsp.ts...h....+tYc+hh+pl........psDhEpY..c+p+ppt.s-t..........................................Fassus...o..lh..au...ppt..s.s.pcs..lDRhVp-Lccp.....................c+...RtKhpRR.Rhhs..--s.....Dl..s.YI.NE+NpcFNcK....lpRaYsKYTtEI+pslE.......... 0 79 127 188 +8064 PF08232 Striatin Striatin family Mistry J, Wood V anon Pfam-B_7946 (release 17.0) Family Striatin is an intracellular protein which has a caveolin-binding motif, a coiled-coil structure, a calmodulin-binding site, and a WD (Pfam:PF00400) repeat domain [1]. It acts as a scaffold protein [2] and is involved in signalling pathways [1] [3]. 27.00 27.00 27.30 32.00 26.70 26.10 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.79 0.71 -3.85 31 401 2009-01-15 18:05:59 2005-04-14 16:11:00 7 11 230 0 232 376 0 125.40 42 18.23 CHANGED TLsGVh+aLQoEWp+hER-RstWElERuEM+uRIApLEGEp+s.cthppcLt++lchLEhslKpcRs+hpp............................................tt..t.tt..t.t.tsst.sp.stpspsphpcuR.hLpcshpElsY..hllss..s .........olsGlh+alQpEWsRaEh-RspWElERAEhp.............A+IAhLpGER+upEs.LKpDL..sRRIKMLEaALKQE..R.AKh++L+hGs-hs........................................................................................ptp...........p..ps....tsp.p.t.st.......ttssphth+puRphL+........Q..a..LpElGY..pIlssp.s............................................................................................................ 
0 64 103 170 +8066 PF08234 Spindle_Spc25 Chromosome segregation protein Spc25 Mistry J, Wood V anon Pfam-B_14484 (release 16.0) Family This is a family of chromosome segregation proteins. It contains Spc25, which is a conserved eukaryotic kinetochore protein involved in cell division. In fungi the Spc25 protein is a subunit of the Nuf2-Ndc80 complex [1], and in vertebrates it forms part of the Ndc80 complex [2]. 22.30 22.30 23.60 22.30 21.70 20.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.27 0.72 -3.98 58 256 2009-09-23 13:20:45 2005-04-15 13:01:43 7 7 219 2 194 247 3 73.90 28 28.58 CHANGED lcshts-p..l+FhFspl-sp-.p+E..hthslp.lss..ppYclhpssPpl...pslscllpcLN....cspshhtFl+phRctFtp ...........................hchhtt-p.LcFlFsp..lD.ps.-+c..hhFpLs..lsp.....ccYclhcspPpL....pslpcllcclN.......copshssFL+phRctFht............ 0 62 110 157 +8067 PF08235 LNS2 LNS2 (Lipin/Ned1/Smp2) Mistry J, Wood V anon Pfam-B_2646 (release 16.0) Domain This domain is found in Saccharomyces cerevisiae protein SMP2, proteins with an N-terminal lipin domain (Pfam: PF04571) and phosphatidylinositol transfer proteins [1]. SMP2 (also known as PAH1) is involved in plasmid maintenance and respiration [2], and has been identified as a Mg2+-dependent phosphatidate phosphatase (EC:3.1.3.4) that contains a haloacid dehalogenase (HAD)-like domain [4]. Lipin proteins are involved in adipose tissue development and insulin resistance [3]. 
26.70 26.70 27.50 26.90 26.30 26.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.75 0.71 -4.66 16 814 2012-10-03 04:19:28 2005-04-15 14:14:50 8 20 307 0 473 726 11 147.40 44 17.04 CHANGED VVlSDIDGTIT+SDsLGHhlshhG+D.Wo+sGlscLapclppNGYpIlYlTuRulGQAcpT+uYLppl.pts..pLPcGPllLSPsthhsuhhRElIh++PchFKhusLp-IpsLa............sppPFaAGFGN+.sDshuYpsVGlPssRIFhlNspGElhhp ...............................lVlSDIDGTIT..+SD..s......L.Gpl.l...........s...h...lG+..D....Wo..+.........tGlscLa.p..cl............ppNGY.phlYlouRul.uQAchT+uaLptl.p..t....s...............h....t...............LPcGPl.hhSPsphhsu.hp..REll..p+..cPc.tFK..hssL.p.........DIpsLF.............................................pppPFYAuFGNR.sDlhu..Yp..............pVGlP...s+IFhlsspuclh............................................. 0 132 208 346 +8068 PF08236 SRI SRI (Set2 Rpb1 interacting) domain Mistry J, Wood V, Sammut SJ, Bateman A anon Pfam-B_106465 (release 17.0) & pdb_2a7o Domain The SRI (Set2 Rpb1 interacting) domain mediates RNA polymerase II interaction and couples histone H3 K36 methylation with transcript elongation [1]. This domain is conserved from yeast to humans. Members of this family form a compact, closed three-helix bundle, with an up-down-up topology. The first and second helices are antiparallel to each other and are of similar length; the third helix, which is packed across helices alpha1 and alpha2 is slightly shorter, consisting of only 15 amino acids. Most conserved hydrophobic residues are largely buried in the interior of the structure and form an extensive and contiguous hydrophobic core that stabilises the packing of the three-helix bundle. This domain mediates RNA polymerase II interaction and couples histone H3 K36 methylation with transcript elongation [2]. 
20.90 20.90 20.90 21.90 20.70 20.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.96 0.72 -4.04 25 233 2009-01-15 18:05:59 2005-04-15 16:03:34 6 10 207 2 167 246 1 90.50 28 7.69 CHANGED psspphccpacp..hlupaVsNhlcKYccc.......ls+-shKphA+-ls+tLssKEhK.......pss...spsPstEloccpp+KlKpFs+pYMDKhltKhcp+c ..............t.tc+pcchacp..phutalsphlsta++p.......hscE-...hK+.h.A+cls+tlssK.E.hK.......psc......ss.shcls-......phc+KlKcalKcYhpKhssha+t......... 0 52 85 132 +8069 PF08237 PE-PPE PE-PPE domain Mistry J, Adindla S anon manual Domain This domain is found C terminal to the PE (Pfam:PF00934) and PPE (Pfam:PF00823) domains. The secondary structure of this domain is predicted to be a mixture of alpha helices and beta strands [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.41 0.70 -4.93 32 715 2012-10-03 11:45:05 2005-04-19 14:46:55 6 11 138 0 135 708 1 213.70 31 45.55 CHANGED P..shs...h.hl...hPt........thh......hssh..........ohspSlutGsstLpsAlpst...................ssss...hslhGaSQGAhlsst.htpLts..ssss....ssloFlhlGsPtps....sGGlhspass..hhlPh.....tlsassssPsss.........Ys....ThplstpYDuh.ADhPshPhNllAssNAlh.......G.hhhlH.........s.......sa.s...........s..s.....stshs.s.spstsusTTYhhlPsp..pLPLL.PL+.l.s.......lssslsshl-ssL+slV-tGYs 
..................................................................psl...hPt....................phhPhss.......lssh....................ohs..pSV.spGs.s..Lc.sAI..t...s...t......................ssss...lsV...h...GaSQuAhlsshthpp..Lts...s.ssss.........................sspL.s.F.l.h.lGsPtpP..............sGG..lhtpas......s.............hh....lPh....................l.s..assssPsss......................Ys..........T.h.hsh.pYDGh.uD...h...Pp...h...P...h...Nl.l.....u.s.h.NAlh...........G..hhh..sH............s...........sa..shs......................................................stt...........stsl.hs..ss..uhs...usTs.YYhl.sp..pLPLL.PLRtl..s...............lsss...lsshlpssL+slls.hGY....................................................................................................................................... 0 19 73 122 +8070 PF08238 Sel1 Sel1 repeat Bateman A anon Pfam-B_49 (Release 17.0) Repeat This short repeat is found in the Sel1 protein [1]. It is related to TPR repeats. 21.10 2.40 21.10 6.80 21.00 -999999.99 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.33 0.72 -3.19 594 40768 2012-10-11 20:01:00 2005-04-21 12:03:46 7 502 2506 23 13860 42646 6566 35.20 26 38.95 CHANGED spAthpLG........h...pG...........l.p....s....hpp..AhpaappAAppGp .......................................AthtLG..............hha.hpG.t.....................G..ltp.........D..............hpp.......Ah..paappAAptG................................. 
0 6224 9270 11773 +8071 PF08239 SH3_3 Bacterial SH3 domain Bateman A anon Pfam-B_178 (Release 17.0) Domain \N 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.82 0.72 -3.78 263 10209 2012-10-02 18:48:24 2005-04-21 15:21:09 6 416 2914 9 1841 9471 1861 54.50 23 20.20 CHANGED ssl..slR..ssP.sts...up..hlsp.......l..tG..pplpl....lp..ppss................W..hcl....................t.suh.p....Gal..t..sshlp ............................s..l..slR..ssP..uss...sp....llsp...................l..ppG......pp.lpl.............ls....ppss.........................................W.......hcl.......................p.sGp..p..........Gal..s...sphl........................................... 0 797 1353 1602 +8072 PF08240 ADH_N Alcohol dehydrogenase GroES-like domain Bateman A anon Pfam-B_7 (Release17.0) Domain This is the catalytic domain of alcohol dehydrogenases. Many of them contain an inserted zinc binding domain. This domain has a GroES-like structure [1-2]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.71 0.72 -4.29 92 42970 2012-10-01 22:45:51 2005-04-21 15:24:04 7 521 5327 455 13369 34065 8204 101.00 25 23.28 CHANGED ss-Vll+lpssGlCsoDl..phhpG...............hhsphP.......hlhGHEhsGhV.p..lG.ssVps......hphG-+Vsl...shhs.C........up..............Ct.Cpp.Gp..shC.........................tphphhGhs..........hsGuaAEY.lhlP.......tptlh.l ..................................................s-Vll+lp.s.sGl..s.t..s......Dl.....th..h.pG.........................................h...ht.hP..............h.l..h.G...H...E...........s.....s..G.........h..Vh..p....l...G....s...s..V.s.s..............................h.c..sG...D.........+.V..s.s.......................s...........................................s.........s.....................t............s...............................................................................................u.....t..p..h...h.............h............................................................................................................ 0 3704 7884 11039 +8073 PF08241 Methyltransf_11 Methyltransferase domain Bateman A anon Pfam-B_21 (release 17.0) Domain Members of this family are SAM dependent methyltransferases. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.13 0.72 -3.50 172 22723 2012-10-10 17:06:42 2005-04-22 12:24:16 7 278 4590 31 8318 65769 22557 94.00 22 33.45 CHANGED LDlG...sG.sGhhsthlspt.........stplhulDhstp.......hlp......hucpphttt.....................hhtus.......sppl..Pht.-s..oFD.hl...hsttsl..pah.......pc.ppslpEhtRlL+PGGhlhh .......................................................................................................................L-lG...sG...sG..h..h..s...h..h..htpt........................stp.l....h..u......l....D...h...stt..............................h.lp...................h.u..p..p....p..h..t..t.t.t..t............................................................hht.u..s......................s.p..p..L...........P.....a.....s.....-....s............o......F...D.....h.l.....................h..s....t.....t.....s....l........pah..........................tc..h...t...p......s...l...p.....E....h....t......R....V....L.+.P.G.Ghhh........................................................................................... 0 2885 5512 7158 +8074 PF08242 Methyltransf_12 Methyltransferase domain Bateman A anon Pfam-B_46 (release 17.0) Domain Members of this family are SAM dependent methyltransferases. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.27 0.72 -3.52 108 3160 2012-10-10 17:06:42 2005-04-22 12:25:33 7 531 1307 4 1307 38110 12780 102.00 25 6.03 CHANGED L-lGsGsGthhthlhpt...................thphh.uhDhSsthl.....hstpchtphph.th.thphtthshhp..............................saDl....ll...sh.sllcah.........tshp.psLp..plt.phLps.uGhl ...................................................................LElG.uG.o.G....s...s.o.t.h.ll..pth.......................................th.c.Y.s....hoD...l......S...s.shl...........ttA.p..p............+....h...s....s....h........s..............h......t......h......t......h....h.....c..h.p..p..h..s..h..t.p.t.........................................................................................tsaD.l.........l...l......us....s.....V.L.Hss......................ppl...p...psL.p....plp...pl...L.p.P.GGhl........................................................................................... 0 345 748 1069 +8075 PF08243 SPT2 SPT2 chromatin protein Mistry J, Wood V anon manual Domain This family includes the Saccharomyces cerevisiae protein SPT2 which is a chromatin protein involved in transcriptional regulation [1]. 20.90 20.90 20.90 22.10 20.30 20.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.95 0.71 -3.78 7 281 2009-09-11 05:41:46 2005-04-22 12:54:49 6 7 216 0 195 268 0 106.40 30 22.63 CHANGED Kpsshsh..ptpSt+t...s.ph-s-pDpchpcFIcD-cE-psch..........pccIhpIFshs+p+....ap..D--D.....MEAshtEl.cEEpRutRhAcLEDccE.ph.cEctppK+t+Kp+ ...........................................................tt.........................pcp.cc...-....D...p-h..-.D..F..I...-D-t-ppc.p.......................ppt.IhthFs.h.s+p+.......Y..p....D.-....sD.......ptMEuuat-lpcEEt+St+lutpEDhcE....hchEcccp+c.Kct+Kt......... 
0 57 95 152 +8076 PF08244 Glyco_hydro_32C glycosyl_hydro2; Glyco_hydro_32; Glycosyl hydrolases family 32 C terminal Finn RD, Mistry J anon Prosite Domain This domain corresponds to the C terminal domain of glycosyl hydrolase family 32. It forms a beta sandwich module [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.12 0.72 -3.60 178 3225 2010-01-08 16:43:10 2005-04-25 13:01:37 7 49 1933 59 570 2522 88 88.70 22 16.56 CHANGED hcls............................ssp.....th...Gltlhht..sstpcpstlhac................t.tppplslDRspou....ts.......ht.thss......hctshhtt...................tth..........................pL+lhlDcSslElFsssG ....................................................................................................ththtt..................h......th..l.h......tst..scthtlh.ac.............................t.tps..plslDR.opuu........t....................ht.thssh................scss.hsp..............................................................pt................................pLclhlDpSslElFsNsG............... 
0 129 322 462 +8077 PF08245 Mur_ligase_M Mur ligase middle domain Bateman A anon Pfam-B_26 (release 17.0) Domain \N 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.40 0.71 -4.25 92 26660 2009-09-15 12:35:17 2005-04-26 11:46:18 7 94 4735 65 5962 19905 11199 195.10 21 41.78 CHANGED lTGTNGKTTTsphltpll..ptt....st.hhsosG................s..t.lGlshhhhthtp.....tschhlhElSSa.......plpth.......hcsclu..lloNlsp-HL-..ha.toh-s.YhpuKtcla.ptht.ts......hsllN..h....DDt.....hhhthhpptthp.sl.sauhpsps...............lth..pssph...................................hh.h............hpl...slhG....p...aNltNsLAAluss ...................................loGTsGKTTTs...s..hl.splL.....pts.............Gh..ps...t.s...h.G.....................................................................s............l...s.h.s..h..h.h..h...h..htp........................tsc.h.hVh...Esu...p.t..................tplsth....................h.pP...p...lu.........l..l..TN.......l.s.h.D.Hh....-........ha....s.......s......h..-.s....ht.p.s.Ktp.....lh...c.t...h.............ts................h.s.l.ls....t.....-cs..............................hhp.h..h.....t...t..t....t...t...p......hh...t..a.u.h.p.tts....................th..t...tlt.h.........p..s.t.t.h................................................................................p.h.h..h............t...ht........hpl.......sl.G..........p......aNh....N.uhsAluh........................................................................................................................................................................ 0 2011 3956 5101 +8078 PF08246 Inhibitor_I29 Cathepsin propeptide inhibitor domain (I29) Bateman A anon Pfam-B_14 (release 17.0) Domain This domain is found at the N-terminus of some C1 peptidases such as Cathepsin L where it acts as a propeptide. 
There are also a number of proteins that are composed solely of multiple copies of this domain such as the peptidase inhibitor salarin Swiss:Q70SU8. This family is classified as I29 by MEROPS. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.98 0.72 -3.70 95 3417 2009-01-15 18:05:59 2005-04-26 17:20:04 7 34 536 17 1591 3509 92 58.50 30 16.86 CHANGED FppahpcasKp..Y...............tsppE....ptpRh.phFtpNhphlppaN...........pspts.......a...ph........................................ulNcFuDhopcEa ............................appattpas+s.....Y........................tst.pE.....ctp..Rh.plFc.....cNhchIcpaN...........................tspto..............a......ph..........................................ul.N...pFuDhTp-EF........................................................................................ 0 651 976 1322 +8079 PF08247 ENOD40 ENOD40 protein Rossi R anon Short protein clustering Family Rohrig et al. reported the in vitro translation of two peptides of 12 and 24 amino acids from the short, overlapping ORFs of soybean ENOD40 mRNA [1]. The putative role of the enod40 genes has been in favour of organogenesis, such as induction of the cortical cell divisions that lead to initiation of nodule primordia, in developing lateral roots and embryonic tissues. This supports the hypothesis for a role of enod40 in lateral organ development [2]. 20.40 20.40 22.70 22.70 17.80 16.30 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.84 0.74 -6.01 0.74 -3.79 4 11 2009-01-15 18:05:59 2005-05-03 16:13:00 6 1 7 0 0 9 0 12.30 76 100.00 CHANGED McLCWQpSIHGS ...McLCWQKSIHGS 0 0 0 0 +8080 PF08248 Tryp_FSAP Tryptophyllin-3 skin active peptide Rossi R anon Short protein clustering Family PdT-3 or Tryptophyllin-3 peptide is a subfamily of the family Tryptophyllin and of the superfamily FSAP (Frog Skin Active Peptide). 
Originally identified in skin extracts of Neotropical leaf frogs, Phyllomedusa sp. This subfamily has an average length of 13 amino acids. The pharmacological activity of the tryptophyllins remains to be established [1] but it seems that these peptides possess an action on liver protein synthesis and body weight [2]. 17.70 17.70 23.10 23.10 12.50 10.90 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.28 0.74 -5.89 0.74 -3.17 2 8 2009-01-15 18:05:59 2005-05-04 09:25:07 6 1 5 0 0 8 0 12.00 75 91.43 CHANGED EKPaaPPPIYPh D.KPFWPPPIYPh. 0 0 0 0 +8081 PF08249 Mastoparan Mastoparan protein Rossi R anon Short protein clustering Family Mastoparans are a family of tetradecapeptides from wasp venom, that have been shown to directly activate GTP-binding regulatory proteins. These peptides show selectivity among G proteins: they strongly activate Go and Gi but not Gs or Gt. The peptide of this family are composed by 14 amino acids but they can assume different structures [1]. 21.00 21.00 21.40 21.40 20.80 20.70 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.67 0.74 -6.03 0.74 -2.95 5 10 2009-01-15 18:05:59 2005-05-04 09:45:41 6 1 8 3 0 12 0 14.00 72 84.34 CHANGED INLKAlAAlAKKlL INLKAIAAhAKKLL 0 0 0 0 +8082 PF08250 Sperm_act_pep Sperm-activating peptides Rossi R anon Short protein clustering Family The sperm-activating peptides (SAPs) are isolated in egg-conditioned media (egg jelly) of sea urchins. SAPs have several effects on sea urchin spermatozoa: stimulate sperm respiration and motility through intracellular alkalinization, transient elevation of cAMP, cGMP and Ca++levels in sperm cells [1,2]. 
18.00 18.00 18.20 18.20 17.60 17.60 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.36 0.75 -5.62 0.75 -3.07 11 21 2009-01-15 18:05:59 2005-05-04 09:46:47 6 1 8 0 0 10 0 10.00 75 100.00 CHANGED GFuLuGGGVG GFsLsGGGVG 0 0 0 0 +8083 PF08251 Mastoparan_2 Mastoparan peptide Rossi R anon Short protein clustering Family Mastoparan (MP) peptides I II and III are extracted from the venom gland of the Neotropical social wasp Protopolybia exigua(Saussure) They are tetradecapeptides presenting from seven to ten hydrophobic amino acid residues and from two to four lysine residues in their primary sequences. These peptide cause the degranulation of mast cells. Protopolybia-MP-I also act causing hemolysis in erythrocytes. 25.00 25.00 26.10 26.10 16.50 15.00 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -6.18 0.75 -6.33 0.75 -3.35 2 5 2009-01-15 18:05:59 2005-05-04 09:49:55 6 1 3 0 0 5 0 14.00 76 100.00 CHANGED INWLKLGKtV.shL INWLKLGKtVIDAL 0 0 0 0 +8084 PF08252 Leader_CPA1 arg-2/CPA1 leader peptide Rossi R anon Short protein clustering Family In this family there are Leaders Peptides involved in the regulation the glutaminase subunit (small subunit) of arginine-specific carbamoyl phosphate synthetase. In Neurospora crassa it is a small upstream ORF of 24 codon above the arg-2 locus [1]. In yeast it is the leader peptide of the CPA1 gene. The 5' region of CPA1 mRNA contains a 25 codon upstream open reading frame. The leader peptide, the product of the upstream open reading frame, plays an essential, negative role in the specific repression of CPA1 by arginine [2]. 18.50 18.50 19.20 22.90 18.20 17.10 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.86 0.72 -6.75 0.72 -4.63 3 17 2009-09-10 15:06:03 2005-05-04 09:54:31 6 2 17 1 9 13 0 23.50 64 40.24 CHANGED hstSsSQYTCQDYISDHIWKApSH ....FphS.SpYTCQDYISDHIWKoSS... 
0 3 7 9 +8085 PF08253 Leader_Erm Erm Leader peptide Rossi R anon Short protein clustering Family These short proteins are Leader peptides (15-19 amino acids) of erm genes that code for resistance determinants in Staphylococcus aureus [1]. 21.00 21.00 24.90 24.40 19.60 18.50 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.28 0.72 -6.18 0.72 -4.47 5 32 2009-01-15 18:05:59 2005-05-04 11:41:20 6 1 23 0 1 13 0 18.80 80 82.42 CHANGED MGhFSIFVIsTVHYQPNcK MGhFSIFVINTVHYQPNcK. 0 0 0 1 +8086 PF08254 Leader_Thr Threonine leader peptide Rossi R anon Short protein clustering Family Threonine leader peptide of the Threonine operon thrA1A2BC. It as been sequenced in different bacteria: E. coli, Serratia marcescens, Salmonella typhi [1,2]. 25.00 25.00 25.40 26.20 18.50 16.70 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.04 0.73 -7.30 0.73 -4.09 2 68 2009-01-15 18:05:59 2005-05-04 11:43:54 6 1 68 0 8 11 0 21.20 87 98.63 CHANGED M+hIS..TTIhTThp.TTG.GAG MKRIS..TTIhTTITITTGNGAG 0 1 2 5 +8087 PF08255 Leader_Trp Trp-operon Leader Peptide Rossi R anon Short protein clustering Family The tryptophan operon regulatory region of C. freundii's (leader transcript) encodes a 14-residue peptide containing characteristic tandem tryptophan residues. It is about 10 nucleotides shorter than those of E. coli and S. typhimurium [1]. 25.00 25.00 26.70 28.10 23.40 22.50 hmmbuild -o /dev/null HMM SEED 14 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.06 0.74 -6.50 0.74 -3.84 4 51 2009-01-15 18:05:59 2005-05-04 11:48:22 6 1 51 0 5 7 0 14.00 71 91.30 CHANGED MsuhhuL+GWWRTS MKAIFVLKGWWRTS 0 1 2 4 +8088 PF08256 Antimicrobial20 Aurein-like antibiotic peptide Rossi R anon Short protein clustering Family This family of antibacterial peptides are secreted from the granular dorsal glands of the Green and Golden Bell Frog Litoria aurea, Southern Bell Frog L. 
raniformis, Blue Mountains tree-frog Litoria citropa (genus Litoria) and frogs from genus Uperoleia. They are a part of the FSAP peptide family. Amongst the more active of these are aurein 1.2, aurein 2.2 and aurein 3.1; caerin 1.1, maculatin 1.1, uperin 3.6 [1]; citropin 1.1, citropin 1.2, citropin 1.3 and a minor peptide are wide-spectrum antibacterial peptides [2]. 20.60 20.60 20.90 20.60 20.10 19.80 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.89 0.75 -5.92 0.75 -3.03 14 32 2009-01-15 18:05:59 2005-05-04 11:52:33 6 2 7 0 0 29 0 13.00 62 70.70 CHANGED GlhDlsKKVsGtl GLFDIlKKVsGsI.. 0 0 0 0 +8089 PF08257 Sulfakinin Sulfakinin family Rossi R anon Short protein clustering Family The sulfakinin (SK) family of neuropeptides have only been identified in crustaceans and insects. For most species there is the potential for producing two sulfakinin peptides one have a short sulfakinin sequence The function of the sulfakinins is difficult to assess. For the American cockroach, various forms of the endogenous sulfakinins have been shown to be active on the hindgut, and also on the heart. In C. vomitoria the peptides act as neurotransmitters or neuromodulators, linking the brain with all thoracic and abdominal ganglia. In adults of P. monodon they appear to be restricted to a few neurones in the brain with a neural pathway extending along to the ventral thoracic and abdominal ganglia [1]. 21.20 21.20 21.50 21.50 18.50 20.80 hmmbuild -o /dev/null HMM SEED 9 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.41 0.76 -5.79 0.76 -3.28 2 53 2009-01-15 18:05:59 2005-05-04 11:55:08 6 1 50 0 0 7 0 9.00 92 82.10 CHANGED .-DYGHMRF F-DYGHMRF 0 0 0 0 +8090 PF08258 WWamide WWamide peptide Rossi R anon Short protein clustering Family This family contain neuropeptides, isolated from ganglia of the African giant snail, Achatina fulica. Each peptide has a Trp residue at both the N- and C-termini. 
Purified WWamide-1, -2 and -3 showed an inhibitory effect on the phasic contractions of the anterior byssus retractor muscle (ABRM) [1]. 19.40 2.80 19.40 2.80 11.30 -4.50 hmmbuild -o /dev/null HMM SEED 7 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.27 0.77 -5.57 0.77 -2.79 2 19 2009-01-15 18:05:59 2005-05-04 13:22:39 6 2 2 0 0 18 0 7.00 83 27.71 CHANGED W+pMSVW WKQMSVW 0 0 0 0 +8091 PF08259 Periviscerokin Periviscerokinin family Rossi R anon Short protein clustering Family Abdominal Perisympathetic organs of insects contain Periviscerokinins neuropeptides of about 11 amino acids. 19.50 19.50 19.50 19.50 18.60 18.30 hmmbuild -o /dev/null HMM SEED 11 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -5.64 0.75 -5.78 0.75 -3.14 3 195 2009-01-15 18:05:59 2005-05-04 13:39:12 6 1 82 0 0 34 0 10.90 75 99.16 CHANGED GSSGLIPFPRV ..GSSGLIshPRV. 0 0 0 0 +8092 PF08260 Kinin Insect kinin peptide Rossi R anon Short protein clustering Family These neuropeptides are the first members of the insect kinin-family isolated from the American cockroach. Their occurrence in the retrocerebral complex suggests a physiological role as a neurohormone. The C-terminal sequence Phe-X-Ser-Trp-Gly-NH2 characterised the peptides as members of the insect kinin family. Data suggest a possible involvement of insect kinins in water-balance by regulating the osmoregulation. These peptides have length from 6 to 14 amino acids [1]. 19.70 19.70 21.90 21.90 17.50 17.50 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -5.55 0.76 -5.53 0.76 -2.79 3 5 2009-01-15 18:05:59 2005-05-04 13:46:48 6 1 2 0 0 5 0 8.00 79 100.00 CHANGED cPAFNSWG DPAFNSWG 0 0 0 0 +8093 PF08261 Carcinustatin Carcinustatin peptide Rossi R anon Short protein clustering Family A total of 20 peptides of the superfamily allostatin were isolated from the shore crab Carcinus maenas. They are named carcinustatin 1 to 20 and their length ranges from 5 to 27 amino acids. 
This family includes carcinustatin 8,9,15 and 16. 20.70 0.50 20.70 0.90 20.20 0.40 hmmbuild -o /dev/null HMM SEED 8 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.77 -5.30 0.77 -5.42 0.77 -2.46 2 2 2012-10-01 21:03:17 2005-05-04 14:12:48 7 2 2 0 0 66 0 8.00 88 2.84 CHANGED uGPYuaGL ..AGPYuFGL 0 0 0 0 +8094 PF08262 Lem_TRP Leucophaea maderae tachykinin-related peptide Rossi R anon Short protein clustering Family These peptides are designated Leucophaea maderae tachykinin-related peptides (Lem TRPs). Some were isolated from the midgut of L. maderae, whereas others appear to be brain specific. The Lem TRPs of the brain are myotropic and induce increases in the amplitude and frequency of spontaneous contractions and tonus of hindgut muscle in L. maderae [1]. They were also isolated from brain-corpora, cardiaca-corpora, allata-suboesophageal ganglion extracts of the Locusta migratoria. They stimulate visceral muscle contractions of the oviduct and the foregut of Locusta migratoria [2]. 25.00 25.00 26.90 26.90 24.20 24.20 hmmbuild -o /dev/null HMM SEED 10 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.20 0.74 -5.42 0.74 -3.63 4 4 2009-01-15 18:05:59 2005-05-04 15:39:16 6 1 2 0 0 4 0 10.00 72 100.00 CHANGED APSMGFpGhR APSMGFpGhR 0 0 0 0 +8095 PF08263 LRRNT_2 Leucine rich repeat N-terminal domain Bateman A anon Pfam-B_35 (release 17.0) Family Leucine Rich Repeats Pfam:PF00560 are short sequence motifs present in a number of proteins with diverse functions and cellular locations. Leucine Rich Repeats are often flanked by cysteine rich domains. This domain is often found at the N-terminus of tandem leucine rich repeats. 
20.70 3.10 20.70 5.20 20.60 -999999.99 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.56 0.72 -3.99 101 5573 2009-09-16 14:48:15 2005-05-06 17:06:28 7 2088 260 5 3276 5555 7 41.00 31 5.53 CHANGED hss-tpuLLshKsulst.......ss....shLsuWsss........sssCs...WpGVsCs ........................p-tpALLphKp.u.ls....................ss.........shL..s..o...Wsss................sssCs.........Wp.GVsCs.............. 0 335 1979 2672 +8096 PF08264 Anticodon_1 Anticodon-binding domain of tRNA Bateman A anon Pfam-B_23 (Release 17.0) Domain This domain is found mainly hydrophobic tRNA synthetases. The domain binds to the anticodon of the tRNA. 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.94 0.71 -4.46 171 17806 2009-12-15 13:50:21 2005-05-07 14:43:35 8 114 4976 36 4987 14537 7549 145.80 20 16.55 CHANGED D+alLsclpphlpplspsh-.p.apaspshptlhpa...hhsphsshYlchs+sph.t.....................tslhpslcthlplLsPhhPaloEEla.......................t..........t.l..hh.....tt.a..P..p..........ptph............................................hphh.tl....lphhcp....hR...........sphpls.tstp......hphhl ..........................................................pphhhpphppslp.p.l.s.c.s.h..-.......p..acFspAhp.tlhp.F..........hhsc.hsshY.l..-..hs.K..s.h.h...htttt.t......................s...................ssl...h...p...s...l...c...s...l...l...+...llsPhhPalsE...E..lW...........................................................................ttt........pol....hh.......ss...a...Pp..s.t.h.........p.th.............................................................................tt.thp.h.h..t.l.........lp.s.htp.........hR...............sc.h.pls..sh........h........................................................................................................... 
0 1700 3159 4209 +8097 PF08265 YL1_C YL1 nuclear protein C-terminal domain Bateman A anon Pfam-B_3088 (release 8.0) Domain This domain is found in proteins of the YL1 family [1]. These proteins have been shown to be DNA-binding and may be a transcription factor [1]. This domain is found in proteins that are not YL1 proteins. 20.60 20.60 21.30 21.00 19.70 18.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.05 0.72 -4.38 51 540 2009-01-15 18:05:59 2005-05-07 14:55:52 6 11 291 0 386 523 1 29.80 42 8.37 CHANGED chCsITGh.ApYhDPpT.pl.Ytss-saphl .....hCsITGLsApYpDPpT.pL.Ysss-sFphI...... 0 114 198 309 +8098 PF08266 Cadherin_2 Cadherin-like Finn RD anon Pfam-B_179 (release 17.0) Domain This cadherin domain is usually the most N-terminal copy of the domain. 21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.54 0.72 -4.41 77 2331 2012-10-03 16:25:20 2005-05-09 16:18:59 7 43 53 2 1054 2531 1 83.80 38 9.36 CHANGED spl+YSVsEEscpGohVGNlAKDLGLslp-LusRshRllSpspcpahplshcoGsLllsE+lDREcLCupstsClLphplllEs ...........................................pl+YSlsEEhcpGohVGN.l.A.....c..D...L.....G....L....p.l.....t............c.....L......s...s....R.....p.....hR.ls..o..p..s......p......p.............phl.plshpsGt.L.h.l.s.c.+.IDR.E..pL..C...u........p....s.....s...t...Chlph-lll-......................... 0 88 224 576 +8099 PF08267 Meth_synt_1 Cobalamin-independent synthase, N-terminal domain Finn RD anon Pfam-B_553 (release 17.0) Domain The N-terminal domain and C-terminal domains of cobalamin-independent synthases together define a catalytic cleft in the enzyme. The N-terminal domain is thought to bind the substrate, in particular, the negatively charged polyglutamate chain. The N-terminal domain is also thought to stabilise a loop from the C-terminal domain [1]. 
19.50 19.50 19.50 19.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.71 0.70 -5.16 56 2913 2012-10-01 21:20:02 2005-05-09 16:57:19 7 5 2719 25 628 4722 465 302.40 41 42.29 CHANGED tspsLG.aPRIGtpRELK+AlEsYWpGchs...pp-LhpsucpLRtppWptQpssGlDh....lPsuDFoaYDaVLDtshhhGslPpRat.......t....sLDpYFshARGs....tsh...sAhEMTKWFsTNYHYlVPEhspsppFpls.ssphlp-hpEA.ptlG.h.....csKPVllGPlTaLhLuK....ststs....shhs.................LLppLlslYpclLpcLsstGsc.aVQlDEPsLshDl..spphhpshcpsYppL.........ttssscllLsTYFss.hscphshl..hpLPVs.GltlDlVc.uscpLssltp.ths..scKhLuAGll-GRNIW+sDLppsLphLppltpths ........................................................................................................h.sphLG.FPRlG.....p..RELKhAhEsYWt.....sc..ho.........c-...-Lhtsu+......-LRtcpWphQpp...s.G.l...Dh....lPs..sDFuaYD.pVLDsuhhh...GslPtRap............t..st..........ss....lDphFthuRGp...........................tsssAhEMTKWFsTNYHYlVPEas.p.s.........p..........pF.c......L.......s......h..s.......pll-EhpE.....A.....ps.L...G..h.............ps+PVLlGPl.....T...a...L..h...LuK...spsts........sths.............................................lLsc.LL.P.lYpplLscL.sc.t...........Gsc...WlQlDEP...s......L...V......h............DL........spp....h.......h......s....t....a....c......p.......s..Y.stL...................tpstsKlLLp..T..Y............F..ss..........l......t....c...........s..........h..stl....ss..L.P.Vs.GltlDhV.....c.....utcs.....L...s....tl...p.p......t.hs.................scphLsAGl....lsGRNlWRsDlppphshlcplht..t....................................................................................................................... 
0 176 381 529 +8100 PF08268 FBA_3 F-box associated domain Finn RD anon Pfam-B_322 (release 17.0) Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.94 0.71 -4.41 51 607 2012-10-01 23:07:08 2005-05-09 18:41:09 7 19 34 0 459 913 0 116.00 20 32.80 CHANGED GlCING.VlYYhAhh..................sp.....phhlhsFDVRSEcF.shIph..................h..hthsLlsYcGKLuh..h..s..t........lclWVLE.DscK.pcWS.+pha...hhs.s..th.....hphplsGsTps.GE..llhh.........sphhppsFal .......................................................................hsG.hlYah.s.......................t.......p.hlhsFDlp.sE.pF..p.hlph............................................t.t.hpL..l..ph....p..G....+.Luh.....ht.hpp.t............................hcl.Wl......Lc.....D...h..c..c..p.p.Wo.+h.hh......h....................................................................................................................... 0 120 175 213 +8101 PF08269 Cache_2 Cache domain Finn RD anon Pfam-B_865 (release 17.0) Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.12 0.72 -4.01 34 2356 2012-10-01 23:40:40 2005-05-10 17:25:59 6 99 833 2 845 2155 121 91.30 24 17.47 CHANGED +sshhpp+cppLpshlptAhshlphhtp.sptsphsc-pApppupphl..............pslRa.ssssYFalhDppsthlhHPhpPcL.Gpshts.hpDspGstlhp ................................................................thhpt+ctplpphl.p.hAhshlp.....hhpts..t......t..sth.s....cp.pA..p....p.p..u...hphl..................................psh..+...a....s...s....s......s.Y.h.a.l..h........D.....t....p.....s...s....h...lh.HP.h.ps..c.h...G.p.....s.hh.s..hpDsp.Gphlh.................. 
0 253 511 690 +8102 PF08270 PRD_Mga M protein trans-acting positive regulator (MGA) PRD domain Moxon SJ, Bateman A anon Pfam-B_5126 (release 7.7) Family Mga is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions [1]. This corresponds to the PRD like region. 21.00 21.00 22.50 22.50 20.70 20.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.10 0.70 -5.07 13 638 2012-10-02 16:05:11 2005-05-11 14:46:11 6 4 320 0 27 325 1 210.10 29 43.86 CHANGED spphlcchl...shplppo.hshh.shFpaaclLlulsaKR..+ta.lslPpstlh......cplpplhhhsplhpsspphlt.caslshspsslsYlFLsYloss.shu.sthhsppctcphhphhpchssaptLLc.lpctLshphss+pclhptL...saFp+p.lhshphLI.-hpthshptaptph.pLYptlcshlpcahpphst..ptplpppchahhshalEpl ...............s.p-.lsphl...shhhKposhshshspaphh+lLlshshhR..hsa.h-lspsph.......cphhshh.h.s...hh...ctstp.h-scaslshsp-sls.lFlsYhpsth.ls.p.hhpshccsphsc..hshp......hhs..pll-plptphtlphpN+spllhtL..........sahhRppLFs.phllh-p+t..slcpaps.a.phhss.lKp.lpcahpshph.....pshhspHhhYhhhh+hcpL........ 0 5 10 19 +8103 PF08271 TF_Zn_Ribbon TFIIB_Zn_Ribbon; TFIIB zinc-binding Finn RD anon Pfam-B_1298 (release 17.0) Domain The transcription factor TFIIB contains a zinc-binding motif near the N-terminus. This domain is involved in the interaction with RNA pol II and TFIIF and plays a crucial role in selecting the transcription initiation site. The domain adopts a zinc ribbon like structure [1]. 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.38 0.72 -4.64 71 1235 2012-10-03 10:42:43 2005-05-11 16:17:32 7 14 529 7 745 1178 225 42.80 34 11.75 CHANGED htCPpCsu.sp.l.....lhD...ppG-h.lCssC.GhVl--phl-pssEacsh ............h.hCPp..Css..s...l...........lhD.....p..pG-h.lCs.....pC.GhVl...p..-phlDttsEWRs................ 
0 230 423 611 +8104 PF08272 Topo_Zn_Ribbon Topoisomerase I zinc-ribbon-like Finn RD anon Pfam-B_5615 (release 17.0) Domain Some Proteobacteria topoisomerase I contain two zinc-ribbon-like domains at the C-terminus that structurally homologous to Pfam:PF01396. However, this domain no longer bind zinc. Indeed, only one of the four cysteine residues remains [1]. 25.00 25.00 28.90 28.90 19.10 16.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.72 0.72 -4.60 19 1903 2012-10-03 10:42:43 2005-05-11 16:18:39 6 10 949 2 262 1090 300 41.40 39 9.71 CHANGED u.ssllpas++scppYlpoct.sGKhouWpsaapstcWpssph .....t.splsp.p+pst..hlps.ts...sGh..hhuhssF.pstchpsshh....... 0 40 98 188 +8105 PF08273 Prim_Zn_Ribbon Zinc-binding domain of primase-helicase Finn RD anon Pfam-B_18441 (release 17.0) Domain \N 21.80 21.80 21.80 21.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.35 0.72 -3.88 18 651 2012-10-03 10:42:43 2005-05-11 16:18:46 7 22 488 2 103 533 210 37.60 54 6.24 CHANGED t+HsPCPsCGupDta+.asDt.st.........GshaC.sC...u......sGDGh ......+HsPCPs.C...G.G....c..DRFR.FD.Dp...cGp...........GoWhC.N..pC......G......uGDGh............ 0 21 44 78 +8106 PF08274 PhnA_Zn_Ribbon PhnA Zinc-Ribbon Finn RD anon Manual Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.62 0.72 -4.16 34 2095 2012-10-03 10:42:43 2005-05-11 16:19:05 7 3 2023 1 331 1007 47 31.30 58 26.06 CHANGED sL.....P...pCPpCsSEaoY............pDsslhlCP-CuaEWs ...........LP...sCPcCsSEYTY............EDsuhhlCPECAaEWs... 
0 97 190 266 +8107 PF08275 Toprim_N DNA primase catalytic core, N-terminal domain Finn RD anon Pfam-B_313 (release 17.0) Domain \N 21.40 21.40 21.50 21.50 21.30 21.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.86 0.71 -4.19 93 4712 2009-01-15 18:05:59 2005-05-11 16:22:45 6 53 4423 10 1053 3634 2781 126.40 37 20.82 CHANGED aapppLpssps.....AhsYLp.pRG.lstchlccFplGaAP...ps.........hptLh..phltp.cshs...p.lhpsGLltpppt..stha..Dp......F+sRlhFPIpctpG.cl.luFGGRsl..............sspp..s.KYl...NSPET.lFcKuchLY.uhtpA+ .......................................................................................................aapp.t.L.p.sstu...t....AhpYLp..pRG..lo..s.....-hl.pcF...tlGaAP....ss.............................hs.slh...chh......tp...pshsp....ptLh.cuGL.lhps-p..........sphY...DR..................F..R.s.R.lMFP...Ics...t..p.G....+V.lGFG..GRsL...................sssp...s.KYL...NSP...E.Ts.lFcKuc.LYultpA.......................................................... 0 367 705 894 +8108 PF08276 PAN_2 PAN-like domain Finn RD, Mistry J anon Pfam-B_291 (release 17.0) Family \N 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.89 0.72 -4.13 106 1792 2012-10-02 11:41:37 2005-05-11 16:56:24 6 91 83 0 773 1862 3 65.90 34 10.50 CHANGED C..........tssDtFhplp.shKL..PDs.sth.hp.ps.hshc-CcppCLpsCSCTA..YAh..ssh......su......sGCllWp...u-LhDh ...........................................C.....psD.sFhp.hp..shK.L....P-.sp.t..s.h...hs......ps..hshc-Ccp.pCLp.sC.s.CoA..aA...sshp.....su..........sGC....llWh...s-.LhDh................ 
0 36 432 619 +8109 PF08277 PAN_3 PAN-like domain Finn RD anon Pfam-B_1455 (release 17.0) Domain \N 21.10 21.10 21.10 21.80 21.00 21.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.68 0.72 -4.36 43 345 2012-10-02 11:41:37 2005-05-11 16:56:38 7 19 14 0 344 322 1 69.90 22 24.54 CHANGED MlllaGpPssh....ss.phpshsacsClstChpsssCllsats...ss..sCthaph...ss.lsslpp...hpssssphl.AhKh ..............................MlhhaGpstsh........ss.tttshsappClppCapss.s...Cllsahs....ss........pChhaph.......ss...l....p...lpp.....hp...pss..t..t..hl.AhKh..................... 1 74 107 344 +8110 PF08278 DnaG_DnaB_bind DNA primase DnaG DnaB-binding Finn RD anon Pfam-B_3213 (release 17.0) Domain Eubacterial DnaG primases interact with several factors to from the replisome. One of these factors in DnaB, a helicase. This domain has been demonstrated to be responsible for the interaction between DnaG and DnaB[1]. 26.80 26.80 26.90 26.90 26.40 26.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.56 0.71 -3.92 90 1338 2009-09-10 21:47:29 2005-05-12 11:10:45 6 11 1330 3 256 893 235 126.70 34 21.41 CHANGED s.hRtsluLLlQpPpLAttls..shtslpphp.s..uhslLhpLlphspp.psshsou.p.....LLEpaR...sssttphLppLAs...hp.phl..s--shpppFt-slspLhpp.h....lcpclppL..huKspt.ts..LospE..+pcLtpLl .....shRhlIuLLlQNPpLAshVs......shtslcp.p..hP.......GlsLhp-.Llpssh...u..pPu.l.oTG.Q.........LLEpaR.....so.spsssLcpLut.................W-clh..-cs...h....hEppFsDoLs+lhcp.h.....lcp.chEpLhA+pRs...pG...LosEE..+h.ELhsL........................ 0 50 142 210 +8111 PF08279 HTH_11 HTH domain Bateman A anon Pfam-B_125 (Release 17.0) Domain This family includes helix-turn-helix domains in a wide variety of proteins. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.58 0.72 -4.32 103 12449 2012-10-04 14:01:12 2005-05-13 11:46:06 7 125 3577 14 2246 23862 1739 54.70 24 15.12 CHANGED Rttpllph..Lhpscp..louppLAccLsV.Scpolh+DIptL.pttG.....h.Ituptsh..GYt ...............................pllph....L...p..s..p...p......l.......o.u.p.p......L.....Ac..p..l.....s.....V...Sc.....pTl....h+-l....p.t....L...pphs................h...l...u....t..th.....GY......................................... 1 790 1488 1900 +8112 PF08280 HTH_Mga M protein trans-acting positive regulator (MGA) HTH domain Moxon SJ, Bateman A anon Pfam-B_5126 (release 7.7) Domain Mga is a DNA-binding protein that activates the expression of several important virulence genes in group A streptococcus in response to changing environmental conditions [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.56 0.72 -4.21 16 958 2012-10-04 14:01:12 2005-05-13 11:47:10 6 23 527 3 56 1743 53 58.10 27 12.18 CHANGED EccItcclcLlslLhc.ppphshs..-lscpLshothplpphlppLpth.Fscplthphpcst ................pKc.pRplp...LlchLhc..pphh.p....lp..ELuchLssoc+slps-Lsclppt..Fsp..h.hhp.pps.h............... 1 15 32 46 +8113 PF08281 Sigma70_r4_2 Sigma-70, region 4 Bateman A anon Pfam-B_125 (Release 17.0) Domain Region 4 of sigma-70 like sigma-factors are involved in binding to the -35 promoter element via a helix-turn-helix motif [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.16 0.72 -4.50 141 24630 2012-10-04 14:01:12 2005-05-13 11:47:27 7 126 3614 8 7611 38704 4348 53.10 25 25.60 CHANGED tttltphlppLstpp+plhhLpthpshshpEIAphl.....sls.ssVcpplpRApppl ......................h...l.pslppL...s....t.c...p...R...p...s...l...h.L....p....h......h......p....u.......h.......o....h.....p.......EIAphl........s.l.s.h.uoV+splpRAppph........................... 0 3028 5560 6758 +8114 PF08282 Hydrolase_3 haloacid dehalogenase-like hydrolase Bateman A anon Pfam-B_66 (Release 17.0) Domain This family contains haloacid dehalogenase-like hydrolase enzymes. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.36 0.70 -4.81 69 20961 2012-10-03 04:19:28 2005-05-13 16:17:25 7 63 4356 221 3153 23365 3191 223.30 21 85.03 CHANGED lshDlDGTL.Ls..............pspp.ls.ppstpslpchpp........pGhhlslATGR..shhshhphhp.pLs..lp...hhhls...hNGuhl.....tpsphl.hpphls...pctlppllchhpphp.......hphhhhs...scshah.pt..........................thhhhtpthtthhhthtp.t.htspshhKlh.....hhtstpphpph..tpplppph.............shhpottthl-..lhspsssKupulptls..pthslshpcshAFGDutNDlpMlphu....GhulAMuNAspplKphAch..lsssssc.sGluchl 
.........................................................................................................................................................................................................................lshDhDGT.L..Ls.........................................................s.p..p....p....l...s.........p.shps.l.p.p.h.p.p.........p.Gh...h.slso...GR...........h....h....t....h.....h..........h...h....p...p....lt....hp...........................hls..........N.Gu.hl................t.pt....p......h...l....hp...p......ls...................p...h.....p.lh..p..hhpp.t.............h.h.h.h..hs.........tp...th.hh.pt...........................................................................................................h...........h.................................t......................t....t....t....h.hchh..................h...p......t...t...h..t.th..........t..t...l...t...t....h.....................................phh..h...s...t...........h..l..-......l..h....t..s....s.....s...Ku....pu.......l....p......p.lh...........cp..h.............s.............l.......s.................p.....p.......s...h..A...h..G....D......sh....NDlpMl....p..h.s........G.h.u.l..A.M...s...N...A...t...s...p...l...K.t....h...A..s..h.....l..s..t..sspp..pGlhthl................................................................................................................................................ 0 948 1870 2569 +8115 PF08283 Gemini_AL1_M Geminivirus rep protein central domain Bateman A anon Pfam-B_286 (release 2.1) Domain This is the cetral domain of the geminivirus rep proteins [1]. 
20.80 20.80 21.10 21.30 20.50 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.55 0.72 -4.21 13 2813 2009-01-15 18:05:59 2005-05-13 16:58:13 6 5 1027 0 3 2628 1 102.40 48 32.13 CHANGED uQpsusDsht+Al..NusS+EEALsll+-chPc-aslphHNlpsNssRlF.csPEsassPFP.SS.FsplPEplp-WssspltpsSAt.hshR..SlllEG-SRoGKThW ..............................tpQoANDA.YAcAl..NuGSKpEALpll+EchPK...Dall.QaHNLsuNL-R.IFstPs.p.s..YlsPFs.SS.FspVP-Elp-Wss.-Nlh.....s.....u.....A....A..........R....P..........hRPhSIllEGD..SRTGKThW................... 0 2 3 3 +8116 PF08284 RVP_2 Retroviral aspartyl protease Bateman A anon Pfam-B_ Domain Single domain aspartyl proteases from retroviruses, retrotransposons, and badnaviruses (plant dsDNA viruses). These proteases are generally part of a larger polyprotein; usually pol, more rarely gag. Retroviral proteases appear to be homologous to a single domain of the two-domain eukaryotic aspartyl proteases 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.57 0.71 -4.23 7 1125 2012-10-02 15:32:34 2005-05-13 17:10:21 6 93 68 0 570 1652 9 115.90 36 9.78 CHANGED shspuRVNHlsAEpsQsuP-VlhGTF.VNSlPAoVLFDSGAoHSFIStsFVttHulth.pL+pPhhVpoPGsshpusphsPuVslcIpGlsF.us.IlLcSpsLDVILGMDWLspacGVIDCApRolsLTsspGc ........................................t............t.p.....t.t....t...l...lh..s..hh.......lps...hsshl..LhDSGAo.....HSFlSht.F.s.tp..p.t......h...t.h.p....p..L....p..p.....P..h.h.l....p.....o.....P.u...s...p...h.....p...s.....t..t...h..s...s...s...l..........s..l...c..I..p...s....h..s...F......u..s.L.I....l......L.-..s...c.......s.l...D...V..ILGMsWLspa.p.u.l.I.DC..s..p+plsl.t................................ 
0 109 151 165 +8117 PF08285 DPM3 Dolichol-phosphate mannosyltransferase subunit 3 (DPM3) Mistry J, Wood V anon manual Family This family corresponds to subunit 3 of dolichol-phosphate mannosyltransferase, an enzyme which generates mannosyl donors for glycosylphosphatidylinositols, N-glycan and protein O- and C-mannosylation. DPM3 is an integral membrane protein and plays a role in stabilising the dolichol-phosphate mannosyl transferase complex [1]. 23.80 23.80 25.30 24.90 23.70 23.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.00 0.72 -4.14 23 232 2009-01-15 18:05:59 2005-05-17 13:34:31 6 3 205 0 169 218 0 88.40 35 92.74 CHANGED Mo+hpchlhhhshlsulahulhhshls....sp..hppll.hLPhahLVsFGsYuLsslGaslhTFsDs.-stcELhppIcEAKc.L+pKGlcl ..............................................MT+htphl.hhhllsu.......lahu..Lhhshls...................ls.....ppll..hLP..h.ahLVshGsYuLhplGatlhTFsD...ss-AtpELppcIpEA+t-Lpp+Glc.......... 0 45 83 133 +8118 PF08286 Spc24 Spc24 subunit of Ndc80 Mistry J, Wood V anon manual Family Spc24 is a component of the evolutionarily conserved kinetochore-associated Ndc80 complex and is involved in chromosome segregation [1] 22.60 22.60 22.70 23.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.56 0.71 -4.40 24 178 2009-01-15 18:05:59 2005-05-17 14:02:25 6 1 166 4 123 167 0 116.60 28 57.89 CHANGED lhcL-spphcluKphscLEsplppLpsplpcLppphp-lpppt.p................sscttt.......ss.cuslL+L+LYRsLG.lpl-t...t..........................-tspsl.I+spcpss.....lpslsl-s.phScaFhosYlWspL ................................................................pL-spphcluKphscLE...tp......hpp...Lps-LpcLcppht-L-ppth-...........................sppps.................stsuslL+L+lYRuLG.Icl-h..............................phspsl..I+sp...ccGs..............lpslslDs..phScaFhusYhWpth......... 
1 32 62 95 +8119 PF08287 DASH_Spc19 Spc19; Spc19 Mistry J, Wood V anon manual Family Spc19 is a component of the DASH complex.\ The DASH complex associates with the spindle pole body and is important for spindle and kinetochore integrity during cell division [1][2]. 25.00 25.00 27.90 27.10 24.70 24.00 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.66 0.71 -4.60 20 131 2009-01-15 18:05:59 2005-05-17 16:15:36 6 2 128 0 99 123 0 146.60 35 82.74 CHANGED oLsssVsSLpuSlplLcsS...lstLcsuspDhPRLs.pVLpTsRhFELlPEs-LppAppslh-EIpPplppLls+l-cplp+LpR+cpsLpsKhELppsRLpsspspssssptt......................s...h.sssspcltcL+tLppKKERLpYslpRL .........LpssVsSLcuSlplLcsS...lphL-suspDhPRLs.plLpTsRaFELlPEsslppAptuLh-EIsPtls..pLls+scpplp+hpR+.psLpu+sE.LppuRLppspstssssstt.....................................tpt.shht.shsspc.htcL+tLppKK-pLpYsl-RL.......................................... 0 24 52 83 +8120 PF08288 PIGA PIGA (GPI anchor biosynthesis) Mistry J, Wood V anon Pfam-B_6971 (release 17.0) Family This domain is found on phosphatidylinositol n-acetylglucosaminyltransferase proteins. These proteins are involved in GPI anchor biosynthesis and are associated with disease the paroxysmal nocturnal haemoglobinuria [1]. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.93 0.72 -3.62 23 340 2012-10-03 16:42:30 2005-05-17 16:20:46 7 6 290 0 246 348 6 87.90 55 19.14 CHANGED HuYs.sRpGVRYLTNGLKVYYlPahVhYcpsohPThFusFPlhRsIllREpI-IVHGHuuhSoLuHEuILHA+TMGl+TVFTDHSLFGFAD .................................HAYs.sRpG.VRYLTNGLKVYYlPhhVh...a...p...psThPTlFs.shPllRs.........Ihl..REpIp....IVHGHu.ohS.shsHE....Alh..H..A+TM..G..L+TVFTDHSL.FGFAD................... 0 86 132 206 +8121 PF08289 Flu_M1_C Influenza Matrix protein (M1) C-terminal domain Bateman A anon Pfam-B_30 (Release 17.0) Domain This region is thought to be a second domain of the M1 matrix protein. 
20.80 20.80 20.80 23.80 20.60 20.20 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.21 0.72 -3.96 2 22741 2009-01-15 18:05:59 2005-05-18 12:08:22 6 2 22466 0 0 3048 0 94.30 91 38.38 CHANGED pHRuHpphspoosPhlR+E.pMV.A.sTAKsMptMu....put-s.clApphpp.ltshRslGsp.psutGltpDlhEsLp..QppMG..s.hp+ah ......QHRSHRQMVTTTNPLIRHENRMVLASTTAKAMEQMAGSSEQAAEAMEVASQARQMVQAMRTIGTHPSSSAGLKDDLLENLQAYQKRMG..VQMQRFK. 2 0 0 0 +8122 PF08290 Hep_core_N Hepatitis core protein, putative zinc finger Bateman A anon Bateman A Domain This short region is found at the N-terminus of some hepatitis core proteins.\ Its conservation of four cys and his suggests a zinc binding domain. 25.00 25.00 26.10 27.40 24.70 24.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.29 0.72 -4.51 2 5101 2009-09-11 06:29:00 2005-05-18 14:57:32 6 7 66 0 0 2681 0 26.90 96 24.69 CHANGED hphFpLCLIISCoCPThQASKLCLGWL ....MQLF.HLCLI.ISC.S.CPTVQASKLCLGWL..... 0 0 0 0 +8123 PF08291 Peptidase_M15_3 Peptidase M15 Finn RD anon Manual Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.40 0.72 -4.25 19 623 2012-10-02 01:02:30 2005-05-19 09:30:08 6 14 470 1 168 1206 1181 110.70 24 47.23 CHANGED taFphpEhspsssssppslD...............plhshLpslRcpaGp..........PIhloSGaRssthNppVGGApsStHhpG.....pAADlpshst.sspcltphhc...sphstch....Ghttts....salHls ..........................................................h.....tt...............t.h.............................thhphL...ctl.Rp.t..h.st..........Pl.h..l.o..........SG.......YR........s...phN.......c........p.....l........G....G......u.....s.....s.....S.pHh..hG.........pAuDltls..........sh.....s.......p.l..t....p.hht......ph.h.....t...........hh...t..........talHls............................................................. 
3 78 128 153 +8124 PF08292 RNA_pol_Rbc25 RNA polymerase III subunit Rpc25 Mistry J, Wood V anon Pfam-B_9841 (release 17.0) Domain Rpc25 is a strongly conserved subunit of RNA polymerase III and has homology to Rpa43 in RNA polymerase I, Rpb7 in RNA polymerase II and the archaeal RpoE subunit. Rpc25 is required for transcription initiation and is not essential for the elongating properties of RNA polymerase III [1]. 24.20 24.20 24.40 24.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.37 0.71 -3.92 33 328 2012-10-03 20:18:03 2005-05-19 09:32:39 7 9 290 3 240 371 3 116.40 35 51.74 CHANGED PFhGEllpG.pIpssopcGI+Vol.uFF-D.........IalPhs.L......-sspa..cpp-psWlWpht................-ppchahDhsEplRFRVppphF..-.pPhs.t..........................ttttpttppphssatll.GShppsGLGhloWW ..............PFhuEllhG.+Ipsso.t.cGl+lol.sFF-D.........IhIPsphL....................psu...p....F............-ps..-..p...s...WlWcht.........................pspcLahDh.sEplRFRVppE.a.hD.pPtssp...............................................ttt....tttpp.sPYplh.GShppsGLGhluWW.......................................... 0 82 132 196 +8125 PF08293 MRP-S33 Mit_rib_S27; Mitochondrial ribosomal subunit S27 Mistry J, Wood V anon Pfam-B_31036 (release 17.0) Family This family of proteins corresponds to mitochondrial ribosomal subunit S27 in prokaryotes [1] and to subunit S33 in humans [2]. It is a small 106 residue protein.The evolutionary history of the mitoribosomal proteome that is encoded by a diverse subset of eukaryotic genomes, reveals an ancestral ribosome of alpha-proteobacterial descent that more than doubled its protein content in most eukaryotic lineages. Several new MRPs have originated via duplication of existing MRPs as well as by recruitment from outside of the mitoribosomal proteome [3]. 
25.00 25.00 25.10 28.20 23.20 24.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.07 0.72 -4.01 30 282 2009-01-15 18:05:59 2005-05-19 09:52:05 6 4 245 0 194 253 2 88.60 34 73.37 CHANGED shhhclscluu+IFupshNPsstRoGs..KlLppcL..+Gspls...sYY..Psth.papphcphhs..thhhhD.cEsh....Rhphlch.................+KtRGKGsP......KK ...............hhhchs+Lps+IFupshp..Ps.st+ohp...Klhppc....+sspls...saY....Ppchhhh.t.h.pphh...............hhhhhDEcpch....+.chhch.................+KhRGKGtP.....KK................................ 0 58 99 154 +8126 PF08294 TIM21 TIM21 Mistry J, Wood V anon manual Domain TIM21 interacts with the outer mitochondrial TOM complex and promotes the insertion of proteins into the inner mitochondrial membrane [1]. 20.40 20.40 20.90 21.10 20.20 20.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.70 0.71 -4.54 13 289 2012-10-01 19:51:31 2005-05-19 10:32:58 6 5 252 1 206 280 0 136.30 30 55.74 CHANGED Kl+cssphoh.hhlVluGlGloGlllYlIhsELFSsSucspIFN+AlpplcsctcspslL........GcplKuaGEtsppsR..RsRshVSppch-+cGhcHhhM+FHVEG..s++pGhVplEhpcsstp..hp.-Fhaha..lDlssc++Ihlhcs+ ...............+ltcssppss.hhlllhGlulo.uslhYhl...apELF.u.s.s.Sssplas+Alc+lcpcscl.hshl...............Gp..l+...uYG..E..t..op...s+.....R.hs.op.h.p.h..c..+..c.Gh.......cHhphpFalcG..stppGhVph-hhcs.tp......tpa.-apalh..l-s.......s....p..p.p..hhl.p..................... 0 62 107 166 +8127 PF08295 Sin3_corepress HDAC_interact; Sin3 family co-repressor Mistry J, Wood V anon Pfam-B_2731 (release 17.0) Domain This domain is found on transcriptional regulators. It forms interactions with histone deacetylases [1]. 
25.00 25.00 26.90 25.50 22.80 23.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.39 0.72 -4.26 26 505 2009-01-15 18:05:59 2005-05-19 10:38:32 7 10 275 0 333 477 4 97.90 45 7.99 CHANGED sCcphGPS....YRhLPKs...s.CSGRschs...tpVLNDpWVShPohu.ED.tuFhshRK.NQYEEsLa+sEDERaEhDhllEuspsoIchLEplhpclpshspc-cts .........................pC+phGsS....YRhLPK.s..htt...CSGRstLC..................ppVLNDpW.VSa.P.oWu..E....D.S.s.Flu.p........+.K..sQYEEpLaRsEDER..aEhDhllEsshs...TIphLEsltp+ls.pho.c-p..p................................... 1 122 194 269 +8129 PF08297 U3_snoRNA_assoc U3_snoRNA; U3 snoRNA associated Mistry J, Wood V anon manual Family This family of proteins is associated with U3 snoRNA [1]. U3 snoRNA is required for nucleolar processing of pre-18S ribosomal RNA. 25.00 25.00 27.30 27.30 24.30 24.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.80 0.72 -3.75 21 98 2009-09-11 16:06:36 2005-05-19 11:16:19 6 3 96 0 72 97 0 90.70 27 29.00 CHANGED chLP--lLsshspssssss.tps....................h.tpphKtpKh+hLcphcK..psl+hGsVslpVLsspsst.....pLsPKucppshssK-pWLpRp ............thLP-ElLps.sppc...sss.tpp............................tpp...hcspKh....+hLcphcK.....csl+hGsssl+VLssssst.........pLsPKspppshpsK-pWLpR..... 0 10 33 61 +8130 PF08298 AAA_PrkA PrkA AAA domain Vella Briffa B, Bateman A anon Pfam-B_3917 (release 10.0) Domain This is a family of PrkA bacterial and archaeal serine kinases approximately 630 residues long. This is the N-terminal AAA domain [1]. 
19.70 19.70 19.70 19.70 19.60 19.60 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.18 0.70 -5.76 7 1391 2012-10-05 12:31:09 2005-05-20 09:52:18 6 8 1305 0 343 3100 1561 347.80 58 55.27 CHANGED hoht-YL-hs+ps.pshtsAtcRlhshIt-s....hlcsttp.+lt+latNcsI++YshFs.-FaGhE-sl-+IVs.YF+tAAptLEE+KQILYLlGPVGGGKSSLsEpLKplhEhh......PlYsL.....ctsPhaEpPLpLh.PpchtphhEccaGI...clpG.hSPhsshRL.cEFGG-IpcFpVsKlh.ShhpphuIupspPuD.NNQDlSsLVGpVDIpKl-pY.upsDPcAYSasGuLN+uNpGlhEFlEMFKssIKhLHsLLTATQEGsasustphuhlsFsGlIlAHSNEuEWtpF+sN+sNEAhlDRIhllcVPYCL+loEEhKIYEKlLppSplsps.hAPcTL-hhuhFolLoRLpps-pSs ................................................................................................oLpEaLs..lC+pD.oAYAsAuERlLhAIGEP...phlDTup-sRL.S.....R.l.F..............u.N..+.l.I.t.R.Y......P.u.F.......c........-.......F.......Y..G..ME-AIEQIVu.Yh...+H...A.A.Q.G.L..E..E.+K.Q...I..LYLLGPVGGGKSSLAE.+LKpLM.pph..........PIYsL.............................ctSPVp-cP.L....sL......F.sPpE......Du........p........l........L.........-.........c........E.......YGIP....................pRYL.tsIMSP.....W...A....s....K....R...L.....c...E..............F.....G..GD.........I.....o.....+.....F.......+...V.......V......K.......l.........aP........SILpQ...IuIAKTEPGD..E...NNQDISuLVGKV...D...IRKLEc.a..uQ.sDPDA.YuY.S.GuLC+ANQGlMEFV.EMFK...APIKVLHPLLTATQEGNYsGTE.GluA.lPFsG.lILAH..SN..ESEWhoFRNNKNN...E..A..FLDRl.YIV..KVP....Y..C..LRlS.EElKIY-...KLL.pcS.-Ls....c..AsCAPu.TL-sL.............upFSlLSRLKEPENSs................................................................... 
1 95 198 277 +8131 PF08299 Bac_DnaA_C Bacterial dnaA protein helix-turn-helix Finn RD, Bateman A anon Prosite Domain \N 20.60 20.60 20.90 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.02 0.72 -4.12 14 4828 2012-10-04 14:01:12 2005-05-20 13:18:53 6 14 4481 14 1038 3266 2314 68.70 46 15.45 CHANGED olcpIpcsVA-haslslp-lhScsRs+slspsRQIAMYLs+pLTspSLPcIGctFGGRDHTTVlaAsRKI .............................l-sIQct.VAcaa.plpl.pDl.hu.p..+R.s.+.s.l.sp.PRQl.A.M.YL.u+EL..T.s.tSLPcIGctFGGRDHTTVlHApcKI.............. 0 367 704 891 +8132 PF08300 HCV_NS5a_1a Hepatitis C virus non-structural 5a zinc finger domain Paterson M, Bateman A anon Bateman A Domain The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. This domain corresponds to the N-terminal zinc binding domain [3]. 20.70 20.70 20.90 21.10 19.90 19.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.30 0.72 -4.06 19 5702 2009-01-15 18:05:59 2005-05-20 13:58:15 8 39 117 6 0 5232 0 61.60 83 5.83 CHANGED lPFlSCQ+Ga+GsWcGDGlhpTpCsCGA.IoGcV+NGoM+..lsGP+hCSNhW+GTFPINshTo ...lPFlSCQRGYKGVWRGDGIMpTpCsCGApITGHVKNGSMR..IVGP+TCpNhWpGTFPINAYTT...... 0 0 0 0 +8133 PF08301 HCV_NS5a_1b Hepatitis C virus non-structural 5a domain 1b Paterson M, Bateman A anon Bateman A Domain The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525 [1,2]. This region corresponds to the 1b domain [3]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.02 0.72 -3.66 20 10555 2009-09-11 00:37:05 2005-05-20 14:01:38 8 41 117 6 0 7446 0 68.20 85 10.02 CHANGED GPusPhPuPNYppALWRVuAc-YVEVpRlGDaHYVsGsTsDsLKsPCQVPuPEFF..TEVDGVRlHRaAPsC+PLLRDEloFoVGLsuaslGSQLPC-PEPDV ...................................................................................PLLR-E.VoFpVGLNpYhVGSQLPCEPEPDV........................... 0 0 0 0 +8134 PF08302 tRNA_lig_CPD Fungal tRNA ligase phosphodiesterase domain Mistry J, Wood V anon Pfam-B_49998 (release 17.0) Domain This domain is found in fungal tRNA ligases and has cyclic phosphodiesterase activity [1]. tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. 21.30 21.30 21.50 21.30 21.10 20.70 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.89 0.70 -5.17 19 168 2009-01-15 18:05:59 2005-05-20 16:00:11 6 8 142 0 119 174 3 219.00 25 29.09 CHANGED SShcNlcpVlspL+ptYPpLl...clPosp-lDpAlphAhstY+Pshc+shshsspp.tpp....pp.........................ss.cppp+plcYaulsl.sspclpshLcshhs.s....ss-pt+hacpLhss+RlQspFHVTLIH+AupKp.pP...clWcpYsphahsphpppspsp.............h.shussclcL-+Ll.WDD+lMshlsclh...s.p...........ssa..tCsNplsHITVGThuspVKP+ESN-LLp+ah.......p.tGous-...sGlhphplsGshllpGsVthsh .................................................................................ShtNhchllptl.t.aP.lh....phPsspphctAhp.uhp.Yp.sphp+.ht.t..t......................................t.pt.h.htp.pYhul..pl....s.pplhphlpphhsss......st.tp.happLhtspRl....Q.ph...HVTLhHpsstpp......p......plWpphhphatt.thtpt..t...............................st...splpL.c.+ll..aDc+lhAlssc.lhs.......................................sph....ssN.p.hsHITlGTtpssVKPhESNcLLpchh........p...sst.t...ttl..h.h............................................. 
0 38 71 101 +8135 PF08303 tRNA_lig_kinase tRNA ligase kinase domain Mistry J, Wood V anon Pfam-B_49998 (release 17.0) Domain This domain is found in fungal tRNA ligases and has kinase activity [1]. tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. This family contains a P-loop motif. 22.70 22.70 22.70 22.80 22.60 22.20 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.11 0.71 -4.38 9 141 2012-10-05 12:31:09 2005-05-20 16:00:33 6 7 132 0 106 153 2 160.00 43 19.71 CHANGED lllPIATIGCGKTTVuhsLpsLFsp.WuHlQNDNI.ouK.sps+hh+psLchL....t+csppsVlsDRNNHph+ER+QLF-.lpph+-pals.s.sl+hlulsFlc.c.phpElh-lThsRVhpRGDNHQSIKspo.stp+VhtIMpGFlKRaQPls.s.+pPDspFDhlIcLclu .................lLVPIAoIGCGKTTlulALscLF...s.....WGH....lQNDNI.suK.tcs.cFscpsl.phL......tspssVlAD.................RNNHpp+..ERcQ...lhs...slpp.........hp.........................ss...+hlALpa.scps.......lscl+clTtpRVlpRGDNHQTI+..uso...sp..pcl......huIMcGFlpRFpslssp.pp.......P........DstFD.lIcL-..s......................................... 0 32 63 93 +8137 PF08305 NPCBM NPCBM/NEW2 domain Rigden D anon Rigden D Domain This novel putative carbohydrate binding module (NPCBM) domain is found at the N-terminus of glycosyl hydrolase family 98 proteins. This domain has also been called the NEW2 domain (Naumoff DG. Phylogenetic analysis of alpha-galactosidases of the GH27 family. Molecular Biology (Engl Transl). (2004)38:388-399.) 
21.20 21.20 21.20 21.20 20.60 21.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.65 0.71 -4.23 43 539 2009-01-15 18:05:59 2005-06-23 15:31:28 6 113 262 16 146 530 40 138.50 24 18.44 CHANGED ss.sssshL...SDlsh..hou....ssGWGslp+Dpus......supsLslsG.........psasKGlGsHAs..SplsYsL.Gst.hspFpAhVGlDcp...hssp..GoVhFpVh...sDG..pplasSss.....hpsssssptl.slDl.....sGspplcLllssuG.su.ssDHusWusA+ltp .............................................................ss..t..hlo-h.h....ps......tsth.t..s.hp.t..sts..........psp.lplts........hpa..s+.....GlGs.....p.....As.............S......plh.....Yslp..u...t.t...hspFp...uhl.G.lDcp................hssp.......sslpFpVh...........sDG......chla.sSss........hphss...ss.thl..slsl.....pGspplcLhss.s..uG..ss.s.t.t.Dc..ssaussph.................................................... 0 74 114 142 +8138 PF08306 Glyco_hydro_98M Glycosyl hydrolase family 98 Rigden D anon Rigden D Domain This domain is the putative catalytic domain of glycosyl hydrolase family 98 proteins. 21.00 21.00 21.10 21.10 19.10 20.90 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.11 0.70 -5.73 6 208 2010-01-08 16:43:45 2005-06-23 15:32:49 6 15 206 14 11 93 0 299.60 59 31.09 CHANGED uhhssssspsss...shRRsISsEpPhhhl.l.u............Wssss.pthh-hIPsDl+PYTVlpLp.u........ls+c-usup......chhEphlEpApSal..KTstspsl.shlps.SuG..chPsYsssspLpos....hh-EhFpcYPNhhGh..sEpaWsassshu.......sHhAphLKLosKYGGYh.......hWusp.NshAhtK.......psssFppAlcpYt+NFIhtpK.TsptshpD..sESlshGhWLSGaAspaGhphDohtWYEpt...........pupGs+pasststAhhhIE..plhLsGtTVash.chhaT.sVpspss...............PtFsNlhh-hFR+lIsssh. 
................h.......s...s...thRp.lss-pPhhh..h.s...............................Ws.ss.KGAWEAIPEDVKPYAAIELHPAKVCKPTSCIPRDTKELR......EWYVK.MLEEAQSL.........NIPVFLVIMSAG.....E......RNTVPPE.................WLDEQFQKYSVLKGVLNIENYWIYNNQLA..........PHSAKYLEVCAKYGAHF.......IWHDH.........EKWFWETIMNDPTFFEASQKYHKNLVLATKNTPIRD..DAGTDSIVSGFWLSGLCDNWGSSTDTWKWWEKHYTNTFET...GRARDMRSYASEPESMIAMEMMNVYTGGGTVYNFECAAYTFMTNDVPT................PAFTKGIIPFFRHAIQNPAP...................................... 1 6 6 8 +8139 PF08307 Glyco_hydro_98C Glycosyl hydrolase family 98 C-terminal domain Rigden D anon Rigden D Domain This putative domain is found at the C-terminus of glycosyl hydrolase family 98 proteins. This domain is not expected to form part of the catalytic activity. 25.00 25.00 25.00 25.90 23.20 23.90 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.78 0.70 -5.12 3 232 2009-01-15 18:05:59 2005-06-23 15:34:03 6 14 206 9 16 109 0 224.00 53 25.82 CHANGED KpEVlsRTKuVlasshTp.NGpGpYSS.pshFsGLY......TphsQpPhY.sTGRYssIPsVappID+pKIuSpFPsu.hKllocNSoELSSIssKspYLNuLYPcEYsGDlYAQRlDNoWaIYN.sYNpNKNQpGuFslhhNNsKSLslTLsPHTYuVVcENssuLsIhLNNYRT-KsuLWthutNhDtuKph.chpcl-hhNWl.csY.hNssss-hRToTITLpGtousPThoNlsGD+GcYshsTVsasssT+uhTITVsHNGslDhoIssc ........KEEVlsRTKsVha......NGpG+...hSS.pshapGLY.............oscEshPLY..ssGRYpllPVIa-hlDp-KlushFPsu..KI.......locsSpEhSS...KVsYLNuLYPc..YEGDhYAQRlsNoWalYN.sss.N...hN.........KsQps.hLPhhhN.sscS.LoL-hoPHTYuVVKEps.N..sL+IhLNNYRT-Ksuh.........Wu.utshstuKph.phpchthhpW.IpcpY.hs.......s..ss-hRTTTlTL+Gtoup.s.hslSGD+......NcYsh...T......NaDpssHVhTITVNHNG.lEhoIssp...... 0 8 8 11 +8140 PF08308 PEGA PEGA domain Mistry J, Adindla S anon manual Domain This domain is found in both archaea and bacteria and has similarity to S-layer (surface layer) proteins. It is named after the characteristic PEGA sequence motif found in this domain. The secondary structure of this domain is predicted to be beta-strands [Adindla et al. 
Comparative and Functional Genomics 2004; 5:2-16]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.30 0.72 -4.19 32 1445 2012-10-02 19:08:27 2005-06-23 15:51:23 6 105 449 0 727 1662 573 68.30 22 20.63 CHANGED huslslsSsPpGApVhlDG.thhG...pTPh.sls.lssGp+plplphpGYtsappplplp.sscshplphp..Lp.tp .............................tlplpS.sP.t..u..ApV.h..l....s...G.....p..h..h.G...........pT.....P.......h....p.....l....t...............l..s.....s...G.......p....a..p.......l..p.l.p..t...p..G..Y.p.s..h.p..p.pl..plp..ss.p.p.h.tlpht.................................................. 0 322 491 635 +8141 PF08309 LVIVD LVIVD repeat Mistry J, Adindla S anon manual Repeat This repeat is found in bacterial and archaeal cell surface proteins, many of which are hypothetical. The secondary structure corresponding to this repeat is predicted to comprise 4 beta-strands which may associate to form a beta-propeller [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. The repeat copy number varies from 2-14. This repeat is sometimes found with the PKD domain Pfam:PF00801. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -8.00 0.72 -4.97 22 449 2012-10-05 17:30:43 2005-06-23 15:54:39 6 46 137 0 309 513 544 40.50 33 8.98 CHANGED uG.spslsVSGNYAYVA..DtssGLlIVDI...SNPSSPsLpGsasT ..................s.s.ltls..G..s..YAYVA..........s..s.s.sG......Lh.IlDl......SsPssPthhuph............ 0 143 204 270 +8142 PF08310 LGFP LGFP repeat Mistry J, Adindla S anon manual Repeat This 54 amino acid repeat is found in many hypothetical proteins. Several hypothetical proteins from C.glutamicum and C.efficiens along with PS1 protein contain this repeat region. The N-terminus region of PS1 contains an esterase domain which transfers corynomycolic acid. The C-terminus region consists of 4 tandem LGFP repeats. 
It is hypothesised that the PS1 proteins in Corynebacterium, when associated with the cell wall, may be anchored via the LGFP tandem repeats that may be important for maintaining cell wall integrity [Adindla et al. Comparative and Functional Genomics 2004; 5:2-16]. Deletion of Swiss:Q01377 protein results in a 10-fold increase in the cell volume of the organism and infers the corresponding proteins involvement in the cell shape formation [1]. The secondary structure of each repeat is predicted to comprise two beta-strands and one alpha-helix [Adindla et al. 2004]. 20.70 20.70 20.90 20.70 20.20 20.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.56 0.72 -4.33 31 1397 2009-01-15 18:05:59 2005-06-23 16:04:57 6 50 220 0 484 1277 0 52.30 30 25.49 CHANGED LGhPhu.sEtshsDG.G+appFps.GsIYWsssTGAasl.sGsIhctWtstuhEpG. .....................LGhPss..s.E.....h.....s...h...s....s...G............u...p.hpp...F.......p......s......G......s....l...a...W..o...s...s.........o....G.....A.a.s..l...tGslhstasphuhttu........... 1 164 343 447 +8143 PF08311 Mad3_BUB1_I Mad3/BUB1 homology region 1 Mistry J, Wood V anon Pfam-B_3330 (release 17.0) Domain Proteins containing this domain are checkpoint proteins involved in cell division. This region has been shown to be essential for the binding of the binding of BUB1 and MAD3 to CDC20p [1]. 
20.70 20.70 20.70 20.70 20.60 20.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.59 0.71 -4.35 25 450 2009-01-15 18:05:59 2005-06-24 09:15:27 7 14 255 13 294 455 1 122.00 31 13.79 CHANGED lppp+ppaEpclpshp...tDDPLplWhcYIpWhccsaPp....ssppSsLhslLERslptFtcsc+Y+sDsRaL+lWLcYhc..hhs.....-sp-hFpaLhpptIGsplAhaY.paAphLEspspapcAsplaphGl .....................................h..t.+ptaEtclpthp...ucDPLsha...............c..............YlpWsppsaPp...............tsppstL.hs.lLE+shptF.hsp....p..+..Y+sDsRaL+lWlcahc.hhs........................cspchap....aLhppsIGpphAhaY.paAthhEtp.sphpcAcplaphGl............................... 0 87 150 231 +8144 PF08312 cwf21 cwf21 domain Mistry J, Wood V, Bateman A anon Pfam-B_14400 (release 17.0) Domain The cwf21 family is involved in mRNA splicing. It has been isolated as a subcomplex of the splicosome in Schizosaccharomyces pombe [1]. The function of the cwf21 domain is to bind directly to the spliceosomal protein Prp8. Mutations in the cwf21 domain prevent Prp8 from binding [2]. The structure of this domain has recently been solved which shows this domain to be composed of two alpha helices. 21.00 21.00 21.00 21.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.27 0.72 -4.11 76 461 2009-10-29 13:58:15 2005-07-13 09:22:44 7 10 268 1 314 448 0 48.80 38 7.17 CHANGED l-H-++...RcIElKlhEhc-cLE-..cu.....................................h.s---I-p+lsphRp+Lhpchp ............h-H-++...RclEl...Klh-hp-pLE-.....ps.................................................h..spppI..pc+VpphRpcLhpc..t................................................................................................. 0 97 155 240 +8145 PF08313 SCA7 SCA7, zinc-binding domain Mistry J, Wood V anon Pfam-B_21229 (release 17.0) Domain This domain is found in the protein Sgf73/Sca7 which is a component of the multihistone acetyltransferase complexes SAGA and SILK [1]. 
This domain is also found in Ataxin-7, a human protein which in its polyglutamine expanded pathological form, is responsible for the neurodegenerative disease spinocerebellar ataxia 7 (SCA7) [1]. Ataxin-7 is an integral component of the mammalian SAGA-like complexes, the TATA-binding protein-free TAF-containing complex (TFTC) and the SPT3/TAF9/GCN5 acetyltransferase complex (STAGA). This domain is a minimal domain in ataxin-7-like proteins that is required for interaction with TFTC/STAGA subunits and is conserved highly through evolution. The domain contains a conserved Cys(3)His motif that binds zinc, thus indicating this to be a new zinc-binding domain [2]. 19.80 19.80 20.00 20.20 19.50 19.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.56 0.72 -4.38 17 392 2009-11-18 17:56:09 2005-07-13 09:42:51 7 11 194 2 252 404 1 70.50 44 12.05 CHANGED s++pccs+p+s.......sps+p.lDl-KQCGV.LPpGt.hCuRSLTCKoHSMGuKRAV.GRopPaDlLLs-ap++sph..K. ...............................................ttp.....................hsct.hD.s+pCGVlsscsp.t......CTRSLT.CKoHShspRRAV.GRpp.aDhLLscaptcsptp..................... 0 62 111 174 +8146 PF08314 Sec39 Secretory pathway protein Sec39 Mistry J, Wood V, Schmitt HD anon manual Domain Mnaimneh et al [1] identified Sec39p as a protein involved in ER-Golgi transport in a large scale promoter shut down analysis of essential yeast genes. Kraynack et al. (2005) [2] showed that Sec39p (Dsl3p) is required for Golgi-ER retrograde transport and is part of a very stable protein complex that also includes Dsl1p (in mammals ZW10), Tip20p (Rint-1) and the ER localized Q-SNARE proteins Ufe1p (syntaxin-18), Sec20p and Use1p. This was confirmed in a genome-wide analysis of protein complexes by Gavin et al (2006) [3]. 
18.50 18.50 19.20 18.60 17.70 18.30 hmmbuild -o /dev/null HMM SEED 715 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.94 0.70 -6.60 20 278 2009-01-15 18:05:59 2005-07-13 09:48:18 6 15 208 1 202 289 0 559.60 21 47.25 CHANGED LhLLAsplsspus..lpsLspLhspasuhls.-...lL+IlLohhP.Eoh-PpsYssllppl.psp.sts...............hhsp-shslusscshsssssp+ps..........cphcLlsL+h.tt.ps.t..ss-hlspFLhcRuhcl-p.os.hshhhpL...........Lhshhc...pssplpsWlhusllPLl+h.Nhchh..ts.sholsshpsh.sstsslslLLuhssscpt........lspsLcsllsPalhtppc.pp..hpphhtssshshhpsppp............lhcaLhspuphshtssspuh....s..p.......................ppcaspsuLuslYtss.......csohpsLshshtlhpths.lhthpt..hh.............................shlssss.lshsst.shshhptlLhohphLpphs..hshphhtphh..ps..pctQltchpshlpshhhpppstts...Wpplhpplhhlpsht................hlFsplsp-hlpscllc..sLLpsscas...lAtslhppssst................lssp.lccslhpshashassAoNss+oRsshK+Apch..Lphhssp.......hssss.shpplcsLlpAocsLSpYSL..sLppG.......PFpPlsl...Rh+sDPlulIp+lL-QNP+uYppl-cllslu+pLlpAh.......................sptpccshhsscpRl....huhsIcsALsssDFpsAYshslshLpsss..spp.............t.......c-hsWcssaQsG+YhsPs.sss...pth................LspRhElLShALplsP.s-sLpclLusWpph-pELss..Lhspcpst-sshc 
....................................................................................................................................................sh.hs.psp..httl.hh.t.h..............lhs.hP.E.h.....s.tth....hl.......................................................................tt.........h....t....pt.....t....h............................t...ph.h.....t.............................p.h.pah.pRs.tlpp.st....h.t...............l..hhp...........h...th...h.......sh.hh.......h............t......h....slt.h.pph..ts.p.hhphhhp.stt......tp............hsp.hhthhhPah.....http.....................t.........t....................h...hlh.p.......p......h.......................................phht.hsLtshY.st...........p.p.h.ph..tlh.ppls....................................................................thht....s..h.p........t..hp.hpt.......hl...ssphLt.phsh..hshphhtph...........ppp.ht.plh.h.hhtph....hppts.....hs.t......Wtthhpph.hhhpp.s..................asp.ls..phh.t.hhc..uL.L.ps......s...chp...............LAtphh.p..s..s.s........................ls...pp..plVhts.hphassuos.scsphsht.+.....sp.......Lphhssp...........ss.shpc..sLl.pAhtt.L.p.pasl.............................hhPlpl..........R.......h.....pt.......D..................lslIppslppsspsYpp.ppLltlup.Lhhs............................tpp...ttp...tpl....h.h..hlc.tALtttDaphAhths.ph......ht...............................s.s.sW..cs.shphGph..s.........................................ltpR.-lLuhuLth...sP..s.....p....plt......lLs.shpphptp.h................................................................................................................. 0 59 107 161 +8147 PF08315 cwf18 cwf18 pre-mRNA splicing factor Mistry J, Wood V anon Pfam-B_19718 (release 17.0) Family The cwf18 family is involved in mRNA splicing. It has been isolated as a subcomplex of the splicosome in Schizosaccharomyces pombe [1]. 
25.00 25.00 25.30 25.10 24.90 24.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.69 0.71 -3.68 31 268 2009-01-15 18:05:59 2005-07-13 09:55:05 7 3 235 0 201 257 0 132.60 35 70.43 CHANGED usL-ttAhcRKtRLtpL+ph.................................t.ppppptttssspcssphtLphRNYcPcsc.shKtshhsssp...........ssplEcclpcQhctsctt.....cplDLhpLtP+KPsWDLKRDls+KhchLcpRTppAIAcLlR-Rl ..................................................................s.pLpttAhcRKtRLttL+ph..................................................t....ppp.p......tppspppstpp.pLphR.NYsPcsc.shKtthhsssp.............................................sspl.E.cp.lp....cphptsptt..........................cplDLhsLsP+KP...sWDLKRDlscKL-+L-+RTpcAIAcLlR-Rl...................... 0 71 107 160 +8148 PF08316 Pal1 Pal1 cell morphology protein Mistry J, Wood V anon manual Family Pal1 is a membrane associated protein that is involved in the maintenance of cylindrical cellular morphology. It localises to sites of active growth. Pal1 physically interacts and displays overlapping localisation with the Huntingtin-interacting-protein (Hip1)-related protein Sla2p/End4p [1]. 25.00 25.00 25.90 25.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.87 0.71 -3.73 21 213 2009-09-11 00:21:46 2005-07-13 11:52:31 6 4 131 0 161 204 0 110.00 38 26.42 CHANGED DsIDKLDVTGl.aG.GsFHHDGPFDACsPHRN+ssp..sAPVhAFPtDusNsolGG..ssssccsshspsaGpt-.-s.s...................t.sshttstsp.t..s...h..psspl...spFDsps+sE.lHGssThGLGooTFLDGAPAS+uAIpc ..................DhID+LDsTul.a.G..GhFHHDGPFDAssPcRN+psp......tAPl.tAF.....st...D.u.Npsltu........s..p..tht..hu..t...................................................................ss..p...h.u..o.GLssoThl-GsPAs......t...................................................................................................................... 
0 34 81 130 +8149 PF08317 Spc7 Spc7 kinetochore protein Mistry J, Wood V anon manual Domain This domain is found in cell division proteins which are required for kinetochore-spindle association [1]. 34.00 34.00 34.00 34.30 33.70 33.30 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.95 0.70 -5.77 15 147 2009-09-13 12:07:19 2005-07-13 15:40:18 6 13 141 0 113 154 0 314.20 25 26.55 CHANGED l-...........psspschEPIpLp-FLshssI+F.....hplsohcRhpsohss...............pspssshcDhlsAtasslPhLELYp..aSC+EL++hIuEGRcll+plEscThs-NPP.LF+EYhoAss-h+lLMcsQhp.VKoaARLpSKssWYEWRhpLLcGLK-sLtcplpthppDcchLs+p.shlsslhsclpc+psuLccEhssLcplsc-.....hspsDpp-Lpsh+pcLpplcpcIstpppplpELpsclpchsssIpsssppKpphhtcIp-t-+lhccs+saospEIscL+pphptlcphoGaslhuls.......Gsslohsaccp..........lcLsFs.......usapl ..............................................................................................t.tttt.......t.php..lp..Lp-FLshsslcF......hph.....sss+Rppshhst........................ttpshslcchlsA..thhslPhLELYp..auC+ELpphIs..-G+ph..hcplEscs..htpN..PP.LFpE.Y..hs.....u.s..........s-.h....+..h.l....McsQhp.lKsauRL.uKthWYEWR..hpLl..cGLcpsLtcphpthppDtphLs.cptphls.sllsplhpcpptLppEhp.pLpph.spE...........hpssDp..p-Lpph+....pcLtphctc...........lpthppplpchppclpphppp.l..cphspp+pphhppIp...cs-+hh-.cs.+sa.otpElppL+splctLEp.pGhplhphp................u.s.tl.phtappp..........lpl.ht............t............................................ 0 40 67 98 +8150 PF08318 COG4 Sec38; COG4 transport protein Mistry J, Wood V anon manual Domain This region is found in yeast oligomeric golgi complex component 4 which is involved in ER to Golgi an intra Golgi transport [1]. 
20.10 20.10 21.10 20.90 19.80 19.90 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.82 0.70 -5.67 14 351 2012-10-02 15:56:29 2005-07-13 15:46:43 7 11 268 0 250 355 4 282.30 26 39.00 CHANGED LcphpcpLpplFpccFpcAs+spDltplT+aFKLFPLlGtc-hGLshYucYlCphIAscuRphhpss.t.......sptshhaupslhpLF-plupllpsHstllpphYG..sst...hlpllp+lQcEsDhQsulll-pFhDpR+lcchhppIspas...hst.............t........stp.phlsh+-lssllsEhotlhppWslYp+Fhsh+h.p..............hsssp.p.....hphspllpsuphsppl..pcllssFhtLppaahpcSlp+ulpl-ch...............ptps..sSShV-Dlhhll+psLtpslsTuphsslsphlsp.lsphlpsDa.hphhpspl+ .....................................................................................................................................................................................................................lppspppLpslhhccFtpAspp.....tDhsplpRFFKlFPll..G..h..pc.GLpha............upYlsp.ls.tpuc.t........hp.s.ht...........................pp....tshh...aussLohLF-tlupll-sHtsllc..ph.YG.....sp...................hhp.llptLQhEsDhpsth.llcpa.cpR.ph..pphhp.plpp..ht..st.........................................................................-.t...l...s..+.-lD.lLsEhshh.tphphYh+Fltp+htt...................................................................t.pt..............phsphlppst.hspph..pclls.....Y.shppaahccolpKAhtl.Dph.....................................................................................tps.hhoS.hV.DDlhallppsltRuluouphsslsuhlst.hsphL..ps-a.hthlppch................................................... 0 91 145 210 +8152 PF08320 PIG-X PIG-X / PBN1 Mistry J, Wood V anon manual Domain Mammalian PIG-X and yeast PBN1 are essential components of glycosylphosphatidylinositol-mannosyltransferase I [1]. These enzymes are involved in the transfer of sugar molecules. 
20.50 20.50 21.30 21.10 19.80 18.60 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.57 0.70 -5.05 22 258 2009-01-15 18:05:59 2005-07-13 15:56:35 7 5 217 0 165 278 0 194.10 24 54.09 CHANGED pPsGLHPpLplslss.............spssppCplahahpLPpslFhD+YQ.........t..shs.plhhltGtsDLEhP-Yth..ppWGSphLhcltss............................hpsplPLHhRYhcPup..ssh....pslsls.PhlFhuCssc-ss..........htpsPFsph.....shua-shFsscThFaals.p..................ptslplplPhsst.......tshptVphsThlsllluhla....llhplhtt ...............................sGhH..sL.lp.lph..................h.st.ppCpl.hhh...pLPs....slFsD.apLts.......................................Lppcshh...phh.....hltsps...DLEtP..sa........pp.hus.pl.Llplp.ss................................t...pphpsplPlHhRY...hpP....ps...tsuh..............ppltls.Ptl.hhtC.stppst....................ht..sapph..............sh.s.hhtsp..shh.ahp.t.......................hs.tlplPlhph..........phthlp.sThhhhhls.hhlhhtl...h........................................................... 1 43 84 133 +8153 PF08321 PPP5 PPT1; PPP5 TPR repeat region Mistry J, Wood V anon Pfam-B_6912 (release 17.0) Family This region is specific to the PPP5 subfamily of serine/threonine phosphatases and contains TPR repeats. 23.50 23.50 23.60 23.80 23.40 23.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.16 0.72 -3.97 10 434 2012-10-11 20:01:00 2005-07-13 16:27:50 7 49 269 28 266 410 3 88.60 30 17.10 CHANGED KlpECcKlV+clsFEcAIul.-cpc+Sls-sl.DlEshsIEs-YsGP+LEsspVTl-FlKchhEaaK.pQK+LH++aAYpILlplcclL+ppPSLV-l .............................................h..spphlpp.tF.tAIt..............o.h.....h-..th....D.h...-.s..h.........s....l.-c.s...Y.sGP+................L..............p.....................t...............p...............lTh...............p.............Fhcphl.-t.FK..ppK..pLH.++YshpILhps+cllpp.Pohlc......................... 
0 82 129 200 +8155 PF08323 Glyco_transf_5 Starch synthase catalytic domain Bateman A anon Pfam-B_148 (Release 17.0) Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.98 0.70 -4.88 179 5560 2012-10-03 16:42:30 2005-08-09 17:08:06 6 34 3850 19 1001 5688 791 204.20 33 44.27 CHANGED +...lLaluuEs..........sPh..sKoGGLuDVssuLPpAL..tphG.t..-V..cllhPtY.stl.pph.........php.lhp...h............h..........h.hhph.h...........p.slslah.l-s.t.....ha.p....Rss..hY............sa.DsspRFu...hFupAshchhtt.h..........sh.t..PDll..HspDWpTuLl.PhhL+ph.htt.......stoVhTIHN.........lsaQGhas...tphh.th..hsls..........h...pthcahs...........................plshlK.uGlhhuDtloTVSPoYAcEItsst.......hG..GL-slLpp ........................................................................................................................................lh.hs.Eh.............sh......p.GGL.ucsht.u.Ls...t...........t........s..h..c.s....hh..l..h.P....ha....h.t.t.................................h..................................................hth.h.pthh...................................p..Glsh.hh...l...D.....p.h....................ah..p.....................+ss...........hYss....................sah.D.s.t.h..R.Fu...............hh.sp.A..u.l.Ehsph..l........................................sa..t....Dll..h.spD.WH......ou.Ll.Ps..a...L.+th...h.......p................h..............t......s+..........sshsIHN..............lsaQG..hFs..........hp.h...sh....hsLs.............phh........t...s.h...c...a......s.............................pl.sa.h...K...AGl.h..uD+lsT...VSP...hYApElhss.........th.tL-sllp..................................................................................................... 0 340 651 848 +8156 PF08324 PUL PUL domain Bateman A anon L. Iyer Domain The PUL (PLAP, Ufd3p and Lub1p) domain is a novel alpha-helical Ub-associated domain. 
It directly binds to Cdc48, a chaperone-like AAA ATPase that collects ubiquitylated substrates [2][3]. 21.00 21.00 21.40 21.50 20.80 20.70 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.74 0.70 -5.60 22 406 2009-01-15 18:05:59 2005-08-10 13:17:14 6 21 274 9 297 411 2 256.60 19 36.81 CHANGED phhPhpphlhhcshshctlhp+lpchNsp.......tchphs-pplss....ltphlpthpps.t........hpthhthhhpllpsWs.sphhPslDllRlhlhp.sss.hh..t.........ss.hsphhtts.ss...................tp.s.hhhslRhlsNhFsssshtthlhspts.........plhstlsshhssh...............spNlplAlATLhhNhulhhhcss.........sh-hph.lluslhp.....htc..pspEAhYRhllAhGsLh....oht.sshtthspt.........shsthhptptp.........hsp.t+hp-lsp-l ..............................................................................hhP.pphl.hh..cp.s.s.h.ptlhpKl.t.chNtt............tphth...sp..splpt.......ltphl.p.......thpps.ss.....................hpt.h.h...hl.hp.hhtpW.P......s..h..h.hPslDll.Rl.hlh.psp....s.ss..h.hspt..................tphhtphhp.t..s....p...................ss.stthl.slRhhsN.hF.......s..s....tthp.......phhhsptp...............tl.hstltt.htss..............................spNlplAhuTLhhNhulhhpppp........................stctphtll..uslhp..................lhpt..tp.s......Euha.RhLlAlGsLl..........sss.sphhphsps........hth.t.thh.tthtp...................hsp..hhtphht.......................................................................... 0 103 168 244 +8157 PF08325 WLM WLM domain L Iyer, Bateman A anon L Iyer Domain This is a predicted metallopeptidase domain called WLM (Wss1p-like metalloproteases). These are linked to the Ub-system by virtue of fusions with the UB-binding PUG (PUB), Ub-like, and Little Finger domains. More specifically, genetic evidence implicates the WLM family in de-SUMOylation [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.38 0.71 -4.42 23 403 2012-10-03 04:41:15 2005-08-10 14:50:09 5 13 219 0 290 2217 499 191.90 27 49.79 CHANGED spstphphLp....pt.PscscALchLp+lAsp..lpslM+c++a+VshLsEhhPppts..................LLGhNhN+Gpc..................IpLRLRsss..ppFlsacslhsThLHELsHslausHDppFacLhcpLps-htplphth.......thhssG+pluupshh.s...............tth.httsthtGtsppLG.Gss...................................................................................................................ssstshRchhutAA-+Rhp .......................................................................................................................................................l......hPp.tppAhphLc+lAst..lpslM+.c+.papVshLsEh.Pp.pts.......................................lLGhN.....h....N.pGpp............................I..pLR..L.Rsss.......pta.h...shc....p..lhp.T.h.lHEL...sH.......s..l..aus..Hsp.pFa....sLhs.p.L..p..c..Ehp..pl..t..h..p..................h..stG.ppLustth.......................................t..ttthhts..ts...hLG...Gss..............................................................................................................................................................................................tshshRphhAtAA.pRh.......................................................................................................................................................................................... 1 97 176 251 +8158 PF08326 ACC_central Acetyl-CoA carboxylase, central region Fenech M anon Pfam-B_2008 (release 18.0) Family The region featured in this family is found in various eukaryotic acetyl-CoA carboxylases, N-terminal to the catalytic domain (Pfam:PF01039). This enzyme (EC:6.4.1.2) is involved in the synthesis of long-chain fatty acids, as it catalyses the rate-limiting step in this process. 
18.50 18.50 18.60 18.60 18.40 18.40 hmmbuild -o /dev/null HMM SEED 708 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -13.13 0.70 -6.42 37 703 2009-01-15 18:05:59 2005-08-10 14:56:40 7 35 365 6 377 673 13 529.70 28 30.19 CHANGED sLDDPS+V++....ApPFpGpL..PchusPshh.G....sKstp+apthhss...............LpsILsGY.......phhhss.slppLlpsLcsscLPa.paptphSsLtsRlP.pLcptlpphhcchptp....tsp........FPu+pLppllpphhs...hss.....hhthsltPLhplhppYpsGlpsHthslhtsLlccYhsVEplFs..sspp--VIhcLR-cpcsDlpcVlphlLSHuplssKNpLlLulL-ph.ps..................sssthcssLp+lspLps+stu+VAL+ARElLlpspLPSlcpRpsplcclLpu..shhpsthGp.......h.p+ptsph-hlc-LlsSphsV..hDlL.spFF..scsDthVphAAlEVYlRRu.....YpuYp.ltc....lpacpps......slhpWcF..lsp.t.sphst..........................................p..chtshoshohhspptpt......RtGhhlshcpL-clpphl.stuLcthsp.......................tt...t............t.p.....sp.sNlhslsl.........pphpsh..s-p-llsclptllcppc..pcLts.tulRRlTFlhsp.pc...............uphPpaaTF+..uss.......YpE-phlRHlEPuLAapLELsR..Lp.sFclp.l.opsRplHlYpus..........................uKp.........sssD+RaFhRullRsupl..psphsht-hL.uEss..............Rlhs-hLDsLElhsss....poDhNHIFlNhss.hpl....................sspplEtuhsshlcRaGpRLaRLRVspsElRlhls....sssssshPlRsllsNsSGaslps-lYtEhcssp.u............phla+Shs...c.Gs.hH 
..............................................................tLDsPotVpt.....sp.a.u..h.......P...t.......h..s..............tp..phht..hp..........................h...lhtGa....................................t.....hpt.hlpphh.sLc...sspLPh.phpt.hssltsRhP..lpt.hpt.ht..t...................................................................................FPup.l.thhpt.ht................................htslhplhp.pattG.t.......h.......h.hh.tlhppYhpVE..p.Fp..................p...ppsl.tl.Rpp..p....s.httVhphhhSHttlttKs.Llhhlhcth............................................................st.hhs.L.ch.spLp..p.pt.......s...cl...uLcApp...........l.L.tsthsphc....spht......p.h....p...h......t..................................................p..hpcll.u.hsl...D.......h.L...hF....pts.....hlt.t.shEs..YlRRh.....Y.sa..ltt......hpht...................hh.apF.....t.................................................................................................................................................................+hGhhh..hp.hpth..........t......h.sthht.h............................................................................................................................................................tp..thhplsl................................t...pt.....ptp....thht.htthhpp.p..t.Lht.ttl+clohhhtp...p.....................tthPtaaTF+.h.s......................................atE-phhRclEPsluhpLELsR..hp..pa...plp.h..st.s...c...phHlY.uh..........................................................................................u+............tshDhRhF....hRullR...p.sph..............hsc..sshphh.sEsp................................Rhl.phh-tL..E.l.h......p.........p..........................p....s.ch....NHlalsh....h.....s.....h.h...........................................sstplcp.htthh.phG.Rhh+LpVhthEl+h.hp..........s.ss.s.hshRlhlsN.oGahh...phphYhEhps.p..t.......p.hhapuhs.......u................................................................................. 
0 139 213 317 +8159 PF08327 AHSA1 Activator of Hsp90 ATPase homolog 1-like protein Fenech M anon Pfam-B_4145 (release 18.0) Family This family includes eukaryotic, prokaryotic and archaeal proteins that bear similarity to a C-terminal region of human activator of 90 kDa heat shock protein ATPase homolog 1 (AHSA1/p38, Swiss:O95433). This protein is known to interact with the middle domain of Hsp90, and stimulate its ATPase activity [1]. It is probably a general upregulator of Hsp90 function, particularly contributing to its efficiency in conditions of increased stress [2]. p38 is also known to interact with the cytoplasmic domain of the VSV G protein, and may thus be involved in protein transport [3]. It has also been reported as being underexpressed in Down's syndrome. This region is found repeated in two members of this family (Swiss:Q8XY04 and Swiss:Q6MH87). 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.83 0.71 -4.02 114 4271 2012-10-02 19:24:03 2005-08-10 15:01:15 6 39 1661 63 1665 4537 461 127.90 18 71.07 CHANGED ssss-pVacuhTcs-tltp.W............hs.hph-h...+s..GG.paph.........tst.hstpsp...........hhcltssc+lshs.atht..t..........................httlshplpp.tsss..Tclphpp..........sshstspttp.............hptGWpphl.cpLpphlp ........................................................ss.cpVacs...hos.schl....tp...W......................h.t...tph.-h.............cs......G.G....pa.ph................t...tst..t.h.s.h..p.sp...................hhcl..p.s..s.........c..........p....lshs..htht......t...................................................ts.tlp.hp...h..p...p.....p.......s.ss.......Tplp.hpp....................sshst...ppttt.................httGW.pthl..spLpthl................................................................. 
0 625 1111 1404 +8160 PF08328 ASL_C Adenylosuccinate lyase C-terminal Wuster A anon Pfam-B_1176 (release 18.0) Family This domain is found at the C-terminus of adenylosuccinate lyase(ASL; PurB in E. coli). It has been identified in bacteria, eukaryotes and archaea and is found together with the lyase domain Pfam:PF00206. ASL catalyses the cleavage of succinylaminoimidazole carboxamide ribotide to aminoimidazole carboxamide ribotide and fumarate and the cleavage of adenylosuccinate to adenylate and fumarate [1]. 34.10 34.10 34.10 34.50 31.90 32.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.38 0.71 -4.24 107 1767 2009-01-15 18:05:59 2005-08-10 15:17:40 6 8 1709 11 421 1259 659 115.00 62 25.35 CHANGED SRaQRDLTDSTVLRNlGVuhGaollAYpuhl+GLsKLplNpsplspDL-ssWEVLAEsIQTVMR......RaGltpPYEpLKcLTR..Gp.plstcslppFI..csLc.lP-psKscLhtlTPssYl .......SRWQ.....RDLTDSTVLRNLGVGlGYuLIAYpSoLKGluKLElNcs+Lhc-LDpNW.E.VLAEPIQTV....MR......RYGIE+PYEKLKELT.R...GK..clssEuh+pFI-u...Ls...lP--.....t..KsRL+uhTPAsYI........................ 0 122 261 354 +8161 PF08329 ChitinaseA_N Chitinase A, N-terminal domain Fenech M anon Pfam-B_1049 (release 18.0) Domain This domain is found in a number of bacterial chitinases and similar viral proteins. It is organised into a fibronectin III module domain-like fold, comprising only beta strands. Its function is not known, but it may be involved in interaction with the enzyme substrate, chitin [1,2]. It is separated by a hinge region from the catalytic domain (Pfam:PF00704); this hinge region is probably mobile, allowing the N-terminal domain to have different relative positions in solution [1]. 
21.20 21.20 21.20 21.30 21.10 20.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.58 0.71 -4.38 43 378 2012-10-03 16:25:20 2005-08-10 15:19:53 5 25 265 36 56 351 4 122.60 40 18.18 CHANGED sAsPGsPoIc......Wu-.psaul.....Vclsp..pAT.uYppLVph.c-tlsVsVoWNlWSG-sG-puplhhDGpp......Va....pGsus..sppA..shplspGGpapMpVcLCNu-G....CSsSs.ssplllADTDGSHLtP.L.h..shpENN+saspps ....................................t.PshPsls...aup...pphth..lpls...tsT..uYpphVph.+-tssloVsaNhWo..Gs.sGsoh+lhhsGpp......Vh.......oGsss....upsoA.....sFph..s..KGGpYQhplcLCNusG....Cop.Ss.sscI.slADTDGSHLtP.Lph..slt.NNKsap...s.................. 0 13 26 38 +8163 PF08331 DUF1730 Domain of unknown function (DUF1730) Wuster A anon Pfam-B_1023 (release 18.0) Family This domain of unknown function occurs in Iron-sulfur cluster-binding proteins together with the 4Fe-4S binding domain (Pfam:PF00037). 21.40 21.40 21.50 22.20 20.50 21.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.50 0.72 -4.31 57 2383 2009-09-10 21:15:36 2005-08-10 15:23:48 5 19 2359 0 510 1721 810 77.80 37 21.12 CHANGED RscPptLlPss+SlIuluhsYhs.......tshpssppuhlo+Yuh.G+DYHcll+c+LcpLuphlppps.sch..t.....h+shVDo ............................RscPctLlPustslIulths....Yhs...................t....s.............................p............s..l..............p...s.....scp.....GhlSRYAh....G.....+..DYHcllRpRLccLuc......h......I......p....p....cs....s.sh......p..............h+sFVDo........................... 1 151 311 421 +8164 PF08332 CaMKII_AD Calcium/calmodulin dependent protein kinase II Association Wuster A anon Pfam-B_1025 (release 18.0) Family This domain is found at the C-terminus of the Calcium/calmodulin dependent protein kinases II (CaMKII). These proteins also have a Ser/Thr protein kinase domain (Pfam:PF00069) at their N-terminus [1]. 
The function of the CaMKII association domain is the assembly of the single proteins into large (8 to 14 subunits) multimers [2]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.49 0.71 -4.24 7 782 2012-10-03 02:27:24 2005-08-10 15:27:36 5 6 295 44 244 1086 152 118.40 52 32.17 CHANGED ccpEIlplT-pLlcAIssGDacsYo+lCcsshTsFEPEAhupLl-Gh-FH+FYFE.hhupps+slppslLsP+V+llGD-uAshAYl+LhQhhDcsGhs+ohQupETRVWp++sG+WpsVHhHRSuus ....................................................................................RKQEIIKlTEQ.L.IE.A....ls.sG...D...FE...uYsKl..C..D..P..s..h.TuFE...P.EA......LG.......N.L.V.E.G.h.DFH.......+...F....YFE...N.h..L.u.....K..s.......s..K.s....l.....+.T.....hI..L..N..PHV....H..l.l..G-.-..u..A...C.I.A...Y..l.....R..l..T.Q..a..hD.u...........p..G.hs..+.o.sQ.S..EETR.VWH....R....R....D......G.....K.....W..VHaHpSusP...................................................... 0 55 91 151 +8165 PF08333 DUF1725 Protein of unknown function (DUF1725) Fenech M anon Pfam-B_2110 (release 18.0) Family This family include many eukaryotic and one bacterial sequence. Many of its members are annotated as being putative L1 retrotransposons or LINE-1 reverse transcriptase homologs. The region in question is found repeated in some family members. 22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 20 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.32 0.73 -6.56 0.73 -4.47 24 244 2009-01-15 18:05:59 2005-08-11 08:45:36 6 30 26 0 135 164 33 19.60 62 2.50 CHANGED hpFhGKWMELEsIILSElsp ......FsssWM-LEsIhLSEloQ..... 
0 2 8 20 +8166 PF08334 T2SG GSPII_G; Type II secretion system (T2SS), protein G Fenech M, Desvaux M anon Pfam-B_1144 (release 18.0) Family The Type II secretion system, also called Secretion-dependent pathway (SDP), is responsible for the transport of proteins across the outer membrane first exported to the periplasm by the Sec or Tat translocon in Gram-negative (diderm) bacteria [1,2]. The T2SG family includes proteins such as EpsG (P45773) in Vibrio cholera, XcpT also called PddA (Q00514) in Pseudomonas aeruginosa or PulG (P15746)in Klebsiella pneumoniae. The PulG is thought to be anchored in the inner membrane with its C-terminus directed towards the periplasme [3]. Together with other members of the Type II secretion machinery, it is thought to assemble into a pilus-like structure that may function as a dynamic mechanism to push secreted proteins out of the cell. The polypeptide is organized into a long N-terminal alpha-helix followed by a loop region that separates it from a C-terminal anti-parallel beta-sheet [1]. 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.56 0.72 -4.30 175 1599 2012-10-03 10:38:27 2005-08-11 08:56:08 6 8 1122 10 474 1149 368 105.00 40 69.58 CHANGED plhs.......ph-....cA...+hpsAcsplpslps.AL-hY+L.DsGpYPospp.GLpALlptPsst...ppa.pGsYl..cplP.pDPWGps...YhYh..sPG....t..p.G.thDlhShGsDGp.GGpu.suDIssW ................plhuph-+ActppAh.sDI..s.s...Lcs.AL-hY...........+............L....D..........N..G.c...............Y.P..o....s.p......Q...GL.c.A.Ll..ppPss................spsa..ps...uYl....++L.P..pDPW..Gss............YpYl....sPG............p.....c.....G.thDlaS.hGsDGp..GG-s.stDIssW................................................................. 
0 154 301 395 +8167 PF08335 GlnD_UR_UTase GlnD PII-uridylyltransferase Fenech M anon Pfam-B_2147 (release 18.0) Family This is a family of bifunctional uridylyl-removing enzymes/uridylyltransferases (UR/UTases, GlnD) that are responsible for the modification (EC:2.7.7.59) of the regulatory protein P-II, or GlnB (e.g. Swiss:P05826, Pfam:PF00543). In response to nitrogen limitation, these transferases (e.g. Swiss:P27249) catalyse the uridylylation of the PII protein, which in turn stimulates deadenylylation of glutamine synthetase (GlnA). Deadenylylated glutamine synthetase is the more active form of the enzyme [1]. Moreover, uridylylated PII can act together with NtrB and NtrC to increase transcription of genes in the sigma54 regulon, which include glnA and other nitrogen-level controlled genes [2]. It has also been suggested that the product of the glnD gene is involved in other physiological functions such as control of iron metabolism in certain species [2]. The region described in this family is found in many of its members to be C-terminal to a nucleotidyltransferase domain (Pfam:PF01909), and N-terminal to an HD domain (Pfam:PF01966) and two ACT domains (Pfam:PF01842) [3]. 
20.50 20.50 20.50 20.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.78 0.71 -4.35 122 5858 2012-10-01 22:14:54 2005-08-11 08:58:32 6 24 2142 3 1374 4628 1176 134.10 23 21.87 CHANGED chhctplpcp........tpR+..........tphs..........t.NlK.G.GGlRDlchlh.ltphhh.............thpsLptLhpt.....uhlspp..-hppLpcuhpFLhplcptL.phlss+pscpLs..h-......tptplAph.h....G.................a.............sshtu...ahpphhpttppVpphhphlh ..............................................................hptpl.ppp.................ht.c+...............tt.p................slKhutGGlRDIEalsQ..lh..p..Lhautp..tl.........p..sslpsL.ptls..ph.......uhl..s..pp...-s.tpLp...cuap...hL..pc...l..c...p..t..L...ph....h..t...s....c....t..s....p..t.L....s.......................t..tl..A.h.h...s....................................h.....................ts..t.....hhpth.hthhppltth.p.h.................................................................. 0 366 832 1128 +8168 PF08336 P4Ha_N Prolyl 4-Hydroxylase alpha-subunit, N-terminal region Fenech M anon Pfam-B_2013 (release 18.0) Family The members of this family are eukaryotic proteins, and include all three isoforms of the prolyl 4-hydroxylase alpha subunit. This enzyme (EC:1.14.11.2) is important in the post-translational modification of collagen, as it catalyses the formation of 4-hydroxyproline. In vertebrates, the complete enzyme is an alpha2-beta2 tetramer; the beta-subunit is identical to protein disulphide isomerase [1-4]. The function of the N-terminal region featured in this family does not seem to be known. 
22.70 22.70 22.80 23.90 22.30 22.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.56 0.71 -4.34 57 702 2009-09-11 11:25:59 2005-08-11 09:00:04 6 15 93 0 369 644 0 123.70 30 26.34 CHANGED oSlsphpcLlphEcpLlssLcpYlpt.pp+lcpl+phhpphcppppputp..chppYlusPlNuFtLl+RhppDW.pl..cphhpps.hspp..hlptlpphtp....ph.Popc..DlpsAspulhRLQssYpLpspclApGh.lsGh ................oShsphpcLlthEppL.lpsLcpYlpt.pp+lppl+.......ph...hp.....phct...........pp..putp...s.ctal....upPlNAapLl+RLpsDW.pl..cphhtps....spt.......hlss.lp..pp......hh.Ps..p-..DhpuAupuLhRLQcsYpLcspslupG.l.G......................... 0 106 122 259 +8169 PF08337 Plexin_cytopl Plexin cytoplasmic RasGAP domain Fenech M anon Pfam-B_3123 (release 18.0) Domain This family features the C-terminal regions of various plexins (e.g. Swiss:P51805). Plexins are receptors for semaphorins, and plexin signalling is important in path finding and patterning of both neurons and developing blood vessels [1,2]. The cytoplasmic region, which has been called a SEX domain in some members of this family [3], is involved in downstream signalling pathways, by interaction with proteins such as Rac1, RhoD, Rnd1 and other plexins [4]. This domain acts as a RasGAP domain [5]. 
20.10 20.10 20.10 20.60 20.00 19.60 hmmbuild -o /dev/null HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.65 0.70 -6.19 11 880 2009-01-15 18:05:59 2005-08-11 09:05:10 7 64 115 19 421 669 1 437.30 47 33.18 CHANGED uGIPFLDY+sYshRlFF...........................sGpcsHPlhtchp....ttppsslEpuLshFusLLNsKsFLlsFIHTLEpQ+sFShRDRsplASLLhlALpuKLEYhT-Il+sLLsDLI-ps..sK.pPKLMLRRTESVVEKMLTNWMSlCLYsaL+EssGEPLahLapAIKpQl-KGPVDAlTGcARYTLNE-+LLR-slEa+slsLp..........sl..htscss-u........lsV+VLsCDTIoQVKEKlL-slY..KssPaSQRPcsc-lDLEWRsGptu+llLpD.-DlTohhEst.WK+LNTLtHYpVsDGAoluLssp....sshp.s....................hssssssstpttsh.sc..ps................+haHLV+PpD-.s-.....pp+ucRts...............+sKslsEIYLTRLLoTKGTLQKFVDDLFpoILSh..hspslPlAlKYhFDFLDEQA-p+GIoDPDslHhWKoNSLPLRFWVNllKNPQFVFDIcKosphDACLSVIAQTFMDuCShSEa+LGKDSPoNKLLYAKDIPpYKchVccYY+cIpphsslS-QEMNuhLAE.S+tHss-FsshsALpELYpYlpKYtppI ..........................................................................................................................sIPaLDY+sYs.RlhF..................................................s...u.h.psp..l.h.tchc............t.tt.ph..-puLt.hupLlssK.....hFLlpF..I+TLEtQ.+...sFShRD..R.s..lASLlhh..sL..p..u+hEYhTslh+pLL.-Lh-p...sK..pPKL..hLR.R..T...EoV.sEKhLoNWhohh.....Lap..aL+..................-s.........sGEPLahLapAIKpQh-KGPlDulTscA+YoLs-stLl+p.pl.-apsl.s.lp.............................sl.......ts...t.s...t.................ls.V+l.LsCDTIoQlKEKlL-tla..+shPhSpp.Pp.s.ts...h....-L...............EW..RtG......t.h.......s...p.....hlLpD....D.......h.T........o.h.......h.pst...WK+L...NTL......tHY..p...V..s-...u...u.s.lsLs.p........p...............................................................................h.t.tp.p......t..p.p....t....s.h....p.......pt.......................+haH.LV+s.p..-c....c..........thcscR........................tsKhl.sEIYLTRLL..u.....sK.G.TLQpFVDDhFpslhS.........s.p..s..lPhAlKYhFDFLDE.QApp...+..t.I.p.D.......-shHh.WKo..N..sLPLRF....WVN.llKNPpFlF.Dlc..psshsDAsLSVlAQTFMDuCoho-H+..
LG.............+...............DSPoNKLLYAK-IP.pYKp........h...V-..cYYtsItphs.slS-Q-MsshLsE...S.......+....a..ts.ph........ss.sALpElY.p.Y.hpKYh-pl........................................ 0 104 131 262 +8170 PF08338 DUF1731 Domain of unknown function (DUF1731) Wuster A anon Pfam-B_1045 (release 18.0) Family This domain of unknown function appears towards the C-terminus of proteins of the NAD dependent epimerase/dehydratase family (Pfam:PF01370) in bacteria, eukaryotes and archaea. Many of the proteins in which it is found are involved in cell-division inhibition. 20.40 20.40 20.40 20.90 20.30 19.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.09 0.72 -4.47 150 2466 2009-01-15 18:05:59 2005-08-11 09:12:23 6 12 2252 1 619 1759 633 48.10 39 15.43 CHANGED lP..uhsL+...lllG.Ehu.pllLpuQ+VhPp+LhpsGFpFcassLcpALpsll ..............lPuhsl+....llhG..Ehu.tLlLsGQ+slPp+L.p.p.s.GFpF+assLcpALpsl........... 0 205 395 534 +8171 PF08339 RTX_C RTX C-terminal domain Fenech M anon Pfam-B_2178 (release 18.0) Family This family describes the C-terminal region of various bacterial haemolysins and leukotoxins, which belong to the RTX family of toxins. These are produced by various Gram negative bacteria, such as E. coli (Swiss:P09983) and Actinobacillus pleuropneumoniae (Swiss:P15377). RTX toxins may interact with lipopolysaccharide (LPS) to functionally impair and eventually kill leukocytes [1]. This region is found in association with the RTX N-terminal domain (Pfam:PF02382) and multiple hemolysin-type calcium-binding repeats (Pfam:PF00353). 
30.20 30.20 30.20 30.70 30.10 29.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -11.04 0.71 -4.48 11 230 2009-01-15 18:05:59 2005-08-11 09:14:43 5 17 136 0 4 184 0 136.40 36 15.23 CHANGED shSDhshcDlsFc+VscsLll...........pNs+psplTIpsWFccushup............pscKIEpIlsKsGc+ITScpl-cllp.cscGp.IptpsLsp...................hu-sath......t.ptsslsNslsKlISSsuuFsoup....sptsuhh..lsosths...phpohpLApAA ........phSDlshcDlsFcRlssDLlh...........psscts.s.lThpNWFtcss.ut...................................ps+pIEpIhsKsGccIss-pls+hhp.ptss..htsptlup......................pp..s........shsslsNslsKlISSsuuFsos.....spthush..ls.pt.ss....ppuhpLsps............................... 0 0 0 4 +8172 PF08340 DUF1732 Domain of unknown function (DUF1732) Wuster A, Eberhardt R anon Pfam-B_1065 (release 18.0) Family This domain of unknown function is often found at the C-terminus of bacterial proteins, many of which are hypothetical, including proteins of the YicC family which have Pfam:PF03755 at the N-terminus. These include a protein important in the stationary phase of growth, and required for growth at high temperature [1]. Structural modelling suggests this domain may bind nucleic acids [2]. 20.50 20.50 20.70 21.50 20.20 19.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.88 0.72 -4.16 157 2499 2009-01-15 18:05:59 2005-08-11 09:16:42 6 3 2472 0 580 1771 606 85.80 52 29.77 CHANGED tQElslhApKsDlsEElsRLpoHlsphcchL.p...s...........s...............s.lGR+LDFLhQEhNREsNTluSKus.....shplophsl-lKs.lEphREQlQNlE ......QElslhAp+hDlsEELsRLcuHlpphpplL.cp...p........t........................slGR+LDFlhQEhNREuNTluSKS.......ss-lostul-LKslIEQhREQlQNIE.......... 
1 207 385 489 +8173 PF08341 Fb_signal Fibronectin-binding protein signal sequence Wuster A anon Pfam-B_4004 (release 18.0) Family This domain is found near the N-terminus of fibronectin-binding proteins in Streptococcus where it functions as a signal sequence [1]. 25.00 25.00 25.50 26.00 24.60 24.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.95 0.72 -3.97 30 227 2009-01-15 18:05:59 2005-08-11 09:19:11 6 21 39 6 11 210 0 74.20 35 16.57 CHANGED PaYGY-uhss.h........tYHcLcVs.LpGo.+pYp..VYCFNlc+p.P.cspuhspsa.............Yc+l-Gss.psFppYAtsPR ................saYGYDshssh...........pYHcLpVs...lpGo..csYQ.....VYCFNls+phPpsspu.hspsh.............YKKlcGos.psFppYAtsPR..... 0 0 2 8 +8175 PF08343 RNR_N Ribonucleotide reductase N-terminal Wuster A anon Pfam-B_1066 (release 18.0) Family This domain is found at the N-terminus of bacterial ribonucleoside-diphosphate reductases (ribonucleotide reductases, RNRs) which catalyse the formation of deoxyribonucleotides [1]. It occurs together with the RNR all-alpha domain (Pfam:PF00317) and the RNR barrel domain (Pfam:PF02867). 20.80 20.80 20.80 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.75 0.72 -4.17 65 2010 2009-01-15 18:05:59 2005-08-11 09:27:31 5 10 1914 6 245 1190 16 80.30 42 11.63 CHANGED YhpLNspl.Nl.spsGplp.hcKD+EAlcsahpppVpsNThhFsSlcE+lcYLlcp.sYYEpphl.cpY...shsalccLachAaupcF ..............YatLNs.l.NhhsssGpIp.h-KD+EAlcuahtppVps..NTl..hFsShpE+lsYLlcc.sYY-pshl........scY.......shsFlpcLapaAcspsF........... 0 53 132 191 +8176 PF08344 TRP_2 Transient receptor ion channel II Wuster A anon Pfam-B_1032 (release 18.0) Family This domain is found in the transient receptor ion channel (Trp) family of proteins. There is strong evidence that Trp proteins are structural elements of calcium-ion entry channels activated by G protein-coupled receptors [1]. 
This domain does not tend to appear with the TRP domain (Pfam:PF06011) but is often found to the C-terminus of Ankyrin repeats (Pfam:PF00023). 20.90 20.90 20.90 24.10 20.50 20.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.07 0.72 -4.04 18 680 2009-01-15 18:05:59 2005-08-11 09:37:35 6 28 108 0 347 603 0 62.10 55 7.46 CHANGED CsCs-C.ptpppDSL+HSpSRINsYRALASPuhluLo...ScDPlLoAFcLSaEL+cLuthEpEF+ ................CpCs-.Cspp.pct.DSlpHSRS.RlNsY+uLASPuhluLS..............SEDPlLTAFcLStELpcLu....plEpEFK............. 0 97 125 217 +8177 PF08345 YscJ_FliF_C Flagellar M-ring protein C-terminal Wuster A anon Pfam-B_1149 (release 18.0) Family This domain is found in bacterial flagellar M-ring (FliF) proteins together with the YscJ/FliF domain (Pfam:PF01514). 19.20 19.20 19.30 19.30 19.10 17.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.08 0.71 -4.27 169 2282 2009-09-10 21:18:48 2005-08-11 09:40:36 6 5 2017 0 541 1738 450 168.80 31 31.19 CHANGED LpsllG..s..ssp.spVss-lDFsptppspcpa........cP...sp...........tsl.RSpps.pEss..ssst...sss..G...lPGshoN...h..Pss...........................sssts..sss...................................................t..................sscpcps....pNYEls+shppspp.ssGplc..RLSVAVll.st..........t....................t..thtshssp...clsplpsLVpsAlGas.....ts..................RGDs.......l.sVsshsF .................................................LsPllG.ss.ph+.spVsspl.DFspp-popEpY........sP.....st.............ssl.RScQp.p-sp....pust.....hssG......lP.GuhS.Np.....Pss.......................s......tts...........................................sp.............ssc..pppTpNYEl..s+ol.pcscp..ssG.s..lp..RLSV.AVl.V..st..................................p...................psp.hsh.osc...phpp....l..csLlpsAh...Gas.......tp.......................RGDs.......l.sVsshsF.............................................................. 
1 175 342 438 +8178 PF08346 AntA AntA/AntB antirepressor Wuster A anon Pfam-B_2097 (release 18.0) Family In E. coli the two proteins AntA and AntB have 62% amino acid identities near their N termini. AntA appears to be encoded by a truncated and divergent copy of AntB. The two proteins are homologous to putative antirepressors found in numerous bacteriophages, such as the hypothetical antirepressor protein encoded by the gene LO142 of the bacteriophage 933W [1]. 20.50 20.50 20.60 21.10 19.30 20.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.65 0.72 -3.78 57 810 2009-01-15 18:05:59 2005-08-11 09:43:36 7 14 597 0 102 564 4 70.80 41 31.26 CHANGED VsAR-LHphLcl.ppcFssWhctRhpcYs.FhEs.DFhsh.p.....................tpspuu+............phDYtlTLDhAKEluMlpRs ..............lsAR-LHphL......pV..ppcFssWhc.Rh..p...cYG.F.tEstDah..shpp..........................ht.s.tts..us+...........ptpDYtLTLDhAKElAMlpRs...................................................................... 0 22 65 79 +8179 PF08347 CTNNB1_binding N-terminal CTNNB1 binding Wuster A anon Pfam-B_2064 (release 18.0) Family This region tends to appear at the N-terminus of proteins also containing DNA-binding HMG (high mobility group) boxes (Pfam:PF00505) and appears to bind the armadillo repeat of CTNNB1 (beta-catenin), forming a stable complex. Signaling by Wnt through TCF/LCF is involved in developmental patterning, induction of neural tissues, cell fate decisions and stem cell differentiation [3]. Isoforms of HMG T-cell factors lacking the N-terminal CTNNB1-binding domain cannot fulfill their role as transcriptional activators in T-cell differentiation [1,2]. 
22.40 22.40 22.40 22.80 22.30 22.30 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.81 0.71 -4.06 18 496 2009-01-15 18:05:59 2005-08-11 09:47:26 6 5 89 10 161 406 0 168.80 45 42.31 CHANGED MPQLs.....uuGGD.DLGAsDEhlsFKDEG-.p-EKh...sENs.s.tD....Ls-lKSSLVsEoEsspssss................cth......................pRpsps.h....sa.-c.tc+h-p.s+ppp..shGhh.+s.sYs........uas.hhhh......sps..sNGohuP......................................uNKlsVlpss.th.s..LsPLhs...Yss-Hao.GsPP....shhPs-l.ssKs....Gls.RPspss-lsshYPLssuthGQls...asl.sW .......................................................................................stsuD.-LsusDEhh.FpDE..Gt.pp-......tt..........L.s-.KSSLls.oE...ts.........................................................tct..s............a...t...p.....tt..tt......s..uhh..+sssYs........uas..hhM.............t.shh...ssuu.SPs.............................................................................................................SNKVPVVQ.sHtV.HP..LTPL.IT........YS.sEHFo..PG.s.P..........sHlPsDl.ssKp....Gls.RsPpssDlSsaYPLSPGsVGQIs...aPLGW.............................................................................................................. 0 24 37 88 +8180 PF08348 PAS_6 YheO; YheO-like PAS domain Fenech M anon Pfam-B_2023 (release 18.0) Domain This family contains various hypothetical bacterial proteins that are similar to the E. coli protein YheO (Swiss:P64624). Their function is unknown, but are likely to be involved in signalling based on the presence of this PAS domain. 
21.10 21.10 21.10 21.30 21.00 20.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.58 0.71 -4.54 36 1856 2012-10-04 01:10:46 2005-08-11 09:50:25 6 5 1451 0 282 973 28 116.10 38 51.45 CHANGED lLpsatsls-uluphhGspsEVVLHsL..cs.csolltIsNscloGRplG.sPhTph........uLctlpp.t..pppshtsYhspstcG.+hl+SsohhI+sspschlGhLCINh....Dlsshpth.pphLpt ........Lcsapsll-GLutll.Gsp.CElVLHsL..p....c..h..csShltIsNGchTGRplG.uPl.T-l.........AL.chL+c.....hp........cp..s......s...p...sYh....o+spsG.tlhKSsTlhI+s.c.ct.+.lI.GlLCINh..slss.h...pph...h.......................... 0 69 132 209 +8181 PF08349 DUF1722 Protein of unknown function (DUF1722) Wuster A anon Pfam-B_4169 (release 18.0) Family This domain of unknown function is found in bacteria and archaea and is homologous to the hypothetical protein ybgA from E. coli. 21.20 21.20 21.30 21.40 20.50 21.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.20 0.71 -4.22 94 1436 2009-01-15 18:05:59 2005-08-11 09:51:49 6 6 1379 0 231 801 53 115.00 35 49.27 CHANGED KhlLMAHs.ptY+pLGpllAshpphs.hcphhppYtptlMpALpphAoppspsNVL.HlhGYFKcpLssp-Kp-lhclIppYRpGhlPLlsPlTLL+Halpcas..ssYLtpQs.YL.pPaP ......................KhhlLAHSQstY+cl.GthlAsh.......ppht..l-shhppYppplhthLpcssohpspsNVL.Hl.GYF+ppls..spE+ppltpLIpp.YRpG.p.h.PlhsPL.shlKcahscYP..ssYLtpQpYhp.a......................................... 0 62 133 183 +8182 PF08350 DUF1724 Domain of unknown function (DUF1724) Wuster A anon Pfam-B_1158 (release 18.0) Family This domain of unknown function has so far only been found at the C-terminus of archaean proteins, including several transcriptional regulators of the ArsR family (see Pfam:PF01022). 
20.50 20.50 21.90 20.50 20.40 19.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.04 0.72 -4.44 67 138 2009-01-15 18:05:59 2005-08-11 09:55:08 5 7 40 0 120 138 7 63.20 29 23.56 CHANGED Nsclalh.psslcl.uhs.VTDcahsLuLFscsGp.aDp.ptllSa-spAlpWGcELFpaY+ppucpl ..........Nhplalh.ppslcl.uhs.VTDchhhLuLapc....sGp.aDp..ptllSh.....-ppAlcWGp-LFpaYcppup....... 0 36 69 82 +8183 PF08351 DUF1726 Domain of unknown function (DUF1726) Wuster A, Eberhardt R anon Pfam-B_3131 (release 18.0) Family This domain of unknown function is often found at the N-terminus of proteins containing Pfam:PF05127. Its fold resembles that of Pfam:PF05127, but it does not appear to bind ATP [1]. 21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.71 0.72 -4.30 42 1117 2009-01-15 18:05:59 2005-08-11 09:57:59 6 15 1047 2 384 869 19 91.60 36 11.76 CHANGED Yp-oc+l...LGpTashllLpshcslpPNhluRhl-sVcGGGllllLhsshpphcphhs..............................shcpph...hs..sat..clhtpFpcRFhtpLtpp.cshhlhD ..........pplLGppaphsVhDs.h...c....uhsssshAthsGTlcuGuhllLLls.s.h..pphcph.s..............................ssphRa...psp.sps........ss..s+Fsp+hh.psLsss.ppsh............................................................. 1 121 208 310 +8184 PF08352 oligo_HPY Oligopeptide/dipeptide transporter, C-terminal region TIGRFAMs, Fenech M anon Pfam-B_3025 (release 18.0) Family This family features a region found towards the C-terminus of oligopeptide ABC transporter ATP binding proteins, immediately following the ATP-binding domain (Pfam:PF00005). All characterised members appear able to be involved in the transport of oligopeptides or dipeptides. Some are important for sporulation or antibiotic resistance. Some dipeptide transporters also act on the heme precursor delta-aminolevulinic acid. 
21.00 9.90 21.00 9.90 20.90 9.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.38 0.72 -3.62 59 28166 2009-01-15 18:05:59 2005-08-11 09:59:58 7 33 3987 0 6863 20640 4857 55.40 33 17.52 CHANGED VEhGsspplh..psPtHPYTptLlsuhPphsstpp...........htuphP..shtphsp..................G..ChFtsRCshspst.C ...............VEtGs..sccl..a....ps..P..p......HPYTpuLL..u..u....l..P..p...hsst..t.....................h....t................................................................................................................................ 0 1910 4134 5534 +8185 PF08353 DUF1727 Domain of unknown function (DUF1727) Wuster A anon Pfam-B_2131 (release 18.0) Family This domain of unknown function is found at the C-terminus of bacterial proteins which include UDP-N-acetylmuramyl tripeptide synthase and the related Mur ligase. 25.00 25.00 29.30 28.40 22.00 18.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.45 0.71 -4.46 66 1470 2009-01-15 18:05:59 2005-08-11 10:01:48 5 4 1459 0 228 929 38 108.60 40 24.67 CHANGED LlK..NPsGhspslshl........ssps......................................hslllhlN...sshADGpDsSWlWDsDFEpL.st.p.......lpplhsuG.RttDhAlRL+hA..Gls.............pphph..pp-hcpslpt..hppssscp..lYlL....sTYTAh .....LsKNPsGhspuL.sh.lt......s.ts..............................................hsl...ll...hLN...uNh.....AD...GhD....sS.WIWDsDFEpl..sp.p.......ltp....lhs.uGs.R.tpDlAlRL+lA......Gls....................t....lh...tp-.lppslpp..hp..tps.spc.....shlL....uTYTAh................. 0 76 155 202 +8186 PF08354 DUF1729 Domain of unknown function (DUF1729) Wuster A anon Pfam-B_3179 (release 18.0) Family This domain of unknown function is found in fatty acid synthase beta subunits together with the MaoC-like domain (Pfam:PF01575) and the Acyltransferase domain (Pfam:PF00698) [1]. The domain has been identified in fungi and bacteria. 
25.00 25.00 40.10 39.20 20.80 19.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.77 0.72 -4.18 31 433 2009-01-15 18:05:59 2005-08-11 10:05:45 5 23 350 9 202 457 1 54.90 53 2.12 CHANGED IPsLD-cFEhaFKKDSLWQSEDl-AVlspDspRVCILpGPVAspaos..pssEPlt-IL ..VPVlDc-hcpWa+pDSLWQSED.pth...DsspVCIl.GPsAVtthT..plDEPVu-lL...... 0 48 111 168 +8187 PF08355 EF_assoc_1 EF hand associated Wuster A anon Pfam-B_4111 (release 18.0) Family This region typically appears on the C-terminus of EF hands in GTP-binding proteins such as Arht/Rhot (may be involved in mitochondrial homeostasis and apoptosis[1]). The EF hand associated region is found in yeast, vertebrates and plants. 19.10 19.10 19.40 19.60 18.20 18.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.49 0.72 -4.69 32 382 2009-01-15 18:05:59 2005-08-11 10:07:56 7 37 244 0 248 390 0 73.90 40 11.79 CHANGED sW..pssa.sosspNp.tGhlTLpGaLuQWohsTaLDhppTLEYLuYLGass.....................................ssssAlpVTRtR+hcp+ptps ...............W..ssshsso.sssN-.tGal.TLpGaLuQWo.LoTaLDsppoLE.YLuYLGash..................................................................ttpt.s..pssAl..p..V.TRs++hDhc+tp..................................................... 1 68 122 190 +8188 PF08356 EF_assoc_2 EF hand associated Wuster A anon Pfam-B_3018 (release 18.0) Family This region predominantly appears near EF-hands (Pfam:PF00036) in GTP-binding proteins. It is found in all three eukaryotic kingdoms. 
21.50 21.50 21.70 22.00 21.40 21.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.97 0.72 -4.23 30 391 2009-01-15 18:05:59 2005-08-11 10:11:02 7 40 244 0 254 401 0 88.10 46 14.14 CHANGED spPLsspsLtslKpslpcp.hPsuspp...pGlTlpGFLhLNplahE+GRHETTWsILRpFtYsDsLsLp-caLhPc..........hclPsssSsELSstGY ........................................pPLtspsLpclKp.llp+p...hs-G.Vtc...........sG.lTL..cGFLaLps..LFIp+GRHETTWslLR+FGYsDsLpLss-aLa.Ph..........lclP...s...ssosELs.puY......................... 0 71 126 194 +8189 PF08357 SEFIR SEFIR domain Fenech M anon Pfam-B_33671 (release 17.0) Family This family comprises IL17 receptors (IL17Rs, e.g. Swiss:Q60943) and SEF proteins (e.g. Swiss:Q8QHJ9). The latter are feedback inhibitors of FGF signalling and are also thought to be receptors. Due to its similarity to the TIR domain (Pfam:PF01582), the SEFIR region is thought to be involved in homotypic interactions with other SEFIR/TIR-domain-containing proteins. Thus, SEFs and IL17Rs may be involved in TOLL/IL1R-like signalling pathways [1]. 21.40 21.40 21.50 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.82 0.71 -4.15 15 480 2012-10-02 18:56:14 2005-08-11 10:17:46 6 21 166 0 243 474 10 138.00 22 25.54 CHANGED hKValsYuu.DsshahchVtphAphLpshh..GhEVslDLW-chclsp.G.hpWhtpp....lppushVlllhS.sh.thhcppsscpcusspspup...........t-hFhsshstl...l.p..p..spshp+alsVYF.shspppclPshLph..sspapL.cphsplhscL .......................................+VhlsYSt...Dss.a...phVhphAphLpsh....GhcVh....lDl..a-p.p.........lt..t.u..hpWhtpp.................lpp....ss..hllllso.s.......h..hh.ptt.t.t.p..p..ps.s.s..t.tps................................................tshah..h.t..ht.h............ph..p......stsh.t.+als..shF..t...sppt......p..l..P..shhp........h.ha..p..h...pph.phh.t......................................................... 
0 52 85 148 +8190 PF08358 Flexi_CP_N Carlavirus coat Wuster A anon Pfam-B_2014 (release 18.0) Family This domain is found together with the viral coat protein domain (Pfam:PF00286) in coat/capsid proteins of Carlaviruses infecting plants. 19.70 19.70 24.90 24.60 19.10 16.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.48 0.72 -4.19 25 358 2009-01-15 18:05:59 2005-08-11 10:18:53 5 3 50 0 0 371 0 51.70 47 17.60 CHANGED RLspLp-hLppppsusplsNsuFEh.GRPsLc.sssM+sDsoN.sYsRPSlDtL ..RLspLhEhhtpcppsssloNsuaEh.GRPsLpssssMRtsPsN.PYuR.SlDtL.... 1 0 0 0 +8191 PF08359 TetR_C_4 YsiA-like protein, C-terminal region Fenech M anon Pfam-B_20730 (release 17.0) Family The members of this family are thought to be TetR-type transcriptional regulators that bear particular similarity to YsiA (Swiss:P94548), a hypothetical protein expressed by B. subtilis. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.54 0.71 -4.22 8 631 2012-10-03 00:15:22 2005-08-11 10:46:25 6 4 485 2 267 630 83 130.10 20 62.86 CHANGED FcEKMGpFVE+IccchsstsospEKLtlLlcpHFptLuuDhcLAIVTQLELRQSNpELRhKINEVLKGYLsllDcIltEGhcpGEF+p-LDlRLARQMIFGTlDEsVTsWVMs-pKYDLsALucsVacLLlpG ..................................................................................thlpphcp.th..t.t.....p....s.shp+.Lth...llc....sHh...ph...l....t..p...s...p....s....l.....s.....h.l.h.....h...E..h.+.p.t..s..p..c..l.p...p.c.l..p.p.lh...+...p.....Y....hphlp...cllpcGhppGphc....s.-...ls....s.c.lspphlhGslp.th.lhphhh..s..p..t..p.hs..h.t.sp..hhthh...h........................................ 0 121 209 240 +8192 PF08360 TetR_C_5 QacR-like protein, C-terminal region Fenech M anon Pfam-B_96140 (release 17.0) Family This family features the C-terminal region of a number of proteins that bear similarity to the QacR protein (Swiss:P23217), a transcriptional regulator of the TetR family. 
QacR is able to bind various environmental agents, which include a number of cationic lipophilic compounds, and thus regulate the transcription of QacA (Swiss:P23215), a multidrug efflux pump [1]. The C-terminal region contains the multifaceted, expansive drug-binding pocket, which is composed of several separate, but linked, binding sites [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.51 0.71 -4.18 3 259 2012-10-03 00:15:22 2005-08-11 10:46:40 6 3 203 104 24 172 11 128.70 32 65.29 CHANGED cppcWpEKWcccEKpYoTuTEKLYulAEasL.p-YppPLpNAIpEFuo-.ssosSIl-cMhuLsscslcsYcsIl-EGIQSGEFpI-NVcDlShIluuhLuGLssFhHEh-hcELc+LaNKAIsIFLpGlSs ................................................pW.-pWpcc....ph.h....pTspEKLYths-hhl..psl.p.pPlpp.....A.hpEF.p.hp..hhocp.l.-clht.lpc.c....hsha...cpLl-cGIpSGEFp....ps.sscslshIlsu.h.l.s.G.ls..s.h.ha...p.....s.h.c..chcc.lhpch.slhLpGhs............................................ 0 11 19 20 +8193 PF08361 TetR_C_2 MAATS-type transcriptional repressor, C-terminal region Fenech M anon Pfam-B_3020 (release 18.0) Family This family is named after the various transcriptional regulatory proteins that it contains, including MtrR (Swiss:Q6RV06), AcrR (Swiss:P34000), ArpR (Swiss:Q9KJC4), TtgR (Swiss:Q9AIU0) and SmeT (Swiss:Q8KLP4). These are members of the TetR family of transcriptional repressors, that are involved in the control of expression of multidrug resistance proteins [1,2,3]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.60 0.71 -4.20 12 1463 2012-10-03 00:15:22 2005-08-11 10:46:56 6 2 937 24 179 672 28 119.10 37 55.81 CHANGED sPLpllRElLIalLpusVs-.+pRtlMEIlFHKCEFVGEMssl.phpcpLhhtsYsRIEpsLpcCIptt.LPssLch+RAAIhhRuhhoGlhENWLFsP-SFDLpp-AtsLVDshl-Mlph ..............................................................DPLphLREhLIhsLpthspp.RpptLhc.I.laHK.CEFs..sEMhsltptpcphshp..s.h.pplc....psLppChpt..thL.sssLsschAsIlh+uhlSGlhpNWL..hsP..p.....uaDLhKpA..shVshlLcMh...................................... 0 27 74 128 +8194 PF08362 TetR_C_3 YcdC-like protein, C-terminal region Fenech M anon Pfam-B_4012 (release 17.0) Family This family comprises proteins that belong to the TetR family of transcriptional regulators. They bear particular similarity to YcdC (Swiss:P75899), a putative HTH-containing protein. This family features the C-terminal region of these sequences, which does not include the helix-turn-helix. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.73 0.71 -4.47 16 1091 2012-10-03 00:15:22 2005-08-11 10:47:06 6 4 871 4 194 637 128 140.80 48 65.60 CHANGED LcslL-sWLpshpsFcsps-PhpsLsuYI+sKlEhSR-hPpuS+lFAsEIhpGAPpl.s.LtppL+thscc+sslIptWlcpG+l.AslDPhHLlFsIWAsTQHYADFshQlpsVoG+sthscsta-pAscslppllLcGhtPc .......................LcpIL-lWL.uPLc.s.F.c.t.-.hs.PltAlp.cYIRhKLElSRDaPpASRLFshEhltGAPhLhc..c..LsucL..Ks.L...l.-cK.u.A.lItuWlc....sG+.l.A.......P.lDPpHLIFhIWAsTQ.HYADFusQVcAVT.G...t.sh.t.s-shFsps.s-sVp+lIlcGlts............................................... 0 30 71 133 +8195 PF08363 GbpC Glucan-binding protein C Wuster A anon Pfam-B_3074 (release 18.0) Family This domain is found in the Streptococcus Glucan-binding protein C (GbpC) and also in surface protein antigen (Spa)-family proteins which show sequence similarity to GbpC [1]. 
21.00 21.00 22.60 26.10 20.60 20.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.72 0.70 -5.16 15 406 2009-01-15 18:05:59 2005-08-11 10:50:30 5 32 178 6 30 396 0 263.90 27 26.07 CHANGED EpcKs-..DGaLo.cPpu.QuLVF-s.EPsAplolso...................thtpYsp+ph..phcshcl.pthpssstssushphhushssKtshop.hs.t........VlLc+GpslTsTYTNLpNSpYs....GKKISKlVYpYTlcsoop.ps...+lhlslasDPTlslFsuAaTGssc.+pssl.lcschpFYDEsGp.Ishss...ALlSluSLN+pps........uIEhsKsasGp..FlcIsGSSls.cpsGplYAscohsaKps..GS+ash........SsWDoss......uPsuWYGAGssph.oGsshphTlGupsss.s....................sIWFuhN ...........................................................................................................................................................................ctp..pu.ls.cs.u.psLsapp..EspA.h.s..hps.........................htph.tt.h..th..t...t.h.t.hpps.thht..s.p.th.h.sshsst.p.t................hh..lcpGpshosTYsNLps...upap.....G+K....IoKlh..apY.s.lpss.s.p...ps....ph..hhl.sDP.Thshhhu.......s.psps........p....th.hph.p.hp.Fa..DcsGp...lshsp.........ulhuhuSLNptts.......................phEhlp.s..h..s.sp......al.......IsGSoVsh.p.s.s.s...t......hYusps..phtps......Guphs............................ssWDs.ss.................................us.uaaGAushph.sssphsh....o.h.Gtpsts.s.....................shWFuhN.............................................................................. 0 4 11 20 +8196 PF08364 IF2_assoc Bacterial translation initiation factor IF-2 associated region Wuster A anon Pfam-B_3037 (release 18.0) Family Most of the sequences in this alignment come from bacterial translation initiation factors (IF-2, also Pfam:PF04760), but the domain is also found in the eukaryotic translation initiation factor 4 gamma in yeast and in a hypothetical Euglenozoa protein of unknown function. 
20.90 20.90 21.20 20.90 19.40 20.70 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.44 0.72 -7.78 0.72 -3.94 89 1458 2009-01-15 18:05:59 2005-08-11 10:57:39 6 17 1446 0 330 951 291 40.90 51 4.66 CHANGED stc+lTLp.........R+ppopl+pshupG.....+s....KsVsVEVR..KKRshl+ps ...........tPcKLTLp.........RKTpSTl.p.lsu..ouG.............KS...............K.oVQVEVR..KKRTaVKRs...... 0 70 174 253 +8197 PF08365 IGF2_C Insulin-like growth factor II E-peptide Wuster A anon Pfam-B_4175 (release 18.0) Family This domain is found at the C-terminal domain of the insulin-like growth factor II (IGF-2, also see Pfam:PF00049) in vertebrates and seems to represent the E-peptide [1,2]. 21.00 21.00 21.50 22.00 20.20 20.60 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.89 0.72 -4.21 8 168 2009-01-15 18:05:59 2005-08-11 11:00:23 6 3 122 0 22 173 0 53.80 62 28.71 CHANGED Ka.KYDlWQ.KSuQRLRRGlPAlLRARRaRhhAcclcAtcpAp.hHRPLhoLPopcP ......Ka.+..Y-..sWp.puAQRLRRGlPAlLRA++hRp.AcclcAhcpAh.hHRPLIuLPoc.P.............. 0 1 4 10 +8198 PF08366 LLGL LLGL2 Wuster A anon Pfam-B_4088 (release 18.0) Family This domain is found in lethal giant larvae homolog 2 (LLGL2) proteins and syntaxin-binding proteins like tomosyn [1]. It has been identified in eukaryotes and tends to be found together with WD repeats (Pfam:PF00400). 
22.00 22.00 23.10 24.30 21.90 20.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.31 0.72 -4.14 12 358 2009-01-15 18:05:59 2005-08-11 11:10:57 8 12 89 0 195 366 1 104.40 45 9.98 CHANGED PhpppsPaG........s.PCKuIpKl.a+ss.+supsFlIFSGGMPpsshGc+.pCloVhp.upspssL-hsppllDFhslsps..sschp-PhAlsVLLEc-LVllDLpssGaP ............................hpshhPa.G..........P.PCKsI.Kl....a..+os..c.s..up..sFlIFSG................Ghsh...ss.hGcR..slTVh+..G+stssL...-hsppllDFhTl.......s..........-s.s..h...............s..................s.-...........a......p-.....P......aAlVVLLEc-LVVlDLppsGaP........................ 0 45 59 124 +8199 PF08367 M16C_assoc Peptidase M16C associated Wuster A anon Pfam-B_3062 (release 18.0) Family This domain appears in eukaryotes as well as bacteria and tends to be found near the C-terminus of the metalloprotease M16C (Pfam:PF05193). 25.00 25.00 25.90 25.40 24.90 23.90 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.32 0.70 -5.53 22 854 2009-01-15 18:05:59 2005-08-11 11:13:51 6 17 730 10 360 815 243 242.30 23 25.25 CHANGED hpPcpshppcpspcEppcLcphhppLoEp-pcpIhcpuhpLpphQpppp...sLs.sLPsLslsDlscpscphslpppp.sshplhh+shsTNsIsYhchhhsls.slPt-hhPalsLassslspl.GTtshsYpphtppIphpTGGluhusplhssspsspphp.thtlsupALscpsschFplhp-llspscFsctc....Rl+.LlpphtuphssulssSGHsaAhshusupho.sutlsEphuGls.lchlpcL 
.............................htPctshtpcppptp....ppcLpph....pts.L......o......c......c......-......hpp..l..hc.ps.tp.L....pph..Qp..p.c.............sls...pL...P..t........Lp..lpD.l....s..p..p..h.....p.........h.h.............h...p.................p......p.......h...............s.........s.............h....s...............l........hh.p.ph....T..NGlsYh..phhh........s........l........s...s........l........s....p....-.....h...............sa...lsLhsp.hlsp..............l.G.Ttph.s.a.tc.l.....pphlphpT.G.............G.lsh.....s...hp.....h.............h......s....s.........h....p......s.........h........s......p.........h...............p...s...t..........hh......l........su+s....Ls...pphschhcl.h.p-ll..........pp..sc...F.s...-pc.......Rl.+pllpptps..ph.pss.lhsuGHthAhtp.usuth.o.sut.h......p-.thsGls.hphlppl................................................................................................................... 0 144 229 310 +8200 PF08368 FAST_2 FAST kinase-like protein, subdomain 2 Vella Briffa B, Fenech M anon Pfam-B_2858 (release 10.0) Family This family represents a conserved region of eukaryotic Fas-activated serine/threonine (FAST) kinases (EC:2.7.1.-) that contains several conserved leucine residues. FAST kinase is rapidly activated during Fas-mediated apoptosis, when it phosphorylates TIA-1, a nuclear RNA-binding protein that has been implicated as an effector of apoptosis [1]. Note that many family members are hypothetical proteins. This subdomain is often found associated with the FAST kinase-like protein, subdomain 2. 
26.90 26.90 26.90 26.90 26.10 26.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.05 0.72 -3.61 22 367 2009-01-15 18:05:59 2005-08-11 11:15:47 7 6 76 0 185 347 0 87.50 27 13.55 CHANGED phph+LhpLstslpLEsP-a......psPhL.s...p........hspcstphpsh.ppltcsLpplL.Guppth+hsVhTsasaslDhEshLDpc.tpsLP ......................................hhtpLhpLstslpLE....tPtY......pGPh.L.tt..p.......hhspps.tph...shpppltcsLppLL.Gupph....hptslhsPasa........s.l..DhElhlDpp.tpsLP........... 0 26 39 86 +8201 PF08369 PCP_red Proto-chlorophyllide reductase 57 kD subunit Wuster A anon Pfam-B_2047 (release 18.0) Family This domain is found in bacteria and plant chloroplast proteins. It often appears at the C-terminal of Nitrogenase component 1 type Oxidoreductases (Pfam:PF00148) and sometimes independently in bacterial proteins such as the Proto-chlorophyllide reductase 57 kD subunit of the Cyanobacterium Synechocystis. 27.30 27.30 27.90 27.60 26.30 27.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.22 0.72 -4.09 132 610 2009-01-15 18:05:59 2005-08-11 11:17:14 5 21 408 2 163 571 349 46.20 42 10.64 CHANGED Wss-ApttLc+.l....Ph.FVR....t+lR+ssEphApppGhsplTh-hlhp.A+ ........Wss-ActELpK.I....Ph.FVR....s+l++NTE+aARcpGhpplTlEshhsAK... 0 45 93 131 +8202 PF08370 PDR_assoc Plant PDR ABC transporter associated Wuster A anon Pfam-B_2126 (release 18.0) Family This domain is found on the C-terminus of ABC-2 type transporter domains (Pfam:PF01061). It seems to be associated with the plant pleiotropic drug resistance (PDR) protein family of ABC transporters. Like in yeast, plant PDR ABC transporters may also play a role in the transport of antifungal agents [1, also Pfam:PF06422]. The PDR family is characterised by a configuration in which the ABC domain is nearer the N-terminus of the protein than the transmembrane domain [1]. 
22.70 22.70 23.30 23.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.01 0.72 -4.46 38 427 2009-01-15 18:05:59 2005-08-11 11:20:17 6 26 40 0 289 452 0 64.60 40 4.80 CHANGED Wsphhsss..s....pTLGtslLcuRGlas-shWYWIulGALlGFsllFNhlaoLALsaLpPhs+upullS .............Wpp..sss...s....polGhtlLcuRGlFs-s..hWYWIGlGALlGaslLFNllFslALsaLs..Phscspshl..................... 0 35 197 250 +8204 PF08372 PRT_C Plant phosphoribosyltransferase C-terminal Wuster A anon Pfam-B_3195 (release 18.0) Family This domain is found at the C-terminus of phosphoribosyltransferases and phosphoribosyltransferase-like proteins. It contains putative transmembrane regions. It often appears together with calcium-ion dependent C2 domains (Pfam:PF00168). 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.87 0.71 -4.72 29 485 2012-10-01 21:10:52 2005-08-11 11:26:41 5 13 121 0 278 450 4 140.40 36 18.93 CHANGED LPTlFLYhFhIGlWp..YRhRPRtPP.HMDs+LSpA-u..spPDELDEEFDoFP.To+ssDlVRhRYDRLRoVAGRlQoVVGDlATQGERlQALLSWR..DPRATulFlhhCLlsAlllYssPh........+llsllsGh....YhlRHP+FRs.+hPSsPhNFFRRLPupoDshL ..........................................................................................hs.s.-s..h.p..DElD.E..E.hD.shs..ss.c......chl+h..RYctlpsVuu+.lQsllG-lAo.GERlpslhsWp..sPchoslhllh..h.hlusll..LYhsPh..........+hl.sllhGl...........ahhp+......+hRs.....t.h...P...s.....l..sF..hpRlPucsp................................................ 0 51 143 213 +8205 PF08373 RAP RAP domain Fenech M anon Pfam-B_5583 (release 17.0) Domain This domain is found in various eukaryotic species, where it is found in proteins that are important in various parasite-host cell interactions. It is thought to be an RNA-binding domain [1]. The domain is involved in plant defence in response to bacterial infection [2,3]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.82 0.72 -4.18 79 594 2012-10-11 20:44:44 2005-08-11 11:27:25 5 25 110 0 425 598 10 58.80 25 8.24 CHANGED lElsGspHa..............hpso.pphp......spsth+p+hLpthG.apl.lplsaa....................-Wpph...sppp+hp.Y..lpch ..............................h.s.ppF.......................................................................spss..pphh..............Gppsh+cRpL..........p.t.h.G..a...pl.lplsaa....................EWpph.....pspppphp.YL+p............................................ 0 160 211 306 +8206 PF08374 Protocadherin Protocadherin Wuster A anon Pfam-B_4100 (release 18.0) Family The structure of protocadherins is similar to that of classic cadherins (Pfam:PF00028), but particularly on the cytoplasmic domains they also have some unique features. They are expressed in a variety of organisms and are found in high concentrations in the brain where they seem to be localised mainly at cell-cell contact sites. Their expression seems to be developmentally regulated [1]. 
25.20 25.20 25.50 25.20 25.00 25.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.57 0.70 -4.71 7 319 2009-01-15 18:05:59 2005-08-11 11:47:02 6 13 55 0 124 322 0 198.20 47 21.31 CHANGED oluNuollps.lt+SlcTPLspsIussspst.u..pphlsIhlullAGshsVlLlIhlsshlR.CRpspp+puhQtGKp..sp-ahoP..........ptps+p....pKt..KKcKK...........pKSsKs.h.shVTlcts+s--tht-phshplsL..-hpppohu+a.....hPssapPs............SPDLARHYKSuSP.PuhQLpPpoPsA.sKKHpllQ-LP.sNTFVGus..................ucssSouSDpaSs.pCposs.pa ..........................oluNuohlpsLl.t+Shc.TPLs.sIu.sss.p.t.s..pphlsIhlulVAGshsVlllIhlssll.R.CR.....psp.php.....uhQtuKp..sp-ahoP..........p.csKp.........pKt...KKc..K.K...........pK...SPKs.h.shVTlEtsKs--tst-t.hshplsL....-h-ppohu+a.....hPssatPs............SPDLARHYK.SuSP.PuhQLpPpoPsu.sKKHpllQ-LP.sNTFVGu...................ucssSouSDpaSs.pspops.ta..................... 0 5 16 57 +8207 PF08375 Rpn3_C Proteasome regulatory subunit C-terminal Wuster A anon Pfam-B_4098 (release 18.0) Family This eukaryotic domain is found at the C-terminus of 26S proteasome regulatory subunits such as the non-ATPase Rpn3 subunit which is essential for proteasomal function [1]. It occurs together with the PCI/PINT domain (Pfam:PF01399). 21.20 21.20 22.90 21.80 19.30 18.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.39 0.72 -3.57 24 366 2009-09-11 08:23:08 2005-08-11 11:49:06 6 6 295 0 240 357 3 65.90 51 13.11 CHANGED DlYSTp-PppsFcpRIpFCLpLHN-uV+AMRYPssppcpp.cpt.pppp-c-p....-ltccls-s.-hD..D ......DlYuTpEPQhAFHpRIpFCLslHN-uVKA......MRaPsssapp...-..lcsA...c-pREREp..-..ElAKEhsEt..D.D................................. 
0 84 136 202 +8208 PF08376 NIT Nitrate and nitrite sensing Wuster A anon Pfam-B_37103 (release 17.0) Family The nitrate- and nitrite sensing domain (NIT) is found in receptor components of signal transducing pathways in bacteria which control gene expression, cellular motility and enzyme activity in response to nitrate and nitrite concentrations. The NIT domain is predicted to be all alpha-helical in structure [1]. 20.40 20.40 20.50 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.53 0.70 -4.85 118 931 2009-01-15 18:05:59 2005-08-11 12:01:15 5 47 468 2 424 987 49 239.30 17 31.40 CHANGED husLlHpLQcER...........GhSusal....uSp......G.t...th....sspLtspRptoDpthsphpsthp........ph.ts.stt...........htpplsphhptLsp.LsslRppl.....Dshs.lsssps.hshYo.pllstlLsllsplsptsscsplspthsAhhshhpuKEtAGpERAllussaus.....sph.ssshhpchhshlupQpshh.cpFtsh..ussptpphapphhss...sshpp...lpphRphshs...........pss........tts.hs.hsupp.WFsts.Tp+I.....shl+pl.Esplss..plhpts ........................................................................h.spllptLQpER.............................sho.ssaL.............uus.......s...p..th........tsplppp+.tpoDpshsphppthp...........ph.tt.st............htptlsphhp.t.lsp.LsslRpp..l........................ss..tp...hs....s......sp.s....h.sh.Yo.pll..pt...l.......l.sh.h.t.......p.hs.....p....t...s......s...s.spl...sp...th...t.....u...h...h.......s.....l.......h.....p.....u+EhsutpRu.lhsss.hst..............................sp.h...sss...t....hpp......h.h....shhspp......p.....t....h.......h..pp.htsh..............us..s..p.t.....pphhpp.hhss.......sshpp......hpp...h.pp...th..hs...................................pss............tts..ht.....hs.s....pp...Whsh....totpl....sh....hppl.cpplhtph....h.................................................................................................................... 
1 142 295 380 +8209 PF08377 MAP2_projctn MAP2/Tau projection domain Wuster A anon Pfam-B_26981 (release 17.0) Family This domain is found in the MAP2/Tau family of proteins which includes MAP2, MAP4, Tau, and their homologs. All isoforms contain a conserved C-terminal domain containing tubulin-binding repeats (Pfam:PF00418), and a N-terminal projection domain of varying size. This domain has a net negative charge and exerts a long-range repulsive force. This provides a mechanism that can regulate microtubule spacing which might facilitate efficient organelle transport [1,2]. 18.20 18.20 18.30 18.30 15.50 18.10 hmmbuild -o /dev/null HMM SEED 1134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.07 0.70 -14.11 0.70 -7.01 5 79 2009-01-15 18:05:59 2005-08-11 12:08:39 5 7 38 0 30 63 0 731.30 53 59.19 CHANGED DKVADVPVSEATTVLGDVHSPAVEGFVGENISGEEKGTTDQE...KKETSTPSVQEPTLTETEPQTKLEETSKVSIEETVAKEEESLKLKDDKAGVIQTSTEHSFSKEDQKGQEQTIEALKQDSFPISLEQAVTDAAMATKTLEKVTSEPEAVSEKREIQGLFEEDIADKSKLEGAGSATVAEVEMPFYEDKSGMSKYFETSALKEDVTRSTGLGSDYYELSDSRGNAQESLDTVSPKNQQDEKEL.LAKASQPSPPAHEAGYSTLAQSYTSDHPSELPEEPSSPQERMFTIDPKVYGEKRDLHSKNKDDLTLSRSLGLGGRSAIEQRSMSINLPMSCLDSIALGFNFGRGHDLSPLASDILTNTSGSMDEGDDYLPPTTPAVEKIPCFPIESKEEEDKTEQAKVTGGQTTQVETSSESPFPAKEYYKNGTVMAPDLPEMLDLAGTRSRLASVSADAEVARRKSVPSEAVVAESSTGLPPVADDSQP.VKPDSQLEDMGYCVFNKYTVPLPSPVQDSENLSGESGSFYEGTDDKVRRDLATDLSLIEVKLAAAGRVKDEFTAEKEASPPSSADKSGLSREFDQDRKANDKLDTVLEKSEEHVDSKEHAKESEEVGDKVELFGLGVTYEQTSAKELITTKETAPERAEKGLSSVPEVAEVETTTKADQGLDVAAKKDDQSPLDIKVSDFGQMASGMSVDAGKTIELKFEVDQQLTLSSEAPQETDSFMGIESSHVKDGAKVSETEVKEKVAKPDLVHQEAVDKEESYESSGEHESLTMESLKPDEGKKETSPETSLIQDEVALKLSVEIPCPPPVSEADSSIDEKAEVQMEFIQLPKEESTETPDIPAIPSDVTQPQPEAlVSEPAEVRGEEEEIEAEGEYDKLLFRSDTLQITDLLVPGSREEFVETCPGEHKGVVESVVTIEDDFITVVQTTTDEGELGSHSVRFAAPVQPEEERRPYPHD.EELEVLMAAEAQAEPKDGSPDAPATPEKEEVPFSEYKTETYDDYKDETTIDDSIMDADSLWVDTQDDDRSILTEQLETIPKEERAEKEARRPSLEKHRKEKPFKTGRGRISTPERK.VAKKEPSTVSRDEVRRKKAVYKKAELAKKSEVQAHSPSRKLILKPAIKYTRPTHLSCVKRK 
..........................................................................................................................................................................................................................................................................................................................t....t.......t...t......p.......p.p.h...s.tchph.h..t...D+SGMStYFETSsLK--h..sc..s..t...uSDYYELSss+t...-..sh...t.....p.tp.pt.............sh.EhuYSTLspsh..............st-ph.TlsPplht-Kp-..hhsKNKDD..LpLSRSLGLGGRSAIEQRSMSINLPhSCLDSluLGhshGRu+sLSPLAoDILopTSGShDEusD.YLPsTTPul-KhPsFP...h-stt.......t..............+.....s.t..p......p...sESP..ApphYKNGsVhuPDLPEMLDLsGoRSRLuS.ss-sEhsp+KSs.u-....h.-sts.t.hs.hssps...h.Ks-uQhE-hGYCVFscYosPhPSPsps......t...........................................................................................................................................................................................................................................................................E.s..+-..+..E.EhKEKs.sKPDLVHQEA...hDtE-sYp.oGtt....c..................t.....t...c.......c...p...p..........p...........p....p...t........pc....................h...h.t.......t.................t.....t........p.......t.p....c..h.t-...........s...-...................E.+uslESVVTlEDDFITVVQT.hD-uE.uuHSVRFus..p.-..ph..........t..cE.p.....-sp.Es......p-.sP...-sPAsPp+E.Els.SEhhTEoYDDYKDETTIDDSIMDsDSLWVDTQDDDRSIhTEQLETlPKEE+.A.......-K-.......sR....RsSL-KH.RKEKPh.Ko...G...R..GRI..STPERK.lAKKEPST.V.SRDEVRRKK..AVhKKAELuKKoElQuHSPSRKhILKPAlKaTRPT.HhSCVKRK................................................................ 1 2 5 15 +8210 PF08378 NERD Nuclease-related domain Wuster A anon Pfam-B_9750 (release 17.0) Family The nuclease-related domain (NERD) is found in a range of bacterial as well as archaeal and plant proteins. 
It has distant similarity to endonucleases (hence its name) and its predicted secondary structure is helix - sheet - sheet - sheet - sheet - weak sheet/long loop - helix - sheet - sheet. The majority of NERD-containing proteins are single-domain, but in several cases proteins containing NERD have additional domains which in 75% of cases are involved in DNA processing [1]. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -11.32 0.71 -3.82 377 1767 2012-10-11 20:44:44 2005-08-11 14:40:20 6 64 1278 0 464 1715 271 125.70 17 30.27 CHANGED pGttuEpts..t....phL.....pt.h.pp....h.h.h......ts.h.h.......................tt..pspIDhlllss.tt..lhllEsK....s.............hp.G......h....t..........h.....................................thts...Plpp.s....ppphptl....pph...l........................................h......hpslllhs ................................................uE..h.h.phL............pt.l.ppt...hhl.h.......ps.lhl...............ttspttpIDalllst..pG..lhllEsK......s.........................hp..G....p..lh....ss........tp..s....h.................................................h...t............................h.phts.......PlpQ..s..........tpphphl.....tph....l...........................................................h................................................................................................ 0 152 325 415 +8211 PF08379 Bact_transglu_N Bacterial transglutaminase-like N-terminal region Fenech M anon Pfam-B_2190 (release 18.0) Family This region is found towards the N-terminus of various archaeal and bacterial hypothetical proteins. Some of these are annotated as being transglutaminase-like proteins, and in fact contain a transglutaminase-like superfamily domain (Pfam:PF01841). 
21.50 21.50 21.60 21.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.48 0.72 -3.67 188 1621 2009-01-15 18:05:59 2005-08-12 17:21:43 5 8 789 0 617 1553 423 80.90 28 16.48 CHANGED phplpHpTpYpYspPVshuhpplRLpP..pssst.........Qp.llsaplplsPtstphpp.hpDhaGNtlphh.sh.ppspp.c.lp...lpspupV-h ..............................hplpHpTpYpYsps.Vphu.phlRLpP.tspsp.........Qp.llsaplplp.......Pts....t...........hhst.tpDsaGNhlsthsh.spspp..p.Lp...lpspuhV-............... 0 164 370 493 +8213 PF08381 BRX DZC; Transcription factor regulating root and shoot growth via Pin3 Fenech M, Coggill P anon Pfam-B_2116 (release 18.0) Domain The BREVIS RADIX (BRX) domain was characterised as being a transcription factor in plants regulating the extent of cell proliferation and elongation in the growth zone of the root [1,2]. BRX is rate limiting for auxin-responsive gene-expression by mediating cross-talk with the brassino-steroid pathway. BRX has a ubiquitous, although quantitatively variable role in modulating the growth rate in both the root and the shoot [3]. The family features a short region of alpha-helix, approximately 60 residues in length, which is found repeated up to three times [1]. BRX is expressed in the vasculature and is rate-limiting for transcriptional auxin action [4]. 24.50 24.50 24.70 26.80 24.10 24.00 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.98 0.72 -4.91 34 367 2009-01-15 18:05:59 2005-08-15 09:27:43 6 31 36 0 237 374 0 56.60 49 10.58 CHANGED ssptEWl-QhEPGVYlThsshssG.sptLKRVRFSRcpFs.cppAcpWWpENps+lhcpYs .....t..ppEWlEQsEPGVYITls..sLs.s......G.sp-LKRVRFS.....RcpFs.chpAcpWWtENps+lhcpYs...... 0 32 143 192 +8215 PF08383 Maf_N Maf N-terminal region Fenech M anon Pfam-B_3103 (release 18.0) Family This region is found in various leucine zipper transcription factors of the Maf family. 
These are implicated in the regulation of insulin gene expression [1], in erythroid differentiation [2], and in differentiation of the neuroretina [3]. 20.20 20.20 20.30 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.60 0.72 -4.40 9 171 2009-09-11 10:53:55 2005-08-15 09:39:28 6 2 45 0 78 126 0 34.90 59 11.59 CHANGED spLE-LYWM..us..h.QQhs.PEuLsLTPEDAVEALIGs ...spLEDLYWM..uu...h.QQlN.PEALsLoPEDAVEALIut....... 0 5 16 35 +8216 PF08384 NPP Pro-opiomelanocortin, N-terminal region Fenech M anon Pfam-B_1053 (release 18.0) Family This family features the N-terminal peptide of pro-opiomelanocortin (NPP). It is thought to represent an important pituitary peptide, given its high yield from pituitary glands, and exhibits a potent in vitro aldosterone-stimulating activity [1]. 20.50 20.50 20.50 20.60 20.40 20.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.54 0.72 -4.33 20 577 2009-01-15 18:05:59 2005-08-15 09:51:58 5 4 358 0 22 546 0 23.50 84 11.75 CHANGED QCWE.so+C+DLsoEsslLECIcsC+sDLoAEoPlaPGNGHhQPho ................................SAESPVFPGNGHMQPLS.......... 0 1 3 8 +8217 PF08385 DHC_N1 Dynein heavy chain, N-terminal region 1 Fenech M anon Pfam-B_3094 (release 18.0) Family Dynein heavy chains interact with other heavy chains to form dimers, and with intermediate chain-light chain complexes to form a basal cargo binding unit [1]. The region featured in this family includes the sequences implicated in mediating these interactions [2]. It is thought to be flexible and not to adopt a rigid conformation [1]. 
22.20 22.20 22.40 22.20 21.60 22.10 hmmbuild -o /dev/null HMM SEED 579 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.78 0.70 -6.14 40 1478 2009-01-15 18:05:59 2005-08-15 10:01:37 7 120 275 0 1051 1462 56 440.20 18 14.51 CHANGED DsshlppLpssVspWh+pIpplhphsps.st......sostsElpFWpshppsLppItpQLpu.cVphslclLcpu..Kp.phhssFps.t.slccuhscspchsphlcsh..hs-hL.tss....s..hsplppslsslhpplphhaphop..hs.pRhhsLlptIssplhpphpphL....ssp....p.lhp.h....-hs-hpphlps.shplhptaccpacchpshlcp.....tpccs.....ppaphp..tlhs+.hsphppRlppltphppsppph.pplhpt..................s.pst.hspslp...phtc.....-hppsacshpsls..sLDhssp...papsshppapp+hpclEpplsshlpct.h.sss.csspphF+llspFpsLh.hRPp.......Ipsslp.-htspLlcphcp-lcplpphFppph....p.tttphshh+shPs....luGsIhWs+pLpc+Lpph.hcplc...slh..s.....h.pps-.GpclppchsphpppLsp..pphappWhp...plppps..hslstsll..........pstsphcLh.VNF-spllpLh+El+hLthl..................s.........h..plPtshtplhpptcplhshtssLpchlcsasplhp.pl..pp.....hpt......Lltsphpclpphl.ptGls.lsWpsht..lptahp ........................................................................................................................hh..hpt.h..W.ptlpt...hht..p.t...........................s.s.tE.hpaW...th..............tth..l.pph.pt.t.h.hh...ht.hLp................pp................hh..........h..t........t.l.tth.pspp....hhtsh......p.l...t.....................p......p..h..p..h..l..lhphlph.hht.......sp........s....pRh....h...Lhpt......l...s...p.hhp.....hh...thl............s.t......p.lhp..............................ht.....h..phht...shphhp.app..h......th.pt..h..........t.t...............................t.a..ph.......hl..p.hp.h.pRht.p.l.....p..hhp....h.ph...th.....................................................................................................t....th........th.t..................php.t.h.h..p..h.p..p.h....p..............hc.ptt.............ta.p.t....shtt.....app.ph......tt....l-tplts......hhp.........pt.h.tph...t.sh..pp...................hh...ph...h...t.h..a..tt...lh........c.h.................lttt.....h
..th...l..ht.h.tp-lpt.h.pp..........app...ph.......................tt......h...ps.........hP..........huut.lhWsppl.p+........l.pt..hphhp...........th.................................h.p....p...s........pth.......hpp....hpp.......h.......ht.lt.p.............t..haptWhp..........php....p.........................t...h....p....t..lh..........................................t.tthtl.............lNas.plhtlh.cEsc.h..h...............................................................t.........h..plP..h.phh.t..ptp..p..hh....h...tL..........hh.p.ap..phhp..th...............................Lh....t..htthp.th.l.....t.............u...h.........ltWps...........t........................................................... 0 416 544 845 +8218 PF08386 Abhydrolase_4 TAP-like protein Fenech M anon Pfam-B_3096 (release 18.0) Family This is a family of putative bacterial peptidases and hydrolases that bear similarity to a tripeptidyl aminopeptidase isolated from Streptomyces lividans (Swiss:Q54410). A member of this family (Swiss:Q6E3K7) is thought to be involved in the C-terminal processing of propionicin F, a bacteriocidin characterised from Propionibacterium freudenreichii [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.41 0.72 -4.13 38 1197 2012-10-03 11:45:05 2005-08-15 11:43:12 5 9 575 0 480 3862 878 102.40 26 19.93 CHANGED sPh..aGshhstshh..CssW............Ps..ssstshsphs.s...s...s.sslLllssptDPsTPapsApchsctLss..u....tllohpGt.GHsshh.ssspClsptlssYLhsGphPt.....tsssC ............................................................huthhu.s.h...Ct..h..W..............................Ps...tss..t...h.t...ht..s.....ts............s..sPlLllus.p.tDPsT...Ph.p......s.A.p.p.h.upp..h.ss....u........hl.l...o..hcGs...G...Hsu...h.........h...ts...........s......t...C...s.....s.ps......l.ssY.L.h.s.G.phPt......tshhC.................................... 
0 153 319 425 +8219 PF08387 FBD FBD Fenech M anon Pfam-B_1153 (release 18.0) Family This region is found in F-box (Pfam:PF00646) and other domain containing plant proteins; it is repeated in two family members. Its precise function is unknown, but it is thought to be associated with nuclear processes [1]. In fact, several family members are annotated as being similar to transcription factors. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.48 0.72 -4.50 129 806 2009-01-15 18:05:59 2005-08-15 11:49:10 5 32 19 0 451 833 0 49.30 25 12.21 CHANGED ta...p..sslPcCLhopLchlph.p...........tapuppp.c.hcls.pYlLcNuphL.Kchslph ................................................s.cCl.spL.cplph.p............sapGpps..-..lphs..pallpNupsL.cphsl............. 0 116 212 319 +8220 PF08388 GIIM Group II intron, maturase-specific domain Fenech M anon Pfam-B_4063 (release 18.0) Family This region is found mainly in various bacterial and archaeal species, but a few members of this family are expressed by fungal and chlamydomonal species. It has been implicated in the binding of intron RNA during reverse transcription and splicing [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.88 0.72 -4.19 164 2393 2012-10-02 14:46:49 2005-08-15 11:52:43 6 14 1025 0 536 2190 222 80.40 24 19.71 CHANGED l+phpp+l.+p.lh.+ptps.......hshpplIppLNPllRGWssYaphs.sspph..applDphlhp+LhpWth++ap..p.......ht...........pphhhp+.....ha ...............................pphpp+l...+p...lh....+.ptt.s...........hshpphlc....p.LN.hl+GWhsYaph..s...ss....pph...hpplD.phlhp+Ltthhh+ca+...p.......ht...........hh.........hh.......................................... 
0 205 383 464 +8221 PF08389 Xpo1 Exportin 1-like protein Fenech M anon Pfam-B_4058 (release 18.0) Family The sequences featured in this family are similar to a region close to the N-terminus of yeast exportin 1 (Xpo1, Crm1, Swiss:P14068). This region is found just C-terminal to an importin-beta N-terminal domain (Pfam:PF03810) in many members of this family. Exportin 1 is a nuclear export receptor that interacts with leucine-rich nuclear export signal (NES) sequences, and Ran-GTP, and is involved in translocation of proteins out of the nucleus [1,2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.74 0.71 -4.29 59 1456 2012-10-11 20:01:01 2005-08-15 12:46:41 7 30 320 21 1051 1452 11 147.50 19 14.93 CHANGED pphlhsc..Lshslsplhhp.a....sppWps..hls-ll...shhps.s...............shsthhlplLphLsEElh-....htp..sphspp+.ppcl+ctlpsp......hpplhphhhplLpp.ts..................splhptsLcslssalp.....Wlslshlh.....s.s...llshl.hphL......s.sph..........ppsAhcCL .........................................................t..hlhs+Lshsls.p.lhhppa......spp.Ws.s..hls-ll.........ph..hps..s................................................ststhhltlLphLsEElhs...................htp....tphspp+..pppl+.....p....t.....lppp.........hppl..h....phhh.....p.llppttp................................................splhptsLc.sltpalp........alslshl..h.................p..st........ll.p.hl.hp.hl..........p....h...........ht.shcs...................................................................................... 0 370 587 863 +8222 PF08390 TRAM1 TRAM1-like protein Fenech M anon Pfam-B_3108 (release 18.0) Family This family comprises sequences that are similar to human TRAM1 (Swiss:Q15629). This is a transmembrane protein of the endoplasmic reticulum, thought to be involved in the membrane transfer of secretory proteins [1]. 
The region featured in this family is found N-terminal to the longevity-assurance protein region (Pfam:PF03798). 25.00 25.00 25.00 25.00 22.70 24.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.04 0.72 -4.47 61 512 2009-01-15 18:05:59 2005-08-15 12:53:12 6 5 230 0 347 489 0 65.70 30 16.18 CHANGED hsphFltlsYph................sssshYspGhcDhsaVhFYhlhhohlRthlhcallcPlu.+thslppp+p ...................................................hthFlhlpYpl...............................................sssshYst.GhcDlshVhFYhllhshlRshlh-alLcPlu.+phtlppp+.............. 0 80 162 266 +8223 PF08391 Ly49 Ly49-like protein, N-terminal region Fenech M anon Pfam-B_1187 (release 18.0) Family The sequences making up this family are annotated as, or are similar to, Ly49 receptors (e.g. Swiss:P20937). These are type II transmembrane receptors expressed by mouse natural killer (NK) cells. They are classified as being activating (e.g.Ly49D and H) or inhibitory (e.g. Ly49A and G), depending on their effect on NK cell function [1]. They are members of the C-type lectin receptor superfamily [2], and in fact in many family members this region is found immediately N-terminal to a lectin C-type domain (Pfam:PF00059). 30.00 30.00 30.50 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.67 0.71 -3.97 27 249 2009-01-15 18:05:59 2005-08-15 12:57:01 5 4 35 18 46 271 0 113.20 44 46.66 CHANGED SVPWpLIVlsLGILChLLLVoVAVLsTpIFQYtQEKHph.pEsL.NhppphssMQsD.pLKEEhLpNKolECs.hps.......hL-sLpREQpRhapKTKohhsS.QcT.............G+tsEh+WFCaGlKCYYF ..........SVPWpLIVlsLGILChL.LLVhVsVLssp.IFQhtQ.cK+p.h..pEhL.Npppphs.....h.MQsDhpLK....EE.h..LpNKSlEsp.hpp.......hL-Sl.p+.-..QpRh.....apc..s+slh....c...o.pcT..............G+hs...c..aW.CaGhpCYYF......................................................................... 
0 1 2 4 +8224 PF08392 FAE1_CUT1_RppA FAE1/Type III polyketide synthase-like protein Fenech M anon Pfam-B_1177 (release 18.0) Family The members of this family are described as 3-ketoacyl-CoA synthases, type III polyketide synthases, fatty acid elongases and fatty acid condensing enzymes, and are found in both prokaryotic and eukaryotic (mainly plant) species. The region featured in this family contains the active site residues, as well as motifs involved in substrate binding [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.77 0.70 -5.34 37 707 2012-10-02 12:25:54 2005-08-15 13:22:38 7 11 136 0 353 4319 396 250.90 42 57.47 CHANGED ahhpRPR.s...VYLlDauCY+Psccp+lohpthhchsctssph.spcs.........lcF.p+llpRSGLG--TYlPculhphPsp..s..ohtpARpEuEtVhFuAlDcLFt+T......s.lpPc.-IGlLVVNCSlFsPTPSLSuMlVN+YKhRuslpSaNLuGMGCSAGlIulDLA+sLLp.sp..NoaALVVSTEslo.NWYhGscRSMLlsNCLFRhGGAAlLLSN..+st-+c.RuKYcLhHsVRTHpGADDcuapClhQcED-pGphGVuLS..K-LhsVAGcALKsNIsTLGPLVLPhSEpLhFhss.llt+Kl 
..................................................................................................................hpRs+..s....lYLlDauCacPs.sp.h.....+........sshtthhch..p.h....t.......ph.....stps.....................lcF.p...+...ll...p...+SG.....l.......G.-.....c.........T...a.....h........P.......u...l.h...t.....h......P.sp................sh.t..t...u.........R........p........E..s..-....Vh....a........u.u.l...-.p....L.h.t+.T........s...lp.......P........c..-ls.l.L.l.V.N......s..S........h...F..s.P...T...P....S.Lo..uM.l...lN...+.Y.+h.R.s.s....l.t..o.a.NLuGM....GCSAGlI..ulD.LA+sl.L....p....sp.......s..s......hAlV....VSTEs..l.....o.............s...a.....Y.....h.....G....s.....p.....R.....u.......M.....L.....l.sNC.LFRh....G.G.A..Al.LL.S..N..........+........t.........t......t....t...+.............u.K..Y.....c.Lh...+..sV.RTHp..G..As..DcuapClhQcE.....D....p....p......G...p.hGlsL..S..K.-Lht.lAGc.AL+sNI.ss.LG..PlVLPhSEplhFhhs.hlh++............................................................................................................. 0 72 218 296 +8225 PF08393 DHC_N2 Dynein heavy chain, N-terminal region 2 Fenech M anon Pfam-B_3094 (release 18.0) Family Dyneins are described as motor proteins of eukaryotic cells, as they can convert energy derived from the hydrolysis of ATP to force and movement along cytoskeletal polymers, such as microtubules. This region is found C-terminal to the dynein heavy chain N-terminal region 1 (Pfam:PF08385) in many members of this family. No functions seem to have been attributed specifically to this region. 
23.50 23.50 24.00 23.50 23.00 23.20 hmmbuild -o /dev/null HMM SEED 408 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.40 0.70 -5.96 94 2363 2009-01-15 18:05:59 2005-08-15 13:39:22 8 259 303 14 1713 2264 96 354.20 26 10.66 CHANGED ppLpplpp-lpshpplWphhpphpp.phppatp.tsa.ppl...sspplcpplp...p.hhcphpplscph...pp...........hplhpplpp....plcpapphlPllps.L.+stuh........+...p.....R...HW.pplhp.hls.......sh.......t...ps..hoLpplls.hsl.h..patcplpcIsspAppEhslEptLpclcspWps......hpFphhsacs....p.....shllps..h--lhphL--phsplpohpsS........ahp....Fcpc.sppWcpcLshlpcll-hWhplQ+pWhYLEsIFsu..p..DIpppLPpEspcFppl-ppap.plhpps.ppsspllcss.t...sp.hhppLpphtcpL-plQKuLscYLEpKRptFPRFYFlSs--LL-IL..upup-Pp.tlQpHlpK.hF-uIsplphpppspp.......lhuhhSsE.G.Eplthtp.slphps......pVE..p...WLppl-ppM+poL+phlppulpphp ..............................................................................................................................................h.th.pphp.hpplaph.h..ph.p.th.pt.....hhp.t.a..p..l..........p.p..th...p...tpht........p..h..........p.....p.....h...t...p...h......c.th...pp.......................................hth.h.t...lpp..........pl....cph...h..p...hs..lltt..lps.uh...............+..p....................R..HW.ppl.p.h.ht...........hph.....p..................................ps...h...s.L...tplhp....hsl...h.........ph..tpp....l.pplstt.A.tp...Eh.s.lE....p.t..L.p.p..........htttWps..............................hph.ph....app......................p...........thhl..l.ps.....h-..-.l..hthl--p..htlps.hhsS........a.ht......F.ppp.lppWpp...pLsh......hppl.l....c.WhplQppW.......hYL...EsIF.u..........t...DI..t....pQ....LPp.E.....up.+F.pp..l-...pp.......ap..plM..........p...............ps........ps......s......p.....l.....l............ps............s.........t.........h................................s..hh...............p..........pLp....p.....htp........L...phhpK..u.LspYLE......pK...Rhh.....FP....R....Fa..........F.lSs...s-L..L-IL................u.p..s.p........s.st.....tlQ....Hl.pK.hF-s..ltpl
phpp.s.p................................hhuhh...S....t...E..G....Eh....l....h.....tp.............l.hps................pV.E..t........WLtpl.........tptMptol+phhtpuh.t..t..................................................................................................................... 0 747 940 1412 +8226 PF08394 Arc_trans_TRASH Archaeal TRASH domain Fenech M, [1] anon Pfam-B_18882 (release 17.0) Family This region is found in the C-terminus of a number of archaeal transcriptional regulators. It is thought to function as a metal-sensing regulatory module [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -8.04 0.72 -3.79 8 65 2012-10-03 05:12:49 2005-08-15 13:58:30 5 6 57 0 34 119 16 36.70 39 22.76 CHANGED CDYCGpEIpG-PIshKh+N+lYYsCCsTCpcchKK+h ...........CDaCGppI..t..s..c..P..l..s.hK..h..ss.+s.YahCCsoChpph+cp...... 0 8 15 27 +8227 PF08395 7tm_7 7tm Chemosensory receptor Robertson H, Finn R, Fenech M anon Robertson H Family This family includes a number of gustatory and odorant receptors mainly from insect species such as A. gambiae and D. melanogaster. They are classified as G-protein-coupled receptors (GPCRs), or seven-transmembrane receptors. They show high sequence divergence, consistent with an ancient origin for the family [1,2]. 
26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.36 0.70 -5.44 78 1580 2012-10-01 21:54:26 2005-08-15 14:17:53 7 10 51 0 884 2288 0 315.20 12 89.70 CHANGED lchhhhhspl.hG..lhsh........phstpptphph.hphhhhhhhhhhhhhhhhhhhhh...h.hhhthhhhptsthhphhhhlhhhlthlshhhhhltthhppppltplhppl....hplppphtptspttt..........phhshhhhhhhhhhhhhhhhshh.............hhhtthhhhshhhlhhhhhhh....hh.hhhhhahhhlhhlh..phhphlpppLpphhppht.......................hpppshtp...............l..pplhphappl.hplspplsphas....h.ll..hhhhhhh.lthshhha..hhhtphh.............p.hhhhhhhl......ahhh.hhhhh......hhhhssstspp....pspcssthl..tph..........htttspphppp..lcpFhlphh.ppph....phsssGhFslspshlhshhuulhoYlllLlQFchtt ..................................................................................................................................................................................h.........h.s......................................h........h.h....hhh..hh..h..h.h...hhhhh..h..hhhh..............................................t....h.....p.....hh...h..........h..............h.h...h...........h...h.h...h.h..h.hh.h.......hh......p.......t..p...p..hh.....ph....h...pp.h...............hphptp.....h...th.th.....................thh.h.hh.....h.h...h.hh.h....h.h.hhh.hhhhh...........................................h..h......h...hh...h...h...hhh..h..hh..hh.......................h.hh......h.......h.h.h....h.h.......h....h.h..lh.............th...ht...hl..pptl...pt.hht....p.........................................tp.p..t.....................................................l....pphhphat.p....l...hp.h.s.p.p....hsp...has....h.hl...hhhhhth...hthh.h.ha......hhhhthh................................hh.h.hhhhh.........................h.hh.h.h.h..hh.......hhhhhs..pt..h.p............................phppht.hh..........hph........................s..tp.h..ppt.........lp....h.....h...th......pp..........thphhs..hh.ls.phhh..thhsshhsahlhhhQh.........................
........................................ 0 345 411 732 +8228 PF08396 Toxin_34 Spider toxin omega agatoxin/Tx1 family Bateman A anon Mondal S, Ramakumar S Family The Tx1 family lethal spider neurotoxin induces excitatory symptoms in mice [1]. 21.10 21.10 24.80 24.70 17.80 17.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.82 0.72 -3.43 7 30 2009-01-15 18:05:59 2005-08-15 16:18:18 5 1 8 0 0 34 0 51.30 46 76.95 CHANGED CIclGs-CDG..cKsDCQCCRcNuYCSC..h...FG.hKsGCKCpVGssuphhulC.+cKppC.sp.....ssspCppsshs+Rp ....ClphGpsCDG..cKsDCQCCtcNuaCSC..h...as.hchs..CpC.ht................................................... 0 0 0 0 +8229 PF08397 IMD IRSp53/MIM homology domain Wuster A anon Pfam-B_4120 (release 18.0) Family The N-terminal predicted helical stretch of the insulin receptor tyrosine kinase substrate p53 (IRSp53) is an evolutionary conserved F-actin bundling domain involved in filopodium formation. The domain has been named IMD after the IRSp53 and missing in metastasis (MIM) proteins in which it occurs. Filopodium-inducing IMD activity is regulated by Cdc42 and Rac1 and is SH3-independent [1]. 
20.50 20.50 20.60 20.70 20.40 20.40 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.32 0.70 -5.03 8 584 2012-10-03 12:17:00 2005-08-17 11:36:12 6 17 112 9 305 490 0 190.70 31 31.80 CHANGED Y+sIMpQFN...PuLcNhlshGpsYc+ulpuhlhAucuYaDAlpKlGEhAssSps.S+ELGslLhcMu.sHRplpscLEchhpsFHcpLIspLEcKs-.DpKahssshK+YppEaKp+pcsL-KspSDLhKLc+Kupt...G+sst+hpl+Es..l-slsc+psplpcalucus+cALlEE+RRFCFLV-Kppslupp.huaaucuhshLpspL.sWpptsuDso+lP ................................hp....P.hcphlthupph.ptl....................pshs.Ast..s.......ah-.......Ah.....tK...........lu...-h...A..t...p................op.s..o...+...-..l.......G....ssLhphs.hHR..plpspLcph...h...p..shhppLls.Lp..cphE..t+...h.hs...t....hpKc.a...tp......Ea+pt..tpp.lcKppu.-.hh......KL......p+Kspt....................u+..ss...p......p..hcph.........lp.s....lssc.htl.p..p.h.ppuh...+pALlEERpRaChhlphhpsls................pp.hshhsc.hp...h....Lp..thh..hpthst-spclP............................................. 2 68 101 199 +8230 PF08398 Parvo_coat_N Parvovirus coat protein VP1 Wuster A anon Pfam-B_2198 (release 18.0) Family This is the N-terminal region of the Parvovirus VP1 coat protein. Also see Parvovirus coat protein VP2 (Pfam:PF00740). 26.60 26.60 26.60 26.70 26.50 26.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.17 0.72 -4.18 11 1185 2009-01-15 18:05:59 2005-08-17 11:58:51 5 6 140 0 2 856 0 57.20 59 11.36 CHANGED LPGY+YLGPGNuLDpGcPVNtuDuAA+cHDhuYsp.LKsGcNPYlpaNpADpcFlccLccDTSF .......LPGhpYlGPGNt....L....ps....G.P.P.psssDuAA+hHDhpYuphhK.G.NPYh.hashADpch.cplppt.s............................... 0 1 1 2 +8231 PF08399 VWA_N VWA N-terminal Wuster A anon Pfam-B_2075 (release 18.0) Family This domain is found at the N-terminus of proteins containing von Willebrand factor type A (VWA, Pfam:PF00092) and Cache (Pfam:PF02743) domains. It has been found in vertebrates, Drosophila and C. 
elegans but has not yet been identified in other eukaryotes. It is probably involved in the function of some voltage-dependent calcium channel subunits [1]. 20.70 20.70 20.90 20.80 20.60 20.50 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.56 0.71 -3.50 11 476 2009-01-15 18:05:59 2005-08-17 12:04:57 6 21 88 0 252 411 0 113.30 37 11.56 CHANGED AEps..ptpHpaptslt.pphpYYsA+hhs-hsc...........-sstsEhu.....hph.hpccspFpN.sVNhohSuVplPTslYpcsstlLNslpWocuL-sVFhcNhccDPoLpWQYFGSuTGhhRhYPAspW .....................................................................Acphp...taph.p.sh......htYasuhh.sch.sp...............t.s.s.-ht.........hch.ht..sspFps..Vsh.shS.s.VplPTslY.pt.ss.llNslpWo..cuLs..pVFhc.....N...hcpD.PoLhWQYFGSuoGhhRhYPu.htW............ 1 68 89 164 +8232 PF08400 phage_tail_N Prophage tail fibre N-terminal Wuster A anon Pfam-B_3101 (release 18.0) Family This domain is found at the N-terminus of prophage tail fibre proteins. 26.60 26.60 26.60 27.00 26.40 26.50 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.58 0.71 -4.45 9 1004 2012-10-02 19:08:27 2005-08-17 14:13:30 5 47 343 0 14 1041 4 125.20 65 25.71 CHANGED MoVpISGlLKDGsGcPVssssIpL+Ap+sSsTVlssTVAos.s-psGpYuhclE.GpYsVhLts-GhsssasGsIsVhsDSpPGTLNsFL.tAhsEsDlpPEllpcFEthstpsscsAusuttssppAspsutsA ......MuVpISGVLKDGsGKPVpNCTIpLKA+RsSoTVV.....VNTVASENPD.EAGRYSMDVEaGQYSVhLLVEGF.P.PSHAGTITVYEDSpPGTLNDFL.GAMoEDDlRPEALRRFEhMVEEsARpAptAppNAupAcpSupsA.................... 1 1 2 7 +8233 PF08401 DUF1738 Domain of unknown function (DUF1738) Fenech M anon Pfam-B_3014 (release 18.0) Family This region is found in a number of bacterial hypothetical proteins. Some members are annotated as being similar to replication primases, and in fact this region is often found together with the Toprim domain (Pfam:PF01751). 
20.60 20.60 20.60 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.68 0.71 -4.40 112 907 2009-01-15 18:05:59 2005-08-17 16:38:04 6 25 579 0 235 850 162 122.50 28 26.43 CHANGED +tclY.pclTc....cIIspLEp.G.stPWhpPWp.....tsssshshPhN.toGcsYpGlNllhLWhss.tpGas.....sspWhTa+QAppLG...........upVRKG.E+uosl...laacp..hppc.....pp.t........ps.ptptcpl..shl+tasVF ..................................p..shhpplss.......plI.ttlEp.....G.shPWh+PWp.......tsthshPhN.ho...GctYpG.lNslh...Lh.h.ts..tpGas.........sspWhT.a+Q.Ap.p.h....G....................................................u..pV+..KG..E..+us.sl...laaph......hpcp............t.p.t...................tptptphpth....shh+tapVF............................................................................. 0 46 136 189 +8234 PF08402 TOBE_2 TOBE domain Bateman A anon Pfam-B_4178 (release 18.0) Family The TOBE domain [1] (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulphate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. In this family a strong RPE motif is found at the presumed N-terminus of the domain. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.46 0.72 -3.98 128 14426 2012-10-03 20:18:03 2005-08-18 14:22:02 5 12 3742 54 3382 10860 3488 74.40 20 20.70 CHANGED lulRPE+lpl...................ss...s....s..sl....sGpVpphtahGspshht.lclsssphhhs...ttssst........ht..hGcpVtlsasscsshlh ..............................lulRPE...c.l.pl........................................ss...s........s....sl........pu...p..V..ps...h...p..ah...Gsph......h.......h.....h....l..........c........h........s.......s........s....p....h.......hhs.................ph.sspt............hp............hG..p.p.l.tlsh.s.spphhlh............................................... 0 780 1829 2550 +8235 PF08403 AA_permease_N Amino acid permease N-terminal Wuster A anon Pfam-B_3112 (release 18.0) Family This domain is found to the N-terminus of the amino acid permease domain (Pfam:PF00324) in metazoan Na-K-Cl cotransporters. 21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.65 0.72 -4.66 16 266 2009-01-15 18:05:59 2005-08-18 14:58:10 5 5 64 0 109 240 0 70.70 44 7.06 CHANGED pcstsppshhpshGasTlDsVPplDFYpNouuhut.+tsRPSLp-LHcshccsss...ssht-sssst...sGsss- ............coHosTaYlpTFGHNThDAVPpI-aYRNTuu...s.G..Kh.sRPSLtELH-p.LcKt.......shtDshusG...-uh............................... 0 15 25 55 +8236 PF08404 Baculo_p74_N Baculoviridae P74 N-terminal Wuster A anon Pfam-B_3059 (release 18.0) Family This domain is found at the N-terminus of P74 occlusion-derived virus (ODV) envelope proteins which are required for oral infectivity. The envelope proteins are found in baculoviruses which are insect pathogens. The C-terminus of P74 is anchored to the membrane whereas the N-terminus is exposed to the virion surface. Furthermore P74 is unusual for a virus envelope protein as it lacks an N-terminal localisation signal sequence [1].\ Also see Pfam:PF04583. 
25.00 25.00 27.10 26.60 19.20 19.00 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.88 0.70 -5.66 26 102 2009-01-15 18:05:59 2005-08-18 15:00:00 5 2 71 0 0 99 0 268.90 44 46.86 CHANGED TslDlhNAspYusphppLcaI.+WRs+FPHIh..IDYsIRsA.oNs.DYYVPssLts+.AlsVcLpFS+cGCESMSCYPasETGsI-h.pTPhG.YTQTS-TuVtYuQ..PACYsLD+AuAsR-GsEtclQosELRYosss+.CIlVDohoKMYhNSPYlRT--Hll+GVDDV.PuFNVts.ssDPlFPE+acGpFN-AYCRRFGRsl..N......suCohpWWEollGFVLGDoIahTFKLLssNVFo-LRsFDYp+PSslLPstPss-upp.lLp-WtssRDsslDh.-hEtpFhphp..shs-lsl...sssppLlYsAppGF ........ThhDhhNAspYu.phtpLpaI.+WRp+hPHIh..IcYplR.A.s.ss.DaYVPstltp+...AlhVcltFS+cGC-.u.MSCY.Pap....ETGsls..pTshs.YTQTS-......Ts..lt.Y...sQ..PACYpLDRuuAh......R-Gt-pplQusEhRYo..spp.ClhV..DohoKMYh...NoPYlRT--HhlhGlDDV.PuFNVts.....s...sDshFP............E+F+GpFNcAYCRRFGRph..N......suCshpWWEollG.FVLGDTlYhThKhlssslao-LRsaDYp+PSs.lLPs..Phssupp..hLspW+ssRDsssch.-hEh.F.p....phsclsh....s...pl.YhAE.Ga............................... 0 0 0 0 +8237 PF08405 Calici_PP_N Viral polyprotein N-terminal Wuster A anon Pfam-B_4167 (release 18.0) Family This domain is found at the N-terminus of non-structural viral polyproteins of the Caliciviridae subfamily. 
24.20 24.20 24.40 44.80 24.10 24.10 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.98 0.70 -5.62 6 386 2009-01-15 18:05:59 2005-08-18 15:01:23 6 4 326 0 0 369 0 351.20 73 21.29 CHANGED RsspPApEPlIGshLEha-G+IYHYuIYIGpGKslGVHsPpAAhSlA+lslpPIuhWWRssYsPp...hLo.DpLKcLcsEsaPYsAhTNNCYpFCC.VhsLpDTWLpRRhIo.ostFa+PsQDWs+cs.-h.pDSKLchVpDAlLsAlsuLlS+PhKDLLGKLKPLNlLNlLusCDWTFhGlVEsVILLhELFGVFWsPPDVSNFIASLLsDacLQGPEDLAhDLVPlVLGGIGLAlGFT+-KluKhhpSAssuLRAAppLGpYGLEIFpLlhKaFFsu-psc...cTL+sIEsAVIDMEslussplTpLlRDKpSupsYMphLDtEEEKARKLSsKsAsPclVuTTNALlARIShARSAL ......RENRDAKEPLTGTILEMWDGEIYHYGLYVERGLVLGVHKPPAAISLAKVELTPLSLaWRP..VYTPQ..YLISPDTLKKLHGETFPYTAFDNNCYAFC.CWVLDLNDSWLSRRMIQRTTGFFRPYQDWNRKPL.PTh.DDSKLKKVANIFLCsLSS...LFTRPI..KDIIGKLRPLNIlNILASCDWTFAGIVESLILLAEL.FGVFWTPPDVSAMIAPLLGDaELQGP.EDLsVELVPVVMGGIGLVLGFTKEK....IGKMLSSAASTLRACKDLG...AYGLEILKLVMKWFFPKKEEAN.ELAMVRSIEDAVLDLEAIENNHMTTLLKDKDSLATYMRTLDLEEEKARKLSTKSASPDIVGTINALLARIAAARSLV................................................................................. 0 0 0 0 +8238 PF08406 CbbQ_C CbbQ/NirQ/NorQ C-terminal Wuster A anon Pfam-B_3065 (release 18.0) Family This domain is found at the C-terminus of proteins of the CbbQ/NirQ/NorQ family of proteins which play a role in the post-translational activation of Rubisco [1]. It is also found in the Thauera aromatica TutH protein which is similar to the CbbQ/NirQ/NorQ family [2], as well as in putative chaperones. The ATPase family associated with various cellular activities (AAA) Pfam:PF07728 is found in the same bacterial and archaeal proteins as the domain described here. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.42 0.72 -3.97 81 795 2009-01-15 18:05:59 2005-08-18 15:03:12 5 4 655 0 191 519 390 86.40 31 30.94 CHANGED EspIVucEo.GlspspAtpLVplApplRsL..cGps....L-EGsSTRLLlYAApLlssGlsstpAscsAllcPLTDDs-l..tpuLpchlsuha ............E.tllt.cp..o...u.l.s.c.p.h.s...pplVchupclRsl........ptps.............l-Eus.STRhLlhhupLl.s........t.shss....c....c....A.....hctsllcsL...s..D..-...-h..hpAlhphht...h...................................... 0 57 127 167 +8239 PF08407 Chitin_synth_1N Chitin synthase N-terminal Wuster A anon Pfam-B_1105 (release 18.0) Family This is the N-terminal domain of Chitin synthase (Pfam:PF01644). 19.40 19.40 20.90 19.80 18.70 17.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.93 0.72 -4.29 55 491 2009-01-15 18:05:59 2005-08-18 15:04:37 6 9 174 0 331 510 1 79.90 43 8.49 CHANGED +Rtps..h.....++lp..Lh...pGsllh.D.......sPVPstLhst.lst........psssEFoaMRYTAs.TCDPs-Fs.cpuasLRpt.hYs..RcTELhIslT .................h...hsh+cVpLh.....pGsllh.D.......sPlPstLhstlsp........sspcEFTHMRY.TAs.TC.DPs..-Fs.cp.........GasLRtt.has..RcTELhIslT........... 0 99 186 287 +8240 PF08408 DNA_pol_B_3 DNA polymerase family B viral insert Wuster A anon Pfam-B_3028 (release 18.0) Family This viral domain is found between the exonuclease domain of the DNA polymerase family B (Pfam:PF03104) and the Pfam:PF00136 domain, connecting the two. 
24.50 24.50 24.70 24.80 22.00 24.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.11 0.71 -4.33 13 96 2009-01-15 18:05:59 2005-08-18 15:05:26 5 3 53 0 0 90 8 143.70 69 14.78 CHANGED SKNsFsCsstlc.phssshhphhussssDs+sKlplFu-VLpTGNYlTIs-.claKIlcK.cIt..c-uFpl....sltsspshth....ssh......hsluFGKDDVDLuD..MYtNYsL-hAl-MupYClHDACLCKYLWsYYtIcoKIsAuAsTYlLPQshsFEY ...................SKNAFuChuKVL.scGscEMTFIGDcTTDAKGKAAsFAKVLoTGNYVTVD-.hICKVI+K.DIa..ENGFKV....lLsCPs........NDT......YKLSFGKDDVDLAp..MYKcYNLNIALDMARYCIHDACLCpYLWEYYGVETKTDAGASTYlLPQSMVFEY......... 0 0 0 0 +8241 PF08409 DUF1736 Domain of unknown function (DUF1736) Wuster A anon Pfam-B_4104 (release 18.0) Family This domain of unknown function is found in various hypothetical metazoan proteins. 21.70 21.70 22.40 22.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.59 0.72 -4.41 30 412 2009-09-10 21:08:40 2005-08-18 15:06:46 6 202 109 0 252 413 12 78.60 43 10.48 CHANGED lhsptsPt.FopsDNPsuhssphhoRhLTapYLhsh.NhWLLLhPssLsaDWoMsuIPLl...coltDhRNlholhhasslh...lL ..................hssp.Ph.Fop...DNPAuhus.hhs.RtLTasYLhsh.NhWLLLsPsp..LCaDWoMGsIPLl.....colhDhRNlsTlshassLh...l...... 0 83 103 178 +8242 PF08410 DUF1737 Domain of unknown function (DUF1737) Wuster A anon Pfam-B_2030 (release 18.0) Family This domain of unknown function is found at the N-terminus of bacterial and viral hypothetical proteins. 20.60 20.60 20.60 24.50 20.30 18.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.67 0.72 -4.19 19 1062 2009-01-15 18:05:59 2005-08-18 15:07:52 5 3 451 0 59 981 214 52.20 58 13.11 CHANGED M.Kh.....YRhlTusDsusFC+R.VTpALscGWpLaGSPshuasu.tthhtsuQAV....lK-l ...........MshKH......YDVVRAASPS.DLAEK.LTcKLKEGWQPaGuPlAhTs........asLMQAlstE.G............. 
0 16 35 46 +8243 PF08411 Exonuc_X-T_C Exonuclease C-terminal Wuster A anon Pfam-B_3061 (release 18.0) Family This bacterial domain is found at the C-terminus of Exodeoxyribonuclease I/Exonuclease I (Pfam:PF00929), which is a single-strand specific DNA nuclease affecting recombination and expression pathways. The exonuclease I protein in E. coli is associated with DNA deoxyribophosphodiesterase (dRPase) [1]. 20.00 20.00 23.80 22.50 19.70 19.70 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.85 0.70 -5.37 74 1111 2009-09-11 17:37:21 2005-08-18 15:09:21 5 3 1089 6 187 830 1335 260.60 49 55.39 CHANGED RpKcpltphlDl....hphpPLlHVSGhauut.pussuhlsPlAaHPsNpNAVIshDLstDsssLlc.......Lsu-plRpRLYTc..+s-Ls-s.th.lPlKhlHlNKCP.llAPspsLps..........-sAp.....RL...GlDtptstpphphL+pp....sp.lp-Kltplas.......pp.pas..sssDsDtpLYs.GFF.ussD+pthchlR..pssPppLup..hphsFpDpRltcLLFRYRARNaPcTLsppEpp+Wppa........ppp+Lpssp......hppahppLppLhpp......ppss...cchplLpsLhpYuppL .......................................................................RsK+KlhsLIDl....spMpP.LVHVSGMFGAh.RGsTSWVuPLA..WH.PcN+NAVIhsDLA.u.Dh.oPLLE.......LDu-sLR-RLYTt........+sDL.u-..pssVP..lKLVHINKC.P.VLA...Ap.T.LcP..........Ec.A-.....RL...GIsRppCLcNLplLRps.....Pp.lREK.VlslFup......tc.sFs....sS-s..VDsQLYs.G.....FF...S-A.DRAsMcIlh....pT.c.PcsLsu................L-lsFsD.cRIc...c...L...LFpYRARNFPs.TLstuE.QpR.WhcH......................RRphhssch...........lpsYtpcLp.Lspp........as.-..Dc......cKluLLKuLapYApc.h...................... 0 38 88 143 +8244 PF08412 Ion_trans_N Ion transport protein N-terminal Wuster A anon Pfam-B_4115 (release 18.0) Family This metazoan domain is found to the N-terminus of Pfam:PF00520 in voltage- and cyclic nucleotide-gated K/Na ion channels. 
20.60 20.60 20.60 20.70 20.40 20.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.69 0.72 -4.67 9 383 2009-01-15 18:05:59 2005-08-18 15:10:34 5 12 89 0 232 371 6 66.70 47 8.90 CHANGED QhtuhLQPslNKhSL+hFGScKAVpcEQ.Rl+oAGsWIIHPaScF.........................................RFYWDLhMLhLhhuNLIlLPVuITFFpDpsos ................................hh.PtsN+huh+ha.GSpKAl.pEp.R...c.s.u.....G.hW...lI.HPhSsF........................................................RFYWDlhMLllhVuNLIIlP.VuIoF..Fp--.o..................... 1 91 116 169 +8246 PF08414 NADPH_Ox Respiratory burst NADPH oxidase Wuster A anon Pfam-B_2127 (release 18.0) Family This domain is found in plant proteins such as respiratory burst NADPH oxidase proteins which produce reactive oxygen species as a defence mechanism. It tends to occur to the N-terminus of an EF-hand (Pfam:PF00036), which suggests a direct regulatory effect of Ca2+ on the activity of the NADPH oxidase in plants [1]. 21.90 21.90 23.80 23.50 21.30 20.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.26 0.72 -3.92 28 221 2009-01-15 18:05:59 2005-08-18 15:13:29 5 16 30 2 128 252 0 93.50 49 11.45 CHANGED RLDRo+S.uAt+AL+GL+FIo+ssus..s.G...WspVEpRFsclss..-GhL.RucFucCIGMs..-SKEFAsELFDALARRRplp.s-sIsK-pL+EFW-QIoDQSFDS ...........phcRo+.S.uAt+AL.+GL+Flspssss...t.u...........WtpVEcRFcpLut.....-G.....hLsRscFupCI.........G.Mp......-SKEFAsELFDALuR.RR...plp....h..........ssIoK--L+EFWpQIoDpSFDo...................... 0 16 81 103 +8247 PF08415 NRPS Nonribosomal peptide synthase Wuster A anon Pfam-B_1156 (release 18.0) Family This domain is found in bacterial nonribosomal peptide synthetases (NRPS). NRPS are megaenzymes organised as iterative modules, one for each amino acid to be built into the peptide product [2]. NRPS modules are involved in epothilone biosynthesis (EpoB), myxothiazol biosynthesis (MtaC and MtaD), and other functions [1]. 
The NRPS domain tends to be found together with the condensation domain (Pfam:PF00668) and the phosphopantetheine binding domain (Pfam:PF00550). 20.90 20.90 20.90 21.60 20.80 20.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -9.06 0.72 -4.20 144 1536 2009-01-15 18:05:59 2005-08-18 15:14:43 5 209 643 0 259 1385 12 57.50 37 3.74 CHANGED HpthSGVc.VlR.-LsRp.......tsss........shhPVVFTSsLuhss............tsstshhGph...s.aslSQTPQ .....HppaSGVc.VhR.-Ls+p.....psts.........shhPVVFTSsLGhst.t.............ttspphhGc......saslSQTPQ............... 0 45 137 215 +8248 PF08416 PTB Phosphotyrosine-binding domain Wuster A anon Pfam-B_3174 (release 18.0) Family The phosphotyrosine-binding domain (PTB, also phosphotyrosine-interaction or PI domain) in the protein tensin tends to be found at the C-terminus. Tensin is a multi-domain protein that binds to actin filaments and functions as a focal-adhesion molecule (focal adhesions are regions of plasma membrane through which cells attach to the extracellular matrix). Human tensin has actin-binding sites, an SH2 (Pfam:PF00017) domain and a region similar to the tumour suppressor PTEN [1]. The PTB domain interacts with the cytoplasmic tails of beta integrin by binding to an NPXY motif [2]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.56 0.71 -4.24 14 631 2012-10-04 00:02:25 2005-08-18 15:15:56 8 20 96 7 308 1087 1 127.00 29 14.53 CHANGED QapVpHLsThsh-sppuhpslcDslppLphLsupG+lWsQshllpVsspuloLhD.poKp.L.EpaPLsolpaspsshpsppasS...............lLulVspcsspupssl.HlFp..ElcAp.ltpsIpsslschhhsp.p+ ...........................................................thpV...aLsohph-..shs.Ghps.lpc..t....p..h...t.....t........L.......s.......t.........p......s...p......s....h..s.......p.......l..........hh.cVssp.uloLhD.......sp...p..K....................................h....cp.......YP.......lsolp....assh...s..s...p...s...p...pa.ss...............................lhuhVs....+c...s....p........s.....p.....s...s..l....HlFt.....El...c...sppssssI.shlsc.h.st................................................................................. 1 54 78 174 +8249 PF08417 PaO Pheophorbide a oxygenase Wuster A anon Pfam-B_3102 (release 18.0) Family This domain is found in bacterial and plant proteins to the C-terminus of a Rieske 2Fe-2S domain (Pfam:PF00355). One of the proteins the domain is found in is Pheophorbide a oxygenase (PaO) which seems to be a key regulator of chlorophyll catabolism. Arabidopsis PaO (AtPaO) is a Rieske-type 2Fe-2S enzyme that is identical to Arabidopsis accelerated cell death 1 and homologous to lethal leaf spot 1 (LLS1) of maize [1], in which the domain described here is also found. 
20.90 20.90 21.00 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -10.21 0.72 -4.06 29 296 2009-01-15 18:05:59 2005-08-18 15:35:27 7 5 102 0 163 309 25 97.40 26 20.94 CHANGED F.APChhhps.......................hsscs.tclttlhhChPsu.G+oR.....LlhRashsF......sthhh+lhP...RWapHl.spspVL-pDhhhLctQc+hltp..Gsss......app ...........................................................FhsPChhhsp..........................h.ptppttphhh..lhhslPsus..G+oR.....l...l...hp.hshsF.............................sthhhph..lP......cWa......pHh..st.spVL-pDhhlLptQ-chhhp....s................... 0 41 111 141 +8250 PF08418 Pol_alpha_B_N DNA polymerase alpha subunit B N-terminal Wuster A anon Pfam-B_4046 (release 18.0) Family This is the eukaryotic DNA polymerase alpha subunit B N-terminal domain which is involved in complex formation [1]. Also see Pfam:PF04058. 21.10 21.10 21.80 22.00 21.00 21.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.53 0.70 -4.84 27 272 2009-01-15 18:05:59 2005-08-18 15:36:04 5 9 224 16 181 279 0 213.00 20 36.09 CHANGED FGsss....ss-llpcLpslhplaslos--Lah+WEuFshpp..spp.p.p.LslpsLcpFccplppphp+pspstht.............tphpphhpssttct.hl.t.sssushhuhhpt.sTPshtpp+h..................................sspsp....htoPsstssstssss................................................................phsp..+pssGpll-olNsch...sps......hsssscsc.....l+lssph-sp+apY+sMh.+L.-uu-VLD-pI-chsplh.pcpaplsp..pFGsPsl.SQ 
......................................................ptplltch.plhhhash.s.sp-hh.cW.u....ashph....tts.t.....lshpslptF.cppltp.ph.ppps.ptt.......................p.pthhts..pt..........t......h.s.sss.hhs......p..s.....t..TPpt.tpph.........................................ttt........h..oPs...s.p.st.t.t.......................................................................asp...RtstGpllpshs.s..t.......ps............hss.s.sh.....hc.l.hst..-...h.ph..t...a+.Mh.+L.-t.p-lLsspI-phsp.l.pppaphp...tass....u............................................................................................................................ 0 52 97 146 +8253 PF08421 Methyltransf_13 Putative zinc binding domain Wuster A anon Pfam-B_2038 (release 18.0) Domain This domain is found at the N-terminus of bacterial methyltransferases and contains four conserved cysteines suggesting a potential zinc binding domain. 20.90 20.90 21.10 21.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.95 0.72 -4.48 27 438 2009-01-15 18:05:59 2005-08-18 15:41:25 6 9 358 10 145 436 1072 61.50 34 14.89 CHANGED CRlCssstlpsllDLGtpPlsssFlss..ttscspshaPLclhhC..psCtLVQLpchlssphhFs .................CRhCust...h.pshlDLGhsPh.ss.uFlss..tph.s...p..E.s....haPLclhlC......psChLVQLpchhsscplFt........... 0 48 101 122 +8255 PF08423 Rad51 Rad51 Mistry J, Wood V anon Pfam-B_684 (release 17.0) Domain Rad51 is a DNA repair and recombination protein and is a homologue of the bacterial ATPase RecA protein. 
20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.52 0.70 -5.48 20 2467 2012-10-05 12:31:09 2005-08-18 16:12:14 6 29 887 65 1071 8769 2265 183.50 30 69.83 CHANGED shGFpTAo-htppRpcllpITTGS+pLDpLL.GGGIETGSITElFGEFRTGKTQLCHTLsVTCQLPl-hGGGEGKshYIDTEGTFRPERllsIAERaGLDspsVL-NlAYARAYNs-HQhpLLtpAuuhMuEu+auLLIVDSsTALYRTDaSGRGELusRQ.+Lu+FLRsLp+LADEFslAVllTNQVlApVDGu.uhFs.uDsKKPIGGpIlAHASTTRLhLRKG+G-pRlCKIYDSPsLPEuEssFuIsppGItDsc ...............................................................................................................hc.t.hh........t..........u.................Gh..........................lsE.................h..h.................G...ts...GKoQl....s.hp.h.s..ls.s......p.....ls.....................t...........t.....G.............h.......t.....u..........t..sha..l.................D.T..........E...s.s.....F......p....s...............p..R.....l........t...........h.................s................p...............t..............................t................................p..........................p..........t.............h........L........p......p...........l............h......h...s......+.......s.............h.....s..........................p........c......p....h............t........l.......l.................t....h......t........t......h......h.............s......c..........p...........t.......h......p....L......l..ll.DSl.....su...h.....a.....R...s......-.........a.............s.........u........c.......s........p...............l........u......t............R.....p.....p.....p......L.....s.....p.....h....h............p..t..L..p.l.u...c...p...a...s...h...A...l.h.l.sNQ.............h.......t..............t..........t.................................................................................................................................................................................................................................................................
................................................................................................................................................................................... 0 328 556 830 +8256 PF08424 NRDE-2 DUF1740; NRDE-2, necessary for RNA interference Mistry J, Wood V anon Pfam-B_21376 (release 17.0) Family This is a family of eukaryotic proteins. Eukaryotic cells express a wide variety of endogenous small regulatory RNAs that regulate heterochromatin formation, developmental timing, defence against parasitic nucleic acids, and genome rearrangement. Many small regulatory RNAs are thought to function in nuclei, and in plants and fungi small interfering (si)RNAs associate with nascent transcripts and direct chromatin and/or DNA modifications. This family protein, NRDE-2, is required for small interfering (si)RNA-mediated silencing in nuclei. NRDE-2 associates with the Argonaute protein NRDE-3 within nuclei and is recruited by NRDE-3/siRNA complexes to nascent transcripts that have been targeted by RNA interference, RNAi, the process whereby double-stranded RNA (dsRNA) directs the sequence-specific degradation of mRNA [1]. 
26.60 26.60 26.60 26.60 26.40 26.40 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.99 0.70 -5.55 23 285 2009-01-15 18:05:59 2005-08-18 16:51:24 5 6 205 0 211 292 3 263.90 22 29.68 CHANGED lpp+stELs+plccsPpDlctWlchlcaQ-plhp.tpp............psppptls-hKLSlhE+ALcp..s...s..sc-cLllthLctssclW.ssccltp+....Wppllpp...........sss..phsLWhpYLcacQup.........FusFohsclhpsahcslctLpsuhsp...................phppshltlFlphshFl+puGasEhAlulaQAhlEhshapPpplptp...........ptlptFppFW-S.tssRlGEssApGWpph.pt..ps.ps..sttp..ssps.........................hap..........uWtpsE...................ppRpspsthPsRs.........s-st--DP.RsllhsDlpshLhhl.s...tpsphpLlpshLtahtlP.h.p.ss ...........................tc..phpptlpppPpshphWhthhtaQc..phht..t..................................ptpht.hh-hKlulh-+Alpt.......p..................s...sp.pLhl..t......hhc...s..t...ph........h...ps.....pt....lhpc...................Wpphl....................pst.......thtLW.pal.hppsp..........hspashsphhpha.pslpthtthtpt.....................................thp..hh.lhhphshahppuGa.....EhuhuhhQuhl-hshhtPt.h.t....................t.ht.ht..FW-s.t.sRhG............-.su.GWt........t..............................t.......................................................................................................hap.Wht.E........................................................ptpt......ttthhPh+s.................sp.tppDs...R...lhhsDlt..Lh.h.p.....t.t...Lh..hh.hhth.......s........................................................... 0 77 117 173 +8258 PF08426 ICE2 ICE2 Mistry J, Wood V anon manual Family ICE2 is a fungal ER protein which has been shown to play an important role in forming/maintaining the cortical ER [1]. It has also bee identified as a protein which is necessary for nuclear inner membrane targeting [2]. 
25.00 25.00 32.80 27.30 20.80 20.80 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.45 0.70 -5.87 16 140 2009-01-15 18:05:59 2005-08-18 17:08:01 5 2 127 0 104 136 0 388.20 36 93.55 CHANGED lRshhushYLhlllloIPlAFDVGGlsCGLuFSlTLFshYFlhoTl+llst+pp...ahhhsSllYY..sQahlIPoLLhhFLShass-p.tt................................p..s...hhhhhpthVc.....sWchhLspSTPlFTLLEGFCoLLlIQAlGpss+WLshc..+SDoWlIhSLlsSGulITuohYaLYRIYshP.aplshhsAoLLGhsLohshsLGhaGIlSG+GShlESSLhFAYlV+CIYEIFPchupsAopslhphhppsh.tshpsplP...................................lPP.....................Ihssloplh+hlo.olPsShpslaphhh.hAhpoloPulllsLsaRIhVFYuATRIIPulpcsushs..........................................................................pcssspllphlYhYSPCIlIAVYTHLlLQYpGpLts-LslW.....ha.....................p.phllcuWpFWNWlNlhsTllLYAsELhuu ................................................................................phh.ushaLh.hlloIPlAFDVGGhpCGLuaSloLhhhYFhhohl+lhs.cpu.......hhh..h..s..sl..lhh..sQahlIPuLLhh.LstFSsDs.ss.............................................................t.h.......ppp.....sh..ahahhptllp.....sWchlLpaSoPlFpLhEGFsoLLlIQAsGQhsR.WL..ssc..pS-..oW.hIs.LlhSuulIouul.YaLaRlh.hP..tlosh.sAsLlGsslTssshLshaGIsSu+GsslESS...LLFAYlVhClYpIhschtsos.s.tth......s...spp.phP.................................................slPP.....................................lIhss....h.o.slh+hlo.....sLPs.lpshhphlh.hAhpsloPullIsLsYRlhVhYuATRIIPAlpcsus.ps..p.sp....tp....................................................................................................................................................................................................................................................pcs.ssp.hlthL.aaSPsILIAVYTpLlh.Qa.up..t.t.shh.........h.......................phsssuhshWpWhNlhsThhLYAsELhh.s...................... 
0 26 58 89 +8259 PF08427 DUF1741 Domain of unknown function (DUF1741) Mistry J, Wood V anon Pfam-B_35314 (release 17.0) Domain This is a eukaryotic domain of unknown function. 20.30 20.30 21.80 21.60 19.50 18.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.51 0.70 -4.90 13 205 2009-01-15 18:05:59 2005-08-18 17:17:45 5 4 175 0 150 199 1 213.60 36 35.35 CHANGED Ph.sspsRsLuTslLDlhl-sIsaNL+++LssslYslsluIlhRlloYhscs+lRLsYHWpELWpsLLoLl+FL..sohso-Lpsp.....splppLsppllNllAhhlosGDoFLPossuYD-LFYKllcsupllscF+shhtppsp.s......ss..h....................................hsulssLluVspHFpshlh............t.sups.........ppppLoscpVpclIKpsY-TLolphp-uLstap+acEus.hcshhK+hsRssVsDsRthl ...................................s.ssRsLssslLDlhl-hIspphh.+chshcLYhhCltllhRlLsY.p.+s.+lR..L...p.Y.pWpELWpuL.lsLl+FL.......ho.p.ss..Lhsp.......sIh.sLshplVNLhshhlohGDTFLPoPsSYD-L...........aYcllchtplhsphhshhhhh...spss...........s.th....................................hpulsslhsl.sHapshlt...........................t...t.p......................ph.pplo.ppVhpll+psY..-TLolp.hp-uLstaE+apEts..tshhKchsRshstss+......................................................................................................................... 0 49 76 118 +8260 PF08428 Rib Rib/alpha-like repeat Fenech M anon Pfam-B_3139 (release 18.0) Repeat The region featured in this family is found repeated in a number of bacterial surface proteins, such as Rib (Swiss:P72362) and alpha (Swiss:Q02192). These are expressed by group B streptococci, and Rib is thought to confer protective immunity. 
24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.22 0.72 -4.21 16 1574 2012-10-03 16:25:20 2005-08-18 17:26:22 5 133 326 0 165 1657 10 66.80 33 17.85 CHANGED loscsGt....sPDss-GIpNhscLPcGTp...Y.......sWcs...........sPDsosPGcKsusVVVTYPDGSpD-VsVsVpVss .....................................................ps..ut.......pP.s.sp.-.sI.......p..N.........h......ss...L.....P.....c.....G....Tp.........h.................sacs.............................s.DT.s......s.s.G.c.p.s......up.VsVTYPD..GS...p.....-......p......V...s...V.sVpVh..................... 0 21 54 149 +8261 PF08429 PLU-1 PLU-1-like protein Fenech M anon Pfam-B_4023 (release 18.0) Family Sequences in this family bear similarity to the central region of PLU-1 (Swiss:Q9Y3Q5). This is a nuclear protein that may have a role in DNA-binding and transcription, and is closely associated with the malignant phenotype of breast cancer [1]. This region is found in various other Jumonji/ARID domain-containing proteins (see Pfam:PF02373, Pfam:PF01388). 
24.30 24.30 25.50 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.98 0.70 -5.55 30 546 2009-01-15 18:05:59 2005-08-18 18:47:40 6 64 236 0 309 488 4 291.60 26 21.73 CHANGED -sWtp+scchLc...pss+ssLcsL+sLlsEuEchtas...................s-L.ppL+shVppAcphlcpAppllspKppsR.......p+stts.............................p...sp..hslpplppLlcphpsLshsssp.lspLcchhppl-pFppcApphLpp......sshshtclcpLl-pGpshsl-lPElstLcphlcpt+Wh-cspcths.h..............hol......c-lccLl-.pGtpls...spspchhtcLpchlshu-pWEc+Acch.....................Ls.........t.p.hshspLpulhppu..ps...........lPlslssltpLcshLp+u+p......atpplpsl..p...........sspphPphc-lcslhtpucsLssph.pphppLE.....pplpptpsWp-+us+hFhptNush.slLp .............................................................ptWhtphpthLp....tt+.sL.ph+sLhp-uc..p..h.as..............................spL.ppLpthhpcscphsp.utthlst.ppppp...........p.............................................................................................t..hslpc...lpthl.pp.h...sLsh.sh.sp.htt.lcphhpplcpappcupphLtp.......ptss...........t........p........lp............pLl-.upp.......h...sl.-...l.PplttLp...p.t.l.p.p.t.+.Whccsppthttst........................................hoL......p.hcpLlc...tGhtls......ssscc.ths.....c........Lp...c.lLshucpW-cKAc.h...............................................................Lp................p.p.hshspL.csllpps..pp.............................................l.P.s.l.....ss.ht....sLcphlp+A+t........................Whtplctl....p.................................stpphshhcp.LptLh.ttu..c..s...lssth.pth..plE............tltpscsWpcp....stphFhptsu.h.plLp............................................................................................................................................. 
0 82 140 232 +8262 PF08430 Fork_head_N Forkhead N-terminal region Fenech M anon Pfam-B_3191 (release 18.0) Family The region described in this family is found towards the N-terminus of various eukaryotic fork head/HNF-3-related transcription factors (which contain the Pfam:PF00250 domain). These proteins play key roles in embryogenesis, maintenance of differentiated cell states, and tumorigenesis [1]. 21.60 21.60 21.60 21.80 21.50 21.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.63 0.71 -3.77 29 224 2009-01-15 18:05:59 2005-08-19 10:58:52 7 3 94 0 100 197 0 126.60 31 30.04 CHANGED shYs-s.ppsYSss..s...........sMNoMsshhshssh....ts.............................s.sM.uhss....suhus.sshsuMssGhsuhhs......s.huhsshus.shussu.hus.hust.sss..hssluP..tshsts.s......s..thpR.....ssKsYRRSYTHA ..........................................hY.ts..psYos...s........................sMNs...Mso.hhshssh.........s...........................shsM.uhhs......suhus....sh.suMusussuhh............uhhuhsuhus...slusus..hus.hust.sus...hsuluP..tshssh.s.....s.utlptsR......ssKTYRRSYTHA........ 0 17 26 59 +8264 PF08432 Vfa1 DUF1742; AAA-ATPase Vps4-associated protein 1 Mistry J, Wood V anon manual Family Vps Four-Associated 1, Vfa1, in yeast, is an endosomal protein that interacts with the AAA-ATPase Vps4. It would seem to be involved in regulating the trafficking of other proteins to the endocytic vacuole [1]. There is a CCCH zinc finger at the N-terminus. 
21.30 21.30 21.30 21.60 20.90 21.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.55 0.71 -4.26 24 142 2009-11-11 09:10:55 2005-08-19 15:36:58 5 2 129 0 107 136 1 164.20 33 88.68 CHANGED NhYphR+VAsssuKuChICaKPoooVLlossst.................DFFYlCssHLpDc.pFso....Plhss.chttsctKpctlpcclccl...KK-hEpcpphhpch.tph..............Kcs-cpK-cc................sspcs.-ppccpcpcptcpchpslppshst.tsphsp...ps+hasLcp.............shaptRlcphpptch......tccpp......pclpp.sshFPslPp ..........................NhYphR+VAppsu+sChICaKPoooVLl..s...ssst......................DaFY.sC..HLp..Dp.tFss...........Phhsp...ph.t..st.......tctcthpc-l-+l...cp..-hE....p+ppt.tpct.ppt...............ccpcppK-pc................ppppp.....sppspp...pp.cpppcphpp..hppphst..........sphsp.......psRhapLpp..................shap.Rlpch+....phph......s+cpt............pcl.tp..thFPSsPp............................................... 0 25 55 86 +8265 PF08433 KTI12 Chromatin associated protein KTI12 Mistry J, Wood V anon Pfam-B_11625 (release 17.0) Family This is a family of chromatin associated proteins which interact with the Elongator complex, a component of the elongating form of RNA polymerase II [1]. The Elongator complex has histone acetyltransferase activity. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.68 0.70 -5.19 21 408 2012-10-05 12:31:09 2005-08-22 14:09:07 5 14 303 13 286 1745 907 262.30 25 83.50 CHANGED MPLlllTGhPsSGKoThAcpLtphhpc............pshsVhlls.D-slt.....hs+cs..Yt.cSppEKthRuplhSsVcRsLS+s.slVIlDuhNYIKGaRYpLaChAKsspTsaCllashss.................h-hshpaN.....pp+sps.........................................................ass-ll-pLh.RaEcPsspsRWDpPLFolh......sccshsh......................--Ihpsl................hppptLpPNpuThspPh.usssaLpcLDppTppllstIhpt.ppssssstlp..............hs.ssp..lphsts...holspLpRlRRpFlshs+........psh-h-+lhshFl-aLN .................................Ms.Ll.l.hsGhPsSGKo.phut..p..l..t..p..h...hpt..............................................................t.h....p.l..h...h.ls...ppsht.......................ht.pps.......a............t..........s.s............t..........t..E....+.....h....R......u...............h....h....u.....t....l........p......R.......t......l.........s.................c........p.....s..........l.V....I........l.D..u....h..N.....Y.I..K...G..aR......Y....p..L.a.C...h.u..........+..t...h..p......s..s..h....C.h.......l..a.s...s.ss.........................................h-ps.hphN.............ppR...t..p.t......................................................................................................................................................a..s..t...-.h...hc...p...Lh...h..R.......a......E...p.P...s.s.p.s.R..WDp...PLhsl...........sptp....s..h...............................................................................................................ppl.h.ps.l.........................................................................hp.t.t............s..pt.u..T.......t.t....sh.......s.......s..s.......shLa.p..lDp.tTppllstlhp.....t..t.p..t........h..s..h...........................................................h.t.................hsh.pLpch++palth.p..................t......ph...Fht
hl.................................................................................................................................................................. 0 108 158 233 +8266 PF08434 CLCA_N Calcium-activated chloride channel Wuster A anon Pfam-B_3091 (release 18.0) Family The CLCA family of calcium-activated chloride channels has been identified in many epithelial and endothelial cell types as well as in smooth muscle cells [1] and has four or five putative transmembrane regions. Additionally to their role as chloride channels some CLCA proteins function as adhesion molecules and may also have roles as tumour suppressors [2]. The domain described here is found at the N-terminus of CLCAs. 20.30 20.30 21.20 20.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.70 0.70 -5.16 13 295 2009-01-15 18:05:59 2005-08-22 15:35:17 6 15 48 0 190 313 0 228.60 40 29.90 CHANGED MushpsslF.lLlLaLLpus.ssSL......lpLNNNGYEGIVIAIsPsVPEDEpLIppIK-MVTcASsYLFEATc+RFYF+NVuILIPtoWKu+scYh+PKpEoYcpADVlVAssshttsD-PYThQaGpCGEKGcYIHFTPDFLLsccl.spYGPpGRlFVHEWAHLRWGVFDEYNsDcPFYlutppcIcATRCSssITGpshVhpCQGGSClo.+pC+hDppTGLYEcsCpFlPc+sQotKASIMFMQulDSVVEFCsEKsHNpEAPNL .....................................................hh.hhhl.hll..s.....tssh........lpLpsNGY-slllAIsPs..VPE....D..p...p..lIppIK.....-MlTpASsYLFpATcpRhY.F+sVsILIP...tTWp...s...............p.s.........p.Yt.hs....+..p.E.o........Y...cp...ADVlV....ss..s.t.........hs.D..c.PY......ThQas..tC.G-pGcaIHhTPsFlh.....s....c.....ph....t.Y..G....s.p......G+lhVHEWAHLRWGVFDE..Y...s...p....-.......p.......P........F.Y..h.s.t....p...s....pl.....csT.R........CSstIsG...l..tsp..tssC......p.......Cphs....ot.l.a.c.ttCpF.hPp....p......Q..s.t....p..uSIMF......h...Q...s.lsSVs-FCs.......p.......p.....s.......HNp-APs............................................... 
0 89 93 121 +8267 PF08435 Calici_coat_C Calici_coat_N; Calicivirus coat protein C-terminal Wuster A anon Pfam-B_108 (release 18.0) Family This is the calicivirus coat protein (Pfam:PF00915) C-terminal region. 25.00 25.00 27.10 30.20 20.80 19.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.40 0.70 -5.06 29 2058 2009-01-15 18:05:59 2005-08-22 15:40:16 6 5 1840 102 0 1400 0 178.20 60 47.19 CHANGED LTpLDGoPacPs-c.PAPlGhPDFuuphasVhsp.csss........suspucpshlsTssspFsP+LGslphso.s.....sD.hhsstssphoPsulssst...phs.WslPcYuusLs.sspLAPuVsPshsGEplLFFhSplPhs.uG..hssshlsCLLPQEaVpHFhpEpAPupu-lALl+YVNPDTGRsLFEuKLappGFlTl..ssoGssPhslPsNGhFcFsSWVspFYpLtPhGTuuutRR ...............................................................................................................s.shssaDPsE-hPAPLGsPDF.Gpl.GlhoQ.pspt........ssTRuHcAhl.Ts..s.spFsPKLGp.....l.h..sops.....sD..hpstpss+FTP..lGlh.s.....tt.....c.pQWslPpYsut......shpLAPslsP.hsGE.lLhFto.h......ss...hss.........lsCLlPQEWVQHFYQEuA....PuQ.S-VALLRaVNPDTGRVLFEsKLHKsGalTV..ApoGpps.lVlPPNGYFRF-SWVNpFYTLAPhGsG................. 0 0 0 0 +8268 PF08436 DXP_redisom_C 1-deoxy-D-xylulose 5-phosphate reductoisomerase C-terminal Wuster A anon Pfam-B_445 (release 18.0) Family This domain is found to the C-terminus of Pfam:PF02670 domains in bacterial and plant 1-deoxy-D-xylulose 5-phosphate reductoisomerases which catalyse the formation of 2-C-methyl-D-erythritol 4-phosphate from 1-deoxy-D-xylulose-5-phosphate in the presence of NADPH [1]. 
20.80 20.80 21.10 20.90 20.70 19.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.10 88 3400 2012-10-10 17:06:42 2005-08-22 15:49:37 7 8 3281 79 827 2645 1733 87.00 58 22.25 CHANGED llPVDSEHsAIFQsLpupstpp......lc+llLTASGGPFRshshcpLpsVTsppALpHPsWsMGtKITIDSAThhNKGLElIEApaLF ...........lLPVDSEHsAIFQsL...........s......p....t....p...pp................lp+llLTASGGPFRsp.s.hccLtsV.......T.......s.......cp....AlpHPNWuMGpKIolDSATMMNKGLElIEA+aLF.......... 0 290 560 711 +8269 PF08437 Glyco_transf_8C Glyco_transf_8N; Glycosyl transferase family 8 C-terminal Wuster A anon Pfam-B_3038 (release 18.0) Family This domain is found at the C-terminus of the Pfam: PF01501 domain in bacterial glucosyltransferase and galactosyltransferase proteins. 19.10 19.10 19.70 19.50 18.10 18.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.70 0.72 -3.95 14 973 2009-01-15 18:05:59 2005-08-22 15:50:24 5 2 442 0 23 344 0 56.50 39 17.19 CHANGED YPsspYFhpAhpsSPWpchsLhcAsstpph+hphKHhhtpt+YlsGlhshltYhhcK .....YPsupYFhpA+psSPWcchsh.h.cs.ssppph+hphKHhhtQp+YhsGlhshltYhhcK.............. 1 2 6 15 +8270 PF08438 MMR_HSR1_C GTPase of unknown function C-terminal Wuster A anon Pfam-B_4095 (release 18.0) Family This domain is found at the C-terminus of Pfam:PF01926 in archaeal and eukaryotic GTP-binding proteins. The C-terminal domain of the GTP-binding proteins is necessary for the complete activity of the protein of interacting with the 50S ribosome and binding of both adenine and guanine nucleotides, with a preference for guanine nucleotides. 
23.30 23.30 23.60 31.10 22.80 23.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.48 0.72 -3.82 93 341 2009-01-15 18:05:59 2005-08-22 15:51:20 5 8 303 1 227 327 110 110.50 40 25.54 CHANGED AANKsD.h.s..uccNlc+lpc..............llPsSAtuELuLR+AscsGhIc..YtPGDpcFp...lss-s....................l.scpQcpuL-tI+c.llpcaGu.............TGVQpslspAVa-lLshIsVYPVcc .....................................................................AhNKhD.hss...........A-cNlp+ltc............ph..ssptlVssSAhuElhLR+hs+pGhIc..YhsGs.-Fp...hsp-s......................l.s-cpcptLEpl+-.V..Lh+aGo................TGVppsLspAs.-lLshlsVYPVcs......... 0 60 135 193 +8271 PF08439 Peptidase_M3_N Oligopeptidase F Wuster A anon Pfam-B_679 (release 18.0) Family This domain is found to the N-terminus of the Pfam:PF01432 domain in bacterial and archaeal proteins including Oligoendopeptidase F. An example of this protein is Lactococcus lactis PepF [1]. 20.70 20.70 20.80 21.40 20.40 20.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.05 0.72 -3.93 182 3235 2009-01-15 18:05:59 2005-08-22 15:52:47 5 3 2039 3 529 2248 391 68.60 25 11.49 CHANGED Elhpls-pplpphlp.pps...pLptYpahLccl.hcp+sHhLStcpEplLuph.ssshsuhsplashls.uclpF ........................Elhpls-cplpphlp.tp.......cLptYpahlc.pl.hpp+sHhLotcpEclLuph.s-shsusschashhs.uDlpF........................ 0 204 361 452 +8272 PF08440 Poty_PP Potyviridae polyprotein Wuster A anon Pfam-B_237 (release 18.0) Family This domain is found in polyproteins of the viral Potyviridae taxon. 
23.40 23.40 23.70 23.60 23.30 23.30 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.58 0.70 -5.33 63 1068 2009-01-15 18:05:59 2005-08-22 15:54:46 5 30 151 0 0 1152 0 222.90 45 9.43 CHANGED AAFLCFsYGLPV.hTpsVoTulLupCTlcQARTMhpFELoPFahschV+aDGoMHPpIHclLKpaKLR-S-lhLschAIPhpssspWhosp......-Yp+lGsplph.scsl+IPFhs+slP-claEclWcslpcaKs-usFu+lo.SssAsKlAYTLpTDstuIsRTlslI-pLlpcEppKppaFcolsussssutsFSLsu...Iss.slRsRYh+DaopcNIphLppu+uQLhEFpshshchps.ph.........lpsaGsLpsVpaQocpp...lu+tLpL+G+W......stslhspD ......AAhhsFsYsLPV.hTpsVSTsllupsTV+Qs+sh.tFELoPFah.phlpaDGoMHP.lHclLKpaKLR-u.h.Lsc.uIP.phsstWhosp......-YpR.......Gh.plph...cpl+lPFhhpsIPpcLap.lWcslhp.a.KssssFsplp.usshsKlu.YTLpTD..uI.RTlhll-pLltpEhhKpp.Fcshhspss.u..FSl.s...lss.sh+t+hh+DaTtcNIthLpts+uQLhEFpshphshsp.ph..........hpcaGuL.psVaHQspss...huKhLpLKG+WsKoLls+D........................................... 0 0 0 0 +8273 PF08441 Integrin_alpha2 Integrin alpha Wuster A anon Pfam-B_609 (release 18.0) Family This domain is found in integrin alpha and integrin alpha precursors to the C terminus of a number of Pfam:PF01839 repeats and to the N-terminus of the Pfam:PF00357 cytoplasmic region. This region is composed of three immunoglobulin-like domains. 
33.00 33.00 33.40 33.20 32.90 32.70 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -12.85 0.70 -5.64 97 1499 2012-10-03 16:25:20 2005-08-23 09:52:25 7 51 113 50 666 1224 1 389.40 20 41.35 CHANGED uRPVlpl.psslphpPpp..lshpppp....Ct.s.......hsC..hslpsChphpupsh.......stplslshslplD.....ptpthpRshF..................tspppshptphhhhsptpthCp.phphal.p...shcDhloPIslplsasL................t.ttshsslp..PlL.s.t..psshhppplsFt+s...........................................................................................................................................................CGsDs..lC.sDLpL.ssphstp..p............................hllG.ss..pplslplslpN..........pGE.sAYpspLhlph.PssLpatplt..................pt.tp..sC....stppssp......hlsCsl..GsPhhp..ssplphplhasssphst..........tpslphplps...posspps....pssshphplslthtsp.......lpl.pGsspPsphh........................p..spcchGs...tlp.apaplpNpGss...slspsplplphPhphpss........hLlahhplt...............t....C.................sttthsshpl................................ptst.ssspppphphpppphhtp.ttt......................................s.ps....pChplpC 
.............................................................................................................................................................................................uRPVlpl...ps...slp.h.pPp..........lshpthp.......Ct.t..............s....hplphChphpspsh...........s.phsl......p.hp.lphD......ttth.tRshF...................spptp.h.ttp..hh..hht.ttp......C..p..pht...hhl...p...............shcD.....hls.....P....I...sl...plsasL......................................p..slt.....PlLs.......t...p..t.........h...ptp..h....a.....p.s.........................................C..GpDp..hC.ssL.pl..psp...hstpp............................hhls.sp..pplslplslpN..........pG.E.sAYpsplhlph.P..s.lpattl............................h..pC.....p.tpps....................h.C.pl..G.p..P.....hhp...ssph..phtlhas.s..s..th.p...............t..pplphpl..p......h......po...p.....s....p..pp..................pps.h..p.hp.h.l...hhts.t..............l.l...pu..spssphh.......................p...........p.pshGs............lp..a.a..p.l...........t...................N....G.....sls.th...lplthPht.h....ps....t..............lla.hhph...........................t.hp..Cp.................t...p..th................................................................................................................................................................................................................................................................................................................... 
0 114 162 355 +8274 PF08442 ATP-grasp_2 ATP-grasp domain Bateman A anon Bateman A Domain \N 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.14 0.71 -4.94 22 4398 2012-10-10 13:17:03 2005-08-23 14:07:01 5 18 3546 33 1434 3767 3414 194.30 40 47.81 CHANGED sLHEYQuKclhp+aGlsVPpGhVAposE-ApchApcluspshVlKAQVhsGGRGKA.........GGV+lspos-EstchAcchLGppLhThQTs.tGphVpcVhlp-tssIp+EhYlulllDRssppslllASscGGh-IEEVAtcsPEtIhKhslDsthGlpsapARclAhphGhps.c.hppsschlhpLYclFhcpDAohlEINPLVh ............................................................................slHEYQu.K..p.l.h.tp.......a.........G...l.s.ls.pG......hs...s.....to..s..c......EA..............p....s.....A....p....c......l........u...............s....s.........s............h..V..VK...u...Q.l.+..A.GG.RG.Ku.................................GGVKl.s..c....o..h..-.-s.c.t...h..A.pp....h.L..G..p.pL...l....T.h.QT........s.s..p......Gp.....V..ppl...h....l.E..p...s.s........s....I.........t.......+.....E...hYluh.ll.D..R...usp.p..sshhuSs..E.GGh..-.IE...c.VAp...c...s...P.-t.I..h.Kh.s........l.D...P...h....s.G.h.p.s...h..p..u.R.c..l.A.h.p.l....G..l.s..s...p......h...pphschhhsLap.hF..h.-..p..D.h.sl..l..E..INPLl............................................................................................................................................................. 0 451 852 1189 +8275 PF08443 RimK RimK-like ATP-grasp domain Bateman A anon Bateman A Domain This ATP-grasp domain is found in the ribosomal S6 modification enzyme RimK [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.07 0.71 -4.82 5 2606 2012-10-10 13:17:03 2005-08-23 14:07:23 6 34 1803 4 917 10880 6158 177.30 31 43.97 CHANGED AcDKh+ShpLLAKps........IslPsouLuau.-cApchIEclt...uaPlVlKslaGSpGltVhLAcccQuLcull-uh+.hsssI....LlQEFIccuss...cDIRslVVGsEVVuAlcRhuc-G-FRoNLaRGGsuEshslosEEcEIAIKAucAhuLslsGVD.Ilcoc+GLLVhEVNuSPGLcshp+soGINIAtKll-aI ............................................................................................................................................psKhhshplL..tpp.u.................l.s...h......P....h....T....h....h.....s.........t.....s..................c.....s....h....t....c....h...l..c.h.l..........uh.P...lV.lK.....h......h......p......G......o.....p.......G...h.....G....V...h......l.....s......c...s...........p.......p...........s........h.....p.......s......l.....l......c......s.......h.......h......t......h......p......s...t...l...........................ll.Q..E...al.....p..p..s..t.s.............tDl...R.....s..h..V.........l....G..........s...........c.........l........l.........u......A.........h........p........R......p........u........t......c.......G...........c.......a..........+........s........N.......h......c........p...........G.........G....s........s.............p..........h.........h.........p..............l.........o........s..........p........t.....c.....c......l........A...........l.....+........A.....u....c.......s......h.......G.....l.......s......l........s.....G.........V.....D.....l........l.......c.................s.......s.......c.......G........s.......h.....V.h.EV.N..u...s.....P..u.....h..c.u...h...p....t.s.s..s...h...s.l.u.t..hht........................................................................................................................................ 
0 288 540 763 +8276 PF08444 Gly_acyl_tr_C Aralkyl acyl-CoA:amino acid N-acyltransferase, C-terminal region Fenech M, Moxon SJ anon Pfam-B_7828 (release 9.0) Family This family features the C-terminal region of several mammalian specific aralkyl acyl-CoA:amino acid N-acyltransferase (glycine N-acyltransferase) proteins EC:2.3.1.13. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.01 0.72 -3.97 8 176 2012-10-02 22:59:21 2005-08-23 15:35:18 5 3 37 0 93 173 2 87.10 38 30.37 CHANGED hClLGPEGTPVSWsLMDQTGElRMuGTLPcYRtQGLls+VhappsptLcKhGFPhYsHVDcsNpshQ+MStsLsHlshPCsWNQWNCsP .......................hClLsPEGsPVSWslMDQouEhpMuYTlP-YRp.pGhhphlhhp.hsptLpppGa.P.h.YspVtcsNptsh+hhpslta..h...h.P...Cp.WppW.hsP.............. 0 7 9 26 +8277 PF08445 FR47 FR47-like protein Fenech M anon Pfam-B_71946 (release 17.0) Family The members of this family are similar to the C-terminal region of the D. melanogaster hypothetical protein FR47 (Swiss:Q9VR51). This protein has been found to consist of two N-acyltransferase-like domains swapped with the C-terminal strands. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.60 0.72 -4.23 9 870 2012-10-02 22:59:21 2005-08-23 15:35:46 5 14 549 3 377 18220 2313 82.70 21 31.53 CHANGED slGlhct..chsphsuWhl+..s.Gtluulpshsca+p+GLGshLspuluctIsp.cGpsshshlsssNssupplac+LGFphh...pshal ...............................................................................h.................h....s.......p.....l......s......s....l......h...s...h.s.c..aR...t+..G..h....u....s.t....l....s....t........s....l....s.....p..........p...............l....h.........p......p...........G.....c.......p...........s.......h.......l...h.....s...s....s....s..N...s.......s...u...t....p..l..Y.p..+..l.GFpth...........hhh............................................ 
0 141 246 325 +8278 PF08446 PAS_2 PAS fold Bateman A anon Pfam-B_437 (Release 18.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 21.40 14.00 21.40 14.80 21.30 13.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.44 0.72 -3.60 41 1308 2012-10-04 01:10:46 2005-08-24 15:19:37 6 52 760 31 297 1415 19 101.80 37 12.43 CHANGED shhptIQpsGhIQPaGslLAl.-Es....shplluhS-Nss-hLsls................spp........................lGpcl+sLhsssusssLccAhsss-lohlNPlhlp.s+s...su+sFhAllHR....h-..sslll-LEPs ..........s..p.Iphs.shIQPaGshlAl...-...-..t.........s.hpllt.h.SENs.p.hLshs................tt.............................................lG.pslc...s...L.h...s..s.s..s......s.h...t.L.p..p....A.h.s.t....t..c...l....s....h....hs.Pl...h.hp....scs.........ouK...P..F..aAIlHR...........hs.....sulllDhEPs.......................................................... 0 79 167 233 +8279 PF08447 PAS_3 PAS fold Bateman A anon Pfam-B_64 (Release 18.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.00 0.72 -3.89 122 12998 2012-10-04 01:10:46 2005-08-24 15:19:44 6 2702 2306 12 5490 23426 1576 87.10 19 13.27 CHANGED hlhhss.ph..tclhGass.p-h..hst...hhphlHP-Dhtthhpshpp......tht.pst.hpt.-aRhhp.psGphh...Wlpspspsht.spsGpshphhGsh .......................................h..hhst..ph....hpl..h.G...h...s....p....c......h..................s.................................h..........h..p..h.....l...H...P...-..D......h...p...t...s..t..p...s.h.......pp.............thp..ps....p...s........a..p.......h....c......a..R...l...h...........p....p..........c...G........p...h.p.........Wlp.sp.u.psh..h...s...pp.G.p..h.thhG....................................... 0 1661 3542 4558 +8280 PF08448 PAS_4 PAS fold Bateman A anon Pfam-B_493 (Release 18.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
23.10 23.10 23.10 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.00 0.72 -4.02 49 13956 2012-10-04 01:10:46 2005-08-24 15:20:11 5 2601 2465 26 5614 38008 2799 109.70 16 17.36 CHANGED hcshssslhhh....Dt-hphhhsNts......htp.hhthsspphhGcsht..-..hhst.......ttsphppthpcshpsppshphhthhth.......ssp.chhphph..hPlhs.pGp.hhuhhhhspDlTpppp ......................................................................pthss.slh...hh.......D...t.....c......t.....p...h...h..h..s..Nps...................htp....h.....h.....u......h......s.....t......p........p....h......l.......G...c.s.hh.............-.......l.h...s.tt..........................t.t..t....h.....t....p....t......h......p.......p......s.......h........p........s........t.....p......s......h.....p...h...p...t..h.hhh........................ss.p..t...p...h..h...p..h..ph............hP...l.....h.........s.......t.......p.........G.........p....h..........h.......u...l.l..s.h..spDlTcpt............................................................................. 0 1870 3885 4919 +8281 PF08449 UAA UAA transporter family Bateman A anon Pfam-B_606 (release 18.0) Family This family includes transporters with a specificity for UDP-N-acetylglucosamine [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.61 0.70 -5.50 23 1443 2012-10-02 19:55:49 2005-08-25 11:28:56 6 22 332 0 997 3957 210 274.70 19 77.88 CHANGED hlhshuGlasuhhs.ulh.Ehlhptp.us..............uthlTFsQhlhhshhuhlhthhhp............htppphPh+pYhlhshhhhhssshsNtuLpa.lshPspllh+SuphIssMlhGhll..hpK+YshhcYhushhlolGlhlholhsupsssspptpt.p...............GlhlLshuLhhsuhhushQEpla+pYstss.........pEhlFYophhuhshhhlhhh......pshhhpuhthhh......................phPp.hhhaLlh.slspals.phVahhhsphuuLTsollsTlRKhlSlllSllhFspshohtphlGshlVFhGhhlhshstpptp .........................................................................................................................................................................h......h..h........s...h.h.....c..h..l....hp....t...........................................s.hls..h....h.p...h..h.....h....h..h..h.....h..u.h..h..h.h....h.ht........................................................tt.hs....h.......h......t....a.....h....h.....h....u....h........h....h.....h....h....s.........h....h....s....p....t......u...L....p....a.....l.s...h...Ps................h...lh..Ku..sp..h.lsl.....h.l.h.u.hll.............ht...+..c..Ys.h.h.c...h.h.....s.sh.....h.ls.hGlhl........h.....s...h..t.ss....p.t...t.t.t....t.t.t.t........................................................................G.hh.h.l..h.h...s..l...h...h....s..u.........h....h.......sh..h.p.......-.p........h..h...p...t...h....t...h..p.s..........................hp.h.h..h.h..s...s....h....h....u..h........hh..h.h...h.h..hh..........................s.t.h..h..p..s..h..t..h.h.....................................................................ph.st....hh.......h.......l....h...h....h....s...h....s..........s....h..h......u........h....h...l......h...h....h....h....t.....h......u......u.l.s.h.shl.....h.s......h......R.K.h....ho.h.l.h.S.........h..l....h....a........s..p.....s...h....o.h..h..t.h..h..uhh.l.lFhuhhh.s
h......t...................................................................................................................................................................... 0 376 570 831 +8282 PF08450 SGL SMP-30/Gluconolaconase/LRE-like region Finn RD, Fenech M anon Pfam-B_3630 (release 7.0) Family This family describes a region that is found in proteins expressed by a variety of eukaryotic and prokaryotic species. These proteins include various enzymes, such as senescence marker protein 30 (SMP-30, Swiss:Q15493), gluconolactonase (Swiss:Q01578) and luciferin-regenerating enzyme (LRE, Swiss:Q86DU5). SMP-30 is known to hydrolyse diisopropyl phosphorofluoridate in the liver, and has been noted as having sequence similarity, in the region described in this family, with PON1 (Swiss:P52430) and LRE [1]. 26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.67 0.70 -5.00 50 3454 2012-10-05 17:30:43 2005-08-26 09:32:17 7 90 1667 66 1425 4351 2023 232.20 22 68.45 CHANGED LGEGPhWctp...........ppsLaWV......DIhstplaphsssssppp.....................hchss..........hlusls..h...ps..spllluhppul......................hlh..c.......hss........s....phphlsph..........sp.spsRhNDGpsDspGp.hahGoMshsttsst...........GsLYRlsss....tp.....lphhhss.lslsNGluaSsDspthYa.....sDohspp..lhta-hDhssu.lss++sa.hchppts......t....PDGhslDu.-GslWsAha.....suu......plh+asPp.Gc.llpplplP.sp.psTsssF.GGschssLalToAp 
......................................................................................................................................................................................................................sEuPha...............p.tt.L......h..as................D.....l............t....t....p.....lh..p....h.....p.....t..s..t..t.....t.h.....................................................h.t..hst...................suh..hh.........tp.........sp.h...lh..s..........t.t.sh......................................................hhh.......s..............t.s..............................t.......ph..p...h.lhs..........................st.thsp..N...D.s.....h.s....c....s................p.G..p....ha.hs.s.....s.h...t...t.....t.tsh............................................utl.ap.lsss......tp..........................l.p.h.l..h...s.s......l..s.h...s.........N..G............ls.a..S..s....D....t..p.......hh..Yh.......................s-..o............h.....s.....pp...............lh.ta......s......h.....s.......p..s.......s..........l...s....s..t......c.hh....hp....hsttt..................................................PD..Gh.s....l......Ds...cG...s.l.a.s.A..h.h...........................su.s..........................tl....h..h..a....s.....P......p....G.......p.......h...l....tp..l.....t......lP....s.....t...t.....s.....s...shsF...u............G.....phppLalos..t................................................................................................. 0 451 825 1164 +8283 PF08451 A_deaminase_N Adenosine/AMP deaminase N-terminal Wuster A anon Pfam-B_3145 (release 18.0) Family This domain is found to the N-terminus of the Adenosine/AMP deaminase domain (Pfam:PF00962) in metazoan proteins such as the Cat eye syndrome critical region protein 1 and its homologues. 
21.30 21.30 22.00 21.80 21.00 20.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.98 0.72 -3.96 32 240 2009-01-15 18:05:59 2005-08-26 09:39:06 6 5 76 4 102 235 0 85.10 33 19.39 CHANGED hLhhshhhs.t.........................................psapppRstlhptEpthplGuclhLspcEppsNphlMp.lKp..cElpcuhhsstpFsPuhHFFcshshIcp.SslFpll ...............................................h.......................................................h.p.Rpthh.p.Ep.hthGs..pl.LsttEttsNphLMs.lKp..pElpcG...............hhss.p....FsPuhHFFcshshIcp.SslFphl..... 0 26 35 71 +8284 PF08452 DNAP_B_exo_N DNA polymerase family B exonuclease domain, N-terminal Wuster A anon Pfam-B_3196 (release 18.0) Family This domain is found in viral DNA polymerases to the N-terminus of DNA polymerase family B exonuclease domains (Pfam:PF03104). 21.00 21.00 21.00 34.50 20.40 17.20 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.58 0.72 -6.73 0.72 -4.67 6 88 2009-01-15 18:05:59 2005-08-26 09:45:14 5 2 48 0 0 82 0 22.10 77 2.21 CHANGED M-I+ClNWFEs+G.Ep+FLYLKA MDVRCINWFES+G.ENRFLYLKS 0 0 0 0 +8285 PF08453 Peptidase_M9_N Peptidase family M9 N-terminal Wuster A anon Pfam-B_4156 (release 18.0) Family This domain is found in microbial collagenase metalloproteases to the N-terminus of Pfam:PF01752. 
23.90 23.90 23.90 27.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.84 0.71 -4.96 34 561 2009-09-11 12:23:32 2005-08-26 09:47:51 5 11 300 3 58 499 2 179.60 32 22.09 CHANGED hs.hspLsphs.pcLlstlps.hchsslssLFphsssstphhhscs+hpslhstLpppupsYsuss..u+ul.sLsEhLRAuaYltaYs..cplsthsst.hp.chhsulpAhhpNPsFh.ss..............cpQspllpuhshlls..NspppstslstshslLppaN...cshsp.hp.hs..........thsslaplhtGhpas.phht............hhtt ..........................hs.hu-LsphssppLs..chlss...hs.a...p...plssLFphspsshs.hh.scsphpslhstLsppupsYTt-s...u+slpshsEhLRuuaYlta.s..scls.hspt.hp.chhsulcshhpNsshhh.ss..............t-QspVl..suhupLIs..NA.us-s-slssshplLcpaN...cshsphspphs..........tssul.aslMpGhsashp.hh..................................................................... 0 19 31 45 +8286 PF08454 RIH_assoc RyR and IP3R Homology associated Wuster A anon Pfam-B_4135 (release 18.0) Family This eukaryotic domain is found in ryanodine receptors (RyR) and inositol 1,4,5-trisphosphate receptors (IP3R) which together form a superfamily of homotetrameric ligand-gated intracellular Ca2+ channels [1]. There seems to be no known function for this domain [2]. Also see the IP3-binding domain Pfam:PF01365 and Pfam:PF02815. 22.10 22.10 24.00 22.30 21.80 21.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.60 0.72 -4.44 43 718 2009-01-15 18:05:59 2005-08-26 09:51:09 6 72 128 0 390 593 1 120.10 46 3.81 CHANGED ppp...tppphcl..lppllRhLQLhCEGHNhshQNYlRpQsssc.....sohNllppslclLpsh...........................hh.ths....ppshclhhpsh-oLoEhlQGPCppNQ.sls..cophh-hsssll..........pphp .......................s....ts.ph..hpslhRFLQLLC.EsHN...p..DhQ.NaLRsQss..p.......Ts.hNllhpTlpaL.plpt.....................................Sspss..ls.hGhh......p+sls.lhpQshpoLTEYhQGPCptNQ......pslA..cSphhDhlsuhl....p................................................................. 
0 162 186 280 +8287 PF08455 SNF2_assoc Bacterial SNF2 helicase associated Wuster A anon Pfam-B_3199 (release 18.0) Family This domain is found in bacterial proteins of the SWF/SNF/SWI helicase family to the N-terminus of the SNF2 family N-terminal domain (Pfam:PF00176) and together with the Helicase conserved C-terminal domain (Pfam:PF00271). The function of the domain is not clear [1]. 23.60 23.60 23.60 23.70 23.00 23.20 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.25 0.70 -5.89 42 1037 2009-01-15 18:05:59 2005-08-26 09:53:56 5 6 932 0 142 780 18 354.50 22 33.95 CHANGED u+hlhlssshhcchlpll.ptpsh......th.h.shp.hthplhc.pchP.lp....Fslpcppp.ph.Lphppt.sh..lh.stphhhhpsslYhlspcppphltslh.phht...spppplpaspcppsphlppllPtLpplG..p.l.lstpl..cphhhpshpschahDphcp.plhsplpacY.G.shplsshpchp....................................phhlhRDhc+Epclhphhcph...............................sFttstpphh.h.ts-cplapFhpptlspLpphG.cVahocsh+slhhhpssp..stlclppp..shL-hsFchssIsppElpplLpulpcpccYY+LcsGphlsL-pcchpchpphlppLphptpchppt.lplst.cuhhlsshL...pshphlphscsFccllpclpp.-chpaplP 
..........................................................................................................................................................................................................................+hlhhP.shhpphlphl.th.ph...........hp.pphp.hh.hp.hc...sphs..ht....Fplpch....p...p....h....plpl..pp......php...hh..pht..h.hh..h....ps....p..lY.....h.....lst.pp..hph..l....p.....t.lh....h.ht............tp.t.pp...pl.h...spschsc.hupsLshh...tphG...p..........sh....c..ph.t...h....p...s..h....p....spFah...Dp.............t.....c.....s..cl.phcl..pacY..G...sh.plsshpchp....................................ph.hspDhcpE...pclhp.lh.pt.h...............................uFp.t.s.............p....u...ah..h.....h..p.t-p.lYp.FhpchlPthc..plG.......c..V..hhoc.p...lcplh..hh.s.ssp.....tlplspp..sh..L-....l....p..F..Dhp.....s.....I.s.p.p.ElcpslpuLh.p.ppcaahhpsGpll.h.-....c..-.hp......clpphLpcL......thp.tphps.t.lplst.puh.lsphh.....cp....tsplphs...ppFppLspclppP-c..h............................................................ 0 54 97 114 +8288 PF08456 Vmethyltransf_C Viral methyltransferase C-terminal Wuster A anon Pfam-B_2153 (release 18.0) Family This domain is found to the C-terminus of the viral methyltransferase domain (Pfam:PF01660) in single-stranded-RNA positive-strand viruses with no DNA stage in the Virgaviridae family. 
25.00 25.00 64.60 64.30 19.50 19.40 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.50 0.70 -4.92 12 48 2009-01-15 18:05:59 2005-08-26 09:55:58 5 3 14 0 0 53 0 197.10 39 13.47 CHANGED +VLptLRhph+-plshpslhpsshtKlFGpVShaQ+uL+saApWluas.aGsshlphcslPLYVEIpDRl+LWppts..spsFshshpDl--KlchacEhE+E+cclScplsp-K.....................h....s.csssscssschtthpc.tt+sshp-hhtG......c.......................alppWhpp.pspFshsppps........hhthh..hthlptlh-hhhPshphuslh.s--psptlp .pVhphLRlphK-sVshtsLhpsAFhKVFGpVShaQ+AL+SFApWluYs.HGossIchpslPLYVEIsDRlKLWpptu.PspsFhLsh-DL-EKh+LaE.pE+E+pclSc+IlppK......hG...................pl...tst-htsschssch..hpcsRt+sshtchhEG......cVsTs...................hlppWsEp.-DHFshstpss......pshh.ah..h.hh+hl.-shhssh.Fuslh.ssDpstthc................ 0 0 0 0 +8289 PF08457 Sfi1 Sfi1 spindle body protein Mistry J, Wood V anon Pfam-B_54813 (release 17.0) Family This is a family of fungal spindle pole body proteins that play a role in spindle body duplication. They contain binding sites for calmodulin-like proteins called centrins [1] which are present in microtubule-organising centres. 
21.10 21.10 21.30 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 576 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.56 0.70 -13.25 0.70 -6.12 14 195 2009-01-15 18:05:59 2005-08-26 10:05:18 5 14 148 3 150 209 5 386.70 14 42.23 CHANGED psh+shlptp.....sscTEcaap+LEcRAuRAR-laLlsKuFs+Wtphup-Elp+TssARRHlLp......h+hFpuWRplT.......sVNEhKup...+FhLp+saptW+p+scc..hcphcppAsphppcclh+psahpWahphst+c............Aspah-apl+++..............uL.aWhcKh+s....sc-clpthcshpc+hslspshphWpp+opsltstppcspshpcpplhpptLppW+hpApLtPhtpplssth-scllppuaspWtcchphhppAp-hcRt+lh+suaTsWpct.LRhpsLpsRh-..-..Rlhhcshh+WhLtpRhpLhQ+l+-pRlppssFssalsshpcphscL.cps-htcchcscclL+upLtpW+sphs.p+chEhtAsthatsRlhppslstW+s+hp+hsplpsa....AcsAchaFlsppslKpW+tAs.pot+cRRppuhtphRRphKhsL.AtcshspW+s+spchtsl-ppAhthtpp+shphshchlcpW+ppshcthpphcpA-.hatcplhpchLh+Wt-pltphpphpppAsth.c.cshtpssstL+KhShRhhp.l+uppcsAcsh+ERp.R+psRuhFctWhp+sc ....................................h........................................................................................................................................................................................................................................................................................................................................h..h.....................................h.......t......h..hhthh.......t.............h.........................hh....h.t........W..p...................h.....t.....t.thh.t.ht.Wt..........................t................p.........hh..........ht.......a......................p................................h...a.........ch.......h.thtp.p...h..h....pthh.....tt..ah....tt.................h....ph.t..tt.......h..t.t...hhpp....thhpthhthWp..p...tht........p...p...hp..........h......u..hh...t.......p...h......hh.thpt....t.h..t.p.ht.p.h...p.h...........ut.p.......hp.....hh...hhpthhp.....W...h..thcp..+ht...phh..t....hppt...h.p...h.pl...tphhph.........Whp....t....h....t.t....ht...hp.p....p.t..t..h..t........h..hhh.th....hthWttth...........t......t....
......t........s.....h....t...p....h...h.t.h......hWh.t..h..th..........t.........h...............h.t.ht.h................................................................................................ 0 59 93 133 +8290 PF08458 PH_2 Plant pleckstrin homology-like region Fenech M anon Pfam-B_7298 (release 8.0) Family This family describes a pleckstrin homology (PH)-like region found in several plant proteins of unknown function. 21.80 21.80 21.90 22.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.36 0.72 -4.12 9 142 2012-10-04 00:02:25 2005-08-26 10:18:09 5 4 18 0 92 132 0 106.40 39 26.64 CHANGED sELlpRsRpGsL+hKpVuVYINKpuQVhLKLKSKHlGGAFoKKpKslVhuVscplsAW..sG+chhpsu...........chhhFGL+Tu.pGllEFcCcSphcpphWspuVpsLLp.sustc ...................s-Lhp+T+cGsL+h+hVSVYI.N.......+p.u...pVhLKhKS+H.luGshoKKKKsVVhsVpp-lsAW...P..GRc...hh-su..........ccctY.FGL+Ts.pGl.lEF-.Ccs.p.tc.hchWspulppLLphss...p.......... 0 11 50 71 +8291 PF08459 UvrC_HhH_N UvrC Helix-hairpin-helix N-terminal Wuster A anon Pfam-B_288 (release 18.0) Family This domain is found in the C subunits of the bacterial and archaeal UvrABC system which catalyses nucleotide excision repair in a multi-step process. UvrC catalyses the first incision on the fourth or fifth phosphodiester bond 3' and on the eighth phosphodiester bond 5' from the damage that is to be excised [1]. The domain described here is found to the N-terminus of a helix hairpin helix (Pfam:PF00633) motif and also co-occurs with the Pfam:PF01541 catalytic domain which is found at the N-terminus of the same proteins. 
21.30 21.30 22.80 24.40 20.50 19.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.85 0.71 -4.68 110 4453 2012-10-02 11:25:59 2005-08-31 14:14:15 6 20 4381 10 982 3406 2670 161.00 40 26.34 CHANGED LpchLsLs.ph....PpRIEsaDlSHlpGsssVuuhVVFpsGtspKppYR+apIc....h.............stG......DDauuM+EVlpRRa..p+hhcpp........................................hPDLllIDGG+GQlssAhcllc.cLGl..s..ls.llGlAKspccpst...........lhhss......tcsltLspss.sLhllQ+lRDEuHRFAI .....................................tphLtLs.t.h.....PhRIEsFDhSHh.t.G.ss.s.VuuhVVFps..u.tPpKs-YR+apI+....sl............................................sss.......DDYAuM+EVlpRRa..s+hlc.-.p...........................................................................................................................hPDLIlIDGG+GQlstAppVlp...EL.Gl......s......ls..lhGlAKssc+pss.........................lhhs..s.......t.tcsl....tLs.p.sS.sLaLlQ+lRDEuHRFAI............................ 0 335 662 843 +8292 PF08460 SH3_5 Bacterial SH3 domain Bateman A anon Pfam-B_1108 (Release 18.0) Domain \N 20.70 20.70 20.70 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.11 0.72 -4.06 15 1610 2012-10-02 18:48:24 2005-08-31 14:30:18 5 87 699 2 109 1038 6 64.90 33 24.53 CHANGED .thspsGoahhssppsl...+ssPplouPsthhhppGsplhYDpV.hptDGasWluYsshsGsRtYlPl .............s..hspsGsahhs..p..pssl...+spPpsu.u.s.l...h...s....ha.sGppl.p.Y.Dph.hptDGYp.......WloYh.u.hs.G...p...RRYls................. 1 25 50 77 +8293 PF08462 Carmo_coat_C Carmovirus coat protein Wuster A anon Pfam-B_4180 (release 18.0) Family This domain is found to the C-terminus of the Pfam:PF00729 domain in Carmoviruses. 
20.80 20.80 20.80 21.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.40 0.72 -4.09 8 62 2009-01-15 18:05:59 2005-08-31 16:17:01 5 1 4 3 0 62 0 98.80 70 28.49 CHANGED QASNDKVSDGPTYVVPSVNGNELQLRVVAAGKWCIIVRGTVEGGFTKPTLIGPGISGDVDYESARPIAICELVTQMEGQILKITKTSAEQPLQWVVYRM ........QASNDKVSDGPTYVVPSVNGNELQLRVVAAGKWCIIVRGTVEGGFTKPTLlGPGISGsVDYESA.RPIAlCELVT.QMEGQILpITKTSAEQPLQWVVYRM. 0 0 0 0 +8294 PF08463 EcoEI_R_C EcoEI R protein C-terminal Wuster A anon Pfam-B_4136 (release 18.0) Family The restriction enzyme EcoEI recognises 5'-GAGN(7)ATGC-3' and is composed of the three proteins R, M, and S. The domain described here is found at the C-terminus of the R protein (HsdR) which is required for both nuclease and ATPase activity [1,2]. 22.80 22.80 22.80 23.20 22.30 22.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.01 0.71 -4.39 125 1424 2009-01-15 18:05:59 2005-08-31 16:20:10 5 18 1139 0 251 1037 195 157.50 23 17.71 CHANGED pchpca.tthpp..hlppp.hsshssLpplhss....pphppp.LccLppthh..pt.....h..hstp...pl.p.....................t..sslhsll+plh..uh.-....hhshc-+spp.th..ppah......tpts.....hss.pQtcaLch.............................lhcpht..ppGhh-.....cshphss..Fpp..Gs.tplt..hFt........plpphlpclpctlh ............................................phpcY...scc..hltpp..hss.sshpplaps.....tchppt.lccLpphhh..ct......ltppt..shpp................................thchhchlpcl..uh..s.........hshpER...spp.th....spah...................................sphs.....hsppthphLch.............................llcphh-p.Glh-....hcsLphtP...F.pph..Gs..tl...phFsst.p........ph.pslpclpptl........................ 0 75 171 214 +8295 PF08464 Gemini_AC4_5_2 Geminivirus AC4/5 conserved region Wuster A anon Pfam-B_4177 (release 18.0) Family This domain is found in replication initiator (Rep) associated proteins such as AC5 in the Geminivirus/Begomovirus. 
25.00 25.00 56.30 55.40 22.80 21.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -7.90 0.72 -4.23 17 174 2009-01-15 18:05:59 2005-08-31 16:21:10 5 2 91 0 0 174 0 43.00 59 26.82 CHANGED MpsIlshhKRLcLThAFTu.tpIhuSlHsVasGLsVHtPVsPs ..MtsIlsthKRLcLThAFTustcIhsSlHsVHsGhuVHGPVsPs. 0 0 0 0 +8296 PF08465 Herpes_TK_C Thymidine kinase from Herpesvirus C-terminal Wuster A anon Pfam-B_4030 (release 18.0) Family This domain is found towards the C terminus in Herpesvirus Thymidine kinases. 20.10 20.10 20.50 32.80 20.00 17.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.38 0.72 -4.44 12 34 2009-09-11 05:18:18 2005-08-31 16:21:50 5 2 26 0 0 33 0 32.70 48 5.80 CHANGED pFpcDlsGlWspIYsQlh+NsuIKs+hlsWsAL .pFpDDlsGhWocIYpQlhpNsAIKophlcWsuL.. 0 0 0 0 +8297 PF08466 IRK_N Inward rectifier potassium channel N-terminal Wuster A anon Pfam-B_4080 (release 18.0) Family This metazoan domain is found to the N-terminus of the Pfam:PF01007 domain in Inward rectifier potassium channels (KIR2 or IRK2). 25.00 25.00 28.60 27.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.06 0.72 -4.05 5 111 2009-01-15 18:05:59 2005-08-31 16:22:24 5 2 41 8 60 91 0 45.20 67 10.61 CHANGED MuuuRTNRYSIVSSEE-GL+LuTMuusNGFGNG..KVHTRR+CRSRF ......MuusRsNRYSIVS.S.EEDGh+LsTMu.....sA.NGFG.NG...KV....H.T.RppCRsRF.. 0 3 11 28 +8298 PF08467 Luteo_P1-P2 Luteovirus RNA polymerase P1-P2/replicase Wuster A anon Pfam-B_4011 (release 18.0) Family This domain is found in RNA-dependent RNA polymerase P1-P2 fusion/replicase proteins in plant Luteoviruses. 
20.50 20.50 86.80 86.70 19.30 19.20 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.21 0.70 -5.65 4 151 2009-01-15 18:05:59 2005-08-31 16:23:28 5 2 10 0 0 154 0 332.90 77 58.24 CHANGED h.F-.LIsASA+sVKDFISaCYsRh+SlYYuhKRWLhElpGpFcAHDAFVsMCastMhsIE-FEtELAEEauptEsEVp.AcshhKhLVAptu...........sGspcuhsDF..ltuRuGsasPl..............uCcs+Spp..s+s-Kh.pLlccpclh...Es+ht+sY.cEhGcu.hspW.NsL.oRhphlKcstccttcNAKtAp+lss.hptsssIsDhhuhsEVspV-TG.ph.spKshpGEEhspspPhlc.VRRlK..sEstssApsaIpppI+.pN.pL.sus-lShATIsRYs.phsEchcLDlsSpThLhphAMh.VPlPpppDIctthllQSPsARplR-chsVLsSpsF .hFFElLIGASuKAVKDFISHCYSRLKSIYYSFKRWLMEISGQFKAHDAFVNMCFGHMADIEDFEAELAEEFAEREDEVEEARSLLKLLVAQKS............KoGVTE.AWTDF.FhKSRGGVYAPL.....................SCEPT+QELEsKSEKLE+LLEEQHQF...EVRAAKKYIKEKGRGFINCW.NDLRSRLRLVK-VKDEAKDNA+AAAKIGAEMFAPlslQDLYSFTEVKKVETGLhKEVlKEhNGEEEK+LEPIhE-VRSIKDTAEuRDAASTWITETVKLKNSTL.sADELSLATIARYVENVGDKFKLDIASKTYLKQsAoMSVPIPTsKDIKhKMVLQSPEARA+RERMDVLDSsGF 1 0 0 0 +8299 PF08468 MTS_N Methyltransferase small domain N-terminal Wuster A anon Pfam-B_4172 (release 18.0) Family This domain is found to the N-terminus of the methyltransferase small domain (Pfam:PF05175) in bacterial proteins [1]. 21.10 21.10 21.20 21.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.76 0.71 -4.38 19 926 2009-01-15 18:05:59 2005-08-31 16:25:02 6 2 917 1 125 547 57 151.90 52 45.12 CHANGED SpVlhRphchFps+pVLlAGplpDphPtpLsthspplpsao..aaas.ththptpsslphp..hsspts..t.sDhllhYWPKuKpEAca.LttLlupLshGpEIhlVGENRuGV+Ss-KhLssa.GplsKlDSARRCuLaas.plpppPp.FsLcsaa+pY ...................................................SEVLLRHuDpFppp+lLFAGDlpDDLPAcLc.ss..u..u..+.uc.s.p..........aHHap.sl.up.p.......h...s.-..ss+Fu........Ls.A.p...s..ss...s......u..ssDTLIYYWPKsKsEApFQLhsLLShLPsGs-IFVVGENRSGVRSAEpM.L.A.-Y.u.s.l.sKl.DSARRCGLYaG.+LE+p..Pt..F-h-paWtpY.................... 
0 20 45 88 +8300 PF08469 NPHI_C Nucleoside triphosphatase I C-terminal Wuster A anon Pfam-B_4183 (release 18.0) Family This viral domain is found to the C-terminus of Poxvirus nucleoside triphosphatase phosphohydrolase I (NPH I, [1]) together with the helicase conserved C-terminal domain (Pfam:PF00271). 21.00 21.00 21.60 21.00 20.70 20.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.83 0.71 -4.31 13 67 2009-01-15 18:05:59 2005-08-31 16:26:02 5 4 49 0 1 57 7 146.10 57 24.20 CHANGED HhssP.ERRYVNVHFIIA+hosGcsoVD--LL-II+sKS+EFsQLF+VLKpoSIEWIasppKsFpPVDDEoGacsLhSRsl..D-sstosphh+ltpGpNIWYSpSschloIhKGFKscD.G+IYDs-Gsalpshs-N...Pll+IcssKLlYI ...................HVhTPPERRYVNVHFIhARLSNGhsTVDEDLh-IIpoKSK.EFsQLFRVhK+oSlEWI+sspK.DFSPlDsESGWcsLlSRuI..DhssKpshssKLl-GpNIWYSsSsRLhoIp+GFKssD.GRlYDsDGNaLpsMPDN...PlIKIHsGKLlYI......................................................................... 0 1 1 1 +8301 PF08470 NTNH_C Nontoxic nonhaemagglutinin C-terminal Wuster A anon Pfam-B_4024 (release 18.0) Family Bacteria of the Clostridium genus produce protein neurotoxins, which are complexes consisting of neurotoxin (NT), haemagglutinin (HA), nontoxic nonhaemagglutinin (NTNH), and RNA [1, 2]. The domain described here is found at the C-terminus of the NTNH component. 23.90 23.90 24.20 26.30 21.70 23.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.23 0.71 -4.61 7 75 2009-09-10 15:40:16 2005-08-31 16:27:13 5 3 29 2 3 60 1 162.30 68 14.08 CHANGED NYFssLNNSYIRDuscERLEYNKsYpLYNYVFP-sslhEVppNNNIYLSIpNpsNLNlpssKFKLlslssNKQYVQKWDEVIIsVLsspEKYlDISsENNRIQLVssKssA++hIlNNDIFhsNCLThuaNNKYlsLSh+spNYNWMICNssppIPKtAaLWILK ................NYFpsLNNSYIRDSscERLEYNKTYQLYNYVFs-pslhEVppNNNIYLoINNTNNLNlQuuKFKLlsIssNKQYVQK.....aDEs.IIslLDshEKYlDI.S.EsNRlQLlssKssAKKhIIsNDIFISNCLThoYNsKYlsLShKDcNYNWMICNNspplPKtuYLWhLK................. 
0 1 3 3 +8302 PF08471 Ribonuc_red_2_N Class II vitamin B12-dependent ribonucleotide reductase Wuster A anon Pfam-B_4121 (release 18.0) Family This domain is found to the N-terminus of the ribonucleotide reductase barrel domain (Pfam:PF02867). It occurs in bacterial class II ribonucleotide reductase proteins which depend upon coenzyme B12 (deoxyadenosylcobalamine) [1]. 21.60 21.60 22.00 32.80 20.30 19.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.16 0.72 -3.90 47 480 2009-01-15 18:05:59 2005-08-31 16:31:44 5 13 471 0 161 391 512 99.30 52 8.97 CHANGED DGolVFc.cslEVPppWSQlAsDllAQKYFR..KsGlPt+..................t.t..........hssEsSh+QVhcRlsGsWshWG.hKsGYFso-sDAcsFaDEhtahLhpQ ....DGolVFc.cslEhPstWSpsAusIlApKYFR...psGsPs+...............................................EsSh+QlhDRlssTashhG.hK.sGYFs.S....--DApsFt-ELsahLspQ....... 0 64 112 136 +8303 PF08472 S6PP_C Sucrose-6-phosphate phosphohydrolase C-terminal Wuster A anon Pfam-B_4159 (release 18.0) Family This is the Sucrose-6-phosphate phosphohydrolase (S6PP or SPP) C-terminal domain [1] as found in in plant sucrose phosphatases. These enzymes irreversibly catalyse the last step in sucrose synthesis following the formation of Sucrose-6-Phosphate via sucrose-phosphate synthase (SPS). 20.20 20.20 21.40 21.80 19.90 19.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.88 0.71 -4.54 15 99 2009-01-15 18:05:59 2005-08-31 16:33:44 5 8 33 0 46 100 2 119.50 41 30.38 CHANGED GPNlSPRDl.DF.s..csKhE......shsPuc-VVKFYLFYEKWRRAEVEpS-ha.....lsslKulscPuGVhlHPSGlEpSL+ssIsu.L+sCYGDKQGKpFRlWVDpllsTplGSsoWLVKFDKWEhsG-ERpCClTTllLosK ...............................GPNlSPRD......l....-h.s........c.c...........shpPuctVV+FYlhaE+WRRuEV.po-th.....hphhKslscssGshlHPuGhEpsL+ssI-s.LtspYGDKpGKpaRhWVDplhssp..huossWL...V+FcpWEh.p.Gpth.sChoohhls.K............... 
0 9 32 39 +8304 PF08473 VGCC_alpha2 Neuronal voltage-dependent calcium channel alpha 2acd Wuster A anon Pfam-B_4072 (release 18.0) Family This eukaryotic domain has been found in the neuronal voltage-dependent calcium channel (VGCC) alpha 2a, 2c, and 2d subunits.\ It is also found in other calcium channel alpha-2 delta subunits to the N-terminus of a Cache domain (Pfam:PF02743). 20.80 20.80 20.80 21.00 20.70 20.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.24 0.72 -4.21 2 161 2009-01-15 18:05:59 2005-08-31 16:34:57 6 7 49 0 67 128 0 91.60 59 9.28 CHANGED LDAEhEs-lKV-IRppMIDGEpuphpF+TLhKSQDERYIDKG.RTYTWoPVsssDYSLALVLPsYSh.YIKAplt-TITQA+........SEoL ......LDAELEs-.K.EIR+pMIDGcsG-+p.h+TLVKSp...DE.....RYIDcssRTYTWsPVpGTDY....S....L..uLVLPs.YShaYIpAp...lp-sIhQs+.h...................................... 0 5 12 32 +8305 PF08474 MYT1 Myelin transcription factor 1 Wuster A anon Pfam-B_4029 (release 18.0) Family This domain is found in the myelin transcription factor 1 (MYT1) of chordates. MYT1 contains C2HC zinc finger domains (Pfam:PF01530) and is expressed in developing neurons of the central nervous system [1] where it is involved in the selection of neuronal precursor cells [2]. 
20.60 20.60 20.70 22.00 17.70 20.30 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.82 0.70 -4.89 3 273 2009-01-15 18:05:59 2005-08-31 17:44:20 6 25 37 0 104 204 0 181.60 45 24.52 CHANGED YRsNVAsTTPRANLAKELEKFSKVoFDYASFDAQVFGKRhlAPKlQTu-sSPKuaQcAKPFsKssSPcSSsoSSYV+SSSSssSu..GupspSTsptSSFDYoQDuEAAHM.AATAILNLSTRCREMP-NLSTKPQDL.soKusDIEVDENGTLDLSM+KpRhRDpuhPsoSSCooIsTPpSP.SPQ+p......SSulsNspsaQ.Lu-QDsWDlPlDYT..KP+RlcEEEsKEp-Psshs.upEsLEE++auGEsoIPSPKPKacpRK .....................a+ss.sssoPRusLuKE.EKauKssF-Y.sSaD....s....p....s...aGKR...hPh.l.p.s...pph.....p.....................................................................................................s.asYsps.EssHh.AAsAILNLSTRChE..h.sp.LSsKPQs.L....s+ss-...hEVDENGTLDLSMpKp+.t-.........ssss....ss.p..ss.pp..............hhsst..Q.hsc.t-tW-..hPlsYo..KspthpE.-.-...c...............-.....................p.......s.....................s...........................................................pt........h--pchstE.............................................. 1 4 12 39 +8306 PF08461 HTH_12 Ribonuclease R winged-helix domain Bateman A anon N-terminus of DUF128 family Domain This domain is found at the amino terminus of Ribonuclease R and a number of presumed transcriptional regulatory proteins from archaebacteria. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.09 0.72 -4.24 10 1050 2012-10-04 14:01:12 2005-08-31 18:07:20 5 14 1020 0 197 812 52 64.30 54 8.60 CHANGED lEILsILuEuccPlGAKhIApELcpR.GYcIGERAVRYHL+hLDEcG...LT++lGYu....GR.ITEKGl-EL .................E.FIL-aL...sc...+c...p...PA.SR-ELAsELplcsEEQl..EuLRRRLRA.MERDGQLVF..T.R....R.Q..C.Y.A................LPE+.LD.......................... 
1 42 102 154 +8307 PF08475 Baculo_VP91_N Viral capsid protein 91 N-terminal Wuster A anon Pfam-B_4034 (release 18.0) Family This domain is found in Baculoviridae including the nucleopolyhedrovirus at the N-terminus of the viral capsid protein 91 (VP91) [1]. 21.80 21.80 159.30 152.90 19.40 17.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.18 0.71 -4.92 27 63 2009-01-15 18:05:59 2005-09-01 10:08:50 5 2 56 0 0 65 0 186.40 39 23.89 CHANGED LLLlsIlllllFhlhahhIhs-FsEssFssRLpVlpEYh+pss..u-pPlPssLuYVScV..ssshYhVThFsTpsLpphppplHDDphEhFsFlpQpFp......................ss.sspsRVpspssDsscFhl+uDDG..lphcC..PpstpFDss..cCVPlssC.scssGp.hPlTEchlDpLVhN++ss+p....t.sspphHPT .LLLlAIlllllFslhYlhIhs-FsEssFssRLpVlsEYh+RTN..A-pPhPcsLuYVS-V..spphYhVThFsTssLsslppolHDDphEpFsFlpQpFp......................ssssspsRVpsp.ssDsscFhl+GDDG..hphcC..PsstpFDtst.+CVPlPsCts+ssGp.YPlTEchlDsLVhN++ls+s.....t..ssspthHPT..... 0 0 0 0 +8308 PF08476 VD10_N Viral D10 N-terminal Wuster A anon Pfam-B_4155 (release 18.0) Family This domain is found on the N-terminus of the viral protein D10 (VD10) and the related MutT motif proteins [2]. The VD10 protein is probably essential for virus replication [1] and is often found to the N-terminus of a Pfam:PF00293 domain. 25.00 25.00 52.70 52.70 21.00 17.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.14 0.72 -4.49 7 51 2009-01-15 18:05:59 2005-09-01 10:13:46 5 2 38 0 0 39 0 44.80 58 18.60 CHANGED hpha.Sslhs.IhppNR+LoKTalhpDssQ+lpsTuFspQpLchh .MsaY+SSllSpIIKaNRRLuKohIhcDDSQhITLTAFVNQsLasH.. 0 0 0 0 +8309 PF08477 Miro Miro-like protein Fenech M anon Pfam-B_1154 (release 17.0) Family Mitochondrial Rho proteins (Miro-1, Swiss:Q8IXI2, and Miro-2, Swiss:Q8IXI1), are atypical Rho GTPases. They have a unique domain organisation, with tandem GTP-binding domains and two EF hand domains (Pfam:PF00036), that may bind calcium. 
They are also larger than classical small GTPases. It has been proposed that they are involved in mitochondrial homeostasis and apoptosis [1][2]. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.36 0.71 -10.61 0.71 -3.61 17 1563 2012-10-05 12:31:09 2005-09-01 11:57:45 8 235 397 7 1022 40314 7022 116.40 20 16.79 CHANGED +lsllGctssGKoSllpphhttphs.............h-hpssshshp.....hhs.ssst...........h...-.stt.......hthp......hppusuhlllashsctpohpplp.hh.hlsphcp.t.t.lPllllusKhD .....................................................................+lhllG.s...t..u..sG...K.......o......o..L......l.p..p....h..h..s..t..s.hs....................................................................p..t..h...t...p..s...p...h...s....h.p................h.....h..p..s.p..pph...............................................l...l.h.....-....h....u....s.p...................................t.t.h...t...h.....t.................................hp.......t....s.....c.....s......l....l.....l......l.........a....s......h.........s........c........t................p........o.....h....p..........t.....l..............p.............h.........h..................h...........l.........................p.............h.........p...........t..............................................p.....l......P.....h..l..l.l.u.s+.D...................................................................................................... 0 351 546 791 +8310 PF08478 POTRA_1 POTRA domain, FtsQ-type Fenech M anon Pfam-B_1605 (release 7.0) Family FtsQ/DivIB bacterial division proteins (Pfam:PF03799) contain an N-terminal POTRA domain (for polypeptide-transport-associated domain). This is found in different types of proteins, usually associated with a transmembrane beta-barrel. FtsQ/DivIB may have chaperone-like roles, which has also been postulated for the POTRA domain in other contexts [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.83 0.72 -3.93 92 3670 2012-10-01 23:48:22 2005-09-01 14:28:48 5 6 3629 6 775 2508 1251 69.80 23 23.53 CHANGED hslcplplsG.sphlsppclhphhslph..tsshh..tlshsplppplpp.hPalcpspVp+p.aPs.plplplpE+ ........................lpplplp...G..s.....ph..sssp..clpps.h.thts.......ssshh...........ph...c...hsth.p..pplcp..hPa....lcpsp.Vc+p..aPs....plplclpEh............... 0 252 491 639 +8311 PF08479 POTRA_2 POTRA domain, ShlB-type Fenech M anon DOMO:DM07489; Family The POTRA domain (for polypeptide-transport-associated domain) is found towards the N-terminus of ShlB family proteins (Pfam:PF03865). ShlB is important in the secretion and activation of the haemolysin ShlA. It has been postulated that the POTRA domain has a chaperone-like function over ShlA; it may fold back into the C-terminal beta-barrel channel [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.15 0.72 -4.31 60 1933 2012-10-01 23:48:22 2005-09-01 14:29:39 6 19 922 6 495 1965 284 75.00 24 13.68 CHANGED aslpplp.lpG....sp.....hls.t.plpplhpshhu+slshsslspLspploshYlp+GYlToRshl.ssQ..sl.ssGt.LplpVlEG ................................................lpplp..lpG........sp...........hstt...tl...p....phh..p....s...h...h..Gc.slshpslp....t....lh...ptlsphhh.s+GYlT...ocshl...P...tQ.....sl..ps.G.h..lplpll.G........................ 2 80 250 391 +8312 PF08480 Disaggr_assoc Disaggregatase related Wuster A anon Pfam-B_4000 (release 18.0) Family This domain is found in disaggregatases and several hypothetical proteins of the archaeal genus Methanosarcina. Disaggregatases cause aggregates to separate into single cells [1] and contain parallel beta-helix repeats. Also see Pfam:PF06848. 
20.90 20.90 21.30 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.59 0.71 -4.87 21 34 2012-10-02 14:50:22 2005-09-02 16:30:26 5 12 6 0 31 42 2 190.60 46 24.16 CHANGED MsDIEIYNNsIasThusGIWlhGYs.uuYoKspAtsVaIHHNhFYsTGTNsuhsWlGGIVssGF.sTLIENNVFDGsYsAAIs.phhspt.....phuPsG..sGYsThVRNNIIsNTh......spusuGoGYGlhNhL.spTHoFlLpNNClYNNuuGsYtsss.SsoDIhsDPhas-pppHDYHLKS.sG+WsGpsWVpDtlsSPCIDAG ........................MsDIEIYNNhIasTaGPGIWlhGht...suYsKsputsVaIHHNhFYsTGTNss...l.....pWlGGIl..sS.GFhs...oLIENNVFDGsYpAAls.phYsst..............thuPsG..oGYT.ThVRNNIIsNTh.R....ppsssGoGYGlh....NhL.scoHsFlLcNNClYNNuuGsYpNss.SsoDI.sDPhFssppp+DYHL+SpsGpWssptWhpc.h.SPCIDsG................................ 0 10 10 10 +8313 PF08481 GBS_Bsp-like GBS Bsp-like repeat Wuster A anon Pfam-B_2122 (release 18.0) Family This domain is found as a repeat in a number of Streptococcus proteins including some hypothetical proteins and Bsp. Bsp is a protein of group B Streptococcus (GBS) which might control cell morphology [1]. 20.00 20.00 20.20 20.10 19.70 19.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.53 0.72 -4.11 50 1326 2009-01-15 18:05:59 2005-09-02 16:33:46 5 104 156 0 188 1240 1 91.80 34 44.51 CHANGED lslpststpsGsaclhlssl.sssslpsVtlPlWS-pNsQDDlpWYsAs+pssGoapsslchssHpsp.GpYplHlY...shssGphhGlsuTs.hpV ..............................lp.pstpsusa-lhlosl...s..s..p.s.lppVplPsWScpNGQDDlh..WYsAs.+....ps...cGo.YpsslphssHcsptG...pYplH..lY..hhpssGphhG.lsuTshp................................ 1 19 53 118 +8314 PF08482 HrpB_C ATP-dependent helicase C-terminal Wuster A anon Pfam-B_2170 (release 18.0) Family This domain is found near the C-terminus of bacterial ATP-dependent helicases such as HrpB. 
25.00 25.00 34.60 33.70 23.40 22.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.65 0.71 -4.02 136 1495 2009-01-15 18:05:59 2005-09-02 16:40:32 5 8 1469 0 345 1253 181 132.00 52 16.94 CHANGED WLtPaLsGlpshsplpplclhsuLpuhL.sWs..ppppL-phsPs+hssPoGs+lsI-Y..............ssst..PsLuVRLQEhFGhspsPplu........sG+lPlhlcLLSPAtRPlQlTpDLsuFWpuoY.t-V+KEMRGRYPKHsWP-DP ..............................WLhPah...s...GlpsLp.sL.psl-lhpALcuLL....sWs...hpQ..+LDp.hPsHasVPoGS+.lsIcY.......................................p.p.-s.....sPsLAVRhQEhFG.s.ssPslA........pG.RVPLlLcLLSPApRPLQlTcDLuuFWpG.uY.t-VpKEM+GRYPKHsWPDDP............. 0 98 201 282 +8315 PF08483 IstB_IS21_ATP IstB_N; IstB-like ATP binding N-terminal Wuster A anon Pfam-B_3188 (release 18.0) Family This bacterial domain is found to the N-terminus of the Pfam:PF01695 like ATP binding domain in proteins which are putative transposase subunits [1]. 20.80 20.80 20.80 20.90 19.60 20.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.00 0.72 -7.05 0.72 -4.27 15 366 2009-01-15 18:05:59 2005-09-02 16:44:39 6 4 226 0 141 347 108 30.00 42 12.31 CHANGED cEQtspsss.ssLSF-ERLGLLlDRElspR- .......cQhspPsh.ppLuFEERLuLLl-cEhspR-..... 0 49 87 116 +8316 PF08484 Methyltransf_14 C-methyltransferase C-terminal domain Wuster A anon Pfam-B_2106 (release 18.0) Family This domain is found in bacterial C-methyltransferase proteins. This domain is found C-terminal to methyltransferase domains such as Pfam:PF08241 or Pfam:PF08242. But this domain is not a methyltransferase. 
20.20 20.20 20.20 20.40 20.10 19.80 hmmbuild --amino -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.92 0.71 -4.68 24 606 2009-01-15 18:05:59 2005-09-02 16:47:16 6 15 457 10 185 611 1437 149.00 30 37.09 CHANGED lssHGGSlRlhhs+pssppsss....pVscllspEpstGLsphssYppFucRlcph+cpLlsaLtpt+spG+plhGYGAssKGNTlLsaCGlspc.lsalsDpsshKpGpho.PGo+IPIhss-chtshcPDalLlLsWNap-EIlpcppthhstGG+hlhPlP ............................................shaGGSlphhht...t.s.t.......t.......tltthhttE............thslpp..t.ha.tt.Fspcspp.h+ppll.......phLh.p.h+s.pG.+.plsuYGAsuKGs....TL...LNa.s...Gl.ss...-.h.lsallDp.N.PtKpG.+ah..PGo+.IPIhsPc.p..l.pp..p..p.P..D...h..l.ll.L.s.WNhp...-EIhpphp..h.h.p..Guphlh.lP............................... 0 62 135 158 +8317 PF08485 Polysacc_syn_2C Polysaccharide biosynthesis protein C-terminal Wuster A anon Pfam-B_4073 (release 18.0) Family This domain is found to the C-terminus of the Pfam:PF02719 domain in bacterial polysaccharide biosynthesis enzymes including the capsule protein CapD [1] and several putative epimerases/dehydratases. 25.00 25.00 27.40 26.90 23.10 21.70 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.37 0.72 -4.46 50 564 2009-01-15 18:05:59 2005-09-02 16:49:37 5 3 510 0 93 379 199 47.90 54 14.07 CHANGED DsRDLNYsKYFpEG-pcloph...-DYsSHNTcRLsV-thKcLLLcL-aIp ..DsRDLNYsKYhcpGsc+loph...p-YNSc.NTchLsVEplK-hLLpL-al+.... 0 30 61 79 +8318 PF08486 SpoIID Stage II sporulation protein Wuster A anon Pfam-B_1108 (release 18.0) Family This domain is found in the stage II sporulation protein SpoIID. SpoIID is necessary for membrane migration as well as for some of the earlier steps in engulfment during bacterial endospore formation [2]. The domain is also found in amidase enhancer proteins. Amidases, like SpoIID, are cell wall hydrolases [1]. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.40 0.72 -3.72 160 1665 2009-09-10 14:51:08 2005-09-02 16:51:50 5 45 1176 0 415 1429 537 101.00 27 22.16 CHANGED lhst.ts..php.....hlsp.lslEcYLtGVVuuEMsusas.hE...ALKAQAVAARTYAltphtp........................................ttps...hclss.ospsQVYtu............hpstp...spsp......pAVpsTpGpV.LsY ...........................h....ttphphls.p.lsh.E.-YltuVlssEhs..s..oa..s....hE.............ALKAQAVAuRTa....slp.phtp...................................................ptp.s...h.s.lss...ostsQs.Yts................................hpt....p..hp.........pAlptTtG.llh........................................................ 0 188 317 379 +8319 PF08487 VIT Vault protein inter-alpha-trypsin domain Wuster A anon Pfam-B_2015 (release 18.0) Family Inter-alpha-trypsin inhibitors (ITIs) consist of one light chain and a variable set of heavy chains. ITIs play a role in extracellular matrix (ECM) stabilisation and tumour metastasis as well as in plasma protease inhibition [1]. The vault protein inter-alpha-trypsin (VIT) domain described here is found to the N-terminus of a von Willebrand factor type A domain (Pfam:PF00092) in ITI heavy chains (ITIHs) and their precursors. 21.30 21.30 21.40 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.27 0.71 -4.41 18 1019 2012-10-10 13:59:34 2005-09-02 16:53:43 5 50 337 0 559 925 68 115.20 29 13.23 CHANGED sslslhShpVcSploSRaA....+TsVoScslNpuspspEstFplplP+pAFIoNFohhIsGpsasGpIK-KptApp.Ypc.ApucG+oAuLV+ssupshEpFpsSV.sVsssoKVsFcLsY.pE ..............................h.lhshplps.pl..suphA....psslspphh....N.ps......s.p.s...h..Es.ha.h.LPc.sAhlssFph...........p..l.s.s.c.s..h..hGplKE.Kp....pA..+p.pYcp.....Ah.s.pG..c...sAuL..l......c.......p.s.s.t.s..h..............F.pssV..Nlsss.sc.lphpLpYt......................................... 
0 184 259 391 +8320 PF08488 WAK Wall-associated kinase Wuster A anon Pfam-B_4138 (release 18.0) Family This domain is found together with the eukaryotic protein kinase domain Pfam:PF00069 in plant wall-associated kinases (WAKs) and related proteins.\ \ WAKs are serine-threonine kinases which might be involved in signalling to the cytoplasm and are required for cell expansion [1]. 21.30 21.30 21.60 22.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.65 0.72 -3.71 14 91 2009-01-15 18:05:59 2005-09-02 16:56:30 6 13 11 0 77 112 0 106.30 25 17.43 CHANGED sp.hCsGhtCCQAp.lPspp.QlIGVsIEsssstspst.tGC+V.AFLTscpYu.SNsT-PEphaupGYssVELGWahpTosppFhss..LuCpNhs-......Yssss...........pChCcY ...................................................t.CsGhsCCQsp.lP.st..h......Q.l......hs.....sslps...p.s..t.s.psp..ssCch.AFLs-c...p.......a...h.h.s...s....hosP.p.ph..p....st.....tYss.l.LsWhh..p...s.......s...s......p....h..........ss......hsCps.p..............hsttp.............pC.Cp....................................... 0 16 52 60 +8321 PF08489 DUF1743 Domain of unknown function (DUF1743) Wuster A anon Pfam-B_4001 (release 18.0) Family This domain of unknown function is found in many hypothetical proteins and predicted DNA-binding proteins such as transcription-associated proteins. It is found in bacteria and archaea. 20.80 20.80 21.10 24.00 18.30 17.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.42 0.71 -4.34 59 216 2009-01-15 18:05:59 2005-09-02 17:02:30 6 3 149 7 150 227 81 117.20 29 29.78 CHANGED sRGllGAhAulut.................caTaEllsYRhsp.hsp.pRplshpslht.hctcphPhsacslD.htcchllsP+sssPVLaGI...RG.s.tslhpstphl.ps.E.....phpthtIahTNQuTDsH ...s+GllGAhuulut.h..........tcaTaEhlsYRtscphsp.pRplstsolht.hcppshstsacslDhhpcchllsP+ossPVLaGI...RGhs.tslhpstphl.pstE.........hpthtlahTNQsTD.H.............. 
0 35 93 125 +8322 PF08490 DUF1744 Domain of unknown function (DUF1744) Mistry J anon Pfam-B_5917 (release 18.0) Domain This domain is found on the epsilon catalytic subunit of DNA polymerase. It is found C terminal to Pfam:PF03104 and Pfam:PF00136. 20.40 20.40 21.50 21.10 20.30 19.40 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.14 0.70 -5.89 17 355 2012-10-02 01:06:00 2005-09-05 12:53:33 7 10 276 0 260 359 14 351.10 32 18.13 CHANGED slpplYpchhpphhp..t.tphtphh.hs-shsFplphhostcphh+plschlpph+cp+ss.sllhlQSsh.hppltppl.hLspFPhlpl..s.s-sslssLsWQphhu++hlpHaLslusWlsphlphu+YuclPlsNlphDshtFhlDlhaAR+LpppNhVLWWSss.shPDhGGhEpD...phsh.h-clt....PslNsPGhYsslslElplpsLslNolLpSullNEhEGus.s..s.sh..ps..........s.sstssasEsuhsssuhpVL+phlKcWac-.uhpsNshAD.l......lpphhpWVpSssShLaDsuL+halcsLh+KshLQLluEF+RlGuplIaAshN+lllpTsKhslpsuhAYupYll+ul+o+slFpalcLplt+YWDhLlWMDpaNaGGh ...............................................................................................................thatt.h.hh.t.........t..t..h....pphpFc.sp..sshcphhctl..........pchltth+p..p...c.....p...t..s.s....llslQS..s.....................h.........p........pL..h.pt..ls.h.Lp-aPhl..l......................s.....sc..ths...s.............L..sWQphsu+phlp+ahsh.sph.lpp.hhphu.R.......YhclP...lsNl.........t.............D..shahhDlh..aARpLpppshlLWhS..ss.shPDhGGh...EtD.......phhh..h.-pht......................splNpsGsYso..VClELclp.s.LAl.Nsl.L..pSshls-.h..E....G.u..ssh....hs.ss...tt...............................ttss.ss.sta...s-.s.uhssssh.pl.L+pMVp.sWhp-....s......tt...t...N...h...hA....D..........l.+.......hhR.WlpSs..sShL.aDsuL+phlpth......h+KhFhpLluEF.+.RlGupllaAs..hs+lllpTsKtplt.sA........hA.......YspYllpol+s.....+...........lFchlslphpchWchL.lWhDthNaGGh............................................................................ 
0 97 148 220 +8323 PF08491 SE Squalene epoxidase Wuster A anon Pfam-B_3107 (release 18.0) Family This domain is found in squalene epoxidase (SE) and related proteins which are found in taxonomically diverse groups of eukaryotes and also in bacteria.\ SE was first cloned from Saccharomyces cerevisiae where it was named ERG1. It contains a putative FAD binding site and is a key enzyme in the sterol biosynthetic pathway [1]. Putative transmembrane regions are found to the protein's C-terminus. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.82 0.70 -5.57 13 429 2012-10-10 17:06:42 2005-09-06 09:10:11 5 17 277 0 255 1836 818 239.90 39 51.57 CHANGED aAsLTlVCDGhFS+FR+sLs.sspspVsSpFVGLlLcNscLPpssHGHVILus.suPlLlYQISSoEsRlLsshsupplPs.sss-ltpYLcssVtPplPccLpsSFhpAl-cu.plRsMPNpaLPAs.sss...pGlllLGDAhNMRHPLTGGGMTVuLsDllLLp+LLpPl...DLsDcpplschlpo.FahtRKshsu.llNTLuhALYpLFsAsscph+.tLcpGCFcYhphGG.ClsGPluLLuGl.P+PhhLhtHFFuVAlYulhp.hhsts.hhhPhulh 
...................................................................................................................................AsLTllsDGhhSp....hR..........+..p..lh....s............p.....s.........p.....l.......S..p....F.l.....Gh......l..........s.........s...........h.....P...........h...........s......p........+.G...c.V.l.....L.u.......s...s....s.................Pl......L.hY.p.I.u..s.........p...E.....sR.h......L.l.D..l....s......p..............h.P..s.................p......h.p..p....a..hpp...h..l.......h......P.p...l......P...t..p...l..p.....s.Fh....t...A....l...p......p....u....p..........l.R...o....M.....P..N...p.h....h....P...s...s...........tp........G....hl.llGDAhNMRHPLTGG.GMTVAhsDlll....L....p.p.LL..p.sh...................sL.....t........D.........t..t.........t..l..h...p..h..h.p....p....F.aht.R.K.s....h.s..o..slNhL..A..t.A...LY.pl..Fu.....A.s..................sp...............p.h....+.thppuCFcYhphGG.hssGPluL.Lu.Gl......spP.h.LhhHFFuVAhaulhhhh.h...s..................................................................................... 0 73 152 207 +8324 PF08492 SRP72 SRP72 RNA-binding domain Zwieb C, Rosenblad MA, Bateman A anon Pfam-B_7529 (Release 18.0) Domain This region has been identified as the binding site of the SRP72 protein to SRP RNA [1]. 25.00 25.00 25.10 26.10 23.50 24.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.43 0.72 -3.60 19 306 2009-01-15 18:05:59 2005-09-06 16:14:07 7 56 261 0 211 304 1 65.40 35 10.29 CHANGED uupthpppuspt...................................pppc...++KRKs+.hPKsa...s.sshpP.........DPERWLPh+-RSsYRs ..............................................ts...............t............................pthpppc.t...K+KRKs+..LP..Ksa........c.Ps..hsP.........DPERWLPh+..-RSsYRs..... 
0 70 114 173 +8325 PF08493 AflR Aflatoxin regulatory protein Wuster A anon Pfam-B_2081 (release 18.0) Family This domain is found in the aflatoxin regulatory protein (AflR) which is involved in the regulation of the biosynthesis of aflatoxin in the fungal genus Aspergillus [1]. It occurs together with the fungal Zn(2)-Cys(6) binuclear cluster domain (Pfam:PF00172). 20.80 20.80 20.90 20.80 20.70 20.50 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.88 0.70 -5.36 5 174 2009-01-15 18:05:59 2005-09-06 19:28:07 5 4 87 0 46 171 1 201.60 32 56.62 CHANGED GlssPsTsSoPsh.PAsTsAsTTSostPpssspupPsu.p.........PPlsTP...hTPssTSusSPchsp.QSPPspsElWGuhLSPssSs.tsoDLSSLlSVsoDFGsLFuSl.ss.L..pDGsDAD.hhAcuhGsL.sA.hsVuosMpDlFssuAspPPpSscsocsh......CLSlsL-TLp+LFPcAPlGCQ..+sDuE-SSu+LsTIESVISDNKcAhDTlpsIL-CsCAQDGYlLSLVSLIVLKVLGWYlAAARsQsouTscsGshsp-outcSRRsSSSSF......EEcVLH ..................................................................................................................................................................................................................................................................................tP....o....st.sSsh.sp...pSPP...........-h.s..Luss.hus..pssL....SS.L..olso.....pFut.......ht.Sh................cu.cs-...h.A......huuL....s......A.....s.uo.h..s.....h...tsAs.ss.ps....p.hs..................C.hslsLphLhpLa.s....p.ushs...Cp.............hsstp....ss.s.s......p.hhT.....h-....VlpsNKpsh-slpplLsC.s.C..up.Dthlh..hhs.hhh.+llthY.........ss...ht............................................................................................ 0 8 26 42 +8326 PF08494 DEAD_assoc DEAD/H associated Wuster A anon Pfam-B_4150 (release 18.0) Family This domain is found in ATP-dependent helicases as well as a number of hypothetical proteins together with the helicase conserved C-terminal domain (Pfam:PF00270) and the Pfam:PF00271 domain. 
20.20 20.20 26.50 25.30 20.10 19.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.95 0.71 -4.57 178 1433 2009-01-15 18:05:59 2005-09-06 19:34:35 6 13 1220 0 512 1383 597 187.00 30 15.93 CHANGED hlPsscplllEph...ptt...stpllhashhG+psppsLuhllut+lscphshslshsssDYuhhlhsspp......................htc.....lhp.......-plpc...hlppslspotlh+p+FRpsAthuGll.+ph..Gt.phsspp.phsushLhps.h...............ct.ss.p.llppshcEshpchhDlsplpphLc+lppu..clplht...hspsSPhAh ................................h.lPsscpLllEphh....chu....caplll+ushG+plppshuhhlutRlppths.h.shs...hsusD.Glslphscs................................sp.shsp........lat.h.........s-pltp......hlpptl.ssSsLhttRFRcsAspuhLls+ptP.G+.c.pshh........Qp....phpustLlcl..t.................pth.P-asllhEohREsLp-hhDlstLtcllp+.lptu....clpltp....sspPSPhA....................................... 1 148 309 423 +8327 PF08495 FIST DUF1745; FIST N domain Borziak K anon Borziak K Domain The FIST N domain is a novel sensory domain, which is present in signal transduction proteins from Bacteria, Archaea and Eukarya. Chromosomal proximity of FIST-encoding genes to those coding for proteins involved in amino acid metabolism and transport suggest that FIST domains bind small ligands, such as amino acids [1]. 
22.10 22.10 22.20 22.10 21.70 22.00 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.06 0.71 -5.17 138 1150 2009-01-15 18:05:59 2005-09-06 19:37:04 5 35 807 0 415 1000 182 190.60 18 40.54 CHANGED psslll.lFsosp..astpt..lhptlppths.sstllGCoouGtlss.............ts.sshulslhsh....sssthpshshhh..hs....sshpsutpthpphhpshttt..............................hhllhsDGhssstp...........pllpul.......ptthsslslhGG.AuDshthppohlhs..s........spshps........uslsshlps..............s.hphtshhspGapPh.utthhlTcu..cpphlhElsscP ................................................................................................................................hhh.haso.p...hs.p.h....lhptlp...pths..sstllGsoosu.lss.............................tt.sp.slsl..h..th...ssst..h....p.sh.th.hh......hpp.....s...h...ts.u..t..phhpphhpphtt..............................thhllhhDu.h.ssppp..............tllpsl.......p.tthss..sslhG..G....s....A.us...s..h.....t...h.....p...p....s.h.lht...s....................sphhps...........usl..s.lhlts.............s..hphtshhsp..sa..p.Ph....u.p......hhVTpu...csphlhElsspP................................................ 0 135 287 368 +8328 PF08496 Peptidase_S49_N Peptidase family S49 N-terminal Wuster A anon Pfam-B_4027 (release 18.0) Family This domain is found to the N-terminus of bacterial signal peptidases of the S49 family (Pfam:PF01343) [1,2]. 
22.70 22.70 22.70 22.70 22.50 22.60 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.84 0.71 -4.43 68 1104 2012-10-02 13:07:06 2005-09-06 19:40:42 5 6 1078 0 169 648 164 149.80 52 44.51 CHANGED MEFLh-YGLFLAKslTlVlAIlsllshlhuhsp+p+..tpc...GcLclscLsEpYcchccplcttllsccphKthcKppKKpcKt..cpKtpcpt.pt.....................+s+laVLDFcGsIcAs-VsuLREEIoAlLulApsp.DEVLlRLESuGGhVHGYGLAASQL ......................................................MEhLs-YGLFL..AKlVTVVlAIuslshlIls..h..s..pRp+...tp+.......GELclssLoEpY+Eh+-.cLtssL.h-pcptKthcKupKKc.c.Kp-sKutKt.chK.ut..................................sp..pKPRlaVLDFKGShD....A...+EVsSLREEITAlLAsA+sp..DcVlLRLESPGGhVHGYGLAASQL....................................... 0 45 79 128 +8329 PF08497 Radical_SAM_N Radical SAM N-terminal Wuster A anon Pfam-B_2018 (release 18.0) Family This domain tends to occur to the N-terminus of the Pfam:PF04055 domain in hypothetical bacterial proteins. 25.00 25.00 28.30 27.70 22.80 21.70 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -12.02 0.70 -5.49 100 1422 2009-01-15 18:05:59 2005-09-06 19:45:10 5 3 1388 0 288 1078 179 328.40 58 47.65 CHANGED FLPho+cEMcphGWDphDlIlVTGDAYVDHPSFGhAlIGRlLEupGaRVGIIuQPDW+ss-..sFptLG+PpLFFGVoAGNMDSMVN+YTus+KhRp-DAYTPGG..........csG+RPDRAslVYop+h+EAa.p-l..PlllGGIEASLRRlAHYDYWSDKVRRSlLhDS+ADLLlYGMuE+sllElAc+Ls..........sG..cslps..lpsIRGTuahtpph.........................................................................................................sts...............pshlcLPSaEcVpsD+hhYAcA.+lhatEssPhsu+sLlQtHGs....R........hlhhNPPshPLo...........pcEhDtVYsLPYsRssHPsY........tct.tIPAh-hI+FS 
.........................FLPhSRcEM-.pLGW.DuCDlIlVTGDAYVDHPSFGMAIlGRhLEAQGFRVGIIAQPDW...poc.-....DFh+LG+PsLFFGVTAGNMDSMlNRYTA-++lR+DDAYTPss...........huGKRPDRAolVYoQRC+EAa.K.D..V..P.VlLGGIEASLRRhAHYDYWSDpVRRSlLlDSKADhLhaGNGERslVEVA+RLA.................tG.E.s..Isp..........I....pD.....lRsTAhhs+cs...th..h.sp....................................................................................................................h...t...t.shc......cshlhLPSaEcVp....s....D...........K....lLYAHAsRllHpETNPhs.ARALhQ+H.G-.......R........hVWlNPPslPLo...........TEEMDpVFuLPYp.RlPHPsY........Gss.+IPAa-MI+FS.................................................... 0 95 193 245 +8330 PF08498 Sterol_MT_C Sterol methyltransferase C-terminal Wuster A anon Pfam-B_3143 (release 18.0) Family This domain is found to the C-terminus of a methyltransferase domain (Pfam:PF08241) in fungal and plant sterol methyltransferases [1]. 21.70 21.70 22.60 22.40 21.60 19.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.92 0.72 -4.02 17 347 2009-09-11 17:15:18 2005-09-06 19:48:55 5 6 196 0 227 339 4 65.40 42 18.38 CHANGED hhohaRhotlGRhlTpshVthlEplGLAPpGSp+VsssLEpAAcuLVtGG+cclFTPMaLaVARKPt ............hohhRhothGRhhs+thltsLEhlGlAPcGopcsschLtpAA-sLVtGGcpplFTPMahhluRKP..... 0 72 142 197 +8331 PF08499 PDEase_I_N 3'5'-cyclic nucleotide phosphodiesterase N-terminal Wuster A anon Pfam-B_3045 (release 18.0) Family This domain is found to the N-terminus of the calcium/calmodulin-dependent 3'5'-cyclic nucleotide phosphodiesterase domain (Pfam:PF00233). 21.60 21.60 22.60 24.90 20.50 21.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.51 0.72 -4.07 6 248 2009-09-11 06:25:11 2005-09-06 19:54:19 7 5 83 0 129 240 0 59.40 69 10.83 CHANGED RLhDpDDELt-lps-oVPsE..VR-WLASTFTRps...ttpsc-KP+F+SlspulpuGIFs-+ ..................RLLDTEDELS-lQoDuVPSE..VRDWLASTFTRQMu..hhtR+uE.E..KP+FRSIVHAVQAGIFVER............ 
0 21 30 75 +8332 PF08500 Tombus_P33 Tombusvirus p33 Wuster A anon Pfam-B_2139 (release 18.0) Family Tombusviruses, which replicate in a wide range of plant hosts, replicate with the help of viral replicase protein including the overlapping p33 and p92 proteins which contain the domain described here [1]. 21.80 21.80 22.90 22.10 20.60 19.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.86 0.71 -4.44 10 116 2009-01-15 18:05:59 2005-09-06 20:20:20 5 3 28 0 0 123 0 145.10 43 31.33 CHANGED plslsshhh.ss.......LP...Rthlpphcphp-Ah-phs-DD-ssssl.h...p..hh.s.....Psschp+lVtps...RRs+YAsKlAtsA+uKVGLLKNocANcLVYQRVhl-.McpcsVRasDRstlLPLAVsA.CFl............pP-uV-EsppllGuS ...............................................................................hhthhh.......lh..s.......hP...Rthhp.hhhhhptpshhps-c-ss-sl.hp......c..hs.Dh.........sso+hTKhltuo...RRtsYAs+lApVARAKVGhLKNo.tNRLlYQRlhl-hMcccsVRhscpcshlPLAlus.CFl.............s-shEEptAlhGs........................... 0 0 0 0 +8333 PF08501 Shikimate_dh_N Shikimate dehydrogenase substrate binding domain Bateman A anon Pfam-B_99 (release 18.0) Domain This domain is the substrate binding domain of shikimate dehydrogenase [1]. 21.30 21.30 21.40 21.40 21.20 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.64 0.72 -3.92 40 6595 2009-01-15 18:05:59 2005-09-07 11:19:50 6 52 4461 89 1540 4807 2370 81.30 34 25.44 CHANGED llGpPlp.HShSPhlHNtha....pphGl.sssYhsh-ls........-shpsh.lptlcs....shtGhNVTlPaKpplhshlDclsspActlGAVNTl .......................lhGpPlt.HShSPhhHsthh................p.p..hG....l..s..h.s.Yh..sh.pls...............p.sh.s.ps..lp..s..hps........hshpGhNVTh..PaK..p.psh......s......h..h....D...c....los.pAph..lGAV.NTl................ 
2 443 935 1297 +8334 PF08502 LeuA_dimer LeuA allosteric (dimerisation) domain Bateman A anon Pfam-B_223 (release 18.0) Domain This is the C-terminal regulatory (R) domain of alpha-isopropylmalate synthase, which catalyses the first committed step in the leucine biosynthetic pathway [1].\ This domain, is an internally duplicated structure with a novel fold [1]. It comprises two similar units that are arranged such that the two -helices pack together in the centre, crossing at an angle of 34 degrees, sandwiched between the two three-stranded, antiparallel beta-sheets. The overall domain is thus constructed as a beta-alpha-beta three-layer sandwich [1]. 24.90 24.90 24.90 24.90 24.50 24.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.61 0.71 -4.36 57 5003 2009-01-15 18:05:59 2005-09-07 12:07:43 5 10 3771 14 1392 3984 3575 133.80 27 25.84 CHANGED cls-tDlhslhppphtp......................tp.phhcLpphpVhssst.........hssAsVplph....sGch......hptsupGsGPV-AhhpAlp+hlsh...pl...cLh-YplpulssG.sDA.upspVplp.....pc.sph..............hpGhGsssDIlpASscAhlsulNphh ...............................................................plh-t-l.sLhp.p.phhp......................tp...phacLp..p..h...p.l....p...ssss................thss...As..V.p.lps..............sGct....................................hptsupG.s.GPV-AlhpAlpchhsh..............sh.........cLhc......Ypl.pu.l..........s.............p..............G.....s....D.........A......h.u...p...shlplp...................ts..Gcp..........................................hpG.sGlssDIlpAShcAhlsAlNph.h...................... 
0 459 946 1204 +8335 PF08503 DapH_N DapD_N; Tetrahydrodipicolinate succinyltransferase N-terminal Wuster A anon Pfam-B_4065 (release 18.0) Family This domain is found at the N-terminus of tetrahydrodipicolinate N-succinyltransferase (DapH) which catalyses the acylation of L-2-amino-6-oxopimelate to 2-N-succinyl-6-oxopimelate in the meso-diaminopimelate/lysine biosynthetic pathway of bacteria, blue-green algae, and plants [1]. The N-terminal domain as defined here contains three alpha-helices and two twisted hairpin loops [2]. 21.70 21.70 22.70 22.00 21.10 20.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.62 0.72 -3.86 38 1077 2009-01-15 18:05:59 2005-09-08 09:31:38 5 7 1060 10 128 492 4 81.20 49 34.92 CHANGED hDApEIIpaIpsuKKpTPVKVYlcGs.Lpslsh..sslcsFsssphtllFG-ap-lcshL-tNpcpIccYclEsDpRNSAlPLLDl ....................hsApEIIpaIusAcKpTPVKVYlcG....p....Lps..lsh...p....olpsF.......Gstp.tVlFG-Wc-lcPhL-s.spphpDYhlEpDtRNSAlPLLDh............ 0 42 75 101 +8336 PF08504 RunxI Runx inhibition domain Wuster A anon Pfam-B_4085 (release 18.0) Family This domain lies to the C-terminus of Runx-related transcription factors and homologous proteins (AML, CBF-alpha, PEBP2). Its function might be to interact with functional cofactors [1]. 20.50 20.50 59.70 59.70 18.00 17.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.64 0.72 -3.57 9 243 2009-01-15 18:05:59 2005-09-08 09:34:32 6 4 51 0 82 197 0 96.50 68 23.14 CHANGED P.sos.QsQ.....sGsFQosSoPYaLYYGouoGSYQFSMlssG....GG-RSPoRhLssC.TuASTGu......sLhNPs.LssQsD...GVEu-GSHSN.........SPTuhssssRhDEuVWRPY ........................................PGSo.QsQSGPFQoSSoPY.LYYGo.SSG.SY.QFSMVs.......GG-RSPoRML.PPC..TsuSsGo......sLlNPs.LPsQsD...GV-..ADGSHSN.........SPTshssuGRhDEuVWRPY............. 
0 4 10 33 +8337 PF08505 MMR1 DSL1; Mitochondrial Myo2 receptor-related protein Mistry J, Wood V anon manual Family Myo2p, a class V myosin, is essential for mitochondrial distribution, class V being vital for organelle distribution in S. cerevisiae. It is the myosin essential for mitochondrial distribution. The established mechanism for distribution of cellular components by class V myosins is that they interact with the cargo at the C-terminal tail domain and transport it along the actin cytoskeleton using the N-terminal motor domain. Cargo-specific myosin receptors act as the link between the myosin tail and cargo. Myo2 binds with MMR1 (mitochondrial Myo2p receptor-related 1), the receptor on cargo, via the C-terminal domain. 23.00 23.00 40.10 40.30 22.80 22.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.84 0.70 -4.32 7 25 2009-09-11 00:16:20 2005-09-08 10:12:00 5 1 24 0 13 22 0 232.40 53 47.39 CHANGED KLuEhoRsGRSpp...+psS......DohRSsSPhRhthhss............sPKMLKPEYl...............S..ssshsLlSuhltpu..tptsp................................p.tts.s..ss.tts...t.....hhppsh....tQhR.p.............ph.tstpppppps.phpptpsppptpt...lpstptshss.sh.t..o......s.stphpp..tt.....p...hp...sSssuuss.s.hp.pph.pthpls.slPsDcNGFV......sspspR.SaISusuoD.h-.-...Whs .....KLSELSRGGRSKQ......RRGS......DTMRSVSPIRFQFLNN.................TPKMLKPEYL....................SQTTS..NLPLLSALLKNS..KKTTS................................EuQNSNPDPLNIEKNIIKQS.....IKDKL....EQLRoS...............................EolupVQ+KE+sssSaE...A..css.AEE...sllhpNsEuhLsShs......................PVPAoshcsPpsH..spcsEccuhRlVSuuSTpslS.sElNELPKDLNLD.sLPTDpNGFVQhs.....................tsNNNNNRYSFISSTSTD.YEsE...WpD..... 0 1 4 10 +8338 PF08506 Cse1 Cse1 Mistry J, Wood V anon Pfam-B_9217 (release 17.0) Domain This domain is present in Cse1 nuclear export receptor proteins. Cse1 mediates the nuclear export of importin alpha. This domain contains HEAT repeats [1]. 
20.30 20.30 20.50 20.40 20.10 20.20 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.37 0.70 -5.89 15 650 2012-10-11 20:01:01 2005-09-08 11:24:21 5 15 291 3 453 616 8 300.30 24 31.98 CHANGED IFKRWRPLF+Ss-LalEIKhVLDpFupPalsLhpsssphlps..sp.sscspLpllFcsLlLlsKlaYDLNsQDlPEFFEDNMpshMsha++YLohsN.PLL-..s---EAulL-+lKuuICEhlpLYsp+Y-E-FpPalppFlpslWsLLso.sospsKYDlLVS+ALpFLTuVAchs+YtplFss-ssLppIsEclllPNlsLR-uDEELFED-PlEYIRRDLEGSDsDTRRRAAsDhL+pLpccaEuhVTsllhpalpphLspYtpsPssNWKtKDsAIYLaoSlAsKGssTps.GVToT.NsLVsls-FFsppIhPDLh.sssssaPILKsDAIKYlasFRsQLsKpQLlplhPlLhpaLtssshVVaTYAAhsI .......................................................................................................................................................................................................................................................................................................................t......hh+laash..s..s...lP.....h..h..p..p..p..h...t.th....h....t......hhphlp......h.ss.................p.............p.......pp..p..s.......h............hh+..t..........h.hc.....h....t....h........a...........h.......p........+.....Y............t..........-.....................h.......t............hl...............p........a........h....ts.h.h.p.....l.........L....hp...h....p.................p..........c...................h.........l.....s.....ps.....l...p.a.L.s.t..l.sp...t.s..........p..a...t.............h..a.p.t..psplpplhp..c...lIhPshshp-p...D.ElaE--P.EYIR+...c.h....-...............s..p.D.h.s....o..cpAAss.hlp.sL...s...c........p.hc.p..lhthh....h....talpph..Lsp............h...t..t....s.....s..s...t..s...........a..+.p...................K........Ds..AlallsoLA.hh............................h...hh.t.lh.........ch............................................................h...........stsh..hh..a............................................................................................................................................................
...... 0 138 231 354 +8339 PF08507 COPI_assoc COPI associated protein Mistry J, Wood V anon manual Family Proteins in this family colocalise with COPI vesicle coat proteins [1]. 23.80 23.80 23.90 24.20 23.60 23.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.96 0.71 -4.49 24 296 2009-01-15 18:05:59 2005-09-08 11:31:58 5 12 163 0 225 274 1 136.70 20 72.33 CHANGED chssha..+hlNlssGslhlluGlsphh.........shp.....shllulYhIhhGlhlhhLEa..phP.......lhcYuSFhaSalGRGlFYlhlGsllhts.......shhphlsGhllhllGlhYlsLcahsslp...Ps..sh.........+psshshshpp.....................l ...............................................................h....hh.+lhslssus.l.hll....u.u...l.hphh..........hshp..........shllulYhl....lFulh...lshhEhph.h........................lhcahsFLh....sahGRGlhYlF........lG.slhhst........................thhphlsGhh.lhhhG..lh.alsl.thh..h..............................................h.......................................................... 0 111 165 207 +8340 PF08508 DUF1746 Fungal domain of unknown function (DUF1746) Mistry J, Wood V anon manual Domain This is a fungal domain of unknown function. 22.30 22.30 22.60 24.60 21.50 21.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.52 0.71 -4.16 14 96 2009-01-15 18:05:59 2005-09-08 11:59:42 5 2 94 0 74 99 0 113.30 37 32.54 CHANGED sLDhLhashlshlYahDsShlhLhlRuhsQh.hhsPc.................ss.hc.stspshlhsllhu.NlhCllhHhhhuhspuu-u..scGYLHGGlhIsFIGp+sPhS+hcllhhDllllslQllh ......sLDhLhas.LsslYYhDCShhpahlRulsQhhahoPK..................sssht.stspshlhslhhs.Nl..hChlhHhhhuhPpuuEs.........oRGYLHGGlhIDFIGQ+s....PsS+hcLlhlDllllslQllM................. 1 16 38 61 +8341 PF08509 Ad_cyc_g-alpha Adenylate cyclase G-alpha binding domain Mistry J, Wood V anon manual Domain This fungal domain is found in adenylate cyclase and interacts with the alpha subunit of heterotrimeric G proteins [1]. 
20.10 20.10 20.10 20.40 19.60 20.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.46 0.72 -4.63 19 100 2009-01-15 18:05:59 2005-09-08 12:50:02 6 46 95 0 57 104 0 51.20 30 2.63 CHANGED scshsPscc.....psssla+LDTNLscMEGIlocP.PhoPhDsshhsstps.cppp ....................tt.sssspt.........ssssla+LDTNLs-M-GIl..spP.PhoPhDsshh.sht................. 0 9 27 45 +8342 PF08510 PIG-P PIG-P Mistry J anon manual Family PIG-P (phosphatidylinositol N-acetylglucosaminyltransferase subunit P) is an enzyme involved in GPI anchor biosynthesis [1]. 29.90 29.90 32.10 32.00 29.80 29.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.66 0.71 -4.50 38 303 2009-09-13 01:51:06 2005-09-08 13:13:10 7 8 260 0 208 294 0 126.70 31 50.41 CHANGED sppchYGFshalhoplsallYllWualPcphLcp.l..................slsYaPs+aWAlAlPsallh.shlhsalshhhhNh.hhT.PLsulps.........lsDphuphtstpphsphpt..t................................................................hsslhDlslstVsclLYt ........phthYGFlhalsothsallYllWualPpsaLpp.l...................Gl.s....YaPs+aWAlAlPsallh.sllhsalhhhuhNh.hhTsP..lsS...lps.........lsDphup.hth.pp..h.tp..t.................................................................................................................................................................................................hsslhDlslstVschha...................................................................... 0 62 114 169 +8343 PF08511 COQ9 COQ9 Mistry J, Wood V anon manual Domain COQ9 is an enzyme that is required for the biosynthesis of coenzyme Q [1]. It may either catalyse a reaction in the coenzyme Q biosynthetic pathway or have a regulatory role. 
20.50 20.50 20.80 20.60 20.00 20.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.48 0.72 -4.47 70 470 2009-09-11 14:22:47 2005-09-08 13:40:06 6 6 434 2 244 450 340 77.20 34 29.11 CHANGED Ppssspuh+hlaphuDsIWphsGDpSs.DhsWYTKRssLuulYuuThLaa........lsDsStsapsThtFL-RRIcsVhphpch+s ...............................P.phstuhp.lhphsDsIWhhAG........D.pS.s.Dh...sWYTKRssLuulYsoo..L.ah........lpDpS..satcThpFl-cRlpc.lhp.htph..h................................................. 1 74 140 193 +8344 PF08512 Rtt106 DUF1747; Histone chaperone Rttp106-like Mistry J, Wood V anon manual Domain This family includes Rttp106, a histone chaperone involved in heterochromatin-mediated silencing [1]. This domain belongs to the Pleckstrin homology domain superfamily. 25.50 24.60 26.40 25.60 24.60 24.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -10.20 0.72 -4.00 156 861 2012-10-04 00:02:25 2005-09-08 15:06:29 7 18 317 14 611 854 9 92.10 27 12.65 CHANGED hhuVpsphtsspG...h...LaPlpc......slla.hcKPhhhlshs-.IptlpapRh.......sshsp+oFDhslhh.+............s.........ttshpFssIspp.-hsslcp.alpppslphpst ........................h..ul.sp.t.ssu...hLhPhsc......ulla.hcp...Pshhlsh--.Iph......lpapRh........shst+sFDhslsh..K..........................s................tpsshphssI..........spp.phs.....slc-.alsspslphpp.h................. 0 210 346 514 +8345 PF08513 LisH LisH Mistry J, Wood V anon Pfam-B_8344 (release 17.0) Domain The LisH (lis homology) domain mediates protein dimerisation and tetramerisation. The LisH domain is found in Sif2, a component of the Set3 complex which is responsible for repressing meiotic genes. It has been shown that the LisH domain helps mediate interaction with components of the Set3 complex [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.66 0.73 -6.83 0.73 -4.03 58 1365 2009-01-15 18:05:59 2005-09-08 15:43:56 6 33 318 20 874 1281 4 26.50 35 4.96 CHANGED ppLNtllhcYLhcpGapcoApsFtpEu ......plNhllhcYLhcpGapcoApsFtpEu.... 0 243 438 680 +8346 PF08514 STAG STAG domain Mistry J, Wood V anon Pfam-B_4766 (release 17.0) Family STAG domain proteins are subunits of cohesin complex - a protein complex required for sister chromatid cohesion in eukaryotes. The STAG domain is present in Schizosaccharomyces pombe mitotic cohesin Psc3, and the meiosis specific cohesin Rec11. Many organisms express a meiosis-specific STAG protein, for example, mice and humans have a meiosis specific variant called STAG3, although budding yeast does not have a meiosis specific version [1]. 25.00 25.00 25.60 25.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.41 0.71 -4.39 9 489 2009-01-15 18:05:59 2005-09-08 16:00:32 6 8 266 0 290 449 3 117.10 42 10.50 CHANGED EcsuDYPLssps.paKpF+ssLssFlpsLVppsp.tuhLYDs.....slhDslhshlsuLSsSpsRsaRHTATlsuhplhTuLlsVAhpLspp+-ssp+Ql.EAE+pKtpss+u...Rl-uLhpp....tpc ...............................css-YPLhht.u.....p..aKcF+.....ssFs-.FlpsLlp.pCp..hSllY..Dp......................hhM-sllohLou.LS.....s.S.plRuFRHTuT....L..s..Ah.+.LhTuLVsVA.l.sLoh....p.h.-..sspR...Qh....EAE.+.pKh.hs.p+u.........Rl-.L.p+ht............................... 0 75 127 214 +8347 PF08515 TGF_beta_GS Transforming growth factor beta type I GS-motif Wuster A anon Pfam-B_630 (release 18.0) Family This motif is found in the transforming growth factor beta (TGF-beta) type I which regulates cell growth and differentiation. The name of the GS motif comes from its highly conserved GSGSGLP signature in the cytoplasmic juxtamembrane region immediately preceding the protein's kinase domain. 
Point mutations in the GS motif modify the signaling ability of the type I receptor [1]. 20.40 20.40 20.50 21.00 19.90 20.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.75 0.72 -7.15 0.72 -4.61 28 639 2009-01-15 18:05:59 2005-09-08 18:27:40 7 10 112 65 307 548 0 28.20 68 5.73 CHANGED pslpDll-....soSG..SGS.GLPhLVQRTlAR ........poLpDLlp...p.soSG..SGS..GLPLLVQRTlAR.. 0 55 82 176 +8348 PF08516 ADAM_CR ADAM cysteine-rich Wuster A anon Pfam-B_197 (release 18.0) Family ADAMs are membrane-anchored proteases that proteolytically modify cell surface and extracellular matrix (ECM) in order to alter cell behaviour. It has been shown that the cysteine-rich domain of ADAM13 regulates the protein's metalloprotease activity [1]. 20.60 20.60 20.60 22.60 20.40 19.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.47 0.71 -4.04 68 1660 2009-01-15 18:05:59 2005-09-08 18:30:05 7 30 158 17 695 1511 0 112.40 31 15.88 CHANGED sGpPCps.stuYCYsGpC.shspQCpplFGtsAcsAspsCapclNppGsca.GpCGpp...ss...paltCptpDlhCG+LhCpsssphshh.t.....shh.s.h.....p.shhChuhcht.hs........sssDhGhV ..........................sGpPC.p.s....s.....p..uYCYsGpC..s.h.spQCp..plaG......s...A.ps..A....schCapclN.....p......p..G..sp..............a.GsCGpp.....ss..............pahtC...........s..t..pDs.hCG+L.Cpsspp.h.Ph.h...t...p.........tsh...h.....h...........p...th.Chuhth....t........................................................... 0 75 106 268 +8349 PF08517 AXH Ataxin-1 and HBP1 module (AXH) Wuster A anon Pfam-B_5484 (release 18.0) Family AXH is a protein-protein and RNA binding motif found in Ataxin-1 (ATX1). ATX1 is responsible for the autosomal-dominant neurodegenerative disorder Spinocerebellar ataxia type-1 (SCA1) in humans. 
The AXH module has also been identified in the apparently unrelated transcription factor HBP1 which is thought to be involved in the architectural regulation of chromatin and in specific gene expression [1]. 33.00 33.00 33.50 34.10 31.60 32.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.48 0.71 -3.91 10 201 2009-09-13 12:05:04 2005-09-08 18:31:55 7 7 75 8 118 189 14 116.40 43 20.73 CHANGED FlKGohlpltcGphK+VEDlpoEDFlpSApcSsDh+lssS............sVp+I-ssu.u.ullpLTFssGs.ccuhlslEspl-HPFFVhs+GWSSCsPshTlppaGLsCpcLpVGDVCLsLshp ............................FhKGohlpht....s..G.....phK+.lcDlt.sEDFhpSsE.hssshclssu...................................................hVs+l-ssp....s............uhlhL...pFss.Gp..ccup.lolEshl-HPFF.VhspGWSShpPphTsthauLPCpcLpVGDV.Cls.sh.h........... 0 22 35 69 +8350 PF08518 GIT_SHD Spa2 homology domain (SHD) of GIT Wuster A anon Pfam-B_13873 (release 18.0) Family GIT proteins are signaling integrators with GTPase-activating function which may be involved in the organisation of the cytoskeletal matrix assembled at active zones (CAZ). The function of the CAZ might be to define sites of neurotransmitter release. Mutations in the Spa2 homology domain (SHD) domain of GIT1 described here interfere with the association of GIT1 with Piccolo, beta-PIX, and focal adhesion kinase [1]. 21.10 21.10 21.60 21.20 18.80 20.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.81 0.72 -6.99 0.72 -4.59 16 634 2009-01-15 18:05:59 2005-09-08 18:34:00 6 17 202 0 362 665 2 30.50 48 6.87 CHANGED ARpKLt+LSpppFp-LssDVhsElcRRpppu ....ARpKLtpLssppFp-LshDVasElcRRppt..... 0 92 167 267 +8351 PF08519 RFC1 Replication factor RFC1 C terminal domain Wood V, Mistry J anon Pfam-B_5399 (release 17.0) Domain This is the C terminal domain of replication factor C, RFC1. RFC complexes hydrolyse ATP and load sliding clamps such as PCNA (proliferating cell nuclear antigen) onto double-stranded DNA. 
RFC1 is essential for RFC function in vivo [1][2]. 20.70 20.70 21.30 20.80 20.50 20.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.92 0.71 -4.47 48 368 2009-01-15 18:05:59 2005-09-09 09:02:23 7 16 300 1 251 360 30 150.00 36 16.87 CHANGED IScGDllschI+uspp..WSLhPhaulhSoVhPuphltGp....hss.phs........FsuWLGpNSppsKhpRhlpElphHhphpo....sss+pplphsYl.shlhpp.LhpPLhppst-.....ulpcllphM-pYhLo+EDh.-sIh-ls......hssppshhpplsoplKuAFTRtYNp ................IS-GDLV-phI+u.........spp..WSLhPsp..AlhSoV.hPup.hhtGp........hss..h.s........FsuW.LGpNSppuKh...pRllpElphHhpl+s....sus+pplp.hsYl.shLhpt.Ll.......pPLt..p....pG.t-......................ulpcVlphMD..sYhLs+EDa.DsIhElu.......hss.pss.hs.plsspsKuAFTRtYNp............................. 0 93 146 211 +8352 PF08520 DUF1748 Fungal protein of unknown function (DUF1748) Mistry J, Wood V anon manual Family This is a family of fungal proteins of unknown function. 20.00 20.00 21.70 30.40 19.50 18.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.30 0.72 -4.33 19 138 2009-01-15 18:05:59 2005-09-09 11:04:59 5 2 100 0 108 143 0 70.70 38 74.67 CHANGED lG+lhHhuhDhsLlSshLAGl+RsTGLo.............................ch-..thpspp.l+salc+YLshGEh.laDpoVAh.suoSsaFcRp ..................................................................lG+lsHauhDhlLl....SshLAGl+RsTGLo..........................................................................p..hs...hhpsps...lcpalcpYLshGEh.lhDpoVAh.hus.SuaFcRp.................................. 0 36 70 100 +8353 PF08521 2CSK_N Two-component sensor kinase N-terminal Wuster A anon Pfam-B_4164 (release 18.0) Family This domain is found in bacterial two-component sensor kinases towards the N-terminus. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.81 0.71 -4.69 111 966 2009-01-15 18:05:59 2005-09-09 16:15:29 5 12 521 0 332 893 127 146.10 28 30.67 CHANGED sLhllhslshhh....sahhAppsAspsaDRsLhuuApslucplph.p....sGp.lplclPhsAh-hlpsss...pD+laYpVhss..............sGphloGh.s-.LPhss....ssssp..........pstaaDsp....a.pGpslRhsslhp......l.t.....s..s...t.lhVpVAcThpsRpthupclh .....................................PLhlLhshushh....sYhhAhpsAspsaDRsLhuu.Apslu.....cpl...ph...p.......sGp...l....p....l....slPh...sAL-.hh.ptss........sDp.laYpVhss...............sG...chlu.......G...s-.L.Phss.......sssstt......................hsh.aa-sp......a..cG....p..s..lRlAtlhp.............................................slsps......shs............thshltVAEThpsRptlAppl........................................................................... 0 37 144 239 +8354 PF08522 DUF1735 Domain of unknown function (DUF1735) Wuster A anon Pfam-B_2199 (release 18.0) Family This domain of unknown function is found in a number of bacterial proteins including acylhydrolases. 21.40 21.40 21.40 21.70 21.30 21.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.97 0.72 -3.60 101 771 2009-01-15 18:05:59 2005-09-09 16:19:03 5 17 106 11 91 680 4 88.10 20 24.59 CHANGED sspshs.lpl.slDtshlctYN....pp.....pss.sY.phLPps...hYshss..plslpsG..pt.hus.hslphpst.........sh.p.sppYlLPlpls.ssss........tlsps ..............................s.tpshp.lpl..th...Ds....s....h....ls....pYN.........tp.........pss...sY.phLPps...........hYs.l.ss................p........lsl..psG.........ph...hus..lplplpst...............tth.stsppYlLPlpls.ssss.........s......................... 0 57 80 91 +8355 PF08523 MBF1 Multiprotein bridging factor 1 Wuster A anon Pfam-B_4141 (release 18.0) Family This domain is found in the multiprotein bridging factor 1 (MBF1) which forms a heterodimer with MBF2. 
It has been shown to make direct contact with the TATA-box binding protein (TBP) and interacts with Ftz-F1, stabilising the Ftz-F1-DNA complex [2]. It is also found in the endothelial differentiation-related factor (EDF-1). Human EDF-1 is involved in the repression of endothelial differentiation, interacts with CaM and is phosphorylated by PKC [1]. The domain is found in a wide range of eukaryotic proteins including metazoans, fungi and plants.\ A helix-turn-helix motif (Pfam:PF01381) is found to its C-terminus. 22.20 22.20 22.70 22.70 21.00 21.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.25 0.72 -3.96 21 391 2009-01-15 18:05:59 2005-09-09 16:30:21 5 2 301 1 230 359 6 71.00 39 48.23 CHANGED sDW-s.VsVlpK+......sP+sssh+sppslNuApRsGhslpTpKKasuGsNK....ssstspphsKLDc-T-.lphc+V ...............pDW-o.Vsl.lt++.......uPp.sssh+.scpAlNAApRp..GtslpTpKK.auuGsNK........psstspsssKLDc-TE.l+hc+V...................... 0 71 126 190 +8356 PF08524 rRNA_processing rRNA processing Mistry J, Wood V anon manual Domain This is a family of proteins that are involved in rRNA processing [1]. In a localisation study they were found to localise to the nucleus and nucleolus [2]. The family also includes other metazoa members from plants to mammals where the protein has been named BR22 and is associated with TTF-1, thyroid transcription factor 1 [3]. In the lungs, the family binds TTF-1 to form a complex which influences the expression of the key lung surfactant protein-B (SP-B) and -C (SP-C), the small hydrophobic surfactant proteins that maintain surface tension in alveoli [4]. 
23.00 23.00 23.00 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.17 0.71 -4.41 7 187 2009-01-15 18:05:59 2005-09-13 09:14:14 6 3 165 0 123 167 0 123.90 23 64.94 CHANGED Mutstptp..ptKKFT..+Ea....KsK-Ip+sLs++ARL+KpYhKsLccEGYth...PEcp.......p.+..........-s++hppKp+ls..E+tEhtKpRKRpQ+-chptphpcchEcIc...pKppEREp++cpLop+T+pGQPlMGP+INDLLDKIKpDp..Tp ..........................................................................ht............................................................................ptt.........................p..p...............................pp..p.p......c.t.pp...p.p....php.....E.chc.th+pc+...ctp+pchpcp.t...p.c.+cE...t...hc........ptppcRh..cp...p.chlsK+T++GQPlMssphchLLcKIpp....tt...................................... 1 46 72 103 +8357 PF08525 OapA_N Opacity-associated protein A N-terminal motif Bateman A anon Bateman A Motif This family includes the Haemophilus influenzae opacity-associated protein. This protein is required for efficient nasopharyngeal mucosal colonisation, and its expression is associated with a distinctive transparent colony phenotype. OapA is thought to be a secreted protein, and its expression exhibits high-frequency phase variation [1,2]. This motif occurs at the N-terminus of these proteins. It contains a conserved histidine followed by a run of hydrophobic residues. 20.70 20.70 20.70 20.90 20.60 20.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.27 0.72 -4.07 45 1258 2009-01-15 18:05:59 2005-09-14 11:24:36 6 6 755 0 126 489 6 30.60 43 9.32 CHANGED phhphLP+hH+hhlhslssllll.lllh....Pus .......hhssLPhhHRhhlhhhsllhls.uhhh....P.......... 0 9 34 81 +8358 PF08526 PAD_N Protein-arginine deiminase (PAD) N-terminal domain Mifsud W, Bateman A anon Pfam-B_2195 (release 6.4) Domain This family represents the N-terminal non-catalytic domain of protein-arginine deiminase. This domain has a cupredoxin-like fold. 
25.00 25.00 29.00 28.20 24.30 19.60 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.39 0.71 -4.03 15 190 2009-01-15 18:05:59 2005-09-15 15:51:24 5 6 40 12 84 173 0 106.60 38 17.47 CHANGED Mu.pphV+LShcpPT+AVCVlGsEhplDlpusAPcsspsFslpGSPGVplplstsss.psccssuss+WPLssss-llVsMsusSssssDsKVpVSYausppchPlupAVLYLT ............Ms.pphl+lshppsspAVCVlGsphhlDlhu...uAPpssp...oFslpuSssVplplshss..ppppss..upp+WPLsss.s-lhlsMsssSssh.sD.s..KVp...loY.Yupcpt.PlspAlLYLT................ 0 5 6 18 +8359 PF08527 PAD_M Protein-arginine deiminase (PAD) middle domain Mifsud W, Bateman A anon Pfam-B_2195 (release 6.4) Domain This family represents the central non-catalytic domain of protein-arginine deiminase. This domain has an immunoglobulin-like fold. 20.00 20.00 30.30 28.60 19.90 17.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.06 0.71 -4.83 15 201 2009-09-10 21:19:31 2005-09-15 15:53:00 5 7 49 12 96 184 1 154.90 46 24.89 CHANGED V-ISLDsDssRsGpV-+sps..cKpoWpWGPsGaGAILLVNCD+Dshtust............DpccsplhshcDLpDMS.MlLpspGPcsLhcsY+LVLHlSpSDuc+lRVFpupsst.............shpsYchVLGPpphoYpVthhsGppchpFYVEGLsFPDssFsGLlSlsVSLL- ......l-ISLDsDhsRsGpVccsps.....cKpsWsWGPpG.GAILLVNCD+D.sstp..s.t............Dspcpc.........lhs..........h.....p.....DLpD..MS.MlLpTpGPspLhs.sacLVLalStuDuc+lRVFpspss.....................sppYptVLG.p.....+hsatVthh....sG..pt-hpFaVEGLsFPDssFsGLlSlpVSLL-........................ 0 8 11 27 +8360 PF08528 Whi5 Nrm1; Whi5 like Mistry J, Wood V, Chahwan C, Finn RD anon manual Motif In metazoans, cyclin-dependent kinase(CDK) dependent phosphorylation of the retinoblastoma Tudor suppressor protein (Rb) alleviates repression of E2F and thereby activates G1/S transcription. The cell size regulator Whi5 appears to be an analogous target of CDK activity during G1 phase [1]. 
25.00 25.00 25.60 25.10 24.30 24.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.36 0.72 -6.59 0.72 -4.24 24 164 2009-01-15 18:05:59 2005-09-15 16:38:06 6 3 80 0 127 152 0 25.00 42 6.36 CHANGED pL+sRLphAhaKspsG.pchoLscl ...pL+sRLshAhhKlpsGhpchoLscl.. 0 31 71 109 +8361 PF08529 NusA_N NusA N-terminal domain Bateman A, Roth A anon Pfam-B_407 (Release 18.0) Domain This domain represents the RNA polymerase binding domain of NusA. 20.80 20.80 20.90 20.80 20.60 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.46 0.71 -4.12 161 4422 2009-09-13 23:45:58 2005-09-20 15:50:34 6 28 4355 5 970 2799 2484 119.80 35 27.93 CHANGED El.lpslctlscEKsIs+-hlhcAlEpALhsAh......+Kp.a....s......ps.pslcVplDccoG-hclapphpVV..-c...........h.......cs..thplsLp-A...p.phsssh...pl..GDhl.p.pl..ssts..FGRIAAQsAKQVlhQ+l.REAERct ...............El.LtslcslppEKuls+-.hlh-AlEsALhsAh..........K+pa.............s.............pp.tsl+Vpls....c..c.oGc..hclap.p.hpV...V..--.............................Vp......ss...phEIo..LppA......p.......s..s..sh.....................pl..GD.hl..c.cl..ss..ts...FGRlAAQoAKQVIhQ+lREAERt.h............... 0 341 650 828 +8362 PF08530 PepX_C X-Pro dipeptidyl-peptidase C-terminal non-catalytic domain Bateman A anon Bateman A Domain This domain contains a beta sandwich domain. 
20.70 20.70 20.80 23.00 19.60 20.20 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.12 0.70 -4.45 53 2418 2012-10-03 19:46:52 2005-09-20 16:49:51 5 23 1578 54 704 2136 679 229.10 17 35.97 CHANGED Nsh.tshPsVph.sptssps...................................Wps.ssassspspt......phhLs.......t......sthshpttsshssshsh.tthhhss.....................................................tptsshsapotslscshplsGpsplcLplsssss.......ss.plsspLh-l......ssc.....................Gps....................plloc....Ghlplpt+p...p........................................ptlpPsphhslclcLpsosaphtsGcpLclhlsosshsthhhsssstp.hshshsp.........................................pLh ..................................................................................................................................p.P..V.h.h..p..s.tt......................................Wps.h.s..s..aPs......psp.................phaLs...................................s....t.....h.sh..p....t...t...t..sh.s.s.sh.t...........tthhhss.....................................................................................................................tptsshsah.s.sl.s....c..s.......h..clsGpsplcL...p.ls.s..ss.s...............................cs...slh.spLh-l......ssc.................................................Gts....................................thlop...........G..h.l.php...Rp.....p........................................p.lps.s..c..h.hp...lplc.L..s.s.saphptGcpLclslt.uo.s...h...s..h..hh.h.s..s.s....s..hp...hs.h.p.tp....................................................................................................... 0 207 463 615 +8363 PF08531 Bac_rhamnosid_N Alpha-L-rhamnosidase N-terminal domain Bateman A anon Pfam-B_8527 (release 8.0) Domain This family consists of bacterial rhamnosidase A and B enzymes. This domain is probably involved in substrate recognition. 
25.00 25.00 25.00 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.73 25 876 2012-10-03 19:46:52 2005-09-20 16:50:02 5 42 361 2 288 895 96 166.90 27 18.23 CHANGED +pltpARLYlouLGlYEhalNGc+VGDps.........LsPGaTsYc++l.YpTYDVTshLpc.G.cNslGlhVGsGWYssphsh........thp.pthYG.sc.ullupLclpat.DGppcsl.sTDsoW+ss.pGslhtusl....YsG..EsYDARt-.tsWspssaD..DstW....................tssph..............hshPp.stLtAp.ssPl+lsc ..............................................................ltpAplalouhG.......hY-hhl......N....G..p.+.V....G.c..ph.....................LsP.u.hTsY...p.................+................p......lhYp.TY.....DVT..shLpp.....G...pN..........s......lulhl...u.s.GW..ap..s.th.s............................pthhu....sp....s..shhspL.p.lpat.D.......Gs..p.p..h..l...so.........D.......s....o....W+s.......s...p...u.s....l...h.hssl......................asG.......E...p...Y..D..A..R..h..c............t........s....Ws....p..s...s...a..s........c....s..tW.....................................t.ss.h..................................h.t.....s...........l........................................................................................................... 0 134 224 270 +8364 PF08532 Glyco_hydro_42M Beta-galactosidase trimerisation domain Bateman A anon Pfam-B_2131 (release 5.4) Domain This is non catalytic domain B of beta-galactosidase enzymes belong to the glycosyl hydrolase 42 family. This domain is related to glutamine amidotransferase enzymes, but the catalytic residues are replaced by non functional amino acids. This domain is involved in trimerisation [1]. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.22 0.70 -11.17 0.70 -5.17 27 1054 2012-10-03 00:28:14 2005-09-22 16:56:59 5 20 641 2 343 1020 170 197.10 26 29.29 CHANGED AcVAl....laDa-shWAhc.p..Pp.pshp..........YhpplppaYcshhchGlslDlls.ss.-lst...YclllsPsLahlscshspclpcalcsGGsllhshhSGhhsEssplhhuthPGs....LcclhGlpl.cEh-sLssp.....pp.plp.htGp.......hpsphap-hl...pspsAcs..lApaps..t......Gp.PA....lscpthGc..GpshYlus.....t.spphlppllpplhsc.tslt ....................................................clAl.....laDa-s.....hWuhc...t..........sp...ts.hp............Y.pp.ltpaY.c.sh..hctslssD..ll....s....s...c.....s....-.....hss..............Yc..lll.sPsl..h...hlssshsp+lppaVcs.GG..pll.so.....hhoGhhs-pstl...h.hu......sh.P..Gs....L.....pclhGl.ps...p.-h...s.....sLhss............................ppspl..p....htGp......................................hp...s...p.ha...s.-h....l........ps..p..s......A.ps.....lApYts.......t.ht...........Gt..PA.....................lTpp.t.h....Gp..G.pshYlus..........t.st.t.hh.pthhtplhtphsl.t.............................................................................. 0 123 237 290 +8365 PF08533 Glyco_hydro_42C Beta-galactosidase C-terminal domain Bateman A anon Pfam-B_2131 (release 5.4) Domain This domain is found at the C-terminus of beta-galactosidase enzymes that belong to the glycosyl hydrolase 42 family [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.71 0.72 -4.46 26 633 2012-10-02 20:10:03 2005-09-22 17:02:10 5 6 441 2 201 611 24 56.50 23 8.50 CHANGED Glclpp..h..ttpspahFhhNaoscstplsl....stttppllssph.t.....hslpPhsltVlct ................tVpspt..R..pssps..palFlhNaos.cs.....tp.lsl....stshp-llsu.p...h.hpst............lsLsshsVtllp................. 
0 72 140 172 +8367 PF08535 KorB KorB domain Bateman A anon Pfam-B_20369 (release 10.0) Domain This family consists of several KorB transcriptional repressor proteins. The korB gene is a major regulatory element in the replication and maintenance of broad host-range plasmid RK2. It negatively controls the replication gene trfA, the host-lethal determinants kilA and kilB, and the korA-korB operon [1]. This domain includes the DNA-binding HTH motif [2]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.72 0.72 -3.83 17 2280 2012-10-04 14:01:12 2005-09-23 14:10:54 5 9 1911 4 384 1460 462 83.50 29 28.74 CHANGED stGhppu-IA+pLGKstuaVopahuLh-hPsslcphhssthssslcsl.-LtpshccpPpcVpsal.....tstspplTRusl...pchlcsc+psp ..........shTQp-lAcclGKS..RsaIuNhLR...L.L...p.....L...Pp.p.l.pp.hl.p.pG.p..l..o.....t...h..t.....................................................................................ttttthhh................................................................... 0 127 237 310 +8368 PF08536 Whirly Plant_TF; Whirly transcription factor Mistry J, Pachon DMR anon manual Domain This family contains the plant whirly transcription factors. 20.00 20.00 22.40 22.10 19.70 18.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.63 0.71 -4.70 4 82 2009-01-15 18:05:59 2005-09-28 13:43:07 6 2 35 12 47 76 4 129.40 45 44.76 CHANGED ulYKsKuALplcsVtPoFsulsSGshhlcRsGulLLphAsAsusRpYDWppKtsFhLSsTEsupLss.MuupsSCpFFHDPu.tsuustGpVpKuhKVEPhPDGS.GhFlNLoVssu..psscpF.sVPVohuE.Asl+phhs ...................lYKGKAALolpPh.PpFstL-SGuh+ls+cGhlhLpFAP.......A.......l....G.............tRp..YDWs+KQh.FuLSsoElGoLlo...LGsp.-S.sEFFHDP..htpSstGpV+KsLplcPh..sDus.GhFhsLoV.sph...p...s...s-ph...lPlTcuEFuVhhosh......................................... 
0 10 32 40 +8369 PF08537 NBP1 Fungal Nap binding protein NBP1 Mistry J, Wood V anon manual Family NBP1 is a nuclear protein which has been shown in Saccharomyces cerevisiae to be essential for the G2/M transition of the cell cycle. 21.50 21.50 21.60 21.60 20.90 21.40 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.20 0.70 -5.23 6 52 2009-01-15 18:05:59 2005-09-28 14:11:52 5 2 26 0 25 47 0 225.70 33 97.19 CHANGED Mh-slKshhssa.hh......csDGRK+EaGsLs-h+cRp..+sR+tpsppcucshh+hs+...............................sssptot.phtsphpt..................ssspcupt+shhssI+ulFSs-ppslptMppA...lshhL.s.otsppspcc...hpsRIlRS-sFKKKlhEhcYscphLppLR+Gupstp.sh.t.su.p......DpVlLLQ++lcch-c+ltplppELp.spKcLpFupEKspLLpsLLDDANIDscYlKSRRsIpNL.p.p-slpPp.csLs..PSPhR..sVNPLFTSSPlRpssppupcssspshp..........-NFYsKYP+lPcTEpLspptt..............ccSLSPlRlDYS+YSS ..............................................ts.sh.++pht.l...+ppp..p.t......pspphhph.+...............................c.t..pt......................................................................................................................................................................................................................Dp..L......phpt+httlcppLp........hpctLpaspEK.clLpslLDsuNID...pYhcSRRshpNl.p.p-p.....lKPh....sLs..P.SPhR.........tsNsLhTSSPh+h.shpsp....pshp...........sh..sthPp.P.pph.ppp................cco......................................... 0 3 10 17 +8370 PF08538 DUF1749 Protein of unknown function (DUF1749) Mistry J, Wood V anon manual Family This is a plant and fungal family of unknown function. This family contains many hypothetical proteins. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.96 0.70 -5.53 8 206 2012-10-03 11:45:05 2005-09-28 14:46:02 5 5 158 2 158 362 33 286.70 31 88.64 CHANGED Muhphss......GlLHpYsp+L.......lAFEasosush..+sps....LlFlGGLGDGLhTVPYlpsLAsAL.......-tuuWSllplplSSSYuGWGTGSLcRDsEElppsV-Yl+sph....uG...sppKlVlMGHSTGSQDVlaYLopu.......................tspV-GuIlQAPVSDREAhhpshtc...........cthcchVshA+ch..lccGpuc-llPpEascthhh...soPlSAhRahSLsusc.....GDDDaFSSDLoDE.......cLscoFGplpc........PLLlLhSppDEaVPsal..DKpsLLsRW+pusc....cphWspp.SGIIsGAoHsVs....scupsts.chLlccVhuFL+ ....................................................................................................t..............shla..a...t....t........ssaEat..sss...........t.p.s........llFlGGLs.DGhhos.sYhts....LupuL.................ptt..sWSlhplhL...oS...SapGa.......G....h.u....S...L..c...p.D.......s.c....El.sph.lp..Yl+sh........................s.p+lVLhGHSTGsQDllcYLp.pss......t........................p.sl.-.G...sI.LQAP..V.S.D.REuhthhhpt....................t.hpphlphAcph.....ls........p...u...p.s...pp..l.h.Phphsshhh...........................ssP.loAhRahSLsu.t........u-DDhFSSDL..s-..-.........pLpp.saGhlsp...................slLlLhS..spD-aV.Pthl..Dpp..t.L.lp+appsst..............t.hs......otll.s.....AsHslt....t..t......hhthl.tah.t........................................................................................................................ 0 50 92 139 +8371 PF08539 HbrB HbrB-like Mistry J, Wood V anon manual Domain HbrB is involved hyphal growth and polarity [1]. 
22.00 22.00 22.10 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.39 28 284 2009-01-15 18:05:59 2005-09-28 16:01:24 6 8 167 0 164 267 1 140.30 32 25.68 CHANGED pphosssAWshlpstllslF.........cucslp.hs..lEDLNclVphalppplppp.hss.....hlh...t-lcpLLssGhssLccpl.....pts.....s-+LlspLsElW.hFFsslLPhlQAlFLPlp.......htt.t.....tpstp.h...............................upELslRpLsLluFRDhllLshhpshthh ............................................................h..hsssssWs.lpstVlslF.........psc.sl...t...l..............sLNchl.c..h.hlppplssh.hsp...................hh........ppLLspGhhhLcc.pl..........................chh.................ps..pp.hl...sp...Ls-hWpaFFsplLPhlQAlFhPlp...............................................................................................................upp.slRplsLlsFRDhllLshh.t....t..................................... 0 36 68 110 +8372 PF08540 HMG_CoA_synt_C Hydroxymethylglutaryl-coenzyme A synthase C terminal Finn RD, Bateman A anon Prosite Family \N 25.10 25.10 25.20 25.30 25.00 24.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.87 0.70 -4.93 10 2505 2012-10-02 12:25:54 2005-09-29 09:26:20 5 15 1386 46 503 1607 169 142.60 23 53.54 CHANGED IVFDpulRuoHMpHAYDFYKP..DLsSEYPVVDGKLSlpCYLpALDpCYppYssK..hpphhtp..t.spthsLccFDahlFHoPaCKLVQKShARLlaNDFlppssp.phsslhccLpsh..tslch--oYpDR-lEKshhploKshaccKspPSLllsspsGNMYTuSLYuuLASLL.ptuss-LsG.KRluhFSYGSGLAAohFSh+lssDsssho..p..IsslhDlps+L.DsR+phoPE-FsEshclREpsHtpKsFsPpuS..l-sLhPGTaYLsplDchaRRsYup+ 
...............................................................hh........................................................................................................................................................................................................................................................................................................................................................................s....hsp..GNhYsuolahshhS.hl.........t........t......................s..pp.............lhhhSYGSG..uphat.h...l....t...................................................................................h..tth..............a................................................................................................................................................................................................................ 1 143 271 402 +8373 PF08541 ACP_syn_III_C ACP_C; 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III C terminal Mistry J anon Pfam-B_67 (release 18.0) Domain This domain is found on 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III EC:2.3.1.41, the enzyme responsible for initiating the chain of reactions of the fatty acid synthase in plants and bacteria. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.64 0.72 -3.97 187 7398 2012-10-02 12:25:54 2005-09-29 09:55:06 5 25 4322 91 2044 8046 1911 89.30 33 25.98 CHANGED Lpps.slshsDlDhhlsHQANhRIlcs.hsc+Lsls..-..+shhsl.pcaGNTSAASlPlALscslcpG..clcsG-hlllsu.FGuGloWGusll+h .........................................hppssls.h.p-..l....D..ah.l.s.H....Q............AN....h....R....I...l....c....s.....h.....s.....+.....+......L...........s.......l.......s........-.........+.s.....h...h...s.....l...p....caGN...TS...AAS...lP....lu....L...s.c....s....h....c....p.G........c.l..+..p.......G........c.......h........l.........l.lh...u..F.G.uG..hoaGusllc....................................... 
1 605 1328 1740 +8374 PF08542 Rep_fac_C Replication factor C C-terminal domain Mistry J, Wood V, Finn RD, Coggill PC anon Pfam-B_930 (release 18.0) Domain This is the C-terminal domain of RFC (replication factor-C) protein of the clamp loader complex which binds to the DNA sliding clamp (proliferating cell nuclear antigen, PCNA). The five modules of RFC assemble into a right-handed spiral, which results in only three of the five RFC subunits (RFC-A, RFC-B and RFC-C) making contact with PCNA, leaving a wedge-shaped gap between RFC-E and the PCNA clamp-loader complex. The C-terminal is vital for the correct orientation of RFC-E with respect to RFC-A [1]. 21.00 21.00 21.00 21.00 20.90 20.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.59 0.72 -4.00 117 1565 2010-07-14 04:08:27 2005-09-29 17:21:44 6 38 497 21 1053 1492 176 89.50 21 25.10 CHANGED shs..thlc.pllpphhp.p.....shtc..................spphlt.cLhsp...GhuspsIlpplhchlhph....sh..sph+hpllptluph-hRltpGssphlQLpuhlAph ................................................................................thlc.phlpthhs.t...............shpp...............................................s.tptlp.c.Lhsp...GhuspsIlp...p..lhchlhph............phs..sph+hcllctl.uph-hRlspGssphlQLtuhlup.................. 0 361 595 866 +8375 PF08543 Phos_pyr_kin Phosphomethylpyrimidine kinase Mistry J, Wood V anon Pfam-B_787 (release 18.0) Family This enzyme EC:2.7.4.7 is part of the Thiamine pyrophosphate (TPP) synthesis pathway, TPP is an essential cofactor for many enzymes [1]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.55 0.70 -5.18 132 8058 2012-10-03 06:25:16 2005-09-30 10:04:10 7 34 4351 38 1754 12925 3225 233.80 27 80.50 CHANGED DouGGAGIQADLKThpuhGsaGhosITulTAQN..TtGVpulpslss-.hlpsQl-ulhsDlslsAlKhGMLusscllcsV..Actl.cp.hsh.slVlDPVMlupoGspLlps-u.lpsl+cp..Ll.PhAsllTPNlsEAp.....hLsG....hplp...s.p..-hcpuAcplhp.h...GspsVLl..KGG.H.h..t............ppsh..DlLh....sssph.hthpstRlsTppTHGTGCThSuAIAAtLA+Ghsl.cAVppAKpalptAlp....pshplG..pG.tGPls .............................................................................................................................Ds.uGuGh.ADltshtths..........sauhsslTsl.s.s.s.....s....t....s.....t..h........h.......p....h..l.tt..Qh...p...s..l...h...p.......s...h..........t........h.............c...A..l...K...h..G..h....L.u..s....s....c....h....l........c...h..l.....s....c.....h.....l......p...........p................p..........h...............l...V...l..D......P.......V.....M......s..s....p......s...........u...s........t.....l.....l..s....s.s.s....h...p....s...h....p.....p.....p.......L...l......P.h...A......s.....ll.TPNls.E.Ap...................hLsG.........hplp.........................s..c...........-h.h...p..A....u.......c....t..Lhs.....h..............G.s..p.....t....V..l..l....K..GG..c..h...t............................................pps.............-.hlh..............s.s.......p...p....h...........h......h..h..p..s.....s.....+......l......s...t.......p......p....s...+..G.TG.s.o.h.uu.....s.ls.u..........t..........L...s..........p.......G.....t...........s......l..t.....c....A.......l.p..p.A..p..t.a.ltpulp.......psh.p..h..s.............................................................................................. 0 531 1053 1453 +8376 PF08544 GHMP_kinases_C GHMP kinases C terminal Finn RD anon Prosite Family This family includes homoserine kinases, galactokinases and mevalonate kinases. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.76 0.72 -3.78 121 12811 2009-09-10 21:23:33 2005-09-30 10:45:27 8 48 4606 85 3000 8683 2225 82.10 18 24.10 CHANGED hhpsl......t.............tpht.htphht..t...............hhthhhsplpplhcth.pphG......hssthoGu..Gsslhslh.............ppppsppltptl.ppthtp ..............................................................................h....h.t.............tphphh.up.hhp.ts..t......................h.hthth..splc.plhphs..tphG........shuuphoGu...GsslhuLs.................sp..pp.spplhptl.tt....t....................................................... 0 971 1843 2506 +8377 PF08545 ACP_syn_III 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III Mistry J anon Pfam-B_135 (release 18.0) Domain This domain is found on 3-Oxoacyl-[acyl-carrier-protein (ACP)] synthase III EC:2.3.1.180, the enzyme responsible for initiating the chain of reactions of the fatty acid synthase in plants and bacteria. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.70 0.72 -4.32 170 6937 2012-10-02 12:25:54 2005-09-30 14:13:13 5 20 4279 91 1726 8497 1908 78.70 35 23.31 CHANGED FDlsA.u...CoGFlauLshAsshlpuG.th+plLVlGu-thS+.hlDa...s...D..RsTsl....LFGD..GAGAsllpss......ppt.......u..lls...splto..DG ...................FDls.A.ACuGFlauLs.s.As.p.h...l...p.........o......G....t...........h..........+....p...s..LV.lG..u.-..p.h...S+....hl...Dh.......s.........................D.......Rs.Ts.l.....................L..F..GD...GAGAsllpss.......pp...............s...lls.hphts-G.................................................................... 0 550 1116 1470 +8378 PF08546 ApbA_C Ketopantoate reductase PanE/ApbA C terminal Mistry J anon Pfam-B_396 (release 18.0) Family This is a family of 2-dehydropantoate 2-reductases also known as ketopantoate reductases, EC:1.1.1.169. 
The reaction catalysed by this enzyme is: (R)-pantoate + NADP(+) <=> 2-dehydropantoate + NADPH. AbpA catalyses the NADPH reduction of ketopantoic acid to pantoic acid in the alternative pyrimidine biosynthetic (APB) pathway [2]. ApbA and PanE are allelic [2]. ApbA, the ketopantoate reductase enzyme is required for the synthesis of thiamine via the APB biosynthetic pathway [1]. 20.90 20.90 21.20 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.36 0.71 -4.13 112 4691 2009-01-15 18:05:59 2005-09-30 16:41:07 6 30 2834 28 1372 3635 1120 124.90 25 38.00 CHANGED slttthWpKllhNs.....shNsloulhs...sss.uplh....tssthpplhptlhpEshtlu.........pupG...........................ht.hstp........................hhctlhphhpt..........sss.....p..psSMhpD.lppG+..oEl-hl.sGtllc.h.....ucphGls...sPhsphlhp.ll.+t.hp .................................................slttthWpKlhhNs.................shNsloulhp...................ssh.uplh......................pp.s.t..h..pph...h.pplhpEsh.sVu...................pt.p.G.........................................................hp...hstp.........................................lhpt.lh.p.lhpt.............................sst........p.....hoSMhpD.lp.ptR....oElDhI.sGhllc.h.........uc..p.t..G...ls......s.Ph.sphlhphl+th........................................................................ 0 379 802 1142 +8379 PF08547 CIA30 Complex I intermediate-associated protein 30 (CIA30) Mistry J, Wood V anon manual Family This protein is associated with mitochondrial Complex I intermediate-associated protein 30 (CIA30) in human and mouse. The family is also present in Schizosaccharomyces pombe which does not contain the NADH dehydrogenase component of complex I, or many of the other essential subunits.\ This means it is possible that this family of protein may not be directly involved in oxidative phosphorylation [1][2]. 
19.70 19.00 19.80 19.00 19.60 18.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.90 0.71 -4.37 83 768 2012-10-03 19:46:52 2005-12-01 09:21:38 7 21 485 0 464 811 928 156.70 22 55.05 CHANGED hcFsss......s...........shpp...WtslsDsVMG.GhSpuphph....................s.....ttuhFpGplShc..........ssGGFuShRo.........hps.hDlu..sasulpL+l+...GDG.+pYphplpsp..st............hsshsYptsFt.....T..sst...........Wpslc................lPascFhss.hRG+hlpst...s..h...sssplpplulhlu...........sc..psGsFpLpl ................................................................th.pp....WtshsDt.s.h.G...Gh.Spup.hphs...........................................s..stuhFpGplshc.............................................................ppu.Gasuh.Rs......................................hp..hc.h.s.sass..ltLcl+..............G..DG.......+..p...Yhlplpsp..s..............ts.ha.ptth.....st..sst....................Wpplp....................................................................lPaspFh..s...c..G.p........h.h...t............h...........p..plppluhhht..............st....G.atL....................................................... 1 145 272 386 +8380 PF08548 Peptidase_M10_C Peptidase M10 serralysin C terminal Mistry J, Rawlings ND anon Rawlings ND Domain Serralysins are peptidases related to mammalian matrix metallopeptidases (MMPs).\ \ \ \ The peptidase unit is found at the N terminal while this domain at the C terminal forms a corkscrew and is thought to be important for secretion of the protein through the bacterial cell wall. This domain contains the calcium ion binding domain Pfam:PF00353. 
20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null --hand HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.11 0.71 -4.68 15 432 2009-01-15 18:05:59 2005-12-01 09:50:32 6 100 256 24 107 457 56 201.30 36 37.88 CHANGED ANhoTRTGDTVYGFNSNT-RDFYoATSuSsKLIFSVWDAGGNDThDFSGaoQNQRINLNEuSFSDVGGLKGNVSIA+GVTIENAIGGSGNDlLIGNsAsNlLKGGAGNDIlYGGGGADpLWGGAGsDhFVYuuuuDSssuAsDhIpDFpSG.DKIDLSuhspsss....L+FV.DsFoGcAGEAlLoYDuuoslosLtlshuGcss.sDFlVplVGQsss.oDhIV ....................................................................................................................................................................................s.pTRssDTsY........GFN..S..s.....o...s...c......D....a.h....o..........u......s......s....s........s....s....p..l....l....F.olWD.uGGsD..ThD..FSGa.s.p...sptIsLs.......t...s...s..........a..........S.....s..........l...G..............G...............h...........t..........u.....N..........lo..........IAhs.sslE.NAlGGsGsDhlhGNtssN.l.Gss.........................................................................................................................................................................................................................................................................st..hhsststshhhhtt.tts.sts.Dhlh.DF..p...u...D.+l..sl..t....h...............h...hh....t.t...............h...t.........t........t...........................h.h............................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 13 50 73 +8381 PF08549 SWI-SNF_Ssr4 DUF1750; SWI-SNF_ssr4; Fungal domain of unknown function (DUF1750) Mistry J, Wood V anon manual Domain This is a fungal domain of unknown function. 19.50 19.50 19.50 19.50 19.00 19.20 hmmbuild -o /dev/null HMM SEED 669 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.45 0.70 -6.22 3 99 2009-09-10 22:09:48 2005-12-01 10:49:12 5 3 82 0 83 100 0 503.50 32 94.93 CHANGED MD.DPASRVPuQLLPHMHLVSRaRYPLMHMMPTDTVV-YLLSAPKIVREApPMHWTFLDGPQDGTVMLTWQPLNHLGTNFASDGYVWADVEQAFTFEA.RGYVVEMWLHRSGYHPPNESVAIHCRRRYRLLPoKVPNPSLPPPDPSLWIVHYSRAPPsDHIPANRIPVSPQVQsMLAQRRFLQsQGQLARKDFMLHDRNNWPTIsLPPQhA.sQuhtQPsGPYPNAMVGRQPFYPQPGssAsPPsusussKAPRGHRASoAAssAAosDFALEDE.DVSsGDLMDLLTPREVSKMRYQQHHEWMEEILASPYAISQITPVSLGLGRKGELESLTAGFFDAPVGPusGDSc-GsEusQATKLEPE+A-EFADRVAKKVADMTAEIEKLKKRHARRMEKFNRTStLKDAEpRLRDAAAsPtDTGSEIWRLEGRlEhsTE-DsucluPlEHKAKYKVDDIVREVEsSWpKpIVPEP+VSCVEKGGLLEKI...EPEPso.......................hhuDlDIDMGHTDSHLLDQF.TAtGssuQotuTsAP.....uAsGQAsPTATGVAssQPsA....GLDI-MDhGDupsouTAuGETGDWVMVN-s................KKDDslshPstph.tspTPGSGLQGLTPG.souuDsGLDusNFDFTN...MDSAGDALAAY.........TEQN-GLDLP.DLENSAFGDAFH..........ASDNEsTHHHDADDMS 
.......................................................................................pas......h.hppshcaLhtAPpls+-.tPhhWta.lDt.P..DGolh.LsWQs..phGspFuoDGhhWsssE.hap.ph.pGh.....................lEhaht+sGahs....s.EphAhHsR+RaRLh..ss...s...s.......sDPsLa..llHYu.u...-plPssh.l.hs.th.....pth....httRp...h.L.ptGQ.lhRK-FMLpDRs....sWPpls...t....p........s......t...h..p.........h....s................................s.t............t......t..t.tps........s........h...............s..........-..h.s..........h.--E.-sSpG..DhhDhlTPR-lSh.RYpppHEWMEElhuSPYtltQI.PssLGLGh+GE.LtsLTtGhF.s....s....t..t....t.s......h.s.+hcst.sc-FtppstcphtthpsEhpphctpHtcthtphpp.shhhptEhtLR.hh......t.Gs-h.a+hE...s+hp....tpt.t..................p............l--lltplpttht+thss....tltplpcGGh.p.....t..P..........................................................s....DhsM..tst....t.hsth............t.......s.s...........................................s...................................................t.D.h...h...t...........s....p.................t..t....................s.....t....t......s......s-h.l....hlsp.t.............................................s.........s............................s.t..........s...h.st...................s..h..........-....hts......hsoAG-ALAsY...................tt.s.thsh..th.-sSAFG-AFH..........usptt............t.................................................. 0 20 43 67 +8382 PF08550 DUF1752 Fungal protein of unknown function (DUF1752) Mistry J, Wood V anon manual Domain This is a family of fungal proteins of unknown function. This short section domain is bounded by two highly conserved tryptophans. The family contains Swiss:P34072 that is thought to be a negative regulator of RAS-cAMP pathway in S.cerevisiae. the Sch.pombe member is a GAF1 transcription factor Swiss:Q10280 that is also associated with the zinc finger family GATA Pfam:PF00320. 
20.90 20.90 20.90 20.90 20.80 20.40 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.69 0.72 -7.15 0.72 -4.36 48 460 2009-01-15 18:05:59 2005-12-01 11:05:33 5 14 156 0 340 467 0 29.00 39 4.13 CHANGED lW+has+s+p.....plss..scRlENloWRhhstp ............W+hhspp+s.....plss..upRLENloWRhWsp...... 0 88 185 291 +8383 PF08551 DUF1751 Eukaryotic integral membrane protein (DUF1751) Mistry J, Wood V anon Pfam-B_13217 (release 18.0) Domain This domain is found in eukaryotic integral membrane proteins. Swiss:Q12239, a Saccharomyces cerervisiae protein, has been shown to localise COP II vesicles [1]. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.38 0.72 -3.67 23 355 2012-10-01 23:21:32 2005-12-01 11:26:52 5 5 269 0 250 1107 281 98.30 29 27.91 CHANGED Puhsh.....aPWoLlTusalEtslashllshlsLhluG+alEphWG.upEhlKFllllsshsNLlshlhtllhhhhops.p..L..h.lsGthulhsGhlVAhKQ ....................Pshhh.....as..W.ol..lTsshl..E.....p..s.....l..h.slllshhslhhsG+hLE.hWG..up..............Ehh+.Fl..h..llshh.ss.lls..hh...hh..l..lh..ah..h.......ots.p..........h....Lh..h.lpGh.huhhsGhLVAh+Q.................................................. 1 73 135 210 +8384 PF08552 Kei1 DUF1753; Inositolphosphorylceramide synthase subunit Kei1 Mistry J, Wood V anon manual Family Kei1 is a subunit of Saccharomyces cerevisiae inositol phosphorylceramide (IPC) synthase [2]. It is localised to the Golgi and is cleaved by the late Golgi processing endopeptidase Kex2 [2]. Kei1 is essential for both the activity and the Golgi localization of IPC synthase [2]. 
25.00 25.00 34.10 33.90 23.10 21.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.16 0.71 -4.63 17 133 2009-01-15 18:05:59 2005-12-01 11:35:41 6 5 128 0 100 134 0 176.00 31 63.88 CHANGED poFLuhhsLhhGsEllhhhhlhNKsoGlYGlLulhTGasLshhQhhhYlaSlhsLshahhsl.pl+..c...........................psshpshhlshlYshDollsshaThhFshsWFht..........pstsssst............................sstsusstttsspsssu...htp..................................utstthEhhholhlslhhhllRhYFshllhuas...pplL+p.hhts .....................pFhhhhsL.hGspllhhhhlhNKhoGlYGlLAlhT..Ga.Ls..hhQlshYlaSlhsLslhshhh.pI+...c.............................psshpsltlAalYhlDollsshaThhF.ussWFhh................tss......ssss.t..ts.....................................ss..tss.tssttstpstst..htp..................................ssshp.EphholhllsshhllRlYFshllhuFA...pplL+p.h...h........... 0 25 54 84 +8385 PF08553 VID27 VID27 cytoplasmic protein Mistry J, Wood V anon manual Family This is a family of fungal and plant proteins and contains many hypothetical proteins. VID27 is a cytoplasmic protein that plays a potential role in vacuolar protein degradation. 
27.20 27.20 28.90 30.90 23.90 26.00 hmmbuild -o /dev/null HMM SEED 794 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.72 0.70 -13.23 0.70 -6.31 9 256 2009-09-11 00:42:47 2005-12-01 11:45:52 5 5 188 0 187 259 0 563.90 30 87.52 CHANGED M.hL+p....hhGsu.sppEllpIPuGpLYLlR..pSPKGspECIYpDAssoIR+Tu.-apYQLVVp+saEEGEsph.......tt.-s-Dsshs.........-DEhsFhlDcsLch+hphpctGctslsWcDlpGDpG.DhaEFVssss.lshspl-pFthTshcC.YEpKY++SspcAo-.--Lppacaps.hs............cscphsh--ph.ssshtphsss.scssps.s.htcutpp...t..............................ph.t..hhsttpu-LalYDshotpFlLQ..cssVslslh-sGc.acaWLtlcGt-p..Luhslss-hNPsFshpphSFlFNahssc.....shSWhL+FcDhsshscFppsaspslWEphNcpKWscs.csEpcYl.-Aaps.........hph-Dt.....ps..pE-E-E---E--spu.....ucpth-s-pa--ccsttt.pp...sssNppLAVGa+pDRSaVlRGsKIGVF+pss.sspLEFsTsIpplus.pGKhFsPcKhMLHtpD+phlLpDss.stspLY+MDLEhGKVV-EWcVpDcs...VssauPssKFAQMTsEQThlGlSpNulF+IDPRlSG.sKLV.-uphKpY.ASKNsFSuluTT-pGYlAVuSsKGDIRLFDRLGhNAKTtlPALG-PIlGlDVSADGRWlLATC+oYLLLlDshIK-G.KNtGpLGFp+SFst-uKPpP+RLplpPEHsAah.p.T.tcPlsFT.ApFNTGhsppEpoIVTSTGPYlloWShKclLpGc...psPYhIKRYsssVhADsF+FGoD+NVIVALccDVsMsp++shppPoRpslsss...........................shhNp.h 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.a....................sahl+F...s.........pF.tth.thha.t..t..ph.tp...p.p.Y.hhp.ht............................................................ht.pt..................pptttt..............p.tt.ttt.....................tpt.....p.p.p.......p........pp...t.........t............tthsp...p.LslGh..DpSallp..ss.t......ItV.a+p..........ps......t......l..p.....at........s.shs.plp.....s........pG...p.......h...s..PpKsh.....L.hp.t-pshlL.....s.t.....p......ss.....t.....l.....aphDlEhGKlVpEWch.p...c...c...hs...hhshssp.s...KhuQhs.s.p.p.TFlGlspNsla+hD.....sR.........l.....s.u......sp.....l...........l...t.s.........p....h............+.........p...Y...spps......s......Fs.sh.ATTtpGhlsVuSpcGcIRLa...............s.......+..............h...............u.....h..p......AKTtlP.u.LG.pPIhtlDVotDG+WlLuTscoYLlLlss.h...p.cs..csps.ph..GFp.pph.sts.ppstP+hLtLpPtcst.h...t...tps..l..pFs.ApF..s..T..t.....s...p....pE.ppIlsusG.ahlhWshcplhpup.....ptsYph....................................................................................................................................................................................................... 
0 69 126 168 +8387 PF08555 DUF1754 Eukaryotic family of unknown function (DUF1754) Daub J, Mistry J, Wood V anon Pfam-B_10536 (release 18.0) Family This is a eukaryotic protein family of unknown function. 21.70 21.70 21.70 21.90 21.40 21.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.32 0.72 -10.62 0.72 -3.40 37 249 2009-01-15 18:05:59 2005-12-02 16:25:23 5 7 201 0 171 240 1 86.70 28 58.97 CHANGED c....Ysp.sssGpLKLKGst..h............+KKKKKccppspp....pctstpspppppppstcst.....................................tptt.pttsthshTcAE+pacchpcKRhpc .......tYps.ss..tGpLKLKGss..h..............hh+KKKKK..c+ccpc.p......tpt.htssp.p.pp.ppp.pttt....................................................................thT.AEt...tacchpcKR........................................ 0 56 89 131 +8389 PF08557 Lipid_DES Sphingolipid Delta4-desaturase (DES) Daub J, Mistry J, Wood V anon Pfam-B_9504 (release 18.0) Domain Sphingolipids are important membrane signalling molecules involved in many different cellular functions in eukaryotes. Sphingolipid delta 4-desaturase catalyses the formation of (E)-sphing-4-enine [1]. Some proteins in this family have bifunctional delta 4-desaturase/C-4-hydroxylase activity. Delta 4-desaturated sphingolipids may play a role in early signalling required for entry into meiotic and spermatid differentiation pathways during Drosophila spermatogenesis [1]. This small domain associates with FA_desaturase Pfam:PF00487 and appears to be specific to sphingolipid delta 4-desaturase. 20.30 20.30 20.30 21.60 20.10 20.10 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.02 0.72 -4.71 24 352 2009-01-15 18:05:59 2005-12-05 16:10:50 5 5 268 0 247 355 8 37.80 47 11.04 CHANGED sspsDFhWoho--PHssRR+tILpKa.PElKcLhGs-Pth ........csDF.WsYT-EP...HssRR+tILpKa.PEIKcLhGs-Ph...... 
0 89 133 203 +8390 PF08558 TRF Telomere repeat binding factor (TRF) Daub J, Wood V anon Pfam-B_8956 (release 18.0) Domain Telomere repeat binding factor (TRF) family proteins are important for the regulation of telomere stability. The two related human TRF proteins hTRF1 and hTRF2 form homodimers and bind directly to telomeric TTAGGG repeats via the myb DNA binding domain Pfam:PF00249 at the carboxy terminus [1]. TRF1 is implicated in telomere length regulation and TRF2 in telomere protection [1]. Other telomere complex associated proteins are recruited through their interaction with either TRF1 or TRF2. The fission yeast protein Taz1p (telomere-associated in Schizosaccharomyces pombe) has similarity to both hTRF1 and hTRF2 and may perform the dual functions of TRF1 and TRF2 at fission yeast telomeres [2]. This domain is composed of multiple alpha helices [3] arranged in a solenoid conformation similar to TPR repeats. The fungal members have now also been found to carry two double strand telomeric repeat binding factors [4]. 
25.00 25.00 27.10 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.23 0.70 -11.43 0.70 -5.03 27 262 2009-01-15 18:05:59 2005-12-05 16:13:17 5 4 151 11 139 253 0 216.50 25 37.59 CHANGED hhp..oL.shLDsl......................usQlLphltp....s...h...htp.ts....spspsFppltslFchhpcha.scsshl.s.pl..............hpsppthl.psl++sNhuphlssshss.c.luh...h.LsptFl-lFsspss......................................phh+spshLhlpLKTQAaIsulc..............tt.cppp-lLcclhssshps.h..hp.........l....sEhphh.+hcpR+cpLh....ppsshpsL.ppasatsFh+clhsalppphs.....hllhutptts.spt.ps .......................s.......hL-.h......................shpllphhtp....................................spspsFpph.pslhctlhphh.......spss.........................hpt+phhlhph..Lp+ls.uph.L...sspF..sspp..ls....hEss.thhchhpsEts.................................................hh.csppplhh.lKpQAhlsshc.............................................ptphcptp-lLc.clFscs.sp......................................pph+ppLl..lp.pcsshcslhppas..apphhcclhsalppphs.....hLhhttpthhpp...stp................................................. 0 15 46 92 +8391 PF08559 Cut8_C Cut8; Cut8 six-helix bundle Daub J, Wood V, Eberhardt R anon Wood V Family In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome [1]. Cut8 comprises three functional domains. An N-terminal lysine-rich segment (Pfam:PF14482) which binds to the proteasome when ubiquitinated, a central dimerisation domain (Pfam:PF14483) and a C-terminal six-helix bundle (this entry), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding [2]. Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. 
Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 [1]. Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome [1]. In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 [1]. Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum [1]. 21.00 21.00 21.50 21.50 20.30 19.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.92 0.71 -4.36 19 160 2009-01-15 18:05:59 2005-12-05 16:15:06 5 5 156 3 123 156 0 140.60 30 43.46 CHANGED PolpsslplLpphhpplhsslP..YspsspS......DYAYhRl+tpLhphLssLsDas.paLPP.....pcsphptSLpFLDtATpll+pLPsa-otpaNhhKspsY-plupsWhlllpcuucctsshth..s.....................pcLpc+NppSss+.hppsl ...............................PolpsslphLpphppplhpulP..hu.psspu..................DYAYsRl+.pLsphlc.sLsDas...phLPP.....pp..p.phssSLpaLct.A.Tcl.l+pLPpW-stpaNhh+cpsa-plupsW.hhlIcc.uucc.tuthphp.sth...................................ppLtcaNppusGc.hpps...................................................... 0 30 60 103 +8392 PF08560 DUF1757 Protein of unknown function (DUF1757) Bateman A anon Bateman A Family This family of proteins are about 150 amino acids in length and have no known function. 
21.00 21.00 21.80 21.50 20.70 20.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.94 0.71 -4.53 4 53 2009-01-15 18:05:59 2005-12-13 12:59:48 5 4 28 0 49 49 0 149.10 24 75.66 CHANGED pWF+NhhGhp...lo-pEhtsIPpPcsEhslHlTh+ohQAhuLlGullsuPlsphlpu.+.Nhpthpssusphu+hG........hlhGlVsuPhLsYhch+stshsp.uLaDRCYRLRhNpsplRhDRhuhlushsG..........hhpsuhhGhVsGssluhsYs.l.Sshhs .............................................................lPpPthphslHssh+uhpsuull...............Gu.ll.sPl..hhh.hpp..pp....s..p.thh..s........shspsupsG........slsGhshGPh..lohhch+s..hschchhDRsYRLRhNpspLphDRhslhuuslG....................hh.ss.hGhVsGlsluhhhs........h.h..................................... 1 23 30 49 +8393 PF08561 Ribosomal_L37 Mitochondrial ribosomal protein L37 Bateman A anon Bateman A Family This family includes yeast MRPL37 a mitochondrial ribosomal protein [1]. 19.60 19.60 22.90 20.20 17.50 17.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.76 0.72 -4.41 23 304 2009-01-15 18:05:59 2005-12-13 13:51:40 5 2 258 0 232 295 1 89.50 29 61.01 CHANGED spsssSus.suThlsslNlhKs.GpDPshL.DSEYP-WLWpl.LDssspstcttcss.............................................................................p.t++hhRptp+ppI+psNhLsph ....................................s....hsss.hsThltGlNhhKs.spDshhh.DsEYP-WL....Wpl...hss.t....thtcttcss...................................................................................htth+chhRttp+pp.I+psNhLpt.................................................................................. 1 75 126 188 +8394 PF08562 Crisp Crisp Mistry J anon manual Domain This domain is found on Crisp proteins which contain Pfam:PF00188 and has been termed the Crisp domain. It is found in the mammalian reproductive tract and the venom of reptiles, and has been shown to regulate ryanodine receptor Ca2+ signalling [1]. 
It contains 10 conserved cysteines which are all involved in disulphide bonds and is structurally related to the ion channel inhibitor toxins BgK and ShK [1]. 20.10 20.10 23.10 29.20 19.90 19.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.85 0.72 -3.69 26 238 2012-10-02 17:51:16 2006-01-03 16:31:09 5 2 114 25 72 235 0 54.30 49 22.83 CHANGED CuDCPssC-NGLCTNsCpapDtaoNCsoLtpphuCpp.phlKspC.AoChCcscIh CusCPssC.-.s...G.LCTNPCpapDtaoNCcsLtp...p...h...uCpc.....ph.....l.....+ppCtAoChCpscIh.... 0 7 10 37 +8395 PF08563 P53_TAD P53 transactivation motif Finn RD anon Pfam-B_3515 (release 19.0) Motif The binding of the p53 transactivation domain by regulatory proteins regulates p53 transcription activation. This motif is comprised of a single amphipathic alpha helix and contains a highly conserved sequence [1-2]. 20.00 20.00 20.00 22.30 19.40 19.30 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.69 0.72 -7.08 0.72 -4.85 12 143 2009-01-15 18:05:59 2006-01-03 16:48:47 6 6 66 16 32 159 0 24.40 62 6.48 CHANGED spptshs.PLSQ-TFp-LWphLsts .....S-.ulEPPLSQETFSDLW+LLPEs.... 0 3 4 6 +8396 PF08564 CDC37_C Cdc37; Cdc37 C terminal domain Bateman A, Mistry J, Wood V anon Pfam-B_3345 (release 6.5) Domain Cdc37 is a protein required for the activity of numerous eukaryotic protein kinases. This domains corresponds to the C terminal domain whose function is unclear. It is found C terminal to the Hsp90 chaperone (Heat shocked protein 90) binding domain Pfam:PF08565 and the N terminal kinase binding domain of Cdc37 Pfam:PF03234 [2]. 
20.80 20.80 21.00 22.10 20.70 20.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.30 0.72 -4.16 31 299 2009-09-11 10:45:19 2006-01-04 11:51:26 5 10 236 1 188 289 3 91.80 29 21.44 CHANGED h.h..Ps...............uhDP...........clF-.oLPs-hQcsh-occl-tlpcslucMss-EActhhcphs-uGlhs.puth.......sptphc-.ptp.....tthpptptppttsp ......................hPu...uhDP...........-lF-.oLPt-hQcsh-opsl-hlpcslucMss--Achhhc+hs-uGlhs.psth.................psppphp-.ttt.....................stts............................. 1 59 99 155 +8397 PF08565 CDC37_M Cdc37; Cdc37 Hsp90 binding domain Bateman A, Mistry J, Wood V anon Pfam-B_3345 (release 6.5) Domain Cdc37 is a molecular chaperone required for the activity of numerous eukaryotic protein kinases. This domains corresponds to the Hsp90 chaperone (Heat shocked protein 90) binding domain of Cdc37 [2]. It is found between the N terminal Cdc37 domain Pfam:PF03234, which is predominantly involved in kinase binding, and the C terminal domain of Cdc37 Pfam:PF08564 whose function is unclear. 
25.00 25.00 26.30 25.10 22.80 24.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.05 0.71 -4.78 19 361 2009-01-15 18:05:59 2006-01-04 12:01:11 6 12 244 3 220 346 3 181.10 27 44.13 CHANGED NKsptt........t.st.ts.pphpphsppsEpl...................................ccFuplp.scaccoppFLh-HspllsEpptctLlhpAFchphctccchhpplsHQullhQYlhp...Lu+..phss...+sslp.FFpKlts..sc..htptFpc-Vpshhp+l+sRupth....hcEpppps .................................................................................................................................................pt.tt.........................tt..tp.ph.s.....................................................t..p...............p.ht........h.+....cFGhlp..cac-SpcFLp-H.s.p.L.l.s..E.c..s..ss.hLllhshchph....E.p..............Kp.s....hhcplsHQsllhQalh-....L.ucp.phss.......Rsslp.FFpK....lps...sct.hhcsFps-lpshhpR.l+tRAp.h.......hp-.t...t........................ 0 65 105 170 +8398 PF08566 Pam17 Mitochondrial import protein Pam17 Mistry J, Wood V anon manual Family The presequence translocase-associated motor (PAM) drives the completion of preprotein translocation into the mitochondrial matrix. The Pam17 subunit is required for formation of a stable complex between cochaperones Pam16 and Pam18 and promotes the association of Pam16-Pam18 with the presequence translocase [1]. Mitochondria lacking Pam17 are selectively impaired in the import of matrix proteins [1]. 
20.50 20.50 21.30 47.60 19.60 19.70 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.97 0.71 -4.57 24 137 2012-10-02 17:14:55 2006-01-05 11:20:55 5 2 133 0 108 136 0 173.70 40 77.72 CHANGED hsp........sshpssSssssssssss..............LsWssFFpLR+p...cRRhshsoSlhTulsusshuhsaLushplD.....sp.IhGlDPhhllGhushusuulGaLhGPhlGsslFpLhp....RpthtthphK-p-FhpRI++pRVDPSupShuNPVPDYYGEKIuSlpsYRQWLRDppAapRKsc.pF....l ............................thtstts.........putst..ssstttss.............LsWssFFpLR+p...+RRhslsuSlhsulhu...sssussh....Lu...sh..phD.......st.lhGhDPhhllGhushusuulGaLlGPh.lGsslapLhp....R.phhsthsh......K-+EFapRI++pRVD..P..S.upShuNP..VPDYYGEKIuSlpsYRQWL+DQ.+AasRKtppFl................. 0 34 62 93 +8399 PF08567 TFIIH_BTF_p62_N TFIIH p62 subunit, N-terminal domain Finn RD anon Pfam-B_31040 (release 19.0) Domain The N-terminal domain of the TFIIH basal transcription factor complex p62 subunit (BTF2-p62) forms an interaction with the 3' endonuclease XPG, which is essential for activity. The 3' endonuclease XPG is a major component of the nucleotide excision repair machinery. The structure of the N-terminal domain reveals that it adopts a pleckstrin homology (PH) fold [1,2]. 21.10 21.10 21.20 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.35 0.72 -4.33 19 254 2009-01-15 18:05:59 2006-01-05 11:46:22 6 6 219 7 178 238 0 74.10 34 13.13 CHANGED h.hpGtssaK.....Kss......GhLplspDpp.ltWhspussusp.sloltlspIssLQtoPtsusKlhL+llh+ss.......pshphhFss .........h...stpspaK.....Kp-......GsLhlhs-+..lsWssc......upcpss....lohhhucIps..........phoPcu.psKl.Lpllh+ss.........sspsahF..................... 0 52 87 143 +8400 PF08568 Kinetochor_Ybp2 DUF1760; Uncharacterised protein family, YAP/Alf4/glomulin Mistry J, Wood V, Lonsdale D anon manual Family This entry contains a number of protein families with apparently unrelated functions. These include the YAP binding proteins of yeasts. 
These are stress response and redox homeostasis proteins, induced by hydrogen peroxide or induced in response to alkylating agent methyl methanesulphonate (MMS) [1,2]. The family includes Aberrant root formation protein 4 (Alf4) of Arabidopsis thaliana (Mouse-ear cress), which is required for the initiation of lateral roots independent from auxin signalling. It may also function in maintaining the pericycle in the mitotically competent state needed for lateral root formation [3] [14731255]. The family includes glomulin (FAP68), which is essential for normal development of the vasculature and may represent a naturally occurring ligand of the immunophilins FKBP59 and FKBP12 [4,5]. 21.40 21.40 21.60 22.10 20.90 21.30 hmmbuild -o /dev/null HMM SEED 633 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.13 0.70 -13.07 0.70 -6.28 29 316 2009-12-17 14:51:14 2006-01-05 11:55:57 5 8 213 0 210 288 0 458.30 17 85.82 CHANGED M.............................................................htct.s......shhpt...spDhloalThl-hh..................p.p.thLstLhplL.p.-..pslsppIGWDLsphLl.hlstsp.................cChpplAchGNP+EllLpssEhLppLps....................................tDps..pp...................................ttp.t..hpl.ph.s...LlphlsshhpRIpTphPS+FLuhulpulhphhpp.s.....ppshs.hpFl.....+plhshpRsh.sst.sp.................ss-hsscs.s.hhssEssl.++lLpthhhphlpphlh..........shphshch.....sth...h.sttpppphhs.pp.ht.h..lhu+hstLu.....hDlsLps.h....h............ppl.....t.s.pspsh....s...-cs.hphc..h.h............cIslshpGsLlLhstthh.............pssp.........hsDhlhlhlchs........sshhpspulpDshhhhshhshpssp...........t.h....spthhhsalpsLhhhsspps...th+ttshplhsplLphtPpps.pachI+csLcsssat....slKsshluhLKc.lhpsppssptt.....................................................ptslhlsscphsslhsllhtshpthh..............hhp...p.hshlhuhLNlhhhlhp.........p.s..............hhhpchhcslcshlpthcschttp....................................stpph.ph....ssp.hLpps 
......................................................................................................................................................................................................................................................................................t......t-...h...h.h.......................thh..l.phl.p..p.....hhtphGWsL.t.ll.h..h..pp...................thhp.lst..ssP+EhllthhEhltp.p........................................................................................................t..h.h........Lhp.lthhh.+l..t..shhstp.lt.hslpslhp.ht..s.......tt.t.........hh...............ptl.t..hps.....................................................................p...p....t...........t..-.....tp.l..hhh..lt..h....................................................................h.ht.h.th.......s..h...............................................p......t.....................th.....s.hhh.h.................................hh.hh.phh...........t........sh.......h..hhh...tp............................t................hh..thhphlhhhhhhp......s....th...Rph.sh..tlhphhlphhsp.ps.+aphhtph.lp.ss..h.....shps.hlthlKp.h..s.tts...........................................................hh.hss.th.tlh.hhh....pt..........................p...t...s.h....lhsh...LNh.hhhLh...................................p.p....................t.........th.pphhp.lp.hhp...tph...............................................................t.............................................................................................................. 0 53 106 167 +8401 PF08569 Mo25 Mo25-like Finn RD anon Pfam-B_5502 (release 18.0) Family Mo25-like proteins are involved in both polarised growth and cytokinesis. In fission yeast Mo25 is localised alternately to the spindle pole body and to the site cell division in a cell cycle dependent manner [1,2]. 
20.50 20.50 20.50 22.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.89 0.70 -5.41 23 518 2012-10-11 20:01:01 2006-01-05 12:11:16 6 8 288 6 335 448 8 289.20 41 92.36 CHANGED MsFhF...p+.sp.KoPs-lV+slp-tlhtLt..............tscstcKs.-EluKpLsshKphlhGss-s-PssEp...lspLspEhhpp.-llh.LlpsLtpL-FE.uRKDlshlasplLRpphss...........ps.Ps......V-Ylspp.sclLshLlcuY-.......ss-luLssGshLREsl............................+a-sls+llLh...ss.......................pFap.....FFcalphssF-IuoDuasTh+-LLTpH+plVucaL...psNa-cFhs.phspLlpSsNYVT+RQSlKLLu-llL-RsNhplMspYlsss-NLKLhMpLLpDcS+NlQhEAFHlFKVFVANPsKspPlhcILl+N+-KLlcFLpsFpsD+h.cDcpFhDEKpall+pIppL.p .................................................................................F...tp...p...+sPs-lV+th+-.l.h.L....................................................ts.pptpct.--luKpL.thKtlLhGs..................s..........-t...-.....P.s-t................lsQLspEhh.pp..slLhh..Llt..sL.hlsFE......u+K..DssplFspllRp..phss.......................c..ss.......l-Yl......sp..cll.hLhpGY-...............s.-....hALpsG.hLREsl...............................+a.-slA+.hlL....sp..............................................................pFhp.....FF.c.a.lp.....hssF-lAoDAF...sTF+................-LLT.+..........HK.........lsucaL...............ppNa-h.FFp...h...p.pLl.pS....p.N.YV..T+RQSlKLLGElLLDRpNhslMs+Ylsps-NLKlhMsLL+..D.cS+s.IQhEA..FHVFK.....VFVAN..P......pKs.s.lhpILlpNppKLlcFLtpF..........tt-+..........-D-QFt-E..Kthll+pIppL........................................ 0 107 186 274 +8402 PF08570 DUF1761 Protein of unknown function (DUF1761) Wood V, Finn RD, Bateman A anon Pfam-B_85869 (release 19.0) Family Family of conserved fungal and bacterial membrane proteins with unknown function. 
26.00 26.00 26.30 26.80 25.20 25.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.71 0.71 -4.12 81 254 2009-01-15 18:05:59 2006-01-05 13:12:42 5 3 233 0 120 231 54 127.80 21 85.78 CHANGED hlAlllAslsuallGslWYush..FGcsW...............hcstuhspcp..hcstts......shshshlsshlhuhhluhhhshhs.................................htohtsuhhhuhhlhls....hhsshhssshhapt+shplh..hIsuGatllsh................slhuhllsh ..........................................ulhluslhsahhuhhaassl..Fucsa...............hcutuhsspc......h+ptphh......shshshlsshltuhslutl.lsths.................................shohtsuhhhGhl....lhhu....h.ssthhsphha.....E..pRshp..hh..hlsuuapllth................lhlulhls......... 0 51 81 104 +8403 PF08571 Yos1 Yos1-like Wood V, Finn RD anon Pfam-B_23321 (release 19.0) Family In yeast, Yos1 is a subunit of the Yip1p-Yif1p complex and is required for transport between the endoplasmic reticulum and the Golgi complex. Yos1 appears to be conserved in eukaryotes[1]. 19.30 19.30 20.60 20.00 18.60 17.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.59 0.72 -3.88 20 281 2009-01-15 18:05:59 2006-01-05 13:31:28 5 6 222 0 190 258 3 76.20 43 59.59 CHANGED lhsLl.ulLLhlNAlAILsE-RFLu+lGW.usossps.u...............................sssolKspllsLIpuVRTlMRlPLIslNlllIlacLlLG ..................................................LhsLlpusLLhlNAlAlLsE............-............RFLs+.lGW..ut.sp.s.t.s..G..........................t..............ppsolKupllsLItulRTlhR..........l..........P.LIhlNhlhIlhtLlhG............................ 0 51 99 153 +8404 PF08572 PRP3 pre-mRNA processing factor 3 (PRP3) Mistry J, Wood V anon Pfam-B_7232 (release 18.0) Domain Pre-mRNA processing factor 3 (PRP3) is a U4/U6-associated splicing factor. The human PRP3 has been implicated in autosomal retinitis pigmentosa [2]. 
31.40 31.40 36.40 33.40 21.70 28.30 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.55 0.70 -4.97 21 355 2009-01-15 18:05:59 2006-01-05 13:48:31 5 10 293 0 250 343 1 212.70 39 38.51 CHANGED ssPYhsspht..............tphpp+ppcshpFhctG+a.ppApchRpcsphE......chcpchtpt...scpsslpppp............tcph.ttphs.......................PslEWWDtshlsps..........shsslss-ts....t...........................p.ssIst....alpHPlslcsPh-...h.sss...slaLTKKEpKKlRRppRttppcE+p-+......I+LGLcPsP................PKVKlSNLM+VLss-A.lpDPTphEtcVRcphtERpppH.ccNppRKLTs .................................................................................................................tsahDsphs..............htsttRpp+shpF.pppGKa.p.A.pph...RppsplE...................chptclspt...........s+c.s.Glppst...........................ths.hs.h.t.p.h..................................................................P.p.l.EWWDphllst.............................shpsl.sp..t..p..h...p............................................................................................p.psIT......hlpHPs.lpPP.t-.......hss...hshaLTpKEpKKlRRQp..Rttt.KEpQ-K.......IRLGL.PsP.............................PK.V+lSNLMRVLGs-A..VpDPTtlEs+V+pphAcRpptHpctNttRKLT......................... 0 87 141 208 +8405 PF08573 SAE2 DNA repair protein endonuclease SAE2/CtIP C-terminus Mistry J, Wood V anon manual Family SAE2 is a protein involved in repairing meiotic and mitotic double-strand breaks in DNA. It has been shown to negatively regulate DNA damage checkpoint signalling [1][2]. SAE2 is homologous to the CtIP proteins in mammals and an homologous protein in plants. Crucial sequence motifs that are highly conserved are the CxxC and the RHR motifs in this C-terminal part of the protein [3]. It is now known to be an endonuclease. In budding yeast, genetic evidence suggests that the SAE2 protein is essential for the processing of hairpin DNA intermediates and meiotic double-strand breaks by Mre11/Rad50 complexes. 
SAE2 binds DNA and exhibits endonuclease activity on single-stranded DNA independently of Mre11/Rad50 complexes, but hairpin DNA structures are cleaved cooperatively in the presence of Mre11/Rad50 or Mre11/Rad50/Xrs2. Hairpin structures are not processed at the tip by SAE2 but rather at single-stranded DNA regions adjacent to the hairpin. The catalytic activities of SAE2 are important for its biological functions [6]. 22.90 22.90 23.00 22.90 22.30 22.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.37 0.72 -3.28 22 209 2009-01-15 18:05:59 2006-01-05 14:38:56 5 9 190 0 153 218 1 83.20 35 12.52 CHANGED sas-VlRp+c-Rcpl.pGs.p.sC..Csptapshut.t................................phh.lsphppcc+pchh.ptp........tp.hhpphu+HRapa.t+ssTPPGFWchDF.Ps .............................................................EVVRcKp-RcpL..Gp...pC..Ctthatshs............................................t..tc.ccp..c..hh.................sphSRHRapa..h.ssTP.sFWcssF.Ps.............................. 0 45 78 122 +8406 PF08574 DUF1762 Protein of unknown function (DUF1762) Mistry J, Wood V anon manual Family This is a family of proteins of unknown function.\ \ \ Swiss:Q07532 is known to interact with RNA polymerase II and deletion of this protein results in hypersensitivity to the K1 killer toxin [1]. 21.30 21.30 21.50 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.88 0.72 -3.56 16 232 2009-01-15 18:05:59 2006-01-05 14:41:35 5 4 208 0 167 219 1 74.30 30 20.15 CHANGED -sss-YVYDlYh.........hpps.--.pssp.s..........ppsIGalplh-csp-....hhp-----s....phhoDDEDSNsENaYpNDYP-DE .........................................ps-YVYDlYh...........................tp....th.....................pslsh..l.hhhpp.....p.p..p...........hhs---..sc........h.pD--DSN.........sEs.apNDYP---.......... 
0 48 87 138 +8408 PF08576 DUF1764 Eukaryotic protein of unknown function (DUF1764) Mistry J, Wood V anon manual Family This is a family of eukaryotic proteins of unknown function. This family contains many hypothetical proteins. 21.80 21.80 24.20 22.20 21.40 21.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -11.11 0.72 -3.24 19 96 2009-01-15 18:05:59 2006-01-05 15:47:32 5 1 85 0 71 96 3 111.90 24 68.96 CHANGED pscK..............tstphslsshFsstptpppctp..t.........pspcspssp................ttp.p.phppsttRRcs.DGhhIaoh....EELp...hu..cu.GsTs..........pCPFDC-CCF .......................................................................................................................ttp.............t.tp.ttpl-plFsshpppppptptttpt.....................p.pt.t.pp..................ppppptpspp.tttp.....s..ps..sp...sRR+TpDGhsIYot....-ELs...hu.....p.u.GsTs...............CPFDCpCCF......... 0 41 56 66 +8409 PF08577 PI31_Prot_C PI31_Prot_Reg; PI31 proteasome regulator Wood V, Finn RD anon Manual Family PI31 is a cellular regulator of proteasome formation and of proteasome-mediated antigen processing [1]. 25.00 25.00 25.00 25.00 22.50 24.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.11 0.72 -3.33 26 230 2009-01-15 18:05:59 2006-01-05 15:52:27 6 3 199 0 156 220 0 74.20 35 24.24 CHANGED shGcsDL.P.................sGhs..sshps.hss....................t........GGMh.sss...c.PhFss.tp...stt...s...........ssssPPGARa.DPhGP ....................................hGppDL.P..................Ghs.........sshpsh..hss..s.......................stt.........GGMhssPp...c.shFsp..h..........ssp.......G.s.........tth.ssusPPGARa.DP.hGP.... 0 50 82 126 +8410 PF08578 DUF1765 Protein of unknown function (DUF1765) Wood V, Finn RD anon Manual Family This region represents a conserved region found in hypothetical proteins from fungi, mycetozoa and entamoebidae. 
25.00 25.00 25.00 32.40 22.80 24.50 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.56 0.71 -4.07 31 180 2009-09-10 15:38:14 2006-01-05 15:55:44 5 6 133 0 142 183 0 130.40 24 11.91 CHANGED hhstllhshsp+TslYDtssshhlhshlpphl.thhst...p..........................h.shhDapFWLpslthhlp.ocpsho.h+slthlassWshhshs.c+cl.....................................lc..aLlppphah+hF.HWs.hVRshFh+LLlaRlh ...............................h.hsthlphhhp+TslaDtsushhlh-hlcchl..hhsphppp......................................hsshhDasFalsshchhlp.ocsshs.l+slsalassWshl..sts.ccch....................................hhc.hLLspphF.c.h.F..HWsshVRshah+LLsaRl................................................................. 0 62 94 129 +8411 PF08579 RPM2 Mitochondrial ribonuclease P subunit (RPM2) Mistry J, Wood V anon manual Family Ribonuclease P (RNase P) generates mature tRNA molecules by cleaving their 5' ends. RPM2 is a protein subunit of the yeast mitochondrial RNase P. It has the ability to act as transcriptional activator in the nucleus where it plays a role in defining the steady-state levels of mRNAs for some nucleus-encoded mitochondrial components [2]. 27.00 27.00 27.00 27.50 26.90 26.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.63 0.71 -3.97 6 46 2009-01-15 18:05:59 2006-01-05 16:12:12 6 4 43 0 28 49 0 122.10 36 10.25 CHANGED slht--sh.sWp...pDpEssLN....cpsaLpTpIcpIpssacp....csYNhINsLYQoLKRNsI.lPslclaspVLpSIscRcLDs.....ssI-sKhhpLLoCYQDIlsN+...lKPspEIYsIVlsoLL+GSlp .........................t..........hp...p-s-ssl.....cpsaLpTp.lspIspsapp....pchNhI.sLYQuLKRNsl.lPsl-lYshVLcSlscRplDs.....ssl-sKlspLLTsYQDl.l...ssp.......lKPscEhYNIVltuLhcGSl... 
0 3 14 27 +8412 PF08580 KAR9 Yeast cortical protein KAR9 Mistry J, Wood V anon manual Family The KAR9 protein in Saccharomyces cerevisiae is a cytoskeletal protein required for karyogamy, correct positioning of the mitotic spindle and for orientation of cytoplasmic microtubules [1]. KAR9 localises at the shmoo tip in mating cells and at the tip of the growing bud in anaphase [1]. 20.40 20.40 23.20 21.80 19.80 19.70 hmmbuild -o /dev/null HMM SEED 683 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.35 0.70 -13.22 0.70 -6.25 9 136 2009-01-15 18:05:59 2006-01-05 16:19:49 5 3 119 0 96 131 0 566.70 25 69.82 CHANGED p.ppclsplspllhsphs.l.t.....l.phhsstcshhsslhphh....-Wlh-u+sllhpL.............hpslcsI-psls+hhphl-s.hts.ps.p.pc-.hs..hhslh-csoplhs......pl+shlpslKchlDhAlEapEIhcshhssLspEl-pshphsh-lpEc+atSPh....+c.hssFsL-pllcph......usp.ssp....phph.PsF..............ss.-cplhpcalpL.csplsPlcsSL.-hLP.RlspFpsRs......p.hsshtp.Lpc+apsLhccYchLpsEhppL+pEll-c+WshlFpsLscElthhh-.sl.+.lp+lpss.ph..shphp.t.thscplpshptphp+ohslIhpAhp.S..Ilscu.lushhN..........chts+W.pL.........+...phh-plLpc.psps..t..............ss.spshpohop.....s.sspsl.sssspt......sspsSpp.shss.ptt+hussL.c+hshtPs.sp..s.t............................ssssssshhps..............psPhFs................ps....cp......t.................................s..t.............sphs..S.p+..hthshoplPslu.pps.hhps............................shpRsssptSp.....h.phtspl........s.s.............................sh.+Sltp..ttpht..tp.h.ssphPsh.hpt..s......psshlspstth.phsts.....sp.cct.....l+.P 
................................hspplsplopl.hs....ltt.h.....lssh.ssh+.sh.s..sst.ph.....tWh.cupphltsL.............hcsl.cpl-puls+h.pLlps.htt.-php.+.-hsp.s..s.lhp.s.sphct......pl+thLpslKppl-lAhEapElhssllsslt.Eh-ph.phhhEhcEcRato.h..........tp.hsh.pLEpllcph.........................t.....t..tsp...................ph.ph...Phh...................st--p...phLsL.hu+hpPLcsSL.-hLP.hRls.FpsRs.....pphFsoup...ccLpp+ppt...L.ppa+tLpp-tcsL..+cELh-cRWsllFRshscpspphh-.ulE+sl.....tK....lpp..........s...p.........thphpt.......thscclcshpt....pps+hhssI..thhhu..llpcG.lts+hs.........t-htt+atsl.................p......t.hDth.L....pchpspp..............+c...slsp.ho...tps...tp.sh.tTsss.......sssS.pl.hhss.........hs..........t.......................p.....t.ps..p..s..s.s.......tt.pphsts..p+.s..hPt..s.s..ttp...........t........................................t.....t..............................t.p.s.hp........t.......t................................................................................h.............p..........s..hs......................p........................................................p......s..............t.ph..................s.s..h........................................................p.th..............................s......................t....................................th................................................................................................................................................................................................... 0 22 51 84 +8413 PF08581 Tup_N Tup N-terminal Wood V, Finn RD anon Pfam-B_9595 (release 19.0) Domain The N-terminal domain of the Tup protein has been shown to interact with the Ssn6 transcriptional co-repressor [1]. 
22.40 22.40 22.40 22.70 22.30 22.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.67 0.72 -3.79 20 172 2009-01-15 18:05:59 2006-01-06 11:04:59 5 8 149 6 122 173 2 74.50 42 12.30 CHANGED RLsELLDulRpEF-shupcst.hp..ppc-YEt+..lspQlpEhphIRpoVY-LEtsHpKhKppYE-EIt+L+pELEsRstp ....................RLsELLDtlRpEF-s.sp........ppc-aEpp..lspQlpEMphIRppVYpLEpsahKhKppY...E-EIppL+pcLEsRsh............ 0 40 73 109 +8415 PF08583 Cmc1 UPF0287; Cytochrome c oxidase biogenesis protein Cmc1 like Mistry J, Wood V anon manual Family Cmc1 is a metallo-chaperone like protein which is known to localise to the inner mitochondrial membrane in Saccharomyces cerevisiae. It is essential for full expression of cytochrome c oxidase and respiration [1]. Cmc1 contains two Cx9C motifs and is able to bind copper(I). Cmc1 is thought to play a role in mitochondrial copper trafficking and transfer to cytochrome c oxidase [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.82 0.72 -4.20 52 511 2012-10-02 15:44:21 2006-01-06 11:43:59 5 6 275 0 351 549 2 68.80 21 58.80 CHANGED h+pthcpcsh.cpCpchlpsht-Cppp..phhpshhtCpcpppthppClpphppscth........ctpcschhpc.+hc ..................hh..hpp+tp.ppCsphlp............thpcCpps......p....hh..phhst.CpctppthppCLpttttppth....................pttctchhpp.+................................... 0 101 180 284 +8416 PF08584 Ribonuc_P_40 Ribonuclease P 40kDa (Rpp40) subunit Wood V, Finn RD anon Manual Family The tRNA processing enzyme ribonuclease P (RNase P) consists of an RNA molecule and at least eight protein subunits. Subunits hpop1, Rpp21, Rpp29, Rpp30, Rpp38, and Rpp40 (this entry) are involved in extensive, but weak, protein-protein interactions in the holoenzyme complex [1]. 
20.20 20.20 20.70 20.50 19.80 19.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.65 0.70 -5.40 15 216 2009-01-15 18:05:59 2006-01-06 12:26:07 6 3 179 0 151 218 0 217.90 26 71.17 CHANGED LppLL-.....s-FhsoaIKp...........................GshhhlS.csR.ssDNshsLhs...GpLhLpLsK-sYEcsGLpGKPschuG++th..RalVclDL+.sShthGpKuFpRlhWuhKshLs.hsssaLahphsos..uhsp.tpssshLssa...........psplhpscssppphpslhsPsLspschsspspstt.hp-......up-LhEWLGhVuls...lsssD.cs-saLSpYssP.-sssth...schlslpWpGFlsPphlhpLltslp+hh.......................ts.s.ssWhuLospGFucsslp...........GcssYTllhhssp ....................................................................hlp.tFhpthlp..............................G.phhhlo..ps.....p.s.....ssshs.lhs...GhLhLpLsK-sYEph.G.L.G+......s.........t..t......s....cc....................+a.llplsL...h......sh....sp.+ta.pRl.huhcphhs.....t.....h.....shLht.h.ss..................s.h.tth....................................................t.p.hp...p.thp...htp.l....h........P....h.t............t.t.t..................pp.......................s.-hh-WLuhs..t.......lph...ss.p.ssalSpYp.sP......pssp.h.............sphhhh.....phpG......hl.sphl..lh.t..hhp.h..........................t.....Whs.lsspuhscsslt......................................................................................................................................................... 0 51 78 117 +8417 PF08585 DUF1767 Domain of unknown function (DUF1767) Wood V, Finn RD anon Manual Domain Eukaryotic domain of unknown function. This domain is found to the N-terminus of the nucleic acid binding domain. 
20.40 20.40 20.40 20.60 20.30 19.70 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.18 0.72 -3.71 34 333 2009-01-15 18:05:59 2006-01-06 14:33:46 7 14 209 1 245 330 0 87.50 23 16.57 CHANGED sphLppht..ltlsspWLpphlst............s.sthspplhpphLssDl+-h..s...tssLPs..sltp..hpptpLp...........GshlLQlpplp-Iotshh.sphp .............................................htpht..h.l.s..p.alpthhs............................s.sthspplhpphLtsDLcsh......s........tss..LPs...sltp.......hpptpLs.......................GshlL....Qlppltslutsth.pp................. 0 80 130 199 +8418 PF08586 Rsc14 RSC complex, Rsc14/Ldb7 subunit Wood V, Finn RD anon Manual Family RSC is an ATP-dependent chromatin remodelling complex found in yeast. The RSC components Rsc7/Npl6 and Rsc14/Ldb7 interact physically and/or functionally with Rsc3, Rsc30, and Htl1 to form a module important for a broad range of RSC functions [1]. 25.00 25.00 89.30 84.80 19.60 18.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.28 0.72 -3.53 5 27 2009-09-10 21:44:18 2006-01-06 14:54:42 5 1 26 0 15 19 0 100.30 60 53.69 CHANGED huYYDVIuGLSuLE+ScpVoFospELpELT....cps--sRcsp-..ELp+s-pEcsKRVsVHGYLGG+Vuh+-AupAs.....Y-LsHTLLGGYVPRpQLESLSSsDFA .hGYYDVlAGLSALEKSsQVsFSssELQQLT.....QQscsscKuh-ssE..so+ucsoKsKRVsVHGYLGGKVoLuDAupsp.....Y-luHoLLGuYVPRpQLEuLSSlDFu 0 1 6 12 +8419 PF08587 UBA_2 Ubiquitin associated domain (UBA) Mistry J, Wood V anon Pfam-B_10238 (Release 18.0) Domain This is a UBA (ubiquitin associated) domain [1]. Ubiquitin is involved in intracellular proteolysis. 21.80 21.80 22.10 22.70 20.90 21.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.11 0.72 -3.77 7 138 2012-10-01 23:03:33 2006-01-06 15:47:57 6 5 132 2 94 130 0 44.70 45 6.48 CHANGED lD-sllptLSpTMGYs+D.-Ih-uLcp............sEs....NEI+DAYhLl+EN .....lccpllscLu+TM.GYs+-.-I.-ALcp............sEP......stIKDAYhll+EN.. 
0 17 47 79 +8420 PF08588 DUF1769 Protein of unknown function (DUF1769) Wood V, Finn RD anon Manual Family Family of fungal protein with unknown function. 21.10 21.10 21.20 21.80 21.00 20.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.51 0.72 -4.02 16 120 2009-01-15 18:05:59 2006-01-06 16:05:57 5 4 103 0 99 124 0 55.50 48 14.49 CHANGED Gs-LlFGNDF-+PI+chlPsu.hssuh+lhp.aIDPslcGDlYuDc..PYLYuPuLuSh .....GsDLlFGNDFD+PIRDpLPsG.hssAh+ls+WaIDPuL-GDsY.AD+..PYLYuPuLuSa................ 0 34 60 86 +8421 PF08589 DUF1770 Fungal protein of unknown function (DUF1770) Wood V, Bateman A anon Wood V Family The function of this family is unknown. These proteins are rather dissimilar except for a single strongly conserved motif (PDLRFEQ). 25.00 25.00 26.20 32.40 24.40 24.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.66 0.72 -3.40 12 76 2009-01-15 18:05:59 2006-01-06 16:12:56 5 1 75 0 61 73 0 97.40 42 56.10 CHANGED ApTlQTAols+.c..............................PSPp+..................DlNssT............uAsc+p...sls...pps.s-.sDSls...........p-.s-....spp......hlpPhsRR..p.........phPPlPDLRFEQ......SYLsSIcuA-.o........................WtcVAaIT ......................................ApTlQoApIp..p..............................PSstH..................DlNPsT............AAsc+p........Plsh...t.....ps-...s-uls.................sDh.-....Ppp.........sl+.Ph.tR+..p.........pLPP...LPDLRFEQ......SYLsSIcsA-...o........................Wt+VAaIT................................................... 0 14 31 50 +8422 PF08590 DUF1771 Domain of unknown function (DUF1771) Mistry J, Wood V anon Pfam-B_10757 (release 18.0) Domain This domain is always found adjacent to Pfam:PF01713. 
22.90 22.90 23.00 23.10 22.40 22.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.04 0.72 -3.96 30 501 2009-01-15 18:05:59 2006-01-06 16:21:49 5 28 240 1 355 500 1 65.70 29 10.99 CHANGED -Yp.....clRspAppthppRpchhpcuppAappGDt.....spA+pLSpcuKpttpphcchNcpAAptlacpsNp ..............................Yt.....chRppAtpptptRsphhp..........pAppAappGct.....ttA+tLSppG+tctpphcchscpAAc.tlacppN........ 0 112 203 290 +8423 PF08591 RNR_inhib Ribonucleotide reductase inhibitor Wood V, Bateman A anon Wood V Family This family includes S. pombe Spd1. Spd1p inhibits fission yeast RNR activity by interacting with the Cdc22p [1]. 20.10 20.10 20.30 20.70 19.30 20.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.55 0.72 -3.08 23 119 2009-01-15 18:05:59 2006-01-06 16:53:36 5 4 112 0 90 111 0 95.50 25 41.59 CHANGED uuLhoVGMRlRKSVsEGYKT...p..s..............t.s.h..................shtsstt.hssphEhsPFsuh.p.s.shhspsspssss..................sshpthsSup-uhtushs .....................ssLhoVGMRlRKuVs-GY+o....................................p..sh.................s.htshsshhssth.chhPhsu.h.phs.shhsptttsssp....................sshp..sssppt..t..t......................................................................... 0 16 44 75 +8424 PF08592 DUF1772 Domain of unknown function (DUF1772) Wood V, Finn RD anon Manual Domain This domain is of unknown function. 
24.70 24.70 24.70 24.90 24.40 24.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.87 0.71 -4.44 75 579 2009-01-15 18:05:59 2006-01-06 17:09:07 6 7 370 0 326 606 107 134.50 19 80.16 CHANGED Gshhuaoshl............hPAlt......plsssptlpshpslspts....shhhsshhussshs.shlAh..............h.thts..sssshlhsuuulhllushshThhh.plPhNstL.............tthp...sstsssshh....tshhspWstaNtlRolhulhusslh.lhAh ............................................Ghhhshuhhh............hPult...................ph.ss..pt.h.l..t.th.p..t..h.pth.........shhhs.s...hhhsshhs.hhhuh..............................thtt....sss.h..h.hhsuuuhhh.lu.h..hshThhh.......sP.lN.st.L...................................................tthp.......ts.ss..s.ssth..........pphhpcWsthshlRshhs.hsuhshhhhu.......................................... 0 115 217 284 +8425 PF08593 MUG2_C DUF1773; Meiotically up-regulated glycoproteins C-terminal Wood V, Bateman A anon Pfam-B_26890 (release 18.0) Domain This is the C-terminal part of some meiotically up-regulated gene products from fission yeast. The actual function is not yet known but the proteins are likely to be cell-surface glycoproteins. 25.00 25.00 25.30 26.70 20.10 24.00 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.72 0.72 -3.73 17 50 2010-01-05 10:44:21 2006-01-06 17:21:54 5 3 40 0 44 50 0 56.50 29 13.04 CHANGED chtht+t.shp+Rp...........sthsssslP......F.sspps.pstLPslsolpDlstLSc.QhppaLpG ...................................aLau+shshs++p...........sttsssshP.........stps.pPhL..sshusIhslshLS..QlShFLsG. 0 11 27 39 +8426 PF08594 UPF0300 Uncharacterised protein family (UPF0300) Wood V, Bateman A anon Pfam-B_20198 (release 18.0) Family This family of proteins appear to be specific to S. pombe. 
25.00 25.00 126.50 125.80 22.20 21.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.32 0.70 -5.41 6 9 2009-01-15 18:05:59 2006-01-06 17:25:57 5 1 2 0 9 11 0 209.40 25 43.02 CHANGED hhLpsppssaccLspaLpsGphPLpVLlHHVMLYcaYPsshQ-ALWsAVppYVpppVssstYTplHhhAspp+IGcIRMYLVcPcDIYshsssssWlsIsocsFpshlcLcpshptsslhpspsthpp..lhps.spSspEluWLhhhhulGSsuutFPlHAYLshKpplhpshlPcpl...hhhpcsDptlFpc.pshcchpsa.hpplhpDLshC-pap .....h....t..pshpplh.aLpssphPlpVLlHHVMLYchYPptlp-uLWpAVppYlpcpsssttYoclHhhAAp++lGcIRhYLlcPcDlaslsssssWlsIsscpFps.lcLcpsh.spslhpppsthpp..lhph.ssospEluWLthlhuhGssupuFPlHsYLsspppl.tphhPpsh...hahpppDphlFps.tshc.hpsa.hpphhp-L..C-p........................ 0 6 6 9 +8427 PF08595 RXT2_N RXT2-like, N-terminal Wood V, Finn RD anon Manual Family The family represents the N-terminal region of RXT2-like proteins. In S. cerevisiae, RXT2 has been demonstrated to be involved in conjugation with cellular fusion (mating) and invasive growth [1]. A high throughput localisation study has localised RXT2 to the nucleus [2]. 21.10 21.10 21.50 23.40 20.60 21.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.92 0.71 -4.41 17 118 2009-09-10 20:49:02 2006-01-09 16:37:06 6 2 111 0 90 121 0 136.40 32 32.14 CHANGED soNRGsKLhppuc.l..ttl..ssssshhpchlpYs..Ghp+tlLp.....................pttsphcp-ts-h-stpp---.-pctp.t.....sPhpplclcElLuPLspsu-lhsHPulS+sapSpsLpcLAhphlthlcpEQsslhphp+LLplhlGD .......................s.oNRGNKLptpu..chVppttL..ssstshhcctl-as....Ghp+..p..lLp...........................................pssshhDp-ss-l.D.....--.....-p...pc...spt.tst-.-sPasplplc........clLuPLppso-ls.sHPslS+sa...pScsLppLspphhthlcpEptsLhphppLhphhlGD................... 0 18 46 75 +8428 PF08596 Lgl_C Lethal giant larvae(Lgl) like, C-terminal Wood V, Finn RD anon Manual Family The Lethal giant larvae (Lgl) tumour suppressor family is conserved from yeast to mammals. 
The Lgl family functions in cell polarity, at least in part, by regulating SNARE-mediated membrane delivery events at the cell surface [1]. The N-terminal half of Lgl members contains WD40 repeats (see Pfam:PF00400), while the C-terminal half appears specific to the family [1]. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.11 0.70 -6.00 13 313 2012-10-05 17:30:43 2006-01-10 12:04:56 5 10 193 1 188 340 0 278.70 24 27.85 CHANGED slslsplShAs-otELuVuhpoG-VllaKFps..............Nphas...ptpssuh-hp.tphphssp.sstLlDIpcRusssl+pG..FhPhollphp.pGploulppSNIGFVAlGYcsGollllDh.RGPAlIap.-slpc..........lsstpouhsss......lEFuIMphssDsYSSIlllsGTst.GpLhTFKIlPsusGtFsVpFssss...........hsscupIlpIssl......ss-sGpSAhAohsthQ.....sLupGlhlsGhVlsooss-IRllpsspoKssHKsa.chsltssuhuhlsh.t....cppuhlLlsLhtsGpl+sholPsL+EltshplP........hslcuphlppSsl.LtsGD.lhhpsG.pEusLholhsppupt......................tppsspDpLaNssttIPsRPp...lssLQWs+G.otYsosp-LDhLlGGssR.PsSK .....................................................................................................................................p...........................................................................................................................................................................................................................................................................................................hhhspt..u.hhh..h.h...............t.......h........................................t....h..h......................................................t.p..ptht.......pt..h..sll....so..ccph+l.hshs.s.pp....s..sa.Kth.....cs..shhspu..ssV..h.............hpsuhsLsshh.usGc.lhshSlPu.L+.lhcsphh........shh.chchs.p...shsh..ossGp.slhhs.uPo.....E......lphl......oh.tpst............................................................................................................................................. 
0 45 83 135 +8429 PF08597 eIF3_subunit Translation initiation factor eIF3 subunit Mistry J, Wood V anon manual Family This is a family of proteins which are subunits of the eukaryotic translation initiation factor 3 (eIF3). In yeast it is called Hcr1. The Saccharomyces cerevisiae protein Swiss:Q05775 has been shown to be required for processing of 20S pre-rRNA and binds to 18S rRNA and eIF3 subunits Rpg1p and Prt1p [1][2]. 27.20 27.20 28.10 27.50 27.10 27.10 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.88 0.70 -4.81 40 373 2009-01-15 18:05:59 2006-01-10 13:30:36 5 9 290 5 256 351 2 229.00 27 92.17 CHANGED M....ssW.D........---h-sssssst.......ssts+W--E...s---slh-SW-..t---pct..cppcsp.tsscspsKtphctchpccp+tpchppccthc............p.sst-phscKtRh++hpc-uDLpsAt-LFG.......................................htssshsslshF.pPpoKp-FpchpcsLspp.lssh..c.pshpYss.ahp-LlRslstsLssssl+KlsooLssLhsEK.KtEKtuc...............sspKKputsts+sslssttcp.stasthssshhDD.....DDFM .......................................................................pW..-........tpt.p....................hppa-sE......--psl......h-.s.....W-.............tp--pc..........pp........t...p......st.......t....st.......h.p.tKt..th.tt...pht....c+pptpch....tpcph.t....................p.ssp-phsc+h+hc+hpc-uDL.ptAp-.hFG.........................................................t...ssshpslshh.pP.po+p-Fpchsch...Lssp.lsth...pcsh.pYss..alcslh+plsts.....L...p.ss-.......l.KKlsso.LsslssEK.K.pEKtuc.................tttK.K.pst...st.........spsph.s....s...pp....sh....s.s.h.sth....t..s..s.....hh.-D.........-D.FM.................... 0 93 144 207 +8430 PF08598 Sds3 Sds3-like Wood V, Finn RD anon Manual Family Repression of gene transcription is mediated by histone deacetylases containing repressor-co-repressor complexes, which are recruited to promoters of target genes via interactions with sequence-specific transcription factors. 
The co-repressor complex contains a core of at least seven proteins [1].\ This family represents the conserved region found in Sds3, Dep1 and BRMS1-homologue p40 proteins. 22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.38 0.71 -4.79 54 585 2009-01-15 18:05:59 2006-01-10 13:48:35 6 6 246 2 375 547 0 209.60 23 51.78 CHANGED ppshsclsplEppFsph+-ph................Yc-+LspLppcLp.lt..p.........................................Gsps-ahchhpclpcp+chclphsphhpcaplpslcpchps-hptscpcappphppl+-plhsclpp+ht+.lpc-RpphDls........................ssshshph...................................................................hps+phpct...sp.h................................................................................................................p+p+hsptts.htt...hh.........................l................................................................................................hpsp-lt-Dhpsh ..............................................h.pph.plEcpFsp.....h+-ph.................Yc-+LspL.ppcLpplt....p.................................................................sptsEYhc.lppLppphch+l.....phs.t.h.hp..php.lpslcpcattEhptuppcacppthtl+-plhsclpc+...hpc.l...pc-.+p.sh-ls.................ssphthp.......................................................................................................ht.pR.hpp.t...........................................................................................................................................ppp+hs..st............h....................................................h....................................................................................................................................................................Lp..-l.pDht......................................................................................................................................................................................................... 
0 102 177 287 +8431 PF08599 Nbs1_C Nbs1_N; DNA damage repair protein Nbs1 Mistry J anon manual Family This C terminal region of the DNA damage repair protein Nbs1 has been identified to be necessary for the binding of Mre11 and Tel1 [1][2]. 25.00 25.00 25.30 40.60 24.40 23.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.04 0.72 -4.06 5 81 2009-01-15 18:05:59 2006-01-10 13:57:02 5 6 52 0 33 71 0 64.30 74 8.88 CHANGED pKNFK+FRKVsYPGAGuLPcIIGGSDLlAHsR+KNSELEEWLRQElEEQsQpsREESLADDLFRY L.KNFKKFKKVsYPGA.G..K.LPHIIGGSDLIAHHARKNo.ELEEWLRQEMEVQsQ+AKEESLADDLFRY............ 0 4 6 14 +8432 PF08600 Rsm1 Rsm1-like Mistry J, Wood V anon manual Domain Rsm1 is a protein involved in mRNA export from the nucleus [1] 20.90 20.90 20.90 20.90 20.50 20.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.50 0.72 -4.13 11 189 2012-10-01 20:49:39 2006-01-10 16:15:18 5 7 140 0 130 193 0 89.60 23 20.42 CHANGED AhhLAhhGW.............suhscuphG.....LssCssCaRRLGLWMaKsKpsuths........hstLDllpEHh-aCPWhsspuQossucssttptt.t.....uGWclLspsL..........K ....................AhhLulhGW...................................p.shts...phu.........lhsCptChR+lGLWha...pthcsu.ss...............................s.s.hsshppHhpaCPahs.....p...st.p.....tp...................................................................................................... 1 33 53 88 +8433 PF08601 PAP1 Transcription factor PAP1 Mistry J, Wood V anon Pfam-B_20528 (release 18.0) Family The transcription factor Pap1 regulates antioxidant-gene transcription in response to H2O2 [1]. This region is cysteine rich. Alkylation of cysteine residues following treatment with a cysteine alkylating agent can mask the accessibility of the nuclear exporter Crm1, triggering nuclear accumulation and Pap1 dependent transcriptional expression [2]. 
22.70 22.70 22.80 22.90 22.60 22.60 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.50 0.70 -4.76 10 181 2009-01-15 18:05:59 2006-01-10 17:16:00 5 6 130 2 118 186 0 264.60 22 57.66 CHANGED sS.........htss.pt.hshsstpFsh-hscFsucLstususcssslsK...........tpssstsos.........lsulpspssps.s.sssssss.sosp.ssts.sspp................sshusD..h.tsstsshps.u........sssssoossoPSsuusuasspp.ossusss.P..spSPsshtpsss...............................................hhhp.t.s.s.s.sttssssshshlDssls....sss.h-s.LFtsaREPQcsshsp.DaD.......................................ssLhs-p.shs-.hsPht..sp....s.......supssssppstssphcs...c-s--sVssuccsshLsCocIWDRIoSHPKa..u-IDIDuLCSEL+sKAKCSEsGVVlNpcDVcssLs+a .......................................................................................ttttst..........................................................................................................................................................................................................................................................t.ss.......t......t...st.t........t..t.t...............pP...........................................................................................................................................................t.............hs....ht..........s..t..s..h.t.hppsppt.h.t..s...h................................................................sshhsct.....sc.ht....s......t..........t.................................t............sst.s..h.ht...p...hpt........pt......tpp.lsst..ttp.hpCscIW-+lp........s.hs+a..u-lDlDsLCsELptKA+CS-tG.lVlpppDlpphlt+.............................................................................................................................................. 0 24 58 100 +8434 PF08602 Mgr1 Mgr1-like, i-AAA protease complex subunit Wood V, Finn RD anon Manual Family The S. cerevisiae Mgr1 protein has been shown to be required for mitochondrial viability in yeast lacking mitochondrial DNA. 
It is a mitochondrial inner membrane protein, which interacts with Yme1 and is a new subunit of the i-AAA protease complex [1]. 25.00 25.00 62.50 26.00 19.50 22.90 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.40 0.70 -5.44 6 63 2009-01-15 18:05:59 2006-01-10 17:48:11 5 3 43 0 40 59 0 252.20 28 91.40 CHANGED MulaTPPupstsspD+.................SuspucsotslssstpFasR.PSLGL+LWGPLVPASDNhsGLaoLlulQoslGhhhhpRhRpL.........................................t..hlK+DIADFPoLNRFSpTpGDhah..............uPh........................hpcRFso.....h++sLallsGSlLLsQShLEhsRLThLhYDPWh-EAKsVR-KpFaNslV+aYHEGlDss+hhsKD.hsGp.hshNlPEV+QulAlsRApscApN.lopWFGPl-.YKPMoFsEaLDKlEaaL-hh-a......................hQshpptpcsthshlsplsscsptL-hh.ctNcpN++Rhc+lLcpp.p..s-lscu.uhslhhcupsp.hh...hhRulIL.tDpcsspDl-LcplWslY-PWssLAL-TuLSIKFlPos ...................ahPPst..s.s..tst.................................st.tstt.......p......phh.+.PSlGL.hWGPLsPAsDNh.uLashhshQ.hlGhhhhhhhRth................................................h.p.l..............s...........................................................................................................................................h.......h...h.hhhGshlh.tShLEhsRh..L.YDPWh-Ehp.hR..................................................+hthcsts...hWaGshp.YpPMshppahp+hp.alp.ht...................................h.pht....t..t....h.......hpplp..........pp.p....p.h.ph..ppspphh.p.l...........................p.s.t.ths...........ttp.............+.s.l....shp..p-.php.hh..apPW.pLt.-TphsI+hlPp.................... 
0 6 22 38 +8435 PF08603 CAP_C CAP; Adenylate cyclase associated (CAP) C terminal Finn RD, Bateman A anon Prosite Family \N 23.80 23.80 23.80 24.00 23.70 23.50 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.87 0.71 -4.89 35 461 2012-10-02 17:34:32 2006-01-11 12:58:49 6 11 296 10 293 478 8 154.60 41 33.01 CHANGED +cPs....phEL.cGpKWhlEp.cssp....slllp-s-hcpsVaIapCpsoslpIKG.KlNulolssCcKsullhDslVSul-llsspshplQVhGplPTIoID+oDGsplYLS+-Sl..ss.EIhouKSSplNl.l..Ps....c-sDa.pEhslPEQacoph..cs......uKLhTpss-ps ......................................tPshhEL..-GpKWhlEppcsps......slllp-.s-.hc.QslaIa+CpsoslplKG..KlNuIol......DsCc.KhulVh.DslV..u..s..lEllNspslpl..Q..........V..h........G..pVPTIoI-KoDGsplYLSc-SL...ss..EIsouKS..S-hNlhl........Ps...........t-uDa..p.................EhPlP.E..Qa+ohh...ps.......sc..LhTpss-h..................................................... 0 99 154 233 +8436 PF08604 Nup153 Nucleoporin Nup153-like Wood V, Finn RD anon Pfam-B_56527 (release 19.0) Family This family contains both the nucleoporin Nup153 from human and Nup153 from fission yeast. These have been demonstrated to be functionally equivalent [1]. 
25.00 25.00 47.40 39.50 16.40 23.50 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.97 0.70 -5.83 4 78 2009-01-15 18:05:59 2006-01-11 14:59:46 5 9 42 0 36 71 0 425.60 55 34.34 CHANGED PoTopsu.Nas-.sLoRPoLaRuH..LsFs.L-........SsALpsQPSoSSAasIuT..SGFSLlKEIKDshSQHDDDNISTTSGFSSRASDKDlssoKssSlPsLWSPEs-RSpShspsoppSsKKPuFNLSuFGo.Ssu.GNsSlLs.opLGDSPFYPGKTTYtGAAAs.RSSRhR.sTPYQAPlRRQhKAKPhu.uQ.hGVTSusARRILQSLE+MSSPLADA+RIPu.ssSsL...Spsh-p.slDh.c..uK+cKhD..hPPVQRLsTPpshslusNRShahKPoLTPuus.ptsscclDp..sspsts+cssL.tps.cppp.......huYPhhSoPAuNulos....GGGKMpRE+soa.huoK.hp.EEl.-sPlLPcIsLPl.oosuLPoFsFoosphosT.p.oPlshso.A.oppsphps....sSs.FoFSSPIVKST..cuss.sPS.Shsho...FSVPshKh...scsouscshl.slhpstss+sAssposssc-.....p.Ghh+PAKTLKpGSVLDhL+oPGF.SSPs.pssAsp.sssoP .................................................................PSTopou.NasD.VLoRPoLaRSH..LshohL-........SsuLaCQPSTSSAhsIGo.....SGFSLVKEIKDSTSQHDDDNISTTSGFSSR..ASDK.DlsVSKssSlP....LW.S....PEs-...RS+..S......lSQpouoSSKKPAF.NLSAFGoLSsSLGNoSlLpoSQLGDSPFYPGKTTYGGAAAA...Rpo+hR...sTPYQ.A..P....VRRQhKAK.hs...uQuYGVT......SSTA.RRILQSLEKMSSPLADAKRIP.S...s....SSPL...sSslDRSslD.h.s.shQu.K+c+.hDSph...PPVQ+Lh.TPKslSlAsNRohYFKPSLTPuuchp+sspRl...Dp..cpsst...h.ccphhs.s..p..s...tpp.pp......shSYPp.hS.hPAuNGLoSt..............sGGGKM+.RERoph...su.oKs.E..EEh..ElPVLPcISLPI.oooSLPoFsFSSP...oso.us.S..P.ls.so....t..uhs.sclphsoss...ssuSPhFp..FSSPIVKST..pAslhPPo.S....lGFT...FSVPVAKs..........sEhs..us.s.s.h..pslh.os.ss..pc......sssls.S.sspcc....pph-GsF+PAK..hLKpGSVLDlLKoPGF.sSsphs...s.u.t.......................................... 0 7 9 17 +8437 PF08605 Rad9_Rad53_bind Fungal Rad9-like Rad53-binding Wood V, Finn RD anon Manual Family In Saccharomyces cerevisiae the Rad9 a key adaptor protein in DNA damage checkpoint pathways. DNA damage induces Rad9 phosphorylation, and Rad53 specifically associates with this region of Rad9, when phosphorylated, via Rad53 Pfam:PF00498 domains [1]. 
This region is structurally composed of a pair of TUDOR domains [1]. 22.00 22.00 22.60 22.00 21.90 21.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.67 0.71 -4.37 8 79 2012-10-02 16:56:36 2006-01-11 17:18:52 5 3 79 0 58 87 0 136.30 29 10.55 CHANGED DsuhLspsDII.tsAVWsta..shsaYPG+llup..spspst.shVcFE-Gs.h-VpssDlahLDlRIGDpVcsct...s..palVpGLcsphot-sts.........I+ClRGYsTVhL..++hpp...uG+lu.pslhp.slScIal-lppW .......................ls.pplh...usWsth...shtaYPuphluh...sssp.p.hhVcF-Dup..tclcsps.l.+h......L-LRIGDtV+lct...s+hs.alVsGhppp...s...tss.t............lpsl+GasoVhL..Kp+ps......sG..th.spp.s.lhs.PlSpIaL-ht.W............................................ 0 7 26 49 +8438 PF08606 Prp19 Prp19/Pso4-like Wood V, Finn RD anon Pfam-B_6384 (release 18.0) Domain This regions is found specifically in PRP19-like protein.\ The region represented by this family covers the sequence implicated in self-interaction and a coiled-coiled motif [1]. PRP19-like proteins form an oligomer that is necessary for spliceosome assembly [1]. 21.50 21.50 25.60 30.70 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.16 0.72 -4.24 30 339 2009-01-15 18:05:59 2006-01-11 17:41:08 6 30 290 0 243 334 5 69.20 57 13.94 CHANGED ssssSIPuLLoshQsEWDAlhLEsFsLRppLpps+pELSpALYppDAAsRVIARLh+E.RDpA+ptLspls .......s.osTSIPulLpshQ......sEW.....DAlMLcoFsLRQQLppsRQELSpALYQHDAAsRVIARLpKE.+spAR-ALupl.p.................... 0 90 140 203 +8439 PF08534 Redoxin Redoxin Mistry J anon manual Domain This family of redoxins includes peroxiredoxin, thioredoxin and glutaredoxin proteins. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.96 0.71 -4.65 68 9684 2012-10-03 14:45:55 2006-01-12 14:51:33 5 39 3870 123 2384 24093 13509 140.60 20 71.03 CHANGED p......sGsp..h.P..shs...............h.t.stsspshslsp.......hpGKp.hll....shhsuhasPs................Cstpt...hlpch.sphhpspusshlsl..........ssssDsh........stphhuc...........ps.shhh.hsD..t.sushscshG..h.............ts.phhllsccG..pltththssts.t........hsshpshL ....................................................................Gp...h..P....shp........................h.........t...t...s...h...p....p...h....s...h......ss.........h.p..G.K.h....hl.l.....s.h..a.s....o.........a.....s.s....s.....................Cp.t.ph.........h.p.ch....tp............t.....s.....p......s.......s......s...l...l.s..l....................................s..s...s...s..psh............................st.pa.hsp..............................................tu.h.s...h..s....h....l.........h....D..............s...s...u...p.....h....s....p.....s....h..G..l..h..........................................ts...p.s....h.l...l....c..p.sG.....pl.h.h.hphss................................................................................................................................... 1 675 1408 1941 +8441 PF08608 Wyosine_form Wyosine base formation Mistry J, Wood V anon manual Family Some proteins in this family appear to be important in wyosine base formation in a subset of phenylalanine specific tRNAs. It has been proposed that they participates in converting tRNA(Phe)-m(1)G(37) to tRNA(Phe)-yW [1]. 
21.30 21.30 21.30 21.40 21.20 21.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.83 0.72 -3.86 72 579 2009-01-15 18:05:59 2006-01-12 16:46:12 7 11 497 2 327 541 82 61.60 33 12.94 CHANGED hupPsalElKuhh..ahG.Sp....tLohsshPtap-V....h-Fsptl.p.....ht.......Yplss-pttSclsLlup ......upPsalElKu.ss..asG.Ss...ttLohs.shPhap-V....hcFs.tl.t...............................................atlss-+t+SsssLlA.................... 0 95 193 273 +8442 PF08609 Fes1 Nucleotide exchange factor Fes1 Mistry J, Wood V anon Pfam-B_36022 (release 18.0) Family Fes1 is a cytosolic homologue of Sls1, an ER protein which has nucleotide exchange factor activity. Fes1 in yeast has been shown to bind to the molecular chaperone Hsp70 and has adenyl-nucleotide exchange factor activity [1]. 22.30 22.30 22.30 22.60 22.20 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.24 0.72 -3.50 17 214 2009-01-15 18:05:59 2006-01-12 17:19:00 5 9 186 0 151 200 0 92.50 33 31.88 CHANGED M-p....LL+WSIpss..........................ssspsussssssP..............................lsPchLspLF..GGPs-AsLMKtAMtslps..scsoLEsKlIAFDNFE.LIEslDNANNl ..................................................................hppLL+WoItso............................................ts..s..tps.sps...sp.s..............................................pth.sschL.pplh.....tsPs-upLMKpu.......hp....llps........spso...lEs+hhAhDshE.LlEslDNANsl... 0 46 86 129 +8443 PF08610 Pex16 Peroxisomal membrane protein (Pex16) Mistry J, Wood V anon manual Family Pex16 is a peripheral protein located at the matrix face of the peroxisomal membrane [1]. 
25.00 25.00 29.20 25.60 24.00 23.80 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.13 0.70 -5.44 18 280 2009-01-15 18:05:59 2006-01-13 09:38:16 5 9 214 0 186 267 0 296.50 26 84.37 CHANGED hp.tYpcalhcNsss...lu...plEosl+....hloYllsGRFscuchhSEhlaoh.pLLshhpDtIlppthpp...............................................h.ts..hp+lshhLshlpasElhlEhuAc+hhG-ps+WhsllhlphhKAshRlhl.LhhppuchlhosPls.h-ccsptppt.ppptspss...............................psssashpRo....G+sl....t...Ps.t.p...ht......lspshsppshct...t..p.lst.thlAEhLaIsRPLlalluhtt........hup+S........WpPWlluhul-hhShpLhpcpcph.........osh-+tEhpRRsht.....lhaah.hRuPFY-paT+s+lhpllphl.ptlPlhullutslhDYLshWpch ......................................................................................hYppaltcNss....ss...plEosh+..........sloal...ls..G.Rats....u.chhS.EhlaohspLlshhpD....t.llppthpp........................................................s......pplhhhLphlphsE.lhhEhsA..p+h..hG.cpt..............+Whllsllphh..KA...hh......RlhL.lhhh.p..sp.h.....hsPs..ls.h-hcspt.t.tttt..........................................................................................pp.sash.Ro............shsl.pl.ss.........sshp......h..........hpphhp.tpphp............p..l.........s.......h...t.h.hAEhlaIhRPLla.hlslth................hsp+S................WpPWlluhsl-hsuh.....p.Lh...pp.t.ht..............................................othE+t..Elp+Rshh............hhhal.hRusFY-......pho.csh.l..thhphh.p.hPhhs....hlst.l.-a..hhpp........................................................................................................ 0 64 101 148 +8444 PF08611 DUF1774 Fungal protein of unknown function (DUF1774) Mistry J, Wood V anon manual Family This is a fungal family of unknown function. 
22.80 22.80 23.30 23.80 22.50 22.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.35 0.72 -3.83 14 95 2009-01-15 18:05:59 2006-01-13 10:43:53 5 4 86 0 77 87 0 93.50 42 31.85 CHANGED RIlANVFIWshhlhsthalhhhpDaslGaulShLhhuLultQhhhKlhALQWIFAFlIhulhhVhSlhsulsthht+chhhp.........-pERtPLLs .....RIlANlFI..Wsh..hlaGthaIhh..hpDashGauLSlLohuLultQhh.h..K..l.IuLQWIFAFlIhulhhlhSlhsulsthhs+sh.htt......ssDpERtPLL............... 0 13 36 60 +8445 PF08612 Med20 TATA_RF; TATA-binding related factor (TRF) of subunit 20 of Mediator complex Mistry J, Wood V anon manual Family This family of proteins is related to TATA-binding protein (TBP). TBP is a highly conserved RNA polymerase II general transcription factor that binds to the core promoter and initiates assembly of the preinitiation complex. Human TRF has been shown to associate with an RNA polymerase II-SRB complex [1]. This Med20 subunit of Mediator is found in the non-essential part of the head [2]. 
25.00 25.00 25.00 28.10 24.50 24.80 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.61 0.70 -5.21 32 235 2009-01-15 18:05:59 2006-01-13 11:26:05 6 2 198 11 160 239 0 213.80 26 92.51 CHANGED Msl.sh...............lhh..sssp...s.tTlsphpcpLps.tsshtGpWsl-hcha+ssspshs..........................+hhhsls.ocpPspshhlhssss...........................................................ss.sDshhphlhtKlpshapp+pslps-ssts.Ypl.....GDFhlRlGsVh.ss...sh+GlllElEa..............................................................................................sssthspstsllc-Flpph........s.sts+.sh................h.ppthspl.................................hDhshQ.Yhclhs ....................................................................................................................t................hh...sssp......s.tslp.hpcpl.p......suttt..Gpasl-hcha+ss.spshs....................s.......................+hhaslp.Sch.P.t.psFslhcsss................................................................................................shhsD.s.hshlh.hKLpshapp.+pss+l-spus.Yph...............sDFhl+lGsVp.ss...........sh+GlllElE.Y.............................................................................................tssss.hss...shpllpEFlppa............thstsp....................sshh.tpptsthh...............................................s..DshhQ.Yhchh......................................................... 0 45 76 127 +8446 PF08613 Cyclin Cyclin Mistry J, Wood V anon Pfam-B_6792 (release 18.0) Family This family includes many different cyclin proteins. Members include the G1/S-specific cyclin pas1 [1], and the phosphate system cyclin PHO80/PHO85 [2]. 
21.00 21.00 21.00 21.00 20.60 20.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.84 0.71 -3.80 9 1457 2012-10-03 00:42:12 2006-01-13 14:18:39 6 13 314 4 1040 1807 12 153.60 21 39.34 CHANGED ccllptlsthlsRhsshssssspsSppstss.ps....................................Fhsh.s................................................................PslultsYhtRlp..........+astsss.....sVhlssLlYlcRlhc.........tphp.shslsuhNh+RLhlsulhlAoKhhsDhpho.Nppau+luGlsLcELNcLElsFLhhlDFcL ..........................................................................................................................................................................................................................................................................................................................t....................................................................................................................................................................................................................................................................................................................s...h.s.....l...p.a.l.tclh........................p.hs.t.h.o..........sshlhs.....hhYl.c.Rltt....................................t......h...h.l..s..........p..........h..s..........h+..R.......l.........lls...ulhlAsK........h.......h..s..D......................h.....a....s....N....p....t....a.u........c.............l........u.........G...l.........s........h.........p..E......lNtLEhpFLh.hlsacl..................................... 0 377 618 878 +8447 PF08614 ATG16 Autophagy protein 16 (ATG16) Mistry J, Wood V anon manual Family Autophagy is a ubiquitous intracellular degradation system for eukaryotic cells.\ During autophagy, cytoplasmic components are enclosed in autophagosomes and delivered to lysosomes/vacuoles. 
ATG16 (also known as Apg16) has been shown to be bind to Apg5 and is required for the function of the Apg12p-Apg5p conjugate in the yeast autophagy pathway [1]. 35.00 35.00 35.00 35.40 34.90 34.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.43 0.71 -4.48 16 330 2009-01-15 18:05:59 2006-01-13 14:49:26 6 13 233 8 202 320 1 170.80 25 44.66 CHANGED W+pclhcpLppRD+hp.psap-lhpph.................ppL.-csth.th.sptLps......t.................tt..usshsssht.....t.tthhhthpt-Lsphh+ppuchtppllplssplpcpcpchtppppplpplppphppLcpcl+cLcpplpp+p+sspsLpDEltuLplphshLE-+lp+lpcENppLlpRWht+tspEA-tMNp .................................................................W+pch.ttLp.Rsphp.th.........p....lh.th..................spLh-.+sshhph.stphp.................t...t...t..........................tt......s....t..t..st...h.t.s..s.tht....pht.....phhhphppELsphp+ppuc...........hspplhphspphpphcp..phptpptp..lsphppphspLcpchpchcpplp-hp..+t....p...........plp.DEhtuLp.....l...ph.......shhEc+hcch.......pcENpcLlpRWMtcpsp-ApthNt............................... 0 53 96 150 +8448 PF08615 RNase_H2_suC RNase_H1_sml; Ribonuclease H2 non-catalytic subunit (Ylr154p-like) Wood V, Finn RD anon Pfam-B_36578 (release 18.0) Family This entry represents the non-catalytic subunit of RNase H2, which in S. cerevisiae is Ylr154p/Rnh203p Swiss:Q12338 (. Whereas bacterial and archaeal RNases H2 are active as single polypeptides, the Saccharomyces cerevisiae homologue, Rnh2Ap, when expressed in Escherichia coli, fails to produce an active RNase H2. For RNase H2 activity three proteins are required [Rnh2Ap (Rnh201p), Ydr279p (Rnh202p) and Ylr154p (Rnh203p)]. Deletion of any one of the proteins or mutations in the catalytic site in Rnh2A leads to loss of RNase H2 activity [1]. RNase H2 ia an endonuclease that specifically degrades the RNA of RNA:DNA hybrids. 
It participates in DNA replication, possibly by mediating the removal of lagging-strand Okazaki fragment RNA primers during DNA replication. 20.80 20.80 21.80 20.90 19.90 19.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.82 0.71 -4.33 43 277 2009-01-15 18:05:59 2006-01-13 16:09:50 6 8 240 10 196 272 0 130.50 24 66.56 CHANGED ssplLPscIp......asGsss.sscaFps......................................................pspppspph.........................................................hpsaFRGRpLhGcplslPp.uapGhlhppspph...........................................................................scspspp.......................................................................................................hp.pupFpc..hsh......................................Ws+-shP.tssDshhcsl.cahplupsl ...................................hphLPscIp........hsG.sss..sspaFps..............................................................................................................t..ptpstth.............................................................................................................................hpshFRGRpLpGppls..lPp.GapGhVhpp.ppp.....................................................................................................................................................................................................................tptpspt................................................................................................................hpspup.F.sp..hsh.....................................................................Ws.h-phP.sssDs.hhpuh.pWhplAps.............................................. 0 72 115 164 +8449 PF08616 SPA SPB_interacting; Stabilization of polarity axis Mistry J, Wood V, Bateman A anon manual Family Swiss:Q99222 has been shown to interact with the outer plaque of the spindle pole body [1]. 
In Aspergillus nidulans the protein member is necessary for stabilization of the polarity axes during septation [2]. and in S. cerevisiae it functions as a polarisation-specific docking factor [3]. 20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.20 0.71 -4.37 27 396 2012-10-02 14:18:06 2006-01-13 16:54:27 5 12 241 0 281 674 2 111.20 28 17.65 CHANGED tpshLphhshhss.lhhL.hsAlLtpKRllhluhppsusp..........lspalLuhstlhss...sshhpth.tp....FPY..hslSp....l-tLpchsu..........aIsGssNPhF.....cpppph.WDllhDl-ssplhlup ................................................hhphhsshh.hp.lhh.L.aphlLhtc.lllhuss....ss..................................suEhVLuLs..u...llss...........ht.hp.th.tph........................hP...Y..hslpc.............hsch..p..t..ssu..........................................hIhGV...TNPhF.......tpphph..W.s.h.l.l..clsssph....s.......................... 0 97 144 217 +8450 PF08617 CGI-121 Kinase binding protein CGI-121 Mistry J, Wood V, Bateman A anon manual Domain CGI-121 has been shown to bind to the p53-related protein kinase (PRPK) [1]. PRPK is a novel protein kinase which binds to and induces phosphorylation of the tumour suppressor protein p53. CGI-121 is part of a conserved protein complex, KEOPS. The KEOPS complex is involved in telomere uncapping and telomere elongation [2]. Interestingly this family also include archaeal homologues, formerly in the DUF509 family. A structure for these proteins has been solved by structural genomics. 
21.40 21.40 23.00 22.30 18.30 21.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.79 0.71 -4.56 46 397 2009-01-15 18:05:59 2006-01-13 17:30:53 5 8 349 6 278 392 48 150.60 22 83.16 CHANGED acslpss..........t.lhp.hhttp...................thphshlsuph...................lhutp+lhtAlh+Alp................shpps.phpo+sltsEIlhpLSssppIs-ul+paGlpcs.sssllslhls.................sp.....ppthpc...lhp.hlcupth.......................shsp..t...tphsDhppl+KhYKlsstt.......................t.tpltphllsphAl+ .................................applpNs.....lhpphhttp...............................phshlsush.......................................lsshhplhsAs.+A.lp.................................sh.pps....chpo+slpoEllhsLSsspp....Is.-Ah+caGlscs..sss.llllhhs..................................pppp.hpp.......ltp.h.lcGptl.......................................................s.hpp..........phschspl+K.hYclss.t..........................t.tpl.phlhtthuh........................................... 0 83 159 233 +8451 PF08618 Opi1 Transcription factor Opi1 Mistry J, Wood V anon manual Family Opi1 is a leucine zipper containing yeast transcription factor that negatively regulates phospholipid biosynthesis [2]. It represses the expression of several UAS(INO) cis acting element containing genes and its activity is mediated by phosphorylations catalysed by protein kinase A, protein kinase C and casein kinase II [1]. 
20.10 20.10 20.10 20.60 19.80 19.00 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.29 0.70 -5.56 4 279 2009-01-15 18:05:59 2006-01-16 10:36:52 5 4 125 0 221 291 0 159.50 14 68.11 CHANGED Ds-hh.tsStLstl+............................................to.thYppoKsaSPRhR.GAEhVEpsl.....PVssTlsssspcoGlEusscathpphspspsSSso..pt.pp+..................pppp....c+lpushsshp.p-tpts..o.-h......................susscss.s.sspThsshDDpposphoso...........sptsussssspppSpWQp...clhlosouLu.uMSpEShKSL+YCLphL+hAsu+LtpslstLpsslschs.....psshstshs.............tpssptp...hspplTtLKtDVluTI+pVlcVVupYAGuALPEsARNhVRpalLSLPtRWupsopoo........................................ssttP..sppstsuhsspAAp+lLsLApEuLDhhuplhsllspoL-+AEtWsE .....................................................................................................................................................................t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 46 115 186 +8452 PF08619 Nha1_C Alkali metal cation/H+ antiporter Nha1 C terminus Mistry J, Wood V anon manual Family The C terminus of the plasma membrane Nha1 antiporter plays an important role in the immediate cell response to hypo-osmotic shock which prevents an execessive loss of ions and water [1]. This domain is found with Pfam:PF00999. 23.30 23.30 23.40 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 434 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -12.81 0.70 -5.20 25 146 2009-01-15 18:05:59 2006-01-16 11:42:25 5 5 122 0 91 137 0 434.30 23 49.53 CHANGED WMsRLP+lspsu+.ShSl+RsDTpss..sstp..t..................hs.sshs.TsGlPht.hGGhhRRh+..cccpcsts.....................sp...sshptcRcpc+cc.psthhshG...shspssh...............hPp.cttps..pt...........p.ttpttt.t........................pppsp.sptsp..sppc-ptpp......sspAYtEGcplIIEDccG-llcshclsptpsttt.............................spscs-tspcshs.....s..shtpl++plupa.shttt.......................tthc+sps..................scs+pcpsaAYphsNpIIlEsEDGEVl+RYcIs.s+sp..................sppscp....usV....ls+shohlGlcs...............................................................sppp..tp.ps.tspsppplstcpssspclhsstcsp.........................................stsspsh.sspscpplpcphsp.httsss.............t..s...pspspcDo-s-s......................................p..--ppETssER+RRLuALGphs.ssc-c-DcE 
...............................................................................................WMsRLP+lpptu+.ShShp+sDsps...tp.t.sp..................hs.sshs.s.Ghs....GshhRRp+..cccptppt......................spp...ssht.p+p..cpc...thhshG...shsposh.......................................................................hPp.+p.ps...........................t..................................................................pppsttppptp.p.pp.p-pttp..........shpsYpEGcplIlEscpG-llchhchpptp.tt................................tpstspt.ttpshs..........shtph++thssh.tht.tt.......................................hp+tpp.....................tcs+.pppshAYphuNs.....lIlEsEDGEVl++Yclssctt....................spp.ps........ssl..........ls+hhohhGhps...........................................................................................................hsppph.tphpph.sspsp.....pphspp.......sss.pthht.tppc..................................................................................s.t.ttphtp.....st..t.t...t.........................t......t..s.stp-s.psps....................................................tttpptET.sERcRRLsALG.hs.s.....tpppD....................................... 0 14 45 76 +8453 PF08620 RPAP1_C RPAP1-like, C-terminal Wood V, Costanzo M, Finn RD anon Manual Family Inhibition of RPAP1 synthesis in Saccharomyces cerevisiae results in changes in global gene expression that are similar to those caused by the loss of the RNAPII subunit Rpb11 [1].\ This entry represents the C-terminal region that contains the motif GLHHH. This region is conserved from yeast to humans. 
21.10 21.10 21.10 29.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.39 0.72 -4.13 35 280 2009-01-15 18:05:59 2006-01-16 16:11:19 5 4 247 0 201 281 2 73.90 43 7.90 CHANGED spphRFDFcGsLl................ssppp.....tshssppGLHHHu-sPchAGYTlsELhpLuRSohsuQRslAlpsLG+IL.h+hspt .........h.phRFshpGpLl...................................s.p.s......hslPsphGLHHHG.--P-tAGYTltELhpLuRSslssQRslAlpsLupIlh+htt.t......... 0 64 106 161 +8454 PF08621 RPAP1_N RPAP1-like, N-terminal Wood V, Finn RD anon Manual Family Inhibition of RPAP1 synthesis in Saccharomyces cerevisiae results in changes in global gene expression that are similar to those caused by the loss of the RNAPII subunit Rpb11 [1].\ This entry represents the N-terminal region of RPAP-1 that is conserved from yeast to humans. 23.30 23.30 23.30 23.80 22.50 23.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.31 0.72 -4.37 39 266 2009-09-11 10:38:13 2006-01-16 16:14:21 5 4 229 0 189 264 2 48.20 35 5.46 CHANGED sppIcpENhppLp..............sMStpEItpEpcELhssLDPpLlphLh+Ruptcpss ........ppIccENhp+Lp..............sMS.cEIhpEppcLhspL...DPsLlphLhpRtphct..p................. 1 56 98 151 +8455 PF08622 Svf1 Svf1-like Wood V, Finn RD anon Manual Family Family of proteins that are involved in survival during oxidative stress[1]. 
25.00 25.00 147.40 61.10 23.40 18.10 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.13 0.70 -5.45 28 181 2009-09-11 15:23:04 2006-01-17 13:27:54 5 2 133 0 129 169 0 327.80 39 86.05 CHANGED TsVETQTFYFss.su.phGFsQlIaSNlh.Gl.psTsQFsh+lFsscss............c.slWpSspLcN..Fc.-tssFhA....DslulcL..............sc-usp.................YsIKusl........scculV-LshpR..hsPGFplG...csGsoaY.............GsD.psPWGoMRHsFWPRssssGTIss...........ps..............................................................psl-lps......huhFltAl.QGMKPHHAAupW.NFlsFQo.........tpaSAlhMEFTTP.SYus............TpVslGuIsccscI...lhsus................s.sslpHhpscpDspssWshPpsIcapasGpsp-..scs.ph........................................lpGsL.............tphl-RlDVMAElPsFVKsIVuuVAGTKPYIYQYs....pchslclphst........p..pEcGhhasEsTFIS ......TsVETQTFYhhs.ps.hhuhsQlIaSNlh.Gl.asssQFshKlFsscss............p.plWpSspLpNh...F................p.....-thsFhu....DslulcL..............sp-sss.....................YpIKusl........scpshVslplsR..hsPGFthG...csGsoaa.............GsD.psPWGpMRHsFWPRspspGoIss.........cc..................................................................pslchps..........huhFlhAl.QGMKPHHAAu+W.NFhsFQo................s...aSAlhMEFT.T.P.SY.us............TpVslGulscc.scl...lhsus...................s.spspHhso.ppDspssWs.PpsIcasasGpspc...sp..ps............................................lpusL.................tphl-RlDVMuElPuFlKsIVuulAGTKPYIYQas....................pphs.h..clphss................-....pEcGhhasEuTFIS.............. 0 32 65 106 +8456 PF08623 TIP120 TATA-binding protein interacting (TIP20) Mistry J, Wood V anon manual Family TIP120 (also known as cullin-associated and neddylation-dissociated protein 1) is a TATA binding protein interacting protein that enhances transcription [1]. 
25.80 25.80 26.20 25.80 25.00 25.70 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.02 0.71 -4.67 23 345 2009-01-15 18:05:59 2006-01-18 09:29:55 5 14 251 3 238 339 1 164.60 44 14.11 CHANGED sLIhshL............splLPhlhs-Ttl+sELlRcVpMGPFKHplDDGLElRKsAYEslYoLL-.....oshupls...lhch.hs+lhsGLpD.cpDI+hLsplhls+L........sslssctlhp+....LD..........plspsL+ssLs.tKlKssAVKQElE+ppEhhRosL+hshsltpph.......ssspsss.............tWspahcplpps ................................LlpshL.sp.lLPhLY.sETpl+pELIREV-MGPFKHpVDDGL-lRK..........uAFEChYoLL-.............osl..spl.c..........lh-F.ls+.l.tsGL....cD..caDI+....hLsaLMLs+L........usls.PstllpR.......LD.........................pLlEsL+sTho.tK.lKssuVKQEhEKp-EhpRSshRslsuL.hpl..................spsppss.................hhppa.pplpp.s................................................................................................................. 0 79 122 186 +8457 PF08624 CRC_subunit Chromatin remodelling complex Rsc7/Swp82 subunit Mistry J, Wood V anon Pfam-B_56720 (release 18.0) Family This family has been identified as a subunit of chromatin remodelling complexes. Saccharomyces cerevisiae Swiss:P32832 and its paralogue Swiss:P43554 have been identified as subunits of the RSC chromatin remodelling complex, and SWI/SNF chromatin remodelling complex respectively [1]. 
25.00 25.00 35.90 74.50 20.80 19.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.46 0.71 -4.26 22 230 2009-01-15 18:05:59 2006-01-18 11:32:37 5 3 133 0 176 233 0 138.20 50 24.36 CHANGED hsltsDEhhls.pDPcGEp..........Klcc.GpLtGGRcY+h+TFTlh....s+G..cplYMLuTEsARsl....GaRDSYLhFppH.sLaKhlsspsEKpcLI-pslIP.....SY+uRslslVTARSlF+EFGA+lIhsG.......++ll........DDYacpcAtp....pG.s ...................h..h.sDEhhh..tDspGEp..........KVDp.GpLhGGRc.Y+sRTFsls....sRG...c+LaMLuTEsARsl....GaR.DS.YLhFpKp+pLaKIIsspsEKcDLIcp..-llP....aSY+u.R.p.IulVTARShFRpFGA+lIlsG................++Vh........DDYapscARcpG..h.................. 0 41 91 147 +8458 PF08625 Utp13 Utp13 specific WD40 associated domain Wood V, Finn RD anon Pfam-B_8625 (release 19.0) Domain Utp13 is a component of the five protein Pwp2 complex that forms part of a stable particle subunit independent of the U3 small nucleolar ribonucleoprotein that is essential for the initial assembly steps of the 90S pre-ribosome [1]. Pwp2 is capable of interacting directly with the 35 S pre-rRNA 5' end [1]. 21.00 21.00 21.30 21.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.65 0.71 -4.54 35 326 2009-01-15 18:05:59 2006-01-18 11:48:04 6 20 280 0 230 328 4 140.80 32 16.82 CHANGED EQpLpNhlpptcYpcAlhLALsLc+Ph+Lhsll....cssl..............p-ppuh....ppl-......psltpLss-Qlh.....pLLchhR-WNTNu+ssplAQtlLpslLpphsssc.Lhpls....................u.......................ltcllEuLlPY........................oERHapRl-cLlcpoYhlDYslppMs .............................................................-QpLpNhlp.p.t.capcAltLAlpLs+PtplLslh.pshh.....................................pctpsh.....ptl-...........psltpLsp-plh.......tLLphh+cWNTNuRpsplAQtlLtsllpphsssc.lhpht...........................................u.................................hpphl-uLlsY.......................................T-RHapRlsc.Llppoahl-asltpMp........................... 
0 81 128 190 +8459 PF08626 TRAPPC9-Trs120 Trs120; Transport protein Trs120 or TRAPPC9, TRAPP II complex subunit Mistry J, Wood V anon Pfam-B_15686 (release 19.0) Family This region is found at the N terminal of Saccharomyces cerevisiae Trs120 protein (Swiss:Q04183). Trs120 is a subunit of the multiprotein complex TRAPP (transport particle protein) which functions in ER to Golgi traffic [1]. Trs120 is specific to the larger TRAPP complex, TRAPP II, along with Trs65p and Trs130p(TRAPPC10). It is suggested that Trs120p is required for the stability of the Trs130p subunit, suggesting that these two proteins might interact in some way [2]. It is likely that there is a complex function for TRAPP II in multiple pathways [3]. 19.30 19.30 19.40 20.00 19.10 19.20 hmmbuild -o /dev/null HMM SEED 1185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.79 0.70 -14.13 0.70 -7.23 9 449 2012-10-04 00:47:01 2006-01-18 12:57:28 6 15 255 0 329 471 2 725.10 15 78.18 CHANGED Mp...sthS..h.ssuplRslVlPI.GchspppFpcahphltpp.sclpLtDl...hspppps.FssQsaspGplhhcFhhus...s.p..hcDFpsaRKshslIGlssh.ps.shs.....h..hpcpYPssls.pphhhFs..Ppsp........tp.cph.shF..stsp.p..pthcolhpDlstshLhshpsa.....plol+SsssI............................................................sh.ssshsopssluS...pp..+spp+p.GR..KhhGsahLLAGph.DAhppahpAlthh+tssDhLWhuuALEGhssshVlhpaht.sh....................................spht..sshp...............................................sL.p.lphhhscll.hYppuh.p.pt...sPtlh.hEuhL+hh+hhhth..................................sph-lhphl.pshthplsphshp-phplYuthAphauslGhpRKpAFhhR.lhhth.s..........................shtt........hhttlhthhsls.pscu.phtsphtp.t...............WssLQhplLp.hlpsu.phsD...hhphsshLLppahphlssspQpplhptlppsh......hpshplshsah.P.h.hth.................l.h....t....pslsp.........spphs.tPF.asPapp......htssss.p..hhhlhs-.sphplpltNPhsF-lplpsltLssctsph.........................................................pshshuhhl..Psu.chhhLshhshpsGplpl.Ghplphhss.sthhh..s.p.......................cssp................
.slpllPs.PtLplhs...............t.hossshhLh-GEppphhIplpNtu.shslsplphsh..ps.c.h.ph.h....sthh.stch..hp.....h....hp............slpPsthhphplp...................................................sstts...hpt.thllp.u.pt..-top.ah+pLslPlplslh.ulclsphchlsh..................s.p..........s..p.....ss.chpLLlL-hhNuhtc..tlp............h.h.....sshtspphhlcsststRhllPlc+h...thshs.hshPpl.ppp.hlps..thstt-chph+ch.hhhppllp+lpspW+.spsp.....pGplsl+sh..p.LssphhphL..s.lphthpl.tssppt...p.tp.................hth.ss.hhshchhlh.psppsls............hhsh..hstpsshh..p.ppplLhsGsLpph.....lpstspsshphshhh ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.....R..h.hu.h.h..u....u...h.........u.....................................D..W.u........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................h..................................................................................................................................p.th.............................................................................................................................................................h..apsh............................................hh.t-.hth.h.l.Nsh.h.pl.lpphtl...p.h.................................................................................ps.............h...l..........t...............h.......l...hh.s............t...............G.l...l.....G..h...ht...........................................................................................................................................................h.lh...P.h.h....................................hhtGp...h.h.h.N.u....h..h.h...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................................................................................................................................... 1 116 199 276 +8460 PF08627 CRT-like CRT-like Mistry J, Pilcher K anon Pfam-B_67420 (release 18.0) Family This region is found in proteins related to Plasmodium falciparum chloroquine resistance transporter (CRT). 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.76 0.71 -4.21 3 27 2012-10-02 19:55:49 2006-01-19 09:12:24 5 3 19 0 16 29 3 103.40 26 25.10 CHANGED -EKpPLLSsIN-sDD-.NapDlNlKoPlshhSNIKKcSl.p+FKsaLKNSMSKETlTILIYVVLYIlSGVINSVLLKKVMNKFTNYGFFLSQLTNYGYVPIFGAlshYKIaFTsDIPKETRsFPQaKFVI .................................................................pthhth.......................................................tcsh.lhlhhlshlhsuVhNplLhKhhhhshpNYs.aFLsQl.TshsYlsl.aaulhhahhhhtstlsp-.hthPph+Fh......................... 0 10 13 16 +8461 PF08628 Nexin_C Sorting nexin C terminal Mistry J, Wood V anon Pfam-B_5897 (release 19.0) Family This region is found a the C terminal of proteins belonging to the sorting nexin family. It is found on proteins which also contain Pfam:PF00787. 
22.30 22.30 22.50 22.70 20.80 22.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.30 0.71 -3.99 24 634 2009-01-15 18:05:59 2006-01-19 10:04:24 7 21 243 0 395 596 0 110.60 26 11.49 CHANGED WLR.Rsllshlp....phhG...sTIp+hlp-plp.plhs-cplsphlshl+-sla......PsG.........phtp.s.....R......oppp+tcscppAp..phL.hphlP-hlspllGppssppuhpplFssLQpthl ...........................................WLp.+sl.lshlp....ts.hG..........ss.lp+.h....lpcp....lp..lhs-pplsphlphl...+.-uhW...............PsG...............................htt.s........sR...........................................otpp+t.cscp.pAp....ptL.h.............shlP..-hlspll...G...ppsspcuhtclFpsLQps........................................................... 0 112 184 296 +8462 PF08629 PDE8 PDE8 phosphodiesterase Mistry J, Vasta V anon Pfam-B_72889 (release 17.0) Family This region is found in members of the PDE8 phosphodiesterase family [1]. It is found with Pfam:PF00233. 25.00 25.00 28.30 30.80 20.60 19.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.33 0.72 -4.17 2 24 2009-01-15 18:05:59 2006-01-19 11:07:15 5 5 13 0 8 46 0 49.50 77 6.74 CHANGED MGCAPSIHVSQSGVIYCRDSDESsSP+QTTolSQGsAAsL.GLFlpTDAA-s .MGCAPSIHVSQSGVIY..CRDSDESNSP+QT.TS.VSQG..PAAPL.GLFVQTDAADA.......... 0 3 4 7 +8463 PF08630 Dfp1_Him1_M Dfp1/Him1, central region Wood V, Finn RD anon Pfam-B_28140 (release 19.0) Family This is the middle regions described by Ogino et al [1]. This region, together with the C-terminal zinc finger (Pfam:PF07535) is essential for the mitotic and kinase activation functions of Dfp1/Him1 [1]. 
25.00 25.00 25.20 26.80 22.90 23.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.62 0.71 -4.34 21 135 2009-01-15 18:05:59 2006-01-19 16:09:07 5 5 127 16 103 138 0 126.30 36 19.62 CHANGED sDl......Lt+A.+chtMKlWshEKL.pRhLpslsssppshpptp...........................tpspspssLopLLcsEKlaGsoDRDPpstRcDlpYF+..tPalYlaDlspph+PIhl+Ea...chhp.pct......aPph+sss.G+CPFls- ...........pDlL.p+A....+phshKIWuhEKl.pRhLpsltsspsstttt..................................................spspscssLspLLcsE+lhG.P.oDRDPpstpc-...lhaFK..uPalYlaDhppch+PlhlREY..........cls.pp...p-us..........WPph+sus.G+CPFlp-.................... 0 26 57 89 +8464 PF08631 SPO22 Meiosis protein SPO22/ZIP4 like Mistry J, Wood V anon manual Family SPO22/ZIP4 in yeast is a meiosis specific protein involved in sporulation [1]. It has been shown to regulate crossover distribution by promoting synaptonemal complex formation [3]. 26.00 26.00 26.70 26.00 25.80 25.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.59 0.70 -5.32 19 164 2012-10-11 20:01:01 2006-01-20 08:51:43 5 8 135 0 108 177 0 243.10 19 29.98 CHANGED phAhppsDhshApthhs+ucphh...hhssphstpLuchhYshGhphhppp.......shspuhphlpcuhchhph....schp..........pppsphppl+hpsLphLspshlpspstcsh.c.shchlphh...pp-hsp+.shhhLthchhhp.....httsthpc.Lpphltshshscsshphhlptht.hhspssttsstsl.hhlhs+hpssss...hhchtlhsphhhhstppp.ssp.hhcsltthls..hl.t.hspplosc.shsshhslLWspspphhptppYs.uhpahphuh.pthhh ........................uhtpschphAphhht+scthh....hhpsphh...t.pLsphhYshGhphhppp............phppuhhaLpp.uh-lh..............schp.............ptssphtph..phplLphLspsh...lphp..s...p.thtc..shphlphh...........pp-hs.p.psslhhLthclhhp..............tpthpc.Lhphlhp..h..s-.....sshphhlphhp.hhp.ps....t...t....sshth.hhlh.p+h.......pss....ts.....hhphh..llph.hhh.hppttp..s.p..lctl.................hh.....p.htp.lst......p.thpsh..hslLWpp..utp.aphppas.uhpahphu............................................................ 
0 24 53 85 +8465 PF08632 Zds_C Zds1_C; Activator of mitotic machinery Cdc14 phosphatase activation C-term Mistry J, Wood V anon Pfam-B_44907 (release 19.0) Domain This region of the Zds1 protein is critical for sporulation and has also been shown to suppress the calcium sensitivity of Zds1 deletions [1]. The C-terminal motif is common to both Zds1 and Zds2 proteins, both of which are putative interactors of Cdc55 and are required for the completion of mitotic exit and cytokinesis. They both contribute to timely Cdc14 activation during mitotic exit and are required downstream of separase to facilitate nucleolar Cdc14 release[2]. 25.00 25.00 34.70 33.70 24.80 16.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.61 0.72 -4.57 11 139 2009-09-11 06:09:32 2006-01-20 16:03:23 5 2 123 0 104 153 0 52.40 63 5.56 CHANGED ohSTVlMhDaRhPIpVERAIYRLSHLKLSNP+RsLRpQVLLSNFMYSYLNLVs ..........psslsh..sRFPIh.ERAIYRhuHlKLANP+RsLhpQVLLSNFMYuYLshVp..... 0 27 56 89 +8466 PF08633 Rox3 Rox3 mediator complex subunit Mistry J, Wood V anon manual Family The mediator complex is part of the RNA polymerase II holoenzyme. Rox3 is a subunit of the mediator complex. 
25.00 25.00 50.10 49.00 23.30 22.80 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.40 0.71 -3.98 13 111 2009-01-15 18:05:59 2006-01-25 16:29:35 5 2 107 0 82 105 0 192.60 34 59.81 CHANGED YpsppPsPhpDLlslYGLsslAcpVARsss.DGpKhs+LRKSYKspIpcL..uG+Fssl.sc..Nt.GGlhshlh.............hps.utDlhpt.......hss-.hpcthpshshuhhpsPp.DW..spsVLuphcpShssthtNt......sshssschuhshsGotus.ss.stp.....t.spsKRph+Kpshu-toh.shuEth.DD...................hKRR++ ......ap.stPsPppDLlslYGLsslAppVARssP.sGpKh..N+LRKSYKG+IpcL..uG+acs..ltpc......sssGGhhshhh...............................tps.sp-lhhs.........hssphhppth+shp.ht.G.hP.p..-a...psVLup.c.ohssthttt...........st.ssssh....s.h.shs...sst.t.s.ts..s.p..........t.sRscRth+KRshs-tuh.uauEua.DD....................hK+R+h................................................... 0 11 37 66 +8467 PF08634 Pet127 Mitochondrial protein Pet127 Mistry J, Wood V anon manual Domain Pet127 has been implicated in mitochondrial RNA stability and/or processing and is localised to the mitochondrial membrane [1]. The Pet127 family is part of the PD-(D/E)XK nuclease superfamily [2] including a full set of active site residues. 
25.00 25.00 54.20 46.70 18.70 21.50 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.87 0.70 -5.24 11 147 2012-10-11 20:44:44 2006-01-25 16:32:47 5 4 140 0 111 154 0 272.20 48 34.04 CHANGED aloPScDppLlclAcctpKKYsuSTSSMTSlLSQlHaLLSsaRslsh..utlSpsF..s..sppssFopss+hPoSllLp.......hpsslauIDuDK....shDp-..ILSsLGHuLEphLTsccc-Ftphh.............ppphss.t.pphs-uY+YuphGcFlhRSQLDsaDs+LP.GoGsFDLKTRAVsuIRhDhsphpps...suYcIp+hhGphESaEREYaDLIRo.AhLKYSLQARIGcMDGIFVAYHNhS+IFGFQYlPLpEhDptlHuos-pt.....................................lAssEFphShplLpKlLc .......................YlTuS+DpsLhplApcppKKYhGSTSSMTulLS+hHaLLSsaRslsh..stl..Sp.sF....s.tpspsF.Tpht+hPuulhL+...........hccGlYu.IDuDK....phDs.t......s...l.Lo.LG+ohEKhLThsp--Fc+Yphp...............................ppp.phsttt...pst..sEsYH.....Yoph.Gc.FlMRSQLDAY..DPRLP.GoG...hFDLKTRAVsuIRhDhpphcps...hGYpIcpth..GpaESFEREYaDhIRo.AhLKYSLQsRlG+MDGIFVAaHNhpRIFGFQYlsLsEhDhtlHGppctt...................................................lGDpEF+hSlpLhsclLp................................... 0 36 67 98 +8468 PF08635 ox_reductase_C Putative oxidoreductase C terminal Mistry J, Wood V anon manual Family This is the C terminal of a family of putative oxidoreductases. 20.70 20.70 20.70 20.90 20.60 20.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.74 6 39 2012-10-02 13:21:44 2006-01-26 11:15:45 5 2 28 0 27 83 46 132.10 46 33.14 CHANGED RYLpsVQKMKpIlcENNLpVMuTsARYssAYEHssKhsWWsKSlssGPlVEQuTHFCDLSRYFGGDVDlsTlpu+ulEWaE.sG+LoKlPlDESs.IP--pRIPRhTuAoWKYcSGAVGshpHuluLQGssYuTELEVhADGY ........................................RYLpsVQphKpllc-ssl.p.lhssh.A.RYs................sA..YtthsKssWW.s.K.u.h.s..s..G.PlVEQuTHFsDLuRYFGG.-.V-.hsoVhu+ulch.-ps...G.pLsKls..lDEot..Is.-pRlPRhTsAsWKacoGAVGshhHshsLQGpsYsspl-VhADGa............................................................. 
0 19 24 27 +8469 PF08636 Pkr1 ER protein Pkr1 Mistry J, Wood V anon manual Family Pkr1 has been identified as an ER protein of unknown function. 25.00 25.00 25.40 25.00 24.60 24.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.58 0.72 -3.92 20 147 2009-01-15 18:05:59 2006-01-26 12:00:54 5 7 140 0 110 140 0 74.60 44 37.83 CHANGED Mus.FlpsLWpSIFTPGsTPsLllATpsoFAAL.llLlsLlhsTt.SIHFlhL.lLuusLWholsWFltELppschp ..............Mus.FhtcLWpSIFTPGs..T..P.....s.....LllATssoFuAL.lsLhsLlhsTh..SIHFllL.hlssuLWholsWFhpELptsp.......... 0 32 62 95 +8470 PF08637 NCA2 ATP synthase regulation protein NCA2 Mistry J, Wood V anon Pfam-B_15813 (release 19.0) Family NCA2 has been shown to be required for the regulation of ATP synthase subunits Atp6p and Atp8p in Saccharomyces cerevisiae [1]. 20.60 20.60 20.90 23.80 19.80 19.80 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.85 0.70 -5.52 13 207 2009-09-14 08:31:22 2006-01-26 13:06:51 5 3 172 0 151 206 5 263.60 28 41.54 CHANGED slL.hhhht..olhtlhps+ppIhpal+.ssh-slpuFhpNWVhpPlpplhcTlRasp.sSplulhSpcoLpS-h-SLpRMll-FstDppssss.......lshpplpsc....lcpGDLT.......lhchYEppl+pPlKslloGsLlRoLLIQlQKsKVDsplAlsGID+lLKSQQLlFulVuluPullIlYshhphLpphltstsphupst+hp..phpl....oLsslE.RlL.......shstp...................tt..ss-hsphph......GLLll-lpsL+phuspll..........PtshpcEW...lcDLc-Lsssshss.ss+LsllpRIa+sY 
.....................................................hh...........t..h....hhpppt.pl..palp.phhpshhsFhhsWlhpPlpplhssl+cs...psphulhuppoLpu-hp......SLcRMll-Fst-p.t...................ph.ptlppp...................l.cpGDlo.................lhctYEp-l+p.P.lpshl.pGcLlRuLLIQ.................lQKsKVDlphAhsGIDplL+SQcLs....F..uhluhsPulll....hthh..ph....lpt..hhttt.pth.ptttc...h...t....ph................Lpsl-.R.lL.......p.t.........................................................................ttts..s.hph.................GLLlhplph...Lhphspthh.............................s..t.p.h..tp-a...hcDlp-L.....ps...t.h.sh..ttphpslpRlhhsY..................................................................... 0 52 93 130 +8471 PF08638 Med14 MED14; Mediator complex subunit MED14 Mistry J, Wood V anon Pfam-B_13303 (release 19.0) Family Saccharomyces cerevisiae RGR1 mediator complex subunit affects chromatin structure, transcriptional regulation of diverse genes and sporulation, required for glucose repression, HO repression, RME1 repression and sporulation [2][3]. This subunit is also found in higher eukaryotes and Med14 is the agreed unified nomenclature for this subunit. Med14 is found in the tail region of Mediator [5]. 
19.90 19.90 20.30 19.90 19.10 18.30 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.25 0.71 -5.06 29 300 2009-01-15 18:05:59 2006-01-27 09:41:35 6 6 249 0 227 324 1 186.60 31 16.21 CHANGED shhPLutLlpphsQpsap-LsslhcpLsp.......................................t.pspsshpKKhclLpahhthRspFlKLhVLs+Wu+pup..cVs+hIDlhsal+tpphhasssh.tLssh.+psLstA+lPNsDLhTALEVLspG...phsshsshs..alssss............loscphLcpLpcLNpllphRLs.l..hpplPtph.psYcI+cGRVTFpVssEFElsLolss-s .............................hsLuhLlphhhppsap-Lp.LhchLsp...........................................psshc+KhpllpastppRp.Fl+LlsLsKWuppus.......pV.p.+hh.p.l..sal.c.ppph.haspshppLtph..+cs.....L.spA+lPshclssAl-VLooG..........+lPsh.c..p..t.....hls.sss......................lo...pct..h.....psLp.cLNpllphRLs.........ps..plP..ph..pshp.....l...............tsGRVpFpVtsEFElsLTlhsc............................................................... 0 76 124 189 +8472 PF08639 SLD3 DNA replication regulator SLD3 Mistry J, Wood V anon manual Family The SLD3 DNA replication regulator is required for loading and maintenance of Cdc45 on chromatin during DNA replication [2]. 
20.00 20.00 20.10 20.30 19.10 19.40 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.74 0.70 -5.86 24 128 2009-01-15 18:05:59 2006-01-27 10:36:39 5 2 119 0 98 134 0 463.60 21 59.51 CHANGED -.h-hlcppYhpuLYho+TulAYFsKusLuRsRshhpsspsss..................p.lhshl+phlLsscphDp+Ycts....hpshhlhtsss-s...tt.pt+pp+s.pc..pchucsthhstt.thhtp.ahtp..s.pt...tp.st........pchcchlusL+lREsQLQllLlLEhl........uL......hshssEtppstppsstsh.pp...pp+sppccKtpclsh.......................................................hL-lalDRLCIWcslpsscshhsppsps.stt....................spchppssl+cFCspVllPaausRLPchschIs+KLs...sshthsPhpppps..........ptshscsusssc.........cp.....sspppppcohp+Vho-ppt.....ttttttssL.RStTsss....hccEop.shh.sh.spsp........usl.ps+phppRplslsshutpcpsch+c.thh...cpcLcsuhsulcKss+tlsucsl.spsucK+st..................hhpslQlhATPt.ssRpcs.h........................................................s..p.t....s..................................ptspssVptTPp+ ...............................................................................................................p.hp.lpppYhpsLY.........psslAYFsKusLsRh+shhp.s.pt...................t.hhphhpphllshtphDt+..acpp....htthhhhhtpspt......t..php+t.pp.......phscss..............t.t....t......................pphpphlstL+hREsQLQllLlLEll...........tL..........hth.ttc.....p.....htttp.t..t........tpp..pp.cpcpppshsh.............................................................hL-lhlDRLsIWcslttschh..tt.t....tt............................ppspp-tlpsFss-VllPaast+LPchschlpcKLt...ssht.ss.ptptp...........pptspspsssshp.......................p..................s.tpsppoh.p+shsscp................t..tth.sL.+otosss...........tlcpEsp..p..h.......s...spsc..............................usl.p.+h.hp.pRphsl...sshs.....p..tpp.c..........ppht.h............ctpLp.sAhsshp+ssRtl.sscsh..scssc++tt.............................................................s.tpslQVhATPt.tpRh.psh........................................................................
..s....................................................................................................lttTP.................................................................................................................................................................................................................................... 2 24 51 84 +8473 PF08640 U3_assoc_6 U3 small nucleolar RNA-associated protein 6 Mistry J, Wood V anon Pfam-B_8720 (release 19.0) Family This is a family of U3 nucleolar RNA-associated proteins which are involved in nucleolar processing of pre-18S ribosomal RNA [1]. 20.90 20.90 20.90 21.30 20.70 20.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.84 0.72 -4.22 34 307 2009-09-11 15:26:11 2006-01-30 13:06:58 6 3 277 0 221 297 2 82.60 32 15.23 CHANGED LEptlPELc-LccptlFo+pElppIl++RocaEa+LppRssphp..DFlpYlpYEhsL-cLhpKRtp+lt.......pppptt-auh.p..RI ...........LEphlPELcclccptlFo+..sEIppIl++RscaEa+lpp+ssphp...DalpYlpYEhsLcpLhp+Rpp+lth.......ht.p..p.sh...................................... 1 79 124 183 +8474 PF08641 Mis14 Kinetochore protein Mis14 like Mistry J, Wood V anon manual Family Mis14 is a kinetochore protein which is known to be recruited to kinetochores independently of CENP-A [1]. 
21.20 21.20 21.70 22.70 21.10 21.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.82 0.71 -4.11 14 172 2009-01-15 18:05:59 2006-01-30 13:18:49 7 2 152 0 115 161 0 138.70 20 55.64 CHANGED phuppulplssh-hpu.....sl...hslss.s..pspchEPaDh-LstplpplapphEctsVcVAphR+ssPpphtctYs.......cpppphLpp......h-pcl.............s...t.p.s.t.t-sDtpsp.st.tpp..pthhp......htpuhsplhphp.sl ..............................................tt.slp.sths..hps.............sl..................hsh.sts.......pppchEP..aDsclptpltpLhsph--lhlclAphR+phPt+lscsht.......cstpchLpp........................hcttl................s.t.t.p.p....t......t.s...tt...........t................................................................................................ 0 18 45 84 +8475 PF08642 Rxt3 Histone deacetylation protein Rxt3 Mistry J, Wood V anon manual Family Rxt3 has been shown in yeast to be required for histone deacetylation [1]. 29.90 29.90 30.30 30.10 29.50 29.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.73 0.71 -3.84 13 171 2012-10-01 19:31:27 2006-01-30 14:35:38 5 3 148 0 130 169 2 109.70 34 14.56 CHANGED lsl+Istcal................ptth....cs.hstpRplWG..TDIYTDDSDllhlLhHsGhhpsshs.p.......................+pos.sh.spspsh...us.......s.Ph.tDLcVslLlLPpLp+YtSshR.GIpSRsWs ............................hpl+lst.cl................ssth...........ppchstpptLWG..T-lYTDDSDl.lAshhHsGahpsp..hs.s...............................................................t...p......t.............tt...............s.......DLplplLlLPpL.cY.ushh.ulpSRtW....................... 0 32 76 111 +8476 PF08643 DUF1776 Fungal family of unknown function (DUF1776) Mistry J, Groocock L anon manual Family This is a fungal family of unknown function. One of the proteins in this family Swiss:P32792 has been localised to the mitochondria [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.85 0.70 -5.28 15 135 2012-10-10 17:06:42 2006-01-30 15:01:01 5 4 126 0 99 2683 945 275.30 30 64.76 CHANGED ARp-VVllsGSss-PlTRslAhDL-RRGFIValsspstc-tphlcscs.p.-.IcsLslsp.......ts.shpsslscFtphlppP.hshsssp.HhLpLpullllPSLsa.ssGPltsIssuoass.lss+lLs.l.lsspullPLlpt...................ppsplIllsPoIhuuLshPaHuPEslhsuslpshhpsLscElp....hsIsVspl+LGslslu...............sssSssphuslssS-....hhsWstph+sLYussasth.tpt.shtth.........RGoolRcLaphlaDhl.sp...............sssllYsGpGuplYsa.....lucalPculls .............................................................................................................uRp-lV.llh.G.u.sp-PlsRsluhDLE.RRG.FIV.al....s...sp....ss.-..-.........p....hl...cs.p....s.....p.D....Ics.Lhl-.........ps.phtsslpph.tphlpp........s..................sh.s.s..h..........s...........p......h.....p...Lp.u...l....l...h.lP..s.Lp...Y...so.GP.l....tsI..s.s.s.s..aschlN...o+lLhsl.hshpshLPLLpt......................................................pp.s..pl...l..lh.s.P...S.I......o.uL.s.h......P.a.p.u..s.E.s...h.s....s...s...u..l..s....uhhpsLppELp............slsVsplcL.G....sl..c..lu........................tt...t..p.....t.h..t.p.hssoc................W...........tt...+s.lYu...ssaht..hp.t..t...sh.............pGo.s.....h..R..p...L.a..hlhDhlts.........................hsss.h.h.s..G..p..G.u.hhYsh.....luphhPtshl.t........................................................................................... 0 22 52 81 +8477 PF08644 SPT16 FACT complex subunit (SPT16/CDC68) Mistry J, Wood V anon Pfam-B_4478 (release 19.0) Family Proteins in this family are subunits the FACT complex. The FACT complex plays a role in transcription initiation and promotes binding of TATA-binding protein (TBP) to a TATA box in chromatin [2]. 
25.00 25.00 31.10 29.40 23.80 24.00 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.00 0.71 -4.37 32 353 2009-01-15 18:05:59 2006-01-30 17:00:05 6 13 294 0 255 354 4 151.60 43 15.34 CHANGED IaVDpcppolllPIhGthVPFHlsTlKNsSpspEush.sa.lRINFpsP..Gsts..sppct....hps.ssshFlKEloaRSpcsc+...........hsplhctIppLpKphppR...EsEc+chtsllp..Q-+Lhl.ps.....pcsh..pLpslalRPsh.su....++hsGsLEhHpNGhRY .....IaVDpKtpoVllPIhGhsVPFHIsTIKNsSp.osEG-a..s..............Y.LRINFhsP...Gssh...u+p-sts......aps..ssApF...l+plTaRSpcscc...........hspshptIp-lpKchtpR...............EtEc..+EtpslVc..Q-cLlhsps.....+pss.......+L.pDl.alRPsh..ss......KRhsGsLEhHpNGhRY.............. 0 90 143 212 +8478 PF08645 PNK3P Polynucleotide kinase 3 phosphatase Mistry J, Wood V anon Pfam-B_6220 (release 19.0) Family Polynucleotide kinase 3 phosphatases play a role in the repair of single breaks in DNA induced by DNA-damaging agents such as gamma radiation and camptothecin [3]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.07 0.71 -4.79 32 1136 2012-10-03 04:19:28 2006-01-31 09:22:57 6 29 976 20 329 2109 716 164.20 32 45.94 CHANGED Kl..AuFDLDuTLIpo+S.Gtsasp.s..........ssDWpa...h..p......l.p+LppL.hp-s...........YplVIFoNQuGlst..............spt....shpsappKlpslhcpl....slP.....ltlasAsp.c..........................................D.....haRKPpsGMWphhtcchsp.....h.lshp..pS...aaVGDAAGR..........................................................ppDaSssDttFAhNlGlpFhTPEEaF 
...............................................................hhhhDhDGTLIppss...pp..a..s...s...........t.Dh..h..h...hcss................VhspLh....cL..pctG............Y+lVhlTNQcGlGo................thspt...ch-s..+...s...h....h..h..p..Ih..p.u...........Glp.................l..p..l..a..h...s.s..c...c........................................................................................s..........pCRKPK.suhlc.c.alpc.t....................h.Dht....pS............ahIGD.....thsc........................................................................................................ht..hh............................................................................................................... 0 129 192 276 +8479 PF08646 Rep_fac-A_C Replication factor-A C terminal domain Mistry J, Wood v anon Pfam-B_3457 (release 19.0) Family This domain is found at the C terminal of replication factor A. Replication factor A (RPA) binds single-stranded DNA and is involved in replication, repair, and recombination of DNA [1]. 24.20 24.20 24.30 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.04 0.71 -4.49 33 798 2009-01-15 18:05:59 2006-01-31 09:34:27 5 48 363 3 503 733 13 135.40 23 24.30 CHANGED aaol+Aslsal.....Kp-.shhYsACsp............sCNKKVs-p.ss...........Gp...........WRCEKCspsaspspYRYllshplsDpTGphWlTsFsEsAcplh.GhoAsELtcl+c....pssppasplhpphphppahF+l+sKp-oYs......-EpRl+hTVhslp...slcapt-ucpL ................................................hphhuslh.hl.....c..pc...s.......h..hY..ACs....................pC.s.K..K..Vhpp..ts........uh.....................apC...c.+..C..s.....p..ph...s..p...s.p..aRYh..lsh..plsDto.u....p.hhlosF..s..-s..ucplh..Ghs.A.sp.L..h..p.hpp.............ps..pt..h.tphh.p.ph....hp.pa.a..+...hps.p....ct.at......tp.+...p...h.h.t................................................................... 
0 138 269 387 +8480 PF08647 BRE1 BRE1 E3 ubiquitin ligase Mistry J, Wood V anon Pfam-B_35727 (release 19.0) Family BRE1 is an E3 ubiquitin ligase that has been shown to act as a transcriptional activator through direct activator interactions [1]. 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.04 0.72 -4.01 20 193 2009-01-15 18:05:59 2006-01-31 13:57:30 6 7 177 0 138 205 0 97.20 30 12.53 CHANGED LppELsslcpAacchpphspcKht-hpshEpphs+LssEKsKADQKYFAAM+s+DulpsEhKpLppphsKss-llppLp-....hEpphppplpshcKpl ....LppELsphppuacchpphsppKht-hsshEp+ht+LpsE.................KsKADQKYFuuh+st-shpsEl+pLptphsKss-llppLc-....sEsphpptlpshEKph....................................................... 0 39 71 111 +8481 PF08648 DUF1777 Protein of unknown function (DUF1777) Mistry J, Wood V anon manual Family This is a family of eukaryotic proteins of unknown function. Some of the proteins in this family are putative nucleic acid binding proteins. 
26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.10 0.71 -12.04 0.71 -4.54 7 315 2009-09-11 17:09:29 2006-01-31 14:03:37 7 31 236 0 237 303 3 165.60 29 60.66 CHANGED MuRSRS............Ro..c+cRRcsc.spsR-.................ccRcR-RsRSR-.RDR....+RsRsR..s.+.R.pRsRSPcR................cRSpSpS..Rc+-...Rccpcc+csc.cP......+t+.hQ..........................IscpcL-.Gcsc-ph..................-MMK.hMGF.ssFDTTKGKKVsGs.DsusVplppKR+YRQYMNR+GGFNRP..LDFhs ..............................................................................................................................Rp...ppp+.p...ptc..spp.Rc.....................p.cRc+.p.Rs...R.sR.p.....R-c............cR.s..Rp...+......s..p...+....p..Rs.+Ssp+................................................................pcsp....s...s.....t..cp.pp......Rp..p.....ppppp.pp...p........................p..p...........................................................................................................pp..t.c..hp...st........st.--h.....................................c..Mh..+.hMGF..suFsoTKsK+V...............Gs.....s..s.........u....l..p..h...pK..ppYRQYMNRpGGFNRPL....s.................................................................................................................... 0 86 137 191 +8482 PF08649 DASH_Dad1 DASH complex subunit Dad1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. Throughout the cell cycle Dad1 remains bound to kinetochores throughout the cell cycle and its association is dependent on the Mis6 and Mal2 [5]. 
27.50 27.50 27.70 27.70 27.00 27.40 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.67 0.72 -4.27 11 108 2009-09-11 11:16:04 2006-01-31 17:20:58 5 3 106 0 80 107 0 57.10 48 38.23 CHANGED pYFpcQR-lLlQEIosoh-sllsNLNsLN+SLEpSluVG+EF-sVucLWppFYsulsp ............paFEpQR-hLlpEIu.....tohEpVLsNlNpLNRSLEulI.........uVGpEFsSVpuLWSpFpssMt.t................. 0 21 45 73 +8483 PF08650 DASH_Dad4 DASH complex subunit Dad4 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 20.70 20.70 20.90 20.70 20.50 20.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.30 0.72 -4.07 8 105 2009-01-15 18:05:59 2006-01-31 17:22:25 5 6 102 0 82 101 1 67.70 50 63.19 CHANGED M.ENPaEcVQsslLuRIIuNVE+LNpSVspLNQpLcclNp+N+NLElMuQhCENYpcuVpFNLEATGs+KsPL ........MEsPHEppQshLLuRIIsNVE+LNEulshlN+sLp-INhpNhNl.ElluQ.....MacNYpsNV.FpLE..ATpsh+.P.t.............. 2 23 46 70 +8484 PF08651 DASH_Duo1 DASH complex subunit Duo1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 25.00 25.00 25.90 27.00 23.10 24.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.47 0.72 -4.40 16 126 2009-01-15 18:05:59 2006-01-31 17:37:22 5 3 123 0 95 124 0 76.90 39 32.01 CHANGED tuLp+EL-pLc+INtlIEslhtsLcsup.sphpplpcoscusspLLspWhpILSQTpaspcLl.sssWpGtsp.DstshE ......uLpcELcslRpINpsIEullsoLcpup.sNhpsVscolpsussLLsoWo+ILSQTEasp+LlhsPsWpGssp.Dhhc.E............. 
0 23 48 79 +8485 PF08652 RAI1 RAI1 like PD-(D/E)XK nuclease Mistry J, Wood V anon Pfam-B_13095 (release 19.0) Family RAI1 is homologous to Caenorhabditis elegans DOM-3 and human DOM3Z and binds to a nuclear exoribonuclease [1]. It is required for 5.8S rRNA processing [1]. Profile-profile comparison tools demonstrate this to be a PD-(D/E)XK nuclease, with a full set of canonical active site signature motifs characteristic to the PD-(D/E)XK nuclease superfamily [2]. 19.90 19.90 20.40 19.90 19.50 19.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.19 0.72 -4.19 31 357 2012-10-11 20:44:44 2006-02-01 09:27:35 6 7 232 4 253 347 1 69.10 34 17.75 CHANGED VDslhc.tpsps..t.....................ppYVELKTopth.....ps.p...phpsFc+..KLhKhWsQSFLlGls+IlhGFRDcpthLpsl .........................................hDsh.s..t.tt..............................tpaVELKTotth........ts.p......phpsF.c.........R...KLLKaWhQSFLlGlscIlsGFR-sc.Ghlpp.h.............. 0 79 137 212 +8486 PF08653 DASH_Dam1 DASH complex subunit Dam1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. 21.40 21.40 21.50 21.90 21.20 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.82 0.72 -4.20 16 130 2009-01-15 18:05:59 2006-02-01 09:35:50 5 2 126 0 97 128 0 57.70 51 23.74 CHANGED phlhsphpcLuDuhtsLDtNhscLphIH-uLss.FNESFuoaLYGLphNuWCVDFPssP .......s.hlpPtFuELuDuhs-L-uNhh+LphhHESLuc.FNESFASFLYGLsMNAaCVDFPcuP........ 
0 26 55 85 +8487 PF08654 DASH_Dad2 DASH complex subunit Dad2 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 20.70 20.70 20.80 21.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.33 0.72 -4.02 18 132 2009-01-15 18:05:59 2006-02-01 09:47:46 5 3 126 0 100 127 0 97.40 34 64.41 CHANGED sltt+I.spK+AEL-sL+pl+chossLssQh-tLppKlushsDGTEuVAsVLuNWpsVl+uIShA....Shtlh+hsptchppst..........................PLPpsLVRI.sls .......h...t+ltpK+tEL-sLppL+clSssLssQh-tLpp+Lsshs-GsEuVAtVhuNWp.sVlpuIshA....Shplhphspt..s.p.t......................................sLPpsLVRI.h......................... 0 29 57 89 +8488 PF08655 DASH_Ask1 DASH complex subunit Ask1 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. 20.70 20.70 20.80 21.20 19.60 20.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.00 0.72 -4.15 15 126 2009-01-15 18:05:59 2006-02-01 09:56:55 5 3 124 0 95 122 0 64.40 50 14.44 CHANGED LE+L-Q-ITLsLQ.-IDpNlSpsaplITpcIlPhlpcYupsocclh-uu..pFWKpFFEpSANVpLsua ..........LE+L-QpITLsLQ.-IDpNFS+uH+IlTssIlPhVcpYucpScsVW-uo.....+FWKpFFEsSANVsLouY....... 0 26 52 79 +8489 PF08656 DASH_Dad3 DASH complex subunit Dad3 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. 
In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. 21.80 21.80 22.40 22.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.65 0.72 -4.28 17 110 2009-01-15 18:05:59 2006-02-01 12:45:57 5 2 109 0 83 108 0 79.10 45 62.12 CHANGED LSPLEpplLpcYppLussLpp.....LsspLppL............ossssp......................tlLcsLRpLEhKhuLVhTLhKuSVYSllLppp.spptcssp .........LoPLEQEVL-EYp+Lspshpp.....LussLppL.............ussPss..........................plLDsLRpLERKhuLVhTLLKASVYSIVLQQphtttt....ts................. 0 18 44 71 +8490 PF08657 DASH_Spc34 DASH complex subunit Spc34 Wood V, Finn RD anon Manual Family The DASH complex is a ~10 subunit microtubule-binding complex that is transferred to the kinetochore prior to mitosis [1]. In Saccharomyces cerevisiae DASH forms both rings and spiral structures on microtubules in vitro [2,3]. Components of the DASH complex, including Dam1, Duo1, Spc34, Dad1 and Ask1, are essential and connect the centromere to the plus end of spindle microtubules [4]. 
25.30 25.30 29.10 26.40 24.80 24.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.60 0.70 -4.72 21 142 2009-09-10 21:43:34 2006-02-01 13:01:21 5 3 120 0 104 127 0 205.80 29 85.12 CHANGED LsppL-pIptSscuIsoL.F...ssPtIFTNAllpsp..s.........ITsLIRDs-scE.puLaphssssp..t..t.................................................tppppspc......ctshhp..............spsts.shhsphhsupsppppst..hsh..Gu.hh.p...............ht-hsl-hlLcshppLsshY.Phs..........usp-+lssLppcapplpssltpLEpcltcQpppLcpht.spppt.s.........t...................ss-c.Ic+EppEIccLEpchpplc ..............................LptpL-pIphuspuIssL.......F..........PPtIFsNAlLtst.s...........ITpLIRDspscE..psLFplsss....t.......................................................tpt.tt......pthhh................tsth....thhs.th...tspt.hpp.h..huh..Gs.hhtp.p.......................ht-hsh-llLcshp.LsslY.Pls...........shpp+lspLppcapplpsplt.hEtcltppptpLpphs..t..ttt..................................scc.lc+E.pEIcpLEtchppL............................................... 0 24 55 88 +8491 PF08658 Rad54_N Rad54 N terminal Mistry J, Wood V anon Pfam-B_26946 (release 19.0) Family This is the N terminal of the DNA repair protein Rad54 [1]. 
20.20 20.20 20.30 21.50 20.10 20.10 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.72 0.71 -11.57 0.71 -4.55 24 204 2009-01-15 18:05:59 2006-02-01 13:08:54 5 12 187 1 144 211 0 167.20 29 20.79 CHANGED s.p.hssspShs+L..sKPFKsPh....................Suosspssc+PuRKRR..pVsYu.....ssss-ss-t.s-psh.................pscc+hALusRc...............sphsshphccp-ssh++sFoVPl...hspptssYssp+.PsPoLGhRptsshss+PLHDPouEFAIVLYDPTVDstsp............ccpptpp............cpcspcpp.c.................lcs.shh...HK..SLAEILG.....l....KKKhp.chPcV .................................................................................t........................................................Rptp....hsYt....................................h........................................ptth.uL.sp...........................hhs.s...p.tt..hcplhp+sFpVPl.....p..s.s...Y.sspt..ss.sLGh+psshhls+PLHDPhsEaAIVLYDP.olDs...............................................................t.....................................................................p...........p+.oLtclLG................p.pt...thP.l................................. 0 40 71 115 +8492 PF08659 KR KR domain Hoof I, Finn RD anon Hoof I Family This enzymatic domain is part of bacterial polyketide synthases and catalyses the first step in the reductive modification of the beta-carbonyl centres in the growing polyketide chain. It uses NADPH to reduce the keto group to a hydroxy group [1]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.88 0.71 -4.62 85 6808 2012-10-10 17:06:42 2006-02-01 14:48:52 5 1300 1204 23 2532 84357 23197 179.60 32 8.62 CHANGED GoYLlTGGhGG...LGttlAcaLs.c....+G.AccLlLsuRs...............sssspsp.....t............hl....ppLps.h.Gs....plphhssD..luctsslppl...lsplptph...sslcGVlHuAullp.Ds.hltphospchppVlusKVpGshsLcchhts.......psLDFFllaSSluu.lhGss..GQusYAAANuFLDAhAptRcspGhs......uhSlsWGhWtss ...................................................................................ush.LlT..G....G.......h....G...u.......lG..h...h.......l.A...........c.a..L....s....p...........c....G....s.....p.........c...........l.....l......L.....h..uRp...................................................................s.s.t..s.t..........t..................................................hl.......p..c....L....p...t......h.....G..s.....................p.l.....p.....h................h...s.........s....D......l....u....D......t........s........s...........l........t....t....l.............l......s......p.......l............t...........t............ph.......................s........l.................s.......G.......V.......l...H.......u...........A...........G.................l................l.................c........D................u.........h................l............t................s................h............o........................c.........p.........h.........p.........p.........V.........h.........t.............s..........K........l...........p........G........u...........h.........p.........L......c.....c.....h.....s.ts.............................ts.L......c.......h.........F.....l.......l...F....S.....S...........h...u....u........l....h.....G........s.....s............G....Q........u......s......Y.......A....A....A....N.....u....a....L....D....u....l....A....p....t....R.....+.....s....p....G..h.s.......................uh.o.l.sWGhWtp.s....................
.................................................................................................................. 0 634 1473 2142 +8493 PF08660 Alg14 Oligosaccharide biosynthesis protein Alg14 like Mistry J, Wood V anon Pfam-B_12992 (release 19.0) Family Alg14 is involved dolichol-linked oligosaccharide biosynthesis and anchors the catalytic subunit Alg13 to the ER membrane [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.22 0.71 -4.26 35 618 2012-10-03 16:42:30 2006-02-01 15:15:23 6 10 514 0 296 1021 447 156.50 28 77.72 CHANGED lhllhGSGGHTuEMhcLlpth...........pshcpahlutsDshStpphp.....ht.p................hhplsRuRpVtpuhhpo...lhoslhsh.htuhhll........h+.+...........PclllsNGPGssVPlshhuhLhthhh..............................ps+llYlEShuRVpsLSLSG+lLh..h.uDhFlVQW.pLpcp.....Y.s+upY...h.Ghl .......................................................................hllhuS.GGHhschhtLhthh...............p.cpahls.scp.utp............................................................................hhph......+...s.t...p.l.t.ps..h..hps......lhsh....l....hsh.hhuhhll...........h+..+................................PDlllssGsusslPhhhhuhlh.................................................................ts+hlYl.EShsRlpphSLoGKllh...l..sDh.FlVQW.phtch......Y...s+u..h.h.G...................................................... 0 99 171 240 +8494 PF08661 Rep_fac-A_3 Replication factor A protein 3 Mistry J, Wood V anon manual Family Replication factor A is involved in eukaryotic DNA replication, recombination and repair. 
23.50 23.20 23.70 24.40 23.40 23.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.26 0.72 -4.15 22 305 2012-10-03 20:18:03 2006-02-01 15:55:35 6 3 260 14 216 294 2 107.40 23 87.79 CHANGED Ms....tsssRlssuhLppa........hupsVRllu+Vpphc..uphhlhpsssst............slplphssshph..sshl.EllGpsssss......slcshshh.-hup..shDhshhsplhplspch..pha ................t.....sssRlssshL..s..p..a.........huc.sVpllG+V..pp..lc...Gp..thhlsss.-st............sls.lpl........s....p.s.h..ph...t..h.s.s...hl.EllGpV..ssst...............slphhtht..-hus...shDhphhspllcls.p.ch..tha........................................... 0 71 118 178 +8495 PF08662 eIF2A Eukaryotic translation initiation factor eIF2A Mistry J, Wood V anon Pfam-B_7957 (release 19.0) Family This is a family of eukaryotic translation initiation factors. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.13 0.71 -4.60 22 1027 2012-10-05 17:30:43 2006-02-01 16:42:42 6 49 446 0 624 2639 160 177.20 24 29.09 CHANGED KsFFpu-csphhWNppGssLLlhss...TchDKospSYYGppsLahl......thsss.sshlpLspcGs.IaDhsWsPpucEFsVlaG..hMPu.csshash.....c....sssltshspps+NolhasPpG+hlllAGFGNLs.Gpl-h.aDhp..phcpluphcssssohs-WsPsGcallTAoTuPRlRl-NGaKIWpasGpLlaphph...sELapl 
.........................................................................................shhpsspsphhW..ptt..uphlhhhsp..........ch.s..p.s.t.....p........shh..s...p.............plah..h......................t.t.t..t.h......s....t.h.lp..l........p.c.p.t.s.....l.h.s..h..sW.............p...P.............s............u........pc.......F....s......l.....l............h...........G.............h.s.............t.........pl..........sha.sh........................c..................sp..h...l...t...s...h...t....p...p......s.....t.....N.......s.......l....h...a....S..P.p.G........p...h.l.l.l.....u.u......h....t..............s....h...p........G.p..l.p....h...aD...........h.............p.................p..............h..........p.....h................h..............s.........p...h........c.....t.......................s....s...........o....s..h...p......Ws..P.sG.+......al.h.....o.....u.....s...s.........s........................p...h....p...h...p...N.........G.hpla.p.h.p.G.t..h.p..h....tth..................................................................................... 0 222 347 511 +8496 PF08663 HalX HalX domain Bateman A, Galperin M anon Galperin M Domain HalX is a domain of unknown function, previously (mis)annotated as HoxA-like transcriptional regulator. 23.30 23.30 23.60 23.60 23.20 23.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.31 0.72 -3.99 13 74 2009-01-15 18:05:59 2006-03-07 15:01:53 5 4 22 0 47 76 0 69.40 28 34.79 CHANGED lsRs-YD-plpEhaALsSKpAsLEupKssspLpsS-cYscLp-Rl-pLcscl-ssssphssp.Dacuhhpsh ........pRspY-cplpEhauLsuK+AsLEspKsps-Lpcs-cYpcLpsRlccLcspl-psh.sph..psp.Dh.thh.................. 0 3 32 47 +8497 PF08664 YcbB YcbB domain Bateman A, Galperin M anon Galperin M Domain YcbB is a DNA-binding domain [1]. 
25.00 25.00 26.10 75.80 20.20 24.30 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.59 0.71 -4.37 21 318 2009-01-15 18:05:59 2006-03-07 15:12:28 5 2 240 0 48 218 1 134.50 43 44.99 CHANGED cphphlLu-LGIsGEuGucDllpllphlhcppps.s.......sLKplFpcluppc.....t..chp+EhKAhEQRIRRAlhpuLsplAuLGlsDasNspFcpYAsphFDFpsV+pcMpclpscsst...pu+lNlKKFlpsL ........s.ph+aLLSELGIuGEsGS+DLlshl-YLhppEpspo.....thPuLK-lFpplst++Ls...s.ts-lc+phKAuEQRIRRAIhpuLsHLASLGLTDFpNPKFEsYAs+FFDFssVR++MsElppcpst.........tsRIssKKFIQVL. 0 21 37 41 +8498 PF08665 PglZ PglZ domain Bateman A, Galperin M anon Galperin M Domain This family is a member of the Alkaline phosphatase clan. 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.19 0.71 -4.61 16 530 2012-10-03 20:55:17 2006-03-07 15:56:20 7 2 502 0 158 551 302 178.40 25 24.53 CHANGED cRVhVIlSDAhRaEhupELtphLNpcsphps-l..p.......shhulLPSYTpLGMAALLPHctltat..tstsVhVDGpsspuhtpRptILtphh......uhAhptc-lhshspccsR-......hl+spcllYlYHNpIDuhGD.....+tsoEppsF-Ascpslp........pLpcLlphlhsp.sGsplhlTADHGFlapcssl ........................................................................+lhlIlsDuhRY-ht.ppL.tp.p...l.s.p..p.p...p..h.phch....p..................shhuhLPohTphuhs.Alhs.....s..p.....h......p...h...........t....t...t.sp..h..hs..-.....s.p.p.p.p.s.h.s.....t.+.p..p..h.Lpt...........................shshp..hc..clhs.h.pp..sctpp.........hhp..sp...c...llhlh...a.NtI..Dt.huc.........p.h.p.Et.....t.s..hc......us.......h.....cul.s...........tLtcllppltst....s.h.c.lllTuDHGalhppp..h.................................................. 0 72 122 146 +8499 PF08666 SAF SAF domain Bateman A, Lakshminarayan I anon Lakshminarayan I Domain This domain family includes a range of different proteins. Such as antifreeze proteins and flagellar FlgA proteins, and CpaB pilus proteins. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.85 0.72 -3.57 169 4935 2012-10-01 20:51:14 2006-03-07 16:09:21 7 24 2605 28 1282 4640 2751 65.60 23 18.28 CHANGED psllluscs..lttGphl.........sspslphtpss...h.huhhspt................hspllG.....thstpslttGphlphsplp ..................................plslA.hp-..lttGphl.....................stps..lp..h..t..p..s..s....hthultsht............................................aspl.l.G.......hAtpslttGphlp.ptl............................................... 0 427 825 1065 +8500 PF08667 BetR BetR domain Bateman A, Galperin M anon Galperin M Domain This family includes an N-terminal helix-turn-helix domain. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.80 0.71 -4.26 6 180 2012-10-04 14:01:12 2006-03-07 16:15:43 5 3 131 0 45 208 4 118.90 32 49.44 CHANGED sschlsss+lRcLLs+pGIucRp+sohIsplLGLShSsupRKL+GuhPWsLuQLp+lAptaGhPsu.LL....-spGhsP............ss-hpDAlLshpspchpC+............AhI...uspusupspspFVAhp...pspWhVht+scs.-p....csYsVchl-lps ......................................s..phhhup+V+cLhs+pGIs.Rpp.s.o.plsclLsLSaSsu.pRK.L+.GpsPWoLuQlpclActaG.s...supLh.....ssp......................pAhh..t..ph.Ch............hhl...st...s.t..s.h.A.p......W.lh.hpth..t.....ha.sch.................................................................................................................... 
0 5 12 28 +8501 PF08668 HDOD HDOD domain Bateman A, Galperin M anon Galperin M Domain \N 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.07 0.71 -5.03 57 3180 2012-10-01 20:28:14 2006-03-07 17:06:09 7 42 890 11 1179 4044 485 178.50 20 48.05 CHANGED LPslPslhhclhphl.psPssshsclAclIspDssLoA+lL+luNSshau..hsppls.olppAls.hLGhpplpsLsluhulhphhpst......hhchpthWc+SltsuhhuctlApth......shtp.s..-csahuGLLHDlGclhlhphhscthttlhphhtttt..shhpsEcchhGhsHsp.........lGuhLhcpWphPttlscslthH+ps ....................................................P.hsthhhclhphh...s..p..s..s.s..s.h..p........clscllppDssLosclL+h..s.N....S....s....has.........hs..p.......p......ls....ol..p.p...Als.hL..Ghpp..l..c.s..l.l.h.s.h..s.l..t.p.hhptts...............h....pph.a.p..p.....uh.t.....s.At.h.s.p.h..l.u.p.p.h................sh..tt.............-p.sa...h...s..GLlpslG.h.h.h.h.t........h........t.....h.................h...................................................................h......t......t..h..h......t.h.......h.................hs....l.h.t....W.t..h...s....hh..t.h............................................................................................. 0 403 849 1049 +8502 PF08669 GCV_T_C Glycine cleavage T-protein C-terminal barrel domain Bashton M, Bateman A anon Pfam-B_933 (release 4.0) Domain This is a family of glycine cleavage T-proteins, part of the glycine cleavage multienzyme complex (GCV) found in bacteria and the mitochondria of eukaryotes. GCV catalyses the catabolism of glycine in eukaryotes. The T-protein is an aminomethyl transferase. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.25 0.72 -3.96 190 6206 2009-01-15 18:05:59 2006-03-08 14:03:29 6 31 3260 45 2055 5279 10043 94.50 25 19.51 CHANGED ssFlGp-ul..tctpp.p......u........++hlul....................hp..t.shsptGttlht................ss.....ptlGtlTSusaussl.tpsluluhl...............sstsp..Gs...pl.plp.l.....................cu..pthsupl..sph ........................................................................sFlG+csl...tppcp..p...................G............s.....++lVGL.........................................thp...scs...s..s...cs..G...t..t..lht.............................................ss..........ptlGhlTS.G..sh.S..Pol......st..s..IAlAhl.t.............tth.sth..Gp....pl..l.p..l......................cs..cth.spls.............................................................. 0 599 1224 1672 +8503 PF08670 MEKHLA MEKHLA domain Burglin T, Bateman A anon Burglin T Domain The MEKHLA domain shares similarity with the PAS domain and is found in the 3' end of plant HD-ZIP III homeobox genes, and bacterial proteins. 23.70 23.70 23.90 26.80 23.50 23.60 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.53 0.71 -4.67 26 387 2012-10-04 01:10:46 2006-03-08 17:08:34 6 5 225 0 141 393 60 143.80 34 29.20 CHANGED PEshsLsphlspSYp.....pahGtsLlps.ps..uscshhctLac.pspulLspuhc..ssPlFsaANpuuLchhEs.ohssLpcls.chhh-.cssRcshssthsclhpQGasph.uGlplSshGRphphEpAssWplLss-p....sscs.AhhFsNWpFl ................................................shshsphlspSY+.........thhGt.pLlpsttt........sspphhchLac.t.s.Allspuhc.........spP.lFsaANpuuLchhEh...ohstLpsls.chsh..-...p...ss...R.cphp.s.h.splhpQ...Gas....ph..uGlplSshGRphph-pAlsWpl.ls....c-s......sh+s.AhhFhsWph....................................................... 
0 29 90 116 +8504 PF08671 SinI Anti-repressor SinI Mistry J anon pdb_1b0n Domain SinR is a pleiotropic regulator of several late growth processes. It is a tetrameric DNA binding protein whose activity is down-regulated thorough the formation of a SinI:SinR protein complex. When complexed with SinI, the SinR tetramer is disrupted such that is no longer able to bind DNA. 19.80 19.80 19.80 20.80 19.30 18.80 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.94 0.72 -7.11 0.72 -4.51 17 297 2009-01-15 18:05:59 2006-04-20 09:18:08 5 4 127 4 39 164 15 29.60 46 35.33 CHANGED LDpEWhpLlpEAhctGlohE-h+cFLphpK ..LDpEWhpLlp-AhsuGlohcphRcFLch.K........ 1 7 20 25 +8505 PF08672 APC2 Anaphase promoting complex (APC) subunit 2 Mistry J anon pdb_1ldd Domain The anaphase promoting complex or cyclosome (APC2) is an E3 ubiquitin ligase which is part of the SCF family of ubiquitin ligases. Ubiquitin ligases catalyse the transfer of ubiquitin from the ubiquitin conjugating enzyme (E2), to the substrate protein. 20.30 20.30 20.30 20.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.89 0.72 -3.71 18 240 2009-01-15 18:05:59 2006-04-20 09:25:36 6 4 216 4 176 238 3 59.20 40 7.61 CHANGED YIhuMLTNhso.LsL-RIHsMLKhh...ss.sshshopcELccFLsphVcEp+Lphs.GGsY+L ...............aIhGMLT.Nhsu..hsL-RIapMLKhh...ss.sshsho.pELppFLsphVp-tcLphs.uGsY+l........ 0 56 90 143 +8506 PF08673 RsbU_N Phosphoserine phosphatase RsbU, N-terminal domain Mistry J anon pdb_1w53 Domain RsbU is a phosphoserine phosphatase which acts as a positive regulator of the general stress-response factor of gram positive organisms, sigma-B. The phosphatase activity of RsbU is stimulated by association with the RsbT kinase. Deletions in the N terminal domain are deleterious to the activity of RsbU [1]. 
23.10 23.10 23.10 23.50 21.70 23.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.37 0.72 -3.97 14 334 2009-01-15 18:05:59 2006-04-20 09:26:18 5 8 325 8 32 137 0 77.00 54 22.08 CHANGED cppY+plLccYLtspsEpsLY.pspcho+csIc+pIsPE-IVslH+shlppl..sl.....pcplhcohDlLlEVMhGYGhAY ..cp+YKuLlcESLssQD....KspLIKKCEKaTcEVI+KDVLPEDIV-IHKsYIhoLsLo.......cEDVh+TL.DVLQEIVKGFGYSY...................... 0 10 19 29 +8507 PF08674 AChE_tetra Acetylcholinesterase tetramerisation domain Mistry J anon pdb_1vzj Domain The acetylcholinesterase tetramerisation domain is found at the C terminus and forms a left handed superhelix. 25.00 25.00 28.10 28.10 22.20 21.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.04 0.72 -4.43 6 95 2009-01-15 18:05:59 2006-04-20 09:29:11 5 3 45 8 45 93 0 37.40 68 6.56 CHANGED DEAERpWKhEFHRWSuYMh+WKsQF.DHYS+p-pCusL .DEAERpWKAtFHRWS.sYMhcWKNQF.Dah..SKpEpCssL....... 0 5 6 17 +8508 PF08675 RNA_bind RNA binding domain Mistry J anon pdb_1whv Domain This domain corresponds to the RNA binding domain of Poly(A)-specific ribonuclease (PARN). 27.90 27.90 27.90 29.20 27.80 27.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.13 0.72 -4.00 2 85 2012-10-02 20:46:34 2006-04-20 09:35:57 6 4 61 5 49 96 0 81.40 61 14.59 CHANGED G.-.p.pR-HlhasTFPcpW+TuDl.phFpsFGslplSWlDsTSAFVuLpp.ptsp.slph.tYtpua+l.saApa.......pQhK ........GPDLQPKRDHVLHVT.FPKEWKTSDLYQLFSAFGNIQlSWIDDTSAFVSLSQsEQVp...IAlNTS..+.Y..AESYRIQTYAEYhtpKp............................ 0 12 16 32 +8509 PF08676 MutL_C MutL C terminal dimerisation domain Mistry J anon pdb_1x9z Domain MutL and MutS are key components of the DNA repair machinery that corrects replication errors [1]. MutS recognises mispaired or unpaired bases in a DNA duplex and in the presence of ATP, recruits MutL to form a DNA signaling complex for repair. 
The N terminal region of MutL contains the ATPase domain and the C terminal is involved in dimerisation [3]. 21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.72 0.71 -4.59 108 4304 2009-01-15 18:05:59 2006-04-20 10:11:38 6 18 3912 12 1164 3441 773 144.10 26 22.12 CHANGED s......h.hlu.....QlpssYllups...pcG.LhllDQHAA+.....ERlhYEpl.cpp..htp............psQ.LLlPhsl.pls.tp-hthlpcpt-tLpclGhplct....hGspslhlRslPshLtptph...ppllp-llspltphs.......p..........tthhcclhsthAC+sul+us...cpL ......................................hluQlpssY..l...l....s......p..s...............ps.....s......lhll..DQH..AAc.....ERltaEph..ppp.............hsth....................psQt..L....L....l.....P.......h....hl...cl........s..t....p-t..thlpc....p......t...s....t....L.p.c.....lG....lplp...........................husp....p....hhl+......u.l....Ph...hh...t.p...t....p..h...............p..p...l.......lh.-.lls...lhpps.....................p...........htphhpp.lht.h.huC+t.ul+ssp......................................................................................................... 1 401 726 969 +8510 PF08677 GP11 GP11 baseplate wedge protein Mistry J anon pdb_1el6 Family GP11 is a viral structural protein that connects short tail fibres to the baseplate. The tail region is responsible for attachment to the host bacteria during infection. 
20.80 20.80 21.00 79.60 20.60 20.70 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.39 0.70 -5.18 8 37 2009-09-10 20:27:43 2006-04-20 10:37:38 5 1 36 3 0 32 0 216.80 35 94.22 CHANGED hohscsKAuVhSRcADFLta..c.suscD.sl.......hsspslGusTlsQltKGsY.PNVQSAIsDltshu.phsVGsVllsTsusuPpulpQ.s-hloFoGoVssss..sus.llIcVaGlPVpsssGsousplsspVpsshp-hlssphhhspspcc.sosuspLpl+YlDsppH.slssaoppGI..............TloppIsupu+sGYGTWshLGspTpTLssts.sss..lYYF+RIA .....hohspstAtlhSRhAsalpac.spssshsV.......hsspsIGusolsQhtKGhhhPNVQSAIsDltshu.phPlsulllsssssuPpulpQ.sDhhoFoGoVssss..sGsslllpVaGhPVpsssGsousplsspVpstLp-hhspshhhsssppc.ssssspLplpYlDsppH.hhpsaophGI..............TlsppIss.u+sGYGsWphLGspohTLsstsssss..lYYFcRl....................... 0 0 0 0 +8511 PF08678 Rsbr_N Rsbr N terminal Mistry J anon pdb_2bnl Domain Rsbr is a regulator of the RNA polymerase sigma factor subunit sigma(B).\ \ The structure of the N terminal domain belongs to the globin fold superfamily [1]. 27.00 27.00 33.30 82.60 26.20 25.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.55 0.71 -4.26 9 83 2009-01-15 18:05:59 2006-04-20 11:15:30 5 2 83 6 13 59 0 130.20 46 47.84 CHANGED Fltcpps-LlppWsspl+clsspphshplo-chaEshspEal-lll.s.sptsspphpcclp-Fup+hVQlGhsLphlosGLptFt+hlaptMtccs..hscppt..h-llhcl-calsPlssEIlNpYohSWE ..FIpsN+s-LLssWhscMccpS-QhhsslspEthYEpTSKEFVDLIl.SslTcssscasE+L--FAEKlVpLGWPl+FlTTGLpsFGhLVYTs.MpD--....LcccE+..sDhaac.h.-oWlSshhNclVstYusoWE.... 0 5 10 11 +8512 PF08679 DsrD Dissimilatory sulfite reductase D (DsrD) Mistry J anon pdb_1ucr Family The structure of the DsrD protein has shown it to contain a winged-helix motif similar to those found in DNA binding proteins [1]. The structure suggests a possible role for DsrD in transcription of translation of genes which catalyse dissimilatory sulfite reduction. 
24.20 24.20 24.50 49.10 24.10 24.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.02 0.72 -3.97 13 56 2009-01-15 18:05:59 2006-04-20 11:17:52 6 2 52 4 29 53 5 66.60 48 66.35 CHANGED -hKptIl-a.....u+pusKoKaYF+Dhhch....hP-hKsRElKKllscLVsEtpLtaWSSGSTTMYGLKspG ....-hKptll-aLp...tcsssKoKFYFpDFtch....hPDtKsR-lKKllspLVsEtpLpYWSSGSTTMYGLKGsG.. 0 15 28 29 +8513 PF08680 DUF1779 Protein of unknown function (DUF1779) Mistry J anon pdb_2fpn Family This is a family of uncharacterised proteins. The structure of the ywmB protein from Bacillus subtilis has shown it to adopt an alpha/beta fold. 25.00 25.00 25.30 25.00 23.30 24.60 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.48 0.71 -4.46 14 234 2009-01-15 18:05:59 2006-04-20 11:23:29 5 1 231 1 55 184 0 191.20 31 80.00 CHANGED ssplpc.................Wshtu+pphshsp..ctapphspphct-htphpW..spsp-.cchtchpGshpc...thptplpll.sstpssptpoYllYEhputt...ps.s.thcph.cpshclaptcshIFoClpGphssphsts.LpppupplLcphsA+pVEsltEpsFlSlSAaoscapptIhssscc.hNlQlALRps.uhss+TplslGTPIlTsEY ...............................................................................plpp...Wshhs.+p.hshsp....ppFpphlpplct..ctph.cW......ppp.-.hctp..php...th.p.......hppclhl.....s......hopcsspppoallh-hpusc...........h..chlpph.clas...pKshlaoC.lpGhl..s..s+..lpss...L.pscsp.p.lL.+.clsA+slEplcE...cs.a.VSlSAYspcac-...s.lpospcK.lNlQlAlRps....ssKspIsVGTP.IITsEY..... 0 25 42 46 +8514 PF08681 DUF1778 Protein of unknown function (DUF1778) Mistry J anon pdb_1y9b Domain This is a family of uncharacterised proteins. The structure of one of the hypothetical proteins in this family has been solved and it forms a helix structure which may form interactions with DNA. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.18 0.72 -4.15 45 1442 2012-10-02 18:44:02 2006-04-20 11:29:57 6 3 878 2 236 790 75 77.30 30 79.07 CHANGED RlshRlss-p+pLlc+AAslpG.polosFllsuAhctApcllpcpch...lpLotpshptFh.ssLsp...PspPNscLccsht......phpp ......................lslRlss-p+sllccAApltG.pslosFllpuAhp...tApcllt..cp..ch....hhLsppsapthh.phL-p.....P..s.s..s..s..ttLpphhp....p............................................... 0 60 137 192 +8515 PF08682 DUF1780 Protein of unknown function (DUF1780) Mistry J anon pdb_1y0k Family This is a family of uncharacterised proteins. The structure of a hypothetical protein from Pseudomonas aeruginosa has shown it to adopt an alpha/beta fold. 19.70 19.70 21.50 21.10 18.70 17.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.38 0.70 -4.86 2 56 2012-10-11 20:44:44 2006-04-20 11:31:19 5 1 55 1 13 39 4 202.10 81 99.11 CHANGED s-uDaLRLLThQAEQANsFLSNARKW-RERWVCQRhLpuLNlPYRp--FsAsGppPPDVLF+tAuFEVFFVLDEGRRLN-EWR-ELpRRRpAhSLpQLlRREt+PpRIsAuEL.hRLAPTLRKKAHNYpERGhshGELDllAFssLKRtV.DhNo.FPPPTEYLRQGWRSLShVGPTFARVLFAHssAPEFLRuNLGRSILFDsGluL .............DDSDYLRLLTlQAEQANAFLSNARKWERERWVCQRLLQGLNlsaRsEDFsPAupE..PPDVLFRDutFEVFFVLDEGRRLNDEWREELsRRRSAFSLuQLVRREA+P+RIsAuELLtRLAPTLRKKupNY+ERGIDLGcLDIIAFsSLKREVLDLNoHFPPPTEYLRQGWRSLSLVGPTFARVLFAHPGAPDFLRsNLGRSlVFDVGISL............ 0 1 3 8 +8516 PF08683 CAMSAP_CKK DUF1781; CKK; Microtubule-binding calmodulin-regulated spectrin-associated Mistry J, Baines A anon pdb_1ugj Domain This is the C-terminal domain of a family of eumetazoan proteins collectively defined as calmodulin-regulated spectrin-associated, or CAMSAP, proteins. 
CAMSAP proteins carry an N-terminal region that includes the CH domain, a central region including a predicted coiled-coil and this C-terminal, or CKK, domain - defined as being present in CAMSAP, KIAA1078 and KIAA1543, The C-terminal domain is the part of the CAMSAP proteins that binds to microtubules. The domain appears to act by producing inhibition of neurite extension, probably by blocking microtubule function. CKK represents a domain that has evolved with the metazoa. The structure of a murine hypothetical protein from RIKEN cDNA has shown the domain to adopt a mainly beta barrel structure with an associated alpha-helical hairpin. 19.60 19.60 20.30 22.30 19.00 18.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.53 0.71 -4.42 14 287 2012-10-02 14:14:57 2006-04-20 11:33:32 6 6 107 1 153 255 3 125.80 57 10.32 CHANGED ss.cl.ahcsusKSN+slIpNAlsassLuGpsNcsp+ptlLc..clscsp...upHFlILF+..DspppaRulYoh.psps-phhKltGhG..PptlsptMlcshaKYsSusKpFptI..sKphosslDAhol.......K+ ........................TGPKL.aKEPSuKSNKhIIpNAluH.CCL.A.GKVNEspKp+ILE...EhEKS-...ANHFLILFR..DuG...CQFRuLYo.......Y..sP-T..E.........E...lsKLsGhG.......P+sI.oc...pMl-tlYKYsSDRKpFopI.PuKThSsSVDAhTI....+sHLWQsK+................ 2 44 60 101 +8517 PF08684 ocr DNA mimic ocr Mistry J anon pdb_1s7z Family The structure of an ocr protein from bacteriophage T7 has shown that this protein mimics the size and shape of a bent DNA molecule [1]. ocr has also been shown to be an inhibitor of the complex type I DNA restriction enzymes [1]. 
25.00 25.00 83.30 83.00 22.20 21.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.24 0.72 -3.97 3 48 2009-09-11 05:26:13 2006-04-20 11:37:54 5 1 11 3 0 48 0 97.00 83 90.53 CHANGED MSNMTYsNVasHAYEhLKEpIRYDDIR-sDDLSDAIHEAADNAVPHYYADIFSVMASDGIDLEFEDSGLMPDTKDVT+ILQARIYEQLTIDLasDAEDLLN .MSNMTYNNVFDHAYEMLKENIRYDDIpDT..DDLHDAIHMAADNAVPHYYADIFSVMASEGIDhEFEDSGLMPDTKDVIRILQARIYEQLTIDLW.......... 0 0 0 0 +8518 PF08685 GON GON domain Mistry J, Rawlings ND anon Rawlings ND Domain The GON domain is found in the ADAMTS (a disintegrin and metalloproteinase domain with thrombospondin type-1 modules) family of proteins. It contains several conserved cysteine residues. 22.60 22.60 24.80 23.10 20.90 21.70 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.68 0.71 -4.47 11 187 2009-01-15 18:05:59 2006-04-20 11:48:09 6 56 84 0 124 173 0 176.80 43 12.88 CHANGED lspoCpElQphpuhtpDGEYhLpV..cGchl+IYCHGMpocsPpEYloLspGsp-NauphYshRLtssppCP.sGpc+psssspss..hshGtTpFsKlRlDlsshpIhssDapFupop.Gps.PauoAGDCYSss.+CPQGcFSINLpGTGh+lpssspWpspGshsshch....c+spssp+VhG+CGGaCGtChPp.poGLhLpVh ...........................................h..poCpElpt..h.p....sh..pc.......DGEYhL.l....pG+.hl...+...laCtsMpoppPKEYlTL.sp.G.p-NaSElYG.h..RL..ps.Ph...pCPaNGsRppsC..pCpps..hsAGhThFpKlRl..Dl..ss..hp..IhssDhpFApT.......G.........ps.VPaAT..A....GDCYSss.....cCP...Q..GpFSINLhGTGhplspsspWhspGp...a.ss..pl.....p+o.ssp+lhG+CGGaCG+ChPp.tsGL.lpl.h............................... 0 48 54 93 +8519 PF08686 PLAC PLAC (protease and lacunin) domain Mistry J, Rawlings ND anon Rawlings ND Domain The PLAC (protease and lacunin) domain is a short six-cysteine region that is usually found at the C terminal of proteins. It is found in a range of proteins including PACE4 (paired basic amino acid cleaving enzyme 4) and the extracellular matrix protein lacunin [1]. 
26.40 26.40 26.40 26.40 26.20 26.30 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.35 0.72 -3.68 29 739 2009-01-15 18:05:59 2006-04-20 11:50:11 6 140 83 0 426 640 0 34.10 37 3.34 CHANGED pCpDpsp........CtlVhpspLCphpaYpptCCpSCpp ...........pCpDpsp...........aCtlVhptpLC.sptaapptCCcoCp.... 0 50 86 209 +8520 PF08687 ASD2 Apx/Shroom domain ASD2 Mistry J, Hildebrand JD anon manual Family This region is found in the actin binding protein Shroom which mediates apical contriction in epithelial cells and is required for neural tube closure. 23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.67 0.70 -5.06 14 259 2009-01-15 18:05:59 2006-04-20 13:14:24 6 9 75 2 149 229 0 247.60 41 24.19 CHANGED cELs.....pplsstDcShs....shLsP.....csshsLhcuLhshsp.hht.t.tttthh.phs....................s...sossssahssuss+AE.Lh.chpshp......pt.spsE.spslstKKhELlppls+KLpsL+ctpcsLhp-hpsNssLGp...............................-lEutVpphCKPNEh-KF+hFlGDL-KVVsLLLSLSGRLARVEsALsslspsss.-E+toLhEK....+clLpcQpEDAKELKEpl-RRE+sVhclLuphLstEpLtDYp.......................................HFV+MKuALllEQRcL-DKI+LuEEQLcsLp-SL ..........................................................................................Ls.....ppl...D..h.....sh..ss....csshslhpslhs.s..hh....t...ttth..p..........................................................hsssssaassSssKA.E......Lh.ch.p-h...............tt.stp-.......st..-l.stK...K.......ELlp.uls+KLpsLc-tpcsLhp-hpsNssLGp...............................-VEAhlppl..C+PsEh-Ka+hFlGDL-KVVsLLLSLSGRLARVENAL.....ssl.s....ps.....u.....s...p.E..+ps.LhcK.................pclLhpQhEDA+ELKEslDRRE+hVhslLupaLstEpLtDYp.......................................HFV+MKuuLllEQRcL--KI+LG-EQLcsLh-SL....................... 
0 20 30 74 +8521 PF08688 ASD1 Apx/Shroom domain ASD1 Mistry J, Hildebrand JD anon manual Family This region is found in the actin binding protein Shroom which mediates apical contriction in epithelial cells and is required for neural tube closure. ASD1 has been implicated directly in F-actin binding. 25.00 25.00 36.10 26.00 23.30 22.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.19 0.71 -4.50 8 137 2009-01-15 18:05:59 2006-04-20 13:15:24 5 8 37 0 75 108 0 156.60 34 12.14 CHANGED A+LQKS+STssLss.-uEsEssssph+st.....u.sos-uSFssTYK-+LKEAQuRVL+ATSF+RRDL-P.hPtp.h.t..cp.shphs.p.s.su.sspsshs.p...tt..ptso.....................sssusPpVsRIGGRKRhTsEQKh+SYSEPEKlNEVG............lpcEhs.t......spp.pposGohADRhKaFEcsuK ...............................pL.+SpSshtL.s...tsptp.....t.................s..s..-s.s.h.s.psY+spLK-AQuRV...LcATSF+.R..+DLc......Ps...t....hst...tp...shth...hp.s.s..........s.sps...........................t.st.......................tsts...ssRhGuR+RhTsEQKh+SYSEPEKhscVG............lst.t.p......................s.sohAcRh+hFEppsp............................ 0 3 11 26 +8522 PF08689 Med5 Mediator complex subunit Med5 Mistry J, Wood V anon manual Family The mediator complex is required for the expression of nearly all RNA pol II dependent genes in Saccharomyces cerevisiae. Deletion of the MED5 gene leads to increased transcription of nuclear genes encoding components of the oxidative phosphorylation machinery, and decreased transcription of mitochondrial genes encoding components of the same machinery [1]. There is no orthologue from pombe, and this subunit appears to be fungal specific [2]. 
19.20 19.20 23.30 23.10 18.70 18.70 hmmbuild -o /dev/null HMM SEED 989 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.92 0.70 -13.80 0.70 -6.87 19 144 2009-01-15 18:05:59 2006-04-20 13:29:25 5 4 120 0 112 159 3 740.80 21 85.92 CHANGED M..........tustphpshlppCht++lsuspFhsLh...h.c+hPhsttshhphh...hpsp.tp.......................................t.ph-sl.ls.YlstLhh..hl.ssslLpshL...........................sphtp.sppphhh...phohht-.+lhpclhhshhpsphscsltpulthhs......slsphlptllsht.shp.s..st.tshhsp.-tsshh.s..hshllsslhtpphuhplLop...pts...h.tp...uhlshsspsuls...t..p+htthpp.p..hshhsps.spshssshhcshhsphhph...psplhshsshpocutLahYlssh..lsupsh.s...h..............hhp-LIpAuFcshusuh.......hps-sshshaha+sFlls+LPhhlhtht.........ssshsp...sh-.slp+Aluphpsst.sohophhsh...ssoshsDlRp-FlhuhthptLlP.osI.......pplhscs..s.psLslss.hshc-llpphpss.c+.........................hpQllspl-shpust.ssIssAIsElhpchsppp-h.pLpplsstLspps.puLsllLhFps..PpplLp.sL..sphL..........ss.t.hDE..DptE.QsVYppFGslLLLllshpa+Y.....clshhDl........uIsus.pS......FlhcLhttuspSpp.splsppppppLssWlpuLF.sp..GlSD-lMsu..ssPp-hYhLlPhlFpQslhAspsGtl.-hpsLpuGhEYhlpPFLlsuLlhhlhWLtpa.hhcppss.shslphhptllp.....sushSspu..ptlHpsVLpIsupsL.ppL+sh+stpsspp........t..cPhlcsl.spLsh.hs................sshpppcLp.hphh.................sh..psls.hho....s...phssssYsac.llssIchlusp+lLtsllcEL.......+hpsp......sususlslDlusshlsus.s...-shsh..t.hps....s....s....s..............p..hsh+sslph-c-.s.hhs-sDshtu...................u....p......................htshhpphp.t..sshsshstt.....hchctt.s..tsutpsss.............pchsslpp 
...............................................................................................................h.thh.psh.pphss..F.th....h.tp.s.......p.h.....ht.p..t.............................................h.h..Yl..l............phl.shh.....................................................................................tp.............s.....hh..phhh...ptph.p.....h..h..........................h.thh..hhth........t..............t...t.p.h....h.t...............hh..h.th..pth.....th.t.......................t......t...hh..t..t........................h.p...................t.t.hp.h.....phh...t....p..l..ph.......h..s.....shl.ha.s...h..h.tps..t..................................h.-LI.ssFsshuphh........ppp.s...h.hhh.Flhp+lP.hh.th...................sh-.pl.pAlt.....ph...sp...h..........sshsp......................s......ss....slRp-FhhuhhhhtL...s.l..............phl.sp...s.pt.s.tt...hpphltph.t..pp.............................h.plltp.h-ph.tsst..ttlstslhphhtphhpphph.pltplst.L..p..p...slshhLha.p..s.tllpsLhthl............................s..s.t.........h-p..sp................h-.pshappFG.llLhllshh.pa..........sls..cl.................hhts..ps........l.hchhttsp....spt.ppltppp.pp.lssWlpuLF........tp........GloD-hhpu..ssspphhhLlshlapQslhAhp...tGhl.p.p.sLpsGhEY................hhpshLhs..sll.sl.......haLtp......hh....pp.p...p.p..lphh..hlh........ss.o.pt.......pthhpsVLplsu..L.p.L+t.hphp..tp..................t...s.hpsl...h...hp...................s..p..tplt.ht...........................................hh..p..sl..h......p......s...Yshc.hhthlph.ss.phl..llcpl............th.sp............stpsphshshhh..hlsh..h....th.........ht.........................................................................t...p.c..h..ptc...hh.ct.p..ht.................................................................................................................................................t.......................................................................................................................... 
0 28 61 96 +8523 PF08690 GET2 GET complex subunit GET2 Mistry J, Wood V anon manual Family This family corresponds to the GET complex subunit GET2. The GET complex is involved in the retrieval of ER resident proteins from the Golgi [1]. 20.10 20.10 20.20 20.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.90 0.70 -5.17 9 97 2009-09-11 07:52:02 2006-04-20 13:35:14 5 2 96 4 72 96 0 248.30 21 81.04 CHANGED pLS-sEKR+LLRERRQtKhSpGsASuRLNcIhuQupusphso....pSVLDpcsssssssspssps............................ssPElpDl.pshsss.............pspsstpslD...chFpplhphQstG....tssss-sshsslhsMhpphts.................s.stostusppst..hhpppLlcYppYphphhKhhhlll+ashhLhsalYahhp.ss.h..h....hhhptL.p.h.sspsFFolFsThEllhluhYYplhpplphhssssshp.ssKllshsSMV....LP.ltshcshVlhhLpYa-lluMhlsDlshVlVhhGLhoh .............tEpt..RLhRERRpAKh.ppGsASuRLsKITu..u....tssptss.......pSsh-ss.s.sss.s........ss.s.s...sspp................................ssPc.p-..p..t.h..s...........................................t..tptsspshss.p...chhptlht......p...t.t.......................t.........s.ss............t..s...sshs....p....h.....h.p.hh.sh...ss.s..........................s..........................................p.........hh..h.....h....h.h....h...hh...hh.........................................................h.hh.s.Eh............................................................................................................................................................................................................................................................................ 1 10 35 62 +8524 PF08691 Nse5 DNA repair proteins Nse5 and Nse6 Mistry J, Wood V anon manual Family Nse5 and Nse6 are non essential nuclear proteins that are critical for chromosome segregation in fission yeast [1]. Nse5 forms a dimer with Nse6 and facilitates DNA repair as part of the Smc5-Smc6 holocomplex. 
25.00 25.00 44.10 30.90 19.20 21.00 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.73 0.70 -5.98 16 54 2009-01-15 18:05:59 2006-04-20 13:39:35 5 2 28 0 31 46 0 438.90 23 94.46 CHANGED Msusspsp...s.sp.s............hhVp.spcc..s.Ethsuhp.htshspllh.h-t...........t...h......t-llhhhhohssh.ssa.csshlcc.t.hs.st......t.hsppsht.lpphhptLpphcspphsp.slphh+sphhhh.cp.......phchphspphcsptspphhchhhpsst.sp.psh.hlshcsshpshhsh.t.t.thhtsh.hshhhpp.st.hp.h..sLsshl..h..tppssshhhph..tp....hhchlhslhsl....+phhhh.scsstt.pp............lhtFlphltshphhpshspph..hhsshpph.t..hthphhslYhph.-hshs.hahphs+hhsphKtt...............lLppl.pshph....h..sphlhp.hcslhsppchpphhtFh.....................................hlhshssp.hsh..hp.plhphlcs............................phlDhstpss.pch.lsl.p..hh.........................................................................LKshhp ...................................................................................................Mss.........ts..st........p.....h.Vt.h.cc..s.Ehlps.hh.-s.sp.L...Ep.........t.pthhh.s.....sllhhhhh.p...pph.pps..ct.s.hshph.........p.hshts...h..pphhshlpchps.phphh-..hhppphhls.sol......pschppt.thp.tphsct.hslhh+pshSsc.cs.phhohps.Ycphhsh..pcpshh.t.hhhshphspssEFlphhh....oLos.L......p-pss.hh.s.......pch..shh-.Lhslasl....+.hhah.p-ssp.s.spph..........LtsFhp.l.sRphhsthpp.h..hhps..ph...s.p.........sphhssYhp..-ps.hhhah.hh+h.P.hcst...............LLt+l.sscphhp.p.h..pp.lhp.hppLhshpshpphhhFh...................................................lp.hust.hsF............ps.plhphhcs............................shh-.sspss.pcs...lsI...................................................h...................................... 0 4 14 29 +8525 PF08692 Pet20 Mitochondrial protein Pet20 Mistry J, Wood V anon manual Family Pet20 is a mitochondrial protein which is thought to play a role in the correct assembly/maintenance of mitochondrial components [1]. 
21.40 21.40 21.60 22.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.95 0.71 -4.23 6 105 2009-01-15 18:05:59 2006-04-20 13:51:08 5 3 29 0 59 83 0 97.30 28 44.10 CHANGED KKtspDappLPRVPoTpaLctc-hosDlLYSGYRPlhhss+-sPLhpppps+.hcathchp..........chsEPhpP....................................WsSSAhGhEaasEW-NVPs-llKcLKPFcssp.pc ......................lP+VsoTpal.tp-hppphLauGYRPlh......h....ss...pt...............tp.....ptpp.....hph........................t..................................................................................W.sS..G.h.a.s...ac.lP..hhtphKPFc......t................................ 0 6 23 50 +8526 PF08693 SKG6 Transmembrane alpha-helix domain Mistry J, Coggill P anon manual, Wood V Domain SKG6/Axl2 are membrane proteins that show polarised intracellular localisation [1]. SKG6_Tmem is the highly conserved transmembrane alpha-helical domain of SKG6 and Axl2 proteins [1], [2]. The full-length fungal protein has a negative regulatory function in cytokinesis [3]. 42.00 42.00 43.00 42.20 41.90 41.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.96 0.72 -4.64 13 66 2009-09-11 17:06:57 2006-04-20 13:56:40 5 4 37 0 39 61 0 38.70 41 5.65 CHANGED Ksoc..tspsssluluVslPVuVIllVLhhhLhhhaRRpK .......os.tpssuVslAluVulPlGVIlllLhshLhhhaRRsK.. 0 8 20 33 +8527 PF08694 UFC1 DUF1782; Ubiquitin-fold modifier-conjugating enzyme 1 Mistry J, Bateman A anon pdb_1ywz Domain Ubiquitin-like (UBL) post-translational modifiers are covalently linked to most, if not all, target protein(s) through an enzymatic cascade analogous to ubiquitylation, consisting of E1 (activating), E2 (conjugating), and E3 (ligating) enzymes. Ubiquitin-fold modifier 1 (Ufm1) a ubiquitin-like protein is activated by a novel E1-like enzyme, Uba5, by forming a high-energy thioester bond. 
Activated Ufm1 is then transferred to its cognate E2-like enzyme, Ufc1, in a similar thioester linkage. This family represents the E2-like enzyme. 25.00 25.00 74.30 44.60 22.10 21.50 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.07 0.71 -4.76 9 167 2012-10-02 15:28:41 2006-04-20 14:03:43 6 2 145 10 111 163 2 153.40 68 93.63 CHANGED tsT+polspIPLLpTpAGPRDt.-tWlpRLKEEYtuLI+YVEpNKpsDNDWF+lE.SNcpGT+WhGKCWYlHNhhKYEFDlpF-IPlTYPsosPEIslPELDGKTsKMYRGGKICLTsHFtPLWu+NsPKFGIAHALALG.....................LuPWLAsEIPsLl-pGllK.p-c ...........p.sT+cslupIPLLpT+AGPRDt.-hWsQRLKEEYpuLIp...YVcsNKpuDNDWFRLE.SNcEGTRWaGKCWYlHsLlKYEFDlEFDIPlTYPsTAPEIAlPELDGKTAKMYRGGKICLTsHFKPLWARNVP+FGIAHAhALG.....................LuPWLAlEIP-Llp+GllpaK-................ 0 49 65 89 +8528 PF08695 Coa1 DUF1783; Cytochrome oxidase complex assembly protein 1 Mistry J, Wood V anon manual Family Coa1 is an inner mitochondrial membrane protein that associates with Shy1 and is required for cytochrome oxidase complex IV assembly. It contains a conserved hydrophobic segment (amino acids 74-92) with the potential to form a membrane-spanning helix. The N-terminus of Coa1 is rich in positively charged amino acids and could form an amphipathic alpha helix, characteristic of a mitochondrial presequence. A cleavage site for the mitochondrial processing peptidase is predicted adjacent to the presequence. Upon in vitro import into mitochondria, Coa1 is processed to a mature form, indicating that it possesses a cleavable presequence [1]. The eukaryotic cytochrome oxidase complex consists of 12-13 subunits, with three mitochondrial encoded subunits, Cox1-Cox3, forming the core enzyme. Translation of the Cox1 transcript requires the two promoters, Pet309 and Mss51, and the latter has an additional role in translational elongation. Coa1 is necessary for linking the activity of Mss51 to Cox1 insertion into the assembly complex [2]. 
21.10 21.10 21.10 21.10 21.00 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.31 0.71 -4.59 43 323 2012-10-01 19:51:31 2006-04-20 14:04:22 5 3 276 0 200 455 12 113.00 22 61.06 CHANGED hhhh.shaslshssuhhhlhNhpKppSsllsssLatlRpSspsp-hLG..-tIshpsth.....PWlpGplNpl...pGclc.lsFsV+Gs+...u.pGpl+lpusRcsctt.Fplccaslpsc....supp...lcLlc ..............................................hh..h..lhhhshs.s.hhhhh....hs.h...p.......h....pp....o.....s....h..h.ppslhtl+psspshphL....G..-.....s.hppth...............hls.G.phNps.........pG..c..hs..lphsVpG..s+..................u..pGplhhpup.Rps.p.p....aphpphtlphc....psp.l.l........................................... 0 61 111 165 +8529 PF08696 Dna2 DNA replication factor Dna2 Mistry J, Wood V anon Pfam-B_8878 (release 19.0) Family Dna2 is a DNA replication factor with single-stranded DNA-dependent ATPase, ATP-dependent nuclease, ( 5'-flap endonuclease) and helicase activities. It is required for Okazaki fragment processing and is involved in DNA repair pathways [1]. 
20.70 20.70 20.80 20.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.14 0.70 -5.02 31 301 2012-10-11 20:44:44 2006-04-20 14:14:05 6 24 251 0 216 318 21 196.90 29 17.42 CHANGED llhhpcsssppptlhL+ssWhpss.hphGDhlHllu.........phssssshslsssps..hlIlpPDhLlSuTslusSlpC.R+uVLp-ph.ptssssohshlhGTIlHElFQcul.....pssphssphlp.phhppslc..pah.plahlshs.hspltpclpp.thsslppWsppahpppsssp.h..............tpsppttlslscllDIEEsIWSPpaGLKGpIDATlcsp .................................h.....ppttppphshLpstWh...pss..hp.Gshlclhu......................................phs...sp.sp.hl.lsp...sp..s........hlIlp..PDhLlSuTslusShp...C.R+uVLp-ph...+s.............s......sp......ss.....t.shlhGollHEl...FQcul.........................sp....p..h.s...phlp...phhpphlp.......phlpplYt...l......shs..s-s.ppcltp.hhsslppWsppahpppsp.sp.h.................................tpspp.phplscllDlEEplWSPhaGLKGpIDsTlps................................................................................ 0 73 117 180 +8531 PF08698 Fcf2 DUF1784; Fcf2 pre-rRNA processing Mistry J, Wood V anon Pfam-B_13623 (release 19.0) Family This is a family of eukaryotic nucleolar proteins that are involved in pre-rRNA processing [1]. 20.90 20.90 22.20 26.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.25 0.72 -3.98 24 326 2009-01-15 18:05:59 2006-04-20 14:20:19 6 6 281 0 231 325 5 97.50 39 31.77 CHANGED KppccsssspWFsh.cs-..lTsEl++DLpll+hRsslsPc+aaK+sctcp...hPcaFphG.......Tllpsss-a.........au.oRh............s++cRppohl-E...Llp-sshpca.....h++K ...................pppcosGssWFshsts-..lTsElK+DLplL+hRs.slDPK.RaYKK..sctct.....hP+aFQlG.......Tll-uss-F....................as.uRl............s+KpRKp..Tll-E...Llu.Dpchppa..++K................................................... 
0 82 129 195 +8532 PF08699 DUF1785 Domain of unknown function (DUF1785) Mistry J, Wood V anon Pfam-B_1585 (release 19.0) Domain This region is found in argonaute [1] proteins and often co-occurs with Pfam:PF02179 and Pfam:PF02171. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.47 0.72 -4.60 46 958 2009-09-11 17:06:15 2006-04-20 14:26:01 5 11 239 8 597 914 1 53.60 41 6.15 CHANGED hslupSF.Fstp...........hsp.......sLGs.Glpuh+GaapSlR.........so.....ptu.LhLNlDVSsssFhcs ...............................................sVGRSF..Foss.............................tst......tsL..Gu...G..hEsWh........GFaQSlR.........Pu.......pht..hhLNI.D........VSsTAFacs........................ 0 155 301 462 +8533 PF08700 Vps51 Vps51/Vps67 Mistry J, Wood V anon manual Family This family includes a presumed domain found in a number of components of vesicular transport. The VFT tethering complex (also known as GARP complex, Golgi associated retrograde protein complex, Vps53 tethering complex) is a conserved eukaryotic docking complex which is involved recycling of proteins from endosomes to the late Golgi . Vps51 (also known as Vps67) is a subunit of VFT and interacts with the SNARE Tlg1 [1]. Cog1_N is the N-terminus of the Cog1 subunit of the eight-unit Conserved Oligomeric Golgi (COG) complex that participates in retrograde vesicular transport and is required to maintain normal Golgi structure and function. The subunits are located in two lobes and Cog1 serves to bind the two lobes together probably via the highly conserved N-terminal domain of approximately 85 residues [2]. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.70 0.72 -4.14 56 831 2012-10-03 17:31:52 2006-04-20 14:27:19 6 16 297 0 605 1053 2 83.80 20 11.90 CHANGED hsssshcscphhp.t.htpps.....l....pplhphcpplp........pplpphpp-l+phVhpsYpchlpss-sIpphcsph........pplpsplsplppshpphspt ..............................thsspthh.........p..t.ht.p.ps.....................h....pclp.phc.pplp...................................pphcppsp-l+phVhc..........sYpchIp......su.......cpIp.p.hcsph............pplps.lsphppthtth...t................................................... 0 197 331 497 +8534 PF08701 GN3L_Grn1 GNL3L/Grn1 putative GTPase Mistry J, Wood V anon Pfam-B_22650 (release 19.0) Family Grn1 (yeast) and GNL3L (human) are putative GTPases which are required for growth and play a role in processing of nucleolar pre-rRNA [1]. This family contains a potential nuclear localisation signal. 21.20 21.20 21.40 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.09 0.72 -4.08 36 317 2009-01-15 18:05:59 2006-04-20 14:33:43 6 5 270 0 228 328 2 78.00 37 14.88 CHANGED +hRaKIcKKsut+pRKt+KtAK..Kssph+S.+p.p.K..DPGIPNsaPaK-clLpElEcc+pppcEc+ppp+tppptc+pttp.cps ...................+hRaKIpKKstt+pRK.RKt....A..K...Ks.sp..........h.....+p..+t..+.K.......DPGIPNtaPaK-clLcElE...pc+pptc..Eccppp+ptpppppptt...tt................ 0 78 127 190 +8535 PF08702 Fib_alpha Fibrinogen alpha/beta chain family Mistry J anon pdb_1m1j & pdb_2a45 Domain Fibrinogen is a protein involved in platelet aggregation and is essential for the coagulation of blood. This domain forms part of the central coiled coiled region of the protein which is formed from two sets of three non-identical chains (alpha, beta and gamma). 
29.50 29.50 29.50 33.20 29.40 29.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.15 0.71 -4.07 20 256 2009-01-15 18:05:59 2006-04-20 14:42:43 5 5 86 240 81 298 0 130.80 29 29.67 CHANGED scpssstpsD-caGshCPTsCclpshLs+hcpslc.pclpplcshLpphpppsssscphlpplpshhpscpssspssptlhsthpcslccpl..hhhh-pplssp.ppIchLQsslpsppp+Ip+LEscIspthcpC+pPCpcos..pIss ..........pts.shh.DpcaGshCPTsCtltshLsphppslc.pclppLcs.Lpphpppospspphhptlpphhpsctts.psst..sl....st.....hpps.+chh...hhhcth..ph.pplphLpphlps...chppLc.cls...p.CptsCppss..pl..s............ 0 3 9 33 +8536 PF08703 PLC-beta_C PLC-beta C terminal Mistry J anon pdb_1jad Domain This domain corresponds to the alpha helical C terminal domain of phospholipase C beta. 23.10 23.10 23.60 23.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.23 0.71 -4.58 8 215 2009-01-15 18:05:59 2006-04-20 14:48:55 5 16 68 2 101 201 0 168.00 38 15.68 CHANGED hDp-....l.-h+-cppQcLLpLREEQacsp++pKcpHlppthpKLpElAcEpQssQLK+LKElsE+EKKELKKpLD+KRh-+Is..pA+Tp-KttpEc-KpEIN+SHIQEVVQsIKpLEEsQp+RQEKLpEpps-sLQpIp-cEPphQuphht-aptch+pLPsEVpchLppptpcs...........shPstscs .......................................p+hh-L+-+QpQpLL.pLRpEQh.sEttp+ccH......lcphhpKLp-lAcEsQs..sQL.K+LK.EhsE.+Ep.KELpK...pl-cKRppc...Ip......pspo+DKpp.tE.c.KpEl.s+SaIpEsVph.I+.RLcEspp+RpE+L.cpppplhQQIt-c.cs.c....hpt.ph.....tEh.ptphttL....-l...h......................t...................... 0 10 19 51 +8537 PF08704 GCD14 tRNA methyltransferase complex GCD14 subunit Mistry J, Wood V anon Pfam-B_5615 (release 19.0) Family GCD14 is a subunit of the tRNA methyltransferase complex and is required for 1-methyladenosine modification and maturation of initiator methionyl-tRNA [1]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.52 0.70 -4.88 7 1145 2012-10-10 17:06:42 2006-04-21 14:09:47 5 11 975 20 564 2188 468 194.50 31 60.23 CHANGED GalYlLtPTPELWTlsLPHRTQILYosDIuhIhhhLEl+PGoVVsESGTGSGSlSHuIhRolAPTGHLaThEFHppRAcpAR-EFccHtl...sp.hVTVpppDVC.ppGF...tlsthADAVFLDlPuPW-AlsHAhssl.+hcGGRhCSFSPCIEQVQRTCpsLtphGFsEIpTlEVL.psasVRpsplsh.DL.utst.cssp..................ssssss..pSus.................Ph.....................pEshGHTGYLTFAsh ....................................................................sh..shP+t.sQl.lY.sKDh.u.......I.l.h...h.h.-..l.h..P.G.s.p...........VlEAGsGSGuLohsLhRA.l..................u.....s............p........G.p..lho.aE....h...+p..-..ht....ch.A............p...p...s....h........c......p....h...t.h.................sp....h..pl...p...h...p....D.....l..t.....p.......p.......u.h.......................................................t...t...............s............h........D.....t........l..h.....L.....D....h....s..P..W..p...s...l..p...p.st........c.....s.............L...................hs...........G.......G.............h..l.ss.a.s.....s...sl......p.Q..l.......p.......+s.......hc..sL..+..p...t...s...a...s...........-.......p.shEs...hh.Rpac.s.....p.....................................................................................................................................................................................................................................................................................................................................hh............................................................................................... 0 193 343 476 +8538 PF08705 Gag_p6 Gag protein p6 Mistry J anon pdb_2c55 Domain HIV protein p6 contains two late-budding domains (L domains) which are short sequence motifs essential for viral particle release. 
p6 interacts with the endosomal sorting complex and represents a docking site for several cellular and binding factors [1]. The PTAP motif interacts with the cellular budding factor TSG101 [1]. This domain is also found in some chimpanzee immunodeficiency virus (SIV-cpz) proteins. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -8.16 0.72 -3.92 130 21799 2009-01-15 18:05:59 2006-04-24 12:03:20 6 32 62 6 0 17390 0 35.10 70 10.25 CHANGED .QSRs.......E.....PT...APP.A............Esat.hGEE....h.sss...K.....QEt+....D.........+.........PL .............LQS.RP..........E.......PT......A..P.P..A........E.S..F.R..FGEE........h..TPu........K............QE...h........D..............+.E........PL................. 0 0 0 0 +8539 PF08706 D5_N D5 N terminal like Mistry J anon manual Domain This domain is found in D5 proteins of DNA viruses and bacteriophage P4 DNA primases phages. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.96 0.71 -4.12 74 1334 2009-09-10 21:39:52 2006-04-24 16:40:24 6 39 1034 0 232 1230 422 157.50 16 23.13 CHANGED Aphhtcha.....tpp..ltasstht...............Whh.asuh.........Wp.s............ppthtphhppht.....chhhtpt..............................................................th.phhhpttpspthpshlpphpt......................................hssphpchDsc..shhlshsNGll..Dlcs..Gph....................ts..tcs.cchhTp.....hhshsa....................ps.......................ssst.........appaLpc...hhs 
..............................................................................................................................................................................................................................................................................t...............h...a.s..................Wt..................h.t..hh.tphh.......t..h.tth....................................................................................t.h.....t.h..s...pptlp.s..slcthp...........................................h.ht.pph...c.ss......pLlshp.N.G.ll......Dlco......uph...........................................................ps.....ass..cp..hhop....hssssa.............................................ss........................................s.....pssp...........apcaLpph............................................................... 0 80 154 199 +8540 PF08707 PriCT_2 Primase C terminal 2 (PriCT-2) Mistry J anon Aravind Domain This alpha helical domain is found at the C terminal of primases. 20.90 20.90 21.00 20.90 20.50 20.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.87 0.72 -4.11 50 419 2009-01-15 18:05:59 2006-04-24 16:58:30 6 30 316 0 84 430 307 76.60 23 10.50 CHANGED clcshLphlss...s.h.DYcsWlplGhAL+pth.....u........spuhclWcpWSpp........us+Ycs.pc..spppWpoF.....csss...lThuTlhhhA ...................htthlphlss....s.h.sYppWlplG..hAlpsth......u.........cpuhphapcaSpp........us..+Ypt...pc.....scphW..psh........psss........lshuTlaahA........... 0 26 56 72 +8541 PF08708 PriCT_1 Primase C terminal 1 (PriCT-1) Mistry J anon Aravind Domain This alpha helical domain is found at the C terminal of primases. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.29 0.72 -4.16 93 1023 2009-01-15 18:05:59 2006-04-24 17:24:04 6 23 745 0 118 843 165 69.20 23 18.39 CHANGED pttht..stssthGRNssLFchs.tthh.hctlsppt..................lhphspthNsp.h......ssPLstpElcpss+Slh+hphp .................h....httsptuRNssLhphs...tthh.hptlspph.................shphhth.hNsh.h.......ssPLs.tpElpph.hcShh+hph.......... 0 47 77 105 +8542 PF08709 Ins145_P3_rec Inositol 1,4,5-trisphosphate/ryanodine receptor Mistry J anon pdb_1xzz Domain This domain corresponds to the ligand binding region on inositol 1,4,5-trisphosphate receptor, and the N terminal region of the ryanodine receptor. Both receptors are involved in Ca2+ release. They can couple to the activation of neurotransmitter-gated receptors and voltage-gated Ca2+ channels on the plasma membrane, thus allowing the endoplasmic reticulum discriminate between different types of neuronal activity [1]. 
20.30 20.30 20.30 20.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.15 0.70 -11.37 0.70 -5.31 19 714 2012-10-02 19:42:32 2006-04-25 13:25:41 6 68 123 27 374 627 9 196.90 40 6.46 CHANGED ppsSFL+hGDlVSLYsEuosp.........GalSThGLs--+CllpssusshssP...Ph.cFcsClFpl.sPhsphsApcphhput.scpssss..-........tuAphpp+ps........t..lhYGpslQ.LLHh+SshYLos.pphPuhh-KsAh+VsLspsus.Eu..sWahIpPhaKhRStGDsVsVGDcVlLssVsus.......p..LH.uss.thh-ssst.hpVsussppTsWplphahpts-s .......................................................................................p...t.Lp.h.sD.lsL.s.u.shp................hhlu.s.....G.h.s............s..ch..C..h.lpsts.....ss.psP.......................P...chp....ChFhl.p.h...p....hpA........p.......ph...hp...s..t..s...c...tss..pt..t......................................................puAphtt+ps.............................................................lhY.G.p.s.I...Lh.HhpSsh......YLos.pp........uhh-...K.A..hc....VsL.......ppsus......................Eu....sWahIp.P..h......K...R.....S.p.G.....-..p...........V....hl.GDcllL.ss.V...su.t.................p..LH.u.................hss...s...s...........h.......pV......s......u..s..p.p......T.Wpls.hhphp.................................................................... 0 133 156 257 +8543 PF08710 nsp9 nsp9 replicase Mistry J anon pdb_1uw7 Domain nsp9 is a single-stranded RNA-binding viral protein likely to be involved in RNA synthesis [2]. Its structure comprises of a single beta barrel [1]. 
22.10 22.10 22.70 41.20 19.90 22.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.35 0.72 -3.91 9 698 2009-01-15 18:05:59 2006-04-25 14:22:47 5 34 208 10 0 632 0 103.80 56 2.26 CHANGED NNElhPspLKppsspAusD.ssssss.upAhYNstsGppalhAhlSspssLKasKaEpcsG..hlslEL-PPC+Fhl-sPpGPplKYLYFVKsLNsLpRGtVLGhIuATVRLQ .............NNElhPstL+ppustAuss.ssssss.upsaYsspsGtphlhAllSspssLKas+a.pssG..hIhlEL-PPC+FsscsspG.P.cVKYLYFlKsLNoLpRGhVLGtIuATVRLQ 0 0 0 0 +8544 PF08711 Med26 TFIIS; TFIIS helical bundle-like domain Mistry J, Moxon SJ, Bateman A anon pdb_1wjt & Pfam-B_7936 (release 8.0) Domain Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species {1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Mediator exists in two major forms in human cells: a smaller form that interacts strongly with pol II and activates transcription, and a large form that does not interact strongly with pol II and does not directly activate transcription. Notably, the 'small' and 'large' Mediator complexes differ in their subunit composition: the Med26 subunit preferentially associates with the small, active complex, whereas cdk8, cyclin C, Med12 and Med13 associate with the large Mediator complex [4]. This family includesthe C terminal region of a number of eukaryotic hypothetical proteins which are homologous to the Saccharomyces cerevisiae protein IWS1. IWS1 is known to be an Pol II transcription elongation factor and interacts with Spt6 and Spt5 [5,6]. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.24 0.72 -4.46 76 1525 2009-01-15 18:05:59 2006-04-25 14:52:23 6 42 326 13 982 1411 10 52.80 30 10.64 CHANGED clLptLpp..hs.ho......t-hLppTclGhsVstl+Kp............ssppltplAcpLlpc..W+chlp ........................lLptLpp....hs.lo................h-hL.p....p.T.p.lG+sVs.tl+Kp....................pspcl....pphA+pLlpp..W+chl............... 0 274 473 734 +8545 PF08712 Nfu_N Scaffold protein Nfu/NifU N terminal Mistry J anon pdb_2ffm Domain This domain is found at the N terminus of NifU and NifU related proteins, and in the human Nfu protein. Both of these proteins are thought to be involved in the the assembly of iron-sulphur clusters [1][2]. 21.30 21.30 21.40 21.80 20.60 20.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.37 107 1440 2009-01-15 18:05:59 2006-04-25 16:01:34 6 14 1107 3 493 1036 1572 85.90 34 36.69 CHANGED Ip.TEsTPNPssLKFlP.uppllssu..o..h.-FssscpAts....SPLAppLF.plsGVpuVFhGsDFlTVoKsst.s-WsplKPplhuhI.h-ahpuG .............IphEsTPNPsohKhl.....u..ps...lhspt....o....h-a.h.st....p.pstp.......usL.sppLh..pl.-..GVpuVFashD....FloVsK.......psc...s...-WpplhPplhusl.h-.h........................ 0 159 305 413 +8546 PF08713 DNA_alkylation DNA alkylation repair enzyme Mistry J anon pdb_2b6c Family Proteins in this family are predicted to be DNA alkylation repair enzymes. The structure of a hypothetical protein in this family shows it to adopt a supercoiled alpha helical structure. 
20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.32 0.70 -4.83 89 2486 2012-10-11 20:01:01 2006-04-25 16:53:16 6 10 1475 10 471 1879 159 170.60 17 82.37 CHANGED lpppLpshus......sppAtthppahK.......pphtalGlpsPth+pls+phhpphsht.........thsppLapssh.+Et+hhAhpllhphh..pc....hs.s................hphhpphlt..phstW-hlDthssplluphlhp..............thtshlhpWspo-shWhRRsAlltplhatcp........schptlhphspthls-p....-.hIpKAlGWhLR-huKp.c.shltpFlpp........ctpt.hsthuhRpAhchl .....................................................................................................................................p.........t..h....................hhGl.hs.hp.hhcthh.t.................................h.t..hh......t..p....................-.............hh.uh.hh..........t........t.t.............................h..htthh.....ph.s..at.hhD.h....h..hhtth.h.t........................h..phh.pW.h.ts.p.....p...aht.Rh...u...h....h..........h.....h.h.p............tp..t....h..hthltt..h..p.sp..................p....hl.pp.ulu.hLpphutp.p.phhh....phlp.............................................................................................................. 0 196 347 408 +8547 PF08714 Fae Formaldehyde-activating enzyme (Fae) Mistry J anon pdb_1y5y Family Formaldehyde-activating enzyme is an enzyme required for energy metabolism and formaldehyde detoxification. It catalyses the condensation of formaldehyde and tetrahydromethanopterin to methylene tetrahydromethanopterin [1]. 
20.90 20.90 21.30 23.50 18.40 20.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.62 0.71 -4.54 45 346 2012-10-03 01:04:38 2006-04-25 17:32:49 6 2 176 10 133 327 105 146.40 45 77.97 CHANGED lGEALlG-Gs.........ElAHIDLlIGs+sGPsGpAFANuLsststGHTsLLAVlpPNLhsKPsTlhlsKVTIKstcQAsphFGPAQuAVA+AVADuVp-GlIPc-ps-DlsllsuV.......FIHPpA........pDcp+IacaNYpATKhAIpRAhpshPshcclltp+cpupHPhhu ..........hGEuhlstus.........phAHIDLlIGs+s..usstpAFANuLsspppGaTsLLAVlsPNLhsKPsTlhhsKVTIKss.cQAsphFGPAQtAVAcAVsDuVt-GlIPt..-cA-DlhIlluV.......FIc.ps.........D.pcl.chNYpAsp.AltpAhtt.PphpplhtttpphtH.h.s....... 1 34 93 115 +8548 PF08715 Viral_protease Papain like viral protease Mistry J anon pdb_2fe8 Family This family of viral proteases are similar to the papain protease and are required for proteolytic processing of the replicase polyprotein. The structure of this protein has shown it adopts a fold similar that of de-ubiquitinating enzymes [1]. 21.40 21.40 21.40 23.00 19.60 20.50 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.10 0.70 -5.36 5 760 2012-10-10 12:56:15 2006-04-26 12:01:56 5 34 211 22 0 801 0 290.70 29 6.89 CHANGED csKslTIalTEDGVNl+oVVVcsucSLGpQFGsVas+sKshptVhPuDssEDKplhhlPosDhlt..uFKsslpYaoLDsptYuhYhssL..spKWph..VsGFplLcWsDNNCWVNSslllLQtuKl+Fpu.uLssAWsKhluGDsssFVAalYAsssusVG-hGDAc-sLo+LuEHhssDusssLL+hsVCspCGhK.osolsGlEAsIh.suolshDshKTGYS.sCsCGpcssscVlpssusalllsAs-s.PuAss+LpsGluhss..FoGSsssGH.YT.apAAscAhY..DGA+hpKaucpossVTAlah+tuhhopslhPVus 
.....................................................................................................................ph.lhhT.Dssp......hcs..Vt.s.ohG.ph.G.s.shhcstshpt.hhstst......psc.hh..h..s.....D...........................sh.....c........hh....s....hD..t.p.hh.Yhp..sL...hppaphs.h..s..shhslK.sDNNCalsushlhLQth.c.lpFps.hlp-AWtcapuGcsssFVuhhhA..hsshp.hG-.GDuc.hLppLhphhs.h-.sstlhp.pssspCG.K.ppphpGl-ushh.hts.shpphcpGhp.sCsCGpssspplsphcushlhh..p..s........s.....ss.tp.h..pu..s..shss..a.p..G.shp.sGH.YT..a.st..p..p..s..h..h...Dus....+hpKhsc.ph.lT.s.hhh.h..s.h.........t.................................................................. 0 0 0 0 +8549 PF08716 nsp7 nsp7 replicase Mistry J anon pdb_1ysy Domain nsp7 (non structural protein 7) has been implicated in viral RNA replication and is predominantly alpha helical in structure [1]. It forms a hexadecameric supercomplex with nsp7 that adopts a hollow cylinder-like structure [2]. The dimensions of the central channel and positive electrostatic properties of the cylinder imply that it confers processivity on RNA-dependent RNA polymerase [2]. 25.00 25.00 25.70 25.20 24.40 24.00 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.70 0.72 -3.59 7 563 2009-09-11 00:21:17 2006-04-27 12:56:27 5 32 199 10 0 567 0 84.10 55 1.51 CHANGED SKLTDlKCTsVVLLulLppL+VEuNSKhWAaCVpLHN-ILhssDsscAh-pLluLLusLhShpuslD......LscLs-shh-ssolLQ ..SKLoDVKCTsVVLLslLppL+VESNSKhWuYCVpLHN-ILhscDsscAhEKLluLLssLhSh..puslD...........lscLC--hlcssolLQ. 0 0 0 0 +8550 PF08717 nsp8 nsp8 replicase Mistry J anon pdb_2ahm Domain Viral nsp8 (non structural protein 8) forms a hexadecameric supercomplex with nsp7 that adopts a hollow cylinder-like structure [1]. The dimensions of the central channel and positive electrostatic properties of the cylinder imply that it confers processivity on RNA-dependent RNA polymerase [1]. 
25.00 25.00 29.20 28.20 20.50 20.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.44 0.71 -4.71 9 574 2009-01-15 18:05:59 2006-04-27 13:42:27 5 35 209 6 0 578 0 198.40 54 3.56 CHANGED uVASpFsslPSYltYEsA+psYEcAlu...NGuus.QllKpL+KAhNlAKStFDR-tuVQ+KL-RMAEQAhTsMYKEARusDRKSKVVSAMpohLFuMLR+LD.sulssIlspARsGVVPLulIPtsuAsKLhlVlPDhssas+hhstssVpYAGslWsIppVpDsDGcsV+lp-lTpp.....Nt.sLsWPLllsspR.....s.VKLQ ....ulsSEFuslPSYstYEpA+psYEcAlu...NGsus.Q.LKpL+KAhNIAKStFDRDtAVQ+KL-+MA-pAhTpMYKEARusD++uKVlSAhpohLFoMLRKLDssuLNsIlspA+sGsVPLshIPhssAsKLhlVlPDhpoaspss-sssVTYAuslWpIppVhDADGp.lpLsEIshs.....ss.NlsWPLllsspR..tsps.stLQ................... 0 0 0 0 +8551 PF08718 GLTP Glycolipid transfer protein (GLTP) Mistry J anon pdb_1wbe Domain GLTP is a cytosolic protein that catalyses the intermembrane transfer of glycolipids [1][2]. 25.60 25.60 26.00 25.60 24.90 25.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.63 0.71 -4.17 68 702 2009-01-15 18:05:59 2006-04-27 13:59:54 6 10 261 19 468 642 6 140.30 27 56.28 CHANGED spl.sptFLcuscplsplhc.........hl...G.ssFuhlppDltuslp.+lcphh..tsspt...............................hpoLpshlptEhpsth.t.tps.................ouocsLLWLp.........R........u.LcFlthhLcplhs........s.spp.....hsssspcAYspoLptaHuall+pshp.........lAhtusP........sRcphhptlssss ............................................................t..l.sp.FLpusptls..hhc.............hl...G...ssFs.lppDlhusl..p.+lcphh....ss.s.tp...............................hpoLpsllptE.hps..thhp..hps..................ouocuLLWLp..........R...................u.LcFlthhLpplhs.......s.ppp..............lpssh.ppAYspo.LptaHuWll+tshp.....................hAh.hshP........pRppFhttls...t........................ 0 153 245 359 +8552 PF08719 DUF1768 Domain of unknown function (DUF1768) Mistry J anon pdb_2b3w Domain This is a domain of unknown function. It is alpha helical in structure. 
The GO annotation for this protein suggests it is involved in nematode larval development and has a positive regulation on growth rate. 31.30 31.30 32.20 31.50 30.50 31.20 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.15 0.71 -4.49 80 1081 2009-09-13 10:17:57 2006-04-27 14:19:58 6 29 789 1 374 820 99 145.10 35 55.13 CHANGED lhFap.pp......shuhhSNah.ps.h...........pl.....c..........s.......hp...asosEH........Ya.ApKhth....h..p........................................ppp...pIhp..up.....sPtcstp...............LGRp..ht....h+..W.pph+hplMhpushtKFpQ..pt.................c...L+phLLsT....GsphLVEsSshD....phWGsGhs.................................hpGpNhLGclLMcVR.ccLpp .............................................................................Fat.t.......shushSpahs.t.s..h...............................ph..s........................................................u..............hp.....asosE+....................a..h.....ApKhh..........h...t............................................................tp...p........pIhp....ss........sPhp....stp...................hGRp.....hp..c....hp..W..pph+..pl.h....tcuhhtKFpQ......ps...................................c...L+phL..LuT....................u....st...hL....V....E.s....o......pD..............thWGsGhs.................................................hpGpNhLGhhLMclRcpL..t.................................................. 0 172 262 347 +8553 PF08720 Hema_stalk FluC_stalk; Influenza C hemagglutinin stalk Mistry J anon pdb_1flc Domain This domain corresponds to the stalk segment of hemagglutinin in influenza C virus. It forms a coiled coil structure [1]. 
21.60 21.60 301.20 301.10 20.80 19.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.18 0.71 -4.75 3 144 2009-01-15 18:05:59 2006-04-27 14:57:47 5 1 130 3 0 107 0 172.80 99 27.14 CHANGED IFGIDDLIIGLLFVAIVEAGIGGYLLGSRKESGGGVTKESAEKGFEKIGNDIQILRSSTNIAIEKLNDRISHDEQAIRDLTLEIENARSEALLGELGIIRALLVGNISIGLQESLWELASEITNRAGDLAVEVSPGCWIIDNNICDQSCQNFIFKFNETAPVPTIPPLDTKIDLQ IFGIDDLIIGLLFVAIVEAGIGGYLLGSRKESGGGVTKESAEKGFEKIGNDIQILRSSTNIAIEKLNDRISHDEQAIRDLTLEIENARSEALLGELGIIRALLVGNISIGLQESLWELASEITNRAGDLAVEVSPGCWIIDNNICDQSCQNFIFKFNETAPVPTIPPLDTKIDLQ 0 0 0 0 +8554 PF08721 Tn7_Tnp_TnsA_C TnsA_C; TnsA endonuclease C terminal Mistry J anon pdb_1f1z Domain The Tn7 transposase is composed of proteins TnsA and TnsB. DNA breakage at the 5' end of the transposon is carried out by TnsA, and breakage and joining at the 3' end is carried out by TnsB. The C terminal domain of TnsA binds DNA. 22.00 22.00 22.00 22.20 21.70 21.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.45 0.72 -3.97 38 303 2009-01-15 18:05:59 2006-04-27 16:28:33 6 4 250 4 69 203 9 78.00 24 26.68 CHANGED EI..s.hhhcNlpaLhshhpppsstphpht.p.......lhphlpppss..psltslhpphs.t...........slptupsL..lttLlApchlthDl ...................................-l..sphhhcNIpalpsh.hcp.......ts..p...h...sph.ht.........lltpL..ptpst...sslpplhspl.................slssspulhhlptLlAp+hIpsDl... 0 20 36 51 +8555 PF08722 Tn7_Tnp_TnsA_N TnsA_N; TnsA endonuclease N terminal Mistry J anon pdb_1f1z Domain The Tn7 transposase is composed of proteins TnsA and TnsB. DNA breakage at the 5' end of the transposon is carried out by TnsA, and breakage and joining at the 3' end is carried out by TnsB. The N terminal domain of TnsA is catalytic. 
25.50 25.50 25.50 25.50 25.30 25.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -10.10 0.72 -3.99 76 557 2012-10-11 20:44:44 2006-04-27 16:37:50 6 8 444 4 111 422 696 88.40 25 30.37 CHANGED ssVhchc-Q.Pl.......................shphshp.Ghp+s.................hTsDaLlphpss.........t.hhhplK.........ppcLp....c....p+hhcKhph..........p+.pahpp.pGhsatlhTE ...................................sVh-lp-Q.Pl.........................shphs..pp.shp+...................hosDFLlshpss..........phhhlpVKs......................sp.clp................c........pchhpKhch......................t+.tahpt.ps.hpatlhs..................................... 0 23 62 83 +8556 PF08723 Gag_p15 Gag protein p15 Mistry J anon pdb_1hek Domain Gag p15 is a viral membrane-binding matrix protein which is alpha helical in structure. 26.10 26.10 26.10 26.60 25.90 26.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.72 0.71 -4.12 2 200 2009-09-11 06:11:00 2006-04-27 16:54:40 5 3 5 2 0 160 0 109.70 81 35.42 CHANGED GDsLTWSKALKKLEKVTVQGSQKLTTGNCNWALSLVDLaHDTsFVKEKDWQL+DlIPLL-DVsQoLSGQE+EAFE+TWWAIoAVKMGLQINNVsDGKASaQLLRAKa.....sKKQuEssEtY .....GDsLTWSKALKKLEKVTVQ.GSQKLToGNCNWALSLVD.LFHDTNFVKEKDW...QLRDVIPL...LEDVo.QT....L.S.G.QE+EAFE+TWWAIsAVKMGLQINsVsDGKAoaQLL.+AKaE+.+.su.s..KKQuEPpEEY...... 0 0 0 0 +8557 PF08724 Rep_N Rep protein catalytic domain like Mistry J anon pdb_1m55 Domain Adeno-associated virus (AAV) Replication (Rep) protein is essential for viral replication and integration. The catalytic domain has DNA binding and endonuclease activity. 
21.30 21.30 21.60 24.30 20.00 21.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.15 0.71 -4.75 9 213 2009-01-15 18:05:59 2006-04-27 17:04:03 5 2 57 9 0 219 0 158.90 41 26.63 CHANGED hacsllp...........lSsNFlsasssphWp....uhhsL-ps-hPp...LTss-+lhshalschsshhcsPsu.....caFlQhEps..-phFHlHlllussslsshsl...ssplcsths+slachspPplsshFtsshoKK...Gt.phsu.saIssYLhPKl.Ppl.WuhTNlpEYthsC.sLphR+phhcpa .....................................................................................................t.........lphsptlht.hLpphsph.p..tuP.......haFhQlEssp...EctaHlHlVlstssls.sRsl...hsp.lcshhs.phhhphhs.sh.lha..shTpp....G+haps...sppFI.NYLh.Kl..P..l.p.............ll.W.shTNID.t.ahssslSsshR+thhpt.h.... 0 0 0 0 +8558 PF08725 Integrin_b_cyt Integrin beta cytoplasmic domain Mistry J anon pdb_1m8O Domain Integrins are a group of transmembrane proteins which function as extracellular matrix receptors and in cell adhesion. Integrins are ubiquitously expressed and are heterodimeric, each composed of an alpha and beta subunit. Several variations of the the alpha and beta subunits exist, and association of different alpha and beta subunits can have different a different binding specificity. This domain corresponds to the cytoplasmic domain of the beta subunit. 21.30 21.30 21.30 21.70 20.90 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.22 0.72 -4.57 33 606 2009-01-15 18:05:59 2006-04-28 13:40:02 6 34 134 25 288 512 1 45.40 48 6.09 CHANGED KlLhplpD+REat+FE+E+tpu+WspucNPLY+sAToTap.NPsYtup .......KLLhpIHDRREaAKFEcE+t.pA..KWcs.up.NPlYKsAsoThh..Nspapt......... 0 59 81 178 +8559 PF08726 EFhand_Ca_insen efhand_Ca_insen; Ca2+ insensitive EF hand Mistry J anon pdb_1h88 Domain EF hands are helix-loop-helix binding motifs involved in the regulation of many cellular processes. 
EF hands usually bind to Ca2+ ions which causes a major conformational change that allows the protein to interact with its designated targets. This domain corresponds to an EF hand which has partially or entirely lost its calcium-binding properties. The calcium insensitive EF hand is still able to mediate protein-protein recognition [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.41 0.72 -4.00 23 640 2012-10-02 16:17:27 2006-04-28 13:56:49 5 73 193 3 326 1364 9 67.50 48 5.87 CHANGED -psouEQlhpuF+tl.AssK.sYlTcp-L+psLsP-ps.-aClppMs.ap.......ssp...pps.su.aDYhuFspsLau .......................-scTuEQVhsSF+hL..As.s...K..sYITt-ELR...+...p.........LsP...-.QA...-YClpRMsPYp.....................................GPc......usPuA.hDYhuFosuLat....................................................... 0 82 120 210 +8560 PF08727 P3A Poliovirus 3A protein like Mistry J anon pdb_1ng7 Domain This domain is found in positive-strand RNA viruses. The 3A protein is a critical component of the poliovirus replication complex, and is also an inhibitor of host cell ER to Golgi transport. 22.30 22.30 23.00 22.40 20.90 22.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -9.08 0.72 -4.63 21 1219 2009-01-15 18:05:59 2006-04-28 16:12:37 6 15 264 2 0 1300 0 56.90 55 2.66 CHANGED GP..a+.lpIsl..csPPPsAIsDLLpSVDotEVRcYCccptWIVPt...psslERslNpA .........GP..a+sl+Isl..psPPPsAIsDLLpSVDSpE.VRcYCc-pGWIlPt..ossplERclNRA.... 0 0 0 0 +8561 PF08728 CRT10 CRT10 Mistry J, Wood V anon manual Family CRT10 is a transcriptional regulator of ribonucleotide reductase (RNR) genes [1]. RNR catalyses the rate limiting step in dNTP synthesis. Mutations in CRT10 have been shown to enhance hydroxyurea resistance [1]. 
21.90 21.90 22.00 22.10 19.00 21.80 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.22 0.70 -6.54 9 141 2009-09-13 09:52:06 2006-05-02 12:36:21 5 8 105 0 106 153 0 320.30 15 55.24 CHANGED hsPtchhhssh-phshpclhhp.ssppapsthplphhu............FKNNlhshhp....tsaLhlussoplhlashDslsshsph................hc..scsshsshhDc.hlSohP..saTINal.phssahGpphLssChDsGhlhhWhhsoIlp.hppa..........ps-.c.............th..psR...hplpPc...hpl+hcu...SsWuhDhh.sa.....sspslIssucNu....puloL..hhachs..c-RaYhtc....shp..............h.HNlPslSFlssp.sst.ha.lhVussoIsGplhohpFp...Fp.pps...............................................h.clphssshh.o+shhu-DsWTlpPlSscsFhpVsuhchlsss.pphpccpplppIhp-StlLss..s.s..os.hGhuAphp.apsPVssl..........................................................................................................................ps+pss...ppps...............................................................................................phpss+.oslc--a................................pplHctlcp.h....ph..tpcpps.G..h...........ps.hlhlTTs++lsLh+scsLhssuuTp.clFsLp..shsptscho....NRIShsphI.ELsChlsuSQ.GLlSlhRLspa+GlYuhRQEaIhs.....................................sspslshshtthpslshltl..............p-psh...h.hhhlYVsYssulh.sYcI 
......................................................................................................................h......................................................................................................................................................................ptlN.h.hhs.h.........htp-hlhhshDsG.lhha.hptlht.hpph...................................................ts.....h....ht....SsWulshh........p..phlAsutNp............lsl..ahht.........ppt...........t....................................................................................tNlPslsFhspp........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h...................................................................................................................................................... 0 18 54 90 +8562 PF08729 HUN HPC2; HRD; HPC2 and ubinuclein domain Mistry J, Wood V, Balaji S, Iyer LM, Aravind L anon manual Domain HPC2 (Histone promoter control 2) is required for cell-cycle regulation of histone transcription [1]. It regulates transcription of the histone genes during the S-phase of the cell cycle by repressing transcription at other cell cycle stages. 
HPC2 mutants display synthetic interactions with FACT complex which allows RNA Pol II to elongate through nucleosomes [2]. Hpc2 is one of the proteins of one of the multi-subunit complexes that mediate replication- independent nucleosome assembly, along with histone chaperone proteins. the Hip4 sequence from SCH. pombe is an integral component of this complex that is required for transcriptional silencing at multiple loci [3]. HPC2, ubinuclein/yemanuclein, and the cell cycle regulator FLJ25778 share a conserved domain that is predicted to bind histone tails [4]. This domain is also referred to as the HRD or Hpc2-related domain. 26.90 26.90 26.90 27.10 25.20 26.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.12 0.72 -4.10 54 344 2012-10-09 00:06:17 2006-05-02 12:49:19 5 6 246 0 236 333 0 52.80 40 6.69 CHANGED pp+ccpch.c....tttYDp-DsFIDDoE...hh-Ehh.............hssppsGFalspGsLt. ..................t...tcpc..chhshshtYDpsDsFIDsSE...ha-Ehh.............hssptsG.FalssGsL................... 0 70 121 180 +8563 PF08730 Rad33 Rad33 Mistry J, Wood V anon manual Family Rad33 is involved in nucleotide excision repair (NER). NER is the main pathway for repairing DNA lesions induced by UV. Cells deleted for RAD33 display intermediate UV sensitivity that is epistatic with NER [1]. 21.70 21.70 23.90 35.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.06 0.71 -4.79 9 47 2009-01-15 18:05:59 2006-05-02 13:22:52 5 2 40 0 30 41 0 135.10 37 77.88 CHANGED ht.ptpho..........+lPsElEDEILEtYuphot........ppDhsls-LPpaFcsLplPpsa...........Ychl+sc..clpl-uT.............................DIlDh-KLlpssh+LLhFhsN.opIcspWcLhlssuuc..tpshs.h.lpsahLol.DLpKlpsplshDps. ...........ps......p..........cls.ElEDEILEtYup.sh........-pDhslscLPpaFccLpl..sh...........achl+sc..slhl-uo.............................DllDhsKLlpsTspLLhhhsNhphIcc.WphllpssGc.ssshspstlcsahLol.DLpKlps.lshDpp.s........... 
1 2 14 27 +8564 PF08731 AFT Transcription factor AFT Mistry J, Wood V anon manual Family AFT (activator of iron transcription) is an iron regulated transcriptional activator that regulates the expression of genes involved in iron homeostasis . This family includes the paralogous pair of transcription factors AFT1 and AFT2. 21.60 21.60 21.60 22.40 21.50 21.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.69 0.72 -3.87 8 82 2012-10-02 23:28:20 2006-05-02 13:51:45 6 5 39 0 46 87 0 88.90 35 18.93 CHANGED FcDKsDIKPWLQK.IFYPQGI-IVIERSDuhKlVFKCKusK+pcstsc........................................................................KKKpus....u+assCPFRVRAsYSl+pK+WolVVlNNsHoHsL ............................................................................Fps+p-l+salpc.hhhspGhtlVIt+Ssp.tlhFpCc.stphpt.t...............................................................................pctsho+h.pCPF+lRAsaSh+pptWslslhsstHsH........ 0 10 31 46 +8565 PF08732 HIM1 HIM1 Mistry J, Wood V anon manual Family HIM1 (high induction of mutagenesis protein 1) plays a role in the control of spontaneous and induced mutagenesis [1]. It is thought to participate in the control of processing of mutational intermediates appearing during error-prone bypass of DNA damage. 
28.70 28.70 28.70 28.70 28.60 28.30 hmmbuild -o /dev/null HMM SEED 410 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.48 0.70 -6.01 4 31 2012-10-10 17:06:42 2006-05-02 15:48:55 5 2 30 0 20 61 6 330.40 38 90.90 CHANGED LLhGuouLsGKhVhpphLcls.Ylssh.-L.phhLp....ulpphppNlhlppH.........................lhshsR.hhp..p......phh+o.sh.s........htshshpGSQa.......................a.pphpslthDl-plspo.sshs.a-s....ahKscsc..pths.huhhhthK....pasaslpYsouctchlplssshsVsQll.PcSppWPpllP.+IFo..spl-thsh.pc..hP.......ls-IpThISoLGSTSsRs++opsspsalDYhLNhslsKs.Fs....ssssKphlIsTSFNNhhlSps..YF+hKt+LEssLs.slss.LppLsILRPGPLlGcHGs.....PoNssl.c.sushLc+hhhYKKslhpphhpals-h+clGhsTKsSElVApshY+hPGuhllGYslPst+VAalhuhtA......lc+hh+pu..hhcVs..SSpphDshc ...............................................................hhGuouLhGphhLpphLp.phYlts..ppl.p.hhp.....shpp..h.tphhhpph.........................hhshsRp..p.p........hhp..ph.t........hpshshpGuca.............................................a.cphp..phc.hpphs.pss.sshs.acs.t..hhcssscp.pshsthShl..hp.K....phsapLpYsssctchlpIhhshsVhQllhscSppWPcLLP.cIFo.........tplcthshcp....cp....hhP......sLs-.IsTMlC..oLGSTSAcs++opsspsa.sDYhLsasLAQp.Fo......s......Tt.....sK+lVlsTuF.NNshlSphFpYFRhKuKLENDLcpsLs...s+LKcLVILRPGPhsGp....Hus.....Plssplt+...ssshL.p+hhha.+hhh.hphthhtph+phG.t.+ho-llAp.hYphPGs.hlGYslsst+su.hhs..u......hp........p.....cl...pS.phD...s.......................................... 0 3 9 16 +8566 PF08733 PalH PalH/RIM21 Mistry J, Wood V anon manual Family PalH (also known as RIM21) is a transmembrane protein required for proteolytic cleavage of Rim101/PacC transcription factors which are activated by C terminal proteolytic processing. Rim101/PacC family proteins play a key role in pH-dependent responses and PalH has been implicated as a pH sensor [1]. 
23.70 23.70 24.50 44.80 22.00 23.60 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.23 0.70 -5.61 24 159 2009-01-15 18:05:59 2006-05-02 16:21:54 5 2 116 0 116 156 2 343.90 28 55.16 CHANGED sCpshtLssGhll.hsh.t........hphhhsAhapspCsts.........h.ssh....ph..................shppa....ppsppssFhhuhlsllaslusssVhsWhLslllhl....pP.................................ht....................tpshLh+luslhsulhhTlhlscshptlccp.h.pGhhcutpLhchlhsshshpllcllsshhhplsplQllhRlFsRp+cKph..................lhhlGhhlhlsspllaulsphp..ss..........tppstshlsshhhLhclultlhaAuhlhhYslpch.....................+hth...tt+phhhLsllolhllhl.lshFlhDlu....shhltpWuchlshlhtlhssVlsWEWls+lphlE+ctc+puVLGR+l.c.D-hhphc.s....ps ................................................Cpsh.Lss.Ghlh.h.s.t..........hshstsuhat...Csts...........s....................................s.tth.......s...hp-sF..hShhshhaulusssVlsahLslllhl....oPp..................................................shs..t+.shL.+lusLhsAlslTlhhscoh.pshccQat.hG......h.cuptlpsplh...........sshph+llclloshhL.lsQlQhlhRLFs.Rp+-Khh..................IhasGhhLhl.hsslh.slspFh..sst.................spphhsslsshsYLhcLulshlYAuhllaYshpK+..............................................................................+hsa.....th+phhllslLollslllslsFFl.hDlu....p..lssWu-hhpalstssuoVlVWEWlp+lEtLE+cccKculLGRcla-..D-hhchs.t..p................... 0 17 54 98 +8567 PF08734 GYD GYD domain Bateman A anon PSI2 target Domain This protein is found in a range of bacteria. It is usually less than 100 amino acids in length. The function of the protein is unknown. It may belong to the dimeric alpha/beta barrel superfamily. 
25.20 25.20 25.30 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.64 0.72 -4.26 39 239 2009-01-15 18:05:59 2006-05-02 16:35:35 6 2 184 0 114 254 1015 90.70 27 86.84 CHANGED YlslssaTspGhcsltcs..scRtcAscphlcs.hGGplcuhahohGp.YDlVslsEuP.DssssuthuLsluutGsV+o.pTlpuhs.s-hpchlt ..............................alhlhsaTspGhcsltcs....scRhcss.pphlcp..h..Gsclc..shYhs.hGp.aDhlslsEuP.Dstshsthu.ltlsutGslco.cThpAhsh--htphl.t.................... 0 30 67 88 +8568 PF08735 DUF1786 Putative pyruvate format-lyase activating enzyme (DUF1786) Bateman A anon PSI2 target Family This family is annotated as pyruvate formate-lyase activating enzyme (EC:1.97.1.4) in UniProt. It is not clear where this annotation comes from. 25.00 25.00 31.20 81.50 22.30 21.50 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.81 0.70 -5.21 15 82 2012-10-02 23:34:14 2006-05-02 16:42:18 5 1 81 0 47 80 5 247.50 34 72.86 CHANGED MGGGPlTtAl+cHlcpGhcVhhT.pAAhTl+DDL-+V+uhGIpIs--s.........sssslphtDlDhttlpshlusasl-hshs..lulAVQDHGhuPs.hSsRhaRFc.hh+chL.ppGsp.-cFla..c-sPpt..aoRM+Alhcslccsshtt....hlMDTs.sAAlhGuLp-sch....shpshlllslGNGHTlushl.ccs+IpGlFEHHTshLssp...KLpphlc+hssGcLosEElasDGGHGAal.....suhspscslllTGP+Rplh ....MGGGshstAlppHlptGhpVhso.pAAhTlcDDL-cV.+.s.hGlpIsccs...............ssstlhhtDhDhthlpshhsshulchs.p..lslAsQDHGasss.tSsRhhRFp.hhcchL.pp...........ssp.tshha..pcsPp...hoRhpultcsltts............hlhDTu.sAAlhGsLt-spl...pttpshlllNlGNuHTlushl.pss+IhGlFEHHTuhlssp...cLtphlc+hppGpLssEEVas-sGHGuhh.........tths.thc.lhlsGP+Rph.h.. 0 18 37 43 +8569 PF08736 FA FERM adjacent (FA) Baines AJ, Finn RD anon Manual Family This region is found adjacent to Band 4.1 / FERM domains (Pfam:PF00373) in a subset of FERM containing protein. The region has been hypothesised to play a role in regulatory adaptation, based on similarity to other protein kinase substrates[1]. 
28.70 28.70 28.90 28.80 28.60 28.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.15 0.72 -4.16 44 1186 2009-01-15 18:05:59 2006-05-03 11:03:47 6 34 97 0 506 928 0 46.50 43 5.71 CHANGED sphh..shGS+FRYSGRTptQshctspp..htRps.pFpRs.Spptsppps ........pFh..shGS.+FRY....S...GRTptQshcsuph..htR.sspFE.RssSK+hspp................ 0 80 119 275 +8570 PF08737 Rgp1 Rgp1 Mistry J, Wood V anon manual Family Rgp1 forms heterodimer with Ric1 (Pfam:PF07064) which associates with Golgi membranes and functions as a guanyl-nucleotide exchange factor [1]. 31.00 31.00 31.00 31.10 30.40 30.40 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.22 0.70 -5.40 30 392 2009-01-15 18:05:59 2006-05-03 12:51:38 5 4 255 0 271 363 0 254.90 21 59.05 CHANGED psLhhuasQIpGphplstshls....sshpt.ppp..hht..........................tt.tp.tp.t..............h.phh.sssphstshsthhststpss................................................................................pcslPlhoTPpolLFsDLpLsPGEoKoFpa.shsLPpsLPPSa+.Gps.........l+lpYsLslGspc....................sttsppsptlplPlRVhs.hptt..................h.tsl.pshhhlccptplphhssppptsssshht............................spcss..........pphpphhchlppLlspcs.p....................p.p...........................................................................tspapIspsscpluplsLsKshY+lGEslshsl-hssss....htshtlsssLEopEp.................lssph..............................tlpusspspp.............sstpshsppppsshpsp.plshplsIPhssT....PpFpTshlp.....LcWpL+FcFVhsp 
.............................................................................................................................................................................................................................................................................................................Lhsph.l....sp...........p.a....h.p..lP........hPsoap.uts.........hph.Y....l..hluhph..............................hplPhplh........................................................................................................................................................................................................................................................................................................................................................................t.aplt..stthlshh.h...cs...h..at..hG-.l.hhhpht.tt...................h.shth.pstLps..Eh.................................lt.th..................................................................................h.hhsp..t.sh..h.ph.h..slPhpss....sth.....Ts.......hhp...........hpW.lphcFhh..t......................................................... 0 83 135 213 +8571 PF08738 Gon7 Gon7 family Wood V, Bateman A anon Wood V Family In S. cerevisiae Gon7 is a member of the KEOPS protein complex. A protein complex proposed to be involved in transcription and promoting telomere uncapping and telomere elongation [1]. 21.80 21.80 22.10 21.80 21.40 21.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.39 0.72 -4.03 18 104 2009-01-15 18:05:59 2006-05-03 14:02:37 5 1 102 0 75 92 0 97.40 31 80.47 CHANGED l.sAsYouPsssp.csFchshs.P.........ot.Sths.pAuspsp-csocsKs.............sYLucLRspLosLQDcINhFLTpRMEc-Kpttttputt.t......p-cpEcchhs .............................................hA.YpuPs..p..ppFt.t.s.s......................su...t.s.psp.sops.+s.............sYLupLRstlssLQ--INpFLTpRMEc-Kpcptt.tutttp..........p-pc-cc.hs..................................................... 
0 11 35 63 +8573 PF08740 BCS1_N BCS1 N terminal Mistry J, Wood V anon Pfam-B_10126 (release 19.0) Domain This domain is found at the N terminal of the mitochondrial ATPase BSC1. It encodes the import and intramitochondrial sorting for the protein [1]. 22.40 22.40 22.60 22.40 21.80 22.10 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.07 0.71 -4.68 55 539 2009-09-16 12:49:40 2006-05-03 16:25:19 6 9 253 0 410 534 1 175.50 25 35.62 CHANGED hhlGsshuhh+phhphhhph.....hpcphhsolElss.....cDcsYsallpW.huppshsph.pp..ht..t....................................................phthpsphtpcp.................ppphpasPu.G.sHahhYcG.p..hltlpRp+.pppthshss.t.s.........hEslsloslupstplhpcLLpEA+phhhpppcu+..TllYputusp.........Wpp.stsRtpRPlsoV ...........................................................................h.hhusshthh+pshphshth.....hpcphhsol-lss.....cDc.sYsal.lpW.lsppsttp..t...pp..ht.............................................................t.t.h.p..sph.pcp.....................psphpahPu.G..pHhhh......Ycs.p..alhlp...Rp+..ppp....hshts...s.s................hEsloloslup.s....pp....lhpclLpEA...+p..hsh.pppcu+..TllYpuhusc.........Wp..stsRtpRPlsoV................... 0 114 215 349 +8574 PF08741 YwhD YwhD family Bateman A anon PSI2 target PSI-blast from BH3813 Family This family of proteins are currently uncharacterised. They are around 170 amino acids in length. 
25.00 25.00 81.10 80.80 20.90 19.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.97 0.71 -4.84 13 388 2009-01-15 18:05:59 2006-05-04 13:58:48 5 1 383 0 44 165 0 163.10 67 96.27 CHANGED KKp..huFNIIKsDsTDGHGGaGsGoLSL-NVSPVhlDlE-pcAFVDlGAMHARSsVEKtIKFlss+--Vs..sGK.YWLVWVTl-RpE-GPYYAGVTACEMsVs+EhRR...GYKSLPEHVN+MDKSlK++IlV-HMD-sSKplLu-FLcsHs.sMWp+Ss-EL+cuLt ..............KKshsFNIIK.NDPhDGH+GhslGSlSLDNIuPVFIDVtsKEAFlDIGuMHARucVEKGlKalT-KstV...puKtYWLsWVTsERsEp.GPYYAGlTAC.hhVs+sIRR...GYKShPEHVNhMDKSMK+HIIlDpls-csKtlL+-FLhsHsEuMWpcSS-tL+pAh.p... 0 14 27 36 +8575 PF08742 C8 DUF1787; C8 domain Mistry J, Liu XH anon Liu XH Domain This domain contains 8 conserved cysteine residues, but this family only contains 7 of them to overlaps with other domains. It is found in disease-related proteins including von Willebrand factor, Alpha tectorin, Zonadhesin and Mucin. It is often found on proteins containing Pfam:PF00094 and Pfam:PF01826. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.93 0.72 -3.73 131 2979 2009-01-15 18:05:59 2006-05-04 14:10:28 6 390 118 0 1758 2535 1 74.30 30 11.28 CHANGED pttspphCphlh.sst...FspCHshVsPpsahpsClhDhCt.....stss........psh.CsslssYApsC..pptGlslp..WRs...shCs .........................................t...spphCshlh..sss..........Fts.CHs..h..V...s.P.p.s.ahcs..C..lhDhCt...............ssss............p...psh.C.sul...ssYApt.C......p.p...t..G...l......s...lp.....W...Rs...shCs.................................................. 0 299 442 1045 +8576 PF08743 Nse4_C SUMO_ligase; Nse4; Nse4 C-terminal Mistry J, Wood V anon manual Domain Nse4 is a component of the Smc5/6 DNA repair complex. It forms interactions with Smc5 and Nse1 [2]. The exact function of this highly conserved C-terminal domain is not known. 
19.60 19.60 19.80 19.90 18.20 19.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.07 0.72 -3.96 24 357 2009-01-15 18:05:59 2006-05-05 09:24:19 5 11 270 0 259 358 0 89.00 29 23.74 CHANGED pslshhcFVlNP...pSFuQTVENlFalSFLl+-GtspIphc.pcslPhlps..t.s.tsstspstsst+pQuVhslDhcsWcphlchasI..pEshlspc ...........t.slshacFllsP...pSFupTVENlFalSFLl+DGplplphD.....pc.s....l.......Phlps...........t..s..........ts...p......t......hp........p................pshtppQh.lhslshpsWcphlc..hapl..pcshl....................... 0 83 137 208 +8577 PF08744 NOZZLE Plant transcription factor NOZZLE Mistry J anon Pfam-B_86265 (release 19.0) Family NOZZLE is a transcription factor that plays a role in patterning the proximal-distal and adaxial-abaxial axes [1][2]. 20.20 20.20 20.30 21.00 19.40 19.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.26 0.70 -5.06 2 13 2009-01-15 18:05:59 2006-05-05 10:55:01 5 1 7 0 9 13 0 216.80 34 57.38 CHANGED MATSLFFMSTDQNSVtNPN-LLRNThLV...sGEIRTE.shKSRGRKPGSKTuQQpQKcPTLRGMGVAKLER.hhEEEKKph.ssAs.GDTS..Au.sNsATRhP...D.GVVLQGFPS........SLGu...sRhhCGGsGSGQlMIDPVhSPWGFVETSu..HELSSIsNPQMaNASSNp.CDTCFKKKRLDGDQ.NVVRSNGGGFSKYTMh..PPPMNGYDp.LL.sD..QRSQGFhYDpRIARuA..sAuSsohNPYFNEATNhTG.shEEFGSh...NPRNGotGVKEYEFFPGKYs-hhuhsh.suo.VGDCSPN...TIDLSLKL ........................s..........................................h.p.shpoRuRKssoKpupppQKK.s.RGMGVAQLERlRIEEcpKph.shAs...us.....sS.........hp...s.phsp.s...s.G.s...............................G....sh..s.uhGuG.hhlsPhhs...s.t.st..st......+ELSShsp.........p.CDhCFK................................................................................................................................................st............................................. 1 2 8 8 +8578 PF08745 UPF0278 UPF0278 family Bateman A anon PSI2 target MJ0950 Family Members of this family are uncharacterised proteins about 200 amino acids in length. 
22.50 22.50 22.50 22.50 22.20 22.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.46 0.70 -5.08 11 103 2012-10-03 20:43:45 2006-05-05 13:00:30 6 2 91 1 79 113 3 182.50 35 88.86 CHANGED RFVLDTosFT-splRcthG.cslsEuscphLDLIucARlphsISCYlP.PoVYpElppFhcp.ssstElls+l-TWllKKoPsRYEl+IPAplFYEYlc-lRcRls+GhRlAEctlh...Euuspsh............cpp...-.lGclIschRcKYRpsLRpGhLDSs.DlDVLLLApELDAuVVuuDpGIc+WA-+LGLRalsussFPphLcEYLch. ..............................................................RFlLDTohhsssplRp.hG.ps.scuhpphlclhtpsch.htlphahP.solYpElhthhp.....tclhscl-hallhKsPs+aplpIPu.lhYEalc-hRpRls+GhRluEctlh...cus.ts................................p.lsphIpphRc+YRpALRp.GhLD..SptDlDlLLLAhELDAslVosDtGIppWAc+lGl+alsutpF..hLcphlp.h.............................. 0 19 42 61 +8579 PF08746 zf-RING-like RING-like domain Mistry J, Wood V anon manual Domain This is a zinc finger domain that is related to the C3HC4 RING finger domain (Pfam:PF00097). 22.90 22.90 22.90 23.20 22.80 22.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.95 0.72 -3.96 12 210 2012-10-03 15:03:13 2006-05-05 13:42:37 6 5 176 2 141 204 0 43.10 37 14.10 CHANGED CphC+clshpG.tCss.sCshc............hHhtChppahpppss..hCP...sC ...CphC+cllhtGppCs..s...pCsh+...............hHh.Chp+aapppp.....t...pCPpC....... 0 27 60 102 +8580 PF08747 DUF1788 Domain of unknown function (DUF1788) Bateman A anon PSI2 target Npun02004481 Domain Putative uncharacterised domain in proteins of length around 200 amino acids. 
21.80 21.80 22.10 22.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.50 0.71 -4.27 24 273 2009-01-15 18:05:59 2006-05-05 16:38:57 6 1 261 0 68 239 41 125.00 30 65.39 CHANGED lhclsLa-lsl-lLc-Rtlh-cllchEpppGp-tlhctLpulLc.......pcclsptIscp.htstshDllhLoGVGpsaPhl.RoHslLNNLpshhtppPllMFaPGpYs.....GpsLcLFsplc--s...YYRAFpll ...........................................h..phslaplhl-hLpc+..s.......h..h.-...p.....hhp....hE.p..h....Gt...-tlhc.tlps.Lp................pcclsphIsc+....h...t......h..s.s.ps..lVhLTGlGpsaPhl.RuHplLssLp..s..hhs.psP..llhFaPGpYs.....GpsLplFs....phcspN...YYRAFpl......... 0 30 49 59 +8581 PF08748 DUF1789 Domain of unknown function (DUF1789) Bateman A anon PSI2 target CAE43632.1 Domain Putative uncharacterised domain found in phage-related conserved hypothetical protein from Bordetella. 21.10 21.10 21.30 21.80 20.80 21.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.51 0.71 -4.04 16 124 2009-01-15 18:05:59 2006-05-05 16:46:41 6 4 104 0 23 102 2 103.00 28 77.82 CHANGED MA..KhsL.........utsP.TF+hsVplshhsGpsscl.F.............TFKah..sppElt-lhppts....................pp......................................ss-hlpplssGWs.l--cFs-ENlphLlspasuAspAllsAY.pAlstsRlGN ....................................................Mu...hhpL..........spP.TFchsVpIPhsGt.css..l.ph.............sF+ah..shp-ht.ph.p..t....................tt........................................sh-hhhchlpGWs.......l-..-.....sFscENlphLhssYPt.AspAlhssYhptlhssRttN............ 1 1 11 19 +8583 PF08750 CNP1 CNP1-like family Bateman A anon PSI2 target CAB84161.1 Family This family of proteins are likely to be lipoproteins. CNP1 (cryptic neisserial protein) has been expressed in E. coli and shown to be localised periplasmicly [1]. 
25.00 25.00 77.60 77.30 23.30 22.50 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.78 0.71 -4.49 22 187 2009-01-15 18:05:59 2006-05-05 16:59:41 6 1 186 0 53 147 10 146.50 39 75.01 CHANGED ..h.pstsWcEtps.tLP.shPpspsLlsFsV....ussos.pFhlDscSloVG.sDGVVRYslVlsSsuGA+NVsYEGIRCsoh..........Eh+hYAthsss.spWstsppsp....................Wptlpss.shNpapusLhp-aFCssts.ssts..sspllpsL+s ..................sttppacEpps.pLP..PLP..psp..shl.Fsl....s.sss.+ahlDupSlslu.sDGlVRYhlllpSss.Guc...NlsaEGIRCs.sh..........ca+hYAhhssspspW.pspps-....................Wp.Itss.shNshttsLhpphFCpsth.Psts..spsllppl+............... 0 9 30 41 +8584 PF08751 TrwC TrwC relaxase Mistry J anon pdb_1omh Domain Relaxases are DNA strand transferases which function during the conjugative cell to cell DNA transfer. TrwC binds to the origin of transfer (oriT) and melts the double helix. 22.80 22.80 23.00 23.30 22.30 22.70 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.93 0.70 -4.99 34 741 2009-01-15 18:05:59 2006-05-08 13:13:30 6 26 467 20 202 783 45 275.90 33 24.11 CHANGED thpssusussYYpsps.....................sYYt......pspssspW.hGpGupp.......L..........GL.sGpl..s.ppphtpl......l.sGphP...........sGptlsps.t....t.....................................................................................................psGaDLTFSAPKSVSllhhluuD...ccll.pAHppAVstulph.lEcphu.sRtsppGp....th.pss.sLlsutacHcoSR...........st.........DPpLHTHsllhNhs........................................hssDG....pWRoLsuc..............placpphthGtlYpupLtpcl.ccLG..aphc.........cpGsaEltGls.pchlctF..SsRsppIcpthuc.u.............ssstttcph......AshsTRpsK.pps.shsplcptWppcupph..Gh...-h 
...........................................................................................................................................t....ttuhtYatpp.................................sYYs............sp....stpW..hGpG....Apt...................L.................GL..pGps.....s.pp.h..ppl........l..p.Gp.hP....................................sGtp..l...s...p..h..............tt.....tp......................................................................................................................................................................................................psGa.DLTFSAP.KSVSh....h..........th..l...........usD...............c.......plh.pAHppAVp.s........lp..lEp...h...u....s.Rhtps..Gt.........th..Ts.sLlhAhFpHcTSR............................st.........D.....PpLHTHsllhNhs..........................................p..t.s.G..........cW+..s..Lssc..............sla.tp.phsh..utlYps......p......Ltpp......l.ctlG...aph..c.............cps....h..aEl..........sGls...........t...................l-s.F..SpRspp.....Iccthsp..........................................ssshpt+ph............AslsTRpsK...p...ps......s.p.hhtpWhpphpph..Gh..h........................................................................ 0 61 144 184 +8585 PF08752 COP-gamma_platf Gamma-COP; Coatomer gamma subunit appendage platform subdomain Mistry J anon pdb_1r4x Domain COPI-coated vesicles function in retrograde transport from the Golgi to the ER, and in intra-Golgi transport. This is the platform subdomain of the coatomer gamma subunit appendage domain.\ It carries a protein-protein interaction site at UniProt:P53620, residue W776, which in yeast binds to the ARFGAP Glo3p, and in mammalian gamma-COP binds to a Glo3p orthologue, ARFGAP2 [1]. 
25.00 25.00 25.20 25.00 24.20 23.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.73 0.71 -4.40 81 438 2012-10-03 16:25:20 2006-05-08 13:33:25 5 10 295 2 300 436 6 145.80 42 17.35 CHANGED ppppYsppLusIP....cht....sh.GslhKSS.p...slpLTEsETEYsVsslKHlF..ppHlVlQFslsNTLsDplL-sVsVhhsss...-st.......h..pphhhlPlspLs..h.spsGssYVshpt.....s.sshshusFus.sL+FhsKE.lDPsTG.-s--....-GY-DEY.lE-lElss ...........................................t.tphatcpLutIP.....Eht...sh...GsLhKSS.s....PVpLTEuETEYsVpslKHl.F..spHlVh........Q.......FcssNTL.....sD...plLEsVoV.hpss...-s........a..p.hhhlPstp.Lsh..s.pPGtsYlhhph..............s-.ssh...........sssoF...us.sLKFssK-.sDPsTG....Es--.....-GY-DEY....lEDl-ls.h....................... 0 101 159 238 +8586 PF08753 NikR_C NikR C terminal nickel binding domain Mistry J anon pdb_1q5y Domain NikR is a transcription factor that regulates nickel uptake. It consists of two dimeric DNA binding domains separated by a tetrameric regulatory domain that binds nickel. This domain corresponds to the C terminal regulatory domain which contains four nickel binding sites at the tetramer interface [1]. 20.90 20.90 21.30 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.42 0.72 -4.09 39 1066 2012-10-02 00:29:19 2006-05-08 14:04:46 6 4 979 79 297 602 67 78.40 43 56.69 CHANGED sGslsllYDHcppsLspcLsslQHcapcl.....IloohHlHLDccpChEllll+GpupclpcLucclhuh+GV+au+Lshsss ......................uVLohVY-H.c.pR.-Lup+lsshQHcHHDl....sluTLHVHl....s....H.-c.C.LElhlLKGchu-Vp+hADclhAp.RGV+HG+Lpslsp.......... 0 76 181 243 +8588 PF08755 YccV-like Hemimethylated DNA-binding protein YccV like Mistry J anon pdb_1bvb Domain YccV is a hemimethylated DNA binding protein which has been shown to regulate dnaA gene expression [1]. The structure of one of the hypothetical proteins in this family has been solved and it forms a beta sheet structure with a terminating alpha helix. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.41 0.72 -4.07 64 1071 2009-01-15 18:05:59 2006-05-08 14:58:45 6 15 964 1 318 578 1189 98.80 44 47.48 CHANGED puKFpIGQlVRH+hasaRGVlhDlDPpFsso--WacsIst-h+..Pt.+cQPFYHlLsEscps.............phl.uYVuEpNL.....h.DssspslcHPpl....schFpphcsu..tYh.c .......................s.uKFuIGQ.V...R.Hp..........Lh..G..Yh....GVV........l......DlDP..a..u..hsE..........-p.....ls.........ss.c..............ph..RstP......aYHV.lhED-.su.............................V..sYlAEtpL........ps-hpsE.cscpPsh....-Elhpshcpp..h.ts.+..................................................................................... 0 90 160 239 +8589 PF08756 YfkB YfkB-like domain Bateman A anon PSI2 structural target yfkB Domain This protein is adjacent to YfkA in B. subtilis. In other bacterial species it is fused to this protein. As YfkA contains a Radical SAM domain it suggests this domain is interacts with them. 25.00 25.00 25.80 190.60 20.20 17.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.95 0.71 -4.64 14 371 2009-01-15 18:05:59 2006-05-08 16:01:15 5 6 371 0 42 183 0 153.50 70 40.66 CHANGED MYPuDFASsLEsLSLcEhRcAIH+LLDhRDcslWMLFGTLPFYPCSssEEDhcLL+RLhpppNVTVRNDPDGRSRLNVNIFoGslIVTDFGDp.PsLGNIps-sLs-AYs+WppoclA+pLNCHCPuVpCLGPNVLVKNsYYp-sDFpp+pA+l MYPuDFASpLsVLTLAEM+csIHclLDhRDEslWMLFGTLPhaPC.cD--DQ+LLpRLRpuKNVTsRNDPDGRSRLNVNlFTGNVIVTDFGDE.ssluNIQcD+LsDVFDKWL.u.ScLAKSLNCHCsphpCLGPNVLVKNMYYPshDF+cpctp.h.. 0 12 25 34 +8590 PF08757 CotH CotH protein Bateman A anon PSI2 structural target cotH Family Members of this family include the spore coat protein H (cotH). 
24.60 24.60 24.60 24.80 24.40 24.10 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.02 0.70 -5.19 53 707 2009-01-15 18:05:59 2006-05-08 16:09:44 6 49 471 0 258 662 798 299.10 19 51.59 CHANGED l+hRG..so.......opph.sKKSa+l....c.hcpt..............th.Gh............cchhLpspatD..sohhRspluhclhcph...........uhssspsp.....asp..lalNG.........cYhGlYhh..hEplc.cphlctc..htsss.........utlacss................t..sh..t...ptth........ppssptshsplhs.hlphlsss....s.tphtstlpphlDl-salcahshptlhsNh...D.sh....scNhaLa..............tspss+aphlP.WDhDtuauphhp............................................t...thh.hsttssLh..p+llps..ssa+pphppphppll..pshhstptltshlcshtshls..sthpp-stthtsht..........................phppphphlppalppRhpal ....................................................................................h+G..sp...............t..t..h...+p......sacl...........p.hpph................t.h.Gh................cphhLp.s.....t.h......D...............oh.hRptlu..hphhpph.......................................sh...s.sps.p......ast.....lhlNs......................................ph.GlYh......hEp.lc..cphhc...tp.....hhs..ss.........................................uhhaphs...............................................................................tt.sh.......tt.........h............................p.t.ts.......pp..s.....hptl...p..hlphhsss........................ttth....ptltphlDl.-.p.hlcahshp.hhtNh......D.sh............spNhhla.............................ttpsshap....hlP..WDh.-hsautt.t.........................................................................................................................thh.t...t...ss.Lh...tpllp......stappthpp.h...hpc.lh..........p..p......h..s....phh.sh.l.pphtphlt......s..h..p.s....h.h..h.............................................................thpt.tht..l..phhptR.t........................................................................................... 
0 146 211 244 +8591 PF08758 Cadherin_pro Cadherin prodomain like Mistry J anon pdb_1op4 Domain Cadherins are a family of proteins that mediate calcium dependent cell-cell adhesion. They are activated through cleavage of a prosequence in the late Golgi. This domain corresponds to the folded region of the prosequence, and is termed the prodomain. The prodomain shows structural resemblance to the cadherin domain, but lacks all the features known to be important for cadherin-cadherin interactions [1]. 21.40 21.40 21.40 21.40 21.30 21.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.86 0.72 -4.21 52 421 2012-10-03 16:25:20 2006-05-08 16:13:35 6 13 48 1 158 409 0 82.30 30 10.07 CHANGED sCpPGFppcsashplspc.lpcGp.L........hpVpFssC.sGpp+lpYpoo-s.cFKVssDGolhstRslplpscphpFhVaApDspscc...............hslpltl ...................................p.h..h...lspp....l.ttp.l........hp...VsF..p-C..ts..ppplta..pSSDP.cF+VtpDGolassRslh..L.s....s..cp.....t..s.FhlhupDspspc....hpltV..l..................... 0 7 17 64 +8592 PF08759 DUF1792 Domain of unknown function (DUF1792) Bateman A anon PSI2 structural target AAO75156.1 Domain This putative domain is probably missannotated as a glycosyl transferase 8 family member. This domain is found at the C-terminus of protein such as Swiss:Q97P75 that also contain the glycosyl transferase domain at the N-terminus. 
20.80 20.80 20.80 35.90 20.40 20.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.44 0.70 -4.74 19 183 2009-01-15 18:05:59 2006-05-08 16:19:41 6 4 170 0 25 154 14 215.50 52 33.74 CHANGED VsRFGDGEhsll........tGcsIsaQsa-scLAp+L+cll.ppscp..shlVCLPDsFp.sltcYsphuppFW+sahhhhsshapchh.....ssphYusTFlSRPYhDhtD....KspustaFccLKpIWcs+DlLIVEGtpSRuGVGNDLFcNs+SIcRIlCPu+sAap+hccIhptlpch.....ucs+LILlhLGPTAKVLuaDLtchGaQsIDlGHIDoEYEWa+MGAppKVKl.ppKasAEhN .........llRFGDGEhsLh........hG.psIsYQsaD.ELAppL+cll..th..pSsE...cLlVCLPD.s.Fc...sh.pas.hupsFW+.pHh..hah-hYpcls.....pusaYGSTFISRPYI.DhcD....Kop..utuhFcKLKplWcsRDlLIVEGhsSRSGVGNDLFDpspSl+RIICPS+NAYSpl-pIpptlh.ca......Acs.+LILhMLGPTAKVLuYcL.s.phGY...QslDlGHIDSEYEWh+MGAppKVKh.uHKHTAEaN........... 2 4 11 17 +8593 PF08760 DUF1793 Domain of unknown function (DUF1793) Bateman A anon PSI2 structural target AAO78587.1 Domain This presumed domain is found at the C-terminus of a glutaminase protein from fungi [1]. This domain is also found as a single domain protein in Bacteroides thetaiotaomicron. 19.40 19.40 20.00 20.70 18.10 19.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.04 0.71 -4.39 37 301 2009-01-15 18:05:59 2006-05-08 16:38:11 6 11 161 0 170 307 0 170.00 37 23.78 CHANGED lGltAhuclAshhGcstsAppYsslAcphhscWt.phuh....-us.......HhpLsa.sp.uoWo.hYNLhhD+LLsLs.........................................................lhPpplachpssaYhohppcYGlPLDoR+s.YTKoDW.hasAslussp.shpthlsslhpalsETsochPhoDhY-Tp..sGshss........FhARsVVGGaFhhLhh ................lGltAhuplAphhG...p.p....ssA.ppYpshAcphsscWp.phuh....-us.........Hh.+Lsa...cp...s.oWophYNLh.hD+LLs.Lp....................................................................lhPppVhptpssaYhs....h....p..pp...YGlPLD.......oRc.p.....Y...T.KoDW.hasAuhussp....shpphlsslhpahNETsochPhoDhapTp..sGphss..........FtARsVVGGaahhlh.................................................... 
0 67 109 144 +8594 PF08761 dUTPase_2 dUTPase Mistry J anon pdb_1w2y Domain 2-Deoxyuridine 5-triphosphate nucleotidohydrolase (dUTPase) catalyses the hydrolysis of dUTP to dUMP and pyrophosphate (EC:3.6.1.23). Members of this family have a novel all-alpha fold and are unrelated to the all-beta fold found in dUTPases of the majority of organisms [1]. This family contains both dUTPase homologues of dUTPase including dCTPase of phage T4. 23.00 23.00 23.00 23.10 22.70 22.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.25 0.71 -4.16 34 876 2012-10-01 21:36:44 2006-05-08 16:42:01 6 2 693 18 118 593 126 152.60 27 88.39 CHANGED sLpplhchQctLspclspcp.......pt.ph......hh.pp..hAlhsEluEhhsch...........tsaKaWKspp.............scttp...............................................lh.EhVDslHFllSlslphthp..........................................t.hs.ph.pt............hh.......phhtphpphhpp..st...pht.lhsthhtls.hlGhs.--lhctYhtKNplNatRQspGY ..................................LpphhplQcchscclstpp.............p..p.................h...h..huhhlEhuEhhsph...........p.s.aKhW.Kpps............spsh-s.........................................................hlpEhlDhlHF..h..LS.lslphhhppp.......................................tt..h......shhpp............................................hh............phhtp.h...p.h..h...tp...s...........ht...hht.a..htls.h.t.ht.......ash-plhcsYhtKtthNatRQs........................................... 0 41 92 106 +8595 PF08762 CRPV_capsid CRPV capsid protein like Mistry J anon pdb_1b35 Domain This is a family of capsid proteins found in positive stranded ssRNA viruses such as cricket paralysis virus (CRPV). It forms an all beta sheet structure [1]. 
21.10 21.10 21.20 21.20 20.70 20.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.31 0.71 -4.88 7 192 2012-10-04 01:49:40 2006-05-08 17:16:02 5 14 36 2 0 153 0 163.40 32 24.60 CHANGED o..s.h.hhhGEplsslRpLl+RFphh.t......ptsssl.chsplppPt............tst.shshh.......shhualYtFaRGuhRaKlhshps...................t.....s..lshhtTsspsspsssuh............hh.sslpshtEhplPYYu.....Psss.s.ts.s....psl...psh.s.sltpt.psp.......sthhhRA.uuDDaoFuhLlGsP.h.hssup ..................................................................tsh..............................................................................................................h...............shluphYthYRGGlRhKllscps.........................................l.c.hsp.ss.s..sspps.hsSc........................s.tlp.hslpsVtEhplPYYu..........Pshoso..pups..................ph.....pGt..s..s..hslssss.suh.................sph..h....slu.AGDDhsFShalGsP.h..t.s................................................ 0 0 0 0 +8596 PF08763 Ca_chan_IQ Voltage gated calcium channel IQ domain Mistry J anon pdb_2be6 Domain Voltage gated calcium channels control cellular calcium entry in response to changes in membrane potential. The isoleucine-glutamine (IQ) motif in the voltage gated calcium channel IQ domain interacts with hydrophobic pockets of Ca2+/calmodulin [1]. The interaction regulates two self-regulatory calcium dependent feedback mechanism, calcium dependent inactivation (CDI), and calcium-dependent facilitation (CDF). 20.70 20.70 20.70 21.40 20.50 20.30 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -7.80 0.72 -4.71 25 724 2009-01-15 18:05:59 2006-05-09 09:54:38 6 7 96 16 235 602 0 34.30 52 1.83 CHANGED sD-VTVGKFYATaLIQDYFR+FKp+Kppptpppps .....DElTVGKhYAoaLIpDYFRpaKp+KEpthht...s..... 1 33 56 116 +8597 PF08764 Coagulase Staphylococcus aureus coagulase Mistry J anon pdb_1nu9 Domain Staphylococcus aureus secretes a cofactor called coagulase. 
Coagulase is an extracellular protein that forms a complex with human prothrombin, and activates it without the usual proteolytic cleavages. The resulting complex directly initiates blood clotting. 20.70 20.70 20.80 20.70 20.10 20.10 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.89 0.70 -4.47 11 424 2009-01-15 18:05:59 2006-05-09 10:55:11 5 11 168 6 3 229 0 256.90 41 46.40 CHANGED TKDYStcSpVNcsSKpGosISstYaWuhIcsLEsQFspAlcLlEcYpYGEKEYKDAKDKLMTRILuEDQYLLEKKhspYEhYKchYKKaKcpN...Ppsp.hKMtsFacYslYsLTMcEYN-IppSLK-Al-cF+p-Vc-IppKNpDLKsYsccpEcKATccVYDLVsElDTlasuYauDppaspcAKELRAKLDLILGDpcsPpRITNERI+KEMhcDLNSIIDDFFhETsQNRPssITKYDPshHDY+...-NKpsFDALVKET+-AVscADESWKsKTVKKYG ................................................tK.st.hss.hh.thlp.tLts.hh.uhslhc...p.cYs-.EYK-AhDKhhp+..lhuEDphL.p+hhtthc.hKcaapptpp..........tp.hph..hp..hcphs....lhsLThccYsplapoLKcshc-Fp+EVccIppKNsDLKs.a........s........cs...c-p+...As........scl..c..L.scl.h.l.....shs.aa....sc..p....patccsc-LhuKL..DLILG...-...pc...c...s....+.+.....h...TNcRhtcEhh-DLpoIID-FFh-hppNRPpsIsph.sss.c.H.s.p....cN+tNh-pLhp-TctAh.tcuD-Sh+p+psK....................................... 1 2 2 3 +8598 PF08765 Mor Mor transcription activator family Mistry J anon pdb_1rr7 Domain Mor (Middle operon regulator) is a sequence specific DNA binding protein. It mediates transcription activation through its interactions with the C-terminal domains of the alpha and sigma subunits of bacterial RNA polymerase. The N terminal region of Mor is the dimerisation region, and the C terminal contains a helix-turn-helix motif which binds DNA. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.32 0.72 -4.27 17 765 2012-10-04 14:01:12 2006-05-09 12:02:26 6 4 521 1 130 526 7 97.00 22 79.65 CHANGED s+hP-lLs-LsphhtptLpchs....l-.tt.AcpluhplAspluppaGGpslYhPpGhshchs.RDhcIas-Fs........GcNhtpLAR+YslohphIYpll+RhR+pchpcpQhc.L ..............................h.....................................p.h.u..p......hs....tplhphhG..Gp.......p.l.YlP...........+............s....p............p...........h............p............t............p............h.Rs..ppIhp.-.a.s............G..p.s.h...p..cLAc+YtLS.pplhpIlpc.ct.................................. 0 57 101 114 +8599 PF08766 DEK_C DEK C terminal domain Mistry J anon pdb_1q1v Domain DEK is a chromatin associated protein that is linked with cancers and autoimmune disease. This domain is found at the C terminal of DEK and is of clinical importance since it can reverse the characteristic abnormal DNA-mutagen sensitivity in fibroblasts from ataxia-telangiectasia (A-T) patients [3].\ The structure of this domain shows it to be homologous to the E2F/DP transcription factor family [1]. This domain is also found in chitin synthase proteins like Swiss:Q8TF96, and in protein phosphastases such as Swiss:Q6NN85. 24.80 24.80 24.90 24.80 24.50 24.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.18 0.72 -4.19 59 900 2009-01-15 18:05:59 2006-05-09 13:39:52 6 46 294 1 579 859 1 53.50 28 6.68 CHANGED o-pplpptlcclLpsu.DLpslTp.+plRcpLpp+h.sh-L.s..s+KshIcphlcphLs ........-ptlppplcpILpss..D...Lps.l...Tp.KplRppLEpc....h..sh...sL.s...p+KsaIcptltthl....................... 0 169 305 449 +8600 PF08767 CRM1_C CRM1 C terminal Mistry J anon pdb_1w9c Domain CRM1 (also known as Exportin1) mediates the nuclear export of proteins bearing a leucine-rich nuclear export signal (NES). CRM1 forms a complex with the NES containing protein and the small GTPase Ran. 
This region forms an alpha helical structure formed by six helical hairpin motifs that are structurally similar to the HEAT repeat, but share little sequence similarity to the HEAT repeat [1]. 20.20 20.20 20.30 20.20 20.10 20.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.87 0.70 -5.52 29 475 2012-10-11 20:01:01 2006-05-09 14:14:28 6 15 300 13 326 464 6 280.10 33 28.82 CHANGED QLupIYhDhLplY+hhSphISptlsttG.hsoK....pshl+shRolK+ElLKLl-Talp+up.....................................s.p..VhpshlsPLh-sVLhDYpp.NVPsAR-sEVLsshsollsKlps....hhpstlshIhcuVF-sTLsMIscDFp-aPEHRlpFapLLculstpCFsAllp..lssppFKhllDollWAhKHspRsVt-sGLslhh-llpNlpp............ssphuppFYpsaahslLp-lFhVlTDos..HKuGFphpshlLtplhpll-ss.....plssPlhs........hssNthalppalsshLpsuFsplstpQlpsFlpuLaphsp.s...................htpF+tplRDFLlplKEFuu ..................................................................................................................................QlshIahDMLsh...Y.+hhSp.Isttltt...t.............G..hs.s.+.............pshl+.th.RslK+EhL+Llpsalp+up..........................................................................................................s.p.....lhpphl...ssL.hcsVLhDYpp..s..lPsAR-sEVLs...hhssllpK.ltt....................hh.t.spl...s.I...h.csVF...csTLsM........I..s..cD.Fp-..aPEHR...spFapLLpulsppC....F..su.........llp.....ls......sspFKhll.DSl.hWAhKHspRsVsp..s...............GLphlhpLlpNhtp.................ptthuptFap...s.aahplLpclF.V.l.TDsp..Hp..uGhphpu..lL.tth.hpllp.s.....tlp.sl.s......................sNt.alp.palsphLpsuF.....splptt..........p....lp.Fl.pu..Lhp.tp..s...................................................hstF+tplRDFLlplKEas............................................................................... 0 131 195 269 +8601 PF08768 DUF1794 Domain of unknown function (DUF1794) Mistry J, Pollington JE anon pdb_2fr2 Domain This domain forms a beta barrel structure but the function is unknown. 
The GO annotation for this protein indicates that the protein has a function in nematode larval development and has a positive regulation on growth rate. 25.00 25.00 25.20 25.40 24.80 24.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.06 0.71 -4.37 55 758 2009-01-15 18:05:59 2006-05-09 15:33:03 6 9 575 7 271 552 113 161.40 28 72.68 CHANGED ssLsPLuhLlGsWcGc..GtuthPsh....p.sap...YtEclsFs.as....Gp.....shLtYp..t+................oath....ssGp....PhHsEsGahRh...psssp...................................l-hhlupPsGlsElhhGp......ssusplcLs.scu.....lspostu.+..pss...........................uhpRhaslh..ssssLsYshch.t.....shsp...s.Lpp+hsupLcRh ............................................tLhsLs.LlGsWcGc.....Gp.ushPs.............t.sapaspplsas.cs.........Gt.........sa..Lsap..sp.................................................oaph.............ssGp........Ph+pEs....GaaRh.....ssssp..............................................l-llhupssGhsElhhGp........hsss.p..l.cLt..ocu........lucostu.p......pss...........................shpRhasls.....pss.s.Ltas.ch..h.....ssss.....s.lpsahsApLcR................................................... 0 79 160 239 +8602 PF08769 Spo0A_C Sporulation initiation factor Spo0A C terminal Mistry J anon pdb_1fc3 Domain The response regulator Spo0A is comprised of a phophoacceptor domain and a transcription activation domain. This domain corresponds to the transcription activation domain and forms an alpha helical structure comprising of 6 alpha helices. The structure contains a helix-turn-helix and binds DNA [1][2]. 
25.00 25.00 25.40 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.33 0.72 -4.17 28 674 2009-01-15 18:05:59 2006-05-09 15:37:23 6 2 490 7 111 516 13 102.40 58 44.53 CHANGED clTsllHclGVPAHIKGYpYLR-AIhhslp-sclLsulTKpLYPpIA++YsTTsSRVERAIRHAIEVAWsRGph-slschFGYTl.........................stp+GKPT.....................NSEFIAhlADKLR ................lTsIIHEIGVPAHIKGYhYLR-AItM.VhpDh-.lLuulTKhLYPsIAKKYs.TTsSRVERAIRHA.IEVAWsRGs.l-sIsplFGYTl...........................s.sK.uKPT.....................NSEFIAhlADKLR............................ 0 59 93 100 +8603 PF08770 SoxZ Sulphur oxidation protein SoxZ Mistry J anon pdb_1v8h Domain SoxZ forms an anti parallel beta structure and forms a complex with SoxY. Sulphur oxidation occurs at the thiol of a conserved cysteine residue of the SoxY subunit [1]. 20.00 20.00 20.10 22.10 19.50 19.30 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.87 0.72 -4.58 88 444 2012-10-03 16:25:20 2006-05-09 16:07:57 6 2 281 14 195 457 292 98.30 28 59.33 CHANGED hRl+......ssttGslspl+sLlpHsMEoGhRKDt.sGphlPscaIpplsspaNGcsVhsuphusulSpNPahpFphpuspsG..plplsWtDscGsshssptsl ...................lp......ttttGphhcl.+hllpHPM-oGhp+Dt..s.sphI...........PspaIppls.s.p.h.s.....G....p........V.h.ssphssulScNPhhpFphpsstsG..plplsapDscGsshssph........... 0 44 121 159 +8604 PF08771 Rapamycin_bind Rapamycin binding domain Mistry J anon pdb_1aue Domain This domain forms an alpha helical structure and binds to rapamycin [1]. 
20.40 20.40 20.40 21.30 20.30 20.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.38 0.72 -3.92 38 401 2009-09-13 23:50:46 2006-05-09 16:26:34 6 35 280 10 270 412 16 98.30 52 4.38 CHANGED ELIRlAlLWaEhWa-uLE-.ASRhaF....s-cNhctMhssLpPLH-hLc+.sP-TlpEsuFhpsaGccLpcApcalppYppopc.hssLspAWclYapVF++Is+Q ...........................ELIRVAlLWHEhWHEGLEE.ASRLYF....G-+NlcuMapsLcPLHphL-+....GPp....T..L+Es..........SFs.........QuaGRDLtEAp-asppY..................pp..............o........ts.......ls-LsQAWDlYYpVFR+Is+Q.................. 0 111 162 236 +8605 PF08772 NOB1_Zn_bind Nin one binding (NOB1) Zn-ribbon like Mistry J anon pdb_2con Domain This domain corresponds to a zinc ribbon and is found on the RNA binding protein NOB1 (Nin one binding). 22.00 22.00 22.00 22.40 21.90 21.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.72 0.72 -4.15 37 331 2012-10-03 10:42:43 2006-05-09 16:52:55 6 6 292 1 235 333 15 73.50 42 16.00 CHANGED pI+pl+palLRC+ACaphsp....-hs+pFCPpCGss.TLp+Vulols..psGphplalppp.hphssRGspYSlPpPpuG ..........I+pl+salLRCHuCFpsTp.........chsKhFCP+CGst.TLp.+Vulols......csGphphHlppN.h.h...ssRGp+YSlPpPpuG..................................... 0 84 130 194 +8606 PF08773 CathepsinC_exc Cathepsin C exclusion domain Mistry J anon pdb_1k3b Domain Cathepsin C (dipeptidyl peptidase I) is the physiological activator of a group of serine proteases. This domain corresponds to the exclusion domain whose structure excludes the approach of a polypeptide apart from its termini. It forms an enclosed beta barrel structure composed from 8 anti-parallel beta strands [1]. Based on a structural comparison and interaction data, it is suggested that the exclusion domain originates from a metallo-protease inhibitor [1]. 
20.20 20.20 20.80 20.50 19.10 19.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.78 0.71 -3.93 10 167 2009-01-15 18:05:59 2006-05-09 17:23:39 6 6 78 5 92 178 1 106.90 39 23.58 CHANGED DTPANCTY-DLlGTWlFpVucts....cpslsCSphssscpolsVsLpcLslAsD-aGNpGaFTLIYNQGFElslNcYKWFuFFKYcppGopVhSYCccThPGWVHDlLGRNWACFsupKl .........................................DTPAsCsa.-.lhGsWhhphu..t.................tp.ppl.sC..............s........t....s.....s....p..pph..h...lpLp..p.shAh......D.p........h...G.N..s..GpaTlIYNQG..FElsl.....Ns.....hK......aFAFFK.....Y...c..p...c..G....p...p......s.h..S..h.CpcThs..G..W.s........+..D..h......hs..ppa.uCahupK................................................. 0 42 50 67 +8607 PF08774 VRR_NUC VRR-NUC domain Iyer, L, Bateman A anon Iyer L Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.47 0.72 -3.98 77 1394 2012-10-11 20:44:44 2006-05-09 18:51:10 6 14 1169 0 380 1233 264 99.00 21 24.30 CHANGED hpctphpstlhpthp.phs........hhhhhhthuh......................h..........uh.sGhPDlllh........hsssp........................hhhlElKuPs.......s+lpcpQhphh.ctltpp.....GhpstVspstp ..................................................................p..hhp.hp.phs...........h.hh.hu.........................................................h.th..............uttsGhPDhll..h..................hspsp...................................................................hhhlElKuss.................s+lpst.....Qhp....hh.chlppt.....GhtVtVsps........................... 0 109 224 308 +8608 PF08775 ParB ParB family Mistry J anon pdb_1zx4 Domain ParB is a component of the par system which mediates accurate DNA partition during cell division. It recognises A-box and B-box DNA motifs. ParB forms an asymmetric dimer with 2 extended helix-turn-helix (HTH) motifs that bind to A-boxes. 
The HTH motifs emanate from a beta sheet coiled coil DNA binding module [1]. Both DNA binding elements are free to rotate around a flexible linker, this enables them to bind to complex arrays of A- and B-box elements on adjacent DNA arms of the looped partition site [1]. 25.00 25.00 28.10 27.70 24.90 23.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.64 0.71 -3.82 15 218 2009-01-15 18:05:59 2006-05-10 09:43:09 5 6 166 4 18 138 1 123.10 38 38.43 CHANGED FPVts-LoauDYphLh+lpcphpppphslspllpslppclcslps..phs.--tKspIhchIppchptlpstss+cps.plssLtpF-sKcpFARK+s..KGRphoYEFuRlsp-lQccL...DpAIcplLcc ...FPVts-LohoDYphLhclsEchppcsh.ol-pllpslpp..clpsl.s..ths.--tKspILclIppps.phLts.ssKsp....ssstLhpFc-KcpFARK+s..KGRtlsYEFuRlSp-lQcEl...DcuIpclLc....... 0 1 4 14 +8609 PF08776 VASP_tetra VASP tetramerisation domain Mistry J anon pdb_1usd Domain Vasodilator-stimulated phosphoprotein (VASP) is an actin cytoskeletal regulatory protein. This region corresponds to the tetramerisation domain which forms a right handed alpha helical coiled coil structure [1]. 20.70 20.70 20.70 20.90 20.50 19.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.97 0.72 -4.64 6 249 2009-01-15 18:05:59 2006-05-10 11:43:15 6 5 71 2 102 217 0 39.40 58 7.98 CHANGED -uhDhDRhKQEILEEVhRELpKVKEEII-A...hpQELp+huo .......-uhDhDRhKQEIL-EhR+ELpKlKEEIIDA....IRQELu+.so....... 0 16 26 55 +8610 PF08777 RRM_3 RNA binding motif Mistry J anon pdb_1owx Domain This domain is found in protein La which functions as an RNA chaperone during RNA polymerase III transcription, and can also stimulate translation initiation. It contains a five stranded beta sheet which forms an atypical RNA recognition motif [1]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.29 0.72 -4.18 15 306 2012-10-02 20:46:34 2006-05-10 12:05:32 6 12 140 1 190 739 8 99.90 29 22.53 CHANGED GsllchoG.lscsso.R-clKshFppau...cVtaVDapcGsppGtlRFcssps..AccAhcpss-u....pl.l+ctplsh-lLpG-cEccYhpKIlpcpppph..sp.+pKGR ........................Gsllchp....sc...ss....Rccl+phhs.p.hu...........clt....a.l......D.a......h.....c.......G.s.p......c.......G......h......lR......Fcpsps..Apcsl.pphpp...s....................t........h.h.....c....s.....p.p......h.......phcl.Lp..G...-tEccah....pK....Iht.....c.pppph...sp.+p+t............................................ 0 63 85 134 +8611 PF08778 HIF-1a_CTAD HIF-1 alpha C terminal transactivation domain Mistry J anon pdb_1l3e Domain Hypoxia inducible factor-1 alpha (HIF-1 alpha) is the regulatory subunit of the heterodimeric transcription factor HIF-1. It plays a key role in cellular response to low oxygen tension. This region corresponds to the C terminal transactivation domain. 25.00 25.00 26.80 27.50 23.50 22.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.75 0.72 -4.49 15 244 2009-01-15 18:05:59 2006-05-10 13:04:30 5 11 100 5 63 202 0 38.70 68 4.98 CHANGED hsshsLP...QLTRYDCEVNAPlpGpppLLQGEELLRALDQVs ......h-p.sLP...QLTpYDCEVNAPlQ..GspsLLQGcELLRALDQss... 0 5 14 28 +8612 PF08779 SARS_X4 SARS coronavirus X4 like Mistry J anon pdb_1yo4 Domain The structure of the coronavirus X4 protein (also known as 7a and U122) shows similarities to the immunoglobulin like fold and suggests a binding activity to integrin I domains [1]. In SARS-CoV- infected cells, the X4 protein is expressed and retained intra-cellularly within the Golgi network [2]. X4 has been implicated to function during the replication cycle of SARS-CoV [3]. 
20.40 20.40 20.60 124.50 19.30 15.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.09 0.72 -3.82 2 79 2009-09-11 12:31:16 2006-05-10 13:19:27 5 1 75 2 0 18 0 83.90 97 68.92 CHANGED ELYHYQECVRGTTVLLKEPCPSGTYEGNSPFt.PhhsNtahl......hsFspssGoR+T.YQ.hscp.os+hF.h..phQ.chas ELYHYQECVRGTTVLLKEPCPSGTYEGNSPFH.PLADNKFALTCTSTHFAFACADGTRHT.YQLRARSVSPKLFIRQEEVQQELYS........ 1 0 0 0 +8613 PF08780 NTase_sub_bind Nucleotidyltransferase substrate binding protein like Mistry J anon pdb_1wwp Domain Nucleotidyltransferases (EC 2.7.7) comprise a large enzyme family with diverse roles in polynucleotide synthesis and modification. This domain is structurally related to kanamycin nucleotidyltransferase (KNTase) and forms a complex with HI0073, a sequence homolog of the nucleotide-binding domain of this nucleotidyltransferase superfamily [1]. 25.70 25.70 25.80 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.64 0.71 -4.35 40 469 2012-10-01 22:14:54 2006-05-10 14:57:29 6 5 340 14 148 413 71 119.10 30 86.46 CHANGED p+hssap+AltpLcculph...........p.hs-l.psGlIQpFEaTaELuWKhhKcaLch.pGh..plh.us...RsshRpAhptGLI...sDs-h.WhchlcsRNhTSHTYscchAcclhppIh.pYhsthppLhpplpp ..........................................................chpshppAlppLpcuhp....................p..schhhsGhIQ+FEaoaELuWKhhKchLph.pGh.....phh..us....+sslR.pAaptGLI...p-t..ch..WhphlcsR.NhTuHoY.sc..phA..p..plhppIh....pahs.hppLhpplp......................... 0 58 106 125 +8614 PF08781 DP Transcription factor DP Mistry J anon pdb_2aze Domain DP forms a heterodimer with E2F and regulates genes involved in cell cycle progression. The transcriptional activity of E2F is inhibited by the retinoblastoma protein which binds to the E2F-DP heterodimer [2] and negatively regulates the G1-S transition. 
20.90 20.90 21.00 21.00 20.20 20.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.69 0.71 -4.51 24 312 2009-01-15 18:05:59 2006-05-10 16:38:45 5 6 136 1 166 279 7 135.20 48 35.77 CHANGED p-sppLchE+pc.thcRIcpKps.LpELl.QplAhcsLlpRNpphEpp.sssPs..osIpLPFIllpTsp+ssl-spISsDppchhFsFsso.FE.IHDDhpVLKpMuhshulcsstssspshphspohss....................hslpshlsthspspttp ....................................QEspsLEhE+p+.RhERI+pKpupLQELl.....lQplAFKNL.VQRN+psEpp..sptPss......sosIpLPFIllNTu++TlI-C.SISsDKhEYlFsFDs....T.FE.IHDDlEVLK+................MGhuhGL...-sGpCo.c.slphu+ohlP.....................psLp.Ylpth.t.....s........................................................... 0 54 85 121 +8615 PF08782 c-SKI_SMAD_bind c-SKI Smad4 binding domain Mistry J anon pdb_1mr1 Domain c-SKI is an oncoprotein that inhibits TGF-beta signaling through interaction with Smad proteins [1]. This domain binds to Smad4 [2] 25.00 25.00 25.80 30.30 24.60 18.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.42 0.72 -3.87 7 320 2009-01-15 18:05:59 2006-05-10 16:54:37 5 4 90 2 180 285 0 92.80 47 15.16 CHANGED SF+VYHECFG+C+GLFlPELYsuPsAuCIQCh-CRhMFsPpKFVsHSH+s.E.pRTCHWGF-SuNWRuYlhLspcYts+-cpupLpplL--lKtKFc ............uFcVhHEChhtC+GhFl.PphYsu.scAtCIpCh...Cshh.FoPpKFlhHSHR..s..s.....-..+......h.T...p..uhs.ssWRpaL..cLs........s.c.st..ppc.Ltphh--lKthFs............................. 0 38 54 115 +8616 PF08783 DWNN DWNN domain Mistry J anon pdb_2c7h Domain DWNN is a ubiquitin like domain found at the N terminus of the RBBP6 family of splicing-associated proteins [1]. The DWNN domain is independently expressed in higher vertebrates so it may function as a novel ubiquitin-like modifier of other proteins [1]. 
21.70 21.70 22.30 21.70 21.20 21.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.41 0.72 -4.04 26 408 2012-10-03 10:59:06 2006-05-10 17:03:34 6 14 265 1 270 417 1 72.60 46 11.02 CHANGED laYKF+.Sp+c.hsplsFDGo.sIoVhDLK+cIlppp+LGcupD......F-LplhNspTpE.....E.Yp.DDstlIP+sooVll+RlP ....VaYKFp.Sph.s..asslsFDGh.tISVt-LK+pIhppc+Ls.cupD..........hDLpIpNupT.pE...........E.Ys.DDsslIP+soSVIlRRlP...................... 1 80 164 220 +8617 PF08784 RPA_C Replication protein A C terminal Mistry J anon pdb_1z1d Domain This domain corresponds to the C terminal of the single stranded DNA binding protein RPA (replication protein A). RPA is involved in many DNA metabolic pathways including DNA replication, DNA repair, recombination, cell cycle and DNA damage checkpoints. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.39 0.72 -3.60 23 349 2012-10-04 14:01:12 2006-05-11 10:35:05 6 5 269 16 223 404 8 100.20 20 37.20 CHANGED Hhhho+spstssuu...........sssssssuhsss.........ushsussspshs......u.hsshpppVLshl+p..tstsscGlphc-ls.ppL..phssscl+pAl-hLss-GpIYSTlD-p ........................................................................................tt.................s....s...st..ss.hs..ss...........ssh.s.sss.t..shs..........s.hsshp.ppVhshl+s......stsp-....G...lphpplt..ppl.......ph..........s.hs...plppul-hLhspG.hIYoTlD-p....... 0 67 119 185 +8618 PF08785 Ku_PK_bind Ku C terminal domain like Mistry J anon pdb_1q2z Domain The non-homologous end joining (NHEJ) pathway is one method by which double stranded breaks in chromosomal DNA are repaired. Ku is a component of a multi-protein complex that is involved in the NHEJ. Ku has affinity for DNA ends and recruits the DNA-dependent protein kinase catalytic subunit (DNA-PKcs). This domain is found at the C terminal of Ku which binds to DNA-PKcs [1]. 
21.30 21.30 21.40 21.40 21.20 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.34 0.71 -10.58 0.71 -4.21 9 263 2009-01-15 18:05:59 2006-05-11 11:19:19 6 10 204 3 172 270 0 117.30 26 17.18 CHANGED GolNPspDFpsLlpp+sts..hpcAhsQhpp+IhpLlpss.psp.apKulpClhAhRctslh.upscpFNsFLppLpphhps+pLpcFWp.llsp........cploLIopsEupsSsVosEEAppFLs...ppE ................................sPht-Fc.t.hl.........p.........p.........p.......css.........hpc..............AspQhtshIpphlpss....tss...at+..u..h-sltshRcpslphp.E.........sphaNsFl.ppL+c....pl...........p....c....p.....h........p........cFW.p.hl.hp........p.plsL.Isp..pEsp....s......Sp.V...otc-A.p.p.Fh....t........................... 0 62 96 135 +8619 PF08786 DUF1795 Domain of unknown function (DUF1795) Mistry J anon pdb_1tu1 Domain This is a bacterial domain of unknown function. It forms an antiparallel beta sheet structure and contains some alpha helical regions. 20.90 20.90 21.00 21.20 20.80 20.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.42 0.71 -4.30 39 1088 2009-01-15 18:05:59 2006-05-11 11:40:40 6 4 617 4 112 378 4 129.40 29 78.06 CHANGED EGshsLPss.apDcolNlhhhsssts..shollloRssltsucslpsalpcplptl.pppLptaphhs......ppssplusp....suhplp.........hpap.ppsp.laQhQshhhhs.........pplLhhThTs..ssshssppcttatsllsSh ...........................................hshsL.P..us..hpD.......p.......S...............Nh...hVhoDstsp..psslVIst-.hs.s......-.sLsshsp+hh.sp..cs+.sphplhs......scslpLtsp.......h.pLD.............shhs...upsp.psa..p....shllh.ls.........splLshphTh..sss.ppptpsp.hpslIpoh........................... 0 8 30 70 +8620 PF08787 Alginate_lyase2 Alginate lyase Mistry J anon pdb_1uai Domain Alginate lyases are enzymes that degrade the linear polysaccharide alignate.\ \ They cleave the glycosidic linkage of alignate through a beta-elimination reaction. This family forms an all beta fold and is different to all alpha fold of Pfam:PF05426. 
25.50 25.50 25.60 28.80 25.00 25.40 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -12.01 0.70 -4.71 70 350 2009-01-15 18:05:59 2006-05-11 13:04:19 6 15 175 10 153 347 22 224.10 25 77.76 CHANGED hDL...opWplslPh............ts....psspltspplts...saps..........haahssspus..hsFhsssp.....u....oossophsRoELREhh................s.sttsWshssss............phpAolpVspVsss............VlluQIHutp..............spPlh+lhap......ppG...........slhhthc................stttshthshhs...................sltLG.........ctFsaplp...lsss.....plplshsup....sptsh.....................................tsasspt..hYFKAGsYsQ...ssstsst.......................shupspFhpLpl.s+ ......................................................hslspWplplPh................ts.stplpsspl.s......saps..........haahssstss..lhFhs..sss.....us...sTtsupasRoELREth................s.p.tsWhhssss..p...............phpuslpVsplsss.t..........llluQIHutp.............sspPlh+l.at......psG...................slhhthc.................ssststthslhs....................sl.Ls.........cpF.saplp...lssu.....pl.sVphssp.....sttph.....................................tsassp...hYFKAGsYsQ...ssss.st.......................phucspahpLplt.............................. 0 22 98 137 +8621 PF08788 NHR2 NHR2 domain like Mistry J anon pdb_1wq6 Domain The NHR2 (Nervy homology 2) domain is found in the ETO protein where it mediates oligomerisation and protein-protein interactions. It forms an alpha-helical tetramer [1]. 20.10 20.10 20.10 23.90 19.80 19.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.45 0.72 -4.49 5 245 2009-01-15 18:05:59 2006-05-11 13:49:28 6 7 68 2 108 216 0 64.00 71 11.19 CHANGED pEEslDHRLT-REWA-EW+H..LDphLNCIMDMVEKTRRSloVL.RRCQEuDREELNYWtRRaSDuEEs+ ...............QEEhlDHRLT-REWAEEWKH..LDpl.......LNCIMDMVEKTRRSLTVL..RRCQEADREELNaWhRRYS-sE-h+............... 
1 13 21 55 +8622 PF08789 PBCV_basic_adap PBCV-specific basic adaptor domain Iyer L anon Iyer L Domain The small PBCV-specific basic adaptor domain is found fused to S/T protein kinases and the 2-Cysteine domain [1]. 20.40 20.40 20.50 20.50 20.30 19.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.93 0.72 -4.29 24 80 2009-01-15 18:05:59 2006-05-11 14:12:19 5 8 13 0 5 80 1 37.90 44 19.68 CHANGED TG+lsuKGRtlacss+GtpYVhssu.KKVhVpchhpPpts ....TGKlsAKtRcVF+ssKG+sa.Vhps.s....KKVYVKKlhsPKt..... 0 0 2 4 +8623 PF08790 zf-LYAR LYAR-type C2HC zinc finger Mistry J anon pdb_1wjv Domain This C2HC zinc finger is found in LYAR proteins such as Swiss:Q08288 which are involved in cell growth regulation. 20.70 20.70 20.70 22.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.22 0.72 -7.50 0.72 -4.24 30 311 2009-01-15 18:05:59 2006-05-11 14:27:46 6 11 250 1 203 291 3 27.80 55 8.36 CHANGED aoCIDCspsF.stpsa+sHouCITEsEKY ..hoCIDCspsF..GssY+sHopCIoEspKY.... 1 70 114 167 +8625 PF08792 A2L_zn_ribbon A2L zinc ribbon domain Iyer L anon Iyer L Domain This zinc ribbon domain is found associated with some viral A2L transcription factors [1]. 22.90 22.90 22.90 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.84 0.72 -4.40 14 90 2012-10-03 10:42:43 2006-05-11 14:40:21 5 3 78 0 7 72 3 32.30 41 12.11 CHANGED pp.phCph..Cspstlh...pppshhhCh.Csssh.h ......sl+hCss..Cppss..ll...o-puYchClhCpslaph... 0 2 7 7 +8626 PF08793 2C_adapt 2-cysteine adaptor domain Iyer L anon Iyer L Domain The virus-specific 2-cysteine adaptor domain is found fused to OTU/A20-like peptidases and S/T protein kinases. The domain associations of these proteins indicate that they might function as viral adaptors connecting the kinases and OTU/A20 peptidases to specific targets [1]. 
20.10 20.10 20.10 20.10 19.40 19.80 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.65 0.72 -4.07 38 112 2009-01-15 18:05:59 2006-05-11 14:53:58 5 10 28 0 2 116 7 36.90 36 19.63 CHANGED ppCpcF....pcsPs.hNPhTG+sI+hsGPsY+clhccCsss .......hCpcF....pcsPs.hNPhTGRsI+tsGPsactLtccCss.... 1 2 2 2 +8627 PF08794 Lipoprot_C Lipoprotein GNA1870 C terminal like Mistry J anon pdb_1ys5 Domain GNA1870 is a surface exposed lipoprotein in Neisseria meningitidis that and is a potent antigen of Meningococcus. The structure of the C terminal domain consists of an anti-parallel beta barrel overlaid by a short alpha helical region [1]. 25.80 25.80 26.50 26.30 24.80 25.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.00 0.71 -4.36 2 580 2009-01-15 18:05:59 2006-05-11 14:54:44 5 2 98 13 6 555 0 148.80 63 58.36 CHANGED QsHSAlsALphEplpss-+.sphlspRpFhluDluGEHTuFspLP-.G+ApY+GpAFuSDDAsGKLTYTIDFAAKQGHGKIEHLKoPE.NV-LAuu-lKsDcKpHAVI.GsshYsttEKGoYpLulFGspAQElAGSApVchuptl+cIGlAsKQ ......................................QsHSAlsALQhEp.lpss-+ssp.h..lspRp.Ftlu-IuGEHTuFspLPcsG+AsY+GpAF.....u..S.........D....D.........A.........G......G.......KLTYTIDFAAKQGaGKI...EHL.K.o...PEh.N...V...-LAu...A-............l.........Ks............D......EK....p..HAV.......I...SGs..s..hY.s....p.......s..E....KGoYpLulFGs+AQElAGSApVK..hsptl+cIGlAuKQ................................ 
0 1 2 4 +8628 PF08795 DUF1796 Putative papain-like cysteine peptidase (DUF1796) Iyer L anon Iyer L Domain \N 21.60 21.60 24.50 24.30 19.90 19.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.09 0.71 -4.62 26 236 2009-01-15 18:05:59 2006-05-11 14:56:26 5 2 163 0 42 187 9 161.00 29 64.13 CHANGED hsthlSLGutCtsAhhLpKhsLR..shuhPFDahhs.olssltchlpscFsshhp.pth........................................t............h..pshhtallacshauh.sha-apss..s..s......chhtchp+Rhcphhpplpssp..........plhFlR.....ssh.shctlh-hhphltptsss.pshhlll.htppsts ...................pslhSLGppChsAhtLpph.pLc......shuushDahh....Sso...Lpp...Vs...pLLpNc.FscFhphpsl...............................................................p...............thh.psssphhl..cDshYsl.ohHDFtsshss.psh..........tY.ch+tphc+RlsRFlpplpssc..........slLFlR.....tss.s.h-EshpLpplLsphspp.phplLll........ss....................................................... 0 9 24 32 +8629 PF08796 DUF1797 Protein of unknown function (DUF1797) Mistry J anon pdb_2ffg Family This is a domain of unknown function. It forms a central anti-parallel beta sheet with flanking alpha helical regions. 21.40 21.40 22.80 39.90 21.10 18.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.19 0.72 -4.10 17 675 2009-01-15 18:05:59 2006-05-11 15:37:50 5 1 670 2 59 161 0 68.10 54 86.93 CHANGED IIsRLcuMtcst..suEsppRpFE+pG..hspVsasppsc...sapLcchpsccp...apFDsIDLlAIEIaDLL ..................IIsRLEAMtp....DGtshpRpFER-GVslspVuasccpE...hFpLc-scs+Eo...YpFDsIDLlAMEIY-LL. 0 11 31 45 +8630 PF08797 HIRAN HIRAN domain Iyer L anon Iyer L Domain The HIRAN domain (HIP116, Rad5p N-terminal) is found in the N-terminal regions of the SWI2/SNF2 proteins typified by HIP116 and Rad5p. 
The HIRAN domain is found as a standalone protein in several bacteria and prophages, or fused to other catalytic domains, such as a nuclease of the restriction endonuclease fold and TDP1-like DNA phosphoesterases, in the eukaryotes [1]. It has been predicted that this domain functions as a DNA-binding domain that probably recognises features associated with damaged DNA or stalled replication forks [1] 20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.27 0.72 -4.12 51 781 2009-09-10 22:04:53 2006-05-11 16:15:26 6 29 554 5 366 662 33 100.50 19 15.98 CHANGED hGshpsps.......huhph......................hhphlph....uptlhlpRpsps.............................................hDpsA....................lpVtsss......ut............plGalPcchuphlus.Ll-pt...hhphcuhlh.........ssp....tthsh.pplhlhhpsh .......................................................................hth.......huhph...........................h.t.lp........uph..l.tlpREs.sNs............................................aDps.A.............................l+..V....sss.......st.............plGalscphAthlus.hhDpt....hhphpu.h.l......................................t.................................................... 0 103 231 304 +8631 PF08798 CRISPR_assoc CRISPR associated protein Mistry J anon pdb_1wj9 Domain This domain forms an anti-parallel beta strand structure with flanking alpha helical regions. 
25.00 25.00 27.40 25.90 21.00 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.19 0.70 -4.58 50 667 2012-10-01 21:23:39 2006-05-11 16:20:22 6 2 628 14 132 457 17 206.90 36 97.62 CHANGED MYLS+lhLssppttstp...........hsssYshHphlhphFss.......t.ssptphLaRlE...........pttstsplLl.SsppPchst................sshshpscshhspLpsGpphpF+LpANPstpt.........................t.ttpup...chshhppp.pthpWLp..++upptGFplhs.............................hhphpshppcphpppp...........p.lphssVsF-GhLpVsDssthhpsLppGIG+uKAaGCGLLolss ......................................................................................................MYLSRlpLcs.sp...h....pstph............ps.YshHphLasLFPs............................spc+pFLaRhE..............phpsshphhllSpppPsto.......................shhslps+sF.hsp.LpsGppLpFpLRANPshsp.............................................................h..ptp.t.phc....sp.s.cup......cl.h...hhppp..stlsWLt...ppu-ps..G.F.sLhc.................................................ssVcu.hcpp..p.hc+cp..........ppphlphuoVcasGhLs.VsDP..s...hFhppLspGhG+u+AFGCGLhhltP..................................... 0 39 99 117 +8632 PF08799 PRP4 pre-mRNA processing factor 4 (PRP4) like Mistry J anon pdb_1mzw Domain This small domain is found on PRP4 ribonuleoproteins. PRP4 is a U4/U6 small nuclear ribonucleoprotein that is involved in pre-mRNA processing. 25.00 25.00 25.10 25.20 24.40 24.40 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.24 0.72 -4.67 61 535 2009-01-15 18:05:59 2006-05-11 16:32:06 6 15 264 2 398 508 1 29.90 51 6.74 CHANGED Vhp+LRplsEPIsLFGEsct-RpcRL+plh ...Vhp+LRplGEPIsLFGEsst-RhcRLRpl...... 0 135 213 321 +8633 PF08800 VirE_N VirE N-terminal domain Bateman A anon PSI2 target AAO76744.1 Domain This presumed domain is found at the N-terminus of VirE proteins. 
20.60 20.60 21.30 21.50 20.20 20.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.43 0.71 -4.41 14 419 2012-10-02 15:26:12 2006-05-11 17:59:31 5 13 90 0 49 404 58 131.70 27 24.52 CHANGED -tpphtcp.LPtlhsuutacc..tcstpphptasGllhl-lc+Ls..pchstl+pphtthP.T..hhAFhusSG+uVKIhlhhst.-ss.h.t........p.tp...ap....utAYphssphYpthl...shsl-hps.slsphChhoaDP-sYa .............................tphKpp.LPhlsPuupFpc...cstpphtpasGllhlDlDcLs..sh.....p...E.h...tpl+p..ph..hp..tP...ts......hhsFhosSGcGVKlhl.....hh.s.h....s....sss..h............t........ap....tpuY.ph.ss..p..h..ap.hh......sh...plDh.p...s...t..slsRhCh.loaDPcsaa...................................... 0 20 45 49 +8634 PF08801 Nucleoporin_N Nup133_N; Nup133 N terminal like Mistry J anon pdb_1xks Domain Nup133 is a nucleoporin that is crucial for nuclear pore complex (NPC) biogenesis. The N terminal forms a seven-bladed beta propeller structure [1]. This family now contains other sized nucleoporins, including Nup155, Nup8, Nuo132, Nup15 and Nup170. 
23.70 23.70 23.70 23.80 23.60 23.50 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.45 0.70 -5.73 57 599 2012-10-05 17:30:43 2006-05-12 09:01:07 6 12 283 1 388 605 4 407.80 16 33.61 CHANGED sp.stapshphhs.hPstlhpphsps...........................p..spsuhhschshuhhsscsclhlWs.Y..................................t.ssph.h........................h.st.ppslsul..........sl.VpPps..G..hahpslsahhslusshp......lhlhh.........................lhssphssphhsspsss.hlhssssGRlhhhuhps..sh.pl..ph.pt.ps.......psst.shstsuh.usl....hss..........................................tt.cs.lsplpssppc.....phlashoscuhlphaplp.....sspthpp.......lhphhtpth.pth.......................................s.....phcllclpslstp.....................psthhhLluhspsusph.hthh.h......................................h...h...........................................p..hh.ss......h.t.phps......thhs.tlalsssspps...................................hlhssssshs..................hhp...pt...........hEsshhhp.s..................................hss..uhpp..............p.p.....ps.p.hhlhpstGlhh 
.......................................................................................................h...ph.s.lPstlhc..thsth...........................pstphhul.h.sphshAWlshcsclhlWsY...........................................p.stph.h.............................h..ss.spsl.uV...............sl.ltsps..G..hahss.lp...ahhs.lssshp.......h.l.s.....................................................................lhssthh.....s..h.hsss..sts..h..lhuossG.Rlhh.........hs.h.s.s.........slapl.......th..pttps.......................phpthshst.uu.l.u.l......hss......................................................................st.cs..lspl...th-..ps+........phlaslospu.hlp..haclp.....sspthtt.......lhp.h.pst.hhtth.................................................s.t.p.phpllsl...p..slsss......................ts...h.htLlu....h...s.ps.uh.phahshssh.........................................................t.h.h..h..............................................th.s..s..............s....p.pt.......hhs..t.hhl.hh.s.pps....................................hhhhsssshs.....................t........................-.shhhht........................................................................................................s.t.hhhhps.Gh....................................................................................................................................................................................................................................................................................... 0 119 200 315 +8635 PF08802 CytB6-F_Fe-S Cytochrome B6-F complex Fe-S subunit Mistry J anon pdb_1q90 Domain The cytochrome B6-F complex mediates electron transfer between photosystem II (PSII) and photosystem I (PSI), cyclic electron flow around PSI, and state transitions. This domain corresponds to the alpha helical transmembrane domain of the cytochrome B6-F complex iron-sulphur subunit. 
23.30 23.30 23.50 23.80 23.20 23.10 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.69 0.72 -3.95 19 215 2009-01-15 18:05:59 2006-05-12 09:15:23 5 3 170 9 83 203 111 37.50 46 19.78 CHANGED D.VPDMu+RplhNLLhhGulussssuhLhPhspaFlPPu .....VPDMuRRQhMNL.LhhGslohsAsGhLhPhspaFlPPt.... 0 20 54 73 +8636 PF08803 ydhR Putative mono-oxygenase ydhR Mistry J anon pdb_2asy Family ydhR is a homodimeric protein that comprises of a central four-stranded beta sheet and four surrounding alpha helices [1]. It shows structural homology to the ActVA-Orf6 and YgiN proteins which indicates it could be a mono-oxygenase. 27.10 27.10 27.20 28.40 26.60 27.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.19 0.72 -3.91 23 612 2009-01-15 18:05:59 2006-05-12 10:05:35 6 2 606 6 47 201 70 96.50 66 94.04 CHANGED LLQlDFsasGP.FG--hupuhpsLAcSIspEPGhIWKIWTENppspcAGGIYLF-scsuApsYLpMH.oARLpsh.Glsclpu+lFDlNpsLopIs+uPl ...............LLQlHFsFsGP.FG.-tMscQLp..sLAESINpEPGFlWKl......WTESEKN+EAGGIYLFp-EcoA..A.YLEKH..TARL.KsL.GV-EVsuKlF-VNEsLopINpupL................... 0 11 28 37 +8637 PF08804 gp32 gp32 DNA binding protein like Mistry J anon pdb_1gpc Domain gp32 is a single stranded (ss) DNA binding protein in bacteriophage T4 that is essential for DNA replication, recombination and repair. The ssDNA binding cleft of gp32 comprises regions from three structural subdomains [1]. 20.10 20.10 20.90 22.50 19.60 19.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.26 0.72 -3.87 10 98 2009-01-15 18:05:59 2006-05-12 11:05:39 5 1 90 4 0 87 390 82.80 51 31.51 CHANGED DccEWKLchDsuGNGpAVIRFLPuKs-Es.LPFVKLlNHuFKcNGp.WYIENCsSTHGDaDsCPVCpahpps-L.a.....Noss-ctphhup.hKRKtSYW ............Dcp.WKLchDsu.GN.GpAVIRFL..P.......u...........s....t-p........hPasplhsHuF+ts....Gt.WYIENs..oThGc..ssPVspa..ppcL.a.....Nsspc..p...p........KRKhuaa.......................... 
1 0 0 0 +8638 PF08805 PilS PilS N terminal Mistry J anon pdb_1q5f Domain Type IV pili are bacterial virulence-associated adhesins that promote bacterial attachment to host cells. In Salmonella typhi, the structural pilin protein PilS interacts with the cystic fibrosis transmembrane conductance regulator [1]. Mutagenesis studies suggest that residues on an alpha-beta loop and the C terminal disulphide-bonded region of PilS might be involved in binding specificity of the pilus [2]. 20.80 20.80 20.80 20.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.73 0.71 -4.64 18 332 2012-10-03 10:38:27 2006-05-12 11:51:41 6 3 257 5 49 317 16 137.50 30 72.62 CHANGED sscsssEpsNlsslhsss+u.hKus.uuYs...uushsssLlphtulPssMss..sGs......slhNsWGGsVTVsss.....usst.oFolThssVPpssClsLsTplusu...hsshsIsus......shssu..plssssAs....osCsus.....sNTlsaTos ........................................................................tpstpEtsNlpsIhsss+u.hh.pus.suYs.......suphsssLlp.hsshP..ss.Mhs....sus..........slhNuW.GG.s.VT.luss.........usss..uFTlT...h...s...sVPpcsClpluT.t.huss.....htsh...s...lsus..................sh.su.........lssssAu......ssCsss.....sNolsaT............................................ 0 13 27 37 +8639 PF08806 Sep15_SelM Sep15/SelM redox domain Mistry J anon pdb_2a4h Domain Sep15 and SelM are eukaryotic selenoproteins that have a thioredoxin-like domain and a surface accessible active site redox motif [1]. This suggests that they function as thiol-disulphide isomerases involved in disulphide bond formation in the endoplasmic reticulum [1]. 
21.20 21.20 21.40 21.70 21.00 20.60 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.43 0.72 -4.23 12 179 2009-01-15 18:05:59 2006-05-12 13:10:50 6 2 121 2 112 204 1 73.00 33 48.92 CHANGED putl.pssGU+LsphPcVctFlpsDh...sha.sLphKalhGucP.lhLhDcpsp.hEc.lslschspDclpphltp+shh .............................s.ttl.ps..sh..p....p..h.sthPp...l.puFlpp-h...tha...slplKalhGuc.PhLhLLD.c.pGp..tEp...lsl.pcW.sp-plppalpp+hh............... 0 44 62 91 +8640 PF08807 DUF1798 Bacterial domain of unknown function (DUF1798) Mistry J anon pdb_2ets Domain This domain is found in many hypothetical proteins. The structure of one of the proteins in this family has been solved and it adopts an all alpha helical fold. 25.00 25.00 40.00 65.50 21.00 19.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.43 0.72 -4.35 19 391 2009-01-15 18:05:59 2006-05-12 13:39:28 5 1 391 5 32 144 0 109.40 44 93.99 CHANGED hTppLlptscchhp+Y.ps..+cpspcaDFappVKPas-chcphlcpWpphAhpalppt+PcYl+tpQl-tsh-Nhpplslpuaas+spcKRFh-hhcSlpYsLphlh-tlt.p .......s.sEpLl.cssphpppapps..Kpptp-aDFYpsVKPas-clDshLschK.ht.hhIch....Yhsspph-hlhsNlppluVpCaap+os+KhFlE+hcSlpYsLQNIl-tlt.Kc... 0 8 18 26 +8641 PF08808 RES RES domain Bateman A anon PSI2 target CAE41587.1 Domain This presumed domain contains 3 highly conserved polar groups that could form an active site. These are an arginine, glutamate and serine, hence the RES domain. The domain is found widely distributed in bacteria. The domain is about 150 residues in length. 
20.50 20.50 20.80 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.51 0.71 -4.16 184 1322 2009-01-15 18:05:59 2006-05-12 14:02:02 6 8 874 0 452 1175 109 164.20 15 70.86 CHANGED plaRlptt.............................ph.t..s.............s......s.Rass...........s.uhssl.Ysuts.....hssAlhEs.......................................hhhsthhhtththsshthttth....t........t.h....htshsshts.t............thspthsp...thtt.sh........................ulhhsS...sh................s.....s..stslslhsst............................tthphhpspththths .................................................................................h...................................ph....s.........t.hh..hG..u.Rass..............t..shs.sl..Ysups.....hpsAlhEs.........................................................h.h..................................sthh...sthh...ht.th.t.hssh.h...thtph.......hph...............h..........h......th..ts.sh.tt..........................h.hspt...hup.......thp..sh..............................t.......GlhhsS......sh..............s......s.....shslslas.t....................................................................................... 1 115 273 368 +8642 PF08809 DUF1799 Phage related hypothetical protein (DUF1799) Bateman A anon PSI2 target CAE43631.1 Domain Members of this family are about 100 amino acids in length and are uncharacterised. 20.80 20.80 21.10 20.80 20.30 19.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.01 0.72 -4.18 15 119 2009-01-15 18:05:59 2006-05-12 14:06:48 6 3 110 0 32 116 31 77.20 31 69.89 CHANGED LtthG..hp.-Da..c..psshEVWPEN.htAhplFtuhuTQWRs........GssGshGLDYusl.shhchhGlctEppp-lassl+lhEppALchl ...................h...................hplWP-s.h.uhplFhshuTQWRs..................Ghu.G.s.sGLDYusl.tlhchh.sl.p.scpp.tlhsclRlMEttALphh...................... 
0 2 17 26 +8643 PF08810 KapB Kinase associated protein B Mistry J anon pdb_1y71 Domain This bacterial protein forms an anti-parallel beta sheet with an extending alpha helical region. 20.00 20.00 20.00 41.60 19.80 18.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.34 0.71 -4.03 10 347 2009-01-15 18:05:59 2006-05-12 14:14:48 5 1 347 2 28 126 0 112.60 61 88.34 CHANGED YKTGsYhGpIpE-+spp..hLVcVcAVlKHPpQGDLHNPsQs.-ssFFHERKALuahEKphls+utV+sa-s.ElPsYs-SLppAlschcs+Lps-soc...aAppSLcsLcpL+c-Ytl .HKTGsYuVsIsE-sss.s..lLVKVcQVIKHPKQGDLHNPsET.-sVFFHERKALSaaEKRaspcSpL+cFNs.-.lhcYEDSLQpAlocLEspL+tpp.oc...aAchSLsoLscLKcDYsL.. 2 9 17 24 +8644 PF08811 DUF1800 Protein of unknown function (DUF1800) Bateman A anon PSI2 target AAK23953.1 Family This is a family of large bacterial proteins of unknown function. 25.00 25.00 26.60 26.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.46 0.70 -5.56 77 530 2009-01-15 18:05:59 2006-05-12 14:19:30 6 11 396 0 221 585 994 467.30 24 84.11 CHANGED 
tphLsRhsaGsp...suplsphhp.h.......uhpsalppQl...............stsss..sshhtt.hsphs....................................................................................................................................................ttptttcpt..t........................................hh.t..tuhhtpAlhus.spLpERhshFWpNHFsVSspc...s.h.phhssshpp-slRsauhG.......pFc-LLtAlsppPAMhhYLDshps......................................s.t.......ctt...t.NENaARElhELaTLGl........................s..uG.....YoQpDVpphARshTGWslsstt..........................tsshahapsph........H-susKs...............lL..Gpshs.s..............sG.tcs..ptsL-hLspHPsT..ApFlup+Lhp+FVu.....Ds.PssuhVpRlApsFp...po..............cGDltAVlpslltssEhhss..........tsK.....l+sPhchhlushRshshp.sss..................................................tthhshlspLGQshatsso..........PsG......as.sssuWsuspthltRhphstpl..usthhsshh.............................sspshtt............hh.thhss...shospop.psltps.sttt................................hlslllsSPEF.hc .....................................................................................................................................................................................................................hhLpRhsaGsp...ss..plsth...ht..h........shpthltt...l......................ts..t.....sshh.....tt...httht.......................................................................................................................................................................................t.ptthpp..........t....................................................................................................htphtt...th....lt..phlh..s..s.......s......tLpERh.shFWpsH.Fslutpp...s.h.ph.......hhh.hppphlRtpA.hG..................sFp-LLtAlspcPAMLhYLDstps..........................................................................................ppttsNENaARElMELaTLGl.......................ssG........YoppDVpphARshTGWshss......................................tstahapsp.h........H-sGsKs....................................
....lL.Gp..s.h.t................tG.tcsctsLchLhp..+..P....s..T..ApFlup+LhpcFVu.............-s....P...........s.........su.hV..p+lAssFp...ps....................sGDltsllpsllpsschhss....h...............ss+lKsPhchllushRshshs.sss...................................................t.h.hs...h.h.ppLGQ.shatsss.......................................PsG......a......s.........s.....s...suWlsssshltRhphsttl...sst.hhtt.h....................................................................................s.tth..t...............................................th.lts....t....hssps.t.tsltpt..st.p..........................................................................hhthlhhuP-a.h..................................................................................................................................................... 0 79 146 186 +8645 PF08812 YtxC YtxC-like family Bateman A anon PSI2 target YtxC B.subtilis Family This family includes proteins similar to B. subtilis YtxC an uncharacterised protein. 20.80 20.80 20.80 21.40 18.90 20.70 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.41 0.70 -4.84 33 299 2009-09-10 22:38:25 2006-05-12 14:27:35 6 1 295 0 79 225 0 214.20 31 73.96 CHANGED hc.hhlusllschllpph-cchltclIpcsYhahcp-EpppIhch.uppILcs-ppth..pph.phs++s.Ihcclt-hlp-s.splsl-GFlsFRL+-YhccLcchl-pAl-EYhhE+EYpEFIcLL+aFV-hQcs+lctVallhs.stpahLaDcctcplss-hlpphhsc..lhppslsh-DlLISsLIolAPccIhlast-psc...phlpTIpsVFp-RVph ..............................................................pthlhsslsphIlp.hhppchlhpllpcpaa.ahc.p-EpppIhph..upplLcscppt...h......t....p.h.phs.............pc.shIhsplpsalp-s.....phshsuFlpFRL+sYhcplpcls-hAlDEYhhEpE...YppFIch....L+.aVc..pcs+lspV+llhc.p.sFhlaDccscclppc.pltphhcc..htppsl.h-..shlIusLloluPc+Ihlasc.cps.p.....phlpTlcsVFp-RVp.h.................................................... 
1 38 64 69 +8646 PF08813 Phage_tail_3 Phage tail protein Bateman A anon PSI2 target CAE43633.1 Family This family of proteins include phage tail proteins. They probably include bacterial Ig-like domains related to Pfam:PF02368. Which also includes a number of phage tail invasin proteins. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.90 0.71 -4.85 12 222 2009-01-15 18:05:59 2006-05-12 14:32:47 6 3 207 0 34 177 8 149.50 24 74.81 CHANGED SuWspLss+ss+ssssossshsl-GIDTocst.assG.Ghuphh.lsoWs-lspVpslupsGG-QQFhsaphLpD.D.+cpQIPThKSAhshThThAa-.shsaatsLcpAD-s+pshslRhplPsusph.hasuYhuFscsPohshNthhsholslSltuc.ThhAu .........................................................................................h..h..s..sapcl.sp.lp..-lspsGu-tphlphshLpD...s..hp..p..phss...hps.Asshshsh..uac.s..s..p..s..s..ap.sLct.ss-scp.hhshchth.Ps.G..s....ps...hapuhlSh.c.....hs..ssssNtlhsholslslpu.c..shh..s....................................................... 0 3 13 27 +8647 PF08814 XisH XisH protein Bateman A anon PSI2 target ZP_00111899.1 Domain The fdxN element, along with two other DNA elements, is excised from the chromosome during heterocyst differentiation in cyanobacteria. The xisH as well as the xisF and xisI genes are required [1]. 25.00 25.00 26.10 26.00 21.50 20.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.64 0.71 -4.16 8 139 2012-10-11 20:44:44 2006-05-12 14:41:39 5 2 37 2 44 187 1 116.50 41 95.22 CHANGED MsAKDlaH-sVKsALhKDGWtITcDPLhl+hGc.sslaIDLuA-KlIAAE+pspKIAVElKSFlu.sSpIsDF+sALGQaIsYRhlLcsp-sERlLYLAlscssYcsFFppchsQhllpcpplpLllaDsEpEpIlQ .................MsA+DlaHpsV+pAL.K-GWhITcDPhhlp.hst.hphhlDLuAE.......+..........llAAE+ptpKIAVElKSFlu..S.lp-hcpAl..GQal.Y+hhLpt.p..-P-RhLYLAlspshYpsaFpp.hhphhlpc.plpLllassppE.Ih..................... 
0 11 31 44 +8648 PF08815 Nuc_rec_co-act Nuclear receptor coactivator Mistry J anon pdb_2c52 Domain This region is found on eukaryotic nuclear receptor coactivators and forms an alpha helical structure. 19.80 19.80 19.90 19.80 19.60 17.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.48 0.72 -4.43 10 226 2009-01-15 18:05:59 2006-05-12 15:38:57 5 16 42 2 81 169 0 49.10 64 3.62 CHANGED EGpsDE+ALL-QLsohLsstD..uLEEIDRALGIPcLVsQutsh-...-pF ...EGpsDE+ALLDQLhohLsspD....GLEEIDRALGI.P-LVs.Q..uQul-s..-tF...... 0 4 9 30 +8649 PF08816 Ivy Inhibitor of vertebrate lysozyme (Ivy) Mistry J anon pdb_1gpq Family This bacterial family is a strong inhibitor of vertebrate lysozyme. 19.90 19.90 21.00 21.00 19.10 19.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.63 0.71 -4.12 17 506 2009-01-15 18:05:59 2006-05-12 15:59:04 6 2 482 7 45 183 8 116.00 56 74.15 CHANGED Lsppssa+ssWppMlpupppLPpWlppstGous.PhpslshsGppYlVGshCKPHD.CuupphhVhautDccp...AaGlhVplspt..sshcsPocaAsapWLGpPscs........hpuhLpppLc.pcPN .............................LAKucsTKAAFNQMVQGHK..LP..A....W.VMK.G.GThT....PAQTVTLGDETYQVMSACKPHD.CGSQRIAVhWSEKSsQ...MoGLF........SoID...EKTSQEKLTWLNVs...DALSID.GKTVLFAALTGSLE.NHPD............................ 1 8 17 29 +8650 PF08817 YukD WXG100 protein secretion system (Wss), protein YukD Mistry J, Desvaux M, Burroughs AM, Iyer LM, Aravind L anon pdb_2bps Family The YukD protein family members participate in the formation of a translocon required for the secretion of WXG100 proteins (Pfam:PF06013) in monoderm bacteria, with the WXG100 protein secretion system (Wss). Like the cytoplasmic protein EsaC in Staphylococcus aureus, YukD was hypothesized to play a role of a chaperone. YukD adopts a ubiquitin-like fold [1]. 
Usually, ubiquitin covalently binds to protein and flags them for protein degradation, however conjugation assays have indicated that the classical YukD lacks the capacity for covalent bond formation with other proteins [1]. In contrast to the situation in firmicutes, YukD-like proteins in actinobacteria are often fused to a transporter involved in the ESAT-6/ESX/Wss secretion pathway [6,7]. Members of the YukD family are also associated in gene neighborhoods with other enzymatic members of the ubiquitin signaling and degradation pathway such as the E1, E2 and E3 trienzyme complex that catalyze ubiquitin transfer to substrates, and the JAB family metallopeptidases that are involved in its release [7]. This suggests that a subset of the YukD family in bacteria are conjugated and released from proteins as in the eukaryotic ubiquitin-mediated signaling and degradation pathway [7]. 27.00 27.00 27.10 27.00 26.70 26.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.57 0.72 -3.42 54 869 2012-10-03 10:59:06 2006-05-12 16:23:53 5 3 548 2 159 472 11 79.60 27 25.86 CHANGED shscVslph........sp....p.thDlsLPsplPlppllsslhphls..........hsshs...........tss....phpLs....tsGt..hLstspoLsptsVtDG-lLhlh ..............hscVTlth........sp.....p.phDlslPutlPlcsllstllchls..............hshh-.............sss.h..phplt....spGt..hLstspsLs-hslsDGDlLtL....... 0 48 107 141 +8651 PF08818 DUF1801 Domain of unknown function (DU1801) Bateman A anon PSI2 target AAO81511.1 Domain This large family of bacterial proteins is uncharacterised. They contain a presumed domain about 110 amino acids in length. 
27.20 27.20 27.20 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.52 0.72 -3.93 150 1704 2009-09-12 22:25:29 2006-05-15 11:59:35 6 10 1072 5 545 1425 322 100.10 20 67.93 CHANGED tt+pthppL+pll.tpsssp....h.pcplpa.......uhPsath..ttp................hsthss..hKp..alulhh....hpss.hp.........c..tt.l......ptphs+shhphpph.p.cls....hphlpphlppslp ...............................hppthppl+pllhpsssp................l.pEpl..pa........shPsash.sup...............sllthps.......hKp....alulha........tpu..u.lp..........c..tphl.t........phpt.s+sh.chp.ph....p.pls.....hchlpphlptsh.t................................. 0 237 402 491 +8652 PF08819 DUF1802 Domain of unknown function (DUF1802) Bateman A anon PSI2 target Domain The function of this family is unknown. This region is found associated with a Pfam:PF04471 suggesting they could be part of a restriction modification system.. 25.00 25.00 57.80 45.90 24.20 19.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.02 0.71 -4.66 27 227 2009-01-15 18:05:59 2006-05-15 13:54:39 6 3 145 0 55 187 29 162.40 53 84.40 CHANGED hALKEWssslcALtpGcphlLlRKGGIpEt.p..F....psptppFlLaPTh.Hpps....ctl+scapshlpts..st.tscplplpuaAplsssh.lss....ptlppLpshaIWstctlp.pRhpa+spp.lhlLlLRla.LscPhplshssp.atGCpSWlsLsp.l..shpsspPVlsDppasphtppl .........s.ALKEWuAAV+ALl-GRQoVLLRKGGItE+.R....F........cVAu+cFLLFPTVuHoHA..............ERVRPEH+DL.....LsPA....AADS..T..-E..s..VlLRAuA+VVAAlsVsR...PEu..L-AIEcLHIWTAESVRuDRLDFRPKH+LsVLVVpshsLsEP....Vc..lscpP-.YuGCp........SWVpLs......l...ssphutPVhs-sshschstc.h............................. 0 22 41 53 +8653 PF08820 DUF1803 Domain of unknown function (DUF1803) Bateman A anon PSI2 target AAO81393.1 Domain This small domain is found in one or two copies in proteins from bacteria. The function of this domain is unknown. 
23.70 23.70 23.80 24.50 22.20 23.60 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.21 0.72 -4.28 14 715 2009-01-15 18:05:59 2006-05-15 15:21:35 5 2 417 0 49 356 0 91.70 36 69.98 CHANGED lpsa.hphhpppPh......hhcLl.....pYhhca..sshlL+.l++cashpc+hDhhl-shlthGYIhp.Es++YpLshsl...........................................Dp-uhhath ...........h.p.h..phhpp.sh......hhsll.....cYhhcY..sshlL+.lK+phs.ppshDhhl-phlthGaIlp.Es++YpLshsh..............................s....................t.......................... 1 5 11 29 +8654 PF08821 CGGC CGGC domain Bateman A anon PSI2 target AAB98576.1 Domain This putative domain contains a quite highly conserved sequence of CGGC in its central region. The domain has many conserved cysteines and histidines suggestive of a zinc binding function. 21.50 21.50 22.90 37.20 21.10 20.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.95 0.72 -3.97 31 185 2009-01-15 18:05:59 2006-05-16 15:44:05 6 2 147 0 77 174 5 106.50 31 88.27 CHANGED KluIlpCppsp-..h.CsG....ssCFKuhpp+puuFppap.p..sElluFhsCGGCs.........ucclhppscphh.cpss-....slHLuoChhts.............CPp..hcphpchlppch..sl.pVltGTH .....KluIltCppsps..h..CsG....ssCh+uhpp+puuFppYsp.c.h-lluFhsCGGCs.........upplh....p......pscph......h...cps.s-......sIHluoCht...ptt..............CPp..hcphpchlpcch...Gh.plVpGTH........... 0 37 67 74 +8655 PF08822 DUF1804 Protein of unknown function (DUF1804) Bateman A anon PSI2 target CAB84459.1 Family This family of bacterial protein is uncharacterised. 
29.00 29.00 29.60 31.50 28.50 27.30 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.76 0.71 -4.51 12 154 2012-10-04 14:01:12 2006-05-16 15:50:41 6 2 139 0 18 111 2 157.70 37 93.63 CHANGED M.AHspEsRctlRphYVhsphoLEhAAhpsGVshuTARRWKpcA+spGDDWDKsRAA.tlAuGGlE-luRphLssallQapsTMctLp...pspslssuc+sclLASLuDuasKTluAs...+RlhPETscLAsAl-llphlusalpp+aPpHl.AasElLEPFG.plpKca .....................MAaspch+cthRchYlhsphsLptuAthhsls.sTARpWKptsctcG...DDWDKhRsAphLAu.ssl-clupulltuFh...hQhpssh-plp.......sspchsspcKschLAuLuDuFoKhhuuu...++lhP-oscLusAhcllc.h.lhualppc+Pcplsshl-lLEshu..ltc............................. 0 4 9 15 +8656 PF08823 PG_binding_2 Putative peptidoglycan binding domain Bateman A anon DUF1028 C-terminus Domain This family may be a peptidoglycan binding domain. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.59 0.72 -3.88 9 96 2012-10-01 23:43:47 2006-05-16 17:46:52 6 6 94 0 51 368 46 69.20 26 22.05 CHANGED hhhcp.cs.tphslscDhtsplpsuLtcLGY...........ppthtcALpsalthpNFEs+.............hpscsp.IspsVhpaL ..........................................pstphhslsschtpplpphLpchGa..ht..t.....s........h.spshppALpsahuhENhEpR.....................htsss..ID.tVLphh.............................. 0 23 36 46 +8657 PF08824 Serine_rich Serine rich protein interaction domain Mistry J anon pdb_1z23 Domain This is a serine rich domain that is found in the docking protein p130(cas) (Crk-associated substrate). This domain folds into a four helix bundle which is associated with protein-protein interactions [1]. 
21.00 21.00 21.00 21.20 20.60 20.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.95 0.71 -4.52 12 250 2009-01-15 18:05:59 2006-05-17 14:37:00 5 8 78 2 110 232 2 155.60 43 19.69 CHANGED LDL-sAhEpLs+LQpplsSSVutLhsFVu.....ssWRshsp.hEsslpcl+tAs-+lctul+-hL-Fu+GshusAsphsDpsLpsKl+cQLQplc-uaQhLlctppsLDsss.WohphLsts.tsps.....ssDDL-RhlhsARslP-DsKphAShltuNupLLF+Rssp ..............L-L-sAlEpLtRLQ......pslssoVupLhshlu.....ssWRs.hs....hEsplp-l+sAls+Vcsul+-hL-F.A+GAluNAup.....hsD.......psLpsKLp+QLQ+lEDuaQhLhppupsL-sss..WulshLshs...t..s...............ssDDLDRhVhsuRsVP-DsKQLsShl......p............uNApLLF++s..s........... 1 20 31 61 +8658 PF08825 E2_bind E2 binding domain Mistry J anon pdb_1y8x Domain E1 and E2 enzymes play a central role in ubiquitin and ubiquitin-like protein transfer cascades. This is an E2 binding domain that is found on NEDD8 activating E1 enzyme. The domain resembles ubiquitin, and recruits the catalytic core of the E2 enzyme Ubc12 in a similar manner to that in which ubiquitin interacts with ubiquitin binding domains [1]. 20.30 20.30 20.40 21.90 20.20 19.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.69 0.72 -4.22 24 285 2009-01-15 18:05:59 2006-05-17 16:11:40 5 11 231 29 198 288 4 85.70 38 19.97 CHANGED lphssshTLp-lI-pLscpschQl+pPSls.....sss+sLYhtss..spLEctT+sNLs+pLt-L.lpcGpElsV.........oDsshs.hshplplpFp ..........hphssssoLp-ll-hL..sp.ps..ph..QhKsPulo.......ucs+oLYhpss..sslEE..pTRsNLsKsL....p....E.....L.....l.......t-GpE........lsV.........sDsshs..tshph+LpF............... 0 70 109 164 +8659 PF08826 DMPK_coil DMPK coiled coil domain like Mistry J anon pdb_1wt6 Domain This domain is found in the myotonic dystrophy protein kinase (DMPK) and adopts a coiled coil structure. It plays a role in dimerisation [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.93 0.72 -4.07 6 271 2009-01-15 18:05:59 2006-05-17 16:41:07 5 15 56 3 115 239 2 59.70 48 4.79 CHANGED ELQSALEAEIRAKQulp-ELpKVKsuNlshEsKLp-oEsKNpEL.pElcpL+K-MEE.hRuc ..................ELQSAL-AEIRAKQulQEEL.p.c.V+suNlphEs+L+-uEt+Np-L.pElcpLpcchE-.hRuc........ 0 12 19 55 +8660 PF08827 DUF1805 Domain of unknown function (DUF1805) Mistry J anon pdb_1qw2 Domain This domain is found in bacteria and archaea and has an N terminal tetramerisation region that is composed of beta sheets. 21.40 21.40 21.40 30.20 21.20 21.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.46 0.72 -4.17 18 202 2009-01-15 18:05:59 2006-05-17 17:03:56 6 1 201 1 63 138 3 62.30 62 62.88 CHANGED hCGhLsVsshp+......hG.sAu+Vp......GVcTl-DhLpuplhsloptAccLGlcsGMoGcEALp+h .....................MCGALDVuLLNEKL..tDRGIIAGRAV......GVRTIEQLLEAPLESVThtAEsLGIpsGhhG+EALLKM. 0 19 45 50 +8661 PF08828 DSX_dimer Doublesex dimerisation domain Mistry J anon pdb_1zv1 Domain Doublesex (DSX) is a transcription factor that regulates somatic sexual differences in Drosophila.\ The structure of this domain has revealed a novel dimeric arrangement of ubiquitin-associated folds that has not previously been identified in a transcription factor [1]. 20.70 20.70 20.90 39.70 20.30 20.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.98 0.72 -4.44 11 105 2009-01-15 18:05:59 2006-05-17 17:27:46 5 3 47 6 15 114 0 59.40 55 18.06 CHANGED lspDslL-pCQ+LLEKF+YPWEMMPLMYVILKsAcuDl-EASRRI-EGphllppYppppphs ....spDhhL-aCQKLLEKF+YPWEMMPLMYVILKDAsADlEEASRRI-EGptllNph.p.p............. 
0 4 8 12 +8662 PF08829 AlphaC_N Alpha C protein N terminal Mistry J anon pdb_1wym Domain The alpha C protein (ACP) is found in Streptococcus and acts as an invasin which plays a role in the internalisation and translocation of the organism across human epithelial surfaces. Group B Streptococcus is the leading cause of diseases including bacterial pneumonia, sepsis and meningitis.\ The N terminal of ACP is associated with virulence and forms a beta sandwich and a three helix bundle [1-3]. 20.20 20.20 20.30 30.40 19.80 18.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.26 0.71 -4.96 5 46 2009-01-15 18:05:59 2006-05-18 09:50:22 5 22 21 2 1 51 0 177.50 50 32.35 CHANGED VTQGQlNIFs-TVlAAEVIsGSAATLNTulTKNlQNGNAYIDLYDVKNGKIDPLQLIVLsPsuYoApYYI+QGuKYYosVSELQTsGuAoITYNILcEDGsPHsKoDGQIDIVSVuLTIYDSTsLRDKI-EVcsNANDPKWSDGSRDEVLTGLEsIKsDIDNNPKTQoDIDNKIlEVNELEKLLVl..slPDKDKY ...............hTQsphNI.p-olhAApsIsGSA.sTLNTshTKNlQNGpAYIDlYDVK.GhIDP.pLIsLss.uYoApYYI+QGucYao..sssclpoTGuAoITYslLDcsGsPapKuDGQlDIVSlslTlYDoosLRspI-EVhppAsDPKWS-GSRDEVLpuLEcIKpDIDNNPKTQsDIcsKIsEVNplEKhLss...hPDt.K.............. 1 0 0 1 +8663 PF08830 DUF1806 Protein of unknown function (DUF1806) Mistry J anon pdb_1njh Family This is a bacterial family of uncharacterised proteins. The structure of one of the proteins in this family has been solved and it adopts a beta barrel-like structure. 19.60 19.60 19.60 25.00 19.50 18.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.62 0.71 -4.28 14 377 2009-09-10 16:05:25 2006-05-18 10:39:37 5 2 372 1 40 133 2 115.10 62 96.23 CHANGED csIppppVQphL-pFsN+sVYlHLETTNGAYAuHaD-ph.aosGAaIRNAplpYc+uKIs.GsG.PYRVGLKh..stGWlYAEGLTcaElD-psRLLlAGHs.pG+LAlALEIScpPFs ..........................pPIccp-V.chLsoFtpKPVYLHlETTNGAYAsHFDp+s.FsAGsFlRNhplTYp+AplK..Gsp..PYRlGLKL..stGWVYspGLTHaE..Vs-+-chLlAGashEGpLAsALpIScpPFs. 
0 13 25 34 +8664 PF08831 MHCassoc_trimer Class II MHC-associated invariant chain trimerisation domain Mistry J anon pdb_1iie Domain The class II associated invariant chain peptide is required for folding and localisation of MHC class II heterodimers. This domain is involved in trimerisation of the ectoderm and interferes with DM/class II binding. The trimeric protein forms a cylindrical shape which is thought to be important for interactions between the invariant chain and class II molecules [1]. 25.00 25.00 55.00 55.00 21.50 18.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.71 0.72 -4.13 10 94 2009-01-15 18:05:59 2006-05-18 11:36:40 5 6 54 3 27 80 0 70.80 51 27.02 CHANGED NhTEDQV+HLLhpuDPhKhaPpLKsohhsNLKsLKsoMsssDWKsFESWMHpWLLFEMAKsshs-.cPsphPA ....NhTEDpVhHLLhpuDPhKhaPpLKsoF.-NL++LKpoMpshDWKsFEoWMHpWLLFEMu+pshpp.cPTthP........... 0 1 2 11 +8665 PF08832 SRC-1 Steroid receptor coactivator Mistry J anon pdb_2prg Domain This domain is found in steroid/nuclear receptor coactivators and contains two LXXLL motifs that are involved in receptor binding [1]. The family includes SRC-1/NcoA-1, NcoA-2/TIF2, pCIP/ACTR/GRIP-1/AIB1. 25.00 25.00 33.60 33.60 18.90 18.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.00 0.72 -3.23 10 234 2009-01-15 18:05:59 2006-05-18 11:48:35 5 17 49 173 84 190 0 82.50 50 6.17 CHANGED Kupp....KLLQLLTopo-pht......shtsusssssKDuhus........ussSssuussosso...........SLpEKHKILHRLLQsusSPsDlAKLTAE ......................................Kupp....KLLQLLToso-pht................hs.u.s.lssssKDusus............sussSusu.ussoSsS.............................SLpEKHKILH+LLQsGsSPsDlAKlTAE...... 0 3 9 31 +8666 PF08833 Axin_b-cat_bind Axin beta-catenin binding domain Mistry J anon pdb_1qz7 Domain This domain is found on the scaffolding protein Axin which is a component of the beta-catenin destruction complex. 
It competes with the tumour suppressor adenomatous polyposis coli protein (APC) for binding to beta-catenin [1]. 20.00 20.00 20.20 20.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.42 0.72 -4.25 9 177 2009-01-15 18:05:59 2006-05-18 13:19:08 5 6 76 1 81 150 0 40.50 58 5.22 CHANGED EDsPpuILD-HVSRVh......+TPGspSPss........hp+ps..tupSP-s ....EEsPpoILD-HVpRV.h......+TPGCQSPGs.........upass..tpRSP-.......................... 0 16 23 47 +8669 PF08837 DUF1810 Protein of unknown function (DUF1810) Mistry J anon pdb_2d2y Family This is a family of uncharacterised proteins. The structure of one of the members in this family has been solved and it adopts a mainly alpha helical structure. 25.00 25.00 25.90 25.90 23.10 16.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.82 0.71 -4.60 44 312 2009-01-15 18:05:59 2006-05-19 14:32:37 6 6 292 1 118 297 33 135.90 48 84.13 CHANGED sDs..acLpRFVpAQss..lYspslsEL+sG+KpSHWMWFlFPQlpGLGpSshAp+YuIsShsEApAYLsHPlLGsRLhEsochllshpucosppIFGsPDshKh+SSMTLFutsss....ssssFppsL-paasGp.DstTlphLs .........................sa-LpRFVpAQps.........lYppslsEL+uG+KpoHWMWFlFPQLcG..LGpSshAp..pYuIuul-EApAYLtHPlLGsRLcEssphlht.l.p.s.+.o..sppIF..G.s.PDshKhpSSMTLFutsss............ss..ssFttsLs+aasGp.DttTlphL.s.......................... 0 45 76 98 +8670 PF08838 DUF1811 Protein of unknown function (DUF1811) Mistry J anon pdb_1sf9 Family This is a bacterial family of uncharacterised proteins. Some of the proteins are annotated as being transcriptional regulators (see Swiss:Q4MQL7, Swiss:Q65MA2). The structure of one of the proteins in this family has revealed a beta-barrel like structure with helix-turn-helix like motif. 
20.60 20.60 21.00 60.70 20.30 18.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.33 0.72 -4.06 15 365 2009-01-15 18:05:59 2006-05-19 16:33:10 5 1 364 2 38 123 0 100.90 62 97.01 CHANGED -KRYS-MocaELcpEIAtLpEKARKAEQhGhlNEaAVhERKlhMAcAYLlDPs-FcsGchYclc.sssphF+lcYLNGVFAWGaRhs....usppEEALPISLLpc ....K+hSEMSE.ELRcEIplhKEKh.R.KAE.pGIlNEYDVYppKllhAcSYLl.DhpKlcIGcIY+Ls-GospYFKV-hLKGlFAWGaRhs......SDcsEEGLPIuLLQ.h. 1 11 23 32 +8671 PF08839 CDT1 DNA replication factor CDT1 like Mistry J anon pdb_1wlq Domain CDT1 is a component of the replication licensing system and promotes the loading of the mini-chromosome maintenance complex onto chromatin. Geminin is an inhibitor of CDT1 and prevents inappropriate re-initiation of replication on an already fired origin. This region of CDT1 binds to Geminin [1]. 21.70 21.70 21.90 21.80 21.60 21.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.01 0.71 -4.30 11 217 2009-09-10 23:23:53 2006-05-19 17:18:32 6 4 153 3 161 226 0 153.10 25 25.92 CHANGED LPhKYchLtEhFcslDolsshhcpRscphTFsp.lptsVQchs+KpFs.sHLuQIKplaPpuhplc........p.phhsaspsopp.hYpLpIc..lst.............................................tsssppphsssphhpRtplF+phLl-hsKppcptaL.p..s.s....pp.l...thps.Fpl-p.s.-ls.ucLPp .......................LP.caphLt-hFcsh-......olls...hLp.sR...pc.s.s.TFsp.lppsVpphh.+......+.pFp..pcluQIKtlhPp.uhphc.......................tt.h..thppts...p.p....hp..l...hlp..htt......................................................................s.t..p..hs..tp.h.hht....ctphFpptLlphlpppcpt................................................................................................................................................................................................................................................ 
0 55 89 133 +8672 PF08840 BAAT_C BAAT / Acyl-CoA thioester hydrolase C terminal Mistry J anon Pfam-B_4571 (release 20.0) Domain This catalytic domain is found at the C terminal of acyl-CoA thioester hydrolases and bile acid-CoA:amino acid N-acetyltransferases (BAAT). 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.15 0.70 -4.63 24 605 2012-10-03 11:45:05 2006-05-24 14:29:18 6 14 292 4 300 3028 585 192.20 24 50.28 CHANGED lcLEYFE-AlsaLhpHPpVptstlGllGlShGu-lsLuMuuah+..plsAsVsINGosssshsshha+t..............slsslshshpclph..........stsshhphhchhpshhspssppshIPlE+u..csshLhlsGpDDpsWtSthaAc.hpc+LpppG+c.pspllsYPsuGHhIEPPYhPhstAshthhhs......hhaGGEs+sHAhAQ.DuWpcl.tFF+KHLsu .......................................................................................................................................LEaF-c.....Ah.pal..h.p.p.s....p..............l.p.....s.s..t.lGlhGhS+Gu-.ls.L.h.hA.u.......h...ht.........pl.s.....s...s....V........s....l....s.....s...u.s...s...s..h.t..s....h...h.h.ct.t................................................ls...l....h..p..h..p.p..h..ph......................................p.s.sh...h..t..h.h..........h...h.....t..................t.....t.........t...p.......p...s..h.....I.........s......l...E....ch..........pus.hLh.....l.s...............G.p..DD.p...W.s..S...t.....h..u...p.h.....pp.Lp.t.p.....s.p.p....p...hph.l..pY.......................suG..H..h...l....p..s.P.....a.h.P.h.p....h.sh...................hhhGG.pstspuh..Ap.cuWtph.tFhppth..s.................................................................................................................................................. 
0 52 103 210 +8673 PF08841 DDR Diol dehydratase reactivase ATPase-like domain Mistry J anon pdb_2d0o Family Diol dehydratase (DDH, EC:4.2.1.28) and its isofunctional homologue glycerol dehydratase (GDH, EC.4.2.1.30) are enzymes which catalyse the conversion of glycerol 1,2-propanediol, and 1,2-ethanediol to aldehydes [1]. These reactions require coenzyme B12. Cleavage of the Co-C bond of coenzyme B12 by substrates or coenzyme analogues results in inactivation during which coenzyme B12 remains tightly bound to the apoenzyme. This family comprises of the large subunit of the diol dehydratase and glycerol dehydratase reactivating factors whose function is to reactivate the holoenzyme by exchange of a damaged cofactor for intact coenzyme. 24.60 24.60 24.60 24.70 24.50 24.40 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.90 0.70 -5.61 12 375 2012-10-02 23:34:14 2006-05-24 16:49:07 5 1 324 6 39 285 7 314.90 70 55.80 CHANGED pV-lspGA-tIMpslstssslpclpGEsGTNlGGMLE.+VRQsMAsLTspsss-IhIQDLLAVDThlP.pVpGGlAtEFShEpAVG.IAAMVKuD+LQMphIApplcpclshsVclGGsEAEhAIhGALTTPGTspPLAILDlGAGSTDASIIspcsplsAhHLAGAG-MVTMlIsoELGLpshtLAE-IK+YPLAKVESLFplRHEDGsVpFF-pPLssslFARVVllKp.sshlPl.suphSlEKI+.lRpsAKc+VFVTNslRAL+pVS..PTGsIRDIsFVVLVGGSALDFElPQhVT-uLu+aslVAGRGNIRGhEGPRNAVATGLlLsaspc ........................................................................cVDVAtGAEAIMcAVsuss+L-NloGEsGTNIGGMLE.+VRQTMA-LTsKs......ss-IaIQDLLAVDT.VPVsV.............pGGLAGEFShEQAVG.IASMVKSDRLQMAhIAp......EIc..p+LslcVplGGAEAE.......AAIlGALTTPGT.s+PLAILDLGAGSTDASIINscGEIl..AT.H.L.AGA..G....DM....VTMII......spELG.L-.DRYL.....AE-IK.KYPL.AKVESLFHlRHEDGoVQFFssPLsPsVFARVs.l.VK.P.DcLVPl..PGDlsLEKlRslRRSAKERVFVTNALRAL+pVS..PTGNIRDIPF..VVLVGGSSLDFElPQLVTDALuHYpLVAGRGNIRGoEGPRNAVATGLlLuapp................................................................ 
0 14 21 30 +8674 PF08842 Mfa2 DUF1812; Mfa; Fimbrillin-A associated anchor proteins Mfa1 and Mfa2 Bateman A anon PSI2 target AAO79331.1 Family This family of proteins may be lipoproteins principally from bacilli. They are between 300 and 400 residues. Many Bacteroides-like bacterial species, including Porphyromonas gingivalis, the causal agent of periodontal infection, carry at least two types of fimbriae, namely FimA and Mfa1 fimbriae, following the names of their major subunit proteins [1]. Normally, FimA fimbriae are long filaments that are easily detached from cells, whereas Mfa1 fimbriae are short filaments that are tightly bound to cells; however, in the absence of Mfa2 protein, the Mfa1 fimbriae are also very long and are not attached. Mfa2 and Mfa1 are associated with each other in whole P. gingivalis cells to the extent that Mfa2 is located on the cell surface and probably associated with Mfa1 fimbriae in such a way that it anchors the Mfa1 fimbriae to the cell surface and regulates Mfa1 filament length [2]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.11 0.70 -5.05 87 437 2012-10-02 11:46:57 2006-06-01 18:05:17 5 5 93 5 62 421 4 277.90 14 80.67 CHANGED hs.sCs...............shtlphp........ashs...............ttssshspplpplslalFD.....t.sGp.hhpphshpspplt.....tt...............................hh.tlssG.p...Ypllsau........sh.......pppt..................hsssssl...sclphplpp............tssthst......phssLa....aGph..shsh...........tstpppphslsLh+sssplpl..........hlps...........t..s.shsshp......hplpssssph...sassph.....hsspshsahPh...........tt.ssssss..................hhsph.sshphhpsp....thpl....pltppssstph......phs.h..h...............hsspthlsppsph...sl.hhh......................st.......................hssplpls.sWthhtps ............................................................h.l.h...........h..t...............t.spthtpplpplplalFD.........p..sGp..hltphphp..sppht.............tt.................h.h.............hhplssG.s...Yp.hlsau..............sh........sspt..hs..........................hsssssl..............pchhhphpp..............tsshhst.........phssLa.............aGph..shsh.....................tssttpphslsLh+sssplpl................hlps....................shss.hp................h.pltss.ssth.........shssph......sspshtahsh.....................htt.psptst............................hhsph..pshphhtsp.........................thpl.......plhtpssstth.....hp.s.h..h..........................s.p...lst.ppph....l..hhh...................................t......................hthtl.ls.sW.hh....................................................................................................................................................................................... 0 13 47 62 +8675 PF08843 DUF1814 Nucleotidyl transferase of unknown function (DUF1814) Bateman A anon PSI2 target CAD86002.1 Domain This large family of proteins are largely uncharacterised. 
Some are annotated as abortive infective proteins but support for this annotation could not be found. This family was recently identified as belonging to the nucleotidyltransferase superfamily [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.54 0.70 -4.58 177 2210 2012-10-02 22:47:23 2006-06-02 11:52:52 6 12 1271 0 615 1962 254 226.00 14 83.81 CHANGED thLtplh.....sthtpphs.hKGGTuLshh.h......hh....RhScDlDhhhht...................ttphpph.....hptlppth............................................sshthphpthhtts......................................st.ptplcl-lsht.........h..st.h..hsh.............................shshpclhucKltAhhp..................................th.RDhaDlhhl.........................htttththtphhps.h...........ht.....phppp....tpthhtphth................................................phhpp.htt..tt...........................................phpthhpp .................................................................................................................................h.....h........htpphh.hpGGoulshh.t......h.t.......R.hotDlDhhhh......................................php.ph............hptlp..phh................................................................pt..hhhphpthhtt..........................................................t..tttlpl-lsht.........................sht.h...hthh...t.........................tl......shsh..tph.hupKltuhhp......................................tph.+DhaDlhhl...............................tttths.tthhpt.h.......................................ht.t...............................................................................................................t............................................................................................................................ 
0 204 415 530 +8676 PF08844 DUF1815 Domain of unknown function (DUF1815) Bateman A anon PSI2 target ZP_00111304.2 (BIG_33) Domain This presumed domain is about 100 amino acids in length and is functionally uncharacterised. 25.00 25.00 26.00 76.70 23.10 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.41 0.72 -4.42 11 59 2009-01-15 18:05:59 2006-06-02 11:56:19 5 1 57 0 24 60 27 102.00 69 88.83 CHANGED FhRLA-QYRshVQDLVMSLQALApuLcppGhsASCYoC....Gc-hcuASFhssLG-sHhlRFLVSDaGISWsE.RssRELVKLEGAEAIpcLQclAshl+pspsssu FhRLApQHRpFVpDLVMsLQALAhlLEpRGYhASCYTC......GsphNSASFMVSLG-sHLIRFLVSDYGITWTEMRDDRELMKLEGAEAIsQLQELAsLlKhp....ss.... 0 3 15 22 +8677 PF08845 SymE_toxin DUF1813; Toxin SymE, type I toxin-antitoxin system Bateman A, Eberhardt R anon PSI2 target AAC77303.1 Domain SymE (SOS-induced yjiW gene with similarity to MazE ) is an SOS-induced toxin. It inhibits cell growth, decreases protein synthesis and increases RNA degradation. It may play a role in the recycling of RNAs damaged under SOS response-inducing conditions. It is predicted to have an AbrB fold, similar to that of the antitoxin MazE. Its translation is repressed by the antisense RNA SymR, which acts as an antitoxin [1,2]. 20.70 20.70 21.00 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.80 0.72 -4.12 23 839 2012-10-01 20:57:08 2006-06-02 12:01:30 5 3 449 0 149 526 5 49.20 39 50.52 CHANGED tRhhTV.....................GYs.http.t..........sPslpL+GcWLEpuGFssGp.lplpVppGpLVIps ....................................l..uYh....p..............h.....PslpLpGpWL.cpAGFsoGpsVsV+VhcGClVlp........... 0 19 51 90 +8678 PF08846 DUF1816 Domain of unknown function (DUF1816) Bateman A anon PSI2 target ZP_00109395.2 BIG_34 Domain Swiss:Q4C9H3 is associated with the Pfam:PF01383 domain suggesting this presumed domain could have a role in phycobilisomes. 
20.50 20.50 21.00 35.40 19.80 18.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.17 0.72 -4.28 29 104 2009-01-15 18:05:59 2006-06-02 12:48:16 5 3 70 0 46 121 101 67.20 41 56.63 CHANGED uhuNth.GLAWWs+lpTpsPssTYaFGPFlocpshctplssalpDLpsEuspsIppsllRs+........+sEsLTl ...........p.hGhAWWlcIpTppPcCTYYFGPFhopp-AptthsGYlEDLpsEGApsIphslpRCp.PcsLTl.............. 0 7 32 44 +8679 PF08847 DUF1817 Domain of unknown function (DUF1817) Bateman A anon PSI2 target ZP_00111140.1 BIG_36 Domain Members of this family are functionally uncharacterised. 25.00 25.00 25.80 25.10 23.90 23.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.70 0.71 -4.65 21 131 2009-09-10 22:53:16 2006-06-02 12:51:59 6 2 119 0 42 100 33 134.30 40 60.20 CHANGED plss-uIppLDLoPlpthhp.........pshsslLststsLphphsaPRs.ssDPR.ELSEhPEsRLWhlRhDApYPWLPLLL-hpsGpLsRasAMLVPHpFstsEGlpFsPEALElalhH+lFlLscahppp.Gls..ppu.pLppMAthLGY-LDsuFasLl .......................................s.lsp-plppLDLSPlpphhc........hhptps.lhh.ppslthslpa.ps.ssDPR.ELuElPElRLWF.lRLDusYPWhPllLDhpt...GplhRasAMlVPH...........ph...p.t.G...l.a.sPpALElalhp+lh...hl.phhpp..ths..t...phtphAthhGatlssthap................ 0 8 29 38 +8680 PF08848 DUF1818 Domain of unknown function (DUF1818) Bateman A anon PSI2 target ZP_00110314.1 BIG_37 Domain This presumed domain is found in a small family of cyanobacterial protein. These proteins are functionally uncharacterised. 
25.00 25.00 53.90 32.60 19.30 19.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.51 0.71 -4.03 27 71 2009-01-15 18:05:59 2006-06-02 12:59:54 6 1 70 5 30 76 118 115.20 41 87.84 CHANGED .pEGsGWRLuhDPs+spassLIGGEsWAlELTcsEhpshspLlhpLscphpsltspLMsEEpIsLElEpt.hWhpl-GptpsWuL+lILpss.......RusEGtWPAssssslstAhcplh .l.+cGsGWRLGaDP...p.t...spassLlGu-sWAlELTcsEhs-hscLltpLspshptls..s-.L..M-EEpIshEhEsphlWhplEGhspsaoL+hILpss.......RpsEGtWPssssssLltAhppl............ 0 6 20 28 +8681 PF08849 DUF1819 Putative inner membrane protein (DUF1819) Bateman A anon PSI2 target ZP_00108899.1 BIG_41 Family These proteins are functionally uncharacterised. Several are annotated as putative inner membrane proteins. 20.30 20.30 20.60 22.60 19.60 20.10 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.78 0.71 -4.89 25 271 2009-01-15 18:05:59 2006-06-02 13:12:16 6 1 259 2 54 226 31 176.70 23 85.97 CHANGED sppcYpsslsuGuLhlpES+plAcLhLpshsh-phccplhp-NlLQppo.uos+Rhs+pl.tpRLcoLssc.h.....phls-.usppptpplLahAsh+psplltDFhh-Vlp-+ahphchcLstcsastFhspps-hcsplsphSsSTptKLpQlla+hLtEAGhLtt..scspplpsshluscltphLpc.psppclh ............a.uslhutshhhpE.+hlspLhhps....s.pphp...ptlhpcNlhptsotsosc+hspsl.ptR.LpsLsps.hh....................phlsp.....us.pppp.lhh...huhhhps.llt-FhtcVlpcthhph...c....plstpchptFhppp.tp.ppstls.s.ao-sThp+htsshhphLt-uGh..lps....scp..c.plp....hhl..ph.thl.t.......h....................... 2 21 37 48 +8682 PF08850 DUF1820 Domain of unknown function (DUF1820) Bateman A anon PSI2 target AAG07366.1 BIG_46 Domain This family includes small functionally uncharacterised proteins around 100 amino acids in length. 
20.90 20.90 25.90 25.70 20.80 18.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.22 0.72 -3.90 21 216 2009-01-15 18:05:59 2006-06-02 13:18:53 6 1 215 0 60 152 247 98.60 49 90.46 CHANGED pslY+lhFhNpsplYElYARplhQSchaGFlElE-FlFuE+opllVDPuEEKLKsEFuGVpRoa...IPMHullRIDEVcKcGsuKIo-spst...ssVssF.Phs ..t.slY+lhFl..N.pGc.lYE.lYARplhpSs.L.aGFlEIt-FVFsp+osllVDPSEEKLKsEFsGVpRSa...lPhHuIlRIDEVc..+..c..Gsu+Io-ht.....sNVhsF.Ph.................. 0 14 33 45 +8684 PF08852 DUF1822 Protein of unknown function (DUF1822) Bateman A anon PSI2 target ZP_00109005.1 BIG_39 Domain This family of proteins are functionally uncharacterised. 25.00 25.00 29.20 28.60 23.90 22.90 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -12.21 0.70 -5.64 27 120 2012-10-11 20:01:01 2006-06-02 13:41:17 6 3 38 0 42 154 0 290.10 19 92.18 CHANGED l.lsssspptA.phupphssspcphpsYL.NpLAltultsWLpt...-hpsshpsssthps.lsphh-.Vsshtls.th.t+l.hlsspshps...s.lplPpEh..hsshlu..YlsVpls.-tp.sclhGalstppl.........pst.cpshplshp.......cLhs.hshLp..hstp.tss..t.....s.h.......h...lpp.Lts..hs......sap..htsLlt.st....h...p........................................................................................h.t..............hths.....tthppsttlshthplsspsh...tLhltht.pssp.phtlhhpLpsht......ssshLPsulpLpllscstpsh.pspsps....p..c.hlplp.hpsp.Gppaplclthss...hhpE.hh 
..........................................................................................................l..t..p.u...p...ut..t....s.tpt.psal.NtLulhsh.taLp.........th..php..s.hps..h.phht..lssh.l.....th.h+....lthhshtt.pp...t.hpl.P.Eh..hs......phhu..YlsVpls....p.p.splhGahpt.................tt.pl.shp.....................pL....thl..........................................t.......ltp.ltt..............sap....httl...ht.......h..................................................................................................................................................t........t...hptsthlshthp.hstp..l...hLhltl....sp.spp..phtlhlplhsht.............tpthLP.slpLtllspssphh.pstspt......tsphlplp.hpsp.uppFplplthss....hpE.h........................................................................................... 0 4 33 42 +8685 PF08853 DUF1823 Domain of unknown function (DUF1823) Bateman A anon PSI2 target ZP_00108651.1 BIG_42 Domain This presumed domain is functionally uncharacterised. 25.00 25.00 79.40 78.40 24.50 17.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.47 0.71 -4.35 21 75 2009-01-15 18:05:59 2006-06-02 15:29:12 6 1 75 1 32 79 121 115.90 52 79.95 CHANGED PLocslLhtILpD+lSDthVspLlWp+LGYphcs..ss...WssussoPp.Wp-caPcsPphIupRPAoV+LTRSIPKEaKQLLKcpLGFpGY+IuELhPRRTRRATAVNWLLualtpp ..PLsp-slhtILs-clsDphVspLVWphLGYRh-tsp.sp........WssupVssp.Wp-cYPcPPshIs.s............R............P...AoV.....+LTRSIPcEpKQLLKEp...L...GFcGY+IGEhsPRcTRRATAsNWLLuahtp.p... 0 8 22 31 +8686 PF08854 DUF1824 Domain of unknown function (DUF1824) Bateman A anon PSI2 target BIG_44 Domain This uncharacterised family of proteins are principally found in cyanobacteria. 
25.00 25.00 36.00 42.20 17.80 18.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.51 0.71 -4.50 23 72 2009-01-15 18:05:59 2006-06-02 15:34:20 5 1 72 3 32 77 106 124.00 34 83.93 CHANGED lspLpDLspLRsA....PpLssspccpLtpELpthhssu-WhTlGlMAPSsppAlpALRshppthuast....hpsh-.....pspt-G.sVFLKuNQpoGslalRsEpGLGcGlLlosptsccspsusTaGPLPLDhF ....................phLp-hssLcht....s.ls.sppccpL+ptLhhhhstu-....a..........slGIhAsosppAltAL+shppuhuatt......hps.....s.......tss.-G.sVaLKhNppoGshalcs.sGht+GVLlSCQusp....ss..t.ssTaG.hPLDhF... 0 7 21 30 +8687 PF08855 DUF1825 Domain of unknown function (DUF1825) Bateman A anon PSI2 target BIG_45 Domain This uncharacterised family of proteins are principally found in cyanobacteria. 25.00 25.00 29.10 38.50 24.60 21.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.47 0.72 -4.05 12 99 2009-01-15 18:05:59 2006-06-02 15:37:52 5 1 99 0 33 86 318 104.30 55 89.98 CHANGED Mu.FF-SEIVQpEA+plFp-YQpLhplGupYGpFDREGKhhaI-pMEpLh-R.+lFhKRhELS..DDF.ApMshcQlcsQLstFGhoP.p...pMF-QMspTLE+MKsphc ........Mu.FF-SEIVQpEAKpLFpDYQpLhpLGuc..YGKFDREGKKhFI-pMEsLM-RY+lFMKRFELS..EDF.AphTlEQL+TQLuQFGhTP.p....QMF-QMstTLERMKspl......... 0 8 23 31 +8688 PF08856 DUF1826 Protein of unknown function (DUF1826) Bateman A anon PSI2 target BIG_48 Domain These proteins are functionally uncharacterised. 
20.70 20.70 21.10 21.20 19.30 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.41 0.71 -4.55 50 214 2009-01-15 18:05:59 2006-06-02 15:45:18 6 1 195 0 63 230 227 184.90 32 84.78 CHANGED shussPslLssIhps.ssNlAlWpRpLssplppalstllsp.p.sshphthsltsspsshcshppths....sspttpsLhpDluhLlchFssLhshcplGLRLclLcpsMCPRFHVDpVPsRLlsTYpGsGTpWLtptsssRstLG...t...sp.........tpIpplssG-VALLKG-tW.G.NEssGLlHRSPsh..s....sGcpRLLLoLD .................................................................................................h.stpstsLsplhps.slNlulWpRplsstltphhs.th...ht.t....sh.th.hsh.........tt....t...ht....h.tths...................sh.shpshltDlshLspha....s....sLh..shc..plGLRLcsls...psMCPRFHVD+VPs...R.....Lls..TYtGsGopWLtpsshsR....p..L....u..th.ttt..spt.................................spIpplssG-VALLK.Gc.p..W.......G......N...........cst............GL.....lHRSPsh...s........ss.ptRLlLoLD...................... 0 19 37 54 +8689 PF08857 ParBc_2 Putative ParB-like nuclease Bateman A anon PSI2 target AAG07772.1 BIG_47 Domain This domain is probably distantly related to Pfam:PF02195. Suggesting these uncharacterised proteins have a nuclease function. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.22 0.71 -4.41 15 262 2012-10-01 20:12:50 2006-06-02 16:09:00 6 2 211 6 96 234 41 170.00 34 65.27 CHANGED pllplslssLRPTQhsVGhtEVptKppcap....................cppc.ppppc....aLcsHhl.........................................PVVlGPsu.phallDHHHlsRALh-hGspp.................VhspVluDLSsl.spssFWphMcsptWVaPaDs+Gp.++shspLPcslt........sLcDDPYRSLAuhL+ptGuacKss..sPasEFtWAcaLRc+ .............p.hh.lplspL+PTQhslGhcpVttKtt+ap....................thst..+phps.......ah..ts+hl................................................PlVlGP.........sG.phYlsD+HHhhpALh-t.s..ssp................................Vhs...h.V..hs-Luth...ststFWptMppppWla...hD...scGt.phs....hspLPssLs..........................sLpDDPYRSLuhhlRctGhhtcss................ssF.EFhWADaLRp......................... 0 21 40 70 +8690 PF08858 IDEAL IDEAL domain Bateman A anon Bateman A Domain This short domain is found at the C-terminus of proteins in the UPF0302 family. The domain is named after the sequence of the most conserved region in some members.\ The function of this domain is unknown. 20.40 20.40 20.80 20.80 20.30 20.30 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.54 0.72 -4.46 42 993 2009-01-15 18:05:59 2006-06-02 16:24:31 5 2 543 3 101 402 1 36.70 34 26.17 CHANGED hLccslt.....phpcccLhppIDpALDp+DcctFhpLoppL ....................lpcthp.....phphcpLhppIDpALpppDcptFhpLopch...... 1 34 67 85 +8691 PF08859 DGC DGC domain Bateman A anon PSI2 target BIG_49 Domain This domain appears to be a zinc binding domain from the conservation of four potential chelating cysteines. The domain is named after a conserved central motif. The function of this domain is unknown. 
21.50 21.50 23.30 23.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.39 0.72 -4.20 39 205 2009-09-11 06:56:43 2006-06-02 16:29:11 6 3 167 0 104 181 5 108.80 32 79.43 CHANGED shlasCSGsSs.sGQluNpsAlcLscpG.h...uchhCluuluupssslhchAcuuctIluIDGCslpCup+sLppsulsscpalhlo.-h.........GlcKpht.tshspp-lpphh.ptltc....h .......s.llauCSGsSs.luQlANplAlcLs+ts.h....ucMuCluGlGu.slssll+hA.+uucsllAlDGCslsCs+psLpptGlsss..tHlhLs.-h.........Gl.cKpht.....p-hs...t-hpplh.t.h...t................................ 0 48 80 91 +8692 PF08860 DUF1827 Domain of unknown function (DUF1827) Bateman A anon PSI2 target BIG_53 Domain This presumed domain has no known function. 23.40 23.40 23.50 30.20 23.30 23.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.89 0.72 -3.88 15 568 2009-01-15 18:05:59 2006-06-02 16:42:00 5 1 513 4 43 173 0 95.60 51 94.90 CHANGED M+LINsTNSascLVpsQLssT........DAphVcVYShGNTcVlaTcAPcHhElLIoN++RsI+-sEI-hIhEhhLK+......hspsslcsl+ss+LIEIolP ............MKLINsTN.SHspL......VcsQL-sT........DApLVEVYSAGNTcVlFTpAPhHhEILIoNK+RuIR-sEIEpIp-aFLKRhh.ptshcpssIKsla..op+LItISlP................................. 0 5 18 27 +8693 PF08861 DUF1828 Domain of unknown function DUF1828 Bateman A anon PSI2 target BIG_51 Domain This presumed domain is functionally uncharacterised. 21.50 21.50 21.80 21.90 21.10 21.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.76 0.72 -4.34 22 497 2009-01-15 18:05:59 2006-06-02 16:47:38 5 2 378 0 50 221 4 89.20 31 38.70 CHANGED TPFh..spGDtltlalcpts..sthhloD-GhTLhcLp.tGhphp..stpRt.cllpslLstaGlphps..upl.hshsstcshutshtshlQulltl ..........TPFhD.phsDpltlahptps...sp......h.pLoD-GhTlhsLE.t.Glsls..pKpRp.clhpslLps..aG...lchs-..pEI...hh...ps...s.p.cshspshasllQslltl............ 
0 13 27 39 +8694 PF08862 DUF1829 Domain of unknown function DUF1829 Bateman A anon PSI2 target BIG_51 Domain This short domain is usually associated with Pfam:PF08861. 22.20 22.20 22.90 22.90 21.50 22.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.86 0.72 -3.70 14 377 2009-01-15 18:05:59 2006-06-02 16:50:04 5 2 271 \N 25 144 2 86.70 33 36.27 CHANGED GpSuhpHpFDallspppppsp+hlpshsssspsthpshhasa.Ds....ppscpssschhl..Ih.NDppcplssphpphhppYslpslsaSc+ .......GcSGlhHsFDallssp+.pp.EKhlpshsNhspsplpsthhsa.Dsp....t+ppcp..cppsphhl..Il..N.......Dsp.....cs.....Is...E..cspshhcc.slpllsaSp+............. 0 8 17 23 +8695 PF08863 YolD YolD-like protein Bateman A anon PSI2 target BIG_52 Domain Members of this family are functionally uncharacterised. However it has been predicted that thes proteins are functionally equivalent to the UmuD subunit of polymerase V from gram-negative bacteria [1]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.82 0.72 -4.13 39 1100 2009-01-15 18:05:59 2006-06-02 17:07:01 5 1 636 0 106 648 6 85.80 23 75.75 CHANGED lPEphptlcchhp-.ppKlp.+PhLsppph-clpphlhpuhtpppplploYac.........sGhhhshhsplpplc.hppplphss..t..tpphplphpsIl..cl ...........................hsEphttlpp...-..ppKlp..+sh.Loc.p...phpclp.......h...Lp...p...u...h...t...p...p..p...t...lpl..paac............sGh.h..s.hp.hplhclst..hp.hhlpsps........pphplphtDIlsI.................................................... 2 34 66 85 +8696 PF08864 UPF0302 UPF0302 domain Bateman A anon PSI2 target BIG_50 Domain This family is known as UPF0302. It is currently uncharacterised. 
20.40 20.40 21.30 35.80 19.70 19.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.90 0.72 -4.23 21 533 2009-01-15 18:05:59 2006-06-02 17:09:35 5 2 509 3 50 197 0 105.20 42 56.87 CHANGED cK+pFlcahLppaphKcREuhWlLNYlhsccplLppVHFV-stphss+uLhlostsscs.sFtFa+ssphhscspcsFt-lphN.s-slYlpLpFps...phpsppYls ..KcsFlcahLtpYphKpR.sVWlLNYlh...sp-shLppVHFV-..s.thtspcsLpluss.s...scsss.hpFhKpNl..phhsspchFhDhhh.N...+..s....p.s.laIQlpFts...shpptphL.............. 0 18 32 42 +8697 PF08865 DUF1830 Domain of unknown function (DUF1830) Bateman A anon PSI2 target BIG_56 Domain This family of short proteins is functionally uncharacterised. 25.00 25.00 25.20 36.90 20.00 18.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.38 0.72 -4.35 26 103 2009-01-15 18:05:59 2006-06-02 17:21:47 6 1 68 0 40 121 114 67.00 43 63.27 CHANGED CsYtNsTs+h.llRshs.ssaYhERVlFPsphhhFEAP.cApLEIassshuushLpsphssscltlsp .CsYhNsTsplQlsRI.sNlsNaYaERVVFPGp+LlFEAsscApLEIaos.huusILs-pIsCpcLtlp.t.......... 0 5 30 38 +8698 PF08866 DUF1831 Putative amino acid metabolism Bateman A anon PSI2 target BIG_55 Domain Solution of the structure of the Lactobacillus plantarum protein from this family has indicated a potential new fold with remote similarities to TBP-like (TATA-binding protein) structures. This similarity, in combination with genomic context analysis, leads us to propose an involvement in amino-acid metabolism. The potentially novel fold is an alpha + beta fold comprising two beta sheets packed against a single helix. The enzyme is present in the cytosol. 
25.00 25.00 37.80 43.40 22.20 19.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.30 0.71 -3.81 22 596 2009-01-15 18:05:59 2006-06-02 17:24:05 5 3 592 1 56 208 0 111.20 54 97.07 CHANGED MAFpp.olplpGsphpYplssslK+aTL+DssFhpo+sGNapLpR.L-ssss.spuhhLKIoIscDLsuFKhslTstsGL+hVNIFKscppp.h.-paaFlhcsLl-Rplhpc .....MAFppplpLpssphsYoLSPslKKaTL+DNsFhETKsGNYpLpRhLEpsPsS.s-GFpLKIhINK-LoGhKlsITDpsGLRhVNIFKsEcp+hpQEKFYFLMDuLVERslFsK... 0 11 26 41 +8699 PF08867 FRG FRG domain Bateman A anon PSI2 target BIG_54 Domain This presumed domain contains a conserved N-terminal (F/Y)RG motif. It is functionally uncharacterised. 25.00 25.00 26.00 25.00 21.70 23.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.49 0.72 -3.51 73 450 2009-01-15 18:05:59 2006-06-02 17:25:37 6 4 401 0 125 417 24 102.00 26 31.55 CHANGED tpsthlFRGp.....ust.sa..t.....LhPolhRtttph.t......................Epph..........lc..............pFccp.................................utt..hhsppss............................hchLAluQHaGlPTRLLDWTpsPLVAhaFAsps.tpts............usla .................................................................................t....hhaRGp.......sst..pa....t....................LhPolhRtttt.................................Epphhp..............................phhpt..........................................................................................t.t..thp.p.t.....................................hchluhhQHaGl.sTRLLDhTpsPhlALaFAspsttptt.........h................. 0 33 71 99 +8700 PF08868 YugN YugN-like family Bateman A anon PSI2 target BIG_5 Domain This family of proteins related to B. subtilis YugN are functionally uncharacterised. 
25.00 25.00 27.50 26.80 23.90 23.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.69 0.71 -4.53 26 285 2009-01-15 18:05:59 2006-06-02 17:42:44 5 1 164 3 55 187 0 121.80 41 95.57 CHANGED h..sSslEGpphpLpcL-plhcshGashuGpWDY-+shaDhKlspc.-G..hhaLRlPshAl-G-lsscsAllcLhoPhLh+HhY.+Gl-.t........................cDtchPpp..hlphucpllp-lccpLp ......hsopl-GtshsLshLc-lMcshGalluGp...WDYE+soaDYKh-....cs...hhaLRV.shAl....-G-l....su....p....pAhl+LhsPhLh+HhYs+Gl-hs...............................s.Dtphspp..hlphucpLlpplEccL.............................. 0 17 38 43 +8701 PF08869 XisI XisI protein Bateman A anon PSI2 target BIG_57 Domain The fdxN element, along with two other DNA elements, is excised from the chromosome during heterocyst differentiation in cyanobacteria. The xisH as well as the xisF and xisI genes are required [1]. 25.00 25.00 26.40 26.30 21.30 20.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.22 0.72 -3.95 34 213 2009-01-15 18:05:59 2006-06-02 17:46:45 6 4 37 7 71 266 3 103.50 39 97.22 CHANGED MDpLs.pYRplIpplLpcYuph..psspsclEs............pLlhDpp+........ccYhlhslGWcspcRlausllHl-I+ssKIWIppDsTEcGIAp-LlctGlPKpDIVLGF+sPphRpaT-FAVu ................M-.p.l.p..pY+pllpplLpcYuph......tssp.spl-s....................phlhDppp........c+Y.lhplGWpsp.c.R.l.a.usllHl-I.+.ssKIWIppDsTE.uIAp-LlctGlPKpDIVLu.FpsP.hRpaT.saAl............. 0 17 52 71 +8702 PF08870 DUF1832 Domain of unknown function (DUF1832) Bateman A anon PSI2 target BIG_58 Domain This family of proteins are functionally uncharacterised. 
25.00 25.00 25.20 25.70 24.80 23.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.31 0.71 -4.37 23 127 2009-09-10 14:48:39 2006-06-02 17:49:01 6 3 123 0 36 131 30 112.60 30 66.51 CHANGED +l+lSppuccpLp+LKppTGlss.NllCRhAhstSLppsshsp.....stsh.sDuslEh..shcsas.....G-hsslhhslLKp+h.....s.ph.-scplhcthphHlcRGluhLtschplts....h..pll ..pl+lSppsc-pLp+L.Kp...h..T.sl.ss...NllsRhAhshSLtpsp..h.s.....s.sh....-u.slEh..shpsas...........G-hsslh..h...hlL+t+h.....shth..-pcslhpta+hHlcRGIuhLtschpltp..................... 0 5 25 34 +8704 PF08872 KGK KGK domain Bateman A anon PSI2 target BIG_60 Domain This presumed domain is found in one or two copies in cyanobacterial proteins. It is named after a short sequence motif. 21.40 21.40 22.10 22.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.50 0.71 -3.93 9 64 2009-01-15 18:05:59 2006-06-02 18:16:26 5 2 30 0 19 58 0 107.00 25 80.45 CHANGED s-phcptN..............scDDVl..tscp.......psaKlpclhptlcpthpschhp-h...hp..t.................sphahspGlsCElLchsspsWpKGKl+l.huLEFlPD..E.........Pphs-.ESPLDDIRppIs ..............................................................tcpsl....tp.......phhpltphhphh.t.hpp.....p..t.....h....ht............................................tpha..h...spGlcCclLphGspsWpKGKl+l...............h...........s............LEF..hPD..E..............................sp....pp.p.SPLD-lRp.h........................ 1 3 13 19 +8705 PF08873 DUF1834 Domain of unknown function (DUF1834) Bateman A anon PSI2 target BIG_62 Domain This family of proteins are functionally uncharacterised. One member is the Gp37 protein from the FluMu prophage. 
25.00 25.00 33.00 32.60 23.80 23.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.09 0.71 -4.71 15 142 2009-01-15 18:05:59 2006-06-02 18:25:54 6 1 131 \N 25 118 1 179.60 26 94.11 CHANGED hIsplEpAllsRl+p...shGshl+tVso.....................asGcaD-pslsplVRphPAlaVsatGts.ts...tsucs+apssupasVhVsucslsuEpssRhG...........lYQllpslpslLtsQcl....s.hshssLp..Pppl+sLa.ssphcspG...luVYAh-Fpssh...............l-.ssLsshphsptsh...................ts.ssDhpphtlphplsssps .................................lsphEpAllsRlcp......hhG.ph.lpp.Vco.....................asGcas-.tsltphlt..shPAlaVsahGss.......tupsRhpssucaslhVs.ucslp....ucpssR.G............hYpllpplptlLsGpph...........t.ssslp...p..pl+..sla.sssh.t.stu...lulYu..l.Fssph...............s-.ssLcs..hth..tth........................................hst................................................... 0 11 17 23 +8706 PF08874 DUF1835 Domain of unknown function (DUF1835) Bateman A anon PSI2 target BIG_63 Domain This family of proteins are functionally uncharacterised. 21.10 21.10 21.10 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.70 0.71 -4.18 25 370 2009-01-15 18:05:59 2006-06-02 18:26:58 5 4 323 0 86 318 19 119.80 25 37.51 CHANGED lHlshussAuuoL+hAltp.t..hs-pVlsltD-LSlGPlhslcstt.thtRtpW...l.ptht.t.......pp.htpp......hhschcsthpp...lppl..ssss.plslWpucsup-plsLphlhhhLpsps.p..lphlssop ...........................lHlshusssuusL+hhltptt............hpcs.Vlsh.p-s..holGP.l.t.p.lc.s.s.t.h...htRtpW......l.pslst......................t.hpp-.......hhschcp.phpp........L.cpl..ssps..plslW.pup.sup-plhLphlhtpLc..spt.p...lp.lphs................. 0 22 51 66 +8707 PF08875 DUF1833 Domain of unknown function (DUF1833) Bateman A anon PSI2 target BIG_61 Family This family of proteins are functionally uncharacterised and are predicted to adopt an all-beta fold [1]. 
They are often found in gene neighborhoods containing genes for an NlpC peptidase and a Ubiquitin domain predicted to be involved in tail assembly [1]. 20.60 20.60 21.60 21.40 20.20 20.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.49 0.71 -5.05 12 139 2012-10-02 17:50:33 2006-06-02 18:33:47 6 1 118 0 27 137 7 147.30 21 88.56 CHANGED shhcphtusssschhltslEIp+sshsc......shhlVpshcDlssphEs......Gp.l.FhAhsh-lslPspssuss.slplslDNVsptlschl-tuhtsp..h.tlshRhYLsschssPp.-hshphslpsssh-shplospAGhhDlhNpthsphpa ...........................tth.ttsssph...lpslElsHPshsp..............shh.ls.p......s......hc.s.lss.ptcs..............Gp...hhatshshsl.shPsps.sststslshslsslss.l.sstlc..pshts...p.........h...plsaRhY.....lu.scls..sPt.thshshplpssshs..stslshpss.h.shhspth.......................................................... 1 5 10 18 +8708 PF08876 DUF1836 Domain of unknown function (DUF1836) Bateman A anon PSI2 target BIG_64 Domain This family of proteins are functionally uncharacterised. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.51 0.72 -4.28 48 1171 2012-10-04 14:01:12 2006-06-02 18:36:12 6 2 876 0 118 656 3 95.50 36 57.32 CHANGED .php-l...Psl.............DLYhDQVlphhsphhps..................................th.....chLTpTMINNYsKp...pllssPpc.KK..Yo+pplshLllIthLKsllolp-IpplLphhhss..........hshcphYpta ..................................................php-lPsl.............-LYhDQVlthhsphhss.................................h....p..phLTsoMlNNYVKp...thls...tP......K.....KK.....YsppplshLlhIshLK.s.l.holp-Ippslphltsp..........hs.pphYp..h..................................... 0 45 84 98 +8709 PF08877 MepB MepB protein Bateman A anon PSI2 target BIG_66 Domain MepB is a functionally uncharacterised protein in the mepRAB gene cluster of Staphylococcus aureus. 
21.80 21.80 22.60 29.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.44 0.71 -4.59 14 546 2009-01-15 18:05:59 2006-06-02 18:40:34 5 1 530 0 56 301 2 119.70 49 79.89 CHANGED EppNs-Ycuhhhplppcp..h+hRlAKhTPsKhGhFVsFWcK.spsspNpPFshp-ss-hLlI.slhDcsppG.FlFPK-lLlcpGILpopsppGKMAhRVYPsWspsLNppApKTQpWQhpYFh-h ........EK.Np-Y-uhhFphppcs...hpsRLAKKTPpKtGYFVohWpK...D.c.s.s.pN..pPas....hcshsDhLhI.sV..........hD..-......p.....ppGhFlFP+ElLlc+sILsopppKGKMAhRlYPpWsss........LNppAppTQcWQhpYFh-............ 0 24 37 50 +8710 PF08878 DUF1837 Domain of unknown function (DUF1837) Bateman A anon PSI2 target BIG_65 Family This family of proteins are functionally uncharacterised. 25.00 25.00 25.40 25.20 24.60 23.50 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.30 0.70 -4.76 59 272 2009-01-15 18:05:59 2006-06-03 10:19:05 6 3 262 0 46 206 4 214.50 17 69.97 CHANGED cphschlhstlscaulsp................pc..hpphtppphtthht............putpph........................sphtppG...E.hGEll.Lahllcphhss......llsKh.hKss..p.thcGsDulHlthpsst......pLahGESKh..Yss...hssA........lpsshculpphh........ppsthpp-hpllpsphp.p.........t.t.tptlpchls.spps.........pphphphshshhlsa-sshh.ph.t............tpchpcplppphppphpphtp.............hp.tthtphphplhllPl ......................................................h...l.phhhs....................c....htth.htp..h.tthht..............htphh............................stphcsG...-..lGEll.ltthl.cthh.s...h.s....h.l..s+lthKss.pshsh+GsDslthh..hs..spst......plhhuEuKh.....hts...hpsu..........lpcshsslpcp...........ppp.hspphphlpsplppp.........p.shtctlcchlp.....................tt....hthshhhhhp.t.....................th...h...h.......................................................................................... 
0 11 29 37 +8711 PF08879 WRC WRC Riano D, Finn RD anon Manual Domain The WRC domain, named after the conserved Trp-Arg-Cys motif, contains two distinctive features: a putative nuclear localisation signal and a zinc-finger motif (C3H). It is suggested that the WRC domain functions in DNA binding [1]. 21.30 21.30 21.40 21.70 21.20 21.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.59 0.72 -4.29 25 422 2009-01-15 18:05:59 2006-07-26 15:40:26 5 12 32 0 213 410 1 45.50 49 10.33 CHANGED -sEstRC+RTDGKKWRCs+pshsspKaCE+Hhc+u+pRsp+phcs.s .......sEPuRCRR....TDG.....KKWRCS+cshs.spKYCE+H.h...pR...G..R....pRS+K.l-..t.............. 0 23 124 179 +8712 PF08880 QLQ QLQ Riano D, Finn RD anon Manual Domain The QLQ domain is named after the conserved Gln, Leu, Gln motif. The QLQ domain is found at the N-terminus of SWI2/SNF2 protein, which has been shown to be involved in protein-protein interactions. This domain has thus been postulated to be involved in mediating protein interactions [1]. 23.30 23.30 23.70 23.30 22.20 21.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.04 0.72 -7.66 0.72 -4.54 43 563 2009-01-15 18:05:59 2006-07-26 15:44:28 6 26 209 0 287 521 0 36.80 41 3.82 CHANGED ssFossQhppLcpQhhsaKhlsp.....shPlPscLhhslpp ....sFossQhppL+tQlhsYKh.Lup.....utPlPspL.hslpt..... 0 51 141 221 +8713 PF08881 CVNH CNVH; CVNH domain Bateman A anon Bateman A Domain CyanoVirin-N Homology domains are found in the sugar-binding antiviral protein cyanovirin-N (CVN) as well as filamentous ascomycetes and in the fern Ceratopteris richardii. 
22.10 22.10 22.80 22.40 21.90 21.90 hmmbuild -o /dev/null --hand HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.47 0.72 -3.81 34 277 2009-09-10 22:09:16 2006-07-26 15:50:08 5 8 82 44 218 294 10 104.10 24 56.75 CHANGED sFppSuccI+l.....pssphLsAcspsssG.p....hhsucIcLsphlGNss........GpFhW....................................................................sGtNFoco..Acslcht...ttptsslLcApLtsscGphtspp.lsL.sE+IsNpsGpLpa ..............................Fttosp.shpl....................ssp.hLtApC.p...st..s..G..p..........hh.p.o.pl-.LNph....lG.Nss....................Gph..ph............................................................................s.s.ssFspo..s..pshplp..............ts.h.L...pup.h.......ts........p..G..t...h....h......tsp....lsL.....s.c....pl.t.NtsGpL........................................................... 0 29 118 190 +8714 PF08882 Acetone_carb_G Acetone carboxylase gamma subunit Bateman A anon PSI2 target BIG_95 Domain Acetone carboxylase is the key enzyme of bacterial acetone metabolism, catalysing the condensation of acetone and CO(2) to form acetoacetate. 20.80 20.80 21.30 37.70 20.20 20.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.52 0.71 -4.05 24 126 2009-01-15 18:05:59 2006-07-26 15:56:01 6 2 113 0 52 118 9 111.40 52 68.55 CHANGED Vshs-hllLsLuschalsps...........CGHcFushRcNaKhtshlasRDscEhhptl.....YsthhAPDspWppIhEYYCPpCGshh-sEhssPhaPllHDhEsDI-uhhpcWlt .............VsacD+IlLPLGsHLalVQssp.p+..hll+C.pCGHsFsssc-NWKL+A.IYVRDTtEthcEl.....YPclhAPDspWQVhREYhCPsCGhhl-VEAsTPWYPVlHDFEPDI-sFY+-WL.G..... 1 17 31 44 +8715 PF08883 DOPA_dioxygen Dopa 4,5-dioxygenase family Bateman A anon PSI2 target BIG_92 Domain This family of proteins are related to Swiss:P87064 a DOPA 4,5-dioxygenase that is involved in synthesis of betalain. DOPA-dioxygenase is the key enzyme involved in betalain biosynthesis. It converts 3,4-dihydroxyphenylalanine to betalamic acid, a yellow chromophore. 
25.00 25.00 33.20 32.80 23.50 21.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.61 0.72 -3.95 44 382 2010-01-08 13:27:16 2006-07-26 15:58:34 6 3 347 6 160 314 37 105.30 39 76.16 CHANGED YHAHVYF...c.tsshptApsLpcpltccFs.......lphG..+l+p....+sVGPHP...hh.aplsF.ssc.FspllsWLtlsRssLoVLlHP.T.G-...-LtDHT.cpAhWLGcplsLslshh .........aHAHl.YF..........s.ssptphAptLpcpltccFs..............lplh.....phap......................+.VGPHP...th.aplsF....ssspa.ssllsWLslsR.GsL.oVLlHPsT....G-..............-htDHp.-pAhWlGcshsLshshh....... 0 36 83 130 +8716 PF08884 Flagellin_D3 Flagellin D3 domain Bateman A anon Bateman A Domain This domain is found in the central portion bacterial flagellin FliC. The domain contains a structural motif called a beta-folium fold [1]. Although no specific function is assigned to this domain its deletion leads to a reduction in filament stability [2]. 21.40 21.40 21.40 21.40 21.10 20.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.34 0.72 -3.80 34 687 2009-01-15 18:05:59 2006-07-26 15:59:46 6 4 182 3 5 483 1 88.90 36 20.39 CHANGED ushshouhs..ssslKsus.............ss...ssussuspsuclhaDs.sscYYlcVsuhst...su+sGaYcVsVss......sGpVohsssssp..tsstPs...usspVTpsQ ..............................s.T.shouhs..sssIKAus................Gu...ssuTssstsuslpFcs..ssKYYspVsGhss...suKsGhYEVsVss............sGpVohsussTp...sshPs...usoslTcsQ...................... 0 1 1 3 +8717 PF08885 GSCFA GSCFA family Bateman A anon PSI2 target BIG_88 Domain This family of proteins are functionally uncharacterised. They have been named GSCFA after a highly conserved N-terminal motif in the alignment. Distant similarity to the Pfam:PF00657 lipases suggests these proteins are likely to be enzymes. 
25.00 25.00 29.90 29.50 22.50 19.80 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.72 0.70 -4.83 49 313 2012-10-02 11:02:24 2006-07-26 16:17:02 6 8 248 0 91 323 73 208.60 32 70.18 CHANGED splhshGSCFApplGctLptttapsh..........................sssaGs.lYsstslhphlppuhspp..s....tpslatts...s+aash...shsshuhsotpclhpphspplp..........ps+ptlppushllhTLGhs.sappppssplhssC.tlsptpFscch.....holsEIhpshpthhshlpsl..NPpl+llhTVSPV.hht.......cshsuNphSKusLhsAsppl...sp................pp..s.cs..........tYFPSYEIlh.sphpchpaYssD...hhHssptuVsalhcpF .....................p.plhhhGSCFApplupthttt..taph............................ss.aGh.lasPhul.phlpphhtt..........ttthh...t............thahs....tht..s..s.tthhtthpttht..........thtphh.phshhllTLGoshsah........pps.......utl...ls.NCpp.h.stp.Fpcph.....lolpEhhpshpthlphltth.......NP.ph+llhTVSPlRahp.......cshhtsphSKusLhhAhcplhp..........................t...p..ps.............hYFPuYEIlh..DcLRDYRFYtpD...hhHsst.ulpalhcpF.................. 0 29 64 78 +8718 PF08886 GshA Glutamate-cysteine ligase Bateman A, Eberhardt R anon PSI2 target BIG_70 Family This is a rare family of glutamate--cysteine ligases, EC:6.3.2.2, demonstrated first in Thiobacillus ferrooxidans and present in a few other Proteobacteria [1]. It is the first of two enzymes for glutathione biosynthesis. It is also called gamma-glutamylcysteine synthetase. The structure of this family has been solved, and is similar to that of human glutathione synthetase and very different to gamma-glutamylcysteine synthetase from Escherichia coli. 
25.00 25.00 77.70 77.60 20.30 19.70 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.23 0.70 -6.16 27 271 2009-09-11 17:05:15 2006-07-26 17:16:03 6 1 265 1 85 225 152 400.70 57 94.39 CHANGED Ec+lLsppssIEcWFRtpWpcassPFYsSVDLRNuGFKLAPVDTNLFPuGFNNLsPphhPLslQAshuAlp+.hCP-A+plLlIPEsHTRNtFYLpNVstLppIhppAGhpVRlGSlsspIscsTslpLssGpplslEPLhR...sssRLul.csFsPCsILLNNDLSuGlPplLpsl.-QhllPPLHuGWssRRKSpHFpsYccVup-FucllsIDPWLINPhFspCsslsFtsppG.EsLAspVDslLs+IRcKYccYGIc-cPFVlVKADsGTYGMGlMoV+suc-lhsLNRKpRNKMuslK-Gh.VscVIlQEGVhThEphss..AVAEPVVYMlc+aVVGGFYRlHssRGtcENLNuPGhcFhPLuFppss.hPp.tt..Psss........................sNRFYhYGVlARLAhLAAuhElc ......Ep+ILsspssIEpWFRhpWpcHsPPFYsSVDLRNAGFKLAPVDsNLFPGGFNNLsP-hlPLAlQAA.uAl-+.hCP-AKslLlIPEs.....HTRNsFYLpNVttLspIh+pA.GhpVRlGSLsPpl...sEsTplpLssG.ppllLEPLhR...spcRlu..........L....c....s...FsPCsILLNNDLSAGlPsILcsl.cQhlLPPLHuGWssRRKSsHFusYccVApcFAKllsIDPWhINPYFspssGlDFpt...+pGc-sLAcuV-tVLpKIpcKYcEYGIs-+PaVlVKA.DAGTYGMGVMoV+susEltsLNRKpRs......KM.utsK-GLtVo-VIVQEGVYTaEpl......ss....AVAEPVVYMhDRaVlGGFYRVHsuRGtDENLNAPGMcFVPLuFpps.shPDsptc..PsAs.........................sNRFYhYGVlARLuLLAASlELE............................. 0 18 52 68 +8719 PF08887 GAD-like GAD-like domain Bateman A anon PSI2 target BIG_90 Domain This domain is functionally uncharacterised, but it appears to be distantly related to the GAD domain Pfam:PF02938. 25.00 25.00 33.90 33.50 24.60 23.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.13 0.72 -3.98 14 185 2012-10-01 21:10:01 2006-07-26 17:18:58 6 2 130 0 28 187 5 100.20 28 50.71 CHANGED DcsaphhLcpFG.shcpp.lssus.I..-+Y+s+LP-tLLcYWp-cGWsuau-GlFWhVNPp-YcsllssWltGs.hhst-sh+llARoAFG-lalWuEpsuhslplsshhs ...................................tp.hstp..I..c+Yc.st.LPspLlphWpcaGausahsGhhtllNPp-Yp....sllpphh.................ctc.shhslhpoAFGDlhhWtcppsh.........h................ 
0 5 12 19 +8720 PF08888 HopJ HopJ type III effector protein Bateman A anon PSI2 target BIG_86 Domain Pathovars of Pseudomonas syringae interact with their plant hosts via the action of Hrp outer protein (Hop) effector proteins, injected into plant cells by the type III secretion system. The proteins in this family are called HopJ after the original member HopPmaJ [1]. 21.30 21.30 21.50 22.80 20.90 20.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.43 0.72 -4.06 39 245 2009-01-15 18:05:59 2006-07-26 17:21:56 6 2 245 4 74 224 31 109.60 47 92.81 CHANGED M.......slp-...hlppLpspspplpFs-ThslI-ppYcaoPsuFpNG.s...lhNpAGpNpGSCKlFuFAplpsLocppTLtsFGpaYR.-VLtsPcGsDHtNIRsFh.p...pGWsGlpF-spsL ..................lpshlspLps..pphpFsDslAhI-spYsasPsuFpNG....s.....hpNsAG.pNpGSCKlFuFApLpuLopppTLtsFG-aYR.sVLssP-GsDHtNIRsFh....p....pGWsGlpF-upsL.... 2 22 45 63 +8721 PF08889 WbqC WbqC-like protein family Bateman A anon PSI2 target BIG_75 Family This family of proteins are functionally uncharacterised. However it is found in an O-antigen gene cluster in E. coli [1] and other bacteria [2] suggesting a role in O-antigen production. Feng et al. suggest that wbnG may code for a glycine transferase [2]. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.34 0.70 -4.89 67 583 2009-01-15 18:05:59 2006-07-26 17:27:58 6 4 429 0 184 571 435 164.00 24 92.01 CHANGED QPhahPalGYFphlspsDpFllhD-spa.p+pu..WhNRNpI.tssp..........G.phlTlPlp.......cspppp.pIp-hplssst..Wpc+phpslppsY.u+APaF.cphhshlcslap..pp.ppLschNhphlphls...chLuI..psplhhoSchph....stppsc..+llslspphuAspYlsu.uupshhp.p.....Ft.ttulplpahpht.h.tYsQ..hts...t.FlPslSIlDlLhssG.scshphlh ...............................................s.ahs.htaa.ph.lhth..-...hhl..pp.pa....+.ps..ahNRshI...hs.s.p..........u......hlolPl.........p.t...p.....hp-hplssp.........Wtph+hpsl.pt.sY.tpuPaF.p.h.s.ht.hht.....pphp.L.phN.phhphlh...ph...lt.l..p.p.h..opphth........tt.p.h..ht.tt.......................................................Y.Q.....t......F.stlSllDllh..u..pt.....h................................. 0 65 135 164 +8722 PF08890 XkdN Phage XkdN-like protein Bateman A anon PSI2 target BIG_83 Family This family of proteins are functionally uncharacterised. They are found in prophage sequence in various bacteria. 22.30 22.30 22.30 24.80 21.50 22.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.68 0.71 -4.42 23 222 2009-01-15 18:05:59 2006-07-26 17:31:39 6 1 160 1 58 199 8 132.50 24 93.15 CHANGED slp-...hlhpphhpp.ch..p..shhcRhss..G..l.aph+ulotcchscl+ccsTpppphht.......thppchDpscapupllhtuslpPs....hpsp-Lpcuass.ssc.Ell++h...L.hsGEhsslsspltElsGa.-ssh--...lEE...hK ...........................................................................hh.tphhtt.ph....p..hhh.cRhh....p..s..t.h.hpl+sloscchpclccpssphpt.ht...............thhpchDpp.pa..sp..llhpus..l.P-......................hcspELp.....c.u.....a.....s.....s.h.s.st.-ll+ch...L..hsGEhsslhstl.-lsGa...sp..p.......hp-...l.-E.hK............. 
1 29 49 51 +8723 PF08891 YfcL YfcL protein Bateman A anon PSI2 target BIG_80 Domain This family of proteins are functionally uncharacterised. THey are related to the short YfcL protein from E. coli. 25.00 25.00 25.70 25.30 22.60 22.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.76 0.72 -4.00 29 716 2009-01-15 18:05:59 2006-07-26 17:33:15 6 1 710 0 84 220 6 86.50 67 94.87 CHANGED ltcaEppl.shIDshVppuoD.DELFAuGYLRGHluLulAphE.cspp.slcsLcs+lppSLppA..puELussDpsLVpshWppLttp ......IAEFESRILALIDs..MV-HASD..DELFASGYLRGHLTLAlAE......LEu....uDDH.Ss.pAV+ss....VSQSLEKAIuAGELSPRDQALVssMW-sLFpp.... 0 8 27 55 +8724 PF08892 YqcI_YcgG YqcI/YcgG family Bateman A anon PSI2 target BIG_93 Family This family of proteins are functionally uncharacterised. The family include YqcI and YcgG from B. subtilis. The alignment contains a conserved FPC motif at the N-terminus and CPF at the C-terminus. 20.80 20.80 21.20 20.80 20.40 19.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.49 0.70 -4.78 34 324 2009-01-15 18:05:59 2006-07-26 17:39:34 6 3 265 0 87 286 8 212.20 33 84.71 CHANGED pptacpFpptlts.....ssFPClhuppuhppspL+hsFhs..p.tptpsspplspsLtcalct.cshs.phs...SLllhFcss......pphohppacchhWphLppLpptDs...psWPpclPsDPccspWpFsFuGcshFlhsssPua..tpRpSRphsh.hhlsFpPRhlF-cl..psssp.hup+h+ctIRpRlttaDshshpPpLutaGt.-spEW+QYhlp--sp.............scCPFptt ..............................t..tappFpphlh-......sFPChhuhpuhppsplRYsFls......ppshpclspslhpalchh+-p...s.hhp......uhhlFhcs.......cctol-tacchaWplLphL+cpDs...psWPppIPpDPcc.tWEFsFuGEPhFlhsssPua..ppR+oRp.h...us...hhlsFQPRtlF-s.l..puspt..tupph+phlRpRlppaDp.hPhHPsLupYGs.cppEW+QYhltD-.p..........h.u+CPFp..h.................................... 0 22 54 70 +8725 PF08893 DUF1839 Domain of unknown function (DUF1839) Bateman A anon PSI2 target BIG_68 Family This family of proteins are functionally uncharacterised. 
20.50 20.50 20.70 187.00 17.90 20.40 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.91 0.70 -5.46 13 104 2009-01-15 18:05:59 2006-07-26 17:48:32 5 1 96 0 42 97 5 312.40 50 90.29 CHANGED hsshs..scshssHuLHus-tlWsETNCYlDLWIELLHuhGLsPhAALuFTVsQDFEGDQFTFFKaP.tDLEpLYGlsVpELAlYDsLEsHVtsQlsRG+lVLlEVDSYaLPDT+GsuY+ccHsKTTIAIDhlDs-AptluYFHssGYHphpGEDYcGlFph.sshppsss.hhPYsEhsKpstsshspssLhcAShsLLRpHLsRRPcpNPlstFRpsFstcl-plhsRs.saFHhYuFNsLRQLGANFELhu+aLcWLstpGpssPts..supustoIAoEAKVlQFRLARAVhR+KsDsspssLDsLEsAappslsuLApp .......ht...cscpYcPHsLHusphlWppTNCYVDLWIEVLthhGLsPhAALsFTlo.DFEGDQFTFFKaPhEDLEpLYGIhVQEhAIa-sl-sHVEpQlAR...GpLhLVEVDuaaLPDTRGsoY+ppHsKTTIGIDsIDhptRpluYFHNuGYahh-GtDYDGLFtt.ssht........LhPYVEhAKRph.t.PL-.cptLs-sShsLLp+HLpRRPssNPIsAFRpthstcscslAsp.....PhsaFHtYoFNoLRQLGANFE.Lhu+YLcWLpssGtsuP.h......s........hssAscpIASEAhVlpFRLARAsuRuKp-cscusLDhlEpAassllsslut.h...... 0 4 16 25 +8726 PF08894 DUF1838 Protein of unknown function (DUF1838) Bateman A anon PSI2 target BIG_43 Family This family of proteins are functionally uncharacterised. 25.00 25.00 50.30 26.70 21.20 16.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.74 0.70 -5.15 9 37 2009-01-15 18:05:59 2006-07-26 17:54:37 6 2 34 0 19 34 76 224.60 36 82.65 CHANGED cupslaahWpGplauhhsGE..cc+LFsl.GMsls+Chshs-ut....uachloRElhhYhDPpTG-lLcpWcNPWoscsVsVlH.............VANDPVp.............uhhptphsh..tG-phsashsl.LhYPNPLus-.Qht...sGshYpAsELFphhsspssLtss-sso.ssspluWsRlusWLPWMtMG.sR.GhLhapstGpKlsSh--Ls.hhpppIss+hPhYtpsPcphs-t.N.TSWhYF+phhc ........spplhahWpGclYuhhPGE...chLFthcGhNVu+sl.ps-ut.t..uachloRElhhYh......DPsTsElLcpWcss...t.ssVlH....................VhNDPss....................................................tthh.h.hshp..hGsphshshpl.LtYPsPLss.hQh.......sussYcAhELFpahsspssLt.sst.ss..ssphuWsRhusWLPWMthG.sRsGhLhapstGpKls.uas-lPthhps.IsschP.atpsPpphs-.sN.TSWhaF+ph................. 
0 8 14 18 +8727 PF08895 DUF1840 Domain of unknown function (DUF1840) Bateman A anon PSI2 target BIG_69 Domain This family of proteins are functionally uncharacterised. 25.00 25.00 25.70 34.00 23.10 24.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.43 0.72 -4.01 41 231 2009-01-15 18:05:59 2006-07-26 18:01:24 6 2 210 0 91 204 18 103.90 36 96.15 CHANGED M.LlTF+S+AsuDlhMht-lApplLpllG+.......ssss.GsIss--lPsAlppLcsAl....tuppstssssp...........pppccstctsluLuQRAhPLl-hL+tAtc..pss-VhW ......MllTF+S+AssDlsMhcDhAhhlLtllGK.......phs.cGlIos--lssAIs+L-sAlstt.ptptppssp.p........................tsctccppp-.sluLuQRAhPhlcML+pAtt..pss-VhW..... 0 15 47 72 +8728 PF08896 DUF1842 Domain of unknown function (DUF1842) Bateman A anon PSI2 target BIG_72 Domain This domain is found at the N-terminus of proteins that are functionally uncharacterised. 20.20 20.20 20.30 21.60 20.10 17.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.38 0.71 -4.35 17 105 2009-01-15 18:05:59 2006-07-26 18:20:34 5 2 61 0 24 64 0 115.10 44 60.12 CHANGED sGLFsspYhl.....ussh.GAPsLpLsLLVsTsc+pVsGpApIoQusp.PPlshcucVWGpao.htl.ss....upspIlloLpGs......uGPtSs.ht.sF+LchlLssDWps...GsAsYcYh.psGpW .............sGLFPVpahV.....uTsh.GAPsLhLsLlVsT.s-+oVsGhA..p..ITQ.uVs.PPLsF+AcVhGsas.htl.P.....ussplhloLpGss.......uGPhusthh.sFclchlLsssWpo...GsAoYRYa.psupW.......................... 0 4 7 12 +8729 PF08897 DUF1841 Domain of unknown function (DUF1841) Bateman A anon PSI2 target BIG_71 Domain This family of proteins are functionally uncharacterised. 
21.00 21.00 21.30 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.35 29 307 2009-01-15 18:05:59 2006-07-26 18:20:47 6 1 298 0 91 253 52 132.80 45 88.24 CHANGED PoR-pl...RpFFh-sWpKtpsspsLosLEshAschItpHPEYHshLp.ssEstLspDYsPEtGpoNPFLHLShHLuIpEQlSIDQPsGIRssacpLst+.hss.H-ApHphMECLuEhlWpAQRsGssPDsssYlpslccp .............................spc-V...R+FFhcsWpKphssp.LosLEthAschIttHPEYHt.Lp.sh-p.tlsp-ah......PEpG....coNP..FLHlShHLuIpEQluIDQP.GIRssa-pLsu+..sshH-ApHt..hMEsLuEtlWpAQRt.Gps.PDsssYlsplp+.h... 0 22 55 74 +8730 PF08898 DUF1843 Domain of unknown function (DUF1843) Bateman A anon PSI2 target BIG_72 Domain This domain is found at the C-terminus of a family of proteins that are functionally uncharacterised. The presumed domain is about 60 amino acid residues in length and is found independently in some proteins. 21.70 21.70 21.70 48.10 21.60 21.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.49 0.72 -3.93 15 126 2009-01-15 18:05:59 2006-07-26 18:24:54 5 3 42 0 24 71 0 53.00 56 33.94 CHANGED PhYGVAIQpAhASGDLupMKuLsupA-pQLsptsplpsAlptLcsEIARLEtR .shYGVAIQpAtASGDLu+MKoLustAcpQLuspspIAuALptLcsEIAKLEuR. 0 6 12 15 +8731 PF08899 DUF1844 Domain of unknown function (DUF1844) Bateman A anon PSI2 target BIG_76 Domain This family of proteins are functionally uncharacterised. 20.70 20.70 21.70 21.30 20.20 19.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.25 0.72 -4.06 18 82 2009-01-15 18:05:59 2006-07-26 18:29:23 6 1 81 0 54 80 123 74.50 40 62.58 CHANGED plsFssFlhSLuoSAlhpLG-hPcPpoGphpc.sL.h.......A+poIDlLuMLpEKT+GNLos-Es+lL-slLh-LRh+Y .........lsFssFlhSLsooAhspLGchssPp.oGphpp.sL.h........A+....poIDlLsMLpEKT+GNLss-Ep+lLcslLa-LRhpY.. 
0 33 46 53 +8732 PF08900 DUF1845 Domain of unknown function (DUF1845) Bateman A anon PSI2 target BIG_78 Family This family of proteins are functionally uncharacterised. 20.40 20.40 20.40 20.70 20.30 20.20 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.27 0.70 -5.05 25 252 2009-09-11 12:10:13 2006-07-26 18:32:17 6 2 206 0 69 241 11 209.90 34 82.41 CHANGED ptphGuL..RSslslsLHTpaAhRLWp.GRptpcscp.............uIhGhssahuhhsplppu.uppDDPYADhallplEcpltpucpplpphspplcthhsp.lPsslslucstSlpPlplsLahsosLGapsVaLLscaDpLspplhpApHhuLlu+pctpchlcpGu+hlRplFulsQpY.......RasGloRcDht..tsNA+uppA...lc+hG.clPt-lLpGp+RSs..FuP..lsps ...........................................p..phGuL..+SshslsLHT+aA.RlWp.GR...tttcscs.......................sIlGhstalshhsphpps.utpDDPYuDhhhlplEcclppsp....pphpslt.ppl-phhus.lPsslslucshslp.....Plp..lslalsssLGapsVaLLssaDpLs++lhhApHhuLIs+sphc.p....hLscGu+hl....RplaulsppY.......+.hsGsoRsDht.....tpNutuptA......hcchG..clPp-lLpGp+RSp..FuP.sl...p................................................... 0 11 34 54 +8733 PF08901 DUF1847 Protein of unknown function (DUF1847) Bateman A anon PSI2 target BIG_82 Family This family of proteins are functionally uncharacterised. THey contain 4 N-terminal cysteines that may form a zinc binding domain. 25.00 25.00 28.40 28.20 21.60 20.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.01 0.71 -4.72 31 185 2009-01-15 18:05:59 2006-07-26 18:49:59 6 2 169 0 70 167 5 151.60 44 73.44 CHANGED Csstp..tchlccshp.Yppc.pst+lupsAAplEuptY...........s+hTRlEElltFA++hGa++lGlAhClGLhpEu+lhscILcspGFElhSVhCKsGul-KsplGltcp...p.sshEuhCNPIhQAclLNctpT-LNlllGLCVGHDhLFhKaScAPlTTL ...................C.s.t....c.hppshphYpp-.pst+ltpsu.AplEuphY...........sc.hTRlEEllpFA++hGaKKIGlAhClGLhcEA+hhscIL.ctpGFE.lhSVhCKsGulsKsp.lGltcp....p.s.shEshCNPIhQAclLNctpT-LNlllGLCVGHDoLFhKYScAPlTTL........... 
0 27 49 61 +8734 PF08902 DUF1848 Domain of unknown function (DUF1848) Bateman A anon PSI2 target BIG_74 Family This family of proteins are functionally uncharacterised. The C-terminus contains a cluster of cysteines that are similar to the iron-sulfur cluster found at the N-terminus of Pfam:PF04055. 24.80 24.80 24.90 29.80 23.90 24.70 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.78 0.70 -4.86 33 307 2009-01-15 18:05:59 2006-07-26 18:54:42 6 1 280 0 86 291 10 258.50 39 84.80 CHANGED IISASRRTDIPAFYu-WFhsRl+cGaVhVpNPass+ploclsLpPcsVDulVFWTKNPtPhl...shLsElcshuasYYFpFTlTsYspp.lEPsl.....Pshpp.tlcsFppLSctlG.-RVlWRYDPlllosp..hsh......caHhcsFpplAstLs..GaTc+ClISFlDh..YpKscpshppls.....hh.psspc....chtplupphupI.....Apca.ulpLpoCu-chs.LsthG...ItpupCIDtpllccl.....hGpph...........................thtK.D+uQR..ptCGChcSpDIGsYs..TChHGClYCYA .........................IlSsSRRTDIPAFYucWFhsRl.+..pGalhVpNPaN.pplo+lsLss....csVDslVFWTKNPtP.hl...spLppLpp....a.t.aYFpaTlTsYsc-.lEssl......Ps.hp.c.hlc.sFpcLSc.tl...G.tc..+llWRYDPIl.losp...hsh........paHhctFppluppLp..Ga.Tc.+sllSFlDh..YpKsppshtpht....................h.th.spc..........chhplupphscl.......Ap..p..a.....sl..plpoCuEphc....Ls.thG....lpputCIDtpll.ccl.....hGtp.l.........................................phtK..DpsQR..ptCuChtSh.DIGsYs..T.Ch+GChYCYA........................ 0 43 70 78 +8735 PF08903 DUF1846 Domain of unknown function (DUF1846) Bateman A anon PSI2 target BIG_73 Family This family of proteins are functionally uncharacterised. Some members of the family are annotated as ATP-dependent peptidases. However, we can find no support for this annotation. 
25.00 25.00 45.40 45.30 24.30 24.10 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.76 0.70 -6.04 22 660 2009-01-15 18:05:59 2006-07-26 19:20:06 6 3 644 4 66 448 23 479.30 62 99.14 CHANGED +IGFDs-KYLchQScHIhERlspFssKLYLEFGGKLhDDaHAuRVLPGF-PDuKl+hLpcLKDpsEIlIsIsAsDIE+sKlRGDhGITYDpDVLRLIDsa+shGLhVsSVVITpYsu.QsuAshF+p+LE+hGIKVYhHYsItGYPo-lchIVSDEGYGKN-YIEToRPLVVVTAPGPGSGKhATCLSQLYHEaKRGlcAGYAKFETFPlWNlPLKHPVNlAYEAATADLsDVNMIDPFHLEAYGcTsVNYNRDVElFPVLpphhE+IhG.cSPY+SPTDMGVNMsG.sIsDD-Asp-AS+QEIIRRYapshs-htpstssccplp+lcLlMppsslospDRtVVssAhphA-cs............ssPusAlELsDG..pIlTG+TSsLhsuouAhlLNAlKcLAsIsc-lcLISPpsIEPIQpLKsphLGS+NsRLcs-ElLIALSloAsosssAphAhcpLspL+GC-sHoosILossDcpshRKLGlplTs-PhYpsp.pLYp ...........................K.uFDsEpYLphQp-HILERIsQ.F...D.GKLYLEFGGKhl-DaHAuRVLPGa-PDsKI+lLpcLK-QVElVIsINAusIE+sKsRGDLGIoYDp-VLRLIDpFp-hGlaVGSVVITQYsG.QPAA-sF+spLE+pGIcsYhHYsIcGYPoDh-+IlSsEGhGKNDYIcToRsLlVVTAPGPGSGKLATClSphYH-phpGl+uGYAKFETFPVWNLPL+HPVNLAYEAATADLcDVNMIDPFHLpsYGcTTVNYNRDIElFPVLKRhlE+IhG..cSPYtSPTDMGVNMVGFuIsDDEAshEASKQEIIRRYYpTllDaK.tpp.ls-ssVcKIELLMsclGlossDR+VslsARpKAEcT..............GuPAlAlELPsG..pIVTGKsS-LhGsoAAsLlNAlKphAsIs.cEl+LIpP-slcPIQsLKhcaLGS+NPR.LHosElLIALuIoAspNPsAtpAh-cLspL+Gs-AHSTlILocpDcslLRKLGIsVThDPhYphc+LY................ 0 31 47 58 +8736 PF08904 DUF1849 Domain of unknown function (DUF1849) Bateman A anon PSI2 target BIG_67 Family This family of proteins are functionally uncharacterised. 
25.00 25.00 102.80 102.40 16.90 16.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.49 0.70 -5.05 26 192 2009-01-15 18:05:59 2006-07-26 19:20:35 6 1 186 0 65 145 128 251.00 40 87.83 CHANGED usAtAAuussLssHRAlYDLsLtcopsptulsuhpGRhlY-Fs.GsuC-GYoscaRhVoplpss-sssploD.posoaEstcG+sa+FpscohsssphsppVcGsAcps..scshpVclcpPcscshsLs.tslFPTpHhhclIctAcsGcphhpsslFDGo-sucKshpTsslIGcstssssssssst.ts...........................................phsshstWPVTluYFs....ssss..-thPsYphSFcLaENGVoccLslDYuDFslsGpLucl-lhcsp .......u.AtAuuuspLlPHRAlYDLoLscAsppoulsuhsGRMVYEFs.G..SAC-GYTosFRhVocls.s-pspcloDppoTTaE-uDG+sFRFhs+ohsscpls..ccVcGsAchp....sst.ssV+Lp+PcppolsLs.uo.FPTcHhtclIstAcuGp.phhpsslFDGS-cu-+lhsTosllG+tpss.ssscs.cs....s...........................................chuptphWPVTlAYF-......cppp....-thPlYchsF+LYcNGloRcLshDYGDFshpGcLscL-lhcp.s... 0 16 37 46 +8737 PF08905 DUF1850 Domain of unknown function (DUF1850) Bateman A anon PSI2 target BIG_87 Domain This family of proteins are functionally uncharacterised. Some members of this family appear to be misannotated as RocC an amino acid transporter from B. subtilis. 21.00 21.00 21.00 25.80 19.70 19.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.08 0.72 -4.17 50 249 2009-01-15 18:05:59 2006-07-26 19:21:19 6 2 241 0 113 252 29 90.80 25 58.67 CHANGED pFoLpWsHSVE+spWpEsaplssss....LhLscschcuhGAGM.....csssssphcsGhahap.shs.shspls.ltsos..susapLshssp.....s.......hsLsp .......FoltapHSVE+sthpEsaclssss....LhLtcs+hpuhGAGhs.......c.ss..sst.h.c.sG.hahhp..ht.sh..s..p..lp..ltsus..supaplshssp.....ph.l..t.................................... 0 37 76 94 +8738 PF08906 DUF1851 Domain of unknown function (DUF1851) Bateman A anon PSI2 target BIG_90 Domain This domain is found at the C-terminus of a variety of proteins that are functionally uncharacterised. 
27.40 27.40 27.40 27.70 26.30 27.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.40 0.72 -4.00 31 216 2009-01-15 18:05:59 2006-07-26 19:26:17 6 2 152 0 36 227 5 76.00 30 42.01 CHANGED pphspplptFhhthpssph..s........hcs.....LFctAtccLG.LctDEhYGFsPALsLGGstslcsLpKlchhEHLhlLuQlss ....................................thp.Fht.hpspp.............hcc.ht..happAlc+h..G..LphsEhaGasPhLsLGGttcl-NLpKV+hhEHlhllsQl..s........... 0 11 20 28 +8739 PF08907 DUF1853 Domain of unknown function (DUF1853) Bateman A anon PSI2 target BIG_96 Family This family of proteins are functionally uncharacterised. 25.00 25.00 27.30 27.20 24.50 24.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.85 0.70 -5.23 43 413 2012-10-11 20:44:44 2006-07-26 19:26:35 6 3 401 0 93 362 50 259.20 28 88.53 CHANGED RDLuWhLtoPsLlssss.......................thhttaLpphDts..sssltphh................tsspR..LGhYhEpLhpahLp.....pssshcLlApNltlp.pps...pTLGELDaLl+s..sssphhHhElAlKaYLh..........ssssspt...........spWlGPNtcDpLcpKLs+lhp+QLsLuppstspthLsph......tssppplhhpGhLFhP..........hsssssssttlsssplpGhWhphc.-a.ths..........sttathLs+hsWLu..ssth..............sthhstpplpphlpphts............P.hlhth........tsthpEspRhFlV ........................................RpLsalltuPsLhpsts.................................tthhshLttL-t.s...stsLtcaht................ts..hR...LGhhhEpLhtahlp.....psP.thcl..lAtsl..tlp..ssG....pTlGplDFLlct.....tsp..p..h.HhElAlKaYLt..............................p..................ssahGPNspDpLstKhs+hhpHQL.Lopp..st...hhphh.th..............tshptphhlpGhLahs.................................stsshs...slsspphpGhWhptp.-.htths.................ttahhLs+.sWLsstph...................tts.h.tttth.................................................................................................................. 
0 16 47 75 +8740 PF08908 DUF1852 Domain of unknown function (DUF1852) Bateman A anon PSI2 target BIG_94 Family This family of proteins are functionally uncharacterised. 25.00 25.00 134.20 134.10 21.50 20.70 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.84 0.70 -5.65 27 294 2009-01-15 18:05:59 2006-07-26 19:27:27 6 1 290 0 85 262 19 317.50 70 97.96 CHANGED Msp-FsFoIKsIpFDEsYpPS-sTRlTTNFANLARGcsRQENLRNsLpMIDNRFNsLAaWDNPpuDRYoVELEIISV-hcI..cGsupsFPhIEILKTsIlD+KTscRI-GIVGNNFSSYVRDYDFSVLL.-HNKspspFSsP-sFG-LHGKLFKpFlsSssY+ppFsKPPVICLSVSosKTYHRTtNpHPVLGlEYpQs.....EhSLTDpYFpKMGLpVRYFMPsNSVAPLAFYF..hGDLLsDYTNLELISTISTMETFQKIYRPEIYNANSsAGphYQPSLcaQDaSLTpIVYDREERSQLAlcQGKFsEEHFIKPYpslLEQWSAs ....MspcFTFoIKolpFDENYpPS.-sTRITTNFANLARGpsRQENLRNsLpMIDNRFNuLAHWDNPpuDRYoVELEIISV-hsI........cusupsFPhIElLKTsIVD+KTscRIEGIVGNNFSSYVRDYDFSVLL.-HNKspspF...SlP-sFG-LHGpLFKpFlNSssYKtpFpKsPVICLSVSos+TY+RTpNpHPVLGlEYpQs.....-hSLT-pYFpKMGLpVRYFMPsNSVAPLAFYF..hGDLLsDYTNLELISTISTMETFQKIYRPEIYNANSsAGpsYQPsL+ppDaSLTpIVYDREERSpLAl-QGKFsEEpFIKPYpslLEQWSA............ 0 11 29 59 +8741 PF08909 DUF1854 Domain of unknown function (DUF1854) Bateman A anon PSI2 target BIG_97 Family This potential domain is functionally uncharacterised. It is found at the C-terminus of a number of ATP transporter proteins suggesting this domain may be involved in ligand binding. 21.30 21.30 21.30 29.70 19.90 21.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.45 0.71 -4.56 22 103 2009-09-11 14:44:06 2006-07-26 19:28:22 6 3 102 0 52 97 16 131.80 42 45.30 CHANGED ps+-GVsPVRAFPIuAPscGluLlstDG+ElhWl-cLspLssssRtLlEp-LAsREFhPpIp+IpsVSoauTPSsWpVpTDRGpsphVL+GEEDIRRLsups.LLIsDscGlpahItDhssLD+cSRKLL-RFL ..................................sacuVsPVRAFPIosPscGlSLh....s.s-G+ElhWI-cL.s.cLssssRpllEp-LAtREFhPpIp+IhsVSoauo.PSsW..pV-TDRGtsph....sL.+.G.E.EDIRRL.sups.LLIsDscGlpahI.DhpsLD+pSRKlL-RFL...... 
0 11 33 48 +8742 PF08910 Aida_N DUF1855; Aida_N; Aida-C2; Aida N-terminus Mistry J, Sammut SJ, Coggill P, Zhang D, Eberhardt R anon pdb_1ug7 Family This is the N-terminal domain of the axin interactor, dorsalization-associated protein family [1]. 25.00 25.00 25.40 27.00 23.70 19.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.56 0.72 -3.81 8 75 2011-09-20 15:42:39 2006-08-01 13:23:48 5 4 53 1 50 59 0 93.80 62 33.31 CHANGED pls+Wpuuhc+usDFDSWGQLlEAlDEYQhLu+pLpKpspu.tsspsFTE-QKKhluKlATCLcLRSpALQs...Tuup-uhoL--lKKLcslLKsllsss.ctFPlcVp ..........h.lp+WtuSh++usDFDSWGQLVEAlDEYQhLARH....LQKEAQu.pNsS-FT.E-QKKTIGKIATCLELRSAALQS...TQSQEEFKLEDLKKLEPILKN.IL.TYN.KEFPFDVQ.............. 0 11 16 27 +8743 PF08911 NUP50 NUP50 (Nucleoporin 50 kDa) Mistry J, Sammut SJ anon pdb_2c1m Domain Nucleoporin 50 kDa (NUP50) acts as a cofactor for the importin-alpha:importin-beta heterodimer, which in turn allows for transportation of many nuclear-targeted proteins through nuclear pore complexes. The C terminus of NUP50 binds importin-beta through RAN-GTP, the N terminus binds the C terminus of importin-alpha, while a central domain binds importin-beta. NUP50:importin-alpha:importin-beta then binds cargo and can stimulate nuclear import. The N-terminal domain of NUP50 is also able to actively displace nuclear localisation signals from importin-alpha [1]. 22.50 22.50 22.60 23.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.57 0.72 -3.60 35 204 2009-01-15 18:05:59 2006-08-01 13:53:58 6 5 157 7 130 216 0 72.10 35 13.50 CHANGED uKRsAscpLo+DNa..DpE..--.s-EsGoFppASpElLpsRtItKs+RRt....ss...........ssssstushssFuuhshsss ....................uKRhApppLoccNa.......DpE.....-p..s--sGo.FphASp-VLpsRt.ItK..s+R+p.....s.......................pssssush.psFpuhths..s..................................... 
0 42 67 105 +8744 PF08912 Rho_Binding Rho Binding Mistry J, Sammut SJ anon pdb_1s1c Domain Rho Binding Domain is responsible for the recognition and binding of Rho binding domain-containing proteins (such as ROCK) to Rho, resulting in activation of the GTPase which in turn modulates the phosphorylation of various signalling proteins. This domain is within an amphipathic alpha-helical coiled-coil and interacts with Rho through predominantly hydrophobic interactions [1]. 23.00 23.00 24.00 26.90 22.90 22.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.32 0.72 -3.67 22 184 2009-01-15 18:05:59 2006-08-01 13:56:02 6 18 68 4 94 144 0 67.40 56 5.32 CHANGED LTpDluNLupEpEELNsKhKcupEchpphK-EE..hsslKAtFEKp...lpoER..TLKTQAVNKLAEIMNRK- ............................LTpDltpLspEKEELspKh...K.csp.E..c...hpthK-EE...hsslKApaEKp...LpoER..TLKTQ..AVNKLAEIMNRK-. 0 15 23 56 +8745 PF08913 VBS Vinculin Binding Site Mistry J, Sammut SJ anon pdb_2b0h Domain Vinculin binding sites are predominantly found in talin and talin-like molecules, enabling binding of vinculin to talin, stabilising integrin-mediated cell-matrix junctions. Talin, in turn, links integrins to the actin cytoskeleton. The consensus sequence for Vinculin binding sites is LxxAAxxVAxxVxxLIxxA, with a secondary structure prediction of four amphipathic helices. The hydrophobic residues that define the VBS are themselves 'masked' and are buried in the core of a series of helical bundles that make up the talin rod [1]. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.58 0.71 -3.88 22 278 2009-01-15 18:05:59 2006-08-01 13:58:23 5 29 85 5 132 225 1 116.70 43 7.10 CHANGED oFVDYQTTMV+pAKAIAVTsQEMhTKSsTsP-ELGsLAsQhTsDYupLAtpup.AAssAEsEE..............................................................IGh+I+pRVQ-LGHGCssLVpKAGALQssPoDuYTK+ELIECARcVSEKVSpVLAALQAGNR .......................................................oFlDYQTphV+tuKuIAhoApEMs..spSss.sPp..-LusLA.....sphopD.as.pLsp..puh.AAusApspE..............................................................luhpI..+spVp-LGpus.tLlptAuALpssPs.DshsK+..........-Lhcs.ARsVoEKV.utVLuALQuGs+....................................... 0 32 42 78 +8746 PF08914 Myb_DNA-bind_2 Rap1 Myb domain Mistry J, Sammut SJ anon pdb_1fex Domain The Rap1 Myb domain adopts a canonical three-helix bundle tertiary structure, with the second and third helices forming a helix-turn-helix variant motif. The function of this domain is unclear: it may either interact with DNA via an adaptor protein or it may be only involved in protein-protein interactions [1]. 27.10 27.10 27.30 27.20 27.00 27.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.22 0.72 -4.17 11 164 2012-10-04 14:01:12 2006-08-01 13:59:53 6 13 122 1 117 163 0 62.50 33 11.24 CHANGED GRluaT-tEDsAILsYV+EpuRuP.uoVoGNALWKtMEKspLTpHSWQShKDRYLKHL+GQc+chL ......RhsaTspDDthlhpalt....p.......tcps..ss.h...s..G.Nplapph.......pcp....p.................pH..oWQSh+-RYlK+Lptp.................. 0 38 61 86 +8747 PF08915 tRNA-Thr_ED Archaea-specific editing domain of threonyl-tRNA synthetase Mistry J, Sammut SJ anon pdb_1y2q Domain Archaea-specific editing domain of threonyl-tRNA synthetase, with marked structural similarity to D-amino acids deacylases found in eubacteria and eukaryotes. This domain can bind D-amino acids, and ensures high fidelity during translation. 
It is especially responsible for removing incorrectly attached serine from tRNA-Thr. The domain forms a fold that can be be defined as two layers of beta-sheets (a three-stranded sheet and a five-stranded sheet), with two alpha-helices located adjacent to the five-stranded sheet [1]. 25.00 25.00 41.10 40.30 23.70 18.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.87 0.71 -4.20 35 141 2009-01-15 18:05:59 2006-08-01 14:01:24 6 3 129 15 97 144 14 135.50 45 25.44 CHANGED MRlLhIHuDhhcYcs+cKs.chAE-..tssppuch--sLVsFsuVE+sD-ps.p.llcpAlc-Ihcsup+lcssp..lllYPYAHLSSsLAsPpsAhclLcplE...ptL..tppshcVhRAPFGWYKuFpIsCKGHPLSELSRoI ..........M+lLhIHu-hhcacs+cKs.c..h..sE-...tp.cpsch--sLVsFhuVE+sD-ps.ttllcpAlp-IhclusplKsps..lVlYPYAHLSSsLusPcsAlclLcplE...ptL..tppshcVhRAPFGWYKuFcloCKGHPLSELSRsI........ 0 28 58 79 +8748 PF08916 Phe_ZIP Phenylalanine zipper Mistry J, Sammut SJ anon pdb_1q2h Domain The phenylalanine zipper consists of aromatic side chains from ten phenylalanine residues that are stacked within a hydrophobic core. This zipper mediates dimerisation of various proteins, such as APS, SH2-B and Lnk [1]. 25.00 25.00 30.00 35.20 23.70 20.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.79 0.72 -3.65 7 161 2009-01-15 18:05:59 2006-08-01 14:02:27 6 4 64 3 89 161 0 58.10 48 9.67 CHANGED .uWpEFCELHApsAAtDhA+pahhFlppN..Ppa.sPhust.FSt+Fs-hF.paFpsElpc .....uWcEFCEhHApAAAhDF.A++Fphal.......ppp..Ppa..ss.P.s.uts...uFS++Fs-h..FhpaFptEVt...... 0 12 20 44 +8749 PF08917 ecTbetaR2 Transforming growth factor beta receptor 2 ectodomain Mistry J, Sammut SJ anon pdb_1ktz Domain The Transforming growth factor beta receptor 2 ectodomain is a compact fold consisting of nine beta-strands and a single helix stabilised by a network of six intra strand disulphide bonds. 
The folding topology includes a central five-stranded antiparallel beta-sheet, eight-residues long at its centre, covered by a second layer consisting of two segments of two-stranded antiparallel beta-sheets (beta1-beta4, beta3-beta9) [1]. 25.00 25.00 25.00 36.20 21.50 22.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.02 0.71 -4.33 6 96 2009-01-15 18:05:59 2006-08-01 14:04:18 5 3 46 9 38 87 0 110.20 54 22.61 CHANGED lspLCKFCDVptooCsGpsoCtSNCsITSICEpP-EVCVAIWR+s-cNlTlETlCHcPphpLYGhhL-DtNoopCVMKE+pssuGphahCSCssE.ECNDhLlFs....s.p.sppppll ..........spLCKFCDVc.ooCcspcoChSNCSITSICEcspEVCVAlWRKND-NlTlETlCHDPphshHGa...hL-DssSsKClMKEKKssGpsFFMCSCss-.ECNDhlIFs...phssts........h............. 0 1 7 18 +8750 PF08918 PhoQ_Sensor PhoQ Sensor Mistry J, Sammut SJ anon pdb_1yax Domain The PhoQ Sensor is required for the virulence of various Gram-negative bacteria by allowing interaction of PhoPQ with the intracellular membrane, resulting in remodelling of the bacterial cell surface and subsequent bacterial resistance to host antimicrobial peptides. The domain contains a major flat acidic surface, which binds to at least 3 calcium ions, neutralising the domain's negative charge and allowing interaction with the negatively charged membrane [1]. 20.30 20.30 20.30 20.50 20.10 20.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.98 0.71 -4.77 10 556 2009-01-15 18:05:59 2006-08-01 14:06:04 5 7 550 8 46 195 7 178.50 77 37.00 CHANGED PFSLRsRFLlATAuVVLALSLuYGlVAlVGYSVSFDKToFRLLRGESNLFYSLAQWcNNKLoIslPPslDlNsPTLVLIYDEpGplLWRQRcVPcLEspIpsEWLc+sGaaELDTDscsSstlL.usNsphQcp...L+ch.-ssD-ssLTHSVAVNhYPATu+LPsLTIVVVDTIPQELQ+osl ....PLSLRVRFLLATAAVVLVLSLAYGMV.ALlG.YSVSFDKTTFR.LLRGESNLFYTLAKWENNKLpV..ELPE.N.l.DhQSPT.MT.LIYDEsGpLLWuQRDVPW.Lh.KhIQP-WLKoNGFHEIEuDVssTShLL.SsDHShQpQ...LpEV..RED.D...D.DAE.MTHSVAVNlYPATSRMPpLTIVVVDTIPlELKpSYM..................... 
0 2 11 28 +8751 PF08919 F_actin_bind F-actin binding Mistry J, Sammut SJ anon pdb_1zzp Domain The F-actin binding domain forms a compact bundle of four antiparallel alpha-helices, which are arranged in a left-handed topology. Binding of F-actin to the F-actin binding domain may result in cytoplasmic retention and subcellular distribution of the protein, as well as possible inhibition of protein function [1]. 22.20 22.20 24.30 28.80 21.40 22.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.48 0.72 -4.35 16 161 2009-01-15 18:05:59 2006-08-01 14:07:44 5 10 74 2 82 188 0 108.80 48 9.55 CHANGED ssssploK-ulLElsphLcsulsphpss..s.h............hupahpLuDphppLashCssYs-s.hhsPHsKFpFRELloRLEsps+pLRs..suups......sssss+llscltsol+-IsslVQR ....................................h.uspIoKpslL-ss-.hL.......ssAIocsspp.....................hspaStll-sG+pLhsaCsuYVDs.I.QhRNKFAFREAlsKLEssLpELQl......ssAsAG.....sPusspshscLLSoV+EISDlVQR.. 0 14 20 47 +8752 PF08920 SF3b1 Splicing factor 3B subunit 1 Mistry J, Sammut SJ anon pdb_2f9j Domain This family consists of several eukaryotic splicing factor 3B subunit 1 proteins, which associate with p14 through a C-terminus beta-strand that interacts with beta-3 of the p14 RNA recognition motif (RRM) beta-sheet, which is in turn connected to an alpha-helix by a loop that makes extensive contacts with both the shorter C-terminal helix and RRM of p14. This subunit is required for 'A' splicing complex assembly (formed by the stable binding of U2 snRNP to the branchpoint sequence in pre-mRNA) and 'E' splicing complex assembly [1]. 
25.10 25.10 25.90 25.40 24.80 25.00 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.17 0.71 -4.17 25 284 2012-10-02 12:52:19 2006-08-01 14:09:09 5 8 240 9 211 279 8 155.20 40 13.15 CHANGED sssspp+.+RSRWD...pTP......sss.sssss................s.hppsch..sp...sP.....upTPhs....sp....................uhsTPhsspp..................hpspphhthphpp-h-pRNRP.....LoDEELDplL..PsEGYcILcPPssYtPlRss....tpKlhtsssshts....ssFhl..-sspu..........tppl ................................................................................................................................ss...ts+.++SRWD.......p.TPt............sst..s.............................tssp++SRW..Dp..........TP..........s...uuTPss.st.....................shATPsssph................................hssppht.shpapp-I-cRNRP.....LoDEELDsMh..P...E..GYKlLtPPsGYsPIRTP....ARKLhATP.s..Phsu........sGFhhQ.pt-ps....tth..................................... 0 83 121 175 +8753 PF08921 DUF1904 Domain of unknown function (DUF1904) Mistry J, Sammut SJ anon pdb_1u9d Domain This domain is found in a set of hypothetical bacterial proteins. 21.10 21.10 21.20 21.50 20.90 21.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.28 0.72 -4.00 20 267 2012-10-01 20:38:22 2006-08-01 14:12:29 6 2 254 2 45 195 7 103.30 40 97.80 CHANGED MPHlRhRGlspctVpplScsLlc-Luslsssss-sFTlEalsSshapsGphspsashVEVLWFsRsp-spctlAphIsctlpph..sptpcVsVhFpsLssssYYcNGpH ..................MPHl+FRulptphlpplSpsLlpELuplhpsspc..sFThEhhs..opahhsG......ch...t.sa.PhVEVhWFs.RsQchp.DplApsITph.l+p...su.sclsVlFhsLspsuYY.sGpH...... 0 18 26 35 +8754 PF08922 DUF1905 Domain of unknown function (DUF1905) Mistry J, Sammut SJ anon pdb_2d9r Domain This domain is found in a set of hypothetical bacterial proteins. 
21.00 21.00 21.00 21.10 20.70 20.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.60 0.72 -4.41 48 396 2009-01-15 18:05:59 2006-08-01 14:14:49 6 7 306 1 165 375 65 81.40 27 63.68 CHANGED FsAplh...psss.....sasalslPh-sucplstt..............GplpVpuslsGpsaps.SlhP...spG......salLslctslR+stGh.ssGDpVplpl ..............Fpu.lh.....tsts......shsa.ltlPh-htcphs.t..............G.plpVpuslss.h.sacs.olhs...hGss.........salLslcpslRcthuh.psGDtVpVpl......... 0 76 135 155 +8755 PF08923 MAPKK1_Int Mitogen-activated protein kinase kinase 1 interacting Mistry J, Sammut SJ anon pdb_1vet Domain Mitogen-activated protein kinase kinase 1 interacting protein is a small subcellular adaptor protein required for MAPK signaling and ERK1/2 activation. The overall topology of this domain has a central five-stranded beta-sheet sandwiched between a two alpha-helix and a one alpha-helix layer [1]. 20.50 20.50 20.50 21.00 20.40 20.40 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.17 0.71 -4.47 7 141 2012-10-02 21:07:43 2006-08-01 14:16:48 5 4 114 5 84 146 2 113.10 45 83.01 CHANGED -cl+caL..thhppV-Gl...puIhloDRDGVslh+lup-ss.ss.shcPuhlsTFshAs-QusKLsLGcN+oIIshYpsaQlVQhN.....tLPLllohlusossNsGhILuL-ppltsll....p-lppsl ..................................................cl++aL..chL.pV.-G.L....auIlloDRDGVPll+..Vu..s-su..P-.hA.lRPuFLoT.Fu.hAoD.Q.uS....KL.GLuKN.KoIIshYss..YQ.V.V.QhN....................+LPLlloFIA.os....sA.NTGh....I.luLEccLsslh....c-Lcpsh..................................................................... 0 35 42 62 +8756 PF08924 DUF1906 Domain of unknown function (DUF1906) Mistry J, Sammut SJ anon pdb_1sfs Domain This domain is found in a set of uncharacterised hypothetical bacterial proteins. 
26.90 26.90 27.00 28.80 25.20 26.80 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.68 0.71 -4.30 35 469 2009-01-15 18:05:59 2006-08-01 14:22:11 6 19 346 1 122 422 5 132.00 30 30.10 CHANGED tustl.s.spApsl+.suGassV.sRYlosspssst....hsKslotsElcslhsuGLplhslYQhs........shts.usastu.ttGst-ApsAhphAtthGhspsssIYFuVD..DssssphsshllPYF+uhpsslut....s.....YcsGlYG .........................h....l....tsppl+..suGatss.sRYlosppssst............hsKslohsEhcsltssGLplhsl...YQhG.......................ttps..upas...t..u..sGhtcAp.pAhphtt.s.hGhP..s....ussIYhulD..Ds.sptphs...spllP...Yh+uhpsslst....p.........a+sGlYu..... 0 35 83 110 +8757 PF08925 DUF1907 Domain of Unknown Function (DUF1907) Mistry J, Sammut SJ anon pdb_1xcr Domain The structure of this domain displays an alpha-beta-beta-alpha four layer topology, with an HxHxxxxxxxxxH motif that coordinates a zinc ion, and an acetate anion at a site that likely supports the enzymatic activity of an ester hydrolase [1]. 
25.00 25.00 25.40 29.40 19.60 24.50 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.86 0.70 -5.32 14 174 2009-01-15 18:05:59 2006-08-01 14:24:16 6 5 109 2 102 187 329 237.30 46 85.41 CHANGED VlpuuLppNFtsVpVoVspCPDLpcsPFphsspGLsGpspls-VGGssaLlPhsphsKhYslhslu+c.hch....ssth..llGAGAGPa.hlGpNsEhhhNlphp................psspssNtSahuplsstsspsllcKhsp..scschuLLuNLahsEG.KPGpVL+lpA+pRTGpc.sFlsCIRpuLcp+YG-..+sVuLGGsFll+pGKA+hHlMP.-F...SpsPlpocEcl.NpWL+aa-MsAPLlslsVllSpDP..GLDLRlEHhHsFS.pHGc..GGHYHaDT...TP-pVEYcGYFssAEtlYRl ......................VlptuLppNFtplpVs.Vs-CPDLsptPFphsspGls..Gpspls-VGGsPaLlPhsp..pcK.hYslppls+c.lph....sssh..llGAGAG..Pa.hhG.NsEhh.slphp................ttp..ssNuSahuplp..s.....ts.t.tsllc+h.sp....pchphuLLuNLahS-G.p.P.G..c.Vlc.lpA+pRpG.t....salsshRpsLpp+Yss..csluhGGhFllppGcs+.HlMs.-F..............SpsPlp...oc-cl..spWL+aa-hp.APLlshsshVop.........Ds...shDLRlpHhHs.FS.pcsp....GGHYHhDT......TP-tVEY.GYF.sA-hlhRl...................................... 0 31 40 80 +8758 PF08926 DUF1908 Domain of unknown function (DUF1908) Mistry J, Sammut SJ anon pdb_1v9v Domain This domain is found in a set of hypothetical/structural eukaryotic proteins. 
25.00 25.00 27.70 27.70 21.10 20.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.89 0.70 -5.64 6 371 2009-01-15 18:05:59 2006-08-01 14:26:40 6 10 86 1 177 290 0 235.00 62 18.97 CHANGED PlDSPRNhSsuuslsFPF..ARp.hsp..RADGRRWSlASLPSSGYGTNsPSSolSSSsSSQERL....HQLPaQPTsD-L+FLuKHFRSoE......................................................................SssDED..................G+hSPhhRPRSRSL.SPGRSssoFDNEIlMMNHVYKERFPKATAQMEERLp-hIschs..PssoLsLADGVLGFIHHQllELARDCLcKSpsuLlTSRYFhELQEKLE+LLpEA+ERS-SEEVshIspLl+KLLlIISRPARLLECLEFDPEEFY+LLEAAEGQAKVsQGIKTDIPRYIIpQLGLsRDPlEEl ....................................P.DSPRNhS.ss.ssh.pFsF...s...................Rs....DG..RRWSLASLP..S.SGYGT.....N.TP...S..ST..V....S..S..S.sSSQE+L..........HQL.P.a.Q..P....T.sDE......L......+FLoKHFtooE....................................................................................................................................................S...lssEp...................s.pp.us...hR.PR.SRSL..SPGRos.s......shDp..EIlMMNH..VYKERFPKATAQMEERLp.-hlps.s............PsssL...s.LAD..GlLuFlHHQllElARDCLsKS.cpsLITSpYFhELQ-pLE+L.Lp-Aa..-RS.-StElsaltpLV+KlLIlIuRPARLLE.CL.E..FDPEEFYa.LLEAA..EGHAKEG.p.G...............I....Ks....DIP.+YIIsQLGLs+DPLtEh.......................... 0 34 49 103 +8760 PF08928 DUF1910 Domain of unknown function (DUF1910) Mistry J, Sammut SJ anon pdb_2fef Domain This domain is found in a set of hypothetical bacterial proteins. 
21.90 21.90 22.00 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.62 0.71 -4.07 16 185 2009-01-15 18:05:59 2006-08-01 14:30:54 5 3 113 0 30 177 5 119.60 23 41.43 CHANGED RDsLpsEphhhctIc.pcctltch...pEcIhphcpctcpslpRh.........tshhshahhshchllAcYShGpsl-phps.a.pslsthEc..................hh-tc.stYh.hLWhlSLuILLch-cp.plppLspll .....................................RD.ltscphhpchlph.pctIpc....ppplhphc...psppps.htp..........thhhshht..hp...h.c.hlhspYShGpslpplpp.a.phlp..hhcp............................shptp..shY.hphlahlSLulLLchccc..hpcLhphl................................ 0 7 18 27 +8761 PF08929 DUF1911 Domain of unknown function (DUF1911) Mistry J, Sammut SJ anon pdb_2fef Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 30.30 27.20 22.00 18.70 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.51 0.72 -3.37 20 220 2009-01-15 18:05:59 2006-08-01 15:05:44 5 3 139 3 37 214 5 107.20 32 37.40 CHANGED pcs...pDhLlchLlsstshs....p.spchhappPYttLhpsl.....spst-ppsctLtpYl.cpWY+uhcshsW+sp............H+tstt.....sYaGYWuFEuAAlshlhulDDSuh+Dpsa.YPtDL ..............................................p....pDhLl-hllth.tt.th...p.spp..hhh.pPYtthh.chl.....ppspppt.ctLppYl.cpWYcu.ppch..s..W..+ss..................HKps.......sYhGYWuFEsAAlsKlhsl.D..DosL+sps.a.YPhDL............. 0 8 23 32 +8762 PF08930 DUF1912 Domain of unknown function (DUF1912) Mistry J, Sammut SJ anon pdb_1z0p Domain This domain has no known function. It is found in various Streptococcal proteins. 
25.00 25.00 94.70 94.60 23.10 16.30 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -10.05 0.72 -3.89 17 344 2009-09-11 13:53:27 2006-08-01 15:07:31 5 2 343 1 24 96 0 83.70 81 98.41 CHANGED MSYEQEFLKDFE-WVpoQIplNQhAMsoupKVhEEDtDERAtDAaIRYESKLDAYcFL.GKFsNY+NGKuFHDlPDGLFGpRHY MSYEQEFMKEFEAWVNTQIMIN-MAhKESQKVY.EEDQDERAKDAMIRYESRLDAYQFLLGKFENFKAGKGFHDLP-GLFGERNY. 0 2 5 14 +8763 PF08931 DUF1913 Domain of unknown function (DUF1913) Mistry J, Sammut SJ anon pdb_1zru Domain This domain has no known function. It is found in a various putative receptor proteins from Lactococcus bacteriophages. 25.00 25.00 25.10 28.30 20.90 20.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.74 0.71 -4.40 5 97 2009-01-15 18:05:59 2006-08-01 15:09:40 5 2 97 66 0 69 0 145.80 80 54.39 CHANGED MTIKNFTFFSPNGTEFPVGSNNDAKLYMMLTGMDYsTIRRKDWpoPlNTALNVQYsNTSIIAGGRYFELlNETVALNANSVNYIHANIDLTQTTsPVSLSAETSDNSNsVDINNuSGVLKVlIDIlTTNGhGVostcsPsQsToLD .......MTIKNFTFFSPNGTEFPVGSNNDGKLYMMLTGMDYGTIRRKDWoSPL.NTALNVQYsNTSIIAGGRYFELL.NETVALKuNSVNYIHANIDLTQTAsPVSLSAETus...NSNsVDlNNsSGVLKVshDIhTTsGTGVhSsKs.spsohLD.............. 0 0 0 0 +8764 PF08932 DUF1914 Domain of unknown function (DUF1914) Mistry J, Sammut SJ anon pdb_1zru Domain This domain has no known function. It is found in a various putative receptor proteins from Lactococcus bacteriophages. 20.70 20.70 21.60 21.70 20.60 18.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.59 0.71 -4.05 9 84 2012-10-01 20:11:45 2006-08-01 15:11:48 5 3 82 140 3 68 0 114.10 37 30.57 CHANGED lssNsls...hpGSlsl.hp..sh....sGsGLphphpKKs.-lVlh+ahGpl...sshssGhphu..WVctPap..Pshsp.SLlG+F.........sspusSFHIDlsPsGohpWWGsshupssl..RGsu.YFI ..........................lssNslo...lpGSlsVPspp.Th...psGsGLpLpLpKKNsDlVIl+ahGsl...ss.lppGhshuh...sWVchPap..PsssQ.SLlGHh.........sGpsssFHIDlsPsGolsWWGsslusssh..RGsuoYFI........ 
0 2 2 2 +8765 PF08933 DUF1864 Domain of unknown function (DUF1864) Mistry J, Sammut SJ anon pdb_1zee Domain This domain has no known function. It is found in various hypothetical and conserved domain proteins. 19.70 19.70 20.30 19.70 19.60 19.40 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.27 0.70 -5.94 11 77 2012-10-01 19:57:26 2006-08-01 15:13:56 6 1 76 12 22 87 28 363.90 45 96.75 CHANGED TpsstAFDcWIRscFl-lNocLEpLYapQsD+AsVsGlG-sLKttLcsEG+shIpsLLsEGNTDEGFDuAFDLLGNVGLYMAACRRHEITEPSRETsSPLhEASALAMHIGASIGVsPRFATAHLTTHNpAhsGlYKRFTsLsDE+LFlDYNT+GILAYKRAADALL+I.PLGloHPhoA-LLcsAcpALpcVl-SNptLFppLDs-RFF.CVRPYYKPaRVGuplYRGANAGDFAGINVIDLlLGLCtAN-PuYSQhLVDKFLYMMPEDQslLRDCMRRsSLMDcFLsutctspp..sWaQ-Nl+LFLpVCchHGpTAIQHHsQLVpKaIApPucphpQpHhup....lTASGPPLcVLLsuLcKLRD+RAAA+.RsDIpTRapDIp...tLKuoL .............................................hss.Ahs-.lpuR..tLs.pL.tL....sc+tsV....GIss.L+phL.scGhs..........os.GFspAhshhtslGhahuuh+RH..t.pEPsc....ssPhlEsshLsh..utuhGlsPR.shhHlTsaN....Atssh.+paTsLsDEthhl.-.sphuh.hAhctAhsAhl..clps....lulpp..Phhuphhcshttt...LpchlES.shhap.....plssphFa.t.plRPY...............Y.......cPhR.......VGup.........sYh..GssAs-hs.l.VlDhlLhhs..pAscsuYpphh.shh.YhhPp.ptlhtchhtcPuLhDchLttt.p.us.sp....hpEslph........G.sAlp+h.pllh+FhA..sp.h-p..ta.ut.......shuSGs.h.shLusL.pLp...RAAsp.RtcItst.................................................................................................. 0 3 6 15 +8766 PF08934 Rb_C Rb C-terminal domain Mistry J, Sammut SJ anon pdb_2aze Domain The Rb C-terminal domain is required for high-affinity binding to E2F-DP complexes and for maximal repression of E2F-responsive promoters, thereby acting as a growth suppressor by blocking the G1-S transition of the cell cycle. This domain has a strand-loop-helix structure, which directly interacts with both E2F1 and DP1, followed by a tail segment that lacks regular secondary structure [1]. 
20.70 20.70 20.70 21.00 20.60 20.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.96 0.71 -4.59 6 127 2009-01-15 18:05:59 2006-08-01 15:15:48 5 4 54 18 50 123 0 125.00 55 15.18 CHANGED ILQYASsRPPTLSPIPHIPRSPYKhPNSPLRVPGSNNlYISPLKS.oR.....hSPshMTPRSRILVSIGESFGou-KFQKINQMVsSSDRuhKRohDuSuAPKPLKRLRFDlDGQDEADGSKs.uGEStLIQKLAEMoSTRSRMQEQKMKE-s-occc.p ...................csPsLSPIPHIP.p.SPh.p.ss.SPhRIstt.sIYISPhKs..........sothTPRSplL.hphutS.......ScchpcINpMlps.u-.R.shKRuhp.s.u.ss.pPhK+Lp...................................................p.................... 0 4 6 18 +8767 PF08935 VP4_2 DUF1865; Viral protein VP4 subunit Mistry J, Sammut SJ anon pdb_2bai Domain This domain is predominantly found in viral proteins from the family Picornaviridae. It is VP4 of the viral polyprotein which, in poliovirus, is part of the capsid that consists of 60 copies each of four proteins VP1, VP2, VP3, and VP4 arranged on an icosahedral lattice [1]. VP4 is on the inside and differs from the others in being small, myristoylated and having an extended structure. Productive infection involves the externalisation of the VP4, which is cleaved from the rest, along with the N-terminus of VP1. There thus seem to be three stages of the virus, ie a multi-step process for cell entry involving RNA translocation through a membrane channel formed by the externalised N termini of VP1 [2]. 22.20 22.20 24.40 22.80 19.10 18.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -9.87 0.72 -3.38 12 688 2009-01-15 18:05:59 2006-08-01 15:18:15 5 18 80 15 0 699 0 81.60 66 4.62 CHANGED GsGpSS.ssGspNpSGNoGsIINNaY.pQYQNShD..LusNshSstuspusssoosopoppotssshFSplus.........LLA ....GAGQSSPATGSQNQSGNTGSIINNYYMQQYQNSMDTQLGDNAISGG.......SNEGSTDTTSTHTsNT....QN......NDWFSKLAsSAFoGLFGALLA..................... 
0 0 0 0 +8768 PF08936 CsoSCA Carboxysome Shell Carbonic Anhydrase Mistry J, Sammut SJ anon pdb_2fgy Domain Carboxysome Shell Carbonic Anhydrase is a bacterial carbonic anhydrase localised in the carboxysome, where it converts bicarbonate ions to carbon dioxide for use in carbon fixation. It contains three domains, these being: (1) an N-terminal domain composed primarily of four alpha-helices; (2) a catalytic domain containing a tightly bound zinc ion and (3) a C-terminal domain with weak structural similarity to the catalytic domain [1]. 20.60 20.60 21.20 20.90 19.60 19.60 hmmbuild -o /dev/null HMM SEED 459 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.51 0.70 -6.13 22 80 2009-01-15 18:05:59 2006-08-01 15:21:33 5 1 77 2 31 84 273 428.30 46 83.35 CHANGED HPLocpstNpcLpsYEppVKu+F-cIVPlLKclSulQH-.DFlppAQpLA+tcLGFsLPpplL-cAWVpsLDMRu......LaAaCVFpoapphS-pFFpsDPLpu....ppupth-sFLh-CGFHhlDlo.PCADGRLAHsluYsLRl.PauuVR.R+uHAGAhFDlEsoVs+Wl+TEHpRaREuhPNsAcpsTRYLKlslYHFSShDPsHpGCAAHGSsDthAApAuLpRLh-FRpAlENoFCCGASVDlLLIGlDTDTDAIRVHlPsssGclsLccalsstplYppThsLou-pA+tpItpsl........pstusus.p.GMhphIspLltNNlSQIDYVpphHsGpYs..DhGHAERFIGVG.GF+EVpLRNLoYFAHLDTVEEGAPDLDVGlKIFpGLNVu+sLPIPVllRFDYsu+VPGAR-RAlscCpRVpsAIpsRYs-LsspGLLashlTlR..DRsttssuEsVuushD ...................................................................HPLospttNppLhsYEpplKucF-pIVPsL+cluulQH-.DFhppAQplu+tcLGacLPpplL-cAWVpsLDMRA......LaAaClFpoachhu-pFapssPLpt....tpupthcpFLl-CGaHhlDlo.PCuDGRLutslsYsLRl.Phu.uVR.R+uaAGAhFDlEssVp+WscTEhcRaREuhPNsAptsTRYLKllsYHFSSsDPpHpGCAAHGSsDchAApAuhpRLh.cFRpAVENoFCCG.A.SVDlLLIGlDTDTDAIRVHlPsscG.chsLc+alsstpLYppThshss-pActpltpsl.........tutusus.p.GMhphlspLltNNlSQIDYVpphHsGtYs..DhGHAERFIGVG.GFcEVpLRNLsYFAHLDTVEEGAsDLDVGl.K.IFpuLNVu+sLPIPVll+F-Ysu+VPGuRERAltcCpRVpsAIpsRYscLsppGLLpstholR..D+stssshEhlut......................... 
0 8 19 28 +8769 PF08937 DUF1863 MTH538 TIR-like domain (DUF1863) Mistry J, Sammut SJ anon pdb_1eiw Domain This domain adopts the flavodoxin fold, that is, five parallel beta-strands and four helical segments. The structure is a three-layer sandwich with alpha-1 and alpha-4 on one side of the beta-sheet, and alpha-2 and alpha-3 on the other side. Probable role in signal transduction as a phosphorylation-independent conformational switch protein [1]. This domain is similar to the TIR domain. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.55 0.71 -4.10 55 389 2012-10-02 18:56:14 2006-08-01 15:25:41 6 7 328 1 118 650 62 126.70 18 55.87 CHANGED ++lFlSacapsDs...php........pl+shhstp....tp.....t.hshs.hc.h.....pppscs....tI+chI.ccplpsossslVLlGppT.tp.........pcWVcaEIp.........................................................tuhc.........pspsllulplpshp..s............ssshsssshsh ...................................................hcsFlSapapcst........hhp..........tltphh..tp...................t..php...phc.h.....pppspt.....tlcphl.ccplpsossslVL.lGtpT.tp...........................ppW.l..pa..E.....I..p......................................................................................................................................................................t..uhc.....................tspsllulhlps....pph........................hh............................................................................................................................................................................... 0 36 74 99 +8770 PF08938 HBS1_N DUF1916; HBS1 N-terminus Mistry J, Sammut SJ, Eberhardt R anon pdb_1ufz Domain This domain is found at the N-terminus of HBS1 proteins. It interacts with the ribosomal protein rpS3 at the mRNA entry site [1]. 
23.00 23.00 23.50 23.50 22.60 22.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.55 0.72 -3.86 47 250 2009-01-15 18:05:59 2006-08-01 15:33:01 5 9 187 2 144 233 1 100.30 25 15.27 CHANGED D.-Dh..................................................psEs-....................tpcpLssE-pct.hppshspl+ptLsstss.h..s-pplp-ALWaaYaDl-KulshLhpchpsptsppp ............................................................................p...-h...................................................-.-sp.............................tps.tpcpLss..Dpsp.L.sslschRplLG-ss.....s-pplhEAlh+.......htFDlpKulshllpppptps.t..t.................. 1 35 63 107 +8771 PF08939 DUF1917 Domain of unknown function (DUF1917) Mistry J, Sammut SJ anon pdb_1ztp Domain This domain is found in various hypothetical and basophilic leukaemia proteins. It has no known function. 20.40 20.40 20.40 20.70 19.70 20.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.76 0.70 -4.63 20 148 2009-09-11 14:20:58 2006-08-01 15:35:33 5 5 120 6 107 159 2 202.60 27 72.09 CHANGED phcphspsasstsaatptcsph.thh.....pst..............................t-tcspplhpstpos...................sEssssFLsRlsPuss.........Ia..........................t....chsptGpphLpthpphtthlptppsp.....t...uhsRtlsstRpth.pplhpLAhpstlhoGKWhlFlss-.cVDcsWptVAcATl.pGcLGhuAKVuT.......tscspspsRLIsVYTc......DasDcsDVhRVlc+LccL.Gllc.tt...IhYKsDsa..TYLs 
............................................................................................................t................................................................................t...........ts........................................................tttlstaLt+h.Psps...........................................................s......thhp.up...p.ht.hh.hhtt...t................tshpt...spt....+....h.tplhpLAhppplhoGKWhla.hsss...clD.csWstlAcAss..pGc.....L..........s....A....KVus..................psptstpplICVYTp......DFpDct-Vh+lhp.tl.+ph...Glhp...............lhYKsDsaTah.................................................. 0 33 62 83 +8772 PF08940 DUF1918 Domain of unknown function (DUF1918) Mistry J, Sammut SJ anon pdb_2a7y Domain This domain, found in various hypothetical bacterial proteins, has no known function. 20.80 20.80 21.10 26.40 20.70 18.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.81 0.72 -4.63 18 199 2009-01-15 18:05:59 2006-08-01 15:37:26 6 7 132 1 67 133 1 57.00 55 62.72 CHANGED M+ApsGDpLlV+Gpslsps-RcGEIlEV+G-cGuPPYlVRWpDpGHpohVaPG..PDAhV ...M+AcVGDhLVl+GsTlsptD+cGEIlEVRu.sDGuPPYlVRW...-sGH-uhVaPG..PDAlV............... 0 23 55 66 +8773 PF08941 USP8_interact USP8 interacting Mistry J, Sammut SJ anon pdb_2fzp Domain This domain interacts with the UBP deubiquitinating enzyme USP8. 
25.00 25.00 29.50 32.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.27 0.71 -4.95 5 110 2009-01-15 18:05:59 2006-08-01 15:38:36 5 7 71 6 69 97 0 166.80 59 55.14 CHANGED ELRolVQsQsp+lAELKpppsDpcpQluEQKRElpLlKtYlRAlRSoN..........PthRNlu-QlE+s..ElhcWssGLssARVTRWGGMISTPDssLQhlIRRuLsESGCPsHILN-LlENCHERRWPpGLuTLETRQtNRRhYEpYVsRRIP....GKQAVVVhuCENpHMuEslps-PGLVMIFAHGVE ................................cLRolVQpQQs+IuELccptuEpcpQluE.QKR-lQLLKsYMRAl.RSsN..........................PslpNlt-plEhs..EllcWssoLt.ARVTRWGGMISTPDslLQ..thIKRuLsESGCPspIls-LhENsHERpWPpGLuTLETRQhNRRhY-NYVs+RIP....GK......QAVVVhuC-NpHMs-Dhh.EPGLVM...IFAHGVE............................ 0 17 21 43 +8774 PF08942 DUF1919 Domain of unknown function (DUF1919) Mistry J, Sammut SJ anon pdb_2g6t Domain This domain has no known function. It is found in various hypothetical and putative bacterial proteins. 25.00 25.00 25.80 25.70 21.10 19.90 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.40 0.71 -4.96 15 177 2009-01-15 18:05:59 2006-08-01 15:41:26 5 2 148 2 17 139 4 178.80 38 78.08 CHANGED l....+p.........hh++lh+phhspph+p+LpNp...shTIISsNChGuhIhccLuhpFpSPFlNLalpssDYlKhLcNhcaYhpp-..LsFhpsscs..............tYPlGpL....sDIcl+FhHYpS.cEA+sKWpcRpcRINaDNLalhhsD+Duso.c.lpcFDpLPaKNKVlFosK....sYsplcSshaI.uhEsps....pVushh...tth.s+cYhcpFDhlsWlN ...............................hhpph.t..hshh.hhcLpsp...sholISsNChGuhlh+pLs..apoPFVs.Lal.s.pDal+hLpNhcaY.hppp..LsFhpppps.................YPlGh.L......sDlcIHFhHYpS.pEAppKWpcRppR.l.......Nh......cNLahhhs.-cD...u...s...o.c.lpcFDpLPapNKllFsp+....sY..thc..S..thhlpu..Epps....plu.hh.........pp.hhp.FDhhsWhp................................................ 0 6 11 15 +8775 PF08943 CsiD CsiD Mistry J, Sammut SJ anon pdb_1jr7 Domain This family consists of various bacterial proteins pertaining to the non-haem Fe(II)-dependent oxygenase family. 
Exact function is unknown, but a putative role includes involvement in the control of utilisation of gamma-aminobutyric acid [1]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -12.02 0.70 -5.52 11 476 2012-10-10 13:59:34 2006-08-01 15:43:17 5 1 456 2 26 252 906 284.50 85 90.83 CHANGED GaolssaspssRLhplTlscpsLcsFl-pspcasVQALEYKPFhRFclAchLcclsussLtshLssIlpDRcpGuFllsspGlss........ps--hVKhuTAluHLIG.sNaDAMoGpYYARFsVKssDNSDSYLRQAaRlM-LHsDGTaVcEtTDalLMMKhDEpNhtGG-ShLLHLDDWpDL-+FasHPLA+pshhasuPsSKNVsccVpHPVFFt.DspG+PsMpFIDQFspPpNh-EGhaLp-LS-SlEsSpsshsVplPlGshlVlNNaFWLHGRctFpts.sLpRELMRQRGtFs ..............................................................................GFTLhPSAQSPRLLELTFTEQTT+QFLEQVAEWPVQALEYKSFLRFRVGKILDDLCANQLQPLLLKTLLNRAEGALLINAVGIDDVA......QADEMVKLATAVAHLIGRSNFDAMSGQYYARFVVKNVDNSDSYLRQP......HRVMELHNDGTYVEEITDYVLMMKIDEQNMQGGNSL.........LLHLDD.W.E.H.L.Dc.aFRHPLARRPMRFA....A.PP....S.....K..N.V...SK.D.VFH...PV..FDVDQQ.G.R.PVM.RY....I....DQF.VQ........PK...DFEEGV.W..LS..ELSD...AI..E.......TSK..GI.......L.SV.......P.....VPVGKFLLINNLFWLHGRDRFT.P..HP.D...LRRELMRQRGYFA.............. 1 5 8 18 +8776 PF08944 p47_phox_C NADPH oxidase subunit p47Phox, C terminal domain Mistry J, Sammut SJ anon pdb_1k4u Domain The C terminal domain of the phagocyte NADPH oxidase subunit p47Phox contains conserved PxxP motifs that allow binding to SH3 domains, with subsequent activation of the NADPH oxidase, and generation of superoxide, which plays a crucial role in host defense against microbial infection [1]. 
21.50 21.50 22.30 22.40 21.20 20.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.94 0.72 -3.78 12 85 2009-01-15 18:05:59 2006-08-01 15:48:52 6 11 38 2 33 74 0 58.90 39 14.82 CHANGED .LQpR+pt.cshPpusuos......spsppsKstPslPPRPSs-LILcRCoEsT++Klo..pus ............................................hppR+p..tshsppssss....................spspcsKspPsVPPRPSs-LILpRCoEsT++Kls.....s....... 0 4 9 18 +8777 PF08945 Bclx_interact Bcl-x interacting, BH3 domain Mistry J, Sammut SJ anon pdb_1pq1 Domain This domain is a long alpha helix, required for interaction with Bcl-x. It is found in BAM, Bim and Bcl2-like protein 11 [1]. This domain is also known as the BH3 domain between residues 146 and 161. 25.00 25.00 62.30 62.20 21.80 20.60 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.05 0.72 -4.42 6 51 2009-09-10 23:22:43 2006-08-01 15:49:49 5 2 33 14 21 67 0 38.60 75 22.36 CHANGED huup+ppputPsc.lpPElWIAQELRRIGDEFNu.a..P.RRu ......MAShRQSQAEPAD.MRPEIWIAQELRRIGDEFNA.Y.Ys.RR.... 0 2 3 6 +8778 PF08946 Osmo_CC Osmosensory transporter coiled coil Mistry J, Sammut SJ anon pdb_1r48 Domain The osmosensory transporter coiled coil is a C-terminal domain found in various bacterial osmoprotective transporters, such as ProP, Proline/betaine transporter, Proline permease 2 and the citrate proton symporters. It adopts an antiparallel coiled-coil structure, and is essential for osmosensory and osmoprotectant transporter function [1]. 23.80 23.80 23.90 24.40 23.70 23.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.31 0.72 -3.93 10 544 2009-01-15 18:05:59 2006-08-01 15:51:13 5 3 540 2 42 183 0 46.10 82 9.30 CHANGED SDlpEAKElLpEHHDNIEQKIEDIDpQIAELpcKRp+LlcQHP+Is .....SDIQEAKEILsEHYDNIEQKIDDI.......D.......cEI.......A-....LQ.......A.......K.......RoRLVQQHPRID.... 
0 5 11 25 +8779 PF08947 BPS BPS (Between PH and SH2) Mistry J, Sammut SJ anon pdb_2auh Domain The BPS (Between PH and SH2) domain, comprised of 2 beta strands and a C-terminal helix, is an approximately 45 residue region found in the adaptor proteins Grb7/10/14 that mediates inhibition of the tyrosine kinase domain of the insulin receptor by binding of the N-terminal portion of the BPS domain to the substrate peptide groove of the kinase, acting as a pseudosubstrate inhibitor [1]. 19.10 19.10 19.20 25.40 18.30 17.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.42 0.72 -4.34 4 191 2009-01-15 18:05:59 2006-08-01 15:55:43 5 6 54 1 97 163 0 47.70 66 9.83 CHANGED RSsSENSLVAMDFSGct.GRVI-NPsEA.SsAlEEGpAWRK+oshRhuhh ............RSlSENSLVAMDFSGpt.GRVIENPsEA.SsAlEEG....pAW.R..K+us.Rhsh.......... 0 11 18 45 +8780 PF08948 DUF1859 Domain of unknown function (DUF1859) Mistry J, Sammut SJ anon pdb_1w8x Domain This domain has no known function. It is predominantly found in the N-terminus of bacteriophage spike proteins [1]. 25.00 25.00 32.00 205.40 21.70 20.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.68 0.71 -4.39 2 14 2009-01-15 18:05:59 2006-08-03 15:07:56 5 2 6 1 0 7 0 123.50 66 53.00 CHANGED Msspp.st.TVT..YNGs.shtGP.ulpuhh-.lAGppVhhDLph.hsTtthoGVQslYID.t-..Gshplsh.-TGQRIps.AGpQGYaPlLssshhKFlspthhsGK..hPh.FlNFsIA.GVW Msspp.st.TVT..YNGs.shtGP.ulpuhh-.lAGppVhhDLph.hsTtthoGVQslYID.t-..Gshplsh.-TGQRIps.AGpQGYaPlLssshhKFlspthhsGK..hPh.FlNFsIA.GVW 0 0 0 0 +8781 PF08949 DUF1860 Domain of unknown function (DUF1860) Mistry J, Sammut SJ anon pdb_1w8x Domain This domain has no known function. It is predominantly found in the C-terminus of bacteriophage spike proteins [1]. 
25.00 25.00 462.10 461.40 19.00 18.90 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.65 0.70 -4.74 2 7 2009-01-15 18:05:59 2006-08-03 15:09:31 5 1 6 6 0 7 0 219.00 96 64.41 CHANGED PSGIKGDKGDPGtPGPAGGTVVVEDSGA.FGESLLDTsS-PGKILVKRISuGSGITlTDYGDclEIEAo.GGGGGGGGVTDALSLhYoTSoGGPASIAANALTDFDLSGALplNpVGTGhTKuAsGIQLAAGKSGLYQlTMTVKNNTVTTGNYLLRVKYGSs-aVsACPASoLTAGGTISLLIYCpVLGVPSLDVLKFSLCNDGAALSNYIINITAAKIN PSGIKGDKGDPGtPGPAGGTVVVEDSGA.FGESLLDTTSEPGKILVKRISuGSGITVTDYGDEVEIEAS.GGGGGGGGVTDALSLMYSTSTGGPASIAANALTDFDLSGALTVNoVGTGLTKSAAGIQLAAGKSGLYQITMTVKNNTVTTGNYLLRVKYGSSDFVVACPASSLTAGGTISLLIYCsVLGVPSLDVLKFSLCNDGAALSNYIINITAAKIN 0 0 0 0 +8782 PF08950 DUF1861 Protein of unknown function (DUF1861) Mistry J, Sammut SJ anon pdb_2b4w Domain This hypothetical protein, found in bacteria and in the eukaryote Leishmania, has no known function. 18.70 18.70 102.30 102.20 17.30 16.20 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.91 0.70 -5.61 12 151 2009-01-15 18:05:59 2006-08-03 15:10:58 5 1 107 1 25 116 5 292.00 48 95.12 CHANGED htsFcts..cpha-sshLoFpGVsshDVYNhSsPFphpGchaIhGRVEcRDp.tpScVhhFpEstcspassV.sshsapLpDPFlo+lpsEhlFGGsclh....sssphhsahssFYRGp.hppLpaFsoGP-tMKDIRlVcLtDG+IGVFoRPpsc..ucshIGFshlssLsELTs-sIspA.hlc.thps-tWGGVNpsaLLosGplGsluHhuhh.Dpcsscc....YhshSFVhsPcTpphpph+lIuT+usFsssssKtPcLtDssFuuGllh+sDG+s-LYuGlGDsctt+lsI-.PFcGa ...........h...acpp..ppsapst+LpFsGV.s-+DVYNIoAPFp.tGphhIAGRVEuRDS.EtScVhFFpcp.pspas.VEssssat.LQDPFloplpGELIhGGVElhP+..sssshLsW+TsFYRGpslpsLppFhsGPsGMKDIRlscLtDG+IGVFTRPQG-h.....GGRGpIGashIsoL--LT.EsIpsA.LLcppFss-EWGGsNEsHLLssGpIGlLGHIAsFDppGsRH....YYshsFhhNs-opphpphKIIApRusFhPussKRPDLsDVlFoGGLlhpsDGsscLYuGluDu-Ap+IsIsDPFps...... 2 12 20 22 +8783 PF08951 EntA_Immun Enterocin A Immunity Mistry J, Sammut SJ, Coggill P anon pdb_2bl8 Family Gram-positive lactobacilli produce bacteriocins to kill closely-related competitor species [1]. 
To protect themselves from the bacteriocidal activity of this molecule they co-express an immunity protein (for discussion of this operon see Bacteriocin_IIc Pfam:PF10439). The immunity protein structure is a soluble, cytoplasmic, antiparallel four alpha-helical globular bundle with a fifth, more flexible and more divergent C-terminal helical hair-pin [2]. The C-terminal hair-pin recognises the C-terminus of the producer bacteriocin and this interaction is sufficient to dis-orient the bacteriocin within the membrane and close up the permeabilising pore that on its own the bacteriocin creates [3]. These immunity proteins interact in the same way with other bacteriocins, family Bacteriocin_II, Pfam:PF01721. Since many enterococci can produce more than one bacteriocin it seems likely that the whole operon can be carried on transferable plasmids [4]. 21.40 21.40 21.40 21.50 21.10 21.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.20 0.72 -3.83 62 1139 2009-01-15 18:05:59 2006-08-03 15:14:23 5 3 516 8 131 559 0 73.70 19 66.60 CHANGED pcppph...hpplhpll...p..s.ptpp.plcplLhpstpclcpsps...h.hlhs+Lspslpthsh.p...phpLsps.pphhppl ............................pph...hphl.ppLh....pphchs.tsp..clpphLhpstpcLcpspp....phlhscLsptls.hhhhp...phphstplhshhh.l..................... 0 24 44 81 +8784 PF08952 DUF1866 Domain of unknown function (DUF1866) Mistry J, Sammut SJ anon pdb_1ufw Domain This domain, found in Synaptojanin, has no known function. 
20.40 20.40 20.50 20.50 20.10 20.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.76 0.71 -4.66 6 189 2012-10-02 20:46:34 2006-08-03 15:35:34 6 6 73 2 80 187 1 141.10 47 10.92 CHANGED Dl-lhEVDsppRpsVac-VIuspGPPDuTllVslpS.........s.sEsshFD-sLhspLlppLupaGEVsLVRFVp-pMWVTFtDGpSALssLslsuhpVhG+slpI+LKS.-Wl+tLE-El..psssplshu...ossSsLLu-ssshsss-a ............................-l-l.EV-sptRppVapEVhuhQGP.DuTVlVslpS...............oh.Eps.F.s-sLhsELhQphus.aG.p.ll.Ll.R.h.....sp.s...pM.....hVTFh-upSALsVLs.lsG..hcl.hGR.slpIp.Ko.D..Wl...KsLcEEh...p.phhslu...ossSsLLtEsh-hst...................................................... 1 18 25 47 +8785 PF08953 DUF1899 Domain of unknown function (DUF1899) Mistry J, Sammut SJ anon pdb_2akf Domain This set of domains is found in various eukaryotic proteins. Function is unknown. 20.70 20.70 20.70 20.80 20.40 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.05 0.72 -4.33 33 702 2009-01-15 18:05:59 2006-08-03 15:39:09 6 34 265 2 401 638 1 64.30 49 12.32 CHANGED +hVRsSKaR...HVFGpssK+EpCY-sl+locssWDoshsulNPKalAVsh-uuGGGuFhVlPlsc.sG .........phVRsSKFR............HVFGp.ss.Kp...-pCY-sl.+.l..S+s.s..W.D.o.sFCA..............VNP+FlAllhEuuGG.G..AFhVlPLpcoG........ 0 119 174 282 +8786 PF08954 DUF1900 Domain of unknown function (DUF1900) Mistry J, Sammut SJ anon pdb_2akf Domain This domain is predominantly found in the structural protein coronin, and is duplicated in some sequences. It has no known function [1]. 
21.20 21.20 21.20 21.60 20.90 20.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.62 0.71 -4.64 64 952 2012-10-05 17:30:43 2006-08-03 15:41:35 6 51 276 2 567 880 1 128.00 42 25.62 CHANGED Plss.hslDsusGlLhPaYDpDosllYLsGKGDusIRYaEl..ss-...t.hlphlspapSssPp+.........................Ghu.hhPKpulDVpcsElsRhh+lsss..s......lcPlSahVPR+.u.-hFQcDlYPsohus.cPuloAc-Wh.sGpssp..PhhlSl ......................lt..plDs.Ss.GlLhPaYDsDosllYLsGK......G..DusIRYaEl.........ss-..........tPalph.Lsp.a..p.S...t.c.PQ+.........................G.hu.hhP..KRu..l-VpcsE.ls.RhaKLpppp......lEPIuhhVPR+.........u....-hFQ-Dl.....YPsTsus.cP.u.ls.AcEWhsGpstt..PhhlSL..................... 0 177 252 404 +8787 PF08955 BofC_C DUF1901; BofC C-terminal domain Mistry J, Sammut SJ, Bateman A anon pdb_2bw2 Domain The C-terminal domain of the bacterial protein 'bypass of forespore C' contains a three-stranded beta-sheet and three alpha-helices. Its exact function is, as yet, unknown [1]. 20.60 20.60 20.60 25.20 19.80 20.00 hmmbuild --amino -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.37 0.72 -3.93 10 152 2009-01-15 18:05:59 2006-08-03 15:43:03 5 2 152 1 28 109 0 75.70 57 41.96 CHANGED DDISPLsKsNGYhGlo-DGllSlFcG+Pcps.chIQSFFQIDlp+LES+hpcpLc+GIPl+TKpcaccVIEth+sY ..........DDISPLLKssGYFGVS--GlLpIF+GsP...cs-..psI+SFFQIDh+K....LE....Sa....c....+....s+LK+GIRI+SKEtFscsIE+hKpY.... 0 9 18 20 +8788 PF08956 DUF1869 Domain of unknown function (DUF1869) Mistry J, Sammut SJ anon pdb_1nei Domain This domain is found in a set of hypothetical bacterial proteins. 
21.30 21.30 21.50 21.30 20.20 20.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.87 0.72 -4.37 3 423 2009-01-15 18:05:59 2006-08-03 15:45:07 5 4 421 2 17 63 0 58.90 87 91.84 CHANGED pscGpFLLTVTNNNNGVSVDK-FSoLAsL+DPplAAEoVKDLVNIVRGYDoDEETNVCGW ......MGKATYTVTVTNNSNGVSVDYETETPMTLLVPEVAAEVIKDLVNTVRSYDTENEHDVCGW......... 0 1 5 11 +8790 PF08958 DUF1871 Domain of unknown function (DUF1871) Mistry J, Sammut SJ anon pdb_1u84 Domain This set of hypothetical proteins is produced by prokaryotes pertaining to the Bacillus genus. 19.60 19.60 20.80 19.70 19.20 18.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.61 0.72 -3.91 7 175 2009-09-11 15:05:30 2006-08-03 15:49:52 5 2 170 1 22 97 0 78.70 47 89.54 CHANGED ptMlclltpWDPFphGc-aY-TEsuDVlpAlash-DPcpLA+pIQpIYEhSFEphlPlEsCp+lAtpLLhIKpuuSCoh ......pMlcllcsWDP...Fp.....h....G....s......-....FYETEAuDVVsllpshD..Dsc..h..lAKcIQ+IYhhSFE..E...s....s..l..EcC.E..K..lAhcLLsIK-uuSCo.L.................... 1 5 12 16 +8792 PF08960 DUF1874 Domain of unknown function (DUF1874) Mistry J, Sammut SJ anon pdb_2blk Domain This domain is found in a set of hypothetical viral and bacterial proteins. 20.30 20.30 22.00 21.30 19.50 19.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.12 0.72 -4.29 12 35 2009-09-12 20:42:06 2006-08-03 15:52:21 5 3 35 6 11 38 1 104.10 29 78.08 CHANGED Mt......lalhNuhhhsh..stpshhphccIshpEsKphl....psppFlSAIGHcuTAplloplhsssl.hNRlplphp.GD+AlshhLppRl.EGpVL.opcElc......cIuaphhlh ...................lYllNuhslsh...sp....shhchcclsh.cEscphl.....pspphlSA.IGHcuTApllspLL.s.........ssl.h..NR..lplchptGDcslshp.LppRl.EGpll.stcElp......clta.hh..h................... 0 7 9 9 +8793 PF08961 DUF1875 Domain of unknown function (DUF1875) Mistry J, Sammut SJ anon pdb_2crb Domain The MIT domain, found in Nuclear receptor-binding factor 2, has no known function. 
25.60 25.60 25.70 25.70 25.50 25.50 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.69 0.70 -5.06 3 73 2009-01-15 18:05:59 2006-08-03 15:53:06 5 3 39 1 40 59 0 206.80 62 81.58 CHANGED MKLTESEQAHLSLELQRDSHMKQLLLIQERWKRAKREERLKAQQuTE+DAAsHLQssH+PSsEDAEGQSP.LSQ.Y.PSTE+pLPElQGlFDRDPDTLLYLLQQKuEPuEPCIGSKAPKDDKTIIEEQATKIADLKRHVEFLVAENERLRKENKQLKAEKARLLKGshEKELDVDADFVEpSELWuLPsHuEoAsASSTWQKFAANTGKAKDIPIPNLPPLDFPSPELPL.....MELSEDILKGFMND ....................MKLTQSEQA+LSLELQRDSHMKQLLLIQERWKRApREERLKAQ...Qs...sD+-ssspLQsSt+PuuEDu-ups...P.l...s...pcaSPSsE+pLsElpGlFDRDPDTLLaLLQpKp.....E.....PsE.....PC.IGSKAPKDDKTIIEEQATKIADLKRHVEFLVAENERLR+ENKQLKAEKARLlKGPhEKELDVDADFVEpSELWuLPP+S-oAsu.S..uoWQKFA.usoGKA.KDIPIPNLPPLDFPSPE......LPL.....hELSEDILKGhMs................................................. 0 2 5 20 +8794 PF08962 DUF1876 Domain of unknown function (DUF1876) Mistry J, Sammut SJ anon pdb_2fgg Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 38.90 38.50 23.90 22.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.73 0.72 -4.20 6 190 2009-01-15 18:05:59 2006-08-03 15:54:17 6 2 118 1 53 125 0 86.30 47 88.10 CHANGED HVhpsWpVDlpI-EHDp+TRAKARLR.WcG+ElVGVGLARLcPADcsVPcIGDELAlARALSDLApQLLsloocDIEAuTHQPu+sLa .......hppWpl-l..I.-E+.....-.....t.....p.TRA+ARLc..h.....ss..p....pls..GsGlARhsPuDpsVPcIGDELAlARALuDLAppLlphuspDIEA..s.THpPsphl........... 0 16 44 50 +8795 PF08963 DUF1878 Protein of unknown function (DUF1878) Mistry J, Sammut SJ anon pdb_1sed Family This domain is found in a set of hypothetical bacterial proteins. 
20.90 20.90 20.90 22.50 20.60 20.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.50 0.71 -3.86 5 133 2009-01-15 18:05:59 2006-08-03 15:55:35 5 1 128 3 17 68 0 110.50 60 97.71 CHANGED MpSlEcRlu+LEYYlcLLlcslDM-KYPFYuLlI+KsLoKEEu-Elt+lCcELu-EhEsQKAQGaVhFDcLLsLFAGQLsEKL-VHETIFALacQGLFpPLMsEFIoII+paD ...M.DVV+RLEQAEYYV-LLFKMI...DEEK..CPFYSLIIKKKARKKDIERILsLCEpLNEQYlsEKAEGLLLFDALLDQFEKALPHQLEVHETAEALtKQGLFpPLMNEFLsMIA+..c..................................... 0 1 7 10 +8796 PF08964 Crystall_3 DUF1881; Beta/Gamma crystallin Mistry J, Sammut SJ, Eberhardt R anon pdb_1yhp Family This family of beta/gamma crystallins includes the N-terminal domain of Dictyostelium discoideum Calcium-dependent cell adhesion molecule 1 (Swiss:P54657), which mediates cell-cell adhesion through homophilic interactions [1]. 26.70 26.70 26.70 27.00 26.60 26.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.86 0.72 -4.26 17 98 2012-10-01 23:14:22 2006-08-03 15:57:29 5 9 60 8 21 97 3 87.90 34 21.43 CHANGED .sstshFappKNapGpuapYspussl....h.h.sspLND+FhSVclGstscVhhWcH.......pstshhhchssspsDlop.lsGLS+FpVhs ................tVCFYt-cNasG-SlChspGppl...........tsh.ssphND+lpSIpIPtuhpVTlYEcs....satGt.hshptshs...l.......................hs.p.s.h..................... 0 7 16 18 +8797 PF08965 DUF1870 Domain of unknown function (DUF1870) Mistry J, Sammut SJ, Bateman A anon pdb_1s4k Domain This domain is found in a set of hypothetical bacterial proteins. It contains a helix-turn-helix domain so may be a DNA-binding protein. 
23.60 23.60 24.10 24.40 22.90 23.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.66 0.71 -4.44 4 448 2012-10-04 14:01:12 2006-08-03 15:59:13 5 2 411 2 9 103 0 104.80 77 97.79 CHANGED MNshELQALR+IhhhsIsEsAphIupsssSpTWQpWEsGclsIPs-l.tclhphpphRp+hlstllccls.......NtlGp.ThRaasDLpuFpphYs-usalcW+lYQSVsucLaAcsh-.cLs ......MNAYELQALRHIFAMTIDECATWIAQTG......sSESWRQWENG+CAIPDpVVEQLLAMRQQRK++lpAIl-KIN.......NRI..G...NNT.MRFFPDLTAFQpVYPDGNFIDWKIYQSVAAELYAHDLE.RLC........................................ 0 2 4 6 +8798 PF08966 DUF1882 Domain of unknown function (DUF1882) Mistry J, Sammut SJ anon pdb_2atz Domain This domain is found in a set of hypothetical bacterial proteins. 20.50 20.50 21.80 54.40 19.10 18.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.46 0.72 -4.01 10 188 2009-09-11 17:04:33 2006-08-03 16:02:25 6 1 188 1 17 76 0 72.60 66 40.34 CHANGED MoshDLsLIKh.TsHYYIKRDsIlsKIca+GRpFasKFERIDAPLohsllpcHhc+cIslAHSLIs.+sDKVEN .........hosMDhSLIKIIoDHYYI+RDpIspKITH+GRlFFDKFERVDAPLNhNlM+EHssKKIlVAHDLIo.KDNKVEN..... 0 5 14 17 +8799 PF08967 DUF1884 Domain of unknown function (DUF1884) Mistry J, Sammut SJ anon pdb_1she Domain This domain is found in a set of hypothetical bacterial proteins. It shows similarity to the N-terminus of ATP-synthase. 28.60 28.60 28.80 72.50 27.90 28.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.75 0.72 -3.92 7 26 2009-01-15 18:05:59 2006-08-03 16:05:13 5 1 14 1 21 30 0 87.80 46 89.25 CHANGED Mss..ps.hlcIls.lEptIsELK.-Ga-PDlILsG.Ehhcahsc..........shhph.pl+VhhlEELGsDAllsDSKhLG.l.tAuKRIpI.P .........ps.hlclLshlEcphNELKh-GaEPDllLsG.Ehhcalsp..........chhchssLKlhll-ELGsDAVlsDSKhLG.lhtAuKRIpI.... 
0 2 2 11 +8800 PF08968 DUF1885 Domain of unknown function (DUF1885) Mistry J, Sammut SJ anon pdb_1t6a Domain This domain is found in a set of hypothetical proteins produced by bacteria of the Bacillus genus. 25.00 25.00 29.60 118.30 20.60 19.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.66 0.71 -4.47 4 111 2009-09-11 06:05:25 2006-08-03 16:06:35 5 1 111 1 11 61 0 131.20 70 99.20 CHANGED M.ppAaIKLV.pSsppoloI-DVKcLhchYKslTuKTG-QlsaAYspsAFPYEIh-pot...osLhLQSsc-RYssIhlGVs.........oEp-QoFIQloLPssATaGDKGKANEFs+FLAKKLpGELQLFNGRTMYFhpR ....MQHAFITLVPKSN.QQSVSIDDIKQLFHYYKTVTSKTGsQINYuYTNTAFPY-ILDTSs......TTLKLQSoH.DRYDSIYlGVG..........I.Ep....E...QS.....aIQlSLPPNATFGDKGKANEFCRFLAKKLEGELQLFNGRTMYFYKR. 0 3 6 7 +8801 PF08969 USP8_dimer DUF1873; USP8 dimerisation domain Mistry J, Sammut SJ, Bateman A anon pdb_2a9u Domain This domain is predominantly found in the amino terminal region of Ubiquitin carboxyl-terminal hydrolase 8 (USP8). It forms a five helical bundle that dimerises [1]. 20.90 20.90 20.90 21.00 20.70 20.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.35 0.71 -4.13 25 410 2009-01-15 18:05:59 2006-08-03 16:08:08 6 16 194 4 269 390 3 106.40 26 18.47 CHANGED uphK.Lahu....sslccLpchsphh.pstpstshph.....hhpoApKlhcpA-cath-GD-EpAYlhYMKahslh.ptI+c+sDYpppcsph+phLGssphpchhhsp....hEcLppSLppRYcpt .....................t..............plctLsphu.ps...h.ph...s...pshsh..+p........YhRousclh+tAphYtpEGshEpAYlLYh+ahsLhhcplspH.Da+..p.st...s..+p........th....p......p....hpch....h..h.c.........hEpL+...tpLht+Yp.............................................. 
0 71 118 194 +8802 PF08970 Sda Sporulation inhibitor A Mistry J, Sammut SJ anon pdb_1pv0 Domain Members of this protein family contain two antiparallel alpha helices that are linked by a highly structured inter-helix loop to form a helical hairpin; the structure is stabilised by numerous hydrophobic and electrostatic interactions. These sporulation inhibitors are antikinases that bind to the histidine kinase KinA phosphotransfer domain and act as a molecular barricade that inhibit productive interaction between the ATP binding site and the phosphorylatable KinA His residue. This results in the inhibition of sporulation (by preventing phosphorylation of spo0A) [1]. 21.10 21.10 21.50 27.00 20.50 18.10 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.06 0.72 -4.47 18 164 2009-01-15 18:05:59 2006-08-03 16:19:17 5 1 141 4 36 93 1 45.20 58 91.48 CHANGED Mcp...LSDELLhESYaKApELpLsPDFItLIcpEIhRRSLpcKlshSS ......Mcp...LSsELLhESYaKApEL+LssDFIhLIcpEIhRRSLccplshSS........ 0 11 26 28 +8803 PF08971 GlgS Glycogen synthesis protein Mistry J, Sammut SJ anon pdb_1rrz Domain Members of this family are involved in glycogen synthesis in Enterobacteria. The structure of the polypeptide chain comprises a bundle of two parallel amphipathic helices, alpha-1 and alpha-3, and a short hydrophobic helix alpha-2 sandwiched between them [1]. 24.20 24.20 24.40 24.20 23.90 24.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.28 0.72 -4.05 4 480 2009-09-13 10:19:12 2006-08-03 16:20:08 6 1 476 1 21 81 0 65.20 75 97.93 CHANGED M.D+slYShsNFDFLApSFARMpuEGR.VDItAVTGNMsEuppsWFpcRYuaYpQQhhQtpthplEH ...........M.ccsl.SLNNFDFLARSFARMHAEGRPVDIhAVTGNMDEEHRTWFCARYAaYCQQhhQARELELEH.......... 
0 1 4 12 +8804 PF08972 DUF1902 Domain of unknown function (DUF1902) Mistry J, Sammut SJ anon pdb_1wv8 Domain Members of this family of prokaryotic proteins adopt a fold consisting of one alpha-helix and four beta-strands. Their function has not, as yet, been elucidated [1]. 29.40 29.40 29.80 29.50 29.30 28.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.71 0.72 -4.41 8 55 2012-10-02 16:06:15 2006-08-03 16:21:41 6 1 52 1 31 65 2 53.10 39 65.57 CHANGED lpIQAsWDsEAGVWVApSDDlPGLlTEA-TlEtLhcKlpsMlPDLLp-Nusspt .............htlcshWDpEAsVWVApSsDl...PGLsTEAsTl-sLhpKlpshIs-LLphNt....t....... 0 7 16 21 +8805 PF08973 TM1506 DUF1893; Domain of unknown function (DUF1893) Mistry J, Sammut SJ, Iyer, LM anon pdb_1vk9 Domain A member of the deaminase fold that binds an unknown ligand in the crystal structure. The protein is ADP-ribosylated at a conserved aspartate [1]. Contextual analysis suggests that the domain is likely to bind NAD or ADP ribose either to sense redox states or to function as a regulatory ADP ribosyltransferase [2]. 20.50 20.50 20.80 29.90 18.30 20.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.56 0.71 -4.62 9 122 2009-01-15 18:05:59 2006-08-03 16:24:45 5 3 117 1 18 93 1 129.50 36 76.92 CHANGED M-p.....hhclLccGGYShVlhp-pc.lpT.spRGltsLh-L...lpppsp.hcGAtlADKVlGKAAAhLhlhGGl+plYAclISpsAlclLcpssIcssYschVPaIhNRstoGhCPlEphsh-l-sscchaclIppFlpK ...........................p.....hhphLpptshohslhpc.pc..lho.ppRGltsLh-l....lspt.p....hcGup.lsDKVVGKAAAslhlhuGlppVaAsllSpsAhclLcptuIcVsapchVshI..s.RstsshCPhEshs.pl-ssEEhhthl.t........... 1 10 16 16 +8806 PF08974 DUF1877 Domain of unknown function (DUF1877) Mistry J, Sammut SJ anon pdb_1ryl Domain This domain is found in a set of hypothetical bacterial proteins. 
22.00 22.00 22.80 22.40 21.90 21.50 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.06 0.71 -4.59 16 510 2009-01-15 18:05:59 2006-08-03 16:27:53 5 2 476 2 63 275 3 158.20 50 98.00 CHANGED MGMhspYhplsccclpcLh..st............stpshhphht-htpsp...........................phDl-KtWcsLHalLTGsss..........ppssPlu.slhGs..psht.-ss.s..sphhsssclpplsctLpslshcpLhppFshpphppsclYPs.h.p.pptcp-hh-plhphatcLpcFapcsucpspslLlhI .....................................................................................................MGMIGhascIcsEclspLL....co.................................sccsLhD.sIcD...shsshc.........................................cLDIDKpWDhLHFsLTGoSAh.............-PscNDPLS+AVLGE...........cSL.....E..Du...l...DG..F...luLThspElAusl-+LEuLDcsELRKp......F......Sl...........K+Ls......Eh.E......I......YPG..l........shsEEh.E..up..l...F..us..Ihh.chEKLluhY++hL+pGspsLssI............................. 0 28 46 56 +8807 PF08975 2H-phosphodiest DUF1868; Domain of unknown function (DUF1868) Mistry J, Sammut SJ anon pdb_2fsq Domain This group of 2H-phosphodiesterases comprises a single family typified by the protein mlr3352 from M.loti. Members are also present in various alpha-proteobacteria, Synechocystis, Streptococcus and Chilo iridescent virus. The presence of a member of this predominantly bacterial group in a large eukaryotic DNA virus represents a potential case of horizontal transfer from a bacterial source into a virus. Several proteins of bacterial origin have been noticed in the insect viruses (L.M.Iyer, E.V.Koonin and L.Aravind, unpublished observations and these appear to have been acquired from endo-symbiotic or parasitic bacteria that share the same host cells with the viruses. Presence of 2H proteins in the proteomes of large DNA viruses (e.g. T4 57B protein and the Fowl-pox virus FPV025) may point to some role for these proteins in regulating the viral tRNA metabolism. 
Each member of this family contains an internal duplication, each of which contains an HXTX motif that defines the family. 21.00 21.00 21.30 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.52 0.71 -4.34 13 265 2012-10-03 21:31:48 2006-08-03 16:30:44 5 1 258 1 37 106 81 117.50 61 51.47 CHANGED KFspsGchhPhsGNTlVCHL-psusshp...slLshppphhshshAs+hsFhPsSShHMTlFpGlh-pcRpsshWPucLPlDsslschsshatpRLcsFsh..sssFphtl......ssspP.tulhL ...KFKENGEFNHFPGNTVVAN..LYTK..Q.DLME....VVDIIQSRYRELP..FI.DK.FT.L.TPRNSIHMTVIELLCHENRETEFWSSNLPLDTPLQEIHDYFAKQLEIFPL......LDEEIHMRl......TEMGK.QNIL.V......................................... 1 5 17 22 +8808 PF08976 DUF1880 Domain of unknown function (DUF1880) Mistry J, Sammut SJ anon pdb_1wlz Domain This domain is found predominantly in DJ binding protein. It has no known function. 21.30 21.30 22.30 21.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.54 0.71 -3.97 2 37 2009-01-15 18:05:59 2006-08-03 16:31:47 6 21 27 4 18 46 0 116.30 52 9.47 CHANGED lQILTDEQFDRLWsEhPVNAKGRLKY.DFLS+hS.Epsso.PMAsGD...SshAQRGSSsP-hSpGTRSsL..Psp-.RsG.KSpSHPCTPs......GTPPLQNC-PIESRLRKpIQGCWR ..............................................................................................................lQlLTDEQFDRLWsEMPVNAKGRLKY.DFLS+FSoEcs.s....T...Ph..AsGD.......Ssh.AQ.RGSSV.P-lS...pusR...SAlSs.Psp-hRsG.KspS...H.P.CT.....Pustt..shsGoPPLQNC-PIES+LRK+IQGCWR...................................... 0 3 3 6 +8809 PF08977 BOFC_N Bypass of Forespore C, N terminal Mistry J, Sammut SJ anon pdb_2bw2 Domain The N-terminal domain of 'bypass of forespore C' is composed of a four-stranded beta-sheet covered by an alpha-helix. 
The beta-sheet has a beta2-beta1-beta4-beta3 topology, where strands beta1 and beta2 and strands beta3 and beta4 are connected by beta-turns, whereas strands beta2 and beta3 are joined by an alpha-helix that runs across one face of the beta-sheet. This domain is similar to the third immunoglobulin G-binding domain of protein G from Streptococcus, the latter belonging to a large and diverse group of cell surface-associated proteins that bind to immunoglobulins. It has been hypothesised that this domain may be a mediator of protein-protein interactions involved in proteolytic events at the cell surface [1]. 25.00 25.00 26.80 26.40 24.70 20.60 hmmbuild --amino -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.36 0.72 -4.15 10 128 2009-01-15 18:05:59 2006-08-03 16:36:58 5 2 128 1 18 85 0 50.80 61 29.42 CHANGED lollLE+lYLDGEVSEEp+sETVhohEDFWupY+sWpLV-hc-splVFRKp ..VTILLERMYVDGEVSEEIhTEKVssLEcFLQQYKEWQLVDRDDsQIVLQKK.. 0 3 10 12 +8810 PF08978 Reoviridae_Vp9 Reoviridae VP9 Mistry J, Sammut SJ anon pdb_1w9z Domain This domain is found in various VP9 viral outer-coat proteins. It has no known function. 19.00 19.00 28.90 21.30 17.60 16.50 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.84 0.70 -5.44 2 12 2009-01-15 18:05:59 2006-08-03 16:39:53 5 1 5 3 0 10 0 187.80 64 93.29 CHANGED MlS-s.lRuhK+LuhpTpRssG-pThtLsSpVKLSKGEVEhlAVTKcEhh-tLtQCNL.plE.lsh-tTFNGslhRhuAahFlhpu.hlhhspslAVp.s.pYuTslAG.shphT.VhssphhhhtplstGs.usPaSspsuuLhIT.thsLhss.l.sGplhVLF.TSL.TThspoNSaAYShCslPhpcWD.phIKLTuETSCsSLsuMpsLsNSll.G-Rs.ssGLYVDI.GVTVoTSsS.uoLPlTslssshPlhFpAhs+.VEpVu.INhLYsLu .......................................................................................................................................................................................................................WDFNMIKLTAETSCsSLTtMTNhlNoLV.GDRsRPVGLaVDIPGVTVTTSASluoLPITTIPAsTPLIFSAYhKQVEEVGlINTLYuLS... 
2 0 0 0 +8811 PF08979 DUF1894 Domain of unknown function (DUF1894) Mistry J, Sammut SJ anon pdb_1z9v Domain Members of this family have an important role in methanogenesis. They assume an alpha-beta globular structure consisting of six beta-strands and three alpha-helices forming the secondary structural topological arrangement of alpha1-beta1-alpha2-beta2-beta3-beta4-beta5-beta6-alpha3 [1]. 25.00 25.00 26.20 59.70 23.60 16.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.05 0.72 -3.93 20 55 2009-01-15 18:05:59 2006-08-03 16:42:58 6 2 46 1 42 54 1 91.30 39 87.56 CHANGED M.u.Cl-sh.....sYEILL+psoFKECc-aI+cpscElYclpPGaclh.GlhlIGhsPIPVGlcs..s.slIFPYTKPC..aGoFVl+l..pss.-Elc+lRc ...Mu.Cl-ph.....sYEILL+stoFKECp-aI+cshcElhclsPGhclh.GlhlIGlPPIPlGl-s...s.pllFPYTKPC..aGTaVl+l..pss.cElc+lR.............. 0 10 27 35 +8812 PF08980 DUF1883 Domain of unknown function (DUF1883) Mistry J, Sammut SJ anon pdb_2b1y Domain This domain is found in a set of hypothetical bacterial proteins. 21.00 21.00 21.50 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.15 0.72 -4.45 10 161 2009-01-15 18:05:59 2006-08-03 16:45:37 5 3 155 1 48 116 3 87.50 35 79.80 CHANGED M+Fs.Y-hcph+cG-llsloLs.pssNVRLMssuNFppF+NstcapY.hGGhsc+SPs+IsVPSoGaWhlllDh..sGp+Ghlsuol+...s..th..clhR .........hpah.aphtphptuDlVVlcCS.ppCNI+LMsssNFppa+sGscasY.hG..Gshc+.PA+lsVPpoGaWslsIDo..tupp...hphohp.............hh............... 0 9 16 33 +8814 PF08982 DUF1857 Domain of unknown function (DUF1857) Mistry J, Sammut SJ anon pdb_2ffs Domain This domain has no known function. It is found in various hypothetical bacterial and fungal proteins. 
21.30 21.30 24.70 24.50 21.00 20.30 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.68 0.71 -4.61 17 171 2012-10-02 19:24:03 2006-08-03 16:54:50 6 4 160 2 79 146 10 151.00 33 88.30 CHANGED papahl.lNsPss.thssLTRpQlWpGLlh+ucpPp.Fls.ul.-pCpllpcs........sstltRcLpa.........GsthlcDpVpht..pplca.....tptsGuolshhI-.......-sssusLaLRFhYchphs.hpssut-.cth..p-hh+pua+tushcTl+hIRphstsGp ...............pFEHLlplNssss..thssLTRsQlWpGLVhRAcpPphFV..GL.-sCtlhpcs.............sstlcR-Lpa................Gps.sl+DcVThpssp.pVcaph..sssttsGu...oLohsIE.......E.s.-..st.pLFlRFtYcTsls....h.pss.ot-tpph..pphlKpAY+puDl-Tl.chIR-hstt.............................. 0 10 41 64 +8815 PF08983 DUF1856 Domain of unknown function (DUF1856) Mistry J, Sammut SJ anon pdb_1ytv Domain This domain has no known function. It is found in the C-terminal segment of various vasopressin receptors. 25.00 25.00 30.70 29.60 24.10 24.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.47 0.72 -3.87 9 110 2009-09-11 07:47:04 2006-08-03 16:57:04 5 1 65 0 32 102 0 46.40 58 11.36 CHANGED shpKEDSDSSh..RRpT.hT+h.ssRoPosuh......ssWK...sSPKSScSl+hl.hpp .cFsK-DSDShS..RRQT.ao...NNR.SPTNSo......GhWK...-SPKSS+Sl+FlPlsT............. 0 1 4 14 +8816 PF08984 DUF1858 Domain of unknown function (DUF1858) Mistry J, Sammut SJ anon pdb_2fi0 Domain This domain has no known function. It is found in various hypothetical bacterial proteins. 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.71 0.72 -4.31 33 1114 2009-01-15 18:05:59 2006-08-03 16:58:53 6 29 835 3 232 793 11 56.80 29 33.48 CHANGED Ishshslt-llcpaP-sl-lhhph..GFpplssPshhpohu+hhTLcpuuphtslslsplh ...............Ishshslu-llcpa....P....-..ll.-lLhch...GhcsLusPh.....u.phhoLcpuuphcGlsh-clh.......... 
0 110 183 204 +8817 PF08985 DUF1888 Domain of unknown function (DUF1888) Mistry J, Sammut SJ anon pdb_1ai4 Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 31.70 31.40 23.10 22.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.54 0.71 -4.44 4 33 2009-09-11 00:41:11 2006-08-03 17:02:49 6 1 32 21 10 34 5 121.90 49 94.95 CHANGED Mpss.spspaIpVTVTLE.NGEPVFsYTsApGt.p.GDVslTpuuT.ITY.LpDQTGKGLKFVGAuFhTPFDplIDAVplSoDGpLlpLsDLDcssGsTpFQFVLoNouNTLhlLSPDPplIN+sp ........M.ss.t.upalpVsVTLE.NGEPV.F.YTcss.Gc..ts.sGDVTlTpuuT.lTYhL..sDpT.G.K.GLKFVGsGFl.....TPFDp....ll....D..AVTlSoD..GhLlQLVDhDcoPGoTKFQFVhoNosNTLllLSPDPplIN+s.p.. 0 2 3 5 +8818 PF08986 DUF1889 Domain of unknown function (DUF1889) Mistry J, Sammut SJ anon pdb_2es9 Domain This domain is found in a set of hypothetical bacterial proteins. 25.00 25.00 53.20 53.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.73 0.71 -3.92 2 354 2009-01-15 18:05:59 2006-08-03 17:04:34 5 1 350 2 10 76 1 99.00 87 99.77 CHANGED MQIKVIYSLIDNMVNFKDKNMPAVIDKALDFIGAMDVSAPTPSSMNESTAKGIFKYLKELGVPASAADITsRAD.EGWNPGFTEKMVGWAKKMEoGER.VIKNPEYFSTYMQEELKALV ...................................MPAVIDKALDFIGAMDVSAPTPSSMNESTAKGIFKYLKELGVPASAADITARADQEGWNPGFTEKMVGWAKKMEoGERsVIKNPEYFSTYMQEELKALV....... 0 1 1 6 +8819 PF08987 DUF1892 Protein of unknown function (DUF1892) Mistry J, Sammut SJ anon pdb_1n6z Family Members of this family, that are synthesised by Saccharomycetes, adopt a structure consisting of a four-stranded beta-sheet, with strand order beta2-beta1-beta4-beta3, and two alpha-helices, with an overall topology of beta-beta-alpha-beta-beta-alpha. They have no known function [1]. 
19.90 19.90 21.10 60.70 19.10 17.50 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.58 0.71 -4.07 9 44 2009-01-15 18:05:59 2006-08-03 17:08:24 5 2 42 1 28 38 0 110.00 45 85.71 CHANGED Must..............-NshRhlllL-cs....ppcpsptDc+...........hlDE......VpphDplNpaFDKFDEcIsIPNEGHIKYEluSDGLlVlIlD.KEl.-cVlshVcsasppsp.cppctpD ................h.ut..............sNsaRhllLLE-...................cp..c.....s...c.-E+................p-FlDE.....tlsphDplNpWFDKFDpcICIPNEGHIKYEluSDGLlVlllD.+El.--VlchVccaVccs.........p.............. 0 3 14 25 +8820 PF08988 DUF1895 Protein of unknown function (DUF1895) Mistry J, Sammut SJ anon pdb_1zw0 Family The YscE protein, produced by the pathogen Yersinia, assumes a secondary structure composed of two anti-parallel alpha-helices separated by a flexible loop. The function of this protein is, as yet, unknown [1]. 21.80 21.80 22.30 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.19 0.72 -3.94 5 99 2009-01-15 18:05:59 2006-08-03 17:10:32 5 1 94 16 12 42 2 66.40 37 86.26 CHANGED MTsLEppLpu.ussppVcsIpp+LpQAQucVKRQLcpGGsPQQYQlWp+Qu-AlpAAlsIIcTlEucpK .......MTpLEphLps..sspps+uIphpLctAhspl++phs+GssPpQYQ.hppphcAlEuAhsIIphht............ 0 4 6 7 +8821 PF08989 DUF1896 Domain of unknown function (DUF1896) Mistry J, Sammut SJ anon pdb_2apl Domain This domain is found in a set of hypothetical bacterial proteins. 
25.00 25.00 26.30 26.10 24.10 24.00 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.80 0.71 -4.32 7 270 2009-01-15 18:05:59 2006-08-03 17:11:53 5 1 103 1 19 217 4 140.40 50 90.90 CHANGED hsppppELSYa+LtLLsYL+EsHPchAGDpsFIppRu-pAAcsYpcAltpGhshstAtphAptlLhpGLHFS+YDslhpVl.sEFtsEVPptptcshsLpLL..P.scsVFs+YslsDD.FphSPpYcpLYsELTGsIhhhlEc.Gl ........................h.ppspp-hSYatLpL.sYLp-p+PEphsD......psFIptRADpAhpsYpcAhtpGastspApphAp-lLapGLHFS+YDTLhpVlENEFpcE....lPsshsEphs.hLL....hlpsVFs+Y-..LoDD.FAhos-Y-pLYTELTGslVLhIEppGl.......... 0 8 15 19 +8822 PF08990 Docking Erythronolide synthase docking Mistry J, Sammut SJ anon pdb_1pzr Domain The N terminal docking domain found in modular polyketide synthase assumes an alpha-helical structure, wherein two alpha-helices are connected by a short loop. Two such N-terminal domains dimerise to form amphipathic parallel alpha-helical coiled coils: dimerisation is essential for protein function [1]. 20.90 20.90 21.00 21.30 20.20 20.70 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.57 0.72 -7.08 0.72 -4.70 141 653 2009-01-15 18:05:59 2006-08-03 17:16:00 6 165 190 6 148 643 1 26.80 48 0.94 CHANGED MssE-KLh-YLKRlTuDL+psRpR.L+- .....ss--KLh-YLKRlTuDL+coRpRLpE.... 1 24 106 140 +8823 PF08991 DUF1903 Domain of unknown function (DUF1903) Mistry J, Sammut SJ anon pdb_1hp8 Domain Members of this family adopt a coiled coil structure, with two antiparallel alpha-helices that are tightly strapped together by two disulfide bridges at each end. The protein sequence shows a cysteine motif, required for the stabilisation of the coiled-coil-like structure. Additional inter-helix hydrophobic contacts impart stability to this scaffold. The precise function of this eukaryotic domain is, as yet, unknown [1]. 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.78 0.72 -3.82 7 194 2012-10-02 15:44:21 2006-08-04 08:57:27 5 3 149 3 132 223 0 59.70 30 66.44 CHANGED psPCptpACAIQsCLptstYsEu+CtslI--LhpCCtpa......cu+SlCCst.pl.....lphc...hpptpK ...........sPCptpAC...t.lQ.pCLp...cNsY.....scs+CpphlptlpcCCppa.t.......ptpo..Cs..t......................t............................. 0 33 70 107 +8824 PF08992 QH-AmDH_gamma Quinohemoprotein amine dehydrogenase, gamma subunit Mistry J, Sammut SJ anon pdb_1jmx Domain Members of this family contain a cross-linked, proteinous quinone cofactor, cysteine tryptophylquinone, which is required for catalysis of the oxidative deamination of a wide range of aliphatic and aromatic amines. The domain assumes a globular secondary structure, with two short alpha-helices having many turns and bends [1]. 25.00 25.00 92.90 92.20 19.80 18.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.98 0.72 -4.22 9 37 2009-01-15 18:05:59 2006-08-04 08:59:52 6 1 32 4 14 41 1 77.80 67 75.10 CHANGED suVVGCToThDPGWEVDuFGGVuuLCQPMEADLYGCuDPCWWPAQVPDThsoYPcWupcAsssspDWRpLsoVFPpsK ..............suVsGCTsThDPGWEVDuFG.GVuSLCQPMEADLYGCSDPCWWPAQVPDhhsoY.DWsspAssuscDWRpLsoVFPcs.... 0 2 9 11 +8825 PF08993 T4_Gp59_N T4-helicase_N; T4 gene Gp59 loader of gp41 DNA helicase Mistry J, Sammut SJ anon pdb_1c1k Domain Bacteriophage T4 gene-59 helicase assembly protein is required for recombination-dependent DNA replication, which is the predominant mode of DNA replication in the late stage of T4 infection. T4 gene-59 helicase assembly protein accelerates the loading of the T4 gene-41 helicase during DNA synthesis by the T4 replication system in vitro. T4 gene-59 helicase assembly protein binds to both T4 gene-41 helicase and T4 gene-32 single-stranded DNA binding protein, and to single and double-stranded DNA. 
The structure of T4 gene-59 helicase assembly protein reveals a novel alpha-helical bundle fold with two domains of similar size, this being the N-terminal domain that consists of six alpha-helices linked by loop segments and short turns. The surface of the domain contains large regions of exposed hydrophobic residues and clusters of acidic and basic residues. This domain has structural similarity to members of the high-mobility-group (HMG) family of DNA minor groove binding proteins including rat HMG1A and lymphoid enhancer-binding factor, and is required for binding of the helicase to the DNA minor groove [1]. 21.10 21.10 21.80 22.20 20.00 21.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.05 0.72 -3.98 8 64 2009-01-15 18:05:59 2006-08-04 09:01:32 5 2 62 1 0 53 451 91.20 41 44.10 CHANGED lsuhuVY+LYLshKsHhsG.KYDhlKYpWp.hRso-sAFpKR+DKYFFcKLAcKaoLpElhtlhluNhlANscu...WlG-IsssDAhsFYtcalG+hc ...puhsVYplYLhlKpHFss.+YDhlKYshp.h+sS.-suapKR+D+YFFpKLucKaph+..E.Lt.hFluNhVsNscu...WlG-lsstDAhshYp-als+h.p............ 0 0 0 0 +8826 PF08994 T4_Gp59_C T4-helicase_C; T4 gene Gp59 loader of gp41 DNA helicase C-term Mistry J, Sammut SJ anon pdb_1c1k Domain Bacteriophage T4 gene-59 helicase assembly protein is required for recombination-dependent DNA replication, which is the predominant mode of DNA replication in the late stage of T4 infection. T4 gene-59 helicase assembly protein accelerates the loading of the T4 gene-41 helicase during DNA synthesis by the T4 replication system in vitro. T4 gene-59 helicase assembly protein binds to both T4 gene-41 helicase and T4 gene-32 single-stranded DNA binding protein, and to single and double-stranded DNA. The structure of T4 gene-59 helicase assembly protein reveals a novel alpha-helical bundle fold with two domains of similar size, this being the C-terminal domain that consists of seven alpha-helices with short intervening loops and turns. 
The surface of the domain contains large regions of exposed hydrophobic residues and clusters of acidic and basic residues. The hydrophobic region on the 'bottom' surface of the domain near the C-terminal helix binds the leading strand DNA, whilst the hydrophobic region on the 'top' surface of the domain lies between the two arms of the fork DNA, allowing for T4 gene 41 helicase binding and assembly into a hexameric complex around the lagging strand [1]. 25.00 25.00 64.70 63.90 21.70 21.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.07 0.72 -3.87 10 58 2009-01-15 18:05:59 2006-08-04 09:03:10 5 2 56 1 0 50 390 101.80 38 50.40 CHANGED F+EDl+slhYFucKhsls.hcclFpYspcspoShIFKLlQoshIShETFllLDSFLsIlD+aDc.hssDllWps.apsKlpAY+KLlsIDsc...pAKslFIcslcppK ......................Fc-DlcslhhFucpsthpthcclFp.sscsppshIhKhlQSshIShETFllLDShLshl-caDc.ptsDllWps.auhKlpuY+KlLsIDsp...csKplFl-Tl+sh.... 0 0 0 0 +8827 PF08995 NIP_1 Necrosis inducing protein-1 Mistry J, Sammut SJ anon pdb_1kg1 Domain Necrosis inducing protein-1, a fungal avirulence protein produced by plants, consists of two parts containing beta-sheets of two and three anti-parallel strands, respectively. Five intramolecular disulfide bonds, stabilise these parts and their position with respect to each other, providing a high level of stability [1]. 
25.00 25.00 186.20 186.00 20.10 18.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.14 0.72 -3.81 3 16 2009-01-15 18:05:59 2006-08-04 09:05:11 5 1 1 1 0 17 0 82.00 96 100.00 CHANGED MKFLVLPLSLAFLQIGLVFSTPDRCRYTLCC-GALKAVSECLHESESCLVPGDCCRGKSRLTLCSYGEGGNGFQCPTGYRQC MKFLVLPLSLAFLQIGLVFSTPDRCRYTLCCDGALKAVSECLpESESCLVPGDCCRGKSRLTLCSYGEGGNGFQCPpGYRQC 0 0 0 0 +8828 PF08996 zf-DNA_Pol DNA Polymerase alpha zinc finger Mistry J, Sammut SJ anon pdb_1n5g Domain The DNA Polymerase alpha zinc finger domain adopts an alpha-helix-like structure, followed by three turns, all of which involve proline. The resulting motif is a helix-turn-helix motif, in contrast to other zinc finger domains, which show anti-parallel sheet and helix conformation. Zinc binding occurs due to the presence of four cysteine residues positioned to bind the metal centre in a tetrahedral coordination geometry. Function of this domain is uncertain: it has been proposed that the zinc finger motif may be an essential part of the DNA binding domain [1]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.62 0.71 -4.94 40 326 2009-01-15 18:05:59 2006-08-04 09:06:55 5 14 267 7 211 330 1 179.70 29 13.00 CHANGED l-opl.oDsp+a+ssspLplpC..spCspphhFsG...........lhs.......ssstphpssGlpCsp......Cst..hhsshpltsQLphtIRpalshYYpGWllC--ssCss+.TRQlslhu+R....Cl......................sC.+GpMph-Yo-+pLYNQLhYapuLFDl-+shppthp..................pptptcpl.hhspps+phappl+ssV-caLscsGhphVshu.slF ................................................................................sph.sDp.+a+sspthphpC.....p..Ctpp.has.u........................lhp......................tsspt.h..p..ss...h...h..tCsp......Cpt...............hh.s..hplsspLphplRphl.pcYY.pG.WL.lC-DssC...s..p.c...TRp..hsl..hs.p.R..................C.............................sC..pGphp..hcYo-+pLYsQLhYapt.lFDs-pshpph.t............................tttp.ppph..hh.s.tp.p.....thattlpsslc.phLpcsuhphVshsplF................................ 1 74 116 172 +8829 PF08997 UCR_6-4kD Ubiquinol-cytochrome C reductase complex, 6.4kD protein Mistry J, Sammut SJ anon pdb_1sqq Domain The ubiquinol-cytochrome C reductase complex (cytochrome bc1 complex) is an essential component of the mitochondrial cellular respiratory chain. This family represents the 6.4kD protein, which may be closely linked to the iron-sulphur protein in the complex and function as an iron-sulphur protein binding factor [1]. 24.00 24.00 24.10 24.60 23.80 23.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.68 0.72 -4.26 5 96 2009-01-15 18:05:59 2006-08-04 09:08:46 5 2 70 17 55 108 0 53.60 42 80.45 CHANGED Mlo+l.lGsKYsElA+uWlPoussWGuAGGVALVaFTDWRLlLDYVPYlNuKFcKDE ...........hh.+h.hG.+ahplspsWlPosusaGusuuluLlahTD.W..+LILsaVPahsGKFccs...... 
0 16 18 33 +8830 PF08998 Epsilon_antitox Bacterial epsilon antitoxin Mistry J, Sammut SJ anon pdb_1gvn Domain The epsilon antitoxin, produced by various prokaryotes, forms part of a postsegregational killing system which is involved in the initiation of programmed cell death of plasmid-free cells. The protein is folded into a three-helix bundle that directly interacts with the zeta toxin, inactivating it [1]. 25.00 25.00 43.50 42.80 20.20 19.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.08 0.72 -4.01 3 79 2009-09-10 22:13:13 2006-08-04 09:09:58 6 2 68 4 1 25 0 85.90 77 95.90 CHANGED AVTYEKTFEIEIINELSuSVYNRVLNYVLNHELDKsDSpLLEVNLLNQLclApcVNLFchSLEELQAlHEYWRSMN+YSKQILsKEKVA .AVTYEKTFEIEIINELSASVYNRVLNYVLNHELsppDopLLEVNLLNQLclApcVsLFp.shEELQAlHEYWRSMNpYSKQlLsKEKVA................... 0 0 0 1 +8831 PF08999 SP_C-Propep Surfactant protein C, N terminal propeptide Mistry J, Sammut SJ anon pdb_2esy Domain The N-terminal propeptide of surfactant protein C adopts an alpha-helical structure, with turn and extended regions. It's main function is the stabilisation of metastable surfactant protein C (SP-C), since the latter can irreversibly transform from its native alpha-helical structure to beta-sheet aggregates and form amyloid-like fibrils. The correct intracellular trafficking of proSP-C has also been reported to depend on the propeptide [1]. 25.00 25.00 28.30 27.80 23.00 21.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.27 0.72 -4.50 3 104 2009-01-15 18:05:59 2006-08-04 09:13:37 5 3 70 7 22 92 0 62.70 80 51.94 CHANGED MDMGSKEVLMESPPDYSAuPRGRFRIPCCPVHLKRLLIVVVVVVLVVVVIVGALLMGLHMSQKHTEMVLEMSIGG.PEsQQRLALSE+sGTTAT ..................................FtIPCCPVpLKRLLIVVVVVVLVVVVIVGALLMGL................................................... 
0 1 1 5 +8832 PF09000 Cytotoxic Cytotoxic Mistry J, Sammut SJ anon pdb_1e44 Domain The cytotoxic domain confers cytotoxic activity to proteins, enabling the formation of nucleolytic breaks in 16S ribosomal RNA. The structure of the domain reveals a highly twisted central beta-sheet elaborated with a short N-terminal alpha-helix [1]. 21.10 21.10 21.10 22.90 20.80 20.10 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.00 0.72 -3.96 5 56 2009-01-15 18:05:59 2006-08-04 09:15:10 5 19 47 6 17 71 1 81.20 38 15.33 CHANGED YHPAPKsptlsGLssL..+.stpKTPhQuGGuKRKRWpssKG....++IYEWDSpHGElEVYc.S+G+HLGShDPpTG-.lKssV+GRoIKt.l .............................t....l.uhsth....st.KTs..hpGGGshRtR...WhspKG......cpIYEWDSQH....G....clEsYc..pcGc.HLGpFDPpTGcpl..Ks.ss..tRplc.h............... 0 4 6 11 +8833 PF09001 DUF1890 Domain of unknown function (DUF1890) Mistry J, Sammut SJ anon pdb_1kjn Domain This domain is found in a set of hypothetical archaeal proteins. 25.00 25.00 136.10 135.90 24.70 19.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.73 0.71 -4.17 14 40 2009-01-15 18:05:59 2006-08-04 09:16:13 6 1 39 2 30 39 2 141.30 40 93.70 CHANGED llllGCPEsPVQhshslYLsptLcctGhclllAuNPAAh+LlcVADP-+aYlcch..h-lDcsltplttt..sh-hhhuFsHNDAulsYssTh.ttl.pscshullFG+c.s--Lscplp.....hssphlsu+AhHNPhPLps+l-clh LllLGCPEsPlQsPhslYLsptL+ccGacVslAuNPAAhKLlcluDPE+aYlppl..h-lDcslpslttt..-hDhlhuFlHNDAuloYhsTa.ptlhpsc.shAlVFG+c.s--Lschlc....thsschlsuRAaHNPtPLps+l-+lh... 0 7 19 25 +8834 PF09002 DUF1887 Domain of unknown function (DUF1887) Mistry J, Sammut SJ anon pdb_1xmx Domain This domain is found in a set of hypothetical bacterial proteins. 
23.90 23.90 23.90 23.90 23.60 23.80 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.31 0.70 -6.06 6 279 2012-10-11 20:44:44 2006-08-04 09:20:15 6 6 251 1 59 213 19 305.10 28 86.60 CHANGED MtlHVslls.QcsspLIssLLDpth+sc+hVFIsocsp..+EthpRLcplLp.pGIss....-hFplsstsshptl+pplpsLhE-hKh.ssElhLNsosGh+hhhLuAYEsFR.ShHhPIahl-ssSDclpaL.PsGscptpV.pcpIp..luDYLssaGA......Rst.u-sphscphsc+LhpLupRaAosAh-hGshLuoLNhLAoss..R+tphlslclo-cptuY.+-LshLlsDLs-ssLssa-sGl.lTFtsE-ARRFhNGpWLEhhVaspl+sIpp....lpDhuLsVQVhcchsEKp...VRNELDVVslssNKLHIIECKTtGhccDu.....DDTLYKLESLRD.LGGLpuRuhLVSFRPlpssDlh..RApchsltlIGPDELsDLKcHLpsWhpts .......................................................................................hhs.p....p.............th.s.p..chlhlh...sppt..........hhp.pL.p..p.hhp....h..th...p.............ch.hp.l.s.s..................shptlp...ptl.pp.lh.pphc........sccl..hhNhosGh+h..hhLusa.p.hh.p..php..h..s.....l.......h.hh-spps..p..h.hhl..ts...p....p....p..pl....sp.lp..ltshlshhGs.........p.th...p.........h......pht.p..hsp....h.......p....h...h...l................sph...h.tpps.......p......ph...p.th..ppp...t.hh...t.......p.l....l.s....L....ctt...h...h.....p.....h...p....pt....h.....h.....sF..s....c...p...h...+p......F..h..s..GpW.LEhhl.atpl.pphpp........l..pDhp...hs....l.....pl...h..hp.....h..t..-pc.................lpN....ELDVshh.h.ssp.L.hlIECKo.psh.....p..s-.s.............hLh+..Lpsh..tchhG.G.pscthL..Vs.h...t....hp....s.....h........Rspph...tl..l.l....tp.h..th...l.t....s............................................................................... 
1 24 44 53 +8835 PF09003 Phage_integ_N Bacteriophage lambda integrase, N-terminal domain Mistry J, Sammut SJ anon pdb_1kjk Domain The amino terminal domain of bacteriophage lambda integrase folds into a three-stranded, antiparallel beta-sheet that packs against a C-terminal alpha-helix, adopting a fold that is structurally related to the three-stranded beta-sheet family of DNA-binding domains (which includes the GCC-box DNA-binding domain and the N-terminal domain of Tn916 integrase). This domain is responsible for high-affinity binding to each of the five DNA arm-type sites and is also a context-sensitive modulator of DNA cleavage [1]. 21.80 21.80 22.10 23.00 21.40 21.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.61 0.72 -4.27 17 707 2012-10-03 08:51:45 2006-08-04 09:34:33 5 9 359 8 34 331 1 71.50 43 19.78 CHANGED MutRsRphs.hslP.NLhtp.s.+...sYapY+sPlTGK.aGLGp.D+ppAhspAhpANhplhpptscthLhpp.pss ..........MutR.Rpac..pclP.sLY.+.s....s....+s...YapYRcPlTGKpaGLGp.DcchAhs.AhpANpcltptptcphLshp.c.h....................... 0 7 19 27 +8836 PF09004 DUF1891 Domain of unknown function (DUF1891) Mistry J, Sammut SJ anon pdb_2cq2 Domain This domain is found in a set of hypothetical eukaryotic proteins. 20.60 20.60 20.60 20.70 20.40 20.20 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -8.06 0.72 -4.57 9 72 2009-01-15 18:05:59 2006-08-04 10:05:59 5 14 28 6 42 109 0 38.20 39 7.06 CHANGED WosNpsulhKKAQQRLaFLRpL+Ksphs....shhh+usIES .......hssNp....puhhKhuppchhFLRK.pKs+h......sLhh+psIEo.... 0 13 14 30 +8837 PF09005 DUF1897 Domain of unknown function (DUF1897) Mistry J, Sammut SJ anon pdb_2bn5 Domain This domain is found in Psi proteins produced by Drosophila, and in various eukaryotic hypothetical proteins. It has no known function. 
20.10 20.10 20.30 20.10 19.70 20.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.05 0.72 -4.50 12 369 2009-01-15 18:05:59 2006-08-04 10:08:04 5 23 86 2 175 293 0 32.50 48 8.58 CHANGED ssssssGpsDYSApWhEYYRphGhhcpA-hlcpphtt.p ..........ssssuGQ.sDYSAAWtEYY+p..G...pAthhtt.....s............ 1 36 48 112 +8838 PF09006 Surfac_D-trimer Lung surfactant protein D coiled-coil trimerisation Mistry J, Sammut SJ anon pdb_1m7l Domain This domain, predominantly found in lung surfactant protein D, forms a triple-helical parallel coiled coil, and mediates trimerisation of the protein [1]. 20.70 20.70 20.70 20.80 20.60 20.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.22 0.72 -4.23 10 72 2009-01-15 18:05:59 2006-08-04 10:14:29 6 6 32 67 37 66 0 42.60 52 13.77 CHANGED VsALRQQVEsLpGpLQRLQsAFSQYKKAsLFPDGQSVGEKIFKTAG ..................lsuL+QQlpsLptplppLQsuhSpYKKstLFPsGpuVGcKIFKTuG..... 0 1 1 6 +8839 PF09007 EBP50_C-term EBP50, C-terminal Mistry J, Sammut SJ anon pdb_1sgh Domain This C terminal domain allows interaction of EBP50 with FERM (four-point one ERM) domains, resulting in the activation of Ezrin-radixin-moesin (ERM), with subsequent cytoskeletal modulation and cellular growth control [1]. 25.00 25.00 46.10 45.00 20.20 19.10 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.89 0.72 -4.10 6 149 2009-01-15 18:05:59 2006-08-04 10:16:42 6 4 37 9 55 106 0 40.90 60 12.56 CHANGED -ssLcLshShAtAKE+A+.ppRupKRAPQMDWSK+pElFSNh .....-ssLcl..u.osAtAKE+A+.tpRssKRAPQMDWsKKpElFSNh. 0 2 8 21 +8840 PF09008 Head_binding Head binding Mistry J, Sammut SJ anon pdb_1lkt Domain The head binding domain found in the Phage P22 tailspike protein contains two regular beta-sheets, A and B, oriented nearly perpendicular to each other and composed of five and three strands respectively. The topology of the strands is exclusively antiparallel. 
The tailspike protein trimerises through this domain, and the direction of the strands with respect to the molecular triad is almost parallel for beta-sheet A, whereas beta-sheet B is perpendicular to the triad, forming a dome-like structure. This domain is dispensable for thermostability and SDS resistance of the intact protein, and its deletion has only minor effects on tailspike folding kinetics [1]. 21.50 21.50 24.60 23.60 20.20 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.47 0.71 -4.04 5 167 2009-09-11 14:20:10 2006-08-04 10:18:20 5 10 154 22 13 147 0 109.70 70 16.69 CHANGED MTDITsNlVVSMPSQlFTtsRuFKAVANGKIYIGKIDTDPVNPsNQIPVYlENEDGSaVpVAQPIIINAAGaPVYNGQIAKFVTVQGHSMAVYDAsGAQQFYFsNVLKYDPcph ..........MoD.ITANVVVSMPoQLFTMuRSFKAVANGKIYIGKIDTDP...VNPpNQIpVYlENEDGSHVsVoQPIIINAAGaPVYNGQ.IAKFVTVQGHSMAVYD..AYGuQQFYFPNVLKYDPDQ.......... 0 2 2 6 +8841 PF09009 Exotox-A_cataly Exotoxin A catalytic Mistry J, Sammut SJ anon pdb_1aer Domain Members of this family, which are found in prokaryotic exotoxin A, catalyse the transfer of ADP ribose from nicotinamide adenine dinucleotide (NAD) to elongation factor-2 in eukaryotic cells, with subsequent inhibition of protein synthesis [1]. 25.00 25.00 26.80 36.00 20.40 19.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.62 0.70 -5.47 2 67 2009-01-15 18:05:59 2006-08-04 10:19:20 5 3 24 45 1 86 0 202.00 71 36.78 CHANGED A.lphp..oGtpaLs-stsl.hospGspNWThpcL.tsHptLpccGYVFVGYHGT.h.AAQoIV..husV.Rupsp-.-thWtGhYlAscstlAaGYAp.pE.......PstttR.tpGshLRVYlPRuSL.tFYRTshsLtssEt..clpplIGHsLPLR.-AhTGPEptGGc.ETllGWshA.+sVsIPSsIPssshp.thslD.puls.KEQuISshPsY.....K..+--LK ANINIESRSGRSYLPENRAV.ITPQGVTNWTYQELEATHQALTREGYVFVGYHGTNHVAAQTIVNRIAPVPRGNNTENEEKWGGLYVATHAEVAHGYARIKEGTG-hGLPTRAER-sRGVMLRVYIPRASLERFYRTNTPLENAEc..HITpVIGHSLPLRNEAFTGPEusGGEDETVIGWDMAIHAVAIPS............................................ 
0 1 1 1 +8842 PF09010 AsiA Anti-Sigma Factor A Mistry J, Sammut SJ anon pdb_1jr5 Domain Anti-sigma factor A is a transcriptional inhibitor that inhibits sigma 70-directed transcription by weakening its interaction with the core of the host's RNA polymerase. It is an all-helical protein, composed of six helical segments and intervening loops and turns, as well as a helix-turn-helix DNA binding motif, although neither free anti-sigma factor nor anti-sigma factor bound to sigma-70 has been shown to interact directly with DNA. In solution, the protein forms a symmetric dimer of small (10.59 kDa) protomers, which are composed of helix and coil regions and are devoid of beta-strand/sheet secondary structural elements [1]. 25.00 25.00 25.90 77.90 22.00 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.01 0.72 -4.11 6 30 2009-01-15 18:05:59 2006-08-04 10:21:43 5 1 29 6 0 23 0 88.50 46 97.68 CHANGED M........Nts.Ehl+-IIuhASlLIKFupE.DIlssQpsFluFLNElGh+ossGc-FTpsSFRQMhsRLst-p+cELl-pFN.pGacslh+phhMYoNs ...MshplEhV+EIIolASILIKFupE.DIl-sRssFIAFLNElGl+s.pG+cLspsSFRclhpcLTp--+cpLI-EFN.pGa.EslaRhLhMYos..... 0 0 0 0 +8843 PF09011 HMG_box_2 DUF1898; HMG-box domain Mistry J, Sammut SJ, Coggill P anon pdb_2cto Domain This short 71 residue domain is an HMG-box domain. HMG-box domains mediate re-modelling of chromatin-structure. Mammalian HMG-box proteins are of two types: those that are non-sequence-specific DNA-binding proteins with two HMG-box domains and a long highly acidic C-tail; and a diverse group of sequence-specific transcription factor-proteins with either a single HMG-box or up to six copies, and no acidic C-tail [1]. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.46 0.72 -3.61 10 1103 2012-10-02 14:16:02 2006-08-04 10:25:46 5 49 229 12 522 7615 215 70.30 37 19.76 CHANGED PsKPKushsAYhaFspcppsEhK+c......tP..ssshuEhoKtsSpcW+shSscEKccYp-pA+tcKscacpEhtsac ........................................KP..+..s.t.h.Su.Y.s.hF....lp.s....p....p....p....-....t.Kcc........................ps.......slsh..u.Eh..s..K.....c...C..S..E.+.................W..K..........s...h..Ss........c........E...Ksc..a...c.-....h...A+.....t...D..K.t.+.Y.-pEMpsa.h................................... 0 146 190 303 +8844 PF09012 FeoC DUF1920; FeoC like transcriptional regulator Mistry J, Sammut SJ anon pdb_1xn7 Domain This family contains several transcriptional regulators, including FeoC, which contain a HTH motif. FeoC acts as a [Fe-S] dependant transcriptional repressor [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.29 0.72 -4.27 36 803 2012-10-04 14:01:12 2006-08-04 09:28:13 5 3 792 2 109 876 60 71.10 45 86.96 CHANGED hLppl+palppcspsShs-Lup+Fphs.sslcuMLshWlpKG+lp+h.sssss.....uuuCpp......Ct.....tt.p....shYc ..................SLlQVRDh..LA.L.+.GR.h-AsQISppLs..sPpPhIsAMLppLEpMGKss.RIp--scGC...............LSGSCKS.CP.....EG+s........shcphh................................ 0 29 58 82 +8845 PF09013 YopH_N YopH, N-terminal Mistry J, Sammut SJ anon pdb_1huf Domain The N-terminal domain of YopH is a compact structure composed of four alpha-helices and two beta-hairpins. Helices alpha-1 and alpha-3 are parallel to each other and antiparallel to helices alpha-2 and alpha-4. This domain targets YopH for secretion from the bacterium and translocation into eukaryotic cells, and has phosphotyrosyl peptide-binding activity, allowing for recognition of p130Cas and paxillin [1]. 
19.60 19.60 19.70 135.40 18.80 17.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.50 0.71 -4.45 4 68 2009-09-11 06:44:49 2006-08-04 10:34:58 5 2 31 4 6 18 0 114.70 61 40.01 CHANGED hpIssLpp.lS.plsp.tsG-phG+L+........sThQtlo.opuhpssEKsFAppVLcHVpNssLspcDlApLL......phsNaEL+pstsGpslLsGLRo-QLoLpDAKlLL-AAhRQ .hplssLpp.ls.plsQ.tpGspsG+Lp........pT..hhp.upthppuEKsFAppVLpHVtNssLopcDhApLL......plsNhEL+pstsGpsllsuLRo-QhoLpDAKlLLEAAhRQ. 2 0 2 2 +8846 PF09014 Sushi_2 Beta-2-glycoprotein-1 fifth domain Mistry J, Sammut SJ anon pdb_1g4f Domain The fifth domain of beta-2-glycoprotein-1 (b2GP-1) is composed of four well-defined anti-parallel beta-strands and two short alpha-helices, as well as a long highly flexible loop. It plays an important role in the binding of b2GP-1 to negatively charged compounds and subsequent capture for binding of anti-b2GP-1 antibodies [1]. 22.10 22.10 22.10 23.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.07 0.72 -4.22 6 79 2009-01-15 18:05:59 2006-08-04 10:39:08 5 8 40 7 37 69 0 82.80 44 22.33 CHANGED RAsCpVPlK+upVlYsGhKhhltDltcshlhHG-+VoFaCKspcK+CSasssupChDGslplPuCacE.s.Ltah...+p.sS-lpsC .............+AsCplslK+upVlYpGc+lplp-hhcsshhHG-pVoFaCKNK..E.K.+CSYotsupChDGslclPpCFc.E.o.lt.ah...+o.sS-lpsC.............. 0 1 6 19 +8847 PF09015 NgoMIV_restric NgoMIV restriction enzyme Mistry J, Sammut SJ anon pdb_1fiu Domain Members of this family are prokaryotic DNA restriction enzymes, exhibiting an alpha/beta structure, with a central region comprising a mixed six-stranded beta-sheet with alpha-helices on each side. A long 'arm' protrudes out of the core of the domain between strands beta2 and beta3 and is mainly involved in the tetramerisation interface of the protein. These restriction enzymes recognise the double-stranded sequence GCCGGC and cleave after G-1 [1]. 
20.70 20.70 21.20 27.80 19.50 20.40 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.90 0.70 -5.41 6 56 2012-10-11 20:44:44 2006-08-04 10:56:01 5 2 50 6 9 43 3 239.70 54 95.23 CHANGED FH+pLl.-GslhhsN...........stu..lsSNADuSsspSpslApGltcplpu.polsc+h.....AGQTsGspFEcIsppFlccTFp+lpHLRPGsWpVppssutp+hp.IspaEQYuHLtcLs+lu+cpsELuuuLGsDYsIpPDIVlsRcstsDptIN.csc.LVD..ssssthos.LR+uN....supP..lLHASISCKWTIRSDRAQNTRoEALNLlRNRKGRlPHIVsVTAEPhPSRIuSlALGTGDIDCVYHhALsELppulpulGp-Dst-hLpshIsG+RL+DISDLPLDLul ..................FHtpLl.psslhhhN...........stG..VsSNADuSNppShsIApuIAchLtu.coluc+l.....sGQTuGstFEslsscFlppsF.+LpHlRPG.sWsVppluupsRhc.IucapQYsHLssLscAscpss-LAAALGsDYsIoPDIlVsRp..sDttIN.psphLV..D..-s.lsphus.LRtsN.....ushP...lLHASISCKWTIRSDRAQNARSEuLNLlRNRKGRLPHIVVVTAEPoPSRluSIALGTG-IDCVYHFAL.ELppslpsl..s.......h-DAh.....-hhhhMVsG+RLKDISDLPLDLAV.......................... 1 2 7 7 +8848 PF09016 Pas_Saposin Pas factor saposin fold Mistry J, Sammut SJ anon pdb_2b8i Domain Members of this family adopt a compact structure comprising five alpha helices. Charged and polar residues are exposed mostly on the surface, while most of the hydrophobic residues are buried inside the hydrophobic core of the helical bundle. The precise function of this domain is unknown, but it is has been shown to induce secretion of periplasmic proteins, especially collagenase [1]. 27.50 27.50 123.30 123.20 27.30 27.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.37 0.72 -3.98 6 38 2009-01-15 18:05:59 2006-08-04 10:57:41 5 1 33 1 8 27 0 75.40 68 99.10 CHANGED hpoLIY-TLhNLAsp-PEQHApIRQpLYEQLDLsF-KQLALYusVLGPASSGKLtspcslscAV-pAlclLEh.p+ MKTLIY-TLlsLAsQEPEQHApIRQNLYEQLDLPFDKQLALYSsALGPASSGKLEsppuIsNAVDsAl+LLEsPE+. 
0 0 1 6 +8849 PF09017 Transglut_prok Microbial transglutaminase Mistry J, Sammut SJ anon pdb_1iu4 Domain Microbial transglutaminase (MTG) catalyses an acyl transfer reaction by means of a Cys-Asp diad mechanism, in which the gamma-carboxyamide groups of peptide-bound glutamine residues act as the acyl donors. The MTG molecule forms a single, compact domain belonging to the alpha+beta folding class, containing 11 alpha-helices and 8 beta-strands. The alpha-helices and the beta-strands are concentrated mainly at the amino and carboxyl ends of the polypeptide, respectively. These secondary structures are arranged so that a beta-sheet is surrounded by alpha-helices, which are clustered into three regions [1]. 25.00 25.00 722.90 722.60 20.90 19.70 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.37 0.70 -5.36 4 22 2009-01-15 18:05:59 2006-08-04 11:06:04 5 1 10 5 0 27 0 397.00 82 99.39 CHANGED MppRRRhLsFATlGAVlCTAGhMPSsupA....AuuGsGEEctSYAETHcLTADDVcsINALNEpA.sAupsG.......PS....FRAPDu...DDRVTPPAEPLDRMPDsYRs.hGRApTVVNNYIRKWQQVYSHRDG+KQQMTEEQREhLSYGCVGVTWVNSG.YPTNRLAFA.FDEsKYKN-LKNuRPRssETRAEFEGRlAK-SFDEuKGFpRAR-VASVMNKALENAHDEusYlsNLKpELANsNDALhpEDuRSsFYSALRNTPSFKERsGGNaDPSKMKAVIYSKHFWSGQDppGSSDKRKYGDP-AFRPspGTGLVDMSRDRNIPRSPspPGEuaVNFDYGWFGAQTEADADKTVWTHGNHYHAPNGuLGsMHVYESKFRNWSsGYuDFDRGAYVITFIPKSWNTAPsKVcQGWP ..................M..RtphLsFAThuAllCsuGhhPSsupA....AssGsGEpctSYAETHtLTA-DVcNINALNcpA.ssupsG.......PS....FRAPDu...DDRVTPPAEPLDRMPDsYRs.hGRApTVVNNYIRKWQQVYSHRDG+KQQMTEEQREhLSYGCVGVTWVNSG.YPTNRLAFAFFDEsKYKN-LKNuRPRssETRAEFEGRlAK-SFDEuKGFpRAR-VASVMNKALENAHDEGsYlDNLKpELANtNDALhpEDuRSsFYSALRNTPSFKERsGGNaDPSKMKAVIYSKHFWSGQDppGSSDKRKYGDP-AFRPDpGTGLVDMS+DRNIPRSPspPGEuaVNFDYGWFGAQTEADADKTlWTHGNHYHAPNGuLGsMHV.YESKFRNWSsGYuDFDRGs.YVITFIPKSWNTAPsKVKQGWP.................. 
0 0 0 0 +8850 PF09018 Phage_Capsid_P3 P3 major capsid protein Mistry J, Sammut SJ anon pdb_1hqn Domain The P3 major capsid protein adopts a 'double-barrel' structure comprising two eight-stranded viral beta-barrels or jelly rolls, each of which contains a 12-residue alpha-helix. This protein then trimerises through a 'trimerisation loop' sequence, and is incorporated within the viral capsid [1]. 25.00 25.00 937.50 937.30 20.50 20.20 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.38 0.70 -5.88 3 7 2009-01-15 18:05:59 2006-08-04 11:10:51 6 1 6 78 0 6 0 394.00 100 99.75 CHANGED AQVQQLTPAQQAALRNQQAMAANLQARQIVLQQSYPVIQQVETQTFDPANRSVFDVTPANVGIVKGFLVKVTAAIKNNHATEAVALTDFGPANLVQRVIYYDPDNQRHTETSGWHLHFVNTAKQGAPFLSSMVTDSPIKYGDVMNVIDAPATIAAGATGELTMYYWVPLAYSETDLTGAVLANVPQSKQRLKLEFANNNTAFAAVGANPLEAIYQGAGAADCEFEEISYTVYQSYLDQLPVGQNGYILPLIDLSTLYNLENSAQAGLTPNVDFVVQYANLYRYLSTIAVFDNGGSFNAGTDINYLSQRTANFSDTRKLDPKTWAAQTRRRIATDFPKGVYYCDNRDKPIYTLQYGNVGFVVNPKTVNQNARLLMGYEYFTSRTELVNAGTISTT AQVQQLTPAQQAALRNQQAMAANLQARQIVLQQSYPVIQQVETQTFDPANRSVFDVTPANVGIVKGFLVKVTAAIKNNHATEAVALTDFGPANLVQRVIYYDPDNQRHTETSGWHLHFVNTAKQGAPFLSSMVTDSPIKYGDVMNVIDAPATIAAGATGELTMYYWVPLAYSETDLTGAVLANVPQSKQRLKLEFANNNTAFAAVGANPLEAIYQGAGAADCEFEEISYTVYQSYLDQLPVGQNGYILPLIDLSTLYNLENSAQAGLTPNVDFVVQYANLYRYLSTIAVFDNGGSFNAGTDINYLSQRTANFSDTRKLDPKTWAAQTRRRIATDFPKGVYYCDNRDKPIYTLQYGNVGFVVNPKTVNQNARLLMGYEYFTSRTELVNAGTISTT 0 0 0 0 +8851 PF09019 EcoRII-C EcoRII C terminal Mistry J, Sammut SJ anon pdb_1na6 Domain The C-terminal catalytic domain of the Restriction Endonuclease EcoRII has a restriction endonuclease-like fold with a central five-stranded mixed beta-sheet surrounded on both sides by alpha-helices. It cleaves DNA specifically at single 5' CCWGG sites [1]. 
20.30 20.30 20.30 20.70 19.90 20.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.92 0.71 -4.40 14 145 2012-10-11 20:44:45 2006-08-04 11:12:05 6 2 130 13 25 144 16 164.70 40 44.93 CHANGED F+hhEctllhp+lptGFs......sVDsFl........sauhSVpNRRKSRAG+SLE.HLcplhpstGl....pasppu...hT.....EssKKPDFLFPusstYc......sstFPscpLpMLusKTTCKDRWRQVLsEA-R..IcpKHLhTLp.G.lSpsQhpEMpcpslpLVVPpsl+poYspp.+scLhTlpsFI ...........................................FhhhEchhh.phlppsas........slDpFl........shupS.l.....sNRRKSRAGKSLEhHLcplF.ctGl....pFpsQA........hT...........Es..sKK...PDFL.FPuutsY+............s.tFs.scpLpMLusKTTCKDRWRQlLsEA......-..R......I..p..phaLhTLp-G.lStsQhpEMpcpslpLVVPp.slpcp..aspthpsplholtsFI................. 0 11 17 22 +8852 PF09020 YopE_N YopE, N terminal Mistry J, Sammut SJ anon pdb_1l2w Domain The N terminal YopE domain targets YopE for secretion from the bacterium and translocation into eukaryotic cells [1]. 21.60 21.60 24.20 78.80 20.70 18.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.75 0.71 -4.09 2 31 2009-01-15 18:05:59 2006-08-04 11:19:43 5 1 27 6 2 13 0 104.70 97 52.96 CHANGED MKISSFISTSLPLPTSVSGSSSVGEMSGRSVSQQpS-QYANNLAGRTESPQGSSLASRIhE+LSShAHSsItFIpRMFSEGSHKPVVTPAPTPAQMPSPTSFSDSIKQLAAETLPKYMQQLsSLDA ..........................MSGRSVSQQpSDQYANNLAGRTESPQGSSLASRIIERLSShAHSVIGFIQRMFSEGSHKPVVTPAPTPAQMPSPTSFSDSIKQLAAETLPKYMQQLNSLDA 0 0 1 1 +8853 PF09021 HutP HutP Mistry J, Sammut SJ anon pdb_1wmq Domain The HutP protein family regulates the expression of Bacillus 'hut' structural genes by an anti-termination complex, which recognises three UAG triplet units, separated by four non-conserved nucleotides on the RNA terminator region. L-histidine and Mg2+ ions are also required. 
These proteins exhibit the structural elements of alpha/beta proteins, arranged in the order: alpha-alpha-beta-alpha-alpha-beta-beta-beta in the primary structure, and the four antiparallel beta-strands form a beta-sheet in the order beta1-beta2-beta3-beta4, with two alpha-helices each on the front (alpha1 and alpha2) and at the back (alpha3 and alpha4) of the beta-sheet [1]. 25.00 25.00 41.80 41.50 24.20 23.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.44 0.71 -3.93 17 271 2009-01-15 18:05:59 2006-08-04 11:30:01 6 2 242 36 67 175 0 131.60 41 90.58 CHANGED +lu+sAlhhAlopocEEEtplcphh.tppsh+ssssclGGp..h.psspKllcsslsAAK+pGVIpcs.ap-ptAlhtAThEAlptlhspshulss.....GhKluIsR........cs-alAVAlahslGlhhL..-c.slGLGhp ...pluRhAhhlAlupppEppthh.p....htcGh+ssssclGu......sspKlltuh.sAAKpstVIcss.ap-ptAlhtAhhEALhtltp.ttltLss.....Gh+huIsR........puEaluVAlassIGh.hh...-c.uIGLGhp............. 0 30 50 58 +8854 PF09022 Staphostatin_A Staphostatin A Mistry J, Sammut SJ anon pdb_1oh1 Domain The staphostatin A polypeptide chain folds into a slightly deformed, eight-stranded beta-barrel, with strands beta-4 through beta-8 forming an antiparallel sheet while the N-terminus forms a a psi-loop motif. Members of this family constitute a class of cysteine protease inhibitors distinct in the fold and the mechanism of action from any known inhibitors of these enzymes [1]. 24.10 24.10 24.10 24.10 23.40 23.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.28 0.72 -4.20 3 151 2012-10-01 18:54:33 2006-08-04 11:30:27 5 1 150 1 2 24 0 105.00 74 97.89 CHANGED McpYpLINIccscscapEKYWLHILEGlWHPp-lsTSPLKITFNKsIsPsYICKhINEDSR+IILsNsDNoNIIIEIIIINc+KIlFNllNKEuLGTSPKITFIK MEQhELFSIDKF..KCNSEAKYYLNIIEGEWHPQDLNDSPLKFILSTSDDSDYICKYINTEHKQLTLYNKNNSSIVIEIFIPNDNKILLTIMNTEALGTSPRMTFIK... 
0 1 1 2 +8855 PF09023 Staphostatin_B Staphostatin B Mistry J, Sammut SJ anon pdb_1y4h Domain Staphostatin B inhibits the cysteine protease Staphopain B, produced by Staphylococcus aureus, by blocking the active site of the enzyme. The domain adopts an eight-stranded mixed beta-barrel structure, with a deviation from the up-down topology of canonical beta-barrels in the amino-terminal part of the molecule [1]. 25.00 25.00 30.10 30.00 22.20 19.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.35 0.72 -4.23 2 163 2012-10-01 18:54:33 2006-08-04 11:32:06 5 1 163 8 2 15 0 107.00 97 98.19 CHANGED MYQLpFIpls.DshpLT+.cQssIpLFIGNW.N.phQKSIsIRpGsDTsHNQYpIL.IDTtHQRIKhoS.-s.plhYILDY-DTpHIhhQTSsKpuhGTSRPIhYE+ ...MYQLQFINLVYDTT.KLTHLEQTNINLFIGNWSNHQLQKSICIRH..GD.DTSHNQYHILFIDTAHQRIKFSSIDNEEIIYILDYDDTQHILMQTSSKQGIGTSRPIVYER. 0 1 1 2 +8857 PF09025 YopR_core YopR Core Mistry J, Sammut SJ anon pdb_1z21 Domain The YopR core domain, predominantly found in the Yersinia pestis virulence factor YopR, is composed of five alpha-helices, four of which are arranged in an antiparallel bundle. Little is known about this domain, though it may contribute to the virulence of the protein YopR [1]. 
25.00 25.00 98.30 98.10 22.20 21.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.90 0.71 -4.19 9 74 2009-01-15 18:05:59 2006-08-04 11:50:47 5 1 69 1 9 35 16 140.50 43 81.11 CHANGED s-ushh....p+hp.sssL-shLus.sPuspRElLW.hap.....pGscpss...ppLhtslppcLlucFuGp.hsl.sshDhsEL+uhlppa.PLGup+EpsLLplhu-LKsh.....PshtaLs-LsRcEL.hLIPhNuMVcNLh+pSHKLDLE .......................................................s-lhspLEptLpuEssptsRElLW.ta......Ausssst...p.Lhss.lcEcLLuRFuQp.tsl.PshDhsEL+uhLppa.shG+ppEshLLQlLtulKss.....sGh.YLu-Llp+EL.lLlPhNuhVDNLl+NSHKlDh- 0 1 3 4 +8858 PF09026 CENP-B_dimeris Cenp-B_dimeris; Centromere protein B dimerisation domain Mistry J, Sammut SJ anon pdb_1ufi Domain The centromere protein B (CENP-B) dimerisation domain is composed of two alpha-helices, which are folded into an antiparallel configuration. Dimerisation of CENP-B is mediated by this domain, in which monomers dimerise to form a symmetrical, antiparallel, four-helix bundle structure with a large hydrophobic patch in which 23 residues of one monomer form van der Waals contacts with the other monomer. This CENP-B dimer configuration may be suitable for capturing two distant CENP-B boxes during centromeric heterochromatin formation [1]. 24.30 24.30 24.30 24.50 24.20 24.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.46 0.72 -3.77 2 32 2012-10-08 13:44:34 2006-08-04 11:51:16 5 5 24 4 19 37 1 93.70 69 22.07 CHANGED PTLHFLEGtEDS-SDS-EEE-D--EDE..-DE-D-E-sDEVPVPSFGEAMAYFAMVKRYLTS.PIDDRVQSHILHLEHDLVHVTRKNHARQAGsRGLGHQS ............PTLHFLEGtEDS-SDS-EE--..-E--..D---.-.D-D.D-EDsD.EVPVPSFGEAMAYFAMVKRY..LTSFPIDDR....VQSHILHLEHDLVHVTRKNHARQAGVRGLGHQS.................. 0 4 4 7 +8859 PF09027 GTPase_binding GTPase binding Mistry J, Sammut SJ anon pdb_1cf4 Domain The GTPase binding domain binds to the G protein Cdc42, inhibiting both its intrinsic and stimulated GTPase activity. 
The domain is largely unstructured in the absence of Cdc42 [1]. 21.30 21.30 22.10 21.30 21.10 20.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.12 0.72 -4.38 5 170 2009-01-15 18:05:59 2006-08-04 11:51:45 5 14 62 1 77 128 0 62.20 49 7.74 CHANGED AGlSAQDIpVPLKsGFlHoG+GhuNsR+C.WGsPucF-NsYLs..hDP.shhhspLSsA..tPTQHLuslG ......AGlSAQDISpPLpsSFIHT..GHGDu..sP++C.WGhPD+ID-lYLsNPMDPPDlhss..p...usu....tph.stht........................ 0 15 22 43 +8860 PF09028 Mac-1 Mac 1 Mistry J, Sammut SJ anon pdb_2avw Domain The bacterial protein Mac 1 adopts an alpha/beta fold, with 14 beta strands and 9 alpha helices. The N-terminal domain is made up predominantly of alpha helices, whereas the C-terminal domain consists predominantly of beta sheets. Mac 1 blocks polymorphonuclear opsonophagocytosis, inhibits the production of reactive oxygen species and contains IgG endopeptidase activity [1]. 20.10 20.10 20.20 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.26 0.70 -5.67 3 78 2009-01-15 18:05:59 2006-08-04 12:34:31 5 9 60 8 13 76 1 280.70 32 54.08 CHANGED KhsDuIuAPVlApsplhVcMKV.DRGlEs-sapsDsEV....cTSEss.YcVTchcc.................lWs+GlTPPAhFspGGhV.........apAPahAtpGaYDhNKolNG+..DlsLCaAATASNMLHWWFEQNs-YIEpYLKc..cKQKlslGEphclLDlR+YIcohQDQsN...SclFNhFRsh...uausRRsGalsDuLlDhFINGYYLpV.+sGssNLscoY...DpRGGlFsDVFs+sspsKpLTsR..h+scThKEFu-Ll+KsL-sK+ALGLoa..opuNh.hsHlVTlWGA-YDssGNIcAlYITDSD.spAsIGhK+YslGVcuuGNltlsAc+lK........scstGAhlDGLaTLcpGQDpW 
.................................................................................................................h......phpp.............................lWscGlss..Pt..p.pst.h..............apA.Pa.ss.......p.GaYDhsKs....hNup......Dp.LChAAsAuNMlHWWh-QNpppl-tYLpc...cptpl..htpp.thh-l+chlsoh....tsQps....SplFphF+s....hhuhpp.GhhsDhllDhFINGYh.ph..ptsss.....lp.....cs....Dp+GGhF.sVF....ppphLTs+..hhtts.hcchup.l+p.LppspslGLoa..phhsh...hsHllslWGA-a..D.sp.GplcA..lYlTDSD..p.......pt.....p.....lGhK+YhlshsssGp..tls.pphp........ppshGu.l.hlaTlphGps.W...................................................... 1 7 11 13 +8861 PF09029 Preseq_ALAS 5-aminolevulinate synthase presequence Mistry J, Sammut SJ anon pdb_1h7d Domain The N terminal presequence domain found in 5-aminolevulinate synthase exists as an amphipathic helix, with a positively charged surface provided by lysine residues and no stable helix at the N-terminus. The domain is essential for the import process by which ALAS is transported into the mitochondria: translocase of the outer membrane (Tom) and translocase of the inner membrane protein complexes appear responsible for recognition and import through the mitochondrial membrane. The protein Tom20 is anchored to the mitochondrial outer membrane, and its interaction with presequences is thought to be the recognition step which allows subsequent import [1]. 
20.70 20.70 21.30 22.40 20.40 20.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.79 0.72 -4.05 2 196 2009-01-15 18:05:59 2006-08-04 12:34:46 5 6 50 2 81 167 0 83.20 29 19.56 CHANGED hlAhAhllRpCPhLupsPpshLt+suKo..hLh.htRCPl............LuTthPThpQ...KsTpsGs-psuhA.S+CPFhhsEhtcRcutlVp+Au.ElpEDVpphps .....................................hlp.pCPhLspsspshltpshts..hl.hhtpCPh....................................................h..sst......st.....p.p........................tt..t..s..s....sp.pt..........s..hA..spCPFh.u.phtt.tstlh.cAu.ElQEDVpthp............................................................................... 0 7 12 32 +8862 PF09030 Creb_binding Creb binding Mistry J, Sammut SJ anon pdb_1kbh Domain The Creb binding domain assumes a structure comprising of three alpha-helices which pack in a bundle, exposing a hydrophobic groove between alpha-1 and alpha-3 within which complimentary domains found in the protein 'activator for thyroid hormone and retinoid receptors' (ACTR) can dock. Docking of these domains is required for the recruitment of RNA polymerase II and the basal transcription machinery [1]. 21.10 21.10 21.60 24.50 20.70 20.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.57 0.72 -3.84 4 148 2009-01-15 18:05:59 2006-08-04 12:48:14 5 14 46 7 70 100 0 105.30 55 4.94 CHANGED ..ssQWsQusLPQ...hpsuhPRPVhp.t.AQtuss...GPR.susQss.....tuI..uPsALQDLLRTLKSPSSPQQQQQVLNILKSNPQLMAAFIKQRTAKYsAsQPt. ..................................................s.W.t...Qus.lP..Q..Q........h.....sG....M...s.RPs..M.....u.Q..t.u.s..........uP.....p....ss.s...Qs.s....................tuh....u.sALQ-LLRTL+SPSS.PpQQQQVLNIL+SNPQLMAAFI..KQ.RsAKYs.usp.Pt.......... 
0 11 19 36 +8864 PF09032 Siah-Interact_N Siah interacting protein, N terminal Mistry J, Sammut SJ anon pdb_1ysm Domain The N terminal domain of Siah interacting protein (SIP) adopts a helical hairpin structure with a hydrophobic core stabilised by a classic knobs-and-holes arrangement of side chains contributed by the two amphipathic helices. Little is known about this domain's function, except that it is crucial for interactions with Siah. It has also been hypothesised that SIP can dimerise through this N terminal domain [1]. 21.30 21.30 21.30 21.40 20.80 21.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.83 0.72 -3.92 10 147 2009-01-15 18:05:59 2006-08-04 12:59:19 6 6 107 6 89 136 0 69.90 32 31.30 CHANGED hsphlcpLptDL-El+pLlEpApRpRVpDlLppEh+KlEsElppppp....QcpQ..ppppEspcPou.hsssspsYTVKI ..........ppLppDL-El+sLLppApRpRV+-hLosEhp+lEsEltpt.h.p......p...tp.......p........................................................ 0 23 36 61 +8865 PF09033 DFF-C DNA Fragmentation factor 45kDa, C terminal domain Mistry J, Sammut SJ anon pdb_1iyr Domain The C terminal domain of DNA Fragmentation factor 45kDa (DFF-C) consists of four alpha-helices, which are folded in a helix-packing arrangement, with alpha-2 and alpha-3 packing against a long C-terminal helix (alpha-4). The main function of this domain is the inhibition of DFF40 by binding to its C-terminal catalytic domain through ionic interactions, thereby inhibiting the fragmentation of DNA in the apoptotic process. In addition to blocking the DNase activity of DFF40, the C-terminal region of DFF45 is also important for the DFF40-specific folding chaperone activity, as demonstrated by the ability of DFF45 to refold DFF40 [1]. 
25.00 25.00 39.80 39.30 23.20 22.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.00 0.71 -4.41 5 60 2009-01-15 18:05:59 2006-08-04 12:59:53 5 2 35 2 30 53 0 163.20 61 53.64 CHANGED DGGTAWluRES..MEsD-sD.suGsD..+WKNLAcQLKEDLSSIILMSEEDLQsLIDVPsuELAusLshSppKVQuLQsTLQRVLDRREEERQSKQLLELYL+AlEKEGsh..+pQEscAsLu-E.DuVDoGh...sElsScsshuSpILhlLK-KsuPELSLSoQDLEhVsK ............................DGGTAWloQES..h-.s.DEsDouuGl...KWKNlARQ.LK-DLSSIILLSEEDLQsLlDlPCuDLApELsQSssplQsLQpTLQQVLDpREEsRQSKQLLpLYLpALEKEGulLSKppES...cA..u.hu.-EhDAVDo.Gh..spEsuSclsLsSplLssLKEKsAPELSLSSQDLElVs.......... 0 2 4 11 +8866 PF09034 TRADD_N TRADD, N-terminal domain Mistry J, Sammut SJ anon pdb_1f3v Domain The N terminal domain of 'Tumour necrosis factor receptor type 1 associated death domain protein' (TRADD) folds into an alpha-beta sandwich with a four-stranded beta sheet and six alpha helices, each forming one layer of the structure. The domain allows docking of TRADD onto 'tumour necrosis factor receptor-associated factor' (TRAF): the binding is at the beta-sandwich domain, away from the coiled-coil domain. Binding ensures the recruitment of cIAPs to the signaling complex, which may be important for direct caspase-8 inhibition and the immediate suppression of apoptosis at the apical point of the cascade [1]. 25.00 25.00 96.60 95.50 20.00 18.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.55 0.72 -4.07 6 45 2009-01-15 18:05:59 2006-08-04 13:05:06 5 2 37 2 25 42 0 110.40 57 37.13 CHANGED -Su.Guh-uhEILKlHcuDP.LhVpLKFsuhssCpRFLpuYtpGALppuLpp+hsphLAl..cuLtlpTpLKAGscpLDthLsD.EpCLpaIptpQP-RLRD-ElAELEppLps ...-Su.GS.-slphLKIHcSDPQLIVQLRFCGcpsCsRFLcuYREGALRsuLQppLusuLA...pul.sLpLEL+AGAEcLDshLsDEERCLpsIhtppPDRLRDEEluELE-tL+s........... 
0 1 3 10 +8867 PF09035 Tn916-Xis Excisionase from transposon Tn916 Mistry J, Sammut SJ anon pdb_1y6u Domain The phage-encoded excisionase protein Tn916-Xis adopts a winged-helix structure that consists of a three-stranded anti-parallel beta-sheet that packs against a helix-turn-helix (HTH) motif and a third C-terminal alpha-helix. It is encoded for by Tn916, which also codes for the integrase Tn916-Int. The protein interacts with DNA by the insertion of helix alpha-2 into the major groove and the contact of the hairpin that connects strands beta-2 and beta-3 with the adjacent phosphodiester backbone and/or minor groove. Tn916-Xis stimulates phage excision and inhibits viral integration by stabilising distorted DNA structures [1]. 22.90 22.90 22.90 22.90 22.70 22.80 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.25 0.72 -3.88 8 457 2012-10-04 14:01:12 2006-08-04 13:11:25 5 1 293 1 23 251 24 66.20 52 93.39 CHANGED MN+.-..VPIWEKhoLTIEEAAcYasIGpNKLRpLs.cNPsscFVLalGo+hhIKRKcFEcaI-shssl .................MppsD..lPIWE+YTLTIEEASKYFRIGENKLR+LAE..E..Nc..sAsW...l..IhNGN..RIQIKRKQFEK.hIDsLcsI............. 0 13 18 19 +8868 PF09036 Bcr-Abl_Oligo Bcr-Abl oncoprotein oligomerisation domain Mistry J, Sammut SJ anon pdb_1k1f Domain The Bcr-Abl oncoprotein oligomerisation domain consists of a short N-terminal helix (alpha-1), a flexible loop and a long C-terminal helix (alpha-2). Together these form an N-shaped structure, with the loop allowing the two helices to assume a parallel orientation. The monomeric domains associate into a dimer through the formation of an antiparallel coiled coil between the alpha-2 helices and domain swapping of two alpha-1 helices, where one alpha-1 helix swings back and packs against the alpha-2 helix from the second monomer. Two dimers then associate into a tetramer. The oligomerisation domain is essential for the oncogenicity of the Bcr-Abl protein [1]. 
20.90 20.90 21.10 32.00 20.40 19.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.81 0.72 -4.02 4 67 2009-09-11 10:35:23 2006-08-04 13:43:33 5 10 25 8 23 63 0 72.20 81 8.04 CHANGED hVpPluFAEAW+AQFP-..u-PPhM-LRShGD........lEQEL-+C+uSIRRLEpEVN+ERFRMIYLQTLLAKERKSYD+QRWGF+R ..MV-PVGFsEAW+AQFPD..SEPP+MELR..SVGD........IEQELERCKASIRRLEQEVNpERFRMIYLQTLLAKEKKSYDRQRWGFRR....... 1 2 5 15 +8869 PF09037 Sulphotransf Stf0 sulphotransferase Mistry J, Sammut SJ anon pdb_1tex Domain Members of this family are essential for the biosynthesis of sulpholipid-1 in prokaryotes. They adopt a structure that belongs to the sulphotransferase superfamily, consisting of a single domain with a core four-stranded parallel beta-sheet flanked by alpha-helices [1]. 20.70 20.70 20.70 20.70 20.40 20.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.47 0.70 -4.88 13 172 2012-10-05 12:31:09 2006-08-04 13:59:44 5 5 158 4 52 155 31 228.40 33 80.83 CHANGED uYllCuosRSGSTLLsclLpuTGsAGpPppFFp........pss.hppWhtths.........shp.t.s-hthscsahptslptG+ussGVaGh+LMhpphshlhppLspL.PshsuDshR..lccsaG.p.shalHLpRcDpVuQAVShhRApQotlW+..ttsDusc.tR................tspYDssuIsphlphLccpEpuWpsWFtppsl-PlcIsY-sLsssPptslsslLptLGl-sphAsthsPslp+.ADppSc-WscRYRpD .................................................................................................YllhuotRSGSTLLsc.LpuTG....ss....GpPpEaFp........................ssp..ccWhtthp................shp...s.ps-ht..s.s.tah.ptlh..stG.p.osNGlaGs.KL....Mh.sQhshl.p+.h...t..tl.ss...hs.........s..cshc..hlc.cl.h.G..p.tshhlHlpR.DlVsQAVShh......RA...hQ.......T.plW+.......sp.s.....Dspccsp....................shYcs......s.....sIu+hlp.tLcs.p-psWc.sWFs.p.cs.Ic.P...lcl.sY.s.L...h.c..s....s...s..h..lusVL-tlG...-..s..p.h..A.s..........tP...hlc+Qu...sppocEWspRa+t-................................................................................ 
0 10 31 42 +8870 PF09038 53-BP1_Tudor Tumour suppressor p53-binding protein-1 Tudor Mistry J, Sammut SJ anon pdb_1xni Domain Members of this family consist of ten beta-strands and a carboxy-terminal alpha-helix. The amino-terminal five beta-strands and the C-terminal five beta-strands adopt folds that are identical to each other. This domain is essential for the recruitment of proteins to double stranded breaks in DNA, which is mediated by interaction with methylated Lys 79 of histone H3 [1]. 20.50 20.50 20.80 21.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.49 0.71 -4.29 3 89 2012-10-02 16:56:36 2006-08-04 14:11:33 5 3 51 16 40 84 2 117.20 74 7.46 CHANGED GSSFVGLRVVAKWSSNGYFYSGKIT+DAGuGKYKLLFDDGYECDVLGKDILLCDPIPLDTEVTALSEDEYFSAGVVKGHKKESpELYYSIEKDGQRKWYKRMAVILSLEQGNRLREQYGLGP ......GsSFVGLRVVAKWS.SNGYFYSGKITR.D.VGAGKYKLLFDDGYEC.....DVLGKDILLC...DPIPL.DT..E..V...TA.L.S.ED...EYFSAGV...V.KG..HR.K.E..S..GELYYS.IE.KE.GQRK.W..YKRMAVILSLEQGNRLREQYGLGP...................... 0 7 9 23 +8871 PF09039 HTH_Tnp_Mu_2 Mu_I-gamma; Mu DNA binding, I gamma subdomain Mistry J, Sammut SJ anon pdb_2ezh Domain Members of this family are responsible for binding the DNA attachment sites at each end of the Mu genome. They adopt a secondary structure comprising a four helix bundle tightly packed around a hydrophobic core consisting of aliphatic and aromatic amino acid residues. Helices 1 and 2 are oriented antiparallel to each other. Helix 3 crosses helices 1 and 2 at angles of 60 and 120 degrees, respectively. Excluding the C-terminal helix 4, the fold of the I-gamma subdomain is remarkably similar to that of the homeodomain family of helix-turn-helix DNA-binding proteins, although their amino acid sequences are completely unrelated [1]. 
20.50 20.50 20.50 20.50 20.40 20.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.27 0.72 -4.32 6 131 2012-10-04 14:01:12 2006-08-04 14:14:54 6 14 112 4 25 141 1 106.80 35 17.57 CHANGED hapVpshscsDWhPhLlst....sttcppsphu.Is-cAWtaFpuDYLR.EKPohssCYcRLcpAAp-pGW.sIPShuohpR+h.pplPcsplVhsREGpauLp+LhPsQpRo .................................pVpthscsDWhssLlst....tss+ps.s+h..u.chsp-AW.p.F.hpuD.YLR.E...+Ps..aspCYcRLc...ts..Accp..G..W...s.....IPShpohpRRl..pplscs...h...hVhtRE.G-cALhc.hhPuQpRo........... 0 9 18 22 +8872 PF09040 H-K_ATPase_N Gastric H+/K+-ATPase, N terminal domain Mistry J, Sammut SJ anon pdb_1iwc Domain Members of this family adopt an alpha-helical conformation under hydrophobic conditions. The domain contains tyrosine residues, phosphorylation of which regulates the function of the ATPase. Additionally, the domain also interacts with various structural proteins, including the spectrin-binding domain of ankyrin III [1]. 25.00 25.00 46.20 45.50 24.60 23.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.61 0.72 -8.12 0.72 -3.85 5 35 2009-01-15 18:05:59 2006-08-04 14:31:05 6 6 21 2 19 36 0 41.00 88 4.36 CHANGED GKAENYELYSVELGPGPGGDMAAKMS.KKKAGGGGGKKKEKL ..GKAENYELYSVELG.PGPGGDMAAKMS.KKK.AGGGGGK+KEKL...... 0 1 1 2 +8873 PF09041 Aurora-A_bind Aurora-A binding Mistry J, Sammut SJ anon pdb_1ol5 Domain The Aurora-A binding domain binds to two distinct sites on the Aurora kinase: the upstream residues bind at the N-terminal lobe, whilst the downstream residues bind in an alpha-helical conformation between the N- and C-terminal lobes. The two Aurora-A binding motifs are connected by a flexible linker that is variable in length and sequence across species. Binding of the domain results strong activation of Aurora-A and protection from deactivating dephosphorylation by phosphatase PP1 [1]. 
20.20 20.20 21.60 23.00 19.50 17.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.46 0.72 -4.21 6 51 2009-01-15 18:05:59 2006-08-04 14:31:21 5 3 31 3 24 49 0 63.60 57 8.93 CHANGED MSpspsoYSaDAPosFINFSSLc--.DhcNhDSWFDcpANLENh.sspctluclhQspsshpKstLQp ......MSQspoSYSaDAPocFINFoSLc-EtDspNlDSWFEEKANLENKh.ucNGhutlaQuKTsLRKsslpp................. 0 2 3 7 +8874 PF09042 Titin_Z Titin Z Mistry J, Sammut SJ anon pdb_1h8b Domain The titin Z domain, that recognises and binds to the C-terminal calmodulin-like domain of alpha-actinin-2 (Act-EF34), adopts a helical structure, and binds in a groove formed by the two planes between the helix pairs of Act-EF34. This interaction is essential for sarcomere assembly [1]. 21.50 21.50 28.40 21.50 19.80 21.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.92 0.72 -4.36 13 282 2009-01-15 18:05:59 2006-08-04 14:33:34 6 47 33 1 104 290 0 41.60 41 1.23 CHANGED pEplR+Es...........EK.sAVspVVlAssKA+ppEslscspEphus+pEQ ..............pEpl+KEs..........................................................-K.sAVspVVlAssK.A+cpE.h.+spEthss+p-Q.............. 0 6 7 19 +8875 PF09043 Lys-AminoMut_A D-Lysine 5,6-aminomutase alpha subunit Mistry J, Sammut SJ anon pdb_1xrs Domain Members of his family are involved in the 1,2 rearrangement of the terminal amino group of DL-lysine and of L-beta-lysine, using adenosylcobalamin (AdoCbl) and pyridoxal-5'-phosphate as cofactors. The structure is predominantly a PLP-binding TIM barrel domain, with several additional alpha-helices and beta-strands at the N and C termini. 
These helices and strands form an intertwined accessory clamp structure that wraps around the sides of the TIM barrel and extends up toward the Ado ligand of the Cbl cofactor, providing most of the interactions observed between the protein and the Ado ligand of the Cbl, suggesting that its role is mainly in stabilising AdoCbl in the precatalytic resting state [1]. 25.00 25.00 70.20 70.10 17.50 17.30 hmmbuild -o /dev/null HMM SEED 509 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.65 0.70 -6.38 8 226 2009-01-15 18:05:59 2006-08-04 14:41:35 6 3 199 25 54 150 6 435.60 50 68.49 CHANGED LsLDhshV-+ARstA+cIAtssQcFI-pHTTVoVERTlsRLLGIDGVDspsVPLPNlVVDHlK-pssLspGAAhaluNAMlpTGpoPQEIAEuVAsGELDLpphPhp-ptcI+tshpslAptsV-+I+uNR+pRE-hlcph.G-tssPaLYVIVATGNIYEDVVQApAAARQGADVIAVIRoTuQSLLDYVPaGATTEGFGGTYATQENFRIMR+ALDEVGpElGRYIRLCNYCSGLCMPEIAAMGALERLDhMLNDALYGILFRDINMpRTLlDQaFSRlINGaAGIIINTGEDNYLTTADAhEcAHTVLASQhINEQFALhAGLP-EQMGLGHAFEM-P-LENGFLaELAQApMsREIFPKAPLKYMPPTKaMTGNIFKGHlQDAhFNlVolhTsQpIHLLGMLTEAIHTPFhpDRhLuIENAKYIFNNh+clusEIpFKcGGhIppRApEVLcKAhsLLEpIEpcGLFpuIE+GhFGsV+RPhDGGKGLsGVlpKstsYaNPFl-LM .................................................................................................................................................................................cYhcph.ss.p.s.sPhlhs....lAoGch.-DlcphRhAAhpGADhIhVIRTTGQShlDhl.EG.TsEGhGGs.hTpcphRh.RKAhD.lp-.EVGR.IphpsYsSGlshPElAlhhA.EGlsshhpDs.YslLaRsINhhRoaVDtt.u+plhAhAsIh..sGtcNh.sTA.c.t.h.csh.p.l....hVpchlNphaulhsGhPc-.IGLups.....sPss.sphhh-LsaAlhlR-LFs-h.h+h..sT+YhpuslhcuhhpcslssLlohLTutcIQ.shhPsEuhssPWhpspshulposK.sasuhcGlt-.lphpc-G.lschsR-lh-+AlshLpE.h..cs.s.G.......hFsAlEcGhFsD...........IsRstpGGhu.usVsERDsDYhsPsh-..th.... 0 31 42 52 +8876 PF09044 Kp4 Kp4 Mistry J, Sammut SJ anon pdb_1kpt Domain Members of this fungal family of toxins specifically inhibit voltage-gated calcium channels in mammalian cells. 
They adopt an alpha/beta-sandwich structure, comprising a five-stranded antiparallel beta-sheet with two antiparallel alpha-helices lying at approximately 45 degrees to these strands [1]. 25.00 25.00 26.30 26.20 24.30 24.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.86 0.71 -4.32 2 55 2009-01-15 18:05:59 2006-08-04 15:01:32 5 2 26 2 49 54 0 118.20 34 83.92 CHANGED .hLhhshh.ssstpLGINCRGSupC..hhsst.hh.hhRs.puhss.sphassGE+hApVs.....ssssuhsAalQs.sssshushcuhtHh..lspHGC+VCGSsP.....usNsVscGpLThNYV.Nu ........................................h.h....hshh.h.ss..suALGINCRGSuhC..s...s....u.....s.....ss....t...h..hphhttl.pshss...s+passGpcIACss..........s....t..su.......lCAF...hQsh.s.u.s...sssshpu..ltph..............Lh-HGCptCGSlP...stssNsVspGpLThNhV...s....... 0 7 24 37 +8877 PF09045 L27_2 L27_2 Mistry J, Sammut SJ anon pdb_1y76 Domain The L27_2 domain is a protein-protein interaction domain capable of organising scaffold proteins into supramolecular assemblies by formation of heteromeric L27_2 domain complexes. L27_2 domain-mediated protein assemblies have been shown to play essential roles in cellular processes including asymmetric cell division, establishment and maintenance of cell polarity, and clustering of receptors and ion channels. Members of this family form specific heterotetrameric complexes, in which each domain contains three alpha-helices. The two N-terminal helices of each L27_2 domain pack together to form a tight, four-helix bundle in the heterodimer, whilst the third helix of each L27_2 domain forms another four-helix bundle that assembles the two units of the heterodimer into a tetramer [1]. 
20.90 20.90 21.00 21.40 20.00 18.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.67 0.72 -4.43 3 113 2009-01-15 18:05:59 2006-08-04 15:14:15 5 14 40 6 50 114 0 57.80 61 3.30 CHANGED DKpRALQAlERLQuKLKERGDVssEEKLSLL+SVLQSPLFsQILoLQpSlQQLKDQVN ....DKppsLQshERLQsKL+ERGDsupp-KLSlL+ssLQSPLFsQILoLQpSlpQLKcQls...... 0 6 9 22 +8878 PF09046 AvrPtoB-E3_ubiq AvrPtoB E3 ubiquitin ligase Mistry J, Sammut SJ anon pdb_2fd4 Domain The E3 ubiquitin ligase domain found in the bacterial protein AvrPtoB inhibits immunity-associated programmed cell death (PCD) when translocated into plant cells, probably by recruiting E2 enzymes and transferring ubiquitin molecules to cellular proteins involved in regulation of PCD and targeting them for degradation. The structure of this domain reveals a globular fold centred on a four-stranded beta-sheet that packs against two helices on one face and has three very extended loops connecting the elements of secondary structure, with remarkable homology to the RING-finger and U-box families of proteins involved in ubiquitin ligase complexes in eukaryotes [1]. 
25.00 25.00 107.80 107.10 24.60 18.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.58 0.71 -4.12 4 25 2009-01-15 18:05:59 2006-08-04 15:14:50 5 1 22 1 2 34 0 122.10 75 30.16 CHANGED .tVVADIRAALc.IusQFsQLRTISKADAESp-.GF+DAAD.HPDDsTpCLFGEELSLSNPcQQVIGLAGpsTDhsQPYSQEuNKsLsFMDMKKLAQaLAuKPEHPMsR-pLsAcNIAKYAFRIVP ..............tVVsDI...RAALD.IusQFSQLRTISKADAESEELGF+DAAD.HP.DsATpCLFGEELSLSNPDQQVIGLAsNPTDpsQPYSQEsNKsLsFMDMKKLAQaLAsKPEHPhNRQpLDAcNIAKYAF+IVP 0 0 0 1 +8879 PF09047 MEF2_binding MEF2 binding Mistry J, Sammut SJ anon pdb_1n6j Domain The myocyte enhancer factor-2 (MEF2) binding domain, predominantly found in the calcineurin-binding protein CABIN 1, adopts an amphipathic alpha-helical structure, which allows it to bind a hydrophobic groove on the MEF2S domain, forming a triple-helical interaction. Interaction of this domain with MEF2 causes repression of transcription [1]. 21.50 21.50 21.90 34.60 21.30 19.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.30 0.72 -4.31 2 61 2009-01-15 18:05:59 2006-08-04 15:27:42 5 3 32 1 25 48 0 34.20 90 2.02 CHANGED TLLSPKGSISEETKQKLKssILSuQSAAss+K-oL ...TLLSPKGSISEETKQKLK......SAILSAQSAANVRKESL.... 0 2 5 12 +8880 PF09048 Cro Cro Mistry J, Sammut SJ anon pdb_1d1m Domain Members of this family are involved in the repression of transcription by binding as a homodimer to palindromic DNA operator sites in phage lambda: they repress genes expressed in early phage development and are necessary for the late stage of lytic growth. These proteins have a secondary structure consisting of three alpha-helices and three beta-sheets, and dimerise through interactions between the two antiparallel beta-strands [1]. 
30.80 30.80 30.80 36.10 29.10 30.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.74 0.72 -4.33 5 146 2012-10-04 14:01:12 2006-08-04 15:37:13 5 1 126 17 5 68 1 59.00 66 89.33 CHANGED Mp..RITLuDYVt+aGQAKAA+DLGVtQuAISKAL+AGRcIhVolssDGSVhAEEVRPFPS ....Mc.RITLpDYAhRFG.QsKTAKDLGVhQSAIsKAI+AGRcIFLTlpuDG.S.VhAEEV+PFPS...... 0 0 1 4 +8881 PF09049 SNN_transmemb Stannin transmembrane Mistry J, Sammut SJ anon pdb_1zza Domain Members of this family consist of a single highly hydrophobic transmembrane helix that transverses the lipid bilayer at a 20 degree angle with respect to the membrane normal. They contain a conserved cysteine residue (Cys32) that, together with Cys34 found in the stannin unstructured linker domain, constitutes the putative trimethyltin-binding site that resides at the end of the transmembrane domain close to the lipid/solvent interface [1]. 25.00 25.00 49.90 49.40 20.50 19.60 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.58 0.72 -4.25 3 39 2009-01-15 18:05:59 2006-08-04 15:42:51 5 2 37 1 24 25 0 33.00 95 37.68 CHANGED MSIMDHSPTTGVVTVIVILIAIAALGALILGCW .MSIMDHSPTTGVVTVIVILIAIAALGALILGCW 0 1 3 8 +8882 PF09050 SNN_linker Stannin unstructured linker Mistry J, Sammut SJ anon pdb_1zza Domain Members of this family are unstructured, acting as connectors of the stannin helical domains. They contain a conserved CXC metal-binding motif and a putative 14-3-3-zeta binding domain. Upon coordinating dimethytin, considerable structural or dynamic changes in the flexible loop region of SNN may take place, recruiting other binding partners such as 14-3-3-zeta, and thereby initiating the apoptotic cascade [1]. 
25.00 25.00 63.90 63.40 17.80 16.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.51 0.72 -6.97 0.72 -4.39 3 39 2009-01-15 18:05:59 2006-08-04 15:43:56 5 2 37 1 24 25 0 26.00 95 29.68 CHANGED CYLRLQRISQSEDEESIVGEGETKEP CYLRLQRISQSEDEESIVGDGETKEP 0 1 3 8 +8883 PF09051 SNN_cytoplasm Stannin cytoplasmic Mistry J, Sammut SJ anon pdb_1zza Domain Members of this family consist of a distorted cytoplasmic helix that is partially absorbed into the plane of the lipid bilayer with a tilt angle of approximately 80 degrees from the membrane normal. They interact with the surface of the lipid bilayer, and contribute to the initiation of the apoptotic cascade on binding of the unstructured linker domain to dimethyltin [1]. 25.00 25.00 34.70 39.20 24.90 24.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.42 0.72 -6.91 0.72 -4.21 5 37 2009-01-15 18:05:59 2006-08-04 15:44:50 5 1 35 1 24 23 0 26.50 87 30.25 CHANGED LLVQYSAKGP+VE+KTKL.TPNGTESH LLVQYSAKGPCVERKAKL.TPNGPEVH.... 0 1 3 8 +8884 PF09052 SipA Salmonella invasion protein A Mistry J, Sammut SJ anon pdb_2fm8 Domain Salmonella invasion protein A is an actin-binding protein that contributes to host cytoskeletal rearrangements by stimulating actin polymerisation and counteracting F-actin destabilising proteins. Members of this family possess an all-helical fold consisting of eight alpha-helices arranged so that six long, amphipathic helices form a compact fold that surrounds a final, predominantly hydrophobic helix in the middle of the molecule [1]. 
25.00 25.00 50.80 50.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 674 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -13.09 0.70 -6.13 3 143 2009-01-15 18:05:59 2006-08-04 15:52:32 5 2 130 10 3 112 0 584.70 80 99.48 CHANGED MlTussTQAPshLsussosouTpASsLuupLSDVR.SuoToLusctSlu-LESFsAsasQ+SLDsLFuuSs+ADsL+ElYoNSsNsYAKpEIhEFAsVapSLl+QsuLsPEAcKsLpKluAQYoApIIKDGLuEKSAFGPWos+TKKtaQLRpNLE++LA-IAppHTuGEApKLGscLlpsEVooFItSCIEscLGpoLDslTSppLTcLVDuAAtpAFEuLRppRpcLI-p+GFSVG+LARDL-TVAVlPpLLRsVLssI..sPuD+tP-psuhssPscPpPuuGPsPuGsGKsucPstIHYHIN..IcssN+ShDNR+asNpucoalssup...RHlDNSsH-NScpsAsssTotosD.LsRNGpSlLSssuSssutpHuLVsuVTp...........slsHSISGpVDssAssoA.E+VhNsou-ucDGtVhhutlGSDGLTTS.pEhsAlsS+ScsG+PLpuss+uVsD...........sL+PlhothsGsEsVKouTssSsDTstSGschpp.sAGpssssNSsTDusGsFoGl+FRsGshYhTlPTls.h+uht..F-AccclLsulRsALEPsuopPhsQRREF-uLRs-ILPSDTh+psslKs+soDuschscLs-.cA-TL+cslssHPthEK....L+ElApsLuREAsLo+lccsos.LLoslLD..GLpuDs-hRAuPs...hsuKPsssuVlpTlDGLH ................MQTEIKTQATNLAANLSAVRESATsTLSGEIKGPQLEDFPALIKQASLDALFKCGKDAEALKEVFTNSNNVAGKKAIMEFAGLFRSALNATSDSPEAKTLLMKVGAEYTAQIIKDGLKEKSAFGPWLPETKKAEAKLENLEKQLLDIIKNNTGGELSKLSTNLVMQEVMPYIASCIEHNFGCTLDPLTRSNLTpLlDtA.AAKAVcALDMCHQKLTQEQGTSVGREARHLEMQTLIPLLLRNVFAQI...PADKLPD......PKIPEPAAGPVPDG.GKKAEPTGINININ..IDSSNHSVDNSKHINNSRSHVDNSQ...RHIDNSNHDpSRKTIDNSRTFIDN.SQRpGESHHSTNSSNVSHSHSRVDSTTHQTETAHSASTGAIDHGIAGKIDlTAHATA.EAVTNASSESKDGKVVTSEKGTTGETTSFDEVDGVTSKSIIGKPVQATVHGVDDNKQQSQTAEIVNVKPLASQLAGVENVKhDTLQSDoTVITGNK.....AGTTDNDNSQTDKTGPFSGLKFKQNSFLSTVPSVTNMHSMH..FsAREsFLGVIRKALEPDTSTPFPVRRAFDGLRAEILPNDTIKSAALKAQCS....DIDKHPELKA.KM-TLKEVITHHPQKEK....LAEIALQFAREAGLTR.KGETDYVLSNVLD..GLIGDGSWRAGPAYESYLNKPGVDRVITTVDGLH..... 0 1 1 2 +8885 PF09053 CagZ CagZ Mistry J, Sammut SJ anon pdb_1s2x Domain CagZ is a 23 kDa protein consisting of a single compact L-shaped domain, composed of seven alpha-helices that run antiparallel to each other. 70% of the residues are in alpha-helix conformation and no beta-sheet is present. 
CagZ is essential for the translocation of the pathogenic protein CagA into host cells [1]. 25.00 25.00 134.40 134.30 19.30 19.20 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.42 0.71 -4.99 3 75 2009-01-15 18:05:59 2006-08-04 16:00:15 5 1 40 1 1 58 0 195.10 97 99.99 CHANGED MELGFNETERQKILDSN+SLMGNANEVRDKFIQNYATSLKDSNDPQDFLRRVQELRINMQKNFISFDsYYNYLNNLVLASYNRCKQEKTFAESTIKNELTLGEFVAEISDNFNNFMCDEVARISDLVASYLPREYLPPFIDGNMMGVAFQILGIDDFGRKLNEIVQDIGTKYIILSKNKTYLTSLERAKLITQLKLNLE MELGFNEAERQKILDSN+SLMGNANEVRDKFIQNYAoSLKDSNDPQDFLRRVQELRINMQKNFISFDAYYNYLNNLVLASYNRCKQEKTFAESTIKNELTLGEFVAEISDNFNNFMCDEVARISDLVASYLPREYLPPFIDGNMMGVAFQILGIDDFGRKLNEIVQDIGTKYIILSKNKTYLTSLERAKLITQLKLNLE..... 0 1 1 1 +8887 PF09055 Sod_Ni Nickel-containing superoxide dismutase Mistry J, Sammut SJ anon pdb_1t6i Domain Nickel containing superoxide dismutase (NiSOD) is a metalloenzyme containing a hexameric assembly of right-handed 4-helix bundles of up-down-up-down topology with an N-terminal His-Cys-X-X-Pro-Cys-Gly-X-Tyr motif that chelates the active site Ni ions. NiSOD catalyses the disproportionation of superoxide to peroxide and molecular oxygen through alternate oxidation and reduction of Ni, protecting cells from the toxic products of aerobic metabolism [1]. 25.00 25.00 49.20 49.10 19.70 18.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.85 0.71 -3.76 25 144 2009-09-10 20:18:48 2006-08-04 16:03:13 6 4 139 81 73 148 532 126.20 43 79.80 CHANGED ssspVpA.......HCDlPCGlYDPusARltu......olhphhcclp-lsst..........st.tsphsRhlshKEp+ApclKcclhllWoDYFKss+l-..........pYPcLH-lhapshhtuuts..KtslDhspApcLlstlsclschFWpoK .............s.spVpA.......HCDlPCGVYDPApARIcAE.....oVcuhpcKhpu.css..............shhsRhlhIKEp+AchsK+cl.lLWoDYFKssHhE.............cYPcLHpLhpcshKhsuAu....KsssDsupupcLLshIscIscIFWcTK................................ 
0 32 54 68 +8888 PF09056 Phospholip_A2_3 Prokaryotic phospholipase A2 Mistry J, Sammut SJ anon pdb_1faz Domain The prokaryotic phospholipase A2 domain is predominantly found in bacterial and fungal phospholipases, as well as various hypothetical and putative proteins. It enables the liberation of fatty acids and lysophospholipid by hydrolysing the 2-ester bond of 1,2-diacyl-3-sn-phosphoglycerides. The domain adopts an alpha-helical secondary structure, consisting of five alpha-helices and two helical segments [1]. 26.10 26.10 26.10 26.50 26.00 25.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.86 0.72 -3.86 18 148 2009-01-15 18:05:59 2006-08-04 16:06:21 6 9 113 5 85 147 14 106.50 36 37.65 CHANGED TDphlFuhslssFsssRssp..sPup.L..DWooDGCSpuP.......DsPhGF...sFtsuCpRHDFGYRNY+tQsRFo.ss+t+..IDssFhpDhhptC...sthssh...htssCctsApsYYpAVRtFG ..................................t.shsta.stRpsp...sst..h..sWooDsCSsu..P..........Dp....PhGa....sF...tsuCpRHDFG.......Y....RNa....+.....t.....t...s+....F....ot....ss+.p+..lDssFppDLhphC..........st.st.............tpssCcu.hAtsYYtAV+thG.......................... 0 22 50 73 +8889 PF09057 Smac_DIABLO Second Mitochondria-derived Activator of Caspases Mistry J, Sammut SJ anon pdb_1g73 Domain Second Mitochondria-derived Activator of Caspases promotes apoptosis by activating caspases in the cytochrome c/Apaf-1/caspase-9 pathway, and by opposing the inhibitory activity of inhibitor of apoptosis proteins (XIAP-BIR3). The protein assumes an elongated three-helix bundle structure, and forms a dimer in solution [1]. 
25.00 25.00 29.90 27.90 24.60 24.20 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.43 0.70 -5.24 4 112 2009-01-15 18:05:59 2006-08-04 16:07:38 5 3 52 23 36 107 0 189.70 51 94.57 CHANGED p+L.tuss.lL+aptslls..sus.+pRplphL.ssaRK.sloluVGsulCAVPhsQ+sE..sLSpEuLlRRAsSLVTDSusTFLSQTThALl-ulTpYsKAVaTLlSLp+pYpu.luKhsspEEsuIWQVIIGtRsEhps++cphh+aESsWMsAVsLSEhAAEAAYpoGADQAslss+splQlsQoQVEpl+plu+cAEhpLA-opsEElcRhhp............p.ulp-tE-lPEAYLRED ..........................................................................................................t....hpp.hh.hshsh.slCAlPht.Q....p.-..sLSp-uLhRRAsSLVTDSooTFLSQTThALI-AlTEYoKAVYTLlSLh+pYtuhLGKhsupEEDpVWQVIIGtRsEhos+ppEhh+hEooWhoAV.sLSEhAAEAAYpoGADQASlTs+splQlspsQVpcs+pLStcAEpKLAEspspEl..cphtp...............p...t..ttp.t-t..EAYLRED............................. 0 5 9 19 +8890 PF09058 L27_1 L27_1 Mistry J, Sammut SJ anon pdb_1rso Domain The L27 domain is a protein interaction module that exists in a large family of scaffold proteins, functioning as an organisation centre of large protein assemblies required for the establishment and maintenance of cell polarity. L27 domains form specific heterotetrameric complexes, in which each domain contains three alpha-helices [1]. 21.60 21.60 21.60 23.00 20.60 21.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.99 0.72 -4.09 4 147 2009-01-15 18:05:59 2006-08-04 16:10:09 5 21 76 3 67 170 0 59.10 57 8.95 CHANGED MPV++pDspRALpLLE-YpS+LSpstDctLRouIERVIsIFpSsLFQALlDIQEFYElTLhD.ss .......MPl++p.-spRALcLLE-Y+u+Lo.p.............spD+pLRpuIERVIsIFpSsLFQAL.l..DIQ..EFYElTLLDs............. 1 24 30 48 +8891 PF09059 TyeA TyeA Mistry J, Sammut SJ anon pdb_1xl3 Domain Members of this family are composed of two pairs of parallel alpha-helices, and interact with the bacterial protein YopN via hydrophobic residues located on the helices. 
Association of TyeA with the C terminus of YopN is accompanied by conformational changes in both polypeptides that create order out of disorder: the resulting structure then serves as an impediment to type III secretion of YopN [1]. 21.10 21.10 21.40 21.50 21.00 20.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.67 0.72 -4.05 7 103 2009-01-15 18:05:59 2006-08-04 16:28:52 5 2 94 2 16 55 1 84.90 45 49.81 CHANGED MAYssS-LMuDlIALVEcRWsusp-lpplssAhpLsssppplpFapEL++LlRhlPl-VFuDEEQRQNLlpAsQtALDtAI-pEEEE .......MAYt.S-LMuDllALl-cRWlusp-lppLusuhsL..ssscpclpFap-L++lhRhlPlpVFuD-EQRQNLLpusQpAlD.AI-pEEEp.... 0 7 9 11 +8892 PF09060 L27_N L27_N Mistry J, Sammut SJ anon pdb_1vf6 Domain The L27_N domain plays a role in the biogenesis of tight junctions and in the establishment of cell polarity in epithelial cells. Each L27_N domain consists of three alpha-helices, the first two of which form an antiparallel coiled-coil. Two L27 domains come together to form a four-helical bundle with the antiparallel coiled-coils formed by the first two helices. The third helix of each domain forms another coiled-coil packing at one end of the four-helix bundle, creating a large hydrophobic interface: the hydrophobic interactions are the major force that drives heterodimer formation [1]. 25.00 25.00 26.20 25.20 23.80 16.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.30 0.72 -4.16 4 63 2009-01-15 18:05:59 2006-08-04 16:39:47 5 6 39 8 31 52 0 48.90 65 7.50 CHANGED ElE-LL.SLKplp+sLsDsQSQpDlELlhQLlppsDFQsAapIHNAVAt ...ElEDLhSSLKHIQHTLVDSQSQEDIuLLLQLVQNpDFQNAFKIHNAVo.s 0 2 5 13 +8894 PF09062 Endonuc_subdom PI-PfuI Endonuclease subdomain Sammut SJ anon pdb_1dq3 Domain The endonuclease subdomain, found in the prokaryotic protein ribonucleotide reductase, assumes an alpha-beta-beta-alpha-beta-beta-alpha-alpha topology. 
The four stranded beta-sheet forms a saddle-shaped surface and assembles together through an interface made of alpha-helices. The presence of 14 basic residues on the surface of the beta-sheets suggests that this large groove may be involved in DNA binding [1]. 24.40 24.40 24.70 38.40 23.20 24.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.50 0.72 -3.24 3 5 2012-10-03 01:41:40 2006-08-07 09:45:50 5 3 5 1 3 5 0 87.00 66 4.99 CHANGED PDGEDYEFIFDYWLAGFIAGDGslDKY+SHVKGHEYlYDRLRIYDYphEThtIINDaLEKTFG++YSlQ+DRNIaYIDIKARsITSHYlELL-GI-NG PDGEDYEFIFDYWLAGFIAGDGsFDKY+SHVKGHEYIYDRLRIYDYRlETFEIINDYLEKTFG++YSlQ+DRNIYYIDIKARsITSHYlKLL-GIDNG....... 0 1 1 2 +8895 PF09063 Phage_coat Phage PP7 coat protein Sammut SJ anon pdb_1dwn Domain Members of this family form the capsid of P. aeruginosa phage PP7. They adopt a secondary structure consisting of a six stranded beta sheet and an alpha helix [1]. 25.00 25.00 25.30 283.20 24.00 17.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.78 0.71 -4.34 2 2 2009-01-15 18:05:59 2006-08-07 10:03:32 5 1 1 17 0 4 0 127.00 96 99.61 CHANGED SKTIVLSVGEATRTLTEIQSTADRQIFEEKVGPLVGRLRLTASLRQNGAKTAYRVNLKLDQADVVDCSTSVCGELPKVRYTQVWSHDVTIVANSTEASRKSLYDLTKSLVspupsEDLVVNLVPLGR SKTIVLSVGEATRTLTEIQSTADRQIFEEKVGPLVGRLRLTASLRQNGAKTAYRVNLKLDQADVVDCSTSVCGELPKVRYTQVWSHDVTIVANSTEASRKSLYDLTKSLVspupsEDLVVNLVPLGR 0 0 0 0 +8896 PF09064 Tme5_EGF_like Thrombomodulin like fifth domain, EGF-like Sammut SJ anon pdb_1dx5 Domain Members of this family adopt a fold similar to other EGF domains, with a flat major and a twisted minor beta sheet. Disulphide pairing, however, is not of the usual 1-3, 2-4, 5-6 type; rather 1-2, 3-4, 5-6 pairing is found. Its extended major sheet (strands beta-2 and beta-3 and the connecting loop) projects into thrombin's active site groove. 
This domain is required for interaction of thrombomodulin with thrombin, and subsequent activation of protein-C [1]. 21.40 21.40 21.70 22.20 21.30 20.40 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -7.98 0.72 -4.34 6 46 2012-10-03 09:47:55 2006-08-07 11:18:30 5 20 34 0 27 41 0 34.80 55 7.17 CHANGED MFCNQTsCPADCDPNosu..sChCPEGYILD-Gs...lC ..MFCNpTsCPA.DCDPNs.s...sCpCPEGYILD-G....hC........ 1 1 4 11 +8897 PF09065 Haemadin Haemadin Sammut SJ anon pdb_1e0f Domain Members of this family adopt a secondary structure consisting of five short beta-strands (beta1-beta5), which are arranged in two antiparallel distorted sheets formed by strands beta1-beta4-beta5 and beta2-beta3 facing each other. This beta-sandwich is stabilised by six enclosed cysteines arranged in a [1-2, 3-5, 4-6] disulphide pairing resulting in a disulphide-rich hydrophobic core that is largely inaccessible to bulk solvent. The close proximity of disulfide bonds [3-5] and [4-6] organises haemadin into four distinct loops. The N-terminal segment of this domain binds to the active site of thrombin, inhibiting it [1]. 25.00 25.00 26.00 71.90 23.30 15.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.30 0.72 -4.36 2 2 2009-01-15 18:05:59 2006-08-07 11:44:31 5 1 1 3 0 3 0 27.00 100 46.55 CHANGED CDCGEKICLYGQSCNDGQCSGDPKPSS CDCGEKICLYGQSCNDGQCSGDPKPSS 0 0 0 0 +8898 PF09066 B2-adapt-app_C Beta2-adaptin appendage, C-terminal sub-domain Sammut SJ anon pdb_1e42 Domain Members of this family adopt a structure consisting of a 5 stranded beta-sheet, flanked by one alpha helix on the outer side, and by two alpha helices on the inner side. This domain is required for binding to clathrin, and its subsequent polymerisation. 
Furthermore, a hydrophobic patch present in the domain also binds to a subset of D-phi-F/W motif-containing proteins that are bound by the alpha-adaptin appendage domain (epsin, AP180, eps15) [1]. 21.10 21.10 21.40 21.10 21.00 20.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.29 0.71 -4.20 31 409 2009-01-15 18:05:59 2006-08-07 12:37:55 5 12 142 8 246 427 5 109.50 33 13.65 CHANGED hs.D...uplspppF.phWpsl.spss..-hphphp...thss.pshpptLpspslahlApppsss...phhaaos+tssshhhlhElthpssssphplslKspss..chsthhhphhcpllp ..........................hsE-...Gph..-+phFltTW+...sl.Psps.......Ehpaphp.......s.hs..s....-.slps+L.pssNlaTIA+Rssps...pchhY....ShKh...s...s...Gl.hhLsELplp.s...us.s.s...h....p.....lolKscss.....Essphhhpsh-sll..................................................... 0 86 125 180 +8899 PF09067 EpoR_lig-bind Erythropoietin receptor, ligand binding Sammut SJ anon pdb_1eer Domain Members of this family interact with erythropoietin (EPO), with subsequent initiation of the downstream chain of events associated with binding of EPO to the receptor, including EPO-induced erythroblast proliferation and differentiation through induction of the JAK2/STAT5 signaling cascade. The domain adopts a secondary structure composed of a short amino-terminal helix, followed by two beta-sandwich regions [1]. 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.61 0.72 -4.03 12 520 2012-10-03 16:25:20 2006-08-07 13:34:30 5 16 109 41 149 538 0 98.30 31 19.36 CHANGED Spsu..hhstcsc.hpCFopshcDFTCFW-tspssshs...YshhYphps.-phppCslhppssssu...h.hChFsps.-splaV.hclpV.spssttspasR.lsV- ...........ss.......stcPchhpChS.phETFoCaWpsG...s..p..ss..l...s..os...apLhY...........p......p....c......s......p............p............h+E.C.P..-Ypsuus......so.......CaFspp..pTolWhsYplpVtsssphss..pc................................... 
0 7 22 58 +8900 PF09068 EF-hand_2 efhand_1; EF_hand_2; EF hand Sammut SJ anon pdb_1eg3 Domain Members of this family adopt a helix-loop-helix motif, as per other EF hand domains. However, since they do not contain the canonical pattern of calcium binding residues found in many EF hand domains, they do not bind calcium ions. The main function of this domain is the provision of specificity in beta-dystroglycan recognition, though in dystrophin it serves an additional role: stabilisation of the WW domain (Pfam:PF00397), enhancing dystroglycan binding [1]. 21.10 21.10 21.10 21.60 20.90 19.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.71 0.71 -4.06 30 673 2012-10-02 16:17:27 2006-08-07 14:46:03 6 51 101 2 277 608 0 121.10 42 11.54 CHANGED chs-LhpsL..tDLNslRaSuYRTAhKLRtlQKthp.........LcLlsltslhcsFccpsLpp...............Dps.....lsVsclhssLoslY...................................................ppLspchsshs.sls............................................................................................................................hslDhhLNaLLNVYDssRsG+IpVLShKhulssLC ......................hppLatph....tcLsslR..hSsYRTAhKLRhlQKths..................................LcLlsl.ss.hEuhccpsLpp.......................Dps.....lsVsc.l.ssLoolY....................................................pLppchsshh.pls............................................................................................................................hslshhLNaLLssYD..........o..t..psG+lpVhShKsuLhoLC................................................................................................................ 0 64 87 168 +8901 PF09069 EF-hand_3 efhand_2; EF-hand Sammut SJ anon pdb_1eg3 Domain Members of this family adopt a helix-loop-helix motif, as per other EF hand domains. However, since they do not contain the canonical pattern of calcium binding residues found in many EF hand domains, they do not bind calcium ions. 
The main function of this domain is the provision of specificity in beta-dystroglycan recognition, though in dystrophin it serves an additional role: stabilisation of the WW domain (Pfam:PF00397), enhancing dystroglycan binding [1]. 21.70 21.70 21.70 28.00 20.20 21.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.96 0.72 -3.91 30 641 2012-10-02 16:17:27 2006-08-07 14:54:41 6 48 99 2 269 581 0 90.00 48 8.40 CHANGED Lp-KaRYlFpQl.......u-ssGhhDpp+LulhL+-slplP+plGEssuFGsp..lEsSVRSCFpts....t.......................csclph.....spFL-WhphE..PQolVWLPlLHRlA ...........l.DKhRY.lFppl.......usosGhhspp+Ls.hL+-slplPptlhEssoFGh.....hEsSVRSCFpt........................................p.cIph.....stFLDhhhh-...P...Qs.h.......V.WLPlLHRlA................................... 0 59 79 159 +8902 PF09070 PFU PFU (PLAA family ubiquitin binding) Mistry J anon Pfam-B_5813 (release 20.0) Domain This domain is found N terminal to Pfam:PF08324 and binds to ubiquitin [1]. 20.90 20.90 21.10 22.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.71 0.71 -4.08 39 348 2009-01-15 18:05:59 2006-08-07 15:24:39 6 21 290 7 246 358 3 111.50 39 15.52 CHANGED ppsGp+EGpshhl+ss..GslEAYpWo..pupW.KIG-VVs..u....suss......stKthacG+cYDYVFDVDlp-GtPsLKLPYNhs-NPatsAp+Flp+p-..Ls.sYh-QVspFIhpN..TpGssls ................................................t.psGp+-GQsphl..+ps..splpAapWo......ptpW.plGsVVs..u..........sstp........utKhhapGc-YDYV..FsVDlp...-....G..tP...slKLPYNh...u-sPatsApcFlpcNc..Ls.s.......YlDQVspFIhpNTpu.t..s............ 0 91 141 207 +8903 PF09071 Alpha-amyl_C Alpha-amylase, C terminal Sammut SJ anon pdb_1eh9 Domain Members of this family, which are found in the prokaryotic protein glycosyltrehalose trehalohydrolase, assume a gamma-crystallin-type fold with a five-stranded anti-parallel beta-sheet that packs against the C-terminal side of a beta-alpha barrel. 
This domain is common to family 13 glycosidases and typically contains a five to ten strand beta-sheet, however its precise fold varies [1]. 20.80 20.80 22.40 59.80 19.80 17.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.95 0.72 -4.22 5 18 2009-01-15 18:05:59 2006-08-07 15:32:08 5 3 18 8 4 16 0 66.80 58 12.17 CHANGED CcR+lEVcsG-NWLTlptcKlhslauFScSVIplKYoGsLLlSS.sSFP++IscuK.h+l-KGFGlYK .CNRKLoVENGNaWLTVKGNGYLlVYVFSpSlIEMKYRGTLVLSSNNSFPSQIsEsK.Y+L-KGFALYK. 1 1 1 3 +8904 PF09072 TMA7 Translation machinery associated TMA7 Mistry J, Wood V anon manual Family TMA7 plays a role in protein translation. Deletions of the TMA7 gene results in altered protein synthesis rates [1]. 18.80 18.80 20.80 20.70 18.70 17.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.03 0.72 -3.51 24 302 2009-01-15 18:05:59 2006-08-07 15:37:21 5 6 232 0 204 302 0 61.00 54 81.09 CHANGED uuRpGGKtKPLKAPKKppp-hDE-DlAFKpKQ+--pKAhKthtsKAtttGPLssu..GIKKSGKK .............puRpGGKtK.PLKtPKKppK.E.h........DE-Dh...AFKpKQK--pKAhcEhtsKA.t.tKGPLs.sG..GIKKS.GKK............... 0 57 97 157 +8905 PF09073 BUD22 BUD22 Mistry J, Wood V anon manual Family BUD22 has been shown in yeast to be a nuclear protein involved in bud-site selection. It plays a role in positioning the proximal bud pole signal [1]. More recently it has been shown to be involved in ribosome biogenesis [2,3]. 
37.00 37.00 37.00 37.00 36.60 36.40 hmmbuild -o /dev/null HMM SEED 432 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -12.79 0.70 -5.41 26 209 2009-01-15 18:05:59 2006-08-07 16:03:09 5 4 184 0 156 214 2 308.60 23 71.15 CHANGED Khcpuhpp...LppsLKpApth-hpKLh++.+th.tt.......................ssshscltcplphh+ph...shpphscthlhppLhK........................s.thh.stthhp...hhp..cKsttspssc.htp...t.t..sslsupLhssK.l+shhsslhpslctlhGhpsptttppcptpsppsppspppp............................ttppppppttcup-ssspcpstc-.....................ssss-tcpps-p...............thspaDsh.....lssusp-pp...........................................t.scspspscscppspcsSpp.tss.s.ppt..sspKKtKtpp...........tpph.LPpL.hsGYaSGu-s-spp.t............-hsspth..............pRKNRhGQ+ARptlWEKKYGppApHlpc.....cpE+ptpcppt.........Rptta-t..Rputp.......................tucsss.ttspspthts+.ppstpstppp.................LHPSWpAK+tAc-pt.t..sApFpGKKlsFD ..............................................................................................................................................................................h......h..........................................................................h.t.h.t.....h...h..h...h......t.......t....tt.tt.....................................................................t.............t......................................t................................tt..................sttptt...................................................................................p...tt.......t...................t..........................p...c.............................................lPpL....suahpsspst..t..........t.t................................cKNRhGQpARptlhEpKaGttApHltp........t...t...t.................................pp.thc...+psth......................................................tttttt.tt..ppttthttp..t.tt.s.ttttp.p......t................LHPSWcAp++tKEpt.p.......s...sFp..G....KKIsFD................................................ 
0 41 78 126 +8906 PF09074 Mer2 Mer2 Mistry J, Wood V anon manual Family Mer2 (Rec107) forms part of a complex that is required for meiotic double strand DNA break formation. Mer2 increases in abundance and is phosphorylated during the prophase phase of cell division [1]. Blocking double strand break formation results in delayed dephosphorylation and dissociation of Mer2 from the chromosome [1]. 27.70 27.50 27.70 27.90 27.40 27.10 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.38 0.71 -4.37 6 18 2009-12-23 15:34:04 2006-08-07 16:30:48 5 1 18 0 11 19 0 183.10 29 57.19 CHANGED us.shuopssscshputcs.........slsEuD+QIlEWAGKLELESl-L+E.pu-pLhsllsppsppLhhsstplsphL.ppp.....tuppcslcslh.slusplsNpLp-shpulppphcshpt..pp.htt...............h.sspchs+hsu.pshcIIpp.-.stt.ch.Kuhcshp-hlhNhusQLEshptlhlSlS+pL+s....LpsRpsshc ...................................o.sssshpph.s.........slpEuDKQILEWAGKLELESh-LRE.poscLlplLpcNScpLhpshpphsphL.p.c.....tupptslcphhcsLssplpspLccs.psh.spscphps..ppthtp................l.hspclp+hss....+Ih+php.s+Qpch.KShcsTQchlaNlssQLEchpcVLlShS+-hcs....LpsRQssLc.......................................................................................... 0 1 7 11 +8907 PF09075 STb_secrete Heat-stable enterotoxin B, secretory Sammut SJ anon pdb_1ehs Domain Members of this family assume a helical secondary structure, with two alpha helices forming a disulphide crosslinked alpha-helical hairpin. The disulphide bonds are crucial for the toxic activity of the protein, and are required for maintenance of the tertiary structure, and subsequent interaction with the particulate form of guanylate cyclase, increasing cyclic GMP levels within the host intestinal epithelial cells [1]. 
25.00 25.00 128.80 128.50 18.70 17.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.40 0.72 -4.11 2 6 2009-01-15 18:05:59 2006-08-08 09:28:43 5 1 3 1 0 5 0 48.00 98 71.46 CHANGED STQSNKKDLCEpYRQIAKESCKhGFLGVRDGTAGACFGAQIMVAAKGC STQSNKKDLCEHYRQIAKESCKKGFLGVRDGTAGACFGAQIMVAAKGC 0 0 0 0 +8908 PF09076 Crystall_2 Sklp_toxin; Beta/Gamma crystallin Sammut SJ, Eberhardt R anon pdb_1f53 Domain Members of this family assume a beta-gamma-crystallin fold [1,2], wherein nine beta-strands are connected by loop, and are separated into two sheets, each sheet forming the Greek key motif. The two Greek key motifs face each other in the global topology. The three-dimensional structure of the molecule is a 'sandwich'-shaped beta-barrel structure: hydrophobic side-chains are packed in the large interface area of the beta-sheets. In Streptomyces killer toxin-like protein domain confers a cytocidal effect to the toxin, causing cell death in both budding and fission yeasts, and morphological changes in yeasts and filamentous fungi [1]. This family also includes chitin-biding antifungal proteins [2-3]. 25.00 25.00 25.30 45.10 22.60 20.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.35 0.72 -4.21 10 18 2012-10-01 23:14:22 2006-08-08 10:32:47 5 1 15 2 5 22 0 71.90 35 65.99 CHANGED cIcsHtssspuh.CaANuGshshuh.s......Vs+ISTGNNhVsaphssGs.lpht+hpslTa.NhsshVssh-lh .......plhoHhssppSh.CaANtGphsFuhhs......VD+ISTGNNhlpapsssGsp..lphs..+apslTaPN+PstVssIcIh........ 0 1 5 5 +8909 PF09077 Phage-MuB_C Mu B transposition protein, C terminal Sammut SJ anon pdb_1f6v Domain The C terminal domain of the B transposition protein from Bacteriophage Mu comprises four alpha-helices arranged in a loosely packed bundle, where helix alpha1 runs parallel to alpha3, and anti-parallel to helices alpha2 and alpha4. 
The domain allows for non-specific binding of Mu to double-stranded DNA, allowing for integration into the bacterial genome, and mediates dimerisation of the protein [1]. 25.00 25.00 26.70 26.70 22.70 20.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.49 0.72 -4.31 15 110 2009-01-15 18:05:59 2006-08-08 10:54:26 6 3 100 1 15 94 1 77.00 46 25.15 CHANGED SRlAK+suIpKoKKuDVpAlApAWsl..ps-cthplhppIupK.PGuLRlLo+TLRLAuMsAcGcs.s.ls.chlptAap-L .SRlA+pptlpKsKKuDVpAIAcAWsl..ss-pEhplhppIupK..PGALRlLo+TL+LAshsApGcGts.lspcalptAa+El.... 0 4 9 14 +8910 PF09078 CheY-binding CheY binding Sammut SJ anon pdb_1ffg Domain Members of this family adopt a secondary structure consisting of an open-face beta/alpha sandwich, with four antiparallel beta-strands and two alpha-helices. They bind to a corresponding domain on CheY, with subsequent phosphorylation of the CheY Asp57 residue, and activation of CheY, which then affects flagellar rotation [1]. 21.10 21.10 21.30 30.50 21.00 21.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.87 0.72 -3.96 30 622 2009-01-15 18:05:59 2006-08-08 13:08:00 6 8 596 13 79 360 5 64.90 65 9.76 CHANGED LRIpLoplcsp-h-LLpEELGNLGslsssp+uu-oLsshLsoslupDDIsAVhCFVI-s-QIshp ...RIlLSRLKAsEVDLLEEELGpLsTLTDVVKGADSLSAhLsGslAEDDIsAVLCFVIEADQIsFE.......... 0 6 28 52 +8911 PF09079 Cdc6_C CDC6, C terminal Sammut SJ anon pdb_1fnn Domain The C terminal domain of CDC6 assumes a winged helix fold, with a five alpha-helical bundle (alpha15-alpha19) structure, backed on one side by three beta strands (beta6-beta8). It has been shown that this domain acts as a DNA-localisation factor, however its exact function is, as yet, unknown. Putative functions include: (1) mediation of protein-protein interactions and (2) regulation of nucleotide binding and hydrolysis. Mutagenesis studies have shown that this domain is essential for appropriate Cdc6 activity [1]. 
25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.61 0.72 -4.26 83 705 2009-01-15 18:05:59 2006-08-08 13:37:42 6 7 285 10 384 699 139 80.40 22 15.43 CHANGED LhAllhhtppsp...p.hssuclachYpplscp.hsh..cslop.cclpshls-LchhGllpsch.spG..p.tG+ppcl..pLshs..pplhcs.....l ........................Lhullhhhcppt.....cphshsc....lachYppl.Ccp..hsh..pslsp.pchhshl.scLc.thGll.phpp......ptpuphpcl....pLp.hs...cplhtsl........................ 0 83 216 312 +8912 PF09080 K-cyclin_vir_C K cyclin, C terminal Sammut SJ anon pdb_1g3n Domain Members of this family adopt a secondary structure consisting of a five alpha-helix cyclin fold. Interaction with cyclin dependent kinases (CDKs) at a PSTAIRE sequence motif within the catalytic cleft of CDK results in the regulation of CDK activity [1]. 25.30 25.30 25.70 134.40 24.10 25.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.45 0.72 -3.84 2 10 2009-01-15 18:05:59 2006-08-08 14:06:09 5 1 6 2 0 9 0 103.90 69 40.57 CHANGED AVLsTDshu.hLhK.hhspppL.hhHppVsp.lpKAlVsPtTGuLPsSlluAA.CALhs.usshP.ss.......pLAphlGsssusLtAAsEplhTolp-FD..RI AVLATDVTSFLLLKLLGGSQHLDFWHHEVNTLITKALVDPKTGSLPASIISAAGCALLVPANVIPQDTHSGGVVPQLASILGCDVSVLQAAVEQILTSVSDFDL.RI... 0 0 0 0 +8913 PF09081 DUF1921 Domain of unknown function (DUF1921) Sammut SJ anon pdb_1gcy Domain This domain, which is found in a set of prokaryotic amylases, has no known function [1]. 
25.00 25.00 29.30 89.70 24.80 17.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.29 0.72 -4.00 5 11 2009-09-11 11:07:02 2006-08-08 14:32:33 5 2 11 9 3 20 0 50.80 71 9.46 CHANGED oGFSGLVATlSGSsQpLVhALDSNLSSPuQVASGSFSpAlNpDNGplRIWR SGYSGLVATVoGSQQTLVVALNSNLSNPGQVASGSFSEAVNsSNGQVRVWR 0 1 2 3 +8914 PF09082 DUF1922 Domain of unknown function (DUF1922) Sammut SJ anon pdb_1gh9 Domain Members of this family consist of a beta-sheet region followed by an alpha-helix and an unstructured C-terminus. The beta-sheet region contains a CXCX...XCXC sequence with Cys residues located in two proximal loops and pointing towards each other. This precise function of this set of bacterial proteins is, as yet, unknown [1]. 20.70 20.70 21.80 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.39 0.72 -3.91 3 19 2009-01-15 18:05:59 2006-08-08 14:42:03 5 1 19 1 16 17 1 66.50 36 78.59 CHANGED YlIFRCDCGRVLYSKEGsKTRKCVCGKTlNVKcRRIFK+A-opEEASEsVQcMQEEIYGuocF+sAS..E ..............YhIFRC.c.CGRhlYu+-sspT++C.sCG+sl+lKppRIht+scstcEAuphVp+lQpphhGtstFpps..t....... 0 2 13 14 +8915 PF09083 DUF1923 Domain of unknown function (DUF1923) Sammut SJ anon pdb_1gjw Domain Members of this family are found in maltosyltransferases, and adopt a secondary structure consisting of eight antiparallel beta-strands, which form an open-sided 'jelly roll' Greek key beta-barrel. Their exact function is, as yet, unknown [1]. 
19.20 19.20 20.20 98.00 18.30 19.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.15 0.72 -4.49 2 7 2009-01-15 18:05:59 2006-08-08 15:55:14 5 1 6 2 1 5 0 64.10 86 10.04 CHANGED GKFENLTTKDLVMYSYEKNGQKIVIAANVGKEPKEITGGRVWNGKWSDEEKVVLKPLEFALVVQ GKFENLTTcDLVMYSYE+NGQKIllAANVGKEPKEITGGRVWNGK.WSDEEKVVLKPL-FsLVVQ 0 1 1 1 +8916 PF09084 NMT1 NMT1/THI5 like Mistry J, Wood V anon Pfam-B_2797 (release 20.0) Family This family contains the NMT1 and THI5 proteins. These proteins are proposed to be required for the biosynthesis of the pyrimidine moiety of thiamine [1][2][3]. They are regulated by thiamine [2]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.17 0.70 -4.75 60 7206 2012-10-03 15:33:52 2006-08-08 16:24:59 6 87 2898 13 2338 13311 2793 196.00 17 56.48 CHANGED NssHsslalAppKGaFc-pGL-V.cIhps.sssssssphVusGpsDh.ulsht.plh..hs+u.pGhPlhslusllppshsulhshccss.Ip..s.pDLcGK+lG..hss.ssh..pcshlpshlppsGh..s.sclphlss.shsh...ulhsGplDsshhshtsh-hlp..hc..........hcGh...chphhhhp-hGss.sahshlllsscshlpp.ps-hl+pFlcAsp+GhpashscPc-A 
................................................................................................................................................................t.......t.....t.......u.......l.....p.....l...p.h..h..........ss....s....s......t.........h.....p.....h..l.....t..s.G..p..hD.....h.....u..........h.........s.......t......t.....................s....h..h.........hs.........t.........s.......p............G.........h.........s..........l..........h.........h........l................u.........s...............h........h.......t.......p.........s...........s.........t........s...........l........l.........s......h............p............s..........s.......s...Ip..............oh.....t...........D..........L.........+.........G.........K.........+....l..u............h..st....uss.............s.p...h......h....h...t.......t.....h........L....p.......p...t...G.l.....................s.....s......c.......l.....p.....h........l..........t.........h.............s............s............s............s.......t................h.................s......u.......l..t...s......G..p..l...D..A..h...h........h.........s............h......h..t.......................................................t.s.........h........h...h.................t.....p..............s.........................................................h......h.....h....s....p....t.p....h....hp.....p...p..............t.......h...t...t...h...h.t..sh....pu.h.t..h...t................................................................................................................................... 0 641 1405 1915 +8917 PF09085 Adhes-Ig_like Adhesion molecule, immunoglobulin-like Sammut SJ anon pdb_1gsm Domain Members of this family are found in a set of mucosal cellular adhesion proteins and adopt an immunoglobulin-like beta-sandwich structure, with seven strands arranged in two beta-sheets in a Greek-key topology. They are essential for recruitment of lymphocytes to specific tissues [1]. 
25.00 25.00 26.30 65.10 18.50 17.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.35 0.72 -4.21 5 43 2012-10-03 02:52:13 2006-08-08 16:37:59 5 4 21 2 9 34 0 108.90 71 32.38 CHANGED AFPDQLTVSPEALVPGpDpEVACTAHNVTPAcP-uLShSLLLG-QELEGuQALsPEVpE...EPQEuE.DPLFQVTpRWLLPuLGTPuPPALHCQVTMpLPGLpLSHR+uIPVL .AFPDQLTVSPsALVPG.D.EVACTAHcVTPsDPNuLSFSLLLGsQELEGAQAL.sPEVcE...E..PQp-E.Ds.LFRVTcRWRLPPLGTPsPPALaCQATMRLPGLELSHRQAIPh...... 0 1 1 2 +8918 PF09086 DUF1924 Domain of unknown function (DUF1924) Sammut SJ anon pdb_1gu2 Domain This domain is found in a set of bacterial proteins, including Cytochrome c-type protein. It is functionally uncharacterised. 21.00 21.00 21.00 21.00 20.90 20.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.42 0.72 -3.94 17 99 2012-10-03 10:02:11 2006-08-08 16:50:24 6 2 90 17 41 102 18 95.10 45 66.68 CHANGED hpssuh.sshsusRGptLappct......cphuCsSCHssssppsGpHspTGK.IpPhAPusNPcRaoDsAKVEKWFpRNCppVlGRtCTspEKGDhlsaL ........h.tstuhsshsAtRGpthatscp.....s.cphSCuoCHssss.....TpsGpHspTGKsIcPhAPusNPcRaTDsAKlEKWFpRNCssVluR-CTstEKuDhLsaL............................ 0 9 27 32 +8919 PF09087 Cyc-maltodext_N Cyclomaltodextrinase, N-terminal Sammut SJ anon pdb_1h3g Domain Members of this family assume a beta-sandwich structure composed of the eight antiparallel beta-strands. A ten residue linker is also present at the C-terminal end, which connects the N terminal domain to a distal domain in the protein. This domain participates in oligomerisation of the protein, wherein the N-terminal domain of one subunit contacts the active centre of the other subunit, and is also required for binding of cyclodextrin to substrate [1]. 
25.00 25.00 29.60 28.80 21.70 20.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.01 0.72 -4.06 28 194 2009-01-15 18:05:59 2006-08-09 09:13:25 6 4 178 12 55 197 92 87.20 39 14.09 CHANGED +lEPs.WWsGMpNscLQLMlaGcsIus....hplslsh.sGVplpslp+.s-NPNYLFlsL-ls.pAcsGshslshpps............cpphphsYpLKpRcp ....+l-Ps.WWsGM+NP.pLQLhlY..G..csIus....spVolsh.sGVplpsls+.h-sPNYLhlhLsls..pApPGphslshp.ps............cpphshsYpLKtRp............ 0 22 42 52 +8920 PF09088 MIF4G_like MIF4G like Sammut SJ anon pdb_1h6k Domain Members of this family are involved in mediating U snRNA export from the nucleus. They adopt a highly helical structure, wherein the polypeptide chain forms a right-handed solenoid. At the tertiary level, the domain is composed of a superhelical arrangement of successive antiparallel pairs of helices [1]. 19.80 19.80 20.10 20.10 19.60 19.40 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.23 0.71 -4.86 9 328 2012-10-11 20:01:01 2006-08-09 09:53:49 6 9 258 11 235 329 0 159.30 35 20.77 CHANGED PsssolsuhlhR-hlhDhIshhcaNRp-sA+hLlsLchahs.tTFs..sss.spL...........hsPspSoWplEDlhVEslhuplFpLPsP.hp.lYYpSlLhEhC+huPsslAPslGRulRhlYpplso..hDhEhhcRFlDWFSaHLSNFsFpWpWpEWlsDl.pLssh..HP+hsFl+plIcKElRLSahpRI+poLP- .......................................................................................................................................................................s.tsph.h-.hhVEslFuplFpL.........PsP.......a....h.......laYtolLlElC.......Kh.......t..P....u.....u...ls..s....lupAhchLYpp.l-s...............hsh..ph.hpRalsWFSaHLSNFpFp......W...p.Wp-Wsssl......p.h.st..................pP+htFlppllcKplRL.Sat.p.RIpphlP................... 
0 75 123 197 +8921 PF09089 gp12-short_mid Phage short tail fibre protein gp12, middle domain Sammut SJ anon pdb_1h6w Domain Members of this family adopt a right-handed triple-stranded beta-helix fold, and are found in the middle of the phage short tail fibre protein gp12 [1]. 25.00 25.00 25.50 26.00 21.90 18.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.92 0.72 -4.03 2 20 2009-01-15 18:05:59 2006-08-09 11:06:17 5 2 18 1 0 24 0 76.70 59 14.83 CHANGED TGtTLNGRGuTsSMRGVVKLTTpAGststGDuSuALAWNADVIppRGGQhI.GoLpl.DphT.ANGhhshsGhh+.sst.l ....TGtTLNGRGuTTSMRGVVKLTTTAG.tstGDuSoALAWNADVIsTRGGQTINGoLNl.ssLT.ANGhhshsGhhp.sst.l............ 0 0 0 0 +8922 PF09090 MIF4G_like_2 MIF4G like Sammut SJ anon pdb_1h6k Domain Members of this family are involved in mediating U snRNA export from the nucleus. They adopt a highly helical structure, wherein the polypeptide chain forms a right-handed solenoid. At the tertiary level, the domain is composed of a superhelical arrangement of successive antiparallel pairs of helices [1]. 
21.00 21.00 22.80 21.40 20.80 20.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.58 0.70 -5.15 17 319 2012-10-11 20:01:01 2006-08-09 11:16:47 6 8 264 11 230 316 4 252.90 22 31.81 CHANGED Fpa......pp-shPhpptuppllshh+p+.ts..sc-lhpllppltp.sss.t.....s..hhtlslhlpsllplGS+ShSHshshlp+hpppL+tl........................hpspppcthllculhcaWpspsQhhhlllDphlphpllsstullsWhh.spph....sphhscshsaEhlppslpchttpht....t................................................c-.sst..............................h.ppLtpthsctppllthhhpphl.ll............sp....psspphtphstpW...ahthhGhlcshltca ........................................................apa........tppshPh..tshupplhshl+pK.ss....scEl.sllp...pl..p.spstt.................s...hplclhlpslLplGSKShSHshu.slp+..a+phlpp..l......................................................spsp..ps.php...llpslhchWp.spsQhhhlllDKh.lphpIlsstuVlpWl...F...ssph...............spths.......chalaEhlpsolpKhsp+lhpltpch...p...........................................................................................p-.s.t...........................................phpthpcpLppt.s.-tpslhhhhh..p.phl.lh...................spt...hpspt.th.pph.tp.W........ah.hhth.pthhh.................................................................................. 0 79 126 191 +8924 PF09092 Lyase_N Lyase, N terminal Sammut SJ anon pdb_1hn0 Domain Members of this family are predominantly found in chondroitin ABC lyase I, and adopt a jelly-roll fold topology consisting of a two-layered bent beta-sheet sandwich with one short alpha-helix. The convex beta sheet is composed of five antiparallel strands, whilst the concave beta-sheet contains five antiparallel beta-strands with a loop between two consecutive strands folding back onto the concave surface. This domain is required for binding of the protein to long glycosaminoglycan chains [1]. 
26.00 26.00 26.30 30.20 25.60 25.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.22 0.71 -4.64 8 133 2012-10-03 19:46:52 2006-08-09 13:18:47 6 3 88 3 22 127 1 177.80 36 17.68 CHANGED uhhpSphh-Fs.GsQlP-hhpsuuGSpLSLSus+YhhGpQSLcWcWpsGSohslc+PlsL.pccsASKsaGhpu...lohWIYNEpPVDshhph-LGpch.hsSGsPcAuFcl+lNFoGWRushVShppD..h-Gcch-G.....................Kush-SlRhhAPhtAPpGplaIDRVhhuhc.DARhQhoD.QV...+sR ....................................h.hsh.Ep..p.lPsthpsustSpLolSsp+YKpGppSLcWsa.p.sGus.Ls.lccs.lph...pc.........s...ss.uKshuhpu..............hphWIY.NEpP.c.ct.l.p.F-Fhcs.......Gcss...suFphplNFoGWRusaVsacpD..MpG..ptt-G.......................pMsplRlhAP.......ss.pGp..lalD+lhhuhc.DsRhQhuD.Ql.h...................... 0 8 19 19 +8925 PF09093 Lyase_catalyt Lyase, catalytic Sammut SJ anon pdb_1hn0 Domain Members of this family are predominantly found in chondroitin ABC lyase I, and adopt a helical structure, with fifteen alpha-helices which are at least two turns long and several short helical turns. The bulk of the domain is formed by ten alpha-helices forming five hairpin-like pairs and arranged into an incomplete toroid, the (alpha/alpha)5 fold. Additionally, two long and two short alpha-helices at the N terminus of the domain wrap around the toroid. At the C-terminal end of the toroid there is one additional short alpha-helix. This domain is required for degradation of polysaccharides containing 1,4-beta-D-hexosaminyl and 1,3-beta-D-glucoronosyl or 1,3-alpha-L-iduronosyl linkages to disaccharides containing 4-deoxy-beta-D-gluc-4-enuronosyl groups [1]. 
22.80 22.80 23.60 23.00 22.70 22.70 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.11 0.70 -5.56 4 140 2009-01-15 18:05:59 2006-08-09 13:19:05 6 4 89 3 25 131 2 351.20 36 35.10 CHANGED P-lpl...loP.psRphcslttl-p..hhp-hls.hhts.t.ps.shpppIsph+pca-thsIshpusG.thoGsPlhss+pp.hapsEhh.s.sKshhssh.lh.ttYss..................+pp.KppaLshaDahhDQGaAhGSuhsThHHaGYpsRthYhusaLM+DsLpEps+hsphhsTLRWas.shppoh.scPstsst.hDpapThhht+lh.lLhhsDs.c+lphlpohSRWlstuL.pssPGhtGulKsDGsuFHHcGsY.PuYuhsuhcsAuphIYLhpcTsFuloEpuppslKcshluhp.YsN.hchuhulSGRHPhss..uppls.uYAhhAluucss.....hD+phAusYLRLhcpssops.tt..a .................................................................phth....s.lss.ptt-hthh-p..hhpshl......ts..t...t.h..pphpphctpasthpIphp..sG.ploGpsIh...ts.+pp....haps...hhsshscphho...ph.shhssYtsLh.pluhhapps.s.....+pphtchalhhhcalhDQGashGSuhsshHH.aGYss.RthYhuhaLM+c.hLc...cps....hh..pts.csLhW..Ys.shppphp...p.....s....s..s..pupslDhaNThhpt+l.uslhhhs..Ds.....s..c+lphL+uFucWlshul.pssPGhhGGhKsDGosFHHpssY.PuYuhsuhss.AuphlYhLsGTs..Ftlo-pA+pslKpAhLs.....h.....c......h.YsN..hp.h.sluloGRHPh.s........psp.l.h.tsFthh.ALuGsPsst.pphDptLAAsYL+Lspssps.t................................... 1 10 22 22 +8926 PF09094 DUF1925 Domain of unknown function (DUF1925) Sammut SJ anon pdb_1k1x Domain Members of this family, which are found in a set of prokaryotic transferases, adopt an immunoglobulin/albumin-binding domain-like fold, with a bundle of three alpha-helices. Their function is, as yet, unknown [1]. 21.10 21.10 21.40 26.60 20.90 20.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.92 0.72 -3.99 15 86 2009-01-15 18:05:59 2006-08-09 14:08:33 6 2 86 5 57 89 15 78.20 46 11.65 CHANGED GhW+NFhsKYsEoNhMHKRMLtVSc+lpsh.......tsp......ApctLa+AQCNDAYWHGlFGGlYLPHLRcAlYcsLI+A-shL ......GhW+NFhsKYPESNhMHKRMLhlSpplpph........tptp.......spctLa+uQsNDsYWH.G.lFGGlYLPHLRcAlacplIcAEp..h... 
0 27 41 47 +8927 PF09095 DUF1926 Domain of unknown function (DUF1926) Sammut SJ anon pdb_1k1x Domain Members of this family, which are found in a set of prokaryotic transferases, adopt a beta-sandwich fold, in which two layers of anti-parallel beta-sheets are arranged in a nearly parallel fashion. The exact function of this family is, as yet, unknown, however it has been proposed that they may play a role in transglycosylation reactions [1]. 19.60 19.60 21.50 21.10 17.40 19.50 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.56 0.70 -5.18 21 118 2009-12-15 10:48:00 2006-08-09 14:08:46 6 5 105 5 74 118 24 224.80 21 38.21 CHANGED DhDhDGpcElhlpscsh.shlcsstGGslhElss+..sttaNasssLsRphEtYHctl..........ppptpslsohH-hsphhtc.hcccltaDhahRshhhD+hhpsstsL-shhpsp...p.uDh...sa.ht..tt.....lphhtct.h...hth.hplpKshplps...sslplpYplp.........pshshhaulElN..........LA...................lpuhtcshs..tscplplpD.ahhsclplc..hspssplathPlpTlSQSEtGa-hlhQulshhhhasl....ptphphplph ......................DhDhDGhcEhhhpspph.hhlpst.GGplhEh-hh..ttthNhhsshsRp.EhYaphh.........................t..t.t.....h..hcp....h.......tthh.D.h.+h.hhDphh..thshpph.tsp.......t.....a..h......ttt........hhh.t...h..........hplpKphphpp...ss..lplpYplp.......t..s...hphhausEhN.........lu..........................tth..p..t.........tspthth.-..hh.hph.lp...h.st.hphhhhPh.olo.po-.tGh-hh.Quhthhhhh.h......t.........h......................... 0 35 58 64 +8928 PF09096 Phage-tail_2 Baseplate structural protein, domain 2 Sammut SJ anon pdb_1k28 Domain Members of this family adopt a beta barrel structure with a Greek key topology, which is topologically similar to the FMN-binding split barrel. They are structural component of the viral baseplate, predominantly found in the structural protein gp27 [1]. 
25.00 25.00 59.50 58.10 23.30 19.40 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.14 0.71 -4.62 6 29 2012-10-01 22:58:23 2006-08-09 14:44:45 5 3 27 3 0 28 0 166.00 58 42.44 CHANGED hlspEPIshlVsEPRLlGQaIp.l-p.lsFDFEWLTKANsaTRsPacNsThYAHSFlDKphs+IlTG-GpNulslSRSGAYuDhTYRNGaEEusRLhThuQYDuYApspTaGNFsLTPGhKIpFaDpKNQF+sEFaVDEVIHElSpppSlTpLYMFsNSptlp.....cVKNE ..hlsQEPhshlVGEPpLIGQalQplchPlAaDFpWLTKuNt+sRsPhcNsTlYAHSFlDpphs+IosGcGpNSIlVSRSGuYS-hTYRNGaEEA.RLhTMAQYDGYAcCpohGNFsLTPGhKI.FhDsKNQF+s-FYVDEVIHElSNNsSlTpLYMFTNuppLc.....cVKNE.......... 0 0 0 0 +8929 PF09097 Phage-tail_1 Baseplate structural protein, domain 1 Sammut SJ anon pdb_1k28 Domain Members of this family adopt a beta barrel structure with a Greek key topology, which is topologically similar to the FMN-binding split barrel. They are structural component of the viral baseplate, predominantly found in the structural protein gp27 [1]. 18.90 18.90 19.10 120.10 17.30 16.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.12 0.71 -4.73 5 31 2012-10-01 22:58:23 2006-08-09 14:44:55 5 3 30 3 0 31 0 194.80 57 49.04 CHANGED QRsGYPNVSIKLYQsYDAWLENRFIELAATFlTLTMRDGLh.GlNEGLLQFYDuKNLHTKLsG-EIIQlSLKTANT.EpTaNRIYGIKHhuVoVDpKGDNIITFQLGSlHplcNLKFSRMFTNsAVsSVsEMIGsIYpDpPLLsPsIsuINshVPpsPWVsoINcYhcFVRcaGQoVEoE+FVaVWEDh-GIsIuDa ....QRsGaPNlSIKLY-sYDAWh-NRFlELAAThTTLTMRDuLY.GpNEGlLQFYDuKNlHTKMsGcpIIQISlpNANo.pplpoRIYGsKHauVSVDsKGDNIIsIpLusIHplcNLKFuRsFFssAsEolpEMlsVIYpD+sLLsPsINuINsYVPslPWsuohcsYhsaVRElGhuVtS-cFVFVWEDh.GIshhDY... 1 0 0 0 +8930 PF09098 Dehyd-heme_bind Quinohemoprotein amine dehydrogenase A, alpha subunit, haem binding Sammut SJ, Eberhardt R anon pdb_1pby Domain Members of this family are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase. 
They have a predominantly alpha-helical structure and can be divided into two subdomains, each binding a haem C group via a conserved CXXCH motif [1,2]. 25.20 25.20 25.30 25.40 25.10 25.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.12 0.71 -4.81 11 50 2012-10-03 10:02:11 2006-08-09 17:04:58 5 5 42 4 19 60 5 135.40 38 33.92 CHANGED tsGppllpppChuCHsspsss..thuRISpQRKTPEGW.MTlsRMphhHGlplos--RpslVKYLADpQGLAPuETcuhRYllERcPNshEpscstphophCuRCHStARluLQRRTspEWc+LlpFHLGQaPolEYQAhuRDR-WaslAhs-llPhLAcpYPh-osuW ...t.pGppllpspChuCHssptps....shsRIuppRKTPEG.W.MolsRMphhHGl.plos--+pslV+YLADppGLuPsEstshpYhh-+p.ss.-.ph.......s..hsphCuRCHShARhhLQRRstpEWppLlpaHluQaPohEhQA.uRDRpWh.lA.tphhs.Lucpashtp........................... 0 3 13 16 +8931 PF09099 Qn_am_d_aIII DUF1927; Quinohemoprotein amine dehydrogenase, alpha subunit domain III Sammut SJ, Eberhardt R anon pdb_1pby Domain Members of this family, which are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase, adopt an immunoglobulin-like beta-sandwich fold, with seven strands arranged into two beta sheets; the fold is possibly related to the immunoglobulin and/or fibronectin type III superfamilies. The precise function of this domain has not, as yet, been defined [1,2]. 21.20 21.20 21.20 21.20 21.00 20.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.36 0.72 -3.66 14 45 2012-10-03 16:25:20 2006-08-09 17:07:52 5 8 39 4 18 55 11 81.00 32 14.49 CHANGED pspllAVpPshl+sGsc.sclslsGsuL.....suclsLusGlcVscVlppossplslcV+ssucApsG.RsVulGutp.ussLsVYs ...........tspllAVpPshl+AGsc.oplolsGouL.....sucssL...u.s..G...lcVscllp..posspltVcl+suA-ApsG.+plulGshp.sssLsVYp.......... 
0 7 14 15 +8932 PF09100 Qn_am_d_aIV DUF1928; Quinohemoprotein amine dehydrogenase, alpha subunit domain IV Sammut SJ, Eberhardt R anon pdb_1pby Domain Members of this family, which are predominantly found in the prokaryotic protein quinohemoprotein amine dehydrogenase, adopt an immunoglobulin-like beta-sandwich fold, with seven strands arranged into two beta sheets; the fold is possibly related to the immunoglobulin and/or fibronectin type III superfamilies. The precise function of this domain has not, as yet, been defined [1,2]. 25.00 25.00 131.60 130.70 19.20 15.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.84 0.71 -4.47 12 36 2009-01-15 18:05:59 2006-08-09 17:08:39 5 4 31 4 13 43 3 130.40 52 25.03 CHANGED VKVsPsaulARlGGsGG..shPKh.upF-AhuahsGsDGKsGTsDDlclGslPAoWul-sFDEpAtcDpDs+YAGphpss.GlFsPusAGPNPtR+huTNNsGNLKVlATVc-u....uctlou-uphlVTV...QRassPPl .VKVsPsauIARIGGsGu..s.hPKVtupFEA.AassGsDGcPtTtDDlRlGhlPAoWolEsFsEpAtcDcDl+aAGphpus.GlFsPusAGPNPcR+htTNNAGNLKVlATlsDG....up.loGEuHhIVTV...QRWNsPPl.. 0 2 9 10 +8933 PF09101 Exotox-A_bind Exotoxin A binding Sammut SJ anon pdb_1ikp Domain Members of this family are found in Pseudomonas aeruginosa exotoxin A, and are responsible for binding of the toxin to the alpha-2-macroglobulin receptor, with subsequent internalisation into endosomes. The domain adopts a thirteen-strand antiparallel beta jelly roll topology, which belongs to the Concanavalin A-like lectins/glucanases fold superfamily [1]. 
20.50 20.50 20.60 20.80 19.60 19.00 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.79 0.70 -5.27 2 97 2009-01-15 18:05:59 2006-08-10 09:38:45 5 7 59 4 3 94 0 187.30 43 28.60 CHANGED E-thslasECtpsCsLs.csGh..pS+hSl.ssslhD...pGVLaYSMslpstpsslK.t.Dpu.SIho.G..hTlRhpt.........GV......p.NtshpYSYsRp.cGpaulNWLVPIGc-pPusIKl.lcELsttppl.chs.lYoI-hssphLt..KhttssoF.Vpt.EpN.....lAISasuVShhhAQtpspRcKRWucWtoGhsLChLsPhDulYNYlsQQpCsLsDsW.Gt.YcslAGsP....sKpsl-.KP..lppRlHF .......................................CsLs.c.Gh..pSphSl.s.slhD...pGVLaYSMslpstpsslh.t.spu.SIho.G..hTlRhpt.........GV......p.NtshpYSasRp.cupaslNWLVPIGc-pPusIKl.lcELsttppl.chs.lYoI-hssphLt..c.ttssoF.Vpt.EpN.....lAISaPuVShKhApppGpRHKRWu+WtoGL...AhCWhlPl.uIYNYIsQtpCshuDsWhGt.YcslAGsP....sKpsl................................ 0 1 2 2 +8934 PF09102 Exotox-A_target Exotoxin A, targeting Sammut SJ anon pdb_1ikp Domain Members of this family are found in Pseudomonas aeruginosa exotoxin A, and are responsible for transmembrane targeting of the toxin, as well as transmembrane translocation of the catalytic domain into the cytoplasmic compartment. A furin cleavage site is present within the domain: cleavage generates a 37 kDa carboxy-terminal fragment, which includes the enzymatic domain, which is then is translocated into the cytoplasm. The domain adopts a helical structure, with six alpha-helices forming a bundle [1]. 
21.60 21.60 22.60 208.90 21.30 16.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.89 0.71 -4.68 2 59 2009-01-15 18:05:59 2006-08-10 09:57:46 5 2 22 4 1 67 0 140.50 74 23.57 CHANGED cGsuhuALsAHpsCtlPLEThsRpRpPRshpp..pCuY.sQplVuLalAsRl.asplDpVhp.sLsp.tst.....uDLtch.cppPt.sp.sLTlAtt..pcaVpptsG....pspAGAtuADllSLhCPsAstpC.AussD ..tNAMpALAAHRVCGVPLETLARSRKPRDLsDDLSCAYQAQNIVSLFVATRILFSHLDSVFTLNLDEQEPEVAERLSsLRpINENNPGMVTQVLTVARQIYNDYVTHHPGLTPEQTSAGAQAADILSLFCPDADKSCVASNND 1 1 1 1 +8935 PF09103 BRCA-2_OB1 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 Sammut SJ anon pdb_1iyj Domain Members of this family assume an OB fold, which consists of a highly curved five-stranded beta-sheet that closes on itself to form a beta-barrel. OB1 has a shallow groove formed by one face of the curved sheet and is demarcated by two loops, one between beta 1 and beta 2 and another between beta 4 and beta 5, which allows for weak single strand DNA binding. The domain also binds the 70-amino acid DSS1 (deleted in split-hand/split foot syndrome) protein, which was originally identified as one of three genes that map to a 1.5-Mb locus deleted in an inherited developmental malformation syndrome [1]. 23.30 23.30 23.40 25.40 23.10 23.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.50 0.71 -4.40 19 188 2009-01-15 18:05:59 2006-08-10 11:08:30 5 33 140 3 119 199 1 125.00 36 6.93 CHANGED Sul++IhEpDsssuphhVLhVSpl............................p.t.s.ttshlELoDGWYsl+utlD.sLpphlccG+lpl..GpKLh..........lpGAc...Lhu.scsssPL.E....sssslhLplshNuTRhAcWps..+LGhh+..hs ........................SAl++IhEpDssuupsh.VLCVSsIh.................................................................tpppsts.spp.sshlELTDGW.......Yul+Ap.LDs.sL....tphl...cp...G.....+..Lpl..GpKlh.....................lpGAc........Lhu.ss.csss.PL.E..................sspslhLplssNSTR.ApWts..+LGahtp.............................. 
0 65 79 100 +8936 PF09104 BRCA-2_OB3 BRCA2, oligonucleotide/oligosaccharide-binding, domain 3 Sammut SJ anon pdb_1iyj Domain Members of this family assume an OB fold, which consists of a highly curved five-stranded beta-sheet that closes on itself to form a beta-barrel. OB3 has a pronounced groove formed by one face of the curved sheet and is demarcated by two loops, one between beta 1 and beta 2 and another between beta 4 and beta 5, which allows for strong ssDNA binding [1]. 21.10 21.10 21.20 23.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.84 0.71 -4.38 5 83 2009-09-11 15:20:08 2006-08-10 11:09:07 5 17 53 3 44 85 0 137.90 45 5.33 CHANGED REsLcFo+LLDPuFQPPCSEVDlVGlVVSV..V+spGLAPlVYLSDEChNLLsVKFWsDLN....EDIlKP+VLIAASNLQWR.PEu+SsIPTLFAG-aSsFSASPKEsHFQE+FscM+pTlE..NIDoFYcEAEpKLlcLLsuNsPK ...............RcslpFscLh-PsFpPsCuEVDllGhVlSV.......s+p....t..GhuPhVYL.S....DEsaNL.LulKFWtDLs....EDIlKPpsLIAASNLQWR...s-op...Ssl..PoLaAGDhSsFSA.sPKEuHhQEsFschKsslc..Nls..hFhs-AEpKL.hclLptp.................... 1 10 14 27 +8937 PF09105 SelB-wing_1 Elongation factor SelB, winged helix Sammut SJ anon pdb_1lva Domain Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding [1]. 
25.00 25.00 25.40 148.40 23.10 23.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -8.84 0.72 -4.31 2 2 2009-01-15 18:05:59 2006-08-10 11:45:38 5 1 2 3 1 5 0 61.00 100 9.62 CHANGED GSPEKILAQIIQEHREGLDWQEAATRASLSLEETRKLLQSMAAAGQVTLLRVENDLYAIST GSPEKILAQIIQEHREGLDWQEAATRASLSLEETRKLLQSMAAAGQVTLLRVENDLYAIST 0 1 1 1 +8938 PF09106 SelB-wing_2 Elongation factor SelB, winged helix Sammut SJ anon pdb_1lva Domain Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding [1]. 21.50 21.50 21.60 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.64 0.72 -4.04 67 966 2009-01-15 18:05:59 2006-08-10 11:46:27 6 14 953 6 177 683 32 57.80 44 9.28 CHANGED llstLspaHpcpP.c.Gls+-cL+...Rhst.ths....splaptllppLhppGplttptshl+L ......lL-sLAsYHEpHtDcsGsuRERLR......RhAlPht-.................-sLl...hhLI-chp-sGtIhsc+GWLHL............................. 0 66 113 151 +8939 PF09107 SelB-wing_3 Elongation factor SelB, winged helix Sammut SJ anon pdb_1lva Domain Members of this family adopt a winged-helix fold, with an alpha/beta structure consisting of three alpha-helices and a twisted three-stranded antiparallel beta-sheet, with an alpha-beta-alpha-alpha-beta-beta connectivity. They are involved in both DNA and RNA binding [1]. 22.20 22.20 22.60 22.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.40 0.72 -4.57 95 1262 2009-01-15 18:05:59 2006-08-10 11:47:24 6 13 1235 10 249 928 241 50.30 43 8.23 CHANGED ltphhp..p..ssplssuphR-hl.GhoRKhulslLEahDctthT+RtG..-.pRhlp .............tpl.p..c...sGuhssA-FRDtL....s.....luRKhAItlLEYFDRh.GaTRRcG...s.c+lLR....... 
0 93 169 217 +8940 PF09108 Xol-1_N Switch protein XOL-1, N-terminal Sammut SJ anon pdb_1mg7 Domain Members of this family, which are required for the formation of the active site of the sex-determining protein Xol-1, adopt a secondary structure consisting of five alpha helices and six antiparallel beta sheets, in a beta-alpha-beta-beta-beta-alpha-beta-alpha-alpha-alpha-beta arrangement. The fold of this family is similar to that found in ribosomal protein S5 domain 2-like [1]. 25.00 25.00 25.00 32.40 18.90 24.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.02 0.71 -4.56 2 12 2012-10-03 01:04:38 2006-08-10 13:53:43 5 3 5 2 11 15 0 147.40 27 40.39 CHANGED lc+Sps.Pl....Ep-sssNh.ssKlsu.APHsVchMsShhhAlN+.Chsps+s..spcP+SupEHhI.-hscphHsphphullRshlcpLcLppVYcIphhs.hD.sG+lu.hshLlAlW+s.............LKSh.psh.tpFt..........DshhS .........p..ph.p..t......................tpphsu...APHVVpluss...sahAVN+hClVpupllpp+.PpssppH..hI.phstc.psp.phslltphlccLpL+.psYcIpIhsthDassphu...hhshLsAIWKShsh...........p....h........h...tp.................h................. 0 1 2 11 +8941 PF09109 Xol-1_GHMP-like Switch protein XOL-1, GHMP-like Sammut SJ anon pdb_1mg7 Domain Members of this family, which are required for the formation of the active site of the sex-determining protein Xol-1, adopt a secondary structure consisting of five alpha helices and seven antiparallel beta sheets, in a beta-alpha-beta-alpha-alpha-alpha-beta-beta-alpha-beta-beta-beta arrangement. The fold of this family is structurally similar to that found in the C-terminal domain of GHMP Kinase [1]. 
25.00 25.00 28.00 27.20 20.60 20.00 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.15 0.71 -4.21 2 10 2009-01-15 18:05:59 2006-08-10 13:54:42 5 2 5 2 9 12 0 170.80 24 43.23 CHANGED VthlAE.................aDhVFVpTsLHs.caTPphF.sptpsKh...tFpp.c-pspaPD..hst.MshaScpRVtppshss..l.h.op.uLctl.ppcppl.GFElQQGGhLVsLKKssFhsDc..hIphhuhhlts-pppSlppl.FcLLc.G.tup.hp.s..+hh-.pp+sslplchKsVp ..........................pssphhshhu..................tF.cpaDllFV+TNLH......sScFcPp..h...s+..pps+s..cthpp....c-ssph.scs..lsthMhthScsRhspEs.h..s...hpphEpDCcsAlpphp.ppc.pp...l...cGFEVQQGGILllLKKssFhssc....LLcsIuhuItcpsp..t.ploplSFsLLpPutsu..p...........................h................. 0 1 2 9 +8942 PF09110 HAND HAND Sammut SJ anon pdb_1ofc Domain The HAND domain adopts a secondary structure consisting of four alpha helices, three of which (H2, H3, H4) form an L-like configuration. Helix H2 runs antiparallel to helices H3 and H4, packing closely against helix H4, whilst helix H1 reposes in the concave surface formed by these three helices and runs perpendicular to them. The domain confers DNA and nucleosome binding properties to the protein [1]. 21.00 21.00 21.20 21.20 20.60 20.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.36 0.71 -3.68 29 385 2009-01-15 18:05:59 2006-08-10 14:51:15 6 11 236 5 260 400 3 101.60 46 10.01 CHANGED SlDsYYKDlLpsGspssp.......spsPRsPKphslpDaQFaPspLhcL.E+Ephaa+KplsYKsshp-sss............................cs.p-+cpcpchEQccI-NApPLTEEEpp.KpcLhpEG .....................ulDsYa+-AL+...supsps...........PKsPRsPKQsslpDFQFFP.PRLhELhEKE..hhaa+KplGYKVsh.s.s.......................................phspttttpc.EQpcIDpApPLT-EE.pEK-cLlspG.............................................................................................................. 
0 70 132 206 +8943 PF09111 SLIDE SLIDE Sammut SJ anon pdb_1ofc Domain The SLIDE domain adopts a secondary structure comprising a main core of three alpha-helices. It has a role in DNA binding, contacting DNA target sites similar to c-Myb (Pfam:PF00249) repeats or homeodomains [1]. 20.80 20.80 21.00 24.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.29 0.71 -4.37 36 454 2009-01-15 18:05:59 2006-08-10 14:52:14 5 18 282 5 306 466 4 115.10 47 11.41 CHANGED a-KhlppIEpGEcKhp+hppppchLcpKlpphcsPhp-Lplp....Ys..ssp++sYo--EDRFlLshlt+hGh..t.sha-cl+ppI+psPhF+FDWFhpSRTspEluRRssTLlphlp+Ehp .........-KhhtpIEcGEt+lp+ptphc+uLcpKlspY+.sPhppL+Ip..............Yu.....ssp......tKsYoEEEDRFLlsMLaK.hGh.-p-....slY-clRpsIRpuP.FRFDWFlKSRTshELpRRCsTLlshIE+E............ 0 101 169 249 +8944 PF09112 N-glycanase_N PngaseF_N; Peptide-N-glycosidase F, N terminal Sammut SJ anon pdb_1pgs Domain Members of this family adopt an eight-stranded antiparallel beta jelly roll configuration, with the beta strands arranged into two sheets. They are similar in topology to many viral capsid proteins, as well as lectins and several glucanases. The domain allows the protein to bind sugars and catalyses the complete removal of N-linked oligosaccharide chains from glycoproteins [1]. 
17.80 17.80 17.90 19.10 17.60 17.50 hmmbuild --amino -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.36 0.71 -4.90 16 80 2009-01-15 18:05:59 2006-08-10 15:15:43 5 4 67 8 27 83 150 166.50 33 34.00 CHANGED slsVF-ctplpFssssh.......tts...th.ttphhLpplpLPs..htcsh.slpLcls.tssG...Ds...WD+oGol..FVlPcssshph.s.htp...........................slELhRFhTPFtluphs....................psVsacpDlosLhPhLpG.cshlshahssWsttG.....ahsSlclch ............................pl.VF-pp.lpFssshh.......ttss.hphssGpllLKKlpLPs..hpcs.h.slplclsltSsG......Dt....WD+oGSs..FVlPcs.sshshhs.htps.tthP.hps.php..h.GllsstpY.sslELMRFhTPFGVupas..........h...lhh.pW.psV.appDlTcLhPlLc.....u.-salGlaIssWs...tcG.....ahsShclch...................................... 5 10 22 26 +8945 PF09113 N-glycanase_C PngaseF_C; Peptide-N-glycosidase F, C terminal Sammut SJ anon pdb_1pgs Domain Members of this family adopt an eight-stranded antiparallel beta jelly roll configuration, with the beta strands arranged into two sheets. They are similar in topology to many viral capsid proteins, as well as lectins and several glucanases. The domain allows the protein to bind sugars and catalyses the complete removal of N-linked oligosaccharide chains from glycoproteins [1]. 25.00 25.00 42.90 37.50 17.10 16.80 hmmbuild --amino -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.19 0.71 -4.49 23 91 2009-01-15 18:05:59 2006-08-10 15:17:00 5 6 75 8 36 95 190 148.10 34 28.22 CHANGED psYssh....................t.shsh...sFslPsss+pscLhsllTGHG.......spssssEFs.psHplhVsGpps....hphhsht.sCAshth.sss....pGsWh...........................huRusWCPGpslpPhpl-lsshhssspthssslshtshhsGsp............uhhhhSuaLl ........................pYsph.hspc.slsh...sFslspss+sscLchlsTGHG.......GasGGDEFst+ppplhlDGppl....hsahPWRp-CuoaRpaNPu..........oGsWh.......................uSSDhSRSNWCPGsslsPphlslssltsGpHohslsIPtut.h-Gsp............saWhlSuhLl............................... 
0 18 30 35 +8946 PF09114 MotA_activ Transcription factor MotA, activation domain Sammut SJ anon pdb_1bja Domain Members of this family of viral protein domains are implicated in transcriptional activation. They are almost completely alpha-helical, with five alpha-helices and a short, two-stranded, beta-ribbon. Four alpha helices (alpha1, alpha3, alpha4 and alpha5) are amphipathic and pack their hydrophobic surfaces around the central helix alpha2 [1]. 26.10 26.10 27.10 26.50 25.70 26.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.21 0.72 -4.23 3 22 2012-10-04 14:01:12 2006-08-10 15:54:45 5 2 21 3 0 19 0 93.10 56 44.63 CHANGED SKVTYIIKASNNA.LNEKTAsIhIpIAKKsFITAAEVREuVcsEhNsuVVNSNIGVLIKKGLVEKSGDGLIsTGEApDIIscAA-LaAQENAPELLK ..........SKlTYIIKASpss.LNEKTAuILIpIAKKsFITuuEVRE.lp.-husAVVNSNIGVLIKKGLlEKSGDG.LlhTuEupDIlppAAsLaApENAPEhL........... 0 0 0 0 +8947 PF09115 DNApol3-delta_C DNA polymerase III, delta subunit, C terminal Sammut SJ anon pdb_1a5t Domain Members of this family, which are predominantly found in prokaryotic DNA polymerase III, assume an alpha helical structure, with a core of five alpha helices, and an additional small helix. They are essential for the formation of the polymerase clamp loader [1]. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.43 0.71 -4.07 41 817 2009-01-15 18:05:59 2006-08-11 10:11:52 5 2 811 9 97 453 22 117.10 43 36.28 CHANGED p.t.hppRpphhpshhpshpspsh..lpLlshls..ccps.tpLcWLtsLLhDAlKhphGlsp.hhhNtD.hshlpplupphssptLhpthpplhph+ppLhplsulNpELLLhchLlphpp.h...l ....................................t.s-pWptRcsLCQuL.h..ulp.osDh..huLL..sALN...HE..pAssRLaWLuoLLhDALKt+aGAup..lsNsDhsslVspLAspLSsu+Lpsllscls+hR-QLhsVoG..lNRELLlTDhLL+lEch.............. 
0 20 42 70 +8948 PF09116 gp45-slide_C gp45 sliding clamp, C terminal Sammut SJ anon pdb_1b77 Domain Members of this family are essential for the interaction of the gp45 sliding clamp with the corresponding polymerase. They adopt a DNA clamp fold, consisting of two alpha helices and two beta sheets - the fold is duplicated and has internal pseudo two-fold symmetry [1]. 25.00 25.00 25.80 33.30 22.00 24.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.34 0.71 -10.38 0.71 -4.09 9 69 2009-01-15 18:05:59 2006-08-11 11:03:52 5 2 67 21 1 60 670 110.60 33 49.59 CHANGED sVhF-lcA-DhpQLh+sSpslplDslulssc-G..+IVlsuapph.Dus.spspaSlslu.-a-Gss.sFsFllphsNMKhhs...ucYKVhlhu.......chAupFputpss..YllAhEscSoasF .....sVpFc..Lcu-cLppLh+supshplsslslsscsG....+lVlss.....cp.....spspspYSlp.l.G.-h-sss..FsFslphpNhKhls...GcYcVhlss.......ptsupFpup..s..hs..YhlALEsss............ 0 1 1 1 +8949 PF09117 MiAMP1 MiAMP1 Sammut SJ anon pdb_1c01 Domain MiAMP1 is a highly basic protein from the nut kernel of Macadamia integrifolia which inhibits the growth of several microbial plant pathogens in vitro while having no effect on mammalian or plant cells. It consists of eight beta-strands which are arranged in two Greek key motifs. These Greek key motifs then associate to form a Greek key beta-barrel [1]. 21.80 21.80 22.80 22.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.82 0.72 -3.56 2 31 2012-10-01 23:14:22 2006-08-11 11:26:43 5 1 12 1 19 30 0 78.70 46 75.83 CHANGED ShFTsWuGPGCNN+AtRYSKCGCSsItpp..GGY-F.YpGQTAAhYNpssCpGVApTRFuSSs.pACssFGWKShFIQC ..............SYFssWuGPGCNN.....chtRYS...sCGCoNluss.H.GGYcFsYQ.GQTAusYNsssCpGVspTRFS....s....os.QsCu..s.FGWpShFIQC............................................................... 
0 0 16 19 +8950 PF09118 DUF1929 Domain of unknown function (DUF1929) Sammut SJ anon pdb_1k3i Domain Members of this family adopt a secondary structure consisting of a bundle of seven, mostly antiparallel, beta-strands surrounding a hydrophobic core. The 7 strands are arranged in 2 sheets, in a Greek-key topology. Their precise function, has not, as yet, been defined, though they are mostly found in sugar-utilising enzymes, such as galactose oxidase [1]. 25.00 25.00 27.80 27.60 23.30 23.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.47 0.72 -3.90 68 596 2009-01-15 18:05:59 2006-08-11 11:52:05 6 64 224 13 381 626 15 99.90 29 14.34 CHANGED RPslss.s.....s........ssl....saGsshslss..........ss......shspssLl+sussTHuhshsQRhl.Lsh........tsst....ssp.hssshP..sss..s.lssPGaYMlFlls.sG....lPShuphVpl ........................................RPslss..s..s..........ssl..shG.s.s.hsls.hp...................ss..........shhpssLlpsuhsTHohsh.sQRhl.Lsh............................ssss......sts..hslshP..sss...slsPPGaYMLFlls.sG......lPS.h.u.phVpl........................ 0 152 263 339 +8951 PF09119 SicP-binding SicP binding Sammut SJ anon pdb_1jyo Domain Members of this family bind the chaperone SicP, which is required both to maintain the stability of SptP, as well as to ensure the eventual secretion of the protein. The domain is found in the Salmonella effector protein SptP, which interacts with SicP chaperone dimers mainly through four regions of its chaperone-binding domain. The structure of the SptP-SicP complex contains four molecules of SicP, aligned in a linear fashion and arranged in two sets of tightly bound homodimers that bind two SptP molecules. The SicP homodimers do not interact with each other, but are held together by a molecular interface formed between two SptP molecules. 
Each SptP molecule is wrapped around by three SicP chaperones (two chaperones from one homodimer and a third one from the opposite homodimer pair) [1]. 25.00 25.00 25.50 38.20 24.20 19.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.48 0.72 -4.28 3 136 2009-09-10 17:21:18 2006-08-11 13:28:16 5 3 135 2 4 70 0 81.50 68 14.40 CHANGED A+EGFKEKLLoaLSHlPLLKNT-AVQ+YsEslRl-N++lLpVFLpALocRYGc-AApDAlDhuclssssPLTQRpl...VQITE ...APEKFSSKVLTWLGKMPLFKNTEVVQKHTENI+sQDQKILQTFLpALTEKYGETAVNsALLMSRIN.MN.KPLTQRLA...VQITE.. 0 1 1 2 +8953 PF09121 Tower Tower Sammut SJ anon pdb_1mje Domain Members of this family adopt a secondary structure consisting of a pair of long, antiparallel alpha-helices (the stem) that support a three-helix bundle (3HB) at their end. The 3HB contains a helix-turn-helix motif and is similar to the DNA binding domains of the bacterial site-specific recombinases, and of eukaryotic Myb and homeodomain transcription factors. The Tower domain has an important role in the tumour suppressor function of BRCA2, and is essential for appropriate binding of BRCA2 to DNA [1]. 25.00 25.00 25.10 48.10 24.80 23.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -8.14 0.72 -4.02 9 69 2009-01-15 18:05:59 2006-08-11 14:43:34 5 13 38 3 30 70 0 41.90 65 1.52 CHANGED MEKpssGhhlFRNpRtEE+EAt+aupsQQKKLEsLFoKIQsE MEKTsoGhYIFRNERtEEKEAsKaAEsQQKKLEALFsKIQsE. 0 4 6 13 +8954 PF09122 DUF1930 Domain of unknown function (DUF1930) Sammut SJ anon pdb_1okg Domain Members of this family are found in 3-mercaptopyruvate sulfurtransferase, and have no known function. They adopt a structure consisting of a four-stranded antiparallel beta-sheet and an alpha-helix, arranged in a beta(2)-alpha-beta(2) fashion, and bearing a remarkable structural similarity to the FK506-binding protein class of peptidylprolyl cis/trans-isomerase [1]. 
20.10 20.10 21.00 90.50 19.40 19.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.25 0.72 -4.20 3 12 2012-10-02 13:30:10 2006-08-11 15:37:18 5 1 12 1 3 13 0 68.20 59 18.13 CHANGED MLhpMhSPSLGDNPKAsL.DssTLlVDGslVspPDAELpSAlsHLHlGE+ApVaFKSpRVVVIEV.PtlP MhhpM.oPsLGDNPKAsL.DshTLhVDGssscpPDAElpSAhsHLHhGEtApVaFKStRVVsIEV.Phls.... 0 1 2 3 +8955 PF09123 DUF1931 Domain of unknown function (DUF1931) Sammut SJ anon pdb_1r4v Domain Members of this family, which are found in a set of hypothetical bacterial proteins, contain a core of six alpha-helices, where one central helix is surrounded by the other five. The exact function of this family has not, as yet, been determined [1]. 28.20 28.20 28.20 80.60 25.60 28.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.46 0.71 -4.22 14 60 2009-01-15 18:05:59 2006-08-11 15:53:48 6 1 55 10 36 63 1 137.20 42 90.89 CHANGED h-+lFRpsAuLDlc..Ks-hcRhsDhVpcKlYDLLlsuptsAptNsRDlIc.hDLPITKGLpEoIcpF+pl..-c-lELcsIL-hLA...........shPPLDhslupEscscLP-IsGGLslslARshKhlcP-lKs..PpsEchEcspplhD .F-+lFRpAAGLDVD..KsDlKRhsDhVccKlYDLlllAptsAKuNsRDlIc.hDLPITKGLpESl+pF+pl..DcclELcPlL-tLs...........shPPLDlslu--scscLP.IsGuLsVAlARshKtlcP-lKN..PpspHaEcApclhc.... 0 15 25 33 +8956 PF09124 Endonuc-dimeris T4 recombination endonuclease VII, dimerisation Sammut SJ anon pdb_1e7l Domain Members of this family, which are predominantly found in Bacteriophage T4 recombination endonuclease VII, adopt a helical secondary structure, with three alpha helices oriented parallel to each other. They mediate dimerisation of the protein, as well as binding to the DNA major groove [1]. 
21.60 21.60 21.60 21.60 21.50 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.64 0.72 -4.34 8 37 2012-10-03 03:04:30 2006-08-14 09:14:20 5 2 35 10 1 30 0 53.20 48 34.94 CHANGED IHPpalsDpsKpFSRLs+sEMhAEMpucGF-Ys-sDsKspLscpF+KQhhKul+ .....IHPpalsDKsKcFSRLsKpEMhAEM.ppGF-YN-uDTKspLltsFKKQl+KulK....... 0 1 1 1 +8957 PF09125 COX2-transmemb Cytochrome C oxidase subunit II, transmembrane Sammut SJ anon pdb_1ehk Domain Members of this family adopt a tertiary structure consisting of two antiparallel transmembrane helices, in a transmembrane helix hairpin fold [1]. 25.00 25.00 25.10 25.10 22.40 21.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.04 0.72 -4.41 2 13 2009-09-11 15:01:49 2006-08-14 09:34:59 5 1 13 26 6 15 2 37.60 62 22.94 CHANGED DEHKAHKAILAYEKGWLAFSLAMLFVFIALIAYTLATH .....DEHKAHKAILAYEKGWLsFuLAMllVFIALIAYTLATH... 0 1 4 6 +8958 PF09126 NaeI Restriction endonuclease NaeI Sammut SJ anon pdb_1ev7 Domain Members of this family adopt a secondary structure consisting of nine alpha-helices, six 3-10 helices and 13 beta-strands. They bind two GCC-CGG recognition sequences to cleave DNA into blunt-ended products [1]. 
25.00 25.00 101.80 101.60 18.90 17.30 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.58 0.70 -5.77 6 31 2012-10-11 20:44:45 2006-08-14 09:51:55 5 1 28 4 10 34 1 279.50 35 60.68 CHANGED Dt-LLhshts.hoscssGch.....huuhLRcCIDsVl.s.+TGRhsa--L-KTEKTaIGTcVEIELRAhhphscG.chDh.......Ihs.sVDlKhoM.GuNWMlPsEulDs.lCLLVtADEt+ARhhlGLlhsRPsaLTpup..N+DuKpslospG.huslLWLhpDH.PhPtNhhhplsscshspIFAspo......GssRhAcLFRclQtcPIsRsVlcAVApQcDFMRRlRust..GsRslLccEGILlLuGp.hDspLhpALsLPsssuupalSsRlhhtc.tch.............st.suD.llpLs .......................hhht..hphcssGph.....hutslRcolDpllDsp+TGRasa..cpLpKTEKTalGThlEIpLpcpFt..h..sc.G......phDh.......ItGh-VDsKaohp...ttsWMIP.Euh............sp....hsLlltAD-ppupassGLl+scs-hL......st.ut........NRDuKpslo.....spu.+pt.lhWLacct..Ph.tNhLLpLst.pshppIh.u.spS.......GppRlspLFRplppp.lsRslVtsV.A.pQcDaM+RlR.su..GuRshLp.EGIllLusp...pctplApsLsLPhsstuEalSsRls.tp.tct.t.ts.hts..WhhA..s-......s......................... 0 0 5 9 +8959 PF09127 Leuk-A4-hydro_C Leukotriene A4 hydrolase, C-terminal Sammut SJ anon pdb_1hs6 Domain Members of this family adopt a structure consisting of two layers of parallel alpha-helices, five in the inner layer and four in the outer, arranged in an antiparallel manner, with perpendicular loops containing short helical segments on top. They are required for the formation of a deep cleft harbouring the catalytic Zn2+ site in Leukotriene A4 hydrolase [1]. 
21.20 21.20 21.20 21.40 20.90 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.70 0.71 -4.59 37 628 2012-10-11 20:01:01 2006-08-14 10:36:11 6 10 372 47 367 640 57 142.00 25 23.77 CHANGED oLscsshsLup.cWhst..t.......stphsstDlpsasspQhlhFLspL............hptps...ls.pp..lptLscsY..plssSpNuElth+ahplsl+....uphp..shhsphtcaL..sphGRMKFlRPlY+tLsp.....hs+phAlcsFpct+stYHPlspthVpKDL ...........................................................hhp.s.tLup.tWhpt..t.................ttshsstsh.psasspQhlhFLspL.........................h..tps.....Ls.pp..lppL..sc.sY..plssopN.u.El.phRW..hplslc....sc....ap....ssh..tstcaL.......pp..GRhKastPLYcsLhp....ttscshAhcsFtps+sthHPlstthlpc.L...................................... 0 113 181 274 +8960 PF09128 RGS-like Regulator of G protein signalling-like domain Sammut SJ anon pdb_1htj Domain Members of this family adopt a structure consisting of twelve helices that fold into a compact domain that contains the overall structural scaffold observed in other RGS proteins and three additional helical elements that pack closely to it. Helices 1-9 comprise the RGS (Pfam:PF00615) fold, in which helices 4-7 form a classic antiparallel bundle adjacent to the other helices. Like other RGS structures, helices 7 and 8 span the length of the folded domain and form essentially one continuous helix with a kink in the middle. Helices 10-12 form an apparently stable C-terminal extension of the structural domain, and although other RGS proteins lack this structure, these elements are intimately associated with the rest of the structural framework by hydrophobic interactions. Members of the family bind to active G-alpha proteins, promoting GTP hydrolysis by the alpha subunit of heterotrimeric G proteins, thereby inactivating the G protein and rapidly switching off G protein-coupled receptor signalling pathways [1]. 
21.10 21.10 21.30 21.10 20.80 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.03 0.71 -4.80 9 251 2012-10-03 22:10:09 2006-08-14 11:04:46 6 13 79 9 111 223 0 172.50 40 14.50 CHANGED psusFQsl-.L....KSRPAHLAVFL+aVloQh.DPusLLsYLhu.DhYpp..ssuKEsR+hhh-hashFL-+sA.sL+lsls-plsh-l..........................ptp+scLhsp-htRthlppspppshs-lpcpLpD.............FRpKRoMGLs.hpuEhspL-t.httDhs....+ERpsA.Eplls+l...t-lL.stpshE..E-+osshpaslhTYM++lGV+ ........................................su.FQslEhL....KpRPAHLuVFLpaVhSQh..DPuPLL..........hYLhu.-hYpp......ss..sK-....sR+hhh-haphFL-+sA..sL+V.p.l.P-..plsh-l..........................-h.h.cs-.Lhsp-hhRphl.pphpppshs-lpcpLcD.............F..Rp..KRohGLssh.uEhstL-..hstDph......+ERpsA...Ep.l.l.sp..l..........t-hl...p....p..s.E.......E.-+Ssshthsl.hYMpHhGl+.................................................................. 0 25 34 64 +8961 PF09129 Chol_subst-bind Cholesterol oxidase, substrate-binding Sammut SJ anon pdb_1i19 Domain The substrate-binding domain found in Cholesterol oxidase is composed of an eight-stranded mixed beta-pleated sheet and six alpha-helices. This domain is positioned over the isoalloxazine ring system of the FAD cofactor bound by FAD_binding_4 (PF:PF01565) and forms the roof of the active site cavity, allowing for catalysis of oxidation and isomerisation of cholesterol to cholest-4-en-3-one [1]. 
25.00 25.00 27.90 27.90 19.30 16.30 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.89 0.70 -5.30 3 72 2012-10-02 00:48:38 2006-08-14 11:36:47 6 3 62 4 14 78 1 295.20 57 53.89 CHANGED RhRCQSYVDIPASELFAPAGScGRTFESFVp+SGRAEAIWFPFTDKPWLKVWTVoPT+PsG...................ARsVsGPYNYPFSDNIPcslSDLluAIN.sGcPpLAPLFGKsQY-IThLGLAsThG..sDLWGWSKDVLaYIKPTTLRVTANGYAVLTRRcNVQRVINEFsttY+pRIAsYRAsG+YPlNGPVEIRVoGLDQPADVtVPGAsPPSLSAIRPRPDHPDWDsAIWLDILolPGTPsANcFYREhEQWMhSHYSGDYATlRPEWSKGWGYuPsAAWoDsslVssplssoaRpGLsss-NWDoAlRpLNchDPHRVFSSPLLDRLMP ........................RhRCpSassIPAsELFA.AsGo..s.G....RTh-SFlp+SGRsEAIWFPFT-pPWLKVWTssPs+P.s...................uRsVstPYNYPFSDsIPctloDLlupIs.sG.pstLsPLhGphQYslshsGLshThs.........hDLWGWS+slLhYl+PTTLRVTANGYAVLTRRtslQRVIsEFsthYpphlstY+AtGcYPhNGPlEIRVoGLDpPu-sh.sGAhsPoLSAlRPRPD..+PE..WDsAIWhDlLolPGTPtAstFYREhEpWhhsp..aoGsYAolRsEWSKGWGYss.sAAWsDsshlsphlsp.hRpG.lsusssWDsAhtpLschDP+RlFSSPLLDRLhs................ 0 4 11 13 +8962 PF09130 DUF1932 Domain of unknown function (DUF1932) Sammut SJ anon pdb_1i36 Domain This domain is found in a set of hypothetical prokaryotic proteins. Its exact function has not, as yet, been described. 25.00 25.00 25.00 25.10 24.60 24.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.30 0.72 -4.24 45 239 2009-01-15 18:05:59 2006-08-14 11:43:05 6 8 206 4 130 266 129 73.40 27 24.62 CHANGED AcphGVp-pllsoLspohPuhsh..ppsshhlscshtHuhRRstEMcElucTlpssGl.........sst...hspusAshhpphuc ..............AcphGVt-tllssLspoh.Puhs...phsshhlspss.HuhRRspEMc.Elucslp-sGl.........................ssh....hscusAphhptls......... 0 32 77 108 +8963 PF09131 Endotoxin_mid Bacillus thuringiensis delta-Endotoxin, middle domain Sammut SJ anon pdb_1i5p Domain Members of this family adopt a structure consisting of three four-stranded beta-sheets, each with a Greek key fold, with internal pseudo threefold symmetry. 
Thus they act as a receptor binding beta-prism, binding to insect-specific receptors of gut epithelial cells [1]. 21.20 21.20 21.70 21.50 20.00 21.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.39 0.71 -4.86 3 68 2009-01-15 18:05:59 2006-08-14 13:04:28 5 5 9 1 0 63 0 192.60 70 32.83 CHANGED LLVSSGANLYASGSGPQQTQSFTAQNWPFLYSLFQVNSNYVLSGlSGARLoITFPNIGGLPGSTTTQoLpuARVNYSGGVSSGpIGusNLNQNFNCSTlhPPLSTPFVRSWLDSGTDREGVATSTNWQTESFETTLuLRCGAFSARGNSNYFPDYFIRNISGVsLVIRNEDLsRPLYYNEIRNIESPSGTPGGARAYLVSVHNRKN ....................LLVSSGANLYASGSGPQQTQSFTuQsWPFLYSLFQVNSNYVLsGhSGARLohTFPNIsGLPGSTTTpuLhuARVNYSGG.........l.SSGpIG....u....osh.....NQ............N.................FN...C.STlhPPL.TPFVRSWLD...SGoDREGVATsTNWQTESFETTLuLRsGAFoAR.GNS.NYFPDYFIRNISGVsLVlR.NEDLpRPLHYNEIRNItSPSGTPGGARAYhVSVHNRKN....... 0 0 0 0 +8964 PF09132 BmKX BmKX Sammut SJ anon pdb_1rji Domain Members of this family assume a structure adopted by most short-chain scorpion toxins, consisting of a cysteine-stabilised alpha/beta scaffold consisting of a short 3-10-helix and a two-stranded antiparallel beta-sheet. They are predominantly found in short-chain scorpion toxins, and their biological method of action has not, as yet, been defined [1]. 20.70 20.70 24.70 29.20 20.00 19.70 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -7.50 0.72 -4.08 2 14 2009-01-15 18:05:59 2006-08-14 14:09:14 5 4 4 2 5 15 0 29.90 55 20.45 CHANGED PhsspCKscpDssMCshGhSsKsGhCpuCT PVsGECKscsDAsMCTsGsssKs.GTCTuCT... 1 5 5 5 +8965 PF09133 SANTA SANTA (SANT Associated) Zhang D, Mistry J anon Zhang Domain The SANTA domain (SANT Associated domain) is approximately 90 amino acids in length and is conserved in Eukaryota. It is sometimes found in association with the SANT domain (Pfam:PF00249, also known as Myb-like DNA-binding domain) implying a putative function in regulating chromatin remodelling [1]. 
Sequence analysis has showed that the SANTA domain is likely to form four central beta-sheets with three flanking alpha- helixes [1]. Many conserved hydrophobic residues are present which implying a possible role in protein-protein interactions [1]. 20.90 20.90 21.10 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.21 0.72 -4.03 7 120 2009-01-15 18:05:59 2006-08-14 14:20:17 5 5 73 0 79 112 0 91.90 32 12.64 CHANGED lpLpDWhlKphsps.....ltlcGh.cspps...hhasSshIscRhpsshLcs.sGhhhsLhGhl..............sppph.csGhs.clhccFhhGFP..Wcchh.st. .........................................lpLp-Whlc.hhss.s................tlsVpGh..tsp...ss...........hha+...S.....ssIlcRhppspLcThsGplYhLpGhl..............sp.phccs..G..aPstl.......hc+Fhh......G....FPcpWcchlpp..h......... 0 21 35 58 +8966 PF09134 Invasin_D3 Invasin, domain 3 Sammut SJ anon pdb_1cwv Domain Members of this family adopt a structure consisting of an immunoglobulin-like beta-sandwich, with seven strands in two beta-sheets, arranged in a Greek-key topology. It forms part of the extracellular region of the protein, which can be expressed as a soluble protein (Inv497) that binds integrins and promotes subsequent uptake by cells when attached to bacteria [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.30 0.72 -4.14 2 269 2012-10-03 16:25:20 2006-08-14 14:36:03 5 18 254 1 6 214 8 102.50 72 8.12 CHANGED PDsspSSFsVSssDIlADGoMpShLoFVPhsKNscFlSGhpsLpFhQsGVPVoISPlTEpsDsYTAoVVGNosGDVsITPQVss..LshLQK+IoLaPl .....M.DVANS..TLSANEPSGDVVADGQQAYTLTLT..AVDSEGNPVTGEAS.....R...L.RF...V..PQ....D.T.......N....G....VTV...G..A...I...S...E.IK..P...G..V...Y.SA...T.VSSTRAGNVVVRAFSEQYQLGTLQQTL.....KFVA............ 0 0 2 2 +8967 PF09135 Alb1 Alb1 Mistry J, Wood V anon manual Family Alb1 is a nuclear shuttling factor involved in ribosome biogenesis [1]. 
20.40 20.40 23.00 23.00 18.00 18.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.79 0.72 -3.45 48 156 2009-01-15 18:05:59 2006-08-14 14:58:35 6 1 117 0 115 151 0 110.10 28 63.64 CHANGED h+SRAARRtss.shs.h-+.......p.lpsls....sccsshp+stlhsspp.......suslsccp..t...+spc.....ls.+tpp..........hpppsh-+AthlhcpLspKhsKSlsR.uKhlpp..R+ssW-phNpphppp .............................hpSRAARptspsshs.h-+.......p.h.p.sls....sccsshp+stlhsst+.............suulsccp.......+spp.......ls..ppppp...............tpccuhc+AphshcpLspKlsKSlsR.sKhlpp..R+tsW-phNpph...t............ 0 16 54 96 +8968 PF09136 Glucodextran_B Glucodextranase, domain B Sammut SJ anon pdb_1ug9 Domain Members of this family adopt a structure consisting of seven/eight-strand antiparallel beta-sheets, in a Greek-key topology, similar to the immunoglobulin beta-sandwich fold. They act as cell wall anchors, where they interact with the S-layer present in the cell wall of Gram-positive bacteria by hydrophobic interactions. In glucodextranase, Domain B is buried in the S-layer, and a flexible linker located between domain B and the catalytic unit confers motion to the catalytic unit, which is capable of efficient hydrolysis of the substrates located close to the cell surface [1]. 29.80 29.80 30.50 31.50 29.70 29.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.24 0.72 -3.82 2 5 2009-01-15 18:05:59 2006-08-14 16:35:04 5 3 4 2 0 7 0 74.60 39 8.58 CHANGED PtLolsuPttLoTADSAsssVpGTTsAAKVYVSVNGshhEAPlT...DG..TFSlDlALsusKNpVTVAAVuuDGGTAVEsRTVLaYGS ...PuLTVpu.sshSsssStTspVSGTTNAuKVhssVNGptTEhPVo...pG..oausDLsLstscN+VTlsAsGucGuoAopcRTlhtYG....... 0 0 0 0 +8969 PF09137 Glucodextran_N Glucodextranase, domain N Sammut SJ anon pdb_1ug9 Domain Members of this family, which are uniquely found in bacterial and archaeal glucoamylases and glucodextranases, adopt a structure consisting of 17 antiparallel beta-strands. 
These beta-strands are divided into two beta-sheets, and one of the beta-sheets is wrapped by an extended polypeptide, which appears to stabilise the domain. Members of this family are mainly concerned with catalytic activity, hydrolysing alpha-1,6-glucosidic linkages of dextran to release beta-D-glucose from the non-reducing end via an inverting reaction mechanism [1]. 19.50 19.50 19.70 20.50 18.00 19.20 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.66 0.70 -5.16 14 171 2009-01-15 18:05:59 2006-08-14 16:38:33 6 8 150 6 82 181 17 265.40 31 32.85 CHANGED AsGuPGtsssWu.usKpGVGTu..............ssssSKVWFTlucGlloEVYYPpIDpAph+-LpFlVsc.GpsFhs-EpcDTtScl-hhs......stohuY+lsssDppGRYpIpKcIFTDPcRsullh+VpFpALcG...tDaplYlLhcPHlsNsGusssuals.cuput.sLhAptssshhALsuShsats......sSsGaVGsSDGhoDLt...tssphshpassAsp...GNlu.sucIsL.stspT..pFslsLGFGpotpEAspsAtuoLpsuasphhppY.s.....sWccYhsSL ...................................................A.GuPGtss..pWs..usKsGlGTu..................hsssS+VWFTl.ucG.lloElaYPplDpspl+-.lpFlV..ss...GpsFh.........spE+css....p....pplchhs.............sts.uac.ls...sp.s....tp...G+...aplpKclhoDPp+ssllh+lpFpu.hps.......shphYhlhsP+lsNsG.......t.......s..............spuhs.....p.....h...........tut.t...s..........Lh.A.pp............s..................sthhuLtu......sh.....s.....att.....................sSsGal.G.s.......SDGhpDLt......pstph...sh.p..aspAss............GNVuhsuplsh.......tsps......phslsLGFGpotp..p..A..hssAtuoLsp.u..apshhppYts.....sWcsahtsL............................................. 0 29 53 71 +8970 PF09138 Urm1 Urm1 (Ubiquitin related modifier) Mistry J, Wood V anon Pfam-B_16507 (release 20.0) Family Urm1 is a ubiquitin related protein that modifies proteins in the yeast ubiquitin-like pathway urmylation [1]. 
Structural comparisons and phylogenetic analysis of the ubiquitin superfamily has indicated that Urm1 has the most conserved structural and sequence features of the common ancestor of the entire superfamily [2]. 21.00 21.00 21.00 21.30 20.90 20.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.19 0.72 -3.87 29 304 2012-10-03 10:59:06 2006-08-14 17:07:09 6 2 274 6 211 350 18 94.70 42 89.71 CHANGED clplEFhGGLElLhs.sp+paplsls.......sppthshpsLltalcpNlIp-.Rs.-lF..........................l.p.ssolRPGILVLINDsDWELhGph-YhLc-sDslsFlSTLHGG .........................................lplEFsGGhEhLFs..s.+pHplsls.....................t.tp.hslppLlpalpcNllp-.....Rs..-LF..................................................................lp...ssoVRP.GILVLINDsDWE.L...G..c.-Y.Lpss.DslhFlSTLHGG........................ 1 72 114 175 +8971 PF09139 Mmp37 Mitochondrial matrix Mmp37 Mistry J, Wood V anon Pfam-B_15301 (release 20.0) Family MMp37 is a mitochondrial matrix protein that functions in the translocation of proteins across the mitochondrial inner membrane [1]. It has been shown that MMP37 proteins possess the NTase fold but they have only one active site carboxylate and thus probably are not able to carry out enzymatic reaction. These potentially non-active members of NTase fold superfamily may bind ATP, hydrolysis of which is necessary for the translocation of proteins through the membrane [2]. 
21.10 21.10 21.80 21.80 19.20 19.00 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.34 0.70 -5.41 24 355 2012-10-02 22:47:23 2006-08-14 17:14:10 6 8 272 0 247 347 4 275.80 34 80.49 CHANGED tL+pllspF.ss.lcaAFAYGSGVapQsG................t........tt.pttMlDhIFuVscspcaHohNLcQN.cHYSs..L+hhGschloplQsphGAGVYFNsalslN.....GphIKYGVVShcsLhcDLtpWcoLYlAGRLpKPV..clLp.c...csplchssphNL+SAlpsALLLL.......PppFoEhpLYppIuGLSYhGDhRM.lsG.EsPpKVpNIVpsphsp..F+cLYtPllps....................h.phsh.s..t..............phtpDhss.sphshlttLPp.shppplhhphpp.h..........................ptppst.h.hplutDs..ph.tpslppslpphlphsShsQolKGlhTAGlt+SlpYuhtKhpK .............................h.hptlh.pF....lph..sFuYGSGVh.Qts...............................................ttpttMlDhlhsV....sps...pWHshNlppp.pHYSh......l.t..hh..G....st.hlsp.....lQp...h.G..AG.VYaNs.hl.hs..........uphl..KYGVlshcsLhpDLhpWpsLYlAGRLpKPV...cl..l.p...s...............ssplp....hs...phNLhuAlps....AlLhL....................................PtpF..o....E.pLatpIsuLSYh.GDhR..M...lhu.Es.pKVpNIVp..sp..htp..FcpLYt.sllpp.........................h.ph.h.........................thhh.t..D.hs.s.tphp.l.tLPt.phpppl.h.ht.........................................................................t.h.......p..l..u..tp....ph..tp.lppslpphlhhsShhQohKGlhoAGhh+ohtY..tKh.K...................................................................... 0 80 139 203 +8972 PF09140 MipZ ATPase MipZ Mistry J, Thanbichler M anon Pfam-B_23525 (release 20.0) Family MipZ is an ATPase that forms a complex with the chromosome partitioning protein ParB near the chromosomal origin of replication [1]. It is responsible for the temporal and spatial regulation of FtsZ ring formation [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.76 0.70 -5.31 12 431 2012-10-05 12:31:09 2006-08-14 17:22:01 6 7 400 5 138 13620 5467 180.40 27 57.60 CHANGED HlIVlGNEKGGoGKSTsulHlAlALhttGh+VusIDLDhRQ+ohsRYhcNRsthtc+p.GlsLPhPpah........pl.cs-ssp..phpthhschptstDFIllDTPGscohLuRhAHohADTLVTPlNDSFVDFDllupVDPcThclptPShYuEhVW-uRptRAps....sttshDWlVlRNRLuthpuRN+p+lspuLp-LS+RlGFRlhsGhuERVIYRELFPpGLTLL.Dlpchsh..chshSHlsARQELRsLltuLsLPh .............................................................................llsV..u..s.tKGGsGKST.lo..h....p...........L.A..l.....A....L...s.......+.....h.....G.......h....+.....VG.l........l.D...............sD...............l..............h.................t.................................S..................l..................s..................p................h....................h..............t...................s...................+..................t...................h....................h.....................................t...................p....................p....................................h......................................l............................................h....................................t............h....................h..........................................l........................................................................................................................h.........................................................t.............................................h.......................................................................h............p......t.....h...h......t.......p.........h..........t.............p.......h....D......a..lllD..h.P...s.........................................................................................................................................................................................................................................................
.............................................................................................th............................................................................................................................................................................ 0 46 90 114 +8973 PF09141 Talin_middle Talin, middle domain Sammut SJ anon pdb_1sj7 Domain Members of this family adopt a structure consisting of five alpha helices that fold into a bundle. They contain a Vinculin binding site (VBS) composed of a hydrophobic surface spanning five turns of helix four. Activation of the VBS causes subsequent recruitment of Vinculin, which enables maturation of small integrin/talin complexes into more stable adhesions. Formation of the complex between VBS and Vinculin requires prior unfolding of this middle domain: once released from the talin hydrophobic core, the VBS helix is then available to induce the 'bundle conversion' conformational change within the vinculin head domain thereby displacing the intramolecular interaction with the vinculin tail, allowing vinculin to bind actin [1]. 25.00 25.00 35.30 25.20 24.90 23.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.71 0.71 -4.56 6 194 2009-09-11 14:56:09 2006-08-15 09:39:34 5 31 87 6 108 161 0 157.90 59 6.87 CHANGED sQpALhGoIsuuhcAVppA-c-Lcshss..lPsLGsDhuShpW+cNplDsSKpsVsS+lAAhoAuTApVVphTAu-PsDsDasAVGsAVosIouNLsElSKsV+LlAALM-c-s.sGccLL-AARsLsuAFSDLLKuApPpS...cEPRQsLLsAAGpVGpuSucLL .................................AQQALhGTINoSMpAVQtAQssLs....-h-s...LP.PLGpDhAS+...sWhpNKhDESKHEIHSQVDAITAGTASVV...........NLTA..G.DP......s-TDYTAVGCAlTTISSNLTEMSKGVKLLAAL..M-D-s...GsGcsLLpAA+sLAGAVSDLL+u.spPsS.........................uEPRQslLsAAGslGQASG-LL...................... 
0 33 42 70 +8974 PF09142 TruB_C tRNA Pseudouridine synthase II, C terminal Sammut SJ anon pdb_1sgv Domain The C terminal domain of tRNA Pseudouridine synthase II adopts a PUA (Pfam:PF01472) fold, with a four-stranded mixed beta-sheet flanked by one alpha-helix on each side. It allows for binding of the enzyme to RNA, as well as stabilisation of the RNA molecule [1]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.31 0.72 -4.32 34 629 2012-10-02 17:37:24 2006-08-15 10:01:30 6 7 616 2 142 459 81 55.70 33 18.30 CHANGED FsthclossEApslsaG+tLsss....uhsGshAAhsPDGcllALl...c-putcs+slsVht ......................................hshhplTscEAstlphGRhlphs....................shst.h.hA.Ahs.s.cG+......llAll........cc..cGsp....h+sltVh................ 0 43 99 129 +8975 PF09143 AvrPphF-ORF-2 AvrPphF-ORF-2 Sammut SJ anon pdb_1s21 Domain Members of this family of plant pathogenic proteins adopt an elongated structure somewhat reminiscent of a mushroom that can be divided into 'stalk' and 'head' subdomains. The stalk subdomain is composed of the N-terminal helix (alpha1) and beta strands beta3-beta4. An antiparallel beta sheet (beta5, beta7-beta8) forms the base of the head subdomain that interacts with the stalk. A pair of twisted antiparallel beta sheets (beta1 and beta6; beta2 and beta9/9') supported by alpha2 form the dome of the head. The head subdomain possesses weak structural similarity with the catalytic portion of a number of ADP-ribosyltransferase toxins [1]. 
25.00 25.00 28.70 28.20 24.50 23.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.05 0.71 -4.55 4 50 2009-01-15 18:05:59 2006-08-15 10:31:42 5 1 47 1 4 43 1 156.10 48 71.89 CHANGED SpalGt.TLTSIHQLSsspREpFLssHDPMRshsLss-TslYRTTppRYl....+psKLAGNPpShAhlthHEELp.ss.hAS+..hGuhPcpAc.........AYhP+ph+AsDLssPSLNVMsGstAcsulRuYA+..sDHVsVcMRLGDFL-pGGKVYuDTSuhusGGDpspALIVTLPKGpKVPVcIl ................hstpshul................................F+I++DhVsVRIpsspFsD..hKNcKIpGHpNTVASVhDaYs.QcNu.Lu.s..hGosc+.oAD.........hh+.c++sAhNhhlhphNs.hap......s......us-Nht+SYuK.T-DsshVuholGsLLDKG.uKVYsDTSsulc...LuEPhIhTLPEus+VsV-I............................. 0 0 1 3 +8976 PF09144 YpM Yersinia pseudotuberculosis mitogen Sammut SJ anon pdb_1pm4 Domain Members of this family of Yersinia pseudotuberculosis mitogens adopt a sandwich structure consisting of nine strands in two beta sheets, in a jelly-roll topology. As with other superantigens, they are able to excessively activate T cells by binding to the T cell receptor [1]. 25.00 25.00 205.40 205.20 19.30 18.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -4.14 2 4 2009-01-15 18:05:59 2006-08-15 10:43:22 5 1 2 4 0 5 0 116.80 90 77.45 CHANGED IPNIATYTGTIQGKGEVCIIGNKEGKTRGGELYAVLaSTNVNADMTLILLRNVGGNGWGEIKRNDIDKPLKYEDYYTSGLSWIWKIKNNSSETSNYSLDATVHDDKEDSDVLTKCPV IPNIATYTGTIQGKGEVCIIGNKEGKTRGGELYAVL+STNVNADMTLILLRNVGGNGWGEIKRNDIDKPLKYEDYYTSG.LSWIWKIKNNSSETSNYSLDATVHDDKEDSDVLTKCPV 0 0 0 0 +8977 PF09145 Ubiq-assoc Ubiquitin-associated Sammut SJ anon pdb_1pgy Domain Ubiquitin associated domains contain approximately 40 residues and bind ubiquitin noncovalently. They adopt a secondary structure consisting of three alpha-helices, and have been identified in various modular proteins involved in protein trafficking, clathrin assembly/disassembly, DNA repair, proteasomal degradation, and cell cycle regulation [1]. 
20.20 20.20 20.30 56.80 18.60 16.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.22 0.72 -4.10 4 19 2009-01-15 18:05:59 2006-08-15 11:59:10 5 4 19 1 13 20 0 45.50 60 6.90 CHANGED llDEV+DMElA+LMSLGLsI-cAscYY-+GlpYE...-.L+pR+pc lVDEVKDMEIARLMSLGLSI-cAs-aYEpslhYEphl-hlKp+pt..... 0 1 6 12 +8979 PF09147 DUF1933 Domain of unknown function (DUF1933) Sammut SJ anon pdb_1q15 Domain Members of this family are predominantly found in carbapenam synthetase, and are composed of two antiparallel six-stranded beta-sheets that form a sandwich, flanked on each side by two alpha-helices. Their exact function has not, as yet, been determined [1]. 20.50 20.50 20.50 20.60 20.20 19.50 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.24 0.71 -4.89 3 6 2012-10-03 21:14:07 2006-08-15 13:47:08 5 2 6 8 4 26 83 199.20 38 44.24 CHANGED N-FClV+tGhDKDINpLt+-FsGphEtLSNGsLFhcpsT+VQKa+hERGTAYLIGSLYN+ohLRuLAG+aEGcusVlNDAEILhLlpT+LGuuALuLAEGDFCFFIED+NGsLTVITESRGhNPVaLVQucctWITNSLKLVoAlEG-tAhDFccEupVscoul+sDsaoPl+NlQRLKPGTlNVLTFDspcYpalESRpL ...............................spFChl+tuhsKNlsplhpsF.shph-pLSsGpLahpspophpKhcs-+tTAYLI..GoIYN+shLculAG+aEGcu..VlsDsElLhhlhs+LGsuALoLAEGDFCF.FI.E-Kp.G.pLpllT-SpGhssVaLVp...sc.h.hWI.TN.oLKlVu..t.lEG-tAhDFpsEu..pVhpssL..+..sDsaoPl+NspRLKPGolNhLoaDppcY.alEsc.l....................................................... 0 0 2 3 +8980 PF09148 DUF1934 Domain of unknown function (DUF1934) Sammut SJ anon pdb_1r0u Domain Members of this family are found in a set of hypothetical bacterial proteins. Their precise function has not, as yet, been defined. 
21.40 21.40 21.50 22.20 21.30 21.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.32 0.71 -4.43 66 1332 2009-09-10 22:25:01 2006-08-15 13:52:04 5 1 1317 1 164 708 2 126.10 26 91.48 CHANGED l......pIphpphh..........tpsc.p-slEhhspGpahpKs.sshYlhYcEp..p.ss...sspsslKl..pcscltlhR...tGssph+hh.FppsccshshYpTPhGphpltspTpplplshs-ps...GplplcYpLh....hspphhuphplpl ..................................hpIphpshl.......p..tssc..pEph-.hhhp...Gphhp.+s.stpYlpYpEp..p.tt....phplslKl....pcpplhlhR...tGs.sphph+.Fhc.sp.c.shs.hYs.TP.hG.h.hpltspTppl..p.h.phpct.........tplpl.....cYpLh....tssphhusYplcl...................... 0 64 110 138 +8981 PF09149 DUF1935 Domain of unknown function (DUF1935) Sammut SJ anon pdb_1r75 Domain Members of this family are found in various bacterial and eukaryotic hypothetical proteins, as well as in the cysteine protease calpain. Their exact function has not, as yet, been defined. 20.90 20.90 20.90 25.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.35 0.72 -4.05 71 300 2009-01-15 18:05:59 2006-08-15 14:02:30 5 8 15 2 76 289 0 104.30 31 23.51 CHANGED ppGp...Ps...hp....sclhpsFcp...........G.........LLFRlVs.p................cppp..WuFYNDTpcaphcVpspFutsSp.l....csLssT..plt.........ppssGp........hhsplsVhPhcTphFl.cGps.sGacs.phpAhs ............................................GtPs.....hp.......sclhpsFcp...........G..........LLaRlls.p......................cppp....WuFYNDTpsaphcVpspFutsSp.l........pslssT..plp...............ppssGp...............hhsplsVhPhcTphFl.cGps.sGapsphpA................. 0 38 56 76 +8982 PF09150 Carot_N Orange carotenoid protein, N-terminal Sammut SJ anon pdb_1m98 Domain Members of this family adopt an alpha-helical structure consisting of two four-helix bundles. They are predominantly found in prokaryotic orange carotenoid protein, and carotenoid binding proteins [1]. 
25.00 25.00 64.60 64.60 23.40 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.90 0.71 -4.54 8 95 2009-01-15 18:05:59 2006-08-15 14:34:12 5 2 47 8 43 100 4 155.00 45 65.57 CHANGED ThDsA...Fs.ohtussV.sllspFNpLss-DpLALlWFsYpEMG+oITsAAPGAA..shpLAEuhLsplptho.cE.QhplMpDLss+sDTPloRsYGsaSsNsKLsFWYpLuEhMcQGsVsPlPsGYQLSssANplLEsIKsLDhGQQITlLRshVlsMGaDs ..............s............s..usslssshspFppLos-DQLALlWFuYhEMGcoITsAAPGAA..phphA-shLspI+pMo.pE.QhplMpDLAs+s-...Tsl.oRsYushSsNsKLuFWYpLuchMcpGhVsPlPsuYpLSssAsslLpsIcsL-.uQQITlLRssVs-MGhDs........... 0 5 27 42 +8983 PF09151 DUF1936 Domain of unknown function (DUF1936) Sammut SJ anon pdb_1pvm Domain This domain is found in a set of hypothetical Archaeal proteins. Its exact function has not, as yet, been defined. It possesses a zinc ribbon fold. 25.00 25.00 98.90 98.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.81 0.72 -4.45 2 2 2012-10-03 10:42:43 2006-08-15 15:04:02 5 1 2 4 2 5 1 36.00 89 20.45 CHANGED pHLCPKCGVGVL.PVYspKGEIKVFRCSNPACDYEE pHLCPKCGVGVL.PVYspKGEIKVFRCSNPACDYEE 0 1 1 1 +8984 PF09152 DUF1937 Domain of unknown function (DUF1937) Sammut SJ anon pdb_1t1j Domain This domain is found in a set of hypothetical bacterial proteins. Their exact function has not, as yet, been described. 23.00 23.00 23.00 23.40 22.90 22.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.41 0.71 -3.52 7 46 2012-10-02 19:28:18 2006-08-15 16:31:34 5 1 35 2 10 39 1 111.80 41 80.93 CHANGED RhlaLAsPYS........+.stshlptphhssschAApllcsGhssaS.VohS....HPI.hshs.h.cst.stlWushst.ahchhptllVl-lsGWccSsGI++Elchatstsp.V.laup ..R.laLAsPYS........+sss-hsEtphtssschsAp....llc.sGhsshS.VshS....Hsh.hshh..h.csh.stLWtshst.hhcth.ptllVh-lsGWccSsGIR+ElchhpAtsh.V.lau........ 
0 7 7 10 +8985 PF09153 DUF1938 Domain of unknown function (DUF1938) Sammut SJ anon pdb_1mgt Domain Members of this family, which are predominantly found in the archaeal protein O6-alkylguanine-DNA alkyltransferase, adopt a secondary structure consisting of a three stranded antiparallel beta-sheet and three alpha helices. Their exact function has not, as yet, been defined, though it has been postulated that they confer thermostability to the archaeal protein [1]. 22.10 22.10 24.80 94.00 20.50 19.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.65 0.72 -4.17 4 13 2009-01-15 18:05:59 2006-08-16 09:16:18 5 1 13 1 11 16 0 86.00 45 49.32 CHANGED MLoscpF+ltsRtlhIGVlaE-+IQGIsaSlDthEhl+cplscLhsaLpKR.GVplsLcEppScYP-lVFcVLhGKIuNEcuhE..ELS MLSlE+FcIssR-lhIuVla-cK.IQGIoFSLDGtEFLccRIssLspaLc+R.GVsVsLcpccScYP-LVacVLlGclcNE-uLc..ELS. 0 1 1 6 +8986 PF09154 DUF1939 Domain of unknown function (DUF1939) Sammut SJ anon pdb_1mxg Domain Members of this family, which are predominantly found in Archaeal amylase, adopt a secondary structure consisting of an eight-stranded antiparallel beta-sheet containing a Greek key motif. Their exact function has not, as yet, been determined [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.73 0.72 -4.07 9 520 2012-10-02 20:10:03 2006-08-16 09:30:45 5 6 490 22 62 384 5 58.10 47 11.92 CHANGED LlVhI.Npussh+p+hVpT.sWsspslhDYoGNuu...sssossDGWVplpsPsp.......GYulaS ............hu..lhl.sNsp.t.sS.K.pMaVGp..c.a.s.spsFhD..lLGNpp.........spVTI---GaGpFsVuup.........SVSVWs. 0 18 31 47 +8987 PF09155 DUF1940 Domain of unknown function (DUF1940) Sammut SJ anon pdb_1nig Domain Members of this family adopt a secondary structure consisting of six alpha helices, with four long helices (alpha1, alpha2, alpha5, alpha6) form a left-handed, antiparallel alpha helical bundle. 
The function of this family of Archaeal hypothetical proteins has not, as yet, been defined [1]. 25.00 25.00 252.60 252.50 23.20 18.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.83 0.71 -4.35 3 3 2009-01-15 18:05:59 2006-08-16 09:46:44 5 1 3 1 3 4 4 143.00 47 93.87 CHANGED YCPVIDDpLPlDHVYFKFRSEIEuAEAFLGLAVSEGlKVsETRElLDILDTVYNSLYDcESKLNDFQEKRLNFTEE-WYDIKEKsNNGNKWSLYMFLARSHlDsAVYWlo+M+EDERFK-hVcDEsIstLLKlGhVILREGLG YCPVIDDpLPlDHVYFKFRSEIEuAEAFLGLAVSEGlKVsETRElLDILDTVYNSLYDcESKLNDFQEKRLNFTEE-WYDIKEKsNNGNKWSLYMFLARSHlDsAVYWlo+M+EDERFK-hVcDEsIstLLKlGhVILREGLG 0 1 2 2 +8988 PF09156 Anthrax-tox_M Anthrax toxin lethal factor, middle domain Sammut SJ anon pdb_1j7n Domain Members of this family, which are predominantly found in anthrax toxin lethal factor, adopt a structure consisting of a core of antiparallel beta sheets and alpha helices. They form a long deep groove within the protein that anchors the 16-residue N-terminal tail of MAPKK-2 before cleavage. It has been noted that this domain resembles the ADP-ribosylating toxin from Bacillus cereus, but the active site has been modified to augment substrate recognition [1]. 
83.00 83.00 108.40 108.30 82.90 82.90 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.06 0.70 -5.30 2 16 2009-01-15 18:05:59 2006-08-17 09:03:18 5 3 14 17 1 17 0 259.30 97 36.03 CHANGED MLARYEKWEKIKQHYQHWSDSLSEEGRGLLKKLQIPIEPKKDDIIHSLSQEEKELLKRIQIDSSDFLSTEEKEFLKKLQIDIRDSLSEEEKELLNRIQVDSSNPLSEKEKEFLKKLKLDIQPYDINQRLQDTGGLIDSPSINLDVRKQYKRDIQNIDALLHQSIGSTLYNKIYLYENMNINNLTATLGADLVDSTDNTKINRGIFNEFKKNFKYSISSNYMIVDINERPALDNERLKWRIQLSPDTRAGYLENGKLILQRNIGLEIKDVQIIKQSEKEYIRIDAKVV MLARYEKWEKIKQHYQHWSDSLSEEGRGLLKKLQIPIEPKKDDIIHSLSQEEKELLKRIQIDSSDFLSTEEKEFLKKLQIDIRDSLSEEEKELLNRIQVDSSNPLSEKEKEFLKKLKLDIQPYDINQRLQDTGGLIDSPSINLDVRKQYKRDIQNIDALLHQSIGSTLYNKIYLYENMNINNLTATLGADLVDSTDNTKINRGIFNEFKKNFKYSISSNYMIVDINERPALDNERLKWRIQLSPDTRAGYLENGKLILQRNIGLEIKDVQIIKQSEKEYIRIDAKVV... 0 0 1 1 +8989 PF09157 TruB-C_2 Pseudouridine synthase II TruB, C-terminal Sammut SJ anon pdb_1k8w Domain Members of this family adopt a secondary structure consisting of a four-stranded beta sheet and one alpha helix. They are predominantly RNA-binding domains, mostly found in Pseudouridine synthase II TruB [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.78 0.72 -4.11 98 1516 2012-10-02 17:37:24 2006-08-17 09:13:28 6 5 1503 3 305 983 305 58.00 38 18.73 CHANGED PplpLsspputtlhpGQsV...............sssst..pG.hVRla......spstp..FlGlGclstcG...hltP+RLl .........PhVsLs.ss.ushhppGpsV........h................s.ss.sPh..cG..hVRVh........ucss+....FlGlGE.l...sc.-.G......+luP+RLl................. 0 68 161 235 +8990 PF09158 MotCF Bacteriophage T4 MotA, C-terminal Sammut SJ anon pdb_1kaf Domain Members of this family adopt a compact alpha/beta structure comprising three alpha-helices and six beta-strands in the order: alpha1-beta1-beta2-beta3-beta4-alpha2-beta5-beta6-alpha3. 
The beta-strands form a single anti-parallel beta-sheet and the three alpha-helices pack side-by-side onto one surface of the beta-sheet. In this architecture, the domain's hydrophobic core is at the sheet-helix interface, and the second surface of the beta-sheet is completely exposed. The domain is a DNA-binding motif, with a consensus sequence containing nine base pairs (5'-TTTGCTTTA-3'), that appears to bind to various mot boxes, allowing access to the minor groove towards the 5'-end of this sequence and the major groove towards the 3'-end [1]. 25.00 25.00 57.50 55.90 19.00 19.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.37 0.72 -4.34 3 25 2009-01-15 18:05:59 2006-08-17 09:29:02 5 2 24 6 0 22 0 102.40 43 49.29 CHANGED ITuEMcELt-hlhcLl--N.lsLKcVEIYRSNYplIFuKRT.pGIRpFEIpNNGshRIFGYKMuEc+lctFTslGs-lKIthGGpNTYIDIcpsucNItsVlTlA ....ITs-MEp.tDhhhcLL..--p.hs.lKclpp.RSNallhhpKRT.cGIRpFElpNsGphRIFGYKMtE+clppFTslGhssKlttsG.NsYlDIcpos-NIttlIssA... 1 0 0 0 +8991 PF09159 Ydc2-catalyt Mitochondrial resolvase Ydc2 / RNA splicing MRS1 Sammut SJ anon pdb_1kcf Domain Members of this family adopt a secondary structure consisting of two beta sheets and one alpha helix, arranged as a beta-alpha-beta motif. Each beta sheet has five strands, arranged in a 32145 order, with the second strand being antiparallel to the rest. Mitochondrial resolvase Ydc2 is capable of resolving Holliday junctions and cleaves DNA after 5'-CT-3' and 5'-TT-3' sequences [1]. This family also contains the mitochondrial RNA-splicing protein MRS1 which is involved in the excision of group I introns [2-3]. 
20.00 20.00 20.00 20.70 19.80 19.90 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.92 0.70 -4.91 53 128 2012-10-03 01:22:09 2006-08-17 10:06:20 5 6 108 2 83 123 59 264.70 23 72.90 CHANGED I....LSIDhGl+NhAascLps..........................shtssppshP............................pLpsWp....+lsLs.............................................................................pttstppccpp.shsPshhuphAasLlspLl.tshp.....P...shllIERQRhRSsGuuul.EhsL+VsllEsMLaAsLpshppppptt.st..............hVhussPp+lspaWhpttsh.........ps......................tpppphssppsKph+IcLltphLsssh........................hthshsts.ph....h...pthlt+apsptpsppthtthtt...............................................tt.chpKlDDLADsLLpu.lsWhcWppsppcl .............................ILSIDhGl+NhAasplph................................tttpshs.............................................................................pltsWp....+lsLp.......................................................................................................t.t...ppppp.shs.Pt.huphsapLlppLh..shp..........P.shllIE..R..QRhRosu.u......ssl.-.sl+VshlEshLaAsLpsh.t.ttp...t................hVhussPp+hspaWhp.t.........................................................ppphhssppsKph+lcLltphLpssh........................hth.h..tt.ph............thhtt.attthptththtth.t.................................tt..phtKhDDLsDslLpu.lsWhcW.tthpp.................................................................. 0 18 40 69 +8992 PF09160 FimH_man-bind FimH, mannose binding Sammut SJ anon pdb_1klf Domain Members of this family adopt a secondary structure consisting of a beta sandwich, with nine strands arranged in two sheets in a Greek key topology. They are predominantly found in bacterial mannose-specific adhesins, since they are capable of binding to D-mannose [1]. 
25.00 25.00 27.20 27.10 24.00 19.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.74 0.71 -4.36 6 806 2009-09-14 15:18:23 2006-08-17 10:27:01 5 4 326 37 14 375 0 143.80 68 49.23 CHANGED C+sssTGQ.sh...suGsusVhVNLsPsVpsspNl.VlDLSQ...plsCpND.ushp.hDYlplppGSuFusuLss.......FoGolpahupoYPhPhssps.sphhhspssa+PhPlKhYLoPsuuAsGVlI+uG-LIAplhhpKhuohu.cus.+N.......FsWp ...............C+s.sGsu.sI....GGG....oA...NVY.VNLuPs.VNVGQN.L..VVDLST...QIFCHNDYPE.......TITDYVTL.QR.GSAY.GGVLSs.......FS.GTVKYsG.oSYPFPT.ToET.sRVlYsSRTDKPWPlsLYLTP....VS.o..AGG.VsIKAGSLIAVLIL+QTNNY.....NSD-FQ.......FlWN........................... 0 2 4 9 +8994 PF09162 Tap-RNA_bind Tap, RNA-binding Sammut SJ anon pdb_1koh Domain Members of this family adopt a structure consisting of an alpha+beta sandwich with an antiparallel beta-sheet, arranged in a 2(beta-alpha-beta) motif. They are mainly found in mRNA export factors, and mediate the sequence nonspecific nuclear export of cellular mRNAs as well as the sequence-specific export of retroviral mRNAs bearing the constitutive transport element [1]. 20.80 20.80 21.60 21.50 20.20 20.00 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.86 0.72 -4.41 12 238 2009-01-15 18:05:59 2006-08-17 10:39:35 5 15 77 12 138 237 0 85.00 43 15.68 CHANGED Dtsp..csWFKlTIPaG+KYDKpWLLs.lQshCSlPFsPVcFHh-pp+ApFFVEsussApALKplSt+IhDc-spKlsIhhsPsssP.pl .............tsWFKl.TIPaGpKYDKpWLls..I.Qu.p.CSVPFsPl..........-.FHY-ps.c.ApFFV-DussAsALKslshKI.hD.c.-.sp+IsIhVs.sussP.................. 0 21 29 62 +8995 PF09163 Form-deh_trans Formate dehydrogenase N, transmembrane Sammut SJ anon pdb_1kqf Domain Members of this family are predominantly found in the beta subunit of formate dehydrogenase, and consist of a single transmembrane helix. They act as a transmembrane anchor, and allow for conduction of electrons within the protein [1]. 
20.60 20.60 20.90 20.90 20.20 20.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -7.95 0.72 -4.46 40 1298 2009-01-15 18:05:59 2006-08-17 10:52:16 6 17 834 2 134 480 11 44.10 58 14.80 CHANGED ISssVpLWK.GlhKPLushuhuussluuhhHYlslGPNcss....---- ..IspoVphWK.GhhKPLAAsGFhATF..AutIFHYlGlGPN+ts....----p....... 0 20 52 94 +8996 PF09164 VitD-bind_III Vitamin D binding protein, domain III Sammut SJ anon pdb_1kxp Domain Members of this family are predominantly found in Vitamin D binding protein, and adopt a multihelical structure. They are required for formation of an actin 'clamp', allowing the protein to bind to actin [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.39 0.72 -4.36 6 46 2012-10-01 21:15:37 2006-08-17 11:02:52 5 2 31 9 23 56 0 67.30 58 14.43 CHANGED ELCADYSENTFTEYKKKLu-pLRsKhP-Aosp-Ls-LVsKRSDFASpCCSINSPPlYCsSpIDAElps .....................ELCADYSENTFTEYKKKLuEpL+sKhPDAoss-LtcLV-+RSDFASpCCSINSPPLYCsSplcs.ht............ 0 1 2 6 +8997 PF09165 Ubiq-Cytc-red_N Ubiquinol-cytochrome c reductase 8 kDa, N-terminal Sammut SJ anon pdb_1l0l Domain Members of this family adopt a structure consisting of many antiparallel beta sheets, with few alpha helices, in a non-globular arrangement. They are required for proper functioning of the respiratory chain [1]. 22.90 22.90 23.70 22.90 21.60 20.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.42 0.72 -3.66 16 86 2009-09-11 00:16:57 2006-08-17 11:15:50 5 2 67 47 31 89 0 73.30 47 27.43 CHANGED hSlsuRSGsluPYlpATopuVAusLK.PLlPusl.hpuEKlllcs++shLs+ESLsGphPppuLtsosu.lsususVR ........hSlAARSGsFAPhLpATSpuVAGsL+.P.LltusV....sssEpslLDsK+PFLsRESLSGQus+p.sLsASVG.lNsPAoVR....... 
0 9 12 18 +8998 PF09166 Biliv-reduc_cat Biliverdin reductase, catalytic Sammut SJ anon pdb_1lc0 Domain Members of this family adopt a structure consisting of four alpha helices and six beta sheets, in an alpha-beta-alpha-alpha-alpha-beta-beta-beta-beta-beta arrangement. They contain a catalytic active site, capable of reducing the gamma-methene bridge of the open tetrapyrrole, biliverdin IX alpha, to bilirubin with the concomitant oxidation of a NADH or NADPH cofactor [1]. 21.00 21.00 21.00 23.40 19.20 20.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.43 0.71 -4.07 4 55 2009-01-15 18:05:59 2006-08-17 11:33:23 5 3 38 7 31 52 0 111.70 61 38.98 CHANGED -FKtLK+ElpGKpL.EGsLHFTGGPLcts.FGFPuFSGIARLTWLVsLFG-LoVTSAThEEcKEppY.KMTApLhTppc+PLTWIEERGPGhtRsKHIcF+FpssoLsplPuusR ................................EFthLKKEVsGK-LlKGoLhF.TuGPL-Ep+FGFPAFSGIuRLTWLVsLFGELSlsSATLEEcKEcpYhKMTVpLcTpsK+PLTWIEE+GPGLKRs+alsF+FcSGoLEslPsss.h............. 1 2 3 10 +8999 PF09167 DUF1942 Domain of unknown function (DUF1942) Sammut SJ anon pdb_1lmi Domain Members of this family of bacterial proteins assume a beta-sandwich structure consisting of two antiparallel beta-sheets similar to an immunoglobulin-like fold, with an additional small, antiparallel beta-sheet. The longer-stranded beta-sheet is made up of four antiparallel beta-strands. The shorter-stranded beta-sheet consists of five beta-strands, four of these beta-strands form an antiparallel beta-sheet. The exact function of this family of proteins is unkown, though a putative role includes involvement in host-bacterial interactions involved in endocytosis or phagocytosis, possibly during bacterial internalisation [1]. 
20.70 20.70 20.80 23.30 20.40 20.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.59 0.71 -4.38 13 123 2009-01-15 18:05:59 2006-08-17 11:45:15 6 2 76 1 32 109 0 122.70 51 64.17 CHANGED ApssspplGpsucLls..GsVVQsWTVSDLKPSoDsIP.YtltGpLWEATATscAlsGsVTPIVSNhNARAssGpoYRVLapVATPpGlNPuTLuQGppoTGKlYFDV.TGssPsSVVY.NsGupDlllW ....................s..PhstphGophphsD..ssGtVV.uWpVSDL+sSossIP.YsltGplWEATATspAlpGoVTPsVupFNARsssG.sYRVLap.sAsPsslssATlsQGppoTGKIYFDV.TGssPshVsh.NsG..hpDLLlW........ 0 1 18 27 +9000 PF09168 PepX_N X-Prolyl dipeptidyl aminopeptidase PepX, N-terminal Sammut SJ anon pdb_1lns Domain Members of this family adopt a secondary structure consisting of a helical bundle of eight alpha helices and three beta strands, the last alpha helix connecting to the first strand of the catalytic domain. The first strand of the N-terminus also forms a small parallel beta sheet with strand 5' of catalytic domain. The domain mediates dimerisation of the protein, with two proline residues present in the domain being critical for interaction [1]. 25.00 25.00 28.60 28.50 21.30 19.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.07 0.71 -3.98 24 510 2009-01-15 18:05:59 2006-08-17 11:56:45 5 3 489 1 52 340 1 141.70 47 19.29 CHANGED MKhN..QFualssshcpthpELppluF....hptpss.ps.LcsFlp+hahphps...psttLsplhAsscpDLhsFhpoc.pplotclFYslALQLLGFpsthDFsh.DshshhcchshP.......hhssppllpuhYpLLsTRoKsGpoLlDpLsucGa ...M+aN..QaSYlshsh-phlpELcplGFp...hpspsstKcsLEsFLR+hFhpaps....osasLo.LAA-pcTDLLoFFpS-.pcLTs-lFYsVAhQLLGFphhVDF-..DspsFh+csuFP......hhasp..LI-sLYpLLNTRTKpGpTLIDpLVScGL......... 1 7 20 34 +9001 PF09169 BRCA-2_helical BRCA2, helical Sammut SJ anon pdb_1iyj Domain Members of this family adopt a helical structure, consisting of a four-helix cluster core (alpha 1, alpha 8, alpha 9, alpha 10) and two successive beta-hairpins (beta 1 to beta 4). An approx. 
50-amino acid segment that contains four short helices (alpha 2 to alpha 4), meanders around the surface of the core structure. In BRCA2, the alpha 9 and alpha 10 helices pack with BRCA-2_OB1 (Pfam:PF09103) through van der Waals contacts involving hydrophobic and aromatic residues, and also through side-chain and backbone hydrogen bonds. The domain binds the 70-amino acid DSS1 (deleted in split-hand/split foot syndrome) protein, which was originally identified as one of three genes that map to a 1.5-Mb locus deleted in an inherited developmental malformation syndrome [1]. 25.00 25.00 25.00 25.10 24.40 24.60 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.13 0.71 -4.74 5 134 2009-01-15 18:05:59 2006-08-17 12:55:12 5 28 95 3 83 142 1 148.40 43 7.01 CHANGED DLhusLps......ARDlQ-MRIKKKpRQplhPQPGSLYLsKoSslsRISLKuAVGccsPStpSs...cQLYsYGVSK+CIcVNScNAESFQFclp-FFuK.EsLpsGcGIQLADGG..WLIPoNDGKAGKEEFYRALCDTPGVDPKLISctWVYNHYRWIVWKLAAMEpuFP+cFANRCLTPEpVLLQLKYRYDlEIDpS ....................................................................................................................................................................h.t...............h............ph..htl..t.....h.tlpstsu....apF...paht.....t..h..........t.tuh.htDus.....hl.l.s.ppGp..s.Gt.cEFhcAlhsss...GVDPpLloctWVhNHYRWIlWKLAu..M..Eh.sFPc...c...h...us...+.sLoP-pVLhQLKYRYDhElDp..................... 0 36 46 64 +9002 PF09170 STN1_2 DUF1879; CST, Suppressor of cdc thirteen homolog, complex subunit STN1 Mistry J, Sammut SJ anon pdb_1wj5 Domain STN1 is a component of the CST complex, a complex that binds to single-stranded DNA and is required for protecting telomeres from DNA degradation. The CST complex binds single-stranded DNA with high affinity in a sequence-independent manner, while isolated subunits bind DNA with low affinity on their own. 
In addition to telomere protection, the CST complex probably has a more general role in DNA metabolism at non-telomeric sites. 21.90 21.90 24.90 24.20 21.50 21.30 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.27 0.71 -4.50 6 63 2009-01-15 18:05:59 2006-08-17 12:57:40 5 4 43 1 32 51 0 165.20 48 47.30 CHANGED DPshslQIARMLELPplYRpVYDpPFchPsptcs..Eutsspt.ls.stLlShLSEKlKEFLhEp+lpsFYQpELE.hV-SLlulASpPV...ssuDQ.p.tcoSsSpQl+plFKEAlplLQ-cGhlFQKcpspDEVYpVTcQDKDLHptIhcII+EDC++pKHhEKGCHhLHILsCVRhs .......DPshslQIARMLELPplYRclYDpPFp.sshtpp...cuhs.s.sss.lshssL.sshL.....SEKhK-FL..hE.s+VpoFY....QpELE.hV-SLlslAspPl........sssucQ...sh.ps.sosSptI+slFKpAlplLQ-cGlVFQKssu.Dpl.YaVTccDK-LH+pIhcIIpEDCQKPpHsEKGCHFLHILuCsR.p.................................... 0 5 7 14 +9003 PF09171 DUF1886 Domain of unknown function (DUF1886) Mistry J, Sammut SJ anon pdb_1xg7 Domain This domain is predominantly found in the Archaeal protein N-glycosylase/DNA lyase. 21.30 21.30 21.60 21.30 19.80 21.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.58 0.70 -5.35 19 68 2009-01-15 18:05:59 2006-08-17 13:00:51 5 1 58 4 53 72 1 219.20 28 79.14 CHANGED ls-hlpclul-tsphlEcp.DsQacAlppLhcph.spthhspLllhNALVSYpLou+GE-aWhpFucY......Fupp....pspslscsahpFLppSphN+RhlcsKl+Rlc+hpsalpsL.t..s...ha.pshstlhppLu+hLsuctpsKTlVFAlKMhsYAhRsshsh.hshPh-IPIPlDhRltplThp......................hpp-tshchWsplA+couIPPLHlDollW.lhGtsh.htp..tp..........ltcclttlhc 
....................................................................................................................................hphlpplslchhphhEcp.D.QahslppLhpph..st..............thhhhLslhNuLlSYpLss+G.E.c.aW..Fuca.......aspp.............pshhctahp.Fl.po.htchhhptKl+Rlp+h..hsh.h...pl.....p...hh.pshtthhppLuphlssc.ptKTlVFAlKMhsY...uhchs.s..h....hsh-IsIPlDhRlsphThp..h...............................hppctshphWpplu+p.....u.s....IPPlHlDolLW.lhGtth..h.p.......................t................... 0 16 26 38 +9004 PF09172 DUF1943 Domain of unknown function (DUF1943) Sammut SJ anon pdb_1lsh Domain Members of this family adopt a structure consisting of several large open beta-sheets. Their exact function has not, as yet, been determined [1]. 23.10 23.10 23.60 23.20 22.60 22.40 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.78 0.70 -5.55 71 578 2009-01-15 18:05:59 2006-08-17 13:03:58 6 29 211 1 253 586 0 275.40 18 14.04 CHANGED +aS+shchshassshhhGs...usps.hI.sssohLP+slhhphpshhhGt..shslhElGhRsEGlcchltc................................................................................ts...tpphpclpphlpt.lpph+shs.......ppsh.............ushYl+hhGpElsFhslsc.........phlcphhphh..............pphlpp.......lh.pGhphp..hs+shlhh-schhhPTshGlPhcluhhssul.suhpspsp.hslps.h.p.h..........pshphcschpPSluhphhuhhGlsss.hhps..ulphcsplpstsshchpsclshptt.phclph...ss..pp.pclhsh..p.scsasl 
..................................................................paSpshp....hshh.p...h........hG....s....thph.hIhsssohlP+thhhphpt.hhGh..shshhEluh..cs..cuhcphltph.........................................................................................t....tp.....tpthpp.lt.p..hl...ph.hpphcth........ts..............up.h.al+hhGp-htahshsc.............phl.pph.hphh..................tphlpp...lh.....puhphp.......ht..p.hhhh-sphhh..PTshGlPhp.hshhsssl.sshp.s.p..sp..hphps...t...................tphphph..phpPohuhphhsh..hGh.ss..hhps....ulthpsph..pt.ths...hchphphpht....t..p.....hclph....ss....pp......pl..hph.psp.hh................................................................................. 0 84 117 208 +9005 PF09173 eIF2_C Initiation factor eIF2 gamma, C terminal Sammut SJ anon pdb_1kk1 Domain Members of this family, which are found in the initiation factors eIF2 and EF-Tu, adopt a structure consisting of a beta barrel with Greek key topology. They are required for formation of the ternary complex with GTP and initiator tRNA [1]. 22.00 22.00 23.10 23.40 21.90 20.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.95 0.72 -3.94 36 581 2009-09-10 18:26:08 2006-08-17 13:07:42 6 11 483 29 368 573 91 89.20 51 18.91 CHANGED asclcl.paaLLcRllGs+p......thKVppLppsEsLMlNlGSsoTsGhVsulK..sDhsclpLspPVCsphG-.+lAlSRRlsp+WRLIGWGpI ..................asElEl.saaLL+RLLGV+ot...........stKtuKVpKLs+sElLMlNIGShoTGG+VsulK........sDh.A+l.tL.TsP.VCT-hGE.KlA.LSRRl-....KHWRLIGWGpI........ 0 119 204 294 +9006 PF09174 Maf1 Maf1 regulator Mistry J, Wood V anon manual Family Maf1 is a negative regulator of RNA polymerase III [1][2]. It targets the initiation factor TFIIIB [3]. 
21.60 21.60 23.00 23.50 21.30 21.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.08 0.71 -4.42 21 367 2009-01-15 18:05:59 2006-08-17 13:09:05 5 7 291 1 253 346 4 169.50 32 62.03 CHANGED slhupl-saos+hstsc+plhpphppp.pttt.ps.s.s.....................t.........................sshsslscpsSR+sLsYLIusLNtsa.PD.YDFS.slcspsFp+p.ohppVhsplsssL.slspp.h............................shhtplWpslc-pls..lp-C-IYoYs..Pp.ssDPa.sEcGslWShsYFFaNKKhKRllahp ............................................................................................................................hhsth-.aosKhsus-+chh+ph..ppp..h..p..sp....p..p.s......................................................s......tt....t.t....................................................sshusL.s...pp.s.SR+ThhYLIuTLNtua.PD.YDFS.sh+s..pcF..............p+E.s...hphlhstlsssLh..shs.tp.........................................................................................................shhsphWpsl-cphs..Lp-...CslYoYs..P...s.sDPa..t-p.....G...s....lWShpYFFaN+chKRlsah...................................... 1 94 148 216 +9007 PF09175 DUF1944 Domain of unknown function (DUF1944) Sammut SJ anon pdb_1lsh Domain Members of this family adopt a structure consisting of several large open beta-sheets. Their exact function has not, as yet, been determined [1]. 
20.90 20.90 23.70 21.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.02 0.71 -4.62 30 201 2009-09-11 17:00:47 2006-08-17 13:15:43 5 9 70 1 58 162 0 155.20 37 12.32 CHANGED PshsllsRAVRuDpKhtGYQlusYhD.+ssuR..lQlIluslu-ssNW+lCADuslLS+HK.lhA+luWGtEC+pYssplpAETGhl.GspPAsRl+lsWs+LPpsh+...cYuKtlscYl...sssAhhsGlspp+p+Nsp+QlplTlsssSp+olslllKsPchTlYKhultLP ...........................................................................PshsllhRAlRuDpKh.GYQlssYhDpssuR...lQllluslu-ssNWKlCADuslLSpHK....spA+lsWGtEC+.pYps.hpA.EoGhl.uppPAsRl+lpWp+lPphhp...phu+pltcal....ssAh.hGhp..p+scNsp+plplosAhsSp+olsllh+hPchTha+hslhLP...................................... 0 0 11 41 +9008 PF09176 Mpt_N Methylene-tetrahydromethanopterin dehydrogenase, N-terminal Sammut SJ anon pdb_1lu9 Domain Members of this family adopt a alpha-beta structure, with a core comprising three alpha/beta/alpha layers, in which each sheet contains four strands. They are predominantly found in prokaryotic methylene-tetrahydromethanopterin dehydrogenase, which catalyses the dehydrogenation of methylene-tetrahydromethanopterin and the reversible dehydrogenation of methylene-H(4)F [1]. 19.10 19.10 21.30 24.80 17.80 15.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.70 0.72 -4.12 17 128 2009-01-15 18:05:59 2006-08-17 13:40:18 6 9 85 6 52 135 48 76.40 47 27.55 CHANGED PFDlNMAlDAGa-slhsYusVp.p-VpsLVQDuIFSRuPpshp+TulFIGGpDhshAhshLcsAKcuhlPPFclSV..hsDPu .PFDlshAhDAGh-hlhsYssVp.spVsuLsQDuIFoRuPpshtcTuIFIGG+DsthAhDMLcsA+cuhhsPFclSV..hADPu. 1 16 37 44 +9009 PF09177 Syntaxin-6_N Syntaxin 6, N-terminal Sammut SJ anon pdb_1lvf Domain Members of this family, which are found in the amino terminus of various SNARE proteins, adopt a structure consisting of an antiparallel three-helix bundle. 
Their exact function has not been determined, though it is known that they regulate the SNARE motif, as well as mediate various protein-protein interactions involved in membrane-transport [1]. 24.90 24.90 25.00 25.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.11 0.72 -3.58 43 417 2009-01-15 18:05:59 2006-08-17 14:06:41 6 8 263 7 271 389 2 95.20 29 35.74 CHANGED DPFa.Vpc-Vpculsph....cslappatphhssss.................chtphpp-LpsslpslchsLcDLcculsl.....spps...Pp+a.slsppElscR+palpphcsplpplc ..............DPFa.....VppE.Vpculsph....culappahclhppss.....................t..Ehp.tppELcssLpolchsL.-DL-c.ol..pl...................scps.....Pt+.a.slstsElspR+palpshcpplpphc.................... 0 80 146 214 +9010 PF09178 DUF1945 Domain of unknown function (DUF1945) Sammut SJ anon pdb_1lwh Domain Members of this family, which are predominantly found in prokaryotic 4-alpha-glucanotransferase, adopt a structure composed of six antiparallel beta-strands, four of which form a beta-sheet and another two form a type I' beta-hairpin. The role of this family of domains, has not, as yet, been defined [1]. 21.20 21.20 29.40 28.10 20.90 19.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.52 0.72 -4.08 2 9 2009-09-11 06:49:26 2006-08-17 14:23:45 5 1 8 4 2 8 2 49.40 71 11.17 CHANGED AplEFLCKE-KhL.VYRLhD-t+SLKVhHNLSstEhVFEGV+hpPYpTEVl .......AKlEFLCKE-KFL.VYRLYDDQ+SLKVFHNLSGEEVVFEGV+h+PYKTEVV. 0 1 1 2 +9011 PF09179 TilS DUF1946; TilS substrate binding domain Sammut SJ, Bateman A anon pdb_1ni5 Domain This domain is found in the tRNA(Ile) lysidine synthetase (TilS) protein. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.10 0.72 -3.96 123 1699 2009-01-15 18:05:59 2006-08-17 14:53:22 6 7 1681 3 357 1331 279 68.00 29 15.99 CHANGED Lslss..Ltpho.scppplLRhWLpth...sh.thPoptpLpplhpplhtup...tDupsplph.s...shplRRapscLah ...........Lplss..hhshS.ss+pttllRtWLstt...sh..shP.......S..pspLpclhpplt.hA+...pDussplph.s......shplRRapspLah............................. 0 94 204 286 +9012 PF09180 ProRS-C_1 Prolyl-tRNA synthetase, C-terminal Sammut SJ anon pdb_1nj1 Domain Members of this family are predominantly found in prokaryotic prolyl-tRNA synthetase. They contain a zinc binding site, and adopt a structure consisting of alpha helices and antiparallel beta sheets arranged in 2 layers, in a beta-alpha-beta-alpha-beta motif [1]. 20.90 20.90 21.10 21.00 20.60 20.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.60 0.72 -4.05 82 1277 2009-01-15 18:05:59 2006-08-17 15:23:59 6 39 1155 18 562 1221 419 73.60 33 12.63 CHANGED a--h............ppsl.spt....shlhssWCGs.tcsEpcIKcco.....................sApshCl.Ph..-........p.tpsspCl...hsG.+sAp..phshFu+uY ............................................a--hpptl..pcs........salhu.WCGs..tcCE-cIK-co...........................................uAssRCI.Pa-.....................ttptsspCl.....hsG..+sAc..phshFu+uY.............. 0 222 375 492 +9013 PF09181 ProRS-C_2 Prolyl-tRNA synthetase, C-terminal Sammut SJ anon pdb_1nj8 Domain Members of this family are predominantly found in prokaryotic prolyl-tRNA synthetase. They contain a zinc binding site, and adopt a structure consisting of alpha helices and antiparallel beta sheets arranged in 2 layers, in a beta-alpha-beta-alpha-beta motif [1]. 
25.00 25.00 78.40 77.10 19.50 17.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.08 0.72 -4.05 4 16 2009-01-15 18:05:59 2006-08-17 15:24:53 5 1 16 4 10 17 0 65.10 57 14.15 CHANGED ITll-NhcsD......clKtsLSEpKGlILVPacEsIYNEEFEEhIDASVLGpTpYcGKcYISIA+TY .........ITll.sh....-s-.....-clKphLsEp+GlILIPacEsIYNEEhEEpl-ASVLGpTpYcGKcYIuIA+TY 0 1 2 7 +9014 PF09182 PuR_N Bacterial purine repressor, N-terminal Sammut SJ anon pdb_1o57 Domain The N-terminal domain of the bacterial purine repressor PuR is a winged-helix domain, a subdivision of the HTH structural family. It consists of a canonical arrangement of secondary structures: a1-b1-a2-T-a3-b2-W-b3, where a2-T-a3 is the HTH motif, a3 is the recognition helix, and W is the wing. The domain allows for recognition of a conserved CGAA sequence in the centre of a DNA PurBox, resulting in binding to the major groove of DNA [1]. 24.60 24.60 24.80 24.60 24.50 23.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.21 0.72 -4.36 37 1165 2012-10-04 14:01:12 2006-08-17 15:54:53 5 2 1161 8 145 506 0 68.50 57 25.17 CHANGED +Rs-RllshT+hLl-pPpcLlsLshFu-hapuAKSoISEDLsIlKcshcchuhGpl-TlsGAAGGV+YIP .....+RS-RhVshopYLlspPpcLlsLshFA-+YpuAKSSISEDlsIIKcsFccpplGplpTlsGAuGGVpahP.... 0 52 92 118 +9015 PF09183 DUF1947 Domain of unknown function (DUF1947) Sammut SJ anon pdb_1q7h Domain Members of this family are found in a set of hypothetical Archaeal proteins. Their exact function has not, as yet, been defined. 22.50 22.50 22.60 23.00 22.40 22.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.19 0.72 -4.21 7 27 2009-01-15 18:05:59 2006-08-17 16:16:08 5 2 27 1 13 24 4 64.70 45 42.26 CHANGED pRHhlScK-tKhhhschcp.YGIDlou.tclEVuppK+phhYalssh.shFs-.pLIPTLhhlpphp .QRHlhSpK-tKhhlsKlKp+Ys.IDlSs.s+lElGKEKKcs.aYYlsslL..u..F.Fs-...LIPTL.ChlhKhp........... 
0 3 5 10 +9016 PF09184 PPP4R2 PPP4R2 Mistry J, Wood V anon manual Family PPP4R2 (protein phosphatase 4 core regulatory subunit R2) is the regulatory subunit of the histone H2A phosphatase complex. It has been shown to confer resistance to the anticancer drug cisplatin in yeast [1], and may confer resistance in higher eukaryotes. 25.00 25.00 29.60 29.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.10 0.70 -4.86 15 305 2009-01-15 18:05:59 2006-08-17 16:18:36 6 3 245 0 201 303 0 204.50 22 50.95 CHANGED shcpl.phhccFpchtpK.Elss.L-paLsclA+T.G-ThhsWsphKshhpaKlppVhcDFp....................tpsP..tchssssNV-shshE-MKcplLcllssFNu......hPFTIQRlCELLs-P..p+pY..sclDKFlRAlEKNlhVVSolpPssc+ssus......stsphsulh..................hststssahcc.t.............Vsssusscshs......csp..huss.ssNshssospscspphpp..-ccss.sussps-ss.s..sslt......sc+s-c--sppt-u-..........hEscphc.-c---Esc--p-pssss- ...............................................................................................................................t....................................................h..hh...h..h....ph..........................................................................p..........p................hpp.h...pphl.h.....p.h.l.s.sFss.......sPFTIQRLCELlhcP..p+p.Y.....sshsKal+AlEK...........slhVsS....s....h....t..........s......t...................................t........................................................................................................................................................................................................................................................................................s................................................................................................................................................................ 
0 64 106 162 +9017 PF09185 DUF1948 Domain of unknown function (DUF1948) Sammut SJ anon pdb_1q8c Domain Members of this family of Mycoplasma hypothetical proteins adopt a helical structure, with one central alpha-helix surrounded by five others, in a NusB-like fold. Their function has not, as yet, been determined [1]. 25.00 25.00 292.80 292.60 23.90 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.78 0.71 -4.52 2 4 2009-01-15 18:05:59 2006-08-17 16:25:49 5 1 4 1 2 4 16 140.00 87 88.33 CHANGED LTRTQRRIAlVEFIFuhLFFLPKpA-.IQAsFL-YDs.ER.LN-WQK.IVKsFSEphhpF.chIEpQQ.+NQhElQoKYNKlSGKKlDLLTpAVlLCALSEQ+ApsTDKPLLISEALLIMDHYSQssEKKQTHALLDKLL LTRTQRRIAIVEFIFATLFFLPKTADQIQAAFLDYDVPERPLNDWQKEIVKVFSERCVEFIELIENQQQRNQAEVQSKYNKVSGKKVDLLTKAVILCALSEQHAQATDKPLLISEALLIMDHYSQVPEKKQTHALLDKLL 0 1 1 1 +9018 PF09186 DUF1949 Domain of unknown function (DUF1949) Sammut SJ anon pdb_1vi7 Domain Members of this family pertain to a set of functionally uncharacterised hypothetical bacterial proteins. They adopt a ferredoxin-like fold, with a beta-alpha-beta-beta-alpha-beta arrangement [1]. 20.90 20.90 20.90 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.33 0.72 -4.36 181 2861 2012-10-02 20:07:24 2006-08-17 16:36:13 6 4 2841 2 493 1750 45 55.80 23 26.73 CHANGED lphcYsphuplcphLp...p.ts.htlhcppYssp..Vplpltlstsphpshpptls-hosGp ......lphsYsphsplcthLt....p.ps.htlhssp.Yssp.....Vphpltl..s..tsc..h..pshpstLs-hopG................... 0 132 288 408 +9019 PF09187 DUF1950 Domain of unknown function(DUF1950) Sammut SJ anon pdb_1vk5 Domain Members of this family pertain to a set of functionally uncharacterised hypothetical eukaryotic proteins [1]. 
25.00 25.00 107.40 25.60 17.60 17.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.71 0.71 -3.96 3 18 2009-01-15 18:05:59 2006-08-17 16:49:10 5 2 10 3 11 22 0 110.80 59 64.70 CHANGED LLR+.AEMYQ-YMKQlPIPo+R.GSlIPsTTWlGLGpSMKQLYGQPLHYLTNVLLQRWDQSRlGSDsEH+PLDSIIHPoKAEATIWlVEElHRLTTSoQHlAoLWtSDPMYHAFIDPIFPc ...llRR.AEMYQ-YMKpIPIPspR.GShIPFooWhGLu+SlKQLYsQPLHYLTNlLLKpWDQ.RlGS--Ep+sLDsIIHPsKAEATIWlhEElHRpToSphHlApLWtsDPMYa.uFlDsIFP...... 0 1 7 9 +9020 PF09188 DUF1951 Domain of unknown function (DUF1951) Sammut SJ anon pdb_1tm9 Domain Members of this family of Mycoplasma hypothetical proteins adopt a helical structure, with a buried central helix. Their function has not, as yet, been determined. 24.40 24.40 26.40 30.50 23.70 24.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.92 0.71 -4.22 2 7 2009-01-15 18:05:59 2006-08-17 16:57:27 5 1 7 1 3 4 0 136.60 49 96.37 CHANGED ME.NNlKEpLlShFppACSoHpERLDFICSsRESDTFSsVDVPLtPIKsIIEIsKsEppQhEIhKlAlpNIKTLSoVG.oGQYhASaFSTpsEsAIIFCl.YFLYHFsFL+DpNKKQllK+AaEslA-pIADYLNEN ..............EchlosFpphhopctp+hDFIpSVhEsDshuNh-hPht.lpplh-lhhNE.spp.hhphhIpshhTh.Tsh.php.lhShFppppplh.hFCl.YhLa+.sF.aD-sc+phlp+hhpslAcclh-hLs.... 0 2 2 2 +9021 PF09189 DUF1952 Domain of unknown function (DUF1952) Sammut SJ anon pdb_1v8c Domain Members of this family are found in various Thermus thermophilus proteins. Their exact function has not, as yet, been determined. 21.20 21.20 21.90 21.80 18.60 17.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.61 0.72 -4.28 2 11 2009-01-15 18:05:59 2006-08-17 17:05:56 5 2 11 4 4 11 0 77.60 56 49.80 CHANGED GFERTFGAFPPWLLERYLEEWGGTREGEGVYRLPGAVVRFREVEPLKVGSLSIPQLhVEVEGEtuEtWFERIAhAASR ........htppFGAhPPWLLE+YLpEWGGp+-GEGsYRLPGAhVRFRElEPL+VGSLSIPQLcVEVEGEEA..EtWFERIAhAASR........ 
0 1 3 4 +9022 PF09190 DALR_2 DALR domain Sammut SJ, Bateman A anon pdb Domain This DALR domain is found in cysteinyl-tRNA-synthetases [1]. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.11 0.72 -3.72 124 4263 2012-10-02 19:03:26 2006-08-17 18:00:12 6 6 4170 6 980 3173 1201 63.60 30 13.64 CHANGED cFtpAM-DDFNTspAlulLF-LA+clNp..........ttt.....shptt....pth..t......stLppL....uslLGl..lpp........ss..csal ..........pFhpAMsDDFNTspAluslF-hu+p.lNp...............pst.........sttst...........pth.....t......................stlc.ph...........sslLGl..ltpt.............................................................................. 0 339 644 840 +9023 PF09191 CD4-extracel CD4, extracellular Sammut SJ anon pdb_1cid Domain Members of this family adopt an immunoglobulin-like beta-sandwich, with seven strands in 2 beta sheets, in a Greek key topology. They are predominantly found in the extracellular portion of CD4 proteins, where they enable interaction with major histocompatibility complex class II antigens [1]. 25.00 25.00 25.40 49.20 21.40 24.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.46 0.72 -4.03 11 88 2009-01-15 18:05:59 2006-08-18 08:50:08 5 16 45 8 18 96 0 105.90 61 25.33 CHANGED QKsSsTVYtKEGEQsEFSFPLsFp-ENL..pGELpW.QActASSsQSWITFoLcN+KVSVpKsppslKLQMpEsLPLpLTLPQsL.QYAGSGNLTLsLs..KGpLHQEVNLVV ....QKsSsTVYKKEGEQVEFSFPLsFptEpL.....oGELhW.Q.AEtASSupoWITFsLcN+cVSVpclppD.KLQMucpLPLpLTLPQALPQYAGSGNLTLsLs..pGK.LHQEVNLVV.................................. 0 1 1 2 +9024 PF09192 Act-Frag_cataly Actin-fragmin kinase, catalytic Sammut SJ anon pdb_1cja Domain Members of this family assume a secondary structure consisting of eight beta strands and 11 alpha-helices, organised in two lobes. They are predominantly found in actin-fragmin kinase, where they act as a catalytic domain that mediates the phosphorylation of actin [1]. 
20.30 20.30 21.40 20.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.79 0.70 -5.42 7 59 2009-09-11 11:13:43 2006-08-18 09:14:55 5 5 31 2 47 61 0 252.30 26 38.15 CHANGED sssuh..slsppthhhssIpsIsWssL.slchl-hu....................ssslhhlhThps.......t..p.....................................plllKuoso...Isp-sauSlLphlLtlPlPchRllc.ss.EappMopsLh.....tohpscpLhchIpoclpcsa.....hLIMEYhp.GpphscLspp......paFuspt.uc++hpQLGplluhDlhs......................................................NN.s+hP...ht.ssps.huNIlhh-pPp..GhhhsllsSslp...slssS.FshtY...................+pahsRl+.lLaolhQ...pPstEShQ...............lpphR-hl.pppshclsppSs...hplQp.....GIspGl ................................thttssh.......c.pshsWs...lpulcpocpu............................SuGVl..FhshFps...............t...........................................usVlKhu.so...hpuEhhuhcluchLG...lpsPpsRll.cpss........Eatphpcuhp.....huspp..s-p.ltchhp..pEl.hcuh.....lhlMpYlp.G.ps..Lh-.sps.............Fpsp....p.upcphpsLG+llhLDlll..........................pNpDRLP.t.LtWc..GNsuNlll.scc............h.tsh......t.....................................p.hh.pht....t.................................................................................................................................................... 0 24 38 43 +9025 PF09193 CholecysA-Rec_N Cholecystokinin A receptor, N-terminal Sammut SJ anon pdb_1d6g Domain Members of this family are found in the extracellular region of the cholecystokinin A receptor, where they adopt a tertiary structure consisting of a few helical turns and a disulphide-crosslinked loop. They are required for interaction of the cholecystokinin A receptor with it's corresponding hormonal ligand [1]. 
20.30 20.30 20.70 24.70 20.10 17.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.33 0.72 -4.08 5 39 2009-01-15 18:05:59 2006-08-18 09:29:54 5 1 27 1 20 37 0 47.20 73 11.41 CHANGED MDVVD.SLLsNGSNITP.PCELGLENETLFCLDQP+.PSKEWQPAVQILL ..MDVVD..SLLsNGSNITP.PCELGLENETLFCLDQPp.PSKEWQPAVQILL 1 1 1 3 +9026 PF09194 Endonuc-BsobI Restriction endonuclease BsobI Sammut SJ anon pdb_1dc1 Domain Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence CYCGRG (where Y = T/C, and R = A/G) and cleave after C-1. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates [1]. 25.00 25.00 53.90 53.90 21.60 17.50 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.92 0.70 -5.49 4 29 2012-10-11 20:44:45 2006-08-18 09:44:51 5 2 22 2 5 29 0 225.10 41 98.98 CHANGED hPYp.HLpSsDDLhToYEthRAGFlALALEKN+RuTPaltcARALKltASpAcsPpDLLplcDIpsuLLsASGlSDKAtsaLp.pDKsEAIpsLIpNFLEPAGEpFVEELVaRFLLhRGDoLGGoMRNlGGsLAQpKhTRuIISsLslAsIuY+WLcSpsKp...Wh-ts-DDs-lElhlRGlSWp.pGcsRTlhYNlsVPlV.+pNlDlCLFsCcssplpsQ....pshpssshYIALGELKGGIDPAGADEHWKTApoALsRIRsAFs+tuhpPaTFFIGAAIE+pMAcEIWcQLpoGhLTNAANLTpssQluSlsRWhhpL ..........................spDLhTshpthhsGF...A.p+.t+usPalt.Achhp..hpp.hpp..pLhp..tlp.hllsAshhScKuhtaLp.p.p.chIptLI.sF.c.ssppalppL.h+aLLhpGDoLGG.MRNhsG..ApphhsphllstL...shs.ph....ppp................................................l.ppslDhhlhpht.tth........h....p.hlshGELKGGIDPAGADEHWKTApsALsRIhpAF.phthp.PhhhFlGuAIEhtMu.EIap.LppthLssAANl.p.pQlhpl.thhh................... 
0 0 5 5 +9027 PF09195 Endonuc-BglII Restriction endonuclease BglII Sammut SJ anon pdb_1dfm Domain Members of this family are predominantly found in prokaryotic restriction endonuclease BglII, and adopt a structure consisting of an alpha/beta core containing a six-stranded beta-sheet surrounded by five alpha-helices, two of which are involved in homodimerisation of the endonuclease. They recognise the double-stranded DNA sequence AGATCT and cleave after A-1, resulting in specific double-stranded fragments with terminal 5'-phosphates [1]. 21.60 21.60 22.40 22.00 21.10 20.30 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.89 0.71 -4.77 20 74 2012-10-11 20:44:45 2006-08-18 10:00:22 6 1 72 10 33 86 41 177.60 21 84.63 CHANGED pApsllpp.phssthpEltpsLtshslphsclpsuutscptsp.hh.........cchltscGWtpch.............................................hh.......................sh-pps...........................pclDahK.........sslulElpauNhs.hh.pDLhp..aphhaspsh......IcVGllls.spshpc.................................................................chusussaaE+lhpcl.ptuc.ss.slPllllGls ............................................................................................................................................................................uttllt...thsp.hpEl.psltsh.php..h......tp.httss..tpptt..ht.hh...............ccthhtp.GW.pct.............................................ph..................................t..............................pplDahK.........sclulElpasshshhh.pDLhs....aph..h..a..p.tsh......IslGllIs...tspp.hpp............................................................phusussta-+hh.cl.ctup..ss.ssPllllGl............................... 0 12 27 31 +9028 PF09196 DUF1953 Domain of unknown function (DUF1953) Sammut SJ anon pdb_1iv8 Domain This domain is found in the Archaeal protein maltooligosyl trehalose synthase produced by Sulfolobus spp. Its function has not, as yet, been defined. 
25.00 25.00 41.80 122.20 22.90 21.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.19 0.72 -4.27 2 2 2009-01-15 18:05:59 2006-08-18 10:11:54 5 1 2 2 1 3 0 64.50 49 9.06 CHANGED EYKsLcLpcGLCGFhRhsKlLVIlKT...lNhchclE.su.YTDVlTsEpl+tcVplscLPhILV+ EYKsLcLpcGLCGFhRhsKlLVIlKT...lNhchclE.su.YTDVlTsEpl+tcVplscLPhILV+ 0 0 0 1 +9029 PF09197 Rap1-DNA-bind Rap1, DNA-binding Sammut SJ anon pdb_1ign Domain Members of this family, which are predominantly found in the yeast protein rap1, assume a secondary structure consisting of a three-helix bundle and an N-terminal arm. They contain an Arg-Asp-Arg-Lys sequence that interacts with an ACACC region in the 3' region of the DNA-binding site [1]. 22.40 22.40 22.80 22.40 22.30 20.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.56 0.72 -3.49 19 53 2012-10-04 14:01:12 2006-08-18 11:12:12 5 4 48 3 30 86 0 114.20 39 17.31 CHANGED KFTA--DYpLshtlpc.hhpchhphsssputshhps.c...h.h.tph.h.........................tpFFcph.....................ucppP..sHTpsuWRDRaRKFlhsaG.lccYIcYYEppptsucpPcshKNhTs .........................................................................................KFoA-EDYtLshslpcphhc-hhphDs-supshlpstct.shlscpphs.........................................sls+pFFcpa.....................uccas..sHTcsuWRDRFRKFlhsYG.lccYIpYYEspptpscpPEPM+NlT.......................... 0 5 17 28 +9030 PF09198 T4-Gluco-transf Bacteriophage T4 beta-glucosyltransferase Sammut SJ anon pdb_1jix Domain Members of this family are DNA-modifying enzymes encoded by bacteriophage T4 that transfer glucose from uridine diphosphoglucose to 5-hydroxymethyl cytosine bases of phage T4 DNA [1]. 
25.00 25.00 56.40 55.20 17.90 14.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -7.91 0.72 -4.63 2 4 2009-01-15 18:05:59 2006-08-18 11:21:40 5 1 2 22 0 6 0 38.00 82 13.98 CHANGED MKIAIINMGNNVINFKTVPSSETIYLFKVISEMGLNVD MKIAIINMGNNVINFKTVPSSETIYLFKVISEMGLNVD 0 0 0 0 +9031 PF09199 DUF1954 Domain of unknown function (DUF1954) Sammut SJ anon pdb_1m4v Domain Members of this family are found in various staphylococcal toxins, and adopt an OB fold, wherein the domain folds into a five-stranded beta-barrel. The exact manner in which they confer pathogenic properties to the protein has not, as yet, been determined [1]. 20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.82 0.72 -3.78 21 1637 2009-01-15 18:05:59 2006-08-18 11:49:10 5 2 161 29 14 384 0 83.10 38 33.87 CHANGED pcL+pYYopsShEh+NloGht.pt...ps.phlphh.spphhplsLlGcDKcKa+c.ssc.pslDVFlVpEt.pchpuppYSlGGlTKoN ........cL+pYYopsShEhcNloGhh.p....tspphlphh.pp..phhpltLlGcDc.pKY+ctsp...slDVFhVpEt.pchpuphaSlGGlTKpN................ 0 12 12 14 +9032 PF09200 Monellin Monellin Sammut SJ anon pdb_1mol Domain Monellin, a protein produced by the West African plant Dioscoreophyllum cumminsii, is approximately 70,000 times sweeter than sucrose on a molar basis. The protein adopts an alpha-beta structure, with a cystatin-like fold, where each helix packs against a coiled antiparallel beta-sheet [1]. 
23.60 23.60 23.60 87.80 21.90 23.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -7.73 0.72 -4.32 2 2 2009-01-15 18:05:59 2006-08-18 11:59:41 5 1 1 49 0 22 0 41.50 20 87.37 CHANGED tEhchh-ht.a...oppLh+hslsE-.KhtththLpFNtsl.P tEhchh-ht.a...oppLh+hslsE-.KhtththLpFNtsl.P 0 0 0 0 +9033 PF09201 SRX SRX Sammut SJ anon pdb_1nrj Domain Members of this family, which are predominantly found in eukaryotic signal recognition particle receptor alpha, consist of a central six-stranded anti-parallel beta-sheet sandwiched by helix alpha1 on one side and helices alpha2-alpha4 on the other. They interact with the small GTPase SR-beta, forming a complex that matches a class of small G protein-effector complexes, including Rap-Raf, Ras-PI3K(gamma), Ras-RalGDS, and Arl2-PDE(delta) [1]. 25.00 25.00 26.20 141.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.91 0.71 -4.54 5 23 2009-01-15 18:05:59 2006-08-18 12:22:27 5 5 22 1 13 22 0 149.30 57 24.56 CHANGED MFDQLAIFTPQGpVLYpYNsLsKKFSEsQlNuFIScLIopPVo+cc.......cshsSKLsoIs.o.pKsocSFoslFHloKQPELYFVlTYAE.pSLELNsEAEpVLsLuLpLWDSLsLN-uILcNhpG+ucKNcHNas-ILpulsE-IcKF-pYF MFDQLAVFTPQGQVLYQYNCLGKKFSEhQINuFISpLITSPVT+KE.......cuh.casLLoIN.......S..p.....ccN........osSFsAhFalsKQPELYFVVTaAE.QTLELNQEspQTLsLsLKLWNSLcLsESIL+Nl.pGpsc.KNcHNYlDILpGl--DLcKF-QYF.... 0 1 6 12 +9034 PF09202 Rio2_N Rio2, N-terminal Sammut SJ anon pdb_1tqi Domain Members of this family are found in Rio2, and are structurally homologous to the winged helix (wHTH) domain. They adopt a structure consisting of four alpha helices followed by two beta strands and a fifth alpha helix. The domain confers DNA binding properties to the protein, as per other winged helix domains [1]. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.78 0.72 -3.91 29 449 2012-10-04 14:01:12 2006-08-18 13:09:27 6 7 407 5 321 479 8 80.70 43 19.50 CHANGED h+aLop-DFRlLoAlEhGh+NHEhVPspLlsphupL+..tsusp+.tlpcLh+tcLls+...pstpY..-GY+LTYtGYDaLAL+sh.scR ..............hRaLsp-DFRV...LsAlEhG.h+NHEl..VPspLIsplusL+..puusp+.hlpcLsKhcLls+...cstpY................-..GY..RLTas...GYDaLAL+ohspR................... 0 105 187 266 +9035 PF09203 MspA MspA Sammut SJ anon pdb_1uun Domain MspA is a membrane porin produced by Mycobacteria, allowing hydrophilic nutrients to enter the bacterium. The protein forms a tightly interconnected octamer with eightfold rotation symmetry that resembles a goblet and contains a central channel. Each subunit fold contains a beta-sandwich of Ig-like topology and a beta-ribbon arm that forms an oligomeric transmembrane barrel [1]. 19.70 19.70 20.70 22.10 19.30 19.60 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.15 0.71 -4.83 19 280 2009-01-15 18:05:59 2006-08-18 13:29:33 6 2 59 10 90 299 3 172.20 25 80.36 CHANGED PDGhp....lTVstpDEptsslPsLsss...hssRphhVuGshsus...........................lsGssst..ssGpLpsGY.lGCslslus.....hussuGhoPulsh..shss.s..s.uh.s.h...hsuuhsl....................sLtPGtlpsVslst.p-hcGsssa.................VthpshclplsGCsG.uhlRSYAslstpT-sssthlshYGtshsl .......................................sGhp....lslp..ttsphhsslssL.sss....hoREhal..S.....Gpsssp.........................lsG..sust.....hsupLpsGY...plGCthslus................hss.ss.G.ss.suls.h...........shs.s......s..............................ss..s..l..ssulsl...................................................sltPGtltsls.l.p....hshpsssst........................lslssh+lplsGCuG.stlRuaApl...........pssTssspt.lshYGpPhsh................................... 
0 10 60 76 +9036 PF09204 Colicin_immun ColicinD; Bacterial self-protective colicin-like immunity Sammut SJ anon pdb_1v74 Domain Colicin D, which is synthesised by various prokaryotes, adopts an antiparallel four helical bundle fold: the helices are tightly packed, forming a compact cylindrical molecule. The protein specifically cleaves the anticodon loop of all four tRNA-Arg isoacceptors, thereby inactivating prokaryotic protein synthesis and leading to cell death [1]. This family also contains immunity proteins to klebicins and microcins. Many bacteria produce proteins that destroy their competitors. Colicin D is one such. The immunity proteins are expressed on the same operon as their cognate bacteriocins and protect the expressing bacterium from the effects of its own bacteriocin [2]. 21.60 21.60 23.60 23.20 21.40 20.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.88 0.72 -3.90 10 59 2009-11-02 17:58:57 2006-08-18 13:44:51 5 1 47 3 11 46 0 78.90 43 88.30 CHANGED MS.hsll-LA+sFlpp+lSAppFoEsahphW+lERcsthhl+D.spslscCLuplFshADhYsPDs-.R--..YElD--pLRcEV+plLcKap ...................Ms.hhllchA+pFlstcloAp.Fuptahtha+hEpcpt.hhpD..spplspsLsslFshADhYsPDsD...RE-...YElDDcpLhcpVhpllsKhp..... 0 2 6 9 +9037 PF09205 DUF1955 Domain of unknown function (DUF1955) Sammut SJ anon pdb_1vdu Domain Members of this family are found in hypothetical proteins synthesised by the Archaeal organism Sulfolobus. Their exact function has not, as yet, been determined. 
23.70 23.70 23.70 188.30 23.60 23.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.00 0.71 -4.71 4 17 2009-01-15 18:05:59 2006-08-18 13:52:30 5 1 17 1 6 12 0 159.40 67 94.03 CHANGED Ell+KLM-AK+hllDGhl-cGlcIlpchspSSshcEhNWhICNllDohsCchlhpsL-sIGphFDlotCtNLKpVlpChhhhNp.SEaVDhALDhLVtpsK+DpL-cIhp-lh..NpclssplLlKIAsAh+KlGspR-us-LLpcACc+GlKEACpslspl .ELRRKLIEAKKLILDGFVEQGIELLSKTIoSENIKESNWIICNVIDTADCDAVVKTLDSIGKIFDhSPCANIKRlVYCYALlNKsSEYVDLALDlIVKuNKKDuLDKLYNDLK..NEKINPEFLLKIGhAYKKLGAV+ESNEVLRKACENGLKEACENIKEI.. 0 1 1 5 +9038 PF09206 ArabFuran-catal Alpha-L-arabinofuranosidase B, catalytic Sammut SJ anon pdb_1wd3 Domain Members of this family, which are present in fungal alpha-L-arabinofuranosidase B, adopt a beta-sandwich fold similar to that of Concanavalin A-like lectins/glucanase. The beta-sandwich fold consists of two anti-parallel beta-sheets with seven and and six strands, respectively. In addition, there are four helices outside of the beta-strands. The beta-sandwich strands are closely packed and curved with a jelly roll topology, creating a small catalytic pocket. The domain catalyses the hydrolysis of alpha-1,2-, alpha-1,3- and alpha-1,5-L-arabinofuranosidic bonds in L-arabinose-containing hemicelluloses such as arabinoxylan and L-arabinan [1]. 
25.00 25.00 26.10 25.70 24.50 23.20 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.19 0.70 -5.48 7 132 2009-01-15 18:05:59 2006-08-18 14:15:12 6 11 81 4 81 133 29 312.70 58 63.03 CHANGED PCDlYuuusTPClAAHSTTRALYuuYoGPLYQVpRuSDGsTsDI.......uPLo.AG....GVANAuuQDsFCANTTClIoIIYDQ..................................Su+GNHLTpAPsG.uFsG...PsssGa........................DNLAsA.GAP.....VoLsGpKAYGVFloPGTGYRNN.sspGoAsGDpsEGhYAVLDGTHY..NuuCCFDYGNAETsStDTGs.......GHMEAIYF..GssTlWGp..GsGsGP..............WlMsDhENGLFSGsss...c.NuusPuIs.RFlTAllKGps....shWAIRGGsAuoGuLSTaYsGsRPssoGYsPMuK.......EGAIILGIGGDNS.GAQGTFYEGsMToG..aPSDATENtVQANlVAAtYusss .........PCDIYuuGGTPCVAAHSTTRALYuuYsGsLYQV+RuSDuuTpsI..............usLo.A.G....GlAsAA.....AQDoFC..Au.TTClITlIYDQ..................................SG+GN+LTpAPsG..uh.pG.......Ps.s.sGh.......................................DsL.As....A.huAP.......VTl.sG.pKAYG.Val.uP..GsGY.RNN.ss.sGs..ATGDpsEGhYAV..hDG....THY....NuuCCFDYGNAETs....spDsGN.............................GpME.AIYF..Gsss..h.aGs.....GuGsGP..............WlMADLENGLF..S.G.sss......t.NsssP...olstR.FVTAhlKGps....spWAlRGGNApSGuLoTaYsGs.RP.t.........s...............GYsPMpK.......EGAIILGlGGD......NS.....suupGTFYEGVMToG..YPSDATENuVQANIVAAtYus.u................................................................................... 0 32 52 74 +9039 PF09207 Yeast-kill-tox Yeast killer toxin Sammut SJ anon pdb_1wkt Domain Members of this family, which are produced by Williopsis fungi, adopt a secondary structure consisting of eight strands in two beta sheets, in a Greek-key topology [1]. 
21.50 21.50 22.00 196.20 21.10 20.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.28 0.72 -3.55 2 2 2009-01-15 18:05:59 2006-08-18 14:27:41 6 1 2 1 0 4 0 86.50 88 69.48 CHANGED DGYLlMCKNCDPNoGSCDWKQNWNTCVGIGuNVHWMVTGtS.sGpQGCAhIWEGSGCsGRSTTMCCPusTCCNINTGFYIRSYRRVE DGYLlMCKNCDPNoGSCDWKQNWNTCVGIGuNVHWMVTGtS.sGpQGCAhIWEGSGCsGRSTTMCCPusTCCNINTGFYIRSYRRVE 0 0 0 0 +9040 PF09208 Endonuc-MspI Restriction endonuclease MspI Sammut SJ anon pdb_1sa3 Domain Members of this family of prokaryotic restriction endonucleases recognise the palindromic tetranucleotide sequence 5'-CCGG and cleave between the first and second nucleotides, leaving 2 base 5' overhangs. They fold into an alpha/beta architecture, with a five-stranded mixed beta-sheet sandwiched on both sides by alpha-helices [1]. 25.60 25.60 26.00 82.40 25.00 25.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.58 0.70 -5.32 3 10 2012-10-11 20:44:45 2006-08-18 14:41:34 5 1 10 4 4 9 1 265.90 35 70.46 CHANGED YTchLSDlLs......clsEKs..phtpGlpus+LGNcaE+aIV-lLNDlcNLutYNsNppAQpchc....cIhcclLccLsL-ctaDpILEVTuTs.DIs+LpNGGSPKTDlolRlphssKEh+IsNISIKNTpcK+VSIHEYsVcDlloslulSDoD.L+pLlc+FQcsGStKcFsulpsp+splL-ps..LcPYpE+lIcWsVT..up+stssLLc-KIQlsshIIsRNtsuVssK..DDYlKcYIEEhStAhGKG.FGTPFsWTYPSK+RGQKIQlKG ............hpp.Lo-lhs......pI.EKAspNlupGlRuslLGNshEppIVNlLNDlcNlshWNs.pssppshc...YcIaKcIlccl..slc.cth..-pIl-loAT...s.-IPhLp....NtGKPKTDVpVTIpsss..Kch.IhsISlKp..TpcppVoIHEhoVccllosLclS...-S.....D....LppALc+FQcVGStKKlhsppssptclL-cp..LcsYN+cLIcahls..u.hu.sslls-KIQhsshIIspsphsVhs+.....D-Yl+cYIpEhstt.uKGtFGTPFpWTYPSKKRGpKIQlKG. 0 1 4 4 +9041 PF09209 DUF1956 Domain of unknown function (DUF1956) Sammut SJ anon pdb_1t33 Domain Members of this family are found in various prokaryotic transcriptional regulator proteins. Their exact function has not, as yet, been identified. 
21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.24 0.71 -4.19 57 836 2012-10-03 00:15:22 2006-08-18 14:51:42 6 2 775 2 151 427 30 123.30 42 54.72 CHANGED scppLpshlcshlth..lhsp....psthhs+hhhRE...hpP.osAhc..plhpphhtPhpphlsplluplhGts......ssstphthpshollGpslhatlu+pshhthhs...shsst.htplpslhpch..hphhLsul ..........................................h.R-hlltAh+sMlph...Lsp-....-T.lsl.....SK.FluREQ....L..SP..TuA.Yc..LlH-Q...VIsPLH...s+LsRLlAAaTGsD......A.sDo.ch.lLH.THALlGplLAFRLu..+ETILhRsGWs..saDc-cs-hIsp.sVssHl-llLpGL................................ 0 40 90 121 +9042 PF09210 DUF1957 Domain of unknown function (DUF1957) Sammut SJ anon pdb_1ufa Domain This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been defined. 25.00 25.00 28.40 27.10 22.80 19.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.27 0.72 -3.96 57 366 2009-09-10 21:40:42 2006-08-18 15:02:37 6 6 355 5 163 338 133 102.80 33 18.87 CHANGED .clccAscpMlclssphs...t.sstlpp..RsLsQAARELLLAQSSDWuFIhpssTss-YAtcRhccHlpRFhcLhptlp....pspl........s....pphLpplEttDslFPpIsa+..ha ...........................h.pstpphhchspt...........ssthcc...RsLsQhsRElLLspSSDWsFllsssoss.......-YAppRs+pHhppFpclhpslt....suc.h........-.......pchL.p.t.hcp.tDslFs.slDhRha...................... 0 67 120 148 +9043 PF09211 DUF1958 Domain of unknown function (DUF1958) Sammut SJ anon pdb_1tvf Domain Members of this functionally uncharacterised family are found in prokaryotic penicillin-binding protein 4. 
23.40 23.40 23.70 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.14 0.72 -3.90 9 328 2009-01-15 18:05:59 2006-08-18 15:22:48 5 2 303 6 15 172 0 65.70 53 15.00 CHANGED YKKlLSKGppcIDGKKYplccDLYDVVPKspst...+lhlc-.GplplDhsRpalssphtss.pVpspc ....YhKlLSKGEQcIsGKKYhVcNDLYDVlPpchsp...aKlsVED.G+.V+sDYPRcFlNpchuPP.oVEspp....... 0 2 3 13 +9044 PF09212 CBM27 Carbohydrate binding module 27 Sammut SJ anon pdb_1oh4 Domain Members of this family are carbohydrate binding modules that bind to beta-1, 4-mannooligosaccharides, carob galactomannan, and konjac glucomannan, but not to cellulose (insoluble and soluble) or soluble birchwood xylan. They adopt a beta sandwich structure comprising 13 beta strands with a single, small alpha-helix and a single metal atom [1]. 19.50 19.50 21.70 20.10 19.20 17.00 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.07 0.71 -4.24 2 51 2009-01-15 18:05:59 2006-08-18 15:57:36 5 14 33 6 9 53 0 171.90 24 23.72 CHANGED hQ.s.A.El..pFShsp-hpNhassGTWQApFthPsIp...pststsLphNVsLPGpuDWEEV+V.lp.hspLs.sphlpaDlhlPcV-.lsGtLRPYhsLNPGWlKIGlD..psslsshphVohcsppY+hhHVplEFsthPsVNELalslVGs+LtYcGPIaIDNVpLaKK ...............................................t..........................................ht..t....t......sssuLclsls.hsss..s..s.W-Ehc.......l....h......p......th.....t.......c...l..sshptlca-.lalP..ps..p..t...h...s.Gslps.........h.us..l...s..s.GW.s.c....l....shsh.p.ph.s..lpshcp.l....pl..s.GppYtp..hpssh....p....h...s...p...s...t...p.hs...pL...hlplsGsphsY.sG..sIYlDNlcL...t... 2 5 7 9 +9045 PF09213 M3 M3 Sammut SJ anon pdb_1mkf Domain Members of this family of viral chemokine binding proteins adopt a structure consisting of two different beta-sandwich domains of partial topological similarity to immunoglobulin-like folds. They bind with the CC-chemokine MCP-1, acting as cytokine decoy receptors [1]. 
25.00 25.00 104.10 104.00 20.80 19.20 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.24 0.70 -5.99 2 8 2009-01-15 18:05:59 2006-08-21 09:23:23 5 1 3 8 0 9 0 332.90 57 90.18 CHANGED SsVshpohshsp.cpt--hpsa......CLh.sphpsT.Cus.hccl.p+shapL.shCNVKsphhVsa.shcchG..hhpuRLPhPohuusssscll+VLVlAEus....pP.ccaaA.lth.T...shtLoD.NshFpoca.plW.lsls+p.VDlshhhtuhhhtt..usplTlhhsYssTFTWCGpl.uls-.shP.PShpAhpsl...Chs.hRY.sup.Fpc.DGCptEoth.p.ohlhPh...Gs.spphphNTCsCahKYs.lp.Lsshc+lhlhslush.uhhpPlYVhssYFsSoc.Ns.t.us.L.aCsl.hppts.Ghapo..pus.pCPh+hs.Gpsp.VL.s+hs..sh.plVGlolhh-GQpaRlpYhG SsVshpohshsp.cpt--hpsa......CLh.sphpsT.Cus.hccl.p+shapL.shCNVKsphhVsa.shcchG..hhpuRLPhPohuusssscll+VLVlAEus....pP.ccaaA.lth.T...shtLoDhNshFpoca.pIW.lsls+p.VDlshhhtuhhhtt..usplTlhhsYssTFTWCGpI.uls-.shP.PShpAhpsl...Chs.hRY.sup.Fpc.DGCptEosh.p.ohlhPh...Gs.spphphNTCsCahKYs.lp.LsAhc+lhlhslush.uhhpPlYVhssYFsSoc.Ns.tPusKLYHCALQMTSHD.GVWTS..TSSEQCPIRLVEGQScNVLQV+VAPTSMP+LVGVSLMLEGQQYRLEYFG 0 0 0 0 +9046 PF09214 Prd1-P2 Bacteriophage Prd1, adsorption protein P2 Sammut SJ anon pdb_1n7v Domain Members of this family form a set of bacteriophage adsorption proteins, composed mainly of beta-strands whose complicated topology forms an elongated seahorse-shaped molecule with a distinct head, containing a pseudo-beta propeller structure with approximate 6-fold symmetry, and tail. They are required for the attachment of the phage to the host conjugative DNA transfer complex. This is a poorly understood large transmembrane complex of unknown architecture, with at least 11 different proteins [1]. 
25.00 25.00 1204.10 1203.90 17.00 16.70 hmmbuild -o /dev/null HMM SEED 560 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -13.00 0.70 -5.95 2 7 2009-01-15 18:05:59 2006-08-21 09:39:33 6 1 6 2 0 8 0 555.70 90 93.92 CHANGED AshsVPKLG.FPshAVhDIDNVP.DSSsTGSRWLPSlYpGuNYauGGPQtLpAphusFDSssRLPYNPRT-sNPAGNCAFuFNPFGQYISNISSAQSVHcRIYGID.NsEPLFoPNAASITNGGNPTMSQDhsYHNIGPINoAYKAEIFRPVNPLPMSDTsPDPETLEPGQs.PlIKoDGlYosSGIAuFIFD+PVTEPNPNWPPLPPP.IPIIYPhPALGIGAAAAYGFGYQVThYhWEplPVEFIADPtTCPApPTTDKVIIRTTsLNPEGoPCAY-suIhLVRQsuNPMNAVAGRLVP.V.DIsVDIFLTGKFFsLsPPhRlTNNYFAD-pVpE.TVThGNapsshuusYatVYpTDGhGhApsFIu.GGuGlSALlpLQDsoVlD.LaYSlPLSlGGStushcEWlANNsGhaPhShGhsKosLlEIPRRpLEAIpPQssPG..DlFhLD-SuuYASFSSFIGYspuAYYVAGAuTFMDVENPDpIIFlLRsGtGWYuC-IuDALhI.....uDsEaDSVDYFAapGGVMFIGSARYTEGGDPLPIKYRAlIPuLP ANFNVPKLGVFPVAAVFDIDNVPEDSSATGSRWLPSIYQGGNYWGGGPQALRApVSNFDSsNRLPYNPRT-sNPAGNCAFAFNPFGQYISNISSAQSVHRRIYGIDPNDEPLFTPNAASITNGGNPTMSQDTGYHNIGPINTAYKAEIFRPVNPLPMSDTAPDPETLEPGQs.PLIKSDGIYSsSGIAuFIFD+PVTEPNPNWPPLPPP.IPIIYPTPALGIGAAAAYGFGYQVTVY+WEEIPVEFIADP-TCPAQPTTDKVIIRTTDLNPEGSPCAYEAGIILVRQTSNPMNAVAGRLVPYV.DIAVDIFLTGKFFTLNPPLRITNNYFADDEVKENTVTIGNYTTTLSSAYYAVaKTDGYGGATCFIASGGAGISALVQLQDNSVLDVLYYSLPLSLGGSKAAIDEWVANNCGLFPMSGGLDKTTLLEIPRRQLEAIsPQDGPGQDDLFILDDSGAYASFSSFIGaPESAYYVAGAATFMDVENPDEIIFILRNGAGWYACEIGDALKI.....ADDEFDSVDYFAYRGGVMFIGSARYTEGGDPLPIKYRAIIPALP 0 0 0 0 +9047 PF09215 Phage-Gp8 Bacteriophage T4, Gp8 Sammut SJ anon pdb_1n7z Domain Members of this family of viral baseplate structural proteins adopt a structure consisting of a three-layer beta-sandwich with two finger-like loops containing an alpha-helix at the opposite sides of the sandwich. The two peripheral, five-stranded, antiparallel beta-sheets are stacked against the middle, four-stranded, antiparallel beta-sheet. Attachment of this family of proteins to the baseplate during assembly creates a binding site for subsequent attachment of Gp6 [1]. 
25.00 25.00 86.40 25.90 19.70 24.30 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.86 0.70 -5.28 10 76 2009-01-15 18:05:59 2006-08-21 09:49:36 5 2 62 12 0 68 1391 281.80 34 83.52 CHANGED uIVT...sKFRTcshhNFYcolG...............Dsss+sTIYhoFGRscsWucNEs-stFAPPYPsDShcGlsDsWocMlGslKIspShLcsVVPR+DW...............................GDoshssPppFaluDIVVVNStPhNpT-.u.uGWMVYRClDlP-s........GtCSIsolssKsEClplGGcWTss.....tcSltsPpGpusu...I-sG.DGYlWEYLYTIPPDssINcCTNEaIVVPaP-ELhtDPsRWGY-ssls...........W.ssch-llYRhKssTlRF+AahDSlhFspsuhsGNpGFRQlSlIlNPL.hKucPsss-VKAsts.tYsspplphcSGpMI.........YMENRpPIh+ohDQTEElsIlFsF .....................................ullT...spFRh.phhpFhps.lt...............ss.spsplYhhhGRspsWsspp...........uPP...PsDshpthtshaspMhuhh+l.tS.httVl.RhDW.........................ucsthss...shphh.s-hhVsN...........usa.VY+Cl.ssss........s............s...Gt..Tsp.....hpS...sth.........lss...G.DGYhWcYlYpIPsssslphhoN-ahsV..........sphuhcssls...........a.pschchlah.p..ssoh.p......puhhDul...hspsuhsGstG..plSllssP..tpupsssssVcus...ts...sYshsclphcSGphl.........Yh-NRpsIhpuhDQsE-lpIlhpF.................................... 0 0 0 0 +9048 PF09216 Pfg27 Pfg27 Sammut SJ anon pdb_1n81 Domain Members of this family are essential for gametocytogenesis in Plasmodium falciparum. They contain a fold composed of two pseudo dyad-related repeats of the helix-turn-helix motif, serving as a platform for RNA and Src homology-3 (SH3) binding [1]. 
25.00 25.00 119.70 118.90 22.30 19.90 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.29 0.71 -4.69 2 7 2009-01-15 18:05:59 2006-08-21 10:00:20 5 1 5 1 4 8 0 176.60 49 44.52 CHANGED Y+YEc..cpctsL.shIppls-lEFpusss..YLhhhlspD..KaN.uLpDp.uIh+.lpKsQNch..hhl..clpsshs.RIS-RLhsashDK-lTt.YlKKlcDhhhlEpcshcph.h.Vcpt+php-KKRlhNshc.I+hha-o...hp.lphscDph.sAhhRlSphlsDl........I.hLP .............................hI.plhplEFcssps..YLhhhls..pD.EKaN.uLcD+luIh+.lpKNQN+as.Fhl..clpDshs.RISDRLhsYChDK-lTEsYlKKlcDhhhlEpcVhEpl.h.V-Ht+ph+EKKRlhNDhcLI+hha-oh.hspslphTDDQacsAAhRlSpFl.Dl........I.......... 0 2 2 3 +9049 PF09217 EcoRII-N Restriction endonuclease EcoRII, N-terminal Sammut SJ anon pdb_1na6 Domain The N-terminal effector-binding domain of the Restriction Endonuclease EcoRII has a DNA recognition fold, allowing for binding to 5'-CCWGG sequences. It assumes a structure composed of an eight-stranded beta-sheet with the strands in the order of b2, b5, b4, b3, b7, b6, b1 and b8. They are mostly antiparallel to each other except that b3 is parallel to b7. Alternatively, it may also be viewed as consisting of two mini beta-sheets of four antiparallel beta-strands, sheet I from beta-strands b2, b5, b4, b3 and sheet II from strands b7, b6, b1, b8, folded into an open mixed beta-barrel with a novel topology. Sheet I has a simple Greek key motif while sheet II does not [1]. 
25.00 25.00 34.10 33.60 24.40 23.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.92 0.71 -4.65 6 84 2012-10-02 12:51:43 2006-08-21 10:21:06 5 3 73 3 10 59 2 147.10 46 39.22 CHANGED chuscsahhahKRLoANDTGATGuHQuGhYIPpshsppLFPslN+s+-pNPolhlss+hpSHpssDSphRAIYYNs+hhs...tTRNEtRITpaGtuts.hhs.csTGALsllAF...ctstcsthscsWVCsos-EtDllEutlGpllPGu....lhss.uupI ..........sspshhlYlKRLSANDTGATGGHQsGlYIPpsh.s-c..LFPslsc..s+phN..Polhlps+hsoc.sss-.SphRhlYYNs+has...tTRNEtRITRaG+s.s.l.ss-NTGuLhlLA.....cts.....tcsp.hphaVhsos-E.Dlh.sthGpllPGs....Lh.s.ttpl.................................. 0 4 6 8 +9050 PF09218 DUF1959 Domain of unknown function (DUF1959) Sammut SJ anon pdb_1nxh Domain This domain is found in a set of uncharacterised Archaeal hypothetical proteins. Its function has not, as yet, been described. 21.10 21.10 21.70 21.40 21.00 20.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.54 0.71 -4.31 5 29 2009-09-10 17:31:46 2006-08-21 10:27:44 5 1 29 2 19 28 0 115.10 42 84.75 CHANGED KhNlIKuNRalMEDVIlPISKALKlslEEVIDIFscKLDhuSLYELHAYsEQA+MGCLGRKVDIDLGLCWlsDFFGLISK-DADLIRKKVVE-hIlcKKPYKEALEEGR+hllcLLK ............KhplIcs.Ra.hE-lllPlSKtLplsh-Elh-lFhcphDhuoL.plHAhhEpA+hsCLsc+lDhD.LuLCWlsDahsLIS+c-ADhIRcKVscphllpp+sYc-AL-EGRphllclLK..................... 0 4 9 15 +9052 PF09220 LA-virus_coat L-A virus, major coat protein Sammut SJ anon pdb_1m1c Domain Members of this family form the major coat protein of the Saccharomyces cerevisiae L-A virus [1]. 
25.00 25.00 27.00 25.30 23.30 21.20 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.35 0.70 -6.03 2 18 2009-12-01 14:28:50 2006-08-21 10:41:59 5 2 13 2 5 23 0 311.60 31 49.63 CHANGED ML+FVsp.spctpssLap.ppscGThsshsRlRsDFKaDsLsFsRshssSQcaThVGpshsshsEspS.L-GlsKKYLTLDGuluhDNVhpEL+sosGh.uNhlAuHAYNlsuWRWYDNHVALLhNhLRhYhLpsLsEpuphSsGchPhYcDGHVhIcLssTl..pstsspasWPucRus-SYP.Ws.hoE.hPshDsPalDlRPLT.pEsphVLMMhucW+.pTNhtlDa.sPpLA-KhhYRat.slpshsEWl-u-tTsspah.PpS+VhhSALRKYVsHNpLYNQFYTAspllAQlMhpshPssAEGhsWLhHss.VplPKFGSlRGRYPFL.SG-AAhIQApALEDWuAlhAKPELlFTYuM.lussLNhGLhlRcsKtohhhsp.csSa-DshFLpPETFhtuAluhsTG.DAPLNGMuDVYVhYP-Llph .................................................php..hps...p+.h........p.hs.lG...s.hsps.s.L-Gltt.hhs.cGslshs.l..tLpp.shh.ts..s.p.ht.tth.ahcNhsuLLhNhLRhYhlt.ltp.tth.pssph.hYcsGHspl..s.tl...sts.pht...WP.....spt.ptths.hs.h....P...ssshl.hpshs.pcsthlL..hhupat..sshtlsa..PpLspphhhph...ht..s...........h..h..psthhhsAl+cYVstNplYspF.sAhtlluQlhhoshPssAEuhsWLhp.s.VslPpFuSlRGhYPhL.pG-uhhhptpAl--WsthhspP.phlFohu..Mhhuss...lphGLhhRpsphp.hhst...th-p.hhlpscohhtuhhuhsoG.phs..t.tshhh......................... 0 0 1 5 +9053 PF09221 Bacteriocin_IId Bacterioc_AS-48; Bacteriocin class IId cyclical uberolysin-like Sammut SJ, Coggill P, Eberhardt R anon pdb_1o82 Domain Members of this family are membrane-interacting peptides, produced by Firmicutes that display a broad anti-microbial spectrum against Gram-positive and Gram-negative bacteria. They adopt a helical structure, with four or five alpha helices forming a Saposin-like fold [2,5]. The structure has been found to be cyclical [1, 3, 5]. It should be pointed out that one reference [4] implies that both circularin A and gassericin A are class V or IIc-type bacteriocins; however we find that these two proteins fall into different Pfam families families, this one and BacteriocIIc_cy, Pfam:PF12173. 
23.00 23.00 23.30 23.00 22.00 21.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.14 0.72 -4.06 17 111 2009-09-11 17:00:14 2006-08-21 10:51:36 5 1 108 12 12 62 0 63.50 51 69.18 CHANGED AutlGIususAtplVsllssuuolsolIulluulsuuGshu.......sulhAslKphlp+pGpttAssW .......AGTLGISTtAAsTVVNlIsAhSTVsulISIV.GAlTGsGuIu.......uGIsATVhhllKKpGtAtAAhW....... 0 5 8 9 +9054 PF09222 Fim-adh_lectin Fimbrial adhesin F17-AG, lectin domain Sammut SJ anon pdb_1o9w Domain Members of this family are carbohydrate-specific lectin domains found in bacterial fimbrial adhesins. They adopt a compact, elongated structure consisting of a beta-sandwich with two major sheets: one consisting of five long strands in mixed orientations, and a front sheet with four antiparallel strands, forming an immunoglobin-like fold [1]. 25.00 25.00 25.40 25.10 21.30 21.10 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.22 0.71 -4.97 2 22 2009-01-15 18:05:59 2006-08-21 11:05:39 5 1 9 15 0 29 0 167.20 50 47.14 CHANGED VSFIGSTENDVGPS.GSYSpTHAMDNLPFVYNTG.NIGYQNANVWRIStGFCVGLDGKVDLPVVGSLDGQSIYGLTEEVGLLIWMGDTNYSRGTAMSGNSWENVFSGWCVG.NhsSTQGLSV+VpPVILKRNSSApYSVQKTSIGSIRMRPYNGSSAGSVQTTVNFSLNPF ................................................................PS..uYSpsauhDNLPFha.N.sGhsI.......tYQsuNs.....a+h.....osG......FCssL-uKsDLPVlGoLDGQSIYsLT-E..lGlLIahGDTNYSRsoAhsGNSWpsVFSG.WCsu...hSoQGhSV+VhPVlLKps.uu.upYoV.+TpIGSIRhR.hssSphGph................................ 0 0 0 0 +9055 PF09223 YodA YodA lipocalin-like domain Sammut SJ, Bateman A anon pdb_1oej Domain Members of this family of prokaryotic domains have been identified as part of the response of bacteria to a challenge with the toxic heavy metal cadmium. They are able to bind to cadmium, and ensure its subsequent elimination [1]. 
20.50 20.50 20.60 20.90 19.80 19.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.17 0.71 -4.63 33 1239 2012-10-03 08:47:39 2006-08-21 11:21:21 6 5 1174 5 75 565 3 177.80 56 49.23 CHANGED -pccplhpGYF-DspVKDRsLSDWpGcWQSVYPYLpcGTLDtVhsaKAccsc.chTApEYKsYYcpGYpTDV-pIsI..cssslTFhp.sspspospYpYsGacILTYcpGNRGVRalFcts-usu.st..P+YlQFSDHsIuPpKusHaHlYaGs-ppt.LLcEl-NWPTYYPupLouc-IscEMlAH ...................................-.s-ppstsGhF-DssVcD..RsLSDasGsWQSVYPaLpsGpLD.....VFchKAct.s.t..chThtEhKsYYcK.GYpT.DlppIsI..c.c.s..p.lEFhp.....s......s....p.....s....p..o.s.p.Y.cY.sG....hK.IL...TYppGp+.GV....R....a....L....FEs...p.Ds..s.u..tt..KY.lQF......SD.H.IA.P.p.K.u.pHFHIFhGs-SQpuLL...p.EM-N...W.PTYY.P.pLoupElspEMluH............................ 1 20 36 56 +9056 PF09224 DUF1961 Domain of unknown function (DUF1961) Sammut SJ anon pdb_1oq1 Domain Members of this family are found in a set of hypothetical bacterial proteins. Their exact function has not, as yet, been determined. 25.00 25.00 28.80 25.80 19.50 16.40 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.61 0.70 -5.25 3 41 2009-01-15 18:05:59 2006-08-21 11:26:34 6 1 38 4 21 39 9 214.70 46 84.29 CHANGED REG.uLLYpNPLuSPcDV+GWVMEGsGpluFcDGuLHLSs.hDsEclGD-AHFVFWCPETFPDGIlVoWDFhPlcEPGLCMlFFAAAGhsGEDLFDucLAcRTGpYPQYHSGDINALHLSYFRHKHA-ERAFRTCNLRKSRGFHLVApGADPLPPs-DAcsPYRMKLIKDGuYV+FSINGLPILEWTDDGcRaGPVLGuGKIGFRQMAPL+AAYRNFsV ......................................................thlYpNsLpSspDVtsWhhEG.su.ploh...sssthcLp..h...phsppu+FVaWCPEsFPDsI..hloW-FpPlc-..PGLsMLFFuA.AGhs..Gc.DlF.DsuLt.....RTGpYPpY..HSGDINsLHlSYFRR+as-ER.AF+TCNLRKStGF.HLVApGADPl.PsssD........Ap.u......sYRh.cllKDtstV+FuI.N......sLPlhpWpDDG.ss.hGPVLspG+IGFRQMAPhtAsY+sLpV.... 
0 9 15 20 +9057 PF09225 Endonuc-PvuII Restriction endonuclease PvuII Sammut SJ anon pdb_3pvi Domain Members of this family are predominantly found in prokaryotic restriction endonuclease PvuII. They recognise the double-stranded DNA sequence 5'-CAGCTG-3' and cleave after G-3, resulting in specific double-stranded fragments with terminal 5'-phosphates [1]. 25.00 25.00 118.70 118.40 18.60 18.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.89 0.71 -4.53 3 14 2012-10-11 20:44:45 2006-08-21 11:42:23 5 2 14 21 3 19 2 144.30 63 82.21 CHANGED HsDhsKLlcLWPpIcEYQcLAsKHGINDIFQDNGGKLLQVLLILGLTVLPGREGNDAVDssGsEYELKSVNlELTKuFSTHHHMNPsIIAKYRQVPWlFAIY+NIsIcuIYRLcPDDLEsFYDKWERKWY-DGGKDINNPKIPVKYVMEYGclIW .......DhttL.tLaPplpcaQtLAp+aGINDIFQDNGGKLLQVLLlhuLp.llPGREGNDAVDssGsEaELKSVNl-LT.KSFSTHHHMNPsIIuKYRQVsWlFAlYpsIslpplYhLpPc-LEsFYsKWEcpWapcsGKDINNPKIPlKYVhEaGplla... 0 0 2 2 +9058 PF09226 Endonuc-HincII Restriction endonuclease HincII Sammut SJ anon pdb_1xhv Domain Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence 5'-GTYRAC-3' and cleave after Y-3. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates [1]. 
19.30 19.30 20.10 312.90 18.70 19.20 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.84 0.70 -5.03 2 10 2012-10-11 20:44:45 2006-08-21 11:55:36 6 1 9 49 3 12 3 256.50 73 99.34 CHANGED SFIKPIYQDINSILIGQKVKRPKSGTLSGHAAGEPFEKLVYKFLKENLSDLTFKQYEYLNDLFMKpPAIIGHEARYKLFNSPTLLFLLSRGKAATENWSIENLFEEKQNDTADILLVKDQFYELLDVKpRNISKSAQAPNIISAYKLAQTCAKMIDNKEFDLFDINYLEVD.ELNGEDLVCVSTSFAELFKSEPSELYINWAAAMQIQFHVRDLDQGFNGTREEWAKSYLKHFVTQAEQRAISMIDKFVKPFKKYIL ........SFhKsIYp-INspLlGppVP+Pp...SGTLSGHAAGEPFEKLVYpFLK+pLsDhTFKQYEYLNDLahKNPslIGHEARhKLFNSPTLLFLLSRGKsATcKWSIEN.FEEKQNDTADILLVKDpFYELLDVKTRNISKSAQuPNIISAYKLAQTCAKMIDNcEaDLFDINYLElDW.ELss--.LlChSTaFAELFKSpPS-LYINWAAAMQIQFHVRDLDQtFsGTREEWA+uYLKHFVsQAcpRAssMIsKFVKPFcKYIl. 0 1 3 3 +9059 PF09227 DUF1962 Domain of unknown function (DUF1962) Sammut SJ anon pdb_1uoy Domain Members of this family of fungal domains are functionally uncharacterised [1]. 21.30 21.30 21.80 118.50 20.60 18.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.42 0.72 -3.93 3 8 2009-01-15 18:05:59 2006-08-21 12:10:45 5 1 7 1 4 7 0 65.20 70 72.20 CHANGED DTCGSGYGGDQRRTN......SPCQuuNGDRHFCGCDRTGVVECRGGKWTEIQDCGSSTCHGTNDGGAsC DTCGuGYGGDQRRTN......SPCQAuNGDRHFCGCDRTGVVECRGGKWTEIQDCtuSTCHGTNDGGApC 0 0 1 4 +9060 PF09228 Prok-TraM Prokaryotic Transcriptional repressor TraM Sammut SJ anon pdb_1us6 Domain Members of this family of transcriptional repressors adopt a T-shaped structure, with a core composed of two antiparallel alpha-helices. These proteins can be divided into two parts, a 'globular head' and an 'elongated tail', and they negatively regulate conjugation and the expression of tra genes by antagonising traR/AAI-dependent activation [1]. 
21.40 21.40 22.00 22.60 20.90 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.17 0.72 -3.87 14 32 2009-09-10 15:54:28 2006-08-21 13:14:02 5 1 24 12 10 38 1 100.10 36 95.07 CHANGED Mphtsospss......sEtcshtuhhsuhpcu-LEsLsluAIR-HR+Llsts-slap-h..ussDspsusushpshptEYlptphcpcAQQptLosll-hLGalPcV ................Mp..sus.ss.....tsEh+shhuhhpulspu-LEsLolsAIRpHRpLltpA-pla.pt...hs-c.psupushts.phcYlctphchcAQ.ptlssllshLGalPcV........... 0 1 4 7 +9061 PF09229 Aha1_N Activator of Hsp90 ATPase, N-terminal Sammut SJ anon pdb_1usu Domain Members of this family, which are predominantly found in the protein 'Activator of Hsp90 ATPase' adopt a secondary structure consisting of an N-terminal alpha-helix leading into a four-stranded meandering antiparallel beta-sheet, followed by a C-terminal alpha-helix. The two helices are packed together, with the beta-sheet curving around them. They bind to the molecular chaperone HSP82 and stimulate its ATPase activity [1]. 22.50 22.50 23.00 23.00 21.30 20.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.58 0.71 -4.34 69 522 2009-01-15 18:05:59 2006-08-21 13:38:25 6 16 302 7 363 496 9 134.10 25 44.42 CHANGED -KsstsWu+phlcphlss.....lp..h.psssh............................php...lsclsp..lcG.-usVspRK.GKlIsha-hclphpapGp....................tspst................hpGplplP-lup-.s..-t--hph.pl............shp.scssptp.....hcsll+pphhsp..l+ptltpahpcLhtp...up .................................................-KssssWu+phlcp.h.Lhs.......lp.....s..p.ssss............................................psclsc..lpp.l-G.-us.lspRK.G..KlIhha-.hp.lpLpapGp......................................................................spss................hpGplplPpluc-.s...c.--h...ph...pl...................................sht...ps.p..s.ptp.........lcshh.+.pp...h..h...sp...l+ptltpahptLhtpa.................................................................... 
0 116 202 295 +9062 PF09230 DFF40 DNA fragmentation factor 40 kDa Sammut SJ anon pdb_1v0d Domain Members of this family of eukaryotic apoptotic proteins induce DNA fragmentation and chromatin condensation during apoptosis [1]. 25.00 25.00 52.10 36.10 23.10 23.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.69 0.70 -4.99 9 108 2009-01-15 18:05:59 2006-08-21 14:03:21 5 3 71 1 54 108 0 223.70 45 71.91 CHANGED tllptAcpLloD-cuPcpp+lLushlpslp-phptEsR--DscWFEGl.-spF+oKpshM+hsspSRIRGYhpcscshhops....tsspA+c.hpchL-thpppLpuscY.ushFDRu.tctt.......................................................RLCTs-GWFoCQGsFDps..sCsttHoINPYusREuRILFSTWNLDHhIEK+RoVlPulscAlcc.h...............cG+clshtYFYpLLFThpNLKLVHIsCHcKssHcLp......CDps+hY+ ..........h...pthcphls-.pt.ppp+lLssh.........ppphthpp+sEcspWFcGh..EpR.F+oKpthhphpspoRlRuYhhcspp.hops.......sscA+p.h.pllpthp.hLpuscY.u.aFDRs.tptt.......................................................................................spLCospG.FpCQGsasts..sC.....p..p..HoINPYusREshILFpTWNL.DHhIEhpRTllPsLst.slcc.................pst-lsh..aa..LFThcNLKLVHIsCHcKspHp.t......Cs.pc.Yp......... 0 11 15 32 +9063 PF09231 RDV-p3 Rice dwarf virus p3 Sammut SJ anon pdb_1uf2 Domain Members of this family are core structural proteins found in the double-stranded RNA virus Phytoreovirus. They are large proteins without apparent domain division, with a number of all-alpha regions and one all beta domain near the C-terminal end [1]. 
25.00 25.00 1167.80 1167.60 18.80 18.40 hmmbuild -o /dev/null HMM SEED 965 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.57 0.70 -7.14 2 12 2009-09-10 17:20:48 2006-08-21 14:21:20 5 1 4 2 0 14 0 964.70 62 94.51 CHANGED pSVVSRsPlPlShhslS-lpKLFDI.sIppGSohpIV-cPQsoFhlphtcshascahCl-H.sAaEPuLhhHRl+hlaShLscYsuphISEVPh.sshlsph.VpslshsKhsDRNMsshAE+L.h-.-VhsApp-.hh.Y.RpSsstsPlTFtDDL..sVRERssLY+RY.VPap.lELuLashA.phLslQYCHPhlVYpYLpsRAPsFLhlsDQluLchhSAG.GpLhPRPVhplLDYsLVY.SPLALNNLuShLhp+Iphplhhp.lstVppsLuElVssSSsVoNsASuslspMNVtGVpTlAsFIhpShLNPNISYuhlutLsLDsFssFIYGsCLhLhQAlhPPSAlsAhpRlcINNthAYFLl+hhssQsshspll.NplI.slssh.pWpSscRDlLsAIYsNLhsu-hhl.NLlppYaRtsssppsspl.lPAcpTSYGhNETRulShPYLFGssIs.htPDsRLssYKpcLsLPsRSPILIsss.tpNslslp.lphKhshIhshY..NsFVppPstWlRNusSNoALLu+FhDsssNlhGIhEslLuNsYuNAVNsYCDSVYRAslPhpWKh+.slDPpDhhFslFGlsPpY.lhs-ulPDFFAGuEDILILQLlRAla-shSN+hGpsPschF+h--V.KslpEhVSlllppKlDsp+YFT-sMRSsoFSK.tW-pFltR.VupcLPsL.psIhsQs-pl.NYMsQhhtIhPIsDpFYlV+NSGhssRGSssPlhAuool..N.lpsshhItDapthstLhhppcpVD.so.ps.hps.F.tLppIuSsEFVRSs...pucshFT-..l-AI+VNMhs+a-LphhpEpGtaSKPsplpKlMapDhhSFlcSphuchhPPlhTlPIsIhLNsLGEssSsphRMRS.tlDEYFpsasGAQlllPlshVshchhsphp-LpshFsGsVslp.cPa.lh-shcssYl.hGsHtVhlDP 
..SsVSRVPLPVSFluhpDVc+lFcIIPl+hGSTohIlDpPpVsFllhYs-sIYDDFMpIcplsAFEPSlTMHRVcslFSlhpKYCsSMlstVPThSohVu-lPVKuVTMScFGDRsMDpLApaLspEhElluAQtpNRthYVRouA.-VPhsFGDDLssAshERtN.YHpYpVPFHslELALYpLAs-LLchQYCHPTVshcaLpcRAPPFLsV-DpVu.+MhpAGsGsLMPRPVMELLDYoLVYpSPLALspLAoRL.SKISl+L+MRMVTEVQpolS-hluVSSosSs.uoSuItshNlhGVEsLslahARSlLNPNhuYA.IScLTh.AFpDFlYGoCLLLlQAhlPPSAIhApsRlhINNRLAYFLIRYIAh.ATYsRLssNpVlPphhNpDcWQ.sshDhLVAIYoNLLsGEtRLsplIphYFRGpsP.tVstIsIPAsQTuYtIsEppuISAPYLFGAPINtMAPDsRL.-aKpsLNLPPRSPIL.TNlEGsNVISLpNLhsKsDlIpAlY.LsGFs+.oPuhWIRNAu.NTAhLoKhlssVSNLouIYEAVLANTYANAlNVYCDo.Y+s-IPLNWKl.hoIcPKDhLFuVFGllPhYQL.sEAVPDFFAGSEDILILQLIpAVYchLup+LGssPTpaFHLEEVhpslScIVSILsppplDV+hYFTDS.+ShTFsKPhWDRFIRRs.ttpLPPLYclIhpQlssVYshhptMpcllPIsDaFYIsRNsGaVARGSopsIlAATSlYpNQhsVpppIssFo-AssLRLptRRVDNsShTosL-DMFYsLSSISSsEFVRSstRGcop.+hss..VssIKVNMRARYsLpIhTE-Gsh.+.PclKKhMaSDFlsFLhpHpp-P+sPllpIPITIGLNNlGtTTSTslR.cScsVDEYFKuYlGAQVlIPhDslslEplGSFsELRNhhossVVsRDKsW-IWssV-AoYVPIGNHsVpLDP 1 0 0 0 +9064 PF09232 Caenor_Her-1 Caenorhabditis elegans Her-1 Sammut SJ anon pdb_1szh Domain Her-1 adopts an all-helical structure with two subdomains: residues 19-80 comprise a left-handed three-helix bundle with an overhand connection between the second and third helices, whilst residues 81-164 comprise a left-handed anti-parallel four-helix bundle in which the first helix consists of four consecutive turns of 3-10-helix. Fourteen Cys are conserved in all known HER-1 sequences and form seven disulfide bonds. The protein dictates male development in Caenorhabditis elegans, probably by playing a direct role in cell signaling during C. elegans sex determination. It also inhibits the function of tra-2a [1]. 
19.80 19.80 45.60 45.40 19.20 18.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.05 0.71 -4.10 2 15 2009-01-15 18:05:59 2006-08-21 14:35:42 5 1 8 2 13 10 2 114.70 37 68.76 CHANGED CCo.phhECCh-.lpFupPl+Cs.thcLth.h.Vh.ChQpELau.E.p.hhNLsDoVCCsVFupD.ND.pchC.o.ChTsMQ.PuLcsspKLp+IK-Cp.ppNsLYpCFs+C..hh+pchc.EshcFpp.Cs .CCstpthcCCh-uIcFsh.lpCs.thphtt..hpshpClQpELauccs....hhslschsCCsVFtsD.sDspthChptChpsMpuPSlcussKLppI+pCp...hsNsLapCFppCpthhc.pt.c.Eshphpp.Cs................. 0 8 9 13 +9065 PF09233 Endonuc-EcoRV Restriction endonuclease EcoRV Sammut SJ anon pdb_1sx5 Domain Members of this family of prokaryotic restriction endonucleases recognise the double-stranded sequence 5'-GATATC-3' and cleave after T-3. They catalyse the endonucleolytic cleavage of DNA to give specific double-stranded fragments with terminal 5'-phosphates [1]. 25.00 25.00 35.50 34.70 21.70 21.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.62 0.70 -5.02 3 40 2012-10-11 20:44:45 2006-08-21 14:45:55 6 1 39 61 7 49 8 236.60 41 89.70 CHANGED SsLlscIpDsNppWcVKGFIDu-s+IYoLSsDTKVISKILEIalFPhIpcFAccHcFpVlLPKcQNaYPDlSFlc.K-SscKIAIDIKTTYRNccNsK..sGFTLGuaTuYFRNRpSsKNIsFPYc-YluHalLGlVYTRVssRlDELKpYsIsELs+I.SVIKslclFLQEKYKIAoDpuGSGNTsNIGSI+.+h-Dll-GKGIFuchGE-lFDDYWhNYpphsh.psSQL+scsYsNISEY..apYR.G+ ..........ht....t.lssptscWpl+GFIDhpKsIYTIouDTKllSKIlElhlFPt.lppFAccpGaclhhsppQNaYPDloFhs.....pssshKaAlDIKTTYR...pscs.s.....sGFTLGSasuYFRNRpusKNIpaPYscY.uHasLGllYoRs.s.p.p.h.-.Ep.chhsl-cLcpIsSVl+shpFFlpEKWKIASD+sGSGNTsNIGSIp..pIccllpGpGhFssL....GE-..lFD-YWh.N.a.shh.h...ptphphp.hssLtpa..hta....h............. 0 1 5 6 +9066 PF09234 DUF1963 Domain of unknown function (DUF1963) Sammut SJ anon pdb_1pv5 Domain This domain is found in a set of hypothetical bacterial proteins. Its exact function has not, as yet, been described. 
21.80 21.80 21.80 22.30 21.30 21.60 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.82 0.70 -4.11 34 835 2009-01-15 18:05:59 2006-08-21 15:34:19 5 12 604 1 134 636 11 217.00 27 78.00 CHANGED SKhGGp.PalPps...paPp..............s.ppG.........p.PhhhLAQlNhs-ls.....p.lpsa..P.pp...GlLpFFls..p..........psppps..shcVhah-phtppp.....ppl.p-hshhp.h.........h.tthpl...p.p..........sl..pshphpp.ht..h.p...........cpppchhcchh-.h...........tps...p+..lGGYP.asQ............................ps.+pttc...........................pphhLLhQ..lDo-......tphshhaGDs.Ghh.FaIpcpDLtshcFsplhhsasC ......................................................................................................SKhGGh.PhLPtt...hcaPh......................................................................s..pps......................PhhhlAQl.Nhs.-ls..............t...p.a......P...pp.......GlLp.Falstps.................p...s.spt....shRll...Yhpp.hpph.h..........ppl.schp.hstt...........................httthplth..p.t.......................psl...p.s.hchpp.h.......ch.......................tt.pphhpt.l.hpph..................pt...hpp......lGGYPhasQ..............................................................-scp.pp..........................ttt.h...l.LhQ....lDSs.............sshshhWGDh...G.hstah.IpppDLtshcFsphhhshp.............................................................................................................................................. 1 52 91 117 +9067 PF09235 Ste50p-SAM Ste50p, sterile alpha motif Sammut SJ anon pdb_1uqv Domain The fungal Ste50p SAM domain consists of five helices, which form a compact, globular fold. It is required for mediation of homodimerisation and heterodimerisation (and in some cases oligomerisation) of the protein [1]. 
20.90 20.90 20.90 21.10 20.80 20.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.55 0.72 -3.95 3 28 2012-10-02 20:42:54 2006-08-21 16:06:02 5 2 27 2 16 31 2 73.20 54 22.58 CHANGED -sFscWSTDEVlpWCtosLGl-EsDPLhpR.IRENcIsGSlLsELTLQDCKELCDsDLscAIKLKlhINKhlDScL .........-DFopWSVDDVloWClSoLEl-E.s..DP.LCp+....LRENDIsGDLLPELsLpDCp-LCD.uDLs+AIKFKILINKhpDoc................. 0 1 6 13 +9068 PF09236 AHSP Alpha-haemoglobin stabilising protein Sammut SJ anon pdb_1w09 Domain Alpha-haemoglobin stabilising protein (AHSP) acts a molecular chaperone for free alpha-haemoglobin, preventing the harmful aggregation of alpha-haemoglobin during normal erythroid cell development: it specifically protects free alpha-haemoglobin from precipitation. AHSP adopts a helical secondary structure consisting of an elongated antiparallel three alpha-helix bundle [1]. 20.50 20.50 21.10 20.70 19.30 20.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.94 0.72 -3.78 3 32 2009-01-15 18:05:59 2006-08-21 16:22:21 5 1 27 10 16 34 0 85.80 62 82.00 CHANGED QoNKDLISsGIKEFNVLLNQQVFsDPLISEEDMVTVVcDWVNFYINYYKKQVoGEQ-EQDKALQEFRQELNTLuusFLAKYRsFLKS+E ................puNKDLISsGhKEF....slLLNQ.........QVFsDPl.lSEEcMVTVVcDWhNFYINYY+pploGEpQEpD+ALQELpQ..ELNTLusPFLsKYRsFLKSp.... 0 1 1 3 +9069 PF09237 GAGA GAGA factor Sammut SJ anon pdb_1yuj Domain Members of this family bind to a 5'-GAGAG-3' DNA consensus binding site, and contain a Cys2-His2 zinc finger core as well as an N-terminal extension containing two highly basic regions. The zinc finger core binds in the DNA major groove and recognises the first three GAG bases of the consensus in a manner similar to that seen in other classical zinc finger-DNA complexes. 
The second basic region forms a helix that interacts in the major groove recognising the last G of the consensus, while the first basic region wraps around the DNA in the minor groove and recognises the A in the fourth position of the consensus sequence [1]. 20.20 20.20 20.20 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.75 0.72 -4.37 3 58 2012-10-03 11:22:52 2006-08-21 16:35:59 6 13 44 2 38 74 1 48.30 47 10.02 CHANGED stsshhc+hsGllKIKSKSQSEQPATCPICQAVIRQSRNLRRHLELRHFKKPGV ...........................................+s+upS.-pPuTCPlCtAllRQSRNLRRH....LElpHh.................... 0 7 11 31 +9070 PF09238 IL4Ra_N Interleukin-4 receptor alpha chain, N-terminal Sammut SJ anon pdb_1iar Domain Members of this family are related in overall topology to fibronectin type III modules and fold into a sandwich comprising seven antiparallel beta sheets arranged in a three-strand and a four-strand beta-pleated sheet. They are required for binding of interleukin-4 to the receptor alpha chain, which is a crucial event for the generation of a Th2-dominated early immune response [1]. 22.90 22.90 23.70 26.10 22.50 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.32 0.72 -3.88 7 71 2009-01-15 18:05:59 2006-08-22 09:17:01 5 2 37 4 27 64 0 90.20 45 18.27 CHANGED +VLp-PsCFSDYIpsSTCEWchsussNCSupLpLsYpL..F.hSENh.TClPENpu...uoVCVC+Mhh-p.VpsDsYpL-LWAspp.LWpuSFKPSppV ....chlp-.sCaSDYhut.TCcWKhsussNCSs.-LpLhYpL..a.......s...csh.oCVPENst......uus......ClC+lhh-shVssD.YpLcLauGppl...LWp.u.SFpPuppV............................. 0 1 6 10 +9071 PF09239 Topo-VIb_trans Topoisomerase VI B subunit, transducer Sammut SJ anon pdb_1mu5 Domain Members of this family adopt a structure consisting of a four-stranded beta-sheet backed by three alpha-helices, the last of which is over 50 amino acids long and extends from the body of the protein by several turns. 
This domain has been proposed to mediate intersubunit communication by structurally transducing signals from the ATP binding and hydrolysis domains to the DNA binding and cleavage domains of the gyrase holoenzyme [1]. 20.50 20.50 21.20 28.80 20.30 20.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.93 0.71 -4.70 36 213 2012-10-03 01:04:38 2006-08-22 09:42:12 6 7 190 20 129 216 138 155.40 40 26.23 CHANGED Ps.sssLSPIGE-hIcpGLcc....hps-Fluu.lTRpPpsYuGp.PFlVEsGIAYGGcl...sps..psplhRFANRlPLLYcpGuCslTc.slc..slsW+pYslcpst......uPlllhVHlsSTpVPapSsuK-uIA....-lPEIccEl+lAlpcsuRcL+paLs++c+tcctcc+ ........................PssssLSPIG--.lctGlcc....hp.P-.Flss.hT........RpPpsYpGp.PFlVEsGlAYGGcl.....................spp........phplhRFANRlPLLa-pGusslTc.s...lc..plsW+p.Ytlcpst......uPlslhVHlsSTplPapStuK-uIA....clsEItcEl+hAlpcsuRcLcpals++cptpctpc+............................. 0 41 82 109 +9072 PF09240 IL6Ra-bind Interleukin-6 receptor alpha chain, binding Sammut SJ anon pdb_1n26 Domain Members of this family adopt a structure consisting of an immunoglobulin-like beta-sandwich, with seven strands in two beta-sheets, in a Greek-key topology. They are required for binding to the cytokine Interleukin-6 [1]. 23.10 23.10 23.20 23.10 23.00 23.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.63 0.72 -3.83 23 523 2012-10-03 16:25:20 2006-08-22 09:59:09 5 17 48 40 212 691 0 94.90 22 19.27 CHANGED ssp..sLpChh+s...........hphlpCoWpsGtsssssspYsLaahapshppstp.....Cppahpsp.t.........phuCphsh.ph..sthp......pahlpVsuoSpsuslpst.pshphpsl ....................sp.slsChhps............hphhpCoWpsGp..s..ss...s..T.pYsLahph.ps.....p..pt.pp.......Cpph...hpsp...t............phuCphsh.p..h..p..ht......phhltlpspsttt.lps...hh...................................................... 
1 12 21 54 +9073 PF09241 Herp-Cyclin Herpesviridae viral cyclin Sammut SJ anon pdb_1bu2 Domain Members of this family of viral cyclins adopt a helical structure consisting of five alpha-helices, with one helix surrounded by the others. They specifically activate CDK6 of host cells to a very high degree [1]. 25.00 25.00 217.10 216.80 23.60 18.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.52 0.72 -3.75 3 3 2009-01-15 18:05:59 2006-08-22 10:12:15 5 1 3 5 0 5 0 106.00 81 41.30 CHANGED AVLATDFLIPLCNALKIPEDLWPQLYEAASTTICKALIQPNIALLSPGLICAGGLLTTIETDNTNCRPWTCYLEDLSSILNFSTNTVRTVKDQVSEAFSLYDLEIL AVLATDFLIPLCNALKIPEDLWPQLYEAASTTICKALIQPNIALLSPGLICAGGLLTTIETDNTNCRPWTCYLEDLSSILNFSTNTVRTVKDQVSEAFSLYDLEIL 0 0 0 0 +9074 PF09242 FCSD-flav_bind Flavocytochrome c sulphide dehydrogenase, flavin-binding Sammut SJ anon pdb_1fcd Domain Members of this family adopt a structure consisting of a beta(3,4)-alpha(3) core, and an alpha+beta sandwich. They are required for binding to flavin, and subsequent electron transfer [1]. 21.50 21.50 22.20 21.50 20.20 21.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.37 0.72 -4.02 53 230 2009-09-11 08:59:02 2006-08-22 10:22:50 6 6 178 2 112 249 205 70.00 30 16.54 CHANGED Pss.shhsNTCYSllu..PchuloVAuVYchsssp..lhslpG.......loshssssshpppEAphAhuWYpsIopDhaG ..........shhsNTCYShlu..scpulpVuulYchsssp.....lsplsG.....hhpsss...tsss.hp.ptcuphAhuWhpsIhsDhaG......... 0 28 69 89 +9075 PF09243 Rsm22 Mitochondrial small ribosomal subunit Rsm22 Mistry J, Wood V anon Pfam-B_8789 (release 20.0) Family Rsm22 has been identified as a mitochondrial small ribosomal subunit [1] and is a methyltransferase. In Schizosaccharomyces pombe, Rsm22 is tandemly fused to Cox11 (a factor required for copper insertion into cytochrome oxidase) and the two proteins are proteolytically cleaved after import into the mitochondria [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.89 0.70 -5.37 7 679 2012-10-10 17:06:42 2006-08-22 11:14:57 5 14 492 0 408 777 58 228.70 20 53.83 CHANGED chsshAYhAtRhPuTYAAV+uuLcuhApusPpFsPtShLDlGuGsGsuhWAssphWs.chppshhl-tStshhslGpplAtcssthppshhttshhhthlph..s.....DLVTluYVLsELss.spRptll-pLWstssp..hlVIVEsGTPsGapRll-ARctL....IAtGh+lsAPCPHshsCPlsss....DWCHFStRVu..RSplHRhsK....suplsaEDEKFsY..lAAsRtssuss...............ssRVltPPpltuG+VllcLCppDtphpcplsTK.R..............cG.tha+tARcucWGDta. ....................................................................................................................................t.....ah.......a...t.h.t.h.th....................................t......h..................s.t........p...h.h.DhGsG..........u.s...s...h.....h...........sh.......t....t....h..........h......t....................h...................p.................h............h.......h...l...-..........s........h........t.....hu.t..l..h....t....t.........................................................................................h....t..........................................Dll.hhsas.L....c......l...........................t.............t......c.........t.......t........h......lpph..h..t..h..h.t............hlll...lE....Gs.t..G.aphl.h..ts.Rphl................................tt.....h.hlhAPCsp..t..............C.....P.h......................................C.p..F....t..h..........................................................pt.....t..asa....l....h....h..p....................................................................................hsRlh...s.h.tttth.hphC......t...................tt........h...tt...hst..p...........................................t.......a.....h+....Gs................................................................................................................. 
0 139 251 353 +9076 PF09244 DUF1964 Domain of unknown function (DUF1964) Sammut SJ anon pdb_1r7a Domain Members of this family of bacterial domains adopt a beta-sandwich fold, with Greek-key topology. They are C-terminal to the catalytic sucrose phosphorylase beta/alpha barrel domain, and are functionally uncharacterised [1]. 21.80 21.80 24.00 73.30 21.70 16.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.40 0.72 -3.64 3 41 2009-01-15 18:05:59 2006-08-22 11:24:14 5 1 40 6 7 34 4 68.70 68 13.57 CHANGED AFDG-FSYoVDDDTSIoFoWsGt..TSpATLTFEPuRGLGVDNsTPVAoLsWoDSAGDHRTDDLIANPPV AFDG-FSYpsDGDTSIoFpWpus..sooAsLTFEPG+GLGsDNsssVAoLsWoDuAGDHcTDDLlANPPl. 0 1 2 3 +9077 PF09245 MA-Mit Mycoplasma arthritidis-derived mitogen Sammut SJ anon pdb_1r5i Domain Mycoplasma arthritidis-derived mitogen (MA-Mit) adopts a completely alpha-helical structure consisting of ten alpha helices. It is a superantigen that can activate large fractions of T cells bearing particular TCR V-beta elements. Two MA-Mit molecules form an asymmetric dimer and cross-link two MHC antigens to form a dimerised MA-Mit-MHC complex [1]. 25.00 25.00 62.90 62.70 20.20 19.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.65 0.70 -5.06 2 5 2009-09-11 16:59:48 2006-08-22 11:50:13 5 1 2 8 2 9 0 165.60 79 87.34 CHANGED SMKLRVENPKKAQKHFVQNLNNVVFTNKELEDIYNLSNKEETKEVLKLFKLKVNQFYRHAFGIVNDYNGLLEYKEIFNMMFLKLSVVFDTQRKEANNVEQIKRNIAILDEIMAKADNDLSYFISQNKNFQELWDKAVKLTKEMKIKLKGQKL.............................................................D SMKLRVENPKKAQKHFVQNLNNVVFTNKELEDIYNLSNK.EETKEVLKLFKLKVNQFYRHAFGIVNDYNGLLEYKEIFNMMFLKLSVVFDTQRKEANNVEQIKRNIAILDEIMAKADNDLSYFISQNKNFQELWDKAVKLTKEMKIKLKGQKLDhhDs.sslNpVcchFGuDcsVKplhWF+SLLl+us.hlh+YY-us..hpspoDFtKAlFE.. 
0 0 2 2 +9078 PF09246 PHAT PHAT Sammut SJ anon pdb_1oxj Domain The PHAT (pseudo-HEAT analogous topology) domain assumes a structure consisting of a layer of three parallel helices packed against a layer of two antiparallel helices, into a cylindrical shaped five-helix bundle. It is found in the RNA-binding protein Smaug, where it is essential for high-affinity RNA binding [1]. 21.50 21.50 21.60 23.90 21.40 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.48 0.72 -4.44 4 24 2012-10-11 20:01:01 2006-08-22 12:56:51 5 1 23 1 19 32 0 110.20 45 11.80 CHANGED RhslL.RlEpDLl..uGQ.hpLSTslEELTNIVLTPMKPlt.stP..EsIutpFlKVlDLVushl.tcPhC.sQD-EshsVFhWIL-RulHN-AFhsHusQLK-hKaKlSKl ................................RhphL.+lEp-Lh...........sGp.hp....LusslEELTNIVLTPMKPl.......ps.ssP....EsIuhpFlKVlDlVs..stLtt-Phs.sQDDEsLsVahWIL-RulHN-AFhsHssQLK-hKaKlsKh............. 0 4 5 16 +9079 PF09247 TBP-binding TATA box-binding protein binding Sammut SJ anon pdb_1tba Domain Members of this family adopt a structure consisting of three alpha helices and a beta-hairpin. They bind to TATA box-binding protein (TBP), inhibiting TBP interaction with the TATA element, thereby resulting in shutting down of gene transcription [1]. 25.00 25.00 25.20 25.20 23.90 24.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.95 0.72 -3.76 8 130 2009-01-15 18:05:59 2006-08-22 13:12:45 6 18 84 1 77 139 0 63.20 49 3.73 CHANGED SDuDs-p-s....shsLsGFLFGNIDpsGcL-sDsh........LDcEuKcHLuuLuchGLuShLsEl........hssc-t ...........sp-p-t-p.........shs.LsGFLFGNIspsGpLEsDsl........LDs.......EsK+HLAuLu.s.L.GLGSLloEl........sus--.................... 0 21 33 53 +9080 PF09248 DUF1965 Domain of unknown function (DUF1965) Sammut SJ anon pdb_1n9e Domain Members of this family of fungal domains adopt a structure that consists of an alpha/beta motif. Their exact function has not, as yet, been determined [1]. 
29.20 29.20 31.50 30.20 28.40 29.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.44 0.72 -4.07 7 53 2009-01-15 18:05:59 2006-08-22 13:32:51 5 7 42 7 41 57 0 73.70 35 9.63 CHANGED FDspoLlPhGLaFpSDlTGRDPSpWpL.GWlYNshFYpTTEEFRpAaaSssFsKhtPNl-...GsWutTDppGsl.P ..........sLlPlGLahthDlTGRDPSp..WplhGalYsspFYpTs-cFRpAahsssF.p+.h.ssshs....GsWutpsppGp.......................... 0 16 26 36 +9081 PF09249 tRNA_NucTransf2 tRNA nucleotidyltransferase, second domain Sammut SJ anon pdb_1r89 Domain Members of this family adopt a structure consisting of a five helical bundle core. They are predominantly found in Archaeal tRNA nucleotidyltransferase, following the catalytic nucleotidyltransferase domain [1]. 21.90 21.90 21.90 22.10 21.60 21.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.60 0.71 -4.20 41 166 2009-09-11 09:45:38 2006-08-22 13:58:42 6 5 161 44 104 174 43 112.60 42 25.93 CHANGED D-VRLLKpFhKulGVYGuEl+spGFSGYLsELLllpYGuFpslLcuAu.pW+s.shhI-hp.......p.hcpF............ccPLlVlDPVDPpRNVAAAlShcshupFlhtuRtFL.cpPShsaFhs ...................--VRLLKpFhKulGlYGuEl+spGFSGYLsELLllcYGuFpslLcsAu.pW+..shhI-hp............t..p..ppF...............ccPLlVlDPVDPpRNVAAulShcshupFhhtu+paLc.pPuhpaFh..................... 0 26 61 84 +9082 PF09250 Prim-Pol Bifunctional DNA primase/polymerase, N-terminal Sammut SJ anon pdb_1ro2 Domain Members of this family adopt a structure consisting of a core of antiparallel beta sheets. They are found in various bacterial hypothetical proteins, and have been shown to harbour both primase and polymerase activities [1]. 
20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.83 0.71 -4.26 98 956 2009-09-11 10:28:24 2006-08-22 14:21:46 6 32 635 4 284 971 283 175.10 18 37.07 CHANGED Ahthh.pp.GhsVhPl.....................sssKpPh................................hpsappss.p.....c.pplpp..hap......t.......................................shslulhs.............ssllllDlDsps...............................slpthtt..........thlss.....sh......sspT....sps.................GtHhaaphsss..............htt......................t..lD....l.tsss.sa............l..ls.......s.......PShp....sss....Yp......hh.....................................hsshP...........thL ............................................................hh.tt.Ghs.lh..Ph................................s.ssKtPh.....................................................hpshp.pso..s..............s.p.plpp...ahp...ph.....................................ssss.lulhs......................t.sshhllDlDsts..........................s.t...................................slpphtp...........h.hthlss.......sh..sstTsss......................................Gt.Hha.ap..h..stsh........h.pphhhh...........................................su...l-....l..p....u.p...s...sa............l..ls..s..PSht.............sts.....Yp.........h.tt............................................................................................................... 0 86 198 257 +9083 PF09251 PhageP22-tail Salmonella phage P22 tail-spike Sammut SJ anon pdb_1tyv Domain Members of this family of viral domains adopt a structure consisting of a single-stranded right-handed beta-helix, which in turn is made of parallel beta-strands and short turns. They are required for recognition of the 0-antigenic repeating units of the cell surface, and for subsequent infection of the bacterial cell [1]. 
25.00 25.00 127.50 127.30 18.20 17.40 hmmbuild -o /dev/null HMM SEED 549 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -12.76 0.70 -6.24 6 48 2012-10-02 14:50:22 2006-08-22 14:45:56 5 2 44 23 0 51 0 509.60 79 82.26 CHANGED phEADKKFKYSVKLSDasTLQshAsAAVDuLLIDlDYpFoNGETVDFGGKsLTIDCKAKFIGDGNLlFTpLGpGSlVtuPFMESsTTPWVIhPWT-DspWITDAAAVVATLKQSKT-GYQPTVNDYVKFPGIEoLLPspAKsQsIsSTL-IRECoGVEV+RASGLMAsaLFRuCHaCKMlDuDs..GGKDGlITFENLSGDWGhGNYVIGGRTsYGSVSSsQFLRNNGGhu+DGGVIGFTSYRAGESGVKTWQGTVGuTTSRNYNLQFRDSlsL.PVWDGFDLGADssMsPEsDRPGDaPlSQYPlHQLPhNHLIDNLLVhGSLGVGlGMDGpGhYVSNITVpDCAGSGuhhhTappVFTNIulIDTNThNFsAsQIYIpGsChVNGLRLlGI+sTsupGhsIDAPNSTlSGITG.VDPSRINVANLh-.sLGNoRINSFNsDSAuLclRIHKLSKTLDSGAlhSHlNGGsGSGSAWTElTAISGSsPDAVSLKlNRGDaRAsEIPlusolLPDsAV+DpuohuhYhEss..SLKALVK+sDGShTRlTLA ........SIEADKKFKYSVKLSDYsTLQDAASAAVDGLLIDlDYpFYsGEpVDFGGKsLTI-CKAKFIGDGNLIFTKLGKGSRIAGVFMESTTTPWVIKPWTDDNQWLTDAAAVVATLKQSKTDGYQPTVSDYVKFPGIETLLPPNAKGQNITSTLEIRECIGVEVHRASGLMAGFLFRGCHFCKMVDANNPSGGKDGIITFENL.SGDWGKGNYVIGGRTSYGSVSSAQFLRNNGGFERDGGVIG.FTSYRAGESGVKTWQGTVGSTTSRNYNLQFRDSVVIYPVWDGFDLGADTDMNPELDRPGDYPITQYPLHQLPLNHLIDNLLVRGALGVGFGMDGKGMYVSNITVEDCAGSGAYLLTHESVFTNIAIIDTNTKDFQA.NQIYISGACRVNGLRLIGIRSTDGQGLTIDAPNSTVSGITG.MVD.P.SRINVANLAEEGLGNIRANSFGYDSAAIKLRIHKLSKTLDSGALYSHIN.GGPGSGSAaTQLTAISGuTPDAVSLKVNHpDsRuAEIPFsPslsSD-hlKDuSCFlPYWEsN.sSLKALVKKPNGpLVRLTLA....... 0 0 0 0 +9084 PF09252 Feld-I_B Allergen Fel d I-B chain Sammut SJ anon pdb_1puo Domain Members of this family of cat allergens adopt a helical structure consisting of eight alpha helices, in a Uteroglobin-like fold. They are one of the most important causes of allergic asthma worldwide [1]. 
24.00 24.00 25.10 24.80 22.80 21.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.03 0.72 -4.07 10 38 2012-10-01 20:54:19 2006-08-22 16:24:57 5 1 15 6 15 64 0 66.30 42 64.32 CHANGED ClPFFcuYuuVloGu+laLpp-LStFNATstE+sAaEKIQDCapEpGlKoKlL-splMtollhSsEC .................ChsFassYsullsGs+hhLptpLuhFsATssE+sAaEKIQDCasEpGL+sKlh-splMholhhSsEC..... 0 0 0 0 +9085 PF09253 Ole-e-6 Pollen allergen ole e 6 Sammut SJ anon pdb_1ss3 Domain Members of this family consist of two nearly antiparallel alpha-helices, that are connected by a short loop and followed by a long, unstructured C-terminal tail. They are highly allergenic, primarily mediating olive allergy [1]. 25.00 25.00 28.90 28.90 19.30 18.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.46 0.72 -4.31 3 13 2009-01-15 18:05:59 2006-08-22 16:37:14 5 1 7 1 7 14 0 39.50 54 51.09 CHANGED CYDsCQKECSDcGsGYTFCEMKCDsDCosK-lKEKIENL..Kp CFssCccECpscGsG.TFCEMKCDTDChsK-lttKl......h........ 0 0 5 7 +9086 PF09254 Endonuc-FokI_C Restriction endonuclease FokI, C terminal Sammut SJ anon pdb_2fok Domain Members of this family are predominantly found in prokaryotic restriction endonuclease FokI, and adopt a structure consisting of an alpha/beta/alpha core containing a five-stranded beta-sheet. They recognise the double-stranded DNA sequence 5'-GGATG-3' and cleave DNA phosphodiester groups 9 base pairs away on this strand and 13 base pairs away on the complementary strand [1]. 
25.00 25.00 25.00 25.20 23.60 24.30 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.96 0.71 -5.20 12 30 2012-10-11 20:44:45 2006-08-22 16:55:39 6 3 30 3 5 37 1 174.70 34 34.62 CHANGED KsslpchKsclRcclsplsHcYLpLlDlAaDuK....pNR-...FEhhTh-Lhhp.hsFcGh+LGGoRKPDGllYpssh....GlIlDTKAYupGYsLsIsQADEMhRYl--NppRDcphNPN+WWEsFscsl..pp.ahFlaVSupFhGsFpcQLpphspcTsspGuAlsVppLLLhA-hl+sGchshp-hhchhpNs-Ihh ..........................phKt.hhpp.pplshpYlphl-lAacu+.....cs.p-...FEhhTh-LFps.htapup+LGG.u...pKPDsllassct....ulIlDoKAYucGYslshspsDcMh.RYIcpsppRccphsPs.WWc.asppl..sp.haFhalSupFsupacpQLpphspcTshpGuAlsVtpLLlhA-phpptphshcclhchhpsppl............ 0 3 3 5 +9087 PF09255 Antig_Caf1 Caf1 Capsule antigen Sammut SJ anon pdb_1p5v Domain Members of this family are predominantly found in the F1 capsule antigen Caf1 synthesised by Yersinia bacteria. They adopt a structure consisting of a seven strands arranged in two beta-sheets, in a Greek-key topology, and mediate targeting of the bacterium to sites of infection [1]. 25.00 25.00 27.80 27.00 22.70 21.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.70 0.71 -4.27 2 29 2009-01-15 18:05:59 2006-08-22 17:09:56 5 1 20 15 1 24 0 107.20 85 82.49 CHANGED VEPARITLTYKEGuPITIMDNGNIDTELLVGTLTLGGYKTGTTSTSVNFTDAAGDPMYLTFTSQDGNNHQFTTKVIGKDSRDFDISPKVNGENLVGDDVVLATGSQDFFVRSIGSKGGKLAAGKYTDAVTVTVSNQ .VEPARITLTYKEGAPITIMDNGNIDTELLVGTLTLGGYKTGTTSTSVNFTDAAGDPMYLTFTSQDGNNHQFTTKVIGKDSRDFDISPKVNGENLVGDDVVLATGSQDFFVRSIGSKGGKLAAGKYTDAVTVTVSNQ 0 0 1 1 +9088 PF09256 BaffR-Tall_bind BAFF-R, TALL-1 binding Sammut SJ anon pdb_1oqe Domain Members of this family, which are predominantly found in the tumour necrosis factor receptor superfamily member 13c, BAFF-R, are required for binding to tumour necrosis factor ligand TALL-1 [1]. 
21.00 21.00 21.20 22.40 20.80 20.60 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.64 0.72 -4.36 2 31 2009-01-15 18:05:59 2006-08-23 09:17:39 5 1 22 70 15 30 0 31.00 57 17.37 CHANGED PT.Cs.sECFD.LVRpCVuCtLh+TPcst.u ....PTpCspuECFDPLVRpCVuC.cLh+T..Pcst.......... 0 1 1 3 +9089 PF09257 BCMA-Tall_bind BCMA, TALL-1 binding Sammut SJ anon pdb_1oqd Domain Members of this family, which are predominantly found in the tumour necrosis factor receptor superfamily member 17, BCMA, are required for binding to tumour necrosis factor ligand TALL-1 [1]. 22.40 22.40 23.40 51.20 22.20 22.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.15 0.72 -4.00 2 32 2009-09-10 15:58:20 2006-08-23 09:26:21 5 2 24 12 15 30 0 37.40 77 22.05 CHANGED C.psEYFDSLLHAChPCpLRCSs..PPhTCQ.YCssSVT .C.QNEYFDSLL+ACKPCpLRCSs.TPPLsCQRYCNAS..... 0 1 1 2 +9090 PF09258 Glyco_transf_64 EXTL2; Glycosyl transferase family 64 domain Sammut SJ, Bateman A anon pdb_1omz Domain Members of this family catalyse the transfer reaction of N-acetylglucosamine and N-acetylgalactosamine from the respective UDP-sugars to the non-reducing end of [glucuronic acid]beta 1-3[galactose]beta 1-O-naphthalenemethanol, an acceptor substrate analog of the natural common linker of various glycosylaminoglycans. They are also required for the biosynthesis of heparan-sulphate [1]. 
20.60 20.60 20.60 21.00 20.30 20.50 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.82 0.70 -5.22 25 640 2012-10-03 05:28:31 2006-08-23 09:38:24 5 15 149 8 394 576 20 229.70 37 40.08 CHANGED FTlllhs.....hsRtphLh+llppl..ussshlspIlVlWss.scssPpp...........pphsssu.VPlpllc.................sppsslssRFhPassIcT-AVLulDDD.shlsss-lcFAFpVWp..sFP-RlVGassRtHh.hD...spspWsYso........paoscYSMVLTGAAFaH+hY.hpLYo...phhPpulRshVDcptNCEDIhMNFLVushTppPP....lKVs......ph+papcstsst..............suhsucss..H.htpRspClNpFuchaG..hMPLhtoph+h..ssshF .............................................FThlhhs......hpR.psLhphlpph.............sts.s........l.ppllVlWNs...s...cssPtp........................phs..sht..lPl..hll.p..........................spp....ssl.ssRFhPa....s..........pIc....T-A..VLulDD.D..s.hl.ss.s-lpFuFpV.......Wp.........pa..P-.R.l..V.G..as..s.RhH.......h..aD...........spp...pWtYso....................pho.sphSMVL.TG...AAFaH+.hY.hh.LYo...phhPt..s.l...+s.hVDph.h.........NCEDIh..MN.FLVup...lT.tpsP......................l.K.Vs........ttpac..pstsst........................uh.tcss...H.hhpRppClNpFsp.haG..hMPLhhoph+h..-.hha................................................................................................. 0 136 195 290 +9091 PF09259 Fve Fungal immunomodulatory protein Fve Sammut SJ anon pdb_1osy Domain Fve is a major fruiting body protein from Flammulina velutipes, a mushroom possessing immunomodulatory activity. It stimulates lymphocyte mitogenesis, suppresses systemic anaphylaxis reactions and oedema, enhances transcription of IL-2, IFN-gamma and TNF-alpha, and haemagglutinates red blood cells. It appears to be a lectin with specificity for complex cell-surface carbohydrates. Fve adopts a tertiary structure consisting of an immunoglobulin-like beta-sandwich, with seven strands arranged in two beta sheets, in a Greek-key topology. 
It forms a non-covalently linked homodimer containing no Cys, His or Met residues; dimerisation occurs by 3-D domain swapping of the N-terminal helices and is stabilised predominantly by hydrophobic interactions [1]. 25.00 25.00 169.00 168.80 24.50 22.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.47 0.71 -4.14 2 8 2009-01-15 18:05:59 2006-08-23 09:52:28 6 1 6 5 1 8 0 109.50 72 95.01 CHANGED SsTuLhFpLAa.VKKlsFDYTPNWGRGsPssaIDslTFPKVLTDKtYoYRVsVsGpsLGVcssaAVpssGuQplNFLpYNpGYGlADTpTIQVFVV.PDTsN..-aIIApW ..SDTALlFpLAWsVKKLSFDYTPNWGRGsPSSaIDNlTFPKVLTDKAYTYRVVVSG+DLGV+PSYAVpSDGSQKlNFLEYNsGYGIADTNTIQVFVVDPDTGN..-FIIAQW 0 0 1 1 +9092 PF09260 DUF1966 Domain of unknown function (DUF1966) Sammut SJ anon pdb_7taa Domain This domain is found in various fungal alpha-amylase proteins. Its exact function has not, as yet, been defined [1]. 21.30 21.30 21.30 21.60 21.20 21.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.95 0.72 -3.90 39 219 2012-10-02 20:10:03 2006-08-23 11:03:31 6 7 96 10 148 223 0 86.70 32 16.21 CHANGED slYpDs........sslAhRKGs.tGtQllsVLoNpGo........uusYslslss..sGasuGsslh-lloCsshTs....sssGslsVsMsu....G.P+VahPsshhtGSGLC .................lap-s........sslAhRKGs.tGtpllsVloNtGu.s........ussaslsls.....suass.Gppls-lloCs.s..hos....sssGslsVshss....G.P+.VhhPssh.h.u.SulC............. 0 45 74 117 +9093 PF09261 Alpha-mann_mid Alpha mannosidase, middle domain Sammut SJ anon pdb_1o7d Domain Members of this family adopt a structure consisting of three alpha helices, in an immunoglobulin/albumin-binding domain-like fold. They are predominantly found in the enzyme alpha-mannosidase [1]. 
20.90 20.90 21.00 20.90 20.50 20.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.92 0.72 -3.77 126 2561 2009-01-15 18:05:59 2006-08-23 11:25:36 6 27 1449 64 839 2123 87 78.00 28 8.09 CHANGED EaHpGhaTSpsp.....hK+hsRpsEphL.pssEhlsshusht........tt.........ttplpplW......................cslhlsQaHDslsGouhppVhp.Dhtpch ......................................hapGhho.S..+ht.....hKphsRcsEp.h.L....ps.s.E.LsshAtht.................................th....th.........pppLpp.h..W......................+plhhsQaHDulsGouhspVhc-hhtc.............................. 0 291 479 678 +9094 PF09262 PEX-1N Peroxisome biogenesis factor 1, N-terminal Sammut SJ anon pdb_1wlf Domain Members of this family adopt a double psi beta-barrel fold, similar in structure to the Cdc48 N-terminal domain. It has been suggested that this domain may be involved in interactions with ubiquitin, ubiquitin-like protein modifiers, or ubiquitin-like domains, such as Ubx. Furthermore, the domain may possess a putative adaptor or substrate binding site, allowing for peroxisomal biogenesis, membrane fusion and protein translocation [1]. 19.50 19.50 21.20 19.60 18.80 16.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.78 0.72 -3.93 32 271 2012-10-01 20:15:13 2006-08-23 11:53:34 6 14 244 1 190 278 1 81.00 35 7.38 CHANGED AppVplEPlTscDWEllEhpAphlEs.plLsQlRslhs.............sphlslal..sssossplpVspltPssst..................hu+lsssoEllVAP .....sppVplEPlos-DWE....IlELHAphlEp.pLLsQlRlV.s.............sphlslal........sssossplplsslpPsss....................hu+lsssoEllVAP............... 0 53 95 152 +9095 PF09263 PEX-2N Peroxisome biogenesis factor 1, N-terminal Sammut SJ anon pdb_1wlf Domain Members of this family adopt a Cdc48 domain 2-like fold, with a beta-alpha-beta(3) arrangement. 
It has been suggested that this domain may be involved in interactions with ubiquitin, ubiquitin-like protein modifiers, or ubiquitin-like domains, such as Ubx. Furthermore, the domain may possess a putative adaptor or substrate binding site, allowing for peroxisomal biogenesis, membrane fusion and protein translocation [1]. 25.00 25.00 29.70 32.80 21.60 20.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.94 0.72 -3.68 2 52 2009-01-15 18:05:59 2006-08-23 11:54:29 5 4 36 1 28 53 0 83.50 64 7.17 CHANGED GAVVolthTss+DsFh+Ls.cllAQL+L.QNpAlEVu.scQ.PsYLsWhEuRphss.upNVAElNRQhupKLGhSpGpQVFL+.Cop ....ssVTVsFTNARDCFLHLPp+LVuQLHLhQNQAIEVsWucQ.PsFLSWVEGRHhocp....GENVAEINRQlGQKLGLSsGpQVFL+PCoH..... 0 3 5 12 +9096 PF09264 Sial-lect-inser Vibrio cholerae sialidase, lectin insertion Sammut SJ anon pdb_1w0p Domain Members of this family are predominantly found in Vibrio cholerae sialidase, and adopt a beta sandwich structure consisting of 12-14 strands arranged in two beta-sheets. They bind to lectins with high affinity helping to target the protein to sialic acid-rich environments, thereby enhancing the catalytic efficiency of the enzyme [1]. 25.00 25.00 36.90 35.40 18.60 16.40 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.09 0.71 -4.98 2 111 2009-01-15 18:05:59 2006-08-23 12:46:17 5 8 55 9 4 48 0 178.40 52 44.74 CHANGED DVTcQVKE+SaQIAGWGGSELYp+ssoLNSpQDWQ.NAplRIhDGAANpIQsADGuRpaVVThulD.SGtLsApLNG.SuPlllt.ppucVauFHpYpltYSALsppsoLhVDGpplsoWuGEsSppN.lpFGNADuplDGRlHlQcIsLpQpGhsLVphDAhYLAQQsP.psppDLEpLGWoK.KoGNTMShYGpAS ....................................................................p...ppp.u.s..Sp.sWp.ssch+llsGuh.p..hAsGop+hlshlSlDpSGsLVsphpG.ou.hlLtotpAt.ppaHcaEL.a.s.h.sp.osohahDGphIp.s...pspsSppN.I.aGNusuphDGhhthpcIthp.QGcs................................................................... 
0 2 2 4 +9097 PF09265 Cytokin-bind Cytokinin dehydrogenase 1, FAD and cytokinin binding Sammut SJ anon pdb_1w1o Domain Members of this family adopt an alpha+beta sandwich structure with an antiparallel beta-sheet, in a ferredoxin-like fold. They are predominantly found in plant cytokinin dehydrogenase 1, where they are capable of binding both FAD and cytokinin substrates. The substrate displays a 'plug-into-socket' binding mode that seals the catalytic site and precisely positions the carbon atom undergoing oxidation in close contact with the reactive locus of the flavin [1]. 20.50 20.50 20.70 20.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.94 0.70 -5.26 18 278 2012-10-02 00:48:38 2006-08-23 13:02:20 5 5 80 16 137 330 2 242.40 36 52.73 CHANGED Pp+VRWlRllYoDFssFTcDQEhLIShpss.t.......hDYVEG.lhls.puhhsshpos..............FsPsD.s+luslss..sutVlYCLEsshaYc........ss...s.ssshDQcl-sLhppLsaltGhlFppDVsYh-FLsRV+ppEhpLRupGLW-VPHPWLNLFVP+SpItcFccuVFcuI.Lpssss.GPlLlYPhN+sKWDs+hSslhP.-...E-VFYhVGlLpSu.....suhssl-cLpppNccIlcFC......c..puGIshKQYLPaa..sopp-..W.p+HFG.s+WsRFsc+KscYDP+sILuPGQpIF ..............................................................PthV+WlRhlYssFstFotDQEhLluh.tt.........hDYlEG.lhhs....p..s..ssh......ss..................h.sstp...phsth.t....tuth..lYsL..Ehsh.as..............tt...p..tsshs....pclptlLtpLpah.uhhFtpDlsYh-FLsRV+t.tE.pLRupGhW-.V.P.HP..WLNLFlP.......cSpItcFsptVFpsl.L.pp..s...s...s..........G.PlL.lYPhs+.sKWDsph.Ss...sh.P..-.......--lFYhVuhLpou............sssslcpl.ppNpcIlchC........p..tsslthK.pYLspa....popt-..W.tpHFG....s+WppFhptKtpaDPhtILuPGQtIF................................................................. 
0 23 81 113 +9098 PF09266 VirDNA-topo-I_N Viral DNA topoisomerase I, N-terminal Sammut SJ anon pdb_1vcc Domain Members of this family are predominantly found in viral DNA topoisomerase, and assume a beta(2)-alpha-beta-alpha-beta(2) fold, with a left-handed crossover between strands beta2 and beta3 [1]. 25.00 25.00 43.80 43.70 21.60 20.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.61 0.72 -4.09 13 69 2009-01-15 18:05:59 2006-08-23 13:10:41 5 2 45 4 0 56 0 57.70 68 18.51 CHANGED haYpDGKLFpDKphop.Vsp..DNPsYEILK+lKIPsHLoDVlVYEQTaE-AhspLIFVG .LFYKDGKLFsDssFhNPVSD..DNPAYEVLpHVKIPoHLTDVVVYEQTaEEALTRLIFVG 0 0 0 0 +9099 PF09267 Dict-STAT-coil Dictyostelium STAT, coiled coil Sammut SJ anon pdb_1uur Domain Members of this family are found in Dictyostelium STAT proteins and adopt a structure consisting of four long alpha-helices, folded into a coiled coil. They are responsible for nuclear export of the protein [1]. 20.70 20.70 21.40 21.40 20.30 19.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.54 0.71 -3.86 3 15 2009-01-15 18:05:59 2006-08-23 13:26:01 5 3 4 2 15 17 2 109.90 33 13.48 CHANGED QpILNEIaKLphpQ+ETL-KMhIsQKQlLu+hssshspNscEsL+SLss-QsTLuuQl-oEloALsQlcpshILEPs-LsKLhhLLQDLoIQhKQLcLYHpELQhllsPQcPsP ..........................................p.llsphh+Lh.tQcppL.pMhh.QpplLsc................hspsp.hphhptLpscQsTL+pQI-sEhouLpplhpphIL-Ps-LpKlhhLlp-LpIQh+QLcLhHpELQhllsPppP......... 2 12 15 15 +9100 PF09268 Clathrin-link Clathrin, heavy-chain linker Sammut SJ anon pdb_1utc Domain Members of this family adopt a structure consisting of alpha-alpha superhelix. They are predominantly found in clathrin, where they act as a heavy-chain linker domain [1]. 
20.60 20.60 20.60 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.46 0.72 -6.38 0.72 -4.87 17 470 2012-10-11 20:01:01 2006-08-23 13:48:23 5 23 355 13 187 307 3 24.00 68 2.26 CHANGED -psIlPYlpspLpNs-LAl+lAsR ....EENIIPYITNVLQNPDLALRMAVR. 0 59 97 144 +9101 PF09269 DUF1967 Domain of unknown function (DUF1967) Sammut SJ anon pdb_1udx Domain Members of this family contain a four-stranded beta sheet and three alpha helices flanked by an additional beta strand. They are predominantly found in the bacterial GTP-binding protein Obg, and are still functionally uncharacterised [1]. 20.70 20.70 21.20 22.10 20.50 18.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.02 0.72 -4.31 110 2054 2009-09-11 09:14:44 2006-08-23 14:07:22 6 7 2030 1 404 1340 191 69.80 42 15.64 CHANGED Fpl...p+..csss..........hahVpGpclERhlphTshs.scEulthhtctLcphGVc-tLcctGsc.sGDtVpI..s....chpF-a ....................FpI..pR..-s..Du.......salloGp+lE+hhphTsFs.pD......EultpFA+pL+phGV--uL+.cpGA+.sGDhV+Is.....sh.EFEF........ 0 162 293 361 +9102 PF09270 BTD Beta-trefoil; Beta-trefoil DNA-binding domain Sammut SJ anon pdb_1ttu Domain Members of this family of DNA binding domains adopt a beta-trefoil fold, that is, a capped beta-barrel with internal pseudo threefold symmetry. In the DNA-binding protein LAG-1, it also is the site of mutually exclusive interactions with NotchIC (and the viral protein EBNA2) and co-repressors (SMRT/N-Cor and CIR) [1]. 
25.00 25.00 32.90 26.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.13 0.71 -4.48 7 243 2009-01-15 18:05:59 2006-08-23 14:47:35 5 7 119 7 151 235 0 146.30 56 27.06 CHANGED lCIspsopVuLFNRlRuQTVsT+aL..........................slEss......shtuustpWssFtlphh..........ss+sp.cchshp-G.hlpYGslVhLVsp.TGlt.PPlhl+KV-ptpulLss...s-PVSpLpKh....AFphp-....us+hYLslspc+l.p.....................hpss..sp..s+..lssGupW ..............LCIuSGoKVALFNRLRSQTVSTRYL..........................cVEsG............s..FpASopQWuAFhIH...Ll-........................Ds..c..u..puc-.......Fs.l.R-G...YI.+YGpsVcLVCoVTGhuLPhLIIRKV......D.KQpAlLD...........u....................D...............-PVSQLHKC....AF.hhD............scphYLCLS.pE+IlQ.....................FQ..AoPCP.KEsN+thlNDuusW..................................................... 0 61 71 112 +9103 PF09271 LAG1-DNAbind LAG1, DNA binding Sammut SJ anon pdb_1ttu Domain Members of this family are found in various eukaryotic hypothetical proteins and in the DNA-binding protein LAG-1. They adopt a beta sandwich structure, with nine strands in two beta-sheets, in a Greek-key topology, and allow for DNA binding [1]. This domain is also known as RHR-N (Rel-homology region) as it related to Rel domain proteins. 
25.00 25.00 27.30 25.20 24.40 23.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.98 0.71 -3.70 12 251 2009-01-15 18:05:59 2006-08-23 15:01:53 6 6 119 7 155 234 0 132.00 55 24.73 CHANGED slhlhHu+VAQKSYGsEKRFhCPPPhlYLhGsuW......pht....tp.hp...................tsh..sppu.....splsuahsluusst....phpphsh-st................hssA..KoLaISD.sD.KRKphpLtlphhh...........ssu....pcl......GhF.SphIKVISKPSKK+pohKss- .................................................................TVhILHAKVAQKSYGNEKR.....F.FCPPP.CVYLhGsGW................+hK........ppph.pp......................................................pup...u-pt.............sp.CualGIGsusp.....-hQpLsh-s.t......................................................................passA..KTLYISD..oD.KRKHFhLsl+hah.....................................usu......c-l....................GsFhS+pIKVISKPSKKKQSLKNsD..................................................................... 0 64 75 116 +9104 PF09272 Hepsin-SRCR Hepsin, SRCR Sammut SJ anon pdb_1p57 Domain Members of this family form an extracellular domain of the serine protease hepsin. They are formed primarily by three elements of regular secondary structure: a 12-residue alpha helix, a twisted five-stranded antiparallel beta sheet, and a second, two-stranded, antiparallel sheet. The two beta-sheets lie at roughly right angles to each other, with the helix nestled between the two, adopting an SRCR fold. The exact function of this domain has not been identified, though it probably may serve to orient the protease domain or place it in the vicinity of its substrate [1]. 
26.70 26.70 26.80 27.10 26.50 26.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.33 0.72 -3.83 2 55 2012-10-03 20:35:02 2006-08-23 15:58:54 5 4 32 6 24 43 0 105.50 65 25.71 CHANGED sLapVQlSsuDpRLhVhDpTphpW+hlCSSpsNthlAslsCEEMGFlRAlsaS.hss.puG..GspsFFCVcEutLshup+lhssl.sCcCs+GphLpshCQDCGRRhLP ...............LYsVQVSsuDuRLhVFDcTEGTWRL.LCSS.RSNuRVAGLSCEEMGFL......RA.L.sHSE..LDVRTAG.ANGTSGFFCVDEGcLPaup..RLL-VIS.VCD.CPRGRFLsslCQDCGRR.KLP................. 0 1 3 9 +9105 PF09273 Rubis-subs-bind Rubisco LSMT substrate-binding Sammut SJ anon pdb_1p0y Domain Members of this family adopt a multihelical structure, with an irregular array of long and short alpha-helices. They allow binding of the protein to substrate, such as the N-terminal tails of histones H3 and H4 and the large subunit of the Rubisco holoenzyme complex [1]. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.43 0.71 -4.22 43 613 2009-01-15 18:05:59 2006-08-23 16:27:20 6 17 220 26 433 611 10 124.40 18 24.97 CHANGED lspsD.hhtp....KtplLcptGhsss..t.hslthsss.........hstcLLsaLRllshsspchtthtsstpstsh................slStpN............EtpshphltshspthLspYsTTlc-.Dct..hlcp...sshptp..................tphAlplRhsEKcIL .............................................................................+.phlt.....p..s.....h.tp.......h.lthst....................s.pLlsh.LR.lhth.s.t...p...ch..p.ph.t.th.tts.......................................................h..u.ss......................................Etps.hp.h.L.tstsp.hhL.ppa.sT....o.l..cc.....Dpp.....lLpp....ts....ttp.............................................................tphAlplRhtEKplL.................................................................................... 
0 156 269 351 +9106 PF09274 ParG ParG Sammut SJ anon pdb_1p94 Domain Members of this family of plasmid partition proteins adopt a ribbon-helix-helix fold, with a core of four alpha-helices. They are an essential component of the DNA partition complex of the multidrug resistance plasmid TP228 [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.52 0.72 -4.05 3 92 2012-10-02 18:44:02 2006-08-23 16:30:01 5 1 84 2 16 60 5 66.00 30 86.74 CHANGED MALEKsHTSsKKMTFGEHRDLEKVVsSPlPSGKpKRVNVNFDEEKHTRFKAACAKpGTSITDVINQLVDNWLKENE ........................................................................stt.hKRVssNh..sE-hHpRhKhtCscpGpSIsDllspLl.cpaLpp........... 0 6 10 14 +9107 PF09275 Pertus-S4-tox Pertussis toxin S4 subunit Sammut SJ anon pdb_1prt Domain Members of this family of Bordetella pertussis toxins adopt a structure consisting of an OB fold, with a closed or partly opened beta-barrel in a Greek-key topology [1]. 25.00 25.00 254.40 254.10 19.00 18.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.55 0.72 -3.96 2 5 2009-01-15 18:05:59 2006-08-23 16:43:19 5 1 5 12 1 5 0 110.00 99 72.37 CHANGED DVPYVLVKTNMVVTSVAMKPYEVsPTRMLVCGIAAKLGAAASSPDAHVPFCFGKDLKRsGSSPMEVMLRAVFMQQRPLRMFLGPKQLTFEGKPALELIRMVECSGKQDCP DVPYVLVKTNMVVTSVAMKPYEVTPTRMLVCGIAAKLGAAASSPDAHVPFCFGKDLKRPGSSPMEVMLRAVFMQQRPLRMFLGPKQLTFEGKPALELIRMVECSGKQDCP 0 1 1 1 +9108 PF09276 Pertus-S5-tox Pertussis toxin S5 subunit Sammut SJ anon pdb_1prt Domain Members of this family of Bordetella pertussis toxins adopt a structure consisting of an OB fold, with a closed or partly opened beta-barrel in a Greek-key topology [1]. 
25.00 25.00 217.50 217.30 20.60 17.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.30 0.72 -4.02 2 5 2009-01-15 18:05:59 2006-08-23 16:44:09 5 1 5 6 1 8 0 97.00 96 75.90 CHANGED PTHLYKNFTVQELsLKLKsKNQEhCLTAFMsGRSLVRACLSDAt+p+sTWFDTMLGFAISAYALKSRIALTVEDSPYPGTPGDLLELQICPLNGYCE PTHLYKNFTVQELALKLKGKNQEFCLTAFMSGRSLVRACLSDAGHEHDTWFDTMLGFAISAYALKSRIALTVEDSPYPGTPGDLLELQICPLNGYCE 0 1 1 1 +9109 PF09277 Erythro-docking Erythronolide synthase, docking Sammut SJ anon pdb_1pzq Domain Members of this family of docking domains are found in prokaryotic erythronolide synthase. They adopt a structure consisting of a bundle of four alpha-helices, and mediate homodimerisation of the protein, stabilising the resulting complex [1]. 24.60 24.60 25.00 25.00 23.60 24.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.86 0.72 -4.28 2 8 2009-01-15 18:05:59 2006-08-23 16:59:38 6 5 7 2 3 10 0 54.80 50 2.20 CHANGED uA.PsVslGsRLD-LE+AL-ALsstpGHsDVGtRLEuLLRRWpSRRsstspsssIS-D ............slhscLDcLE+ALcALPsEDG.Hs-VusRLEuLLRRWpsRRAsAsus............... 0 1 2 3 +9110 PF09278 MerR-DNA-bind MerR, DNA binding Sammut SJ anon pdb_1q08 Domain Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold [1]. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -9.01 0.72 -3.57 304 2740 2012-10-04 14:01:12 2006-08-24 08:59:03 6 15 1578 18 772 6957 665 64.40 30 43.61 CHANGED lpRLphIppu+plGFoLsEI+plL....sl.......pp..t.spssschp..plhpp+ltclcp+lpcLpphcpp.Lpph .............hcplthI+pAp.c.lG.hoLscI.tclL.....sL....................ps.....c..sts.s.p-..hc..........plu..p.p+....h....p....-.l.-...c....+I....p....p.Lp.thcsp.Lpth.......................................................... 0 215 470 645 +9111 PF09279 EF-hand_like efhand_like; Phosphoinositide-specific phospholipase C, efhand-like Sammut SJ anon pdb_1qas Domain Members of this family are predominantly found in phosphoinositide-specific phospholipase C. They adopt a structure consisting of a core of four alpha helices, in an EF like fold, and are required for functioning of the enzyme [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.76 0.72 -3.96 63 1479 2012-10-02 16:17:27 2006-08-24 09:12:10 6 79 244 26 795 1356 2 83.20 25 8.27 CHANGED ElpplFppa.us.p...pphlospcLhcFLpccQ+csch........s.pcpstplIpc.....aEssp.....pspcc.......thlol-GFhpYLhSs-sslhssp+hcla ....................................-l..lh.ph...us...p.......pthlo.hppLhpF...Lp.pcQ..p.-sph.........................................s...ppstpl.Ipc.............a..Essp.......................ptppc...............................shloh-.GFhpYL.h.S.p.-.sslhsspp.pl.................................. 1 179 275 510 +9112 PF09280 XPC-binding XPC-binding domain Sammut SJ anon pdb_1pve Domain Members of this family adopt a structure consisting of four alpha helices, arranged in an array. They bind specifically and directly to the xeroderma pigmentosum group C protein (XPC) to initiate nucleotide excision repair [1]. 
22.80 22.80 22.80 24.50 22.70 22.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.94 0.72 -4.51 36 513 2009-01-15 18:05:59 2006-08-24 09:22:59 6 13 297 12 316 512 7 58.10 44 15.62 CHANGED Ls.hLpspPQFpplRphlQpNPplLpslLQplupsNPpLhph...Ippsp-tFlphLsp.ssss .............L-FLRspPQFQplRpllQpNPp....lLtslLQQlGppNPpLhph.......IpppQ-pFlphLNE.ss..s............ 0 95 163 247 +9113 PF09281 Taq-exonuc Taq polymerase, exonuclease Sammut SJ anon pdb_1qtm Domain Members of this family are found in prokaryotic Taq DNA polymerase, where they assume a ribonuclease H-like motif. The domain confers 5'-3' exonuclease activity to the polymerase [1]. 25.00 25.00 76.00 74.10 21.90 21.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.89 0.71 -4.03 4 36 2009-01-15 18:05:59 2006-08-24 09:36:09 5 4 33 37 13 49 0 130.30 60 15.85 CHANGED stPcE.A.hssPEGshhG.lLspscP....hhAphtA.tAsp-uch+RAs......Plsu.A-h...+EVpuhhAKsLAshhShcGssl-PGDDPLLlAYLLDPANTN..sVA+RY.ssEWsEDAApRAhlotRLhpsL.P+L ...............shEEAPWP.PPEGAFVGFlLSR.sEP....MWA-LhALAAAp-GRVH.RAss......PhtuLcDL...+ElRGlLAKDLAVLALREGlsLsPGDDPhLLAYLLDPSNTsPEGVARRY.GGEWTEDAucRALLoERLapsLh.RL...... 0 3 8 13 +9114 PF09282 Mago-bind Mago binding Sammut SJ anon pdb_1rk8 Domain Members of this family adopt a structure consisting of a small globular all-beta-domain, with a three-stranded beta-sheet and a contiguous beta-hairpin. They bind to Mago alpha-helices via extensive electrostatic interactions and at a beta2-beta3 loop via hydrophobic interactions [1]. 20.60 20.60 20.60 25.60 20.20 18.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -6.79 0.72 -4.37 11 235 2009-01-15 18:05:59 2006-08-24 09:52:20 5 3 201 1 178 238 0 26.60 56 12.55 CHANGED G-+hIPsopRsDGohRKsl+lRsGYhP ....Gp+aIsuopRPDGThRKthRV+sGYhP.. 
1 60 94 146 +9116 PF09284 RhgB_N Rhamnogalacturonase B, N-terminal Sammut SJ anon pdb_1nkg Domain Members of this family are found in prokaryotic Rhamnogalacturonase B, and adopt a structure consisting of a beta supersandwich, with eighteen strands in two beta-sheets. The exact function of the domain is unknown, but a putative role includes carbohydrate-binding [1]. 27.00 27.00 28.30 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.51 0.70 -5.02 6 102 2009-09-11 00:34:51 2006-08-24 11:09:34 5 8 71 5 74 110 2 228.70 42 45.15 CHANGED FGhTsoGssaslDuGus..LlFpVsKoosDlsSlpYRGsEhQ.YsuKuSHIuSGLGSATVosppluu....aIKVTssoSoL..THYals+sG-ssIYMAT.hsAEsslGELRFlARLpsshLPs..ptPaupVuTTtGsou.IEGuDVFll.sGpTRSKFYSScRFIDDchH......slSGuu..spVCMlhs..saEsSSGGPFFRDIsopssu-sssLYaYMNSGHVQTEuaRhGLHGPYulsFocuGsPssu....plDTSFFs.oLGIsG ........................FGhTpousphllDsGus..LsFsVs.psssDIsSlpYpusELQ.hpup.sSHIsSGLGo.u.o..V.shp..phss........hIhVosps.......u....s....L..spYhls+pGcssIYMAT.hsscsslG..ELRFlARLssshLPs....p.shussuss......s.ps.IEGsDVFhl.sGpTpSK....FYSup......RhIDDphH......sVsGsu...htVhhlhs...shEpSSGGPFFRDIssppssstppLY.Y..M.SsHsQT.....EsaRhG.LHGPYuhhFocuusPsss......l.DhuFhs.sLslpG.................................. 0 30 45 66 +9117 PF09285 Elong-fact-P_C Elongation factor P, C-terminal Sammut SJ anon pdb_1ueb Domain Members of this family of nucleic acid binding domains are predominantly found in elongation factor P, where they adopt an OB-fold, with five beta-strands forming a beta-barrel in a Greek-key topology [1]. 
25.00 25.00 25.20 27.80 20.20 23.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.56 0.72 -4.62 108 5622 2009-01-15 18:05:59 2006-08-24 11:26:27 6 9 4549 10 1133 2891 2007 55.20 50 29.52 CHANGED VpLpVscT-PulKGsT.uoust.KPApLE.TG.hplpVPhFIppG-pIplcTcs.GpYlsR .............V-Lcls-T-PGl.KG...DT...u...........o.u.us.KPATLc..T.....G.....hs.....lpVPhFlptG-hlclcTcs.GpYluR............ 0 377 737 951 +9118 PF09286 Pro-kuma_activ Pro-kumamolisin, activation domain Sammut SJ anon pdb_1t1e Domain Members of this family are found in various subtilase propeptides, and adopt a ferredoxin-like fold, with an alpha+beta sandwich. Cleavage of the domain results in activation of the peptide [1]. 20.90 20.90 20.90 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.64 0.71 -4.15 124 1063 2009-01-15 18:05:59 2006-08-24 11:45:25 6 31 332 4 629 1085 28 137.50 21 20.46 CHANGED sshtphstssssph...l.p..lplsL.ppps..h..splcphlhpl..ssPssspY......................spa..LotpphtphauPsppslstVtsaLpstG.lsstp....................tsstphlshsuolupsc...............phh.ps..phthYp..................ssst.........hhhp..ssp..plPppls....................................shlshl....hs....hsph ..............................................t.......t.t.h.sssp.....l.p...lplsL....p.ps....h...sp..Lpphl...h.pl..ss.Psu.s.pY......................sc.a.....Lo..pphtphau.......Ps.sp......slstVtsaL.p..p...p....G...lsstp.............................ssspshlph.p.uolsps-...............phh.ss.....phptap....................tstp................hhhp.ss.p..hplPtplt....................................shlshl....hshp................................................................... 0 204 379 512 +9119 PF09287 CEP1-DNA_bind CEP-1, DNA binding Sammut SJ anon pdb_1t4w Domain Members of this family of DNA-binding domains are found the transcription factor CEP-1. 
They adopt a beta sandwich structure, with nine strands in two beta-sheets, in a Greek-key topology [1]. 17.80 17.80 18.30 17.80 17.60 17.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.42 0.71 -4.88 2 7 2012-10-03 00:25:27 2006-08-24 11:57:19 5 1 7 1 7 9 1 171.90 28 36.20 CHANGED EcWhph-VhKp+suKsSDhtFthsspcthYLWsKMtC.lPh.VKWplsppH..ppL.L+lRhVpY.tp-NlE.uIRsP.SslhKC+sHp.pEp+hPh-SFFYlhpStccao..hsucKspsFshhhhPGssQs.FDlIFhCQcpCLDLs-RRKpMCLAVFL-DENGNElLHshIKQlhIVuYPRRDWKNFCE+csshp .............h...hphpV.pt+stK.Ssh.....st.thhLWo+hts.lPh.lpWplspth...pppL.L+lRlVpY.tppslp.uI+ss.sslhKCpsHp.cEp+.hPh-uFFYlhsSspcas.....hs...p+.uppasshl.s..Gthp..ltFDlIFhCQcpChtlt-+RKphCLssFL-DE.tp.l.athlcplhlhuYPpRDhpNFp.+.............. 3 2 3 7 +9120 PF09288 UBA_3 Fungal ubiquitin-associated domain Sammut SJ anon pdb_1tte Domain Members of this family of ubiquitin binding domains adopt a structure consisting of a three alpha-helix bundle. They are predominantly found in fungal ubiquitin-protein ligases [1]. 20.40 20.40 20.40 20.50 20.30 19.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.58 0.72 -4.37 6 131 2012-10-01 23:03:33 2006-08-24 12:52:50 5 3 125 1 95 122 2 50.80 36 19.96 CHANGED DEupLYGID+-lV-pFsuQGFE+sKllEsL+RLslKohs.sDNpTsN+IlEELLK .................thh.Ghs+-LVDcFpsMGF-h-+VV-sh+hlGIcphss.s.................................. 0 21 50 80 +9121 PF09289 FOLN Follistatin/Osteonectin-like EGF domain Sammut SJ, Bateman A anon pdb_1nub Domain Members of this family are predominantly found in osteonectin and follistatin and adopt an EGF-like fold [1,2]. 
21.10 21.10 21.50 21.20 20.20 21.00 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.79 0.72 -6.84 0.72 -4.09 14 363 2012-10-03 09:47:55 2006-08-24 13:32:02 5 12 89 21 133 310 0 21.90 49 6.20 CHANGED CtNacCK+GKsCchscps+PhC ..CtNhpCttGKhCchsc.pscPpC 0 9 22 54 +9122 PF09290 AcetDehyd-dimer Prokaryotic acetaldehyde dehydrogenase, dimerisation Sammut SJ anon pdb_1nvm Domain Members of this family are found in prokaryotic acetaldehyde dehydrogenase (acylating), and adopt a structure consisting of an alpha-beta-alpha-beta(3) core. They mediate dimerisation of the protein [1]. 25.70 25.70 26.10 32.80 19.20 25.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.82 0.71 -4.27 62 778 2009-01-15 18:05:59 2006-08-24 13:46:32 6 4 619 4 192 558 124 146.10 64 47.83 CHANGED CGGQATIPlVtAlSRVs.sVcYAEIVASIAS+SAGPGTRANIDEFTcTTucAlEpVGGAp+GKAIIlLNPAEPPllMRDTVasLsc..ssD...pssIpsSlppMltcVppYVPGYRLKppspF-th....................+VolFL..EVEGAucYLP .....CGGQATIPhVAAVSRVs.....pVpYAEIVASIAS+SAGPGTRANIDEFTcTTu+AIEsVGGAs+GKAIIlLNPAEPPLhMRDTVasLs-...-Ac.......p-sIpASIp-MsctVQsYVPGYRLKpcsQ.F.-shs.st............................hsth+suVaLEVEGAucYLP.................... 0 45 122 164 +9123 PF09291 DUF1968 Domain of unknown function (DUF1968) Sammut SJ anon pdb_1oga Domain Members of this family are found in mammalian T-cell antigen receptor, and adopt an immunoglobulin-like beta-sandwich fold, with seven strands in two beta-sheets in a Greek-key topology. Their exact function has not, as yet, been determined. 
25.00 25.00 36.80 36.30 22.90 17.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.80 0.72 -4.17 4 101 2009-01-15 18:05:59 2006-08-24 13:54:43 5 2 23 79 18 841 0 84.50 59 39.18 CHANGED PAVYQL+DPpSsDpolCLFTDFDS.QsNVsp...StsSssalTspTVLDM+uMDSKSNGAlAWSNposFuCpssFp..NuohP.uDs..Ps PAVYQLR..sspSSDpSVCLFTDFDS.QsNVSp...ScsS-ValTspTVLDM+uMDSKSNGAVAWSNpSDFuCpsAFp....NuslP.usoh....................... 0 1 1 2 +9124 PF09292 Neil1-DNA_bind Endonuclease VIII-like 1, DNA bind Sammut SJ anon pdb_1tdh Domain Members of this family are predominantly found in Endonuclease VIII-like 1 and adopt a glucocorticoid receptor-like fold. They allow for DNA binding [1]. 25.00 25.00 27.00 25.80 18.50 17.30 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.05 0.72 -4.15 6 57 2009-01-15 18:05:59 2006-08-24 14:08:40 5 4 41 1 31 51 0 38.60 70 10.96 CHANGED sYSsFcsWLpCYhVsGMsSLRD+NGRTIWFpGDPGPLAP .DauAF+AWLpCYssPGMsSLRD+pGRTIWFQGDPGPLAP.... 0 6 8 15 +9125 PF09293 RNaseH_C T4 RNase H, C terminal Sammut SJ anon pdb_1tfr Domain Members of this family are found in T4 RNaseH ribonuclease, and adopt a SAM domain-like fold, consisting of a bundle of four/five helices. These residues may have a role in providing a docking site for other proteins or enzymes in the replication fork [1]. 25.00 25.00 25.30 27.10 22.80 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.42 0.71 -4.21 8 45 2012-10-01 19:52:02 2006-08-24 14:24:43 5 2 42 6 0 45 191 115.00 40 38.84 CHANGED GSPcpDLhsKlIKGDtKDGVAuIKsRSDallT+VEGERAPsspsKhLEsl....h-sEDP+sLLTsEEa.pRacENpcLlDFDaIPDcIuspIlppYNo.KssPRuKlYsYFVKsuLsKLls+ls-F ................GosthDhhsKllKGD+KDsVAulKsRuDahho.+VEGERsPshpsphlEtl....h-p-pscsLlTc-p..a.pRacENphLIDFDaIPDsItspIlptYNshphs.s+uKlYsYFVKsuLsKLhspls-F................. 
0 0 0 0 +9126 PF09294 Interfer-bind Interferon-alpha/beta receptor, fibronectin type III Sammut SJ anon pdb_1n6u Domain Members of this family adopt a secondary structure consisting of seven beta-strands arranged in an immunoglobulin-like beta-sandwich, in a Greek-key topology. They are required for binding to interferon-alpha [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.62 0.72 -4.00 96 681 2012-10-03 16:25:20 2006-08-24 14:40:10 5 13 59 57 291 719 22 99.50 21 28.34 CHANGED TplGPPp.lpl...pstssslplslpsP.......pspthohpslas.phtYplhhacsu..ss....cppthpsspsh.hlpsLpPtosYClpVpu.......ths+pu.hSphpChpTs ..............................................luPPt.lpl...ps.ssslplplpsP...............tttths.......h.....p.......phas.......t....h.....p.Y...plh.....hhcss..ss.............ppp.t..h......p.......s..p..t..s...h...h...lpsLp.....P.....t.o..s..YClpVps.......t.s+pu.h....Sp.pChp.......................................... 0 14 34 115 +9127 PF09295 ChAPs ChAPs (Chs5p-Arf1p-binding proteins) Mistry J, Wood V anon Pfam-B_11349 (release 19.0) Family ChAPs (Chs5p-Arf1p-binding proteins) are required for the export of specialised cargo from the Golgi.\ They physically interact with Chs3, Chs5 and the small GTPase Arf1, and they form also interactions with each other [1]. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.29 0.70 -5.70 14 327 2012-10-11 20:01:01 2006-08-24 15:27:08 5 42 208 0 196 576 86 276.20 23 43.34 CHANGED psclGoaaYssGlDsSssASlAsaLpsLs.pl.pcsQhWFGcppsaKlsphoYCsaNAFo+sDhRVpV+IPGuV-oahlDpcG-+......cpt.................s-plWtETalSullRulhhu-D.......................ssssphpplstsRphNPhoss-h-cc.......FlcshEpLFhcGhpLGussclts..PThlsNaLVcullchh+hTppacpulsll-+Lpppp.PEVssLlA+lhlhtDcElcAV+lhpcuL........pps.pD.......ssLLslQucFLlsKc+s...........chALpsAppAVpuuPSEFtTWshLsclYlcLpDhENALLoLNSCPM...oapEK.hh+hssPhc.......lHLPlPh-ssL-Elsshsssc.....ppcpsDPsLlpLsAusL+uTFtpAYpLLTEIVpphGW-pLLKhRSpVFVMEEEYR .........................................................................................................................................................t..........................................................................................hs.shs..htp.-hph....h.......t......p.................h...t.tt..................................................hW.Eh.hsthlR.h....p................................h....t..........................hh......h..h..h.......................u...............t...............................................s..h..........p....N.hL..ht..sh......h...hh...t....s.....p...h..t.....sh....phhc...p.........l..h..p...p......p....s....p.....s..............h....l....sp...l...h...h.............t...p..p...E...h......pu..lph...hpp.hl................pp..........s.................t.hL...hQu...pahhpp.tc.......................chAlph..uppusphsPspapsWhh.LsphYh.thpph-pA..L....h....s...l.N.u....hPh...........pp.p.......................................................................................................................................................................................................................................... 
2 63 120 174 +9128 PF09296 NUDIX-like NADH pyrophosphatase-like rudimentary NUDIX domain Sammut SJ, Bateman A anon pdb_1vk6 Domain The N-terminal domain in NADH pyrophosphatase, which has a rudiment Nudix fold according to SCOP. 21.20 21.20 21.20 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.44 0.72 -3.65 75 1383 2012-10-02 00:00:35 2006-08-24 15:37:11 6 14 1293 3 430 990 338 99.50 22 32.64 CHANGED puhhhlhpssplllps.......................................ththsththtth...........................shshtpslhLG...hhps........................pshaulshsttss.........................shphhsLRphh......htls.spphslhupAtpllpW ...........................................................................................t.shhll.pc.pplhlsp.......................................................s.tls.h.s.pstph.............................................shssp.pslh..lG.........phpu......................................psla.slphsttts.........................................................h.h..ssl.Rplh...................pls..ss.htlhupAhpLhpW............................. 0 106 236 334 +9129 PF09297 zf-NADH-PPase NADH pyrophosphatase zinc ribbon domain Sammut SJ, Bateman A anon pdb_1vk6 Domain This domain is found in between two duplicated NUDIX domains. It has a zinc ribbon structure. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.69 0.72 -4.43 63 1999 2012-10-03 10:42:43 2006-08-24 16:14:14 6 20 1879 3 487 1363 306 31.80 34 10.88 CHANGED psapFCupCGstsphtps.thuphC..ssCstpta .....sa+FCutCGptht.sps..t..huhhC....spCtpchY........ 0 120 265 382 +9130 PF09298 FAA_hydrolase_N DUF1969; Fumarylacetoacetase N-terminal Sammut SJ, Eberhardt R anon pdb_1hyo Domain The N-terminal domain of fumarylacetoacetate hydrolase is functionally uncharacterised, and adopts a structure consisting of an SH3-like barrel [1]. 
25.00 25.00 25.00 26.60 22.30 24.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.32 0.72 -4.05 94 764 2009-09-11 16:58:44 2006-08-25 09:49:03 6 9 631 10 393 776 109 99.60 35 23.95 CHANGED QNLPaGlFo.pss.ss....RsGVAIGDpllDLuul.ttt..................G.lhs......ts.tsshspssLNsFhuLGtssWpslRtpLppLLp.................thpsppshhptsLlstuc.sph+LP .............................pNLPaGlFS.sss..ss.........RsGVAIGDpllDLuul.ttt...................u.lhs............t..tssFs..psoLNsFhuLG+ss..WptlRtpLppLLs.........................hpsspshtppsLls.sc..sphHLP........................................ 0 104 219 322 +9131 PF09299 Mu-transpos_C Mu transposase, C-terminal Sammut SJ anon pdb_1bco Domain Members of this family are found in various prokaryotic integrases and transposases. They adopt a beta-barrel structure with Greek-key topology [1]. 20.50 20.50 20.50 20.50 20.30 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.03 0.72 -4.09 36 1172 2009-01-15 18:05:59 2006-08-25 10:46:00 6 26 862 3 233 975 33 64.70 26 11.47 CHANGED Lclhhh....stptRpVp+........sG.lphh...sh+Yhsst.Ltua..sGcp...VhlRYDPpDl.splhVapp..sG......alspA ...............................hhh.....ttpRp.lp+........sG..lphp.............sh.p.Y......h.s......st....L.t.sh..........hucp...........VhlRaDPp..Dh..splhVhp...st.....hh............................... 0 68 146 204 +9132 PF09300 Tecti-min-caps Tectiviridae, minor capsid Mistry J, Sammut SJ anon pdb_1w8x Domain Members of this family form the minor capsid protein of various Tectiviridae [1]. 
25.00 25.00 197.80 197.70 18.90 18.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.96 0.72 -4.20 3 7 2009-01-15 18:05:59 2006-08-25 14:36:31 5 1 6 1 0 4 0 84.00 98 100.00 CHANGED MALINPQFPYAGPVPIPGPAPTETMPLLNYRVEGRIAGIQQARQFMPFLQGPHRAVAEQTYYAIGTGIQMGQTFNQPLINTQEG MALINPQFPYAGPVPIPGPAPTETMPLLNYRVEGRIAGIQQARQFMPFLQGPHRtVAEQTYaAIGTGIQMGQTFNQPLINTQEG 0 0 0 0 +9133 PF09301 DUF1970 Domain of unknown function (DUF1970) Mistry J, Sammut SJ anon pdb_1w8x Domain Members of this family consist of various uncharacterised viral hypothetical proteins. 22.50 22.50 24.70 266.90 20.40 17.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.60 0.71 -4.05 3 7 2009-01-15 18:05:59 2006-08-25 14:42:11 5 1 6 1 0 5 0 117.00 96 100.00 CHANGED MDKKKLLYWVGGGLVLILIWLWFRNRPAAQVASNWEGPPYMTYNQPQAGSVTLPVAGYTSPSLTLPNRNRSCGCNPAVSAAMAQGADLASKLTDSITSQLNNYAESLNDYLASQAGV MDKKKLLYWVGGGLVLILIWLWFRNRPAAQVASNWEGPPYMTYNQPQAGSVTLPVAGYTSPSLTLPNRNRSCGCNPAVSAAMAQGADLASKLT-SIoSQLNNYAESLNDYLASQAGV 0 0 0 0 +9134 PF09302 XLF XLF (XRCC4-like factor) Mistry J, Wood V, Hentges P, Doherty A anon manual Family XLF (also called Cernunnos) interacts with the XRCC4-DNA ligase IV complex to promote DNA non-homologous end-joining. It directly interacts with the XRCC4-Ligase IV complex and siRNA-mediated downregulation of XLF in human cell lines leads to radio-sensitivity and impaired DNA non-homologous end-joining [1]. This family contains Nej1 (non-homologous end-joining factor) [2], and Lif1 [3]. 
21.70 21.70 22.80 21.70 21.50 20.90 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.64 0.71 -4.48 30 240 2009-09-11 09:52:26 2006-08-25 15:05:44 6 5 179 28 128 239 0 164.00 22 41.85 CHANGED tsWphlplsp..s.........hlhpsphs..tsoas....lhloD........Lps..lWsEclspsslhp+upppstslcs...sspphphhLpclhpsh.......sspcssphsLppt.t.....tssLhlphphclsssh.shpWsh+lpppsss....slhppLshPLlphptshpppt..............................ppLtshLpcKDt...................................slspLh-phps ..................................Wthl.lst.ps.........hlhphhhs..ppu....t....lhlo-........Ltp..lWpEplspss....l.pcspp.spplss...s.sph.hhhLpplhpsh........sspcssphslsttt....................sssLh.lp..hpspLs..sh...shp.Wshclppssss....tlhp...cLhhPLhphttthppph..............................ppLtslLppKDt...................................tlpch.-pht.t............................................................ 0 24 53 96 +9135 PF09303 KcnmB2_inactiv KCNMB2, ball and chain domain Mistry J, Sammut SJ anon pdb_1jo6 Domain Members of this family are found in the cytoplasmic N-terminus of KCNMB2, the beta-2 subunit of large conductance calcium and voltage-activated potassium channels. They are responsible for the fast inactivation of these channels [1]. 25.00 25.00 25.30 25.00 24.50 24.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.13 0.72 -4.18 7 63 2009-01-15 18:05:59 2006-08-29 09:22:20 5 2 35 1 25 55 0 29.60 80 15.20 CHANGED MFIWTSGRTSSSYRpDEKRNIYQKIRDHDLLD ........MFIWTSGR.TSSSYRHDEKRNIYQKIRDHDLLD..... 0 1 3 9 +9136 PF09304 Cortex-I_coil Cortexillin I, coiled coil Mistry J, Sammut SJ anon pdb_1d7m Domain Members of this family are predominantly found in the actin-bundling protein Cortexillin I from Dictyostelium discoideum. They adopt a structure consisting of an 18-heptad-repeat alpha-helical coiled-coil, and are a prerequisite for the assembly of Cortexillin I [1]. 
23.90 23.90 24.00 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.53 0.72 -4.00 4 18 2009-01-15 18:05:59 2006-08-29 09:39:01 5 6 11 2 12 18 0 102.60 35 23.56 CHANGED KEEKtcLEAS+s-hAN+LAuLEpSLEuEKsSp-pL....hKQKDp...LcuhLtoLcupsApRppRlpELpAKl-EhL+NLEhEKhA+hELEuRLuKsEKDKAILELKLAEAhD ..........+EE+ttL-uSpsplts+LAuLppSLEspKtSp-cL.......h+QK-p...LcstLppLcspssspspRls-LpA+ls-sl+sL-p.EKhA+.-LcsRLsKscKD+AhLEL+LtEh.s......... 0 9 11 11 +9137 PF09305 TACI-CRD2 TACI, cysteine-rich domain Mistry J, Sammut SJ anon pdb_1xut Domain Members of this family are predominantly found in tumour necrosis factor receptor superfamily, member 13b (TACI), and are required for binding to the ligands APRIL and BAFF [1]. 21.30 21.30 21.40 21.50 19.00 20.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.11 0.72 -4.16 2 66 2009-01-15 18:05:59 2006-08-29 10:41:35 5 3 27 5 27 55 0 39.30 55 22.37 CHANGED lsCRKEQG+aYDHLLtsClSCsShCsQHPpQCAaFCEp+.R ............sCp+EQGpYYDpLL+sClSCtSICG.Q.HP+.Q.CAhFCctp...... 0 2 2 6 +9138 PF09306 Phage-scaffold Bacteriophage, scaffolding protein Mistry J, Sammut SJ anon pdb_1gp8 Domain Members of this family of scaffolding proteins are produced by various bacteriophages [1]. 
25.00 25.00 25.30 25.80 24.00 24.90 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.88 0.70 -5.27 2 104 2009-01-15 18:05:59 2006-08-29 10:52:09 5 1 99 2 5 48 0 273.20 52 92.96 CHANGED M-.TT-IQuoE-LTLoGsHAAASADuLVVDNANDNAGQEEGFEIVLK.DE.tPKQDPApNAEFARRRIERKRQRELEQQMEAVKRGELPEpLRVNP-LP.QPD.NsYLSE-uLAKYDYDpSRALAAFptANoEW.hKA.DARSpAVAEQGRKTQEFTQpSAQYVEAARKHYDAAEKLNIPDYQEKEDAFMQLVPPAVGADIMRLFPEKSAALMYHLGANPEKsRQLLAMDGQSALIELTRLSERLTLKPRuK.lSpAP.sDpPIpGcssAANhsAIcKQM-AAAsKGDVETYRKLKApL.KGIR ..................................................................................................................................................................................EpQhEsh.c.RtpL.EuLts.pPs..pQPpssAh..scssLtphDYDpp....AFppA.T-W.p.Kttcsc....pQt.tpptRpp.QEapQp.tQhVEAhtcHhpt.AtKLsl.DYQEhEshhhp.lPPh.tt.Ih.+hh..sEtSthLhYtLGtN.tphRQllA..h.Ds.pAhh.LsplSc+loLtP+sKps..ss.s...h.tts................................................................... 0 2 2 2 +9139 PF09307 MHC2-interact CLIP, MHC2 interacting Mistry J, Sammut SJ anon pdb_1muj Domain Members of this family are found in class II invariant chain-associated peptide (CLIP), and are required for association with class II major histocompatibility complex (MHC) in the MHC class II processing pathway [1]. 21.00 21.00 21.10 23.40 20.70 20.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.53 0.71 -4.30 11 121 2010-01-11 15:07:59 2006-08-29 13:06:26 5 6 60 11 32 96 0 111.30 39 44.98 CHANGED M--QR.....D..LI.ossppshLPh....sssscuusoRuhtloGlolLssLLLAGQAlTsYaVapQpGcIscLTpTspsLphE.LppKhPtu..ssspM+hsM.shPhLhchhs.sss..tss...hc .....................--Qp.....D..LI.usp...pp...L.sh........sss.cupsoRushhoGhSlLVsLLlAGQAsTAYFlYQQpGplcKLThTSpsLpLEsLph.......Kh.Ptss.sss+M+Msh....Phlhphhs.t.t...hs..t.............................. 
0 2 7 15 +9140 PF09308 LuxQ-periplasm LuxQ, periplasmic Mistry J, Sammut SJ anon pdb_1zhh Domain Members of this family constitute the periplasmic sensor domain of the prokaryotic protein LuxQ, and assume a structure consisting of two tandem Per/ARNT/Simple-minded (PAS) folds [1]. 25.70 25.70 26.10 35.40 25.60 25.60 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.50 0.70 -5.03 6 115 2009-01-15 18:05:59 2006-08-29 13:44:49 5 8 113 6 14 78 0 236.20 54 28.02 CHANGED SspIItQEVpRTpQQTSuLIpNlF-p+LuhLQIHpDSsuKstulhchah-pD.s-pLshFFhSlDQt-PopTP-FRFlospcullWDDGNApFYGlNp.hLcplup+VshSNNWaalpsposhG.tahLlRRoPll-ssTGEVlGahYsuVVLsNNFuLhEpL+stSNS-NlVllssspsLASSLsGs.EsY.slssVLppppssp+hDshllscTPIplpussT.lslLolQsNpsVloL ...............SS+lhuQEspRTshQTSSLIQsLFDFRLAALcIH.QD....SoAKNsSLlsALsoRD.sspLDpFFsSVDplEhSNAPDlRFISoHD.sIlWDDGNApFYGIspp..pLs+Lh++VuhSuNWHlVQTPSphpshHlLhRRoulI-ssTGpVlGYLYVGIVLNsNFALlEsI+sGSNS-NlVLsVcosPLsSTLKGN..EP.Y..o.lc.Vl...........+s.uc-sh+.D.ualVuQThLEVcuVPTaLCVYSIQsNQNVlTL............................................ 0 5 6 11 +9141 PF09309 FCP1_C FCP1, C-terminal Mistry J, Sammut SJ anon pdb_1onv Domain The C-terminal domain of FCP-1 is required for interaction with the carboxy terminal domain of RAP74. Interaction relies extensively on van der Waals contacts between hydrophobic residues situated within alpha-helices in both domains [1]. 
22.70 22.70 24.10 24.40 22.50 22.60 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.96 0.70 -5.08 4 63 2009-09-11 10:47:49 2006-08-29 15:20:43 5 6 42 2 32 66 0 210.90 56 26.15 CHANGED ERW-KVEEQLFPL+DDaoKspRsNSPAsFPDppush.TsLFHPsPI+sKs.pPGPEVRlYDssTGKLIRpGsQuStPuP......sSuhss+tEPSSFRuVpPpQ.QhFs..EphssuQDsEQPGPSRRKRQPSMSETMPLYTLCKEDLESMDKEVDDILGEGSD.DSDuEKK+s..p.c-pEptsQspK.psPs.RpEp.................s.thPuSSERSssGuRsPRGHKRKLsEE.........DAtSE....pStESSNEDEtGSSSEADEMAAALEAELNDhM ........................................ERW-KVEEQLFPL+-DasKs.p.RpsSPAsFPDppush.TsLFHPsPlpPKs..pPGPEVRlYDssTGKLIRpGs....psst.ss.........s..s.Lsl+tE...S.SFRsVpPpQ.pQhFs....EphssupD...s.EQPGPSR..RKRQPS...MSETM.PL.YTLCKEDLESMDKEVDDILGEsSD.-S-ucc+cs....p.cpct..t.pspp........s.t.pptp...................t.sssupcs.hss.p..sR.GHKRKhp-p..........-htsp.............pStcsSN--E.GSSSEADEMAtALEAELsDhh................................ 0 5 8 16 +9142 PF09310 PD-C2-AF1 POU domain, class 2, associating factor 1 Mistry J, Sammut SJ anon pdb_1cqt Domain Members of this family are transcriptional coactivators that specifically associate with either OCT1 or OCT2, through recognition of their POU domains. They are essential for the response of B-cells to antigens and required for the formation of germinal centres [1]. 
19.20 19.20 19.40 19.20 18.40 18.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.89 0.70 -5.22 2 69 2009-01-15 18:05:59 2006-08-29 15:42:10 5 5 43 2 30 56 0 197.40 55 86.37 CHANGED MHhtKS.hSEQtsp.+PYQGVRVK-PVKELL+RKRGNs..ApshssTsVVlPppsLPSYo.hG.ssh.ssstuA.s.sus-.GALCsuWluQPSs.uohQPLspWss.P-YhpHEt..uohP.hTuDMYlQPhCPSYslVGPSSVLThAptPLhTNhsshS.STsul.PQl-V..QpssLsYhPWA.PLSshPtss...........sPQhlPhPlslscPtPQp.EsA.ps.GTLslEKLL.E-E-spp..Yshs.uL.spsl ................sEQt.P.s.P.RPYQGVRVKEPVKELLRRKR.....G.+.s.....ssG..s..sssPT.u.V....V..LP.+pPLso.Yos...s.....Gsssl.-h-s.us.ss...s--us.LCsu.....W.luQPss..AsLQ..PLs..s.Woshs-Yh.c-u.......sosP.houDhYl.QPh.CPSYT.hVGsoShLTYus.PL..lTNhssp..............S..usPs..ssP.lEh.-pQuPLTYhPWsQPL...STLPss..oLQYQssussLPGPQFVpLPISIP......EPs.Q-h-DsRR.shsoLsI-KLLLE-E-sss..Y.hspsLslEG.h............................................... 2 1 3 11 +9143 PF09311 Rab5-bind Rabaptin-like protein Mistry J, Sammut SJ anon pdb_1tu3 Family Members of this family are predominantly found in Rabaptin and allow for binding to the GTPase Rab5. This interaction is necessary and sufficient for Rab5-dependent recruitment of Rabaptin5 to early endosomal membranes [1]. 
26.00 26.00 26.00 26.00 25.90 25.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.12 0.71 -4.39 20 544 2012-10-11 20:01:01 2006-08-30 09:24:06 6 90 98 14 254 485 0 165.40 42 29.53 CHANGED ELuhucuQshhslpppLstlpsp+p+lcspl+RLspENpaLRsEhutoppch..ptpEppstpL.-chccLpahsphp+.Dthpppst....cpchcschssLcp.......hhss.E-phtsph..................p.tsptsstpssthEhsucLRoL+sLllQhssQs+hE........htlshsKpALEDLppsstccpscl ........................................ELuhu-uQlhhuLss+.Lssl-uE+Q+L+s.......QVRR.LsQ.......ENpWLR-E....L..u...s..TQp+L........QpSEppVAQLEE....EpcHLcFh..spl++hD.tsts..s........t..............-.c.....c...t....c.s.p.p.-sLc-..................Lhss-E--.stth...................................................t.tsttustp.puuhElPARL.RTL+NLVIQYuuQ....GRYE...........................VAVPhCKQ..ALEDLp+osG+cHscl................................................................................... 0 51 71 148 +9144 PF09312 SurA_N SurA N-terminal domain Sammut SJ, Bateman A anon pdb_1m5y Domain This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment. 
29.50 29.50 29.50 29.50 29.40 29.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.25 0.71 -4.24 19 1802 2012-10-02 13:36:56 2006-08-30 10:53:39 6 12 1787 8 392 1588 590 113.80 32 28.46 CHANGED lD+lVAVVN-sVlLpS-L-ptlcpVcpphtppsspLPPcsVLccQVLERLIl-plQlQhAccsGlRlsDspLspAlusIAppNshol-QhppuLup-GloYspaREQIRcEhlluclR ..............................................lDplsAl..VNssllhpo-l....cth....h....p......p....l..ph...p...h.....t....p........t........t.....t.....p...l........P....s......p......s.....t....L.....+....c.......Q..l.......L....-+LIh-pl..lQ......hu.p+.hG.l..c..l..s....DppLD.p...AI....s....s...I.......A....p.......p......N....s...h...T....l......-..Q..h.+..spL....s.t.c.G.l..sa.s....pa....Rpp....lRc....-hhhsclp......................................... 0 109 231 319 +9145 PF09313 DUF1971 Domain of unknown function (DUF1971) Sammut SJ anon Pfam-B_3000 (release 20.0) Domain Members of this family of functionally uncharacterised domains are predominantly found in bacterial Tellurite resistance protein. 23.10 23.10 23.60 23.60 22.60 22.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.70 0.72 -4.26 44 1196 2012-10-10 13:59:34 2006-08-30 14:03:34 6 10 1013 13 105 527 12 81.20 43 43.97 CHANGED KphPlascsolPphhhpcH...NTKsGsauplsVlpGpL+ahths-ptt...spclhhssspsshstPphWH+VEsho-Dhchplc ......+phPhWsKpThPtulhp+H...sT+sGsas+LoVhcGslKahshs-.....Ets.pssphlhhsuuphsh.hsPptWHplEshTDDspapl-......... 0 10 37 74 +9146 PF09314 DUF1972 Domain of unknown function (DUF1972) Sammut SJ anon Pfam-B_3020 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in bacterial glycosyltransferases and rhamnosyltransferases. 
21.10 21.10 24.00 23.10 20.60 20.40 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.20 0.71 -4.83 12 494 2012-10-03 16:42:30 2006-08-30 14:34:02 6 7 403 0 103 539 148 177.60 41 47.78 CHANGED hpcVaIIGS+GlPA+YGGFETFVEcLlcaQpsp.sIpYaVAChu-sptpp.....pFcYpGADCFsIssPplGsA+sIhYDhhAIphAlchsKppp.ppPIFYILGsoIGsFIs.at+pI+plGGplalNPDGlEW+RuKWutPVppYLKaSEKlMsKaADLlIsDNpsIEpYIpscYs...scTpaIAYGTD ....................................................................................................ppVaIIGo+GlPApYGGFE....TFVEcLs.p..h...p.p..s............p...sI.p........YaV.uCh.......s.c...s....p.s.t.p..........................phca.pGs.csas..l..s..s.P..p...l...G.s...A.c...sIsYD.hh...AlphAlph.h+pp.t....pts.I.hY....lLuss..lGs.Fl.h.shh+pI+phGsp.l.hlNPDGhEWcR.u.KWut.sV.+pYh...........KhSEphMsKaADllIsDspsIcpYlppcYs...........scTsaIAYGsD...................... 0 25 61 79 +9147 PF09315 DUF1973 Domain of unknown function (DUF1973) Sammut SJ anon Pfam-B_3022 (release 20.0) Family Members of his family of functionally uncharacterised domains are found in various eukaryotic calcium-dependent chloride channels. 
21.00 21.00 21.60 21.50 20.80 20.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.18 0.71 -4.84 13 287 2012-10-03 16:25:20 2006-08-30 14:51:31 6 17 61 0 185 287 1 171.20 37 20.05 CHANGED ulQLESpG...slpsschlsGTVsVDSTVGsDThFLlTWssp..sPpIh...LtDPsG+pYso..FhsD.tss+sApLpIPGT.AcsGsWTYoL.ptpsssQsLTlTVTSRAuSsolPPlslsA+hspcoupaPSPhlVYAcVpQGhLPlLuAsVTAhI............EopsG+sV..TLcLLDNGAG.ADssKNDGIYSR ................................................................................lQl.Sps....plpspthhssoVhlDuoVGp-ThFllT.Wssp..............P.p..Ih.....LhDP.sGphhss..............Ft.hD....hs...+hutLp.I.P...G.....s...Ac...sGtWsYol............pt......p...ss..s...ps......l....ol.........TVT..SR..As......s.s........s.s......s......Pl.o.....Vsu...th...s...p...ss...sp....a.....P...s...P....h.l..lYApVpQGh..hPlLu..AsVTA.hI..............E.spsGp.s.s.....sLcL.hDNG.A....G..A....DshKsDGlYSR...................... 0 86 92 118 +9148 PF09316 Cmyb_C C-myb, C-terminal Sammut SJ anon Pfam-B_3027 (release 20.0) Family Members of this family are predominantly found in the proto-oncogene c-myb and the viral transforming protein myb. Truncation of the domain results in 'activation' of c-myb and subsequent tumourigenesis [1]. 
20.30 20.30 20.90 20.90 20.10 20.10 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.38 0.71 -4.58 15 288 2009-01-15 18:05:59 2006-08-31 09:38:54 5 26 77 0 111 268 0 154.40 46 24.59 CHANGED FSPSQFLNtsspp-shsl-sssLTSTPVC.uQKsh.oTsLpRDpTP.hhQKENuhFRTPsl+RSll-sTPRTPTPFKsALsh.-cKYGPLKhlspTP.aLEEDlpEVl+pEsspslIlt-psc...PhhKK...tKQ.phcSP..hKKVRKSLsLchh-.pch..ssphhspsss.scptPs ...........................................................FSPSQFL.Nssssp.-phsl..EsPoLTSTPls.upKlhlTTP....hH+-p.Ts.+....sQKEN....ss.......FRTP.s...h+RS.l..lp.soPRTPTPFKsALAsQEtKYGPLK...h.l..P...QoPuaL.EDlpEVlKpEospsh....hl.t..-.pc....................P.h++.........hKQ.phpoP..scKstp.hs..ctW-t-ph...ssphhsps.s.......p........................................................................ 0 15 23 54 +9149 PF09317 DUF1974 Domain of unknown function (DUF1974) Sammut SJ anon Pfam-B_3029 (release 20.0) Family Members of this family of functionally uncharacterised domains are predominantly found in various prokaryotic acyl-coenzyme a dehydrogenases. 
25.00 25.00 28.20 26.30 23.90 23.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.80 0.70 -5.00 103 1236 2009-01-15 18:05:59 2006-08-31 10:02:30 6 6 996 0 224 886 173 278.60 50 35.24 CHANGED lRCHPYlLcEhpAutssDp.ppuLcpFDchlhuHlGashsNshRuhhhuLTuuphu.suP.....ssstoppYY+plsRhSAuhAlhuDluMhsLGGsLKR+EhlSARLGDlLSpLYLuSAsLKRa-Dc...GR.ppDlPhl+auhpcsLhphppAhcchlpNFP.s+hluhlLRhl.lFPhGp.p..hptPSDcLspclAchlh...pPus..sRcRLspsh.Ylsp......s......csss.lGtlEpAapshhps-slhcKlpcAh+...tsplshh......phpphhptAlctGlIopsEschLpcscthRhcsIpVD-F .................................IRCHPYVLcEMpAApss..D.....lpsFDclLFpHIGassSNtlRShWLGLTtGhho..ssP.......ssssT+RYYppLNRlSAsLALLuDlSMulLGGuLKRRERlSARLGDlLSpLYLASAs.LKRY-DE....GRpc.uDLPLVHWulQDuLapAEpAhD-lLpNFP...NR.sl.uulL+sl...lF.P.hG...R...+...ahuPSD+L-cclAclLQ...sPsu..oRsRls+Gp.Ylss.....u..............-csP.VGhlEpALhslluA-Plap+lsKtlt.pplPhp.........pL-clscpALtpGlIsp-EAslLhcAEctRh+uIsVDDF............................................................... 0 52 102 172 +9150 PF09318 DUF1975 Domain of unknown function (DUF1975) Sammut SJ anon Pfam-B_3057 (release 20.0) Family Members of this family of functionally uncharacterised domains are predominantly found in the N-terminal region of various prokaryotic alpha-glucosyltransferases. 
26.80 26.80 26.90 27.10 26.70 26.50 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.43 0.71 -4.71 51 2040 2009-01-15 18:05:59 2006-08-31 10:17:07 5 3 483 0 135 1104 1 188.20 20 38.94 CHANGED hhYhlspslshssSGlEhAthtRhplFcp.hshssKhlhhsap..splpphhcphsh.....pc.scllshY-aFpch...hsssptt.hsh...cplsh.pthphttss..thhphh............ps.phhhtlhhtspp...phlppl-ahs.pspl.l++-hashpGahSphthast.ssclhhcpaas.-Gphhhcchh.tspptt.phs...h.........hhFps ...............................................a.h...l..t.ss...s.h.ph.placp.hsh....sschlhhs...at......plp.p.h.hpphsh...............hs.schhsh.Y..s..aFpsh.....ht.pptt.hsh..........pplsh........t...t.hchhtss......p.hc.lhs....................ps....phh....s.ph.aaps.p..t......phlphl-aac..p..spl.h++-.hYstpGhhSshph.....hss..p.....s......c......hhh....cpaas.pGphhlpchh...s..sptt...h.....hhh..tt......................................................... 0 43 57 115 +9152 PF09320 DUF1977 Domain of unknown function (DUF1977) Sammut SJ anon Pfam-B_3043 (release 20.0) Family Members of this family of functionally uncharacterised domains are predominantly found in dnaj-like proteins. 20.90 20.90 20.90 21.10 20.80 20.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.42 0.72 -3.80 29 448 2009-01-15 18:05:59 2006-08-31 10:32:53 6 9 265 0 284 412 2 105.10 31 28.52 CHANGED lluphhss........sPs.YSh..p.o.tashpRpTsphpVsYYVsp..sFppca.....susplppLEppVEp-YlppL+psChpEppp+..........................pphh.tAp..hhtDpc..hhpcApphphP.sCccLpcl ......................loplhso........sPs..YSL........pso..ssa..s.hcR...pT.p.......p..l..p....V....sYYVsp..sFp.pcY..................puspLppl.EcpVEcDYlspL+psCh+Eppp+..........................pphh.tAp.....hatDtc....hhpcApphths...sCpcLpp.............................................. 
0 78 129 206 +9153 PF09321 DUF1978 Domain of unknown function (DUF1978) Sammut SJ anon Pfam-B_3044 (release 20.0) Family Members of this family are found in various hypothetical proteins produced by the bacterium Chlamydia pneumoniae. Their exact function has not, as yet, been identified. 25.10 25.10 25.20 63.70 23.20 25.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.69 0.70 -4.93 6 49 2009-09-14 14:09:44 2006-08-31 10:41:27 5 4 2 0 25 49 0 196.80 38 35.96 CHANGED cDcpKsupAcpcatEht-phcca+KshFWLsE-ssIDh...ossssWshst.PpRps..........hsclspcEh...Wp+pstLK+hcspYspshsphpcpsoccNpptLp-tppch.cthp-happEhccscpRlcsLpthYspl.sspp-sctppphs..h.......cL-phh-pIEpphppssc-Q-sYWKpp-s+E....tEh+Ectsctcp.cEhpcsLct......L-chl+pppcpLchlctclpctphphst.sspppLpsu .......cDcsKSupAEp+hp-hp-pWcca+cslFWVcE-GshDl....shh.usWshsh.PhRpt.........RhsclshHEl...a-cThhlKch.cpphshA+sthEKptSpcN.pthpchptth.p.hpchhtpEhpcstpRlcpLpthYstl.sp..-tchppph...........sL-phhttl-pphppssp-Q-.Yhc..-.pE.....Ehctphsphh..cph.p.hp.......h-phlcth.ppL..hph.h.tht..hpt..t.h.lp......... 0 0 0 25 +9154 PF09322 DUF1979 Domain of unknown function (DUF1979) Sammut SJ anon Pfam-B_3053 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in various Oryza sativa mutator-like transposases. 25.00 25.00 32.20 31.70 21.40 16.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.80 0.72 -4.33 3 157 2009-01-15 18:05:59 2006-08-31 10:53:18 5 17 4 0 87 153 0 57.80 75 4.81 CHANGED MSSKlhFchaaGEGNVRaGPsGVDLSDFlsooRGIDRPAERSFpSIpNWLMRGFRIDP ....MSsKlhFQlhHGpGNlRaGPsGVDLSDFlhTu+GIDRPAERohpSIhuWLhRGhRlD.................. 0 0 0 2 +9155 PF09323 DUF1980 Domain of unknown function (DUF1980) Sammut SJ anon Pfam-B_3062 (release 20.0) Family Members of this family are found in a set of prokaryotic hypothetical proteins. Their exact function, has not, as yet, been defined. 
25.60 25.60 25.60 25.90 25.50 25.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.99 0.71 -4.54 29 841 2009-01-15 18:05:59 2006-08-31 11:00:14 5 1 748 0 102 467 0 173.80 37 63.92 CHANGED hlRhLILhGashlhhaLhloGclspaIss+ashhshhuhllhhILullQlhhhhpshcpp..................cpH..t....t+hhshhlhllPllhGhh....hPssoLD......SshsstKGhphsh........ts....tscptopsphl+s-..............stt.htp.hpt.h.pphhtpppIpls--sahcsh-tI..hp.s.scahG+ ..............MlRhllLhGah.LhhaL.plSGcLspYINh+YsYLuhlohll.hlLA...l..VQlh....l....hh+...phc.p.............................HsHh.ps+........t+hhuhsLLslPlllGlh...........FPoloLD..................................Ssh.V...sAKGapFPl..........................utts....ps.s..tts..p..sQaL+PD.............................TS.Ya.scs....sYcctM.ppth.cca..hsp..ssIplssENYhcsMEhI.YsYP.s-FtGK........................................ 0 30 70 85 +9156 PF09324 DUF1981 Domain of unknown function (DUF1981) Sammut SJ anon Pfam-B_3041 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in various plant and yeast protein transport proteins. 20.90 20.90 20.90 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.71 0.72 -4.46 41 681 2009-01-15 18:05:59 2006-08-31 11:31:41 5 13 272 0 470 659 6 83.80 34 4.95 CHANGED h+FLcp-ELspap.FQ+-FLpPFEhlh.....pps..psh-l+-hlLpCltphlps+s..spl+SGW+slFsllshuuppps-pllphuap....llph.l ...............................+Fh-ctELspFp.FQc-FL+P..FEplh......pps..pssslR-h...llcCltQh..lpupu..ssI.+SG.W+slF...uVhptAus...-....p....p.csllphAFppht................................................... 1 163 245 367 +9157 PF09325 Vps5 Vps5 C terminal like Mistry J, Wood V anon Pfam-B_6206 (release 20.0) Domain Vps5 is a sorting nexin that functions in membrane trafficking. This is the C terminal dimerisation domain [1]. 
23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.27 0.70 -5.11 22 1147 2012-10-03 12:17:00 2006-08-31 11:48:15 5 17 292 0 720 1151 2 193.30 20 42.46 CHANGED htphhsshtpulst........shKhsEs.....DpaF.-+pphl-sLEppL+pLhcul-slsspRp-Lutshs-hupulstLussE.s..psLSpsLspLu-lpt+lcphhpcputpDhhpLuthlc-YlRhluSlKssFspRhKhapphppsppsLpKK+pphsKhpts.tsp..t-KhpphppElp-hcp+sppscpcFccloppl+cElp.+F-p-+hcDFKsslptaLEutlcsQcEhl-hWEsFhsp ..................................................................................h........................th..p-.............D.hF.p........tt.h.ph.cpplp.p.h.pttsc.ph.spp..+.c.......plutshs..phutshtt.Lu......t.....E...t.p................s..l.......s..p........s.......h.......pplu.......c.h.p.......ph..pp......h......t........p.c.....u.....ps.-.hph........sch...lp.Yhp.ht.......ul+sh.hppR.pth..php.pspptLpct...+tt..t.....ch....................................h.......t.....................................................p...............c...+hp.p....h..................t....cl.t.p..............hp.....tp.p..thp.......pphcpl.s.......p....h...pp..Eh..cap....pp+htsh+p.lhphhp..lphtpp....ht............................................ 0 198 345 556 +9158 PF09326 DUF1982 Domain of unknown function (DUF1982) Sammut SJ anon Pfam-B_3077 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in the C-terminal region of various prokaryotic NADH dehydrogenases. 25.00 25.00 28.00 26.70 20.90 19.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.70 0.72 -3.81 98 630 2009-01-15 18:05:59 2006-08-31 12:44:19 6 11 567 0 299 594 813 49.80 34 7.25 CHANGED Dpltssshts.hsh.h..........splup..ss.F...psslpDFYhTsPIuRASssMAcC .............................Dpltsss.htp.hss...............hssphss..ss.h......ptslcDFYhT.ssIoRASssMAcC.. 
0 96 171 232 +9159 PF09327 DUF1983 Domain of unknown function (DUF1983) Sammut SJ anon Pfam-B_3073 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in various bacteriophage host specificity proteins. 20.90 20.90 20.90 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.64 0.72 -4.03 47 1596 2009-01-15 18:05:59 2006-08-31 12:57:54 6 28 644 0 69 1683 37 78.60 46 8.51 CHANGED lpptupuhsshsG....clsAhaslKsps.sssGphhsAGhuluh-.sssusspSpllltADRFull...ss..ssGshtsP......FVlp..sG..plal .......................................................................lpphp+s.hDsNs.........phsAMWul.Klpp.spD.G..phY.l.A.GIGhuhE.sTss..s.........hhSQlLluADRIAhI....sP...ssGNp..pPh......FVuQ..GsQlFM............ 0 7 22 45 +9160 PF09328 Phytochelatin_C DUF1984; Domain of unknown function (DUF1984) Sammut SJ, Bateman A anon Pfam-B_3070 (release 20.0) Family Members of this family of functionally uncharacterised domains are found at the C-terminus of plant phytochelatin synthases. 25.00 25.00 26.20 78.80 21.10 24.70 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.82 0.70 -4.90 14 95 2009-01-15 18:05:59 2006-08-31 13:15:16 5 3 42 0 22 104 0 238.20 45 54.33 CHANGED p+sPulLYTLSC+cESWhuhAKYLhEDVPhLLpScslcslpclLSslhcSLPuNhspFIKWVAEVRRpE-Gs.psLScEEKpRLtlKpcVLpQVp-TcLF+hVschLpp.pt.stp.stsstcDSLs.plAAsVCCQGAslLsGp.hsussthCC+c.Tsh+slcus.GcsssTVlSGsVl..ssssEQuVDhLlPhs.tpssssss.........t.stp..hhHPossDVLTlLLLALPPpTWtsIcDcpLhsElpsLVSp-sLPslLQcEV.LHLR.cQL ...................+.sshLaTLSCKcpsWhuhuKYLhE-VPhLL+ScslssVccll.slhpSLPushspFIKWlsEVRhtE-Gs.ppLSpEEppRLtlKpcVLpQl+pTcLFphlscaLpp........s.ssp--SLs.phAApsCCQGAthLs..Gs..s.SsthCs+c.sshpslpus..ucu.ssVloGpVl..ssGsEQslDhLVPpp.sps.s.ssss.........t.s.p..shaPossDlLTVLLLALPPpTWpsIpDpplhtEhppLlSpcpLPs.LQpEV.hHLpcQ....... 
0 3 14 17 +9161 PF09329 zf-primase Primase zinc finger Mistrj J, Wood V anon Pfam-B_9710 (release 20.0) Domain This zinc finger is found in yeast Mcm10 proteins and DnaG-type primases [1]. 21.80 21.80 21.80 23.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.44 0.72 -4.36 37 283 2009-01-15 18:05:59 2006-08-31 13:27:25 6 12 258 4 201 287 3 46.00 37 6.37 CHANGED GpupDhGhCpuh+.+sGphCsshlNt.....pcspaCpaHhphp....h++h.pupR .....GpupDlGhCKuh+..KsGch...CsshVNh.......pcs-aCpaHlptp....h+Kh.putR........... 0 58 101 164 +9162 PF09330 Lact-deh-memb D-lactate dehydrogenase, membrane binding Sammut SJ anon pdb_1f0x Domain Members of this family are predominantly found in prokaryotic D-lactate dehydrogenase, forming the cap-membrane-binding domain, which consists of a large seven-stranded antiparallel beta-sheet flanked on both sides by alpha-helices. They allow for membrane association [1]. 25.00 25.00 70.10 61.00 20.70 20.00 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.81 0.70 -5.03 19 896 2012-10-02 00:48:38 2006-08-31 13:47:55 6 2 874 2 110 550 214 277.90 68 51.68 CHANGED VFYIGTNssscLTclRRclLssFcsLPluGEYhHRDhFDIA-KYGKDTFlhIcphGTcpLP+hFulKuplDshhp+lsalscaloD+lMQhhuclhPsHLPcRMp-aR-+YEHHLlLKMuscGlpEA+paLcpaFucus..GsaFECos-EGp+AaLHRFAAAGAAIRYcslHpscVEDIlALDIALRRNDc-WhEpLPtEIsspllcKLYYGHFhCHVFHQDYIlKKGsDsptlc+cMLcLLDpRGAcYPAEHNVGHLYcAcssLppFY+cLDPTNoFNPGIGKTSKpKpW ..................VFYIGTNpPpVLT-IRRHI.LusFcsLPVAGEYMHRDIYDIAEcYGKDTFLMIDKLGT.D+..hPhFFsLKGRsDAhL-KVpF.htsHFTDRsMQphu+LFPuHLP.RM+saRDKYEHHLlLKMuG.DG.......VuEA+paL............t-a..F............ppA.....-......Gs....FFsCTsEEGsK...AF.LHRFAA..AGAAIRY.pA.VHuDEV.EDILALDIALRRNDp-WaE+LPPEIDspLlHKLYYGHFMCaVFHQDYIVKKGVDs+A.............LKcpMLcLLppRGAQYPAEHNVGHLYcA.poLp+FYRc.DPTNShNPGIGKTSK+KpW.................. 
0 20 49 82 +9163 PF09331 DUF1985 Domain of unknown function (DUF1985) Sammut SJ anon Pfam-B_3094 (release 20.0) Family Members of this family of functionally uncharacterised domains are found in a set of Arabidopsis thaliana hypothetical proteins. 29.80 29.80 30.10 30.50 29.50 29.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.72 0.71 -4.38 50 173 2009-01-15 18:05:59 2006-08-31 14:19:20 6 13 9 0 34 177 0 119.50 24 17.19 CHANGED LLsRpLhscKcpE.hWhlhuGpPlRFSlcEFtllTGLsCtphPpphcsppttph........shhpph....h...t.ppshsltclhphLppt.......hhsstp+lpluhlhhlsullhsppphst....lp..hhchsp-lchhhsaPWG+h.uFpt.hhpsl .........................................hh.pph.hpp..p.hWhhhss.PlRaulpEathlTGL.Ct.hspp.p....th.th...........thhtph....h...h.pps.hshtcl.phL.tt.......thps.tp+lphuhlhllpullhsppptst....ls..hlchspslchhhpaPWGchuFphhhpt..................................................................... 0 12 13 13 +9164 PF09332 Mcm10 Mcm10 replication factor Mistry J, Wood V anon manual Domain Mcm10 is a eukaryotic DNA replication factor that regulates the stability and chromatin association of DNA polymerase alpha [1]. 
20.10 20.10 20.40 22.10 19.30 19.80 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.59 0.70 -5.10 9 127 2009-01-15 18:05:59 2006-08-31 15:11:39 6 8 106 1 85 126 3 292.30 32 37.32 CHANGED holPTsGhhplppc.....tpu.httsGsshpShSAspLLKpQ+ppp.phLthh+ccuEclQK+......hLpSostscssSpsooo.......pushpoPptus-h.pspth....ssoPKLuRuhs.u.Dl.Fhscpss...t.supu.pAtKhAAltKL+A....LtKtsPN.lK+Kpupouc.h.lsptVcpp.......ssuu.....ppus--pEPthKKcR......p.cEhp+ILsAKSpHosllcttEtEhQEcYFssL.+KEphEEKMpshhEhp.C+sVTCppCKYTtFpsu-cClpEpHch+hHDAsKRFF+Cs.CGNRTloLtRLPKppCusCsh.KWERsuMl+EKpG.plGGEsLhsRGEEc.KFLsS .......................................................................................................................................t.....t......p.hss.th.tppp.p....h.h..h.ttp...ttthp........hhtsstt.........s........................s.ph.p....p...t............s.s..hPpLupu..t..u.pl.h.hs...tp............s.u.......K..hsAlh+h+s...........ltK.sPN.h...p.p+t..tpstc....t....lp.ptscpp................t..s............pcp.c.t.++p+.......................pp-ph...pcl...l..p.A+SpHssllcpsEt-hpEcYFp.L.+KEpMEEKMpshpEhp.C+sVpCcp..C..p.YTtFpss-pChpcpHs.l+.h+DulKRFF+Cs..tCGpR..ol.ol.p.+.l.Pp.p...pCps..Cs.......+.WERsuM..........h+...E.....+p......s..hhstEtL..RG-Ec.pFlsS.................................................... 1 30 39 66 +9165 PF09333 ATG_C ATG C terminal domain Mistry J, Wood V anon Pfam-B_61662 (release 20.0) Family ATG2 (also known as Apg2) is a peripheral membrane protein. It functions in both cytoplasm to vacuole targeting and autophagy [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.01 0.72 -3.84 36 689 2009-01-15 18:05:59 2006-09-01 09:08:59 6 33 269 0 463 699 7 95.20 27 4.20 CHANGED +thSlYusQ...PtslppGlppAYpoLpc..............................slthstpslhpsstch..hc....spuspuA..........stsVl+tsPssllRPhIGuT-AlupsLhGlpNplDPppppc.c-KYK ..................................................................p...phhspp....Ptslt-Glspuhpultc..............................ulhsu...hsulhppPhcs....tc...........pcGssG..h..........................hpGVs+ulss.sls+PhhGss-hsSpshtGlcN.......ph.................................. 0 164 244 364 +9166 PF09334 tRNA-synt_1g tRNA synthetases class I (M) Bateman A anon Pfam-B_107 (release 20.0) Family This family includes methionyl tRNA synthetases. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.33 0.70 -5.81 39 7775 2012-10-02 18:00:56 2006-09-01 16:59:03 6 66 5004 42 2168 33465 18847 310.30 30 47.26 CHANGED hhlTosl.YsNussHlGHhh.ohlsADlhsRapRlp.Gp-.VhFlsGoDEHGp.IphpApcpGloPpchs-c.hpptapchacp.hsIsaDtFhRTTsppHpchspchapcLhcpGhIactphpthYsssscp..Fls-+.................upCP................tCG+plc.................hh+.pcpaFFcLscapct....................Lhcal.cps.t....psphpphs.sal..cpGLc-huIo...pchsW.GI.l.....P.....ssps+slYVWhDAhlsYloust.hs....................c.pa....pcaW.....p.....t.hHhIGKDIlhFHslaWPAhLhu..........tshp.......lPpplhupualsh-Gp.KhSKSpGsslps.pchlcp.as..sDhlRYaLhp.psshspDscFShcchhp+lNs-LssslGNhlsRsh 
........................................................................................................................................................hlTsslsYss.G........t........l.Hl.GHhh..shltu.Dlhu.R.a.........p.R....h....p......G............h.........s..........V.....h..........a.........l...............s.........G..........s.................D........t..H.........G.............t...........................l......p..........t...............t.............A.................t............p..................t..................G......................h................s....................P.................p.................p.................h.....................s.................c.................c................h.................t................t.................p.................h............p............p........................h................p............t..............h................s..................l.................S............a...............D............p.............a.............h.........p....................T.............o....s......s......p......a..........t....c.h....s..p........t.........l.........a....................p...cL...h....c...p...G..............Ih...p.t.p.h.p.....t...h.a..s..s..p..p...p...p....a...h.s-p......................................l.t.t...h.................................................sut......h..p...........................................................................h..p.....p..cpaFhc..l..s..t.......h..t..p.h........................L..t.ah....p.......t.....t.................p....h.......ph....h....pa.h...............p.....s......L.p......t...hslo........ps..h........a.Gh...l.........P......................................tt.t+h..h..Y.V..........W........h......D........A...........h.....s.......Y...h..s.....sht..t...............................................................a.............pp.a...W.................................................................h.p.....h..h.......GK.........D......l...................
h....F...........H...s....l...h....a....P...h..h.L.u..............................th..................................h.P.....p.p.....l.....h.s..p.t.....a.....h.....h.....h........p......s......t......K.hS.K...........S.....h..Gs..h.l..s................h...l..........p...............p......h.s................-......hR.....Y..a.hh......t...................s........h.....p.D.h.sh......s......p......t..h..h..t...p..h.N......-..lssthsNhhsRs.h.................................................................................................................................................................................................. 1 764 1399 1855 +9167 PF09335 SNARE_assoc SNARE associated Golgi protein Mistry J, Wood V anon manual Family This is a family of SNARE associated Golgi proteins. The yeast member of this family (Swiss:P36164) localises with the t-SNARE Tlg2 [1]. 32.60 32.60 32.60 32.60 32.50 32.50 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.50 0.71 -3.91 83 15761 2009-01-15 18:05:59 2006-09-01 17:20:35 6 40 4479 0 3638 10044 3069 125.40 20 54.58 CHANGED lP....sthlh..hsuGhhh........uhhh.....uhlhshlGshlGshlsahlu+hht.............pthhpphhpppthpt...hpphh.........pc..huh.hh............lhlhRhlPhlstshls..hhuGh.splsh.tpFhhsshlGthshshlhshhGth .................................................................................lP..uphlh......hh.u.G..h.l.h......................shhh.......................shlhshl.uu...h...lGsh....lsa...h.l.G.Rh.hG.............................phhhp...h...h..h..p..p..p..t.....h...p..p.............s..pph.h......................p+...aG...h..hs....................................lll.s.R..a.lP..h.l...psh..ls...hs.AGh..s.p.hs......h.tp.FhhhshlGsh.hhshlhshhG..h.......................................... 
0 1172 2266 3020 +9168 PF09336 Vps4_C Vps4 C terminal oligomerisation domain Mistry J, Wood V anon Pfam-B_8681 (release 20.0) Domain This domain is found at the C terminal of ATPase proteins involved in vacuolar sorting. It forms an alpha helix structure and is required for oligomerisation [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.87 0.72 -4.08 26 1201 2009-01-15 18:05:59 2006-09-04 10:04:39 5 14 381 41 789 1144 12 54.50 29 9.96 CHANGED hlTPCSPGDPs.A.lEMoWh-ls....uccLhEP.slThpDFlKAlpss+PTVspcDlc+apcFTc-FG .........................h............................p..l.......tpch......P..slshpDFpcAlpps..+sSVSppDlc+aEcasp-FG................. 0 272 408 623 +9169 PF09337 zf-H2C2 His(2)-Cys(2) zinc finger Mistry J, Wood V anon manual Domain This domain binds to histone upstream activating sequence (UAS) elements that are found in histone gene promoters [1]. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.05 0.72 -4.54 11 664 2009-01-15 18:05:59 2006-09-04 13:23:51 5 42 181 0 194 519 2 39.20 51 10.82 CHANGED Hhpp.HuGINKoTotIApKYHWhRIKETVucVI+sCscCK ............HslA.HoGp-u.Tah..KloSKYaWPNlRKDVlKVIRQCcQC.h.. 0 88 124 161 +9170 PF09338 Gly_reductase Glycine/sarcosine/betaine reductase component B subunits Mistry J anon Pfam-B_25756 (release 20.0) Family This is a family of glycine reductase, sarcosine reductase and betaine reductases. These enzymes catalyse the following reactions. sarcosine reductase: Acetyl phosphate + methylamine + thioredoxin disulphide = N-methylglycine + phosphate + thioredoxin Acetyl phosphate + NH(3) + thioredoxin disulphide = glycine + phosphate + thioredoxin. betaine reductase: Acetyl phosphate + trimethylamine + thioredoxin disulphide = N,N,N-trimethylglycine + phosphate + thioredoxin [1]. 
20.00 20.00 20.30 20.70 19.80 19.60 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.32 0.70 -6.30 16 404 2009-01-15 18:05:59 2006-09-06 15:52:04 6 3 133 0 71 362 11 324.90 29 81.50 CHANGED M+LELGpIaIKDlpFGcpTcVcsGVLhlNKcEllchltp.D-+Ipol-l-IA+PGESlRIhPVKDVIEPRVKVEGsGslFPGhhuKV-.TVGpGRTHlLKGsAVV............ToGcIVG.FQEGIIDMSGsGAcYTPFS+hhNlVlls-sh-Glppa-HEp...AlRhsGLKAAtYLGEAu+slpPDElcsYETpPlhEpspcYPsLPKVuYVYMLQSQ..GLLHDTYVYGVDAKpIlPTllYPTEVMDGAIlSGNCVSACDKNsTYlH.NNPlIc-LYc+HGK-lNFlGVIlT.NENVhLADKERSSsasAKLschLGhDGsIlSEEGFGNPDsDLlMNC+KlEpcGIKTVllTDEaAGRDGsSQS.LADusscAsAVVosGNANpllhLPsM-KlIGcl..phl-slAGGasG......SL+tDGSI-sElQAIsGATsElGFspLoA+sY .....................................................................................Lphtphhlpclpaup..pop.l.c.sssLhl.pc.p.h.th..h.t.....cphl.pphclcl.hpPs-.t.h.p.h..s.hh-slps.ts+hpGt......................lGpGhThsL.pG.shVh............hs..G........p............hu....p..-Gh.l-....t.h...h..s..t..s.....h....s.....t........plhl.hp........th..p.....ht..pt....h.hhu...hchs..a.l.s....p...hc.t....h...p...s..-..p.......h.ph..hhp.t.t....hsth.pVshlh.h.sQ....Ghha-s.........hhh.........G....h...p......hh.....th...........l.....Ps.hh.PpElhDGul.huh...shlusssKpsohpahppPll.cplhpccsp-lshhGVlhs.sps.h.s-K.hsuphsuthsc.hhssDGsllop.EGaG.NschDhh.shcplttpGl.sVhlo.......................................................................................................................................................................................................................................................................... 
1 42 66 68 +9171 PF09339 HTH_IclR IclR helix-turn-helix domain Bateman A anon Pfam-B_70 (release 18.0) Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -7.78 0.72 -4.37 93 11639 2012-10-04 14:01:12 2006-09-12 11:35:16 5 84 2544 26 3363 13801 1334 51.10 26 19.10 CHANGED ulsRulplLchluppsps........h..sls-lAptsGls+uosaRlLpoLhptGalcpcs .............................................slt+ultlLc..h...l.s..p.p.s.ss.....................h..sls-.lupp...s.....G..l.s.+.o.TsaRlLpo.LtptGalpp................ 0 800 1991 2786 +9172 PF09340 NuA4 Histone acetyltransferase subunit NuA4 Mistry J, Wood V anon Pfam-B_29415 (release 20.0) Family The NuA4 histone acetyltransferase (HAT) multisubunit complex is responsible for acetylation of histone H4 and H2A N-terminal tails in yeast [1]. NuA4 complexes are highly conserved in eukaryotes and play primary roles in transcription, cellular response to DNA damage, and cell cycle control [2]. 20.90 20.90 21.20 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.82 0.72 -4.34 29 357 2009-01-15 18:05:59 2006-09-12 13:53:42 5 8 275 0 236 335 0 80.70 39 40.29 CHANGED +pcLp.phlp+KppL-ppLssLEcpIYchEspYLp.....tsoshGNIl+.GF-sa..hpsss................ttsp++pttap-sDRlFShSSso.h ...................tcLt.phlp++pplp.cp...LssLEcp......IYshEsuYL-................pophhGNII.+.Ga..DpY...lpspp.......................................sutss+R..pppap-s-RlFSpSSlT................................... 0 85 135 199 +9173 PF09341 Pcc1 Transcription factor Pcc1 Mistry J, Wood V anon manual Family Pcc1 is a transcription factor that functions in regulating genes involved in cell cycle progression and polarised growth [1]. 
20.90 20.90 21.30 21.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.31 0.72 -3.94 57 386 2009-01-15 18:05:59 2006-09-12 13:56:42 5 11 320 13 261 415 29 76.60 23 62.38 CHANGED hphslplsFtotc.AplshpuLps-..sphptspsphphshps.s..............hLh..lphpA........p-sphLRsulNoalc.lplshcshpt ...............................thslplsaso.tc.A..plshpuLtsD....tphp.stsppph....shss.s..................................................................hLh..lphpA............pcs..+hLRsulsualc.lplshcshp...................................... 0 72 146 210 +9174 PF09342 DUF1986 Domain of unknown function (DUF1986) Mistry J, Rawlings ND anon Pfam-B_99782 (release 20.0) Domain This domain is found in serine proteases and is predicted to contain disulphide bonds (see Swiss:P98159). 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.99 0.70 -5.11 3 34 2012-10-02 13:45:52 2006-09-12 14:19:54 6 16 29 0 24 2142 2 201.90 31 10.89 CHANGED N++sElV-sFc.....................--hcWPWlAcVYl-GshhCoGVLIDtSWVlVScSCLpslsLcHpYlSVVLGGuKTh+Sl.+GPYEQIhRVDCa+slP+ScllLLHLcoPloFS+HVLPTFVP-opNcNpocu.cCloVGQDD.hGRsKTluIaL.cNsTNCsScpl.CYK+cpKQP.h....lhN..stsMsSpHE.stlISCaTPpthsslscFT.............sss..sLKN.....sScthassS.pGVlV..C+sSRoGWaPsuhapapRGsC.GFccls.GVRoLE-uY+clQ-llHK 
.............................................................................................................p.h..h.WPW...L.A.....c...l......a...........s......s.....G......c.....h....h.....Ch.....GlLl-.pWlLsp.puC.......l........p........s......l.......s.......h........p........s.....p......Y...l.......o........s....l.....L......G......t......u....+...........o....h......h.............h........c.........u.......s......a......p......Q.......I...........h..........V.......D.........p........h........c..................l.....................c.............o.............p........l......LL.....HLc.........p.........s.....p....a....o.+..a..V.Ph..hl...c....................t..p..ts.....C......l...u....l..h....p..sp..............p..........o..................t...l..h.....h.............s........p....s....C.....s...........Capht.................................................................................................................................................................................................................................................................. 0 8 11 21 +9175 PF09343 DUF2460 CHP2217; Conserved hypothetical protein 2217 (DUF2460) Bateman A anon PSI2 target BIG_186 Family This model represents a family of conserved hypothetical proteins. It is usually (but not always) found in apparent phage-derived regions of bacterial chromosomes. 
21.90 21.90 21.90 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.12 0.71 -4.96 59 277 2009-01-15 18:05:59 2006-11-08 13:52:02 5 4 254 0 88 226 66 193.60 38 84.33 CHANGED H-lRFPh..sluhGusGGPERpT-lVsLusGtEcRNssWucuRRRYDAGhGl....RSh--lpsLlAFFEARpGphaGFRa+DhsDapSs.sstsss.tDQhlGh...GDGssspFQLsKsYsu......GtpsYtRsIsKPVsGoV+l.ulsuschttu....tatlDhsoGhloFs..psPssGstloAGFcFDVPVRFDTDplphSlsuFpAGpsP....slPllElRh ................................................H-lpFPh...sluhGusGGP-hpTclVshsoGhEpRNs.WupuRR+a-suhul...........+Shc-lptlluFFEAR+GphauFRa+D.hDapSs..t.................................................................................s.hDQslGp...GDGssspFQLhKsYss..........s.tsYsRsIs+PVsG.oVhl....uVsGschstu....taslDssoG.....lVT..Fs....tsPstssslTAGF..tFDVPVRFDoDplphsls.....sapuGpls.....slPllEl+........................... 0 20 59 68 +9176 PF09344 Cas_CT1975 CT1975-like protein Bateman A anon PSI2 target BIG_184 Family CRISPR is a term for Clustered, Regularly Interspaced Short Palidromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family is represented by CT1975 of Chlorobium tepidum. 
25.00 25.00 29.30 29.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.06 0.70 -5.08 25 679 2009-01-15 18:05:59 2006-11-08 14:22:11 5 3 640 0 127 467 19 337.10 44 96.95 CHANGED lplHlLpoassSNLNRDDoGtPKoAhaGGspRsRVSSQuhKRAhRpuhp.tthhst....hG.lRT++lsc..lhctltp................tuh......ctp...Atphscphhsshu.....Klpptppttt...........................coctLhalutpElstluphspcttpsspsspttth......pccpp................................ulDIALFGRMlAs...ss....phNV-AAsQVAHAlosHtlssEsDaFTAVDDLtp....--..sGAuahGssEFsSusFYRYsslDlcpLhcNLGGs.......p.........-hAtcslpAhlcAhspssPoGKQNoaAupshsshlhhph.tpspPhSLAsAFcpPV.......pspcs...hhpsulptLsshhpphcpsYGtt....pshhthsshssptt......s.psolccLlshlt ...................................IplHlLpuaPsuNLNRDDTGuPKTsl.hGGssRlRVSSQSLKRAhRsSthacps..lus........lGlRotRlucc.sAph.L..h-......................pG..l..-pc..cAhchut..plsshhG.............KsKpc+.c..cc..............................................-TcpLlalSssEh-sltsLApphsp-cc.s.sp.ccch.t.....h++cph..................................................................................AVDIAhFGRMLAs...ps.......chNV-AAsQVAHAhulpcs.h.lEsDaFTAVDDLp...........pss.....ED...uGAGHlGpstFuSAlFYpYhsIsh-hLlcN..Luus..............c..............tLAspslcAFs-AhlpssPTGKQNSFAu.+s.hAsaslsph.t.s-.QPhSLAuAF.cP..l......susc.......lpsulp+lsshtcshsplYspp........spssshsl.hspp...ss........................pssh................................. 0 38 93 111 +9177 PF09345 DUF1987 Domain of unknown function (DUF1987) Bateman A anon PSI2 target BIG_79 Domain This family of proteins are functionally uncharacterised. 
21.30 21.30 21.40 22.90 21.20 21.20 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.29 0.72 -4.20 56 185 2009-01-15 18:05:59 2006-11-08 14:48:30 5 3 128 0 66 195 81 98.00 35 74.40 CHANGED PpVpFcspsulhclpG-SYsEssh...sFYpPllsWLppYlpp..spptlphsh.cLtYaNTSSo+thhplhchL-ch.tppsspVplsWaap..p-DcchhEhGE-a .......PtlpaDhpsshLplpG-SYPENuh...tFatPllpalcpYLsp............spp..........s..........lplcl.cLhYhNoSSsKslhslh-hL-ps.tppGppVslpWaY-..p-D-phtEhGE-F................ 0 29 47 57 +9178 PF09346 SMI1_KNR4 DUF1988; SMI1 / KNR4 family (SUKH-1) Bateman A anon PSI2 target BIG_91 Domain Proteins in this family are involved in the regulation of 1,3-beta-glucan synthase activity and cell-wall formation [1][2]. Genome contextual information showed that SMI1 are primary immunity proteins in bacterial toxin systems [3]. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.44 0.71 -4.13 178 1863 2012-10-01 20:46:44 2006-11-08 15:03:16 5 30 934 3 480 2059 25 133.60 15 53.86 CHANGED ssoppplp...p.hEpp..luh.pLPps.a+phlp.........................................tthhhthtshshhthhhtthph..tthtphhphpphhpph..................................................................................................................................h.pphlshus...sssushhslchsstt................splhhhspc........tsp...hthl..................us..oFsca.......lpp 
..................................................................hoppplp....p.hEpp.....hs.h....pLPps.apphhp...............................................s.ss..h..h...h...t...h.t...h...p..h..............t......hh......h..ttph........pthtt...h..hp.h...t..thhpt...................................................................................................................................h.tthl..shup.....sssu....s.h..lsl.-hpsst.t...............splh.h.hsp-......................................tpp.........hhhl.......................................up.....sappal..t................................................................................................................. 0 137 283 391 +9179 PF09347 DUF1989 Domain of unknown function (DUF1989) Bateman A anon PSI2 target BIG_202 Domain This family of proteins are functionally uncharacterised. 20.40 20.40 20.80 20.60 20.20 20.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.68 0.71 -4.80 144 1044 2009-01-15 18:05:59 2006-11-08 15:22:35 5 9 576 7 482 1045 1397 168.80 30 62.00 CHANGED ppplsutsshuhtlcpGphlRlsDlcGsQssDhlhasAcc.hs.....ERhssscThpht...tsha.lssG..shLhSshs..Rshholl.pDo..........sGhHDslsusCsspp.phh.aut..pph........csCp-Nhhtulu......paG...ls.......pcDlssslNhFhsVsls.ssGp.lphtsshSpsGcaVpLcAEhDll ......................p..hlPutsshuhtlptGphlRlhD...lcGs....QssDhhhasAcs..p.....ERhssscThphp....pssalssG..shLaSshs..RshhoIltDo............................sGhHDsluusCssppsphh.aGp...pph...........csCp-Nhhhulu..............caG..........Ls.............................ppDlssslNhFhslsls.s..........cG..........p.....ht....hpssh.opsGcal-lcAEhDll................................................ 0 123 281 393 +9180 PF09348 DUF1990 Domain of unknown function (DUF1990) Bateman A anon PSI2 target BIG_236 Domain This family of proteins are functionally uncharacterised. 
20.70 20.70 20.70 20.70 20.60 20.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.87 0.71 -4.60 30 289 2009-01-15 18:05:59 2006-11-08 15:25:40 5 8 240 0 132 262 2 152.20 33 78.55 CHANGED oYsplGuotttth...........ss.....GappsctcshlGpGct.......sF-pAspAlhsWphhctuhlplh.....ssspsstsGssVslp.hthh.......hhhhhsssRVlhll.....-Es.........c+hGFuYGTLsGHsEsGEEpFhlch......s.ssGpVahclpAFSRPAph...hu+luhPls.+hhQ+ths++.hhpul ...................................................................................htsot.t.h.........ss........Gaphhchpt.lGpGpt.......tFcpA..spulhpWthpct...uGlpV..................ssspsstsGssVh....lt.hth.........................ltsPsRVVaVh................--s.........................................shtGFuYGTLsGHs...sGEEpFsVch.......c..tsusVahplhuFSRPAsh...hs+hutPls.ph.hQ+hhspR.hhcuh................................. 0 55 101 123 +9181 PF09349 OHCU_decarbox DUF1991; OHCU decarboxylase Bateman A, Percudani R anon PSI2 target BIG_237 Domain The proteins in this family are OHCU decarboxylase - enzymes of the purine catabolism that catalyse the conversion of OHCU into S(+)-allantoin [1]. This is the third step of the conversion of uric acid (a purine derivative) to allantoin. Step one is catalysed by urate oxidase (Pfam:PF01014) and step two is catalysed by HIUases (Pfam:PF00576). 
20.50 20.50 20.60 20.50 19.60 20.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.00 0.71 -3.94 139 1098 2009-01-15 18:05:59 2006-11-08 15:27:51 5 15 925 26 464 1030 527 154.20 29 57.32 CHANGED hs...shspspF.stthsslaEpo....sWluctshs.......tp.....sasoh.ssLhsuhtphlpsss....................pppphsllpAHP-Luu+hh.........stpLospSssEQusAGlsph................ssp-hpc.hppLNssYcp+F.GFPFllsV+.....G.ps+.pp....Ilsshp............................pRl.pNs...cpEhppAhpplp+IAthRLpc ............................sthst.pph.htthsslhEps....sWlschAhs...............t+.....Pasoh.ssL...hsshpp.h.h..p.shs.......................pspthsllpAHPcLus+h................ttph..o..s.....pS.s..pEQ..u..uuGl.sp.h................sspphpp..hppLNttYc.p+F.GasFl........l....slc......G....p.s+...pp.....ILsshc............................pRl..pNs......ppEhppAhpplt+IAthRLp........................................................... 0 124 273 377 +9182 PF09350 DUF1992 Domain of unknown function (DUF1992) Bateman A anon PSI2 target BIG_207 Domain This family of proteins are functionally uncharacterised. 20.60 20.60 20.70 20.70 19.20 20.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.13 0.72 -4.15 74 1227 2009-01-15 18:05:59 2006-11-08 15:29:28 5 11 1099 0 334 771 12 84.10 39 45.09 CHANGED lsEcpIpcAhpc..G-F-s...LsG.tGKPLs...hccs..shhs.phphth+llpssGhlP.tlpLp+El.............................tp....lpchlsph .................hAEc+It-Att+....G-FDN...LsG.pGcPL.....L-Ds...Sal..Ps-lRhuYRl....LKNAGh.lPPplE.p+Eh...........................hp.....Lh-hLp..thpp.......................................................................................................... 1 97 193 267 +9183 PF09351 DUF1993 Domain of unknown function (DUF1993) Bateman A anon PSI2 target BIG_238 Domain This family of proteins are functionally uncharacterised. 
20.40 20.40 21.50 22.30 20.10 19.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.81 0.71 -4.31 60 472 2012-10-02 14:44:17 2006-11-08 15:31:17 5 8 351 6 234 454 160 160.20 38 87.75 CHANGED hYssoVPs.ahp.....hLssLsslLcKApsaApspslcsss.LlsuRLhPDMhPLstQVphAs-tA+tssARLu....Gh.....-sP...........shs.DsEso.Fs-LpARIucslsalpulssspl..-usps+slsh......hsspphshsG.p.sYlhsauLPNFYFHloTAYuILR+pGVtlGKtDYlG .......................hYpholPshhp.hLpsLsslLsKA.pu..a.Apspt.hc..sss.llsuRLhP..D..............Mh......PLstQVphA.sDtA+tss.A..R.Ls....Gh....-sP................phs..DsE..sT.as-LpuRIscTlsaLps..l..s...s.ppl.-us....ps..+..p.l.sl.................hsstshph.s.G.p.sYlhsauLPNFaFHlTTAYsILRHpGVtlGKhDYlG.......... 0 39 113 189 +9185 PF09353 DUF1995 Domain of unknown function (DUF1995) Bateman A anon PSI2 target BIG_247 Domain This family of proteins are functionally uncharacterised. 26.00 26.00 26.50 26.50 25.40 25.90 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.50 0.70 -4.71 39 273 2009-01-15 18:05:59 2006-11-08 16:33:42 5 9 102 0 174 269 140 221.80 21 67.59 CHANGED L.PssLppA..........hpputpAltsALt..sspsRhp....l-lpFs..sL................plhsluhphhpt...Ltp......ts.pshhllasDuGuuALApR-h.sshs......plhshsphh.st.ts.................t.........chhlhVsPpsh-....l-plEtlscthss.............psllhlNs+L-...Dsu.lG..lG.su.Rph...RcpFluoapssYaLcPL..ps....................GALh+s.aP......ssWplapps......s..ssYphlsphppRPss-plsth 
..................................................................................Ppshpph..........htputpAhttAlp......tttt+h.....l-lph.P.....tL.....................................................p.hpluhthhpt...htp...............ps..pthtllaP-s....uts..shAppph.tshs..................hplsslssht..h.t..sts......................................................................s.....chhlhls...Psshp.......lsplcths.pthss...........................+Pll.lh.NscLc......shp.su....................hu...hss.+.ph............+pp..Fl..ssa..ps.....sYhl+sl..ss.....................GslhRs.YP........s.Wplhhcp.........s.spYt..hl.tp..tp.+.Pshpplp.h.................................... 0 67 131 161 +9186 PF09354 HNF_C HNF3 C-terminal domain Bateman A anon PSI2 target BIG_367 Domain This presumed domain is found in the C-terminal region of Hepatocyte Nuclear Factor 3 alpha and beta chains. Its specific function is uncertain. The N-terminal region of this presumed domain contains an EH1 (engrailed homology 1) motif, that is characterised by the FxIxxIL sequence [1]. 20.80 20.80 21.10 21.10 20.70 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.81 0.72 -2.83 26 211 2009-01-15 18:05:59 2006-11-09 09:21:50 5 5 86 0 97 189 0 62.20 43 14.86 CHANGED sHPFSIsNLM..Sspp..............pKh.DlK.sY-th..pY.uuY..............ssh.shshs........psshp.ssshsssss..........YYQ ..NHPFSINNLM..SsEpp.............pKh.DlK.sY-psh...pY..suY..............sus.sss.s.......ssctshc..sss....ss..............YYp............................................................................ 0 16 25 52 +9187 PF09355 Phage_Gp19 Phage protein Gp19/Gp15/Gp42 Bateman A anon PSI2 target BIG_98 Domain This family of proteins are functionally uncharacterised. They are found in a variety of bacteriophage. 
21.10 21.10 21.30 23.00 20.20 20.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.49 0.71 -4.25 8 161 2009-01-15 18:05:59 2006-11-09 09:41:09 5 1 154 0 13 124 2 113.30 32 84.71 CHANGED LhRsLcsDEpcRApALLcsVscplRhEhs+sG+DL-shlstcPsYhtsVhpuVsl-lVARslhsusc.EPhuphSEoshsYShSuoYhls......uGGLhIcDSELcpLGL+K.............pRhGslshYGhs ........hhRsLss-.EpspspshLpsspchlRpchs....-LDt.h.....sstcs.hhtsV......lcl.Acslhp..hsc....sP...u...hoET..sGsYoaphoatls......sGsLhIpcpEhcpL.....Glp+.............p.Rhu.h......h.................. 1 1 5 9 +9188 PF09356 Phage_BR0599 Phage conserved hypothetical protein BR0599 Bateman A anon PSI2 target BIG_187 Family This entry describes a family of proteins found almost exclusively in phage or in prophage regions of bacterial genomes, including the phage-like Rhodobacter capsulatus gene transfer agent, which packages DNA. An apparent exception is Wolbachia pipientis wMel, a bacterial endosymbiont of the fruit fly, which has several candidate phage-related genes physically separate from obvious prophage regions. 20.40 20.40 23.90 22.50 19.70 16.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.87 0.72 -3.84 73 343 2009-01-15 18:05:59 2006-11-09 09:42:31 5 5 305 0 99 303 38 76.40 37 28.73 CHANGED suaFstGplshhsGsssGhsttl+pcps.......stlpLhpshstslssGDtlplhAGCDK...phsTC+sKFsN..hlNF+GFPalPGpD ............................saFstGhltahsGs.stuhthtlttcts..................stlpLhtsh..slss....G-thplh...sGCDK...ph.sTCcsKFsN..hlNFRGFPalPupD..... 0 20 64 76 +9189 PF09357 RteC RteC protein Bateman A anon PSI2 target BIG_10 Family Human colonic Bacteroides species harbor a family of large conjugative transposons, called tetracycline resistance (Tcr) elements. Activities of these elements are enhanced by pregrowth of bacteria in medium containing tetracycline, indicating that at least some Tcr element genes are regulated by tetracycline. 
An insertional disruption in the rteC gene abolished self-transfer of the Tcr element to Bacteroides recipients, indicating that the gene was essential for self-transfer [1]. 20.60 20.60 20.70 20.90 18.50 20.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.38 0.70 -4.49 17 391 2009-01-15 18:05:59 2006-11-09 09:46:05 5 2 123 0 52 328 12 175.30 27 80.01 CHANGED pFFK.hKP.hhu+LlaasclaphEhppPpGshcstppahppclpcLpphhpp...shsFhpYh+sttshhDcpYFsRsphclt..ssphhhppD.pFoTu......aDhhsApllAs-hl.hahscclcthtpt........tt...tpslpWTusKhsLlELIYALpuptslNsGphsIKclushhpplFsl-L.tchY+oYh-lKpRKp.sRTtFLccLp-sL.p+Mpc-D ..............................................................................................................................hhh..phhth..ths...t..t.p...thh.tph.tlp....t......hhtYhc.t.p.hs...h.p......tp.ph......h.hp.s..Fsp.......hp.hhut...s..hh.....tph.............................lpWTust..hsLlELlYuL......sts..s..l...s...s..G....p...h.slpcluthhpplFslcl..sc..hY+hahc.l+pRKt.sRThFLDpLp-pL.c+M.c............... 0 19 42 52 +9190 PF09358 UBA_e1_C Ubiquitin-activating enzyme e1 C-terminal domain Bateman A anon PSI2 target BIG_346 Domain This presumed domain found at the C-terminus of Ubiquitin-activating enzyme e1 proteins is functionally uncharacterised. 
29.20 29.20 29.40 30.30 29.00 29.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.46 0.71 -3.96 57 525 2009-01-15 18:05:59 2006-11-09 09:54:28 5 25 308 2 345 522 14 123.90 36 12.42 CHANGED a+NuFlNLALPhhshoEPlsssctph.....pchca...............TlWDRaclpt......s........hTLpchlcahcpch.sLclshlotGs.shLY..ssa.......tcpp-RlshplscLlcplscp......l.stpchlslplssp.D.psspDl.-lPh..l ......YKNuFlNLALPaFuF....oEPlss.s+tca.......pshpa..........................TlWDRaclpu.........p.......hTLp-hlcahccc......s.L-loMlSpGs..uhLY...usa.......s+hp-RL................shpho-lVcplsKp.................ls.sph+tlll-lssp.....D..pss-..DV..-lPhl..................... 0 128 190 271 +9191 PF09359 VTC VTC domain Bateman A anon PSI2 target BIG_223 Domain This presumed domain is found in the yeast vacuolar transport chaperone proteins VTC2, VTC3 and VTC4. This domain is also found in a variety of bacterial proteins. 20.80 20.80 22.40 20.90 20.40 20.60 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.83 0.70 -5.39 44 868 2009-01-15 18:05:59 2006-11-09 09:58:08 5 23 587 8 395 831 403 232.00 26 51.08 CHANGED sRpshKahlc.cslhpl+ttlh++Lslhsasssp.......................................................s.sIsSlYaDsss.hchYpp+lp+hpsspplRlRWY.up..t.........splFlEpKh+....ppshsst......KpRhplcp+.lpsal..ssp........t.................................................................................................................phpp.pphssElp....hlhpp.pLpPhlpspYpRsAFphss.-splRlolDoslph..........................................................................................................W++h-ls...hp.s...hlttsth..hs..aullElKhps.....................chspWlp-Ll..uchspplspFSKYhpGhAsLa 
..................................................R.phKahlp.pphhtl......pthlh..phhsh..assp..................................................................................................t.sIsSlYFD..s..p.hphapp..clp...ph....p..........tpcplRlRhY..st..t.......................splalEhKp+.................................tpshss............KtRh.slp.t.ppspthl.......pGph.................................................................................................................................................htp..pplh.pElth...hh.hpt...pLpPhhhstYpR......pua......t......h..t......-......s...........p....l.R...lolDpslp..................................................................................................................a+phcls.....t.s.....l.tt............hslhElKhps..........................thPtWlpcll......schshtsspFSKaspuht........................................................................ 0 147 259 354 +9192 PF09360 zf-CDGSH Iron-binding zinc finger CDGSH type Bateman A anon PSI2 target BIG_227 Domain The CDGSH-type zinc finger domain binds iron rather than zinc as a redox-active pH-labile 2Fe-2S cluster. The conserved sequence C-X-C-X2-(S/T)-X3-P-X-C-D-G-(S/A/T)-H is a defining feature of this family [1]. The domain is oriented towards the cytoplasm and is tethered to the mitochondrial membrane by a more N-terminal domain found in higher vertebrates, MitoNEET_N, Pfam:PF10660 [2]. The domain forms a uniquely folded homo-dimer and spans the outer mitochondrial membrane, orienting the iron-binding residues towards the cytoplasm [3]. 
21.20 21.20 21.20 22.20 20.50 20.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.37 0.72 -4.12 158 1359 2009-01-15 18:05:59 2006-11-09 09:59:42 5 27 737 21 688 1317 995 42.40 34 41.39 CHANGED sppsPhtl............................phpp...pphhhCpCGpSp.spPaCDG.oHp .............................................................t...sPhhl............................p.pp...pphhhCpCtpSp.sh.....PaCDG.SHp... 2 241 412 580 +9193 PF09361 Phasin_2 Phasin protein Bateman A anon PSI2 target BIG_183 Domain This entry describes a group of small proteins found associated with inclusions in bacterial cells. Most associate with polyhydroxyalkanoate (PHA) inclusions, the most common of which consist of polyhydroxybutyrate (PHB). These are designated granule-associate proteins or phasins. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.01 0.72 -3.99 132 960 2009-01-15 18:05:59 2006-11-09 11:03:55 5 1 504 0 408 873 126 99.50 18 65.25 CHANGED hcphtstp.+ssh-s....hhshsstshcGhpclsphshphs+sshppuhstscplhusKssp-hhp.l...Qsphspst..hEphlutu+clt-lsspstp-hhcshcsphs ................................................pph.stt.cpsh-s....hhthsphshcu....h....ppl........sp...........hplphs+sshppshsp.h.cph....hs.sK.s...s...pch...hp.h....Qsph.hppt..h-phhs.pu+cltplsppstp-htc.hptph.................. 0 92 225 298 +9194 PF09362 DUF1996 Domain of unknown function (DUF1996) Bateman A anon PSI2 target BIG_243 Domain This family of proteins are functionally uncharacterised. 
19.90 19.90 20.00 20.30 19.30 19.80 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.90 0.70 -4.67 63 716 2009-01-15 18:05:59 2006-11-09 16:06:26 5 20 264 0 462 652 24 214.10 31 52.22 CHANGED DPlVsPGth.ou.HlHplhGusuFshsh.s...h-.hppusCToCshsp.DhSsYWs.....ssLYa.ct.pN.........Goh..chVP.................GuhslYYhtt..............ttslpAFP...Ga.....RMlsGssttp....................t.tts...tptslsapChsstttt.........................spsaPspsCs......sG....lptslhFPoCWD.GpsLD.o.sc..................HpoHhAYPs....th.s.....sGs.CPsuaPl+lPplhaEshWDTstassts...t...ssFshSs..............GDs..oGYuhHuDFl.GW ...............................Dslh.PG...ss.HhHphhG..s................ss........hshs..s......hp.ht.tu..s..sToC......p.pt.DhSsYWs.........Ps.lhh........s............G.p.h..hss..................uthp.sYYhtt.............................ttlpsFPt.....G..h+h.lsGcsptp.................................hsppslsatChssps.t............................t.ths.s..p...Cs......sG....lphp...lhFP......s..CWD.GhsL-.o..sc.............................a+sHhuYss.............t........tGt.CP.s.s.aP.l+lPplhhclhWsss...t.hs...s.ht.............p.h.hhS...................G.......ouashHuDF..htGW............................................................. 1 164 312 412 +9195 PF09363 XFP_C XFP C-terminal domain Wood V, Bateman A anon Wood V Family Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyeraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 [1]. 
21.20 21.20 21.60 21.80 20.70 21.00 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.18 0.71 -4.89 93 1022 2009-01-15 18:05:59 2006-11-10 17:50:17 5 13 862 16 389 968 40 198.30 45 25.77 CHANGED KQPp.QaLoh-pAhpHCspGlGIW-WAS.sD....putE...PDVVhAsAGDlPThEsLAAlslLRcphP-LKlRhVNVVDLh+Lpsp..s..-HPHGLSDc-F.......DslF.TsDKPVIFAFHGYPhLIH+LsY+RsNppNlHVRGY+EcGohTTPFDMsVhNclDRF+LshDslcRlPp....ltspuutlppphccplhcH+pYlpcaGpDhPElpsWpWs .........KQPp.QaLoh-EAtpcsspGlulW-.WAS..s-...........ps.tE..........PDVVhAsuGDhPThEsLAAlslL+..............cphP-..LKlRhVNVVDLh+Lpss.........p....p+P+GLSDc-F.......DslF..Tp.DKPVlFAaHGYshLI+cLhacRs...N....t...c..NlHV+GYcEc.................GshTTPFDM.tVlNclDRF+Lsh-slc....clst....................htspuuth....hpph....pstlhcH+pYl+cpGp.DhPElpsWpW.............................................. 0 98 230 324 +9196 PF09364 XFP_N XFP N-terminal domain Wood V, Bateman A anon Wood V Family Bacterial enzyme splits fructose-6-P and/or xylulose-5-P with the aid of inorganic phosphate into either acetyl-P and erythrose-4-P and/or acetyl-P and glyeraldehyde-3-P EC:4.1.2.9, EC:4.1.2.22 [1]. This family is distantly related to transketolases e.g. Pfam:PF02779. 
19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.05 0.70 -5.91 6 1135 2012-10-02 16:07:47 2006-11-10 17:51:28 5 14 914 16 430 1562 249 358.00 48 46.75 CHANGED hlo-ctLcplDtaWRAANYLulG.IYLp-NPLh...+EPLc.EclKpRLlGHWGTsPGloFlYAHlNRlIpKaDtsMlYlsGPGHGGPAhlusoYL-GoYoEhYPclopDcpGhp+LF+QFSFPGGIsSHhsPETPGSIHEGGELGYuLSHAYGAlhDNP-LIVsCVVGDGEAETGPLATSWHSNKFlNPtpDGAVLPILHLNGYKIuNPTlLuRIs--EL+shFcGhGYcPhaVpu....cDs.shH+hMApshDpsh--IpsIQ+sAR...sssps.RPcWPMllhRTPKGWTGPKa.......lDG..hhsEGoaRAHQVPLuss+cssupLp.LccWhcSY+PEELFDtsGslptslcshsPcG-KRMuuNPpANGGlLpcsL+lPDa+cYuls ...................................................................................................s..s.p.Lptlc...taWRAANYLol..............GQlYLh...c.NPLL...........+cPLp..-clK...s+l...l....GHWGT....s....PG.....NF...lYuHLN....Rl....I...p...c...h..s.......ls.....h..........ha.ltGPGHGGPuh.lu.ss.YL-Go.Yo.E.h.Y.P.c.lo.pDppGhp+LF+pF.....S.F.......P....G.G........l.s.....SHs.u.P.E..TPGSI..H..E..G..GEL....GYuLu....H....A.....a.....G....A.....s.....h.....D.....N.....P....D....L....l...sssllGDGE.AE.T.G.PL..A.sSWa.SNKF.lNP.h....pDG.AVLPILHLNGaKI..u.N..P.T..lL.u.Rho.c.-E.L.pphFcGhGacPh.a.V-G.............cD....tsh.Hpt...h.AtshDpslpcIptIQppAR.................p.s..s...s.s..+P..pWPMIlhRoPK...................GWTGP.+p.......lD.G..p.lEGsaRAHQVPlsss..p...p.s..pHh.chLpp.Wh+SY+P-.ELF....D.......p.......s.G...p....l..h..s-lt.t.....l.sP.p.G.p.p.R.MusNPhsNGG.h.lh....+sLphP.Da+caAl............................................................................ 0 111 259 365 +9197 PF09365 DUF2461 CHP02453; Conserved hypothetical protein (DUF2461) Bateman A anon PSI2 target BIG_248 Domain Members of this family are widely (though sparsely) distributed bacterial proteins, about 230 residues in length. All members have a motif RxxRDxRFxxx[DN]KxxY. The function of this protein family is unknown. 
20.80 20.80 21.40 21.20 20.20 18.40 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.21 0.70 -11.38 0.70 -4.94 99 698 2009-09-11 12:04:33 2006-11-14 14:11:37 5 3 644 0 255 606 173 203.20 28 84.13 CHANGED sssshpFLpcLp..tNNsR-WFpspKspYcp.l+pshhshlspltspls.phcsph.s..h..psslaRIaRDl.RFS+DKoPYKsphusth.ptt.........pp....tsu...aYlclps.st..shluuGhap.....PpsptLpplRppIppsssp...hcpllpphphppha..............................t..s-pLKpsP..................+..Gasp..ccPhl-hL+pKsahshpphss.ptlhsschhcplsptapthtPhhca ................................................................t..pshpFLppLp..tsNs+pW...........Fpp............H+s................pY-p.lcpshpshlppltspht.phc.th.t.....tcs.laRIaRDs.RFS+D.KoPYKsphuuhh.pst...............tchs....tsuaYl...c..lpP..st......shlu.....sGhap.....sp.pstLtthRptItcpspt.......appll.pshphptha...................................................................hst-p..LK..psP.....................................................+..Gast..-hshl-hL+pKsassht.phss...chl.h.s.s.c.hhppltchapthpPhhca....................................... 0 79 170 223 +9198 PF09366 DUF1997 Protein of unknown function (DUF1997) Bateman A anon PSI2 target BIG_266 Family This family of proteins are functionally uncharacterised. 
20.80 20.80 21.60 21.10 20.40 20.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.79 0.71 -4.48 34 280 2009-01-15 18:05:59 2006-11-14 15:55:48 5 4 121 0 159 288 167 151.10 21 67.50 CHANGED sp.........tls.cYLpp.pRhhpshh.cshclptLsc.....spa+h..........pltshphhth.plpPslslclhspsss..............h.hp..shclcGlshl.......scaslshcupl..........................tsppstLpucscLsVslph.PthlphlPcsllcuoGcp...............................................lLptlltplppRlsppLhpD....appahtt .................................................................................................t...lt.cYLpp.tphhpthh..cs..p....hpt...ls.c...........ppach.................................phh.hth.hth.plpPhlslplhspsps.......................h.hp..ph..cl.cG..hsh.......sppas.l..s..hpuplh..p........................................tsstsplpsch.clsV.slpl.P.slphlPp..sllcssGst...............................................lLppllpphpt+hhppl.pDappah..t....................................................................................... 0 37 94 136 +9199 PF09367 CpeS CpeS-like protein Bateman A anon PSI2 target BIG_280 Domain This family, that includes CpeS proteins, is functionally uncharacterised. 20.60 20.60 20.80 21.60 20.20 19.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.92 0.71 -4.36 66 209 2009-01-15 18:05:59 2006-11-14 16:11:35 5 3 80 1 90 233 201 158.60 29 89.90 CHANGED shtpFhppStGcWhSpRosHpL...sh..pcs..EsspSplslp..lssscsplhpls...p..tlsss.thhsuhthpWpup.phpp...pspsssslhhhlP..ts..ppGtLLRstGYsEphss...supaphssDs.sLsLpTcYsss.hspERlWFsssslR.hRsSslpphsGhs.......pso.FsoElRp ....................hhpFhptStGpWhSpRosHpLsh....pcs....EpspSplhlc..Lsssssplhpls...p.hplsss...thhsGhplpWpup.thsp...psppusslhshlP..ps....ppGhLLRspGYs.Ephss.......supYphsp-s.sLsLpTcYpph.hstERhWFsssslR.hRsShlpphsG.s........hsoFsoEhRh...................................... 
1 11 50 81 +9200 PF09368 Sas10 Sas10_Utp3_C; Sas10 C-terminal domain Bateman A anon Bateman A Domain Sas10 is an Essential subunit of U3-containing Small Subunit (SSU) processome complex involved in the production of the 18S rRNA and assembly of the small ribosomal subunit. 25.00 25.00 26.60 26.60 23.70 22.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.58 0.72 -3.81 35 308 2009-01-15 18:05:59 2006-11-15 10:51:18 5 4 279 0 228 307 3 76.10 46 13.75 CHANGED hssDuKRtIohpItKN+GLT.+R.pKcp+NPRVKpRtKYcKuhp+h+ophpsh+....pppus...YuGEhoGI+ssls+SlKl ....h.ttsuKRuIoYpItKNKGLTP+R.pKcsRNPRVK+RcKac+App+h+ut.hpsh+.......cppss...YuGEhoGI+sslsKSlKL...... 0 82 128 188 +9201 PF09369 DUF1998 Domain of unknown function (DUF1998) Bateman A anon PSI2 target BIG_296 Family This family of proteins are functionally uncharacterised. They are mainly found in helicase proteins so could be RNA binding. This family includes a probable zinc binding motif at its C-terminus. 24.20 24.20 24.20 24.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.22 0.72 -3.41 165 1448 2009-01-15 18:05:59 2006-11-15 11:31:00 5 16 1110 0 569 1299 129 85.60 28 7.75 CHANGED uhpHALhpthsh..hhthsps-l.shshh.t.........t.ttsslhlYDuhsG..GsGhsppl.....hct..htcllppAhchlpt..........................C..s..C...............p..sGC.sCl ..............................................................shpaAlhthlsh....hh.tss.ps.......-l..shsshhps.........................sthsslhlYDuhsG...GsGhsp.ph........hct.....htcllptuhchlpp...........................C....s...........C..............................................p..sGCssCl................ 0 196 395 515 +9202 PF09370 TIM-br_sig_trns TIM-barrel signal transduction protein Bateman A anon PSI2 target BIG_293 Domain This domain is likely to have a TIM barrel fold related to IGPS. 
Although this family of proteins are functionally uncharacterised this domain is found as an N-terminal domain of sigma 54 -dependent transcriptional activators (enhancer-binding proteins) suggesting a potential role in signal recognition/receiving and signal transduction. 25.10 25.10 25.20 26.00 24.70 25.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.60 0.70 -5.33 20 371 2012-10-03 05:58:16 2006-11-15 17:30:52 5 9 289 6 140 329 41 255.50 56 79.87 CHANGED pplL.cphRpplppG.pPIlG............uGAGoGLSAKstEtGGlDLIlIYNSGRaRMAGRGSLAGLLPYGNAN-IVl-MA.pEVLPVV+c.TPVLAGVsGTDPFpsh-pFLscLKshGFuGVQNF.PTVGLI..DGpFRtNLEETGMGYshEVEMI+tA+phsLLTTPYVFssc-ActMs.cAGADIIVsHMGLTTGGsIG....AcTuhoL--sVphlsslscAA+slssDlIVLCHGGPIApP-DApalLc+sss.scGFYGASSMERLPsEtAIpppsppFKsl ...................t.plLp+h+ppItcG.cPIlG..................uGAGTGlSAKstEAGGhDLIlIYNSGRaRMAGRGSLAGLLsY.GsANpIVl.-MA..pEVLPVVKp.TPVLAGVsGTDPF.p..ph-tFLcpL+shGFuGVQNF.PTVG.....LI..D..GsFRtNLEETGMGYsLEV-MI+hAHchsLLTTPYVF.ss--AhsMs.cAGADIlVsHhGLTTuGsIG....A.c...T..A.....h.oL--sVthlpphtcAA.cpV.psDl..IVLCH.GGPIupP-DApYllcpstt.scGFaGASSMERLPsEtAlpppscpFKsl......................................... 0 40 93 117 +9203 PF09371 Tex_N Tex-like protein N-terminal domain Bateman A anon PSI2 target BIG_312 Domain This presumed domain is found at the N-terminus of Swiss:Q45388. This protein defines a novel family of prokaryotic transcriptional accessory factors [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.09 0.71 -4.98 145 3396 2009-01-15 18:05:59 2006-11-15 18:19:14 5 18 3331 3 724 2645 145 190.50 46 25.65 CHANGED lApELs.lpspQVpuslpLLDEGsTVPFIARYRKEhTGuLD-sQlRplp-RhpYL+cL-cR+pslLcuIcEQGKLTsE...LcppIpsApshscLEDLYLPYK..KRRT+ApIA+EtGLEPLAchlhspspt.....ssp.ptAtpal.....st-.......tt....VssscpALpGA+...cIlAEphuEcAclRpplR..phhhcpGh.lpo.pl..hpsccc-s ........................................................................................................................IupELs.hp.pQlpuslp.L.L.-.E.GsTVPFIARYRKEhTGu....LD-sQlRplc-R................hpYL.+pL--R+pslLc..sIp...EQ......GK......L...Tc.E...Lcpt.ItsupshscLEDLYLPYK.....K.RRT+ApIA+.........E..tG....LEPLA-h.lhspspp.................ss-..ppAtpal.................st-................t...........Vs..sscsALcGA+......pILsEphuE-AsLhsplR..phhh.c.p.u..h.lsSpl..hcscccE.s.................................... 0 230 452 608 +9204 PF09372 PRANC PRANC domain Bateman A anon PSI2 target BIG_313 Domain This presumed domain is found at the C-terminus of a variety of Pox virus proteins. The PRANC (Pox proteins Repeats of ANkyrin - C terminal) domain is also found on its own in some proteins. The function of this domain is unknown, but it appears to be related to the F-box domain and may play a similar role. 23.40 23.40 23.50 23.50 23.30 23.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.37 0.72 -4.02 77 463 2012-10-02 00:56:31 2006-11-16 17:40:10 5 59 47 0 6 429 0 93.10 24 17.31 CHANGED hssho..laDllhspsp...phhh+hlps.phhph........ph..lYtphlcchIppuhpRpphlppslphlsshhpp......shWshLPhEI+hpIlphLssp-Lphlhp ...................tsholasllhspsp.....phhhRY.hps..p.hhph........h....sh.plYsshlcphltphhp+pphlsp.llcplpss.............shhopLP.EIphpIlphls..-Ltph..h................................... 
1 0 6 6 +9205 PF09373 PMBR Pseudomurein-binding repeat Steenbakkers P, Bateman A anon Pfam-B_12784 (Release 21.0) Repeat Methanothermobacter thermautotrophicus is a methanogenic Gram-positive microorganism with a cell wall consisting of pseudomurein. This repeat specifically binds to pseudomurein. This repeat is found at the N terminus of PeiW and PeiP which are pseudomurein binding phage proteins. 24.90 24.90 25.10 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.08 0.73 -7.34 0.73 -3.77 49 106 2009-01-15 18:05:59 2006-11-27 14:56:45 5 26 19 0 90 104 9 32.50 28 8.87 CHANGED sssplohpphhchss+lhsa.hpppsRh..Pshlsl ..t..splshsphlchssRlhsF.hppssRh..PsYls..... 0 37 67 76 +9206 PF09374 PG_binding_3 Predicted Peptidoglycan domain Moxon SJ, Bateman A anon Pfam-B_8737 (release 8.0) Domain This family contains a potential peptidoglycan binding domain. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.46 0.72 -3.99 20 823 2012-10-01 23:43:47 2006-11-30 10:00:54 5 12 535 9 152 611 209 75.60 34 37.79 CHANGED pAs+hLQ+hL......u.....ltsDGhIGspTlsAl........ssts.ssLhpthssuRhpaYhcLs.......................stspFscGWhsRls ...........................ss+hLQRhL..............u........l.tsDGhIGspTL...uAl..................sppu.tt.sL.lp.sLs...stptpa.Yhpluttp....................spppF.hhGWlpch.................... 0 30 90 125 +9207 PF09375 Peptidase_M75 Imelysin Bateman A anon MEROPS Family The imelysin peptidase was first identified in Pseudomonas aeruginosa. The active site residues have not been identified. However, His201 and Glu204 are completely conserved in the family and occur in an HXXE motif that is also found in family M14. 
21.10 21.10 21.10 21.10 21.00 20.80 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -12.04 0.70 -5.10 169 1931 2009-01-15 18:05:59 2006-11-30 11:01:45 5 5 1429 10 414 1465 252 267.80 27 74.43 CHANGED shssY.....pshhps.spsLtsu...spshs..sssst...............ssLptA..+sAahsARhsappsEshph.................s.h...........................t.tlthWP..sc...........uhl-hshssh...............................t..hssp.ht................sssspGaHAlEalLasps.......................s.t....t..sth.................................sphlp..uhustLhscsppltspWpt.........tshttth.....................t.tt.......ulp..pll...sGhssh.ls-lusp+.ls...sh..............ps.ptcspaScsohtshhs.slpGhpsla.s.............sulps.ll....tpp.ss..s....Lssplcsphss......shstlstl.ps.........shs..................ptl...ssspspth...........lps.h....suLsp.t.spl ......................................................hhssY.csalps.sppLhps.....scsFs......puh...ps.................sclppAKsha.hssRhhYpchEsltt..........................................................th.......uplDhplcup....................................tssh.pt.c.h...........................ssshoGaHtlEhhLasps.......................s................................................................sphhcthAspLhsDspcLppphssht..................h................................................................................ssp.phl....sGtssL..lpElAssK.ls...............................sEE-h...a.S..c.osLhD.apA.NlcGspp..lh.................................shlcs...h...l.......ppp..s.......p....LssclpspFppspshLsph..ps...............p.t......................ppl.......sssstpth...........hss..l...suLsc.hut.............................................................................................................. 
1 114 235 339 +9208 PF09376 NurA NurA domain Iyer LM, Bateman A anon Iyer LM Domain This family includes NurA a nuclease exhibiting both single-stranded endonuclease activity and 5'-3' exonuclease activity on single-stranded and double-stranded DNA from the hyperthermophilic archaeon Sulfolobus acidocaldarius [2]. 20.60 20.60 20.70 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.39 0.70 -4.89 65 438 2012-10-03 01:22:09 2006-12-01 16:54:16 5 2 287 10 251 429 41 278.20 14 74.78 CHANGED tplhAlDGS............phth.phpsshhhhlhuhuhsstthtphthhthh.th.hh...............................................................cpthphhhthhEht...............................hhtttppsclllhDGslhtphh........................................................................................................................................h...thtchltth.phlct..........hsllulsKshpspplhsth..........................................................h.Dttllpthh.p.................s.pt.h.............................................hhhtthhtshp.hhhhYlph........ttss.lh+lE........h...................spstcllstlt...............................................hsspGYPhs...LthAcchu+l 
...................................................................................................................................................................................h.phhulDGS............pht...h..phtths..hhhhhs....hu.h..h.s.ht.ht..p.h.thh..h...h.shh....h..h...t............................................................................................................pphhp.h.hthhEht......................................................hh..t.t.t.pp.ss.lllhDGsL.hhhhh........................................................................................................................................h...tht.ph.lt....h..phlcp.............th.llu.llKpsps..p...p.hhphh................................................................................h..tDh.t.lht....thh.p..............................................ss.pt.....h.h...................................................................................................hhthhtphp...lhhhYlph.......................sstlh+lE........h....h............................................tp.pchlshlh..............................................hhp.st.G....aPhs...LthAcp.sph.......................................................................................................................... 1 91 160 213 +9209 PF09377 SBDS_C SBDS protein C-terminal domain Bateman A anon Bateman A Domain This family is highly conserved in species ranging from archaea to vertebrates and plants. The family contains several Shwachman-Bodian-Diamond syndrome (SBDS) proteins from both mouse and humans. Shwachman-Diamond syndrome is an autosomal recessive disorder with clinical features that include pancreatic exocrine insufficiency, haematological dysfunction and skeletal abnormalities. Members of this family play a role in RNA metabolism [2] [3]. 
21.20 21.20 22.80 21.20 20.90 20.00 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.38 0.71 -4.43 27 515 2012-10-02 20:07:24 2006-12-06 09:57:19 5 6 468 6 343 502 98 134.40 28 49.03 CHANGED phhp-IssIlup+slsPpTp+sassshIE+Ah.c-lphu..............................................lsss+oAKpQAL-lIKtLpc..hlPItRAcM+l+lshssctspthh...........................................................ctltshhp.ph..tpp-ptsstaphlshlpPsta+tlpphlppp ..............hh+-llslluppslNPp.T.c+Passsh..IE+Ah.c-l.+hs..................................................................................lcssKssK.pQAL-slKtLpp....h.lPIcRAcM+lclplPs.ph..s.p.p..hh...............................................................................................................................................................................................ppltphhp...hh........cppt....t.s.s..phph.......l.shlsPGhhcpl.-hlp................................................................................... 1 114 198 282 +9210 PF09378 HAS-barrel HAS barrel domain Iyer LM, Bateman A anon Iyer LM Domain The HAS barrel is named after HerA-ATP Synthase. In ATP synthases, this domain is implicated in the assembly of the catalytic toroid and docking of accessory subunits, such as the subunit of the ATP synthase complex. Similar roles in docking of the functional partner, the NurA nuclease, and assembly of the HerA toroid complex appear likely for the HAS-barrel of the HerA family [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.93 0.72 -4.08 60 266 2012-10-02 13:55:04 2006-12-06 11:10:15 5 8 209 12 130 3149 1869 90.80 17 19.30 CHANGED lGhllGssst.pshhhhlps.t...............plphG-hVphp.t................tppllGhlhslpp.................................s.hshlshs..........................clluplcp ..................lG.Vlussss.pththhhcs.................tsphGcaVhlptp................sspllGhlpslpp...............h.p..t...t.......t..sp.hthltht............................h.....h......................................................................... 0 46 82 110 +9211 PF09379 FERM_N FERM N-terminal domain Bateman A anon Bateman A Domain This domain is the N-terminal ubiquitin-like structural domain of the FERM domain. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.55 0.72 -3.93 30 3027 2012-10-03 10:59:06 2006-12-08 09:24:46 5 150 200 46 1524 2604 2 84.60 27 9.05 CHANGED Vp.hLDss.hhphp.....lsspsp.GpplL-pVsp+l.slpEp-aFGLpa.....sps......sph...................................pWL-hs+plp+phsc...............ssshhlhFRlKFas .......................................lhhh.Dsp......h..pht..............lp..tpsp.GppLh-..t....Vsp..+l......sL..h..E..p-.............YFGLta............cpp...................spt....................................................................................................................................pWL.-.sK.pl.p.cQ.hhp........................................................ssh.p.h..hF..plKFas................................................................................................ 
0 372 505 936 +9212 PF09380 FERM_C FERM C-terminal PH-like domain Bateman A anon Pfam-B_851 (release 2.1) Domain \N 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.03 0.72 -3.77 62 2452 2012-10-04 00:02:25 2006-12-08 13:07:32 5 88 124 42 1204 2086 0 90.40 28 10.92 CHANGED cpcssc..lh.LGlsstGlhla...csss+l...ptFsWscIp+lSFccK.................................................+Fhlcht............................................tppppphsFhhss..hcssK..tlW+hClppHp.......F..a...phpcpsps .........................................................................................................................pcGsc..lhLGlsstGlhla................csp.p...+l.....tt..FsW.scIt..+l.Sa.cc.+.................................................pFhIc.lhs..................................................................pppp.ss.hsFhhss......hcssK..plW+hClppHs.......F..aphtp........................................................... 0 270 364 720 +9213 PF09381 Porin_OmpG Outer membrane protein G (OmpG) Mistry J anon pdb_2f1c Family Porins are channel proteins in the outer membrane of gram negative bacteria which mediate the uptake of molecules required for growth and survival. Escherichia coli OmpG forms a 14 stranded beta-barrel and in contrast to most porins, appears to function as a monomer [1]. The central pore of OmpG is wider than other E. coli porins and it is speculated that it may form a non-specific channel for the transport of larger oligosaccharides [1]. 
25.00 25.00 46.20 45.70 21.90 19.60 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.21 0.70 -5.33 4 299 2012-10-03 17:14:37 2006-12-08 14:43:39 5 1 282 10 9 87 0 288.50 92 99.37 CHANGED MKKLL.CTALVMCAGMACAQAEEKNDWHFNIGAMYEIENVEGYGEDMDGLAEPSVYFNAANGPWRISLAYYQEGPVDYSAGKRGTWFDRPELEVHYQFLESDDFSFGLTGGFRNYGYHYVDEPGKDTANMQRWKIAPDWDVKLTDDLRFNGWLSMYKFANDLNTTGYADTRVETETGLQYTFNETVALRVNYYLERGFNMDDSRNNGEFSTQEIRAYLPLTLGNHSVTPYTRIGLDRWSNWDWQDDIEREGHDFNRVGLFYGYDFQNGLSVSLEYAFEWQDHDEGDSDKFHYAGVGVNYSF .................................MKKLLPCTALVMCAGMACAQAEE+.NDWHFNIGAMYEIENVEGYGEDMDGLAEPSV......YFNAANGPWRIuLAYYQ...E...G.P.VDYSAG.KRGTWFDRPELEVHYQFLEsDDFSFGLTGGFRNYGYH.YVDEPG..KD..TA...NMQRWKIAPDWDVKLTDDLRFNGWLSMYKFANDLNTTGYADTRVETETGLQYTFNETVALRVNYYLERGFNMDDSRNNGEFSTQEIRAYLPL.....TL.....G.N.H...SVTPYTRIGLDR.WSNWDWQDD.IEREGHDF.NRVGLFYGYDFQNGLSVSLEYAFEWQD.H.....DE...GDS..D.....KFHYAGVGVNYSF.............................. 0 1 3 5 +9214 PF09382 RQC RQC domain Bateman A anon Pfam-B_571 (release 21.0) Domain This DNA-binding domain is found in the RecQ helicase among others and has a helix-turn-helix structure. The RQC domain, found only in RecQ family enzymes, is a high affinity G4 DNA binding domain [1]. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.18 0.72 -4.32 174 3296 2012-10-04 14:01:12 2006-12-11 14:34:43 5 41 2926 9 924 2638 355 104.30 30 14.90 CHANGED DsTptApclLSslh+h..........t..p....p..........a..Ghshlh-lLp.........................Gspsp+ltph....sa.cplssa..GlGc.chsppphpsllcpLlspGhL.....phshp......pas.sLplspp.u..cslLc..Gc....p..plhlphst ................................................DsTpcAppsLSslhRs.................sp.........+.........FGhshll-V.L+.........................G..s..p.s.p..+lpph.....sa..-p..L.ssaGlG.......+..-.....h...op..cc........hpsll+pLltt.Ghl.........p.shs.......pas..sLpL..Tct..u..+slL+.Gct..pl.ht...s..................................... 0 278 557 764 +9215 PF09383 NIL NIL domain Bateman A anon Pfam-B_524 (release 21.0) Domain This domain is found at the C-terminus of ABC transporter proteins involved in D-methionine transport as well as a number of ferredoxin-like proteins. This domain is likely to act as a substrate binding domain. The domain has been named after a conserved sequence in some members of the family. 20.60 20.60 20.60 20.70 20.50 20.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.05 0.72 -4.41 184 4317 2009-01-15 18:05:59 2006-12-11 17:17:47 5 25 3162 22 702 2638 28 75.40 25 22.67 CHANGED sstll+LpFsGpssppPllupls+ca....s.lssNILtGsl-plpstshGpLllpl......t..Gsppp..hppulpaLp.pps.lpl.Ell ...............................s...ll+LpFsG.p.s.ssp.Pllupls+ca....s.lssNILtusl-hlp.s..sshGtLllpl................................p..Gsppp..hptAlpaLp.pp.t.Vpl.Ell...................... 0 181 399 562 +9216 PF09384 UTP15_C U3_snoRNA_C; UTP15 C terminal Mistry J, Wood V anon Pfam-B_7112 (release 21.0) Family U3 snoRNA is ubiquitous in eukaryotes and is required for nucleolar processing of pre-18S ribosomal RNA [1]. It is a component of the ribosomal small subunit (SSU) processome. 
UTP15 is needed for optimal pre-ribosomal RNA transcription by RNA polymerase I, together with a subset of U3 proteins required for transcription (t-UTPs) [2]. This entry represents the C terminal of UTP15, and is found adjacent to WD40 repeats (Pfam:PF00400). 26.90 26.90 27.40 27.60 26.00 26.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.45 0.71 -4.51 40 321 2009-09-21 22:48:38 2006-12-12 09:50:17 5 11 278 0 219 324 1 147.80 28 28.41 CHANGED +Gps...hhscp--hllppt..........ppp+lppa-+hLppF+aucALDssL.........tstpscsslollpELp+RG......sL+tALtsRDEtoLpslLpalh+pls-sRassllhcssssllDlYusslspSs..hlcchltpLpp+lpcElchppchhplpGMl-...hL ..................................t.phhspts-hl.lspt..........pcp+lp...ta-+tL+pF+........aucALDpsL................tspssphslollpELt+Ru......sL+sALsuRDEpsLpslL.palh.+pl.s..c...PRassllhsluthllD........l..Y..u..sh.l...sp.Ss.......hlcchh.hpLppplpcElchppphhphtGhl-hl................................................................... 0 77 122 179 +9217 PF09385 HisK_N Histidine kinase N terminal Mistry J anon pdb_1yku Domain This domain is found at the N terminal of sensor histidine kinase proteins. 24.40 24.40 24.90 26.60 24.30 24.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.62 0.71 -4.55 7 130 2009-01-15 18:05:59 2006-12-12 10:37:03 5 3 103 3 13 66 0 137.40 58 43.26 CHANGED Mp...........sspphLssaLcpppcpFlpsW+p+llls-cD.a+-cllpNGptlhchhhphhpcphs.p...lp.lupKIApERh-AcsNIu-FVYNsNhGRpElhphlstlssshp-LpsllccINhhFD+hlYasVppYs- ..............................................MtVFPIDK..DIKElFCSHLKNNRHQFVENWKNKM.....IIS-KDPFK.EVVQNGEcLLEhIIELhME-KDIs..YLQPLCEKIAIERAGADANIGDFVYNANVGRNELFEAMC.....ELDVSAR..ELKPIM...spIHTCFDKLIYYTVLKYSE.... 0 1 7 9 +9218 PF09386 ParD Antitoxin ParD Mistry J, Bateman A anon pdb_2an7 Domain ParD is a plasmid anti-toxin than forms a ribbon-helix-helix DNA binding structure [1]. 
It stabilises plasmids by inhibiting ParE toxicity in cells that express ParD and ParE. ParD forms a dimer and also regulates its own promoter (parDE). 22.80 22.80 23.30 23.50 22.30 22.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.65 0.72 -3.86 4 38 2012-10-02 18:44:02 2006-12-12 14:31:42 5 1 37 2 7 32 2 77.80 61 92.81 CHANGED MSRLTIDITDQQHQSLKALAALQGKTIKQYALERLFPsDsDuDQAWQ-LKsLLGsRIu-GLAGKVSsKSlsEILsEELu ..MSRLTIDlT-QQHQSLKALAALQGKTIKQYALERLF...P..uDuDuDQAWpEL+sLLssRIppGLsGcVSsKSlscILDEELs......................... 0 1 6 6 +9219 PF09387 MRP Mitochondrial RNA binding protein MRP Mistry J anon pdb_2gid Family MRP1 and MRP2 are mitochondrial RNA binding proteins that form a heteromeric complex. The MRP1/MRP2 heterotetrameric complex binds to guide RNAs and stabilises them in an unfolded conformation suitable for RNA-RNA hybridisation. Each MRP subunit adopts a 'whirly' transcription factor fold [1]. 18.60 18.60 18.80 18.80 18.50 18.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.31 0.71 -4.58 11 164 2009-01-15 18:05:59 2006-12-12 15:52:17 5 4 50 14 43 134 1 157.00 29 36.07 CHANGED suts.u-usustustusssuo....+ppssusppt.Rs...PAF-IsHhsc-csupGs.atlsVst+sshl.p.PsLD.+..Rp.pplDp.............Ncss.Qscu-R.sloVphpslasAphluVLcuRhsplcspsphhsAsFsPsspsYshctpl+psuop................psh-W.....oVcF-sthulhLc+FLppAL+.shGFucphs .......................................................................tst...ttts.....t...t..p..........t.hpl.+.stps.s.u.........t...s......htt.h..........Ls.p......tss........................s....t.pu-h...h...h.t........ssa.sp.sthlptRhshhps.s....h.......ssphY.hctp.+...tst...................................................................................................sth.W.....hhphss.hhuhh..hhLppsL...........h................................................................................... 
0 6 9 19 +9220 PF09388 SpoOE-like Spo0E like sporulation regulatory protein Mistry J anon pdb_2c0s Family Spore formation is an extreme response to starvation and can also be a component of disease transmission. Sporulation is controlled by an expanded two-component system where starvation signals result in sensor kinase activation and phosphorylation of the master sporulation response regulator Spo0A. Phosphatases such as Spo0E dephosphorylate Spo0A thereby inhibiting sporulation. This is a family of Spo0E-like phosphatases. The structure of a Bacillus anthracis member of this family has revealed an anti-parallel alpha-helical structure [1]. 20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.94 0.72 -4.44 47 988 2009-01-15 18:05:59 2006-12-12 16:44:01 5 4 287 3 195 665 0 44.70 32 66.69 CHANGED pLhppIEp+Rcchlplstph.GlsspcslphSQELDpLlNpYp+hp .............................LpppIEtpRccLh...p.l..s...pch...Gh..s.pp.cllphSQELDcLlNpa.ph.h............ 0 68 144 154 +9222 PF09390 DUF1999 Protein of unknown function (DUF1999) Mistry J anon pdb_2d4o Family This family contains a putative Fe-S binding reductase (Swiss:Q72J89) whose structure adopts an alpha and beta fold. 25.00 25.00 85.50 85.30 21.90 20.00 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.94 0.71 -4.88 3 18 2012-10-02 22:59:21 2006-12-13 11:24:32 5 1 18 2 12 18 1 148.80 53 97.85 CHANGED M...RYRsFoEPDaEsLQALDLAtQRRssPtaDsLP-REpAGRLSoSLuALRFYERSGHSFVAEsEu-clpGalLAQuVWQGDRPsVLVRAllLs-upsEDstRGLLRAVVKSAYDAAVYEVHLPLsP...ELEAAARAEEA+LTG.sYAVpHLGTRAAoAPG++LtR ...............................M+aRsFsE.Dh-tLptL....................tGchssSLuALRFFsRTGHSFlAp.EG-cshGFsLAQAVWQG-tsTVLVsRl...-ups....t....-shcGLLRAVVKSAYDAGVYEVALpL-Pt.+t-LcpALcA-Gas.lushsLAVRVLGSRGtRuEsctVL.E. 
0 3 7 12 +9223 PF09391 DUF2000 Protein of unknown function (DUF2000) Mistry J anon pdb_2gax Family This is a family of proteins of unknown function. The structure of one of the proteins in this family has been shown to adopt an alpha beta fold. 25.00 25.00 26.20 30.30 20.70 17.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.37 0.71 -4.49 36 322 2012-10-02 19:40:38 2006-12-13 13:31:01 5 2 297 2 106 309 39 130.80 30 94.96 CHANGED -sKhslllcc-LsshptLNVsAaLusGlus..stP-hlGpsYtDusGphahslht.PlllLpu.spspLppl+pculp+...slshssaopshhsTGpppspptshttsst--lchlGlALhGs+KhVcKlT+uhsLat ............pKhslllcpcLssh.thNssAhLuhulut........phP-llGp..s.htDusGpta.slht.Pl.lLpu.stptLppl+pcuhsp....slhhssFspth.ssss.pp.ptshtssstsslphlGlulhGs+KtVsKlstuhsLa........................ 0 25 52 81 +9224 PF09392 MxiH Type III secretion needle MxiH like Mistry J anon pdb_2ca5 Family Type III secretion systems are essential virulence determinants for many gram-negative bacterial pathogens. MxiH is an extracellular alpha helical needle that is required for translocation of effector proteins into host cells [1]. Once inside, the effector proteins subvert normal cell function to aid infection. 27.40 27.40 27.60 27.90 27.20 27.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.90 0.72 -3.42 39 1127 2009-01-15 18:05:59 2006-12-13 14:16:18 5 1 510 43 74 367 1 79.00 20 87.18 CHANGED lsthspthpssssst...............................................tpplppthpshptt...................................................ssP...pplhphQtplspaolhhslpopslpthppslpsll.ph .............................................................hh..................................................................tlpt..tthtpts.......h......................................................................st.hs...s...P.....ptlhphQhsltpaolhhshpSsls+thpshlssll.p.h....... 
0 17 35 50 +9225 PF09393 DUF2001 Protein of unknown function (DUF2001) Mistry J anon pdb_2guj Domain This family includes phage-like element PBSX protein (Swiss:P54332) whose structure adopts a beta barrel flanked with alpha helical regions. 22.40 22.40 22.40 23.30 21.80 22.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.42 0.71 -4.48 25 231 2012-10-01 22:58:23 2006-12-13 17:06:55 5 1 162 2 56 189 2 140.60 30 95.64 CHANGED M......hcupcsIsGphGclal....cspphspspphpAclchsKp-lshhGcphsppKssGhcGoGolThY+VoShhhphhhchlKpGp-shFclhspLcDPsS.hGt..EclsLcssshDclslAsa-ss..-hlEEEsPFTF--a-ll- ..................................hhcupcslsGp.Gchal.....cspphtphpphEAclEhsKp-lplhGc.hhpspKs..sGhpGoGohThap.loShhtph.hhch.l..+p.G.....p.-.hF.sl.ssh-DPso...hGp...cplhLpsss.hD...s...hhlush-ss..-hlE.EEhsFTFpca-h.-................................. 0 24 47 50 +9226 PF09394 Inhibitor_I42 Chagasin_I42; Chagasin family peptidase inhibitor I42 Mistry J, Bateman A anon 2fo8 Domain Chagasin is a cysteine peptidase inhibitor [1] which forms a beta barrel structure [2]. 22.30 22.30 22.30 23.60 21.80 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.12 0.72 -3.65 103 354 2009-10-19 12:22:43 2006-12-13 17:26:24 5 19 270 16 162 332 18 92.20 23 51.54 CHANGED lsl..phGp.phtlpLspNPo..TGYpWplpt........sshlpl..hss......pahss..t.ss......hlGuuGhph.aphpuhps....G....psplphhYtRsW-ssss...ppash.slpV ................plp.Gp.phhlpLsuNPo..........T.GYpWphps...........sssslph....hss....................pahss...s.ss.t........hlGu.u.Ghph.apapuhps.....G....psplphtYtRsW-ssst...pphshpl.l.................... 0 55 103 136 +9228 PF09396 Thrombin_light Thrombin light chain Mistry J anon pdb_2b5t Domain Thrombin is an enzyme that cleaves bonds after Arg and Lys, converts fibrinogen to fibrin and activates factors V, VII, VIII. 
Prothrombin is activated on the surface of a phospholipid membrane where factor Xa removes the activation peptide and cleaves the remaining part into light and heavy chains. This domain corresponds to the light chain of thrombin. 20.60 20.60 20.80 22.20 19.50 18.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.62 0.72 -4.35 13 80 2009-01-15 18:05:59 2006-12-14 17:18:55 5 8 46 403 27 111 1 47.70 64 8.36 CHANGED TshspacoFFss+TFGsGEADCGlRPLFEKKslpDpoE+ELL-SYhpGR ....Tsss-apsFFs.+TFGs....GEADCGLRPLFEKKslcD+oE+ELL-SYh-GR... 1 1 3 10 +9229 PF09397 Ftsk_gamma Ftsk gamma domain Mistry J anon pdb_2j5p Domain This domain directs oriented DNA translocation and forms a winged helix structure [1]. Mutated proteins with substitutions in the FtsK gamma DNA-recognition helix are impaired in DNA binding [1]. 25.10 25.10 25.40 25.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.10 0.72 -4.26 130 5039 2012-10-04 14:01:12 2006-12-15 16:44:09 5 23 4114 16 987 4009 1919 66.30 43 7.56 CHANGED ssp.ppDsLa--AlplVlcpppASsShlQR+hRIGYNRAARLl-pMEppGlVushpus.tsR-VLh....tp ..................s.tptDsLa-cAsph.Vl....c...p..p..+....ASsShlQR+h+IGYNRAARll-phEppGlVust.p.u.s.tsR-VLh..t.............. 0 318 635 820 +9230 PF09398 FOP_dimer FOP N terminal dimerisation domain Mistry J anon pdb_2d68 Domain Fibroblast growth factor receptor 1 (FGFR1) oncogene partner (FOP) is a centrosomal protein that is involved in anchoring microtubules to subcellular structures. This domain includes a Lis-homology motif. It forms an alpha helical bundle and is involved in dimerisation [1]. 
29.40 29.40 29.40 29.40 29.30 29.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.85 0.72 -3.99 6 189 2009-01-15 18:05:59 2006-12-18 11:26:16 5 6 93 2 120 199 3 75.50 35 27.53 CHANGED K.oPLsNENLKKhlsT+-...GRLVAsLlpEFLpFFpLDFTlAVFpPEuuh.pshpsRpsLuK-LsIs-u-ssKssPLLhEll++ ..................................p...shLlspLlpEaLpF.phcaThuVap....sEo..up..s.p.s...........s....Rp.Luc-Lslh-.....s..pt...s.ss..h..PLLhtllt................................. 0 50 60 86 +9231 PF09399 SARS_lipid_bind SARS lipid binding protein Mistry J anon pdb_2cme Family This is a family of proteins found in SARS coronavirus. The protein has a novel fold which forms a dimeric tent-like beta structure with an amphipathic surface, and a central hydrophobic cavity that binds lipid molecules [1]. This cavity is likely to be involved in membrane attachment [1]. 25.00 25.00 197.30 197.10 19.70 15.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.42 0.72 -3.95 4 60 2009-01-15 18:05:59 2006-12-18 11:57:56 5 1 59 8 0 23 0 97.70 94 100.00 CHANGED MDP.KTNVVPPALHLVDPQIQLTITRMEDAVVHGQNNADPKVYPIILRLGSQLSLSMsRRNLDSLEARsFQSTPIVVcMTKLATTEELPDEFVVVTAK MDPNQTNVVPPALHLVDPQIQLTITRMEDAMGQGQNSADPKVYPIILRLGSQLSLSMARRNLDSLEARAFQSTPIVVQMTKLATTEELPDEFVVVTAK 0 0 0 0 +9232 PF09400 DUF2002 Protein of unknown function (DUF2002) Mistry J anon pdb_2g7j Family This is a family of putative cytoplasmic proteins. The structure of these proteins form an antiparallel beta and sheet and contain some alpha helical regions. 
19.70 19.70 20.40 26.20 19.00 18.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.49 0.72 -4.34 4 520 2009-01-15 18:05:59 2006-12-18 12:57:38 5 2 516 1 30 104 2 111.10 86 97.78 CHANGED MYLRPDEVARVLEpsGFphDhVTscuYGYR+GEpYVYVNREARMGRTALlIHPsLK-+SssLApPsSsIKTss+Y.pFPLYLuG.thpE+YGIPHGFSSR.uLppaltphF.....c .........................MYLRPDEVARVLEKVGFTVDVVTQKAYGYRRGENYVYVNREARMGRTALVIHPTLKERSSoL.....AEPASDIKTCDHYQQFPLYLAG..EpHEHYGIPHGFSSRlALERYLNGLF.GE............... 0 1 4 17 +9233 PF09401 NSP10 RNA synthesis protein NSP10 Mistry J anon pdb_2fyg Family Non-structural protein 10 (NSP10) is involved in RNA synthesis. it is synthesised as a polyprotein whose cleavage generates many non-structural proteins. NSP10 contains two zinc binding motifs and forms two anti-parallel helices which are stacked against an irregular beta sheet [1].\ A cluster of basic residues on the protein surface suggests a nucleic acid-binding function. 21.50 21.50 22.10 38.80 20.00 21.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.64 0.71 -4.39 11 653 2009-01-15 18:05:59 2006-12-18 16:29:24 5 34 206 53 0 662 0 121.00 53 2.47 CHANGED ssNSslLolCuFuVDPupsYlDhVpsGupPlsNCVKMLss+oGsGhAITssP-AshsQ-oYGGASlClYCRsHl-HPshsGhC+hKGKaVQVPh.sspDPltFsLcNsVCsVCGhWhsaGCsCDt ....ssNSulLSlCAFAVDPAKsYhDalssGspPlsNCVKMLssHsGoGhAITssP-ushsQ-SaGGASsClYCRs.Hl-HP....s.hcGhCchKGKaVQlPo.stpDPVuFsLcNcVCsVCGhWhsaGCsCDt... 0 0 0 0 +9234 PF09402 MSC MAN1_C; Man1-Src1p-C-terminal domain Mistry J anon pdb_2ch0 Domain MAN1 is an integral protein of the inner nuclear membrane which binds to chromatin associated proteins and plays a role in nuclear organisation. The C terminal nucleoplasmic region forms a DNA binding winged helix and binds to Smad [1]. This C-terminal tail is also found in S. cerevisiae and is thought to consist of three conserved helices followed by two downstream strands [2]. 
21.70 21.70 22.00 22.10 21.50 21.60 hmmbuild -o /dev/null HMM SEED 334 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.04 0.70 -5.73 37 386 2009-01-15 18:05:59 2006-12-20 11:29:55 5 12 255 1 248 363 1 257.20 21 43.10 CHANGED hLlhshhlha.....shaatppphtlGaCupsh.st....................................................C..CP.puhCh...plpCctsahht...h..................sspCh.Dsc+tphlptl..hcthhchL+p+suphcC.G................t.psshssslsts-..........................ltchltppp....hhstppF-p.hapsslttlpcp....-lhhp.......................................................................spthhhusohsplslpC..thppplpphlh..........chthhlhslhlhhhsh...hhl+hthppcppcptplppllppslcpLppptt...........t.spsppsalslspLRDp.....lltsptphp........ppplWp+lhphl.-p.susVcsphtE.htG-hh+sWEWlssh ..............................................................................................................................................................................h.............................................................................................................................................................................................................................................h........ctpttthhptl...pphhphLtpptuphpC.G.............................t..p.tls.t-..........................htphltp.p..........tp...hpp.haptsl.t..lhpp...........t-lh.ht........................................................................t..s.sp.hhhhp.sshsphshtC..thpp.th...hthht...............phhhh..hhslhhh.h.hhh.......hhhc.hphp.pt....cppptth.p....hlppll...chLpsptt.................t.s..tt.psalslsplRDp.....ll.sp.pchp.........................................hpplWp+shphl..tt.poplpsp.pc..hsGc.hhsWcWhts.s........................................................................................ 0 80 125 192 +9235 PF09403 FadA Adhesion protein FadA Mistry J anon pdb_2avr Family FadA (Fusobacterium adhesin A) is an adhesin which forms two alpha helices. 
25.70 25.70 26.00 25.70 24.70 25.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.63 0.71 -4.12 3 115 2009-01-15 18:05:59 2006-12-20 11:44:49 5 2 30 9 12 93 0 119.50 35 97.25 CHANGED MK.KhLLhuhLlLSuhSaAA--......AtpllSELKuL-AEYQsLspcEEARFpEEKppuEsAcppltcLcElpsulEE+lt+LpEEuKTRFaKDpYccLhK+Y--YLsKLEpKIs-pcplIS-FEKIQclR ...................................MK.K..hl..Lh..s.hLlluuhuaAAps...............stplhuE..lpsl-AEYQpLhpcEptRhpE.+ppt-...shcppltp.cph.tphpchht+Lpp-.uchRaa+..cpYpcLh.K+ac-hhpcLEpchtcpcphIsphpKl.tlh............... 0 6 11 12 +9236 PF09404 DUF2003 Eukaryotic protein of unknown function (DUF2003) Mistry J anon manual Family This is a family of proteins of unknown function which adopt an alpha helical and beta sheet structure. 19.80 19.80 21.60 20.30 19.10 19.00 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.44 0.70 -6.07 3 98 2012-09-27 07:58:09 2006-12-20 14:16:26 5 7 69 1 71 99 0 328.40 41 81.61 CHANGED MsDs...EELRPVP+ERAVLESFFTQLGMFSFDRAKDYVEKEKDASKSAGAIWuSLLAALAHLAAAEKlYHNMTFLGQKLGGQSFFSRKDSIRTIYTSLHNELKKVVoMGRsAPGGSAPsLEELLPHLSEQLCHFlQARMEIADFYEKMHoLGSQKoINSEELVoTLDoIL+KYSSRFHHPILSRLESSFQlEVDVLTQLLRCQAQISEW+FLPSLLoLHGAHSKLQoWGQlFERQRETRKHLFGGQSQKAVQPPHLFLWLQRLQAsLLAKFSFYFHEALSRQTosSEMKALTARTuPDYFGKISSFIRKYDASNVSLVFDNRGSESFQGHGYHHPHSYREAPKGVDQFPAVVSLPSGERPVTHWPNVIMIMoDRAoELNTLEKVVHFYDDKVQSTYFLoRPEPHFTIVVIFDGRKSERDSaIlAFLQELsGSLRNSKPFoSLKPGSKG 
.............................................................................................................................hh.th..hs.hcp.Yhph.ah..c.....hht+.pp.o.hh...YpsLhsph...pph.t...................................t.......p......hhsplspQLs.FhpARh-hhshYcp.hhshu.sp..p..h..s.ccLlshL-slhcpasp+hpHs..hLp...lcsuhphElslLtpLL+sQsplscacFLs.oLlsLasA+.....s+Lp..sWs.p.h...h....p.pp..ppo..pK..phFuu..pspKs......sp....sP.pLahWL.+hpshLLuKFShYFH-sLSp...Q.....so......s....u..-.M.K.slsu+s....ss.Dhhs....+IpuFhRKhD.u..slsLla....Ds..+u.s....-.s..ap.upGYpaPpp..p-sPp.....G..l.cpaPslhohP.s...p.......p.Ph...HhPNllhl......ht-pt..s-Lss.h.-+llaahD....s.+.lps.................TY...alsps-sphhlV.lIac.s.p+.p-+DphhhsFlp-ls.tL+ssKhht.L+.......................... 0 31 36 54 +9237 PF09405 Btz CASC3/Barentsz eIF4AIII binding Mistry J anon pdb_2jou Domain This domain is found on CASC3 (cancer susceptibility candidate gene 3 protein) which is also known as Barentsz (Btz). CASC3 is a component of the EJC (exon junction complex) which is a complex that is involved in post-transcriptional regulation of mRNA in metazoa. The complex is formed by the association of four proteins (eIF4AIII, Barentsz, Mago, and Y14), mRNA, and ATP. This domain wraps around eIF4AIII and stacks against the 5' nucleotide [1][2]. 
20.80 20.80 21.50 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.24 0.71 -4.10 21 266 2009-01-15 18:05:59 2006-12-21 09:49:36 5 7 179 10 185 259 0 121.40 24 19.09 CHANGED sssssppsup.sst..tt...t..pptccp-pcchcc-ccpsPs..aVPsRGsFFhHDcRss-sssssh+................R.hu......................h...s.h..................t+phpssutpspWsHDha-phss-psPpsp.phhsthshshpspss ........................................................................tts................t..t..tp..........tttcct..t..p.c.ttpccccpsPt..alPp+GsFahHDc.Rtp.sst...p...th+....................pstG.......................................h........................................t+thpsss....t.ttpWpHDpacp...pc..ptsppptphht..t........tt.......................................................... 0 52 87 143 +9238 PF09406 DUF2004 Protein of unknown function (DUF2004) Mistry J anon pdb_2aby Family This is a family of proteins with unknown function. The structure of one of the proteins in this family has revealed a novel alpha-beta fold [1]. 22.40 22.40 22.90 24.00 21.20 20.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.17 0.72 -3.98 22 148 2009-09-10 18:54:03 2006-12-21 10:54:57 5 1 128 1 20 117 5 99.60 26 66.38 CHANGED phpcpAcpAltshhpsc........ppahpaHl--httchh..hh...t...sshpphlctlpltths.a.p.tst....hhhhDathss-.hoD.llsVpFcppGclhp.lsaES ..................................p.htppA+pAlhp.hcp-........shYh-FHt--hspphh...h...........shpphlctl..pLttls.ahs..pst.pp.shs...hDaphss-..oDElLsV+acpcGclhp.lsaES... 1 7 14 17 +9239 PF09407 DUF2005 Protein of unknown function (DUF2005) Mistry J anon pdb_1zel Family This is a family of proteins with unknown function. 
25.00 25.00 56.60 32.70 20.50 19.30 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.10 0.70 -5.67 2 47 2009-01-15 18:05:59 2006-12-21 11:07:22 5 2 45 2 3 12 0 279.90 91 99.00 CHANGED M....................VSPAGADRRIPTWASRVVSGLARDRPVVVTKEDLTQRLTEAGCGRDPDSAIRELRRIGWLVQLPVKGTWAFIPPGEAAISDPYLPLRSWLARDQNAGFMLAGASAAWHLGYLDRQPDGRIPIWLPPAKRLPDGLASYVSVVRIPWNAADTALLAPRPALLVRRRLDLVAWATGLPALGPEALLVQIATRPASFGPWADLVPHLDDLVADCSDERLERLLSGRPTSAWQRASYLLDSGGEPARGQALLAKRHTEVMPVTRFTTAHSRDRGESVWAPEYQLVDELVVPL.RVIGKA ..............................hlSPAGADRRIPTWASRVVSGLARDRPVVVTKEDLTQRLTEAGCGRDPDSAIRELRRIGWLVQLPVKGTWAFIPPGEA...AISDPYLPLRSWLA.RDQNAGFMLAGA..SAAWHLGYLDRQPDG.RIPIWLPPAKRLPDGLASYVSVVRIPWNAADTALLAPRPALLVRRRLDLVAWATGLPALGPEALLVQIATRPASFGPWADLVPHLDDLVADCSDERLERLLSGRPTSAWQRASYLLDSGGEPARGQALLAKRHTEVMPVTRFTTAHSRDRGESVWAPEYQLVDELVVPLLRVIGKA..................................... 0 2 3 3 +9240 PF09408 Spike_rec_bind Spike receptor binding domain Mistry J anon pdb_2dd8 Domain Spike is an envelope glycoprotein which aids viral entry into the host cell. This domain corresponds is the immunogenic receptor binding domain of the protein which binds to angiotensin-converting enzyme 2 (ACE2) [1]. 
25.80 25.80 26.00 26.50 22.00 25.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.56 0.70 -4.83 11 626 2009-09-14 15:17:47 2006-12-21 12:06:25 5 3 218 29 0 442 0 166.60 48 21.86 CHANGED PNLPD.CsIEcWLsApoVPSPLNWER+pFSNCNFNhSoLhpalQA-SFoCNNIDASKlYGhCFuSlolDKFAIPsSRplDLQlGsSGaLQohNYKIDTsuTSCQLYYSLPtsNVTls...NaNPSSW.............................NRRYGFNshshht......hspHDVVYuppCFssssoaCPCtpss.......hhusCls.........spssousC.PsGTphhpC.tthshsh..........tCcCsCsPcPhs ............................t.C.ht.hhss.....PpshsWcRhhhpsCshshs.lhp..h.htph.C.shs.s+l.s.Catsl.hD.Fhh.tt...........................................................t.h..........................................................s..........sh.psps........hGhhspHDVVYApcCFp.....APssaCPCKLsu.........u...hCVs.s......hppTGhGsC.PsGTsY..hsCapts.................sDslssPcshp............................................................................................................... 0 0 0 0 +9241 PF09409 PUB PUB domain Mistry J anon pdb_2cm0 Domain The PUB (also known as PUG) domain is found in peptide N-glycanase where it functions as a AAA ATPase binding domain [1]. This domain is also found on other proteins linked to the ubiquitin-proteasome system. 20.30 20.30 20.30 20.30 20.20 19.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.79 0.72 -4.22 76 656 2009-01-15 18:05:59 2006-12-21 13:09:37 5 60 181 5 451 632 8 85.50 22 15.60 CHANGED pshcphtpslphLh.cllpNIlppPsctKaRpl+hsNpshppplhsh.ps....uhplL.phhGFpppt.........pthhhhs...tts.......shttlpphhpt.......L ..............................t...tthtpuhphLh.phlsNll...p.....p..P......p..-......p...KaRpI+hsN.tsFppcltsl.pG......uh-hL.pthGFpct.................pphhhhs.....t..........p...........hh.......................................... 
2 189 262 372 +9243 PF09411 PagL Lipid A 3-O-deacylase (PagL) Mistry J anon pdb_2erv Family PagL is an outer membrane protein with lipid A 3-O-deacylase activity. It forms an 8 stranded beta barrel structure [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.81 0.71 -4.07 51 691 2012-10-03 17:14:37 2006-12-21 16:16:45 5 3 608 2 225 817 917 139.70 20 62.70 CHANGED sssshpshplshpashsh.......phtttplphhh-sshshhp.....sst........ssth.hGhssh.hpaphss.t.....halEsGlGsthhspsphs......................t.phuotFpFpsplGhGhpassst......pluhRapHhSNAGlp.p.PNsGlsthslhhuhsF ..................................................................................................s......h.h.h.ht...........p..hhth.t..h..ch.shs.hhp......sst.............ts.h.hG..hs.h..hth.hhtt......hah..phG.hG.st...hhspspss......................thp..l.uos.a.p..F......tpp..hul..G..hpasst........psuhphpHhSNuul....p.p.sNsGhN.h.h.shpluhsa............................ 0 59 132 186 +9244 PF09412 XendoU Endoribonuclease XendoU Mistry J anon pdb_2c1w Family This is a family of endoribonucleases involved in RNA biosynthesis which has been named XendoU in Xenopus laevis. XendoU is a U-specific metal dependent enzyme that produces products with a 2'-3' cyclic phosphate termini. 
25.00 25.00 29.60 29.40 24.40 24.40 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.80 0.70 -5.35 40 279 2009-01-15 18:05:59 2006-12-22 09:55:27 5 13 125 3 203 282 1 246.00 34 69.40 CHANGED stsss-lpslsppLWshD....sN+h.............sutca.plshQsptsshsp.....sDt.AspsLFshVs-shhp..pP....TautahsLlDNYphssG.....hsE.plTsp..EppEppsFL-tlh.pTslM+hhapaLhpKsh.......ssss.psF+.phLpplWFphYsRssut......h-.SSGFEHVFVGEhK.s..........sclpGhHNWlpFYlpE..+pGp......lDYpGYhhctpts...s........lLslQFpWs..........ul.hKsluShFlGsSPEFElALYTlCFlsssc......cp.splplus..hplsIhs..aphtpps.......IGouYP ...........................................tp-lpplsppLaphD.....Nph.................tpph.hlshQsphps..tp......tDt.usps.LFshVspphhp..ps....TatthhsLhcN..Yphs.su.....hsE..hho.sp..chpEpssFLctlh.pTslM+......hapaLhpKsh.......h.ss.p.pF+.p.Lppl.WFshYuRspsp.......s.SSGFEHVFl.GEhK..s..............scl...GhHNWlpFYhpE.......cpGp......lDYhGahhctphsp..sp..........llslpFsWp...........uh.hK.luo..hFlGsSPEFEhALYTlCFlspsp........ct..spl..plss.....h.htlhsaphttpt.......Iuoua................................... 0 77 101 166 +9245 PF09413 DUF2007 Domain of unknown function (DUF2007) Mistry J, Bateman A anon pdb_2hfv Domain This is a family of proteins with unknown function. 25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -9.00 0.72 -4.15 90 1038 2012-10-01 21:59:08 2006-12-22 11:53:11 5 11 859 1 319 859 252 65.90 22 55.22 CHANGED hclhpsss....lpsphlpslLcpsGItshlpspt.huuh.........s...us.sh..hclhVtc.cDhccAppllpchpt ..................h..lhpsss....hcAphlpslLcspGItshl..p....spt.hssh...........h.....ss.uh.....hclhVt-.cchppAppllpph..t................. 0 106 212 273 +9246 PF09414 RNA_ligase RNA ligase Mistry J, Coggill P anon pdb_2hvq Family This is a family of RNA ligases. The enzyme repairs RNA strand breaks in nicked DNA:RNA and RNA:RNA but not in DNA:DNA duplexes. 
20.90 20.90 21.00 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.53 0.71 -4.50 22 428 2012-10-02 00:43:09 2006-12-22 12:03:17 5 5 363 7 118 450 40 184.40 19 55.28 CHANGED p-aVsoEKlHGsshslhh.tttp.........hphstpos.............thp..................hhstatpslpshphhhc..ht..h.......stslhlhGElhG.............Pslpttsh....................t......chcFasFclhh....tp.t......phhs.cthpthspphslt....hsPhLscGshsthhhhss......................................................................h..shtt.slsEGlVl+sst...........ststshlKh+ .........................................................hlhpEKlDGsNhs.l.hh..t....................hthhtRsp.........................ht.....................................................................tpsta...ttt.hpthhthhp...t...................htshhla.GEhhhhh.................pul.phpth..........................................................................cpcFa.lFslhsp.......stt.........taLs.h-..ps.phhsphhs...l......h....VP...h..lh...ts.t.h..s....h.tthph..thh......................................h..shts.sh.t..EGlVh+stt........t.........h...................................................................................................... 2 55 89 107 +9247 PF09415 CENP-X DUF2008; CENP-S associating Centromere protein X Mistry J, Wood V anon manual Family The centromere, essential for faithful chromosome segregation during mitosis, has a network of constitutive centromere-associated (CCAN) proteins associating with it during mitosis. So far in vertebrates at least 15 centromere proteins have been identified, which are divided into several subclasses based on functional and biochemical analyses. These provide a platform for the formation of a functional kinetochore during mitosis. 
CENP-S is one that does not associate with the CENP-H-containing complex but rather interacts with CENP-X to form a stable assembly of outer kinetochore proteins that functions downstream of other components of the CCAN. This complex may directly allow efficient and stable formation of the outer kinetochore on the CCAN platform. 25.00 25.00 26.10 25.50 24.10 24.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.38 0.72 -4.16 24 170 2009-11-05 17:40:01 2007-01-02 12:50:08 5 1 155 12 127 175 0 73.10 33 50.36 CHANGED stphlsRllp.p.Fcs.ppT+Isp-AhpllscYlclFVcEAltRut......pcsputpp...........lchpcLE+.lssQLlLDh .................chls+lLp...t.Fps..ccT+ls..p-..AlplsucalclFVpEA.ltRust.......tpscsttt...................................l-l-cLE+.lhPQLLLDF.................................. 0 33 66 104 +9248 PF09416 UPF1_Zn_bind RNA helicase (UPF2 interacting domain) Mistry J anon pdb_2iYK Domain UPF1 is an essential RNA helicase that detects mRNAs containing premature stop codons and triggers their degradation. This domain contains 3 zinc binding motifs and forms interactions with another protein (UPF2) that is also involved nonsense-mediated mRNA decay (NMD) [1]. 25.00 25.00 25.50 29.80 23.20 22.30 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.12 0.71 -4.63 16 324 2009-01-15 18:05:59 2007-01-04 16:32:50 5 7 266 6 238 323 3 142.70 57 14.31 CHANGED HACAYCGIcsPssVlKC...sCsKWFCNu+ssT.uuSHIVsHLV+S+H+pVsLHs-SsLGDTsLECYNCGs+NVFlLGFlsAK.sEsVVVLLCRpPCA..pt.+DhNWDsspWpPLI-..-RphLSWlspsPS-p-..h+AR.IT.pQIs+LEphW+sN .......................HAC.uYCGIHsPusVVpCs..sCpKWFCNu....R.G....sT.SuSHIVNHLVRA+HKEVpL.....H.-usL...G-TlLEC.YNCGs+NVFlLGFIPAK..uD.oVVVLLCRpPC......A.s..s.u.KDhN.WDsop...W.QPLIp..DRsFLsWLVp.hPS-.pE.QLR.AR...pl.ospQIsKLEEhWKpN....................................................... 
0 88 129 199 +9250 PF09418 DUF2009 Protein of unknown function (DUF2009) Mistry J, Wood V anon Pfam-B_18128 (release 21.0) Family This is a eukaryotic family of proteins with unknown function. 25.00 25.00 49.70 25.80 18.10 21.80 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.45 0.70 -5.71 11 144 2009-01-15 18:05:59 2007-01-05 15:47:23 5 7 77 0 106 145 11 364.20 38 78.36 CHANGED c+A+YIPlRLo.-ERKlLRLLEAALpVS-YTD+VDllSh.pS+sKRlspQLKEhCulLsGLlVA.DhKtGpcLlcpK-FsspAcaFQslFEIGRRYKlMNP-+MRosYGKlhYMlQDSh.s-lpc.tLGFslhKPIhTVapaLppps......uhslLpDshl.hAhscl.P.sKsRsplp+tI+pKEpAlEpLu++YSs..uthscE-lc.slYSluDtNualptNR-Plp+MLphLcpaFsPssspch.asLuIphGtsGARLoHsHc+QaaYVhQSLoLWppIh+-MFpLWhlA-tDLhsssp.YcLssTGQGLNRlQtCPslh+AhcplLpcspccht.sWVGSSVlHLGDcsVPNALaFIDKYsQVsRILIPllpsltpIcsL.csDtplhsYIcppaGusppL++sILpDFF+HuFDGSGuDNaaDAGSCIDGRLTSAWNWsNpIpKKpYYsIFLhoGFouF-G ...............................................spalPlRLs.pERphLcLl-us................LpVS-YTD+lD.h.p.........h...h..................s.psp+hhtplcchhshlsGLhlut.....sh....c.Gpp.lhp.p.+s.htt..t.phhpphFEluRRaKhhNPpphRspYGKhhahl.Du...pltc....hlta...p..hhpslpTVhthLp.ptt......s.thlp.D.hl.hsh......l...........................lpp.lptKct..uhptl.pcYss..pthpp-plcpslhSlsD.psalt.NppPhpphlphLppaa......pPtp..p........t........hoLuIp....................Gt......sGuRLoHsHppQa.YVhQSLhLWppl.ppMhpLW.hu-pD.hL.ss.pt.YpLhsTGQGlpRhptsPph.phMppllppsppph.s..tWVGSsllHLGDcsVPNuLhFIDKYsQls+IL.Plh.sltpl.pl...........p..p......t................httalpptauuhpphphhILtDFF+HuFDGSGuDNh..uGSCIDGRLTSAWNWCsplp+K.aashFhhsGF.GF-G............................... 0 59 79 102 +9251 PF09419 PGP_phosphatase DUF2010; Mitochondrial PGP phosphatase Mistry J, Wood V anon Pfam-B_22310 (release 21.0) Family This is a family of proteins that acts as a mitochondrial phosphatase in cardiolipin biosynthesis. Cardiolipin is a unique dimeric phosphoglycerolipid predominantly present in mitochondrial membranes. 
The inverted phosphatase motif includes the highly conserved DKD triad [1]. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.90 0.71 -4.90 16 326 2012-10-03 04:19:28 2007-01-09 14:34:34 5 8 304 0 187 825 133 152.60 27 70.39 CHANGED lsu.slslhpLlhsPSLhlPHlsVsoFspLPhsl..................hppssI+AVVLDKDNChshPccsclassYpcphccL+.........psasst.plLIVSNoAGosc.DhstptActlE+soG..lsVLRHs.........hKKP..GCtcElhsYF+pp.h..lppscElAVVGDRLhTDllhANhMGuauVWlp-GVp ............................................................................................................thhhPphhl.........s.sltpl..shth...........................LpptuI+ullhDhDNs....Ls..ppsplhs..p..hh....ph.hcch+...................pt.hst..tlhIVSNss...........st..ppsp..tlp...pths.............l..slt.as......................sK.KP....ts...h...p...cl.hp.hh.....t..tp.........................t.........s.pclslVGDRlhTDllhuNh..h.G.aslh......h................................................ 0 72 121 162 +9252 PF09420 Nop16 Ribosome biogenesis protein Nop16 Mistry J, Wood V anon Pfam-B_6406 (release 21.0) Family Nop16 is a protein involved in ribosome biogenesis. 
25.00 25.00 27.70 28.50 21.10 22.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.15 0.71 -4.14 45 332 2009-01-15 18:05:59 2007-01-10 16:36:55 5 4 279 0 223 301 1 178.70 25 84.52 CHANGED lR++++s+uuhs+ss++pts+p..t...+plsh.s..sslI.......tp...sWDcptTlsQNYp+LGLsschstss........................................................shtph..spscltRDspss.l..h.....................................................................pttpspllppLEc.......App.s.....+pt+phSccE..pcalppLlcKHG.-DacAMthD+KLNhhQpotupl+...R+lp+app .......................................................................................................................................................................+p+p+ppp.thpss.p+ph.p+p............+thshht...sshl.........tp......sWDcptolpQNapchGLssc.Ntss..........................................................................................s.ppht.tphc.lpp-.....tps.t.........................................................................................thppspVlppLEp........p.Ast.p............+ptpphSccphcalppLlcKHG....-D...YcAMspDcK.N.hQpT.tpl+++lppah.t............................ 0 77 120 183 +9253 PF09421 FRQ Frequency clock protein Bateman A anon Griffiths-Jones S Family The frequency clock protein, is the central component of the frq-based circadian negative feedback loop, regulates various aspects of the circadian clock in Neurospora crassa [2]. This protein has been shown to interact with itself via a coiled-coil [2]. 
22.40 22.40 22.60 25.60 20.30 22.30 hmmbuild -o /dev/null HMM SEED 989 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.00 0.70 -13.94 0.70 -6.78 4 63 2009-09-11 06:31:12 2007-01-17 12:55:18 5 3 35 0 55 66 0 625.70 32 95.98 CHANGED RssPhsSpGHPLPRRsSP-pSlTL+pHRLARDAS...lpuShhusssspsQ.sSSssRRsSSGESp-TGQSDsppWFsQSNpNPsAsF..-SNhM-VDPPFYQKEoDSSNE-u+hP.ttsPs........shh+sosAHSSSADDYRSVIDDLTVE.pRLKEELKRYKQFGSDhMRKEKLFEIKVHGLP+RKKRELEATLR-FAASL.GsSSp..SoSpR...+KsuRHusts+....SuusShSKHsSSSS.S+SRPVDSAYASMSTG.....spSSGsSLsRPShouttpTupQKVEsYLRDhP-GLhP+HllMT-KEKKKLVVRRLEQlFTGKIuG+phpRspo..hPuhsuuLss..............pGpthu..pPPs..............EshREApI..Q-sp.sc...KpppS+DssSASNSstDQTEsGGsssuSGsGssSG....sNTSPPhs.sP-QRPTRPRDLDPDRlQIPSENM-YIRHLGLVsPEhLpsSpsphp.DVAPDAEGWVYLNLLCNLAQLHhlNVTPsFIRpAVsEKSTKFQLSsDGRKIRWRGGTDGTKFSSDSStDpSQpSPhTDDTEDuSsKNG+RKKpKsppAcSphuphs.S+..........usSDoFHYKPhFlHppuSStETSLE-u..uS.Gs...l-ESs.usS+WshSGSGsTpQRRKRRhDGAIVYYoGAPFCTDLSGDPGDh.........SPss.MpuutctcusupG.........ctschV..RohSGSSLshRPLSDs+hpluph.cFsPtN.....P-LVsDsGspssD.-h.FPWs--PthlcVpPL.....EPSGLGGVLPDDHFhllVTTRR...shRPs.ppp.LuRopoSE..-Ts-hIspRLAohpTS.....PhP.Pps++Lss....u.lpIEYlSGch+RLNPssLPPPAIFaPPFSoDSSasstDDLuSDs-.....-E-s.SEt.MSRRANPH.SDNp.Y.ccsDLu.ssEss.c.Ds-.......cDhchuuDpGts.RuhhspscuVcGsupPhusssG+-sshh+TGSSsATAGGsESGYSSShE- 
......................................................................................................................................tt.st......sp...s.pWappss.psh.......t.h-........p.s...pps.p..p........pt....t...........................................................sp.usss--aRSlIDDLTl-.ppL+pcL++Ycp..ss.shhcc-KLFEl+haGLPtcK++ELEthLRpFsssh..t...s.t........s.....t.........tt..............ptts.Ss.......-SuYsShS.t.u.....stsptss......t....h..t.......p..t.tphpsaLp-hPtGLhP..p...h.ho-+p+KphlV+RLEQlFsGc........t.....................................................................Es.+EAph..................t......................................................................scQRPTp.hDlD..R.......hpYhppLsh.s.p..................cGWl.LNLlhshAQLHhhsVsstFl+pAl.thSs+hplS.DGpKlRW+GG.psTphSup...u.....ttts....tp..h....t.tt.tphppt......t.t.....t........s...t....t..................................................ptFhYcPhFhp.....sst......ss....-tt....S..s............pps....st.st..hs.sh...t..........p.pp.-GshhaYsss.FChDLSGD..hsh.........p..ss...h..st.p................................ps.SGo.....h.hp.Phsp.......................t..........sp..t.......p....hspt..t.......t.......h......ssGlGGV.PtDpFhhhV.o++...............................t........................t..p.......l..pth...sth..ps.............................lph-hhohphp.L.PssLP.Ps.ah..as.ssp..t....s...p..s......................................................................................................................................................................................................................................................................... 0 8 23 51 +9254 PF09422 WTX WTX protein Bateman A anon Bateman A Family The WTX protein is found to be inactivated in one third of Wilms tumours [1]. The WTX protein is functionally uncharacterised. 
21.20 21.20 21.40 21.80 20.20 21.10 hmmbuild -o /dev/null HMM SEED 471 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.70 0.70 -5.53 9 168 2009-01-15 18:05:59 2007-01-17 13:40:10 5 3 40 0 96 163 0 267.30 25 53.70 CHANGED psls+SKTHDGL...........pctspususssphssstShsssP.....s.t..sspShstuhsFhShl++uutppsttu.....phuhs+pK+GLKGlFSShRh+RKsK.ttt-.p.s..pt.ttshhhstp.suSh.pl.......E...pshcc................susc.sssssppsPupspstsssPstsspp.usth.sstsh.ts-u.hps..st.tth..s................................uss-PPu-PSsDpLCh.hFsDVTSLKSFDSLTGCGDIIAD.-.-...GsSsssp..............................................hsusGptssu....thcpsuslVsYQGGGEEMASP-psD......................-s-hpchW-hlspo--pppt............s..hPths.....p.hh..uspssc.....+c.splcsttLtclPl.........ppt.p.t.tsPps-ptEusPsSDEGYaD..SsTPG.E--s...uu.s..p+sslPRDShSGDALY-LY.DP--u.....httusp-ssslSph+shSt.hhs.Ph ..............................................h.pspoh-sh.......................................t...........................tph...h.....................t...t......................sh.+.t+sh+shFp.h+.ppp.....................................................................................................................................................................................................................................TGCG-lhA-...........S..hs...................................................t.t....t......pptsshsshQGusEphA.Ptt.-...................t......tt...ht....t.....t........................................................s..t.............s..t...h.......................t..pp..Es.ssSDEGYaD..ShoPs.p.-t...........t.hhsRDShSGDtLY-ha.-Ppt..................................h................................ 
0 4 8 30 +9255 PF09423 PhoD PhoD-like phosphatase Bateman A anon Prosite Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.82 0.70 -5.92 148 1704 2012-10-02 19:15:56 2007-01-23 14:55:29 5 31 861 2 812 2178 650 392.20 20 71.03 CHANGED hGVASGDPtsDulll................................WTR...lss..............................s.......slsVpaElA...s.........DtsFp.........p..lVppGss.hsssc..hDaoV+..........................V-lsG................LpPsppYaYRFp...s...........stSssGRsRTs....sssthpp....l+hAhsSCssa............pGa...................assYcphAp....p.s...hDhVlHLGDYIYEh.............t.t.h....sppth.p......t...t-.................hhTLsDYRtR....auhY+sDssLpuhHA....phPalssWDDHElsNNhh...tsusp..................ttsattR+ssAhpAYaEahPl............R...........tss.s.....................hp.l..........YRphpaGsLsclhhLDTRpa+s..........................................spsh.h....................................................................t.h...........................s....ssRolLGtpQppWLhppLt.......sSp...upWpllupQlhhuphshss..........................................................t.shshDsWD...........GY.spRpclLpth...p....pt.slpNsVhLoGDsHpsaAs-lpss...............ttt..p.s....lusEFsssS....loSsuh....................hst.lhst.sPclpahshsp...............RGYs........plc..hsspphpsca 
..................................................................................................................................................................................................................................................................h.hth..................t.....t............................................h................h..............t.....s..a.ss...p..........................htht.s................L..ss.p.h..h.Y...p.h.h.................................s.....h....G........p......hp...Th.......stt...tp...................hphsh...sSCt...t..h.............t..uh....................................................................hpha.p.p..hsp....................p......Dh.hlahGDhlYt.s...............................h........................................................................h..s..l...t.c.....Y.R.....tp........a.t.....h.h..........t....c......s.......s.......lp...t...h.p..u.............phP..h........l.h..hW.........DDH.........E.l.t.sshh........ttt..................................................tsh...t...t.h.t..t..s...AhpAa..h...E...a..h..Ph...........................................................c.................t...p............................................hpl.......a..c....p....h....p.....a.....G....s....h.....h....p....lh..h.LDsRpaRs...............................tpt...t.........................................................................................................................................................................s...sspsh.lG....t....p....QhpWLhpt........L.t...pup...........upW.p..l..l..us..p.h....hs....h......s..................................................................................................h.s..h..-.s.Ws.............G..a.....t.p.pp......c...lhphh...........tp....t.....t.....h....p.....s....hl..hLoGDhHhs.hs.t.c.ht.t....................................................hhEhssss............lst..................................................................................................
..................................................................................................................................................................................................................... 0 272 518 709 +9256 PF09424 YqeY Yqey-like protein Bateman A anon Bateman A Domain The function of this domain found in the YqeY protein is uncertain. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.67 0.71 -4.34 42 2465 2012-10-02 13:42:24 2007-01-24 12:41:00 5 3 2421 1 718 1873 2150 142.00 36 94.47 CHANGED +-+lppDhKpAMKA+DKt+LuslRhlpAAlppcElctpt....c.LsD--llsVLsK.lKQR+-SlppappuGRpDLs-pEpsElpllppYLPpQLo--ElpshlppsIu-s...GAs.....uh+DMGKVMuslps+ltG+A.DhutlSshVKptL .......................................................................................pplppDhK.pAMKAK..D..c.+..L...sslRhlpAAl..ppt-.l....ctpt.........................p..L..sD-.c..ll..slls+..lKQR..+-Slpt.appu...G..R.p..D...L.A-p....EpsEl.sllppY...L........P....p......pL..o--E.lpshl.c.ps....I....sc....s....G.As.......u.h.p.D.MGKVMssl.ps...+...l...t...G...+...A...Dsstl.uthVKphL........................................ 1 253 495 623 +9257 PF09425 CCT_2 Divergent CCT motif Bateman A anon Bateman A Motif This short motif is found in a number of plant proteins. It appears to be related to the N-terminal half of the CCT motif. The CCT motif is about 45 amino acids long and contains a putative nuclear localisation signal within the second half of the CCT motif [1]. 21.00 21.00 21.00 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.76 0.72 -7.10 0.72 -4.40 18 348 2012-10-01 19:54:00 2007-01-29 13:51:09 5 9 35 21 176 437 0 26.20 60 10.36 CHANGED PlAR....+uSLpRFLEKRKcRlss...ssPY.s ...........PhAR....K.sSLpRFLE..KRK-.Rlsu...tuPY..t...... 
0 26 104 145 +9258 PF09426 Nyv1_N Vacuolar R-SNARE Nyv1 N terminal Mistry J, Wood V anon Pfam-B_50964 (release 21.0) Domain This domain corresponds to the N terminal domain of vacuolar R-SNARE Nyv1 which adopts a longin fold [1]. In yeast it has been shown that this domain is sufficient to direct the transport of Nyv1 to limiting membrane of the vacuole [1]. 25.00 25.00 151.30 150.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.97 0.71 -4.62 5 27 2009-01-15 18:05:59 2007-02-05 13:24:10 5 1 26 1 15 24 0 136.60 63 56.58 CHANGED VSYVEVlcsG+olSSC..Ycsts.ssusYGslousscuss....TP-lFHsLIsDMVlPKVVslsGNKVTKMShsLIDGYDCYYTT..cscDsspVLVCFT+lDlPKILPIRlLS-LKphEst......DsDEhLSuslGsILDsFHcELloYRNp VSYVEVI+sGcTlSSC..Fpshp...pspsYGolsss...scph.................TPslFHpLIhDMVLPKVVPIcGNKVTKMSMsLIDGFDCaYoT...-D......cDscTVaVCFThVDIPKILPIRlLStLpchEuN..........uTsEhLSuHVGpILDuFHEELlpYRNp. 0 1 6 12 +9259 PF09427 DUF2014 SREBP_C; Domain of unknown function (DUF2014) Mistry J, Wood V anon Pfam-B_71890 (release 21.0) Domain This domain is found at the C terminal of a family of ER membrane bound transcription factors called sterol regulatory element binding proteins (SREBP). 
20.20 20.20 20.70 64.90 17.60 19.80 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.58 0.70 -5.25 8 72 2009-01-15 18:05:59 2007-02-05 14:12:33 5 3 72 0 59 73 0 263.90 50 26.35 CHANGED +shKaoLRsllG.psY..uaLTGsoE.....EpEtARlKAWsIALDAQLsGGDsElSKSRLsLTlhASGTLPcTPtRlMLKALHsRVLLWcl....ssshplsspluuclARhpWNtARphpphLsp......ucccsLPEHLAsLLEp-.sDDVhssuIlQRAaNLA.WNRsTscsshsp..ssuhDsVVEDsAl+SPLDALAAWYSSpsLp+sLhpoL...pspsts.....upcshhscIslAlplAPhsSsAphRALVA+AVlhsccRussIssALsAls.spss .............+shcasLRsllGh+hY..uhLTGlTE.....EpEtARVKAW-IAlDAQLsGGDsElSKSRLlLTlhASGTLPcTPtRlMLKALHsRlLLWclu..suhshtluNtlAtpLARhpWshARphpchLsp............sp-DsLPcHLAsLL-tD.CD-VhsDsIlQRAtNLA.WNRsTp-ssssc..pshhDsVVEDsAlpSPLDAlAAWWSSphLQcALlpsL.....-hssss.......................hcspcshppplclAlclAPhsSsAasRAlVs+AVhh-ccRsssIsssLtALs.p.ps... 0 10 26 46 +9260 PF09428 DUF2011 Fungal protein of unknown function (DUF2011) Mistry J, Wood V anon manual Family This is a family of fungal proteins whose function is unknown. 24.50 24.50 25.00 27.70 22.50 24.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.09 0.71 -4.27 37 112 2009-11-20 16:11:53 2007-02-05 17:05:15 5 2 109 0 85 112 0 149.10 29 52.01 CHANGED +lpLRpso........schlss.RshsYYFsp.s.tp...........................+cpFpsuAlou-slhphup.hsas.......tsphPWRVlclp.....pttsphppp...t.................t...p++++PGKK+Rlth+pp.............ttcp.....tccp.......-t-K+..........p++p+cKKh++...............Rtpp+p+ .......................+lpL+p.s........tthlsstRPhsYYFus.pttp...........................+ppFppuAVou-sllptup.hsa...................usphPWRVlclp..ttssphctptt..t............................................pctpp+++RPGKKpRlshRtp.............ttcp......pEcp.....p+-K+..........pR+N+cKKh++....Rtpc+pp.................................. 
0 14 41 71 +9261 PF09429 Wbp11 WW domain binding protein 11 Mistry J, Wood V anon Pfam-B_13108 (release 21.0) Family The WW domain is a small protein module with a triple-stranded beta-sheet fold. This is a family of WW domain binding proteins. 21.10 21.10 21.70 21.70 20.70 20.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.64 0.72 -4.00 30 272 2009-10-15 16:14:36 2007-02-06 10:25:24 5 9 212 0 198 259 1 77.90 41 17.48 CHANGED c+shNPs-utRKppKcKElKKsKp-RpptRcttlt++sPcplpcplccLcph.EtsttL.....pstc+p+lcpLEcslchlt+ .......tK.hNPsDttRK.......pt+K+ELKK..........NKcpR.hsRss..hLtt..KDPcpIhcph-cL-ph..Ehss.h......................cps.+c+hcpLccshctlh................................... 1 63 99 149 +9262 PF09430 DUF2012 Protein of unknown function (DUF2012) Mistry J, Wood V anon Pfam-B_49614 (release 21.0) Family This is a eukaryotic family of uncharacterised proteins. 26.60 26.60 26.60 26.70 26.50 26.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.55 0.71 -4.28 26 303 2012-10-02 19:08:27 2007-02-06 12:46:16 5 6 253 0 216 352 5 118.10 28 48.11 CHANGED hsstshhsssplhL............su......................spa...hshl+pDGoFshpsVPsG....oYll-lpsssahFss...lRV-ls......psphcutplshhpstpsh.........................hshPlhlcshuhtpYa..Rcpaslhsl..LpsPMlLM ......................................th.sss+lhl..................su..................................spt...huhlppDGoFshpslP.s.G............SYll-lt....s..sahFpP...lRVDls................ttphc.s....ph...lshhpst....p.s....p...........................hsh....P.Lph.csh..u.hpYappRcpa..shhsh..L.h.NPMlLh................................................. 0 76 117 175 +9263 PF09431 DUF2013 Protein of unknown function (DUF2013) Mistry J, Wood V anon Pfam-B_11317 (release 21.0) Family This region is found at the C terminal of a group of cytoskeletal proteins. 
21.40 21.40 21.80 22.70 20.20 21.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.73 0.71 -4.42 23 255 2009-01-15 18:05:59 2007-02-06 12:52:43 5 8 227 0 183 253 1 139.20 34 23.37 CHANGED ss-.hs..hl+LLLslNEQahhsuht..............pNtVhchL..............ptssh+sFsEpLlLhlNR-pDs......lplh.........lLKhLYLlFT...ossThchFYsNDL+VLlDIlIRpLssL......ssctchLRtTYL+VLtPLLppTplpch...Y++s-lhcl ..............................................................s.p-.h...hlplLLshN.pahhss...................................pNhlhpsL..............pts....sh+sFsEpllLLLN.Rts-s....................lplh..................lLKhLh.lFo...os..sT.t...p.hFYsNDl+VLlDIllRpLhDL............sssstLRhpYLclLhslLppTphpp......a+ps-lht.h................. 1 60 94 148 +9264 PF09432 THP2 Tho complex subunit THP2 Mistry J, Wood V anon manual Family The THO complex plays a role in coupling transcription elongation to mRNA export. It is composed of subunits THP2, HPR1, THO2 and MFT1 [1]. 28.00 28.00 29.10 31.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.71 0.71 -4.29 5 26 2009-09-11 02:57:22 2007-02-06 14:47:07 5 1 25 0 15 20 0 127.80 61 49.81 CHANGED hLpYINLLs+LSVDLA+QlEsuD..sssclhV-+hsPPsELQulLcpYss...-ss-scsLRAcLp+YLD-IKMsRAKYuLENKYSLp-oLppLTKEVScWRccW-sIEsLMFGDuPsSMK+MlQsIESlK.cpLTs ..hLRYINLLcRLSVDLAKQVEVSD.PSVTVaEhDKWsPs-cLQuILEQYss...P-TDIcslcAQlcsYLDQIKMuRAKauLENKYSLKEpLsTLTKELN+WRKEWDcIEMLMFGDsAHSMKKMlQpIDSLK.Scls.u... 0 1 6 12 +9267 PF09435 DUF2015 Fungal protein of unknown function (DUF2015) Mistry J, Wood V anon manual Family This is a fungal family of uncharacterised proteins. 
20.90 20.90 21.50 41.90 20.80 18.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.69 0.71 -4.36 20 119 2009-01-15 18:05:59 2007-02-07 13:30:17 5 1 117 0 90 111 0 120.20 43 87.04 CHANGED Ms..hllYhhshhlllsuT.hlahsRpRWhsh....h...........lschhYs+Ls....SFssDlEAGLSSosFDLuu.Nl..ssuDsRuGLD-puKcEIp+IM+pcplsFD-ARhlYhcc+FucNsIuPD.GpPRDPK ..............................hha.h.hhhhllhss..hhahhRp+hh.h...........................hsshhYs+LP.....SFpsDlEuGLSSusFDLsu.Nl..ssuDsRuGLDctuKcElh+IM+pc.plsFD-ARhlYhcp+FscNs.IGPD.GpPpDPK.. 0 23 49 79 +9268 PF09436 DUF2016 Domain of unknown function (DUF2016) Mistry J anon Iyer L Domain A predicted alpha+beta domain that is usually fused N-terminal to the JAB metallopeptidase. This protein in turn is found in conserved gene neighborhoods that include genes encoding the bacterial homologs of the ubiquitin modification system such as the E1, E2 and Ub proteins [1]. The domain is also known as the JAB-N domain. 19.80 19.80 20.00 22.90 19.10 19.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.31 0.72 -4.41 14 40 2009-01-15 18:05:59 2007-02-07 13:38:47 5 1 34 0 23 41 0 71.90 37 32.17 CHANGED hDthLQssFPolhsP+aGs.lsshppsGcRhllAusGlalElpRsWLcslpplu...t.psslPYGplscplchh ........................hDtsL.suhPoVhVP+aus...lssh..p..psGcRlLluusGlalElpRPWLcllptlu...s.sstlPYGsVpEphchs...... 
0 4 12 19 +9269 PF09437 Pombe_5TM Pombe specific 5TM protein Wood V, Bateman A anon Wood V Family \N 19.10 19.10 131.50 41.20 18.00 16.50 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.83 0.70 -4.96 2 8 2009-01-15 18:05:59 2007-02-07 13:40:20 5 2 1 0 8 9 0 120.00 34 85.26 CHANGED uhhcchNoQ.NR.M........ppSshs.pNIs..hhslhIsc...TspYClAs+.........................................lLlaL.Y...................ChYI..au.s.olppphcpasFus.slhappFas..c...FlRTp...........uhs.hRThsKa.hI....IlhulppVhh.p.hpcsYsho-I.DhhQ.......................sYpNss.uRFhpR .....uhhcchNoQ.NR.M........ppSshs.pNIs..hhslhIsc...TspYClAs+..................................................................hChYI..au.s.ohppphcpasFus.slhappFas..c...FlRTp...........uhs.hRThsKa.hI....IlhulppVhh.p.hpcsYsho-I.DhhQ....................................hh.......... 1 8 8 8 +9270 PF09438 DUF2017 Domain of unknown function (DUF2017) Mistry J, Iyer LM, Burroughs AM, Aravind L anon Iyer L Domain This is an alpha-helical domain found in gene neighborhoods that contain genes encoding ubiquitin, cysteine synthases and JAB peptidases [1]. 
25.00 25.00 25.10 33.00 24.80 24.40 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.15 0.71 -4.71 19 377 2009-09-11 16:56:57 2007-02-07 15:24:14 5 2 375 0 100 254 96 175.00 33 92.35 CHANGED ht+ppshtGsphputl-PtEsplLpsLsuslhshLpp..pstsuPpD.LutlsGh.sGpsp.PpDPsLtRLLPDhaps-s-ss.ss...........uthRpLpEs-lhcsKhssuphlhcsL......Pts.....usclpLot-pAcAWluuLNDlRLsLus...tLclss-ss........-t..s-cshs.phslYpWLTalQESLlpAlhu .................................................................................................t.th.s.ht.hEhtlLtslssth.thltt...........t.....t...p....ss.s....t.D..Ltth.h...u.......................ss.........sp.P.........p..DPsLtR.L.LPDha+sD-pss....................uthRsh+Es-lhsAKhtsApslhssl................scs........uuplpls.ppApsW..lsulNDlRLslustLsItscss...................tchst.p.c...s...ts...s.chsVYp.WLshlQEsLVpshh........................................................ 0 31 77 96 +9271 PF09439 SRPRB Signal recognition particle receptor beta subunit Mistry J, Wood V anon Pfam-B_7840 (release 21.0) Family The beta subunit of the signal recognition particle receptor (SRP) is a transmembrane GTPase which anchors the alpha subunit to the endoplasmic reticulum membrane [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.17 0.71 -5.06 8 357 2012-10-05 12:31:09 2007-02-07 17:41:34 5 11 294 5 246 9068 2252 182.20 30 56.77 CHANGED ospsslllsGlssSGKTsLFspLoTs.....shpcThsS..Esssuh+h...ps.+GsphTLIDaPGH-+LRhcLl-phhtpus.l+ullFVVDSol.sKclp-sAEaLYplLo.othh.csslsILIACNKp-lhhA+ssphI+puLEKElsplhcpRutuLssh..........-uss-tss.LsppGcsFcFspLcsp.V ...............................................t..tpslLL..sG.ss.SGK.T......s.....L....as......p..L..h.p.s........................................ph.t....t....T....h....T....S......h.......c...s.s.....s..s.h....t.h........................t..t..t...p..s...p...s..h...........p..L..l......D....h...P...G...........H..........t.......+......L..........R.......................p..............h.......h....c......h.................................................t................s..............p.........s........+..........u..........l.......l.......F..V..V...D.......o....s.......s.........h...p....c.............p....l........c...s..s.A..E...a....L...Y...s..........l...L...........h.........s...........s.p...........h.............h.........p.........p.............p........h.............s.lLl.s...C.NK.......p..D...h..........h....s.....A....c...s...s....p..h....l...+.....p...t.........L..E....p..El.s....p.....l..R.h.o..Rsts.lpsh......................-tst.p.............h....l.s..t.....u..p.t..F.pFpph...................................................................................................................... 0 85 137 199 +9272 PF09440 eIF3_N eIF3 subunit 6 N terminal domain Mistry J, Wood V anon Pfam-B_4886 (release 21.0) Domain This is the N terminal domain of subunit 6 translation initiation factor eIF3. 
25.00 25.00 25.40 26.50 24.40 24.10 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.68 0.71 -3.99 23 318 2009-01-15 18:05:59 2007-02-08 09:34:55 5 6 244 0 203 294 3 126.30 37 30.33 CHANGED DLstKlhsaLDRHLlFPLLEFhusctha-p..........p-lhcA+h-LLpcTNM.sDYshslhpcl....sscphPsEhsp+RppVlppLpcLppcssplhclhpc.-llssl+o..D+stNlcaLp...cc+slTh-hlsuLY+aupap .........DLTs+ls.aLDRHLlFPLLEFLss.....p..p.l..as-................c-llpuKh-LL.p..cTNM.l.Dash-lacpL........sc...-.........hPpchtc+RppV.lspLc.pLppcspslschhp.ss-sspphco.....Dpph.hc.aLp...cc....+.s......h................p.-hl.ssLYcau+FQ...................................... 0 78 113 163 +9273 PF09441 Abp2 ARS binding protein 2 Wood V, Bateman A anon Wood V Family This DNA-binding protein binds to the autonomously replicating sequence (ARS) binding element. It may play a role in regulating the cell cycle response to stress signals [1]. 25.00 25.00 28.70 27.80 24.60 19.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.08 0.71 -4.35 6 71 2009-01-15 18:05:59 2007-02-09 13:41:04 5 2 68 0 58 71 0 167.70 58 23.27 CHANGED pRsLPsRc.Vos-oI-DAYsuFILYCNPsVshsTDTspLRcsFRsPPKS-GKSFSoasLFELI+pL-sKEIKTWupLAlcLGVEPPD.EKGQSoQKlQQYAVRL..KRWM+uMHVDAFFEYLLG+.HPYaspIPPspsPlu-htRDGVssE-DLALRALlPch+P+RGR+Ks--h--ss .....s.RsLPsRc.lT.s.pTI-DAYVsFIhYCNPsVPhs...oDTspLRcsFRsPP+S-GKoFohFsLapLIcK.L-pK.ElKTWhpLAlcLGVEPPshEKcQSoQKVQQYAVRL..................K.....RWM+uMHVDAFFEYhlG+sHsYaTplPsssss..hs-..RDGVshE-DhALRALlPch+PKRGRKRs--cp..t............................. 0 10 27 45 +9274 PF09442 DUF2018 Domain of unknown function (DUF2018) Coggill P anon manual Domain Acid-adaptive protein possibly of physiological significance when H.pylori colonises the human stomach, which adopts a unique four alpha-helical triangular conformations. The biologically active form is thought to be a tetramer. 
The protein is expressed along with six other proteins, some of which are related to iron storage and haem biosynthesis [1]. 25.00 25.00 30.20 29.90 22.70 17.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.50 0.72 -3.42 14 194 2009-09-11 13:54:03 2007-02-09 16:45:20 5 1 193 8 23 82 2 81.60 53 91.25 CHANGED FtGoP+-KFh-IlhpANpsllcpElEclhcchsshcplhEcp.Gls.........Epcl+sahhcpsch..lcsthsslYIEhhGcILopsE ..............FspoPKEKFhEIIpNuNhsslEK.hEchFtcalAM.ELLEKp.Gls.........Eh..-sK...sFILENuDh...IE-RpNDlaIELuAcILu+p.t. 0 6 19 23 +9275 PF09443 CFC Cripto_Frl-1_Cryptic (CFC) Coggill P anon pdb_2j5h Domain CFC domain is one half of the membrane protein Cripto, a protein overexpressed in many tumours [1,2] and structurally similar to the C-terminal extracellular portions of Jagged 1 and Jagged 2 [1]. CFC is approx 40-residues long, compacted by three internal disulphide bridges, and binds Alk4 via a hydrophobic patch. CFC is structurally homologous to the VWFC-like domain [1]. 20.20 20.20 20.50 20.50 19.60 18.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.17 0.72 -4.32 9 108 2009-01-15 18:05:59 2007-02-12 15:04:21 5 3 47 1 44 92 0 33.30 52 19.83 CHANGED CGu.l.HGsWl.+uCpLCRChaGsLHChPpph.scCD ......CGu.lsHGsWl..+tCpLCRChaGtLHCFPpth.ssCD... 1 6 8 16 +9276 PF09444 MRC1 MRC1-like domain Wood V, Bateman A anon Wood V Family This putative domain is found to be the most conserved region in mediator of replication checkpoint protein 1. 
25.00 25.00 25.90 25.90 24.00 23.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.23 0.71 -3.87 12 137 2009-01-15 18:05:59 2007-02-12 16:05:59 5 3 132 0 101 140 0 137.30 40 11.45 CHANGED hc+pshsshlEtEAEES-DEa.........tGlGGs............DsEto..--.su-lccMIDD...pstpchcscpltp.hhhpcppptDp+.lpKlLcDIpsGshR.+Rh...tNuh-h-lS.Ds-D-.lpphR.p..+Rc...hh+p+hLpssc...hspLscNsKupAFaco .......c+pts+chlEppAEESE.DEa...................tGLGGs...................-sEsu...--p-spsccMIDD...tsspshcpcclst.hhApcp+ppDc+plpKLh+DIp.sGtLR.++R.....su.-...h-LS..Ds.-D.-thtph.Rht..RRcht.ch++tLLtscc...ltclucNPKppAFhco............ 0 25 53 85 +9277 PF09445 Methyltransf_15 RNA cap guanine-N2 methyltransferase Wood V, Bateman A anon Pfam-B_9480 (Release 21.0) Family RNA cap guanine-N2 methyltransferases such as Schizosaccharomyces pombe Tgs1 and Giardia lamblia Tgs2 catalyse methylation of the exocyclic N2 amine of 7-methylguanosine [1]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.12 0.71 -4.69 11 426 2012-10-10 17:06:42 2007-02-12 18:11:39 5 9 324 7 290 6417 1083 156.50 32 33.86 CHANGED tphlLDlFuGuGGNsIpFAp....Fp.Vhul-hshp+ltpst+NAcsYGV.sc+lhhlhGDahp...........................................hlsph+htph...hDsVFhSPPWGGPsYpcppsaDLpp.plpPhslppllcs............shplopNll...lFLPRNosLsQL.....................................................splshclhssts+Cp.................lh.hppNGhhKulhsaaGpt 
..............................s.hllDsFs..G..sG..G...N..s.I....p..F.....A..hp...............h...p....+..V...l..A..lD..hDst+lphA....c.+...N.....A....p.l..Y.......G..V.....s......-.......+...I....p..a..l....p.G...D.h.hp................................................................................................................l.h..t...p...h..p.................hD.s..V.Fh.S.P.P...W....G.G............P....s....Y.............t..p.............t.....p........s.............a.....c.l.p.....h......h....p....P...h..sh......plhph..........................hpph..o.p...p....ls.......ha..LP.Rss.slp.Ql...........................................................sph...............t..p....hc.................l.p...h.pst.pslhhhhs..h......................................................................................................................................................................... 1 114 171 246 +9278 PF09446 VMA21 VMA21-like domain Bateman A anon Mitreva M Domain This presumed short domain appears to contain two potential transmembrane helices. VMA21 is localised in the ER where it is needed as an accessory factor for assembly of the V0 component of the vacuolar ATPase [1]. 21.30 21.30 21.60 22.70 20.70 21.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.16 0.72 -4.05 18 286 2009-01-15 18:05:59 2007-02-13 13:09:18 5 7 216 0 203 272 2 62.80 32 50.42 CHANGED ssuVlhKL......................lhFoshMlslPlssaFshpthl..........hts..th...s..ssshhuGlsAslssplVLshYlhhAa ...............................ssltpL..........................lhFoh.hMlslPlusaFsspphl................hpu...............ss.shhuuhhAllssplVLshYlhhAh............. 0 50 97 165 +9279 PF09447 Cnl2_NKP2 Cnl2/NKP2 family protein Wood V, Bateman A anon Wood V Family This family includes the Cnl2 kinetochore protein [1]. 
21.40 21.40 21.90 29.30 21.30 19.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.92 0.72 -4.30 11 88 2009-01-15 18:05:59 2007-02-13 13:17:23 5 2 87 0 64 79 2 66.90 42 34.58 CHANGED sEppILssaLLssupL.sIISLppFpc...LFP+plpssPpI.csLY+-LQpQRp.pslDpVpspI-pEhcp ...oEssILsNaLLoPusLPolISLppFsc...LFP+pl.....po.cPpl.RsLYR-LQp.Rs.psl.DhVptNIcpEh+.t........ 1 14 30 51 +9280 PF09448 MmlI Methylmuconolactone methyl-isomerase Coggill P anon manual Domain MmlI is a short, approx 115 residue, protein of two alpha helices and four beta strands. It is involved in the catabolism of methyl-substituted aromatics via a modified oxo-adipate pathway in bacteria. The enzyme appears to be monomeric in some species [1] and tetrameric in others [2]. The known structure shows two copies of the protein form a dimeric alpha beta barrel. 29.60 29.60 30.20 31.90 28.70 29.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.55 0.72 -4.32 3 8 2012-10-02 00:20:33 2007-02-13 14:42:15 5 1 8 14 3 13 0 105.10 71 95.79 CHANGED IRLLYLLVKPAGMSDETFRAECLRHYEMSHDVPGLHKYEVRLVAEQPTDTHVPFFDIGHVDAIGECWFKDDAAYATYMASDIRKAWFEHGKTFIGQLKPFRTAPVAGDEPAS .....IRlLYLLVKPEuMScEpFRtECL.RHaEMStslPGLHKYEVRLVAp..pPTDTHVPFFDlG+VDAIGECWFcsEtsYpsYMsSDIRKAWFEHGKTFIGQLKPFhTtsVs......h......... 0 0 1 2 +9281 PF09449 DUF2020 Domain of unknown function (DUF2020) Coggill P anon pdb_2i8g Domain Protein of unknown function found in bacteria. 
25.00 25.00 38.50 42.00 20.40 17.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.75 0.71 -4.52 4 58 2009-09-11 14:12:22 2007-02-14 16:43:04 5 2 56 2 15 52 0 138.70 53 78.50 CHANGED LPlDAhPtsP.GRsu.ptCPYLDocWVADTNGQRlTGhGsDERFsTPACVFWSYPEtPQhTVhVRcMsTp-DAIAVVDWAAPIDoTE.AEEPsGWSGGRRGGscpSGAlYAVQKsssAVlVaTNQ-QSLKAQLlAEEsIpNLGL ...............................P.....p.ss.....scCPYLDopWVA-TNGQRhsu.GlDsRFsTPACVFWSYs--PQhTVhVRcMsopp-AhtVVDaAAPIDoTEPAp....-PsG...WSGGR...tu.....ss...cGAlYAVpKGssAVVVaoNQpQSlKAchIAcEsIspLuL......................... 0 4 11 15 +9282 PF09450 DUF2019 Domain of unknown function (DUF2019) Coggill P anon pdb_2i9c Domain Protein of unknown function found in bacteria. 20.70 20.70 20.70 21.40 20.50 20.50 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.20 0.72 -4.18 7 37 2009-01-15 18:05:59 2007-02-14 16:49:09 5 2 22 1 27 37 0 101.70 31 82.69 CHANGED sptLVtRFAclultQDsAlLts-hu+FNRLascMtplssELKpRsGDQRpALhsLasHPNMQVRLpAAKtTLAltPsEARppLEAIAsS+WhPQAGDAGMsLhpLD ..........pcLVtpFschshtQscAlhts-tpphN+hacchhtlhscL+u+ssst.RpuLhsLhcHPshpVRlhAAttsLt..htsscA+thLptlupt.................................... 
0 1 14 22 +9283 PF09451 ATG27 Autophagy-related protein 27 Wood V, Bateman A anon Wood V Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.91 0.70 -5.16 14 241 2012-10-02 14:19:22 2007-02-15 13:10:36 5 15 184 0 180 355 1 221.90 18 72.68 CHANGED sshhhuhtsssuuhcCspppl...ppYclspluu.pslpsscsTPPopsphpahlslC......pcsshps.cpCsss.stlCulp.hhhsst...sshsopllshtpsspsshcthts............tGlplphsG.......tsauspphcAplpapC....ssstsusEhps......................................................sstssp.lclshps..uC.p................t...t.......ttsspsssppuss.....usthGaFTWLFlhlhLhh.slYllsGuahNasphuspsa.....-lls+u.-hl+slPhhh+-hlp+........llsshpG.........suuRG ..................................................................................................................................................................................hs..........................................................................................................................h.hs.s.....................C............hC.h...................h...hh....s....h.....................t.....t..p...t..........................u.l.l..hp.u............t...t....t....t.t...ps.lph...C....s..ph.p.t....p..h.........................................................................t..s...t......h.p..lphps...uC..............................................................ts.ttstp.......................usthG.aF...s.hlh...l..l.h...hlh.h.ssYllhGuhhshpt.hutpG.h.......-hlPph.-hhps.lPhhhp-hhth........hhtt.ts......................................................... 0 83 123 159 +9284 PF09452 Mvb12 ESCRT-I subunit Mvb12 Wood V, Bateman A anon Wood V Domain The endosomal sorting complex required for transport (ESCRT) complexes play a critical role in receptor down-regulation and retroviral budding. 
A new component of the ESCRT-I complex was identified [1], multivesicular body sorting factor of 12 kD (Mvb12), which binds to the coiled-coil domain of the ESCRT-I subunit vacuolar protein sorting 23 (Vps23) [1]. 25.00 25.00 81.30 81.10 21.80 17.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.03 0.72 -3.81 4 24 2009-01-15 18:05:59 2007-02-15 13:39:10 5 1 23 1 15 20 0 91.10 53 88.18 CHANGED hLR+IPLYNtatss..aP+cR.sKlplPthpl.shssTt-hLpsWhcECccIhcssp.+cppscpF-pWYpEpYLupKPPGllps....shLuPpRK .lLR+IPLYNKYGc-..FP.p.EslsRhphPEaKLPsLQ.PTc-hLsPWYEECDsIs+s.CphHDuSsKcFDpWYcEpYLSKKPPGllss....slLSPSRK........... 0 1 6 12 +9285 PF09453 HIRA_B HIRA B motif Coggill P anon pdb_2i32 Motif The HirA B (Histone regulatory homologue A binding) motif is the essential binding interface between HIRA Pfam:PF07569 and ASF1a, of approx. 40 residues. It forms an antiparallel beta-hairpin that binds perpendicular to the strands of the beta-sandwich of ASF1a N-terminal core domain, via beta-sheet, salt bridge and van der Waals interactions [1]. The two histone chaperone proteins, HIRA and ASF1a, form a heterodimer with histones H3 and H4. HIRA is the human orthologue of Hir proteins known to silence histone gene expression and create transcriptionally silent heterochromatin in yeast, flies, plants and humans. The yeast CAF1B proteins which bind H3 also carry this motif at their very C-terminus. 21.00 21.00 21.10 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.47 0.72 -6.80 0.72 -4.30 30 239 2009-01-15 18:05:59 2007-02-15 14:15:42 5 22 194 2 167 262 0 23.40 48 2.46 CHANGED pQpsThTKsGKKRlAPhLlSouss .....KQp.ThTKDGK+RIsPlhlupss.... 0 36 73 131 +9286 PF09454 Vps23_core Vps23 core domain Coggill P anon pdb_2caz Domain ESCRT complexes form the main machinery driving protein sorting from endosomes to lysosomes. 
The core domain of the Vps23 subunit of the heterotrimeric ESCRT-I complex is a helical hairpin sandwiched in a fan-like formation between two other helical hairpins from Vps28 (Pfam:PF03997) and Vps37. Vps23 gives ESCRT-I complex its stability [1]. 20.60 20.60 21.10 20.70 20.20 20.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.55 0.72 -4.36 19 362 2009-01-15 18:05:59 2007-02-15 14:37:13 5 6 269 7 223 353 3 62.60 44 14.79 CHANGED -phltstsslhpQlhcLlAc-pAl-DsIhhLscuLcpGpIsl-palKplRtLuR-QFhtRhhhpK .........D-lllsssslh+QllsLhAEEpAI-DsIahLucAL..c..+....G...s.....I....s...l.-saLKplR.LuRcQFhhRAhhpK................... 0 76 126 181 +9287 PF09455 Cas_DxTHG CRISPR-associated (Cas) DxTHG family Coggill P anon pdb_2i71 Family CRISPR is a term for Clustered Regularly Interspaced Short Palidromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR associated) proteins. The family describes Cas proteins of about 400 residues that include the motif [VIL]-D-x-[ST]-H-[GS]. The CRISPR and associated proteins are thought to be involved in the evolution of host resistance. The exact molecular function of this family is currently unknown. 
22.30 22.30 22.30 22.80 21.40 21.20 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.31 0.70 -5.31 44 268 2009-01-15 18:05:59 2007-02-15 14:47:23 5 6 191 2 136 269 9 350.30 14 81.01 CHANGED hllohlGss.............pappspYphsspp...............hco....thsttulhchhp......sschllhssss.....................................t...sshptlpptlpsphtp........................pslhlP..............................cGpsps.hhp...lahpl.hcp..l......pcss.clllDlTHGhphhP.hlshhuhphhttlt.......................tsphcslhhuhapsps...............hsshh-lsthhplhchhhsspphhphsssp.lsph..hppt...h.......................tpthpphhphlpslspul.lshshph................thhpplhphlppthphtt.........................hhth.hh.hlpplhpphphhstpp....................lcphhclhc.hahcpt.h........hpuhhltpE...............hpphpptpt+pphppth............................................pp....hhphhsphtphRNhlAHuGhp ................................................................................................................................hlshhGts................tap.spYhh..tspp.....................ps........thht.slhchhp.........schllhhspp...............................................................pphpthhptl.pphh.p..................................................................phh.lP........................................pupspp.hhp......lhppl.hcp...l.........ppss.clhl..DlTHGh+.hP.hlsh..hshphhphht.......................th.p.h.ptl..hauhhpsps.............................t.sslh-ls.sh....h.pl.hchh.uhptahph.sp.s..p.ltph.......htt..............................t.hpth......hp....hlpphppsl.h..hhhth...................p.hpphhp.lpph.p..t.....................................................hh.hhpph...pph.t...ht.tp....................htp..hphhc.hhhpt.t..h.........puh..hlhp.....E....hhh...t.........h.ph.pht.tpt.ht..h.....................................................................................t.....................ht....ph.p.hRN.hsHsuh.........................................................
......................................................................................................... 0 69 89 110 +9288 PF09456 RcsC RcsC Alpha-Beta-Loop (ABL) Coggill P anon pdb_2ayy Domain This domain is found in the C-terminus of the phospho-relay kinase RcsC between Pfam:PF00512 and Pfam:PF00072, and forms a discrete alpha/beta/loop structure [1]. 25.00 25.00 37.40 35.80 19.70 18.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.95 0.72 -4.34 12 546 2009-01-15 18:05:59 2007-02-20 16:07:56 5 4 538 2 41 289 1 91.90 68 9.90 CHANGED pCWLAlRNspLppaLpslLutpGlpltcYp.sppsss-DllIoD.ssphshtspAhIphspcHIG.spEppPG.WhpSsusspEL.sLLs+Ia ......pCWLAVRNASLCQFLETSLpRSG.lsVspYE.GQEP..sPEDVLITD-sls+cWQGRAsVsFCRRHIGIPLE+APGEWVHSVAuPHELPALLARIY.. 0 1 9 24 +9289 PF09457 RBD-FIP FIP domain Coggill P anon pdb_2d7c Motif The FIP domain is the Rab11-binding domain (RBD) at the C-terminus of a family of Rab11-interacting proteins (FIPs). The Rab proteins constitute the largest family of small GTPases (>60 members in mammals). Among them Rab11 is a well characterised regulator of endocytic and recycling pathways. Rab11 associates with a broad range of post-Golgi organelles, including recycling endosomes [1]. 23.00 23.00 25.30 23.60 22.80 22.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.19 0.72 -4.16 19 430 2009-01-15 18:05:59 2007-02-21 13:42:40 5 10 90 10 225 398 0 44.20 41 7.00 CHANGED o+-ELhphltcpEcpht+.......Lc-YIDsLLlplMEcsPsILcss .................o+-ELhptl..hcpEc..t+.......LccYIDplLlplhEpsPsILcl.... 0 40 60 129 +9290 PF09458 H_lectin H-type lectin domain Coggill P anon pdb_2ccv Domain The H-type lectin domain is a unit of six beta chains, combined into a homo-hexamer. It is involved in self/non-self recognition of cells, through binding with carbohydrates [1]. It is sometimes found in association with the F5_F8_type_C domain Pfam:PF00754. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.00 0.72 -4.32 87 326 2009-01-15 18:05:59 2007-02-21 14:07:37 5 22 127 25 229 344 33 71.10 20 18.43 CHANGED pphppslpFs.psFpp..hPpVhlulphhD..hsps........psh..phplpspslTtpuFs.lphpshsss.plhplphsahAlss ..................hplsFs.psF..ps.....sPtVhlulshhD...hsps........tsh..phplpspslTps........u..Fs.lphp...s..hsss..tl..hph..p..hsahAh..t....................... 0 170 204 223 +9291 PF09459 EB_dh Ethylbenzene dehydrogenase Coggill P anon pdb_2ivf Domain Eythylbenzene dehydrogenase is a heterotrimer of three subunits that catalyses the anaerobic degradation of hydrocarbons. The alpha subunit contains the catalytic centre as a Molybdenum cofactor-complex. This removes an electron-pair from the hydrocarbon and passes it along an electron transport system involving iron-sulphur complexes held in the beta subunit and a Haem b molecule contained in the gamma subunit. The electron-pair is then subsequently passed to an as yet unknown receiver [1]. The enzyme is found in a variety of different bacteria. 
26.20 26.20 26.30 27.10 24.20 26.10 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -12.05 0.70 -4.94 49 210 2009-09-13 05:32:36 2007-02-21 14:20:32 5 12 144 1 98 213 46 238.40 19 66.68 CHANGED sssWsp....ssssplsl....hss.ssh.........................................................tsstpslpVpAsasucpl.ahhhpWps.........................................................................................stsh......................t.............tss.apD+lAlha..s..............s.tss.h.s..s........hpshssss.t.....................hhpahsss..................................sthl-l..WpW+u..t.........t.................t....s...........................................................ph........................tt.sht...s.p.p............................stsslpssu....a..psG....pWsVhhsRsLpss...s...-sshps.GpshshuhAla...-sspsp+stphu .............................................................Wpph..hplsL.....hss...h.........................................................pthtt.lt.VpAsa....sup....pl.hhhhpWts...................................................................................................................................stsht..........................t.....................t.t.h.DphAlha..s.t...................sh.h.s..s..........s.hsth..h........................hhsss................................sthssl.ah..Wpu...................................................................s........................................................................................................t...pst..............................ts.t.htstu.....ph..tsG.....hWpl.hsRsLts.....s...t...psphp..Gt.h.huhAha...psp.hpp.hph............................................... 0 31 74 89 +9292 PF09460 Saf-Nte_pilin Saf-pilin pilus formation protein Coggill P anon pdb_2co1 Domain This domain consists of the adjacent Saf-Nte and Saf-pilin chains of the pilus-forming complex. 
Pilus assembly in Gram-negative bacteria involves a Donor-strand exchange mechanism between the C- and the N-termini of this domain. The C-terminal subunit forms an incomplete Ig-fold which is then complemented by the 10-18 residue N-terminus of another, incoming, pilus subunit which is not involved in the Ig-fold. The N-terminus sequences contain a motif of alternating hydrophobic residues that occupy the P2 to P5 binding pockets in the groove of the first pilus subunit [1]. 25.00 25.00 25.10 25.00 24.20 24.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.75 0.71 -4.41 4 84 2009-01-15 18:05:59 2007-02-21 14:30:22 5 1 81 19 1 51 0 138.30 60 90.19 CHANGED GShlPNoEQppSVDlsFuuPppLolohsPVuGLhAG.ptustpIApLsVsSsohKpaulpG..hussVlsssGssWplsGKNoGptIpVsFuusshuppsus.paNG+pWhsaDhNDpLslhLsG.uQNVsADTYPlTlclsuYQs .........GSFLPNTEQcKSVDISFAAPEcLTlSL-QsP.GLhAGKsKPsT.slAKLTVNSTS.IKEFGVR...G..VSsohl..NshGShWsITGKN.S.GsoluVGFS.......SpoLusS+SspsWNGl+WaTFDsN....DPVsIsLlt...DQNIPPDTYPlTVDVVGYQP...... 0 0 0 1 +9293 PF09461 PcF Phytotoxin PcF protein Coggill P anon pdb_2bic Domain PcF is a 52 residue protein factor of two alpha helices, containing a 4-hydroxyproline and three cysteine bridges. The presence of the hydroxyproline is unique in relation to other fungal phytotoxic proteins. The protein has a high content of acidic side-chains implying a lack of binding with lipid-rich components of membranes and appears to be an extracellular phytotoxin that causes leaf necrosis in strawberries. 25.00 25.00 25.10 33.10 24.40 24.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.53 0.72 -4.06 5 31 2009-01-15 18:05:59 2007-02-22 13:45:16 5 2 5 1 11 32 0 44.60 56 36.26 CHANGED QQLC+.AsGCAYEYScANtVVSKCC+AINs-PlAFHDCCpcSCNoG QQLCp...AsGCA.Y.cYScANpslSKCC+sI.NscPsAFa-CCucSCNoG.. 
0 1 10 11 +9294 PF09462 Mus7 Mus7/MMS22 family Wood V, Mistry J, Bateman A anon Wood V Family This family includes a conserved region from the Mus7 protein [1]. Mus7 is involved in the repair of replication-associated DNA damage in the fission yeast Schizosaccharomyces pombe. Mus7 functions in the same pathway as Mus81, a subunit of the Mus81-Eme1 structure-specific endonuclease, which has been implicated in the repair of the replication-associated DNA damage [1]. The MMS22 proteins are involved in repairing double-stranded DNA breaks created by the cleavage reaction of topoisomerase II [2]. 21.10 21.10 21.20 22.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 614 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.13 0.70 -12.86 0.70 -5.68 17 141 2009-01-15 18:05:59 2007-02-22 14:11:52 5 5 120 0 111 150 1 490.60 19 29.02 CHANGED hEthW.plhphhs......lsplsthshhhhssp.....tssWshlp.hhp.hh...p.t.t.........shspYtcs.....hhtRChhLhptasW..c.sp.llhp..LachFsppphhchcpEp.hppPt....h.lccLspsPohthpstp..hFchal+lLh.ulp.L...........tspcpl+shs.RlhPspshpYs+ppsl..tphs.LhN+asLLhsLhahsssshc.+lpplpp..lpstpuchcs.....phIslps.spLshhploppcchpsht.hsthhs.hlsphlcpah.hpt..p...........................hhpsFppphhcphhspspcplpshLphslpuhpshl............ptusshppsphllsps.lptlhph.p....pls....pcslphIpsaL..............tt......................tt..t...........t........................................................phhphlcplhp.slp..p.hsp.htts.s.-t.h............................................lhshlcsasphAphLVcpshp.hWs....hs...................astpsahtlp.sstpccas.hFhsphlphDstthp..chcpplhuhhhpsLlpphshlt.....pLhphlhsh.pps.hhp.hsh..t.....hplshsshpppcLsllosllpsh+...........................t.p.t+ppapchlpphhpsh+ssYtphtt........ut....ttahsFlp+llshl 
.............................................EthWpthhthhs......h.phs.h.shhh.s.+............spWthhp.hht.h...h...t..s..t..........hspYhcs.....hhtpChhLhptasW....phsp.ll.h..lach....Fspp.phtshppEtshtp.P........................h.lpplstps.shthp....stp..sFchaL+llh.shp.h...............ptcplpshshRhlPspsh........pa.......s.cppsl..pphshLhN+asLLh..sLhhhss..s.s.hc.p..lptlpph..lp.tpucpcs.....p.lslcs.tpLshhtls...pt..c...sh.......ht.hhthhsphhpphlppa..htt..t.............................p.h.pphhp...h.ht..ppth.phl...h.s.hthh............ptt.....s...hhh......httlht.........th.........shthl.tah.......................................................................................................................................hpphht..........p.............t................................................h.thhp.a.thuthhlppthh..hs.....s....................hs..sh..ht.p....ppcah.hFh..hhp.ss...hp...php..phhphhhtslht.t............thhthl.p..................................htht.....ppphslhp.hhpshp................................................tt...ta.tphlp.hhtthp...tpht.h.....................t..........h.tFhtthlt.............................................................................................................................................................................................................................................................. 0 27 60 98 +9295 PF09463 Opy2 Opy2 protein Wood V, Finn RD, Bateman A anon Weod V Family Opy2p acts as a membrane anchor in the HOG signalling pathway [1]. 
21.10 21.10 21.60 21.10 20.70 20.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.06 0.72 -3.86 15 95 2009-01-15 18:05:59 2007-02-22 14:50:21 5 2 92 0 63 92 0 35.20 46 8.07 CHANGED CVs.Csosss.oCPsCscGcpCshsStTCspCspThC .CVs.Css.sss.sCP..s..CssuEhCsho.uhoCspCssshC 0 11 29 51 +9297 PF09465 LBR_tudor Lamin-B receptor of TUDOR domain Coggill P anon pdb_2dig Domain The Lamin-B receptor, found on the TUDOR domain Pfam:PF00567, is a chromatin and lamin binding protein in the inner nuclear membrane. It is one of the integral inner Nuclear Envelope membrane proteins responsible for targeting nuclear membranes to chromatin, being a downstream effector of Ran, a small Ras-like nuclear GTPase which regulates NE assembly. Lamin-B receptor interacts with Importin beta, a Ran-binding protein, thereby directly contributing to the fusion of membrane vesicles and the formation of the NE [1]. 20.70 20.70 20.70 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.73 0.72 -4.34 5 85 2009-01-15 18:05:59 2007-02-23 17:27:58 5 10 51 2 45 73 0 53.10 54 9.68 CHANGED MPupKYpcGElVMGRWPGSsLYYEVcVlSFDsKSQLYTVlYKDGTELELKESDIK .........MPs.tKascGElVhGRWPGSuLYYEVcllSaDspopLYTVpYKD.GTE..LELKEsDIK........... 0 8 13 25 +9298 PF09466 Yqai Hypothetical protein Yqai Coggill P anon pdb_2dsm Domain This hypothetical protein is expressed in bacteria, particularly Bacillus subtilis. It forms a homo-dimer, with each monomer containing an alpha helix and four beta strands. 24.80 24.80 25.70 26.00 22.10 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.35 0.72 -3.97 2 32 2009-01-15 18:05:59 2007-02-23 18:12:33 5 1 25 2 4 31 0 63.30 35 82.67 CHANGED MlENPMVhpNh......pc.s.hDh.I-.hhGsElhPsD-all.ssGEllLRENl.cYhhpQLGFEhKsAt. .............t....................p...pEs-h.h-DhaGsElhssDcYll..ss.GEllLc-NLpcYLhEphGhchp.............. 
0 1 3 3 +9299 PF09467 Yopt Hypothetical protein Yopt Coggill P anon pdb_2dlb Domain This hypothetical protein is expressed in bacteria, particularly Bacillus subtilis. It forms homo-dimers, with each monomer consisting of one alpha helix and three beta strands. 25.50 25.50 25.50 46.40 22.20 25.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.42 0.72 -4.13 2 7 2009-01-15 18:05:59 2007-02-23 18:13:57 5 1 7 2 2 5 0 70.40 65 95.73 CHANGED AGYLNNIALNLEIVLKNKADSPEVSETLVTRICENLLLSKEVSFLKADGSVENFKLSDMEYEITNTEELPE ..AGYLNNluLpLEIVLKNKAcs.EVSpoLspRlCEpLhlu+EVsFLpADGoVEpFKLsDhEYEIoNTEEl.... 0 1 1 1 +9300 PF09468 RNase_H2-Ydr279 Ydr279p protein family (RNase H2 complex component) Wood V, Chahwan C, Bateman A anon Wood V Family RNases H are enzymes that specifically hydrolyse RNA when annealed to a complementary DNA and are present in all living organisms. In yeast RNase H2 is composed of a complex of three proteins (Rnh2Ap, Ydr279p and Ylr154p), this family represents the homologues of Ydr279p [1]. It is not known whether non yeast proteins in this family fulfil the same function. 
23.60 23.60 23.90 23.90 23.20 23.50 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.05 0.70 -5.22 42 345 2009-01-15 18:05:59 2007-02-26 08:50:54 5 5 278 16 229 334 0 283.10 21 86.68 CHANGED s..s+lh.....lLPpssssst......hphlpLssPp.sspsstahh.....ppsplaElpthpt........................s.+Shhls.............................stlhpsuplhhsoshDslFlllshLhptt........................pspFhsl--ll......Dphhs...........thpphh.h.........hshlcpplptls-sh...............Esusc..pha+hsppKllpaLttKspphsph...l.pshp.phhpph...................................................................................th.pphhp.thhphuhshlsp.h.pphhpplhpt.hh............-ht.hpthhtt.................hppc+th.psh-uthsps...........pppppp.pttpKpsp...opst+....shtphsssGhp..plsuFF ...............................................t...hh.....lhs..p..t.t.........hphlpL....pPp..pu........psshalh.............p.pps..laElpthpt.........................th+SWhls.....................................................s.VtpsupLhhsTPhDslFllLs...hLhpsp................................ptpFhsl--ll.........Dp.hs............................thpplh.....................hthhcptlppls-sh....................................-.h..usp.....pha+hscpKhlpaLtpKspphs......ps....L.ts.p......phst.t...........................................................................................t..tp...p..hl..ph...Ahshlsphh.tp.....hh.ppl.hp.h....................t.t....t...................................p.p..........ptt...phs....................tp.tt...tt.ttpptt.......stt.c....th.t.ph.stpG..ht..plssFF.......................................................................................... 0 78 128 191 +9301 PF09469 Cobl Cordon-bleu ubiquitin-like domain Coggill P, Bateman A anon pdb_2daj Domain The Cordon-bleu protein domain is highly conserved among vertebrates. The sequence contains three repeated lysine, arginine, and proline-rich regions, the KKRAP motif. 
The exact function of the protein is unknown but it is thought to be involved in mid-brain neural tube closure. It is expressed specifically in the node [1]. This domain has a ubiquitin-like fold. 25.00 25.00 27.70 26.80 23.30 18.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.68 0.72 -4.01 4 145 2012-10-03 10:59:06 2007-02-26 12:46:05 5 5 39 1 57 123 0 79.10 57 8.93 CHANGED PsICpKCEFsPtHVlLL+DshupEEL-LsKSLs-LGIKELYAaDsp.............+EopphSSstss.o-KEKKuFLGFF+hsK+pps ......PlICuKCEhsPtHslLL+D..utE.L-LoKSLN-LGl+ELYAhDsp....................REohp.hS.....p...ss...p-KEpKtFhuFFptsK+pp.p....... 0 2 6 22 +9302 PF09470 Telethonin Telethonin protein Coggill P anon pdb_2f8v Domain Telethonin is a 167-residue protein which complexes with the large muscle protein, titin. The very N-terminus of titin, composed of two immunoglobulin-like (Ig) domains, referred to as Z1 and Z2, interacts with the N-terminal region (residues 1-53) of telethonin, mediating the antiparallel assembly of two Z1Z2 domains. The C-terminus of the telethonin appears to induce dimerisation of this 2:1 titin/telethonin structure which thus forms a complex necessary for myofibril assembly and maintenance of the intact Z-disk of skeletal and cardiac muscles [1]. 25.00 25.00 41.90 41.80 19.00 17.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.02 0.71 -4.79 3 40 2009-09-11 00:10:22 2007-02-26 13:39:14 5 1 34 3 25 38 0 162.40 66 97.71 CHANGED MATSELSCpVSEENCERREAFWAEWKDLTLSTRPEEGCSLHEEDTQRHETYHRQGQCQALVQRSPWLVMRMGILGRGLQEYQLPYQRVLPLPIFTPAKVGATKEEREETPIQLQELLALETALGGQCVDRQDVAEITKQLPPVVPVSKPGsLRRSLSRSMSQEAQRG .......MAToELSCpVSEENpERREAFWAEWKDLTLSTRPEE..........GCSLHEEDopR+ETYHpQGQCQuLVQR.SPWLVMRMGILGRGLQEYQ.L.PYQRVLPLPIFTPs.KlGssKE.ERE-TPlQLpELLALETAL....G.....GQClDRQ-VAEITKQLPPVV.PV....S.....K.....P.GsLRRoLSRSMSQEAQRG................. 
0 1 5 10 +9303 PF09471 Peptidase_M64 IgA Peptidase M64 Mistry J anon Rawlings N Family This is a family of highly selective metallo-endopeptidases. The primary structure of the Clostridium ramosum IgA proteinase shows no significant overall similarity to any other known metallo-endopeptidase [1]. 20.10 20.10 20.10 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.58 0.70 -4.76 14 251 2012-10-03 04:41:15 2007-05-02 11:06:12 5 20 205 8 66 360 113 241.70 28 47.19 CHANGED apso--upcsstshppohphPssstssplshhph.tstpph.p.hphllc...P...c-.hhspstssssss.....lh.lhcoGs.spp+lDlllhG-GYTssEhscFhpDAp+hhcslFu.ppPa+pa+scFNVWulsssSp-S.........GV..upPpsshh+cohlGupassas..ERhltsscs+shppss...........u...hsPY-hl.......hllsNscpYGGuGhhsh........auslsucss.hushlhlHEhGHuhuuLADEYhsusssh.s......EPac.NlTs..sspth....KW .............................................................................................................................t..h..........ts.hhp.t.t.tp........hhhlhpsus.sscplclsllu-GY..Tt....s..E...h...s...p...Fhp.D..A..ppth.-.sLF......u...tE.Pa+oh+stFNlhAVtssSp-S...........................Gl.......s.s......t.....p....s......h.h.....+..sTul.s..opF....s....s.ah....s...-R.h...Lss...s..phcshc.ssh............s......sssa.-.tl...............llLsNo.s...p...YG.....GuGhhss......................ashs.o.s..pps....h....hp.VllHE.h.G.HoFuGLuDEYhh.spss.h.th............thEPhc.Nlosh....s.s.t......KW............................................. 1 30 54 64 +9304 PF09472 MtrF Tetrahydromethanopterin S-methyltransferase, F subunit (MtrF) TIGRFAMs, Coggill P anon TIGRFAMs Family Many archaea have evolved energy-yielding pathways marked by one-carbon biochemistry featuring novel cofactors and enzymes. This domain is mostly found in MtrF, where it covers the entire length of the protein. This polypeptide is one of eight subunits of the N5-methyltetrahydromethanopterin: coenzyme M methyltransferase complex found in methanogenic archaea. 
This is a membrane-associated enzyme complex that uses methyl-transfer reactions to drive a sodium-ion pump. MtrF itself is involved in the transfer of the methyl group from N5-methyltetrahydromethanopterin to coenzyme M. Subsequently, methane is produced by two-electron reduction of the methyl moiety in methyl-coenzyme M by another enzyme, methyl-coenzyme M reductase. In some organisms this domain is found at the C terminal region of what appears to be a fusion of the MtrA and MtrF proteins. The function of these proteins is unknown, though it is likely that they are involved in C1 metabolism. 21.00 21.00 21.40 21.30 20.50 19.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.77 0.72 -4.73 9 59 2009-01-15 18:05:59 2007-05-02 16:24:54 5 2 54 0 38 58 10 63.30 32 52.29 CHANGED slshsscPphsuIpshV-sl+Y+uQLlARspKLsSGltusshhGhslGhlhAllhlllP.hlhh .............l.hsspPphsuIcshlEslcY+spLluRct+LsSGltssslhGhshGhlhAllLlslslhlh.h.. 0 8 24 32 +9306 PF09474 Type_III_YscX type_III_YscX; Type III secretion system YscX (type_III_YscX) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are encoded within bacterial type III secretion gene clusters. Among all species with type III secretion, those with this protein are found among those that target animal rather than plant cells. The member of this family in Yersinia was shown by mutation to be required for type III secretion of Yops effector proteins and therefore is believed to be part of the secretion machinery. 
20.50 20.50 21.20 35.80 20.00 19.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.50 0.71 -4.09 6 77 2009-01-15 18:05:59 2007-05-02 16:41:22 5 1 74 0 10 33 0 121.50 53 99.75 CHANGED MSR.IoAhHIGIEpLotlShEElssuLPcRatLLPDGQuVETHlp+LYPtp.u-QtLhsaApPphsFHsLLRPcDaRQthcs....LppLLppssospLptAusLLpppppD-RLLQMALNLLHKV .........MSR.loAhHIGIEpLotlShE-lsssLP-RatLhPDGpsVEsHLc+LYPttpocptLhDaApPphsFHsLLRPpDaRpphcs....LppLLspu.pSsp......LpAAAsLLpspQpD-RLLQhsLNLLHKV........................ 2 2 4 5 +9307 PF09475 Dot_icm_IcmQ dot_icm_IcmQ; Dot/Icm secretion system protein (dot_icm_IcmQ) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are the IcmQ component of Dot/Icm secretion systems, as found in the obligate intracellular pathogens Legionella pneumophila and Coxiella burnetii. While this system resembles type IV secretion systems and has been called a form of type IV, the literature now seems to favour calling this the Dot/Icm system. This protein was shown to be essential for translocation. 25.00 25.00 27.00 140.90 18.10 16.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.10 0.71 -5.04 21 62 2009-01-15 18:05:59 2007-05-02 16:43:40 5 1 51 3 3 55 3 178.20 57 91.04 CHANGED AILKALD-AIEpGPWEESNFLRVIGKNL+EIR-sFuspluss..sptps+ptoph.AsRlALRoGQQElFIuLYSo-GsNlQoWERIlANLPRQMISRPIYAsEpDV+slIKoKENKlNEAYVAIYIsQsDILslssDKsPhDKhGKPLLoLKD+ulsLENIsRFVHhSGlY+Ys+GR...LlKs ..ILKALs-AIcpGPW-cSNFLRVIGKpLhtIRDpFhcpluus..sps..cl+.s-.upL.ANRlALRSGQQElaVSLYSSDGoNLQSWE+IlusLPRQMISRPIYA-E-DlKsllKoKENK.NEAYVAIYIsQoDILplosDKsPsDKLG+PLLTLKD+oIsLENIoRFVHlSGVY+YspGRLIKp....... 
0 2 2 3 +9308 PF09476 Pilus_CpaD pilus_cpaD; Pilus_cpaD; Pilus biogenesis CpaD protein (pilus_cpaD) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry consist of a pilus biogenesis protein, CpaD, from Caulobacter, and homologues in other bacteria, including three in the root nodule bacterium Bradyrhizobium japonicum. The molecular function of the homologues is not known. 30.10 30.10 30.50 30.80 29.30 30.00 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.32 0.71 -4.84 38 190 2009-01-15 18:05:59 2007-05-02 16:43:56 5 2 168 0 79 185 16 181.00 27 84.57 CHANGED hllshshsLuuCsssss.......t..spshcppHPIsVpcsspslsl.lssstsuLossp+spltuahppatppussslhlpsPu.ss.st.s..Assshp-lpphLtptGlssspltstshtsssspssuslRlsas+hsA.psssCGtWscshs.sh.....hcNpsh.NaGCAspsNLAAMVANPpDLlpsRshosssssR.Rspsl...-pYRpu ............................h..hhhshhLuuCstt.............s..shptp+slslppsppslpl.lt.ttttLsssppspltshhtphhp..t..usshlhl.h....Ps.st.....t..utpshtpltphlsttGlsspplhhts.....st........s.t........ssu......s.lRlsa.phpA.hs.s.sCu.thspc..hs.ss..........hpNcshhNFGCAspsNLAAMlAsPtDLltPRsh.sssDusp.pssslcpYRp............... 0 21 40 55 +9309 PF09477 Type_III_YscG type_III_yscG; Type_III_yscG; Bacterial type III secretion system chaperone protein (type_III_yscG) TIGRFAMs, Coggill P anon TIGRFAMs Family YscG is a molecular chaperone for YscE, where both are part of the type III secretion system that in Yersinia is designated Ysc (Yersinia secretion). The secretion system delivers effector proteins, designated Yops (Yersinia outer proteins), in Yersinia. This entry consists of YscG from Yersinia and functionally equivalent type III secretion proteins in other species: e.g. AscG in Aeromonas and LscG in Photorhabdus luminescens. 
26.10 26.10 26.30 44.40 25.90 26.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.45 0.71 -4.01 7 84 2009-01-15 18:05:59 2007-05-03 10:10:21 5 1 65 4 9 55 1 110.50 55 99.54 CHANGED MphpL+t.LAElALhGoGpHpHpEAssIA-WLtppsp.pEsVsLIRlSSLMNpGcYppA..L..stshshPDLEPWhALCEa+LGLtutL-pRLstLuuSusPplpsFssGhRpQlps MchcLpp.LAElALhGTGpHCHpEAssIA-WLcttuc..-EsVpLIRlSSLMNRGcYppA..Lt.utppsaPDLEPWhALCEa+LGLtuAL-sRLttLutpts.t.tsFssuhpppl+s............... 0 1 3 4 +9310 PF09478 CBM49 Carbohydrate binding domain CBM49 Mistry J, Urbanowicz B anon Pfam-B_6310 (release 21.0) Domain This domain is found at the C-terminus of cellulases, and in vitro binding studies have shown that it binds to crystalline cellulose [1]. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.82 0.72 -3.88 28 310 2012-10-01 21:34:18 2007-05-03 10:13:48 5 20 40 0 245 346 0 82.10 25 20.10 CHANGED lslpQplTuoWhsssp.sY.hpassslsNpu...spslpslplpls.pl.....ssIWGlsp..usssYshPuWls...oLssGpohsFsYIp....s ..............................tlppphhsoWh...p...s...sp...sa.hpaslplpNpu...spslps....l......plshs...slt............sslWs..ls......p.....ss.....s.......t..........a....s....L...P..u..aht....sls.s.Gp.oa.s.FGYI..ps................. 1 165 230 238 +9311 PF09479 Flg_new flg_new; Listeria-Bacteroides repeat domain (List_Bact_rpt) TIGRFAMs, Coggill P anon TIGRFAMs Family This model describes a conserved core region of about 43 residues, which occurs in at least two families of tandem repeats. These include 78-residue repeats which occur from 2 to 15 times in some proteins of Bacteroides forsythus ATCC 43037, and 70-residue repeats found in families of internalins of Listeria species. Single copies are found in proteins of Fibrobacter succinogenes, Geobacter sulfurreducens, and a few other bacteria. 
20.70 14.00 20.70 14.00 20.60 13.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -8.25 0.72 -3.92 74 3687 2009-01-15 18:05:59 2007-05-03 10:35:46 5 395 401 4 433 3605 541 42.10 41 10.26 CHANGED PT..+...sG.YoFsG.WYssp.ssGspasas..ssh...spslTLYApastss .............Ps...K....pG..YTFpG.W...Y.....s..s....p....ss........G.......s....c......a....sFu............Tshh.........ssss....l....T........LYApashs.s........................... 0 247 380 413 +9312 PF09480 PrgH Type III secretion system protein PrgH-EprH (PrgH) TIGRFAMs, Coggill P anon TIGRFAMs Family In Salmonella, the gene encoding this protein is part of a four-gene operon PrgHIJK, while in other organisms it is found in type III secretion operons. PrgH has been shown to be required for type III secretion and is a structural component of the needle complex, which is the core component of type III secretion systems. 20.40 20.40 20.40 21.90 20.20 20.30 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.93 0.70 -5.60 8 407 2009-01-15 18:05:59 2007-05-03 11:43:20 5 1 334 38 14 205 0 296.60 47 95.75 CHANGED hhlRlLsGPLpGsEahL.sGpsLhllGpssuhspsupss-........hPtsTlalPhspGssNFclhls..............tssschhLchLs-pssppc.lsaNpshpsGslthsl+.tsEsWpsp...................ssht.susspspsRhtsuhhsslsshhhLuslhshtlhhhsoppc.QltpLsslLssssppaplLsGcDGplYlLAsopRDusWu+QuLh+sphspsVplls.ssEppRIppaLscphPtLsha+LcLscPppPhLh.LS+pRsuhspsthc+L.ttLhshhPYAcslslsshsDssltppAcpGLschulsYcchscssuVTFlIpuuLsDsEltclppal-sah+pWGupYVpFuIpL+-DaL+GKSaphGssGYVhlssuHWYF 
..............................................................................lGpssuLstSsths-..........lPtsohhlPhschsssFEI.ls.................................sssh.lhh+.ELpttpsEs...RolpLNp.IpVhtLhhhl+.csEsW..p...................PthlEss...hhpsc.+Fhsuhl.....hth.s..hh..FhLhhhhh.hthhhhsusp........c..phtplsplLstppptaplL.GcD.p.h.lYl.sp..c-slWs+QsLt+sp.sKssRVIN.s-EshR.I.oWLss.aPpLtYa+Lph.-spp.hhh.lS+Q...RNshopp.l-sL.ptLhthMPYAsslsIslhDDsshtuQA.tsLpt.uLsYp+hpptssVhFsI.GsLsDsEl.+hppaVDpYY+pWGtpYVpFslpLKsp.hpspSFpYGspt.a.K.psupWhF.................................. 0 3 6 9 +9313 PF09481 CRISPR_Cse1 CRISPR_cse1; CRISPR-associated protein Cse1 (CRISPR_cse1) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry, represented by CT1972 from Chlorobaculum tepidum, is found in the CRISPR/Cas subtype Ecoli regions of many bacteria (most of which are mesophiles), and not in Archaea. It is designated Cse1. 
21.90 21.90 22.30 22.30 21.00 21.80 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.69 0.70 -5.74 55 645 2009-01-15 18:05:59 2007-05-03 12:04:30 5 2 613 4 118 467 12 456.40 33 89.10 CHANGED NLlc-PWIPVhht.....sGs..pt.plulpplh.....pssclhslshspPshpsAlhc..lLlul...lpss...............................tsP....cs.tc..cWtchapp...........shssctlpsaLpc..apcpFpLhssp...PFhQssth............pscstslsp...............Llh-h...suusshhhh.sahscpuhsph...ssupAAhsLlshpsausu...Gh+suhp...............GuGPlssls.sh...........tGpsLacTLhLNllstp..............thssss............hssWpts.spsspptspthh.........sshhphahhhsR+lcLh....spsst.............spsshhstGh...shsssh............h.t.cPhssa+............hppppshhshc...hpsuphhWRshsuLlhspssss....ptt........stllchhtphttpth..................................hplhshGhchDs.pA.ph.hphptshlslssth.........pppthtpplpphlptAcphtptLcstlpphhtst..............ps-hshhst.............tpaWppsEstFhp.hlpslst...........ttsppttttWpppl....pphuhpha-p .........................................LLssPWlPVRhc...................DGs........ss.clu.h-L.......ss.cslhc.lAusRsDhp.sAsap..hLLulLQsu..............................................huP.....cc..h.....cpW.c-lWpc...........ulps-tlcchLts..h..c.ctFphss-s..ssFMQsh.................p.hpscp......sslus......................LLs-h......PGupssch...-ah..hc+Gssch.....hsspsuAlALaohQh.A.Puh.GtGh+o.GlR.....................GGGPhTTLltl............pt.s.s.Lac..pL....WLNVhsp-........tthshstphcst............lhPWhss...s...csuchsusshs..........ttsshlpsYashPRRIRlc.....tphpsGsss............................................hsstpsstLlshhhhcsaussa............hW.p.HPhTPaR...............h.hKpss....tahsl+....spPGuhhWRcahuLltsspscs..........spp...................AtVlchhsshsh.ppht....................................lsLhuhG.h-h-s.hcu.psa..h-c.chslL...............tpcshhsplctssp....sA.s.c.l.h.....s.h.L...Rs.Al+-shhus....................pss+uDhuhlch........................cFWspopt...pFhp.llcplcps...........pcsc.-.hlspWp+pl..
..hhhspp.F-............................................................................................. 0 34 85 102 +9314 PF09482 OrgA_MxiK Bacterial type III secretion apparatus protein (OrgA_MxiK) TIGRFAMs, Coggill P anon TIGRFAMs Family This protein is encoded by genes which are found in type III secretion operons, and has been shown to be essential for the invasion phenotype in Salmonella and a component of the secretion apparatus. The protein is known as OrgA in Salmonella due to its oxygen-dependent expression pattern in which low-oxygen levels up-regulate the gene. In Shigella the gene is called MxiK and has been shown to be essential for the proper assembly of the needle complex, which is the core component of type III secretion systems. 25.00 25.00 26.10 25.90 23.10 22.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.06 0.71 -4.64 7 353 2009-01-15 18:05:59 2007-05-03 12:05:50 5 1 334 0 15 157 0 136.90 46 92.30 CHANGED lhplhacPloYlHssRhslssthhss.stRslsNchllstYcLss-ht....tsshhthalspWphLPplAhLhGsphhRspLsppGhhlpLsshspsFh.Ah.lssp.puu..pthss.ttll.......ssGhhpLluhhcphP.ultQRhsLLFPshh-cshs......hPstpsL.h.p.shpaAp+Hsph.ss .................tlhhtPh.Yhp.ph...ss......s..t.hhN.hll.tapLp...s.t........ss.h.hhlppWphhP.hshh.GsphhRtthscpGhhhtlsshh..p..saL..u.h..ths..psp.....sths.s..p.ll.......ssGhupL.sh.c..P.uhtQRFsLLFPsFl.-chph.........hslt.ol..L..ls.h.htpphsphs.h........................ 0 4 7 10 +9315 PF09483 HpaP Type III secretion protein (HpaP) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are always found in type III secretion operons, although their function in the processes of secretion and virulence is unclear. Hpa stands for Hrp-associated gene, where Hrp stands for hypersensitivity response and virulence. 
see also PMID:18584024 23.30 23.30 25.40 25.40 23.10 23.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.24 0.71 -4.34 11 81 2012-10-01 19:58:36 2007-05-03 13:00:00 5 2 68 0 15 85 3 190.60 33 77.85 CHANGED sPss....PA........cssppuhstsphhpsuhpsuPsspshPsps........h....sps.psAsss......sssPps.....pssscs.pstsDu.stpshutstsuc.......h.pspchsthlcplAtclAtFCussslhtuGp.WplplslDPslLPposLtLsLS.apLoLRF.....cosss-oRcLlspHtssL+tpLcs....thsusRslpI-Vs ........................................t...s.PA..........pupcpsF.hsphhpcu.h.ph.uPsstshPsss................h....spscc.sAsss......sssP.h........sstcs.ts..Ds.sptuhuRshAut....................htppp+hshhlctLAtclAtFCuusAlhsuGp.WEspLslDsplLPpTsLaLtLS.apLuLRF.....-sscs-oRcLLhsHpstLctpLcs....thusshsIpl-V.................. 0 3 7 10 +9316 PF09484 Cas_TM1802 cas_TM1802; CRISPR-associated protein TM1802 (cas_TM1802) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This minor cas protein is found in at least five prokaryotic genomes: Methanosarcina mazei, Sulfurihydrogenibium azorense, Thermotoga maritima, Carboxydothermus hydrogenoformans, and Dictyoglomus thermophilum, the first of which is archaeal while the rest are bacterial. 
22.50 22.50 23.10 23.40 22.00 22.40 hmmbuild -o /dev/null HMM SEED 593 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -13.15 0.70 -6.03 16 118 2009-01-15 18:05:59 2007-05-03 13:06:51 5 2 108 0 59 118 6 501.70 18 87.54 CHANGED MlpulhplGchhlp..scutpphph..........l.p.sptushtplLhlshsscpsplp..hhht-EhspcshpcaLahspussssu...hssTsphs.......................c.hcphtpKlpphhpph...h..............t+.c.hlp....................................................slcphlpcphcphlp-h.......shcppcsslholph...sschhhsp..aptahcthlpthppphh.....tKps+ppul.CplCscp.stlhuph.p...aKaYTsDK.sFss.shsppstaKshslCpcChhpL.tGcpalcppLssphh.uhc..halIPphlh........phcp.lccIhcphpphpshp......tslpppE--Ihphlpppcp...hhhshLF.acpsp..usa+IlhhIcDlsPSRhccIhcttc+spptht...................................phsLspIhhhh.......pp.spsspsKpaLplhsuIFsG+.lshchLlsphhptlRppah...............cstththhshpuhhlltFLpclshLp.......tshphcpspht.......pp-lcpFh....pphhpss.KpAlFLLGlLhscllphQa.........tcppsKsalsK..Lph.thsppclhplhs-lpsKLppY.......tt...ppY.hcplhtphtchhhpuhssWpLopDEhsFYhl.Ghohup 
..................................................................................................hlt.h..lGp...t...t........................................ph.hhhhlth...tt.tth..........cpht.tpphtphh....a......httsss.ss...h...s..s.pht................................p..pth.thl.phhp.h...................................p.t.hhp....................................................ph.....phhtpt..hpphhpc..................t..tpp....h.hh.slth.......ssp....h.s....h..ahphh...ht.httt.........ttpt....tpth..CtlCspp.ptlhspht........apaaohD+.sahs.thp....ppphh..+shslCtpCh.tl.tGcpal.........pp.p.........L.phphh..uhp......hhllPphhh...........pp.ht..plh..pth..pphh..pht..........ph.t.cc..clhp.hp...p.pp.......h.hshlF.hpptp...st.+lhhhlpDV.PS+hpplh..pth.pp..hpphh.....................................phshttlh.hh...............tpt...p.h.Lp....lhpt....lhptp.lshphlhp..hhpth.....p..hh.......................pt...h.........hh...p.h.hhh.alpphshlp.....................t..h...t.......................p.hppah....tth.h.ss.cpu.hFLLGsLhstlhphQh................ppptspPhhpp..l.phht.hsppclhplhsclhsKlppY........pt......htth....hctl.htph..pphh...p..t.......t....pa.phs.pEh.FYhh.Ghsht........................................................... 1 23 43 53 +9317 PF09485 CRISPR_Cse2 CRISPR_cse2; CRISPR-associated protein Cse2 (CRISPR_cse2) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with non-homologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family of proteins, represented by CT1973 from Chlorobaculum tepidum, is encoded by genes found in the CRISPR/Cas subtype Ecoli regions of many bacteria (most of which are mesophiles), and not in Archaea. 
It is designated Cse2. 22.10 22.10 23.40 22.80 20.40 20.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.94 0.71 -4.15 58 642 2009-01-15 18:05:59 2007-05-03 13:07:14 5 4 627 2 120 399 12 146.00 32 78.32 CHANGED uspApLRRutshsss...........h..ha........hhp..h..t............ptpppshhhlAulhAttppt.....................................ps.ut.....slGpsht.hhttstt.............sss-tRFppLlpus..........hcplhppLRpllphlc..sp....lsaspLscsLhtWpppp..............ptlptcWupDYa ...........................................................Gt+AsLRRusssscs..........phscua.......Lhhpstp.hh.pt...............phc.hsAlsls..AulhAphcthcc...............................................................ppshus.......pLuts............................hsc.tRFs+Lhssc.........ss--LlcpLRRhVcLLs....us........lshssLA-slapWspcpp............s-hlRlRWAh-YY................. 0 34 87 105 +9318 PF09486 HrpB7 Bacterial type III secretion protein (HrpB7) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow range of species including Xanthomonas, Burkholderia and Ralstonia. 27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.89 0.71 -4.30 6 92 2012-10-01 21:16:01 2007-05-03 13:20:05 5 1 73 0 16 91 1 149.70 35 85.75 CHANGED RRspsacsLlpt+sRcscRlpscLustRstLppssstLtpspspscApsscLssasuplDshsusGushsl-shLppccaRssLh-pputAEpttstAptuLputtspluuhppcluR.cApl-lhs-+tcplcRAt-AttEsAp-EEs.EAlluhRh ...............cpttsapsLhth+spctcRhptclpshRtthptstttlspsptpsctppsthsthstplsshss.ssushs..lsshhtpccaRssLh-cpttAEpppAthcsslputtcpluuspppluR.pApl-lsc-+hcplcRut-AttEsAp-EEh.EullAtRh............ 
0 3 6 10 +9319 PF09487 HrpB2 Bacterial type III secretion protein (HrpB2) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow group of species including Xanthomonas, Burkholderia and Ralstonia. 25.00 25.00 51.70 51.20 21.90 20.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.47 0.71 -3.90 9 93 2009-01-15 18:05:59 2007-05-03 13:24:07 5 1 73 0 16 60 1 116.90 38 89.29 CHANGED ussosAhsPsss.....sstpLss+FpALMpsusshPsuhptscs.Shlu+lVtppDsulRpsssclthhsppAsphohp-l..sAtslclphEhsuhphchpsphuVspSuKsAlpTLMKNQ ............................hss.AhsssAs.....sstcLssRFpALMpsAsstPsttppsts.Sslu+lVsspDstlRpss-clsshshpAsphohpEh..sApsl+lphElsuhphchpsphuVspSuKsAlpTLMKNQ 0 3 7 10 +9320 PF09488 Osmo_MPGsynth osmo_MPGsynth; Mannosyl-3-phosphoglycerate synthase (osmo_MPGsynth) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of examples of mannosyl-3-phosphoglycerate synthase (MPGS), which together with mannosyl-3-phosphoglycerate phosphatase (MPGP) EC:2.4.1.217, comprises a two-step pathway for mannosylglycerate biosynthesis. Mannosylglycerate is a compatible solute that tends to be restricted to extreme thermophiles of archaea and bacteria. Note that in Rhodothermus marinus, this pathway is one of two; the other is condensation of GDP-mannose with D-glycerate by mannosylglycerate synthase. 
20.20 20.20 21.50 127.70 19.30 18.70 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.23 0.70 -5.78 15 56 2009-01-15 18:05:59 2007-05-03 13:38:18 5 4 53 12 32 53 9 385.70 44 87.68 CHANGED lEhPp.sEpFGuVpIH.-VQ+VlE..LDoG........s.ttpssslpslspccLpclhpchAIVVPsKsEcl+LLEGVLpGIPH-ChlIlVSNSpRps.hDca+hEh-hlcpFsphTc+pllllHQKDPsLApAFp-sGYsslLsscuh.....VRsGKuEGMIlGhLLAKh..hG+cYlGFlDADNYlPGAVpEYV+tYAAGFsMu..............cSPYoMVRlpW+aKPKVscss.lYFcKaGRVSElTN+aLNtLlSshouFET-..........lIpTGNAGEHAMohcLAcpLsauoGYulEPYchV.LLEcFGshhts.tps-shppG..................................................lEIhQIEThNPHlHE.-KGs-H.lpcMlhsSLusIYH..................................S+Lss.........ctl+pcILc-L.....ptpshlspt.--PPpPhlhPPltplDlctFtchlc ........................h.EhPphsEhFGsVcIa.-lQ+Vlc..LDos...............sttsts.slpslsppslpcllpchAIVVPhKsEcL+Ll-GVLpuIPHcChIIlVSNSpRt...-pa+hEhDhlccFsphTc+phlhlHQKDPuLApAFpcsGYsclLs.csGh...........VRsGKuEGMllGllLA+h..hG+cYVGFlDADNYlPGAVpEYl+sYAAGFtMu...................cosYoMVRlpW+aKPKlspsp..laF++aGRVSElTN+aLNpLloshouFE..Ts........................llpTGNAGEHAMohcLA.hLsauoGYulEPachV.LLEpaGthhs...ttp-shppG..................................................VEIhQIEThNPHhHE.sKGc-H.lccMlhsuLuoIYH..................................ScLss.........-pl+pclLc-L.....ptp..shlpps.cEPPpPhlhPPlpplDlctFhchl............. 1 7 18 24 +9321 PF09489 CbtB Probable cobalt transporter subunit (CbtB) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of proteins which have been proposed to act as cobalt transporters acting in concert with vitamin B12 biosynthesis systems. 
Evidence for this assignment includes 1) prediction of a single transmembrane segment and a C-terminal histidine-rich motif likely to be a metal-binding site, 2) positional gene linkage with known B12 biosynthesis genes, 3) upstream proximity of B12 transcriptional regulatory sites, 4) the absence of other known cobalt import systems and 5) the obligate co-localisation with a protein (CbtA) predicted to have five additional transmembrane segments. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.92 0.72 -4.06 20 220 2009-01-15 18:05:59 2007-05-03 14:19:16 5 1 217 0 69 157 46 56.40 39 87.85 CHANGED sspsssssspu.sclhtssuAshLGhsllahsGFuphsslHNAAHDsRHusuFPCH ........................................hs...t.shshs.tplssulsuhlL..G...shL...lYhsG........aup.......s.......sh........lHsuAHDsRH.u.hG.FP.CH 0 12 37 53 +9322 PF09490 CbtA Probable cobalt transporter subunit (CbtA) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of proteins which have been proposed to act as cobalt transporters acting in concert with vitamin B12 biosynthesis systems. Evidence for this assignment includes 1) prediction of five transmembrane segments, 2) positional gene linkage with known B12 biosynthesis genes, 3) upstream proximity of B12 transcriptional regulatory sites, 4) the absence of other known cobalt import systems and 5) the obligate co-localisation with a small protein (CbtB) having a single additional transmembrane segment and a C-terminal histidine-rich motif likely to be a metal-binding site. 
22.20 22.20 22.70 24.30 21.40 22.00 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.60 0.70 -5.04 25 417 2009-01-15 18:05:59 2007-05-03 14:20:37 5 1 375 0 137 384 69 223.70 30 92.26 CHANGED Mhp+llhuAlhAGllAGllsshLQhhhlpPlILcAEsYEsush...................sH.tupu................................suuHs..Hstt......s.ht.tsGhp....RshhT....hsuNllsusGaA.LlLsuhhult.t...stsssppGLlWGlAGFsuhpLAPulGLPPElPGssAAD.............................LssRQhWWluTlsuTAlulsLlAFupshhhhhlullLllsPHllGAPp........P-shsussPspLuspFssAuhssuhshWssLGlluuahapR ................................................MhtpllhpuhhAGllAGlLshshthlhspPh...l.pA.saEputs.......................pttutu...............................huscs...ct...............tstcshp....................Rssts....hhusllhusuh....u..Lhhssh.sh.h....h....s........th....ss....ttt.u.l....hhuhuGFhslhllPsLthPsp.Pu..sus.s.-.................................L.s.tR..h...h....Whu.....o.l......s.......u..s...u..h....u...lh.....l...........l.............................................................h.....h.........h.u....l.........s....h...l....l....h...P...p......l.....ht.....s.....Pp............t...h..s.shPusLhhpFtlASlusphlhWsslGLshGhhhp............................................................... 0 27 74 108 +9323 PF09491 RE_AlwI AlwI restriction endonuclease Bateman A anon Bateman A Family This family includes the AlwI (recognises GGATC), Bsp6I (recognises GC^NGC) , BstNBI (recognises GASTC), PleI(recognises GAGTC) and MlyI (recognises GAGTC) restriction endonucleases. 
19.70 19.70 20.00 19.90 19.60 19.50 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.48 0.70 -6.00 18 173 2012-10-11 20:44:45 2007-05-03 14:23:23 5 4 152 2 21 155 34 374.30 18 73.57 CHANGED pplGFlhhc.........t...thlplTcsG+tLls..................p...s.pplFL+pLlKaQlssshppp..........h.hpPahhlLcllpcL...........stlohcEhshalhhhhs...ppphcpllsc..........IhpaRppchttpt....................scchapccphpt..................................ptuhcpphcspt.............................................................shpDYsDsshRah+hTGlhshp....upG+hlplsppcp.cpl-hllpshpp....h......s.ppahphhspsststLsh-......shtpphlpplsthhsphspphtltpht..h...........shsp-plccplpclpc......pKhhpc..........hhplLp.htch.pt................tptPshhEahhhhuhh.tlssththh....NhphDs-shPhssAsG..stsDI.h.......ascathllEVTLscuppQp...phEupPVsRHlschtpch.....scshaslFlAPpIcssohppFhht.h.........t.ttthtIhPhslppFhph ................................................hGhh..t...................h..lot.Gphhlp.....................ph.p.haLptlhphphsssh................h.Phhh..llplltch...........t.l.shc-h.hhhhhh........ppphptlhpp..........IhphRpph.t...p.........................pp.h.pt.ht..............................................................................tp.hph.t.......................................................................................................hp-h.D...hRhhphTslhph.......t.sphh..h....t....hp.l.lpphtt.................h....t......l.hp...........h.......p...................................ph..p.h.p.h..hhp......tp..pp..........hhphl...................................t.P.hhhEah.hhhhh..h.t..........shph-sphhPh.pAsG.......s......sDl.h.........tph..hlEsoh.ptppQ....thEh.sl.RHh..hphh.p........h..hslFlAs.lptsshp.h.....................thtIhshth.ph...h........................................................................................................................................................................................................... 
0 7 17 18 +9324 PF09492 Pec_lyase pec_lyase; Pectic acid lyase TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are isozymes of pectate lyase (EC:4.2.2.2), also called polygalacturonic transeliminase and alpha-1,4-D-endopolygalacturonic acid lyase. 19.80 19.80 19.80 20.10 19.70 19.70 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.00 0.70 -5.24 51 198 2012-10-03 02:33:51 2007-05-03 14:24:54 5 14 122 5 65 232 27 282.70 34 61.02 CHANGED QpssGGWsKN......hDhstshsttpttth...............httttp...puTIDNsATssplpaLA+lapts........pst...+a+sAhh+Gl-aLLsAQYs.NGGWPQ...................aYPh.ppu.......YpsaITaNDsAMlsVlplLc-lupspssa.t..hls.psppt+uppAlp+Gl-sIL+sQl.h...sGp..hTsWsAQHDppTLpPstARuYE.sSLousESsuIlcaLMsl.pPo...scllpAlcuAlsWhc..ps+lsGh+h.phts............p.hhltc..susslWARFY-lsTN...................+PlFssRDuh.......h+asls-lptERRsGYuWasshspcll ..................................QpssGGWsKs.......hchssthstpphtph.....................hptttph....puTlDNsATssplpaLuclYpto........psp.......+Y+sAhh+GlcaLL.puQYs....N......GGWPQ............................haPh.tts.........................YpspITaND.s.AMsp..lhplLpplhpt.pssat.................hls.tthp....t+...sppAhs+Gl-sILcsQlh....s..Gp...hTsW.stQaDph.TL......pPstARuYEhsSluusESssllphLMslspPo...sclhpulcuAlpWhc..ps+lpGhphpphts.............t.hhl.........s..susslWARFY-lpTs...............................................................+PhFscRDG.h........hphs.ltclstERRsGYuWYss.stpl......................................................... 0 34 57 62 +9325 PF09493 DUF2389 CHP02450_Tryp; Tryptophan-rich protein (DUF2389) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are small hypothetical proteins of 60 to 100 residues from Cyanobacteria and some Proteobacteria. Prochlorococcus marinus strains have two members, other species one only. Interestingly, of the eight most conserved residues, four are aromatic and three are invariant tryptophans. 
It appears all species that encode this protein can synthesise tryptophan de novo. 25.00 25.00 32.30 32.20 21.00 16.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.12 0.72 -3.70 57 214 2009-01-15 18:05:59 2007-05-03 14:45:43 5 3 201 0 79 203 221 60.10 36 70.83 CHANGED sSKWTuspshs.tp+HFhlsphthcc.ctphlc..hlpsl.scpphplsWp-L+ssspWppGWp .h.SKWTAspshs.tp+HFhVsphp.....tcc.....csshlc..hlpul.spp....phhlsWpp.L+DsshWptGWp. 0 15 45 65 +9326 PF09494 Slx4 Slx4 endonuclease Wood V, Coggill P anon Wood, V Domain The Slx4 protein is a heteromeric structure-specific endonuclease found from fungi to mammals. Slx4 with Slx1 acts as a nuclease on branched DNA substrates, particularly simple-Y, 5'-flap, or replication fork structures by cleaving the strand bearing the 5' non-homologous arm at the branch junction and thus generating ligatable nicked products from 5'-flap or replication fork substrates [1]. 21.30 21.30 21.40 23.30 21.20 20.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.11 0.72 -4.22 57 215 2009-01-15 18:05:59 2007-05-03 15:16:06 5 8 193 0 153 225 0 65.60 27 7.08 CHANGED lhptlpphlc.................ppPs....hac+ILhY-PItLc-LtshLp....tth....clststl+pahDppuIshphp ..........................................................hptlpphlp.................spPs....hac+ILhY-........PI.Lp-LtshLp.............ssl.....clshspl+cahDspsIshph.t................ 0 39 67 117 +9327 PF09495 DUF2462 UPF0390; Protein of unknown function (DUF2462) Wood V, Coggill P anon Wood V Family This protein is highly conserved, but its function is unknown. It can be isolated from HeLa cell nucleoli and is found to be homologous with Leydig cell tumour protein whose function is unknown [1, supplementary Table I]. 
21.80 21.80 23.00 22.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.85 0.72 -3.25 16 199 2009-01-15 18:05:59 2007-05-03 15:21:16 5 6 183 0 143 182 0 80.70 31 76.49 CHANGED MAQGthKh..KAKsssssp........+Kpps.++.usRhItPKKtshhptpKLpKhhouulpspsE+.lsp+A.....G+LpLlK...usp+cp ........................MAQGph.K...........Ku+tssssp.........tpppp..u.s..+K.Gs+sIt.PKKs.p..l.h.p.p.pK.lp....Kplosulsp.psE+tlsp+A.....G+LpLlK...ustc..h................... 0 41 69 114 +9328 PF09496 CENP-O Cenp-O; Cenp-O kinetochore centromere component Wood V, Coggill P anon Wood V Family This eukaryotic protein is a component of the inner kinetochore subcomplex of the centromere. It has been shown to be involved in chromosome segregation via regulation of the spindle in both yeast and human [1][2]. 19.60 19.60 19.80 20.00 19.00 18.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.03 0.72 -3.75 28 202 2009-01-15 18:05:59 2007-05-03 15:21:40 5 1 164 2 135 187 0 88.50 31 28.93 CHANGED LGlRh-lhsc.pupFhcPYYllL++hsp.tp.....................hpla+HTlPsaIPlcpltpph.Lss.............................tpp-lppFscclpcpLssaphRpcthptL .........lGlph-l.st.pGpahcsYYllLpp.pt..tt.......................................lpla+HTlPsFI..PLcpltpca.Lss......................................slppFlcplpcpLsuaptR+tthp.L........... 0 25 60 100 +9329 PF09497 Med12 Transcription mediator complex subunit Med12 Wood V, Coggill P anon Wood V Domain Med12 is a negative regulator of the Gli3-dependent sonic hedgehog signalling pathway via its interaction with Gli3 within the RNA polymerase II transcriptional Mediator. A complex is formed between Med12, Med13, CDK8 and CycC which is responsible for suppression of transcription. This subunit forms part of the Kinase section of Mediator [2]. 
25.00 25.00 29.80 25.00 24.90 19.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.07 0.72 -3.99 28 294 2009-01-15 18:05:59 2007-05-03 15:21:57 5 9 223 0 201 300 0 62.60 40 3.55 CHANGED sFp.PsRVTLocs++-sWLp-LAssshsLpcLu+p.lPHGh+p+pLl-thh.............................................ppplPlpRAlWhlK ..............Fh...hsshopsphcsWhpDLAs.stPLppL......u+...........p.lPph....++cplhshlh.............................................phsVPlhRAsWLlK.. 0 54 94 155 +9330 PF09498 DUF2388 CHP02448; Protein of unknown function (DUF2388) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of small hypothetical proteins, about 100 amino acids in length. The family includes five members (three in tandem) in Pseudomonas aeruginosa PAO1 and in Pseudomonas putida (strain KT2440), four in Pseudomonas syringae DC3000, and single members in several other Proteobacteria. The function is unknown. 25.00 25.00 26.60 31.10 19.30 18.80 hmmbuild --amino -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.29 0.72 -4.34 32 301 2009-09-10 15:05:37 2007-05-03 15:32:04 5 1 84 0 73 218 5 71.80 49 68.29 CHANGED ulssSstsTossoooh+.DpKlVlsARDDAAoFVASsGsIRGApLEAALpplRpptPpL.pASDhpLApAILuh ...........................s.hhposssTSssoooh+.DsKllhsARDDAASFVASsGsIRGApLEAALpplRppsPph.pASDhpLApAILA.h.. 0 7 14 47 +9331 PF09499 RE_ApaLI ApaLI-like restriction endonuclease Bateman A anon Bateman A Family This family includes R.ApaLI and R.XbaI restriction endonucleases. ApaLI recognises and cleaves the sequence GTGCAC. 
25.00 25.00 26.30 25.90 22.80 20.20 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.25 0.71 -4.92 6 327 2012-10-11 20:44:45 2007-05-03 15:36:43 5 1 171 0 9 42 2 56.80 51 84.36 CHANGED slpcpI+hLAppYAscLpppltpRshEMpsDDp.................................................SHYLIY...........RVLGlsh-EGcLIDhYQNpGRFLYKYAGSFLEE..AThLCFKctF........................scu..u+t+VtNTpGp+PKsFEIDCLl.....sspAaEIKWR..DATTDGDHIsKEHTRl+VlpstGYhPlRIMFYYPsRsQAhRIQpTLcTLYpGVGGpYYhGDuAWsaVcccTu .................................................................................................................................................................................................................................................................................................................................................................. 0 6 9 9 +9332 PF09500 YiiD_Cterm yiiD_Cterm; Putative thioesterase (yiiD_Cterm) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry consists of a broadly distributed uncharacterised domain often found as a standalone protein. The member from Shewanella oneidensis is described from crystallography work as a putative thioesterase because it belongs to the HotDog clan of enzymes. About half of the members of this family are fused to an Acetyltransf_1 domain Pfam:PF00583. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.80 0.71 -4.62 47 840 2012-10-02 20:54:35 2007-05-03 15:37:05 5 5 832 4 122 505 74 141.30 61 49.49 CHANGED tppLpphh+ppIPloctMulplhpasspplplsAPLssNlNh+sThFuGSlaoluTLoGWGhlaLpL+-tslp.GcIVlt-upIcYhtPlssc..hApsphs...................shss.lspLpptp+ARlplpsplhsssph.......supFpGpaVsh. 
........................CuQLQQAWY-HIPLSEKMGVRI....QQY..T.GQ+FlTTMPE.s...G...N.Q.NPHHTl..........FAGSLFSLA.TL.TG.WGL.IW.Lh.L.RERHLG....G........TI..I..L.A.D.AHIRYS+PIoG+...PpAVADLu...................uLSGDLDRLAR.GR..K.ARV.QhpV-la.G.sE.ss.......GAlFEGsYlVL.P............................... 0 22 48 86 +9333 PF09501 Bac_small_YrzI Bac_small_yrzI; Probable sporulation protein (Bac_small_yrzI) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are very small proteins, about 47 residues each, in the genus Bacillus. Single members are found in Bacillus subtilis and Bacillus halodurans, while arrays of six members in tandem are found in Bacillus cereus and Bacillus anthracis. An EIxxE motif present in most members of this family resembles cleavage sites by the germination protease GPR in a number of small acid-soluble spore proteins (SASP). A role in sporulation is possible. 21.20 21.20 21.90 21.40 21.00 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -8.07 0.72 -4.05 13 422 2009-01-15 18:05:59 2007-05-03 15:43:11 5 1 124 0 34 156 0 46.00 49 98.60 CHANGED MTFplFFLTITIQK+p...hSpsElcpcpQhcplh-Eh+-Rp..spYhs+h .MpF+lFFLTITIQKpp...lSpsEIh+-pQIcphMD-lKERQ..upYho+L......... 0 4 21 22 +9334 PF09502 HrpB4 Bacterial type III secretion protein (HrpB4) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents proteins encoded by genes which are found in type III secretion operons in a narrow range of species including Xanthomonas, Burkholderia and Ralstonia. 
21.60 21.60 21.80 55.80 21.40 20.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.56 0.70 -5.15 8 89 2009-01-15 18:05:59 2007-05-03 15:43:34 5 1 71 0 15 75 1 211.40 37 94.56 CHANGED ssApstthARhLpcapsplssLs-alD....s.uWLpsssulus...u+usuhRsthphh.......h.+sh.shtGhssPoLssL-stAsR.................LAVLstssLlpVLpuRALhsRsssLRpCI-RspRotLpptlGPtsh.hh.s.tt....-AsptcsustuhsPL......sh-ucshAWl..GaRthppDGsWPssu.lh+llRLAL...Ptu.........u.sPpLsPhAusu...sucphLuALPoLaPE ...........................s.ss.hpttAthLptapsshtphschlc....s.uWhs.sls.uls.....upAsshRsAhphh..........h.csl.....Ghu...tsolsuLtthA.s.R....................LAlLshsphh+VLthRALhhRpstlR+sIDRtphs+LtshV.......Gssssphh...................-AsRtp.......tpst.....shsPL.....sth-A..cslAat..GWphhpsD..ss....hsss...........u.sh..+...llRLuL...Ptu.............s.ssh.LsthA.ucs..............sutthlstLPoLhPE... 0 3 7 10 +9336 PF09504 RE_Bsp6I Bsp6I restriction endonuclease Bateman A anon Bateman A Family This family includes the Bsp6I (recognises and cleaves GC^NGC) restriction endonucleases. 25.00 25.00 29.40 71.10 21.00 24.90 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.21 0.71 -4.82 3 17 2012-10-11 20:44:45 2007-05-03 16:24:33 5 1 17 0 0 10 19 169.30 53 94.55 CHANGED MAYKKFGYIEIDDARI--TCDAYFKWKDLNoYIKNTSSRGINMPDAISEPMGCYCLGYLWNRGoEVGDATDPcTNAKIEFKATSKFDGDLSSFGPKTVFDNLVFLRFNLD-NcLYIYDLsINSE-LcKYPANKTQTIQDQKAhGRRPHVSLQKLFVEA+DlcPDIIFDIRRCRIIEDNR+ .....uDsh...........DKs-FEhlp-lahcWhhhNp+lKSluuRGlNhPDVFSEuLhChAhNhlRosGTuh.hDslhcDTsctVQVKuuS.hssDsTSFGPTSsaDlLYFAcFs.ssc.-hhVhhhKIpsDDlYNlslN..KsETFKDQQAQGRRPRFSIQSsII+uKcLhPltslNI...........ITGc....................... 0 0 0 0 +9337 PF09505 Dimeth_Pyl Dimethylamine methyltransferase (Dimeth_PyL) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of dimethylamine methyltransferases from the genus Methanosarcina. 
It is found in three nearly identical copies in each of Methanosarcina acetivorans, Methanosarcina barkeri, and Methanosarcina mazei. It is one of a suite of three non-homologous enzymes with a critical UAG-encoded pyrrolysine residue in these species (along with trimethylamine methyltransferase and monomethylamine methyltransferase). It demethylates dimethylamine, leaving monomethylamine, and methylates the prosthetic group of the small corrinoid protein MtbC. The methyl group is then transferred by methylcorrinoid:coenzyme M methyltransferase to coenzyme M. Note that the pyrrolysine residue is variously translated as K or X, or as a stop codon that truncates the sequence. 25.00 25.00 529.60 529.50 19.30 18.40 hmmbuild -o /dev/null HMM SEED 466 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.64 0.70 -6.22 3 16 2009-09-13 14:57:56 2007-05-03 16:31:52 5 1 8 0 12 25 14 459.20 77 99.76 CHANGED MATEYALRMGDGKRIYLTKEKIluEIEAGoANAADLGEIPALSuDEM-KLAEILMMPGKAVSVEQGMEVPVTHDIGTIRLDGDQGNSGVGIPSSRLVGCMTHERAFGADTMELGHIDYSFKPVKPVVSNECQAMEVCQQNMIIPLFYGAMPNMGLYYTPDGPFENPGDLMKAFKIsEAWESMEHAADHLTRDTVWVMQKLFASGADGVNFDTTAAAGDGDMYGTLHAVEALRKEFP-MYIEVGMAGEMVLGMHGNLQYDGVTLAGLWPHQQAPLVAKAGANVFGPVVNTNTSKTSAWNLARAVTFIKEAVKASPIPCHVDMGMGVGGIPMLETPPIDAVTRASKAMVEVAGVDGIXIGVGDPLGMPISHIMASGMTGIRAAGDLVARMEFSKNMRIGEAKEYVAKKLsVDsMDLADEHVMRELREELDIGVITSVPGAAKGIAAKMNIEKLLDIKINSCELFRKQl .....MATEYsLRMGDGKRlahoK-+Ihp-lEAGhusAuDLG-IPsLSs-Eh-KLAEILMMPGKsVSVEQGMEVPVTHDIGTlRLDGDQGNSGVGIPSSRLVGCMhHERAFGADTMELGHIDYSFKPVKPVVSNECQAMEVCQQNMIIPLFYGAMPNMGLYYTPDGPFENPGDLMKtFKIpEAhESMEHAA-HLTRDTVWVMQKLFASGADGVNFDTTuAAGDGDMYGTL+AIEALRKEFP-MYIEAGMAGEhVLGMHGsLpYDGVTLAGLWPHQQAPLlAKAGANVFGPVCNTNTSKTSuWNLARAVTFhKAAVcASsIPCHVsMGMGVGGIPMLETPPIDAVTRASKAMVEIAGVDGIOIGVGDPhGMPISHIMASGMTGhRAAGDLVARMpFSKNMRIsEAKEYVAKKLsV-hMDLuDEaVMRELREELDIGlITSVPGAAKGIAAKMNIEKLLDIKINSCpLFRKQl.. 
0 4 7 7 +9338 PF09506 Salt_tol_Pase Glucosylglycerol-phosphate phosphatase (Salt_tol_Pase) TIGRFAMs,Coggill P anon TIGRFAMs Family Proteins in this family are glucosylglycerol-phosphate phosphatases, with the gene symbol stpA (Salt Tolerance Protein A). A motif characteristic of acid phosphatases is found, but otherwise this family shows little sequence similarity to other phosphatases. This enzyme acts on the glucosylglycerol phosphate, product of glucosylglycerol phosphate synthase and immediate precursor of the osmoprotectant glucosylglycerol. 25.00 25.00 248.60 248.30 18.50 18.10 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.17 0.70 -5.91 16 45 2009-09-11 09:42:30 2007-05-03 16:32:15 5 1 45 0 16 45 195 380.80 52 93.38 CHANGED pcLLIVQDLDGVCMPLVKDPLTRpl-scYVpAsupLpscFsVLTNGEHEG+RGVNRlVEpALuspppspccGLYLPGLAAGGVQaQDRaGplSHPGVS-sElsFLAtlPp+McshLtppLsslhPpLoscplpphhptAlLDTplSPTINLNuLFull.sDVppQ+pLQthlpclMspLlspApupGLcsSFFLHlAPNLGpctsGpEhlK.AstsDlGTTDIQFML+GAlKEAGLLsLlN+aIup+oGpuPLG-sFNVRsAP+sHpuLLsLC+cpIss-pMPhLVGVGDTVTSph..sssupuWLRGGSDRGFLTLlQcLGppaspsNRVVhVDSSpGEV.sRPSlpDssLpGIS..........DP-DPL+FDllhsuGPcpYlsWFppLAp .pNLLIlQDLDGVCMsLVKDPLTRpl-scYVpAspphpscFhVLTNGEHtG+RGVNRlVEpALGssshsppcGLYLPGLAAGGVQaQDRaGplSHPGVScsElsFLAtVPpthcstLpphlsshhPpLus-plpptlptoVLDspsSPTlNLNuLFsllpc........-sphhppLQphhppLMspLltcAtupGLtsSFFLHlAPNLG+Dpp...GpEhlK.AptsDsGTTDhQFML+GAlKEAGlLsLLN+ahtpRTGphPLGcsFNsRsAPpsHpsLLcLs+pplsscpMPhlVGVGDTVTSps...ss..s..stsahRGGSDRuFLpLlQcLGptaspsNpVVaVDSSpGEV.sRPslphss...........l.pGIo.................Ds-DPL+hslshPsGPcpYlsaFpplA..... 0 2 8 14 +9339 PF09508 Lact_bio_phlase CHP02336; Lacto-N-biose phosphorylase TIGRFAMs, Coggill P anon TIGRFAMs Family The gene which codes for this protein in gut-bacteria is located in a novel putative operon for galactose metabolism. 
The protein appears to be a carbohydrate-processing phosphorolytic enzyme (EC:2.4.1.211), unlike either glycoside hydrolases or glycoside lyase. Intestinal colonisation by bifidobacteria is important for human health, especially in pediatrics, because colonisation seems to prevent infection by some pathogenic bacteria that cause diarrhoea or other illnesses. The operon seems to be involved in intestinal colonisation by bifidobacteria mediated by metabolism of mucin sugars. In addition, it may also resolve the question of the nature of the bifidus factor in human milk as the lacto-N-biose structure found in milk oligosaccharides. 26.60 26.60 27.20 77.50 24.40 26.50 hmmbuild -o /dev/null HMM SEED 716 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.52 0.70 -13.24 0.70 -6.87 7 288 2009-01-15 18:05:59 2007-05-03 16:33:53 5 3 255 18 27 215 17 673.60 54 98.37 CHANGED GRhTlPs-sshtppshELhpRWGADAlRDsDGTchs--lhphssKlYSTYhssRsDpsWAptp.-ElpQhYLMop.ssAhS-sh.pIsLhcGaac-QhhsssspD.ccWWEVlDRTTupVlss-pWsaDtpstpVhlpsspsaHcYTVoFLAahIWDPlpMYNalTNsWtshcHph...saDshpPcTppalhpthccWlc-pPpsDVVRFTTFFapFTLsaspht+E+aV..DWaGYusoVSPhALEpFEKEhGY+LpPEDhlDpGaYNssaRVPsKpa+DahsF.pcaVschsKcLV-lsHpsGKEAMMFLGDpWIGTEPatctFpclGlDAVVGSVGsGsThRhIuDIPGV+YTEGRFLPYFFP......DsFhEGGDPlhEApsNWlpARRAILRpPlDRIGYGGYLpLAspFP-FlDhlEcIssEFRcla-shpGppsh.shh+VAVLNsWGphRSWtsphVtHAlah....+psYSYhG..llEuLSGhPh-VcFISFDDIhcp.Gl.c-lcVlINsGsusTAaSGGphW.c-tclsutl+pFVtpGGGhIGVG-P.oAppapG+aaQLAcVLGV-+EhGholSpc+Yshphp.pHFlT.t-hstc..............................lDFGEuhpslYshss.splLttc...........ttplphAVNpaGcGRuVYluGlPYShcNuRlLhRulhWAuppEpphppahSoNhpsEVAhYscss+hsllNNT.-.QpTslhps.ucs.slcLts.-hhWhsl 
.................................GRlTLPs-psh.tpstELh-RhtADAlRsSDGTcLP-.lpcL.s.sKIYsTYassRuDp-WAptHPcElpphYLMSchssAh......u-s..sIsLMpsaas-QltPsscsDsc+WWpVIDRTTG-VlssstWph-t..tst.s......V..slppAtshH.YTVuFLAh.hWDPspMYNalTNsWt.Dc..pcI...PaDlRpPpThpalhctLcpWLt-..sPpVDVVRFTTFFYpFTLlasppu...pE+..aV..DWFGYSASVSs.AlEtFEKchGYtLpsEDhlDpGaYNSsFRVPpKpa+DalsFQpcFVsphs+ELV-hsHtsG+EAMMFLGDpWIGTEPYG.tFtphGlDAVVGSVGsGsThRhISDIPGV+YTEGRhLPYFFP......DsF.a-GGDPV..tEApssWlsARRAIlRsPlDRIGYGGYLSLAhpaP-FVDhlEclssEFRpIassstGppPh.ssh+VAlLNsWGtlRSWtsaMVAHALaY....+QsYSYhG..lLEuLSGhPhpVcFlSFDDIhp..Glscsl-VlINsGsAsTAaSGGp.W.tDtclsoslRcFVtsGGGhlGVG-P.oAh....t..p..GthaQLuDVLGVD+EhGasLSsc+ash..........c..hsccHFlo.tDl.s..s..c..............................lDhGEGh..sl.ashsssTplLttc.......................cp.pVpLAsNpaGpGRuVYluGLPYS.pNuRLLhRAlhWAu+cE-th.pp.WhSosspsEVssYPcss+hhVlNNoh-t.sTsVhss.sss.pclchpssthhWhc............................ 0 15 19 23 +9340 PF09507 CDC27 DNA polymerase subunit Cdc27 Wood V, Coggill P anon Wood V Family This protein forms the C subunit of DNA polymerase delta. It carries the essential residues for binding to the Pol1 subunit of polymerase alpha, from residues 293-332, which are characterised by the motif D--G--VT, referred to as the DPIM motif. The first 160 residues of the protein form the minimal domain for binding to the B subunit, Cdc1, of polymerase delta, the final 10 C-terminal residues, 362-372, being the DNA sliding clamp, PCNA, binding motif. 
24.40 24.40 24.40 24.60 24.30 24.30 hmmbuild -o /dev/null HMM SEED 430 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.68 0.70 -5.25 21 329 2009-01-15 18:05:59 2007-05-03 16:34:30 5 7 232 7 213 324 1 344.60 20 88.93 CHANGED +lVTY+hLScsLsVHsNpAKQMLY-ahcpcppcps.splaATYlloGp.hsst.sspp...................................................lslVcE-cLEss+u+hspshShalYSlQpshLpDsssLhsssh-hlpp.....spp.sthpstphs.htstplppppt......s.ssts....psshsuc.s...............stcsspuhhs.thpscppspspspp+sppstpscttss......pssstKh.s.s+ushhssFhptpsppKhttp.tstpsstpE.....ppt.psspshs.pcpss.......cppcps..hcs...................p.sspppppp.......................EscKc+.cc+l+phhpDc................s---............ss-SP.sppcspsssPs..........cp-.ptppsphpst..........s+RRtRR+V.hKpcThhD.-EGalVTcch.p.EShS-sEss.s................PsKppsssp.ss..ss...........spcsK.............sttttQusIMSFFpKK ...........................................................................................................................loa+.lu.thtlp.s.AK..p....h................L.pahpppp.............splpssYllpG..h.p.tt...............................................................................................................lhl..lp.-..p.pLpth..h...t.pht...h..o...hplYSl.....p..........s...h..p.-.......s......l.....h.....s....s....t...........ph.t................t......sh...t...tt.h..s.h....s...t..pptpt..................................sh.stt............................................t...pt....t.....t...p...pt.t...ps.p..ttp.ttp.....tttp...st.......................ps.s.t.c.......................tp..ss.h.tshhtttsh...ttht........t...tt.....tp.....................................p........t..h..p..p..........p.ttpt...t..............................................ppppppp........................................................................................cp.ppc......ccch..+..........p.ss...................................sp-p..................pp.......t....p....t.p.................................ptp........t.p............
....................ptt....t.+..+pl..hp..ppp..hhD.-c.G.hh.........VTpp.....t.ph..s.ppc..............................................p..tsh..t.........................................................t.t.p.......................................tt.t..Q.ss....lhuFFt+.................................................... 0 66 111 175 +9341 PF09509 Hypoth_Ymh Hypoth_ymh; Protein of unknown function (Hypoth_ymh) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry consists of a relatively rare prokaryotic protein family (about 8 occurrences per 200 genomes). Genes for members of this family appear to be associated variously with phage and plasmid regions, restriction system loci, transposons, and housekeeping genes. Their function is unknown. 25.00 25.00 25.50 25.40 23.30 23.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.53 0.71 -4.50 25 190 2009-01-15 18:05:59 2007-05-03 16:38:58 5 3 179 0 48 143 15 120.50 37 43.17 CHANGED pscslHsclhchsppphhss..pYhcAVhEAsKulss+lRptoGl..stDGssLhppuFu.sc.........pPhLtl..sshpocopcupQcGht.......sLhpGhhssaRNspAHcsc.hp.....hocpDAl-hLuhlSlltRhL ...............p..+tlHsclhpaCcschl.....sc..sYacAVhEAsKulsc+lRpho..Gl.......s....tDGscLlsp......AFs.sp..........pPhLtl..sshpT.coEcuEQcGht.......sLhhGlauuhRNPhAHps+..hp.s......hscpDAL-lLsllShlpR+L....................... 0 15 33 41 +9342 PF09510 Rtt102p Rtt102p-like transcription regulator protein Wood V, Coggill P anon Manual Family This protein is found in fungi. The family includes Rtt102p, a transcription regulator protein which appears to be integrally associated with both the Swi-Snf and the RSC chromatin remodelling complexes, [1]. 
21.40 21.40 22.80 22.40 20.40 18.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.64 0.71 -4.16 6 33 2009-01-15 18:05:59 2007-05-03 16:40:41 5 1 32 0 20 32 0 128.50 40 65.10 CHANGED SLIc+AN+.uGYussscpp..HWcY-WhoPs...........KpsEsspp.....P....ss.psp.ppYsFKaKsWl+sssscsh.shhc-ss-.plLDLscFDR........T+hsttspthpspusss....puLohsDIRGAVGGSEuI.......PGhS ................oLIs+AN+..uhYusssscc..pW+YDWYpPs..........................K.ss..usspp.sppp..............ush.pNslEKYsF+YKTWl...+spc.s-p....sLpc-ss-..D.........lLDLp-FDR........Tpcscsss......spsssssusspsspuLosDDIRGAVGsSEuI.......PGhS............................. 0 3 10 17 +9343 PF09511 RNA_lig_T4_1 RNA ligase TIGRFAMs, Coggill P, Mistry J, Wood V anon TIGRFAMs & Pfam-B_49998 (release 17.0) Family Members of this family include T4 phage proteins with ATP-dependent RNA ligase activity. Host defence to phage may include cleavage and inactivation of specific tRNA molecules; members of this family act to reverse this RNA damage. The enzyme is adenylated, transiently, on a Lys residue in a motif KXDGSL. This family also includes fungal tRNA ligases that have adenylyltransferase activity [1]. tRNA ligases are enzymes required for the splicing of precursor tRNA molecules containing introns. 
26.80 26.80 27.60 27.20 26.20 24.50 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.60 0.70 -4.87 48 347 2009-01-15 18:05:59 2007-05-03 16:41:19 5 16 314 2 157 357 36 216.60 26 33.56 CHANGED hpuRGLhhp........................psscIlsRua-KFFNls.........EhstTphcsl..pt.......Phclh.KtsG..sll.hshl.........psG....plllsSKpSs...................................................................................................................psspAptucphlpp.....pltpt..........hppls....ctLhppshTslhE...lsssp.pchVlsYspcphsLhLpshstss.....sca.hshs.hplsph.uc..pashpphchh.thss......th......ttph.ssshpt................ct...hEGa.Vlcsps........................................shhaKhKh-t.hh ................pARGLhhs........................pssscIssRuacKFFNls........................Eh..t..t..oph.pp.l....tph...........PhclhhK.NG...sll...l.shl..............psu............plllsSKpSh...................................................................................................................pssau.phucph.l.pp................phtp...........................hpplt.....ptl.hctshTslhE...lsssp.-..cH..l.lt...Ys.p.pp.hhl.hLtshNhps.......sca.hshs..tplpph....scpauh.....p.....ps.....phh...hhps....hp....pltt.hh........pph....ps..s.hps.................cp......hEGa.Vlpspp........................................shhaKhKhp..h........................................................ 0 64 102 138 +9344 PF09512 ThiW Thiamine-precursor transporter protein (ThiW) TIGRFAMs, Coggill P anon TIGRFAMs Family Levels of thiamine pyrophosphate (TPP) or thiamine regulate transcription or translation of a number of thiamine biosynthesis, salvage, or transport genes in a wide range of prokaryotes. The mechanism involves direct binding, with no protein involved, to a structural element called THI found in the untranslated upstream region of thiamine metabolism gene operons. This element is called a riboswitch and is seen also for other metabolites such as FMN and glycine. 
This protein family consists of proteins identified in operons controlled by the THI riboswitch and designated ThiW. The hydrophobic nature of this protein and reconstructed metabolic background suggests that this protein acts in transport of a thiazole precursor of thiamine. 27.20 27.20 27.60 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.90 0.71 -4.38 26 735 2012-10-03 02:46:00 2007-05-03 16:43:36 5 2 729 0 72 328 1 148.20 47 86.38 CHANGED LslsAlhlAluVllSslhtIPlGhs+shPhQHhlNVluuVlLGPhauluhAhlhullR.hlGsGolLAFPGShhGALLAGl....hY+hs+.+h...hhAslGEllGTGlIGuL.luaPlAshlhG.spsuh...hhahssFhhSolhGulIuahlLhhLp+p .....LslhuhhIAlsVVLSsI.hpI....tt.hAPhpphVNlLuuVhlGPhYuLAhAhloullRhhhtsh....ssLAhsGuhhGA....LLAGl....FYpa.......s..+...+h......ahuulGEllGTGlIGSl.lua.PlhhhhsG....pst.tl..............hhahspF.hsuolIGohIualllhhLpK.p....... 0 36 52 66 +9346 PF09514 SSXRD SSXRD motif Bateman A anon Pfam-B_23332 (Release 21.0) Motif SSX1 can repress transcription, and this has been attributed to a putative Kruppel associated box (KRAB) repression domain at the N-terminus. However, from the analysis of these deletion constructs further repression activity was found at the C-terminus of SSX1. Which has been called the SSXRD (SSX Repression Domain). The potent repression exerted by full-length SSX1 appears to localise to this region [1]. 20.90 20.90 21.40 22.20 19.00 20.40 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.43 0.72 -4.64 33 133 2009-09-11 15:10:05 2007-05-03 16:52:11 5 18 25 0 55 155 0 32.80 56 11.21 CHANGED pssthpVpsWoHRLRERK.hVIYEEISDPEEE-- .......pss++thc.sWoHRLRERK.hVlYEEISDPEEDDp. 
1 12 12 13 +9347 PF09515 Thia_YuaJ Thiamine transporter protein (Thia_YuaJ) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this protein family have been assigned as thiamine transporters by a phylogenetic analysis of families of genes regulated by the THI element, a broadly conserved RNA secondary structure element through which thiamine pyrophosphate (TPP) levels can regulate transcription of many genes related to thiamine transport, salvage, and de novo biosynthesis. Species with this protein always lack the ThiBPQ ABC transporter. In some species (e.g. Streptococcus mutans and Streptococcus pyogenes), yuaJ is the only THI-regulated gene. Evidence from Bacillus cereus indicates thiamine uptake is coupled to proton translocation. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.22 0.71 -4.51 43 674 2012-10-03 02:46:00 2007-05-03 16:53:45 5 2 640 2 112 689 6 171.80 34 87.25 CHANGED llEhulhsAlAhlLshl...hhphPpGGSlol.uMlPIhlhAhRaGhpsGllsGhlaGlLpll...hu.......s.a..llpshQslLDY.lAFsslGlAGlFtp............................ttt......hhhhhGsllushsRahsHaluGllFaGuYAPcGh......sshlYSllhNGoh.hlsshllshlllhlLhhpts.phhh ............................hlEsAlhsAlAhl.Lshl.....h.h.ths..p..G.h....Sloh...uMlPlhlhuhRhGhtsGhhuG.hl.aGl.Lphl...lG..............sha...hLsss.Qs.l....l..-..........Y.......hl.....AF..........u.h.l..G..h.A.G.l.F.tp.h...........................ttphh.th.hhhl......hhusl...l....ush...s.R..ah..h.Ha.lu.G..l.l.F.a....Gs..Y..A.....Pc...Gh................sshl...YS..h.l.h..NGoh...hlsshllshlllhlLhhs.sph..h......................................................................................................... 0 51 86 99 +9348 PF09516 RE_CfrBI CfrBI restriction endonuclease Bateman A anon Bateman A Family This family includes the CfrBI (recognises and cleaves C^CWWGG) restriction endonuclease. 
25.00 25.00 96.00 95.50 24.50 23.30 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.85 0.70 -5.51 5 36 2012-10-11 20:44:45 2007-05-03 17:06:14 5 1 36 0 1 27 2 257.80 52 84.94 CHANGED MpFc-.........................plh++VIpcVLpGcDYREpl.......lNtlNsEFLDFu......IDFFK-lloAKhpspslsL-WYpcasLuNKu..Pc.-lAI..hsGhNpKTIpNlYGouoKpVVlshupsplchL.sulppLu.csQ--IG.IslKIsY....K-lSVs...LsLcESLlVINALAoK+lsLRGStaSohGKplEKhLMLoLCplhGlc.....E-slsAcsFsKsKutDhDRElDFpL.plcsuKtYRVEVKLMGKGNPE.............SADAVIARsosIFIADTLS-QNK ....................................slp+sIphLL+GKDYREhV.......LNsINsEFLDFA......IsFFK-IlhAKMpDcuIshsWYppashsNK-..sK.-lAI..LsGhNhKTIhNsYGToTKEV.VLDlupsNlcYLh-l.LQsLppss.s-lG.IsIKITY....K-ISVs...LDLcESLlVINALATKKIALRGSAaShlGKRlEKPLMLpLCc+CGlS.....EuaIDAosFpKDK+L-aDREVDFKLYNpD+SKsYRVEVKLMuKGNPE.............SADAVIAR-ocIFIA.TLSEQNK............... 0 1 1 1 +9349 PF09517 RE_Eco29kI Eco29kI restriction endonuclease Bateman A anon Bateman A Family This family includes the Eco29kI (recognises and cleaves CCGC^GG ) restriction endonuclease. 23.00 23.00 24.40 23.00 22.40 21.00 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.10 0.71 -4.93 6 60 2009-01-15 18:05:59 2007-05-03 17:25:00 5 2 56 17 15 56 3 164.70 44 74.26 CHANGED PcpFpGAGVYALYYsGsasLYcphu.chNchsastPIYVGKAVPtGhRpuRhucNss..uspLasRL+EHuRSIupsusLDlsDFhCRFVlh-hthSshIshhEAsLIchapPlWNosVDGFGNHDPGpGRasQ.....t+SsWDslHsGRsWAs+hss................h.uc..uItcsIppaLcp ...............cpFpGAGVYAlYYhGs.h.shYpplu.chN.R...s.as...t.PIYVGKAVPtGhRput.u.ss..pustLapRLc.EHucSIs.tss......sLc.sDFhCRFlll-...sshIshsEuhLIcha+Pl....WN..............s.....slDGFGNHDPG+GRapp.....t+ScWDslHPGRsWAp+hps................hspohcplhppltpaht.t..................................... 
0 4 12 14 +9350 PF09518 RE_HindIII HindIII restriction endonuclease Bateman A anon Bateman A Family This family includes the HindIII (recognises and cleaves A^AGCTT) restriction endonuclease. 25.00 25.00 66.40 66.20 20.40 20.40 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.90 0.70 -5.52 5 26 2012-10-11 20:44:45 2007-05-03 17:38:09 5 1 23 8 3 23 3 278.00 34 94.43 CHANGED NuE.........hl.s........hss-hlKlsuphT.Dac+lss-llptI+KhsccplI-lLpouGhIPEsYcsDSSEEKLFSKhsDALlu-shpchGapApsLsERusAuDVhltsc..sptIVuDAKoFRLSRT..AhNsKDF.KVculcKWRstht.sKDaulVlsPpaQhPssKSQ..lYsQulDpNVLlLSYcHLAaLLcp+-.hss-hh..LWEYpsIFspp.hhN.suKsu+pYWsuINcsllclssct-KtahDphstscctLpphlcccIp.lE.....chEalcpEIs+IhshoREcAIR-LlcchKIpu+lEph.phl+slpcpcl ..............h...................t..phsu..s.-hpphstcl.t.lpp.spptll-hLppsGhIPEshptDSSEEKLaSKhsDsllucshphhGhpupsL.sERusuuDVhshsp..sYtlVADAKsFRLSRT..AKNtKDF.KVpulscWR...p....sKDaulllsPhaQhPsppSQ..lYpQulspNVhlhoacHLAhLlphtt..s.c.....Lh-......tIFst..hh..stcpuhpYWpslNpshhphssphpchah-.hthspctltphhpptI...........cahppEhppIhtho+-cAlptLlpph+lpu+lcth...hcthpp................. 0 3 3 3 +9351 PF09519 RE_HindVP HindVP restriction endonuclease Bateman A anon Bateman A Family This family includes the HindVP (recognises GRCGYC bu the cleavage site is unknown) restriction endonucleases. 
25.00 25.00 26.00 34.60 23.10 20.50 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.83 0.70 -5.53 3 41 2012-10-11 20:44:45 2007-05-03 18:03:25 5 3 34 0 4 50 1 255.10 38 90.73 CHANGED oWGKNQFNSSFPlALACYMSSKNIpPlYL+LE+.tsIcHupIDVcsVFpIcP.ptpsFFAFEpSY..............................pIKLTAlPDpTTssLsDs..GsEIVIRPDoIVYLAhSlAclhpps.hshhDI..PssschhDWp-sppl.PhhPhhhphL.slhsRhpslQhPhLLQPlWKT.GK.shLtDNCLDIFlWSNlAFsKlFLDASplclNscSITRpcRTsVWLhKMLYDFApsGKINH++TIDcISaNTKNDKAFAuSGMlT+pYMKSPELp+PRIK+cEIKNIILGGGQ+LLSPERRFDAIIlNTPsLF- .....................aGKN.FssuFP.uLhsah.p.ptlps.Ylhh....p..p..............plppt.lshpplashps...s.tphaasFEt.a...............................hElKLTslPDpsTtphs-p.ausElVlRssol.ahAhSlspth....pp..ph..t..t.hh.h....h....cWsp.tplhshh.phhtslpplhpph.shQpPhlhQPlWKTpGKps.LspsCLDlFlWSshAFs+hhlshu....p.s..p.....pIsR.tRohlWLhKMLhDhsppGphsappllcphoasspsDKAFuhsGphTp.aMps.clppPtI.+pEIppIILssGppLLSPERRFDuhlh.ssspLF................................. 0 2 4 4 +9352 PF09520 RE_TdeIII RE_MjaII; Type II restriction endonuclease, TdeIII Bateman A, Coggill P anon Bateman A Family This family includes many TdeIII restriction endonucleases that recognise and cleave at GGNCC sites. TdeIII cleave unmethylated double-stranded DNA [1]. 
23.80 23.80 24.00 23.80 23.70 22.70 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.67 0.70 -5.08 11 54 2012-10-11 20:44:45 2007-05-03 18:10:25 5 1 48 0 19 62 14 233.10 26 89.78 CHANGED Mulss...tT+NKl+-aLsshlccpLpsY..pt.hp.............ss-sshpPFHtpLlPtslMplt+FERSLsTuLGp.hFEpsA+lIAhspausA.ppYclpsslsptshssIDpllsplc..........+spppppsohsEhlcplpplspsthtEsolV.uDLYlp+cDGpEhaF-IKSsKPNKGQshpshpcLLRlhAlc..pp.tssscsFaAhsYNPaG....p+tuYohshh+thhDhcs.tVlIGpEFWshIGp.suTYsELL-Iac-VG .......................................................................httlpphlpp.lcph...hpph....................psptt.hP...Fhttll..............st.............-........hhthspF.+ShsTshGpshaEplAphlApspht.s.p.phpht.sp...IspptpppIpplhsplc..........t.ptp.sph.p..ctlct.h..hph....sp.tp....h.psphs...Dl.a.l.p.pp.s.sp..hhh-lKo.....s.....K.P.NtsphcthKpplLphhshh....pp......ptphpsahAhPY.......NPa...........p.psapp.......h..ht..th.h....Dh.-p..plllupEaWshluG.csoY.pplLchapclG........... 0 9 12 17 +9353 PF09521 RE_NgoPII NgoPII restriction endonuclease Bateman A anon Bateman A Family This family includes the NgoPII (recognises and cleaves GG^CC) restriction endonuclease. 
25.00 25.00 35.90 35.90 23.60 23.20 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.76 0.70 -5.61 4 62 2012-10-11 20:44:45 2007-05-03 18:15:05 5 1 53 0 7 42 7 186.50 42 93.41 CHANGED M...pNIlsAIhNlspNP.hpLcpa...spS+NRANpMG-uLEEYlKDlFusoh.p.D.sp+.tlauchFSYLGNpNNPPDhIL+sGDAlEVKKIEshcSSLALNSSYPKuKLasssSMIT-AC+sCEcWE.KDIIYsIGTlccpp.LKtLhhVYG-sYAA-splYp+IpspIKtGlpsIsslpFSETpELGRVNRVDPLGITYhRlRGMWGIENPhKVFsYIhph-..csppFNhhAlhpc-KYpSFsspD+hc.huhpNptLtIpDl+IKs ...................NIlsAlhNlhpp......h.tlpp.....pu.sRANphGsuLEpalKDhFut.................................hl+tG.sulEsKKhps...s.ssLsLNSSaPKs.l..ss..l..pth+ttE..pWcpKcllYhlGhh.p..pp..LppLhhVYG..asAptphY.clcppIppul.tph...ss..l.hscTpElG+lpplDPLsho.LRlRGMWtIppPhhV..Fp....lht......ppthphhsll..tcatphspp.c......p...h.tlpch.l........................ 0 0 5 6 +9354 PF09522 RE_R_Pab1 R.Pab1 restriction endonuclease Bateman A anon Bateman A Domain \N 25.00 25.00 60.20 29.80 18.40 17.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.65 0.71 -4.21 5 56 2009-01-15 18:05:59 2007-05-03 20:02:19 5 2 32 6 4 55 0 116.20 69 53.14 CHANGED KQKQFIEDNF..MITRERFRSHQFGGMDFELSRISYPLLIHSFDD.NELSEIVIKEQQYGSKTQAMLYFCFSILELKTATPLLNRTAphKEHALLlIHcsNAshFLEMLKIFGLLSQsHHNDVLKILEKIL.QN ..............................KQKQFIEDsF..hIsRERFRSHQFGGMDFEhS+ISYPLL.IHsFND.Np.LSEIVIREQQYGSKTQAMLYFCFSI...L...ELKT...A...TP...........LLNRTAsLKEHALLhIHKTNA.hFLEMLKIFGLLSQAHHNDVLKILEKILpN.... 0 1 2 2 +9355 PF09523 DUF2390 CHP02444; Protein of unknown function (DUF2390) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are bacterial hypothetical proteins, about 160 amino acids in length, found in various proteobacteria, including members of the genera Pseudomonas and Vibrio. The C-terminal region is poorly conserved and is not included in the model. 
25.00 25.00 26.30 27.90 22.50 19.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.40 0.72 -3.88 60 298 2009-01-15 18:05:59 2007-05-04 09:10:55 5 1 295 0 89 240 56 106.10 35 65.72 CHANGED psLWpauLshYupsuVppuCLpLQsphGssVsLLLhshWLs.....p..pshshsspphptltstsppWcpplltPlRplR+ph+stssps..........lhcplhshELpuE+hp.phLh ..p.pLWpFuLphYut.tVcpA.CLp.LQspptuNVNLLLhhtWLs.....p.ptl.shsppchtpltpslsphp-sllpshRcLR+phKsphscs..............LhcchhphELphE+ppttpL.h........................... 0 21 44 68 +9356 PF09524 Phg_2220_C Conserved phage C-terminus (Phg_2220_C) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents the conserved C-terminal domain of a family of proteins found exclusively in bacteriophage and in bacterial prophage regions. The functions of this domain and the proteins containing it are unknown. 21.10 21.10 21.20 25.60 19.70 20.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.64 0.72 -3.98 26 362 2009-01-15 18:05:59 2007-05-04 09:12:02 5 7 326 0 39 286 12 73.70 47 26.64 CHANGED IIsYLNpKuG+sF+.ssptspcLI+ARhpEGasl-DFKpVIDhKspcWhsssp.....hppYLRPcTLFu.s.KF-uYLNp ...................................IIcYLNcKsG+pa+h..s..otpopchI+ARhs-.GaplE........DFKpVIDhKss-Whsss.......hscYLRP-TLFG..sKFEuYLNp........ 1 16 26 37 +9358 PF09526 DUF2387 CHP02443; Probable metal-binding protein (DUF2387) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are small proteins, about 70 residues in length, with a basic triplet near the N-terminus and a probable metal-binding motif CPXCX(18)CXXC. Members are found in various proteobacteria. 
24.40 24.40 24.50 25.00 24.30 24.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.96 0.72 -4.06 44 844 2012-10-03 10:42:43 2007-05-04 09:13:20 5 3 840 0 112 300 136 62.10 57 89.83 CHANGED hKKRFIAGAsCPpCsphD...plthap-ss..l-hhECVpCGas-ppssp......tsphpsR...............sptpsIsl..F ....RKRFIAGApCPuCpAQD...ohAMWRENN....l-lVECVKCGHp.RcsDK......pscc+VR...............pcEQVIGIF....................... 0 18 40 78 +9359 PF09527 ATPase_gene1 Putative F0F1-ATPase subunit (ATPase_gene1) TIGRFAMs, Coggill P anon TIGRFAMs Family This model represents a protein found encoded in F1F0-ATPase operons in several genomes, including Methanosarcina barkeri (archaeal) and Chlorobium tepidum (bacterial). It is a small protein (about 100 amino acids) with long hydrophobic stretches and is presumed to be a subunit of the enzyme. 23.00 23.00 23.20 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.74 0.72 -4.16 277 1319 2009-11-03 13:52:42 2007-05-04 12:08:56 5 2 1253 0 429 996 1353 54.40 25 56.48 CHANGED uhhht.l..Ghplls.sll..lGs..hl.GhhLDphh...so.t..shhhllhlll.GlsuGhhshh+hhp .............h..hhtl..uhphls.ull.lGs.hlGhhLDchh.......ss....Phhhl.....lhlllGlsuGhhslh+hh........... 0 172 321 377 +9360 PF09528 Ehrlichia_rpt Ehrlichia tandem repeat (Ehrlichia_rpt) TIGRFAMs, Coggill P anon TIGRFAMs Repeat This entry represents 77 residues of an 80 amino acid (240 nucleotide) tandem repeat, found in a variable number of copies in an immunodominant outer membrane protein of Ehrlichia chaffeensis, a tick-borne obligate intracellular pathogen. 
20.50 20.50 21.20 20.90 18.00 18.00 hmmbuild -o /dev/null HMM SEED 688 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.09 0.70 -13.79 0.70 -6.39 2 47 2009-01-15 18:05:59 2007-05-04 12:09:46 5 4 7 0 6 47 2 244.40 44 123.71 CHANGED MDIDNsNloTushpspossLh-lIMRILsFGNps..psps.soclh.pp.p...sDsVupPS................LpPhVstS..........cVScsppEcosPEVhhcDLQ.....sltpppStVu-psupsspEcps.Elcucphpsth-sulpcSppc.sEhVSpoSpE..ssE......................pcVSKsppEEosPEVhhcDLQ.....sVtpppStVu-psupsspEcp.....................SEltp+.ucTpKEpuhsEs+tc-.....D-.VppsSSE...stpcVScsppEEssPEVhhcDLQsssstp..........VpcppSEltp+.uETpKEpuhsEs+tc-.p.sspsS.E..suE..pcVScsppEEosPEVhhcDLQ.....sVtpppStVu-psupsspEcpo.Elcucphpsth-uulpcSppc.sEhVSpsSpE..ssE......................pcVScsppEcosPElhsEDL.......ltps.s.VsEK.sEh.t.p.sP.VhtE-.................-KVsETscpE...Elht-sQsVtsupsl.lsPM.sl-shDo.l...........SslhpstMhCPhSc..sGpaVphYthYhYthQsVKDL.Gsh..shssC.CNlslYFhtaN.FTN+Esl......Dll .......................................................................................................................................................................................................................................................................................................................................................................................sptp..........VpcppsEltp+.uETpKEpuhsEs+tc-.p.VspsSsE.alAE..pcVSKl-pEEosPEVll+DLQ.....cVspp-SsVuDpsupsssERpo.Elcucphpsth-uulpcSppc.sEhVSpsSpE..ssE............................................................................................................................................................................................................................................................................................ 0 4 5 5 +9361 PF09529 Intg_mem_TP0381 intg_mem_TP0381; Integral membrane protein (intg_mem_TP0381) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of hydrophobic proteins with seven predicted transmembrane alpha helices. 
Members are found in Bacillus subtilis (ywaF), TP0381 from Treponema pallidum (TP0381), Streptococcus pyogenes, Rhodococcus erythropolis, etc. 21.80 21.80 22.20 22.30 21.70 21.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.70 0.70 -5.11 43 802 2009-01-15 18:05:59 2007-05-04 12:30:38 5 1 697 0 95 583 737 219.40 29 92.57 CHANGED hhsFphaussHhshlhhhllhslhlhhhhp................phptpppphhpphhshlhhlphluhhhah..hhhshhsltpsLPLahCcluhhhhslhLlscp..phhhphhaahGlsGuhhALlsP-l..........................................sasFPHhpahsFalsHhhllhsslahlhhpph+sshpshhtsllhshhhshhlhhlNhlh..........GuNYhaLsppP.hssohLslh...ssaP...hYllsthslshhl...hhlhh............................hsat..hpc ................h..shphhpss+hslhhhhhlh.sllhlhhhp................thp.h.p.p.p..p.h.hphhh...thl.hs..phlhL..a..sWa....hs...s..............th..s....L.s.p.SLPhahC+lAhh..s.lh...lls..p....p......p+.ht.p.......h....hh.h...h...GhhGulhA...llhPs.h.........................................ssY.sF.P.HlshlsFhhuHhsLlhsuLhhlh...c..p..Y.c.sph.hshpthhlhshslssll.hhVNhlT................GGNYuF..LscsP....ssp..h..Lsh.......................Yllsshllshhl.....hhhh....................................ph.................................................. 0 32 65 82 +9363 PF09531 Ndc1_Nup Nucleoporin protein Ndc1-Nup Wood V, Coggill P anon Wood, V Family Ndc1 is a nucleoporin protein that is a component of the Nuclear Pore Complex, and, in fungi, also of the Spindle Pole Body. It consists of six transmembrane segments, three lumenal loops, both concentrated at the N-terminus and cytoplasmic domains largely at the C-terminus, all of which are well conserved. 
19.90 19.90 21.20 20.20 19.70 19.80 hmmbuild -o /dev/null HMM SEED 602 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.89 0.70 -6.10 34 331 2009-01-15 18:05:59 2007-05-04 13:22:05 5 8 241 0 220 326 0 466.90 18 88.01 CHANGED ppllppRhst.ustlslhlshlhhhshhl..........thhohhhshhsh...hh+slhlhluslllhlhRhp.hpl.t.psp..sohhsplhptlhshphl.hhlhashuuh.hhuhhath.hstsh..t..hh.................t.tppshLN-phlalhhhthhluhhh...........oltalhhshsclsF..............s....hpp..tppltppl.phlhpu..lhpohhshhstsllY..................hh..hh.hshh.h.thhhshsps.h.sshh...hshpllhphhlhuhhllhsW..phsNhhFslahops.slctspsloshop-Ppt..oLlsGLp.spc....hs+hhAhpELshluppsssc....Rpslasspptss..ssWstlhcpChplIpphssclsphhpt.t.....ststhss..ppppspp...........................................................t.......t...h....st...pt..h.t......hptshsppshpssssssthst.tp..pttpplhsthpphhp...............................................................hpphLpphhsh.athohp+pspshl......ss.hhspAl.uLotLlhtSlpEDpaGsVpss.lspllphlpchhtslspahph.sssshhst..............tttpp......stlphlhsshpsulhcIshpFstaLp-lhLsscsh+hhp 
.......................................................................................................................Rh......hh.hhh....h....h.h...............................h.....h..h.h.h.hs.h..hh..hh.ht.hh....h............s...ht...h...h...ph.h.h.hha...h.uh..hhs..hh...h...h..........t............................................hst...lahh...hhhuh.............sh.hhh.p..hthl.h.....................sh.....t.....h.hhtt.l..hh..hpu..hh.shh.hh.h..hha...................h.....h.....h.thhhs..hp.........s.....h....sh.......hh.hphhhhuhh.lhhha..ths...hhapha.hspt.sh.....t....ss..hsps.spt......sLhpsLp..sp............................hhphhA.h.-Lhhlupt..ssp.....RttlFp..p...t.......s..s..tsWstl.ptChphlpthspcl.th.p..........s......t....t....................................................................................................................................................................................s.......................t..t.h.h...tp........................................................................htthh..p........h.ah.sh..php..h.......sp...hh.uhp.uL.stLhhtShpEDpaGhVpps...lstllphhhph.tslcph.....h........t................................................................t.........t.h..hhhtshppulhplh.tFt.hlptl.hs.p..p................................................................................................. 0 65 111 182 +9364 PF09532 FDF DFDF; FDF domain Anantharaman V anon Anantharaman V Domain The FDF domain, so called because of the conserved FDF at its N termini, is an entirely alpha-helical domain with multiple exposed hydrophilic loops [1]. It is found at the C terminus of Scd6p-like SM domains [1][2]. It is also found with other divergent Sm domains and in proteins such as Dcp3p and FLJ21128, where it is found N terminal to the YjeF-N domain, a novel Rossmann fold domain [1]. 
26.10 26.10 26.20 26.20 26.00 25.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.56 0.72 -3.49 18 611 2009-01-15 18:05:59 2007-05-04 13:55:02 5 11 255 10 383 561 2 97.50 26 19.68 CHANGED hc.cpDFDFEusNuKFsKpclhcchcpptphtttt.....................ppppssspcpspsssphhs+ppsFFDsISscspccsttss.t..................................apcEcchs..ETFGhst ..........................................ppDFDFEusNApF.sKpc...l..h.......cch.p.pp.p.h......t...t.p.pt..tt..t.........................ptpptt.pp.c.s...h.s.ss......t.h...hs+ppsFFD.s.l.Sscs.p...p..pp...p..s...............................................atppc.phs..-TFG.................................................................................. 0 78 159 280 +9365 PF09533 DUF2380 CHP02269_MYXXA; Predicted lipoprotein of unknown function (DUF2380) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of at least 9 paralogs in Myxococcus xanthus, a member of the Deltaproteobacteria. One appears truncated toward the N-terminus; the others are predicted lipoproteins. The function is unknown. 25.00 25.00 25.70 25.60 23.40 23.40 hmmbuild --amino -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.40 0.71 -4.88 8 28 2009-01-15 18:05:59 2007-05-04 14:10:00 5 2 8 0 27 29 0 164.80 35 60.96 CHANGED uhpsApEtCsssDEspCVSLLCpGDA.CGFYcCEDlsGcVEh.ARFPPA.....RPPsAsAAPGpGPRRsWGsGQpLPRGA..VMVFPsWsGAPpchlsPuhpLsPG.RWEKHHIFPQAcDLAcWFpp..+GVKIHDYThPIPR-lHRRIHuGsspGGAWNcAWR-F+cpp.G.A.SP-EIa+HAGELIaRFELhGGPIpPY......YSR ...........................................t...t.t..Cttspts.ClshhC.tsh.CuhahCEDlss...ts.h..uth.ss.......RPPh.......G.sspRsW.Gtt.tl.tss..lhsF..W..t.t..h.sshphssG.ca.....cKHHIFPQt.cLApWFpp..pG....lcIHcaTlsIPcclHpRIHuGs............s..p..GGs..WNpsWRpFhpts.t..A.o.pplac+AucLIhRFpLh.G.l.sYa................................. 
0 11 13 21 +9366 PF09534 Trp_oprn_chp Tryptophan-associated transmembrane protein (Trp_oprn_chp) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are predicted transmembrane proteins with four membrane-spanning helices. Members are found in the Actinobacteria (Mycobacterium, Corynebacterium, Streptomyces), always associated with genes for tryptophan biosynthesis. 24.90 24.90 25.00 25.10 24.80 24.80 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.36 0.71 -4.68 24 358 2009-01-15 18:05:59 2007-05-04 14:15:32 5 2 348 0 99 256 3 178.30 31 85.29 CHANGED tIA.tLlLlluAusLWhAuRhsWVslpSFDsLGs........P+sssLoGAoWooALlPLALLhLAAsVAslAVRGWsLRlLAlLlAsAususuYluloL.WV....ss-sAsRuusL..ApVPlss..lsGopRphhu...........AslullAushsLluAVLLhppAs.ptsAts..uRYssPsARRutApppts.....t............hSERhlWDALDEG+DPT ...........................h....hhhhhuAhhh.hhu.up.sW..h.h.s..s..hs..........shthsloGushosu.L.h.ulALlhlAu.ssAs.hs.V.RshuRRllulLhAlsuhuhshhslsh...hh...ts.s.s.Atpsush................sts.s.sst......hsss.pho.sWs............hlslluuslsl.luulhLhhtus.chsutu......s+Ytt.tARRstthpt...............................................................s.Rsl..WDALDcGcDPT..................... 0 30 74 94 +9367 PF09535 Gmx_para_CXXCG Protein of unknown function (Gmx_para_CXXCG) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry consists of at least 10 paralogous proteins from Myxococcus xanthus and that lack detectable sequence similarity to any other protein family. An imperfectly conserved CXXCG motif, a probable binding site, appears twice in the multiple sequence alignment. 
25.00 25.00 29.00 28.40 20.70 18.80 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.59 0.70 -5.36 11 36 2009-01-15 18:05:59 2007-05-04 14:22:52 5 2 9 0 31 35 0 184.80 40 91.83 CHANGED RFahl+cDcssp..aTGsL.sAsH+WuLPGVpsCssCGsshGssGhpYPCVDLSsLP..EppchscP.hPlsaEEFuRLRELVRPhAPPGAhL.PGTphGPLsGsASGpFGsLhhQsshsLhlRREALERLputGlRGLpGCsh-lRFRtKsPPELLELQLE.+GRLHPDClPsDctPPCssCGsp.shphP.-sslLDAsSLPsslDlFRltsasTlIluTERFVEAVc+LcL-GlsFpELssR ...............................p.s.s.....hsGth.pu...WthPGlt.Cs.st.shu..h.tYPsVDLuths..t.t.h.ps...h..shpEatRLtp.lRPhhP.th.l.PGsthGPhhGtupGtFu.h.h.ss..lhlRp-Ahc.Lpt.GlpGL.us.hpl+hRtptsPtlhcLplp.pGplH.cChssth.ssCspCGp..sh........phP........ct.hL-stSLPsshDlFRltsasThlluTERFV-Alc+LtLsGlsFpElss+... 0 12 12 23 +9368 PF09536 DUF2378 Mxa_TIGR02265; Protein of unknown function (DUF2378) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of a set of at least 17 paralogous proteins in Myxococcus xanthus DK 1622 and and 12 in Stigmatella aurantiaca DW4/3-1. Members are about 200 amino acids in length. The function is unknown. 21.20 21.20 23.10 22.50 21.10 20.30 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.81 0.71 -4.75 27 82 2009-09-10 15:59:30 2007-05-04 14:23:17 5 1 10 \N 69 82 2 174.80 24 87.95 CHANGED thhs.sp...sscGLFhpulhstlp....tthtpcl+psuh....-hcts.h.sYPltsahphlhsAAptltPph..s.-sAhRtlGcphspuFh.pohlGRslhslhthhuP+Rhlsplspua+.uussYsEpplp.h...Gspssclhhpcsh..sssappGlLpusLcssGAps.pVpspthsh.hsssYclsWp ........................................hh.tpshcGLahpulhshlp........tthtpclctsGh...-hct..h.sYPhptahphhtssAchlhPth..sh-tAhctlGcphspuah.pohhG+slhslhp.hhuP+RhLpchspuap.sussasctplpth...Gspssclthp...........cshh..sss....att....GllpusLchsGupsspV...p...spthss.t.sspYclpWp....................... 
1 20 20 50 +9369 PF09537 DUF2383 CHP2284; Domain of unknown function (DUF2383) TIGRFAMs, Coggill P anon TIGRFAMs Domain Members of this protein family are found mostly in the Proteobacteria, although one member is found in the the marine planctomycete Pirellula sp. strain 1. The function is unknown. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.89 0.72 -3.82 79 432 2012-10-01 21:25:29 2007-05-04 14:23:56 5 2 358 2 189 457 26 109.50 24 67.21 CHANGED pslss..LscLlcsshDuppGacpuu-clc..s.spL+shhpchupp+pptspELpstlpphG..scPcss.uShsGslHRsahsl+utlousc-p.slLpcsEcGEctslcpYccALc.c ...............hhptLNcLlctspDutcuacput-csc...s...spL...+........shhpchsppppptspcLpshlp.phG...ucPpps...uohtGslp+sahsl+s....hhss..p.c.cp...lLppscc.uEctshctaccslp....................................................... 0 49 115 153 +9370 PF09538 FYDLN_acid Protein of unknown function (FYDLN_acid) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are bacterial proteins with a conserved motif [KR]FYDLN, sometimes flanked by a pair of CXXC motifs, followed by a long region of low complexity sequence in which roughly half the residues are Asp and Glu, including multiple runs of five or more acidic residues. The function of members of this family is unknown. 
23.80 23.80 23.80 23.90 23.70 23.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.31 0.72 -11.07 0.72 -3.47 63 299 2009-01-15 18:05:59 2007-05-04 14:25:01 5 1 295 0 110 234 357 112.80 36 90.88 CHANGED MsKtEhGTKRlCPssGp+FYDLN+.sPllsPhsGpshshc................ss+s+thtspt-c...................scpscsssss-.ssl--s-s-.p..................p-s..t..--....DL-s.-DDs.......hcD-D- ........MAKsELGTKRlC..PpsGcKFYDLN+.cP.ll.sPasGpshshs................hhcspt..tsst-cst................tpchcss..pps.-hssl--.s.Ds.-sph...................................sDDl.s..D.sDD...sl-ls-...DDDs......hhtp-----p................................... 0 33 70 85 +9371 PF09539 DUF2385 CHP02301; Protein of unknown function (DUF2385) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this uncharacterised protein family are found in a number of alphaproteobacteria, including root nodule bacteria, Brucella suis, Caulobacter crescentus, and Rhodopseudomonas palustris. Conserved residues include two well-separated cysteines, suggesting a disulfide bond. The function is unknown. 25.00 25.00 58.40 58.10 18.80 17.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.20 0.72 -3.61 16 161 2009-01-15 18:05:59 2007-05-04 14:25:50 5 1 159 0 59 119 6 95.00 52 72.36 CHANGED PY-tpLhRLAEILGulHaLRsLCu.tscsspWRscMptLL-uEsss-.pRRpRLhuuFN+GYRuFussYppCTsuAphAhpRYhpEGpsLoc-IsuRY .......PY-schhRLAElLGoLHYLRsLCG..pcGscWRccMpAlIsAEpPs-.tcRt+LluuFN+GYRsFussYspCTPuAhsAlcRYhcEGucLSp-IhuRY.... 0 15 31 40 +9375 PF09543 DUF2379 CHP02267_MYXXA; Protein of unknown function (DUF2379) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of at least 7 paralogs in Myxococcus xanthus and 6 in Stigmatella aurantiaca, both members of the Deltaproteobacteria. The function is unknown. 
20.00 20.00 20.20 20.00 19.70 18.30 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.41 0.71 -4.16 16 44 2009-01-15 18:05:59 2007-05-04 15:28:38 5 2 5 0 43 43 1 103.10 42 91.86 CHANGED scphDWsPIRuLu+RVlcpGEPLtLTc-VRALLpRoAcEVuIosu-sppALuosssAtsLLcEhpRRI+-GSpRLhcAlpRhhchp-AGDlDuARppMc-VLAVEVVPhYRchApspLcsls ......s...-Wp.lttLs+RV..ptGtsL.LotchRuLLh+oAtEVulstt-sttALto.tsAhsLLpEhtcRIp-GSpRL.cAlhc.hhchp-uGDh-uARpphc-lLAVEVVPhYRchAptpLcsh....... 1 9 9 30 +9376 PF09544 DUF2381 Mxa_TIGR02268; Protein of unknown function (DUF2381) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of at least 8 paralogs in Myxococcus xanthus, a member of the Deltaproteobacteria. The function is unknown. 20.60 20.60 20.60 20.80 20.20 20.50 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.67 0.70 -5.30 21 91 2009-09-11 05:39:59 2007-05-04 15:29:54 5 3 9 0 71 90 0 237.80 24 91.05 CHANGED sLLhussApApss..sspt.ptRsVslsusssp..ssPEV+VussssTsL.lFsuslpccslsl-..cuRhphlDsG..cpolhLtPsssLttGERhcLsVhFtDGssPspAsFlLVsc.Puc.......sDspl-VpRsttsstuhpsE..s........t...t..phcsptstPpshsL.Ghlsp......pGVssp..plpc..hsssupultstsuh..sYRu.tshshVslplcN.suttPWssp..tApLs.....uts.G.sL+shhV.hppuslsPGpt.scVlV.s-ssshusp....ssFT...LcLhspsG.RslplssVph ...................................t..ss.ut...........Rtl.lssp.st..s...lhlusshsThl.hFssslt.tphph-..csRhphlssu..tptlhltshtslt.sE+h.ltVhatDG..t.P.pssFhLssp.ssc.......s-t.lpV.R.ttsstsh.sc..h...................t.pt..ttststshs...h.uhlsp.......pultht....hpt....htssspshthhthh..sapu.tshshl.hplpN.tsttsWtst..tAplp..............stt.G........hcshhl.h.........ptsslh.PGtt..splhl.s-.hss.htst....t.as...Lclht.tsG.Rsl.l.th................. 1 13 13 56 +9377 PF09545 RE_AccI AccI restriction endonuclease Bateman A anon Bateman A Family This family includes the AccI (recognises and cleaves GT^MKAC) restriction endonuclease. 
21.00 21.00 24.60 24.40 18.00 17.80 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.09 0.70 -5.91 2 13 2009-01-15 18:05:59 2007-05-04 15:31:22 5 2 8 0 7 20 3 229.80 29 92.59 CHANGED MsYY-pIREhTK.VPVpLVsFEpPRDhARTPTQASSNFITNKEQGDWAEsLlsRAINEsSpNFVAVKYGKSDNLVAGEsGFDsFYQDFQsELDTIGKRPDLLIFpKsDFDsoLGFDlSQ.PHcpITDYVKKAIAGIEVRSSAFLID+YEEAMQsRTp+FspIAhpT+DKILs-F.DVL-HPuRppYIpLLNolThpTlslhDF+VPuWpSs-RLIpVpNLFKpLKsAIKEIQKRDYLSITPKVEDlKVVYKWIETFNVPHFYFQVFFDKVYGISFEQILpIISsSDNDGVIFSVEpDspNQNKTTIKINSKhGhPIA.KVDEPhHESlRKEMDRGRLLFYVTFKGGTAYLDl-NLRsILslEEu.F ........................h..................................sSpFlhphpQGsWuEphlhpAIN-ss.pahAltYG.Stshsss-.puFt.aatchppth.shsKRPDlLlFp.ss...h.p.....................pl..........hV.cAlhulEscsS.ahht+h............................................................................................h.K.EDht.l.cW.pp.sV.hahhpVFFDhhaslSFpph.pll.....p...........u..h.h-p..ps.....p.Khhhhh..p.uh.lh.h.-....P...shh.E........ppG+ll.hVpF.GG.hhl........................................................ 0 3 4 7 +9378 PF09546 Spore_III_AE spore_III_AE; Stage III sporulation protein AE (spore_III_AE) TIGRFAMs, Coggill P anon TIGRFAMs Family This represents the stage III sporulation protein AE, which is encoded in a spore formation operon spoIIIAABCDEFGH under the control of sigma G. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. 
20.40 20.40 20.70 20.70 19.90 19.50 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.98 0.70 -5.48 32 401 2009-01-15 18:05:59 2007-05-04 15:34:36 5 1 393 0 90 304 11 320.30 36 82.95 CHANGED lP..phshpphl.shh+.G-t..shshpchhpullpalF+Elhssh+LLupLllLullsulLpNLQsAFp.ppsloplAahlsYhlLlhlslpSFtlAlshuc-sIssMssFM.uLlPlLlsLlsusGulsouAhFcPlllhslshsuplhpslllPLlhluslLpllsslS-ca+lo+LusLl+phuhhslGlhlTlFlGllolpGlsuussDuVsh+TAKFssssFlPVVG+hhoDAs-sVhGsSLLlKNAlGhhGlllllhIshhPlIKllulshlYKluAAllpPlu-pclsssLsshusolhhlhuslusVulMFFlsIsllluuGNls ...........................................................p.phtshh.phlp.Gct..phoh.pphhtulhpalF+ElhsstKLLupllhLslhuulLpsLQsAFp.ppslSc...lAa.hlsYhlLlhlslsSFhlshshAp-sIpsMssFhhuLlPlL.lsLlAouGGlsSuuhaaPlllhhhshsuhlhptlllPLlhluslLplVssl.S.cpaKlo+LucLLpplshhhlGlhlTlFlGllolQGhss.ussDulsl+TAKFssusFIPVVG+hho-As-TVluuSlLlKNslGllGlllllhIshhPhIKlhsluhlYKhuAAllpPlu..s.stll.pCLsslucSlhhlhuslshV..ulMFFloIsllluuGNl.................................................................... 0 47 75 80 +9379 PF09547 Spore_IV_A spore_IV_A; Stage IV sporulation protein A (spore_IV_A) TIGRFAMs, Coggill P anon TIGRFAMs Family SpoIVA is designated stage IV sporulation protein A. It acts in the mother cell compartment and plays a role in spore coat morphogenesis. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. 
25.00 25.00 30.50 30.50 23.00 22.30 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.65 0.70 -5.80 30 421 2012-10-05 12:31:09 2007-05-04 15:44:18 5 2 408 0 87 296 12 475.10 56 99.67 CHANGED MEphsIY+DIAcRTsGDIYlGVVGPVRTGKSTFIKRFMElhVlPNIcstac+-RA+DELPQSuoG+TIMTTEPKFVPNEAVEIsl.s-slch+VRlVDCVGYhVcGAlGa..E--p..PRMVpTPWF-cpIPFpEAAEIGT+KVIs-HSTIGlVVTTDGSITDIsREsYl-AEERVlpELKpIsKPFlllLNSpcPtupEThpLpp-LE-KYsVPVlslsstphpccDIppIhcplLaEFPVpElNIslPcWlEpL-ssHWLKpshhssl+-hspslp+lRDIppslpsls-hEalccsplpplshGsGsApIplssccsLFYclLsEhoGhcIcG-pcLlpllc-LupAKcEYDKltsALp-VKpsGYGlVsPpL-EhpLEEPEllKQGs+aGVKLKAoAPSlHhIRADIpTEloPIlGTEKQuEELVpYLLccFEs-PpcIWpSNlFGKSLc-LV+EGlQNKLa+MPE-uQsKLQ-TLQKIlNEGsGGLICIIl .....MEphcIaKDIAERTsGDIYlGVVGsVRTGKSTFIK+FMELlVlPNI-.N-tc+pRApDELPQSAuG+TIMTTEPKFVPNpAVpIpl......s-ulclplRLVDCVGYhV.GAtGYt..--ssPRMlpTPWa-c.IPFpEAAEIGTRKVIp-HSTIGlVlTTDGoIs-IPRcsYlEAEERVlpELKpluKPFllllNospPhps-..TppL+ppLpEKY-lPVlshsltphcEpDlhslLccsLaEFP........VtElNlslPpWV.hLspsHWL+psa.psV+-slcclp+lRDl.ccs.ltp.hsph..EalccsplstlchGpGsAcIclhs.-pLa.pILpElsGhEIcGcscLlplhp-Lu+AKpEYDpVu-ALc.VKpTGYGlsuPsLs-MsL-EPEII+QGuRFGVKLKAsAPSIHMI+sDlEoEhsPIlGTEKQSEELV+YLhp-FEsDP.pIWpSsIFG+SLpslV+EGlQsKLthMPEsuphKLp-TLp+IINEGsGGLIsIIL.................................. 0 46 72 77 +9380 PF09548 Spore_III_AB spore_III_AB; Stage III sporulation protein AB (spore_III_AB) TIGRFAMs, Coggill P anon TIGRFAMs Family SpoIIIAB represents the stage III sporulation protein AB, which is encoded in a spore formation operon: spoIIIAABCDEFGH that is under sigma G regulation [1]. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. 
24.90 24.90 25.60 27.90 23.30 24.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.72 0.71 -4.51 37 385 2009-01-15 18:05:59 2007-05-04 15:44:36 5 2 379 0 87 291 10 165.10 33 98.57 CHANGED KllGulLIlhuoohlGahhupcapcRscpL+pLpsuLphLcsEIhYutTPLs-AhppluppsptslutlFpphuppLppppstostpAWpculpphh.ppssLppp-h-lLhphGpsLGppDhpsQpKplpLshppLcpp.pcAcpttp+ptKha+.LGhlsGlhllIlLh .......KlhGslLllhuoohhGa..thApphpcRscpL+pLptuLptLcsEIhYupTPLsEAhpcluc.p.......h.s.p.PlshlFpphuppLppsc..p..osp-AWpcul...cc.h...pp...suLp.pp-hElLpphGcsLGppDp-sQpKplcLslppLcppppcAcptptchpKMh+sLGlLuGlhlVILLl... 0 45 72 77 +9381 PF09549 RE_Bpu10I Bpu10I restriction endonuclease Bateman A anon Bateman A Family This family includes the Bpu10I (recognises and cleaves CCTNAGC (-5/-2)) restriction endonucleases. 25.00 25.00 56.80 56.60 17.90 17.70 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.18 0.71 -4.75 8 25 2012-10-11 20:44:45 2007-05-04 15:49:05 5 1 23 0 6 28 5 190.50 33 72.57 CHANGED QpcLcsSILEEFh.hLhpphhtt....hssplclG......ppsFsslsFh.sohtshhpuscstl+sKDpDFsIG.ptl.lKlohcschspspps.-hslshVAsECKTNLDKsMLp-ssuTAccLKpssPsuLYallsEaLDhs.ssss.ssTpIDEVaILRKp+Rsssphh...............hhchlshsPlss-VhhcLlccVpphLscsu.cssp...sLpRGal .......Q.cLcsolLEEFh.hLhp.ch.l.t....hssshp..lG......ppsFtsl.Fp.oshtshhpss.shI+pKDpDFslu.pph.h+hohpsp.ssh.pp..phplshlAsECKTNLDKTMhQ-susoApclKtssPsuhYhllsEaLDhT.Phss.phTplD-VhlLRKsKRhssphR...............ahchhsppPlts-lht+llpclpphlssst.ssps...sLpcGah.. 0 2 6 6 +9382 PF09550 DUF2376 CHP2216_phage; Conserved hypothetical phage protein (DUF2376) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a family of proteins found exclusively in phage or in prophage regions of bacterial genomes, including the phage-like Rhodobacter capsulatus gene transfer agent, which packages DNA. 
20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.29 0.72 -3.81 39 165 2009-01-15 18:05:59 2007-05-04 15:50:55 5 2 163 0 50 131 32 41.30 41 61.95 CHANGED LtLpPcpFWsLTPsEL.phh...lGhpu.u.s.uPlsRutL-pLhppaPD .Lth.PttFWthTP+EL.s...hh.........lGhtu..s.s..sshsRspLDuLhttaPD....................... 0 12 27 37 +9383 PF09551 Spore_II_R spore_II_R; Stage II sporulation protein R (spore_II_R) TIGRFAMs, Coggill P anon TIGRFAMs Family SpoIIR is designated stage II sporulation protein R. A comparative genome analysis of all sequenced genomes of Firmicutes shows that the proteins are strictly conserved among the sub-set of endospore-forming species. SpoIIR is a signalling protein that links the activation of sigma E to the transcriptional activity of sigma F during sporulation. 25.00 25.00 25.90 25.80 21.70 21.20 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.67 0.71 -4.12 40 391 2009-01-15 18:05:59 2007-05-04 15:52:32 5 3 385 0 90 332 8 130.70 44 55.25 CHANGED lscclIRhHVLANSDSspDQpLKh+VRDpVlchlpstlp..sspsh-Eu+plIpspls-IcclAcphlpcpGhsYsVpsphup.hsFPsKtYGslshPAGpY-Al+IhIGcGcGpNWWCVLFPPLCFlDhoputs ..................lsccslRh+lLANSDSccDQsLKh+VRDtVhttlsshl...s.......sh...p....S.h-Eu+cllpscls-IcchuppslccpGh.s..ssplphuc.spFPTKsY..........G........shlaPAGcYEAlhIsIGcG..cGpNWWCVLFPPLCFlDhopu.s............... 0 47 75 80 +9384 PF09552 RE_BstXI BstXI restriction endonuclease Coggill P anon Bateman A Family This family includes the BstXI (recognises and cleaves CCANNNNN^NTGG) restriction endonuclease. 
25.00 25.00 114.10 98.80 18.00 17.90 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.85 0.70 -5.37 3 10 2009-01-15 18:05:59 2007-05-04 15:59:13 5 1 10 0 2 10 4 279.30 35 80.40 CHANGED RKIYKTGQT..RGADcssIYQNRVSRNSTVLIPFEhl.pcspEsVstucY-NGYIVLIsPDaYF-cA+s-Kspshp.sstlsLGVNAllaYpQRuQa-cYsP.LsDhpspGL+pTpPhs..RouDl..GGcYVuRlSGTTuE..KcuKIphGaN.Tsu+GAGIRlaEYAssET..LEKARLQLEuhYWLsc-SL-uAIpaGMSscDAccR+c.oaNEAcpQGL........LppIcLlssRIIDccslTlCPLCLp+I............SASGFas+sEQsEhRcsaDLToTEINLFHIsELRYGALpHKPYNLGWGHH .....................pKl.KTG.T..RGAspstIYQNRVsRNssVLIPachh...sp...tssst.sp.Y-sGaIVLlsPc.YFssstss+hhhhp....tlpLGlNAllaYppRspWssasP..ssh.tspGLshsssps...Rpssl..uGpYVARIsuTT..uc............ctc+I.hGas.....t...p...sh+GAGIRlaEYAopEs..Ip+sRLQLEAhaWLscDSlEshhthGMs.pDshpR+c.sLscs.pcpGL..............................L-hpcLh-hRIls.ccshTICPLCLccI............SApsFas+hEQsEGRcsaDLTsTElNLFHIcELRaGphsHKPYNLGWGHH. 0 0 1 2 +9385 PF09553 RE_Eco47II Eco47II restriction endonuclease Coggill P anon Bateman A Family This family includes the Eco47II (which recognises GGNCC, but the cleavage site unknown) restriction endonuclease. 25.00 25.00 27.00 27.00 20.90 19.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.28 0.70 -5.21 6 64 2012-10-11 20:44:45 2007-05-04 16:05:53 5 2 59 0 13 61 7 190.80 44 79.26 CHANGED LoFIS-EDF.NcVpsTIscYpcpLcuh-hK+FsKNlIDPhKhIFDtslap.SacEhlssElhRQp-Ko.sNcIGhFHQ+IhtaIcsh+s.Ps....GaDV.htNs-..........pplasEhKNKHNThsuussuchahKhps.lhsctpp.cDssCahVElIs++SpNhpW.hpsssp+.........upcpIRhlShDpFYpLVTGppDAFpplshsLPhsI-KhlsE ..........................................LsFIoccDh.pcVptTl.pp.Ytct.Lpuh-lK+FN+NlIDPIKLlFDpslaptoaEcllpsElhRQpDKoNsNsIGYFHQpIFpYl.cs.hcVPps....GaD..V.hps.sp............pplalEhKNKHNTMNSuSuupsahKhQsplLpc....ccssCaLVEsIAK+SQNlpW.hplcspch........upphIRRlShDpFYtlVTGpcDAFhphChsLPpllpchlp.................... 
0 7 9 12 +9386 PF09554 RE_HaeII HaeII restriction endonuclease Coggill P anon Bateman A Family This family includes the HaeII (recognises and cleaves RGCGC^Y) restriction endonuclease. 25.00 25.00 106.70 106.50 19.70 19.20 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -12.11 0.70 -5.44 4 29 2012-10-11 20:44:45 2007-05-04 16:07:30 5 2 29 0 2 17 2 315.50 69 92.61 CHANGED AK-ALDsIIKKuRVHLYKPIQIAEILYpcRs.csL..sL.NL-TYRspSK+WRDlIChRFLGRlSTSSAKaQDNLFEcNAhPPchLslLGp.N+pssGhVEuYIY+pFh-RasQMosuLsYshsosh-NF+LoEFLs.FW.EPGLKRSIDKIYEIVVYALFcsLlppLsVpVcIphshsNlDLLcEFpDFocKllolsupNsphpLsAKhaRVGVTNAADRGLDMWANFG.AlQIKHLSLsE-LAEsIVSSloADRIVIVCKcuEEclIlSLLNQIGW+SRIQSIITps-LIsWY-KALRGpas.llGp+llEplppEIphEFPus.-sNDF.sFhcpRtY .....AKEALDsIIKKSRVHLYKPIQIAEILYHDRshKpL..-hLNL-TYRNpSK+WRDpICpRFLGRlSTSSAKFQDNLFEcNAhPPE+LuVLGshNRposGGVESYIYKpFFsRFSQMSpALAY..VG..soD+.SFQLSEFLNLFWLEPGLKRSIDKIYEIVVYALF-uLVoELGlTVoIDaPcENLhLhcEapDFu-KIIoh.PcNp+LcLsAKIHRVGVTNAADRGLDMWSNFGhAIQVKHLSLDEELAEsIVSSISADRIVIVCKcAEpSVIVSLLTQIGWKSRIQNIVTEDDLIsWYEKALRGpYs..IAEsLLEsI+sEIhcEFPAV.EANEFl-FtQsRGY...................... 1 0 1 1 +9388 PF09556 RE_HaeIII HaeIII restriction endonuclease Coggill P anon Bateman A Family This family includes the HaeIII (recognises and cleaves GG^CC) restriction endonuclease. 
25.00 25.00 25.40 25.30 21.30 21.20 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.89 0.70 -5.54 4 54 2009-01-15 18:05:59 2007-05-04 16:09:16 5 3 51 0 12 46 1 272.30 48 88.81 CHANGED G+AaEYAhlpuLtptLsssQcVlIEpNSuhplshcpYcshocphpp+hshuAcAulplILcLEP.Los.lsNss..LhLuIQcDs+Gp.GDVRDILhhRcp.pWEIGLShKHNHsAVKHSRLSRsIDFGEpWFGlPsSQsYaDpIpPLF-cLEphKccG.LWRslsNKE-cIYsPLLcAFIpElc+lspNppulIPpRhlpYLLGp.DFYKlIohDp++lTplQAFNhhGTLNRsSs+c+Phl.lP.h.hPTRhhcIsFKPsS+NTlElhLDpGWohSLRIHNASocVEPSLKFDl+LlGVP ......GRAYEaAhh.sL.pplsh..h.c.pl.ltcpsua.ss.cAapsLpcphpphahtSAhtul.hlhchEPhlp-..sssp..lpLplQpDphGchGDlRDILIhh...c...p..WpIGLSlKHNH.AVKHSRLS+cLDFGc+WhGlssSQsYaDsIcPlFppL-stKccs......hhW+-lsN....KEp-lYhPLLpAFhcElhRhtpp...pps.......lPp+hVEYLLGcaDFYKsI.l-pcphTplpuaNhpsTLN+.ShcpKsphhl.PlspLPTRhlthcFK.....P..........p.....StNTlElhLDpGWpFShRIHNASo+VEPSLKFDIplluhP....... 0 5 7 9 +9389 PF09557 DUF2382 CHP2271_C; Domain of unknown function (DUF2382) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry describes an uncharacterized domain, sometimes found in association with a PRC-barrel domain Pfam:PF05239 which is also found in rRNA processing protein RimM and in a photosynthetic reaction centre complex protein). This domain is found in proteins from Bacillus subtilis, Deinococcus radiodurans, Nostoc sp. PCC 7120, Myxococcus xanthus, and several other species. The function is not known. 
18.60 18.60 18.90 18.80 17.90 17.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.59 0.71 -4.17 46 499 2009-01-15 18:05:59 2007-05-04 16:29:31 5 9 317 0 179 399 8 106.10 31 43.13 CHANGED lpLhEE+LpVsKc+hpsGcVclpKcVhp-ppslsVPVc+EcVhlERpslscsspsss.t..phpp.-shc..lslpEEcsslpKcsVlpEEVplpKcssp-scplp-slR+Ecl-l ........................h.lpEE+LpVsKc+lpsGc.Vcl+KcVlp-ppslpVPVc+EElhlE.Rp.slsctsssss..............................t.............h......p-..psh.c.........lslpEEpssVsKcsVshEcVpltKcthp-scplstplp+Epl-l................................... 1 67 126 161 +9390 PF09558 DUF2375 CHP02922; Protein of unknown function (DUF2375) TIGRFAMs, Coggilll P anon TIGRFAMs Family Two members of this family are found in Colwellia psychrerythraea (strain 34H / ATCC BAA-681) and one each in various other species of Colwellia and Shewanella. One member from C. psychrerythraea is of special interest because it is preceded by the same cis-regulatory site as a number of genes that have the PEP-CTERM domain described by PEP_anchor (IPR013424). 21.80 21.80 22.10 65.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.40 0.72 -4.13 6 23 2009-01-15 18:05:59 2007-05-04 16:31:02 5 1 22 0 10 13 4 70.60 75 92.22 CHANGED MpsspssVTVLYY.-APsGLlMHNtVlsuLslScsGRVMIPppFR+GKSIIAVLEGECcILNSLGERVauQt ..MQAsQATVTVLYY.DAPVGLIMHNuVLssLPVSEuGRVMIPASFRKGKSIIAVLEGECKILNSLGERVFAQ.A 0 3 5 7 +9391 PF09559 Cas6 Cas6 Crispr TIGRFAMs, Coggill P anon TIGRFAMs Family The Cas6 Crispr family of proteins averaging 140 residues are characterised by having a GhGxxxxxGhG motif, where h indicates a hydrophobic residue, at the C-terminus [1]. The CRISPR-Cas system is possibly a mechanism of defence against invading pathogens and plasmids that functions analogously to the RNA interference (RNAi) systems in eukaryotes [2]. 
22.80 22.80 27.20 121.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -10.98 0.71 -4.58 10 43 2012-10-01 21:23:39 2007-05-04 16:34:45 5 1 41 0 20 51 26 199.20 32 90.31 CHANGED lDLsFslpGcsLPhDHuYhLhSALschlPtL+-hsshGIpsl+GsssssG.......llhLo+co+LhlRlPtsplstlhsLsGpsLcluGcplc.LGss+l+tLpPsssLauRhVl..h+sth-E-sFLpuspRpLcshslps+thls....G+..RcTl+hsptsllGauLhlcsLusE-SL+LQpcGLGu+RphGCGLFlPcKsls ...lDlhFslpGpsLPhDHuYsLhsAlpc......hl....PhLp-...p..sslu....lpsIpGssspsG..........hlhLocRo+LplRlPtcpls.t.l.h.s.LsGpsLclusatlp.lGtscl+sLpshssLhuRhVs......hcstp.-tpsFLcsst+pLpplslpsphhls.........G+..........c+slpht....sts....lhGauLhlssLst--SlcLQppGLGu+R+hGCGlFlPpKp..t.... 0 10 17 19 +9392 PF09560 Spore_YunB Spo_YunB; Spo_yunB; Sporulation protein YunB (Spo_YunB) TIGRFAMs, Coggill P anon TIGRFAMs Family Spo_YunB is the sporulation protein YunB. In Bacillus subtilis its expression is controlled by sigmaE.The gene YunB seems to code for a protein involved, at least indirectly, in the pathway leading to the activation of sigmaK. Inactivation of YunB delays sigmaK activation and results in reduced sporulation efficiency. 25.00 25.00 51.20 50.60 23.30 17.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.99 0.72 -4.27 31 313 2009-01-15 18:05:59 2007-05-04 16:36:26 5 1 305 0 78 259 3 94.00 38 40.71 CHANGED shthplPLGplhssslLushGP+IsV+hpslGsVpssh.pscFcsuGINQT+HpIhlplpspl+lllPhsocshpVpsplPlu-slIlGcVPphY ...s.thslPlGplosNsLLushGPcIPlchpsIGpVso.-l.cpchcstGINpTth..pIhlclcsplpVlIPFtoc.chpVppplPluppll.G-VPshY.. 0 38 63 68 +9393 PF09561 RE_HpaII HpaII restriction endonuclease Coggill P anon Bateman A Family This family includes the HpaII (recognises and cleaves C^CGG) restriction endonuclease. 
25.00 25.00 32.60 32.50 19.20 19.10 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.13 0.70 -5.25 9 103 2012-10-11 20:44:45 2007-05-04 16:40:46 5 3 98 0 15 93 13 324.40 40 95.57 CHANGED NKGEWSElYshF+LLuDGplahGssplcKhEslhaPIhpIhREE.pstphpY.lcsptlhhl...........sssccchplPhcsFtppApL..lLstlKssp.pcsFshsslEpFhsulshhcLc.ApSosKoDIslslac.cs..pPhhGFuIKSpLGu.sTLLNuG+sTNFhacl...pshphsssplppINulsp.toclt-Rhh.I.chGGhLcYhcltscsFpsNLhhIDsphPclLuchlhhaYtsploclpDLTcplpchNPLphc.p..ppHsFYEaKhKpFLsslALGMpPuKlWNGp.sAsuGallVcpsG-VLCYHIhs+ppFc-YLapNT+LEpuSoSRacFGplhp-NGchYFKLNLQIRF ............sKtEWuElYshh+LLuDGplhhGsschpp..shhaPlhhl.RcE.ccGp.ppYhl.ccpt..lh.l..............psppt.tplPtp-FttsA-h..lLptl+ssp..tppshs.s.c.ulEpFL-ph.sha.c........Lc.A+ocD+oDhplshac.cs.....PhhGFsl+ScLGshssLLsuG+s.sNhhhc.....ts.hpF....s....ssslsclNul.........sp....sclt-RhhhIc.chGGhLcYtclssclFcsNLhhIDhphP+lLuEhlh..hhal-..slocls..-LsEh..lpphNPLKlccp.......pcHsFYEaKhKpFLhAlALGM+PuKlasGps.sAstGhllVcssG-V.LCY.H.h.c+phFcDaLahNo+hEpuSsp..+c+aGhl.+.E..NGsaYFKLNlpIth.......................... 0 7 12 13 +9394 PF09562 RE_LlaMI LlaMI restriction endonuclease Coggill P anon Bateman A Family This family includes the LlaMI (recognises and cleaves CC^NGG) restriction endonuclease. 
20.90 20.90 22.80 22.70 19.30 18.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.75 0.70 -5.23 2 7 2012-10-11 20:44:45 2007-05-04 16:44:01 5 1 5 0 0 8 11 262.00 54 98.34 CHANGED MsssKE+lhElFhpNVhGh.PsIpGhsh+HsGthGHWLEc+hGhossAsNcADhhGYEhKN.hTSsKTTaGDWSANEYIFcp...Ns.Fphs.....pstFh+hFGKPNpAKpsRhSWSGpPlPc...pYs.FGQIMsI-EsLsIsIhYSFppD.R.NKF-lhP.phppspl.lA+WYGhtps..S++.psLcsKlpcKFNphGWFpChhcs.ssYscIsFG+PIsFE.WhNhV-pG.laFDSGMYpGNpRsYSQWRu.NSaWppLIp-pap .........KppII-lF+pNVhG+p...Pchs.uhN.RHDG+c..GHWLEcphGIuANAsNEADlaGYEhKN.oTuuKTTFGDWSAN.YIFcs...ps.aphs.....pspFhchFGKPNttKssRaSWSGSPlPch.sp.YshhGQhMsI--uhsIlIhYSaSpD.R.sKhsIlPspLQp-tl.lARWh...p.....p.csLcsKLpcKFNc+GWFpCppss.GsYscIsFGcPIsF-sWlpLV-pGlVaFDSGMYpGNsRsYSQWRAsNsaW-SLIs-sY.......................................... 0 0 0 0 +9395 PF09563 RE_LlaJI LlaJI restriction endonuclease Coggill P anon Bateman A Family This family includes the LlaJI (recognises GACGC) restriction endonucleases. 
23.60 23.60 23.80 23.90 23.10 23.50 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.20 0.70 -5.77 12 86 2012-10-11 20:44:45 2007-05-04 16:44:32 5 1 83 0 15 98 4 346.80 21 74.68 CHANGED FVGl.hsp....sscllhsFPpththspsp.slc.tD.ttLhs....lLp+Yscppsp.....hph.stpsppppshslsshhhllcDahppG.hYpcscphhphNtpGcIsWs+TIpcspPllp....pssshYhphhsc+phps-pshlopIHcasVpcshpph..Galhsh.t.sh.phsht.h.....spshhlphLppclssTFsDcchhLhcuhhshlpppcp..scpp...haGTppFphlWEchhcplF..............sltphp....................shhs+PpWp....hsptp.spshpPDhlhhh....ccp..lhILDAKYYphthp....hcuhPsssDIsKQlhYtthlpphh........ptsclh...NhFlhPhsppsp.ht..sssphphls...hp...t.csht.l..shhlssc ......................................................................................................aVGl.h........tpphhhhhPhhh...pp....thp.....h.h.llp......sl.cap..ppppp..........h..ttstt.t.p.st.slsshh.llc.ah.p.pG..hYhpppphhppstpG+IsWs+TIpcspsllp....pssslYhchhs++ph.spschlptlathslppshpph..Galhsh..t..t..phsp.........spsh.hlph.LcpchsptFsDcchtLhp.shhs.alpppsp.hppsp....hhGTpsFphlWEchlcchh...................s.t.spp..............................phh.s+spWp........ttsp.p.p..tp.....phl.pPDhlhht.......pcp....h.a.IhDAKYYphsh...........p........hps.hPsssslhKQhsYsp..shtph................p.tplh...NsFlhPhpt..tp..........h............................................................................................... 0 7 10 14 +9396 PF09564 RE_NgoBV NgoBV restriction endonuclease Coggill P anon Bateman A Family This family includes the NgoBV (recognises GGNNCC but cleavage site is unknown) restriction endonuclease. 
25.00 25.00 32.70 32.70 18.10 17.40 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.57 0.70 -5.13 2 50 2012-10-11 20:44:45 2007-05-04 16:47:48 5 1 44 0 4 35 4 197.50 69 96.74 CHANGED MIKLTAQQIFDKLLDEEKILSANGQIRFFLGDVDIIVKQKDVVGNIIQEWLGGWLRKREIEFDVSTNTQMPPDFFLNKKDRSRELLEVKAFNRNAsPGFDIADFKMYSDE.hhpP.h.sschh.hGYDMDDNGNVTIKDLWLKKVWQITRSMDGWAINhpsKK.........sW............Clcs..............................................A+ha .............KLTAQQIFDKLLsE-KILsspGQI+FaLGDVsIIVKQKDVVGNIIQEWLGGWhcKRpIEF-susNTQMPPDFFL.NKKDRSRELLEVKAFNRNAsPGFDIADFKMYSDEI....IHKPYMLDVDYLIFGYD.MDD.N..G..NVTIKDLWLKKVWQITRSMDGWAINLQVKKGVVHKIRPGV..WYSIN..KKNMPMFECLEDFVSAIEETVYQNPATRHNASLWK+KFEEAYKKHYNRSISIPRWHEIAHKY......................... 0 3 3 3 +9397 PF09565 RE_NgoFVII NgoFVII restriction endonuclease Coggill P anon Bateman A Domain This family includes the NgoFVII (recognises GCSGC but cleavage site unknown) restriction endonuclease. 20.80 20.80 20.80 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.00 0.70 -5.47 7 72 2012-10-02 13:01:53 2007-05-04 16:59:06 5 2 66 0 17 148 64 252.10 24 71.27 CHANGED at..hppuDcL.lhsGYlo.shulpclcpls-.php.hpIsLlsGMash-Ghsts.asshhcLpthhpcpshGsl...alhtshchHuKlYlahKc.tpsh.uhlGSANhss...suhhpcphE..sssspDs.s.spchht.lpss.l.php.sltsh.ph..........phlh-tss.hpshtshptlsppslphh.ppt....F.l.h......csst.............pSsLNh....u.sR....Gph..psRsh.Es.IplstcIsRp.........ssa..P......ppp.FpllTDDGahh.s+hsup.......ssKplsuhts............pplLG+al+sRL.hspGsl...pp..........lT+E.L 
.......................................................................................................................................................................h.hhsGh.s...............................h.t.......pl..plhhGMh...Ghs.......hpth..p...l.....hptpth..tth...al..........hHuKhY.a..pp....t..h..t.uhlGSANhs.........s..p...pt.hE....hhh.h.s....t..........h....lpp...h.ph.t...l...p....................hh..p..tp....hpth.....thtt.l........p.l.t.......t.p.....h.....ls.h........cs..t..............................................................+SsLNh..h.u.tu.R.....G.h..hsRsh.Es.lhlsppIppp......................tha...P..................tpc.s...FsllTDDG...h.hps.+sstp..........sKshpopss.............phLGcW.l.+s+L...tts.h................................................................................................................................................. 0 7 12 14 +9398 PF09566 RE_SacI SacI restriction endonuclease Coggill P anon Bateman A Family This family includes the SacI (recognises and cleaves GAGCT^C) restriction endonuclease. 
19.30 19.30 20.50 123.60 18.10 17.80 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.29 0.70 -5.51 3 10 2012-10-11 20:44:45 2007-05-04 17:00:02 5 1 10 0 4 12 0 348.70 27 95.43 CHANGED MuITINHSsAppVL+cAaEcAup.oDDchpsQW.....lILuT+LpElss.R...TYpAALlTALLAKAsDsRVDPhuIpE+sssDsAYSARSLCHuVlVs.pVEtuFLEGsLGAsR.EPlNNpPFhRYspaSuI.pVcNK.uRcYLD+VLsALScIDpEchATTp..uaRALVAuLthTlsRTN+......................ssKEssAlGuAIVptSLluEpcuFVlpuH-VsR+hQAuuAuhLshsa.KE.Ilst+lNDPsRsFPhDIsVY...cDGssaLoIEVKDKslsapDLppuVSKAotuGIp+VlaLssA+A.TslsL....DcohAlERstsCtVpVsFS.VpoFs+sCFAlSPllusStl.hAF.cAIscpLIEIcV+-ssID ....tlsIs+.cpAccVLpcAacsAsppsD.shsspa......llpsspL..hss.+...Ta+YILlTALLAKATsscINPLsLQppusl-GAYDARSLCHKVlVP..FE+saLpsuLGuSN.EPFLNKP.ARascl..Sp....cNA.VRRGpD.c.lL.ptLsthc..hpssss..AFcuLssALhhtlppspc...............................tpphhhs.s.hIhphpLh.phppsal...lcspcs...GpphpAt..suusLth....ha...pE.....VtschVNp..s...Spc.suDIDVY........................c-cpllhoIElKDKsaotpDVpHAlcKsusuGhp+shFls.GP+A.splch....-cops..hppApppslhlhas.hppFs+.hhsh.sh.stpth.hsh...lhpphh-hp.pp.............................................. 0 2 3 4 +9399 PF09567 RE_MamI MamI restriction endonuclease Coggill P anon Bateman A Family This family includes the MamI (recognises and cleaves GATNN^NNATC) restriction endonuclease. 
25.00 25.00 122.60 122.10 18.80 18.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.03 0.70 -5.36 2 5 2009-01-15 18:05:59 2007-05-04 17:06:57 5 1 5 0 0 5 3 274.20 30 84.58 CHANGED LGSLp.ppc..LlcDLaVDLht+.pV....WAAlTtQoAQschGYIuQalsSlVhG.PG..pGhRGK.....sD-hA-................GStVp.........uuANIptsDcs+W...lG..cDsEa..........tEhLt.PhhaYLll.c.pslpcPssIRhphWplDu.psG.hhsLh-ha.....sS+pGtsaN.......FpLh.Pl.............L.apshll.Dsplphtshst.p.s.HhPLs....PlT..uRopsLphGthtthssRL.....hNsuchVLhps..........D.sslhsulhtPhsshDhAshS.hto.-A.s-thS .........h.toLEsS-p..LIK....DLYVDL+++lss....WStITsQTAQA+hGYIGQHLASlVTGhPGstSGARGcDLs..ssDhuEIKoC.RVDQLspCpsCGosVpthEppCPsCuSTNIcRcDDSKW..LlulRsDsEa..........-ElLs..Pc+aYLlLFDapDl..pDs..csIRIpsWcVDupEc.GhsaC.Ll.DYYh.NI...Kh.uS++G...APaNLaPapacFtLp+Pl.............LIFcusIl.sDs..pIpht....lhPs.p.sh+hPLs....Pls..uRspslphsthtth.sRL..............................................................pt..hh................... 0 0 0 0 +9400 PF09568 RE_MjaI MjaI restriction endonuclease Coggill P anon Bateman A Family This family includes the MjaI (recognises CTAG but cleavage site unknown) restriction endonuclease. 21.60 21.60 26.80 36.70 21.50 21.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.90 0.71 -4.72 7 42 2012-10-11 20:44:45 2007-05-04 17:08:48 5 1 37 0 16 44 4 169.30 31 84.26 CHANGED s.llNhAsphhptshP+lVGthSELI+Ehp...s+ohc-W+EaYhp+..hp-pIsctscKlhhhlpphhpu.lpplccEslcpalc-LVIs+Ta-Ghhhp.csILpplAcc...hssphc.AssE.-cspslDGaI....tpIPlpIKPhoa.sppsplsE.hphphIhYcppcshhhl.hscsh ....llNhAsphhphs+PcsVGthSElI+Ehp..........s+ol......cEWcpaYhp+...............hsEslcchscKlashlpchh.....pu.lpploc-....DshpalcsLVIs+TasGhhhp.psIhppltpc...........hshpa..c.AssE.-cshsID..h.aI.....tthslQIKPhTY.......t.hsE.hph..h..ppppp.....h........................... 
1 7 12 15 +9401 PF09569 RE_ScaI ScaI restriction endonuclease Coggill P anon Bateman A Family This family includes the ScaI (recognises and cleaves AGT^ACT) restriction endonuclease. 25.00 25.00 40.70 40.50 22.20 18.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.31 0.71 -4.81 4 14 2012-10-11 20:44:45 2007-05-04 17:09:11 5 2 14 0 2 13 2 172.00 47 93.08 CHANGED SPYtuhsEEcW.plTcpLIp-aPLSt-hIlphVLuSWEsIFoS+hGs.thpIGpslFPsPQhhGhlLcsLIshcltst.ss.Wpu-psp.-KDlVhhhs-hYSIElKTSSsccpIaGNRSaG..pspcuh..KSKsGYYLsINFEKap-s.sLcP+IphIRFGWLDaoDWluQputTGQQASlssphtpsKLlsla ..SPYtslspc.WhuhTccLIcpHPLStcpIVplsLcSW-sIFoSplGs..hpIG+shFPsPQIhGhlLHtLIstplpsca.s.Wpu-csth-KDlVhIhschYSIElKTSScsc.IaGNRSYu..p.spsuh..KSKsGYYLsINFEKF..s..phcPcIphIRFGWLDaoDWIuQpAuTGQQARLsscs.psKLhhlY..... 0 0 2 2 +9402 PF09570 RE_SinI SinI restriction endonuclease Coggill P anon Bateman A Family This family includes the SinI (recognises and cleaves G^GWCC) restriction endonuclease. 20.10 20.10 20.70 99.20 19.50 17.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.36 0.70 -5.36 3 15 2012-10-11 20:44:45 2007-05-04 17:11:24 5 1 15 0 6 17 15 215.10 37 92.89 CHANGED FlQNAAElAKpsMDsl-PSLSEKFTlVI+FLSDNP-usSshRGKc.RssVGs-EaIphLAQNFp-G.RcP+pPsPPoTIPDElVSVVLNVuF-lPpEpLN+IKEpHRLSMuAENIVGDLLERYLAEVLEPsGWIWCSGohVKAVDFI+pDsE.ssWtsLQVKNRDNTENSSSSAIRcGTPIKKWFRTFSK+cuTNWDNFPsphuucsLNEcGF+sFVEpYLccl ..........................................................................................................s..p.pL..tF..lhpaLsp.Pp.hS.hRuKp....p.pVhp..cEhlphhAppYacu.RpschPt..P...pTlPDEhVShlhp.hsashoppplppI+hpHphSMuAENhVGsLLERYLssVLcspGWhWCsGshVKAlDFlphssc.s.WhhLQlKNRDNTENSSSSAIRsGTsI+KWaRohS+ssp......T.......NWsslPp.hp....uhsLsEpsFhtFVcpYl.t.p............................................ 
0 2 5 5 +9403 PF09571 RE_XcyI XcyI restriction endonuclease Coggill P anon Bateman A Family This family includes the XcyI (recognises and cleaves C^CCGGG) restriction endonucleases. 25.00 25.00 46.20 45.70 18.80 18.20 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.90 0.70 -5.57 3 13 2012-10-11 20:44:46 2007-05-04 17:12:37 5 1 13 0 4 13 0 292.30 44 93.16 CHANGED PsPcsQI-FutuL-cLRplaLQpALLcTV+chDIucLDcELuKYVPsuDLQpLApYGLRAEllFsVPsVLEANP+LIGYYRLLLGYSQKcFYstD+GLGhGsFKSMEcKGKIuKAtpPcIcDLClAFsASAStLLsGlGhlRISRELLDDLTLLTLGPQLRGGRNNslGhAGh+hVFEIIREIVAPAIsESc-ouIVLoNAAGRsVTIEFuuDPDIIIREKlcsp+aKNslAIEVKuGTDlSNIHNRLGEAEKSHQKARp+GFTECWTlVNsuNIDLVKARsESPTTD+FYpLohLsDKussEYADFRcRIlALsGIP ..................................P.PchQIsFh.tLcchRthhLQpALh-TVcchDIspLDcpLtcYVssucLtpLApaGLRuEllFsVPsVLcsNP+LlGYYRLLhGaSQKcFYsts+GhshGhFKSMEpKGphspsttsclpDLChshstsuStLLsuls...plSpcLLDDLTLLTlGPQLRGGtNNp+GssuhhhVFEIIR-IVup.u..h.sEs...cp.stIplssAsGRplhIEFAsDPDIlIREchp.ppa+NllAIEVKuGTDsSNIHNRlGEAEKSHQKA+tpsaTECWTllsVtplDh.hAppESPoTsRFYplotLs.psssEYtDFRcpllulsuIs............................. 0 4 4 4 +9404 PF09572 RE_XamI XamI restriction endonuclease Coggill P anon Bateman A Family This family includes the XamI (recognises GTCGAC but cleavage site unknown) restriction endonuclease. 
25.00 25.00 43.30 43.10 21.30 21.00 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.69 0.70 -4.93 5 29 2012-10-11 20:44:46 2007-05-04 17:13:06 5 1 28 0 9 32 4 214.40 35 79.36 CHANGED TuNLT..uI...TP-lL.csaPpsLs.sLRMoTuPPIAcDRLlGLAchS.sLVcSlE.cs+LPs+Mcut.LcpcLcKlssVIp+hlDPDlFsWhscGcsPTccERchAATIVADRLCGAlAsPIlRNAQERR......QLAsIKSWLcARGYTQlssuAu..lNSM..sPGTFoFRtNlsV....Gs+..VNIPVDAVVps+Du+ptthPhhIEAKSoGDFTNTNKRRKEEAsKhuQLplp.YGpplsh.sLFLCGYFcoGYLGYuAAEGLDWVW ...................hp..tl...pst.l..t.s.hls.tLRh.suPPIucDcL.slus.....lpshp....thL..p.ps......Lp+lhsllp+hlD.chFsWhttsttPospphc.AAhlsustLhuA.......R.ApE+R......Q.shlcpaLcshGasch.tsu.....hpsh..................PsshpFptps.l.......Gpc......sDlVlt.tcs+.....hhlEsKsusshTNosKRhpp-.Ashhsphhhp.aGpt.hh.shhLsGhFcst.Lt.ttupGlshhW................ 0 0 2 5 +9405 PF09573 RE_TaqI TaqI restriction endonuclease Coggill P anon Bateman A Family This family includes the TaqI (recognises and cleaves T^CGA) restriction endonuclease. 25.00 25.00 44.80 44.70 19.10 17.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.78 0.70 -5.25 4 13 2012-10-11 20:44:46 2007-05-04 17:14:29 5 1 10 0 3 15 0 199.50 46 95.09 CHANGED opAQcALEAFEcFLcuLDLESYppKYRPIKTVEQDLPRELNPLPDLYEHYWKPssssPpFPsFEEFF-pWW-KRLR..PLDEFIRKYFWGCSYpFVRLGLEARLYRTAlSIWTQFHFCYRWpASCpLpLpAu.ELDuQGIDALIp.s.p-p.lGIQIKKETYRSEARutNRFLRKpp.soALlElPYTLQo.EELpcKApRARocp..EsYpLWsKVApHL-+LPNGFVIFRESYVKclEsFLpcNAsTLoGLI ..............pcsLctFEcFLtuLDL-pYpp+hRsIKTVEQDLP+EL.PLsslYcaYWcs...p.pF.sFE-aF-paWc+......+L+......P.LspFI+KYFaG..CShtFV+hGhcARLYRThlSIhTQFHFCYhWps.C.....p.....L.Lpus.ELDtpGIDAhlp.h.pt..lGIQIKK.oYRpEA+stsRFh++pp..tsuLlElPYsl.s.EELpcKhtpsRscp...csYp..hchh.p+.h.+L.NGFVlFpEsYl+.l.............................. 
0 3 3 3 +9406 PF09574 DUF2374 Short_TIGR02808; Protein of unknown function (Duf2374) TIGRFAMs, Coggill P anon TIGRFAMs Family This very small protein (about 46 amino acids) consists largely of a single predicted membrane-spanning region. It is found in Photobacterium profundum SS9 and in three species of Vibrio, always near periplasmic nitrate reductase genes, but far from the periplasmic nitrate reductase genes in Aeromonas hydrophila ATCC 7966. 21.40 21.40 22.30 63.50 20.10 19.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.13 0.72 -4.34 11 107 2009-01-15 18:05:59 2007-05-08 12:46:36 5 1 106 0 12 42 1 42.00 78 97.19 CHANGED MSTLESVIWHlLGYuAMPVIILuGFluVAsVSIhLLuhsKDK MSTLESlhWHVLGYSAMPVIILuGFlGVAVVSIhLLuhTKDK...... 0 1 4 8 +9407 PF09575 Spore_SspJ Spore_SspJ; Small spore protein J (Spore_SspJ) TIGRFAMs, Coggill P anon TIGRFAMs Family Spore_SspJ represents a group of small acid-soluble proteins (SASP) from Bacillus sp., which are present in spores but not in growing cells. The sspJ gene is transcribed in the forespore compartment by RNA polymerase with the forespore-specific sigmaG. Loss of SspJ causes a slight decrease in the rate of spore outgrowth in an otherwise wild-type background. 25.00 25.00 88.60 88.30 19.30 19.30 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.20 0.72 -3.76 3 23 2009-01-15 18:05:59 2007-05-08 12:52:21 5 1 23 0 5 12 0 45.90 79 96.88 CHANGED MS.FFNKDKGtNSEKD+NsVcGALEDAGpALKGDPLQEAVQKKKNNR Mu.FFNKDKGKpS-K-KNVIQGALEDAGuALKDDPLQEAVQKKKNNR.. 0 1 2 3 +9409 PF09577 Spore_YpjB Sporulation protein YpjB (SpoYpjB) TIGRFAMs, Coggill P anon TIGRFAMs Family These proteins are found in the endospore-forming bacteria which include Bacillus species. In Bacillus subtilis, ypjB was found to be part of the sigma-E regulon. Sigma-E is a sporulation sigma factor that regulates expression in the mother cell compartment. 
Null mutants of ypjB show a sporulation defect, but this gene is not, however, a part of the endospore formation minimal gene set. 20.40 20.40 20.40 22.30 20.10 19.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.42 0.70 -4.79 10 157 2009-01-15 18:05:59 2007-05-08 13:35:37 5 2 156 0 27 110 0 227.20 45 87.38 CHANGED uhcELscLSDslLQLsKpp+YEEAlQVLpYFpcpFhus-hcppp..lTssplRplTluY-cAl+uLpppchscpEKl+ssspFRLllDAlsScpcPLWsphEcPlMcuFushKcAspppDspsFpcphNpFloLYslIYPSLpIDlssspLppVssHlshlEphc..phopsopp-+LsllcpDLpslFDpVccD-ADPSLLWVIloTGGIIlhTLTYVGaRKYKuEKcKcKsR ............................................................pWpELssLhD-uLQLVKcsc.EcAlQVLpaFS-QFhhpssc..ccp..lTs-plRlISLuY-cAppSLsppslscp.Klcslh.tLpLAVDA.sSKaQPLWhEhEtplM-AFSphEKAhpKc...Ds...spFppoLNsFLpcaslIYPSLhIslPEs-lQRVsuHlSY....L-.ch.c.s.sh.Lc.s.cus....phQ.L.ull.+uDLQ+lFcsVK.......KD.......Eh....s......P......SL.........I......Wh.MshTGGlIlhTLTYVGWRKYKGE+EK+Ksp........................... 1 6 17 19 +9410 PF09578 Spore_YabQ Spore cortex protein YabQ (Spore_YabQ) TIGRFAMs, Coggill P anon TIGRFAMs Family This protein is predicted to span the membrane several times. It is only found in genomes of species that perform sporulation, such as Bacillus subtilis, Clostridium tetani, and other members of the Firmicutes (low-GC Gram-positive bacteria). Mutation of this sigmaE-dependent gene blocks development of the spore cortex. The length of the C-terminal region, which includes some hydrophobic regions, is variable. 21.00 21.00 22.50 22.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.52 0.72 -4.03 38 379 2009-01-15 18:05:59 2007-05-08 13:55:22 5 1 375 0 85 295 6 79.20 32 46.51 CHANGED hlsslhsGhhlGhhaDhY+..hh+phh+hp.+hhshltDllFWl...ltulllFhhLhhsN.GplRhYlaLullhGhsl...Yhtllup ..............hlhhlhhGhhlGhsaDhYp..hhhpth.....cpp...+hh..shlpDlLFWl...ltulhlFhhLhhsNpuclRhYlaLullhGhhh...Ytphlp............. 
0 45 70 75 +9411 PF09579 Spore_YtfJ Sporulation protein YtfJ (Spore_YtfJ) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this family are encoded by bacterial genomes if, and only if, the species is capable of endospore formation. YtfJ was confirmed in spores of B. subtilis; it appears to be expressed in the forespore under control of SigF. 25.00 25.00 25.10 25.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.81 0.72 -4.13 71 712 2009-01-15 18:05:59 2007-05-08 14:01:05 5 1 496 0 166 461 6 87.30 40 62.67 CHANGED lcssD.slIlPlu+VuFGFuuGGu-hpspp.................tppt..........hGGGuGuGlulpPlAhLVlp.s.....spl+llsl.spps..hl-+ll....-hl.Ppll-+ ..........lpssDGslllslS+V..uF..GFuAGGS-apssp....................t.ttps.............FGGGSGuGVSIsPlAFLVls.s.....ssV+lLpl..spss....hh-Kll.....-hsPphl-K......................... 0 73 135 147 +9412 PF09580 Spore_YhcN_YlaJ Sporulation lipoprotein YhcN/YlaJ (Spore_YhcN_YlaJ) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry contains YhcN and YlaJ, which are predicted lipoproteins that have been detected as spore proteins but not vegetative proteins in Bacillus subtilis. Both appear to be expressed under control of the RNA polymerase sigma-G factor. The YlaJ-like members of this family have a low-complexity, strongly acidic, 40-residue C-terminal domain. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.02 0.71 -4.16 52 715 2009-09-10 23:13:51 2007-05-08 14:11:15 5 3 229 0 145 527 0 161.00 19 84.30 CHANGED hhhhhluGCsssppsp.tt...............................................................................................................................................sstsst.thsp..hphpp.tsppt............htstpcpclAcclschss.plspVccAsslVs...........................................sppAlVulchp.t....t.......spsc...pl+ppVpcslcshssphppVhVouDs-hhpRlcshuppl.psGpsh..pshhp-lsphl..p+lhs ..........................................................................................................................h......................................................................................................................................................................................................................................................t..t....tp......hphpphttpp.........................pstpsspchAcplsshst.plssVpcusslVs...........................................sp....pshVulcsc.tp.....p.............................................stsc..pl+ppVtcplps.sP...p.hslhVosD.chhpclcplsppl.pp.s..p.sh...st.h.tpplstlhtch................................ 0 50 104 116 +9413 PF09581 Spore_III_AF Stage III sporulation protein AF (Spore_III_AF) TIGRFAMs, Coggill P anon TIGRFAMs Family This family represents the stage III sporulation protein AF (Spore_III_AF) of the bacterial endospore formation program, which exists in some but not all members of the Firmicutes (formerly called low-GC Gram-positives). The C-terminal region of these proteins is poorly conserved. 
21.50 21.50 21.70 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.05 0.71 -4.43 40 411 2009-01-15 18:05:59 2007-05-08 14:19:16 5 1 379 0 93 314 6 174.90 20 89.74 CHANGED llhllhsshlEhLLPsush+KYl+hVlGLlLllllLsPllpLhp.p-hshththhpp.......pp...ppphppptcchpspppshll-p....hpspLcpplccplc...pphshphtclplphsps.pptt.......pIpplsltlpp...tp.t........................................................pVc.lpIst..................tp.stppttpspptpclcphlup....hapl.sp-pIpVth ..................................................hhllhsshlchlLPssshpKYl+hlluLlLllllLsPlhpl...hp.pc.h...s..h....h.t.....hsp.......................tttt.p..pp...l.p.pp.pcch.pttp..c.shslcp.....htpphccplppthp........pp.hshph.clpl.hs....psttp.p............plpp.l.ltlppt.t.tp..tt..t.........................................................plc.lplst.................................................t.t..tt.pt.t.c....hpphhup......hhpl.t.cpIpl................................................................................. 0 47 76 82 +9414 PF09582 AnfO_nitrog Iron only nitrogenase protein AnfO (AnfO_nitrog) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry include Anf1 from Rhodobacter capsulatus (Rhodopseudomonas capsulata) and AnfO from Azotobacter vinelandii. They are found exclusively in species which contain the iron-only nitrogenase, and are encoded immediately downstream of the structural genes for the nitrogenase enzyme in these species. 
23.90 23.90 23.90 57.20 23.30 23.50 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.96 0.71 -5.18 14 61 2009-01-15 18:05:59 2007-05-08 14:24:52 5 2 51 0 39 60 0 200.20 27 92.49 CHANGED hcIAVhlsppGphsohh-sGhltVYpcssspWplt+ch.hs.lssspulstl+ttlsshlspLscC+lhlupsspGlsYuhL.-chuhplWchpGpsh-hLDtVhccEp-pt.cptt...............................tssshshshPlchG.s...GcaplsLpcl.ppsscloSKQlLlPFLcsssFpcL-llCsHlPpWhsp-lsthsLch-s..pphs+pslp ...cIAValscpGpssoh.csGh.ltlap+.psspWphhcch.hp.l.ssstulsplRpphppllptLscC+lhlupplpGlsYshL.-chGhslWch.pGps.-..hLDtlhccEpcptpptt.................................ssshsh.tPhchs..s...GpaplsLpcl.t.psssloSKplLlPFLcpssFppLEllCsHlP.Wh-pcltthsLphch..pt.......h............ 0 17 30 34 +9415 PF09583 Phageshock_PspG Phage shock protein G (Phageshock_PspG) TIGRFAMs, Coggill P anon TIGRFAMs Family This protein was previously designated as YjbO in Escherichia coli. It is found only in genomes that have the phage shock operon (psp), but it is only rarely encoded near other psp genes. The psp regulon is upregulated in response to a number of stress conditions, including ethanol, expression of the filamentous phage secretin protein IV and other secretins and heat shock. 25.00 25.00 55.60 55.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.28 0.72 -3.91 13 608 2009-01-15 18:05:59 2007-05-08 14:26:17 5 1 604 0 44 195 2 64.50 71 79.78 CHANGED MlELlFllsFhlsLllTGlSllGllAAlsVAhslMhluGMhulVIKLLPWLlLhllslWlhRshp .MLELLFVlGFFlMLMVTGVSLLGIlAALVVAT.AlMFLGGMLALMIKLLPWLLLAlAVVWVIKAl.t.. 0 1 7 25 +9416 PF09584 Phageshock_PspD Phage shock protein PspD (Phageshock_PspD) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are phage shock protein PspD, found in a minority of bacteria that carry the defining genes of the phage shock regulon (pspA, pspB, pspC, and pspF). 
It is found in Escherichia coli, Yersinia pestis, and closely related species, where it is part of the phage shock operon. It is known to be expressed but its function is unknown. 25.00 25.00 27.70 29.20 19.50 19.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.16 0.72 -4.27 7 520 2009-01-15 18:05:59 2007-05-08 14:33:02 5 1 516 0 28 113 1 63.30 79 86.29 CHANGED t+s+sG..LKhhuKlhlhsALpYGPAGsAGWhVKoVuRKPL+hLLAhsLEPlL++hhs+lutpahp ................QKVKPG..FKlAGKLVLLTALRYGPAGV...AGWAlKSVARRPLKMLLAVALEPLLSRAANKLAQRY..Kt. 0 1 5 17 +9417 PF09585 Lin0512_fam Conserved hypothetical protein (Lin0512_fam) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of few members, broadly distributed. It occurs so far in several Firmicutes (twice in Oceanobacillus), one Cyanobacterium, one alpha Proteobacterium, and (with a long prefix) in plants. The function is unknown. The alignment includes a well conserved motif GxGxDxHG near the N-terminus. 23.40 23.40 23.50 24.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.43 0.71 -4.51 22 222 2009-01-15 18:05:59 2007-05-08 14:40:49 5 3 169 0 69 202 119 110.20 39 85.68 CHANGED pphhlEhGhGsDlHGQDhTKAAtRAV+DAlp+sSlsuhhc.hhsh.shspMpVpVclGVscP..-pVDt-tltshlPaGp.ssVpllpGGL......sltchss.s-shlIAsAAVpVu .......hlhlEhGhGsDlHGQ..DhTKAAtRAV+DAIp+NSlsultp..h.....lsh..s...hp.sMhVplpluVs.......ps..-plDp-tl+ullPYGp..solcshpGGh.s..uhtltchs-ts-.hhIssAuVpV......................... 0 25 51 63 +9418 PF09586 YfhO Bacterial membrane protein YfhO Coggill P anon Pfam-B_2727 (release 21.0) Family This protein is a conserved membrane protein [1]. The yfhO gene is transcribed in Difco sporulation medium and the transcription is affected by the YvrGHb two-component system. Some members of this family have been annotated as glycosyl transferases of the PMT family. 
28.50 28.50 28.50 28.50 28.30 28.30 hmmbuild -o /dev/null HMM SEED 843 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.37 0.70 -6.52 56 1820 2012-10-03 03:08:05 2007-05-08 15:29:27 5 4 1236 0 257 1471 211 637.40 21 83.13 CHANGED u.hllPhllhhhhhh.....hthhshuspslhssDhhpQYls....Fhthhpptlh......sssshFYoastGLGushhuhhuYYl.hSPhshlhhhFs..............hpphspuhhllsllKhulhGLohhahhpphh........tph....hhslh.....huhsYuLsuasls.phslhW...hDshIlLPLllhGl-+llcpp+hhhahlslulhl.lsNaYhuYMhslFhhlYhlhph...h.......pshpphh.....pthhpahhsSlLushhSulhllPshhslhs.spps.sp...sh.hthphshhchhs+hhhGsashpph.......s.laluhlshlhhlhaFht+phph+h+lhhsllhhhlllShhhphlshhWpGhptPsha.aRaualhuFhhlhLuupslp....phpph.........hhphhhshhllhtlhh.hshhhp.ppt...phhp.....h...................................................slhhlllhhlllh.hhh..........tph.hhhhhhllhlhhhh-hshNhhhslsphs....hsspp.asshhpshpphhphhp....ppsss.haRl-phhsps......t..........N..-shhhsYpGlStaSSlhspsh.shhs.....slG...tssssphphtssohlhDuLhulKYhls............................................pspphplYcNp.sLPluasssshh......pchchpp.....tssls...pQsthLpulsspst.............................................phFps.........................p.sphphpssptts..shht.st......................spstssslphshs.sssssshYlphs.........................................s.sppshslpV.Nspshppp...............pstlhsluhpscsp.plplsl......hsps....phplsphplhshsh.pshppshpplpp.pthphtphp...ssplpuslss..pcsshLhhoIPYs+GWpsplsGKplphp+..sp............ssFhu..lslspGppplplpYhPshhhhG..lllS 
.......................................................................................hlshhlh.hhhh.......t.h..h.....u...t........h......s.Dhhp....Q.hh.........a...h.hphh...........t..p...h.......as...h.s..ul.Gh.sh.ht........uYYh..hu.h....l.h....hh...............hp.hs.shhhh...hhl+hhhhhhshh.h.hphhh...........................th............h.hhhuh.Yuh.shhh...............a......hss.hhhhPLhlh.ulch.h.h............p....pp....+......h...h...hh....h.s.....h...hhh...h.p.....N......a......Y.....h.....ua..hhs.....lhhhhahlhth....................................ph..hp..hh......pthh..hhhhshlu....sh.uhhhhhPs...h..sh.p.ptp.t.......................................................................h.........h..........p..h..p..........h.....h.............h.......................s.........................................h..ah...hh........h.s....h..lh....h.hh.....h..h..........p......hp..h..h..h.......hhh.....hh....h...hhh.h.hh...s.......h........h.s.......ha.......p...uh..p.P.t...................hR...as...alh.sh.h.h.hsh.hhp..........phpph.....................hthhh...hhh..hhhhh.......hhh...h.h.....t............h.........h.h.h...............................................................sh...h.h.h.h.hhhh.hh.h.h......................................................h.h.h....hh..h..hhh.h.h..........h.......h.....s.................hpth..................p.t.h..tp.....h....t..........t.......t.h..............................ttt..h.Rhp...h.....t...........................N....sshh.ht....atuho....asSh.hstt.....p.hhp.............ths.........tp.s...ht.......s..sp.....h....slhslph.h.........................................................................................p.t......ph.hhpNp..t.hshu.h.hspp...h...........p.p...p.ht............sht.ppt.t.hhpths..p....................................................................................hhp.......................................p.t..ht..t..t..............................ttt.hthp.h.......hp.....t.....tph..ah.h........................................
......................................tpthpl...l...st...p...............................hhsl.s........t...tp.phplph.............tt....thpht...thh....t...s....pt.hp......pthpph.pt............h...p.......ph........ps.plthshss..ppst.....hlhh.o.l..P.Y..sp.G...Wpsh...s.G.c..p..lp.hpp..sp............tshhu..lpls.t.Gp.p.plphpahP.hhhhGhhho..................................................................................................................................................................... 0 106 190 230 +9419 PF09587 PGA_cap Bacterial capsule synthesis protein PGA_cap Coggill P anon Pfam-B_1441 (release 21.0) Domain This protein is a putative poly-gamma-glutamate capsule biosynthesis protein found in bacteria. Poly-gamma-glutamate is a natural polymer that may be involved in virulence and may help bacteria survive in high salt concentrations. It is a surface-associated protein [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.63 0.70 -5.02 181 2048 2012-10-02 19:15:56 2007-05-08 15:48:07 5 29 1542 0 483 2755 332 235.10 26 60.66 CHANGED slshsGDlh.hs........psh.p.h..t..................................shs.ha.ttlt.s...hlppu...D.lslsNLEssl.........................................ssssp...shss..h.....htapsssph.sssLppsGh..c...slsl.ANNHshD....aGtpGltcTlp...tLc.psG...lthsG...suts.tpupp...hlhph........pGh+luhluas............................................................................................................ppltpc.......lpps+...p............sDlllVshHW..G....tE........hptt...........P.sspQpp............hA+thlD.u.....G.ADlllGpHPHVlpshE.ha..........csp.......................hIhYSLGNF.lhst 
.............................................................................................................lhhsGDhh.ht........t.t.h.t.h..t..........................................shp..a..ttlt......hl..ppu....D..l.s.hsNhEssl.............................................sspst....hsth..........hapsss.ph..hp..sl.......p.s.s........G...a.c....slsl.ANNHshD....hGh...p...Glh.s.T.lp............tl.c....p....ts...............lth..h.G.............shp....s........t...t....c..s........p.p..s..........hl.h.ph............pGh+luhluashsh.ssht.sh....s....t............................................................................................................................................................spltpp...lccs+....p..........puDl.llVhhHh.....G.......sE.........................a..p.p...........................P...sppQpp.......................................................hu+th...l.-..t........G...A...Dl.lhGpHPH.V..l.p.s.hE.hh................................ptp.................................hI..h..YShGNFl.s.................................................... 0 197 343 427 +9420 PF09588 YqaJ YqaJ-like viral recombinase domain Coggill P anon Pfam-B_3587 (release 21.0) Domain This protein family is found in many different bacterial species but is of viral origin. The protein forms an oligomer and functions as a processive alkaline exonuclease that digests linear double-stranded DNA in a Mg(2+)-dependent reaction, It has a preference for 5'-phosphorylated DNA ends. It thus forms part of the two-component SynExo viral recombinase functional unit [1]. 
21.10 21.10 21.10 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.06 0.71 -4.24 97 1553 2012-10-11 20:44:46 2007-05-08 15:50:07 5 18 969 13 375 1400 442 140.60 24 46.91 CHANGED -WhphRptt........l.suS-sus.................................................hhG.hs..h.....ps.shplhhccssp..............................ptspsht....hGpphEshstphappppG..........hplpt.......thh........p.shhtAShDGhs..................................................................................................................................thlEhKsst....t..hht....................ttl....PptYhsQ.....lQtthhV........o.utc ...............................................................................WhthRhth..............l.suS-sts.....................................................llu....hs...............ts....hhphhh.ph.hsp.............s..........................................................................psp.sht....h.GpphE..........s..ut..........p....ha..ph.p.pu.........................hplpc..............p.shhh+...........spp.h...h.u....u.osDGls.........................................................................................................................................................sshLElK..ssh...............s.p..hhph..pht....................................tl.................ttYhsQlQhphhlos..t............................................................................................................................................................ 0 189 280 336 +9421 PF09589 HrpA_pilin HrpA pilus formation protein Coggill P anon Pfam-B_3574 (release 21.0) Family HrpA is an essential component of the type III secretion system (TTSS) which pathogens use to inject virulence factors directly into their host cells, and to cause disease. 
The TTSS has an Hrp pilus appendage for channelling effector proteins through the plant cell wall and this pilus elongates by the addition of HrpA pilin subunits at the distal end [1]. 25.00 25.00 26.00 25.90 24.90 24.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.82 0.71 -3.66 6 71 2009-01-15 18:05:59 2007-05-08 16:00:00 5 1 55 0 4 39 0 119.80 48 98.42 CHANGED M........................s.hppLsshGptslNslGGA....................................hQGlNslsSusshpsNll...................usT..GsopSscAppcuhucu.......................................DAsuA+L..........................................................uhQucEstK+pp.ssLsAhpAu+EDuoNKKISuTtpNApGIsY ....................h.....slhSSLosAGpulVNslGGA....................................hQGlNoVcSuADRphuLh...................psT..GSoDSlDAspsuluKG.......................................DAcuAcL.........................................................puhusEEsuhhREpSMLAGFEstKEsLoNQIVAuKIcNuV.VQF. 0 0 1 3 +9422 PF09590 Env-gp36 Lentivirus surface glycoprotein Coggill P, Bateman A anon Pfam-B_3269 (release 21.0) Family This protein is found in feline immunodeficiency retrovirus. It represents the surface glycoprotein which is found in the polyprotein C-terminal to the Env protein. 
19.30 19.30 19.90 21.70 18.70 16.70 hmmbuild -o /dev/null HMM SEED 591 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -13.04 0.70 -6.31 5 1139 2009-01-15 18:05:59 2007-05-08 16:06:50 5 2 10 0 0 929 0 233.30 65 86.41 CHANGED FuQNRQWIGPEEAEELLDFDIATQloEEGPLNPGVNPFRVPGIT-pEKQ-YCsILQPKLQELR-EIpEVKL-E-NAGKFRRlRYLRYSDEpVLSllYLLl.GYl+YLhsRNKLGSLRHDIDIEss.scEpFsK+EKGsTlNpKYCRlCCIGsssLYLlLFIGIGlasGossAQVVWRLPPLVVPVE-oEIIFWDCWA..PEEPACQDFLGAMlHLKAshNISIQEGPTLGpWAREIWATLFKKATRQCRRG+lWRRWNETITGPhGCANNTCYNlSVlVPDYQCYlDRVDTWLQGKlNISLCLTGGKMLYNK...-TKQLSYCTDPLQIPLINYTFGPNQTCMWNTSLIcDPEIPKCGWWNQsAYY....NSCKWEQsDVKFQC........QRTQSQPGoWlRsISSW...KQRNRWEWRPDFES.EKVKISLQCNSTKNLTFAMRSSuDYu-VsGAWIEFGCaRNKS+pHopARFRIRCRWNsGoNTSLIDTCGscQNVoGANPVDCTMpuNTMYNCSLQcGFTMKIEDLIVHFNMTKAVEMY............NIAGNWSCsSDLPouWGYMsCNCTsosSo....hsKM+CPsc-GILRNWYNPVAGLRQALcKYQVVKQP-Y ..............................................................................................................................................................................................................................................................................................................................................................................................................AYY......NuC+WEps.sVpFQC.........QRTQSQPGoWhRsISSW...+QRNRWEWRPDFES.EKVKlSLQCNSTKNLTFAMRSSuDYG-VsGAWIEFGCHR..NKS+hHo..-ARFRIRC+WN.GsNsSLIDTCGp..s....NVoGANPVDCTMtAspMYNCSLQcGFTMKlEDLIhHFNMTKAVEMY............NIAGNWSCpSDlPtsWGYMsCNC..T..................KMtCPpppGIlhshYssshh.h.tLt+Y..lVKQP-Y................................................ 0 0 0 0 +9423 PF09591 DUF2463 UPF0328; Protein of unknown function (DUF2463) Coggill P anon Pfam-B_3132 (release 21.0) Family This protein is found in eukaryotic, parasitic microsporidia. Its function is unknown. 
22.00 22.00 55.00 54.50 21.90 21.90 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.48 0.70 -4.74 23 38 2009-01-15 18:05:59 2007-05-08 16:07:07 5 2 2 0 34 38 0 199.70 39 75.93 CHANGED FPhhhYhIhsKDsF-ps.hLRFIslLhPaSYSAlQahhLLaoN...W+uspKPEshLaphLYahLNlLLlsFulISILSIIshsl.....scWpss-s.hhaSllLPSFhlssTYLLSTSCsLVPGpIuFTDTGIslLIDlLILLs.llsllh..hhccsc....Yh.a.ulhSslLlLl....RhL+E+ahP..SccS..........u.PousWRVulhllILlLushlYshhshssl ....aPhhMYhIhsc-pFccs..FLRFIslLhPhSYSAlpahhLlaoN...W+uppKscshLpshLYahLslLLluFulISILSIlsFsl.....s.cWcs..scs.hhaSl.lLPShhlsssYLLSTSCsLlPGpIuFTDTGIslLIDlLILlsslssllh..hhccsc....ah.ahullS.lLlLl....RlL+E+hsP..sccS..........s.PossWRlulFllILlLuhhlYshhshhsh.... 1 34 34 34 +9424 PF09592 DUF2031 Protein of unknown function (DUF2031) Coggill P anon Pfam-B_2630 (release 21.0) Family This protein is expressed in Plasmodium; its function is unknown. It may be the product of gene family pyst-b [1]. 20.70 20.70 21.40 21.60 20.30 20.60 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.66 0.70 -5.20 44 121 2009-01-15 18:05:59 2007-05-08 16:18:06 5 3 5 0 117 121 0 178.40 43 83.33 CHANGED RlsILK.YVhFS.......IlICSF...EYuKN...ELYhlN......-RsIhLERNlINFRNNRILADsDNpFDLN-FYQSTLSLANQhN-hs-....Ds........cEItaLRNhIDSHIKKHKEsNTL.DLpNVD.pKTKKLIscL+KELEEl......KKElDN..KpNuELuIQPIpDKpIhKKDENsSVSEcEsF+QLENp.s.lt............E.NcIs.SSsphhc.Khpp+h+..Kttpphlhps..lhhlshshslh.lsG....hh.lhhllls .....................................thhp.hlhas.......lhlh.F...casp.s.....................ELa.hs......tttlhhERNlIsFRNNRILussDNpFDLNpFYpSTLSLssQhs-hs-........ss.......cEIhhlRNhIsS+IKcHKcssT..sLpNlD.pKTKKlIpcl+cELEEl......KKElDN..htN.sclsIpsIpsKhIhKKscNssVSEp.EsapQLcNp...........................................c..pht..s....p.p..pchp..+...thhhp...hhhh.hh..h.h..h.s.h.h.hhhh........................................................................................ 
0 0 69 117 +9425 PF09593 Pathogen_betaC1 Phage_C1; Beta-satellite pathogenicity beta C1 protein Coggill P anon Pfam-B_991 (release 21.0) Domain Cotton leaf-curl disease - CLCuD - is of major economic importance in cotton-growing areas of the far-east. The infectious agent appears to be a single-stranded DNA molecule of approx 1350 nucleotides in length, which, when inoculated with the Begomovirus into cotton, induces symptoms typical of CLCuD. This molecule requires the Begomovirus for replication and encapsidation [1]. DNA beta encodes a single protein, betaC1. The intracellular distribution of betaC1 is consistent with the hypothesis that it has a role in transporting the DNA A of Begomovirus from the nuclear site of replication to the plasmodesmatal exit sites of the infected cell. The DNA beta-encoded protein, betaC1, is the determinant of both pathogenicity and suppression of gene silencing [3]. 25.00 25.00 46.80 43.60 19.70 19.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.53 0.71 -4.38 36 475 2009-01-15 18:05:59 2007-05-08 16:19:52 5 1 198 0 0 438 0 116.10 44 96.96 CHANGED TIpYsNpKGlcFlIDVRLpppppIhVpIplhST+SPsLsKpcahIPYsHsGIIsP....FDFNulEEuI+shLclMY+-Sslp-F+pEDMl-sIDIlMMccAsVl-Icls-cYcVpspssV .............TIpasNp+GlcFh.lDV+lp.-sppIhVclclhST+..SPuLsKpcFhIPYsapsIhsP....FDFNul..E-uIpshlchhYpcupIpEh+pEDhVchlDIlMhcpssllsh-Vh-.YslspphsV................................. 0 0 0 0 +9426 PF09594 DUF2029 Protein of unknown function (DUF2029) Coggill P anon Pfam-B_1780 (release 21.0) Family This is a putative transmembrane protein from bacteria. It is likely to be conserved between Mycobacterium species [1]. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -12.03 0.70 -4.87 135 2105 2012-10-03 03:08:05 2007-05-08 16:20:35 5 21 708 0 676 2166 215 239.00 18 52.91 CHANGED ah.YPPhssllhsPh.u...........hLs.....hssuhhlahhhs...lsshhhs.lthhhphh........................shhhhshh.hhlshtsl.hsshshGQhshllhshlhhshhhhh...tt.......t.h....................h.uGlhlGLusslKlh......Phlhsl..hhLh....p+c........a+uhhsu.sssssshssl..uhhhhs............sshthahp.hlhts.ptl...........htt.ssto...hsuhlut...hsh..............hhhhh...............shhs..sshshhhhhthh.........ps.sphtths...ssuh.h.......lLsSP ...................................................................................................YsPhssl.h...h.h.sh.s...............................h.ls................hsssh..h..l.h...hhhs............hhsl...h...hs...lhhh.h.ph.hsht........................................hhhhs.h.h.h.h.hh.s..h...pPl...htsh.sh....u..p..h..s.h...lhhslshhshhhht.........pp.......................p.h..............................h.uG.l.h.l.G.Lu....su..hKlh................Pslhhl......hhlh.......................s+c..........................................h..+s...hhtu....sssss.shssl......shhlhs........................................................p.s....h.p..h..ahp....hhhtt..tth........................hph.tspu........h.t.s..hhs.p.....hsh..................................................shhhh.hh.......................................shhs........hshsshhhhthh...............pp...tphht.hs.........hsuh.h.....hlh................................................................................................................................................................................ 0 204 477 607 +9427 PF09595 Metaviral_G Metaviral_G glycoprotein Coggill P anon Pfam-B_922 (release 21.0) Domain This is a viral attachment glycoprotein from region G of metaviruses. It is high in serine and threonine suggesting it is highly glycosylated [1]. 
25.00 25.00 56.90 56.80 20.10 19.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.38 0.71 -4.65 6 400 2009-01-15 18:05:59 2007-05-08 16:31:51 5 1 2 0 0 422 0 170.10 51 88.21 CHANGED LIGlTsLShALNIaLIIsYshpKNhocoEHpss.PPsEsSKcTshsssssPsTsPNsQpsTQ.oTEsST..sAostup.cTtsTsTPDsTsp.posD+HTT.PpuopspTopsspcKpsoRsso......+ppos.pToTtAspsssTsppoSsG+csTTTSspPcosuTTQspEpTopsss.oSsSpt ..LIGlTsLShALNIaLIIsYsh..+shocsEHpss.PPsEssKETsohshssssTpPNsQpsTQ.oTEssT..sAsStupscTEsTsTPDsTsp.posDcpTT.PppopppTspTspcKpsspsso.......+ppos.psoTtAsppssThppoSsuccsoTTSspscsusTTQsp-pTspsss.uS.st......................... 0 0 0 0 +9428 PF09596 MamL-1 MamL-1 domain Coggill P anon pdb_2f8x Domain The MamL-1 domain is a polypeptide of up to 70 residues, numbers 15-67 of which adopt an elongated kinked helix that wraps around ANK and CSL forming one of the complexes in the build-up of the Notch transcriptional complex for recruiting general transcription factors. 21.00 21.00 22.40 35.70 19.40 17.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.98 0.72 -4.17 6 82 2009-01-15 18:05:59 2007-05-08 16:39:24 5 2 56 4 56 113 0 60.40 48 8.06 CHANGED PphHSsVVERLRpRIEsCRRHHsoCEuRYppupsEphEhERccTlsLhpRslcu+uKRuAp ....P.hHSsVVERLRRRIEsCRRHHssCEsRYEpspsEphEhERpcThsLpQRsLcuKAKRuu... 0 14 19 36 +9429 PF09597 IGR IGR protein motif Wood V, Coggill P anon Manual Domain This domain is found in fungal proteins and contains a conserved IGR motif. Its function is unknown. 22.40 22.40 22.50 22.90 22.30 22.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.56 0.72 -4.06 18 185 2009-01-15 18:05:59 2007-05-08 16:50:16 5 7 159 0 138 171 1 56.80 39 30.06 CHANGED lpsFLptIGRsssc+s-.pFEs.W....-cLFp..hsoppLK-hGl.ss+pRRYILphtc+aRpG ....................psFLshIGRsh..pp+ss.Kh..ps..W....cpLas..hsStpLK-h.GI..Ps+pR+Yl...Lcatc+aRpG......... 
0 40 81 117 +9430 PF09598 Stm1_N Stm1 Mistry J, Wood V anon Pfam-B_39435 (release 21.0) Family This region is found at the N terminal of the Stm1 protein. Stm1 is a G4 quadraplex and purine motif triplex nucleic acid-binding protein. It has been implicated in many biological processes including apoptosis and telomere biosynthesis. Stm1 is known to interact with CDC13 [1], and is known to associate with ribosomes and nuclear telomere cap complexes [2]. 21.80 21.80 22.50 22.50 19.50 19.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.86 0.72 -3.28 24 195 2009-01-15 18:05:59 2007-05-09 10:35:27 5 3 137 2 126 189 0 68.20 35 20.52 CHANGED NhaDLLGNDs............E.DsstsstsPs.+tlsKpsspstKt-............sP...PsussPs.........pup+stspsoG.NEuAhR ........N.FDLLGsDs............E.Dssphs....ssss.....+tl....sK....s.s......spstKts.............ss..p.P..sps.sPsttt...............+ss+s.ts.pssG.scsuh+............................................... 0 22 67 105 +9431 PF09599 IpaC_SipC Salmonella-Shigella invasin protein C (IpaC_SipC) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of proteins associated with bacterial type III secretion systems, which are injection machines for virulence factors into host cell cytoplasm. Characterized members of this protein family are known to be secreted and are described as invasins, including IpaC from Shigella flexneri and SipC from Salmonella typhimurium. Members may be referred to as invasins, pathogenicity island effectors, and cell invasion proteins. 
25.00 25.00 26.90 25.80 21.40 20.10 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.78 0.70 -5.42 5 167 2009-01-15 18:05:59 2007-05-09 11:34:12 5 2 152 0 6 107 0 338.40 53 86.65 CHANGED +ssspuLhps-uAhushs.....................AtspslLsohLsDctpsssLspslphhsuspls+lssh....VQ+cLcpcuAclssGpshDISu................hSocAsuLlhu.lssLMusLspADs+LuoKLSLlSFDATKoAAuShlREGhAsLSSSIsGuAsQluITGVGAKpphsGLssc+GALK+NLsstscLssEu+slpLsLNpQNsssLuA...DtlppltlK+ssu-usKplsc.p...................lssuNcplSsEHcusLupcstulpc+I-hcppsa-psplKAQppQppGDtlMcsSssAGNIuuuSGpYAustEcuEQpISQuuu+sApoASs-o+EuS++ocplIQElL+sl-SIsQS+o ...................................................................................+.suchuLu.sLuA.APsV....................LspsssslToaLp.sshQsochsQ-lNtLAsslTsKss-s....VQTpL+EppAEV......GchhDISu................MSSsAVALLuA.AssLMhoLNQA....DoKLSGKLSLVSFDAAKosAuSMhREGhsuLSGSISQSALQLGITGVGAKLcaKGLpsERGALK+NuAKlcKLoTEu+uIKssLNGQNol+LGA..sDuLKolshK+T...GsDuTKsLs-soh................htIKcSNKplSsE...HQAlLSKRlE.SlE.ScI.cL.EQsTMDhT+l-ARKMQMTGDLIM+sSsolGsIAGASGQYAAsQERSEQQISQsNNRVASTASDEARESSRKSTSLIQEMLKoMESINQSKA................ 0 2 2 4 +9432 PF09600 Cyd_oper_YbgE Cyd operon protein YbgE (Cyd_oper_YbgE) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a small protein of unknown function, about 100 amino acids in length, essentially always found in an operon with CydAB, subunits of the cytochrome d terminal oxidase. It appears to be an integral membrane protein. It is found so far only in the Proteobacteria. 
22.00 22.00 27.30 26.90 20.80 20.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.80 0.72 -3.82 21 679 2009-01-15 18:05:59 2007-05-09 11:41:23 5 1 674 0 64 249 4 80.50 64 85.54 CHANGED ..LR........uLSllLAlhlsuhlhWsPstaAsphuuhsshhu.hllWAlCuuhlaGVGFcP+phlWphlF.uPhhuhsILhhslh....aah .........................PLRALSFVMAllLAGCMFWDPSRFAA+TSpLEIWHGLLLMWAVCAGlIHGVGFRPppVlWQGI.F.CPLLADIVLIlGLIFFF.h.................... 0 3 22 44 +9433 PF09601 DUF2459 Chp_urease_rgn; Protein of unknown function (DUF2459) TIGRFAMs, Coggill P anon TIGRFAMs Family This conserved hypothetical protein of unknown function is found in several Proteobacteria. Its function is unknown and its genome context is not well-conserved. It is found amid urease genes in at least one species. 21.80 21.80 23.40 24.10 20.60 19.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.06 0.71 -4.57 33 265 2009-01-15 18:05:59 2007-05-09 11:44:24 5 1 262 0 60 200 49 167.20 41 75.35 CHANGED IhllssslHTDlllPhss....hhthhthhsssslshstsss.p.alshGWGs+sFYhsossas-lsstsslcAl.ssss..oVl+Vsshsshs......ssssltplslupspappLhshlpsoFttstsuts..l.ssuasss-tFatApGcashhpTCNsWoussL+sAGlchuhWs.Phshul ............................IYllSpGhHT.sIlhPscss.............shshs..plcpc.as.pt...p.ahplGWGD+uFYts....p-losuhsLpAhFhsou..uVhHlssaushPc..........suu-l+sL...hLpssQhssLhcalucSFsRDtpGs.lIsl..c...........tG.hhucspFYsAsGRYulLsTCNpWTAcuLcuAGLshss.h.Lstu.s......... 0 19 41 49 +9434 PF09602 PhaP_Bmeg Polyhydroxyalkanoic acid inclusion protein (PhaP_Bmeg) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a protein found in polyhydroxyalkanoic acid (PHA) gene regions and incorporated into PHA inclusions in Bacillus cereus and Bacillus megaterium. The role of the protein may include amino acid storage. 
22.20 22.20 22.60 22.50 22.00 20.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.21 0.71 -4.42 2 94 2009-01-15 18:05:59 2007-05-09 13:53:14 5 3 92 0 6 35 1 162.60 80 76.60 CHANGED MpTh.Y-.llDAhWcpWopuLp.hususKQlEQhTLcsLcQQQ-h...lTpuV-pLptp.pQapAphps..pchVcpL...sGNulsDphpEWpp+h+Eh.s+hQpLhhs.oKoShSllpQsptQacpsspQhlEpQphpRpEhQ+..-ualEphK.hQhphApphEp ...........................METKPYE.LVDAFWKNWSQSLSLFSSAGKQLEQLTLET.LKQQQDALHKLTSGVDELEKELQQF.......TAQFNNQYTDYVKQL....TGNSLNDQIN..EWQ-KWKELSAHM..pQLTVSPTKTSLSILTQTSGQFEETTKQFIEQQQLQREEAQKQLEGFLEEFKoKQLELAKKFEE...................... 1 0 2 3 +9435 PF09603 Fib_succ_major Fibrobacter succinogenes major domain (Fib_succ_major) TIGRFAMs, Coggill P anon TIGRFAMs Domain This domain of about 175 to 200 amino acids is found, in from one to five copies, in over 50 proteins in Fibrobacter succinogenes S85, an obligate anaerobe of the rumen. Many members of this family have an apparent lipoprotein signal sequence. Conserved cysteine residues, suggestive of disulfide bond formation, are also consistent with an extracytoplasmic location for this domain. This domain can also be found in small numbers of proteins in Chlorobium tepidum and Bacteroides thetaiotaomicron. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.59 0.71 -3.79 40 375 2009-01-15 18:05:59 2007-05-09 14:02:06 5 24 93 0 169 369 212 179.60 19 39.68 CHANGED VpIGsQhWMAcNLcsscYRNG-.sl.psts.ttW.....shssuAaCaYsscspssph.........YG+LYNWaAV.........sDs..................R.GLAPpGWHlPoD.sEW.ptLtshl.............G...GcstuGutLKu..h.....stWttss.sss........sspsGFsA........lP.uG.hRsssGsF.hhtGphuhaWooocss....stpAasRplshs....s.sslt+spss..+p...tGaSVRClK ..................................................................................................hstphWhspNLp...................................................ssshs..........pt..psttt.............................hG.hhYsatus....................ss................................................................p.slsPpGW.+lPop.sEa.ppL.hphh.................................................s............p..tsupt.lps...............staht.t.....t..........................tsthGFss...........................hs..sG...ht........t...........s..........s.....s............t.......h.......tt..sphshaWo.s..s.pts..........s.t.sh..h.h.h.ht......t..t...h..ps.ths........pt......uhulRCl+..................................................................................... 1 120 132 160 +9436 PF09604 Potass_KdpF F subunit of K+-transporting ATPase (Potass_KdpF) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry describes a very small integral membrane peptide KdpF, a subunit of the K(+)-translocating Kdp complex. It is found upstream of the KdpA subunit (IPR004623). Because of its very small size and highly hydrophobic character, it is sometimes missed in genome annotation. 
19.20 19.20 19.40 19.50 19.10 19.10 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.52 0.72 -6.83 0.72 -4.02 40 728 2009-01-15 18:05:59 2007-05-09 14:09:44 5 1 704 0 103 314 5 25.00 44 58.04 CHANGED hhluslluluLhl...YLhhALlcPE+F .........llulllshhLls...YLlYALlpsEcF.. 0 34 62 84 +9437 PF09605 Trep_Strep Hypothetical bacterial integral membrane protein (Trep_Strep) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of strongly hydrophobic proteins about 190 amino acids in length with a strongly basic motif near the C-terminus. It is found in rather few species, but in paralogous families of 12 members in the oral pathogenic spirochaete Treponema denticola and 2 in Streptococcus pneumoniae R6. 26.50 26.50 27.50 27.00 26.40 26.40 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.34 0.71 -4.62 23 1171 2009-09-11 16:54:32 2007-05-09 14:23:32 5 2 634 0 118 730 19 180.40 28 94.33 CHANGED ps+DLlsIGlFuAlYFlh.hhlsshluhhsPhhh..hhtPslsulluGslaMLhls.+VtKhGslolhullhullhhhsGph.hhhhlssllsullA-llt.phu...pY+shptshluYslFuhhhs..G.shlPhahhh-sYh.tshhppGhupsYl-shhthhss.hhhhlhllssllsuhlGuhlGp+lLKKHFcK ................................hKDllshGlFsllY.Flh.hhls.s.h.l..u..h.l.....s.hhh..........hhhPshsulluGslahlhhs.KVt+hGsl.hlhGllhulh.h.h.hs.Gas..hsshlsull.s.ullA.-llt..p.hG...pYc.s...h.p.t.sh.luaslFu..h.shs..G.shl..hal.s.tctYh.tph.h.tp.uh.up-Ylstlh.th.h.ss...hhhhlhlh.sshlsullGuhlGptllKKaFpK.................................... 0 64 86 105 +9438 PF09606 Med15 ARC105; ARC105_Med_act; ARC105 or Med15 subunit of Mediator complex non-fungal Coggill P anon pdb_2gut Domain The approx. 70 residue Med15 domain of the ARC-Mediator co-activator is a three-helix bundle with marked similarity to the KIX domain. 
The sterol regulatory element binding protein (SREBP) family of transcription activators use the ARC105 subunit to activate target genes in the regulation of cholesterol and fatty acid homeostasis. In addition, Med15 is a critical transducer of gene activation signals that control early metazoan development [1]. 26.20 26.20 26.40 26.40 25.90 25.60 hmmbuild -o /dev/null HMM SEED 799 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.78 0.70 -13.71 0.70 -6.34 7 273 2009-09-11 06:41:54 2007-05-09 14:50:32 5 17 89 1 140 249 1 349.30 26 82.33 CHANGED QpVVspIc-AhppsGhsp.....oKsut-MEsHVFtKA+o+DEYLuhVARLIlHhR-hppKppQsp.s.....................DPhNALQsLsutss.sst....shshGP.tP.GtphGG.Gshoshhpsh................psQ...u..shu.PpthstVuths.usQhss.h.........tt.pt...ttttuthtt.t.p.tQt...ttt.P.tMhtshhs.....t..tttt.....ttt.t.ts.pQ.p.h...t.Q.p......Q......p.phpQ.ps.tputsQh.p..tsput.psQphttu.sQ..shpQt.sttp.p.s.hthh.tQhpQh.........stG.ttshupst..Psp.hss.s..Qt...hs.............hhs.tt.t.h............Q.psptQtQpttsp..stt...........................................tQ.s...tstpuuph.u.t....................................................t.....ptsshsshsssthsptp..MMSussPs........pspssQtM..sPQPp.PpPstPsups..sp.ssus.hPSP.uhhPSPSPQ.uQsssstRs.tp......SPG.sL.......NTPGp..sus.SPhssp..EEp.Yh-Kh+pLoKYIEP..L+RMIsKh-ps.tch.cchuKMKpLL-ILosPopRsPLcTL.KCEhALp.........sphus.p.sP..........hspPLl-AV.uNlpSPlhNHoL.R...TFtPshpAlaGssIts.sssu+p.Rlp.D-........pQpIPplLQGElARL-sKFhVsLDPstpuss.tsl+LICpLDDKpLPsVPPlplolPt-YP.pSPphp.tpppYsAsPFL.p.Vp+shhu+lspLPthaSLotLLsTWt.uVRQAC......................hLuh 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hst................................................................................................................................................................................
..................................................... 0 51 60 103 +9439 PF09607 BrkDBD Brinker DNA-binding domain Coggill P anon pdb_2glo Domain This DNA-binding domain is the first approx. 100 residues of the N-terminal end of Brinker. The structure of this domain in complex with DNA consists of four alpha-helices that contain a helix-turn-helix DNA recognition motif specific for GC-rich DNA. The Brinker nuclear repressor is a major element of the Drosophila Decapentaplegic morphogen signalling pathway [1]. 23.40 23.40 23.40 23.50 23.30 23.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.81 0.72 -4.40 3 116 2012-10-04 14:01:12 2007-05-09 14:53:10 5 11 65 1 88 108 0 53.40 40 11.82 CHANGED GSRRuaouuFKL..QVlESacsDNsC+Gs.RAsA+KYNIpRKpVcKWLQsEsQLQsucAp ....................RRoasssFKL..pVl.-hA.c..psssCp.....sA+cFsl..pc..+pVccWpchcspLpst................... 0 25 41 71 +9440 PF09608 Alph_Pro_TM Putative transmembrane protein (Alph_Pro_TM) TIGRFAMs, Coggill P anon TIGRFAMs Family This family consists of predicted transmembrane proteins of about 270 amino acids. Members are found, so far, only among the Alphaproteobacteria and only once in each genome. 
20.40 20.40 20.50 21.30 19.70 19.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.40 0.70 -4.84 49 249 2009-01-15 18:05:59 2007-05-09 15:58:14 5 2 244 0 90 221 746 229.60 36 87.83 CHANGED EplVhuLSpccVsIossFsGu-lllFGAlcptssh.tt..shDllVslpGPspslslR+K-RhhGIWlNs-uhphcssPoFYAVAoo+Plcclssts.pttchplGls.........hs...................ttssss.sssss..pFpcALlRl+pppGLY........ppptsuVplhppsLFRsslpLPAsls.GsYpsclaLhRsGplluptpsslpVpKsGhEpalashA+ppshlYGLhulslAlhsGWhAuslFRth ...................Epl.lulSscpluITusFsGs-lhlFGAlcpts.sh........tt..thDllVslp.G.Ps.psl.s.VR+.K-RhhG.l.WlNssShpFtslPshYulAooRslpcls.s..tp.shp...phplGlpph..........Ls......................ss.ss.ss...sssh.pFtsuLh+l+pppGLY.........ppc.uuVph....hsp.....oLFR....AslsLPAslPsGpaps+shLhRsG.thlsptssslclhKsGlEphlashA+ppsllYGLhulhlAlhsGWhuuhlFR+.................... 0 29 59 70 +9441 PF09609 Cas_GSU0054 CRISPR-associated protein, GSU0054 family (Cas_GSU0054) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a rare CRISPR-associated protein. So far, members are found in Geobacter sulfurreducens and in two unpublished genomes: Gemmata obscuriglobus and Actinomyces naeslundii. CRISPR-associated proteins typically are found near CRISPR repeats and other CRISPR-associated proteins, have low levels of sequence identify, have sequence relationships that suggest lateral transfer, and show some sequence similarity to DNA-active proteins such as helicases and repair proteins. 
20.80 20.80 21.90 21.80 19.00 18.60 hmmbuild -o /dev/null HMM SEED 519 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.62 0.70 -5.68 4 90 2009-01-15 18:05:59 2007-05-09 16:03:54 5 2 55 0 36 91 7 277.10 14 76.09 CHANGED hhulohphhsG+acuss..........sD.usP...EWPPuPhRlFpAlVAoht.chs......E-h-sLpaLpu...Pslhthssshsu........psshphVstsasRG.pchasGsus.hpc......pslslPcsssVthlWc.s-ss-tplA.ls+lstplsahGRspShshspVV.sGul..........hs.cWlsct-s.....GslslRsPhsG.....pLp-Lps+aEshhshh.hGsh..s.PhssYs.hsphlu.-.s.ps..s..h.ch.shG.+tssssphtLDltshs..T-uLRRAslup........hcssplsuhlpGH....GD-tp......HsAYLslssluc+tAcG+l.slGshhPs.loth-......GhhGs-.s..huLh+s+pLtclcLc.ss....VuhhuLps....pths..tuS+oWsoVTPhhLsRaP............................s++lp..................ssullttSlspsGhP.Pttlpslss.tlsGshR....hcRYsshcshpphP....hHs+Isa......Pp.lcGPlhlGusRaaGhGLFsP .........................................................h...............................t.....E..PtP.Rlhtuhhush....th.....................t.........hl....t......................h.....................................................................................................................................................................h.h......................h...hh..h.........s.h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.......................................................... 1 28 32 32 +9442 PF09610 Myco_arth_vir_N Mycoplasma virulence signal region (Myco_arth_vir_N) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents the N-terminal region of a family of large, virulence-associated proteins in Mycoplasma arthritidis and smaller proteins in Mycoplasma capricolum. It includes a probable signal sequence or signal anchor, which, in most instances, has four consecutive Lys residues before the hydrophobic stretch. 20.40 20.40 20.50 24.10 19.40 19.30 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.55 0.72 -4.00 6 43 2009-01-15 18:05:59 2007-05-09 16:05:19 5 1 12 0 15 38 0 32.70 60 2.93 CHANGED MYFLKKKKNKILshALVASLATSLSFGSVIYYS .MaFlKKKKNKILshALlASLssSlSFGSVIYYS... 0 4 15 15 +9443 PF09611 Cas_Csy1 CRISPR-associated protein (Cas_Csy1) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2465 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy1, for CRISPR/Cas Subtype Ypest protein 1. 
20.20 20.20 20.50 20.30 19.60 20.10 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.31 0.70 -5.52 18 182 2009-01-15 18:05:59 2007-05-10 09:29:29 5 1 173 0 41 149 8 345.90 35 83.41 CHANGED spYphpsWLsD.AA+RAGQIpLVTHshKasHsDA+GSslashs......tptsspaLuo..s.sphAhDslGNAAALDVhKhLplph.-.Gc.oLLspLppsDssALssLucsscQ....hcpWhsuFtslhssc..phoSHpLAKQlYFPl...........tsspYHLLuPLFuSoLAatlap+lppsRFu-psKtARpA++cpcaHspshhpYPsLAsQphGGTKPQNIStLNSpRGGcsaLLsShPPpWc.spp+sPhphcSlFp.psthpspspstlppLppaLtssp...........sNhclRppRs.phlsplIDpLlphsuplQph...sGWSspspp..pLtcspQLWLDPhRs..........sDpsFppERcpsD.W.pplupcFupWLNppLpp....ppl.lG-sEt+cWpp ..............................................cYphcsWLs.c.AA.p.....R.AtQlphsTHssKhtHsD...u...Ku..sslh.....h.s.......phspshlso...slsph.......s.hD.s.h.GN.AAA.....LD.lsKhLphph.-...Gp...oLlstlpp.s-.phhphhu....p...spp............hp.pahpuFtphhpsp...p.soHclsKQlYFPl...........tsspYHLLsPLasSSLspthap+lsp.sRFS-psKpuRps++pschp.pshhph.slAs.phGG.........TKPQNIShL.NS.pGG+saLLsShPPpap..st..+.P.p.cohF..p..sththtspshltphpphltsst................NshclRstRt.phlspllDhlhthsttlQph...sGWSpp.....pL.ttp..plWLDPhRs...............s-ttFpp.c.cpppD.W.pplspcFutWL....NttLpc.......pphshGssEhpcWp.t........................................... 1 10 25 33 +9444 PF09612 HtrL_YibB Bacterial protein of unknown function (HtrL_YibB) TIGRFAMs, Coggill P anon TIGRFAMs Family The protein from this rare, uncharacterized protein family is designated HtrL or YibB in E. coli, where its gene is found in a region of LPS core biosynthesis genes [1]. Homologues are found in Shigella flexneri, Campylobacter jejuni, and Caenorhabditis elegans only. The htrL gene may represent an insertion to the LPS core biosynthesis region, rather than an LPS biosynthetic protein. 
23.60 23.60 23.80 24.80 22.20 23.50 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.82 0.70 -5.14 3 269 2009-01-15 18:05:59 2007-05-10 09:39:14 5 1 240 0 15 100 74 249.20 59 89.51 CHANGED M-spTTIITAFFDIGR+Da.......KclsRSNDKYaSYFEFLAuLKNcMIIYTsEslKc+IcuIRscasLE-KTThIIlc-Ip-hccpIYKRII-IppDcoF+Na...lp.RphcNsEshSPcYsYLMhLKuaFVsDAIsRGLT-TN.lAWIDFGFNHGGNVFsshcpFDF.a...sshDENKINLFoIKK....sDQQslF-IhhuhEsaIMGGlIlusS+pWccFY+hsLEShKIhsShGIlDDDQlIhLaC..sYRRNs..sYNhIt+upWFDuLpHFpspslGuKLpI ................McsShTIlTAaFDIGRGDWsupcGF.ccLsRSsDsYFSYFE+LAuLENcMIIFTSPDLKsRVEAIRN.....GKPTT..VIVIDIK...Kp.....h.+a.I+....s.+.....Ic.K..IQ.KDESFTN+...L.-P+plK.NPEYWSPEYVLVsNLKsYFVsK.A.IphG.L...VK....Ts..LVAWIDFGYCR+s.N.V.TpGlKhWDFPF.......DEsKMHLF.TI....KKGLsloSpQplFDhM.IGNHVYIIGGAIVGSQHKWKEFYKLVLESQKITLNNNIVDDDQGIFVMC..YYKRsDLFN.LNY.LG.R.GKWFDLFRCFRSNTLGAKMQA.......................... 0 8 9 15 +9445 PF09613 HrpB1_HrpK Bacterial type III secretion protein (HrpB1_HrpK) TIGRFAMs, Coggill P anon TIGRFAMs Family This family of proteins is encoded by genes found within type III secretion operons in a limited range of species including Xanthomonas, Ralstonia and Burkholderia. 26.90 26.90 27.10 27.80 26.00 26.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.86 0.71 -4.74 8 155 2009-01-15 18:05:59 2007-05-10 09:40:55 5 1 96 0 27 103 0 158.00 31 94.89 CHANGED shhpCssplluGLl-llssALpts......chtDhptlLpALRlLRPchsth-hhDGWLplsRtpWs-AtRlLR-l...-ssssshshu+ALhAhCLpuhpDssW+ttApclLtpssss-ulsLV+sLhuppshhpA.t....................ssssuu-Asussspu............s..tp.pahRl ..............h.pCstthlsGLl-shssALpts......c.tDhptlLsALRVLcPphssh-hh-uhLplt+tpas-AhRlL+c.l...ssss...sptshu+ALhAhCL.....pshsDssW+th........AsplLtsss..sssuhsLV+uL.stpshh.h......................................s.htsp..hts..s............................................................. 
0 5 11 16 +9446 PF09614 Cas_Csy2 CRISPR-associated protein (Cas_Csy2) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This entry, typified by YPO2464 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy2, for CRISPR/Cas Subtype Ypest protein 2. 25.00 25.00 32.40 29.70 19.10 21.30 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.62 0.70 -5.21 25 219 2009-01-15 18:05:59 2007-05-10 09:58:56 5 2 202 0 47 169 10 266.80 33 80.18 CHANGED LlLP+l+VpNANAlSSPLTaGFPulTAFhGas.HALpRKLsspt...slplpusuVlsHcaplpshps...hs......psFsLTRNPL......sK...sGso........uuhlEEGRhHLslSLllpssuppt............sppptpthhppltpll.tshRlAGGoll.......phtpschhphs........tpchcphh++LlPGFsLlsRp-LLssthppLpps....................stsphlDAaL-hus.lpapshsspssp............Wp...+.....p.sGWlVPlslGYpul.SPh..atsGplpssRDspsPapFsEolaulGEWlsPHRl.p...........slsplLWpacsc..ppsl.....YhCp ...........lll.cl+lpsANAlSu.lThGFPuhTsahGhs.HALpR+Ltp...pt.....sl..plpGsulhsHphplpsapss..hs..........hsFthTRN...PL......pK........pups............suh.EEu+hHLslSLllEhpsp.t.............ptttpthhpplpphh..ptRlAGGolh............shtpsplhph..............phcclhh.pLhPGasLh-RpphLscthpphppt............................sp.p.LDAhL-hss.lchps..stsss..................Wth..h.......tGaLVPlssGYpuI.SPl..htsGpsppsRss....phP...apFsEslaulGcWhh.a+l.p...................................slpphhWpYchp...pp.h.....Yhh............................................ 0 9 25 35 +9447 PF09615 Cas_Csy3 CRISPR-associated protein (Cas_Csy3) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. 
This entry, typified by YPO2463 of Yersinia pestis, is a CRISPR-associated (Cas) entry strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy3, for CRISPR/Cas Subtype Ypest protein 3. 25.00 25.00 29.50 29.50 22.50 24.90 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.91 0.70 -5.34 23 219 2009-09-11 10:34:39 2007-05-10 10:02:20 5 1 207 0 48 165 10 318.40 48 97.09 CHANGED csASVLAFERKLssSDAlhauusW-s+s...pasPlplpEKuVRGT...ISNRLK.stspDPAKL-A-l..ppsNLQpVDVAsLPs-sDTL........KlpFTLRVLushupPSsCNsstYpppLtpslssYhpcpGFsELAcRYApNLANGRFLWRNRlGAEslplpVs....spstpsas.FDAhpasLRsF...spsspplspLuphItpGLuu.ssaslLcVsAalclGtGQ.EVaPSQELlLDc...s..cupKSKhLYpV....s..........slAAhHSQKIGNALRTIDTWYPcss-....hGPIAVEPYGuVTspGpAYRQPK.pKtDFYTLL...DsWlh+schP......slE.pQHYVMAlLIRGGVF.GE ..........sASVLAFERKLssSDAhhaussW.ps.pp...phpPlplpEKuVRGT...................ISNRLKs..shss....DPsKL.DAEl.....pKuNLQpVDsAsLs..-sDTL.........cspFTL+VLushupPusC........ND..pYptpLtsslpsYlpppGFpELApRYApNlANGRFLWRNRlGAEpIpVpVs...........tspppsap..F.supphuL+pF....spsspplppLAphIppGLuu.psas.......hLpVpAhl+lGtGQ..EVaPSQELVLDp.......pupKSKlLYpl.....s......................shAuhHSQKIGNAlRTIDTWYP-ssp....hsPIAVEPYGuVTshGpAaRpPK.pKhDFYTLh........DsWlh+sphP......sl.E.pQHYVMAsLIRGGVFG............ 0 10 25 36 +9449 PF09617 Cas_GSU0053 CRISPR-associated protein GSU0053 (Cas_GSU0053) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is found in CRISPR-associated (cas) proteins in the genomes of Geobacter sulfurreducens PCA and Desulfotalea psychrophila LSv54 (both Desulfobacterales from the Deltaproteobacteria), Gemmata obscuriglobus (a Planctomycete), and Actinomyces naeslundii MG1 (Actinobacteria). 
25.00 25.00 29.60 25.20 19.70 18.30 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.29 0.71 -4.47 4 71 2009-01-15 18:05:59 2007-05-10 10:12:02 5 2 66 0 28 72 5 173.40 26 46.68 CHANGED uulllcAsLpPVpGts.+lhPsTF.....uussYph..thsDG...ssslllDSVsS.ANRlE..Lhshc.st..lV......P.Ipspls..stploslEhsHRhsDAhl..RsS.lstsp..spsltptLpssps+ss......csLhthtPpoLlaGsWDS...cpsptsKluR.luuhI.uYsVcsl....ppuu ....................................pLp.PssG...tltPssa...ssh......ss.usYsh......ph.....s.cG...............hpslLlDSspStANRhEthlhp.t..h..csup.....hl..........................thPhlpV..ph.p................sst..p.h..o.s.....lptsHRhsDual...Rsup.h...st.t...........h......pcpsh.hp.shp.s.s.s.psh.................psl.hchsPsuLlaGs.WcS....tpt..s...sph+lsRslsupI.u..........s............................ 0 17 24 25 +9450 PF09618 Cas_Csy4 CRISPR-associated protein (Cas_Csy4) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR (Clustered Regularly Interspaced Short Palindromic Repeats) is a widespread family of prokaryotic direct repeats with spacers of unique sequence between consecutive repeats. This protein family, typified by YPO2462 of Yersinia pestis, is a CRISPR-associated (Cas) family strictly associated with the Ypest subtype of CRISPR/Cas locus. It is designated Csy4, for CRISPR/Cas Subtype Ypest protein 4. 
25.00 25.00 25.10 25.10 24.40 24.80 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.26 0.71 -4.47 32 230 2009-01-15 18:05:59 2007-05-10 10:15:38 5 2 218 7 52 183 6 181.10 33 97.59 CHANGED Yl-IplLPDsEhstshLhstlaspLHtsLsp....hspscIGlSFPpast............sLGspLRlau.sppsLppLptpsWlpshpD..YsplusltsV.Psssp.apshpRhpspu......shpRhh+Rhh+Rt....thstEpsptthspphp.pph..shPalplcStS.......ssQ.pFhLFIc..tphtspsssGtFsuYGLSss.......uTVPaF ......................Yl-IpllP...c.s...EhstthLhutlaspLHpsLst.............ptpscI..G.VSFPphst..............sLGs.pLRlau.sppsLppLptp.sWh..p.s.Lp.D..Ysplo.pltsV..Pp.p.sp..apshpRsQsKu......stpRhh+Rthp+t..................tlsp-psttch.pptp.ppt...............shPalpLcStS........opQ.pFhLaIc..tphts..p.s..spGpFsuYGLSps.......uTVPaF........... 0 12 28 39 +9451 PF09619 YscW Type III secretion system lipoprotein chaperone (YscW) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is encoded within type III secretion operons. The protein has been characterised as a chaperone for the outer membrane pore component YscC. YscW is a lipoprotein which is itself localized to the outer membrane and, it is believed, facilitates the oligomerisation and localisation of YscC. 21.50 21.50 21.50 21.60 21.40 21.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.23 0.72 -4.33 11 875 2009-01-15 18:05:59 2007-05-10 10:43:35 5 7 808 3 107 364 9 110.00 50 60.76 CHANGED s.tst.u.spVsGsVplpp..uLPhsAslpVTl....Lusssu+lLs.tssRhEssthPhpFsL.hNPspl.scucIhLpAtlphss+hsl.sss.QpVhs..ssschclpLlP .....................shppssVSGTVhlRp+.lALPP.DA..VLTVTLSD..sSLA.D.AP.S..+..V..LA.Q+...u.l.R.T.E.GK...Qu.P.FsFsLPFNPu-lp...PNARll.LSAAIo.VssKLlFITD.T.lps.VIs..pG.G.sc.uDLpLVP............................. 
0 15 35 72 +9452 PF09620 Cas_csx3 CRISPR-associated protein (Cas_csx3) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is encoded in CRISPR-associated (cas) gene clusters, near CRISPR repeats, in the genomes of several different thermophiles: Archaeoglobus fulgidus (archaeal), Aquifex aeolicus (Aquificae), Dictyoglomus thermophilum (Dictyoglomi), and a thermophilic Synechococcus (Cyanobacteria). It is not yet assigned to a specific CRISPR/cas subtype (hence the x designation csx3). 20.90 20.90 21.90 21.20 20.20 20.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.55 0.72 -4.18 7 37 2009-01-15 18:05:59 2007-05-10 11:28:15 5 3 34 0 14 37 1 78.50 31 36.24 CHANGED chcLpcs...ltP-DLtplplPshlcsptG...VVlSGRuPIWLYuaLlHchH...TsalAsaDPRL..GAVVVuSHo.phR.GpVI.h ....................................ts...lp.ptltthp..lPt.ls.ptG....lV..loG+uPIWLYshLlHth.H...ssalAsa.DP+l..Gu.VVltoH.s.th.psGpll...... 0 8 12 13 +9453 PF09621 LcrR Type III secretion system regulator (LcrR) TIGRFAMs, Coggill P anon TIGRFAMs Family This family of proteins are encoded within type III secretion operons and have been characterised in Yersinia as a regulator of the Low-Calcium Response (LCR). 21.20 21.20 24.20 23.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.69 0.71 -4.90 7 286 2009-01-15 18:05:59 2007-05-10 11:50:46 5 1 274 0 15 74 0 113.30 36 83.16 CHANGED hp-PLlPWhht+GltVpPahhpposI.LGath.hcGhcLAWRV-..ptRlWIVhl+RsptptGLuNPFAALYLLApAshslLGssahLYGNVsVLtuSsLsupRLA+FYpRWsGAS.E.psGWF.Ltst+Vhoh+sh+KRQp .........p.l.paLcp+GhhspstaLstoAIhlG.ph.hssaplsYRl-..tpElIICpFcRhpsp....t..G.LtsPh.sL.h.hLhpthh.hhs......hl.u.......htss....t.pc..h..ah.ph.s....-..........Wa.h................sp............................................................................. 
0 3 6 9 +9454 PF09622 DUF2391 CHP02587; Putative integral membrane protein (DUF2391) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry is found in Nostoc sp. PCC 7120, Agrobacterium tumefaciens, Rhizobium meliloti, and Gloeobacter violaceus in a conserved two-gene neighbourhood. Proteins containing this entry appear to span the membrane seven times. 20.60 20.60 20.60 20.70 20.40 20.30 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.72 0.70 -5.24 9 115 2009-09-10 18:53:12 2007-05-10 12:00:33 5 1 112 0 52 100 14 184.30 24 90.50 CHANGED sslsRGhAGAhLFulPhLhTMEhW.lGhalcPhRLhLlLslshslLhlLs+ttGFR+op...shc-sltDul.AhulGhlsuuhlLhlhthlssshuhcEhlGKlslpuVPsoIGshLuRs.Lttcus-sppt..t..............pstsuhhu-LhhhhlGAlFluhNlAPT-ElhlluhthoPaahlhllLASLhI.ashVatspFpsp+phtpppGhapt.lp.TlsuYllulhluhhhLWhFpRhs.ssuhpphlptslVLGhPAoIGuAAuRLll ........................t.....shsGuhh.hulPh.ho.EsWtlG..h.....p..Lh...hl..h.lsh.hlhh....hsh.tsFppph.........ph......h..l..hchhhAhhlu......hl...ssshlLhhhshls.h..s.......lt+lhl.shPhohGAhls.................................................p....hhGuhhhshsh..st-h..lu..h.....hhhh.h.hhh.h.hh.....................................h....h.s.......h...................................................................................................................... 0 12 29 45 +9455 PF09623 Cas_NE0113 CRISPR-associated protein NE0113 (Cas_NE0113) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this minor CRISPR-associated (Cas) protein family are encoded in cas gene clusters in Vibrio vulnificus YJ016, Nitrosomonas europaea ATCC 19718, Mannheimia succiniciproducens MBEL55E, and Verrucomicrobium spinosum. 
21.20 21.20 21.80 21.60 20.50 20.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.50 0.70 -4.89 17 107 2012-10-11 20:44:46 2007-05-10 12:53:01 5 5 94 0 44 103 11 188.90 25 58.63 CHANGED hKplLlushGhoPQVlTETL.uLhppup......hss-lhll..oTpps+sphthsLL.............pphhp-a.......thpastsslpl..l.sspst.lsDIpotp-stthhshIhphltpLpps.s.tplHlSlAGGRKsMuhhhuhAhpLauc.pDpL.HlhVss.hE......pFhh.stpsthlplpstps.....st..h.VpLsplPalphpp.lsstlhp..uths.spsltphppt.s. .........................................pplLlushGhoPQ....llTETlhsLhppst........hssElhll..oTps.stsph..thpLl...................tphhp-h.......thphppp...lpl.....hhs.ppst.lsDIcot....cDspthtchIhphltphptp.t..tlahsluGGRKsMuhhhshAhphht..tDtl.Hlh......st................................................h..hs..................................tt............................................................. 0 21 34 42 +9456 PF09624 DUF2393 CHP02588; Protein of unknown function (DUF2393) TIGRFAMs, Coggill P anon TIGRFAMs Family The function of this protein is unknown. It is always found as part of a two-gene operon with IPR013416, a protein that appears to span the membrane seven times. It has so far been found in the bacteria Nostoc sp. PCC 7120, Agrobacterium tumefaciens, Rhizobium meliloti, and Gloeobacter violaceus. 
24.60 24.60 25.00 26.40 24.50 24.50 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.80 0.71 -4.68 40 338 2009-01-15 18:05:59 2007-05-10 12:55:27 5 1 213 0 42 168 8 149.40 34 87.04 CHANGED hhhhhhhhlslhhcpp.....hhhhhlhhshlhhhh.hshh.........hhhhl.cpts+psplslspscp.lp..hspshhls..hslpNpuphsh................ppCplpscl.hp......sssshhpphthp.ts...ahpp...................ph.ltp..Lphscspph....phh...hs...t.sh...tshslph.pupCh .............................................................h.LhshllhLplh.+ch.....Ahl.hhhhsIlhshL.lYhh...........hhpll.pppspsphoplsps+...lp....spulIls..hplpNhoKhsh................+cChlhhcIhspp....sssslhp-hph+.h.....appp....................ohEIh-..L.sNssp.h....Rhu...hs.....sh...NNhphhh.s.cCh............................. 0 11 34 41 +9457 PF09625 VP9 VP9 protein Coggill P anon pdb_2gj2 Domain VP9 is a protein containing a ferredoxin fold. Two dimers come together to form one asymmetric unit which possesses a DNA recognition fold and specific metal binding sites possibly for zinc. It is postulated that being a non-structural protein VP9 is involved in the transcriptional regulation of the White spot syndrome virus, WSSV, from which it comes. WSSV is the major viral pathogen in shrimp aquaculture [1]. VP9 is found N-terminal to the Pfam:PF07056 domain. 20.50 20.50 20.60 22.90 20.20 17.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.51 0.72 -4.17 6 19 2009-01-15 18:05:59 2007-05-10 13:38:38 5 2 2 7 0 15 0 77.60 41 19.98 CHANGED Lpl+suFhlsG-c.s-+YEcVhh.oFcuV-.....olRKSELcDssaIVpLK-scpl+lpsGlccLRpLTGDsoLpIp.hlosshP ..LphcuuFhhhG-c.sc+YEcVht.oF-uV-.....olR+SELc-ssaIVpLKcscphphpsGlccLRpLTGDsoLpIp.hlossh........................ 
0 0 0 0 +9458 PF09626 DHC Dihaem cytochrome c Coggill P anon pdb_2fwt Family Dihaem cytochrome c (DHC) is a soluble c-type cytochrome that folds into two distinct domains, each binding a single haem group and connected by a small linker region. Despite little sequence similarity, the N-terminal domain (residues 12-75) is a class I type cytochrome c, that binds one of the haems, but the domain surrounding the other haem is structurally unique. DHC binds electrostatically to an oxygen-binding protein, sphaeroides haem protein (SHP), as a component of a conserved electron transfer pathway. DHC acts as the physiological electron donor for SHP during phototrophic growth [1]. In certain species DHC is found upstream of Pfam:PF01292. 22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.98 0.71 -3.44 35 196 2009-01-15 18:05:59 2007-05-10 14:12:42 5 5 163 2 75 205 20 107.70 32 55.86 CHANGED YhccCuuCHhAaPPtlLPupSWpplhssLspHFGssAs.LsssspttltsYLpspuuststs.............ssssPh.RITcs.aFpcpH..pcls.thhp.pspltohusCsuCHss.AcpGsF ....YtppCu.uCHhAaPPslLPupoWptlh..s..s..LppHaGssAs...Ls.sssptpItsYLtspAuptst.......................sssss.+Iocs.aFhcpH....scls.......thhp...spltohusCsuCHstAppG.a....................... 0 19 56 66 +9459 PF09627 PrgU PrgU-like protein Coggill P anon pdb_2gmq Domain This hypothetical protein of 125 residues is expressed in bacteria but is thought to be plasmid in origin. It forms a six beta-strand barrel with three accompanying alpha helices and is probably a homo-dimer in the cell. It may be involved in pheromone-inducible conjugation [1]. 
25.00 25.00 77.10 77.10 18.60 18.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.52 0.71 -4.53 2 54 2009-01-15 18:05:59 2007-05-10 14:19:04 5 1 40 2 2 31 0 100.60 89 88.98 CHANGED MKElAIQEK-lshpWpG.pG+Llhl+lKps+shEhhhNpQlT.ENIpEIsslpllKNGKoLsLcV.sE+Shash.p.uphpVPhFaIcT.IpRtsac-hF..............Gpp..lKt MKEIAIQEKDLTLQWRGNTGKLVKVRLKNTRAMEMWYNKQITEENIQEITTLNIIKNGKSLALEVYPEKSIYVKPNLGRINVPVFFIKTPINRGlFEEIF......................GETLK....... 0 2 2 2 +9460 PF09628 YvfG Yvfg; YvfG protein Coggill P anon pdb_2gsv Domain Yvfg is a hypothetical protein of 71 residues expressed in some bacteria. The monomer consists of two parallel alpha helices, and the protein crystallises as a homo-dimer. 25.00 25.00 123.50 123.30 22.40 18.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.22 0.72 -3.83 2 24 2009-01-15 18:05:59 2007-05-10 14:21:45 5 1 24 4 6 13 1 67.00 88 93.33 CHANGED pLFos.hhhpNh+pahp.Npu..sKIpAhNuYY+sVsuolltDplsKNAsllhRhpHL-EAYpKVtpt ELFSVPYFIENhKQHIEMN.s.EDKIHAMNSYYRSVVSTLVQDQLTKNAVVLKRIQHLDEAYNKVKRG 0 2 3 4 +9461 PF09629 YorP YorP protein Coggill P anon pdb_2heq Domain YorP is a 71 residue protein found in bacteria. As it is also found in a bacteriophage it might be of viral origin. The structure is of an alpha helix between two of five beta strands. The function is unknown. 25.00 25.00 66.00 65.80 19.10 16.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.32 0.72 -4.00 2 8 2009-01-15 18:05:59 2007-05-10 14:43:34 5 1 8 1 1 7 0 65.40 75 99.43 CHANGED MPKYWSYPVGLAVEINNNARYGCPHHVGRKGKIIEHLHSATYDYAVSDETGDITYFKEHELTPLKGGLAYV MPKaWSYP.GLcV.INpNA+.uCPHHVGRcGKIIEhLHSATYDYAVSDETGDITaFKEHELNPhKGG.......... 0 1 1 1 +9462 PF09630 DUF2024 Domain of unknown function (DUF2024) Coggill P anon pdb_2hfq Domain This protein of 86 residues is expressed in bacteria. It consists of four alpha helices and two beta strands. 
Its function is unknown. One UniProt entry gives the gene name as Traf5. 20.40 20.40 21.10 69.20 20.10 17.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.85 0.72 -4.27 13 50 2009-01-15 18:05:59 2007-05-10 14:46:45 5 1 46 1 22 46 40 81.50 45 95.21 CHANGED MclHVaDTaVps+DG+hMHFDVll..sscstc+shpYAKpaLcolG.csssloppECpFCHSEtAss-VppuIcppGaaIhcM ....Mcl+VFDTaV+s+DG+hhHFDVll...sccpst+AhpaA+cWLsuhG.csAsloppcCpFCHSE...tAss-VtcuI+p+GaaIhph. 1 9 15 21 +9463 PF09631 Sen15 Sen15 protein Coggill P anon pdb_2gw6 Domain The Sen15 subunit of the tRNA intron-splicing endonuclease is one of the two structural subunits of this hetero-tetrameric enzyme. Residues 36-157 of this subunit possess a novel homodimeric fold. Each monomer consists of three alpha-helices and a mixed antiparallel/parallel beta-sheet. Two monomers of Sen15 fold with two monomers of Sen34, one of the two catalytic subunits, to form an alpha2-beta2 tetramer as part of the functional endonuclease assembly. 22.00 22.00 22.20 22.10 21.80 21.60 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.41 0.72 -3.78 44 217 2012-10-11 20:44:46 2007-05-10 14:48:09 5 8 184 2 146 215 1 113.10 26 62.71 CHANGED hVhtsLhchptWp-Vcl..hppp...t.......................lhllpGpPspphstp.................................EhllPlph....sptlolcplcplFspl.....s..t......................sp+llLAIlssDuTlVYYh...lpcGlhc.....Pcp.s .....................................................................................VhhsL.ct+.Wpplph.ht..p.hp.....................hhhlpGh..ct.hh............................................phVlPssh.........spphohpplpplac..tltt.t.t.............................................spplhLAhlp....s....DuTlVYYh..lpcGhhpPc.............................. 0 37 71 113 +9464 PF09632 Rac1 Rac1-binding domain Coggill P anon pdb_2h7o Domain The Rac1-binding domain is the C-terminal portion of YpkA from Yersinia. 
It is an all-helical molecule consisting of two distinct subdomains connected by a linker. the N-terminal end, residues 434-615, consists of six helices organised into two three-helix bundles packed against each other. This region is involved with binding to GTPases. The C-terminal end, residues 705-732. is a novel and elongated fold consisting of four helices clustered into two pairs, and this fold carries the helix implicated in actin activation. Rac1-binding domain mimics host guanidine nucleotide dissociation inhibitors (GDIs) of the Rho GTPases, thereby inhibiting nucleotide exchange in Rac1 and causing cytoskeletal disruption in the host [1]. It is usually found downstream of Pfam:PF00069. 25.00 25.00 43.00 42.30 23.90 19.00 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.76 0.70 -5.34 2 33 2009-01-15 18:05:59 2007-05-10 15:45:29 5 2 30 3 3 15 0 287.60 85 40.67 CHANGED DsRRlTP+KlRELSDhLRhHLSSAuT+QLshGhsLSDLssM.ssLDKAERpthsDpsQlKSFNSLILKsYuVIuuYlKGc.s-oKospsEsSP.hpuNhMhSlsEPoLppIQtpLsQoHu.sDIuoL.Ru+pHLETLLpVLhs.S.Q.p.VosEsYsFL.RlAEsKsoLu.pLssLptQQpps+upLShLhptssuWAssARQuL.RFDShRPVVKFGo.Q.hAlHRpMhAAaAAhTLQEVutFss-MRpFsAsuhPLLhQLGRSoLhDEtLs.QREpLRELsTlAERLNRLppEWh .......DVRRITPKKLRELSDLLRTHLSSAATK..QLDMGGVLSDLDTMLVALDKAEREGGVDKDQLKSFNSLILKTYRVIE.DYVKGREGDTKNSSTEVSPYHRSNFMLSIVEPSLQRIQKHLDQTHSFSDIGSLVRAHKHLETLLEVLVTLSQQGQPVSSETYGFLNRLTEAKITLSQQLNTLQQQQESAKAQLSILINRSGSWADVARQSLQRFDSTRPVVKFGTEQYTAIHRQMMAAHAAITLQEVSEFTDDMRNFTVDSIPLLIQLGRSSLMDEHLVEQREKLRELTTIAERLNRLEREWM. 0 0 1 1 +9465 PF09633 DUF2023 Protein of unknown function (DUF2023) Coggill P anon pdb_2guk Domain This protein of approx.120 residues consists of three beta strands and five alpha helices, thought to fold into a homo-dimer. It is expressed in bacteria. 
21.30 21.30 54.60 54.40 20.80 20.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.26 0.72 -4.07 10 133 2009-01-15 18:05:59 2007-05-10 16:32:13 5 1 133 2 35 108 1 100.10 53 83.50 CHANGED +lFhHHIYEapKGlRsLlLhTLsscttstshtRLcppuIsYalQcs.ups+lNlFFGsspClcslRplss.+PLspLTsEEDFlLGuMLGYDhppQCcRYLpR ..+lFhpHIYEacKGVRphVLaThs+capshAlpRLcppsIsYhIQ.V..uss+lNLFFG+pECl-slRtllp.+PLNpLTPEEDFILGuMLGYDlptQCcRYCpR.. 0 12 24 29 +9466 PF09634 DUF2025 Protein of unknown function (DUF2025) Coggill P anon pdb_2hg6 Domain This protein is produced from gene PA1123 in Pseudomonas. It contains three alpha helices and six beta strands and is thought to be monomeric. It appears to be present in the biofilm layer and may be a lipoprotein. 25.00 25.00 170.10 169.90 24.20 17.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.24 0.72 -4.29 5 33 2009-01-15 18:05:59 2007-05-10 16:37:21 5 1 33 1 11 25 1 106.00 75 96.84 CHANGED MSITSppICQAADQLKGFVGFNRKTGpYIVRFSEDSFGMDVADDoIlPsSEFVWAsssDssMsLcREpLQLLLEQNIDDRlNIoEPLRVYLRRoDLPEIpApRSLl MuITSsDICQAADtLKGFVGFNRKTGpYIVRFSEDSFGMDVADDSITPTSEFVWuuspD.ssMpLsREpLQlLLEQNIs-RLNIGEPLhVYLRRpDLPEIsAQRpL... 0 1 2 7 +9467 PF09635 MetRS-N MetRS-N binding domain Coggill P anon pdb_2hsn Domain The MetRS-N domain binds an Arc1-P domain in a tetrameric complex resembling a classical GST homo-dimer. Domain-swapping between symmetrically related MetRS-N and Arc1p-N domains generates a 2:2 tetramer held together by van der Waals forces. This domain is necessary for formation of the aminoacyl-tRNA synthetase complex necessary for tRNA nuclear export and shuttling as part of the translational apparatus. The domain is associated with Pfam:PF09334. 
21.20 21.20 21.90 42.10 20.60 21.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.58 0.71 -3.73 5 42 2009-01-15 18:05:59 2007-05-10 17:08:36 5 2 39 1 27 42 3 115.50 37 15.90 CHANGED KGHoutLQLANNLKLALALtLAsssLKLclNEDcutPpLhsouoGFcLFDANAILRYVLcDFEupcS-cYpaAluSLEshLYH..K-ssc-HlsclsNKuL-NYL.lsh-EPLoATcLIlFANsY .......Ktpsh.LpLANNLKlulAlphhs.tsL..clpls.-Ds..us...hpLhsspss.FpLh-sNAIlRYlhsDFps.ppu.cap.....hpshLYp....Kp..ppclp.hss.tltpah....cp.losoplIlFAslY..................................................... 0 3 14 26 +9468 PF09636 XkdW XkdW protein Coggill P anon pdb_2hg7 Domain This protein of approx. 100 residues contains two alpha helices and two beta strands and is probably monomeric. It is expressed in bacteria but is probably viral in origin. Its function is unknown. 23.00 23.00 23.50 23.30 22.00 22.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.55 0.72 -4.18 4 40 2009-01-15 18:05:59 2007-05-10 17:10:23 5 1 35 1 9 35 41 96.80 32 88.42 CHANGED M.Lh-AIhYpYPsAsspKDF.lRNDGDG..SYIpcWplcAPhPTEtELcsWWEEhQpNPsY-PP.Ql-hLAQELupEKLhRKQLEc.sppLGsELSslKLplLsLKG- ..........................................................lh-slhahaPssss.hDa.lpssup.G...hItcWplp.hP.PTptpLcphac-htcs..sshpsPspl-hLuQ-LupEKLtRKph-ph.psLGppLuslKLplLplKGt........................................ 0 4 6 7 +9469 PF09637 Med18 Med18 protein Coggill P anon pdb_2hzm Domain Med18 is one subunit of Mediator, a head-module multiprotein complex, that stimulates basal RNA polymerase II (Pol II) transcription. Med18 consists of an eight-stranded beta-barrel with a central pore and three flanking helices. It complexes with Med8 and Med20 proteins by forming a heterodimer of two-fold symmetry with Med20 and binding the C-terminal alpha-helix region of Med8 across the top of its barrel. This complex creates a multipartite TBP-binding site that can be modulated by transcriptional activators [1]. 
20.70 20.70 21.10 20.80 20.30 20.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.51 0.70 -4.84 26 307 2009-01-15 18:05:59 2007-05-10 17:12:11 5 6 206 12 211 292 0 171.20 21 91.12 CHANGED pELhLhuSlsspshpphlppLpuLsu..p..Ppphtchphla+...s..........p.t...lpspstthpphhh+htpphspt.................hh....................p.s...tp...........................................ppsWpLphtDhP-sGp.spsssspphhsssl..............................................................................................pssslhpalpchG.achshEYhhcGhhFhp.uslhItlh+lhp.hsspspt...................p.hshSssallcs.lslscus-h.-thspuhtcL.tlp-pL+shlpLEhsDR..hDoR ...............................................................................................ph.L.u..l.t.t.th..hhp.Ltshss.....s..h.phphhh................................................................................................................................................................................................t..aplph.s.......Pt.s....p..tp....s.......p..hhp..................................................................................................................................................ptsh.thhtthG.ath..pahhtGhhahh.s.hhl.l.+hh..h....tt.................................p.thhlph....h.h.tstp.....t.t......htp.Lpshh.h............................................................................ 0 63 97 163 +9470 PF09638 Ph1570 Ph1570 protein Coggill P anon pdb_2hq4 Domain This is a hypothetical protein from Pyroccous horikoshii of unknown function.\ It contains six alpha helices and eight beta strands and is thought to be monomeric. 
20.70 20.70 21.30 24.10 18.10 20.10 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.74 0.71 -4.42 4 13 2009-01-15 18:05:59 2007-05-10 17:14:26 5 1 13 2 11 17 0 152.10 56 97.29 CHANGED MhCEEKLEVFENGF+D-KFNlElcaYGpDuRKVLLAlIYELYLP-YGpEYVYPFECAKEFWsIYhDusElcsEEscLKPlKFlSESVhpKlcc.LccIcsPlEVK....lEcAclYKsK-GYLslGKNFlLD.+GRLFIFNKPSluEhILKYIWcW ................................MhCEEKLEVFENGFcDGKFNlclEaYGpDARKlLLAlIaEL.YLPDYGp-YVYPFECAKEFWGIYhDuuEIpsEEh+LuPlKFlscSVhsKLEcsLc-IcAPtEVKt.lslE+A-laKlKcGhLslGKNFlLDt.+GhLFlFNKPSstElILKYlGh............ 0 1 1 6 +9471 PF09639 YjcQ YjcQ protein Coggill P anon pdb_2hgc Domain YjcQ is a protein of approx. 100 residues containing four alpha helices and three beta strands. It is expressed in bacteria and also in viruses. It appears to be under the regulation of SigD RNA polymerase which is responsible for the expression of many genes encoding cell-surface proteins related to flagellar assembly, motility, chemotaxis and autolysis in the late exponential growth phase. The exact function of YjcQ is unknown [1]. However, it is thought to be a prophage head protein in viruses [2]. 21.80 21.80 22.20 22.10 21.70 21.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.14 0.72 -3.90 8 83 2009-09-10 15:07:02 2007-05-10 17:17:20 5 1 76 1 15 64 1 92.60 25 87.98 CHANGED tYKILptlhauaEshpc.D.slhD........cphscsLphLpD-GYIKGlphp.s....slhsuhssshlTh...cGlsYL-ENohhKKAYKshKEl+-WlP .........Y+lLphlht.s..h..cphpth.c...s.hhp.......lspphhsphlphLh--GhIcGlphhpth.......phhh.t.h.pshpITh...cGlcYLp-NShhpKshchhK-hpt............. 0 9 12 14 +9472 PF09640 DUF2027 Domain of unknown function (DUF2027) Coggill P anon pdb_2huh Domain This protein domain is of unknown function. though putatively involved in DNA mismatch repair. It is associated with Pfam:PF01713. 
25.00 25.00 45.80 45.00 23.00 22.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.95 0.71 -4.88 5 110 2009-01-15 18:05:59 2007-05-11 10:06:21 5 2 110 1 14 104 3 160.50 48 43.58 CHANGED LNVaLAYVPpDhKAluTTPFETYLVNDSNYYLYYTYLSAEGsuW+sRSHGlIEPNTKLaLEEFsKu-LNDhERVsVQLIAFKDsKsFslKPAVSVELRIDTVKFYKLHTFpEoDFFEEPALIYDIV+NDhPVKQVaVSAEELKEALlQKKssD+s.pPQsllK ..LNVaLAaVPpD.hKshsoTsFEsYLVNDSNYYlaaoYh..ou.EGp.u.WpsRupGhlEPNTKlhlEEFs+ssLN.-hE+lsVQllAaKcsKsahlKPAlsVElRIDsVKFYKLHTFp-sDFFEpPALlYsIV+sDhPs....+plh..VsAc-lppuhhpKp..t.-.t....h........................................... 0 5 12 14 +9473 PF09641 DUF2026 Protein of unknown function (DUF2026) Coggill P anon pdb_2hly Domain This protein of approx. 100 residues is found in bacteria. It contains up to five alpha helices and up to seven beta strands and is probably monomeric. Its function is unknown. It is cited as a major prophage head protein [1], so might generally be of viral origin. 25.00 25.00 156.50 156.40 22.30 18.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.31 0.71 -4.95 4 14 2009-01-15 18:05:59 2007-05-11 10:08:21 5 1 14 1 5 16 1 201.70 47 95.83 CHANGED sLoDY.RIYpVl+uVLhS.tsAcss+AChFFuhhGAaILpcHY+lcA+shuGhhuathu..hsslLhFu.pIEcsplpSu--sFHsWVpsDsallDFMAPhFsEshtut.hshslPtKMhQR+L-sMAsS.ssL+puGDFhahPs.-lTpcLtt+htppth.tDllslsssWatKpPpphssoluhsDt+GpssplsLsssp.lhGAW .p.oDYpRIYRVIpSlLlu.pNADsAsAshaFSTFGAaILppHYKlcAhPpuGhAAYsLG...uslLhFu.+c-DGh.VsuAs-sFHCWVEADGWAIDFMAPtFupuscu....LslPsKMFQRsLuuMAsSlsDLspSGDFFYcS.-s-sTschhsDacppshluDlusVAssWFRKoPKpMssSlols-pcGcsp.plPLoGps.lsGuW 0 1 2 4 +9474 PF09642 YonK YonK protein Coggill P anon pdb_2h4o Domain YonK protein is expressed by the bacterial prophage SPbetaC [1]. It is a 63 residue protein that associates into a homo-octamer in the form of a beta-stranded barrel with four outer helical features at points of the compass. Its function is unknown. 
25.00 25.00 29.70 29.60 21.30 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -8.81 0.72 -4.28 2 10 2009-09-11 09:12:47 2007-05-11 10:17:06 5 1 10 4 2 7 0 60.00 48 94.94 CHANGED ASKKVHQINVKGFFDMDVMEVTEQTKEAEYTYDFKEILSEFNGKNVSITVKEENELPVKGVE .........h.uKKVpplNlKGhhDh-sspIpEpsK-sEppYDLsElLScFsGKpVSITIKEEsELPhc...t.. 0 1 1 2 +9475 PF09643 YopX YopX protein Coggill P anon pdb_2i2l Domain YopX is a protein that is largely helical, with three identical chains probably complexing into a twelve-chain structure. 22.40 22.40 22.80 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.15 63 761 2009-01-15 18:05:59 2007-05-11 10:26:09 5 2 545 9 61 587 31 122.40 24 92.72 CHANGED KFRuasctppchh.spthph...phtthhhtp.tpp.t...................................l...hQaTG..LKDKNGpEIaEGD.....Ilch....t..h..h..........................pat-sphhhhht...............................phh..h.hpspsh..EVlGNIa.ENsELLE ........................................................cFRsaspt..p...pthh...htt.hph.........pt.h.h.h.tt........t..............p........................l..hQh.TG....LKD..KNG....pE..IaEGD.....Ilph......ptt.tthh..........................................hpapcsthhh.ht...................................ph.h......h.h.......t.....pph.....EllGNIa.ENs-LLc.......................................................... 2 18 46 56 +9476 PF09644 Mg296 Mg296 protein Coggill P anon pdb_2i15 Domain This protein of 129 residues is expressed in bacteria. It consists of three identical chains of five alpha helices. Two copies of each chain associate into a complex of six units of possible biological significance but of unknown function. 
25.00 25.00 113.30 113.20 20.20 18.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.66 0.71 -4.26 3 8 2009-09-14 14:07:04 2007-05-11 10:29:33 5 1 8 3 3 5 0 116.10 52 95.77 CHANGED KPQLlAaKpFLQTEFpcVDFETFRpNFNLCLEREQcTllIYEDDDYDDQsFFhKPMLSDuFFIpoEVIKQL.DaLAcLV-NPKDDDKpCC...QsFYEALhlFISALAITKGIslsRaHQpLssR p.pL.thhp.lpp.hpclshEphhpNaNhhhEppppThlhY-DDDY--.sFF.Ksh.u-h.alpsclIppl.DaltcLlcs.hDDDKphs...ppFYp....hhpAlAlTKsIslpRhpphLtsc 0 2 2 2 +9477 PF09645 F-112 F-112 protein Coggill P anon pdb_2cmx Domain F-112 protein is of 70-110 residues and is found in viruses. Its winged-helix structure suggests a DNA-binding function. 22.40 22.40 22.80 45.00 22.20 22.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.57 0.72 -4.17 2 4 2012-10-04 14:01:12 2007-05-11 10:36:56 5 1 4 1 0 5 0 93.00 37 97.64 CHANGED QolN..chAp.haphLccKtElThEDIlA.aplosssAYsI.psLKshCppH.sECps.h+sRKTs.......................................IhuKQ ....lNshphAclhaKILppKtElolEDIlAQFEISsosAYsI.+sL+hICEpH.-ECpsppKsRKTlhh.hKpEphppptpEp..ppItKIhsAps...............h... 0 0 0 0 +9478 PF09646 Gp37 Gp37 protein Coggill P anon pdb_2gjv Domain This protein of 154 residues consists of a unit of helices and beta sheets that crystallises into a beautiful asymmetrical dodecameric barrel-structure, of two six-membered rings one on top of the other. It is expressed in bacteria but is of viral origin as it is found in phage BcepMu and is probably a pathogenesis factor [1]. 
20.50 20.50 21.00 21.70 20.30 19.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.56 0.71 -4.74 13 105 2009-01-15 18:05:59 2007-05-11 10:50:08 5 1 99 6 20 81 0 141.20 42 92.06 CHANGED llsRL+ppLPphplEhFP-cPscYpLsHssGAlLluYtGS+FspPcDTsuVlQsRplplulTVlhRpLsucpGAlssLDplRpsLsGF+PPsC.pchaLlsEpFlG..EssGLWQYsL-hsTETltlE-s-spssPhLspVs....YEcp- .............................VhsRL+Et.P.p.hp.lch.spcsppYh.sp..uslLlpYsGSpFscP-sTsAllQpQplplssTVlstQlsst....tAls......sLDplRpuLGGap.PsC..cR.lWLppEpalG..-ssGhapYsL-hsspolFIt-p-s.pcu.PLLTtVN....YEE................................. 2 5 15 17 +9480 PF09648 YycI YycH protein Bateman A, Szurmant H anon Pfam-B_6483 (Release 21.0) Family This domain is exclusively found in YycI proteins in the low GC content Gram positive species. These two domains share the same structural fold with domains two and three of YycH [2] Pfam:PF07435. Both, YycH and YycI are always found in pair on the chromosome, downstream of the essential histidine kinase YycG. Additionally, both proteins share a function in regulating the YycG kinase with which they appear to form a ternary complex. Lastly, the two proteins always contain an N-terminal transmembrane helix and are localized to the periplasmic space as shown by PhoA fusion studies. 
25.20 25.20 25.30 25.20 25.00 25.10 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.50 0.70 -4.90 33 732 2012-10-01 23:24:42 2007-05-13 16:43:49 5 2 721 12 94 434 1 224.20 27 82.91 CHANGED psp.sssppssttlppchcs-pIslss.lsscp.....scshhLsucpcshp..hcshps.....Lpspssshppps........plpushspslpls....................tppttpplpsalp...........pplhpG.....pcYpasph.p....sssplsatQpacst.la........spp....uplphplsscs...clsuYpQohlsclp.hc....c+.pplIospcAlpsLYh..pspltsssc...lppscLGYh......pll..sssspplh.sPsWpltlcpp...s....tthphhhVNAh ......................................................................................................................................................p.......ppps..h-pphpp-pIshs.s..lsscs...hph.hlsucspsFs....tcshps.............lt...spshphpsts..................phLpuslsp.slt.ls..............................cpshccl.pcalp...........pplhpG.......................pcY..p.hsph...................sssplhatQ.p.Y..cshPlh..........ssp......A..hls.h.plppcs...cls..SYp..Q...o.hhs...clc.hc.......c+...pplIospcAl.-sLYh.......ppt..lpp..ssc.......VhpscLGYh.......sll....ptsss...plh....hPsWplplcpcs.......tpsphhhVNAh.......... 0 30 60 76 +9481 PF09649 CHZ Histone chaperone domain CHZ Wood V, Coggill P anon Wood V Domain This domain is highly conserved from yeasts to humans and is part of the chaperone protein HIRIP3 in vertebrates which interacts with the H3.3 chaperone HIRA, implicated in histone replacement during transcription. N- and C- termini of Chz family members are relatively divergent but do contain similar acidic stretches rich in Glu/Asp residues, characteristic of all histone chaperones [1]. 
23.30 23.30 23.40 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.84 0.72 -4.72 25 201 2009-01-15 18:05:59 2007-05-14 09:24:18 5 1 169 1 137 189 0 37.30 40 13.89 CHANGED ---sD-LppIDsuNIIosGRRTRGKsIDFspAAcclps ................c--p--Lt.pIDsuNII.o..uG.RRTRu+slsastsupph...................... 0 29 66 104 +9482 PF09650 PHA_gran_rgn Putative polyhydroxyalkanoic acid system protein (PHA_gran_rgn) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded by genes involved in either polyhydroxyalkanoic acid (PHA) biosynthesis or utilisation, including proteins found at the surface of PHA granules. These proteins have so far been found in the Pseudomonadales, Xanthomonadales, and Vibrionales, all of which belong to the Gammaproteobacteria. 20.50 20.50 20.50 20.90 20.30 20.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.58 0.72 -3.88 24 217 2009-01-15 18:05:59 2007-05-14 11:17:44 5 1 213 0 80 187 38 89.10 29 90.59 CHANGED IcIcRpHsLshppARptA-plAcclspcaslcspWp....uDplpFpRoGlcGplplsssplclplcLGhlLpshpupIcpEIcctLDphL ...............................Iplc+sHsLGh-pARp+s-ph...sp+ls...p...c.....a......u.....l.....ppp.....Wp.........GDpl..ph....p.....tp....G....l....cGplsltscplclplcL.shlL.ushpstlcscIc+hLcphh....................... 1 18 39 60 +9483 PF09651 Cas_APE2256 CRISPR-associated protein (Cas_APE2256) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a conserved region of about 150 amino acids found in at least five archaeal and three bacterial species. These species all contain CRISPRs (Clustered Regularly Interspaced Short Palindromic Repeats). In six of eight species, the protein is encoded the vicinity of a CRISPR/Cas locus. 
24.80 24.80 25.00 25.10 24.20 24.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.74 0.71 -4.45 28 119 2012-10-11 20:44:46 2007-05-14 11:35:42 5 3 97 2 56 125 1 135.30 23 39.62 CHANGED cpsSAELNulhphhppt.hs.......phhLlsoDTssGchsApllccalpp............puhp.Vplht..t....lpshs.hcp..Fpc......GLtsLlctlspplp...ppsutp.lhlNsTGGaKspsuahsllu.h..husslhYlaE.phs-llpLPhl ................................................................phSAElsulhph......hppt.ht.........plhLlsoDT.pGchsucllppalpp...................pshp..lphhthts....lpsps...pp....Fpp...Glts...Lhctltpplp...ppp..u.tp..lhlNsTGGaKu.ssah.slhuhh...huh.lhYlaE.p.hscllplP............... 0 32 43 49 +9484 PF09652 Cas_VVA1548 Putative CRISPR-associated protein (Cas_VVA1548) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents a conserved region of about 95 amino acids found exclusively in species with CRISPRs (Clustered Regularly Interspaced Short Palindromic Repeats). In all bacterial species that contain this entry, the genes encoding the proteins are in the midst of a cluster of cas (CRISPR-associated) genes. 25.00 25.00 33.10 28.10 20.60 18.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.07 0.72 -3.98 6 31 2009-01-15 18:05:59 2007-05-14 11:44:26 5 4 27 0 10 31 1 91.80 41 51.20 CHANGED WhloRHsGAhEWAtc....QGlclD+hVs.HLc...htclstGDpVIGoLPVpLAtslCc+GucYaHLsL.clP.plRGpELoA-phcussA+LtcacVcps .....aFloRHsGAl-Whtp....p.G.l.plDchls.HLD...sspIpsGDsVIGTLPlpLAAclCp+GA+ahaLoL.slPhchRGpELos-phpstGApLppaplp..h.......... 0 4 9 10 +9486 PF09654 DUF2396 CHP02652; Protein of unknown function (DUF2396) TIGRFAMs, Coggill P anon TIGRFAMs Family These conserved hypothetical proteins have so far been found only in the Cyanobacteria. They are about 170 amino acids long and contain a CxxCx(14)CxxH motif near the N-terminus. 
19.60 19.60 20.70 182.80 18.70 17.70 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.07 0.71 -4.26 7 46 2009-01-15 18:05:59 2007-05-14 11:53:07 5 1 43 0 20 48 1 160.70 74 95.31 CHANGED PIFGPEIpCPHCRQsIPALTLTDTYLCsRHGAFEAsPcTt-LVHLQSGRpWRLWEspWYRQHTHPDGIRFEIHEALDRLaTQGYRAT+VIIAcRYR-LlssYLERsssh...usscss..RLYGLPVEFSssssp-.....spWpVINF-LEKE.GsshRYPYFRL..F- PIFGPEIpCPHCRQsIPALTLTDTYLCPRHGAFEAsPcTsELVHLQSGRHWRhWpsEWYRQHTHPDGIRFEIHEALDRLYTQGYRAT+VIIApRYc-LlSsYLE.RsosWR.......up..s..-us.PRLYGLPVEFSP-spc-.....PCW-VINFDLEKE..PGlPh.RYPYFRLF-...... 1 3 17 20 +9487 PF09655 Nitr_red_assoc Conserved nitrate reductase-associated protein (Nitr_red_assoc) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are found in the Cyanobacteria, and are mostly encoded near nitrate reductase and molybdopterin biosynthesis genes. Molybdopterin guanine dinucleotide is a cofactor for nitrate reductase. These proteins are sometimes annotated as nitrate reductase-associated proteins, though their function is unknown. 25.00 25.00 73.40 73.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.85 0.71 -3.86 25 124 2009-01-15 18:05:59 2007-05-14 11:57:37 5 1 119 0 43 126 32 144.80 46 92.10 CHANGED hFpFEpDFlssLRCIPMsVRhKLDhsGlKLKLsHWtpLopppRptLVchPs-sssplp.saRptLpphspshsss....scsLs.-ssP.sWppssplPptl....pppApphGlt.lolsQWssLssLQRFALlKLoRsGH-N+NhhsAhpEF ..hFsFEt-.s-sLphIPMlVRapLDphGl+lpLppWphLshE-RphLschPs.-..............ssst.......lc..sacctLhchlps+uss....sphh..ts-ppP.uWpsssslP-ul....hppushtGls...o.hsQWtpLsPhQRasLhKLSRps+t.N+sFlPAh+EF.... 0 5 22 33 +9488 PF09656 PGPGW CHP02611; Putative transmembrane protein (PGPGW) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are putative Actinobacterial proteins of about 150 amino acids in length, with three predicted transmembrane helices and an unusual motif with consensus sequence PGPGW. 
21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.50 0.72 -4.67 29 301 2009-01-15 18:05:59 2007-05-14 13:06:46 5 7 274 0 124 284 202 51.40 35 34.98 CHANGED RsslsllGhsllllGllhlslPGPGWLllhhGLulLupEatWA+RhLchscpp ..........p.hlhllGhhlllsGllhlsl..P.G.P.G.aL.hlh.lGLu.l.LuhEa.WApphLthspt................... 0 46 90 111 +9489 PF09657 Cas_Csx8 CRISPR-associated protein Csx8 (Cas_Csx8) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes proteins of unknown function which are encoded in the midst of a cas gene operon. 
25.00 25.00 32.30 32.30 19.40 19.40 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.61 0.70 -5.76 3 31 2009-01-15 18:05:59 2007-05-14 13:13:45 5 2 26 0 3 31 0 380.60 38 92.76 CHANGED FDTsLEuSDWRaSATIVGLIpYFcaL-pcY.hcK.pl.ElEDDYLLYNSssINEENYLDFVEKaY....cDsLHHKlVENILp+.-ElTEEpIKlINEKLsANTlMKKlFGKIKFDGTNK-EILDLI-KNRacLIKETFRRKKsLYuNYuNTNpLFsD.sNcsCRLlGYChDsGKKGKSTGYNFssSTF-apD-KIFDFIPFAFoG.SaEAFFINDNloIccLK........................KSNchIpcKhp--hEGppNScssRpTLFsslpEouDYIKaDVEVILKsRDKEYFETlYlRcESIcIFKE..sEDF-YKuI+F.aKlTDKYYhNlpcEVssuILNslLLDslIElLLK-KNs................aSYsIppLIKINsLIRcGGKEMcE+LKuAa.ACAKpVsKKl..cNppNKLsSYKQKLlSSVlFKD+DRlC-ILLQLSsYSGVhFuFAYDLaEDFEpNKDLAYTFINAL .........aDptlpsSDW+aSAAIsGLhhYhp.hphpa................h....pt..p...chp-....salhYsppsI....s....Ec.p...YLpFlEcaa....p-phhH+tlEs.Lpp.ppFo.E.IK.Ip-phpANolhKKlFtphKFsGpNccElLpllpcNR..lI+ETFRNtKshYsNasp.....splhpc..pppsCRLhGYhhD.s+KsKuhuasFspsohsa.D...FDFIPFAFot.sh-shFlNsNhslchL.........................Kppptlt...p...pp.hc.t.tp..pphsh+p.hht.hpc.pscaIcashE.....lIhKsp........-c..c.Y.FEThalRpcuIclhcp...hcchp......hhs...lph......Khs-ph....YhslhpEVhssILNh.hLsp.....I.....hLLK..-.cps...........................hsYh.IppLI+lN.hIhp.....ts.ppMp..p.......phctu......h.AsAtpVscKh......st......NKltSY+pKLhSsllhKsacRhhplLhQLSsYSsV.FsFsaDh.hEs.ppNcslhahFl.tL......... 0 2 3 3 +9490 PF09658 Cas_Csx9 CRISPR-associated protein (Cas_Csx9) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes archaeal proteins encoded in cas gene regions. 
25.00 25.00 26.30 25.80 22.20 21.80 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -11.97 0.70 -5.65 3 8 2009-01-15 18:05:59 2007-05-14 13:47:11 5 1 8 0 6 9 0 349.00 46 88.05 CHANGED MSWVVMDAGLEPDcEELADALEGALcSLcSRs+INTSKIGRNDRNSFD+VLpAWFGRSAPETYGELFELVIsETIKLLR-GKIDPucSLSTIKTDKNGTYLGlAYNGEQAILPAIIKQPEYYEaQSuFLKPTTGQKAQIRMDPLWFSFMALGFFTSFAGFIuGKYYLMTKPGIEVFWPYEVEEIIE+GILPLTuAGASGRISLoTEELYEMKLAMKLAEEGRcVIEEVYPVTLHlISLEGQVYTELKTlQLNLpELSNYlsEYVKKIEuu+VGGlsLLVELKEGsATlcKYPLWALVDIAEKELWKGVsGDcEMLAYIFVKDLYRAINSGRKELIcDSIFRLFRQGRALLEGSGRASGEFRKVMRTFMWEEHLEVLL .......hSWlVh-AGlEPDpp-LA-ALEsulcoLcpRh+.hsoS.....KluRNDRsSacKVhpsW.....FshpsPETYsELFELlIcETIKLL+csKIDPucSLpoIKhDKNGTYLGlsYNGp.AILPAI.IKQPEYYEaQScFLoPTTGpKAQIplDPLWFSlLAlGFLTuFAGaIGGKYYLMTKPGIEsaa..........PYElE-Ilccull.LTsAGhpuRhSLpsEELYEh+LuMKLAEEt+pl.-clYPlTLHlISLEGQVYTELKTlQLsLppLspYhpcYlc+IcshphhuhslhVcLK-s....pKYPLWALlslAE+ELhKGVsGDpEhLAYIhVKDLYRAINSG++ElIcDolFRLFRQGRuLLEGp..u+uSG..EhRKVh+sFMhEcHLtVLl...... 0 0 0 1 +9491 PF09659 Cas_Csm6 CRISPR-associated protein (Cas_Csm6) TIGRFAMs, Coggill P anon TIGRFAMs Family Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. 
21.40 21.40 23.90 23.10 19.80 18.30 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.17 0.70 -5.83 4 211 2012-10-11 20:44:46 2007-05-14 13:59:36 5 2 187 0 24 168 0 273.70 26 89.77 CHANGED sh-sEII..hh-sl.sspcaDlFhshFc+aLh.lpsta..-sEIlLNlSSGTPQMcSALhllslhss.shpsVQVoTPt+s.SNtu.paussp.clEphhcsN-DNcs..ss.sRshEssutshpthhlRpshhshIssaDYpuALsIlus.ppF.....l.-pl.pll......puAhhRc+L..hpt.h+s+.lhs-lh..shhpsDu.pKshpYhLhLslhtpRtplA-hlhpspshhphllcshlcc+h.t.lh.c...sc.aLsDph..pcthL.-p-splhph..Lccs...ccpshsspphLshhsalslLphhtPsppllttlpsltslpu.lRNssAHslsslspcp.hp.ht.lLStphlKph......hh.phhphppssaNhY-+hNcElhch ........................................................................................................................................................................................................................................pttlp.LlspYcYptAhplupp...t...........spph..Ll......chhhcRp....cL..............................t...pp.hl....h....s.c...hh......hh.h.pt-...........p......p..l.s.pYlhsLp......h+h+ptplhDFlRuloPhl....hhlhhph.lp..p....clP....................tcph..pt.....h...h.h.......t.thh......p.p..p....lhpt.....hp.p..........pshss..........c.t..hlthhs.hh.....p....l..l......h.t..sp.....l....hphl.p.LpphEppVRNslAH.Ips.hsE-....cl...p+s.st..h...up.t.h..........lc.l..............hp.h.st...p..t.t.h......h.aDphNt.lh................................... 0 4 6 18 +9492 PF09660 DUF2397 CHP02677; Protein of unknown function (DUF2397) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Betaproteobacteria). 
20.80 20.80 21.10 20.90 20.40 20.70 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.44 0.70 -5.90 21 146 2009-01-15 18:05:59 2007-05-14 14:13:19 5 4 129 \N 63 150 5 439.70 25 93.17 CHANGED .hpYls.u-puspYRsIhR.sFhtttpphphhLps.--V...lstLtt.tphhsch..........t.cplptcLcQLscWGNLtsppDTu+.ssolt-Fcp++ahYQlothuhplERhltplpssh.spsu.uLpsshLcclhppLppLtphs........ps-stc........laphhpcLhssFpsLspsupsahupLtpshs.pcphcs-sFluaKcpLlsYLpcFltcLtppuspItttLtplptptlc..plLptsspt-tt.st............................s........................shtcthschts+WpuLpsWFlupsu.csspschLpctsppAIspllpslpRlsEp+sstssRcs-hcpLAtWFupsss.-pAHcLhsssFGltpsRHLpss.......stc.ssshssuha-uPPlclshphRppGpttppspsutl.D+stp+cthttphtpcppptctthppL..sssstlclupls..LsspshphhLplLucAhs.......p.....ttsspo-sshplpLp.hss.sphshlcoscGsLphs-ahl ...................hpYLs.s-ss.spYRhIhR.hFhtth.p+h.phhLh..--V.......hphlpp..shhtch..........s.-phppsLppLsc...Wt..NL.st...pDs..u+.spTlpEa..cp++ahYpho.huhplcRhlhplpphh..ttsu.uLpsshhcclhppLppltphh...........ptstpc................................................lhphhpcLtssFppltpsspcahupLts..............hhs....pp...h.psctFlhaKcpLlpYL....ppFltsLpphu.pItthlpplpt.p.tlp.....pllpphsph-hthst..........................................p..............s.pphhpphts+WpuLptWFhu.pss..t.spsptLtptspphItplhthstclsEpppttssRpp-hhpLAphFsps.s...ppAHpL.ussFGlh.sRHltss.......ptc.s.shspu.ha-tsPhplshp.Rppshpppt.ttsshh.Dpstp+pthhtphhppp...ppppthhtpl..hpsstlchs.pls....lsstspphLLphlucAhs............p.......tttsps-suhphplp..ts...tptshl+stcG.pLphPshhl................................................................. 
0 24 50 58 +9493 PF09661 DUF2398 CHP02678; Protein of unknown function (DUF2398) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Betaproteobacteria). 22.20 22.20 22.30 23.80 19.90 22.10 hmmbuild -o /dev/null HMM SEED 368 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.99 0.70 -5.73 18 150 2009-09-11 13:39:07 2007-05-14 14:29:58 5 4 127 0 66 155 4 340.50 24 84.48 CHANGED lRsLLspPhlsp.sp-sEhathV+Rc..pstL+caFpcchGapLlVss..phARLtKhPssspsshshtch......hps+cYshLCLsLAhLEs....sspQhsLucLs-plp.Atss-sshs.....hshsshscR+ulVpsL+hLlslGllptsDGct-sFspcp.....s.u-sLY-lsttshthLls..hsps.uph............pshs-hs.pcshssssttpst..........RpRlhR+Ll.sPVlahc-Lssp-hsYLpsppp.tltcclp-hhGhhhEhRuEGlhhl......cs-c.phssshaPcsu........o.lucssLLlsptltpch...t.sphsts...........lstsclpshltcltpc...asssWp+..ttcstu.scLscpllshLpphtLl.........pt.s-t..lhhhPAsAR.......ausph.ss .............................................hchLLpp.hlhp...tp..-.-ha.hl+..cp..tptL+cahtcphGapLllss..ths+LtKhPsps.c.s...hhshtph.........p.hcYshhC.lhLAhLEc.......stpQhhLupls-hlp..sths..t.t................h-hsthpcR+uLVcslchhhphGllphsD....Gst...-taspst.......s.s-sLY-ssthshhhlhs..hspshtph.....................................................pphpshh.pp.ph.s.sp...pt.pt...hh...........Rp+lhRpLl.sPsVYh..p..chssp-hhYlpp.+t..tltp.clpchh.Ghthch+tpuhhll........sps..phhs.hFPsps........s.hsclsLLlsthltpph.................th.....s....h.......................ls.tphtthltcltpc...htshWt+....hpths...tplhppslphhpphthh...................................ph.tct...lhhhPhhu+htst...t....................................... 
0 23 51 60 +9494 PF09662 Phenyl_P_gamma Phenylphosphate carboxylase gamma subunit (Phenyl_P_gamma) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this protein family are the gamma subunit of phenylphosphate carboxylase. Phenol (methyl-benzene) is converted to phenylphosphate, then para-carboxylated by this four-subunit enzyme, with the release of phosphate, to 4-hydroxybenzoate. The enzyme contains neither biotin nor thiamin pyrophosphate. The gamma subunit has no known homologues. 25.00 25.00 26.60 26.50 21.60 21.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.82 0.72 -4.27 2 10 2009-01-15 18:05:59 2007-05-14 17:47:31 5 1 5 0 1 10 0 81.30 43 96.10 CHANGED MNQWEVFVMD.AELPEGppLELSVRTLNPGLKKYTYQRV+AElSsALDKFPDpLQVRhGRGQLssQpFSIRIIEpVQRMPAKYL ...........................M.pa-lFl.sLsELsEGpELELpVRDLTPGlHKYsa..+hVKApVSucPcsaP..-+LhlRFGRGQhpspsaSI+llpclp+hPt+aL...... 0 1 1 1 +9495 PF09663 Amido_AtzD_TrzD Amidohydrolase ring-opening protein (Amido_AtzD_TrzD) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are ring-opening amidohydrolases, including cyanuric acid amidohydrolase (EC:3.5.2.15) (AtzD and TrzD) and barbiturase. Note that barbiturase does not act as defined for EC:3.5.2.1 (barbiturate + water = malonate + urea) but rather catalyses the ring opening of barbiturase acid to ureidomalonic acid. 
25.00 25.00 31.10 94.90 20.60 19.80 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.12 0.70 -5.88 10 66 2009-01-15 18:05:59 2007-05-14 17:49:48 5 2 60 0 33 66 9 335.60 48 98.20 CHANGED s-VaRlPhcuPuDVSGLAsLI-sGtlcPscIVAVlGKTEGNGCVNDFTRsaATpuLpslLuc+lshu.cEV.s+lAhVMSGGTEGVLSPHhTVFAt+cspcssps...s+RLAVGhAhTcsLLPE-LGRtsQlscVAAAV+sAMcDAGIsDPuDVHFVQlKCPLLTscRIpsAcuRGcslATcDThcSMuaSRGASALGlAlALGElsuuploDpslhpDauLaSsVASsSAGVELhcsEIIVlG.SsshuGcLsIuHAVMpDAIDscuV+uALcclGLtss.......sscttuRLVNVFAKAEAussGpVRGRRHTMLDDSDIssTRHARAsVGGVlAulsGcstlaVSGGAEHQGPsGGGPVAVIAct .....pVh+lshpuPuDsSuLtthIssGhlpssclVAllGKTEGNGsVNDaTRthAstuhppsLup....+hshs..p-.......V.pplshVhSGGTEGVlSPHhTlFspp....ss.s.sstt.ss....ttRLslGhAhTcshhPE-lGRhu.lpcsAsuV+pAMtcAGIsDPuDVHaVQlKsPLLT.ppItsApuRGpsssTcc....ThcSMuhSRGASALGlAVA....LGElshs....hsD...pslhpDhsLaSulASsSuGlEL.cspllVl......GNutshuGphtIuHuVMpDAlDhsulhtAlcshGlp.s.......ts.ts+lVsVFsKuEAsssGplRG+RpsMLsDS.Dl.p.pRph+AsVGGlhAulsGcsslaVSsu..ApHQGPsGGGPVAsIsc.h... 0 14 28 29 +9496 PF09664 DUF2399 CHP02679; Protein of unknown function C-terminus (DUF2399) TIGRFAMs, Coggill P anon TIGRFAMs Domain Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Beta-proteobacteria). Just the C-terminal region is ioncluded here. 
20.80 20.80 20.80 20.80 20.60 20.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.05 0.71 -4.35 17 184 2012-10-01 21:47:57 2007-05-14 17:52:04 5 5 172 0 66 266 4 144.40 25 38.01 CHANGED hpLoLRpltphts...shsspup........VaVsENPsVhushhDp.....s...........tssPLVCTsGpPssAshtLLstL..sssGspLtYpGDFDhsGL.tlAsplhpRasscP..WRhsssDY........htussssslsspsls......ush.spLspshtppuhsshpEtllstLlsDL .............................................................h..lsLhpltth.t.....h.s.sspt........lallENsuVhst..hhpp......t..................tshsLlCs...sGp..ss....h.u.s.h.hLlctL........tts..G..s...p....lhY..p..GDFDstGl.tIAspl.....h.......p....p.....a..s.....t.....p.......a+hsspsY.............................hts.h.s.t...t.......h.s.tp......p...hp..h..t..tl......s.....s.........tlspthpph....thsshpEtlht.hh.................................................................. 0 29 51 60 +9497 PF09665 RE_Alw26IDE Type II restriction endonuclease (RE_Alw26IDE) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this entry are type II restriction endonucleases of the Alw26I/Eco31I/Esp3I family. Characterised specificities of the three members are GGTCTC, CGTCTC and the shared subsequence GTCTC. 
21.50 21.50 22.20 400.30 20.90 21.40 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -13.08 0.70 -5.79 7 16 2009-01-15 18:05:59 2007-05-14 18:17:05 5 1 14 0 1 14 11 493.30 51 93.32 CHANGED cPpFl-Yh+hIlpHPNYtGMPsshttcGcIpWpssSs+poG.Fhph.ppRhtWWcpKAcplGls..sspsthhopsA+hIHPTthKPC+hCG+.hslsYhYPs+shhp+lpK.....-pF.l..sp.ppIsclhspLhphhu-phhpph.thlhscth.phschsssh-talp.l.ppalspt.phLSPGsMuNsPDRhDGFHoaNtCCRuppDpGRpcENL+SYopDRRuFEaWsDGsWsAADpLM.....Gphthssphh...pl............SADHIGPISLGFsHcPpF.p.hCpusNSuKNsRhhhsDlppLlchEs.pucsVuSWasctlW-hhKcsV......sss-sshp..hSphLRsNpchahplLhclhp....sGpthhLushLp.cY.....ApashsF-slphpsphhphpslpp.p+hTchsptppsRtlRIAFEuLp-YspK..EsRphhts.s....cp.pphls.hhp.lp........hhs-plsp.lppss.p-c.hpslh...........tp...h+chLhphMshluccltp ..stFLEYs+hIVsHPNYhGMPDshGccGcIQWEAPSNRuSGpF+cTaQ+RhcWWcsKA+SlGID..soEpuWISKTAKLIHPhGhKPCKpCGKpM-LuYuYPNcpFhuRl+KLsYlDEoFEL..opsEHIlDLlsRLccpaGEcla.DLP+LhssKol.sIPcloSsL-sWIcaLcEpYIPpEu+hLSPGAMANPPDRFDGFHSaNRCCRShADKGRoKENLKSYsTDRRsFEYWVDGDWVAADRLM.....GplRo.NNhhh....pEECLNsst....PsPCpADHIGPISLGFuHRPpF.QhLCKSCNSAKNNRMYLSDlhpLL-AEN.EGcoVhSWaucclWs+lKHoV......cDsEcAlR..LSKlLRDNRHTYMpLLc+Ihc....-GaasFLAolL+LEh.....ANYs.hFEGLsISNHlTcYcSl+K.++pochsthpcsRplRIAFpuLp-YtpK..ENRNshhVss....c.ppphhschh.ppLpuhsp.h..ppLs-tlst.l.pp..pcpphpslhptl....h..t.cpaphhhchL.phhs.lGcpht.s..................... 0 1 1 1 +9498 PF09666 Sororin Sororin protein Bateman A anon Bateman A Family Sororin is an essential, cell cycle-dependent mediator of sister chromatid cohesion [1]. The protein is nuclear in interphase cells, dispersed from the chromatin in mitosis, and interacts with the cohesin complex [1]. 
23.00 23.00 27.50 27.50 19.40 18.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.09 0.71 -4.05 14 61 2009-01-15 18:05:59 2007-05-15 14:10:46 5 2 42 0 29 58 0 126.90 37 35.78 CHANGED ENhPP.........hsppshhhssstsspssps.V.lPsstcolQp+ushcslsIl.......WpKpV......cpo.oR.phs..............................ts.pssoPp...tp...............s+psLFGFEchLssE....-h..spssspu+sts.psVo............hpchsp.tssscslPtVs..sh+c .........................................ENtPP.......p..hppsh.t...sps.sspssps.V..s..hco.ppc.s..hcshsl........MSKKV......RRSYSRLps...............................tsssTSTP.....................uRRShFGFEshLssE....DL.tsuslssSKhhpsspVs................scshsPDpsLPGlSsss.+..................... 0 3 5 12 +9499 PF09667 DUF2028 Domain of unknown function (DUF2028) Bateman A anon Bateman A Family This region of similarity is found in the vertebrate homologues of the drosophila Bobby Sox. 25.00 25.00 31.90 28.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.07 0.71 -4.09 3 80 2009-01-15 18:05:59 2007-05-15 16:21:03 5 4 40 0 31 87 0 124.30 72 15.86 CHANGED EMPQLNFuMADPTQMGGLSMLLLAGEHALTo................PEVSSGICR.s.St.Pp.ppKSsLFpFsElSSSTSHSDsPAssKQspTSALFQ..FAEISSsTS.sQl+sA-PVKRCGKSSL.............suusptK.CtpSALFQLAEMCLASEAsKMEpS+slcsD-S ..............EMPQLNFGMA..DPTQMGGLSMLLLAGEHALGT................PElSSGTCRPD..lSES....PELRQKSPLFQFAEISS.STSHPD..AsoKQCQsSALFQ..FAEISSNTS..QLGGsEPVKRCGK........................................................SALFQL..AE.................M...CLASEGhKME-SKLhKuKES................................. 0 2 4 14 +9500 PF09668 Asp_protease Aspartyl protease Mistry J, Wood V anon Pfam-B_9589 (release 20.0) Family This family of eukaryotic aspartyl proteases have a fold similar to retroviral proteases which implies they function proteolytically during regulated protein turnover [1]. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.50 0.71 -4.55 12 445 2012-10-02 15:32:34 2007-05-16 13:33:18 5 22 263 6 291 833 24 118.70 41 29.40 CHANGED ppplpENhppAhEapPEsFupVhMLYlssclNGh.VKAFVDSGAQsoIMS.cCAE+CGlhRLlDTRatGlA+GV.Gst+IlGRlH.splKlGs.halPsuhoVlE.spclDhlhGLDMLKRHQssID .....................................ptlp-shp..A.hE...tPEsF.s..p..V..s...MLYlssclN.Gp.s.l.K.AF.V.DSGAQ.hT.IMStu.CA..-..+..C...s..l..h...R..L...l.D.p..R.at..G..l.A..p..G..V.....G..o..t..c..I.l....G..+.l...H........s.....pl..p...lts....a.l...s...C......SF.s.V..l.-........s.........p.......s..h....-..h.LlGLDhL+RHpssID............................................................................ 0 105 148 212 +9501 PF09669 Phage_pRha Phage regulatory protein Rha (Phage_pRha) TIGRFAMs, Coggill P, Iyer LM, Bateman A anon Iyer LM Family Members of this protein family are found in temperate phage and bacterial prophage regions. Members include the product of the rha gene of the lambdoid phage phi-80, a late operon gene. The presence of this gene interferes with infection of bacterial strains that lack integration host factor (IHF), which regulates the rha gene. It is suggested that Rha is a phage regulatory protein. 28.20 28.20 28.60 28.30 27.70 27.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.13 0.72 -3.63 122 1673 2009-01-15 18:05:59 2007-05-16 13:38:50 5 20 1017 \N 171 1330 10 87.90 27 38.68 CHANGED ssoShclA-hhsKcHcsVlRsIcphh...........................t........sths.a.tthp.t....s.spspp...hYhl....s+cshhhLlhuao.sttshpa+...tthIctF..pchEp.pl ....................hsoShplAchh...........sKcHcsVl+sIcplttt..............................p..h..sphs.a..psphh...........s.sp.s.cp..h.hYhl....s+cuhhhLlhuho..stts.h.pFK...tth...lptFppMEpt..................................................... 
0 39 110 141 +9502 PF09670 Cas_Cas02710 CRISPR-associated protein (Cas_Cas02710) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are found, exclusively in the vicinity of CRISPR repeats and other CRISPR-associated (cas) genes, in Methanothermobacter thermautotrophicus (Methanobacterium thermoformicicum), Thermus thermophilus (Deinococcus-Thermus), Chloroflexus aurantiacus (Chloroflexi), and Thermomicrobium roseum (Thermomicrobia). 22.40 22.40 22.40 22.40 22.10 22.30 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.34 0.70 -5.83 10 109 2012-10-11 20:44:46 2007-05-16 14:25:52 5 5 85 0 64 132 12 269.30 16 69.90 CHANGED -slthuIsshpP-+VlFLso...............-pScsp..lscl+...cpsstpspp.........hhshstDssslhcsYcchcsll-+a...thcccplllDhTGGTKsMouGLslAuhsh.....chshsYVsG....csssG+lpsGoE+lpp.psPassauclEtcpAtpLaN+tcauuAhplhcsLspR.lsccps..YshhttLscuYhtWDpFcaccAh-tLc+shsp.sl...........sscppsLpphsslh+sLpshLssttsshsth.c...shsLltDLLuNAcRRAupt+YDDAuhRlYRsLELluQtcLt.shGlsTusss.....hhpclP-tLcpsYcptpsspGht.........h+IuLhsuYtLLpshGDc..lucpa.....htphsclpphlpsRNpSILAHGacs....loccpYcpltDhlpshhpth 
.....................................................................................................................................................................................sltth..pP..ctlh.hl.ho....................................ppstth......htp...lh......p.h..th.t.tt.........................hhhh.hs.psh..p....h.a.ct.lpph.lpph........tt..t...pl.hlDhTGGTKsMusuh.s.huuh.h....................sh.p.h.Ylsst......p.t.tps..stp................................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................................. 0 27 45 60 +9503 PF09671 Spore_GerQ Spore coat protein (Spore_GerQ) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this protein family are the spore coat protein GerQ of endospore-forming Firmicutes (low GC Gram-positive bacteria). This protein is cross-linked by a spore coat-associated transglutaminase. 25.00 25.00 65.90 65.60 24.10 20.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.69 0.72 -4.33 19 183 2009-01-15 18:05:59 2007-05-16 14:32:04 5 1 174 0 31 116 0 80.90 67 52.50 CHANGED GMLPLEQSYIENILRLNKGKpATVhMTYEpuSphGopsasGIIEAAGRDHIVISEPpSG+RYLLLMIYLDYVpFsEEITYh ......GMLP.lEQSYIENILRLN+GKpATVhMTYEpuop..h..sspsapGIIEAAGRDHIlIS-Ppo......G+RYLLLMIYLDYVpFsEEIsY..... 0 10 21 23 +9505 PF09673 TrbC_Ftype Type-F conjugative transfer system pilin assembly protein TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents TrbC, a protein that is an essential component of the F-type conjugative pilus assembly system for the transfer of plasmid DNA. 
The N-terminal portion of these proteins is heterogeneous. 20.90 20.90 21.10 32.00 20.80 20.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.44 0.71 -4.47 49 432 2009-01-15 18:05:59 2007-05-16 15:26:40 5 1 316 0 67 337 22 108.60 35 41.09 CHANGED hlFlShSMPppuL+phhppspph....ssslVlRGhhss...shppThstlppLhppstt.............ssltIDPphFcpasIspVPuhVhsps...........s.spt.t.......spastltGslolphALcpls ........hhFlShShP-puLKphlt-spch.....GhssllRGhsss....sLps.TscslhsLlpcssss.............ulpIDPslFppYsIpsVPulVlhss..........................................psachlpGslplttAL-pl......................................... 1 16 35 54 +9506 PF09674 DUF2400 CHP02757; Protein of unknown function (DUF2400) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this uncharacterised protein family are found sporadically, so far only among spirochetes, epsilon and delta proteobacteria, and Bacteroides. The function is unknown and its gene neighbourhoods show little conservation. 21.90 21.90 22.30 32.70 21.70 21.80 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.43 0.70 -4.66 49 423 2009-09-10 16:07:27 2007-05-16 15:34:14 5 2 396 0 109 346 215 212.90 36 86.13 CHANGED DPlplsH+apc.pDhEIuuhlsAhhAaGNtKtIlcphccL.hpLhspuPhpalhphp.....tp..hpthpsFh..aRh.supDltthhhsLpplhp..phsuLEshFt................thppssslppulppFhpthhp.h...h..hs.cal..............ssstssSAsKRlNMaLRWMVRpcs..lDhGlWcs.lssupLlhPLDsHst+lu+pL.GLhpRKpsDh+sshElTpsLRclDPpDPlKYDFALatLGhpc ...........DPlphs+hasc.......s.EhhullsAhhAaG.st+pIlphlccL.hslhs..s..s...t.h...................pcp.h.h....pphcshhYRh..stpDlhthhhsLpplhp..c...toLcphFh....................................t.h.p.p...p...p.shhpu.ltsFhpthhph......h.....hs...chh............h...pssssSshKRhNMaLRWMVR+Dp..lDlGlWpp..lpspcLllPLDTHst+luhpL.GLl......c......RK..........phDhKsshElTpsL+clsssDPlKYDFALatlG.s................ 
0 42 91 105 +9507 PF09675 Chlamy_scaf Chlamydia-phage Chp2 scaffold (Chlamy_scaf) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this entry are encoded by genes in chlamydia-phage such as Chp2. These viruses have around eight genes and obligately infect intracellular bacterial pathogens of the genus Chlamydia. This protein is annotated as VP3 or structural protein (as if a protein of mature viral particles), however, it is displaced from procapsids as DNA is packaged, and therefore is more correctly described as a scaffolding protein. 25.00 25.00 47.00 46.70 22.70 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.47 0.71 -4.07 4 9 2009-01-15 18:05:59 2007-05-16 15:38:40 5 1 8 0 1 10 48 108.80 40 74.45 CHANGED IphIVtKhNtsuslpHl....tpRpspYs-CssPhDap-ALssVhcupEtFDuLPA+lRcpFuNsPcEhLp.......aLpc.cNh-EuhuLG..ll..-........hhcst.+csp.sssQppsh .INpIVAKhNuTGVlpHl....p+RpscYhDC.sPh-YpEALNlVhEApEtFDuLPA+lREcFuNsPcthL-.......FLsc.cNhEEutuLG..hl.t-tp......hhcst.+csp.sssQp.s.l.......... 0 0 0 1 +9508 PF09676 TraV Type IV conjugative transfer system lipoprotein (TraV) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry includes TraV, which is a component of conjugative type IV secretion system. TraV is an outer membrane lipoprotein that is believed to interact with the secretin TraK. The alignment contains three conserved cysteines in the N-terminal half. 
21.00 21.00 21.30 22.20 20.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.77 0.71 -3.40 39 394 2009-01-15 18:05:59 2007-05-16 15:56:05 5 2 298 0 59 271 10 128.70 30 69.11 CHANGED sscFuCsussus....Ctohspshppuhtt..stp.ht........................ssssssssssssssssssshsh...s..h..............................................................Pl.Rsttclh+lhlsPahDsc..GchapsshVahhlcsu...pW ....................................................................................................................po-apCsusssss...Chohcps...ctspphpt.spp.sps.sst.th....................................sss.susps.ssssp...sppthhss...cshhs..tstpsp............................................................................................................................Pl.RosppshplWIAPahD..sp..sshapsuhVahllcPutW........... 0 16 30 47 +9509 PF09677 TrbI_Ftype Type-F conjugative transfer system protein (TrbI_Ftype) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents TrbI, an essential component of the F-type conjugative transfer system for plasmid DNA transfer that has been shown to be localized to the periplasm. 21.50 21.50 22.00 21.80 21.20 21.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.13 0.72 -4.23 11 206 2009-01-15 18:05:59 2007-05-16 16:16:43 5 2 170 0 26 141 4 108.80 52 83.32 CHANGED ++p.l.hl.hslshlslsshlohhlsp..sPslVsFDMKpTlsuFapSsupppLo-tppcshssRFsssLccpLptaptpH+slILVoPAVVpGAsDlTp-IQpslhcRhp ........................pphhhhlsus.LuhVlLNAAlSau.l...l..R.l.s..sP..l.....ssAFsMKpTVDAF.aDS.AS.Q.......KpLSEAQSKALSuR...FNoALEASLQuWQQcHHAVILVSPAVV.Q..G..A.P.DITREIQQDIARRM+....... 0 4 10 20 +9510 PF09678 Caa3_CtaG Cytochrome c oxidase caa3 assembly factor (Caa3_CtaG) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are the CtaG protein required for assembly of active cytochrome c oxidase of the caa3 type, as found in Bacillus subtilis. 
21.80 21.80 21.90 25.60 20.70 21.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.53 0.70 -5.44 106 1028 2009-01-15 18:05:59 2007-05-16 16:22:26 5 6 749 0 353 921 395 237.70 26 56.82 CHANGED sshYhhuhh+hppp..stths...tRsshah.hGhhslhhshsoslshhup.hhFosHMlpHhlLthlsP.LLlLG.tPhslhh...cslssttt..........hh.hhhhsthh+hlspPhlAhhlFsushahaa.hsslashshpshhhHhlhphphlluGhLFahslls.sP...stphshht+lhhlhsshshpshhGshlhhussslhssaht...............................sas.h.ssltDQplGGhlhWhsGplshllshhhlhhpWh+pc ...................................h.hhYhhu.hhth+p+....spthss....tRh...hhah..hGhssl.hhshsoslshhup.hhFShHMhpHhlLthlsPhLLlLG.tPhsLhh...pslshsht............hh....h....h..pt....hhc....h....l....spPlhAh...llFlushahha.hs.slashsh.psphsHhhhphphllsGhlaaaslls..c................Ph...spp.hshht+lhhlhsshshpshhushlhhsspslhssahp.............................hsas..h.shlpDQplGGhlhWhsu-lshlllhhhl.hhpWh+p...................... 0 113 251 309 +9511 PF09679 TraQ Type-F conjugative transfer system pilin chaperone (TraQ) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents TraQ, a protein that makes a specific interaction with pilin (TraA) to aid its transfer through the inner membrane during the process of F-type conjugative pilus assembly. 21.20 21.20 21.20 21.20 20.80 19.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.08 0.72 -4.06 4 133 2009-09-11 14:33:19 2007-05-17 10:28:23 5 3 110 0 3 55 5 89.20 81 97.41 CHANGED MRKhRFpLPchDITGhWVhulGlaFHIVuRLVh+cP.MAhhLAElIulhhVLaGuYRlLsAWIAcsp+EE+....ARptuhhctp.-t........+ .................hpKhRFSLPRLDITGMWVFSLGVWFHIVARLVYSKPWMAFFLAELIAAILVLFGAYQVLDAWIARVSREEREALEARQQAMMEGQQEGG....HVSH.............................. 1 0 0 2 +9512 PF09680 Tiny_TM_bacill Protein of unknown function (Tiny_TM_bacill) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of hypothetical proteins, half of which are 40 residues or less in length. 
Members are found only in spore-forming species. A Gly-rich variable region is followed by a strongly conserved, highly hydrophobic region, predicted to form a transmembrane helix, ending with an invariant Gly. The consensus for this stretch is FALLVVFILLIIV. 20.50 20.50 20.80 20.50 19.90 19.20 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.58 0.72 -6.85 0.72 -4.52 27 936 2009-01-15 18:05:59 2007-05-17 12:31:50 5 2 143 0 116 418 0 25.00 71 52.80 CHANGED uaGu..GFALIVVLFILLIIVGuuah ...............GhGG...GF....ALLVVLFILLIIVGASCa...... 1 27 80 91 +9513 PF09681 Phage_rep_org_N N-terminal phage replisome organiser (Phage_rep_org_N) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents the N-terminal domain of a small family of phage proteins. The protein contains a region of low-complexity sequence that reflects DNA direct repeats able to function as an origin of phage replication. The region is N-terminal to the low-complexity region. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.27 0.71 -4.54 26 638 2012-10-04 14:01:12 2007-05-17 13:57:17 5 8 478 0 41 432 20 117.60 31 43.88 CHANGED WIKlpsshFDDcKIKll-pMs-sDslhhIWh+lllhuGKhNssGhlahscslPYTsEhLAhhhs+slssl+hALpshpphGMIEl.h-ssshplsN..ap+aQsh-sh-ppRcpscc+hp+pcp ........................WlKLppshFcDc+I+.hlcph.s.su..s..shhhIalKLllhu..hpspGt.Lhh...s...tp...lshstc.LAphhc.cshssVchslphhp....phGLlEh..h-.s.st...hhlss..ht.ph.sp.-sstsc+cRptRpppp...h.................................. 0 11 34 39 +9514 PF09682 Holin_LLH Phage holin protein (Holin_LLH) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry identifies a family of putative phage holin from a number of phage and prophage regions of Gram-positive bacteria. Like other holins, it is small (about 100 amino acids) with stretches of hydrophobic sequence and is encoded adjacent to lytic enzymes. 
24.80 24.80 24.80 24.80 24.60 24.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.23 0.72 -3.93 13 257 2009-01-15 18:05:59 2007-05-17 14:32:53 5 1 214 0 20 171 1 103.90 41 87.97 CHANGED Msplsp.Ilssulul.lsllsGhhs+tVhcaLhKK.GGEKss+IsEIlA+sAVsAVEQlss-su.KGp-KLspA+stlpshLsphsl.phoDsplcshIEuAVKpMNssh ......................MpplsphIhssAlul...LsllsGhl....l+...sVK-YL...hpK....G...GEKsl+Is......E..ILAKsAVsAVEQlus-.suhKGcEKLspApstV+spLs+h..sI..shTDcp.l-shVEuAV+pMNDs....................... 0 5 9 12 +9515 PF09683 Lactococcin_972 Bacteriocin (Lactococcin_972) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent bacteriocins related to lactococcin. Members tend to be found in association with a seven transmembrane putative immunity protein. 22.00 22.00 22.10 23.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.50 0.72 -3.62 6 605 2009-01-15 18:05:59 2007-05-17 14:37:33 5 1 388 0 27 146 0 60.30 44 66.27 CHANGED V-GGhWsaGlG..usasaScYhHup+sHsuTslut..s+os+uhAcAGshu+AShsK..sh.spcsFY ...................s-GGsWsYG......G..........s..th...saSsYhH.sp..+hHsSolh.....ut.....spS.sKG.hAt.AGspShAhlhs...sW.utphAFY.................... 0 10 15 19 +9516 PF09684 Tail_P2_I Phage tail protein (Tail_P2_I) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent the family of phage P2 protein I and related tail proteins from a number of temperate phage of Gram-negative bacteria. 
20.60 20.60 20.70 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.66 0.71 -4.62 49 1210 2009-01-15 18:05:59 2007-05-17 15:44:47 5 9 872 0 232 949 32 134.60 33 58.83 CHANGED LPssu.osLE+slt.tshspl......tthPsslpsLhsPspCPsslLPaLAWuhSV...DpW-ssWsEpsKRpllcs.........Ah.lH++KGThuAlR+slcslGh..hclhEWapps........st.......PuTFplpltlppp.......slspphhtplpcllscuKPsp .....................................................LPs......h.htht...tshstl.......pph..s.s.lpsL..h.s...P.s..sC.Ps...p....hL....PaLA.WsauV...........D.cW..-.p....s.W....sEpsKRpll+s.........AhhlHc+KGThuAl.Rcll-.s.......l..Gh....hlclpE.....Waphs.........................t.............tPsp..Fclpls.l.pp.......uls.pphh.plcclls-s+ssp.............................................................................. 1 54 137 196 +9517 PF09685 Tic20 Tic20-like protein TIGRFAMs, Coggill P anon TIGRFAMs & Jackhmmer:D3PVW8 Family Chloroplast function requires the import of nuclear encoded proteins from the cytoplasm across the chloroplast double membrane. This is accomplished by two protein complexes, the Toc complex located at the outer membrane and the Tic complex located at the inner membrane. The Toc complex recognises specific proteins by a cleavable N-terminal sequence and is primarily responsible for translocation through the outer membrane, while the Tic complex translocates the protein through the inner membrane. This entry represents Tic20, a core member of the Tic complex. This protein is deeply embedded in the inner envelope membrane and is thought to function as a protein- conducting component of the Tic complex. This family also includes many proteins of unknown function from non-synthetic organisms. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.76 0.72 -4.07 122 1525 2009-01-15 18:05:59 2007-05-17 15:49:58 5 10 1098 0 439 1083 83 107.20 21 79.33 CHANGED s-+ph.uhlsH....lu..................hh.uhls..PLllallp..+-p..ssalcppu+pulNFplo...hhlhs.....llssllshlh..........................................hhlsh.......llhhlh...........lht....llh..sIl....uul+A..ppGpha+.Y..Phs.lch....l ..............................p.h.uhLsa....hu..........................as..shls...Pllla...llp....ccp.........hlctpu+pu.l.hpls....hhlhs....llhh.hl.shlh..........................................hhlsh..............lhhhhh.........hlhs....hlh...sIh.......sslKs...ps........h..h..Ph...................................... 0 153 304 392 +9518 PF09686 Plasmid_RAQPRD Plasmid protein of unknown function (Plasmid_RAQPRD) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry identifies a family of proteins, which are about 100 amino acids in length, including a predicted signal sequence and a perfectly conserved motif RAQPRD towards the C terminus. Members are found in the Pseudomonas putida TOL plasmid pWW0 and in cryptic plasmid regions of Salmonella enterica subsp. enterica serovar Typhi and Pseudomonas syringae DC3000. The function of these proteins are unknown. 21.10 21.10 23.40 23.90 20.80 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.77 0.72 -3.90 30 227 2009-01-15 18:05:59 2007-05-17 16:26:49 5 1 183 \N 68 210 9 80.10 40 72.02 CHANGED uuhAs....sAuE+-pLAtslRQL-tlcths....pRApssAs.ssts...Ra+FDYsRlpsDLpplRpGIppYLsPSRAQPRD...sssLsGDY .......s..hsssuuE+pcLuhslRQL-tlctsl....pR.A...tstAs.sssp...RaaFDYsRlpuDlpplRtGIppYLsPSR..AQPRD....sssL.sGpY................ 
0 7 30 54 +9519 PF09687 PRESAN P_fal_TIGR01639; PHIST_a_b; PHIST_a_c; PRESAC; DUF3837; PRESAC; Plasmodium RESA N-terminal Aravind L,Coggill P anon TIGRFAMs, Aravind L Domain The short, four-helical domain first identified in the Plasmodium export proteins PHISTa and PHISTc [1] has been extended to become this six-helical PRESAN domain identified in the P. falciparum-specific RESA-type (Ring-infected erythrocyte surface antigen) proteins in association with the DnaJ domain. Overall, at least 67 proteins have been detected in P. falciparum with complete copies of the PRESAN domain. No versions of this domain were detected in other apicomplexan genera, suggesting that the domain was 'invented' after the divergence of the lineage leading to the genus Plasmodium undergoing a dramatic proliferation only in P. falciparum. A secondary structure-prediction derived from the multiple alignment of the PRESAN family reveals that it is composed of an all-helical fold with six conserved helical segments. There is some evidence it might localise to membranes [2]. 23.60 23.60 25.50 24.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.67 0.71 -4.08 162 210 2010-01-11 11:54:32 2007-05-17 16:34:49 5 7 10 0 203 264 0 125.10 16 30.63 CHANGED phppphsccclpchlp...sltt..h...hspcchhtla.phhphp+pcahphhppLhphhpplspch.pl........sc.............chp.pchWpcspptlppphtph-pthpppFhphl..ppp...h...sttc................FhphlpphpptWcphhpphpppa ......................phpcpclpc.hlp..pltt..h...hspcchhtla.phhphp+pcahphhppLhphhppltpph....pl.........sc.............chp.pchWpcshptltpphtph-pthpp.pFhphl...ppp....h.shpc................FtphlpphpptWcphtpphppph............. 0 83 84 162 +9520 PF09688 Wx5_PLAF3D7 CHP1606_PLAF3D7; Protein of unknown function (Wx5_PLAF3D7) TIGRFAMs, Coggill P anon TIGRFAMs Family This set of protein sequences represent a family of at least four proteins in Plasmodium falciparum (isolate 3D7). 
An interesting feature is five perfectly conserved Trp residues. 25.00 25.00 28.80 28.80 20.10 20.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.00 0.71 -4.24 8 26 2009-01-15 18:05:59 2007-05-17 16:45:06 5 1 6 0 26 33 0 139.00 23 60.31 CHANGED acRIluE...+sp.sacp-FslthN-pp.pphpp.scpspPh..p..-pIpclspLsspsoclWK-slcsMcpcYhcpTD..pM-+pWRDtMWpp+WsK.YL-sVHspINppLN-.sholc-KEphlspWlpWsp-DacaFLphlKE-Wcc ..........................................................................t...t...pp.s......p.ht...c..hppLhhphschWcpslpsMhppYpphT-...phspcW+phMWNppWt+.YLEtlhspIspslps.shohpcpEshlpphlphspcDFphFLp.lptcWc....... 0 5 7 21 +9521 PF09689 PY_rept_46 Plasmodium yoelii repeat (PY_rept_46) TIGRFAMs, Coggill P anon TIGRFAMs Domain This repeat is found in the products of only 2 genes in Plasmodium yoelii, in each of these proteins it is repeated 9 times. It is found in no other organism. 20.80 20.80 22.00 20.80 19.50 18.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.41 0.72 -3.63 7 34 2009-01-15 18:05:59 2007-05-18 11:17:37 5 3 2 0 34 33 0 43.80 61 80.98 CHANGED KSK+SRFPoMFKRDKKDK-sccu.......ts.ESLssD.....KSLEoLsDDscsp ......KSK+S+FsoMFK+DKpDK-sccu...............stSQEoLssD.....KSLEoLsDDscs..... 0 0 18 34 +9522 PF09690 PYST-C1 Plasmodium yoelii subtelomeric region (PYST-C1) TIGRFAMs, Coggill P anon TIGRFAMs Domain This group of sequences are defined by the N-terminal domain of a paralogous family of Plasmodium yoelii genes preferentially located in the subtelomeric regions of the chromosomes. There are no obvious homologues to these genes in any other organism. The C-terminal portions of the genes that contain this domain are divergent and some contain other yoelii-specific paralogous domains such as PYST-C2 (IPR006491). 
25.00 25.00 33.90 33.40 19.40 18.50 hmmbuild --amino -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.04 0.72 -4.33 22 27 2009-01-15 18:05:59 2007-05-18 11:23:47 5 1 4 0 26 27 0 60.10 38 47.86 CHANGED Ks.SslGN+.lRuhKcIppsNEcNsIE.KpETQLpNNNs.p.+DccDsps............KcsKcsK ......KsSslGN+.hRuhKcIspoNEKNsIE.KpETQLpNNNs.p.Ks-ccspt........cpp....t........................... 0 0 15 26 +9523 PF09691 PulS_OutS Bacterial chaperone lipoprotein (PulS_OutS) TIGRFAMs, Coggill P anon TIGRFAMs Family This family comprises lipoproteins from four gamma proteobacterial species: PulS protein of Klebsiella pneumoniae (P20440), the OutS protein of Erwinia chrysanthemi (Q01567) and Pectobacterium chrysanthemi, and the functionally uncharacterized E. coli protein EtpO. PulS and OutS have been shown to interact with and facilitate insertion of secretins into the outer membrane, suggesting a chaperone-like, or piloting function for members of this family. 25.00 25.00 25.70 25.50 19.90 19.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.50 0.72 -4.05 8 624 2009-01-15 18:05:59 2007-05-18 11:52:13 5 1 539 6 60 190 1 99.80 72 82.95 CHANGED LsGCQQsssp..sPotphts........-QlpQLuoLlAGo+YLKpcCpRSDlPD-ssIh+oAlplApp+GWssts..hptLsp+ScslYpsLhcDsTPctspCupFNpphssFI-u .....MAlCANSYALoE...............................SEAEDMADLTAVFVFLKNDCGYQNLPNu.QIRRALVFFAQQN.QWDLS..N.........YD.........TFDMKALGEDSYRDLSGIGIPVAKKCKAL..ARD.......SLSLLAY.. 0 1 15 34 +9524 PF09692 Arb1 Argonaute siRNA chaperone (ARC) complex subunit Arb1 Mistry J, Wood V anon manual Family Arb1 is required for histone H3 Lys9 (H3-K9) methylation, heterochromatin, assembly and siRNA generation in fission yeast [1]. 
25.00 25.00 29.60 25.60 24.80 24.00 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.43 0.70 -5.62 11 86 2009-01-15 18:05:59 2007-05-18 13:22:42 5 5 74 0 72 88 0 321.70 29 75.91 CHANGED KKKppsK..op+sKp+.PTGFEEYYsDsPlTPsEYpEEcc.lYss......RhEcsI.Rapp+RRh.-SsRpslFsKYLshGGVDsGs+hFsG...lDpcsLpshcu-pIhttpAps.l..............spcpspasVDFsuVs+GFLophhPhhhs.psp-.lphusssl+NFLsYlLhH-VCPEYpcslctAp+hCDlApcEL.p.pphttthPG-FNsAsSoLFGtshtcha..ssuWsspccDp.hhssplARp....llpFshuutst.htshca.-hsp.sshpshplps.puaEVsslphPssss+thYpph.......hssLpPlGKLluKsahsPGhst........hDhss-p.......pssssspcaEFalE-slLphCasGMKlpssVapLNsGl+aFDplhssasSFYphLsN-LMhGWKEP .................................................h.c..ssGFEEhhs-sPhTstEhtppcp.l...Ys..........RlEpsIpRapt+R+h.pspRtplFscYh.hGGl-sus+hFpG........spppL.......pp..hstpplhthpAp...ssl..............................stpp.pasVDFpsVstGFLSphh..hhs..s.t.hphussslcNFLpYllhH-VCPE..Yp-..slhtApplCphAppEL.tspph.tt.hPGpFNhAsotLFs.s...h...t.h.....p...pta..pt......tt........thsht......hhthhhus.st..ts..ph.....phht.pphpsh.h.p..tshElhslh.Pstthpphapph........................sssltPlGphhs+shh...............Dh.stp..................tt.phphalEcslLphhhsGMKlt...sslhcLss..G..lpahcphhshhsSFashL.p-hMhtaKpP.................................................................................... 0 15 31 56 +9525 PF09693 Phage_XkdX Phage uncharacterised protein (Phage_XkdX) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry identifies a family of small (about 50 amino acid) phage proteins, found in at least 12 different phage and prophage regions of Gram-positive bacteria. In a number of these phage, the gene for this protein is found near the holin and endolysin genes. 
23.50 23.50 23.50 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.73 0.72 -4.43 23 600 2009-01-15 18:05:59 2007-05-18 13:28:44 5 2 404 0 44 281 2 39.20 39 73.28 CHANGED a-slKphYchG..haTp-pl+paVphphITtE-YccITG.ccY ................FccIKphYshs..hY.....op-.....p....lthaVsh..t..hITcEEYppITGccY.......... 0 22 35 39 +9526 PF09694 Gcw_chp Bacterial protein of unknown function (Gcw_chp) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a conserved hypothetical protein about 240 residues in length found so far in Proteobacteria including Shewanella oneidensis and Ralstonia solanacearum, usually as part of a paralogous family. The function is unknown. 20.40 20.40 20.40 20.60 20.30 20.30 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.52 0.70 -4.56 7 705 2012-10-03 17:14:37 2007-05-18 13:31:32 5 2 422 0 274 695 451 214.20 22 85.19 CHANGED tApsuSPhohSuNluLsSpYhFRGlo.ost+PAlQGGhDhuH.SGFYsGsWsSols....ssssshu........ssEhDlYGGass..slGt..hsaDlGlhtYhYPGupsss....Y...hsEhhuuluat.l...sh+hsau.s.s.hG............pSpsstYlshssshsls.sshThhAHlGhpcst.h.ss...uasDW+lGho.+shssshshustYhDspuc.sh............u..ts.shscsshhsolstTF 
..........................................hs..........ph..ous..lsh..s..SsYha.RGloQ...........o................s..s.....p.....PAl....QG..Gl....-....hua....s....u.....G.a..Yl...G.sWuS..s..l.................sh.ss.ss.........................s.hEh.DhYuG..Ypt.....ph.s.s.....hshDlG.l..h.tY.....hY.....P......us..s..............s...s..s......................................................hsE.h.h....s....s....l..u...a...s....s....h..............pht..h...s.h..sh...s...........p..h.......h..G.................................psps.s...h...Y...h.p........h.s......h.shs.....l...s..................s....h...s...l...s..s..p.h..G.........ap..p..h........p......s..................st.................sa........Dapluls......tth................s..hshsh..thhs.ss......t..t....................................................................................................................................................................................................................................................... 0 64 153 218 +9527 PF09695 YtfJ_HI0045 Bacterial protein of unknown function (YtfJ_HI0045) TIGRFAMs, Coggill P anon TIGRFAMs Family These are sequences from gamma proteobacteria that are related to the E. coli protein, YtfJ. 21.70 21.70 22.10 21.80 21.10 21.60 hmmbuild --amino -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.80 0.71 -4.90 26 1008 2012-10-03 14:45:55 2007-05-21 11:22:22 5 2 724 0 68 382 31 154.40 61 86.22 CHANGED lplupplPsVsVs-cGELhLps..sphuYpsWsSupLsGKVRVlQaIAGRoSAKEhNusLhpAIpuApFPc-+YQTTTIlNpDDAIaGTGsFV+SShEcuK+-FPWSphVLDppGsVppAWpLppcSSAIlVlDKpG+VhFsK-GuLossElppVlsLl+p. ..................................................................h.psGppVPPVuIs-+GELlLsp..DphsYpsWNSuQLsGKVRVlQHIAGRoSAKEKNAsLIcAI....cuAp.h...PpDRYQ....TTTIVNTD.DAI.GoGh.FV+SSlEsNK+hYPWSQFllDuNG.l.u.RsAWpLs..E..cSSA..ll..VL..DKsG+VQWsKDGALTp-EVQpVlsLlpK.L................. 
0 6 21 49 +9528 PF09696 Ctf8 Ctf8 Mistry J, Wood V anon Pfam-B_46199 (release 21.0) Family Ctf8 (chromosome transmissions fidelity 8) is a component of the Ctf18 RFC-like complex which is a DNA clamp loader involved in sister chromatid cohesion. 21.10 21.10 21.70 21.40 19.40 20.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.51 0.71 -4.33 31 267 2009-09-10 14:53:20 2007-05-21 11:57:48 5 5 235 0 194 259 0 116.80 29 75.80 CHANGED pl.lpTPp......GhsllElQGslplPp......................ppshtssphGcLpa.............p.pptp+shLaVGppQRLhGplhKLspPLull++p.t...............................t.tpspspplcll-ll+hKl.lFKsRPhPl .........................t.............thsllELQGplph.t..................................................ppshtsh.lGcLpa.......................pp.ppshLhlG.appLpGclh+Lp..+PluVlc+ppt..................................................t..ttpp.psphp...lssll+tKl.lF+sRPcPl................................................................... 0 64 107 165 +9529 PF09697 Porph_ging Protein of unknown function (Porph_ging) TIGRFAMs, Coggill P anon TIGRFAMs Family This family of proteins of unknown function is found in Porphyromonas gingivalis (Bacteroides gingivalis). 25.00 25.00 25.30 25.00 24.60 24.70 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.45 0.72 -3.84 44 420 2009-01-15 18:05:59 2007-05-21 14:53:50 5 2 119 0 136 412 109 59.90 41 22.25 CHANGED Wcls.pEoKpIusYpChKATssh.................................................cthslpAWYTspIPlspGPtpahGLPGLILE ...................................................................................................................Wpl.t.s-.s.+p.I.tsY.pCpKAsspa............................................................................pGRpapAWaTs-IPl.sp.GPa+atG.LPGLIl... 
0 37 118 136 +9530 PF09698 GSu_C4xC__C2xCH Geobacter CxxxxCH...CXXCH motif (GSu_C4xC__C2xCH) TIGRFAMs, Coggill P anon TIGRFAMs Family This motif occurs from three to eight times in eight different proteins of Geobacter sulfurreducens. The final CXXCH motif matches the cytochrome c family haem-binding site signature, suggesting that the sequence may be involved in haem-binding. 21.10 21.10 21.30 21.10 20.60 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.60 0.72 -3.78 135 404 2012-10-01 23:37:15 2007-05-22 11:49:07 5 30 15 0 299 381 5 36.50 38 11.69 CHANGED oCossYCHusuts........................s.tsss...Wsssssst........C..ssCH .oCossYCHosGpu......................ssshssPs....Wsssssst........C..suCH 0 42 180 299 +9531 PF09699 Paired_CXXCH_1 Doubled CXXCH motif (Paired_CXXCH_1) TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents a domain of about 41 amino acids that contains, among other motifs, two copies of the motif CXXCH associated with haem binding. This domain is predicted to be a high molecular weight c-type cytochrome and is often found in multiple copies. Members are found mostly in species of Shewanella, Geobacter, and Vibrio. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.12 0.72 -4.44 178 1252 2012-10-01 23:37:15 2007-05-22 12:40:01 5 102 240 0 748 1592 142 43.00 29 19.96 CHANGED HsPltpu.t..CssCH......sPHuS.s..pstlL..............ptssspLChsCHspttt ............................h..ts..p.....CssCH..........................sPH...uu..s.......ptthL............................ptstsp.lC.hpCHsp...s............ 0 318 549 694 +9532 PF09700 Cas_Cmr3 CRISPR-associated protein (Cas_Cmr3) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered Regularly Interspaced Short Palindromic Repeats. 
A number of protein families appear only in association with these repeats and are designated Cas (CRISPR associated) proteins. This highly divergent family, found in at least ten different archaeal and bacterial species, is represented by TM1793 from Thermotoga maritima. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.17 0.70 -5.03 33 217 2012-10-01 21:23:39 2007-05-22 12:54:02 5 3 185 0 98 248 5 321.60 16 88.35 CHANGED hhlpPhDslhFR-uRPFs...susps...stuhhhP.PpTlAGAlRsthhppsshph...sth.t.........................lplh.GPhlhc...........ptchhaP...tPhchhh.........hcppsshhphhtlc..........................ht.hhlsst...............tsthp.hsua.lshpslhp.aLpuch.ptp.........plhptt.............hhphEpRlGlulcspppss..........cEutLYpsp.hlRhp....................tshslulhlcsssssph...................tshhpLGGEsRhu......thch..........tth.sh.pt.ttshtpspphtl..hLhTPuhFspuh..hst..................................lsusul..s+sh....hlS.GWDh.tppcP+shp.......hh......sPsGoV...............aahchtpststtt.pttt........................tp.....hpp.GaGphlhsth 
........................................................................................................................................hpPh-shhhtpspsFt.........ttt.........ht..s.h.P.spshhGhlpsh....hh.tt.t...................................................h.tlh.Gshhht..............ttphhhP...hPhshhh..................tttt...h...hp....................................h.......................tt.........t.a..hthpth......t..hhpst....t.....................ph.t................................ht.c.+htlslct..pppps...........c-..uhLaptp..hlc.ht..........................tthshshhlphttttt.........................tthhplGGEs+.hs.....tlph.................................th.p..t..htt..t........t...s.pthtl..hLlTPuh.h.p...p.sh.....hsth..............................................t....lhhhsh.....s+.h..................lu...Ga....ch....t....pp...p..s+sht..........................hh..........lssGoV...............a.hhc...h..t.t......................................t......pp.Gauhhh.............................................................. 0 59 78 92 +9533 PF09701 Cas_Cmr5 CRISPR-associated protein (Cas_Cmr5) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This family, represented by TM1791.1 of Thermotoga maritima, is found in both archaeal and bacterial species. 
20.40 20.40 20.70 21.10 20.20 20.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.51 0.71 -3.94 25 134 2009-01-15 18:05:59 2007-05-22 12:57:38 5 1 127 7 72 132 5 120.80 23 88.87 CHANGED Tl-QppAptAappl...................pplppt................cphpccYtshs+cLPshIhpNGLsQsLAFlhuKucpptctt...........................LhccLspaLpppst...............sllctlhpt...-hppYhthTpEsLuhLsWl+RaApuhL .............................hppphAphAhptl...................................................................pp..htp.................pphpccYpshs+phPshIhpsGLspslAFhhuKsppptpt......................................lhppltph.....lppptt......................................slhct.l.hpt...s..tpYhhhTpcsLthhsWlKRaupuh........................................................................... 1 47 59 67 +9534 PF09702 Cas_Csa5 CRISPR-associated protein (Cas_Csa5) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry represents a minor family of Cas proteins found in various species of Sulfolobus and Pyrococcus (all archaeal). It is found with two different CRISPR loci in Sulfolobus solfataricus. 21.70 21.70 22.30 22.60 21.60 21.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.23 0.72 -3.95 5 26 2009-01-15 18:05:59 2007-05-22 13:02:21 5 1 25 0 13 27 0 100.10 43 74.66 CHANGED sFlYoEoPTYVDRIuNALSKEAVs+VLaEupRIlpoGl-uGEIcttss.........sGR+....YlsV..tEK-G.sYIllGtLPSDcDVEpFLc-VERDIYhARKVGALAMAhsN+ ..h.FlhoEosolVDRhANALS+EsVs+sLaDs.Rllphul-pu.EIss..h.........pG+c....Yst.s.hppp-G..chhllGhLPosp-lE.FLchlccDlhhARKsGALAholss+.............................................. 
0 4 4 8 +9535 PF09703 Cas_Csa4 CRISPR-associated protein (Cas_Csa4) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR loci appear to be mobile elements with a wide host range. This entry represents a protein that tends to be found near CRISPR repeats. The species range for this species, so far, is exclusively archaeal. It is found so far in only four different species, and includes two tandem genes in Pyrococcus furiosus DSM 3638. CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. 25.00 25.00 97.10 96.10 20.30 19.80 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.11 0.70 -5.48 6 13 2009-01-15 18:05:59 2007-05-22 13:06:21 5 1 12 0 10 13 0 355.20 23 95.95 CHANGED lshhhTPGas.hhDThIhYGlVcsLstuGhssu.cVhshGpcYhIps-ssspt.hp...Eplc........huLhpshEEh...HhAhhscpsp..ss+lhpstDhssGssIsss..hplahctlstpLphlpcphctu++uscuc......thsTlsLsLhPshGKYh...lcphshpEspsh+VsphsYALAhlGFaY..YushlhhpcGcsplVplhshs....sh--LshlphLs.p-Lspcl.hsthschc.hlsp.hu.LYhLhhoEol....ElsocppFsllsYsh.EpssNsptlRsFtsl-lu+lh-FlhpLKthshYcshth.+hl-pL...........h+ts.ElhtpLh-slha-..s.tuhYoslRtl......p+ushsu...c..hlsslt-hlsph .....hhTPGHshIsDsLIhaGlVctL.stuGthcu.pl..hG-cYhlps-hsstphh....pshc.........tLhphhcEh...+hshht.psp..tshlhpshshshstshss.....sah.....cplstpLpp.lccp..hcp.t++phcuc........thhTlhLsLtPshGKYh.............lcsas..lpp.statVCshCaALAhlGahY..hoshl..phcc.....Gpp....shVplhths....sh-clshhplLslpchtppl.hhhhsp.p..lsphhA.LYhLshuEol.....hsscpshsllsYsh..p+sGNs..Qthpuhtph-ls+L.h-FlhplKt..hYcs.ph.+hl-sL...........hcc....s.EsLtsLh-slhac..p.tshYpslRpl......c+ushstp......ht.l.phh.................... 0 6 8 8 +9536 PF09704 Cas_Cas5d CRISPR-associated protein (Cas_Cas5) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. 
A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This small Cas family is represented by CT1134 of Chlorobium tepidum. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.51 0.70 -4.72 102 1346 2012-10-01 21:23:39 2007-05-22 13:35:06 5 3 1181 1 402 1115 32 210.50 18 87.80 CHANGED slhl+lhG.shtsa.upsthtspRs....ohshPohSulhGlLsush.............uh.tt...................hpsslpaslct...........pptsplhp..sh.......pssp...................................t....tttttsshls.............pttYhhcAtahlshpsssp...........................hhp.hpctlccsthhhh..LGp+ps.s.stslp..................hps...pshh.......pshpphs.......................hthhh..........................................t..st..hpchss.......hps...phphuhhpl. .......................lhhplhG.shAsa.spsthtttRt.....oaplPohSAlhGlLuAsh...................Ghtptt...................................hp..p.hphslpt.............................spp.sp.hhp....sh.........posp............................................hh.t......t..t...phhssh..lp..............................................ptpYhh.-A....h.....ahl.sl..tsssp...........................hhlp..lpc.sl..c..c..s..h..h...hh...LGp+pp..s..s.slt...............hps......psh....................pshp.ph.............................................................................................pt.s...............t...h..u.h................................................................................................................................................................................................................................................................................ 0 160 289 348 +9538 PF09706 Cas_CXXC_CXXC CRISPR-associated protein (Cas_CXXC_CXXC) TIGRFAMs, Coggill P anon TIGRFAMs Domain CRISPR is a term for Clustered, Regularly Interspaced Short Palindromic Repeats. 
A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. This entry describes a conserved region of about 65 amino acids from an otherwise highly divergent protein found in a minority of CRISPR-associated protein regions. This region features two motifs of CXXC. 21.00 21.00 21.20 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.59 0.72 -3.80 10 84 2009-01-15 18:05:59 2007-05-22 14:03:33 5 2 83 0 35 86 4 64.70 28 12.16 CHANGED cscsphsChsCGcRpspphKs.ht.h.cosh.PLhuS.ushtNaFWsspsssslCPlCpllhphsPhuhh ....................h.pctph.ChhCu...p..t..t.....hc...s...t.hshhtcss...........a.....s.sctpsahWsh.p.s.shslCslCpllatssPhGhh........ 0 14 21 26 +9539 PF09707 Cas_Cas2CT1978 CRISPR-associated protein (Cas_Cas2CT1978) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a minor branch of the Cas2 family of CRISPR-associated protein which are found in IPR003799. Cas proteins are found adjacent to a characteristic short, palindromic repeat cluster termed CRISPR, a probable mobile DNA element. 21.80 21.80 22.10 26.80 21.50 19.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.74 0.72 -4.04 33 643 2009-01-15 18:05:59 2007-05-22 14:09:50 5 3 629 0 107 344 9 83.70 58 72.52 CHANGED MhVllscslPs+LRGcLuhWhlElcuGVYVG.shSs+VR-hlWpplsch....hpc.GsuVMsassss.EpGashpThGcscRtsVDhDGlhL ........MlVlVsENVPPRLRGRLAlWLLEVRAGVYVG.csSpRlREMIWpQlsph.......sss.GNsVMAWATNo.EuGFEFpTaGcNRRhPVDlDGL+L... 0 28 75 92 +9541 PF09709 Cas_Csd1 CRISPR-associated protein (Cas_Csd1) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR loci appear to be mobile elements with a wide host range. This entry represents proteins that tend to be found near CRISPR repeats. The species range, so far, is exclusively bacterial and mesophilic, although CRISPR loci are particularly common among the archaea and thermophilic bacteria. 
Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). A number of protein families appear only in association with these repeats and are designated Cas (CRISPR-Associated) proteins. 21.20 21.20 30.00 29.90 21.00 20.40 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -13.01 0.70 -6.22 36 393 2009-01-15 18:05:59 2007-05-22 14:22:25 5 3 343 0 123 375 18 525.50 25 96.60 CHANGED LhchYpRhtppst............lsPhua.stcpls...ahlsLstpGshls....shcptcs+cth...h.lPpst...pRo.oult..sphLhD+hsYVLGs.s...........ptttcptpt.........apuFhphlpp.hsss...p-tslp...AlhpFLcp....s..htshh..............thpp..............................p.tss.lsF+l-.ussp...hlaccsslpphWtphhssp...ps.........ps.lCLVoGcps.slAchHPshh....tspusG..ApllSFNps.uFsSaG+..p.....pupNAPlShpuuhthssALsaLlp+pppp+............plGD.sssVFWAppss............hpsshts.hshh..........tcsss................psspplpthlpphhsGhtsp.sss.....scaalLGL.usNuuRlulRaacpsshuchhc+lppaac-hthhp.t...................hhpsPt.hslhpllhuss...........+s-s.lsP..sLuucLhpuIlsGp...aPpsLlpsslpRhR...upp..............phsh.+suhl+AsLhRph+hpppt........hshuLDpspsshuYLhGRLFAlhEplQcpAl...sG..lNATItDRaauuASssPsslF.hLhcttps.HluKLtpcp.Ghhh.h..-+hlsEIhstlss...sFPsshsLspQGpFslGYYpQRpshap 
..................................................LhphYpph.ttst....................l.s.ua....spppht.....hhlslspcGphhp.........hp.....h..p...tcpth...............h.lPts......pRo..osht.........sphLhDphtYhhGh.t...................................pt..pttp..ttt.................apsahphhtp.hhp......ps......t...lp.....Alhpalpp......ts.hht...................phtt....................................thhss.l...hFplp.tttt................hlhpctshpph.atphhpsp........................pst.......tt.hChloGcps...sluch.Hsshh.......tspsss.Ap..llShNt..........s..uFs.h.G+..........p.........pstsuslutpsu.thpsALpaLlpppt.ph............................plGs.tshVhWupssp..............h..p.h.....h..shh........................t..p.............................pttpph....tphhp...p..h..h..pGh.....hp..s.........tph.alluL.uss..suRlul+aahphshsphhppltpaapc.hth.t...............................htt.s...sl..hphlhtss....................pscp...h.s.......tlhtplhpullsGp...hP...slhpthltRh+.......utt................p.hs.h.+.....s.th.l..pshLh.+phph.tt.................hshsLspp....p..p.shuYlhGRLhAlhE..plpptAh......................s...hpsTht-RahsuhuspPhpsashLhphhp...ahp+.l.p.p...t...........s.....h...........h.............cc.ls-lhstls................sa...s.p..phsLp..u.tFhlGYYpQ+pthh.......................... 0 51 88 107 +9542 PF09710 Trep_dent_lipo Treponema clustered lipoprotein (Trep_dent_lipo) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a family of six predicted lipoproteins from a region of about 20 tandemly arranged genes in the Treponema denticola genome. Two other neighbouring genes share the lipoprotein signal peptide region but do not show more extensive homology. The function of this locus is unknown. 
24.70 24.70 24.80 37.10 24.60 24.60 hmmbuild -o /dev/null HMM SEED 394 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.40 0.70 -5.79 4 20 2009-01-15 18:05:59 2007-05-22 14:31:14 5 1 2 0 8 20 0 282.90 34 80.84 CHANGED sKLKLIFILhLAVLLFSC.oKElKEtpphcscV.SshhhEsK.....Esph.SKsE.spaV..............poP..E.IK.LEpK....ctat.pL+plthELDch-.lh..sYcp.sthuIDpLspQK.........NlEhI...clc+DsE.hYCK.KtsspNGtpL.YplsYcWYshtphalshsY-LLpEpNhstIs-clIcpQVHGplhphcpD.cK-chIKlFEcY.Np..............Ttl+ppchtpYsh+lLDYVKGNFTNSGYDEYhVhFhp-................ssDsEl.D.......paIccVcCFlV-psKlIKsYhIsspuuhFhPs.hppssL.t.........lpsFGhpFSQGWlADFNQNGINEIY.ltahphtsstlhhlEF.DppFVpthlhsps.-lsuVDWaKKpI..........ll+DcotsscWhc-.YQWN-s.+palL ..sKLKLIFILlLAshLFSC.SKElKEpp.EcscVEossKhE.K.........psEh.uKt...Ehspch..............cos......EtIK.L.E-K....Ktat.plp.l.hpLDhh-.hh..th.....hshs.h...p...............hphh...ch.p-tc.h.hh.ph.....t.h.Yph.hp......h.p....................................................................................................hFh....................tsph.p.........lp.lthFlh.ttphhp.Y.l......h.....t...h...............t..h..shluDhNQNGhNEIa..............h..s.hh.hEa.tt.F....l....t..h.slDW.pphh................................................................................................................................................... 0 8 8 8 +9543 PF09711 Cas_Csn2 CRISPR-associated protein (Cas_Csn2) TIGRFAMs, Coggill P anon TIGRFAMs Family CRISPR loci appear to be mobile elements with a wide host range. This entry represents proteins found only in CRISPR-containing species, near other CRISPR-associated proteins (cas). The species range so far for these proteins is pathogenic bacteria only. Clusters of short DNA repeats with nonhomologous spacers, which are found at regular intervals in the genomes of phylogenetically distinct prokaryotic species, comprise a family with recognisable features. 
This family is known as CRISPR (short for Clustered, Regularly Interspaced Short Palindromic Repeats). 20.90 20.90 21.20 20.90 20.80 20.70 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.11 0.71 -4.61 5 132 2009-09-11 11:10:28 2007-05-22 15:11:46 5 1 132 12 11 105 1 185.20 44 84.50 CHANGED lVKahYQYDEDSELKlFDcKhKSLKuSELMLVTDILGYDVNSuslLKLIHADLEsQFNEKPEVKSMIDKLsATITELIuaECLENELDLEYDEITILELIKALGVKIETQSDTIFEKhFEILQVYKYLoKKKLLVFVNSsAYLTKDElhpLpEYIsLoQhsVLFLEPR+l-GFsQYILDcDYFLIsEN ...............................................................llp.hYQYptpp-LKlF..Dtc.+sL.K.toELh.llTDlLGaDlNSsshLKLIauDLEsQ.....L.N-KPEVKSMI.-..KLsuT.IoE.L.Iu...a.E..h...LE..p.ELDLE.h.D.EITllELhKALGlKIE.Tp.S.D.T.lFE..Khh.E.IlQVa+YLoKKKLLlFlNssuYLTc-EltpLhEYIpLsplpVLFlE......s...R....c.......l.........s..h.s.Q.....YllDpDaaL..c......................... 1 2 6 9 +9544 PF09712 PHA_synth_III_E Poly(R)-hydroxyalkanoic acid synthase subunit (PHA_synth_III_E) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents the PhaE subunit of the heterodimeric class (class III) of polymerase for poly(R)-hydroxyalkanoic acids (PHAs), carbon and energy storage polymers of many bacteria. The most common PHA is polyhydroxybutyrate but about 150 different constituent hydroxyalkanoic acids (HAs) have been identified in various species. 
22.10 22.10 22.40 22.20 21.60 22.00 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.97 0.70 -5.01 10 111 2009-01-15 18:05:59 2007-05-22 15:24:40 5 1 108 0 41 130 21 237.60 22 79.03 CHANGED sDahEhQRpYWsuWu-suR+uhu.........t.tupssssWpcul-.Whcthussusspspshh-chhcQupsaathu-phhpt..ttsttp..........LcQhtcpFuG.hp.....tssotpchsshWphPl-s.....aQphhSohpshssshhpsh........hpthppphsphLssPuLGhsREcQuQhQpLhRsth-YQpAhpcYssthsplu.cul-+htspLptshsSupsls..SsRALYDpWlcssEpuYu-hlto--YsplaGcLlNApMpL++thQphlDchhpuLshPTRpELcosp+RLpELRRcp+tLp+ ..........................................................................................................t.....................................................................................................................................................hpt.h....t.t.............................t....t.h.t.a......tt..................ht...ht..s.......s.s.ht.h..............................p.h.ptt.t.hh.....p..PhhG.s+p..ptph.t.hhcs....ph.tt.ttY.t.htpsh.puhtphtpcl.ph.ppspplp..oh+thh-hWlpss-csatchhtocpFtcshGphlsuhhch....+tphpchh-phhcphshPTRuElDsltc+ltELcRcl+pl..p.................... 0 13 26 35 +9545 PF09713 A_thal_3526 Plant protein 1589 of unknown function (A_thal_3526) TIGRFAMs, Coggill P anon TIGRFAMs Domain This plant-specific family of proteins is defined by an uncharacterised region 57 residues in length. It is found toward the N terminus of most proteins that contain it. Examples include at least several proteins from Arabidopsis thaliana and Oryza sativa. The function of the proteins are unknown. 
25.00 25.00 25.00 25.50 24.00 23.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.70 0.72 -3.83 15 206 2009-01-15 18:05:59 2007-05-22 15:30:56 5 3 36 0 120 187 2 53.00 50 20.53 CHANGED VQpLIE+ClphYMS+EEsspsLpcpAsIsPhhTps........................VWppLEKENtEFFcsYtp+ ..............VQpLIE+CLphaMspcEslc.sLpp+.ApIcPshTph.........................................VWpcLEcENpEFF+AYhh................. 0 27 75 99 +9547 PF09715 Plasmod_dom_1 Plasmodium protein of unknown function (Plasmod_dom_1) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent an uncharacterised family consisting of a small number of hypothetical proteins of the malaria parasite Plasmodium falciparum (isolate 3D7). 21.30 21.30 32.20 32.20 20.80 19.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.20 0.72 -4.04 5 9 2009-01-15 18:05:59 2007-05-22 15:47:30 5 1 1 0 9 9 0 66.30 39 31.54 CHANGED oFFKRAKLlL-sFDNIFIDKLIDuNIpNKuSsl+EDVh-NuLsLssAsI.hhAIPIaoYlsKRINFF SFFKKhKLlhshhDsla.DhLIssslpstu....EhV..ssLuhhsushP.huhshhsYlspRINFh 0 9 9 9 +9548 PF09716 ETRAMP Malarial early transcribed membrane protein (ETRAMP) TIGRFAMs, Coggill P anon TIGRFAMs Family These sequences represent a family of proteins from the malaria parasite Plasmodium falciparum, several of which have been shown to be expressed specifically in the ring stage as well as the rodent parasite Plasmodium yoelii. A homologue from Plasmodium chabaudi was localized to the parasitophorous vacuole membrane. Members have an initial hydrophobic, Phe/Tyr-rich, stretch long enough to span the membrane, a highly charged region rich in Lys, a second putative transmembrane region and a second highly charged, low complexity sequence region. Some members have up to 100 residues of additional C-terminal sequence. These genes have been shown to be found in the sub-telomeric regions of both Plasmodium falciparum and P. yoelii chromosomes. 
25.00 25.00 25.50 25.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.90 0.72 -4.03 32 90 2009-01-15 18:05:59 2007-05-22 16:41:14 5 5 16 0 54 94 4 83.20 24 37.35 CHANGED MKlsKl.hhhhshLLulpll.............................................sPsh.ssss.tspss.................................+thtphpsshp+K...hcppKhhllSoluoslAl..llusul.GhuhYpppKpp ..............................................MKlsph.hhhhhhlhh.hphh......................................................................ssh.spts.t.tppts........................................tslcphc.pslpcK...h+pcKhllloo..luoslsl..llussl.GlshYhp+Kp.s...... 0 13 25 44 +9549 PF09717 CPW_WPC Plasmodium falciparum domain of unknown function (CPW_WPC) TIGRFAMs, Coggill P anon TIGRFAMs Domain This group of sequences is defined by a domain of about 61 residues in length with six well-conserved cysteine residues and six well-conserved aromatic sites. The domain can be found in tandem repeats, and is known so far only in Plasmodium falciparum. It is named for motifs of CPxxW and (less well conserved) WPC. Its function is unknown. 21.10 21.10 22.50 21.90 20.90 20.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.83 0.72 -3.35 72 270 2009-01-15 18:05:59 2007-05-22 16:49:37 5 9 13 0 270 301 4 60.40 28 48.96 CHANGED Cp.....cs.Ysp..CPtsWh...........tspst.....Chus..ssY....pGs..Cpphh.pFpp.hspppKppauppCplpWPChp .......................ps.Ys.....t..CP..hsWh................hspsh............Chus...psY.................pGs..Cpp.....hh..shps..h..s.pppKpp.....apppCplpWPC................... 0 84 120 223 +9550 PF09718 Tape_meas_lam_C Lambda phage tail tape-measure protein (Tape_meas_lam_C) TIGRFAMs, Coggill P anon TIGRFAMs Domain This represents a relatively well-conserved region near the C terminus of the tape measure protein of a lambda and related phage. 
The protein, which controls phage tail length, is typically about 1000 residues in length. Both low-complexity sequence and insertion/deletion events appear common in this family. Mutational studies suggest a ruler or template role in the determination of phage tail length. Similar behaviour is attributed to proteins from distantly related or unrelated families in other phage. 21.80 21.80 21.80 22.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.26 0.72 -4.07 62 1382 2009-01-15 18:05:59 2007-05-22 16:53:26 5 11 585 0 77 1312 114 76.70 38 9.36 CHANGED pusWhsGs.ppuhtsah-sApssuup...spphhosAFsuhpsulssFlsTGKhsFcs...........assSlluDlu+lssptuhssslsu .......................pGsWhsGl.ppGaupat-sA.sD....shuQ...scsssTpsFsGh....up....shAshlT...such...s...aRu...........F...T.pSVLu.hocIlh+tAhlshlt............................... 0 15 38 57 +9551 PF09719 C_GCAxxG_C_C Putative redox-active protein (C_GCAxxG_C_C) TIGRFAMs, Coggill P anon TIGRFAMs Family This entry represents a putative redox-active protein of about 140 residues, with four perfectly conserved Cys residues. It includes a CGAXXG motif. Most members are found within one or two loci of transporter or oxidoreductase genes. A member from Geobacter sulfurreducens, located in a molybdenum transporter operon, has a TAT (twin-arginine translocation) signal sequence for Sec-independent transport across the plasma membrane, a hallmark of bound prosthetic groups such as FeS clusters. 
21.30 21.30 21.30 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.30 81 793 2009-01-15 18:05:59 2007-05-23 10:44:21 5 8 499 4 237 679 25 120.90 26 77.02 CHANGED asCupulltuhs-phuhs.ss....sl+huuuFGuGhG...sGssCGAloGuhhslGh..................hhGcsp...............ttttppchhthsp-lhctF+....ccaG..uhpCcpLhtt..................tt..tpppCsplltpssc.hlt-hL ................................asCupulltsh....s....-ph...shsh..pp.....shchuuuF...u.uGhG..tht.ssCGAloGuhhslGh..............................hhGcsp.......................ttttptpshthsp..........-lt.p..p.Fc......................pc...hG.....ohhCppLhst............................tp..ttppCsphltpsschhtchl............................................ 0 118 214 231 +9552 PF09720 Unstab_antitox Stabl_TIGR02574; Putative addiction module component TIGRFAMs, Coggill P anon TIGRFAMs Family This entry defines several short bacterial proteins, typically about 75 amino acids long, which are always found as part of a pair (at least) of small genes. The other protein in the pair always belongs to a family of plasmid stabilisation proteins (IPR007712). It is likely that this protein and its partner comprise some form of addiction module - a pair of genes consisting of a stable toxin and an unstable antitoxin which mediate programmed cell death - although these gene-pairs are usually found on the bacterial main chromosome. 22.40 22.40 22.40 22.40 22.20 22.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.32 0.72 -4.28 83 291 2009-01-15 18:05:59 2007-05-23 14:36:35 5 3 140 0 122 319 88 54.20 24 70.64 CHANGED pLsspERlpLl-pLhcSLsp...s.ps..c.lsphatpElccRhpthcsGpspslsh--lh ...........................pLs.pERhtLs-pLh.cSLst....s.ps..........p.lsphatpEhccRhpphcsGpsp.slsh--lh....................... 
0 43 74 117 +9553 PF09721 Exosortase_EpsH Transmembrane exosortase (Exosortase_EpsH) TIGRFAMs, Coggill P anon TIGRFAMs Family Members of this family are designated exosortase, analogous to sortase in cell wall sorting mediated by LPXTG domains in Gram-positive bacteria. The phylogenetic distribution of the proteins in this entry is nearly perfectly correlated with the distribution of the proteins having the PEP-CTERM anchor motif, IPR013424. Members of this entry are integral membrane proteins with eight predicted transmembrane helices in common. Some members of this family have long trailing sequences past the region described by this model. This model does not include the region of the first predicted transmembrane region. The best characterised member is EpsH of Methylobacillus sp. 12S, where it is part of a locus associated with biosynthesis of the exopolysaccharide methanol-an. 25.30 25.30 25.30 25.40 25.10 25.20 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.97 0.70 -5.18 102 524 2009-01-15 18:05:59 2007-05-23 14:54:15 5 4 392 0 259 557 185 236.50 17 69.36 CHANGED hlhshhlhh....ah..h.t.........hhttW..hp.s.....tphsHuhllhslsha.lhatpp.t.h...tt...ts..h....ulhhlhhuh..hhalhuph.......sth.hhhtthuhshhlhuhhhhhh...............Ghphhp............th......hhPlh.hllhhlPhs..thl......hs.hs.sLpthsuphsshhLphhGhslhh......c..........Gshl.....hlsstp....lpVspACSGlchlhshhsluslhshh..hptshhp.+hhhlhsul.slsllsNhl....Rlhhlsl.ls...hhs.........................phu..tshhHthhGhlhFssshhhlh.hlsthhtph 
.........................................................................hh................................h........................h.hh..h.hh...hhh.......h.....................................shhhhhhhh.....hh.hhht.................................h..hsh.h.hhh..hshhhhhh...........................Ghthht................................hh.........hhslh.hhh.hh..l.Ph........h............h...h.h.....sLpths...uthss..hl..l.p..hh..G.....h..s...shh.......p................................Gshl.....hl.s.stp.......lpVst..uCoGlph.lhhhhulshhhh.h........h.tshhc...+l...h.h....hhh.ul....slsh....hsNhl....Rlhhlsh..ls......hhs...............................phh....hshh..H.p.h.h..G..hlhahhhh.hhlh.hhh......hh...................................... 0 88 205 245 +9554 PF09722 DUF2384 CHP2293; Protein of unknown function (DUF2384) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this family are found almost exclusively in the Proteobacteria, but also in Gloeobacter violaceus PCC 7421, a cyanobacterium. The function is unknown. 20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.39 0.72 -4.11 120 1010 2009-01-15 18:05:59 2007-05-23 17:01:51 5 9 668 0 338 835 106 54.10 24 37.91 CHANGED hppspsla.Gsp..........ptAppWLp.pPshu..LsGppPl-llp.o..ttGhptVtphLspl.ctGl ...............................................h...h..lh.sst..........ptA.tpWlp..pPstu..LsGppPl-llt..s....suhhtVhphLsthctG................... 0 84 192 261 +9555 PF09723 Zn-ribbon_8 CxxC_CxxC_SSSS; Zinc ribbon domain TIGRFAMs, Coggill P anon TIGRFAMs Domain This entry represents a region of about 41 amino acids found in a number of small proteins in a wide range of bacteria. The region usually begins with the initiator Met and contains two CxxC motifs separated by 17 amino acids. One protein in this entry has been noted as a putative regulatory protein, designated FmdB. 
Most proteins in this entry have a C-terminal region containing highly degenerate sequence. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.56 0.72 -4.00 182 1432 2012-10-03 10:42:43 2007-05-23 17:08:51 5 3 985 0 622 1298 568 41.50 36 43.22 CHANGED MPlY-YpCp..pC..GppFEhlpphu-..ss.ssCPpCuu..p..hp+hlS ...MPhYcYp.......Cp..sC..GppF-h.l.p.p...hu-......sshssCP..pCuu..phc+lh................... 0 254 465 555 +9556 PF09724 DUF2036 Uncharacterized conserved protein (DUF2036) KOGs, Finn RD, Coggill PC anon KOGs (KOG0798) Family This family of proteins includes members ranging in size from approximately 300 to 460 residues. There are a number of well-conserved domains along the length. 20.50 20.50 21.30 20.60 19.10 19.80 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.01 0.70 -5.29 22 306 2009-09-11 06:41:13 2007-06-15 09:23:13 4 5 268 0 216 306 2 281.80 22 80.09 CHANGED sYpLLplsscllcplc......pu....p..sLtl+Gt..................s-cu..VLCocs+TaslKthcpSNolLLh.thh.s..................................p..tst....phphhshspphhElpp..h.psphcplcp..lthshasG..cstt..ch.t.............ashcpLlsssQsScpEhpptLpp.lsuhpl-.GthplLs.cahh+hLshllthlsspuhsh..s....clshpsshpslt.s...........hsppllcsslpha....upp.s-..................hapLctpplsphhuhtlLpphh.........phplc-...........Fh.pWppplP.......shshchchLcGhsllp...........pstsp...plphls.psL.Ppshp-RFppLFph+p.cWsh--lpPaIc 
...............................................................................................hpLlcL.ssplhp.lp......................ps.........p.........pLhl+ut.........................................................s.-ps......VLCops.cTapl+tspp..SNslllh..h..stt................................................................t...............thphhuhh.pphhElpp.......h....tsphp...tlt.h..h..s.apu..p........ptt.p.t......................................................hshppLhpph.sSptEh.pthp.p...htu...h.............h......p....G......h................hp.h.lp.ph.hphl..phlht.h.t.ps...hth..p.....................p.hshpphhptlt....................................................spthhpth.l.p.ha........sp...t................................................................t.........hhplptptl...sphhu..h.Lpth......................ththp-..........................Fht.Wppt.lPt...............th.sp.hp....Lp.Ghhh................................................t.st.t..hl.hh..ppL.s..p.....ttRht.LFphp..pWp.t-htPal................................................................... 0 74 121 178 +9557 PF09725 Fra10Ac1 Folate-sensitive fragile site protein Fra10Ac1 KOGs, Finn RD, Coggill PC anon KOGs (KOG1297) Family This entry represents the full-length proteins in which, in higher eukaryotes, the nested domain EDSLL lies. Fra10Ac1 is a highly conserved protein, of unknown function that is nuclear and highly expressed in brain [1]. 
20.90 20.90 21.50 26.10 19.20 18.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.75 0.71 -4.01 11 188 2009-01-15 18:05:59 2007-06-15 10:13:15 4 8 151 0 128 192 2 114.90 55 41.83 CHANGED +TDhDlLKccH+Fl+D----sc.........sWtcpLA+KYY-KLaKEYsIhDLS+Y......+csKl....................GhRWRTEsEVlsGKGphhCGs++C.....................................................pcspsLcoaElsFsYhEpGccKsuLVKlRlC.cCuhKLNY .......................................+TDhDll+EsHRFlhs-E..-.-...-.......................oWEpRLAK+YYDKLFKEYCIADLS+.Y......K-sKh....................GhRWRsEcEVlsGKGQFhCGN++.C..........................................................................................................s.c..c-sL+SaEVNFuYhE.cGEp+sALVKlRLC.cCohKLNa.................................... 0 58 75 104 +9558 PF09726 Macoilin Transmembrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1821) Family This entry is a highly conserved protein present in eukaryotes. 32.00 32.00 32.40 32.70 31.60 31.20 hmmbuild -o /dev/null HMM SEED 697 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.29 0.70 -6.25 5 207 2009-01-15 18:05:59 2007-06-15 10:14:40 4 6 88 0 126 181 0 441.60 33 90.48 CHANGED 
KRRNs-CGK..RRPlKRs.KhAEGlsuSTFLYIKFLllWshVLLADFlLEFRFEYLWPFWLLIRSVYDSFKYQGLAFSVFFVCIAlTSDlICLLFIPlQWLFFAASTYVWVQYVWHTDKGICLPTVSLWILFVYIEAAIRaKDLKN.PH.lDLCRPFAAHCIGYPVVTLGFGFKSYVSYRhRLRKQREVQKEN-FYMQLLQQALPKE..s+sLpss-KEsoEsoocuhToscsllsspsstssssssKlSs.shsplthp-pupcptp...sssppp+slupsNs...pspsl.......suslQ-lEhhps+oNuS+hoss.hsGsspshsh+cc......opuSSssptossSKsSsosptstts.uNSSP+SHsss.............................................................................................................NGSVsSSSsscN.........-cKpKpuupssutth..c..h...ssscsVcsshssNp...lSoPsAls.....................RLEsDIKKL+AELssuRQsEsELRsQIS.LoStERShKS-LsQL+KENDhLQTKlpuhVSuKQKDKQolQolEKRLKuEp-uRsslEKQLsEEKKRKKpEE-TAARAsA.AtsoRt.EsuESLKp+K+DLEsEIcKLpp-lKlKEEppcsLEKcs....QEl+KY......+E.SpK-TEVLMSALuAMQDKNppLENSLSAETRlKLDLFSALGDsKRQLEIApGuIapREpEIlDLKuKIA-llAVMPs.shu...SplpssTPHYSupFL-ssPuutsosuSsY.sL ..........................................................................................................................................................................................................................................................hh.hDhhh.hRhEhhaPhWhhhtshhpohphp............sl.......h.........hshhFhhhshh.shhChhhlPhphlhhhuoshVhhthhhp..s...........ptG.......................................husH.lGas.h.hsh.hp.hht...hh.phphtltttN..hhp.h.puLP..................................................................................................................................................................................................t...............t..........t.................................................................................................................tt...t...t....t............................................................................................................................................tts.........s.s.s.....p................ppt.ttt................................tt...t.........t.t.....................................................................................
......p...Lct-lcc..h+s-lpt.+p.Ep-h+tp..............h.s.tth..t.+.phtthptcs-........lpp..........+htph.p.+pt-+.shtthE++.......l......ttEtct+..........h-tpL.pp+pt+h...........tt.........t.....................................................ptsp.......h..+tc..pph-...t-hpplphchp.t-pth..h..ch......................................ttht.............................pc.......t.-.-hLhtsLtshp-Ks.pLEpsLuuEsRlK.-LFpsLGts++phc..t.................................................................................................................................................................................. 0 47 58 97 +9559 PF09727 CortBP2 Cortactin-binding protein-2 KOGs, Finn RD, Coggill PC anon KOGs (KOG1103) Family This entry is the first approximately 250 residues of cortactin-binding protein 2. In addition to being a positional candidate for autism this protein is expressed at highest levels in the brain in humans. The human protein has six associated ankyrin repeat domains Pfam:PF00023 towards the C-terminus which act as protein-protein interaction domains [1]. 25.00 25.00 27.50 27.00 22.90 23.30 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.28 0.71 -4.80 8 414 2009-01-15 18:05:59 2007-06-15 10:15:04 4 12 99 0 190 349 0 128.90 39 16.96 CHANGED suc+Epcs-sLSKc-LhhLLSlMEGELQARD-VIchLKA-Rp-lhl.EApYGphs.pcPhtALQRDuhtuuucsppccl...YpsPlupL-+lht+pKcoQcRMhsQLlsAE+pp++hlhcL-t-+ct+hcahppuD-hhshLEpctp..Lp..LE.EKupppptEpE+cKhss+Lc-EhsKhKShsLMLVcEppph ..................................phsp.-Lh.LhuhhEGElpAR..-.VIthL.+..........sp+hc..l..Et.YG..h.p..........p......csh.uLpRDh.....................h..t.......t....t......................t.p..sl....................hppPhs.Lchl.tpp+ph.cRh.tQLhhuEppp+p.......................................................p..p...lE.E+...tthEpEppKh.tpLc-Ehs...+.KphshhLltEpp...................... 
0 20 35 91 +9560 PF09728 Taxilin Myosin-like coiled-coil protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1850) Family Taxilin contains an extraordinarily long coiled-coil domain in its C-terminal half and is ubiquitously expressed. It is a novel binding partner of several syntaxin family members and is possibly involved in Ca2+-dependent exocytosis in neuroendocrine cells [1]. Gamma-taxilin, described as leucine zipper protein Factor Inhibiting ATF4-mediated Transcription (FIAT), localises to the nucleus in osteoblasts and dimerises with ATF4 to form inactive dimers, thus inhibiting ATF4-mediated transcription [2]. 27.40 27.40 27.80 27.40 27.20 27.30 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.12 0.70 -5.34 18 400 2009-01-15 18:05:59 2007-06-15 10:17:26 4 7 207 0 247 384 2 263.30 35 60.93 CHANGED +chsKtsp.LhpsLsphss.t-KlptlhK+hsEhlp-p+php+phphhpK+hptlp+EK-plpsEps...Kshhs+sKLEsLCRELQ+cN.+pLK...........EEshpptcEE-c+RpEhsp+FQssLpDIQsph-ppsscspc...Lpc-NpcLscKlKpll-QYE...........lREpph-+lhKp+-LphQLs-AKLpptppthppppp+pcppp-hhhpph.....plpphpcpEppL+.........................pQLslYs-KFc-FQsoLsKSN-lFsTFKpEMEKMoKKhK+LEKEstsa+s+aEpsNpsll-MsEE+ph.......pp+chcphpp+lp+LEpLCRuLQsER 
..............................................................................t...t+....l.p.l.s....p....hps....p+lt....hl.h.K+hs-Llp-h+p.....p+p.hp.hhp..K+ttpl.p-+cp........l.puEhs....................+sl...hs+sKLEsLCRELQ+cN.+pl..K...........................-Eshph....scc-ccpRpEh...ss+F...pssLp................-Ipsph-..ppspc...shc..............hp-..N.t........Lt.....pKLKplh-QY-...........................lR.EpHhcc..lh.+...p+-L.p.pLhpAKlpptpph.hcptpp+ppp.pp......................................psp.hpppEspL+....................................................t.QLslYs-KFc.p...........hp.....sTLsKSNElFtoF+pEM-cMoKKhK+LEKEshhh+p+aEpsNt.......sllphhEE+ph...........................ppcchcthphchp+LEpLCRthQtpt.................................................... 0 73 114 174 +9561 PF09729 Gti1_Pac2 Gti1/Pac2 family COGs, Finn RD, Bateman A anon COGs (COG5037) Family In S. pombe the gti1 protein promotes the onset of gluconate uptake upon glucose starvation [1]. In S. pombe the Pac2 protein controls the onset of sexual development, by inhibiting the expression of ste11, in a pathway that is independent of the cAMP cascade [2]. 
20.40 20.40 22.00 20.80 19.80 20.30 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.12 0.71 -4.38 32 332 2009-01-15 18:05:59 2007-06-15 15:36:41 4 3 138 0 260 349 0 134.00 35 33.49 CHANGED Ta.pGalcospDALllhcAshp.......GhLs..plsRR.p-cERs.hIpSGsVFVapEppoGhKRWTDGhsWSPSRl..GpFLlYRELsKpp...........................t...tttt...ps...tt.tp.s.h..........................................................................................................................................................pshtaK.sG...LlKKThSlpsp.......................tspphHlISYYsh ...........................................oa.GalpostDAlllhcA.sh............GhL...pltRR.p-pERt.hIpSGsVFVa..pE.............p.........p..u..........Gh+RWTDGhsWSsS.Rl...G.s.FLhYREh-tth..............................................................................................................................................................................................................................................................................................t...h.h+.sG..LhKpohSlphp..................phHllSYYt.h........................................................................................................................................... 1 105 173 236 +9562 PF09730 BicD Microtubule-associated protein Bicaudal-D KOGs, Finn RD, Coggill PC anon KOGs (KOG0999) Family BicD proteins consist of three coiled-coiled domains and are involved in dynein-mediated minus end-directed transport from the Golgi apparatus to the endoplasmic reticulum (ER). For full functioning they bind with GSK-3beta Pfam:PF05350 to maintain the anchoring of microtubules to the centromere. It appears that amino-acid residues 437-617 of BicD and the kinase activity of GSK-3 are necessary for the formation of a complex between BicD and GSK-3beta in intact cells [1]. 
26.00 26.00 26.30 27.10 25.40 25.90 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.61 0.70 -13.47 0.70 -6.18 11 255 2009-01-15 18:05:59 2007-06-19 14:23:25 4 5 95 0 150 256 0 528.20 44 84.39 CHANGED upuhSsp++sAtDGEopEEoLLQESAuKEuhYtt+lLELQsELKQsRshlsNspuENERLsslsp-l+-s.................spslElpRsRhR-ElKEYKhREsRLLQDYoELEEENISLQKQVSsL+psQVEFEGLKHEI+RLEEEsElLNSQLE-AhRLK-IAE+QLEEALEoLKsEREQKssLR+ELspahshssh..hushplpl-tl..phpc-.ptsspss...........sDs-ch.sut.pss..ht.................phtsstpuElh..sPssu.....VsDLhSELplSElQKLKQQLhQhEREKssLhusLpEsQpQLcpupsslsEpp-+VspLTpplsAl+p.............................ttKctpss.-ppcspsspcs...st.aElDhpu.ElLpCKh+sAlsEhscL....+pELKsL+scapphpp..php--+s+hcs-lQsLscclpphE+su+pc.tc..........................chucLEpElpthsclAsEopGsLssAQDELVsFSEELApLYHHVChsNNETPsRVhLDaY+cuthhtt.t......st..scth.sshh.phhh.........tEs.t...t.....................spuPussu.osshSPs............tDhR..p-PhNIhNLlAlIRDQIKHLQpAVDRohpLuRQRsA....utcLushsD+DpEuhhEEILKLKSLLSTKREQIATLRsVLKANKQ......TAElALuNLKSKYEsEKuMVoETMhKLRNELKuLKEDAATFSSLRAMFAoRCDEYVTQLDEMQRQLAAAEDEKKTLNSLLRMAIQQKLALTQRLEDLEh 
........................................p..o.p+hsstsG.ppE-sLlpEoAs+Et.h.tplh-hps-l+p...+t.htpspsEt-Rh.t..pchtc..................pp.hEhpRhph...+s-l+EhKhREsRLLpDYoELEEENIoLQK.VSsL+ps.......Q......VEaE....GlKHEl+RhpEEs.hLpuQlE-hhcLKcIuE+QLEEALEsLpsEREpK.sL+KELsphhshps........s..phth..sl...pth.....tpp..........p...........s-.t...tsh.t............................t...pst.h...P........hsDLhSElplsElpKLcpQL.Qh.E.pEKs.LhtsL...............p-sQppLcpspstLp..p..pclttLs.t.p.lpuhpt.................................t.p...ts.....-ttpttsttpp................c...L...ts+hp.suhs-hhpL....ptclcsL+tphpt..t...pht-.pp..s..........phcschp.hppplt.h...Ect.p.p.tt...........................thtphcp-LpthsphAsEspspLssAQ-ELlshSEELApLYHHVChsN...sET.PsR.V..hLDah+psps.htt.........................p.pt..h...s.sh...t.h..................................t........................ptt.p.s.p....s...ush.......................tD.p...p-sh.....slhsh.shlpDQI+H.Lpt.AV-+ohplu+p+hs........s.phssh.cc-tEth.Epll.KLKSLLSTKREQIsTLRsVLKuNKQ......TAE..lALuNLKsKYEsEKshVoETMhKLRNELKsLKEDAATFSSLRAMFAs..RC-EYlTQ............lDEhQRQLsAAE-.......EKKTLNpLLRhAIQQKLALTQRLEpLEh...................................................................................... 0 43 60 108 +9563 PF09731 Mitofilin Mitochondrial inner membrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1854) Family Mitofilin controls mitochondrial cristae morphology. Mitofilin is enriched in the narrow space between the inner boundary and the outer membranes, where it forms a homotypic interaction and assembles into a large multimeric protein complex [1]. The first 78 amino acids contain a typical amino-terminal-cleavable mitochondrial presequence rich in positive-charged and hydroxylated residues and a membrane anchor domain. In addition, it has three centrally located coiled coil domains [2]. 
27.40 27.40 27.50 27.80 27.30 27.30 hmmbuild -o /dev/null HMM SEED 582 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.81 0.70 -5.86 33 516 2009-01-15 18:05:59 2007-06-19 14:34:14 4 7 348 0 280 469 33 425.10 19 80.05 CHANGED pssht+...hlhhhsLhsslsaGGslhhuphs-cFp-hhpct.lPhu......Ecllphh-pts.......................htt...phhpth.t...............................................sshspcstsspppssshttpss....................................t.pphtpshcthtpphlppstpthttst..........p.hpttsp.h..........................................................................................t......................h.tht.hssssstl..pplhshhscllsslsspsh...tschsshlscsppplsplspclsclppphcpclppplcppppc.hpphspphhtphctt.t..p.ppphppchppccpclccphcpcLcpcLcppppshpp+lpstlt.tplEhp+pFpc.lcc+lppER...........sGRLupLscLpup.lpsLEphstu.......hspshspscpsppLthulpulcusLcsuss.........pPlppclssLpch............................usc.............Dpllssulsulsstuhpc...GlhopspLhsRF.ppltsclR+sSLl..PssAGlhuHhsShlhStllFc.Kp....s.........supDl-olLuRscphLccG-LDtAsc-hss.L+G.Wu+...........cLApDWls-uR+pLElcphlcllpu-up 
..........................................................................................................................................................................................................................t.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t........................th.......t......................h.th..t..th.t.t..t...p....................t.t...ltt...................t.........t.t...............................t...p....tt.h............t.tt.h.pt.php......httttt..........tt.htp........t.t...........tt.h...t..pp......................tthh.pl.tph.sp.l................ptlpp....hhtt.................hs.......pt..s.phptlhhuspu..l.ptslppu......................hshtttlpslpt..........................................ssp.........s.hht......shltuls......uhpt...Gl.o.tpLhs+........F..tltp....hpps.uhl....tp.....sulhthhhuhl...Sh.lhhp...tt................................s..............ps.s....s..sllscsphhlp.pGcLc.Ahc.hsp.LpG..u+............tlu...p...-WlppsRhhl-spphhphl.t............................................................................................................................................................................................... 
0 93 156 236 +9564 PF09732 CactinC_cactus Cactin; Cactus-binding C-terminus of cactin protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2370) Domain CactinC_cactus is the C-terminal 200 residues of the cactin protein which are necessary for the association of cactin with IkappaB-cactus as one of the intracellular members of the Rel complex. The Rel (NF-kappaB) pathway is conserved in invertebrates and vertebrates. In mammals, it controls the activities of the immune and inflammatory response genes as well as viral genes, and is critical for cell growth and survival. In Drosophila, the Rel pathway functions in the innate cellular and humoral immune response, in muscle development, and in the establishment of dorsal-ventral polarity in the early embryo [1]. Most members of the family also have a Cactin_mid domain Pfam:PF10312 further upstream. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.77 0.71 -4.53 19 306 2009-01-15 18:05:59 2007-06-19 14:37:51 4 10 240 0 225 312 5 124.40 58 22.10 CHANGED hscchcs...+KP+aFNRV+oGa-WNKYNpTHYD.h-NPPPKhVQGYKFNIFYP-L...hspsp..sPpYplps..........sts.ss-hsll+FpuG.....................PPYcDIAF+IVs+E...W-asp+t...GF+ssFcp..............G.lLpLaFsF..........K+hpYRR .............................Wt-KYRPRKPRYFNRV+TGaEWNKYNQTHYD.hDN.PPPKlVQGYKFNIFYPDL...IDKsp..sPpYplpp......................sss..scDhsllRFpAG.....................PPYEDIAF+IVs+E...W-YStK+...GF+s.p.F.c.p.G..Ihp..LaFpFK+hhYR+........................................ 1 87 133 186 +9565 PF09733 VEFS-Box VEFS-Box of polycomb protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2350) Family The VEFS-Box (VRN2-EMF2-FIS2-Su(z)12) box is the C-terminal region of these proteins, characterised by an acidic cluster and a tryptophan/methionine-rich sequence, the acidic-W/M domain [1]. Some of these sequences are associated with a zinc-finger domain about 100 residues towards the N-terminus. 
This protein is one of the polycomb cluster of proteins which control HOX gene transcription as it functions in heterochromatin-mediated repression [2]. 21.10 21.10 22.40 21.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.71 0.71 -4.53 11 298 2009-01-15 18:05:59 2007-06-19 15:00:15 4 4 186 0 137 263 2 114.50 39 22.98 CHANGED tcclsuEpscsRshhh.h+pRpFaHScpsQPhphcpl.SDpDSEDEsD..c-hhchc-pphL--FsDVsc.sEKclM+LWN.FV+KQpllADuHlPWACEuFs+hHGp-Llps.sLhhsap......lahIcLasaGLlsucohspssphL ...................................................h..pp.h.........p.DS-sE.D..st.hhp.cp..hp.lp-FsDVsc.sEKElM+hWNha....V...h..K....p......s....h.....luDsp...lPhACphFlp.h...+GpcLlc+..s...LhRsFh......LHhssLa..DauLlsshsl.pssph............................... 0 42 73 107 +9566 PF09734 Tau95 RNA polymerase III transcription factor (TF)IIIC subunit KOGs, Finn RD, Coggill PC anon KOGs (KOG2473) Family TFIIIC1 is a multisubunit DNA binding factor that serves as a dynamic platform for assembly of pre-initiation complexes on class III genes. This entry represents the tau 95 subunit which holds a key position in TFIIIC, exerting both upstream and downstream influence on the TFIIIC-DNA complex by rendering the complex more stable. Once bound to tDNA-intragenic promoter elements, TFIIIC directs the assembly of TFIIIB on the DNA, which in turn recruits the RNA polymerase III (pol III) and activates multiple rounds of transcription. 
27.70 27.70 27.90 28.10 26.60 27.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.23 0.70 -5.10 52 385 2009-01-15 18:05:59 2007-06-19 15:02:08 4 9 291 0 250 378 2 276.50 25 54.86 CHANGED clsslEhPthV....psls+..................ulpslGG.ptlsp.....................slpstptp............................................................................................................................................lcLph.....p................spDsh..s+Pl..husp.....p.ssslLL+lsh...++stptpp....t................................................................hpschlGhlspsapFc...shuDFpahs......s..htp.........chppphhsh................pthp.hp.p.s..............................................................................................................................................................................................................................................hpl.......hPP....................................................................shFSphch.PasYt..................Ycpsstspt......................................................................................................................................................................................ttsp.t..........hsspppttphththhshs.hs.lPsp........................th.t.tthp.h.pptlphlcpLF-cRPlWs+psLh..splsp........pphtlK...............pslshluYhapsGPWRpshl+aGaDPRpcsp.thYQol.FR 
...........................................................................................................................................................hhslchPhhl....ps.sp..................hltshGG.ttlpp..................................................hhtt.t..............................................................................................................................tlpLhh..p.....................spssh....s+Ph...hupp.......h.spsll..L+lph...+ppt...t......................................................................................................................................thphchlGhlsphapFp.................shsDFpahs................tp....phttphhth..................................h..h.t..t.........t..........................................................................................................................................................................................................................................h.h.....lPP..............................................................................................................................................................................................shFophsh.Phpah..................ac.ps..p...........................................................................................................................................................................................tt...........................h.h..thpp..tt........h.h.h...............s.hPpp......................................thp.h...p.ph..t.lp..p..LF-.....pRPlWo+pulhspls.............p.ptl..K.......................................................hhlshluYhhh.s.GPWRphhl+aGaDPR...p...ss.ps+hYQsl.FR................................................................................................................................ 
0 91 143 211 +9567 PF09735 Nckap1 Membrane-associated apoptosis protein KOGs, Finn RD, Coggill PC anon KOGs (KOG1917) Family Expression of this protein was found to be markedly reduced in patients with Alzheimer's disease [1]. It is involved in the regulation of actin polymerisation in the brain as part of a WAVE2 signalling complex [2]. 20.10 20.10 20.10 20.70 19.90 20.00 hmmbuild -o /dev/null HMM SEED 1117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.88 0.70 -13.74 0.70 -7.09 8 304 2009-01-15 18:05:59 2007-06-19 15:03:46 4 9 127 1 181 296 1 741.10 36 92.42 CHANGED spQpKlAEKLsILN-RG.GhLsRlYNIKKsCuDsKs+PsFLs-KshEsulKaIl+KFPslDs+ss..pLsslpcppsEIl+uLu.YYaoFVDlhEF+Dplh-LLsTlDusQlahDIslNaDlT+uYLDLlVTaVolhlLLSRlEDRKslLGlYNtAHEMpHGpSDsuF.sRLGQMIl-YDpPLKKLpE-FsPHo+tlosALpSLphlahRRNpou-pWRssQlLSLlusP.uslLssupo-ThACEYLSl-sh-RWIlhGallCHspL.ssssl.clWplALQsuLsloLFRDE.............................slhlHcshpshlpuhKGYuKRluD......................l+Es+EpAlupuuslH+ERRpaLRsAl+ELsLlLsDQPGLLGPKhLhVFhALuhARDEVlWLlRHsspss..pp+s+o...sEDhsDspIuELLFhMEcLRuLVRKYhuVlQRYalQYLSuaDAlsLspllQslulCsEDESlILoSF....lspLsSLssKQVEssEhF..DFouLRLDWFRLQAaTSsu+SulpLpcpc-.........LuchMNshVFHoKhVDpL-EhLsEsSDLShaCFYs+hhEchFtpsLEtP.upsRYsIAFPLlCuHFspCsH-hCPEE+s+Isc+uLuhsNpFLEEhAKpscslIsplCuE..ptsLupQLLPcHsA.+hloputscKspt..sps+KG.ts-hptPGtESaRKsRpslTshDKLphsLTELChulNas.slsVaEHsFsPREYLsspLEs+Fs+ulVuMstY.s.ssp-lsRPSElLuul+AYMollQolEpalulDloRlhpslLLQQT............QPtD.SpGcsTIsslYTNWYL-sLLRcsSsGsIVauPshpAFlShssE.thsFpAEEaSDlsELRALuELlGPYGhKhLsEpLMWHluSQVsELKKLVspN+DlLhtlRosFcKP-pMt..ull+pLs.............ssDsVLpRMhIIG.ILoFRsLhQEALccVLcp+lPFLhSsIcslp-plP.sssDhKshhpltElASAAGlsCclDPuLVsAltupKuc...spE.-acsuCLLLVFVAVSlPpLAtsssStYssph-GasNNlHCLA+AIsslsuA...............................LFol.tpssIEo+hKEFLsLASSSLLclGpEoDKsp......l+sREolhLLLD.lVpcSPaLThDhLESCFPYsLLRNAY+pVYcpsh 
......................................................................................................................................................................................t...p...thh...s.h.........th..h.......t.t.t.hht.ht.ah..hhD.hh.hp..-thhphl.thts.......h.h..t.s..hh..ahpLhh.hh...th.hh.tp........c.+hlhthashha....h.ptt.-..th..+...................hsphh..h.p.....Ph+.hh.pch.......ht.......h....t....hl.uh..hh....t.p...p.h+t.thhs.ht......hh.ss.t...h...........h.....hh..h.t.h.pWhhh......shhh..t.h..t.................p..........h....hh..sLpts.hl.lh+-p..........................................................................hh.hHp..p..hh.s...h...h......sK..chtc...................................................lp....-.hp.sh..pss.hHtp...+R.hl.+.tltph..hht...cpPsLLuPp.....h......hh.h.uLsh.psEl.WhhpH.t......................t...........-..-....pls.Ll.hhtplpthltca..hhp.t.....Yhl..................palst.ss..lp.hht.s.........th.h..s.....-.shhh.psh....hp.ht..tls...p...........pp....................phpshRh.cWhpl...hhho.spsshtl.tc..p.......................lhp.hs.hh.aophl.Dth.p.L.cpusL.ph.hh.......aa..p..hp...t.Ft.....s.h..s..s..pa.hsa.hlsspF..psh.p...hs.PE.E................h.t...ltpculthsp.hl-phhtthpshl....lssc.....hthttp.LhPppsA...h...tt...th...............tt.....h....h.PG.E.Sh..csp...lp.h-thh.tLspLs.shs.h.sh.Vh.pashh.pEYhpppl..pht...c.lh.hh..............h....h.........s.tpl..........+PS.l.s.lptahshlp.htpalthDhophhppsLLpps.............................p.h-.tpst.ohsthhspWY.-sll+psS.......sstlhh....psF......hsh..............sa.s.-paoDhpEhpuLsclhGsYGhchLsctLh.Hhss.l.plcphlh..pNh-hL..th..tsshpps-.ht....t.h.p.l...............sh-sllphhh..hG.hlsF+thht-uh.tlhp.phPhlhu.ltsh.p.hs....s.h.p.........h.p.husssGl...sc.h.Dsths.sl.t.hst..........................t..hshhLh...al.hsuh.h.s.......o..ap.thtuatNNhHChsh...s...l.tl.su................................h.ph...........pt..s..hp..pth.....p..Flhhuu..lLp.....pp.p...p............................h.p..........lhhc.hhp.s.hls.s.hE.hhPashhpt....h..
..h....................................... 0 66 98 139 +9568 PF09736 Bud13 DUF2050; Pre-mRNA-splicing factor of RES complex KOGs, Finn RD, Coggill PC anon KOGs (KOG2654) Family This entry is characterised by proteins with alternating conserved and low-complexity regions. Bud13 together with Snu17p and a newly identified factor, Pml1p/Ylr016c, form a novel trimeric complex. called The RES complex, pre-mRNA retention and splicing complex. Subunits of this complex are not essential for viability of yeasts but they are required for efficient splicing in vitro and in vivo. Furthermore, inactivation of this complex causes pre-mRNA leakage from the nucleus. Bud13 contains a unique, phylogenetically conserved C-terminal region of unknown function [1]. 20.60 20.60 22.00 21.10 20.10 20.10 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.38 0.71 -3.93 44 337 2009-01-15 18:05:59 2007-06-19 15:04:41 4 3 291 0 240 323 0 136.80 36 33.20 CHANGED -TlaR.Dp.oG+hI.shcppppcpcpcp........................pccpcccpp.ttphspG.lQtpppcpphpchpcttptsh....sch.pDp-hsp.h+pppRhsDPhspa......hppccppss..............thht+stY.............pGssss.NRFsItPGaRWDGVDRSNGFEpchFptpsc ..................................................................................................................cTlaRDt.sG++h..s.h.p.cc.t.ctccpt..........................pcptccc...ph.htphs+G.sQpcppppphcchtcttptPh....ARhh-Dp-h.s.p.L+pppRhsDPMspa.............lpcpctpts................................tptht+stY..............pGssPP.NRasIhPGaRWDGVDRSNGFEpchFtths.t.......................... 0 82 130 197 +9569 PF09737 Det1 De-etiolated protein 1 Det1 KOGs, Finn RD, Coggill PC anon KOGs (KOG2558) Family This is the C-terminal conserved 400 residues of Det1 proteins of approximately 550 amino acids [1]. Det1 (de-etiolated-1) is an essential negative regulator of plant light responses, and it is a component of the Arabidopsis CDD complex containing DDB1 and COP10 ubiquitin E2 variant. 
Mammalian Det1 forms stable DDD-E2 complexes, consisting of DDB1, DDA1 (DET1, DDB1 Associated 1), and a member of the UBE2E group of canonical ubiquitin conjugating enzymes and modulates Cul4A function [2]. 25.00 25.00 28.70 26.30 18.00 24.20 hmmbuild -o /dev/null HMM SEED 407 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.36 0.70 -5.71 8 179 2009-01-15 18:05:59 2007-06-19 15:06:35 4 6 119 0 111 170 0 314.00 37 70.11 CHANGED REhSLFhp-s+asIluuuohlsc-.shP.......pah-lacss-ulpss..sslEcYThallDL+pGcloDu+sF+sDpIlLuHNpGlaLYsshLAILSlpaQsIaIapVs.-GcFlchRTIGcFCp--Dthhlsps..........pthsssshcpshh.P.....hIsuLKpRlLsFLaRpAcspuussupt.....R+FY+pF-pacpLhMWKMQLLDcchLLIKY....uopDlssL+ss-ss..psShFVlYsIhspplluVYpNsSscLLpLaEpFsDpFRsushtp.t.sFtsSsouNsaup.hppphKpThl..NtchuuppcAs+RlhuuLPlSuQSYSuSPYLDhuLFSYDDKaVSshERPKsCu-aPI+Fhu.RcouLLKF+lpAGspspssPts.sRRLVAFlFHPh-PFAlSVQ+s..stsYlVNFHhR+ ..............................................+-h.Lahcssphslhuoush.....pp....s.s.................................sstulpss.....sslEchohallcL.........p..sG..hlhDphhapsDhl.Lu...HN.GlaLa.....pshLullSlp.QsIalh.plt....p.G.p.hlpl+.sIGtaCh-DD.hhlps.........................t....tt..h.s..................hlsulK.pRlLsala+ph.ptts....s.s.......+cFa.hFpth.pLhhWKh.QhLDpp+LhlKa...........sS.Dsssh.Rss.........-............psuFFsVYNh.oo-lluhap.NoSp-LhpLFEpFsDhF+sss.ps.h.pF.sStSss.aAhp...pp......hK........stphusptphs++hLsp.LPhSsQSh..SsSPYhDhsLFpYD-KhlS.sh-R.+tss-pPI+Fhs..R.p.sshlKFcltsu............tt.....s.t.s+.+lsuFhFHPh.PhulSlQps............hlshHh+p......................................... 0 38 59 86 +9570 PF09738 DUF2051 Double stranded RNA binding protein (DUF2051) KOGs, Finn RD, Coggill PC anon KOGs (KOG2010) Family This is a novel protein identified as interacting with the leucine-rich repeat domain of human flightless-I, FliI protein. 
27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.13 0.70 -5.13 7 400 2009-01-15 18:05:59 2007-06-19 15:58:51 4 3 88 0 181 356 0 199.10 31 66.18 CHANGED AEARLAA+RtARAEAR-IRM+ELERQQKE..................-pp.D+.a....scpsSpst.ssl...s..............sshsGo..SSRRuStD...Sh-p-s.ShR-l........................+cpLtEVEE+a+KAMloNAQLDNEKsshhYpVDhLKDpLEEhEEphAphpREhcEKp+-hct.K+shshLphphp.l+ppLppRDpLIpcpGLllls..ssNu-su-p.s.......su.shlptEstplLpss...tGsLDV+L+Khs....sEpppL.tplpp.ptpLpth..ptp..h.utpsGsh...........p.........sQ+-uNK.Io-hKhKL.sKu.EpElsNh ............................................................................................................................................................................................................pp...cpth.........t..op......t.t......................................ss.tst......uS+...R....us..t.s......t....t...................................................................................................-+tt...plchhpc.h.p.pp.h....+p.......-.K.............p...-.h.E.+.pKchhshlp.pht-l+-tl...pptcchl.............p......cp...............................................................................................................................................................................h...................................................... 0 26 42 93 +9571 PF09739 MCM_bind DUF2044; Mini-chromosome maintenance replisome factor KOGs, Finn RD, Coggill PC anon KOGs (KOG2545) Family This entry is of proteins of approximately 600 residues in length containing alternating regions of conservation and low complexity. The Arabidopsis protein is a replisome factor found to bind with the mini-chromosome maintenance, MCM-binding, complex and is crucial for efficient DNA replication. 
25.00 25.00 27.70 26.20 17.10 18.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.72 0.71 -4.57 37 164 2009-01-15 18:05:59 2007-06-19 16:02:28 4 7 136 0 117 167 2 120.10 29 19.86 CHANGED cphLpsssshp...plP.lNss.slctlps........ssLVRFRsMlQDh.hssEhYlusYcstst.......hcssKYp.Dhhphs.ttth..........pshhhERpshYsVPlPGpssWstppppt.........phtstspssssppKR ..........................phLpppsshp.lPsLsps...slchlps.........soLVRFRsMlQD...M.hsPEaYhusYcshsppst.....hh+tuKYc.Dshpssstpph..ts......psshhERpshas.VPVPGpssWs+pt..s.............t................................................ 0 51 67 96 +9572 PF09740 DUF2043 Uncharacterized conserved protein (DUF2043) KOGs, Finn RD, Coggill PC anon KOGs (KOG2374) Family This is a 100 residue conserved region of a family of proteins found from fungi to humans. This region contains three conserved Cysteines and a motif of {CP}{y/l}{HG}. 20.10 20.10 25.70 21.70 19.60 19.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.91 0.72 -10.64 0.72 -3.96 9 103 2009-01-15 18:05:59 2007-06-19 16:10:46 4 2 75 0 68 99 1 104.60 44 16.58 CHANGED ths+APlVPaGhDLpYWGpEp.sssp.hp.sspH+FWt.s-.EppVsusclsphhppRplTatGchcslp+h.CpA.h.sGtLCpRpDhhpCPFHG+IlPRDDpGpPlppED .........................................t.ucAPVlPaGhDLpYWGpcp.ss.......suph...hp....sspHRFWtss-s...-p........plssscluEhhppRpIoFsGchEPlp+h.C+APh.s.....s.Gp..LCpRpD+h.K.....CPFHGpIIPRD-.p.GpPls.p.............. 1 23 34 50 +9573 PF09741 DUF2045 Uncharacterized conserved protein (DUF2045) KOGs, Finn RD, Coggill PC anon KOGs (KOG2465) Family This entry is the conserved 250 residues of proteins of approximately 450 amino acids. It contains several highly conserved motifs including a CVxLxxxD motif.The function is unknown. 
25.00 25.00 25.80 25.10 21.20 22.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.51 0.70 -5.23 3 168 2009-01-15 18:05:59 2007-06-19 16:11:30 4 8 105 0 103 160 1 208.10 41 49.25 CHANGED V-pcosFWTslFssYFlcphEsR+p.D.DDLLFFVR+KLupuSS......RshsEsEsElEVY..RRDS+KLPtLGDPD...VDWEESVYLNLIlHphDYTVTlAICTRsusK-........Lpll+K+SQpVYASPSR..++MDSKGEsEcI.oYPcICFMVDsFDEVFcDllVsDGE.MVCVELVAoD......................+ssosQGVIFpGSIRY-ALKKVYDuRpSplGS+hAQ+MSFGaap....po+sEFVRMKGPQGKGHAEMAVS+Vss ..........................................h...t.haWp.hFspaFh...t....ph....tsp.p.............DDhLFaV+.p.hs.htu............ht...s..p...psplpVa...RRp.u.KLPt.ls-ss...................lDWEcolhLNLlhpp......h.sYhlTsAlCo+sssts....................lp.h.p+hoppVaASPS+..+.MDo.K.G................-..p..pl..oYPpIhFhlDsF--sFsch.hl.t-.sE.hhCVpLsApD.....................................................+.psshp.sslF.G.lpYpsl+.csYDs....phS...........hu..u................h.h..S.hGh....................s.ph-hlh...M+GPpG.+GcsEhAVots..s............................................. 0 35 50 76 +9574 PF09742 Dymeclin Dyggve-Melchior-Clausen syndrome protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2225) Family Dymeclin (Dyggve-Melchior-Clausen syndrome protein) contains a large number of leucine and isoleucine residues and a total of 17 repeated dileucine motifs. It is characteristically about 700 residues long and present in plants and animals. Mutations in the gene coding for this protein in humans give rise to the disorder Dyggve-Melchior-Clausen syndrome (DMC, MIM 223800) which is an autosomal-recessive disorder characterised by the association of a spondylo-epi-metaphyseal dysplasia and mental retardation [1]. DYM transcripts are widely expressed throughout human development and Dymeclin is not an integral membrane protein of the ER, but rather a peripheral membrane protein dynamically associated with the Golgi apparatus [2]. 
25.00 25.00 27.20 27.20 24.40 24.30 hmmbuild -o /dev/null HMM SEED 678 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -13.23 0.70 -6.42 25 291 2012-10-01 19:21:38 2007-06-19 16:27:57 4 7 170 0 171 566 11 481.00 28 87.39 CHANGED MGss.S...................t..chphpph.......hthhssppsss.ss-sF..WspLhph.hphspssp-hh..lsut..phhpshh.sNhsohshttlhphhhtp........sphttspcpps.........................shNslhllspllp..ahhE..p.tpsphhthhhtt..t..........................................................shst....t............................h.shsphEphhpshls.hhss.ss...............sstp.th+hEhlplLLshhSsplatsssst........ss.ahphhhp...tpsptsssLhsoLLpphhphssssh........ph.hht...................uhh.hhsts...hh...............................sst.sp+sPLu..stulplLLlLhsapss...........................psNsapp..slshhpchcp............t..hpu.sp.ts..h.lsastL......hsol............sp...hhtppphllLLYpLlcpNppFhpYlLs.+uD.lpslllPILphLasuppc..................................................psphlahslhlLLlLSpDpsFspslpch........................h.hssls.Wh..p-hslt..........pho............lGuLlllVlh+hIphNhp...+hp................sh.LasssLuslsNhSsah+sLshhsup+LlsLh-hLop+ahphssthspch............................t.........h.hhpphl+hlLEllNuhls..hplctNspLVYsll+......+R.plFpphps........c................................................................................................................................................shhpcl.lpsItpl..Lpaasspl-.....phsssp...hs............hsclLchIpcushs..............................sstslp.....................as.lhacYppcptsppaF.hs........YhWsl 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................hhh..h.s..hh.........................................hh.......................h...hl..h............................................................................................................................................................................................................hs.....p.sh.lLllLhp.hpt...........................................................t.s.sapt..hl..h....ts.p..............................................t............h..pl..s.hs.L.............h.psh................................st......tppp.hlLLY...LlctNpphhp..ahh..psc...h.pl.....l.hPlLph.Lapstpp....................................................................pst.lahhLhlLLlLo...pDpsFst.pl.pph.......................................h..h.sls..ah...t-..+.h..t.....................phS.....................lG.uLhllllh+sI.p.hsht...+hc.......................s.hL.asssLshLuN..h.....u.s.h+p...Lp.asup+lls....L.hphls++...at+h...p.hppph..............................................................................t.........................lphhpchl+hlLEIlNuhLs.....................tL...pNsplVYsllh......cp.plFp.hts............p..............................................................................................................................................................................
...........................................................................................s.htcl..hpsl..h...lthh...h.....................................t........s........................................tthhthl..t.s..............................................................................h.................................hhh.....a.tp.....ah.h................................................................................................................................................................................................................... 0 77 102 141 +9575 PF09743 DUF2042 Uncharacterized conserved protein (DUF2042) KOGs, Finn RD, Coggill PC anon KOGs (KOG2235) Family This entry is the conserved N-terminal 300 residues of a group of proteins found from protozoa to Humans. The function is unknown. 23.20 23.20 23.20 25.10 22.80 22.50 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.61 0.70 -5.09 17 181 2009-01-15 18:05:59 2007-06-19 16:28:15 4 6 143 0 136 188 1 250.30 35 35.86 CHANGED ElpcLttcFpcsQhsp.ssp+LSERNClEllpKLlccphl-..llaThDGKEYlT.spLppElpcELhspGGR...lsll-LspsLNVchs+lEptspcllcps..slphhtGpLlscsYl-plspElN-cLpEpGplslu-LsppacLss-Flp.pllpt..hhuplIpGphsts.....hlaTpsalpppcuplRGsLhAhTcPsslsslhpp.............hthppplhhsllpsLlst..splpGphhG......u.YlPphYppspsshVcsaacQNGalEassl.ppLGIsssppalp ...............................................ElpcLttphptsQ.sp...ssp+LSERN.slElls......KLhppp.hl..-...llaThDG.KEYlT.tplp+Elp-ElhhpG........GR...lsllDLt..phlsVDh..+lEpphpclhp.....ppt...p...........l..p.ll....G.p.LlspsYl-plspElN-pL.........p-p..Gplslu-Ls.ppasLsu.cFlp.phlpp....clGp...lIpGph-ts.................hlaTpualsRp+AplRGhhsAlT.+...Psslsslh..pp...............................................hsh....p.....pplhh..sllppLlpp...upltGplhG.....tuhalPslaspsQpphVcsFapQNuYl-a-sL.p+LGIscshpal................. 
0 55 74 112 +9576 PF09744 Jnk-SapK_ap_N JNK_SAPK-associated protein-1 KOGs, Finn RD, Coggill PC anon KOGs (KOG2077) Family This is the N-terminal 200 residues of a set of proteins conserved from yeasts to humans. Most of the proteins in this entry have an RhoGEF Pfam:PF00621 domain at their C-terminal end. 24.10 24.10 24.10 24.10 24.00 23.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.11 0.71 -4.24 6 354 2009-01-15 18:05:59 2007-06-19 16:29:01 4 5 89 0 191 319 1 144.20 40 16.80 CHANGED VpsLAsuIY+EFERlIcpYsE-VlKpLMPLVVsVLEsLDulhu-NQ.ch-lEhpLLc-DpcpL.sQYEREKpLR+pAEpKllchEDshEpE+K-Lpp+l-phEppsRpLELKhKNhs-pluRLEEREs-hK+EYsuLH-RaTcll+sYlEalERs+hph ....V.slAuuIhpEFERlIcpasp-.sVppLMPlVVsVLEtL.........-.........slhs....cs.......p..cpcl.Elc.hL+-..DsE.pL.sph....c....c....EKth.R+p.t...E.....p....chlc..h....E....DshctEp+...-Lpsp.lppLEp.ps...+pLp....hch.+.N....h...u-...p..................h..s....+....L....p.EcEsp..h..+pcas.slpp..RcsphhpphhEhl-+p+................................... 0 48 65 126 +9577 PF09745 DUF2040 Coiled-coil domain-containing protein 55 (DUF2040) KOGs, Finn RD, Coggill PC anon KOGs (KOG2117) Family This entry is a conserved domain of approximately 130 residues of proteins conserved from fungi to humans. The proteins do contain a coiled-coil domain, but the function is unknown. 
25.00 25.00 25.60 25.10 24.60 24.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.76 0.71 -4.32 30 288 2009-01-15 18:05:59 2007-06-19 16:33:23 4 10 235 0 203 277 2 119.30 38 29.80 CHANGED +pscpctpcAhp...pDsolasYDulYDsl+.tt.pctppsp.pscsp.+cPKYhssLlcuA-pRK+-pphspERpLt+EREtEG-pauDKEKFVTuAYK+phEE.++hpE-.Ecc+-chEcppctspttshh ..................................p.pschphpcAht...tDsola-YDslYDphp.....ttpp...cpp.t..t.h.......t..pp............++P....KYIpsLlcuA-hRK+.-pphthE+plp+EREtE.sc...c..F...sD.K.EtF.VTuAYK+p...hpEhcch-Ec.E+cc.pthEtptchsptts.u.................................... 0 73 110 159 +9578 PF09746 Membralin Tumour-associated protein KOGs, Finn RD, Coggill P anon KOGs (KOG2092) Family Membralin is evolutionarily highly conserved; though it seems to represent a unique protein family. The protein appears to contain several transmembrane regions. In humans it is expressed in certain cancers, particularly ovarian cancers [1]. Membralin-like gene homologues have been identified in plants including grape, cotton and tomato [2]. 
23.70 23.70 24.30 24.20 23.60 23.60 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.39 0.70 -5.41 3 172 2009-12-07 11:10:06 2007-06-19 16:39:37 4 5 100 0 123 196 0 233.70 36 53.94 CHANGED uQNPLINVRDRLFHALFFKsAlTYAcLVP+sVRRsIEFllLLKALLsFFILlYIHluFS+oPsTCLEHVKNcWPRDGILRVEIls.suc++sIaLpht-ssollRslK-suhhuI-PpTcpsHEolEpYQNsplKLpLslcPosahssppLps....pFDuENhosThSascAhSht.-sWs-EQYIVEYSLEYGFLRLSuSTRQRLNIPVhlVsLDPs+DKCFGDSFSRFLLKEFLGYDDILMASVKsLAEQEENKGYLRNVITGEHYRFVSMWhMARSSYlAAFsIMlLFTlSVSMLLRYSHHQIFVFIVDLLQMLEaNlSlRFPAAPLLTVILALVGMEAIMSEFFNDTTTAFYIILIVWlADQYDAICCHTSlTKR ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................YshEhGahh.Ls.ts+tchpI.sh.lplss.pp.C.FG....sth.pphllpphlGYDslLhsSlht............p..spG.aLhNh.otEhY.............p......h.....s.....................h..................hths.s.....a......s........s........hh...hhhh..h.....o.htluhhL.R..spp..phhhFh...h...pl........p.....hhphp.s....h.......h....hh..hshl.......hhl.G..h.hhh.EFasDp.hAFhlllhVWhs-.ashls..s+osho.......................................................................................................... 0 49 64 103 +9579 PF09747 DUF2052 Coiled-coil domain containing protein (DUF2052) KOGs, Finn RD, Coggill PC anon KOGs (KOG3044) Family This entry is of sequences of two conserved domains separated by a region of low complexity, spanning some 200 residues. The function is unknown. 
25.00 25.00 31.80 25.20 23.90 23.60 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.46 0.71 -4.35 20 236 2009-01-15 18:05:59 2007-06-20 11:01:27 4 7 195 0 184 239 0 155.60 26 58.63 CHANGED NRRhthLppLlpcu-YFS-ppMc.R-PLLY-phlGpa.oppE+pscspsc...t..........uhuulLhsslp+pptchpltcpp.....p.-t.t...-pcspt...................p.p.s..sp..t-p.............ts............................ssp-+-.hc-cFhshMpp+FLsGEDcD.FDYopVDsss-aDD...c.hppDcEE+YF--- .........................................................NRRhthh.pt...................ps.......p....YFS...t.th........c.t......pPhLYcphl.....tpa.s.t-tptps.hc.................shushL.t....ph.cp....ph.p..t.h.tp....................................p...............................................................................tp.....p.....t.......................................................................ssppcp.htppahthMpp+FL........pGc.Dp-.aDYst.lDpst.phDs.........tpDtE-+YF---.................................... 0 65 102 150 +9580 PF09748 Med10 Transcription factor subunit Med10 of Mediator complex KOGs, Finn RD, Coggill P anon KOGs (KOG3046) Family Med10 is one of the protein subunits of the Mediator complex, tethered to Rgr1 protein. The Mediator complex is required for the transcription of most RNA polymerase II (Pol II)-transcribed genes. Med10 specifically mediates basal-level HIS4 transcription via Gcn4, and, additionally, there is a putative requirement for Med10 in Bas2-mediated transcription [1]. Med10 is part of the middle region of Mediator [3]. 
21.50 21.50 21.80 21.90 20.30 21.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.50 0.71 -4.32 18 291 2009-09-10 23:46:38 2007-06-20 14:56:13 4 7 256 0 209 273 0 121.30 33 74.23 CHANGED hppplcplIEshhpLslhVp-Fp....P.......................soppsLtp+lssLlpsLpplpchspp...............................hps...............lp..lPlEV.lpYI-sGRNPDlYTREhlEt.shcpNphh+GKhcuh+chRcsLtcElpcpFP-hhsphcsI ........................................................l.ppplcphlpslhpltlhVpsap.....s..........................poppsLsp+lp...pl..lpsLpplschppp...................................................hps...............lp..lPhEV.l.cYID..p...G..RNPplYT+EhlEt.shtpNpthKGKh-uhc.pF+shLhpElspsFP-.htphctl................. 0 67 112 168 +9581 PF09749 HVSL Uncharacterised conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3102) Family This entry is of proteins of approximately 300 residues conserved from plants to humans. It contains two conserved motifs, HxSL and FHVSL. The function is unknown. 
26.80 26.80 27.80 27.40 26.40 25.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.62 0.70 -5.35 27 269 2012-10-03 21:31:48 2007-06-21 09:20:52 4 6 231 0 194 278 2 211.80 23 79.87 CHANGED PPlPpshh-hhts......t.p.pDs....sshHsGRhRsh.HhcGNWsoa....lY..lthtsspt...hchlpphlsp.hpphhhhhphp................sth.....s.h.LHlSLS+slslcp.cphssFlpplcptl....sshpsFtlphss.lp..hhsNt-cTRhFLsLclsp........spsst...LpcllptlscshpcashsshY..........................................psspFHlSlAWsl.ss.ptphp...ct..ph..phhpth.p..................phpsspl+s+lGNpshshsL ..................................................................pp.....t.ctuRhRt.h.H.pGsWsoa....lY....l...h.....sptt........hp.......hl.p.tlltp.htp.h.t.................................................................tshHlSLS+slslpp.cthpsFlptl+ptl....................sphpt.....Fhhphsp...lc..hasN.p....-p.TR..sFluLpVsp.................................sttp......lpp.llp.hl.spshpp....a..shsphY...................................................pssphHlSlAWsl.ss.ptphp...t...p....p...p...htth..........................................ph.hpplph+.Gpthhph......................................................................... 0 55 93 149 +9582 PF09750 DRY_EERY DRY; Alternative splicing regulator KOGs, Finn RD, Coggill PC anon KOGs (KOG2548) Domain This entry represents the conserved N-terminal region of SWAP (suppressor-of-white-apricot protein) proteins. This region contains two highly conserved motifs, viz: DRY and EERY, which appear to be the sites for alternative splicing of exons 2 and 3 of the SWAP mRNA [1]. These proteins are thus thought to be involved in auto-regulation of pre-mRNA splicing. Most family members are associated with two Surp domains Pfam:PF01805 and an Arginine- serine-rich binding region towards the C-terminus. 
25.00 25.00 27.00 27.00 24.30 24.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.64 0.71 -4.01 17 271 2009-09-10 16:58:51 2007-06-21 09:30:53 4 11 117 0 173 284 0 124.10 33 17.96 CHANGED p.LplaGpps+latDsthAtAs-ssptL...hPW...Gspp.hIDRaDsRuhL....shh.......ttsp.sp..phs..ppttEphhshERYhsLhpsch....................ptthppcpppphsppptp.tshsslGFoYssst ........................L.VaGhuCKla.hDs.thAhst-psppL...lPW.......G........D..p..shhIDRaDsRutL............chh.........................ts..sp.hpho.tEptp.Ephss.ERYhsLhpspht................................................tt.pc-chpphtpt.....t..t.tthsslGasYtss.s..................................................................................... 0 60 85 130 +9583 PF09751 Es2 Nuclear protein Es2 KOGs, Finn RD, Coggill PC anon KOGs (KOG2627) Family This entry is of a family of proteins of approximately 500 residues with alternating regions of low complexity and conservation where the domain similarities are strong. Apart from a predicted coiled-coil domain, no other known functional domains have been characterised. The protein appears to be expressed in the nucleus and particularly highly in the pons sub-region of the brain. The protein is clearly necessary for normal development of the nervous system [1]. 
20.20 20.20 21.30 23.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.70 0.70 -5.06 34 313 2009-01-15 18:05:59 2007-06-21 09:35:03 4 10 226 0 243 314 2 326.40 27 74.37 CHANGED cptpVLsE-sYlpsLpcIIpRDFFPcLhchpspp-Y...................L-Al-spDhphlpphp....p+hpphht..............oPsphcss...sh....st.s.....ssspsss.ssh......tt-p..t...ttptt................................phuLspF.s+YTSEDNcSFpcll-ptppKcp.cKauWlap.scttpspplt.t...............................t.p.ssppt.....lc..........................sp.st............................................................................................-tW.ph+scNsLMahPsuspss..-sltptsct.........pclhapNTRh................pt.s.shsps......uhstlpcAhttp.p.p.sch.....ssc..........oPc..........VNGY......uaVcss.pPss...............t.sPlhTaGclpsT....Phpl-s.c..s.......tsss.......................ssFpI.Ess+REpluc+hsccsu.tKppp+p..............tpshsos.....t..ss.tls.........................LoPAA.Q+L ........................................................................................................tlLcE-pYhpslppIItRDFFPsl.chpspp-.a...................L-A.pptD..hhtpht.......chtphh.................................s.ht..tt..................t.....................................................................................................................................................................................................t.thoLstF.t+YTSEDNtSFpclh-ptpp+pc.t+aualap..s...c..ttppht................................................................t..p.....ttt...........hp..............................................tt..t...........................................................................................................................psW..p.h.pscNsLMahPp.uht......p........pph....tt..p......................pl.apNTRh..........................p.....shs.p......................shstlppshthpst...tp..........s.t.............oPp..............ssGa.......shVtss..pPtP..................
.t.t..sP.....hh...T....WGplpsT....Phhlpstp.............s.s............................ssF.+......l.-sspREplthphspcsu.tp.ptpt.................tt..to................s..th....t................................hoPAhp+h......................................................................................... 1 103 146 204 +9584 PF09752 DUF2048 Uncharacterized conserved protein (DUF2048) KOGs, Finn RD, Coggill PC anon KOGs (KOG1551) Family The proteins in this family are conserved from plants to vertebrates. The function is unknown. 19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.00 0.70 -5.57 11 211 2012-10-03 11:45:05 2007-06-21 12:56:45 4 4 135 0 126 523 39 304.20 34 88.08 CHANGED o+FFs+GWGc............chlcclhs.ct.lppR.......sPsshtlclspshppcssp.....lt-GpFtSPht..hsuhLPspucpA+hphLlPpph....h...+slCl.HLAGTGDHsah+Rcph.AcPLLK-.sluullLENPaYGtR+PppQptuuL+sVSDlalMGuuhlhEupsLLpWhccc.GaG..lGloGlSMGGpMAuLsuoshPcPlu.....lVPhLuhsoAssVFTpGllppulsWchLcpp...............................................................................................................................................................................................s.t........tpppsts..pEulR.hhthh.stTslpsFssPhsPphsIhVsAccDuYV..P+pust.sLpphWPGuElRa...l-u.GHVSAhlhHpchaRpuIh-Ah-R 
................................................................................................................................................................................................................................................................................h...........t................pu.F.os......thhP..s....A.hhthlh...........Ptt.......................c.hsl.pL.A.GT...............G..D.H.h.a.+Rhph..utPhlKc..thuollLE....sPa......Y....G....R.+....P.t...t.Q....h.....t.S...p...L....p...s..V...S...D.....l...h...l..h.G...t.s...h....lhEst.sLLp.Whcpp....Gau....lGhsGlSMGGh...hA...u....lsso....saP...c...P...hs........hl.P..hL....o...........s.............o.....A.ss.s....Fsp.G...l..h.......p.....uh.s....W..t....Lp..pp....................................................................................................................................................................................................................................................tp..p.p...p.....hEs.l.......h.h....h....t.hh..p.h..T..c..lts...F..s..s.Ph.c.s..sh....l......lhVtApcDuYl......P+p...ssh..pLpch.W....P.G.s.E.lR..a...l..p.u.GHloualh+...p...thFRpsIh-sht........................................................................................................ 0 47 65 99 +9585 PF09753 Use1 Membrane fusion protein Use1 KOGs, Finn RD, Coggill PC anon KOGs (KOG2678) Family This entry is of a family of proteins all approximately 300 residues in length. The proteins have a single C-terminal trans-membrane domain and a SNARE [soluble NSF (N-ethylmaleimide-sensitive fusion protein) attachment protein receptor] domain of approximately 60 residues. The SNARE domains are essential for membrane fusion and are conserved from yeasts to humans. Use1 is one of the three protein subunits that make up the SNARE complex and it is specifically required for Golgi-endoplasmic reticulum retrograde transport. 
29.40 29.40 29.40 29.50 29.30 29.30 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.61 0.70 -4.94 11 304 2009-01-15 18:05:59 2007-06-21 14:54:11 4 8 223 0 200 291 0 221.70 20 82.89 CHANGED S+LElNhhRLLu+CEthAp...EcpppspWRLcKaVsuLccMlspLccp..........hsKPos-hlsEYsc+lshLKullpupch.............................soss-KshssphLusups.sthsptcssso+p.............l+.pppu+hpsEhRpELls................sssuhp.ptshhtpp...........................ts.sssccpussclDphlphHpslQEKLA--MltLARNLKppo.sApslIKpDspsLspSt+hsDpNlspLppES-RLEpHupKusp..hahWlMlhlVhhhFIsMlLFI+lh .......................................................................................................................................................................................................................................................................................................................................................................................t.thh......th..cht.lt...ht....p....................................................................s.....ptt...tp..s..t............p..t.s.t................................................tt..........sphRp.chht..........................t..................................................................................t....t..pt.ps....ttph..-t.hp....tpcphQEcLsc-hlpLAppLKps.ohs.h.ppslc...pDpphLsps...tcth-ps...h...pphppt....sp+...l....pp.h..t.p.p.s.hp........h.hhh.........h.....hl...h.h.l...hh.h.F.l.....hllhh+h......................................................................... 0 63 103 159 +9586 PF09754 PAC2 HCCA3; PAC2 family KOGs, Finn RD, Coggill PC anon KOGs (KOG3112) Family This PAC2 (Proteasome assembly chaperone) family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 247 and 307 amino acids in length. These proteins function as a chaperone for the 26S proteasome. The 26S proteasome mediates ubiquitin-dependent proteolysis in eukaryotic cells. 
A number of studies including very recent ones have revealed that assembly of its 20S catalytic core particle is an ordered process that involves several conserved proteasome assembly chaperones (PACs). Two heterodimeric chaperones, PAC1-PAC2 and PAC3-PAC4, promote the assembly of rings composed of seven alpha subunits [4]. 22.60 22.60 22.60 23.60 22.10 22.10 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.33 0.70 -4.49 126 1323 2009-01-15 18:05:59 2007-06-21 16:27:51 4 4 791 13 568 1106 677 221.70 21 77.96 CHANGED hlpGhsssGpluplAscaLlcph..chchlupl.ps.phhs.....Ph....shhpc.s.........thhhsshclYhsps..tt.....................lllltup....s.ss.hthp.......paspt.llshscchslppllsLuuhsssh.c..pps...slhshssspchh.pphp.............p.ppht.....s.sGssuhlhths.tptshsuhslhstss........................t..hs.....sPpAutsllc..sl.sch............hslpl.s..hspLtccA...p............-hpphlpp.Lpc ..................hhpGass.sGpsuphAsctLhpph............ptchlupl.cs.-thhsh.sp......pPh...hthps.s......p..........thths..shplahsps.tt.....................sllllpG.....spPs...hpac.......paspp.lhshs.c.ch..s.lppllsLuulsssss+........s+s....hslhs.hus..s...sphh..pphp..........................htchp............sss.u.hss.ll.tth.tppsh.ss.ls.hhstlP................................pY.hs........sPtAshsLlc..tl..pch.............hslpl..s...hs..sLtppApphp...............Eltphlpt.L..t.................................................................................................................................. 0 182 378 494 +9587 PF09755 DUF2046 Uncharacterized conserved protein H4 (DUF2046) KOGs, Finn RD, Coggill PC anon KOGs (KOG2129) Family This is the conserved N-terminal 350 residues of a family of proteins of unknown function possibly containing a coiled-coil domain. 
20.70 20.70 20.80 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.12 0.70 -5.54 3 169 2009-09-10 16:40:10 2007-06-21 16:31:04 4 5 127 0 120 159 5 230.70 45 56.20 CHANGED A-CsSESDoDGuTscsSSouSp.........EcLpsRIcSLpQENKVLKhELDTFKLKCKuLQEENRsLRQASVoIQAKAEQEEEFISNTLLKKIQALKKEKETLAhNYE+EEEFLTNDLSRKLsQLRQEKscLEQTLEQEQEaQVNKLMRKI-KLEADTluKQToLEQLRREKVDLENTLEQEQEALVNRLWKRMDKLEAEKRhLQEKLDQPVS-PPSPRDhh..oputDTsssluSHI+uLRSEVcRLRcNLAsSEt-aTEKMpQYAcEERphREENIRLQRKLpREVERREALCRQLSESESSLEMDDERYaNE.l .....................................................................................................................................t.p......hc...phth.+h+sphltc.-.+tL+psuV.lQu+AEQE..............EEaISNoLhKK...Ips.LpKEKEsLAhpYEpEEEhLT.....NpLSR.KL...QL...ppEKs.cLEppLEpEQEh.VNKLh+....+Ic.......+....L..............cs-p..s.pp....p...............LE.......pLR+...Etl-.LENsLEpEQEhLVN+LhK+h-cLpsEKR...LQ.+...Lpp...hs...t.s..s.....t...................t......t.............h..lptEh..p..............hpt.h.........................c..........pthttpNhphpp.l..phtch.th.pt....................................................................................................................... 0 59 73 99 +9588 PF09756 DDRGK DDRGK domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3054) Family This is a family of proteins of approximately 300 residues, found in plants and vertebrates. They contain a highly conserved DDRGK motif. 
23.30 23.30 23.30 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.26 0.71 -4.89 16 209 2012-10-04 14:01:12 2007-06-21 16:39:39 4 4 157 1 132 209 5 164.80 40 57.11 CHANGED tclupKcttKh.ptKpt++ppREhEctpREp+c+hptp+.tchppp.....c-pcctpcpccEcpccctcEEpcc+EpEEYp+hKupFslE-pGppptps--ptp....hlpcFIsYIcppKlV.lE-LuscFsl+Tp-sI.cRlpsLptpGplsGVlDD.RGKaIYIos-ElpuVspaIpp+GRV.ohs-LsptsNcLIs .......................................................t..hst+chtKh.ptK.pt++tpR.c....t-pt...t.REpR+.phpptc-t.ch+cc.................--cpc.pEppcEE.tp+p.t.+E....EpcccE...p.EEY.+h..KtsF...sVEE..EGhtpp.sc-ppp.....hLpcFl.pYIKppKlV.LED.LAupFtl+Tp...-sI.sRIppL.spGploGVh.DD.R.....G.KF..IYIo.-EhtuVApaI+p+.GRV.SIs-LuptSNplI............ 0 53 73 106 +9589 PF09757 Arb2 Arb2 domain Wood V, Bateman A anon Wood V Family A second fission yeast Argonaute complex (Argonaute siRNA chaperone, ARC) that contains two previously uncharacterized proteins, Arb1 and Arb2, both of which are required for histone H3 Lys9 (H3-K9) methylation, heterochromatin assembly and siRNA generation [1]. This family includes a region found in Arb2 and the Hda1 protein. 
19.20 19.20 19.30 19.20 19.10 18.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.12 0.71 -4.84 43 409 2012-10-03 11:45:05 2007-06-21 17:39:30 4 19 222 9 275 398 1 154.40 20 27.75 CHANGED sPcchhp.........hphhcpshh.pppaacshtp..............................sI+ph.hpcLppc.thl....LP................hsshsp.....p.ts.Ihsosshh.pspp..lllllHsssp....lWAppss.hsssl-suosls.......................aIphu.................................tpcshullslNhsphhhp..........t.t......sshpsshpsp-hsthla-s ..................................................................................p............hphhpphh..ppthapsh..................................................................hlpph.hphLtpp...thl.....lP................................................hssh.p.....p.cs.lhh.S....shh..pspp..llllla.s.ss................Wupp.....hh..hs.....psl....ctuotls.......................alchA......................................hppsaullshN..sp.hh......................t.pt......t..........h...h....................................................................... 0 74 138 218 +9590 PF09758 FPL Uncharacterised conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2219) Family This entry represents an N-terminal region of approximately 150 residues of a family of proteins of unknown function. It contains a highly conserved FPL motif. 
21.30 21.30 21.50 21.50 20.70 21.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.89 0.71 -4.43 18 233 2009-01-15 18:05:59 2007-06-22 10:31:58 4 4 148 0 162 219 5 135.00 47 16.91 CHANGED lRpIsEhlIWGDppcsp.hF-aFhEcslhsphhcllpp.p..sspslplpllQolohLlpNlppcpslaYlLSNsplNclIsapaDhp....c-EllsYYISFLKoLuh+LspsTlphFFNp+hs...sFPLhscAl+Fhsap-sMlRsusRsIlLsIh+ .................................lRsIsEllIWGDQpD.ss.lF-.........FFhE+shhshFlpIl+p..p..ssphVslQLLQTLsILhpNlppEoSLY...........YLLSNNalNslIs.....ac.FDFs.........................DEE.lhuYYISFLKoLSlKLNpcTlpFF..a.Nccss.................sFsLYsEAlKFhsH.sEoMVRhAVRTloLNVa+............................................... 1 72 100 136 +9591 PF09759 Atx10homo_assoc Spinocerebellar ataxia type 10 protein domain KOGs, Finn RD, Coggill PC anon KOGs (KOG2676) Domain This is the conserved C-terminal 100 residues of Ataxin-10. Ataxin-10 belongs to the family of armadillo repeat proteins and in solution it tends to form homotrimeric complexes, which associate via a tip-to-tip association in a horseshoe-shaped contact with the concave sides of the molecules facing each other. This domain may represent the homo-association site since that is located near the C-terminus of Ataxin-10. The protein does not contain a signal sequence for secretion or any subcellular compartment confirming its cytoplasmic localisation, specifically to the olivocerebellar region [1]. 
24.70 24.70 24.70 26.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.02 0.72 -4.28 20 296 2009-01-15 18:05:59 2007-06-22 15:02:31 4 5 255 0 203 293 0 93.20 31 16.29 CHANGED uhKppllcllusLsacsp-lQ-plR-hsG.ltllLssstlD-pNPal+EaulhsIRNLhcsNscNQchlupLcspsl..scsshLpchGhclpl.ps.G+lplcsp .............................thKp.llpllusLs..acs...psQcplp..............ch..sG..l.hlLss.C.s....h.D....cp....NPa.....l+Eaulhsl+.Lh-sNtcNQchltpL.c..tpth.......spss..hLpp.hGhph....t.tp......p........................................ 0 65 112 167 +9594 PF09762 KOG2701 Coiled-coil domain-containing protein (DUF2037) KOGs, Finn RD, Coggill PC anon KOGs (KOG2701) Family This entry represents the conserved N-terminal 200 residues of a family of proteins conserved from plants to vertebrates. In Drosophila it comes from the Fidipidine gene, and is of unknown function. 25.00 25.00 28.30 36.20 21.10 19.10 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.98 0.71 -4.22 14 147 2009-01-15 18:05:59 2007-06-22 16:15:42 4 4 105 0 94 146 3 172.80 51 30.58 CHANGED php-Il-lLlsAGYaRARlpuLSsFDKllGGhsWsIpss.s.....aclDl-hhFpEs.......oIGpKIuloE+IlpsL.cM+CPapLcPHQIpGLDa.slaPVlpWLlK+slEsRpEpu-hl+pauhspFppcas.h.scp..-hhppcpcsspsltshpchatPpR.....hcR.tss.s.hhsc.ppscpsLhEYGp .......p.hp-Il-LLVAAGYFRARIKG...LSsFDKVVGGMTWCIosC..s......aDVDVDLL..FpENu..........................TIGQKIALoEKIVuVLP..+M..KCPHpLEPHQIQ..GhDF...IpIaPV...lQWLVKR.ulEs+cEhGDalRpauluQ...FpKpap..hspDc......-hhpcpcpshcslhslpchYpPpR....ha+R..ttss.........pph.--tsclc.sTLLEYG................................................................. 0 35 45 76 +9595 PF09763 Sec3_C Sec3; Exocyst complex component Sec3 KOGs, Finn RD, Coggill PC anon KOGs (KOG2148) Family This entry is the conserved middle and C-terminus of the Sec3 protein. Sec3 binds to the C-terminal cytoplasmic domain of GLYT1 (glycine transporter protein 1). 
Sec3 is the exocyst component that is closest to the plasma membrane docking site and it serves as a spatial landmark in the plasma membrane for incoming secretory vesicles. Sec3 is recruited to the sites of polarised membrane growth through its interaction with Rho1p, a small GTP-binding protein. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 701 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.38 0.70 -13.14 0.70 -6.40 24 414 2012-10-03 17:31:52 2007-06-22 16:27:39 4 6 269 0 278 545 1 511.00 21 63.19 CHANGED p-...............u-shhccLs+ELspl-tss.lpsllpp-ppssplhphl.-pulsEsDcl-shLshaphpLpslp--lshIEspspGLQlpssNpKhLhpELppLLsplslscsplpsLpp.sslppsptlcth..EsuhtsLhpAhsslp.......................shcsshsphpAlpp+cphacchsppFhcRlspahpptFp.h..p.hpt.p..............chshppatshhppLhhYusLhhahK-lstcpaptLhptYpsphpplYcp-hpphhpth+tphp.......................................................t.pspptptsh.sss..........t..pph....hshpps+phph..................ppuphh.................................tp.p..cshtthLsphpslhhhcQsFl.pFF......+hss.......................................................shsas-hlpttssppppt..phsphp.hc.........ss+ths.pplpphhstlFts..h.scl.shls.s..phcsh......s.slLhhl-ptl.phpp..s..sps.aL.phlt+lhtplpp.as+alppQlctIEc.splsh+pp.GllshlpsFs.FsptsEshhppspp..............hssh.hl-puYp+lspuhhctlpphs.....t............................................ppppphspplshl....ENhpahhcpLs.........................hph......sslpshhcpupphaccphphYhp.tlltcshs+LhpFhpuscshlps.....s.sp.uhphuaSKptl+pllssYsu+-lcpslcpLh++l-KHF..t.p...........................shpcsLlp+lWpshpppalphap+ltsllp.csYss 
............................................................hp..hcth.tplppclt.hptss.l......ttl..tp.-.....h.......p.lhphl.-tshtps-p....h....-.hlp.a.......p..L..pp........hpc...........pht.Iptpsphl..phpstNphhLhpclp.llpph.p.l.st.t.ht.Ltt..ss...h............tp.p......ltth........uh.hL.tsh.........................................hp.s.th...h..u..lppppt.htt.pt.Fhp+h.paht.ha.........t.t................................ph...tp.t.hht.lh.as.Lh.....ah+th....s.ttat.lhp........Y..p.hp...l.hppphpthht.hp.tht.................................................................................................................t.......................................tttt..t........................................................................................s.................................................................t.hhtthLtp.h...hh..EptFh.pFh.....php................................................................................................................t....t.ht.........tt............................................t.htthh..lht.....h..ph.thht.h...p...ssh.....................h.hhhhhpphh.....tt.....s.........stt.al...hh..tph...thtt.apc.hl.ppp...h............pthpp....scht....h....pp..t.Gllshh..h....h.thhEthht..................................................................................lsp..Y.pl.tshh..lphhs..............................................................plhhh.....cNhthh.t.l..........................ht........sLtt.hppAp.p.hppthp.Yh....hh....ht+l.tFhpthpthh.t........ht.tpl....sh..p.shs+t.hpcl................ltt..s.pp.....l.....ccslpthhc+hpKph...........................................pptLh..lhpt.hpt.hhp.h.th.thht.thY............................................................................................................................................................................................................................................................ 
0 107 168 238 +9596 PF09764 Nt_Gln_amidase WDYHV; N-terminal glutamine amidase KOGs, Finn RD, Coggill P anon KOGs (KOG3261) Family This protein is conserved from plants to humans. It represents a family of N terminal glutamine amidases. The enzyme removes the NH2 group from a Gln, at the N-terminal, rendering it a Glu. 20.30 20.30 22.00 21.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.65 0.71 -5.00 14 159 2009-01-15 18:05:59 2007-07-09 17:17:08 4 5 132 1 108 172 5 164.70 40 77.58 CHANGED hYsSpYCEENVaKLsEhl...t..psshppha....AVFIS...........N-pKplPlW+Q+uups..sss.VlWDYHVIhlp.......sspsutshVaDLDosL......PaPs......shppYlpcuhps-..tplpspa.........RRpFRVlsucpYLppFuSDRSHM+ctsGsahpPPP.aPsIpsscus......hNLss.alsMp.pss.................GtVhs.sphhphFu ................YsspYCEENlaKLCc.l.....tt..p..t.....hp...cha............sVFIS..............Ncp+hlPlWcQ+uuts......sss...VlWDYHVlhlp..........................sssspshVYDLDosL......PFPs......sappYlpcsh+s-...sl.pspa..............+RhFRVl.AspaLppFuSDRSH..M.......+....c.s..s..G...s.....WhpPPPsYPsIts.sssp.....................NLsp.aIsMs.t.s...................G.Vhs.tph.p.a.h..................................... 0 44 58 85 +9597 PF09765 WD-3 WD-repeat region KOGs, Finn RD, Coggill PC anon KOGs (KOG3268) Family This entry is of a region of approximately 100 residues containing three WD repeats and six cysteine residues possibly as three cystine-bridges. These regions are contained within the Fancl protein in humans which is the putative E3 ubiquitin ligase subunit of the FA complex (Fanconi anaemia). Eight subunits of the Fanconi anaemia gene products form a multisubunit nuclear complex which is required for mono-ubiquitination of a downstream FA protein, FANCD2. The WD repeats are required for interaction with other subunits of the FA complex. 
25.00 25.00 27.80 25.70 20.20 22.40 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.74 0.70 -5.13 9 143 2009-09-11 16:53:32 2007-07-10 10:43:26 4 7 100 4 91 139 4 227.60 33 69.28 CHANGED LhcchPhLlsps.+ptshh.talusptpsa.+l+lhLPccspLcsu+lhs.hthcplhhthppsspp+lpps..L.uFl.cLcplLEstLKspsttps.....sphhosLhp-ltsltas+hshl..Dsshopl+Lpu.Dut.RpHhlTlclpuphshcss-asls.slshuhsht..puoLtshhspFlthLEsLcsFaDshc-IDEhsaVLEPppssppsosRhIslsppV.lplplcPtcPhhh.tshhLu...ss+.Vs.LRphLssslc.WDPEsslhpNLhclh-l.tFPh. .........................................................................................................lph.....................h.h......th.........t..........p....p.ht.ps..l.thh.pl..ll..h.p...p.......s..........sphhsplhp-lt.tlGWsplh.l..ssshsplchph.Dst.tRpHhlplpl....s..paP....h....psP...s.h.sshPh....hth.pas.............p..............o...sLhslhpQFhttl-.pLptFWcshD-IDcpsWVL-PppPspus.s......hRRIslus..ssSlplpl.cPtcPphlP....pshhLu...ssp...................hlp.Lt..p..hsp.sh..phWssc....pslhpNLpplLph.thP...................................... 0 33 45 69 +9598 PF09766 FimP Fms-interacting protein KOGs, Finn RD, Coggill PC anon KOGs (KOG2216) Family This entry carries part of the crucial 144 N-terminal residues of the FmiP protein, which is essential for the binding of the protein to the cytoplasmic domain of activated Fms-molecules in M-CSF induced haematopoietic differentiation of macrophages. The C-terminus contains a putative nuclear localisation sequence and a leucine zipper which suggest further, as yet unknown, nuclear functions. The level of FMIP expression might form a threshold that determines whether cells differentiate into macrophages or into granulocytes. 
26.80 26.80 27.50 26.90 26.40 25.10 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.24 0.70 -5.38 17 303 2009-01-15 18:05:59 2007-07-10 13:56:45 4 8 220 0 209 278 2 260.80 32 53.71 CHANGED lpL+chsRhshhphccsR.cpscctKppVDttpLpLpNLhYEtpHLp.......KEIp..pCh-FKopct......cl-Lls.-EFap.cAPppls......csphsttspHp.hLtRLsaELpQRKcLscphccLpppKppltps.Itp+cchL.sSLtspL+...sltpuohPlQchlshsapp...p.cppc.......hsphLPtPLYlLYsplpuhtpsp-c......tlplpIh...Gs.c-Apshtpt................pspppspssp-sccppppp++Rcpptphp.sstp...pphhc.hHP..........Lplhlclhspct...............ltLpFp........YlspLplVsVpsphsst....................-slLssLFs.......sDsGp........chP.................p.ssph.hpchsl..pphsp...t..hG+PYpWsQpLsGlpah ...........................pL+thNR.shhph+ps+.ppTtcs+pclDthcLQLQNLhYEh.HLp..............pEIs...tC.pFc.s..+ap.......................pl.LlshE.EFhp...ptPtpht.........................ttscsHphhlsRLsaELpp...Rc..cLtpphpclhppKpplhp-..pp++chL.ssLt.cLp....plh.pAu..hPlQchh...th......................................................s..h..h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 83 122 172 +9599 PF09767 DUF2053 Predicted membrane protein (DUF2053) KOGs, Finn RD, Coggill PC anon KOGs (KOG3236) Family This entry is of the conserved N-terminal 150 residues of proteins conserved from plants to humans. The function is unknown although some annotation suggests it to be a transmembrane protein. 
21.70 21.70 21.70 23.10 21.60 21.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.91 0.71 -4.24 11 161 2009-01-15 18:05:59 2007-07-10 14:12:23 4 5 123 0 102 168 2 149.60 45 65.19 CHANGED TLFHFsNChALsasPaalsYKsouLSEY.suhhpClpAussYlhTQLsKhllLA....TFhsss-s....ssashhsEhL...+t.hshlDlhGLhLllop...hssctch+llssGLGWuhA-slho+hlsLWVGARGhEFoWcYl.pul-uNh.LlpplshssL.lahhoR ...............TlaHFhNChALsa.hPaalsYKtosL.S....EY.su......hhp.Cl.pAussY.lhsQLsKhLhLA................TFass....-s..............s.a..-...hhsEhh...KsslDlhDllGLh....hlhop...hs.GKuchKlhssu..LGWAhA-hlhoRhl..P.LWVG.A.RGhEFsW+YI.hul-SNlsLV..phlshusL.lWhaoR........ 0 39 53 81 +9600 PF09768 Peptidase_M76 Ku70-bp; Peptidase M76 family KOGs, Finn RD, Coggill PC anon KOGs (KOG3314) Family This is a family of metalloproteases. Proteins in this family are also annotated as Ku70-binding proteins. 25.20 25.20 25.70 25.40 24.20 25.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.29 0.71 -4.80 29 315 2009-01-15 18:05:59 2007-07-10 15:56:33 4 8 269 0 235 318 5 167.90 37 67.56 CHANGED pppCpchhchhlphSPhVpFhhpplc+ls.......ss....hppppIhCc...................hCsst.........puGGFsP........ctGIllCpNpl...............+sctclEcsLsHELlHsaDch+hc.lDa...tNl+HpACSEIRAusLSG-C+ahpEhh+tshs...phtp.......paQcCV+RRAlhSVhuNPsCp.s.pcAccsVscVWcSCFsDTRPF-c .............................................................................pcCpt.hphhlp..s.P..h.l+ahhpt..lcp..hu..........ss........hppppltCc...................................Cssp....................huGG.F.ss....................ptsIllCpNp.h..........................................................+sptcl..ccslsHELlHAaDahRh..c.lDW....pNl+HhACoE.IRAus...LSG-CpahpE.hh.+t.tht.....lpp..........paQ...pCV++RAlhSlhu.s.ssCp.........p.tpApcsVscV...a-sCasDpcPFs....................................... 
0 83 133 195 +9601 PF09769 ApoO Apolipoprotein O KOGs, Finn RD, Sammut SJ anon KOGs (KOG4798) Family Members of this family promote cholesterol efflux from macrophage cells. They are present in various lipoprotein complexes, including HDL, LDL and VLDL. The apoprotein is secreted by a microsomal triglyceride transfer protein (MTTP)-dependent mechanism, probably as a VLDL-associated protein that is subsequently transferred to HDL [1]. 23.90 23.90 23.90 24.00 23.70 23.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.57 0.71 -4.52 36 346 2009-01-15 18:05:59 2007-07-30 13:28:07 4 5 224 0 212 323 0 151.40 23 63.87 CHANGED hh-ccsstts.h.spphsh.....sspp.p.................................hsscssshLppthpphRhtltpthshsps...........thsshhsphhstccphpsshssLtsssps...lLPshhaIlluuluGsIluRpRuhhhRhhhPlshussuhshhhPpo....hcssuphhashEccthPsls .............................................................................h.sppls.lhss......sttp.c.............................................hs.psss.LpptlsphRphhtshhshsps...........ths.pshsphhsh.p.t..hpsshs...Ltsssps....hhPphshIsluuh..sG.....llu...Rp...tuhhh+hshPhshushuushhhPtp....hpssuchhaphtpp.hssh................................... 0 47 90 156 +9602 PF09770 PAT1 Topoisomerase II-associated protein PAT1 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4592) Family Members of this family are necessary for accurate chromosome transmission during cell division [1]. 
25.00 25.00 25.50 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 808 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.62 0.70 -13.55 0.70 -6.30 17 499 2009-01-15 18:05:59 2007-07-30 13:30:45 4 8 240 9 328 497 1 425.60 16 81.25 CHANGED MSFFGFDoohPtcptstst.tt..p..............................h.Fp-TYcG.LG-p.pE-sDshND-TFGssh....slG+DFDFtstpuphsts.tttt.........tsutss.tstsphs..................psppsshpttpssp.hscLpshsulWu...t....................tssttspstPpssststp.lphpthpt.h..........tt..t.st.s..shs.shsst...t.h..........h.sshsttas..ss..................tph..tst.t....tthsh..PsphPs...tht............ph.st..tsst.ptt............pttsPPht...ts.P.t.sp......tt.ssh....t.p...........................................tp.hphspppchshh.cc..................t++hp+pcchhth.......tKasGLMTPpDKsFITRlQLoQlV..........o--PYsEDFYaQVap........h.ttstppsppstsphApsYL.poGpR..........tt+h+pu-sshQRMQQQVp+AVp.............ts+t+sKtsphhh....EGuLGKIShu.suKsPRp.L....shcpspssp..........tt.pp.ssphshpc.........................................t+KpILphlEslYpplhclEshpRshs..................tth..hptchps.sppLWpsL+l....ps.ssss.p...........spPFIuhLSasKGhKllPRlFpalscEQcl.TllohIhspLspLsVlhpu...ss....Phhsh.........pcth-..hFptslhsslhsalsps.sat.lhuLLshllp.psslshlupo+IGLullThLlSRAEll+ps....u.ss...........upp-hppWsphashLFssL..s..lsslFPs.............s.ststptYl.....................WQFL..Aululuup.ppQphlV.tV+DclhtTlspuKpl.s.......................hhtppplpNlNLFLpshGLssc 
......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ssLGpl..s.s...tP+..l.......ph......t............................................................................................................t..hh...........lEthh..lhph-t.....p........................................................tth..thth.......................................................................................hhthhth.KGhhhh.Rhh..l.........t....t.h...hh.h.hh.th............................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................................................................. 0 84 140 233 +9603 PF09771 Tmemb_18A Transmemb_18; Tmem18A; Transmembrane protein 188 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4606) Family The function of this family of transmembrane proteins has not, as yet, been determined. 25.00 25.00 25.60 25.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.56 0.71 -4.15 5 136 2009-01-15 18:05:59 2007-07-30 13:31:49 4 4 93 0 68 120 0 113.90 55 84.03 CHANGED hEPS.ACEDLKAFERRLTEVIouLpPoThRWRIlLslhSlhTuluAapWLsDP....cTppVPhh-S.LWsHPhFTlSslsLllLF.lhGIHK+VVAPoIIAuRCRoVLAEFNMSCD-TGKLILKPRPpNssp ..................t......sE....DLKAFERRLTEhlpslpPuTtRWR....hlLlllSlCTAhGA.WpW..LhDP..................cTppV.sh..hpS.LWsHPhFTlSsl...TLlsLF.hhGIHK.RVVAPSIIsuRsRsVLu-aNMSCD.-.oGKLILKPR.....s................ 0 23 28 50 +9604 PF09772 Tmem26 Transmemb_26; Transmembrane protein 26 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4610) Family The function of this family of transmembrane proteins has not, as yet, been determined. 
20.20 20.20 20.70 20.60 19.30 20.10 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.02 0.70 -5.19 10 150 2009-01-15 18:05:59 2007-07-30 13:32:27 4 2 77 0 112 137 0 235.10 35 74.38 CHANGED llshLpAllTRlLFhlHulVuVWpVshlK.c-shYWhLslsllLLslEslhTlhh+KGcEaKWFsPSlFLYLsoIlPulWlLElchlpp+.............................sshucshcs.tpLhusluls...............httlsscsWstsLEQsLlLlLIlGRWLLP+Gc.lTRDQLSQLLLsYVGsAADIlEFh.-olKEspVt.....sNstlVhulLslWoWShhQFsLVLosTtsptscsusptcst.p........shhsphssDlWuIhlslllQDGPFLllRLlLhshacVIspMhlFFTsKNsLVllLQLYR .......................................................................hhpAlhoRhlFhhHu.hlslWpVs.h......p.pp......haWhL....hh..hhL.hhEhhhTlhh+.cspt.....a......+WFsPulhhYL.sllPulWlLEhc..pph............................................................................s..p.p.t.......h...t.t..tht................................................lstpsWhhhlcQhhllhLIlGRWLLPh.Gs..lTRDQLSQLLLhalGsAADIlEFh.-ohcpp..pl..t.....pp.hllhhhLslWoWShlQFslsLssp..thh.....s.ht.ttt.............................s.p....hsh-lWslhhslhlQDuPFLhhRLhlhh.aplhs.h.lFFssKNhLllhLphYR................................................................... 0 52 61 89 +9605 PF09773 Meckelin Meckelin (Transmembrane protein 67) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4611) Family Members of this family are thought to be related to the ciliary basal body. Defects result in Meckel syndrome type 3, [MIM:607361], an autosomal recessive disorder characterised by a combination of renal cysts and variably associated features including developmental anomalies of the central nervous system (typically encephalocele), hepatic ductal dysplasia and cysts, and polydactyly. 
Joubert syndrome type 6 [MIM:610688] is also a manifestation of certain mutations; it is an autosomal recessive congenital malformation of the cerebellar vermis and brainstem with abnormalities of axonal decussation (crossing in the brain) affecting the corticospinal tract and superior cerebellar peduncles. Individuals with Joubert syndrome have motor and behavioral abnormalities, including an inability to walk due to severe clumsiness and 'mirror' movements, and cognitive and behavioural disturbances [1][2]. 25.00 25.00 27.70 25.10 22.10 22.20 hmmbuild -o /dev/null HMM SEED 853 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.53 0.70 -13.40 0.70 -6.77 7 200 2009-01-15 18:05:59 2007-07-30 13:33:13 4 7 106 0 140 197 7 504.90 26 83.21 CHANGED P.uhtspsspC.ssshhshhhssuCs.splhpuh.hshssssssshps.ssssh...s.hs.phsu..hhph..suAs.phhhushoACphLuNhCVh.hashsu...ssCtLapplhpst.t.........hhpthPhLaYucssshhp.l.sshshshphshttp.....LphlsusYDlcGshlthpslt.p.lpLCspssschpshashGsshphsCplshpcLl...tptspshFa-lalp..tsspchh.hslsh.hpshphpstphppsp.........................uh.phhhpRRhaLhDslsts+cts.p..........ppPphlpsspplplsh.lsspsppcplhsPllhlpYush.h....................sssphsshohulp...aphspssh..lhh.lshslhssLshhsuhhRThsWhRRpts.....hlshtslh+FhlahsscluNhFhhhshhsuhYhhlhaKhQpss.hhh.....spp.hhhpshlhsAhAhKulthLhclhpQsshDhFhIDWERs+uphhtppch.................sPVShWRohFVANEhNcLQslRphsPLhphhhlLFFL.sLsa.phu.psP.sott.sh.shs.s...hLRhuLsohhalhluLl.hlhchthahRFh..pPlppFVDLCSlSNISlhlLs-ppaGYYIHGcSlHuHuDssME-hppNLphEups.hs.RGLsspo...csQTaplhhs.phRpah.hhh.p.ptcpppu.h+t.ts.t...........hshppp.psYsshshhlpshIscsh+.shchh..h.hsK.hhcphLshtPs.hhhps.tp................sthslFahD-shuaupshhhG.-hsLhlh.hhlasslDluspNhhlAhslsaslphlh+ahRhp.GhtNlSpKTLID-RFhI 
..............................................................h..............................................................................................................................................................................................................................................................................................................t.h........................C.................................................th.h.........................................................................................h.h..........................................................h.+Rh.hhpt..........................h....ph.h..........................s.h.h.h.............................................................h..............................................h.h........................................................h....t..s.h.h............................................h..hh......h..ht.h.hh..hh.....phFhlDWEp.c........................................................................lshWR.hhhANta.clp.hp.hs..hphhhhhhhh.hhth............................................................h..h....hah.h.hh.hhh...hh......p.h.tpFhDlsshuNlSlh.h.p..aGaYlHGcu.hshu-ssh..........ph...h..........tts....RGL..tt........p.psa.h.....hp..h..........................................................................................hh.thh...................................hth........................................................................................................................................................................... 0 74 84 118 +9606 PF09774 Cid2 Caffeine-induced death protein 2 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4613) Family Members of this family of proteins mediate the disruption of the DNA replication checkpoint (S-M checkpoint) mechanism caused by caffeine. 
25.00 25.00 26.80 26.70 20.50 24.50 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.03 0.71 -4.11 22 228 2009-01-15 18:05:59 2007-07-30 13:33:41 4 3 202 0 153 219 0 140.50 30 83.25 CHANGED Chs.stlcshLRh.Rp.lDDpIpppLNshhsssp............t.p.................t.ttCpphhcp.LhsuWpsRscllpaCtshusp.c.cssp............tpssptpcpthspRlDPYus+thpcE.pt.....psltphlpsEcsVEpIIRpRThplls-+Cth..pshp ........................Chs.tthp-hL+hhRs.lDDpIhppLNshhssup...hshc.............................sssppCcphhcp.LhsuapsRsclIphChs.ss..................................tpstp.+-ch.cpp.Dshsh+thtcE......................psh.hphlpsEhsVEpIl..psRohclhp-RCphp.p...................................... 0 44 74 119 +9607 PF09775 Keratin_assoc Keratinocyte-associated protein 2 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4615) Family Members of this family comprise various keratinocyte-associated proteins. Their exact function has not, as yet, been determined. 23.50 23.50 24.20 40.00 21.90 23.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.61 0.71 -4.60 8 113 2009-01-15 18:05:59 2007-07-30 13:34:05 4 2 96 0 73 121 1 122.90 45 90.56 CHANGED MAVsouTShsLSolLhhLlFusMQMY+sQLASSphhTIhGGFLGSLLFlh.LTAluNlEsllhG+GFQsKlhPEVVlshllALhAuGhVHRVClTTCLIFSlsuLYYlNKIS.phat...ssslstssspKpR ...............................uoGsShhLSuLLulllFushQMYpppLASoEhhT..IhGGhLGShLFlh.LTAhsNlEphlFGpGFQsKlhPElllsLhlALhAuGhlHRVClTTChlFShsuLYalNKISpphaps...ss.s.sh..ttt..tt....................... 0 22 34 52 +9608 PF09776 Mitoc_L55 Mitochondrial ribosomal protein L55 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4616) Family Members of this family are involved in mitochondrial biogenesis and G2/M phase cell cycle progression. They form a component of the mitochondrial ribosome large subunit (39S) which comprises a 16S rRNA and about 50 distinct proteins. 
25.00 25.00 29.90 28.70 23.00 22.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.31 0.71 -4.50 8 98 2009-01-15 18:05:59 2007-07-30 13:34:58 4 3 79 0 60 104 0 110.20 39 80.83 CHANGED hLplLppsshpuss..s.p.hhsoshRssusRASloRl+RpsYuRLYPlhLVpPDGSTIpIRY+EPR+llphPlDL-sLSPEERRARlcKR+P..+pKlchp-El-DsFDsc+YhpFh.....+K .................................h....................phhhsshp.hsS.s+.AuloRl+RpsYsRh.YPslLVpsDGSTIpI.RY+EPR+llt...hPlDLssLS.EE....R+sRLc+Rcs..p..p.K.hc.h..p.p..El..pDsFcsc+Yhpahp+........... 0 18 22 41 +9609 PF09777 OSTMP1 Osteopetrosis-associated transmembrane protein 1 precursor KOGs, Finn RD, Sammut SJ anon KOGs (KOG4617) Family Members of this family of proteins are required for osteoclast and melanocyte maturation and function. Mutations give rise to autosomal recessive osteopetrosis [MIM:259700]; also called autosomal recessive Albers-Schonberg disease. 18.90 18.90 19.80 19.60 18.40 17.50 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.19 0.70 -5.20 10 112 2009-01-15 18:05:59 2007-07-30 13:35:37 4 3 86 0 64 112 2 211.00 35 74.45 CHANGED CpcLLtpFusspuchssChshpuhPV.+LCps..ChstYcsLpsh......YsNlpus...........................stpCucslLsSDRlplVsTlpshLss.lWppANC-sCls........pt..shsNcTtpFhshhsphhsChcp..p...........Nto-lCcsCKssYpcLNchYt+l-Kh.........ss..clClDlEDuMNpTRpLWS+TaNCs..Cp-sVs....lIAVuuhlLhLPllFYloSalpocpKc........R+LIhssRhpSssutsslp ....................C.thl.phupttuchhtChspt.uhPs.....plCps..Ch..atphhph......h..ssltps........................pstsCsc.lh..sD+hplVshlp.phhss.hWppAsCssCls.......................................ppt.t.....hoNsThhFhsh..hsphhsChpp...............p................s...............................Nh....oclCpsCcpsYpsLsshYp.chp+.hs.................ss..plClDlEDsMNhTRpLWS+sasCs..pscsVs.....lIAVushlL.hLPllFYloSalpocp+c..........RcLlhspphps.................................................. 
0 23 27 46 +9610 PF09778 Guanylate_cyc_2 Guanylylate cyclase KOGs, Finn RD, Sammut SJ anon KOGs (KOG4621) Family Members of this family of proteins catalyse the conversion of guanosine triphosphate (GTP) to 3',5'-cyclic guanosine monophosphate (cGMP) and pyrophosphate. 28.10 28.10 28.40 30.60 26.60 28.00 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.40 0.70 -5.15 10 169 2012-10-10 12:56:15 2007-07-30 13:36:51 4 4 106 0 89 157 9 174.60 40 78.21 CHANGED VPHlpQtYsWDCGLAClLMVLctlshsspp..t-FpclCpc.thTpSlWTIDLAYLL++FuVcHpYaTpTlGANPsapscoFYK................cphssD.sRVspLFpcAcssGlsVcpRSVohpEIppHLtsGp.lAIlLVsAslLsC-lC....Khsh..shsptsasppscYpGHYVVlCGYDpssscFhYRNPAsSD.+lC....psShcsLEcARKSaGTDEDILLIa ..............................lPhlpQhapWDCGLACshMVL....phh.t..p.t.....phpph.hp......opS.lWTlDLAYLhp......+a............uV..p.apahT.TLG.ss.sYpspoFY+................cphsp-ppRVspLFtpAps.st....l..l.ppp.o....lohp-l..hLhput..hsIsLVstshLp..................p..h......h.....t.hst...p......ssYtGHalVl...pGYstss...tp................h.hpsPu.sc..php....................phs.psh-pARpuaGTDEDllhl........................ 0 32 49 66 +9611 PF09779 Ima1_N DUF2349; Ima1 N-terminal domain KOGs, Finn RD, Sammut SJ, Eberhardt R anon KOGs (KOG4623) Family This domain occurs at the N-terminus of the Schizosaccharomyces pombe inner nuclear membrane protein, Ima1. Ima1 interacts with other inner nuclear membrane proteins [1-2]. 
21.30 21.30 21.30 26.70 21.00 20.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.19 0.71 -3.66 12 204 2009-01-15 18:05:59 2007-07-30 13:37:26 4 4 167 0 144 192 0 120.30 35 19.20 CHANGED lsCaaCspcot.shpst.p.WpC.pCEthNhhsEpG-..pD...Psttpsp.....pshtsssp.ss.p...........sspshFCspC.cNQplhhptLApa..hPss-cspYttY-cch.taR+pLEcpYPplCspCEs+V ....................................lsCaaCspp......o.hhsh..t..spsp.apCspC-thNhhpcsG-..ps...Psth.pph.sp.hs.ps.....s...s...tsssp..tt...................ssspslhCpp..CpcsQplhhppLAs..a..h.P.c.s.-s....................................pa-cclpsY++pLEppY..plCpsCpstV............. 0 46 68 110 +9613 PF09781 NDUF_B5 NADH:ubiquinone oxidoreductase, NDUFB5/SGDH subunit KOGs, Finn RD, Sammut SJ anon KOGs (KOG4632) Family Members of this family mediate the transfer of electrons from NADH to the respiratory chain. The immediate electron acceptor for the enzyme is believed to be ubiquinone, the reaction that occurs being: NADH + ubiquinone = NAD(+) + ubiquinol [1][2]. 25.00 25.00 51.20 40.70 19.60 18.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.26 0.71 -5.01 10 143 2009-01-15 18:05:59 2007-07-30 13:39:22 4 2 98 0 69 136 0 164.40 44 97.25 CHANGED MAuMSlL.puuAuhsApLsslhpussttsslppslsts+ss.ush.auGsHG++hFsIpPS+FYD+RFLcLL+FYlhLTuIPVushITaVNVFIGpAELAEIPEGYhPEHWEYYKHPITRWIAR.laDSPpK-YEKhLAhlphEsEKA-hRhhEtEVR+hM+ERGDGPWYaYcT.......l-KEhlDcu.KATPDs ................................................................................h..........th....p....t......h..s...tu.HG++hFhI+P.Sp.a..c+FhcLh+FYlh.LssIPVshhlohlNlFl.GpAELAEI.P................EGYhPcH.WEYaKHPIoRWIARhhasSPpcpYE+thAhlphEsEKAcl...RhhEhcVR+hMppRsDh.haaYps........lsKphhDp..c........................................ 
0 19 25 47 +9614 PF09782 NDUF_B6 NADH:ubiquinone oxidoreductase, NDUFB6/B17 subunit KOGs, Finn RD, Sammut SJ anon KOGs (KOG4633) Family Members of this family mediate the transfer of electrons from NADH to the respiratory chain. The immediate electron acceptor for the enzyme is believed to be ubiquinone, the reaction that occurs being: NADH + ubiquinone = NAD(+) + ubiquinol [1]. 25.00 25.00 31.50 28.10 21.50 21.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.03 0.71 -4.37 11 120 2009-09-11 15:31:47 2007-07-30 13:40:18 4 5 86 0 65 126 0 126.40 38 83.66 CHANGED hoGhpP...............DE+lRLptL....RphR+pWLKDQELSs+EPVlsP....cphsPIc+Fapp.L-p......pssathhhhpsYRhslhplshlLlshahsHYYhKYcss......ppsatllppKsplhPG.......sslhEpG.s.Ps.t-assppa .................hsGhpP...............-E+lRl.pt.l....RphR+pWLKDQELSs+EPVlsP.......pths.Plc....+Fapt.Lc..p.........pssat...h...hh.htshptshhthhhhLlssahhaYYhKYphs......................ppsa.tllpp+.tlhPG.......spl.-pu....s..t-h........................ 0 19 24 49 +9615 PF09783 Vac_ImportDeg Vacuolar import and degradation protein COGs, Finn RD, Sammut SJ anon COGs (COG5073) Family Members of this family are involved in the negative regulation of gluconeogenesis. They are required for both proteosome-dependent and vacuolar catabolite degradation of fructose-1,6-bisphosphatase (FBPase), where they probably regulate FBPase targeting from the FBPase-containing vesicles to the vacuole [1][2]. 
20.70 20.70 21.50 24.10 20.50 20.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.34 0.71 -4.78 20 333 2009-01-15 18:05:59 2007-07-30 13:41:13 4 8 229 0 222 328 0 167.60 39 52.54 CHANGED saLcsGspFpGpQpS.............ppppYpVcVpIcsVDht.........psaLsGaLpIpsLTs.paP....plTTaFEGEIIs.....................scasFhTp.......cWsAsp.........csDlpHWt+FPuF.+slsptttp.......................................tphpht-h.spcalFMRWK.EpFLVPD........tpl.cslsGASa-GFYYIsasp.....ssGsIpGaYYHts.uEpF..QpLpLpss.p.c ...........................hLhsGtpFtGpQpS.............ctptapVpVpl...ppVDht......................puaLsGaLcI.....pGLT-..caP.................slTTaFEGEIIs.............................pcas..FhTp.......pWsAsp..................................csDhpHWs+F..uF.pt.htpph.p.p.............................................................................................sshp....hpchh..sp.palFM..........R....WK..E.p.F..LVPD........................ppl.+slsGASasGFYYICapp........................ssGslpG.hYYa..p.Scha..QpLpLp.s.sp........................ 0 65 115 176 +9616 PF09784 L31 Mitochondrial ribosomal protein L31 Mistry J, Wood V anon Pfam-B_24102 (release 21.0) Family This is a family of mitochondrial ribosomal proteins. L31 is essential for mitochondrial function in yeast [2]. 20.50 20.50 20.50 31.50 19.30 20.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.54 0.72 -3.95 10 125 2009-01-15 18:05:59 2007-07-30 13:41:43 4 3 122 0 100 120 0 106.30 53 91.26 CHANGED TpPlhGGLLWKlPWRhSssQKtRQRcRLRuVD-VlcsLs................puLp.h+.sp..spppl.+hlsp...hPpEppMSPKDK.......................YTsFsKKs.....+GYRKGIHKVPKWT+lShRcNPptF ............Tssl.GGLLWKI..P..WRLSshQKsRQRcRLRsVDpVlcsls.......................pAL.t...+p....Gt......stcslpRh..htc...hP+EpEMhPKDK.......................YThFD+Kp.....KpYRKGIH...........KlPKWT+lStRhNP.GF.............. 
0 26 57 86 +9617 PF09785 Prp31_C Prp31 C terminal domain Mistry J, Wood V anon Pfam-B_7665 (release 21.0) Family This is the C terminal domain of the pre-mRNA processing factor Prp31. Prp31 is required for U4/U6.U5 tri-snRNP formation [2]. In humans this protein has been linked to autosomal dominant retinitis pigmentosa [2][3]. 25.20 25.20 28.20 30.90 24.80 25.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.74 0.71 -3.69 28 315 2009-01-15 18:05:59 2007-07-30 13:43:11 4 7 273 0 230 308 5 129.70 42 25.27 CHANGED ss+hsKsLPhPp-.pspKKRGGRRhRKhKE+auhTEhRKhtNRMpFG....................ppE-shh..hspshGlGMlupssst.........+lR.....htphss+.........sps+hoKph...........................pppLpp.pstss..............GhsSSlsFTPhQGlEllsP .....PsKtsKsLPsPt-.ts.+K..KRGGRRhRKhKE+.huhTElRKttNRMsFG...................c.E--sht..t-hshGlGhlGpsssG.........RlR................ts.plsp+.........o+A+lSKph...............................................pppLpt...pshtssshs..............................uGhuSSlAFTPlQGlEllsP.................................................................. 0 78 130 191 +9618 PF09786 CytochromB561_N Cytochrome B561, N terminal KOGs, Finn RD, Sammut SJ anon KOGs (KOG4670) Family Members of this family are found in the N terminal region of cytochrome B561, as well as in various other putative uncharacterised proteins. 
27.30 27.30 29.60 27.60 25.60 24.20 hmmbuild -o /dev/null HMM SEED 580 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.78 0.70 -6.06 9 196 2009-01-15 18:05:59 2007-07-30 13:43:26 4 6 121 0 107 172 0 407.90 32 88.01 CHANGED pss...........psPhlspslshphpptppphhLhhsllsl....ulluhlhh-hshpshhsahsl...shahhthslsulluLsslhs......aspaF+hlh..................sp-plshostQppLLu.lcspspt....ssupsspp.stspsphPsssSss.lph.p.shstSstpStSsu.hhosssssthps..ps......................p..u..s...s.uasosl.s.psSsstspht......SPhuhpp.sspcDhhT-p+hL-paLpphcc...phppussspsos..p.t...sousohhstupsssshupslhpp.hph...Ssuss.s+pchshssKchpu.......h-su.Esht+lu.....hsplppapupL..RtWlSpTlLpPLVpcIcoscpph+ppus...sslpIGplu................................lcpL+psA....................t.phps.hhPhLPhlh.aLDshoNQc.......................YLVpRIKELAcGoClssY+WsuGushpGc...........cWspcLPTDSsllhaLFCsYLD...............oQLsspPh.sG....scsFss+YlllsssKPsstp.......stAhslhlss.sP..PpFshla.D++la....sshpsRsNLFcsll.FlahlKscpsGhlcslNLGpSulNILsll-s 
...................................................................................................................................................ss.............ltt.h.....t......lhhh...h......hsh..h.c........h.t......h.....h.h..hh..hhsh.hlh.......h.thhh.hh........................t...h.ho..p..hLt.h.............................................h........t.s...t..........................................................................................t................s......................ss....t.....tpth...t-.t.L.phhtt.pp.................p....................ss.hs.........st.ht..h.hh.hph...u.s.....p...pp.............s.cp................ts.-sh.t+ls......hsphppahsph...R.............Wlspsll..Llpclpphst.hpphsh.....pl..pl.G.psu.....................................................................................lppL+thh.............................s.lshl..h..L-hhspp.p.......................YlhpRl+-LupGsChs.sacWstGu...shpsc.........................pWs.pcL.PTDutllhHlFCsYLD....................................upL...sp.P.h..s.........s+sFs.spahh.ps.sc.sshhp...................pp.shh..lh.ss..s..P...P+apllh....p..c...clh.........ph.pGRsNhFcsllhFlahl+ppptGhlttl...slG.uulNlh.lh............................................................................................. 0 39 53 85 +9619 PF09787 Golgin_A5 Golgin subfamily A member 5 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4677) Family Members of this family of proteins are involved in maintaining Golgi structure. They stimulate the formation of Golgi stacks and ribbons, and are involved in intra-Golgi retrograde transport. Two main interactions have been characterised: one with RAB1A that has been activated by GTP-binding and another with isoform CASP of CUTL1 [1]. 
30.00 30.00 30.20 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.82 0.70 -5.98 16 253 2009-01-15 18:05:59 2007-07-30 13:43:51 4 5 110 0 149 263 0 354.90 25 75.37 CHANGED -N..................ElspLsQstAollptpptsp-.p............p.s...s....................................psNpusucspKhsscp.........sppssoohpLsutscshspssusps.......pLAslKltLpEhstElcph+ppL-sh.ppcsp.hpscpcsppLppttl......coLp-+Lp-t-sslppcppshpptphtFlc+lschEh.hppLt.t.s.A.R+hscchpchs-hppplclh+ttscspctELhcY+.+Ap+hLQsK-KhIspLKptshhpuhp.t.ss.............hEL-ph+cEppp.p-ElppLptQIpp.h.EhpDhcsctsupscph+cpspclppthtsphoo....-s-.thhppEhtahcEshtpppsshpsRlp-R....psEhQplRspLos+s.psSu.s-lEsRLpsLTpoLlp+QshLE...pLosEKNuLslQhERlpp.L+t.....t.pssssopl.hphls.s-Ds+tR.lPlhhppssh-l.thht+h++AhpsIDshuIRlGhFLRRYPhsRl.lIlYhAlLH.....................hW ..................................................................................................................................................................................................................................................................................................................................................................................t.ht.............p...t..................p..h.................ht.........htt.ptt.hpt.h..............p.h...p.pt....psp.pl.ttt.............................poLpc+hp.h.cttlthp.ptphh.h.....aht+h.......p..h.c....hp.Lt.t.....A.....p.p.hhc..hp....chs-.....p..lchh+h.hcptc.tL.pY+...tph.lps.-ph.hp.LKp.sh................................................h...........tp-.th.p..-phphh.hQ..l.....ph.pphp....p..t.t......p..pp..pphp......htt.........h..pph..hp.pt..p.tsshp.php.+.........................-hphhhp..tlss...ps..p.p...s...p..-......l-........thhpp.hs.............................ls.h.p+pth.hthc+l.p.hp...........ttpts..s..p..h...th....p..s-..s....sc.p..h....shhhp..p.h.....sh...................slDp.h.........R..hsR..llhYh.hlp..............................................
................................................................................... 0 48 60 105 +9620 PF09788 Tmemb_55A Transmemb_55A; Tmem55A; Transmembrane protein 55A KOGs, Finn RD, Sammut SJ anon KOGs (KOG4684) Family Members of this family catalyse the hydrolysis of the 4-position phosphate of phosphatidylinositol 4,5-bisphosphate, in the reaction: 1-phosphatidyl-myo-inositol 4,5-bisphosphate + H(2)O = 1-phosphatidyl-1D-myo-inositol 5-phosphate + phosphate. 30.00 30.00 30.20 30.10 28.60 27.00 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.16 0.70 -5.25 7 173 2009-01-15 18:05:59 2007-07-30 13:45:18 4 3 87 0 106 163 0 224.80 49 91.93 CHANGED M.ADs..ERSPLLScspcGs.....s..u.sst.ht......sstP...tshsPhs....ss.h.uE.PPPYoshsSP-ouosPslsCRVCQSlIsl-GKhHQHVVKCslCNEATPIKNsPsGKKYVRCPCNCLLICKsTSQRIACPRP.CKRIINLGPV............p.uPsoPss..QPtGsRVhCGHCusTFLWsEhpspTL......................ARCPHCRKVSSlGptaPR+RslhhhllsllhllsusGLh...........................sGThphAppatGlYsuWshhllLsllsLsRuhYWhsh+lS ...............................................................t...........................................................................................tt.s...............sE.PPPYs.s.........tS.sss..u...uh..PhlsCRVCQuhIsl-GKh+........QHVVKCshCNEATPIKNsPsGKKYVRCPCNCLLICKsoSpRIuCPRP.C+RIINLuPs..........................p.tPhpPt....pP.t.usR.VhCGHCpsTFLas.php.p....o..L.......................A+CP.H..C+.Kl...............SSl....G.p.tasR+RshhhhlluhlhhhhuhG...Ls...........................hGThph.Appat.u.hYsuWshh..hL..lullhLhRuhYahsh+VS............................................................... 0 28 37 70 +9621 PF09789 DUF2353 Uncharacterized coiled-coil protein (DUF2353) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4687) Family Members of this family of uncharacterised proteins have no known function. 
30.00 30.00 30.00 30.90 29.50 28.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.23 0.70 -5.17 4 117 2009-01-15 18:05:59 2007-07-30 13:45:48 4 2 80 0 70 102 0 264.50 45 66.32 CHANGED +pKLpSKs-ALhILtp-LEpsppERDtaKhhscpLp.chpshK+p.pEhph.shttGc..h..tp.............................+ppsLupLlpcsR-cNppLss-hp-L+pphtElptDhclLRpslsc.csuhpthsspcph...........cp+pcLlppL...E+h+cKsptLEpDl+SlhDEKp-VshERDtappKupRLNsELsalLsuDpp..Rll.DlDuLlhENRY.+p+lspLcEEhphh+tslsKYKshhEs.Kpppshl....KsG...ssspsuVhutKQV+-LLtSctsc...thslpstohS-L+uLssuLL-sls-KshALtHQ+psNKlLGsRlsELEpKltsL .........................................................M.AppLp.RapsLK+p.p-h........ts.......p.............................ppssLupLLp-op-cN+pLspElcpLpQRLsElQGDsK.....LLRhTlA+p..+lsc..p.pl.u.s+..phs...................tHERE-LVpQL......E+h+cphcpLcaDLpuslDEhp-lppERssYpsKscRL...NpEL...salL......uGccs............RIl.DlDALhhENR...YLpERlpplpEElpLhKpslsKYKs.hL-t.K...pp..Ks.h..........................K.t...soshssV.LSsKQV.....pplL.Spt.t...sLPhpspoloDLKSLssALLEslp-KNhslpHQ+pTNK.ILus+lsELEp+lptL................... 0 19 25 50 +9622 PF09790 Hyccin Hyccin KOGs, Finn RD, Coggill PC anon KOGs (KOG4688) Family Members of this family of proteins may have a role in the beta-catenin-Tcf/Lef signaling pathway, as well as in the process of myelination of the central and peripheral nervous system. Defects in Hyccin are the cause of hypomyelination with congenital cataracts [MIM:610532]. This disorder is characterised by congenital cataracts, progressive neurologic impairment, and diffuse myelin deficiency. Affected individuals experience progressive pyramidal and cerebellar dysfunction, muscle weakness and wasting prevailing in the lower limbs [1][2]. 
25.00 25.00 26.80 25.20 23.00 23.70 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.93 0.70 -5.57 12 234 2009-01-15 18:05:59 2007-07-30 13:46:59 4 4 110 0 141 211 0 266.40 36 61.63 CHANGED spsphsshAssLppctulssAlapllpc..stu-Ll-PlC+QLa-hYpSsE.pLphFsLQFlP.LlahYLptssucc.p...usushEAlLLulYNhEl......sccGsuKllohpIPsLSpPSlYHEPps...hsho-suhtpp......................sh.+sVhSush.pp-slpAQNRhcllshLLhsYNupls.MPtsShhp.lCphsuplsspGa..........................tpt.h.......Rl.lsspFhlp.hpuhaaAhhNG.hshu.psl-slh.RAphEhhscslLluNuhctSL.tuu.spsc-Gphsl.hElp.ss.R....IsppslTuhSlRs++hpcc ..............................................s..sph.phAtsLhpctsl.h..s.ulapslpp..............sts...cLl.-PlC+QLa-hYR.S...u-...pLpp..FsLQFLPtLhasYLthssucshp.....................Ssu...slEAlLLulYN...hElh........................cccG..p..s...KsloFplPoLSp..PS.lYH.E...Ppsh.....hshT-sshtpc......................sh.+s..Vh.Sssh..pp-thsApN.RhcllshLhhsYNu.tlshMPssSh.s.lCphso...pl.sspG...a................................................................................................................tpp.h...t.....t.ps.Rl..ls.s...tFhlphlpu.l.aa.Ah...a.N.Gthp...hu.psl--l..hhRAphEhhspslL..Vs..NAhcsSL...s.s..tp.sp-.G..t.hslps.t...ls.ss.+....lsps..slTshSh+t++h.cc................................................................................... 0 34 58 95 +9623 PF09791 Oxidored-like Oxidoreductase-like protein, N-terminal KOGs, Finn RD, Sammut SJ anon KOGs (KOG4690) Family Members of this family are found in the N terminal region of various oxidoreductase like proteins. Their exact function is, as yet, unknown. 
20.40 20.40 20.50 20.40 20.00 20.30 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -8.70 0.72 -4.52 26 419 2009-01-15 18:05:59 2007-07-30 13:47:32 4 11 343 0 263 401 6 45.70 35 23.27 CHANGED +sIAGVsVPs+Pp..EPDNCCMSGClsCVW-hYpDDlc-Wsp+pcpApp+ ..............................P..pP....-..P..s..s..CChSG..C.s.s.CVa-hYt--L.pcaptthtth...t............ 0 75 132 207 +9624 PF09792 But2 DUF2295; Ubiquitin 3 binding protein But2 C-terminal domain Mistry J, Wood V anon Pfam-B_45554 (release 21.0) Domain This family is of proteins conserved in yeasts. It binds to Uba3 and is involved in the NEDD8 signalling pathway [1]. This family represents a presumed C-terminal domain. 24.20 24.20 25.00 24.30 23.80 23.50 hmmbuild --amino -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.79 0.71 -4.43 24 100 2009-09-11 16:53:07 2007-07-30 13:48:38 4 4 66 0 84 103 0 143.10 32 44.30 CHANGED apFPHLIlPlcSouPspuhGTsasGpVoss......lSoIFNFDlPsu....sucoCoLsFhFPp.cth.susasFsGsGphsFspL...susssssTTasNsPshtpcluphsloP..GssYslso.FsC..PuGp..sluaEMssuGs.TpLsa......FpDasPs ..........................hpaPHLIlPlssssPspAhGTsasu....pVoss..........lSoIFNFDlPsu....sucsCoLsFhFPppp....th.ts..ssasFsG.......s...Gp.....lsFspL........suss.sssT.T.asN.tP.....ss..ppc.h...Gs..h.slsP..Gpuasls......o..FsC...PsGp..sluachsssGs..TpLpaFpshs.................................................... 0 26 45 69 +9625 PF09793 AD Anticodon-binding domain KOGs, Finn RD, Coggill PC anon KOGs (KOG4401) Domain This domain of approximately 100 residues is conserved from plants to humans. It is frequently found in association with Lsm domain-containing proteins. It is an anticodon-binding domain of a prolyl-tRNA synthetase, whose PDB structure is available under the identifier 1h4q. 
21.70 21.70 25.10 22.70 20.30 18.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.01 0.72 -4.14 29 270 2009-01-15 18:05:59 2007-07-30 14:02:34 4 6 225 0 189 258 2 89.20 31 42.44 CHANGED sssslslpplppRhppslppt.........pppttphGtuVo.-uQplF-slt+T..hstspWsup....sIlVh-.-VpIssP.Ypsss.sp......sssspuhs...pVpKl ................................................lslpplppRtcp..slcpt.............pppttthu.s.GVS.E..uQplFcslpKT...hs....s+Wpsp....sIlVhc....cVhIssP.Yps-s..sp..........ussssu.ls....+V+Kl......................... 0 63 99 148 +9626 PF09794 Avl9 Transport protein Avl9 Mistry J, Wood V anon Pfam-B_12001 (release 21.0) Family Avl9 is a protein involved in exocytic transport from the Golgi. It has been speculated that Avl9 could play a role in deforming membranes for vesicle fission and/or in recruiting cargo [1]. 21.30 21.30 21.30 21.30 21.20 21.10 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.35 0.70 -5.98 23 380 2012-10-02 14:18:06 2007-07-30 14:08:51 4 13 238 0 281 533 2 300.40 27 50.61 CHANGED lhtlsVVDFHHp+GP.................clEahas.......tpspttsshWp.LPF.ALPDGuHsapE-FoaFsLh...................ssspstpTlFGlSCsRQIcuscL....hpRssDVTRSTVQKAVVVlucpP.IFG.l+-KLSllTpAaFtQcDFospcILcpha-sLpsp.hpshsspp......................tpsc.........halGL..sLRcllh+FR+phLlLaKhlLL-KKllhau.ssVEtLsshQhullSLlPsLlsp.LpDsu.......................................................................................................sPhhcshcps..lspssShcoSsRpS................................................hLcahGhPLpIFs..................+GuhasPYhPLQQlchL......ss.ss+uallGoSNsLhhpQ+cph.sDlll..........slDssplphh...sspLcphLpLSstD++ahDhllppVppshc-sp.pt............ta....................GS--aIRhQFE-YLhuLLSos 
......................................................................................................hlhlVsFchthG..................tlEh.as.................tt.....pthp.lsahAhPDu.sHs..................t-..-.h..a.F.pL............................................................................s..t.ts.ttsl..aG...h.u..C..hR..Q...lc.s.p..tL.........h.+...-...lT.R.uhV.Q.K...u.lsl.....l.........uc..h...P..ha..uh...l.........p............t+Lp..l.lsp.s.aFtp.....t.........ph..t.p.h............p.l..l..h.pth.............................................................................................................hhsh....s..p.ph...l............p...ap..p.h....l.hl.hKhh...hLp.+h..................hh..h.......h......t...........h.......s.h..shh...ShhPt.hh....h.................................................................................................................................................................................................................................................................................................................................................................................t.hth.Ph.lF.t..................p.s.h.h.PYh.sL..hp.l...............................h....ahhGso.N.lh.....pptp....-hhl..........phc......t......t.......l.h.....p.tht..h.....o..Dh+a.hp........lht..h.....t...................................................Gu-talR.pht.Yh.thlt................................................................................................................................ 1 105 148 233 +9627 PF09795 Atg31 Autophagy-related protein 31 Mistry J, Wood V anon Pfam-B_60001 (release 21.0) Family Autophagy is an intracellular degradation system that responds to nutrient starvation. Cis1/Atg31 has been shown to be required for autophagosome formation in Saccharomyces cerevisiae [1]. It interacts with Atg17 [1]. 
25.00 25.00 92.10 88.90 21.20 20.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.07 0.71 -4.53 5 24 2009-01-15 18:05:59 2007-07-30 14:14:04 4 1 23 0 13 20 0 177.00 53 99.48 CHANGED MEs...TlTVYD+Nltaphpscc.............hhssht.S.pGuspoMFPTNIKYIFEDDDDplsDss-h......pp.ss-lENVIIV-LDsoGoLENVELISDpYELLSF...p..pL.p.t.ho..pcuNDpsND........................IEL-VlSEF.sDLSss.o+DLuLD-LlKlYspQNcQL+plSDoL ....Mss...TVTVYDKNV+apL.cEN.pp......s..sthsscS+Ss..DGuchAMFPTNIKYIFEDssD-Ll....DooDt.........................splsD.ElENVIIVpLDESGSLEcloLISDQYELLSa...pphSLppNp.+ohsS+u-D.+uND........................IELDVlSQF.sDLSPh.L+DLSLsDLIKLYspQNEQLQhLSNSl. 0 1 5 11 +9628 PF09796 QCR10 Ubiquinol-cytochrome-c reductase complex subunit (QCR10) Mistry J, Wood V anon Manual Family The QCR10 family of proteins are a component of the ubiquinol-cytochrome c reductase complex (also known as complex III or cytochrome b-c1 complex). This complex is located on the inner mitochondrial membrane and it couples electron transfer from ubiquinol to cytochrome. This subunit (QCR10) is required for stable association of the iron-sulfur protein with the complex [1]. 23.80 23.80 23.80 41.70 23.70 21.70 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.87 0.72 -4.31 21 109 2009-01-15 18:05:59 2007-07-30 14:15:48 4 3 105 0 82 110 0 63.60 35 51.33 CHANGED paushohpshp......pausthuhaGuuAushshhFhutlP+h+pDlhpKlPhhGsaa.p+pIsPEDsPh ............huulohpphh......pausthuhaGuuAuhsslhFhuslP+lpcDlLpKIPllG....p.aa.p+pl.sPEDsP...... 0 19 44 72 +9629 PF09797 NatB_MDM20 N-acetyltransferase B complex (NatB) non catalytic subunit Mistry J, Wood V anon Pfam-B_12009 (release 21.0) Family This is the non-catalytic subunit of the N-terminal acetyltransferase B complex (NatB). 
The NatB complex catalyses the acetylation of the amino-terminal methionine residue of all proteins beginning with Met-Asp or Met-Glu and of some proteins beginning with Met-Asn or Met-Met. In Saccharomyces cerevisiae this subunit is called MDM20 and in Schizosaccharomyces pombe it is called Arm1. NatB acetylates the Tpm1 protein and regulates and tropomyocin-actin interactions. This subunit is required by the NatB complex for the N-terminal acetylation of Tpm1 [1]. 22.80 22.80 23.00 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 365 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.08 0.70 -5.74 41 303 2009-01-15 18:05:59 2007-07-30 14:19:52 4 15 265 0 219 313 2 352.20 20 40.32 CHANGED Wshaphhl......pushpl..................ppppsshtphp.h.p.ltt..................t.hsRsshLApL-lhthh..................ptpchtstlhp...YaccFtsKssCasDLppYl...tl....stp.phpphhpth.pt..........................ss.pphhpplsshcl....................................t..tthsppph.p........hhpphhptappshpht.pt...............p-hpsuD.chsLlusp.hllch..............pppts.phllpulslLEphlscsspNaphpLhLlplYhhL.GssshAhptappLslKplQh-TluHhlhsphsshtshstss............phhspshpaYpsspppssch.ltpuactssaspl.shhcFpc+LppShp+...hhhtl-phplptlh.ssphht....thpthsp................p..stpslsDsRDh 
.................................................................................................................................................................atha..hh......tushph.............................t.t.pt.p.thp.h..p....l.p.......................tt.hhRsshLApL....Elhpphht.t............................p.ssh....phhhp...YapcFss.KssCasDLchal...thL........s.p...phpph.hpp.lht.h....................................tshpth.pplsshpl..................................thhh.s.......t..p..hs.tpph.p....................hhpphhtt.appshphs.ps...............p-h.p.sD.thsLlAsp.sLlchh......................tpsss.sh......l..hpAlslLEp.h..L..ppo..spNhph+LlLl+lY..th..L.Gshshuhp..ha.ppLclKplQhDol.u..ahlhsh...htshs.hstss.......................................phhpt.h......hp...aa.psstp.cssch...lhtuachssasp..........l.chhpa.pp.+LppShph.......hhsthEphhhphhh.tsp..t.........hpth...........................p.th.Dpcs..................................................................................... 0 82 125 183 +9630 PF09798 LCD1 DNA damage checkpoint protein Mistry J, Wood V anon Pfam-B_41058 (release 21.0) Family This is a family of proteins which regulate checkpoint kinases. In Schizosaccharomyces pombe this protein is called Rad26 and in Saccharomyces cerevisiae it is called LCD1 [1]. 
25.00 25.00 38.80 31.40 18.30 22.00 hmmbuild -o /dev/null HMM SEED 654 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.99 0.70 -12.96 0.70 -6.53 6 47 2009-01-15 18:05:59 2007-07-30 14:21:43 4 2 43 0 29 43 0 601.70 32 89.65 CHANGED MLRD+.................LphLppptcc-csppptphsplpscacpELpKL+ppLQ+LEDE+KFLlhEpRulssschps.....p.........p.h.ss-ussls.pupssus+p++cchp....hpphlsLs.s+llt..c-sSLFhD+lh.apIhGu-hosl-hLs+Iph-h.s-hss.cphlIsustPLGpuIpphLhphKpphpLDchVDpsLEsLAsLIKp.Ilhsp-spLulPFLlALMapslpFRsSAsSlpuLKDLF.FhsDLhhpFphlLKsPlHcSs.L-lclsPplFQYpllDpLsLhYSFDllEsshpl..l.ppssps.pphacE.hlhKsLhtshphsLTISaKsl..lNlIaShVEllhslssl...h..pssscslhssphWtslIo+Laplhp+plpsscla........hph..hhsFhGLpRshGsNssssLIcplIsppcl..........pulP........hlIp+-s.shst-s.....h...p.chEtWhlpL+pslssIhcpLlhpapcp.plsstEhLhphs+hlupEQthhhshhlstDopshthRhpLlphLlplIYhhWppapcplppphhh-sps.ELlhsLWRllasp.psps.tpp..h-hthLlsphcsLslcDppcha-Dsa--.sh.PtalcpELtsphsppstpthpspa-phhhEMA+pILES....hlohEEsDSLYluM .................................hLRsp.................lp.Lptp+ccEhph.ttphpphphpc........pclstLKpplQ+LEDEKKFLp.Eh+stoppcht..p.....p.................tsh.sssspss..s....pspsp.......osps+p.pchp........tpshhsls.s+ll...c-oSLFh-plh.HpIhGuchoTlEhLs+lpl-.hschph...cshhIsKttslupuIsphLht.hKKshpLDchI-phlpslssLIcc..lp.p..-spLAVPFLluLhapslpFRPSAspp.sl+chhhhlCDLlphapalL+s....slc-ss..hshcstPp.hQhpll-hhllhauhDlLEtllpl..hppastphhhphacc...lhpsh.hshh.s.o.paKsh..lNVlashVEllsh.soh.........shs.s...ss.pphhstp...shIspLhtlh.h-l.st-sa........................................sFaGL.RslGsNp.sthIsplI.p-ch..........pulP................plI.c-s...hspsp.................lshphEtaLLpL+.clhslh-sLlhhhts.thlhstEhlhphs+hluhEQshhhp...u.cS.slclRhpLIpphlplIahlhp-p.cplpp..hp...-s..t.-LhhsLhRlhhspspts..........ss.s.cp.hh-h...Rph.sthppL.slp-p..sphhpcthpch....spahp...t...Ehhtplppchuphhph.Y-pch.EhARpIL-s....hsohEEADsLYhsM............ 
0 6 16 27 +9631 PF09799 Transmemb_17 Transmemb_17; Tmem17; Predicted membrane protein KOGs, Finn RD, Sammut SJ, Coggill PC anon KOGs (KOG4694), (KOG4502) Family This is a 100 amino acid region of a family of proteins conserved from nematodes to humans. It is predicted to be a transmembrane region but its function is not known. 23.30 23.30 23.60 23.80 23.00 23.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.24 0.72 -3.72 24 272 2009-09-11 14:19:48 2007-07-30 14:25:56 4 2 110 0 190 264 1 103.40 28 63.14 CHANGED hhLYhsshahshaalspllhhhhK......hhhhsshhhshtlshhllhsllEslRLhlGhtGNLpEcsstLshhhlLohhstlshlhahhhpshlLh..L-hslsslhlshhsh .........................hhhahNshahshahlsplhhhh.hK.......h..h..h.s...s.h.h.....h.........hh.tlsl....lllhhllEslRLahGhp.GNL.s.......Echs.Lsh.lhLThsstl.h.slaaL.L.h.p..shlLp.....lEhhlsslhlhhhh.h.......................... 1 55 75 129 +9633 PF09801 SYS1 Integral membrane protein S linking to the trans Golgi network KOGs, Finn RD, Sammut SJ anon KOGs (KOG4697) Family Members of this family are integral membrane proteins involved in protein trafficking between the late Golgi and endosome. They may also serve as a receptor for ADP-ribosylation factor-related protein 1 (ARFRP1) [1]. Sys1p is a small integral membrane protein with four predicted transmembrane domains that localises to the Trans Golgi network TGN in yeast and human cells [2]. 
21.90 21.90 23.30 22.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.77 0.71 -4.19 27 331 2009-01-15 18:05:59 2007-07-30 14:27:17 4 3 259 0 233 308 3 132.70 30 71.10 CHANGED apps.hsPhhIlsQIlhLQshYYhshsllh.hhhstlsG.sho................L.-hlFsac............slchssshGhhlhhhalls.uLl................sslhLhhlVtRoKLsLDFAlTlHhlHLlhshlYo...pshPtshsWWhlplhusslhshlGpahChh+EL+sI.h ...............................................................................hsPhhIltQIl.hh.Qsh.aYhsh.slhh.hhhshl.hst.t.h.o...................l.chlFsa.c.................................l.ph..ss.s.Ghhhh.h...s.al.Ls.ulh................su.lh...Lh.h.llpRuKhshDFulTlHhlHLlhshhYs...tph.PsshtWWhlphsuhslhshlGpahCh.h.pELptI.h........................... 0 78 126 188 +9634 PF09802 Sec66 Preprotein translocase subunit Sec66 KOGs, Finn RD, Coggill PC, Sammut SJ anon KOGs (KOG4699) Family Members of this family of proteins are a component of the heterotetrameric Sec62/63 complex composed of SEC62, SEC63, SEC66 and SEC72. The Sec62/63 complex associates with the Sec61 complex to form the Sec complex. Sec 66 is involved in SRP-independent post-translational translocation across the endoplasmic reticulum and functions together with the Sec61 complex and KAR2 in a channel-forming translocon complex. Furthermore, Sec66 is also required for growth at elevated temperatures [1][2][3][4]. 
21.30 21.30 21.40 21.40 21.10 21.20 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.03 0.71 -4.90 16 149 2009-01-15 18:05:59 2007-07-30 14:29:02 4 2 146 0 113 141 0 182.10 37 76.09 CHANGED hphlSlhTPLlYlulLlsSLhsFSshYRK++hpchupLcPhFs-ptsRclYhsLtch.-s.........................+lp-KVlKAALLRRusEsIRRslKL+EtcstlshLappGSlGDDlWpRFppttKhhEhEl+-llpEApshtPsWsQohFtsApEIshNpALRRRhssIpsRscpptchW-h+hs..psuhhhp .................................................h..hlSlhhPhhYlslLluoLhsFSshYRKR+stc........sp.l.tPaFss.ph..p....RslYhoLhchp.............................pls-pV..LKAALLRRAsEDI+R...llcl+ptKsulstLhQ+GSlG.DDlWpRFppAEKEhEtEl+DVltEANsht...P..uWGQhIFpoApEhsh.Np...hhRc+lpplppptppppchW-h+ts..pp............................................. 0 35 64 98 +9635 PF09803 DUF2346 Uncharacterized conserved protein (DUF2346) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4702) Family Members of this family of proteins have no known function. 21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.73 0.72 -4.13 6 116 2009-01-15 18:05:59 2007-07-30 14:29:41 4 3 106 0 83 121 0 77.40 29 84.11 CHANGED MGsWtLEluRMslYhTFPVAMFalFNQPEYFE-aVscpKRplaPPEpcpHRcclEchhcpl.....R-++-pcLL+thp.tEpKc .........MGs.pLElhKhulYlsFPlu.haa.h..h..Nps-hF...c..c...a.lh.....p........p+cc.la.PP.Epp.........p..t..........c..pElp..c...htc.ch.....ppp..cc.pchhcth.......ptt..................................................... 0 34 46 65 +9636 PF09804 DUF2347 Uncharacterized conserved protein (DUF2347) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4704) Family Members of this family of hypothetical proteins have no known function. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.87 0.70 -5.13 36 201 2012-10-02 14:18:06 2007-07-30 14:30:16 4 7 172 0 144 318 1 266.40 32 54.98 CHANGED lFllpFDlKsGYslsWpcsh...sslpL-G.VEYKoLPSGlHpls-DllY.Fs........cc.ta.....hGlSsFhstsss.-ppR........ss+MhulGVLss.................s.thlspuWcasstLcphs.pphhpst....sshpsLppaa.....p..ph....tt..............................pttshspstshhsspphhsshHPshsLPphlcphGPLlFsLa+uuLLRKRILlh....sps.................PVctsCsa................VYslSlLSslPpslhshhssp....s.h.pPlFslGlpDl...shLtph........................uaIACToDpILthKscLaDlhVslssststpstt ............................................................................................................................................lFlhpFDh+.t.G.hl.Wp....hs...........s.....ls....L....-......G..V..Ea...K.....ShPS.....GhHplppDhlY.Fh................................cs..a.hGlusFhshss-.ctpR..........sA+MhuV..G.lLss..............................hsthaRahp.hLcphs.pphhps...........sphp..Lp...taa...........cp.ph..............................................................tht.s..h..s..sh..hh...t..ph.p.hHPshshsphlchFGs.IhsLa+.huLLRKRILIh....s.s.................................................PVt..sC.h...............................l.Y.s.h.....s.....hL.us.lshshhshh.sp........................+PhF....lsltDI....s..Lps.h.......................................ualACTT-cIht..KpcL.Y.DlhVshsssho.p....................................................... 0 40 73 111 +9637 PF09805 Nop25 Nucleolar protein 12 (25kDa) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4709) Family Members of this family of proteins are part of the yeast nuclear pore complex-associated pre-60S ribosomal subunit [1]. The family functions as a highly conserved exonuclease that is required for the 5'-end maturation of 5.8S and 25S rRNAs, demonstrating that 5'-end processing also has a redundant pathway. 
Nop25 binds late pre-60S ribosomes, accompanying them from the nucleolus to the nuclear periphery; and there is evidence for both physical and functional links between late 60S subunit processing and export [2]. 22.50 22.50 24.90 22.70 22.40 22.40 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.20 0.71 -4.10 41 300 2009-01-15 18:05:59 2007-07-30 14:30:41 4 6 257 0 202 285 1 141.20 26 58.48 CHANGED p++phtpp....scElsFDccsRpEaLTGFHKRKhpRpKcAQE.hcc+tR.t+hEERK+lR-ER+pchpctlcphccthp.lpcttsstcctpsspsppspt-...................s.t.......p.sspspptp.hDpcph...................................osVslEpl-s .................t......pt..thtplsFDccsRpEYLTGFHKRKhpR+.....Kp..Ap-phcc+h+.tphEcR++lR-ER+pp...h.p..c.....hl..p.....p.h...c..ct..h..p....hp...p...t....tsp.pp.......pttpp...p.p.p...............................................p...t.s.s.p.p..-.sth........................................................................................................................................................... 0 67 110 164 +9638 PF09806 CDK2AP Cyclin-dependent kinase 2-associated protein KOGs, Finn RD, Sammut SJ anon KOGs (KOG4713) Family Members of this family of proteins are cell-growth suppressors, associating with and influencing the biological activities of important cell cycle regulators in the S phase including monomeric non-phosphorylated cyclin-dependent kinase 2 (CDK2) and DNA polymerase alpha/primase. An association between mutations in the gene coding for this protein and oral cancer has been described. 
25.00 25.00 25.00 26.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.36 0.71 -4.07 11 192 2009-01-15 18:05:59 2007-07-30 14:31:31 4 4 91 2 111 168 0 120.90 43 82.32 CHANGED MsYhsItss.Sphs.................ssTshPtssht..............................................................................suGSssosS.........................................................................................................................................ss.suuuhhpPlhSshs.PShG...ssssh.......oKYupLLuVIEEMG+-IRPTYuGS+SuhERLKRGIlHARhLVREC...LtETERsARp ................................................................................................................................................................................s.s......................................................................................................................................................................................................................................................................................................................s...ssu.s.h...p.lh....s..c.h.us...PShG.h.sp......sstss.........SKYu-LLulIEEhGK-IRPTYAGS..K..SAhERLKRGIlHARsLVREC...LtETERsAR.................. 0 25 34 69 +9639 PF09807 DUF2348 Uncharacterized conserved protein (DUF2348) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4723) Family Members of this family of putative uncharacterized proteins have no known function. 
31.60 31.60 31.70 31.70 31.50 31.50 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.66 0.70 -5.27 5 176 2009-01-15 18:05:59 2007-07-30 14:31:58 4 4 121 0 114 164 0 204.20 28 87.66 CHANGED MFPELNsLLssoPDpsE........pGKlTLLCDu.KTDGSFLVHHFLSFYL+..AuCKVCFVALVQSFSHYSIVGQKLGVSLTsAR-+GQLVFLEGLKSul-llFp...su-sspPLpFLREussGsLcsLFcFVp-oL+..PusSuGs.WphPVLlVDDLSVLLS..LGVuAlAVLDFhHYCRATVCoELpGNlVlLVH-sE-AuDE-s-...lLLpGLSHQSHLIL+AEGLATGFC+DVHGQLcILWRpsSsSutpRuQoh..sYQYKIQD ............................................................................................................................up.hhll.p-t..pssuuFllpphLp.hL+............us..s...t.......lhhluh.p.shsHYp.lup+...l....G.hsLshtpc.psplsFl.-sLp.....ht..........h....ht...............t...t.ttsp.........t.hh.....ts.s.........s....t.L...p.La.p..lpptlp.....s.s.s.t.ts.............sslllDDlSlLhs.........hG..h.u...s.h.t....Vl....c...Fh....chsp.sls..h.p.....h..p.s..s..hV..........hLs+......ts.....tp.s.......p.cpst.............lhptLta.uplhlpspsLsTGhspDVcG.plpl.....c.......................................................................................... 0 38 55 87 +9640 PF09808 SNAPc_SNAP43 Small nuclear RNA activating complex (SNAPc), subunit SNAP43 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4746) Family Members of this family are part of the SNAPc complex required for the transcription of both RNA polymerase II and III small-nuclear RNA genes. They bind to the proximal sequence element (PSE), a non-TATA-box basal promoter element common to these 2 types of genes. Furthermore, they also recruit TBP and BRF2 to the U6 snRNA TATA box. 
25.00 25.00 25.40 25.00 24.20 24.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.25 0.71 -4.63 17 185 2009-01-15 18:05:59 2007-07-30 14:32:26 4 4 132 0 126 174 0 174.70 25 47.41 CHANGED sl+pDhcpLLpcFtpt....coscFpsFpplWcchp...FpplFpG+ppssEhhtFsptlLhhshtYhhss.........pohppRluuLYhLYslY.pQ.sp.hhKIRlshpsapchpcaspphh....ppphh.-sshlhp+LhpcpAF+FsAh.phhs.shh+ph....p...hptpstpphhsstspspplhp.ph.....lpcLt.lcttYpchK ........................................hppDhctLlpc...Ftph...........pshpFpsFpclWcphp...Ft.tlapu.p...pph-..h..t.pFscphLthsh.p...ah.hss...................................................hohp.RluuLYhLYsLYpsQ........s....ps......h...............hKI+lslcsapplh....ch.pp.hh.......ptphh.-sshlhp+.L.h.p.pAFha...s...Ahs.phhs...t....h.pph.........p.....hh.th.....tph.th.p...t......hpth..hpttYtphp............................................................. 0 43 63 101 +9641 PF09809 MRP-L27 MRP_L27; Mitochondrial ribosomal protein L27 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4756) Family Members of this family of proteins are components of the mitochondrial ribosome large subunit. They are also involved in apoptosis and cell cycle regulation. 25.00 25.00 25.50 26.30 24.50 24.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.22 0.71 -4.46 16 274 2009-09-11 08:35:56 2007-07-30 14:33:14 4 3 242 0 196 253 1 95.10 31 65.91 CHANGED +sus++hshToKpGs+sa..hKG.............+GupshGhhspsG+alh.h-hV.palVPs.LpshcLKPYVSapsPplpps................hcsacpGhhcs-hh.chshE.spcG+l ......................................................................................thh.LToKpus+sa..YKG....................psspshGha.s..p.pGpYllshcKl.saVVP-....Ls...sF...+......LKPaVShpssthhpp............................................................................................ 
0 59 103 160 +9642 PF09810 Exo5 Morph_protein1; Exonuclease V - a 5' deoxyribonuclease KOGs, Finn RD, Sammut SJ, Coggill P anon KOGs (KOG4760) Family Exonuclease V is a monomeric 5' deoxyribonuclease that is localised in the nucleus. It degrades single-stranded, but not double-stranded, DNA from the 5'-end, and the products are dinucleotides, except the 3'-terminal tri- and tetranucleotides, which are not degraded. The initial hydrolytic cut of exonuclease V on the dephosphorylated substrate produces a mixture of dinucleoside monophosphates and trinucleoside diphosphates. The enzyme is processive in action [1]. Exo5 is specific for single-stranded DNA and does not hydrolyze RNA. However, Exo5 has the capacity to slide across 5' double-stranded DNA or 5' RNA sequences and resume cutting two nucleotides downstream of the double-stranded-to-single-stranded junction or RNA-to-DNA junction, respectively [3]. 25.00 25.00 25.90 25.30 23.90 24.60 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.10 0.70 -5.20 13 244 2012-10-11 20:44:46 2007-07-30 14:33:33 4 5 193 0 168 260 3 317.10 25 67.42 CHANGED SPlcRF+p..ppslSVTDLsustWCElQhhYsLsc..hG+KccTtAM+pGsplHcpLEcElassVsVc.....V...TT+EDshuL+lhNhIptLppLpppG.............hsREl.VaGhl.cGpllsGlIDpLshcssc.phppp......p..h..........................................................clhloDsKTRtusolPo..psQhRsohlQL.LYp+hLschss...................................splshpplhscY.......sLDPpcsFossahsp.u.ht.......................................................p..t..hhcapsLpsLhphhhtphphp.LP...........................hlssphphcYcppsssp.....lluscpaha-.cslcthls-thsaW+GpR-scGV.cspEuWKCRhC-Ft-pCsWp 
...............................................................................................................th.......phLsVTcL.hs.tWCEhph.Ysh.t..................hh.p....ts.s.........hctGpphHttLEp....Ela.p...ltl...............................l...poc.....E...Dshu..l+...h............h.Nhl.tl.pLhp.pG..................................................hsR..Eh.l..........a..G..hl.....c.................s..................hlsGlIDpL......p...h..p...s..p................................................................................................................................................................plhlsDhKT...R....t..p.........plPs...ps.t.h.c.s.splQl.hY+.hhhsph.sp................................................................tp.hsht.thh...pph.........tLs.sptshs..t..hht.p...............................................................................................................................................................................................................................................................hh.pht....s.L.tp.lhthh...t..hph..hs....................................................................................ht..hthpY..pts...................hspp.h.as.p.lpthlpp.htaWhGpRpspsV.........p-t...h...K...CphCca.tp.C.h................................................................................................................................................................................... 0 50 92 134 +9643 PF09811 Yae1_N Essential protein Yae1, N terminal KOGs, Finn RD, Sammut SJ anon KOGs (KOG4774) Family Members of this family are found in the N terminal region of the essential protein Yae1. Their exact function has not, as yet, been determined. The family DUF1715, Pfam:PF08215 has now been merged into this family. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -7.90 0.72 -4.42 82 669 2012-10-02 21:03:42 2007-07-30 14:34:13 4 24 410 0 356 588 16 38.90 34 17.48 CHANGED GYp-GhspGpppshpcGhp.Gapp.Ghp..hGhphGphtGhh .........GYp-GlspGpppuhp....cGhptGhpp.Ghp..hGhphGthpGh............... 0 93 179 275 +9644 PF09812 MRP-L28 MRP_L28; Mitochondrial ribosomal protein L28 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4778) Family Members of this family are components of the mitochondrial large ribosomal subunit. Mature mitochondrial ribosomes consist of a small (37S) and a large (54S) subunit. The 37S subunit contains at least 33 different proteins and 1 molecule of RNA (15S). The 54S subunit contains at least 45 different proteins and 1 molecule of RNA (21S) [1][2]. 24.10 24.10 24.20 24.50 24.00 23.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -10.82 0.71 -4.40 14 225 2009-01-15 18:05:59 2007-07-30 14:34:48 4 4 207 0 158 213 0 134.30 24 72.35 CHANGED hpApPh+KKKKlDP....pppttt+c+lc+plR.+hpKssppLpPl--hhss.p.lcpsppRp.thhclo.E-l-+Rsll.KpWupY+ppp+pt-hphlcphltuQpcALppLch.Ss-LYptAlts-su.hhPhchcsPstoPP............p.o+la ......................................................................................................l+...php.+.t.t....ph........Phtp.h.hs.....h.p...+p.t..lclut..-ch.++thlp+sWslapppp+ctcppplcp.hpu.pcAh-EL+.......t....S...............cLaptA.................t......-.........t...hhPh.p.h.phP...hPP...................................................................... 0 48 74 125 +9645 PF09813 Coiled-coil_56 Coiled-coil domain-containing protein 56 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4782) Family Members of this family of proteins have no known function. 
21.10 21.10 21.10 21.10 20.70 21.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.07 0.72 -4.02 4 119 2009-01-15 18:05:59 2007-07-30 14:35:03 4 6 108 0 85 117 1 85.20 31 62.89 CHANGED MAt.......osKEGsA.aApRIDPo+EpLoPtQlcFMRplEhtQWpK..pht+hRsRNllTGLuIGulVLGIYGYTFYSVuQE+FLDEhE--AKAARt.uh.hppss .................................tt...h.............................................t.hp...php+........th.t.h.R..t+NhlTGLuluu.lVluIY.......uYThhuVpQEcFhD.....-.h..-c.hph............st..................... 0 21 38 62 +9646 PF09814 HECT_2 DUF2351; HECT-like Ubiquitin-conjugating enzyme (E2)-binding KOGs, Finn RD, Sammut SJ anon KOGs (KOG4784) Family HECT_2 is a family of UbcH10-binding proteins. 19.00 19.00 19.10 19.20 18.90 18.70 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.32 0.70 -5.73 28 316 2012-10-03 14:42:41 2007-07-30 14:35:36 4 9 236 0 227 300 2 298.30 16 76.93 CHANGED Ehhsplpshplhlphsspht........sls.sphplplsps....................shplpLPsclphsspshhththtst.............slphclplpptsss....................thsspslpsps........hplhCpsCppsllps........pshp................+lh-hPSEtWsphhD.WaCHpss.tstptt..............................ppLpPpps-.lhlGssahL..lppsphpphhhhtt...................t.lhCppCps.LG..........phsspsh+LaKhslph................t.ss..cph..pphlhtpllphlpspusc.............+Fhlpt..........sspphl.....hlWlhssshtlsp....stt...........................................tsspsuhKlLYp.......t..thhshhpss.slcplplP...stshpphhphLppsNshlPtshR..thss........aplu 
.........................................................................................................................................................................................h...............................h.h....h...............................hthth.h.t.........................................h.tpphpstp..............phhCppCtp.l..lpp........pph......................phhsLPStpW...tth..h.-.W....t.......CH.ss.tp..........................................ptlhs.ptsp.hh....lu.shhl..hp..pssh.p.t....................................................................................lhC.....t.pCt..lG..................................tt...tshcha.hhtl.h............................pp........pthhst.llph.ptpssh.................+hhlps....................ttp..l........h.lWlhssp.h.hh........s..........................................................................................................................t..tuhKlhYp........................t...............tht.....l..h.......h.thh..lttsp...hP.t.p......................................................................................................................................... 0 65 116 185 +9647 PF09815 XK-related XK-related protein KOGs, Finn RD, Sammut SJ anon KOGs (KOG4790) Family Members of this family comprise various XK-related proteins, that are involved in sodium-dependent transport of neutral amino acids or oligopeptides. These proteins are responsible for the Kx blood group system - defects results in McLeod syndrome [MIM:314850], an X-linked multi-system disorder characterised by late onset abnormalities in the neuromuscular and hematopoietic systems [1][2]. 
20.20 20.20 20.40 20.30 19.60 19.70 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.30 0.70 -5.44 35 705 2009-01-15 18:05:59 2007-07-30 14:36:05 4 12 97 0 433 583 1 269.20 23 65.58 CHANGED c...hhhslhulhhahsDhusDlaluspYatpucahatuLsLsFlllsSlllQhhSahWappDhspsphp...............................................................................h.hhsllHlLQlGhhhRahcslphuhpsthpppppp............................hpthhhtpsDl...oMLRLlEoFLEosPQLlLQLaIhl...............................tp....ps..phhQhluhshShhSluWullsYp+s......LRtphsDKpphsh.husllhhlW+lhhI......................suRllulsLFuulhthasshhhhhhWllhh......hWshtpp.Ts.Ftp.ohshEhlaphlVGllhlFsaFN................VccupTRtRhhlYYshhll-slllhhl.h.....hhh+sshhsp.htlhlsssl....hssahlGlhhhllYYphhHPs ............................................................hhsh...h.p...thhh......ah..tt..p........h.h.hhsh.h...h...hhsshhhp...h.s.h..........ah...h...s.t..t.t.............................................................................................h.hph..h....h.u...h..h.........R......h..hpshhhhh......p..........ttt...............................hhhh..hp...ssh...shhplhpsaL.tos.PQLhLQlhlhl....................................................tp......tth.......hthh.hh.hShh.ulsashhthphh..............lp......hs...s....h........h...sh...huhl...h.hh.W+hh...l......................ssRhlsh.sLF.sshh....hhhhhh.hhhpahhhh................hWh..hhtt...sp....ht..............s...........h..................t.................hhh..h.lh.uhlhl.ash.hN.................................l.pcs.p.st..h+hhhaYh..............hhhhENhhhhhh.a..................h..h.t........................h.............................hhh.l....h.sahh.u.l.....hhhllaYt...hhHP.............................................. 
0 122 154 274 +9648 PF09816 EAF RNA polymerase II transcription elongation factor KOGs, Finn RD, Sammut SJ anon KOGs (KOG4795) Domain Members of this family act as transcriptional transactivators of ELL and ELL2 elongation activities [1][2][3][4]. Eaf proteins form a stable heterodimer complex with ELL proteins to facilitate the binding of RNA polymerase II to activate transcription elongation. The N-terminus of approx 120 residues is globular and highly conserved [5]. 20.20 20.20 21.40 20.70 19.40 17.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.68 0.72 -4.09 29 307 2009-01-15 18:05:59 2007-07-30 14:37:57 4 8 211 0 216 291 0 104.00 31 29.65 CHANGED pYslhLGsphp.............scssps.....phhsl+YsFKPsSssssp..splpts.....pspphslsl................sssuspss..hsacGs..........spss..pppchlLlF...........DscspsahLE+lsosl.phphpRsss ....................aslpLGpohp........................cpsps.........tFpolRY..-FKPAS.lDsopt...uplpsu.........pssplslol.................phpuus..ss..slFcGs.......................p+sh..p...c-slLlh...........stc..otthhLE+Lsosl.pl++sRs..s....................... 0 60 99 156 +9649 PF09817 DUF2352 Uncharacterized conserved protein (DUF2352) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4803) Family Members of this family of uncharacterised proteins have no known function. 
25.00 25.00 25.40 25.30 24.10 24.00 hmmbuild -o /dev/null HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.95 0.70 -6.34 9 120 2009-01-15 18:05:59 2007-07-30 14:38:32 4 4 80 2 74 113 0 412.90 28 87.48 CHANGED PspYhsslpst............h..tsp....schlalhc............................pspL-hpssshc-sh.pp.suls-........LSlcp.ht....................tslslt+ARpllshhpht+s..shu............slWlhCDGSD.ptTshLthEhst....psphhpGllhahssps......sh.hohpsLtppHpp+tt.uh.pspthuh.pha..........pspsplTlcloWssssp.....hLppsslu.stTlplc.t.h-ppuss.pphappLcFLlslt-slhohcsEhhpshpscus..ss.sph.ls-Lcpclsphhssssc.pspphspssst...tsh.phlpsRtshDhs-pLWshh+p.ssSYpDLptsFshlhQshp+usIts..pssssopLucLIpp.hpschshssLoGspPhchLLEIGL-KlhKDYhshFsppclsohNhLchhh..............................................................sophDtpEpshRlpKLaplhplL...pphLhlchphclh.haopsC.cYh+csPhs.pcla.......pl.lpsphlpchhpsccPhph+VphsSspt....+cVcTsa.hsscs.ls......schs.phpssspp.....ccthahhphlpspp ............................................................................................................................................................................................................t...................................slsh.pu+tl..ht....hhtps..shs.............slWhhCDuoD...tt.ThhLthph........sp.h..pGl.hhh.ssh.........t...p.p.tlhp.....a.tth..s..t..th...phh.................s.sth.lchpWts.st..................hp.ss.......tthhplt.......-.ps.h..thhppL....phLhhl...tcslh.shhp-h....sh....pt.psh........ht....t..hs.cL.pphp...t..........h.sphpt...t....h..tp.........th....h......Rt.hDhs-pLWphhpp..s.oap-lh......sh.phlhpshtpup.lts.hp.s.spshLuplIpp............hpt.pht..............s.LsGh.......PlphLlElGl-Klh+DYh.h.......ah.t.pchsshs..Lp.h......................................................................................ssth..s.pcth.+l...t+Ltplhthl...pph...hhp.thph....hs.p.s..phhpp..h..p.h...............pl.l.......th.l.....pp..h.hptppP..h.phph.st.........ppl.T.sh.hs.p..h.............t...................................
............................................................................................................................................. 0 21 27 51 +9650 PF09818 ABC_ATPase Predicted ATPase of the ABC class COGs, Finn RD, Sammut SJ anon COGs (COG3044) Family Members of this family include various bacterial predicted ABC class ATPases. 19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.29 0.70 -5.90 44 462 2012-10-05 12:31:09 2007-07-30 14:40:15 4 4 429 0 135 1778 323 421.10 45 75.58 CHANGED hppLpphLpplDs+sYtAYKplpGp.Ypas.....p...apLhlD+VQuDPFAsPScl+ltlstshs..................shstthh..psts+plAhpDals...Rphtpth...........thtssscoGhltlspsGQplLcRoushl...........................ssptl...clRhpVuLP....ApGRcIhu+tAhplLhptLPcllccslhhpth......stcplppalphscDQptLRppLtc.......tGLVAFVAsGuILPRcSGsS-+......Phc..sAVPFpSPc....oLc.lplpLPp.tGplpGMGIPcG.lTLIVGGGaHGKSTLLcALEpGVYsHlPGDGREaVlTcssAlKIRAEDGRslpsVDISsFIssLP..tGcDTs.pFoTpsASGSTSQAAslhEAlEsG..............upsLLIDEDTSATNFMlRDtRMQpLls+ccEPITPFl-+l+pLhcchGlSTllVs.GGSGsYhclADpVIhM-sYpstDlTtc.......A+clspp....hsstpttpstt.........sR 
...............................................................................................................................h.ppLhphLppl-tpsYtAY+plcGp.....YcFs...................s........apLhIDHlQuDPaAsP.S+l+shhshphs.................................................sh....s......hh.....ps.ht.phA.hpDals...RtFtphh..........................t.....t........psutlpIstsGQplL-..RTulhh........................................spctl.........ElRhcluLP...............ApGR...sIh.u+pAtplLhptLPchlccuhhhc.pl.............stptLhppsclsEDQchlRppLpc.........................tsLVAFVAsGulLPRpoGssDh......Phc......sAVsFpo..Pc.......oLc..lshphPs....t........Gpl..p..G...M..G..I..Pc....G.ITLIVGGGaHGKSTLLpAL...Ep...GV...Ys...H....IsGDGREaVlTcssAhKIRAED.G..Rs.l.p.s.l.sI...SsFIspLP........hG.+.......DTs..pFS.T.p....s.A...SGSTSQA..AslhEu.lEuG......................................ussLLIDEDTSA.....T...N....F....MIRD...tRMQtLlu.+..pc..E...PI....TPhl..-RlppLh..cch....Gl.S.T.llVh..GGSGDYhD.V..A..DsVI..Mcs..YpshDV.TpcA+clstp.hsp.ct.t................................................................................... 2 67 107 127 +9651 PF09819 ABC_cobalt ABC-type cobalt transport system, permease component COGs, Finn RD, Sammut SJ anon COGs (COG4721) Family Members of this family of prokaryotic proteins include various hypothetical proteins as well as ABC-type cobalt transport systems. 20.80 20.80 21.30 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.69 0.71 -4.19 51 1105 2009-01-15 18:05:59 2007-07-30 14:41:44 4 2 991 0 157 535 10 127.90 35 65.84 CHANGED lhDIllsulluVshGlla.hsashlhsslsshhs...hsPhhpsl.................hhGlWhhuusluuhllRKPGAAlluEhlAAhlEhllGupaGst.slltGllQGLGuElsFA..lhtY++ashhshhLuuhsuulsshlh- ...........................................pDlllhuhlulhFGlla.hshshl.Y.shlpshht...htshhppl..................hhGlW...h.MAAslAulll.KPGuAlluEllAAhs.Esl.h..G..u..paG.ls.slluGll.QGLusElsFh..lh+Y.+.phshhshhluuhssslsuFsh........... 
0 45 96 131 +9652 PF09820 AAA-ATPase_like Predicted AAA-ATPase Mistry J anon Pfam-B_5377 (release 16.0) Family This family contains many hypothetical bacterial proteins. This family was previously the N-terminal part of the Pfam DUF1703 (Pfam:PF08011) family before it was split into two. This region is predicted to be an AAA-ATPase domain [1]. 26.10 26.10 26.10 26.10 26.00 26.00 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.03 0.70 -5.24 84 1607 2012-10-05 12:31:09 2007-07-30 14:44:05 4 10 265 0 256 1536 96 241.50 31 54.30 CHANGED PIGIQsFcclRpcs...YhYVDKTthIhpLhpsusha.FLSRPRRFGKSLhlSTLcsaFp.....Gp+cLFpGLhIcc.pp.........tW....tpYPVl+lshsstph.pohppLpptlpphlpphtptashthp.............................cpshssphttlIcphhcppGppVVlLIDEYD+PlLpslp.st......p...........hhcchRp...hL+sFYuslKs....t-saL+FshLTGVoKFupsulFSsLNNLpsIohsp..pYsslCGhT-pElcphhp..hpths.....shcthhpcl+chYDGYpFs..s.....slYNPFslL.hhpp .......................................PlGlpsFpcl..h...pps.ahYlDKTt...h...l.........p....l...........h....p....p...s..p....h...hhoRPRRFG.K.oLhhoh...Lc...taFp...............................tpccLF.p....s....Lh..I.tp.....p..........................h......s......pY..PVlh...ls.hp..shph..tsh...p..p....h.p.lpth.lp.th.p..ath.hp.........................................ptshttphttlhphh.hpp.h.s.pplllLlDEYDpPh.ps.ht.pt................................hhpphhp..........hh+.s.h.a.sshhp......tsthlc..hshlTGl.+h...s..p........ulF......Ss.......LNp.h..p.s..h....o....h.s..pa..s.p.hhGhTcpElcphhp...................thpt...hppl+phYDGY....pFs..........tlYNPasll.hhp.p..................................................................... 0 142 228 252 +9653 PF09821 AAA_assoc_C ABC_transp; AAA_36; C-terminal AAA-associated domain COGs, Finn RD, Sammut SJ anon COGs (COG4754) Domain This had been thought to be an ATPase domain of ABC-transporter proteins. However, only one member has any trans-membrane regions. 
It is associated with an upstream ATP-binding cassette family, Pfam:PF00005. 21.30 21.30 21.30 21.40 21.20 20.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.28 0.71 -10.04 0.71 -3.84 49 413 2009-01-15 18:05:59 2007-07-30 14:44:22 4 2 357 0 152 349 21 118.50 38 29.96 CHANGED spLAcclpl-lD-LhPlsEuhplLshAclpsGDlpLTstG+pascuslpcRKclFsppLhphlPLsspI+phLcccsscphscpphhctLccahssptAccsLcsllsWGRYAElatY.D- ...............PcLAcpLpl-lDDLaPlsEsLphLtFAcl..c.c..G..DlhLTshGcpFs.cus.h.pERKtlFucpL..lchVPLsspI+c.lLc-.+..s..s+pssppRFtpcL..E-...a..h....o..pptAccsLcslIsWGRYuElFsYD........................................... 0 38 81 117 +9654 PF09822 ABC_transp_aux ABC-type uncharacterized transport system COGs, Finn RD, Sammut SJ anon COGs (COG3225) Family This domain is found in various eukaryotic and prokaryotic intra-flagellar transport proteins involved in gliding motility, as well as in several hypothetical proteins. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.62 0.70 -4.94 88 807 2009-09-11 15:43:23 2007-07-30 14:46:11 4 13 632 0 316 818 488 225.20 19 41.63 CHANGED RhDlTpsphYoLSpsocplLppLcp..Plplphahss................hpphtsplcclLccYpthus.c...lplcalD..P........................ptssttpc...t.Gh.s.....................phasslll...phsscpphlshhs...................t.tthEhplopul...pclsps.....p................................................................................................................................................ptplshlsGpuEh.............................................................................................................................................sttthsphhppLppp..a..plpslsLs.................tspl..PpcsclLllssPpp.ls-pphhtlcpalh.pGGcllhhl-sh.t.............s......................t.sLss...LlppaGlplssshVhD 
.....................................................................................................................................................................................................................................................................................hDhotpphaolo.to.phlp..p.lpp...............sltlhhh...hp..........t............................h.tth.pph..l..ppat....t...h...st..p....lphphhs.....s................................t....tp..............................................................................hhh.....t.t....t.p.thl.h.t.........................................Ehpls..psl....plhpt.......t.....................................................................................................................................................p.hl.hh.hpupsph...................................................................................................................................................tt..th......t......ph......tptLp......pp..a...pl.....p..p..lslt....................tppl...psss....l.lllssPpp.hsp.....pEhptlcpalt.pG...G....p....l....lhhhssh..t...........p....................................sls..ll.ppa.Glph.psshlh......................................................... 0 156 231 275 +9655 PF09823 DUF2357 Domain of unknown function (DUF2357) Waterfield DI, Finn RD anon COG1700 Domain This entry was previously the N terminal portion of DUF524 (Pfam:PF04411) before it was split into two. This domain has no known function. It is predicted to adopt an all beta secondary structure pattern followed by mainly alpha-helical structures [1]. 
20.60 20.60 20.60 20.60 20.40 19.90 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.57 0.70 -5.28 22 202 2009-01-15 18:05:59 2007-07-30 14:48:20 4 5 189 0 66 203 17 224.80 20 32.58 CHANGED tGplsFsNclGhsphtlt.ttp....thhplphEVhssKls....app-actlLpp.....lscphsplhhphhppT.hphpts.ptstss.h.hashlcp......hhpsL.pulctIhppPHppL.pppphh+ssplpchssthhttltcpst.......................hstphhspphhptcpchohDT.ENRFlKahLpphppclt...plpptltptttphttt.....hhpplpphtcplpchLppsha+cVGphpthsspShVLpptsGY+-la+ha ..................................................hph.s.hG..ph.hh.ttt.....t.hphth-VhstKhs.....................hpp-apthlp-.....ls.p.h.tlhhphhpt.o.h.t...ht......t.t...tss....hh....thlpp........hpphhpulppIhppP+ppLhpcpphhpsccl+chsstshp....ltccsthh......................tpphh.spphhssp..+..phohDThENR....FlKahlp.p.lhcplp...........pl.hp.tl...pt...t.pp...t...........................hhppl.ppht.phpphhp.p.shhptlup.hp.t.s...S.llp.t.GYpphapha....................................................................................... 0 30 48 59 +9656 PF09824 ArsR ArsR transcriptional regulator COGs, Finn RD, Sammut SJ anon COGs (COG4860) Family Members of this family of archaeal proteins are conserved transcriptional regulators belonging to the ArsR family. 23.90 23.90 23.90 150.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.84 0.71 -4.91 12 29 2009-01-15 18:05:59 2007-07-30 14:48:53 4 1 28 0 26 33 9 160.40 40 94.30 CHANGED lNDPu-LVPLhpsFsScs++KlaptLsstWhTct-l--hhGp-s..pcuLplL+KusLlEoQWRhP.csGppPpKEYHosYS+VQsNF.sShpDLuDlIhlsFhs.--lc-hh-clcphlcp.GssSlusloRsLshsPhaI+ulA+RSptLsV+GQ+lcllc- lNDPu-LVPLLpsFsScsaK+Va-tLoppWhTcpELc-hh.Gc-s..pcuLplLKKuuLlEopWRMP.cPGppPpKEYHooYo+lpANFQCohcDLu-lIhlshhsDE-l+-hh-clcctlcs..GNsSlssLoRphslSPhaI+ulAKRSppLsVKGQ+lclsc... 
0 7 21 23 +9657 PF09825 BPL_N Biotin-protein ligase, N terminal COGs, Finn RD, Sammut SJ anon COGs (COG4285) Family The function of this structural domain is unknown. It is found to the N terminus of the biotin protein ligase catalytic domain. 24.90 24.90 24.90 25.00 24.80 24.80 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.17 0.70 -5.67 13 349 2012-10-03 00:28:14 2007-07-30 14:49:48 4 13 294 0 194 322 23 253.10 28 58.70 CHANGED MNVLVYsGsGoTspSV+HslpoLRhLLtPaYAVssVsspsLtpEPW.spsALLVFPGGADLPYCcsLsus..ss++IpcaVpp.GGsYLGFCAGGYaGSuRsEF-lGDPshEVsGsRELuFFPGssRGsAFpGFpYsSEsGARAspLplsp........tssspFpsYYNGGuVFl-A-Ka....sNVElLAcYsEcs-lsssps...........pAAlVas+VG+GpslLTGsHPEFssp.Lpptsst......thptll-pLctp-psRhpFh+tlLpKLGLc.............lspsss.sssPsL.......Tslalsupss.......splpchhssl..p-.sspsssp....hlcsp..sDpFphtcstp....................................................s..sut.......pssphtD.ccssKpllh..pspslPspchoPpFDhctYassL .......................................................................................................lhlYss.G.ss.......sl...c...psl.tL.+....th.ht...s...p..h...t...V.....l...ss.p...l.h..pps.W..t.p.ss.LL.VhP.......G.Gs.D.l..s.Y.s.psLs...st...................GsppIc.paVpp.GGsaL.GhCAGuYaustt.....hpFt.s.....s....thtl...upRp.LthasG...hstG..sh...h..t.s..a.Yp..op...tGs.p...s....s..l.htt.............t.h..aa.N.GGshF..ss.ph......thpllApYtp.................................sAhl.pp.......h.G.....c..GtslLoGsH.Eh..................s....t........hp....................................................h..hh.......................................................................................................................................................................................................................................................................................................................................................................................... 
0 71 117 161 +9658 PF09826 Beta_propel Beta propeller domain COGs, Finn RD, Sammut SJ anon COGs (COG4880) Family Members of this family comprise secreted bacterial proteins containing C-terminal beta-propeller domain distantly related to WD-40 repeats. 22.40 22.40 22.40 23.20 22.30 22.30 hmmbuild -o /dev/null HMM SEED 521 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.78 0.70 -6.16 28 199 2012-10-05 17:30:43 2007-07-30 14:50:25 4 9 169 0 97 204 204 474.40 25 72.41 CHANGED cYSsTNlQVtGVDEuDlVKTDGphIahh.s.s..............splhllcuhP.......s.pph+llup..lshsup....pcLaL..psc....pLllls.....st.t........ss...................s..h.tshspshlYDISD.ppPclltplplsGsYlsuRhlsutlYlVssphsphh..................................h.t.p.pshhPthhss............hhhsss.hahPs.sthssa...s..slsulsl..sssp.sssssllG.su.splYsSpssLYluhsphh...h...........................................................................pt.ttpppTtla+Fslss...sclpahusGpVsGpl...LNQFShDEap.GhLRVATThsp........................................................hptpstspNslalL....D.psLchlGclpsLA........GEcIYSsRFhGDpuYlVTF+plDPLFVlDLS-PppPclLGELKIPGaSsYLHPls-s+LlGlGp-sspp.............s..p.GlKlSLFDVoDsssPpEhsphhltppsspSssht-H+AFhacpp+...sllulPls.....................h.hpshhlacls....ssGhshhuplsptsss...................lhRulYlsD.....hLYTlSpstl+shshsshc 
..................................................aSpTNsQhtGVDEuDl..VK.o..DGphlahhs..t......................................pplhlhcs.s........................pphphhuplphttt.....................plal....pss.................pLllls.....pt...............................................t.sthhhaDlu.....s.t...p...Pphhtphphp.Gp.h.....hs..uRhhs.shlYllspp...h.......................................................t...t.hhP.http....................hh...tp.hhh........st.......t..sh.........h..hlsuhsl.........pssp...hs.t...sh..hG...ss..stlYhStp.slYlstpth..........................................................................................ttttTtlh+hslps....sphphhupspVsGhl.....hspFuhDEas....GhhRlsTTht.............................................................t.ttpspNslalL........D..pshphhGpl..p.slu........s.Ep.IYusRFhGcpuYhVTF+...psDPLFslDLpsPppPpllGtLK..IPGaSsYLHPh...s-s+llGlGh-stt...................h...GlKlulFDVoDsss..Ppp.htphhl......t...p...ss.S..pshhs..a+..Ahhh...st..pc............slhshPhs..............................hpshhl...apls......ppG.hphhtplph.s.............................................h.Rslals-.....hlYslotthlps.shts......................................................................................................................................... 0 56 76 86 +9659 PF09827 CRISPR_Cas2 CRISPR associated protein Cas2 COGs, Finn RD, Sammut SJ anon COGs (COG3512) Family Members of this family of bacterial proteins comprise various hypothetical proteins, as well as CRISPR (clustered regularly interspaced short palindromic repeats) associated proteins, conferring resistance to infection by certain bacteriophages. 
21.10 21.10 21.20 21.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.64 0.72 -3.84 233 1845 2009-01-15 18:05:59 2007-07-30 14:51:40 4 3 1369 9 558 1538 61 82.80 23 82.31 CHANGED M................................hhllsYDl.....ssc.......+ths+ltKhhcpaG.pplQhSVF-s.plspspht.plppclpchls...pp...Ds...lphatls...ppp..hpphthh ...........................M.......hhllsYDl.............spp............+thp+ltKhltpa.G..th..lQ..h..SVaps...pls.sspht..plhp..c.lp..ph..ls......tp....Ds...lphhpls..ppp.htp....hh................... 1 248 404 499 +9660 PF09828 Chrome_Resist Chromate resistance exported protein COGs, Finn RD, Sammut SJ anon COGs (COG4275) Family Members of this family of bacterial proteins, are involved in the reduction of chromate accumulation and are essential for chromate resistance. 25.00 25.00 31.90 31.90 20.60 19.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.61 0.71 -3.97 46 307 2009-01-15 18:05:59 2007-07-30 14:52:12 4 5 224 0 121 292 27 131.70 46 51.68 CHANGED WsTRpRP+lDRlACsWLIRRFIDspAcFlaVsssp....us.hsAlsFDhcG..staoHh....G-pCoF-shlccFuL.ssPuLt+LAtlV+uADsu.phshsPEAuGL.Al.hGLSppht.DDpphLpsuhslaDALYtahc ..........WsTRpRPtlDRlACsWLIRRFIDspAcFLalsssp....ss.......tsAlsFDhcG..stao.Hh....G-tsoF-shlppFuL..psPALh+LAtlV+u.....h..Dsu...hshsP.EAuGl...Al.hGLpct.hp.DDpphLct.uhslaDuLYsah.................... 0 33 66 87 +9661 PF09829 DUF2057 Uncharacterized protein conserved in bacteria (DUF2057) COGs, Finn RD, Sammut SJ anon COGs (COG3110) Family This domain, found in various prokaryotic proteins, has no known function. 
21.80 21.80 22.60 22.30 20.90 21.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.04 0.71 -4.63 79 1101 2009-01-15 18:05:59 2007-07-30 14:53:04 4 3 811 0 143 559 26 179.40 33 83.49 CHANGED ssLslsps....lclLsls...t.phpsshhpppps....lsLssG.ppQllhRapshh.......p.psssp.phhpSsshll.sFssss.pslpLphPc.hpshppAcp.Ftc.sPphpLts.ssupslshpp-hL.htsGhphspshtpplspYNt.sussAuh.......................ssssssssssssssssss.............................ttspstphLpaWappAspps+cpFhpW ......................................h.TLclsss....l-LLsls...GpK...l...u...uuL..h....cuscs....lcLcsG..aQlVFRhpchl.......hsss-c..plahSsPlll.oF.ss.p...pplshphPc.lcs...t+.-Ap+..F..st.sPplpLlD.ssupsls.lchD...hL.thsu..h..th..s.hsa-h.-...spcYNp.uuttAul..........................................sthAs.hh.ss.s.sslhssss...............................shsstupshsEppLcaWaphADspTRppFhpW...................................... 0 20 49 97 +9662 PF09830 ATP_transf ATP adenylyltransferase COGs, Finn RD, Sammut SJ anon COGs (COG4360) Family Members of this family of proteins catabolise Ap4N nucleotides (where N is A,C,G or U). Additionally they catalise the conversion of adenosine-5-phosphosulfate (AMPs) plus Pi to ADP plus sulphate, the exchange of NDP and phosphate and the synthesis of Ap4A from AMPs plus ATP [1]. 19.40 19.40 20.30 23.30 19.30 17.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.82 0.72 -4.17 64 315 2009-01-15 18:05:59 2007-07-30 14:53:45 4 8 241 0 200 306 96 64.90 31 19.94 CHANGED sYNLlhTccahhllPRsppph...............tslulNuhGauGhlLl+sppphphlcctssh.....plLppluhs ..sYNllhTccahhllPRpppph....................tslu.lNuhGa.uGhlLV+scpphc.hlpcts.sh......plLpplGh......................... 0 47 111 171 +9663 PF09831 DUF2058 Uncharacterized protein conserved in bacteria (DUF2058) COGs, Finn RD, Sammut SJ anon COGs (COG3122) Family This domain, found in various prokaryotic proteins, has no known function. 
25.00 25.00 25.50 25.40 21.30 22.20 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.97 0.71 -4.38 44 851 2009-01-15 18:05:59 2007-07-30 14:54:44 4 1 801 0 133 459 78 171.30 57 94.89 CHANGED SLQ-QLLKAGLlscKKsKcscppp..+Kpp....+pp+cs......sttssp.+ttlpcs+tpptc+D+pLNcp+ptctcpKuhtAQl+QlIcts+lsp.tpG-lsYNFsD..ssKlKclaVspphpcpLspGpLAIsph......pssYtllPctlA-KIspRDsshllhhss...pspst.....-EDDPY.....A-atIPDDLMW ..............................TLQEQLLKAGLVoSKKhuKVpRTA....KKSR..........VQA.REA...........................RtAV.EEN.KKAQLERDKQLSEQQKQAsLu.KEhKAQVKQLIEMNRIsl....upGD...I.......uFNFTD..sNlIKKIhVDKLTQsQLIsGRLAIARL......E.scYAIIPAuVADKIAQRDAsS..IVLpuA.......lStEEQ.......DEDD..PY..AD..FKVPDDLMW................. 0 25 57 98 +9664 PF09832 DUF2059 Uncharacterized protein conserved in bacteria (DUF2059) COGs, Finn RD, Sammut SJ anon COGs (COG3184) Family This domain, found in various prokaryotic proteins, has no known function. 23.60 23.60 23.70 24.40 23.50 22.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.77 0.72 -3.95 125 777 2009-01-15 18:05:59 2007-07-30 14:55:24 4 3 654 3 230 656 55 63.20 22 31.68 CHANGED shschppth.hshaspph..opp-lpplhsFYpSslGp+llptpstsppp.hp.thpth....spphhspl ................t..tphppth.hchYtcta.....oppElcslhsFYpSPsGpKllpppstltpc.ht.hhpth....htth....h............................... 0 52 123 178 +9666 PF09834 DUF2061 Predicted membrane protein (DUF2061) COGs, Finn RD, Sammut SJ anon COGs (COG3205) Family This domain, found in various prokaryotic proteins, has no known function. 21.40 21.40 21.50 21.60 21.30 19.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.46 0.72 -3.77 64 547 2009-01-15 18:05:59 2007-07-30 14:56:55 4 5 401 0 140 400 941 52.50 45 66.49 CHANGED hhKTloatllthslshsVuYll.....TGslhluuslAhlEshsphlsYaFHERlWp+ ....hhKTloFuslHFolAFsVuYlL........TG.slhlGuhlAhlEPslNTVuaYFHEKlWp... 
0 40 84 119 +9667 PF09835 DUF2062 Uncharacterized protein conserved in bacteria (DUF2062) COGs, Finn RD, Sammut SJ anon COGs (COG3216) Family This domain, found in various prokaryotic proteins, has no known function. 21.50 21.50 21.60 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.86 0.71 -4.61 145 963 2009-01-15 18:05:59 2007-07-30 14:57:42 4 4 883 0 327 819 535 150.40 24 77.18 CHANGED ahR....hhthhhpc.t.hhphs.+ulAtuhAlGlFsu.hhPh..shQhllAshlAhhhRu.......Nlslulhssal.oNPlThsslah.hsYpl..GshlLutss...........................................hth..hph.pWht..............htslhhPhllGullhullsuhluYhlshhlh....+hhhp+c...........h..pp+ .................h..+..ht.hh.hc.h....hhphs.+p....lAtuhAlGlFhu..hhPh....shp..hl..l.u.hh..l.A.h.l.h...+.s.......N..lshu.l.hs.s.hl..sNP.l.Ths.sl....ah....hsYpl...Gphlhstss...........................................hth..hp..hpat.th.............p.htslhhshLlGsh...lhullh...ulluYhlsthlh....chhht+php.................................. 1 115 241 287 +9668 PF09836 DUF2063 Uncharacterized protein conserved in bacteria (DUF2063) COGs, Finn RD, Sammut SJ anon COGs (COG3219) Family This domain, found in various prokaryotic proteins, has no known function. 22.50 22.50 24.50 23.80 21.40 19.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.72 0.72 -4.00 112 668 2009-01-15 18:05:59 2007-07-30 15:01:16 4 4 545 1 222 634 74 91.00 24 35.49 CHANGED tphQpsFtsslhssst..ss..............hs...tssspRhslYRsthhsslhssLsssFPslppllGc-tapshuptahppps..spoPhhhchGtcFssFLp ........................................thQptFtttlhtspts..........................t.sh.sscRhslY+phhhsslhssLsssaPhhp....pllGc-t...a.ptlsc.t.alppps..spoPhhpchutcFspalt................ 
0 55 117 179 +9669 PF09837 DUF2064 Uncharacterized protein conserved in bacteria (DUF2064) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG3222) Family This family has structural similarity to proteins in the nucleotide-diphospho-sugar transferases superfamily. The similarity suggests that it is an enzyme with a sugar substrate. 25.80 25.80 26.00 26.10 25.60 25.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.77 0.71 -4.50 93 550 2012-10-03 05:28:31 2007-07-30 15:02:34 4 8 506 1 212 557 305 121.80 26 49.00 CHANGED Tlt........phtpssshshhlshssstspstht.....................hhssshthh.Q.suuDLGpRhtpAhpps..........ttshp.slllIGoDsPsLssphLppAhptLppp........-h..VlGPApDGGYaLlGhp....p....hs.plFpslsWSo ...................................................................hhh.......t...hthhlhhs.sthttth.......................hhhtshp..hh.Q....tu.ssLGpRhtpAhtps.........................tshp.slllIGsDsPplssphltpAh..p..tL..ppp........................-s..V.lG..PA.p.DGGYaLlGlp.......t..hs.plF.p.s.lsWup................... 0 84 161 199 +9670 PF09838 DUF2065 Uncharacterized protein conserved in bacteria (DUF2065) COGs, Finn RD, Sammut SJ anon COGs (COG3242) Family This domain, found in various prokaryotic proteins, has no known function. 20.80 20.80 20.90 22.60 20.70 20.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.42 0.72 -4.26 86 1053 2009-01-15 18:05:59 2007-07-30 15:03:48 4 1 1049 0 199 485 964 56.60 49 87.80 CHANGED llhAluLlLllEGlh.hlhPsth+chhtpls.phssppLRhhGLsshlhGllll.allp .....lhlALuLVLVLEGLGPhLaPpuW.++Mlsshs.pLPDshLRRhGGuLVVAGlVlhYhlR.... 0 45 106 152 +9671 PF09839 DUF2066 Uncharacterized protein conserved in bacteria (DUF2066) COGs, Finn RD, Sammut SJ anon COGs (COG3249) Family This domain, found in various prokaryotic proteins, has no known function. 
20.70 20.70 21.10 21.00 20.40 20.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.21 0.70 -5.24 60 369 2009-01-15 18:05:59 2007-07-30 15:04:52 4 1 364 0 106 346 429 233.00 28 62.88 CHANGED Ahsls.sLYpucl..sVs.upsspscstAhppALppVll+loGspp..shpssslpptlpsspphlppauYp..........p.ts........phhLpssFcspplpplLppAplPlWu.ssRPtlLlWls...-sstsRp.lls...-pss..st..htptlpptAppRGlPlhhPlhDLsDphslsssDlWGtFsssltpASpRYssDsllss+lppp.s............tt................p.hphpW.pL.......asssspppt............sspuushsthspshhssluchhuscY..A ...............................................A.phssLYpucl..sVs.spps.sscstAhtpuhppVllRsoGsp.s.....sh.pssslpptLc..pssp.al.sQauap.....p.pu.................................ptsLplpFsspplcsLLpp.At...L.PhWs.psRPslLlWll...-s..s.tspp.llh.......-pss...........hhptlpptAppRG.L...PLtl.P...l.uDh.sD.ss.ls..ss..-lhGs..sss..lppASpRYssDslLll+hpts..t..................t..........................................................................................................t.h.hcW.pL.......aspssppts..........pussussppshsthhstluchhup+.................................................................................................................................................. 0 33 56 82 +9672 PF09840 DUF2067 Uncharacterized protein conserved in archaea (DUF2067) COGs, Finn RD, Sammut SJ anon COGs (COG3286) Family This domain, found in various archaeal proteins, has no known function. 
21.90 21.90 21.90 49.10 21.40 21.10 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.98 0.71 -4.79 13 57 2009-01-15 18:05:59 2007-07-30 15:05:35 4 1 57 0 44 60 7 190.70 25 92.48 CHANGED loh+hssc-EhccFhctLp+t.htssah.lcs+h........s+lhlpl.Gs-+.clc-shppl+pLtspl+pch...pcuhhchsLpsLhR-As..hslPs-llsDALphhGhcscl+s-..hlcTsAsh-Ell-hsccLuchYpElchhslTsp.s++llsshuastshsl--slEEhlEpGlLccsEst..IsLpcshcpsc+cL ...............lsh+htsccEh.chhcpl.p+h...sh..h..lcs+h..........splhlpl.Gpcc.clc-shppl+plhppl+sch....tcGlhpYslsslhc.ht..tslshslll-sLch.hGhpschccs..hlc..Tshsh-ElhclsccLs-lhs.Elph.shsop.s+cllsssuhh..tshss--ll-chl-tGlhcctEct.+h.Lp+shcpAhc.................... 0 13 19 30 +9674 PF09842 DUF2069 Predicted membrane protein (DUF2069) COGs, Finn RD, Sammut SJ anon COGs (COG3308) Family This domain, found in various prokaryotes, has no known function. 22.10 22.10 22.30 24.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.08 0.72 -10.46 0.72 -4.10 61 494 2009-01-15 18:05:59 2007-07-30 15:07:36 4 2 491 0 139 400 175 107.70 35 80.19 CHANGED phluhsuh..luLlhLhhsWphhluP............................................lpsssshl......lhhlPLLlPLhGlLcGcsYTatWusallhlYFhcGlstshssss..pthlAhlEllLssshFsushhYsR.h+spph ..........................hluhhuh..lALls...LhlhWplhluP............................................lpssushl.....hlhslPLlLPL.GlL+.spsYTatWushllhlYFhculsthassss..pph..hAhlEllLshshFhushhYsR.h+tt................. 0 25 73 107 +9675 PF09843 DUF2070 Predicted membrane protein (DUF2070) COGs, Finn RD, Sammut SJ anon COGs (COG3356) Family This domain of unknown function is found in various bacterial hypothetical proteins, as well as in prokaryotic polyketide synthase. 
21.90 21.90 21.90 27.90 21.60 21.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.90 0.71 -4.80 41 170 2012-10-02 15:46:01 2007-07-30 15:08:55 4 6 154 0 87 168 17 176.50 20 30.93 CHANGED llDs+Nphssh.......lhPpspcstth.suhcphhpthth.........pphsltsGhsps....hspptslustGlpshllcsssp+sslllhDuNNhcsslhcclpctlsph...hp.h.lhToDsHhlsshstss...h.lG.hh...sscclhpthtpssppAtpslp.sp.suhtpshhp.lpshGpc..hpplsuss ....................................................................Llch+sphtsh........lhPpspcut.hpsu.hchhhcthsht.........pphplts.Ghhp..uh..sh..hscpssls.tuhtsh.llcs...s....p+sslhhhDuNNh-stlhcclp.cthspl...........h.o.DsHhl..sh.st..Gs.h.h.hGshh......scchspshptss.....pp.utpslc.sc.huhppsh...lhshGpc..hp.l.sh.............................................. 0 25 56 76 +9676 PF09844 DUF2071 Uncharacterized conserved protein (COG2071) COGs, Finn RD, Sammut SJ anon COGs (COG3361) Family This conserved protein (similar to YgjF), found in various prokaryotes, has no known function. 
21.00 21.00 23.90 23.30 19.30 17.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.39 0.70 -4.93 36 298 2012-10-01 19:21:02 2007-07-30 15:09:20 4 2 252 0 118 282 111 211.30 27 85.83 CHANGED hsQpWp-LsFlHWsVDPpslpshlPsGhclDsa.-GpsaVulVsFhhpps+htulP.lPahpsF...sElNlRhYVhcpss...++GVhFhSL-ss+hhsVhsARshapLPYha..upMchppps..........tplpapupR+hs.........thp........phphpssst...scs..ssLspFLTtRathasptps.pshhhclcH.sWsLppAchhth.ssslhsusGhthss....sssplhaususcl ..............ptpWpclhFlHWsVsPptltshlP.su..hc.Dsa....-....G..p...s..a..lulVsFphpsh+htshP......l.Phhs.sF.....s.EsNlRhY.Vhpsss....++GVhFhul-ss+hh.sshluRhhht.l.P..Yhh..u.cMphppss......................pplpapop+chs.............shp...........phsh...p.h..s..s............hs..ps.....ss.lph...a...LTtR........a.th........asp.....tt.....s...phh.h.h.spH..W.Lptuchhth..psslhsstGh..ss.....s...shau.s.t........................................................................................... 0 56 91 115 +9677 PF09845 DUF2072 Zn-ribbon containing protein (DUF2072) COGs, Finn RD, Sammut SJ anon COGs (COG3364) Family This archaeal protein has no known function. 
23.00 23.00 23.00 24.00 22.40 22.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.82 0.71 -4.22 10 88 2009-09-11 04:52:25 2007-07-30 15:09:51 4 2 57 0 64 91 8 101.50 22 83.37 CHANGED PH+CT+CG+lFcDGu.cILpGCPsCGs+KFLYV+-cccsp...........cs..ctsl-phtc-tccEltpspp.c.p..t....t...................ct-RIESVRIluPGoYELNLspLhcs-ElVlul.p-EGoYhlDLsShhK+p ......................................................................................................pCtp.h..ptt...hhpGC..CG.phF.a..tt.............................................................................................................................................................................................................................h-ol.l.t.G.Y-lNl..lhpp.thlht..tppG.Y.lpl.......t.............................................................................. 0 11 50 60 +9678 PF09846 DUF2073 Uncharacterized protein conserved in archaea (DUF2073) COGs, Finn RD, Sammut SJ anon COGs (COG3365) Family This archaeal protein has no known function. 20.40 20.40 20.60 62.70 17.90 19.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.28 0.72 -4.38 14 56 2009-01-15 18:05:59 2007-07-30 15:10:39 4 1 56 0 41 56 7 104.00 53 83.96 CHANGED LpphsoMEKIRLILDsV+cGsIVlLEpGLoP-EEu+LIEhTMpEIsPDsFoGIEIEoYPtpp..ppuhhu+LhG+ppsp.+LTVIGPANplKTL+K-cshISAhlS ....hpphsSMEKIRhILDpV+-GsIVVLEcGLoP-EEu+LIEhTMsEIsPDpFsGIEI.....EoY...Pppp.....tssohLu+lhG+ppsp..+LTVIGPANQlcTL+KDcslISALlS.. 0 8 32 38 +9679 PF09847 DUF2074 Predicted permease (DUF2074) COGs, Finn RD, Sammut SJ anon COGs (COG3368) Family This domain, found in various archaeal hypothetical proteins, has no known function. 
29.60 29.60 29.60 30.50 29.50 29.50 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.78 0.70 -5.73 10 50 2009-01-15 18:05:59 2007-07-30 15:12:31 4 1 46 0 31 52 3 416.40 21 86.14 CHANGED slplshhhpSluFhhhulhlsssuhhuc+ts....hhSshlll.FlaslasTuhssuahhou....+ll-PL+uLPlshhsplhuVhhhI-ohsuhhFll....hlushhlsshstuLLGLlWuhhulLhG.aSluhllhltFGsphsGRtohs+slh+shGhllhhh.lhulahll.h.hphl...sthlsslhphYphlaP........hhs.sIhtshhu..hlLullYh.lhhshhYhhsl++LappL...EstcspGplphchpl+scushlohlhKDaKllhR+oQsLshlLhPlahslhhlhslsp.s..hh.....hshhhlshlsllSSlhLphhhtIEssGF......phLpSLPls++chlhsKhLhhhlI.hhlshslhhlshlass.tsslYhhhllssslLsshluhhashp+l......su-slphssh.GhlssIlhhllshlslulsulhsFhLstPlu......allssuhsl....IsuLllac+ ..........................................................hh.p.h.ahhhshh..h.....hh..p.........h...hh.....l.hlhsl.ahoshtshhhhst....tlhp.LhsLPlp.t.....s..hhl..u..hh..h..hl.ph.hh.hhlh....hhss...h...hhhhh.ul..Lshlhhh.htlhhh..a....slu...hhlhhhhGtth......+huhspslh+..h..hshll.hhh.l..hh...hahh....................thh....l..hh.hhaP........hsh.sl.t......hll...llYh..hhhshhahhsh+tl.h.pl.........hpuchp.hphp.h.phpsshhuhhtKDlKhlhRp....sph....lh.hllhPl.hhslh.....hhshl.....................hslhhlh.hh.shho.uhhhth....hhtlEtps..h......plLpoLPlsh+phhhuKlLlhs..ll.hhl.hh.hhIhshlash....sthhl.hhh..hlhsh.ll.sstluhhh.shpp.h.........pscslphs.h....shhshl.l.hhllts...l....hlGlsul.h.hahhshhhs................hlhshh.hl....l..h....................................................... 0 13 16 25 +9680 PF09848 DUF2075 Uncharacterized conserved protein (DUF2075) COGs, Finn RD, Sammut SJ anon COGs (COG3410) Family This domain, found in various prokaryotic proteins (including putative ATP/GTP binding proteins), has no known function. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.18 0.70 -5.48 63 776 2012-10-05 12:31:09 2007-07-30 15:18:44 4 21 611 0 219 3403 1226 288.50 20 51.73 CHANGED +plhllpGssGoGKollulslhscl..............sp.............ppthstalssNps.ht.....hlpcplstsh..........tthhpphhtpssuFIpshp......................t.sphDllllDEAHRLhs...........pts.......h.phtt.tsplp-lhcp.u+..lslhlhDpsQtlphs-hsshpp.....lcchhpphshpht.................................pLssQhRs.tuucshlsWlcsllphpthtshsh.............ppsa-h+lhcsspphppt.......lcp+.....sptss..p..uRllAuasasht.....p.psshtshhl.......tp.aph........................tWshpst.....................Wspps.psl-cVGola.....TsQGhEL-YsGVIlGs-lhac......psththptp..phpDtsstpp+.................tt.htpllpNsY+VLLTRGhcGhalassD .................................................................................hhhlpGssGoGKoll.h...h..plhtcl..............................................t..................................................................t..tt.p.s.......a.l...s.....h...N.....p..s.....h.....................h...l..h...p..p..lspph..............................h.p.p....h..h..p....s......s.....s.....h..l.pphp...................................................................................................tph.c.ll.llDEA.....p.hlhs...................................................p.t....................t.h.t....tspl.p.clh......pt....sc..............lhlh.hh.D.t...t.Q...h...l....p....pphht.t...............lpp..hhtth......h.t........................................................................................................pLppphRh.....t.......s.....t...t...h..h..p.....a...l.pt.hht...t.................................................tta...phphh...ps...tt.......h.t................l.tt............................................tt...................thh.s....s.sh...............................h.........................................................................................
...............W.....................................ht.....phus.a.....shQGh-hsashlhhs.sh.........................t...p...................................hhhp.h.hLhpRuh.Ghhlh......................................................................................................................................................................................................................................... 1 59 123 170 +9681 PF09849 DUF2076 Uncharacterized protein conserved in bacteria (DUF2076) COGs, Finn RD, Sammut SJ anon COGs (COG3416) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. The domain, however, is found in various periplasmic ligand-binding sensor proteins. 38.00 38.00 38.40 38.40 37.90 37.60 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.75 0.70 -4.32 44 304 2009-01-15 18:05:59 2007-07-30 15:19:05 4 3 286 0 119 304 5 229.80 31 98.75 CHANGED MsspEppLI-sLFsRLppscss.........PRDspA-uhIsctlppQPsAsYhhsQslLVQEtAL+phpt+lppLcsph..............stttttt.uGGFLu.uh....FGustsp...s.Pts..ssts......................tssasptts..ts..............p.thsts............t................GuFLGsAhsTAAGVAGGslLhsulpshFssspt.tthss..t..................ssss.u..........p-sshsshtsspsts............................h-pus.st.s..shsDus.sss.s.s...................DD........Dssa ..............................................................................MsspEppLI-sLFsRLppApst.......................sRDspAEthItptlppQPsAsYhhsQslLlQEtALcphppclppLctpltt..................tptttttuuGGFLu..ul...........FGuuss.p......stPss.tssts......................sshsttt......s.......................psshsts...............ts.................uGFLGuAhpTAAGVAGGh....lLusulsuhFstppt.tthsthhtt........................t..tss..............ps..ss.hss.hh.sssss........................................cpst..s..........ts..tDss.sss.s..................s...................................................... 
0 25 56 86 +9682 PF09850 DUF2077 Uncharacterized protein conserved in bacteria (DUF2077) COGs, Finn RD, Sammut SJ anon COGs (COG3455) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.90 20.90 21.30 21.70 20.60 20.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.17 0.70 -4.92 122 1603 2009-01-15 18:05:59 2007-07-30 15:21:13 4 8 979 5 300 1102 33 200.90 26 64.69 CHANGED NsLlss.ussLlthlspl+p..tsptssh.tplcpplhp..plpphcpph.pptuh......stpplhsu+YsLC.shlDEslh.sos....Wutpu.....tWtppsLhspaas-s.GGE+hFphLpclh..ppPtpphclLElhYhCLsLGFcG+Y.ph.tpGpp..plcplpppLhphlpph..........t...t....sl.ssphpssststpph.ptt.hPhW.hhhulss.hlhhshahshphhL .............................................psllss.spsLlthlhplpp......htstsps..ppl...tpplhp..plpthcpth..pptuh...........p.tppl.hsh+Ys...LC.shlDEu...sh.sss....Wusps.............................tWtppsLhspFas-s.GGE+hFplLc+Lh...ppPp..p...hhslLEhhahsLsLGF.cG+Y..pl.........tp.upt.........ph-pl...hp....pLhphlpph......tsh.....................sl..ssphttst..ss..php..h....t.........p..t...lshh..hhhshsh..llhh..hhahhhphh.............................................................................. 0 50 115 206 +9683 PF09851 SHOCT DUF2078; Short C-terminal domain COGs, Finn RD, Sammut SJ, Eberhardt R anon Jackhmmer:B0PET9 Family \N 24.00 15.30 24.00 15.30 23.90 15.20 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.20 0.72 -4.34 733 1938 2009-09-13 15:00:14 2007-07-30 15:22:51 4 75 1219 0 751 1854 980 30.70 31 15.57 CHANGED htspLcpLccLhspGhIoc-EFcppKpcLLs ...........ppLcpLccLhspGhIoc-EappcKpplL........... 0 286 560 699 +9684 PF09852 DUF2079 Predicted membrane protein (DUF2079) COGs, Finn RD, Sammut SJ anon COGs (COG3463) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
22.00 22.00 22.10 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.56 0.70 -5.77 53 276 2012-10-03 03:08:05 2007-07-30 15:24:01 4 2 206 0 118 317 177 416.90 18 80.11 CHANGED Rahsh.......ssuaDhGlFsQhlaphupG+.h.hSohps..................ptLGpHhs.llhlhs.....P.lYtla.Ps.hsLhllQshhluhuulslatLAppt.t.s........pphuhhlshlahht.sslh.ss.sL.asFHs.shhh.Phhhhulh.uhpp........c+hhhhhlhhlllLhs+-shulslhulGlhlhlp.................c..............h................uhshhhhuhsahllsspll........hPha.sst.t....hhhtt.....................au.hh.......sushpllhshltpPh.hhhptlht.......slhY.llhlhlPhhalshh..........s.shLlsulPllhh.llu.stss.hslsa..+Ysh.llPhlhlusl..shpptp......................................phthhWssshhLuLshshhhs..................................hsYhshhpphsps...ppslshlP.ssuuVhss...........s.LhPpLup..Rpslhhhsts....hp..........t...pph-allls.........t..shtsstct.pphlsphpppspaphhht...psGlhl ..........................................................h........suaDhGlFpQhhh.hs.p.tp...hsshpt..................phhusHhs..lhhlls......s.lYhla....Ps...s.h...hLl..llQshhl..uhuulslahlupph..h.s...............................pthuhhh..uhsYhL..sslh...ss..sh.hs.....FH...shuh..Ph.lhhuhh..shtp.............pphhhhh...hhs..hhllhs+-.....s.h...ulh..lhs.h...Gl..hlhht......................................................................................p.......................p..th................................uhhhhhhuhsahhlshtll........hPhhtts...............................as.hh....................................suhht.hhhshltpPh..hhlttlhs.s........................plta.llhlhhshhhlshh............pshhllsslPh.lhhphlu.s.s.....hshhh...pY.shslhPh.lhhusl...shtphp.................................................................................................t.h.hh..hh...shh.....h.h.h..shh.hhhhhs.................................hhhp...hppht.th...pphls..lP..ssusV...hss...............s.lhstLst....+p.lh.hh.tt......................t.callls.......................
..........h................h....................................................................................................................................... 1 38 74 102 +9685 PF09853 DUF2080 Putative transposon-encoded protein (DUF2080) COGs, Finn RD, Sammut SJ anon COGs (COG3466) Family This domain, found in various hypothetical archaeal proteins, has no known function. 21.00 21.00 21.00 21.00 20.70 20.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.36 0.72 -4.23 6 51 2009-01-15 18:05:59 2007-07-30 15:25:35 4 1 29 0 24 55 12 47.10 31 78.22 CHANGED lpIpKcoW+Ks+phhsoFhshVpshGNSAplD..lPKEYlGKTVhlTll-cD- ....................t.h..pth.s.ha.tpVp.hGNSA+ls..lP+-alGKplhlsllcpp........ 0 10 15 20 +9687 PF09855 DUF2082 Nucleic-acid-binding protein containing Zn-ribbon domain (DUF2082) COGs, Finn RD, Sammut SJ anon COGs (COG3478) Family This domain, found in various hypothetical prokaryotic proteins, as well as some Zn-ribbon nucleic-acid-binding proteins has no known function. 20.60 20.60 20.60 20.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.21 0.72 -4.13 18 259 2009-01-15 18:05:59 2007-07-30 15:27:38 4 2 254 0 56 152 85 63.10 45 87.63 CHANGED atCPKCGsssa-psplpsTGGshSKlFDVQNc+FhsloCppCuYTEFY+....upsSshtNlhDhhhu ..........sC.KCGs.....p.....pYtscphp..u.TG.Gshu......K....l..FDVQN+.+.Fl.....ol......o.......CspCGYT.ElY+......upoSsuhNlLDhhhG............ 0 22 40 48 +9688 PF09856 DUF2083 Predicted transcriptional regulator (DUF2083) COGs, Finn RD, Sammut SJ anon COGs (COG3800) Family This domain is found in various prokaryotic transcriptional regulatory proteins belonging to the XRE family. Its exact function is, as yet, unknown. 
21.30 21.30 21.40 21.30 20.70 21.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.06 0.71 -4.57 67 753 2009-01-15 18:05:59 2007-07-30 15:28:12 4 8 475 0 213 613 820 153.00 46 32.83 CHANGED TLQRPGA+GlPFFFlRVDpAGNloKRh.SAosFpFuRaGGuCPlWsVHpAFtp.PGcllsQluchPDG....ppYlslARoVspssuuastss+phAluLGC-lpaAsclVYAcGl...........sLssssu...sPIGhuCRlC-RssCsQRAhPPls+tLtlDppppuhsPY ...................................TLQRPut+GlP.FhFlRlD+AGNloKRp.SA.suFpFuRhGG.sCPLW....sVapAFs......p.PG+..ll..p.Q.lAph.P..D.G....pp..Yl.hlARTlpp.ssuu.as.tPs.+paAluLGC-lpHApclVYusGl...........................cls..ssss...sPIGsuCRlC-RpsCsQRAhPsls+tLslDpptpsh.PY.......................... 0 47 134 177 +9689 PF09857 DUF2084 Uncharacterized protein conserved in bacteria (DUF2084) COGs, Finn RD, Sammut SJ anon COGs (COG3811) Family This domain, found in various hypothetical prokaryotic proteins,as well as proteins belonging to the UPF0386 family, has no known function. 21.50 21.50 22.30 50.10 20.80 21.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.70 0.72 -4.28 25 284 2009-01-15 18:05:59 2007-07-30 15:29:07 4 1 276 0 48 145 6 84.50 70 98.69 CHANGED MNISKsEQRlLHlLAQGGcIphpRs-sG+lsclpChTR-GahLusCoLsVFc+LK+KRLIpScsGpPYRITcpGLpsVRuQLDNR .....MNLSRQEQRTLHVLAKGGRIsHlRDuSGRVTuVECYSREGLLLuDCTLAVFKKLKTKKLIKSVNGQPYRINTTGLNsVRAQsDNR............ 0 15 25 35 +9690 PF09858 DUF2085 Predicted membrane protein (DUF2085) COGs, Finn RD, Sammut SJ anon COGs (COG3815) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.00 25.00 26.10 26.00 24.00 21.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.35 0.72 -3.91 24 150 2009-01-15 18:05:59 2007-07-30 15:29:42 4 2 123 0 61 129 105 88.10 34 60.70 CHANGED hCHQhs-RSaahtGpQhPlCARCTGIYlGhll....shlhh.......hthhshslhlhllhhlP.hslDGhsQhhs...............hhcSsNh.......LRhlTGlhhGhuhshhl ...............CpphspRohhhhsh.hslCARCsGhhlGhlh........hhlhhhh.........hhh.h.h.h.slhl.sh....h.h..h.lP.hhlDGhsQhht..................................htpSsNh.......LRllTGlLsGhuhuhh................ 0 26 46 56 +9691 PF09859 Oxygenase-NA DUF2086; Oxygenase, catalysing oxidative methylation of damaged DNA COGs, Finn RD, Sammut SJ anon COGs (COG3826) Domain This family of bacterial sequences is predicted to catalyse oxidative de-methylation of damaged bases in DNA. 25.00 25.00 25.40 25.20 24.30 21.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.06 0.71 -4.94 42 288 2012-10-10 13:59:34 2007-07-30 15:30:15 4 1 258 0 107 277 10 168.60 57 71.64 CHANGED RGEY+YFsYPLPchltpLRsuLYP+LsPlANRWspphGhssRaPsphs-aLspCHtAGQpRPTPLLLpYusGDYNCLHQDLYGEhVFPLQVslLLScPGpDFoGGEFVLTEQRPRhQSRupVlsLpQGDullFsspcRPVpGsR.GhYRVshRHGVSclRSGpRaTLGlIFHDA ...................pGEY+YFsaPLP..phlppLRpuLYP+LsPlANcWttthGh.-spaPsshscaLtpCHsAGQsR....P....TPLlL+YGsGDaNCLHQDLYGEhVFPL.QVsllLScPGpDFTGGEFVLTEQRPRhQSRup..VlsLpQG-..ullFss+cR..PVpGsR.G.hYRs.....slRHGVSplRSGpRa.TlGlIFHDA...... 0 31 58 87 +9692 PF09860 DUF2087 Uncharacterized protein conserved in bacteria (DUF2087) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG3860) Family This domain, found in various hypothetical prokaryotic proteins and transcriptional activators, has no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 
22.40 22.40 22.70 31.70 22.20 22.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.34 0.72 -4.22 55 586 2009-01-15 18:05:59 2007-07-30 15:32:31 4 10 465 0 149 460 21 72.10 36 40.26 CHANGED G+LpphP.u..+pp++lllLphlhppF.c.sppYoEpElNpllpph..a.s.DaAhlRRtLl-hGhlpRp.tGutY.......Wpt .........................G+LpphPp....+pK+Kll....lLpclh.p.c.h.css..ppYoE+EVNp.hlcph..........as..DasslRRhLl-aGalcRst.GstY.......hh............. 0 64 116 134 +9693 PF09861 DUF2088 Domain of unknown function (DUF2088) COGs, Finn RD, Sammut SJ anon COGs (COG3875) Domain This domain, found in various hypothetical prokaryotic proteins, has no known function. 23.30 23.30 23.30 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.92 0.71 -5.30 50 648 2012-10-02 00:55:42 2007-07-30 15:32:58 4 4 416 2 239 630 126 196.70 26 45.88 CHANGED YGcs..plplplP-cplhullpspt..htshtctpphlpcALpsPIuopsLp-llpst..c+lsIlsSDhTRP.sPs.......chhlPhLlccLpt.Gl.-csIpllsAhGhHRtpTcEEhtphlG......p-lhpch....cllsHsspDcsshshlGpos..............pGsslhlN+hssEADhlIssGsIp.HaaAGaoGGpKulLPGlAuhcTIhsNHsh...hl.ssputhG ....................................................................pt.....h.l..t........thh......t..............s..tthlpp...ulp.p.Plss..................Lpchsps....t..........cplsIlssDh....TR....s..sss.......phll.shllccl.pp....sh.ptp.l.tlls.AhGsHRst.Tc-.Ehtphl.G......pc.lhp.p.h....pll.sHpspsppshlpl...G.p.ss........................pG.sslhlN+hsh..E..u..D..hlIshGhlpsH..h.a...u...G...a...SGG.tKslhPGluuhcolttsHsh.hl.t........................................................ 0 137 205 230 +9694 PF09862 DUF2089 Protein of unknown function (DUF2089) COGs, Finn RD, Sammut SJ anon COGs (COG3877) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. This domain is a zinc-ribbon. 
29.10 29.10 29.20 29.80 28.30 29.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.26 0.71 -4.28 18 297 2012-10-04 14:01:12 2007-07-30 15:33:38 4 2 274 0 103 262 1 98.50 33 87.76 CHANGED CPVCGcchpVT+LcCspCsTsIEGcFclscFspLopEphpFlclFlKsRGNIKElE+ELGISYPTVRs+L-cllsALGa............psps-tpsc+c-lLccLccGEISsEEAlchLK ..............................pt.t.h.l.ph.h.t..s.h.lpspFt....s.................h..hspLspEchpFlchFlpspGslKElp+phulSYPTVRsRLDclIpplsh....................tpt.t.t....plhp.l..ptpls.ptA...................................... 0 52 76 92 +9695 PF09863 DUF2090 Uncharacterized protein conserved in bacteria (DUF2090) COGs, Finn RD, Sammut SJ anon COGs (COG3892) Family This domain, found in various prokaryotic carbohydrate kinases, has no known function. 21.40 21.40 21.40 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.98 0.70 -5.58 34 447 2012-10-03 05:58:16 2007-07-30 15:35:24 4 3 431 0 108 433 338 300.60 50 48.52 CHANGED ELsaaLpcup..phs....c.cpDtpLp+LHtsTsR+.......ppWspLplhAFDHRtQLt-hAtpsGushs+IsshKpLhlcAutpVsp.....Gh...thGlLsD.spa..Gp-ALppAoGpGa.WIGRPlElPGSRPLchE..tGtslGupLhcWPtEHVVKCLsFYHPDDsstLRtcQEpplpcLapAs+psG+ELLLElIs.....Pcs.....tPhs.DsshhcslcRhYsLGlhPDWWKLtPh.ospuWpplsslIpc+DPaCRGlVlLGL-APt-pLtsuFtsAAs.slVKGFAVGRTIFu-su+tWLsG-lsDtphlscltspatpLlshWcptR 
.................................................................................................ELs.aLtRup..phs....RPDtDspLsHLHRVTs.R+.......ppWscLplFAFDHRpQLh-....h.A....p..p....s..G..A....s.....s....+I....stLKpLhLc.Auppsutph..................Gl...pspsGlLsD.spa..GQ-A.L.N.ss...T.G.pG......W.....WI.....G..R.P.lE...hPG...SR..P..Lc..hE..+G.slGS.pLlsWPt.......E.......H.......V.......V.......KCLVFYHP-DssplRhEQEtplpclacAsppSGHELLLElI..............Pcs...............t.s.sp...-phhhcslcRFYpLGIhPDWWKLsPl....osssW.......pplsplIpcpDPaC.RGl..VlLGLDAPtpcLcsuFsuAAshshVKGFAVGRTIFu-sSRpWLuGclsDtpLIupltppYppLIshW+pt........................................................................................................................................... 0 15 46 76 +9696 PF09864 MliC DUF2091; Membrane-bound lysozyme-inhibitor of c-type lysozyme COGs, Finn RD, Sammut SJ anon COGs (COG3895) Family Lysozymes are ancient and important components of the innate immune system of animals that hydrolyse peptidoglycan, the major bacterial cell wall polymer. Various mechanisms have evolved by which bacteria can evade this bactericidal enzyme, one being the production of lysozyme inhibitors. MliC (membrane bound lysozyme inhibitor of c-type lysozyme) of E. coli and Pseudomonas aeruginosa, possess lysozyme inhibitory activity and confer increased lysozyme tolerance upon expression in E. coli [1]. Structural analyses show that the invariant loop of MliC plays a crucial role in the inhibition of the lysozyme by its insertion into the active site cleft of the lysozyme, where the loop forms hydrogen and ionic bonds with the catalytic residues [2]. 
21.30 21.30 21.30 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.68 0.72 -4.24 101 1381 2009-01-15 18:05:59 2007-07-30 15:40:44 4 15 1175 9 224 727 37 71.70 32 55.43 CHANGED YpCt.....ss...pplpspahsssp.........shlphss....pphhhspsh.SASGA+Y.......sss.phpa.W.sK....G.s-Ahlthttpss.........sC .............................YpC.........sp..pslsV.phsNsppt............ssh.s.hcs.......phl.hLppsl.S.AS.GA.RY........scu...hYsaW..oK....G...cpAplhppcchsh.......................................... 0 47 108 166 +9697 PF09865 DUF2092 Predicted periplasmic protein (DUF2092) COGs, Finn RD, Sammut SJ anon COGs (COG3900) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 27.50 27.50 27.50 39.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.26 0.70 -5.28 17 95 2009-01-15 18:05:59 2007-07-30 15:40:57 4 4 74 0 50 114 16 209.90 29 77.32 CHANGED cAhplL+sMusaLtutpphShph-ssh-slhtsGQKlpauuouplslsRPD+lRssh.pushscschhaDGKTholhu.ssshYupsstsu.slD...tll-cLpschulplPhuDLhlss.shstlt.pslhsuhhlGps.slsGs.CcHLAFRps-lDWQIWIspGspPlP++hVITt+pssupPQaolphpsWshss.shssssFsFpPPssAcpl-h..ssh .....................................A.plLcpMusaLtstcshohshcssh-.lhtsG...pKlpasu...ssplplpRPs+LRssh.pGs..tsst.....c..hhaDG+s.hTlhs.stshYuphssPs.ol-...ph.l-plppchulphPh.uDL...l..hss.shsslh.sslpsuhhlGps.hlsGstCcHlAFp....pps.hD..aQlWIspGspPLPp+hlI.T.+sh....sup....P.......Qassphp...sWshss.phs.sssFsFpPPssApplchh.h.... 0 14 29 41 +9698 PF09866 DUF2093 Uncharacterized protein conserved in bacteria (DUF2093) COGs, Finn RD, Sammut SJ anon COGs (COG3908) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.00 25.00 26.80 63.90 24.40 18.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.13 0.72 -4.78 30 196 2009-09-10 21:13:40 2007-07-30 15:41:37 4 1 191 0 75 146 668 42.10 57 55.22 CHANGED llpsGsaVhCAVTGppIPLD-LRYWSVc+QEAYss...uptuhpR ..llpsGoaVpCAVTGtsIPLDEL+YWSVsRQEsYAo...sthuhcR...... 0 19 44 55 +9699 PF09867 DUF2094 Uncharacterized protein conserved in bacteria (DUF2094) COGs, Finn RD, Sammut SJ anon COGs (COG3913) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.80 20.80 23.80 27.20 18.40 20.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.72 0.71 -4.28 33 490 2009-01-15 18:05:59 2007-07-30 15:43:05 4 6 421 2 119 397 5 130.00 25 47.21 CHANGED GaaGKlPutGDFVp+pLPpuhlpsWDsWlppulssspp..thsttWtpsahsuPlWRFslssulsG....tshsGllhPShDRVGRtFPLslssslssss.....s.hhhstssaaptlcshhluslctsssh..-tLspuLssLssP ..........................uaaGKlPutGDF.l.ppths.thht.hctWhttu.hth.tp.....t...t.ht.tsappsssWpFshssuhhs....tthlsGslhsSpDpVGRhaPlllhpthshpp.h......spthttstt.ah.ttltphhhptlp.spt.h..tthtptltth...h.................................................. 1 19 49 81 +9700 PF09868 DUF2095 Uncharacterized protein conserved in archaea (DUF2095) COGs, Finn RD, Sammut SJ anon COGs (COG4003) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 39.40 31.50 19.90 18.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.80 0.71 -4.36 3 25 2009-01-15 18:05:59 2007-07-30 15:43:31 4 1 25 0 21 28 1 109.50 47 90.30 CHANGED EKKKKPIDELPWQEYDIEEFKc+FPALARELEE-hG.LEIoGIRLDEYQVLEEEEEE.KIDFSGYNPTIIDFLRRCDTDEEALEIINWMEE+GEITPEMAKELRVTLVHKGVRAFGPKKEWGWYERHGKH ..................pY-h-EF+c+aPtLA+El..-tpu.l.Ips.phs.......-.......sauGYsPTVIDaLRRCcT-EEALEIINahEc+GEIopEhAc-LRhpLs+KGVRuFGs+KEhGaY.ER+u..h...................... 
0 4 9 14 +9701 PF09869 DUF2096 Uncharacterized protein conserved in archaea (DUF2096) COGs, Finn RD, Sammut SJ anon COGs (COG4010) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 22.50 22.50 22.50 140.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.87 0.71 -4.38 7 27 2009-01-15 18:05:59 2007-07-30 15:43:47 4 1 27 0 19 25 2 169.20 39 95.52 CHANGED l-pQWVVLsELsocL.p+uh.VPp-VhpcLRhApsllsaYhhD.Hs..shc.Ltcs-+.LsplQthLhsls-...sDhsccal-Kht+AhRsEhshc.PhppSpFp..lp+....tss-sIRVph.c.lp.EhLt-luEapGVIhEh..--sDh.VhI.Gs+-clppALK-hu.h...W+p .lDppWlVLsELsocL.p+shtlPcplhpcLRlApslIoaYhlDsHs..shcsLpcsE+pLsplQthLhsLs-...t-hsccaLpKht+AhRuEhshcFPhp+SpFp..Vp+....ssh-slRlplpptlp.ERLt-luEapGVIhEa..E-csp.llIcGsc-+lppALK-hush...WK... 1 4 9 15 +9702 PF09870 DUF2097 Uncharacterized protein conserved in archaea (DUF2097) COGs, Finn RD, Sammut SJ anon COGs (COG4013) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 36.90 60.60 23.90 20.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.79 0.72 -3.90 8 32 2009-01-15 18:05:59 2007-07-30 15:44:29 4 1 26 0 22 31 4 88.80 31 93.27 CHANGED hclIch.......osEEhlEYl+cNVc.sDhlElsauRlalsGcVlshss........shlpLpl-uchh.Glh-lDlccIp-DlLElhHhsc-G-phlll ........c.lph.......ss--hlEYlcssVc.sDhlEl.asRlal.Gcllshps.........shhplph-schh.sh.lElDlccIp-DLlElhHhsc-scctll.h.... 0 5 12 18 +9703 PF09871 DUF2098 Uncharacterized protein conserved in archaea (DUF2098) COGs, Finn RD, Sammut SJ anon COGs (COG4014) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.00 25.00 56.50 56.30 22.70 21.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.10 0.72 -4.05 16 48 2009-01-15 18:05:59 2007-07-30 15:45:13 4 1 48 0 38 46 2 90.10 31 91.83 CHANGED c.IclGshVRYlsTGThGpVpcI.KsEsscpWllL...-sosLaYcschLEls-...tp++pc+..chst-Ehl-+l+ccc.-thpphDlssssss.....GGG ....IplGshVRYlsTGThGcVscI.Ks-...ss.cpWllL...-sosLaY+schLElh-...tp+cccc.......ctstcEhl-+lcppc..c.....thpthchstttss...GuG................... 0 8 24 32 +9704 PF09872 DUF2099 Uncharacterized protein conserved in archaea (DUF2099) COGs, Finn RD, Sammut SJ anon COGs (COG4022) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 226.80 226.60 18.00 17.60 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -5.36 22 64 2009-01-15 18:05:59 2007-07-30 15:45:37 4 1 51 0 50 63 6 256.00 45 90.71 CHANGED HlhEslG+s..RVll+sG..KVlEVu-PtlcaCPLacKhR.GlcclTpEsl+cNlEaRIc-FGMhTscRplchc.sFVuFGsSElhhTuLppGhlDAsVhVCDGAGTVlsssPpLVQGlGGRhSGLlcToPIsEVIc+IccpGGhVL-.psApID.ltGlctAhchGY++lAVTlusspsAcp..lRcht..................pssclhlhuVHsTGl.op--AccllphsDllTuCASKhIRchst.+slhQlGsulPlFAlTptGKcLlhcRh+- ....HlhEsLG+.u..RVlV+sG..KVl-VuEPhlcaCPLFcKaR.GIc.clocEsI+cNlEFRIc-FGMhTscRplchc.sFVuFGsSElhhouLpcGhlDusVhVCDGAGTVIsssPpLVQGlGGRlSGLlcToPI.-..VIc+Ic..cpsGhVLD.psAcIDQltGVc+AlchGYK+IAVTVs..s..s..c..c..AcclRclEt.................tslclhlFuVHsTGl.sc--AcplhchsDllTuCASKhlR-hst......t+slhQlGsulPlFAlTppGK-LllERh+..... 0 13 35 43 +9705 PF09873 DUF2100 Uncharacterized protein conserved in archaea (DUF2100) COGs, Finn RD, Sammut SJ anon COGs (COG4024) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
22.20 22.20 22.20 141.00 21.90 22.10 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.44 0.70 -4.87 6 22 2009-09-11 01:16:15 2007-07-30 15:46:46 4 1 22 0 16 22 0 216.40 46 98.18 CHANGED cschSK-LI+KAlpTIS+pc+sh.h.p...p..pch.apDAcsG+IDssEhK+AlapLIEAD-YLYKoAPsH-LspEcAcEFsKlLhpspcHIs+ILupFGF.Eh.EsphclD-...suLYIVuNKKlhKsL.KplsssLNIlsTEGsL-lEDM+hlNPcIPEKALhGIEKKscIs+cpIp+hIpKlsPp+lVVVVccsDsuDELlapR....AKEL.YsAE........KLsu--IL ............chp.SKcLI+KulpTISchcp.....hh.p................pppKch..apDAKsG+IDssEFKKAlapLIEAD-aLYKpAPpH-Ls-EcA+EFsKLlhpsp+HlsplLusFGF.Eh....E.p.tclDc...sALYIVSNKKlhKpL.Ksh.ss............sLsIlsTEGsL-lEDMKhINPpIPEKALtGIcKKscIsKcpIp+hIpphpPsKllVVVc..DcuDELlapR....AKEL.YNAE........Klss-ElL............. 0 3 6 12 +9706 PF09874 DUF2101 Predicted membrane protein (DUF2101) COGs, Finn RD, Sammut SJ anon COGs (COG4025) Family This domain, found in various archaeal and bacterial proteins, has no known function. 20.40 20.40 20.50 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.36 0.70 -4.84 5 26 2009-12-03 16:38:53 2007-07-30 15:47:35 4 1 26 0 17 25 0 175.90 32 81.10 CHANGED hKsaLlPpPS-chP+S..hlh+lsKRshTPHELlSL+LQLlFLlYLI.lSLlssa.LpshLlLIlsuslhFLYLRYlLIRNp-ahl..DFcAYRsF...YlulSTIsFlualGYlLlRcaopshaaYLsYLusIslsVllFRaYFKsRYGRDYTYGlVEEVKsDLV+VFVHDDIAANVKPGYYWVPAVu.-A-sGDIVKLLVEpRT..FRGS+PVRIL ......................................................h.ph..p.+EhhsLhLQlhhhhall.hsl.l.h...hh.h.....hhh.ht.hhYhlh..LpalLhpshshhl..ca.EsYphF....YhululluhLshhGhlllc+huhc.sh.Yhshlshlhl.sVlhhhhhF+.+ahRsaTYGhV..Eltts.hshVhVp.DltuNVKPshYhV.tss.sh..GthVKlhVEpph..h+ushPh+Il.............. 1 3 5 11 +9707 PF09875 DUF2102 Uncharacterized protein conserved in archaea (DUF2102) COGs, Finn RD, Sammut SJ anon COGs (COG4029) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 38.80 38.60 19.10 18.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.34 0.72 -4.51 16 54 2009-09-11 09:37:18 2007-07-30 15:47:51 4 1 50 0 42 51 3 102.20 45 70.17 CHANGED hlllusss.soPu-lshtlaplshs.lpIKETCFGshIsGcc-sVccllcclRshDss+IFsK-RGFPsGDsRRCRApRGGu.RPGFHtLEtEhplLshIucAL- ..............hlllussu..soPuclsthlapl........shs.lplKETCFGshIpGcc-hVccllcclRpl-.s+IFsKDRGFP.GDtRRCRApRGGusR.GFHtLEtE.clLshIucAL-............ 2 9 27 36 +9708 PF09876 DUF2103 Predicted metal-binding protein (DUF2103) COGs, Finn RD, Sammut SJ anon COGs (COG4031) Family This domain, found in various putative metal binding prokaryotic proteins, has no known function. 20.80 20.80 21.00 21.00 18.90 20.70 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.29 0.72 -4.27 11 139 2009-01-15 18:05:59 2007-07-30 15:48:36 4 3 138 0 78 142 101 95.60 31 69.35 CHANGED s-tssccKlGGoHSTlIGGRpGpKhlpplApHPcVKKVIPGsIsusGsu..uG.lcuKVT.RuDspGNlRLLlp-GoSVQElRlVTTAuDcEpG-clhc-LN-hL .................h.....pt+lshsHSThlsGh..h.hLcplupp..stlcslhPGsIs.tspup..pss....lpl+lo.hsspsG.hKLlARpGsoVQElFlVTshspc-..pt......................... 0 20 51 70 +9709 PF09877 DUF2104 Predicted membrane protein (DUF2104) COGs, Finn RD, Sammut SJ anon COGs (COG4035) Family This domain, found in various hypothetical archaeal proteins, has no known function. 22.80 22.80 24.90 24.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.31 0.72 -4.04 8 27 2009-01-15 18:05:59 2007-07-30 15:49:00 4 1 27 0 17 25 0 96.20 44 94.03 CHANGED hl.FhlLlsslsFIlGShlGLpYSY+KYscPalE+plDslALllAVlGhl.lhVNss......hhhhlGhhhluaslGMRPGYGRhEhllGlllAlIlalLpt ........h.hLlshlsFlIGShlGLtYSY+KYspPal-KplDhlALllAllGul.hhlNss...........h.luhhhluFslGMRPGYGRhEhhlGlllAlllallh.h.. 
0 4 7 13 +9710 PF09878 DUF2105 Predicted membrane protein (DUF2105) COGs, Finn RD, Sammut SJ anon COGs (COG4036) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 25.00 196.40 22.50 24.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.50 0.70 -4.76 10 32 2009-01-15 18:05:59 2007-07-30 15:49:21 4 1 32 0 21 29 0 216.30 50 96.77 CHANGED M-hshplapsslhsGlIlGhluLlsIuh....pKsDLshlllTDLlEhuMLVlIAuVGTDLAEALILPGLVVGlAELLAVSElhluR..........................p.+lhEchsl.phss..........h+hEVLcTAPsFlAllLVVYGllLoGFTGGAVAusGLLFYhLo++spsh.h......hpaEGIuulSGIuWALWIhGFlhFFlaPptWLhuLhlAGsG.lllKVuSKlGLIGslhc .......phh..lapsslhlGhllGhluLhuIuh....pKsDLphLlLTDLlEhAMLllIAuVGTDLAEALILPGLVVulAELLAlSEIhloR..........................c.+lhEchhh.phss...........hchEVLpTAPpFlAl..lLllYGslLSGFTGGAVhuuGLLFYslS++shGh.h......hpWEGluulSGIAWAlWl.hGFlsFFlhPphWLhsLhlAGhG.LllKVuSKlGLIGhlh.c. 0 5 11 17 +9711 PF09879 DUF2106 Predicted membrane protein (DUF2106) COGs, Finn RD, Sammut SJ anon COGs (COG4037) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 193.50 193.30 22.70 17.10 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.85 0.71 -4.88 9 32 2009-01-15 18:05:59 2007-07-30 15:49:49 4 1 32 0 21 29 0 151.80 54 93.62 CHANGED tpls+lhNhlSpPcsls+laAhhlsllhllGlh.lPh.shpcsQLYPKs..p.QlphposLAPYDRGGlPLppPuslKuQYPp.pPhlGhlTAYLoPluhalu-pThYhGTTIVSpPGGIlDEILYYTRGhDTVLESSILhhuFsIhSaLahscs .h.pls+lWNhLSpPcslsRlFAhhlslllllGlh....lPh.shsssQLYPKsh.QpQl.hcoPLAPYDRGGlPLccPAslKuQYPpapPhlGpITAYLoPlAhalucpThYFGTTIVSoPGGIlDEILYYTRGhDTVLESoILhlSFhIhSWlahN+.s. 0 5 11 17 +9712 PF09880 DUF2107 Predicted membrane protein (DUF2107) COGs, Finn RD, Sammut SJ anon COGs (COG4038) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.50 25.50 26.30 46.90 25.40 25.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.51 0.72 -4.09 7 32 2009-01-15 18:05:59 2007-07-30 15:50:03 4 1 32 0 21 28 1 73.70 52 88.98 CHANGED hYsGhhllIlGolusshGPtspDPlhRhlNhElPAhGVsLlhLuYNcsLALlTFluVsslhThVLlRAIlR.Etht .hYlGhhLlIlGoluslhGPts+DPlhRhLNhEVPulGVsLIFLuYNcTLALhTFlAVsAllTLVLlRAIl+.EEh........ 0 5 11 17 +9713 PF09881 DUF2108 Predicted membrane protein (DUF2108) COGs, Finn RD, Sammut SJ anon COGs (COG4039) Family This domain, found in various hypothetical archaeal proteins, has no known function. 29.00 29.00 69.70 69.40 28.90 28.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.15 0.72 -4.16 9 32 2009-01-15 18:05:59 2007-07-30 15:52:31 4 1 32 0 21 29 0 72.30 44 80.92 CHANGED M-hLs.lluhsssll..GuluslLtpcslsKlIhhullpuGhlsllsuttYLDVAhssuLl-PluTIllLlhhhK .......h-lLs.l..lussssll..GulGsIlpscslsKIIMhulL-uGhlslIsuhhYLDVAhluulh-PluTlILLlulhK. 0 5 11 17 +9714 PF09882 DUF2109 Predicted membrane protein (DUF2109) COGs, Finn RD, Sammut SJ anon COGs (COG4040) Family This domain, found in various hypothetical archaeal proteins, has no known function. 23.10 23.10 89.40 89.20 22.30 21.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.46 0.72 -4.04 10 32 2009-01-15 18:05:59 2007-07-30 15:52:45 4 1 32 0 21 30 0 77.90 46 91.52 CHANGED llplllGlIullsulRlhls+sRupKL.YLNslsFuIAuLIALhlcoPhGhlAAssYFIsSTloSNAIAaTlucl-ch ....lphllGlIullhslRlhls+sRApKL.YLsslsFsIuALIALYIcoPhGulsAhsYFIsSTlSSNAIAaTlsclcc..... 0 5 11 17 +9715 PF09883 DUF2110 Uncharacterized protein conserved in archaea (DUF2110) COGs, Finn RD, Sammut SJ anon COGs (COG4044) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.40 20.40 20.40 21.50 20.30 20.10 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.33 0.70 -4.72 10 44 2012-10-03 20:18:03 2007-07-30 15:52:59 4 1 44 0 33 50 1 227.00 42 95.66 CHANGED lsTppalhG-sp-Rul-uLcohVpN-LG-LDVca.clulpcDsaspVTLsG-DsElAtNLLpEcaGElssph-s...G-lYlGpLpSas--GaslDl..GlsVplPuDEL.sLGp.......G...oPcQltcRFGLVpHLP..lRhlcspc......t.ApLo-pphD+La-Wp+..u.ssRlsVNSsTRuEl+uslNRAGHuRDIlslERLGLLEpuVVC+EcTDuPGllAsIGPYLpu.............Eltsl .............................lshh.hl.GDs.-pul-uLcohlpNtlGDL-Vca.clulpccsaspVolpG-DpplutNLL+EEaGEls.clcs...G-sYhGpLpuhs-sGaslss..GhhlplPsDcL.sLG...........GsscQltcRFGlVpHLP..lchlpspc...............tupLo-cphD+la-Ws+..s.ssRl.V...NusTRucV+usls+sGHu+DlhplERLGLLEpullC+-sTDsPGllAsIGsYL..u.Eltsl..................................................... 0 5 17 26 +9716 PF09884 DUF2111 Uncharacterized protein conserved in archaea (DUF2111) COGs, Finn RD, Sammut SJ anon COGs (COG4048) Family This domain, found in various hypothetical archaeal proteins, has no known function. 20.90 20.90 21.00 104.00 19.90 20.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.72 0.72 -4.16 17 50 2009-09-11 16:51:47 2007-07-30 15:53:10 4 2 46 0 37 50 5 83.10 50 53.86 CHANGED lHpLlstLPVThRStppsGlRlEcGcVlDcsYTGPVLEpVLcsucll+ssP.sGsYcGlPVlVuPI+sc.GclluAlGlVDlsu ..lHpLls.LPVThRS+s+sGVRlE+GcVlDcsYTGPVLEpVlcpucll+ssPhpGsYpGlPVlVsPI+sp.GcVluAlGlVDlTu. 0 9 22 29 +9717 PF09885 DUF2112 Uncharacterized protein conserved in archaea (DUF2112) COGs, Finn RD, Sammut SJ anon COGs (COG4050) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 168.70 168.50 20.10 17.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.65 0.71 -4.53 18 54 2009-09-11 09:55:49 2007-07-30 15:53:23 4 1 50 0 40 52 5 144.40 58 92.47 CHANGED hKVhIYPsNSLILuDLVERFGHcPLshsppI+c+Vpsss........lDSPPhNlTsEDPK+GLKYAAlEVPSGVRGRMoLhGPLIE-AEAAIIlsc.us.............huFGChGCARTNELshaLlRcKsIPlLELcYPcs---uhphVp+IspFLcuL .hKVFIYPsNSLILuDLVERFGHcPLsltptI+c+Vpssp........lDSPPlNITsEDPKKGLKYAAlEVPSGVRGRMuLlGPLIEEAEAAIIhpc.AP.............huFGChGCuRTNELshaLlRc.K.sIPlLElcYPps-E-uh.hVp+IspFLcuL. 0 8 26 34 +9718 PF09886 DUF2113 Uncharacterized protein conserved in archaea (DUF2113) COGs, Finn RD, Sammut SJ anon COGs (COG4051) Family This domain, found in various hypothetical archaeal proteins, has no known function. 19.50 19.50 19.50 176.30 18.30 17.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.02 0.71 -5.16 17 52 2009-01-15 18:05:59 2007-07-30 15:53:35 4 1 52 0 38 49 4 184.00 33 96.99 CHANGED h.VEss.-ctGsphYccIhpssLpDLsLs+sIt+l+halcPctPlFlhsslh+psspsI+ltDlAslp.........spsthplpIs-EsYlsplLcpLWpphG+splcQPsRaplhlssshs........lt-hlVtDPpcsLhcclhDhh.RlhPEGFRVR+pshpssphshlASEcslpp-hh.chstchhcch.tttt ..hhVEst.-ctGtchYccIlpssLpDLsLs+uIt+l+hhhcPccPlFllullh+pssphIplpDlAslp........tspsthplpIp-EsYlspLLc+LWph.ucsplcQPsRapll..lcschs..........-hlVhDsccclhc+lhDhh.RlhPEGFRVR+.hhpspplshlASEcslccEhl.ccstchhccl.p..s.... 1 8 24 32 +9719 PF09887 DUF2114 Uncharacterized protein conserved in archaea (DUF2114) COGs, Finn RD, Sammut SJ anon COGs (COG4065) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 612.40 612.20 18.00 17.50 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.35 0.70 -6.15 17 54 2009-01-15 18:05:59 2007-07-30 15:53:53 4 1 54 0 38 54 6 449.10 58 91.99 CHANGED KP..YaIVASVElGNTTTKCILTuTNL-TG+oYllsKsV+MTRDVRsPKsGEElFG+Tl.GsELT+EuVuELV+DTLlcuhKcApLsIcpDLcFVVRSTGVsAuFsSP--VGphIhALAsGCLtAGVPP+KMTPsMohsslPc+lccaShhDKlhFDGAVsGVlPP...TGhEhVANEMEGELsTAGIK.GAKWTsVDFRNPClSlDFGTTLsGRITsD.....ppPYAKTlGNFCGLAGAIPDAIlRGoGlVDccsGsALDla.......tphptphspchsccYu-csH.EhIcIccVPtsRcRFGpVPVsscuAccuGlsLIGCDsGcNGSclscLpcIGt-lhccpultsLhtslDhVsAplshRLl-lAhccsll..pouIGlTGRAGIoGpKPcLIL-tlt-h.tlacpsp-clVFVDDGLARGAAlMARCMNSLGTPcsPlGGhRGGtCIhupRIKhp+ ....pPYYIVASVElGNTTTKsIlTATNhcTG+TYllsKsV+MTRDVRsPK.GEcVFG+TlhGlELT+EuVuEhV+-TLlcuhccApLsIc.DLcFVVRSTGVsAGFuSPEEVGphIlALAsGCLtAGlPPpKMTPsMohpsl.PcclpcaShl-KllFDGAVsuVlPP...TGhElVANEMEGELVTAGIK.GAKWTcVDFRNPClSlDFGTTLAGRITs-.....spPYAKTlGNaCGLAGAIsDAIlRGoslVDc+sGsALDla.......tthptphspchsccYA-chH.-hIpIccVPpshcRFGoVPVsPcuAccuGssLIGCDVGcNGSclscLp-IGt-I..hccpslssLhsslDhVsAplspRLl-lAh-cGllptsouIGlTGRAGITGcKPcLILEtlp-h.tla.cp.p-clVFV-DGLAhGAAVMARCMNSLGTPcsPlGGpRGGtCILupRhKhQp.. 0 8 24 32 +9720 PF09888 DUF2115 Uncharacterized protein conserved in archaea (DUF2115) COGs, Finn RD, Sammut SJ anon COGs (COG4066) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 51.60 51.40 24.70 24.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.12 0.71 -4.14 19 67 2009-01-15 18:05:59 2007-07-30 15:54:01 4 1 42 0 51 66 4 160.70 27 89.85 CHANGED hspp-LhphLtcpsppaolhDLhphpshhcc-hchlPtcYRcchhpshhphhhtsapcl+s..psss.hpspc..hDptphpchhshl....tptp.ppttpt..........h.hlhhllssYhhFlpccPhHPVGh.FPGGhpV.c+sGsYYCPl+-+pp-schulCpFCsu+pss ...........................ppt-LhphLtcpstphSlhDlh.hpshlcp-hphlP..ppY+cphhpt.hhphhhpshpcl+p..ppspthp.tp..h-tcphpchhshl....tp.t.pt.ppstcpp..........applh.hllssYhhFlhccPlHPlGh.FPGGhplhcc....sspYYCPl+c+..ppc.thulCpFClscp..... 0 10 36 46 +9721 PF09889 DUF2116 Uncharacterized protein containing a Zn-ribbon (DUF2116) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4068) Family This domain, found in various hypothetical archaeal proteins, has no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 21.80 21.80 21.80 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.22 0.72 -4.17 9 52 2012-10-03 10:42:43 2007-07-30 15:54:15 4 3 49 0 35 57 35 54.30 30 67.66 CHANGED EsH+HCllCGtuIP.-EphCS-+CpEtapp+pK+hh+pp.Ihashhllhlulahhhsah ......H+HChsC.G.tsIP..s.....-..csFCSpcCccpatpcp++.h.p.p.hhhhhhhhhlhhhhhh...................... 0 8 21 31 +9722 PF09890 DUF2117 Uncharacterized protein conserved in archaea (DUF2117) COGs, Finn RD, Sammut SJ anon COGs (COG4069) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 29.00 28.30 20.80 18.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.46 0.70 -5.14 16 49 2009-01-15 18:05:59 2007-07-30 15:54:38 4 4 43 0 37 49 2 210.00 37 57.38 CHANGED sDllsLLNaGKohpTG+sFGchVssR.....st.c.KPLIQIERPGpsDGslIsWN.......cts...pshs-cLScpLsLsh.p........hppslsptpshppsp...plhR+lsGV.PGEsIhVNGIVIG+AsSscVplluEN.GhlssIpGGpIKcHGlEKLtc......VDLp+AhVKTG.hlR+ss........hpschhc.cshstch..............s+VllIDHAAEcshEhh..csAshVlTlGDDTTsIuGD ........................................................DllhLlNaGKSt.TG+sFGphVspR..................sthpcPlIQIERPGctDGslI.WN......................pts...pshscpLocpLsLp......................ppslspthshh.ppp.....phhRclpGVpsGEsIhVNGIVlG+.upuscVtllucs.GhlscIhGGplKcHGlEKLtp......lDLpcAllKTG.hLRRps...........hpschtp.....pshspph.............................sclshlsHAuEcsh-hh....cc..sshVlTlGDDTTtlsGD................................... 0 10 21 29 +9723 PF09891 DUF2118 Uncharacterized protein conserved in archaea (DUF2118) COGs, Finn RD, Sammut SJ anon COGs (COG4072) Family This domain, found in various hypothetical archaeal proteins, has no known function. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.64 0.71 -4.50 9 43 2012-10-02 20:27:15 2007-07-30 15:54:49 4 1 43 6 31 71 34 148.80 31 94.63 CHANGED h+lP+lYVEstts-................t.cVhlcps-chhhah-cscthhh.GKslYph.hc-hsD.lhtp+hp+chlIhhP..DtRphlalKcGocLh.lPsEGapVs.IschGsRVtcGsslAslpT+KG-lRalcuPVsGsVValpE.Ps....tRssYlaYIlPE ............................................+lPcLYVEs..p-.................s.cVhlcpstph.....tF.....l-h...s.c......thhhtt...KhlYch.acpas-.hhtsph.ppDhlIhhP............ct+phhal+cGsc.Lh.lPsEGapVh.IsshGsRVccGctlAslpT+KGE.lRal+sPhsGsV..lalpEhss....tRspYlaYIL.E................... 
0 10 13 21 +9724 PF09892 DUF2119 Uncharacterized protein conserved in archaea (DUF2119) COGs, Finn RD, Sammut SJ anon COGs (COG4073) Family This domain, found in various hypothetical archaeal proteins, has no known function. 29.60 29.60 29.60 144.90 29.50 29.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.16 0.71 -5.02 10 27 2009-01-15 18:05:59 2007-07-30 15:55:00 4 1 27 0 18 26 2 196.00 39 94.08 CHANGED YupG+t..Ph+LFVAGlHGsEuKsToclLcclp.schp.....GslsllP.lVcsuKYISTLcPcYYco.hGKpILclIEch+P.-hYlELHSYStENacKLTupsRhsptGVPsalsLpsGVLlGSVSPhlR++hFs+-sLCLTlElP.............pttspcut-hlsclLclltcupoR-EFIEclc++YPcpscKAhchh+cFaGp ....h.spucs..Pp+LFluGlHGsEuKhTpplLcplp....sphp..................Gplhlhs.lscp..o+YlSTLs.cYYcop.GppllclIEcYcP..chYhElHsYstcsYp+LTshcRhsppGVPPhl-LssGVLluSVSPhlRp+.Fpc-slChTlElP..................shpstcshchhhclLchhhpupsR--hhcclhpcYPcpscpAhphu+caah... 0 5 11 14 +9725 PF09893 DUF2120 Uncharacterized protein conserved in archaea (DUF2120) COGs, Finn RD, Sammut SJ anon COGs (COG4077) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 39.30 95.70 22.20 20.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.82 0.71 -4.25 6 27 2009-01-15 18:05:59 2007-07-30 15:55:13 4 1 27 0 19 26 2 138.80 38 95.54 CHANGED lpcl.............hG+IMctl-AF+GSKPllcscElLhVRGhsR......DchhcchsSlhEaL....l-hLsccGhELls-h.............DEh...Vp......chsElhs-o-ha.D.hGFE+lKcoFEshGCssDYslG+p.cshhlhlsMWhDKsuhsPKFVEllsl ....................lhG+lMpph-AFKGSKPlhcp-plLhVRGlsR......Dc.hcchssIc-aL....h-pLsppGhElhsc..............D-h...lp......cIsE.lhs-.osha.DtaGFEtlKcuFEshGCcsDYtlu+K.sslhlsluhWhDKpshsPpFVEVhs.. 0 4 9 15 +9726 PF09894 DUF2121 Uncharacterized protein conserved in archaea (DUF2121) COGs, Finn RD, Sammut SJ anon COGs (COG4079) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
26.20 26.20 141.40 141.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.09 0.71 -5.02 13 34 2009-01-15 18:05:59 2007-07-30 15:55:24 4 1 34 0 25 33 2 194.40 38 71.02 CHANGED MSLlIuYhGKNGAVlAGD+RpIhFpGpcpsREcLEEcLYSGcI+oDEELtK+AsEauVp...IplsDs+cKVpplss.VLsGEVsol.Gt-oKRRRlYsTpGshsIl-lpssplTspohssGosllVFGN+asKclsppplK+php..hKhslccltclhcclhcpl.tcssTlScpa-lhhspsttcshct......sIc+Dlpc MSLlIuYhGKsGAVlAGD+RpIhFpGscppRcpLE-cLYSGcI+oDEELh++AcEhGlp...lpIsDsccKV+clus.lllGEVpol.uh-oKRRRlYsTpGshsIl-lhssplsspphtpuusllVFGN+asKclupcpl+cphp...phslpcltclhcclhccl.ppTPolSppa-lhhspsthsshct......slc+Dlp.p............. 0 6 13 19 +9727 PF09895 DUF2122 RecB-family nuclease (DUF2122) COGs, Finn RD, Sammut SJ anon COGs (COG4080) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 61.30 61.00 23.50 18.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.21 0.72 -3.84 18 59 2012-10-01 22:53:19 2007-07-30 15:55:36 4 1 58 0 37 53 0 105.40 35 71.33 CHANGED GuAAQpGlP-lhKLAhKhGp.sllVlsDlcDAlElL+P-hVhhlspsspctt.........phhc.........hcs+lhlVhsGu-.suhochElshGpslhhtulcp-lGslGtlAlhL..Ypl .GuAAQsGlP-lpKLAhKhsK.slllLs-lcDAlElLcP-hlhhlupsuppphp........p...hp.........hts+lLllFsGs-.sGFoKhElph.Gptlhl.tlpp-lGulu.hulhLYp.................... 0 13 19 28 +9729 PF09897 DUF2124 Uncharacterized protein conserved in archaea (DUF2124) COGs, Finn RD, Sammut SJ anon COGs (COG4090) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
21.40 21.40 22.30 142.70 20.90 21.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.52 17 49 2009-01-15 18:05:59 2007-07-30 15:56:01 4 1 41 5 34 50 1 147.50 38 93.47 CHANGED +GlushLpsFKshl....t.psucpI.sahGssGsCsPFApLhuYulRsh..pphFlPssch-cu+plp.lsclGhp...ls-.hshc.sDhlVlhGGLAMPchsloh-csppllpclsstp....llGlCFhshFp+sGW.cpIsFDhlIDupl-.Vpl ..+GlushLcsF+shl....t.psucpI.lahGosGsCsPFApLhuYulRsp..cthFlPssch-cu+plt.hsslGhp...hs-ttshc.sDslVlhGGLAMP+hslss-clppllpclsscp....llGlCFhuhFp+uGWtcc..IcFDhlIDuplc.V...... 0 8 22 27 +9730 PF09898 DUF2125 Uncharacterized protein conserved in bacteria (DUF2125) COGs, Finn RD, Sammut SJ anon COGs (COG4093) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.40 21.40 21.40 21.50 21.30 20.60 hmmbuild -o /dev/null HMM SEED 312 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.74 0.70 -5.12 45 259 2009-01-15 18:05:59 2007-07-30 15:56:21 4 3 225 0 86 216 94 300.50 20 82.17 CHANGED WahuAstlcpshssWhspppupGhts-hushs....lsGFPhRhclphsssslusP.........psGhuhpustlphhu.sYpPs+llsphss.stplshs......stsasltpss.....hpushshsspssLsh-chslsstshshsssh..t.......sshsplp....hthtps..ssssshchslpspshshssshh................thpshphcuplphshs........hshsslpptpspssplclppsplph.Gshtlpu.uGslslDssGhssGclslpspshcpllshhtpsthl.t.....................................t......shpsshshhuthsGptp.slslsLphpcGplhl.GslPlGtsPs 
...........................................................................................................................................Whhhustlctthps.hsp.tstGhtspttshp....hsGaPhRhplhssshphtps..........tpuhshphsthpssu.hatPhplhhthsu.Pthl.....ph..s.............hthshshhpss........hphthsh.s.chplshcssslshts.ssths............sshtchp....hp..hs......ssssshc..hth..p...hsshshssshh..........................shttsphsushs.hslssh...................................ss..p...th..h....t...p...h.ps..t...s...l...s...lpphplsh.usht.hsu.oGshsls..p......s.......G.........h........hs..GclpLplsshpslhpsht.tst.hps........................................s...........shpsh.hthhu..ths..t.st..........s...shshsltl.p.cGph.h.G.lslGthP................................................ 1 20 55 65 +9731 PF09899 DUF2126 Putative amidoligase enzyme (DUF2126) COGs, Finn RD, Sammut SJ anon COGs (COG4196) Family Members of this family of bacterial domains are predominantly found in transglutaminase and transglutaminase-like proteins. Their exact function is, as yet, unknown, but they are likely to act as amidoligase enzymes [1] Protein in this family are found in conserved gene neighborhoods encoding a glutamine amidotransferase-like thiol peptidase (in proteobacteria) or an Aig2 family cyclotransferase protein (in firmicutes) [1]. 
18.40 18.40 35.20 26.50 16.30 17.50 hmmbuild -o /dev/null HMM SEED 819 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.43 0.70 -13.37 0.70 -6.51 40 471 2012-10-02 17:21:26 2007-07-30 15:56:54 4 7 408 0 164 493 127 725.70 51 74.92 CHANGED s-.sEV-Fpa-MpVsRltEsPRVTKPao--pWpslssLGcpVDtcLptsDVRLTMGGEPTFVSlDDh-usEWNTsAlGPsKRtLAssLl+RLRc+FA.PGGlLHaGQGKWYPGEsLPRWuhulaWR+DGpPl..W+ss..sLlAc-s....psts....ssscpAcpFlpslApcLGlsschlhPAYEDshaalh+EspLPsNVDP.tcu+Lc......DsppRtRls+VFccGLspPsGaVLPlp+hp.spss......WpSstWhhR.+s+laLlPGDSPlGhRLPLsSLPalsss-aPa.hh.tDPhpspssL.....................tttt.ssststptstpphtpth..........................thVRTALsVEsR-G+LpVFMPPl.-p.lE-YL-LluulEssApplshPVhlEGYsPPp.DPRLshl+VTPDPGVIEVNlHPAsoWcEhVchTpsLYE-ARpsRLGoEKFMlDGRHTGTGGGNHlVLGGsTPsDSPFLRRPDLL+SLltYWQ+HPSLSYLFSGhFIGPTSQAPRlDEARcDuLYELEIAhuQlPts...tsspsP..PWLVDRLhRNLLsDlTGNTHRAEhCIDKLYSPDusTGRLGLlEFRuFEMPPcsRMSLAQQLLlRALlA+FWcpPhcs.tLVRWGTsLHDRFMLPHFlhpDFtDVlsDLptsGasFcspWFssahEFRFPhhGclph.pGlcLELRpALEPWHVLGEEuusGG.TsRYVDSSlERLQVKlpGhss..sRaslsCNGRplPLpsTGssGEhVAGVRY+AWQPsSuLH.PTIslcuPLsFDllDoWsuRSlGGCsYHVAHPGGRsY-TFPVNuhEA.EuRRlARF.shGHTsGths..........................ss.tp.....ssEFPhTLDLR 
...................................................-.s-spFpap.pVsRlhEsPRVT+PYo-ppWptl.tLGppVDtpLttsDVRLThGGEPTFVSlD...Dh-usEWNTsAhG........PpKRthAspLhpRL+sc.au.s...sGhlHaGQGKWYPGE.LPRWshulaWRpDGpPl..Wpss..sLlAc.t...ts.s...........................hsstpApchltslAttLGlssphlhPAYEDshhaLhcEtpLPs..s..lss......s.....s.c..Lt..............................-....p..Rtclt.chhppuLspssGaVLPlpthtst.t................WtSstWhhR.ct........+hhLlPGDSPhGhRLPLsSlsahs.t..p..h.h...h.tDPhtstssL.................................................................................................................................................................................t.hsRTALssEsR..................s.....................Gh..................LhlFhPPh..pt..lEcaL-LluslEssAp..pls.h.s.l.llEGYsPPt.Ds.RLphhplTPDPGVIEVNIpPutsWpEhsppTphLY-pA+.sRLsoEKFhlDG+HsGTGGGNHlslGGsTPuDSPhLRRPDLLtSLlsYWppHPSLSYLFSGhFIGPTSQAPRlDEuRp-sLYELEIAhupl.t.........................st.pss..PWllDRlhRpLLhDlTGNTHRuEFCIDKLYSPDus.oGRLGLlEhRuFEMP.PHs+MulsQpLLlRALlAhFWcpPhps.....LhRWGTtLHDRFhLPHFlhpDhtDVls-Lpt.tGasFcspWFssahEFRFPhhGphth..pG....lplELRtAlEPWHVhGEEus..suG.TsRYVDSSlERlQV+lp....G...hss.........pRa.lloCNGh.lPLpsT..s.psu.phVuGVRa+AWpPsSuLH.PTIsVcuPLsFDllD.s.......Ws.....tR..........SlG.GCpYHVuHPGGRsY-ohPVNuhEA.EuRRhsRF..shGHTPGt..hs......................................t........s.-hPhTLDLR............................................................. 0 45 105 133 +9732 PF09900 DUF2127 Predicted membrane protein (DUF2127) COGs, Finn RD, Sammut SJ anon COGs (COG4331&COG3305)) Family This domain, found in various hypothetical prokaryotic and archaeal proteins, has no known function. 
24.30 24.30 24.60 25.50 24.20 24.20 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.51 0.71 -4.15 33 344 2009-01-15 18:05:59 2007-07-30 15:57:02 4 2 332 0 92 229 22 140.60 28 75.50 CHANGED tuL+slAlhEusKGllslluGluLhslhtps..lpphstpllp+.........hplsPssphsshh......LphssplossslphluhhshsYuslRhlEAaGLW+s+tWupahullSuulYlPaElYElhp+s.ohhphslhhlNlhlVsalhhplt .............................................................h.sl+slulhcus+ull.lluululh..h..ltspp..lp.thlpcllph.........hplsssshhsphh......Lctutt.lssssLt.hsushhhsYusl+llEuhGLW+t+hWupahullusulaLPhElY-....lh.p..ph.ohhplsshhlNlhlVhhhhhph............. 0 30 61 81 +9734 PF09902 DUF2129 Uncharacterized protein conserved in bacteria (DUF2129) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4471) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 22.00 22.00 23.90 22.40 20.20 18.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.33 0.72 -4.18 38 1016 2009-01-15 18:05:59 2007-07-30 15:58:04 4 2 1011 0 95 365 0 68.40 38 76.10 CHANGED RpulIVYLaplKps+.pLR+aGslhYhS++h+YllLYlsp-cl-plhcKlpph+FVK+VchSh+s.lcpsap ..........RpullVYLhh.+ct.+.pLp+YGcIhYhS++h+YlllYlspp-l-plhp+Lpch+FVK+VchSahptLcpsa............. 0 22 48 71 +9735 PF09903 DUF2130 Uncharacterized protein conserved in bacteria (DUF2130) COGs, Finn RD, Sammut SJ anon COGs (COG4487) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
24.70 24.70 25.20 26.30 24.60 24.60 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.55 0.70 -5.65 42 737 2012-10-11 20:44:46 2007-07-30 15:58:23 4 1 708 0 105 513 324 259.60 43 60.52 CHANGED -...c.hhpch-pp...lcphpcpI-clK....chttphSsphhGEsLE.ahEs........hhtsuF...PpshhEcssc.uppGu.....KsDaIaRshs.......ppspchsSIhaEhK.......pTKphs-calc+LccDppppps-aAlLVS.l.Pc.......hhV-s.tthcphaVs+PphFlslhslLRpuhlphhph+ppht....hhpspth.lhpF..csphcphhpuhscsas.hpcchppthcpIsKthp+hpcph-slltuspsLphuppphssholpcl..sthpsshttc ............................hp....sthppphEsp.LKsts-plEh..hK...shK..sQtSo.KhlGEoLE.asEscFp........phcphuF....Psuh...F-KDNK..uspGo...............KGDaIaRtpD.......csGsEhlSIMFEMKNE...cto..csK+KNp-FhKcLDcDRREKsCEYAVLVThLEs-s.phaNtGIVD.so...H...p..YEKMY.....VlRPQ.aFltllulLRsuulNoh..+hK..ppLt....hh+EpNh-locF..Ec-hcshppAhsKsYpssopsatculccI-KslK+hEcs+chLssocspL......+hAs.NKlps.lol++LsptsssMtt...................... 0 29 63 90 +9736 PF09904 HTH_43 DUF2131; Winged helix-turn helix COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4519) Family This family, found in various hypothetical prokaryotic proteins, is a probable winged helix DNA-binding domain. 21.90 21.90 22.20 23.00 21.50 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.09 0.72 -4.18 22 301 2012-10-04 14:01:12 2007-07-30 15:59:03 4 2 275 3 56 190 6 87.40 50 91.47 CHANGED SKopsuFhRRLYlAaLI-sspt.sVPuLhchTGMPRRThQDslsALs-lsIcspF.Q.cGtRppsGaYpIpsWGsIsctWlpp+lppIppsL .........chpssFhRRLYLAhLl-stcpsNVPcLhctTGhPRRTlQDslpALs.slG.IplpFVQ..-...GtRpNsGYYplsDWGPlDppWltcphppItus.................................. 0 5 17 37 +9737 PF09905 DUF2132 Uncharacterized conserved protein (DUF2132) COGs, Finn RD, Sammut SJ anon COGs (COG4628) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
20.50 20.50 22.00 36.70 19.60 20.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.13 0.72 -4.01 80 623 2009-01-15 18:05:59 2007-07-30 15:59:32 4 1 484 1 182 464 52 63.70 59 64.05 CHANGED QsNNPLHGloLEpllscLVcaY.GW-tLupplsIpCFpscPSlKSSLKFLRKTsWARcKVEsLYL ..psNNPLHGloLEplLscLV-aY.GWctLupplpINCFps-PSIKSSLKFLRKT.WAR-KVEsLYL. 0 44 101 146 +9738 PF09906 DUF2135 Uncharacterized protein conserved in bacteria (DUF2135) COGs, Finn RD, Sammut SJ anon COGs (COG4676) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 22.00 22.00 22.10 22.40 21.90 21.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.40 0.72 -4.01 24 419 2009-01-15 18:05:59 2007-07-30 16:00:37 4 15 368 0 47 179 49 47.80 72 16.17 CHANGED PhpGsYhVaVNYaGstssp.............thsssplsllos......Es..oPsEc.pcsahlPl ..PlHGRYQVYINYYGGRSET......................ELTTAQLTL..ITD......EG...SVNEK.QETFIVPM....... 0 7 24 37 +9739 PF09907 DUF2136 Uncharacterized protein conserved in bacteria (DUF2136) COGs, Finn RD, Sammut SJ anon COGs (COG4680) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.80 20.80 21.00 21.30 20.70 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.40 0.72 -3.90 64 642 2009-01-15 18:05:59 2007-07-30 16:00:53 4 2 550 0 147 444 36 75.70 38 75.54 CHANGED cucpsLpsWaphscpApWpsPp-lKppa.ssAshlt..sp+hVFNItGNcYRLIstlsa.....phphlal+alGTHsEYD+ls ........................p.tchsLpshhshlp+upappPp-lKpha.sShDth.+....scahVFsluGNphRLluhlpF........ptp+hal+alsTHtEYD+l........ 0 31 94 128 +9741 PF09909 DUF2138 Uncharacterized protein conserved in bacteria (DUF2138) COGs, Finn RD, Sammut SJ anon COGs (COG4685) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
20.70 20.70 21.30 22.00 17.70 20.60 hmmbuild -o /dev/null HMM SEED 555 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.85 0.70 -6.37 14 352 2009-01-15 18:05:59 2007-07-30 16:01:41 4 1 334 0 18 175 9 509.80 80 97.64 CHANGED KtWthhulls..AlslluusVhtlta.....tspusc+sLss....sslpl........DLscPDullDScSLSQLPKDlLpVPhL+DVLTEDFVFYYpscuDRLGlpGoLRRIlYEHDLsL+DpLlcpLhDQPApVALWRusDG+LpaahllhpRuGLAKlLEPLuhsAsuDuQLSKsthu...lsusslPVYpL+Ysus+sLhFAocsD+llsLSsPsMLacc-.......tp......sscAsslhusLLuG....+chaspuFGLts+ssp..PspQRlVVSAshLuFGYQRahPuFAGlRF-hG.s......sG...WpSalALsD.stplstpaDFsPlWpAMPuGAShCVuLPhsathApshLs+luuE...ssphs.......-tLsGsAGlCWYucS+LhoPLFVuplcusstt...chsptlGKLFspsIGAaEuKAsc..........thLPVoppppG.puplWpRpVSSRaGpYssupAspPsQL.u-hFFpVoLAhpspTLlFSLDDpLVscALssLsKpaPAhuDVlPpD..ullPLYlsPpuhApLlcpEThsSLPpDhEPVFhNAAQThLhPKLcALuphspYsLsLspshpsuuuWQWLPlpW ......................KGWRFYGLVGFGAIALLSAGVWALQY.....AGSGPEKTLSPL.VVHNNLQI...........DLNEPDLFLDSDSLSQLPKDLLTIPFLHDVLSEDFVFYYQNHADRLGIEGSIRRIVYEHDLTLKDKL.FSSLLDQPAQAALWHDKQGHLSHYMVLIQRSGLSKLLEPLLFAATSDSQLSKTEISSIKINSETlPVYQLRYNGNNALMFATYQDKMLVFSSTDMLFKDD..........QQ......DTEATAIAuDLLSG....KKRWQASFGLEERsAEK..TPVRQRIVVSARLLGFGYQRLMPSFAGVRFEMGN..........DGWHSFVALNDESASVDuSFDFTPVWNSMPAGASFCVAVPYSHGIAEEMLSHISQE....NDKLN.......GALDGAAGLCWYEDSKLQTPLFVGQFDGTA.....EQAQLPGKLFTQNIGAHESKAPE.............GVLPVSQTQQG.EAQIWRREVSSRYGQYPKAQAAQPDQLMSDYFFRVSLAMQNKTLLFSLDDTLVNNALQTLNKsRPAMVDVIPTD.....GIVPLYINPQGlAKLLRNETLTSLPKNLEPVFYNAAQTLLMPKLDALSQQPRYV..MKLAQ.....MEPGAAWQWLPITW....................... 0 3 7 13 +9742 PF09910 DUF2139 Uncharacterized protein conserved in archaea (DUF2139) COGs, Finn RD, Sammut SJ anon COGs (COG4697) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.90 25.90 26.30 26.30 20.00 25.80 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.03 0.70 -5.41 6 35 2009-01-15 18:05:59 2007-07-30 16:02:18 4 1 31 0 24 37 0 292.40 40 66.93 CHANGED LYaTLAFEAcAaFIc.ssspc+lY-FpLVGstPsSGGDTYNAVEsVD-hIYFGGWVHAPAhaRt+scG+.AsIsFoNKYSHVHcYDTusscV+LlWKEShHc.ccWAGEVS-IIYNPYsDcLLLARtDGHtNLGVYpLD.RRGtsR+L.scPuhKGshhhDhAhFul.+pahtGhpGlcslDLlot+h....EtFs.us.SlDGGshhpP+lGuh.sSlasRlFAFV+GGlhVhNPa.GEcatFVRLhDFs.ohYuPhRsNALslGGGlLluFNuapcAshRsssE.tplhp+hTNTlluPSlLVYIAPPhVKIVGuFGARlTSlEthGsKlLlAsNTsPNstthD ..............LYaTlAFEupAaFl..pssucplYcFchlGpsPsSGGDTYNAVpsVD-hIYFGGWVHAPA.hactcs.php..thI...sFpNKYSHVHtYDhcsspVcLLWp-uhHc...c...pcWsGEVS-IlYDPapDcLLlARtDGHtNLGlYplshcsGchchLsppPoh.KGshhcDhAhF.sl.hps.........appGlptltshDhlst+.h.....-tFs.hut..ohDGtshlt.thGsh.soh.NRhaAFh+GGlhluNPh.t..-phpFhRLhDF..s.huPhRsNsh.huGGllhsaNuapcuhhpshp..t.h.hthoNslsuPolLlYlsPP.s+IVushGARlTShp.htscllluhsThsNhtth.......................................... 0 10 12 17 +9743 PF09911 DUF2140 Uncharacterized protein conserved in bacteria (DUF2140) COGs, Finn RD, Sammut SJ anon COGs (COG4698) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
20.90 20.90 20.90 21.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.07 0.71 -5.12 28 414 2009-01-15 18:05:59 2007-07-30 16:02:32 4 2 407 0 70 269 2 183.70 34 90.22 CHANGED NhWKhuFhs...LLAl.lhshshlsstlhtPs.p....ptsspppscucs.shslsosKcpLNpllspYLpcht.ssphsYchhl.ssplhlpGshplhGpslshhlhFcPpVhcNGsVhL+spslSlGsLsLPlshVLpYlppsYcLPcaVslpPccpplhlpLschchpsshhl+AccIDLhsD.cIpFslhlP ......................................................NhWKhsFlh...L.lul.llusssh...lssplhpsp...c.........tpts.sht..pppups.shsls..osKpplNp.llstYLp-ap.....s.pcl..p...Y.+hhl.sspshhpGohplLGp.slshhlhFpPhV.h.cNGsV.pL+spslSlGsLsLPlp.VLpalcpsY.+LPcaVplssccpplhlpLsphchpsuhhl+AcpIsLh.sD.cIpFslhh............................................. 0 18 43 59 +9744 PF09912 DUF2141 Uncharacterized protein conserved in bacteria (DUF2141) COGs, Finn RD, Sammut SJ anon COGs (COG4704) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.00 21.00 21.10 21.30 20.90 20.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.49 0.71 -4.41 90 328 2009-01-15 18:05:59 2007-07-30 16:02:53 4 6 253 0 136 354 294 108.70 26 65.11 CHANGED Vplsslcss....pGplhlulass..scsa.......ptppshtstpspsss..sssplsFpsls.sGsYAlulhHDpNuNG+hDpsh.hGl..PpEsaGhSNNsp..hphG......PPsFpcut.Fsls.ttspslsI .............................................lplpslcss.pGplhlslass....tpsa.............ttppsht...th...ps.tsss....sshplshpsls.sGpYAlslaHDcNsNs+lDpsh...hGh..PpEsaGaSssst..hh..h.G......sPsapcutFtls..tstph......................... 0 45 93 121 +9745 PF09913 DUF2142 Predicted membrane protein (DUF2142) COGs, Finn RD, Sammut SJ anon COGs (COG4713) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
24.10 24.10 24.30 24.30 23.90 24.00 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.44 0.70 -5.80 34 382 2012-10-03 03:08:05 2007-07-30 16:03:08 4 2 312 0 85 365 107 367.10 17 74.57 CHANGED l..hhuhhhshssPshpsPDEshHhs+sh.......tl.sp...G.h......hssspsph.t.......................thttshhthhsptss.st.pthtpsshspptsshsph.s......YsshhYlstulGhhhuphlshsshhslhhuRlhNhlhashlhhhulthhsht+..hl...hhhluhlPhsla.suShssDuhthuhshhhhuhhlph....hhppphsp+...hhhhllsssLlshsK...ssalhLshlhhhls.......ttthspct......................................hhsthhsshhhslshhhhhths......pth.hhtsth............stslthllspshthht........lhhpsl.....h...phhsstF.....hGWhcs.l.s..hhhshhhslhhlhhsshsthph+.........phhhhhlslsshhhlhhhlhlshosssssh.......l.G........lQGRYalP...lhhll .......................................................hshhhhh.h.P.....hp..s.P.DE.hHh.psh.................tl.sp.....................................hspstt.t..............................................h.ht.......t.t.t.....t.t...ht.t.h..t.t....h.sh...p..t..h..s....s..................h.shhYlspulGhhl....uphl........t..........hs....h.....hhh.h....hhuRlhNhlh...ah....h....h....h...h....h....u..l....+h....h.......ht+........hh..........hhhlull....Phsl..........s....u.ShshD...shs.h...shshlhhuhhhph............htppphstp....phhhh.hlhsh.llshsK.......hsahhll...hL.hh..hls.................hp.h.httpt.....................................................................................hh.tthh.hh.h.h.hhlhh.h....h..h.hh.hh.....................h...hh..t............................ttp...h.t..h.hltp.h...hht.............hhhpsh......................h.s..h.hhs.h......h.hGhhs.ht......l..sh..h.hh.h.h..h.......h....h.h....lhhl.hh...s.thp.ththp.............................h.ph.h.hh..hlh...h...sh..l.hhlhhhh...hls..as.shstsh.............h.G........lQGRYah.P.hl.h............................................................................................. 
0 30 55 75 +9748 PF09916 DUF2145 Uncharacterized protein conserved in bacteria (DUF2145) COGs, Finn RD, Sammut SJ anon COGs (COG4727) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 25.10 69.70 18.30 16.50 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.25 0.71 -4.79 20 193 2009-01-15 18:05:59 2007-07-30 16:04:25 4 1 187 0 37 127 1 199.40 53 72.14 CHANGED ctLsu....pGAcVsllARsGpDhSch..Gl+aoHsAaAlp...s.s-G....sWpVhpLhpcCss.spSpLap-GLu-Fah..-sshph-sulllPoPplQp+LlslLsosstt..tLHsspYSllAaPauocYQNsNpWsLElLAAAhtt...........sss...........RcpspuWL+.AtYpPssl+luhhpRLuuphhssslshcDHP.t.hhuGplpTsTssSlhpaL .......QpLsu.psssVVILsRpGQDhSph..cLpaSHAGaAhR.....p.PsG.......sWRVaHpLNsCGT....AcSuLYhQGLhEFlu..DDLlspsluVlRPpu-lAsALpsLLpSuh+Ls.hhHuPRYsllAaPFSusYQNSNtWlLEVhAtA.-A.p...........lho...........RscA+pWLQhpGYQPSlVssGsacRLGA+LFsPNVaTDDpPuEhlhpGNlulsosDSVhcFl.............. 0 8 23 30 +9749 PF09917 DUF2147 Uncharacterized protein conserved in bacteria (DUF2147) COGs, Finn RD, Sammut SJ anon COGs (COG4731) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 19.80 19.80 20.30 21.80 19.70 18.80 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.61 0.71 -3.82 158 1014 2009-01-15 18:05:59 2007-07-30 16:05:10 4 3 685 0 331 901 232 111.90 28 71.04 CHANGED G.hWpT.......ststs..Vcltp.su.....up...lsGplhph..........................tpspsp.....hcspsllGhpllhs.hc...sssp........ap.GplhcPcsGKsYpuplpl.....pss..ppLpl+G..pluh..hls+o.QsWpR .....................................................GhWpo.......ststs.....lcltp.ss......up.....hsGplhph.....................t.t..tppssss.........hcspshhGh.plltshct...sssp.........aptGplh-Pc..sGKs.Ypschpl.........pss....tpLpl+G...hl.u.h.t.hls+o.poWpR............... 
2 94 198 252 +9750 PF09918 DUF2148 Uncharacterized protein containing a ferredoxin domain (DUF2148) COGs, Finn RD, Sammut SJ anon COGs (COG4739) Family This domain, found in various hypothetical bacterial proteins containing a ferredoxin domain, has no known function. 25.00 25.00 36.70 35.90 20.30 19.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.30 0.72 -4.25 11 138 2009-09-10 16:57:13 2007-07-30 16:05:19 4 2 131 0 61 116 3 69.00 55 39.09 CHANGED -FsGPpCsh+hlDLGIAlGSAsKsAp.LslDNRlMYohGsuApphGhl.DA-llhGIPLSsoGKNIaFDR ............tGssCAhpslDlGIAlGSAsusAuplpVDsRVMaSsGlAApcLshl..-..sc..h..V...h.uIPlSAouKNsFFDR.. 0 29 50 57 +9751 PF09919 DUF2149 Uncharacterized conserved protein (DUF2149) COGs, Finn RD, Sammut SJ anon COGs (COG4744) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 39.40 32.40 23.70 23.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -10.16 0.72 -3.89 29 201 2009-01-15 18:05:59 2007-07-30 16:05:37 4 1 172 \N 82 166 11 90.30 38 82.04 CHANGED hsulsNLhDlhlVhuVuLllAlVhshsh.phh....sppshshhpssspsshphlh.+.cGpclcphp.otp....uuGpG...p+hGssYc.hcsG+lIaVP- ....ssVsNLhDlhhVFuVuLhlAlVhphshsphh....uppchshlc.s.sGcts.M..clls....K.cGpclpcapsopp.......uuupG...p+lGssYc.LcsGclIYVPE..... 0 31 68 77 +9752 PF09920 DUF2150 Uncharacterized protein conserved in archaea (DUF2150) COGs, Finn RD, Sammut SJ anon COGs (COG4749) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.60 20.60 103.70 103.50 19.40 19.40 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.27 0.71 -4.70 14 47 2009-09-11 15:57:27 2007-07-30 16:05:50 4 1 47 0 36 48 0 187.60 39 94.39 CHANGED hpFYopERWpNWlsplcEsch-..s-s--suc...lhhshpDDlslAllKIlstacstpl.sc--AlscLcsl+-IVhs-st......sEDh..ll-ulQsSLlslFhAucpYltGshs.-sslcphlcsAhcAEt.--Dl-pALshsuphGutVIsGcshstchlcD..hphGLVsEWlsGlDolpsAhssscsh-E- ..pFYopERWpNWls+lcEp-h-...sEsE-su+...lLlNhpDDsslAlhKIlsta-cspl.scE-AlccLpsl+-IVLsEl-......cE-pshll-uVQTSLlslFhAAEpYlsGshst.-uslp-hlcsAs-AEt.-EDlDsALshsupsGstVlsG.ccLshphhc-..hpYGlVsEWlsGlDSlpuAMsss-sh-E-....... 0 6 29 34 +9753 PF09921 DUF2153 Uncharacterized protein conserved in archaea (DUF2153) COGs, Finn RD, Sammut SJ anon COGs (COG4755) Family This domain, found in various hypothetical archaeal proteins, has no known function. 23.00 23.00 23.40 103.20 22.20 22.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.53 0.71 -4.52 8 41 2009-01-15 18:05:59 2007-07-30 16:06:01 4 1 41 0 26 34 0 125.30 47 88.22 CHANGED sL-pWVKhQKplLcolcsh-..EchcsuDRL-LIluoRsAFpHMhRTLKAFDpWLQDPhlluHMPREMLhDVpsshhclhppLLELDI+HTSpa+-LlpKhu+EGKLsPllhhh+..stpcPsRR+cu ..pL-pWlKhQ+pllpsLcchE..-phc..s..hDRL-LILuoRsAFQHMhRTLKAFDpWLpDPhlhpHMP+EMLc-lcsphh-lL.cLlELDIcHTSpaR-hlsKhuKEGKLsPllhh.h..ppppspRcp..t............. 
0 9 14 19 +9754 PF09922 DUF2154 Cell wall-active antibiotics response protein (DUF2154) COGs, Finn RD, Sammut SJ anon COGs (COG4758) Domain \N 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.37 0.71 -4.39 37 1584 2009-01-15 18:05:59 2007-07-30 16:06:22 4 7 1195 0 260 904 13 106.40 24 44.17 CHANGED thhGstpphpp..sapacDlNltshhGcshIDLspshlsps-ssIhI+phhGsscIlVP.DlsVplct.oslhGslphhsppp.tl.Npslphpossappst+clKIhsslhhGslEV ...............................hGp.ph..p..shph.cDlplpthhGcspIDLspsh.h.p....cp...s...s..Ih..lcthhGssplhVP.hc..hp..V..sLc.s..ss.haGssp..........t..p.....t....t..h.......p.....php...pt.h..............s....h.plp..lhhshhhGsl-V.............................................. 0 99 190 227 +9755 PF09923 DUF2155 Uncharacterized protein conserved in bacteria (DUF2155) COGs, Finn RD, Sammut SJ anon COGs (COG4765) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.30 21.30 22.40 22.50 20.80 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.12 0.72 -3.88 47 322 2009-01-15 18:05:59 2007-07-30 16:06:44 4 3 319 0 107 251 923 91.50 43 52.17 CHANGED AslpuLDKloGcsp-l-ltsGcssphGsLpVslptC..RhPs-.sPsssAFstl.........slpcpspst.......lFsGWMhASSPuLsAl-HPhYDVWVhcC .........................................................AhhpuLDKITG+h.psh-lhlGE.o.spF..G.u.Lplss+sC........hp......R......sssE.sspssAFlpl..................Tlscphpc.........lFsGWMaAsSPuLsulEHPlYDVWlpcC............... 0 32 66 79 +9756 PF09924 DUF2156 Uncharacterized conserved protein (DUF2156) COGs, Finn RD, Sammut SJ anon COGs (COG4866) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
27.20 27.20 27.40 27.20 26.50 27.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.65 0.70 -5.60 156 2342 2012-10-02 22:59:21 2007-07-30 16:07:05 4 28 1932 2 547 1913 59 286.60 23 38.43 CHANGED ttllppts...tpshuplshhpcpphhh.s....ssp....uhlha....thpspshlshusPlG...s.....pphsphlppahphuppt.uhtsshYplstchhshhpc.hG.....hphh+lGp-sllslpsa.sLs..G++hpslRptlN+hc+pG..hs..hchhth.....sphhscltplucpWhpt.c..........pE+sFohuhhctthht.p..hthsl..lcs..-G................................clhAFsshh.sht....psshslDlhRtpsc.uspGhh-hlhhph...hthhptpG.......hphhshuhushtth...t................................t+hhthlh..tpphashpGLRpaKtKapPpa.ts+Ylsh ........................................................................................................t...htphs.sssh.u.thhhht..D.+th..hh..s............ssp.............uhlha......phpss..t...hl.sh.G-PlG........st..............pph.t.ps...l.ptFhphsc.ph...u..hpss.ahp.lspp...hh..h.h..pc..hG.........................hphh+lG-..Eu.ll.c.h...ps....F..sl....s.....G++.....h.p.sh.Rptls+hc.+tu.hs...hchh..p..........sph...h....p.clpp.l.u-pWhss.+..................................pEpsF..ohuh..h....c...t....t.......ht..p.....tt.h.sl......hcs.....cG..................................................................cllAFsshh...sht...................ppshol.DlhRt..p..s.....c.s...spGhh-hlhhphhth..tp..p..p..G.................hphh.shuhA.....s.htsh...........s................................t...u..................p+hhthlhph.hpphas.h.pGL+chKpKapPpa.ps+Yhs.................................................................. 0 193 362 467 +9757 PF09925 DUF2157 Predicted membrane protein (DUF2157) COGs, Finn RD, Sammut SJ anon COGs (COG4872) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
25.40 25.40 25.70 25.60 25.20 25.20 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.11 0.71 -4.59 51 469 2009-01-15 18:05:59 2007-07-30 16:07:22 4 6 466 0 144 421 111 138.90 23 29.45 CHANGED ltpWhcpGlls.........sssspplhthhs....t..ssttp.....hshhLhhlGulhluhGllhFlAtNWppls+hs+hullhshlhssthsuhhhhh.............................ptpthhupAhlhluull.hGuhluLlGQhYphuuc.h..phhhhWulhslshAhhhppsslhhl ........................................................t.ls..........tthtth...ht...................th.p.............hhh...hlhhlGulhlusullhhlAtNW........pthschh+lullhsl.lhs...shhsuhhh.h................................................................pt..pph....hsps.hhh.hu.uhh.hGuhlAllGQh.YphuuDs.a..phhhhW.slhhLshuhlhp.t.hhh.h................................................................................... 0 41 89 121 +9758 PF09926 DUF2158 Uncharacterized small protein (DUF2158) COGs, Finn RD, Sammut SJ anon COGs (COG5475) Family Members of this family of prokaryotic proteins have no known function. 22.00 22.00 22.20 22.30 21.30 21.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.85 0.72 -4.53 18 536 2009-01-15 18:05:59 2007-07-30 16:08:06 4 3 515 0 65 179 7 47.80 68 73.29 CHANGED FplGDlVpLKoGGPcMTVo.shs........htsushhhCpWFsspu.....cpthFsE-oLt ...FhVSEEVTVKEGGPRMIVT...............GYSSGMVECRWYD...GaGV....KREAFHEsELV..................... 0 6 19 37 +9760 PF09928 DUF2160 Predicted small integral membrane protein (DUF2160) COGs, Finn RD, Sammut SJ anon COGs (COG5477) Family The members of this family of hypothetical prokaryotic proteins have no known function. It is thought that they are transmembrane proteins, but their function has not been inferred yet. 
25.00 25.00 29.50 29.30 22.50 20.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.27 0.72 -3.89 37 244 2009-01-15 18:05:59 2007-07-30 16:08:49 4 1 231 0 96 238 242 89.80 49 93.67 CHANGED WMAWThPoAlFFssIushLhsMTlhE.l+pP.ss.R+GhLsIpTTRGDRLFIuLLuuAaIpLuWlGlss...........sslWhuhslSllahhhlhRhs ..............WMAWThPTAlFFhsIuhhLssMTlaE.ltpP..ss.R+GhLsIsTTRGDRLFIuLLuuAaIpLuWlGlss................s.s.l..WhAhslSllahlhlhRhs........... 0 16 43 65 +9761 PF09929 DUF2161 Uncharacterized conserved protein (DUF2161) COGs, Finn RD, Sammut SJ anon COGs (COG5482) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.40 20.40 22.90 38.00 20.10 19.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.52 0.71 -4.18 30 134 2009-09-11 04:54:42 2007-07-30 16:09:03 4 2 129 0 48 143 23 113.40 43 50.24 CHANGED llsVc.pcG..tVEVhsDPu.PasPRK....sp+++s+LLpEFpRRpGDPNsGGuTR.pslhTAYRQ-ALpsAsaLuttGPs+uucltctssVs.cATpIhpcNHYGWFERVp+GlYsLTstG+suL ...............hlsVp.tsu...tVEVlscPs........sh..t..P..R+.......s.+++sRLlcEapRRpGDPssGGuT.R.tslhTAYRQpALtCAstLs...tG.st+stclppt...ls.pAspILtcNhYGWF-RlpRGlYsLTssGctu.l........... 0 13 27 33 +9762 PF09930 DUF2162 Predicted transporter (DUF2162) COGs, Finn RD, Sammut SJ anon COGs (COG4827) Family Members of this family of bacterial proteins are thought to be membrane transporters, but their exact function has not, as yet, been elucidated. 
28.40 28.40 28.90 37.90 26.10 28.30 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.59 0.70 -5.21 15 44 2009-01-15 18:05:59 2007-07-30 16:09:15 4 1 38 0 33 43 2 225.80 27 93.79 CHANGED sssLahhGlLhullIFGlKsGlGhGFuslS.+KtlhhIsuuYhllshll...utlAsphshplhphlhshGhslHsllulhlIhsGlaTl..+cWtspG..KDs...o+pThLAlslPCPsCLuAlhhSshllusslslSshplGhhlGssFhlsl....lloshls+....thshssP.sLGshMlhlGlYFLluulllPuhlpstphphsshth.........sssshhhslllhhsLllsGalhs+tc ...........lh.hGlLhu.lhIFGlKhGLuhG...a..u.sl.o.++thhhIs....hh....Yhhhhhll.......uhlssth.s.....phhphlhphshhlallhulhllhsGlaTl.....+c..Wttps..+ss.................sppohlshhhPCPsChuAlhhShhllushlslSshplGhhl.....ullhhlhl........lhoshlh+.....thphstPh.hLGshMlhlGl.YFLluulllPshhpstph.thsshsh.........shpslhhshl..hhhsLllhGalhp+h.p..... 0 12 27 29 +9763 PF09931 DUF2163 Uncharacterized conserved protein (DUF2163) COGs, Finn RD, Sammut SJ anon COGs (COG5449) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.60 21.60 21.70 21.60 21.10 21.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.78 0.71 -4.47 52 337 2009-01-15 18:05:59 2007-07-30 16:10:04 4 4 307 0 94 298 50 154.50 26 56.38 CHANGED Mpsh.sssLtsHL.psusTTlscsWtlsRpDGsshGFTDHDpsLsh-GlsacAsoGh........ousslppssGLulDssEsh.....GuLss.....sslsEsDltuGpaDGApVcsaLVNW...s-s.......st+hlhapGslGElpRus..uuFpAELRGLsc.tLspshGR.lappsCsAsL...GDucCtlD ...................................th.thl.ts.t.h.s......shshsahlp..p.t.sGhshuFT-aDpsL.s.h..........s.......u...hh....aps.ss.Gh.........ssuthppssulus...sshclp.....Gshss............stlp..c..tclttGhaDu.Apl...c...ha..hV.s.a......scs..........stphl.lhpGpl.uclpp.ss....s.thph-l+ulsp..tLspstsR.ha.pptCsAsL...GDtpCtls............................... 
0 21 61 73 +9764 PF09932 DUF2164 Uncharacterized conserved protein (DUF2164) COGs, Finn RD, Sammut SJ anon COGs (COG5460) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 21.50 21.50 22.40 21.60 21.30 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.24 0.72 -4.18 47 470 2009-01-15 18:05:59 2007-07-30 16:10:22 4 1 443 0 102 324 16 74.70 32 88.77 CHANGED Iclsp-p+pphlspIpcYFpcEh-p-lGshpAchLLDFhscElGPhaYNpultDApthlpc+hpslpp-la....tlE......K. ...............Iclsp-p+pchlsplpcaht--hp.ElGpFpAchLl-FhhcclGPhhYNpultDApphlpc+hpslpp-la.lEp................ 0 23 53 79 +9765 PF09933 DUF2165 Predicted small integral membrane protein (DUF2165) COGs, Finn RD, Sammut SJ anon COGs (COG5472) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 20.80 20.80 21.50 21.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.85 0.71 -4.51 25 244 2009-01-15 18:05:59 2007-07-30 16:10:43 4 1 207 0 58 154 29 137.60 37 94.20 CHANGED h.hlRluKslhlhulAhFusLVuasNlTDYsoNasF.VpHVLsMDTs...FPss..sltaRAIssPhlapsuYhhIIshEslsulLChhGuhpLh+sh.psssssFppAKshAlhGlsluhLlWhsGFhsVGGEWFsMWtScpWN..G.puAFRFhhhlllVLIals.s- ........h.RhuKhlhshslAhasslVshsN.lsDasoNatF.VpHVhsMDoh....a.ss....slhhRAIsoP.......hlpphuYhhIIhhEshsulhhhhGuhthhtsh.t.sstsFppuKhhshhuhshshlhW.hsFhslGuEWFsMW.Sp.WN..G..sAhRhhhhhhhsLlals............ 1 18 38 48 +9767 PF09935 DUF2167 Protein of unknown function (DUF2167) COGs, Finn RD, Sammut SJ anon COGs (COG4714) Family This domain, found in various hypothetical membrane-anchored prokaryotic proteins, has no known function. 
25.00 25.00 27.90 29.80 22.00 20.10 hmmbuild --amino -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.45 0.70 -5.54 15 227 2009-01-15 18:05:59 2007-07-30 16:11:22 4 3 219 0 39 156 51 235.70 49 77.40 CHANGED ulp.tsGsIsLstcpAsLsLPsuahaLsssDsp+lL.-taGNPPsssp..LGhlhPss....sspsWhVllpY-ssGYVoD-DAscIDhs-LLpsM+puscEsNcpRccpGhsslcllGWAptPpYDsso++LhWAtcL+spuu......ppolNYslRVLGRcGVlsLNhVAuhspLsplcpphpplLuhscFscGpRYADFssssDKVApYGLAALVuGs....lAuKhGLaAhlusFLtKFhKllllullulhu ................................sA.ssGPocIsLG.DcATLNLPcGFsalPAc-Auha...M..+ph..G....NhsD--h.hhGLVh.c-.......hshalslEYDDSGYVKDDDAKsWDADELhcsL+cGTcEuNK-Rht+Gl-sIEllGWlEKPsYDAssHRLIWSss.lpDhGsst...s-puVNYNTYlLGREGYhSLNLV...TDcuslD.c-h..PhA.c.clLoul+FNsGpRYADFNcSTDKIAEYG....LAALIGGl..............AAKKlGLLAhlGlhL....lKFWKVsAlGVlAlGA................................... 0 6 15 29 +9768 PF09936 Methyltrn_RNA_4 DUF2168; SAM-dependent RNA methyltransferase COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4752) Family This family has a Rossmanoid fold, with a deep trefoil knot in its C-terminal region. It has structural similarity to RNA methyltransferases, and is likely to function as an S-adenosyl-L-methionine (SAM)-dependent RNA 2'-O methyltransferase [1]. 20.80 20.80 20.80 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.22 0.71 -4.79 28 174 2012-10-01 22:53:19 2007-07-30 16:11:38 4 2 169 1 76 201 25 181.90 45 76.98 CHANGED hsslhlALlHYPVhsKctclluTAlTNLDlHDIARouRTYGlppYYlVTPlpsQppLlc+llsaWpcGaGupYNPsRpEALplV+lssoL--sl--lpphpGpcPhllsTuA+.h.tsssloappl+chl..pscpPhLLLFGTGWGLspElh-psD.hlLEPIcGt..ucYNHLSVRuAsAIILDRLhGc ..................................................t.tplYluLVHYPVhNKptcllsTulTNhDlHDIuRsu+TYslptYalVsPlcuQ+cLsp+IlsaWppGhG..upYNPcRp-Ahphlclssol-sslccIcc.tcGp+Phl..l.sTuAR.....t........hsso.....loappl+chl..ps-cPhLlLFGT.G.WGLscElhptsD.alLE..PIpG.t..ucYNHLSVRuAsAIILDRLhG...... 
0 44 67 75 +9769 PF09937 DUF2169 Uncharacterized protein conserved in bacteria (DUF2169) COGs, Finn RD, Sammut SJ anon COGs (COG5351) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 38.20 32.40 24.10 23.70 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.78 0.70 -5.33 44 398 2009-09-11 05:29:39 2007-07-30 16:11:59 4 39 234 0 153 377 32 263.10 25 49.67 CHANGED -.pGpphlslslKuoasls.tshh........sppQtslh.u-pah.G-Puhouhh.hpsDhshhKstsDlllpGpAauPtGcsssplp..VslcVGs.....hpKtlpVhGcRhW....h..ssstsosPpPFpp..hPlsa-pAaGG..........s.....tsh.cNPlGpGa..tpppttt...thslPslEts.sp.lpsss.....tp.t.PsGFGPlshtW.sRt....phAGT.YDcpWhcs...chPhL....PpDaD.RaapsAPsDQphst...h.pGGEtlpLhslps.p...sp.lpapLPtht.hshhh........h.hsss.........hpphphplDTlhl-s-tpplhLlWR ..................................................................h...tt.hhslssthshpl...............st.t.t.lh.ss..a....sts..t.t..uhh.......hDhs..Ksts-hl.l.sGpAau.s.tsp.sspt..hpstlplss.................hp.Kt...lplhGsR.a.........................sshthstsp.Pa.p..hslsaphAaG.G..........................................sh.tNPhGhGhh.t....tt........h......................hPslp.s..pp..hptst......................tt...ssuhGslshtW..Rh.................phh....Go.aDpt.W.hpp.......phPh..h......................PtDhD.+aapsAssDQ.hsth......h..puspphpL..hshpsp...........tt.lthpLPthpshshlh.........tpst........................hpphshphcTlhl.s.....c..p..p...phhlha+............................................ 0 56 94 124 +9770 PF09938 DUF2170 Uncharacterized protein conserved in bacteria (DUF2170) COGs, Finn RD, Sammut SJ anon COGs (COG3789) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
19.00 19.00 19.80 27.60 18.90 17.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -4.82 28 545 2009-01-15 18:05:59 2007-07-30 16:12:18 4 1 536 0 65 200 8 131.50 57 94.39 CHANGED hs.WslpsLtsuLsspsphpstphsh.llp..Gs-ssLplshp-hs-LPlaluloucQ.IlspuhLasss-Vp-..tsthN-tlL+sp.phhPLSohGltth.csc-hYshFGALSssSolsslltEltTLA-Nslcss-shpsahp .....................M..sWsPhsLAsALpslsE...pph-...ls..NsEuuLIIKMNDYGDL.IslLFTScQ.hlIETaICPVsoIss..ssEFNpFLLRNQ.KhhPLSSVGIopV.pQEEYYllFGALSLpSSLcDIlLEIToLVDNALDlAEITc-YS.p....... 1 12 26 44 +9771 PF09939 DUF2171 Uncharacterized protein conserved in bacteria (DUF2171) COGs, Finn RD, Sammut SJ anon COGs (COG3798) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 25.00 25.00 25.10 26.10 23.70 24.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.20 0.72 -3.97 17 134 2012-10-02 14:14:57 2007-07-30 16:12:29 4 2 117 0 59 143 4 66.70 44 70.39 CHANGED pI+EHh-VluuDGsHVGTVDHl-G.scIKLTKpDts.........tsGcHHaIPhuWVspV-ss.+V+Lstsu-pshp ...pI+-Hh-Vluu-Gs+VGTVDHl....-.G.spIKLTKsD...............sGpHHhIPhuaVscV-ss.pVhLstsucpsh.p.... 0 13 33 48 +9772 PF09940 DUF2172 Domain of unknown function (DUF2172) COGs, Finn RD, Sammut SJ anon COGs (COG4310) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. An aminopeptidase domain is conserved within the family, but its relevance has not been established yet. 
19.70 19.70 19.80 19.80 19.60 19.60 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.26 0.70 -6.09 18 203 2012-10-02 19:46:12 2007-07-30 16:12:41 4 4 195 1 42 243 564 367.60 46 85.76 CHANGED LplHEVPSGTpVFDWTVPpEWNl+-AaltsssGc+llDFtcsNLHlluYStPVctploLcELpsHLaSLP-pPDhIPYhTSYYpcpWGFCLsapphpsLp-GcYcVhIDSsl.ssGsLoYGEhllPG..-occElLlSoalCHPSMANspLSGssVhshLAchLtspspR.YoYRhlFlPETIGSIsaLSRN.-clcc.lctGhVLoClGDcp.saoahtSRpG....sshhDRlutHVLpcps.psachhsFhs.hGsDERQaCSPGhDLPVsslhRohYGpYPEYHTStDsLchlsPcuLtsuashlpcslplLEsNpsYh.s.sshGEPQLGKRGLYsslusppph.t.pths.........hL.lLshuDGppoLLDIAEphshsFhcltshsctLhcsuLlc ...............................................................................LchHpl.oGTplaDWhVPpEWsl+-AaIhs.sp.Gc+IsDFtcpNL.HllsYS.slctchsL-ELpsHLaol.-.PDhIPYh..T..SY..Yp.cpWGFClsHsphp...tL....c......c......G..c..Yc..V..hIDuch..csGs..LsY.uEall.PG.........poccElLlSsal..CH.....P....S.h........AN...spLSG.s..lshhLA........+hLh.....u...h......cp...R......Y......oYRF..l.h...........h....P....t....T......I...GuI..saLu..+.........ph...-.pL.c.....+.............V+tGhV.LSClG....Dst.......sh...o.....h.h..+.S.++s......s.s....D.+lhhHs..L.pp.p..........p.sa.c...hasFhs....h....G...DERQas....u.PGhNLslsslpRoh.Yu..cY..spY.HTStDsL.s.FIo.cuLts.uhph..htc.h.I...hL.EhN.tsah.N.o.hs..EPpLG+RGLYto.....lus...ps.....ch........h.........hh.hLshsDGppsllDIAp....hhsh.hhphtphh-+ltthGLl................................................................................................................................................................................... 0 15 27 35 +9773 PF09941 DUF2173 Uncharacterized conserved protein (DUF2173) COGs, Finn RD, Sammut SJ anon COGs (COG4831) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. 
24.20 24.20 24.90 26.10 23.60 24.10 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.26 0.72 -4.11 11 47 2009-01-15 18:05:59 2007-07-30 16:12:56 4 1 30 0 31 40 0 106.30 40 93.38 CHANGED .pLccLMplsGVhAAscFosDGcLh..EacG..plscchAchsAphCuANshhspMpAcuaophoGpsGWpPhpGWslsGs-auVClsGshuVFVcpscssasclhcsLpps ..t.cLc+LMpLpGslAAGcaosDG+Ll..cYpG.....shsc-hA-MlApMsAANshMuphpA-uaothS......Ghc.WsPhhGWAVsuu-YsVClhGNhGVFVchscADFNplF+sLtc..... 0 11 23 27 +9775 PF09943 DUF2175 Uncharacterized protein conserved in archaea (DUF2175) COGs, Finn RD, Sammut SJ anon COGs (COG4847) Family This domain, found in various hypothetical archaeal proteins, has no known function. 25.00 25.00 25.50 29.00 24.10 23.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.43 0.72 -3.96 10 32 2009-01-15 18:05:59 2007-07-30 16:13:27 4 1 32 0 18 31 5 97.60 40 92.31 CHANGED p+WKCslCGcsIhpGpLFTFhSc.GsVHa-ChccctspKhpt-s..plssL.pL--hlpcslVhtccLsplu-sEElKcllcpscKplE+tAAcLT+clpch ..pKW+CslCspsIh.spLFTFhpp.GslHasCLccchhppsph-s...thhL.pL--.L+phllhtpcLppltp.EEsK+hlcphcKshE+pAuhLTphlpc.h..... 0 4 8 13 +9777 PF09945 DUF2177 Predicted membrane protein (DUF2177) COGs, Finn RD, Sammut SJ anon COGs (COG4852) Family This domain, found in various hypothetical bacterial proteins, has no known function. 20.10 20.10 20.10 23.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.68 0.71 -4.15 50 240 2009-09-11 01:55:54 2007-07-30 16:14:14 4 1 231 0 82 203 342 125.50 36 93.84 CHANGED Mpph.llhYluohllFlsLDhlWLuhlucshYcsplGslhtcp.....hRhsPAllFYLlYluGllhFslhPultsushtpul.lhGALlGhhsYuTY-LTNhAsL+sWshphsllDlsWGshlTusuuhhGhhl ............phlhhalsohllFLllDhlWLshhucph...YpptlGsLhtst.....schsP.AllFYllYlsGlshFslhPulpp.........s.sh.st.......sl.lhGALhGllsYuTYDLTNhATL+sWshphsllDlsWGohloulsushuhh.............. 
3 26 46 61 +9778 PF09946 DUF2178 Predicted membrane protein (DUF2178) COGs, Finn RD, Sammut SJ anon COGs (COG4854) Family This domain, found in various hypothetical archaeal proteins, has no known function. 22.50 22.50 22.50 23.20 22.30 22.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.38 0.72 -4.32 12 56 2009-01-15 18:05:59 2007-07-30 16:14:43 4 1 44 0 46 58 1 111.00 20 81.38 CHANGED ppaphllhhlshhhGul......luaAhssGsshlAlhsVhhGlhhhhhl+p+l-s...VlEDERhh+luEKAShhTlplhsl...shALuGshlh....shp..hsthsphuhhlsaushhlllLY ..............................hhhhhllslh..hshl......huausps.....ups...hlu.hhshhhuhhlh....hhh...cp.plcc.......l...l.EDERsh+IsEKAuphTlp...lhhl...shul.tslhhh....hht....t...hh.h.h.h............................................................ 0 6 25 39 +9779 PF09947 DUF2180 Uncharacterized protein conserved in archaea (DUF2180) COGs, Finn RD, Sammut SJ anon COGs (COG4855) Family This domain, found in various hypothetical archaeal proteins, has no known function. A few of the family members contain a zinc finger domain. 20.90 20.90 20.90 21.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.54 0.72 -4.22 13 46 2012-10-03 10:42:43 2007-07-30 16:15:47 4 1 38 0 34 45 0 67.50 43 89.15 CHANGED MKCYhCAcEGKDT-AVuICIVCGMGlCh-HllR-ElshWcGGYPaPsc........KlKcsLPRILCh.CasAhpt ...MKCY.CstpGK.Do.-AVAlCIVCGhGlCh-Hsl.+..E-l.shhp.G.uYPhstp.........+hpcsl..PRlLC..ChsAhh.t....................... 0 8 25 28 +9780 PF09948 DUF2182 Predicted metal-binding integral membrane protein (DUF2182) COGs, Finn RD, Sammut SJ anon COGs (COG5486) Family This domain, found in various hypothetical bacterial membrane proteins having predicted metal-binding properties, has no known function. 
25.90 25.90 27.50 53.40 24.10 25.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.62 0.71 -4.39 51 254 2009-01-15 18:05:59 2007-07-30 16:16:03 4 2 201 0 94 263 221 188.40 34 70.97 CHANGED hhsMWslMhsAMMLPoss...PhlhtYpclhttt......spstshhshhssGYlhlWhuaullAtslphsLtthuhhsshhs.sts..hluuslLlsAGhYQFoslKptCLspCRsPhsahhpc..WRsG...htuuh+hGl+pGhaClGCCWuLMslhFsuGlMsLhWMuhlolh.......hslEKl....sshGctls....+slGslLlshuhhhl ......hsMWhlMhlAMMLPoss.Phlhtapclhttt......sppshth.shhssGYLssWsuFullAtshphsLpthuhhs.shhs.ss...hluuslL.hlAGhYQaoPlKptCLspCRsPhsa.l..h..pp...a..R.su...htsuhphGl+HGhaClGCCWuLMslhFssG.sM.slhWMuslshh.......hhhEKh....hPt.Gc.h.lu....+slG..ssLlshushl.h................. 0 20 51 70 +9781 PF09949 DUF2183 Uncharacterized conserved protein (DUF2183) COGs, Finn RD, Sammut SJ anon COGs (COG4850) Family This domain, found in various hypothetical bacterial proteins, has no known function. 26.50 26.50 26.80 26.50 26.00 26.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.23 0.72 -4.07 76 563 2009-01-15 18:05:59 2007-07-30 16:16:46 4 9 433 0 307 521 33 99.20 37 20.65 CHANGED shaYVSsSP.WNLashLppFlpppthPtGs.lhL+ca....ssshpphhpsspt...+KhpplcclhppaPphcFlLlGDoGpcD.s-lYsclscpaPspltul..aIRpl ...............................................haYlSsoP.WNlashLppFlpp....psa..PtGs.lhL+-a.........sss.pshhpsusp......+Kpstlcclh.psFPch+alLlGDsGQpD..clYs-.hscpa.Ps+ltAI..hIRp......................... 0 99 187 265 +9782 PF09950 DUF2184 Uncharacterized protein conserved in bacteria (DUF2184) COGs, Finn RD, Sammut SJ anon COGs (COG4834) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
26.80 26.80 26.90 27.10 26.50 26.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.27 0.70 -5.32 26 275 2012-10-03 06:22:39 2007-07-30 16:17:03 4 3 246 0 39 242 28 236.10 25 74.81 CHANGED sGtA..phlu.stusDlPtl-lshscppsslthhuhuasaolpELcsApth.GtsLsspKspusppshppplsplsahG.....-sspGhsGLlNp.sulssssssss.......W......tspTs-pllsDlsphlsslhstosh..sthssplLLPsspathLupphhsssssholLcalpcsNh.........hs..tG.sLsIpsltt.LcssGsuGs.......sRhlsYcpssphlphtlPhshphLPs.Q.pslchcVPhht+hGGlplhhPtsltYhDGI .......................................h...t..phlu.ptuss.lstlslshscptsslthhuhthsaol.ELpt..Apts.Gps.ls..spKhc..uhphthphc.schsahG.......-ssh.s...lpGLlNt.sslssssssts...............W.......sssTs-...cIhss...lsp.hlsssh.tt.osh....sh.........s.Ps....s.LhlPsppa.shLssphhs...s..s...s.s.polLpalppNsh.............t..sLs.Ip.....s.......lh....L.......c......st.G..susp.........cp.h.lsY.........spc.........chlph...l.hshpt....lss..Qhc..s.lh..hsshht+hGuVt.hh.tsh.hhDGl................................................................................... 0 4 23 30 +9783 PF09951 DUF2185 Protein of unknown function (DUF2185) COGs, Finn RD, Sammut SJ anon COGs (COG4859) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.30 26.60 23.60 24.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.93 0.72 -4.19 11 257 2009-01-15 18:05:59 2007-07-30 16:17:12 4 19 221 0 31 197 7 88.00 33 31.24 CHANGED sIVSNsVh.-+R.hsFha+EssphEsDSGWRhFSG.EoD-YssDP.cNFpIlSlusIhplDsolhtlLppP.ssAaEhs--Gs.F.cltDa .....CIsosplh...ps.ptlsahaRE.pP..s..t....p..s...DSGWRhhSGsEoD-YhscP....cNhsl....lslsslhph-Psllsl.ls.t.P..Go....saph..scpGt.a..l......................... 0 17 22 27 +9784 PF09952 DUF2186 Uncharacterized protein conserved in bacteria (DUF2186) COGs, Finn RD, Sammut SJ anon COGs (COG4861) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
20.10 20.10 20.40 22.00 19.60 19.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.77 0.71 -3.94 11 178 2009-01-15 18:05:59 2007-07-30 16:17:31 4 3 169 0 45 154 10 143.50 27 41.90 CHANGED RLh.ctccLL-pWAtsYPs+Lps+hphhcFsu..st.sWhpphsltshsA......hhuGEsAAscLss.l+PsshslY....stpshsscLlhptRL+c.......sspGs.Vplhcs.....FWs.p..................shtchslssPhllYADLluoGDsRshEsAphIh...-chls ......................................Lh.cscpLL-tWsts..YsstL.ttp..h..phht...h...su.......p.......p..hh....t.h..s..ls..s...t...............thuGEsAus.h.st..hp...Pts.h.hlY....sssshsscLlhpt+h.+.........sscGs...lhlhch......FWps...................ssp.p.sslsPshLlYADLLsos-sRshEsAchl+..-....t............................................. 1 18 30 37 +9785 PF09953 DUF2187 Uncharacterized protein conserved in bacteria (DUF2187) COGs, Finn RD, Sammut SJ anon COGs (COG4873) Family This domain, found in various hypothetical bacterial proteins, has no known function. 21.60 21.60 22.00 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.74 0.72 -4.24 7 524 2009-01-15 18:05:59 2007-07-30 16:17:44 4 1 440 0 39 139 2 57.60 57 85.89 CHANGED MphAcVGsIIEFKsGLpGIVEKVNENSVIVDLThMENa+-L-l-p+TVVNHKNYKII ..................MplAcVGsIl......E.F....h.s....G...l..pG+V.EKlN.-NSVIVDlTIM-NFs-...L...D..l...-KTVlNHKpYKIl....................... 0 5 16 26 +9786 PF09954 DUF2188 Uncharacterized protein conserved in bacteria (DUF2188) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4876) & Jackhmmer:B5ZC26 Domain This domain, found in various hypothetical bacterial proteins, has no known function. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.02 0.72 -4.05 101 747 2009-01-15 18:05:59 2007-07-30 16:18:01 4 2 643 0 192 487 10 59.90 24 57.15 CHANGED haVs..P.tpsst.......WtV+tp....Gss+s.sphasTpp-AlptA+phA.cp....p.su-lhIHsp.D.GpIpccpoa ..................pptt........Wtl+st....sspps.otphss+p-AlchApphu.+p.............p.ssplhl+pp.D..spht............................... 1 56 107 147 +9787 PF09955 DUF2189 Predicted integral membrane protein (DUF2189) COGs, Finn RD, Sammut SJ anon COGs (COG5473) Family Members of this family are found in various hypothetical prokaryotic proteins, as well as putative cytochrome c oxidases. Their exact function has not, as yet, been established. 22.80 22.80 22.80 22.80 22.60 22.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.81 0.71 -4.20 69 418 2009-01-15 18:05:59 2007-07-30 16:18:23 4 1 318 0 155 393 69 127.20 27 45.87 CHANGED llhshsuGFhLl.GPh..hAlGLY-lSRRhEp...Gcsssh..tpshts.....hstshsphhh........huhlLhllhlhWhhsAthlaAlhh.s.ss.shss.hsshht.lhhsspGhthlhlGsslGulhAsllauloVl...........ohPllL.D ................................................hlhshssGFhLl.GPh...hAlGL.Y-hS+chEp.......Gptssh..tpshps.....hs.shsphhs..........huhlL.hlhhlhWhhhuthlaAhhh.u...sh.shss....hsshht.shhsspshthlllusllGslhAhlsauloVl...........ohPhhlD.......................... 0 36 96 124 +9788 PF09956 DUF2190 Uncharacterized conserved protein (DUF2190) COGs, Finn RD, Sammut SJ anon COGs (COG5471) Family This domain, found in various hypothetical prokaryotic proteins, as well as in some putative RecA/RadA recombinases, has no known function. 
20.80 20.80 21.50 21.30 20.40 20.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.31 0.72 -4.27 30 380 2009-01-15 18:05:59 2007-07-30 16:18:52 4 1 318 0 62 202 18 105.40 45 95.92 CHANGED MKNYlQsGcslsls.Ass.uVsSGssVllGs..lhuVAh.sssssGpssphhspGVF.sLPKsuussh.shGstVYWDsospshTs..suouNshl....GhAhtsAussu..sossV+Ls ....................................AKNF.Vp-.GcTlslV..Aus...uIpSG-hV.V.Gs.............lhAVAl.TDIstG.....EsG-...G.....h...sEG.V......F.hLPK...h...p.s..D.Dh..psGppV.........YLKsu..t..lQLss........usu.ssh.l.....GVsWtsAusuu..ppVsVKlN.......................... 0 15 35 48 +9789 PF09957 DUF2191 Uncharacterized protein conserved in bacteria (DUF2191) COGs, Finn RD, Sammut SJ anon COGs (COG5450) Family This domain, found in various hypothetical bacterial proteins, has no known function. 23.40 23.40 23.60 23.40 23.30 23.20 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -8.03 0.72 -4.15 51 685 2012-10-02 18:44:02 2007-07-30 16:19:12 4 2 332 0 155 436 50 47.60 33 65.80 CHANGED MRTslslDD...pLlpcAhclsG.lpoK+plVchAL+sLlppcp....p+...th.cL .........RTsIclDD....cL...l..scA.hchsG..hpoK.+..s..sVchAL+p.llpttt......tc.....p....hh...................... 0 43 111 140 +9790 PF09958 DUF2192 Uncharacterized protein conserved in archaea (DUF2192) COGs, Finn RD, Sammut SJ anon COGs (COG4879) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.60 20.60 20.60 22.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.44 0.70 -5.13 12 48 2009-01-15 18:05:59 2007-07-30 16:19:24 4 1 46 0 29 44 0 223.30 36 83.96 CHANGED hs+plaRcRIcshhsLlpclLpt...thhoRpsllt.LpcsYcccsIcPhRGhu.spslY-KElAolYVVGKYGhGlhs-..t.FDclF.hEpph-EAhcll......lstphpEAhtchss.l..ts.ccspltRhLRllFTtslhGahsEptLlKsl+shp.s.hP-hpcphhsYutFYsAaKlAEsIAtGcIRs+.shcAhKhuhAlclGh..p+slPsDchIAhIAppVapVscphLs+lL ..........................................sKpla+cRIclhs-lhupllcp....psloRpsll-llcpsYcccsIcPhRGhs.ssslY-KELsolYVVGKYGLGLh..-.-...phF-+lF.hEpph--shclI........lsspspEAhcch..ss...t...sc-hlARsLRhsFTpslhuF.sE-chh+sL+slphu.hs-lccsspsau+FYsAaKlAEsIApGtlRs+.shpAhKpAlAlplGh..phshPpscYluhIAccVFpVscclLs+lL............................ 0 11 17 22 +9791 PF09959 DUF2193 Uncharacterized protein conserved in archaea (DUF2193) COGs, Finn RD, Sammut SJ anon COGs (COG4883) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
25.00 25.00 403.50 403.00 22.30 17.80 hmmbuild -o /dev/null HMM SEED 499 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.74 0.70 -6.24 9 32 2009-01-15 18:05:59 2007-07-30 16:19:37 4 1 31 0 23 33 2 497.80 65 99.61 CHANGED MpELYcKMlDEAMuAQ+ADV-slK+KRGpcFplpDAKPYVDsVpKMcssusQSpuVhsLHhsSVpuHa-lLsuLTcTVRPEDDPFVEHYQTPsILEILh-EDstFpKSl-tFlcuItKuEALIG+EulRRYGGFYGPTCVVDFALlPGSTSNVVNRILppsDIPhcHKQAILAAKSWGMNTSYGIG-hFAptlEtGsThuEAs+cEI-MlQhlYcpPlEAQAcLMDsAGppSFDsRKYMpsY+p+McssV+AAhD-GVHYGNIlTVPAYCVGDlAHHIuQSTFNMCKDDVVMAlIEAsTsVMEoTLpsAlspFKsEYslLoLATGuSAsAsEaILELDGFNAhMlVDLLTKRFHNYV.haPTRGAAAELHNsDFMDMIYRGW+hlDKAcRspsutpt.l.PhVuGasVDLSPIacNEVlMNPQRYAYPACAITVRFSALMRLADYPCLLTSEPVTAThMTNIIALHKEssuuPARsCKsCAsAsLlDaRHpYCQWKEAV ..MpElYcKMlcEAMAAQpADVpsIKcKRGpcFKIcDAKsYVDsVsKMcslssQupuVlcLHhcSVpuHa-lLsuLTcTVRPEDDPFVEHYQTPsILEILY-EDspF+KSl-KFIcsIs+uEALIG+EslRRYGGFYGPTCVVDFALIPGSTSNVVNcIL+psDIPtpHKQAILAuKSWGMNTSYGlG-sFApslEsGtThuEAlccEIchlphIY-pPl-AQA+LMDst..G....HpSFDVRKYMspYKK+McssVpAAh-ssVHYGNIVTVPAYCVGDIAHHIuQSTaNMCKDDVlMulIEAsosVM-sTL+ssl..sp..FKsEY-lLoLATGSoAsAsEYILELDGFNAsMVVDLLTKRFHNYVQLYP.TRGAAAELHNsDFMDMIYRGW+hlDKARRs+sGsts.ltPcVuuacVDLsPIccNEVlMNPQRYAYPACAITVRFSALMRLADYPCLLTSEPVTAThMTNIIALHKEssuuPARsCKsCAoAsLlDFRHpYCQW+EAV................ 0 8 18 20 +9792 PF09960 DUF2194 Uncharacterized protein conserved in bacteria (DUF2194) COGs, Finn RD, Sammut SJ anon COGs (COG4878) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
26.00 26.00 26.70 26.80 24.40 25.90 hmmbuild -o /dev/null HMM SEED 585 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -13.07 0.70 -6.47 8 111 2009-01-15 18:05:59 2007-07-30 16:19:48 4 3 107 0 24 106 3 535.50 29 88.40 CHANGED lhllLhh.hlu...........................................................................................................IshhhsKhGlpYshss+p.h..t.........s.sssslspsh-p....plh.................................LaDsNs-supcl.......................c..........pphcphLcYh+hphchls.u...pcshPSl.cY+sllIhhs-lstls-..pslhsalEsGGsllhAsshc+ssthssI...hGlp..p.-hsclcslhlspDlh..lGuscs.thpt.F.cpulslsLsscsplphlossc.pTPllWppchGpGphlhhNhsllpKp.lRGlassuhuhhsssslhPlINutshYlDDFPuPlPuG-tc.lp+-a.shoss-FYpKlWWPDlpKLuEcYsIKYTGlhIpsYpssTsss.shhctpspscalhaGppLL.ssuGElGlHGYNH.PLs.cs.s.hcc..tYh.Wts+EshtcuLcpLp+Flps.LhsshphssYVPPSNllscpGhcsLhcshPpIKoIuSsYhssc.ptsY.QEFplt-..cGhlclPRhTSG.hhsscphhhsshssLs.hGVhsHFlHPDDlLDtDRu.uphuWschhKsLcshlchl+cpaPaLRshTuSEsusulpcYtshcsphphscsulclslpshccpsthhlRts+sp..KlpGGp.....lh+h..susLYl 
...........................................................................h..............................................................................................................................hhp..s......t...t.....................tt..th.h..tth..tt.phh...........................................lhcsssp...upph.................................hpphcphhc.h+hthp.hs.s...pt.hsshtsYpslllhhsph.ptlsp..h.pl...hpaVppGGsllh.....utp.pp.ss.h...ssl..........hGlpptt..t.t.p.s.pslhhppshh...Ghsp.h..t...s..t...h.cou......h...p.......l...............sLs......pssp.lhh..ho........scs....hP.llWppshGpG+llhhNs.s.h.hpcp.sRGlhssshuhhssshhhPhlNupshalDDFPuPlPpspsc.lpc-Y.phosp-FYpplWWsDhhclucchsl+YTGhhI..tsYpspsps....p.hp..........t..p.s..h.........pph.hhaGppLl....ppGGElGhHGYNHQPLhhts.phhtth..sYhsWtsppsMttulppLhp..ahpp.hhPphphpsYVPPSNllsppGhpsLtcshPplcsIuShYhsspp....s..a.QEFthsp..c..s..lhphPRhoSG.hhhsc.p.hthhstlshhhlhoHFlHPDDl.........lD.............t-........Ru.tphsWpp.......Lhcshcphhp.lppphP.l+shTtSEhssthppatshphphphp-stlplt..htshhptshhhlRhpps.....plpsGp.....hpcl....ssplY.................................................... 0 11 14 20 +9793 PF09961 DUF2195 Uncharacterized protein conserved in bacteria (DUF2195) COGs, Finn RD, Sammut SJ anon COGs (COG4893) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 73.70 73.60 22.40 16.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.52 0.71 -4.32 6 124 2009-01-15 18:05:59 2007-07-30 16:19:56 4 2 121 0 8 50 0 122.80 52 92.21 CHANGED sllLuusshs.uPhhAsAus.h.l-NsLuAClch+sut.....psucshltl.hslph++ShG-CGChSALlpYpslstpt.....susphsLQcGlhsstpsssR...TLsLAsDssLst-pplslplGCsssc ....l.hhphsuGh.lslhA.sGp.l.IcNsLAtCVDh+st+.....pphsNllhlpsshpL+KshGtCGChSALspYsS.lsp......RtSthhLQpGlhsLtKpss+...oLsLAs-stLV+DtpltlpluCs.P.. 
0 2 6 8 +9794 PF09962 DUF2196 Uncharacterized conserved protein (DUF2196) COGs, Finn RD, Sammut SJ anon COGs (COG4895) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.20 25.00 24.00 22.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.88 0.72 -4.18 39 362 2009-01-15 18:05:59 2007-07-30 16:20:21 4 3 326 0 139 286 19 60.90 60 66.71 CHANGED hsGppRssIp.GhpVsI.VLKpDQRoGpLTcGlVp-ILTpSssHPHGIKVRLcsGpVGRVpcI .................MsGppRusIpsGhcVsI..VLKp..DQRT....G..+....LTcGlVKDILTNSssHPHGIKVRLp..D.G.Q..VGRVQpI...... 0 47 101 120 +9795 PF09963 DUF2197 Uncharacterized protein conserved in bacteria (DUF2197) COGs, Finn RD, Sammut SJ anon COGs (COG4896) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.20 22.20 22.60 22.20 21.30 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.96 0.72 -4.01 13 385 2009-01-15 18:05:59 2007-07-30 16:20:30 4 1 354 0 37 121 0 53.10 48 86.80 CHANGED M+VKCllCDpl-pID-cohhAKRLRNRPIHTYMCc-Cc-RIccpTppRhsTGpFph ...hpVpClICDpKshlD-coscuKRL+N.p.PI+TaMCc-CcpRlsp.p.c...utpa................ 0 13 22 31 +9796 PF09964 DUF2198 Uncharacterized protein conserved in bacteria (DUF2198) COGs, Finn RD, Sammut SJ anon COGs (COG4897) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 30.20 30.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.41 0.72 -4.22 9 297 2009-01-15 18:05:59 2007-07-30 16:20:41 4 1 297 0 21 75 0 72.70 63 95.13 CHANGED hphlhALhFPslLVlLFoRVTYN+aVGllLTsALlsAShhKGYTcohalIslDlsSLlAGaLahsphctct+pc ..IWYhSAAFFPClLVVLFSVITRSKWVGTlLTLILIGASlYKtYFHNEWIIFIDVVSLLAGYLIIDQLEFHK+Qc.... 
0 5 10 17 +9797 PF09965 DUF2199 Uncharacterized protein conserved in bacteria (DUF2199) COGs, Finn RD, Sammut SJ anon COGs (COG4899) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 27.30 29.80 21.10 19.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -11.08 0.71 -4.36 6 159 2009-01-15 18:05:59 2007-07-30 16:20:54 4 2 152 0 24 125 3 142.40 40 83.45 CHANGED YpCssCGEhHEs.P.SauFcAPssYhplsEEERps.pschuDDLChIp......DGpcaF.IRslLEIPIhGp-EsahWGVWVSlSEsSFc+Yh-oF..spssps.saFGWLsNhlPhY.s.Thu......LcscVHhpsDGpRPhlhLacus.Hs...LshD.ccGI ...................................CtpCst.H......tthP..uhGhpAP......Y...p....lstcE.R.s.....ps..c.....hs.....uDhCll.......DtpchF.IRusL.IPII.sp.....cE...sh.aGVWVSlSccSFsch...ppa...c..c..p..t..p..s..s..sYF.GhLssclPsY.s..Ths......L+s..sVphpssGpRPhlpLc.p.o.s.HP...Lsh-.hpGI......... 0 9 18 21 +9798 PF09966 DUF2200 Uncharacterized protein conserved in bacteria (DUF2200) COGs, Finn RD, Sammut SJ anon COGs (COG4898) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.00 22.00 22.90 23.20 20.20 19.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.39 0.72 -4.20 37 512 2009-01-15 18:05:59 2007-07-30 16:21:09 4 1 506 1 101 279 74 109.20 58 93.45 CHANGED RIasMoFASVYPhYlsKsEKKGRT+pEVDplIpWLTGYspppLpptlscpssFETFFspAPplNPstsLIpGVICGhRVEEIEDP..LMppIRYLDKLlDELAKGKtMEKILR ........+lYsMpFAuVYshhIsKsERKGRp..p-EVcpllpWLTGY-..ssltt.pLcccVTatsFFtpAPhhsPppthITGhICGVRlEEI-DP..LMQcIRaLDKLlDELAKGKshpplLR............ 1 45 82 95 +9799 PF09967 DUF2201 VWA-like domain (DUF2201) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4900) Domain This domain, found in various hypothetical bacterial proteins, has no known function. However, it is clearly related to the VWA domain. 
26.40 26.40 26.40 26.50 26.30 26.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -4.11 16 512 2012-10-10 16:07:06 2007-07-30 16:21:21 4 6 465 0 176 533 365 126.10 24 28.68 CHANGED lslslDTSGSIssttLspFtuElsuIt++h.tAp.lpllhsDsplpssphhcs..hct.lsclphsGGG.GTDasPll-tAsctcPsssVl..LTD.LpG.PschtP.thPVlWslst.psssss...P.FG+hlp......Ls ......hlslDoSGSh.......s...c..p......l......p........p..........hl..sElh....s....lhpp........h.....pt.....c.....l....pll.p.s.Ds.plpss.thlps...............t....ht.p.....h..p.....l...t..GuG.G.Tca....pssFc...a..s....p.....c....t......p...........p.....s.s..l.....l.....lhhTD..u......ps.......t..htP.....th.....s..ha.lh.t...........t...................P..au......h......................................... 0 70 116 150 +9800 PF09968 DUF2202 Uncharacterized protein domain (DUF2202) COGs, Finn RD, Sammut SJ anon COGs (COG4902) Domain This domain, found in various hypothetical archaeal proteins, has no known function. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.98 0.71 -4.57 12 134 2012-10-01 21:25:29 2007-07-30 16:21:30 4 4 106 5 72 171 17 151.70 38 65.03 CHANGED EtcsLlaMREEEKLARDVYlsLYchWt....lslFhNIup.SEppHhctVphLLc+Ysl.............pDPlhs-phGhFss.clQpLYNpLlpcGSpSh.DALpVGAhIE-LDIhDL.cchhpcosNp...DIphVYpNLhpGScNHhRuFsRslcphGhsYpPpYlSppcappIlpus ..........................EhcsLlaMhEEEKLARD..VYhpLYcpas....l.l.FtNIup.SEppHh.sA.lttL...lc+..Ysl..............s.DP..ss..s..p...t.h.G..hFsssc....lQpLYs..p....Llpp..Gp..p..S..h.h-ALpVGshIE-lDItDL.pchl.ppsssp..............DIphVYpsLhpGS.c.N.HLRAFhpt.L.p.p.h..G..h.sYpPphlsptthptllp..s......................... 0 23 45 65 +9801 PF09969 DUF2203 Uncharacterized conserved protein (DUF2203) COGs, Finn RD, Sammut SJ anon COGs (COG4911) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
20.80 20.80 21.10 25.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.89 0.71 -3.89 23 128 2009-01-15 18:05:59 2007-07-30 16:21:42 4 1 124 0 57 116 64 118.60 30 87.62 CHANGED phFolcEApcLLPhlcchhpph....tct+pplcch.......c...............htthcpph..cphtpplcttlcc....lpphGl.lKsl-.GLVDFPuhhss.c.laLCWKhGEs-ltaWHth--GFtGR+sIp ................................+hFTlcEApplLPhlcthlppl....tphppclpph.t.th...tt.tt.....................h.thctch..pphttplcphlpc....lpphGlhlKs..lc.GLlDFPuhhss..c.laLCW+hGEscltaaHth-pGFsGR+Pl........... 0 30 41 52 +9802 PF09970 DUF2204 Nucleotidyl transferase of unknown function (DUF2204) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4914) Domain This domain, found in various hypothetical archaeal proteins, has no known function. However, this family was identified as belonging to the nucleotidyltransferase superfamily [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.78 0.71 -4.81 15 161 2012-10-02 22:47:23 2007-07-30 16:21:59 4 2 147 0 66 166 23 151.70 25 59.21 CHANGED thpsLttlhppLp-+Gl..caVlIGusVlsLuhsp+hsssDVDLFlhshSsll-p-hac-lAcEpsW-hGposhGTsthlshls.uEplpVDh.aENIhDlalPtElLscucclsIsGlcl+sIslE-hlVLKA+tupcEsp-hlcclu...................chlh-..tplplshchl+chlphaPE.-scsIhcRL+p ..................t..pshttlhtpLp..cp..Gl......phhllGGs.shh.l.s......a...sp.c...hs.TcDlDh.....hh..t..s.s..us..h.....l.-t.....l...ccl.A.p..chuW...s..h...shh..s....t...s.....s.....t.hl......h......hs.t-sh..l......h...l.t......s..l.h...hs..t.phl.thp.....ht.h..t...uhc....shEDhh.lhhsphthpps....t...hc.lh....................hh.t.................................s.................................................... 0 26 44 54 +9803 PF09971 DUF2206 Predicted membrane protein (DUF2206) COGs, Finn RD, Sammut SJ anon COGs (COG4906) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
33.10 33.10 35.60 35.10 21.20 32.20 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.50 0.70 -5.64 9 29 2009-01-15 18:05:59 2007-07-30 16:22:19 4 3 23 0 23 30 1 355.10 19 51.06 CHANGED tp+tlLhllFhhullhSHYGsAYlhhhhlhhusLhhhlhthh..................p.ppt+hshpLlhlahllhhhW...YshlusuuhFpolssIhtplh......pol...hpp.LsPpsopGhtll.ss.sshhttLhKllplhs.hhIslGhltslhchhtpppp......lpYluhuhhhhhlLlAulhlPaFu.shNssRLaplohlhlAPahlIGh.shhchIsphhp+hh......sh+s.h....thh.ohFlllhhLhs.oGal.plhpspPhShulspts..............cshh.sp-VhuuKWluphpsps..lplhs-hhsshhhhh...............sYuslspphlssspph..................pssYlaLshhNlhpphhhlp.......hh.tthhhhN...hssshphltppNcIYDNtGStIYh .....................h.p+tlLhllFhhullhSHYuhuYlhhhhlhhshlhhhlht.h.h.........................p.pp.phshshlhlahshhhsW...Yhhlusushhts...lh...plhsp.hh........psl...h.p.hs.psspG...htlh....ht..shhthlhhhlphhh...hhlhlGhhthlhphhhtc.p...................hpY.lhhuhh.hllLlsslh..lPaa.u.shsssRLaplohlhLAPahllGhhthhphl.s+hhpthh..........t..h..............................phh.slalslhhLhs.oGhl.t.lhp.s.sho.hulspss...............tshh.s.spEltuspWlsp.psps............lhsshhph.hh.h....................a..ush...sht.htttpt..p................psuYlalsphNlppthhh.........hhstthhhhp....hsh.phlpspspIYsNusSplY...................................................................................... 0 4 12 19 +9804 PF09972 DUF2207 Predicted membrane protein (DUF2207) COGs, Finn RD, Sammut SJ anon COGs (COG4907) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
23.00 23.00 23.00 23.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.79 0.70 -5.92 46 1234 2009-01-15 18:05:59 2007-07-30 16:22:39 4 3 970 0 247 980 138 434.70 18 83.34 CHANGED sapIpphpsplplppcGshplpEplsYshcss..hcGlahshshsst...t.......hshpthslppss.......tssshplppssss..............phpltlasss.st.....sphphphpYplpssltha..pDh.uE..LpaplhGssWshslsplpsslphP......pshps..hchasasGshsspsp........tppsplthpsss.l.sssptlElchhh.Pp.....hsssstp.hptphhpph.t.tttht.p................tphhthhhslhhhlhhhh.hh.hhhah+hs+csphth.....................shhpa-hPtshsPshl.shlhptshs....................pc..ultAs.llcLhs+thlplpp................ccshhlplssps..........thsshEptllchlhspt....sspplshtpl...........................t.tpphppthppappslpcph..pptthhtthhhhpstt..............................hhhhshlhhlhuhlhh......................hh.....hhhthhhhhshlhhlhshlhhhh..........................tthhst..hotcGtphhtchcuF+paL..sDh.splphtsst..sltlW-chLsYAsALGlucc...Vt+phc ...........................................................................................tIpthpsphhlpt.-....ss....hphppplhaphppp.........hpG..hhslshhh.h.......s............sh.....htstpss.......................................hs.s.phpt.ptsh....................................................hhl.t.h..p......h..............s.h..phplpaplpshhhhh..............tDh...st................L.Wp.h...sp..s.....tl.tp.hphplphs.............ts...........ph.h.a..G..ttphp...............tp..t..t.....shp.hphps..h...tpt.....hplhhha..sp....................hsssts..t........h...t..p.h..h.............................................tthhhh.hh..s..hhh.h.l.s.hhh.............hh..h.h+p..s...p........h.......................................................................h.h.....ht.P...p..h..p..Phhh..t...h...l..h...p.ht.hp.......................pp....hht.As.llslhs+t.ltl.pt..............................................................tpthhl.ph.ptt...............h...hE....p.hlphhhttt...........p..hsthph.................
..................................................p.h.pphhpph..ppt.h..htph...p...................ht..t.................................................................h.hh.shh.hhlshhhh...............................................................hh..h.hh...h.....hh.h......h.hhh.l......h.hhhhh...............................................................h..h.sh....hs.ptGtphhh.hpuhpphL......p-h..sph.p.p.ttht...............shh.l.app..hLsaAhhhGhucc...h.p.h.............................................................................................. 0 89 175 205 +9805 PF09973 DUF2208 Predicted membrane protein (DUF2208) COGs, Finn RD, Sammut SJ anon COGs (COG4920) Family This domain, found in various hypothetical archaeal proteins, has no known function. 23.60 23.60 23.60 84.10 23.20 23.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.38 0.70 -5.12 13 43 2009-01-15 18:05:59 2007-07-30 16:22:49 4 2 43 0 27 35 1 234.40 30 94.20 CHANGED h+plLIS.l.lllaShVLohaPpaah.saILYFllahuIohshshRSh+..pshtshpEIusu+sLhE..EKcuscLhpKDcELhp.Ehpphs+uuhhshlhhhlalllhhllashl...hp....sthssslssths+FLsalshFEhhalluhh..lh+hlh.+.th.sshl..spsaKVoEKGIlsccphG...lthPhc...sschplNc-RKaVEl+.sspt.......plRLYs+-lc+L.slLpRl.KtLct ......................+hlLlStl.hllFShVLu...aa.PpY...a...h...haILYhllahuIohhhshRSt....+phpshtElspu+sLaE..EKcsscllpKDcEhhp.EhpchhKtshh.shhh.hhlhhI...lhhllasal..........................hthssshs.shhh+....FlsallhFphhaulshh...ht+h........lh..+h..pt...hshh..sp.uaKlpEKGllhscphu......lhhP.c...ssplphNc-+KaVEl+sssph.....sh+lRLYspDlc+lh-llp+l.+.lt.h......... 0 10 15 20 +9806 PF09974 DUF2209 Uncharacterized protein conserved in archaea (DUF2209) COGs, Finn RD, Sammut SJ anon COGs (COG4921) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
26.80 26.80 27.00 83.30 26.50 26.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.48 0.71 -4.05 4 22 2009-01-15 18:05:59 2007-07-30 16:23:06 4 1 22 0 19 23 0 123.00 38 95.52 CHANGED lAVDISGRH+.cDGaY.hVsAuVulEVsAs+I.pVcpVslhsVhp+-s.sLhDIVc.lccslsplu..F-hhIVsE+G-haNpPcWlspuhhupshKYtEohuEhcAIEhAH+lShSsR+LLhcELclp ...lAlDISGRHc.pstah.hVsAuVtspluus+lc+Vcplclh.httccs.slpDllphlc-slspls..h-h.IVsE+G-FaN.PchhVpuhhs+cFKYsEoluERcAIplAHHlShSsR+LLhct......... 0 3 11 15 +9808 PF09976 TPR_21 DUF2133; Tetratricopeptide repeat Anantharaman V, Finn RD, Sammut SJ anon Manual Family TPR repeat 25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.85 0.71 -4.25 350 1664 2012-10-11 20:01:01 2007-07-30 16:23:50 4 81 1626 0 351 1305 476 141.70 30 54.91 CHANGED pspQssptpt..AShhYpplh.pu.h.............s...sscssp...............l.....st............u..schtscassT.YAshAuLhhAKttV-ss-hstApspLph.sls.ps.pDs.sLp.slAplRLA+l.lpppphDs...ALptL....ss....stspua.suh.hs-l+GDlLlu.pGcpppA+sAYptAl ...............................................................................................................t.phppspp.AShtYpph.h.puh..................s.tscs.sp............................l...ss...........spcht..s.c.tt.s.o.....Y.u.s..hA..uLp.l...Apth............l..-p.s.....-............hc............p.A......tspLpp...u.hs....ss...p....Dc....s.L..p......s.....l..h...pL....R..L....A.c..l...l.p.......p...p......h..D..s.......A....L..pp.L.....cs...........l.p....s....p...u...a......s....u.h....s..t..-....l...+.G-hhhu...pGcppsA+sAaptu................................................. 0 100 205 279 +9809 PF09977 Tad_C DUF2134; Putative Tad-like Flp pilus-assembly COGs, Finn RD, Sammut SJ anon COGs (COG4655) Family This domain, found in various hypothetical prokaryotic proteins, is likely to be involved in Flp lius biogenesis. 
21.70 21.70 22.00 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.36 0.72 -2.97 75 253 2012-10-01 21:13:59 2007-07-30 16:25:29 4 5 204 0 106 358 18 100.40 25 18.09 CHANGED AuAtphsssss..........ss.ssAt...pssspsuh..........................ssst....shslpsGhass.....t.sstshhssss...................sssAVpVsssps.......ls.hhFsshhuh........sshs.lsApAsA ......................................................tuAtphsssss.............ts.ssAp.sss..tpNGh............................ssus............shsltsGpass........sssssppFs.s.uus.................................shNAVpVsssps.............VP..haFsu.h..........sshs..luAousA.............. 0 30 60 80 +9811 PF09979 DUF2213 Uncharacterized protein conserved in bacteria (DUF2213) COGs, Finn RD, Sammut SJ anon COGs (COG3566) Family Members of this family of bacterial proteins comprise various hypothetical and phage-related proteins. The exact function of these proteins has not, as yet, been determined. 20.90 20.90 20.90 21.00 20.80 20.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.96 0.71 -4.59 32 362 2009-09-11 16:51:15 2007-07-30 16:29:40 4 5 302 0 52 327 29 157.90 31 42.39 CHANGED ssRphss-GhLlscss.luRsGhphYsucElst.ts......pthlpVhRsPEpVFps-olASF-GhPlTlpHPc......c.VsscNW+slulGtlpNsRR....cu-hllADLllpDtsAIptI.-sG...hcElSsGYcs-a-.ss.......shcucQhsIpGNHlAl.VscGRAGspssl.u.............Dcs ...........................................t-GhLhsps.s..lu+s.G.h..Yhut-lsh.............t..hhl.hRsP-plFpspslsSapuhPlThtHP........thls..scNh.+.p..h.slGpltsstc.......pu.shlhuslhl.......hDps.............uIptI..psG....hcElSsGYpschp....ss.s....................shcu....hQhsI.t.s.NHlAl.V.scG..RuGspstl.tD............... 0 8 33 43 +9812 PF09980 DUF2214 Predicted membrane protein (DUF2214) COGs, Finn RD, Sammut SJ anon COGs (COG3556) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 26.30 26.10 21.40 20.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.91 0.71 -4.46 26 200 2009-01-15 18:05:59 2007-07-30 16:29:53 4 1 198 0 85 192 118 148.80 35 94.81 CHANGED hlssAllAalHaLuhhhsFuulssEphhLRss.hs..tspstplllsDhlYGluALslLlTGlhRlh.aauKGu-FYhpNPlFahKlsLalllulLSlhPTlsaIpWthslpcspsP..s.shsp+lthllplEhhhhslIPLhAshMARGlGh ....hhpAllAalHaLuhhhhhuslssEt.hhL...Rhs....hs......hsphpplhhhDtlYGluAlsl.LsTGlhRlh.hasK...GssaYlpsPlFHhKluLFlll...uLLSlhPTlshl+W+tsh..+p.....sthP....s...........ucspphtthlphphhLlhlIPLhAshMARGlG................................ 0 14 38 65 +9813 PF09981 DUF2218 Uncharacterized protein conserved in bacteria (DUF2218) COGs, Finn RD, Sammut SJ anon COGs (COG3553) Family This domain, found in various hypothetical bacterial proteins, has no known function. 24.30 24.30 24.30 24.70 22.70 24.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.77 0.72 -3.80 39 357 2009-01-15 18:05:59 2007-07-30 16:30:16 4 8 331 1 97 244 10 88.60 33 72.22 CHANGED SpuplsTspAoRYLppLCKHauHKhtVpaDsppGclshP....hGpspLpA..sscsLslplpusstpsLsplcpVlscHLpRFAhREs.lplsWp ..............pApl.TscusRYlppLC+Ha.u...HKhs.............sp....a..st..p..p..G.p..lphs..........su.s.spLss..csppLshslpu..spp.slsphcsVlssHlsRFAtREp.lslsWp................... 0 31 62 75 +9814 PF09982 DUF2219 Uncharacterized protein conserved in bacteria (DUF2219) COGs, Finn RD, Sammut SJ anon COGs (COG3528) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
19.00 19.00 22.30 20.50 18.10 16.70 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.86 0.70 -5.59 67 618 2009-01-15 18:05:59 2007-07-30 16:30:29 4 2 571 2 122 391 160 281.60 29 87.86 CHANGED hth-NDh.hh..u...sDp.tYTsGlplsastss...........thttthsphh...............t..thshpluQphaTPs..ch.....phpss.ssDRPYAGhLhhshshhthpsspt.......phshslGhl.GPsuhucpsQphhHp.llG.......sscsp..GW-pQlcN-hshplshctphphht............shth-ltshspsslG........NlhohsssGsthRhG...pLsssaGsstlp.ss.....ttthh.st..............shuhahasuscsphlspslh.l-Gssa........ppst..sl...shpthhscsphGlshth....t.shplsau....hshtopEFcsppc....hppaGolsluhta ............................................................................................Dp.sYTsGlhLuaopshh................................................shsphuh.pluQchaoPS..sh.........cp..spshhs...DRsau..uaL+sslp..h.u..hssshh...................chshslGsl.GP.sAhupcsQphsHc..lhG.......u-.c.p...uWssQlcN..chshslphhhphp.sh...........shhG..sshslhPpsssshG........NhhphluhGstlphG..pshssc.Ghuhls..st...........h.h.httpp.............shtahlFuGh-tRhlsps....hT..LpGpoh................psph......sV.....slpthVsphplGsshta...........s.shuholu....hsthTsEF+ssc-.......asahshslshhF........................................... 0 45 84 110 +9815 PF09983 DUF2220 Uncharacterized protein conserved in bacteria C-term(DUF2220) COGs, Finn RD, Sammut SJ anon COGs (COG4924) Domain This domain, found in various hypothetical bacterial proteins, has no known function. The family represents just the C-terminus. 
20.50 20.50 20.50 20.80 20.40 20.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.23 0.71 -5.12 31 376 2012-10-01 21:47:57 2007-07-30 16:30:42 4 4 352 0 131 417 28 166.90 19 45.82 CHANGED GL.......ppcsshlRhRhL-sshsh....sshs-lslshpphupl..plssppllIlENctsaLshP.shs....sulllaGuGauhsplss..hs........WLppp.plhYWGDIDTaGFtILspLRphh.PpscSlLMDppTl.pa.psthstE..sp.pttp....Lst.LossEtslYcpLhpsthtsplRLEQE+IshphlhptL ................................................................................................h....tthtth...th.s.s.pplll.lENpss...a...hshs...phs....................ssl..sla.G....u....G....h.sh.p..t.h.t.p....ht.................................hlps...p...plh..YWGDlDstGh....tI.....hppl+phh....s..........t..........h..p...s....h...h.Ms...tt..h....th....p..hs.p.....st..h.t...........h.t...Lp....t-.hth..hp...tl...ht..............thc.lEQEhl.........h...................................................................... 1 53 95 118 +9816 PF09984 DUF2222 Uncharacterized signal transduction histidine kinase domain (DUF2222) COGs, Finn RD, Sammut SJ anon COGs (COG4999) Family Members of this family of domains are found in various BarA-like signal transduction histidine kinases, which are involved in the regulation of carbon metabolism via the csrA/csrB regulatory system. The role of this domain has not, as yet, been established. 
20.50 20.50 20.60 20.50 20.20 20.30 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.56 0.71 -4.76 27 713 2009-01-15 18:05:59 2007-07-30 16:30:59 4 8 701 0 82 352 6 146.60 63 16.35 CHANGED hsRYp-L-cpLlspGsoIIEPLAIASEhGhppcsREsl++LIohsHR+pSslV+SIAlFDssNcLFVTSNaHpsFctLphscshshPphhplphpssolILRoPIluEu...ph....sss.stssssphLGYIAlELshsslpLpQYp-lh ....VHRYNDLQRQLEDAGASIIEPLAVSoEYGMsLQNRESI.GQLISVLHRRHSDIVRAISVYD-.s.NRLFVT..SNFHLDP.Sp..MQ..Lsss........sPFP......RpL.o....VsRcGDl...MILRTPIISES.......................YSP......DESssuDAKsopNMLGYIALELDLKSVRLQQYKEIF.............................. 1 11 26 54 +9817 PF09985 DUF2223 Domain of unknown function (DUF2223) COGs, Finn RD, Sammut SJ anon COGs (COG4945) Family Members of this family are found in various prokaryotic membrane-anchored proteins predicted to be involved in the regulation of amylopullulanase. 18.60 18.60 19.80 22.60 18.40 17.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.51 0.70 -5.03 16 159 2009-01-15 18:05:59 2007-07-30 16:31:13 4 21 101 2 90 164 135 215.70 27 30.19 CHANGED hlhphsDPhGDDpGsGsYsYPTsssF.tP..GhFDLhphplh-sussahFphphscl.sNPWsGPpGFShQhlplYlspppGupsssl......sGsNVph..ppsWDhslhls.GW.s..............ushlhsssGsth.....ssshpltssPs.spsIlsplsKphL.t.s.s..phthhllluu.DGYGPDp.hRP.Vu...spus......pWphGGus.......sst..ssssPhVlDlLlPpu..tsQtphLosa..tssphAll...hslsl ...............................h.lhphpDP.GDDpG.s.G.sYsYPssssFp....GhhDLhphplhpt..s....sshhhphphpcl.sNPWs..uP..............sGFShphlplYlc...h...s.p...u.u.ssshht............Ghsssh....ptsWchslhls..G....W.t...............................tshlh.ss.Gsth..............psshpl...tss.s...tssIhsplspphl.t.st......s........sht...hhVhssu.D...Ga...ussp.....hRs.ls...........spss...............tWphGGus.................tstPhlhDhLssps.........tQ...Lt....................h........................................... 
1 35 48 69 +9818 PF09986 DUF2225 Uncharacterized protein conserved in bacteria (DUF2225) COGs, Finn RD, Sammut SJ anon COGs (COG1655) Family This domain, found in various hypothetical bacterial proteins, has no known function. 20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.47 0.70 -4.88 23 252 2012-10-11 20:01:01 2007-07-30 16:31:21 4 3 242 0 98 280 16 223.70 28 87.25 CHANGED LYcKclpCPVCspcFpscpV+ouuhRlh........++DsDhph+Yps..lNPhaY-VhVCspCGYAAhcpcFs.plsstpp-hlppplss+aps+.......sast..RslspAlpsYKLALls.hplhcs+.S...ppAhssL+lAWlYRhtpc................cppEhhaLppALctaccAYpsEshshp.shscho...............lhYLluELsRRhGspc............-AlpWau+llssssssp....plh-hAR-tacll ...............................................hccplpCPlCsppFpppcl+outh+lh........cc-pDhpscY.ps.....lsPhhYsVhVCPpCGaAuhppc....Fs...p.l...s...stp...t...chlppp.lsp+.h.p.p............sast...RslppAlpsYtL.Al.ls.hph.hp...t.phs...phAhhsL+lAWlaR.hpc.................pppEphahppAhchYpcuhpp-.p.h..s.p..shsphp.....................................................................................lhY..L.lGpLph+hGshc............-Ahpahu+llsptpssp....hlhchAR-.ap.................................... 0 51 80 88 +9819 PF09987 DUF2226 Uncharacterized protein conserved in archaea (DUF2226) COGs, Finn RD, Sammut SJ anon COGs (COG1667) Family This domain, found in various hypothetical archaeal proteins, has no known function. 
20.50 20.50 20.60 20.60 19.90 20.40 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -12.01 0.70 -5.29 4 41 2009-09-10 17:02:35 2007-07-30 16:31:33 4 5 23 0 31 41 5 233.70 16 84.24 CHANGED MhhP+s..pVVEschh.....GshcEIhc.luohs...GhlcIos+cG-tLh-uallVpsGKlVushlpclcotEchsuEEAlccLh.......uhpssVlDVYchsc-clphhhchps.....................psls..h.phcl-h.ht.................................................Es..t...s.pth-hsEEhlc-P-..........................................................pREElLKKhGIK.Ps.Ep.lEsILc-hhc.............sshEEhKppl.pph..plh+hpGhs-VhVh.csK.tEt..........sscpllthl++c.................slcEh.ochhR ...............................................................................................................................................................................................................uhl.h....t.....shhhh.tt..lh..................t.h.th..............hl-laphp.p.hphh..........................................................................................................................................................................................ps..t....t.pt.p.h.pphhp..c.p.t........................................................................lsR-cLhKphsl+..s.-p.l-pllcphht...................th...h.pp....h....t..................................................................................................................................................................... 0 9 13 23 +9820 PF09988 DUF2227 Uncharacterized metal-binding protein (DUF2227) COGs, Finn RD, Sammut SJ anon COGs (COG2389) Domain Members of this family of hypothetical bacterial proteins possess metal binding properties; however, their exact function has not, as yet, been determined. 
25.00 25.00 27.90 27.90 23.90 23.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.28 0.71 -4.62 28 198 2012-10-01 21:01:47 2007-07-30 16:31:49 4 1 192 0 62 150 38 162.70 39 96.84 CHANGED PSGRsHDRlTl..h.ulPhshlhshh........lsts..huLhs.uuuaLhuGLhhSPDLDlp.S..cthpRWGhLRalWhPYpcll.HRShhSHG.llGohlRLhYLtshshslshllshhh..h.hhshsh.s.h.....thlhphhppp.p.llshllGLEhuuhhHhluDhsssshchh................................+tR+ .................PSGRTHs+IsLh...uLPslhhhha............huhos....hLl......s.uhuaLhGshhLoPDLDha..S.....psap+WGhLRhhWhPYp+lhsHRShhoHshllGsllRlhYhhllhsshhhllshlh..t..............................splhphhpp..ap.hpllshlhGlhluuhLHlIuDtlsoptK+hh..........+...++c...................................... 0 16 42 57 +9821 PF09989 DUF2229 CoA enzyme activase uncharacterised domain (DUF2229) COGs, Finn RD, Sammut SJ anon COGs (COG3581) Family Members of this family include various bacterial hypothetical proteins, as well as CoA enzyme activases. The exact function of this domain has not, as yet, been defined. 
29.40 29.40 29.80 29.40 28.90 29.30 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.43 0.70 -4.88 84 1010 2009-01-15 18:05:59 2007-07-30 16:32:13 4 8 744 0 247 884 34 205.20 36 18.45 CHANGED plGIPRsLshYc.aPhWt.TFFscLG..ac...VllSstos+clhchGhcshsuEs.CaPsKlhHGHltsLl.c.....Ktl-hIFhPplst.pp.c........tpasCPhltuhP-hl+ssh.........ptshphlsPhlshps......cthtcphhc.h..............hp...l..spc...............-lppAhcpAhcp.cpacp.p...lcppucchlthhptptpp...................sIlLhGRPYpl.Ds.lNhGIschlsp.hGhsVlTtDsl ..........lGIPRsLNhY.EsYPaWt.ThFspL..G..ac...VllSscSo+pla-p.GlcolsS-s.CaPAKlsHGHltsLl..c.......+.slchIFh.....Pslsap....pp...c.........p...ss.spaNCPlVtuYP-sI+sNh........................ptslphhsPalshssp.....ctlhcplhc.h........................hp.h.s.l..stp....................................-hppAlptuh..pc..pta+p.c......lpp.tG..ccslthhpp.pstt............................................uIVLuGRPYHl.DPcINHGIschlss.hGhsVLTEDul............. 0 136 211 231 +9822 PF09990 DUF2231 Predicted membrane protein (DUF2231) COGs, Finn RD, Sammut SJ anon COGs (COG4244) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.00 22.00 22.00 22.10 21.80 21.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.39 0.72 -3.68 51 367 2009-01-15 18:05:59 2007-07-30 16:32:28 4 6 261 0 187 377 94 102.30 23 55.45 CHANGED ah-luaWslhsuslhshhAslhGhhEhhLs.shh...........tspsshhhHslsulsllulhss........palhRhcssp..pl.hhhLsluhlhssllslpuaLGupLsacaGlt .......................................................h.psuhW.lhsuhlsuhhAslsGhh.-hhhs....tt......................psppshhhHslhslshhslhss.....................phhhRtp...sst.....tl......h..........h..hL.hlu.hls..hs.llslsuaLGupLsapaGlt......... 
0 46 119 163 +9823 PF09991 DUF2232 Predicted membrane protein (DUF2232) COGs, Finn RD, Sammut SJ anon COGs (COG4241) Family This domain, found in various hypothetical bacterial proteins, has no known function. 24.90 24.90 25.00 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.72 0.70 -5.42 46 942 2012-10-03 02:46:00 2007-07-30 16:32:53 4 5 917 0 226 724 132 260.60 20 81.98 CHANGED ulhsllhl..lsha.lPlluhlhshhhPlPhllhhh+puhphulh..sllssh.llhhlhss.hhu..lhhhlhhulhGlllGhhl+cppshtps.....lhhushshlluhllhahlhhhhhs....ls...hhsthhphhcpshppshphhpph.Gh.....s.....tphpchhpph.hphlthllPuhlllsuhlhuhlshllst.ll..+Rhphp.h.thssFppaphPppllWhhllsllhhhhhpt.sh....phlhhNlhhllshlhhlQGlullhaah+p+.phspsltl...lhhllsllhs..lhhlltlLGllDl ..............................................................h..hhlhhh..lshh.lP.h.lu...h..l...hthhhslPhhllhh+.t.........t.......p...hulh....uhlssh.....llssl..l.u.....s.s..h.s..........lhhh.l..hhhl.hul....llG..h..h..h.....+...c.....p.pshtph........................lhhs....s.ls..h.h.l.hh...ll.hh.hl.ht.h.hht...........ls......hs.....h.h.p.hp..ps.hpp..s..h..p.h.httt................................tt.pp.hhpph....hp.hth..hhPuhlllhs....hhhshlshhls.h...ll.....p+h.t.h.....s...h.s..h.s......h...t.hph.s.....t.....h.lhhh....h..l....slh...hh.h..h....h.t.........s....................lh...h.N.h.hl.hshhhh.lpG.luhltah..hpt+...thsthh.hh...lh..h..lhh.hhhs....hh.llhllGlhDh...................................................................... 0 85 164 200 +9824 PF09992 DUF2233 Predicted periplasmic protein (DUF2233) COGs, Finn RD, Sammut SJ anon COGs (COG3698) Domain This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 25.10 25.10 24.80 24.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.76 0.71 -4.24 288 1904 2009-01-15 18:05:59 2007-07-30 16:34:37 4 62 1410 1 489 1560 149 177.80 21 44.80 CHANGED ppslhulN......u.....s...h....hs.ptt...................s.G.hhlp.......sG..phht...............sttttshhshp.........tsuthhh..............shpphth........tshpps.l..t.G...........P..hLl.p............sGp.......hh............t.sssst........................tsRoAlGhs.....pcG..pllhls....l-G.....t......tGh.olt-hup.lh...p......h.Gsh.s.....AlNLDGGuSoshh......................hth..h...spP....ssst.................Rslssslhlh ................................................................................................................................t..h.huhN..uu...h......ap.p.t..............................shG..hhlp............sG...phlt..................ttspststhhlp............................sGhhhl.................s.....h....................................................................pshp.u.l....suG...........PhLl.p......................................sGp.........ls..............p..tss............................................sppsRoulGhs..........ccG....pllhll...s-s........................uh.shh-hAp.hhps.......L..G.s...s.............AltLDGGuSoshhh..........................t..h.........s..............................R.l.shhh..t........................................ 0 212 355 421 +9826 PF09994 DUF2235 Uncharacterized alpha/beta hydrolase domain (DUF2235) COGs, Finn RD, Sammut SJ anon COGs (COG3673) Domain This domain, found in various hypothetical bacterial proteins, has no known function. 
21.10 21.10 21.80 21.70 21.00 20.80 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.00 0.70 -5.03 90 1245 2012-10-03 11:45:05 2007-07-30 16:35:17 4 29 586 0 602 1402 86 226.40 21 51.26 CHANGED +...pl.l.lChDGTs.....Nshpsstp.............................oNVh+la.chl..........spss............pQhsa...............YpsGlGT...............................tpthpphhssAh...Gtuhspplh....pA........YpF.........LhcpY....psGDc...Ia.....lFGFSRGAasAR........sLA.shl..sphGL.........lp.......................hhhhsacthtp..................at.ttht.t.pt...................phhtthppphs........................p.ssplcFlGlaDTVsul........................................stahhp...............th.....ssth...........sst.................VcpspHAlulDEpRttFp.s....Lhp.......................................................................................................................................sssttsp.......ppVWFsG..sHuDl...........GG...Gas..............ttpttLSc.lsLsWMlpcA 
..............................................................................h.lhhDGT........pp.ttt....................................................oNlhclh.phh...................tt........................................hh.h............Y............GlGo.............................................t....h.t.hhs.sh.......u..........s...htt...pl....tu.........................................................hth..................lhp.h..............ssp.......lh............................hhGF...SRGAhhsR..........hs..thl......................................................................................................................................................................................................................................................................................hplphl.GlaDTVsul...............................................................................................h..t..................h.....h.h........................s..................sphshHhlu..hc..EpR...ht.F..s..ht...........................................................................................................................................................................h.phhasG..sHu...Dl..................GGGh............................................hs...hsh...hh....................................................................................................................................................................................... 0 164 331 481 +9827 PF09995 DUF2236 Uncharacterized protein conserved in bacteria (DUF2236) COGs, Finn RD, Sammut SJ anon COGs (COG3662) Family This domain, found in various hypothetical bacterial proteins, has no known function. This family contains a highly conserved arginine and histidine that may be active site residues for an as yet unknown catalytic activity. 
21.50 21.50 21.60 21.50 21.40 21.10 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.35 0.70 -5.08 128 1684 2009-09-11 16:50:31 2007-07-30 16:35:46 4 16 655 0 676 1614 392 240.60 18 70.56 CHANGED hshcsatchssh...hhsussullhphhpPtlstulhcp................usap..............p..csht........RltcTspalhsss...................au.sspputthttcVRthHtpV+us.......hss............................G....tsYsAhssplhhWstsshshshl.tuhpt....h.sh.lsss-t-phapphthluphlGl..c.h..Ppotsphtphhpphh.pt.lpsstps+pl.sphlht................hsss..hhh.......t.hhthhh........hhshshLsstspc.hlGlshsshtpth....hhhhhht........tshthl.ttl ...............................................................................................h.......p.......hhhshtulhh..p..h.hhPtlstulhcp....................upht.......................p..cs...hp.........RhtcTstalhsss................................as..ssp.....utt..httpV+.thH.tplcus...p..........................................................G........h..a.ps.s.stlhhastsshhhshl...pshcp.......h..s....t..............l.o.t.t-.t-phap.p..h.thhuph.h.............Gl................c..h.............Ptoh..........t..........pht..........p..........ahcp......h..........h................p.....lp...s..sttscpl..hphlht.................hsh....h..................t.ht..thhh..............hhshshl.s..shh..pc...hhs.l....hsshtpth.......hhh.hht...............hh........h............................................................................................................ 0 164 398 587 +9828 PF09996 DUF2237 Uncharacterized protein conserved in bacteria (DUF2237) COGs, Finn RD, Sammut SJ anon COGs (COG3651) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 25.40 25.40 22.90 19.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.54 0.71 -4.61 79 426 2009-01-15 18:05:59 2007-07-30 16:36:10 4 2 408 3 165 373 1622 108.90 51 88.89 CHANGED shNVLGpsLpsCussPhTGFaRDGtCpTsspDhGsHoVCAhhTsEFLpaS+tpGNDLSTPtPcasFPGLKPGD+WCLCAsRWhcAh-sGhAP.VhLcATHppAL-lVsL-sL+paAh ..................................t.hNVLGtsLps..Cu...p..c...P.....h.....TGFaRDGhCpTsspDhGpHoVCAlhT...sEFLpao+u.h.GN.DLoTPhPpa..s..........FPGLcPGD+WClCAsRWhcA......h........p...........s...........G........h..AP.VhLpATHppsL-hlsL-hLppaA.h............................ 0 60 121 149 +9829 PF09997 DUF2238 Predicted membrane protein (DUF2238) COGs, Finn RD, Sammut SJ anon COGs (COG3647) Family This domain, found in various hypothetical bacterial proteins, has no known function. 26.30 26.30 26.40 26.90 26.10 26.20 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.88 0.71 -4.80 67 737 2009-01-15 18:05:59 2007-07-30 16:36:24 4 1 709 0 132 449 45 140.30 50 68.56 CHANGED sllshslLhhoh+.+F..hoshsYhLlhlahhlhhlGuH.YTYAcVPhhsW........lp-hhGhpRN..pYDRluHFh.Ghl.Ahsh+ElllRpphl......ps..thhhhhslshshulSAhYEllEWhsAlhs.G.csutAFLGoQGDlWDsQpDMhhA ...............................................................llllslLlsTt+.Ras.L.TsLhYsLIFhasllLhVGGp.YTYAcVPls...............lp-hl.Gh.o...RN....sYD+LGHFhQGLlPAlls...RElLlRthhl........+s.tthlsFLlsslsLAlSAhYELIEWWsAlsh.G.puA-s.FLG..TQGD.WDTQpDMhsA................. 0 36 83 113 +9830 PF09998 DUF2239 Uncharacterized protein conserved in bacteria (DUF2239) COGs, Finn RD, Sammut SJ anon COGs (COG3644) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.00 25.00 35.60 29.20 20.40 19.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.27 0.71 -4.68 37 209 2009-01-15 18:05:59 2007-07-30 16:36:37 4 3 197 0 74 206 12 178.10 51 91.18 CHANGED tsaTAFpGpRRlAoGsLh-VALAl+...tths.tsssuslLlFDDsTGRslDlDLRGoss-lhARhs.sss......................stsut...........PR...GRGRPKLGVVAREVTLLPRHW-WLuuQPGGASsALR+LV--ARRsssspDRtRtAp-AAY+FMoAhAGDLPGFEEAsRALaAsDtsphsphhtuWPsDlRsHAhtLA .......saTAFsGpRplAuGsLspVAlAlKpths....ttssuslLlFDcsTG+slDlDlRGosp-lhuRhs.s....................................tsttsR..GhGRPKLGVVAREVTLLPRHW-WLusQPGGASVsLRKLV-cARRspsst-+tRtAp-tAY+FMSAhAGDhPGFEEAsRALaAsDtsthtphIsuWPsDVR-ashtLA......................................................................... 0 14 32 54 +9831 PF09999 DUF2240 Uncharacterized protein conserved in archaea (DUF2240) COGs, Finn RD, Sammut SJ anon COGs (COG3612) Family This domain, found in various hypothetical archaeal proteins, has no known function. 20.00 20.00 20.10 21.60 19.70 19.60 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.50 0.71 -4.75 16 65 2009-01-15 18:05:59 2007-07-30 16:36:58 4 3 65 0 51 72 48 134.50 29 87.88 CHANGED pL+hsVAAPF+p..ptsspLscs-FlauLohDp+WhSP-pApcllctAtppGLLpp.csGslsssFDsuslplPhuF+Pscslh.................ppcssaEclLDtlsusuGls+p-lVuclNp.hp-pls..lsh-sAulllA+chGlDls....shhpcl .................LphslAsPFcp..cupsplscsEFlhsLohDhcWhSs-pAKcLl-hAhpcGLlpc.csstlhssFDssplplP.sFpPs..tphh.................pccssF-cll-hlss.tGhs+pEslutlNp.hpcchu..lsh-sAAllhA+cpGlDlschhpc.h....... 2 10 34 44 +9832 PF10000 ACT_3 DUF2241; ACT domain COGs, Finn RD, Sammut SJ anon COGs (COG3602) Domain This domain, found in various hypothetical bacterial proteins, has no known function. However, its structure is similar to the ACT domain which suggests that it binds to amino acids and regulates other protein activity. This family was formerly known as DUF2241. 
25.00 25.00 28.70 28.20 22.90 18.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.50 0.72 -4.25 48 273 2012-10-02 00:29:19 2007-07-30 16:37:16 4 9 265 4 97 252 79 71.00 38 48.18 CHANGED MsGEp-LspLLpoMpPpLpsupaVFCols......shst.hsLpsl.usF+EpEGlTllLpcppAcptGL.saphsh+h ........MsG.psLptLLpoMsPpLpsGsYVFsTls............shssh.sl..pPl..uoFREs..EGLTLllppcpApptGL..shphhht................ 0 19 48 76 +9833 PF10001 DUF2242 Uncharacterized protein conserved in bacteria (DUF2242) COGs, Finn RD, Sammut SJ anon COGs (COG4259) Family This domain is found in various hypothetical bacterial proteins, and has no known function. 25.00 25.00 80.60 80.40 22.70 17.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.53 0.71 -4.24 15 159 2009-01-15 18:05:59 2007-07-30 16:37:31 4 1 154 0 57 182 20 120.40 59 41.58 CHANGED CEAARRALLSQGYllssucsctV-GpKsFQ.PcsDsHlplphRVVCAscshcushollFssALQDRYALKKosNSASVGVGuLGSlSLPhuSscDoLVKVuSETIsuupFY-RFFpLlc+YL ..CEAARRALLSQGYllouuc.sctV-GsKsFQ..PssDsHlpIsF+VVCAss..s.....tDGspShsaVNAlQDRYuLKKosTSASVGluVLGSlSLPIGSoDDShVKVASETlsuutFY-RFFsLV-paL.. 0 5 21 41 +9834 PF10002 DUF2243 Predicted membrane protein (DUF2243) COGs, Finn RD, Sammut SJ anon COGs (COG4329) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 55.00 54.50 20.50 20.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.05 0.71 -4.41 22 165 2009-09-14 14:06:24 2007-07-30 16:37:41 4 1 162 0 73 156 1 137.90 42 79.08 CHANGED uGlLlGlGLuuFlDtIlhHQlLQWHHhhsp.......................shclshl.DGLFHuhoalhslsGlhlLh...sh+pchsass+thhGulLlGhGhFpLh-GllsHplLtlHpV+....ssphLhaDlua.s.hGhlhllhGhhLlpps.pp ......uGlLhGlGLs.uFlDtllhHQLLpWHHhhsps.......................shclslluDGLFHAhoahhslsGLalLh...sh+R...+tsash+thhuulllGhGsFpLh-GllpH+lLtlHplRh................ss.shLh....aDlsa.l.huslhllhGhlLhhpsts..... 
0 18 47 61 +9835 PF10003 DUF2244 Integral membrane protein (DUF2244) COGs, Finn RD, Sammut SJ anon COGs (COG5488) Family This domain, found in various bacterial hypothetical and putative membrane proteins, has no known function. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.45 0.71 -4.81 79 402 2009-01-15 18:05:59 2007-07-30 16:38:12 4 2 380 0 144 369 562 136.80 29 81.50 CHANGED LpPppSLs.cuahhhhsshsslshhsulshh.hhGu.WsllsFhGL-lhulhhAhphshRpupttEplslsstphhlh+psspGptpcachNshWs+lphctpst..h..tlsLpupG+cVclGsFLu.--RtsltppLppAL ............LpPppSLu.ptFhhhhsslsshshhhuhhhh.hhGA.WslhsF.hGl-.llslhhAFthsh..R.pApshEcIslst.phhlhchsssu.chppachNPhWsRlchtppsc................tlpltucGcpl.lG.p.FLs.c-RtphAptLptuL......................... 0 33 78 107 +9836 PF10004 DUF2247 Uncharacterized protein conserved in bacteria (DUF2247) COGs, Finn RD, Sammut SJ anon COGs (COG4304) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.40 25.20 23.40 24.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.82 0.71 -4.53 10 135 2009-01-15 18:05:59 2007-07-30 16:38:36 4 1 131 0 18 82 1 158.70 35 97.50 CHANGED Mppsh...........hhtpthchsW+slhhGhppph.....lupcsVspaAhchlshuspspp...ElhLtlstchsspclsplLssLss........p.ppphptsh+KWh.alhLshlacspp-hsDsLcclEcIYADFsYPE-ltpFlpYMPscs...s..hs.ppNpcRlhspaccaLcp-puc ...................................................l........-hpsp+l+LSWcDIh...WGYpp....Kh.........luassls..saA.ch...hohu-pspt.....hcL.uh.ts+.sh.Elp.lL--Lus.............cpcsho.cpWL.all...Ls...cl.F.pp..+..cpap.-PLtcVEcIYsDFDYPE-I-SFVpYMPspDt..h.PstaohcENhtRLascWccYLssts..t...... 0 4 12 16 +9837 PF10005 DUF2248 Uncharacterized protein conserved in bacteria (DUF2248) COGs, Finn RD, Sammut SJ anon COGs (COG4307) Family Members of this family of hypothetical bacterial proteins have no known function. 
22.50 22.50 22.70 22.70 22.20 22.40 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.21 0.70 -5.52 49 377 2009-01-15 18:05:59 2007-07-30 16:38:49 4 1 332 0 127 355 70 318.00 43 91.29 CHANGED pCssCup.laF-NstChpCustLGasP..pptplhslps.....ssps.....ap..................................................................................tha+hCsN.tshssCNWhlsscss...pshChuCchs+slPDLup...spNhttWpclEtAKRRLlYsLh+LGL.....Plhs+.....p..DsttGLsF-FLu-.....sssts.VhTGHssGlITlNluEADDucREphRpphsEPYRTLLGHFRHElGHYY...WspLlt.ss..shLttFRslFGD.EctDYutALppHYpsGP.P.ssWpppalSuYAouHPWEDWAETWAHYLHlhDsL-TAtuaGlplps...p.......th.........hshcPhpss.........shpp.llstWlPLohulNulNRSMGpsDhYPFVLsssVlcKLpFlHpll ...................................pC.pCsp...lhFENstChpCustLGa..pphtlhslts.....stts.......h....................................................................................t...........t.hphCsN.tthstCNWll..s.......ssss............ssh....ChuCphscphPs.ss...sts.htpWt+hEsAKRRLlhpLhcLtL.Plhs+....pp..DsptGLuFchLu....................sstpt.VhTGHssGlITlsluEuDDscREphR.hp.MsEPYRTLLGHFRHElGHY..Y.aspLlt....ss....s....hLptFR.plFGD.-ctDYstALp+HYpp.GsP..ssWp-sal.S.u.YAThHPaEDWAETaAHY.LHIhDsL-TAtuhGlsht................shp..........hshsshtss....sFpp.llp.tWlPLohulNplNRSMGpsDhYPFVLsssVlcKLcFlHpll.................................................. 0 35 76 101 +9838 PF10006 DUF2249 Uncharacterized conserved protein (DUF2249) COGs, Finn RD, Sammut SJ anon COGs (COG4309) Family Members of this family of hypothetical bacterial proteins have no known function. 
24.40 24.40 24.40 24.40 24.30 24.00 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.25 0.72 -4.40 163 821 2012-10-01 20:42:06 2007-07-30 16:39:03 4 21 465 0 266 571 35 68.20 30 60.34 CHANGED plDlRsl..sstp+astIhsshssLtsGcs....hhllsc+-PhPLhtplptc......uh.saphhppssst.aclplp+ ..........LDlRsl..sshp.astIhushssLpsGps....hhlls..s+..cPhP..L.htpLcpc.........Gt.paphhppusst...Wclpls.................................... 0 76 190 239 +9839 PF10007 DUF2250 Uncharacterized protein conserved in archaea (DUF2250) COGs, Finn RD, Sammut SJ anon COGs (COG4344) Family Members of this family of hypothetical archaeal proteins have no known function. 24.40 24.40 24.40 24.40 24.30 24.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.90 0.72 -4.08 18 100 2012-10-04 14:01:12 2007-07-30 16:39:16 4 4 63 0 53 98 6 90.80 35 76.76 CHANGED hcplhpc.hhLplLpHLccuslDYuKslschoclPLpcVpchLccLcchGLlE+ss.usolKpo-sKhKhupEVH+HHTYYpLoRcG-hlLRcl ...................h.....c.htlpILtaL.cc.h.s.sDY.u+h..lA+.....p.....h........chsLp-lpchlccLEchGLlERsp.up.......hl.Kps.....ct.......+h..K........pp.Es++...HHs....YYcLoRcG-hllRp................ 0 15 32 47 +9840 PF10008 DUF2251 Uncharacterized protein conserved in bacteria (DUF2251) COGs, Finn RD, Sammut SJ anon COGs (COG4316) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 33.70 41.00 22.80 16.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.21 0.72 -3.87 9 172 2009-09-10 23:17:17 2007-07-30 16:40:18 4 1 172 0 20 82 1 95.30 56 69.74 CHANGED hltupS..ps+huVVFEDDG-TGYFYALDhp.p.tpPIlDuLalYNVpslo..stctPpclpItWS-DGhpAhLlINGYPHAlFDFsphhGYs+otaP.P ........h..topu..cthhusVFEDDGpTGYFYAhD.p.....p......tssIlDALHIYNVE.DlS..Dt..H..I........PscVcIsWsEsuphssLLINGYPHAsFDFsppsGYCRsGFP.P......... 
0 6 14 19 +9841 PF10009 DUF2252 Uncharacterized protein conserved in bacteria (DUF2252) COGs, Finn RD, Sammut SJ anon COGs (COG4320) Family This domain, found in various hypothetical bacterial proteins, has no known function. 22.30 22.30 22.40 22.40 21.70 22.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.18 0.70 -5.56 62 553 2012-10-02 22:05:25 2007-07-30 16:45:36 4 4 397 0 238 582 38 375.30 28 85.90 CHANGED R....hspLlshRat+MusSPFuFaRGoAtlhtpDLss........ssssuhps.lCGDsHltNFGhau...ss-tpllFDlNDFDEshhGPapWDL+RLusSlslAucpp.uh.....................scppspphlpshspuYpcphpphuphshhphh..................hpspsspthlpchhc+App+.sptphhs+hophs........sthRhhp.psstlhtls................pstpptlppthppYhpols..sscp.hhsp....aplpDlAh+l.sGlGSlGhpsahlLlpuptps...c.sl..lLplKcAptSslstah.....tt.....tppGcRVVtuQRhhQusuDhaLGasphss..............+...s......FhlRplpshKsslsh..cp...h....st..cphtpauchhGtsLA+AHA+.us...css................hlsshhupscphs..pslscaAhpYAcQscpDattahcs .................................................................spLlslRat+Mus..SPFuFaR..GoAtlhtt.DLsp.............................tsssuhtl.lsGDsHltNFGhas...ssctpllFDlNDFDEshhGPapWDl+RLusSlslAucpp.uh...................sppptpphlpshspuYcpphpphsphsh.phh.........................................................htsppspthlpchlp.+Appc..s......ptphhs+hTphs.t...........sth+hh...t..psshhhtls.........................psptt.tltshh..p.p..Y.h...po..l.......tst...th.h..hsp....aplhDlsh+l.lGlGSlGhpsahlLlpupsss.....s.sl......lLplKEAt.s..ushs.ah...............t...............tppGcRVVtuQRhhQssuDshLGas.p..hss...............+.s......................ahVRphpsh+tslch..sp.....l......s.....pphtthuphhutsLApAHA+.us....sss...................hlssh.h..u..p..s.cphc....pslspaAhtYuspsctDattahp................................................... 
0 67 139 199 +9843 PF10011 DUF2254 Predicted membrane protein (DUF2254) COGs, Finn RD, Sammut SJ anon COGs (COG4325) Family Members of this family of bacterial proteins comprises various hypothetical and putative membrane proteins. Their exact function, has not, as yet, been defined. 22.30 22.30 22.50 22.70 22.00 22.20 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.94 0.70 -5.99 89 512 2012-10-02 20:27:15 2007-07-30 16:46:06 4 3 472 0 161 484 58 341.10 25 80.16 CHANGED lppplWhhPslhulhullhuhlshhl-t....hhs..........hshhhtsss-uscslLoslAuohluVsshohSlhlsulstASuphoPRll.phhhpDpssQhsLusFlGoFlYuLlsLhslcssh.t................sspls.h.hhslllsllslhsLlhalcHlsphhplssslppltppsppslpphhpp.thss....sspsth.ts.......................ltutpsGYlQtI-hpsLtchApc..psshltltstsGsFVhtupslshh.......................pth..sccptcpltps....hslGppRohpQDspFGlppLsEIAhRALSPGlNDPsTAlsslspLsplLsp....................h.sshhtspps...sRlhl.shs.h..pcllcpuFstlpp.uussht.....Vhh+lhpsLtpl ...........................h..pplWhhsshhslh.ulhhuhlh.hh.hct.hhs............thhhps.ss-sscslLssluuohlslsshshSlhlsuhs.AouphoPRhh.phhl.c.Dp.ssQssLuhFluoFlaullslhslptst.st..............hspls..hhhslhlhhls..lhsllhalp.+lsp.hplsphhsplpptsh.p.s.lpphhtp.s..phs...s..............s.t..t.......t.ths.....................................lhutpsG....YlptlchspLtp....hs.pp......pph.plhlhstsGsalh.utslhhl...........................................tt.s....p.p..h..pp....lhps....hhlutpRohpQD.pFulp.lsEIAhRALSPulNDPsTAlpslsplsplLsh...............hht.t.sp.hhh..ttt................plhl...hs.h..pchlcssFs.ltp.uuspht.....VhhplhpsLtt.............................................. 0 56 107 135 +9844 PF10012 DUF2255 Uncharacterized protein conserved in bacteria (DUF2255) COGs, Finn RD, Sammut SJ anon COGs (COG4334) Family Members of this family of hypothetical bacterial proteins have no known function. 
29.70 29.70 29.90 29.80 29.00 29.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.41 0.71 -4.13 14 98 2009-01-15 18:05:59 2007-07-30 16:46:22 4 3 93 0 40 99 4 111.40 34 86.82 CHANGED --Lc+IspA-DL+IAPaRcDGpThuTPTWIWsVhV-scLaVRuYpGpsScWapuAlsQ+AG+IpAuGhst-VsF-sl...Dssls-pIDsAYRsKYup.StYlsPMls.tcARuATl+lh ....................-Lsplspu--lpluPhctDG.ThtpsshIWsVhlsscLYVRuhpGpp.SpWYpuAhsppsG+IpsuGhphpVsFtss....DttlpsplDpAYRpKYut..s.hlss.Mls.ttsRsuTl+l.h............. 0 16 31 34 +9845 PF10013 DUF2256 Uncharacterized protein conserved in bacteria (DUF2256) COGs, Finn RD, Sammut SJ anon COGs (COG4338) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 25.20 26.80 24.60 21.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.61 0.72 -4.20 71 341 2012-10-03 05:12:49 2007-07-30 16:46:39 4 2 336 0 116 321 140 41.70 59 69.36 CHANGED t+KspLPp.KhCslCtRPFsWRKKWp+sW-pV+YCS-RCR..+p+ ......+KspLPp.KlCsVCtRPFsWRKKWt..+sWD-V+YCSERCRRp+..... 0 42 83 101 +9846 PF10014 2OG-Fe_Oxy_2 DUF2257; BsmA; 2OG-Fe dioxygenase COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4340) Family This family contains 2-oxoglutarate (2OG) and Fe-dependent dioxygenases. It includes L-isoleucine dioxygenase (IDO) [1]. 
23.10 23.10 23.40 23.30 22.30 22.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.20 0.71 -4.92 113 432 2012-10-10 13:59:34 2007-07-30 16:46:58 4 4 397 2 130 367 28 193.40 28 79.36 CHANGED htssass.LshDpahsst...aRhRRaupathpsss......lhth.......scpsahQssp..aNth..pGul.RpFpsls..sshhpssshppllphhhphhsthp...........pshplplHQhRlpsssst...upsoPEGlHpDGhDal.hlhhlsR.pNl...pGGpopl...........asssppthhphpltcsh-slllsD..pclhHtlTPlpshsssp.......uaRDlLlloa ..........................................................................................tssasp.LshDsahsst........aRhRRYuphthpssp..........lh.h.........scpsahQosp....hNth..pGslhRpFpslp..sshlpssshppllthhhphsshss............shplplHQhRltAs.sp....u.ssPEGlHpDGhDal.slhhlsR.pNl.....tGG-shl...............hps.s.p.ptshhhplhcsh-shllsD..pclhHssoPlpshsssp.......uaRDlhVlT............ 0 28 70 103 +9847 PF10015 DUF2258 Uncharacterized protein conserved in archaea (DUF2258) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4345) Family Members of this family of hypothetical bacterial archaeal have no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 25.00 25.00 61.80 61.80 22.80 18.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.44 0.72 -4.17 10 57 2009-01-15 18:05:59 2007-07-30 16:47:08 4 1 53 0 39 54 0 76.90 44 43.67 CHANGED ELsTGlVIAuuYADKLRRVlFAsl.....pstlssc-llRssuELN+pLF-hLVpchclsKhDVVRIpV-..Yslc-uKlha .ELsTGllIAutYADKlRRshaAth.....pphlsp-pllRssuELN+tLYcpll.chplsKhDVVRIsV-..aclcss+lha....... 0 9 18 27 +9848 PF10016 DUF2259 Predicted secreted protein (DUF2259) COGs, Finn RD, Sammut SJ anon COGs (COG5497) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 26.70 26.20 19.10 24.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.14 0.71 -4.59 13 87 2009-01-15 18:05:59 2007-07-30 16:47:21 4 2 83 0 35 83 3 188.10 26 82.36 CHANGED AFtpaGlpDGSGFPYAslahlDstpspFl.sushRsphsc-su.......sLsthhpcspppspshh.stthpscsshhss.sshoEh.........oucsaplhlhs+ss.sshctshpLplpp...hshs.st.htsht.cshGFpL..htt.suspsphlpts+ulP.uRsCshsYcIcpVhl...psspsshlhlltscphGFEG.PstR..alslss+ .........hFtpYGhp-G...h.sYuslYhlDlspssFlpu.ushppphpccht.......sLh.p.hhpptphtspphthsth..hpup.hhhhsp.s...lsEh.........................suspaplhlshch..s...tthsssaplpLp.....................................phssushpc.tlpss+s.sh.RtsshsY+Icclhl....sspsshlhllchh.hs.pG.tshR..ahs.sh+........................................................................ 0 11 22 26 +9849 PF10017 Methyltransf_33 DUF2260; Histidine-specific methyltransferase, SAM-dependent COGs, Finn RD, Sammut SJ anon COGs (COG4301) Family The mycobacterial members of this family are expressed from part of the ergothioneine biosynthetic gene cluster. EGTD is the histidine methyltransferase that transfers three methyl groups to the alpha-amino moiety of histidine, in the first stage of the production of this histidine betaine derivative that carries a thiol group attached to the C2 atom of an imidazole ring [1]. 
23.00 23.00 23.20 24.50 22.90 22.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.37 0.71 -4.25 314 818 2009-01-15 18:05:59 2007-07-30 16:47:50 4 20 692 0 412 836 262 124.80 34 32.13 CHANGED GlDLh....K-sshL.sAYsDutGVT.....AtFNhNl.Lp+lN+cLs...uc.............FchssFcHh......Aha..sspps...c.lEMaLhupcspsVpls......s........hsh..pFttGEpI+....TE.ShKash.....cphptlhpp.................AGhpstphWsDsps.hFulhl ...............................GhDLsKDsshLhtAYsDutG..VT.................AtFNhNl.Lpcl..NccLs..u-.............Fchcs.FpHh........Aha......ssppp...R.IEMaLhupcspsVpls.........t.........shthpFptGEplc....TE.ShKas......cphpthhpt.................A.GhphtphWoDsps.Fulh........................................ 0 120 255 343 +9850 PF10018 Med4 VDRIP; Vitamin-D-receptor interacting Mediator subunit 4 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4552) Family Members of this family function as part of the Mediator (Med) complex, which links DNA-bound transcriptional regulators and the general transcription machinery, particularly the RNA polymerase II enzyme. They play a role in basal transcription by mediating activation or repression according to the specific complement of transcriptional regulators bound to the promoter [1][2]. 
25.50 25.50 25.60 26.10 25.20 25.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.32 0.71 -4.89 33 292 2009-01-15 18:05:59 2007-07-30 16:58:54 4 8 248 0 212 284 0 182.90 23 60.52 CHANGED hphhppLlst-cpLppslcpLpcapchppclppLcp-spplDppl+pllcpLtshcppLpshs......t.pp........................tshsptppppls......sc-LLcYA++ISKaops........................................................................................htPssah........t..h.Pa.PtE-phRtGhLuphshtss...t..h.pstpp.....................................ssstsptpsptttpppss ................................................t.thhphLlptDpplpptlch.........h.tpptph.ppchppLcp-scphD.p....c.lppl.cpLpcscp.Lssss.......sppch..........................................pshtpspcttl..s......sc-llcYA+R.ISttsts....................................................................................................................tP.sah...........t..hhPa.Ps-.phRtGhLuphp...st.th.............................................................................ttssttthttt.............................................................................. 0 60 104 170 +9852 PF10020 DUF2262 Uncharacterized protein conserved in bacteria (DUF2262) COGs, Finn RD, Sammut SJ anon COGs (COG4296) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.60 25.60 25.60 26.20 24.70 25.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.99 0.71 -4.01 50 250 2009-09-11 11:06:32 2007-07-30 17:17:49 4 5 177 0 44 249 1 141.30 24 59.29 CHANGED p-phlGsFphs+pht..ta-uphphh.spp.lplph...............t.stpccpphpp...slp....thcc...................l.......hpphcchccpl+phsucc..ll-lAN-.Whps..................s-p................................loc-cFhpplpLs...slslpscu.........shphaasDs....DhFhGHslhVpsshstshp..susl .......................................................hGphhhs+php...hapsphphh...spp.lpl.h..................stppppthpp...hh.pthcc...........................hhtphcpac.pch+phhucc...LlchAN-.Whc.p..................c-pt...............................lTcEpFtpplp..lp...slslptss.........shshaacDs....DlFhGHsIhlshshctslhsApl.................. 0 24 33 34 +9853 PF10021 DUF2263 Uncharacterized protein conserved in bacteria (DUF2263) COGs, Finn RD, Sammut SJ anon COGs (COG4295) Family This domain, found in various hypothetical bacterial and eukaryotic proteins, has no known function. 19.30 19.30 20.80 19.80 19.20 18.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.92 0.71 -4.21 20 353 2009-01-15 18:05:59 2007-07-30 17:18:14 4 11 248 4 211 358 19 139.00 26 40.87 CHANGED ApETlsll...........ssGhYlsssustlslptphctu.hpsophasPs.h..............s..pTtlcVssssThsuA....ppLsp........ssppVslLNFASA+NPGGGalsGApAQEEsLsRsSuLYhsLhp.....h...Ythp+tppshhYoDth...IYSPsVPlFR-c .............................................................................................................p.hh...t.........................................tt.splpVh.s.tsolpuA..................hpLtt........................tttpluVLNhASsppP..GGG.ahsG.At.A.QEEsLC+pSsLhssLpp......h...Y......p...t..........t..t.....hYs.th..................IYSPsVhVaRs....................................... 
3 74 132 181 +9854 PF10022 DUF2264 Uncharacterized protein conserved in bacteria (DUF2264) COGs, Finn RD, Sammut SJ anon COGs (COG4289) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 30.60 26.80 24.40 23.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -11.98 0.70 -5.80 52 781 2009-01-15 18:05:59 2007-07-30 17:23:14 4 6 507 0 203 599 25 287.20 29 59.32 CHANGED psRpp.............hhpshpplhpslhshh.upspt+hsh.spossta....sppss..s.lEuauRsLhGlAPhLpt........tt........phh-hhhcultsGsDP...pss-YWshhpst.s...QplVEsAsluhuLhhu.ctlWpsLsptp+pplhpaLpphp.phphspsNWhhFplhlphsLpph.Ghth-p...tlcpslpclcp.aYl...........GDGWYsDGs.........................thphDYYs.uauhHhhhlhhsphhscpc.t.........hhpphppRtpcFuppa.+hhus-Gsh.saGRSlsYRhAshuhhuthuhtph....tsl.s.Gpl+shhppsl+hahsps.shasps......GhLolGashsp.thsEsYsusGSsYWuh+uF.lsLuLPtscsFWoutppshs .....................................t.sht.h.thh.phhtsl...h.s.tt.....thph.stttshh............sptts..p.hEuhsRshhshushht.......................................hhphhhpulhtGsc.P...pp.s.pY.h....s...ht.sh................QhhVEhu.luhsLhhs..p.hWp.Lstppppplhpah.t.p.hp.php...hs..sNWhhFplhlphhLpph.Gh.hst..............hp.thth.hc.p..aYh...........GsGWatDGs.........................th..thDYYs.uashp.h.h..hhphh.ct.t..................hthhhpRht.astt...h.hhhs.p.....G.th...hGRShsYRhA.ht.huthsht........th........h.s.G.h+..thhtttl+hahpp...thhstp......GhLolGa.h.....h...p.h...hu-sYsusGSsYWuh+sF.lsLulPtscsaWps.tpsh.................... 0 61 135 180 +9855 PF10023 DUF2265 Predicted aminopeptidase (DUF2265) COGs, Finn RD, Sammut SJ anon COGs (COG4324) Family Members of this family of bacterial proteins comprise various hypothetical proteins and putative aminopeptidases. Their exact function, has not, as yet, been defined. 
25.00 25.00 26.30 25.30 21.60 21.40 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.99 0.70 -5.30 26 162 2009-01-15 18:05:59 2007-07-30 17:23:26 4 1 160 0 64 186 97 334.50 43 92.38 CHANGED hhhlhhshhLuGCss.......luYYhQuspGplplMpttcPlcchlsDs..sssstL+pRLthupphRpFAsp-LtLPDNsSYRpYA-LpRPaVVWNVhAsPchSLp.+pWCFPlsGCVuYRGYFspssA+stAspLpppGhDstVtGVsAYSTLG.....WF....sDPLLsTFlt.as-s-LApLlFHELAHQslYlpsDTsFNESFATsVEp.GlcpWL...A............ppusssscspatthcpRRpQFpuLlhsTRpcLptLYtsshsssptptt....KtthhpphRpcYupL+sp.W..................uGhuuYDtWhspslNNAcLushusYsphVPAFpuLacpsstc............WsRFYsuVcpL..upLPts-RctsLp ...............h..hhhshhLuGCss.......luYYhQ.spGphpLlps+cPlsclluDP.....spsstL+p+L..tpupphRpFAoccLtLPDNpSYRhYA-LsRPaVVWNVhAsPEhSLpshpaCFPlsGCVuYRGYaspusA+upAstL+tpGhDVhluGV.AYSTLG.....WF....sDPlLSohlt.as-...t...cLAsLIFHELAHQphYlcs....................DTtFNESaAohVEppGscpWh....t...............tp..u..t.s..stpsph..pp...tRRpQFttLlLsoRpRLcsLYss.s..l..s..sstp...Rst....KsthFppLRpcYtpL+sp..W......................uG...ptaD.t.Whsp.P..h...NNAcLhshuhYcpaVPAFtsLF.cps.sGD............WspFYstVcpL..up..LPh.tpRptsL................... 0 16 35 51 +9857 PF10025 DUF2267 Uncharacterized conserved protein (DUF2267) COGs, Finn RD, Sammut SJ anon COGs (COG5502) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.20 25.20 25.20 28.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.64 0.71 -4.06 135 355 2009-01-15 18:05:59 2007-07-30 17:24:29 4 7 195 1 178 362 14 123.70 24 78.49 CHANGED chp.palpclppcssh.ss............pppAhpsscuVLpsLtcRLsscputcLAupL..............Ph.lpshhhp........stttssp.........hstccFlp+lup.h...............pt..spt........sucpsspAVhsslpctlstsch.cclh.spLP.....p.shcpLa .....................hppalpplppcssh..s............cppAhpsscuVLpsLt-RLssp.......pstcLuupL..............P..lpshhhp..............sttsstp..........hshccFlp+lup..h................ss....sst........ssctsspAVhsslpctlstsph.cclhspLP.....p.shptLa................... 0 41 114 159 +9858 PF10026 DUF2268 Predicted Zn-dependent protease (DUF2268) COGs, Finn RD, Sammut SJ anon COGs (COG5504) Family This domain, found in various hypothetical bacterial proteins, as well as predicted zinc dependent proteases, has no known function. 23.60 23.60 23.60 23.60 23.50 23.40 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.09 0.71 -4.93 29 528 2012-10-03 04:41:15 2007-07-30 17:24:48 4 1 428 0 84 353 10 181.50 33 68.64 CHANGED happhpcphpphtphapts.slslh.......lh.sstpp.hht.phGhpussuhsshlhlh..lssshsps.....clpullAHEaHHssRhphhphs.tshTLh-sllhEGLAEpastphaGcphhusWss.hspppLpphhpphlcpphchp...shtphsshLaGsthut........hPthhGYulGYplVppaLppsst.oltchshhsucpIlc .........................................................................apphpp.hchhhs.hpt..sls.........lLl.uss.tp...hh.........p.h.sht.ussu.hsshlhlh...lssp.ols........phpullAHEhpHshRhp..h...............l..chp.....................s..hoLh-hllhEG....LAEpaspplaGc....tt.h..uPWso....ths..............p......ph.............hhcp...hltpplplp...thhchtsaL..a.G.....hsp..............hPph...hGYAhGY+lV+talppssh...sh.th.hsls.ApcIl.................................... 
0 26 58 68 +9859 PF10027 DUF2269 Predicted integral membrane protein (DUF2269) COGs, Finn RD, Sammut SJ anon COGs (COG5528) Family Members of this family of bacterial hypothetical integral membrane proteins have no known function. 27.80 27.80 27.80 27.80 27.70 27.70 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.06 0.71 -4.43 56 382 2012-10-01 21:57:53 2007-07-30 17:27:15 4 1 316 0 152 427 95 142.60 26 94.20 CHANGED hlLKhLHlluuslLhGoGhGhAaahhhAp................................cotcstslAtss+hVVhuDal..FTssusllQPlTGhhLsals..GasL..spsWlhhSlsLYlluGshWLPVlh.lQh+hp...chApsAspssts.ls.tYhphh+hWhhhGhPAFh.uhlsIhaLMVsKP ...........h.hlchlHlluuslllGsshsh...A.hhh.h.h..Ap................................+su..s.s...ss..hu.t..s...t+h.ls.hschh.....h..h..ss..s.slh.PloGhhlsthh.....Gh....sl....sp..s..Wl....lhSl.sLasl.u.u.h.h.Wlsllh..hphcht..........chAtt..utt..tst.......ls.th.h..phhp..hhhhhuh....uh.h...shlslhhLMlhKP.............................................. 0 36 81 113 +9860 PF10028 DUF2270 Predicted integral membrane protein (DUF2270) COGs, Finn RD, Sammut SJ anon COGs (COG5530) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 40.70 68.60 22.80 21.10 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.06 0.71 -4.81 19 87 2009-01-15 18:05:59 2007-07-30 17:27:43 4 1 68 0 51 93 11 180.30 40 77.60 CHANGED uHLYRGElsRussWRoRLDpTTNWAVsssussLShuFuospus.hslLlshlhlhhFLhlEARRYRaa-laRuRlRhhEpsaaAshLsstths.css..WpphLApDhp+Ppa+lShhcAluRRLRRNYhaIhhlLhlAWhsKlslHPh.........stShsphlppAulGPlPGhhllsssshahhshlslul .hHhYRGElsRhssWRsRLDpTTNWAlsshAAhLShuhSossssHhsLLhuhhllhlFLhlEARRYRaaDlaRuRVRhlEcsaaAthLss.tss.sss..WpphLup-hcpPphplohh-AluRRLRRsYhalhhlLLlAWlhKls..s............stshsphhpsAulG.slPGhhVhsslshaYsshlsls.h.... 
0 9 29 42 +9861 PF10029 DUF2271 Predicted periplasmic protein (DUF2271) COGs, Finn RD, Sammut SJ anon COGs (COG3656) Domain This domain, found in various hypothetical bacterial proteins and misannotated lysozyme proteins, it has no known function. 27.30 27.30 27.50 30.60 27.10 27.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.78 0.71 -4.68 62 357 2012-10-03 16:25:20 2007-07-30 17:27:59 4 5 342 0 130 295 47 138.00 31 75.56 CHANGED Asshpl..slplsplsstpactPY.....VAlWlpsu.csphspTLtVWhpps........+Wh+-.lRpW..WR....csucptp..lDGloGAT+ss.Gphthshsssts.hssLhsGpYplhlEsuRE.sGscph.....hclshsl.....spststphpGpsElusl ...................thslslpl.pLssh.thctsY.....VAlalpcs.puphstoLhlhstps.................KWhcc...L+pW...aR....tsu..ts..sp...lD...GlTGAThsu.Gc...shch.shchscsLhsusYplhlEuAhE.ctscph.........sclshsL.....ssputshpspGpp.lus.............................................................. 0 37 87 110 +9862 PF10030 DUF2272 Uncharacterized protein conserved in bacteria (DUF2272) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4322) Domain Members of this family of hypothetical bacterial proteins have no known function. However, given its similarity to the CHAP domain it seems likely that this is an enzyme involved in cleaving peptidoglycan. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.56 0.71 -4.82 13 136 2012-10-10 12:56:15 2007-07-30 17:28:21 4 5 128 0 47 156 22 179.70 29 62.05 CHANGED EssshuhpplutYW...t-ussscshhsGs.su.....................asWSAAFISWVMRpAGlss.pF.hussHusYlssAh...psGhsshshhth-PssttPcPGDLlCssRGRsphlsasuhhssss......asuHCDIVVu....sDs..pplpsIGGNVpsSVuMcplsLscuGpLsss...............................sss+hsWhllL+s ....................................................t.hhhthsttYW......p.shht..hst.......h..h..s.....................sWSAAFISWVM+pA....G..lss...Fshu.ss.H.hpYlpsAh.......pss....p......sh.hhh-s..ss.htP.psGDLlChsRups....h...t.h.ss.h.tshs...........hstHCslVVu....scs...cplphIGGNV..t.soVs....hpphslspt...Gpl....................................shh.WhhlLc............................................. 0 8 32 40 +9863 PF10031 DUF2273 Small integral membrane protein (DUF2273) COGs, Finn RD, Sammut SJ anon COGs (COG5547) Family Members of this family of hypothetical bacterial proteins have no known function. 25.40 25.40 25.50 25.80 25.30 25.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.10 0.72 -4.35 33 978 2009-01-15 18:05:59 2007-07-31 08:34:00 4 1 886 0 111 282 4 49.80 39 73.55 CHANGED p-hhppaph.llGullGhllAllhlohGFaKslllllhsslGhhlGhhl.cp ...........phhcpa+h.IIGuLlGLllAlLhlolGFaKTlllllLshlGlhlGhhl-...... 0 47 72 97 +9864 PF10032 Pho88 Phosphate transport (Pho88) KOGs, Finn RD, Sammut SJ anon KOGs (KOG4554) Family Members of this family of proteins are involved in regulating inorganic phosphate transport, as well as telomere length regulation and maintenance [1][2][3][4]. 
25.00 25.00 25.60 33.50 21.50 24.50 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.15 0.71 -5.08 27 200 2009-01-15 18:05:59 2007-07-31 08:46:54 4 5 167 0 148 182 2 178.70 39 92.04 CHANGED MsPtloNlhlhLlhMQlu++..lsh-D.PpllhhlRhhYlsspslhhslYhasph+IspKN.....DhTsLKYV-PusPhuutt.....cs+hssTTV+-YDlpplpph.h+uhhhGluMMuFMHlYhKYTNPLlhQSI.slKuAhEuNlV+IHlaGpPApG-..L+RPF...Ktssuhhuuhtt..upspoDKpol-sAEpsstu..GsKs- ......................hsPtlpNlhlhLshMQlu++..lsh-D.PsllhhlRshYlsopllhhslYhYlptpIspKp...........Dh.TsL...K.YVEPs.sh..uupp...............E.+hlsTT...Vp-YDhpplcph.l+u.hhGluM.MuhMHLYhKYTNPLllQSIhslKuAhEuNlVKIH.laGpPApGD..LcRPF...K..ts.suhhutht....t...stspoDKtul-tAEcshtuGhK............................................... 0 57 95 131 +9865 PF10033 ATG13 Autophagy-related protein 13 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4573) Family Members of this family of phosphoproteins are involved in cytoplasm to vacuole transport (Cvt), and more specifically in Cvt vesicle formation. They are probably involved in the switching machinery regulating the conversion between the Cvt pathway and autophagy. Finally, ATG13 is also required for glycogen storage [1][2][3]. 
25.00 25.00 26.90 25.50 21.20 20.80 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.60 0.70 -4.94 38 337 2009-01-15 18:05:59 2007-07-31 08:57:08 4 5 247 0 227 334 3 203.60 28 31.14 CHANGED IppFahKuutlIlpSRl...............................hsspssst+sN+W...................FNlcl...p-psthp-pL+hW+s.tshtp...............hPPllIEsaLDhptLssspslhh.Dttsp.h.s.tu.......................sp..-llLERW.hl-hc.............sttttssstssphsplYKc....sllLFRSLashs+lLPAa+lp+ph..........ttt.......................slslthRlh.........sGp..h.stsp.sLopPl...hst.p...............phc...phpFsslpTshGpLplsVsYRssscF ..............................................................lp.FhhKssplIlpuRh.....................................................................t...s...spsss.s.psscW....................F..Nltl...c-h.st.h.pcph+thhs..tth.s............................hh.shslElhLcs......................................................................sctsphlLEpW.plchs..........................................pp..s.sp.phpsh.slY++...................hhlLhRSLhshsRlhPAa+lt+ct..............s.......................t.phslhaRlh...............up........s.hsp...................................shp....phphusltTshG.plslSlsYRhshtF......................................... 0 67 122 185 +9866 PF10034 Dpy19 DUF2211; Q-cell neuroblast polarisation KOGs, Finn RD, Sammut SJ anon KOGs (KOG4587) Family Dyp-19, formerly known as DUF2211, is a transmembrane domain family that is required to orient the neuroblast cells, QR and QL accurately on the anterior-posterior axis: QL and QR are born in the same anterior-posterior position, but polarise and migrate left-right asymmetrically, QL migrating towards the posterior and QR migrating towards the anterior. It is also required, with unc-40, to express mab-5 correctly in the Q cell descendants [1]. The Dpy-19 protein derives from the C. elegans DUMPY mutant, Swiss:P34413. 
20.40 20.40 21.20 20.70 18.80 20.30 hmmbuild -o /dev/null HMM SEED 642 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.31 0.70 -6.35 12 344 2009-01-15 18:05:59 2007-07-31 08:58:36 4 6 92 0 200 352 2 453.40 30 90.58 CHANGED Alhsuhlah.altplaEN-paFSpluplEREhuFRTEhGLYYSYYKsll.cAPSFhcGlahlhpDshTEYPpsINsLpRFNlYPEVlLAhhYRsatthhphh..................................-PshFYltslFhLpGlhhshhahhuhhLS.GShLuGllsshhah..FN+s-sTRl.aT.PLREsFuaPFlhhQhhllThhL+p...ptspptphlhlhhoshh...FhLsWQFuQFllhTQlhuLFshashuhlsss...Khppllhhhh.......hShhlsalLhF....GNsMlLsShhhS.llulhslhtl.................ppp...hsthhh...clhhhllpshhhhshTlhLphhhpplL............shpDDtHlhshlcuKa..tsapsFcohlYsC.usEFshlptcs.h+lspThLlshhlhhhshhh.............hphhtsh...........................................................................hthlsppp.p.....................pcsp.t.csE................................................................................llYpslQhlsFshLAllIhRLKhhhTPahClhuuL.lCSppl............a.........lhppl+ht.............ssshhllshhhhphhsslppphshlsEass.sp.pELlpWIppsTp.sAVFAGuMsshAsVKLoTtRslVNHPHYEcsslRcRTchVYphYS++ssc-V+cpL.hphtssYhIl-tshCsp.Rs+s....GCsh.-lhDhcsscstsp.............s.hCpt..lttc..s+......P...aFspVFp.NppYpVhKl 
.....................................................................cpph.h.tc.uhYYsaac.hh....tu......sh.t..uh..lh.sp.o.........................................lNhlpph.lh.Elhhu....h.aphh.......t.h..............................................................pPh.FYl.hlahhtuhhh.hhahhu..........hhhS..so............h..............uGhlss..hhah..hN+h-sTRl.as.PLREsauhPahhhQhhhlThhl+........................t...h...........hh....h.h.hsshh...h.h.Wpaspahhhh.Qhhslahh..hsh......................ph..lhhh.h.......huhhhshhl.F.....N.hhL.o.h.s..h.hhhhhhh..........................................................................................p.............t..h.....ph.h......hh.h...hhhhhhslhlp.hhp.h.hh................htpp...+h..phlts+....h......htsFph.hh.hC.t.thth...th.....ph..po.lh.h.hhhhh..hhh.............h.hh..h...........................................................................................h.............................................st................................................................................................................hhaphhphhhhs.huh.h.th.Khhh..hshhChhuuh...ls.S.ph.................................................ht.ht............................h.hhhh..h.h..hph....sth.tph......htEa......s.sp..pLhpWIp.p..s.....t.s..............shuGs...hhusl+L.............s..................s.......hh.lsshPhYpptthht.Rs....h.Ythaupts.pplht.h..hthtspahllp..hC......t.............sCph....phhD.tp...s...t..tt..........................................hCt...h..t...........................F.hha..Nt.ahlhp.......................................... 1 55 69 126 +9867 PF10035 DUF2179 Uncharacterized protein conserved in bacteria (DUF2179) COGs, Finn RD, Sammut SJ anon COGs (COG4843) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
20.90 20.90 20.90 21.00 20.80 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.38 0.72 -4.36 189 5446 2009-09-10 22:22:21 2007-07-31 08:59:57 4 5 1792 2 766 3177 158 54.80 32 19.63 CHANGED RGlThl.pupGuYot....pc+plLhsVls+p-lhcL+pllpclDPpAFlsl.t-sp-VhG ...........RGlThl...pupGuYop......pc+pllhsVls.+.pEhscl+pllpplDPpAFlsl.t-spclhG............ 0 297 555 661 +9868 PF10036 RLL Putative carnitine deficiency-associated protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4380) Family This family of proteins conserved from nematodes to humans is of approximately 250 amino acids. It is purported to be carnitine deficiency-associated protein but this could not be confirmed. It carries a characteristic RLL sequence-motif. The function is unknown. 22.50 22.50 24.30 24.30 21.40 22.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.45 0.70 -4.95 15 168 2009-01-15 18:05:59 2007-07-31 09:07:27 4 4 123 0 110 159 3 220.70 42 95.44 CHANGED hh+.RtLssLpas.tssthshs-cc-FtsllhWLEDQKIRhYphEDRppLRplssuc...WscsapKYhpDlphPh.........clps+pptlsWLLshAl+L-YtDss...............ts.pstpch.pcpp+psp.pscpsp..sslshssscFptGlppLAstLsltt..............................Hs........DplVhLcuss+llpE+Lspcstscsslp...s.Ph..-p.tslGhss................tDssLccAApILRLLpIpsLRcLQocINEslVuVQslTAsP+TDs+LG.KVGR .......................................................................hc+KLpALsY....sshsh..p..DcpcFRshllWLEDQKIRtYpI....E-RssLRslpusc...WsphapK..YlpDlssPh............phpp+.p-tl-WLLuhAV+LEYsDss........................pphps....spptpss..sstspsspPl....sl.Dhs..s..s-F+sGVhsLAslLpIpp....................................Hs........Da..LlhLcA....lphl..lp-+Lstcu..lscssps.....ts.......h...shsh.c...c....t.lGFss............t.Dss.lspAApILRLLaIp-LR-LQTcINEsIVAVQslhA-PKTDp+LGKVGR.................................. 
1 42 52 82 +9869 PF10037 MRP-S27 MRP_S27; Mitochondrial 28S ribosomal protein S27 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4570) Family Members of this family of small ribosomal proteins possess one of three conserved blocks of sequence found in proteins that stimulate the dissociation of guanine nucleotides from G-proteins, leaving open the possibility that MRP-S27 might be a functional partner of GTP-binding ribosomal proteins [1]. 25.90 25.90 25.90 26.10 25.80 25.60 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.47 0.70 -5.56 8 150 2012-10-11 20:01:01 2007-07-31 09:08:46 4 3 86 0 85 150 0 316.00 26 84.07 CHANGED MtsShlppuhhhu+hh........uu+RhLLSuAYssuchWEuRcpEPhpLA.s...LAphh-psa-RKLPV....SSLsluRFlDNIuSR--..lDpAEYYLYKFRHSPNCWaLRDWTlHoWIRQCLcasupD+uLYTLcN+VQYGIFPDsFTFNLLlDsaIKcG-aKsAsSVVpElMLQEuF-hPSTphLSLYu.Lhpa...........LAspPsLo.......hpEERslGASLLlsGhKQ-solGhSup.LhGhALLGKVEhppGl+AVa+sMPLhWusGYLs+ulplhEtlAos..sltLuc-sLclhpslLc.....sLou.sDussptp.pcs.-sttpp.....plsEc...-psEpuK.......LspYucpFpch+spLpsts+l-scul.s.s.thlpE+LsssEp.DlchYEp+lptWphEpc...............pLIQREccpREpA-pE.....atstpss+su 
..................................................................................................................................................t....tt..h....s.hslsh...h........ls.hpsp-c..l-.sc.hla........+..............aRppspshhltshsh.p.shlR.hLchst.p-pAl.hLpsplpYGlFsDsaoaNlLhD.hlcctpa.csAhpVlh.clh.Q-sh.ps........sTphLulhs.ha+h......................t...........p..............p-.hp..htsthlhs.hhpcp.shhp.p.lhGhshh..hhsp..h..t...s.ttlhpt...s.hht.shhtch.pshphhtt.......t.t.ltp-...slphh....tthhp..........th.......t.....t.....t..p......p..tt.t.t............p..pp...pp...hct..h............l.p.hp.apt...hppphpthhp.p.pth.......h.pph.thpt...th.tpp...h..c......................h.h.....tcpp.......p...p..................t...................................................................................................................................................................... 0 20 25 52 +9870 PF10038 DUF2274 Protein of unknown function (DUF2274) COGs, Finn RD, Sammut SJ anon COGs (COG5639) Family Members of this family of hypothetical bacterial proteins have no known function. 23.80 23.80 24.00 25.20 21.60 23.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.00 0.72 -3.85 42 198 2009-01-15 18:05:59 2007-07-31 09:10:14 4 1 125 0 104 209 23 68.60 45 84.66 CHANGED Mo+LKLGsls.D-+PVKlTlELPAslHRDLsAYAclLuppsGps..scPs+LIsPMLpRFhAoDRuF..sKAR+ ..............sKL+LGsls.-ccPVKlTlplPAuL+....pDLstYAtlhup....p.hGps....s-sscLIs.MLcpFhAsDRuF..tKu+........ 0 15 61 84 +9871 PF10039 DUF2275 Predicted integral membrane protein (DUF2275) COGs, Finn RD, Sammut SJ anon COGs (COG5660) Family This domain, found in various hypothetical bacterial proteins and in the RNA polymerase sigma factor, has no known function. 
24.60 24.60 25.20 26.40 22.60 24.50 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.27 0.70 -4.44 7 40 2009-01-15 18:05:59 2007-07-31 09:10:48 4 4 40 0 13 40 0 204.20 22 55.95 CHANGED .cpt..sDhhcphhu..tc.s..p.p..hh.thahphh+thultulhlhhhAsshhs.h..........hplsphslsplt.cshhslpsp..............hp.thhsls.tuhut...hpssut.uppssssslcplps.shhththsssh....sh.lttshsspppsht.h.ththhssthtshsts..............hhhsuchccptttlhhut..................shs.slpc+ts.cph.t..........st ............h.......DLscthhu..t..s..sspt.thh.thh..p+.hh+thAlhslslhhlAsss.h..h...........hplsshshshloscshtshpspa.............hpcsst.lshhshu....ltsGsh.utssuhsplsthhuhshhshhlsssh..............shpls.tshltsc.sh.hs..Lt.........hshlsthh+shutT.................hh..tphpph..hh...s............................................hs...................................................................................................................... 0 4 9 12 +9872 PF10040 DUF2276 Uncharacterized conserved protein (DUF2276) COGs, Finn RD, Sammut SJ anon COGs (COG5551) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 25.00 25.00 24.90 24.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.53 0.71 -3.90 63 439 2012-10-01 21:23:39 2007-07-31 09:11:45 4 2 340 0 176 405 13 121.00 21 43.08 CHANGED lpFhTPTthp..............cspthh..hPps.ttl.hpSlhc+asshss.hth..................stphht.hhspslplhshp.l....+shphph.........pttthsGhsGpssapht.......tthhpthttLlphupahGlGppsuhGhGpsc ......................................................................lpFhTPsth+p....................puphh.h..hPss..thl.h...poL..hp+a...sshss.hth....................s.phht..thspplpltshc..l....cs....hthth...........pttphsGhhGphsaphp.............htphhthLLthupasGlGtpsuhGhGth............ 
0 82 133 161 +9873 PF10041 DUF2277 Uncharacterized conserved protein (DUF2277) COGs, Finn RD, Sammut SJ anon COGs (COG5552) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 64.60 61.40 22.10 21.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.58 0.72 -4.02 22 232 2009-01-15 18:05:59 2007-07-31 09:12:33 4 2 229 0 100 209 12 79.60 55 87.30 CHANGED MCRNI+sLhshEPP.....ATc-EIcAAALQaVRKlSGhscPSpANptAF-pAVt-IutsopcL.LsuL.sps......PP+sRtt.tA+ ..MCRNIppLRs.hp.PP.....ATs-EIcAAALQYVRKVSGhp+PSs.ANpEAF-pAVs-VsssTp+L.LsuLss+t......PPhcR.t.tA+............... 0 31 70 87 +9874 PF10042 DUF2278 Uncharacterized conserved protein (DUF2278) COGs, Finn RD, Sammut SJ anon COGs (COG5634) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 27.50 28.80 19.10 22.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.45 0.70 -4.60 27 254 2009-01-15 18:05:59 2007-07-31 09:13:42 4 4 223 0 76 227 3 209.10 41 82.83 CHANGED Msl...psYGVhKup.hphth-tttp..tsPHhplhlcssss...........................spaRsAlNlcSss......t.sucllYhhspph.p+Plhpp.LssLs.Gap.Lp...........................ssssthuLDYlRss.Lhsspsh+sls.hDhs...................GssNDlhchL-shlppuhtp.........................ssplYlFGph.............F......................psu.sG....lHslHMNQGss..p.......caps-NGlaQDGulllch.......sD..........pWsulFlAFtoQshhT.Ds...pGc ..............................MsL...psYGVLKGpslp.pht.....pst...toPHYpV+lpsp.ss...........................s-aRl.AINVcSps........h.sS-ll.Yhssp...sh...cp.shph.L..s..sLs.GaTclp...............................sspstsALDYlRus.....LaDspphhsLP..h-ts...................GscNDLs-hl-phlcpAhp.p.........................cuhlYsFGcp.............F.................................pPG...NG....IHDIHMNQGNs..p.......+apsDNGlWQDGulLlca......p-..............tp.W..hulFLAFpSQuasT.D-.pGp............. 
0 20 34 53 +9875 PF10043 DUF2279 Predicted periplasmic lipoprotein (DUF2279) COGs, Finn RD, Sammut SJ anon COGs (COG5544) Family This domain, found in various hypothetical bacterial proteins, has no known function. 28.50 28.50 28.50 28.60 28.40 28.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.01 0.72 -3.71 20 603 2009-01-15 18:05:59 2007-07-31 09:14:20 4 1 591 0 71 238 32 86.00 69 70.70 CHANGED usDsWsup...........D+ApHF........hsSAsLuAsusp.........tp.h.huhhhosulGhhKELaDoppuGSGaSapDLAhDhAGushGhslhpts ............A.NDuWSGQ...........DKAQHF........lASAMLSAAGNEYupH..QGhScDRSA.hFGLMFSlSLGASKELWDS.RP..EGSGWSWKDhAWDVAGAoTGYslWQh.s.......... 0 14 34 52 +9876 PF10044 Ret_tiss Retinal tissue protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4402) Family Rtp is a family of proteins of approximately 112 amino acids in length which is conserved from nematodes to humans. The proposed tertiary structure is of almost entirely alpha helix interrupted only by loops located at proline residues. Three sites in the protein sequence reveal two types of possible post-translation modification. A serine residue, at position 41, is a candidate for protein kinase C phosphorylation. Glycine residues at position 69 and 91 are probable sites for acetylation by covalent amide linkage of myristate via N-myristoyl transferase. Rtp is differentially expressed in the trout retina between parr and smolt developmental stages (smoltification). It is likely to be a house-keeping protein [1]. 
21.70 21.70 21.90 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.21 0.72 -3.45 7 126 2009-01-15 18:05:59 2007-07-31 09:14:58 4 3 87 0 79 121 0 89.70 44 69.00 CHANGED LlShEpLDRuSP-lWPEphPGhsEFho.sts.h...pssPpahssLsc-DhshlpcLGsLosspLhtKlKpLps.AYQLGLcEA+EMTRGKaLsIhs ......................................LlShEpL..RsSP-lWPEp....h......PGls.-Fss.p.p.sshp..sssscahsc.l.c.....p.-Dl..c...hlpELusLTsssLh-Kl+sLpshAYQLGL-E............u..+EMTRGKaLsIhp....... 0 24 31 56 +9877 PF10045 DUF2280 Uncharacterized conserved protein (DUF2280) COGs, Finn RD, Sammut SJ anon COGs (COG5556) Family Members of this family of hypothetical bacterial proteins have no known function. 24.00 24.00 27.40 24.50 23.60 21.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.90 0.72 -4.11 15 135 2009-01-15 18:05:59 2007-07-31 09:15:31 4 3 102 0 25 104 1 99.90 48 65.70 CHANGED MAsLss-VKuFIVQALACFDTPSpVscAVKcEFGlcloRQQsEoaDPTKtAG+sLuc+WtsLFccTRc+Fhp-sucIPIAN+AaRLRsLsRhupKAEph+NhuL ..........MAsLpppVKhFIVQuLACFDTPopVAcAV+pEFGlcloRQQVEsaDPTKsuG+sL.u+Kah-LFppTRccFppc.ltsIPIAN+AYRL+tLpRhhpcscp.+sh..h........................... 0 2 10 20 +9878 PF10046 BLOC1_2 Biogenesis of lysosome-related organelles complex-1 subunit 2 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4559) Family Members of this family of proteins play a role in cellular proliferation, as well as in the biogenesis of specialized organelles of the endosomal-lysosomal system. 
26.80 26.80 26.80 27.20 26.70 26.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.22 0.72 -3.86 17 169 2009-01-15 18:05:59 2007-07-31 09:16:01 4 4 142 0 123 176 6 95.60 35 58.97 CHANGED hpchhsshsphlpuphptospchpLLEphNpssst+YtchpphspsLpsphcpLptp.pphpsalppIcpI-pplspLEpssptLDtasppLEsKlpsl ...........pchFschupalpuELsuosp-Yc..LLEpMNchsuh+Yh-hcslutslspslpcLsp.Ka.tpL.pPalpQIstI-cpVspLEpsshcLDsYoppLEs.KhKp................. 0 40 60 98 +9879 PF10047 DUF2281 Protein of unknown function (DUF2281) COGs, Finn RD, Sammut SJ anon COGs (COG5559) Family Members of this family of hypothetical bacterial proteins have no known function. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.14 0.72 -3.68 19 250 2009-09-11 14:35:08 2007-07-31 09:17:16 4 5 143 0 89 275 17 56.90 21 71.76 CHANGED sppplhpplppLPpclhpElLDas-FLhpKt..............h.pptsppthhhshtGt..h..pp.pshs.sslElQ+chh .....................h..ttlhpplppL.PcphtpEVlDFl-FLhpKt.................t.t.................................................................................................. 0 26 63 83 +9880 PF10048 DUF2282 Predicted integral membrane protein (DUF2282) COGs, Finn RD, Sammut SJ anon COGs (COG5572) Family Members of this family of hypothetical bacterial proteins and putative signal peptide proteins have no known function. 22.40 22.40 24.00 23.30 22.20 19.40 hmmbuild --amino -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -9.04 0.72 -4.06 74 316 2009-01-15 18:05:59 2007-07-31 09:18:13 4 5 262 0 115 309 38 55.50 49 57.74 CHANGED pEKCY......GV.uhAGpNDCuA.GsGToCAGTSpsDhQGsAWphVstGTC..........ppl............ttGoLp ..........hEKCY......GV.AhAGpNDCAs.....usuooCAGoSphDhQusAWphVPtGTCspl...............stGoh.t...................... 
1 22 60 95 +9881 PF10049 DUF2283 Protein of unknown function (DUF2283) COGs, Finn RD, Sammut SJ anon COGs (COG5428) Family Members of this family of hypothetical bacterial proteins have no known function. 20.80 20.80 20.80 21.40 20.60 20.40 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.33 0.72 -4.11 63 354 2009-01-15 18:05:59 2007-07-31 09:19:17 4 2 242 0 146 369 32 50.30 28 59.25 CHANGED h+lpYDt-sDsLYlpl..spssht.......-ot-lss........slllDhDppGcllGIElLpAo ..............plpYD.-sDsLYlpl......psspht...........-op-lss...............slllDh......D.ppGcll.GIElhssp............... 0 47 102 126 +9882 PF10050 DUF2284 Predicted metal-binding protein (DUF2284) COGs, Finn RD, Sammut SJ anon COGs (COG5423) Family Members of this family of metal-binding hypothetical bacterial proteins have no known function. 25.00 25.00 28.30 27.60 22.00 21.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.38 0.71 -4.65 37 229 2009-01-15 18:05:59 2007-07-31 09:19:58 4 5 162 0 103 212 12 159.70 24 78.21 CHANGED chthlpstclsl.pppsth+Cp..sCssYGpphsCPPtssshcEh+chlpcYcpAlLhphphsspp.tp.............................hhthppplpphhhclE+phhttGa.hAhshhsGsCp..hCp..p.......Cshpp..tt...........C+hPchsRsShEAhGl-lhphscps.sh.hphht.............pphshhuhlLl ........................h.stclhhptchhth.Cc...tCssYGpshuCP..Ph.ss.....ssc-htthlppYcpuhlhphp..h.hpst.............................t.hh.....ptp.hpphhhchEcp.h..h......t..p.u.a...shsh.hsG.sCp..hCp....c.......Cshtp..sts............CRaPccsRsSlEAhGlDltphscph.shph.hst...........phhshhuhlh.................................... 0 56 88 95 +9883 PF10051 DUF2286 Uncharacterized protein conserved in archaea (DUF2286) COGs, Finn RD, Sammut SJ anon COGs (COG5399) Family Members of this family of hypothetical archaeal proteins have no known function. 
24.00 24.00 24.30 107.50 23.40 18.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.63 0.71 -4.54 12 37 2009-01-15 18:05:59 2007-07-31 09:20:22 4 1 37 0 21 27 0 143.70 41 98.34 CHANGED M..slVs+hccspVspc-VlcG-ls-lV+clAtchLc.EWsPpsSDFlllRDhhplphPhPL.p.ElhEcl..+paphp+scscs.lplPla.IsasspWh.tpsapsccshVVhPYlD-ptpcElhchshpshut.t............................cEEt.pc.E ...M.+llllKuEsGcVssccls-.G-lscVl+clAp-.ALc.EWN.hsSDFIIhRDs.ElclPLPL.pP-lYEtl..+pF....hht+scscA.hsclPVYhISa-NpWt.-ssapDc+laVVu.YIsD-hpcpllssAsphToppc............................pE..tc...-.p......................... 0 8 11 16 +9884 PF10052 DUF2288 Protein of unknown function (DUF2288) COGs, Finn RD, Sammut SJ anon COGs (COG5626) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 58.60 58.30 22.40 17.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.07 0.72 -4.27 36 217 2009-01-15 18:05:59 2007-07-31 09:20:50 4 1 217 \N 72 199 26 92.90 42 87.76 CHANGED ssL+scLtt-TuplsWp-LpsaaARGsllhVsssLDLl-VAhulApDspspVppWlssGplu+sosppApcW.hpcs.phWAVVVuPWVLVQ-c .....o.sLhs+LhuETApIsWs-Lp.FFA+GsLlhVstsLDLlpVAcAlApDDsppVspWLusGtlu+sssppAt-a.hscssp..LWAVVVuPWVLVQp.... 0 18 41 59 +9885 PF10053 DUF2290 Uncharacterized conserved protein (DUF2290) COGs, Finn RD, Sammut SJ anon COGs (COG5619) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 26.10 34.90 24.30 23.10 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.49 0.71 -4.99 8 37 2009-01-15 18:05:59 2007-07-31 09:21:39 4 1 36 0 11 36 0 185.10 31 87.28 CHANGED Lss-.Nhsshpp.s......ssscloasGtEcsulhht-hsYucl..YpulhcppuYshhLlDGuLlQhpYchcpspLlpHRLuYYPsPtLhs.............hps-s-lhhpDhLhh-hspctllshP....lRFDFDs..tttp...cssHPsSHlTlGsssuCRIPVsusLTPcpFlcFVlRNFYpoha+chlut.....hsstph-Fc....soIsspEpslhHls..hs .............................................N.s..pp.s......th..lshss.tp.....pshpYpcl..YptlhcppsYshhL.hDGullQh.Ychp..p..spllpHRLuaaPuP.hlps.............aps-s-hYhpDhlhh-.....Ihpcpl....ls..hP....lRFDaDs..stht.......chtHPpSHLTlGphpsCRIPVsuPlTPphFlpFlL+pFYpothcsh.st.....hsphp...pFp.....olh..EtthhHhs..s......................... 0 4 7 9 +9886 PF10054 DUF2291 Predicted periplasmic lipoprotein (DUF2291) COGs, Finn RD, Sammut SJ anon COGs (COG5618) Family Members of this family of hypothetical bacterial proteins have no known function. 20.10 20.10 32.50 32.50 18.90 18.20 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.21 0.70 -5.04 21 215 2009-01-15 18:05:59 2007-07-31 09:27:20 4 2 195 4 58 151 13 196.30 36 91.75 CHANGED sllhsAsslllLsthupcIl...h..up-ssutsus.tFsPcphusphW.sclhs.sppcAlshs-ltstlssstsuAsppYG...ht.ss..shhV+hoGsVsssc.....suthslclcGs.tpsslplQhGPslpGoulRDAsuhlpFs-F+NQI-asphupAlNspspspVlt.h.....scsslsG+plsVhGsasL..sssph...hlsPlplpl .....................................s.h.shssllllshsGtclh......p.s.sDss.s......hs..s....s..hsPs.shusshW.scVhs...plppcAlshsEltstl..su..st..s..uAscpaG....st.sss.shsV+hoG.sVsphc.....uuhhsl.c.l-.G....t.hslplQhGPslpGTsLRD.AsshIpFs-F+NQlpasphupAlNpchtppVh..h.....ssp.shsGcsVsVlGsFsl..sssp.......lTPlpLp... 1 10 24 39 +9887 PF10055 DUF2292 Uncharacterized small protein (DUF2292) COGs, Finn RD, Sammut SJ anon COGs (COG5583) Family Members of this family of hypothetical bacterial proteins have no known function. 
20.00 20.00 20.10 20.30 19.70 19.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.40 0.72 -7.45 0.72 -4.62 23 311 2009-01-15 18:05:59 2007-07-31 09:28:01 4 2 292 0 79 190 5 37.40 43 67.35 CHANGED chlccIpchLcsl+aGSlTlslQDGpVlQI-+sEKlRL ..................l.ccIpshLpsh+....aGolpIsVpDGpVlQlE+sEKhRL........ 0 27 55 67 +9888 PF10056 DUF2293 Uncharacterized conserved protein (DUF2293) COGs, Finn RD, Sammut SJ anon COGs (COG5586) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 26.40 25.40 24.90 23.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.98 0.72 -4.01 25 287 2009-01-15 18:05:59 2007-07-31 09:29:46 4 1 242 0 124 205 3 84.60 39 24.91 CHANGED pAl+sLaPthPtpc......tcsIlpHAh.cu......utRVG+.sushs.tt.+VpLAVhAHlRHpaTcYD..pLLcsGhsRcpARptVhctspslLpcWt .........................t.ulppLhPthPtsc......tpsIhp+Ah.cu........................pt+lut..sushs...t..tVpLAVsAHlRHhaTcYD..pLL..c.s.G.hs+csARthVhptlpshLscWR............. 0 26 68 98 +9889 PF10057 DUF2294 Uncharacterized conserved protein (DUF2294) COGs, Finn RD, Sammut SJ anon COGs (COG5609) Family Members of this family of hypothetical bacterial proteins have no known function. 21.40 21.40 21.60 22.00 21.00 21.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.25 0.71 -10.06 0.71 -4.44 23 741 2009-09-11 07:39:01 2007-07-31 09:30:33 4 4 437 0 101 280 9 110.00 38 92.03 CHANGED Mppo.+stlEpEISculppacK-hlGRGstplKThIlcsMlllpLcGlLTssEptlspo...tcGhthlKpsRspLh-st..ppclpcllpclhGpcVhuhaoDloscTGEplhVFhL-.......pslEK ..............................hEtchschlpphcK-hhG+Gs.p.l.+osh.hcsMs.IssLpGlLTPsEhhlspT....p-Gh.hl+tsRo-.hlcps..ppphp.chlpclsGtKl..hshaoDlsspssEtl.lFhh-....cslE......................................... 
0 46 70 88 +9890 PF10058 DUF2296 Predicted integral membrane metal-binding protein (DUF2296) COGs, Finn RD, Sammut SJ anon COGs (COG5415) Family This domain, found in various hypothetical bacterial and eukaryotic metal-binding proteins, has no known function. 21.70 21.70 21.70 21.70 21.50 21.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.81 0.72 -4.22 30 334 2009-01-15 18:05:59 2007-07-31 09:31:15 4 6 254 0 214 322 1 52.20 42 14.26 CHANGED ahDRllDhLlGsss.......psRhALICppCptHNGhu...pcchp.l.papCspCsshN ....................hDRll-hLlG-ss.......psRaALI.CppCttHNGhAh...pc-hphl.ta+CshCthhN........... 0 60 110 171 +9892 PF10060 DUF2298 Uncharacterized membrane protein (DUF2298) COGs, Finn RD, Sammut SJ anon COGs (COG5427) Family This domain, found in various hypothetical bacterial proteins, has no known function. 24.70 24.70 25.00 24.70 23.00 24.60 hmmbuild -o /dev/null HMM SEED 473 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.96 0.70 -5.65 18 117 2009-01-15 18:05:59 2007-07-31 09:33:29 4 9 68 0 64 119 65 412.70 21 56.82 CHANGED lllsassahluhl...hsa.......sllls...slhlhshuhhh...hh+pc........h........t.hlts-hlFhsuFhFhlhlRsapPtl.h.....Gs..EKFMDauahsulhRusshPPpDsWFAGtpls.YYYhGaLlsulhuhLoulssphuaNLAlAshhAhsssuhaGlutslspth..............h.hhhhsshhshhh........ushhshh..h.................hthhushhhttssshsa................................................aWsuSRsl.ss.....................IsEFPhFoalhGDlHuHhlul...sFhlLsluhshshatptsppp..............................thhhlh...hulsLGhlhssNo.WDaPlah...sLshhslhhh.tt.h.h...................ph..hhs.............t.ssllslluhllhlPFhls..hpstuhtt.....hshl..............ppohh.phlhla.GhFhhhhh..uaLhsplppt............hhhhh..h.hh.......hhshslhulhhPllshuhhthhppp.................sssFhsLLlhsuhsllllsEllYlp-s.hss...sRMNTVF..KFYhplWllh 
..............................................hhshhhahhs.h.....................hhhs....hh.hlh.shsh.hh.....hhptt...............................h.......ht.hh.h-..hlF.h..hhFhhhlhlRh.....hs...Psh.h............usE+.hDhuah....pulh+ushhPP.D.WauGt.ls.YYY.hGahhhuhhshlouhssthuaNLslshhhuhhhhssaulsht..lhtth................................................hhshh.h.shhh.........sh..hh..h........................................h.....h.h..h.....h.........................................hW....soRsl...s...................................IsEFPhFoalhuDhHsHhhsh......shhlh.hhshhh.th..h.hp.tttt..............................h.hhhh...hulhhGh.h.hhhNs.Wshshhh...hlh..h.h.slh.h.h.h.p..h.............................................................shhhhhhuhl.lhhPFhht..h.s.s..tt...............l.thh......................pto.h.phlhla.Ghhlhhhh.......hh.hhtthht..........................................................................hh......h..........h.h.h.hh.hhh....hhhhh......hhhhh..................................................t.hh.hl..hhhhuhhlhhhsEhhal.p-...................RhNTlFKhhhpsWhlh........................... 0 23 51 54 +9893 PF10061 DUF2299 Uncharacterized conserved protein (DUF2299) COGs, Finn RD, Sammut SJ anon COGs (COG5440) Family Members of this family of hypothetical bacterial proteins have no known function. 22.10 22.10 22.20 22.40 19.30 21.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.67 0.71 -4.73 13 64 2009-01-15 18:05:59 2007-07-31 09:35:12 4 1 45 4 34 53 58 135.50 29 87.04 CHANGED cItsWhpELGhhlp+...sssusthFHlssoPPt.sssslsllRssscosaYlluhulslcspH.phl.thphccRpchltplph-Ll+h.sV-FhhhPPsp-.PpsIQlu+.latDG..LTKNchlssltpV+NuulhVl.hlp ..............................ltsWlpEhGhhspc...sssupthFHhssssPt..uGsslsVl+PpscschYlluhultlc.pH.ptl.thphc-Rt+...hltplph-LLch.sV-F.hhh....P..s.sp-.PpsIpVu+.la.tDG..LT...pNchlsslhpV+Nuslhll.hh............................ 
0 8 19 29 +9894 PF10062 DUF2300 Predicted secreted protein (DUF2300) COGs, Finn RD, Sammut SJ anon COGs (COG5445) Family This domain, found in various bacterial hypothetical and putative signal peptide proteins, has no known function. 25.00 25.00 26.00 35.30 24.30 22.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.88 0.71 -4.43 13 657 2009-01-15 18:05:59 2007-07-31 09:35:38 4 4 320 0 34 314 12 129.20 50 47.26 CHANGED +uspssp......hhsspAstuhthDthLsp.....oluhahpPhthshsshshsphW.t+psPtWhtcLsshssappssshslsclhpshPhpDptRctlh........stslholtsRl.........slspphhahuFuuaPs ........GspspE.......hssQAGpu.phDphLsh.....ShuhahAPpp.phth.shuphW...t+....p.hPtWht.Lps.ps.s.pssVhulhc..lushPhps+..tQchLh...................Itsh.hLtsRl.........D.ppEhhphuFsGa.................................... 0 5 13 24 +9895 PF10063 DUF2301 Uncharacterized integral membrane protein (DUF2301) COGs, Finn RD, Sammut SJ anon COGs (COG5413) Family This domain, found in various hypothetical bacterial proteins, has no known function. 25.00 25.00 31.00 29.50 21.80 21.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.73 0.71 -3.96 38 200 2009-01-15 18:05:59 2007-07-31 09:36:44 4 1 188 0 59 175 0 123.50 38 62.52 CHANGED luLphIHIYl+sl+phLQhhWhlGsluh....hhh.h....t.h...lhspPh....hLulGshFsuLsGluFKEaFCFp+hEshhlshLlPlhlLupLhGlhshththuLlu..ulLhlllAlRKathPltsDIGDKSlFpa ................hshsslHIYl+sh+hllQhhhhlGllst........................lhs.hPh....LulGshFhsLsGlsFKEhFCFphhthph.shllshLhLualhshh.htthhullu.....ulLhllhAlpKaphPl+hDIGDKohaph......... 1 9 32 51 +9897 PF10065 DUF2303 Uncharacterized conserved protein (DUF2303) COGs, Finn RD, Sammut SJ anon COGs (COG5532) Family Members of this family of hypothetical bacterial proteins have no known function. 
21.90 21.90 22.10 22.10 21.70 21.80 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.51 0.70 -5.53 14 390 2009-01-15 18:05:59 2007-07-31 09:38:46 4 2 323 0 29 229 15 251.50 51 98.85 CHANGED M......s..coAlptlttshhsusphpp...uh..shl..hlP-...uh+lpsLEpahs......p.sRh+GshpspshsuFlpYspcptp..-ssphF..Ist-....shsssulhshtsts......s...........sGas-apAshshptTstapshhshsGcthsQtchu-alED.tsslls........hsssu.......................uchlplspshpAs+phshcuusptpsspspFshpcss.psscupp...hphPstFthphsPapshsphplphRlphhh..tsup.slhhcllp.-tl.EphspEhtshlpcthpst........lhlGohp .....................................................p.D..usAlppIp.shloAtsssp.lsthts.ssl..sLP-....sacl..c..sL..E..+Fth......sRFRFRGshoTsSI-DFs+YS...KchAs........Eu.......oR...CF..IDAD..........sMcAsoVlNlGT.l.-.p.................PG...HADNsAoLcLK..+TAPFpALL.s.lNGc+.sQKsLAEWlEDWADaLlu...............aDus...................................G-slphocAsuAVRKITI-AspouDaE-sDFSGKRSlMESVEAKoKD..lMPsuFcF+ClPFEGLc.ER.s..FcLRLSllT....GDcPVLVLRIIQLEAVp.E-hAsEFRDLLlEKFc-scl.....ETFIGTFo............................ 0 7 14 21 +9898 PF10066 DUF2304 Uncharacterized conserved protein (DUF2304) COGs, Finn RD, Sammut SJ anon COGs (COG2456) Family Members of this family of hypothetical archaeal proteins have no known function. 23.00 23.00 23.00 25.20 22.60 22.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.43 0.71 -4.23 15 588 2009-09-10 17:25:32 2007-07-31 09:39:02 4 3 561 0 136 408 84 113.90 23 92.15 CHANGED Mhh.lQlIuIllhlhsllhlhhph+csplshssslhWlhhalslllhslFP.plutpIAcllGlGRGlDsLhhluIhlhahLla+LYl+l-+LER-ITcLV+EIAIc..-t.p.hp+cc ................hphl.h.l.lhu.l.h...h.hhh.llpt.l+.+.sphphchulhWllhu...lshllhulaP.plhshlAphlGltps..sshlhhhslhhlhhh.hhphohplSchc..pclppLsQcl.Alh....pt.........pt............... 
0 45 79 111 +9899 PF10067 DUF2306 Predicted membrane protein (DUF2306) COGs, Finn RD, Sammut SJ anon COGs (COG5395) Family Members of this family of hypothetical bacterial proteins have no known function. 29.10 29.10 29.20 29.70 28.90 29.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.51 0.72 -3.90 47 499 2012-10-01 23:59:14 2007-07-31 09:40:11 4 1 398 0 212 456 145 104.70 23 44.00 CHANGED hptKssp....hHRhlG+lalshhl.lsAlouhaltth...thhu.....asshalLuhhhlhshhhulhss++tp.......lttH+thMltsYhhulsssshhsh.l..........ssRhhhsllhs ............Rp+thphHRhhG+lal.shhl..lsulou.h..h.lsh.h..s...hs.h.hs.....hhsh.tlL.u...lhhlhssh.hAlhsh+..ptp...............ltpHRpWMlRsahhshsss..shphh..h....................h.hhh............................................ 0 79 141 186 +9901 PF10069 DICT DUF2308; Sensory domain found in DIguanylate Cyclases & Two-component systems Anantharaman V anon Manual Domain DICT is a sensory domain found associated with GGDEF, EAL, HD-GYP STAS, and two component systems [1]. It assumes an alpha+beta fold with a 4-stranded beta-sheet and might have a role in light response [1]. 25.00 25.00 36.90 36.30 23.80 23.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.48 0.71 -4.61 93 183 2009-01-15 18:05:59 2007-07-31 09:43:40 4 11 104 0 87 191 1 125.80 24 28.34 CHANGED hs.spushppLhpt.h.t...s..p.thahKsoLhALs+ulEDpllp......sp.pshlhuuFQptchappEs...cR.YpclAppup.plalhus................................................s-ssh......sssphhpshh....lsssDsLspEWaLllluss.....asuhllspEh ....................................t..........p.thh.KspLhulS+slEDpslp......ss.ps..llhusFQctphappcs...cRYpclAppss.plalhus................................................s-ssh......sspthpslt.....hsssDs.LspEWallllusp......asuhLlspE.............. 
0 18 66 86 +9902 PF10070 DUF2309 Uncharacterized protein conserved in bacteria (DUF2309) COGs, Finn RD, Sammut SJ anon COGs (COG3002) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 26.30 26.20 23.50 23.20 hmmbuild -o /dev/null HMM SEED 788 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.47 0.70 -13.30 0.70 -6.43 67 634 2009-01-15 18:05:59 2007-07-31 09:44:26 4 4 569 0 133 484 108 793.90 40 94.76 CHANGED hlcpAschIsPhWPLcsalAsNPahGhpcpsFtpsusphtpltGschhhspshappthtpGcIspssLptsltt........................................................................................ttshshshsphhtthtp.................................................tt..ttl........slu-hhsshp......u....hshsshlscplspasuuaaDpGQuhWp...Pt+cpGhYsAW+phsppDhshtht...t.tthtphltphPpssppulspslppL..slspsthpsYhcphLhsLsGWAuhhpapphps...th.sppp.tslh-hLAlRLsh-hsLh............................................................hht..htthht...............................thtspttpshhs.............ppshthstlhpcAhEtuapcpLhpsl..........................t.....ssssssst..............+P..th.QusFCIDVRSEshRRtLEshsPt....hcThGhAGFFGlPltapshussstcsphPsLLsPphtl...........t.s...ttpthpppp...chptphpptapphKpsslouFshVEssG.hauhcLltcoLts.............................ptssshh.php....t..................................................................................uhohp-plshAtshL+uMuLT...psFA.LVllsGHGSposNNPasuuL-CGACGGpsGtsNARllAtlLNcspVRpuLtpc.GIsIP-DThFlAAlHsTTTDclphastct.lsts...tpsshcplcshLspAuphsptERutpLs...tssp...........p.hpplpcRupDWuEsRPEWGLAsNAuFIluPRphTpuhsLcGRuFLHsYDWpp.Dp..cuslLEhIloAPhlVupWINLQYYuSTVsschaGSGsKlhHNVs.GslGVhpGNuGDLRsGLPhQSVcc.spphhHcPlRLhVlI-APt-tlpcllpcp.............ssl+pLh-NtWL 
...............................................l.lppAp+lIsPl.PlstFsApNPa.GhEspsFcpsAphL+cltsscla.stuhhcpthtpGcIspsslpptlsphh................................................................................................................................hph.sptthphhhpshtp.............................................................................tshtt.sh..............shu-hl.spp.u.........pshs-.lscphh+WsthYhDpttusWs..hP..pR..-..p.uhYpAWhclspa...Dhuhsht..........+phlpsLPp.c..sp.slpps...Lspl..uIs.p.pphpsYlcupLLuLsGWAGhhhaRupQt.............pp-.thLpDaLAIRLshE.hLl..............................................................................tstht..hhtps.p...............................................................................................h......phssschpshhp............hhtphspthhcpLWlpAaEhsapppLhppl........................................tss...p.p..s.psss......ps...psQhAFCIDVRSEsFRRHLEshuP.....hETlGhAGFFGLPIphpslspphp+spLPVhl.sPsapl........................cphssc...phphh.pppp....pshpshhh.s.a.+..th..KpsshsohhLsEhoGsh.hulphlspolhP.............................cpsts..lp.phppphhpcs..p.sphplp...........p......................................................................................slGhT.pEplshAtpsL+hMuLT......csFAPlVVLsGHuSpSpNNPatAuL-CGACGGtoGuhNA+lLAhlsNcssVRpuLt.pp.GlhIP-cTlFhAA.HpToTDplphh.l.sc...Lsst....ttcuh-pLpshhstsuppAspERhspLs.htptt............pssp-spRhApDWSElRPEWGLApNAuFIIGpRpl.....TcshcL-GRsFLHsYDWcp.Dp-G..plLssIlouPslVApWINLQYYASTVsP+hYGSGNKsspsVs.uulGVhpGNuuDLhhGLsW.QSVhs.spphhHpPlRLhVVIpAPpthlpcllspp.............tphpchVpNpWl..................................... 0 43 87 115 +9903 PF10071 DUF2310 Zn-ribbon-containing, possibly nucleic-acid-binding protein (DUF2310) COGs, Finn RD, Sammut SJ anon COGs (COG5595) Family Members of this family of proteobacterial zinc ribbon proteins are thought to bind to nucleic acids, however their exact function has not as yet been defined. 
25.00 25.00 31.80 31.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.83 0.70 -5.42 23 268 2009-01-15 18:05:59 2007-07-31 09:45:21 4 2 257 0 54 193 9 249.40 52 99.43 CHANGED MalsElpFc..sapssslsssp+sINpll-thRYNGQILGREaPlshspsp....FtsRllCPEpsSLtscaNSspVppALppLscsGllhsphcllGcDlNS-tos....cpppPoWQlLYTTalcoCSP.....L+sGDsltPIPLY+hs.sphsu..pcsllKWQp-WQACDpLQMsGus..hEphALcEIu-ssScLh++GhcLspcIEthTpIPTYYYLYR.VGGpSLtsEppRpCPpCGu.-WpLspPLaDlFcFKCDcCRLVSNLS..W-ap ............MYllELpF-..sassTolsus-+AINsLhDshRYNGQllGREFPlshu-s......FhVRllCPEp-SLaPcapSthVptslpcLs-suLltPph+llGpDlNSEpsA....Ep.-pPSWQlLYTTYlcoC..SP.....L+SG-sLhPIPLY+ps..sshsuD.acslIKWQp-WQACDpLQMsGus..sEpsALcEIs-scSsLF++GaDLptRIEhlTplPTYYYlYR.VGG....pSLspEpsR.CPpCGu.-WhLc-PL.....aDlFaF+C-sCRlVSNlSW-a....... 0 10 20 38 +9905 PF10073 DUF2312 Uncharacterized protein conserved in bacteria (DUF2312) COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG3750) Family Members of this family of hypothetical bacterial proteins have no known function. Structural modelling suggests this domain may bind nucleic acids [1]. 21.70 21.70 21.90 22.40 21.50 21.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.28 0.72 -4.62 51 455 2009-01-15 18:05:59 2007-07-31 09:47:06 4 3 340 0 158 352 153 73.20 50 78.91 CHANGED lAu-pLRpaIERIERLEEEKKslu--IK-VYAEAKupGaDsKllRpllpL....RKpDps-+pEpEslL-lYhpALGM ...............As-pLRsaIERIERLEEEKKs........lu-DIK-VauEAKupGFDsKllRpIl+L....RKp-...ps...-htEpEul..L-lYhpALGM.................. 0 44 91 109 +9906 PF10074 DUF2285 Uncharacterized conserved protein (DUF2285) COGs, Finn RD, Sammut SJ anon COGs (COG5419) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
23.00 23.00 23.30 23.30 22.80 22.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.46 0.72 -3.70 33 292 2009-01-15 18:05:59 2007-07-31 09:47:52 4 2 153 0 155 286 28 100.70 27 57.06 CHANGED lhlPLcsshts....................RhcAhtRhhctLp.u+sssPssph..hsstp+tRhhphLpAlDuchsGAShR-IAtsLFGs..splstc..Wps..uulRsplpRLV+cGpuLhcGGYR+LLp .................................................................................................t......ss.stt.....hss.pct+hlthL+sLDu....ptsGA.oaR-lA.p.slaGt..csspts...ats..sthRsph..cph..lc+upthhcuGYRpLLp......... 0 23 94 128 +9907 PF10075 PCI_Csn8 COP9 signalosome, subunit CSN8 KOGs, Finn RD, Coggill PC anon KOGs (KOG4414) Family This PCI_Csn8 domain is conserved from plants to humans. It is a signature protein motif found in components of CSN (COP9 signalosome). It functions as a structural scaffold for subunit-subunit interactions within the complex and is a key regulator of photomorphogenic development [1]. 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.52 0.71 -4.42 48 621 2012-10-04 14:01:12 2007-07-31 09:48:56 4 7 256 1 376 1244 4 138.70 20 59.51 CHANGED hhptphhu.hLlthLspsch.ssFchthchlPsshpps...................stplpplhpLtphLhpssYspha....phlcps....sp..phpshlssh..t-plRcclsphltpuY.....psIshpthuphLshs..sc.......phppasppp.GWpl..-........ssshhl.h.sc.ptpsps ............................................................hh....phhu.hLlhhL.h...p....h.ssFphh..hphl...s....t.....p..h..h.pt.......................................................................p..l...pplh.tL.tphL.ps.pas...pha....................p.h...css....................p.............sht..hl..s.sh.....p-sl...RcclsphlspuY............p.p.I.shsthsphLshs....sp................ph.ppa....s....pc...p...GWph..s...........ss.s.h.h..h......t.p.................................................. 
1 132 192 290 +9908 PF10076 DUF2313 Uncharacterized protein conserved in bacteria (DUF2313) COGs, Finn RD, Sammut SJ anon COGs (COG3778) Family Members of this family of proteins comprise various hypothetical and putative bacteriophage tail proteins. 27.20 27.20 27.60 27.60 27.00 26.50 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.80 0.71 -4.88 60 660 2009-01-15 18:05:59 2007-07-31 09:49:48 4 5 514 0 114 541 6 159.10 22 80.70 CHANGED ptLhphLPssht.sc.....hptlhpu.usplsplptpupsllpphhPpous..hLscWEchhGlss....ssst.olppRRptlhsKhptpGuhohthhtplApsh.Gh...slpIpphp....................hpahapVphs..........shsssscslps......lcshlcch+PAH....hsYs .....................t..LhthLPsh.t.s.t.............hpth.hpuhu.phsplpppspplhpph.spTss....hlscWEchhGLss......sssp.olppRpptlhuKh....p..t.tGsh........ohtahhphhpsh.Gh...shplpphp........................................hpahhplphs............................shssss.sslpt..........lcshlpchpPuH.hhhh.Y................................................................. 0 38 77 97 +9909 PF10077 DUF2314 Uncharacterized protein conserved in bacteria (DUF2314) COGs, Finn RD, Sammut SJ anon COGs (COG3779) Family This domain is found in various bacterial hypothetical proteins, as well as putative ankyrin repeat proteins. The exact function of the domains comprising this family has not, as yet, been determined. 
21.10 21.10 21.50 21.10 20.60 20.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.79 0.71 -4.10 10 385 2009-09-11 08:32:28 2007-07-31 09:50:18 4 9 337 0 61 293 6 114.60 27 42.17 CHANGED DscMttAhs+ARpTF+aFWRELuW-pRRIlsuLDhAh.VKVsF.pD.t.....susssEaMWlsclsF..DGctlhGsLsN-PcploNlcp.GDplshsh--IuDWhalssG+saGuFTlsAMRupM.SccERscHDpAWGlDF ........................................................t......hh.pAppphthF..plh..pp.....h..hhh...VKlsa....-....................ptspsEHhWlp...hph..s.st.hhhGlLs.N-P...hhlpslc..Gpphplsh-.cIoDWh.hh.t.sst............................................................................................... 1 19 36 43 +9910 PF10078 DUF2316 Uncharacterized protein conserved in bacteria (DUF2316) COGs, Finn RD, Sammut SJ anon COGs (COG4367) Domain Members of this family of hypothetical bacterial proteins have no known function. 26.00 26.00 35.70 32.70 25.70 25.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.75 0.72 -3.99 7 273 2012-10-04 14:01:12 2007-07-31 09:50:44 4 2 268 0 23 100 1 91.20 54 95.69 CHANGED MSLshtph+sT+pELptNFpLssLolpplAp-Lpho.sclEtlLpLc...........pplpDsWhLRsYL.ctlpppGppPhPaotLsG-....tpaWFLs ....MSLNKEQRRITsEELQAHFEtSTLSlphIAccLNlToE-VEKlLuMpsP.GlFuppLQRF.IHLVWDVRDlINDNIKtNGQTPcPYTYLKGEK...EDYWFLc............. 0 9 14 19 +9911 PF10079 DUF2317 Uncharacterized protein conserved in bacteria (DUF2317) COGs, Finn RD, Sammut SJ anon COGs (COG4365) Family Members of this family of hypothetical bacterial proteins have no known function. 
24.20 24.20 25.10 24.70 23.70 23.20 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.09 0.70 -12.61 0.70 -5.90 54 546 2009-01-15 18:05:59 2007-07-31 09:51:26 4 3 532 0 133 404 329 519.30 36 99.20 CHANGED Mphcplsh.pp...sphhtDYhspppp....lpsFaph..h.p.psapcplsphppph...t...RpsLschLppphpphss.upts.psI-tLtp.pohsVlTGQQhGLFTGPLYolYKllSsIsLA+chcpph.shshVPVFWlAuEDHDa-ElNahhh.t.tt...+Khphp....t..cs.psussphsspslpphlcphhppl....spopasptlhphhpcs...........Y.tpssohs-uFttlhpcLFtchGLlllDusDspL+plttshhpctlpppsshtpslpppppplc.phGhs..lQspscplNLFahp..c.stRptlp..tcsspF..hlpsschpaoccELlchlcscPERFSPNVlhRPlhQEhlLPslAaIGGPGEIuYWupL+psFcthslphPhllPRhohsllpp+hp+hlp+h..........plshpclhhpth.......phhpcph.t.tppsslptshppt+p.......plpptappLtshstphDsohtshlptsptphhctl-tLEc+hh+utcR+pcphlpphpclpspLhPpsu.QERhhNhh.ah..scaG.pllsplhp.hpshtsp.phhlpl .................................................Mchp.lslspp...sphlschhsuc........lpuhFpY.thhpp.puFccRhpc.sstc.......ctsLssslpcYhsc.....L.......p.h...o...ppp.tNIptL.sssohVVluGQQAGLFsGPLYThHKIhSlIpLuK-lp-ph.ptpVVPVFWIAGEDHDaDElNHTalhs.pptpl+Klpacshp....cs..osSchh.s..ptc.h+..phlcphFtphtE....TsaTpsllphhcch...........lcp.....hsoasDhFttLlpchF.....tsa.GllhlDuph.tLR+h..EsPhFKcllp+pptlpcuhcssQpphp.p.GhpshIpscss.lpLFhcp............-..s.RpLlp..h-stpF..hlpcsctsaoc-EllphhEppP.EpFSNNVVTRPLMpEalFsTlAFlGGPuEItYWuELKsVFchhslcMPhVhPRlplThlpc+lp+hLpch..........slshpclhh.s.s.l.......-t.+ppalppptspph.pchcthhc.......phpclapsLhp.ltt.psslp........hlpKNpphhhpQh-hL.++hhhslE+cp-lphcpFccIppsL+PhGu.QERlWNsh.hL..Nca.GhDhhc..h...PhsashpHhllp........................................ 0 48 94 121 +9912 PF10080 DUF2318 Predicted membrane protein (DUF2318) COGs, Finn RD, Sammut SJ anon COGs (COG4393) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 25.40 36.20 23.90 23.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.04 0.72 -10.75 0.72 -4.25 36 409 2009-01-15 18:05:59 2007-07-31 09:51:54 4 3 396 0 77 306 8 104.00 46 25.67 CHANGED plpDGKlH+ata.scsG+plRFFllpcsc...sphsssFDACplC..ucpGYh.pcGspllChsCss+hhlsoIG..psGGCNPlPl...saphcssplhIstssLpsGspaF .......................lcDsKLHRFsY...lus-G....K....slRFFlIp+hs..tsuhsssFDAC.lC..GDtGYh.pcssplIChsCsV+lhlsolG..psG..GCNPIPl.....pachc.spcIsIstpsLtuGssaF..... 0 28 54 70 +9913 PF10081 Abhydrolase_9 DUF2319; Alpha/beta-hydrolase family COGs, Finn RD, Sammut SJ, Eberhardt R anon COGs (COG4425) Family This is a family of alpha/beta hydrolases which may function as lipases. This domain is the catalytic domain and includes the catalytic triad and the GXSXG sequence motif which is a characteristic of these enzymes [1]. 26.00 26.00 51.00 35.40 22.10 21.90 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.74 0.70 -5.71 25 395 2009-01-15 18:05:59 2007-07-31 09:52:33 4 5 288 0 111 304 47 266.90 43 52.72 CHANGED IRVYVGLsu.u-os-pRAcLAlcELcRsGAF-RpVLVlsssTGTGWl-PsuhcslEahasGDsAoVuhQYSYLsShLohLs-p-tuhcuAcALF-AVasaWppLPpspRP+LalaG.SLGAhuopsua..shhc.ltcs.hcGALWsGPPh.SphWpplsspRcsGSPthLPhacsGphVRF..ssp.sslst......sss.W.GthRllaLQauSDPlVaasscllaRcPDWlpEPtGs..........DVospl+WhPlVThlQlusDhssusssPsGHGHsYs.pcalDuWsAlssP.....pGWospctpRL+ ...................IRlYAGLsu.........ucsh........pupAchsltELtRTGuacRpVllltssTGoGWlsthsssulEYhhpGDsAhluhQYSaLsShluFLsD+-ssppuucALFpsVhphhp.pLP.p.pRPKLhlhG.SLGSaGGpuuF......sslsshh.up............sDGAlasGsPh.sslWpplptpRc.GSPphlPlhssGcs.VRF..hsps...p.clpp...........s..sPW.sps..RllYlQHsSDPlsWWoPchlapcP.DWh+.E..t..G................DV.PphpWhPhVTFhQVo.ADMs....hAs..ssP...sGH....GHsYt.sphhshWAAVhs......ss.T.............................. 
0 26 81 99 +9914 PF10082 DUF2320 Uncharacterized protein conserved in bacteria (DUF2320) COGs, Finn RD, Sammut SJ anon COGs (COG5338) Family This domain, found in various hypothetical bacterial proteins, has no known function. 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.78 0.70 -5.74 39 519 2012-10-03 17:14:37 2007-07-31 09:53:37 4 6 416 0 193 493 51 322.80 18 73.62 CHANGED cP.atshG.hhGuFhlpPulphussassNhhp.ssstpusshhplssclphpS-WsRHtlshchpushstatssp.................s..spsshsspssuRlDlscpsplshpsphphtpEspsussshts.s..ptP..l...hpshuushulppchschplsssushp+psYpssph.sus.shsspDRshsphshssRsuYclpPuhpsFsEsphspRhYD...phDpsG......hpRDSsuhtspsGsph-hophlpG-htlGYhp+sacDsphpsloGhshsAsltWpPpphTolphssspplp-o.sssssuuslppshslplcHphpppLsspuphsaspp-Y.psts.....RpDchh.sspsuhsYtl.sRhltlpsplcacppsSsh....sstsastsslhlulplph ............................................................................................................................................................t.............sh.lhPtlp.shthssNh....t....t.................s......p..........s.............t......p......ss.h...h...........thpstlthhsc.h.t.p...p.ph.p.lshph.phtpahssp..........................................p..s...sshth.phphp.h.....-h.s.p.p.p.p.l.sh.phphthtpp..stsps....................s....h.h.......s...................p..................psh..s..ht.hsht.p.p...s...h.h........p.h.p.hshs...hpc...hYts.......sp.....................................p......p.......p........shs..hththphhaph..sth.p..hhph.hsp..p.ac......s..s.............p+s..s...pth...hhhhG..hp.h.p..h.s..tt..hpu..phthG..a.pp....sa.c....s....s.....p....h........ts.h.............s.......uhs.hsstl..pWt..s.....hp.h.oshphhsppth...........pss.....s....s...............s............s..............sh...ht.....pthslsh..s+p....h.p...s.plshpht.....hshtp.csa..psss........................ppDpph..shshs..hsYth..pRhlsl.....phth.pappp.sSsh......sshsappshhhlshp...h..........................
..................................................................................... 0 61 127 157 +9915 PF10083 DUF2321 Uncharacterized protein conserved in bacteria (DUF2321) COGs, Finn RD, Sammut SJ anon COGs (COG4306) Family Members of this family of hypothetical bacterial proteins have no known function. 22.90 22.90 23.40 25.80 22.80 22.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.02 0.71 -4.73 5 34 2009-01-15 18:05:59 2007-07-31 09:54:32 4 3 30 0 8 25 0 131.90 41 91.49 CHANGED MGTYhsAQIChNGHphToussps.ELhpsFCs+CGutTIspCPcCsoPIRGcYYV-GVlulGp-Y-...sPoYC+NCGKsFPWTccuLEAAsELlE-s-sLSsDE+EpFpssLsDLllE..TP+TclussRFKKlluKhuououuulRDIlVDVASEolKKuI ............MshYchAplC.NGH.hssshsps.chhpsFCspsGttTIhpCPpCsssItGc....haV-GVhsh.u..tsap...sPoaCpsCGp.aPWscptltus.pllc.t.tpLss-EhpphppslscLhh-..s.P+TplAsh+aK+hhsKhtsssssuh+-IlVDVhSEssKKtl................ 0 4 8 8 +9916 PF10084 DUF2322 Uncharacterized protein conserved in bacteria (DUF2322) COGs, Finn RD, Sammut SJ anon COGs (COG4390) Family Members of this family of hypothetical bacterial proteins have no known function. 20.70 20.70 20.70 23.00 20.30 16.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.29 0.72 -4.28 17 258 2009-01-15 18:05:59 2007-07-31 09:54:53 4 2 251 0 58 181 35 98.30 50 92.36 CHANGED Fp-hLpsLPul-HLuulslhsupGplltpI.stsGphGSLpVYpALAppasu.lstsAApcGLElFAEHotDA+tpPGKHPNIDhLhpllpps.s.hplp.l ..................FpD.LApLPuIDHLuulclhDus.Gp.lVt.pI.shsGKhGSLplYptLA..ppFsu.LstpAAppGLthFA.EH.ssDA+spPGKHPNIDhLhpllpps.s.Lplcs.h................... 0 11 33 47 +9918 PF10086 DUF2324 Putative membrane peptidase family (DUF2324) COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4377) Family This domain, found in various hypothetical bacterial proteins, has no known function. This family appears to be related to the prenyl protease 2 family Pfam:PF02517, suggesting this family may be peptidases. 
23.50 23.50 23.50 23.60 23.40 23.30 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.27 0.70 -5.15 28 607 2012-10-01 21:07:14 2007-07-31 09:56:08 4 2 500 0 78 421 23 198.90 38 86.09 CHANGED lhhhlPlh..lhhah+..+....+htlsh+shhlGulsFhlhuh...lL.......Etslphhllp.stssthh...ppPhlaslYuuLsAGlFEEsGRaluh+aLh++ppthss................uluaGlGHGGlE.AlLlGslshlshlhht..hhspGphph.h........tthhttl.t.ltshsshphhLushERlhAlslpluLSllVhhul+..p+.+hh....alhhAIllHAlhDh.......sAlhQsth..hsthhssEsll ...........................................l...llshl......hhah+...K......+hplshts.hhlGslsFhl..sp....lL..............Ep.lHhhllpsptssshh...ppPhla.h...lYG.hhhAulFEEsuRh.lhF+aL.K+hp.tps........................................ulAYGlGHGGlE.hlhl.G.hhohls....hhlhh..sslpsts.p.h...................h.pth....chl.ph...sshphhLhuhERIhAlshQlhLolhV.htAV+...pK....Khh.......aLlhAh.hlHAhhDhh......suLhQsth....lss.hlsEsh.......................... 0 16 34 51 +9919 PF10087 DUF2325 Uncharacterized protein conserved in bacteria (DUF2325) COGs, Finn RD, Sammut SJ anon COGs (COG4378) Family Members of this family of hypothetical bacterial proteins have no known function. 24.20 24.20 24.20 24.60 24.00 24.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -9.99 0.72 -4.16 68 671 2009-01-15 18:05:59 2007-07-31 09:57:41 4 4 610 0 191 488 34 92.50 23 49.82 CHANGED slLllGG..cphsphcphlcc...hGsphhhH....pts.pcppt.lsspl.tsDhVllhTshluHsshppl+ctu++psh..shlhs+.puhs.sltptLpphts ................................sllllGu..cphsph+pllcc.....hGschhha......ptshsppp..lsspl.t.DhllhhTshluHsshppl+ptAK+psl..sllhs..+.pShs.sltptlpph.t....................... 0 70 142 167 +9920 PF10088 DUF2326 Uncharacterized protein conserved in bacteria (DUF2326) COGs, Finn RD, Sammut SJ anon COGs (COG5293) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
26.80 26.80 27.10 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.92 0.71 -4.55 36 203 2009-01-15 18:05:59 2007-07-31 09:58:16 4 2 190 0 45 195 8 132.60 23 26.30 CHANGED plscplY..spsu.hlshpssppup...aphph.h.......ssstSpGpsptclhC..FDlulhh.stpcp.pt.cFLlHDu.la-slDs+phtphlpllpc.hp..hshQallTl.p-cLst.........phspcphll....lcL..s.spss+LFthph .......................................t.h.pplh...ptts...hphp..sp.p.s.........ap...hph.h.......ssstupGhpphclhs..FDlslhths.tpp.pt.tFLlHDu.lh-slDscphtshlplh.pc.tp........hs...hQhIlol.pDclsp.t.............thp.pp.hl..............lcL.....sp.pspLFthp.................................................... 0 15 36 40 +9922 PF10090 DUF2328 Uncharacterized protein conserved in bacteria (DUF2328) COGs, Finn RD, Sammut SJ anon COGs (COG5385) Family Members of this family of hypothetical bacterial proteins have no known function. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.86 0.71 -5.09 59 265 2012-10-11 19:05:54 2007-07-31 09:59:00 4 1 263 0 96 302 75 177.00 35 84.94 CHANGED VGAIsNGLELLs-sss....ptt..phsLlpcSupsAsARl+FhRlAFGAAGs.spplshs-spslhpshhpss.+.....hplsWpssss.hs+stV....+llLNhlllstsAlP+GGpl.pl......spsssthplpupGp+lplcsslhphLsGs...st.pslsupsVQhhhhhhlAppsGtplshphsssplhlss ......................................................VGAIsNGLE.LL--sus....sppt................AhsLI+pSAcs....A.o..ARLp....FsR.lAFGAAGo.....hG.....sp..ID....tu..-Ap....s.lspsah..p...s...p...+......splsWp..sscs....h.l....sKscV...........KLL...LNhlLlupsAlP.RG.GslsVt....h.s.sssss......p..ahlsupGphh..R...ls..schhchlsGs.....p..psl.cA.+sVQsaYs..hLLAccsG.hplslptss-plshs...................................................................... 
3 25 57 70 +9923 PF10091 Glycoamylase DUF2329; Putative glucoamylase COGs, Finn RD, Sammut SJ anon COGs (COG5368) Family The structure of UniProt:Q5LIB7 has an alpha/alpha toroid fold and is similar structurally to a number of glucoamylases. Most of these structural homologues are glucoamylases, involved in breaking down complex sugars (e.g. starch). The biologically relevant state is likely to be monomeric. The putative active site is located at the centre of the toroid with a well defined large cavity. 20.90 20.90 24.20 23.60 20.70 19.90 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.59 0.70 -5.29 79 731 2012-10-03 02:33:51 2007-07-31 10:00:16 4 22 543 4 234 693 90 215.20 30 13.28 CHANGED sah.hhsuEu+ls.hlAl......spucl.pptW.hthG...Rshsshstp..sLhoas.......GshFpahhshLhhc.hs.........ss.h.pss+tssthQhpY....utp.......huh.s.WGlSpSshss.shth.............YpAaGsP.hsh...........c-tVluPaAshhul.hs.PctuhtsLcpLh...t.shhG.......cYGFa-Ahsh.T.tt............t..shlscsahAhcQGhsllhlpNh.psshhhchFtssP .......................................................................................................hhshhhsEuhls.hlAl..........spssl.pptW..h.....phG.....cth....ss.h..stp...tsLho.hu..................Gs.hFpahhshLhhc.hs...............ss..h.pss+tsshtQhpY....ut.p.......huh.s.WGlotSshss.s.t................................Yp.ua.Gs.Pths................cssVIuPaAulhuh.as.PptultslcpLt...t..shhG..........pYGFa-AhshT.............................................shlspsahAhcQGhhllhlpNh.psuhhhchFpssP................... 0 76 157 192 +9924 PF10092 DUF2330 Uncharacterized protein conserved in bacteria (DUF2330) COGs, Finn RD, Sammut SJ anon COGs (COG4402) Family Members of this family of hypothetical bacterial proteins have no known function. 
18.70 18.70 21.40 19.40 17.20 17.20 hmmbuild -o /dev/null --hand HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.13 0.70 -5.59 26 145 2009-01-15 18:05:59 2007-07-31 10:00:36 4 6 120 0 43 106 19 275.80 34 66.64 CHANGED ssutAhCGhh...VusuDsplhssup.lllh+DG....pcTslsMp.sYpGc..s-cFAhVVPVPsl...spVplustclF-+l-phoAPRlschacp-sC............tt.t...sh..sssssuu.ussshG............lsV.schpVGtYElllLoup-usGLcsWLcpNGYplPsuApslLpPYlcpGhcFhssKlsssp.........suhspLcPLph+YcSschslPlRLuhlsA.......ssp...-lhlalLusp.RhpssNYppVtl..Nh.h.thhpp..p.h..Yttlhshsht...ssspAshTEYAhssussDPssh...shs.ptLtpLusssls..t.....................................................................................................................................hstpsaLTRLas+h.sspphsEDhhFp ..................................................................................shtshCGhh....Vsttssphh.stp.sllth.DG.......sppslsMp.shpu-......scs.hAh.VVPsPss...spVpsu-..t...phFscLDshoAPhl.c.h.c..s.p...........................t..sss.suustpssu.s..........spVhsphplGsh-sshLous-.sG....LpsW....L.s....c....NG.....Y..t.....l.........sussshLcPYl+pGhtFhsh+.lsssc.......................hhsssLcPlphsa.c..Ss+hhhPhRLuhhst.......psp.clhlahLoc+.R.phsshcssp.......................................................................................................................................................................................................................................................................................................................................................................... 0 19 35 42 +9925 PF10093 DUF2331 Uncharacterized protein conserved in bacteria (DUF2331) COGs, Finn RD, Sammut SJ anon COGs (COG4394) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 82.20 80.90 22.40 22.30 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.27 0.70 -5.67 50 370 2009-01-15 18:05:59 2007-07-31 10:01:19 4 2 355 0 113 360 39 352.20 41 96.25 CHANGED WDIFCpVVDNaGDIGVsWRLA+QLupEaGh.......pVRLWVDDLsuFs+lsPplcsshstQhhsGVplppWsssh..........shssuDVVIEAFAC-LPssalsuM..............stpspsPlWlNLEYLSAEsWV-sCHuLPSPpss.s...LpKaFFFPGFostTGGLL+...EssLhscRpuFpsssss+tshhppL.................G.....l.t.t....ssspllSLFuYEssuLsuLLctap...psspPlpLLVPpGRuhssl..t.....thhs...tshtsGsthp+GsLslplLPFssQpsYD+LLWuCDlNhVRGEDSFVRAQWAu+PFlWHIYPQ--DAHlsKL-AFLshYs.tslsstsssulpshapAWNsssssss.........Wtshtsths.....tlpp....+AppWstphhspsDLAspLlpFsps ....................................hDIFCpVlDNaGDIGVsWRLARpLsp.E.h.Gh.......pVRLWlDDlsuhtplsPshss...h..s.Qhh....pGlplppWpssh...........sshsssDVVIEAFAC-LPtsalpth...............tppptPlWlNLEYLSAE-WVpphHhhPSPpss.s...LpKaFFFPGFottTGG...LL+....EpsL..htpRps.a.pt.....s..ttppshhppl............G.....l........t.....ssshhlSLFuYcsss...lsshLcthp...........pu...s.......psstLLlstGphhssl...tp.h.hs...........t.htsGshhppusLplthLPFlsQpcaDcLLWssDhNhVRGEDSFVRAQWAu+..........PFlWHIYPQ--ssHLsKL-AFLst.hp...s.ths.......t.s..s....s....uhpphapsWNssts..s...........Wpshhpphs.....thpp....pApp.WsppLhsps.D..LuspLspFhp............................................................................ 0 20 56 86 +9926 PF10094 DUF2332 Uncharacterized protein conserved in bacteria (DUF2332) COGs, Finn RD, Sammut SJ anon COGs (COG4427) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 25.50 25.40 21.80 21.70 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.11 0.70 -5.47 37 294 2009-01-15 18:05:59 2007-07-31 10:01:55 4 2 278 0 75 245 44 317.90 35 96.49 CHANGED ltpsFppQActstphuSPhhucLhttlusphp.stshsupthhsatst..stupulPLRLhGuLHtLlLp.GpsstLsthYsst..pssstsh...sslppsltsapstlhshLcss.PQTNEVtRuAsLhsuhhhlupph.stPltLhElGuSAGLNLhhDRYtaphsss.p...h........GstsusltlsscWpGs...PP..hs...plspRtGhDLsPlDhsssssthpLpuYlWP-p.-RhtchcuAlulstp...ssp..l.cuDAh-hltptL...ttt.psshpllaHolshQYhPstpppthpstlpthGupAs.ppPLsalshE..spt..ssh.sttLp.......lchWssG....psphLucschHGpWlcWt ................................h..phhRp.uhhpspuuSshYptLshtlApD.-...shslhuth...........t.GpslP.hLhGulHhLl..Ls.G..+t..LtpaYsohs...p.........sspsh.........sthpchsppht-plhs.h.Lppt..sQTNEVtRsAhLhsuhthl.......hpph....ptPlsLlElGsSAGLpLhhDpYpYpY....s..ss..p.....a...........Gs.hsSsVp.l....sst..h...p...Gp......sP..hhp.......splVcRhGlDLpPhDlpss-chLhLpuhlWP-..pp.-.RlchhctAhuls+pt.......ssp.......LhcuDus.chLsshh....ttls...c-.AlhslaHohshp.lPt.ph.+ttlcttlptluAp....p..slh+Lh..............ss..cttl+.........................hcha.sG......htcslGcscsHGphhsWt................................................... 0 29 59 66 +9927 PF10095 DUF2333 Uncharacterized protein conserved in bacteria (DUF2333) COGs, Finn RD, Sammut SJ anon COGs (COG5345) Family Members of this family of hypothetical bacterial proteins have no known function. 
20.40 20.40 20.90 20.60 18.50 20.30 hmmbuild -o /dev/null HMM SEED 338 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.97 0.70 -5.79 23 372 2009-01-15 18:05:59 2007-07-31 10:02:50 4 2 281 0 102 291 132 236.90 34 85.16 CHANGED a........................tt...h+hlssshslhlllhhhlGhYWSpEPssFsVpspApptA..............ptpp.....pplVsGYTTTsTLIcls-TLLsKPGGYLSNDlhPPGLahDNMPuWEYGVLlQlRDLu+AhR+-FSRSQSQSsEDtDLshAEPpFNFDscSWhlPSoESEY+cGIctLppYhsRLs-sspssAQFYARADNLpsWLpcVppRLGSLSQRLSASVGpc+lNssLAG-ssu.ppuo.sssphtlKTsWhcIDDVFYEARGuuWALlHhL+AlEVDFuDVLcKKNApVSLpQIIRELEATQpolWSPhILNGSGFGlLANHSLVMANYISRANAAlIDLRpLLuQG ..............................................................................................psh....................................................................................................lsph.LL..K..Gha.ls.D.h.hP...a..hDNhsuaphGh..tlRchuhthhcshuR.pupSt.ct-LttAp.sp.h.......phsp......pu...Wh.......h.......P......p........s.u.YppulctLpta.scLt..........p..tpA.F.uRADNL.paltpltpcLG.ShSthLp................................................t......cssWhp..h..DshFa.uhGthaAh.thhpAhc.DFtpVLtcKph.s.shpph.+pLcss.t..hpP.hl.l.NG.s..s.u.h.h.ssH.hsMu.Yl.Rspusll-lpplLp....................... 0 26 55 78 +9928 PF10096 DUF2334 Uncharacterized protein conserved in bacteria (DUF2334) COGs, Finn RD, Sammut SJ anon COGs (COG5298) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
25.30 25.30 25.50 25.40 25.20 25.20 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.56 0.70 -4.95 40 444 2012-10-03 16:37:10 2007-07-31 10:03:24 4 6 384 0 94 382 96 229.30 23 48.78 CHANGED pshlRlEDl.os.....h.ssh.cpL+pls-hlhcpslP..ahlusIPsahsspsth.........ltpssphlpsL+ahps+GGpIh.....LHGYTHph......hsthoG.shEhht........................................pp.thhtp+lpp.ulphhsp.sl.hsssac....uP+Yshoppsh.phhpphFshhhtphthttssh.......................................lpps....t.hhhPpslshl-tsp.............tp..pttphtptspsshsuhFaHsah....h......suhp..........Lpcllsth ..................................shlplcDVsP........h..tshsplctls-hLhc.p.s.lP..ahluslPsatssptth....................ht.t.phsphL+hht.s.cGusIh.....hHGYsHph.................t.su..hc....h...ht......................................sp.phhpp+.lpp.ulp.hsc..t.....pl...hP...huap....APpYthsppsh..ch.h.tph.hs..s.hh.tp...h.tpsh.........................................htp..........hhsp.sht.lp.pp..................t........ht.hthhp.ps..h.shahash..............tht.........l.p.ht..h........................................................................................................... 0 32 54 75 +9929 PF10097 DUF2335 Predicted membrane protein (DUF2335) COGs, Finn RD, Sammut SJ anon COGs (COG5346) Family Members of this family of hypothetical bacterial proteins have no known function. 25.00 25.00 26.70 26.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.14 0.72 -4.34 15 168 2009-01-15 18:05:59 2007-07-31 10:03:47 4 1 163 0 32 110 0 48.60 33 35.58 CHANGED PsPslLpcY-pIlPGsA-RIlsMAE+EpcHRHch-ppt.......lctpp+cs+hGQ ..........PPPshLppYcpIhPusAc+lhpMAE+EQs..HRHphp.t............lc.ptp.s+hsp............... 0 10 16 23 +9930 PF10098 DUF2336 Uncharacterized protein conserved in bacteria (DUF2336) COGs, Finn RD, Sammut SJ anon COGs (COG5330) Family Members of this family of hypothetical bacterial proteins have no known function. 
27.80 27.80 27.80 27.80 27.60 27.70 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.19 0.70 -5.15 75 459 2009-09-11 00:20:12 2007-07-31 10:04:31 4 4 166 0 204 448 45 213.00 19 64.39 CHANGED VAtPlLppSshLsDsDLl-lspstu.psHhhAIApRtslotsVo-sLlctGstslltpLlpNsGAcloptuhppllcRutpssslttslstRs-LPsshhppLlstsucsl+pplht.ptt.hstpplppsh.....pcutspuss..thsststp.t.spthltpLcpsGc...Ls............suhlhphsppschs.hssAlutLuslshpplcplltcspsculhsLs+ssGLshsshtslhtthtthpph.sh.shpp.............hhspacpl.ssssApplLphh.......c ...............................................................................................................................................................lAtslLtpS..hLp-tsLlphhtpts..t.phhAlutR.tls..lssslhthst..sh..h...htN.tu..hs..sh..hhtt.tt........lt..hh.c.tls.thh..lh....hhs.ttht....t.h............t.htthh...........................tthtt....th...................t..t....h.tthlttlpt.ttp...hp.........................thlhphhhtt..pht.h.tulu.hsshs.thspthh.p.t.tslhhhhcthuhs.thh.hhhhhh....t....t.....thtt.............................h.t.htth...t.....h................................................................ 0 59 108 138 +9931 PF10099 RskA DUF2337; Anti-sigma-K factor rskA COGs, Finn RD, Sammut SJ anon COGs (COG5343) Family This domain, formerly known as DUF2337, is the anti-sigma-K factor, RskA. In Mycobacterium tuberculosis the protein positively regulates expression of the antigenic proteins MPB70 and MPB83 [1]. 
26.00 26.00 26.10 26.10 25.90 25.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.15 0.71 -4.53 90 784 2009-01-15 18:05:59 2007-07-31 10:05:33 4 6 638 0 335 801 62 181.70 20 72.39 CHANGED PssplaspI...ptpltspst...........................t.hh...phth...Wpthuh......uusAsAsllslshhhs.............................................ssssshlAslsss.......sussshllphc.....psppl.lhshssssss.ss+shpLWhlsss..ssPhSLGllssssss.h..slssshtsthss....LAlolEP.GGSP.sutPTGsVlhpGtl ...........................................................................................................................Psstlhpp.l......ttlt.ttt...........................................................................................hhp..........phph..............W..+.hhs.h.......susAsusllssshhhh.................................................................................ttshs.sss.h.h.s.ss.hsst.............ssss..s..h...ll..h..h...s...........................pspt.....l...lh.....s.hsh.s.s......ss...sspshplWhl...............sst....ssPhShG.l.h.ss.ssss.h........hlshshhssht..........lulTlEP..tGGSs....pPT.ushlh.h....................................................... 0 106 225 294 +9932 PF10100 DUF2338 Uncharacterized protein conserved in bacteria (DUF2338) COGs, Finn RD, Sammut SJ anon COGs (COG4408) Family Members of this family of hypothetical bacterial proteins have no known function. 
25.00 25.00 40.00 40.00 24.10 22.50 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.54 0.70 -6.12 6 271 2009-09-11 14:21:54 2007-07-31 10:06:33 4 1 260 0 13 118 0 418.70 62 99.03 CHANGED hsplLllGoGPsAIQLAlshptHusscluhsuR..sSp+ScRla-AlsptsphhpVuhQNstHpphpGpssIDphaKchcslps-W-TllLuVTADuYhsVLpQlshcsLppVKslILlSPThGSshLVpphLsshsp.-uEVISFSTYhGDTRhlDcp.....................tPp+VLTTuVK++lYlGSo..cusSpplp+LstlhsplsIplpshsoPLcAEsRNlSLYVHPPLFMNDFSLpAIFpsppsPtYVYKLaPEGPIT.slI+cMtthWcEhMplLp+hslpslNLL+FMsDDNYPV+spolsRtDIEsFspLssI+QEYLLYVRYTALLIDPFSpPDEpG+YFDFSAVPa++VapscpulhcIPRMPpEDYYRlthIQulAptLslssPplDphLppYEsulppahDsptcpphSsuF.spsFcpDhslIsppl..psps .........MSKlLMlGTGPVAIQLAslChL+uDacIDMVGRupoScKSKRLYQAYKK.-+pFEVKlQNEAHQaLEGKF-Is+LYKDVKsV+GEY-TVVhACTADAYYDsLQQLSLETLQuVKHVILISPTFGSpMlVcQFhSKhsp.DIEVISFSTYLGDTRIlDKE.....................sPN+VLTTGVKKKLYhGST....HSNSshspRIoALhEpLKIQLEVl-SPLHAETRNSSLYVHPPLFMNDFSLKAIFEG.TcVPVYVYKLFPEGPITMTLIREMRLMWKEMMsILpth+VPSVNLLpFMVKENYPVRPETLcEuDIE+FEhLPcIhQEYLLYVRYTAILIDPFSpPDEsG+YFDFSAVPFKpVYKNEQcVlpIPRMPSEDYYRTAMIQaIG+hLGlpTPMIDpFLsRYEuospuYpDhH.p-QpLSuQFssshFEtDtALVTKaLc...lNch.................. 0 6 11 13 +9933 PF10101 DUF2339 Predicted membrane protein (DUF2339) COGs, Finn RD, Sammut SJ anon COGs (COG5373) Family This domain, found in various hypothetical bacterial proteins, has no known function. 
29.50 29.50 30.70 29.50 28.50 29.30 hmmbuild -o /dev/null HMM SEED 745 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.61 0.70 -13.65 0.70 -6.25 47 995 2009-01-15 18:05:59 2007-07-31 10:08:10 4 4 648 0 278 982 83 443.90 16 80.54 CHANGED lutNhhlplGhlsLhlGssFLl+Yuh-psh..ls.thRlshuhlsGluLlusG.hLRc+............t.shhuhsLpGGGhuslaholhu..AhphYpllssssAF.slhsllsshsssLAlhppu.hLAslGllGGassPlll.SoGsushhsLauYhsllshuhhuluhh+tWchLshhuhshohhhhhhhh....h..ht....h.hssspshhhlaah..lalh.lslhhs..hptt.th.........................hs...ssLlhussllshsLththsp............hhuh.u.lhhuhlhhsls.hhlh+p..thphhhhshs.......................................hlslulsFssLulsluh.ssphpsh.......hWAlpushh........th...........s..lhllsuhhhhhs....................p..hhsshh...lshsshsusahhhthpttttttt................hhhhhuhhhhhhu..hhhtlppt.....................hhhhhsh.shhhhhhh....h..hth.hhhh.shhhh.h................hhh.h...............hth.hhsa..........shshhhhhhhhhhhtt...............................thhhhhhh.hhh.shhhhhth...........shhpthlhshhhhshhhhhh....................h...........shhhhhhh.lhh.hhhhhhhts.hhhs.....slshhPlhN.L.l.hhh.shshlhhhhtp...............hhthshhshhuhhsh..lhlsh.lt+hhpt.thh....................htt.h....phuaSlhWhlhulshhhhuhp...+p.pchlhhsGhuLlulsllKlFllDhuslsslhRllSFlulGllLLsluhhYpphhs 
....................................................hhhh.hhhuh..hh.h..t.....hs..h+h.hhhh.u.hhhhhu....h................................................h.s....h..uhuh..h.hh.hhh......h.......h..........h.....h.s..........h..hh.hh..hhuh....t..hs..h.s.....h.s........s..hh....................h..a..hh..h...h.............h...h.........h....h.....................................................................h.h.........h..h........................................................................................................hh...........................................hh..h.h.h.hhh.........t..............................................................h.hh.hhh.h..h.h..h.h.h........................................hh....h.shhh...........................................h.....h..h.....h.h.h........................................................h.............hhhhhhh...hhh..h.................................................................hh.hh..h.h.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.shhh.hh.uh..hhhhhu.....................hh..h....h......uhhhhhhshhKhhhh-h.t...h.+hh.ahhhG..hhhhu........h.................................................... 0 90 179 227 +9934 PF10102 DUF2341 Domain of unknown function (DUF2341) COGs, Finn RD, Sammut SJ anon COGs (COG5306) Family Members of this family are found in various bacterial proteins, including MotA/TolQ/ExbB proton channels and other transport proteins. 
The exact function of this set of domains has not, as yet, been determined. 23.40 23.40 23.50 23.50 23.00 23.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.15 0.72 -3.83 55 186 2009-01-15 18:05:59 2007-07-31 10:12:46 4 24 122 0 107 186 82 87.20 28 12.34 CHANGED scsuDlRFh....sssssp.LsYWlEphcs.....ssp.pAhlWV+lsth.......ssssIhlYYGNssAsssususss......F..hFDsFps...............shaphspssusss ...........suuDlRFs......sscpss..LsaalEpacs......tss..hAllWV+lPpl.s........spsslahYYG..N.s.....s.Ass..s..u..supss..........................FDs.ps................haphstt....s................................ 0 33 58 85 +9935 PF10103 DUF2342 Uncharacterised conserved protein (DUF2342) COGs, Finn RD, Sammut SJ anon COGs (COG5282) Family Members of this family of bacterial hypothetical and uncharacterised proteins have no known function. 25.00 25.00 30.50 27.60 21.90 24.50 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.82 0.70 -5.48 44 734 2009-01-15 18:05:59 2007-07-31 10:13:14 4 2 468 1 237 622 191 326.50 32 80.26 CHANGED V.sWcl.AtphApphs..us.......ssssspsststltpuschA-hhlcssTsLsssss......psplhsRscWlcsshssapplscPlspphspthts.....................................hhstluGthhGsQlGtsLuhLuscVLuphDhslshs...............ssGphhLVssNlsphtctLsls.c-hRlalsl+EssHpt.FsusPWLpsalhstlcpaupslshD..s....splpchscphp.s...sspphpph......tuhhcs.postQ+tALsRLpslhuLlEGas-hVhsssssthlPususlpcphccRRt.suGPs-phhtpLlGL-h+.+phccutshhctVsspsGhcstssVWscPDhLPsss-l-sPtsal 
..........................................VsashAtphApphs....us.............s...sss.....sttttstltcAschA-halcssTtl.....sssss.....................pstshsRspWlcsshssappls......sPlspp..hstshss.hs.t..............................................hhpthuuhhhGsQlGpuLu.LusplluphDhulsls.............................ssuphsLlssNltshtpsL.clssp-htlalsl+EsAHpp.FstVPWLtsplhutlptaupshplDh.............stlc-hhpphss..............sPp...t.h......pphhp...............tsl.hpshpTstQ.ctALpRL-sLhuLlEGas-tVhstsssshlPusstlpEthcRRRA..su......GP...s.......EphhtsLlGL-h+s+ph+-ussha.ctlspt.....h......GhcutstlWscPDhLPsss-l-sPttal.................. 0 72 179 222 +9936 PF10104 Brr6_like_C_C DUF2343; Di-sulfide bridge nucleocytoplasmic transport domain COGs, KOGS, Finn RD, Sammut SJ, Coggill PC anon COGs (COG5085), KOGs (KOG4503) Family Brr6_like_C_C is the highly conserved C-terminal region of a group of proteins found in fungi. It carries four highly conserved cysteine residues. It is suggested that members of the family interact with each other via di-sulfide bridges to form a complex which is involved in nucleocytoplasmic transport [1]. Brr6 in yeast is an essential integral membrane protein of the NE-ER, wit two predicted transmembrane domains, and is a dosage suppressor of Apq12, Pfam:PF12716 [2]. 25.00 25.00 31.90 31.00 21.30 20.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.86 0.71 -4.65 41 216 2009-12-22 12:56:39 2007-07-31 10:13:49 4 3 159 0 165 210 0 130.40 30 33.73 CHANGED lstYLQLhhNhllhslllallhthhpsl+pDlpp+hcppptplhtEhppCpcpYhpNpC.s....spp.lPAL-ptCppWccCM......sp...Dsphh.spsph.oAcslucIlNuFlcsloaKohlhllhhhhhhlhssNhshG.hR ............ttYlQLhhNhhlsslhlallht.hlhsl+pDlppphcptptclhtEhstCpcpYhpNcC..ts...spc.lPAL-phCspWccCM......sp.....DPstl...tpsp...h.oAchlA-IlNuFl-slSaKohh....hlhhhlhshhhsoNhshu.hR.............. 
0 49 97 145 +9937 PF10105 DUF2344 Uncharacterized protein conserved in bacteria (DUF2344) COGs, Finn RD, Sammut SJ anon COGs (COG5011) Family This domain, found in various hypothetical bacterial proteins and Radical Sam domain proteins, has no known function. 28.90 28.90 29.30 29.60 28.20 28.40 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.99 0.71 -4.80 74 639 2009-09-14 16:02:11 2007-07-31 10:14:24 4 5 634 0 218 549 66 181.20 28 45.89 CHANGED RlRl+auKpGph+alSHLDlhRhh-RAlRRuslPluaStGFpP+P+lohAsALslGspSpuEahDl-L....pp..sls.scphhppLstsLPtGlcllpsppl........hpssulsshlssupYplpl...........ss.pphpp..tlcphlstcpl.hp+p.s.K+G+p......phDlRstlhplplpstt...............h.l.h.thhpsssssl+P .................................+lRl+asKpGph+FlSHLDhhRhFpRAlRRAplPluaSp.G.Fs.PHP+lSa.AsALslGssSpuEYh..-lpL................p.c....sh.s.s...p.....c.......lhppLspt.hPs.Glcllcstch........tptssh.ts...hlsuupYtlph.......................ths.tphtp....tlps.hhst.c.p.l.l.p+to......K.pGp+.............phDl+shl.hp.lphtttp....................l.h.................................................................................... 0 113 185 210 +9938 PF10106 DUF2345 Uncharacterized protein conserved in bacteria (DUF2345) COGs, Finn RD, Sammut SJ anon COGs (COG4253) Family Members of this family are found in various bacterial hypothetical proteins, as well as Rhs element Vgr proteins. 
25.50 25.50 25.50 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.78 0.71 -4.62 93 1108 2009-01-15 18:05:59 2007-07-31 10:15:10 4 20 365 0 273 1206 46 145.80 28 19.47 CHANGED usssssssth.thspPhllluuPAGIuhsTspohphsAupp.lslsuGpsspluuGpshhssuucslSLFA...........ppsGl+LhAupGplplQAp.s-slplhApcslplsS......spsclplsAppclhLsuGGu.al+lp.uGsIphssPGphplKuAs+shsGPsu ..........................................................................t.....ttthsthtpshllhuuPtGluhsosps....h..phsA.upp..lthsuG.pshsl.ussps.hhhssu.pslolFu......................pptG.......l+lhAu.pG.......plplQAQ.s-.shpl.hA.pcs..lplsS.................s.psclpl...s.Apcc.IhLsuGGu.aIpl.p..susIphsss.GphhhKuuth.h.Gst.................................. 0 26 89 172 +9939 PF10107 Endonuc_Holl Endonuclease related to archaeal Holliday junction resolvase COGs, Finn RD, Sammut SJ anon COGs (COG4741) Family This domain is found in various predicted bacterial endonucleases which are distantly related to archaeal Holliday junction resolvases. 19.40 19.40 19.50 72.80 19.00 19.30 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.79 0.71 -4.53 15 76 2012-10-11 20:44:46 2007-07-31 10:32:13 4 2 76 0 29 67 31 142.10 41 87.63 CHANGED lLllsIlsLhhphhplptch-p+tpphacpWpp..tt............ct....ct-sthppWppccEcclR+-uVp+S+uVltG+loEQLsPaF..P-FcYsPpDARFLGoPVDaIVFcGLo-......G.shpcIVFVEVKTGKsupLocRE+tlR-sI-sG+VpaEll+hc ............................................................................................pphhuclch.+pptltpspKcuVspSRAVLpG+huEQhAPhL..PEFpY.PoDA+FlGsPVDaIlFDGho-.......u.cshpIlhl-VKSG.supLocsppuItpAIccG+VRaEslRl.... 0 11 19 26 +9940 PF10108 DNA_pol_B_exo2 Exon_PolB; Predicted 3'-5' exonuclease related to the exonuclease domain of PolB COGs, Finn RD, Sammut SJ anon COGs (COG3298) Family This domain is found in various prokaryotic 3'-5' exonucleases and hypothetical proteins. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.51 0.70 -4.80 36 607 2012-10-03 01:22:09 2007-07-31 10:32:51 4 3 575 0 155 1077 343 200.10 36 78.28 CHANGED s-FL.hhhH+lVuISsl.hc.....ctspF.+Vhsl...us.sts......EpEllppFachlc..chp.PpLVSaNGpuFDLPVLcYRAlhaulsAspaa-tG-........hKWN..NYhsRYpsc.HhDLhDlLutat.spAsssLDplAslhGhPGKhGhcGupVhchapsGclpc......I+cYCEoDVlNTYLlYLRaphhcGplot-sYtpplphlpshL..ppp....s.......ps.+..hptFLpsW ..........................................................-Fh.h..ha+IluIuslhhp..................s.spF....+Vts.l..........st....p..s............................Ecpllp...pF....a....ph....l-..........ca..p....P...p.....L.V..S.aNGpsFDhPlLhhR.....A..L......h.a...s.....l..s....As..p...a.a-.h.sp.........................................K.WN......NY.h.s..R....a......p..p....p.........H........hDL.....h-....h..L..u.....h.......a...t.......sp..t....shsLD..sl..s.p..h..hG..l...P...G...K.....h...s...hcGs....pV....h..c.h.a.h......p...s..c...lc.c.........I.c.sYCEoDVlNT..Yhl...aL+aplh+Gt.lstcp.Y...lt.hpphL...tp..pts..aht.ahts............................................................................................... 0 53 109 133 +9941 PF10109 FluMu_gp41 Mu-like prophage FluMu protein gp41 COGs, Finn RD, Sammut SJ, Bateman A anon Jackhmmer:Q1GH51.1(1-98) Family Members of this family of prokaryotic proteins include various gp41 proteins and related sequences [1]. 
25.00 25.00 25.10 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.72 0.72 -4.06 114 1089 2009-01-15 18:05:59 2007-07-31 10:35:28 4 3 785 0 169 674 13 81.60 26 78.61 CHANGED slpL.ppP...hph.sG...pp.....l...splsl.RcPpstDhcshp.........t.sp....ssc....tp.....htl....luplsu..ls.c-lppLshsDasplptthsshht ......................lpLppP...lptst....pp.........l...spl..sL...+cP..s.stsl+usp..........................shts........-ss....th.............hpl......lsplos...lstp-l..sphch.sDhtsltttlhsFh.h.......................... 0 32 80 130 +9942 PF10110 GPDPase_memb Membrane domain of glycerophosphoryl diester phosphodiesterase COGs, Finn RD, Sammut SJ anon COGs (COG4781) Family Members of this family comprise the membrane domain of the prokaryotic enzyme glycerophosphoryl diester phosphodiesterase. 28.90 28.90 28.90 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.67 0.71 -4.78 31 1075 2009-09-11 03:27:55 2007-07-31 10:36:16 4 4 932 0 95 639 6 147.50 28 26.66 CHANGED llYhpFuhLllshhplhtpp.ho....hcplhpp.shpplpplpssshlahlhYhlLllPFu..slshposLLsKlpIPpFI....h-alhpshhh.hlhhhlhhllhhalulRLlasLPlhllppp.sh+pAl+pSWphT+p.phhphlhhhlllhhhlsh.l ......................................llahphuhLlhh.shthh.pcp.hhp....h+plh+p.shth.l++.lhshph.lahhhYh.h.....lllPhh..shhhp.os..llspIhIPpFI......hs..L...hc.shhh...hl.....shhhhhlll........h.....YlslRLlF.s.LP.hhllccp.shpcAh+hShphTK+....pha+llhhhlhlhhhlh..hh....................................... 0 31 64 77 +9943 PF10111 Glyco_tranf_2_2 GlycosTransf; Glycosyltransferase like family 2 COGs, Finn RD, Sammut SJ anon COGs (COG4092) Family Members of this family of prokaryotic proteins include putative glucosyltransferase, which are involved in bacterial capsule biosynthesis [1][2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.65 0.70 -5.32 12 436 2012-10-03 05:28:31 2007-07-31 10:37:56 4 22 388 0 82 19422 6862 205.20 17 53.59 CHANGED SlIIPlthutt...l.c+l...Ltptpph..s.shpllls-ssssh...ppltplhsp.tth.hlpppstpphhu.utsRNtusphu....pochlhFhDlDshhs.shhpphlp.....tlpps.sthhhlPshYLsppuspthhp..t.h.cpthhcshhs.ttshht........hu..o.ushlhp+chFltlGGaDEsFhGaGhEDhEhhhR...Lthhthth..scshhh.chtphshh.........pGFRtaFthhuhshh...thhhh.HhaHppsptpsYh..pppttNcthh.pph+.h ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...............h......................t...........u....h.........h...p...N.........h....u...h.....t.........s...................p.s..p....h.....l.h..h.h..D....s....D...h....h.......p.......p......h...t...p...hhp.................................h.t.....t....t...............................h........h....h.......h................h........h..........h..............s......t...........................t........s..........p......................h......h.......p.................................................................t................................h....................p........p.....h......h.....t...................t.......h...h.t.........................................................hh..s........h......o.....u.......s.....h......h.....h..p....+.p.pa.....h....p.....l......G...........G.......a...-..E...s...a............h....s......a.....G.......h.......E...D.h...D..h..h..h..
R.........L..h.t..h..s...h.......h........................................................................................................................................................................................h........................................................................................................................................................................ 0 28 53 67 +9944 PF10112 Halogen_Hydrol 5-bromo-4-chloroindolyl phosphate hydrolysis protein COGs, Finn RD, Sammut SJ anon COGs (COG4915) Family Members of this family of prokaryotic proteins mediate the hydrolysis of 5-bromo-4-chloroindolyl phosphate bonds. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -10.96 0.71 -4.91 20 680 2009-01-15 18:05:59 2007-07-31 10:38:18 4 3 601 0 75 370 10 187.00 27 78.74 CHANGED M+hhhphll.hshlGhshhslshhsshhuhs.sFll.uhlluhuuhhssahsspphspt+.............alccpGLocc-YcYl+psLcEA+pKIppLpKshhpl+slpshcpsp-lh+lu+pIaptl+p-Pp+FacscpFaYp+LDslV-LoEKYshLsppPhKst-hppsLccTRhTlccLscslppDLpclls-Dl-sLDhEl-lAK .................................................................................................................................................................hu..hhhlh.hhhhh.............h.....sh.l..hhs.h.hh.h..h..h..h......h.pphtptK....................................ahpphGLo.cp.-.h..c.h...h+pplscs.+...pplptlpcshs.ph+sl+......s....h.c.plschhplu+uI..a.pplcpcP.pph.hcscpFhYs+lssslcLs-pYscltc..pshK.spphpppLcp.o.ctslDplpcsltsDhccLhp-Dh-cLDhEhclsK................ 0 24 48 61 +9945 PF10113 Fibrillarin_2 Fibrillarin-like archaeal protein COGs, Finn RD, Sammut SJ anon COGs (COG4018) Family Members of this family of proteins include archaeal fibrillarin homologs. 
25.00 25.00 534.40 534.20 18.10 17.60 hmmbuild -o /dev/null HMM SEED 505 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.56 0.70 -6.16 5 30 2009-01-15 18:05:59 2007-07-31 10:39:21 4 1 29 0 19 29 1 505.70 56 99.96 CHANGED M+DLI+EAlNDLDAAhEL+KLhhp.......tEpslpEVVDAVsDLScEEtpKLGusFRRFPLGCDLlEIuVGPCASDLohsslLuNClLADpMGhPIHVCAYAlADIAEsaGM+PIELh+EVhENVEVPLDLDHFGRYGPMRFP+-ITuCtGsCYhEGPPFKGCPR-RIH+RLIDKE+EtusDhEEWlKLASSVCVNVTsEQG..A-sHAAPL-EMc-VAEsARKaGKGVEAIhHVGDGYDDLIoGlKAul.DlcVDVFVlEGGPFNRAcDRLcAFA+AVAluRILVPGKVVATNGAYEDECRVGLRAGLNuILTGFP+NHHGYMCGYSPGTARRGpFGLPRVL+IMKEEl.uc.slTtsPlsK-pLEALARAs+FLGsN...hVYPpcIGshYlGDAHW.AuLssoslac+s+lsKTV--l....tst.suDTVALLGGRFlSWuIAcKLDtl.V-EllISDsDPWVE+ATV+lLs-ELcsslaussGDD+KAlEpADsSlITTsIPpIut+IupKhsDAlT..Ll M+-LIK-AlN.DhDuAhELpKh..........t-pDllElVDAVsDLShE-h.KLGusF++FPLGCDLsElsVGsCASsLsLhplltNClLoDhhGhPIHlCAYAlADIAE+cGhpPlEVM+cVh-sV-VPLDLDHFG+aGPMRFPK-ITtChG-CYhpGPPacGCPR-RIHKRLI-KEKEtusEh--WlKLSSoVsVNlspEQG..uEsHAAPL-Ehc-VAchA+KaGKGlEuIhalGDGYDDLIoGlcAsl.DlsVDVFVlEGGPFNRAKDRLcAFAKAVAsSRILV.GtVVATNGAYEDECRlGLRSGLNsIloGFPtNHHGYMCGYoPGTARRGNFGLPRVh+IhKEEl.ts.slshs.lsK-.LcALApusKFLshp....IYPp.pIGsaalGDAHW.sslpsSslYcplp..lsKTl--l....tpt..ss-KVulLGGRalSWuIAccLc...s-ElhISDsDPWVE+sTV+lLs-s.shssassNGsD+cAhcpADpolIoohIPpIshKIpsKh..-Ahsl......... 0 5 9 15 +9946 PF10114 PocR Hist_Kin_Sens; Sensory domain found in PocR COGs, Finn RD, Sammut SJ, Anantharaman V anon COGs (COG4936) Domain PocR, a ligand binding domain, has a novel variant of the PAS-like Fold [1][2]. Evidence suggests that it binds small hydrocarbon derivatives such as 1,3-propanediol [1]. 
20.70 20.70 20.70 20.80 20.50 20.60 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.01 0.71 -5.11 71 805 2012-10-04 01:10:46 2007-07-31 10:40:04 4 113 595 0 236 604 20 166.20 26 39.32 CHANGED lp-l.lDlptlQpl.-sFschs.tlshsllD.hcGpsl..tss..sapchC.chhRspspspcpChcSDtphu.puht...G.c.tlY+C.+sGlhDhAsPIhlsGpalGslhsGQhhhccps.-h.hhpptup.chGhsccp...altAhccVPhloccplpsshphhhplushluphuhpplphtp .....................................................................hls.chlpcltpsFupuT.slAhll.....Vc...hc.........Gp.........l...o..chs.......sa..s.sFC..phh..Rp..ps....p...tp.p.p.Cpp...s.Dtp.u.G.hc...As+..............ss...pP.hI.Y+C.HsG.Ls.DhulPlll.ss.p.hlGhl.ls...GQVh.h...p.s..s..sc.......t..h........h...p..h.........s........c........s....h.ptc........hhpt..hpplPhhshp.cltssuphLthlsp..hhptthp......t......................................................... 1 116 193 212 +9947 PF10115 HlyU Transcriptional activator HlyU COGs, Finn RD, Sammut SJ anon COGs (COG5453) Family This domain, found in various hypothetical prokaryotic proteins, has no known function. One of the sequences in this family corresponds to the transcriptional activator HlyU, indicating a possible similar role in other members. 21.30 21.30 21.30 21.40 20.80 21.20 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.09 0.72 -4.15 31 240 2009-01-15 18:05:59 2007-07-31 10:40:28 4 1 238 0 48 133 8 92.50 49 97.45 CHANGED M...uhhs+LF....Guuptst.......pscP....-YcGFhIhspPhpEuGQaRlAupIpKcl.sGEh.KsHcFIRuDllsoc--Ap-hsl+KA+hhIDQpG-plF ................MuhhsRLF.......Guspt.sEss........psEPh.....EYKGFhIhspPhu.EuGQaRVAG+IoK.pI..sGEl..KsHRFIRuDlhs.ucpDAt-.lhL+Kup.hIDp.GspIF.......... 0 9 23 33 +9948 PF10116 Host_attach Protein required for attachment to host cells COGs, Finn RD, Sammut SJ anon COGs (COG5622) Family Members of this family of bacterial proteins are required for the attachment of the bacterium to host cells [1][2]. 
20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.28 0.71 -3.95 79 395 2009-01-15 18:05:59 2007-07-31 10:41:06 4 4 363 0 166 378 47 128.90 23 82.09 CHANGED sallVADup+Aplhpspus.....tt.s.h.tltph.c.psp.tsp-lss.....D.psGc.hpstst..t..............ut.tps-h+phpccpFAcplAshLpc..ttppsch.ccLlllAsPphLGtLRppLppplpptl.hsElsKDLTp.t.....ssp-lpptl ..........................................hllVsDuppAhlhpstst.........th...h...ht...ph.p.pt...p.t..spphsss.psGp..t................................sh.tpss.....pchp..ccpFA+plAcpLpc..t.spp.tca.cc.LlllAsPp....hLGtlRppLsptlp.phl.ht-lsKDLsp.h.shpclpctl........................................ 0 49 98 125 +9949 PF10117 McrBC McrBC 5-methylcytosine restriction system component COGs, Finn RD, Sammut SJ anon COGs (COG4268) Family Members of this family of bacterial proteins modify the specificity of mcrB restriction by expanding the range of modified sequences restricted [1][2]. 20.90 20.90 21.10 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.94 0.70 -5.51 40 909 2012-10-11 20:44:46 2007-07-31 10:41:42 4 2 849 0 160 671 83 298.40 25 74.67 CHANGED l........+spsYVGhlphsss......plEILPK..hsppss................................................+phLhpMLp...........hstslp.hppsshsslpptch.sLhElhhhh..Flptlppll++Glhp-Yhphp-spphl+G+Lplsppl+pss..sppc+htscaD.Fs.Dps.NRll+suLchltphs............psssshc..hlpcltshh....p-lshsp.stpshpphp..hsRhhscYcslhthsclILpp..psshstpGcp.pshuLLFsMpplFEpYVu....thL+................tplssshplcsQsssha.Lssc.pspth......FpL+PDl..llpppspt...............................hllDTKWKhlps.....tpphu..lupu.DhYQhhAYuptYpssput......LlYP 
...............................................................................................................................................................................................................................htshhGhlt.h....t..t........lpl.s+hs..t..........................................................................p.hL.hhhLp...........................................h..hshh...hp.h...s.p..h.ss..h..tppc...pLhp.lh.hhh...F.ctl.pt.hl+..+G...Lh+-Yh..php.cs...s.h.l+Gclplp.pp.l.+p..sh..shp..tp.h..t..sphcEaohDsshNpll+ps.l.phlh.ppp...........................p.sp.pphp........plhc.l.h.ha................pt.l.s...h.h...p......h..p...ts..h.....sphh........hpphh....ppYctl.hphshhlLpt......tp..s...h..t...t..up...p......c.....h..h...u....h....L......F...s...hpt....LaEcaVh...........phLp...................thh..t.s..p....p.s..p...s.p...s...t..sha......hlsp......................h..plp..PDh....hh..c..p.p..t....................................lllDsK.a..Kthp...............spps...lsps..DL.aQlhuYsht......hpttps...........LlaP....................................................................... 0 51 99 130 +9950 PF10118 Metal_hydrol Predicted metal-dependent hydrolase COGs, Finn RD, Sammut SJ anon COGs (COG3687) Family Members of this family of proteins comprise various bacterial transition metal-dependent hydrolases. 
30.00 30.00 30.10 34.80 27.30 29.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -12.03 0.70 -4.93 54 983 2009-01-15 18:05:59 2007-07-31 10:42:16 4 4 332 0 205 849 96 251.60 30 85.30 CHANGED lpsRclcF-hss..hs..taWhss-PhsTphhNuLSlhhPtGEpaFlcul+phpstl.....pDscLpp-lcuFIuQEAhHu+pHpthNp..tl.ptpGhs.lpt.h-thhphhhphhtc................hs.htpLAhssAlEHaTAlluchlLp.p.phh.tusDPphtcLacWHuhEEsEHKuVAaDlattlsu......pYhhRstshhhsohhhhhhhhtssth..hlppD.........tthhp.h+.shhcsh..phhh.u.t........Ghhp..thhsshl.sYac..PuFHPhpc....ss ....................sRclcFshss....hs..hpW.h.ss.sshhTHhhs.sLShlFPsGEcaFlcosRphp...spI.....pD.s....p.L+pclpuFIGQEAhHo+tHpthNcth..pptGhs...sct....hc....phth...hh...hp.hhpc..............hhshhhpLuhssAlEHaTAh....luchlLs...s.ph............tssDsp.htsLahWHuhEEsEHKuVAaDVaptltu.........sYhhRhtshlh.shs....hhh.sh.hh.h.hshh....hhppD..........tthhp..hc..shhchh..phhh....t...........uh.hp...thhtphh.sYh+..PsFHPtpps.t........................................................................ 0 53 112 163 +9951 PF10119 MethyTransf_Reg Predicted methyltransferase regulatory domain COGs, Finn RD, Sammut SJ anon COGs (COG4797) Family Members of this family of domains are found in various prokaryotic methyltransferases, where they regulate the activity of the methyltransferase domain. 25.50 25.50 26.60 28.10 22.80 25.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.75 0.72 -3.49 20 365 2009-01-15 18:05:59 2007-07-31 10:42:41 4 17 288 0 96 333 26 85.60 31 15.60 CHANGED ppsctYLhHEYLpthNpPhYat-FssphuptsLpYlu-ushtcshsthhhspst..pthltthscshhp..EQahDFhssRpFRpoLls+ ......................tsctYltHEYlpt.NsshYapcFhcph.pcpp....LsYluDsslpsp.a.sshhsp.pst..pph..lptss.cshhp...EpYhDFlssRpFRpoLls+..................... 
0 29 55 72 +9952 PF10120 Aldolase_2 MethylPyrKinase; Putative aldolase COGs, Finn RD, Sammut SJ anon COGs (COG1992) Family Members of this family of archaeal and bacterial proteins are likely to be aldolases. 25.00 25.00 27.20 49.10 22.70 23.20 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.86 0.71 -4.88 52 250 2012-10-03 06:25:16 2007-07-31 10:43:11 4 10 148 6 156 245 49 164.60 30 46.24 CHANGED cplpcAlphltppst.hspLIPEVtoNluhuls...A+shpDVAulsGR.Isphts+stssusspFGuSpHlARllLsshchsschRuslNl+Ysccllcth....cchGhclsph-RppEPppsp.....oh.ahlcpshcph..sphPDl..IaDpGshGKEPhlhlhGcssh-Vlcplhpl ..........................................plcpAlph.l.tptpt.hhtLIPEVtoNluhuhs...AcshpDVAulsGR.Ih.p........h.tspshssusspFGuSpHlA+hlLsshchsPpl.RushNl+Ysccllcth....cp.hGhplsphccpp-Pppsp.....sh.ahlcpsh........pph..........sp..............sPDl..lh-pGshGpEPh.lhlhGcssh-Vlc+lhp............................. 0 38 83 123 +9954 PF10122 Mu-like_Com Mu-like prophage protein Com COGs, Finn RD, Sammut SJ anon COGs (COG4416) Family Members of this family of proteins comprise the translational regulator of mom [1][2]. 22.90 22.90 22.90 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.96 0.72 -4.91 6 135 2009-01-15 18:05:59 2007-07-31 10:43:53 4 2 114 0 27 112 2 45.50 40 68.66 CHANGED Mh+slRCGHCpKLLARhsshspLpIKCPRCtTLNHV+AsShp.p.Pp.p.pc .........+slRCspCsKLLu+h......s.shs...plpIKCPRCtslNp.................pt................. 0 10 15 22 +9955 PF10123 Mu-like_Pro Mu-like prophage I protein COGs, Finn RD, Sammut SJ anon COGs (COG4388) Family Members of this family of proteins comprise various viral Mu-like prophage I proteins. 
21.90 21.90 22.10 22.40 21.70 21.60 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.02 0.70 -4.98 21 423 2009-01-15 18:05:59 2007-07-31 10:44:10 4 6 304 0 68 385 21 247.10 23 86.85 CHANGED lpLlPtGpFpupDGRs.p......ssWhlssssuttllsphs...ttttclsIDYEHQTlh.ttppGpPA...PAAGW..lcplpaRssc...GLa.ucVcWTscAtphIcscEY+YlSPVFhaDpssGplhplhsAALTNpPuLssht..tlsAlss...............t.s..pcpp.sphlctLhthLGLsssuss-t..............hh...........sthsssttsshsshtsshsthsttptphsshss..t..........sDhupalPlsshptl.......psclssLpsphsshph....-thlssAlp-G+lhPup.+sWuppLuppss..suLpsalssssslsALsupQT....h.sssststsusLos--hulsctLGlot--ahK ...............................................h.P....G......h....s.....pu..p...........h.hs..t.htth.h.ttht...t......t...sl.h.lDa-Hpshh.........st.s.......sA.u.GW....hp..t..h.hpts......u.l.h.sp.s.c...as.pu.tthltstpatalSsl.h....h...st.t........G...th....h......p.....lh.........sAL..TNp..Psl.th.......lh..shtt..................................t..t..p.....p..hpt.lhthh.s....l....ssstt..............................hh.................t..t........t.t.....t........t.......t.........................................................................s.t.hh..h.t....t.h.pt.l..........ttph...t...shp...t...p....t...ttth..........pthlptAhppG+.l.h..u.....ctah.thstp.....tthpshl..pt.t......s..shst.p......................t..................................................................................................... 0 22 44 57 +9956 PF10124 Mu-like_gpT Mu-like prophage major head subunit gpT COGs, Finn RD, Sammut SJ anon COGs (COG4397) Family Members of this family of proteins comprise various caudoviral prophage proteins, including the Mu-like prophage major head subunit gpT. 
20.10 20.10 20.30 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.92 0.70 -5.44 23 696 2009-01-15 18:05:59 2007-07-31 10:44:45 4 10 447 0 86 552 204 140.20 27 35.41 CHANGED LpuLhsuFpssFpcGls.tAsopasplAhhVPSootsssYGWLGphPphREWl.GcRllpslpuaGYsIpNKsFEoTVuVsRsDIEDDplGlYuPlhpEhGcuAutaPDELlFuLLKsGFoThCYDGQsFFDTDHPV.spssGTsssVusss.............ussGssWFLLDsSRslKPlIaQpR+thpFsshscss.D-pVFhpscahYGVDuRsNVGFGFWQhAauSKp.sLsusNatuAhpAMpuh+uDuG+.LGI.....+PopLVVPPsLEpsAccllps-...hssuGu..o......Nsa+s.sscllVsPaL ................................................................................................................................................................................................................................................................................................................................pt..hs..htsh.tAhphMt.h.p....t.-.st+......LsI.....pPs..hllVPsuhEshApp.llput......htsus..............Nshps.hhplls.s..th............................. 2 32 59 73 +9957 PF10125 NADHdeh_related NADH dehydrogenase I, subunit N related protein COGs, Finn RD, Sammut SJ anon COGs (COG4078) Family This family comprises a set of NADH dehydrogenase I, subunit N related proteins found in archaea. Their exact function, has not, as yet, been determined. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.20 0.70 -5.18 8 32 2012-10-02 00:39:38 2007-07-31 10:44:57 4 1 32 0 21 166 53 219.20 57 99.39 CHANGED Mh...o..IuGplLslIPFGDIVhYhSsaohlhFlsAlhFTlllhlo+PE+QlEhpaGp.u.+hcpVsscEhRh+RFMAIlCGlATsGAhlTGDlFNFoLFlAlIGIsNIGIV.oAV+pcaVLsAAFpYGllAMlATLPLFGGAAlILAsTGTLSIaELuth...uts.hllauKlLLslGVlGEsGlAPFYAAKAEhFRAPGAPYILMIHLSSLLlhlRslEILLol .................................................................h....tsluGphhGhlPhGDIVhYhTsFollhFlsAllFThllhloKPE+QlEAphtchGs+hphVshc.EhKIRRF...MA..I...l...C..GlA.T.A..GA.M.l.TGDLFsFsLF..lAlIGIsNIGIV..SAV......K.p.c.a.VLNAAFpYGlIAMlu.oLPLFGuAAllLApTGTLS...laELu.ph........sss.....llat.K...l...Lh.slGhh..G.EoGlAPFY..AAKAEMFR.APGuPYI.LMIHLSSLLlIlRslEILLol................................................................ 0 5 11 17 +9958 PF10126 Nit_Regul_Hom Uncharacterized protein, homolog of nitrogen regulatory protein PII COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4075) Domain This domain, found in various hypothetical archaeal proteins, has no known function. It is distantly similar to the nitrogen regulatory protein PII. 22.90 22.90 22.90 74.90 22.30 22.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.23 0.72 -4.29 6 33 2012-10-01 21:59:08 2007-07-31 10:45:06 4 1 25 0 24 32 0 108.20 47 95.18 CHANGED M...........................................+lhl+LFVEsENlGKshNhLo-sGITGFYlhEY+GhSPscWKGFplc..EDPEhAIchlpDhSccAVlIsTVVsEEplc+IcchlcE+Lss-RYTlIclPlppIcVsts .....MKlLl+LFVEs-NlGKAINALoEuGITGFaLhEY+GMSPp-W+GFhlp..EDPEhAlcllp-hSpsAVlIsTVVsc-pltcIcchlcE+Lps-+YTllElPlhsIpVNt... 
0 5 10 18 +9959 PF10127 Nuc-transf Predicted nucleotidyltransferase COGs, Finn RD, Sammut SJ anon COGs (COG3541) Domain Members of this family of bacterial proteins catalyze the transfer of nucleotide residues from nucleoside diphosphates or triphosphates into dimer or polymer forms. 21.10 21.10 21.10 21.30 20.90 21.00 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.69 0.70 -5.03 47 840 2012-10-02 22:47:23 2007-07-31 10:45:28 4 4 657 0 203 769 31 217.10 25 84.92 CHANGED M+ppl.pcLpclEpcp....sl.........+lLaAsESGSRAWGFsSs..DSDYDV..RFlYlcsh-aYlul.......p.t..+DVIEhslscpLD.lsGW-L+KsLpLht+uNPsllEWLpSPll..YhpssshhpplpthspchFsscpshaHYluhAppsh+pa....Lpsc..pl+..........hKKYhYlLRPlLuspWlpptts.hPPhpattLhtth...ssslhsclspLLshKpputE.hphhst..hshlcsalcpp...lcphpphspt.pp........schptLsphhhc ............................................................................h.th.tp........th...........pllauspSGSRuaGhso.......-SD...h..Dl....Rh.lal.ps.h.cha.l.pl..................................pt.....c-sl....E...h...s...h...s...c...p......hD..lssa-l+KhlpLhtpuNPsllEhL.tSPhh....h..p..p..t.p..h..ht...th..h.th..s....thF...s.pts..h.h+Y...huhAp..pph+ph..............hpsp.....ts+......................................................hK+hhalL.Rs.llssphl.ttt....s....h......Ph...................t.........hhtphp.....tLLthKptt.p.ht..........h.....lp...t.....hlptp.......ht.ht......s.h.pt......sht.lsphhh................................................................................................................................................................................................ 0 79 146 177 +9960 PF10128 OpcA_G6PD_assem Glucose-6-phosphate dehydrogenase subunit COGs, Finn RD, Sammut SJ anon COGs (COG3429) Family Members of this family are found in various prokaryotic OpcA and glucose-6-phosphate dehydrogenase proteins. The exact function of the domain is, as yet, unknown. 
21.30 21.30 42.30 41.80 20.90 20.60 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.75 0.70 -5.49 65 561 2009-01-15 18:05:59 2007-07-31 10:47:06 4 3 535 0 172 439 195 257.90 38 76.06 CHANGED sssshSpp+PsRlIsl...shssspss......................................L-ApVps........GussusuEhllLRssussh.pcstulVhPLLlPDhPlhlWWsus.sPssshhc.LuphupRhIsDosthss.PhtsLpthtp.....shssussDLsWsRLosWRphLAtshDtPspcs....lsplslsst..ss.....sP.suhLlAuWLAsRLshslppttstt......................................slhulcLtsp.......ssslhLs.stsut.shplptsGtss......pslslscpsspphLAc-Lc..RhssD.lYtpul ..............................hAscAS+cHPsRlIsl......spssscss..........+........................................................LDApl+s............Gu-..uGsuEhlVLRhpGshu..p+ssSVVhPLLLPDsPVVsWWPss.u..P..s..ssu..pD..slGtlApRRIT.Dus.ss...pc..PhpsLtp..h.ts.......sassGDoDLuWsRlTsWRulLAuAlDQsPtp......lpus..pVput.....ss..................sP.sstLLAuWLutRLssPVp+ssstss........................................................ulhslpLsps..........suslhlpR.s.sup...hAslph..sGpss.............phluLs+Rs...st-hLuEELR..RLcPDclYstul.................................................................................. 0 53 128 162 +9961 PF10129 OpgC_C OpgC protein COGs, Finn RD, Sammut SJ, Bateman A anon COGs (COG4645) Family This domain, found in various hypothetical and OpgC prokaryotic proteins. It is likely to act as an acyltransferase enzyme. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.37 0.70 -5.77 62 563 2012-10-02 17:00:17 2007-07-31 10:47:48 4 3 419 0 168 593 70 345.30 29 88.01 CHANGED RDhR.lDhaRGlALhhIhlsHl.....Psshhp.hlTh+saGFSDAAElFValSGhssuhsY.uphht.....cpGhhsust+lh+RsaplYhAalhlhhhhlshhuhhuhhh...psphlpph.....................slthhhppPht..slhshlhLpapPshhslLPhYlllhhhhPhllhhhhpt.sthslusShsLahsAthht..........hslssaP...sshtWaFNPauWQlLFVhGhhhuhthtpt....thh...hpthlhhlAhuhllhu.hhhthhhhh..h.t.h.......s.hh.thhhs..hsKopLu.hRllHhLALualssphls.tshthhp............t.hhpslhhlGppSL.VFssGshLuhhsphlhtth.ss....shhhp...hll.shsGlulhhulAhhhphh+p .......................................................................Rphc.lDhaRGLsLlhIhlsHl..........sssl...lp.....hh.....T...h+.....s..a.....Gh.s.....D.......A.....A..EsFVFlSGhssuhsY.uph..hh.....c.t.sh.hsus...t+hh+RuhplYhsalhhhhlhls.hhs.....h.......hsh......h.hh.......ps.hltph.....................sl.sshhppPht......slht.h.l..hLphpPshhsl...LPhYll.....hhlhh.PlhLhlhppp...h...hhLuhS.....sslWhh.uthht....................h..shsshs..ss.hW....a.FNPhuW.Qh....LFshGhhsshtstttt.....hh......hp.....h.lh.hl.usu.hl..l.hs..hhht......hhhhhs.h.................hh...h.hhs....sKspLuhhRll.phlAluaLhsthl.....hh.thhh............shhhcslshlGcpSL.lFshGshluhhs.p.hhhhhs..ss.........shhhshhlshsulshhhhhAhhhph...t........................................................................................................................................... 0 30 79 113 +9962 PF10130 PIN_2 PIN domain COGs, Finn RD, Sammut SJ anon COGs (COG5378) Domain Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases). 
21.90 21.90 22.00 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.69 0.71 -4.12 15 82 2012-10-03 20:43:45 2007-07-31 10:51:19 4 1 55 0 44 124 12 122.10 24 90.15 CHANGED VlDANIlluulLt................................cs+phhlhhsttlchassptshs....Elp+ahstlspppplstpphtshLp....tLhptIpllscslap.phpppAccRhtchDscDaPhlALuLhLsusIWT-D+DFhssGlso.................WTocplphhLs ...................................llDsNllhusllt................................t.hh.hhh...h.....ttphph.hss.phhht....Elccahttl...h....c....t.pp..l..st...cpht....p.hlp....hLhp..h.lplls..pphht..phhppAhphhtp..h..D....c..DhPalALuLtl.....ss...l.W.TtDpch.....................................p......................... 0 15 36 40 +9963 PF10131 PTPS_related 6-pyruvoyl-tetrahydropterin synthase related domain; membrane protein COGs, Finn RD, Sammut SJ anon COGs (COG5617) Family This domain is found in various bacterial hypothetical membrane proteins, as well as in tetratricopeptide TPR_2 repeat protein. The exact function of the domain has not, as yet, been established. 
22.90 22.90 23.10 23.30 22.80 22.50 hmmbuild -o /dev/null HMM SEED 616 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.99 0.70 -6.37 10 172 2012-10-03 03:08:05 2007-07-31 10:51:56 4 6 156 0 45 127 60 380.40 25 62.59 CHANGED hRYauPLuYalhAsLphLs.GslhpuatlhlhLshhlGAlu.Whhas++pGp..hluhlsulLalhhPtpl+lhahEGNlPpsss......huLsPhll.lhLahhhc++chRsllshsLhhsllsLoHhMhuAlssluhhlh....lLlhtlh.pp+hlpplhulluhslGls.............huSaWllPhL..+sGlssh-ssusutVhcshohshosuLsPhh......t.uhaYaGlulsllhluGlhhsp+++ssu....................hlssllhllhohsuslsllhpLPlspLlh.hRFsshA..huhllsuhhhhtph+K.........hhslhslhlul...LllDossshphlsassscstpstppl..pptcshscpRlull...DtoshGohsoaal...sshsspsplhGWsaQGusTupsIhhLNpAL-s.......tYatahFDsshphsspolllp+tllocpt..hptlcpss......thsGYphhpcsscthlYp.....ps...sspsatshsphhGlsIG+suh-lsLpasthcsussshl-....Dholp-Lp..+acllYLouhsh+scuphEt.................................................llh+lu+sGs...............pVll-hsch..p..scpstFLG............Vpupshshcs.chpshhasspchsssh.FtpcttpWpsshhsssp..hpshh.lsc+tlthhh+stpss........................plYFlGhNLhaH 
.......................................h...sahhhshLhhlh..u....hhu...h...hh.h...hhs.shl.u.sh.....u..hY.....hhu.+h.h....sh.p+ptuhLhulLYhhuhYphps.las.RssluEhlA........huhhPh.lL..h.sh...a.hl.h.p.ct..p...p+.h.hhhLsL..uMohlshu.H....hlohh.hs.s...lhlhla.......llh.h.h........pp......p.....hht..l....h..h..h.h.u...h..shu..ls....................................................hh..aLhPhl............hsss....pp...hp...s.........p..lac.s.h..s....p.sh.s..hh.p..h.......................sh.h.h....l.GLl.lhl..lt..L..hu.hh...hp..t..++.hlh......................lshl.lh.Lh..so.o..h.hPh....h...h....lp.....p..hPhpp..l...Q.F.....P..aRF....lslh.lh..huhh...lu..s....h....t.lh++..................hhhhhllhLsl...........sls.h.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p...pp.thhhhtt........................................................................................................... 0 20 30 36 +9965 PF10133 RNA_bind_2 Predicted RNA-binding protein COGs, Finn RD, Sammut SJ anon COGs (COG1532) Family Members of this family of bacterial proteins are thought to have RNA-binding properties, however, their exact function has not, as yet, been defined. 20.60 20.60 22.10 29.20 19.70 19.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.76 0.72 -3.94 20 129 2009-01-15 18:05:59 2007-07-31 10:56:29 4 2 114 0 73 120 3 60.10 34 92.39 CHANGED MCEuNlYL.hpsspcELlMEsVshlcsps-sl...hLpsIhG-pKhlcu.pIKclsLscH+Illcc ..MCEussYl..hps.spccllMEsVsplpsps.s.p.l...hLpDIhGcpKhlcu.cIcclsLhsH+Illc........ 
0 32 55 67 +9966 PF10134 RPA Replication initiator protein A COGs, Finn RD, Sammut SJ anon COGs (COG5534) Family Members of this family of bacterial proteins are single-stranded DNA binding proteins that are involved in DNA replication, repair and recombination. 21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.42 0.70 -5.08 32 404 2012-10-04 14:01:12 2007-07-31 10:56:48 4 2 282 0 162 499 117 222.40 33 63.88 CHANGED +DshssM-aPlFSLupp...chp..slpYctss..splpVpsusc..GhATIaDtDILIassSQlhcA.......tstG....cs.o...Rhl+hTsa-lLthssRsTuGcsYpRLctAh-RLpuTsI.....pTslc.sssp..cppcsF.ulIsp.aphl..sc.t..sGRh...hulElpLs-WlYculh...stp.VLTls.-YF.cLppsLERhlYcLA...RKH.sGcQst.WphslppLpcKSGSsush+cFptplRcllpsss...LP-Ypl ............................................................+Dt.shMtaPhFSLuKp....tcht......slcactss..........hplcVp.....us...sch..Gh..ATIa.DtDlL..Ias.sSpls-A..............pstG.....t..u......Rhhpho.s.a-lL.p.hhsR...s.s.uuc.sYpcLcsALcRLpu...Tsl................p.T.sIc....pssp........pc...pcpF..uhIsc..ac.h.........scp.........sG+h........hsl-.lhLs-Whapulh.....psh...VLT..lc...s.Y.F..pL.pts..l-RhLY+ls.....R.KH...sG.c.p...tt.........W..phshppLh..pKoGSh.u....hp...cFth.lRtlhttp.......LPsYt.......................................................................................... 0 34 110 139 +9967 PF10135 Rod-binding Rod binding protein COGs, Finn RD, Sammut SJ anon COGs (COG3951) Family Members of this family are involved in the assembly of the prokaryotic flagellar rod. 
21.00 21.00 21.10 21.10 20.90 20.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.28 0.72 -3.96 174 1787 2009-01-15 18:05:59 2007-07-31 10:57:32 4 5 1578 0 421 1174 304 49.40 36 20.68 CHANGED KuMRpus........ps....shh.su..s.sschapsMhDpphApphu.pp.s.GlGLA-hlhcQ ..............................KoMRcu.s.....c-....ulF...su.............p.pschapuMhDpQlAp.pho.pt..t..Gl...GLA-hlhcQ............ 0 114 241 325 +9968 PF10136 SpecificRecomb Site-specific recombinase COGs, Finn RD, Sammut SJ anon COGs (COG4389) Family Members of this family of bacterial proteins are found in various putative site-specific recombinase transmembrane proteins. 20.40 20.40 20.70 52.80 19.90 20.30 hmmbuild -o /dev/null HMM SEED 644 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.14 0.70 -6.48 31 317 2009-01-15 18:05:59 2007-07-31 10:57:55 4 2 305 0 84 286 27 626.70 36 93.99 CHANGED psshsschthLlcLhcWlR...........stt.ssstp...AhtRlphLlcsL-ppPshpsplpshltsllpphchhsLas-sGlhs+sGFhuElscRltt+lLPssP-ps-ls-LhsllFsctpDspWlsulscpthtRLhpLl..............sstscpph....ssphppsllsAlphLusplsuhGlsP-lhpph.sp...........................shcsoPFhsLpc-stshhsthtptppss...........chspLcshL-pCRsslsplap+hcps..GlSVsLsFpLcRl+ppLtRlctLLsllh.......ssstspthscLhspLlpsspc+pSlptLhssssphLAp+lsEpuucTGEHYITcs+pEYhtMh+uAAGGGhlhAhhshlKhhlsuLtLusFhcGlhsulNYuluFlLIahLHaTlATKQPAMTAsAlAuplcss..scsp...slcphV-.lspllRSQsAAlhGNlhlshPsAlhluhhhthhhGpPhloss+ApthLcols.h.usolhaAAhsGVhLFhSuLIAGahDNhhshc+lsptlthp.hLpthLGtptApRlAsahcpNluulsuNlsLGhhLGhsPslushFGLsLDlRHVshSouslGhAhsslGhpshphsthhhAlsGlslhGhlNlsVSFuLAhhlALRuRslphsstpplh+ulhpRlhppPhsFhhP 
.........................t...tpt.phLhtLhpaLR..................sst.pt......sttRlchllpsLcpsPphtsphuphlhthlsphchhsLhscsGlhu+puFhsEhspRlhp+hlPsh.-tspLtplFh.hl.F.sp.sDspWlpslspcphhpLhpLl.............stspppppt....ttphpcphLtAlphLohpluupuLpP-lhphh.sch...........................hpt-SPFlsLp+E...hhthhptht.pspts............................-sspLpVhl-QC+s.lsplp++spps..GsSlplsahLcRLcQpltRlchLlslhs....................tstsp.pthhpLhspLlpAstpppSlppLhcsshphLARploEpsucsGEHYIoRs+pEYhpMhtuAAGGGlllAhhshlKhhls.sh.t.lus.hhpulhsuhNYulGFhlIHhLHaTVATKQPAMTAushAppl-ps.......upst.....sh.p.clspLllslhRSQssAlhGNVhluhPlAhhlshuhsthhttsllssppAthpL+Sls.h.usolhaAAIAGVhLFsSGlIAGahDNhssatpltpRLthp.hL+thhu.tttp+hAsah+cNhuulsGNhhFGhhLGhssslGthhGLPLDIRHVsFSSuslGhAhsshsh......shshhhhulhuVhhIGllNlhVSFuLAlhlALRSRsl+hsph+tLhpslhpplhppPhshFhP.......................... 0 19 51 70 +9969 PF10137 TIR-like Predicted nucleotide-binding protein containing TIR-like domain COGs, Finn RD, Sammut SJ anon COGs (COG4271) Family Members of this family of bacterial nucleotide-binding proteins contain a TIR-like domain. Their exact function has not, as yet, been defined. 24.70 24.70 29.70 28.30 24.50 23.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.48 0.71 -4.50 28 258 2009-09-11 12:22:46 2007-07-31 10:58:20 4 8 238 0 79 235 14 122.30 31 42.51 CHANGED +VFIVHG.HD-sAKpcVu+Flc.pLGlcsIILHEQs.spGpTIIEKlEphus.VuFAlVLaTP-DlGsttspts........hp.RARQNVVFEhGahIGKLGRs+VssLl.KG.slEhPsDluGVVYsshDs.s.tuW .............+lFIlHG.+-p.thtp...p.ltphL.c.p.h.sh.cs.l....lhppps.......st...G....p.T...l...l.E...cl.Ep..t..s....sp....ssFAlllhTPDDhuhhpsptt.............................p.pARpN..VlFEhGhhhG+LGRp..+lhhL......h....cs...sl-..hPSDltGlshhphst..................................... 0 26 54 66 +9970 PF10138 vWA-TerF-like Tellurium_res; vWA found in TerF C terminus Anantharaman V anon Anantharaman V Domain vWA domain fused to TerD domain typified by the TerF protein [1]. 
Some times found as solos. 43.40 43.40 43.40 43.50 43.30 43.30 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.19 0.71 -4.49 77 225 2012-10-10 16:07:06 2007-07-31 10:58:34 4 3 174 0 64 274 6 199.20 36 54.93 CHANGED pA+VsLVLDtSGSMpshY+sGsVQclsERlLulAsphDD....D..Gsl-lahFuschpp...hs............slolsshps.al...........................cph.hs..s.h...........tt.hGt..ss.s.shccV.lspat..............s...ss...........................................PshV....................lFhTDGu..sp.cp.pt.hcchlp..cu..uphPlFWQFVGlGcs.p...............................FshLc+LDs...h...tsRhlD.NusF..Fslccls...plsDtcLY-pLL.sEFPsWlp.......tA+ttsll ................................................AtVsLVLDtSGSM..p...s.....Yp...c.G...s.VQ...p...lh-RllsLA...s...p...hD..D....D.G..plcVahFus...chcp..hs..............slTls.s....hp.s.al..........................pph.pss.....h...h...tthGt..ss.s...slMc-llcaat.pt.........s.ss.......h................................P..s..hVlFlTDGG..s..s..cp.pt..lcchlpcu.....uphPlFWpF....VGlGss.s....................ashLccLDs....h......t..s..RhlDNss..F..Fsh-chs.....plsD...pcLY-pLL.pEF..Wlpts+t.tl.h....................................... 0 14 37 55 +9971 PF10139 Virul_Fac Putative bacterial virulence factor COGs, Finn RD, Sammut SJ anon COGs (COG4458) Family Members of this family of prokaryotic proteins include various putative virulence factor effector proteins. Their exact function is, as yet, unknown. 
25.00 25.00 29.00 28.00 18.30 18.10 hmmbuild -o /dev/null HMM SEED 854 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.33 0.70 -13.22 0.70 -6.61 14 467 2009-01-15 18:05:59 2007-07-31 10:59:05 4 3 251 0 78 343 25 401.00 27 97.56 CHANGED AI-WlsssRppA.RLspEADuLhhcLRRs+Npu+pLucsutpshslGhFG.SQAGKSaLlSsLAusppGcLpsphs..GcplDFlpclNP...u+EuTGLVTRFo.ppssss.........ssuaPlpLpLloEs-lAKIlsNuaFpDhspppst.hplscppIsshlpphpttppstsssGlos--VlsLhDYhppphspt.p..LpstaWspAhpLAPtLslc-RApLFSlLWGchspLTcsYhphApsLppLupApclhAPLusLV....tt.h.tucuIhsVssLttLsssss..ppltV+PhppGphussVolshApLshLssELhhslsp.st.shhEpVDLLDFPGhRsc.ph.t........t.sssspPluphhlRuKlAYLhERYTspQchssLllCssssppp-VsolutsLcpWlcpTQGEostpRupRpPGLlWAlThaDtRlssphsht............h.......................................................................c-uVpRalucPsc.uWsshLsLs-uGhpRlspaLsphsp.-hKhsRlpEQLsclt+cl..hcshhstaapssssschtcKpphuctlh+sLp...spsth.aGELLctLhssp-plR....pLYhp..............t...sttshssstsss.sstsssshDhFu-sssssss.....s..ss...ttsp-ppaApsVhchWlsHLRpLscsssLlphLGlstpslphLlcELlTuupRLcltppLtpsLtspcps.u.pp-phspRQVhpstslluDFluWLGatplstppR...PsSclpcGptlFstssp..ss.....tpLs+LutpPsstsshYlhDWLVuLtphshcNsGauAupEIospppptLusIL 
.................h................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 4 21 52 +9972 PF10140 YukC essB; WXG100 protein secretion system (Wss), protein YukC COGs, Finn RD, Sammut SJ, Desvaux M anon COGs (COG4499) Family Members of this family of proteins include predicted membrane proteins homologous to YukC in B. subtilis. The YukC protein family would participate to the formation of a translocon required for the secretion of WXG100 proteins (Pfam:PF06013) in monoderm bacteria, the WXG100 protein secretion system (Wss). This family includes EssB in Staphylococcus aureus. 
25.60 25.60 26.10 26.20 24.70 25.50 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.02 0.70 -5.88 16 402 2009-09-10 16:26:00 2007-07-31 10:59:25 4 1 395 1 25 209 0 352.10 39 84.89 CHANGED php+ucl+hp-hhEhpllcphsPhFhc.plsts-DplhlshpsssphhsFppl+ppscp..pKl+huhs.Vtslpca.ppRlshllpP-NllFscsLpPthlHhGl+-slPPh-hs-EchLpphKshllslhstcasF--.Yhuhh-sh+hoshtKplhpucol-sLhtll.csa.cEp-ppp+shthVPK++a+l.Kalululhslll.hll.llYhhFhtpPhQ-shlpuspsFLsssYocVIosL-chsscphPpsspYpLAhSYlps-p...Ls-pQ+csI.hNslTLpSDcpYhLYWItlGRGph--AL-lA+pL-DspLIhaALsphh-pl+sDssLSGcERpccLcslps-lccY.h+ct ..........................t.lsKSpl+scchc.h..LL-pcusaFls.s.clsp.hcD...o.apIpYclscptps.F.-s..I..+phs+s..EKLRhLhN.ltsL.c.-h..po.RhTFhLtPDpLaFs+sthPhhtpRGlpsllsPhs.l.oEt-FLppYKALlIshFspK.sF-sLhpGsLElt+t..T..sFEppll-AtTL-.LsshL-EpYpKpcpchp.pNhshVpKhtaplFKalAluhssloVLLlshLua.hhF.hh.apE+l..cu.puFlKs.DYo.pVlssh--l-sccLsppuhY.hAhSYIpspK...Lpc-pK-sl.LNNlT.pSsccYLLYWh.lGpGch--AlsIAphLDDsslhhhALhpplpplKsNscLSs-cRscchKcYpccLp-hh-K.t................................... 0 6 12 19 +9973 PF10141 ssDNA-exonuc_C Single-strand DNA-specific exonuclease, C terminal domain COGs, Finn RD, Sammut SJ anon COGs (COG4199) Family Members of this set of prokaryotic domains are found in a set of single-strand DNA-specific exonucleases, including RecJ. Their exact function has not, as yet, been determined. 
22.20 22.20 22.50 22.30 21.10 21.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.01 0.71 -4.80 25 1020 2009-01-15 18:05:59 2007-07-31 11:00:12 4 3 1017 0 98 691 27 186.00 30 25.15 CHANGED QLhDhRoKp......thhtlspshshh.t..tppht...................tpptshpsctlVlhDlPsshspLcplh.p......tpphppIYhhh.pccshYhsGhPoR-pFtphYphlhppscFcl+pchpcLupaLslscchlsFMlpVFhELsF.VoI-sGllslNppspKR-ls-SpsYQp+pcphch-cpLlYuohpElpcWl ........................................................................hs.Rs.p................t..t...hh.................................................t........p.tps....ctl..VlhDlPps.hppLcslh.p......ppphsplYhhh.....phs.....c.....s.hY.l....sGh..soR-pFs+lYKhl...h..p..h...-.h.sl..+p..c..h.ppL..upaLs..lppphLhhMlpVFhELtF..VT..IcD....Gl..lplNp.ps..sKR..sls-SplYQphppplcspchhhhushp-lhsal...................................................... 0 23 49 75 +9974 PF10142 PhoPQ_related PhoPQ-activated pathogenicity-related protein COGs, Finn RD, Sammut SJ anon COGs (COG4287) Family Members of this family of bacterial proteins are involved in the virulence of some pathogenic proteobacteria [1]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.14 0.70 -5.98 26 293 2012-10-03 11:45:05 2007-07-31 12:39:29 4 11 226 0 99 405 154 341.20 42 71.85 CHANGED LssYhpp.tspshpapl..ssstphss.schhhhchTSQpWpspp.sp......WpH.lsIhlPps....shsppALLllssGs.................................t.t.sssssptpsphhttlAppopsslsslsslPNQsLtassp....+cEDshlAaoWptahc....pp-tshPLhlPMstuss+AMchspchhtph.....plcpFlVoGuSKRGWToWLouh.sDpRVtAIsPhVlDhLNhpsslp+hhcsYG.sWshultsYhtpGl......schl.sosthspLhplhDPhpYh.....p+L.slPKallsuosD-FFlsDusphYascLsG..pKtLphlPNssHshhs..ppslpolssFhppl.tpspslP...plphplptsp.....t.tlplpsupt.PppVpLWpAsssss..RDFR ......................................................................................................................LssYhpp.pspshpY.s.....l..h.s..oss.l..st..lhhppapLhSQpWps-s.hV......s.ss...........tWpHpVsIalPcs.......sppccALlllssGh.................................ssphs..st..s..s..-.hs....-...s.L.sslAppTpTlVlsloslPNQhLsFps...Dt.......p....s..............hpED.....-.VuhoWshFhct............sppcthhPLpl...PMspAlspAMclspctLsph...................s.......I..ppF.....lloG.hSKRGWTT.WLoAl..sD......scVc.............AIlPh..sID..l.L.s...h....css...L...cHh....Ypo.YG........N...W...P...l...sFhsY.appGI...................scpl.co..P..s..F..spL.hp.Il....DP.Lp.Yh...........................sRL..slPKYIlNASGD-FFlPD...sopaYascLP..G...s.K.tLRhl.P..Nhs.....H....sls..........phs...po.LlsF....l.s+.h...pp..p.p.........sLP...pl.sh..l..ppph.....................lsVhh..S....Et...PspVhhWoAsNPsu....RDFR.............................................................................................................. 
0 72 83 92 +9975 PF10143 PhosphMutase 2,3-bisphosphoglycerate-independent phosphoglycerate mutase COGs, Finn RD, Sammut SJ anon COGs (COG4255) Family Members of this family are found in various bacterial 2,3-bisphosphoglycerate-independent phosphoglycerate mutase enzymes, which catalyse the interconversion of 2-phosphoglycerate and 3-phosphoglycerate in the reaction: [2-phospho-D-glycerate + 2,3-diphosphoglycerate = 3-phospho-D-glycerate + 2,3-diphosphoglycerate]. 20.50 20.50 20.60 22.30 20.10 20.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.91 0.71 -4.61 92 556 2012-10-03 05:58:16 2007-07-31 12:40:27 4 7 462 6 268 542 95 172.10 32 41.38 CHANGED AppGhs.GlhcslssGhsPGSDsApLSlhGYD.PtphYsGRGPlEAhuhGlclpss.DlAFRsNhsTl...ccp......................hlhD+pAG+Ips.cEu...ppLhcsLsp..cl...ss..................phpahsusuaRsllVl....+...uss....h...u......spls.sosP..+.hpGp............ltchhPh.....sp.......s...sAcllschhpputclLpsHPlNt ................AppGhsGhhpslssGhsPGSDsApLulhG.YD.Php............hYp.G.RGsLEAhuhGlclpss...Dl.AhRs........NhsTl...cps................................hlhD+pAG.+Iss..EEu...ppLhptLsp....cl....ss.....................sclpF...hsuspaRtllVl.....+.....Gss...................s.......sp..ls..sosP..+..hssp.....................ltphhPh.....st................tut.oAcllschhpcutc.lLp.sHPlN..................................................................................... 1 107 199 243 +9976 PF10144 SMP_2 Bacterial virulence factor haemolysin COGs, Finn RD, Sammut SJ anon COGs (COG3726) Family Members of this family of bacterial proteins are membrane proteins that effect the expression of haemolysin under anaerobic conditions [1]. 
25.50 25.50 25.50 25.80 25.40 25.40 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.27 0.70 -5.22 5 788 2009-09-14 08:20:40 2007-07-31 12:46:41 4 3 783 0 95 345 12 204.10 52 86.28 CHANGED MlRAKLKFRLHRTAIlLICLALLVlLMQGASYFSpSHQhARs-QVEELA+TLARQVAaSLSPLM..GscssNup+IsAILcQLTssSRILDASVYppsGoLVA+SGEsVpVRDRLALDGK+uGSYFNaQIVEPIsGKsGPlGFLRlTLDTHsLATEu+QVDNTTNlLRLMILLuLAIGlILARTL....LQsRRoRWQQSPYLLTAspPVKEE-ES ..................Ms+sKLKFRLHRsVI..VLhC.LA.LL.VsL.M.Q.G.AS.a.F.Sp..s...HQ+tR....ssQL.E..E..LARTLA...........RQVsLslAPLM....p..s-o...sD.ccRIpAlLcQLTcESRILDAuVYDcQGcLlA+SGE...o.VpVRDRLA.L...DG.K.+..A....G.u.YFN..Q..Q..IVEP.I....tG..K.N.G.P.LGYLRlTL.D.T.H.s.L.AT.E.u.p.Q.V.DN.T.T.N.IL.....RLML.L.LSLA.I......GVlLTRTL..........LQG+RT...R..W..QQSPaLLTAspPV.EE--................................. 0 5 24 62 +9977 PF10145 PhageMin_Tail Phage-related minor tail protein COGs, Finn RD, Sammut SJ anon COGs (COG5280) Family Members of this family are found in putative phage tail tape measure proteins. 
30.00 30.00 30.00 30.00 29.80 29.40 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.33 0.71 -4.67 185 2672 2009-01-15 18:05:59 2007-07-31 12:56:46 4 25 1759 0 359 2411 301 209.40 19 20.43 CHANGED lpptshc..hus...p...............h.sh......oss..-hupuhttls..psG....hs.........spphh.shhtsshph.AsAsst..-hspsushhsshhssath.ss.......pp...ssphs.......Dhlstsupp.u.ssshpchupshp.thussupshGhsh.....pphsuhluhhspsGh.puspAGoul+shhtpl.........t..sppstpshpp..........l..G..l................................................................shtc.........upuphpsh.phltplppth.......tt..hsptpp..tshlp..........plF.........................G ..............................................................................................................................................hppthhp..hut..p........h...sh.....oss..-hupuhtths....ptG.......hs..............spphh.....shhtss.hph.....A..t..Assh.......s..h...s.p.s.uph.h....sphhssasl..ss..................cp.......hpplu.....................Dhlstss..sp.s.ss.sh...p...slucshp...thu.......ss.up.sh..Ghsh.......ccsuA.hhuhhtssGh..pustAuo..uh+s.hhtpl.................sts..sc..pstcshp.p..................L....G....l.........................................................................................................................shtc...........spsph...p.s.h.....phltpl....pth.......tt.....hsptpp.....thht.thhG.................................................................................................................................................................................................................................................................................................. 0 106 225 302 +9978 PF10146 zf-C4H2 Zinc finger-containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4451) Family This is a family of proteins which appears to have a highly conserved zinc finger domain at the C terminal end, described as -C-X2-CH-X3-H-X5-C-X2-C-. 
The structure is predicted to contain a coiled coil. Members are annotated as being tumour-associated antigen HCA127 in humans but this could not confirmed. 27.20 27.20 27.70 28.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.83 0.70 -4.33 7 142 2009-01-15 18:05:59 2007-07-31 13:15:50 4 3 99 0 93 126 0 206.40 47 91.22 CHANGED psItclRsKs.phcKl+sclhcEh-ss-spE+plp-hptph-tL.pEKhuHsEELR.IptDINshEshIKpocs-hp+phphhp+ha-Ehh.l+ttlschhcsh.....LulccL.phcEEE..hlo......................................pA+pshpPp.....hPss..........................PssP...sshhtuhhsstthp.sc...........................tsphc...sh..RpQsPP.......MKsC.SCaQQIHRNAPICPlCKuKSRS+NPKKsKRKp ....................................................................................................tltclRsKTlphEKlKsRlhtEh-shEsEE.+pLcEY+pEM-hLLQEKMuHVEELR.IHADINs......MEssIKQoEs-hs+.hh-ss.pRla.-.EYpPLKcclDth.Rhs......LGLpc..LPsLp...EEE..pp.lo.....-.hap...........................................................pt+t....hps.p................P.................................................................Ps..P......pshtAutssstphpssp.................................................................................p.p.sp..s...sshRQQPPP.........MKuCLSCHQQIHRNAPICPLCKAKSRSRNPKKPK+K............................................................................................................................... 0 35 43 73 +9979 PF10147 CR6_interact Growth arrest and DNA-damage-inducible proteins-interacting protein 1 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4848) Family Members of this family of proteins act as negative regulators of G1 to S cell cycle phase progression by inhibiting cyclin-dependent kinases. Inhibitory effects are additive with GADD45 proteins but occur also in the absence of GADD45 proteins. Furthermore, they act as a repressor of the orphan nuclear receptor NR4A1 by inhibiting AB domain-mediated transcriptional activity [1]. 
21.80 21.80 21.90 22.60 21.70 21.70 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.55 0.70 -5.02 9 102 2009-01-15 18:05:59 2007-07-31 13:21:45 4 4 86 0 67 101 0 192.60 32 86.81 CHANGED Mttshhp...pcshh...pLshThs............th..tssp.phhPp+RphhtshhPs.psp...pW.ccs.+YpRchFGRYGht.SGVsPctLaPot.EclcEh.tEchtah.oLppMhcphctpppcccp+tpAREpclAcshtKhPphlAcactphtcpcpctpscKpR+-RLlt-sp-+FGapVDPRssRFpEMLpphEKE-+K+hKttKR+cKcEchhAthsttsup .............................................................................................t.tt.ph.P..hph.tshhP...ps.....pW.phs.+Ytt+.aGRYGtt.SGVsP.t..h..WPo..Eplc.ch..tE..chtah...oLtpM.cplctppttccp+pptR............EppIAcpMsKMPphltca+pphpc+.............cpctpscKpR+tRLh.tEsp-+hGaplDPRss+FpEhLp..chEKc-+....K+hKttK++tKp.Ethhut....................... 0 22 27 50 +9980 PF10148 SCHIP-1 Schwannomin-interacting protein 1 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4847) Family Members of this family are coiled coil protein involved in linking membrane proteins to the cytoskeleton. 
21.70 21.70 21.80 23.70 21.60 21.30 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.54 0.70 -4.97 2 132 2009-01-15 18:05:59 2007-07-31 13:44:20 4 8 75 0 80 130 0 206.90 59 57.02 CHANGED shpAQ+NERESIRQKLALGSFaDDG.hhaTuCSKSGKssLSSRLQsGhNLQhCFVNDSuSD+DSDA-DS+TETSLDTPLSPMSKQSSShSDRDTsEE-.-S.-D.-FhphQ++LQtEA+hALAhA+PMA+MQVEVE+Q.p..++SPVADLLPHhPHISECLMKRsL+PsDhRDMolGQLQVIsNDLHSQIpuLNEELVQLLLhRDELHhEQDAMLVDlEDLTRHAcSQQ+HMhEK..uK ....................................................................................................AQ+NERESIRQKLALGS......F.a.....D....DsPslYTosS.+uGKPS..LSS.RLQSGMNLQICFVND.S.uS...D.....KD.S.....DADD.....S.....+..T............ETS.L..D..T..P...L......SP.......MSKQSS..Sh...S..D.R.DToE--.S.....ES..L-Dh.D...F.loRQ....+KL.Qu...EA+hALAhAKP.MAKMQVEVEKQNR......K.KSPVADL...L...P.H...M...PH..I.S..ECL..M..KRsL.+...ssD...LRDMTlGQLQVIVNDLHSQIESLNEELVQLLLhRDELHhEQDAMLVDIEDLTRHs..pupQ++.t-+................... 0 21 28 62 +9981 PF10149 TM231 NAcGluc_Transf; Transmembrane protein 231 KOGs, Finn RD, Sammut SJ anon KOGs (KOG4838) Family This is a family of transmembrane proteins, given the number 231, of unknown function. It is conserved in eukaryotes. 
20.50 20.50 22.90 21.10 18.10 20.30 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.89 0.70 -5.44 7 132 2009-01-15 18:05:59 2007-07-31 14:10:04 4 6 95 0 88 132 1 236.80 31 81.00 CHANGED Mthaslas+sshlhY+spLCShAoLhlhhhlhLohh.P..LlshhpshuhW.cppshhEQPsVpFpYphlhluph-s.......stuhlAhSoFssFN.pLpsps.psstlpshpEDsNhDGppDtLphpl..pLP.pso.plhthpLllhF-hpLpphsshsh.ohhslphp.P.....hhuupl.hpG-LpLpQ....psPhshRslcopas.Vpl.hNuoss.hspaphcsIhtph.ctNsuha.hsspph.Wphspuuts..h.lchtl...ch.p.slhY+suhWpplt.hWlpYhSlLhI.Lalhp+lKpFlFpppllpohpl.lPaKc .....................................................h.hatpsh...h.Y.st.hohuslhhhhhhhhthh.P..llhhhpstGhWh+pssa.EQPsVpFpaphlhls.hs.........tthlshSo.a.sth..N.phtss.p...h.......ph.s.......hl...phh.........p......pDh.NtDG....ph..Dhlphpl..pl.shpst..plhthpll..hhFphpLpphsph.hpohhhhp...t.s......h.supl.h.G-LpLpQ....pts.h..ts....h.....p...a..........p.hpl.hstst...h.p.......aph...pplhtth.ptN.shh..t....h...ah.s....s.t.ts...h.lph.l..........ph..........h.............Y.....suhWEhlK.hWlQYhuhhhlhhalhpclp.alFppplltoh......................................... 0 31 40 66 +9982 PF10150 RNase_E_G Ribonuclease E/G family Bateman A anon Pfam-B_234 (Release 21.0) Family Ribonuclease E and Ribonuclease G are related enzymes that cleave a wide variety of RNAs [1]. 
22.70 22.70 22.70 23.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.64 0.70 -5.17 173 4570 2009-01-15 18:05:59 2007-07-31 16:14:19 4 14 3079 16 1101 3529 3482 263.90 41 37.13 CHANGED olsGRYlVlhP.......pssp..lGlS++Icsc..p-Rc+L+....plh.........................ppl...hsps.......hGlIlRTuupssspcclppDlphLhchWppIpcc.tpptt...........PsLlac-.slhh+slRDhhssclpclllDs....ppsapphppa.h..pphhPph.hp..pl..phap....sp.tslFctaslEppIpcsh.p++V.Lt...SGGallI-pTEALssIDVNoGph.sspps..h--Tsh+TNlEAAcEIARQLRLRslGGlIlIDFIDMpspccccpVhctLccslcpD+s+splssho.pLGLlEhTRKRhR .............................................................................SLsGRYLVhMP.......sss+..sGlS++I...-s-.ERpcLKchl...............................tpl.....hs.cs.................hGlIlRTAu....t.Gtst.--LptDlsaLh+hW.ppIpcp....tp..p...tt...s..............................PhLlapEss.lhh.RslR...Dh.h.s.p.-...lscIllDs...................ppsa.cphp.pa.h...p.h...hPch...ss.................+l...chY.p..............sc...hPlFstaplEspIpp..Ah..pRcVpL.....SGGhllIDpTEAhTsIDlNoGph...st.t..............t..s.............lE..-Tsh.pTNLEAAcEIARQLRLRsLGGlI.lIDFIDMs.stccp+tV.pp....Lc....cALpp.D.Rs+h.plstlS.phGLlEMoRpRh....................................................................... 0 343 700 917 +9983 PF10151 DUF2359 Uncharacterised conserved protein (DUF2359) KOGs, Finn RD, Coggill PC anon KOGs (KOG4467) Family This is a 450 amino acid region of a family of proteins conserved from insects to humans. The mouse protein, Q8BM55, is annotated as being a putative Vitamin K-dependent carboxylation gamma-carboxyglutamic (GLA) domain containing protein, but this could not be confirmed. The function is not known. 
19.80 19.80 20.00 20.90 19.50 19.70 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.50 0.70 -5.97 6 158 2009-01-15 18:05:59 2007-08-01 11:58:53 4 5 88 0 99 172 0 346.60 30 64.08 CHANGED HGYRlCIQAlLQDKPKIsTsNLscaLELLRS+QsRPhKCLTIMWALGQAGFssLopGL+VWLGIMLPVLGlKuLSsYAIAYL-RLLLhHsNLTKGF.GhlGPKDFFPLLDFAaMPNNuL.................................SsSLQEQLppLYPRLKVLAFGAKPESTLHTYFPSFLSRA.TPuCPs-MKKELLsSLT-CLsVDshShSVWRQLYsKHLuQSSLLLpHLL+oWcplPpKhpK.SLp-TlQSF+VTNpEh.tKGuuusp-lpsCsoACpuL.pKh+GsuhPWsRLLL.lLLVFslGFlsHDlRoHuSFQuSpTuphLppSGlhssuQQAhu+lpsYShQG.SWLpcshPtahSchlpVltPshp.uhs+hptsssFlst+stshlsahp-plsphhphl.tphP-sl.QhhthL+ELLLhhh+shlLPshthLhtsltth.pph.tuCpGcVohsClpspl.phup.oWhhLQcsTsshhs.tluh .............................................................hlh...Pphh...h...h.....psh..pp....hhl.slhWsluQAuhssLs.GLplWhtlhLPlL............s.....hK.....s.....h..u.....s.....hs....l....t.hl.-........Rl........L.....................hps........s........l.....p........c.u...........t....hlsPppFhsl...hc.hsa....s..p..ssh.................................s..shp-phptlYPpLK...luhu.u.ps..to......p....h.....pp...h..h.p....h..h...s...cu...ssps....s....s...p..h.....pcEh.h.s.shh..CLs...s..s..shphWcp.LY.cpLptS.....shLLp+LhpsWcphs.tKh.....p..p......s......L.pcTlpSh+hpNpch..hp..cu..p..s...ppp....sl....hs....s.......cthCc.....slhtp.............h.......p...u........t...........h..hsp.........h..hl...hllhhhs..G.hlhaD..h..t.t..s...httS.st.hLppsG.....hh.h.ppsh.hh..h..tu.phht...s.h............t.............ht........................h..................................................................................................................................................................................................................... 
0 23 39 65 +9984 PF10152 DUF2360 Predicted coiled-coil domain-containing protein (DUF2360) KOGs, Finn RD, Coggill PC anon KOGs (KOG4496) Family This is the conserved 140 amino acid region of a family of proteins conserved from nematodes to humans. One C. elegans member is annotated as a Daf-16-dependent longevity protein 1 but this could not be confirmed. The function is unknown. 24.30 24.30 24.60 24.30 24.00 24.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.79 0.71 -3.92 22 332 2009-09-14 15:15:59 2007-08-01 13:41:33 4 33 124 0 252 294 3 108.90 26 34.36 CHANGED lNpFllpTlpFLNRFushCEsKLsphppplQplEtphtlLEsKL...uSIs...................Ghppl.............................................sssssssssppsss.sssss................t.stssusssptsshtsss..t.................P.h...shhphp..pDPRYttYaKMLplsVPstAl+sKMpt-Gl-PshLDs ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s......t......s.hhthp....c-PcY.sKYFKMl.ph.GlPhtul+tKMht-..G..lDsslL-............................................................................................................................................. 0 124 163 225 +9985 PF10153 DUF2361 Uncharacterised conserved protein (DUF2361) KOGs, Finn RD, Coggill PC anon KOGs (KOG4484) Family This is a region of 120 amino acids that is conserved in a family of proteins found from plants to fungi. The function is not known. 
25.00 25.00 28.70 25.50 19.50 21.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.46 0.71 -3.89 28 193 2009-09-10 22:45:35 2007-08-01 13:50:37 4 4 165 0 142 189 2 111.10 35 37.80 CHANGED clK++IRDlERLLp+c...sLPsclRl-pERsLpuL+hcLps..pptcppp+Kh...hcKYHhVRFFERKKApRclKpLcKphcps.s...........tttchcphpcpLcpsclDLsYVlhFP+sEKYISLY ........................lKp+IR..slcRLLp+p......sLPsslRlppERpLpuLcpplpp....pptppt....cpph.......hpK....YHhV......RFF...........ER+KspRpl+pLp+phcpsp...............................tttph.pplppplpttclDLsYsh..........aaPpsEKYlSLa......................................... 0 44 84 122 +9986 PF10154 DUF2362 Uncharacterized conserved protein (DUF2362) KOGs, Finn RD, Coggill PC anon KOGs (KOG4506) Family This is a family of proteins conserved from nematodes to humans. The function is not known. 25.00 25.00 27.60 27.60 22.80 22.30 hmmbuild -o /dev/null HMM SEED 510 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.65 0.70 -5.83 7 140 2009-01-15 18:05:59 2007-08-01 14:11:51 4 4 99 0 101 136 2 381.70 37 82.64 CHANGED ppshscLptplhhl+t...Pph....h-psLcpsLppFl-p..EslschDccAEthlpthtptcs-hptlspsht+hYs-phLEaAc..tss.-EpFApsaHpLlHSss.pplLshE+sYhhslophhpcpD.Elcphpphpt.EhsK.hppLstolsspDlN.shhAppapsQp.lcp+auoELpphpshQKpEYpcWV...........sSpls-phpsQstp.ptsustsshh.ppppphEESFTIHLGuQLKpMHNlRLlpsc.hhDhCp....hcscto..phpRLphAhuLYSoSLCGlVLLlssp...hss.c+-FhpssppsT-FHFspl-cQLEhlppsl..L.sptp+sphpctp.spsspstusts+sc..........+p.sslhsG-FaITRHSNLScsHVlFHLslD-slpS.....upIsuRcPsIhGLRNILKssspHDITTIoIPlLLhH.-MSE-MTlsWCl+RAELVFKCsKGFMhEhsSWsGu.....pTVQFLlPpsISEphFhpLusMlPplF+VusshhL 
.....................................................................pl..ptLppFlpp..pp.p..scpsp..lpth.t..ptp.sl.pthhptht+.atpp........hlpasc....ts.....s...............--....FutsaHpLlHS..s...s.cslLphEptahhsls-hhtttc.plpphp.pp..............p..........t.EhpphhppL.......shshosp-lN.slhupp..hpppp...............l..cppapsclct.pthQ+pEYppWl.th.t..............ss...th.......sp........h.....h.p..s.....................p......s....pt......t....pp....t...........h-ESFTIa...L..G.uQLKpMHNlRllpss..hh-hCp.................hpspts...........pRLphAhuLYSssLsGlVLLs.......s.......s.p.......hsshpc...........phhphsppsT-FHFspl-pQL.chlpp.h......h...t.p......pth.ppt........p.ttttt.................................................pp..pl.....s.G-haIT+HSNLu...........p.....h.HVhFHLssD-.s.lpS.....spIsuRcPslhGLRNIL+ss.spaDloTloIPlLLhc..-...hsE.....cMTl...s....W........Cl+RAELVF..............KClKGFhhE..soh....sus...........polQhllPpslo..cphFhplsshlPplF+lsssh............................. 0 46 53 81 +9987 PF10155 DUF2363 Uncharacterized conserved protein (DUF2363) KOGs, Finn RD, Coggill PC anon KOGs (KOG4508) Family This is a region of 120 amino acids of a family of proteins conserved from plants to humans. The function is not known. 25.00 25.00 26.80 26.90 21.90 19.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.55 0.71 -4.20 16 204 2009-01-15 18:05:59 2007-08-01 14:17:11 4 3 134 0 149 197 3 120.10 58 30.15 CHANGED lEpNPplAh-lLh+lhpos.p........hp-YhcsLlsh-lolpShElVN+Loppl.......pLPp-FlphYIspCIpsCpslK..-+.hQsRhVRLVsVFlpoLI+sKllss..+-LhsElpuFClEFS+lpEAssLF+hlKs ............VEsNPllAlEhLlKLhpSs..p........................Is-YFslLVs..M-MSLHSMEVVN..RLTTuV..............-LPsEFlHhYISNCIuoCEp.lK.........DKYMQNRLVRLVCVFLQSLIRN.KIINV....pD...LFIEV........Q.......AFC..lEFS..R..IREAAuLFRLLK.......................... 
0 71 93 123 +9988 PF10156 Med17 DUF2364; Subunit 17 of Mediator complex KOGs, Finn RD, Coggill PC anon KOGs (KOG4512) Family This Mediator complex subunit was formerly known as Srb4 in yeasts or Trap80 in Drosophila and human. The Med17 subunit is located within the head domain and is essential for cell viability to the extent that a mutant strain of cerevisiae lacking it shows all RNA polymerase II-dependent transcription ceasing at non-permissive temperatures. 19.40 19.40 19.40 19.40 19.30 19.30 hmmbuild -o /dev/null HMM SEED 467 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.42 0.70 -6.22 28 294 2009-01-15 18:05:59 2007-08-01 16:06:26 4 5 238 3 216 270 0 390.00 21 67.52 CHANGED Ms...............psh.lsLcP.....................tt..pp...........s..................................lslcphIspIhtEhG...sFtslsEcoLpccIppppsptptsptspp...........ppccspssp................................................................ppsph-ph.pt..+p-lhpplppAhsEssluLDFlSLLlSphpsp......................uhsohS.....PhLcphsPsuSLs...u-hlt.s..ttpttht...............ptlspGWKhcuLspAsshLhpuup+Lppplpp...........Epc.................................................................YWpcllplppps.....Wtlh......+h...tpspp.lGlcYGat-uGspF.+ccGlAsLRpssc.uslpl................s..........shthpts+hlRV+Ih......csthlsGp.Sphs........p.tssssslpspIppA+pslFEcELFapLpREAppLhshslplps..Nplhhpls.......sppltI-L..........lsh-cps.tstpt........sp.psthApslhtsL+lLLsthH+psLpp+tpsssshs.h.....................p.p.sss.hlLRPlluhh+Hpphhphhpphlsslspslpsss 
............................................................................................................................t...................................................................................................................................hslpt.htp..tphu...t.h.lt..shpppltp.ttp..s...tptttp............pppptttt......................................................................................tpptt.c....ph......hp..plh...pplp.......pAhsEhslhhDhl.Sllhsp..................................................................................................thhshs..............Ph.pp.....hs....tphs..................................................................thlphhhKhcuLst..uuphLhpuAp.cLpppltp....................ppc.................................................................aapcllplppp......Wplp............................phts.lh.sc..h...u...a...tpu..u.sha.pptshtsh+psp......tshth..........................................................................s...hpt..thlp...Vpl.........tsh.hss...pt.............................p.t.ts...p.hppp....lptA.......p.......pslhpcElFtpLsREAhplh.s.....h...sph...h....stlhhph..........s...pl.lpL............................h...sptt.tt.t.................................pths.hl.h.L+.L.lt.hactp.pph...s.ss.s.h....................................p.p.p.....lLc.llt.hpH..hhp...htthlpphst.l......................................................................................................................................................................................................................................................................................... 0 70 116 175 +9989 PF10157 DUF2365 Uncharacterized conserved protein (DUF2365) KOGs, Finn RD, Coggill PC anon KOGs (KOG4514) Family This is a family of conserved proteins found from nematodes to humans. The function is unknown. 
26.20 26.20 26.30 27.30 26.10 26.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.77 0.71 -4.32 6 109 2009-01-15 18:05:59 2007-08-01 16:13:18 4 5 96 0 81 116 0 135.70 33 51.13 CHANGED csGchTHFVAcNLEtKIR...............pSucpo.TPssusGPh........hsht.h.p...IP.....lDPsVLsDlEpcuQtLAsSVDsLLcsLsusLHulSulTs-slpsY+sAVsKLsDslDuNIKshYsLLAKsEEls+SMpPscpLAppIR-IKRLVDhLEolh .................................................t..st.h.FlspsLp.+l+...........................s.........s.......ts.tsh.......................t.h..p.t.lP.....lDss.lLp-LEpcupplussVDpLhcsLsuslpp.houlolsslpsa+cuVspls-.u.l.DtsIKu......hYpLlA+sEELs+uMpslptLAppl+cI++hl-hh-sh.............. 0 32 41 65 +9990 PF10158 LOH1CR12 Tumour suppressor protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4515) Family This is a region of 130 amino acids that is the most conserved region of hypothetical proteins involved in loss of heterozygosity and thus tumour suppression [1]. The exact function is not known. 23.20 23.20 23.30 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.62 0.71 -4.36 5 152 2009-01-15 18:05:59 2007-08-01 17:47:16 4 5 94 0 69 138 0 104.40 47 44.96 CHANGED pFLPVL+GlLSupTs.pTNp....pLERLcSppllpLCoRhQ-HLspCAcAVAuDQNpLVcRIKEVDsSlsoLauthp-+QKpYAuYAEQluKVNplSspLsRIQhlL-QsVPhMEpLNshLs--ERLEPFsh..................+P- ..............................pF.PlL+tslout...Ts....s.sp.....hLE+....LsSppllplChRhQs....HLp.CAphVA....-QstLstphKph-.thtpl.....ht.h.ptpcpaspasp.pht+...lpphpt.lpchp................hlpp.l..h-tlNphLs.tcpL..hsh........................ 0 22 27 50 +9991 PF10159 MMtag Kinase phosphorylation protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4520) Domain This is a glycine-rich domain that is the most highly conserved region of a family of proteins that in vertebrates are associated with tumours in multiple myelomas. 
The region may contain phosphorylation sites for several protein kinases, as well as N-myristoylation sites and nuclear localisation signals, so it might act as a signal molecule in the nucleus [1]. 25.00 25.00 26.40 25.10 24.60 21.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.83 0.72 -3.94 16 182 2009-01-15 18:05:59 2007-08-01 18:10:29 4 5 163 0 133 175 1 77.70 55 27.33 CHANGED GsRGGpspFpW-DV+ssp..cR................ENYLGHSlh.APVGRWpKG+DLpWYsK..scsssuptst........ph+EElp+lKptEpcAhttALG .........GsRGGpspFpW-DVKs..sp..cR................ENYLGHSLh.APVGR.WQKG+DLsWYAK.........scss....s.s..s..t....................pccEElpclKcAEc-AhttALG............................................. 0 46 76 103 +9992 PF10160 Tmemb_40 Tmem40; Predicted membrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4536) Family This is a region of 280 amino acids from a group of proteins conserved from plants to humans. It is predicted to be a membrane protein but its function is otherwise unknown. 25.00 25.00 25.10 36.70 24.50 23.90 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.87 0.70 -5.16 7 145 2009-01-15 18:05:59 2007-08-02 10:45:22 4 3 83 0 90 124 1 232.70 37 73.71 CHANGED hc.luss+sphashlLllPslhFhhFLhachspuhsKLphspS..IhluaYhLlalVullslhhshlphh.shsst......pshWpllphF....hLhlElSllhFhLthsahsutpSlpRshllouhlshh.slsQshhphths..hsLhhs..s..pcGththWlhpphlhhhVYhhlhhh..hp.phR.+LPu+suFY.YhhhhhhLNhlphhusuLhsspst.GhhhhsloshhYaulahPLlYlTFLtcFhpp.....................D..L.ssahu .........................lsps+shhaslhLllPslLFhlFLhh+h.puhtKlp....hspS..IhloaYhLlalVullslshshlph.h..sssst......cllWplhphF....hLslElSllhhh.Lthu..al.p...utpSlpRsLhlosl.....luhs.olhpuhh.hhas.....s.L.h..c.....s...h....s.hh.sc....GthtaWlhpphhhhhVYshlhhh...h..+.R..p..LP...............u+.uFYhYsshhhhLNhlphhGssLls.h.s.hh...Ghh.hhshTshhYauhahPLl.YhsFLtsFFtpp....................c..L.............................................. 
0 22 42 68 +9993 PF10161 DDDD Putative mitochondrial precursor protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4542) Family This is a family of small conserved proteins found from nematodes to humans. The C-terminal region is rich in asparagine. Members are putatively assigned to be mitochondrial precursor proteins but this could not be confirmed. 21.00 21.00 23.40 22.30 20.90 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.53 0.72 -4.35 9 96 2009-01-15 18:05:59 2007-08-02 11:08:41 4 4 78 0 69 85 0 76.30 44 71.82 CHANGED ssphustssshulh.h.hposhshpSGAltP+PpphsFGLLtlhhsVIPuLaIGuhISKNhAsaLEENDlFVPsDDDDDD .............................ts..............h..RoshsspSGA..lhP+P..chsFGLL+l...........hslVI...Ph..LalGshISKNhAshLEEpDIFVP-DDDDDD............. 0 19 23 47 +9994 PF10162 G8 G8 domain He Q, Liu X, Bateman A anon He Q, Liu X Domain This domain is found in disease proteins PKHD1 and KIAA1199 and is named G8 after its 8 conserved glycines. It is predicted to contain 10 beta strands and an alpha helix. 21.00 21.00 21.10 22.90 20.90 20.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.55 0.71 -4.33 34 504 2009-01-15 18:05:59 2007-08-02 11:58:43 4 98 97 0 317 448 35 124.80 30 7.75 CHANGED soWtst.....pl.........Pps..ucsVhIssGpplllDss....ss.hpplhl........pGpLlFss.....pshsLpspsIhlpsG..plphGoppsPa.....psphsIsLpG.........................spsssph.......shGsKsIulhtsGsl-lHG...............csoWTcLss ...................................................Wt.st.................Pp..s..uspVhI..pGphlllDss.....sshhphlhl......sG...pLlFs-................pslpLps.c.hIllpsG................u.pLplG.o...cpsPa.....................ps..chpIsLhG...................................................................pts.ssph.................sshGsK..s.lu..........Vht.GsL-LHG...............hphs.WTpLs.t.................................................................................... 
0 146 187 233 +9995 PF10163 EnY2 Transcription factor e(y)2 KOGs, Finn RD, Coggill PC anon KOGs (KOG4479) Family EnY2 is a small transcription factor which is combined in a complex with the TAFII40 protein [1]. The protein is conserved from paramecium to humans. 20.50 20.50 20.80 20.70 20.10 20.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.71 0.72 -4.05 24 256 2009-01-15 18:05:59 2007-08-02 12:50:33 4 9 183 24 181 241 1 83.10 35 66.19 CHANGED plcstlppp...LlpS..G-pc+lpchL+t+LhEsGWpDcl+phs+chlpp...............tt.sslsh-cLhshlsP+AcshVP-pVKp-lLp+I+saLs ...................hpstlppc...Llco..G-p-+Lc-LL.+p+L..h....EC.GW+Dpl+ths+.chlcc...................puhp..sloh--LlsplsP+uRs.hVP-sVKpE.LLp+I+saL.h............................................................ 0 67 94 144 +9996 PF10164 DUF2367 Uncharacterized conserved protein (DUF2367) KOGs, Finn RD, Coggill PC anon KOGs (KOG4517) Family This is a highly conserved family of proteins which contains three pairs of cysteine residues within a length of 42 amino acids and is rich in proline residues towards the N-terminus. The function is unknown. Several members are putatively assigned as brain protein i3 but this was not validated. 22.00 22.00 24.00 23.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.38 0.72 -4.15 4 109 2009-09-11 06:36:34 2007-08-02 13:25:12 4 1 77 0 78 100 0 88.90 38 74.20 CHANGED hPsYGAh-ous.tsPlPlV....lpssPsutPph............hsp.IIVVGGCPsCRIGhLEDsaoChGIhCAIFFFPlGILCCLAh+pRRCoNCGs.F .........................................................................................................s..............s.......t..........................sp..llVl..G......G.CPsCRlGsL.E..Ds.F.TsLGIhh...AIhhFPlGll.....CChuh...Rp+RCsNCGAhF............. 
0 35 40 62 +9997 PF10165 Ric8 Guanine nucleotide exchange factor synembryn KOGs, Finn RD, Coggill PC anon KOGs (KOG4464) Family Ric8 is involved in the EGL-30 neurotransmitter signalling pathway [1]. It is a guanine nucleotide exchange factor [2] that regulates neurotransmitter secretion. 25.30 25.30 28.10 26.00 24.80 24.60 hmmbuild -o /dev/null HMM SEED 446 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.60 0.70 -5.66 23 341 2009-01-15 18:05:59 2007-08-02 14:02:37 4 3 198 0 221 310 0 380.40 31 83.45 CHANGED sLpsl+lluR-hpshsPhhocculph.LhpaAth...................................................................................................................psss.csthEALKCLsNhlh.sspsp..phhschthsstlscpLpps.t..................sp-scahthRLlFLhTu.sststppLhpchpslshlsttLppphpptspstpt....................-t.tlsEsLKllFNlThhhscpts.............p.uspht+lsslLpcl..................shsps.h-shhuphlNhLssL.sLpshcs.....hppssl................................tssshsslctLlphL-pplpph................pshc...............-plsPlLslLschschs...............................ctsR+al+ttlLPs...cDhcp...t.-pusoLpu+LlRLhTps..sslKcsuuEhLFVLCpcsss+hlKhsGYG.AAGlLAsRGl..sh......tp..t...............tauss.pcu-T--.............hpphp.sINPlTGph.s.-p....ssh-s...MT-EpKEtEA.+LhsLF-....+hpcpGllps.............tthhp-G+ 
..........................................................................hLptl+lhuR-.p.shsshhscps........hph.Lhphu.................................................................................................................................p......p...shhEuL+CLsNhlh.s..sp.........phhh..-h.t.hs....t.l..sphlpt.........................................sp-.p..hh.shRlLFLho..hhpst.h.hppLhpch..phhph..lsphl..pttht.....hh.ttt.t.....................................pp.thhEhLKhlFNlTh...c.t..............................p.stth.hlhslLpch..................s.stp.hpth.hsphlNhLssl.slp.shc..s................ptt.s.h.........................................................................................................................s.shpslphLlphh-pplpph......................................pt.c.......................ptlsPlLslLpchscht...............................cthR+al+......t........p........lLPs...cDh..pp.......................h.p..h..us..s..lps+LlRLhTpss..sl+phsuEhLFl..L...C.......pcs........s...sphlKhsGYGsAAGhLss+Gl.....sh.......tp.....t............................aSp...p.psoco--..............htphp...tlNPl.TGph.p...........ssh.t....MT-EpKEhEA.+Lhshh-....+Lppptll.p.s.................................................................. 0 72 106 168 +9998 PF10166 DUF2368 Uncharacterised conserved protein (DUF2368) KOGs, Finn RD, Coggill PC anon KOGs (KOG4544) Family This family is conserved from nematodes to humans. The function is not known. 
25.00 25.00 34.70 34.50 24.00 23.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.77 0.71 -4.54 13 98 2009-01-15 18:05:59 2007-08-02 14:20:32 4 2 76 0 72 84 0 127.10 39 88.52 CHANGED MGt..shu+u.p.pNh+cpQEa.....chQhERQLtMppph+pRQhAhplApuREhhpW..huuFaslssluhssuhhK+K.......+PshlsPllPLuFlluYQhDhuYGsplpRl+uEAEpIl-pEpp.LLphPtGh.TlcsI-c ...................G...hhscs.s...tsh+p.pQ...Eh..........chQhERQlhMQspMRERphAhp..IAhuREhhpa..husFaulAsluhsuuuh+.pK.......+s.shlhPllPLoFllsY...QhDhuYGohlpRh+uEAEpIhcpEpp..hLphPtGh.Thpsl-p...................... 0 28 32 53 +9999 PF10167 NEP Uncharacterised conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4523) Family This is the N-terminal 80 residues of a family of proteins conserved from plants to humans. It contains a characteristic NEP sequence motif. The function is not known. 25.00 25.00 27.60 26.20 23.40 24.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.41 0.71 -4.40 7 127 2009-01-15 18:05:59 2007-08-02 14:45:11 4 2 98 0 89 113 0 107.90 33 64.56 CHANGED hp-sEhth+h++so-KhoEslahlANEPSluLYRlQEHVR+slPpllp++s-lhphppphpGtsYDlEYuhpsVKohpc.....us.hFpsIpphL+puIthKpplpss..pu+p..cpptsssSs ..........................ph..thtc...so-+hoEslhllANEPSlALYRlQEHlR+ulPtlspc+schtphppp.pGthaslEYuhsAV+shtc.....us.hFcslpphL+puIth+pphpht...ps..............s.................................. 0 30 46 69 +10000 PF10168 Nup88 Nuclear pore component KOGs, Finn RD, Coggill PC anon KOGs (KOG4460) Family Nup88 can be divided into two structural domains; the N-terminal two-thirds of the protein has no obvious structural motifs but is the region for binding to Nup98, one of the components of the nuclear pore. the C-terminal end is a predicted coiled-coil domain [1]. Nup88 is overexpressed in tumour cells [2]. 
20.00 20.00 20.00 20.00 19.60 19.90 hmmbuild -o /dev/null HMM SEED 717 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.39 0.70 -13.25 0.70 -6.69 5 211 2009-01-15 18:05:59 2007-08-02 14:58:37 4 6 126 0 142 223 0 392.10 22 73.25 CHANGED G-pWpppLscHcLFs+L+EGL+lppcTppct.t................+NLLsCLDG-LFlWDucESsFLVsNLRSssusuppsshSpYQTLLCoNPPLFEVscVLlSPoQaHVALlGsRGVsILELP+RWGKcSEFEGGKpsINCRThPlAERlFTSSsSLsLRQAtWYPSEspEPHLlLLTSDNTIRlYNLp-PpoPs+VssLSph.c-osl+ssupoatASLGETAVAFDFGP..............................LussPKsLsGp+..uKpEhlsYPLYILYENGETYLlYISL.+psuslGKLhGPLsMaPuAEDNYGYDACAVLCLPCVPNIIVIATESGhLYHCVVLEAEE-E-touNEsWDuss-psPSLY..VFECVELELuLKLAosE-E.h.-sDFoCPI+L+RDPICppRYHCTHsAGVHSVGLoWIs+LcKFL-S-EEDKDSLQELAoEQ+ChVEHILCTKPLsscpPuPVpGFWIlsDl.LGAoMICITuoaECIuLPLLool+PsSPPLLCo+hDs-Vspp..+hLAEossSFEcHIRoILQRSVsNPlLLpSu-KDSSPPPcECLQLLSRATQVFREQYLLKpDLA+EEhQRRVKLLpsQKEKQLEDlphCREERKSLoEsAE+LAEKFEEAK-+QEsLlNRsK+lLpSh+oQLPVLS-SERDM+KELQpIs-QL+HLuNuIcQVK+KhsYQ+++Ms.uscSP+KsolTLs-+Q+KsIpsILKEpGEHIccMlKQIKcI+NHVuF 
......................................................................................................................................................................................................................................................................................................................................................................l.ph.WaP.....tsphh..hL.sps.hc.aph..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 64 88 117 +10001 PF10169 Laps Learning-associated protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4811) Family This is a family of 121-amino acid secretory proteins. Laps functions in the regulation of neuronal cell adhesion and/or movement and synapse attachment [1]. Laps binds to the ApC/EBP (Aplysia CCAAT/enhancer binding protein) promoter and activates the transcription of ApC/EBP mRNA [2]. 
25.70 25.70 26.10 25.80 25.60 25.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.24 0.71 -3.90 11 105 2009-01-15 18:05:59 2007-08-02 16:02:28 4 5 86 0 69 106 0 123.20 42 88.89 CHANGED KSLRSKa+R.KMRA.KRpKstPKELtRLKpsLu.p-....ut.lMc-lp-lsphhsscchc.Eptct-sp.......t.....pcctKM-V-sc...h....NpKTLpDpaGpYPsWhNpRQ.pKKh+tKppupK.sKsKs.....sKsls .............KSLRSKWKR.KMRA.KRcKsAPKELsRLKphLt.hD.....uchl.MpDlp....-IATVlss+chc..cphph-tp.............ppt.....pcstKM-.s-tK...+....N+K..TLhD..paGpYPlWMspRQ.tK+LKsK+pppK..s+s+s..sh..th..................... 0 17 23 46 +10002 PF10170 C6_DPF Cysteine-rich domain KOGs, Finn RD, Coggill PC anon KOGs (KOG4543) Domain This is the N-terminal approximately 100 amino acids of a family of proteins found from nematodes to humans. It contains between six and eight highly conserved cysteine residues and a characteristic DPF sequence motif. One member is putatively named as receptor for egg jelly protein but this could not confirmed. 20.70 20.70 21.10 36.90 20.20 19.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.45 0.72 -3.93 8 86 2009-01-15 18:05:59 2007-08-02 16:22:26 4 3 73 0 60 75 0 90.60 44 66.03 CHANGED FcCQLCsLouPYoYaGQKPPsopulVLLE-sYVMKDPF.......oPD+E+FLlLGS+CSlCSKsVCVGs-CSLFYoKRFCLPCVpcplpsFP.EIQp-l-K+K ..........................FpCplCslpt.apY.hGp......+PP.....s.pullhLE-sYVM+DPF..........................o.s.c+ccaLlLGucCSlCu+hVClus........pCSlFYo+RFChsCsppphptFP.clptclpK............................... 0 16 20 40 +10003 PF10171 DUF2366 Uncharacterised conserved protein (DUF2366) KOGs, Finn RD, Coggill PC anon KOGs (KOG4545) Family This is a family of proteins conserved from nematodes to humans. The function is not known. 
25.00 25.00 35.20 30.50 17.90 16.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.78 0.71 -4.77 9 94 2009-01-15 18:05:59 2007-08-02 16:30:53 4 3 80 0 68 89 0 155.90 36 75.42 CHANGED sshthPp+acphh.pKhuhaa+uLlpDYpEssh-sspsu+sRPhKAulYLolLushhhsstpNPsEssF.spLhcssspLlLVsPpppNssSstalppL.chhNpG+LR+lSLGlhSllahssaDcssslYpAhC.alps.tahshacRllDVGFhG+WWhLcpKMhDYDlNp- ..................h......thatth.....stah+sLhpDYt-ssh-ssttspt+Ph+AslYsshLuushhshtpsPsEtsFpp.tLlcsospLlLlusthRN.pSptalppL.hhhspGpLRalsLGlhSLla.ssaDtpsslYpApCpYLps.pahsF.pRllDVGFhG+WWlLpt+McDaDlNp....... 0 27 31 52 +10004 PF10172 DDA1 Det1 complexing ubiquitin ligase KOGs, Finn RD, Coggill PC anon KOGs (KOG4816) Family DDA1 (De-etiolated 1, Damaged DNA binding protein 1 associated 1) protein binds strongly with DDB1 and Det1 forming a DDD complex which is part of the ubiquitin conjugation system [1]. 21.00 21.00 21.20 21.30 20.70 20.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.18 0.72 -4.15 11 148 2009-01-15 18:05:59 2007-08-02 17:01:35 4 2 101 0 93 145 0 62.20 44 51.23 CHANGED cFLpsLPs.sppNFS.h..ssss.psostRssVYlPT..cshP.s-QlIsT-ppNILlRaLpQph-cK ................cFLpGLPsascuNFSphpss...sss..ps....SspcsuV...YlPT..cshP.............s-Ql...IsT-p.TNILLRaLpQph-KK.......... 0 23 46 72 +10005 PF10173 Mit_KHE1 DUF2343; Mitochondrial K+-H+ exchange-related KOGs, Finn RD, Coggill PC anon KOGs (KOG4539) Family The members of this family function as mitochondrial potassium-hydrogen exchange transporters. The family is part of a large mitochondrial KHE protein complex. 
22.50 22.50 24.30 25.20 20.50 20.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.38 0.71 -4.44 47 180 2009-01-15 18:05:59 2007-08-03 11:32:18 4 3 162 0 133 170 3 185.80 32 61.97 CHANGED +lhl..lPlos...........+.+s.........hlY..spphp...t....................................phhphhs+lssKAucs....Wsphppucpu....a....pp+llsaGp+lLp+lPaEEhuLKSl.....Pshsp.h+phpp..........................................................ttp..........plplhaP....sslpspplhspL.+plupcptthH++hhhhshlshPlThPhsLlPllPNlPhFYlsYRuauHa+AhpGucHLphLlc ..................................RlhllPlos.................+.+s.....hlY..spphssh.h....................................tpt.shhctlssKuucs....WsphccupcG....a....p++lssaGp+lLpRIPa-EhuLKSl..........Pshss.h+p.p.t...............................................................................................p................tlpllaP....shlss..........pplhphL.+plspcppthH++hhhhsllshPlThPhsLlPl.......lPNlPhFYlsYRuaSHa+.....AltGuc+LphLl................. 0 43 82 117 +10006 PF10174 Cast RIM-binding protein of the cytomatrix active zone KOGs, Finn RD, Coggill PC anon KOGs (KOG4809) Family This is a family of proteins that form part of the CAZ (cytomatrix at the active zone) complex which is involved in determining the site of synaptic vesicle fusion [1]. The C-terminus is a PDZ-binding motif that binds directly to RIM (a small G protein Rab-3A effector). The family also contains four coiled-coil domains [2]. 
35.00 35.00 35.30 35.40 33.50 34.90 hmmbuild -o /dev/null HMM SEED 775 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.79 0.70 -13.90 0.70 -6.49 7 395 2009-01-15 18:05:59 2007-08-03 11:45:35 4 15 88 0 216 377 0 443.00 33 71.08 CHANGED LpupLcps.pEpE.L+pEhEhppocLspohsplcp..........................................................hhSsELc+-+.hRccEuuclshh+pphchsptpst..+hQ.phptLQpcL.+.Qt-lp+Lhpp..pusschtp..................h-hs.tt.pRlptE+-ttth-L.hlR.....c..ohEchpLphEp.pppLssts-plcKLhEhL.....................................................Kupppsppth+RhtEt-sphpcLEslL-p+EKpp.hh..........................plHp+hphp.DsAtTc.............A.QplI-.KDop.tphE+hLcc...hpsEl.hLpSpsshupt-Rpc...plEs.+SthtthKs+................hDpst.pLuRpc........cLEshspptsDh.+p+l-hLppsLpttppctshLQsEl-tLphcL-cpcshhsKpotplpshpEEtuphusEIcch+-hh-hpptclphLQcp.....-.phpccp+phpp.p-Rl+p............ppl-cslhEhE+hht+lcp.Rpptpp.ctEph-paKcchcplcpclpshptcLpEpphpL..h+tcsspLhSst.KpcochcphcIthEpt+-cppKL.pplc+........p.psphspphstlctsssthps-sttspsElDRLh-hLccs.pp+ssp-hchucLtp....sph+hscQpcssts.....hhhp.......c+hsAphhc.hcR......cc-slppuspp..........................................pplpcL.ushcplttEh-phhhpLppoQp.LhphppcLsshcsEhp+phEEshchK...QpAhlAAluppsuphAhhEhpu.pKchsp-chtthcpE+-..........................+lVcQ.K...p.......phQpRMpL 
............................................................................................................................................................p..ts..t.hptpht....p.t..................p.t.....ht.tLppcL..+.......ph......................-....t..th..t..-..-p...-h.hhp.......t.phcchphphpp.pp..ph.thppplcpL.-..................................................................................................................................................................................................................................................................................................................................................................................t.ph............p.....p...t..ptchth.p.p.tt.ptp.t.h.sp.............h-tht.chcptpt..tphtt...th.t-ht.h.t.clpc...hhp.hc.tp.ch..h..tp......p.h..t.........p..ctp.h.....th............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 59 80 165 +10007 PF10175 MPP6 M-phase phosphoprotein 6 KOGs, Finn RD, Coggill PC anon KOGs (KOG4531) Family This is a family of M-phase phosphoprotein 6s which is necessary for generation of the 3' end of the 5.8S rRNA precursor. It preferentially binds to poly(C) and poly(U). 
25.00 25.00 25.20 25.00 24.40 24.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.49 0.72 -3.65 6 144 2009-09-11 06:05:00 2007-08-03 13:18:23 4 1 114 0 97 135 0 102.40 40 63.94 CHANGED LSKulLcMKFMpRs+-pl-Kcts-Ecp+t...LhSsE...lstchLppoppall.EsSal.C......EsLl.GRlSFtGhNPElE+LM...........E....p.pAt+....puctt-cpscth.....DVsDpEMA ....LScsLLcMKFMQRsh...-pppKcp..h..-.E-c+c........l...huc..E.............l..shstl...p....ppps...a..ll...Ep.Sah.C........-sL.l.h.......GRMSFpGFNP-l.EKLM.....................php..sc.p........p.s..ct...p.cp.p..hth......DVoDpEMA.................................................... 0 29 40 69 +10008 PF10176 DUF2370 Protein of unknown function (DUF2370) KOGs, Finn RD, Coggill PC anon KOGs (KOG4812) Family This family is conserved from fungi to humans. The human member is annotated as a Golgi-associated protein-Nedd4 WW domain-binding protein but this could not be confirmed. 22.60 22.60 22.90 23.30 22.50 22.40 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.72 0.70 -5.02 26 493 2009-01-15 18:05:59 2007-08-03 13:29:02 4 7 220 0 283 488 0 134.60 24 66.36 CHANGED phstlssh.......p..sst.ut...thGtusDGVFsNLsAK...P-p.................tttppphPPoY-pAAADssPsYa-sshhssu.................hhtDElhl-GLPVGshhsFlWNhhlohsFQalGFLLoYlLHToHAA+pGSRuGLGLTLlpaGahM+sus.....................t.tts.thh...s...................Pssap...........p.....t....suaps.........................htsp.WluahlhhlGhhIhl+ulhsYh+l++hEchlLpsssppt 
..........................................................................................thh...................................................................................................................P.P.....sYp.st.t................P.Y.t.ph.ts..............................................................................s...hp..th.....l....Gs.h.Fhhshh.huh.Fp.hlGFhh.hhhppohAu+.Guh.GhGlohhtahh.hh.t................................................................................................................................................................................t..hh.hhhhhhGhhh.hpuh.ta..hhp.............t.................. 0 66 110 190 +10009 PF10177 DUF2371 Uncharacterised conserved protein (DUF2371) KOGs, Finn RD, Coggill PC anon KOGs (KOG4823) Family This is a family of proteins conserved from nematodes to humans. The function is not known. 22.90 22.90 23.10 24.20 22.80 22.80 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.88 0.71 -4.30 10 150 2009-01-15 18:05:59 2007-08-03 13:41:28 4 3 78 0 102 135 0 130.10 39 32.60 CHANGED RQDSLRSp.p...........tstc++......+s+++p+u..-VVVVRGKl.....+LhSsSGhhllLGlllllVGlAMAlLGYWP+................................................ttt.tsttss.opspssshpspst.Gshs+hhppaLHS-+hKhLGPllMGIGIFIFICANAlLHENRD+cTKlIshR .........................................................tph+.......h+h+.+s...cVlVV+GKl.....+LhS.SGhhhllGlLlhllGh.....uMAVlGY.WPp......................................................tth...s..ts..t.spt..hpsps...uhhh+.hhptahHs-+h+hlGPllMGIGlFlFICANslLaENRDccTKll.hR........................................ 0 19 28 60 +10010 PF10178 DUF2372 Uncharacterised conserved protein (DUF2372) KOGs, Finn RD, Coggill PC anon KOGs (KOG4828) Family This family consists of proteins found from plants to humans. The function is not known. 
21.20 21.20 21.20 21.20 20.80 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.79 0.72 -3.85 8 121 2009-01-15 18:05:59 2007-08-03 13:49:53 4 3 100 2 79 102 0 89.10 34 62.90 CHANGED lVlTQhGKhGTllslp........s.spu-.........lspPsaoscV......LLGp.D-Plh+lhA+pLlstlutEsus+sllLuluLKD+Ss-sLKsltcslcpspl ..............................VllTQhGKhGTl.lslp...............s...tts-....................hscPshsspV......LLGp...D.EPLlcl....hA+....pLlpal..ot......p......uu..s...+sllLuluLK.D+Sh-sl+slhpslpps......................... 0 21 34 53 +10011 PF10179 DUF2369 Uncharacterised conserved protein (DUF2369) KOGs, Finn RD, Coggill PC anon KOGs (KOG4806) Family This is a proline-rich region of a group of proteins found from plants to fungi. The function is not known. 20.00 20.00 20.00 20.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 300 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.28 0.70 -5.31 5 148 2009-01-15 18:05:59 2007-08-03 14:07:11 4 5 72 0 105 142 0 226.20 31 59.39 CHANGED sRhplsTIV......................ClGp+NpplVpG.L+sDoTYalDVFulcppRsoSSAYluootpT....................+cpsRouPltLp-GpLtQVcLcs++G....hKFFsFslPp..tssusQospLlVHsCsGs.VRlpLFRsGKLLt+o.tuFpGhRpFsVsul+PGc+YLlRap.sNDDEuh+TlRVh.AsSTcuspuPaPsLP-DToVKhls.hRoCSSATIAWhuopDc+.lKYClYp++psoNahE+pVsctsNhCpGslS+clh.....p.VsChYsHSPsps-.pscSlhppTIuNLtPuSTY...LLsVssstssG+uLPYRolhV+TssYC ..............................................shh...............................Chstpp..hl.t.h..pppY.hslashp..pt..shhh.st...p......................tp.......h.Lpptth..h.ltttts.....t.hpht.........p..hhhhl.sC...s...lp..lpl...ptp.lhpt...h.t.t.h.......................ps..stthYhlcl..............s.spc.t..sot.........hKlh...sTTt.scpsaPpLPpDsR.................lcshshhRpCoolTlAWh....so.-.....+....p....pYClhhpc.cs.p...........s.C.......pp.............h.C....p...................pth...hl.tL.sut.Y...hh.lhh.h......u.sl.Y..hhl+ht..C.................... 
0 15 27 66 +10012 PF10180 DUF2373 Uncharacterised conserved protein (DUF2373) KOGs, Finn RD, Coggill PC anon KOGs (KOG4829) Family This is the C-terminal conserved region of a family of proteins found from fungi to humans. The function is not known. 25.00 25.00 28.30 27.50 19.30 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.23 0.72 -4.43 26 219 2009-01-15 18:05:59 2007-08-03 14:32:39 4 6 203 0 158 217 0 68.90 35 21.80 CHANGED LpYLppatps+ps....................................WKFsKp+QsallKahhs.-clPpcatshLlpYLpsLpG.suRp+Lhcpucpll ..................................................LpYLppatps+ps.........................................WKFpKs+QsaLl+phas..-.clPsp.ahshLltYlpuLpG.suRpclhppApph........ 0 48 84 128 +10013 PF10181 PIG-H GPI-GlcNAc transferase complex, PIG-H component KOGs, Finn RD, Coggill PC anon KOGs (KOG4551) Family PIG-H is a family of conserved proteins that complexes with three other proteins to form the GPI-GnT (glycosylphosphatidylinositol anchor biosynthesis transferase) complex. It appears to be a peripheral membrane protein facing the cytoplasm involved in the first step in GPI anchor formation. 20.50 20.50 21.10 20.60 20.10 20.30 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.98 0.72 -4.34 33 259 2009-01-15 18:05:59 2007-08-03 15:55:35 4 8 226 0 173 243 1 69.60 33 28.05 CHANGED Sllll+slGlQlsop......hh.spt..............pcFIPhscIhDlVIsEuF.psa.pVhaYLslll.........+spsp...............lhllF.p .............................ollllculGlQhoop.......ahsupp..................................pspFIshscIpDlVINE..uh..phh.pVhaYLslll.........cspsc...............llllF............................. 1 47 89 139 +10014 PF10182 Flo11 Flo11 domain Linder T, Bateman A anon Pfam-B_18862 (Release 22.0) Domain This presumed domain is found at the N-terminus of the S. cerevisiae Flo11 protein. 
Flo11 is required for diploid pseudohyphal formation and haploid invasive growth. It belongs to a family of proteins involved in invasive growth, cell-cell adhesion, and mating, many of which can substitute for each other under abnormal conditions [4]. 25.00 25.00 27.50 27.50 21.70 24.60 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.81 0.71 -4.65 14 112 2009-01-15 18:05:59 2007-08-03 16:57:57 4 12 42 0 73 110 0 141.30 29 19.18 CHANGED stssphshs.phtstshPthslsVpsVpaVtsNhYpVTlpapsspshsLppL......ppLpIlGlsssp.....sshhLautsps.s.lIs...NsscaouThhVtup....sssshhChP.sFpIpa-aspssssph..sssWp..hhspsashhhuCss.ssptp.uppshssa ............................tppsh.ph.s.h.slpsVpalp-ssYplTlphpupps.l.sL.c..L......tuLKIhGlsuPp.....tsh.Lautspp.shhI.s...sPscaosoap.....Vhup.....spsspshhP..sFpIpa-ahpusu.sph..hpsWp...hGsssas..h.sGCpt.sp....uphsh...h......................................... 0 7 32 66 +10015 PF10183 ESSS ESSS subunit of NADH:ubiquinone oxidoreductase (complex I) KOGs, Finn RD, Coggill PC anon KOGs (KOG4808) Family This subunit is part of the mitochondrial NADH:ubiquinone oxidoreductase (complex I). It carries mitochondrial import sequences [1]. 22.70 22.70 22.90 23.00 22.60 22.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.45 0.72 -3.63 45 270 2009-01-15 18:05:59 2007-08-03 17:16:03 4 9 222 0 169 257 0 102.20 25 65.39 CHANGED htpspt.........st..ssssupts..u+tp...............Ps................Gh......LascpP...G.....pphchEsWEhsaahuhhhslVlhulshsaK.P..Dhol.poWApcEAt+R....LEscG ..........................................................hht....................pss.s.ps.....sctt....p.........................sp......................uh.........Laspp..P...G.....pphchcsWEhsaahuhshsllhhushhsap.P..Dh...........s...........h..psWApcEAhcc....hEttt...................... 
0 53 87 131 +10016 PF10184 DUF2358 Uncharacterized conserved protein (DUF2358) KOGs, Finn RD, Coggill PC anon KOGs (KOG4457) Family DUF2358 is a family of conserved proteins found from plants to humans. The function is unknown. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.71 0.71 -4.15 29 393 2012-10-03 02:27:24 2007-08-06 10:34:44 4 5 158 0 238 556 155 112.80 25 44.24 CHANGED tcllchL+pDhspha..scshsYuIYscDlhFpDPlspFcs..lppYphh...l+Fhsp.aFtslpl-lpclppssp....pIpsRWsl.pshshlP.....W+...scls............hsGhSphplspsG.hIhpH .........................t..hhchL+p-h..s..p..ha...hps...hs..h..s.lYspDlhFt.....DPl........t..sacG.............hppYpt.........l...chlsp.ha...........t....p..hpl-.lhpl.....p.......p......ts......cs.......slp..sRWpl..ps.h..s.t.hs........Wc......scht.........................hsGhSpahlsp.pu.hIhpH................................ 0 84 158 207 +10017 PF10185 Mesd Chaperone for wingless signalling and trafficking of LDL receptor KOGs, Finn RD, Coggill PC anon KOGs (KOG4357) Family Mesd is a family of highly conserved proteins found from nematodes to humans. The final C-terminal residues, KEDL, are the endoplasmic reticulum retention sequence as it is an ER protein specifically required for the intracellular trafficking of members of the low-density lipoprotein family of receptors (LDLRs) [1]. The N- and C-terminal sequences are predicted to adopt a random coil conformation, with the exception of an isolated predicted helix within the N-terminal region, The central folded domain flanked by natively unstructured regions is the necessary structure for facilitating maturation of LRP6 (Low-Density Lipoprotein Receptor-Related Protein 6 Maturation) [2]. 
19.20 19.20 19.20 19.90 18.50 18.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.92 0.71 -4.53 7 123 2009-01-15 18:05:59 2007-08-06 10:54:47 4 4 99 12 84 133 0 158.90 44 78.42 CHANGED tKKDIRDYsDADhtRLLEQW..E--DslE.s-LPEHhRPssslDhSplDs..ssPEslLKhSKKG+TLMhFVoV..oGsPTccE....TE-ITpLWQsSLaNsphphpRallssNRAIFMh+DGu.AWEsKDFLVpQ-RCtcVTlEsp.YsGKs..............................tp.tpsK-EL ......................................................tKKDlRDas-ADhtRLL-QW...EcD-.slE-s.-LPEH..hRPs....s....lD.h.Spl..Ds.........spPE.slLKh..oKK...G+T..LMhF.VoV.....sGs.P....Tc.....cE....oE-ITpLW.QsSLaNs..p.hplpRahVsssRAIFMh+DGu.AWEhKDFLlpQ-RCt-VTlEspsY.sGhs..............................t................................................................... 3 26 36 62 +10018 PF10186 Atg14 DUF2355; UV radiation resistance protein and autophagy-related subunit 14 KOGs, Finn RD, Coggill PC anon KOGs (KOG4398), Wood V Family The Atg14 or Apg14 proteins are hydrophilic proteins with a predicted molecular mass of 40.5 kDa, and have a coiled-coil motif at the N terminus region. Yeast cells with mutant Atg14 are defective not only in autophagy but also in sorting of carboxypeptidase Y (CPY), a vacuolar-soluble hydrolase, to the vacuole. Subcellular fractionation indicate that Apg14p and Apg6p are peripherally associated with a membrane structure(s). Apg14p was co-immunoprecipitated with Apg6p, suggesting that they form a stable protein complex. These results imply that Apg6/Vps30p has two distinct functions: in the autophagic process and in the vacuolar protein sorting pathway. Apg14p may be a component specifically required for the function of Apg6/Vps30p through the autophagic pathway [1]. There are 17 auto-phagosomal component proteins which are categorized into six functional units, one of which is the AS-PI3K complex (Vps30/Atg6 and Atg14). 
The AS-PI3K complex and the Atg2-Atg18 complex are essential for nucleation, and the specific function of the AS-PI3K apparently is to produce phosphatidylinositol 3-phosphate (PtdIns(3)P) at the pre-autophagosomal structure (PAS). The localisation of this complex at the PAS is controlled by Atg14 [2]. Autophagy mediates the cellular response to nutrient deprivation, protein aggregation, and pathogen invasion in humans, and malfunction of autophagy has been implicated in multiple human diseases including cancer. This effect seems to be mediated through direct interaction of the human Atg14 with Beclin 1 in the human phosphatidylinositol 3-kinase class III complex [3]. 27.10 27.10 27.10 27.20 27.00 27.00 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.95 0.70 -5.47 39 520 2012-10-03 11:38:54 2007-08-06 10:55:36 4 16 250 0 371 532 2 295.80 16 56.42 CHANGED pCslCp....sspp.hhCssCsps...........p...Lhch+hchtplttcp-slpp+lpphlp...............................sththpth+schtpppc+hpplppplppl+pclcpp+pclpph+pslpp++sslp...tp.thtpccsp.lpphpsshc+tcpphpt.lpshhtppRshlhcplsplaslpph....................cpss........ptsas.Iu...s...........hslhsh+shpp................................................................hssppIssSLsahspllsLluchLulcLPtplshs..ppsh......................................................................shsths.cp.......stshppFhculshLshNlsaLsp........op...ulsls...hssa-shsplhp.lhphhhsp 
.....................................................................................................................................................................t.....h.php.thtphh.pp...pp...lt...pplpphlp.....................................................tp.thpphptphtp.t...p.+ht........l.ppplpp.ppplppt.ppplp.chptplpp+pptht.......tt....th........p.pttp..thp..p..h.pp.p...h..p..pp......cph.h.......p.........p..hppp......htt...pppp.lhp.p.lst...laslp..............................................p.ts.............................ptsas..Is..............s............................htl..s.s...s...p....sh.ps......................................................................................hst.t...plu..s...u...LuasspLlpllu.hhL..slsL.a.lt...h....t.pup..........................................................................................................................................................................................................................................................................t..h......t................s.s.p.....pFphulhhLspslt.Lph...............pp...slph.......p..hpsh..hh..........h.................................................................................................................................... 0 111 180 293 +10019 PF10187 Nefa_Nip30_N N-terminal domain of NEFA-interacting nuclear protein NIP30 KOGs, Finn RD, Coggill PC anon KOGs (KOG4036) Domain This is a the N-terminal 100 amino acids of a family of proteins conserved from plants to humans. The full-length protein has putatively been called NEFA-interacting nuclear protein NIP30, however no reference could be found to confirm this. 
21.60 21.60 21.60 22.50 21.40 21.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.59 0.72 -3.84 17 264 2009-01-15 18:05:59 2007-08-06 10:57:05 4 4 215 0 181 250 1 97.20 39 42.18 CHANGED FVSpupl-E.+c.ttp.ht.......tcpscspsppp.cu+oLYEhLQppK.ctKptEa-Ep...hKlKN.h+uLD-DEsEFLDplpppc+ttEtphccEEscpLctF+ ......................................FVSpuplpEt+cpc.pEthcthpp..........scsspt.ps.pcthD...sRSLYEpLQcp..K..spKQpEaEEp...hKhKN...h.............RuLD-DEspFLDplpcppcthEcphccE-tcpLcpa+.......... 0 62 95 145 +10020 PF10188 Oscp1 Organic solute transport protein 1 KOGs, Finn RD, Coggill PC anon KOGs (KOG4033) Family Oscp1 is a family of proteins conserved from plants to humans. It is called organic solute transport protein or oxido-red- nitro domain-containing protein 1, however no reference could be find to confirm the function of the protein. 25.00 25.00 52.20 52.00 23.50 20.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.98 0.71 -4.44 10 139 2009-01-15 18:05:59 2007-08-06 11:10:40 4 3 104 0 83 133 3 170.40 48 48.65 CHANGED MlYlL-QRL+AQs...lsp-KuspVLcDIspslasscFlsELFpspt.....lhShstl+plhcslupCSIMRLspsSMsKLaDLMhMshKaQlhssp+PtEllp.lThsHL-ulpcllsp...sctpshlctshpplh-hassLsss-hhtlRpcLLsFLpshps+VSlhlcsshQs.psGsFhl .....MLYlLDQRL+AQs...IssDKu..........c+...........VlsDIlssMFscKFh-ELFKPQc.....LYScculRola-+LAHuSIMRLNpuSMDKLYDLMTMuFKYQlhlss+P+-lLh.lTaNHLDuI+s.hlpD......ssslhppVDp......shppL...h.........-hYu.sLosG-athIRpTLLhFFQDh+lRVSlFL+cplQs.ssGpFl...................... 0 29 41 63 +10021 PF10189 DUF2356 Conserved protein (DUF2356) KOGs, Finn RD, Coggill PC anon KOGs (KOG4262) Family This is a 200 amino acid region of a family of proteins conserved from plants to humans. Some members have been putatively annotated as being integrator complex subunit 3 but this could not be confirmed. The function is unknown. 
25.00 25.00 30.10 36.70 20.90 20.50 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.48 0.70 -5.30 8 130 2009-01-15 18:05:59 2007-08-06 11:12:43 4 3 105 0 95 130 0 216.70 53 24.43 CHANGED pLWRDllh...............pPpshsPta.GlhQLhch+TS++FLt.sRLoPEMEpKlhFlhusVKFGsQK..RYQsWFpcKYLsoPEScSLhsDlIRFIssVlHPoN-lLsSDIlPRWAIIGWLLooC.........................psssspuNuKLuLFYDWLFFDPcKD.NIMNIEPAILlMhaSl...............+paPtlTsoLL-FLC.+lhcsassptc-pIRpGVpNuh+hIh-KtVl.sLs.lh-ssKL-R-Lp.hlRcpFs-FLS ............................h.LW+Dllh....................NPpsL.s.P.pFsGlhQLLptRTSR+FLt.sRLTP-MEpKLhFh.sSpV+FGpQK...RYQDWFQ+pYLuTP-SpSLRsDLIRaICuVlHPoNElLsSDIlPRWAlIGWLLToC.........................Tosl.A.suNAKLALFYDWLFF...sPcKD.s.IMN.IEPAILVMtHSh...............+sHPs.lTsTLLDFhC.RllssFaPs..hcsplRpGVhsSLphIl-Kp.Vl........spLsPLF-s.P....KL...D...+-LRshlRcpF.EFh..................... 0 35 48 71 +10022 PF10190 Tmemb_170 Tmem170; Putative transmembrane protein 170 KOGs, Finn RD, Coggill PC anon KOGs (KOG4349) Family Tmem170 is a family of putative transmembrane proteins conserved from nematodes to humans. The protein is only of approximately 130 amino acids in length. The function is unknown. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.48 0.72 -3.97 8 145 2009-09-11 14:34:23 2007-08-06 11:13:35 4 3 91 0 98 132 0 98.90 53 67.44 CHANGED LssFsEMWYaVFLWALFSSlFlHssAullAFsTLR+HKhGRF.hSIhIllMGlLuPloGGllTSAsIAsVYRAuGhsMuPlcAllaGVGQTllohllSFoRILATL ..................psasEMWYt.lFLWALhSSlFhHss.AGlLAhhTL....R+H.KhG....Ra..hSlhl....llhGhlu...slTuGhlTSAAlAGVYRuAGKpMhPhcALshGlGQThhslllSFh.RILATL................. 0 22 33 63 +10023 PF10191 COG7 DUF2354; Golgi complex component 7 (COG7) KOGs, Finn RD, Coggill PC anon KOGs (KOG4182) Family COG7 is a component of the conserved oligomeric Golgi complex which is required for normal Golgi morphology and localisation. 
Mutation in COG7 causes a congenital disorder of glycosylation [1]. 29.30 29.30 31.10 30.90 29.20 29.20 hmmbuild -o /dev/null HMM SEED 766 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.31 0.70 -13.21 0.70 -6.63 8 242 2012-10-02 15:56:29 2007-08-06 11:14:02 4 7 143 0 166 238 2 481.30 21 88.41 CHANGED DhSpFuuDcFDVKcWlNAua+uspcssuss+sct..aluoLVhKLQLalQ-lNsulEEoStQALpsMP+VlRDlptL+pEAuhL+-pMttVpp-Ip+hEp-TupsMpsLsRlDsVKpRMQhAtcoLQEADGWupLSA-lEDsFcosDlsssupKLsuMQpSLuAls-lP-aoE+psQLEsLKNRLEALsSPplVsAhsspsl.......DQupcaVpIFpcIsRhPQLptYYpKlHKs.LhphWpchspppts.........................hshsphLopFYDpLLphhcsQh+WssQlFpc.hc.llsllll-TLusLsPShsuplspAlc+Ass-.c........................LpsLl-lassousFu+slEphlttt.tp....ssLh+lh-LlcAVauPacsFptpYGsLEcopLptpluulslctu..........ElsDsVpcL-cSlsKlhsLhpuAV-RClshTsGhulspLlpALculhtpYlopaspsL+SLRhspsl-shssss......................E-WSlhQsolplLsssG-LhppsusFEppLtsplhshuuph..p.assp..ts..p....tppuss+sshptaphlspschs-hsshhcsLhslpEcu..........................................ssshslLutsppsls+hsppsH-LsaDslF.pI+QpLtpl.s+hcsaootultE.hs.DLPsFShsPQEYITsIGQYLLTLPQHLEPLlpu.sssLchsLcssch.assppttt..-hpshADhWLuplAEuopthYp-pIhpIptLos+uAKQLATDIEYLuNVL-ALGLslstsLQpIlTLL+As.-pY+sluK.....uLsh+LuusIsphRs 
...............................................................................................................................................................................................................................................h..th..h........................................t.........h.t.......l.p...lp.hptchp.s.ts.Lp.cuttht....t....h.tpl......pt..hh.........t.......t.......t.......ph................................hsttl.thppsh.h..........t.hs.th.tt.h...ht..hppp...lct.hp......l....ht.ht....t.t...................................t.st.hh...tlh......th.tp..th...a.t..............h....W...t.....................................................................h....hht.hh..http..........hh........................h....h.......h....h......h....................................................................................................................................................h...............................................t.h..............t.s..tph..ho...t....h.h...ht..h..............................................................................................................................................ht.h.t...............................................................................................................................................................................................h.........th.......h.t....hh.p.h....h..h...h..h.....................................................................h..st..hs...............P.t.hpphuphlhsls..h-sh.......................................................................................................................................................................................................................................................................................................................................... 
0 85 110 141 +10024 PF10192 GpcrRhopsn4 Rhodopsin-like GPCR transmembrane domain KOGs, Finn RD, Coggill PC anon KOGs (KOG4290) Family This region of 270 amino acids is the seven transmembrane alpha-helical domains included within five GPCRRHODOPSN4 motifs of a G-protein-coupled-receptor (GPCR) protein, conserved from nematodes to humans. GPCRs are integral membrane receptors whose intracellular actions are mediated by signalling pathways involving G proteins and downstream secondary messengers [1]. 23.70 23.70 23.70 23.70 23.50 23.60 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.29 0.70 -5.19 23 259 2012-10-03 04:04:29 2007-08-06 11:14:50 4 6 114 0 206 328 2 222.60 26 48.39 CHANGED shplcaclplhNss.........t..spcFSh-ppsl.phhhlhhllallhlhhshhhh.pthpppp.hchshphhshslhlphlShhlhhhahhhYu.hsGhGh.hhplhupllphhuphlhhhLLlhLuhGWTIop.hphs........shhhhhslhsllshhpsllhlh.sphhh-stppha.Y-uhsGahllhl+lshhlaFhhultp.sh..p+ps..t+psFhhhFshhshlaFluhPllshluhh.lssahRtKllphsshhhphluhlhh ............................................s..plpachhlhNsp..........s..hpcFSh-c........slhph..hhhh..l.lahlhhhh...h..h...hlptpt.hHssh.+.h..hh.s.u.l.hlp...hhShhh.hla.hhp.Yu.tsG.lG..................hthhucl...hphhSphh.hhhhLl....hL.u.hG.aols+..hph.s.......................st...h.hhshh.shh..sh.h..p..hhLhla..........t...p.....-s..t.p..sh........atac...o.sGhh.ll..h......lpl.shh......l.h.F...hhslh........h.....c+.s..........KptF.....ah.F.h.h........sh.hlW..FhshPl.h.shl.u.h..hst..ahRt.+llp.hh.hhhph.huhhlh.................................................................................................................... 0 93 119 170 +10025 PF10193 Telomere_reg-2 Telomere length regulation protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4346) Family This family is the central conserved 110 amino acid region of a group of proteins called telomere-length regulation or clock abnormal protein-2 which are conserved from plants to humans. 
The full-length protein regulates telomere length and contributes to silencing of sub-telomeric regions. In vitro the protein binds to telomeric DNA repeats. 20.90 20.90 20.90 21.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.36 0.71 -3.82 31 272 2009-01-15 18:05:59 2007-08-06 11:15:43 4 8 242 4 200 275 5 111.50 31 12.69 CHANGED sPlYl+DLlphLp.-s.ps...h-+hchALpsussLIRpKss...assElpphup-LsphLlsLps.pachppFp-hRhpuhlAllVsp...............................P.phupahsphaas..u-aSlpQRhslLssLuhuA ............................PsYl+DllphLptsp...s........h-+hchALpss.s.LlRppss......-lpchut-LsphLlpLp.....s.....chshssFpphRhpuhlAllVsp....................................Ph.psupals.pthas..sshSlpQRhslLssLuhuA............................... 0 65 108 163 +10027 PF10195 Phospho_p8 DNA-binding nuclear phosphoprotein p8 KOGs, Finn RD, Coggill PC anon KOGs (KOG4319) Family P8 is a short 80-82 amino acid protein that is conserved from nematodes to humans. It carries at least one protein kinase C domain suggesting a possible role in signal transduction and it is thought to be a phosphoprotein, but the sites of phosphorylation and the kinases involved remain to be determined [1]. 21.20 21.20 21.60 21.30 20.30 20.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -8.92 0.72 -4.09 13 132 2009-01-15 18:05:59 2007-08-06 11:19:25 4 2 94 0 67 134 0 57.90 46 70.61 CHANGED MSEsaaD-aEaYNacpDKtlhoG.tSGKtRTK+EushHTN+hsPuGHsRKllTKLpNoEpK+ ............p-thhDpa-hYshst.ph.hhu.G..uuKtRTK.+EAut+TN+.sP..uGHpRKlloKL.N.oEp++.................... 0 18 22 40 +10029 PF10197 Cir_N N-terminal domain of CBF1 interacting co-repressor CIR KOGs, Finn RD, Coggill PC anon KOGs (KOG3869) Domain This is a 45 residue conserved region at the N-terminal end of a family of proteins referred to as CIRs (CBF1-interacting co-repressors). 
CBF1 (centromere-binding factor 1) acts as a transcription factor that causes repression by binding specifically to GTGGGAA motifs in responsive promoters, and it requires CIR as a co-repressor. CIR binds to histone deacetylase and to SAP30 and serves as a linker between CBF1 and the histone deacetylase complex [1]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.62 0.72 -3.93 39 495 2009-01-15 18:05:59 2007-08-06 11:23:05 4 9 268 0 353 481 1 37.10 38 10.07 CHANGED SWHPshh+Npc+VWcsEQcthpEp++lc-hpc.EhccE .uWHPtshcNhc+VWtAEQct....ptEcKKh--htp.chpcE................ 0 116 185 289 +10030 PF10198 Ada3 Histone acetyltransferases subunit 3 KOGs, Finn RD, Coggill PC anon KOGs (KOG4191) Family Ada3 is a family of proteins conserved from yeasts to humans [1]. It is an essential component of the Ada transcriptional coactivator (alteration/deficiency in activation) complex. Ada3 plays a key role in linking histone acetyltransferase-containing complexes to p53 (tumour suppressor protein) thereby regulating p53 acetylation, stability and transcriptional activation following DNA damage [2]. 24.50 24.50 24.70 25.00 24.30 24.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.71 0.71 -4.40 23 232 2009-01-15 18:05:59 2007-08-06 11:23:59 4 2 212 0 178 230 0 133.70 32 22.73 CHANGED phDasolEERLK+EL+alGlh...................................................Dss-h...ss+pDDElsAELRpLQscL+plophNptcKp+LlslscEchAaQEYpsIL--LDpQlppAYhKRl+shtK+K+...pc+pst.................ptpAssu..................sL+sLL-+ .........................................chtslEpRlKpELhthGlL................................................................................-...s.Dh....sscpDDElhucL.RphQsEL+p.sthNps+KpcLlc..ls+EchuhQEhpph.l-slDspV.pAYhKhhpshtK++p.....p+ccts.....................tt.tshpt.................tc.hhpLh-...................................................... 
0 58 94 145 +10031 PF10199 Adaptin_binding KYY; Alpha and gamma adaptin binding protein p34 KOGs, Finn RD, Coggill PC anon KOGs (KOG4273) Family p34 is a protein involved in membrane trafficking. It is known to interact with both alpha and gamma adaptin [1]. It has been speculated that p34 may play a chaperone role such as preventing the soluble adaptors from co-assembling with soluble clathrin, or helping to remove the adaptors from the coated vesicle. Another possible function is in aiding the recruitment of soluble adaptors onto the membrane [1]. 32.00 32.00 32.00 32.00 31.90 31.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.85 0.71 -3.89 38 207 2009-01-15 18:05:59 2007-08-06 11:24:57 4 5 184 0 156 211 0 139.40 22 39.17 CHANGED EppGhc..RlhEsL-sp.Wss.hc.tp.t...............................................................................................................hppcthshcps.pphp...................................................................tt.p.-...............shEplhp+lpth+cpspp..lsc......pcRccaAtchspc .............................................................................EppGlpRlhpALpup.Wss..hthcstpp.........p...t.........................................................................................................................................................................................................................hptphtshpts.pptt.....tt............t.......t..........................................................................................t.t.tp..s....psphcl-...........................shEpLhp+lpth+-puss..LPc.ppR+phAtKsstt.............. 0 52 82 125 +10032 PF10200 Ndufs5 NADH:ubiquinone oxidoreductase, NDUFS5-15kDa KOGs, Finn RD, Coggill PC anon KOGs (KOG4110) Family This is a family of short, approximately 105 amino acid residue, proteins which form part of NADH:ubiquinone oxidoreductase complex I. 
Complex I is the first multisubunit inner membrane protein complex of the mitochondrial electron transport chain and it transfers two electrons from NADH to ubiquinone. The protein carries four highly conserved cysteine residues but these do not appear to be in a configuration which would favour metal binding so the exact function of the protein is uncertain [1]. 21.20 21.20 21.30 21.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.22 0.72 -4.00 6 171 2012-10-02 15:44:21 2007-08-06 11:25:25 4 4 139 0 105 165 0 82.40 31 76.86 CHANGED MPFlDlQK+LGlNlD+WlhhQSuEQPaKhAuRCHAFEKEWIECuHGIGpTRAcKECKIEaEDFhECLhRpKTM+RLpsI+cQR-KLlKEGpYTPPP ....................................................sRCasFEpcal-C...s..c...uh...G...t..s.Ru...c....KcCphth-DahEClp+pKphpRhpslp....p....p+c+...t........................ 0 29 51 81 +10035 PF10203 Pet191_N Cytochrome c oxidase assembly protein PET191 KOGs, Finn RD, Coggill PC anon KOGs (KOG4114) Family Pet191_N is the conserved N-terminal of a family of conserved proteins found from nematodes to humans. It carries six highly conserved cysteine residues. Pet191 is required for the assembly of active cytochrome c oxidase but does not form part of the final assembled complex [1]. 22.90 22.90 23.10 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -9.76 0.72 -3.93 33 259 2009-01-15 18:05:59 2007-08-06 11:26:49 4 5 238 0 182 235 2 64.60 39 59.86 CHANGED hssSCpcl+psLttCL.cScClhhccposc-CLpss...tplPpcChtl+cuah-CKRuhlDh.RpRFRGN .....ssuCpsl+psLstCL.pS-Clhhp.c..+..oPp-CLcps....plP.cCptL+puah-CKRuhl....Dh.RpRFRGp............... 0 50 89 144 +10036 PF10204 DuoxA Dual oxidase maturation factor KOGs, Finn RD, Coggill PC anon KOGs (KOG3921) Family DuoxA (Dual oxidase maturation factor) is the essential protein necessary for the final release of DUOX2 (an NADPH:O2 oxidoreductase flavoprotein) from the endoplasmic reticulum. 
Dual oxidases (DUOX1 and DUOX2) constitute the catalytic core of the hydrogen peroxide generator, which generates H2O2 at the apical membrane of thyroid follicular cells, essential for iodination of thyroglobulin by thyroid peroxidases. DuoxA carries five membrane-integral regions including a reverse signal-anchor with external N-terminus (type III) and two N-glycosylation sites [1]. It is conserved from nematodes to humans. 21.20 21.20 21.30 26.90 20.80 21.10 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.83 0.70 -5.34 7 173 2009-09-11 16:49:30 2007-08-06 11:27:08 4 3 80 0 103 157 0 229.80 42 73.87 CHANGED Pa..ssRsshshsspllslhllFlshhluFLlILPGlRt+............................RhhhhhpllLSLFlGAVIlssphsssWtsup.hhhssYKuFSpphl.AclGhalGLhtlNlTLt.........uhPstphsE.sIsYNEtFsWctspshscpYccALE+GLP.PlLhVAEhFohs.sshuhhpQYRhAGaYASthLWsuFssWlLhNlLL..hsVshYGuhhhhh..TGhhhLhuhhsat.h...hpsssl+lu......suVlph+.aGhsaWLsLssGlLCl...lsGlslslhchshP+slpshlphu.cps .....................ah.ts+.shshssshhhllhlFlshhssFllILPGIRG+.............................Rhhhhl+VhhSLFlG.......AhIl...ss.phuspWtsu.p.hp.h.ssY+uFStthl..AclGhplGL.tlNlTLp.................................uhP.h...tp.........h.s....-....sIsYNEpFsW............p..........h...........s....p..........s..........hsppYtpALp+GLP.PlLhlAEhFo.p...pshshhtpYRhAG..aYsuhhLW................sAFhh.WlltNlhL..hss.hYGuhh.hhh..TGhh.lhuhhhas.h.s.h.....h..slpls................suhLphp..aG..sFW.ls.LssGlLCh...lhGhhhshhchh.Ppthpshhp...t..s.......................... 1 29 35 67 +10037 PF10205 KLRAQ Predicted coiled-coil domain-containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4421) Domain This is the N-terminal 100 amino acid domain of a family of proteins conserved from nematodes to humans. It carries a characteristic KLRAQ sequence-motif. The function is not known. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.24 0.72 -3.98 7 131 2009-12-22 15:39:10 2007-08-06 11:27:39 4 5 89 0 81 123 0 96.40 53 16.16 CHANGED QKLApEYSKLRAQspVLK+AVl-EQupssuL+EpLKpK-poLR+hpQEhDSLuFRNpQLt+RVp.LQpELt.....lscstspKs+..ssscpsu.huppspsVhcE-L .....QKLApEYSKLRAQspVLKKuVl...-EQApss........uL+EQLK.KDQSLRKhpQEhDSLoFRNpQLsKRVElLQ-ELs..........ls.Es+..uKKsK....csucs.suphstpptsVhtE-............................... 0 29 36 62 +10038 PF10206 WRW Mitochondrial F1F0-ATP synthase, subunit f KOGs, Finn RD, Coggill PC anon KOGs (KOG4092) Family This is a family of small proteins of approximately 110 amino acids, which are highly conserved from nematodes to humans. Some members of the family have been annotated in Swiss-Prot as being the f subunit of mitochondrial F1F0-ATP synthase but this could not be confirmed. The sequence has a well-conserved WRW motif. The exact function of the protein is not known. 26.50 26.50 26.50 27.40 26.40 26.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.38 0.72 -3.98 17 197 2009-01-15 18:05:59 2007-08-06 11:28:26 4 8 102 0 96 213 0 84.90 43 59.95 CHANGED MuhGthPucaNsKlpGsY...paYGp.DpPhhpVKLGELsuWhuRR-KoPpuhsuuhSRuaWRappKYhp..sK+sGIushhpllsuhhshhYhhsYs+lKHaRph ..............................h..............................tDpp.Lh-VKLGELsuWlhh...RD.hoPsulhuAhpRuaa.Ra.pKYlp..sK+uulushh...llsuhhhh.YhhsY.p+..lKHcR..h.................... 0 17 23 68 +10040 PF10208 Armet Degradation arginine-rich protein for mis-folding KOGs, Finn RD, Coggill PC anon KOGs (KOG4154) Family This is a family of small proteins of approximately 170 residues which contain four di-sulfide bridges that are highly conserved from nematodes to humans. Armet is a soluble protein resident in the endoplasmic reticulum and induced by ER stress. 
It appears to be involved with dealing with mis-folded proteins in the ER, thus in quality control of ER stress [1]. 20.60 20.60 20.70 20.60 20.50 19.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.17 0.71 -4.56 8 204 2009-01-15 18:05:59 2007-08-06 11:29:04 4 12 129 6 120 194 4 131.40 47 65.87 CHANGED -CEVClcFlsRhhpSL.scsVchcsDsIEctlhctCcss+GKENRFCYYlGAsp-SATpIhsEVo+PLSa+MPs-KIC.EKLKKKDuQICEL+Y-Kp.lDlsoVDLKKhKVK-LKKILs-WGEsC+GCsEKoDFIp+IpELtPKYs..tt...ps+TEL ..........................-CEVClphls+F...hpsL.sc..s.sp.h.s.ttI.Ep.thhchC.pps..+...s.........K..ENRhCYYlGuhp-uATtIls.Elo+PhoaphPspKIC.EKLKK+DuQIC-L+Y..-....Kp.l.DLs....o......VDL+KL+V+-LK+ILs-.WGEpCcGChEKoDal++IpE.LhPKYs................................................................ 0 46 59 86 +10041 PF10209 DUF2340 Uncharacterized conserved protein (DUF2340) KOGs, Finn RD, Coggill PC anon KOGs (KOG4147) Family This is a family of small proteins of approximately 150 amino acids of unknown function. 25.00 25.00 46.60 31.10 19.80 17.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.67 0.71 -3.95 18 127 2009-01-15 18:05:59 2007-08-06 11:29:24 4 5 113 0 92 121 0 111.60 43 71.62 CHANGED lTVRlIKSF.YRNlKNhlh+slDLpspTsc-Llc.l+pcIpTsuuh.RPFRsl..cYDoLKIYT+AHGSKTsNLVINh-cD-pWlL...............phpsts+oLh-hGVcNETElShFNhcDYhpaKtNPcpKW .....lTVRlI+SF.aRNhKslVh+slsL..spTsc-hhphlppcltpp.....suh..PaRNh.......pYDTlKIhppAHuuKTssLVlsh-cD...-.phlL...................p..pspoLtph.GltsETEluhFshcDYhpaKtNP..p.pW..................... 0 38 53 72 +10042 PF10210 MRP-S32 Mitochondrial 28S ribosomal protein S32 KOGs, Finn RD, Coggill PC anon KOGs (KOG4106) Family This entry is of a family of short, approximately 100 amino acid residues, proteins which are mitochondrial 28S ribosomal proteins named as MRP-S32.\ Their exact function could not be confirmed. 
25.00 25.00 32.60 32.00 24.80 24.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.25 0.72 -3.67 8 112 2009-01-15 18:05:59 2007-08-06 11:29:54 4 4 76 0 66 106 0 88.40 44 60.99 CHANGED NCsV-lAlTpDGRTIVCaHPuV-lPYEHTKPlPcsD.lpssspoHEplLKo+Lp..phKptc.tPspEELSKlFaTTKHRWYP+Gp..cR+tKpsPsccc .............hpsclAlTSDG+TIVCYHPss-lPYEHTKPlPcss.....hpsst....ps+-phL+splp...phpphcptshh-pLuKhFaTTKHRWYPpup..pc+tp.sP.cc................... 0 17 21 43 +10043 PF10211 Ax_dynein_light Axonemal dynein light chain KOGs, Finn RD, Coggill PC anon KOGs (KOG4001) Family Axonemal dynein light chain proteins play a dynamic role in flagellar and cilia motility. Eukaryotic cilia and flagella are complex organelles consisting of a core structure, the axoneme, which is composed of nine microtubule doublets forming a cylinder that surrounds a pair of central singlet microtubules. This ultra-structural arrangement seems to be one of the most stable micro-tubular assemblies known and is responsible for the flagellar and ciliary movement of a large number of organisms ranging from protozoan to mammals. This light chain interacts directly with the N-terminal half of the heavy chains [2]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.11 0.71 -4.46 30 362 2009-01-15 18:05:59 2007-08-06 11:30:38 4 5 126 0 212 328 14 174.70 30 36.75 CHANGED sc-lLsoIlPPRcap.cc.uphalQhVSssPuoRpDVlpLp-pLDppLppRpAR-oGI.CPIRcELYuQCFDElIRQVTIsCsERGLLLlRVRDEl+hTlsAYpsLYESulsFGhRKslpsEps+s-hcpclppLEc-ppcLcpplpcLct+h-shc+ptpEcpptcEKp+p-ElsaLK+s.sppL+spLcpl ......................................................................h....h...pt.t.hhhp.sS.pP...s...oR.DVlpLp-.....LD....phLp......p......p...p...........A.....+....p............o....G............I....Cs.l...R...........c......c...l..YspsF........cELIR......QVTlsCsERGlLLh+lR--........h.ph....sls....s........a....p..pL.hcu....ul....s....a...uh+c.slps...c...p.t.c...t.......chpp.c....l.tp.......L.cp.chpcLpppltchpt+.h-th.p..c..pttct...pt...........tp..cth.tpclt...h...hct.t..tphp......h.................................................................................................. 0 108 131 176 +10044 PF10212 TTKRSYEDQ Predicted coiled-coil domain-containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4421) Family This is the C-terminal 500 amino acids of a family of proteins with a predicted coiled-coil domain conserved from nematodes to humans. It carries a characteristic TTKRSYEDQ sequence-motif. The function is not known. 
19.90 19.90 20.10 20.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 518 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.79 0.70 -5.58 5 140 2009-01-15 18:05:59 2007-08-06 11:33:46 4 6 86 0 85 141 0 322.70 36 59.42 CHANGED LAGQALSFVQDLVoALLNFHTYTEQRlQIFPlDSAIDsISPLNQKFSQYLHENAuYVRPLEEGMLQLFESITEDTVTVLETsVKLKtFSDHFoSYVsFL+KILPYQLKSLEEECESSLCTuAL+ARNpELapDMK+lTAVFEKLcTYVoLLALPSTcP-GLLRTNYSoVFTQluuuLHuLHDllKElSKHYsQKAoLEQELPTATQKLlTTNDCILSSlloLTNuTGKIAoFFuNNLDaFTSSLSYGPKGuTsFlSP..LSAEsMLsYKKKAuAYM+oLKKPCP-SVPYuEALuNRRVLLSSTESREGLAQQVQQSLEKIuKLEQEKEHWMLEAQLuKIKLEKENQRIAsLlKso...puGpLsssspEsusLspuuEQsEusSscup+EPTpoTSLlGMLTlTs-sp.sPDcESRE-LIKoHYMARIAELTSQLQLADSKAVHFaAECRALAKRLuLAEKSKESLTEElKLAsQsISRLQDELhTTKRSYEDQLSMMSDHLCSMNETLTKQREEIDTLK.MASK ................................................................................................................................................................................................................................................................................................................................................................................................................................................hp...tp..p.....tt...............t.t.....t......tt...h.......s...t....p....t...s.t..t......ts.h...ssph.......t....h....h........t...s.-..ps+.E.pL...Ipsaa.t+l.-Lhpphphu-SKshpaht.EscsLtp+.Lthu.-c...p+cs....hp-c......hp.ssp...........slppLp-ELpTT+psYEcQLShhS-HLsohN-pLsppp-pIpth+...................................................................... 1 28 37 65 +10045 PF10213 MRP-S28 Mitochondrial ribosomal subunit protein Wood V, Coggill P anon Wood V Family This is a conserved region of approx. 125 residues of one of the proteins that makes up the small subunit of the mitochondrial ribosome. In Saccharomyces cerevisiae the protein is MRP-S24 whereas in humans it is MRP-S28. 
The human mitochondrial ribosome has 29 distinct proteins in the small subunit and these have homologues in, for example, Drosophila melanogaster, Caenorhabditis elegans, and in the genomes of several fungi [1]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.57 0.71 -4.05 21 297 2012-10-03 10:08:23 2007-08-06 14:08:43 4 2 268 0 222 400 31 121.40 28 36.00 CHANGED pa..p.ss..ppPlpacaooYhu-t....cPt.sRKVVlplpsssL...sL..sc+pp+KhhhLAGsRYssc........TD........ll+hSo-+aspttQNt+YLsslLptLlcEupc..s-sapDlPlDsRHpph+................pFPccWt .........................................p.slphphssYhupt...........cPt.sR+Vslp.lplpcL........sL..scctpcK.hh.c.L..s........G........sRYs.p.......................................oD............................llphss-+asp........ppQNtcYhhpllssLhpEutc....s....-sa..pc..h-.hchp...h................................................................................ 0 80 122 182 +10046 PF10214 Rrn6 RNA polymerase I-specific transcription-initiation factor Wood V, Coggill P anon Wood V Family RNA polymerase I-specific transcription-initiation factor Rrn6 and Rrn7 represent components of a multisubunit transcription factor essential for the initiation of rDNA transcription by Pol I [1]. These proteins are found in fungi. 
27.70 27.70 28.30 27.70 22.60 27.60 hmmbuild -o /dev/null HMM SEED 765 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.17 0.70 -13.15 0.70 -6.44 19 144 2009-01-15 18:05:59 2007-08-06 14:14:48 4 7 116 0 108 148 0 591.40 16 77.63 CHANGED hDPphusLhsluphhshps.css.scshslhshsSGEsussLslphlpccthphpp.......hthshhhpppsstas.s......husPI+QIpau...cshcppsshlulpp.sphhlhps.htt.............h...ppppuplsss.Lhplptsph.ushsauDVsFNPWspp...QhAllDp+GNWolWcl........tpp+pppsshpsspshs.GSl........tDsp-l......ssWtpIpW.............................sschspllVhsRpshh.hchpss.p..h.....Lhpu+shspIhDl++ssp...ppsFlLTopcllWhclss..............shp.hLSW+HahDscDsoL+l..sshts....ppphhhhlaSphs.llhsathuhpsst.hshts..ssallclsspsstpt..........phpsh.hp.t.sc.spssspph...t.phhtlatlsspLslppphhsssspss..spst....ppsshcthtspp.p...t.pp..hp.........................h+hhtc..h.slp..upspps.t-hs....Yuhtlupths..................th.E.hp...t...sphspspshhphLsp.sslssh.pshp-hsshlppl.phhpspphthsshhphhhthhh..............tstspl.slashLhpsW.ss.sp.h........+-plh+p...lstplsLohlts.................................................................................pcpt.t.......splpphhphstpp.....hscsspslLspWc....ttssss.ph........pphhpsshpps....tpsphPsh+.pppssps................pt......................sspsts.ts.p.h......spo.shsp.....s...P..oQhpsst.tsRpttptt......t+KKKRtuGF 
..........................................................................................................................................................................hhshh.h.........p......p.h.hhshssGpssp.lpl..ht..pt........................t....ps....h..t...............tsPlpplphs....pt.tt.........p....h...hhlph.tphhlhp......................t...spl.ss.lh.tl..pph.ss..hsDhsF..sPh..p...phAllD.t.G..Wslacl.............h.p...t.t.h......h....t..spl..................c.t.h..........................sth.tl.a...................................sp.pthllhsppph..h.hp..ht..st............................l...tp.t.ph...lhDhtt..t.......p.hh.lL.To.pplhhhth...........................t...hlSh.H...hss.pD.olch..p...h.............tt.hhhhl.S....p...h.h.hh.h..........t...............t.hhlt.s.......................h....h.....................................hthh......th...hh.........t..................................................................................................................................................................................................................................................th.thh...................h.....................................t...hhp.h.p.a....................tt..htt....hh..hhhs.hhh..................................................................................................................................t....................ht.h..t........t......hst.ht.phl.spW........t.pst..t..................................p........tp..h.th+.pttt..t...................................................................................................s......................................s....t.................s..........p......t..t.p...................p+t++h.GF......................................................................................... 0 21 53 90 +10047 PF10215 Ost4 Oligosaccaryltransferase Wood V, Coggill P anon Wood V Family Ost4 is a very short, approximately 30 residues, enzyme found from fungi to vertebrates. 
It is a member of the ER oligosaccaryltansferase complex, EC 2.4.1.119, that catalyses the asparagine-linked glycosylation of proteins. It appears to be an integral membrane protein that mediates the en bloc transfer of a preassembled high-mannose oligosaccharide onto asparagine residues of nascent polypeptides as they enter the lumen of the rough endoplasmic reticulum (RER). 21.00 21.00 21.20 21.00 19.80 20.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.34 0.72 -4.46 12 156 2009-01-15 18:05:59 2007-08-06 14:22:28 4 2 139 2 105 156 1 34.50 44 50.21 CHANGED MIoDspLshlA.sLGhshhlLIVlYHalsssspct ...MIoDspLshhAshLGsshhlLlVlYHalpsNspp..... 0 28 56 83 +10048 PF10216 ChpXY CO2 hydration protein (ChpXY) TIGRFAMs, Coggill P anon TIGRFAMs Family This small family of proteins includes paralogues ChpX and ChpY in Synechococcus sp. PCC7942 and other cyanobacteria, associated with distinct NAD(P)H dehydrogenase complexes. These proteins collectively enable light-dependent CO2 hydration and CO2 uptake; loss of both blocks growth at low CO2 concentrations. 
25.00 25.00 125.40 125.20 21.30 20.50 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.02 0.70 -5.58 18 117 2009-01-15 18:05:59 2007-08-06 14:46:53 4 1 69 0 41 129 44 363.50 58 91.73 CHANGED lIpRLpuGtALLsDoP-NllEVVGlLKSYGlVLDAYS+NLlYIAEcQFLshFPFFKYF.....................................NGc..hohsKLl+HahHDRINaEYAEYCMKAMhWH....GsGGLDsYLDSsEFpppAc+AIpA+h+pNPlhhsLppLFP-FL.EhlRphuYYouLGQFWRVMSDlFlsLSDRYDpGEIpSIsDVVsHItsGLVAsAucPITYpVpIcG-sY-llPtpAGLTFLhDsAVPYVEAVFFRGoPFhGTVSYNAQApQIPs-QucFpYGALYADPLPlGGAGIPPTLLMQDMhHaLPcYLp-aYcppsRGEcDl+VpIChSFQKSMFCVToAAIpGLtP...aPLDossPpcQtANRAYh-uWhsRlhs ..hIcRLEuGsuLLsDoP-NLhpllGILKuYuVllDtY.+NLlYIAEppFLs.FPFFKYF.....................................pGc..hphsKLh+HLhHDRINaEaAEYCM+AMhWH.....GsGthDsYLDosEFptsuc+sIpA+a+tNPl...hhsLp+LFP-ah.EQlRphuYYSsLG.FWcVMuslFh-LSDhYDpGclpslP-shpalhsGLhAsAu+PIhapVpItGEsY-IlPKStGhTaLh-sAlPYVEAVFaRusPFhGThSYNAQApQlPsDQp-FpYGsLYADPhPlGuAGIPPTLLMQDMhHaLP-YLp-aYpppsRGE-DlhVQIshoFQKSMFCVToAsI.puLtP...YPLDsssPcctpANRsahEuWhsRFh.p. 0 8 30 39 +10049 PF10217 DUF2039 Uncharacterized conserved protein (DUF2039) KOGs, Finn RD, Coggill PC anon KOGs (KOG3241) Family This entry is a region of approximately 100 residues containing three pairs of cysteine residues. The region is conserved from plants to humans but its function is unknown. 22.80 22.80 22.90 37.10 22.40 22.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -10.69 0.72 -3.97 16 147 2009-01-15 18:05:59 2007-08-06 15:03:23 4 4 120 0 92 144 3 91.10 46 43.81 CHANGED sQKHQN+ssFcsshapp.s.....ppc+lps...hthssl.CpRCp-hIcWKl+YsKYKsls..psp+CsKCpp+oV+cAYHplCcsCupchclCAKCt+ssp ...s.QKHQNp.huFKsshac+.s......pp.c+lss...hhhsGV.Cp.RCK-l...........lEW+lKYsKYKPLo..pP+KCs+Ctp+oV+cAYHhlCcsCApch.clCAKCsKp..s......................... 
0 36 49 71 +10050 PF10218 DUF2054 Uncharacterized conserved protein (DUF2054) KOGs, Finn RD, Coggill PC anon KOGs (KOG3136) Family This entry contains 14 conserved cysteines, three of which are CC-dimers. The region is of approximately 200 residues in length but its function is unknown. 20.80 20.80 21.40 21.40 18.10 18.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.39 0.71 -4.31 9 153 2009-01-15 18:05:59 2007-08-06 15:05:51 4 5 112 0 107 132 3 121.10 44 50.44 CHANGED psps.s.sspCRsSlQG+pLlsD-cGaVCpphsl.LhsGCCs.......h...ssphasCcsC.hsspCCssYEYCVSCCLpPsppsLLEcVL.pusssp.tlhuss.pDtFEhCls+CRTsSpSV.HENcYRs.ctKHCa .....................ss....t.ttpCRNolQG+.tLlsD-h...GaVCpRpsl..hssGCCs........................hphssscpa..sC.cuC................hs.stCCssYEaCVSCC.LpPs+...ph...lLE..c.hL...........csss.uhp..slhh.ss..pDpFELClu+CRTSS.....pSVpHENsY+ss.tKaCY............................................................................. 0 38 57 82 +10052 PF10220 DUF2146 Uncharacterized conserved protein (DUF2146) KOGs, Finn RD, Coggill PC anon KOGs (KOG3692) Family This is a family of proteins conserved from plants to humans. In Dictyostelium it is annotated as Mss11p but this could not be confirmed. Mss11p is required for the activation of pseudo-hyphal and invasive growth by Ste12p in yeast. 
17.70 17.70 17.90 17.70 17.60 17.60 hmmbuild -o /dev/null HMM SEED 895 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.71 0.70 -6.56 7 293 2009-09-11 14:24:08 2007-08-06 15:07:12 4 6 126 0 207 273 0 348.80 18 75.81 CHANGED pp.hhphDsplhVVGllGKos........ssphNphlchslhsstspts-....csplpsaapp-splLaLhhsus.Dstslh..hscslpss....s..phh-ahpt.cspasRhLLahhpVCHhlVlVEsopsFDhoahplF+tlchlR...cKhlhph.....L.phl+ssslup.hppcsRhCsPRlLFhFptsss..............sKPcp+-.hcpLEpslE-pIhclLRpppllTNsSusuLhslPpsptFVah.ssc.hcpDslhc.l-hLhtthcps......p..-p-p...lttstshpths.s.t.hpF...ptphpstppch-polhphlhpHVp.sL.ctGh--uss..+pttpopF.l.s.pcWh-hhppLaplhl....pNspcP..s.chus..hK..s.hcsh-t.lch-pKF.tphCpcuL.huh.tYpp.hP.pYssshHcpphspAhpha.cpuRGsp.pthh.pLpp.CpphWpSutptCE..ShhupsCshthH.s.t..p............HsuushhlSoCsCGRpQshRp-Pasl+pANa-FYph.s.pC..CttL.+hpFPlapPso...sDhtsAch.......spshpsh.p.tptpcp.s-ttsp.ch.pshp..hpsspp.......S.s.ssslshph.......pssctstppsps.tpssspp...............t..tpppt+-......tt.tthth.spos-aL.GhspssSshGLLPhFPSWtLssLGsSSlYsHssGLpp..QsuFlsGusaLhPWDV.lR..............hchpsWsAshpplpsc+tu..ppu+pcc+sDhhplKlFVGhEYECuRGHRFhhs.uPDpll+s.Gsslh+-Sus+.....VlpsDMPlYhPCsC..Rs..sphAQLhRlHVVTPKAPVplhl-PKVpl..pph....saspG.......pP..............lcLspSuYWILRLPaVYpG-pGshhsPpEhss..shslhtGhLhsshh 
..............................................................................t................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 2 74 98 149 +10053 PF10221 DUF2151 Cell cycle and development regulator KOGs, Finn RD, Coggill P anon KOGs (KOG3711) Family This is a set of proteins conserved from worms to humans. 
The proteins are a PAN GU kinase substrate, Mat89Bb, essential for S-M cycles of early Drosophila embryogenesis, Xenopus embryonic cell cycles and morphogenesis, and cell division in cultured mammalian cells. 17.50 17.50 22.80 24.40 17.10 17.40 hmmbuild -o /dev/null HMM SEED 695 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.20 0.70 -13.23 0.70 -6.35 6 167 2009-01-15 18:05:59 2007-08-06 15:07:27 4 5 88 0 104 154 0 447.20 40 95.15 CHANGED M.pcspKTVFVLDHssYFuEuscphl-hDhlcus+sp......hs.luKSLWTCAVEuShEYCRIlaDlaPttKL.lRFlVSDotu+hLNo.WssssQshp-LhsAlshVGsP.sRtsPpssDhSllHGLptAIEALAc.T-.Q+E................Scsstlhc-hp......tltNcGRlIClTsA+oDscMcpLEDplspllhppNtlAAu.Sc+hh.sIspC+LVIlNhYPsG--ohVosRsLpElS..PlLpsEVHSsKAuc.lusKLspLllsHFDLASTTVTsIPMK........EEQ+AsoSANYDVELhHu+cAHs..lhpGsshhsssShK....cGusYETVTLKWCTPRtssSs-hpYCsuAhhlTPVDVsSRPSSCLTNFLLsGRSVLLEpP+KS......GuKllSHhLSA+GGEIFIHsLshsRSp.......L--PPSIu-GsGGRVsD.YRls-hGchM+ppRLhPlscp...sDslsEt.h-........ch+ppLpRhoRYaPhhhupTsIF.N.ch..tl.PLlsLIsKppLoEpDlhpCppsIasLhpMcp+p-sLshsss...GsRlKs.Kc.-EQYRlhasELEpllppasusS.+HKplhcslhssRutssptE.....-+ccuGcKh.sKtshhtK.......sstuuco-chpss.DpEc--..cs....................shsDSPsSPch.p......Kpspssl--.....................................................pththtscGshSLLslhsshlEpupSpKRpEFsGRl.sshGNhAcLYscLuEKpts 
..................................................................................................h.hslho..h-s.hEhpRlh.Dlas.t...p..lphhhsD...hhlss.W.....sh........t..p..lht.h..........hu.P..........t.t.....s.h.tGL..Alp.......sLs..o..Q.t................................................................................................NpGpllhh..........T...........php....p..ptph..l.t.h.p.l.p.N...................t.h..lp.hphhhl.hhs......t.........p.t.........t.s....h...lhp.................h..tl..Lh..papLuSTolosIPMK........E-pp..ss.t.SsNYDVplhH.ttsH.......h......t................p............................................s.....pshtlpWsTs......t.....................h.....s..u.h.hoss.stSRPS.CLhpFlhs...G+.sVhLp...p................stchhoHhL.sps......ucIFlHsLs.spsh.......h--sP..sI...p-...ssus+V....sD..YRhsph.t.hhp....hh........................................................................p.sp.hP.h.t.shhh.............l..h.pllhKpphspp-..pCpphlhpLhthttct-.Lshsph...stt...h.c..........pp.c-QYRhhatELpthlph..s.o.tHpplhpsh.thpsh..............................................................................................................................................................................................................................plht.h........t..t..............................h......................................................................................................................................... 0 38 45 79 +10054 PF10222 DUF2152 Uncharacterized conserved protein (DUF2152) KOGs, Finn RD, Coggill PC anon KOGs (KOG3778) Family This is a family of proteins conserved from worms to humans. Its function is unknown. 
21.30 21.30 21.40 22.00 20.60 21.20 hmmbuild -o /dev/null HMM SEED 604 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -13.19 0.70 -6.30 6 150 2009-01-15 18:05:59 2007-08-06 15:07:47 4 4 108 0 86 149 0 440.40 38 91.23 CHANGED RLKsh....sGhLSSpasRRLLslLhllVlhlaYlhus.hRhs.......Fsuus.s+s.sutChcsclp.Wpt.t-ch-sslps......PtE.p.h..Pal.GNGaluLDlsus.pLaVp.puR...shhh.ssFpPhlslc.tGstsEpcuosl.a+pGll+plRCh........................s.sp-ChhVpppaaAHRoRPsVaVQcIQIoNPpcpllsl-Lss.ph.sh.cthooolchhps...t.....ph.lhoGhl.sVs....sthlhlVslsspchss+LpVs.+sshc.phhhslasScPls.....spht..hph.pppApctMhclhp............hshtshcp-HhclWusLahoGhphusuKst+T.sGssINsThYhhlSpsRAPhl-...sshoppc+-slEpsLsYu-tCasuHuThp.AENLW.pchSslppLhpLhshWhLTLQKRGC+sLlpAGApGllQAMVLSFGGhpFo-sHLphphDP+sLHpsYshRGlp.YsN....spINlulllDpDsKPaLaVulcpp-p..plYAC-AGCLDEPVpLTss.+sH.FPVhVTpPlTsLLYIooDhpHLpDLRHsLHlKEll...AHEEHhhpha+......GLPhLFWhSVsulIslFHLFLhKLIasEY.......CsPusKshaRs+ ...................................................................................................................................................................................................................................h.uNGhhh...........h.....................l...h.......t..............h...s.h...h......t.............t.h...h.hh..pG..hhhhpCh.............................t.shhlp.phhAHRshsplhlQplplsNs.h.p.....thph......t............p.h..t.ps..h.h.t..............ph.l.sth.......................t..h..h...hslst.phstplplt.+sphp..h..sl.h.St.sht.........p........p.hp.p.....shpthhchhp.............s.tphhp-H..thWtp.La.sGhphp......p..ps.p.s...su.plNhT.hYhhLSps..u.h.hp.........thstp.c.+p.....phcpsLsYt-tCasGHsThc.A-NLW.tchoohtplhpLsshWhLTLpKpGC+sLlt.....sGA.GhhQuMVLSFGuLpFoppHLpaphDPc.sLHpsath+sIp.Ypp....shlNluVlhct-..s.KshLpVulc..ptp...plYAC-AGCLs-PVpLTpt.psp.FPVhlTpPlTslLYI.osDhpHLp-LRHslHlKtll...AH-cHhht.........GLPhLFWhSlsuLIslFHLFLhKLIhpE.Y.......ht.............................................. 
0 33 40 64 +10055 PF10223 DUF2181 Uncharacterized conserved protein (DUF2181) KOGs, Finn RD, Coggill PC anon KOGs (KOG3748) Family This is region of approximately 250 residues conserved from worms to humans. Its function is unknown. 25.00 25.00 30.20 28.50 18.80 20.60 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.62 0.70 -5.02 10 192 2009-01-15 18:05:59 2007-08-06 15:07:58 4 2 94 0 120 194 0 225.50 35 76.71 CHANGED DuhplTWhHAsNS+ptLs-sL..s..SslphIEADVslpthpp..usEpplPIMAHPPAhsSDlTLcEWLcpVl.........sppKGIKLDFKSlpAVcsSlDlLc.sltct....lphPVWINADILsGPsss..ossVDsopFhstlpphhPpsTLSlGWTTtassshssssYTpshVcpMhcllpt.....LsQslTFPVRAulstsShspLpWL...LspopRYSLTlWouAsDsl.sV-DLlhlRcshuhcRlYYDl.EsltsQ ....................................shh.lhW.hHAsNppt.httuL.....p.........ush.h.h....lEADVhlt..tt...sscps..............hP..IMA.H.P..P....th..s.SD...TLppaLp.plh....................ss..pK..G.I..KLDFKo.lpAlpPSlslLp..phppp.....................lptPVWlNADIltGPsss....ss.VsuppFLstVpp....haP........csTlS..GWT.T..a.h.p......h................sts....Yo.phl.ccMhplspt.......LsQsVTFPVRAu.hl.t..p..S.h.s.p.l.thL.........Lpp..S.....s..R...h....oLTlW.p..u..t...s..D..h...sl..psLlhltcshshpplaYDl.cs...................................................... 0 33 46 76 +10056 PF10224 DUF2205 Predicted coiled-coil protein (DUF2205) KOGs, Finn RD, Coggill PC anon KOGs (KOG3650) Family This entry represent a highly conserved 100 residue region which is likely to be a coiled-coil structure. The exact function is unknown. 
26.70 26.70 26.90 27.20 26.20 26.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.62 0.72 -4.41 8 221 2009-01-15 18:05:59 2007-08-06 15:08:21 4 2 190 0 155 220 0 77.70 41 58.26 CHANGED huschsut-lE.th-pEt+p+LIpQlLpLQsoLc-LupRV-uVKEEN.KLcSENphLupYIENL...MosSsVhpSoosp..tpK ....................phps.-hc.phphEt...+pcLl.....p.....QshpLQsoLc..s..LupRlDuVKEEs.KLcoENphLtpYIpNL....MS.s..SsVhpoos...................... 0 46 76 122 +10057 PF10225 DUF2215 Uncharacterized conserved protein (DUF2215) KOGs, Finn RD, Coggill PC anon KOGs (KOG3817) Family This entry is the central 200 residues of a family of proteins conserved from worms to humans. The function is unknown. 28.70 28.70 28.70 28.70 28.40 28.60 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.55 0.70 -4.93 16 259 2009-09-10 22:40:12 2007-08-06 15:09:00 4 6 118 0 156 265 0 218.00 28 54.65 CHANGED aslslppt.hphaRlhhhlhGllLhhhushLSpshsFYYsoGhulGllsslLlllahhh+lhP..++ohhhhhlhGua.uhuhYhlphhhpNlp.Ilhpa..........thaVlsalhhsGh......lphhlshp.ssshstpstphlpWslphluhshlhpSs..spluhuhllhshhh......thhhhslp.........hhhthtphh+...............tph...ssth+.LspcEhcpQGphcopcuLppLR...........passSPptssh.....pshu+lpsPpphuchhpuo.sHloss .........................................pl.l.pp.hshhhhhhhhhGlhLhhh..u..phL..S.c.s...hFYYssGhslGl....h.hslllllahht+hhP....++s...h...hh.....l.lhG..uh.sh........uhYhlp.l.....hpslp.lhhpa..................................h.allsallhsGh.................lshhlshh.hss.........s..pohpl............lpWslplluhshhh.u.s...thuhshl.lh...hhh...........th..hshp.........................hh..hhhhhhp.......................................................................hh....s...+hLop-EYp..psthpTtpuLppL+...........phspp.Ppht.W......hhsplp..sP...c.hs.p.h.ut..Hh............................................................................................................................ 
0 42 66 108 +10058 PF10226 DUF2216 Uncharacterized conserved proteins (DUF2216) KOGs, Finn RD, Coggill PC anon KOGs (KOG3819) Family This is the conserved N-terminal half of a proteins which are found from worms to humans. some annotation suggests it might be PKR, the Hepatitis delta antigen-interacting protein A, but this could not be confirmed. 22.40 22.40 22.40 24.40 22.10 22.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.45 0.71 -4.80 11 174 2009-01-15 18:05:59 2007-08-06 15:09:23 4 2 80 0 104 154 0 177.10 55 45.64 CHANGED usu--LScloDEELLpWSKEELVRRLRRsEAEKhulll-HGNLh+EVNRpLQhHLsEIRuLK-VNQKLQEDNQELRDLCCFLDDDRQKGK+lSREWQRhGRaoAulMRKEVulYLQKL+ELEt+QE-llRENLELKElC.......LhLDEE+.............sutusuGsRsSlDSQsuhs..s..u....ssRDVGDGSSTSSsGSsuSPDp ..............................s......thsD-EhhthuKE-Ll+pLR+tEu-+hshhlp+upLhpEVNRpLQhHLsEIRuLK-lNQ+LQ.......-DNQELRDLCCFLDD..DRQKG++.l.uREWQRhGRYoAuVM+cEVuhYhQKL+ELEs+Q..EEll+ENhELKELC.......lhLDEE+......................ss.usu.G.up.sShDuptsh...s.....st......hhRDlGDGSStSSsGSssSPDp.......................................................... 0 20 31 63 +10060 PF10228 DUF2228 Uncharacterised conserved protein (DUF2228) KOGs, Finn RD, Coggill PC anon KOGs (KOG3952) Family This is a family of conserved proteins of approximately 700 residues found from worms to humans. 
25.00 25.00 27.20 29.90 20.50 22.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.53 0.70 -4.83 9 139 2009-01-15 18:05:59 2007-08-06 15:10:22 4 6 101 0 86 138 0 213.50 42 61.87 CHANGED GLpLVGPF-lLuGcacssphtps.sahhHWRaaYDPPEFQTllhsspsothHhGYaRDpPsshss.hluhN-sKpusphshlG-NlFsAllhFLp+ph+pps.pKtphuAhcKl.t.LpctApphshtLpppschhKpRcKclVsKshHtuGlV..VPhs.Ksc.......lGYR.LscoD..AsLK+ll+slsc...spscpccpcshs.lQtlhThlphANDECDaGhGLELGhDLFChGs+thHclht.LLshAYshLpRspFspI ....................sLpLVGPa-lLuG+a+htpt.....tps..sa..hHWRFaY..DPPEFQ..........Tlll.....s.spp.....sthHhGYa.R..DsPschPs.hluhN-.u..p..pss.h...Gs.NlFsAlhhal.cchp....t.hs.p..p.thshhppl...........tt...Lpchspp..shpLppps.hh..........h+.pRp+.+.lVs+TFHtsGlV..VPhD.+sp..........lGYRpLsto..D..spL++lhctlhc....utscpp+.psh.........s.lQ.EhhohsphAsDECDaGh..GLELGhDLFsh...Gp..c....h........hpphhtpLLshAYpLLpRs.FhpI.................... 0 34 40 65 +10061 PF10229 DUF2246 Uncharacterized conserved protein (DUF2246) KOGs, Finn RD, Coggill PC anon KOGs (KOG3994) Family This is a family of proteins conserved from worms to humans of approximately 300 residues. The function is unknown. 
21.80 21.80 21.80 24.40 21.40 21.60 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.85 0.70 -5.11 9 180 2009-01-15 18:05:59 2007-08-06 15:10:35 4 5 127 0 98 165 1 216.90 32 67.15 CHANGED tLhp+sstsRuhshsssuuScpshhshhs.shhs+s.shPc-shGsFGspDpphpLsGslsassclsuhshphcs.h..ph.lP-sls.s.pscppphhhsp.lsE.......lh.pclpps..p..hssuplEsuhppCP-LL++-htplFP....shsssslTVlTlTQ+s......hupthEp-REpLhp+FlpuAKEIC.sLhotGYWADFIDPhSGtsaFushTssTLacTDsRaRpLGF+lEDLGsCpVIpHphhGTphFVGTIFTsAPscSslht+Lhu .................................................................................s.................................................t...t.ht....p..h.sp....s....t.............................t..h........t....th......t.lt......................ht...............ht.p..splEhslpsC..P.cLL+c..chcplF..P...................ph...........ss...splhllsl..oQ+o.....p.s.hs..hspp.sEtE+EhLhc..cFlpsAp-lChsLp........spGYWADFIDPpSGhsahu.s.h.s.ss.slh.-sDp...........p.a..p.p..L.......Gapl..ps.hGsC+VIpHshWG.o........psas...........GolFTsAssp...h.pl..s................ 0 36 50 75 +10062 PF10230 DUF2305 Uncharacterised conserved protein (DUF2305) KOGs, Finn RD, Coggill PC anon KOGs (KOG3975) Family This family of proteins is conserved from plants to humans. The function is unknown. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -12.07 0.70 -4.98 27 346 2012-10-03 11:45:05 2007-08-06 15:11:11 4 7 258 0 233 951 208 239.00 24 77.85 CHANGED schlllhIPGNPGLhsFYppFlptLtpph..............sspatlhsloauGa.shpsps................pchasLpsQlcHphshlcphls............hch+lhllGHSlGuYluhcllc+hs.................phplhtshhLhPTltchucSssGphhohh....hht.hhhhhhssh.hhhhhthLPttltphLl.phhhsss.................sh.ssshh....hhs.pslppslaMAtpEMppltp.c.....-phhpt....................+lhFaaupsDpWss.phhc-lhcths..p.........ss.....hpls-.cslsHAFs ...................................................................................s...llhhIs......GNPGlht.aYt.......FhptLhp..h.........................................................t.ph.lhshuhhGa.s..spp....................................p..tcha..s..L..p...tQl..c..a...phph..lpph..l...................................tphpllllGHSlGuYlshclhcch..........................................thplh.tshhL.....a...........P.......T..l..p.......c.hs..p.S..ss.Gph....hs.h.............ht.h.h..hhh.hhshh...........hhh......th.....lPt..hh...hp....hll..phhht.................................th.hshh......lhp..ps..ltps.l.h....hu.tpE......ht.p...ltp.p........pphht.....................h.......................................plhhha..up..pDtWss.phhcclhp.ths...............p......................h.lsp..psh.HuFs..................................................................................................................................................... 0 79 124 193 +10063 PF10231 DUF2315 Uncharacterised conserved protein (DUF2315) KOGs, Finn RD, Coggill PC anon KOGs (KOG4094) Family This is a family of small conserved proteins found from worms to humans. The function is not known. 
21.40 21.40 21.60 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.45 0.71 -4.26 6 120 2009-01-15 18:05:59 2007-08-06 15:11:38 4 6 88 0 72 116 0 115.80 41 54.95 CHANGED ppDaIGPPDs.SNLRPlhh+hs-NETcLE++LRthRpEsppWNs-FWucaNhpFpcEKE-Flcpc.......L+cEuGpppclsA--MucFYKsFLDKNaptHhhYNhcWY++NhsllhLuhtVsLpRlapth ........................DhlGPPDthSNLRPlhhhh..scsE.o..L....E....p.............cLRthRpEsppWNppFWsppNhpFpcEK--Flppp..........................l+ppsG..ptpl..sA-.-MucFYKpFLs+NappHhhYNhpWYc+NhslhhhhhtVtlpphht.h................ 0 29 35 54 +10064 PF10232 Med8 Arc32; Mediator of RNA polymerase II transcription complex subunit 8 KOGs, Finn RD, Coggill PC anon KOGs (KOG3583) Family Arc32, or Med8, is one of the subunits of the Mediator complex of RNA polymerase II. The region conserved contains two alpha helices putatively necessary for binding to other subunits within the core of the Mediator complex [1]. The N-terminus of Med8 binds to the essential core Head part of Mediator and the C-terminus hinges to Med18 on the non-essential part of the Head that also includes Med20 [3]. 
25.00 25.00 25.10 25.70 24.20 23.20 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.45 0.70 -4.84 7 254 2009-01-15 18:05:59 2007-08-06 15:45:29 4 6 216 8 171 240 1 224.60 26 88.67 CHANGED Mpp............spcph..........tsL-ulhpRLspLppSLsphltpLp......pchch.sWsoh.spFsll.upLsolophLtppp.shLcspslhPh.hhPs+scp..............sllsshLRTKssPcVE-hhttsctthts.tssputpp.................lhp.s+hhpph.............hshlochR-.Eh-..upppht.ppshp.tcsphllttht.upshp...................................t.p...sh....t.t...........ssslhp........hpou....s.pR ..................................................................p.h.............tsL-slhpRlspLppSLtshhtplp...................pp.th.sW.................sol.sphsll...sspLpolschL.........p.......c.......p....p.....sh........hc.shllhPhsh.Ps+sc-................................sl.lsshLRTK.-P-....VE-h.pphpt...t.u.s.ph.ss.c...s..utp.p..................................................ltphsch..hpph...................................hphlsp..tc.....-h.....-..tp...st.t.t......t....pp.p.......p.....p..........t-..........tphls.tt.h.t.utsht.................................................................................................................................................................................................. 0 48 83 135 +10065 PF10233 Cg6151-P Uncharacterized conserved protein CG6151-P KOGs, Finn RD, Coggill PC, Bateman A anon KOGs (KOG4085) Domain This is a family of small, less than 200 residue long, proteins which are named as CG6151-P proteins that are conserved from fungi to humans. The function is unknown. The fungal members have a characteristic ICP sequence motif. Some members are annotated as putative clathrin-coated vesicle protein but this could not be defined. 
26.40 26.40 26.80 29.90 25.20 26.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.49 0.71 -4.10 25 228 2009-01-15 18:05:59 2007-08-06 15:52:44 4 5 197 0 155 227 0 109.10 35 61.65 CHANGED hGllshlLChALGlANlFph...ssl.IlFullsllpGhlllFlElPhLL+ICPhospFssFl++hssNahRAuhYslMAllpalSlshtsTSLlusAlhLsloulhYuLAult+Q ...................................hGllshlhChhhG....lhN...lhsl.....ssl..Ishullpl..hsuhlllhlEsPhhhph...sshus.shsphlc+h.ssa.RAshYssMAll..lsls..h..s..hoSLlus....Alhhs.oulhYuLuult+p...................... 0 38 68 120 +10066 PF10234 Cluap1 Clusterin-associated protein-1 KOGs, Finn RD, Coggill PC anon KOGs (KOG3647) Family This protein is conserved from worms to humans. The protein of 413 amino acids contains a central coiled-coil domain, possibly the region that binds to clusterin. Cluap1 expression is highest in the nucleus and gradually increases during late S to G2/M phases of the cell cycle and returns to the basal level in the G0/G1 phases. In addition, it is upregulated in colon cancer tissues compared to corresponding non-cancerous mucosa. It thus plays a crucial role in the life of the cell [1]. 
24.20 24.20 24.20 34.00 23.90 24.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.61 0.70 -5.19 15 186 2009-01-15 18:05:59 2007-08-06 15:53:50 4 6 127 0 126 186 4 229.40 42 63.34 CHANGED RhLGYPRlISM-sFRsP.....NFtLVA-lLhWLlp.........................RYEPss-Isssl-oEp-RVhFlKulspFhsTKu+IKLNs+KLYtADuaAV+ELLKlTslLhsAhpos.....stp-E-s.....ottphslusKlsDLKtsRpLuS-lTs+GuuLaDLLsKEl.pl+-sRppslu..........RshElsplE+sl+puIpsspsclpphps.LsslpuDcssLEuKIc+KKpELERspKRLpuLpsVRPAaM-EYE+lEpELpclYppYlc+aRNLsaLEpQL-shs+tEptph- ..............................................RhLGYP.RhlSh-sFRs.P.......NFtLVu-lLhWLlp.........................+a-Ppspl.s..s.l-oEpcRVhFl+shsph...hh.....sKu+IKLNsKKLYtADGaAVpELLKlsslLhpAhpsp.....................................t.t-pp..........shhph.s.lss+.ht..-lKt...sRpLuS.-lTppGAsLa-LLucEh.ph+p..Rppslu..................+shElsp.hE+sh+..slpph.tplpphpp.lsslts-EssL-sKIc+++.ELERspKRLpsLp............slR..PAaM-EYE+hEpELpp.YphYlp+aRNLsaLEppL-phpchEpth.................................... 0 54 68 100 +10067 PF10235 Cript Microtubule-associated protein CRIPT KOGs, Finn RD, Coggill PC anon KOGs (KOG3476) Family The CRIPT protein is a cytoskeletal protein involved in microtubule production. The C-terminal domain is essential for binding to the PDZ3 domain of the SAP90 protein, one of a super-family of PDZ-containing proteins that play an important role in coupling the membrane ion channels with their signalling partners. SAP90 is concentrated in the post synaptic density of glutamatergic neurons [1]. 
23.70 23.70 24.20 25.80 23.00 23.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.70 0.72 -3.63 17 204 2009-01-15 18:05:59 2007-08-06 16:22:19 4 4 187 0 153 182 0 90.10 43 72.48 CHANGED oclsTPssh+cus............................+ssss.Gsptl.scNKLLSpttp...sPYush..ssC..chCKsplcQ.Gp+YCppCAYpKs..hCAMCGKpl..sspsh+poss .................sclhTPDsaKcus..............................pso.spu.G.uR+l.sENKhLosppp..............hsPYu...........p............h....sp..............C..+lCKspVHQ.Gu+YCQsC.AY+KG..lCAMCGKpl.hsTKsY+pos.......... 0 54 78 119 +10068 PF10236 DAP3 Mitochondrial ribosomal death-associated protein 3 KOGs, Finn RD, Coggill PC anon KOGs (KOG3928) Family This is a family of conserved proteins which were originally described as death-associated-protein-3 (DAP-3). The proteins carry a P-loop DNA-binding motif, and induce apoptosis [1]. DAP3 has been shown to be a pro-apoptotic factor in the mitochondrial matrix [2] and to be crucial for mitochondrial biogenesis and so has also been designated as MRP-S29 (mitochondrial ribosomal protein subunit 29). 
23.10 23.10 23.10 23.20 23.00 23.00 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.90 0.70 -5.49 29 361 2012-10-05 12:31:09 2007-08-06 16:22:52 4 8 283 0 251 385 6 276.70 23 66.47 CHANGED slVRctol-l...hctlppt.......ptsppsh+hllsGppGoGKSlhLsQshsaAhpp...sWlVlplPpspphlpusspat.ss..p.thasQPhhstphLpchhpsNp.phLpp..lp..locca.hh............tpsstsspoLh-LlphGhpcsptuh.ss...hpAlhpELpspus.............hPllhslDshsthhps..opY+ssch.......p.Icsc-Lslsphhhshlt.....spsshssGssl..LssTusspssp..sh.hsLtttts...................-Pah.....c..h.t.h.................shpVs.shoccEscshhcYatcsshlpcp............sscphspEchhhsusGNstEL.+h ..........................................hlRp..shcl........hp.hppt......................p.tt.sh+..h.ll.....hGtpGsGKohhLs.psh.p.auhpp....sWlllalPp.u.p.th.sp...s...spp.hh.ss...........p..........th.........asQPh.stphLp.........phhpsNc...phLpp...lp......lppc.a.hs................................................................tpthtts.psLh-llp....Gh.p.....p.....sp.....h..us.....ss....hthlhpELptpsp.................h.lLlslD...shsthht....oph+p.....ph.......................p.ltsp-Lslsphhh.ch............sppsh.sGshl....lstou.t.t.p..........h....s..lhttts.......................................ssah.........................................ht.ls.shs..pEhpshhpYahppthlpp....................................pp.h.cphhh.ustss..h........................................................................................ 0 96 144 210 +10069 PF10237 N6-adenineMlase DPPF; Probable N6-adenine methyltransferase KOGs, Finn RD, Coggill PC anon KOGs (KOG3350) Family This is a protein of approximately 200 residues which is conserved from plants to humans [1]. It contains a highly conserved QFW motif close to the N-terminus and a DPPF motif in the centre. The DPPF motif is characteristic of N-6 adenine-specific DNA methylases, and this family is found in eukaryotes [2]. 
23.20 23.20 23.50 23.20 22.70 22.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.21 0.71 -4.57 31 349 2012-10-10 17:06:42 2007-08-06 16:25:33 4 12 224 0 245 348 10 157.00 29 54.77 CHANGED QLSQFWYu--TAptLu...ctllpssscs.....stIAhlSAPSlYttl+p.............phsspplhLhEaDcRFsl.hutc..Fs.......aYDYNpPhclPtpl.ct..phDhllsDPPF......LsEEC.pKsuhTl+hLh+s................ph+llhsTGctMpchs.phl....sschssFhPcHppsLuNEFRCYuNF-s ...........................................................hSQFWYu-cTsphLs....ct.lhp.....ss..p.s...................splAslSsPolattl+p............................tts.ph..pshLhEaDpRFsh..a.st-..Fs..........FYDas.p........P..h........c..l..s..pp......h....pp....ph-...hllsDPPF......Lsc-ChpK...h...up...ol.chLhp.......................................cll....h....sTuth.tth..htphh............shp.s.ta.Ppatp..Ls.pahhassat......................................................... 0 78 127 198 +10070 PF10238 Eapp_C E2F-associated phosphoprotein KOGs, Finn RD, Coggill PC anon KOGs (KOG3395) Family This entry represents the conserved C-terminal portion of an E2F binding protein. E2F transcription factors play an essential role in cell proliferation and apoptosis and their activity is frequently deregulated in human cancers. E2F activity is regulated by a variety of mechanisms, frequently mediated by proteins binding to individual members or a subgroup of the family. EAPP interacts with a subset of E2F factors and influences E2F-dependent promoter activity. EAPP is present throughout the cell cycle but disappears during mitosis [1]. 
25.00 25.00 37.10 26.20 20.50 19.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.08 0.71 -4.27 15 144 2009-01-15 18:05:59 2007-08-06 16:52:27 4 2 111 0 99 138 2 130.80 42 54.62 CHANGED LY.DsctD-c-ccWVccph+tt....................tsssoDAlLsCPuChTslChcCQRHEpYpsQYRAhFshNCplsc-pll...p+phsspcp+csppcscpstptstt..............................................................................ssEhY+sVpCspCuTcVAVaD.c--lYHFFNVLsS ..............................................hY.DPchDscDptWVstpR+sh....................................................................sSDAV.LsCPuChTTLClDCQRHEpYpsQYRAMFVhN...Cpls.c.-clL..h.tt...ppc.p...pp.c..t...tpp.ppt..t.t.....t....................................................................................ttptp-hY+PVhCspCuTcVAVhD.cDEVaHFFNVlsS.............. 0 43 59 76 +10071 PF10239 DUF2465 FAM98AB; Protein of unknown function (DUF2465) KOGs, Finn RD, Coggill PC anon KOGs (KOG3973) Family FAM98A and B proteins are found from worms to humans but their function is unknown. This entry is of a family of proteins that is rich in glycines. 
25.00 25.00 25.90 25.40 22.70 22.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.01 0.70 -5.22 13 277 2009-01-15 18:05:59 2007-08-06 16:52:43 4 9 106 0 148 226 3 270.10 39 72.72 CHANGED lLDsLcsLGY.cGPlh--s...sLppAspuGhuSs-apsLstWLsscL+llssl...-Eplo...ss-Dh-o.FpLElSGhL+ELuCPYssLsoG.lssRlpspccpLhLLhaLsoELQAs+llts++sppttpcp.........sosshQtlpslshsLslscs.ssslshhthFspIps+l..pchlpph.........spsplupPLL..KcsLsspQWccLEplspsLpsEY-hRR+MLl+RLDVTVQSFtWS-RAKs+tsphschapstRpsLss....psslslAcLLAARpDl..upl.+TSSushRcpTssuIN+lLh.G+VP.DRGGRssEhtsPs.EMPsWpKRp-GG ...........................................................................................................t.........h.tuhpt.Ght..ss-aptLshaLss...plp..hspl.......-Eplp.........ts.s-...h-..p..F.hElSuhLtEht.............CPatsLhsGclppclhppps..pLhLLhaLhoELpAs+hlphpp..ppt...pt........................................ssphhp.-lpthh.sLuhscs....ssshs.h..thhstlcp.Kl..p-hLs.pl....................sssplucPLL...ptsLsstpW...Ep...lEpl...............s.psLtsEYcsRRphLlKR....LDV..T....lQSFtWSDRAKs.p..s-.plsplapPhRpsLss..........coslolAcLLA.....AR.pDL..uplh+TSSushRcp.T.ssuINKVlh.GpVP.....DRGGRssEhpsP..............MPsWppRptt........................ 0 39 53 95 +10072 PF10240 DUF2464 Fam125A; Protein of unknown function (DUF2464) KOGs, Finn RD, Coggill PC anon KOGs (KOG4000) Family This is a family of proteins conserved from worms to humans. Members have been annotated as FAM125A proteins, but their function is unknown. 
20.70 20.70 21.30 22.80 20.30 20.60 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.52 0.70 -5.04 12 190 2009-01-15 18:05:59 2007-08-06 16:53:13 4 6 92 1 109 163 0 219.10 34 81.91 CHANGED pPlTulshlushspsPpsassIspThD.ussAsLh+s...Fsp+sshYLChSps......sh.s.Vlsslplls-KsslPhGas.lscshDocppsh+KKplCl+hhP+soscsAlsDIplsu+sKpsP.sYphlG-IsuhhIahKpus.....P.slPcspsho........................................pshpshslsu.........sspPuPshPht.s.o.......psts.htts.hhpsuslYslSuh-GVPFhlpP+F....ssss.ssps.hthplcolsclcpEYsYsFssE .................................Ploulsh.sus..tspsPpsas....s.lupTsD.Gs-AsLa.+s....FtpKs.sR.YL....Cho+shs........sh.ssVlsDhplls...K.-.slPhGF.sl..p-ThDopptsh+......K.KRlClKhhP+.suscsA.....lsDIp..lh.u..+........o........Kp..sP..t.Y..phl.G.-lsuhsIWh+hup.............lPpsps.s..........................................................ps.ps..p..ss.........ss.pPu....Psh.s.t.s.s..................................ttts...ttts...h..ptu..sl.hshouh.DGVPFhl...p.+h.......t...t.t.....h.....hplpoht..clpccapYsF.hE................................................................ 0 29 39 76 +10073 PF10241 KxDL Uncharacterized conserved protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3443) Domain This is a family of short proteins which are conserved over a region of 80 residues. There is a characteristic KxDL motif towards the C-terminus. The function is unknown. 24.00 24.00 24.20 24.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.86 0.72 -4.08 12 243 2009-01-15 18:05:59 2007-08-06 17:10:16 4 9 211 0 171 217 0 85.50 31 44.06 CHANGED ssRhtshhsstclsshlttQpph.u+Lptppc.LLphpsLupsRLppspscFtpth+hhp-hKcDL-ahh++lculKuKhtppYPpta .................pphtshlsstcl..sp.hlttQpph....s+hpppsc.Lhph.psluppRLpphpp....cF........tctt+slp-........hK.cDL-hlh++l.........cslKuKltppaPcta....... 
0 46 78 132 +10074 PF10242 L_HGMIC_fpl Lipoma HMGIC fusion partner-like protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4026) Family This is a group of proteins expressed from a series of genes referred to as Lipoma HGMIC fusion partner-like. The proteins carry four highly conserved transmembrane domains in this entry. In certain instances, eg in LHFPL5, mutations cause deafness in humans [1] and hypospadias [3], and LHFPL1 is transcribed in six liver tumour cell lines [2]. 25.20 25.20 25.40 25.30 25.10 25.10 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.63 0.71 -4.28 18 458 2012-10-03 00:20:40 2007-08-06 17:10:50 4 5 97 0 283 414 0 165.10 32 78.27 CHANGED hulLWslholshAhlplluFlpPpWlss.......ssp............sspsuphGlaphCh.......hhp..hthpCsshshsFtslsSus....a.psushFluluhhL.LsslshhuLhshCp.....spola+lCuhhQhluulhlhlGChlYPhGWcSscV+chCG.cuspaplGtColtWAahhAIluhhsuhlLoFluhlLus+ .....................................hGslWs...lholshuhhshlsFhpPhWlhs.......s..............................................tspss.FGlaphC...............................thhs..ht.pC.ts...h..........sFss.IPSss........a.+s.us.hh...luhuhhL.....lss.l.sh....h.u...L....h.hh...Cs..................s.t.ola+..ls..uhhQh.hu..u.......hhlhlG..sh.laP.GW.s...........upcl.+.p....hCG...tpss.tap.l.G.p.C.sltWAahhAlhuhhsuhlLshhshshu.p...................................................................... 0 69 96 171 +10075 PF10243 MIP-T3 Microtubule-binding protein MIP-T3 KOGs, Finn RD, Coggill PC anon KOGs (KOG3809) Family This protein, which interacts with both microtubules and TRAF3 (tumour necrosis factor receptor-associated factor 3), is conserved from worms to humans. The N-terminal region is the microtubule binding domain and is well-conserved; the C-terminal 100 residues, also well-conserved, constitute the coiled-coil region which binds to TRAF3. 
The central region of the protein is rich in lysine and glutamic acid and carries KKE motifs which may also be necessary for tubulin-binding, but this region is the least well-conserved [1]. 32.30 32.30 32.80 32.40 31.90 31.90 hmmbuild -o /dev/null HMM SEED 539 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -12.98 0.70 -6.04 13 257 2009-09-11 16:48:08 2007-08-07 14:41:36 4 16 131 1 160 235 12 349.50 20 82.10 CHANGED hcGLYTssEhcusslpDK-sKltFLQKhIDllphsoGcsLts+PuKIVAGpEPE+TNELLQtlu+suts.pLspcpAV++l...ttuppps.sshsp.sp-tcscssppcctcccpppcc+ccsc.c.ppppcttp.ppph...c-ppp.pE+c+p.ccKpppsstccpc.sccppp+ppsptcppctp..........................................................t.tpptpp..pppcs+ppstpp.sucsccttpstctcctpsct.s.-ps..t..tp....................................................t.p..hpt....s.p........t............t......t............................................................................................................................s......stssoAR.usPR.Kcpp.....t..sttt.uchhssVlh-shp......s-s--c-phhh.tttt.sp................st...s.....ppt-pcGtL.Vp+ILETpK-h-stsupsp....ttppt.................tsptppshssc-lppLRpplQpLs+SspPLGKlhDaIpEDIDuMppELphW+pEt+pptpthpcEpphT-sAlcPLpspLtpLEppIsDppsKIssl+usILpN-p+Ip+hltsl 
.......................................................................................................................................tt.htp+p.tKhtaLpphlthh....t..h.spst+lluG.EsctTN.hLQhhu.sh.......hp.t..thhpth..................t..t.t.........t...tt......tt...t..t.......t.p....p..t.t..tt.ttp.t..p..t............................t...................t.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 79 101 139 +10076 PF10244 MRP-L51 Mitochondrial ribosomal subunit KOGs, Finn RD, Coggill PC anon KOGs (KOG4045) Family MRP-L51 is a family of small proteins from the intact 55 S mitochondrial ribosome [2]. It has otherwise been referred to as bMRP-64 [1]. The exact function of this family is not known. 
25.00 25.00 35.40 35.40 24.10 24.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.19 0.72 -3.96 8 96 2009-01-15 18:05:59 2007-08-07 14:52:15 4 2 87 0 65 98 0 92.90 50 57.29 CHANGED hPc.KshDRWo-KRAhFGVYDNIGILGDF+hHP+-LIhG.PsWLRGa+G....NELQR.hIRK+pMVGsRMhh-D....hHNLpKRI+aL.....Y+RFNRpGKHR ..hP.h+shDpWsEKRAhFG..D.IsI.......LG......sh.chHP.pcllhu.PsWLRGapG....NELQ+.hlRK++Mlus+hasps....h+sLpKRIpYL.....YK+hNR+sKh........ 0 17 22 46 +10077 PF10245 MRP-S22 Mitochondrial 28S ribosomal protein S22 KOGs, Finn RD, Coggill PC anon KOGs (KOG3890) Family This is the conserved N-terminus and central portion of the mitochondrial small subunit 28S ribosomal protein S22. Mammalian mitochondria carry out the synthesis of 13 polypeptides that are essential for oxidative phosphorylation and, hence, for the synthesis of the majority of the ATP used by eukaryotic organisms. The number of proteins produced by prokaryotes is smaller, reflected in the lower number of ribosomal proteins present in them [1]. 25.00 25.00 25.00 25.00 23.30 22.40 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.59 0.70 -4.97 6 129 2009-01-15 18:05:59 2007-08-07 14:52:31 4 3 90 0 79 119 0 209.00 47 67.02 CHANGED pPlFhcc-VQpLLpshTpL-LD.KVFR+Rss+s.spscaKhMT--QL-cphhpslEpAcphLQMPPVlc.+p-ssc.VlAKDhuLKuh..uTsKaVFTDITaslscpcRpIVVREPDGTLpcAshEpRcRlNQlYFPLpGRplhsPhMFc.-cpLpchL-pscaEFVLDRsCVQFEP.-s-Yp+lotpVY-+lNE.................oppF-hLRSTRHFGPhAFaLAhp+tIDc.LLhDhIp+DhLcsuspLltLhptLH..P .............................................hFhctcVQplLhphTtlsLp.KsF+.th...t..p.....psspaKhMTptQLccthppshctA+hhLpMPP.Vlp.R...tshsc...VLucDthLcGh..-ouKaVFTDIoaulsc+-RhIVVREP.sGTLRcAoaEERDRhhQlYFP+cGRclhsPhhFp.-.Ep.....Lpphhppscat....lLshshsQFEPDs..s-Yh+lpppsY-clsc.................ptpa-lLRSTRHFGshsaahs.p+pIDs.LLh-.lpc-hlp-AspLlpLhphlH.................... 
0 31 36 59 +10078 PF10246 MRP-S35 Mitochondrial ribosomal protein MRP-S35 KOGs, Finn RD, Coggill PC anon KOGs (KOG4078) Family This is a family of short mitochondrial ribosomal proteins, less than 200 amino acids long. that are highly conserved from worms to humans. The structure has previously been referred to as MRP-S18 but the current numbering fits the preferred nomenclature from these authors. 24.40 24.40 24.80 29.10 23.30 24.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.12 0.72 -4.10 6 114 2009-09-11 12:55:10 2007-08-07 14:59:26 4 5 84 0 67 106 0 96.20 56 45.93 CHANGED ppsspSFuSLLRpShhlQLGsscGKlllGKIFHlV--.DLYIDFGhKFHCVCpRPAssGEcY.RGoRVRLRLcDLELooRFLGuopDlTlLEADAsLLGLlpspsp ...............s..sscSFAShLRpSsLhQ.hGs.u.csKlVlG+IFHlVp-....DLYIDFGuKFHCVC+......RPph.sG..c..p.Y.+...Gs+VRLRlhDLELoo+FLGuspDhTlLEADshLLGl.ps..p.................... 0 20 25 48 +10079 PF10247 Romo1 Mit_gmP; Reactive mitochondrial oxygen species modulator 1 KOGs, Finn RD, Coggill PC anon KOGs (KOG4096) Family This is a family of small, approximately 100 amino acid, proteins found from yeasts to humans. The majority of endogenous reactive oxygen species (ROS) in cells are produced by the mitochondrial respiratory chain. An increase or imbalance in ROS alters the intracellular redox homeostasis, triggers DNA damage, and may contribute to cancer development and progression [1]. Members of this family are mitochondrial reactive oxygen species modulator 1 (Romo1) proteins that are responsible for increasing the level of ROS in cells. Increased Romo1 expression can have a number of other effects including: inducing premature senescence of cultured human fibroblasts [2,3] and increased resistance to 5-fluorouracil [4]. 
24.10 24.10 24.90 25.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.19 0.72 -3.78 19 272 2009-09-11 01:43:06 2007-08-07 14:59:42 4 2 243 0 191 236 1 65.30 47 65.22 CHANGED sosa-+l....KMGhhMGssVGsshGhlhGsasshptGstspthhtslGphhlsSuuoFGhFMulGollRs ............PSsa-+h....KMGhhMGs..................sVGhshGhlFGsauhh+hG.......hts.ptlhpslGphhhsSu.uTFGhFMuIGosIRs.......................... 0 53 101 156 +10080 PF10248 Mlf1IP Myelodysplasia-myeloid leukemia factor 1-interacting protein KOGs, Finn RD, Coggill PC anon KOGs (KOG4049) Family This entry is the conserved central region of a group of proteins that are putative transcriptional repressors [3]. The structure contains a putative 14-3-3 binding motif involved in the subcellular localisation of various regulatory molecules, and it may be that interaction with the transcription factor DREF could be regulated through this motif. DREF regulates proliferation-related genes in Drosophila [1]. Mlf1IP is expressed in both the nuclei and the cytoplasm and thus may have multi-functions [2]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.10 0.71 -4.66 9 258 2009-01-15 18:05:59 2007-08-07 15:00:23 4 2 114 0 123 239 1 155.10 36 64.12 CHANGED MRpMhtuF.u-PFu...hlSloDtph+u.sR.usstt..........ssa....hhuMsuhhhsMhshMpshhtsh-ph.ossssspoFSSSoVhoYSss.GDusPKVYQtTSpTRsAPGGI+ETR+olRDS-SGlE+MuIGHHItDRAHIlcRScN++TGDpEERQ-FINLDEu-AtuFD-EWppcs .............................................................................................h....tsh......s..s.t.............t.....t..t.............................s........h.shst.hh......s..h.......h..s............M.ph...............tp..h..tph....ss.s..s.s..spoFs..SS.oVhoYSps....sst....PplaQ..s..o.oppR.suPG....G..l..+..ETR+sh+DScoGl.ccMuIGHHItD..RuHllp+ppNp+oGcpEppQ-al.N..lsEs-AtsFDcEWppc.................. 
0 32 47 83 +10081 PF10249 NDUFB10 NADH-ubiquinone oxidoreductase subunit 10 KOGs, Finn RD, Coggill PC anon KOGs (KOG4009) Family NDUFB10 is a family of conserved proteins of up to 180 residues. It is one of the 41 protein subunits within the hydrophobic fraction of the NADH:ubiquinone oxidoreductase (complex I), a multiprotein complex located in the inner mitochondrial membrane whose main function is the transport of electrons from NADH to ubiquinone, which is accompanied by translocation of protons from the mitochondrial matrix to the intermembrane space. NDUFB10 is encoded in the nucleus. 22.20 22.20 22.20 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.73 0.71 -3.99 8 155 2009-01-15 18:05:59 2007-08-07 15:00:34 4 2 122 0 94 150 0 111.50 40 70.28 CHANGED DtPVThFR-.lVEp.ps+sKasYYHpcaRRVPslspCtssDhlChaEA-hQaRRDhtVDpEIVplLppRhcsCpQhEupsHh...QsCuK.lcQaccsscsa..+YGDLGAYusARKshMKQKHRMlhER+ .................DhPlThhR-..hl-...spp+h.aYHppaR...RVPslspChpsDhlChaEAc.Qa+RD+hVDpEIlpIlp-RlcsCtp...h...EG.....ssah.......QpCt+.lcQapcsscsa..+Y..t-LGshh.ss+pshhKQKpRhh.ct....................... 0 32 46 70 +10082 PF10250 O-FucT GDP-fucose protein O-fucosyltransferase KOGs, Finn RD, Coggill PC anon KOGs (KOG3849) Family This is a family of conserved proteins representing the enzyme responsible for adding O-fucose to EGF (epidermal growth factor-like) repeats. Six highly conserved cysteines are present in O-FucT-1 as well as a DXD-like motif (ERD), conserved in mammals, Drosophila, and C. elegans. Both features are characteristic of several glycosyltransferase families. The enzyme is a membrane-bound protein released by proteolysis and, as for most glycosyltransferases, is strongly activated by manganese [1]. 
26.80 26.80 26.80 26.80 26.70 26.60 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.21 0.70 -5.34 62 1215 2009-01-15 18:05:59 2007-08-07 15:00:56 4 28 188 6 759 1192 4 302.80 22 67.15 CHANGED llh..s.G.GhNppRss..hhchlshA+hLNt...........TLVl...............P.hpp..hhWpcss.p.....Fsc...hF-l-p.........hhphl.sslclhchl.............Pphhsth....................................................phhhshphs....hsssphahpp..h..hhtch...........pVltlstssspLssphhshpl...Q+........uLpaspcIpphupphlp+hh................................psssalulHLRhp.Dh..hpsC.h.tht..ptt.htt.p............t.p..ps..hphpspsPhtspts.....shlLpslth...ps.......splYlAosp..........ht...th..psLpshhsphh...+pplsstc-ht.h.s..sphAtlDhhlsspu-hFlssp...ss.hsthlttcRchh..s.h........................ssF ...................................................................................................................................................................h.....s.G..Ghsp.+.t..........hhshlshA+hLNt.......................................................TLVl...........................................P....htt.....ha........p......c.s.....p........................Fpc........hF-h..ct................hh...p....l......t..slpl..c.l..............................P..h.t....................................................................................................................t....hph...t.....hss..ph.hhpp......hhshh.p............................tslt.l.....s.t.......h...s.spl...s....p..hs...ph..............p+h..........................................htuLpas.........p...l.pph.u...pph..lpchh.....................................................ts...s.alulHL.R..........h.c...D.h.........hptC....................t...h.tt.p...............................s..hph.psp.s.Ph.t..spts.....................shhhp..thsh...ps...................s..tl.Yl....Assp...................................hht..t.t.h....tsLp....phas.phh......p+ppl.h..s........p-...ht..h.t................sthA.t.lDhhlshpu-h.Fls
s....ss....hsthl.tcRphhsh.......phh....................................................................................................................... 0 207 476 627 +10083 PF10251 PEN-2 Presenilin enhancer-2 subunit of gamma secretase KOGs, Finn RD, Coggill PC anon KOGs (KOG3402) Family This entry is a short 101 peptide protein which is the smallest subunit of the gamma-secretase aspartyl protease complex that catalyses the intramembrane cleavage of a subset of type I transmembrane proteins. The other active constituents of the complex are presenilin (PS) nicastrin and anterior pharynx defective-1 (APH-1) protein. PEN-2 adopts a hairpin orientation in the membrane with its N- and C-terminal domains facing the luminal/extracellular space, and the C-terminal domain maintains PS stability within the complex [1]. 25.00 25.00 25.60 26.00 22.70 24.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.28 0.72 -3.76 11 135 2009-01-15 18:05:59 2007-08-07 15:39:15 4 2 126 0 86 119 0 92.60 43 78.83 CHANGED lPNEcKLpLCR+YahsGFAhLPFLWhVNssWF...F+EAFtKPuasEQppI+pYVltSAlGhhlWsllLoTWlshFQh..pRspWGshuDhlSFhIP..LG ...................ssEcKLsLCR+Yah......u.......GFAh....LPFLWhVNshWF...F+-AFhtP...sas..EQppI+pYVht....SAlGhllWsllLsoWlhlFQh...RstWGshuDhlSFsIPhG............................. 0 31 45 66 +10084 PF10252 PP28 Casein kinase substrate phosphoprotein PP28 KOGs, Finn RD, Coggill PC anon KOGs (KOG3375) Domain This domain is a region of 70 residues conserved in proteins from plants to humans and contains a serine/arginine rich motif. In rats the full protein is a casein kinase substrate, and this region contains phosphorylation sites for both cAMP-dependent protein kinase and casein kinase II [1]. 
25.00 25.00 25.90 25.70 22.80 21.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.03 0.72 -3.96 22 247 2009-01-15 18:05:59 2007-08-07 15:39:49 4 6 192 0 177 239 2 84.40 51 39.93 CHANGED sNPN+ssppp.pphpp.....h..s............s.spLSRREREtlEtQpA+cRY.KLHspGKT-pA+ADLARLAlIRcpREtsAtR+EsEKct+ ..............................................................................tNPN+st.pps..p.pssp......lshs.............................ssspLSRRERE.plEKQ...cA+...ERYhKLHttGKT-pA+A.........DLARLAlIRcQRE-AAt++EtE+ct+..................... 0 53 88 134 +10085 PF10253 PRCC PRCC_Cterm; Mitotic checkpoint regulator, MAD2B-interacting KOGs, Finn RD, Coggill PC anon KOGs (KOG3903) Family This family constitutes the major, conserved, portion of PRCC proteins. In humans this family interacts with MAD2B, the mitotic checkpoint protein [1,2].\ \ \ \ \ \ In Schizosaccharomyces pombe this protein is part of the Cwf-complex that is known to be involved in pre-mRNA splicing [3]. 
20.90 20.90 20.90 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.47 0.71 -3.59 43 260 2009-09-11 16:00:24 2007-08-07 15:40:11 4 6 218 0 186 243 0 214.30 19 52.85 CHANGED lsLFuLs............sspptsssssss.............tsussYpPhhhsspttts.tsst..sts.pstpsspsssps.s.tt................................t..........slutup+Rphht+t...............pspllchss-pph.ssspchhhpt.t.ct.sspt...p.sslpsht.................t.uKHpLppLlptApsp+-tLE-paAsuRpN++pAuuKYGa ...............................................................................................................................................................................................................................................................................shFuh...................................................................................................t........t.....................t..t..........................................................................................................................................hts.............thstpth+phhs+tt.t...............phpll-lssDcph...sss..p....p.h...htpths.tct...shps...thp..s...sht...................t++KHQls.Llpp........A.pppc.cLcppaups+hs++pspuKYGa................................ 1 61 98 147 +10086 PF10254 Pacs-1 PACS-1 cytosolic sorting protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3709) Family PACS-1 is a cytosolic sorting protein that directs the localisation of membrane proteins in the trans-Golgi network (TGN)/endosomal system. PACS-1 connects the clathrin adaptor AP-1 to acidic cluster sorting motifs contained in the cytoplasmic domain of cargo proteins such as furin, the cation-independent mannose-6-phosphate receptor and in viral proteins such as human immunodeficiency virus type 1 Nef [1]. 
25.00 25.00 25.60 28.60 24.70 24.50 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.53 0.70 -5.84 8 272 2009-01-15 18:05:59 2007-08-07 15:40:32 4 4 94 0 174 211 0 351.90 47 46.84 CHANGED VYDQLNpILlS.DutLPE...sllLVNsoDWQGpa.....lu-LLQspphPVVCTCSsAEVQAlLSulloRIQ+aCNCNSpsPpPVKVullGuQpYLuAlLRaFV-pLupKo.PDWLsHhRFLllPL.GS.HPVAKaLGulDsRYSshFhDsuWR-LFsRsEsssosppt........................sDlluRIpQYlsG.AsssHQLPIAEAMLThKp+.............DEDSsQpFlPFlGVVKVGllEsspuo........G.D.-DusslS...........................tul.So.SPP...psoshuK-u.uTPP......................sSPShsuuhsu.uSPs....s-ulGLQVDYWsus.PsE+++.........-u-+......+Dt.ouKNTLKusFRSLQVSRLPpuG....puphssoMoMTVVTKEKNKK.....................................sshFLuKKsK-KEs-.SKSQsIEGIoRLICSAKQQQsh.LR........VhIDGsEWsDVKFFQLAuQWsTHVKaFPIuLFGtoK ..............................................lhDQLspILlS..Ds..tLPE...sllLlNssDhQGQ.h......lup...lLQpp........phP.l.VsT..sSss-lQAs.hssllo+IQ+aCNsNop.PtsVKlulsGuQpYhuulLRhF.V-pLupKs.sDWLsahRFLllPL...G................S...HPlA+YLuS.....lD.+Ys..shF.........D..sW+-LFp+.Es.ss..p...........................-...lssRltpYlsG....Ass..spQLPIAEAMLsh+pc....................DE-S..t.QpFlPFlu..s.VKVGhl.E.ssuss.......sDs-Dusss.s................................................................sl...So...oPP....pss..s...h....+.......t...s.so.PP.........................s.S.Puhs..suh..s..s...u..ps.........u-hhtLQVDY.Whst..s.s-c++....................cs-K.......cD..ssKNT.LK....ssFRSlQV.....SRLPpuG..........ps..t.h.s.ss.MuM.....TVVTKE......Kp.KK...........................................................hhhL.sKK.sK-K..-h-.......u.KSQsI-G.IuR..LICoAKpQpsh.LR..................................................V.IDGV.EWsDVKFFQLuAQWsoHVKaFPlslFuhs............................................. 
0 34 47 94 +10087 PF10255 Paf67 RNA polymerase I-associated factor PAF67 KOGs, Finn RD, Coggill PC anon KOGs (KOG3677) Family RNA polymerase I is a multisubunit enzyme and its transcription competence is dependent on the presence of PAF67 [1]. This family of proteins is conserved from worms to humans. 19.90 19.90 19.90 20.80 19.70 19.20 hmmbuild -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.43 0.70 -5.59 26 467 2012-10-11 20:01:01 2007-08-07 15:40:55 4 6 266 0 232 338 15 296.80 40 76.52 CHANGED cpRhsSacNYssLFshll.......supss......................ssLpLPs..WlWDllDEFlYQFQshspa+s+htpps..t.................-cht.l...ttp.spsWsshslLplLpuLlp+SpIpp.hph.pts...hp..s.....hsssh...sspslhthLGYFollGLLRlHsLLGDYptALKsLcsI-l.s+cu.las+VsssaloshYalGFuYhMhRRYsDAIRhFsplLlalp+pK....p.hpppsaQ.stl.Kps-pMYtLlAIshsls...Ptp...lDEoltptl+E+.Yu................-chhp.lppu...shpsFc-lFshuCPKFls..Ps.ssshcss...h.........-shppQlplFhc-VppQptlsplRSYLKLYsolslpKLAuhh-l...................-s-.......................pl+s.LhshKpcs+pl.hhstu.slhcGchhss...u-lDahID............pDhI+ls-s+.st+pas-aFl+pltKhc ...................................................................................................h...s.....h.hthhtEalaQht.h..ap..................................................at......h.thh..hh..s.h.................................t...hhh.hGaauhlslh+hpslhGDa..ulphht....lph.t....................t...h...l..s.hsh.YahGas.hMhpcY.-u.p.h.thl.hh.p.p.............t..........K..-phh..LhsIsh.sht...........Ptt.............lD-slptthpEK.as.............................DKhh+.MppG...s..s...sac-LFsaACPKFlo..Ps.ss....sh-ts..sh.....................-shchQhplFh-EV+QQhhhssIR..SYLKLYTThslsK.LutFh-h.............s.p............................................................phph.LlshKp+hpph..W..s.u......u.........h..L-Gc...u.............s-hcahl-.....................pshlhIs-sc...tptatcaFhpplt+h................................................................... 
0 87 126 185 +10088 PF10256 Erf4 QRDY; Golgin subfamily A member 7/ERF4 family KOGs, Finn RD, Coggill PC, Bateman A anon KOGs (KOG4069 & KOG4101) Domain This family of proteins includes Golgin subfamily A member 7 proteins as well as Ras modification protein ERF4. 26.30 26.30 26.30 26.50 26.10 26.20 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.53 0.71 -4.23 32 473 2009-01-15 18:05:59 2007-08-07 15:41:10 4 7 230 0 323 429 0 117.60 28 53.06 CHANGED hVtI.Rca....tsshhspF........ps.taPsp..................................L.psh.lstp-FcphlsplNph.LtcAhsshshpshl-shlsslT..hhlh.hhh.....hshhc+..................pLpclppal......pphNp....phhptpGl...pllsP+cpG.Lplsh ...........................................................................h.....p.h....stGhsspFpo...cFPsp........................................L...su+..lstp-FcpolpplNsh...htcuhps......hstt..shhts.hhsChT...hhh.hhsh.........................hshhc+............................................s.h+plp+hl......ppp.Np.....cla.t..........hG..L.............phh.shcps.h.................................................................... 0 90 148 238 +10089 PF10257 RAI16-like Retinoic acid induced 16-like protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3695) Family This is the conserved N-terminal 450 residues of a family of proteins described as retinoic acid-induced protein 16-like proteins. The exact function is not known. The proteins are found from worms to humans. 
20.50 20.50 20.70 21.90 20.40 20.40 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.20 0.70 -5.52 27 432 2009-01-15 18:05:59 2007-08-07 15:53:27 4 6 190 0 274 417 1 336.50 29 42.16 CHANGED CLEahLpcpIL-sLhpLuhs...shss...Gh+ppslphaspLlup.pps...lLsatsltRPlhcL......................................lphssps....usss........................Ep-hVplLpslCsplppcPtLLshahpspppp...................................................................t.t.sh....ttpssasLFshLlsas+pp........................GRlGphARcuLLhllshu.....p.shu...................................................................................palsp.oslss.......................lhustLu.uLYspLPppl....p..........l.uhsh.......tpp..................-hsths....tL.tFhuhLpasssllpps+s....................tlsssLhcthpptFL.slltPuLLpsS-...tuhhsshshLptlLcplspssLlcphlpFLLu................................ppp.t.....tth.lhppLlppps+ho.clshssh...tlh-sllttPspthLhsLlLcsL .....................................................hhEahlpcpl..hpplhphuht.....chss......th+tp.lthaphLlsp.cps.....LLt.at.slhcslhpL.......................................lphsuts.......sssh.......................................Epch....V.LLst..lCsplt.p..c.Pt.lLphahpsp.p..t....................................................................................................................................................................................tstspa..LFshLlsal+pc........................GplGppAR-uLLhlhulu.....ts.shu......................................................................................................................................................................palsppo.hCs.......................ll...usG.Lu.uLY..opLPpplc...........................................l.u...s..W.....h.ttc...............................................Dh.ths..............tLstFhs.L-FCs...sllph.A.+s..........................................................l.tp.pLhchlp.ptFLhslhtPuLhpso.............................tphlssouYLch...
...........hL+.......pl.......s.......ps.sLlpph.lpFlL...................................................................p.pp................spts.......lhcpLlp+hsp.....c.lshsoL.......thFcpLlthsscplhhpLlLp.L......................................................................................................................................................... 0 79 115 189 +10090 PF10258 RNA_GG_bind PHAX RNA-binding domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3948) Domain RNA_GG_bind is the highly conserved U3 snoRNA-binding domain of PHAX (phosphorylated adaptor for RNA export) whose function is to transport U3 snoRNA from the nucleus after transcription [1]. It is characterised by having two pairs of adjacent glycines, as GGx12GG. 25.00 25.00 27.60 25.00 22.50 24.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.73 0.72 -4.29 17 161 2009-01-15 18:05:59 2007-08-07 16:04:49 4 16 122 5 121 157 4 81.30 37 19.68 CHANGED clsscluppLpE.+s-LlhRllpslG.phshpLhpEThpIppsGGhhsssGsR+RTsGGVFhtLlKp...p.plsc-phctIatc-+cpp .............................lss-lu..h+LpE...Kp...cLltRlVphlG.c...tsl-LhpE.....Ttc.lEpsGG.....hhl....hsG......s...R...RRTPGGVFhpLlKp...p.plspcph+pIa..-pph.................. 0 41 63 97 +10091 PF10259 Rogdi_lz Rogdi leucine zipper containing protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3992) Family This is a family of conserved proteins which have been suggested as containing leucine-zipper domains. A leucine zipper domain is a region of 30 amino acids with leucines repeating every seven or eight residues; these proteins do have many such leucines. The protein in Drosophila comes from the gene ROGDI. 
29.40 29.40 30.00 29.90 24.50 28.20 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.75 0.70 -5.29 24 229 2009-09-11 08:31:40 2007-08-07 16:05:10 4 4 203 0 164 223 0 258.20 25 80.33 CHANGED ElcWLlcp.lpssLsplh-sLpcCsthLh..............tt.s.phsLSos.........ps-...plKGllTRsGstIsphslpl+h.................schsp...hhhph.s.sps.hhLpQlpsspstlspulsllsshpths......................phpstsplhp.lptlhpplppu+ptLp..........hPscss....hhh.tpssssp.........................FsPsLssp..............lulclhIspscLslpl+sLctlp.......t.hst....sshshhsplh.th...pp......................................................................................tsh-.hscshsa..ssps.................................................VhhpcKhpVposDPhLhushsKLsultphlpphhssLss ..................................................................................................EhpWllpp.lpssLtpLpchLp-....Ctthhs..............................................s.tptpphsloss.....................................ps-.pl...KGhlTh.Gstlspu...clpl+h......................s+tsp.....hphs..hp.sp..htLpQlpsspsplspulp.llsshptsh.......................................................................................phpsusplhphlstlhtpL.pcu+spLp.............hPts.s.....h..htsst.sp...................................................................FsPsLPss.............................lslphhlppscLsltlhpLcshp........................ss.h.sh...........tt.........................................................................................c..sthhpa...tsp.......................................................................l.lpchhcVps...P.L.sh.shhss..t.htphhttlt............................................................ 0 40 73 126 +10092 PF10260 SAYSvFN Uncharacterized conserved domain (SAYSvFN) KOGs, Finn RD, Coggill PC anon KOGs (KOG3249) Domain This domain of approximately 75 residues contains a highly conserved SATSv/iFN motif. The function is unknown but the domain is conserved from plants to humans. 
25.00 25.00 28.10 27.60 19.40 18.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.42 0.72 -4.21 9 137 2009-01-15 18:05:59 2007-08-07 16:05:22 4 5 114 0 96 133 0 70.90 41 37.18 CHANGED lhlllWllhhsluhchs.FGslFllhShhhhIahNhtp...RppGEhSAYSlFNcsscpl.GThsAEph-R-lt .............h.hhllWllLhslhh..c.l-..FGhlahllShFhhl..ah.shcs.......t.+cpG........E..hSAYSVFN.sCcsl.GTLsAEphER-lp.......... 0 35 50 77 +10093 PF10261 Scs3p Inositol phospholipid synthesis and fat-storage-inducing TM KOGs, Finn RD, Coggill PC anon KOGs (KOG3750) Family This is a family of transmembrane proteins which are variously annotated as possibly being inositol phospholipid synthesis protein [1] and fat-storage-inducing. The members are conserved from yeasts to humans and are localised to the endoplasmic reticulum where they are involved in triglyceride lipid droplet formation [2]. 20.50 20.50 21.30 21.70 20.40 19.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.97 0.70 -5.33 40 321 2009-01-15 18:05:59 2007-08-07 16:05:44 4 6 218 0 223 312 0 213.20 27 67.70 CHANGED .tshsY...ass+cNllNhhFVKpG.WhWTolhhhhh..lhhh.............ststtps...st.phh...............................hptlhRallsTlhWhlhTp...hh.h.slhDplhshTGGpCphssst...............................................................................................................houtsC+ptGGp...........................................WpGGaDlSGHsFLLshsoLhLhpE.............................h..htphhtsh.th.t......................................hhphhhphshhlsssLlslWhahLlhTslY..FHohhEKlsGhlhGhlshhll.Y 
....................................................................sYhup+pNlhNh.hFVKhu.WhWTshhhh.h..lhhh........................shhhstt.....................................hhptlh+hslsThhWhhhTp.............lhstl.hhTG.pC..s.h...................................................................................................................t...hhoptpC+tt.GG.h...........................................Wp.............GaDlSGHsFlLshsshhlhpE..................................hth..htph.tt.t.....................................................................hphhhphhhhhhshlhslWhahLlhTsl.Y..FHshhEKl...hGhlhuhhshhhhY.......................................................... 3 58 101 169 +10094 PF10262 Rdx SelT; Rdx family KOGs, Finn RD, Coggill PC, anon KOGs (KOG3286) & COG3526 Family This entry is an approximately 100 residue region of selenoprotein-T, conserved from plants to humans. The protein binds to UDP-glucose:glycoprotein glucosyltransferase (UGTR), the endoplasmic reticulum (ER)-resident protein, which is known to be involved in the quality control of protein folding [1]. Selenium (Se) plays an essential role in cell survival and most of the effects of Se are probably mediated by selenoproteins, including selenoprotein T. However, despite its binding to UGTR and that its mRNA is up-regulated in extended asphyxia, the function of the protein and hence of this region of it is unknown [2]. Selenoprotein W contains selenium as selenocysteine in the primary protein structure and levels of this selenoprotein are affected by selenium [3]. 
22.10 22.10 22.10 22.20 21.90 22.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.81 0.72 -3.95 82 990 2009-01-15 18:05:59 2007-08-07 16:06:05 4 6 744 31 497 872 69 87.60 31 61.58 CHANGED sclpIpY........CspCpahhR............ushhsQ-LLpoF.ss.......t..clsl.....................................................................hs....us.sGs..F-ltls.....sp.....................llas+hpsu.....................GFPsscpLhphl+st ..........................................................................................plpI.Y...........C..tp....Cp.ahh+.................ushhupcLh.psFss............tl.t..pV..sL............................................................................pP.....so..uGs.FElpls................sp.......................................hlW...s+pp.sG.....................GFP-sc.tLpphl+s.h............................................................................ 0 167 270 391 +10095 PF10263 SprT-like SprT-like family Finn RD, Coggill PC, Bateman A anon KOG3931, COG3091 Domain This family represents a domain found in eukaryotes and prokaryotes. The domain contains a characteristic motif of the zinc metallopeptidases. This family includes the bacterial SprT protein. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.55 0.71 -4.57 148 2631 2012-10-03 04:41:15 2007-08-07 16:06:41 4 16 2330 0 664 2121 615 152.10 24 66.24 CHANGED lpphhpphspph.aps..htt.........plpas...........chppsAGp........shhppp..................................................................................pIcls.hllpp......spp..tlhp......................slhHElsHhhhahh..........tcspsHGs.ca+thhpplss.....................................h..phhcpass.p........................pahapC............................sCst.h..tpptpl..cppp.....................................................................atCt.......................pCp.uplthhp ................................................................................h..thhtphs.phFtp...hp..................pltas..........................................pht.o.ouGp..................hh..hcst.....................................................................................cIc.lNshlhpc................spp....hltp......................llhHELsH.hhlaht..............t+st.H.sc...-...........a+thhppVhu............................................................h.sp.hhcpath.p...........................tpa..YpC.....................................pC...p.................+p....tp....l....p.hpp.........................................................................................apCs......................pCt.tpLh...h.................................................................................................................................................................................... 0 194 353 536 +10096 PF10264 Stork_head Winged helix Storkhead-box1 domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3897) Family This is the conserved N-terminal winged helix domain of Storkhead-box1 protein which is likely to be a DNA binding domain. 
In humans the full-length protein controls polyploidization of extravillus trophoblast and is implicated in pre-eclampsia. 21.20 21.20 22.50 21.40 21.00 19.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.43 0.72 -3.80 8 151 2009-01-15 18:05:59 2007-08-07 16:06:58 4 4 78 0 92 140 0 78.10 53 9.84 CHANGED MoPIsQSQFIPLuElLCssISDhNusphsVTQEsLhp+LpppaPGhshPSp-lLYsoLusLl+ERKIYpTucGYFIVTPQ ........MoPIuQSQFlPLuElLChAISsMNuAppsVTQEuLhE+LsppaP.G..ls..sPSpElLhpTLspLl+ERKIY.Ts-GYFIVTPQ................ 0 20 29 55 +10097 PF10265 DUF2217 Uncharacterized conserved protein (DUF2217) KOGs, Finn RD, Coggill PC anon KOGs (KOG3831) Family This is a family of conserved proteins of from 500 - 600 residues found from worms to humans. Its function is not known. 25.00 25.00 36.90 36.70 24.70 24.00 hmmbuild -o /dev/null HMM SEED 515 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.62 0.70 -5.74 11 211 2009-01-15 18:05:59 2007-08-07 16:07:11 4 6 90 0 112 184 0 382.80 38 89.17 CHANGED hshushstl+los.utK+llhusAhGsluLhhhA+pL+RR+t++c.......t...hh..........op+s.ut+tspsoupR.p......sssscsNDshSsluSttsuKpSuSupSluShpshpSsussuss..ussW-sts..p-sh...sssssosEsLYlMGMELFEEslppWEpALsh+pctst..u.hs.................sspst-Fsc+LEsLLptAYpLQE-hphhh...cssshh...............sD....c+shthshhststshptscssSlsSsDSFhSAsElh....-ph-hp..shsh.shc.tshYEEALpLsc-GcVsCRoLRTEhLtChoDsDFLAKLHCVRQAFpsLLpDcsNphFlu-sGRQlLouLls+AcKsPKcFL-uYE-MLpalpps-sWssschELEuRGVpsMsFYDIVLDFIlMDAFEDLEsPPsSVhAVlpNRWLSsSFKETALsTusWSVLKuK+phLKsP....sGFhuHFYsloEplSPllAaGFLGP+ppLp-lCsaFKcQlltFL+DlFDh-KVRYoolEpLAEDILplh+RRs-llhsYh 
.........................................................................................................h.......h..s..sh+hlhhshshush...h...hhup.h+R+t..................................s.h....h.t.hstpp...................ss.ppss.p..hsh.hs.u...u+..suss........tShh......uh............h.S.t.ss.s...s.....hpt.s...t..h.......h..hssppL..hh...GMEhhEcslppWEpALsht.pp...t.t....s............................................................................................p..ptcFhcclpsLLptAYpLQ.Epht...h...h....ss.hh.......................D....cps......hshtt...s...t....ht..t.sp..shsSt-SFh..SAh....E.h....-.hpht..t...h.....p...hshYppAhphsc-spl.sR......sl............R...o-hhtC.uDt-aLAKLHClRpAaphlhp-....tsp.ahscsG+phlssLhhhup+sPKtF..sa--Ml.ah.pspp...a..sphp.ELtsRG......VhshsFaDllLDFILhDuF-DL-sPPsSl.sVlpNRWLssuFKETAlsouhWSl....L.K..tK+p.hh.hs....sGFhuHFYslsEplSPlhs.aGFLGP..p...pL.-lCthFK................pQll.aLtDhFshppsRaooh.thucDlhph..pRsp.l............................................... 0 32 39 75 +10098 PF10266 Strumpellin Hereditary spastic paraplegia protein strumpellin KOGs, Finn RD, Coggill PC anon KOGs (KOG3666) Family This is a family of proteins conserved from plants to humans, in which two closely situated point mutations in the human protein lead to the condition of hereditary spastic paraplegia. Strumpellin contains one known domain called a spectrin repeat that consists of three alpha-helices of a characteristic length wrapped in a left-handed coiled coil. The spectrin proteins have multiple copies of this repeat, which can then form multimers in the cell. Spectrin associates with the cell membrane via spectrin repeats in the ankyrin protein. The spectrin repeat is a structural platform for cytoskeletal protein assemblies. 
25.00 25.00 45.20 32.30 21.50 23.40 hmmbuild -o /dev/null HMM SEED 1081 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.63 0.70 -7.15 12 186 2009-01-15 18:05:59 2007-08-07 16:07:20 4 5 115 0 130 181 15 759.30 41 92.15 CHANGED soIIAElLRLupaIPp.F...s+t-ppcYucIl.hDFpYFptt-thEscIpps.cL.pLDE-h+poal.llpRFatlFcSlacYhhDLppal-ElppGhalppTlEolLpst-GKQLlCEulYLYGVMLLlhDhpIsGhlRERlLVoYhRY.....+usu-.sNhs-VscLhRuTGYsssss.s............................sYP.pYFsRl.lscphIshlIGRlRoDDlYp.ht.YP.P-HRSsALusQuuhLYVhLaFsPplLpspsuhMREIVDKaFsDNWlIsaYhGhTVsLhsAWcsFcAA+sAlpNTls.pslp.hhp+hppphsplhsphpplLpEGhLsEphlLDslp.plhshlR-sNVslRWlhLH.......................spppt+pspphl.h..phspcplhpLLLsouQhEhpl+phhpslLppKpp+Wpcs+cpusp+hpcLuchFSGs+sLscsppsppLppWFtplupcIppLshs-s..stuuRpI.pLlpALEpVp-FHQlssNlQVKQalt-TRphLppMh+hlNIc-csLhplthluDhSYAWclls...sasshMQppI+ppPphslplRusFlKLuShL-lPhlRIsQupS.........D......L.oVScYYSsELVsFlRcVLpIlPpohFslLtpIIpl.TNsl+EhPo+L-KcchK-aAQh-pRtpluchTtsIuhaopGILAMcoTLVGlIclDP+QLLEDGIRKELVppIscthapsLlFs..........t....ptpssphppcLspLupplcGhRpSFEYIQDYVNlpGL+IWp....EEhsRIlpapVEpECNuFl+pK.l.t..WpSpYQSp....IPIPpa.sh....stpuhsFlGRLspcllplT-P+solals.hsuWa-hcs.hpEVlus+.hauplpcslGshGLsuLD+Lhuahls+cLpphlpphpttl...-pthhsslpsltspLp....stsshscp..shchYtphhpphpthh......................splhthlhplGQhQllRppIutcLphpsKlcSspL.ssLcshNcAlLtD.lppHh...........ppspopPhPsp....llsslssaLpssGlp-PhpKlY..........................................hsscshPph.slhLhlhsLtpl..s+hpascplsshls.pctps....lDspsLhlGlhslL+QFpspppphaLsaluQal 
......................................................................psllAElhRLtphlP..a.........................ttt..cat.ll.hDFpYhp..-.h-t..plptp.tL.tl-cchhps...hlpRFa.hFpulhtYh.-l...palp-lt..p...Gh...alp.oh-slh.s.cu.pQLh......sEshaLaGshLLlhD.ph.G.lRERhlluahRh...................p..pss...sphcclstL..h+sTuh........................................................................s.........pYPpt...aFtRh.hsp...hlphllupl+sDD..lYpt....a.P.Pp.HR.SsALu.QuuhLaVhLaFts.phLpsp.shMREIVD+aFsDNW..l................lshahGhslsL.........t........WtsacAApsAlttslp.t.tlp...htp...t.....tp...p......hpthlpcG.....lppphl.....lp.ph..plh.hhRpsNsslRWhhLH...........................tt.pphtp.l.t.....thp..t.lhpL.LLpsuphEh.l+phhtphLtp+ptpWtt.+pps.pph.cLuphFuG.hs.L.sp..pstpL.tWF.phupp.........l.pLphtp....ttsuRhh.plhpALcpVppacplptshplpphlt-opt.LppMh+hlslp-chhhphphlsDhSaAWtllp...tah.hhQptI+.pPt.shtl+uhFlKhu......Sh.......L-lPlhRlsQspS.........D......L.sVSpaYSs-LssalR.pVLpllPpohFt.L.plh.l.sp.hhc.hPs+lpKcc................l+cauQht.Rhpl.AphTttluhaopGlLhMcpThlGllc.lDP+pLLEDGIRKELVppls.thpt...L.Ft.....................................t.t.h..pLtthut.hpuh+pSFcYlpDYlpl.GL+la.....EEhpRIlphtlptEsp..sFhppp...h.....p..app........hshP.h..........t.u....sFhG+LhppllthTs.sp..ohahp.h.sWa.........p.ps..........p-lh.s.p.hhs.ltpslu..GlsuL-plhshhlsppLp.h.lt..htt.l......h.thhtth.ttlt..................s.....th..tt.....phY.thhtthtt.h.......................s.hhphlh..plGphQllRp.Is.pLp.ss+hcup.L..sslpshNpull.p.ltt................................................................t...shstt......hl.plpthLphsGh.pPhpplY..........................................lp.s.p....h....h..shh.hlhhlspl..s+h.astp....hs......hh..tt..ts...hD..shhhGhhslLpQFp......hlthhs.h................................................. 
0 57 72 102 +10099 PF10267 Tmemb_cc2 Tmcc1; Transmemb_cc2; Tmemcc2; Predicted transmembrane and coiled-coil 2 protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3850) Family This family of transmembrane coiled-coil containing proteins is conserved from worms to humans. Its function is unknown. 23.30 23.30 23.30 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 395 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.27 0.70 -5.88 15 351 2009-01-15 18:05:59 2007-08-07 16:07:43 4 5 107 0 168 289 1 324.30 46 73.96 CHANGED +s+tsh-pLpQKILcloEQl+lEQsARD-NVsEYLKLsss........ADKQQsuRIKQVFEKKNQKSApoIuQLpKKL-pYH++LKElEpss...........tppP+-sl+-hppuLK-sssp.p..........httl+s.hsGhS........hVhSKPREFAsLIRNKFGSADNIsphcs...............................s..Lc....shtsEp..ss+uLuuuu.ohsspsKYsSD-..-CSSsouu.Sssususps........................psstpsspsslstlhEELpEI+csQupLc-sh-sLKsphp+DashlspoLQEERYRtERLEEQLNDLTELHQNEhsNLKQ-LAohEEKlAYQSYERARDIpEslEsCQTRISKLE..pppQQQslQLEuh-p.pAR..sLLsKhINllLuLhsVlLVhVSTlAshssPLh+oRh+lhsTllhlhllshhW+pWcpl .........................................................................................+.shspLpQKILKloEQI+lEQpuRD........sNVuEYLKLsss........A.DKQQ.suRIKQVFEKKNQKSApoIuQLpKKL-pY+++L+ElEpsG............cpsK-sh.+DhppuL+-stup................s+suhsuhS.s......................shVhsKsREhAs.LI..RN.KFG.SADNIspL+s.....................................................................s..h-....ph.s-t.......usps.hus.u..sh.s..psKYsS--...-C.SSsouu.Sss.us...uspu......................................tshh..tttthphlhcEl.pEl+cspspLp-shEsLKtp...hp+-.h...s....hh.psLQEERaRhER.LE-QlNDLT-LHQpEhhNLKQcLAshEEKlsYQuhERuRDIp.EslEsC.TRloKhE.......ppQQsl.........Qh.-shp.....A..+....sLLsKhINl...lL...slh.sVlLVhV.SThAphh.hPhh+o..........R.......+hhsThhhlhhlhhhh+pWtt.h............................................................................ 
0 31 46 87 +10100 PF10268 Tmemb_161AB Tmem161AB; Transmemb_161AB; Tmem161AB; Predicted transmembrane protein 161AB KOGs, Finn RD, Coggill PC anon KOGs (KOG3978) Family Transmemb_161AB is a family of conserved proteins found from worms to humans. Members are putative transmembrane proteins but otherwise the function is not known. 18.30 18.30 18.60 18.50 17.10 18.20 hmmbuild -o /dev/null HMM SEED 486 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.81 0.70 -5.98 8 239 2009-01-15 18:05:59 2007-08-07 16:08:00 4 7 107 0 142 235 2 324.50 35 90.47 CHANGED AllGlQLVVTLlhsSlhQKluPHaSFARWLLCNGSLhRYpHPTE-ELRsLAG..KQK..sK.u++-R+tNG....ssEsKPholPKDIDlcL-TpPlsshDsLsLRaFsEYQWLlDFuVYuslVYLlTElYahhhssscEhNISllWCLLVlhFulKlLhoLTsaYFpSEEG.GERSlCloFuFhaLLlAMlVLlVsEchLEhGLEsG...............asShssshpsFLcpQGLpsuu........PhoKLshKlhLAVhCuhLGAhLTFPGLRLAQMHLDALphspD+.hlQhLLHlSFLsPlllVlLWlKPIsRDaLpssshG+.SssLhSsssF-TLRLWlIllLCVLRhslhRhHLQAYLNLApcpV-QMKKEAGRIoslElQ++luRlFhYLCVVuLQYlAPllLsLahTLhLKTLGsaSW.GlhsEs...s..sh.ssscsuPlss....................t-s-t......phpsTstplpsuhuSL+slFTPllaRGlhuFLTWWlusC.FsoSLFGlaYHQYLstu 
.................................hhhh.hhp+h...hhshhchllsps.Lhha.hPo..-ppLp.hss..........p.........p.tt.+p.p........pt............t..hplP+.sh....hpL.pp..l..h..Dhl..h..ha.p..h.ahh.h..h.s...hhlhhh...s.th.h..hh...................tp.....h..Nls..h.h.a.hhhsh.................a.s.h..p..h...l...hhp......................ps..hEpshhlshshh.hhlhuhhh..hhtcphh-hslp.u...............hs.shspsh...hhpppGh...t.....................P.h.sc.l......hh+h..hL.Ah.hs.uhlGuhhsF.PulRhAphah.......D....u.......lp.......h.....p.t.p.hhp......h..lLphsal.PhhhlhLWh..................+Pls+p.hlh...............................ht..t...........................l.......h....s......t.apphRlhhllhhshh+hhhh....hLQuYLshA.tth.p.+p.cuGpltshp....hpp.hl...hahYlsllsLQahs.Phlhh.l..hs.hhh.p....t....h.s...t...hs.h......................................................................................................................................................s.................................................................................................... 0 45 63 105 +10101 PF10269 Tmemb_185A Tmem185A; Transmembrane Fragile-X-F protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3879) Family This is a family of conserved transmembrane proteins that appear in humans to be expressed from a region upstream of the FragileXF site and to be intimately linked with the Fragile-X syndrome. Absence of TMEM185A does not necessarily lead to developmental delay, but might in combination with other, yet unknown, factors. Otherwise, the lack of the TMEM185A protein is either disposable (redundant) or its function can be complemented by the highly similar chromosome 2 retro-pseudogene product, TMEM185B [1]. 
22.90 22.90 22.90 22.90 22.70 22.80 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -12.19 0.70 -4.76 13 397 2009-09-13 05:38:14 2007-08-07 16:08:33 4 14 158 0 224 366 7 149.00 22 56.90 CHANGED L+LDshlsWSaWhVFsPlW.haphllhhGuhlssss...........tspscahAhllulshpLlLlsFplLlC.pL.......tssshsWplVFlPL...a.hhpshulhthlhsh+a..............DcuhphphhahhshlphlF...........lhLKL......DsllshsWhlVFlPhaIshshthlhs................lahlhhsl.hhscs.plhsspccpphts.ulss......hhhslPhlsFpllLst+L-sss........plshhslFsPLhls ...............................................................................................................................................................................................................................................................hl.phlh.............lsL+L......-.p.hh.....h.....sWhh.l.........a.lPha.....lh.s..hhhl..hh.........................................hh..hh.h........................................................................................................................................................................ 1 74 105 163 +10102 PF10270 MMgT Tmem32; Tmemb_32; Membrane magnesium transporter KOGs, Finn RD, Coggill P anon KOGs (KOG3918) Family This entry represents a novel family of membrane magnesium transporters (MMgT) [1]. The proteins, MMgT1 and MMgT2, are localised to the Golgi complex and post-Golgi vesicles, including the early endosomes, suggesting that they may provide regulated pathways for Mg(2+) transport in the Golgi and post-Golgi organelles of epithelium-derived cells [1]. 
21.20 21.20 21.30 22.00 21.00 20.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.34 0.72 -3.61 35 278 2009-09-21 12:36:32 2007-08-07 16:08:55 4 8 246 0 200 267 1 99.40 28 72.23 CHANGED lhhlGhlhLhHuuYSuh..pa+phh+h.......sp......shssLPhDIllEsllulllhhhGllhsstph+.lph...............htclp....................pssaspl...............psRsuFhshpp+p+tht ..................h.hhlGhlhLhHAuaSuh..pa+shh+h....................spp......phpsLPlD.IllEslluhllshhGllhss.sch+slph.................ssclp...........................ppsassl...............pscsuFhshp++t+.h.................................. 0 59 106 163 +10103 PF10271 Tmp39 Putative transmembrane protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3828) Family This is a family of conserved proteins found from worms to humans. They are putative transmembrane proteins but the function is unknown. 19.70 19.70 22.50 21.90 19.50 19.50 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.59 0.70 -5.82 11 215 2009-01-15 18:05:59 2007-08-07 16:09:16 4 3 90 0 111 180 0 321.40 44 87.45 CHANGED uSuhGhSsPPlss..T...l+Hs.IP-lshs.upLlFEhhlFhapllALhlQYlNIY+TVWWhP.Sa..s+pSLNFHLIDapLhsFIslhLuRRLlasl......lpcsspuspsohhpplhhll....s+hslLThsshoLshohlpLF+saShlsLLFLsYPFshYlslhshph-.p.st...t..................ushhc.sp-ah.hLRchh+pp...ls.t..shPsHuCs......SPshIRsEV-tLKsDFNtRhKcVLFsSlLoAYYVuFlPshFVc..........................ss.YYDhhWoCpphlhVhluuhlhLhsaLLPs+YCDLLHRAAsHLGpWQ+l-......stssssspHsWS-pslWPQGVLV+H.s+slYKAlGhaNV....AlPu-sSHhRFahhFppPLRllNlLhslEsulllYQLaSLh.poccWppslSluLlLhsNYashFKLLRDRllLu+sYuh 
.................................................................................................................................................................hss..s......pHs.hP-lshp.upllFEh.hFhh.llALhlpYlNIY+TVWWhP.sa..sppulNFaLID..lhhhhhlhL.uRRhlhsl.............l.ps...pts...t...ph.p.hhhhh.......h+hslh.shsthsL....s.h...shhpLapsaohlpLLaLsYP.hshYl.hhthp.-.....................................................................................ph..hlhp.....p.hcp....h.....shPsHsC.sh.....sPs.IRpEVEhL+hDFNhRhKclLhsS..........hhoAYYsuFlPhhFV...........................ss.a.aDh.hWus.phhlhV.lss.shhht.L.hP.sp...YsDl....L.H+uAhHLGpWp+l-.............s.hsss..tp.WoctshaspGslV+H.spslY+AhG..hsl....AhPussSHh....RFa.............hhF......ppPhhll..shLhhlpsullhhQlh.Lh.topcWpphlShullhFsNYhshFKLhRDhllht+hYp.............................. 0 28 37 73 +10104 PF10272 Tmpp129 Putative transmembrane protein precursor KOGs, Finn RD, Coggill PC anon KOGs (KOG3899) Family This is a family of proteins conserved from worms to humans. The proteins are purported to be transmembrane protein-precursors but the function is unknown. 
21.80 21.80 22.30 23.20 20.90 21.70 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.12 0.70 -5.73 10 123 2009-01-15 18:05:59 2007-08-07 16:09:26 4 4 92 0 81 123 1 279.90 38 90.95 CHANGED hChVFPPsEFhS.AGLTVpsLhStaLGSEDhuFVpYHlRRTo...ssLLsHShLPhGY....althshhAsppshhpstpss-........sWphhhhh.....uVllsllsuslsaYWSp+sWspHPlsKsLuhauhspu...sW+AVASuINTEFRR.-KFspthsu.o+VlVT-oWllKsTsYplclApQpDl+L..........................oVscS+pH-lo..sDsspslQhlsIpVsshsPt.lcPFsIRLNus-Yt-LpE+LcsPIpssuNVslHpols-cFlEsF+upVctNssaphs...sspEhEPCluCMQspssIKltKpC.sp-p....pG.......cCQsCYCRPMWCloChuKWFASRQ..Dppc.-TWLuu+ssCPTCRA+FCIhDVChl ................................................................................................shlasPpEF.t.hGhTlpplh..utaLGpEphsFl.aHl+Ros...hslhhHohLPhuY....hhthp.hhstpp..h........t..............aphhhhh.....ulh.h.shhsshhha..h.Wp..ppWtpHPls...+sLthas..s..t...................sapsVAssINsEaRch.D+aththsu.u......pllsT-sWlhKsosY..plphApQpDspL..........................sVscucpaplo...-.....ss...slQh....lsIpVt..s.........hpst..lpsFsI..Rlsuh-atpLp-+lptPIt.s.tsl.h+.pol.-hFl-sFtp.VthNshaphs.........................t...ph-.ChuChp..ttsslKl.K.C.p.st........tG.........pCppCaCRPMWC.......lpChu+WFAuRQ..s..c.....-.hWLtu+ssCPhCRupFClLDVshl................................. 0 34 39 63 +10105 PF10273 WGG Pre-rRNA-processing protein TSR2 KOGs, Finn RD, Coggill PC anon KOGs (KOG4032) Family This entry represents the central conserved section of a family of proteins described as pre-rRNA-processing protein TSR2. The region has a distinctive WGG motif but the function is unknown. 
21.90 21.90 22.10 22.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.56 0.72 -3.89 34 320 2009-01-15 18:05:59 2007-08-07 16:09:44 4 7 273 0 236 319 3 83.20 33 40.01 CHANGED hFppulshllppWsuLplAVpNs.WGGscSp-Kp-hlsstlh-hFss....................................spsh-..tp-lE-hLhphMt-EFss.slEDsSshpVApt .........FptulphhlppWsuL.plAV-Ns.WGGs.....p..S..pcK...t-alss.slh-.hFtp..................................................sp.s.sc..hp-lE-hLhphMssEFcs..lEDsSh.pVAp......................................................... 0 78 130 195 +10106 PF10274 ParcG Parkin co-regulated protein KOGs, Finn RD, Coggill PC anon KOGs (KOG3961) Family This family of proteins is transcribed anti-sense along the DNA to the Parkin gene product and the two appear to be transcribed under the same promoter. The protein has predicted alpha-helical and beta-sheet domains which suggest its function is in the ubiquitin/proteasome system [1]. Mutations in parkin are the genetic cause of early-onset and autosomal recessive juvenile parkinsonism. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.27 0.71 -4.38 21 338 2012-10-11 20:01:01 2007-08-07 16:13:17 4 16 140 0 236 318 11 152.70 38 43.48 CHANGED opFRthYsRGDlPlplpHuust....pc..ltW..+lsscpLDYc...hYLPlFF-GLpEpcaPYpFlAppGsh-LLpp............utpKIlPllPpLIlPlKsALsT+c.clhpssLclLQpLVhsushlGtALVPaYRQLLPlhNhapt..+p......hNhGDtlca.....cpppsluDlIp-TLchLEcpGGsDAaINIKYMlPTYpS ......................otFtthYp+GshPhtl.assht......pp..ltW...........cs......c..pLs...ac...hhLPlFh-GLpEhppPYp..FhAtpGhp-hLtt............uspK.llPllPpLIhPlKs...............ALsp+s.plhptsLpsLppL.........u...p..........hlG.tALlPa..aRQl....Lshhshhhs...hp....................................s...u.p.h........ptpppht-lIppTLphhE.hG.........G...s...uhh.IK.hlPTYpS............................................ 
0 104 134 186 +10107 PF10275 Peptidase_C65 Otubain; Peptidase C65 Otubain KOGs, Finn RD, Coggill PC anon KOGs (KOG3991) Family This family of proteins conserved from plants to humans is a highly specific ubiquitin iso-peptidase that removes ubiquitin from proteins. The modification of cellular proteins by ubiquitin (Ub) is an important event that underlies protein stability and function in eukaryote being a dynamic and reversible process. Otubain carries several key conserved domains: (i) the OTU (ovarian tumour domain) in which there is an active cysteine protease triad (ii) a nuclear localisation signal, (iii) a Ub interaction motif (UIM)-like motif phi-xx-A-xxxs-xx-Ac (where phi indicates an aromatic amino acid, x indicates any amino acid and Ac indicates an acidic amino acid), (iv) a Ub-associated (UBA)-like domain and (v) the LxxLL motif. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.95 0.70 -5.02 33 450 2012-10-10 12:56:15 2007-08-07 16:16:48 4 8 244 24 284 557 7 211.80 28 59.13 CHANGED pttsphPh.luph....tshssLtcEYspt.sssahpKh.........ptLppp..YsthRpsRGDGNCFaRAhsFuYlEhLLppps......chsc.hhpplp..phpppLh.....thGhschhhcDFh-thhpllcplpstspts.......pllptasctstSs..........llhahR..hlsSualpppsctapsFl.t.........holcpaCppplEshtpEuDcltItALupuL....slslcVhYlD..............................ps....ssspsspash..................p....................ttshI............hLLYRPGHYDILY. 
............................................................................h.lut..........l.t-a.t....s.......hh.tKh..................ptLtpp....ashhRps+sDGNCFaRAhhauahEtLlpptc......................chpc.hhthh.t...p.pptl............................thGa...tph.h..h.csF.hpthhpllppl...tpttp.s.................pLhp.h..Fs.cp.s.h.Ssh.............................llhahR...LlTSualppps..c...h......a.p...Flps...................hslcp..aCp...ppV...-shtp....Es..Dcltl..hALspAL.............slslpl...Y.hD..............................ps...........tssths.a.hs...............................ts................................ststl............hLLY+P.G.HYDILY.................................................................................................. 2 105 150 216 +10108 PF10276 zf-CHCC Zinc-finger domain KOGs, Finn RD, Coggill PC anon KOGs (KOG3456) Domain This is a short zinc-finger domain conserved from fungi to humans. It is Cx8Hx14Cx2C. 21.20 21.20 21.20 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.29 0.72 -4.11 37 906 2012-10-03 19:45:42 2007-08-07 16:22:25 4 3 865 3 386 717 277 40.00 35 42.20 CHANGED ss+plsC-G..........us.....ss......hGHP+Valslsppst...hhCsYCup+ah ...................................h.CsG..........st.....ss......huHP+Valslsc.psp....stCsYCGphah...... 1 115 224 304 +10109 PF10277 Frag1 Frag1/DRAM/Sfk1 family KOGs, Finn RD, Coggill PC, Bateman A, Wood V anon KOGs (KOG3979) & KOGs (KOG4320) & Pfam-B_15139 (release 21.0) Family This family includes Frag1, DRAM and Sfk1 proteins. Frag1 (FGF receptor activating protein 1) is a protein that is conserved from fungi to humans. There are four potential iso-prenylation sites throughout the peptide, viz CILW, CIIW and CIGL. Frag1 is a membrane-spanning protein that is ubiquitously expressed in adult tissues suggesting an important cellular function [1]. 
Dram is a family of proteins conserved from nematodes to humans with six hydrophobic transmembrane regions and an Endoplasmic Reticulum signal peptide. It is a lysosomal protein that induces macro-autophagy as an effector of p53-mediated death, where p53 is the tumour-suppressor gene that is frequently mutated in cancer. Expression of Dram is stress-induced [2]. This region is also part of a family of small plasma membrane proteins, referred to as Sfk1, that may act together with or upstream of Stt4p to generate normal levels of the essential phospholipid PI4P, thus allowing proper localisation of Stt4p to the actin cytoskeleton [3-4]. 27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.49 0.70 -4.86 89 980 2012-10-01 21:22:51 2007-08-07 16:52:53 4 16 284 0 645 961 31 204.50 18 57.40 CHANGED shth....lslhtslhshsshlhshhhuhh..p.hhphs...........................hP.ISs..hGshsPcptlaphhhslsuhhthhlhhhtahh...............................................hhthhppt.....................lshhuhhhGllss.lulshluhh......sssptt.slHhhhhhhahshshlahhhpshh.hhht............................................ttphshth+hhhshhshshhlsh.................hhhhhhpphh........................hhshsAhhEWhlshhhhhah.hohhhDFtsh 
..........................................................t...hlshhhshhshsshhhs.......hhhuhh..p..hh.p..s...........................................................................hP...l......Ss..hG....shs.P.pphlathhhshsuh.h...........t.hhhhhhhath.....................................................................hht..h...pp.t........................lshhshhhuh.lss...hul.hhlu.h.....................................ppsp......h.....s..........l...H....h.hh.hh..hhssshhahh..hpshl.hhht.............................................tth..hs.hhh+...h...h..hhhht.hh..shlhh...........................................h.h.hh..httt...............................................hhshhAhhEWhlshh.hhah.hohhh-Ft..h...................................................................................................... 0 196 297 503 +10110 PF10278 Med19 Mediator of RNA pol II transcription subunit 19 KOGs, Finn RD, Coggill PC anon KOGs (KOG4043) Family Med19 represents a family of conserved proteins which are members of the multi-protein co-activator Mediator complex. Mediator is required for activation of RNA polymerase II transcription by DNA binding transactivators [1]. 25.00 25.00 26.20 26.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.35 0.71 -4.60 7 119 2009-01-15 18:05:59 2007-08-07 16:53:34 4 5 92 0 79 112 0 161.60 53 58.81 CHANGED uPFYLh+.E.Ps.sELTGspNLlscYsLEpuasKFsG.KKVKEpLSsFLPcLPGhIDhsGo..-NSSLRSlIEKPPIsGpph.PLTus.LsGFRLHsGPLPEQaRhh...+hp.Pp+KpKpKHKpp+sp-...shs............tpossDo......-+K+KKpK.+-DD.-RKK+KK-KK+KKpR+oP-p.ssG. 
...................................................tPFYLh+..E.P...u.p.sELTGuTNLloaYsLEcuY...sK.FCG....KKl.KEcL..S..sFLPcLP..GhIDhPGsp.D.NSSLRSLIEKP.....PI.hG..pshsP.lTushL....s.GFRLH...s.....GP...LPEQ..hRhh.........................chp.Pp+K..p..KpK...HKpp+ppD....sss...................p.ost.Ds.......-.+.K+KKpK...+c-D..-..RK++KK...-..KK+..KKp.+cosc..s...s......................................... 0 25 31 58 +10111 PF10279 Latarcin Latarcin precursor Bateman A anon Bateman A Family This family represents the precursor proteins for a number of short antimicrobial peptides called Latarcins. Latarcins were discovered in the venom of the spider Lachesana tarabaevi [1]. Latarcins are likely to adopt amphipathic alpha-helical structure in the plasma membrane. 25.00 25.00 25.20 33.40 24.90 22.60 hmmbuild --amino -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.80 0.72 -3.79 7 14 2009-01-15 18:05:59 2007-08-08 11:04:13 4 3 1 2 0 16 0 66.10 32 75.41 CHANGED TuYsVsE-LEs.ELD-L-............................ttuh.......................................................................................LtchsEsLc.l-Dh.p.EEARu.hhthhKcchKchhpphhc+h .puYsVsE-l-spEL--hpttuh...............................Lt-hsEpLctlpsh.p.EEARu...t.h+chh+phhtthhph.. 0 0 0 0 +10112 PF10280 Med11 HSPC296_Med11; Mediator complex protein KOGs, Finn RD, Coggill P anon KOGs (KOG4507) Family Mediator is a large, modular protein complex that is conserved from yeast to human and conveys regulatory signals from DNA-binding transcription factors to RNA polymerase II. Not only are the polypeptides conserved but the structural organisation is also largely conserved. One or two subunits are either fungal or vertebral specific but Med11 is one of the subunits that is conserved from fungi to humans [2]. Med11 appears to be necessary for the full and successful assembly of the core head sub-region [4]. 
25.20 25.20 25.40 27.30 25.00 25.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.40 0.71 -4.05 25 234 2009-01-15 18:05:59 2007-08-09 09:12:30 4 2 208 15 169 229 0 118.80 24 78.35 CHANGED sp-RLcuLs-IDpcIsplLppAupslppLup.pp........................ts+pthpppsppFhpoLspVsspLR+pIphL--ssss.scptssh....................................................................tsLsls.lsp+psshs+c+LpEhhpp.hcphls .......h.t-RlppLscI-+.....cIsp.lLppAupsltpLuppps.......................................pp..pthcpp...sppFh.pslp.p.l-scLpcQIphLscsshhpscpsssh................................................................................sshcs.s.hsh+p.hphs+p+hs-lhpp.hpph..................................................... 0 42 81 131 +10113 PF10281 Ish1 Putative stress-responsive nuclear envelope protein Wood V, Coggill PC anon Pfam-B_11056 (release 21.0) Repeat This family of proteins found in fungi is a putative stress-responsive nuclear envelope protein Ish1 [1]. 24.10 24.10 24.10 24.20 24.00 24.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.60 0.72 -3.86 71 642 2009-01-15 18:05:59 2007-08-09 14:52:20 4 9 141 0 466 616 0 37.40 36 30.74 CHANGED acsWos..scL+paLcp....aGl.sspst......s+-cLlpts+csh .....a-sWSc..ocLKpaLcp....pGlssspss......pR-cLlsts+cp............... 0 129 251 391 +10114 PF10282 Lactonase DUF2394; Muc_lac_enz; Lactonase, 7-bladed beta-propeller Wood V, Coggill PC anon Pfam-B_1372 (release 21.0) Family This entry contains bacterial 6-phosphogluconolactonases (6PGL)YbhE-type (EC:3.1.1.31) which hydrolyse 6-phosphogluconolactone to 6-phosphogluconate. 
The entry also contains the fungal muconate lactonising enzyme carboxy-cis,cis-muconate cyclase (EC:5.5.1.5) and muconate cycloisomerase (EC:5.5.1.1), which convert cis,cis-muconates to muconolactones and vice versa as part of the microbial beta-ketoadipate pathway.\ \ Structures of proteins in this family have revealed a 7-bladed beta-propeller fold [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.00 0.70 -5.37 81 4118 2012-10-05 17:30:43 2007-08-09 15:02:03 4 86 2565 28 1191 3490 508 268.50 22 71.36 CHANGED phalGoYTp..............ttupGIathpl...sspoGpLsshph.ss.....phssPoaLsh..ptssphLYulscts.......spGulsuaplsspp..upLphls......ph.s..tGssPsalulstssphlhsANYt.......uGslsVaslssc.Gslttssp.hhppp......Gs..GPp............pcQcssHsH.sshoPcscallssDLGsDclhhYch...sssst....Lst...hsph.pstsGuGPRHlsFps...s...................sphAYllsELuupVslh..pYs..tsG..ph.pplpslsslPt..s.hsup.....................s...tuAuI+losDG+FLYsSNRG...................psSIulFslstssGpLphlpthsotGph..PRcFsl.sssGcaLlsusQcSss.lslapRDspoGpLs.hsps.htsspPs.................sVhF 
.........................................................................................................................h...........................ttl..h.h.......t....t..t...t..ht......t...h................t.s.t....h.....h...h...........t..........t...t.h...l.ash..t.....................th..sh.t.ht..............t...hs..................t..................st......s.a.l..s..h...t....t..t.p..h...lh.....s.....u.s...Yt........................tu..h...h.h........htt...u...............................................................................................................s...ts....H..h....h.sP....s....tp..hl.h.s....s....s....L......t................D.p......lhh...ap.h....sttt.............h............h...th.......G.s....G.......PRHh...s.Fps.........s...................................................tph.hYl.h..s..E...l...s.....ss....lslh...........phs.......ps...............ph....p...h...p.p...hs...hhst......s..hsst.................................................p......tsu.s.l..p..l.....o.....s.....D.....G.....+.....a.....L.Y.s..us.Rs............................................tss...l...u..l..a....s......l............t....s...s........s.............p...............L....p.....h...l.....t..h..........h.........s......o.....t.....G..p......................P........R.........s......h.....s...l......s........s......s..............u.....c.....hL.h.....s......ss.p...p..o................s...s....ls..la..p..h..s.....pp.G.....lt.................ts............................................................................................................................................ 0 394 731 968 +10115 PF10283 zf-CCHH Zinc-finger (CX5CX6HX5H) motif Wood V, Coggill PC anon Pfam-B_93850 (release 21.0) Domain This domain is a zinc-finger motif that in humans is part of the APLF, aprataxin- and PNK-like forkead association domain-containing protein. 
The ZnF is highly conserved both in primary sequence and in the spacing between the putative zinc coordinating residues and is configured CX5CX6HX5H. Many of the proteins containing the APLF-like ZnF are involved in DNA strand break repair and/or contain domains implicated in DNA metabolism. 25.10 25.10 26.40 26.20 25.00 24.50 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -6.99 0.72 -4.10 22 261 2009-01-15 18:05:59 2007-08-09 16:02:15 4 30 96 6 185 282 0 25.50 48 7.07 CHANGED PcC.YGspCYR+NPtHhpcasHssps ...pC.YGspCY..R.KNPtHhpcasHPs....... 0 82 94 142 +10116 PF10284 Luciferase_3H Luciferase helical bundle domain Bateman A anon Bateman A Domain This domain is found associated with the the catalytic domain of dinoflagellate luciferase . Luciferase is involved in catalysing the light emitting reaction in bioluminescence. The structure of this domain has been solved [1]. This domain has a three helix bundle structure that holds four important histidines that are thought to play a role in the pH regulation of the enzyme. 25.00 25.00 26.60 58.10 18.30 16.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.38 0.72 -4.02 7 51 2009-01-15 18:05:59 2007-08-09 16:29:47 4 3 14 1 0 49 0 65.80 70 21.82 CHANGED CEK.GFEsusssKGGALsAtpVE+hG.-sFKsGLHpPpFHs-GLHKPhEuGGKsYETGFHYLLEsHE CEKTGLEuGGsu+GGALNAAQVtHLGc-sFKsGLHKPcacsEGLHKPHThGGKTY-TGFHYLLEAHE 0 0 0 0 +10117 PF10285 Luciferase_cat Luciferase catalytic domain Bateman A anon Bateman A Domain This domain is the catalytic domain of dinoflagellate luciferase . Luciferase is involved in catalysing the light emitting reaction in bioluminescence. The structure of this domain has been solved [1]. The core part of the domain is a 10 stranded beta barrel that is structurally similar to lipocalins and FABP [1]. 
25.00 25.00 46.90 46.40 20.40 19.70 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.82 0.70 -5.25 3 71 2009-01-15 18:05:59 2007-08-09 16:40:55 4 4 18 1 0 69 0 174.50 64 66.51 CHANGED PLCKDPYGPEVpuLVEsLL+EApDD+TLCF-NFpcPCPQLTKEQVAhCKGFDYGDKTLKLPCGPLPWPAGLPEPGYVPKTNPLHGRWITVSGGQAAFIKEAIKSGMLGAAEAHKIMADTDHHQTGGMYLRINQaGDVCTVDASVAKFARAKRTWKSGHYFYEPLVSGGNLLGVWVLPEEYRKIGFFWEMESG+CFRIERRAFpsGPYMFLRQATEVAGKISFVFYVKVSNDPGSKPIPLQSRDYTALAGpDNVPDNLGKPYsCTAKDLDYPpKRDuWLDpNKcpMLcQR-lVuoAF ...........PLCtDPautElpslspsLLp-AppD+TLsasNFpDPCPpLTKcQVt........sKthpLPCGsLPWPAGhP.PsYVPKTsPLpGRWITlSGGQttFIKpuIcsGMLGtuEApKIhADTDHcpTGGMaLRINQhG-.CTVDASVAKaARAKRTW+SGHYFYEPLVsGup..GVhsl.EEY+pIG................................................................................................................... 0 0 0 0 +10119 PF10287 DUF2401 Putative TOS1-like glycosyl hydrolase (DUF2401) Wood V, Coggill PC, Bateman A anon Pfam-B_11571 (release 21.0) Domain This family of proteins is conserved in fungi. One member is annotated putatively as OPEL, a house-keeping protein, but this could not be confirmed. It contains 5 highly conserved cysteines two of which form a characteristic CGC sequence motif. It has recently been shown that this family is related to known glycosyl hydrolases [1]. 
21.50 21.50 22.70 21.60 21.10 20.10 hmmbuild --amino -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.70 0.70 -4.88 39 173 2012-10-02 19:29:29 2007-08-10 10:30:32 4 7 100 0 120 172 22 219.60 41 50.78 CHANGED uusWsRsuYY...osuou-NlTFLNptGu.tuSGsaSssaGsSLSYAsocGsuuASSspsLs.-sh.............lsSspEasIaSsppCsss....sCGaYRpG.IPA.....YHGFuGssKhFlFEFpMPs.sssssu..hshDMPAIWLLNA+IPRThQY....ssssCSCWp....oGCGEhDlFElLsuusc...+LhSplHshQG.ss....ssGuGsssYFpRPTs..uohKuuVlFsusss....lplltl.s-shsFssolsussVpsWl ..................................................usWsRhuYY.ps.ssuos-NloFLNptGu..ttSGsho.sshGsuLS.YA..suDG..s...uuAuospsLs.ssh................lsSspEahIhSsppCsss.....sC.GaaRtG.hsA.....YHGFsGssKhFlFEFpMPp.spssou.........hs.DMPAIWlLNA+IPRThQY....spss..CSCWt........oGCGEhDlFElL....sussp...+hhSphHshQ........thusGspsYFpRPos..uohK.suVlFsusss.....lplltl.s-sh.sFs.s.olssssVsph........................................... 0 25 66 105 +10120 PF10288 DUF2392 Protein of unknown function (DUF2392) Wood V, Coggill PC anon Pfam-B_10085 (release 21.0) Family This is a family of proteins conserved from plants to humans. The function is not known. It carries a characteristic GRG sequence motif. 27.30 27.30 28.40 28.00 27.20 27.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.22 0.72 -3.71 35 268 2009-01-15 18:05:59 2007-08-10 11:46:09 4 8 242 0 195 274 0 104.40 28 23.31 CHANGED LAsclLutlsKGRGuulshpls.sssp......tclphlhPLRDlltpElphYsphttls.h.hs...hhhp................tpphscshoIpcLsppalpsLpps.aPSssSTVlRTusKL ....................LAhchlstlshGRGttlshp.....ss.sspp.......................tslhllhPlRDhhtpElshYsp.ht.s..l...phhhs....hh.t....................................................psscstSIpcLhppalpslppp.aPusluTVhRTusKL.................... 
0 57 100 160 +10122 PF10290 DUF2403 Glycine-rich protein domain (DUF2403) Wood V, Coggill P anon Pfam-B_11570 (release 21.0) Family This domain is found in the N-terminal region of members of DUF2401 Pfam:PF10287. The function of this glycine-rich region is unknown. 20.90 20.90 21.70 22.20 20.80 20.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.13 0.72 -3.82 26 164 2009-01-15 18:05:59 2007-08-10 15:40:59 4 7 97 0 113 155 1 64.50 42 14.41 CHANGED scplpas....NVG..aSGoYpsVopMspto....C...sC..spsshpFSGslAPlsEcLSVHFRGPLpLpQFAVYhPs ......................ptl.Ys....NlG..hoGoYpcVosMppts........s.....sC...sppshsa..SGsluPLsE......E..LSlHFRGPlpLhpFusYhs............ 0 23 62 99 +10123 PF10291 muHD SAFF; Muniscin C-terminal mu homology domain Yang H, Coggill P anon Yang H, Pfam-B_7632 (release 21.0) Domain The muniscins are a family of endocytic adaptors that is conserved from yeast to humans.This C-terminal domain is structurally similar to mu homology domains, and is the region of the muniscin proteins involved in the interactions with the endocytic adaptor-scaffold proteins Ede1-eps15. This interaction influences muniscin localisation. The muniscins provide a combined adaptor-membrane-tubulation activity that is important for regulating endocytosis. 
25.00 25.00 25.10 25.40 24.60 24.90 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.99 0.70 -5.20 32 429 2012-10-02 01:13:52 2007-08-10 17:13:25 4 5 231 1 244 384 2 258.90 34 33.14 CHANGED .GLsASlsEslsAhF+sGtlsc.............splsGElALsYsus.sss.......s..........tshslRlsNhpplEKVhsNppal..........pppss.......sc...aplshstlsspo.hsu.............hKYpl+hp.......s.sPlhlpssW+hEsppsulhlpYphNs....uhsss.....lsLpNlslsVsl-su..............psTus.o.cPp..usas+EppplsW+hs-.lslssps...t+LlARF.ssssss.........p.usVps+Fphc.ssshpt.......lslshh..............t...P....................s..ap.s.spRplsuG.p..............Yhup ..........................................................hslssuhsEsVsAaF+susso+.............l+ls..GEhsloFPuuhsphhss............sP...........ssLsFR.l.........p.Nhu.p..LE+..lhPNspLlhp........sspss...............sp..........Fhl.NhssLtst.LppsuptpPtu.................................aaNlshLKYQlp....sps................pssPLpLsshW+s.csspTslpl-Y+hNsp......uhsss...............ssLpNlpllVsl..-Gu...............sTshp.u.hPs...AsW..stEpp+lhW+ls................-...lo.pucst....GpLh...ARFphspuss..........pPusl..s....spFssc.....u...so..LSG.........hclpLs......................................................Gssa+h..s.hl+++hssG...+Yhu.s............................................................................................ 0 56 99 180 +10124 PF10292 7TM_GPCR_Srab Srab; Serpentine type 7TM GPCR receptor class ab chemoreceptor Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srab is part of the Sra superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. The expression pattern of the srab genes is biologically intriguing. 
Of the six promoters successfully expressed in transgenic organisms, one was exclusively expressed in the tail phasmid neurons, two were exclusively expressed in a head amphid neuron, and two were expressed both in the head and tail neurons as well as a limited number of other cells [3]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.13 0.70 -5.74 17 186 2012-10-03 04:04:29 2007-08-10 17:28:29 4 4 10 0 183 384 0 249.20 17 86.59 CHANGED ppsCphMtpluoStaL+hoLhhpLllsllulPlhlhu...laphppspLFHhNh+lIhphHhhuhhlHshuRlhLHuhDLaNYhs....hssC-hhPshhRChllRhhYshGhalsssTslsLlIERhlAThpuspYEpptphhGlllshhQlsluhhhhhhhapphsF....sshh.YChshpsu...sh.hplshhlslhsQlluhlhFtaLh+hNc+lRs..tphpuoLSsRYQLEQNlpShpsL+hFushshlFlhhphhhhhhlhhhutphppspYhAllElstshPlYullhhhhlh+p.+c.+schppsLpsphphstsp...YF-papc...pls ..............................................................................................................................h..hhhs.hh.s.h....h.hh....hh......t.h.h..H.N.hphl.h..hhhh......h....h.lh..sh.................h....h...h...............h........h.h....hh.............p.s.p.h.h........t..h...............hC..........h.hhRh.h.hu..........h.h.h.......s.......th.o...hh..h..l.....h.....lERh..............lAThh...pp.Y..E................p.p.t..t....h.h...G...h.hl................s.h...............hp..hh.l.uh.h.h.h.h...h.hh..t...t.h.............t.hh...a....C....h.....................t..s..........h.....h....hh..h.h..h..hh.p..l.hs.h...h.h..h.........h.........h..l.h....t.h.N.p..................p....h....p....................t...sLop........+......aQ......lppNh..............p.shp.h.l...h........h.h..hp......h..hhh..h.....hh...h.h.h..h........h........................................h......................................................s...........h...h..................h.............................................................................................................................................
............................................ 0 64 90 183 +10125 PF10293 DUF2405 Domain of unknown function (DUF2405) Wood V, Coggill PC anon Pfam-B_12420 (release 21.0) Family This is a conserved region of a family of proteins conserved in fungi. The function is unknown. 20.20 20.20 20.30 20.20 19.90 20.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.75 0.71 -4.49 32 202 2009-01-15 18:05:59 2007-08-13 10:57:13 4 25 165 0 156 211 0 159.50 26 6.23 CHANGED hpschlRlhscuPplcp..hWsRllslpsh+lshc..phcpp....................lslps-sl+lslPHpalhaplhDNlssshKulKQLptph+s.....sssch....lhs+p..pstplP+lsl+octlhaplEDDPFEpcLuhIYclGllEQ+pRLp+.ptFct+spclppsspppt ........................................................................................................h...p.hRhhstsPthpt...hW..scllslpshclshc.tptpts.............................t.hslss-sl+ltlPachhha.clhDN.hlshhKulKpLhapaKs........ss.ch.......lhspt..tPptlP.clsl+s+phhh-lEDDsFEh+LuhIYclGl.EQ+pRLtR.ptFctKlpclpptp...p.......... 
1 42 83 139 +10126 PF10294 Methyltransf_16 Putative methyltransferase Wood V, Bateman A anon Pfam-B_19672 (Release 21.0) Family \N 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.08 0.71 -4.81 15 2584 2012-10-10 17:06:42 2007-08-13 15:02:31 4 72 442 2 1819 5419 1399 156.00 21 50.49 CHANGED hhcss.LpIpE-sus.ulutplWDAulshshYLtpt..tt............tspphsshslLELGSGsGlVGlulAth....h.ssssVhlTDl--.hh-lhcpNIpl..t...hlssclpscsLcWG-.cL.s-hhssp.hDLILsADClYhEcuhssLlpoLpcLssssss..lLhuaK+R.....+cu-ppFFphlcct ......................................................................ht...........................h...G.h.h.l.W.s....uuh.....hL....u...p.......a.l.p................................................................t.t....h.psp.p.l.l.EL.....G..uG.s.G......L..s..u.ls.suhh.................................ss.p.V..h...h.T..D.......h....s...p........h.....l..p....................l...p...t..........N.l....ph...N....t............................t...s....p.......l.......p............s........t...............t....L..........p.......W............s.......p............................h...............................................p...............h..............................t..............................t...........................h..........D.............l...........l..........l...u...u....D....s....l...........Y.....................p.......h.....h.............Lh...p...s...l...p...t.h...h......t.........t.t..................h.hh.s.........t........................................t........................................................................................ 1 693 1089 1500 +10127 PF10295 DUF2406 Uncharacterised protein (DUF2406) Wood V, Coggill PC anon Pfam-B_13850 (release 21.0) Family This is a family of small proteins conserved in fungi. The function is not known. 
21.10 21.10 22.90 53.60 17.60 16.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.61 0.72 -3.46 18 139 2009-01-15 18:05:59 2007-08-14 10:52:48 4 1 105 0 94 127 0 69.80 52 25.28 CHANGED AlpEAQPappAh.......htpppstphpstpc............+DhaGpsIs................................................pPDhSNPTRsR.ERPLDTIRuFEhAIsG .......................................AlpEAQPh.ppAh..................pppht.hps.p+..................+DhaGpsI..s................................................pPDlSNPTRsR.ERPLDTIRuFEhAIsG... 0 13 43 76 +10128 PF10296 DUF2404 Putative integral membrane protein conserved region (DUF2404) Wood V, Coggill PC anon Pfam-B_12178 (release 21.0) Family This domain is conserved from plants to humans. The function is not known. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.82 0.72 -3.85 32 588 2009-01-15 18:05:59 2007-08-14 14:30:48 4 15 262 0 414 579 1 90.30 24 12.53 CHANGED NAllGRlFhshh+opthpshltpKIpKKLs+l+...pPsFl.scltlpclDhGsssPhlosspl.plsscGphhhs.............hslpYpG.......shplplpTphsl ............................Nslluphhhphhpss.h.h.....ps....hlppplpcpLs.clp.....hPsal..sclpls-lshGsssPhl..pss.c.l...h..t....l......s........t....p.G........hhhc...............................h-lpYsG.......shplslpophp........................................... 0 130 232 350 +10129 PF10297 Hap4_Hap_bind Minimal binding motif of Hap4 for binding to Hap2/3/5 Wood V, Coggill P anon Manual Motif In Saccharomyces cerevisiae, the haem-activated protein complex Hap2/3/4/5 plays a major role in the transcription of genes involved in respiration [3]. Hap4_Hap_bind is the essential domain of Hap4 which allows it to associate with Hap2, Hap3 and Hap5 to form the Hap complex [2]. 
21.10 21.10 21.50 21.80 20.60 20.40 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.05 0.73 -6.22 0.73 -3.92 11 175 2009-09-10 17:07:33 2007-08-14 17:00:41 4 3 128 0 121 171 0 16.90 77 3.20 CHANGED sSKcWVLPPRPKPGRKP ...TSK-WVlPPRPKPGRKP... 0 29 67 104 +10130 PF10298 WhiA_N WhiA N-terminal LAGLIDADG-like domain Mistry J, Bateman A anon Manual Domain This domain is found at the N terminal of sporulation factor WhiA. This domain is related to the LAGLIDADG Homing endonuclease domain while the C terminal domain of WhiA is predicted to be a DNA binding helix-turn-helix domain [2]. 23.40 23.40 23.40 23.40 23.10 23.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.47 0.72 -4.14 74 1805 2012-10-03 01:41:40 2007-08-14 17:02:56 4 4 1794 3 299 986 93 85.90 33 27.65 CHANGED pssppA...ELuAllRhsGuLpls..spp.lslclpT-ssulARRlapLl+cha.slpsElhVppptpL+K.sshYlVpltps..spplLccl.sll ................t..pt+AELuAllRhsGuLsls.....sp.......p....lslslpTENuslARRlaphlcchY..slpsElhV+p.+hp.L+K..NshYl.VRlspp....spclLp-Ltlh............. 0 110 208 263 +10132 PF10300 DUF3808 Deme6; IML2; Protein of unknown function (DUF3808) Wood V, Coggill PC anon Pfam-B_15386 (release 21.0) Family This is a family of proteins conserved from fungi to humans. Members of this family also carry a TPR_2 domain Pfam:PF07719 at their C-terminus. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.47 0.70 -5.93 20 568 2012-10-11 20:01:01 2007-08-15 15:32:33 4 21 235 0 370 612 25 413.60 24 75.08 CHANGED hpAlsLhLsschp-AhshLc...h...sspShaH.uLGtusltalpAhlTFEscchppAsssL+cAppssp+.Rc+uphhs.............t..h....phht..thctplCYAEshLh+AsLsF.hsEshlshIKGuhKlR+uYphYc-hhphhpth....................................................................................................shtps.........psccalcuGVphGhGlFpLhlSLlPspll+LLphlGFpGsR-hGLptLhc.uup.scsl+usLusLsLLhYashlp.hhuh.............................tphshccs-plLt.hpppaPpuulaLFacuRlptlpGpl-pAlphhcpsh..tsppchKQl+pLChaElhashsappcacpAhphhhhLhcpScWS+AhYsYhpushhshhscp............-ttstt.-ptsplhcplssLtt+hth..KslPhE+FshRKsp+apup..p............shhssPshEhhYhWNGashhuccth..puhhphh.p...............p.pp........D-ps.....lhpLL+GlsL+pL ...............................................................................................................................................uhthhhssphptu.thhp................................tpps.............hh...s........hshuhl.hhp.Ahh.sF..-..................pphp..Ahpthpps...phst.t..pcpsphhp.....................................t...h.......tth.h.....t.hchplshA-s.l.p.Ahlsh...pps..h..hthlKuhhplRpuathh..p..p..hhp.lpt............................................................................................................................................................................................................................p...hhcuGlphshGhhpLhlShl.Psplh+llphlGFp.G...........s+phGLp.L.....h.....p...u...............s...............p..tpsh+usls.slhLLh..aashh..hhsh.........................................................................................thshptscplL...t.hhttaP......p.us...la...hh.puRhthhcsp....lp...pAlphhp.psh.....p.........................p.p......p......h+................Q..........h..p..t...ls.h.a.E.hhhs..hh.hpapt...A...h...h.....h.Lh.p....s.p.W..S..+....
...u.hYhYh......tu.shh..hhc.........................................................cph.plhp.ps............ssh........h.......t.+hth........+sh.Ph-pFsh.+Ksp........+a.tpt..............................hhh..shhEhhYhWsuhthhstp.......shh.h............................................................................Dp.s..........lh.lLhuhhh+.................................................................................................. 0 129 181 279 +10134 PF10302 DUF2407 DUF2407 ubiquitin-like domain Wood V, Coggill PC, Bateman A anon Pfam-B_17915 (release 21.0) Domain This is a family of proteins found in fungi. The function is not known. This domain is related to the ubiquitin domain. 25.40 25.40 25.40 25.50 25.30 25.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.50 0.72 -3.80 46 152 2012-10-03 10:59:06 2007-08-15 16:09:52 4 4 135 0 111 189 0 115.20 28 33.54 CHANGED hhlsIRFos......slPD...l.LsIs....s.ss.sTsstLKp...........................................hIRpcl..........sp.......hss+RLRLIa.......sG+h.Ls.-ssslssplphs........................................................ptpupts.p.................+h.YlHCslG-. ...................h.hlsIRFos......ulPD...l.LsIs......sssp..sTsttLKp...........................................hIRppl.......................................s.p.......ssp+RLRLIa.......sG+l.Ls.Dsoslusplph.....................................................................t..p.ptpupss.t....................................phYlHCslG-................................................................................................................. 0 27 58 92 +10135 PF10303 DUF2408 Protein of unknown function (DUF2408) Wood V, Coggill PC anon Pfam-B_16841 (release 21.0) Family This is a family of proteins conserved in fungi. The function is unknown. 
25.00 25.00 45.80 25.00 24.10 24.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.89 0.71 -4.04 22 259 2009-01-15 18:05:59 2007-08-15 16:25:05 4 4 128 0 202 255 0 114.90 31 47.36 CHANGED plcplss-LtshapcLlsIRRslhshsopsKhs.....................................ssclppLpppLccl-spR..hDG...cFhus-.....sphlps.QsllsGLLDcChphhpDl.tccs......................pls.phpshYcpLl-I+sp.LEpLhlT+RWTLRE ................................s..-.lotsL.PlappL.ol+RsLhplpppuths.......................................................................spELhshphcLppIDshR....sDG.............KFhsss.............tph..t.QuhlssLLscCaclsp-lptptt.................................................................................................... 0 58 117 174 +10136 PF10304 DUF2411 Domain of unknown function (DUF2411) Wood V, Coggill PC anon Pfam-B_15078 (release 21.0) Domain This is a 38 residue domain that is found in proteins at the extreme C-terminal end of some HEAT repeats Pfam: PF02985. the function of this domain is not known. 20.10 20.10 20.10 20.10 20.00 19.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.46 0.72 -4.31 21 177 2009-01-15 18:05:59 2007-08-15 16:51:16 4 5 166 0 127 191 0 35.70 31 3.74 CHANGED pslpclhcsLcaVtppDs.DslV+tpspssL-pL-sh ..........l.clh+lL+aVtsp-s..Dsll+tHAphsLEpL-s.... 0 28 59 100 +10137 PF10305 Fmp27_SW RNA pol II promoter Fmp27 protein domain Wood V, Coggill PC anon Pfam-B_15444 (release 21.0) Domain Fmp27_SW is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation [1]. It contains characteristic SW and GKG sequence motifs. 
25.00 25.00 26.00 27.30 24.90 24.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.00 0.72 -3.59 31 159 2009-01-15 18:05:59 2007-08-15 16:57:59 4 12 125 0 123 169 0 102.10 32 3.74 CHANGED pAhp+LpcphSpSWlp+lcthcphpppphpchpphhaGtcp.htpshs.scsllshsppPsLhshlhc-lDlsls+PpFslcclscFlachGKGhP+spcYSlL ....Ahp+Lpcp.upSWhcRlcpthphppsphcchp..........ph.............hhGssc...sspshp....p..sc..pll..sh.sppPsLhshllp-lclslsKP.S.Fslcc..lPcFlHclGKGhPcDhcYoLL.. 0 30 68 110 +10138 PF10306 FLILHELTA Hypothetical protein FLILHELTA Wood V, Coggill PC anon Pfam-B_18082 (release 21.0) Family This is a family of conserved proteins found in fungi. It contains a characteristic FL(I)LHE(L)TA sequence motif, where the bracketed residues are I, L or V. The function is not known. 25.00 25.00 32.00 30.70 24.20 21.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.02 0.72 -3.94 17 117 2009-01-15 18:05:59 2007-08-15 17:09:20 4 3 115 0 85 112 0 89.50 34 37.06 CHANGED +lLpRl...P+Fh+.asp.hhsuPhoalsSFLILHElTAIlPLhulWahFap..........................hshhs.htlPsa.......hlscuh............chhcchhp+h.Gathhstpc .................l.sRL...P+Fh+.Ysp.hhsuPl.oalsSFLILHElTAIlPLhuLahhFHh..........................................hsh...hs...hhlssh........hlscGs....................cpht+hhc+h..Gaht.t...ht..................................... 0 19 46 71 +10139 PF10307 DUF2410 Hypothetical protein (DUF2410) Wood V, Coggill PC anon Pfam-B_19378 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known.There are two characteristic sequence motifs, GGWW and TGR. 
22.10 22.10 26.30 22.30 21.30 21.20 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.30 0.71 -4.94 20 120 2009-09-10 23:56:53 2007-08-15 17:10:28 4 1 115 0 88 119 6 193.30 40 37.24 CHANGED pSPLPNPpLWsusTIGhLpu.-sFuN.GGWWaDsclLuA..TG-GhthEcs+AWcGWWNEcIVcLlcLShcpKD..uLoVLLTGRuEssFucLIcRh....lcS+pLs...............FDhlCLKPp.sstspcasoThcFKpsFLc-Ll.pTYp.ps-EIRIYEDRl+HlKuFRcFFcphscp.........sRpslss-VIpVs-tspYLsPlsEsApVpchINsHNtsl .............oPhPN...spLWsssslG.h.Lps.pshss.G.G..WWpDsphLuA.....ospuh.ch.c.cs+u.......hcu.............WWN-pIVpLl.chShppKD..........sLo........V...LLTGRsEssFsclIc+h..........lpS+tLp...............FDhlsLKPphs.....................tspcapoTMcFKQpFLccLl.pTYc..pucEI+lYEDRl+Hl+uFRcFFpphNpp...............sR.tsl.su-VIpVs-hsphL.sPlhEhApVpcMIspHNtth................................ 0 21 45 73 +10141 PF10309 DUF2414 Protein of unknown function (DUF2414) Wood V, Coggill PC anon Pfam-B_22455 (release 21.0) Family This is a family of proteins conserved from fungi to mammals. One mouse member is referred to as ELG protein but this is not a homologue of human ELG protein. The function is not known. 28.90 28.90 28.90 29.00 28.50 28.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.98 0.72 -4.29 19 181 2009-01-15 18:05:59 2007-08-16 10:01:07 4 6 164 0 138 181 0 61.80 39 12.76 CHANGED tplR.Esl+lpGVDshST-DlKsahstY............................hs.p.ss+IEWIDDoSsNllatopcsutpALhsl ...................plp.-plalpG..V..Dc....hoTpDlhsYhp-a.................................s.Ps.+IEWIDDoSsNlVatsptsAtcALhs........... 0 41 69 108 +10142 PF10310 DUF2413 Protein of unknown function (DUF2413) Wood V, Coggill PC anon Pfam-B_20450 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 
23.90 23.90 24.20 25.30 23.80 23.80 hmmbuild -o /dev/null HMM SEED 444 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.80 0.70 -5.67 21 142 2009-01-15 18:05:59 2007-08-16 10:03:45 4 2 132 0 109 146 0 405.10 31 92.26 CHANGED lL-hlDsLssspsssp...............stpspsss....sssps--DlLp.L--Ltppth.ppsp.....t................sp+pscsss........ssssssppppstscKsscospstpstshsssspsppp......tttsph.sshuShuuhWs..........WGshhusAp.h.psshcpAptsspcIpp...Ecsphhscplps.s.....stltsl.su...................cLt...hsTho....slhpslA.sl...socEhLpIHlsHDL..lsYsuL-.llhssFs+VMu.QVEGG....lhIthsp.sp......................t..tt..hp..ssss.+slshhpGhl.-GpKLs+ANhEshhccahpu+tsscpttcpupt..p......................-sssl+sSDIFluIQAlsh..........tustsp...t...s...h.hcsssssphsFslhL+D.spsIshpThSQuhPt+WhcWLDupt...............................tt.t-..-stslDPpEWVpEWlE-uLuLulGllAQcYVh+RMGl ....................ht.h-sLs.spsssp.................................tstsss...........ststs-pDlLs.L-p.Ltppp...p..st..........ts...........stppspsss....................ssstsp.p.ppt.s.tsp+sscssps.pttp...sspppsppt..........................tt.t.pshs.u....suuhW...............W.G.uh.huoAo.........tshcp..Apss...hpclpp.......Epspphscplpt.h.............s.lpsL.us..........................cLpt.s....hsThT....sllcslAPPI........uoHEhLpIHlsHDl..hsYsuL-sllassFs+VMu.QVEGGt...lslppspput.........................t..sts.hc..pssp.RslshhpGhs.-GpKLspAshEshspcahsscsshcptt.ppupp..s.......................csssl+pSDlFluIQsls.............ttssttp....t.ttsh....ht.pppspppls.FslhLpDshHsIsatThSQuhPt+WhcWL-uss...............................tt.tp..-ssslDP+EWVt..EWlE-sLsLulGllAQcYVs+RMGl....................... 0 30 62 93 +10143 PF10311 Ilm1 Increased loss of mitochondrial DNA protein 1 Wood V, Coggill PC anon Pfam-B_22448 (release 21.0) Family This is a family of proteins of approximately 200 residues that are conserved in fungi. 
Ilm1 is part of the peroxisome, a complex that is the sole site of beta-oxidation in Saccharomyces cerevisiae and known to be required for optimal growth in the presence of fatty acid. Ilm1 may participate in the control of the C16/C18 ratio since it interacts strongly with Mga2p, a transcription factor that controls expression of Ole1, the sole fatty acyl desaturase in S. cerevisiae responsible for conversion of the saturated fatty acids stearate (C18) and palmitate (C16) to oleate and palmitoleate, respectively [1]. 25.00 25.00 25.80 25.00 23.40 24.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.04 0.71 -4.74 22 113 2009-01-15 18:05:59 2007-08-16 13:16:10 4 2 109 0 85 103 0 171.60 30 86.20 CHANGED LSu+ollhu+shFLhsLAaahl+sPpslspsshVhlLGpuMpLsh..h.....hsp.pu.huhhulLhsh.AlsDLIsLht....sph....paapohsP.............................hRlhhaFhlssasYhspssh....................................................lpNplVFTYuFhEhhh.FhlassLR-ERpchhp+hs ..........lSopollhh+slFhhsLAhahhpsPpslsppshlhllupuMplPt..hp....hsp...sushuhhullhhhhulsDLlslhp....sph.....paapo.sP..........................................................................................lRhhlaFhlohhsYhhpssh...........................................................................l+NplVFsYsFhEhhh.FhlassLR-E+pp.hp+..h...................................... 0 14 41 70 +10144 PF10312 Cactin_mid Conserved mid region of cactin Wood V, Coggill PC anon Pfam-B_20647 (release 21.0) Family This is the conserved middle region of a family of proteins referred to as cactins. The region contains two of three predicted coiled-coil domains. Most members of this family have a CactinC_cactus Pfam:PF09732 domain at the C-terminal end. Upstream of Mid_cactin in Drosophila members are a serine-rich region, some non-typical RD motifs and three predicted bipartite nuclear localisation signals, none of which are well-conserved. 
Cactin associates with IkappaB-cactus as one of the intracellular members of the Rel (NF-kappaB) pathway which is conserved in invertebrates and vertebrates. In mammals, this pathway controls the activities of the immune and inflammatory response genes as well as viral genes, and is critical for cell growth and survival. In Drosophila, the Rel pathway functions in the innate cellular and humoral immune response, in muscle development, and in the establishment of dorsal-ventral polarity in the early embryo [1]. 22.50 22.50 24.00 29.20 19.90 21.20 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.34 0.71 -5.04 22 283 2009-01-15 18:05:59 2007-08-16 13:39:26 4 9 220 0 208 282 5 178.40 35 31.47 CHANGED p+tppstphccW.tcE-pFhLcQu+t+ucIRl+-sRuKPIDhLshtlchhp.................................pcsh-hph.h-P..slhpGL.shc-Lc-Ltp-Ichahp.LEpspt.ph-.aWpshhhlsccclpphcp.ptsts+uh..................ssVssDlpcllpsKohppLppLEtpIcpKlpSsps..lDhsYW.....EplL+pLpla..........KA+ApL+chap ...........................p.p-stphcpW.tpE-pFhL..cQAKh+ucIRl+-GRAKPIDhLuhhlph....................................p....................psshp.l-h...t-Ph..shhpGL.....ohp.....-hc-LhcDIcs.ahp...LEp.s...........p....Nh.............-.aWcshpslscDclp+hcp.htsps+uh...............................htsoVssDl..pplh..ps.KohppLpsL.ptpIctKlposps..........lDhsYW.....EpLLppLpsa...........hA+A+L+cha........................................................ 0 77 118 171 +10145 PF10313 DUF2415 Uncharacterised protein domain (DUF2415) Wood V, Coggill PC anon Pfam-B_25751 (release 21.0) Family This is a short, 30 residue domain, from a family of proteins conserved in fungi. The function is unknown. There is a characteristic DLL sequence motif. 
20.10 20.10 20.10 20.30 20.00 19.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -7.96 0.72 -4.28 15 152 2012-10-05 17:30:43 2007-08-16 14:57:37 4 4 138 0 111 166 0 41.20 42 6.54 CHANGED GAFRssKFS.tsshp...DLLllSEHpGRVHllDLRp....shps+QVIs .GAlRshpFS..tsh........DLLsasEcpGRltlsDhRp....sFsp+Qll......... 0 29 64 97 +10147 PF10315 DUF2416 Protein of unknown function (DUF2416) Wood V, Coggill PC anon Pfam-B_28778 (release 21.0) Family This is a family of conserved proteins found in fungi. The function is not known. 26.70 26.70 26.80 27.40 26.50 26.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.41 0.72 -3.80 12 63 2009-01-15 18:05:59 2007-08-16 16:18:41 4 1 62 \N 44 55 0 118.50 32 77.50 CHANGED sthspoPhPuhhhuusL...lt+s.hssps.....................shhtPoptoshhFGuAphLGuahIYDGDlpNGuGFshAWSsLYLlVsG+sSlpulhhG+lhPLsLoslAluNAslYG++Fh .................................................................s.....sP.st..h.s.....h..ps.h.tpps...........................shhtPoptoshhFusspulGuYhlaDGDhpNGuGFssAWSsLYLlVsG..+pShpul.h....hG+..hhPLsLoshuhsNuhlYGpcFh.................................... 0 14 25 38 +10148 PF10316 7TM_GPCR_Srbc Srbc; Serpentine type 7TM GPCR chemoreceptor Srbc Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srbc is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -11.88 0.70 -5.14 62 285 2012-10-03 04:04:29 2007-08-16 17:46:55 4 8 7 0 280 476 0 218.40 23 87.25 CHANGED M.hhh.hhsshlullhuhhshhlNlalLhpIh.....ppKs-hhLFYaRFhlDlhauhslshahsahllh.........ph.schh...h+sllhalshsspslushRuhlslhIolERllAsahPIhY+saRpplPshhIhllhlshulh-phlLFsaCshsl.slPhsCsshtCulNpCahpYWthpcpllashhhhhSllLsh+La..lasp......tppsspplS+..As+lALlDuhhlhlFshlPshhsshh....hhshpshGPhsslhKhhGssIEulllh+lLh+cp .................................................................................h.h..hshh.s.hshhhshh..hl.h............hh........pp.ph...Lh.h.h...+h....h.h.Dhh..h.u...............h..........h...h...hhhh.hh.............................h....s.p.h.....p...h...hh..h....h...shsh......hh.t..h..Rs..h.......l.sh..hI.sh-Rhh................AsahPlhapp...h.+..p.h..h....ss....h.h..l...h.h.h..h..h.h.hshh......-thlh..a.h....h..Cshth.phs.....s.CsshtChhsp..C..ah.pY..a.hh...c.l.h.hh..hh.....hhhoh.hlh..h+Lh......hhpt................t.tt...tspp........hp+...............ANp...lulhDshhhhhFshlPshhhs.h........................h.hp.hGPh.sh.+.hGhhlEuhlh...h................................................................. 0 91 105 280 +10149 PF10317 7TM_GPCR_Srd Srd; Serpentine type 7TM GPCR chemoreceptor Srd Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srd is part of the larger Str superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.76 0.70 -5.43 59 467 2012-10-03 04:04:29 2007-08-16 17:47:12 4 12 7 0 462 1891 0 209.10 18 81.09 CHANGED hplaaslhhlhulhhphhLlaLIlh+SP+tlcshphlLhspshsphhtshhshhsQ...........hRhl..sspholshlshG.C+ah.ushhCahsaphhhpshhtushslllohhaRYhhLpphp...pppphhhhhhlhYhhslhh.............lhhhhs.sst.tlpthpphhaPsY...shs.h.s.......lsGhhshpshsshhshhhhslsshhlslhhhhhR+KllphLpp..s.phSpso+shp+pLlpuLThQshlPhhhal.shhhahhsphshhph.hhpahlhshsslssllsPllolYFlsPYRptlhc .............................................................................................................................................................................................................h....s....p.hhhhhhh...........p.s..ht........ht....h....hl.h...........s..h.......p.h.h.......h....s..h.h...............................R..l.............t.........h..h.h..h............G...s......p..h.......h........t................................h...s....h...............h...........................h................................................h.........s....h........h.......h.s.h..hh...+..h......h......h........................................t..........................h......h......h...........h......h..h....h...h................h....................................................hh..........h.h................................................h...........................................h.........p.h..........t.h..p......................................h..G..............................p....h...................t.............................................................h................h......h..........h....h....h..h..............h............h.........h......h...........h..h..h.....h.....p...t....t....h........h..t..h...l.........................h...s...t..ptt....tpph..........h...............h..............sLsh.Q...........s...h...l.......P..h..h.h.h.h..............hh..........a...h....
...h.........................h........t....h.h....p...................h................p...hh.....h.................................h........s...hhsP...hh.hhh...l....sY+p.h................................................................................... 1 204 272 462 +10150 PF10318 7TM_GPCR_Srh Srh; Serpentine type 7TM GPCR chemoreceptor Srh Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srh is part of the Str superfamily of chemoreceptors [2]. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.97 0.70 -5.61 186 1040 2012-10-03 04:04:29 2007-08-16 17:49:53 4 23 6 0 1028 2479 1 244.00 20 84.56 CHANGED sSsphhshshHllshlslPlalaGhYsIlhKTPppMcsVKhsLlshHhasshhDlhlohlshPalhhPshuGaslGllphh.ulss.lQh....alhhshhs............hhssSllhlFENRa.h.lh.spt...hph+ph..Rhhahhhpahhs.hhhhhPshh.pl.....P.-Qppu+hhhhcph...P.C.....hspphhpt.sshallsh.-.t...hh.h..hshshhhhhhhhpllhFhhhhhhhlhpp.p....p.sh.SppThchQ+pFhhulhlQhslPhlllhlPhhhhhhshhhs..aas.QshsNls.hlhhuhHGlhSTlshlhlH+PYRchhhphhtt 
...................................................................................................................................................................h....h..h...h.h....t.hPh....h...h.s....h..ah.l....h....h....p.o....s........hp.p.h+h.hh.........h.........p...h...h...s...h...h...h...-...............h....h.....h.......s...h...............h..h......h.........P..h................h..h..............h....P......h....h..u...h....h......s......h.....G..h....h.....t.............h......t.....h.......s...........h.......h................h...h...h...h...h....h...h..h................................h.h.s...hu....h.......h.....h....h.....F.....-....s..Rh....h......l.h..t..............h.h.phh........+h....h....h....h......h...........h...h...h.....s.....h....h......h.....h...h..s..h..h...h.......h........................s.......p...Q......p..........t....c................h..........h....p...h.h.......................P..s..............................s.............h...h...p............t...h...h.l....h..s....s.h.......................hh..h......hhh.h...h.h....h..h.h...h................h...h......p...h...h...h..h.......h.h...h......h...h.....h........h..h.t.t.................ph..S..t..pT..h....p.....h..Q.+..............p.h........hh.u..l...h...............hQ...h..............h......l............sh.h.....h.h.hh.Phh..h.........h.h....h....s........h....h...ht..................hhs....p.......h..............h...s....s........h.....h...h..........hhh....s...hH.Gh...h...u...o.....lhh.lhhppsYRphhhphh..h....................................................... 0 287 375 1028 +10151 PF10319 7TM_GPCR_Srj Srj; Serpentine type 7TM GPCR chemoreceptor Srj Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srj is part of the Str superfamily of chemoreceptors. 
The srj family is designated as the out-group based on its location in preliminary phylogenetic analyses of the entire superfamily [2]. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 24.10 24.10 24.10 24.10 23.90 24.00 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.95 0.70 -5.31 35 202 2012-10-03 04:04:29 2007-08-16 17:50:14 4 9 6 0 199 1601 0 225.70 31 86.97 CHANGED Ma.hsWhahalP+lFusLSFllNPlFlYLIhoEpps.phGNYRaLLlaFAlFNlhYSlssllVPlslaua+YsFhlalscGhFh...-poc..ls.ahlusRCuhlusSYAlLllHFlYRYLllassphhpchF.shhlhhShhhhlhahssWp.hlCahhhtus.Eh.RpYIRcsFpcpYssDShclshluuLYt-uSscshhRSWhuIhlhTslSshSlshallluhhIhpKLp..phssshSppTp+hQppLh+ALlVQTlIPIhlSF.PCllsWYuPlFslsLuphhNYhpllAlusFPFlDPlAIIhhLPsaRp+l ..............................................................................................................................................................................................h..ha.....hPh.h..h.h.h.u.h...hh....N....s....h.h....l.a.llhppp....p...hGpY+aLLh.......h..FuhFsh.h..h....Sh..hp......h.l....h.....P............h..................s..............h....h......s..........t.......s..h....................h..h...h......t.........G...................F.................t.....................h.t...h..h..h...s.........R................C.......s.h......lu.h...o.Y........u....l....L.........HF.la.....Ra...h.......s.............l..h........t..................s...p..h.h...t............t.....hh.............h..h..h.........h....h.............hh.......hh...h...h..a.h..h......hW..h...............h..s......h..h....h......h...s................s.........E.....h..+...........p....Y................l................p.................t............s..F...c........a.....s.......s...s......p.......h................s...h....l......h....s...h........a...............................t..........s......o.............t.........h..........h...........p.............o........
...h.......h.....u..............h......h.....h....h.....o........h.....h.....u......................h.................s.........h..........h......h......h...h..................h.............u....h......h..............I...h...t....p.l.............t.......t........t..h.....S..t.p...o....p..hp...h...pLh...puL.................hl................Q...........o...............h..........I...............Phh....h...S.a...P...s....h...h.....s....a....a.........s.............h.......h....t....l..p..h..............................h...h...h...........t...s....h.sl...uhF.s.h......h.DPlA..lhhhlPshRpt....................................................................................... 0 50 68 199 +10152 PF10320 7TM_GPCR_Srsx Srsx; Serpentine type 7TM GPCR chemoreceptor Srsx Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srsx is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.69 0.70 -5.23 23 428 2012-10-03 04:04:29 2007-08-16 17:50:27 4 13 56 0 362 23544 3 174.00 18 62.30 CHANGED hllGlhGNhlhIhlhh+cKpL+S+sshL.slpClucllplsGplhhsh.hhh.thphspspCFhhlshhlauhshQuslhLhlslDhLIhVpFPhhY+plpppt..YlhhthhhPllaSshlhhhGalttss-..hllhCssPhALsspuh...phashSslhlslhllllYhhhhhl.....h+tpspp..............pssshp+lhKSLploVslalhuWhhsplsstlhlthststphtthlphasuhhlhluhopsaaVhhhpSsEYRpsa+pha ...................................................................................................................................................................h..hu.l.h.G..N...hh.h.l.h.h..h........h...p.....p.....................p..L...+.......p.............h...................h....h.............l.s...h.h....s......h.h.c....h....h.....h..............h.......s........p........h.....................h.......h.........h.....h............h..........t....................h.................t.............h...............t..................t........t..........C...........h......h................h............h...........h.........h............h........h.........h..........h....h............p.......u......s.......l......h...l.......h.......h...u...h..D..p.........h..l......u....l.....h........P....h...............p.........Y....t.......t......h.p........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 107 136 265 +10153 PF10321 7TM_GPCR_Srt Srt; Serpentine type 7TM GPCR chemoreceptor Srt Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srt is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
20.30 20.30 20.30 20.70 20.20 20.20 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.02 0.70 -5.67 39 386 2012-10-03 04:04:29 2007-08-16 17:51:09 4 7 7 0 383 347 0 231.10 25 87.42 CHANGED p.LhYs.hpuhsL....YsCs.....tp.hsps.GhpRPhhGhhhlshGlll......hLYlPshhslh+pc.h.phssYKlMlhLulhDhhulhlsSlhTGhLshhGssFCsYPphIalsGuluhusWhuuChssllLslsRss-ls.phhhphlFctphhahllhh.llYuhYhhhFTpPllFsopahuWFFDPhl..G+ssc.....hYhNhscshNNhllsssTshLYhhhshhlhtKht.s...sotthpKsppQIhhQusllChFpssuAhIYshMpFhtss.hllllGplsWphupGsssllYLThN+TIRpull+hlh..+sh+hpp ............................................................................................................................hhGh..hh.Ghhh................hh.Yh.hhh.h.h.h.p.p..p...h.....s..sa.........pl.M.hhL...................uh...........hDhh.s.hhh....s......s.......l...h..sGh.hh.h.......G...h....s....aCph.P.......h..h.h.hh.................G...hs...h............s...................W.hhsshhs..hhLhhp....Rh.h.pl..................hh.h.......t....t...hhh....h.............h.hhh..h...hYh....h.....hh...h.......h.....h......s..........s........h....h...a..s....s...th.....sa.......hh....s..Phh..........t....t...p.......................Y...sh.p....hh.N.N..h....hh...shhh.hhh..Yhhhh......hhl.h.h..p.............tt.....p...................h................p..h...............p.................l........hh.Qsh..hl.....Chh.ph........h..s.u...h.l..Y...sh..h.p.h.h.....s...s......h..hl.h.h.u..p..h..hWph..........spu.ssshlYlhhNpsl.Rpthhphhh........h............................................................... 0 132 152 383 +10154 PF10322 7TM_GPCR_Sru Sru; Serpentine type 7TM GPCR chemoreceptor Sru Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Sru is a member of the Srg superfamily of chemoreceptors. 
Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 24.70 24.70 25.00 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.16 0.70 -5.32 38 299 2012-10-03 04:04:29 2007-08-16 17:51:26 4 5 5 0 293 318 0 226.40 25 90.73 CHANGED oIaGsppYhsapasahs..hs.sllAhlPhlYllPThhllh+Ihhhahpshhpps.p..lNhplFhlIhlsQlhshhFFlsDalhlRLPsTGlhTSWCAs.pPNcaLpllhhhshYh.sYsshlaPhLlsllRLlllhhPppa.pclsp+Ih.chulPhlalaPhhhohhhlPAlGhC+QlthPa.FGulhlhapsshauhpN..shh.hlhsohhahhhsllhNhlLahKLcph.p....phsspppsptspKAElSLTlToluMllsalsNshhslsalh.....shhsYhlhlRPFGNDh-ssllPWlFYLTHPlF+++pp ......................................l.....a.paphp.......hs..hhhhhshhh...shhhhhp.hh.hhh.....t...t.......hp..lF..........h.h.............................h.........shh...........h..hh.-ahhhR.l.....P..o...u........h....h.Ts..aC.....u.....p.........ts..p......h.....l.....hhh.....hhh.hh...h...tY.s....hhs...hl...hshhRlhhhh...............stp....................p..............h...........th........lh..hh..hs.......hhhhhs.hh.hsh.hh..s.uhChph..Pa..aG.ul.hlh........................t.......h................h.s................h.h.............shh..hh..hhhh..hhhhhhshhhhhKltphp....................t...pp..p.+uEho....lohThh....hlhsh..l.hs.....hhhh.....h..hh...................shh...a..h....h..hhRshh.DhphhhhshhFYhTHPhF+pp..h.............................................................. 0 41 58 293 +10155 PF10323 7TM_GPCR_Srv Srv; Serpentine type 7TM GPCR chemoreceptor Srv Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srv is a member of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.64 0.70 -5.28 25 258 2012-10-03 04:04:29 2007-08-16 17:51:59 4 9 8 0 256 5739 1 205.70 17 78.89 CHANGED aashsllolPlYlhllhsLlchRptshs.apTsFYplLlp+sIADlhshlsahhstsh.phshhcsFYaphQpaY.AuhhashhYhslhIRssGlshLohpRahsIshPp.phsphhpssphapllhlaWlsshllull...sLtssshtYcshcs..hshls-cshlppsohhAhlhlslsClhhllsYshLahhlR................ppops........ho+uhpREh+LshpVhlllhA.hshhsaahhpshhu.....pstsss..lFYhRhlYPlssGllSaINPaslLlhN+-lp+plhpplps .....................................................................................................h.....h...........h....................hh...............................a.a....l...h...h..t.hhDl.h.............h.....h..........h..h...............................h..................h..........h......h...h.................t............h.....h.......s...............t.....h...........h.............h....h...h.......h.h...h..p.h.....h.....s....h...........h.....l.....s...h...p.....Rh..s.....lh..h.......P...........................h.....p............................h.h....p.t...........................h.....h.....h.....h.....h....h........a......h.............s....h......h......h....s....h................h.h..h.......s........p...............a.......tt.................h..h....h.....h............s.....................h........h...............t..................h.........................h.......h.......s........h........h......h....................h.........s.......h........h..........................h...............h.....h....h.h....h....h...h.p..........................................................................t..............p..t........t.p..E..h....p...L....h...h..........s.........h.....h....h.h............h.......h.........h......h........h......h......h.....h...h............h................................................t.......h...h.....h......h...........h....a....s....h......h.......s....h......h........s
....h....h....s....shh....lh..hhsp.htt.hh.....h...................................................................................................................................................... 0 83 107 256 +10156 PF10324 7TM_GPCR_Srw Srw; Serpentine type 7TM GPCR chemoreceptor Srw Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srw is a solo family amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. The genes encoding Srw do not appear to be under as strong an adaptive evolutionary pressure as those of Srz [3]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.16 0.70 -5.31 90 944 2012-10-03 04:04:29 2007-08-16 17:52:27 4 31 52 0 866 4349 0 230.20 17 73.85 CHANGED thph...hluhhulllNlhHlhlLTRKuMRssSlNllMlGIulsDlhshhhslhphh..hhhth............cC.h.....sss.oYhhhh..hphhhtslpchs+RsSsWLulhhAllRhLll+.Phss+hppLupsphuhhllhhlhhhshshohhha.hphp.lh............p.t.hh...........C........t..h...t...sthtYhhhhschh...pthhhp.hhhhhsulh.cllPsllhPIlTlhLlh-L+Ksp+p+pphp....ppt............cscpoT+LVlhhTIsFhluEhPhGl.shhl.............phh...hhpss......sl....hhlhpphshhhshlhslNussHhlIChhMSSQYRpTs+plhttp 
..........................................................................................................................................................................hh.hhshhhN.h.h.p.h.h.l..L...p.....t..h.t..t..s.h......hhhsl.shsDh.h.hh..h......h........................................................h.........h.h...h.....h.....h.................h....hhpth...u.....hals.lhh......Ah..h...Rhh....l...t...........p..........h........h..t......s.h.hh....h..h..hhh..h..h..........h.....h.........h.....h...ht...h............................................................................s..................................................a............................................................................h....h.....h...h.s..h..............h.................p...............h..........l..s....s...h...h..h...hhs.h..h.....L..h.....h.....t.........l.....hph......p........p......t...p..tth.....................t................t...................ptppoo.....h.h.l...h.h...h..hh...ahl....s.ph..P....uh..h.hh.................thh.......................................................h...h.........h..........h...h.........h.h..h.hss......p.hhlhh.hSppa.Rp.hh.hh...h..................................................................................................... 0 227 294 847 +10157 PF10325 7TM_GPCR_Srz Srz; Serpentine type 7TM GPCR chemoreceptor Srz Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Srz is a solo families amongst the superfamilies of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. The genes encoding Srz appear to be under strong adaptive evolutionary pressure [3]. 
21.30 21.30 21.30 21.50 21.10 21.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.98 0.70 -5.27 62 372 2012-10-03 04:04:29 2007-08-16 17:52:46 4 15 4 0 367 410 1 207.80 21 75.85 CHANGED hhhhhhhhhhlhhhllhPFYlY...VhKlNRc+D+phhlaP.lssHFYch...l+hsYhlhhhhlhh..hhhhhhhtt...................hhhhlhhhhhhhhhhh....LhllsplFalLlhLLAlp+FllYFF.PptE+hlhhspphh.+hlhhlYlhhllK.-lhhhhhhhhph.t..................phhthhahhh........................hhhhs....lllhlSulLYIPIhISlR.Khup.Lt.SsQpspPppYIaaQolhVhlhKhlhl.hhlhhhh...hsthhhh..........hhhhhDhlooPLIIQlSYLsCN+ ................................................................h................h.hahh...h...s..pcp..t.............................ha...lhp.hhath...hhh.hhhh.hhhhh...hhhh.hhh...................................h.hhh..h..hh....h.hhh...hhh........hhh....hs..psh..ph.l.l.llAl...p.....+hhlaF...h..P.p..cphh.h.pp....h...h.h.lh..hhYhhhhhh.p..hhhhh...h..h.h.h..........................................h..h.h....hh................................hh.hhp..hhhhhoshlYlP....l..h..h..pl.p.+.ht.....h..s.t.....sps.ppalhhQhhhl......h..hhKhh....h.hh..hh................h.....................hh.hhDhhh..hPhllQhoYlhCN.......................................................... 0 76 76 367 +10158 PF10326 7TM_GPCR_Str Str; Serpentine type 7TM GPCR chemoreceptor Str Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Str is a member of the Str superfamily of chemoreceptors. Almost a quarter (22.5%) of str and srj family genes and pseudogenes in C. elegans appear to have been newly formed by gene duplications since the species split [2]. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [3]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -11.93 0.70 -5.30 178 1469 2012-10-03 04:04:29 2007-08-16 17:54:00 4 34 10 0 1456 2149 6 223.40 21 82.72 CHANGED pphhphhuhhhuhhhNhlLlaLllt+ut+phGsYKaLMlhFolhulhaohl-hlspP...............hhHsass..ualhh.h..p.hth...spthtthhl...............s....las...uhauhsluhlulpFlYRYhulppsp.t..l.chFpGhtlhhWhhhslhhGhhWshshahhhtssp......sscal................+.pph..hcpYs.lshsclsahuhhaa............spsG..p....lphpshhshhhhshlhshsahlllahGh+hahph.pcltph....Spps+cLQpQLFpuLllQollPhlhhalPsshlhhh....Phhs....l...ph.shhushlshslulYPslDslshhhllpcYRpslh .............................................................................................................................................................................................h........h..s.......s.h.hh...hh..h..h...........h....u......Y.+....h....hh....h...h....sh....h......t......h.........h......a......s......h..h......p..h..h..h...P......................................................h.h..............t....hhhh........t......................h........h..h..........................................hh.s.....s.....a.s.........h.....h..h...........s...h..p....Fh......a.......R.a...h.sl...............t.......................................h...t.h.h..p.................h..h..h..h.................h.h......h....s........h.h...h..h..........a.....h....h.............h....h..h..h..h.h..........s...........p..t....hh........................................p.....h......t.....h...t.....h...p.........p...p....h......h.h.s........hha......................t.p.s...................h..p.h...........s.h..h.h..h.h...............h.................h.h...h...............................h...............p................h......h...h....h.....h.hu....h....t...........h.a...h.th....pph.t...........S...pplp.p..Q..................ha...h.s.....Ll..............h..............Q..............s.h..h............Ph.h....h...hahPh...h..h.h.h
.h.h....P.h.h.t....................h..ph....t...h....s.t.h.h.....h.......h.h.s.ha.Ps.h.Dsl.hhhhl.ppaRph..h............................................................. 0 456 563 1456 +10159 PF10327 7TM_GPCR_Sri Serpentine_Sri; Serpentine type 7TM GPCR chemoreceptor Sri Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. Sri is part of the Str superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.80 0.70 -5.46 54 363 2012-10-03 04:04:29 2007-08-16 17:54:30 4 17 7 0 359 1625 0 222.70 22 82.08 CHANGED hslDFssPhaLlhaaalIGslSlllNhhsIYLllFcSsKlcsF+YaLLhFQlsshloDlplohLhQPhsLaPlhuGashG..hhsph...htlooHhthshhshlhshQltuLhhCFlpKHQuIuplh..ppp..hsp....hhhhhhhhhshhaPhhsshshhhsslsc-pphcalcpsaPphhspFtsLssFslYphs...ahhhhhlhshhsshhshslhshhshchhphLppl+pplSspsYp+H+sAlpSLlsQhhsoslhh.lPshhhhhllhhthstuQ.....hIsphhhslhssHSslNslVllhosPsYR+hl 
......................................................................................................................................................................h.h..h.h.....t..................t.....a.p.............h.h.h.......h.........h...Q.....h.........s.......h.h...s-....h.......h.s.....h.....l.........hp..s...h.............l.....a........P.........h...h.......u...G...a...s..h....G...hh.s.ph............................h.............s....a....h........h........s................h...h................h.......h......h...h...h...........h...p.............h.........t....s..............l.hh..C......F........h....h....K.Hp..s..l..s...t...h...............p.h.h...h...h...s...p.................h..h...h.h...h..h....h....h.....h.....h....h....h..h....s......h...h....h....h....h...h....h..............................t........h.............s....p...p.............p...p...h....t..h...................l......p................p......p............a.....P..............p........h....h............................h.......p.........h........t....h...........l........a......s...............h.h...h...h...h.......h...h.........h...h....h...h...h....h..h...h...h.....h...h.h..h...h..h.h.h...p..h...h............h...h.....t.........h...p.........p....h.....S......t...s..h....ph...a.pt.s.l........h..............o.....Lh..h..................Q................h....h.................h..s.....hhh..........hP...h..hh..h...h....h..h..hh.......h...........p.....................h.s.......h.h..h.hhs..p.u..h.s.h.hhh..s.aRth................................................................................................................. 0 104 134 359 +10160 PF10328 7TM_GPCR_Srx Serpentine_Srx; Serpentine type 7TM GPCR chemoreceptor Srx Thomas JH, Robertson H, Bateman A anon Thomas JH, Robertson H Family Chemoreception is mediated in Caenorhabditis elegans by members of the seven-transmembrane G-protein-coupled receptor class (7TM GPCRs) of proteins which are of the serpentine type [1]. 
Srx is part of the Srg superfamily of chemoreceptors. Chemoperception is one of the central senses of soil nematodes like C. elegans which are otherwise 'blind' and 'deaf' [2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.91 0.70 -5.29 80 586 2012-10-03 04:04:29 2007-08-16 17:54:54 4 18 10 0 579 7241 0 219.70 18 82.16 CHANGED hhluhhGhlhNhhlhhthh+.h.shpsSFshlstspuluNslhsh.sFLha.hsPhslhshph..h.p..pohhsthhshhsYth.ushsplhlolNRFhAlahPhhYpplashphTph.......hlhhhahh.........uhhhhhlhhhhhsCthhas.phhsat..spt..hC.shh.sh.h.....hhhhhslshhsshlsllThhKlhthppph...thspt..pspp+pp+..phphhhQoshQshlahl-hlshahl.sph............hss.phhpFlssohsWhhlHshDGhlhlhFN..pclpphhhpphpt .......................................................................................shhG.hh.h...N...h...h....h....h......h......h.....h.....p.......................p...s...u.....F..........h..l......s..h........p.sh.s.s....h....h....h............h.....h......h....h......h......h......h.h.....P..h...........h......h...........t........................h..........................s......h.......h.......t.........h.....h...h.......h....h...h.......a.................h.....s...................h........p....h....h....h...uhNRh.hslh.h.s.....h....h....h..p......p...h..h...s.h.............T...h...h................h.h.h.h..h..h..hh....................................sh........h...........h...h...h............h.........h....................................t..............C......h.....h....h........a....s.........p......h......h..........a..........................t.....................C.......s.......h............h.........h.................h..h..h......h..h.....h...........h...h........h.............h......h..h......s.....h...h...s...h....h................+....l.h...h.......phth..........................ttt......t.t..p...p...pp......p+..............ph...t...h.h..........h...Q.............s.....h....h........Q.s..h........h..h........h..
..h......p...h...h.........h.h..h..h......h......................pp....h...h......F.....h.....h.s..s...h.....h.....hh...p..s..h..-.G....h.lh.hh.p...ph.........t............................................................................................................................................................... 0 177 228 579 +10161 PF10329 DUF2417 Region of unknown function (DUF2417) Wood V, Coggill PC anon Pfam-B_22799 (release 21.0) Family This is a region of a family of proteins conserved in fungi some of whose members also have the Abhydrolase_1, Pfam:PF00561, domain in their sequence. The function of this region is not known. 25.00 25.00 32.30 32.10 24.10 24.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.62 0.70 -4.95 21 122 2009-12-10 16:57:14 2007-08-20 13:09:06 4 4 112 0 92 118 0 228.20 36 43.81 CHANGED sE+p.LLs.................................ptsalsPDDPt....VSPaNLap.....lRhlRslsslhlhlohlhahhhLlS.Fl....u.PuhpsRGsuFhshshshlslhh.llsLhF..FulPSphtplhuhllusLLllDhlllluVstlRhc.GWlGluollWAslhulhssltDhhV.............................thGKpc-EERL.....TGR.......EoR+oLpEWlslhlcolhhlllhllshLhTlTLhLpAhDus..................hts.GphahVDssp.apVHLsCh ..........................................E+p.LLspp.............................................tspsaLsPDDPs.......VSPYNLap.....lRhhRhlsslhlhlshlWalhlLlS.Fh....o.Puh..psRGuuFhshshshlolhs.hhsLlF....FulPSp..plhshsluslLhlshhlhluV.phRhcEGaVGlsoslWAslhulaslhtshhV........................................thGKpcEEcRL...............TGR............EoR+oLpEWhslhlps.lhhhlhhllshLhohTllL+uhDup..................hss.GphaaVDss+.YplHltCh............................... 0 18 44 76 +10162 PF10330 Stb3 Putative Sin3 binding protein Wood V, Coggill PC anon Pfam-B_24989 (release 21.0) Family This is a family of the conserved N-terminal end of a group of proteins conserved in fungi. It is likely to be a Sin3 binding protein. 
Sin3p does not bind DNA directly even though the yeast SIN3 gene functions as a transcriptional repressor. Sin3p is part of a large multiprotein complex [2]. Stb3 appears to bind directly to ribosomal RNA Processing Elements (RRPE) although there are no obvious domains which would accord with this, implying that Stb3 may be a novel RNA-binding protein [1]. 25.00 25.00 25.60 42.90 22.60 17.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.10 0.72 -4.30 17 120 2009-01-15 18:05:59 2007-08-20 13:31:59 4 1 112 0 92 118 0 95.00 50 22.14 CHANGED lTPphLup.....lLLp+GPLAIRHlhsaLopplPuFuclssuKtRRLlhuALE.......uGs.cssVlFEKlGWGpWs.....A+chspPtphspphptos.u .........ITPuhLAph+Ls-ILLp+GPLAIRHIhuaLTssVPGFutlssuKtRRLlluALE...........uGsh-ssVlFEKVGWGpWs.....A++tspssphspt......s.................. 0 13 44 77 +10164 PF10332 DUF2418 Protein of unknown function (DUF2418) Wood V, Coggill PC anon Pfam-B_29723 (release 21.0) Family This is a conserved 100 residue central region of a family of proteins found in fungi. It carries a characteristic EYD sequence motif. The function is not known. 19.00 19.00 19.50 19.20 18.80 17.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.25 0.72 -3.70 19 132 2009-01-15 18:05:59 2007-08-20 15:52:43 4 2 128 0 103 132 0 101.10 36 22.07 CHANGED pcclapLpVW-Ps.aslplFshFSPsplllhhhh..........sssshpshhhhllhoh.hhhlhp.pFpphlcDcpllppEshpEYspKaVcP+hpshppDsslss ...............p.c-VWpLslWDPtsh..slpLFshFSPsHlllhhhhhs.......................Ssshhpslhlt.sllohph...hhLhs.pFsQphKDptllppEVhpEY-sKaV+PRhpshh+DVusp.h............ 1 28 58 89 +10165 PF10333 Pga1 GPI-Mannosyltransferase II co-activator Wood V, Coggill PC anon Pfam-B_50403 (release 21.0) Family Pga1 is found only in yeasts and not in mammals. It localises in the ER as a glycosylated integral membrane protein. 
It binds to the GPI-mannosyltransferase II subunit of the GPI and it is responsible for the second mannose addition to GPI precursors. The GPI-anchoring complex is a glycolipid that functions as a membrane anchor for many cell-surface proteins [1]. 20.00 20.00 20.30 20.30 19.90 19.50 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.06 0.71 -4.63 5 47 2009-01-15 18:05:59 2007-08-20 17:40:11 4 1 43 0 29 37 0 180.10 31 85.69 CHANGED lsllaLCslVlANTETapl+VP+-FPsposttpspuoY...sshlshIsL+NlshspIs....oTclssssssYV.ELscLp+sETYQVKICWTAIDPISIsDlsallIPHuTtFpGTlS-cA......RlhVpFcVpuDSYPlLsDsphVPVsVSllslKLGIPVDLYslLlYlllVl..uISlllshcPapLLc .............h..lhhhls.hlhANTEoh......Lh+VPp..sF..s......lp...ps.....tssssht.................h.sshls...h.o..lssps..h.ophh........sssh.....ph.c.s..Tpal.cLss.....L.....p.....ps-s.Yp......IKlCWsAhcPhshpslpphhhs+.sthttohsD.h............clh..lpa..p....hh..u-..SY........s..............ppp.shlslpls...l.ss...hL.....s...IPl-lYshllhhl.ls...hhhlh................................ 1 3 13 25 +10166 PF10334 DUF2421 Protein of unknown function (DUF2421) Wood V, Coggill PC anon Pfam-B_39020 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 
24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.17 0.70 -4.74 21 168 2012-10-02 19:04:43 2007-08-21 09:54:30 4 10 89 0 134 204 0 205.10 18 19.91 CHANGED PR.PsSu++tlcpsLupslcsluslastlhsahsp............sschclhuc............h.thhlpltt+LsuLpthlshLpaEhohpGhWPpppYtclpsLhp-lspLhspLhhlhspL..Pttah.cLhcpsGhhcpphhu-lhAVltMsppAL+oGssLPclhPssLsh+ph-hhcpphtstc...............................t.lshchlcspshppasVulshhhplhs+l...D-llllVKullGEta ................................................................................P.P.oupphlRpslupslhtlushYs.hlh.sthtt.............................................t.t..h...p...................................h..phh..h..p..h..ttpL.t.slpshlthhpaE.sl.tGpFP..p..p..p..Ypplhphhp.pllphl..stl.h..h...sh...p....pl...............s.............p........p....h...p..pt.l.hph.hth.........h.cp.........phh.up.lh.slhthlusulcstpsLPthhs...............h......p..h.h...h..t...........................................................h.p..thh.p.th...................hh..th...cphh..hp.hhG...................................................................................................................................... 0 47 81 117 +10167 PF10335 DUF294_C Putative nucleotidyltransferase substrate binding domain Bateman A anon Bateman A Domain This domain is found associated with presumed nucleotidyltransferase domains and seems to be distantly related to other helical substrate binding domains. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.37 0.71 -4.55 86 1001 2012-10-01 22:14:54 2007-08-21 11:02:28 4 14 661 0 348 952 154 140.10 29 24.93 CHANGED shaLtt.hApsulphpsP.LGhapplhsppps..ttttlDLK+tGlhPlVcssRlhALpp.................u.lpsssTh-RlctLtptGhlstchspsltcAachlhplRLppQlpph.psGp.....ssshlssspLsphERptL+-uh+hlcchQphlphca ...................................................................t.halsthucsAlphpsP..LG.hFpshhhc+..sG...ttttlDlK..ct..GlhPllchsRlhALtt.................G.hp.s...ss.Th-Rlcslh..c.t.s......lLspppupsLp-AachlhplRlcpQhtph..ppspt..........ssN.tlssc.pLsphE.Rc.tL+-uhpllpphQphlth+a.................................... 1 100 229 306 +10168 PF10336 DUF2420 Protein of unknown function (DUF2420) Wood V, Coggill PC anon Pfam-B_32350 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 21.00 21.00 22.00 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.23 0.71 -4.49 13 112 2009-01-15 18:05:59 2007-08-21 11:16:24 4 1 110 0 84 116 0 106.70 27 16.46 CHANGED shlhcssulhctols-hauslRp.h-pt.GphhssscELlLshspL-.LslsEDNlYsccIohsDllslFchLpppohpptc.slPcsLshpLo.spPRFlo+YNsLs-hscsstu ....................h..hcstulhppslsclhtuhRphlt...sphh.sssc....ELllplcsLs.LplsE..Ds...s..asp..ploLppllplaptLpppst.st....PpsLhlpLo.opPpFhscastLsphspps..s.................. 0 20 44 70 +10169 PF10337 DUF2422 Protein of unknown function (DUF2422) Wood V, Coggill PC anon Pfam-B_42729 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. This family is the C-terminal half of some member proteins which contain the DUF2421 Pfam:PF10334 domain at their N-terminus. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 459 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.39 0.70 -5.70 27 237 2012-10-02 19:04:43 2007-08-21 13:24:18 4 14 109 0 197 260 1 360.00 14 36.09 CHANGED hhhssWhh.pL.Dhtoh+lhhRshhsshhshhlhhhssshphhGsuuYLhslluhls.s.thshshhlhhhlhhhlulsluaAhu.llshhhuscsRtp..........................ssssshsssspsthhpu.hhsupsouVhslhLhht..lahhshhRs+....aphshIhs.IhsslhhsaushhPsh.tshslsttLlcPhulululshssulllFPhooshhhhpph.tuhlshL+sslphppsahpohcsss..h.................................pplcpshtplpshhstlcsshshhhhEhuhG+hsssDlpslhphlRplhhsssGLshhhphhpch........................................................sclspapp.pshphhc.......s.tc............phshpphsclh..ptlcc.hsshlcsscpuLppl....sphLphssph+...t.hh...hppptptp.phttph......tshtpphcpphpsFtsscphtLhthtp ..................................h.............s...hhhhh.........+s.ls.hlshhhh..s.shph...h.s.p.hsahhslhshhs.s.hh.hhhhl.hhhh.hhuhhhuhuhs.hlshhhu.tsh.t......................................................hpst..slhhlhhhh...hahhshhRsh.....h.hshl.hh....lhhs..l.hphushhsh....shph.sp.hhhshhhuhuluhssslhlaP...osp.thhhpth...hthlt..lpthl.ph.pht..hh.ts.t.....pt............................................................................ptl.ptth.ttltshhsthpsshthh.h-huhup..hsspDlptlh.p.h.h+plhh.h.uh..hhphh..h..............................................................................................................................................................................t.ht......hp...t......................................t.h.p.hh....htp..t.h.t.....ht.slthh.......ht.....t..........................................................................t............................................................................................................................... 1 55 107 167 +10170 PF10338 DUF2423 Protein of unknown function (DUF2423) Wood V, Coggill PC anon Pfam-B_46946 (release 21.0) Family This is a family of proteins conserved in fungi. 
The function is not known. 27.60 27.60 27.60 27.60 27.50 27.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.05 0.72 -3.98 20 136 2009-01-15 18:05:59 2007-08-21 13:32:42 4 4 128 0 108 129 0 44.80 43 29.28 CHANGED MAKSLRSpo+hps+ohKR...csVFthss-ARspRlusKLccphtppc MAKSlRups++ps+uhhR...psVFusstcARscRLSsKLp-hhtp.p...... 0 33 61 95 +10171 PF10339 Vel1p Yeast-specific zinc responsive Wood V, Coggill PC anon Pfam-B_50673 (release 21.0) Family This is a small family of proteins from Saccharomyces and related species. The function is not known but member proteins are highly induced in zinc-depleted conditions [1,2] and have increased expression in NAP1-deletion mutants [1]. The S. cerevisiae genes are named VEL by association with Velum formation in the wine making process http://www.ajevonline.org/content/48/1/55.abstract 23.00 23.00 289.00 288.80 19.70 18.50 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.64 0.71 -11.48 0.71 -5.16 4 33 2009-01-15 18:05:59 2007-08-21 13:58:00 4 1 18 0 12 21 0 189.20 81 98.25 CHANGED llshF.shIsl..ssAlRFDLTNlTCptL+GPHCGTYLL+VsGpNuTaLGQphFVGhDALTpstsDhatRhLcpEsRhIPRLTTlAp.N-TsNFpPhhFTTshsTCNPQSIEsAhlPFlNTVTsEIpYDSWA.Tu.NAShITGLANQLhNuosYGVQVAoChPGFssslhsoPTVNlFNs--slPSWCpAIElcAVCPhDsGFs ......hhhh.hhls...ssAhRFDLTNlTCptL+GPHCGTYlMEVVGQNGTFLGQSTFlGADVLTESAGDAWARYLGQETRFLPKLTTIAS.N-TKNFSPLIFTTNI.TCNPQSIGDAMVPFANTVTGEIEYNSWADTADNASFITGLANQLFNSTpYGVQVASCYPsFASVILSTPTVNIFup--TLPDYCTAIQLKAVCPP-AGFs... 0 4 4 8 +10172 PF10340 DUF2424 Protein of unknown function (DUF2424) Wood V, Coggill PC anon Pfam-B_51256 (release 21.0) Family This is a family of proteins conserved in yeasts. The function is not known. 
19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.39 0.70 -5.82 5 116 2012-10-03 11:45:05 2007-08-21 14:05:56 4 3 60 0 80 2905 696 294.20 23 74.90 CHANGED LpFLs+llslLPItll.....hcSIsllshs+R+LslDhLoRlFhRpShhLhDctICpYVLNPla-.lu.sLaK.ph.s.o..chslPp-Dpps.psslFsc+hlNpshhpSphaWhtph.PcsFcPclD.PlLLYaHGGGaALpLsPsoLlFLsNltKhFP.cM....AILlsDYoVTAspscopcYPhQlLpslslY-YlspohGCKNVslMGDSAGGNhVLslLLY...L+KpNK.llPKKAIAISPWlNsThhsEpE+saM+ts-clDulChKuLshFG+hYlsN...-supshpo-sFlNIE+Na-h-sWscI..lcKCcLLITYGDDElLphQIKlalcKIo-hptt.pa.pscNVLl-c...QGsHIG......P.ILsassNlD+WoKhsSlscILsF .......................................................................................................................hh..............................................................................................................................................................................p............h......Wh..h.p............t......ps..p...s.D....P...l...l.lYhH....G....G.....G.Y....h....l........p.........h....h....s....s....p....l.....p......h....L....h.....s......h....h....p.....h..l....s.....ch.............................u.I......L..l.....l.......DY.......s...L.......s......s............................p.........u...t...h........a........P......t...Q...l....h....p....h....l.....s......s........Y........p.............p......L........l......................s.........p.......G.........s............p.........N........l.....h......L.....h...GD.SAG....G..NL...s..l.s...h...l...pa........................L..p......p.........p........p...................s...........h...........P..............+........p.........s...l........LI......S.PW..l...ph...s.................p..pt...t.....t..p.....hp...t..s...p...ph.D..h.l.....s...h.....p...t..h....p..........h...t..c...hahss..............t..t..t...h.........s..h.ss.h.....tt............p...p...Wpcl.......hp....p...t......s...shlhh.G-cEhh+-plhpas.phh.....................................
........................................h.h............................................................................................................................. 0 15 37 72 +10173 PF10341 TPP1 Est3; Shelterin complex subunit, TPP1/ACD Wood V, Finn RD anon Manual Family TPP1 is a component of the telomerase holoenzyme, involved in telomere replication. It has been demonstrated that TPP1 dimerises and binds to DNA and RNA. Furthermore, TPP1 stimulates the dissociation of RNA/DNA hetero-duplexes [1,2]. Yeast telomerase protein TPP1 (Est3 in yeast) is a novel type of GTPase [3]. The key residues in Swiss:Q03096 are an Asp at residue 86 and the Arg at residue 110. The Asp is totally conserved in the family, whereas the Arg is not so well conserved. The N-terminal of TPP1 is likely to be the binding surface for TINF2, whereas the C-terminus probably binds to POT1, thereby tethering POT1 to the shelterin complex [4]. The complex bound to telomeric DNA increases the activity and processivity of the human telomerase core enzyme, thus helping to maintain the length of the telomeres [5,6]. This domain is conserved from fungi to mammals, hence family Telomere_Pot1 has been merged into the family [7]. The human shelterin complex includes six proteins: telomere repeat binding factor 1 (TRF1), TRF2, repressor/activator protein 1 (RAP1), TRF1-interacting nuclear protein 2 (TIN2), TIN2-interacting protein 1 (TPP1) and protection of telomeres 1 (POT1) [8]. 
21.90 21.90 21.90 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.51 0.72 -4.06 51 102 2012-09-26 10:57:07 2007-08-21 14:13:12 4 1 93 2 67 107 0 118.70 21 19.44 CHANGED sWIpphlhsthptp..................................hhtps.sspll+llca................sssssshtul............................lSDusapIhulFo.pculppac..pcpc....pchp.........tsspssllhlpchplpht...sppthssca....................................hLp.....l ....................................sWIppllhsshp....................................hhssstsspllcllph..................sssssshtsl............................lSDusapIpslho.cculpphc.....hcpc.....phhh.........psspspllhlpchplhhp...sptthssca.......................hL................................. 0 14 27 47 +10174 PF10342 GPI-anchored Drmip_Hesp; Drmip_MAPK; Ser-Thr-rich glycosyl-phosphatidyl-inositol-anchored membrane family Wood V, Coggill PC anon Pfam-B_42324 (release 21.0) Family Some members of this family appear to be serine- threonine-rich membrane-anchored proteins, anchored by glycosyl-phosphatidylinositol. In A. fumigatus these proteins play a role in fungal cell wall organisation. In Lentinula edodes this family is involved in fruiting body formation, and may have a more general role in signalling in other organisms as it interacts with MAPK. The family is also found in archaea and bacteria. 
26.00 26.00 26.00 26.00 25.70 25.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.48 0.72 -3.51 101 635 2012-10-04 12:14:07 2007-08-21 14:16:40 4 18 172 0 476 646 9 93.20 21 35.69 CHANGED os.P.shs.pslssG...p..hTlsWsss......sss.......ssholhLhpGss...s.hssh..tsl..ussls.su.....uo.aoass....ss..s..lssss....s..Ytlplh.s.ssss.......hsYSspFsl ..................tP.stssshssG....ps.hslpWpsst.......sss..........ssh.sl.hLh...sGss........tp..hs.sh.....tsl...us...s..ls....ss............uo..as.hss..............ss...s....lssss......s...Y....tlplh..s..ssss.......hsaSspFsl.......................... 0 169 295 400 +10175 PF10343 DUF2419 Protein of unknown function (DUF2419) Wood V, Coggill PC anon Pfam-B_35257 (release 21.0) Family This is a family of conserved proteins found from plants to humans. The function is not known. A few members are annotated as being cobyrinic acid a,c-diamide synthetase but this could not be confirmed. 19.90 19.90 20.00 19.90 19.40 19.80 hmmbuild -o /dev/null HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.78 0.70 -5.15 21 302 2009-01-15 18:05:59 2007-08-21 14:49:58 4 5 236 0 233 297 138 246.10 34 73.73 CHANGED HELHPps...cD......cpTVsalFlhDhLNFuFWo-ps.........tpaslpY+G+taTGYaSLsAAlpRAL-cGIsITsPpFa...pchT.chL+c.lF+Ssot..cclPLLp........ERlcsL+EuG+VLhcca-GSahNhlpsu.spSAssLlpLls-sFPsFRDcssacG+........+..VphYKRAQILVADLWusFpGcu....h.GcFcDIDpITMFADYRlPQhLppLGsLtYospL.pcl+ppchIssGuphElElRusSIasVEhlRctlp+pcsc.t........................................................................lNAILIDaaLWDht+chptcht.........plPaHRT.......RSIaY 
...............................................................................................................p........ts.......ttsspalFhhDhLNF.s..FWsptt..............h.ap.................p.hpGYhsLsAulp+Alc..p.shsl.hssphh......tpho.p.lpp.lhcs......s.t......h..Plhp......................ERhph.LpE...sGplL.......h.c.c.......a....tGs....hhshl.p.p......u......p...t....S...At......tLlpllsppFP.s......FRDtsh...a..c..G....+...............p...VthaKRAQILVADlWusa.p....G.p..u...................h..G...p..F.....p.........D.Is.p....l..T....MFAD.......YRlPQhLhphGsLpY.S.spL.ptl.....cp....t.p......l.sGsp.E..................l................ElR.u.......sSlhslEhlpct....l.........p..h..p.................................................................................................l.s.ul...hlDaaLasht+phtt..p........................lPaH+spsIaY.................................................................................................. 0 93 135 198 +10176 PF10344 Fmp27 DUF2425; Mitochondrial protein from FMP27 Wood V, Coggill P anon Pfam-B_54917 (release 21.0) Domain This family contains mitochondrial FMP27 proteins which in yeasts together with SEN1 are long genes that exist in a looped conformation, effectively bringing together their promoter and terminator regions. Pol-II is located at both ends of FMP27 when this gene is transcribed from a GAL1 promoter under induced and non-induced conditions [1]. The exact function of the Fmp27 protein is not certain. 
25.00 25.00 25.30 25.30 24.90 24.80 hmmbuild -o /dev/null HMM SEED 881 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.38 0.70 -6.86 29 253 2009-01-15 18:05:59 2007-08-21 15:54:39 4 23 161 0 164 252 0 580.20 17 30.91 CHANGED hhshhllhhllphhsGlsIppl...shhSl+tlshss+s.hpIpIctlthpl.....hspsphlplhshc.lclph........................pcpppppstpppsspttp................p.hthhphph.pphh+tlhphl.calphlslhlhpsslsh.shsptthshlphsspppphhhsp.......................hph.h..lpslhhthps.....tp..pllspsslslpshlphsh.......sLcshslslpluclplPhs..shhphhthhcp.cpptspt....t...t................................psppthpplp............p.hchhpplhchlp-lplplcphpl.ch..s.tsp..sh..............lsh+plshshp+hsppsPua+hhFppc.Dhshphhhshhslplths.t........ptpsscllplPssohohcoshhp.h..........ts.spspthps......................................sllphshslooPslDlphcplshlhshhtshhhh+ph.shsptppp.............................................................h.hhpchhPphshKhslc-Phlhlphs..t.s.......hshlhtShSslshslposc...........ttptplhashhophpltshphhhps..spth+pslhphcshphch.hshhsphplphpsslss....hshchsphcl.hpulppllhplstphp.c..hthptp............................................c.plh+tLPsaltphphphoslshplus..............spslp..-htcGlshcLcshsspacthphsh.sshpthppps.Sps.s.hps................t.pt.ths-hpphs.p.p......................................sh.s.h.psscph.hcshhslsph-ltlsshscp......sshhplphplpplhsphshaphashhsulshlpphhht..tshppppppsp.hphtt........................................th.chlplchchpslplhhpLPs-sshhlplhs 
....................................hhhhh.h.ph.stlpItph...thh.lptlshp.pp...p..lplctlhhp..........phh.lh..p...l........................................................p....................................................th....thhphhhp.h..ahp.lslhh.pssh....ht.......t.h.hhhspptp....................................lpph.h..............p...phlsph.shthphhh.hsh........shp.hshslplhphph.hp......h.......t...............................................................t.t.h.p.h..................hh....h.th.pphphthp.hth.........t.........................h.hp.lth.hp+.p.p.s.hp..Fp.p..shs......hhhp.sl.h..............ttp.pchh.ls.hphshpss..................................................................h.hphslsos.lchc.pphshhhthh.......pt....t..hp............................................................................................hhs.h.h............................hh..p.o.h.hshp.......................hhh...t.....t....h.......t....thh..p.hth.................ht................h.....p..h.......hp.hh.ph..............................................................hhp.hP.....h.ht.t.h...hs................t..p....p..pu....phtthshph.....t....................pt............................................................................................................h..h...p.h.h.s.st.........s...lp..h.th...hp...hhh.h.sh.hlpphh..........................................................................phhthsht.t.ht..h.hstt..hhhp................................................................................................................................ 0 21 55 113 +10177 PF10345 Cohesin_load Cohesin loading factor Wood V, Coggill P anon Wood, V Family Cohesin_load is a common cohesin loading factor protein that is conserved in fungi. It is associated with the cohesin complex and is required in G1 for cohesin binding to chromosomes but dispensable in G2 when cohesion has been established. It is referred to as both Ssl3, in pombe, and Scc4, in S.cerevisiae. It complexes with Mis4 [1]. 
21.30 21.30 21.30 21.70 21.20 20.70 hmmbuild -o /dev/null HMM SEED 609 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -12.82 0.70 -6.31 21 268 2012-10-11 20:01:01 2007-08-21 16:06:20 4 10 212 0 192 312 1 467.70 23 69.21 CHANGED ptph.hLltlA-cahpsApshsspl....pptphppYapLlshulpCLpslhp............shpLssch-AplphcluplLhpETcNhspA-shLs..+uhhlspp..sshh-lKapsphLL...splhhcos.ps..Ahptlsctlp.hps..................pptahasachLphpLslpp...t-.stAhphLppltphss.psppthhlhhhhhpshlphhps.spDslphlpph.httpth.....t..ph.pLtshhhllclhttlppsp.tsspppLptlpphhpphps.........ass..ptchtl.lth.........................hhhhpWLspcplhhLsYhloulsthhcppss..+upKahpculpphcc.h.............sl..hppphthhpslphhhhhahhhpthhhsshtpsppphp.htshspt..........pthtshhhhLtGlhtQtp.......uchcpAhthah..........................................phstspssps-LtlhusLNlhhIhpt.spppptp........................hsplhsplcshsspssspphthstshh..ltshpth.shphp......pp+pphppsLpth....p...................thspsphhshsLsllstpha.......tssst..........Epsphstp...uhphAppp................................................pstLWh.lssshhtp.hc.tGptsctcpstpphpplpp 
...................................................................p.....lhthAc.hhpts...ht.h....................lpCLpslh..............php..s..p.EuhspLphuplLhpcTp.....N...p.AcphLp......+uh.....lspp..........ph.-lKapht...LL...splhhpps.hs....Ah.hlcp..hlp..pt.........................s.hath+h...l...hpls..h.....p-hstAhp.Lthhsp..hup......ssthh...hshhhl.cu.hlh.h..h...p.t...t-.sh..hlp.s....sp.h.......p....ph.....pLp.shhh..hlpls.h.l.tt.phppsp.pLptlpp.hpph.p....................tp..h..t.................................................................hh...pahspcphhsLsahl....o.sht.h.t..........s..p......+up+ahpculh.lpp.h..........................................................h.pp...hhp.h.h.hh.hhhhhthshsp.hth.s.ptht.htp................................t.h.s.hhhh.uhh...sh.......sphptA.t.a............................................hh.p.s.p...p-lhhhhshNhh.lh.t.tp.tp.............................h.plhptlps...tp...s.......sp...h......hshhhh..htsh.t...h..................ptKp.lppsLphu......p.......................t.s.sphhshsLs.hhshhhh........ssht..........Ep.phss....uhthAp+.................................................tstLW...ssshh..tp..chtspt.ctpthht.h............................................................................................................................................... 0 54 99 157 +10178 PF10346 Con-6 Conidiation protein 6 Wood V, Coggill PC anon Pfam-B_35316 (release 21.0) Family Con-6 is the conserved N-terminal region of a family of small proteins found in fungi [1]. It is expressed at approximately 6 hours after the induction of development and is induced just prior to major constriction-chain growth [2]. 
25.00 25.00 25.80 25.30 23.30 24.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.16 0.72 -7.45 0.72 -4.54 23 178 2009-01-15 18:05:59 2007-08-21 17:03:05 4 5 51 0 145 192 0 35.80 43 41.08 CHANGED c-ssNlhtGaKA.............sL+NPpV........SccuKc+AcchLcchss ..................ts.sNVhtGhKA.....................slpNP....sV........S-cAKp+A+chL-pht....... 0 78 101 134 +10179 PF10347 Fmp27_GFWDK RNA pol II promoter Fmp27 protein domain Wood V, Coggill PC anon Pfam-B_5282 (release 21.0) Domain Fmp27_GFWDK is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation [1]. It contains characteristic GFWDK sequence motifs. Some members are associated with domain Fmp27_SW (Pfam:PF10305) towards the N terminus. 20.30 20.30 20.50 20.30 20.00 20.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.01 0.71 -4.25 33 294 2009-01-15 18:05:59 2007-08-21 17:12:47 4 35 229 0 216 303 1 141.10 33 5.80 CHANGED DYPLPllphP....sps.ph.shphpGslVluEphhps.cphRplhVPlsPsst.ptt...sshaulpl.RTlosVKhasDhphclsospsoplsWGtSYQPAlpphMtshDpFoKPslDPSs+lGFWDKlRLlhHG+hphphpp..phclthKGS...........+DPYpl ...............................................DYPhshhpl..................shplhushlhuEph.ps.cuhRphhl.ls.............pshhsltVpR....oh....sPlKhYpDhphclpo...splsWG.suapPuhpphhhsh.................-............t..ho.....K..........P....s....h....D...........PS.................t...luaWDKhR..LhhHG+hphshcp....hplph+uo...........cDPYph................ 0 64 115 183 +10180 PF10348 DUF2427 Domain of unknown function (DUF2427) Wood V, Coggill PC anon Pfam-B_52268 (release 21.0) Domain This is the N-terminal region of a family of proteins conserved in fungi. Several members are annotated as being Ftp1 but this could not be confirmed. The function is not known. 
22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.24 0.72 -4.46 33 258 2012-10-03 10:28:09 2007-08-22 10:51:40 4 7 134 0 196 274 0 100.80 27 18.68 CHANGED shsssoaassspstcusLahHIhlhsluashlaPlullLuhs+.S+aalssphlthslslsGhhhutl.apuppPp...hYssNsHsphuhlLhhhhssQhshulhhpht ...................t...s.s.hh..tp.hcuh..lhhHIhlhsluasllaPl............u.h.l....L.....uhs.+...S.......R.......a....al..P...s..QhlthslshhuhhhG....t....l.....apupp...........hh..ssN.....sHsphuhllhhhhhsQllhGlhhth.......... 0 51 107 162 +10181 PF10349 WWbp WW-domain ligand protein Wood V, Coggill PC anon Pfam-B_5077 (release 21.0) Domain The WWbp domain is characterised by several short PY and PT-like motifs of the PPPPY form. These appear to bind directly to the WW domains of WWP1 and WWP2 and other such diverse proteins as dystrophin and YAP (Yes-associated protein). This is the WW-domain binding protein WWbp via PY and PY_like motifs. The presence of a phosphotyrosine residue in the pWBP-1 peptide abolishes WW domain binding which suggests a potential regulatory role for tyrosine phosphorylation in modulating WW domain-ligand interactions. Given the likelihood that WWP1 and WWP2 function as E3 ubiquitin-protein ligases, it is possible that initial substrate-specific recognition occurs via WW domain-substrate protein interaction followed by ubiquitin transfer and subsequent proteolysis [1]. This domain lies just downstream of the GRAM (Pfam:PF02893) in many members. 
21.80 21.80 22.40 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.89 0.71 -3.20 31 266 2009-01-15 18:05:59 2007-08-22 14:06:34 4 9 168 0 161 239 0 107.50 30 39.43 CHANGED PGGGa.Gp...spaKloFppGGAI-Fupthh+lsppsppu..ht.t...............h..shuhst.ss...Pshst......................................Ps.....................s.hssss...............ssssssa.sssssts.hhst...PP.PsYsus.....t .....................sGGGa.Gp....spaKLoFpsGGAIEFuQthhpsssp.uppu..h.t.s....................h.ss...hshs..ss...sshsh......................................................hss.....Ps..................uhs..ss.........................................sssssa.ssss....s..s..s.hs..t...PPPsYsss............................................................................... 0 36 61 113 +10182 PF10350 DUF2428 Putative death-receptor fusion protein (DUF2428) Wood V, Coggill PC anon Pfam-B_6748 (release 21.0) Family This is a family of proteins conserved from plants to humans. The function is not known. Several members have been annotated as being HEAT repeat-containing proteins while others are designated as death-receptor interacting proteins, but neither of these could be confirmed. 
21.40 21.40 21.40 22.80 20.70 19.80 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.63 0.70 -5.36 33 333 2009-01-15 18:05:59 2007-08-22 14:57:58 4 9 254 0 238 352 4 254.50 26 17.08 CHANGED hcphhscllshspplhphl+sllsp-o...PEGt..........................................hs-pht.......p.shssphllshuWRul+E..uShLLtsll...............................sshlotsplctlGphhhppLtpl+HRGAFpsVh.sFsshCpphhp.....spstplssLPppWLppslphlpsp...........sph....hTRRSAGLPahlsuILsucsss.......shhppshppLlclAchsst..........tsttt...p...............lPQVHAhNsl+sIFpsscLustsssaltcuLpLulcs.FsSssWu ................................................................................................................................hhpcll.hs.pl.ths.t.sll.sssu.......PEGh.....................................................................................spphp............pttsssphlLs..ssWRuhKE..s..uh..LLs..t.lht.h..h...................................................................t..s.sthl..shpp.l....cpl.Gphhh.ptLhphRH+GAFptsh.uFspl...spthhp.............sp..s.....sp..h.....p.p..L..PppWlpp.hlpt.l.psp..........................ssh..phTRRSAGlPhhltulLsu..-sppt..........sllppsh.ppLlp....lAp..sss......................................tt..p.....................................lPpVH..A..l..NhL+slFpco...pL...u...p..ps.s.a...ls.cu.h.phAlps.hsSshWs..................................... 0 85 128 197 +10183 PF10351 Apt1 Golgi-body localisation protein domain Wood V, Coggill PC anon Pfam-B_6317 (release 21.0) Family This is the C-terminus of a family of proteins conserved from plants to humans. The plant members are localised to the Golgi proteins and appear to regulate membrane trafficking, as they are required for rapid vesicle accumulation at the tip of the pollen tube [1]. The C-terminus probably contains the Golgi localisation signal and it is well-conserved. 
20.20 20.20 20.20 20.40 20.00 20.10 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.32 0.70 -5.63 41 368 2009-01-15 18:05:59 2007-08-22 15:48:38 4 35 242 0 260 388 2 417.80 21 19.93 CHANGED llp+sShhh....hYsphsshthp.............ssttphcs...........hcplslphPclhssssSpQYhs..lYsIltsLLhas-Phccph.pc+lc+lhhshDhp..Dlpuhs.phlppLQpcl+pLtplppphph+pphLspps....................................htchhhlph-htpstt-Lhhlhpslpssptc..........pp...sspsshpaplpsccllW+llc-sppPhl.-htLssspapRpcsscGSshNplplphlpshNLh.sAhYspllsPa.p.............s.psscpshlclpWphttslGGIsVh-chclslhPLplpl-cchsc+lhpYlFPsppsp....................cpssspctpppsthshsstsssptstps.........................................................................................pt.t..s.tph.spssppt.pspc.tht...................p--lscMhpRuppahsltpl+lsshhlplSYK.....Gpsptpl.sVpDhhhplPslcYcNpshohhDLhhplKKcll+sllpHsG ............................................................................................llppssh.hhY.phs....p..........................t.....t............t.hcphtl.h.p.l..phtssu..tQY.h..hh.ll.sL.Lhas-..P.cc.h.pc+hp+.lhht.-hp...shtt...p.l.phQptl+....p....h.thh....pp.....hphp..h.l..ppt..............................................................................................................ph..hptp.hthptt.....cL..hhhpshpptphp.....................................tp...ssphhhphphthpphhWc.hhpcp........sp...........h..l..-htlpph.a.........s+hppt-.........sos.p...hlplt.h.hhNL......h......s..ss..hY......p..l..ltPh.s................t.scp.hlclh..hp.......hh.sluG.I.s.lh-.p.FE...lslhPLplpLpcphhcchhpahFPs.t............................................ctppspptppts...h.h..ss.sssp.tt..................................................................................................................................................................................................................s......pt.st....s...ttt..pt..ph...tht............................s--lscMhpRupp.....hphh.lKIspl..lplSYK...
...............Gptp...........t.....sl.....p-h.hhhPslcapNpTaohhDhhhtlK+chh+sllpps.................................................................................................................................................................. 0 84 144 224 +10185 PF10353 DUF2430 Protein of unknown function (DUF2430) Wood V, Coggill PC anon Pfam-B_67886 (release 21.0) Family This is a family of short, 111 residue, proteins found in S. pombe. The function is not known. 24.60 24.60 24.90 210.00 24.10 24.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.50 0.72 -3.87 2 3 2009-01-15 18:05:59 2007-08-22 16:11:30 4 1 1 0 3 3 0 107.00 75 94.13 CHANGED MLLLhhICClFlKhlLApVsLTFV-YAKLPspYAELLANhhsQpGlMLFsTuDlRItAYNYLlNslTEhNsDTDAYLCQLLTGQYTTDCYIFssss.-tPEshNsSh MLLLFCICClFIKLVLAEVNLTFVDYAKLPPKYAELLANLTDQHGlMLFDTADVRIEAYNYLVNNITEINTDTDAYLCQLLTGQYTTDCYIFDDSVYEGPENINPST 0 3 3 3 +10186 PF10354 DUF2431 Domain of unknown function (DUF2431) Wood V, Coggill PC anon Pfam-B_6967 (release 21.0) Domain This is the N-terminal domain of a family of proteins found from plants to humans. The function is not known. 
24.70 24.70 25.70 25.00 24.40 23.80 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.31 0.71 -4.29 47 435 2012-10-10 17:06:42 2007-08-22 16:42:22 4 23 253 0 309 432 13 160.90 28 42.77 CHANGED LlVG-GDFSFuhuLhppht...sssLsATohDot.ppLppKYs.pht..pNlptL...cptGspVlaslDspphtpph...........................hpppp.FDpllFNFP.Hs.........G...tt.tp.....sppplp..............tppcLlhsFFpsupplhpt...................................tGclhlohtsGpP................Y.spWslc.tLAtc..sulplhcphcFphp..saP..GYcp+cs ........................................LllG-GsFSFuhuLh......p.......t.......ht........................st.....p..lhATsh-s..ppl...........htcYs..pst......psl.ptL.....cp......s....s..pl..h.aslDspphtpth..................................................................................ptt...aDpIhFNFP.Hs..................................G............pttlt...............................hpppLl.htFFp.s.stph.l.t.................................................................................tGplhlohhpsps........................a.s.Wpl.....thAtp............suhhl..pt.h.Fphptas....GYpptt.h...................................... 0 87 175 257 +10187 PF10355 Ytp1 Protein of unknown function (Ytp1) Wood V, Coggill PC anon Pfam-B_7247 (release 21.0) Family This is a family of proteins found in fungi. The region appears to contain regions similar to mitochondrial electron transport proteins. The C-terminal domain is hydrophobic and negatively charged. There are consensus sites for both N-linked glycosylation and cAMP-dependent protein kinase phosphorylation [1]. 
25.00 25.00 29.90 48.70 21.30 22.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.82 0.70 -5.19 36 273 2009-01-15 18:05:59 2007-08-22 17:18:04 4 5 134 0 199 267 0 266.30 38 48.98 CHANGED lhuaVpl...shGslshhGascGpclhssLAHaIhGulFhhYGll.hu.hhGhhhphGhAW....................................ch...t.................ptthStEFh-ShlIhhaGssNsFhEHhuutss....WotpDLQHsSlullaauuGLsGlhlpppt.........................................................s.thuhNshPullIhhTGhhMSpHpQps.lSThlHt.aGhhLhuuuhhRhlphhhlhhcss..................ss.shsspsshphlssFsLlsGGllFMtSo--hlpshcthGh.sthhhhslshuhshLlhsWhhlLlt.l+sa .....................................h.hualphhhGslshhGas+usclhpsLAHaIhGusFhhYGll.....hshhh.lG.sW...........................................c.............................p.sshStEFh-ShlIhhaGs..sNsFhEH..hhG.ts.....WotpDlQHsohu...llaauuGLsGhhlppcp...........................................................s.t..phN.lPullIhl.TGhhMSuHsQp.hlSThlHs.aGhhLhuuuhsRllphshlhhc.s.......................................ss.shssh...sshp.hlssFsLhuuGllFMsu......T--plphhpphGh.sthhhhhlhhuhshllhhahhlllt.lhs............. 0 50 107 167 +10188 PF10356 DUF2034 Protein of unknown function (DUF2034) Wood V, Coggill P anon Manual Family This protein is expressed in fungi but its function is unknown. 
20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.38 0.71 -5.12 11 178 2012-10-11 20:44:46 2007-08-23 13:34:52 4 3 128 0 140 242 59 117.60 28 61.05 CHANGED STlapGTLYEhpVtphLpppLtshsLc+sGGutDuGlDlhGpWslssh.............s......s..............................hpsLcVlVQCKuhps.KluP+hlRELtGoasphsst...p..pssTlsIlsSPp.hT+sulphhsphslPllah+lsh.p...................htDG....hphs.hNsstLptha.NshuctlL ......................................ossh.GshaEhhs.t.Ltph....hhpL.+hG..Gt.DtGlDlhG.W.ls......................................................................................t.lpllhQCKs.pt...+h..sPt.lREL..Gsh........th...t..........................t.th....h...hhhhs.p.hT.thh..h.p.t.....hPhh.....hh..h.........................ts..................................................................... 0 38 73 116 +10189 PF10357 Kin17_mid Domain of Kin17 curved DNA-binding protein Wood V, Coggill PC anon Pfam-B_7469 (release 21.0) Family Kin17_mid is the conserved central 169 residue region of a family of Kin17 proteins. Towards the N-terminal end there is a zinc-finger domain, and in human and mouse members there is a RecA-like domain further downstream. The Kin17 protein in humans forms intra-nuclear foci during cell proliferation and is re-distributed in the nucleoplasm during the cell cycle [1]. 
25.00 25.00 26.10 26.10 20.80 20.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.78 0.71 -4.49 30 331 2009-01-15 18:05:59 2007-08-30 09:51:48 4 10 284 1 245 323 2 125.20 44 35.57 CHANGED Rphtlhup..sspphl-saSppFppsFlpLL+ppaGpK+lpANplYpEaI.pD+cHlHMN................AT+WpSLTcFl+aLG+pGhC+V--o...-+........GhaIpYIDpssEslp+pcthp++c+t-ps-E-pppchlpc.Qlc .........................RQhhlhup..sspcalcpaSp-FppsFlpLL+ppaGpK+V.psNplYpEYI.u-+cHlHMN................ATpWtoLT-FsKaLGR.pGhC+V-ET-K.....................GhaIpaIDc..sPEslpRppt.hp++c+t-hsDEE+ptchIccQlc................ 0 81 133 200 +10190 PF10358 NT-C2 Eeig1; N-terminal C2 in EEIG1 and EHBP1 proteins Wood V, Coggill PC, Zhang D, Aravind L anon Pfam-B_7857 (release 21.0) Family This version of the C2 domain was initally identified in the vertebrate estrogen early-induced gene 1 (EEIG1) [1], and its Drosophila ortholog required for uptake of dsRNA via the endocytotic machinery to induce RNAi silencing [2]. It is also in C.elegans ortholog Sym-3 (SYnthetic lethal with Mec-3) and the mammalian protein EHBP1 (EH domain Binding Protein-1) that regulates endocytotic recycling and two plant proteins, RPG that regulates Rhizobium-directed polar growth and PMI1 (Plastid Movement Impaired 1) that is essential for intracellular movement of chloroplasts in response to blue light [2]. 
25.20 25.20 25.20 25.30 25.00 24.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.67 0.71 -4.70 76 744 2012-10-10 12:23:49 2007-08-30 13:20:29 4 14 252 0 483 711 0 146.10 20 19.43 CHANGED ppcphKaphplplcclpshst......................ssplhlph.+csspt..........................................ttpstph.lppspspap....pphphssplhhs.....tcst......hppKhhpl.lht.....................................tpspp......hlGpsslslupass.......................tpstspphhhpps.....tps.suplplslphp.hpts.pt .................................................................................pt.Khph.hphpcl..s.sh......................sshlhlph.phhstt...................................................................................................phpsshtp......l.t......p.s..plpWp......cp.hphs.splhts..........spst.....................h.csp...hphsltpp..........................................................................................tsu+p....thLGpsslNLucassts........................................................................................tss.shph..hLcsh......ts..suhLplslph.hhpts................................................. 0 138 264 379 +10191 PF10359 Fmp27_WPPW RNA pol II promoter Fmp27 protein domain Wood V, Coggill PC anon Pfam-B_8838 (release 21.0) Domain Fmp27_WPPW is a conserved domain of a family of proteins involved in RNA polymerase II transcription initiation [1]. It contains characteristic HQR and WPPW sequence motifs. and is towards the C-terminal in members which contain Fmp27_SW Pfam:PF10305. 
23.20 23.20 23.50 24.60 23.10 23.10 hmmbuild -o /dev/null HMM SEED 475 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.55 0.70 -6.08 29 179 2009-01-15 18:05:59 2007-08-30 16:16:13 4 19 134 0 139 186 1 450.00 27 17.85 CHANGED GLKu+h-shhlDLHQR+E.hpphp.....ctLs..+pp+lh+h+hptu-lchpshDlRslsAtFppsshsshhpst.p..pt.......................................asI.DsD...hsWhDhcDFlElchhtspss.P.chcIhPLhaoP+FoYhRcsstpsp.sspst...........................FGsEsoHsChh.tpscPtpsQhcLlc-RlppLccplpphpctltphp...........s.t.p.tphcph.pplppLtc+lchlpshlpchpppp....ptpptssssp..tp.t.t..............................ssstpssusFcNRFhlHNhpLKWNsslRshll+YlHplspR+uhsaahSp+ulchlp-llccppctppssppphpp............p......ppsppss...........................pphh-ph-chLp-spp...............................s.sapsppsYhlclIuPQIQLpS-csPcusVLloA.shcl+llsl.....hDppssssslss.llppRaslhhcsuphFVhp+c-h.sh..thhassssYGsp....tosWPPWLslEls ....................................................................................................GlKs+h-shhlDLHQR+E.hpths.....cths..pp.pcs.ph+hptuplchtssDlRslsuthttsshpphhp..st.t...t......t...s................................phsl.D.pD...hsWhDh-DFlE.lphh...sp.ps.P...chcIhPlhasPRhoYaRpsshs.th..sps..........................................................................FGsEsoHtChh.tpscPtplQhpLlpcRlppLctphpphpctltc.p.............t.t.t...tphc.h.pphp.Lpp+hphlpshlpchptp.....ttptt..t.ss......tp..t.....t..t........t...................................................ssstphtupFcNRFhlHNhpLKWNsslRslll+YhapsspR+uhhaahSpcAl+hl.-llc...cptpsppp.pttttpp........................t.......tp..tpppss...........................psh..-phcplLp-spp..................................................s..chssppsYhlcLIuPQIQLpS.-....cs.c....usllloApshpL+llsl........hDppphs.s.slss.lVppRasl.hcssQhFlhppcph.p........h.hassstYGs.....tstWPPWlshEh........................................ 
0 38 79 125 +10192 PF10360 DUF2433 Protein of unknown function (DUF2433) Wood V, Coggill PC anon Pfam-B_83000 (release 21.0) Family This is a conserved 120 residue region of a family of proteins found in fungi. The function is not known. 21.40 21.40 21.70 22.10 21.20 20.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.80 0.71 -3.83 10 111 2009-01-15 18:05:59 2007-08-30 16:29:41 4 7 104 0 85 111 0 122.90 51 18.13 CHANGED YGlSYNEFSVssoh.......EsYRuKLupu+ssFs-lW-sVKsEV-.ssls........pp-sQppLLchuLulh-Kl.Psusssu.shhhshsh.ps.......tsAhh..shWpasLs..DuuaGphlLp.IsDu+luuEhpupGFNhucR ....................YGSSYNEFSVNPoL.......DHYRGKLAASKASFNDVW-TV+uEVE.sAIs.........pN-uQpsLLcNALslV-KM.PosAsGGN..PF.GG.Pssss.su...t.....GplDESAFK..NhWNFNLA..DAAFGaLVL-.IpsGRIuTEMRAQGFNFuHR........................... 0 27 50 73 +10193 PF10361 DUF2434 Protein of unknown function (DUF2434) Wood V, Coggill PC anon Pfam-B_84994 (release 21.0) Family This is a family of proteins conserved in fungi. The function is not known. 
25.00 25.00 63.30 47.80 19.70 18.60 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.82 0.70 -5.35 10 63 2009-01-15 18:05:59 2007-08-31 08:48:38 4 3 61 0 51 64 0 270.70 46 54.05 CHANGED YSNGTLSNGopCYLuFspapPph....hpNGTFlNGTSCYuPlpsIGs+uulGlAaAlhFululhlTLlNLRKHG+paLPs-KRaphl..................GRRhpWaWhLFluACGhISsFhoIDVDRsYL.usPllLpSlFapLhhPGhhAAVWEAVRHWGSWQERQllD..+DPaAFscsupRp+tEhlLPllFYlFshlNFFLsVPRSWouIEhQRs.-QphppApPsATDsRFKAAuFltluuhLllsYSLcHSIY+Y+s+spushsp.llFalptsPspFllsl.....sLsulplGYulAuAFsaslSP...L+hs ......YSNGTLSNGS..pCaLsFp.YpPth....hsNGTFlNuToCYsPlpslusRuulGluFAshFulslhhoLlsLpKHG+haLPh-KRah.l..................GRRWQWYWhlFlsAsuhlShFhslDVDR.YL.thsllLpShFaaLhh.uhlAhVWEuVRHWGSWQERQhhD...Dsa..shp.pcspRp+.EhaLPLlFYlFhaLNFFlslPRSWstlphQRo.-QphshAtPsATDsRFKuuuhhhhsshllIsaSLtHSIh+Y+s+spuhhsp.hhhh.lphsPh+Fhl..sI.....sLsulhluathhsuFpashSP...l+h........................................ 0 8 22 39 +10195 PF10363 DUF2435 DUF2435; Pmp3; Protein of unknown function (DUF2435) Wood V, Coggill PC anon Pfam-B_7476 (release 21.0) Domain This is a conserved region of approximately 400 residues which is found only in eukaryotes. It is associated with HEAT domains Pfam:PF02985 in all members. The function is not known. 23.00 23.00 23.00 23.30 22.90 21.80 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.13 0.72 -4.09 40 249 2011-09-19 22:45:39 2007-08-31 11:17:15 4 8 223 0 180 258 0 93.40 27 9.87 CHANGED ssppthccAlpsLsDPLlPlRA+GLhhLppLlppcS...............slhslstlLslaLppLcDp.......DsFlYLNuI+GLusLs-hpsc.plLppLsphYsspspp ............................................s..pthpcslppl...p..D..s...sP.lRAtGLphLppLlcpcs............................slhp.h.ptlL.p.lhL.p.tLpcp.......DsalYLsuIps..l....ssLushh.Pc.plltpLhcpYhstp..p...................... 
0 57 93 146 +10196 PF10364 NKWYS Putative capsular polysaccharide synthesis protein Wood V, Coggill PC anon Pfam-B_99492 (release 21.0) Family Found only in Vibrio species, pombe and one other fungi, this is a the N-terminal 150 residues of a family of proteins of unknown function.\ There is a characteristic NKWYS sequence motif. 21.40 21.40 21.90 31.50 19.20 20.80 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.85 0.71 -4.44 6 61 2009-01-15 18:05:59 2007-08-31 15:01:44 4 5 39 0 17 63 6 135.80 35 42.11 CHANGED s-thhpDspshchERsphLhE.sF-cahNppYshpWFDNEIKplhGIDVaspsFspssGaQTapNcphSllVI+s-KLsp.tspslu-FLs.pshslVp-NpucNKWYSslhp-FKsoYp.ss.Fl-cMhsS+Lo+HFaops ......................h-thhpDs.thc.-hs..h.E.sF-p..ahNhpY.hsWFDNElKpshGIDVhppsFDpspGa.s..apscphslllI+sEpLsp.hspslu-F..Lsl.p...shslp.p.sNpucNKWYSs..lapcFKppaphsptahppMYsp.Lh+HaYop...... 0 4 9 13 +10197 PF10365 DUF2436 Domain of unknown function (DUF2436) Mistry J, Wood V anon Pfam-B_5683 (Release 22.0) Family This domain is found on peptidase C25 proteins and has no known function. 25.00 25.00 25.40 33.50 21.20 20.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.98 0.71 -4.61 3 36 2009-01-15 18:05:59 2007-09-05 10:47:55 4 9 9 0 3 38 1 156.60 60 14.28 CHANGED ARpVcGIu-ulhVolEDAs-.lRoGpAcIVLsAcsVWsDuSGYQFLLDADHDTYGsVIP.DTGPLasNsoVPSNLY.AsFEYpIPuNADPusoTQNhIssG.oAcVsIPuGTYDYsITNPpP.suKlWIAGsGGspPARhDDYVFEAGKKYTFTM+KsGSGDGT-L ................uRpVctIt-hlhlolcsAs-.lRA.spA+lVLtAcsVWGDsTGYQhLLDADHNpaGuVIP.sTGsLas.s.ossusLY.AsFEYhlPuNADPssTspNhIlsG.pupVsIPuGsYDYsIsNPpP.suKhWIAGDG.stPuRhDDasFEAGKKYpFTM++sG.GDGT-h.......... 0 3 3 3 +10198 PF10366 Vps39_1 Vacuolar sorting protein 39 domain 1 Mistry J, Wood V anon Manual Domain This domain is found on the vacuolar sorting protein Vps39 which is a component of the C-Vps complex [2]. 
Vps39 is thought to be required for the fusion of endosomes and other types of transport intermediates with the vacuole [3]. In Saccharomyces cerevisiae, Vps39 has been shown to stimulate nucleotide exchange [1]. The precise function of this domain has not been characterised. 21.40 21.40 21.40 21.50 21.30 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.46 0.72 -4.03 23 369 2012-10-11 20:01:01 2007-09-05 13:18:27 4 21 253 0 253 369 0 105.10 34 11.14 CHANGED lDTsLh+sYhhs.pssLlusLLRl..NaC-hchscphL.............ccpppap-Ll-hYas+phHccALcLLpcluc.............ttss.hpshppt.......lpYLppLssspl-Llhc.....aucWlL ....................lDTsLh+sYhts...ss......s.........l.lusLlR..........lt..NaC..clccs....pc.hL.............ccpp.+as-Ll.LYp..tKshHccALplLh.chup..........................pts.sshpshpcs.......................l...pYLppL.s..s..ppl....cLIhcautWlL............................................. 0 75 128 200 +10199 PF10367 Vps39_2 Vacuolar sorting protein 39 domain 2 Mistry J, Wood V anon Manual Domain This domain is found on the vacuolar sorting protein Vps39 which is a component of the C-Vps complex [2]. Vps39 is thought to be required for the fusion of endosomes and other types of transport intermediates with the vacuole [3]. In Saccharomyces cerevisiae, Vps39 has been shown to stimulate nucleotide exchange [1]. This domain is involved in localisation and in mediating the interactions of Vps39 with Vps11 [1]. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.39 0.72 -3.88 24 612 2009-09-13 14:58:24 2007-09-05 13:18:58 4 31 283 0 445 603 4 104.40 22 11.03 CHANGED LclLspHus+lsshpsLpL.............LPsshslpp......lpsalppslRppsppp+psplhpsLhpucplpspcphh..............ptcspplhls-pph.CslCcK+lGs.SsFshaPsu.slVHatCtcc .........................plLppaus..pl.cst.plLpl.............l..Ps..shslpp...............lps...aLtpsl.c..p.ts.p..ptc..p..splhps....Lt.p.u..c..t..l..p..lp.tc..hh.........................pt.pp.pthhl.sp.pph.C.hCpc......tl......s...........s......s.....s.........hsh.a..Ps.s...hhh.Hh.C...p........................................ 0 180 269 381 +10200 PF10368 YkyA Putative cell-wall binding lipoprotein Coggill PC anon Gene3D, pdb_2ap3 Family YkyA is a family of proteins containing a lipoprotein signal and a hydrolase domain. It is similar to cell wall binding proteins and might also be recognisable by a host immune defence system. It is thus likely to belong to pathways important for pathogenicity [1]. 22.00 22.00 22.10 22.00 21.90 21.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.32 0.71 -4.98 17 458 2009-09-13 15:40:04 2007-09-05 15:38:52 4 3 358 1 37 198 2 195.40 42 93.56 CHANGED shhuhhlLsGC.hspcst-plasthEpAscpE+.shppptcpLppLEcccpcLYppllp.sh-c.cplhphs-pAlpsspcREchlppEK-ulccupcEhcsscphh-cI-Dcch+cpAcplscshccRYcuapphhcsYpculph-K-LYphLpccchshcpls-plcslNpsYcclpctpccFNchTccYNctK.sFYctutlc .................................................................................s.hhSssLLAGC..hscK.....csh...h...pth-phtcpEc.slhssuKKlpcL-cctpcLappl.....spc..cstshtKtlcphlcNsD-RtK.hcKEc-ulcKupp-hKpAcsal-pI-sc....th....+Kp....scpl-cshKc+YchaschscuYpKAl....spEKpLachLppp..-sp.pslsEKsKslspsYKclpc.p-caschhpchscEK.sh.p..................................... 
0 7 19 28 +10201 PF10369 ALS_ss_C Small subunit of acetolactate synthase Coggill PC anon Gene3D, pdb_2fgc Family ALS_ss_C is the C-terminal half of a family of proteins which are the small subunits of acetolactate synthase. Acetolactate synthase is a tetrameric enzyme, containing probably two large and two small subunits, which catalyses the first step in branched-chain amino acid biosynthesis. This reaction is sensitive to certain herbicides [1]. 22.60 22.60 23.00 24.00 21.80 22.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.13 0.72 -4.25 160 3802 2009-01-15 18:05:59 2007-09-06 10:36:43 4 13 3503 7 996 2241 1813 73.70 39 42.97 CHANGED cRELhLlKVpu.ss..psRsElhplsclFRA+lVDVstco.hslElTGsssKlcAhlcllcs.aGIhElsRTGhlAls.RG ..........pRElhLlKVpA..su.....psRs....Elhchs.clF....Ru..pllDVoss..s.hslp.l.........oGsscKl-Ahlphlcs..hu.I.hElsRoGssulsRG........ 0 309 658 854 +10202 PF10370 DUF2437 Domain of unknown function (DUF2437) Coggill PC anon Gene3D Domain This is the N-terminal 50 amino acids of a group of bacterial proteins annotated as fumarylacetoacetate hydrolase-containing enzymes. In most cases members are associated with FAA_hydrolase Pfam:PF01557 further towards the C-terminus. 21.30 21.30 21.30 21.50 21.10 21.20 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.24 0.72 -3.58 44 659 2009-01-15 18:05:59 2007-09-06 11:59:18 4 1 623 6 229 541 241 53.70 35 20.45 CHANGED M.+lsRF....sssss.spaGhlE.G-s........lt..lsGsP.F.sshp.TGpphsLu-..V+LLAP ............M.RluRF......stsss..ss...aGhl-..sss................lt..lsGcP..a..u.......ss..ph..T.......G..pp.asLs-..VRLLuP............. 
0 74 162 205 +10203 PF10371 EKR Domain of unknown function Coggill PC anon Gene3D, pdb_2c42 Domain EKR is a short, 33 residue, domain found in bacterial and some lower eukaryotic species which lies between a POR (pyruvate ferredoxin/flavodoxin oxidoreductase) Pfam:PF01558 and the 4Fe-4S binding domain Fer4 Pfam:PF00037. It contains a characteristic EKR sequence motif. The exact function of this domain is not known. 19.40 19.40 19.40 20.30 18.60 18.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.90 0.72 -4.40 113 1931 2009-01-15 18:05:59 2007-09-06 16:54:57 4 33 1656 20 330 1395 43 62.40 40 5.31 CHANGED ss....ttp..tts........hst..ps.........P.........cFVpplhtshhutcGDpLPVSuhs....tDGTaPsGTupaEKRsIAl ..........................................ssps..........hhss...ss.....P..........-FV+slstsh.AtpGDsLPVSAhs....sDGTaPhGTopaEKRsIA...... 1 159 261 304 +10204 PF10372 YojJ Bacterial membrane-spanning protein N-terminus Coggill PC anon Gene3D, pdb_2fb5 Family YojJ is the N-terminus of a family of bacterial proteins some of which are associated with DUF147 Pfam:PF02457 towards the C-terminus. It is a putative membrane-spanning protein. 20.10 20.10 20.20 23.20 19.80 18.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.31 0.72 -4.11 6 127 2009-09-11 10:46:57 2007-09-06 17:03:14 4 1 127 3 15 76 0 68.40 58 33.78 CHANGED McphphSEtpaKsphppalcpIpt-huhlhpTlDEcDpClLC-hE-LpHhhs-hQslASSaYLQoYlp.F ..........MpEWGLSE.ELKIQTKQMIElAE+ELS....lMRpAID..KEDECILCKMEDIHHhLuNVQTLAATYYIQAYLSPY 0 5 9 10 +10205 PF10373 EST1_DNA_bind Est1 DNA/RNA binding domain Mistry J, Wood V anon Pfam-B_24280 (release 22.0) Domain Est1 is a protein which recruits or activates telomerase at the site of polymerisation [1][2]. This is the DNA/RNA binding domain of EST1 [3][4]. 
21.20 21.20 21.20 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.67 0.70 -5.19 69 696 2012-10-11 20:01:01 2007-09-07 14:14:08 4 18 255 2 489 771 76 252.20 17 29.44 CHANGED AhpYYphAtplhPssGpsaNQLulluhh.s...ssc.......................hpAl.YaahRSl.hsppPh.ssApsNLhthacc..........................................................................htphhptp.ht..........pttphhpphhhhFlpLhuhhaps..........hshpphspltppl..hpplphhLpp.........................................thhstphll+hlslslhshchhpp..................................t.......s...hphhhthhthlhpphsphh.............................pptsssspphLssl+l..hhsW..lttpsshhp...............ptpptphtt...hhthhsphlshhpth.....................h.sppts...pshhL.EDhp.......hpuFtPlttsh ...................................................................................AtpaYhpAhplhPps.Gp.sasQLAlLuh..p.........tpp......................................lpsl.a.aYhR....ul.ss.p..P....a...sAppsL.phhpc........................................................................................................t.p...............ptt..hpp..hhh..Fl.thpuhl.aht.........................shp.....p..h..t.t..htpph...............hpph.ph..hl.p.............................................h.st..h....hphhh....ls.hhshpthp..............................................................................p.....hthhht.hhthlhpth......................................................................................................t.......t..Ls.lph.....ta..hh............t...h..........................................................................h...h...hh............................................h.h.E-.........h.th.......h................................................................................................................................................................................................................... 
0 147 237 365 +10206 PF10374 EST1 Telomerase activating protein Est1 Mistry J, Wood V anon Pfam-B_39673 (release 22.0) Domain Est1 is a protein which recruits or activates telomerase at the site of polymerisation [1][2]. 21.70 21.70 21.90 21.70 20.90 21.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.52 0.71 -4.16 27 478 2009-01-15 18:05:59 2007-09-07 15:36:39 4 10 230 2 319 468 0 120.60 24 12.61 CHANGED ttpclcphLWcphaYplhpha+ph..................tpp.ph..phcplppth.palcpuhtFYpsllppltspYp.ls.hphh.......................................................................t....t.pth.thssph.phslhssaRsLlhLGDlpRY+s.htpss ...............................................h..pplEphLWppsahphIpt.h+t...............................................tpppps..sptp.clp..st..hppa.LctupsF....Ypp..Llpclps.pap..lpl.phh.................................................................................................................t......t.tt.p.t.hs.s.hsp.tt.hp.h...s.hh.ssp+CLlpLGDluRY+ppht...s............................................................ 0 90 150 233 +10207 PF10375 GRAB GRIP-related Arf-binding domain Wood V, Coggill PC anon Wood V Domain The GRAB (GRIP-related Arf-binding) domain is towards the C-terminus of Rud3 type proteins. This domain is related to the GRIP domain, but the conserved tyrosine residue found at position 4 in all GRIP domains is replaced by a leucine residue. The Arf small GTPase is localised to the cis-Golgi where it recruits proteins via their GRAB domain, as part of the transport of cargo from the endoplasmic reticulum to the plasma membrane [1]. 
19.70 19.70 20.90 20.80 19.60 18.30 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.23 0.73 -6.26 0.73 -4.00 14 129 2009-01-15 18:05:59 2007-09-07 16:52:08 4 3 124 0 96 131 0 19.00 51 3.40 CHANGED psccsVD+clloNlllsFL ...ps--sVD.R...cLVTNhlLpFL 0 28 56 83 +10208 PF10376 Mei5 Double-strand recombination repair protein Wood V, Coggill P anon Wood V Family Mei5 is one of a pair of meiosis-specific proteins which facilitate the loading of Dmc1 on to Rad51 on DNA at double-strand breaks during recombination. Recombination is carried out by a large protein complex based around the two RecA homologues, Rad51 and Dmc1. This complex may play both a catalytic and a structural role in the interaction between homologous chromosomes during meiosis. Mei5 is seen to contain a coiled-coli region. 21.10 21.10 21.60 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.41 0.70 -4.82 8 105 2009-01-15 18:05:59 2007-09-07 17:46:31 4 3 85 0 69 109 0 182.80 26 84.00 CHANGED ssslssppssh.h.pt.ssss.pstps+pPhusoL+E+L..+csRhpppshss.lKcl+l-spcscpshst.sts.............cph-SEspppps...h.c.+slCp.-ppphpsuosstls+..uhpchh+ccLcpcKt+Lp+Qlc-..........cpDpLRRLclV+hhchKNp.pcLppLI+KW++suQptLpcLpuhlu-..............pEs-..........chTLoELlspaGlD.sLlaaN ..............................................................................h................................h.tshtp.h..........pcht.p.pp...hp.hp.hp.cppp..p.t...........................hh.st..pt.p..p.h..p.hshs....p.p..h.ssshs..t.hp.p.h.s.pch.cpplpp.-Kt.......+L....cQlp-...........cc-hLR+LchVcha+.tKsp.ppLptLIcKWRsssQ.hLh...-Lp.pth.sp..........................................................-sc.................chohspLlcphslD.pLlaa.................................... 0 21 32 49 +10209 PF10377 ATG11 Autophagy-related protein 11 Mistry J, Wood V anon Pfam-B_21462 (release 21.0) Family The function of this family is conflicting. 
In the fission yeast, Schizosaccharomyces pombe, this protein has been shown to interact with the telomere cap complex [1,2]. However, in budding yeast, Saccharomyces cerevisiae, this protein is called ATG11 and is shown to be involved in autophagy [3]. 21.40 21.40 21.50 23.60 20.30 21.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.94 0.71 -4.25 28 284 2010-01-13 17:17:44 2007-09-10 14:03:15 4 6 233 0 202 291 0 141.20 27 11.99 CHANGED apsulh+RhcDlEph.....A+KlpK-s+stcpchpphtpc...KIuhcsFchGDLsLFLPTRpp..................................................................tt.ps................WAAFNl..usP+YFL+spstt.................................plcs................+-WhlGRlsclEcpsV.......................................s.cssNPFcLucGspWYhV-App. .........................................................................................cph.p.+p.hpt.pp..p....hp.phpp-...........KIuh+sFphGDLsLFl.scpps....................................................................................................................................asAFsl....usspYFL+ppsht......................................................plps.............+sWll.u+lhchEpphsc.........................................cspN.acLshGs+aYhlcss..................... 0 59 107 164 +10210 PF10378 RRM RMM; Putative RRM domain Griffiths-Jones S, Coggill PC anon Griffiths-Jones S, Domain This is a putative RRM, RNA-binding, domain found only in fungi. It occurs in proteins annotated as Nrd1 yeast proteins, which are known to carry RRM domains. It is not homologous with any of the other RRM domains, eg RRM_1 Pfam:PF00076. 
19.30 19.30 19.60 22.50 18.90 18.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.66 0.72 -4.38 7 104 2009-01-15 18:05:59 2007-09-10 15:38:39 4 10 104 0 79 99 0 54.50 47 6.11 CHANGED .hP....hP.TPFDhsYGtSLLPSpLLhGSPalssPtpsssh.usphusthshstpphp .........t.........PsT.s.FDMsa.sPLLPSQLLlGSPF..QPGoPuuF.sSPQFpshtshtptp..t.......................... 2 12 38 65 +10211 PF10379 nec1 Virulence protein nec1 Mistry J, Morningstar A anon Pfam-B_11405 (release 21.0) Family This is a family of virulence proteins that are found in pathogenic Streptomyces species. 20.00 20.00 20.60 24.50 19.30 16.50 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.28 0.71 -5.01 2 20 2009-01-15 18:05:59 2007-09-12 16:07:43 4 1 14 0 1 7 0 165.00 91 84.55 CHANGED hNLKIRTLGDLMRTSGVTPKTQSSSPKRRVLTSLATILAASGVVVANPSAAFANSTFTAVGYCDTNYQCAGGSGSSSRWSVNFDDGPTVSTIDLHELYRDQSDTMSSFRILGSVMSRANHPNETVTIHQQFYRDNGGQVPLGEYETRFRASSSNNAQRFNFDQGIPNLPWNDQVSSVAIWITRK .........VNLKIRTLGDLMRTSGVTPKTQSSSPKRRVLTSLATILAASGVVVANPSAAFANSTFTAVGYCDT.NYQCAGGSGSSSRWSVNFDDGPTVSTIDLHELYRDQSDTMSSFRILGSVMSRANHPNETVTIHQQFYRDNGGQVPLGEYETRFRASSSNNAQRFNFDQGIPNLPWNDQVSSVAIWITRK............ 0 0 1 1 +10212 PF10380 CRF1 Transcription factor CRF1 Mistry J, Wood V anon Pfam-B_25525 (release 21.0) Family CRF1 is a transcription factor that co-represses ribosomal genes with FHL1 via the TOR signalling pathway and protein kinase A [1]. 
25.00 25.00 32.80 32.80 21.80 21.60 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.81 0.71 -3.78 13 51 2009-01-15 18:05:59 2007-09-13 14:50:17 4 1 43 0 32 51 0 121.50 44 16.05 CHANGED sESTDE...............D-oLP...................ssss+s+h..hupKAKEVLSS.............................................................pssshRPPhLGTW.phDsKPFuIIDGLST+SL..................hs..ppppp.p-............p..........................ptst.sss.pppssssus-sssLs.....................LsELL.............................................Nh.............SELDD- ....sESTDE...............D-sLP...................sPss+pKp..hupKA+El..lSS.............................................................pslG.l+PPKLGoW..ph-s.KPFoIIDGLSTKSL........................ash.pptpp.pp................................................pppspss...pppppssuspsspLs.....................LsELL................................................Nh..........SEL-s..................................................................................... 0 6 18 30 +10213 PF10381 Autophagy_Cterm Autophagocytosis associated protein C-terminal Finn RD, Wood V, Coggill PC anon Pfam-B_10019 (release 7.3) Domain Autophagocytosis is a starvation-induced process responsible for transport of cytoplasmic proteins to the vacuole. The small C-terminal domain is likely to be a distinct binding region for the stability of the autophagosome complex [2]. It carries a highly characteristic conserved FLKF sequence motif. 20.10 20.10 21.30 22.70 15.80 19.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.65 0.72 -6.81 0.72 -4.82 20 317 2009-01-15 18:05:59 2007-09-18 14:32:30 4 5 286 1 226 312 3 24.90 65 7.80 CHANGED Vc.YLhlFLKFlsSVlPTIEaDhTh ...Vc.YLllFLKFlsSVlPTIEYDaTh.. 
0 80 125 186 +10214 PF10382 DUF2439 Protein of unknown function (DUF2439) Mistry J, Wood V anon Pfam-B_19050 (release 22.0) Family Proteins in this family have been implicated in telomere maintenance in Saccharomyces cerevisiae [1] and in meiotic chromosome segregation in Schizosaccharomyces pombe [2] 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.89 0.72 -3.77 22 223 2009-01-15 18:05:59 2007-09-19 16:23:01 4 16 177 0 160 201 0 81.80 29 12.84 CHANGED VtE.apCLYTsplppKpKpWpDGhL+aa..phNs+l.Lasp.ssshlsstahppp.......p.hs.spEhcl.p.thLlpl...s-hhpphpp-ls ............c.apsLYT.cph.p+.Kp.KpW.p.DGhL+hp....phsp+hhLY..D-...pushlsshahpst..............................ttlpsGc-h.chpp..hLlpl...p-hht...................................................................... 0 43 78 124 +10215 PF10383 Clr2 Transcription-silencing protein Clr2 Wood V, Coggill P anon Wood V Family Clr2 is a chromatin silencing protein, one of a quartet of proteins forming the core of SHREC, a multienzyme effector complex that mediates hetero-chromatic transcriptional gene silencing in fission yeast. Clr2 does not have any obvious well-conserved domains but, along with the other core proteins, binds to the histone deacetylase Clr3, and on its own might also have a role in chromatin organisation at the cnt domain, the site of kinetochore assembly. 
25.00 25.00 26.10 27.80 24.30 24.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.93 0.71 -4.18 13 79 2009-01-15 18:05:59 2007-09-19 16:32:14 4 4 73 0 63 83 0 132.90 27 19.30 CHANGED lappGIFlGAEhLhVGDsVRL...........tsh.ht.t...psstssssDVMVI-....EIplclhpsssshpu.......pVRlsGclYTsscppA.h..p.sshs..sspP........hsh-EVhsphp.hlGMuuhucW.............apLhs.s........sslclopshVlGRhYE ....ahpGlFlGAEhlhlGDsVRL.............hsht..p.s...............ssts..pss-lhlIc....pIphchhs.sssshts..........slplhGclYshs.ppu.tt....s.....spP..................hh.cElhpphp.hl.....shtthupW............................athht.t.............psspls.splhGRhY...................................................... 0 14 32 50 +10216 PF10384 Scm3 Centromere protein Scm3 Mistry J, Wood V anon Pfam-B_19394 (release 21.0) Family Scm3 is a centromere protein that has been shown in Saccharomyces cerevisiae to be required for G2/M progression and Cse4 localisation [1]. The C terminal region of Scm3 proteins is variable in size and sometimes consists of DNA binding motifs [2]. 24.30 24.30 24.30 25.90 23.60 24.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.78 0.72 -4.48 35 154 2009-09-14 14:05:34 2007-09-19 17:11:47 4 9 141 3 104 165 0 57.60 34 9.22 CHANGED pcltpt+ppucp+hKstacsIh-KYup.-.cs.uDEIDL...pTGc...IVhDNGHL+slpscp ............cltpt+ptucp+h+stappIh-KYsp....-s.u.DEIDL...pTGp...IlhDNGHlcphpsp.t.............. 0 25 51 80 +10217 PF10385 RNA_pol_Rpb2_45 RNA polymerase beta subunit external 1 domain Coggill PC anon Gene3D, pdb_2a6h Domain RNA polymerases catalyse the DNA-dependent polymerisation of RNA. Prokaryotes contain a single RNA polymerase compared with three in eukaryotes (not including mitochondrial or chloroplast polymerases). This domain in prokaryotes spans the gap between domains 4 and 5 of the yeast protein. 
It is also known as the external 1 region of the polymerase and is bound in association with the external 2 region [1]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.04 0.72 -4.17 149 8806 2009-09-11 00:06:38 2007-09-21 14:13:17 4 34 7040 41 964 6923 2365 66.20 46 8.12 CHANGED YR+V.....ps.............GpV...oc-lhYLoA.-EcpahIAQAss..sl..........-ccG.phh....s.-hVhsR.hpu..-h...hhssspcV-ahDVS .....................YR+V......ts........................GhV....TDE.l..pYLoA.EEs.palIAQANu..sL..........D.-.c.G....pFs..........-.-hVssR..ppG.......Es................thhs...-cVDYMDVS........... 1 338 646 822 +10218 PF10386 DUF2441 Protein of unknown function (DUF2441) Finn RD, Coggill PC anon Gene3D, pdb_2aua Family This is a family of highly conserved, predicted, proteins from Bacillus species. The structure forms a homo-dimer. The function is unknown. 25.00 25.00 25.00 26.60 24.70 24.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.70 0.71 -4.15 2 90 2009-01-15 18:05:59 2007-09-26 11:48:52 4 1 90 2 4 56 0 137.10 76 70.61 CHANGED NTLaHFFFEREpLNusGcDuhQILpcHYpNpELHIpNENApVVMsYMDQTIRAhRETIVEMlRLQEaPpYPSRLSCLYAAKSYEDALKWKALFDSYNREVLQIVKLpVIGsSFEGDGNLLPKEDuIPFSQKhEQAREYWKG .....................................NTLYHFFFE+EQLNusGEDuhpIlppHYKN-ELHINNENApVVMNYMDQTIRAlRETIVEMVRLQEaP-YPSRLSCLYAAKSYEDALKWKALFDSYNREVLQIVKLRVIGsS.FEGDGN.LLPKEDGI...PFSQKhE..QAREYWKG.... 0 2 3 3 +10219 PF10387 DUF2442 Protein of unknown function (DUF2442) Finn RD, Coggill PC, Bateman A anon Gene3D, pdb_2auw & Pfam-B_2245 (release 23.0) Domain This family of bacterial and fungal proteins has several members annotated as being putative molybdopterin-guanine dinucleotide biosynthesis protein A; however this could not be verified. Hence the function is not known. This family also includes the DUF3532 that was found to be related and was merged into this family. 
Members of this family also fall into the NE0471 N-terminal domain-like superfamily, a family of proteins with a unique fold in SCOP:143880. 21.10 21.10 21.20 21.40 21.00 20.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.66 0.72 -3.96 153 848 2009-01-15 18:05:59 2007-09-26 12:29:25 4 11 504 5 290 765 94 71.90 20 66.29 CHANGED shsVphtss.....plhlphsD....Gcphtlsls.h...hppushppl..........pphplt.tstulpWs........shD.DluscuLhtstts .................hpVphhpsh.plhlphs-....Gpphhhshp.hh......ppsshppL.........h...ppspl...p.st..slpWs..............s.s.hDlsspsLht....t................. 0 92 202 258 +10220 PF10388 YkuI_C EAL-domain associated signalling protein domain Finn RD, Coggill PC anon Gene3D, pdb_2bas Domain In Bacillus species this highly conserved region of the YkuI protein lies immediately downstream of the EAL (diguanylate cyclase/phosphodiesterase domain 2) Pfam:PF00563 domain so that together they form a monomer which dimerises for its enzymatic action. The region contains three alpha helices and five beta strands and is the C-terminal half of the structure. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.13 0.71 -4.57 14 277 2012-10-01 23:40:40 2007-09-26 14:17:17 4 6 265 6 64 194 4 149.80 31 33.45 CHANGED hl-cshhK-+Lpp-hcpFIpaE+KKLps.aphsEphppclpph.lsclK+spp.s.Ehlhpluptls-hsFRlYlCDc-GaQhosNhhK.psGpWhlps-ahtKNWSWRPYFLENIh+Mppcp+G..llSDLYoDIETGEhIRTaSYPls-phYLFlDlsYpaLaEp-uLh ......................................................................................hptaht.chpphtt.hph.tp.h.ppltph.lt....p.cpst.p......s..p....hl.....t.....h.sp.t.l..s......ps..s..h..R..IahssccGhQpoGNs..h+.....p......s.....upW.....hlp....s..paht+NWSWRPY..FlcsIh..c.h.+..pp+s..hlS-hYtDlpTsch....hhThS..hs...l...s..s.p.hLhl...Dl.............................................. 
0 24 43 52 +10221 PF10389 CoatB Bacteriophage coat protein B Finn RD, Coggill PC anon Gene3D pdb_2ifo Family CoatB is a single filamentous bacteriophage alpha helix of approximately 44 residues. It is likely to assemble into a complex of 35 monomers in a Catherine-wheel like formation [1]. It is the major coat protein of the virion. 24.80 24.80 25.50 25.90 24.10 24.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.13 0.72 -4.10 6 39 2012-10-01 20:22:05 2007-09-26 17:16:45 4 1 34 1 11 37 4 46.50 34 62.44 CHANGED AuuuGlDVu-VssAIpuAtusIuoIGuuVLsVlVul+VaKWlRRuh .............usutuhDlssVsoulsuAtssluslGuAVLslhluItlaKalR+Ah.. 0 1 4 9 +10222 PF10390 ELL RNA polymerase II elongation factor ELL Wood V, Coggill P anon Wood V Family ELL is a family of RNA polymerase II elongation factors. It is bound stably to elongation-associated factors 1 and 2, EAFs, and together these act as a strong regulator of transcription activity. by direct interaction with Pol II. ELL binds to pol II on its own but the affinity is greatly increased by the cooperation of EAF [1]. Some members carry an Occludin domain Pfam:PF07303 just downstream. There is no S. cerevisiae member. 
21.60 21.60 23.80 23.80 21.50 19.90 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.86 0.70 -5.18 19 307 2009-01-15 18:05:59 2007-09-27 14:12:00 4 7 103 2 164 239 0 244.00 31 44.81 CHANGED pEstsYGLshuphsss..pholhaVKLT-oAl+AlppaQp.....p...sh+PsIpF.pGspG.................................................................hlpIPpsc.sss...................hppFsFhlSslup-sspG.SaDClpQhhtpsGsspLpsLGsIp-KlslpAosDSYpt....oRpphspsEE-p+s+soh.IKsssp.su++Vph..+ps...................................................................................ssshssssP...ppcpopPh...............................................................................p.sssh+pts............ssssppRshR-RllHLLAL+PY+KsEllhRLp+DGlspp-+psLsslLppVusls.p-ssasL+chhap-lQ.cDWPsYoEs-+phlcphLs+pLs .........................................................................................................................................................................................................................................................................................................................p.......................phh.l+Ls-sshpshpthpt...............ps.ItF..pG.pG.................................................................hlplPtss....sp...............................p.FsFhlSsh.sp-tspG.oh-....s..lpQh....stpGttpLpsLGslp-+l.slpA..s.sDSYph.......s+pphspsEE-.p+..spss...h..IK..sss...h....s.c.psph...pp.s..........................................................................................ths..ssss...ppppspPh.......................................................................................................................................p.t.thpptt..............................sslppRshR.-RllHLLAL+sa..+K.s.ELlh.R..L.p+.-G....l..s.p..-+stlss...hLppV.uphs...t....ct........oa...sL+c.hap-lp.cDWPhYo.-.-pp.lcphh.p...t.................................................................................................................................................
........................ 0 38 49 91 +10223 PF10391 DNA_pol_lambd_f Fingers domain of DNA polymerase lambda Finn RD, Coggill PC anon Gene3D, pdb_2bcq Domain DNA polymerases catalyse the addition of dNMPs onto the 3-prime ends of DNA chains. There is a general polymerase fold consisting of three subdomains that have been likened to the fingers, palm, and thumb of a right hand. DNA_pol_lambd_f is the central three-helical region of DNA polymerase lambda referred to as the F and G helices of the fingers domain. Contacts with DNA involve this conserved helix-hairpin-helix motif in the fingers region which interacts with the primer strand. This motif is common to several DNA binding proteins and confers a sequence-independent interaction with the DNA backbone [1]. 29.80 29.80 29.80 29.80 29.70 29.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.32 0.72 -4.41 58 544 2012-10-03 02:11:09 2007-09-27 16:59:55 4 29 240 212 323 547 116 52.00 38 9.68 CHANGED lphFsslaGlGspsApcaap.pGhRTL-DL...cp.pt.pLoptQplGlcaY-Dhpp ......hphFsslaGlG...spoApcaap..pGhRoL-...Dl.............+p....ps..pL..opp..QplGlcaY-Dht........ 0 92 151 228 +10224 PF10392 COG5 Golgi transport complex subunit 5 Wood V, Coggill PC anon Pfam-B_24958 (release 21.0) Family The COG complex, the peripheral membrane oligomeric protein complex involved in intra-Golgi protein trafficking, consists of eight subunits arranged in two lobes bridged by Cog1. Cog5 is in the smaller, B lobe, bound in with Cog6-8, and is itself bound to Cog1 as well as, strongly, to Cog7. 
29.00 29.00 29.10 29.30 28.90 28.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.38 0.71 -4.19 16 280 2012-10-03 17:31:52 2007-10-01 16:23:45 4 6 245 0 199 280 3 126.10 26 20.85 CHANGED -sFL-ssFsspsFuNslLh.............tspstssssLDlsoslc+lpaDlpElDp+lcphhssst.pLLsphsshpptpu.llsplcsslptLstSacRLcscVlpPaccAtplpssLp+lapTscLLRsshhaltLuppL ........................sFL.sssFssppassphlt.........................................................thslus.Ls+Lthslppl-pclcp.lsspttsL.Lspssshpphps.llptlpsplpsLptuhcRLcpcll-PapphpphpttLp+lptsscLLRpshRhLtLu++L......... 0 63 110 164 +10225 PF10393 Matrilin_ccoil Trimeric coiled-coil oligomerisation domain of matrilin Finn RD, Coggill PC anon Gene3D, pdb_1aq5 Domain This short domain is a coiled coil structure and has a single cysteine residue at the start which is likely to form a di-sulfide bridge with a corresponding cysteine in an upstream EGF (Pfam:PF00008) domain thereby spanning a VWA (Pfam:PF00092) domain. All three domains can be associated together as in the cartilage matrix protein matrilin, where this domain is likely to be responsible for oligomerisation [2]. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.09 0.72 -4.59 14 256 2009-01-15 18:05:59 2007-10-02 16:16:30 4 52 50 3 93 230 0 44.00 40 7.85 CHANGED pls.pEDsCtCEulltFQppVpstlppLspKL-sVo+RLptLEp+lh ...........cDsCtCEulltFQspspu.tlp...p..LT.p....+.......L...tt...hopRlp.LEspl........ 1 5 12 33 +10226 PF10394 Hat1_N Histone acetyl transferase HAT1 N-terminus Finn RD, Coggil PC anon Gene3D, pdb_1bob Domain This domain is the N-terminal half of the structure of histone acetyl transferase HAT1. It is often found in association with the C-terminal part of the GNAT Acetyltransf_1 (Pfam:PF00583) domain. It seems to be motifs C and D of the structure. 
Histone acetyltransferases (HATs) catalyse the transfer of an acetyl group from acetyl-CoA to the lysine E-amino groups on the N-terminal tails of histones. HATs are involved in transcription since histones tend to be hyper-acetylated in actively transcribed regions of chromatin, whereas in transcriptionally silent regions histones are hypo-acetylated [1]. 21.40 21.40 21.60 21.70 19.70 20.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.45 0.71 -4.17 45 314 2009-01-15 18:05:59 2007-10-02 16:40:42 4 9 267 3 220 310 2 156.60 28 35.93 CHANGED sssuN-AlplplV..............................p....sstphp..t.......ss.FpPpFTa.IFG-sE..pIFGYcsLpIpLhasusohcsalplpYspK.......sshphsD....lpppLtc..hlPp......................s.hhpscp.....-Fhpsl..pp......................cpcs.ac.PsGp..hl.csa.......................s..ss....tp...ac......................................................................Iapsslts.ss...hpch+pRlQhFlhhFIE ........................ssuN-AlplpLV......................................p.........sspshtpt.............tsFpPpaoaplFG-sE..pIFGY+s..LpIpLhasAsshpshlplp........YspK......th..sshpssD....lpsplpp..hlPt...............................s..hhpsts.....-Fhptl....c..........................................cpts...Fp..P...Gp..ll..csa...................................s..ss......tp.............ap................................................................................Ia+s..shs..s...sshpchapRlQhFlhhFIE........................................................... 0 74 119 183 +10227 PF10395 Utp8 Utp8 family Mistry J, Wood V anon Pfam-B_24590 (release 22.0) Family Utp8 is an essential component of the nuclear tRNA export machinery in Saccharomyces cerevisiae. It is a tRNA binding protein that acts at a step between tRNA maturation /aminoacylation, and translocation of the tRNA across the nuclear pore complex [1]. 
21.00 21.00 21.40 21.60 20.70 19.90 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.20 0.70 -6.22 10 40 2009-01-15 18:05:59 2007-10-03 11:45:37 4 2 40 0 29 41 0 652.60 33 98.20 CHANGED MPSlopPFhLusLP+luSLsshcptsssspsus.s.p.cssplslGIStSoISpYlIsPTPKLlasaPlPsTsIVsuhsVhshss..............................ppclashGLosp+Kp+hLplppc...................stsssussEssspacl.Klcc+llslKlh..scsplIlVlhcsGhlchaca...............pLpht+sh..cl.Yo..pFVpchc.pstp-allllss.pscK..lsYKLlpL....sscssulhEL.sSsIlEshslssophsYphGpLYpL..ssscIplYolPsh.plppoIplP.hlscp....-hlSlpslusNRVLLossNKIYLlDlhasulLsph-.....op.pshQlLhsuVlsspspupso+.ThAlslshKNGsN.sotLcVINIsVGosoLc-.uLGKShppsss.....ppsthLcsLFs-csh...........spsElsslphhclLp-Lp..pppclpp............FDsIhhphLK.cK..........................Ea.sEsDRhl.DspFlsplL-LIFs.......pFtss-..a....P.+oLhYLLTHPLFPhs+T+GLLsphc..spscLhKQAIVTCPNLPLc-LLppLFo.pNsE.lhhDlshRlLp-ao+c-IppshK+L.....uplDlpshlshllpssss............phapLlsllIDusGLFuh-t-sl-cLsuhI-scVplhspNsphlsLl-phhLpspstspsupppps.....................................ccsspslstp...YoVEhL-l 
.....................................................PsLopsatlssLP+lssLs..ph.ss.....h.sus...p.sospIslGlStS.ISpYIlpPTPKLlasaslssTslVsshslhphps................................chashGLpspKp.phL.ltpp.....................st..ssssphhsphcl.K...h.cs+lhslKh....tpsphlhslhpNGhlphaca..................plphhpsh..clsYs..palpphp......t.tpcalhllss..pss+..lsa+Llpl......pssslhEl.sp.hhcshshpsuphsYp..GpLYpL.....spt..pI.haSlPph.plppsIpls.hlsc....pcllShpslusNRlLLossNpIYLLslhasSlLophc........ho+s+shplLpssVlss....c.pS.p..so+.ThAlslshKNtsN.sopLclINlDVGoNs.Lp-.uLGKShp.....ssss...............sps.lLcsLFs-cshs..........tss-lt.pl...chlccLp..pspDlsp............F-sIhhthLK.+c..........................Ea.s-pDRhl.D.sFlspVlcLIFs.......pFp.ss-...a......P.+TLTaLLTHPLFPhphTcsLLphLc..spPRLhKQAIVTCPNLPlp-LLppLhs......lcNpE.......l...hhDlshRllp-Fop-pIppphKcL..................splDlpshIphllshsps............phapLLslllDuhGL...Fs.h....ctshlcpLsphI-ppVchhppNophhsLl-pthhpp.thtpppscppss.....................................ppps.........hYolEhL-l................................................................................................... 0 5 16 27 +10228 PF10396 TrmE_N GTP-binding protein TrmE N-terminus Finn RD, Coggill PC anon Gene3D, pdb_1xzp Family This family represents the shorter, B, chain of the homo-dimeric structure which is a guanine nucleotide-binding protein that binds and hydrolyses GTP. TrmE is homologous to the tetrahydrofolate-binding domain of N,N-dimethylglycine oxidase and indeed binds formyl-tetrahydrofolate. TrmE actively participates in the formylation reaction of uridine and regulates the ensuing hydrogenation reaction of a Schiff's base intermediate. This B chain is the N-terminal portion of the protein consisting of five beta-strands and three alpha helices and is necessary for mediating dimer formation within the protein [1]. 
21.60 21.60 21.60 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.38 0.71 -4.11 270 4354 2012-10-01 23:12:28 2007-10-03 14:31:21 4 10 4206 9 1027 3258 2398 116.40 44 25.73 CHANGED TIsAlATs..sG.pGuluIlRlS.....Gspuhplspp...lh.............sp.....t.p.........s...+pspa...s..plhD....ts.........sp.......hlD-s.l.llhFhuP........pSFTGEDVlElpsHGGhsllptlLp.tllph..............G..............sRhAcPGEFTcRAFLNG+ ..........................................TIsAluTs...G...cGuluIlRlS.....Gs..p.A...h.p.l...u.pplh...........................sc.....p...............s..+phpY..s...plhD............ts....................................sp.................llD..c.s..l...lhh.ah.u..P.........pSF.TG..EDllElpsHGG.lllstlLphllph...............G...............................................sRhA..cPGEFTcR.....AFLNG+....................... 0 354 652 860 +10229 PF10397 ADSL_C Adenylosuccinate lyase C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1c3c Domain This is the C-terminal seven alpha helices of the structure whose full length represents the enzyme adenylosuccinate lyase. This sequence lies C-terminal to the conserved motif necessary for beta-elimination reactions [1], Adenylosuccinate lyase catalyses two steps in the synthesis of purine nucleotides: the conversion of succinylaminoimidazole-carboxamide ribotide into aminoimidazole-carboxamide ribotide, the eighth step of the de novo pathway, and the formation of adenosine monophosphate (AMP) from adenylosuccinate, the second step in the conversion of inosine monophosphate into AMP [2]. 
22.90 22.90 22.90 22.90 22.70 22.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.43 0.72 -3.97 148 3649 2009-01-15 18:05:59 2007-10-03 14:33:22 4 11 3192 22 1012 2751 1563 82.20 29 18.48 CHANGED tGllhopplhhsLs.p.pGluRppAaclVpcsuhpuhp......p.sps.........ht....-hLt...pDsplt...thlopc-.l.cplh.DPptalspssplhc+lh ..................................GLlhop+VhhsLl..c..pG.......hsRE.cAa-lVptpuhpuhc.......................................p..pss............hh....phLt.....sD.scls.......t.h.Lo.p......--.l..c.chh.....Dsp.haht....pssslhcRh.................................. 0 333 643 850 +10230 PF10398 DUF2443 Protein of unknown function (DUF2443) Finn RD, Coggill PC anon Gene3D, pdb_1zke Family This is a small family of highly conserved proteins from bacteria, in particular Helicobacter species, The structure is a bundle of alpha helices. The function is not known. 25.00 25.00 31.70 51.50 22.30 21.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.61 0.72 -4.18 3 57 2009-01-15 18:05:59 2007-10-03 16:39:50 4 1 56 6 5 26 0 77.80 81 99.60 CHANGED MFEKIDcIL+sIEDSR-EIEILLNlAKISLlDYIMIKRGSMDMPEHLshshLsQIDEEVEKLKEpI-ALNKIKKELLlF MFEKIRKILA-IEDSQNEIEMLLKLANLSLGDFIEIKRGSMDMPKGVNEAFFTQLSEEVERLKELINALNKIKKGLLVF..... 0 2 4 5 +10231 PF10399 UCR_Fe-S_N Ubiquitinol-cytochrome C reductase Fe-S subunit TAT signal Finn RD, Coggill PC, Bateman A anon Gene3D, pdb_1zrt Motif This is the N-terminal region of the E or R chain, Ubiquitinol-cytochrome C reductase Fe-S subunit, of the hetero-hexameric cytochrome bc1 complex. This region is a TAT-signal region. The cytochrome bc1 complex is an oligomeric membrane protein complex that is a component of respiratory and photosynthetic electron transfer chains.\ The enzyme couples the transfer of electrons from ubiquinol to cytochrome c with the the generation of a protein gradient across the membrane [1]. 
The motif is also associated with Rieske (Pfam:PF00355), UCR_TM (Pfam:PF02921) and Ubiq-Cytc-red_N (Pfam:PF09165). 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.99 0.72 -4.79 49 936 2012-10-02 00:19:25 2007-10-03 17:10:36 4 4 874 26 297 706 1269 39.70 41 20.61 CHANGED hssspsspsoRRDFLalATuusuuVGuuussWPhIsQMNPs ...........ppshsssRRcFLhhA.TussGuVGususAh.PFlsShsPS........ 0 73 167 224 +10232 PF10400 Vir_act_alpha_C Virulence activator alpha C-term Finn RD, Coggill PC anon Gene3D, pdb_1yg2 Family This structure is homo-dimeric, and the domain here is the C-terminal half of the structure, often associated with PadR upstream, (Pfam:PF03551), which is a transcriptional regulator. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.74 0.72 -3.62 37 838 2009-09-11 09:27:19 2007-10-03 17:40:19 4 3 630 1 235 647 7 88.30 21 48.18 CHANGED sslR-EhLlKlhusshhssssltsplpcphphppp+LspYcclc.pchhss...tpth.stpphhpaLsLctGlphEpthlcWs-cslthLsthp ............hRD-hhlKlhhs...sh.hs.s.sshht.lp...cphpt...ppcplpp.app...hc..pphass..............tt...sppphhphLsLct..ulthcpshlpWh-cslttLp...t................................................. 0 73 158 207 +10233 PF10401 IRF-3 Interferon-regulatory factor 3 Finn RD, Coggill PC anon Gene3D, pdb_1zoq Family This is the interferon-regulatory factor 3 chain of the hetero-dimeric structure which also contains the shorter chain CREB-binding protein. These two subunits make up the DRAF1 (double-stranded RNA-activated factor 1).\ Viral dsRNA produced during viral transcription or replication leads to the activation of DRAF1. The DNA-binding specificity of DRAF1 correlates with transcriptional induction of ISG (interferon-alpha,beta-stimulated gene). IRF-3 preexists in the cytoplasm of uninfected cells and translocates to the nucleus following viral infection. 
Translocation of IRF-3 is accompanied by an increase in serine and threonine phosphorylation, and association with the CREB coactivator occurs only after infection. 20.80 20.80 20.80 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.92 0.71 -4.70 43 649 2012-10-01 21:55:46 2007-10-04 10:54:49 4 8 78 11 242 858 0 168.80 31 40.48 CHANGED hspLclshaYpGchVt.phplssspGsRlhh....tsssststhht.............shpplhFPssst..l.....sppQpphsccLLssl-+GllLphssp.ulaupRLsps+VaWsusss.tss.....tPs.pLc+s.ppsplFshppFlp........................-Ltpa.p.pstssPp.a.plhLCFG-chss...pstpc+LIhVpl.PhhsRhlhEhsp ..............................................hpLcl.ph.hY+Gc..Vt..phslsssp.GCRlhh............tss.stst...s.................shppVt..F..Psspt...l............sp....pQ....pp....hspcLL.stl-+GllLthssp...u......la.u.pRL..Cps+VaWsuspsssts..........tPs...hlpRp.ppsclFshp.p.Flp.........................-Lhta...p.....pttt......P....p.....a.plhLCFGE-aPs.t.pshp++LIhV.pl..shhs+.lhp.h........................................................................... 0 25 43 115 +10235 PF10403 BHD_1 Rad4 beta-hairpin domain 1 Bateman A anon Bateman A Domain This short domain is found in the Rad4 protein. This domain binds to DNA [1]. 20.60 20.60 20.60 21.60 20.50 17.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.81 0.72 -4.45 56 421 2009-01-15 18:05:59 2007-10-05 13:00:34 4 18 274 3 311 451 1 57.10 35 6.66 CHANGED phscshPcol...........pshKsHPhalL-+pL++pEslhP..sspslGhhps.t...................EsVYtRssVh .....h..pcshPpsl...........psaK..sHPl........YsLcRaLp+pEslhP...ssp....lGhhps.....................EsVYpRssV....................... 0 91 161 258 +10236 PF10404 BHD_2 Rad4 beta-hairpin domain 2 Bateman A anon Bateman A Domain This short domain is found in the Rad4 protein. This domain binds to DNA [1]. 
21.30 21.30 21.40 22.70 21.20 20.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -9.27 0.72 -3.66 62 397 2009-01-15 18:05:59 2007-10-05 13:01:31 4 16 257 3 291 426 3 61.20 34 7.04 CHANGED L+St-pWh+p..GRslKhsEp.Ph...KhVpt.t.......p........p.tpp...............................phsLYuhaQT.......-.ahPP ......l+St-sWh+p..GR.sl+..hGE.....p..Ph...KhVpt.pst.....ttp.........pttppt.............................................................thsLYuhaQT.......-.YhPP............. 0 80 150 241 +10237 PF10405 BHD_3 Rad4 beta-hairpin domain 3 Bateman A anon Bateman A Domain This short domain is found in the Rad4 protein. This domain binds to DNA [1]. 20.60 20.60 20.70 22.90 20.30 19.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.52 0.72 -4.27 34 411 2009-01-15 18:05:59 2007-10-05 13:03:48 4 17 264 3 308 438 6 76.00 38 8.82 CHANGED lP+NsaGNl-latssMlPhGsVHlp......hsslt+lA+...........tLsIDaA.AVTGFcF......+stpspPVhsGlVVscEap-tlhpsa ....lP+NpaGNl-l..a..hss.MlPtGsVHlp............................hss...ht+lA+..............................pL.sID..aApAVsGF-F...............+sttu..hPlhpGlVVscEhc-hlhpsh................ 0 91 162 258 +10238 PF10406 TAF8_C Transcription factor TFIID complex subunit 8 C-term Wood V, Coggill P anon Wood V Domain This is the C-terminal, Delta, part of the TAF8 protein [1]. The N-terminal is generally the histone fold domain, Bromo_TP (Pfam:PF07524). TAF8 is one of the key subunits of the transcription factor for pol II, TFIID. TAF8 is one of the several general cofactors which are typically involved in gene activation to bring about the communication between gene-specific transcription factors and components of the general transcription machinery [2]. 
19.10 19.10 19.70 19.30 19.00 18.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.42 0.72 -3.91 25 316 2009-01-15 18:05:59 2007-10-08 10:18:23 4 7 242 0 219 302 0 49.80 39 13.82 CHANGED sYIPsa..LPsFPscHTYppTPhappslo.D.+plRc+hsc-uRhsE+uLh+Lh .........aIPsa..hPsFPssHTYhp.TPha..p..c.ho.....DhptlR..c....+tspppRpsEcALp+h............................ 0 60 110 174 +10239 PF10407 Cytokin_check_N Cdc14 phosphatase binding protein N-terminus Wood V, Coggill P anon Wood V, Pfam-B_23062 (release 22.0) Family Cytokinesis in yeasts involves a family of proteins whose essential function is to bind Cdc14-family phosphatase and prevent this from being sequestered and inhibited in the nucleolus. This is the highly conserved N-terminus of a family of proteins which act as cytokinesis checkpoint controls by allowing cells to cope with cytokinesis defects. These proteins are required for rDNA silencing and mini-chromosome maintenance [1]. 21.20 21.20 21.20 21.90 21.10 20.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.35 0.72 -4.36 16 70 2009-09-11 05:31:51 2007-10-08 14:49:30 4 2 54 0 49 73 0 73.10 37 7.39 CHANGED ppsKKFLah.TcsssoLhpLs-EIhs+hpKlYPs.p..clcIhoLQDpstCDLDP-FlVcDVFs.ssshVRVILcsch ..........s.h+KFLah.Tcs.ssoLhpLupEIls+acKlYPshp...sl-.IloLQDp..s..u..CDLDs-FlVcDVFs.ssshl+Vllcs-.h............... 0 15 29 44 +10240 PF10408 Ufd2P_core Ubiquitin elongating factor core Wood V, Coggill PC anon Wood V, Pfam-B_4085 (release 22.0) Family This is the most conserved part of the core region of Ufd2P ubiquitin elongating factor or E4, running from helix alpha-11 to alpha-38. It consists of 31 helices of variable length connected by loops of variable size forming a compact unit; the helical packing pattern of the compact unit consists of five structural repeats that resemble tandem Armadillo (ARM) repeats. 
This domain is involved in ubiquitination as it binds Cdc48p and escorts ubiquitinated proteins from Cdc48p to the proteasome for degradation. The core is structurally similar to the nuclear transporter protein importin-alpha. The core is associated with the U-box at the C-terminus, Pfam:PF04564, which has ligase activity. 20.40 20.40 21.50 21.20 19.40 20.10 hmmbuild -o /dev/null HMM SEED 629 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -12.77 0.70 -6.28 50 532 2009-01-15 18:05:59 2007-10-09 16:35:59 4 19 290 5 355 518 10 538.70 29 57.82 CHANGED LGslhslSshs............sphspta......h..tsstptsppplpsthpslptphpshhppLapllppll+suspsR..pphLpahupl..lphNptRpph..........................phc.ppluS..-GFhhNlshlLl+LspPhhc............hsKlc+IDssY..................................hhp..sshlcl.......p-ETclpus.ccscphhspptp..............................................pFlo-hFFLThpshahGhtshhpchpcltpplpchpcphpphptp...............p..htphppplcthhspphshcsh.lhcsshhppshpFhshsstalh+lh...........s.tp.hsp.pthphP......h.tt.........................................................................s.........................aphlPEahl-slhsah...hahhph.......sshhhst.phcphlphslhhhpssp.hlcNPaL+u+llclLhhhh.sht..tppthh.s........lhpsppls.....pcpLlhuLlchYl-lEpTGsp....sQFY-KFNhRapIstlhcplW.ppsta+p.pltc.scs...........................................................................s.s.hFlRFlshllNDssa.LLDEulspLpcI+ch..ppphpstsphtsh......................................sppp...cpc....................................................pppplpptcpps+ohhtLupcolphhphhT..pplscsFh.psEll-RlAuMLNYsLptLsGPKsps.L.KV+ssccYs..FcP+plLsplsclYlNLs..........ppcp...FltAVupDGRSas.phFpcAhpllpchs.lhstp.lpphpphspclcc 
............................................................LGshhp.hSsh.................stsspta...............a......ss.t.s.tph.ts.ttslp.hhp.hppplhplh.ppl..l....hs...sp...sR....pthLp.ahuth..lphNttRtth..........................php.p...h..huo....DuFhhNlhhlL.pLsp.h.................KltplcspY...................................hp..pttlph........pcETpl.ss.pt.p.thhtp...t...................................................pF.o-sFaLThtshahuhhs..hhpp....h..p.h..pplcchppt.hpphptpt..............................p.hhpphcpphcthhp.hhsh..csh..l.......h-...ph.ppshp.Fht.hhh.hll.plh.....................t............thphP...............Ls.t.............................................................................................................s...t.hthlP.Eahl-slsphh...hFlhph......................s..hh....t..t...hpp.hlthhlh.hhts.p....hl+NPaL+A+L.splL..hh...ht......pt.hh.p................................hhppp.hs.......tp.Lh.uL..hcha.s.-lEhTGsp..............spF..h-KFshRh.l..lhc.lW..ts..a+t..phhp.spp..........................t.....Fl+FlNhLhNDsha.LLDEu.....l.ptLtcI+ph..Qt.hpsp.tphtth......................................................................s.pt.....ppp.............................................................................cpppLtp.tp.s+.hhhLup-TlshhphhT..p..p..l..tpsFh..tPpls.p...........RluuMLNa.LppLsGPKpts.L.KVc..s...............p.cYs..FcP+pLlsplsslYlpL....................................sppp.........FhtAlupDtRSY..s.plFppshphlp.+h...s...h.....sst.l.tpaptlsp+hp.......................................................... 0 132 197 291 +10241 PF10409 PTEN_C2 C2 domain of PTEN tumour-suppressor protein Finn RD, Coggill PC anon Gene3D, pdb_1d5r Family This is the C2 domain-like domain, in greek key form, of the PTEN protein, phosphatidyl-inositol triphosphate phosphatase, and it is the C-terminus. This domain may well include a CBR3 loop which means it plays a central role in membrane binding. 
This domain associates across an extensive interface with the N-terminal phosphatase domain DSPc (Pfam:PF00782) suggesting that the C2 domain productively positions the catalytic part of the protein onto the membrane [1]. 25.50 25.50 25.50 25.80 25.00 25.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.88 0.71 -4.44 61 847 2012-10-10 12:23:49 2007-10-11 15:54:09 4 39 146 25 477 790 5 136.70 24 15.32 CHANGED hsspslhlpplhl......psl...........P.h....p.ts.....ss.........cPhlplhptpphl.................t.phpthppt.............ptpphhhhhs.sl..l.p..GDlhlch.hppp........hhtcph.....hFphhFNTuFlp...................................................................................................................................sshLhhs+s-LDt.stcs...cpaspsFpVclhFsc .............................................................................................psLhl+plhh.psl...........P.F...ptts................uC.............................cPhh.clh..ttpphl..............................................pst..hph.ht.ph....................................................................ptphhhh..h...h...s.....sl...hl.p...G....Dlh.lch.aHtp..p.........thpcph.............hFph.FpTsFlp.................................................................................................................................................................................................................sthLhhsKp-LDt.spc.....s.......cpasptFpVclhFt.................................................................................................. 0 147 223 334 +10242 PF10410 DnaB_bind DnaB-helicase binding domain of primase Finn RD, Coggill PC anon Gene3D, pdb_1dd9 Domain This domain is the C-terminal region three-helical domain of primase [1]. Primases synthesise short RNA strands on single-stranded DNA templates, thereby generating the hybrid duplexes required for the initiation of synthesis by DNA polymerases. 
Primases are recruited to single-stranded DNA by helicases, and this domain is the region of the primase which binds DnaB-helicase [2]. It is associated with the Toprim domain (Pfam:PF01751) which is the central catalytic core. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.22 0.72 -3.98 185 3150 2009-01-15 18:05:59 2007-10-11 16:20:54 4 23 3048 9 677 2386 736 58.20 24 9.81 CHANGED lh-Fhhcphtp..p.....h.sl..sss-u+sphlpp.shsllspls.sssh+phhhpcLucthulstppl ................L.pFhhpplht..p......h.sL...sss-G+sphhpt.shsllspls.stsh.RphhhppLupclGl..pt............................ 0 208 432 568 +10243 PF10411 DsbC_N Disulfide bond isomerase protein N-terminus Finn RD, Coggill PC anon Gene3D, pdb_1eej Domain This is the N-terminal domain of the disulfide bond isomerase DsbC. The whole molecule is V-shaped, where each arm is a DsbC monomer of two domains linked by a hinge; and the N-termini of each monomer join to form the dimer interface at the base of the V, so are vital for dimerisation [1]. DsbC is required for disulfide bond formation and functions as a disulfide bond isomerase during oxidative protein-folding in bacterial periplasm. It also has chaperone activity [2]. 21.00 21.00 21.10 21.20 20.80 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.57 0.72 -4.71 144 1562 2009-01-15 18:05:59 2007-10-12 11:30:55 4 5 1318 12 304 941 348 55.30 33 22.65 CHANGED lp..ppLp......phh..shplp..slpsoP...lsG.LaEVhs..ss...s......llYsstcG.callt.Gplh..-hpspp ....................lppsLt......ch....sl.p.ss..sIpsoP...lsG...hhpVho...su....s.......llYsos..DG....+allp.G.s.la..Dlssp.t....... 0 60 159 243 +10244 PF10412 TrwB_AAD_bind Type IV secretion-system coupling protein DNA-binding domain Finn RD, Coggill PC anon Gene3D, pdb_1e9r Domain The plasmid conjugative coupling protein TrwB forms hexamers from six structurally very similar protomers [1]. 
This hexamer contains a central channel running from the cytosolic pole (made up by the AADs) to the membrane pole ending at the transmembrane pore shaped by 12 transmembrane helices, rendering an overall mushroom-like structure. The TrwB_AAD (all-alpha domain) domain appears to be the DNA-binding domain of the structure. TrwB, a basic integral inner-membrane nucleoside-triphosphate-binding protein, is the structural prototype for the type IV secretion system coupling proteins, a family of proteins essential for macromolecular transport between cells and export [2]. 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.04 0.70 -5.84 37 675 2012-10-05 12:31:09 2007-10-12 13:00:04 4 14 429 36 140 6042 663 312.90 31 53.44 CHANGED shpluulPh.shpsEtpHhhlsGosGoGKoshlpcllsphRtR........G-RAllaDhsGsFhppFac..........ssp.DllLNPhDsRsssWshasEsps.hDacshApullP..t..ssss-sFWspuARtlFspssh+L.tpp..spposppLhctlhssslcpLcphLtsT.usslhutp..sp+.sstSl+uslsshlcslphLsstt..........ssFSIR-Wlpssp......suhLFlosptsptssl+PLlohWlslAhpslhu..hst......spc..+R.lWahhDELsuLp+LssLtpsLscuRKaGGshVlGlQuhuQLcclYGpc.tApolhuhhsT+lhhpss..sscsAchhuc..lG..cpElcchpEshSaGtss.hRDGs...........ohsppcpt...c.lVhso-Ihs.LssLpu.....alphssshPls+lplp 
.........................................................................................................................................................................................................................s...hsslsh....hp.t.E.......pp.h.hlhGos.GsGK..o..p..h..l..p....p..l...h....p.......h..h...p..+....................G..c..h..s...l..l..a..D..........p......s.....p.........a........h......p......p....a..a...p........................................t........D.....h.....l.....l........N...P...h.....D......t....R..........s.............t.....Ws.........a......p........-....h........h........p.........................-..........h..........p....p.......h..........u.p....sl...ls...............tst....tcsaW.tu......u....ct..lh........s.............p....h............h.....h.....h.h....t.....p...p.....................p..........t..........s..................t.........p........l............h...p.........h........h............h................t.........................p............h...........c..............p............l..........p....p.....h.....l........t........s.........o....................s...............t...........s.......h.........h.p....tp..................hp+.....s...s....h........S....l.....p.....u........s.......l....s..s...h.....l.......p......s.h...p.....h....ltt.......................................t.F.o....l...+....c...W....hps.t................tuhLF.l...o........p.....t.....p...p.....c....s.....s....l....p.P....l.l....u....h....h....l....s....h....s....h...p....t.l.hs........hs.................................sp...p.......p+.....l....a....h.h..h.DE.ls........o....L....t..+.........l......s..p....l........ph..l....s..p.uR.......caG.........ssh.l.hG.h.Qsh.s.Q..L.....c.....c.....h....Y....G..c.p...h.A....t....s....l...h.s....h.h...s.oph..h.h..p.ss.....s.t...p...hA.chhut....l.G.....c..p..-...h.......p...h.......p.....c...p....h......o.h....u....t..s...................t.s...u.s.........................sh.sp..p......ph........p....l..l.......sc.l..p.Lssh
ps.....alhhst.................................................................................................................................................................................... 0 27 81 120 +10245 PF10413 Rhodopsin_N Amino terminal of the G-protein receptor rhodopsin Finn RD, Coggill PC anon Gene3D, pdb_1edx Domain Rhodopsin is the archetypal G-protein-coupled receptor. Such receptors participate in virtually all physiological processes, as signalling molecules. They utilise heterotrimeric guanosine triphosphate (GTP)-binding proteins to transduce extracellular signals to intracellular events. Rhodopsin is important because of the pivotal role it plays in visual signal transduction. Rhodopsin is a dimeric transmembrane protein and its intradiskal surface consists of this amino terminal domain and three loops connecting six of the seven transmembrane helices. The N-terminus is a compact domain of alpha-helical regions with breaks and bends at proline residues outside the membrane [1]. The transmembrane part of rhodopsin is represented by 7tm_1 (Pfam:PF00001). The N-terminal domain is extracellular is and is necessary for successful dimerisation and molecular stability [2]. 21.10 21.10 21.40 21.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.73 0.72 -4.28 17 2285 2009-01-15 18:05:59 2007-10-12 14:21:53 4 2 1603 37 51 1764 0 30.00 80 15.25 CHANGED NGTEGsNFYlPMSN+TGlVRSPaEYPQYYLA-Paha ...............NFYlPMSN+TGVVRSP..FEYPQYYLA-PWpY... 0 1 10 28 +10246 PF10414 CysG_dimeriser Sirohaem synthase dimerisation region Finn RD, Coggill PC anon Gene3D, pdb_1pjq Domain Bacterial sulfur metabolism depends on the iron-containing porphinoid sirohaem. 
CysG, S-adenosyl-L-methionine (SAM)-dependent bis-methyltransferase, dehydrogenase and ferrochelatase, synthesises sirohaem from uroporphyrinogen III via reactions which encompass two branchpoint intermediates in tetrapyrrole biosynthesis, diverting flux first from protoporphyrin IX biosynthesis and then from cobalamin (vitamin B12) biosynthesis. CysG is a dimer of two structurally similar protomers held together asymmetrically through a number of salt-bridges across complementary residues in the CysG_dimeriser region to produce a series of active sites, accounting for CysG's multifunctionality, catalysing four diverse reactions: two SAM-dependent methylations, NAD+-dependent tetrapyrrole dehydrogenation and metal chelation. The CysG_dimeriser region holding the two protomers together is of 74 residues [1]. 20.40 20.40 20.40 20.40 20.00 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.93 0.72 -4.49 132 1426 2009-01-15 18:05:59 2007-10-12 16:12:34 4 8 1263 6 269 987 131 59.40 36 13.80 CHANGED tslGpLAshuuphRscV+pplsshstRRpFWEchh.pG.huptlhsGppppAcpthpptls ...........pLGplAchAuphRscVKpphs.shspRR+.FWE+hF.sspl..Aptltsscpptsppthcphl........................ 0 54 139 201 +10247 PF10415 FumaraseC_C Fumarase C C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1fup Domain Fumarase C catalyses the stereo-specific interconversion of fumarate to L-malate as part of the Kreb's cycle. The full-length protein forms a tetramer with visible globular shape. FumaraseC_C is the C-terminal 65 residues referred to as domain 3. The core of the molecule consists of a bundle of 20 alpha-helices from the five-helix bundle of domain 2. The projections from the core of the tetramer are generated from domains 1 and 3 of each subunit [1]. FumaraseC_C does not appear to be part of either the active site or the activation site but is helical in structure forming a little bundle. 
20.70 20.70 20.70 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.29 0.72 -3.90 258 5840 2009-01-15 18:05:59 2007-10-12 16:51:58 4 10 3710 79 1358 4033 1826 55.00 43 11.79 CHANGED LVTALNPhIGY-pAAcIAKpAhcpGpol+-ssl.chGhl.......oc-phDcllcPppMspP ................lVTALNPaI.GY-pAAcIAKpAtc.pGpol+Essl.ch.Gh.L.........oc-chDchl.cPppMhtP....................... 0 415 827 1129 +10248 PF10416 IBD Transcription-initiator DNA-binding domain IBD Finn RD, Coggill PC anon Gene3D, pdb_1pp8 Domain In Trichomonas vaginalis, thought to be the earliest extant eukaryote, the sole initiator element for control of the start of transcription is Inr, and this is recognised by the initiator binding protein IBP39. IBP39 contains an N-terminal Inr binding domain, IBD, connected via a flexible, proteolytically sensitive, linker (residues 127-145) to a C-terminal domain. The IBD structure reveals a winged-helix-wing conformation with each element binding to DNA, the central helix-turn-helix contributing the majority of the specificity-determining contacts with the Inr core motif TCAPy(T/A). The binding of IBP39 to the Inr directly recruits RNA polymerase II and in this way initiates transcription [1]. 25.00 25.00 25.00 26.90 24.80 23.20 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.39 0.71 -4.34 94 107 2009-01-15 18:05:59 2007-10-12 17:53:28 4 2 1 7 107 109 0 122.10 22 48.18 CHANGED ppD.pp.pahpLcphlss..p+pt+.............spphpsFsptLptlhpast+sss.p..RshlsGltWhsss...lslNscpLphLls+sKSo.....INGshpphGatth..t....tt.h...hs....hhps..shsph+.pWolRphss ...............D..pa.pLpphlss..p+pt+.............spphpsFsptLptlhpash+sss.p..RshlsGltWhsss...lslNscpLphLls+sKSo.....INGshpphGatth.tptp..tp.lhthhs.....htts..t.+.pWolR....s..................................... 
0 107 107 107 +10249 PF10417 1-cysPrx_C C-terminal domain of 1-Cys peroxiredoxin Finn RD, Coggill PC anon Gene3D, pdb_1prx Domain This is the C-terminal domain of 1-Cys peroxiredoxin (1-cysPrx), a member of the peroxiredoxin superfamily which protect cells against membrane oxidation through glutathione (GSH)-dependent reduction of phospholipid hydroperoxides to corresponding alcohols [1]. The C-terminal domain is crucial for providing the extra cysteine necessary for dimerisation of the whole molecule. Loss of the enzyme's peroxidase activity is associated with oxidation of the catalytic cysteine, upstream of this domain; and glutathionylation, presumably through its disruption of protein structure, facilitates access for GSH, resulting in spontaneous reduction of the mixed disulfide to the sulfhydryl and consequent activation of the enzyme [2]. The domain is associated with family AhpC-TSA, Pfam:PF00578, which carries the catalytic cysteine. 20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.89 0.72 -4.30 86 5521 2009-01-15 18:05:59 2007-10-15 10:44:00 4 14 3627 357 1733 3878 622 35.40 38 17.34 CHANGED ALQhs-cp.s..ssPAsW.ps..........Gccsl...ssotc..u..hpcahps .....AhQasppH..G-..VCPAsW.ct...........Gccsl...p.s.o.-............h...................... 0 559 995 1407 +10250 PF10418 DHODB_Fe-S_bind Iron-sulfur cluster binding domain of dihydroorotate dehydrogenase B Finn RD, Coggill PC anon Gene3D, pdb_1ep3 Domain Lactococcus lactis is one of the few organisms with two dihydroorotate dehydrogenases, DHODs, A and B [1]. The B enzyme is a prototype for DHODs in Gram-positive bacteria that use NAD+ as the second substrate. DHODB is a hetero-tetramer composed of a central homodimer of PyrDB subunits resembling the DHODA structure and two PyrK subunits along with three different cofactors: FMN, FAD, and a [2Fe-2S] cluster. 
The [2Fe-2S] iron-sulfur cluster binds to this C-terminal domain of the PyrK subunit, which is at the interface between the flavin and NAD binding domains and contains three beta-strands. The four cysteine residues at the N-terminal part of this domain are the ones that bind, in pairs, to the iron-sulfur cluster. The conformation of the whole molecule means that the iron-sulfur cluster is localised in a well-ordered part of this domain close to the FAD binding site [2]. The FAD and and NAD binding domains are FAD_binding_6, Pfam:PF00970 and NAD_binding_1, Pfam:PF00175. 23.90 23.90 23.90 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.62 0.72 -4.46 180 2835 2012-10-02 17:47:23 2007-10-15 12:37:55 4 37 1790 3 745 2221 217 38.60 43 13.13 CHANGED EphMsCGlGhChuCs..lpst..........hpl..CpDGPVFsspclt .....EppMs.C.GhGtChuCt..lpsstt...................hhV..ChDGPVFssppl........ 0 318 533 651 +10251 PF10419 TFIIIC_sub6 TFIIIC_subunit; TFIIIC subunit Mistry J, Wood V anon Pfam-B_14433 (release 21.0) Family This is a family of proteins subunits of TFIIIC [1]. TFIIIC in yeast and humans is required for transcription of tRNA and 5 S RNA genes by RNA polymerase III. Yeast members of this family are fused to phosphoglycerate mutase domain. 20.80 20.80 21.10 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.25 0.72 -4.49 25 235 2009-11-13 15:40:22 2007-10-15 13:05:59 4 6 207 0 162 231 0 34.40 35 13.12 CHANGED lplhGL-ocpPllplsspl.apGsWcchlGT-lhFs ....hpllGl-ocpPllplsspl.FpGpac.-slGTplhFp..... 1 46 78 128 +10252 PF10420 IL12p40_C Cytokine interleukin-12p40 C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1f42 Domain IL12p40_C is the largely beta stranded C-terminal, D3, domain of interleukin-12p40 or interleukin-12B. 
This interleukin is produced on stimulation by macrophage-engulfed micro-organisms and other stimuli, when it dimerises with interleukin-12p35 to form a heterodimer which then binds to receptors on natural killer cells to activate them to destroy the micro-organisms [1]. This domain contains two disulfide bridges, one of which serves to bind p40 to p35 and the other to hold the beta strands within the domain together. The cupped shape of the p35 binding interface matches the elbow-like bend between D2 and D3 in p40 [2]. The domain is often associated with family fn3, Pfam:PF00041. 25.00 25.00 30.30 29.20 21.20 22.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.07 0.72 -3.88 21 132 2009-01-15 18:05:59 2007-10-15 14:05:34 4 6 63 8 36 104 0 87.30 42 29.28 CHANGED scsaLcCpApNYuG..cFpCuWhht..pssh.hhpl+upR.....................................sSsssptVoCuhs................pchulpCp.--stCPhAEEotPIplslcstpc ......KsFL+CEA+NYSG..cFTC.Whss.hsssl..pFsl+..usR.....................................sSsss...psVTCusssh..........ppp.pcYolpCQ.-cssCPhAEEshPIclsl-shp...................... 0 1 6 15 +10253 PF10421 OAS1_C 2'-5'-oligoadenylate synthetase 1, domain 2, C-terminus Finn RD, Coggill PC anon Gene3D, pdb_1px5 Domain This is the largely alpha-helical, C-terminal half of 2'-5'-oligoadenylate synthetase 1, being described as domain 2 of the enzyme and homologous to a tandem ubiquitin repeat. It carries the region of enzymic activity between 320 and 344 at the extreme C-terminal end [1]. Oligoadenylate synthetases are antiviral enzymes that counteract vial attack by degrading viral RNA. The enzyme uses ATP in 2'-specific nucleotidyl transfer reactions to synthesise 2'.5'-oligoadenylates, which activate latent ribonuclease, resulting in degradation of viral RNA and inhibition of virus replication [2]. This domain is often associated with NTP_transf_2 Pfam:PF01909. 
19.90 19.90 19.90 22.40 19.00 19.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.44 0.71 -4.81 49 511 2009-01-15 18:05:59 2007-10-15 16:12:05 4 20 58 2 142 448 0 173.60 46 48.00 CHANGED sscPs.splYspLIcpps..t..ptGEFSsCFTELQ+sFlcpRPsKLKsLIRLVKHWYppCpc+......htssLPPpYALELLTlYAWEpG.sspspFshApGFRTVL-L...ltcYppLCIYW....Th.YsFccphlppaLppQLc+sRPVILDPADPTsNVusu.sh.sWclLAcEAptWLppsChpststssVssWcV.sth .................................s..pPs.splYspLIptts......ptGEFSsCFoELQ+sFlp.pR....P.sKLKsLIRLVKHWYppspcp................t....ssLPPpYAL.ELLTlYA...WEpG.st..pspF...shApGF.R.TVL.-L.........l.ppYppL.....CIYW.....Th.YsFcs.hlppaLppQLp.......+s.......RPlILDPADPTtNluts.s...WchLApEAtthh.ptsChhptptssltsWpV...h.................................... 0 29 35 37 +10254 PF10422 LRS4 Monopolin complex subunit LRS4 Mistry J, Wood V anon Pfam-B_63451 (release 22.0) Family Monopolin is a protein complex, originally identified in Saccharomyces cerevisiae, that is required for the segregation of homologous centromeres to opposite poles of a dividing cell during meiosis I [1][3]. The orthologous complex in Schizosaccharomyces pombe is not required for meiosis I chromosome segregation, but is proposed to play a similar physiological role in clamping microtubule binding sites [2]. In S .cerevisiae this subunit is called LRS4, and in S. pombe it is known as Mde4. 
21.70 21.70 21.70 24.00 21.30 21.60 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.88 0.70 -5.02 5 29 2009-01-15 18:05:59 2007-10-15 16:21:14 4 2 28 2 18 21 0 226.90 38 72.84 CHANGED LQLlA-YYcSVL-sE+I...........YhEa..........sp.phpFhuspKTNAscs-.............sphlT-EsLpLQ+Q.......................IsQLssDLQlp+pEsE......................KL+clpKTQKAl......................LESKLpotKupVDphK.....cpossuscupsRssshuts-cppRupthppt+.o.........FHLLSP....IhsscpP.............................sSssupc+p...............uGLRplLcsGcsTIFDp.Sp..cD-.................................sDEDu-pssslpshphutsosR+.lsuL+sos- ................................................LQLLuNYYKuhl-uERI...........Y.EY..............psphphuu.s+hssspus.............sp+ls-ETLhLQRQ.......................lsQLsppLQ.hphpENE.........................................KL.hplQKsQKAL.................................................................hpSKLsopcthIDcLK.cLpspphsscpcspppssssups-pppsspttpss+so..........hH.LL..SP......lssRcps....t.....tsp...........................sSssupc+s...................pGLRplLooG+sTlFDs....Spp.DD-..................................s-sschppDssssp.t.................................................................................................... 0 2 8 15 +10255 PF10423 AMNp_N Bacterial AMP nucleoside phosphorylase N-terminus Finn RD, Coggill PC anon Gene3D, pdb_1t8s Domain This is the N-terminal domain of bacterial AMP nucleoside phosphorylase (AMNp). The N- and C-termini form distinct domains which intertwine with each other to form a stable monomer which associates with five other monomers to yield the active hexamer. The N-terminus consists of a long helix and a four-stranded sheet with a novel topology. The C-terminus binds the nucleoside whereas the N-terminus acts as the enzymatic regulatory domain. AMNp (EC:3.2.2.4) catalyses the hydrolysis of AMP to form adenine and ribose 5-phosphate. thereby regulating intracellular AMP levels [1]. 
20.90 20.90 20.90 22.20 19.30 20.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.90 0.71 -4.66 67 952 2009-01-15 18:05:59 2007-10-18 11:24:16 4 3 935 27 168 567 55 159.40 47 32.95 CHANGED ttAVs+LppLY-pusshLpsshpphl.sGs.PsschRA....hYPplRlsssshups......coR..uaG+VstPGsYuTTlTRPcLFcsYLtEQlpLLhpsHsVslpVGsSspsIPhpFslsssstlp.sstssstc......LpchFssPDLushsDcIssGh.apstsstshP ........................s.pAl-+LctLYEpulsALRsAlupYlpsGpl.....P.D.ppsRtt...........FsYPpLsVoaDuss............ps......c..........TR..AaG+hocsGsYoTTlTRPsLFRsYLpEQLsLLhp-YGspIsVpsSppcIPYPYVl-uup.Ls..l.Dcohu.A.s......LschFPTT-LAplsD-hADGlacPsphpP...................................... 0 21 76 119 +10257 PF10425 SdrG_C_C C-terminus of bacterial fibrinogen-binding adhesin Finn RD, Coggill PC anon Gene3D, pdb_1r17 Domain This is the C-terminal half of a bacterial fibrinogen-binding adhesin SdrG. SdrG is a Gram-positive cell-wall-anchored adhesin that allows attachment of the bacterium to host tissues via specific binding to the beta-chain of human fibrinogen (Fg). SdrG binds to its ligand with a dynamic "dock, lock, and latch" mechanism which represents a general mode of ligand-binding for structurally related cell wall-anchored proteins in most Gram-positive bacteria. The C-terminal part of SdrG(276-596) is integral to the folding of the immunoglobulin-like whole to create the docking grooves necessary for Fg binding. The domain is associated with families of Cna_B, Pfam:PF05738 [1]. 
20.50 20.50 20.50 21.20 19.10 18.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.02 0.71 -4.27 35 1380 2009-01-15 18:05:59 2007-10-18 15:44:03 4 52 266 14 22 1140 1 161.70 27 17.80 CHANGED sshsssthsslsu....pIps...lsppssp..asphlYVNPpppshppss.....lslpG............phspuushsssssplKlYcVsss.ppLscShhssh.s.sphcDVTsph.............phohssNs.phslsFs..slsps..YVl+hsGcasssusp...lshpopLsuhsppt........h..ssshsasN .....................s.thspppssslpu....plsp...lspsssp..appslYVNP.p..p....s...hssss.............l.lpG................phssuuplssss.splKlYcVsss..s.......p......LscSahsss.........ophcDVTsph..................ploassss..shslsFs....clsps...Yll..hVsu+a-ssuss..slshpsslpshssph...............hssthsas....................... 1 9 10 20 +10258 PF10426 zf-RAG1 Recombination-activating protein 1 zinc-finger domain Finn RD, Coggill PC anon Gene3D, pdb_1rmd Domain This is a C2-H2 zinc-finger domain closely resembling the classical TFIIIA-type zinc-finger, CX3FX5LX2-3H, despite having a valine and a tyrosine at the core instead of a phenylalanine and a leucine, hence CX3VX1LX2YX2H. The structure, nevertheless, contains the characteristic two-stranded beta-sheet and alpha-helix of a classical zinc-finger. The domain binds one zinc and, in complex with the zinc-RING-finger domain, helps to stabilise the whole of the dimerisation region of recombination activating protein 1 (RAG1) [1]. The function of the whole is to bind double-stranded DNA. 20.70 20.70 21.00 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.46 0.72 -3.98 53 4732 2009-01-15 18:05:59 2007-10-18 16:00:50 4 15 3377 1 20 4638 0 29.20 66 5.20 CHANGED LslRCPVK-CcEEVhLGKYs+HlSSHKEs+ .......LslRCPVK-CcEElhhGKYupHLSSHKEhK..... 0 1 2 4 +10259 PF10427 Ago_hook Argonaute hook Bateman A, Wood V anon [1] Motif This region has been called the argonaute hook [1]. 
It has been shown to bind to the Piwi domain Pfam:PF02171 of Argnonaute proteins. 23.00 23.00 24.30 24.30 21.50 22.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.80 0.71 -11.98 0.71 -4.13 36 202 2009-01-15 18:05:59 2007-10-19 10:48:31 4 10 53 1 85 153 0 133.70 41 8.72 CHANGED GpPosPh..ssV..............................cp..GsuhWG+sssS.ssh.tstssssssouWGs....sssuss....suspshpssW..s-csssshus...........suWp-ptssssGh...Ws..spsSp.ssuSh.suuWupt.sst....tthct....uhhu.uphttcuh.hu+ ...........................................................h..................................DN..GTSAWG+Ps....so....usu.....WG-...s.ssssssuW....Gs..s.....ssusssh.............KsuuKSMQ..D.GW..GscDhslsusR....suWEEE-..-GGh...WN..osuSQtS.sSS...hssuuWGpt.usK..........+phpp......uhts..Gs..s-uW.MN..................... 0 6 14 35 +10260 PF10428 SOG2 RAM signalling pathway protein Mistry J, Wood V anon Pfam-B_35594 (release 22.0) Family SOG2 proteins in Saccharomyces cerevisiae are involved in cell separation and cytokinesis [1]. 
25.00 25.00 87.00 42.90 24.30 23.90 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.40 0.70 -5.83 17 158 2009-01-15 18:05:59 2007-10-19 11:53:20 4 23 126 0 122 165 1 355.20 22 46.12 CHANGED ah+RLSsLP.....Ecctppps.......................h-sllEus+GlLFulhQlpstlp.tlhslspccssppsslc......lhYssssHl-pL.psLcph-stspss.........hpppslhctChohlsuap+lhs.LppslcthhssuDsRYlRshhh.lasShhElpNuhstlss..t..........ppt.........s...h....tp.t.tshhpspphosTts+.sshs.pp.ps................................................sslpps..........................................................phsssss...........................pspscssphs.......s...s.ssPposcoh.shsssssspls..................s..t.spst-cpl....cplappLpsssshshpslsplppphs+shtsuppspp...scsltt....................................hhpsLhcpCpsshclocsLppRLsshp............................................p-sh.shpsphpaW...chspuFlc.................uhlsllsph+plpssh.h..ss-lhshLtslp+uoK-sshllphSsaphl ........................................................................ah+RhS.LP.....Ep.p..hp.pth.................................................................hts..llcsu+tlLauh.plpstlp.thhshhpstp..hpt.s.lp......lhYsspsal-pL.ptlpph-p.stpt...........spslhctC.shlsuatplhs.Lttslphhhs...psDs+alR.l.hhhlasShhElpssht.lss..................................t.............................................h..............ssp...t.t.........t................................................................s.....................................................................................................................................................................s.......................................t..tts.......................sstssp.h...s.t.....t.t............................s..t..p.pph....cpla.tLppshphs.pshs.hppthsphh.t.....uttppt....spthh.....................................................hhppLhppCt.shphochLpp+Ls.hp............................................pps..s.tsp....htha...p.hpsFlp...................shhphhs.
th+th...s.h.h...s-hh..htslpcsh+-sshhlt.Ssap............................................................................................. 0 32 67 106 +10261 PF10429 Mtr2 Nuclear pore RNA shuttling protein Mtr2 Finn RD, Coggill PC anon Gene3D, pdb_1q42 Domain Mtr2 is a monomeric, dual-action, RNA-shuttle protein found in yeasts. Transport across the nuclear-cytoplasmic membrane is via the macro-molecular membrane-spanning nuclear pore complex, NPC. The pore is lined by a subset of NPC members called nucleoporins that present FG (Phe-Gly) receptors, characteristically GLFG and FXFG motifs, for shuttling RNAs and proteins. RNA cargo is bound to soluble transport proteins (nuclear export factors) such as Mex67 in yeasts, and TAP in metazoa, which pass along the pore by binding to successive FG receptors. Mtr2 when bound to Mex67 maximises this FG-binding. Mtr2 also acts independently of Mex67 in transporting the large ribosomal RNA subunit through the pore [1]. 23.70 23.70 23.70 24.10 23.60 23.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.18 0.71 -4.49 7 52 2012-10-03 02:27:24 2007-10-19 13:07:04 4 1 50 4 32 75 0 161.10 37 88.94 CHANGED ss.sQ.........s-sFlKKlLApLD......p.ps.sclppalp.F..........ppstIlhNupPhuss......stFLphW.ptsshTpHtlouhDhHlIP..GoGThlsNsssKVRF.DESGRD+hGpsusl.h..............sspshscsRPlWGoaaGlsLpLllD-plhpss.stsIsShNYphVa+P-DSllpI ....................s.s.sphhpsFlK+lLApLD........s.ss.splspalshF..........s.sspIIhNusPhups......stFhphWpsps.tTpHtLouhD.....h...H..sIP......GoG.....Thl..hNsssKVRF.DE..SGRs+hGpsAsl..tss..............ss.spsRPlWGsaFGlslpLllD-plhpss.stlIsuaNYphsY+P-DSllpl..................................................................................... 
0 8 17 28 +10262 PF10430 Ig_Tie2_1 Tie-2 Ig-like domain 1 Bateman A anon Ciani B Domain \N 25.00 25.00 49.50 48.00 23.40 16.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.32 0.72 -3.63 4 53 2012-10-03 02:52:13 2007-10-19 13:35:17 4 11 28 2 19 50 0 95.50 79 9.53 CHANGED AMDLILINSLPLVSDAET.SLTCIASGW+PHEPI.TIGRDFEALMNQHQD.PLEVTQDsTREWAKKVVWKREKASKINGAYFCEGRVRGpAIRIRTMKM .AMDLILINSLPLVSDAET.SLTCIASGW+PHEPI.TIGRDFEALMNQHQD.PLEVTQDVTREWAKKVVWKREKASKINGAYFCEGRVRGEAIRIRTMKM 0 1 2 6 +10263 PF10431 ClpB_D2-small C-terminal, D2-small domain, of ClpB protein Finn RD, Coggill PC anon Gene3D. pdb_1qvr Domain This is the C-terminal domain of ClpB protein, referred to as the D2-small domain, and is a mixed alpha-beta structure. Compared with the D1-small domain (included in AAA, Pfam:PF00004) it lacks the long coiled-coil insertion, and instead of helix C4 contains a beta-strand (e3) that is part of a three stranded beta-pleated sheet. In Thermophilus the whole protein forms a hexamer with the D1-small and D2-small domains located on the outside of the hexamer, with the long coiled-coil being exposed on the surface. The D2-small domain is essential for oligomerisation, forming a tight interface with the D2-large domain of a neighbouring subunit and thereby providing enough binding energy to stabilise the functional assembly [1]. The domain is associated with two Clp_N, Pfam:PF02861, at the N-terminus as well as AAA, Pfam:PF00004 and AAA_2, Pfam:PF07724. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.63 0.72 -4.08 772 19559 2009-01-15 18:05:59 2007-10-19 14:59:04 4 59 4882 83 4604 13942 5521 81.80 27 12.29 CHANGED LsccplppIlcl.....lp.clpp.+L..t.c...cp.l.pL..clo-sAhc.hlucp.G...aDstaG.ARPL+RhIQcplcssL.........ActlL....pGp..lt.p.Gspl..pl .............................................Lsc-slhpIlsh.....tLs..chh.p.p..L...........h.p....cs....lpL.cho-p.A..hc...h...l.Acp..u.......h.....-...........p.....h.....G.ARsL+pll..pc....tlpc.l...............schhl..ttp.h.................................................. 0 1527 2924 3867 +10264 PF10432 bact-PGI_C Bacterial phospho-glucose isomerase C-terminal region Finn RD, Coggill PC anon Gene3D, pdb_1tzb Domain This is the C-terminal half of a bacterial phospho-glucose isomerase EC:5.3.1.9 protein which is similar to eukaryote homologues to the extent that the sequence includes the cluster of threonines and serines that forms the sugar phosphate-binding site in conventional PGI. This domain contributes a good proportion of the active catalytic site residues. This PGI uses the same catalytic mechanisms for both glucose ring-opening and isomerisation for the interconversion of glucose 6-phosphate to fructose 6-phosphate [1]. It is associated with family SIS, Pfam:PF01380. 
24.50 24.50 24.90 24.60 23.70 24.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.68 0.71 -4.52 29 276 2009-01-15 18:05:59 2007-10-19 17:18:44 4 2 269 10 113 214 335 153.00 27 45.09 CHANGED hpphps.A+pLAttlts..tlPllaus.shhtssAhRaKsplsENAKhPAhhshlPEhsHN-lsuhpss...........................htphthlllpsp.-p.ps............thhhshspclh.tpsssshplcsp...usS.hLpclhtLlhlsDasSlaLAhhhGlDPhslshIsthKccls ..............................h...hsN.AKsLAttLss...thPllaus.sshssssucRhtp.hscsutp.Ahsushsch.......+shlsuhtts................................s.ts+h.lllltDc.s.-ttcs.................................tthc......hcpl...h......ts+ssslppl.ph......tu....o.slp+hsuLlhhusaAulYLAltht.................s........................................ 0 54 87 103 +10265 PF10433 MMS1_N MMS1; Mono-functional DNA-alkylating methyl methanesulfonate N-term Mistry J, Wood V anon Pfam-B_64607 (release 22.0) Domain MMS1 is a protein that protects against replication-dependent DNA damage in Saccharomyces cerevisiae [1]. MMS1 belongs to the DDB1 family of cullin 4 adaptors and the two proteins are homologous. MMS1 bridges the interaction of MMS22 and Crt10 with Cul8/Rtt101 [2]. Cul8/Rtt101 is a cullin protein involved in the regulation of DNA replication subsequent to DNA damage. The N-terminal region of MMS1 and the C-terminal of MMS22 are required for the the MMS1-MMS22 interaction [3]. The human HIV-1 virion-associated protein Vpr assembles with DDB1 through interaction with DCAF1 (chromatin assembly factor) to form an E3 ubiquitin ligase that targets cellular substrates for proteasome-mediated degradation and subsequent G2 arrest [4]. 
24.50 24.50 24.50 24.70 24.40 24.00 hmmbuild --amino -o /dev/null HMM SEED 504 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.68 0.70 -6.36 38 910 2009-09-11 06:56:25 2007-10-22 15:39:32 4 19 291 34 656 897 17 467.30 21 40.61 CHANGED p...hLhluhcS...tclhhLthpp................................psss.......Fh......pshsssssphpphGpplslDPpuRshulsuhpshFtla.Lpp.............ht.phhttst.tsPl...hp........................-GhIhphsFLa...stsssss.hlhhLhhs..ppppschhsYc............Wpsspslppshs+hshs....lssphclP.....shlIPLsp..................ssuallVsspphhlap..........st.......phhshphs............hppsslhssasts............................hppptcclhLsc-sGplhhltlsptst..........php.lGp..sslsssFshLcss........t..lLhsuustGs.uhhlphs.......................................................hspschlpch.NWuPllDhsllc..pppssp....t...............classSGsu.pcGulpplRpGlpup.thphshpph.s.spslWsls.....tssts....ssallhShPhpotlLpl.......................Dhs--l..ht....ulshsspTLtsushs...sshllQVTssulplsshtstphtpphp...........sspplhsAsssssphLlsssspstptlplpht+h.........h.ppt.ph......h..spssslshp...Ph 
......................................................................................................................................................................................................................Dhlhlsocp...hphhhlpap.................................ttpth......t...........tshtc.shhp...sG.hhh.lDPp.uRhhhlthh.ct.hhlh.hpp....................p..tt.thtt..l........p.....................................................ph.l.hsh.hhl..................uhtpPhhhh.......l..s..............tpt..t........ph...h..hhc....................................................................................ht...h....sh.p.ch.p.s...........h....phs.............shllsVPt...........................................s..uG.ll.lh.spp.lhYp..........................t...............th......................................................ppshhhsh.....................................................hthsh.hh.L.lt..s.c.tGclahltlphspp..............................lppl+lphh.....s.....p...ss.ls..ss.hs..hLcsG...............hlFluSchGs.s.L.hphs.......................................................................................................................................................................hp.pltl..l...-phs.....sluP..l.hDhpl..s..ch.tp.p.sps...........................................plhsssGts...tc..uoL+ll.RpGltlp.......-hs.s...-LsG..spslWol+.......................................tp.pc..........hcsal..ll..S.....a..s..s....t.Th...lLpl..............................................p.s-.El......pt..GFhs.s.t..Tl.hsupls.....................pstll..........QVpsp.....u.lR...h.l..........................p...s..t.....t...h..hp..p....atss...................ttpsIstsu..ss.p..pp.lllshs.stp.l.hhhph.p.t................................................h........................................................................................................................ 
0 225 359 545 +10266 PF10434 MAM1 Monopolin complex protein MAM1 Mistry J, Wood V anon Pfam-B_58835 (release 22.0) Family Monopolin is a protein complex, originally identified in Saccharomyces cerevisiae, that is required for the segregation of homologous centromeres to opposite poles of a dividing cell during meiosis I [1][2]. MAM1 is required in S. cerevisiae for monopolar attachment [3]. 25.00 25.00 60.40 60.40 19.80 19.80 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.08 0.70 -5.20 3 26 2009-01-15 18:05:59 2007-10-24 10:37:39 4 1 25 0 15 26 0 249.80 48 85.97 CHANGED +cKRsLSsKDTNV.....lp.sNp.ppRpRsL+NKsoh....IsDSSslppPpKNscccpLsKassERpl+RhoNssNlssp-sNs.................pp..oQ+I-NNsAs+E...uG-sLTRssL+ELQppIh-hElssF.pCcHulCsQ.hshcsLcpsRTWFLFELEMoEstsc....NLRpSCYsKYVYoAIDpSW+hsNhLhcAs-supEaFPIEQLLIPc.plD.psppKth-....IEslSI-h-SIhETN+sss.pthV+KKpLPsSVLp+Rsc+clFDEhslDAcEVlNshSoSSS ...................................pKRsluNKDsNh.......l+.sNphpphSRhLspK.......IpsosspcpP+...KNhpccsLsSa+p.-+Sl+.KpNssNlssc-cK-...........................TQcLp...NNlsscE......uscs..LT+sNLKcLQccIF-pEhs.sI.sCcHsLCSs....ENR+..c..IKaSRLWFLFEL-MSt.Nhsc....NLRhSCYsKaVYsAIDcuW..phENILh...cEp-K+Y.........EaFPIuQLLIPN..sIDassc....pK+ccN...I.EDLTlEI-SIIETNHp..........cKRaLPpSlLhKRccchAFDDh.cLDA+KlLNDhSAoSp........ 0 1 6 12 +10267 PF10435 BetaGal_dom2 Beta-galactosidase, domain 2 Finn RD, Coggill PC anon Gene3D, pdb_1tg7 Domain This is the second domain of the five-domain beta-galactosidase enzyme that altogether catalyses the hydrolysis of beta(1-3) and beta(1-4) galactosyl bonds in oligosaccharides as well as the inverse reaction of enzymatic condensation and trans-glycosylation. This domain is made up of 16 antiparallel beta-strands and an alpha-helix at its C terminus. The fold of this domain appears to be unique. 
In addition, the last seven strands of the domain form a subdomain with an immunoglobulin-like (I-type Ig) fold in which the first strand is divided between the two beta-sheets. In penicillin spp this strand is interrupted by a 12-residue insertion which forms an additional edge-strand to the second beta-sheet of the sub-domain. The remainder of the second domain forms a series of beta-hairpins at its N terminus, four strands of which are contiguous with part of the Ig-like sub-domain, forming in total a seven-stranded antiparallel beta-sheet. This domain is associated with family Glyco_hydro_35, Pfam:PF01301, which is N-terminal to it, but itself has no metazoan members. 21.70 21.70 21.90 22.00 21.50 21.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.03 0.71 -4.93 38 264 2009-01-15 18:05:59 2007-10-31 12:16:33 4 20 163 6 155 264 1 166.00 27 16.75 CHANGED .LstTsttus.sosYo.ssssltso.Lhs......ssTtuuFYVlpHschoStssssapLplsTSs.GslTlPphsG.olsLNGR-SKIhVTDaslG.u.psLlYSTAElhTatphss.c.sVLVLYussGEpsEhAl......uspupshslcGpssslshpptsusllls.aspssshpllplss.....lclhLLDRssAYpa ......................................................s.....hs.sssslhsh.lhs......sto.stFalh.pp..tp.ss...ssts..aplpl..s...T.o.t.Gs.l.slPp.s.......u....slpLsGR-S.K.lhlsDasl......G..s......p........pLlYSTA-lhThtphss.c.sVll.LaGs.tG-tsEhsl....phs.s.t..s.p..s.slpG..sphsh.p.tps........s...t..l.hls..asp.sssh..p.hlplsss.....lhlhllD+psAhp....................................................... 0 48 88 127 +10268 PF10436 BCDHK_Adom3 Mitochondrial branched-chain alpha-ketoacid dehydrogenase kinase Finn RD, Coggill PC anon Gene3D, pdb_1gkz Family Catabolism and synthesis of leucine, isoleucine and valine are finely balanced, allowing the body to make the most of dietary input but removing excesses to prevent toxic build-up of their corresponding keto-acids. 
This is the butyryl-CoA dehydrogenase, subunit A domain 3, a largely alpha-helical bundle of the enzyme BCDHK. This enzyme is the regulator of the dehydrogenase complex that breaks branched-chain amino-acids down, by phosphorylating and thereby inactivating it when synthesis is required. The domain is associated with family HATPase_c Pfam:PF02518 which is towards the C-terminal. 20.90 20.90 21.20 21.60 20.80 20.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.12 0.71 -4.73 57 905 2009-01-15 18:05:59 2007-10-31 13:35:54 4 12 296 36 574 859 9 170.50 32 40.88 CHANGED sslSL+phlpF...Gp.ps.............sppp....hh.uupFlppELPlRLA++lp-lppLPhslsppPsltpVpphYhpSFcclhp..a...............................................................................................................pshc-....................spc...Fs.chLpplhpcHs..sl.lsolApGlhEh+..cth........................ssp.......plppFLDcahhuRIuh....RhLlsQH........................lsL.................t..t.psst................................ssa.lGhIss ..................................................................................................................slShcphlpF....Gp...s............................................sppp.....h.SspFlp.pELPl.........RLApplc-lptLP..tlsp...pP..........s.lptV.psh.Yh...pSFpclhp..a...............................................................................................................................p....c.s.hcc.......................t.p.c.Fs.chltplhpcHs..sV.lsshApG...llEh+.cth............................ssp.......................plphFLD+ahhu..........RIuh....RhLhsQH........................ltL...........ht..t.t.sst...........................................spa.lG.Is........................................................................................................................................................... 
0 171 289 439 +10269 PF10437 Lip_prot_lig_C Bacterial lipoate protein ligase C-terminus Finn RD, Coggill, PC anon Gene3D, pdb_1vqz Domain This is the C-terminal domain of a bacterial lipoate protein ligase. There is no conservation between this C-terminus and that of vertebrate lipoate protein ligase C-termini, but both are associated with the domain BPL_LipA_LipB Pfam:PF03099, further upstream. This domain is required for adenylation of lipoic acid by lipoate protein ligases. The domain is not required for transfer of lipoic acid from the adenylate to the lipoyl domain. Upon adenylation, this domain rotates 180 degrees away from the active site cleft. Therefore, the domain does not interact with the lipoyl domain during transfer. 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.81 0.72 -4.31 94 2280 2009-01-15 18:05:59 2007-10-31 14:37:17 4 6 1825 11 300 1274 29 85.30 33 26.19 CHANGED coPcFshpppcRFsh.Gtl-lplsVcpGhIpch+IaGDFhus.ts.lpclpptL.hGhpYctcslppsLppl..shp.pYhs.slph--lhphl ...................puPpFshppscRFsh.G.t.V-l..phsV.c.c.GpIpcs............+.Ia....GDFFu....t............-...l.p.slcptL....pGs..pY..c..t..-s..lpps.Lcsl..slscY.hs..slphcElhphh................................. 1 115 189 254 +10270 PF10438 Cyc-maltodext_C Cyclo-malto-dextrinase C-terminal domain Finn RD, Coggill PC anon Gene3D, pdb_1h3g Domain This domain is at the very C-terminus of cyclo-malto-dextrinase proteins and consists of 8 beta strands, is largely globular and appears to help stabilise the acitve sites created by upstream domains, Cyc-maltodext_N Pfam:PF09087, and Alpha-amylase Pfam:PF00128. Cyclo-malto-dextrinases hydrolyse cyclodextrans to maltose and glucose and catalyse trans-glycosylation of oligosaccharides to the C3-, C4- or C6-hydroxyl groups of various acceptor sugar molecules. 
22.20 22.20 22.20 22.90 22.10 21.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.30 0.72 -3.92 32 208 2012-10-02 20:10:03 2007-10-31 15:17:05 4 4 182 12 58 208 92 79.20 31 13.36 CHANGED cLhHa.sP.psGlYVYhR................................h.sscoVhVlhNpsscshsLsLsRFpEhltssssup-llo.spphsLsc....sLslss+ushllpl .............hh+F.hP..ppGlYVYtR................................phsscoVhVllNsscpp.tslsls+apEllssps.supDllo.G+pls.Ls.c....slsLss+sshlLEh................ 0 21 43 54 +10271 PF10439 Bacteriocin_IIc Bacteriocin class II with double-glycine leader peptide Coggill P anon Manual Family This is a family of bacteriocidal bacteriocins secreted by Streptococcal species in order to kill off closely-related competitor Gram-positives. The sequence includes the peptide precursor, this being cleaved off proteolytically at the double-glycine. The family does not carry the YGNGVXC motif characteristic of pediocin-like Bacteriocins, Bacteriocin_II Pfam:PF01721. The producer bacteria are protected from the effects of their own bacteriocins by production of a specific immunity protein which is co-transcribed with the genes encoding the bacteriocins, eg family EntA_Immun Pfam:PF08951. The bacteriocins are structurally more specific than their immunity-protein counterparts. Typically, production of the bacteriocin gene is from within an operon carrying up to 6 genes including a typical two-component regulatory system (R and H), a small peptide pheromone (C), and a dedicated ABC transporter (A and -B) as well as an immunity protein [1]. The ABC transporter is thought to recognise the N termini of both the pheromone and the bacteriocins and to transport these peptides across the cytoplasmic membrane, concurrent with cleavage at the conserved double-glycine motif. 
Cleaved extracellular C can then bind to the sensor kinase, H, resulting in activation of R and up-regulation of the entire gene cluster via binding to consensus sequences within each promoter [2]. It seems likely that this whole regulon is carried on a transmissible plasmid which is passed between closely related Firmicute species since many clinical isolates from different Firmicutes can produce at least two bacteriocins. and the same bacteriocins can be produced by different species. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.56 0.72 -3.85 35 1162 2012-10-02 23:56:30 2007-11-06 13:42:44 4 3 367 0 62 351 0 66.30 33 90.98 CHANGED M.........pphcsLspcpLusl.G.....G.........phspshsuhsuusssGshsGushus...........sGuhsGA......hhGussGuluGt .........pphM.....hpQFphMDsEMLusVEG..G........sths.s..shsu.ssu......uA.s....s.G.hth.Ghtsssh..............hsuhhGu......hhGuhhsuh...h....................................... 0 6 29 42 +10272 PF10440 WIYLD Ubiquitin-binding WIYLD domain Bateman A, Thorstensen T anon Thorstensen T Domain This presumed domain has been predicted to contain three alpha helices. The domain was named the WIYLD domain based on the pattern of most conserved residues [1]. It binds ubiquitin. In the Arabidopsis thaliana histone-lysine N-methyltransferase SUVR4, Swiss:Q8W595, binding of ubiquitin to this domain stimulates enzymatic activity and converts its activity from a strict dimethylase to a di/trimethylase [2]. 21.10 21.10 21.60 21.10 20.70 19.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.08 0.72 -4.04 8 79 2009-01-15 18:05:59 2007-11-08 17:56:48 4 5 15 \N 49 87 0 63.40 36 13.75 CHANGED M..ss....p....cRhDAAh-+M+phGhccshlpsslKpLLp..lYscN.WhLIE-DNYcsLlDtIFspc-cp ....................tp....pRhcsAhctMpthGhscppl+sllcpLLp..lY..s....pN..W.hIE-.-sY+sLhDslh-pp-p...... 
0 5 28 41 +10273 PF10441 Urb2 Urb2/Npa2 family Bateman A, Wood V anon Pfam-B_28626 (Release 22.0) Family This family includes the Urb2 protein from yeast that are involved in ribosome biogenesis [1]. 21.30 21.30 21.30 21.30 21.10 20.50 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.29 0.70 -4.75 26 228 2009-01-15 18:05:59 2007-11-19 11:21:27 4 4 205 0 167 223 0 220.80 21 16.86 CHANGED sssphlsthlphlp...phltpc..shhhsQhsl-hllsllsslst............................p.t.pspsslahphspllsslLthHRh+lss+hHllhsshspLLphLh......sptphssssss.........................ApthuRLlsphs-P.....................p.p.ttts..............Lssthsth+c.hs+ahshlLssYlphplphs.....lsssl+ptLpsGlYuIhDlho..........pp-.lphlsuuLDsuu....RshF+sLYs-Yp+huKWp.p .....................................................................................................................................................................................t.......hhpsl....hllppc..shthsp.hsls.hlshlsslss.................................sth.pspth.sslahtlppllt....sllpp.....Hpphh.tph.hllsshppL.lpslh.....ttptspt.stss.sps..................................................shphuRL...lpphhp.......................................................................hsttpcthsp.ah.al..lhpY......lph.hcss.......................lhspl+ptLpsGlY....sll....Dlhs..........cps.hphlpuuL...ssuu.......Rs....lFKpLYs-Yt+atKap.pt....... 0 46 84 131 +10274 PF10442 FIST_C FIST C domain Borziak K anon Borziak K Domain The FIST C domain is a novel sensory domain, which is present in signal transduction proteins from Bacteria, Archaea and Eukarya. Chromosomal proximity of FIST-encoding genes to those coding for proteins involved in amino acid metabolism and transport suggest that FIST domains bind small ligands, such as amino acids [1]. 
25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.74 0.71 -4.23 188 1182 2009-09-11 08:20:13 2007-11-19 11:45:35 4 33 821 0 440 1024 155 135.40 19 29.02 CHANGED AhphYpchlu.....t.t.t........pphst..........................hPlul........t.......................h.ssphh...........................................lRslhtls.tssul.shhsslppGtplphh.ts..spshhpshpphhpph..........................................t..tsthsl.hhsChuR...hht.pt.tppc.lpt.lpchhstt..sl..........sGFhoaGEhtsh .......................................................................................................thhtchlst..t..............pph...........................hslul....h.......................h..spp.h..................................lRshhth.s..s.sul.thh.s...s....l.p...G.pplph....tp..sssh...hcshppshpph.......................................ttttpstssl.hhsChuR..h.hht...t...t...sp-.hpt.lpphh..s....h.sh.........................sGFasaGElh..h................................ 0 126 291 379 +10275 PF10443 RNA12 RNA12 protein Wood V, Bateman A anon Pfam-B_18000 (Release 22.0) Family This family includes RNA12 from S. cerevisiae. That protein contains an RRM domain. This region is C-terminal to that and includes a P-loop motif suggesting this region binds to NTP. The RNA12 proteins is involved in pre-rRNA maturation [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.40 0.70 -5.99 18 149 2012-10-05 12:31:09 2007-11-19 12:57:18 4 12 135 0 119 171 38 416.20 37 49.99 CHANGED Rp-clcpLppWLtEsssTFlVlpGPRGSGK+ELVh-+sLpsccp.............sLhIDCcplhc.ARsDsthIsssAsQlGYhPVFSWhNSlSuhlDLAsQGLTGpKuGhSEopEuQlpshLpsospAL+cIuLppp...................ccsDcss..........................................................slp---YLptHPEt+.................PVlVIDpF......hp+ucp..suhlYcclu-WAAsLVpsNIAHVIFLTsD.VuhsK.LocALPNp...VF+slsLuDsS.csA+paVlspLtt.......................................................................................................p...................ptstpppptsp.t.......................plp-LDssl-sLGGRhhDLpshsRRl+s.G-oPccAlschIpQuus-IhphFLt........t.tssspsWospQAWpLIcpL...Spsssl.Ys-llhssLFKu.......ssEs..uLpsLEpuELIolsp.ssGpsscI+sGKPlYpAAFppLlsDcslpsthchthlsplIshEsscIcKhE-ELphL.ucl .............................................Rh-tlpplpt.WLhEsssT..FlllpGP....+GSGKcELVhcpsLps.ccp.............hLlIDCc.l.c.A.+uDsshIpshAsplGY.hPVFoWhNS..lSuhlD.LAs....QGhhGtK..uGhSEoh-sQlp.........pILpsospAL+pluLppp..............ppsccss........................................................................................................................................................plp---aL..ptH..PE.t+..........................PVlVIDsF......hp..K.u.cp...sshlYcclu-WAA.sLsps.N.IAHVIFLTsD.luhsKsLucALPsp...VF.+sl.sLuDsS.-su+paVlspLps...........................................................................................................p..................................................................................pphp-LDssIcsLGGRhoDLphhsRRl+s.Gp.oPppAVpcIlpQuus-I...h+halh...........t.ssp.sppWos.pQAWhLI+tL.................up..psslpYs-llhsslFKu.........ssEs..sLpuLEpuELIolpp.ppGpsppI+sGKPlapAAFppLhp.Dphlpsph-hthltpllphEsppIpKhEpELthLup.h............................................................................................
. 0 41 73 105 +10276 PF10444 Nbl1_Borealin_N DUF2455; Nbl1 / Borealin N terminal Mistry J, Hartsuiker E, Wood V anon Manual Family Nbl1 is a subunit of the conserved CPC, the chromosomal passenger complex, which regulates mitotic chromosome segregation. In Fungi and Animalia, this complex consists of the kinase Aurora B/AIR-2/Ipl1p, INCENP/ICP-1/Sli15p, and Survivin/BIR-1/Bir1p. In Animalia, a fourth subunit (Borealin/Dasra/CSC-1) is required for targeting CPC to centromeres and central spindles. Nbl1 has been shown in budding yeast to be essential for viability, and for CPC localisation, stability, integrity, and function [1]. The N terminus of Borealin is homologous to Nbl1 [1]. This family contains both Nbl1, and the N terminal region of Borealin. 21.20 21.20 21.20 23.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.41 0.72 -4.60 17 162 2009-01-15 18:05:59 2008-02-06 12:35:15 4 4 136 5 116 162 0 58.40 30 18.74 CHANGED pphpshlpshshE.lpsRhccl+uphp.hhpshcsth-hclh+lPpulRchpht-lhsch .........phpuhlcshphE.....l........ppRs+pl+uphp.hhpslcsphchcl.RlPpulRchsht-hhtph.... 0 37 59 91 +10277 PF10445 DUF2456 Protein of unknown function (DUF2456) Hartsuiker E, Wood V, Mistry J anon Pfam-B_97171 (release 22.0) Family This is a family of uncharacterised proteins. 25.00 25.00 32.10 31.20 24.00 20.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.31 0.72 -4.07 6 48 2009-01-15 18:05:59 2008-02-06 12:37:41 4 1 46 0 35 47 0 90.40 34 26.54 CHANGED tsLcuh.pspFht..sphhhhpalEWhl.pslRGhlLulhhahhlWPlThGILAuIGp+.tpHDYYFNs..h.hPQVhKLIYGsVlGhlosPllAhlhh ..................hp...h..h......t.phhpalphhltphlRuhllulhhFlllWPlohGILuuIGp+...tupDahaps..h.hhPQlhKLlYGsVluhlsTPlhshlhh.... 0 11 19 32 +10278 PF10446 DUF2457 Protein of unknown function (DUF2457) Hartsuiker E, Wood V, Mistry J anon Manual Family This is a family of uncharacterised proteins. 
30.00 30.00 30.60 32.00 27.10 29.80 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.77 0.70 -5.76 10 78 2009-09-13 16:18:35 2008-02-06 12:38:57 4 4 71 0 66 77 0 404.60 41 60.99 CHANGED RFHEFASS..EDD-Wlpcs.ssaKpKlTlsDsMKKEpAIRKLGEEAE......EEAhEEE....................-tD-DDspDsDp-Dt..tpt-s...........DsDSslt.hsp..DDGNEoDNEAGFA-S.DE.oDstS-YsFWAPsusTsATos...psl-ssRpshsR+sSsoSh-Shscppsp+p....s.....ppspRpPhKss..+hRPuTPcLPDSTDFVCGTLDEDRPLEsAYhSChEtRRhuKplhIPQDIDPSFPTSD...P-DE-D-.pchp..hshpsD............-usRsRttt.ts++pSPtsSP+RhhSPPP..R.........+ttttS..P++L.RSPPPPh+h+Ss..................sttuusssssoh...sscGlsh..upLspRPshT+TKSLPRTPNsF.......p+hsthsPh.pusE+Euoss+-sHsRGsIDIVcGLEKKRQ+RKEKFaRpHC.RKAsKEphpR.RPhPGKGAERM+ELGLtsAcphhua..ulG...............psuphVLSV ................................................RFHEFuSu..c.-DDWlpps..sshppKlT.lsDs.h+KE.sIRKLGcE.AE........EEA.pEE....................----.--pp..-p-..t..pDp...pp.--..--.ptD....-tp.....-t......s-.s-....p.sp..sD..G.coDsEsGFA-S.DEsD.t.s-hthWsPst..sssssps...tshshhR..s...tc...pt.S....soShtShps.ts.p...............pRp...h+......+h.R.ssT..PsLPDSTDFVCGTLDEDRPLEsAYhSChptRRppKph.IPQDIDPSFPTSD...s..EDE.----..tcp...........ptps-.p..h.pt.........................-t.+sRt......tp+po.ptSP+RhpSPPP..R.......................+.hhupS..P++L.cuP.ss.hRh+SP..................sths.hpshss.....tstu.hph..tsLut...RPsh..s+T+SLPRsPs.F..............................+hp.t..s...tptp.pttspsp-.hHsRGsIDIVtGLE+KRQ+RKEKFappaC.p+A.tK-p.h.pc.....+P...hPG+GAERM+ELGL.hAt...............thu...............pss..hVLSl..................................... 0 13 31 52 +10279 PF10447 EXOSC1 Exosome component EXOSC1/CSL4 Mistry J, Wood V anon Pfam-B_6887 (release 22.0) Family This family of proteins are components of the exosome 3'->5' exoribonuclease complex. The exosome mediates degradation of unstable mRNAs that contain AU-rich elements (AREs) within their 3' untranslated regions [1]. 
21.20 21.20 21.20 21.40 21.10 21.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.84 0.72 -4.03 32 355 2012-10-03 20:18:03 2008-02-06 12:46:39 4 8 310 1 250 337 11 79.70 41 34.22 CHANGED lPchGslVlsRVoRls.+tApspILsV................................................sssshps........................sF+GlIRppDVRuTE+D+VclhcsF.+PGDIVRApVlSL ...............................lPpsGslVhs+Vocl.st+hAplpIlsV...............................................................ss...pslpp.....................................................sFpGlIR.ppDVR..uTE..+D+Vc.....h.....hcsF.RPGDIV+ApVlSL........ 0 84 134 203 +10280 PF10448 chaperone_DMP POC1; 20S proteasome chaperone Mistry J, Wood V anon Pfam-B_75798 (release 22.0) Family This family contains chaperones of the 20S proteasome which function in early 20S proteasome assembly. The structures of two of the proteins in this family (DMP1 and DMP2) have been solved, and they closely resemble that of the mammalian proteasome assembling chaperone PAC3, although there is little sequence similarity between them [2]. 22.10 22.10 31.80 31.20 19.30 18.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.79 0.71 -4.30 18 73 2009-01-15 18:05:59 2008-02-06 12:51:02 4 1 39 6 47 72 4 144.80 21 96.35 CHANGED Mphtphppphsst.s....................utcsclslp...thchssKhslsltlNuphDsohcs...sh............hsYhhsl..................pphtushpshlhlusus-hth.sl.spplucLls+phpsss.............hlolSS+hht......t.spssshphLhhlLcsl+ ............................M..hphppphstt.t...................sstshplhlp...sschssKl.lsltlssphDsohcss.ps..................hshhhuh...........................ppphushpslshlGsssDhph.sl.spQlucLltcphts.s...............hlohS.S+hht.......ptsssschthLhhlLcslK.................... 
0 5 23 44 +10282 PF10450 POC1 POC4; POC1 chaperone Mistry J, Wood V anon Manual Family In yeast, POC1 is a chaperone of the 20S proteasome which functions in early 20S proteasome assembly. 20.40 20.40 21.20 20.80 18.40 20.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.82 0.70 -5.64 3 40 2009-01-15 18:05:59 2008-02-06 12:54:17 4 1 38 0 25 39 0 242.90 33 98.06 CHANGED MLFKQWN-hstP+H.LD.P.Iu+N.pSLplhPVPcVahPp.hDlspYpssVlTTKIMsPLFPppLLphppIu-IpTTLplcpsp.st-SEcHSWNY-ENFPNEV..s.KpDosscplhuFSaPIauFcDTLIhhIEENFIKhSAIFoNhIoRslIsp.LAQhsPDIp.IsI.GTSDKIsslKpLTpscCoLpPPEFITGFIGSlLTQL...PSKELKVFpsIVAPSEGPIGFEKhoLsslcuLVDlCucLLshcPSc...YSsEChRLWRLDuAAIGAQSGLYI .........................................................................................................................MlhK.Ws-...P+H.lp..........psh..tshsp.l.h.s....ph..p.hp.hll......t...hhssLFP.cp.L...lphpplG..clpsols.lp...ts.p.......sps.sscc.sasaDEpa.spl........-...cpcst.pp.phhshphPI..aths..c..oLlhshp..-NFlphssI...hsN.hlo+pllsp...L.s.ph.....ps.....-..I...l....ll..us....S...........D+.Is..sh........K...s...ho......ps.....s.....oLpPPEFITGhluSlloQL.........sppshp..hpsLVssSEGP...GFEKlslsshssLlclhu...phLshpspp...Ysppsh+.W+httss...psGLYl............................. 0 3 13 23 +10283 PF10451 Stn1 Telomere regulation protein Stn1 Mistry J, Wood V anon Pfam-B_51291 (release 21.0) Domain The budding yeast protein Stn1 is a DNA-binding protein which has specificity for telomeric DNA. Structural profiling has predicted an OB-fold [1]. This domain is the N-terminal part of the molecule, which adopts the OB fold. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution [2]. 
20.90 20.90 21.50 21.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.60 0.70 -5.54 7 123 2012-10-03 20:18:03 2008-02-06 12:56:02 4 9 115 3 95 163 1 178.30 20 49.51 CHANGED atchtpt.....VsaahspLh+hsphastsp..shhlpDl+pphc.Shphsp....YhshhtshlaWhN+PlppI+llGsllGhpa+hlttp-ahhhplDDCos............hLpCpspcsplhShuhslssh.lGhTlpVhGhhs.......hph.ELpVpalc.hshsLppEIcaWchshph+cpLshPWclss.hltt.hptcpcht.tt...............coPpppps......pssaIEpL...cphcscLplhSPasspsp...........s..lhs...hphlsstspl-pt ........................................htt...................................................................tshhFatN+.PlphlplsGhlluh.p..h.....................t..........c....h..hlh.s..lDDuSG.t...................................slps.......h.....st....tp......................................................G.hl.....plhG.hp..................................t..php...hp.h....hh.........s...httEhthWp.h.ph.h.....L....s.......................................................................................................................................................ttttttttt................................................................................ 0 15 45 80 +10284 PF10452 TCO89 TORC1 subunit TCO89 Misry J, Wood V anon Pfam-B_61649 (release 22.0) Family TC089 is a component of the TORC1 complex. TORC1 is responsible for a wide range of rapamycin-sensitive cellular activities. 
20.20 20.20 20.90 20.50 19.80 19.90 hmmbuild -o /dev/null HMM SEED 613 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.16 0.70 -5.63 6 69 2009-01-15 18:05:59 2008-02-06 13:00:05 4 5 53 0 46 70 0 286.50 20 51.06 CHANGED RQFST+SRuK...SsASFKG...L+RVhoHDGThsps.sh.spaushKKoKSSDuLh+RRslSGLsMTALst................puPlp..................huusGL+PcRs+popsVLsL+-upt.hDs-STTDEEVEhFo--p.c.........-Apss......-spstst+stPpppp.h.php.hptp.sh..st.cp.psh.tt..sh+s....hs+.lDSssthh........sc.pI-t......................S+ppppsaDussussslppslsstph.pp............s-h..........................................pt.sHsstpcc.pED+hsssspssp.spssApohtpttccuuptspssppppp..lsD-........p..t.scsp......-pYlPDMILSQSTGVER+F-pplShQNSLu.......................c.tst.s.pthcstphpcs+aNhlppplstsl.ssp...............psptsFSouISSLTssLpRssPpSh.sss+hNss.hp+spQp.L.Rtpph....tt................hspssppssSssplNsFuQFLpSsshsu-SRTQpKLWLQREsSIhDLsuQs.Duu-AlFhASNlEsKREFERIS+EYosVRRFtNPLs-uLpRlp.....ptpshppppppcusouhpuuS.s........hFssY.ppsKohcEhhssupp..hclpplLsuIWpSpotpFNKDsNPL 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ps............+ppFE+lspEY.sVRRahNPls-uL.Rlp.....................th.phppptt.p...s......................................................................................................................................................... 0 11 28 45 +10285 PF10453 NUFIP1 Nuclear fragile X mental retardation-interacting protein 1 (NUFIP1) Mistry J, Wood V anon Manual Family Proteins in this family have been implicated in the assembly of the large subunit of the ribosome [2] and in telomere maintenance [3]. Some proteins in this family contain a CCCH zinc finger. This family contains a protein called human fragile X mental retardation-interacting protein 1, which is known to bind RNA [4] and is phosphorylated upon DNA damage [1]. 
22.30 22.30 22.80 22.30 21.90 22.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.02 0.72 -4.46 39 274 2009-09-11 12:31:40 2008-02-06 13:04:56 4 9 247 0 202 267 1 56.50 29 11.43 CHANGED htptphpshphpG.pplpL....p.T.sE-It+WhcER+KpaPTptplp....cKpptcctptcctph .......................h.......hth.G...pphpL........p.o.sE...-ItpWhEER+KpaPTpsplc........cKc.phpptp.pctt............................... 0 74 112 166 +10286 PF10454 DUF2458 Protein of unknown function (DUF2458) Hartsuiker H, Wood V, Mistry J anon Manual Family This a is family of uncharacterised proteins. 33.10 33.10 33.70 33.50 33.00 33.00 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.93 0.71 -4.32 9 64 2009-01-15 18:05:59 2008-02-06 14:04:39 4 3 62 0 52 65 0 144.90 32 61.78 CHANGED DsppITsassAL+YVh+plspssch.pcIRcLIpcQccapcpahppR-tLlp+.puph-pp+cL.....-slLpslst.hsp..tp.sp.c.c.ptcLpthDtphhht.pph..t.hthhcsLplPhFhhpp..........................ht..clpp.phhhlplL.D.ltp .............sphIssa.tAL+hlh+plupscshtpcI+cLIppQcc+E+pWapuRpsLltKQps+ttpp+pl.................cplLpslGs..lsp....p.sss......c........cpptELppa.D.pKVapA.ppMscthptcL+sLulPFFshpp.................................hstc-lpthph+hLplLpDhh................. 0 19 29 42 +10287 PF10455 BAR_2 Bin/amphiphysin/Rvs domain for vesicular trafficking Mistry J, Wood V anon Pfam-B_12557 (release 22.0) Family This Pfam entry includes proteins that are not matched by Pfam:PF03114. 
22.70 22.70 22.80 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.97 0.70 -5.49 10 190 2012-10-03 12:17:00 2008-02-06 15:55:33 4 5 139 0 139 628 1 273.70 37 82.41 CHANGED oI.olss+Tp+hlpEpLGpVp.............DISpLPt-YlpLEpKsDsLcKlYp+lLhlo.cTaEs-uYDYPP..shsESlsD..aspolutKhpphtNhoSspcsphhhhGpstp-tsptpKt..................p.sss.hP+TLstALS+sAtcuo..hpcL........................................csp-hssLupshtphSss.tcIupA+h-pDphIlKcFNpcLcclLsppFtKsccLRKKVpcoRLpFDhhRpclcp...................................scPEsEE.............................................t.t..LEshEDEFVSATccAVhhMpcllcsSchlsLLKlFtssQLpYacpuscpLcp.LssLst ...................................................................................................................shAt+T.phlpEpLGpsp.......................................................................Dh.opLPt-Yl-LEc+lDuL+tlap+hLtVT.spYpsEuYDYPs..NlpEShsD..hu+olupKlp...LopAo..SssEA.pshL.hu.Pss.sc.................................PKThsa.Alu+Auhsuot.hpph....................................................ppsssp.cs.L.up.uLcpau.s.p+lupARLs.QDs.Ipo+...F.tshp.ssLNs.s...l...thAs.......+sRKsV-suRLphD..ss+sp..hcst...................................................pp.sph.pp...................................................................................................phchclE..pAED.......E.FVspTE.-.AlslMcp.V..l.-..s.....s.....-.s.L..cpL.t...-L...lsAQLpYa+puhchLpph......t.............................................. 0 34 77 122 +10288 PF10456 BAR_3_WASP_bdg WASP-binding domain of Sorting nexin protein Coggill P anon Pfam-B_43522 (release 20.0) Domain The C-terminal region of the Sorting nexin group of proteins appears to carry a BAR-like (Bin/amphiphysin/Rvs) domain. This domain is very diverse and the similarities with other BAR domains are few. 
In the Sorting nexins it is associated with family PX, Pfam:PF00787.13, and in combination with PX appears to be necessary to bind WASP along with p85 to form a multimeric signalling complex [1]. 24.90 24.90 24.90 25.70 24.70 24.60 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.56 0.70 -5.11 8 753 2012-10-03 12:17:00 2008-02-11 16:57:52 4 10 512 8 150 521 1 143.90 60 42.00 CHANGED DEKsWKpGKRKAEKDEhVGusFF.TIs.Pph.ssLDLp-VEpKlEsFppFTKpMD-uVhpLpssusEah++psGsh+KEYQKlGpAFpsLupuFphDthstSusLNcAlutTGcsYEpIG-hFAEQP+pDLc.lh-sLs.Y+GhLuNFPDIIpVpKGAlsKVKESp+hstpu+lssp-tsshpcRssshSYAl.AEhpHFHspRlhDa+uhMQpYLcQQlpFYQcIupKLccALspYD ............................................................DDKQWKLGKRRAEKDEMVGAHFMLTlQIPs...EHQ.DLQDVE.ERlDsFKuFAKKMDDSVhQLTHV......AS.ELVRKHLGGFRKEFQRLGNuFQSISp..........A..Fh..L.D..P............P..h.t...S....ps....LspAls............................................................................................................................................................................................................................................................. 0 43 55 99 +10289 PF10457 MENTAL Cholesterol-capturing domain Coggill P anon Pfam-B_16187 (release 22.0) Domain Human meta-static lymph node (MLN) 64 is a late endosomal membrane protein, and carries this MENTAL (MLN64N-terminal) domain at its N-terminus. The domain is composed of four trans-membrane helices with three short intervening loops [1]. The function of the domain is to capture cholesterol and pass it to the associated START domain Pfam:PF01852 for transfer to a cytosolic acceptor protein or membrane. In mammals, the MENTAL domain is involved in the localisation of MLN64 and MENTHO in late endosomes, and also in homo-and of hetero-interactions of these two proteins [2]. 
20.50 20.50 20.70 20.80 20.00 19.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.43 0.71 -4.65 10 190 2009-01-15 18:05:59 2008-02-11 17:18:49 4 7 92 0 107 176 0 151.70 49 43.35 CHANGED ScVRRpFCLFVTFDLLFloLLWIIp.lssscuIppsL-pEVl+Ys...a+sShFDIhLLAVaRFhlLlLuYAlh+L+HWasIAlTTslooAFLIsKVll...shhSQssFsalLsIsSFlLAWlETWFLDFKVLPQEscsEcha.luu..........................psss-RsPLLsPu..........slScGpFYSPs-Shs ..........................................S-VRRTFCLFVTFDLLFloLLWI....Ip...ls..s.ssuIpps.LcpEllp.Ys...atoShFDIh..lLAhFRFtsLl..LuYAl..h...+.L+...H.....WWsI.AlTThlooA...FLls......K...V.ll......ph........h............op....s.....s..FuYlLsIh....SFlLAWlETWFLDF+VLPQEscpcphh.hhs........................................p.ssstRss..ll.ss..............shSp...upFYSP.-o.u................................................................................. 1 26 35 66 +10290 PF10458 Val_tRNA-synt_C Valyl tRNA synthetase tRNA binding arm Bateman A anon SCOP Domain This domain is found at the C-terminus of Valyl tRNA synthetases. 25.10 25.10 25.10 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.03 0.72 -3.89 31 4203 2012-10-01 23:07:44 2008-02-12 16:38:06 4 17 4133 4 940 3276 1648 65.80 36 7.28 CHANGED Dl-cEpuRLpKELt+lppElc+lptKLuNPuFlsKAPs-VVEpE+s+lt-hppphptl+ppLspLu ............sh-tElsRLpKEls.KlppEls+lptKLuNEsFVu+APptVlpcERcKhtchppphspl.pppltpl.......... 0 326 626 804 +10291 PF10459 Peptidase_S46 Peptidase S46 Rawlings N, Mistry J anon Manual Family Dipeptidyl-peptidase 7 (DPP-7) is the best characterised member of this family. It is a serine peptidase that is located on the cell surface and is predicted to have two N-terminal transmembrane domains. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 698 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -12.83 0.70 -6.11 61 519 2012-10-02 13:45:52 2008-02-15 12:22:33 4 2 290 0 157 649 484 661.60 34 96.35 CHANGED ADEG.MWh.ppl...p..tsphpp.hGlclssppLhslstss.....hsAlVp.sGGCouuFVSscGLVlTNHHCuauuIQppSos-+saLpsGFhApsts-ELss.PuhplphlpplsDVTcplpsulp..shssptphptlpsthppltpc.sppcs......sh+spVtsFasGspYhLhphpcapDVRLVaAPspulGcFGGDsDNWhWPRHTGDFohhRsYsstsspPAtaups.NlPhpPcpaLplospGlc-GDashlhGaPGpTsRahsssplcpphchshPtplchhptthsllcphhppssch+lpY..AuphsuhsNhhKshtGhhculpchshlspKptpEsplttalppssph.tpatsshsplpthhppppphtppphhhppsht....ss.plhshAppLh+hspctp........pRttuhp-cshstlppplpph..psassslDctlhtthLsphtptsstpc.hsslpp.hlstpt........tt.....hpphscphastopLss.cshhthhpt.stsshps..scDPhlphAhul.hsthtthcpppcphsuthppspttahcAlhthh...spshYPDANtTLRloYGpVcG..Y..ss+.DuhhhssaTTlcGlhcK..p.sGstsFslPpphl-hhpsp-a...Gta..........................t.sssPVsFlossDhTGGNSGSPllNu+GELlGLsFDGNaEuluuDahassshsRsltVDhRYlLahl-clssAspLlcELsl 
..................................................................................ADEG.MWh.ppl...p.....ttphpp.hGlplsspplas.stss.....hsAVVp.......h...sG.......G.CTuphVSscGLVlTNHHCuauuIQppSosE.+DYLpcGFhApohs-E.LPs.PshplphlpclpDVTcpVtttlp......st.s..tp...p...pt.....thlpshhpt.lspc.htpps...............shcsplhsFasGNcYaLhhhppapDVRLVhAPPpSlGKFGGDTDNWhWPRHTGDFShFRlYss.ts.spPAtYSt-.NVPh+Pc.paLtlShpGlc-GDasMlhGaPGpTsRYhsuttlcpthpht.ssthphpshphslh.ccthppssp..hRIpY..ASchAu.uNYaKNhhGh.culpchsllspKpttEpphttahpppsp.................t....p.atp.s.hsplpphhspppththpthhhppshh.......ss.....phhphA..hp...lhphttthp...................tct.......th.cpthtp.hppthpph..cs.a.s.plD+plht.thLptYtphss.t.pp.hs.sh.hp.hlspch........tss.....hpthl-t.haspSh.lsstp.........s.ht.......pah...........ptss.hpt..ppDPhlp...huhul..hsthhph.p...pphpphssphpttcphahpuhhchp.s....pshYPDANtTlRloYGpVpG..Y......sPp..Du.hhh.......s.aTThcGlhcK......c..sss...-FslPtc..lh-Lhpp.+-a...GpY........................s.t.GphPVsFl.ossDhTGGNSGSPlhNucG.ELlGLs.FDG.NaEulsuD....hs.F-.ssh.pRsIsVDlRYlLall-KhusAspLlcEhs................................................... 0 62 113 141 +10292 PF10460 Peptidase_M30 Peptidase M30 Rawlings N, Mistry J anon Manual Family This family contains the metallopeptidase hyicolysin. Hyicolysin has a zinc ion which is liganded by two histidine and one glutamate residue. 
20.20 20.20 20.20 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 366 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.16 0.70 -5.58 6 71 2012-10-03 04:41:15 2008-02-18 13:56:05 4 8 52 0 39 167 8 301.10 24 54.19 CHANGED DsosRsATLppQsTA...oDGssVNhWVEsuE.suuKISsshlcsLsspFAs.sGuIYDhLpslGG.hWGPpu.tuohIs.ssQPlDIVIL......NF-+sGQPaGtlGYFWupNsFppuu..tPhSNcSlSLYLDoETLYLGGpsG..hpslloTMAHEuhHMQNFYRRuVhhGupYua-sWLEEMTAMhMEDhtSppIsssYNsIRDsRF.sYhsYtu.GuYNCuLhpa.TsaGssC-SYuVSGSLGGFLNRQh.GLsFYpsLLopsotosShuVLssAI+sApPuuoLu-tltpaussssuLhPssuuPAGFGaPuRpDusFoLPhIDPs..hhsulRoLs....sulPsTLpuaGoaPVsRosssGTYSETV+VPAGs......TLSVVVc ....................................................................hh...................ts...hh.hWltss..................plop...t.hpplhpcFss........tlYs......hhsslhGp.....s..................sps..s...t....ss.hls......h.Illh........s.h....p....s.s...spshG..h.....h.GYFaups.a....s......t...............tshSNpu..hYl...Dstshhh.ss.s.ss............sp.....h....huTluHEapHMlNF..p+s.lh.....p...s..tt.h.h-sWLpEhhuhshEc..hh.......utph..........................h..s.....sRhs.t.a.p................shs...sslhta...ss.......u.....ss..h..s..Yuhuh.hhtaL..h....cQh.....Ghsh..hhphl...s....s..tssptslhsssh.thsss..s.hsphhtpaths........t...st.asa.t............................................................................................................................................................................................... 0 9 18 26 +10293 PF10461 Peptidase_S68 Peptidase S68 Rawlings N, Mistry J anon Manual Family This family of serine peptidases contains PIDD proteins. PIDD forms a complex with RAIDD and procaspase-2 that is known as the 'PIDDosome'. The PIDDosome forms when DNA damage occurs and either activates NF-kappaB, leading to cell survival, or caspase-2, which leads to apoptosis. 
20.10 20.10 20.60 20.60 19.70 17.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.60 0.72 -4.24 2 40 2009-01-15 18:05:59 2008-02-19 12:42:30 4 27 28 0 19 43 0 33.40 66 4.02 CHANGED WssL.TtLcctu.+R.hatRspVP+FSWFhVV.RP .......WuDL-TaLEEEuPpR.hWA+CQVPHFSWFLVVSRP. 0 1 1 7 +10294 PF10462 Peptidase_M66 Peptidase M66 Rawlings N, Mistry J anon Manual Family This family of metallopeptidases contains StcE, a virulence factor found in Shiga toxigenic Escherichia coli organisms. StcE peptidase cleaves C1 esterase inhibitor [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.07 0.70 -5.55 15 252 2012-10-03 04:41:15 2008-02-19 13:26:58 4 13 181 1 40 186 20 293.50 42 30.59 CHANGED EssslsYSsstWSshLPt-Wl+PGlslpF.........spsspsuclts...clGsss-Lhlpolshhhh......TpPhs....psphhc..sclscEhFQ+hPsS+Llsssasst+l-h...VhhP.cu.hhsstsPuhsshpuushR-uhu...l.sGl.NhshGIpuusG-upsuhphhushlh..AhsupGpYsss....................aGG.GGG..GhsThDtohsspFsHEhGHsaGLsH..uGpshu.....satpush.sSsWGaDus+pcFluNhhsssss...hpsCpss.......hss...cG+satpDsMpGGusspss.tsRaohassapsspIQsahcNphh .............................................................pttlsYupphWSs.lPhsWhpPGloLpl........tcpssppGhlps......IphGussELllpsIDlGML........h...P.Rs....Rs..p...hhpp...tphtt-YFQKlPsS+LlhssYsPhHhph..........Vs.hPsGslhT..-.t....ss.uhGGWHsGsMR-ulG..KthVSoGIsNANhGIsoouGhu.p.....p....a.....shhsspIs...AHss.hGhYs..st..........l............VHGGS..G..GG......GI...VT.L.-sT..h.uN.EaSHElGHNYGLG.HassG..uo......sH.....s.sS...sWGWDu.+pRFIsNh..h.ppss....spptss.........pls.PF.Du.apahhDAMs...GGtsppsu.hsRFThapPhsu.t.hhQcahpNth................................................... 0 24 30 34 +10295 PF10463 Peptidase_U49 Peptidase U49 Rawlings N, Mistry J anon Manual Family This family contains Lit peptidase from Escherichia coli. 
Lit protease functions in bacterial cell death in response to infection by bacteriophage T4. Following binding of Gol peptide to domains II and III of elongation factor Tu, the Lit peptidase cleaves domain I of the elongation factor. This prevents binding of guanine nucleotides, shuts down translation and leads to cell death. 23.90 23.90 23.90 23.90 23.70 23.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.53 0.71 -4.54 7 29 2012-10-03 04:41:15 2008-02-21 10:51:40 4 2 29 0 11 41 4 181.90 20 57.80 CHANGED Iphp.tsLchlWhhsahhhshhpp........l.phhcsss.phcL....................E.spshlpcupphhpahtphlpsh..psas.........ttlPpPth..ptu.p.....tss-lFLpAlualhhHEluHlhhpc.hh.......ssp.shpEEh-sDsaATpalLss.pp.s............+RtluIulAhhhlp.Lhlcpphshps..THPsspsRI.sslpt.p.pts-phh.hhs ..........................................................................h....hphhW.hs...h.hhpt........h......t..................................htht.t.h.hh....p.hp..sh....pt...............thsp.t...t.t........hssplhhhAls.allhHEluH.lhhtH.th...................................s.s.shp-EhpADpaAhchllsphtp.s.p..................hppthuIhhulhhh..lh....p.p.phshpp..oHPshppRl.s.lpt.....tp..h........................................... 0 5 8 11 +10296 PF10464 Peptidase_U40 Peptidase U40 Rawlings N, Mistry J anon Manual Family This family contains P5 murein endopeptidase from bacteriophage phi-6. P5 murein endopeptidase has lytic activity against several gram-negative bacteria. 
It is thought that the enzyme cleaves the cell wall peptide bridge formed by meso-2,6-diaminopimelic acid and D-Ala 20.80 20.80 20.90 113.30 20.70 19.40 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.37 0.70 -4.92 2 29 2009-01-15 18:05:59 2008-02-21 11:15:17 4 1 1 0 0 30 0 171.00 92 96.88 CHANGED VQYSLRALGQKVRADGVVGSETRAALDALPENQKKAIVELQALLPKAQSVGNsRVRFTTAEVDSAVARISQKIGVPASYYQFLIPIENFVVAGGFETTVSGSFRGLGQFNRQTWDtLRRLGRNLPAFEEGSAQLNASLYAIGFLYLENKRAYEASFKGRVFTHEIAYLYHNQGAPAAEQYLTSGRLVYPKQSEAAVAAVAAARNQHVKESWA VQYSLRALGQKVRADGVVGoETRAALDALPENQKKAIVELQALLPKAQSVGsSRVRFTsAElDSAVARISQcIGVPASYYQFLIPIENFVVAGGhETTVSGSFRGLGQFNRQTWDGLRRLGRNLPAFEEGSAQLNASLYAIGFLaLENKRAYEusFKGRVFTHEIAYLY............................................ 1 0 0 0 +10297 PF10465 Inhibitor_I24 Peptidase_I24; PinA peptidase inhibitor Rawlings N, Mistry J anon Manual Family PinA inhibits the endopeptidase La. It binds to the La homotetramer but does not interfere with the ATP binding site or the active site of La. 25.00 25.00 74.60 74.30 22.30 21.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.88 0.71 -4.44 2 11 2009-01-15 18:05:59 2008-02-21 15:18:23 4 1 10 0 0 10 0 139.90 77 89.06 CHANGED ThsKWF+Is+tDptLpshaPELptGTlhKV...ltpsp-Dh.sDpGIIEl.LssGchlsIYD+shohWChWpocSl-..ElEEl...sp.Vsptshu-FpGERISYALAKLAAQENNDGYEGNLMQAAAEYIEaLEpplS ..TVDKWFRINRADpGLCNYWPELSAGTVFKVRELuKECEDDIEPDTGIIE...IELSDGKIINIYDKPITYWCLWNTESVENGEIEEVVE..RTN..QVVQKPKA-FQGERISYALAKLAAQENNDGYEGNLMQAAAEYIEWLETQIS. 0 0 0 0 +10298 PF10466 Inhibitor_I34 Saccharopepsin inhibitor I34 Mistry J anon Manual Family The saccharopepsin inhibitor is highly specific for the aspartic peptidase saccharopepsin.\ It is largely unstructured in the absence of saccharopepsin [1], but in the presence, the inhibitor undergoes a conformation change forming an almost perfect alpha-helix from Asn2 to Met32 in the active site cleft of the peptidase. 
25.00 25.00 26.10 39.40 21.40 21.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.43 0.72 -3.91 2 7 2009-01-15 18:05:59 2008-02-21 15:24:18 4 1 7 3 2 6 0 68.40 82 97.96 CHANGED MNTDQQKVSEIFQSSKEKLQGDAKVVSDAFKKMASQDKDGKTTDADESEKHNYQEQYNKLKGAGHKKE ...MNTDQQKVSEIFQSSKEKLQGDAKVVSDAFKKMASQD.KDGKTTDADESEKHNYQEQYNKLKGAGHKKE..... 0 1 2 2 +10299 PF10467 Inhibitor_I48 Peptidase inhibitor clitocypin Rawlings N, Mistry J anon Manual Family Clitocypin binds and inhibits cysteine proteinases. It has no similarity to any other known cysteine proteinase inhibitors but bears some similarity to a lectin-like family of proteins from mushrooms [1]. 20.60 20.60 24.10 24.50 19.70 19.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.89 0.71 -4.28 2 19 2009-01-15 18:05:59 2008-02-21 15:42:18 4 1 3 6 10 23 0 128.00 42 92.90 CHANGED LEDGIYRLRAVTTHNPDPGVGGEYATVEGARRPVKAEPNTPPFFEQQIWQVTRNADGQYTIKYQGLNTPFEYGFSYDELEPNAPVIAGDPKEYILQLVPSTADVYIIRAPIQRIGVDVE.GsQtNTLsYKFFPVDGSGGDRPAWRFTRE ............GhYpLRA....sPssGlGG.YATspGspc.VpstPpoPPFFERQlWpl..T..+..s..p..-.GpYTI.p.hpshsssFt.auFSh..D.p..h..PpuPVI.su-..h.E.a.h.h..p.hP.ssspshhIpA.h.hlGhsh..sGs.p.t.pp...................................................................... 0 0 10 10 +10300 PF10468 Inhibitor_I68 Carboxypeptidase inhibitor I68 Rawlings N, Mistry J anon Manual Family This is a family of tick carboxypetidase inhibitors. 25.00 25.00 29.50 29.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.31 0.72 -3.80 2 8 2009-01-15 18:05:59 2008-02-21 15:58:10 4 1 5 10 1 12 0 85.50 43 83.72 CHANGED lVF.hhVLssupsN-CVS+GFGChPpScCP.EhRLSYsGCsTVCCDlS+LTGCcsKGGECpPh-+.C+EL.uEssSCuctQKCCVhL ..sh....LlllspupAN-CVSpGaGClPcScCPcEuRlsYu..GC..uTVCCDlS+lsuCcu+GGECpPhcpsCKE.LpupouoCs+GQKCCVal. 
0 1 1 1 +10301 PF10469 AKAP7_NLS AKAP7 2'5' RNA ligase-like domain Buljan M, Coggill P anon TreeFam_TF105406 Domain AKAP7_NLS is the N-terminal domain of the cyclic AMP-dependent protein kinase A, PKA, anchor protein AKAP7. This protein anchors PKA for its role in regulating PKA-mediated gene transcription in both somatic cells and oocytes [1]. AKAP7_NLS carries the nuclear localisation signal (NLS) KKRKK, that indicates the cellular destiny of this anchor protein [2]. Binding to the regulatory subunits RI and RII of PKA is mediated via the family AKAP7_RIRII_bdg. at the C-terminus. This family represents a region that contains two 2'5' RNA ligase like domains Pfam:PF02834. Presumably this domain carried out some as yet unknown enzymatic function. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.40 0.70 -4.63 50 426 2012-10-03 21:31:48 2008-02-21 19:00:45 4 20 246 0 280 884 46 205.50 22 57.73 CHANGED sTHFlulPL.spsp.lppphpchppplL....................................................................tpt.sl..pshhhsspplHlTLsshtLhspp.-lptAt.....chLppsp....pp.......................lhph......pslplpl+.Glchhss.................cVLYA..plp..t......spLpplss....plhcp.FtppGlhh.tspch...........................................phchHhTlhpspht...........................tppp...psh..sspplhccatcacFGphpl..splcLsph.hspss.sGaYcstuslpl 
........................................................oHFlulsl..s......p....sp....l.p..p....ht....ph..ppplh.........................................................................tt..s.....tl......tsh.h....h.sstpLHlTls..h..h.......p.Lhspp....clppuh..............phLp.php............tp.............................................................l.p.....h.t.....pslplphp.Glphhss..........................t.s...pVLau....tst..tt..........spLp.phsp.....tltcp...F.p..t.G....lhhtcptt....................................................................................................h.p.hHhTlhpshhh............................................................tpc.....tph.....sspplhc..pa.t.s...h....a......G...p...........l...pplplsph....t.....t..............t.Y.......................................................................................... 0 94 145 212 +10302 PF10470 AKAP7_RIRII_bdg PKA-RI-RII subunit binding domain of A-kinase anchor protein Buljan M, Coggill P anon TreeFam_TF105406 Domain AKAP7_RIRII_bdg is the C-terminal domain of the cyclic AMP-dependent protein kinase A, PKA, anchor protein AKAP7. This protein anchors PKA, for its role in regulating PKA-mediated gene transcription in both somatic cells and oocytes, by binding to its regulatory subunits, RI and RII, hence being known as a dual-specific AKAP [1]. The 25 crucial amino acids of RII-binding domains in general form structurally conserved amphipathic helices with unrelated sequences; hydrophobic amino acid residues form the backbone of the interaction and hydrogen bond- and salt-bridge-forming amino acid residues increase the affinity of the interaction [2]. The N-terminus, of family AKAP7_NLS, carries the nuclear localisation signal. 
22.70 22.70 22.70 23.20 22.00 22.60 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.90 0.72 -4.14 3 57 2009-01-15 18:05:59 2008-02-21 19:01:47 4 2 29 0 22 52 1 59.70 63 33.75 CHANGED uuEPDDAELVRLSKRLVENAVLKAVQQYLEETQNKpQPGEGsSVKAEuuDpNGsss-NNRK ...G.uEPDDAELVRLSKRLVENAVLKAVQQYLEETQNKp+P.G-G..SSVK.sEtu.Dp.NGsss-NNRK....................... 0 3 4 6 +10303 PF10471 APC_CDC26 Anaphase-promoting complex APC subunit 1 Buljan M, Coggill P anon Treefam_TF101057 Family The anaphase-promoting complex (APC) or cyclosome is a cell cycle-regulated ubiquitin-protein ligase that regulates important events in mitosis such as the initiation of anaphase and exit from telophase. The APC, in conjunction with other enzymes, assembles multi-ubiquitin chains on a variety of regulatory proteins thereby targeting them for proteolysis by the 26S proteasome. CDC26 is one of the nine or so subunits identified within APC but its exact function is not known [1]. The APC/C becomes active at the metaphase/anaphase transition and remains active during G1 phase. One mechanism linked to activation of the APC/C is phosphorylation. The yeast APC/C is composed of at least 13 subunits, but the function of many of the subunits is unknown. Hcn1 is the smallest subunit of the S. pombe APC/C, and is found to be essential for cell viability, APC/C integrity, and proper APC/C regulation. In addition, Hcn1 phosphorylation indicates a specific role for the phosphorylation of this subunit late in the cell cycle [2]. 
23.90 23.90 24.60 24.80 23.80 23.80 hmmbuild --amino -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.70 0.72 -3.24 42 164 2009-01-15 18:05:59 2008-02-21 19:08:13 4 4 145 8 112 156 0 79.20 22 53.31 CHANGED M.LRRpPTsIpLo.-.DltcaEct+pcppppppppppppp.....................tt.sttp.ssptp.lsscptphhpptspthu ....M.LRRpPTplpLph-.Dl.p-aEsh+pcpppppppppphpt.............................t...t.s.sttstspt..t..................t............................................................ 0 32 51 84 +10304 PF10472 CReP_N eIF2-alpha phosphatase phosphorylation constitutive repressor Buljan M, Coggill P anon TreeFam_TF105548 Domain This is the conserved N-terminal domain of CReP, constitutive repressor of eIF2-alpha phosphorylation/protein phosphatase 1, catalytic subunit. It functions in the dephosphorylation of eIF2-alpha under basal conditions in the absence of stress. In response to translation inhibition, there is reduced synthesis of the labile CReP that contributes to elevated levels of eIF2-alpha phosphorylation [1]. The C-terminus, family PP1c, is shared with the apoptosis-associated protein Gadd34 and herpes simplex virus [2]. 
25.00 25.00 54.80 54.00 17.70 17.20 hmmbuild -o /dev/null HMM SEED 411 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.40 0.70 -5.76 2 32 2009-01-15 18:05:59 2008-02-21 19:10:12 4 3 24 \N 15 35 0 362.50 65 55.21 CHANGED MEsGTttuRKR.GPRhG.hFh.PFh.RRSpAsSScFPsP.u.pNsGN.....SA.PEpRspYWTKLLSQLLA.LPuLhQKlLlWSQLFGGhhPTRWLDFAusYSALRAL+GREcsAAPTsQKSLSSLpLDSS-s.VsSsLDWLEEGlpWQhSssDLcLcLKAptpALDsAA.sFLLEQQLWGVELLPSSLQutLhSpRELsSSsSGPLslQplsNFpVVSYLLNPSYLDhhPpLtlphQsSsGsuphVGFpTLTPESshLpEDtCHPQPLpAEh.ssuWptCPPLSTEGLPEIHHLRMKRLEFL.QANKGQ-LPTPDQDNGYHSLEEEHsLLRMDPpHCpDNPsQhVssAuDhP....EsTEcK.ELlhpEV.....ppSPptus..sElPhEKEstEs+hsssDhS.htt ..............Es...t.sR.....t.t.hFh.P.F.h..hpSpsssSphPsP.ustp.ts....sSs.PpsRsp.W.hKLLSQLLAPLPuLLQKlLlWSQL.FGGMhPTRWLDFAGsYusLRAL+GREcsAAPTAQ.K.SLSSLpLDs.S.-sussSPLDWLEEGlHWQCSssD......LcL-LKAKGsALDPAApAFLLEQ.QLWGVELLPuSLQu+LaSsRELGSSPSGPLNlQRlssFsVVSYLLNPSYLDChPRLElSYQNSsGsGELVsFQTLTs.ESuCLpED.tCH.PQPLsAElosASWQGCP...PLSTEGLPEIHHLRMKRLEFLQQASKGQ-LPTPDQDNGYHSLEEEHsLL.RMDsKH......C......pD..sPTQhVPsAG.slP.GssQEsTEEKIELLTpEVPLALEcpuPo.EuCPSsE....lPhEKEPGEsplSVVD.S.lc.s........................... 0 1 1 3 +10305 PF10473 CENP-F_leu_zip Cenp-F_leu_zip; Leucine-rich repeats of kinetochore protein Cenp-F/LEK1 Buljan M, Coggill P anon TF101133 Domain Cenp-F, a centromeric kinetochore, microtubule-binding protein consisting of two 1,600-amino acid-long coils, is essential for the full functioning of the mitotic checkpoint pathway [1,2]. There are several leucine-rich repeats along the sequence of LEK1 that are considered to be zippers, though they do not appear to be binding DNA directly in this instance [2]. 
33.00 33.00 33.00 33.20 32.90 32.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.95 0.71 -4.32 30 150 2012-10-08 13:49:02 2008-02-21 19:16:05 4 14 53 0 66 118 1 136.90 33 13.97 CHANGED DEKKpL+lhEcLKESc+cuDsL+D+VEsLERELEhSpcNpEpslL-AEsuKAElET....LKscl-phsppLpsLEhDLsslR.......SEKEsLs+pLQccQp+VSELEthsoShcsLLcEKEp-...+lQhcE-oKsAlEhLQsQL+ELsE ...........ccKpLcltpcL+cscccpssLcc.+lEsLEp-Lphupcspppshh-uEsu+tElps....Lcscl-phsppLpsLch-Lsslp.......pEK..csLsppLpccpp+lpELEphp...ss...hp..phlp...ptEpE...chphtcp.psshp.LppphpcLpE................. 0 3 7 24 +10306 PF10474 DUF2451 Protein of unknown function C-terminus (DUF2451) Buljan M, Coggill P anon TreeFam_TF106152 Family This protein is found in eukaryotes but its function is not known. The C-terminal part of some members is DUF2450. 26.40 26.40 27.10 26.50 26.10 26.30 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.48 0.70 -4.61 12 199 2009-01-15 18:05:59 2008-02-21 19:22:53 4 8 130 0 132 188 1 211.50 37 25.45 CHANGED LYGLppRhlAsEShhhlAcphchhcshlpphLPtpptsh.........lppFasp............slus.ls-lRchlapsssuphlslp.............shlsthsslKW-lpEs.sspHssYVDsl.p-hppFsh+Lppht.ppssls.c..h.tsLW-pslplhtchL....V-GauplKKCostGRALMpLDhpphhstLEplost+..s.chpaV-sYIKAaY....LsEp-hcpWhppp................pEYohKplsuLVptssuu......s++tRpcLLshl-s ...........................hYGLsERlVAsES.lhhlupthch.h.pshLpslhsts.pp.sh.............LppFasp................hl.ss.s.s-L+c.lahhsusph.l.chp.............................................................phl.hhusl.K.W.-l+El..hspHs.YVDhLlp..-.hpp.F.spRLtphs..ppstlsh....lpplLW-pslclsscsl....VEGau.p.V...K.KCSsEGRALM..QL..DhQphh.pL.........E...+l...s.s....h+.......shP..c......tpaV-..sYIKAYY....Ls.E.s..-.hcpWl+pH................pEYStpQlssLlshshss......p+ptRpplLthl-......................................................................... 
0 56 76 109 +10307 PF10475 DUF2450 Protein of unknown function N-terminal domain (DUF2450) Buljan M, Coggill P anon TreeFam_TF106152 Family This protein is found in eukaryotes but its function is not known. The C-terminal part of some members is DUF2451. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.85 0.70 -5.51 12 433 2012-10-03 17:31:52 2008-02-21 19:23:24 4 20 221 0 301 888 8 203.70 22 22.10 CHANGED l-ulEtsYFpp......cFDssha.Lc+lssst..hshpp......l-cptsplppQtchluc+l.phlhppppshspthpplp-lcccLptusshCtsuR+pLstuppphTphsLtlhupp+K+psLhphL.pLsslKphpsh-hclcphlp-usYstAlplL.EspphhpsappasClp-lspplpshhthhtppLDshL.plstsF-scpYsplhpAYtLLs.+spthh-KlpphFlpslcopopsVL+shhp.t.....pscchp+hsYppLCtplsp-pah.CLhchhpsLaclhhSYaplhpa ..................................................................................................................................................................p.-+Lpp.h-hVp...hplhppI.ppppuah.puhsp.lp.pLpsp...lptu...st...tsp.......p.hR.c..p..Lpthcp..t.h...sp..s..uLp....ll.p.....pp+Rpsl.....h.t.lhp.....tLptl.p....pl......ppspsplpth...........l.....p................p.s....-.ass...A...l...p...ll.......p.sp..p..h..h....p..p..............h.t..t..h...p...s....h...p...p.....l...t...p...pl...pph....t.htp..h........................................................................................................................................................................................................................................... 2 109 163 246 +10308 PF10476 DUF2448 Protein of unknown function C-terminus (DUF2448) Buljan M, Coggill P anon TreeFam_TF106107 Family The family DUF2349 is the N-terminal part of this family. This protein is found in eukaryotes but its function is not known. 
19.60 19.60 19.60 20.30 19.50 19.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.39 0.71 -4.80 3 69 2009-09-10 19:42:20 2008-02-21 19:24:03 4 3 37 0 30 52 0 195.40 60 34.97 CHANGED aRRoREADKAFlK..STuuVuoPspVILLRILAFLACAFLlAsTLYGhs-..pS.sspQTLSGGVlPPK...suNsScASDpTssuupuWQDLhGLLPE+ATEsl+lsWQaGpsHQhAVVSVGLLTCLTAlLlAGPIRLRRIDAhASVLWhLlLCLYLAEsYLpTsuPSWLDTlKFuTTSLCCLVGFAAAVATRKSTGPRRARsRRs.sst .........+R.READQsasQ......s.SSuVKuPsQVILLRALAFLACAFLLssALYGsSc...........shss.Gus.lPPu..........GssuS.s.s.s..s.s.TssuA-GWpQLLuLLPEHssEKLpEAWAFGQSHQhuVVAlGLLTCLLAMLLAGRIRLRRIDAFuosLWALLLGLHLAEpYLpssoPSWLDTLKFSTTSLCCLVGFTAAVATRKuTGPRRaRPRRa....ss......................... 1 1 4 11 +10309 PF10477 EIF4E-T Nucleocytoplasmic shuttling protein for mRNA cap-binding EIF4E Buljan M, Coggill P anon TreeFam_TF101531 Family EIF4E-T is the transporter protein for shuttling the mRNA cap-binding protein EIF4E protein, targeting it for nuclear import. EIF4E-T contains several key binding domains including two functional leucine-rich NESs (nuclear export signals) between residues 438-447 and 613-638 in the human protein. The other two binding domains are an EIF4E-binding site, between residues 27-42 in Q9EST3, and a bipartite NLS (nuclear localisation signals) between 194-211, and these lie in family EIF4E-T_N. EIF4E is the eukaryotic translation initiation factor 4E that is the rate-limiting factor for cap-dependent translation initiation [2]. 
19.70 19.70 21.20 20.00 18.50 19.60 hmmbuild -o /dev/null HMM SEED 578 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -13.04 0.70 -5.72 5 194 2009-01-15 18:05:59 2008-02-21 19:26:45 4 5 79 0 92 179 1 427.30 31 63.88 CHANGED YSK-ELL-IKELP+S+cRPuCLo-KYD........................SDGVWDPEKWHuSLYPuSupSsPsEuhKK-pDs-RsoLKRRIsDPRERVK-DDLDVVLSPQR...............................................RSFGGGCpVsussuSRRssSPLE.K-s-uhRLh....................GuRRIGSGRIhs.......................................uRsFERDaRGsch-RcsEcsRDRE+-aKDKRFRR-aGDsK.........RVFu-RRRND....SYsE....EEPEWFSAGPTSQSETIELpGFDDKILEED............p+u+KRo++Ro.....pSlKEshVECNGGlu.c.c.slslspEsuADQEVPcsssLPEs.................sPG-FDFNEFFN......h.s.h...-psht.ss.usSRFS+WFp.............................................hEsttps.h..hc..t.tp.phhP..psL-psh.P+LsSh.ppsh..s.ss.h.....ht.ssspppshFpcLLs...............h.sN..sp..ph...............................hss.P..s.la.ptt..h....pph.s..s.hs..pt.htt..sshs.h..h.hhhph.shptps.slst.......t.phhs..p....p...pp.ppphhpp..sphshs.l.ph.............sS.hs.uFTPTSVlRKMhccKcKc+ 
.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.st.h............................................................................................................................................................................................................................................................p.......................................c..pPEWhs..Ppo..-hI-LhGF-..-.....................................................................................................s..........................................t.........................................................................p.p.....t......hp..........................................S.h...ht....................................................................................t..................t....p.............................sh.t..p.....h.......p..lp..........p......s...ts......................t..............p......h..pc.hlt...........tt.............................................................................................................................................t......................................................h........................................t.....p..............................................t....................t.t..t..pt...................................................................................................................................................................... 
0 26 33 65 +10311 PF10479 FSA_C Fragile site-associated protein C-terminus Buljan M, Coggill P anon TreeFam_TF105915 Domain This is the conserved C-terminal half of the protein KIAA1109 which is the fragile site-associated protein FSA [1]. Genome-wide-association studies showed this protein to linked to the susceptibility to coeliac disease [2]. The protein may also be associated with polycystic kidney disease [3]. 18.80 18.80 23.30 21.70 18.70 18.20 hmmbuild -o /dev/null HMM SEED 615 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -13.01 0.70 -6.45 4 151 2009-01-15 18:05:59 2008-02-21 19:31:11 4 4 85 0 95 148 0 477.70 45 20.51 CHANGED sosSssusspQhTsWETLVlFAINascLNVQMNMuNVMGNssWhTpshpSpGRLSlGSssc+sh.hulsLGuSpLDAKGGlVGGTI-lNplchhhH..IpE-ssppPsHKhtIphhulEh+lDYMGoSVLMs+hSuhshplpDEWKssppsshssthspp...u.IhlHGDLoWDpLQlMISKSTTsDllKhhhKLpEFFoQQFKoSKRlhSsLp..PpLtspo.tAslc+Rppcp.......L..............DAtHHRHWp+sLc.ssGhhls.pLhs.LsccGshlGGohEL+GpsISLACFHG.NF+uKSWALFpL+-PsIsFsTEAph.....pSscp...lhlhQTLshpLGpsTtsQp.....ppshAsVsRlo...RN.haP.phcolpEWF...........cYuhA..NpEl-slcphPh.EsEpptso...solpRhRuuGSups..tph.thNHNpEsIFALPuLQL+FKopHlQGsssP-ht-s....KPcV.CSFlT-FpDHIhVTsDA-A.hhFLHDLloSYLpEKEKslus........Pp....hshpP..........................Gp.ssL..hpsSHospuss..........................................usSuTtsoVsupppp................DWRcFpCpTWHLEPTVRL...lSWsG+pI-PhGlDYILpKLGFpHARTTIPKWLQRGhMDPLDKV.AlhhlpLLhh 
......................h..........phssWETLVlFAlNhppLsVpMNMuNVMGNssWhTpshpopGRLolsSstc+ph.huluLutSpL-u+GGlVGGsl-lst..lchh..............hH...........Ip...E..csspp........P....tHpl.tlphtuh-hRlDYMGoulLMuhhSshshplpDEW+h...s....h.sshs.s...pc...........................u.IhlcGcLpWD.hQlhIo+STTsDllKhhhKLpEFFoQQFcoS+Rs.hSoht....shl........sts...ssh..cppptpp....................................-s..tHHRHW.tlL..................thh......sshhl.s...h.h.LPp..p..u..h.........hGGohpL+GpphoLACFHG.NF+.S.KSWALFpLc-PsIsFhTEAppl.........p...psppp..........shlsQTLshpLGps.....s.............hpp...............pp..sMAsls+lo....Rp...P.thtolpEWF............................................................................pYshA..spElsh.lcp.h.sh.pt..-pthss.....s.hpthRusu...S...........................hNHppE..sIFALPphQLchKo.HhQ.tspp.Pshp.-s.........KP..pV.-CShlTEFpDH.IhVThDA-h.hhFLHDLlouYlKEK..E+s.hhs............p....hs.ps.......................................................................tp.ss.h...p.tspsphtp...................................................................................t.s.s.psh..............................................................DWRcFhC..pTWHLEPTl.R.L...lSWsG+pI-PhGVDYILpKLGFpHARTTIPKWlQRGhMDPLDKllullhhpLh..s...................................................................................................... 0 39 46 75 +10312 PF10480 ICAP-1_inte_bdg Beta-1 integrin binding protein Buljan M, Coggill P anon TreeFam_TF105393 Family ICAP-1 is a serine/threonine-rich protein that binds to the cytoplasmic domains of beta-1 integrins in a highly specific manner, binding to a NPXY sequence motif on the beta-1 integrin. The cytoplasmic domains of integrins are essential for cell adhesion, and the fact that phosphorylation of ICAP-1 by interaction with the cell-matrix implies an important role of ICAP-1 during integrin-dependent cell adhesion [1]. 
Overexpression of ICAP-1 strongly reduces the integrin-mediated cell spreading on extracellular matrix and inhibits both Cdc42 and Rac1. In addition, ICAP-1 induces release of Cdc42 from cellular membranes and prevents the dissociation of GDP from this GTPase [2]. An additional function of ICAP-1 is to promote differentiation of osteoprogenitors by supporting their condensation through modulating the integrin high affinity state [3], 25.00 25.00 26.20 28.40 19.30 18.40 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.30 0.71 -4.54 2 77 2012-10-04 00:02:25 2008-02-21 19:31:49 4 2 49 0 42 66 0 168.10 64 91.50 CHANGED MFRKsKKRHSSSSSQSSEISTKSKSVDSSLGGLSRSSTVASLDTDSTKSSG..NssSDssAEFRlKYVGAIEKLphs.uKsLptPLDLINYIDsAQQDGKLPFVPs-EEhIhGVSKYGlKVuo.DQhDVLHRHsLYLIlRMlCYDDGLGAGKsLLALKTTDspppEhSlWVYQCsShEpAQAICKVLSouFDssLsSEKP ....................................................MFR.KGKKRHSS.S.SSQS.SEISTKSK..SVDSSLGGLSRSST.VA.SLDT...D..........STKSS.G..QS.NsNSDTCAEFRlKYVGAIEKLchs-uKsLEGPLDLINYIDVAQ...QDGKLPF.V.P.E..E.EhI.hGVSKYGIKVoo...sD.QaD..........VLHRHuLYLIlRMVCY..DDGLGAGKsLLALKTT..DuppEE...........hSLWVYQCsSh-QAQuICKlLusuFDslLss-.............. 0 11 13 24 +10313 PF10481 CENP-F_N Cenp-F_N; Cenp-F N-terminal domain Buljan M, Coggill P anon TreeFam_TF101133 Domain Mitosin or centromere-associated protein-F (Cenp-F) is found bound across the centromere as one of the proteins of the outer layer of the kinetochore [1]. Most of the kinetochore/centromere functions appear to depend upon binding of the C-terminal par to f the molecule, whereas the N-terminal part, here, may be a cytoplasmic player in controlling the function of microtubules and dynein [2]. 
25.00 25.00 28.50 25.00 24.70 23.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.24 0.70 -5.05 10 64 2012-10-08 13:47:38 2008-02-22 09:02:08 4 10 40 0 34 53 0 257.70 59 11.94 CHANGED MSWAVEEWKEGLPo+ALQKIQELEuQLDKLKKERQQRQFQL-SLEAALQKQKQKVEsEKoEuusLKRENQSLhEoCDsLEKuRQKlSHDLQVKESQVNaLEGQLNSsKKQIEKLEQELKRaKsELE....RSQpuht....su-spLsssoTPQKoFusPl.oPuphapDu+hE-LpEKYNKEVEERKRLEsElKsLQ....s.KKhsQs.lsQuTloHRDIARHQASSSVFPWQ.QEpTPS+tSusuhETPl+RssousahsWEpE-TPsKss.p.tptpt.soShtussssopLh-QLKsQNQ.EL+S+VsELEhRLQuQEK- .......MSWAlEEWKEGLPoRALQKIQELEuQLDKLKKE+QQRQFQL-SLEAALQKQKQKVEsEKsEsssLKRENQpLhEhC-sLEKs+QKlSH-LQVKESQVNh.EGQLsSsKK.......QIE+LEQELKRhKSELE....RSQQutt....uuDspLs.ssTPQK..FsoPL.TPs.....opaE-LcEKYNKEVEERKRLEtElKsLp.....s.K+hsps.lsQuThsHRDIARHQASSSVFsWQ.QEpTPS+.Stss.cTPh+Rshuuuah.hE.EsoPs+ss.p.tptst..uuh.ssspsspLhcQLKsQNQ.......EL+u+lsELEhcLQupEK-.................................. 0 7 10 18 +10314 PF10482 CtIP_N Tumour-suppressor protein CtIP N-terminal domain Buljan M, Coggill P anon TreeFam_TF106469 Domain CtIP is predominantly a nuclear protein that complexes with both BRCA1 and the BRCA1-associated RING domain protein (BARD1). At the protein level, CtIP expression varies with cell cycle progression in a pattern identical to that of BRCA1. Thus, the steady-state levels of CtIP polypeptides, which remain low in resting cells and G1 cycling cells, increase dramatically as Dividing cells traverse the G1/S boundary. CtIP can potentially modulate the functions ascribed to BRCA1 in transcriptional regulation, DNA repair, and/or cell cycle checkpoint control [2]. This N-terminal domain carries a coiled-coil region and is essential for homodimerisation of the protein [3]. The C-terminal domain is family Pfam:PF08573. 
27.00 27.00 28.00 32.70 25.90 24.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.66 0.71 -4.32 4 111 2009-01-15 18:05:59 2008-02-22 09:04:36 4 3 51 0 55 100 0 115.10 67 16.20 CHANGED FtEhLs+LKEhHDKEl.GLQsKlscLppERChDAQRlEEhFoKNppLREQQKlLpEsI+VLEcRLRAGLCDRCsVTpEhh+KKQpEhEss+.psLphIotLpNEhpoLpEEN+pLpEELc ..Fp-hhsKLKEhH-KEVpGLQsKlscLppERh..hD..AQRL.EEhFoKNQQLREQQKsL+EoIKVLE.............c.RLRAGLCDRCsVTcEphRKKQQEFENh+pQNL+LITELh....NE+NsLQEENK+LpEpLp........................ 0 2 6 17 +10315 PF10483 Elong_Iki1 Hap2_elong; Elongator subunit Iki1 Coggill P, Eberhardt R anon Pfam-B_26773 (release 22.0) Family This family is a component of the RNA polymerase II elongator complex [1,2]. This complex is involved in elongation of RNA polymerase II transcription and in modification of wobble nucleosides in tRNA [3,4]. 27.50 27.50 27.50 27.80 27.20 27.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.69 0.70 -5.04 22 253 2009-01-15 18:05:59 2008-02-22 09:11:26 4 4 198 3 176 260 0 240.80 20 82.24 CHANGED LLp+lLSL+-.sSPLhLlLDSLtQsup.LlpEal++u.....t.pp.cllalSFEThpc..PphsspFlssp.............stshppllppltst.........stspssp..cpLlllDSlN.lhsp.......plspFlushh.sPt.......sollusaHp-.............s......ahPuslslLpalAoTllclpsh.p..hccpshcpplsc..hsht..hsss.................shslplppRRKSGRult.pahlssss+.h..hhp.........c-....................sphsp...--sEt....hTFNLshocKQ+tAREpV.LPah-AQpt.u........usGGtIlYE 
.....................................................................................................phh.t......sshhL.h.Dslt..uh.llpthhp.p........t.t...lhhlu.hp.s..pp.........t.hs..htsp.........................................th.s..th.tt.hth..............................tt.sp..............thhlhlDS....ls...lhtp.............ph.phluphh..pst......................................................ss.llshhHp-l..................................a.P.t.s...hshLphlAsshlplps.......h.........t..................................................................th.hph.hR.....R+s.G......R..s..h....p..t...hhs..t...sp.......h...................tp............................................................................tt.st....psp.t........hTFNLtL..oc+p+ps+-plsLPah.upp.....................ttsutIhY-.............................................. 0 54 92 137 +10316 PF10484 MRP-S23 Mitochondrial ribosomal protein S23 Buljan M, Coggill P anon TreeFam_TF106116 Family MRP-S23 is one of the proteins that makes up the 55S ribosome in eukaryotes from nematodes to humans. It does not appear to carry any common motifs, either RNA binding or ribosomal protein motifs [1]. All of the mammalian MRPs are encoded in nuclear genes that are evolving more rapidly than those encoding cytoplasmic ribosomal proteins. The MRPs are imported into mitochondria where they assemble coordinately with mitochondrially transcribed rRNAs into ribosomes that are responsible for translating the 13 mRNAs for essential proteins of the oxidative phosphorylation system [2]. MRP-S23 is significantly up-regulated in uterine cancer cells [3]. 
37.50 37.50 37.80 37.70 32.20 30.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.61 0.71 -4.24 9 115 2012-10-03 14:45:55 2008-02-22 09:19:38 4 2 85 0 64 100 0 123.60 46 73.18 CHANGED Au..SRLEKlGTIFTRVpGLL+uGAhKh--+PlWYDVYtAFPP+tEPcYsRP....sPps..sVRpIaYtEDslRAKFac.ps+uptshsLhs.spposoQpFlphYppLcspG..sh-EEh.......h.cTupsLht-tlh.p ......AtSRLEplGolF..oR.scsLlRuGslK...-KPLWaDVYpAFPPhcEPhacRP.......tsct.......sl....p...cIaYpEDtlRAK.....Fap.....hssu.c.shsLh.......ssshp.SssQ+FVc+Yp-Lpp.G..th---p.......a.-TucsLLu-thh..p............................................. 0 20 25 45 +10318 PF10486 PI3K_1B_p101 DUF2447; Phosphoinositide 3-kinase gamma adapter protein p101 subunit Buljan M, Coggill P anon TreeFam_TF102035 Family Class I PI3Ks are dual-specific lipid and protein kinases involved in numerous intracellular signaling pathways. Class IB PI3K, p110gamma, is mainly activated by seven-transmembrane G-protein-coupled receptors (GPCRs), through its regulatory subunit p101 and G-protein beta-gamma subunits [1]. 
18.20 18.20 18.80 19.60 17.40 17.20 hmmbuild -o /dev/null HMM SEED 857 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.68 0.70 -6.97 4 166 2009-01-15 18:05:59 2008-02-22 09:24:22 4 4 47 0 94 139 0 485.90 28 92.84 CHANGED ToCTEDRIQHALERCLHGLStss.hSopWsAGLCLNhWSLpELVsRDsGpalILlEplLt+sREsQcpspY-LlhPLALhFYasVLpAPaIP.sS-LL.KAhuhaHpFLTWPsPYCsVhpEhLoFIssEhKAPGIoYQRLVRsEQGLPs+sapSSTsTVLLlNPsEVpuEFLS...lApcLSsspps.csshspLlpHhaQAohGs+CcssuLHppLpA+slccLp-IaosssEA.EhAuu.s.-su.uREtLps+Lp-l.....uttAGh.sGshssupPs+lpPIPhPsu+CYTYSW-QDsFDlLsplLhpEspL..lps.lhu-DEEs--EE...--c.EssGpsP-RDSlhSs...h.p.ss..................S.tss..phssLu++.hp.FVSuLSsshDSGYsEDS-EuSpEhsthsp.p.ERupp+.tp....+IhpLFKoKuplshR+....Lpss.s..........stsLPLRRAGShssP..p..p.PuRu+Ro+SLPQ.thGpph.s...hst.hs..RRPFLSsD-D.....sKluThRVVVFGSDRISGKVARAYSNLRh+EospPhLTRaFKLQFaYlPVKRS...ssu.hhsP....toPSPss...ht.s.u-......EtSTNDISHYlGhLDPWYcRNVhGLMpLs.ssLC.QShKtEAE...shEsSpTphPILADMlLYYCRFATRPlLLQlYpsElTF..loG-KpTElFIpsLEL..........GHSAspRAI+ASGPGuKRLGIDGDp-slPhTLQIsYSKuslSGRSRWsslEKVCTSVsLpKACpp.EELsSphE............CLNLslTEVVKRQNSKoKKSFNQlsTSpIKlDKlQIIuppus.FslCLDQDEpKILQsVlRCEVSPCY+PEcsDhsphs+sP..sLsuQstsEhpSL....LCLPIsTFSGAlP 
...................................................................................................................................................................................................h.h.........hh....thh.ph..hhh.h...hhhP.shtsh..phh....lp.E....ss..Gh...a.pc..hlhtEQtl........p.............p.p......hhlhhs....h.........h..............p.tt.....t............t.h..h..ll.p..hpsshG...th.h.tltt.Lp..........hh..h...h.p.........t.............h..h......................................................................................................................................................................................................................................................................................................................................................................................................................................................p......pttt.hh..hhphphahhPh.........................................................t..p......lu.hlu.hD.WYptsh.............t...................t.................hh.Dhh.aYhR.u...hhh.ha.hp......t......pphhh.tl..th............................................................................................................................................................................................................................................................................................................................................................... 0 9 19 49 +10319 PF10487 Nup188 Nucleoporin subcomplex protein binding to Pom34 Buljan M, Coggill P anon TreeFam_TF101106 Family This is one of the many peptides that make up the nucleoporin complex (NPC), and is found across eukaryotes [1]. The Nup188 subcomplex (Nic96p-Nup188p-Nup192p-Pom152p) is one of at least six that make up the NPC, and as such is symmetrically localised on both faces of the NPC at the nuclear end, being integrally bound to the C-terminus of Pom34p [2]. 
25.00 25.00 29.80 25.20 24.50 20.70 hmmbuild -o /dev/null HMM SEED 931 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.60 0.70 -6.63 4 230 2009-09-11 16:46:00 2008-02-22 09:32:58 4 8 183 0 149 233 0 642.30 25 42.33 CHANGED cphstlspFLppsKppLLE.Lshhp.psTppu-pscs+chlAp.lLthslpspcF.upsEc.LsphLph.l.Ec.phl.chhhsl..hss.c..pthh.cLcstsahpp-Rl....stllpp...llcllophlssps.hsupshpshppL-pEla.s.cpplpsLl........................-hhhspsllaClchhp.LLpLhllNtsVssphsppWF..h.sphlt.s+LhpsTshu........t+ahshp-hs...phhlu+lpuhholloI.hLsLsouh-pup.shso..shhtDspsFchVsssLhs..-sspp.........shVLYuWShILht+.hh.pp.s...............Fs.sp.tphIphhuppA-slsVhppLpcLschLp.D....slaTslhssahhhSlshIslTupTopshtslIsssPcslIEsFlsNssapsc........lshl+uKhPLl...LhPhIpL..A.IsschAtaphccltSa..lsKhshhs..Ychhs.s.hscpso......-Llchpu.lLVhPP.hEpss.ph.hslPpuTKupIls.......................usps.ps.......LlhF.YpYNGWoLlGRlLQNL.+la....sshDs.Q+.EhhIsIIsLlsNVlsscsuV-cShhlLp.hSs.hsps.......sIlpVIaclhEp..uLpp+shcllspCsshhTsLssphsahVWsYLs+SsLL................-phGKsGLuNlIhGSIEhssG-YsFTISllKLspsLIcssLo.....s.thsp+oKpsIlspllhHLlclaEoahhW+YNpph.Q+hEIuhhLhphFpsILt.VasID.tssspsKlppl......hocAAspllshFLlss-s.shsstolhshl.s.spspohh.hGsu..spLYspslppAaslssLlIsIRsss+.LpPSsLE+hLF...sposcLlslY.ph......Po...l+lpIlcLLssLssssWssth.P.LhSaLGps.utshLcullsDLsoslpDaplthoLY.FhssllEoKQ-GLulLFLTGcssss 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.....h..........................................h............lPtsThGplh..........................ssps..........llha.ap.asshshhsp.l.p..hh..p...sh................................sp.s..p....p.p.lh.ll....s..Lh.tpl............l..........p.t.....p.................s........h.......t.....p.......s.....h...........................................slhshlh.....lhpp........sl..............s...................s........lpl..lsuClphlssL..h..sh.....Ps+VWs.L.t.+os.hL.................................................ps..s.t.u.G.h..hshl.h.....s....h.E.spGcYshhluhlcLh...psLlc.s...tls......................................pp....t..........psl..........l............s....h....h.h....hh.h.-hh.shtpW+ass.t..+..plsthlhphhptl.L.hsa.....t.h...s....tt.s.....ps..plptl......................hspuu.ph.llshhh..hss.ss....thshs.............s.p..s.t...hpt......hh..phlp.shphsp.ll.p..l.p..............p..h..ss............
..L.EptLh............ttu..s.t.L..ltlhshh.....................ss......h..th.p...ll....c.h.........L.ts.....l...s......s.......................................SlhuhLGs.p.u.t.t.hhpth.lsplpp....h.ps.plp...h.lhcFloshl.p..sp..hLh.hhLshp..................................................................................................................... 0 41 72 112 +10320 PF10488 PP1c_bdg Phosphatase-1 catalytic subunit binding region Buljan M, Coggill P anon TreeFam_TF105548 Domain This conserved C-terminus appears to be a protein phosphatase-1 catalytic subunit (PP1C) binding region, which may in some circumstances also be retroviral in origin since it is found in both herpes simplex virus and in mouse and man. This domain is found in Gadd-34 apoptosis-associated proteins as well as the constitutive repressor of eIF2-alpha phosphorylation/protein phosphatase 1, regulatory (inhibitor) subunit 15b, otherwise known as CReP. Diverse stressful conditions are associated with phosphorylation of the {alpha} subunit of eukaryotic translation initiation factor 2 (eIF2{alpha}) on serine 51. This signaling event, which is conserved from yeast to mammals, negatively regulates the guanine nucleotide exchange factor, eIF2-B and inhibits the recycling of eIF2 to its active GTP bound form. In mammalian cells eIF2{alpha} phosphorylation emerges as an important event in stress signaling that impacts on gene expression at both the translational and transcriptional levels [1]. 
23.90 23.90 26.10 25.30 20.60 23.20 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.00 0.70 -5.23 5 184 2009-01-15 18:05:59 2008-02-22 09:35:22 4 6 97 0 70 171 0 131.80 36 35.04 CHANGED shPISshsuCSsc..sah.thAsc-lpoSSsspSI........Sas-EuE-uuuoSDSSphEu-hE.-sEG-+LW-shscSsDPYNPLsFTAslQTssT.sPKs.o.ptpshSspps..sSsuEuPlsSss-sSoSsDDSW-uSuDEEEst...cLWsSFCpNDDPYNPLNF+APhQoSucs.cus+hDSsosStssVuI........put+SoR+Sp+A.LlsKlpccCs+pLSsEoLSls.hptclh.s.spGs++cosKlKKV+FSPsVpVH+MssWSaAhctuRKGPWEEaARDRCRFQKRItETEcAIGYCLohpHREKMasRh .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h+...+V+F...s.....t....s.p...l+hlssWsh..uuctuR+G.sWEphAtDRsRFp+RIscsEthlusCLssptRtRhhsR...................... 0 13 17 39 +10321 PF10489 RFPL3_antisense Ret finger protein-like 3 antisense Buljan M, Coggill P anon TreeFamB_TF341410 Family This short transcript is purported to be the antisense protein of exon 2 of RFPL3 gene, however this was not confirmed. Since the RFPL3 (ret-finger protein-like 3) gene is expressed in testis the suggestion is that this may have a role in the antisense regulation of the RFPL genes. RFPL transcripts encode proteins with tripartite structure of RING finger, coiled-coil, and B30-2 domains, which are characteristic of the RING-B30 family. Each of these domains is thought to mediate protein-protein interactions by promoting homo- or heterodimerisation [1]. 
27.00 27.00 29.70 29.50 26.30 26.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.63 0.71 -4.34 6 31 2009-12-01 17:09:26 2008-02-22 10:15:53 4 2 5 0 4 22 0 71.80 47 51.62 CHANGED MGNLCGCIQGDSKKPSKKRVKRKPYSTTKVTSGSTFNENTRRYAVHTNQCRRPHGSRVKKKRYPQEDDFHHTVFSNLERLDKLQPTLEASEESLVHKDRGDGERPVNVRVVQVAPLRRESRsI- ...................t...D..htsS+chspcc..Sso..pVos.soh.t.hRRYul.TpQt.R..s...+Kth.s.E........................EEs.shKctGsGER.VpsRVhps............................ 0 4 4 4 +10322 PF10490 CENP-F_C_Rb_bdg Rb-bdg_C_Cenp-F; Rb-binding domain of kinetochore protein Cenp-F/LEK1 Buljan M, Coggill P anon TF101133 Domain Cenp-F, a centromeric kinetochore, microtubule-binding protein consisting of two 1,600-amino acid-long coils, is essential for the full functioning of the mitotic checkpoint pathway [1,2]. This domain is at the very C-terminus of the C-terminal coiled-coil, and is one of the key Rb-binding domains [3]. 19.20 19.20 23.40 34.30 18.60 16.10 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.44 0.72 -4.64 4 55 2012-10-08 13:53:08 2008-02-22 10:18:33 4 10 32 0 26 45 0 46.60 67 1.96 CHANGED sp.tpspEso-aEP-GLPEVVp+GFADIPoGcsSPYILRRTThuppsSP .....hpstEcpEso-FEPEGLPEVVKKGFADIPTGKTSPYILRRTThuTRTSP.... 1 1 4 10 +10323 PF10491 Nrf1_DNA-bind NLS-binding and DNA-binding and dimerisation domains of Nrf1 Buljan M, Coggill P anon TreeFam_TF105308 Family In Drosophila, the erect wing (ewg) protein is required for proper development of the central nervous system and the indirect flight muscles. The fly ewg gene encodes a novel DNA-binding domain that is also found in four genes previously identified in sea urchin, chicken, zebrafish, and human [1]. Nuclear respiratory factor-1 is a transcriptional activator that has been implicated in the nuclear control of respiratory chain expression in vertebrates. 
The first 26 amino acids of nuclear respiratory factor-1 are required for the binding of dynein light chain. The interaction with dynein light chain is observed for both ewg and Nrf-1, transcription factors that are structurally and functionally similar between humans and Drosophila [2]. The highest level of expression of both ewg and Nrf-1 was found in the central nervous system, somites, first branchial arch, optic vesicle, and otic vesicle. In the mouse Nrf-1 protein, Swiss-Prot:Q8C4C0, there is also an NLS domain at 88-116, and a DNA binding and dimerisation domain at 127-282. Ewg is a site-specific transcriptional activator, and evolutionarily conserved regions of ewg contribute both positively and negatively to transcriptional activity [3]. 20.30 20.30 20.30 26.00 18.40 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.48 0.70 -5.18 4 147 2009-01-15 18:05:59 2008-02-22 10:19:02 4 6 78 0 85 152 0 178.40 61 35.18 CHANGED GPVGVAAAAAIATGKKRKRPHSFETNPSIRKRQQTRLLRKLRATIDEYsTRVGQQAlVLssoPGKPNslFKVFGAsPLENVlRNhKuhlLpDL-sALAp+AP.Ppspss.sl.ELPPLlIDGIPTsV-KMTQAQLRAFIPhMLKYSTGRGKPGWGKESsRPsWWPpDLPWANVRSDsRoE-pKpKVSWTpALRpIVhNCYKaHGREDLLPtFtEp ..................................GPVGhAAAAAlAouKKRKRPHsFEoNPSI.RKRQQsRLLRKLRsTlD.EaoTRVGQQAlVL........shoPu....K..PNssaKVFGAtPLEsVl+ph+shlh--L-sAL..AppA...P...P......ss........ELPPLsIDGIPsSV-KMTQ..AQLRAFIP.MLKY.STGRGK.PGWG+ESs+P..WWPc-.l.PWA.N..V.R..Ds...RoE-.pK..Q.+.............V.S..WTpALRpIVhNCYK.HGREDLL.sFt-p............. 0 25 31 59 +10324 PF10492 Nrf1_activ_bdg Nrf1 activator activation site binding domain Buljan M, Coggill P anon TreeFam_TF105308 Domain In Drosophila, the erect wing (ewg) protein is required for proper development of the central nervous system and the indirect flight muscles. The fly ewg gene encodes a novel DNA-binding domain that is also found in four genes previously identified in sea urchin, chicken, zebrafish, and human [1]. 
Nuclear respiratory factor-1 is a transcriptional activator that has been implicated in the nuclear control of respiratory chain expression in vertebrates. The first 26 amino acids of nuclear respiratory factor-1 are required for the binding of dynein light chain. The interaction with dynein light chain is observed for both ewg and Nrf-1, transcription factors that are structurally and functionally similar between humans and Drosophila [2]. The highest level of expression of both ewg and Nrf-1 was found in the central nervous system, somites, first branchial arch, optic vesicle, and otic vesicle. In the mouse Nrf-1 protein, Swiss:Q8C4C0, there is an activation domain at 303-469, the most conserved part of which is this domain 446-469. Ewg is a site-specific transcriptional activator, and evolutionarily conserved regions of ewg contribute both positively and negatively to transcriptional activity [3]. The family Nrf1_DNA-bind is associated with this domain towards the N-terminal, as is the N terminal of the activation domain. 19.10 19.10 19.30 19.30 18.70 18.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.36 0.72 -3.54 7 86 2009-09-16 13:14:49 2008-02-22 10:19:52 4 5 61 0 50 83 0 60.50 45 11.56 CHANGED othlslPV....S..MYQ....ThlA.shtQh.........lsPh.QV..........psG............................husthHthhhpussu.ts...Qhl..pVloLcs ................GlVpIPV....S..MYQ....TVVT.SlsQs..........suPV.QVAhus...lssc.u-ph.Th.............................................................s..................................... 1 12 16 30 +10325 PF10493 Rod_C Rough deal protein C-terminal region Buljan M,, Coggill P anon TreeFam_TF101176 Domain Rod, the Rough deal protein, displays a dynamic intracellular staining pattern, localising first to kinetochores in pro-metaphase, but moving to kinetochore microtubules at metaphase. 
Early in anaphase the protein is once again restricted to the kinetochores, where it persists until the end of telophase. This behaviour is in all respects similar to that described for ZW10 [1], and indeed the two proteins function together, localisation of each depending upon the other [2]. These two proteins are found at the kinetochore in complex with a third, Zwilch, in both flies and humans. The C-terminus is the most conserved part of the protein. During pro-metaphase, the ZW10-Rod complex, dynein/dynactin, and Mad2 all accumulate on unattached kinetochores; microtubule capture leads to Mad2 depletion as it is carried off by dynein/dynactin; ZW10-Rod complex accumulation continues, replenishing kinetochore dynein. The continuing recruitment of the ZW10-Rod complex during metaphase may serve to maintain adequate dynein/dynactin complex on kinetochores for assisting chromatid movement during anaphase[2]. The ZW10-Rod complex acts as a bridge whose association with Zwint-1 links Mad1 and Mad2, components that are directly responsible for generating the diffusible 'wait anaphase' signal, to a structural, inner kinetochore complex containing Mis12 and KNL-1AF15q14, the last of which has been proved to be essential for kinetochore assembly in C. elegans. Removal of ZW10 or Rod inactivates the mitotic checkpoint [3]. 
20.30 20.30 22.00 21.40 19.90 19.40 hmmbuild -o /dev/null HMM SEED 551 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.84 0.70 -6.21 6 118 2009-01-15 18:05:59 2008-02-22 10:27:05 4 3 83 0 78 103 0 461.60 32 29.23 CHANGED cGhhhP.lA+hRLPF+Lll...ppshhsILssELo.Esa.oLLLlsphhps.........shDhhhhSAsKpsh.pchK.phhcpsssppaplhs+s.sthlQoIhphlcslsssphAhhIhahlTppsP...........................-GsDplpAhhhChchAccacpslssps-A+-K....hcKlph+Y.lptTpplLhsatLNDpchL+lltpPscLIsALYcHosl.pp.......shsDIpslVpEIAclpsLslppIpshLLpKWLshhhsu.......................lhc-pshs-sss-lEsss......sltRshYlLpuashs..puVhFLlu..souLhsssshosup+pLtlhssaphh.sDsssoahcphhpcphhsLKClhaLpuLthhNI..TlppFps..osK.ullKtLWpsHuppPpulcllAplCLtYclYsPQlWNGlLp+hltFNh..................................lsaLptlL-slSuhcpLaplsuFupAWctlIptPFhoAspshS.sQpusLt+oLVhL.KCPlstsLsLlsIAchhlplshPuhA.usLLhhsps-cpcQpIKphluossspslhQQItEL.ssG.luslsptlsstl.s .......................................................................................p....hs.huphRLPF+.lh.......tpshhp....Il....ssELs..-sh.phh.lsphhph.................shDphhhssspp....h.pphp......p...t..ptpp...sl.spp.....tthh....ps.l.phl.sl.ssp...husshhh.lspphP.............................Gs.+hpuhphChhlucc....a.h...psh......ss...p...s....cs+c..c..h...p..........hpKlch....pa....tpptTEtl..Lhsa.....t.Lss....tchLc....llspPscLIhsLYcH.sl.pp..........spshs.DIpssscEIAclp...plslpplh.hLLpKWLs..............................................................................................................hps.t.p.t.spph.php.p...cp............sLp.RlhYLLp......shshc......pthhal..h.........s...h.h..h..shp..p..ho.hsp+tRAlpsLhh..ls.sp-..........slpoh.........h.........cp..shpclp.hLKClh....aLsphEhLslsh.oh-hFps..ssKpuhlKu.LW+s..a.uc.psh....ulcLlspLCL-YclaD.pl.WNulLp..+lltFsM..................................l.shLpplLp.slo.sh.pLhpl.s..hspuWppllphPhhsAs.s.s.pQ.tth..cshlhl.tCPl..sLshhthApphhphphsthA.usLh.hspspcpppplpthlt.p....lhpQl.-h.phG.lshh.p.h............................................................
.......................... 1 24 30 55 +10326 PF10494 Stk19 Serine-threonine protein kinase 19 Buljan M, Coggill P anon TreeFam_TF105332 Family This serine-threonine protein kinase number 19 is expressed from the MHC and predominantly in the nucleus. Protein kinases are involved in signal transduction pathways and play fundamental roles in the regulation of cell functions. This is a novel Ser/Thr protein kinase, that has Mn2+-dependent protein kinase activity that phosphorylates alpha -casein at Ser/Thr residues and histone at Ser residues. It can be covalently modified by the reactive ATP analogue 5'-p-fluorosulfonylbenzoyladenosine in the absence of ATP, and this modification is prevented in the presence of 1 mM ATP, indicating that the kinase domain of is capable of binding ATP [1]. 21.00 21.00 21.00 21.60 20.40 20.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.77 0.70 -4.98 22 201 2009-01-15 18:05:59 2008-02-22 10:40:10 4 3 164 0 134 203 0 230.00 26 72.35 CHANGED h+sshhcplsh.hsshs.pphspllphRps.lPs.llshsplau.....lh..pssThV-+pl.pLl+pGtlR+hhlss..............tthhhhhcshctllppsshlps...................hp..................................c+Flphlpppssshhhs....pphFsttp............hstLlpsGh.Ls..........................sthssss...............................a.lSlPssGsal+LlsuGRpaLlshLp+...s+a+Ehh.spLpc+W.Ghs.........................+hp.hYGhshcWhLp-slGAGllElFsT.slG.uhRh 
....................................h.............tshp.hh.h..hthhps.lPP..llhhuplau............ll..ps.ThV-RpltpLhcpGtlRhhtlst...........................th.Dhcsllh.ppshhpp..........................hhp................................................................h..........pcFhphlh.s..s.t..s.shs......pphhotpp.........................lstLlpAGh.Lsh...........................................p..csts......................................................................ahlulPshG.al+hhspGRptllshlp+...s+a+Eh.hs.LhpR...............................+h.thhtlshpahlc-hlGuull-shpT.ssGhhlRh...................... 1 37 71 105 +10327 PF10495 PACT_coil_coil Pericentrin-AKAP-450 domain of centrosomal targeting protein Buljan M, Coggill P anon TF105408 Domain This domain is a coiled-coil region close to the C-terminus of centrosomal proteins that is directly responsible for recruiting AKAP-450 and pericentrin to the centrosome. Hence the suggested name for this region is a PACT domain (pericentrin-AKAP-450 centrosomal targeting). This domain is also present at the C-terminus of coiled-coil proteins from Drosophila and S. pombe, and that from the Drosophila protein is sufficient for targeting to the centrosome in mammalian cells. The function of these proteins is unknown but they seem good candidates for having a centrosomal or spindle pole body location. The final 22 residues of this domain in AKAP-450 appear specifically to be a calmodulin-binding domain indicating that this member at least is likely to contribute to centrosome assembly [1]. 
25.00 25.00 25.50 27.30 23.00 23.00 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.47 0.72 -4.05 18 211 2009-01-15 18:05:59 2008-02-22 11:00:32 4 5 142 0 139 223 0 80.90 41 3.77 CHANGED +hhRpEoFR+uLsaQK+Yl.LhlssaptCppssLphlscM............GspPs.......tphpcs+p+a+oslpsllAlhR........................M+hlsccWp ...........+YLRsESFRKALlY.QK+YLLL..hl..suaQpCEpssLphlAch.....................................Gst.Ps.......htpp.cshs+FRoAlpsslAlsR........................h+als+cWp.................................... 0 33 51 93 +10328 PF10496 Syntaxin-18_N SNARE-complex protein Syntaxin-18 N-terminus Buljan M, Coggill P anon TreeFam_TF105868 Domain This is the conserved N-terminal of Syntaxin-18. Syntaxin-18 is found in the SNARE complex of the endoplasmic reticulum and functions in the trafficking between the ER intermediate compartment and the cis-Golgi vesicle. In particular, the N-terminal region is important for the formation of ER aggregates [1]. More specifically, syntaxin-18 is involved in endoplasmic reticulum-mediated phagocytosis, presumably by regulating the specific and direct fusion of the ER with the plasma or phagosomal membranes [2]. 21.20 21.20 21.20 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.86 0.72 -4.15 33 245 2009-01-15 18:05:59 2008-02-22 11:01:51 4 3 213 0 167 252 0 81.90 26 24.35 CHANGED sDlTslFpp.Vtpsp......pp.ss.s............tptthtph.phpDpFlKEAhclhppIspLpsaLpplRpsYlshs......tsphopspcDph ....................DlT.hF+t.Vtthc.....................ptttt.s..................tp...htps....tspssFhccAhclhspIspLcpaLhphRpsYls...h.p.h.s.htp...hoctpcs................................ 0 43 84 132 +10329 PF10497 zf-4CXXC_R1 Zinc-finger domain of monoamine-oxidase A repressor R1 Buljan M, Coggill P anon TreeFam_TF101076 Domain R1 is a transcription factor repressor that inhibits monoamine oxidase A gene expression. 
This domain is a four-CXXC zinc finger putative DNA-binding domain found at the C-terminal end of R1. The domain carries 12 cysteines of which four pairs are of the CXXC type [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.76 0.72 -3.90 13 365 2009-09-11 09:20:48 2008-02-22 11:03:32 4 20 145 0 233 361 3 98.80 40 16.64 CHANGED IYDshpGpoCHQCRQKTh............Dh+ssC.....psppC...hp..Gp.........................FCGcCLhsRYGEcscEshtssc.WhCPhCRG........ICNCS...aCR+K+GhsPTGlLh+pA+tpGasSVtcaLh ...............................................................................hYs...GpoCHQCRQ.KTh............-t+.s...C................c...s..spC....tsp.....sp..................................FCssCLhsR.YG.E.c........lccs...h.............h..s......s........s...........WhCP.CRG...........ICNCS................hCR++....c....Gh...ss...TGhhhhhsp..GatsVtthL.......................... 0 60 121 173 +10330 PF10498 IFT57 Intra-flagellar transport protein 57 Buljan M, Coggill P anon TreeFam_TF106156 Family Eukaryotic cilia and flagella are specialised organelles found at the periphery of cells of diverse organisms. Intra-flagellar transport (IFT) is required for the assembly and maintenance of eukaryotic cilia and flagella, and consists of the bidirectional movement of large protein particles between the base and the distal tip of the organelle. IFT particles contain multiple copies of two distinct protein complexes, A and B, which contain at least 6 and 11 protein subunits. IFT57 is part of complex B but is not, however, required for the core subunits to stay associated [1]. This protein is known as Huntington-interacting protein-1 in humans. 
40.00 40.00 45.40 49.90 39.80 39.10 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.03 0.70 -5.71 18 178 2009-01-15 18:05:59 2008-02-22 11:11:11 4 3 124 0 127 178 4 298.40 38 81.42 CHANGED ME-Ll-KLKLLsYEccahcptp..hKPLoRaYFu....hssNPuEQFahFosLssWLhs..psG.psh-pPpEaD..DPNuTssNILs-L+shG..hss-FsPsKL+sGsGEtVshlLssLu-puLpppsF...sa++Ppasp.......-c..s-pcsh..-DDs-hhhpcl---...-sp-p--.............hhp.th.....s.tttspsptllcSplDstEWplElERVhPpL..Klpl+sDs.+DWRsHl-phpphpcslppthspspthLc+.....lps-ls.psLE+lpoREchlNsQlpphlp-.a+ptppphsphpcphpphstulpphpttLsclspcL-plKpch-E+ssshoDsuPlhplKpuls+L+pEIppMsl+IGVlp+olh .....................................................-plh-KL+lLpY-..pphh.t..p........h+s.s+aYFs.....ssN.....suEQFahFstLssWLhp....ptG.p.hptPp.E......aD...DPN.sshspIlttL+phs..hss-FsPsKL+pGhG-tshhlLstLuppALphh.ta...pap+.s.hs........--......p.tsh...-D-sE.h.hhpcl--ph...ttpcs--............p..hhp..t.........th.ttptptppl..hpopsDst-WplElERVhPpL........KVp...l+.s...D......s..+DWRhHl-QMcphpssIpsth..pcscs.Lc+.....Lps-lscsLEKItoREKalNs..QLcphlpc.Y+shptpLupspcpappsssslscpop.Lsc....ls-cLEplKpch-E+GsshoDu.uPllpIKpuls+L+pEhhpMsl+IGllppsl.............................. 0 62 73 104 +10332 PF10500 SR-25 Nuclear RNA-splicing-associated protein Buljan M, Coggill P anon TreeFam_TF105478 Family SR-25, otherwise known as ADP-ribosylation factor-like factor 6-interacting protein 4, is expressed in virtually all tissues. At the N-terminus there is a repeat of serine-arginine (SR repeat), and towards the middle of the protein there are clusters of both serines and of basic amino acids. The presence of many nuclear localisation signals strongly implies that this is a nuclear protein that may contribute to RNA splicing [1]. SR-25 is also implicated, along with heat-shock-protein-27, as a mediator in the Rac1 (GTPase ras-related C3 botulinum toxin substrate 1) signalling pathway [2]. 
55.00 55.00 55.50 55.40 44.10 50.70 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.98 0.70 -4.88 6 90 2009-01-15 18:05:59 2008-02-22 16:23:21 4 2 66 0 53 97 1 186.70 48 80.28 CHANGED spS+S+SpShsc...ptpcp+c+.......pp.psppR.pptt......................pcshKKt++Rt+.........ppSSSSSSSoSsSsSppp.ppcp+K+Rtt+KpK..t+KcKKct++.cKcupcct........E.ttlscAhsts.Lchhpppst.pt.sP.............shTDEQKu+lpsh+PhTKEEY-ARQSVIR+VVDPETGRTRLIKGDGElLEEIVSKERH+EINK.............QATRGDGttFQtRs.Ghh ..............................................t.................................................................................................pt...tptpp.p..t............up.sSSo.SS..S......SoS...S...S.....sS...s......p...p..spppt+KK+t..KcK........c.K............p.c+K.cKK+....c..KKhKK+...sKc+t..........ptpt.......sc....s.h..Pssslct.hpcps..tpcp.s..sP................lhTDEQKSRlpAM+PMTKEEW-ARQSlIR+VVDPETGRTR..LI.KGDGEVLEE...I.VoKERH+EINK............................QAT+GDGhhFQh+s.sh.h......... 0 15 20 36 +10333 PF10501 Ribosomal_L50 Ribosomal_S39; Ribosomal subunit 39S Buljan M, Coggill P anon TreeFam_TF105895 Domain The 39S ribosomal protein appears to be a subunit of one of the larger mitochondrial 66S or 70S units [1]. Under conditions of ethanol-stress in rats the larger subunit is largely dissociated into its smaller components [2]. In E. coli, in the absence of the enzyme pseudouridine synthase (RluD) synthase, there is an accumulation of 50S and 30S subunits and the appearance of abnormal particles (62S and 39S), with concomitant loss of 70S ribosomes [2]. 
25.10 25.10 25.10 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.29 0.71 -4.10 30 196 2012-10-02 01:16:24 2008-02-22 16:43:46 4 3 179 0 144 199 2 108.30 23 36.59 CHANGED pphssssplpph..l.ppstppsht...........................s.s.sahphsLsD.phKFtFlKphppphG+hlPDstlpplpTlpclhpah.......psss...cscp.sctl..ptcthshP.NVplhpct ..................................................................p..h.sspphpph..l............phhpphht.......................................s.sa.phsL.p.D.phKFphlppltpphG..HhlPsspLpph....pol............pclhsah........spss...pspphh-tl....pthsLP.Nl+lh.c......................... 0 28 59 106 +10334 PF10502 Peptidase_S26 Signal peptidase, peptidase S26 Mistry J. Rawlings N anon Manual Domain This is a family of membrane signal serine endopeptidases which function in the processing of newly-synthesised secreted proteins. Peptidase S26 removes the hydrophobic, N-terminal, signal peptides as proteins are translocated across membranes. The active site residues take the form of a catalytic dyad that is Ser, Lys in subfamily S26A; the Ser is the nucleophile in catalysis, and the Lys is the general base. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.88 0.71 -4.65 32 3046 2012-10-02 16:34:55 2008-02-25 12:55:21 4 13 2357 14 823 4403 991 93.00 32 34.94 CHANGED lhhNsosShPlGLYplpshsp........hpsGclVhsssPpssAhhhA.pRGYLspGl......PLlK+VhAlsGpcVshsssh.lsl-G..lshupshtpDphGRsLPsap.tstslssGElFLhu.ssssSaDuRY..FGPl..ssSul..lGhApPlaT .......................................................................................................s.......................................................................................................................................................................................................................................................................................................................................................................................................................................................p.......h...........h......VPtGp.Y.FhM........G.D.....N.....R..s..s....St.DS........R......Y.........a......G.....h.....V..P.cpsllG+A.h.lh.h............................................... 0 221 509 690 +10335 PF10503 Esterase_phd Esterase PHB depolymerase Mistry J, Fushinobu S anon Manual Family This family of proteins include acetyl xylan esterases (AXE), feruloyl esterases (FAE), and poly(3-hydroxybutyrate) (PHB) depolymerases. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.53 0.70 -5.10 2 1139 2012-10-03 11:45:05 2008-02-25 13:32:45 4 32 653 0 468 2296 946 182.10 23 48.03 CHANGED LAYtLYlPP.....Gt.R...PhVVMLHGCpQps--FAtGTRMNhLAcchGhsVLYPEQo.RApsptCWpWac.pppptGRGEssslAuLsculsstHthDsuRVYVAGLSAGuuhAslluhpaPDhFAAVulHSG.shGpAssshoAhssMRpG.t.sPussssAhssA.....hPsllhpGDuDpsVts+NA-pLssthhphsGhusupGA.tuu.RVpptpoG ............................................................................................h....hal.P..s..................s...h....t......s...h..s..Ll.V.h.LH..G.........C.....t...t........s....u.....p....s......h.....t...p.........s......o.....t......h....s.........p.........l...A....-...p....t...s......a.l......V......l....Y...P........p....t......s............t.....t........s.......t............s..........p...........t....C.......a......s.........h........h.s..............s........s.....p.......p.....p......s.......t.......s......-.....s.............s..............h.........l......t...s.......h.....l....p.....t...l...h...p......p...a....s.........l...D.......s.s..R..V..a..ls..Gh...SuGu..hM.s..s.h..hu..s.........p..........a.......P.-......l........F....A....A.......s....u..........h....h......u...........G.....h.......s......h.....s......s.....h..................s....................................................................................................................................................................................................................................................................................................................................................................tsttsts.................................................................................................................................................. 
0 165 303 401 +10336 PF10504 DUF2452 Protein of unknown function (DUF2452) Buljan M, Coggill P anon TreeFam_TF105992 Family This protein is found in eukaryotes but its function is unknown. 20.70 20.70 21.50 21.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.96 0.71 -4.96 10 150 2009-01-15 18:05:59 2008-02-26 12:47:45 4 1 134 0 83 166 179 138.60 36 80.19 CHANGED spVsLVEsNPsP.sGhplVSPYpouRhu-PhDLVsLApplQcADphI+sNACsKLTVIAEQIRaLQEQARKVLEEu+RDtDLHHAACNFsK+PGpIYaLYcR-SGQpYFSMLSPcEWGs.SCPHpFlGuY+LEHDhSWTPhE-lEcpDsclchs-+Llspss ....................................................t....................ss..sthh..s...DlltLApplppADp.hl+sssss..KLp.lIs-QlphLQpQA+clLc...-uccstcLHpusCNFhKtPGplYaLYpRp....s....GppYFShlSPcEWGs..ss.s.p..c.alGuY+LptDhSW..T..Ph-clpppptphthhpphh...s.............................. 0 30 42 62 +10337 PF10505 NARG2_C NMDA receptor-regulated gene protein 2 C-terminus Buljan M, Coggill P, Berhardt R anon TreeFam_TF106272 Domain The transition of neuronal cells from pre-cursor to mature state is regulated by the N-methyl-d-aspartate (NMDA) receptor, a glutamate-gated ion channel that is permeable to Ca2+. NMDA receptors probably mediate this activity by permitting expression of NARG2. NARG2 is transiently expressed, being a regulatory protein that is present in the nucleus of dividing cells and then down-regulated as progenitors exit the cell cycle and begin to differentiate. NARG2 contains repeats of (S/T)PXX, (11 in mouse , six in human), a putative DNA-binding motif that is found in many gene-regulatory proteins including Kruppel, Hunchback and Antennapedi [1]. This C-terminal domain belongs to the PD-(D/E)XK nuclease superfamily [2]. 
20.70 20.70 20.90 21.60 20.40 19.60 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.32 0.70 -5.16 5 96 2012-10-11 20:44:46 2008-02-26 12:56:43 4 4 76 0 65 111 1 201.00 35 25.27 CHANGED --cS-hlsPsNuNoshpLWoLQpts-.......DaplhlRhSlsslc...cocupcslts+F..l.l.lKLEYQs-aGsEtMSKSELl+tWscphL+suohuasuRISA+TaclhLcEKLTLtsLEcpL+caYsoShsNLLoHLhphLKlLsulPsGDYLLpHus..KDKFLLslhosDspsTPsSasLH.l.pTcoussDpPuloussWlPIDPoLlsphHcEpsLLPCoFPs .........................................................ppsphhts.pcuNhsYpLaoLp......................-lhLLVRsS......lptlc...csppccpl.pp.ph...slhlhPKlEYQssa.Gs.EsLTcSELs+hWspohL+ssspha.su+IsuhTu+lhhlEclo.ppL...ccp...lsshp...s..shhslLpcl..........LK....pL....s..uLptGs.YLLpHss...cDs.lllhKss-......p....s.o..csu....asLa....pscss.ssh.s.s..h...p.ssWlPlDPslhhshHhcptRlPCoFPs............................................. 0 20 25 43 +10338 PF10506 MCC-bdg_PDZ PDZ domain of MCC-2 bdg protein for Usher syndrome Buljan M, Coggill P anon TreeFam_TF105384 Domain The protein has a high homology to the tumour suppressor MCC (mutated in colon cancer; or MCC1 hereafter) and was named MCC2. MCC2 protein binds the first PDZ domain of AIE-75 with its C-terminal amino acids -DTFL. A possible role of MCC2 as a tumor suppressor has been put forward. The carboxyl terminus of the predicted protein was DTFL which matched the consensus motif X-S/T-X-phi (phi: hydrophobic amino acid residue) for binding to the PDZ domain of AIE-75. 21.50 21.50 23.80 22.30 21.30 19.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.99 0.72 -4.14 7 229 2009-01-15 18:05:59 2008-02-26 13:21:28 4 6 60 0 113 229 0 64.60 36 16.56 CHANGED ccLpuplEcLcshNchLstsLpthKtpsEphsh.lGctEupAsAL..+LALpapp+shEsashhhAlht ......c+LpuplEcLcStschLsts.E-p+ppstchshhlt+t-ushsAh.....+Lth..papp+scEh.shLhAlhE...... 
0 20 28 55 +10339 PF10507 DUF2453 Protein of unknown function (DUF2453) Buljan M, Coggill P anon TreeFam_TF105823 Family Some members of this family are purported to contain GAF domains but this could not be confirmed. The function is not known. It is likely to be a transmembrane protein. 25.00 25.00 25.00 25.60 23.70 23.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.34 0.72 -4.26 16 194 2009-01-15 18:05:59 2008-02-26 13:33:18 4 15 115 0 142 191 6 101.10 44 34.68 CHANGED hsuulPFlGFGFLDNulMIlAG-hIDpslGhhLGlSTMAAAALGNllSslhGlthtshlEphsp+ls.hphPsLT.tQhc.ptsphusplGsslGlhlGClLGMhPLLFhss .................hsulPFlGFGFlDNhIMIlAG-hI-holGhhhulSTMA..AAALGNllSDlsGlshush.lEths.p+l.......G..hph...........P....sLostQhc.....h.........hps+husphGpslGlslGClLGMhPLlFht.s.......................... 0 65 83 122 +10340 PF10508 Proteasom_PSMB Proteasom_PSMB5; Proteasome non-ATPase 26S subunit Buljan M, Coggill P anon TreeFam_TF106231 Family The 26S proteasome, a eukaryotic ATP-dependent, dumb-bell shaped, protease complex with a molecular mass of approx 20kDa consists of a central 20S proteasome,functioning as a catalytic machine, and two large V-shaped terminal modules, having possible regulatory roles,composed of multiple subunits of 25- 110 kDa attached to the central portion in opposite orientations. It is responsible for degradation of abnormal intracellular proteins, including oxidatively damaged proteins, and may play a role as a component of a cellular anti-oxidative system. Expression of catalytic core subunits including PSMB5 and peptidase activities of the proteasome were elevated following incubation with 3-methylcholanthrene. The 20S proteasome comprises a cylindrical stack of four rings, two outer rings formed by seven alpha-subunits (alpha1-alpha7) and two inner rings of seven beta-subunits (beta1-beta7). 
Two outer rings of alpha subunits maintain structure, while the central beta rings contain the proteolytic active core subunits beta1 (PSMB6), beta2 (PSMB7), and beta5 (PSMB5). Expression of PSMB5 can be altered by chemical reactants, such as 3-methylcholanthrene [1]. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.55 0.70 -6.23 9 249 2012-10-11 20:01:02 2008-02-27 11:21:59 4 12 149 0 143 379 6 365.10 21 83.06 CHANGED htshhpchlpplpst...phh-pLsthpsslstps.hptlspth.........lassLs..os....p.-ph.lthplLcplltshsscsh...hsphpshLpcGLspssssl+thshhp....ltRhlpppsushphlsspslh.hlhhsltspchssuptAhphLsplsppphthctl.......pspLpslhpp.s-hlRhRlY-lslcluuhSssshshptp...lhcplLpElps.-DVLlphssl-llscLAp.spaGhpaLtppulh-plsshlptscpDP...hushhlsGhh.....+FastlAsh.ss.plhpsaPphhpplh...phhsstD.shhssAhDolGhlutsl-GKphL....+sssshcphltthusttp.hshch+hphLpulsslh..tppptpppl........shscsWYpphuppshp....lhhsllppPFPEl+suuhchhpslssasWslpt.hlsssGFl-alLDcpoEpsK-hc.tKapll+plsp..p....usslhucsphl....+LpcYlp.GPaahps.sssAsp..ss- 
.....................................................................................................................................................................................................................l..hh..................ph.........l.hs.L..p..sp...tlp.hsh.t.......ltph....pp...t..t....s.....ht..hh.s...p......l....h...ll.hl.tt...php.l.sptA..hphltpls.t..p.t.............th.p...lh....h......htpLtpl.h.tp...ss....h.h.R.......h.R..lhp................l....lspl.sphS.......p...h.p.hhhp.t..s..l..ls.llp....plps..cDhLl...p...hssh-hlspL.u....ppps............h....pal...p.p.slhptl....sshl.t..ptss.............tt.hhl..uhh.........................................t..h.............h..h.......h.............h.......h................s..........u.hp.shu...h.s.ts..puh.hl...........h...h..h....t.........t...hpshs.h.........................................................hht.ht......................h.t.....pps..-hp.s.hthh..h.........h............h.......t........t.hhphhhs.t.t...............h+hthh..h..............................h..........thp.hht.G.a................................................................................................................................................ 1 59 77 114 +10341 PF10509 GalKase_gal_bdg Galactokinase galactose-binding signature Finn R, Coggill P anon PROSITE_PS00106, Pfam-B_2277 (release 22.0) Domain This is the highly conserved galactokinase signature sequence which appears to be present in all galactokinases irrespective of how many other ATP binding sites, etc that they carry [1]. The function of this domain appears to be to bind galactose [2], and the domain is normally at the N-terminus of the enzymes, EC:2.7.1.6 [3]. This domain is associated with the families GHMP_kinases_C, Pfam:PF08544 and GHMP_kinases_N, Pfam:PF00288. 
20.20 20.20 20.20 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.79 0.72 -4.69 183 3091 2009-01-15 18:05:59 2008-02-27 17:35:22 4 16 2627 25 790 2189 438 51.10 45 12.61 CHANGED tptFpptF....upp..sp..h.hhpAPGRVNLIGEHTDYNsGhVLPhAIshsshsu.sphp ..................p.phFtphF.....G.t......ss......h.shtAPG.......RVNLI.......G.EHTDYNsGaVlPsAI.shsThsust.R............ 0 262 473 658 +10342 PF10510 PIG-S Phosphatidylinositol-glycan biosynthesis class S protein Buljan M, Coggill P anon TreeFam_TF105857 Family PIG-S is one of several key, core, components of the glycosylphosphatidylinositol (GPI) trans-amidase complex that mediates GPI anchoring in the endoplasmic reticulum. Anchoring occurs when a protein's C-terminal GPI attachment signal peptide is replaced with a pre-assembled GPI [1]. Mammalian GPITransamidase consists of at least five components: Gaa1, Gpi8, PIG-S, PIG-T, and PIG-U, all five of which are required for function. It is possible that Gaa1, Gpi8, PIG-S, and PIG-T form a tightly associated core that is only weakly associated with PIG-U. The exact function of PIG-S is unclear [2]. 
20.40 20.40 21.50 20.60 19.30 20.30 hmmbuild -o /dev/null HMM SEED 517 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.50 0.70 -5.83 32 374 2009-09-10 22:53:53 2008-02-27 17:48:41 4 11 269 0 266 360 2 405.30 24 84.23 CHANGED luahhl..hlhl.......................GlPlWa+TTol...YRAsLPhpplpshspth.........................psplphslslhlpssp...........h.ssttcpl.pplpctlss.....p...t.taplplphhpts.pppppht....t.........................h..tt..th.phphp.tspph.lhhspssl..sp.....lsthlushLh...........tplhptchtplsplhsstshttsp........................................ulpaussYcl.sFSLLssssp..shs..WDI-........sulcc.YhpPllptLuslsNFol-oQl.Yaushs...hpshhspt.t............................................................................................................aslsps-LuphINssEhsLsss...hspsPs............lNFllYl.......Pss..ppuPlhlppps..............ssuFllPQWGGVhlhN.s..........tpp.sh.......lscptLtssh.sFspQLhpLLGlspsss.......................ssshcl-sLhRhpshpsltpussTLtSLs+LscslspIsIPcpVtspVpp..ulptlpp.uhctLpts.......phppALthuppAhshuE+AFF-t.sMltQhYFPsEHKhAVYlPLhuPlslsllhull+.hKchppcp+p 
.................................................................................................................................................................hhhlhhhlGlPhWapoTph...aRs.LPhpth.tht...................................h....h.l.l...t...................tptl...t..h.p.hp.....................hphph...........t...............................................................t.....t........h...hhhs...tt.....t...............ls.hhtt.lh..........................hht....th.h..hh...s.t...t.....................................................................hsht.s.thcl.shoLhssssp.....p...W-lp...................t.slpp.alpPhlp..tlusls..NF.o....lcoQl.has.hs.......hpsp.........t............................................................................................................hh.l...p..psLsth...l.ss...sE.h..Lsss..........hs.ts.s.................................................................lpallal..........P.....tpsPlhlt....ttp........................ssuah.P......pWGulhlhN.s................tt..h.................hsp.th....hh.hFh.s..pLh.Lh...Ghsps........................................h.hclstlhhhpshp.lhpussTLtSLspLh.tpls..phsI.ppluppV.p.......ulttlpt.shp.ltts..........................................htpuh.h.up.A.t.uEpAFF-..ohlt.hYFPp-pKhAlYlPLhhPhslsllhuhhp.hhth................................................................................................ 0 103 157 222 +10343 PF10511 Cementoin Trappin protein transglutaminase binding domain Finn R, Coggill P anon PROSITE_PS00313 Family Trappin-2, itself a protease inhibitor, has this unique N-terminal domain that enables it to become cross-linked to extracellular matrix proteins by transglutaminase [1]. This domain contains several repeated motifs with the the consensus sequence Gly-Gln-Asp-Pro-Val-Lys, and these together can anchor the whole molecule to extracellular matrix proteins, such as laminin, fibronectin, beta-crystallin, collagen IV, fibrinogen, and elastin, by transglutaminase-catalysed cross-links. 
The whole domain is rich in glutamine and lysine, thus allowing and transglutaminase(s) to catalyse the formation of an intermolecular epsilon-(gamma-glutamyl)lysine isopeptide bond [2]. Cementoin is associated with the WAP family, Pfam:PF00095, at the C-terminus. 22.30 22.30 22.30 22.30 21.40 22.10 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.26 0.73 -6.52 0.73 -3.99 21 190 2009-01-15 18:05:59 2008-02-28 13:12:49 4 14 25 1 41 164 0 16.90 61 22.81 CHANGED GQDsVKGph.lKGQDss .GQDsVKGpsPVKGQDss... 0 2 2 2 +10344 PF10512 Borealin Cell division cycle-associated protein 8 Buljan M, Coggill P anon TreeFam_TF101077 Family The chromosomal passenger complex of Aurora B kinase, INCENP, and Survivin has essential regulatory roles at centromeres and the central spindle in mitosis. Borealin is also a member of the complex. Approximately half of Aurora B in mitotic cells is complexed with INCENP, Borealin, and Survivin. Depletion of Borealin by RNA interference delays mitotic progression and results in kinetochore-spindle mis-attachments and an increase in bipolar spindles associated with ectopic asters [1]. 19.50 19.50 19.70 23.70 19.00 19.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.66 0.71 -4.17 9 119 2009-01-15 18:05:59 2008-02-28 14:20:31 4 3 68 2 73 116 0 115.60 33 42.21 CHANGED RhSRuphtTPhsup...htsShs+.hshhTP.......+hsspss.hshLRpsphuEslYSho..GSPl.Ass...c.hslslPItsG....cshcL.As-l-oh.lppLDscsLppIKpLpspLspIssphc ................................................S+tthhTPtssp........thS.hs.p.psh...hTP...............+hs..sps..tts.h.L.R...pPtsuEhlhohu..GSPL.sss...............cph.lslPlssG.....csh+L...ss.-.lpph...slt..pLDs....csLspIKpLpspLtplss.h................................. 0 17 20 49 +10345 PF10513 EPL1 Enhancer of polycomb-like Mistry J, Wood V anon Pfam-B_3033 (release 22.0) Family This is a family of EPL1 (Enhancer of polycomb-like) proteins. 
The EPL1 protein is a member of a histone acetyltransferase complex which is involved in transcriptional activation of selected genes [3]. 21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.34 0.71 -3.99 83 1066 2009-01-15 18:05:59 2008-02-28 14:26:12 4 25 277 0 649 973 5 150.90 23 16.50 CHANGED hRh++lshp....ptl.lhp..t.c-l.shpt..............................................................................t.t..p.............p..ptplpthhstsp..........................................lPpPshp....hp...hpp.hsst.atpPp.sYl+ap.................................s--hs................tscYDhD-cDptWLcph...........................Npc...............t.shp.................lopppFEhlh-chE+p ......................................................................................................................................................................................................................................................................t....th.s......p.h.lhp....p-h.s.....................................................................................h..tp.......................t..ccp.hptshpttp..........................................................................................................................................lPpPphc...........lp.......hpp..ss....a.p..Pp..pa.h.+ht...ttt................................................st-hs............................tscYDhD-cD...ttW.Lc.h.h..............................................................................Npc.................................................t.h...t................................................lstppFEhlh-clE+........................................................................................................................................................... 
0 168 282 466 +10346 PF10514 Bcl-2_BAD Pro-apoptotic Bcl-2 protein, BAD Buljan M, Coggill P anon TreeFam_TF102001 Family BAD is a Bcl-2 homology domain 3 (BH3)-only pro-apoptotic member of the Bcl-2 protein family that is regulated by phosphorylation in response to survival factors [1]. Binding of BAD to mitochondria is thought to be exclusively mediated by its BH3 domain. Membrane localisation of BAD mediates membrane translocation of Bcl-XL. The C-terminal part of BAD is sufficient for membrane binding. There are two segments with differing lipid-binding preferences, LBD1 and LBD2, that are responsible for this binding: (i) LBD1 located in the proximity of the BH3 domain (amino acids 122-131) and (ii) LBD2, the putative C-terminal alpha-helix-5 [2]. Phosphorylation-regulated 14-3-3 protein binding may expose the cholesterol-preferring LBD1 and bury the LBD2, thereby mediating translocation of BAD to raft-like micro-domains [3]. 25.00 25.00 35.70 35.70 23.30 23.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.06 0.71 -4.29 4 66 2009-01-15 18:05:59 2008-02-28 16:01:59 4 2 34 2 24 48 0 140.30 56 92.03 CHANGED MFQIPEFE.SEQEDuSpsDRGLGPS.oGDtP.G.tKpahpAPGLLtphspQQ.GQssusSHHGGsGshEhRSRHSSY.AGsE-DEtM.tE-.uPFRGRSRSAPPNLWAAQRYGRELRRMSDEFcsSF.KGLPRPKSAGTATQMRQSsSWTRhlQSWWsRNLG+GuSsPSQ .........................................................................MFQIs-.-.oEpEDs........SsspRuLuPp.ststs.u.....htsPuhhhtssHpQ.tp..ssupsH+G.Gs.ushEhR..SRH..S.......Sh.uG..s--....DEuh.tEE.uPFRGRSRSAPPNLWAApRYGRELRRMSDEFpsSF.K.G.LP.RPKSAGTAoQMcQSsSWp+hh.uhasRp.ucGtstPs.............................. 
0 1 4 9 +10347 PF10515 APP_amyloid beta-amyloid precursor protein C-terminus Finn R, Coggill P anon PROSITE_PS00320, Pfam-B_2082 (release 22.0) Family This is the amyloid, C-terminal, protein of the beta-Amyloid precursor protein (APP) which is a conserved and ubiquitous transmembrane glycoprotein strongly implicated in the pathogenesis of Alzheimer's disease but whose normal biological function is unknown. The C-terminal 100 residues are released and aggregate into amyloid deposits which are strongly implicated in the pathology of Alzheimer's disease plaque-formation. The domain is associated with family A4_EXTRA, Pfam:PF02177, further towards the N-terminus. 19.90 19.90 20.50 20.20 19.20 18.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.50 0.72 -3.71 9 380 2009-01-15 18:05:59 2008-02-28 17:07:07 4 21 99 18 118 341 0 51.60 67 8.22 CHANGED lIllullhl+.R+shusluHGhVEVDss....hoP...EE++LscMQppGYENPTYKaaE .............................................................VIVIoLVML+.K+Q...Yso.I.pHGlVE.VDsh....lTP......EERHLsKMQppGYENPTYKahE........ 0 21 30 66 +10348 PF10516 SHNi-TPR SHNi-TPR Mistry J, Wood V anon Pfam-B_14727 (release 22.0) Repeat SHNi-TPR family members contain a reiterated sequence motif that is an interrupted form of TPR repeat [1]. 20.70 20.70 20.70 20.80 20.60 20.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.33 0.72 -4.71 25 326 2012-10-11 20:01:02 2008-02-29 10:58:00 4 17 229 0 195 330 10 37.60 37 7.83 CHANGED A-sashLGElSLEsEpFsQAspDhcpuLpL+pplhssc ..........Apsa.tLGElSlEuEpaspAlpDappsLslppphh.sc.......... 1 65 104 158 +10349 PF10517 DM13 Electron transfer DM13 Iyer L, Mistry J anon Manual Domain The DM13 domain is a component of a novel electron-transfer system potentially involved in oxidative modification of animal cell-surface proteins [1]. 
It contains a nearly absolutely conserved cysteine, which could be involved in a redox reaction, either as a naked thiol group or through binding a prosthetic group like heme [1]. 25.00 25.00 25.60 26.40 24.60 24.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.33 0.72 -3.67 66 749 2009-01-15 18:05:59 2008-02-29 15:11:02 4 14 535 0 284 582 71 101.70 27 42.30 CHANGED G....pFpt..pss...HhspGplpl....supphlthp.shss...G...PDh+laLuss......tsptsh.h.psph..lplGsl.Ksa..sshhhslPsslclscassVllWC-pFu.hhuuAphp .........................pFpt..pss.....ct.s.p.G.pspl..........hsucp.hlh.t.sacupt.G...PDhalaLsps........................sshcpt..........tclu....l....ct....ppc..shsLPps.l...c.l..sc..aspVslaCcchps..FGuApl..... 0 108 166 254 +10350 PF10518 TAT_signal TAT (twin-arginine translocation) pathway signal sequence Bateman A anon Bateman A Motif \N 19.90 19.90 19.90 19.90 19.80 19.80 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.85 0.74 -7.47 0.74 -3.49 64 1505 2012-10-02 00:19:25 2008-02-29 15:28:33 4 168 868 0 484 1326 535 25.60 34 5.54 CHANGED phoRRshL+uuuusuussshuussss ....loRRsFLKuuuusuAusuluush............. 0 160 342 422 +10352 PF10520 Kua-UEV1_localn Kua-ubiquitin conjugating enzyme hybrid localisation domain Buljan M, Coggill P anon TreeFam_TF106147 Domain This domain is part of the transcript of the fusion of two genes, the UEV1, an enzymatically inactive variant of the E2 ubiquitin-conjugating enzymes that regulate non-canonical elongation of ubiquitin chains, and Kua, an otherwise unknown gene. UEV1A is a nuclear protein, whereas both Kua and Kua-UEV localise to cytoplasmic structures, indicating that the addition of a Kua domain to UEV confers new biological properties. UEV1-Kua carries the B domain with its characteristic double histidine motif, and it is probably this domain which determines the cytoplasmic localisation. 
It is postulated that this hybrid transcript could preferentially direct the variant polyubiquitination of substrates closely associated with the cytoplasmic face of the endoplasmic reticulum, possibly, although not necessarily, in conjunction with membrane-bound ubiquitin-conjugating enzymes [1]. 21.70 21.70 24.30 23.40 20.30 19.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.44 0.71 -4.62 21 202 2009-01-15 18:05:59 2008-02-29 16:08:18 4 3 148 0 125 190 17 167.40 46 60.55 CHANGED hlhADFsSGlsHWusDTWGos-pPllGp.aIcsFREHHlcPpsIT+HDFlEsNussshsplssLhhhshphhhpsss.....h.......shps...FlhshshalshTNQlHKWSHthhu.lPshVhhLQchcllLsR+cH+hHHpuPa-shYCIToGWLNhsL-clsFW+phEtllphhTGhpPRucD .....................................lhADFhSGlVHW......uuD...TWGSs-hPllGcsFIRsFREHHlDPouI...TRH....D.FIETNGDNs.....hlslhsLhhhsaphhstss.t....th...h..............shps.Flhshh.lalshTN........QIHKWSHohhu..LPtaVhhLQch+lILPR+HH.R.lHHluPH-oYaCI..T...TGWlNhsL-....ph....tFapthE.llphhTG.pPRspD.................. 1 45 66 98 +10353 PF10521 DUF2454 Protein of unknown function (DUF2454) Mistry J, Wood V anon Pfam-B_82729 (release 22.0) Family A Schizosaccharomyces pombe member of this family is known to interact with Tel2. Tel2 is a component of the TOR complexes [1]. 
22.30 22.30 22.30 22.60 22.10 21.80 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.79 0.70 -5.40 29 199 2009-01-15 18:05:59 2008-02-29 16:15:39 4 5 178 0 142 207 0 256.50 20 52.36 CHANGED cllhsLusasss...ppshooppspshsptllp...............sss.hphlpp..........hLhppl+Phhh+s............p..thottG+.............shpP..h.tht.s......t.psWKhssshtshslsWhlh..pthspspls....sphshllPslLsLlDDp...ssshKhpGspLLpplLp.h.psp.........LppTsLssVap-ulhspLh.lPshostspolpLlpssaPsLhsLh.h.......................tsspsp.......phtplschlpcsILsshtthps...............a.pLsphLlppl.thltp.LGhpsstaLpcll.hltp.hlpsPhhsth ........................................................................................h..h.has.....p.sWs...o.ts..tphsttlhp...................pt..tt...htp...........hlhthl+Phh.+p.......................t...............................t..............psWKpps..sht.t.shtWslt..............psspshls.......................pph.hllPs.hLsllDDh...psphKhtGlphLpplltps.tsp........................LtppshspVh.culh.s.pLh........spshpLlphshsslhsLh.h...................................psstt.........c.pphschltphlLss.hthttp...................tlpph.hhpp.ls.hlpp..lGl.hss+aLpclh.hl.t.hlps..h................................................................... 0 37 72 109 +10354 PF10522 RII_binding_1 RII binding domain Bateman A anon Vijayaraghavan et al. Mol Endocrinology 13(5):705-717 (1999) Motif This domain is found is a wide variety of AKAPs (A kinase anchoring proteins) [1]. 14.40 14.40 14.40 14.40 14.30 14.30 hmmbuild -o /dev/null HMM SEED 18 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.55 0.76 -6.60 0.76 -2.98 14 161 2009-01-15 18:05:59 2008-03-03 10:00:48 4 11 27 0 47 185 0 18.00 44 1.91 CHANGED plcphAsplVptVIppAh .pLEatAspLlptlIQpAh... 
2 4 4 9 +10355 PF10523 BEN BEN domain Lakshminarayan I, Bateman A anon Lakshminarayan I Domain The BEN domain is found in diverse animal proteins such as BANP/SMAR1, NAC1 and the Drosophila mod(mdg4) isoform C, in the chordopoxvirus virosomal protein E5R and in several proteins of polydnaviruses. Computational analysis suggests that the BEN domain mediates protein-DNA and protein-protein interactions during chromatin organisation and transcription [1]. 21.30 21.30 21.40 21.60 21.20 21.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.34 0.72 -3.92 70 842 2009-01-15 18:05:59 2008-03-03 10:50:14 4 14 114 0 412 780 0 78.60 21 21.22 CHANGED pphsptLlptlFscphhsp..shssphp..................t.sp..tLDsptlphI+phlppthshpp.......phWt.pshptls..phhpstpppt ........t.phsppLLthlFscph.hssp.shsGphpp................ttss+p.tLDspclphl+.....palpthhsspc............p.a..phtppls..phhpp..p..h..................................... 0 71 102 212 +10356 PF10524 NfI_DNAbd_pre-N Nuclear factor I protein pre-N-terminus Finn R, Coggill P anon PROSITE_PS00349 Family The Nuclear factor I (NFI) family of site-specific DNA-binding proteins (also known as CTF or CAAT box transcription factor) functions both in viral DNA replication and in the regulation of gene expression in higher organisms. The N-terminal 200 residues contains the DNA-binding and dimerisation domain, but also has an 8-47 residue highly conserved region 5' of this, whose function is not known. Deletion of the N-terminal 200 amino acids removes the DNA-binding activity, dimerisation-ability and the stimulation of adenovirus DNA replication [1]. 21.50 21.50 22.20 22.90 20.80 19.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.18 0.72 -4.36 7 412 2009-01-15 18:05:59 2008-03-03 11:18:10 4 5 78 0 136 328 0 42.20 88 9.29 CHANGED sh...pQ--htPFlEtLLPaV+A.AYsWFpLQAAKR+aaKcH-K .....s...shhQDEFHPFIEALLPHVRAFAYTWFNLQARKRKYFKKHEK... 
0 19 35 75 +10357 PF10525 Engrail_1_C_sig Engrailed homeobox C-terminal signature domain Finn R, Coggill P anon PROSITE_PS00033, Pfam-B_11539 (release 22.0) Domain Engrailed homeobox proteins are characterised by the presence of a conserved region of some 20 amino-acid residues located at the C-terminal of the 'homeobox' domain. This domain of approximately 20 residues forms a kind of a signature pattern for this subfamily of proteins [1]. 21.70 21.70 22.50 21.70 19.90 21.60 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.13 0.72 -7.54 0.72 -4.82 23 236 2009-01-15 18:05:59 2008-03-03 14:14:06 4 1 120 2 106 226 0 30.00 64 10.64 CHANGED AoGs+NsLALpLMAQGLYNHST.sshsccp-sp ....AoGtKNsLALpLMAQGLYNHST.sshppcp-p............... 0 27 36 68 +10360 PF10528 PA14_2 GLEYA domain Linder T, Bateman A anon Linder T Domain This presumed domain is found in fungal adhesins and is related to the PA14 domain. 19.70 19.70 19.70 20.20 19.50 18.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.37 0.71 -4.11 8 132 2012-10-02 01:24:23 2008-03-03 17:31:24 4 25 47 1 112 246 18 116.20 24 15.87 CHANGED shsIVaQhahhs.tT.sTYTLsV.sNsDDlFaGWFGs.KAhSGWSss.......NYDsYAhWp.......GppuhushssssLssGpalPlRFlhANGuuhGuFsFsFsussosslssT........oYsY...TuTC ......................hsl.hp.GYFhsspo.GsYoFsl........p..sDD...hshlhh.Gs......sA......as.s.....hs.ps......................Nhsh..hsh..h.........ssss..s..p.s..sh..shpL..puGhYYPlRl.ha.sNt.s.ssushsh.shpsP.sGshhhs......................t................................... 0 18 59 104 +10361 PF10529 Hist_rich_Ca-bd Histidine-rich Calcium-binding repeat region Finn R, Coggill P anon PROSITE_PS00328 Repeat This is a histidine-rich calcium binding repeat which appears in proteins called histidine-rich-calcium binding proteins (HRC). HRC is a high capacity, low affinity Ca2+-binding protein, residing in the lumen of the sarcoplasmic reticulum. HRC binds directly to triadin. 
This binding interaction occurs between the histidine-rich region of HRC and multiple clusters of charged amino acids, named as the KEKE motifs, in the lumenal domain of triadin. The region in which this repeat is found in many copies is long and variable but is the acidic region of the protein. There is also a cysteine-rich region further towards the C-terminus [1]. HRC may regulate sarcoplasmic reticular calcium transport and play a critical role in maintaining calcium homeostasis and function in the heart. HRC as a candidate regulator of sarcoplasmic reticular calcium uptake [2]. 17.50 9.00 17.50 9.10 17.40 8.90 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.44 0.73 -6.52 0.73 -3.76 25 173 2009-01-15 18:05:59 2008-03-04 12:26:43 4 10 17 0 51 157 0 15.30 56 11.48 CHANGED HRH..RGHc-E-D-D-D .....HRH..pGHccEED-Dss..... 0 6 6 6 +10362 PF10530 Toxin_35 Toxin with inhibitor cystine knot ICK or Knottin scaffold Finn R, Coggill P anon PROSITE_PS60029 Family Spider toxins of the CSTX family are ion channel toxins containing an inhibitor cystine knot (ICK) structural motif or Knottin scaffold. The four disulfide bonds present in the CSTX spider toxin family are arranged in the following pattern: 1-4, 2-5, 3-8 and 6-7. CSTX-1 is the most important component of C. salei venom in terms of relative abundance and toxicity and therefore is likely to contribute significantly to the overall toxicity of the whole venom. CSTX-1 blocked rat neuronal L-type, but no other types of HVA Cav channels [3]. Interestingly, the omega-toxins from Phoneutria nigriventer venom (another South American species also belonging to the Ctenidae family) are included as they carry the same disulfide bond arrangement. suggested that CSTX-1 may interact with Cav channels. Calcium ion voltage channel heteromultimer containing an L-type pore-forming alpha1-subunit is the most probable candidate for the molecular target of CSTX-1 these toxins [3]. 
25.00 25.00 27.80 30.10 20.60 24.70 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.80 0.72 -6.97 0.72 -3.86 6 67 2009-09-11 14:32:54 2008-03-04 12:49:05 4 1 6 0 0 71 0 26.30 76 26.11 CHANGED CIs++cSCppD.++GCC....ahhoCsC CIPKHHECTSN.KHGCCRGshFKYKCQC 0 0 0 0 +10363 PF10531 SLBB SLBB domain Iyer LM, Bateman A anon Iyer LM Domain \N 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.64 0.72 -4.08 139 11758 2012-10-03 10:59:06 2008-03-04 15:21:41 4 110 4407 39 3040 9399 3756 52.50 24 14.22 CHANGED hlslsGp.Vpc.....Pushclth..Gsslp-llph..uGGhstp.ut.......t..........sGshhshs .......................htlsGc.Vpp......P.G.h.h.plsh......G.......h.........s...lt..-hlpt...AG.Ghsps..up..............p...............st......h.............................. 0 1080 2052 2579 +10364 PF10532 Plant_all_beta Plant specific N-all beta domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Babu and colleagues [1]. It is found associated with the WRKY domain Pfam:PF03106. 21.60 21.60 22.00 33.00 21.50 21.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.35 0.71 -4.01 10 22 2009-01-15 18:05:59 2008-03-05 11:41:14 4 4 3 0 1 22 0 105.40 36 16.50 CHANGED pKlulDEossKLKLSYhsh.spsp..REsYIsDDEDVhlYLTss-pEuhRsVLHVEllsc.t.s-ch.EQlshV-R+..SSlGpNasplsstsc-h..csssshhhsE.......ssEsllEs-sp ........pKlulDEsshKL+LSY....spsc..REsYIsDDEDVhlaLTpscpcuhhsVLHVE.lst.t.sEch.EplShs-t+..SSVGhNhtc.ss.p-.h..c.stshh.s-.......tsEsllt.c....................... 0 0 0 0 +10365 PF10533 Plant_zn_clust Plant zinc cluster domain Iyer LM, Bateman A anon Iyer LM Domain This zinc binding domain was identified by Babu and colleagues and found associated with the WRKY domain Pfam:PF03106 [1]. 
20.60 20.60 20.90 20.70 16.40 20.10 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.76 0.72 -4.32 21 226 2009-01-15 18:05:59 2008-03-05 11:44:15 4 2 43 0 90 230 0 45.10 53 14.48 CHANGED +++Csttspsputt....suuuu+CHCSK+.RKt..RlKRslRVPAISuKlAD ................+++Cpst.spstsp.........suuuG+CHC..S.K+..R......K...Rl...KRoIRVPAISs.KlAD........ 0 11 54 73 +10366 PF10534 CRIC_ras_sig Connector enhancer of kinase suppressor of ras Finn R, Coggill P anon PROSITE_PS51290 Domain The CRIC - Connector enhancer of kinase suppressor of ras - domain functions as a scaffold in several signal cascades and acts on proliferation, differentiation and apoptosis. 22.00 22.00 22.90 22.30 21.90 21.50 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.04 0.72 -3.93 9 212 2009-01-15 18:05:59 2008-03-05 11:48:43 4 14 49 1 128 176 0 92.00 52 12.49 CHANGED sL+oLspKLpAus+sLQshIpuRh+ssu.-utsopchPschLsuVV-LItAA+uLLuWLsRh.FotloDaoss+.cIhpLChELsssVpccssss .........................NL+oLscKLpAuu+NLQNaI..suRR+...ustYD.G....psS..+..Kh....PNDFLTSVV-LIuAAKuLLA....WLDRsPFsulsD.aSlo+NpllQLCLELTTlVQpDCsV........ 0 10 17 69 +10368 PF10536 PMD Plant mobile domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Babu and colleagues in a variety of transposases [1]. 
24.00 24.00 24.10 24.00 23.80 23.90 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.50 0.70 -5.46 44 1219 2009-01-15 18:05:59 2008-03-05 12:39:04 4 78 27 0 764 1162 0 222.00 17 35.00 CHANGED uhuhhtsls....t..phphDps..LlsuLs-+WpsETpTFhhPhGEhTlTLpDVshLhGLsltGp......sVsushssp....hpch.......htchhthp.t.....................................................tphpppss.phs.Wlpphh....thssp.....................hpptAFLlhhluthlFss...pssptlshthh.slstslsp.....ssphuhGsAsLAhLYppLs....pustps............ssslsGshh...LlQlW...saERhthh....RP...............................p.t...h..............................................ps..............Ph..........sthWps...p.....psps.............hppth.........-t.p.sph.WpPY......spshttt....hhst.s.....................ppth......shhp..pss..L.l.............sh.......s......tlEhahPpRVhRQ.FGhtQs.lP......hph......hphhtp...tshtsastphtsh...h..t.tts..sp...........stYhcWatp ............................................................................................................................................................................................hs.....sh.-h..lhth......................................................................................................................................................................................................................................................................h...h............hsht...h......h..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....... 0 15 235 306 +10369 PF10537 WAC_Acf1_DNA_bd ATP-utilising chromatin assembly and remodelling N-terminal Finn R, Coggill P anon PROSITE_PS51136 Domain ACF (for ATP-utilising chromatin assembly and remodelling factor) is a chromatin-remodelling complex that catalyses the ATP-dependent assembly of periodic nucleosome arrays. The WAC (WSTF/Acf1/cbp146) domain is an approximately 110-residue module present at the N-termini of Acf1-related proteins in a variety of organisms. The DNA-binding region of Acf1 includes the WAC domain, which is necessary for the efficient binding of ACF complex to DNA. 25.00 25.00 27.20 25.50 24.80 24.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.27 0.72 -3.85 28 339 2009-01-15 18:05:59 2008-03-05 13:56:12 4 46 223 0 226 350 0 96.40 36 8.79 CHANGED ccVahlptTpEhFpsY--Yh......pRhshYppchaoCc.............lTG+SsLTahEALcSE..................ccttcplcp.FPptL+pPlLchlp.aspho+l-pLV-claphhKscaFsGEpV .............pVahh..TtEhFpsY-..-..ah......pR......h....h...hsp...........plWoCp.............lTG+..ssLTa.EALcSE........................p.cst.cplpp.FPp.LctPlLchlp.asph.o...pLccL.......s.........-p.l............ath....h+scaasGEpV..................................... 0 67 116 180 +10370 PF10538 ITAM_Cys-rich Immunoreceptor tyrosine-based activation motif Finn R, Coggill P anon PROSITE_ Domain Signal transduction by T and B cell antigen receptors and certain receptors for Ig Fc regions involves a conserved sequence motif, termed an immunoreceptor tyrosine-based activation motif (ITAM). It is also found in the cytoplasmic domain of apoptosis receptor. 25.00 25.00 28.30 27.30 18.30 17.60 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.83 0.72 -6.84 0.72 -4.39 6 300 2009-01-15 18:05:59 2008-03-05 14:30:42 4 5 137 0 0 316 0 24.00 71 2.81 CHANGED +pGCYRTLulFRY+SRCYVGLVWC ....sGCYRTLsLFRYKSRCYIhThWs. 
0 0 0 0 +10371 PF10539 Dev_Cell_Death Development and cell death domain Finn R, Coggill P anon PROSITE_PS51222 Domain The DCD domain is found in plant proteins involved in development and cell death. The DCD domain is an approximately 130 amino acid long stretch that contains several mostly invariable motifs. These include a FGLP and a LFL motif at the N-terminus and a PAQV and a PLxE motif towards the C-terminus of the domain. The DCD domain is present in proteins with different architectures. Some of these proteins contain additional recognisable motifs, like the KELCH repeats or the ParB domain. 27.80 27.80 27.80 31.50 27.40 27.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.70 0.71 -4.59 10 245 2009-01-15 18:05:59 2008-03-05 14:41:54 4 20 39 0 127 228 2 119.10 44 24.57 CHANGED LGGsIFsCNssThcECac+pLFGLPu+ahsaV+sIKPGLsLFLFNYss+pLHGIFEAoSpGthNI-spAatspt..s........opaPAQ....VRh+lthpChPLsEspF+ssIhpNYhs.......ss..KFRaELo+sQspcLlcLF ........................uGhIFhCNspThpEshc+pLFGLPtp.htc...V.c.sIcPGhsLFLYsass+pLaGlFEAsS.GGhNI.-PsAapspt.............ppFPAQ....V+hph.ct..Ch..PL...EspF+ssl.....pahs.......t...KFphELshtpshpLhpLF.............................. 0 20 76 103 +10372 PF10540 Membr_traf_MHD Munc13 (mammalian uncoordinated) homology domain Finn R, Coggill P anon PROSITE_PS51259 Domain Munc13 proteins constitute a family of three highly homologous molecules (Munc13-1, Munc13-2 and Munc13-3) with homology to Caenorhabditis elegans unc-13p. Munc13 proteins contain a phorbol ester-binding C1 domain and two C2 domains, which are Ca2+/phospholipid binding domains. Sequence analyses have uncovered two regions called Munc13 homology domains 1 (MHD1) and 2 (MHD2) that are arranged between two flanking C2 domains. MHD1 and MHD2 domains are present in a wide variety of proteins from Arabidopsis thaliana, C. 
elegans, Drosophila melanogaster, mouse, rat and human, some of which may function in a Munc13-like manner to regulate membrane trafficking. The MHD1 and MHD2 domains are predicted to be alpha-helical. 29.10 29.10 29.10 29.10 28.50 28.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.37 16 574 2009-01-15 18:05:59 2008-03-05 15:06:33 4 32 217 2 337 493 0 126.90 35 9.55 CHANGED -sslpPLMDaLDspLulhscsh.KpshpRlLp.tLWchVlsplcctl..s..pp........................................................htpp+sLosppsshlphsLpslcpaFHA......pGpGLp.........................hshLp+oschpsLcptLpLaspsT-pLIcpF ......................................................-ssltPLh-a.....LDs.sLslhspshpcss..h.p+VLp.cLW+h..Vlssh-chl...lPshsct..........................................................................................................ppt+sLoscpsshlchsLcsl.p.paFHA......tG..s..G.L..........................hshL.KSschp...........sLchtLpLYspsT-pLIcpa.................................................... 0 83 129 238 +10373 PF10541 KASH Nuclear envelope localisation domain Finn R, Coggill P anon PROSITE_PS51049 Domain The KASH (for Klarsicht/ANC-1/Syne-1 homology) or KLS domain is a highly hydrophobic nuclear envelope localisation domain of approximately 60 amino acids comprising a 20-amino-acid transmembrane region and a 30-35-residue C-terminal region that lies between the inner and the outer nuclear membranes [1]. During meiotic prophase, telomeres cluster to form a bouquet arrangement of chromosomes. SUN and KASH domain proteins form complexes that span both membranes of the nuclear envelope. The KASH domain links the dynein motor complex of the microtubules, through the outer nuclear membrane to the Sad1 domain in the inner nuclear membrane which then interacts with the bouquet proteins Bqt1 and Bqt2 that are complexed with Bqt4, Rap1 and Taz1 and attached to the telomere [2]. 
SUN domain-containing proteins are essential for recruiting KASH domain proteins at the outer nuclear membrane, and KASH domains provide a generic NE tethering device for functionally distinct proteins whose cytoplasmic domains mediate nuclear positioning, maintain physical connections with other cellular organelles, and possibly even influence chromosome dynamics [3]. 23.00 23.00 23.60 26.70 22.60 22.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.03 0.72 -4.12 20 319 2009-12-22 14:12:53 2008-03-05 15:18:45 4 33 80 2 156 310 0 56.40 58 1.68 CHANGED altRlhRsALPlQ.LLLLLLlslAsLlP.......hpt--aoCshsNNFARSFpPMLRYs......NGPPP.........h ...........alhRVlRA.ALPLQ.LLLLLLlsL.ACLlP........sEEDYSCshuNNFARSFaPMLRYs......NGPPPh... 0 26 37 87 +10374 PF10542 Vitelline_membr Vitelline membrane cysteine-rich region Finn R, Coggill P anon PROSITE_PS51137 Domain In Drosophila melanogaster the vitelline membrane (VM) is the first layer of the eggshell produced by the follicular epithelium. It is composed of at least four different proteins. VM proteins are similarly organised with a central highly conserved 38-amino acid domain which is flanked by unrelated regions. The domain contains three highly conserved cysteines. 20.30 20.30 20.80 26.10 17.00 16.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.06 0.72 -4.33 2 82 2009-01-15 18:05:59 2008-03-05 16:08:28 4 1 15 0 37 86 0 36.90 75 25.87 CHANGED uh.APPCPpNYLFSCQPNLAPsPCut.APuYGSAGAYo .....SIPAPPCPKNYLFSCQPsLsPVPCS...A.P..A..s.SYGS.AGAYS..... 0 5 5 21 +10375 PF10543 ORF6N ORF6N domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 
21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.73 0.72 -3.95 60 807 2009-01-15 18:05:59 2008-03-05 16:34:40 4 9 514 0 115 705 44 86.90 32 37.57 CHANGED p.plhclRGp+VhhsppLAclYss-ocplppshpRNt.......cRFs.scahFpLstcEhcp...l+sphs..............................thstpstt.hhaTEpGsshLupl ...............t..phhphps.pVhhsppLAclYGs-.spplppshpcNt.......pRFs.scahFpL.....ss..pEhcp....l.+.s.p..hsh.p...................................thut.pttt.hlaTEpGhuhhuph............................... 0 42 87 110 +10376 PF10544 T5orf172 T5orf172 domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.69 0.72 -3.61 204 1694 2012-10-01 19:55:08 2008-03-05 16:40:48 4 26 830 0 930 1844 598 92.50 31 29.87 CHANGED GhlYl.h...........tp.t..st.......hKIGhTps...lpcRlpphp..............sshshphph......hth.............h...sstcl.EphlHpphps...hcl...............cpEaFc.....ls.....hcplcp.....hlc..pht .................................................................GhlYl.l...................ppspshsp............ph..aKIGhT........hs....lppRhcphs....................................................................sclcV.......................................................hhss.Dhpsh....EspLh.cp..Fcc....t..h..h..p.......................................t+EhFc.........Vs..........plpphh....ph........................................................................ 0 747 823 896 +10377 PF10545 MADF_DNA_bdg Alcohol dehydrogenase transcription factor Myb/SANT-like Finn R, Coggill P anon PROSITE_PS51029 Domain The myb/SANT-like domain in Adf-1 (MADF) is an approximately 80-amino-acid module that directs sequence specific DNA binding to a site consisting of multiple tri-nucleotide repeats. 
The MADF domain is found in one or more copies in eukaryotic and viral proteins and is often associated with the BESS domain. It is likely that the MADF domain is more closely related to the myb/SANT domain than it is to other HTH domains. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.10 0.72 -3.79 116 1496 2012-10-04 14:01:12 2008-03-05 16:45:37 4 95 82 0 954 1699 0 82.30 20 23.59 CHANGED LIphl+ppPsLacppp..pYp......sps................t.+ppsWppluppl...s..hs....s..p.....s+p....+WpsLR...spap+ch.cphp...............tspthtspah....ahcphpFL .............................................LIphhcppPsLas...tpp...p.ap......spp..........................t.+p...psW..p...p...lu...ppl.......s.......hs............s..pp................hpp.+aps..LR.......sp.Yp+ch.+chp..................tttthhsph.....ahcphpFL..................................................................... 0 247 332 748 +10378 PF10546 P63C P63C domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 25.00 25.00 36.80 36.10 24.70 15.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.12 0.72 -4.05 12 137 2009-01-15 18:05:59 2008-03-05 17:02:07 4 6 134 0 16 88 3 89.00 38 30.96 CHANGED Ih-tFlucclpPahKpFPssaYcplFRLpGhpasscssp.RPthhGphTNcllYpRLAP...tlLEEL+cpssp.ttctt+.t+hHQhLTp-IGHPcL ............IhthFlhp-s.psWpKcF..sDshYctLaRhpGls..tcpss.+PhlauhloschIYs.lhP...plhs-lKtptsh.........tpKhHQaLssD.G........ 0 5 8 11 +10379 PF10547 P22_AR_N P22_AR N-terminal domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 
25.00 25.00 27.80 27.70 23.50 23.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.66 0.71 -4.28 13 394 2009-01-15 18:05:59 2008-03-05 17:05:07 4 6 279 0 35 249 7 115.70 53 47.21 CHANGED lssp..sVsFpGppL.l..sVppsG..psYsuMKPIVEuhGLuWpSQapKLhpscstsshlcIsh.VuuDGKpRcMlClPL+KLsGWLhoIsPsKVRPElRDKlIpYQcEChcVLa-YWp.pGtA.....R ............hshhsVPFHGss..L.a..lVs.aN..G..EPY.VPMKPlVpGMGL...s...Wp...S...Q...hsKL.+pRFts.sls.E....Is.........h..VA..........p.D......GKpRpMlsLPLRKLsGWLpTI....NP....N...K.V....+.PE.........IR-KVIpYQEECDDVLY-YWT.KGhVhNsR................ 0 8 21 32 +10380 PF10548 P22_AR_C P22AR C-terminal domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. It is found associated with Pfam:PF10547. 21.00 21.00 21.10 21.30 20.80 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.28 0.72 -4.11 16 459 2009-01-15 18:05:59 2008-03-05 17:07:46 4 11 251 0 11 261 0 73.00 45 29.70 CHANGED pcp...ashpFT-pELppLsWLahuhp+spshhpplh.sLctlsSshusplYuhuhEYtpshcps+plLp+hstchc ..............pc+hssphoAcEhsoLVWLWpausRuQsLh+cLYPALKpIpSsYoG+sYDhuaEasYllshAR-VLlpcTcclD...................... 0 2 6 9 +10381 PF10549 ORF11CD3 ORF11CD3 domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 25.00 25.00 26.70 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.72 0.72 -4.24 9 414 2009-09-11 15:21:10 2008-03-05 17:11:40 4 6 250 \N 11 158 0 56.50 43 32.33 CHANGED phohppphNphstEacpcKslAShsG+sLscW.+hcKPhL.AclcphcpchQlplshh .......ShppEhNtsCt-hc+-KslAShsuptLNcW.+hsKPsllu+IcpLtppuQlhlshh.............. 0 3 5 8 +10382 PF10550 Toxin_36 Conantokin-G mollusc-toxin Finn R, Coggill P anon PROSITE_PS60025 Domain The conantokins are a family of neuroactive peptides found in the venoms of fish-hunting cone snails. 
They possess a high content of gamma-carboxyglutamic acid (Gla) (4-5 residues), a non-standard amino-acid made by the post-translational modification of glutamate (Glu) residue. Conantokins are the only natural biochemically characterised peptides known to be N-methyl-D-aspartate (NMDA) receptor antagonists. 25.00 25.00 26.70 26.00 19.40 18.50 hmmbuild -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.13 0.74 -6.50 0.74 -2.29 3 3 2009-09-10 23:18:32 2008-03-05 17:15:11 4 1 3 1 0 3 0 14.70 76 19.21 CHANGED GEEElQKMspELhRE GEEEVAKMAAELARE 0 0 0 0 +10383 PF10551 MULE MULE transposase domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Babu and colleagues [1]. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.32 0.72 -3.82 126 2654 2012-10-03 01:22:09 2008-03-05 17:40:22 4 142 229 0 1688 6089 517 90.00 18 13.95 CHANGED us.....ah....h...........hhs.......hhGhst......phh....l........uhslh.ss.............Estcs.ap...hhhp.....shp..psh................hlloDtppulhpAlpp..la............Pt..sp..pphChhHlh...csht ................................................................................................hhuhstpt.....phh....sh........u.hs.l.l..ps.......................................Estcs..at...........ahhp..................................................phh....phhst...........p...sh..sllsD.p.pt...u....hhpAlpp..sa............Pp.....sp....pthCh.hHlhpsh.h............................................... 0 642 1072 1371 +10384 PF10552 ORF6C ORF6C domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 
21.90 21.90 21.90 21.90 21.60 21.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.26 0.71 -4.36 23 471 2009-01-15 18:05:59 2008-03-06 12:49:20 4 10 383 0 34 350 3 108.30 23 46.84 CHANGED ptltlhhps.cphccclpplcpclpcLc....pshsLhss-scplp+pVpppVsphLGG+tosAYpc..l+pKlapDIapplK+pFsVsoYssI++Kch-pAhchl.ssapssp..shptcIp .................................................h.hhhps..phppphpp.lcpc.......lptLc..................pp.sl...stsptp....plpptlpp+Vh....thhs.Gh.t..s.............sap.........c....hppp...lapp........h.hppl....KctFsV.spYspl.+pca-cAlpal...ppWpPsh..tht.pI................. 0 7 24 29 +10385 PF10553 MSV199 MSV199 domain Iyer LM, Bateman A anon Iyer LM Domain This domain was identified by Iyer and colleagues [1]. 21.10 21.10 21.50 22.80 20.80 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.79 0.71 -4.29 16 37 2009-01-15 18:05:59 2008-03-06 12:58:01 4 10 5 0 0 37 36 134.10 32 36.17 CHANGED hlDIhp...FIctssa-hchh..hhpphWhshhs.......cpplhIosslLpaltYpsp...........app.+cs..FcchLcpNpIpa............................................pclpas-s.lpp..YsplpcEhcpls..sslspp+WlIlpscsFKhhIh+LNTpsuchIRcYYlplEcllp ...................hlDIhpFlchspaclshs..hFs.hW.slss.................ppphhlspslLcWhGYpGc...............hppQ+ps..Fp+hLcpNpIpY............................................pElshp-..lp...YsplpcElptls.ssslspp+alIhcscshKhuIMpLpTKsuchIRcYYlsLEcllp.......................... 0 0 0 0 +10386 PF10554 Phage_ASH Ash protein family Iyer LM, Bateman A anon Iyer LM Family This family was identified by Iyer and colleagues [1]. It includes the Ash protein from bacteriophage P4. 
21.80 21.80 21.80 21.80 21.70 21.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.35 0.72 -3.71 19 596 2009-01-15 18:05:59 2008-03-06 14:21:45 4 7 351 0 35 460 0 95.40 36 41.17 CHANGED YospAstKouAGhssPphstApscA....usFh..ssths............................thplM..........VupAGtPpuhPsSh....................loGhusPVphsT.clsss.G..GshhphshEAA..................hMsTl............ohspsph .................................Ysh.AshKsuAG.hts.p.......st.cA......ssFh..shthp.......................................hhp.M..........VupAGt..spshPsSh....................hsGhusP.Vphs.T.p.clssS.G..Gs..hphhhEAA..................hMA.Th.......s.spsp.................................................................................... 0 1 8 20 +10387 PF10555 MraY_sig1 Phospho-N-acetylmuramoyl-pentapeptide-transferase signature 1 Finn R, Coggill P anon PROSITE_PS01347 Domain Phospho-N-acetylmuramoyl-pentapeptide-transferase (EC 2.7.8.13) (mraY) is a bacterial enzyme responsible for the formation of the first lipid intermediate of the cell wall peptidoglycan synthesis. It catalyses the formation of undecaprenyl-pyrophosphoryl-N-acetylmuramoyl-pentapeptide from UDP-MurNAc-pentapeptide and undecaprenyl-phosphate. It is an integral membrane protein with probably ten transmembrane domains. This domain is located at the end of the first cytoplasmic loop and the beginning of the second transmembrane domain. 20.10 11.30 20.10 18.00 20.00 11.10 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.95 0.74 -5.85 0.74 -4.24 64 3122 2009-01-15 18:05:59 2008-03-06 15:12:43 4 4 3072 0 644 1899 1136 13.00 74 3.72 CHANGED KpuTPTMGGllll ..KpGTPTMGGllIL. 0 203 418 541 +10389 PF10557 Cullin_Nedd8 Cullin protein neddylation domain Finn R, Coggill P anon PROSITE_PS01256 Domain This is the neddylation site of cullin proteins which are a family of structurally related proteins containing an evolutionarily conserved cullin domain. 
With the exception of APC2, each member of the cullin family is modified by Nedd8 and several cullins function in Ubiquitin-dependent proteolysis, a process in which the 26S proteasome recognises and subsequently degrades a target protein tagged with K48-linked poly-ubiquitin chains. Cullins are molecular scaffolds responsible for assembling the ROC1/Rbx1 RING-based E3 ubiquitin ligases, of which several play a direct role in tumorigenesis. Nedd8/Rub1 is a small ubiquitin-like protein, which was originally found to be conjugated to Cdc53, a cullin component of the SCF (Skp1-Cdc53/CUL1-F-box protein) E3 Ub ligase complex in Saccharomyces cerevisiae, and Nedd8 modification has now emerged as a regulatory pathway of fundamental importance for cell cycle control and for embryogenesis in metazoans. The only identified Nedd8 substrates are cullins. Neddylation results in covalent conjugation of a Nedd8 moiety onto a conserved cullin lysine residue [1]. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.12 0.72 -4.15 109 1474 2009-01-15 18:05:59 2008-03-11 09:11:25 4 25 308 28 994 1441 15 66.30 42 8.55 CHANGED lpp-RphhlpAuIVRIMKsRKplsHspLlsElhpplpp...+Ftssss.IK+pIEpLI-+EYlcRs..psts .....................tpDRphtlpA.A....IVRIMKsRKp.lsHspLlsEl..............hpQLpt...........+Fh..Pp...sshI.K.........KpIEsLI-+-YlcRscss..t.................. 0 369 551 808 +10390 PF10558 MTP18 Mitochondrial 18 KDa protein (MTP18) Tolvanen M, Bateman A anon Tolvanen M Family This family of proteins are mitochondrial 18KDa proteins that are often misannotated as carbonic anhydrases. It was shown that knockdown of MTP18 protein results in a cytochrome c release from mitochondria and consequently leads to apoptosis [1]. Overexpression studies suggest that MTP18 is required for mitochondrial fission [2]. 
25.00 25.00 61.10 35.40 22.60 22.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.83 0.71 -4.34 24 236 2009-09-11 16:45:10 2008-03-11 10:46:53 4 3 206 0 171 223 16 155.30 37 70.83 CHANGED oslRYAu.h.Rhtp.........uYssDlGEuFRPls.shLV+uuYGVSauYlluDls...cuhKshhcscthhs..............................................................................hpchph.ssscpslaQulAShulPuFTIHolVpaSuhhhpps..+........shh.RpWsPsulGLuslPhLsa.hDcs......V-psl .....................................................h...RYluYss-lGEuFRsls.shlV.......puuYGVuhuYlluDss..pcGhKAhhpspt.hs....................................................................................pchth...ssscphlaQulAShulPuFTIp.plsthSthhhpss......+.............shh..RpWsP..sulGLuslPhl.a.hDcsV-th........................................ 0 59 93 140 +10391 PF10559 Plug_translocon Plug_Sec61p; Plug domain of Sec61p Coggill P anon Finn R Domain The Sec61/SecY translocon mediates translocation of proteins across the membrane and integration of membrane proteins into the lipid bilayer. The structure of the translocon revealed a plug domain blocking the pore on the lumenal side.The plug is unlikely to be important for sealing the translocation pore in yeast but it plays a role in stabilising Sec61p during translocon formation. The domain runs from residues 52-74 [1]. 22.10 22.10 22.70 22.20 21.80 21.30 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.42 0.72 -4.44 68 686 2009-01-15 18:05:59 2008-03-13 11:30:10 4 8 470 12 424 593 89 34.60 52 7.47 CHANGED llYhlhoplPLYGl.....sssssDsFhhhRslh.AuspG ........hIFLVhs.QlPLaGI........hSocouDPFYWhRsIL.ASNRG.. 0 141 242 351 +10393 PF10561 UPF0565 Uncharacterised protein family UPF0565 Coggill P anon UPF0565 Family This family of proteins has no known function. 
25.00 25.00 25.10 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.93 0.70 -5.53 12 182 2009-01-15 18:05:59 2008-04-16 15:38:41 4 5 101 0 127 176 0 192.10 24 72.60 CHANGED shpl.slsGhp..pRsNsllhhtPhhp...............spp.ssptllaFsGDhpsa.pp......hppsssssp.hpasLEslAhlLsp+Fsts......HIhVl+sSph..spFSsasNFlpussh............................................GsPcp...o.sthtuhpHLhpLL.shhpclhp...t.ph..p.h......................................................................................................................................................................................s.shtchpLhLlGFSKGCsVLNQllhEhp.......................ht..ssphphhlspIsshYaLDsGaststtsalTppssLcpLupt......ulplalHsTPaQlpDshRsWI++EhcpFlchLcphuhsh ...............................................................................................................................sht..h.hl..th..t.......hhhlcs..h.....t....husapshl.ss..............................................G.s..............u...th...lh.t.........................................................................................................................................................................................................................................................h.lhGFSKGssVlsphhhEht.................................................................th....h.ht.lpthaalDsGhs.t.tsa.s..thlpthsp.........th.hhhHhTPhphts...Rshlt.E.t.h.phLt......s............................... 0 47 64 100 +10394 PF10562 CaM_bdg_C0 Calmodulin-binding domain C0 of NMDA receptor NR1 subunit Coggill P anon Pfam-B_7118 (release 22.0) Domain This is a very short highly conserved domain that is C-terminal to the cytosolic transmembrane region IV of the NMDA-receptor 1. It has been shown to bind Calmodulin-Calcium with high affinity. 
The ionotropic N-methyl-D-aspartate receptor (NMDAR) is a major source of calcium flux into neurons in the brain and plays a critical role in learning, memory, neural development, and synaptic plasticity. Calmodulin (CaM) regulates NMDARs by binding tightly to the C0 and C1 regions of their NR1 subunit. The conserved tryptophan is considered to be the anchor residue [1]. 20.20 20.20 20.20 23.20 18.10 19.00 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.85 0.72 -7.01 0.72 -4.09 4 144 2009-01-15 18:05:59 2008-04-16 17:43:46 4 12 71 0 48 149 0 28.80 70 3.30 CHANGED IsYK+H+shKcKphELA+pAsD+WRuNIp .IAYKRHKDARRKQMQLAFAAVNVWRKNLQ... 0 11 15 32 +10395 PF10563 CdCA1 Cadmium carbonic anhydrase repeat Bateman A, Coggill P anon Bateman A Repeat This domain is the cadmium carbonic anhydrase repeat unit of the beta-carbonic anhydrase of a marine diatom [1], that uses both zinc and cadmium for catalysis of the reversible hydration of carbon dioxide for use in inorganic carbon acquisition for photosynthesis (thus being a cambialistic enzyme). Compared with alpha- and gamma-carbonic anhydrases that use three histidines to coordinate the zinc-atom, this beta-carbonic anhydrase has two cysteines and one histidine, and rapidly binds cadmium [2]. 
20.80 20.80 21.80 30.80 19.70 20.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.41 0.70 -4.83 18 25 2009-01-15 18:05:59 2008-04-16 17:46:55 4 3 10 6 16 27 2 195.00 32 65.05 CHANGED hWpuulsssssahplhs.+hshs-hD.....cssDG.s.ss..sphstP......................................+hLushh.hsspcGhRsp+h.cstsp.ss.................psGcs.pVHhsHSu...hhuCG....Ypp.ahsGhFsshs..................hssasu..GutsltsAsGV...l.slVs..-EpYhY.shlcGhhl-sctp.....................cpp.lssuauslpaplDtpsahIoAuu .............apuuhssVNlHaHlGsEHhShG-aD.....css-G.s.ss..sphsts......................................+hhs..uhtshs...hscGh+shchstssst.ss........YtapaC.thpVGcTYEVHWsHSu...hGACG....Ypp.ahsGVFsshs..................hsshsu.tus...tsltsulGVpuQVaslVNt.-E..p.hhY.shlcGhhl-ssts.......t-hshY.GSTTGpt.csN.-lCsua.usloWplDRpCHhloAuu. 0 11 15 16 +10396 PF10564 MAR_sialic_bdg Sialic-acid binding micronemal adhesive repeat Coggill P anon Chen Z Repeat This domain is a novel carbohydrate-binding domain found on micronemal proteins. Micronemal proteins (MICs) are released onto the parasite surface just before invasion of host cells and play important roles in host cell recognition, attachment and penetration. Toxoplasma gondii can infect and replicate within all nucleated cells [1]. This domain interacts with sialylated oligosaccharides; the protein in Toxoplasma gondii is a monomer but several MAR domains are carried on the protein. Each MAR domain contains one central sialic acid-binding pocket [2]. 
25.00 25.00 26.00 25.20 24.80 24.50 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -10.82 0.72 -4.23 17 151 2009-01-15 18:05:59 2008-04-16 17:48:43 4 12 14 13 55 153 0 94.00 27 32.00 CHANGED LDpaC.sphuppsspsshp.hh........hhhAR.t.ttptttsh.WRCYspsphphstst.....ClDsCGshhs..C.uslpt.ssphhs+s....th.plIsppt......................hC .........................lDthC.tph.tthstphhp.shsss......shVARhsh.s.psup..pst..WRCYstppLshstps.t...ClDsCGshhs..C.Gs.....lstsssp....ahTtp....pl.phIppt........................................ 0 34 36 54 +10397 PF10565 NMDAR2_C N-methyl D-aspartate receptor 2B3 C-terminus Coggill P anon Pfam-B_53396 (release 22.0) Domain This domain is found at the C-terminus of many NMDA-receptor proteins, many of which also carry the Ligated ion-channel family Pfam:PF00060 further upstream as well as the ANF_receptor family Pfam:PF01094. This region is predicted to be a large extra-cellular domain of the NMDA receptor proteins, being highly hydrophilic, and is thought to be integrally involved in the function of the receptor. The region also carries a number of potential N-glycosylation sites [1]. 
25.00 25.00 32.80 31.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 681 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.28 0.70 -5.85 11 229 2009-01-15 18:05:59 2008-04-16 17:49:41 4 14 86 4 78 194 0 403.00 39 42.23 CHANGED HLFYWpLRaCFhGVCSG+PGLlFSISRGIYSCIHGVpIE-K...thsSPoh....shssopSNhL+LLRoAKsMssloslNG....SP+sAh-Fh+p..cSulhDhupc+tshstsc.....s.sa.sc-shFuD.hS-lcpphuNlphccsNsYtcp.hpHthshst...........pPpSlGuuuSh-Gshs..................sopPRulh....KKslDhlttpsssssss.ptthssh.G+.sashKS.........sRYh.......st-..sh+SDlSDhSo+ssoYts.Eus..t+++pph+DoLKKRsuouK..R-hsElEL.......................shh+s.......spuus..cphYhh.pDKEslpsFalDQh+s+EGss..h-clDhs-hauscussh+pssss.........hthsoShlphc-h.....p....tpthspthshuspsps......sstussthuss..aCRSChS..............................phsuauuhs..tpsspusshRC-uCp.+sGNLYDISEDp.Lpct.........................tpsttupsshstthsQssstphp+pptttpLpRQHSYDsh.l-ht+Ets........................h.ssRSVSLK-K-Rah-Gsshh....................................h......utchhss+ssthhuucsttt.p.................sst..............hphSKSLYPD+sopNPFlsTasD.......D...QpLlHGupua.hhKpphs.............httpsps.tposhussSassssu..........Rhsp-lslu.....ppshshsssp...h.......ssPRshNuu..oNs+VYcKlsSIESDV 
...................HLhYWphRash...po.sp.sh..lh..uhSRGIYSChpGVt...tp.....tsPsh....shs.spuNhL+hLpsAKshssh..sshps....S.ppshchh..................................................................................................................................................................................................................DhS-hSs+sssahp.-s......+p.p.+ssh.KRs.su+..+-.s.................................................tt.p..pp.a...t-pt...sh.....p..Es.sh..Epl-hs-.ap-ts-sh......++ssss.........h..sps..p.ccs.............hhtpthshtthsts.......s-hppp.tts..aCRSC.S..............................ph.sYus............p.shpssh+C-uCh.+hGNLYDIsEDp.Lp-h................................tts.t..tthsps.s.phtp.tp...p.lpRQHS.............................................................................................................................................................................................................................................................................................................................................................................................................................. 0 3 13 31 +10398 PF10566 Glyco_hydro_97 Glycoside hydrolase 97 Coggill P anon Naumoff D Domain This domain is the catalytic region of the bacterial glycosyl-hydrolase family 97. This central part of the GH97 family protein sequences represents a typical and complete (beta/alpha)8-barrel or catalytic TIM-barrel type domain. The N- and C-terminal parts of the sequences, mainly consisting of beta-strands, form two additional non-catalytic domains [1]. In all known glycosidases with the (beta-alpha)8-barrel fold, the amino acid residues at the active site are located on the C-termini of the beta-strands [2,3]. 
28.00 28.00 28.10 28.10 27.90 27.80 hmmbuild --amino -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.74 0.70 -5.30 173 795 2012-10-03 05:44:19 2008-04-16 17:52:05 4 17 278 12 178 849 187 272.00 35 40.12 CHANGED D.s.SWIK.PGKhsas.WWshtss............................tu...s...sscshKpYIDFAAchGhcalLl.........DtGWt.................sh..ps.sc....h-lpcLspYA+pKGltlhLWhppps...............hcpp.h.-cthphhpchGlpGlKsDFhsp...................-sQhhlpaYpcllcpAAca+lhVshHG.sh+PsGhpRTYPNhlspEuV+GtE..pths.......sst.spHsshLPFTRhluGPMDYTPGhhp.tht..................pspspsT...hu+QLAhaVlh.SPlQMh..uDtPpsY.....pp.......shpFlcslP ..................................DsSWIKPG.Khshs.WWphhss.......................t.s.ptuh...sspshKcYIDFA.Ap.....pGh-hlLl....stGWp....................t..hshhp.shs-...........hDlppLscYA+pKG..lhLhhpac..s..................................h-pp...h.-cthphhpchGlpulKss..ahss..........................ssQhhssaYhcllccAAca+lhVshHt.sh+P.oGlpRTYPNhlspEus+GtEh....ptas........sspspHsshLPFTRhluGPMDYTPGhhppthp................spsts.poT.hu+QLAhaVlh.SPlQMh..ADhPpsYtct.........sshpFlcsVP.................................................................................... 0 71 152 170 +10399 PF10567 Nab6_mRNP_bdg RNA-recognition motif Griffiths-Jones S, Coggill P anon Pfam-B_57293 (release 21.0) Domain This conserved domain is found in fungal proteins and appears to be involved in RNA-processing. It binds to poly-adenylated RNA, interacts genetically with mRNA 3'-end processing factors, copurifies with the nuclear cap-binding protein Cbp20p, and is found in complexes containing other translation factors, such as EIF4G as in Swiss:P39935 and Swiss:P39936. 
25.00 25.00 31.10 31.10 24.60 19.60 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.09 0.70 -5.44 5 25 2009-01-15 18:05:59 2008-04-16 17:54:03 4 4 24 0 15 24 0 317.80 46 30.36 CHANGED INYKVLPKGcDsY+TRSLLFENV-..+-lDLHsFl+pFVKauPlESlYLIcsss...............csscsh-sDscN.............................pSILLSFLTKssCLDFYNNlLQRLSEFKTpL+SscLolSFVslpY.spp............sDEEush...p.hl..suLchNIlo+GATRSIsVEFpssVp..cpDhlpcKLPFLcsScNKRYILEuVDlINA--ssssFPpNYsILTFLNISMAlEVhDaL+t...p+shuIS+ChFVolss.h..................p+.Ss.Ssssssssp.psscosSVSshSt...................lSLossooslS..LscElDs....ht.lcL-tpsLplshS-Y.pPhIEpHscHL ........IsYKlLPp....GDDsYRTRSLLhENV-..cSIDLHShlcsFVK.sslESsYLIcus................psscsp-s-scs.............................hSILlSFLTKusCLsFYNNlLQR...LSEFKThLKS-uLsLpFVsLsYcscshsp..tt.......................t.p.NsEEsDl....s.hl.suSL+aNIsN+sATRSIhIEFc.osVc......KsD.LhcKKL.pFL.cpucNKRYILESIDLVNs-ss.sspFPcNYAlLTFLNISMAlEVLDYLKph..o+sLGISKChaVSlsshs.p..............ss++hSs.SsssusNscossssssspSshSs...........................S.hSLoSh.uSsVS..Ls-ElDhhs.pKLpulcLcsphLplshp-YpsPpIEpHosHL........ 0 1 6 13 +10400 PF10568 Tom37 Outer mitochondrial membrane transport complex protein Wood V, Coggill P anon Pfam-B_30563 (release 22.0) Family The TOM37 protein is one of the outer membrane proteins that make up the TOM complex for guiding cytosolic mitochondrial beta-barrel proteins from the cytosol across the outer mitochondrial membrane into the intramembrane space. In conjunction with TOM70 it guides peptides without an MTS into TOM40, the protein that forms the passage through the outer membrane [1]. It has homology with Metaxin-1, also part of the outer mitochondrial membrane beta-barrel protein transport complex [2]. 
21.50 21.50 21.50 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.44 0.72 -3.71 17 336 2012-10-03 14:45:55 2008-04-16 18:03:22 4 12 211 0 210 319 1 67.70 30 20.00 CHANGED LHlWGhshslssIsspClAshahhphshspp..........hplVtSsNsslSsoscLPsLhs..ssphls....GatsIlpaLp ..........lahWus..s..au.LPSlcspsLAlhsYh+hssss...........hclhtpsN.s.h.hSPoGcLPhLps.....ss....ph..lo......thtpIlpaL......................... 0 40 81 150 +10401 PF10569 Thiol-ester_cl Alpha-macro-globulin thiol-ester bond-forming region Finn R, Coggill P anon PROSITE_PS00477 Domain This short highly conserved region of proteinase-binding alpha-macro-globulins contains the cysteine and a glutamine of a thiol-ester bond that is cleaved at the moment of proteinase binding, and mediates the covalent binding of the alpha-macro-globulin to the proteinase. The GCGEQ motif is highly conserved. 20.10 20.10 20.10 20.10 19.90 20.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.30 0.72 -4.72 83 1638 2009-09-13 17:40:59 2008-04-17 09:17:48 4 94 646 55 611 1531 44 29.30 44 2.09 CHANGED sLspLlphPtGCGEQsMlphsPslhshpY.Lc ........tlppLlphPaGCGEQsMsthsPslhshpYLp......... 0 121 201 399 +10402 PF10570 Myelin-PO_C Myelin-PO_N; Myelin-PO cytoplasmic C-term p65 binding region Finn R, Coggill P anon Pfam-B_1437 (release 22.0), PROSITE_PS00568 Domain Myelin protein zero is the major myelin protein in the peripheral central nervous system and is essential for normal myelination. The family is a single-pass transmembrane molecule containing one Ig-like loop in the extracellular domain and this highly basic 69 residue C-terminal cytoplasmic domain which is the region that interacts with protein p65 [2]. 
25.00 25.00 29.80 29.30 22.10 21.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.35 0.72 -3.91 4 47 2009-01-15 18:05:59 2008-04-17 09:23:04 4 3 30 0 19 35 0 66.80 75 27.58 CHANGED VRaCWLRRQuhLQRRLSAhE+GKLp+.AKDuSKR.uRQsPVLYAMLDpSRSsKuASEKKuKGh.GESRKDKK ............lRYCWLRRQAALQRRLSAMEKGKLHKsuKDuSKR.GRQTPVLYAMLDHSRSTKAASEKKuKGL.GESRKDKK..................... 0 1 2 4 +10403 PF10571 UPF0547 Uncharacterised protein family UPF0547 Coggill PC anon UPF0547 Domain This domain contains a zinc-ribbon motif. 27.30 27.30 27.30 27.40 27.20 27.20 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.04 0.72 -7.45 0.72 -4.24 31 219 2012-10-03 10:42:43 2008-04-17 09:30:18 4 12 202 0 65 235 57 25.80 44 10.55 CHANGED KpCPpCsppl........shusphC....shCGasFpt ...+pCPcCpppl........stuscpC....PpCGatFh.p... 0 21 30 44 +10404 PF10572 UPF0556 Uncharacterised protein family UPF0556 Coggill PC anon UPF0556 Family This family of proteins has no known function. 25.00 25.00 31.70 30.30 21.10 16.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.09 0.71 -5.04 6 52 2009-01-15 18:05:59 2008-04-17 09:36:22 4 5 40 0 36 49 0 142.50 48 69.88 CHANGED MAs.............hs.lh.hhhhh.sspuhpcspohEFDlRPGG.laoFopslt...cYpCsFTYAuQGGTNEpW.MSlGLS-DsphFSCSlWRPQGKSYLFFTQFKAEl+GAKIEYupAYSQsussupuDVPLKsEEapVo-osVopRsGpF+upLuKLsllu+spHDEL ................h...............hh.hhhhh........tusp.p........spohtFDl+PGG..lcoFopslt......capChFTYAuQ..GGT.NEpWpMolGhS-DpphFoCo..lWR...P...Q.G.KSYLaFTQFKAEl+G.AcIEYuhAYSpsuhttpuDVsLKsEEFpVoc..o..sVspRsGtF+ucLSKLslVA+t..t+sEL......................... 0 13 16 22 +10405 PF10573 UPF0561 Uncharacterised protein family UPF0561 Coggill PC anon UPF0561 Family This family of proteins has no known function. 
25.00 25.00 25.40 25.90 22.80 24.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.65 0.71 -4.28 5 61 2009-09-10 19:05:18 2008-04-17 09:38:23 4 4 45 0 43 47 0 107.90 50 52.62 CHANGED MEs.u-upGcuVphKPGGRLDMSHGFVHHIRRNQIARDDYDKEVKQ.AKEKQRRRHTssPRRPRRPDLQVYpPRpRpGSp.ssss-sEEpsESoSo.poEsEssGspLFpLDYEADuG-ITSlIVaK ...........................thp.KPGGRLDMsHGFVHHIRRNQlARDDYDKcVK......Q..AKEchR+R+T...suPpR....P..R+P..D.....l.QVYhPR+R..cs.o...tt.s..ss-hEE...ssESoSS.to-.EspuppLFpL-YpADuGclTSlIVap........................................................ 0 10 13 23 +10406 PF10574 UPF0552 Uncharacterised protein family UPF0552 Coggill P anon UPF0552 Family This family of proteins has no known function. 22.20 22.20 22.40 23.70 19.90 22.00 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.40 0.70 -4.89 5 71 2009-01-15 18:05:59 2008-04-17 09:41:11 4 4 46 0 43 55 0 196.30 55 88.99 CHANGED MSRIYpsTALpNKsVHsE+asGoW-PusaQuG-GVLLEGpLlDaSRHoIoDsKs+.......KERYYVLYIRPSRIHRRKFDsKGNEIEPNFSDTKKVNTGFLMSSY............................KVEAKG-SDRLot-QLssLV...NKspLl.......KIT-+H.sP+ETaAFWhPEuEM-KTELEsGpEVRLKT+GDGPFlFSLAKlDSGTVTKCNFAGDupAGASWTDNIMApKSspssu.ScspuQGDGA-DDEW ..................MShIYpsssLpscsVps.phsusWsPs..t..aQuGsGVLLEGpLh.DlSRHsI.Dspsp.......KpRaaVLYlcPuplH+R+FDs+GpEIEPNFSsT+KVNTGaLMSSY............................KVEAKG-oDRLo.-tLpsLV....sKs-.LL.......ulTpph..oPspolAFWhPEuEM-thELElGstVRLKT+GDGPFl-SLAKL-uGTVT.K.CNFA.....G...DsKTGASWTDNIMApKsocsss..sE...h.+p.QGDGA-D-EW....... 
0 18 21 26 +10408 PF10576 EndIII_4Fe-2S Iron-sulfur binding domain of endonuclease III Finn R, Coggill P anon PROSITE_PS00764 Domain Escherichia coli endonuclease III (EC 4.2.99.18) [1] is a DNA repair enzyme that acts both as a DNA N-glycosylase, removing oxidised pyrimidines from DNA, and as an apurinic/apyrimidinic (AP) endonuclease, introducing a single-strand nick at the site from which the damaged base was removed. Endonuclease III is an iron-sulfur protein that binds a single 4Fe-4S cluster. The 4Fe-4S cluster does not seem to be important for catalytic activity, but is probably involved in the proper positioning of the enzyme along the DNA strand [2]. The 4Fe-4S cluster is bound by four cysteines which are all located in a 17 amino acid region at the C-terminal end of endonuclease III. A similar region is also present in the central section of mutY and in the C-terminus of ORF-10 and of the Micro-coccus UV endonuclease [4]. 20.50 19.60 20.50 19.60 20.40 19.50 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.58 0.73 -6.71 0.73 -3.58 505 3890 2009-01-15 18:05:59 2008-04-17 09:52:34 4 11 2745 20 789 2381 391 17.00 50 6.34 CHANGED Cps+.pPpC....stCPlp..sh.C CpAR.KP+C....shCslp..ch.C..... 0 236 509 651 +10409 PF10577 UPF0560 Uncharacterised protein family UPF0560 Coggill P anon UPF0560 Family This family of proteins has no known function. 
19.40 19.40 19.40 19.50 19.30 19.10 hmmbuild -o /dev/null HMM SEED 807 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.66 0.70 -13.35 0.70 -6.28 7 258 2009-01-15 18:05:59 2008-04-17 09:54:33 4 17 49 0 134 241 0 425.80 28 87.35 CHANGED lplKVQV.DsushpPLAsAsVElFuNpoul.ASGsTsuDGsshlthsY+LGo.llVoAoKpuYVsNSlPW+spRLPlauSVSLhLLPERsATLhlYEDlVpIluG..GARsQPhVpFQR+AlpLP.ssoYssLoAhLTsAuo..chcuFPahhGh-.uNuo.Gssshh-LsPlAAlSVHLhsusGspl.lsGPIpVSlPLPscss.LptsspVPAWRFD.phGsWl+sGhGhl+c-GsQLhWTaluPQLGYWlAAhsPspsG.lhsp....DIsoYHTlFLLuILGuhAlllLlLLClLLYYCRR+CLKPpppHRKLpLs.sL-..K+DQATShS+lNLl.....................................osts.ts..ss.L+...sopcDhspp..tphlpHpccspoth.....................tph-.a.hKuppsut.ppt.t.h.p--.ppuhsoh.sp....cp..tutusttphussss.sh.........t..tpths-u+ss-hhhopSlDpLpRPo..sh.opPGQlIhCuSlDplp-us.YRpshPTLVIPAHYh+LPuEtshsupshh.pspppp-htshpst.tpsa.........Q..stt.ptQthusppupsupuppW.u..ssshutSVoIPsshN-ushAQh.NuElQhLTEKpLhELGs...hPHPRAWFVSLDGRSNApVRHSYIDLQpuspstS......................sDASLDSGVDhNE.+suR+hc...............hpE+pt.pt.tssss....huhopLlYhEDh-.SuSEstsshs.SPEDsuLpslL-tuspsphsphsphtcpps+possSshps.sp.ppc.stpsptc.s-pst--tspsKKSPWQKREERPLhsFN 
.............................................................................................................................................................................................................................................................s........h.......................h...h....s..h.hphsh.......................ss.h.ha.ap.t...............G..W.pts.s.l............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 1 18 26 57 +10410 PF10578 SVS_QK Seminal vesicle protein repeat Finn R, Coggill P anon PROSITE_PS00515 Repeat \N 25.00 2.00 26.50 2.10 18.20 1.70 hmmbuild -o /dev/null HMM SEED 12 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -5.92 0.74 -5.80 0.74 -3.09 13 217 2009-01-15 18:05:59 2008-04-17 10:01:42 4 4 4 0 105 234 0 12.20 53 57.33 CHANGED uQlKSFGQhKSp ....uQ.Ko....FuQpKo.. 0 92 92 92 +10411 PF10579 Rapsyn_N Rapsyn N-terminal myristoylation and linker region Finn R, Coggill P anon PROSITE_PS00405 Family Neuromuscular junction formation relies upon the clustering of acetylcholine receptors and other proteins in the muscle membrane. 
Rapsyn is a peripheral membrane protein that is selectively concentrated at the neuromuscular junction and is essential for the formation of synaptic acetylcholine receptor aggregates. Acetylcholine receptors fail to aggregate beneath nerve terminals in mice where rapsyn has been knocked out. The N-terminal six amino acids of rapsyn are its myristoylation site, and myristoylation is necessary for the targeting of the protein to the membrane [1]. 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.47 0.72 -4.18 4 91 2012-10-11 20:01:02 2008-04-17 10:43:49 4 29 73 0 53 118 3 78.90 50 17.87 CHANGED MGQDQTKQQIEKGL+LYQuNpTpKALclWtpVLc+To-.sG+FRlLGCLITAHSEMGKY+-MLcFultQlssAREh-DP- ..........MGQcQs.KQpIE+.GLp.LY.puN..ppccALpsWp+sL...c...K...os.c.h...t.s.RF...p...lLGtL.hpAHsEhG+Y+-MLcFAlhQl-hAcELEDss................. 1 12 15 35 +10412 PF10580 Neuromodulin_N Gap junction protein N-terminal region Finn R, Coggill P anon PROSITE_PS00412 Domain \N 18.50 18.50 22.20 22.20 15.90 15.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.51 0.72 -4.30 3 72 2009-01-15 18:05:59 2008-04-17 10:48:03 4 3 46 0 26 57 0 28.50 80 13.39 CHANGED MLCCIRRTKPVEKNEEADQKIEQDG..IKPEDKA .......LCChRRTK.pVEKN--.DQKIEQDG..IKPEDKA.. 1 1 3 9 +10413 PF10581 Synapsin_N Synapsin N-terminal Finn R, Coggill P anon PROSITE_PS00415 Domain This highly conserved domain of synapsin proteins has a serine at position 9 or 10 which is a phosphorylation site. The domain appears to be the part of the molecule that binds to calmodulin [3]. 25.00 25.00 25.50 25.50 22.50 21.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.51 0.72 -4.13 5 136 2009-01-15 18:05:59 2008-04-17 11:41:42 4 5 42 0 56 160 0 31.30 77 6.20 CHANGED MNYLRRRLSDSuFluNLPNGYMsDLQRP-PPu .......MNaLRRRLSDSoFhANLPNGYMoDLQRP-ss....... 
0 5 11 28 +10414 PF10582 Connexin_CCC Gap junction channel protein cysteine-rich domain Finn R, Coggill P anon Pfam-B_1437 (release 22.0), PROSITE_PS00408 Domain \N 22.50 22.50 23.20 24.60 22.10 22.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.22 0.72 -4.16 76 1238 2009-01-15 18:05:59 2008-04-17 11:42:18 4 11 126 6 649 1009 0 66.10 49 21.43 CHANGED hEluFlhsQYhLY.GFplsslahCs.ptPC...Pps.VDCFlSRPTEKTlFllFMhslos...lsllLNlhElhaL ..........................................hEluFlhsQYhLY..GF.p..lssla.pCs......p.............PC.............Pp.s..VDCFlSRPTEKTlFllFMhslus...lsllLNlhElhaL............ 0 56 121 321 +10415 PF10583 Involucrin_N Involucrin of squamous epithelia N-terminus Finn R, Coggill P anon Pfam-B_7423 (release 22.0), PROSITE_PS00795 Domain This is the N-terminal three beta strands of involucrin, a protein present in keratinocytes of epidermis and other stratified squamous epithelia. Involucrin first appears in the cell cytosol, but ultimately becomes cross-linked to membrane proteins by transglutaminase thus helping in the formation of an insoluble envelope beneath the plasma membrane [1].\ Apigenin is a plant-derived flavanoid that has significant promise as a skin cancer chemopreventive agent. It has been found that apigenin regulates normal human keratinocyte differentiation by suppressing it and this is associated with reduced cell proliferation without apoptosis [2]. The downstream part of the protein is represented by the family Involucrin, Pfam:PF00904. 21.50 21.50 77.20 77.20 18.70 18.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.66 0.72 -3.67 9 42 2009-09-11 05:37:53 2008-04-17 11:45:25 4 19 29 0 11 37 0 68.00 69 15.11 CHANGED MSQQHTLPVTLPPALSQE.LKsVSPPssTQQEQhKQPTPLPsPCQKV.SELPscVP.cHtEKHso.VKG MSQQHTLPVTLsPALSQE.LKTVsPPssTQQEQhKQPTPLPsPCQKVssELPVEVPsKpEEKHhTsVKG.. 
0 1 1 1 +10416 PF10584 Proteasome_A_N Proteasome subunit A N-terminal signature Finn R, Coggill P anon PROSITE_PS00388 Domain This domain is conserved in the A subunits of the proteasome complex proteins. 23.00 23.00 23.10 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.30 0.72 -6.55 0.72 -4.61 74 2896 2009-01-15 18:05:59 2008-04-17 11:46:05 4 19 554 763 1805 2718 83 22.80 63 9.05 CHANGED YDpshosFSP-GRLaQVEYAhcA .......YDpslTsFSP-GRLaQV.....EYAhcA... 0 629 1011 1482 +10417 PF10585 UBA_e1_thiolCys Ubiquitin-activating enzyme active site Finn R, Coggill P anon PROSITE_PS00865 Domain Ubiquitin-activating enzyme (E1 enzyme) activates ubiquitin by first adenylating with ATP its C-terminal glycine residue and thereafter linking this residue to the side chain of a cysteine residue in E1, yielding an ubiquitin-E1 thiolester and free AMP. Later the ubiquitin moiety is transferred to a cysteine residue on one of the many forms of ubiquitin-conjugating enzymes (E2) [1]. This domain carries the last of five conserved cysteines that is part of the active site of the enzyme, responsible for ubiquitin thiolester complex formation, the active site being represented by the sequence motif PICTLKNFP [2]. 19.90 19.90 19.90 20.20 19.80 19.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.45 0.72 -4.57 65 1276 2009-09-11 07:00:39 2008-04-17 11:47:32 4 44 343 39 841 1263 19 45.50 39 6.15 CHANGED thTEsYssstcsspc..s...hPlCTl+shPpphpHCIpWA+.thFpphFs ............................hTpsYp..s..s.-..sspc...p......hPhCTl+shPpp..-HCI..pWA+..hFpphF............ 
0 310 475 688 +10419 PF10587 EF-1_beta_acid Eukaryotic elongation factor 1 beta central acidic region Finn R, Coggill P anon Pfam-B_9497 (release 22.0), PROSITE_PS00824 Domain \N 25.70 25.70 25.80 25.90 25.60 25.60 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.64 0.72 -7.26 0.72 -3.79 37 495 2009-01-15 18:05:59 2008-04-17 12:56:35 4 11 254 0 245 487 0 27.50 62 9.97 CHANGED LFGSD-E.EDcEAc+l+p.cRLAtYstKK ..LFG.S.D.-..E.-..ED.tEAt+l+E.ERLtpYspKK.. 0 77 120 191 +10420 PF10588 NADH-G_4Fe-4S_3 NADH-ubiquinone oxidoreductase-G iron-sulfur binding region Finn R, Coggill P anon Pfam-B_202 (release 22.0), PROSITE_PS00642 Domain \N 20.10 20.10 20.10 20.30 20.00 20.00 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.03 0.72 -4.72 191 4123 2009-01-15 18:05:59 2008-04-17 12:59:17 4 128 3222 15 1243 3421 1974 41.00 43 5.37 CHANGED R+sllElLLssH....sh-CssC-.psGpCcLQchuhchGlpct+a .......RcslhEhLLtNH....PL.DCslC-.puGcCcLQ.-huhthGhsppRa......... 1 463 862 1067 +10421 PF10589 NADH_4Fe-4S NADH-ubiquinone oxidoreductase-F iron-sulfur binding region Finn R, Coggill P anon Pfam-B_339 (release 22.0), PROSITE_PS00645 Domain \N 20.70 20.70 20.80 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.35 0.72 -4.81 299 3722 2009-01-15 18:05:59 2008-04-17 13:02:48 4 63 2661 15 1286 2965 2211 45.80 42 9.34 CHANGED Vc.hspphhc.Fh...tcESCGpCTPCRtGot.hhhcllc+.l.tp...G..cu....p....................tpDl ............Vphspph.c..Fa.....t+ESCGpCTPCR-Gos.ahhcllc+.l..pp..Gcup.tDl........................... 
0 477 891 1106 +10422 PF10590 PNPOx_C Pyridoxine 5'-phosphate oxidase C-terminal dimerisation region Finn R, Coggill P anon Pfam-B_685 (release 22.0), PROSITE_PS01064 Domain Pyridoxine 5'-phosphate oxidase (PNPOx) catalyses the terminal step in the biosynthesis of pyridoxal 5'-phosphate (PLP), a cofactor used by many enzymes involved in amino acid metabolism. The enzyme oxidises either the 4'-hydroxyl group of pyridoxine 5'-phosphate (PNP) or the 4'-primary amine of pyridoxamine 5'-phosphate (PMP) to an aldehyde. PNPOx is a homodimeric enzyme with one flavin mononucleotide (FMN) molecule non-covalently bound to each subunit. This domain represents one of the two dimerisation regions of the protein, located at the edge of the dimer interface, at the C-terminus, being the last three beta strands, S6, S7, and S8 along with the last three residues to the end. In Swiss:P21159, S6 runs from residues 178-192, S7 from 200-206 and S8 from 211-215. the extended loop, of residues 167-177 may well be involved in the pocket formed between the two dimers that positions the FMN molecule [2]. 20.30 20.30 22.10 21.20 19.50 18.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.45 0.72 -4.46 243 2399 2009-01-15 18:05:59 2008-04-17 13:57:12 4 8 2210 18 735 1792 1629 42.50 54 19.13 CHANGED WGGaRlhPpplEFWQGpssRLHDRhhY.............p.+..............................................p.ss.s.........WphpRLtP .....WGGaRltPcplEFW.......QGpt.pRLHDRhhYpR...........................................................p..ss.s.......Wpl.-RLuP................................ 1 211 437 608 +10423 PF10591 SPARC_Ca_bdg Secreted protein acidic and rich in cysteine Ca binding region Finn R, Coggill P anon Pfam-B_3882 (release 22.0), PROSITE_PS00613 Domain The SPARC_Ca_bdg domain of Secreted Protein Acidic and Rich in Cysteine is responsible for the anti-spreading activity of human urothelial cells. It is rich in alpha-helices. 
This extracellular calcium-binding domain contains two EF-hands that each coordinates one Ca2+ ion, forming a helix-loop-helix structure that not only drives the conformation of the protein but is also necessary for biological activity. The anti-spreading activity was dependent on the coordination of Ca2+ by a Glu residue at the Z position of EF-hand 2 [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.69 0.71 -4.05 28 665 2012-10-02 16:17:27 2008-04-17 14:18:31 4 31 135 6 337 1603 18 112.60 31 30.93 CHANGED hssCs-p-LscFspRhRDWlpslhtphhccpptp...........................t..ph.pcsppp.hphhppsltWpFscLDts.tDphLs+pELtsl+usL...hshEpChpsFhpsCDsspDphIohpEWspC ...................................................................................................................................h..sCsst-htphs.RhtDWhpslh.phhpptp.p...............................................................................hp..h.....tp.....pp..h..shppp.sltW.h.F...s.pLDps.tDt...hLs+........pE.L....t....s.....l...c.t...l................sp...E.....+.C......h.............c..........t.............Fh.cpCD.s....s..pDptIohpEWstC.......................... 0 74 101 207 +10424 PF10592 AIPR AIPR protein Iyer LM, Bateman A anon Iyer LM Family This family of proteins was identified in [1] as an abortive infection phage resistance protein often found in restriction modification system operons. 
25.00 25.00 25.90 25.10 24.40 24.30 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.03 0.70 -5.10 65 420 2009-01-15 18:05:59 2008-04-17 14:47:42 4 2 382 0 123 388 93 298.40 19 53.89 CHANGED h-pNVRsaLtsp.........s....VNpuIccTLp...ppPppFhhaNNGITllusclph..sssptt.......hplcshQ.........IVNGuQTosolaps.........tp.ptslsp.......lhV.l+lhhs......ps-....slhscIochsNoQNpVptpDhtupcshppclEphhpshh............haYERtcGpatstpstt....................p.hhspsthtphhssatpcPphsststpphapph.............t.hpphhp..tphst....hatphlshh.llhcthcphlpptt.....................tshstYsl...thhsthhttt.........................................shttlhppp.........plspphhptlhth.spphpchhppsstshtsh ..................................hppNlRsaLsts...........s....VNcsIpcTlp.................ppsptFahaNNGITlls.s..clph..pppst..........hpl.cshp............IlNGuQTssslhps..........................ttttplpp.......shV.l+llth.................psc....phh.spIocssNoQNpVpspDh.t.utcshppplcc.hhppht..............hhYcctcspht.spptph....................thhhsh.sthtp.hh..s..s.h..p.......p.........p.........Pp.h.....s.t.p..ttpphhpph.............t.hpphhs............sphst......hhhphlthh..hlhp....thpphhtptt...........t............................tphhhath......hh..hhttp................................................p..t..tt..............th.tth..p.h......................hh.......................................................................................................................................................... 0 40 94 115 +10425 PF10593 Z1 Z1 domain Iyer LM, Bateman A anon Iyer LM Domain This uncharacterised domain was identified by Iyer and colleagues [1]. It is found associated with a helicase domain of superfamily type II. 
25.00 25.00 25.10 48.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.68 0.70 -5.15 35 212 2009-01-15 18:05:59 2008-04-17 14:51:00 4 3 199 0 78 223 142 234.80 28 28.00 CHANGED pSLccAlpsFllusAlRplR.....Gt.sscHs................SMLlHsS+hssspcpltshl.pph..lpplcpslpttsstt.........hscL+placpch..................ps.sh.sas-ltstLhpsl....s................s..lclhllN.ussp..-s....LDYssspt.h......ssIslGGspLuRGLTlEGLssSYFhRso+...hhDTLhQMGRWFGYRtsYtDLCRlah..........ot-lhphFpplspssE-LRppl.cphttss.....hTPppaulpVpsps...sLtlT ..........sLccAlhsFllssAl+thR..........Gp...tpc+p.................SMLlHsSphpsspppltphl.pph....hpplppslpttpsts...........hschcplappcht.......................tt.ph.sa--l..h..p.tLhpsl...p.....................s..lplhhlN.ssss..ct.......l-appppt...........tsIslGGspLuRGLTlEGLssoYahRsop...thDTLhQMGRWFGYRts.YtDL..sRlah..........st-lhphFppls.pspp-lRppl.cphtt.ts......hsPhpashplpsp.....h............................. 0 24 58 70 +10427 PF10595 UPF0564 Uncharacterised protein family UPF0564 Coggill PC anon UPF0564 Family This family of proteins has no known function. However, one of the members, Swiss:Q22CP8, is annotated as an EF-hand family protein. 
21.10 21.10 21.10 21.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.63 0.70 -5.54 15 182 2009-01-15 18:05:59 2008-04-18 09:08:17 4 5 94 0 110 180 3 304.80 25 51.40 CHANGED .cITVPpPFphshREpc+pp.t.hpppclcp.hpchpcc..-csEhp++F+ApPVPppVhlP.LYcclhccpEcRRcpl+ccS+thhLps.pKPFpFhtR-cpKcphh.pp.............pcccsppF+A+P...lPcphhtshlpph.....................pEcEhhRppRsphRup-hlps.SphPschtpptpppp.t...............+ptpspppttcpshcsh+s+slPDFccLacsFpcphscp.............KppppsTlscPFshppS.p+sss+cphspts.pht.............................pcsl+tspc................................P................................hpt.p.h.phhcpph.c+.t+pp...cphp.RcKptpchuspsppphpstssspphpcppccphhph+cc.cKpcpcEYpppl ...........................plTlPpPFpMplREtp++t....htp...s..phE..pp.hpcp...t-..c......t....cpp..+pFRApPVPspValP.LYpclhcppEtRRp.hppcp+thLhup..KPFpFht.........+-cpccth...ttpp..th.............tppp.tpt.h+A.+.........l.P+shhtssht-+....................................hpEpEhhRpl+.hphRAt-hLpp..ushP.p.tt..tp.....................................pptc.hs....t..........t..p.h.phps+hpstsPDa-tLhc.papc..phtcp...............+p....+.sTh.scPFp....Lpsu....htt..sp.+tp.h.t..s.httc................................................ppp.p.sph................................................P.....................................s...s..ppttt.pth+pp....pch.t.pt..p.p..h.p.php.p.....h.cp....ht.p.tt.....s.t.p..t................................................................................................................................................................................ 0 37 50 76 +10428 PF10596 U6-snRNA_bdg U6-snRNA interacting domain of PrP8 Coggill P anon Chen X Domain This domain incorporates the interacting site for the U6-snRNA as part of the U4/U6.U5 tri-snRNPs complex of the spliceosome, and is the prime candidate for the role of cofactor for the spliceosome's RNA core. 
The essential spliceosomal protein Prp8 interacts with U5 and U6 snRNAs and with specific pre-mRNA sequences that participate in catalysis. This close association with crucial RNA sequences, together with extensive genetic evidence, suggests that Prp8 could directly affect the function of the catalytic core, perhaps acting as a splicing cofactor [1]. 29.80 29.80 30.70 31.80 28.70 29.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.01 0.71 -4.87 11 381 2012-10-03 10:25:13 2008-04-18 15:27:45 4 48 305 0 252 401 3 149.80 76 7.47 CHANGED FpcYpls+tssahWTpp+HDGKLaN..LpsY+sDhIpALGGlEsILEHoLFKuTtassWEGLFW-K.............sSGFEpoh.Kh++LTNAQRoGLsQIPNRRFTLWWSPTINR..........usVYlGFQlQlDLTGlFMpGKlPTLKISL...IQlFRuHLWQKIHESlVhDLt...QhhDsEh ..............................FKpYQlhKpNPFWWTpQRHDGKLWN...LNsYRTDhIQALGGVEuILEHTLFK...........GThFPoWEGLFWEK.............ASGFEESM.KaKKLTNAQRSGLNQIPNRRFTLWWS.PTINR..........ANVYVGFQVQLDLTGIFMHGKIP..TLK..ISL...IQIFRAHLWQKIHESlVMDLC...QVFDQEL................... 0 89 139 206 +10429 PF10597 U5_2-snRNA_bdg U5-snRNA binding site 2 of PrP8 Coggill P anon Chen X Domain The essential spliceosomal protein Prp8 interacts with U5 and U6 snRNAs and with specific pre-mRNA sequences that participate in catalysis [1]. This close association with crucial RNA sequences, together with extensive genetic evidence, suggests that Prp8 could directly affect the function of the catalytic core, perhaps acting as a splicing cofactor [2]. 
21.20 21.20 21.20 21.30 20.90 21.10 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.65 0.71 -4.57 20 335 2009-09-11 09:17:08 2008-04-18 15:56:36 4 42 290 0 237 321 1 133.60 72 5.89 CHANGED sppEusWsLhspsTKEpTApAa.LpVSccuIspFcNRlRpILMoSGSoTFoKIAsKWNTsLIuLhTYaREAllsTppLLDlLVKsEsKlQsRVKhGLNSKMPoRFPPsVFYoPKELGGLGMLShGHl.LIPpSDL+aS ...................s.+DGVWsLpNcsTKERTApAF.L+Vs-Eslp+FcNRlRQILMuSGSTTFTKIsNKWNTALIuLhTYaREAsVsTp-LLDlLVKCEsKIQTRlKIGLNSKMPSRFPPVVFYTPKELGGLGMLSMGHl.LIPpSDLRWS........... 0 88 134 197 +10430 PF10598 RRM_4 RNA recognition motif of the spliceosomal PrP8 Coggill P anon Chen X Domain The large RNA-protein complex of the spliceosome catalyses pre-mRNA splicing. One of the most conserved core proteins is PrP8 which occupies a central position in the catalytic core of the spliceosome, and has been implicated in several crucial molecular rearrangements that occur there, and has recently come under the spotlight for its role in the inherited human disease, Retinitis Pigmentosa [1]. The RNA-recognition motif of PrP8 is highly conserved and provides a possible RNA binding centre for the 5-prime SS, BP, or 3-prime SS of pre-mRNA which are known to contact with Prp8. The most conserved regions of an RRM are defined as the RNP1 and RNP2 sequences. Recognition of RNA targets can also be modulated by a number of other factors, most notably the two loops beta1-alpha1, beta2-beta3 and the amino acid residues C-terminal to the RNP2 domain [2]. 
23.50 23.50 24.70 24.20 21.80 23.40 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -10.18 0.72 -4.12 25 346 2009-01-15 18:05:59 2008-04-18 16:21:12 4 41 284 0 246 321 7 92.10 69 4.12 CHANGED EKlDhTLLNRLLRLIlDpNlADYITuKNNVslsaKDMsHlNpYGlIRGLQFoSFlaQYYGLllDLLlLGlpRAsElAG..PsstPNsFhpFpstps ............EKIDLTLLNRLLRLIlDHNIADYhTAKNNVslsYK......DMNHTNSYGlIRGLQFuSFlhQYYGLVlDLLlLGLpRASEhAG..PPQhPN-FLpFpDh.s........................... 0 97 143 207 +10431 PF10599 Nup_retrotrp_bd Retro-transposon transporting motif Wood V, Coggill P anon Wood V Domain This is the highly conserved C-terminal motif GRKIxxxxxRRKx of nucleoporins that plays a critical and unique role in the nuclear import of retro-transposons in both yeasts and higher organisms. It would appear that the arginine residues at positions 2 and 9-10 constitute a bipartite nuclear localisation signal, with two basic peptide motifs separated by an interchangeable spacer sequence, that is crucial for the retro-transposon activity [1]. 25.00 25.00 27.50 27.50 18.80 18.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.48 0.72 -3.39 6 66 2009-01-15 18:05:59 2008-04-18 16:42:08 4 6 41 1 25 66 0 97.30 60 7.65 CHANGED ppuTPsPuSlFsh.GsoNsNhs.........psssPS....osFuFus..shs.......................tstsssSuhssss....hSso..........Ps.....hshGssst.........shssRKIApMR.pR+R ....u.soTPNuSSVFQF.GSS.TTNFNF........TNNNPS....GVFTFGAssSTP.......................AASAQPSGSGuFs....FsQS..........PAu....FTVGSNGKNhFSSSG.TSVSGRKIKTAVRRRK......... 0 3 4 10 +10432 PF10600 PDZ_assoc PDZ-associated domain of NMDA receptors Coggill P anon Pfam-B_10923 (release 22.0) Domain This domain is found in higher eukaryotes between the second and third PDZ domains, Pfam:PF00595, of glutamate receptor like proteins. Its exact function is not known. 
21.20 21.20 22.00 22.30 21.10 20.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.44 0.72 -3.68 12 321 2009-09-11 14:48:04 2008-04-23 15:51:39 4 37 37 29 104 277 0 65.50 53 8.76 CHANGED KPoohahs..DsYuPPDITsSYS.thDNclS.su.lG.......ch.psLsP.sSPsRYSPlPKshLG-DDloREPR .....KPoohYhs..DsYuPPDITs.S.....aSt.....hDNHlSpsu.LGh.......c...shs.P..sSP..uRY.SPlsKphLG--D.hT.R.EPR................................ 0 4 14 41 +10433 PF10601 zf-LITAF-like LITAF-like zinc ribbon domain Protasio A, Bateman A anon Clustering of trematode sequences Family Members of this family display a conserved zinc ribbon structure [1] with the motif C-XX-C- separated from the more C-terminal HX-C(P)X-C-X4-G-R motif by a variable region of usually 25-30 (hydrophobic) residues. Although it belongs to one of the zinc finger's fold groups (zinc ribbon), this particular domain was first identified in LPS-induced tumour necrosis alpha factor (LITAF) which is produced in mammalian cells after being challenged with lipopolysaccharide (LPS)[2]. The hydrophobic region probably inserts into the membrane rather than traversing it. Such an insertion brings together the N- and C-terminal C-XX-C motifs to form a compact Zn2+-binding structure [3]. 21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.05 0.72 -4.11 46 755 2009-01-15 18:05:59 2008-04-24 10:49:46 4 16 214 0 542 755 1 70.80 27 40.68 CHANGED hpssPs.hhCPpCpppshTplphcsGssTa.........hsshlLhhhh..hsC.shlPashcssKDshHhCPsCsshlGhach ...........h...tPsphhCPpCpppl.......hTpl.phpsGhho.a.................lhsh...h.....l.....slhsh...h.....s.....C....sh...lPhhhss.hp.-spHhC..PsCpthlupap................ 
0 237 298 446 +10434 PF10602 RPN7 26S proteasome subunit RPN7 Mistry J, Wood V anon Pfam-B_4112 (release 22.0) Family RPN7 (known as the non ATPase regulatory subunit 6 in higher eukaryotes) is one of the lid subunits of the 26S proteasome and has been shown in Saccharomyces cerevisiae to be required for structural integrity [1]. The 26S proteasome is is involved in the ATP-dependent degradation of ubiquitinated proteins. 29.40 29.40 29.40 29.50 29.30 29.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.75 0.71 -4.89 63 673 2012-10-11 20:01:02 2008-04-24 12:50:55 4 12 331 0 453 671 11 171.80 30 40.18 CHANGED DpshhcphppcNppclpcL-pclcchc.cNhscp-l+puhhchu-aYtclGDhcpAlcsapc.sh-hssusup+lDhhhsllRlulahsDhshVpphlp+Acshh-p..uuD........................................W-p+NR..............................LKsYpGlhtlshRcacpAAphhL-ohsTFsu......sElhoapslshYuslsulhoh-Rs-L+pK ...........................................DpshhpphpcpsptclccL-pclc......shc..pNhhc.pplR............p..uh.chu-aahclG.Dh.............psA.................lcsap.............+..sh....-.h....s....s..uhsphl-hshpll+l....ulaht.Dhsh.lp.pt..lpKAc...shh-p..usD.................................................................................hc.p.+s+..............................LKsh..pGLhplu.h....R...sa+pAAph..........FL-s..h...ssass.................hELhshpslshYsslsulhoh-Rs-L+p................................................ 0 160 250 371 +10436 PF10604 Polyketide_cyc2 Polyketide cyclase / dehydrase and lipid transport Lakshminarayan L, Mistry J anon Manual Family This family contains polyketide cylcases/dehydrases which are enzymes involved in polyketide synthesis. It also includes other proteins of the START superfamily [1]. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -11.10 0.71 -4.01 179 5359 2012-10-02 19:24:03 2008-04-24 14:10:18 4 67 1547 79 1946 8699 1599 142.00 13 81.08 CHANGED thplpps......h....pls.ussppVashlsD...h..sshs......p....W......ts..slh...ps..p......h.s...t...sss......................hh....tthp.hsu..........................lppplsp..h....c.s....tpthsach...............hsht...ph..tss.hplps...ss..s.......G.....sp..ls...ash......pht......................shh.............t.h........ht....thlpsh....hpt.sLppLppthc ...................................................hptsh.tl..s..AssppVa.s...h...l.sD.......h..p.ph.s.......p..........W............hs....tlp........ps.......c..........hh....p.......sss......................................th......t..php..hsu.....................ht..........................h.p..p..p......l..h...p......h.............c...s...........sp..t..l.s..aph............................hsht.....ph...t......sp..h..pl..ps........ts.......s................G..............op....lp...........hph........p.hp...............................s.h....................t..h.........ht......thhp...t...........hpt.s....lptLtt................................................................................................................................ 0 564 1317 1714 +10437 PF10605 3HBOH 3HB-oligomer hydrolase (3HBOH) FIGFAMs, Mistry J anon FIG094011 (Release 2.0) Family D-(-)-3-hydroxybutyrate oligomer hydrolase (also known as 3HB-oligomer hydrolase) functions in the degradation of poly-3-hydroxybutyrate (PHB). It catalyses the hydrolysis of D(-)-3-hydroxybutyrate oligomers (3HB-oligomers) into 3HB-monomers. 
25.00 25.00 27.00 25.30 23.40 24.90 hmmbuild -o /dev/null HMM SEED 690 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.44 0.70 -13.16 0.70 -6.45 13 134 2009-12-10 17:34:17 2008-04-24 14:14:15 4 3 127 0 41 138 14 584.70 48 90.33 CHANGED lsluACsGussssss.....................hNs+PuFl..GsVphpuYDGsoDDLLTAGLGKoGLuSATAPuhAsPssPTAAELRRLAIasNYRALVDsousGGYGoLaGP..NVDAsGssTs.G-GKIAGsEYlAYuDDGoGppNVTlhVQVPsoFsPspPCIVTATSSGSRGVYGAIuouGEWGLK+GCAVAYTDKGTGsGsHDLsTsTVsLIDGTRsstsuAGpsutFsAsLuAss.LAsFNuuhPpRhAaKHAHSQQNPEpDWG+hTLQAlEFAaasLN-paGstsssGts..hplpPusslVIASSVSNGGGAAlAAAEQDTcGLIDGVAVuEPplNlssssulsV+RGusP.lsusG+sLhDYsThANLLQsCAAlAsuhss.APhtsshsss......sltsNRCsoLsApGLloGuTTusQAssALstLct.sGapPESslLpAShashpsssuIAVTYANAYuRASVpDpLCsaSFAuTsAsss.....ssshusushAolFusGNGlPPTsGlsLVNssutG.sPh.shsSlS.usup.DhNl-GALCLRsLhoGsDus............u.tlppGlsplptoGNLpGKPAlIVpGRuDALlPVNHTSRPYlGLNptsEGusS+LSYlEVTNAQHFDAFlG....lPGYssRaVPLHhYhhQALDhMYupLpsGsPLPPSQVVRTlPRGGssu..sAPslosANVPPIssoPuAuDtIslssu....sVsVPD .........................s..huuCtus.psst.....................hNthPu.al..GsVphssY.........DG.........soDDLLTAGLGpoGLuusosPuhAsPspPTAAELRRlAIauNYRALVDhossGGYGphaGP..NVD.h...s.GssTh.G-GhIsGsEYlAauDDGoGppNVollVQlPsoFssspPCIlTATSSGSRGlYGAIus.GEWGLK+GCAVAYsDKGsGsGsH-luossVsLlDGThsstssAGssuhFsAshosu.p.LAAFNuthPNRhAaKHAHSQpNPEpDWGp.TLQAlcFAaasLN..-pa.Gshs.......ss..st.p..hphpssshhsIAuSVSNGGGAulAAAEQDs....p...GhIsuVsVuEPplNl......ssshs.VppGGss..lsshG+PLhDYsThANLhpsCAAhusu..h..ss..uPh.huhh.shsh....tuhtspRCAsLt..AtGLloG..ussssQAs-.....ALtpLcs.hGa.s-SDhlpAshhs.ps.PulAsTYANAYsRupVpDNLCsaSFAsssssss.....ssssssushsshFusGNGlPP.TsGlpLl.Nsu.....tG........uls.phus.DhshsGAhCLRpLhTus...................thultsulstlthsuNLpGKPAIIVpGRuDALlPVNHuSRsYluhNptsEGtt.SpLsahEVsNuQHFDuFls...........lPGaDTRFVPlHhYt.pALshMasaL+s.GsPLPPSQVlRTlPRGGssG..uAPAlosANlPslstsPuA.stIssssu....slsVP............................... 
0 4 16 29 +10438 PF10606 GluR_Homer-bdg Homer-binding domain of metabotropic glutamate receptor Coggill P anon Pfam-B_17370 (release 22.0) Domain This is the proline-rich region of metabotropic glutamate receptor proteins that binds Homer-related synaptic proteins. The Homer proteins form a physical tether linking mGluRs with the inositol trisphosphate receptors (IP3R) that appears to be due to the proline-rich "Homer ligand" (PPXXFr). Activation of PI turnover triggers intracellular calcium release [1]. MGluR function is altered in the mouse model of human Fragile X syndrome mental retardation, a disorder caused by loss of function mutations in the Fragile X mental retardation gene Fmr1. Homer 3 (and to a lesser extent Homer 1b/c) has been shown to form a multimeric complex with mGlu1a and the IP3 receptor, indicating that Homers may play a role in the localisation of receptors to their signalling partners [2]. 25.00 25.00 26.50 25.80 18.30 18.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.73 0.72 -4.07 5 118 2009-01-15 18:05:59 2008-04-24 14:35:50 4 4 37 1 60 89 0 50.30 66 5.05 CHANGED ALTPPSPFRD.SVuSGSSsPu.SPVSESlLCsPSNspYsSlIL+DYoQSSSTL ....ALTPPSPFR.D.SVsSGSosPs.SPVSESsL..........C..........h.PssspYsolIlRDYsQSSSoL........ 0 2 8 30 +10439 PF10607 CLTH RanBPM_CRA; CTLH/CRA C-terminal to LisH motif domain Coggill P, Bateman A anon Pfam-B_3765 (release 22.0), UPF0559 Domain RanBPM is a scaffolding protein and is important in regulating cellular function in both the immune system and the nervous system. This domain is at the C-terminus of the proteins and is the binding domain for the CRA motif (for CT11-RanBPM), which is comprised of approximately 100 amino acids at the C terminal of RanBPM. It was found to be important for the interaction of RanBPM with fragile X mental retardation protein (FMRP), but its functional significance has yet to be determined [5]. 
This region contains CTLH and CRA domains annotated by SMART; however, these may be a single domain, and it is refereed to as a C-terminal to LisH motif [6]. 23.00 23.00 23.00 23.00 22.80 22.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.68 0.71 -4.50 132 1458 2009-01-15 18:05:59 2008-04-24 14:51:03 4 27 290 0 1000 1417 5 169.80 20 40.24 CHANGED hhtphpcIhcsl..ptclppAlpWssc..............................ppspLhch......p...........................ssLc.FcLph.paIELl+................p......................................................................................................................................sp..................................hhcAlpau+c......pl.s...hsp...........................t....hhp-lpphhuLLsats....................................................................pshpp.......................Lhss.pp..............hpplsppFspsh..hphh..shs.......p........ps........................LthhlpsGh.ulpshh ...............................................................................................h.ptpcItctl.tps.p.lppAlph.s..pp..........................................p.s.p..Lhcp..........s.....................................................................................................ssL...FpLpp.p.a.lELl+...............s......................................................................................................................................................................................................................................................................................................................................................................sp...............................................hp..cAlpaupp...........pl..s......hsp...............................phhc-.l.p.c.s.huLL.sa.ssstp.......................................................................sshtp......llss..pp.................hpplssthspsh...h.phh...shs...........p...........ps........................Lt
hhlptsh.sh....h............................................................................................................................................. 0 321 541 796 +10440 PF10608 MAGUK_N_PEST Polyubiquitination (PEST) N-terminal domain of MAGUK Coggill P anon Pfam-B_18049 (release 22.0) Domain The residues upstream of this domain are the probable palmitoylation sites, particularly two cysteines. The domain has a putative PEST site at the very start that seems to be responsible for poly-ubiquitination [1]. PEST domains are polypeptide sequences enriched in proline (P), glutamic acid (E), serine (S) and threonine (T) that target proteins for rapid destruction. The whole domain, in conjunction with a C-terminal domain of the longer protein, is necessary for dimerisation of the whole protein [2]. 21.60 21.60 23.00 21.80 20.80 21.10 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -10.03 0.72 -3.58 14 339 2009-01-15 18:05:59 2008-04-24 15:02:41 4 33 36 21 110 279 0 70.70 44 10.98 CHANGED p+YRYQD--os.P.E......pSssplsscshssELlclu-K..sluph-Nl.......HGaVsHsHISPh..................................................pAsssPllVNT-oL-sssY...VNGT-s-aEaE ..............................................tYth.spps...P.p.........s.hstt......................................sh..p..hosh...................................................pAsss..PllVNTDoL-sssY.........VNGT-u-hEYE................................................. 0 5 15 41 +10441 PF10609 ParA ParA/MinD ATPase like Mistry J, Wood V anon Pfam-B_177 (release 21.0) Family This family contains ATPases involved in plasmid partitioning [1]. It also contains the cytosolic Fe-S cluster assembling factor NBP35 which is required for biogenesis and export of both ribosomal subunits [2]. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.68 0.72 -4.10 250 4990 2012-10-05 12:31:09 2008-04-24 17:05:32 4 43 4013 3 1687 4768 2898 81.20 48 23.71 CHANGED LDYLllDhPPGTGDhp.LolsQpl..........s..............ls.......Gul.lVTTPQ-l..........AlhDscKulshh..c+hslPllGlVENM......ShahCPp.C.....ucppplFG..pG ..............................LDYLllDhPP..G..T.G..Dl.p.LolsQpl...........P..................................ls....Gu..llVTTPQcl.................AlhDA...cKuls.Mh..c..K...l......p.l..P.l.L.........GlV.E...N.M......S...h..a..hCss..C.....Gcc..ptlFGp........................................... 1 591 1076 1434 +10442 PF10610 Tafi-CsgC Thin aggregative fimbriae synthesis protein FIGFAMs, Mistry J, Coggill P anon FIG009025 (Release 2.0) Family Fimbriae are cell-surface protein polymers, of eg. E coli and Salmonella spp, that mediate interactions important for host and environmental persistence, development of biofilms, motility, colonisation and invasion of cells, and conjugation. Four general assembly pathways for different fimbriae have been proposed, one of which is extracellular nucleation-precipitation (ENP), that differs from the others in that fibre-growth occurs extracellularly. Thin aggregative fimbriae (Tafi) are the only fimbriae dependent on the ENP pathway. Tafi were first identified in Salmonella spp and the controlling operon termed agf; however subsequent isolation of the homologous operon in E coli led to its being called csg. Tafi are known as curli because, in the absence of extracellular polysaccharides, their morphology appears curled; however, when expressed with such polysaccharides their morphology appears as a tangled amorphous matrix. The gene agfC is found to be transcribed at low levels, localised to the periplasm in a mature form, and in combination with AgfE is important for AgfA extracellular assembly, which facilitates the synthesis of Tafi. 
The genes involved in Tafi production are organised into two adjacent divergently transcribed operons, agfBAC and agfDEFG, both of which are required for biosynthesis and assembly [1]. 25.00 25.00 26.80 26.60 22.20 20.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.53 0.72 -4.21 2 412 2009-01-15 18:05:59 2008-04-24 18:01:26 4 1 407 3 12 89 0 103.40 84 92.46 CHANGED LLLAALSSQITFNTTQQG-lYTIlPpVTLopsClCcVQIhulR-GpuGQSpTpQcpTLSLPANQPIsLT+LSLNISP-DpVKIlVTVSDGQSLHLSQQW.PSsEKS ...................LLLAALSSQITFNTTQQGDlYTIIPEVTLTQSCLCRVQILSLREGSSGQSQTKQEKTLSLPANQPIALTKLSLNISP-DRVKIVVTVSDGQSLHLSQQWPPSSEKS...... 0 1 3 7 +10443 PF10611 DUF2469 Protein of unknown function (DUF2469) FIGFAMs, Mistry J anon FIG004032 (Release 2.0) Family Member proteins often found in Actinomycetes clustered with signal peptidase and/or RNAse-HII. 25.00 25.00 29.00 137.90 20.50 19.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.36 0.72 -3.65 12 430 2009-09-11 15:00:03 2008-04-25 09:17:54 4 1 430 0 109 192 35 101.10 69 98.07 CHANGED MSAEDLEpYEoEMELpLYREYRDVVGlFpYVVETERRFYLsNpV-lpsRoss.G-VYFEVoMsDAWVWDhYRPARFVKsVRVlTFKDVNlEELsKs.-l.-lPc ..MSAEDLEpYEs-MELsLYREY+DVVG.FoYVVETERRFYLANsV-lpsRs.s.s.G-VYFEVphuDAWVWDMYRPuRFVKpVRVlTFKDVNIEE.lsKs.Dl.clPp...... 0 33 80 101 +10444 PF10612 Spore-coat_CotZ Spore coat protein Z FIGFAMs, Mistry J, Coggill P anon FIG014057 (Release 2.0) Family This family has members annotated as Spore coat protein Z, otherwise known as CotZ, It is a cysteine-rich spore coat family, and along with CotY is necessary for assembly of intact exosporium. 
19.90 19.90 20.40 21.00 18.30 17.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.82 0.71 -11.46 0.71 -4.34 11 223 2009-01-15 18:05:59 2008-04-25 11:28:31 4 2 119 0 21 136 0 148.80 56 93.76 CHANGED MSCsppcth.......tspNCVs-sVctIc-LQ-us..--..sCsTuCasshLu..sos...shuDThPFlLaTKpGsPFpA...Fu.ssuplsss...Ch.SsFFRVEslcD.sCAsLRVL..........pshststshLchs-ps..........lCps..............hcLcKTshCIpVDLsCFCAIQCLssphl ......................MSCNpNccH.......ss.sCVssVV+FIpELQ-Cu..TT....TCGSGCEl...PF...LG...AHNsA..SVANTRPFILYTK....sG.sPFEA.....FA..PSusL..suCc......SPIFRVESlDD.D..s.C.AVLRVL...................oVVLGDs...osVPPsDDP................ICTFLu.........VPNARLlSTsoCITVDLSCFCAIQCLRDVoI........ 0 4 11 13 +10445 PF10613 Lig_chan-Glu_bd Ligated ion channel L-glutamate- and glycine-binding site Coggill P anon Pfam-B_203 (release 22.0) Domain This region, sometimes called the S1 domain, is the luminal domain just upstream of the first, M1, transmembrane region of transmembrane ion-channel proteins, and it binds L-glutamate and glycine [2]. It is found in association with Lig_chan, Pfam:PF00060. 20.30 20.30 20.30 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.10 0.72 -3.99 82 1132 2012-10-03 15:33:52 2008-04-25 14:02:08 4 36 106 435 539 1967 2 64.60 46 8.17 CHANGED PasM.h+ps.......h...tuspp.aEGaClDLlcclAchl.sFsYcltlV.tDG..pYGshpp..sG.........pWsGhlGELlp ..................................PYVM.h+cs.......tp.h...pGN-R..aE...GYC...lDLlp........E..l......Ac..h......l......G.......F.......p...Yc.l.plV...sDG.............KYGup-s...ss...................pWN.GMVGELl................... 
0 155 196 354 +10446 PF10614 CsgF Tafi-CsgF; Type VIII secretion system (T8SS), CsgF protein FIGFAMs, Mistry J, Coggill P, Desvaux M anon FIG077109 (Release 2.0) Family The extracellular nucleation-precipitation (ENP) pathway or Type VIII secretion system (T8SS) in Gram-negative (diderm) bacteria is responsible for the secretion and assembly of prepilins for fimbiae biogenesis, the prototypical curli. Besides the T2SS that can be involved in the assembly of prototypical Type 4 pilus, the T4SS that can be involved in the biogenesis of the prototypical pilus T, the T3SS involved in the assembly of the injectisome and the T7SS involved in the formation of the prototypical Type 1 pilus, the T8SS differs in that fibre-growth occurs extracellularly. The curli, also called thin aggregative fimbriae (Tafi), are the only fimbriae dependent on the T8SS. Tafi were first identified in Salmonella spp and the controlling operon termed agf; however subsequent isolation of the homologous operon in E coli led to its being called csg. In the absence of extracellular polysaccharides Tafi appear curled, although when expressed with such polysaccharides their morphology appears as a tangled amorphous matrix [2]. CsgF is one of three putative curli assembly factors appearing to act as a nucleator protein. Unlike eukaryotic amyloid formation, curli biogenesis is a productive pathway requiring a specific assembly machinery [1]. 
25.00 25.00 29.70 29.60 24.30 18.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.69 0.71 -4.19 6 534 2009-01-15 18:05:59 2008-04-25 14:25:13 4 2 527 0 56 165 266 127.60 69 95.73 CHANGED h+hhGl..hAhLlhhtA..ussApAusLVYpPhNPsFGGsPhNuuaLhupANAQNphpcsstsss.............ps.Spu-hFApQLQoQLLSuLusQlspAIFG-ssQcuGshoFsupoloas+sssslslsIsDssTGpsTpIsVP..ss ......................hl...........l.hhhh....SP.LoWAGsMTFQFRNPNFGGNPNNGAFLLNSAQA.QNSYKDPSY.s-DFGI..........ETPSALDNFTQAIQSQILGGLLoNINTG..............K..P.....GR...........MVTNDaIVD.IA.N.R.D.GQ.LQLNVTDRKTGpTSTIpVSGLQs...................... 0 12 22 37 +10447 PF10615 DUF2470 Protein of unknown function (DUF2470) FIGFAMs, Mistry J, Coggill P anon FIG076093 (Release 2.0) Family This family is a putative haem-iron utilisation family, as many members are annotated as being pyridoxamine 5'-phosphate oxidase-related, FMN-binding; however this could not be confirmed. 21.00 21.00 21.00 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.98 0.72 -3.76 50 682 2009-01-15 18:05:59 2008-04-25 15:11:28 4 12 625 9 274 592 273 81.30 25 33.83 CHANGED tuDPlsst...susplspHMNsDHu-AlhtYAptauGl.....sss...ssA.......pMtul-spuhcLpl.............ss.tslclsF-ps.......Lsssp-s+psLVtMs .................................h............tppllpHMNs-Htcsltths.c..p.a..ush...........tps...........pss..........................phhulDhpGlclth...........t...............sp.pslRl.Fsp.........ss.-.ps.l+psllth................................... 1 87 181 241 +10448 PF10616 DUF2471 Protein of unknown function (DUF2471) FIGFAMs, Mistry J, Coggill P anon FIG076041 (Release 2.0) Family The function of this family is unknown. Members all come from Burkholderia spp. Swiss:A2WH83 is annotated as Serine/threonine-protein kinase, but this could not be confirmed. 
25.00 25.00 31.20 31.00 22.30 18.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.56 0.71 -3.93 7 81 2009-01-15 18:05:59 2008-04-28 11:07:54 4 1 56 0 27 68 4 122.40 49 95.15 CHANGED spp.slsslpFcpAAc....DLc+IVtpIAtRYlspt........VslTWphLhsIEtEAhuDLGFtSRpDushlshF.R.ushphPtsD..-hlDasp.ossLPAVathshsthEpsu.cA.pp................Ahss ...................p-psLAALpFcsAA+....DLEpIVppIApRYIpQp........VPLTWRLLHAlEAEALADLGFASRHDshlhsLFpRPu-hcaPETD..DsVDFGp.SsALPAVFuFAVuAYEtAA.c.sttp...sst................ 0 1 5 13 +10449 PF10617 DUF2474 Protein of unknown function (DUF2474) FIGfam, Mistry J, Coggill P anon FIG073099 (Release 2.0) Family This family of short proteins has no known function. 24.80 24.80 24.90 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.92 0.72 -4.36 18 344 2009-01-15 18:05:59 2008-04-28 11:30:32 4 1 323 0 76 225 3 39.40 44 83.46 CHANGED ppshW.pRLuWLllIWuuSVhALGlVAhlhRLhMsAAGLpo ......h...hW.+RlsWhlsIWsuSVLsLusVuhLhRllMsuAGhpo..... 0 6 24 51 +10450 PF10618 Tail_tube DUF2473; Phage tail tube protein FIGfams, Mistry J, Bateman A anon FIG030252 (Release 2.0) Family This bacterial family of proteins contains phage tail tube proteins related to the Mu phage protein Swiss:P79679 [1]. Bacteriophage Mu has an eicosahedral head and contractile tail. The tail is composed of an outer sheath and an inner tube. 21.60 21.60 21.90 22.00 21.40 21.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.01 0.71 -4.37 24 282 2009-01-15 18:05:59 2008-04-28 12:44:06 4 1 261 0 33 148 1 116.70 36 98.55 CHANGED Mut..plsGsshl+lsGt.plpspsu.sphs.ushpREslhG..stlhGapEp.p.sshlcsslhsstshs..ltclsshsshTlohEhsNGpsYllssAahs-s..sspsc-GplslpacGppsph. .............u..+luGTsal+lDGp.pLslsG.G.hchshsshtR-slhGh.sushsa.KEo.+.APalcsohpsstshs..lscIs.stsshTIs.sE.huN..GpsYlhosAWhtsps..shss--GTl-.lcFcGppush......... 
0 6 14 25 +10452 PF10620 MdcG Phosphoribosyl-dephospho-CoA transferase MdcG FIGfams, Mistry J anon FIG003211 (Release 2.0) Family MdcG is a phosphoribosyl-dephospho-CoA transferase that is involved in the biosynthesis of the prosthetic group of malonate decarboxylase [1]. Malonate decarboxylase from Klebsiella pneumoniae contains an acyl carrier protein (MdcC) to which a 2'-(5' '-phosphoribosyl)-3'-dephospho-CoA prosthetic group is attached via phosphodiester linkage. MdcG catalyses the following reaction: 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA + apo-[acyl-carrier-protein] = holo-[acyl-carrier-protein] + diphosphate. 26.10 26.10 26.40 45.10 22.10 25.60 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.39 0.70 -4.80 47 349 2012-10-02 22:47:23 2008-04-28 15:00:13 4 2 315 0 97 355 6 191.60 29 95.30 CHANGED M.....................p.HDLlalpssu....hh.......ssss.shltshh.....tsuhPlVVRRs.s....sssGtlslGlth.....tspcp..RhuhhlssssltcshsP.sLsph.......hst..tshsh.htslps.L.s.s.ssh....shshtlhGSsuap......................hhTGlshlpssSDLDLllps.sssh...phpplhphLt...ppts.hRlDsplphPsG.uVuh+Ehtssss.....pVLlKstcGspLhsps..a ......................................t.HcLlhhts.s....hh...................s.st.taltthh.....ttuhPllVRRs.s....sssstlslGlps.....ttppp..RhAh.hshssltphhsP.sLsph.........tt...tsh.sh..h.slpt..ltshh.pth....shshGlhGShuap......................hsTGlshlpss....SDLDLL.lps.spsh........phtthlphl...tt.h.tphs.hRlDspl.hs.p.G.uhuh+Eahtsst.............plLlKospGspLhtss.................. 0 13 40 66 +10453 PF10621 FpoO zf-Fe2-S2-FpoO; F420H2 dehydrogenase subunit FpoO FIGfam, Mistry J, Coggill P anon FIG062058 (Release 2.0) Family This is the FpoO subunit of F420H2 dehydrogenase, an enzyme which oxidises reduced coenzyme F420. Reduced coenzyme F420 is a universal electron carrier in methanogens. 
25.00 25.00 45.10 45.00 20.20 22.50 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.66 0.71 -4.03 3 11 2009-09-11 06:40:42 2008-04-28 16:08:27 4 1 10 0 9 10 0 103.40 43 94.51 CHANGED MTDCDLCG+AIPTVIPVRVFRPRLKFAYPEGVWKGLCEuCLDSAQKTYLElNKcpsSCR+GKCsLCGcKTpVaPVElQVPDFSKGlVlKcVclChKCL-ulsEoYIRaKKEQIE...CEHGH .............MsDCDLCGhulPTlhPVRVhtPhhchuYPEGVWKGLC-sCL-SApcTY.Eh..scspsus....ppGKCsLCGs+stlasVElplPsFpps...cshplChpCLc.spEsa...h+-.................... 0 1 6 7 +10454 PF10622 Ehbp Energy-converting hydrogenase B subunit P (EhbP) FIGfams, Mistry J anon FIG124174 (Release 2.0) Family Ehb (energy-converting hydrogenase B) is an methanogenic archaeal enzyme that functions in one of the metabolic pathways involved in methanol reduction to methane. This family contains subunit P of Ehb. 19.50 19.50 20.60 118.80 18.70 19.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.70 0.72 -4.03 6 26 2009-01-15 18:05:59 2008-04-28 16:16:51 4 1 26 0 17 23 5 84.10 58 91.35 CHANGED LhP+hhhuLGGYIhET...........pFPaRNlIVsNPTsEPIKIEVPsaD-pWIEcH+cLGLhVVPVs-DDDFluha+MsccKlc+u .LLPKhsMSLGGYIRETh.......t..pFPYRNlIVGNPTsEPIKI-VPsYDEsWIE+H+cLGLIVVPVpc-DDFVGlF+hVcpKlcc.... 1 3 7 13 +10455 PF10623 PilI Plasmid conjugative transfer protein PilI FIGfams, Mistry J anon FIG136100 (Release 2.0) Family The thin pilus of plasmid R64 belongs to the type IV family and is required for liquid matings. pilI is one of 14 genes that have been identified as being involved in biogenesis of the R64 thin pilus [1]. 
19.70 19.70 19.80 26.20 19.60 18.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.83 0.72 -4.05 2 72 2009-01-15 18:05:59 2008-04-28 17:04:57 4 1 62 0 0 21 0 80.10 63 92.57 CHANGED Mstp...+hplllhsspCc++l..h.sspD.s.hhh.FpTsDNsLllVhpssRhlhRhERhPGSpCcW+cso.u.+p+.ps.L ......MPQQHPGRLQlLVVDsHCKR+LFSTKTsTDPDELARRFCTPDNCLVVVLpsNRFLFRLERAPGSHCR..W+KGSpSRHQHLQDWL....... 0 0 0 0 +10456 PF10624 TraS Plasmid conjugative transfer entry exclusion protein TraS FIGfams, Mistry J anon FIG105028 (Release 2.0) Family Entry exclusion (Eex) is a process which prevents redundant transfer of DNA between donor cells. TraS is a protein involved in Eex. It blocks redundant conjugative DNA synthesis and transport between donor cells, and it is suggested that TraS interferes with a signalling pathway that is required to trigger DNA transfer [1]. TraS on the recipient cell is known to form an interaction with TraG on the donor cell [1]. 25.00 25.00 42.70 31.60 23.80 23.80 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.24 0.71 -4.68 2 81 2009-01-15 18:05:59 2008-04-29 14:53:22 4 1 66 0 1 44 0 147.20 75 98.09 CHANGED MKNLApIsLVTVlQFIACYLA-WGsAETshILhFllLWQGLFIWLF.QIRKKpplSDEFKFSKGlWYllMPVsSLLSPLLSLMlFIhGTLYELRRlSGClSl+EWhpsQls-QhsEDhtLDF-sVpac.ssaY..NPuTGhsMHGGhDSAGNTFGopWQD.sDR ...................M+sLsHIsLVTVIQFIACYLAsWGsAETlFhLhFIVLWQGLFIWLFSQI..RKKRsVSDEFKFSKGVWYIhMPVSSLLSPLLSLMVFIIGTLYELRRVSGClSl+EWMQSQVN.EQsNEDLHLDFDslp...sDFYRTNPATGLPMHG.GhDSAGNTFGosWQDYsDR..................... 0 0 0 1 +10457 PF10625 UspB Universal stress protein B (UspB) FIGfams, Mistry J anon FIG002192 (Release 2.0) Family UspB in Escherichia coli is a 14kDa protein which is predicted to be an integral membrane protein. Overexpression of UspB results in cell death in stationary phase, and mutants of uspB are sensitive to ethanol exposure during stationary phase [1]. 
25.00 25.00 61.00 60.80 18.10 17.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.47 0.72 -4.27 8 648 2009-01-15 18:05:59 2008-04-29 16:18:06 4 1 646 0 50 149 0 97.70 80 96.31 CHANGED MISGDsILhALhlVTslNhARYlSoLRsLLalMR-ucPLLYQQVDGRGFFTTHGNhoKQlRLapYI+opEYLcHHDPlFstKC-RVRcLFILoSuLlulsllshFhl .MISs.slhaALhlVshlNMARYFSSLRALLVVLRsCDPLLYQYVD.GGGFFTSHGQPNKQVRLVWYIYAQRYRDHHD-EFIRRCERVRRQFILTSALCGLVVVSLIAL..... 0 2 11 30 +10458 PF10626 TraO Conjugative transposon protein TraO FIGfams, Mistry J anon FIG055244 (Release 2.0) Family This is a family of conjugative transposon proteins. 25.00 25.00 26.10 25.90 20.20 22.20 hmmbuild --amino -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.94 0.71 -4.86 11 300 2009-01-15 18:05:59 2008-04-29 16:58:48 4 1 111 0 28 251 2 162.60 48 87.16 CHANGED LPGMKGlplTuGM..sDGh+h.pscs-hGYtFGlAhoTYsKssN+WVhGuEYLpKpaPY+shpIPluQFTuEGGYYashLSDssKshFlslGhSALuGYEoVNhGcpLL.DGusLpcpDuFIYGGAlTLEhEsYLoDRllLLlpsRERsLaGoDsG+FHsQaGlGlKhIl ................LPt.+GlElpuuh..s-Ghp...ststhsYthGhAlosYsKpuNpWlhGuEYlp+phsY.....+s.......hpI..PltQFTuEGGYaaplLSDspKslFlhhGhSALAGYEol.NhGcKlL.DG..uTLtc+DsFlYGGAlTL-lEsYLoDRlsLLhpsRERhLaGuDst+FHsQhGlGlKFhl................................ 0 7 24 28 +10459 PF10627 CsgE Curli assembly protein CsgE FIGfams, Mistry J anon FIG091002 (Release 2.0) Family Curli are a class highly aggregated surface fibres that are part of a complex extracellular matrix. They promote biofilm formation in addition to other activities. CsgE is a non-structural protein involved in curli biogenesis [2].\ CsgE forms an outer membrane complex with the curli assembly proteins CsgG and CsgF [1]. 
25.00 25.00 35.50 27.90 24.70 24.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.06 0.72 -4.23 20 509 2009-01-15 18:05:59 2008-04-29 17:28:52 4 1 505 0 44 135 11 103.40 76 77.48 CHANGED ElsGLllDRTloRhG+DFYhhFSsta+-ls..stshNLolcEpssupuGShlsVphspcsIYpTalu.pppslccpAppAlthVhptlsphphpt.....sosDLu..pDEh ........EVPGLLTDHTVSSIGHDFYRA..FSDKWES-Y....TGNLTINERPSARWG.S.WITITVNQDVIFQTFLFPhKRDFEKTVVFALlQTEEALNRRQIsQuLL..STuDLA+DEF............ 0 8 16 31 +10460 PF10628 CotE Outer spore coat protein E (CotE) FIGfams, Mistry J anon FIG006437 (Release 2.0) Family CotE is a morphogenic protein that is required for the assembly of the outer coat of the endospore [1] and spore resistance to lysozyme [2]. CotE also regulates the expression of cotA, cotB, cotC and other genes encoding spore outer coat proteins [1]. The timing of cotE expression has been shown in Bacillus subtilis to affect spore coat morphology but not lysozyme resistance [3]. 25.00 25.00 28.60 78.40 18.10 17.40 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.22 0.71 -4.91 9 180 2009-09-11 05:35:09 2008-04-30 14:11:34 4 1 178 0 35 94 1 178.80 59 98.44 CHANGED MuE......YREIITKAVVuKGRKFTpuoHTIoPsc+PoSILGCWIINHpYcA.+KsGKsVElpGpYDINVWYSYscNTKT-VVTEpVpYsDllpLpYRDcssl.sD-h-VlA+VlQQPNCLEAsISssGsKIhVpVEREFlVEVIGETKVsVuVNPpspp-D.s....hph-hpD-EhE-lsPsFLtsppEE .....MSEaREIIT.KAVVGKGR.K.aTp..STHTspsssc..P.T.SILGCWlINHpY......EA......+Ks......GKpVEI-GaYDVNsWYSacsNTKTEVVTERVsYsDpVslsYRDc.N.ap.uDDhElIARVIQ.PNCLEAhlSPNGNK..IVVpVEREFlsEVVGETKlCVuV..N..P-..Gss.-.sD-........-...apl..t.D..-EaE-LDPsFlV-s.E............. 0 13 25 27 +10461 PF10629 DUF2475 Protein of unknown function (DUF2475) Coggill P anon SWISS-PROT(UPF0573) Family This family of proteins has no known function. 
21.60 21.60 21.80 21.70 21.00 21.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.44 0.72 -3.73 19 290 2009-01-15 18:05:59 2008-04-30 14:19:30 4 7 77 0 189 287 0 56.90 28 33.47 CHANGED sPthlPGYpGasPthtaphGp..TYGpsTtphhps.pstt.tp.h.............h.p..t.ssphhlspR ......thlPG...YsGasPth..+..ap..hGp..TYGpsTtphh..tt......t...................................t..................................................... 0 73 91 134 +10462 PF10630 DUF2476 Protein of unknown function (DUF2476) Coggill P anon SWISS-PROT(UPF0572) Family This is a family of proteins of unknown function.\ \ \ The family is rich in proline residues. 21.70 21.70 21.70 35.10 20.40 21.50 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.88 0.70 -4.51 3 61 2009-01-15 18:05:59 2008-04-30 14:48:06 4 3 21 0 35 65 0 234.80 48 96.78 CHANGED M.GoRPsSPSADLAPhWu.QssGPoPAKRsRL.pEPAsPEsLAQPu.EDPA.....-shTShVhLsAGsALclPL-sVDLlLElpPhSVLpVSLQGHTlI..llPEsLtSSV.-hsht.tcuQPGh.s.s..GAtupD......VslppEoFCAsV.EhhhpE-ss-EDADsEFsEhWMsuPDDpAsGLasSusSl.SPhp-GQVPGPso.ussPuAEppSPRFIW-L-hsMLcPlPuSPLQPLPPSPSPNPQEQs.....LPsRPPC......KARRRL ...................................M.GoRPRSPSAp.sAPhWu.QPuGPu.PAKRhRL..cEP..AssEsh..ssPshEs..Pss..ssssLTS..lVVLuuGCALplsL-.-VDLVLEPtPTSlLpVSLsG..H..TLI..LlPEsLLuSs...tts.spssussGLEss.shLuA..tc-......Vllppt.FCuuVPElAsQE-A..h.-EDA..-spF.p.hhsussGpAuGL..asS..s..p..uh.uP..h.pu.hst.Pps...sssPusEppSPp.sha-L-h+LLEPhPoSPLQPLPPSP...S..PG.Pptcs...........hPt.pP......P...C......KARRRL....................................................................... 0 4 4 15 +10463 PF10631 DUF2477 Protein of unknown function (DUF2477) Coggill P anon SWISS-PROT(UPF0574) Family This is a family of proteins with no known function. The family is rich in proline residues. 
20.60 20.60 20.80 87.60 19.70 19.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.15 0.71 -3.96 3 22 2009-01-15 18:05:59 2008-04-30 17:09:40 4 1 20 0 12 26 0 135.90 75 92.45 CHANGED MDPHEMVVKNPYAHISIPRAHLRPDLGQQLEsuPosSSSSEoQPLPVGoCsPEPltLLQsTEAPGPKGsKG.pGsAP.psQQAWQQPCNPYSSGQRPAGLTYAGPPPAGRGDDIAHHCCCCPCCSCCHCPRFCRCHSCCCl MDPpEMVVKNPYAHISIPRAHLRPDLGQQLEsAss..oSSsEhQPLPsGsCssEPT+LLQPT.EsP.GPKG...s...KGspGAsPhQsQQAWQQPGNPYuSuQRPAGLT...YAGhPPlGRGDDIAHHCCCCPCCpCCHCP.FCRCHSCCCl. 0 1 1 1 +10464 PF10632 He_PIG_assoc He_PIG associated, NEW1 domain of bacterial glycohydrolase Naumoff D, Coggill P anon Pfam-B_97991 (release 22.0) Domain The English-language version of the first reference can be found on pages 388-399 of the above. This domain has been named NEW1 but its actual function is not known. It is found on proteins which are bacterial galactosidases [1]. The domain is associated with the He_PIG family, Pfam:PF05345, a putative Ig-containing domain. 25.00 25.00 25.90 25.10 22.70 21.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.56 0.72 -7.11 0.72 -4.62 4 47 2009-09-11 10:36:46 2008-05-06 14:04:35 4 5 32 0 17 47 0 28.80 58 4.97 CHANGED PpINsP+slGNYPuoPFLFYIPTSGpRPM P+INuPpVhGspPuoPFLahIPsoGpRPM 0 11 14 17 +10465 PF10633 NPCBM_assoc NPCBM-assoc; NPCBM-associated, NEW3 domain of alpha-galactosidase Naumoff D, Coggill P anon Pfam-B_97993 (release 22.0) Domain The English-language version of the first reference can be found on pages 388-399 of the above. This domain has been named NEW3 but its actual function is not known. It is found on proteins which are bacterial galactosidases [1]. The domain is associated with the NPCBM family, Pfam:PF08305, a novel putative carbohydrate binding module found at the N-terminus of glycosyl hydrolases. 
23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.62 0.72 -3.93 29 604 2012-10-03 16:25:20 2008-05-06 14:23:52 4 85 329 11 233 617 92 78.10 22 12.53 CHANGED spsGcpsslphshsssu.sssspslplslss.PsGWs...stssssphs...slssGpslpsshpVpsPssAsuGs....Yslpspsphs ..............................sGps.hshp.lsl.p.N.ps..ss......s..h..p..s...h..sls.h......s......s...P.sG.......Ws.........s....s..s...s....p.hs......sl.s............s..............G..........p.s.........ts....sshslss..Psss...sGs......Y..lshpst..s............................ 0 110 180 223 +10466 PF10634 Iron_transport Fe2+ transport protein FIGfams, Mistry J anon FIG005173 (Release 2.0) Family This is a bacterial family of periplasmic proteins that are thought to function in high-affinity Fe2+ transport. 20.50 20.50 20.50 21.20 20.10 19.70 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.86 0.71 -4.32 30 560 2009-01-15 18:05:59 2008-05-06 17:09:52 4 1 550 30 101 300 159 153.60 53 80.44 CHANGED AtAtEhPIGc...Ptph..sGMEluAVYLQPl-MEPp.Gh.....hssscoDlHLEADIHAsc..sNsNGFucG.-.WlPYLslsYpLpchsssc.spcGshMP.MVAsDGPHYGsNl.KL.......tGsGpY+lpapIpPPup......ttFGRHsDKETGV.........us..WFcPaslpY.-FpasG ...............................................................htEhPIGc.......Ppph..ssMEIAAVYLQPI-MEPc..Gh.....thsAucuDlHLEADIHAsc..sN.sGFu-G-..WhPYLs.lsYcLpppDoG......p.....h......pcGohMP.MV.A.s.DGPHYGsNl.Kh........GsGpYcLsahIpsPpp......tuatRHlDcET.G.V...........Gt....Wac..P..hslcY.-FpYsG................................. 0 26 56 85 +10467 PF10635 DisA-linker DisA bacterial checkpoint controller linker region Murzin A, Coggill P anon Murzin A Domain The DisA protein is a bacterial checkpoint protein that dimerises into an octameric complex. The protein consists of three distinct domains. 
the first, N-terminal region, from 1-145 is globular and is represented by family DisA_N, Pfam:PF02457; the next 146-289 residues is this domain that consists of an elongated bundle of three alpha helices (alpha-6, alpha-10, and alpha-11), one side of which carries an additional three helices (alpha7-9), thus forming a spine like-linker between domains 1 and 3. The C-terminal residues of domain 3 are family HHH, Pfam:PF00633, the specific DNA-binding domain. The octameric complex thus has structurally linked nucleotide-binding and DNA-binding HhH domains and the nucleotide-binding domains are bound to a cyclic di-adenosine phosphate such that DisA is a specific di-adenylate cyclase. The di-adenylate cyclase activity is strongly suppressed by binding to branched DNA, but not to duplex or single-stranded DNA, suggesting a role for DisA as a monitor of the presence of stalled replication forks or recombination intermediates via DNA structure-modulated c-di-AMP synthesis [1]. 25.00 25.00 26.20 74.70 23.60 24.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.81 0.71 -4.70 39 608 2009-01-15 18:05:59 2008-05-06 17:13:32 4 5 605 8 154 378 117 145.50 42 40.47 CHANGED LcDsusILs+ANQAlpTLEKY+ssLDcslssLssLEhEDlVTltDVspslQRhEMVhRIspEIcpYllELGsEGRLlshQLcELlss.l-p-thLlI+DYs.....pcshshpplhcplpslssp-Ll-hsslu+lLGYssssshlDs.l ....Lc-susILo+ANQAltTLE+Y+shLc-shssLotLEhEDhVTlpDVssVlQRlEMVhRIssEIptYlhELGsEGRLlpLQLsELlss.l-s-ttLll+DYh..................ppstshppsLppLppLoss-LL-hstlu.+hlGYssssph.Dp............. 0 67 124 144 +10468 PF10636 hemP Hemin uptake protein hemP FIGfams, Mistry J anon FIG024330 (Release 2.0) Family This is a bacterial family of proteins that are involved in the uptake of the iron source hemin [1]. 
25.00 25.00 30.90 30.40 24.10 16.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.53 0.72 -4.36 56 837 2009-01-15 18:05:59 2008-05-06 17:34:36 4 1 806 3 146 406 26 38.00 57 57.52 CHANGED stlsocsLhp.GspplhIpHsGphYpLRlT+sGKLILTK .......RplsSpsLLG.scu+llI-HsGQcYh..LRpTpAGKLlLTK... 0 25 79 112 +10469 PF10637 Ofd1_CTDD Oxoglutarate and iron-dependent oxygenase degradation C-term Wood V, Coggill P anon Pfam-B_18095 (release 22.0) Domain Ofd1 is a prolyl 4-hydroxylase-like 2-oxoglutarate-Fe(II) dioxygenase that accelerates the degradation of Sre1N in the presence of oxygen. The domain is conserved from yeasts to humans. Yeast Sre1 is the orthologue of mammalian sterol regulatory element binding protein (SREBP), and it responds to changes in oxygen-dependent sterol synthesis as an indirect measure of oxygen availability. However, unlike the prolyl 4-hydroxylases that regulate mammalian hypoxia-inducible factor, Ofd1 uses multiple domains to regulate Sre1N degradation by oxygen; the Ofd1 N-terminal dioxygenase domain is required for oxygen sensing and this Ofd1 C-terminal domain accelerates Sre1N degradation in yeasts [1]. 
20.70 20.70 21.50 20.70 20.30 20.60 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.94 0.70 -5.26 39 253 2012-10-10 13:59:34 2008-05-07 10:58:38 4 9 224 4 187 298 18 273.60 28 46.78 CHANGED s-lsaLhpYlsPpYLss-sh-clpcpFt-cSslpLccFLpschuspl+phlctp-pc...phPtpup-lpt..s...................WpsAtPPHKpRYhahcsps........t..pt..t...pEh.................................Lsslh.S.uF+KWLshlTul.pl..................................................sSpp..llsRRFR.G.DYTLAostc..........ttc.....stLEsoLsLT..Po..................................................sW-ss...............................................................EhGGYElYM...................ss-----...............................tDsAlY+u...................................................................s-pDDulLhopssuWNsLslVLRDpGlL+FVKYVStsAKGsRWDlospasVc..-p-.p-cpp ..............................................................................t.-hphLhcalsPsYLsschhpplpcpFp-p..SplpLppFLppchhpplpphlcpp-hp.....................................................WpsttPPpK++Yhhhptpp............................h.p.................................................h...h.hpLhpSpuFhphLu.hTuL.pl..................................................................................................................ssp.p...h.hRRa...+pG..cYTLssstp............................ttcstL-.hsLsls..ss..................................................uWps........................................................................................................-hGGaphYh....................spt.tt.......................................................t.th.pt..............................................................................................scs-Ds.Lhohsss.NpLslVhR.D.pusL+FVKYluppu.s....sp............................................................................ 
0 65 99 153 +10470 PF10638 Sfi1_C Spindle body associated protein C-terminus Wood V, Coggill P anon Wood V Domain This C-terminal domain of spindle-body-associated protein Sfi1 has an important role to play in the bridge-splitting during bi-polar spindle assembly, and this separation event possibly requires interaction with integral components of the nuclear envelope, such as the Mps2-Bbp1 complex [1]. Centrally to this domain is a region carrying centrin-binding repeats with repeating units containing tryptophan, family Sfi1_central, Pfam:PF08457. 20.80 20.80 22.60 63.20 18.80 17.10 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.54 0.72 -3.62 5 21 2009-01-15 18:05:59 2008-05-07 13:35:04 4 2 21 0 13 21 0 108.00 46 11.30 CHANGED +shslsc-LpTPhKoPs.ttusTIPGSERVK+a+MEslKs.RYSRARRA..IPSPIKSSsVLDSTlK++Ls.........ssstls-csTTststph.lccpsK+luSKV+cIcFs+IPp .......hshpp-LcTPlR..oss..psusTIPGSERlKpaRMEAMKS.HYSRARRA..IPSPlKSSSlLDSTAKKpIs.........hcss.sslssssopshPhh...c+ps+NhsSKlc+IcFsRIPt.............................. 0 1 6 12 +10471 PF10639 UPF0546 Uncharacterised protein family UPF0546 Coggill PC anon UPF0546 Family This family of proteins has no known function. Many members are annotated as potential transmembrane proteins. 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.52 0.71 -4.26 16 235 2012-10-02 19:55:49 2008-05-08 09:31:42 4 5 195 0 151 384 13 111.00 33 75.97 CHANGED LLlVullWGsTNPhl++uStGhppspps.......hhh-h+a.........LhpphcYhlPhllNQoGSVlFahsLscs-LSluVPlsNSLsFlhTllsu.hlLtEchhst+ohlGhhLVlsGssLCs ........................................................................hlhVuhhW.G.hT.sP..hl.++uu....ts.hpthpp...........h...p..hhtchth...........................................Lhhs.pYhl.PhllN......sG.S.lh.a.a..h..h...Lup.s............-LSLuVPlsN..SLuF.lFTllsu.hhltccl.h.u.p.p.s.hhGhhLhlhGlslC................... 
0 44 77 117 +10472 PF10640 Pox_ATPase-GT mRNA capping enzyme N-terminal, ATPase and guanylyltransferase Coggill P anon Karlin D Domain This domain is the N-terminus of the large subunit viral mRNA capping enzyme, and carries both the ATPase and the guanylyltransferase activities of the enzyme. The guanylyltransferase enzymatic region runs from residues 242 (leucine)-273(arginine) [1], the core of the acitve site being the lysine residue at 260 [2]. The ATPase activity is at the very N-terminal part of the domain [3]. 25.00 25.00 324.10 324.10 20.70 20.00 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.90 0.70 -5.64 14 75 2009-09-10 16:44:40 2008-05-09 14:14:22 4 1 47 0 0 67 0 314.60 67 37.27 CHANGED hspssssc-lpHEVELlalpPPLlTLoNlh..sluoppESYIhFolss.tccpsKlRs+lPhS+lHGLDlKNsQLV-slDsIlWE+KoLlpEpc..l.c.ptsllRaSTEE+alFlDY.....K+ahSuI+LELVNllps+lKsll.VDFKlKYFLGSGAQuK..SSLLasLNH...PKs+PssoLEFEIlsp.s....pplspstLhsELpslh+tlF.MusscslhL.s.sthcsPl+ThMLKKQ-l.sl-L-sLYlToKTDGVhshVplsppulaCaFoHLsYhI+Yshp+pl-splhL..aGEAlK..tsphhslalIKlhpPt.........lsDRlcEtcalp-pL .l-.QsSTAYEINNELELVFIKPPLITLTNVV..NISTIQESFIRFTVTN..KEGVKIRTKIPLSKVHGLDVKNVQLVDAIDNIVWEKKSLVTEsR..L.H.KECLLRLSTEERHIFLDY.....KKYGSSIRLELVNLIQAKTKNFT.IDFKLKYFLGSGAQSK..SSLLHAINH...PKSRPNTSLEIEFTPR-s....EpVPYDELIKELTTLuRHIF.MASPENVIL.S.PPINsPIKTFMLPKQDIVGLDLENLYAVTKTDGIPlTIRVTScGlYCYFTHLGYIIRYPlKRhIDo.EVVl..FGEAVK...DKsWTVYLIKLIEPV.........IsDRLEEScYVESKL. 0 0 0 0 +10474 PF10642 Tom5 Mitochondrial import receptor subunit or translocase Wood V, Coggill P anon Pfam-B_89651 (release 22.0) Family This protein family is very short and is only found in yeasts. Tom5 is one of three very small translocases of the mitochondrial outer membrane. Tom5 links mitochondrial preprotein receptors to the general import pore [1]. Although Tom5 has allegedly been identified in vertebrates this could not be confirmed. 
20.20 20.20 21.40 21.40 19.70 19.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.20 0.72 -4.32 11 73 2009-01-15 18:05:59 2008-05-09 16:07:55 4 1 72 0 57 61 0 48.30 39 89.28 CHANGED MFGs.stsQ.ScEEl+tpEcpAstTlppAshuAshLa..lSPhlhchl..pK..h ...MFGs.s..p.ScEEl+AtEtpAstTlppssssushLY......LSPhslchVp+..h..... 1 12 29 49 +10475 PF10643 Cytochrome-c551 Photosystem P840 reaction-centre cytochrome c-551 FIGFam, Mistry J, Coggill P anon FIG055090 (Release 2.0) Family A photosynthetic reaction-centre complex is found in certain green sulphur bacteria such as Chlorobium vibrioforme which are anaerobic photo-auto-trophic organisms. The primary electron donor is P840, a probable B-Chl a dimer, and the primary electron acceptor is a B-Chl monomer. Also on the donor side c-type cytochromes are known to function as electron donors to photo-oxidised P840. This family is thus the secondary endogenous donor of the photosynthetic reaction-centre complex and is a membrane-bound cytochrome containing a single haem group. 26.30 26.30 26.50 26.60 26.00 25.70 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.61 0.70 -5.06 7 19 2009-01-15 18:05:59 2008-05-09 16:46:09 4 4 15 2 16 20 1 172.00 39 84.60 CHANGED MDNKSNGKLIALAlGGAVLMGALFFGVSFLTGYplPAENlSslLTPL+SFhGWFLLIFhASLIIhGLGKMSS+ISDKWFLSFPLoIhsIVAlMFs.L...W.+.........GRTTTlDGcYIRoVspLcAFL...................scPAsussVPsA.............PA.......................GhDFsAAKcLhss+CNKCHolsSVtDtL+.KY+KpGps-hlVhcMpuhPsSGIoccDsssIh.alsE...KY ...............................D.ppphpLhulAhhGAhhMGsLhahlShLTGa..PA.NhS.hLsPLpSFhGWhhLIhhuSlhIhshG+MSutISspWFlShPlshhhIVhlMFh.L...W.+.........G.RTTh.-Gp.IRoVtpLpta....................ppsshstphs.s..st................................s.shsuAcpLhst+CNc.CHTlcoVt-th+.+YcKpGpl-hlVp+MpshPsSsIoccDshpIh.YLpppa......... 
0 1 7 13 +10476 PF10644 Misat_Tub_SegII Misat_Myo_SegII; Misato Segment II tubulin-like domain Wood V, Coggill P anon Pfam-B_7826 (release 21.0) Domain The misato protein contains three distinct, conserved domains, segments I, II and III. Segments I and III are common to Tubulins Pfam:PF00091, but segment II aligns with myosin heavy chain sequences from D. melanogaster (PIR C35815), rabbit (SP P04460), and human (PIR S12458). Segment II of misato is a major contributor to its greater length compared with the various tubulins. The most significant sequence similarities to this 54-amino acid region are from a motif found in the heavy chains of myosins from different organisms. A comparison of segment II with the vertebrate myosin heavy chains reveals that it is homologous to a myosin peptide in the hinge region linking the S2 and LMM domains. Segment II also contains heptad repeats which are characteristic of the myosin tail alpha-helical coiled-coils [1]. This myosin-like homology may be due only to the fact that both myosin and Misato carry coiled-coils, which appear similar but are not necessarily homologous (Wood V, personal communication). 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.64 0.71 -4.03 51 270 2012-10-03 12:11:42 2008-05-09 17:02:44 4 8 225 0 187 2002 14 111.20 33 21.93 CHANGED +EIlTlQlGpauNalGoHFWNhQEuha..sYsspsp.......stl.spDVlaRp...Ghs..h.psphTaTPRlllhDLKGuhGoLpptut.LYp......................ptsp.tsssssWs.......s...phshpc.ps...htpspapps.L-p .................+EllTlQlGph.uNal.usHaW.N.......h........Q.....-uhh.............shssppp..................s.l...sp.D..V.haRs.........Ghs......h.pGptTYTPRhllhDLK.G.u.h.G...o...Lpp...ss...LYp..........................tpt..sss.hhWp.............u...phshpp..p..hsps.Y..psLt.t.................................................................................. 
0 53 98 150 +10477 PF10645 Carb_bind Carbohydrate binding Wood V, Mistry J anon Pfam-B_63360 (release 22.0) Domain This is a carbohydrate binding domain which has been shown in Schizosaccharomyces pombe to be required for septum localisation [1]. 25.00 25.00 26.80 39.00 21.60 21.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.09 0.72 -4.15 5 22 2009-01-15 18:05:59 2008-05-12 12:10:23 4 4 14 0 20 21 0 52.20 46 19.55 CHANGED uoCGuApYDuupYVC.DsshLCPIlsGsPLpsCNGACYssShYuCoNGuLu.l t.pCGsu.YDPupYlC....ssphLCPIssG.shphCsGACYsthhYsCsNssLs..... 0 8 15 19 +10478 PF10646 Germane GerMN; Sporulation and spore germination Rigden D, Coggill P anon Rigden D Domain The GerMN domain is a region of approximately 100 residues that is found, duplicated, in the Bacillus GerM protein and is implicated in both sporulation and spore germination. The domain is found in a number of different bacterial species both alone and in association with other domains such as Amidase_3 Pfam:PF01520, Gmad1 and Gmad2. It is predicted to have a novel alpha-beta fold. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.35 0.71 -10.70 0.71 -3.93 160 1597 2009-01-15 18:05:59 2008-05-12 17:01:59 4 13 985 0 421 1297 99 116.80 19 37.80 CHANGED hslYa........hs...sssth..........Llspsphh...............shhptslppLl.pG.P.............hhsslPssspl...........h.slplp.....s.hshlshop.phhp....ssss.cph....h....ltsllhTLs.phs.t......lppVpl.hl-Gcshphht ..................................................................................h.plYa........hs.......pss.ph.............llspsptl.................sssps.hhptslptLl.pG....Ptst..............htssls..ss.spl............................h..shslp........ts.hspls.hsp..phhp.......hssp.tpph........h.ltpllh.TLs.phs.s.........lppVpl.hlsGp.h....h....................................... 
0 178 338 387 +10479 PF10647 Gmad1 Lipoprotein LpqB beta-propeller domain Coggill P anon Rigden D Domain The Gmad1 domain is found associated with the GerMN family, Pfam:PF10646, in bacterial spore formation. It is predicted to have a beta-propeller fold and to have a passive binding role rather than a catalytic function owing to the low number of conserved hydrophilic residues. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.71 0.70 -4.99 26 420 2012-10-05 17:30:43 2008-05-12 17:22:56 4 16 404 0 114 623 108 247.70 24 43.11 CHANGED sGul................sp....hsuspssslsGshu..pst.shsSsAlStsuppsA.............uVs.......sssssLhlGshGussspsh....sup.....slocPSashs.sslWsVss.usssscllh......sss......Gpss........................stV-ssshs..........slsslplSRDGsRsAhll....s..uplhluslp+sssG...thtlssPhclsss.tpsssuluWtssssllVhspsssssV.h...VslDGutsssh.susshssslhusuussst.....lhsssssulhp.....spssphWpplsshhss....PshP ......................................................................................................Gthht......hss.tphp.h.s.GshG......pss.s.susAlut..s..s..p.....hsA................................uVs.............sstps..Lh.s..s.s.h...G..u.p.s.hpsh...............sGt......sL.s.+.P..oa.......s......h.......s...ss.........lW..sss-..sssslplht.......................ss..............Gpht...............................................phsV-.h.suhst.............tlsslpl....S.DGsRsAhll.......s....sp..l.hl...uhlp+spsu.......phhhshsh.phu..s....hs...slssluWhss.s.p..Ls.Vh..s..ps..s.ss..sVth.......lsl.DGus.......sss....ut...hsss.hsulsusssp.............lhls...s..stsll............tpts.tWppl.thhss.....s........................................................................................................................................ 
0 37 82 109 +10480 PF10648 Gmad2 Immunoglobulin-like domain of bacterial spore germination Coggill P anon Rigden D Domain This domain is found linked to the GerMN domain Pfam:PF10646 in some bacterial proteins. It is predicted to contain an immunoglobulin-like all-beta fold. 25.70 25.70 26.30 27.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.78 0.72 -3.81 20 66 2009-01-15 18:05:59 2008-05-13 09:15:26 4 10 62 0 37 66 15 87.70 28 26.99 CHANGED hplplppPpssshlsu......slpVpGp..Ap.sFEuslshclp-usupll.tcthspAssussuhGpFpsslshsss..ss.pupltVhpssspsGs...h .................IhlhsPt.sshVsu......shpVpGp....Ap...sFEuslsh+l...hDusGpll.scshsp..As.su..usshGsFpsplshps.t.st..puhl..plaphSscDGo.h.................. 0 22 31 36 +10481 PF10649 DUF2478 Protein of unknown function (DUF2478) FIGfam, Mistry J, Coggill P anon FIG046046 (Release 2.0) Family This is a family of hypothetical bacterial proteins found in the vicinity of Molybdenum ABC transporter ATP-binding gene-products MobA MobB and MobC. However the function could not be confirmed. This family appears to belong to the P-loop superfamily by alignment to Pfam:PF03266. However, the characteristic P-loop sequence motif appears to have diverged beyond recognition in this family. 
22.10 22.10 22.30 22.10 21.80 22.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.80 0.71 -4.90 57 205 2012-10-05 12:31:09 2008-05-13 16:49:41 4 3 152 0 78 206 9 157.00 37 79.33 CHANGED lAAlshsss..sss.DtlLsshAtcLtup.GhRluGlV............Qtp.....tptssppsshp.lpsL.ssGppltIoQsLGsuupuCRLDsuuLspAsutlppAlsps..sD........LlllN+FGKpEupG+GhpshIspAlutslPVLsuVsppthcsWppFuuuhustLss-ppultsW .....................lAAlhh.s.ps.sss.DsLLuphAtchppp.GlRluGhl........................Qpp.h...tssppspp.chc.lccl.soGt.p..hhI......S......Q...s....LGs..GS.....p..G..C..R..L.....DPuu....LA.c.A....u.u.sltsslpsu...sp........LLllN+FGKtEs-G+GhR.shItt.Ahut.sIPVLTsVsp.phl-u.W.c.cFuGshushLsssppulpsW.................................... 0 13 38 55 +10482 PF10650 zf-C3H1 Putative zinc-finger domain Wood V, Coggill P anon Wood V Domain This domain is conserved in fungi and might be a zinc-finger domain as it contains three conserved Cs and an H in the C-x8-C-x5-C-x3-H conformation typical of a zinc-finger. 20.90 20.90 20.90 20.90 20.60 20.50 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.40 0.72 -6.75 0.72 -4.45 14 159 2009-01-15 18:05:59 2008-05-14 12:01:54 4 9 125 0 107 155 0 22.10 48 1.64 CHANGED hlC.aElsGthCNDcsCpaQHh+ ...hC.a-LsG.sCNDc-CpaQHh... 0 31 47 76 +10483 PF10651 DUF2479 Domain of unknown function (DUF2479) FIGfam, Mistry J, Coggill P anon FIG015005 (Release 2.0) Domain This domain is found in phage from a number of different bacteria. It is purported to be a putative long tail fibre (Bacteriophage A118) protein, but this could not be confirmed. 
21.10 21.10 21.20 21.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.66 0.71 -4.74 17 665 2009-01-15 18:05:59 2008-05-14 14:45:55 4 14 402 19 25 471 10 166.10 25 34.80 CHANGED M....lpKhss.l-hphssphpshhstsIpFaspDp.sTAhlpFpls+cshPLslSptpscstlhLhhsss........hhhtslclhDsh..pGhlpYslPschlKp..sGpVpuplalp.p......sspslshtp.FoFpIccuLlsphss.ht.hYIcpF-clcchlpcphpcl..........cpslspspshhpp ...................................M......K..t.lphp.psph.sl.s....hsI.pF.h.ptDp.sou.sLphs.l.s.c....ss........t........s............lsLosp.tlps.clslhtcss.......t.hhsc...s...l.p...IlDsh..cGhlpYhlPschlcp..sGplcAplh.lpp.........ss...ps....l...s.s....pp.FoFslp..cuh.lsshsutph.sY.l...hsh.....pcl.....hchlp....cphpph.............cpshpphpp....p...................................................................... 1 9 17 25 +10484 PF10652 DUF2480 Protein of unknown function (DUF2480) FIGfam, Mistry J, Coggill P anon FIG020045 (Release 2.0) Family All the members of this family are uncharacterised proteins, but the environment in which they are found on the bacterial genome suggests a function as a glucose-6-phosphate isomerase (EC 5.3.1.9). This could not, however, be confirmed. 25.00 25.00 114.20 114.10 24.20 19.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.00 0.71 -4.68 18 81 2009-01-15 18:05:59 2008-05-14 18:13:37 4 1 75 0 40 88 197 167.00 47 98.13 CHANGED M..t-...EIlNRVANSpLhTFDLEDaYPcGpRhhlDIppWLh-GllL+EK-FRptlKsHDWSpYpcpaVAlaCSTDAIlPuWAYMLlsspLpPaApKVVhGsL-sLEohlYp-llsplDlssacs+PVIIKGCScKPVPpsAYlhlhpKLQPVAKSlMYGEACSoVPLYK+ ........-EI.lN+VA.pSsLlsFDLE-aYPpG.cR.h.hhDIcsaLapGhlL+EK-FRptlKpaDWopYpspaVAlhCSsDAIlPsWAYMLlsspLpPaA+cllhGsh-pLEptLapchlsplDhopapDc.VllKGCoccsVPpsAYlhhsp+L.pPlsKSlMaGEsCSoVPlaK+......... 
0 17 36 40 +10485 PF10653 Phage-A118_gp45 Protein gp45 of Bacteriophage A118 FIGfam, Mistry J, Coggill P anon FIG0160151 (Release 2.0) Family This domain is found in bacteriophage and is thought to have a gp45 function within the phage tail-fibre system. 21.60 21.60 21.60 79.80 19.40 18.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.05 0.72 -4.11 2 34 2009-01-15 18:05:59 2008-05-16 10:25:53 4 1 26 0 1 17 0 60.50 87 100.00 CHANGED MsERVFRKpT.FGsSEIahssRTKMIANPAFpQKIPL.ETGC-pMsDYIEELKLKGYEEVTR MTERVFRKpT.FGsSEIahssRTKMIANPAFpQKIPL.ETGC-pMsDYIEELKLKGYEEVTR 0 1 1 1 +10486 PF10654 DUF2481 DUF2482; Protein of unknown function (DUF2481) FIGfam, Mistry J, Coggill P anon FIG020094 (Release 2.0) Family This is a hypothetical protein family homologous to Lmo2305 in Bacteriophage A118 systems. 25.10 25.10 25.90 83.80 25.00 25.00 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.61 0.71 -4.44 4 25 2009-01-15 18:05:59 2008-05-16 11:19:24 4 1 20 0 1 17 0 126.00 65 98.31 CHANGED sVMElTENKARQREIISYlsNpsLshs-hKcLQKELNpLMNcNTEEKpKTaWsKThcRlVtNKpWp-ITltEFl-LRHAGLos.AIADaFplS+uslFNaT..pcN+pEYa+hFshs.YpKsKEhWsD ..sVMEhTcsKARQREIISYIsNNDlpLs-LhcLQKELNpLMNENT.EKQKTYWoKTFDRIV++KcWsEITIpEFs-LRNAGLTsYAIA-HFKVSKulVFNYT..QRNKKEYYplFDMsEYQKNKEhWND...... 0 1 1 1 +10487 PF10655 DUF2482 Hypothetical protein of unknown function (DUF2482) FIGfam, Mistry J, Coggill P anon FIG030041 (Release 2.0) Family All the members of this very small, very short family are derived from bacteriophages, of the SA bacteriophages 11, Mu50B, system, and from the Staphylococcal_phi-Mu50B-like_prophages subsystem. All members are hypothetical proteins. 
25.00 25.00 45.10 45.00 23.50 23.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.21 0.72 -3.95 4 148 2009-01-15 18:05:59 2008-05-16 11:42:32 4 1 106 0 1 35 0 98.00 70 98.61 CHANGED MTKNYKDMTQ-ElRDLLuEKsGELYELspEIccETEFDlLLFSolGVssGDhluSSpsALGsshsLAsLLcNpssacDlsNVIpMpKLQKhLGlDDsKED ..MTKNYKDMTQEElKDLLSEKoGELYELAKEIKtEocFDILLFSoIGVhDGDaluu.SsSVIGcsFDLAsLLDsscuY+DIlNVlQMpK.QKhLGIDDsKED...... 0 0 0 1 +10488 PF10656 DUF2483 Hypothetical protein of unknown function (DUF2483) FIGfam, Mistry J, Coggill P anon FIG032091 (Release 2.0) Family This is a family of proteins found in bacteriophage particularly of the SA bacteriophages 11, Mu50B, family, homologous to phi-ETA orf16. 25.00 25.00 41.10 41.00 19.40 19.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.42 0.72 -3.90 7 131 2009-09-11 01:17:25 2008-05-16 13:49:18 4 1 122 0 4 32 0 71.50 78 98.11 CHANGED M.KpTV...TYlIKhKDt..sLYITN+PTstpsT.pYSss+pcAREFsGh--ssIDMstHpAIKKTVTETpEYEEVth ...M.KQTV...TYIIRHRDM..PIYITNKPTDNNSDISYSTNRNRAREFNGMEEASINMDYHKAIKKTVTETIEYEEVEH.......... 0 1 1 4 +10489 PF10657 RC-P840_PscD Photosystem P840 reaction centre protein PscD FIGfam, Mistry J, Coggill P anon FIG031038 (Release 2.0) Family The photosynthetic reaction centres (RCs) of aerotolerant organisms contain a heterodimeric core, built up of two strongly homologous polypeptides each of which contributes five transmembrane peptide helices to hold a pseudo-symmetric double set of redox components. Two molecules of PscD are housed within a subunit. PscD may be involved in stabilising the PscB component since it is found to co-precipitate with FMO (Fenna-Mathews-Olson BChl a-protein) and PscB. It may also be involved in the interaction with ferredoxin [1]. 
23.40 23.40 23.60 117.30 22.40 23.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.77 0.71 -4.35 8 13 2009-01-15 18:05:59 2008-05-16 14:15:59 4 1 13 0 11 13 0 142.40 66 98.98 CHANGED MQsQLSRP.TusNQVRsSs.................SGPWSGNAAHKAEKYFITSAKRD+pGpLQlpISPASGRRKLSPTcEMIsKlIuGEIELaVLTTQPDIuIDLsQKVLDNENRYVIDFDKRGVKWTMRDIPVFYcSL+ppLCVEIDRpoYTLDEFFK ...MQsQLSRP.TusNQVRtSs.................SGPW.SGNAsHKAEKYFITSAKRD+ps+LQlpIsPASGRRKLSPTpEMIsKlIsGEIELaVLTTQPDIAIsLppKVLDNENRYVIDFDKRGVKWTMRDIPVFYsohp+pLCVEIDR+sYTLsEFFK 0 1 2 8 +10490 PF10658 DUF2484 Protein of unknown function (DUF2484) FIGfam, Mistry J, Coggill P anon FIG030013 (Release 2.0) Family A role of this family in UDP-N-acetylenolpyruvoylglucosamine reductase, as MurB, could not be confirmed. 25.00 25.00 25.80 52.30 23.30 19.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.78 0.72 -3.86 23 57 2009-01-15 18:05:59 2008-05-16 17:16:29 4 1 43 0 11 57 5 75.80 44 91.47 CHANGED Ms....luLhLuslWslsAsllAhLPSRppHWptAal..LlAlGlPLLGaVsapsGPhhGlhsLsAGhSlLRWPVhYlh+hlR ......MshSLlLAslWslsAsllAhLPuR.pHWptAhl..LIusGlPLlGaVsappGPahGlhsLhAGhShLRWPlhYLh+hlR.. 0 1 8 9 +10491 PF10659 Trypan_glycop_C Trypanosome variant surface glycoprotein C-terminal domain Bateman A anon Pfam-B_1351 (release 3.0) Domain The trypanosome parasite expresses these proteins to evade the immune response. 
21.40 4.60 21.60 4.70 21.30 4.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.28 0.72 -11.30 0.72 -3.70 103 238 2009-01-15 18:05:59 2008-05-17 14:12:18 4 6 9 2 67 241 1 95.60 29 21.67 CHANGED C.tt.tpspspCp.....p.ssCpas..ppppcsp..Cchppsp....tppps.tsstttt.........stppsp+C.ttcp.cscCtp.s...........CKW..-sppC..KDSShLlsK+hAL..huuAFlAL........LF ..............................................C..pthpppppCp...pttCpap..tptpptc....Cp.pppptppps.sssststs........spsssppCps+p..cscCcp.s.....................CpW...........-tppC......+-SShLlsKpFAL..huAAFsuL.L............................. 0 0 67 67 +10492 PF10660 MitoNEET_N Iron-containing outer mitochondrial membrane protein N-terminus Coggill P anon Wiley S Domain MitoNEET_N is the N-terminal region of the MitoNEET and Miner-type proteins that carry a zf-CDGSH, Pfam:PF09360, redox-active 2Fe-2S cluster. The whole protein regulates oxidative capacity. The domain is an anchor sequence that tethers the protein to the outer membrane. 21.30 21.30 21.30 26.70 20.60 17.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.95 0.72 -4.12 9 136 2009-01-15 18:05:59 2008-05-19 13:08:18 4 2 85 3 88 127 1 53.10 38 42.79 CHANGED M..pslSpllps.hPthLttlPlPcohtshh+Lshp-WLsLlshsussAulGYlsY+sFhsKc+pp ..................hp..hs.hhtthPhPpohtsahp.LshpEWltLl.PhhuslAslGYLAh+sFhsKc+p............... 0 20 25 48 +10493 PF10661 EssA WXG100 protein secretion system (Wss), protein EssA FIGfam, Mistry J, Coggill P, Desvaux M anon FIG043089 (Release 2.0) Family The WXG100 protein secretion system (Wss) is responsible for the secretion of WXG100 proteins (Pfam:PF06013) such as ESAT-6 and CFP-10 in Mycobacterium tuberculosis or EsxA and EsxB in Staphylococcus aureus. In S. aureus, the Wss seems to be encoded by a locus of eight CDS, called ess (eSAT-6 secretion system). 
This locus encodes, amongst several other proteins, EssA, a protein predicted to possess one transmembrane domain. Due to its predicted membrane location and its absolute requirement for WXG100 protein secretion, it has been speculated that EssA could form a secretion apparatus in conjunction with the polytopic membrane protein EsaA, YukC (Pfam:PF10140) and YukAB, which is a membrane-bound ATPase containing Ftsk/SpoIIIE domains (Pfam:PF01580) called EssC in S. aureus and Snm1/Snm2 in Mycobacterium tuberculosis. Proteins homologous to EssA, YukC, EsaA and YukD seem absent from mycobacteria [1]. 21.00 21.00 21.80 21.60 20.90 20.30 hmmbuild --amino -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.95 0.71 -4.52 4 267 2009-01-15 18:05:59 2008-05-19 16:20:43 4 1 265 0 8 83 1 130.50 47 86.05 CHANGED ItuuADSYLpcsGKhchKlDRlpcocpEKNc..cthcETELDKsuIpLFssEh-cclpcKppsEpc-h-clcsuLF.cphcss.sVK-TKcpLFSu-Y.ssu..schApo....EspTcsshS.sllhhhuGsllhlCsGlYslhR+lac ................tshh.slssp............................pcE...Ec+hss-Lsp.YDTTLFNKD.sKtVN-.s..htcpK.c-.pQpIKNchFQNQ.A.S+u..T.RL..s...ET.KKVLFSK...o.NlpKo...oESDKS..PYI..QNKQEKpIaP.Y.ILhSlGA.hLTLGhlI.........FsIa+t..................................... 2 2 4 7 +10494 PF10662 PduV-EutP Ethanolamine utilisation - propanediol utilisation FIGfam, Mistry J, Coggill P anon FIG047026 (Release 2.0) Domain Members of this family function in ethanolamine and propanediol degradation pathways, however the exact roles of these proteins is poorly understood [1-3]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.59 0.71 -4.70 20 1157 2012-10-05 12:31:09 2008-05-19 16:22:45 4 8 759 0 91 4581 2185 127.50 43 87.54 CHANGED MK+lhLlGtoGsGKTTLsQsLpG.....c-L.+Y+KTQulcapsss.IDTPGEYlEsRpaYsALhsous-ADlIuLlhsAsp.hssFsPGFuuhFsKPVIGIlTKlDLu.sppplphscphLppAGApcI.FclSulsspGl-ELhsYLpp ..........................................................................MKRlh.h..lGsstsGKTT.Lh.pu.L.pG....................p...h........hh.........+......K............T........Q...........A..........l...............E.............a.............s.............s.............p...........s.........I...............D........T.......P...G..........E........Y.........h........p........p..................p........h.......Y....p.....A........L........l.......s......o........h........p.........-.....s.........D......h.....l....h..h.V.h...s........A........s........s........................t.......o.....h.......h...s.......s.....G.......h......h.........s.......h.......h........s........+..........h.....I............u......l......l...o...KsD.h...s...............s.....t...c......l.....u....h....s.....+.....p...h.....L........p.......c........s........G......h........c........c........I........F.........l..suhsspuV.ppLhcaLt.s.......................................................................................................... 0 46 65 79 +10496 PF10664 NdhM DUF2485; NADHqo1-M; NADHdh-M; Cyanobacterial and plastid NDH-1 subunit M FIGfam, Mistry J, Coggill P anon FIG006356 (Release 2.0) Family The proton-pumping NADH:ubiquinone oxidoreductase catalyses the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. 
Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 sub-complexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1]. The cyanobacterial NDH-1 complex contains additional subunits, NdhM and NdhN, compared with the minimal set of the bacterial enzyme and these seem to be specific for thylakoid-located NDH-1 of photosynthetic organisms [2]. The three subunits of NDH-1, NdhM, NdhN and NdhO are essential for effecting cyclic electron flow around photosystem I, by supplying extra-ATP for photosynthesis in both plastids and cyanobacteria [3, 4]. 
19.80 19.80 20.90 90.80 19.20 18.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.31 0.72 -4.14 21 90 2009-09-11 07:47:26 2008-05-23 11:37:31 4 1 84 0 43 86 121 108.50 58 75.56 CHANGED LKsTTRHVRIFTAcVc.ss-Llsss..spLTLDlDPDNEFlWs-sulpKV.p+FcELV-utuGp-Lo-YNLRRIGSDLEtaIRpLLQsGEluYN.suRVLNYSMGLPRssp .LKSTTRHlRIasAclc.ss-Llsss...spLTLDlDPDNEFlWs--ulpKVYp+FcELV-shsG...t-Lo-YsLR+IGSDLEHaIRpLLQsGElSYNhsuRVlNYSMGLP+lt.t.................... 0 7 27 38 +10497 PF10665 Minor_capsid_1 Phage_Gp9; Minor capsid protein FIGfam, Mistry J, Coggill P anon FIG016324 (Release 2.0) Family This is a putative tail-knob or minor capsid protein from bacteriophages. 25.00 25.00 25.60 25.30 23.50 21.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.40 0.71 -4.24 5 90 2009-12-03 14:34:41 2008-05-23 13:52:04 4 1 89 0 7 71 0 111.50 28 97.94 CHANGED Mlht.PlPhchLIHoloYcEY..hGEDca...GpssYuKPllIE+VRVsPscchssuosucolhaNAVlFlDulNS.sPhhpFpc...pSKIsFcGK-asIpKVIPsYss.ScslHHaELEVl .............................shchLlcslphc..th...t.s.c.sca...spss.YscslpIcpVRhDh.......s.......pshsss......ssu..cptths..ul.....IFl.sshS..ssh.s-hpp.....tu+lh.....as.G.c.-Y.sIscl.ssYts.s.splapaElEV.......... 0 2 6 7 +10498 PF10666 Phage_Gp14 Phage protein Gp14 FIGfam, Mistry J, Coggill P anon (Release 2.0) Family This phage protein family is of unknown function but is expressed from within a cluster of tail- and base plate-producing genes [1]. 
21.30 21.30 21.70 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.80 0.71 -4.32 2 24 2009-12-01 14:53:38 2008-05-23 14:29:37 4 1 22 0 1 16 3 127.00 72 98.20 CHANGED MsQNNVINIQLEESYQEFQLGTELFRVGLGDcMRRKWIEADEKYKKKLEKLNKYNIDNTDEMSSE-YFsLEEDVKEALTEAYAlLLDDEcAFsKCYtQCKDILKMYQVYsQVAE.IVGSVEKQQNEIQKKYKAKMTKKAK ....MsQNNVINIQLEESYQEFQLGTELF+VGLGDEMRRKWIEADEKYKKKLEKLNKYNIDNTDEMSSE-YFsLEEDVKEALTEAYAlLLDDEcAFsKCYtQ.CKDILKMYQVYsQVAE.IVGSVEKQQNEIQKKYpAKMTKKAK........................... 0 1 1 1 +10499 PF10667 DUF2486 Protein of unknown function (DUF2486) FIGfam, Mistry J, Coggill P anon FIG008383 (Release 2.0) Family This family is made up of members from various Burkholderia spp. The function is unknown. 21.50 21.50 22.30 21.50 21.00 21.30 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.90 0.70 -3.83 10 64 2009-01-15 18:05:59 2008-05-23 16:06:39 4 2 63 0 15 70 3 213.10 43 94.62 CHANGED MopspssS...IPsLTDVLVPG+......Ps.ARuouuDss.........s+DsAAhPlLss..s.s.t.susut.....ccspsssc.lsscPlPoPclssVthPuc...................sDAPAcPu..uut+VluccAsAhpAPhPssLAsDsstsssuuAs...........hsAu-sA...PcussPuAsssussphutA............sspsAAshTs.DAppIAERLRsRlTsYLTG-GR-sIEARCRDALH-HouWLVGQITREVALALETEVhcWVR-AVcEEIARRsuG ..................................MspspssS...IPsLTDVLVPGp......Ps.ARssuusss............................pAu.P..............sssu...............pp.....sttpscss.sscPsssst..sssst.hPss...................pDAshtPu..st.cssA.tt.uhthPhsssLAs-.....s.s.shsAh.............h.AscsA...Pch...ssPAAhssssPtlsps....................................................Asu.hhss.h-AppIAERL+uRhTpYLTGEGRulIEARCRDALH-HuuWLVGQIsREVALALETEVhsWVp-AVsttLARRpss.......... 0 1 2 7 +10500 PF10668 Phage_terminase Phage terminase small subunit FIGfams, Mistry J, Coggill P anon FIG022212 (Release 2.0) Domain This family of small highly conserved proteins come from a subset of Firmicute species. 
Its putative function is as a phage terminase small subunit. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.94 0.72 -4.19 6 212 2012-10-04 14:01:12 2008-05-23 17:12:29 4 8 173 0 19 242 9 60.70 37 24.56 CHANGED MARpRsPcRDcAhchahESuGsh.LhDIAscLslssSQIRKWKupDKWs-.......p........hNuslT.......ppK ....................MsRt..RsP.p.....RDpAhcla...hc...ssGphpL+-I..AscLs......Vo.s.....upIR.....+WKopD.K...Wsp..............p.......hpussp............................... 0 5 13 15 +10501 PF10669 Phage_Gp23 Protein gp23 (Bacteriophage A118) FIGfam, Mistry J, Coggill P anon FIG018382 (Release 2.0) Family This is the highly conserved family of the major tail subunit protein. 22.60 22.60 23.70 26.80 22.40 22.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.54 0.71 -4.00 3 16 2009-01-15 18:05:59 2008-05-23 17:22:57 4 2 15 0 2 7 0 120.90 88 55.90 CHANGED MYEGLTKVFDYALAKEMFFAALFVALFIILLIITKRIWDDSKIVRVEMKEERDKMETEREKRDKESKEERDKFISTMNEQQRLMDKQNDMMGQQQQSIDSLSKSVGKLAHKVDLLEHKITK .......MYDGLTKVFDYALAKEMFFAALFVALFIILLIITKRIWDDSKIVRIEMKE.EREKVEEEREKRNKESKEERDKFISTMNEQQRL...MDRQNDMMKQQQQSIDSLSKSVGKLAHKVDLLEHKITK........................... 0 1 1 2 +10502 PF10670 DUF4198 NikM; Domain of unknown function (DUF4198) Coggill P, Bateman A anon Pfam-B_42996 (release 22.0) Family This family was previously missannotated in Pfam as NikM. 
34.00 34.00 34.00 34.00 33.80 33.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.44 0.70 -4.34 108 1021 2009-09-14 15:50:14 2008-05-29 08:59:34 4 7 729 0 303 893 241 211.50 19 81.58 CHANGED ApAHthWlhPssshl.ssst.sh.htlthspshsssh...........................shph.......hsP............cGp.sshhp.......shtts.ptt.h..............ptssaplsh....thptsshashhhpst.hhpppt.....................s....sphhhphsKshl.......stsssspshppsh..GhslEllPls+P.ss....lhsGpshphpll.hcG+Phss.scVplhhtstchpsp..................shtlpTDssGhhohshspuGhahhsAhhpsst ..................................................................................................................AtAHthal.ss......p.....h..pttt..........htlha.s.c..shptt.h..tt....................................................shph...hps................sGp....tsh.t......................phtphph.........................ppssatlsh..........thp..s.uhas..h.hhps.h..h.hphpp...........................................t...sphhhphsKshl........p..t...s..s...p..t..h...p..psh....GhslEllPlsc.......P...s......lhsGps.....hphpll.hcG+Pls..s..splplphtsh.h.p.t....................................................shphpTD.s.pGhhshshspsGha.hhts.hpt..h........................... 0 105 215 264 +10503 PF10671 TcpQ Toxin co-regulated pilus biosynthesis protein Q FIGfam, Mistry J, Coggill P anon FIG032035 (Release 2.0) Family The toxin-coregulated pilus (TCP) of Vibrio cholerae and the soluble TcpF protein that is secreted via the TCP biogenesis apparatus are essential for intestinal colonisation in the disease of cholera. TcpQ is part of an outer membrane complex of the TCP biogenesis apparatus, comprised of TcpC and TcpQ, and the TcpQ is required for proper localisation of TcpC to the outer membrane. The domain is found in other Proteobacterial species apart from Vibrio. 
21.50 21.50 21.60 21.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.74 0.72 -4.10 60 393 2009-01-15 18:05:59 2008-05-29 13:17:40 4 6 315 2 76 322 9 85.90 27 34.59 CHANGED stsaphp.sspsLcpsLpcWApps........GWplh...Wp.sst..Da.lsushsasG..sFpp.........Alpplhps..hpssshslpsshat..usplltlspht.............p ............s..tWph..sssTL+psLpcWApps...........sWplh...Ws.ssh........-apl-uslsapG..sFcs.........Alpplhph..h..p...sspt.s....lhsph.p......tppllhVss....pp................................. 0 19 40 58 +10504 PF10672 Methyltrans_SAM S-adenosylmethionine-dependent methyltransferase Coggill P anon Meireles D Family Members of this family are S-adenosylmethionine-dependent methyltransferases from gamma-proteobacterial species. The diversity in the roles of methylation is matched by the almost bewildering number of methyltransferase enzymes that catalyse the methylation reaction. Although several classes of methyltransferase enzymes are known, the great majority of methylation reactions are catalysed by the S-adenosylmethionine-dependent methyltransferases. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.82 0.70 -5.56 3 4405 2012-10-10 17:06:42 2008-05-29 15:15:01 4 12 3015 23 953 3434 475 251.00 27 55.36 CHANGED RRLFHGRGRpWPGLEQITCDWLQGQLLVNLFKEVDDAFLuuLKcGLsALsuuslWAoKQGRolVLQHRYADGAPSEVLlGELs-oPVVVEsGLKYQLDIGRNQNFGLFLDMRhGRcWVQENAKuKNVLNLFAYTCGFSVAAIAGGAcQVVNVDMARGSLSKGRDNHRLNGHDlspVSFLGHDIFKSWGKIKKuGPYDLVIIDPPSFQKGSFALTKDYKKILRRLPELLsEGGpVlACVNSPAVoPDFLIEoMAEEAPsLcFlERLDNPPEFsDVDs-AuLKVLLFR .........................................................................................................h.htt.............................................................................................................................................................................................................................t....t....h..h...h..G....c...h..s..p..h..h.....l....p....E...p..G....h....+...h.h.Vs...l.p..c..s..h.cTGlFLD.pR....t.s....R......p........h.......l.......t........p.....h............u.........c.......u........K...............c.......VLNh.F.oYTGu...FoV..t..A.u...h..G........G..A...p.p.ss.oVDh.SppuLch.u.c.....c.N.h.p.LN........u.......l.......s.....h......p......p...t..............c...............h....l...p...s.......D.....s.......F.................c...h....h.........p....p......h...........c....c........p.........t...............p...a.DlIllDPP...........s..F......s......c......s......p......h....s.....l.....p...+...s...Y....t...c...l........t...t....s....h....pl....L...p..s....s.G.h...l.h......h..s......ss......s.........h......................................................................................................................................tttp.hh.t.............................................................................................................. 
0 328 573 786 +10505 PF10673 DUF2487 Protein of unknown function (DUF2487) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 26.50 26.30 22.60 21.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.76 0.71 -4.39 15 163 2009-01-15 18:05:59 2008-05-30 11:09:48 4 1 162 0 32 124 0 137.50 48 89.88 CHANGED l-pYLpu+-YIDTAlIPLlslsh..spchKphsppGEFstlLupElERQhKGRlhLhPuFTYlsssppptth..cLpcWpsclpppuFcHVhalTuDpsWK...ttpshpsp.llW.lPulPLEp.....................hscshK+cllc-plpQllshLhp+W ......lEpaEQAR-YVDTulIPLlSISs..ucchKpsVEQGEFlcLLShELERpaKGRVlLLPAFTYLs-.sQ+scps..RLp-WoscLpppGFKHIsYVTSD..hu...WK..pthp-lpGc.LhW.hPolsLEp.....................hsDptKREll+s+l+plhshL.pKW.................................... 0 11 23 26 +10506 PF10674 Ycf54 DUF2488; Protein of unknown function (DUF2488) Mistry J anon PRODOM Family This protein is conserved in the green lineage and located in the chloroplast. 25.00 25.00 32.80 32.20 20.20 19.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -10.13 0.72 -3.86 24 115 2009-01-15 18:05:59 2008-05-30 11:33:57 4 2 101 7 50 116 96 90.40 50 67.26 CHANGED sTYaalhASc+FLhpE...EPLEEVL+ERpRaYtEpsKcIDFWLVppPAF..LpuPEhupl+schPpPuAAllSTsspFIsaLKLRLEaVhpGpFEAP .......pTYaallASp+FLl-E...EPh-ElL+ERhRpYtEpsKElDFWLVhpPuF..L.-ssphsclpt+l......sp......PusAlVSTstpFI.T.alKLRL-aVlpGpFEA....... 0 14 36 46 +10507 PF10675 DUF2489 Protein of unknown function (DUF2489) Mistry J anon PRODOM Domain This is a bacterial family of uncharacterised proteins. 
19.80 19.80 20.70 20.80 19.40 19.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.40 0.71 -4.47 51 301 2009-01-15 18:05:59 2008-06-02 09:38:11 4 1 297 0 73 216 44 130.00 39 84.44 CHANGED IluLusYAshLLhpL+cQpt.p........pppptstppRptplh-SIclIApA.hhpcpC-lSEGslRltsLh-hlshs.p...tspYsuhhplYchlcchPpt-sR+pLsKpcRh+hDhpR.phEschcctIhp-sppL .............IluLuuYAsaLLhpL++Qpt.p........pptphAhppRpspIh-SlpllspA.hl.psQC-LSEusIRlhsLh-hlttptp........tppYPAhhcLYclV+cMPpt-s.RppLsKpERM+.-Lp.RppAEucLpcsIhtElptL............. 1 15 35 59 +10508 PF10676 gerPA Spore germination protein gerPA/gerPF Mistry J anon PRODOM Family This is a bacterial family of proteins that are required for the formation of functionally normal spores. Proteins in this family may be involved in establishing normal coat structure and/or permeability which could control the access of germinants to their receptor. 20.80 20.80 20.80 20.80 20.70 20.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.37 0.72 -4.13 32 769 2009-01-15 18:05:59 2008-06-02 11:22:48 4 1 146 0 83 320 2 70.30 37 91.63 CHANGED MPuhl...uslpIpslsusGslslGDshtISPpussKohuGuGuhNsGDhl....h.NhhshTsshDsDlsDQs.htNs ......MPuhV...sslhIpN..ssGshslGDsasloPhssoKuasGuGusNsG.hls...shstlSsTsshDsDlsDQs.hhs.s.................... 0 17 45 53 +10509 PF10677 DUF2490 Protein of unknown function (DUF2490) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. They appear to belong to the outer membrane beta barrel superfamily. 
29.80 29.80 29.80 33.60 29.70 29.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.78 0.71 -11.19 0.71 -5.03 62 262 2012-10-03 17:14:37 2008-06-02 12:42:47 4 1 180 0 128 283 185 196.50 20 83.77 CHANGED chssWhphthptpl...s......pchthph-hphRhtcs.............hs.chpphhlRsulsYpl.ssphplsh...GYsahhsps.tt.............thsEpRhapphhhphs..htphplspRhRhEpRahpt..........................ssphphRhR.plphphPlspptht......hhhhsElFhsl........stpsaspsRhhs.Glsaplspp..hplphGYhtp.......ttsspspchlth .............................................................thWhpht.hptcls.........pchphth-hphR...htss.............hs.phpphhl+sulsYpl.ssphplth.......GY...sahhsp.hts.....................ptEpRhatphp.hphp..hs...ph.....pls..pRhRhEpRahtt..........................................ssch.phRhR.tlphshsl.spphht.......hhh.sElFhsh........t.hsppthcpsRhhs.Glsaplspp...hpl-lGYhtQh.pt...ttss...p....h......................... 0 66 112 122 +10510 PF10678 DUF2492 Protein of unknown function (DUF2492) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. 19.70 19.70 20.90 20.90 18.50 16.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.50 0.72 -3.90 22 608 2009-01-15 18:05:59 2008-06-02 12:58:25 4 2 593 0 55 219 6 75.70 69 97.41 CHANGED hsSlHGHpVhplMltpspsho+tpLpphltpcFGppARFHTCSApshsA-pLlpFLhpKGKhl.scpGhsssts+lCp .....MDSIHGHEVLNMMIESGEQY.THuSLEAAIK.....A...RFGEpARF.HTCSAEsMTAuELVAFLAAKGKFI.s.s.E.-.GFSTcpSKICR............... 0 8 20 36 +10511 PF10679 DUF2491 Protein of unknown function (DUF2491) Mistry J anon PRODOM Family This is a bacterial family of uncharacterised proteins. 
25.00 25.00 33.50 33.40 20.80 18.60 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.16 0.70 -11.18 0.70 -5.33 16 485 2009-01-15 18:05:59 2008-06-02 13:05:10 4 2 472 0 38 197 8 212.60 70 97.47 CHANGED M.......FpchFG.+csps..ssPps.........P..hGLtlGtulplDsLthc..LLsschtltls.sssphItAhGcVcLspuspLhRaYsDD-sal.QVlssGs.stsclcDlpLahaa-otsluucu-apchlts..plutspYch-Gh.papRhWsssts..pscsVshsEclhptsss...spclpQhsMLYtRplst.spcEhLLlssEEc.....pps-hslshulGlsLpssDlpl .........................h....FQRLFG..KcsKP..AlsRG................P........LGLHLNuGFTLDTLAFR..LLE-.pLLlALP..GE-..a.....TVAAVS+IDLG..GGSQIFRYYTS........G..DEFL.QINTTGGpDlDDIDDIKLFVYEESaGIocEsHWR-AIs.s......ps.....hGAM.TLN..W.....Q......E.K...RWQRFFNSEEP.GNIEPVYMLEKVENQscA...KW-VHNFTMGYQRQVT-....D....saEYLLLNGEESFN-.hGEPEWlFSRALGVDIPLTSLcI......... 0 9 17 24 +10512 PF10680 RRN9 RNA polymerase I specific transcription initiation factor Wood V, Coggill P anon Pfam-B_44021 (release 22.0) Domain Initiation of transcription of ribosomal DNA (rDNA) in yeast involves an interaction of upstream activation factor (UAF) with the upstream element of the promoter, to form a stable UAF-template complex. UAF, together with the TATA-binding transcription initiation factor protein TBP, then recruits an essential core factor to the promoter, to form a stable preinitiation complex [1]. This Rrn9 domain, which seems to be constrained to fungi, is the two highly conserved regions of proteins which form one of the subunits of UAF and appears to be the region responsible for the interaction with TBP. The family includes the S.pombe Arc1 protein, Swiss:Q10204, which is found to be essential for the accumulation of condensin at kinetochores [2]. 
25.00 25.00 31.70 29.40 23.30 22.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -10.06 0.72 -4.24 22 106 2009-01-15 18:05:59 2008-06-04 12:54:52 4 4 105 0 80 106 0 72.80 33 14.00 CHANGED L-sLc...tcDLulHLYsuahL+ph...A...ttp.hp.s.........................................sthhPp+pWouWPh.sspVP.sspphh....D ......L-sLc...ppDLulHLYsAahLK+t................ttp..p.s.................................................sthhPp+pWTAWPhs.sscVPtsspph.p............................ 0 11 36 65 +10513 PF10681 Rot1 Chaperone for protein-folding within the ER, fungal Wood V, Coggill P anon Pfam-B_27706 (release 22.0) Family This conserved fungal family is an essential molecular chaperone in the endoplasmic reticulum. Molecular chaperones transiently interact with unfolded proteins to inhibit their self-aggregation and to support their folding and/or assembly. Rot1 is a general chaperone with some substrate specificity, its substrates being the structurally unrelated Kre5 Kre6 Big1 Atg22, which are type I, type II, and polytopic membrane proteins. The dependencies of each for Rot1 do not share similarities. However, their folding does require BiP, and one of these proteins was simultaneously associated with both Rot1 and BiP. In addition, Rot1 may cooperate with BiP/Kar2 in the folding of Kre6 [1]. 
23.20 23.20 23.20 36.10 22.90 22.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.53 0.70 -4.99 28 127 2009-01-15 18:05:59 2008-06-04 13:13:12 4 3 108 0 89 128 0 202.10 44 86.02 CHANGED LhGTWSoKSspVhTGPGFYDPls-hLlEPsLsGISYSFTcDGaaEEAhYRssuNPpsPsCPpuhlhaQHGoYplp.sNGoLhLsPhtVDGRQLlSDPCsss.....hus...YoRYsQsEpFppapV.lDs.Ya.Ghh+LpLapF.DGoPhpPhYLsY+PPhMLPTpTL..NPostst........................tpcthR...........phhcpshps.tpp......shhpp...thh..sthaWhulhhhuhGuhshh ..........LhGTWooKSppVhTGP...G.........FYDPlc-hhlEPshsGISYSF....TcDG.aaEEAhYpshuNPpsPsCspuhhhaQHGoYplp.sNGoLhLsP..htsDGRQLlSDPCssp.........tup........YoRYsQs.EhF.........ppap.Vhl......Ds....Y.......H...uhhpLpLap.a..DGo.PhpPhYLsY+PP.MLPTpoLNPsspst........................ttp..t.hR.................phhppp.h.t...hpp......shhtp....t..p.shhWahuhhhhuhGuhhhh................................... 0 27 48 76 +10514 PF10682 UL40 Glycoprotein of human cytomegalovirus HHV-5 Mistry J, Coggill P anon PRODOM_PD113623 Family This is glycoprotein UL40 from human cytomegalovirus or herpesvirus 5. The signal sequence of the UL40 polypeptide contains an HLA-E ligand identical with HLA-Cw*0304. The first 37 residues of UL40, including this ligand, are predicted to encode a signal peptide. The virus thus prevents the lysis by NK (natural killer) cells of the cell it has invaded [1-2]. 
25.00 25.00 38.00 27.60 20.60 19.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.75 0.70 -4.83 2 171 2009-01-15 18:05:59 2008-06-04 14:21:54 4 1 6 3 0 168 0 212.50 94 96.35 CHANGED hRhtFhhssMAP+TLll...llhhtl.u.hs..ApTssTTsGAhhupsP+.....C.hVFpGWVYAhYHpGsMsLMTlDV.CCR.pssNo........tcspLLI-VGNpTRpt....oCpsH..u.Q.tDC.sphVHVpGlspStFhLopLpSCCLNp.SpLSE+VAYHL+hRPAsFGLETWAMYTlGlLuLGSFSSFYsQlh+sL....psaHYAhKt ......................................TRIGFTCAVMAPRTLILTlGLLCMRIRSLLCSPAETTVTTAGshSAHGPh.....CPLVFQGWAYAVYHQGDMALMTLDVYCCRQTSSNTVVAFSHHPADNTLLIEVGNNTRRHVDGISCQDHFRA.QHQDCPAQTVHVRGVNESAFGLTHLQSCCLNEHSQLSERVAYHLKLRPA.TFGLETWAMYTVGILALGSFSSFYSQIARSLGVLPNDHHYALKK........ 0 0 0 0 +10515 PF10683 DBD_Tnp_Hermes Hermes_DBD; Hermes transposase DNA-binding domain Bateman A anon PDB:2bw3 Domain This domain confers specific DNA-binding on Hermes transposase [1]. 20.50 20.50 20.70 23.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.29 0.72 -4.51 4 22 2009-01-15 18:05:59 2008-06-04 16:21:41 4 5 13 2 13 19 0 61.90 36 13.87 CHANGED pspEL+hVStpDKcEAIEKCTQWVVcDCpPFSAVoGuGFhchVKFFlKIGA.YGEpVDV-DLLPsPsT ............................pKpchhc+ssp....aslpDhRPFshVpGpGFhcLsphhlplGApYGppVsl-slLPpP.T..... 0 1 7 11 +10516 PF10684 BDM Putative biofilm-dependent modulation protein Mistry J, Coggill P anon PRODOM_PD064586 Family This is a family of tightly conserved proteins from Enterobacteriaceae which are annotated as being biofilm-dependent modulation protein homologues. 
27.40 27.40 27.40 90.60 27.00 27.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.12 0.72 -3.76 2 398 2009-09-11 08:44:46 2008-06-04 16:56:56 4 1 396 0 9 28 0 71.60 95 100.00 CHANGED MCFINLRSVADTINTQTRRITMFTYYQAENSTAEPALVNAIEQGLRAEHGVVTEDDILMELTKWVEASDNDILSDIYQQTINYVVSGQHPTL .....................MFTYYQAENSTAEPALVNAIEQGLRAEHGVVTEDDILMELTKWVEASDNDILSDIYQQTINYVVSGQHPTL.. 0 2 2 5 +10517 PF10685 KGG Stress-induced bacterial acidophilic repeat motif Mistry J, Coggill P anon PRODOM_PD027049 Family This repeat is found in proteins which are expressed under conditions of stress in bacteria. The repeat contains a highly conserved, characteristic sequence motif,KGG, that is also recognised by plants and lower eukaryotes and repeated in their LEA (late embryogenesis abundant) family of proteins, thereby rendering those proteins bacteriostatic. An example of such an LEA family is LEA_5, Pfam:PF00477. Further downstream from this motif is a Walker A, nucleotide binding, motif GXXXXGK(S,T), that in YciG of E coli, eg Swiss:Q8X7B4, is QSGGNKSGKS [URL]. YciG is expressed as part of a three-gene operon, yciGFE, and this operon is induced by stress and is regulated by RpoS, which controls the general stress-response in E coli. YciG was shown to be important for stationary-phase resistance to thermal stress and in particular to acid stress. 20.40 20.40 20.50 20.40 20.20 20.30 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.29 0.72 -6.39 0.72 -3.93 45 2185 2012-10-01 20:07:30 2008-06-05 11:36:42 4 12 679 0 402 1036 15 21.90 59 59.07 CHANGED sFAs.Dt-+tpEhupKGGcuSsu ......NFtp.D.c+A...SEAG+KGGQpSsG..... 0 82 192 315 +10518 PF10686 DUF2493 Protein of unknown function (DUF2493) Mistry J, Coggill P anon PRODOM_PD031789 Family Members of this family are all Proteobacteria. The function is not known. 
24.50 24.50 24.60 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.43 0.72 -4.43 34 308 2009-01-15 18:05:59 2008-06-05 14:06:06 4 3 207 0 113 326 118 70.40 36 28.33 CHANGED PpGs+lAFoGGtDap...Daph........IWssLDcl+A+h.....PDMl..LlHGGss+GAEpIAApWAcsR..sVsQlsF+PDWp+HG .............................Gs+lhloGGt-.as...Dpch....................last.L.D.p....l+s+t.....P-.hl...LlHG.....Gu..+GA-tIAupWA...cpR.....t..Vsp..l..s...FpsDWpc+............ 0 22 74 95 +10520 PF10688 Imp-YgjV Bacterial inner membrane protein Mistry J, Coggill P anon PRODOM Family This is a family of inner membrane proteins. Many of the members are YgjV protein. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.74 0.71 -4.81 34 708 2009-09-11 16:42:10 2008-06-05 17:27:52 4 3 624 0 127 358 26 145.00 42 89.78 CHANGED thhhuQhlGhlAhslslhuF.pKpcc+lhhhlhstsllhulHFhLLGuhsAAshhhluulRhhhulhspS.........phlhhhFlslsllh...shhshpshhsllslhGolluThAhFphcGlp.hRhhhlluossWlhpNlllGShGGsLhEshhlssNhlslYR...happptps ....................ahlAQulGsl.AFhlGIosFas+--+Rh+hpLslausllulHFhLLGshsAuhoslLsulRoh.l.o....l+Tc.S.........hhVMs.lFIlL...ohsh..............Gl.sph....p.c.l.ELLPllGTlluTaALFpscGls.hRs.V.MhhuTsCWVIHNhahGSIGGohlEuoFllhNulsIlRaaRhpppu................. 0 23 55 90 +10521 PF10689 DUF2496 Protein of unknown function (DUF2496) Mistry J, Coggill P anon PRODOM_PD581819 Family This family consists of proteins from Gammaproteobacteria spp. Many members are annotated as being like the E coli protein YbaM. 25.00 25.00 40.00 39.90 18.50 15.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.73 0.72 -4.37 25 670 2009-01-15 18:05:59 2008-06-06 09:53:35 4 1 667 0 66 195 1 43.90 73 79.54 CHANGED sL-sAP-ElKLAVDLIhLLEsN-I-PpsALuAL-IVppDappKL ..SLENAPD-VKLAVDLIVLLEENpIPAcTVLRAL-IVKRDYEpKL. 
0 4 17 43 +10522 PF10690 Myticin-prepro Myticin pre-proprotein from the mussel Mistry J, Coggill P anon PRODOM_PD189357 Family Myticin is a cysteine-rich peptide produced in three isoforms, A, B and C, by Mytilus galloprovincialis, the Mediterranean mussel. Some isoforms show antibacterial activity against gram-positive bacteria, while others are additionally active against the fungus Fusarium oxysporum and a gram-negative bacterium, Escherichia coli D31. Myticin-prepro is the precursor peptide. The mature molecule, named myticin, consists of 40 residues, with four intramolecular disulfide bridges and a cysteine array in the primary structure different from that of previously characterised cysteine-rich antimicrobial peptides. The first 20 amino acids are a putative signal peptide, and the antimicrobial peptide sequence is a 36-residue C-terminal extension. Such a structure suggests that myticins are synthesised as prepro-proteins that are then processed by various proteolytic events before storage in the haemocytes as the active peptide. Myticin precursors are expressed mainly in the haemocytes. The family Mytilin has been merged into this family. 28.50 28.50 28.60 39.60 25.70 28.40 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.82 0.72 -3.96 5 179 2009-01-15 18:05:59 2008-06-06 09:56:07 4 1 5 1 0 179 0 98.50 78 99.26 CHANGED MKATILLAVlVAVlVAVpEAcuhuCTSYaCuKFCGoAuCoaYlChlLHsGKhCtCLHCSRs+.PhthotcA+shNEt...hDhoPpMN-MENLDpGMDM...........l MKATILLAVVVAVIVGVQEAQSlsCTSYYCSKFCGSAGCSLYGCYhLHPGKICYCLHCpRAESPLALSGSARNVN-pNpEMDNSPlMNEhENLDQEMDMF.. 0 0 0 0 +10523 PF10691 DUF2497 Protein of unknown function (DUF2497) Mistry J, Coggill P anon PRODOM_PD475087 Family Members of this family belong to the Alphaproteobacteria. The function of the family is not known. 
20.90 20.90 21.80 29.30 20.00 20.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.48 0.72 -4.01 43 281 2009-01-15 18:05:59 2008-06-06 10:13:57 4 1 277 0 96 223 45 73.60 40 34.00 CHANGED pcsLlStssspssssshpsLupslp............ssp..tpTlE-lVc-hLRPMLp-WLDpNLPslVEclVcpEIcRls+p .........................................................................................................................sllSptotpplusuFpsLscslp.............ss..tppolE-lstEhLRPhLpsWLDcNLPsLVE+lV+cEI-Rls+.s... 0 29 54 67 +10524 PF10692 DUF2498 Protein of unknown function (DUF2498) Mistry J, Coggill P anon PRODOM_PD060534 Family Members of this family are Gammaproteobacteria. Many are annotated as like E coli protein YciN. The function is not known. 25.00 25.00 26.00 26.00 17.90 16.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.58 0.72 -4.37 12 633 2009-01-15 18:05:59 2008-06-06 10:33:02 4 1 631 2 51 152 1 81.20 75 98.44 CHANGED pspppPIscpsLLh.ANplI+-HEDYlpGMcATsVEQKssVLVF+GEaFLDEpGLPTsKTTAVFNMFKaLAHhLStKYpLhc ....p.KETQPIDRETLLtEANKIIREHEDTLAGIcATGVTQRNGVLVFoGDYFLDEQGLPTsKSTAVFNMFKHLAHVLSEKYHLVD................. 0 4 14 32 +10525 PF10693 DUF2499 Protein of unknown function (DUF2499) Mistry J, Coggill P anon PRODOM_PD077121 Family Members of this family are found in plants, lower eukaryotes, and bacteria and the chloroplast where it is annotated as Ycf49 or Ycf49-like. The function is not known though several members are annotated as putative membrane proteins. 
22.10 22.10 24.50 23.70 20.90 20.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.36 0.72 -3.94 39 145 2009-01-15 18:05:59 2008-06-06 11:16:05 4 4 121 0 85 138 93 85.40 48 53.36 CHANGED sLSlsTWhIHluSllEWhlAIhllhcau..phptppshphLulAMlPsLlSAhsAhTWHhFDNs.sLthLVsLQAhhTllGN..hsLshAAapl ..........ALSlPTWhIHlSSVlEWlhAhhLlapYu..phsspptW+tLuhuMlPtLsuAhCACTWHhFsNs..uLphLVsLQAshTllGN..hTLshAAah.......... 0 27 54 76 +10526 PF10694 DUF2500 Protein of unknown function (DUF2500) Mistry J, Coggill P anon PRODOM_PD076478 Family The members of this family are largely confined to the Gammaproteobacteria. The function is not known. 22.00 22.00 22.00 22.00 21.40 21.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.27 0.72 -4.06 28 788 2009-09-11 12:25:21 2008-06-06 11:50:33 4 3 759 21 73 338 5 111.30 46 90.70 CHANGED PlhhhllhhlllshhsFhah......phhpcatpspsAPhhslpspllsK+ppshscspspp............psp....cYalpFchpsGs...................+hEFpV.usc.YptLs.GDpGpLTaQGs+FlpFsh .............................................PLFFIllluLI.lVAAo.F+Fh......QQRRE+AsN-hAPlppc.VsVosKRE+slsDRRSRQpEV.ssA......uooh....RYEsoF+Pp.sGG...................EpsFRL..suppYHALssGD+GTLoY+GTRFluF..s......................... 0 17 33 49 +10528 PF10696 DUF2501 Protein of unknown function (DUF2501) Mistry J, Coggill P anon PRODOM_PD096667 Family Members of this family are all Proteobacteria. Several are annotated as being YjjA or YjjA-like, but this protein is uncharacterised. 19.80 19.80 19.80 23.00 19.30 19.60 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.54 0.72 -4.00 19 613 2009-01-15 18:05:59 2008-06-06 13:05:51 4 1 595 0 55 226 8 79.70 64 49.43 CHANGED KNNhLu.u.ssAssVKspLhuKLGhsstp.tspDssYtsGlpGlLssusGpplsLs..Gs.sLppplKpKACDhVLpQGpu..Lh .............KQKLAS...s..TssENIKNQlLpKLGLsopE.QccDTNYL-GlQGLLKTKDGQQLNLsN.....IGoTPLAEKVKTKACDLVLKQGLN.h.h.... 
1 4 20 38 +10529 PF10697 DUF2502 Protein of unknown function (DUF2502) Mistry J, Coggill P anon PRODOM_PD029719 Family Members of this family are all Gammaproteobacteria. The function is not known. 20.60 20.60 29.10 23.20 18.00 18.20 hmmbuild --amino -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.13 0.72 -3.88 13 798 2009-01-15 18:05:59 2008-06-06 13:44:39 4 2 515 0 54 211 7 83.10 63 77.09 CHANGED pIsL.l.......PuVpLQIGDRDpRGpYWDGhcWRDcsWW+pH............YpWcGsRWpc+ssttcpta.+cp...................chDc+p........s+Gss++H ............EITL.LPSIKLQIGDRDchGNYWDGGHWRD+caW+p+....................YEWRtNRWa+H-s..s..h.+.+.s.a.cK+pt...............thccRDDHR...G+GtG+tH.............................................................. 0 3 13 30 +10530 PF10698 DUF2505 Protein of unknown function (DUF2505) Mistry J, Coggill P anon PRODOM_PD099734 Family Members of this family are all Actinobacteria. The function is not known. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.53 0.71 -4.73 34 427 2012-10-02 19:24:03 2008-06-06 16:13:37 4 2 348 0 112 314 13 158.00 27 92.63 CHANGED h-hsspas.ssl-pVapshsccsYaps+hpphus...tsplsphsssusG.....hplshpp..sl.sc.pLPuhlpphhssclplpcpEpW.ssht..supspuphpsslsGsPsslsGshtLps......susuophplsuslcVcVPLlGGKlEphluspltchlssEpchsppWl ...................................................................p.sspas.sss-pVhthhsctsa...Wpshh.p.phus....splp..s.hss...s...s..cu.......lplshtp....hlssp.....L..Puhl..pp.hls.u...s.Lplc..pspoW..ss..hs..sus...tpuo...ls...ss..lt.....GsPsphsGptslps................susG.oclphsuslpVpl......P.....llG...uKlEphhusplsphhshEpchsspWl............................... 0 35 81 103 +10531 PF10699 HAP2-GCS1 Male gamete fusion factor Bateman A, Coggill P anon Billker O Domain The gene encoding Arabidopsis HAP2 is allelic with GCS1 (Generative cell-specific protein 1). 
HAP2 is expressed only in the haploid sperm and is required for efficient guidance of the pollen tube to the ovules. In Arabidopsis the protein is a predicted membrane protein with an N-terminal secretion signal, a single transmembrane domain and a C-terminal histidine-rich domain [1]. HAP2-GCS1 is found from plants to lower eukaryotes and is necessary for the fusion of the gametes in fertilisation. It is involved in a novel mechanism for gamete fusion where a first species-specific protein binds male and female gamete membranes together after which a second, broadly conserved protein, either directly or indirectly, causes fusion of the two membranes together. The broadly conserved protein is represented by this HAP2-GCS1 domain, conserved from plants to lower eukaryotes [2]. In Plasmodium berghei the protein is expressed only in male gametocytes and gametes, having a male-specific function during the interaction with female gametes, and being indispensable for parasite fertilisation. The gene in plants and eukaryotes might well have originated from acquisition of plastids from red algae [3]. 19.70 19.70 20.20 26.60 19.40 19.00 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.41 0.72 -4.00 22 100 2009-01-15 18:05:59 2008-06-07 15:35:39 4 1 68 0 69 107 1 48.30 38 6.85 CHANGED hllspshhshsGt...pCsKIGVShpsapsp.s...sh.Cst..GoCLpsQLtcaap ..........hlLs+shhslsGh...-CsKIGVSapuFpsQ.s...sh.Cst.huoCLpsQLtcah...... 1 30 46 62 +10534 PF10702 DUF2507 Protein of unknown function (DUF2507) Mistry J, Coggill P anon PRODOM_PD089657 Family This family is conserved in Firmicutes. The function is not known. 
30.00 30.00 30.80 30.10 29.60 29.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.66 0.71 -4.48 22 327 2009-01-15 18:05:59 2008-06-09 15:33:57 4 2 325 2 57 203 0 123.60 42 79.59 CHANGED ssFuhpLlR-hlLPslLGp-psslLYWAGKcLAR+aPlpohE-l.tFFppAuaGsLplhKpK+pphhFpLoGshlspRlpp..ppssFpLEAGFlAEplppppthssEuh..thp++..pppVhlpVph ...............shFuhpLlR-hLLP-lLGsDtssILYWAGKcLARKaPLpohE-lhpFFcpAuaGsLollccK+pchpapLpGsllspRhcp..p.ccssFpLEAGFIAEplQpQpshssEuh.tphc++..p.c.pVphhVp...................... 0 16 34 44 +10535 PF10703 MoaF Molybdenum cofactor biosynthesis protein F Mistry J, Coggill P anon PRODOM_PD122919 Family MoaF protein is essential for the production of the monoamine-inducible 30kDa protein in Klebsiella [1]. It is necessary for reconstituting organoautotrophic growth in Ralstonia eutropha [2]. It is conserved in Proteobacteria and some lower eukaryotes. The operon regulating the Moa genes is responsible for molybdenum cofactor biosynthesis. 22.40 22.40 22.70 22.60 19.20 22.30 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.64 0.70 -5.32 12 151 2009-01-15 18:05:59 2008-06-09 16:14:14 4 2 115 0 36 122 6 252.70 41 92.97 CHANGED sssstaIsVGALA-GFA.csplLsssssLuG+shsLphssGtshtpthssp........psLpWpthttt.................GpusYRAoplRsulYaVDalc.tpspt.SVSLVlDhpptphouVhGpLPsc..Atsp.sshsRshpsh.LTuVcspFhaGslss.htsus..huPTc-LIGhRshYpYSPoEsYEHIYLNssaYsWQCLpGsE+GLADsDRC+haKlA-pLYLFVWREKllPTLGVllIDLp...thRoDGKIFGYpsuDFushsNFslGAaupVLNpTpH ....................................................s.pssalpVGALA-GF..-sphLsssssLsG+slslt.....hssG..thh.ph.t.F..ss........psLpWcttpss.................GpssY+AoplRsslaFVDal.........c.sptss.olSLVlDhsptshouVhGplPsc..st.chsuhsRVtpsh.lTuVpstFhaGslss.h.tsus...u.Tc-LIGpRshYpYS..PoEsYEHIYLNssaYsWpCLsGsE+GLAD.V.......DRC+haKlA-sLYLFsWREKl.l.PTLGVllIDLp...ttRocGKlhGYpsuDhusluNFPlGAauplLNpTha....................... 
0 3 10 23 +10536 PF10704 DUF2508 Protein of unknown function (DUF2508) Mistry J, Coggill P anon PRODOM_PD057080 Family This family is conserved in Firmicutes. Several members are annotated as being the protein YaaL. The function is not known. 20.40 20.40 20.50 20.90 20.20 20.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.30 0.72 -3.80 23 434 2009-01-15 18:05:59 2008-06-09 16:57:29 4 1 431 0 88 236 2 72.40 30 89.37 CHANGED M......Fh++K....scl++....-hD-cLlphlpcs+ccappp+pl.ppoh-..ss.p-lhhptKlscAKYhFLh+EA+pRplphp .............................................hhp+p.....cl+c.........p.hD.....p.c..Llphlccs+pchppt.+phhcps..h-....s..pps...lhph+lAcAKYhaLh+EA+pRtl+h.p...... 0 36 65 75 +10537 PF10705 Ycf15 Chloroplast protein precursor Ycf15 putative Mistry J, Coggill P anon PRODOM_PD014917 Family In some species of plants the ycf15 gene is probably not a protein-coding gene because the protein in these species has premature stop codons. Most of the members of the family are hypothetical or uncharacterised [1]. 25.00 25.00 25.60 39.40 21.10 18.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.25 0.72 -3.93 3 64 2009-01-15 18:05:59 2008-06-09 17:56:30 4 1 62 0 4 51 0 72.20 68 98.05 CHANGED ETLVSSIFWTLAPWNNMLLLKHGRIEILDQNTMYGWYELPKQEFLNSEQPEPITHYIKKFPLMKcIGPpcpp+..htphSchlllotssoNHahN .....................M..LLL..KHGRIEILDQNTMY..GWYELPKQEFLNSEQP..IhTT.KKa.lhhclsP.cppK..h...................t............ 0 1 3 3 +10538 PF10706 Aminoglyc_resit Aminoglycoside-2''-adenylyltransferase Mistry J, Coggill P anon PRODOM_PD012767 Family This family is conserved in Bacteria. It confers resistance to kanamycin, gentamicin, and tobramycin [1]. 
The protein is also produced by plasmids in various bacterial species and confers resistance to essentially all clinically available aminoglycosides except streptomycin, and it eliminates the synergism between aminoglycosides and cell-wall active agents [2]. 20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.02 0.71 -4.65 3 186 2012-10-02 22:47:23 2008-06-09 17:57:47 4 1 128 4 26 138 5 140.20 38 79.89 CHANGED MNcLHIsLIHpIFAAADclNLPLWIGGGWAIDARLGRITREHDDIDLTFPGDR+AEFEsLlcthGGpITEQTDYGFLAplQGlLLDCEPAaasD-AYEIE-sPPGSCPhssEGVIuG+PVRCNSWEAILWDYFYYtDEVPpu-WPsKHlaSasLAC-SLG-ssVcsLRcQF+oR ............................llshh-ctslshWlsGGWulDAhLG+.TRcHcDIDlsFsuc+ps.cl..sllc.h.G....h..+.l......c...h..s...hh..ltc.t...th..lDhcPh.hss-.u...htts.st.u.u...p...........t.....p....s.hts+.l.C.s............................................................................................................................................................ 0 7 19 24 +10539 PF10707 YrbL-PhoP_reg PhoP regulatory network protein YrbL Mistry J, Coggill P anon PRODOM_PD110862 Family This is a family of proteins that are activated by PhoP. PhoP protein controls the expression of a large number of genes that mediate adaptation to low Mg2+ environments and/or virulence in several bacterial species. YbrL is proposed to be acting in a loop activity with PhoP and PrmA analogous to the multicomponent loop in Salmonella where the PhoP-dependent PmrD protein activates the regulatory protein PmrA, and the activated PmrA then represses transcription from the PmrD promoter which harbours binding sites for both the PhoP and PmrA proteins. Expression of YrbL is induced in low Mg2+ in a PhoP-dependent fashion and repressed by Fe3+ in a PmrA-dependent manner [1]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -10.95 0.71 -4.98 22 504 2012-10-02 22:05:25 2008-06-10 11:39:02 4 2 480 0 35 279 172 183.00 67 87.86 CHANGED lpLpppp.lupGspRhsYtHPtcssphlKVhpspt...................tsth+phtpElptYhplph.......+pth.hshls+haGhlpTshGhGhlh-hltDhsGshu.TLpphh...cpsthss.thtptLcphhphlh-scIlsp.-lpspNIVhtcpspup.....phhllDGhGstph...lPltshuphhs++plp+phc+hhpcht .............................................................I+LSEQoPLGTGRHRKCYAH...PED...A...p...RC..IKIVY+Ru...........................-.G.G.DKE.IRRE.LKYY.AHLu........RRLcDWSuIPRYHG..TVETDCGT..GYVYDlIs....DF..D.G....K.PS.ITLT.EFAt....QCRY.E..E.D...lA.....p....LRQ...LL....K....pL...KRYL....pDN+IVTM.S.LKPQN..ILC+RISESE....VlPVVCD.N...IGESTL....IPLATWS.KWCChRKpERlW+RFIAQP.A................................................ 0 11 18 27 +10540 PF10708 DUF2510 Protein of unknown function (DUF2510) Mistry J, Coggill P anon PRODOM_PD056443 Family This is family of proteins conserved in Actinobacteria. Many members are annotated as putative membrane proteins but this could not be confirmed. 20.90 20.90 20.90 20.90 20.30 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.26 0.72 -4.49 31 349 2009-01-15 18:05:59 2008-06-10 13:08:27 4 19 198 0 127 284 30 37.80 43 13.84 CHANGED sGWYPDPus....sp........phRaWDGppWTsph...pPh...Pussstss ..sGWYPDPsG....ss........thRaWDGspWTcps.............pPs.....st......t....................................... 1 48 96 123 +10541 PF10709 DUF2511 Protein of unknown function (DUF2511) Mistry J, Coggill P anon PRODOM_PD064657 Family This family is conserved in bacteria. The function is not known. 
20.40 20.40 21.10 20.80 19.70 17.00 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.07 0.72 -3.64 14 641 2009-01-15 18:05:59 2008-06-10 13:21:12 4 2 562 0 53 212 2 86.40 68 76.30 CHANGED slo+hcaGc.cWsFopEEVtLpC+sGsAL.ashNsuT.hpYPLN-lAppphct.Gp..upsIssIhlDDPs....pPG...................pKhS.LsPal-cA.pLC .........TVSRFEVGKDKWAFNREEVMLTC..R....P......G...........N......A....L...YV..IN..PSTLVQYPLNDIApppVAo.GKTcAQPIuVIQIDDPs.....sPG.......................EKMS.LAPFIERApKLC.......................... 0 7 16 31 +10542 PF10710 DUF2512 Protein of unknown function (DUF2512) Mistry J, Coggill P anon PRODOM_PD032002 Family Proteins in this family are predicted to be integral membrane proteins, and many of them are annotated as being YndM protein. They are all found in Firmicutes. The true function is not known. 23.70 23.70 23.90 25.00 23.50 23.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.93 0.71 -4.71 20 314 2009-01-15 18:05:59 2008-06-10 13:55:09 4 1 198 0 59 209 0 127.60 34 92.27 CHANGED M+..HltALslKalhshslLhllLshhassoFscllhlollLolloYhlGDLalLPR......huNhsAoluDFGLualllWlhuhhhhs..sshsluhuollSAllluluEhFFHtYhhcplls.....p.tp.....phpapTEhu-E .....................Mp.HhhsLllKhhhhhhllhlh...Lsl.hhs..ho.hspllhholhlohsuYhlGDhhILs+........hGNhsAohuDhsLualslWlhs.hhhs..sshpIuhuu.l.l.uA.l.llul.uEhaFHtahpppshp.....p.pt.t.....t.tathEhu-E.............. 0 21 44 49 +10543 PF10711 DUF2513 Hypothetical protein (DUF2513) Mistry J, Coggill P anon PRODOM_PD457411 Family This family is found in bacteria. The function is not known. 
29.40 29.40 29.40 31.40 29.00 29.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.39 0.72 -3.94 27 250 2009-01-15 18:05:59 2008-06-10 14:04:32 4 2 231 0 43 168 3 102.10 31 83.09 CHANGED KhDh-llRclLLplEst.h...............tt.thstasp-..pl.YHlthL..p-AGllpuphpth............st..........hltpLTasGH-FLDslRcsslWpcsK.phtppsu...uhol ....................................KhshDhlRclLLclEsptphspsl...................pshth.spash-......slhYplthL..p-Ashlssp.htht................st.hh.h........hlpclTasGH-FLDsIR-spsWpcsK.phtsKsssho................ 0 14 25 36 +10544 PF10712 NAD-GH NAD-specific glutamate dehydrogenase Mistry J, Coggill P anon PRODOM_PD017095 Family The members of this are annotated as being NAD-specific glutamate dehydrogenase encoded in antisense gene pair with DnaK-J [1]. 19.10 19.10 27.00 26.90 18.80 18.50 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.41 0.70 -6.24 21 144 2009-01-15 18:05:59 2008-06-10 16:57:01 4 3 117 0 30 146 1412 408.70 35 78.00 CHANGED hsLptshptscsthDthhshshcLVAhlhpcLL.........GtVcpsluLVhuhcpLssLLVhhGVthGlLcHhlDlhlspsstuL.DtDLLhLsGuLVLGtcVcDAVGVDVEGchDLRHuARstRsshpVELs-cLVVtpHhsLsLEcsDtHutLVVhsGtEcLsLLGRDpuVAlDQsG-Hssp+hDAcRQRuHVEQQHVLcV.....ALQssuLDsuAcucsFVRVcshVRLLA.EElhHhhhDLtHsGhsADpcclVDlsttpAuVLpptLsRL-psL-plhcpuFpLGAuphcscV..LcstsltpDctpVDhGLhttRphDLtLhspFLpsLQsphVlsQVDullhLELsspVVD-spVEVFTAcEtVAVGshHFEpA....lsDhpDGcVEusAAcVlDRDthshh..LVcslGpRupGRLVDDspchcsuDhAGVLGsLTLuVVEVuRpGDDpltchhApluhGuFLHLhQccstcLtRplhLA....hphDPsVAlsulsDh.tpphhVLhphtVscusADQALctcpGVhRVtcsLsLuRLscpshsllscscDRRtGutAFsVhDchplsAl.HDucAuVGsscVDTssFsH 
.........................................hp..hph.pthhs....hh.hphlu..h.....hhphhh..........tthpphhtLlhthtthhthhVhhulhhslhpHhhshhhhpsttsl.cuDllhhsuhhlhttplpDsVulDlEuphDLRpsu+pthss.plEhspphVlttph.shsLpch-t.ttLlVhtttEtLthhutDttVhhDphtccss.........tthsspt...pttpVpQp.lhtl.....shppssLpttspspshltVpshsthhs.cch.p.hhphhHsuhsspppphVsht.hpsslhpt.htthptshcplhtphhphtstph.hph..hts.t...tts.tplchGhhtttphshthhsthhp.sLptphlh..hplpshhhhchhtp.hDps.lplhssp.tlshut.phcth....hschpptplEtssspV.spc.hhh....h....lpsluptstuthVDDs.phpssshsulhGsL...sLtlV.cVst.tDstht.hhsplhhuthLchhpc.ttsLhtt.hLs......hphc.tlshh..tph.h..h.lhh.hhlh.hssspAhsttpushtVtctLshstlss.pshshht.ssctRtushshtlhpphthhsl..+st.stVutsplsss.ht................................................. 0 6 13 28 +10545 PF10713 DUF2509 Protein of unknown function (DUF2509) Mistry J, Coggill P anon PRODOM_PD077226 Family This family is conserved in Proteobacteria. The function is not known but many of the members are annotated as protein YgdB. 25.00 25.00 26.00 25.80 20.90 20.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.98 0.71 -4.40 10 534 2009-09-10 15:07:53 2008-06-10 17:00:33 4 1 521 0 38 192 1 120.60 58 89.88 CHANGED hLLlLG.LLLpGlppQLDuhhthsusEpptL+AastApSALsWGpuQsWutpst..........WpCpphsp.sh+uCLRhhSsuphlllcGput......slpLaQ.......pGssssssllhssHGW.DFCP.K-suhCp.hs ...................hLLlLGSLLLQGhsQQpcSaAuRVohESpuLRRQAlVQSALtWG.+.hpsWpspss....................hQCpp.Yu.....u.....osARVCLRlLu-sEslLlAGh-G.......VoLWR.........TGcV.I-GsI.VFSP+GWSDFCPLKEtALCQlP...... 0 2 9 21 +10546 PF10714 LEA_6 Late embryogenesis abundant protein 18 Coggill P anon Covarrubias A Family This is a family of late embryogenesis-abundant proteins There is high accumulation of this protein in dry seeds, and in the roots of full-grown plants in response to dehydration and ABA (abscisic acid application) treatments [1]. This LEA protein disappears after germination. 
It accumulates in growing regions of well irrigated hypocotyls and meristems suggesting a role in seedling growth resumption on rehydration [2]. As a group the LEA proteins are highly hydrophilic, contain a high percentage of glycine residues, lack Cys and Trp residues and do not coagulate upon exposure to high temperature, and for these reasons are considered to be members of a group of proteins called hydrophilins [3]. Expression of the protein is negatively regulated during etiolating growth, particularly in roots, in contrast to its expression patterns during normal growth [4]. 20.50 20.50 21.30 51.70 20.10 19.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.71 0.72 -4.17 6 28 2009-01-15 18:05:59 2008-06-10 17:33:36 4 2 14 0 17 26 0 80.70 48 71.36 CHANGED SEcc..psp.-sLPhEsSPYscYcDLEDYKppGYGTpGHQEPKsG+GuGuTDAPT.SGu.hpucutsou....TDAtNp+usP .........p.....ppscpptLPh-sSPYlpYcc...LEDYKh+uYGscGH.pPK.sG+GGGu.TDAPTlSGssh..........ucutsus.........sDAhNpps.................................. 0 3 9 13 +10547 PF10715 REGB_T4 Endoribonuclease RegB T4-bacteriophage encoded Mistry J, Coggill P anon PRODOM_PD091708 Family The RegB endoribonuclease encoded by bacteriophage T4 is a unique sequence-specific nuclease that cleaves in the middle of GGAG or, in a few cases, GGAU tetranucleotides, preferentially those found in the Shine-Dalgarno regions of early phage mRNAs. Phage RB49 in addition to gpRegB utilises Escherichia coli endoribonuclease E for the degradation of its transcripts for gene regB. The deduced primary structure of RegB proteins of 32 phages studied is almost identical to that of T4, while the sequences of RegB encoded by phages RB69, TuIa and RB49 show substantial divergence from their T4 counterpart. 
27.40 27.40 27.60 28.30 27.20 27.30 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.62 0.71 -4.10 14 136 2012-10-03 00:09:25 2008-06-11 13:56:22 4 1 74 1 0 99 0 142.10 24 90.69 CHANGED Mp..........c.hphaphpatph.sphtchpcs.........tthsp.Ft...........lhap.p.hsp.h.+phctcashthhpplhspl...ph.t.huh...........cthphpsthh........ElpcGshhlulsssp.s.sh...tulththtlhsptph.tphshhhl .....................p.t..h.hpp....c.hp.at.patph.sphtctscp........tslu.tFt...........lhappphhsc..sl...+..phccpashthFpclpspl..hph.....phluh...st....c....hlcshcapstph........Elp.....c....Gslhhu.h.osspss.ph...psuhphthsllpptph.tptphh................ 0 0 0 0 +10548 PF10716 NdhL NADH dehydrogenase transmembrane subunit Mistry J, Coggill P anon PRODOM_PD026898 Family The NdhL family is a component of the NDH-1L complex that is one of the proton-pumping NADH:ubiquinone oxidoreductases that catalyse the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. NDH-1L is essential for photoheterotrophic cell growth. NdhL appears to contain two transmembrane helices and it is necessary for the functioning of though not the correct assembly of the NDH-1 complex in Synechocystis 6803. The conservation between cyanobacteria and green plants suggests that chloroplast NDH-1 complexes contain related subunits [1]. 25.00 25.00 53.50 53.50 22.20 21.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.02 0.72 -4.21 24 91 2009-09-10 15:53:21 2008-06-11 14:19:09 4 1 85 0 38 89 84 79.60 44 78.07 CHANGED M..................................hhs.l..............psllVlhsYsuLuuhYLlVlPlhLahWhNpRWashuphERhhhYhLVFLFFPGhlLhAPFLNhR.psps. .....................h..........h...............phhhlhlhYhsLushYLLVlPhhlahahppRWYhtuphERhhhYhLVFhFFPGllLhuPFLNFR.ps+p.. 
0 6 24 35 +10549 PF10717 ODV-E18 Occlusion-derived virus envelope protein ODV-E18 Mistry J, Coggill P anon PRODOM_PD579825 Family This family of occlusion-derived viral envelope proteins are detected in viral-induced intranuclear microvesicles and are not detected in the plasma membrane, cytoplasmic membranes, or the nuclear envelope. The ODV-E18 protein is encoded by baculovirus late genes with transcription initiating from a TAAG motif. It exists as a dimer in the ODV envelope and contains a hydrophobic domain which is putatively acting as a target or retention signal for intranuclear microvesicles [1]. 23.10 23.10 23.10 62.90 23.00 23.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.20 0.72 -4.54 18 56 2009-01-15 18:05:59 2008-06-11 16:54:42 4 1 53 0 0 50 2 83.90 47 99.70 CHANGED MDshRss.......ssssth...phsslNPNhLMTILIsLVIIILLIhLFQ.SSsussSu....ssssptu..ahNPLNATMRsN.....PhV.NTsQRphL ....M-.hRss.......sssss..stplstlsPNhhMTILlsLVIIILLIlLFQSSSsussSu........s..s..........sssphuFhNPLNATMRsN.....PFV..NTsQRph.......... 0 0 0 0 +10550 PF10718 Ycf34 Hypothetical chloroplast protein Ycf34 Mistry J, Coggill P anon PRODOM_PD019546 Family This family is of proteins annotated as hypothetical chloroplast protein YCF34. The function is not known. 25.00 25.00 44.20 44.10 16.40 15.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.81 0.72 -3.59 23 81 2009-01-15 18:05:59 2008-06-11 17:13:16 4 1 79 0 31 81 84 75.60 54 91.41 CHANGED MCICVsCpaVDRCpTYHsVEpQHt.sHLopsPDFcPppPpIHVslh..s..ssthtlEWDVhuCpSFhp-.G+WsRLRP .MCICVNCcaVDRC.TYHAVEpQHpp..s...HLo-...sPcF-Pp..p..PsIp.VNI+..........s.p....sstlchEWDVluCpSFhpEhGKWu+LRP....................... 0 8 22 30 +10551 PF10719 ComFB Late competence development protein ComFB Mistry J, Coggill P anon PRODOM_PD066657 Family This family is conserved in bacteria. 
Some members, with three conserved cysteines, are annotated as late competence development protein ComFB. 25.00 25.00 28.70 28.60 20.30 19.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.71 0.72 -4.24 60 450 2009-01-15 18:05:59 2008-06-11 17:24:40 4 1 402 0 154 351 11 83.20 28 65.09 CHANGED l+NahEclVhctlpp......tphpp..t.tspcslsDlsslALNpLPP+Ylppchshh.ahh.spptcpphcpplhsAlpcAhphVpppspc ..........l+NhhEplVhph.lsp.h...........ph..p......t.hspcpls..DlsslALNpLPPhYlppchshh.hth..sptthtphcsclhtAlppAhthltps.p................................ 0 52 109 135 +10552 PF10720 DUF2515 Protein of unknown function (DUF2515) Mistry J, Coggill P anon PRODOM_PD131865 Family This family is conserved in Firmicutes. Several members are annotated as YppC. The function is not known. 25.00 25.00 28.30 28.20 21.40 20.90 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -11.93 0.70 -5.52 19 260 2009-01-15 18:05:59 2008-06-12 09:26:28 4 2 151 0 33 210 0 311.30 50 86.73 CHANGED llptIcccTchtNpDNISRTpAYhpaY.RaPEI+WuhLAoMVSRNuGWsMTDL+GphapplLsppppcthFLhYERANWlIFpDAYPQLLLYE.S++pspPLFHLLshFpVSpFMppEWp+FWcctspcRLhhALIINEQNhIQpPlIppsha++pVFcol.Fhlp-hhHFssVlFPshcG.......tLaGholpcFpslccRIpLGKpLupLLFcsch.hsphhcFAhpssHTGSRhDY.pahh.......................Gsp+htSPtLRpsasslsHphssp.pDWFpcpt..shhh.hppp.scphclT-hYh+Kpcplphhhhlpc ......llppIKcpTchhNtsNloRTpAYhpaYhRasEI+WAhLupMVSRNuGWNMTDL+GchYsplLscpsppphFhhhERuNWLIFpDAYPQLLLYEpSp++ppsLFHLLsahNVStFMEp.WphF..Wc.................ptst.ppLhhALIINEQNhlpK.VIpNsaFKKpVhpohhFKLp-hhphspllFPhh.Es...................hLaG.olppFpoLpcRItLGK+LhuLL.F+spa.huphhpaAtppsHTGSRtDY.salh..........................suh+haSPsLp.sa.shtHcEhc....h.cDWFscht..shha.hccE..phpspITE.Yp+phEpIthA.lsp.+................................ 
0 8 20 21 +10553 PF10721 DUF2514 Protein of unknown function (DUF2514) Mistry J, Coggill P anon PRODOM_PD034813 Domain This family is conserved in bacteria and some viruses. The function is not known. 24.70 24.70 24.80 24.90 24.60 24.60 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.01 0.71 -4.54 17 309 2012-10-02 17:03:51 2008-06-12 11:24:49 4 3 255 0 30 214 1 150.60 41 93.15 CHANGED WhhhhhlllLshs.hhsh....a...ttGpphpstsatpchA..........cccus....pt.stltspstARtcEQcRptAtscstpcAppctstApAsAssAsAuuspLRppsscL....suuppt.ssssuAsspupsAucsuhVLu-lLu+usppAtpLActuDcuplAG.sCE+tYDulpt ...........................................................h...hhhllhhshh...G.........a....hhGsshuDp..uWppK.WA..........-RDuu......t.oppl.ssphuA.Rh.hEQtRphApDEssKDAQpctAchp..AcAAs...hus.uss..pL.Rs-up+h....lsA..A+c..sushAAAspuKospsstshLsshLG-hstcApha.AchADcpa..hAGhTCpphY-olp.s............................................... 0 3 8 19 +10554 PF10722 YbjN Putative bacterial sensory transduction regulator Mistry J, Coggill P, Bateman A anon PRODOM_PD093695 Domain YbjN is a putative sensory transduction regulator protein found in Proteobacteria. As it is a multi-copy suppressor of the coenzyme A-associated temperature sensitivity in temperature-sensitive mutant strains of Escherichia coli the suggestion is that it both helps CoA-A1 and possibly works as a general stabiliser for some other unstable proteins [1]. This family was expanded to subsume other related families: DUF1790, DUF1821 and DUF2596. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.20 0.71 -4.10 185 1845 2012-10-01 22:01:34 2008-06-12 12:57:55 4 8 1501 2 429 1139 363 127.10 20 69.51 CHANGED sh-hlpphlpptshp.....hph.....tp-.spts..............ltsphpshthhhhhts..........pspthth.......shsh.hphs................pptsthhphlsphNpphhhs+h..hh.pppsthh.....hchslsl.....ttulospplpphlptshptsppahstlpt .....................................................................phlcphlpphshp.....a.p....sss.spuh............hhsphpsh.p..hhhshp............-h.s.hl....hhu.h.h.tcl................sssh.slhthLsthNt...pt.hhs+h...hl..ctp..scsh...........lptslsl......tGl..o....c..p.h..t..hhlpps.cthphhh.....t.................................. 0 128 285 369 +10555 PF10723 RepB-RCR_reg Replication regulatory protein RepB Mistry J, Coggill P anon PRODOM_PD763888 Family This is a family of proteins which regulate replication of rolling circle replication (RCR) plasmids that have a double-strand replication origin (dso). Regulation of replication of RCR plasmids occurs mainly at initiation of leading strand synthesis at the dso, such that Rep protein concentration controls plasmid replication [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.76 0.72 -4.25 15 388 2012-10-02 18:44:02 2008-06-12 13:18:45 4 3 310 2 20 199 17 75.20 41 85.33 CHANGED M.....SQhtNAVTSSsssKRtYRKGsPh........osAE+Q+suluRK+tTHKclpVFlpNtLK-pLhplCcccGlTQAEhIEcLIcpEhscps........p ...............................M.SQhtNsVTSSs..KRhYRKGpPl.........osuE+QphuluR..K....+so......+K.....clpVFlpsphKshLtphCcccGlTQAEhlcc.LIcpEhtth..s.............. 0 3 7 14 +10556 PF10724 DUF2516 Protein of unknown function (DUF2516) Mistry J, Coggill P anon PRODOM_PD057095 Family This family is conserved in Actinobacteria. The function is not known. 
25.90 25.90 27.00 26.70 24.50 25.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.35 0.72 -4.01 22 270 2009-01-15 18:05:59 2008-06-12 13:23:55 4 2 269 0 89 211 12 99.70 35 97.19 CHANGED M...............hlhsltshlhhlLtlsshssulaAhlcuAhpRsDAFsAAsKtTKshWLsILGlulhlhllhh........shlshhul.luslAssVYlsDVRPtlcplp.................hssW ........................h.thlhhlLhlssllsulhAhVcAAhpRsDAasAADKhoKshWlsILGsAs.hlshlh......................sslsh.Luh..lu...hlAsuVYllDVRPpltplp...............G.....h....................... 0 25 67 85 +10557 PF10725 DUF2517 Protein of unknown function (DUF2517) Mistry J, Coggill P anon PRODOM_PD055257 Family This family is conserved in Proteobacteria. Several members are annotated as being protein YbfA. The function is not known. 25.00 25.00 41.40 41.20 20.70 19.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.18 0.72 -4.29 11 540 2009-01-15 18:05:59 2008-06-12 13:48:29 4 2 535 0 42 123 1 62.40 80 92.11 CHANGED hYpsYPhapIlLRRhhVlLsGlLALPVMLFh..+DRARFYSYLHRVWsKTSDKPVWLpQuEpusp ...LY+-YPAalIFLRRoaAVsAGVLALPhMLFW..KDRARFYSYLHRVWSKTSDKPVWMsQAEKAT.s... 0 1 10 24 +10558 PF10726 DUF2518 Protein of function (DUF2518) Mistry J, Coggill P anon PRODOM_PD032672 Family This family is conserved in Cyanobacteria. Several members are annotated as the protein Ycf51. The function is not known. 25.00 25.00 25.20 133.60 21.30 20.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.70 0.71 -4.66 26 73 2009-01-15 18:05:59 2008-06-12 13:59:10 4 1 73 0 29 78 119 145.00 41 86.59 CHANGED M..shsphltpsstWluauulshulLTllAFlhpWGlRFRLVGlTuFolLLosusaAFulua.ps.stl-GAlphslVaDNGsshlVspsssshsspslpsTLcQlAuNL+usGRsus....pVplRlRplppspsGlScPllLGElpRs ........M.hst.hhshspWhuhuolshslLTllAFlh+WGlRFRLVGlTuFhhlLosuhauhuluh.hs+spI.GAl+aslVYDNGusplVlslssslstsplEATLcQAAsNLhShGRsus..ptplsIRhRslhHspsGlScPlhLGclpR.... 
0 5 19 27 +10559 PF10727 Rossmann-like Rossmann-like domain Bateman A anon Bateman A Domain This family of proteins contain a Rossmann-like domain. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.57 0.71 -4.41 8 557 2012-10-10 17:06:42 2008-06-12 16:45:08 4 5 547 4 188 1095 262 118.70 31 39.88 CHANGED MptP...........p.sRLpVGIlSAGRVGsALGtAL-RAGHsVsuloAlScAS+pRAppRLPssslhsl.-lsc+uELllLAVPDA.ELsGlVpGLAsstssRsGpIVsHTSGApGlsILAPLscpGsIPLAIHP .....................h.....................plGlIG.A.G+VGssLuhsL......p.p........s.......s....a..................l.....s.........u....s.....t........u....h....S.........p......s.........S......t.....p.....c.....A....t.....p.......h.....l..s....s............s.......s.......h............h.......s...............t....-...l.......s.......s..p...u..-.LlllsV.P.Ds...tl.ss.l.s....p....t...L...s.....t..t...t.....hp........s...G...p........l.ls.H..s..SG...A..h...u..ss.lLsP.hpptGu..hshulHP...................................................................... 1 69 144 174 +10560 PF10728 DUF2520 Domain of unknown function (DUF2520) Bateman A anon Bateman A Domain This presumed domain is found C-terminal to a Rossmann-like domain suggesting that these proteins are oxidoreductases. 
22.20 22.20 22.40 31.20 21.90 22.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.46 0.71 -4.70 98 840 2009-09-11 11:03:44 2008-06-12 16:53:23 4 12 824 6 260 779 206 129.80 28 45.49 CHANGED ssshulEu...sppshslhp.plspplGscshhl.sscpRshYHsAAVhuuNalssLhshutclhpps..Glst.......................chLtPLlcsolcsh.hphGs.pALTGPls......RGDts.....TlppHlpsLpp.........sphtplYptLucthh ...............sshsl-u...sptsh.th.hp....sLstplGscshtl...spp.pRhhYHAAushuuNalsslhs..uhclLppt....Gls.......................................................................chLhPLlpsslcsh.hp.....p............G......p...............ALTGPls......RGDts.....slppHLphLps............sphtphYphhupth.h................................ 0 101 200 242 +10561 PF10729 CedA Cell division activator CedA Pollington J anon PRODOM Family CedA is made up of four antiparallel beta-strands and an alpha-helix. It activates cell division by inhibiting chromosome over-replication. This is mediated by binding to dsDNA via the beta-sheet. [1,2]. 25.00 25.00 79.50 79.40 19.80 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.74 0.72 -4.43 3 438 2009-01-15 18:05:59 2008-06-19 16:21:05 4 1 434 2 15 70 2 79.30 91 97.33 CHANGED lMKPLRQQNRPIISYVPRVEPAPPEHAlKMDuFRDVWhLRGKYVAFVLMGEHFRRSPAFSVPESAQRWANQIRQEGEIpE MKKPLRQQNRQIISYVPRTEPAPPEHAIKMDSFRDVWMLRGKYVAFVLMGESFLRSPAFTVPESAQRWANQIRQEsEVsE. 0 1 1 8 +10562 PF10730 DUF2521 Protein of unknown function (DUF2521) Pollington J, Finn RD anon PRODOM Family Family of unknown function specific to Bacillus. 
25.00 25.00 115.60 115.00 19.30 17.80 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.90 0.71 -4.38 10 131 2009-01-15 18:05:59 2008-06-19 16:34:35 4 1 131 0 18 56 0 143.40 62 99.77 CHANGED MsVIsSFs-++REKQlcaEKplLRELSLcplppul+caFtsl.asFhpphpshlp-uCIDhAIEAYLLGu+aG+FGYYGEshpclptRstcEEccLscsLasaLpsWup.tpsptsp-sLYtAscpFIssWWpEGFpcucKRaKLRLH ...MNVIVSLpEKQKEKQLKYERKMLRELSLKTLRoNIRDAFp......MQELHRQYEDYCIELGIESYLLGARYSKFGYYGESFFDVKYRALEEEQQLTETLFQFLTSMThREIcLpDEELLFESCQQFIGhWWQEGYEKGERRYRLKLH. 0 3 10 12 +10563 PF10731 Anophelin Thrombin inhibitor from mosquito Coggill P anon Rawlings N Family Members of this family are all inhibitors of thrombin, the peptidase that is at the end of the blood coagulation cascade and which creates the clot by cleaving fibrinogen. The interaction between thrombin and fibrinogen involves two different areas of contact - via the thrombin active site and via a second substrate-binding site known as an exosite. The inhibitor acts by blocking the exosite, rather than by interacting with the active site. The inhibitors are from mosquitoes that feed on human blood and which, by inhibiting thrombin, prevent the blood from clotting and keep it flowing. 25.00 25.00 27.70 105.10 19.80 18.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.09 0.72 -3.87 4 8 2009-09-14 14:04:23 2008-06-19 16:55:34 4 1 5 0 1 8 0 64.60 58 71.31 CHANGED MAoKLlVIAhLClALlA.lVQuAPQYApG-EPoYDEDD.s-EslpPHSSSssD-s.--FDsSLL-c MAoKLFVIAhLClALVA.lVQuAPQYApG-EPoYDEDD.s-EsLpPHSSSsoD-s.-EFDsSLL-c. 0 0 1 1 +10564 PF10732 DUF2524 Protein of unknown function (DUF2524) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillaceae bacteria. 
25.00 25.00 73.70 73.50 21.50 20.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.92 0.72 -3.86 8 132 2009-01-15 18:05:59 2008-06-23 11:22:36 4 1 132 0 18 52 0 84.00 70 93.53 CHANGED MATRQSV-EaLQ+sEpAl-aApEQacpAp+QEHYN-pEYS-AQhhLEsAVN-Ls+LshSAN-QQREQLaRhRLQLQpLQNpMIL .MAERQSLEsYITQAEQAVEYAKEQL-pGMRQEHYNTMEYSDAQLQLEQAYNDLQsMQQHANDEQREQLNRARMAIRQLQHQMII. 0 2 10 12 +10565 PF10733 DUF2525 Protein of unknown function (DUF2525) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. The family has a highly conserved sequence. 21.40 21.40 21.60 24.40 21.30 18.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.64 0.72 -4.13 4 435 2009-01-15 18:05:59 2008-06-23 11:30:14 4 1 433 0 18 61 0 57.80 90 76.89 CHANGED DVDALLAAINEISESEVHRT.-Dsp+sslDGRchHTaRELAEAFELDIHDFSsSEVNR .DVDALLAAINEISESEVHR..S..QND..SE..HVSVDGREYHTWRELADAFELDIHDFSVSEVNR... 0 1 1 11 +10566 PF10734 DUF2523 Protein of unknown function (DUF2523) Pollington J, Finn RD anon PRODOM Family This is a family of phage related proteins whose function is uncharacterised. 21.30 21.30 21.30 21.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.32 0.72 -3.75 28 269 2009-01-15 18:05:59 2008-06-23 11:32:35 4 1 213 0 36 185 2 82.70 23 83.97 CHANGED tsLhshLtslht.lls+l..LsulGluhsohsGlsshhsthht.htshhsulPs....sllslluhhGlspuluIIhuAlshRlshpsl ....................................h..LhshLh.lht.llh.+l........lhulshhhhs.h....ssl.thhs........hh....shlpsthsulPs...........shhthlhhhGlspuLshlhuAhsh+huhp..h...... 0 8 20 28 +10567 PF10735 DUF2526 Protein of unknown function (DUF2526) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function is restricted to Enterobacteriaceae. The family has a highly conserved sequence. 
25.00 25.00 49.60 49.50 20.50 19.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.51 0.72 -3.99 3 445 2009-01-15 18:05:59 2008-06-23 11:37:21 4 1 444 0 16 60 1 76.60 90 99.99 CHANGED MSHLEEVpsRVDAAIAEuVIAHMNELLIALSDDAQLSREERYsQQQRLRTAIAHHGRQHKEDpE....ARREQLTKGGoIL MSHLDEVIARVDAAIEESVIAHMNELLIALSDDAELSREDRYTQQQRLRTAIA.HHGRKHKEDME....ARHEQLTKGGTIL... 0 2 3 10 +10568 PF10736 DUF2527 Protein of unknown function (DUF2627) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to a family of Enterobacterial proteins. It has a highly conserved sequence. 25.00 25.00 29.40 48.80 16.40 15.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.80 0.72 -4.34 2 354 2009-09-11 09:21:57 2008-06-23 11:38:56 4 1 354 0 9 18 0 37.80 95 81.88 CHANGED MCGIFSKEVLSKcVsVEYRFSA-PYluASsSNsSsLSM MCGIFSKEVLSKHVDVEYRFSAEPYIGASCSNVSVLSM... 0 1 2 4 +10569 PF10737 GerPC Spore germination protein GerPC Pollington J, Finn RD anon PRODOM Family GerPC is required for the formation of functionally normal spores. The gerP locus encodes a number of proteins which are thought to be involved in the establishment of normal spore coat structure and/or permeability, which allows the access of germinants to their receptor [2]. 
22.40 22.40 24.20 23.30 22.10 22.30 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.15 0.71 -4.50 8 149 2009-01-15 18:05:59 2008-06-23 12:10:43 4 2 142 0 20 99 0 167.80 53 85.78 CHANGED IppLE+plpELQpElspLKp+PuTsI-RIEYKFDQLKIEpLEGTLNIGLNP.oDsp.slEsFpV.ssssssluhhpQ-pssplhppIcQpV-tYLsEEsPplLcpLEppY-spLD-o.+paIlEDI+KQhDSRIcYYlpphtpcpsssPspc....t-cIAptVK+DIp+Al-pFLpHIPs ......................................IhsLEcQV+pLQcELNEL..KsR....P....So....SIsKVEYK..FDQLKVEsLsGTLNIGLNP..uc..sp...pIEDFpV..-sETl.cVs.............PEs...-ssPcaaQ........sIhQchacYL-EEAhscIh+hEpcp.cosLDEhYRQhhl-DIKKQM-cRlsYYLSQsp...shEsh........sossc..hlc-hIlQthKpDI-+Ah.uFIpHIPu................. 0 4 12 14 +10570 PF10738 Lpp-LpqN Probable lipoprotein LpqN Mistry J, Coggill P anon PRODOM_PD017067 Family This family is conserved in Mycobacteriaceae and is likely to be a lipoprotein [1]. 21.60 21.60 22.00 22.20 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.27 0.71 -5.01 19 257 2009-01-15 18:05:59 2008-06-24 13:21:36 4 3 90 0 49 152 0 179.40 35 72.00 CHANGED ssssP..................................lspaLcppGVshps.sPpshsuLslolPhPsGWpshssPNlssuhslI.....upuhhhssApllVaKLhGsFDPscAIp.HG.sDuQth.sac.phsAShAsasGFPSShIEGoYc.sGhphpo.pRhVIATu...usp+YLVpLoVT.ohsspAs.stusss-AIlsGFpVus ...............................................st..shP.........tholt-YlcspuVphpPlpsss.su.slslPhPssWp...hs..sss..lsssassIs.p...usus.....sPsA.hllVhKLp.G.DhD.PAcslp.+uss-.uppLsuap...sssuShAsasGFPSu..h...lpGoYcp.s.Gh....p.hpsupRpVlssu.....sssp.YLVpLs..lT.shsspus.shusAscsIspGhpls.s...................................................... 0 5 27 42 +10571 PF10739 DUF2550 Protein of unknown function (DUF2550) Mistry J, Coggill P anon PRODOM_PD031809 Family This family is conserved in Corynebacterineae. The function is not known though most members are annotated as either secreted, or membrane, proteins. 
20.60 20.60 20.70 22.10 18.00 20.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.59 0.71 -4.19 20 360 2009-01-15 18:05:59 2008-06-24 13:38:10 4 1 358 0 88 232 1 130.70 34 85.61 CHANGED sLslllssllhlh..lhRhhlhRphGuhssslR.....hsssss+GWphGlhRYssscLcWaRLhSLpstPchslsRpul-lhsRRsPpusEhhhlssssl.llc........lps....cs.sphElAhstsAhTuFhSWLESuPssp ..........lsslllhh.hhhh....hhRhh....hhRp.GuhssslR........sh.s.....uss.spG.WphGssRYs.ss.phcaYRlhSh+hhPshhLpRpulclh..s+..RsPp.s.-Ehhhhsssh..h..llp........lps...................+s.sshElAlDtsulTuhhSWLEuAPss.t....................... 1 28 67 84 +10572 PF10740 DUF2529 Protein of unknown function (DUF2529) Mistry J, Coggill P anon PRODOM_PD099482 Family This family is conserved in the Bacillales. The function is not known. Several members are annotated as being YWJG, a protein expressed downstream of pyrG, a gene encoding for cytidine triphosphate synthetase. 22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.08 0.71 -4.83 14 343 2012-10-02 15:05:26 2008-06-24 13:41:14 4 1 343 2 29 139 0 168.40 53 97.13 CHANGED M.KIFoTQLoGlFsRIt-KEp.uIEDuARLLAQAllG-GplYltGhsEhpulthpAhputEshsSutsLs.sss...plssoDRVLlFsphssDtEshthscpLh-pGlshVslS.sstpcssslsphsclHIDhplptsLlPsED.GsRhGaPushsuLYlYauLphslcEhL ...............................................MSKILsTQLhGIFNRl.EKQpL-IpMAAQsLlQ.AIGGEGaVYlKGYcDLpaaEoa.....lLaScE+LKSS++Lc.sl...pchpEIDSTDRVLLFuPFYsD.pVshDlpKLl-hDlDlVLIS....N.......pPKT-....DhP-HLsHaIDLSTPRPIVYTED..Y.DKIlQPHsMAhNYlYY-IYTQMlEMs......................... 0 7 16 23 +10573 PF10741 T2SM_b GspM_II; Type II secretion system (T2SS), protein M subtype b Mistry J, Coggill P, Desvaux M anon PRODOM_PD110875 Family The T2SMb family is conserved in Proteobacteria and Actinobacteria, and differs from the T2SM proteins in Vibrio spp. (Pfam:PF04612). 
21.50 21.50 21.50 21.50 21.40 21.20 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.08 0.72 -4.44 28 187 2012-10-02 17:03:51 2008-06-24 13:57:02 4 3 172 0 71 217 26 108.80 24 55.89 CHANGED tusssaltusssshAuAsL.p+lpshlsps...GsslsooQhhs..sspup.....hsplulplshcsshssLpplLhsLEsupPhLaVDpLslps.......shsssssups..Lplphsluuhh ........................t.t..al.st.ssshAuA.sL.pclpphlsps...Gs..ssttsphhs..hp...s.psp..............hsplslplplps....sh..tsLtslLhsLEstp.PhLhl-pLslpt..............phttssstts.tLplthtltuh........................... 0 24 42 58 +10574 PF10742 DUF2555 Protein of unknown function (DUF2555) Mistry J, Coggill P anon PRODOM_PD060530 Family This family is conserved in Cyanobacteria. The function is not known. 25.00 25.00 74.40 74.30 24.40 19.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.62 0.72 -4.34 21 69 2009-01-15 18:05:59 2008-06-24 14:57:06 4 1 69 0 28 62 107 56.90 55 77.10 CHANGED lot-plssFscpslApLApRLE-DDYssPF-GLpDWHLLRAlAhpRPELspPYlHLl ....otcclsuhTpp-VApLApRLEpDDYssPF-GLpDWHLLRAlAFpRPELscPYlHLL. 0 5 18 26 +10575 PF10743 Phage_Cox Regulatory phage protein cox Mistry J, Coggill P anon PRODOM_PD064131 Family This family of phage Cox proteins is expressed by Enterobacteria phages. The Cox protein is a 79-residue basic protein with a predicted strong helix-turn-helix DNA-binding motif. It inhibits integrative recombination and it activates site-specific excision of the HP1 genome from the Haemophilus influenzae chromosome, Hp1. Cox appears to function as a tetramer. Cox binding sites consist of two direct repeats of the consensus motif 5'-GGTMAWWWWA, one Cox tetramer binding to each motif. Cox binding interferes with the interaction of HP1 integrase with one of its binding sites, IBS5. This competition is central to directional control. 
Both Cox binding sites are needed for full inhibition of integration and for activating excision, because it plays a positive role in assembling the nucleoprotein complexes that produce excisive recombination, by inducing the formation of a critical conformation in those complexes [2]. 23.70 23.70 23.80 28.90 23.50 23.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.87 0.72 -3.96 6 218 2009-01-15 18:05:59 2008-06-24 16:14:00 4 1 186 0 11 94 0 81.20 49 85.59 CHANGED M.......ppQlophsoDhlsh-tFAphIGKTspAVppMlcAGKLPslcMpDPppssG+.GEhWlahsEWsphs+phh-otPsE....WhhWhsh ...............lhth.hDAl.hpcFAchlGKs.sAVppMlctuKLPlI-h.p.DPppssuRAGEhWValPtaNcul+pAa.sRPhE.RDuWLhWhGL....... 0 1 5 8 +10576 PF10744 Med1 Med1-Trap220; Mediator of RNA polymerase II transcription subunit 1 Wood V, Coggill P anon Pfam-B_51442 (release 22.0) Domain Mediator complexes are basic necessities for linking transcriptional regulators to RNA polymerase II. This domain, Med1, is conserved from plants to fungi to humans and forms part of the Med9 submodule of the Srb/Med complex. it is one of three subunits essential for viability of the whole organism via its role in environmentally-directed cell-fate decisions [1]. Med1 is part of the tail region of the Mediator complex [3]. 
21.10 21.10 21.30 21.50 20.40 21.00 hmmbuild -o /dev/null HMM SEED 393 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.24 0.70 -5.70 30 276 2009-01-15 18:05:59 2008-06-24 16:21:09 4 5 202 0 189 267 0 357.00 20 39.21 CHANGED l................s-hlphlhthh.tth..............sl-slp+hsphhuh-uhs-cl.....................clshtpshlsl-lDhspt.pshlhsstLhl........................................th.hss.psphsphphuh.tp...ps.stLhphhptpshscFppsLphLspl.phss...........................................p.ptchshFtAlcsLtpsLp........th.ph..pptsss.phpphhpsshGhhthc..pss....chthplpYahpst.lhctpp...........................................h..shphhtssthtshhNpssphpsssssthh.hhs..hs.........P.hhh.sppsssphssshshhlspshs...................................................................................pt.h..h.....st.ppapYths........sshps.tsph.lpclshscspplstllsl.............................................LRp.sh..hssLlposhpppt ..................................................................................................................phhphlp.hh.t................sh-sl.p+hhp.h.uht..shh-ch....................................p..htt.ph....ls.........clsh..stt....p....shlpsshhhh..........................................th..s.s.tsphs...sh.phth...tc..............ps...........t.h....spth...p.t...........pshspFptpLctLspl.pLst.................................................................................p.chpha.AlpultpsLp........................ph..h....ph..........................p.........t.s.s.........t.........t............hp.........p......h.h.pup.h.Ghht.+....pss....+l.hslpYahs.tplhptpp................................................................ht...h......ts.......sht...hstssp....tss.st....hh...hh.......s.hhts..............Psh......spsspsphs..s........sh.hphspshs...sht.lttht.sh.hsh................................................................h....tpt............tpp.t.h..h..sh...........s..t.....ppHsYhh................shpsh.Gph...lpclsFpHPtplshllsl...............................
..............LRp.sh..hssLltsshp...s............................................................................................................................................................................................... 0 46 83 151 +10577 PF10745 DUF2530 Protein of unknown function (DUF2530) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to mycobacteria. 22.40 22.40 22.40 23.00 22.30 22.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.55 0.72 -4.00 10 133 2009-09-11 11:12:09 2008-06-25 13:38:17 4 1 133 0 42 94 37 75.50 49 87.02 CHANGED sspPPPLPssLL-PhPVIsVGsLuWLV.AsVsAFsVsu.LcsWRPlTlAGLuVGlLGToIFLWQR+AARRGuRGAQsGL ...............h..psPsLPssL.l-shPVIsVGsluWLV.As..V.sAal.Vsu...l....s.....s.....WpPV..olAGLusGl.LGTsIFlhQhuAARRGuRGAQtGL......................... 0 12 31 38 +10578 PF10746 Phage_holin_6 Phage holin family 6 Pollington J, Finn RD anon PRODOM Family Holins are a diverse family of proteins that cause bacterial membrane lysis during late-protein synthesis. 20.10 20.10 20.90 20.60 20.00 19.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.21 0.72 -4.25 4 52 2009-01-15 18:05:59 2008-06-25 13:44:29 4 1 39 0 1 40 0 62.70 52 94.44 CHANGED LpLDFsNEVlKAAPIVGTusADsAuRlFFGLoLNEWFYsAAIAYTVVQIGshVhcslhc.K+tsKs .....................LsLDFNNEll.KAAPlVGsusADsuARLFFGLSLNEWFYVAAIAYTVlQIGAhVhcplhcaK+ts+.......... 1 1 1 1 +10579 PF10747 DUF2522 Protein of unknown function (DUF2522) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus. 
25.00 25.00 43.10 43.10 24.00 23.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.84 0.71 -4.15 12 130 2009-01-15 18:05:59 2008-06-25 13:45:38 4 1 129 0 20 70 0 140.40 55 97.14 CHANGED RpY.lYLIc-EhApaYFGREphhFcLFp-hchss..tppcplhtKQlpYITcslPhh+hcptLpptL.s+.hphpphpshapl....stcuputlhlt-+hIpltspGshsAEoshFElLRKlsssFLAhDhpsp+aGWLs.Pl+tcp ......+TYELYLIpEDlA+uYFGREhLFF-LFscao-Su.ohSEKcVLhKQhhYIThPLpVh+IHH+LEQsLcsh.u+YcRs+aTHsL...aoGApauEIMVKs+YIchsoSGNlShETTFFEVLRKsEhTFLAMDYENsKYGWLN.PLKQs.+... 0 4 12 14 +10580 PF10748 DUF2531 Protein of unknown function (DUF2531) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 22.00 22.00 22.00 22.80 21.60 21.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.95 6 489 2009-01-15 18:05:59 2008-06-25 13:47:50 4 1 480 0 33 166 0 125.50 64 94.58 CHANGED pspRWlLLshsLshLTGMRDPF+PPEDRCRIucLspWRYQGhVucG-plsGlLKDuQpKWRRVcpsphL.sGWplspLTAsplsLpTGcGC-PspWRWpRpGspsE.AMDSpsssssssRtttu+usppDAsGG ................+spRhl.L.hsluLs.LLTGMRDPF+P.P.E.D.h.C..R..I..u..E.L..S..QWRYQGhVG+GERhIGlIKDGQ+KWRR.VppsDlL.E.N.G.WTIlQLTs-sLTLsT...GsN...C..EPPQWhWQRQ.Gc.s.NE.AMD...S...+sTssuD.sRRTGGKuucSDAsG............................... 0 2 7 20 +10581 PF10749 DUF2534 Protein of unknown function (DUF2534) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 21.40 21.40 22.20 53.20 20.40 21.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.59 0.72 -4.06 4 453 2009-01-15 18:05:59 2008-06-25 13:49:56 4 2 450 0 21 70 0 83.00 86 88.81 CHANGED MIhhtKLpotcGKKFLluLhlVFhlAhoVVuRAThsGVlEQYNhPhScWToSMFhlQuAMlhVYSlVFThLlAIPLGhaFLGuc- .........MIMAKLKSAKGKKFLFGLLAVFIIAASVVTRATIGGVIEQYNIPLSEWTTSMYVIQSSMIFVYSLVFTVLLAIPLGIYFLGGEE.. 
0 1 1 11 +10582 PF10750 DUF2536 Protein of unknown function (DUF2536) Pollington J, Finn RD, Eberhardt R anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. Structural modelling suggests this domain may bind nucleic acids [1]. 20.50 20.50 23.30 22.50 20.20 17.30 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.14 0.72 -4.39 8 132 2009-01-15 18:05:59 2008-06-25 13:52:36 4 1 132 0 21 57 3 67.80 71 93.71 CHANGED MNFpLDLIcDKVEFFEApcLpsLEKKIN-QIEpNKAILLpVHsVSHQspVs.csGRhhYSAVVHFKAKp ....MsFoF-MLEDKVEFFEAuDLsSLE+KIuEQID..NNKALMLEVHHISHQMlhDPESKRPYYSAVVHFKLKK.... 0 5 13 15 +10583 PF10751 DUF2535 Protein of unknown function (DUF2535) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 82.70 82.60 21.70 18.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.97 0.72 -3.84 9 124 2009-01-15 18:05:59 2008-06-25 13:53:25 4 1 123 0 16 49 0 82.80 65 95.60 CHANGED MLhKSLEFKpssGppVKloEIPVLcpDpPahFhlphRLphaltclapupsp+sVYSFR-YLKRplKWsDYpplappstLKHNA ..ITKSFYFTHSTGpCIKIFEIPVLQuQHPLuFLIQSRLQLFIAKIQKpK+PRFSYSFREYLQsCLKWNDY.NVYKTNTLEKNA 0 2 8 10 +10584 PF10752 DUF2533 Protein of unknown function (DUF2533) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 48.10 48.00 22.60 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.80 0.72 -3.68 10 130 2009-01-15 18:05:59 2008-06-25 14:56:50 4 1 130 0 17 59 0 83.60 69 94.30 CHANGED hpVH+AITAHS+KQ+pplKpFlpLDtcREtAIE-AVscC+pGcsFoTDsINcITccMN-LAKpGIVPpRKlVTsEMV+EYVuRh .MEVHKAITAHSRKQNEuVKAhLQLDAQREAAIEAAVSLASNGKcFSVDlINsVTKQINsLAKN.G.Vo.QRKYVTcEMVMEYVSRL. 
0 3 9 11 +10585 PF10753 DUF2566 Protein of unknown function (DUF2566) Mistry J, Coggill P anon PRODOM_PD060138 Family This family is conserved in Enterobacteriaceae. The function is not known. 22.10 22.10 23.00 23.00 22.00 20.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.91 0.72 -4.05 7 798 2009-09-11 10:22:14 2008-06-25 16:39:19 4 1 477 0 30 144 2 53.90 69 84.61 CHANGED hapthLlhYhlGhslohhlhahlS+D.phpIRhluAhllGlTWPhShPVsLLhSLF ......Lap+hLlFYslhssIuFlIhWFlSH-.Kp+IRhLSAFLVGhTWPMShPVALLFSLF............ 0 2 7 17 +10586 PF10754 DUF2569 Protein of unknown function (DUF2569) Mistry J, Coggill P anon PRODOM_PD088965 Family This family is conserved in bacteria. The function is not known, but several members are annotated as being YdgK or a homologue thereof. 25.00 25.00 25.10 25.70 24.90 24.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.84 0.71 -3.99 15 609 2009-01-15 18:05:59 2008-06-30 10:56:40 4 5 578 0 63 219 7 142.60 64 86.26 CHANGED ssshp+IGGWLlh.PhshLllo.lss.lsllhhh.ulhsstsaphLsupssshh.....hhhhhullhslsMa..........haTLalshLFF+R+pthP+haIlhlLloll.lslcuaAh........oPl.DshAl+plhhsLLuAslalPYh++ScRVKpTFlc ............T.TsPQRIGGWLLG....PLAWL.LV.AL......LSso......LA.L....L.LY...ssALuoP.QTFpTLuuQuhssp..........lLWGlSFITAI.....AMW..........YYTLWLTIAF..FKRR.RCVPK.HYI..I.WLLlSVL....L..AVKAFAF............SPVpD...ul.A..VRQLLF.....sLL..ATALIVPYFKRSuRVKsTFVN... 0 10 20 42 +10587 PF10756 bPH_6 DUF2581; Bacterial PH domain Mistry J, Coggill P, Bateman A, Ginalski K anon PRODOM_PD034109 Domain This domain has a bacterial type PH domain structure. This domain was previously known as DUF2581. This family is conserved in the Actinomycetales. Although several members are annotated as RbiX homologues, RbiX being a putative regulator of riboflavin biosynthesis, the function could not be confirmed. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.28 0.72 -4.09 95 695 2012-10-04 00:02:25 2008-06-30 11:00:18 4 2 302 0 232 583 18 75.20 24 44.92 CHANGED RsRltlsssG..lslRslh.us+hlsWs-Itslp...hspusph...........sclchsD....sphlsl.ulphs.st.ptshsuhsplt.s ..................RsRlts.sscG....lslRshh.ss+hlsWsplt.ulp..........hspu.sph........................................splchs-..................sph.lsl.u.lphs.sh.thshsuhtth...s........................................................................................ 0 71 179 218 +10588 PF10755 DUF2585 Protein of unknown function (DUF2585) Mistry J, Coggill P anon PRODOM_PD589556 Family This family is conserved in Proteobacteria. The function is not known. 25.00 25.00 38.50 38.10 17.70 16.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.22 0.71 -4.80 20 70 2009-01-15 18:05:59 2008-06-30 11:03:00 4 1 69 0 27 80 10 161.90 57 83.99 CHANGED lLhuMGRsPICsCGhVKLWcGsltSucNSQHluDWYT.SHlIHGFLFYuhsaLlhtRhs........huhR..LhlAlhlEuAWEIlENSshII-RYRsuTIuLDYhGDSllNSluDhlhMllGFlhAuRLPValTVslAIuFElhsuhlIRDNLTLNVLMLlaPlEAI+pWQuG ...............LahMGRlPICpCGh.VKLWcGsVsSStNSQHluDWYT.SHlIHGFLFYuLsaLlht+tP..........huhRLhlAhlIEuuWEllENSPhII-RYRsuTIuLDYaGDSIlNSshDTlhMhlGFlhA.tRLPVhlTVslAIsFElhsuhhIRDNLTLNVlMLlaPl-AIKpWQuG................. 0 6 14 18 +10589 PF10757 YbaJ Biofilm formation regulator YbaJ Pollington J, Finn RD anon PRODOM Family YbaJ regulates biofilm formation. It also has an important role in the regulation of motility in the biofilm. 
YbaJ functions in increasing conjugation, aggregation and decreasing the motility, resulting in an increase of biofilm [1] 25.00 25.00 35.80 35.80 21.20 18.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.63 0.71 -3.98 4 531 2009-01-15 18:05:59 2008-06-30 11:44:24 4 1 529 0 38 112 3 120.80 84 98.44 CHANGED MDEYSPKRaDIAQLKFLCEsLYDEuIAoLG-SpHGWVNDPTSAlNLQLNELIEHIAoFllsFKIKYss-u-LoEQlEcYLDDTasLFSSYGINs.-LQRWpKS+pRLFthFSspplCT.hpT ..MDEYSPKRHDIAQLKFLCETLYHDCLANLEESNHGWVNDPTSAINLQLNELIEHIATFALNYKIKYNEDNKLIEQIDEYLDDTFMLFSSYGINhQDLQKWRKSGNRLFRCFVNAT+pNPsS.L....... 0 1 8 23 +10590 PF10758 DUF2586 Protein of unknown function (DUF2586) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.20 20.20 20.30 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 363 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.04 0.70 -5.40 17 164 2009-09-13 14:48:51 2008-06-30 11:53:10 4 2 148 0 26 149 4 324.10 34 83.60 CHANGED uhGpVplNsLN.hQGshsplERphLFIGhus.p...N.hGpllslssQSDlDslLGtuDSsLKppltAAphNuGpNWsAhshslss-sp...a.sAlctA.ppshSsEuVV....lsssssspAplsshpstts-LhspaGRhVahlsAssGl......spspoWusYhsthssLppGlAuppVtlVPpLa..GsplGllAGRLCscuVoIADSPMRVtTGullG.LG....shPhDpsGh.LshAsLpuL-ssRaSVPtaYPDY-GhYWuDGphLDs.GGDYQsIEsLRVVDKAARpVRIlAIt+IuDRsLNSTPsSIAsscsaFu+PLR-MS+SsphsGppFPGEIcsPpDsDIsIsWho+ppVpIalhVRPYssPKpITssIhLD 
.............................................................................................................................hspVplsthN.h.G.htplEphhLFlG.us..p...s....sphh.lsspoDhsthl...u.tss.L+t.lhAA..Nusps...W.uhs.h.lstssp............a.pAlctA.ptshShEhVl....lstss.sspsplsthtshttcLhsphGRhlahlhsstu.........tpspsWs-YhsthsslppulAsptVtlVPp...La...........G..sp..........Gl.l.AGRLs..sp....u....Vo...........lADSPsRVtTGull..s.ls......phPhDtsG..t.t.ls...hA...sLpuL-ssRaSVPhWYsDY-GhYWuDGpTLDspGGDYQsIEslRVlcKAuR+VRlhAIs....+I.u.D.R.s....LNST.....s..s...S.lssppphFs+sL..RcMup.usplsu....FP..G..Elh....sPpDs......DlsIhW.spptVplhhhlcsapsPhpIslsl.LD............................................................................................ 0 6 15 19 +10591 PF10759 DUF2587 Protein of unknown function (DUF2587) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with no known function. 25.00 25.00 99.70 99.30 17.50 17.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.20 0.71 -4.87 15 199 2009-01-15 18:05:59 2008-06-30 11:59:36 4 1 198 0 76 162 39 164.30 68 89.78 CHANGED GPDG.....................sspusp-.sscstspslsDLVEQPAKVMRIGoMIKQLLEEVRAAPLDEASRsRL+EIHcoSI+ELEDGLAPELhEELERLoLPFo--ssPSDAELRIAQAQLVGWLEGLFHGIQTALaAQQMAARAQLEQMR..ptALPsGhs.s..........pGpsptsttuts.G..s...........GQYL ............................sssthhs..............sp..-s-sccpulTDhVEQPAKVMRIGTMIKQLLEEVRAAPLDEASRsRLREIHtoSI+ELEDGLAPELREEL-RLoLPFsED..us...PSDAELRIAQAQLVGWLEGLFHGIQTALFAQQMAARAQLEQMR.....pt...ALPP.Gsutsst........tG.......................................h....................... 0 23 58 72 +10593 PF10761 DUF2590 Protein of unknown function (DUF2590) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 
20.10 20.10 22.30 31.00 17.40 18.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.27 0.72 -4.29 18 127 2009-01-15 18:05:59 2008-06-30 13:00:03 4 1 114 0 21 86 3 99.70 49 93.49 CHANGED lDLhIhssDlsLDuuspPhllssRsoIAQDIKHtIhESGLshpLIuERushhpsDhhpplELlVE-DtRLVPGTlplsEsssGplhlTApTh-aG....lslclsh .lDLLIpssDhsLssGspPhhCssRtSIuQDIhHuIlESGLsscLlAERSPshRuDlhpphcLLlEsDcRllPGTlplsEp....s...splalTApTaDFG..slshpl.h........ 0 5 12 14 +10594 PF10762 DUF2583 Protein of unknown function (DUF2583) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as YchH however currently no function is known. 25.00 25.00 51.90 51.80 23.20 19.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.83 0.72 -3.85 5 543 2009-01-15 18:05:59 2008-06-30 13:04:58 4 1 530 0 40 129 1 86.10 83 96.79 CHANGED MKRKNAutLGNVLMGLGLVlMVsGVGYSILuclsQFNlPQFFAHGAllSIFVGALLWLVGARIGGREpVADRYWWVKHFDKRCRRssHR ....MKRKNASLLGNVLMGLGLVVMVVGVGYSILNQLPQFNhPQa.F..AHGAlLSIFVGAlLWLAGARVGGHEQVsDRYWWVR.HY.DKRCRRsDNR............................ 0 1 8 23 +10595 PF10763 DUF2584 Protein of unknown function (DUF2584) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins have no known function. 25.00 25.00 25.30 25.90 20.80 18.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.62 0.72 -4.15 9 175 2009-01-15 18:05:59 2008-06-30 13:07:09 4 2 175 8 18 82 0 78.70 58 95.31 CHANGED MGMPlEhNTMIVTKG+EpR..lEENlFpLEKEGYRlYPl-lPl-VRKTKcGEpoGoAhlpKLEWcsu+TpITYcLlSLpSTN .............................M.hEhpThIl...op...tKEhR...............I..-.-.NlFpLch-GY+laslp..lslhKopE.EplGoAhlpKLEWENGKTpIsYpLlSLpSsN....... 
0 2 10 12 +10596 PF10764 Gin Inhibitor of sigma-G Gin Pollington J, Finn RD anon PRODOM Family Gin allows sigma-F to delay late forespore transcription by preventing sigma-G to take over before the cell has reached a critical stage of development. Gin is also known as CsfB [1]. 21.00 21.00 21.10 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.66 0.72 -4.25 21 219 2009-01-15 18:05:59 2008-06-30 13:09:27 4 2 219 0 51 130 0 46.10 44 71.70 CHANGED hClICcpp+pc...GIhlhs+FICp-CE+cllsspss-spYpaYh++LK .hCIlCcpc+p-....GIhlhsphICh-CE+chVsT-ssDscY.aYl+pL+....... 0 21 37 42 +10597 PF10765 DUF2591 Protein of unknown function (DUF2591) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 21.30 21.30 21.80 21.70 20.80 20.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.60 0.72 -3.84 16 234 2009-01-15 18:05:59 2008-06-30 13:18:17 4 1 198 0 36 151 1 106.50 32 91.15 CHANGED McaScLSDhElNhhVAcApshp..h...........................s.h.hh..ps.sthttas.CssPuDAhPIIpcs+Iul..........tsssscWsAp...................ssssh.hh.spsPLRAAMIVFLh.p ....................................caupLSDhElNphVuth.htps..thh.........................t...s...h...hst...h..s.t.h....tshc.CssPuDuWPIIp+.t.+Iul................h.........sp.tspWsAp.............ssps.sh.h.h.sp...sPLRAAMIsFLh.Q........................... 0 0 9 22 +10598 PF10766 DUF2592 Protein of unknown function (DUF2592) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 25.20 25.30 18.90 24.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.88 0.72 -4.61 5 530 2009-01-15 18:05:59 2008-06-30 13:40:05 4 1 516 0 39 104 2 40.20 89 79.27 CHANGED LLKSLlFAVVMVPVVMAlILGLIYGLGEVFNIFS+lG+oK- .LLKSLVFAVIMVPVVMAIILGLIYGLGEVFNIFSGlG+KDQ...... 
0 2 8 23 +10599 PF10767 DUF2593 Protein of unknown function (DUF2593) Pollington J, Finn RD anon PRODOM Family This family of proteins appear to be restricted to Enterobacteriaceae. Some members in the family are annotated as YbjO however currently there is no known function. 23.50 23.50 149.90 149.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.87 0.71 -4.50 6 522 2009-01-15 18:05:59 2008-06-30 13:44:05 4 1 520 0 34 121 2 143.70 77 88.68 CHANGED hssPs.V.VAulAIIuhRsLslLhLhspLGlcGlu-FIpcSlQsWshTLlFhuSLlLlhlEIhCuFulh+GRNWuRWsaLlsQllssuYLhhASLGahhPElFoIsGES+t-IhHSLlLQKlPDlLlLsLLFlPupSRRFFtlQ .L.NVPALVQVAAlAIIhIRGLDVLMIhNTLGVRGlGEFIHRSVQTWSLTLVFLuSLVLVFIEIWCAFSLVKGRpWARWlYLLTQIsAuuYLWAASLGYGY..PELFSIsGESKREIFHSLhLQKLPDhLILhLLFVPuoSRRFFpLQ..................... 0 1 6 19 +10600 PF10768 FliX Class II flagellar assembly regulator Mistry J, Coggill P anon PRODOM_PD868899 Family The FliX protein is possibly a transient component of the flagellum that is required for the assembly process. FliX may contribute to the targeting or assembly of the P- and L-ring protein monomers at the cell pole. The family carries a potential N-terminal signal sequence and at least one transmembrane domain indicating that it might function either in or in association with the cell membrane [1]. 25.00 25.00 40.10 39.90 21.20 19.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.88 0.71 -3.95 19 70 2009-01-15 18:05:59 2008-06-30 13:46:07 4 1 70 0 38 74 23 137.40 39 97.41 CHANGED M.+l.Gssusosusuutsp..ppuGu.uuFulss...ssssspst..usuustuhuulD..ALLALQuh...-D...shER.R+RuVpRGcshLDsLD-LKhuLLsGplspusls+Ltssstph+suouDPtL-ulLsEIELRscVELAKhup...s .................M.+l.Gssusssssuspss...ctsuu..uuFuh..ss...ssssspsp...usuustusuulD..ALLALQul.....-D....ssER..R+RuVpRGcshLDsLD-LKhuLLuGplssspLtcLtssltph+ssouDPtL-ulLsEIELRscVELAKhstt.. 
0 15 24 28 +10601 PF10769 DUF2594 Protein of unknown function (DUF2594) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. 23.70 23.70 72.10 72.00 23.50 23.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.37 0.72 -3.90 6 530 2009-01-15 18:05:59 2008-06-30 13:52:11 4 1 527 0 36 97 0 73.80 83 99.99 CHANGED MSssDFoTuussppLAsEVoCLKAhLTLlLKAIGQADAG+VIlNMER.IAplEDspQAtVFsNTlpQIKpuYRQ ...MSTPDFSTAENNQELANEVoCLKAMLTLMLQAMGQADAGRVhLKMEKQlAhIEDEsQAAVFSpTVKQIKQAYRQ.. 0 1 8 21 +10603 PF10771 DUF2582 Protein of unknown function (DUF2582) Mistry J, Coggill P anon PRODOM_PD055730 Family This family is conserved in bacteria and archaea. The function is not known. 24.20 24.20 24.20 24.20 23.90 23.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.30 0.72 -4.12 23 279 2009-01-15 18:05:59 2008-06-30 14:03:51 4 3 136 4 47 185 10 65.40 34 76.42 CHANGED Mccph............IGtsAGclWphLs-..tschShspLp...+csuL.scc-lthAlGWLAREsKIthc.....cpstplhlpL ....................M.+pp........lGhsAGcVWphLsp..pt.p.hohppLp...+tspL..scc-lhhAlGWLuREsKI.php.....phpt.hhl............. 0 13 36 44 +10604 PF10772 DUF2597 Protein of unknown function (DUF2597) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 113.10 113.00 24.00 23.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.51 0.71 -4.25 29 118 2009-01-15 18:05:59 2008-06-30 14:05:21 4 1 113 0 20 81 2 133.10 52 88.93 CHANGED psFDhslhshhVHVcssoloITDsSssspTRGVPDGaVDGDVuA-GElELDspsFphlspAA+sAGSaRulcshDhhFYApsGp.-EhKVEsFGsKlhloDlLsIDPcGGscss+KlKa.VTSPDFV+INGVPYL ..................hsFDshlsu.hlHlEphoLsITDNSAsAQTRGVPDGaVDGDVuA-GElElsopsFphLsstA+sAGSWRul.PhDhlFYApsGs.-EhKVEsFGsKlplSslLsIDP.cGGutsT+KlKahVTSPDFV+INGVPYL...... 
0 3 11 13 +10606 PF10774 DUF4226 BssS; Domain of unknown function (DUF4226) Pollington J, Finn RD, Bateman A anon PRODOM Domain This family of mycobacterial proteins are uncharacterised. 22.00 22.00 22.20 22.40 21.30 21.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.40 0.71 -4.03 9 168 2009-01-15 18:05:59 2008-06-30 14:13:42 4 4 64 0 19 82 0 109.40 45 37.19 CHANGED cpsGsuA-AlpstcsALAcQpussu-sDtplssAlhsAHssss-Gtc+LstltpEI-sAVsppus.uLDTPAGAR-Fp+FLluKh+-ItpVVssAshpspuKuslhsuLsuhY ..........ptGsustslpsccuuLAppp..ussAEsDcplsssLtsAHsuhs-GtRRLcAIuAEI-sA...Vscpss.huLDTPAGAR-Fp+FLhuKt+-IppVVAsAst-upu+uAllcuLsupY........................ 0 3 8 16 +10607 PF10775 ATP_sub_h ATP synthase complex subunit h Pollington J, Finn RD anon PRODOM Family Subunit h is a component of the yeast mitochondrial F1-F0 ATP synthase. It is essential for the correct assembly and functioning of this enzyme. Subunit h occupies a central place in the peripheral stalk between the F1 sector and the membrane [1]. 25.00 25.00 39.30 38.50 23.90 17.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -9.12 0.72 -4.25 18 131 2009-01-15 18:05:59 2008-06-30 14:18:51 4 3 129 0 99 122 0 69.20 44 51.92 CHANGED RsFSo..sstpt-llQDLYL+ELKuaKssPlsssDucGpV+pashPssPssPp....s-l..uu-.LcuYc.spsVE ..........RsFhs..ssppsDhVQDLYL+ELKAYKssP..h.....pt..sDA-G.....pVppFshPpsPpuPE....s-L...As-.LKuYEspsVE............... 0 27 55 84 +10608 PF10776 DUF2600 Protein of unknown function (DUF2600) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins. Some members in the family are annotated as YtpB however currently no function is known. 
25.00 25.00 148.20 112.40 19.60 19.10 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.03 0.70 -5.60 29 211 2009-09-11 11:05:58 2008-06-30 14:30:12 4 2 203 0 50 164 0 326.10 55 92.33 CHANGED PlVH+ELuhW+c+Ah+IPscEL+pQAlASIccKTFHCEGGuIhALLAs-+p--sIRFIVAYQTISDYLDNLCDRSTSLDPpDFtsLHpSMhtALoscsE..susYYRaR--Q-DGGYLc-LVcTCQcVLpchcpY-tItPhLpELAsYYCDLQlHKHVc.-.ERcPRLpsWF-ta+csLPt.MoWaEFSACuGSTLGIFCLVAYAacs-Lp-c-stKIRpuYFPYlQGLHILLDYFIDQEEDRhGGDLNFCsYYpscpthh-RhpHFlEcA-cplusLPHscFHRLIsRGLLGlYLSDpKVsuQ+ph++hA++llKhGGhsSpFFYhNG+hYRK .........PlVH+ELshW+c+AhpIPssEL+sQAlASIccKTFHCEGGuILA.L.L.us.-c.+ccsI+FIVAYQTISDYLDNLCDRSTSLDPsDFttLHpSMh.ALoPcs-.....suNYY+aRc-QDDGGYLc-LVcTCQcVLpchp+Y-tItPhLcELAsYYCDLQlHKHVchE.EREPRLpsWF-tH+c.slP.-...MsWaEFSAC.uGSTLGIFC.LVAYAa.c.s-Lp-EchtKIRpuYFPYlQGLHILLDYFIDQEEDRhGGDLNFCoYYcscpthl-RhcHFlEcA-cslscLPHucFHRLIsRGLLGlYLSDpKVus.Q+.ph++hAR+llK.h.G.GhsShFFYhNG+hYRK.................. 0 23 39 42 +10609 PF10777 YlaC Inner membrane protein YlaC Pollington J, Finn RD, Eberhardt R anon PRODOM Domain Members of this family include proteins annotated as inner membrane protein YlaC in E. coli and Salmonella. The function of this family is unknown. 20.30 20.30 21.20 21.00 19.60 19.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.03 0.71 -4.67 8 536 2009-01-15 18:05:59 2008-06-30 16:19:27 4 2 531 0 39 134 3 153.20 75 97.48 CHANGED MsEIpRLLscsI-RlNpcEKRDN+PRFShsFIRcHPhLFlAMasualAsLsVMhhophh.hsSlhhhsVLFllhsAhhhhDlpPpYRaEDIDVLDLRVCYNGEWYsTRtVssphI-cILssPpVssshKsplcKhlssKGElsFYDVaoLAhspps ..............................MTEIQRLLoETI-sLNsREKR.DNKPRFSISFIRKHPGLF.IuMYVAaFATLAVMLQSETL.oGSVWLLVVLFILhNGFFFFDVhP...RYRYEDIDVLDFRVCYNGEWYNTRFVPssLlEsILNSPcVuDsHKpQLQKMIsR.KGELSFYDIFTLARAEuo...... 
1 1 7 23 +10610 PF10778 DehI Halocarboxylic acid dehydrogenase DehI Pollington J, Finn RD anon PRODOM Family Haloacid dehalogenases catalyse the removal of halides from organic haloacids. DehI can process both L- and D-substrates. A crucial aspartate residue is predicted to activate a water molecule for nucleophilic attack of the substrate chiral centre resulting in an inversion of the configuration of either L- or D-substrates in contrast to D-only enzymes [1]. 21.50 21.50 21.50 23.20 21.30 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.63 0.71 -4.71 12 73 2009-01-15 18:05:59 2008-06-30 16:26:33 4 4 42 4 8 69 11 122.20 37 74.63 CHANGED WVuhsh+shApaspFlstuWpthcsthtTRhhEcuA-plRtto...lhs..hshss.sttLhutGas.t-lccl+ssl-hFsYGNPKYLllhoAhpE.uhptRshuGts...hsut.ssphPhGhsp.h...hpLl-.ccAspcspplLpDItcshhphtsuSDa ..aVuFshRs.hupaPsFlstAWtsh+PslpTRaAEcuADtlRhpS...lssu...sshss..Ts.+.LhthGas-p-Iccl+ssLDlhNaGNPKhLl...................................................................................................................................... 0 4 7 8 +10611 PF10779 XhlA Haemolysin XhlA Pollington J, Finn RD anon PRODOM Family XhlA is a cell-surface associated haemolysin that lyses the two most prevalent types of insect immune cells (granulocytes and plasmatocytes) as well as rabbit and horse erythrocytes [1]. This family has had DUF1267, Pfam:PF06895, merged into it. 34.70 34.70 35.30 35.10 34.60 34.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.44 0.72 -4.06 47 409 2009-01-15 18:05:59 2008-06-30 16:30:38 4 1 232 0 46 250 1 70.50 30 88.24 CHANGED hpclppclsplcsc.cp.......htpthpshEhtsthp-ps..........ltslscpLpcIppNppWlhR...hllGullsu....llshlhK ...........p-lhpclsplcschcp.......htpclcsLEpp...sstp-pc..........ItslscpLc+IssNspWllR....llluullsu....llshlhK.............. 
0 21 36 40 +10612 PF10780 MRP_L53 39S ribosomal protein L53/MRP-L53 Pollington J, Finn RD anon PRODOM Family MRP-L53 is also known as Mrp144. It is part of the 39S ribosome [1]. 24.80 24.80 25.00 25.00 24.00 24.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.43 0.72 -3.99 20 175 2009-01-15 18:05:59 2008-06-30 16:32:23 4 2 165 0 124 165 0 51.80 33 44.15 CHANGED FNPFutsu+sARLFLuhl.....ssos+stuhplpsclLscso..sppPplcVtFKDG ........FsPFstpu.+ssRhFLshl.......sst+sptoslsspllscss...pptPplpVtFtDG.... 0 23 57 95 +10613 PF10781 DSRB Dextransucrase DSRB Pollington J, Finn RD anon PRODOM Family DSRB is a novel dextransucrase which produces a dextran different from the typical dextran, as it contains (1-6) and (1-2) linkages, when this strain is grown in the presence of sucrose [1]. 25.00 25.00 26.50 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -8.96 0.72 -4.48 5 506 2009-01-15 18:05:59 2008-06-30 16:43:54 4 2 504 0 33 111 1 61.60 90 97.22 CHANGED MKVNDRVTVKTDGGPRREGVILAVEEFSEGVMYLVSL-DYPAGIWFFNElDSpDGTFVEhts .....MKVNDRVTVKTDGGPRRPGVVLAVEEFSEGTMYLVSLEDYPLGIWFFNEuGHQDGIFVEKA.E.... 0 1 8 20 +10614 PF10782 DUF2602 Protein of unknown function (DUF2602) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 26.20 26.20 23.60 22.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.18 0.72 -4.13 14 323 2009-01-15 18:05:59 2008-06-30 17:10:47 4 1 304 0 25 89 0 57.20 57 82.95 CHANGED hs++phhtcls-L.spYCpuChlKcHhRKp.GKsaAHpFCIppCTlGcclKphGppLp ............scpphlscIsDLhsTYCppC.lKp+hRKhpGKTtAHpFCIscColGKpIKQlGspLp..... 0 4 10 17 +10615 PF10783 DUF2599 Protein of unknown function (DUF2599) Mistry J, Coggill P anon PRODOM_PD414589 Family This family is conserved in Actinobacteria. The function is not known. 
25.00 25.00 25.70 28.70 23.70 24.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.40 0.72 -3.34 12 152 2009-01-15 18:05:59 2008-07-01 12:49:54 4 3 148 0 39 108 0 94.80 43 49.17 CHANGED lD+spWspass......h.oLpVhPTpsGRpsstp.s.....tstAWsEllsLuP-............AsssGMRtQFlCHap..........aAchhtPuKsSWNLEPWRPsVssp-hlApGCNPG ...................l-pspWsphss......h.SLpVhPops.G.R..ss.sp.p.s.......h-sAWsEVluhsP...c............AsosGMRsQFlCHap..........aAc..........ssKsSWNLEPa.RPsVsss-hlAsuCNPG................ 0 6 22 31 +10616 PF10784 Plasmid_stab_B Plasmid stability protein Mistry J, Coggill P anon PRODOM_PD189613 Family This family is conserved in the Enterobacteriales. It is a putative plasmid stability protein in that it is expressed from the operon involved in stability, but its actual function has not yet been characterised. 25.00 25.00 25.90 25.30 23.70 23.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.15 0.72 -4.36 11 396 2009-09-11 13:35:50 2008-07-01 12:52:36 4 2 221 2 9 189 0 68.40 37 59.10 CHANGED R+hohYLpP-t.puDphApshl-olspptRGchhRsAhlsGhALaplDPRLPhLlushhscphossplspll .........R+hohYL+Pst.ps-t.Asthl-ols.ptRuch.RsAhluGhALhphDPR...hshlLushhs--hsssslsph.................. 0 0 2 4 +10617 PF10785 NADH-u_ox-rdase NADH-ubiquinone oxidoreductase complex I, 21 kDa subunit Mistry J, Coggill P anon PRODOM_PD104546 Domain This family is the N-terminal domain of NADH-ubiquinone oxidoreductase 21 kDa subunits from fungi, lower metazoa and plants. 
25.00 25.00 30.40 29.20 23.40 21.00 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.82 0.72 -3.74 35 183 2009-01-15 18:05:59 2008-07-01 13:39:47 4 2 163 0 125 165 0 84.40 35 51.07 CHANGED hpocYPlI.....DsDP...ahpRVlsYhRsSDYshhuussuuhPshhahhEphsPstst.h......ssshRhushlGhhGGFhhsYpRSstRFhGapE .......................hpscYPlI.....DscP............pap+Vlu.hRsuDYshhushuususshhahhtphssspht................usuMphuuhlGhhGGFhhsYQpSshRhhGapE............... 0 43 80 110 +10618 PF10786 G6PD_bact Glucose-6-phosphate 1-dehydrogenase (EC 1.1.1.49) Mistry J, Coggill P anon PRODOM_PD110134 Family This family is conserved in Firmicutes and Proteobacteria. Several members are annotated as being glucose-6-phosphate 1-dehydrogenase (EC:1.1.1.49) but this could not be confirmed. 25.00 25.00 25.70 25.40 24.20 23.80 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.66 0.70 -4.91 9 269 2009-01-15 18:05:59 2008-07-01 13:44:42 4 1 265 0 18 153 0 202.90 45 98.10 CHANGED hLospsh-lFshPhFpFtQlK.KasPE-IspIKA-YKtpWQhWKplp.pVupQLss..sFAcPHIEpWsNGWplRuHFaAhY+hph.pspuAhluVlLN++pLQVhL.appY+uD+pQholppYNphLspl....Dphchuca.lWctsEpEasDahslsph.....pppshshcsc-chaplGK.h.+sc.sh.DhtcFIhcTIccLhPLYE+h+ .......LTtpshsLFshPhFpFtQLK.pasP-cIspIKs-...YKppWppWKtl..pVAttLss...sFAcPHIESWsNGWpl.RuHFaAha+hEppp..spsAhL...ulLLN++pLpV.L.appYKu-cpthslspYNp.hLsph....cshchtsa.lWcts.EpEasDahslpph.....pppphphcsscchFpIGKhh.ps-.phpchtphhspslp-LhPLYpth................................................................................ 0 3 7 12 +10619 PF10787 YfmQ Uncharacterised protein from bacillus cereus group Mistry J, Coggill P anon PRODOM_PD089522 Family This family is conserved in the Bacillus cereus group. Several members are called YfmQ but the function is not known. 
20.90 20.90 20.90 36.20 20.80 19.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.78 0.71 -4.85 7 191 2009-01-15 18:05:59 2008-07-01 14:15:02 4 1 116 0 14 119 1 139.80 65 98.44 CHANGED MThWhIlhLVlFuhhKllVoslPouVVE.lluKFElHscLp--ssolohsG+pLEGppKpclIppFNEAlFL-+YYh.PusE.........GTPLlIpTKpGK+-VphalYpYDDHlDVVKQY....KKKllAYpLRScsLQsss.h.hstshh .........MTTWFIVhLhlFGAhKllVSShPsoVlESIISKFEhH.KL-EE.NsoloIcGpNlEGEpK.plIH-FNEALFLD+aYhPPHsE.........GTPllIcsK+GKK-lpF.lYSaEE.HVDVlKQY....KKKVVAYpLRSKsLQsps.hhlotDh.A........ 0 3 8 9 +10620 PF10788 DUF2603 Protein of unknown function (DUF2603) Mistry J, Coggill P anon PRODOM_PD098479 Family This family is conserved in Epsilon-proteobacteria. The function is not known. 25.00 25.00 74.70 74.50 20.60 19.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.74 0.71 -4.41 12 175 2009-01-15 18:05:59 2008-07-01 14:22:06 4 1 173 0 16 72 0 135.20 56 83.07 CHANGED lschupsLGlpccp.pTlhchhp..psNEhhLpLcsGshspsEPWFhlDEpsph+sllohp.lptLlpslKpup+ENFcL+LEKsIhQphPlDFsDVWsVAh-EI++httpst.....sIslcpLlccIK+EHPNLFhshc ......................IDEhSpsLGhcKc-.RsIFKhKpops-NE+sLh..LEsGSFDo.sEPWFlhDENDclHTLlSlpSLpNILEsLKpuQKENFELRLEKAIaQQIPlDFsDVWpVAMDEIKppAQpss.c..lsIDL-KLlpcIK+EHPNLFVDMp. 0 4 13 16 +10621 PF10789 Phage_RpbA Phage RNA polymerase binding, RpbA Finn RD anon PRODOM Family Upon infection the RpbA encode phage protein binds to the ADP-ribosylated core RNA polymerase and modulates function to preferentially bind T4 promoters.\ This is a non-essential protein to the phage life cycle. 
21.00 21.00 21.10 22.50 20.80 20.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.43 0.72 -4.20 8 36 2009-01-15 18:05:59 2008-07-01 14:49:10 4 1 35 0 1 35 0 103.20 35 85.48 CHANGED uDIpsp.hpo-us.cspNKIRKAWVLphsDsptcpLQul.Qc..sRFtLYupIDc-Vp-pWIcLMR++ps-uLssGuKhVhsh.GpchLtcpYphDsDEhLIsAAplV....huch ........tDIQsK.hpo-us.pspN+IRKAWVLphs.-sstctlQsh.pp..sRatlYptIDc-VpcpWI-LM+++ps-uLssGAKhlhsp.stchLEcpYphssDEhLl.AuplVhtp.h........................... 1 0 0 1 +10622 PF10790 DUF2604 Protein of Unknown function (DUF2604) Finn RD anon PRODOM Family Family of bacterial proteins with undetermined function. 20.90 20.90 21.20 144.30 20.30 19.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.23 0.72 -3.84 3 5 2009-01-15 18:05:59 2008-07-01 15:11:21 4 1 5 0 3 5 0 76.00 69 56.89 CHANGED VVVNGQPVsVEANVNAPLHsVlAKALEpSGNVGQPsENWELKDEuGsVLDlsKKVEDaGFTNGVKLFLSLKAGVAG VVVNGQPTQVEANPNQPLHVVRAKALENTQNVAQPAENWEFKDEAGsLLDlDKKVGDFGFANsVTLFLSLKAGVAG 0 0 2 3 +10623 PF10791 F1F0-ATPsyn_F Mitochondrial F1-F0 ATP synthase subunit F of fungi Mistry J, Coggill P anon PRODOM_PD068018 Family The membrane bound F1-FO-type H+ ATP synthase of mitochondria catalyses the terminal step in oxidative respiration converting the generation of the electrochemical gradient into ATP for cellular biosynthesis. The general structure and the core subunits of the enzyme are highly conserved in both prokaryotic and eukaryotic organisms. 
23.00 23.00 23.00 24.40 22.50 22.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.12 0.72 -3.75 14 131 2009-01-15 18:05:59 2008-07-01 15:39:51 4 1 128 0 99 115 0 92.00 53 70.67 CHANGED Mhh.....+RuLSTLI............P.PKlsosts.luuussAtRhtpVVsFYcpLPpGPAPthpss..uh.lu+YpAKYF..pGcNASG+PllHhlhullhhGYuh-YaFHL+ ............................M.alhRRuLSTL.I......P.PKlA....Ssps.lGuAssA......tRMpcVVsFYc+LP+GsAPt..sKssGh...lGRYpA+YF...G+NASup...........PllHhlhullhlGYoh-YYFHLR............................ 0 26 55 84 +10624 PF10792 DUF2605 Protein of unknown function (DUF2605) Mistry J, Coggill P anon PRODOM_PD070476 Family This family is conserved in Cyanobacteria. The function is not known. 25.00 25.00 46.40 46.30 18.70 18.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.33 0.72 -4.32 17 56 2009-09-11 15:07:39 2008-07-01 15:50:20 4 1 54 0 21 59 22 96.20 46 90.09 CHANGED ps.ststLLcslL-sLLcDFpaWFpRuccLLpsps.shhstc-ppsLhpRlcpupptltus+uLhpAostthuVsMpsMsPWHpLVsEsWtlAsRhppt ..sp.sps-LL+olL-PLL-DFpaWFsRucpLLEs-plsFhospEQpcLLsRV+pAQpEVpss+hLFpATstQsGl-hpshhPWHpLVsECWplutRaRp.h.. 0 2 14 19 +10625 PF10793 Gloverin Gloverin-like protein Pollington J, Finn RD anon PRODOM Family This family of proteins are Gloverin-like. Gloverin is a 13.8kDa inducible antibacterial insect protein which inhibits the synthesis of vital outer membrane proteins leading to a permeable outer membrane. Gloverin contains a large number of glycine residues [1]. 
25.00 25.00 35.90 63.40 23.70 16.70 hmmbuild --amino -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.93 0.71 -4.58 5 27 2009-01-15 18:05:59 2008-07-01 15:51:29 4 2 12 0 7 27 0 137.10 61 91.41 CHANGED QVShPPGYAc+YPpYhKaSKpsRHPRD....VTWDKplG.sGKVFGTLGQNDDGLFGKGGYp+-FFNDHRGKLTGQAYGTRVLGPuGDSTNaGGRLDWANKNApAALDVsKQIGGRoGloASGSGVWcLDKNT+LSAGGsLSKsFGHsRPDVGlQApIpHDW ....................t..p.Ys.....S+..RHPRD....lTW-+phG.sGKVFGTLGpsDpGLFGKuGYppphFNDcRG+LpGQAYGoRVLGPsGDSTsaGGRLDW..uNcNApAAlDls+QIGGpoGhoAoGSGVWsLDKNT+LSAGGslSK.pFGHp+PDVGlQAphpH-a...... 0 6 7 7 +10626 PF10794 DUF2606 Protein of unknown function (DUF2606) Finn RD anon PRODOM Family Family of bacterial proteins with unknown function. These proteins have been classified as membrane proteins 25.00 25.00 25.40 99.40 24.80 23.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.49 0.71 -4.72 4 45 2012-10-02 19:08:27 2008-07-01 15:55:09 4 2 45 0 2 34 0 112.50 51 95.78 CHANGED Ma.hIhp.G.lNKYsphlah....LsIlhhsuhhsss-pshpKsl.PVThHVcst-tpPlcshplhlhK..-ps.pPSpEIG..IGKTDccGclhW+ssRKGcYhVhLspsEsp.s....lhpD+cspplI.Isl .............................hhsushuss-pphuKsl.sVThHVcNKEKpPlKsFEIhLMK..DpsPpPS+EIGISIGKTDcEGKlIW+ssRcGcYIVhLPNsETphl....hlNDRctsclIsIs.h 0 1 1 1 +10627 PF10795 DUF2607 Protein of unknown function (DUF2607) Mistry J, Coggill P anon PRODOM_PD067168 Family This family is conserved in Gammaproteobacteria. The function is not known. 
20.50 20.50 20.90 20.60 20.40 20.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.51 0.72 -4.17 7 95 2009-01-15 18:05:59 2008-07-01 15:59:13 4 1 95 0 10 53 4 93.30 56 99.24 CHANGED M..athpth.pph+Rpsl...huVsLhlhhshAsltHplDlsPEHHspHHCQLFuuspHGlsp.....u.P.l.sPsappt..pslhtpshphtplh..hhARuPP.hhu ...........................p+hsl..MLSVVLuLWFNVAVIDHQLD.LHPEHHLQHDCQLFASAAH...GLKT......SQWlL...P...SWRQNPP..QARVEQPIQ.RsQVLa.SYFARSPP....AA. 1 1 2 6 +10628 PF10796 Anti-adapt_IraP Sigma-S stabilisation anti-adaptor protein Mistry J, Coggill P anon PRODOM_PD028886 Family This family is conserved in Enterobacteriaceae. It is one of a series of proteins, expressed by these bacteria in response to stress, that help to regulate Sigma-S, the stationary phase sigma factor of Escherichia coli and Salmonella. IraP is essential for Sigma-S stabilisation in some but not all starvation conditions [1]. 21.80 21.80 22.10 22.60 20.80 21.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.87 0.72 -3.60 8 516 2009-01-15 18:05:59 2008-07-02 10:59:42 4 2 476 0 39 130 0 84.70 75 97.83 CHANGED MKNLIucLLsKLAcKEtEuKpLsAQVEALEllloAlLpsh.csss.pcLIcsVEpAlssApssssss.+.DoElLpp.lp+LLphsps ......MKNLIAELLhKLAQKEEESKELsAQVEALEIIVTAMLRNM...AQN-QQcLI-QVEGALhcVKPD..A..SlPDc.DTELLRsYVKKLL+HPRp........................... 0 1 4 25 +10629 PF10797 YhfT Protein of unknown function Mistry J, Coggill P anon PRODOM_PD140382 Family This family is conserved in Firmicutes and Proteobacteria. The function is not known but several members are annotated as being homologues of E coli YhfT, a protein thought to be involved in fatty acid oxidation. 
25.00 25.00 79.40 79.30 20.20 19.60 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.30 0.70 -5.63 13 324 2009-01-15 18:05:59 2008-07-03 11:56:38 4 2 316 0 28 135 1 424.10 79 98.09 CHANGED lcIlLlAllGuhuAlLANpulAVFpDGlRPIlPEhlEGpMsR+ELuusuFALShGhVhGFGIPhoLusuIllsH.lhLsTDIIGlhssssh....lAullGulaGlhllhuLpsVlslFstLPVNFlsuLuplGsPVlsAFAlFPAlAluYQFGhKpGllshllshluRllls...+at.h.hu.s.....lsLsPEGIulllGMlhLlhaAh+c+hs-css.........ushsolFs-RlpRIpKNhhhLulsGAL..lAAsuuhtIlAGsslSltLLAcu............tlspAAlAshsRulGFlPLIATTAlATGVYGssGhTFVFssG.hluPNPhlAuILGAllIhlEVhLLssIu+aL-+aPulRsuu-NIRTAMspllElALLIGGshAupphuPshGFhllsulYlLNEshGRPll+hAsGPVAAIlsGIlhNlLhllGLF .....IQIIVVACLTGMTSLLAHRSAAVFHDGIRPILPQLIEGYMNRREAGSIAFGLSIGFVASVGISFTLKTGLLNAWLLFLPTDILGVLAINSL...............MAFGLGAIWGVLILTCLLPVNQLLTALPVDVLGSLGELSSPVVSAFALFPLVAIFYQFGWKQSLlAAVVVLMTRVVVV...RY............FPHLNPESIEIFIGMVMLLGIAITHDLRHRDEN.................DIDASGLSVFEERTSRIIKNLPYIAIVGAL..IA..AVASMKIFAGSEVSI.FTLEKAYSAGVTPEQSQTLINQAALAEFMRGLGFVPLIATTALATGVYAVAGFTFVYAVG.YLSPNPMV.....AAVLGAVVISAEVLLLRSIGKWLGRYPSVRNASDNIRNAMNMLMEVALLVGSIFAAIKMAG......YT......G.....FSIAVAIYFLNESLGRPVQKMAAPVVAVMITGILLNVLYWLGLF........... 0 8 13 22 +10630 PF10798 YmgB Biofilm development protein YmgB/AriR Pollington J, Finn RD anon PRODOM Family YmgB is part of the three gene cluster ymgABC which has a role in biofilm development and stability. YmgB represses biofilm formation in rich medium containing glucose, decreases cellular motility and also protects the cell from acid which indicates that YmgB has an important function in acid-resistance [1]. YmgB binds as a dimer to genes which are important for biofilm formation via a ligand. Due to its important function in acid resistance it is also known as AriR (regulator of acid resistance influenced by indole) [1]. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.79 0.72 -4.16 6 700 2009-09-11 05:26:58 2008-07-28 14:30:18 3 1 416 2 58 197 0 55.90 43 68.79 CHANGED tpEuulluslVpplLtossaVoNKsIIhpLI+pLETpsDlVptDlhRpsLElVVtpTsDD..I .................EptsLGphVspLhpuGcslsNKsIIhpLIppLEoppDhhphDlhRpsLEhVl..TsDDh.................... 0 2 6 42 +10631 PF10799 YliH Biofilm formation protein (YliH/bssR) Pollington J, Finn RD anon PRODOM Family YliH is induced in biofilms and is involved in repression of motility in the biofilms [1]. YliH is also known as bssR (regulator of biofilm through signal secreton). 20.40 20.40 20.50 119.10 19.50 18.80 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.59 0.71 -4.44 4 440 2009-01-15 18:05:59 2008-07-28 14:32:18 3 1 439 0 14 59 1 124.30 86 99.55 CHANGED MsVDRL+pDLLNKLINARIDLAAYLQLRKAKGYMSVSESDHLRDNhFELsREh+s+A.RLp.HlDtEEhssLR+At-ALAsAAVCLMSGHHDCPTaIAVNADKLENCLToLTLsIpsLpcHuPLpps ...MhVDR.RhDLLN+LIsARlDLAAYlQLRKAKGYMSVSESNHLRDNFFKLNRELHDKSLRLNLHLDQEEWSALHHAEEALATAAVCLMSGHHDCPTVITVNADKLENCLMSLTLSIQSLQKHAMLEKA..... 1 1 1 7 +10632 PF10800 DUF2528 Protein of unknown function (DUF2528) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. Some of the sequences are annotated as ea10 however the function of this protein is unknown. 22.00 22.00 22.10 31.80 20.00 21.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.29 0.72 -3.63 4 191 2009-01-15 18:05:59 2008-07-28 14:32:53 3 1 157 0 4 72 0 105.50 89 89.24 CHANGED uacIlV-lDHslLTEEKLsELspFWS-u-hclE+HGs.LpAhLphhAs+hhuhslpp......lSscssaN..ust.EGaPshDGSpGlRls-hDph..F-uDDhcVhtl .....KASIEIE.IDHDVMTEEKLHQINNFWSDSEYRLN.KHGSV.LN....AVLIMLAQHALL.IAIS..SDLN.....AYGVVCEFDWND.GNGQEGWPPMD....GSEGIRITDIDTSGIFDSDDMTIKA.A....... 
1 0 1 2 +10633 PF10801 DUF2537 Protein of unknown function (DUF2537) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.30 20.30 21.10 31.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.04 0.72 -3.88 8 113 2009-01-15 18:05:59 2008-07-28 15:02:40 3 1 112 0 28 72 1 83.60 58 76.69 CHANGED TPWATGLTVAuFVAAVluVAlVVLolGLhRVHPLLAVGLNlVAVGGLAPTlWGWR+TPVhRWFVLGAAVGVsuAWlALLllAh.G ...sPWATGLsVAuFVAA...VlAVAVVVLSlGLl.R.VHP......LLA......VuLNl.VAVuGLAPTLWuWRRsPVLRWFVLGAAVGVAuuW...lALLsls............. 2 7 20 25 +10634 PF10802 DUF2540 Protein of unknown function (DUF2540) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Methanococcus. 25.00 25.00 82.90 82.80 23.90 19.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.21 0.72 -4.31 12 26 2009-01-15 18:05:59 2008-07-28 15:06:32 3 1 13 1 9 21 0 75.30 39 84.61 CHANGED phtLhcplDs+slRYaLHKL-sl.splss-lLtcuhcscKpa+poloLo-pEccIlcKYG.KuTNhLlNahIlpppp .pFtLh+slDsRslRYhLHKlEsl.cpIss-l..Lt+AhcscKpa++olTLo-cEccIlcKaG.KuTNlLlNhhIlppc.... 0 1 3 6 +10635 PF10803 GerPB DUF2539; Spore germination GerPB Pollington J, Finn RD, Eberhardt R anon PRODOM Family Members of this family are required for formation of functionally normal spores. They may be involved in the establishment of spore coat structure or permeability [1]. 25.50 25.50 25.60 50.80 25.10 25.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.56 0.72 -4.21 7 135 2009-01-15 18:05:59 2008-07-28 15:07:18 3 1 134 0 19 64 0 52.40 69 75.89 CHANGED MNFYlpQoIpINhlRltuloNSSVhQIGSAGSIKsLSpLYNTGuasEPAP.ssus ...MNFYlNQSIhINpl+I-SITsSSVFQIGTAGSIKuLSKFSNTGGFTEPhRPLpAK... 
0 4 11 13 +10636 PF10804 DUF2538 Protein of unknown function (DUF2538) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 20.50 20.50 23.80 23.50 20.20 20.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.02 0.71 -4.34 3 217 2009-01-15 18:05:59 2008-07-28 15:07:59 3 1 215 2 10 43 0 153.40 91 99.52 CHANGED MSRKTYEKlANINGMFNVLEQQIIHSKDMALFRSEFFYVNHEHRENYEALLIYYK-SslNPIVDGACYILALPEIFNKVDVFESELPFSWVYDENGITETMKSISVPLQYLIAAALEVTDVNLFKPSGFTMGMNNWNIAQMRIFWQYTAIVRKEAL ....MSRKTYEKIANINGMFNMLEQQIIHSQDMAHFRSEFFYVNHEHRENYEALLIYYKNSIDNPIVDGACYILALPEIFNSVDVFESELPFSWVYDENGITETMKSLSIPLQYLVAAALEVTDVNIFKPSG...FTMGMNNWNIAQMRIFWQYTAIIRKEAL.............................................. 1 3 4 10 +10637 PF10805 DUF2730 Protein of unknown function (DUF2730) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.24 0.72 -4.30 11 133 2009-09-10 19:13:11 2008-07-28 15:08:36 3 1 124 0 31 115 5 98.70 21 91.33 CHANGED hh.hlpspWsl.lhulhshssshhhhhhsppYA++c-ltpL.......-pRLsphEs+lcsLPTpp-VpcLclplscl+G-hKshssslpsloHQscLLLEpcL..pcc ............................................................t.ash.lhshh...hsshhhhhh.pp.p.a.sp.ccchpp...............L..............................cpRlsp.lEsclpslPspp-lpcLplplsplcG-l+slsspl....psl....s+..pLLlEptl.....pt.................................. 0 10 22 26 +10638 PF10806 DUF2731 Protein of unknown function (DUF2731) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 
21.40 21.40 21.40 22.30 21.20 20.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.71 0.71 -3.87 11 76 2009-01-15 18:05:59 2008-07-28 15:10:58 3 4 74 0 52 74 0 94.50 29 29.59 CHANGED hpVPsPlKplFDsFPLpsY..tslsppcsus.pslpp+pahF..............sssssp.sssssFpLGVaNVhphptst.....shLsoDPhuLhspLhLC+KNsLtLPo.........tsss...................spsssslhhLShhAusDppLPILlE ..hsVPsPlK+lFDpFPLhTY..ss.sttspshttp..hpp.+hasF..............ts.t.t.t..................................................................................................................................................................... 0 7 22 41 +10639 PF10807 DUF2541 Protein of unknown function (DUF2541) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. All proteins are annotated as YaaI precursor however currently no function is known. 21.90 21.90 24.00 23.70 21.70 21.10 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -4.39 2 436 2009-01-15 18:05:59 2008-07-28 15:11:09 3 1 433 0 6 84 0 132.60 88 99.68 CHANGED M+SlhplSVGLlhGluh.osAtANDHKILGVIAMPRNETNDLsLplPVCRlVKRIQLoAD+GDlpLSGAoVYFKsuRuASpoLNVPuuIKEGpTTGWININSDNDsKRCVpKIsFSGHTVpSSDMApLKlIGDD .............MKSVhTISASLA...I.S.LML.CCTAQANDHKILGVIAMPRNETNDLALKLPVCRIVKR...IQLSADHGDLQLSGASVYFKAARSASQSLNIPSEIKEGQTTDWININSDNDNKRCVSKITFSGHTVNSS.DMATLKIIGDD........ 0 1 2 4 +10640 PF10808 DUF2542 Protein of unknown function (DUF2542) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. The family has a highly conserved sequence. 
25.00 25.00 57.30 57.20 22.20 22.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.75 0.72 -3.71 3 411 2009-01-15 18:05:59 2008-07-28 15:46:52 3 1 410 0 6 41 0 78.50 81 99.60 CHANGED MDVQTIFVVlAFLLlPLFCFREAWKGWRoGAVDKlVKNAREPVYVYRAcsPlLYWSYlsLYlGhGlLolGMIIYLLFYR MDVQQFFVVAVFFLIPIFCFREAWKGWRAGAIDKRVKNAPEPVYVWRAKNPGLFFAYMVAYIGFGILSIGMIVYLIFYR. 0 1 1 4 +10641 PF10809 DUF2732 Protein of unknown function (DUF2732) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 25.40 25.30 24.80 24.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.57 0.72 -4.43 8 350 2009-01-15 18:05:59 2008-07-28 15:49:26 3 1 279 0 29 161 1 73.20 45 96.25 CHANGED M+NschtoTpous--shLspLLs-ARhEERKsRAhAlShRL-ALAlHIsp+phousEAAELLR+EAs+aEsESQ.ElH ......................................t....hslLLspARhEERpspApthuuRLDuLAsHITpcpLs+VEhsELLRppAEphpNput.-........ 0 0 7 21 +10642 PF10810 DUF2545 Protein of unknown function (DUF2545) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function is restricted to Enterobacteriaceae. The sequence is highly conserved. 25.00 25.00 68.60 68.50 19.10 19.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.95 0.72 -3.58 2 416 2009-01-15 18:05:59 2008-07-28 15:50:10 3 1 412 0 5 40 0 79.40 86 99.91 CHANGED MIYLWhFLAlsIlsVSGYIGQVhshhSAlSSFhGMVILAALIYhhshWLpsGs-lVoGlhhFLAPACGLhIRFMVGYG+R ....MIYLWMFLALCIVCVSGYIGQVLNsVSAVSSFFGMVILAALIYYFTMWLTGGNELVTGIFMFLAPACGLMIRFMVGYGRR.. 0 1 1 3 +10643 PF10811 DUF2532 Protein of unknown function (DUF2532) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
25.00 25.00 280.50 280.30 21.30 20.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.04 0.71 -4.42 11 41 2009-01-15 18:05:59 2008-07-28 15:51:57 3 1 41 0 6 23 0 158.00 85 95.55 CHANGED KLIhCFLILVSAVKVNADFNsIQDNFEYQE...EQLsIELPWSDCTEIHKLLEEKLSFSEQQIKKENKI+EKYKQFYLKHNNP.oNFSMQFLEKKSETNGVETLISGFLKFCEDNFQTSKSKSNSLNYaIKKQQDQWaNsIRNENYKIYYRKKY-DNIhRNN .KLITCFLILVSAVKVNADFNNIQDNFEYQE...EQLhIELPWSDCTEIHKLLEEKLSFSEQQIKKENKI+EKYKQFYLKHNNP.TNFSMQFLEKKSETNGVETLISGFLKFCEDNFQTSKSKSNSLNYYIKKQQDQWFNDIRNENYKIYYRKKYEDNIFRNN.. 0 1 1 1 +10644 PF10812 DUF2561 Protein of unknown function (DUF2561) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacterium spp. 20.50 20.50 20.60 21.10 20.30 20.30 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.42 0.70 -4.72 7 80 2009-01-15 18:05:59 2008-07-28 15:52:41 3 2 78 0 14 39 1 203.10 60 94.39 CHANGED Mss.p.SAhRc..GsD.shu.-slDRILlGACAAlWLshlGsuVAAsVALhDLGRGapphuusscTsWVLYuVIsVSALlIsuAlPlLLRARRhAcsEPsspshshscpsut..shtsutssspstpcpss....ssuht.ssth.............sstAVDRlWLRsTlslsushGsAhlAVusATYLMAVG+DsASWVuYGLAGlVTsuMPsl.WhalRpLRt ..............MVS.RYSAYRR..GPD.sISPDVIDRILlGACAAVWLVFsGVSVAAAVALhDLGRGFHEhAGsPH.TTWVLYAVIVVSALVIVGAIPVLLRARR..MAEAE..P...As.RP....oGA....u.s..RGGc......oluSGpPA...tRAsA..E.SAP...VpHAcAh..csAAEW...............................SSEAVDRIWLRGTVVLToAIGIALIAVAAATYLMAVGHDGsSWluYGLAGVVTAGMPVIEWLYsRQLRR.................................................. 2 1 8 12 +10645 PF10813 DUF2733 Protein of unknown function (DUF2733) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
21.20 21.20 22.60 25.70 20.70 19.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.92 0.72 -7.29 0.72 -4.39 12 28 2009-01-15 18:05:59 2008-07-28 15:54:04 3 1 27 0 0 22 0 32.40 43 45.92 CHANGED MGs.lhSlC+RRhsPlhDVcGp.IsltcDFE.h ..........MGh.lhSlC+RppNslhDVcGp.IslscDFE.h.. 0 0 0 0 +10646 PF10814 DUF2562 Protein of unknown function (DUF2562) Pollington J, Finn RD anon PRODOM Family This protein of unknown function appears to be restricted to Mycobacterium spp. 25.00 25.00 26.90 25.80 24.40 23.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.75 0.71 -4.21 7 75 2009-01-15 18:05:59 2008-07-28 15:54:39 3 2 75 0 13 36 0 131.90 70 92.93 CHANGED LTPRpRLoRGLpYosVGPVDVTRGl....lGLGlcSApSTAutLRRRYppG+LAR....ELAAApEsls.ElAAAQEVVAsLPpslQcA.....Rpt+RR.+RPhllAG.VAVsVLAGGAVsFSIVRRSsp...PEPSPhPPSVEVpP+P .......LTPRERLTRGLuYSAVGPVDVTRGL....VGLGLQSARS...TAAuLRRRYREGR....LAR....EVAAA.QETLAQELsAAQDVVANLPQALQDA........RTpRRs+++hWIFAG.IA........A.Al.LAGGAVAFSIVRR.SSR...PEP.SPRPPSVEVQPRP............ 0 2 7 11 +10647 PF10815 ComZ ComZ Pollington J, Finn RD anon PRODOM Family ComZ is part of a two gene operon. It affects competence regulation by negatively affecting the transcription of the ComG operon. ComZ contains a leucine zipper motif [1]. 21.00 21.00 21.10 21.60 20.90 20.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.67 0.72 -4.19 7 116 2009-01-15 18:05:59 2008-07-28 16:00:15 3 1 115 0 14 41 0 55.40 79 83.53 CHANGED EKoMcFhQIAMKalPEAKt.L-csGI-LohEhlQPhhsLhhpVMsEAYELG+sDAp ......EKSMQFLQIAMKHLPE.AKAILDDNGIALDMEKAQPVLELLMKVMNEAYELGKADpE.... 0 3 8 10 +10648 PF10816 DUF2760 Domain of unknown function (DUF2760) Gunasekaran P, Mistry J anon Pfam-B_001564 (release 23.0) Domain This is a bacterial family of uncharacterised proteins. 
25.00 25.00 116.20 116.10 19.50 18.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.49 0.71 -4.62 29 227 2009-09-11 06:04:09 2008-07-28 16:01:46 3 1 225 0 68 201 15 125.90 59 60.21 CHANGED psossuALQLLuLLQ+EARhIDFlpEDluuaoDA-lGAAARllHpGC+KlLp-aFolpPVRsE.....sEGoRlolstGFDusplRLTGNVsGpuPFsGsLhH+GW+sscV+LPKlusspDs...s.llAPAEVE .......sAosDuALQLLuLLQ+EARLIDFlpEDlusaSDtElGAAARVlHsGspKVLcEahTLsPlRsE.....pEtoRloltsGFssppIRLTGNVsGpAPFsGTLlH+GW+ssslpLPKLu-saD.s....o..llAPAEVE. 0 23 34 55 +10649 PF10817 DUF2563 Protein of unknown function (DUF2563) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacterium. 21.20 21.20 21.50 22.50 19.50 21.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.33 0.72 -3.59 3 55 2012-10-01 21:44:22 2008-07-28 16:02:45 3 1 51 0 6 23 0 102.30 73 98.01 CHANGED MFVDT-LLHSGGNESHRAGGHA+-GADQLAtGPLhSGMFGDFAAADAFHsAVsuAHAQHVRNLQAHpEALTuVGoKAHHAAsGFTsMDspNAsEL+AVRsSuuT .......MFVDVGLLHSGANESHYAGEHAHGGADQLSRGPLLSGMFGT.FPVAQTFHDAVGAAHAQQMRNLHAH.RQALITVGEKARHAATGFTDMDDGNAAELKAVVCSCAT................................. 0 1 3 5 +10650 PF10818 DUF2547 Protein of unknown function (DUF2547) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
23.10 23.10 23.80 46.00 22.90 23.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.36 0.72 -3.36 8 80 2009-01-15 18:05:59 2008-07-28 16:03:26 3 1 79 0 11 55 0 95.20 43 92.21 CHANGED KpsFWSQLLhShIAIFALPpsQshphtp....sNcs.QsollpQplsp.ssplspclpQQshaltph.h...t.hpIpPp......Fhssshpap....sPIRAGPhs ....KssFWSpLLLullAIFALPsuQuhp.ps.....ssEN.psol..Q..QhLp.slplsc-spp.Qs.h..sphshpsct.hphpPH......Fhscshshs....APIRAGPl.h 0 1 4 10 +10651 PF10819 DUF2564 Protein of unknown function (DUF2564) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 59.20 58.90 21.40 21.00 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.61 0.72 -3.86 7 123 2009-01-15 18:05:59 2008-07-28 16:06:46 3 1 123 0 17 47 0 78.60 67 93.60 CHANGED GasDh+QlEhAVETAQKhsGtAT+uhssshlcsAhQAlEsAR.Qhspupphts.lDp.sFltpppplLscspHQLcEucc .tVNDFEEVKFRVETAQKMVGSATISMDPDTLEHATTAVEAARSQLEIMKSVATDLDE.PFLMNEEKKLs+CEHQLsEA+H..... 0 3 9 11 +10652 PF10820 DUF2543 Protein of unknown function (DUF2543) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. The family has a highly conserved sequence. 25.00 25.00 103.90 103.70 21.10 20.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.60 0.72 -3.97 5 335 2009-01-15 18:05:59 2008-07-28 16:07:32 3 1 334 0 14 60 1 80.80 93 99.94 CHANGED MoNDIPLKYYDIADEYuTEoApPVSDuER-sLAHYFQLLITRLMNNEEISEEAQ+EMAsEAGIsEsRIDEIAsFLNQWGNE MNHDIPLKYFDIADEYATECAEPVA-AERTPLAHYFQLLLTRLMNNEEISEEAQHEMAAEAGINPVRIDEIAEFLNQWGNE....... 0 1 2 8 +10653 PF10821 DUF2567 Protein of unknown function (DUF2567) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.16 0.71 -4.73 19 131 2009-09-11 10:54:39 2008-07-28 16:08:36 3 1 131 0 34 108 0 156.80 42 71.54 CHANGED GLuusGsLlGuLWAWlAPPIHuVVAlTRuG-RV+tYLGuESppFFsAshhhlGLLSVLAVVAuVhhWQ.WRpHRGPhhVAuLuhGhssAAulAAGVGAhlV+LRYGulDhsssPlop.-+slsYVspAPPVFFu+pPLQIAhTLhaPAulAuLVYAlhAAuTuRDDLGG .........................GlussGlllGu.LWAalAPPl+u.VVs..hTRsGpplhtaL.G.uES.ppaFh.As.hhhlGLhsVluVVAuslh.Wp.hR.c+RGP.hVuuLulGhssAAu.lAA.uVGuhls+hRYus.....lDhsssPlst..sttls..hVspA..PPlaa...uctslph....Ahs.Lhhs.sslA.uLVYulhAuusuRDDLGs................................................. 0 7 23 30 +10655 PF10823 DUF2568 Protein of unknown function (DUF2568) Pollington J, Finn RD anon PRODOM Family One member in this family is annotated as yrdB which is part of a four gene operon however currently no function is known. 21.10 21.10 25.00 24.20 21.00 20.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.20 0.72 -3.76 12 114 2009-09-14 14:03:47 2008-07-28 16:14:56 3 1 105 0 44 113 0 90.70 29 80.50 CHANGED lhlRFlLELsslsuluhaGaphs.shhh+hslulshP...llhhllWuhFtuPpusp+.......lpGhsRhhlElllFuhushAhhhssp.hhulsaAslhll ............sltFlLELssLssluh....aGaphs...ts......hh....h+hslulhhP...llssllWGhFsAP+.uthp.......l.shh+lhlElhlFus.ushu.lhhhsphhhuhhhuhlhh................... 0 13 33 38 +10656 PF10824 DUF2580 Protein of unknown function (DUF2580) Pollington J, Finn RD anon PRODOM Domain This family of proteins with unknown function appears to be mainly found in actinobacteria. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.06 0.72 -3.66 56 605 2012-10-01 21:44:22 2008-07-28 16:17:35 3 2 159 0 185 723 2 97.70 19 80.66 CHANGED Mo....spLpVpPstL+phAupHspsAsplss...ussus..uhssplttoHGslsuphpsshpthhssRpsshsphtssssclApsLcsAAuhYppsDcssupslcs ................................lpVssstLcshA...up...hsphAsplss.......sssss........st.ss..t.s.ssshG.......h..s..s..t..hs.ss.hp.t...hhst.t....p...s...shsph....s...sshsshApsLpsuAstYppsDpssuttlt.t........................ 0 51 128 167 +10657 PF10825 DUF2752 Protein of unknown function (DUF2752) Gunasekaran P, Mistry J anon Pfam-B_001601 (release 23.0) Family This family is conserved in bacteria. Many members are annotated as being putative membrane proteins. 20.90 20.90 20.90 20.90 20.80 19.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.45 0.72 -3.94 79 584 2009-01-15 18:05:59 2008-07-28 16:18:56 3 1 475 0 185 535 65 51.50 32 38.74 CHANGED sCPh+tlTGh..CPGCGhpRuhhsLl+G-lsuAhphNshhhsshshlhhhhhh ................sC.h+tlT.Gh..CPGCGspRuhhsL.lHG-lsuAhphNshhlhs.h.shhshhhh.h.............................. 0 71 142 172 +10658 PF10826 DUF2551 Protein of unknown function (DUF2551) Pollington J, Finn RD anon PRODOM Family This Archaeal family of proteins has no known function. 21.70 21.70 22.10 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.80 0.72 -4.08 12 39 2009-01-15 18:05:59 2008-07-28 16:20:34 3 1 26 0 36 39 0 81.70 41 78.54 CHANGED LcpYLpRDpsGlR+slLplFLcscphThs-la-tLpcc.FsVSh+uVuuMVGllsS+LGILps..sshGspslYpLKEcYtslV ...LpcYLpRDpsGlR+slLplFLcscphTss-lach.Lpcc.FslSh+uVuuMVGlhsS+LGILps..pshsspshYpLKEcYtslV........... 0 7 28 32 +10659 PF10827 DUF2552 Protein of unknown function (DUF2552) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
21.70 21.70 21.80 117.40 20.80 19.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.79 0.72 -4.05 5 119 2009-01-15 18:05:59 2008-07-28 16:21:31 3 1 119 0 13 32 0 78.80 78 98.21 CHANGED M-QKLKsL+NTAQNKTWVSFLNpNHPYTLLHWSIGGs-SlKKDVWLLQDEMTFETcEFPTLEpAIsWIuENM-pITDVL .MDKQL+TLRNIANERTWASFLNDNHPYSLLHWSIAGVGQEuKDVWLLQDEVTFQTTEFPTLD-AhpWISENMEQVTDVL 0 1 5 7 +10660 PF10828 DUF2570 Protein of unknown function (DUF2570) Pollington J, Finn RD anon PRODOM Family This is a family of proteins with unknown function. 22.00 22.00 22.10 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.48 0.72 -4.46 7 240 2009-01-15 18:05:59 2008-07-28 16:22:39 3 2 193 0 18 152 10 105.20 32 83.99 CHANGED h.palhu...slshl..ILGLsuWhWhQSppIsoL+AENpsQAQTIppQpcANppLs.tLpQERQAV.tQQchsNElcptsppstEplKoIltppsCA+scLPpuVlD...RLHp .............................hh..ha.hlul.uhl...llu...hhGh....h.phS..as.lu+.pAc....scsQspTlcspscs...h...shl.......ss.....ulQ........ph......pplltp.ptpsQ.Qhp.p.-....u-.tppE....pl+ssIucDc.CA+s.LPsu..........+ht.................................. 1 1 4 12 +10661 PF10829 DUF2554 Protein of unknown function (DUF2554) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 76.00 75.90 18.00 17.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.73 0.72 -3.88 3 416 2009-01-15 18:05:59 2008-07-28 16:23:53 3 1 410 0 11 60 0 67.60 84 99.76 CHANGED MlTKsLSVVLLTCALFSGQLMAGHpGHEFVWVKNVDHQLRHEADSDELRAVAEESAEGLREHHNWQKSRKPEoaFR .MhpKslSslLLsCALFSGQLhAtppGH-FVWVKNVDHQLRHEADSDELRAVAEESAEGLREHFYWQKSRKPEAGQR... 0 1 1 5 +10662 PF10830 DUF2553 Protein of unknown function (DUF2553) Pollington J, Finn RD anon PRODOM Family This family of bacterial proteins has no known function. 
20.70 20.70 21.00 54.20 20.10 20.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.70 0.72 -3.46 8 101 2009-01-15 18:05:59 2008-07-28 16:24:25 3 1 101 0 15 44 0 73.00 59 92.91 CHANGED pp+lDIT-+VTGRhcsGpLsLYc-NEhIGc..Msuts.QYELKsGYoacspKFY+hsDsssps-tKYVD.CD.EsGWC .o.hKIDITN+VluKF.+.sshLELYpsc.MIGKFYVYTEsKQYVLEDGYlYEsGKFYRIID.THR.GNspsAEuCD..LGWC... 1 2 9 10 +10663 PF10831 DUF2556 Protein of unknown function (DUF2556) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 71.90 71.60 22.90 22.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.85 0.72 -4.33 3 414 2009-01-15 18:05:59 2008-07-28 16:24:39 3 1 413 0 10 45 1 45.60 92 88.54 CHANGED MIRKYWWLVVFAVSVFLFDALLMQWIELLoTETDKCRNMNSVNPLKLVNCoDL MIRKYWWLVVFAV.VFLFDsLLMQWIELLATETDKCRNMNSVNPLKLVNCDEL 0 1 1 6 +10664 PF10832 DUF2559 Protein of unknown function (DUF2559) Pollington J, Finn RD anon PRODOM Family This family of proteins appear to be restricted to Enterobacteriaceae. The sequences are annotated as yhfG however currently no function is known. 20.50 20.50 20.60 20.50 19.30 18.90 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.81 0.72 -4.17 3 338 2009-01-15 18:05:59 2008-07-28 16:27:31 3 2 335 0 17 58 0 53.80 83 95.45 CHANGED KKLTDKQKSRLWEtQRNpNFQASRRLEGVEVPLVTLTAEEALARLEELRRHYER ...KKLTDKQKSRLWE.pRNtNFQASRRLEGVEhPLVTLTAsEALARL-ELRRHYER...................... 0 1 2 10 +10665 PF10833 DUF2572 Protein of unknown function (DUF2572) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
20.30 20.30 21.20 20.70 20.00 19.80 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.48 0.70 -4.93 5 87 2009-01-15 18:05:59 2008-07-28 16:28:22 3 2 84 0 12 71 0 200.60 36 82.57 CHANGED KGllTLTILhLLSulLlIhMLFDDDtL+aHpSlhuQRKlYVpQsLpLQ+hopEQKpslCpcl....PLNoopssppIoFcptutsDu...puaFlWCcRpoLFKppPKKuhNpGuLSpaINcEpluLFpH+FpusPt.Lsss+usYLYWlD-sQsElpIsGsIsAIlIAEGDLKIsGKGRI+GsVITGGsLoLE.uVpluY+KtTVssLVppYSpWQLAEKSWaDFss ...................................................................................................+GhhTLslLlhlSulLslhhLhcDshLphaputtsQRphYVppplpL.chotpccpptC.pl....shspstpshplsh.t..Lp..t...s-u...lpaalWCcR.sLF.p.p.pPp+st.p...s..tlppFls.t..p..tl...shFp..phu..psP.t....sspp.PplYWhss.sps-hplsts..lpul..lIAEGDLclpGpG+IpGslITsGpLoL-..s..l.plsYuKpsVstllppYSpWplAEpSW.DF......... 0 1 5 11 +10666 PF10834 DUF2560 Protein of unknown function (DUF2560) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 42.00 48.60 23.00 20.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.60 0.72 -4.02 2 125 2009-01-15 18:05:59 2008-07-28 16:28:58 3 1 115 0 8 42 0 78.70 72 95.74 CHANGED MAEIhshTE.QphpL-Ih+LV.spsAAAEcAhtFlusscLphELFK.ph..utupushhsRs.EAlRcuKEALDLFTsGA ...MAEIIPMTEEQKFQLEIYKLVMNQNAAAEEAFQFIGTDELKLELFKIHFQSG.GANSDITTRTIEAVRKSKEALDLFTTGA...... 0 0 0 0 +10667 PF10835 DUF2573 Protein of unknown function (DUF2573) Pollington J, Finn RD anon PRODOM Family Some members in this bacterial family of proteins are annotated as YusU however no function is currently known. This family of proteins appears to be restricted to Bacillus spp. 
22.80 22.80 23.00 80.70 22.40 22.70 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.75 0.72 -3.94 7 119 2009-01-15 18:05:59 2008-07-28 16:29:26 3 1 119 0 11 45 0 80.30 77 97.34 CHANGED tpKhcEQlDGLlEKYTELLLGEos-EhKEcVKtWllYSHIAKSMPPLAKHWNusYP-AK-tlKclIpcIKcLNEt+R..psK ....S.EKFNEQFDGLLEKYTELLLGESNEERKEQVQKWALYSYIAKTMPALVKHWNET..YPDAKEEMVQLITcIK+LNEEKRNE...Q.. 0 4 7 8 +10668 PF10836 DUF2574 Protein of unknown function (DUF2574) Pollington J, Finn RD anon PRODOM Family This family of proteins appears to be restricted to Enterobacteriaceae. Members of the family are annotated as yehE however currently no function is known. 25.20 25.20 26.30 48.90 23.80 25.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.03 0.72 -4.28 4 385 2009-01-15 18:05:59 2008-07-28 16:30:48 3 1 383 0 7 60 0 75.30 65 99.38 CHANGED MpKYhL.GIIhLAYGluSPsFuSDTATLTIsG+losPTCSh-VVNsQLQQRCGphhahssspppASoPs+GVTTEVlslsusSpRpIVLNRYD ........................................hthVNuQsQQ+CGQLhasVDTpa.sSSPsKGVTTEVVsssuDSKR+IVLNRYD..... 0 1 1 3 +10669 PF10837 DUF2575 Protein of unknown function (DUF2575) Pollington J, Finn RD anon PRODOM Family This family of proteins appears to be restricted to Enterobacteriaceae. Members in the family are annotated as yaaY but currently there is no known function. 25.00 25.00 29.70 29.70 19.40 17.80 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.47 0.72 -3.88 2 193 2009-01-15 18:05:59 2008-07-28 16:31:56 3 1 192 0 4 26 0 68.10 82 99.23 CHANGED hpHSLRSDGAGFYQLAsCEYShShhKIAhuGtFhsslCpMAMKShFFhhp.hNRRLTLTAVQGILhRFSLF ................hpHSLRSDGAGFYQLAsCEYShShRKIAhuGtFhsslCtM..AMKShFFhhs.hNpRLTLTAVQGILhRFSLF......... 
0 1 1 3 +10670 PF10838 DUF2677 Protein of unknown function (DUF2677) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as UL121 however currently no function is known. 20.90 20.90 21.00 31.50 20.70 19.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.06 0.71 -4.59 4 34 2009-01-15 18:05:59 2008-07-28 16:32:48 3 1 14 0 0 29 0 163.10 63 91.10 CHANGED hsllh.sstGstss.hCssspsplKlpChL.pLDpRLaW.lpDop....RVhsFc.-spp.hs...........apV-VRtsh.ssp.asl.LhhPLh.psTVuLLL.DhtpsR.EclLChuhlPph+tlpsCthDsDLulLYuVCllLSlSlVsAulhKlDYDpoht..hpuYKS ..............LMChALMARGTaGAYICSPNPGRLRISCALSV..LDQRLWWEIQYSSGRLTRVLVFH.DsGEcGDD.........LHLTDTRHCTSCTHPYVISLV.TPLTINATLRLLIRDGMYGR.GEKELCIAHLPTLRDIRTCRVDADLGLLYAVCLILSFSIVAAALWKVDYDRSVAVssKSYKS..... 0 0 0 0 +10671 PF10839 DUF2647 Protein of unknown function (DUF2647) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins are annotated as ycf68 but have no known function. 22.30 22.30 23.70 26.30 21.20 20.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.46 0.72 -3.90 4 31 2009-01-15 18:05:59 2008-07-28 16:33:42 3 1 25 0 6 19 0 67.80 65 51.29 CHANGED AYSSCLsRohhssKLLLRRIDGAIQVRSpVD.TFYSLVGSGRSGGsP.....susLhpp.aIsh.sh.uhLStst ..............AYSSCLNRSLKPNKLLLRRIDGAIQVRSHVDhTFYSLVGSGRSGGGs......st.L.SR.pIp.lSVautLShpp.................... 0 0 0 2 +10672 PF10840 DUF2645 Protein of unknown function (DUF2645) Pollington J, Finn RD anon PRODOM Family This family of proteins appear to be restricted to Enterobacteriaceae. Some members in the family are annotated as YjeO however no function for this protein is currently known. 
25.00 25.00 26.10 25.70 20.70 20.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.53 0.72 -3.86 5 170 2009-01-15 18:05:59 2008-07-28 16:39:21 3 1 157 0 19 68 0 99.80 65 98.68 CHANGED MS.+htlLsthYhIlChLhIhlhSsLDcEaMIDGp-IsNlC-VhRtl.sDDoRDFsuhhsLPLhhPFFasslh++hKShFLsllullLluYWlWpFFIRapFC ........Ms.pMFlLCCIWFIVAFLWIsITSuLDKEWMIDGRGINNVCDVLhYLEpDDTRDV.GVIMTL.PLF.FPFLWFA.....LWR.....KK.RGWFMYATA....LA.IFGYWLWQFFLRYQFC.... 0 1 8 10 +10673 PF10841 DUF2644 Protein of unknown function (DUF2644) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Pasteurellaceae. 20.90 20.90 20.90 23.00 20.00 20.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.96 0.72 -3.94 10 46 2009-01-15 18:05:59 2008-07-28 16:40:06 3 1 38 0 8 46 0 58.90 55 70.91 CHANGED pELITNsDGRLSTTuFIQFaGALlMAGILlYuVaLDRsYVsELFssFAlFCG.GusATKGh ..ELlTNs.DGRLSTTuFIQFFGhLlMAuILhauVYLDRshVs-LFhsFAhFCu.GusATKGh.......... 0 1 6 8 +10674 PF10842 DUF2642 Protein of unknown function (DUF2642) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillus spp. 20.80 20.80 20.90 20.90 20.70 20.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.09 0.72 -4.48 10 100 2009-01-15 18:05:59 2008-07-28 16:40:24 3 1 82 0 24 81 1 61.00 45 78.44 CHANGED uPQlVSllDPYVYQTLQollGcclVVpTlRGolRGpL+DVKPDHlllEus.pslaaIRlQQIVhVhP ............phluhssPYVhpslppllGpplVlET.VRGsl.+G.pLcDVKPDHl.llE.s....sssahlRlpQIValhP.......... 0 8 18 18 +10675 PF10843 DUF2578 Protein of unknown function (DUF2578) Pollington J, Finn RD anon PRODOM Family This is a Saccharomycete family of proteins with unknown function. The protein in S. cerevisiae is strongly induced in response to many stress conditions and is repressed in drug resistant yeast strains. 
25.00 25.00 144.70 143.00 20.80 19.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.33 0.71 -4.63 12 62 2009-01-15 18:05:59 2008-07-28 16:41:18 3 1 42 1 37 43 0 168.30 55 97.50 CHANGED Mo.KKcKp.....................PKspohohpSsES...............lKsFEDLpsFEsFl+sET..Ds-FDahHs+LpYYPPFVL+EsH--.EKIKsTsNp+SKKFhRcLppHlcKHLlKDlccshth.-L+FcctucpEoFs+lsW+atDpo-a.........asR+a+lplsVoCpp-sAMVDVDYKohP ..........................hpc.........................pKtPKhpTloTcsGEo...............lKVFEDLpsFETal+sETE.D.s.-FDalHC+LpYYPPFVLH-uH-.DPEKIK-TsNSHSKKFVRHLHQHlEKHLLKDIKpAlshPELKF+-KsKcEoF-.+......IsW+Ys-ET-a.........asR+FKlpVpVsCsH-sAMV-VDYKThP.. 0 4 18 31 +10676 PF10844 DUF2577 Protein of unknown function (DUF2577) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function 21.50 21.50 21.90 24.40 21.40 20.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.26 0.72 -3.96 46 176 2009-01-15 18:05:59 2008-07-28 16:41:52 3 2 132 0 36 161 2 103.90 26 92.34 CHANGED chlcphutsshpsspPspl..hhGpVlossP.Lplplsp..p.llLspc...Lhlsc.hlpchphphphpt..tp....................................................hhhpssLchGDcVlllp...psG.QpalllD+l ...............hlpthuhpuhpsspPspl..hhGcVhossP.Lcl.plsp..p..llLsp-.....lllsc.plpphphphphpt...t................................................................h.h.ssLcsGDpVhllp......hpuG.QpahllD+l................... 0 18 30 31 +10677 PF10845 DUF2576 Protein of unknown function (DUF2576) Pollington J, Finn RD anon PRODOM Family The function of this viral family of proteins is unknown. 20.40 20.40 20.60 21.50 19.80 17.60 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.47 0.72 -4.80 13 18 2009-01-15 18:05:59 2008-07-28 16:43:07 3 2 17 0 1 15 0 44.90 69 27.79 CHANGED NAPVVsSpHDYDR-QI+RELNSLRRsVH-LCTRS.uTuFDCN+FLcSsD ....NAsVVsotpDYDR-Ql+RELNSLRRsVH-LCTRS.uTuFD.CN+FLcSsD.. 
1 0 0 1 +10678 PF10846 DUF2722 Protein of unknown function (DUF2722) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 21.00 21.00 21.10 21.70 20.20 20.90 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.57 0.70 -5.44 11 93 2009-01-15 18:05:59 2008-07-28 16:44:51 3 2 76 0 69 95 0 283.10 20 67.85 CHANGED M................poh.t..p..pphsspspsppppp...............................................................................................................suL.sLLGsNVssaPaSEsuhlculcL+sEQE+TKQpYYKLEssNKslpllcpAlpApIPsNhIPhLa..h.Nss.......................................................................................pspspspsss.spss.spssusshlsspp.s.............psPhsY+Fsssosss.............p.h..sspRRshSPA+IGAu.....AVAsLusssssh+............ptsssshR+...p.spsHpRphShPs................................ptspsssp.ps........................sTSslphps.sspslp+pstssspssp-.hToh.Hhl....h+.t.......................................p.p+cH.....+Rp+Ss.othtsIDLs..s.......t...............p.scss-ssp+pppspppps.......s....................................-Dpsho.soSh....................spsssposht+hPpsls..ph.N.s 
..............................................................th...........................................................................................................................................................................................ph+.EpE+T+Qphh+LEptphph-lL+huhpuGlPsshIPhlF....hsssst.hs.t............................................................................................................................................................tp.t..tpttp.tpp..th.tss....t..p..............................s..hth.s......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ss...................................................................................... 0 9 34 60 +10679 PF10847 DUF2656 Protein of unknown function (DUF2656) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 23.00 23.00 23.60 23.00 21.30 20.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.64 0.71 -4.23 12 19 2009-09-16 17:37:28 2008-07-28 16:45:57 3 1 18 0 8 18 18 129.70 41 90.86 CHANGED hFlLSHNLQlpSshVPulusp-LApGL...hupustlpsspsLsHPHWhVclc...SsLSsp-hAp-LVcuWcphRpsh..GHshsHslLALGGRKDot.usPGuP..LQpGpWGVDVVEssss-sFLpuINW-ALKuGRPsD .........hhlLSHNhplpss.VPslshp-lApsl...hsppstlpsspslsHPHWhVclp.....ushSspchupthsp.uWpphRpuh....ucshsHslLALGGRKDos.ussuuP..LQpGtWGVDVVETsss-sFLpuINW-uLpuGRPtD....................... 
0 2 3 6 +10680 PF10848 DUF2655 Protein of unknown function (DUF2655) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 41.10 40.60 21.30 16.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.96 0.72 -4.12 2 140 2009-01-15 18:05:59 2008-07-28 16:47:03 3 1 139 0 2 13 2 81.30 87 84.83 CHANGED hSVAphShGpTAQLStKQsGaYSPEhh.STGKDCNPQPANCLKsQYVLRHCCVDDRSsKMGYSsKhhVLTphssETASLFHC .....MSVAphShGpTAQLStKQsGaYSPEhh.STGKDCNPQPANCLKsQYVLRHCCVDDRSsKMGYSsKhhVLTphssETASLFHC. 0 1 1 1 +10681 PF10849 DUF2654 Protein of unknown function (DUF2654) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as a-gt.4 however currently no function is known. 25.00 25.00 73.00 72.40 21.30 18.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.42 0.72 -4.24 13 35 2009-01-15 18:05:59 2008-07-28 16:48:29 3 1 34 0 0 26 0 70.20 49 63.06 CHANGED AcKKApKhL+KNpREIcRL++HAEpAlhssN+-tYhYAIpKLRsIhKQ.Phsc-llpshWhToRpQlh-hl ...Ap++AsKlL+KNpREIpRLp+HAptAlhsNNhstYtYAIpKLRcIhKQ.Phs-ELlphhWpToRpQI.-hl....... 0 0 0 0 +10682 PF10850 DUF2653 Protein of unknown function (DUF2653) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 21.60 21.60 23.10 47.00 21.30 21.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.03 0.72 -3.68 4 127 2009-01-15 18:05:59 2008-07-28 16:49:31 3 1 127 0 16 75 0 81.40 66 89.06 CHANGED MppI.IsEQ-IIsAlClYIApp+pItPE-V.V.ELhYDDcoGFuAElpsshppp.LhosslIpALR.alc-.hpsNPausslcLpLDcccGIh .........l..p-llsulClahupcctltPE-V.V.ELMYDDDYGFSAEVEVN.GR.Q.QILIQANLIEALRLLLDREYNVNsFAARLQLELDDEEGIY.... 
0 5 12 12 +10683 PF10851 DUF2652 Protein of unknown function (DUF2652) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 22.10 22.10 22.60 32.20 22.00 21.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.45 0.71 -4.09 6 72 2009-01-15 18:05:59 2008-07-28 16:50:17 3 1 65 0 15 46 51 104.00 71 46.52 CHANGED lApLLEuVI-Au.psLKLAKLEGDAAFFaAsssssssh.lscphstMRpuFhpRREphc+D+.CpCcSCtQlcsLSLKFVAHtGEVApQ+VK+psELAGhDVILVHRMLKNpVPVsEY .......VAQLLESVIDAu.KGhKLAKLEGDAAFFWAPGuNsSVl.VC-RsspMRQ+F+sRREQIKKD+sCDC+SCpQt-sLSlKFVAHpGEVAEQKVKRNVELAGVDVILVHRMLKNEVPVSEY.... 0 4 9 13 +10684 PF10852 DUF2651 Protein of unknown function (DUF2651) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus spp. 25.00 25.00 28.40 28.10 24.30 24.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.94 0.72 -3.62 4 152 2009-01-15 18:05:59 2008-07-28 16:51:26 3 2 106 0 11 94 1 77.30 58 96.00 CHANGED MsEL.hsFhIhPLhIhIlSIsGThhhKshYlMPhlohslhLllshTlas.uFhhWsshYollSFhlSYITllhl..hc.spN ......MsELIFllhIhPLhIhIlSVlGTpKsKTaYVMPIVTFu..sF..LIlsVhsF..sPpFFFWVGMYSIhSFIVSYhTLLF...V+GYclsE........... 0 2 7 7 +10685 PF10853 DUF2650 Protein of unknown function (DUF2650) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Caenorhabditis elegans. 22.90 22.90 23.80 23.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.24 0.72 -4.50 7 51 2009-01-15 18:05:59 2008-07-28 16:52:14 3 1 9 0 51 41 0 37.20 38 31.25 CHANGED CPppolaaaacCCGphspECChpLpsWVhlhLhlhhls ..CPpsohaaaacCC....Gpts....p....-CCaplpsWlhlhLhlhhh............. 
0 19 24 51 +10686 PF10854 DUF2649 Protein of unknown function (DUF2649) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as Plectrovirus orf 10 transmembrane proteins however currently no function is known. 25.00 25.00 27.70 27.50 20.90 20.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.57 0.72 -3.86 4 27 2009-09-10 17:06:49 2008-07-28 16:52:44 3 1 6 0 0 26 0 66.10 69 99.22 CHANGED MQNDWIKLKEFFIaIFLFIDKTNVESIpMWNLTQNEYLTLMVGVWlVILFLTWFFLWMVFKIVGYFK ............MQNDW.KLKEFFIHIFLFIDKTNVESIThWNLTQNEYLTLMVGlWIVILFLTWFhLWMlFKIVuYFK................ 0 0 0 0 +10687 PF10855 DUF2648 Protein of unknown function (DUF2648) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillales Staphylococcus. 19.80 19.80 21.20 20.60 19.70 18.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.59 0.72 -4.52 3 72 2009-01-15 18:05:59 2008-07-28 16:52:57 3 2 72 0 4 21 0 32.80 78 72.31 CHANGED MKKLAVILsLuGAAFYGFKKYQN+VNQAPNIEY MKKLAVI....LsLsGuhaYuFKKYQ.p+VNQAPNIEY.. 0 0 0 4 +10688 PF10856 DUF2678 Protein of unknown function (DUF2678) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.10 25.10 28.00 28.00 24.30 23.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.72 0.71 -4.36 4 64 2009-01-15 18:05:59 2008-07-28 16:56:09 3 3 51 0 39 49 0 105.60 58 82.36 CHANGED M--apTRo.GT....ppPLFGETpsRDRIlNLslGGhTSlLVL.ThlSuhVFPp.PP.slNIFFslCIhhhs.osllL..............................IFWYRQGDL-PKFRsLIYY.hholVLLClCANLYFHDVt+ ...................M--asoRTYGTu.GhDN...RPLFGETSA+DR.IINLlVGuLToLLlLV.TlISAFVFPplP.P+PLNIFFAVCI.LssloshlL..............................IaWYRQGD..L-PKFRpLIYYhlhSIlhLClCANLYFH-Vt....................................... 
0 13 16 24 +10689 PF10857 DUF2701 Protein of unknown function (DUF2701) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 25.00 25.00 25.30 96.90 24.90 19.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.85 0.72 -4.20 7 13 2009-01-15 18:05:59 2008-07-28 16:57:47 3 1 13 0 0 12 0 63.50 42 74.66 CHANGED lslhllslshlsalLlYLl+WohlhshhsplKl+llph..TTRRSFppLDsVYYTsDspV.GlNlE .lslhlllIsIlsaLLlYLl+Woalh-hhNclKl+llph..TTRRSFscLDsVYYTDDspV.GVNVE 0 0 0 0 +10690 PF10858 DUF2659 Protein of unknown function (DUF2659) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 34.80 34.80 34.90 326.80 34.70 34.70 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.52 0.70 -4.94 3 43 2009-01-15 18:05:59 2008-07-28 16:58:51 3 1 43 0 7 30 1 220.10 83 99.91 CHANGED MTDILDEVLsDcNEEKRLIFFKKLLPIVIIISLIAITIMVINNNNKs+QIcNNQKNGDIFVKoVsLEospsNcELAlsTLEN.LVosSNTKIQEIAtLEQVAIKISsppaSEAKDLLNKIIENKEYSEIoTSYARIuWCSLVI.....DDcNLDIsDKEKLlKYLNYFDDEsKPFWATAoIhKAIWDIKNNMcscAEKNL+uLltSNNoSDLLKDQAKALLsNL-+ MTDILDEVLSDQNEEKRLIFFKKLLPIIIIISIIAITIMVVINNNKDKRIKNNQKNGDILVKTVGLETTKDNcELAFNTLEN.LVTTSNTKIKEIAALEQVAIKIScKKYSEAKDLLNKIIENKEYSEISTSYARISWCuLVl.....DDpNLDIQDKEKLTKYLNYFDDEKKPFWATATIIKAMWDIKNNMKsQAEKNLKNLLISNNVSDLIKDQAKALLVNLNK. 0 1 2 2 +10691 PF10859 DUF2660 Protein of unknown function (DUF2660) Pollington J, Finn RD anon PRODOM Family This is a family of proteins with unknown function. 
25.00 25.00 41.40 131.50 20.10 18.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.95 0.72 -3.76 4 44 2009-01-15 18:05:59 2008-07-28 17:00:12 3 1 44 0 7 28 0 86.70 82 63.36 CHANGED LhY.KIss+KKNhhsuptNNI--o.slALNsppp-NK...KLTLQE+IELSWpFLYDITEsILNKFSKEDlhpVNKCGplLaENGVRYEH LMYKKIAARKKNILPApGGNIDDSPNVALNSQKPENK...KLTLQERIELSWpFLYsITEVILNKFSKEDVIQVNKCGQVLFENGVRYEH 0 1 2 2 +10692 PF10860 DUF2661 Protein of unknown function (DUF2661) Pollington J, Finn RD anon PRODOM Family This viral family of proteins have no known function. 20.80 20.80 20.90 25.50 20.00 19.30 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.77 0.71 -3.90 7 19 2009-01-15 18:05:59 2008-07-28 17:02:46 3 1 18 0 0 18 1 103.30 41 36.17 CHANGED hsLVaVWYH.cspFlaNospaPFWHNlhYpuppYpsallYal-Npss..lPt....stslphlsFKchhscc.phppLpshhs...KIDYMKLpllhs..............schl..spphlLLMDMDCsl .hsLVaVWYH.cspFVhNTspaPFWHNlpYauppacshVlYhl-sps..sh..pl..Ps...stslphlNFKcshsph.phsplpslhp...KIDYMKlshlhs..............schl..spsalLLMDMDCsl. 0 0 0 0 +10693 PF10861 DUF2784 Protein of Unknown function (DUF2784) Gunasekaran P, Mistry J anon Pfam-B_001600 (release 23.0) Family This is a family of uncharacterised protein. The function is not known however it is conserved in Bacteria. 22.60 22.60 22.70 24.30 22.50 22.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.70 0.71 -4.28 45 266 2009-09-11 08:43:52 2008-07-28 17:03:21 3 1 255 0 81 218 171 116.00 35 92.57 CHANGED YpllADhllllHhhFllFVlhGGlLll..........Rh.thhhlHLsAluWushlphhGh.hCPLTsLEshLRptAGpsuY.suGFl-HYlhsllY......PstlssslphhlGslVlls....ashlhhRp ..........thhADhlllhHhhFllFVlhGGhLsh...........RhhphhhlHlsAlsWGsulth...h.sl..sCPLThlEshLRptAGtssh.ssGFlpHYlhsllY.......Psshs.ssspllhuslVlhs......ahhhh...h............. 
0 24 57 71 +10694 PF10862 FcoT DUF2662; FcoT-like thioesterase domain Bateman A, Pollington J, Finn RD anon PRODOM Domain Proteins in this family have a HotDog fold. This family was formerly known as domain of unknown function 2662 (DUF2662). The structure of Rv0098 from M. tuberculosis [1] suggested a thioesterase function. Assays showed that this protein was a thioesterase with a preference for long chain fatty acyl groups [1]. The maximal Kcat was observed for palmitoyl-CoA although longer and shorter molecules were also cleaved. In solution this protein forms a homo-hexameric complex. 20.80 20.80 21.00 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.09 0.71 -4.69 6 71 2012-10-02 20:54:35 2008-07-28 17:09:44 3 2 71 2 9 28 0 151.50 66 81.73 CHANGED +VLcPYp.csCRYLlcA..........ph+Ap.sulhAhGsFsIsESsYIcsTGHFNAVEl.lCaNQLuYshhApuVtNc.IssLcuWSl-DYhc+QLSshLItphSSpF+KPlNPpKFSGRlps+slphhp+o..h.aLhlssshcFWD-sGGtupGEs.LAh ..............RVLEPYSCKGCRYLIDA..........QYSATEDSVLAYGNFTIGESAYIRSTGHFNAVELILCFNQLAYSAFAPAVLNEEIRV....LRGWSIDDYCQHQLSSMLIRKASSRFRKPLNPQKFSARLLCRDLQVIERT...WRYLKVPCVIEFWDENGGAASGEIELAA....... 0 3 7 9 +10695 PF10863 DUF2702 Protein of unknown function (DUF2702) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 25.00 25.00 56.30 51.40 21.60 20.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.85 0.71 -4.47 8 43 2009-01-15 18:05:59 2008-07-29 08:49:20 3 1 41 0 27 41 0 140.30 44 69.44 CHANGED MSRupEIK-KpsLQA+lQhuFSsssuKVLuWLpsStppssussh...tp....s-hs-S+cuFhcLPVlQhGSGLohp...pstspssssIpTIGDFIcSDKclSoLuKcK+spps...pppsslaRlsK-DTKAMlALKpKMRsppRcplRcc ...MSRtKEIpEK.sLQAKLQ.oFSsNsutVLsWLcpsppss.s.sst...pp................p-lp-u+cuFacLPVlphGSGLpFt...pts..tsspc-IpTIGEFI...puD.KKlSoLuKKK++sp.ss.....pRs...shaRlsKDDoKAMlALKpKMRcsp+-slRpp..... 
0 3 13 24 +10696 PF10864 DUF2663 Protein of unknown function (DUF2663) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as YpbF however currently no function is known. 22.90 22.90 24.10 24.50 22.50 22.30 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.69 0.71 -3.93 10 52 2009-01-15 18:05:59 2008-07-29 08:50:26 3 1 51 0 18 47 0 129.20 42 87.62 CHANGED QMLpALIcRKcKaE+hc+QshhhphAullshshallalhsKshu...hphsthLutlhusssaLhhlLssuhuYssuhYaKKKcEKAEsEaHtLRCEIIQKSpDLWsppEpW+uRcpVFchMK+cYDINLYaE .....QMLpullcRKpKaEphtcpshhaphsullshslhhlalhhpshs....ts.phhlpthlupssaLhallhsuhuY.hsuhYaKKKcEKAEs-FHpLRCEIIQKSsDLW.ps-pWcsRcplFchMK+cYDINLYaE........ 0 4 11 14 +10697 PF10865 DUF2703 Domain of unknown function (DUF2703) Pollington J, Finn RD anon PRODOM Domain This family of protein has no known function, but it may be distantly related to the thioredoxin fold. It contains the CXXC motif that is characteristic of thioredoxins. 20.10 20.10 20.80 21.60 19.90 19.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.70 0.71 -4.18 14 60 2012-10-03 14:45:55 2008-07-29 08:51:33 3 10 50 0 45 62 2 107.50 29 63.03 CHANGED LsIcWp+Lsss.stTC-RCusTucslppAlppl+phLp.hGlpVplpcttlsspphuht...hESNpIhIsG+slEphl.uupVsposC....Chss.sDscCRslphssppYEslPtcLllcAu .................l.Ipa.hLshs...TCsRCtsTtpslpcAlpplpthLp.hGlclhlpch...plss...p..phsht.......pSspIhlsGpsl-.hh.shpsspohC....shss.ssspCRsh.thp.G....ppY-shPtthIhcAh......... 1 21 40 43 +10698 PF10866 DUF2704 Protein of unknown function (DUF2704) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
19.90 19.90 23.70 22.80 19.80 19.10 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.17 0.71 -4.87 12 19 2009-01-15 18:05:59 2008-07-29 08:53:22 3 1 19 0 0 16 1 161.50 41 78.97 CHANGED sss.sss-tpssRKsVcuss-EYTVDGLKLKstYVtYYKpLphlVDFhVMhlSKplsMKEY-pVYSLGRQLYElLRulFVDEPFKLWLEpNspchsss...+cpIhKhLQspLphslt....hKosTFKshlhNlLNocLs...s+YDsustYIKPNCIVsTaNCCsLsFc. .............tsss.......tpspp+slcs..cpaTVDGL+LKosYVtYYKQLKtLV-hlV..halSK.plshK-YcEVYoLuRQLYEllRulFV...DEPFKLWLEp.....NsppLsss..tth+D..cIaKpLpspLcsssss.......scssThKshllNVlNscLstp.scaDssstYlKPNCIV.TasCCsLsFc.............................. 0 0 0 0 +10699 PF10867 DUF2664 Protein of unknown function (DUF2664) Pollington J, Finn RD anon PRODOM Family This family of proteins is a viral family, annotated as UL96. Currently no function is known. 21.20 21.20 21.20 29.40 21.00 21.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.86 0.72 -3.36 8 32 2009-01-15 18:05:59 2008-07-29 08:54:00 3 1 29 0 0 24 0 91.60 35 73.95 CHANGED MRhsLE+pQ+cFL+pshGscHPLospQslpsh+stsRpps+psppslpsVustlh.cp+tpl+........cppppA+pLQ+.hDlD-hlDoLsElKDs .....MRhcLE+pQ+pFL+csaGspH.LoppQulpshcsss+pppc.sppssppVustlh.cp+uplp........p-hpps+pLpp.hc.VD-hLDoLsElKDs................... 0 0 0 0 +10700 PF10868 DUF2667 Protein of unknown function (DUF2667) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Arabidopsis thaliana. 
20.90 20.90 21.10 21.20 20.60 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.60 0.72 -3.61 5 19 2012-10-01 23:31:40 2008-07-29 08:56:03 3 1 4 0 17 30 0 84.20 29 94.17 CHANGED MGSLRLSTVAIA.VVVCLSILLISPTEVDGRtVCDhstGtCosh...STCs-sCpslc....usFpGGECtshuuhsGholCaCC+s...VpSuAEhESM ..................h.slh.lllClSlL.Ll....SPh.....c...lsG..p..h..C.D.h.......hGsCs.h......tppCscsC+php....ppatGGpChshst.ss....shChCCh...............t......... 0 10 10 11 +10701 PF10869 DUF2666 Protein of unknown function (DUF2666) Pollington J, Finn RD anon PRODOM Family This Archaeal family of proteins has no known function. 22.00 22.00 22.60 33.90 21.30 20.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.74 0.71 -4.12 9 43 2009-01-15 18:05:59 2008-07-29 08:56:51 3 2 30 2 32 46 0 127.30 38 94.58 CHANGED E-+IpFTAK+GKWhVlK+LhIDEpTspl-IARLLASIsETlstKI.-Fhs..hDhc+Ic-hhsthhchKK....EE-Iscslpth+S.tsophhst...ppEs+.hl+ch........Lp+LGlphcVsuK.lEKYlEKs ...................E-+IpFsA.....K+GcWhVsK+LhID-pTpsh-IARlLASIsETlstKIP-YLs..hDlctlpphhc-lhphKK....-c-Ispslp+LKSPuTo+Klsphhppc-tK.hLKchL.p...hlLpRlGlppclssK.lEKYlEK....................... 0 3 4 19 +10702 PF10870 DUF2729 Protein of unknown function (DUF2729) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 20.20 20.20 79.20 79.00 19.80 18.40 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -9.07 0.72 -4.24 8 13 2009-01-15 18:05:59 2008-07-29 08:59:52 3 1 13 0 0 13 0 54.80 65 84.38 CHANGED NLLsYCKLKLVKpVSKTVuuLLCKCV....APEDo.D..sGDRYlQINNNCNFIYINVVp .pLLTYCKlKLVKuhSKphuuLhC+CV...hus-Ds.D...GDRYhQhNNNCNFIYINVVK. 1 0 0 0 +10703 PF10871 DUF2748 Protein of unknown function (DUF2748) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
25.00 25.00 791.70 791.50 18.90 18.70 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.60 0.70 -6.07 2 44 2009-01-15 18:05:59 2008-07-29 09:06:05 3 1 44 0 7 27 3 439.80 89 99.98 CHANGED MoslYHILc+lPAI.+pDM.lEYEpLA.pLlpSGKLRlDs-sphNFsRhoEPuLNlslhlSpEELssP+L..cTpthh.NlY+p.hpc....pKlppIhssLpKphth..sVcp-lh.hLARlFVQSAHPIVI+WLLLp+sEVFloYSppIGDhMDhsoWphsGtNSGMQShNGpslAIaVSCGGNP..Fspp.p-pshYGsGasAhARLQIIAAQELGHaADIhRD.pup.lsRHSsN.ShTKApspVhhAR+sDl.+CaplLppL.psGhpp.lsYEpplKFYptNKVpGlKlhhh+hh.FhYK.+Lh.h.pppshIFV+haKs-pY.uLMlcAMhhDM.uNLpPtA-VYKpcsP-hEEAIAClEALARVPQQsIKWGalTThphMpDLYhIYYppVIPSLIspYphITGcsY.Rsh.....NahSph.hap.KKL..hhK....PsREl MTSIYHILDRVPAIYKQDMEIEYEHLAMQLIKSGKLRIDTDDCCNFARFTEPALNISLMVSpEELTSPHLIPETTKLFQNLYRNSASD....QKIKSIFDNLKKQIQKLQPVKKEVTEMLARIFVQSAHPIVIRWLLLNKTEVFLTYSHNIGDMMDMVSWQRVGGNSGMQSTNGKDVAIFVSCGGNP..FAENNKDHPTYGNGFAAsARLQIIAAQELGHFADIKRDDKGRQITRHSANFSGTKATDKVRIARKNDIIHCHNLLuKLLKAGMKKQLDYETKLKFYNANKVSGLKVYAIKFMIFIYKFRLLNYSSRNNLIFVRKFKTDcYMALMI-AMFKDMQANLSPsADVYKNKNPEIEEAIACIEALARVPQQTlKWGYLTTKETMHDLYKIYYNEVIPSLITSYNAlTGENYpRDFKKPKSNFFSKINIFSNKKL..VLK....PVREL. 0 1 2 2 +10704 PF10872 DUF2740 Protein of unknown function (DUF2740) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function has a highly conserved sequence. 25.00 25.00 30.20 122.00 24.10 23.70 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.46 0.72 -4.30 3 83 2009-01-15 18:05:59 2008-07-29 09:12:15 3 1 81 0 1 6 0 48.00 94 100.00 CHANGED MPKQLSPDQDKLHKNILRDRFLSSFKQPGRFRAELEKVKLMQKEKGHE MPKQLSPDQDKLHKNILRDRFLSSFKQPGRFRAELEKVKLILKRKGHE 0 0 0 0 +10705 PF10873 DUF2668 Protein of unknown function (DUF2668) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as Cysteine and tyrosine-rich protein 1, however currently no function is known. 
26.20 26.20 26.40 26.40 25.50 26.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.20 0.71 -4.06 5 49 2009-09-16 09:25:17 2008-07-29 09:12:45 3 2 35 0 26 38 0 129.00 62 85.86 CHANGED MDALR..LPRRPGVLL.KllLLFVYAGDCpAQCGK-C+SYCCDGSTPYCCSYYAYIGNILSGTAIAGIVFGIVFIMGVIAGIAICICMCMKNNRGTRVGVIRAAHINAIS..YPM.APPPYTYDHEMEYsTDL.PPPYSPAPQASAQRSPPPPYPGNSRK ....................R.....tshlhL.cllLLhlhA-cCLA.QCGpDC..+SYCCD..GoTPYCCSYY.AYIGNlLS..GTAIAGI...VFGIVFIMGVIAGIAICICMCMKNp.Ru.TRVGllRTo+INslo......oYPh...sPP.PYsY-aEMpassDL.PPP..YoPsP...ptssphSPPPPYPG.s+K....................... 0 4 7 13 +10706 PF10874 DUF2746 Protein of unknown function (DUF2746) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 21.90 21.90 22.20 22.20 21.40 20.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.90 0.72 -3.78 5 73 2009-01-15 18:05:59 2008-07-29 09:14:46 3 1 72 0 1 59 0 60.10 54 51.59 CHANGED I+pQVsNoHDTNlRDDLD-lt..........EhVp-GF+clc+DIstL+E-LsTERpERIEGDRRR- ..I+-QlsNTH..-TNMRDDLD-lt......................................-hV...+-GF+plp.......R.DIuGLREEL..RTERlERIEGD+R+........................ 0 0 1 1 +10707 PF10875 DUF2670 Protein of unknown function (DUF2670) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 29.50 39.60 19.40 19.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.65 0.71 -4.14 3 44 2009-01-15 18:05:59 2008-07-29 09:15:40 3 1 44 0 7 29 0 139.20 78 86.46 CHANGED MWQALRRLIAANPMGFFLWSIITKWYLIIAVASLITLYYTVLGLKKIGFIDYFGRTTVEILDTSKAVAQNCTsKLGPNWs+LVN.......FWNCLSDPGEYcHEEGTGAKVLEDEINKLhsKQAD......SluDscsPIINPYEcLsNsN .......MWQALRRLhAANPMGFFLWSIITKWYLIIAVASLITLYYTVLGLKKIGFIDYFTETTVEILDTTKAVAQNCTTKLGPNWN+LVS.......FWNCLSDPGEYKHEEGTGA+VLEDEINKLTPKQAD......SlADAE+PIINPYEtLENsN.................. 
0 1 2 2 +10708 PF10876 DUF2669 Protein of unknown function (DUF2669) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 34.40 34.30 21.80 21.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.57 0.71 -4.45 4 30 2009-01-15 18:05:59 2008-07-29 09:16:31 3 1 24 0 4 23 0 126.40 50 93.33 CHANGED hpIEslTYsMTPANAhsAWsuLKpAhtLLpusDlsslGcpps..huushLsslLuNLGDPulptlEslVLKaTosc.tDGppYRLS..-RFspHFNpaRuHLl.VLhEGLhYQaADFFhGGsuhhssh.s.hstsp .....hpI-slTYhMTPANAMsAWpuLKpAhsLLpuhDhsuluNsps.....huussLuslLupLGDPulpElEslVhcpTuhcssDGsp.YRLS..DRhspHFNs+RsHLl.VLhEGlhYQauDFFsGGhuuhpsl.P..sApp....... 1 0 2 3 +10709 PF10877 DUF2671 Protein of unknown function (DUF2671) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Rickettsia spp. 25.00 25.00 31.50 186.70 20.90 20.10 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.16 0.72 -4.19 2 42 2009-01-15 18:05:59 2008-07-29 09:17:29 3 1 42 0 6 19 0 90.00 93 99.82 CHANGED MQEKELSNNFLEEQpp.KEDsSPF.DlKYICQASLLITDSIRKGYDVTQLsNGDINVTElRIVNVHYNWNSEKGKFVKTNQIEFNNsKGG MQEKELSNNFLEEQE...KSKEDsSPFFDVKYICQASLLITDSIRKGYDVTQLPNGDINVTEVRIVNVHYNWNSEKGKFVKTNQIEFNNSKGG. 0 1 1 1 +10710 PF10878 DUF2672 Protein of unknown function (DUF2672) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Rickettsiae. 20.70 20.70 23.40 38.90 19.20 17.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.25 0.72 -4.24 3 43 2009-01-15 18:05:59 2008-07-29 09:18:14 3 2 43 0 7 19 0 67.80 79 64.98 CHANGED hIIKclKKIKAYF..IKS.hIKNIDcSLETEQlNFYLKKIINLEGYYaGNYDLTTIKEKYYTLI...INNDL ..........IIIKELKKIKAYLINIKSSIlKNIDEPLETEQlNFYLKKIINLEGYYHGsYDLTTIKEKYYTLI...INNDL....... 
1 1 2 2 +10711 PF10879 DUF2674 Protein of unknown function (DUF2674) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be conserved to Rickettsia spp. 25.00 25.00 150.20 150.10 20.60 19.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.16 0.72 -4.24 3 40 2009-01-15 18:05:59 2008-07-29 09:18:51 3 1 40 0 6 13 0 67.00 93 99.93 CHANGED MQNPTQKVISFSEHKADIERIKKuIEEGWAIVKLVPNKDRFIGLLEKISHAE.DETIYIPPRKKIIVN MQNPAQKVISFSEHKSDIERIKKSIEEGWAIVKLVPNpDRFIGLLEKISHAE.DETIYIPPRKKIIVN. 0 1 1 1 +10712 PF10880 DUF2673 Protein of unknown function (DUF2673) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Rickettsiae spp. 19.80 19.80 19.80 89.00 19.40 19.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.02 0.72 -3.93 3 42 2009-09-11 07:30:40 2008-07-29 09:21:59 3 2 42 0 6 19 3 64.20 87 78.54 CHANGED MKNLLKILLILAFSAPVFASS...QlP.DPASVTTTQIpAMSTsDQQAWVASLTADQYNMLSPDVQKW MKNLLKILLILAFASsVFASS..MQMP.sPASVTTTQIQAMSTDDQQAWVASLTAsQYNMLSPDVQKW.... 1 1 1 1 +10713 PF10881 DUF2726 Protein of unknown function (DUF2726) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
24.70 24.70 24.80 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.28 0.71 -4.52 35 880 2012-10-11 20:44:46 2008-07-29 09:23:18 3 9 678 0 167 661 25 119.50 22 36.87 CHANGED pc......tLhsssEpshhptLcpulss....ph.plhs+Vphu-llps......stpptppAhsphsu+phDFllsD.psh...pslssIELsss..sHp......psptppRDthhcpshcsAGlPllclphppshssspl+cpl.tsl .................................................t...lhs.tpEtthhptL.phlsp.........ph.hlhs..pVphspllp...........ttpphhph.hp.h.hs.p.tp......hDaVlhc.t..psh.................pslssIELDss..p.Hh.............ptp..ptcRDthhpplhcpAGlPll.Rhp.pt.....p............hh...................................... 0 45 92 132 +10714 PF10882 bPH_5 DUF2679; Bacterial PH domain Pollington J, Finn RD, Bateman A anon PRODOM Domain This family of proteins with unknown function appear to be related to bacterial PH domains. This family was formerly known as DUF2679. 22.30 22.30 22.30 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.93 0.72 -4.22 27 319 2012-10-04 00:02:25 2008-07-29 09:24:37 3 5 295 0 86 264 11 101.30 19 37.49 CHANGED lcYtl-cpsLhI.pthhu..phpIPh.....spIpplphssshh..tsl+lhGhuthtahhGphh.hpchGpsphasTp.scphlhlcTss.psYsISPcsh-pFhppLcp+t ...........................................htlpsppl.l.ps.hs..phpIs.h.......spIp.s...l.........p.....h..h.s...sls.....tth..Rs...Gsu..ssthhhG.pFp..hp.shu.p....hhhh.sTp..spsllhl.c.Tc...c..psh...h..l....oscp.pt.........t....................... 0 36 71 81 +10715 PF10883 DUF2681 Protein of unknown function (DUF2681) Pollington J, Finn RD anon PRODOM Family This family of proteins is found in bacteria. Proteins in this family are typically between 81 and 117 amino acids in length. 
27.20 27.20 27.20 28.30 26.30 27.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.91 0.72 -3.78 13 42 2009-12-08 16:02:13 2008-07-29 09:25:47 3 1 32 0 7 42 0 83.90 34 81.75 CHANGED M.hslhlh.ussulhuslhualha+sc+upcc........tpLppEppQlpsEtpstpspVKNhcl+QKNEEss+phSpcsV.-pLppcG.hR.D ...........hshp.l.lhuhsulhshlhuYlha+lcptccc.......stpLhppNpQLpsEpsstpspVKpapl+pKN-EsspphuRssll-pLpppG.hR-....... 0 1 5 7 +10716 PF10884 DUF2683 Protein of unknown function (DUF2683) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Methanosarcinaceae. 22.80 22.80 25.00 24.70 21.80 21.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.69 0.72 -4.05 7 17 2009-01-15 18:05:59 2008-07-29 09:28:00 3 1 13 0 12 17 10 74.50 35 88.79 CHANGED MVQAhlsIoDcoNpILsIlKAKYsLKDKStAI-hllppYpp.hLEP.EL+PEFlEchppIhc.cchl.lsol-sL+thhtt ...................MVps.lslsDcsNpIlpIhKApasl.cKS.AIshhlppapp.hhEs.El+PEFlcchpcl.KtpchIclsshcsLRccht.h... 0 2 4 6 +10717 PF10885 DUF2684 Protein of unknown function (DUF2684) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as yqgD however currently no function is known. 25.00 25.00 35.70 35.70 19.50 19.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.12 0.72 -4.12 2 241 2009-01-15 18:05:59 2008-07-29 09:29:20 3 1 239 0 7 33 0 76.70 77 95.94 CHANGED hstDsThoLpVtsTGSLSVNpYGWINIWhAILGpFFsQFPhFFEuph.lhps.h..hsDNAsIhRIYhL.F.sllGhK......p++ss ........MstDsThoLpVpsTGSLSVNpYGWINIWMAILGQFFTpFPLFFESCLILLKTWLEIFPDNAGILRIYLLQFSAIVGYKT..........RRAA....... 0 1 1 2 +10718 PF10886 DUF2685 Protein of unknown function (DUF2685) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as uvdY.-2 which is an open reading frame within uvsY. However currently there is no known function. 
25.00 25.00 31.50 31.30 23.00 21.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.65 0.72 -3.95 6 36 2009-01-15 18:05:59 2008-07-29 09:30:35 3 1 35 0 0 25 0 53.80 53 96.13 CHANGED splCVVCKpPlccA.LuVcTspGsVHsG.CtpalpEhslSESs.....-p.LpETQLLh ...cICVVCKpPI-sA.LVV-T-pG.PVHPGsChsYlp-h..P....lS..ES.s.....-cpLsETQLLl 0 0 0 0 +10719 PF10887 DUF2686 Protein of unknown function (DUF2686) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as yjfZ however currently no function is known. 25.00 25.00 69.60 69.50 18.10 17.40 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.75 0.70 -5.67 3 406 2009-01-15 18:05:59 2008-07-29 09:32:52 3 1 257 0 2 128 0 263.10 67 99.79 CHANGED MSMPLSNAhsoQasTsNHFLHHPpVDSElT+Ka.cYARhDLENIYL.PLsRGNNHNYDGKSsVEIRKLDISKpSW.PFNYVTssCREaDGITTTGRMLYRNLKITSALDEIYGGICKKAHAATELAEGLRLNLFMKSPFDPVEDYTVHEITLGPGCNVPGYAGTTIGYISTLPASQAKRWTNEQPRIDIYIDQIhTVTGVANSSGFALAALLNANIELGNDPIIGIEAYPGTAEIHSKMGYcVIPGDEDAPLKRMTLQPSSLPELFELKNGEWNYIGK .......MohP.....po.h.hssah.HaspsDp.lphKa.phARhDpENIYL.PLsRGNNHNYDGKSVVEIRKLDISKps.W.PFNYlTpsC+E.DGITTTGRMLYRNL+ITosLDEIYGGICKKAHAuTELAcGLRLsLFMKuPF-PVEcYTVHEITLGPGCNVPuYAGTTIGYISTLPASQAKRWTN.EQPRIDIYIDQIhTVoGVANSSGFALAALLNANI-LGNDPIIGIEAYPGoAtIauthGYcVIPGDEDAslKRMTLpPSSLPELFELKNGEWNYltp........ 0 2 2 2 +10720 PF10888 DUF2742 Protein of unknown function (DUF2742) Pollington J, Finn RD anon PRODOM Family Members in this family of phage proteins are the product of the gene phiRv1, however no function is known. 
22.70 22.70 22.80 22.70 21.80 22.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.36 0.72 -4.06 4 68 2009-01-15 18:05:59 2008-07-29 09:33:11 3 2 51 0 7 28 0 93.70 67 74.31 CHANGED ASRtVSWWsVHpaVtPhLstA..GsWPMAGTPAWstLDDsDPhKWAAlsDAucHWsLRVETsQpA.A-ASp-VSAAADWsuluRclpc+cshahtRshl ..............sSRtVSWWSVHEaVAPsLsAA...spWPMAGTPAWssLDDsDPhKW.AAICDAARHWALR..VETCQ.....sApA-ASRDVSAAA.DWPAluREI.pRRRssYItRssV................. 2 3 5 7 +10722 PF10890 DUF2741 Protein of unknown function (DUF2741) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as ubiquinol-cytochrome C reductase however this cannot be confirmed. 24.30 24.30 25.00 24.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.51 0.72 -3.84 4 42 2012-10-01 20:10:32 2008-07-29 09:36:22 3 2 24 0 29 45 0 69.90 60 74.83 CHANGED MGKpPl+LKAVsYALSPapQKVMPGLWKDlsuKItHKVSENWluAsllhu.PlsGThpYA.aYpEpEKLcHRY .........MGKt.PVRhKAVVYuLSPFQQKlMsGLWKDlPsKIHHKVoENWISAsLLls.PlVGTYpYst..apEpEKLpHRa........................ 0 8 21 25 +10723 PF10891 DUF2719 Protein of unknown function (DUF2719) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Nucleopolyhedrovirus. 21.40 21.40 21.90 21.50 21.20 20.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.98 0.72 -4.30 5 33 2009-01-15 18:05:59 2008-07-29 09:37:14 3 1 32 0 0 27 0 74.40 32 83.87 CHANGED MLRALKRRFKsAssEp+REED..VVLCPRCYFVAPGcISVADYTRMHIKFNEQFAD+CsNNFsVTQPKTWuNYoNCSALYYPL ..........................................cc..psp..VVhCs+ChFVAPhSlSaEEYlcLHcpFNphhsspC.........................h..t.................. 0 0 0 0 +10724 PF10892 DUF2688 Protein of unknown function (DUF2688) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as KleB however currently no function is known. 
25.00 25.00 25.00 59.10 24.70 17.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.00 0.72 -4.31 3 19 2009-01-15 18:05:59 2008-07-29 09:37:23 3 1 15 0 2 9 1 60.30 64 84.44 CHANGED MsKGKIE..IVETsCRRCGKSIRTLSHSLIGAD-LREKLGGICG-CITPEEDpcITEuhLuA ..MsKGKIE..IlcTsCRRCGKSIRTLS+SlIGAD-hREKhGuICGsCITPEEDpclpEhhLtA..... 0 0 1 2 +10725 PF10893 DUF2724 Protein of unknown function (DUF2724) Pollington J, Finn RD anon PRODOM Family This is a family of proteins with unknown function. 25.00 25.00 26.00 25.70 24.50 24.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.67 0.72 -4.11 4 172 2009-01-15 18:05:59 2008-07-29 09:38:01 3 1 164 0 8 86 0 64.50 68 94.77 CHANGED MLpsEPSFASLLVKQSPuMHYGHGWIhscDGKRWHPC...+SQsELLtGLuTK+.tpp.........allKul+plpR ...MLTKEPSFASLLVKQSPA...M...H...a...GHGWIMGcDGKRWHPC...RSQDpLLAELSTKKpGp..........WLLKuhhRLF+..................... 0 0 2 6 +10726 PF10894 DUF2689 Protein of unknown function (DUF2689) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as TrbD however currently no function is known. 25.00 25.00 30.10 29.70 20.10 18.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.10 0.72 -4.19 4 128 2009-01-15 18:05:59 2008-07-29 09:38:19 3 2 101 0 2 84 0 58.30 79 62.49 CHANGED MNMRNINlITAhSVPsKoVSDDFMHAVLSNCTTRIVLPAPKcFuSESLPHNFNMAAVGVMK ..MNMRNINVITAhSVPsKoVSDDFMHAVLSNCTTRIVLPAP+cFuSESLPaNFNMAAVGVMK......... 0 0 0 1 +10727 PF10895 DUF2715 Protein of unknown function (DUF2715) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Treponema pallidum. 
21.70 21.70 22.10 23.10 21.30 21.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.21 0.71 -4.77 8 104 2009-01-15 18:05:59 2008-07-29 09:38:40 3 1 15 0 11 46 0 146.80 34 86.65 CHANGED Chh......sshAupVFlSP+lGhsuhhhhGpsh.upthptst..p...........asPhlG.lsVulhAcNGhshssslDAuLTpLMFRuQsLlGYuhRhG..........................................shtalPsoGlsl....huop-ct........lhG......VPlpLshQaaFssahGl-sssouuVGlshp.sh.................................Dhpa......................................spa.........plPlolRlGPVFRl ..........................................................................shsspl.lSs+lG.ht.h.hG.ph.s.hh...........................sPhhG.lsluh.Ac.NGhhhthslDAuLTpLhFp.......upsLhGYuhR.G..........................................t..hhhs..hhsh....hspttt..............u......lslplthpahhsthhGlshshssuhsl.......................................s.th.............................tpa..........hhPhslplGPsFRh................................................................................... 0 11 11 11 +10728 PF10896 DUF2714 Protein of unknown function (DUF2714) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycoplasmataceae. 23.90 23.90 26.40 51.60 22.60 23.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.67 0.71 -4.39 7 55 2009-01-15 18:05:59 2008-07-29 09:39:24 3 1 52 0 16 37 0 145.60 37 85.27 CHANGED spYcclhps.s....FlsYppLhspsLLcss.shpSs.hpcahcphh.Aatp+h-lVFcsFVIoashN.+FShstLlPhlstsEsSNo-uhN.hpssNsppapphLsp.aNphhp.pLhppp+hVElhPslIlFhuppTcpLKllFscphlhs ...pYc-hhssss....hloY-pLhuoVLLcsplGFpSclYpcFhp+hphAhcp+h-IhFcsFlIoFNlNLKFSsshLlP.lLsspEsSso-AlN.Fpss..ps.paspFLts.aNphIp.pLlpps+hVEIhP.slIlF+Spp.ssoLKllFScchl.o...... 
0 9 15 16 +10729 PF10897 DUF2713 Protein of unknown function (DUF2713) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 65.30 65.30 22.90 22.20 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.59 0.70 -5.12 3 258 2009-09-11 09:11:29 2008-07-29 09:40:13 3 1 235 0 1 98 0 209.60 84 88.19 CHANGED VFLIEQINDLKMWVNKYIDDCTDEDLNDRDFIASVVDRAIFHFAINSICNPGDNKDATPIERCTFDVETKNGLPSTVQLFYEESKDNEPLANIHFQAIGSGFLTFVNACQEHDDNSLKLFASLLISLSYSSAYoDLAG..+VNINEYNENYLKAQFEELSQRDMKKYLGEMKRLADGGEMNFDGYLDKMSHLVNEGTL-PDILSKMRDAAPKLIDFAKSFDPNSKEKIKILTDTSKLIYDLFGVKSEK ..............................MWVNcYIsDCTDE-LNDR-FIASVVDRAIFHFAINSICNPtDNKDAo.IEpCTFDVETKNsLPSTVQLFYEESKDNEPLANIHhQAIGSGFLTFVNACQEHDDNSLKLFASLLISLSYSSAYsDLut..pV.INE.NEsYLpAQFEcLSQRDMKKYLGEMKRLADGGEMNFDGYLDKMSHLVNEGpL-PDILSKMRDAAPpLIsFAKSFDPsSKEcIKILTDTSKLIYDLFGVKSEK............................. 0 1 1 1 +10730 PF10898 DUF2716 Protein of unknown function (DUF2716) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 21.00 21.00 28.90 24.40 19.10 18.50 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.17 8 75 2009-01-15 18:05:59 2008-07-29 09:40:36 3 1 68 0 20 62 0 133.90 45 81.17 CHANGED NWhtLScpEh-plW-+lYcca+F+..PuhStaPSh+sPsPalTaDlSshFsstuthss.....--lEpcsL+AFppsTtssEahhALDWQH-CYhlsP+lshp+D...EFs.EWhlPVaPNGDYYFFlpcDF+WGhLGHPWEpSITIFGc-LI ..............tWh.ho-pE.-plWsclasch.cFp..Pu..h..s..taP.uFchPs.PalT.a..clSph...h..sc.ss.hss........h--L-ccsLpsFpc.....sTss..sEahhALDWQH-CYhhsPHhph..p+s............EFs.EW.......l...PlF.PNGDYYFFlpcDFcWGhLGHPWEpoIolFGctLI........... 0 6 14 16 +10731 PF10899 DUF2743 Protein of unknown function (DUF2743) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
21.10 21.10 21.60 22.30 20.90 21.00 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.28 0.71 -4.92 9 148 2009-01-15 18:05:59 2008-07-29 09:41:27 3 2 147 0 18 78 0 120.60 50 33.97 CHANGED pphh..pYssEchpa.h.s+pp.......lp.hshPMsCFsDIPLpclp.Hsp.....tYGpauIuhcKchulppshsPVhY....h.pss.hhpslhphhptl.s.........................................ptpspptl.psLthhhhhlKsa-ss..............pcsFYsE+EWRhlssh....t.....hh.cpp.....otpph..pt.hphp....+FsssDIcaIhls ............sSWSYRNGQRTVYGD..................................................SPVVCFTDMPIAAYLE.T.GVRRL..ERNEKIGLYAIVLPKEQM.FN..YGARPVIY.......GLDpHN.hp...............................................................................................................................................................................................suphppucph.c.hs............................................................................... 0 8 11 15 +10733 PF10901 DUF2690 Protein of unknown function (DUF2690) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 21.70 21.70 21.70 23.80 21.10 21.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.63 0.72 -4.03 6 208 2009-01-15 18:05:59 2008-07-29 09:43:39 3 5 124 0 29 142 0 109.60 43 54.09 CHANGED pG+ShsGKsP.h.uCstsh.....s+hlsusshshuhVEL+aSsoCKTAWA+lT...s.s.s.cs.A.lhRsoDGKclss.uAGGsGch.....ssGpTssYTPMVhshDsRpuhAp ...............................tYDGKsPh.h..s.......oCDssuhs.....tcsphlsp......ss..........t...h......u.h.VELRaSssC+sAWAKlsl.....s.ssshss.A.....Ahl.......s+...ts..sGp.thos.sSsuGNGsl.........ppGQTS..sY..TsMVaDLss...................................................... 0 9 22 27 +10734 PF10902 DUF2693 Protein of unknown function (DUF2693) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
21.40 21.40 21.40 21.70 21.30 20.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.70 0.72 -4.07 6 77 2009-01-15 18:05:59 2008-07-29 09:44:26 3 1 68 0 3 63 41 71.70 32 60.54 CHANGED GpHpllFcKuDGolRsMhATRDshLl.spppGc.hh..sscsp..RKEscES....lsVYDlcscuWRSFplD+LISlsGhsltcLltl ............................phhapKsDGolRptpGThcsshhs..hppttp.........ts....p+cspss....lsaa.Dl-tpsWRoF+h-pLlsl...........h...... 0 0 3 3 +10735 PF10903 DUF2691 Protein of unknown function (DUF2691) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 26.50 26.50 24.40 21.50 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.99 0.71 -4.39 9 107 2009-01-15 18:05:59 2008-07-29 09:49:18 3 1 92 0 9 81 0 144.00 44 95.65 CHANGED pRGloF-IPNtYsphLhcILpsl-IssasW..l.ustEuYhltcGpL.scpLFscs.plhpGtsh+chlcss.YYlIFsDLKAaPpGchls-IpTYE-FhcScCElllLlsDusYVslYsKDpchlEhLYpNAhppuFppl-YITDENDsRTpLoV ...KRGIoh-I..Ps..p..Y..ssh.Lh..+lLKP.lcIs..s...a....sWh.l.sspEuYllhp.spL..sptLFsc-spl.h-Gp-L+cllK..sNhYYlI..FsDLKAaPKG....c..hl.....h-IpT.....YE..EFhcScCElVlLlsDupYlpIYsKspchIEhhYpNAhspGFh.V-YlTDENDsRTpLoV........... 0 2 6 7 +10736 PF10904 DUF2694 Protein of unknown function (DUF2694) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacterium spp. 20.90 20.90 20.90 21.50 20.20 20.40 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.25 0.72 -4.01 3 64 2009-01-15 18:05:59 2008-07-29 09:49:44 3 1 61 0 7 23 0 99.50 77 97.97 CHANGED MTDANPAFDTVHPSGHILVRSCRGGYMHSVALSEuAMETDAETLAEGILLTADVSCLKALLEVR-EIVAAGHTPSAEVPTsRDLDVAIEKLLAHQLRRRs+ ..............MTDANPAFDTVHPSGHILVRSCRGGYMHSVSLSEAAMETDAETLAEAILLTADVSCLKALLEVRNEIVAAGHTPS....AQVPTTDDLNVAIEKLLAHQLRRRNR............. 
0 1 3 6 +10737 PF10905 DUF2695 Protein of unknown function (DUF2695) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 30.50 29.90 21.20 20.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.69 0.72 -4.24 6 82 2009-01-15 18:05:59 2008-07-29 09:50:16 3 1 77 0 8 46 3 53.30 52 46.29 CHANGED Llsa.lsc+Lss.sCDHTh+auppahpp+plsh...csllchLtcpGGaCDCEllhN ........LhDY.VDE+Lu.cp.sCDHThRaupcahps++l-h...EolhEtLpEhGGYCDCEIlhN.. 0 5 8 8 +10738 PF10906 DUF2697 Protein of unknown function (DUF2697) Pollington J, Finn RD anon PRODOM Family This is a eukaryotic family of proteins with unknown function. 22.80 22.80 23.40 22.80 22.70 22.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.30 0.72 -3.97 9 39 2009-01-15 18:05:59 2008-07-29 09:50:42 3 1 38 0 29 37 0 66.00 40 78.69 CHANGED EEWLYhKLlsSsuFppaVR+lasKlNtIp.pPh.cppss.sphh....Y+PTphpKFpAFRllaaDEhKpoF .....EEWLYh+LlsSPuFHRFVR+lapKVNtI+.s..scps.ss..phl......a+PTt.hpKhpAaRhLFhDEh+ssh........................... 0 3 14 25 +10739 PF10907 DUF2749 Protein of unknown function (DUF2749) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins appear to come from the Trb operon however currently no function is known. 22.00 22.00 22.30 22.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.31 0.72 -4.10 8 39 2009-01-15 18:05:59 2008-07-29 09:50:55 3 1 18 0 15 40 1 58.90 40 41.19 CHANGED MSptVlIALlls..VAuuuusATslIVpsc.......susssuhuEEQRssRE+FFGusp-.PPI+cGQEM+PRW .....................................................................s...tt.ps+REpFFuus.c..-IRGGQcMcPRW....... 0 2 6 11 +10740 PF10908 DUF2778 Protein of unknown function (DUF2778) Gunasekaran P, Mistry J anon Pfam-B_001575 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
24.10 24.10 24.20 24.90 24.00 24.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.82 0.71 -3.90 28 375 2009-01-15 18:05:59 2008-07-29 09:53:21 3 7 237 0 108 328 3 100.10 35 40.73 CHANGED tpl-AaSGhGphhDcPctsslc.hGshPPusYhlh.R-u..........................................hhcGVc..AhRLpPl....................G.pGtSpGClohcshscFhphhcthchp.ph..ssVPuh ...........t..h.AaSG..st.hhspPp..hss.t.tGshPstp.Y.lh...Rps..............................................................hh.....pGVp..shRLpPs...................G.hG.SpGClohp.....shs.....pF.phhthh.hp.ph..shlst....................................... 0 19 44 70 +10741 PF10909 DUF2682 Protein of unknown function (DUF2682) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 24.80 24.80 24.90 90.40 21.80 24.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.61 0.72 -3.70 7 13 2009-01-15 18:05:59 2008-07-29 09:54:05 3 1 13 0 0 12 0 82.20 44 87.91 CHANGED LhsVQcssLcLspEspsaLssssss..............pLEpcLhpLlhpsNsIsFD.....cppthp.LKsNlhsCINIhIDLIhIKphh LltlQssVLDlh+ElspaLNsssPsh.........cthshL--hLTKhLhcuNAIsFD.....cppsLchl+sNIshCLNhhIsLITIK+Yl. 0 0 0 0 +10742 PF10910 DUF2744 Protein of unknown function (DUF2744) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 
25.00 25.00 26.00 25.70 24.50 18.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.42 0.71 -4.30 6 89 2009-01-15 18:05:59 2008-07-29 09:54:53 3 1 88 0 4 50 1 133.50 44 89.78 CHANGED ch.opEssDPscPEEtFlWsLpslPshsGssu......lhsssaL+thSKHLW-CGht.sD..............S.lPtQpl.KaQsPhRGppphhNssupWVshDsP-Pp.hRlpDstshTsQEppA.Lt.a+phGhl.st.s ....h.oQEcsD.ccPEEthAWAhh.s.LPtsu.G..ssu......lopPshhctWS+HLa-hGht.HsD.LcpLADEsGNIHVSpLPpQph.KaQsPhRGsRppa.NsA.ApWVspDsP-P..hRl.Ds+..pLTpQEppA.l.p..a+phGhI.s....................................................... 0 0 3 3 +10743 PF10911 DUF2717 Protein of unknown function (DUF2717) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as gene 6.5 protein however currently there is no known function. 25.00 25.00 29.70 29.30 19.90 18.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.68 0.72 -4.48 4 32 2009-01-15 18:05:59 2008-07-29 09:55:47 3 1 29 0 0 19 0 75.70 44 90.28 CHANGED MLpPIp+hlpNPsDIPslPRAstEYLQVRaNtuYlhtSGhlstLRtsGaSEuaIAGFlpGLphA...SpslDEhE.lRKEQ ...........MLpPIpphhppPpDlPslPRustEYLQVcFNsuYhhtSGhlsth+ts.GhSEuaIhGFltGLtYA...SpllD-h-..hR+-..... 0 0 0 0 +10744 PF10912 DUF2700 Protein of unknown function (DUF2700) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Caenorhabditis elegans. 
21.80 21.80 22.00 24.40 21.70 21.70 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.80 0.71 -4.20 6 52 2009-01-15 18:05:59 2008-07-29 09:57:19 3 4 5 0 52 47 0 136.20 22 65.64 CHANGED lPARPLVssLulhGllRuhuphhhu.sshhpRlschhaLhLNLLLLFGAsKNstsALKWSQRlshhsVlLuVIpFMIaPVhFASasASG....h-pNtThh...clEplusKTp.Ec+..FVhGhLoGYslEFussLhIGlElLKYlLlNRLW .................................hR.hlhhhuhhGlltshh...hhhs...ssh..h.t...h.....hs.hh..h..lhl..phllLaGshppsthsL+hupplshhsl..lls.hl.hhlhPVhhuShhASG............hpts..tsh.........p.....h..........s..t....ppp..........Fh.Ghh..Gh.hEhhhh................hhlsh.hhpahhlp............................. 1 8 11 52 +10745 PF10913 DUF2706 Protein of unknown function (DUF2706) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Rickettsia spp. 25.00 25.00 107.70 107.60 17.90 16.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.10 0.72 -4.37 2 39 2009-01-15 18:05:59 2008-07-29 09:59:14 3 1 39 0 4 15 0 58.70 90 99.87 CHANGED MLK.hKhhllLIMLuQLLSCTPSAPYEIKSPCVus-IsDtuplshNPClRRPVNS.lsIs MLKSLKhLLVLIMLAQLLSCTPSAPYEIKSPCVSADIDDGSSlSVNPCIRRPVNS.VNIV. 0 1 1 1 +10746 PF10914 DUF2781 Protein of unknown function (DUF2781) Gunasekaran P, Mistry J anon Pfam-B_001738 (release 23.0) Family This is a eukaryotic family of uncharacterised proteins. Some of the proteins in this family are annotated as membrane proteins. 
21.20 21.20 21.30 21.20 20.90 21.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.98 0.71 -4.43 42 369 2009-01-15 18:05:59 2008-07-29 09:59:44 3 7 206 0 245 335 1 142.00 24 59.60 CHANGED Dhlhhlaalhphshsh.....llDs..hhP....thh.t........htsltpaYlspapDhlh.....p.sPs.........ahpsh..................................hhhEhhaplPhhlhslY.................ulhps....sshhhshsLlauspsshophsplsphhh...............h.h..spctp..lltlYhPah.....llshlhslchhhpht ........................................................Dhhhhhahlhthshsl..................hhDh..h.hs..........lh.sh.........tpltphY.hpphpDhLhs.......sss...........ah+sh..................................hhhEhhapl.PhhhhuhY............................................ul..hts.........s.sah.shsllaustssp...sphspluthht...............hshthspc..tp..hlhsh.hsYh.....llPhllhhchhhp....................................... 0 57 107 171 +10747 PF10915 DUF2709 Protein of unknown function (DUF2709) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 23.00 23.00 25.00 386.50 21.20 22.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.39 0.70 -5.37 3 41 2009-01-15 18:05:59 2008-07-29 10:02:31 3 1 41 0 8 17 2 238.00 79 99.72 CHANGED sVIosslKchLLQFLK+pKsAELLsTYLFFLEpKaQLQPVLFlRDKlIYQSAEDsIspLEcEGKLWRETEIpIusG+PuVNEpTKKIYICPFTGKVFGDNTHPNPQDAIYDWVSKCPENKERIuGlRVKRFFVSEDPEVIKoYI...KsRR-PIpKlVYSSuITGKLFsS+pAVIEDFK+SYLKsloLVEVQsQNKFKIEDsFLoFIQDpL-E-KIApFVEoLA-asEFcsYVcpWVEsE- .MNISGSIKQKLLQFLcKQKuPELLATYLFYLEQuLpLsPVVFVRDKIIFKSsEDAIplLEtDKKIWRETEIQIoSGKPEVNEQTKRIYICPFTGKVFADNVYANPQDAIYDWLSSCPQNpERQSGVtVKRFLVSDDPEVI+sYI...VPPKEPIlKTVYASAlTGKLFHSLPsLlEDFcoSYLRPMTLEEVQNQNKFQLEoSFLoLLQDALEE-KIAEFVESLADDTAFHhYISQWVDTEE. 
0 4 5 7 +10748 PF10916 DUF2712 Protein of unknown function (DUF2712) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillales. 21.70 21.70 21.80 22.50 19.40 21.60 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.93 0.71 -4.23 2 33 2009-09-10 14:58:50 2008-07-29 10:03:04 3 1 30 0 2 23 2 139.40 58 98.59 CHANGED hppFhppNhRhlhAhslGl.lhAss.a.KAussNhtFchhl..shuNuhSsstaRpTopssNPWKVpLppSsEGKGTIhoFWL.h.stNpphspuSpIhNVKQGuts+YhtA.p.us+shshLAsENNNYsupoYhlDGlWDEETW ..............hppFhp+.hRLlhALVIGlLVF.APshH.SKAA..DNsIGFDFKLKPNCANSGSoSRYRETSSVNNPWKVRLcsSTEGKGTIASFWLGTYNKNKsAspGSsIMNVKQGAKT..RY.CGAYKVANKNTTYLAAENNNYNSKTYYVDGIWDEETW.... 0 1 2 2 +10749 PF10917 DUF2708 Protein of unknown function (DUF2708) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Caenorhabditis elegans. 23.20 23.20 23.40 23.60 22.90 23.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.31 0.72 -4.16 6 26 2009-01-15 18:05:59 2008-07-29 10:04:11 3 2 5 0 26 18 0 43.70 55 53.18 CHANGED MNhYSlFVFAlLuISuVo......ssG++Ct.GGNG.YGuG....VlIGAtK ...MNVYSVFl.FAlLAISSVS.......tG++Ct..GssG.YGuG.......s.....IlIGAtK....... 0 8 13 26 +10750 PF10918 DUF2718 Protein of unknown function (DUF2718) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. 
25.00 25.00 42.80 42.30 19.40 19.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.01 0.71 -4.11 4 13 2009-01-15 18:05:59 2008-07-29 10:04:30 3 1 12 0 0 8 0 124.50 54 96.89 CHANGED MLCIFYLuRLCNLIIYSLYSLLMYPMpKLISFMFGELNPFc-VLPcscKKDD-ssl.........sIhPsEssslPpphP.plp-pt-pssls.pNs..................NGVFDFMKhPNPFKRY..YEYs.sYs.ppspspPp+Vp...cKuFlE+MlEMVE MLCIFYlARLCNLIIYSlYSLLMaPMpKLISFMFGpLNPFsss.s.spKhpDshss..........htPh-sppIsp-hPLslh-ptppsshs..ss..................NGVFDFhKIPNPFK+Y..YEh....s.pNshKpPs.......KGhh-.MMphlE..... 0 0 0 0 +10752 PF10920 DUF2705 Protein of unknown function (DUF2705) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 46.40 45.90 24.50 24.30 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.79 0.70 -5.16 2 30 2009-01-15 18:05:59 2008-07-29 10:07:08 3 1 28 0 4 15 3 206.10 66 99.79 CHANGED M+.pthhIhlVhhlF.tul...uas.shpshPhLDGhPluhus.h.hp.lLhWalPIluhSFhhSGsI+Dhh.SYt.LplsRpap+.hWlhpQFLhlhlhlllFT..QlAlhaIao.hShas.......st.FlhhhLhY.lhLhslFShQhhhELahcuQhA.L.IssYlIhSllhAchlhQ.sosph.aYhLlPNYu.GhRTGL..aopouThlIps.huLhIllll.lslhIlulhKFKphDhL ........MKNNKLIILVVIC..LFLQAILFMAFDFPFKTLPILDGFPVGLATPVVTRLLLYWYLPIIAFSFYISGNlKDLLSSYGFLQISRNaKKEYWLMKQFLKLhI+VILFTSLQLALIFIFTPYShas.......Tu.FlYLILGY.lMLFTIFSLQYLLELFIDAQKALLLINGYVIISILMADLIYQNTTVTWPYYLLLPNYGMGFRTGLI.FoNTSTlLIShPTSLIILLVVLLCVFIlAIKKFKTTDIL............ 0 2 4 4 +10753 PF10921 DUF2710 Protein of unknown function (DUF2710) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Mycobacteriaceae. 
26.20 26.20 26.20 27.40 23.60 26.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.26 0.72 -3.91 2 57 2009-01-15 18:05:59 2008-07-29 10:08:06 3 1 56 0 5 17 0 107.90 88 90.74 CHANGED MVStssuRuE...LSD+DLVESVLR-LSEAADKWEALVsQAEsVTYSVDLGsV+AVANSDGRLLcLTLHPGVMTGYuHuELADRlNlAlsALR-EsEAENcARYGG.LQ ......MVSGSDSRSEPSQLSDRDLVESVLRDLSEAADKWEALVTQAETVTYSV..DLGDVRAVANSDGRLLELTLHPGVMTGYAHGELADRVNLAITALRDEVEAENRARYGGRLQ.... 0 1 2 4 +10754 PF10922 DUF2745 Protein of unknown function (DUF2745) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 21.40 21.40 21.90 21.70 21.00 19.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.73 0.72 -4.07 4 21 2009-09-10 19:09:16 2008-07-29 10:09:42 3 1 18 0 0 15 0 87.50 50 98.76 CHANGED MGRLYSGNLNAFKAAssRL.p.hDlsVhh--a.-phspppChp....LRlEDRuGpllsopTFpH+DEDVLaNhsTsWLN+hasQLKcWK ..MGRLYSGNLssFKsAssRLhc.hDl...sVhhEsap.ppsu+tChp....LRl.sRuGpll.sop..TFp..HpDEDVhhNhpT-WLp+haspLKcWK. 0 0 0 0 +10755 PF10923 DUF2791 P-loop Domain of unknown function (DUF2791) Gunasekaran P, Mistry J anon Pfam-B_001611 (release 23.0) Domain This is a family of proteins found in archaea and bacteria. This domain contains a P-loop motif suggesting it binds to a nucleotide such as ATP. 
22.20 22.20 22.20 22.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 417 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.91 0.70 -5.87 28 286 2012-10-05 12:31:09 2008-07-29 10:10:10 3 3 243 0 96 309 63 370.90 34 90.05 CHANGED Ips+cRssllsuL+uGVVP+hGLchIsVGRspElpALhp-l-plusGGuuFRhllGEYGuGKTFFlphlRshAhc+saVsucsDLoP.-+RLHustGpuculYpcLhcsLoT+T+PcGsALpsIl-+Wlsslpc-shspu.............sssEplIcp+Lutls-hstGa-FAplLcuYa+uptpG-Epl+susl+WL+GEass+s-A+psLGV+slIDDsuhY-aLKhluthl+puGasGLllslDElVNLhKlpsstsRppNYEplLpllNDshQG+s.sLGhlhuGTPpFl.DsRRGlaSYtALpSRLupstas.psuhp-hpuPhIRLssLo.E-lhlLhp+lRclaAtststp....thlsDptlpsFhcpssuRlGuphhlTPRphl+cFl.slLsll-QNPshchpclhstst..hss- ...............................................................................................l..+.tpsllpuL+uGV.VP+h.GlthltVGRptElpuLhp.D.l..-.h.l.s.c.G..Gu.u.FRhllGcYGoGKoFhLphlRphAhc+shVs....scsDLoP...-R.RLpustspu....hshYpELhpNluT+...s...cP-G.s.......ALt.ll-+alsphppp..shtps..............................ttscphIhphl.spl..s-..h.st.Ga-FA.pllptYhc....u.....h.ps......c-ph+.st....sl+WlRGEhss+o-A+.....p...tLG...V.c.t.lIsD..s..sh.Y-hLKhhutFl+tuGYsGLllhlDEhV.N.l.a.K.......ls.......s..s.sRptNY..EplLphhN..DshQGp.spt.LshlhuGTPphl.......DtR+G.laSYpALps......RLupsp.......hs.....p.....sshh-h.uPsl+.L..t..sL.....o..sE-..hh.hLhp+ltclaAtt.s.p........hhls-pt.l.ttFhpttht+lGsth.hhTPRphl+sFl.plLsll.Qssshshpplhtt..h..t.......................................................................... 0 40 67 83 +10756 PF10924 DUF2711 Protein of unknown function (DUF2711) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as ywbB however currently there is no known function. 
21.20 21.20 23.20 26.20 20.20 20.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.47 0.70 -4.86 6 65 2009-09-10 23:06:20 2008-07-29 10:12:01 3 1 63 0 5 62 1 198.10 52 90.20 CHANGED -cSPILtQLPpsFpSAAlLLHPFlpMPhGWEsuhRKpsYEHI.YPSsEEIlphG+sVSWpcVMossGLpShsELAlAlhTuIsA..LR-EYtRcDLAc+LasslctDLYYPoEDhTSlFLltsLLKVLGSKGAcslYYuEPIh-scGshpls-ssPhslhsLssuElIITDEphDaAFMSlaDSFoTLFLAK-psIccIlpuhshEAlICscsThIsWYh ............-cSPILpQlPtpapSAAILhpPFlQMP.GWEcuhRKpPYEHI.YPSsEEIIppGKuVSWKchMShoGL+SaA-LAhAh.hTSIuA..hp-EYpRcDLAE.+LasN..l.+.p.D.LYYPoED.aTShFLlppLLKlLGSKGucplYaS.EPIh-ssGlLplssToshD.lhDlu.ss.ELIITsEcp-aAFMSlYDSFsTLlLAK-cNIc.lVpuMNhEAlICDccThIsWY............................................................... 1 1 4 4 +10757 PF10925 DUF2680 Protein of unknown function (DUF2680) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as yckD however currently no function is known. 23.90 23.90 24.10 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.76 0.72 -4.10 14 166 2009-01-15 18:05:59 2008-07-29 10:13:36 3 4 89 0 53 148 1 60.30 27 44.87 CHANGED LT-pQKp-IpsLtpQlh-lpKpllsKaVchGllTp-Qu-pIKppIDpthphh....cpN...........GFhP ..................tplpshppphhph+KphlsKhVctGhlTp-QA-pI+ppl-pphphh....pps...........sh........................... 0 30 43 50 +10758 PF10926 DUF2800 Protein of unknown function (DUF2800) Gunasekaran P, Mistry J anon Pfam-B_001630 (release 23.0) Family This is a family of uncharacterised proteins found in bacteria and viruses. Some members of this family are annotated as being Phi APSE P51-like proteins. 
19.80 19.80 19.80 19.80 19.70 19.70 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.37 0.70 -5.34 27 496 2012-10-11 20:44:46 2008-07-29 10:43:57 3 3 403 0 62 832 284 331.50 38 92.63 CHANGED HAhLSsSuu+RWLpCPsSst.LppphsDpsSshAp.EGTsAHtluEhtL.............pptl.cshshhs..........hs.....................tt.hhs-EMt-hsptYl-hVhEphp.h.....sshlhlEQRlchuphlP-......uFGTuDslllu..ss...hLpIl.DhKYG+G.V.VsA...................ppNsQhhlYALGAlc.ashlYD.hcpVphsIhQPRh...cs.hSsap..lssp....-LhpWupphlcstAphA..............tspucapsGcaC.pFC+s+.ssCpARActshpls.ph-Fpsssh............................Losp-lup....lLsphs.lcpWsc-lcshAhpphhs.Gcph...sG..aKlVpGR.usR+as.D-csltpsLpstGhtpp.lYp.pcLlo.TphEKhh.....sc...........cpascll.tshlh+ssGKssLsspoD+RPul ....................................................................................................................HAhLSASuu++WLpCP..s...S........hp....h..p...p..t.h...s..-..p...s....S.....aA..t..EGThAHt.L...uEh..hl...................ptth...pt..st.h...............hs....................................................................p.thhs.c..E......M......t.....c......h..s.....c..t...Y.....s...s..h...V....-hh...pth........................sshhh..l...E...p....+..lDh..uc.aVsp....................uFGTuDslI.lu..........ss.......hLpIl..DLKYGpG...l....V..s..A...................t.pN..s...Q.hh..LY....A.L......G.....A.h.....c.....h....a....s..h.....l......Y.......D.......hcp........Vp......h....T.....I......h.....QP..Rh..............ss...hS.s.h.p...ls..hp.........c..L.h..p.W...upp..hlc...s..tAphA.................................htGpG..ca....ps......G...p...a..C..pFC+h+...sp...C+sRAch.hpls...p...ph.psPth......................................................Losp-luc.....lL.phstlppWsp-....lcpaA....h....s....pAhp....Gc.ph...sG....aK..L.V..EG..R.SpRtas...Dppsshp.h.l.h.p..sGa...cs...hhc....pcLlolTphEKLl......GK...........+tFsclh..tshI.KPpG..K.oLssp.oD+RPsh..................................................................
............................... 0 28 52 60 +10759 PF10927 DUF2738 Protein of unknown function (DUF2738) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 20.30 20.30 20.50 20.50 18.70 19.90 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.68 0.70 -5.32 6 39 2009-01-15 18:05:59 2008-07-29 10:44:37 3 2 27 0 12 40 35 219.30 26 63.81 CHANGED ssshphp+hhFsKs.hascltsuH......RINl+Y-ccsshs..PLsh+TslLFSFGlpp.sshQDpsc.sYShsLsha-tp.GPospEptahshLpplts+s+cHL+c.sl++shs.....Kh.hssLhssMsshYhKhp-sls..........sPs+uPsLYPKllhut+ss.phsT..hFaKcscGpslpIs....llpp+C+Vlssltl-SIFlGsKsSlQlKlh-VllsE.........sls.p++plhhsphPss.ppE.p+csssssE--Lspp...tp.Ecl ..............................................s...................tss+......hlslpYcpp...t..PLhl..pss..hha.S.FGlps.s.....ohpsuss..saShsLhh..h.shp...s.sspEs.sFhpslctIts+s+cal.hc.s.+csht..........Kh......p.cslh-hh....p.....h.hhpp.t-sh...........sPscsPthYs...KLlhsppss.chhT...hhhp.sctptlch........hltt+spshsslth-Sl.a.h..s..sK.uhQhKL.psll..hE..........ht.............................................pt........................................... 0 12 12 12 +10760 PF10928 DUF2810 Protein of unknown function (DUF2810) Gunasekaran P, Mistry J anon Pfam-B_001682 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 32.00 38.80 18.40 16.90 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.35 0.72 -4.23 12 654 2009-01-15 18:05:59 2008-07-29 10:46:18 3 2 649 1 55 192 7 53.90 79 45.01 CHANGED plpsLsFsRslTKAEQADMGKLKKSV+GLVVVHPMTALGREhGlcpVTGaAP+t ......KLMcMPFpRAITKKEQADMGKLKKSVRGLVVVHPMTALGREMGLpEMTGFuKo.s. 0 3 12 34 +10761 PF10929 DUF2811 Protein of unknown function (DUF2811) Gunasekaran P, Mistry J anon Pfam-B_001693 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
20.90 20.90 21.60 24.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.90 0.72 -3.96 31 108 2009-01-15 18:05:59 2008-07-29 10:46:54 3 2 64 0 50 111 199 58.20 47 69.16 CHANGED ShpsElPEsLtpuMppFI-sHPsWDQYRlhpAALAsFLlQN.G........sssRsloRhYlssLF ..Sl.sElPEsLapuhpsal-sHPsWDQ.RlhsAALutFLlQN.G........ss..sRslsRlYLssLF.......... 0 6 29 46 +10762 PF10930 DUF2737 Protein of unknown function (DUF2737) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 46.10 47.40 16.30 14.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.85 0.72 -4.36 5 174 2009-09-11 15:20:40 2008-07-29 10:47:25 3 1 162 0 1 62 0 54.20 81 99.29 CHANGED M..RGLSYNPuILPoEhI...IR++hKPMPo..REELLKRNSFPSVNcNKYLNAMLRKs..KK ...MRGLAYNPGILPAEMI...IRQRsKPMPS..REELLKRpSFPSVNpNKYLNAMhRSG.KK.... 0 0 0 1 +10763 PF10931 DUF2735 Protein of unknown function (DUF2735) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as glutamine synthetase translation inhibitor however this function can not be confirmed. 25.00 25.00 31.20 29.30 23.40 21.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.53 0.72 -3.72 11 50 2009-01-15 18:05:59 2008-07-29 10:47:42 3 1 50 0 27 53 0 51.00 37 76.29 CHANGED SA+IYQFPl..GGRsGhsp+p...tth.spttssssssslssuSWYH--AlpE-ps ............SApIYQFPl..uuRtuhsppc...pst.spptuss.hssssssuuWYH--AlpEpp.. 0 4 10 15 +10764 PF10932 DUF2783 Protein of unknown function (DUF2783) Gunasekaran P, Mistry J anon Pfam-B_001590 (release 23.0) Family This is a bacterial family of uncharacterised protein. 
19.80 19.80 20.00 22.60 19.60 19.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.81 0.72 -4.48 27 179 2009-01-15 18:05:59 2008-07-29 10:48:18 3 1 165 0 68 159 43 60.50 54 83.47 CHANGED LspsPNlssP...........DsFYptLIssHcsLo--pSptlNARLILLLANHIGDhsVLpcAlphA+pu ..................LsTpsNl.scP...........DsFYEALI-sHRsLo-ppSphlNA+LlLLLANHIGDhsVL+EAlshARp.s.. 0 10 32 49 +10765 PF10933 DUF2827 Protein of unknown function (DUF2827) Gunasekaran P, Mistry J anon Pfam-B_001771 (release 23.0) Family This is a family of uncharacterised proteins found in Burkholderia. 25.00 25.00 25.40 25.30 21.20 23.60 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.19 0.70 -5.98 9 211 2009-01-15 18:05:59 2008-07-29 10:49:20 3 4 68 0 56 172 41 347.10 44 93.20 CHANGED plGIolhl.RsssQSlW.NGItQNslaLshLLppSPhltcVslVNssDs.shssuL.hsthssslhshs-stcphDVlIEhuuQlss-ahchh+tRGsKlVohpsGs-YVhshEshhFs+s..shlFpsssYDplWslPpht+oshsaLpolhRuPVphVPalWcPhFl-pptpsL.ttG.tFGY+P.......G+.sthRlolhEPNIsVVKsslhPMLlu-pAYRtpP-hlcal+VsNoh+h.+-pPpFVthApuLDlVRcG+soF-uRashspFhApasDsVVSHpWENs.NYlYaDlLaGGYPLVHNSshLsDsGYYYPD.DspsGApsLh+AhpcHDsch-sYpp+uRshLtslsstsstNlutYsstLs ...................................lGIolhl.Rs.spulWtNGIpQNslaLshLLcpSshltcVhhVN.s.G..ss....sh..ssuL.hst.hsls.lhshs-s.hc.plDVlIEhuuQlss-ahpth+s+GsKlVshpsGp-YlhshEshlFs+ss..uplFsussaDtVWslPphtposhsaLpolhRuPVplVPalWsPhFl-+tttpL....t.tGhpFGYcP..........G+...tthRlohhEPNlsVVKoshhPMLls-EAYRtpPDhlpalaVsNoh+h.K-cssFVpF.s.p.sLDlV+puhsoF-sRashspFhApa.s.DsVVSHpWENs.NY.hYaDlLYGGYPLVHNSshLs..c..sGYYYPDFDstsGu+sLhcA.hccHDsph-sYpp+ucclLcplo.tNstNlstYsptl.h.......... 0 14 17 36 +10766 PF10934 DUF2634 Protein of unknown function (DUF2634) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as phage related, xkdS however currently there is no known function. 
31.80 31.80 32.00 32.00 31.60 31.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.25 0.71 -4.30 26 250 2009-09-11 16:39:40 2008-07-29 10:53:59 3 2 179 0 57 234 7 109.00 29 77.38 CHANGED oYplcFppschhsp.......l-slEAl+QhlhhsLpT-Raca..IY....SpsYG.sElpsLIGp..hspshhcsElpRhlpEALlhcs+IpsVcsFph.phpsspl..plsFsVhTh.Gph ...................pathDhcps..chhtp..........l-sh.EAl+QhlhKsLpTcRac......a..IY........opsYG.sElp.c.LlGp...spshhcsElpRhIcEALhh-s..RIpsVssFph...ph.p...t...c.sl..plsFsVpohhGp............................... 0 27 49 51 +10767 PF10935 DUF2637 Protein of unknown function (DUF2637) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 22.80 22.80 23.10 22.90 22.60 22.60 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.94 0.71 -4.29 35 337 2009-01-15 18:05:59 2008-07-29 10:56:07 3 7 169 0 121 387 2 142.20 20 43.45 CHANGED tsshshshslussuaslS.F..suL+clAhttu.hsst..hualhPlslDusllsuohhhlhh...tpssttt+hhsashhshu..........................................suhSlsuslhashhssts..pt...t.t.............hlthhhhhl....sPlthhssh+h......h.h.......hhtpshss ..................h.hshssshsluslAhslS..a.......suLpslAhp...tG...hssh..............huahhPlslDuslhssshhhlhh....ht....h..s.h...h...hh....phh.sWhh...h....ss..u..................................................sshs.lsusl..h.ashhhtth..ph......................hh.hhhh............ssh.hhh.h.....h....................t.t.......................................................................................................................................................................... 0 33 90 114 +10768 PF10936 DUF2617 Protein of unknown function DUF2617 Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
19.50 19.50 19.70 19.80 19.30 19.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.05 0.71 -4.68 15 185 2009-01-15 18:05:59 2008-07-29 10:57:39 3 1 180 0 42 119 0 142.50 39 95.31 CHANGED hhppLsss.sDsuAusLuluLsusssssLAstclsh..ssuslpLsVLGASHtVslptss.uphsEpVuChssss....tsLPs.....psp.usGYchpopscslstucFpspAssLpscssccssuLsGsFPG-.stAlTALsAcss......ss..uhpW+TWHlY...Pps..GplVsT+S+hc...P ..............................hhpLths.sDspus.LuhsL.....s.t.s...sLAshcl...th.........sus..........tlpLsVLGASH.Vslct.........t...up...hsEpVuChstss.....ssLPt..................shp.s.t....Ychtuc.s...cshs.........ts.sFtthApcLht.......hs..s..c.....t.....st....hLsGsF.PGs....st...AlTAl...hup.p...............ts....thpW+TWHhYPpt...........GplVtTpoph...... 0 12 29 39 +10769 PF10937 DUF2638 Protein of unknown function (DUF2638) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 22.00 22.00 22.10 22.00 21.00 21.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.73 0.71 -3.05 16 190 2009-01-15 18:05:59 2008-07-29 10:58:45 3 5 164 0 134 177 0 92.30 28 77.69 CHANGED sPhI+FlGpRp..sppsspsstsHPsossshlP.........suhuuappphppasPhp......pss..tu..ustsuuuhusl......pPhp.GEhhshs-LPsRF+.ppPhsEAElEuIpoG.GA ...........................................................................I+FhG....................t..tts...sHPhsss.t.hs.................t.h..h.....h.t...P................t................tth.us.ps.uss.sss....................pshp..sphhsht-LPtRF+...R...ps...h..sptElEsIpoGGs.......... 0 35 67 108 +10770 PF10938 YfdX YfdX protein Bateman A anon Bateman A Family YfdX is a protein found in Proteobacteria of unknown function. The protein coding for this gene is regulated by EvgA in E. coli [1]. 
22.60 22.60 23.00 23.40 22.50 22.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.98 0.71 -4.56 23 564 2009-01-15 18:05:59 2008-07-29 11:02:18 3 2 501 4 62 218 7 153.00 55 68.75 CHANGED pAlpclppAttAlps.GpscpAhptlpcApuplchhtsc.sphs...t...................................hlPVsspltlh-t.hssssptcsulppApptlppGchptAhchLchhss-lshshshlPLsph.sulppAtpLLcpsKhpEAstsLptAh.solVlsp.shs .................................AMRDlQhARhALFc.GDs-..KAKcLss-AouLLsDDST-WsKFAKssKKsslssD...........................pYIsINuSlsluEs.Y....ls..TPEKcAAIchANEKMs+GDKKGAhEpL+LAGVuVhENQhLhPLcQTRsAlscAp+LLDcppYYEAsLALKuAc.DGIIVDSpu..l...................... 1 20 43 52 +10771 PF10939 DUF2631 Protein of unknown function (DUF2631) Pollington J, Finn RD anon PRODOM Family This is s bacterial family of proteins with unknown function. 25.00 25.00 28.30 27.90 18.00 16.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.25 0.72 -4.13 13 179 2009-01-15 18:05:59 2008-07-29 11:51:58 3 1 177 0 44 115 0 66.90 47 58.98 CHANGED hAuTElEhasG.......VDst-VPSAsWGWSphshRshpIuGlluuhFLLsMlhGNHpG........+VEDhaLIGF.AA ...................tsptsEhasG.................V-.T......t-sPSAAWG..Wpp.....lspRshpIsGhhssh.FLLuM.l.hGNHpG........HVEslaLlsFAs......... 0 11 33 41 +10772 PF10940 DUF2618 Protein of unknown function (DUF2618) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. The sequences within the family are highly conserved. 25.00 25.00 35.90 35.50 18.40 16.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.16 0.72 -4.18 3 84 2009-01-15 18:05:59 2008-07-29 11:52:30 3 1 84 0 9 19 0 34.40 68 89.18 CHANGED KGRSIMAHIRRTRHIMMPSYRSCFSYSlFsSQ.SoSHhAL .......ts+IMAHIRRTRHIMMPSHRshFDaSFFst.........h..... 
0 2 2 5 +10773 PF10941 DUF2620 Protein of unknown function DUF2620 Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 25.00 25.00 35.00 43.10 23.90 23.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.41 0.71 -4.06 12 324 2009-01-15 18:05:59 2008-07-29 11:54:02 3 1 316 0 27 96 0 113.90 80 97.87 CHANGED h+IsIGGtlpKcpIcchlcch.ussphpshlhuDh-AAMtVKsGphDYYlGACpTGuGGALAMAIAllGhs+ssTIuhPut.spcEcIcphVspGKhAFGFTs-HhEpAlshLlshLh ...KKIGVAG.LQREQIKKTIEAT.APGsF.E.VFIHNDMEAAMKVKSGQLDYYIGACNTGAGAALS.......IAIAVIGYNKSCTIAKPG.IKAKDEHIAKMIAEGK.VAFGLSVEHVEHAIPMLINHLK..... 0 7 12 21 +10774 PF10942 DUF2619 Protein of unknown function (DUF2619) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 25.00 25.00 55.20 55.00 19.60 17.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -8.91 0.72 -4.03 15 172 2009-01-15 18:05:59 2008-07-29 11:56:57 3 1 170 0 42 92 0 69.00 58 71.95 CHANGED RlLSusIEloAAlLML+hNDlcKAltINulLAlVGPsIhIlohoIGLhulAu+lShsKLlhIhhGVsLI RLhSGShEIhAALLMLhlNDs+KALhINuhL.AhVGPTVLIlTMTIGIsulA.uclSahKLhalslGlsCI. 0 19 34 37 +10775 PF10943 DUF2632 Protein of unknown function (DUF2632) Pollington J, Finn RD anon PRODOM Family This is a family of membrane proteins with unknown function. 
25.00 25.00 290.30 290.10 20.80 20.30 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.52 0.70 -4.85 2 43 2009-01-15 18:05:59 2008-07-29 11:58:42 3 1 4 0 0 24 0 180.40 93 100.00 CHANGED MF-TNaWPFPDQAPsPFpAQl-pLouTENVYIFLTTLFGILQLVYVhFKLLCTMFPoLHFSPIWRGLENFWLFLSLsSLAIAYWWLPSMTFTGYWALTlIATILVhlhLIMMFVKFlNFVKLFYRTGSFAIAIRGPIVLVALDVTIKLHCTPFAILVKElGsIFYLSEYCNKPLsAAQIAAL+ICVNGQWFAYTRSSTTSAA+VAAANSTAKYHLFlLQGVA-YTQLSSVKFE ......................EQLSSTENVYIFLTTLFGILQLVYVhFKLLCTMFPsLHaSPIWRGLENFWLFLSLsSLAIAYWWLPSMTFTGYWALTlIATILVLlMLIMMFVKFlsFVpLFYRTGSFAIAIRGPIVLVALDVTIKLHCTPFAILVKEVGNIFYLSEYCNKPLsAAQlAAL+ICV................................................. 0 0 0 0 +10776 PF10944 DUF2630 Protein of unknown function (DUF2630) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins have no known function. 21.10 21.10 21.20 21.20 20.50 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.86 0.72 -3.88 14 143 2009-01-15 18:05:59 2008-07-29 11:59:06 3 2 141 0 52 97 1 79.40 54 94.45 CHANGED ssDp-ILsplpcLVsEE+cLRpphppGcIDpupE+p.RLpplEspLDQCWDLLRQRRAhRsuGpDPD-ApsRPsspVEsYhp .....s.sDp-hLu+Ic-LVAEE+tLRuphppGtIspsEE.pp.RLRclElcLDQCWDLLRQRRAhRpsGsDPc-AtVRPsspVEGYp........ 0 15 37 49 +10777 PF10945 DUF2629 Protein of unknown function (DUF2629) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as yhjR however currently no function is known. 20.80 20.80 20.90 22.50 18.90 20.40 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.32 0.72 -4.61 7 472 2009-01-15 18:05:59 2008-07-29 11:59:37 3 1 469 0 27 83 0 43.90 84 69.07 CHANGED phQsDhhALSQAFSLP-lsYtDISppEpLstAltRWPLLAEhAc ...............h.FQNDIlALKQAFSLP-IDYADISQREQLAAALKRWPLLAEFAQ.. 
1 3 6 17 +10778 PF10946 DUF2625 Protein of unknown function DUF2625 Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as ybfG however currently no function is known. 19.90 19.90 20.50 20.10 19.70 19.20 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.29 0.70 -5.39 12 157 2009-01-15 18:05:59 2008-07-29 12:00:21 3 1 150 0 24 112 2 191.50 47 90.87 CHANGED Lc-Llshc-sAWsllp-WhspApN+hElLss.sspAppsLhsLQVoTRSshGAllYpoGGllIDtGWLRlLGSGp.+LsRslssWNtG+.tht..hptssaLLlADDslGGaFAlNGGuLG.....-csGplYYaAPDoLpWEsLplGYS-FLtWslsscLscFYcsLRWpsWps-VtpLsucps.asFhPhLWspps...slctpS+csVslpEha ........hppLls.-cSAW.llppWlpsApN+sElLsss.ps.tA.c.psLhphQloT+SPMGAllYpoGGlLIDpGWLRIhG..SGp.+.LP...Rshhs...WN.t....pFs......tsphLllADDVhGGhFAlNG.....GsLG.....ccsGplYYauPDoLpWEsLpluYSEFLtWALsGDL-sFYpslRWpsWp-DVtpLsusps.asFhP.La.ppt.....-tpp+p.lslppha.................... 1 7 15 23 +10779 PF10947 DUF2628 Protein of unknown function (DUF2628) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as yigF however currently no function is known. 22.80 22.80 22.80 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.68 0.72 -4.25 71 909 2009-01-15 18:05:59 2008-07-29 12:40:52 3 16 750 0 179 662 139 105.40 19 55.47 CHANGED hshashhp....sts..............ttts................ththlp.pG..........asa..hAahF..uhh.........ahhhp+hW....hhs.hshlslslshslhhhhh............s.shhh..................hltlhl....slhhGhpusth...hhhcht+ ...........................................................................................pt.....................ththhc..u...................asa..hAhhF..shh.........ahhh++h..W............hhu...hhhhhl.sl.shs.l..lshhht.......................hhh..............................................................hltlhl....tlhhuh.uNth....ahpph.................................................... 
0 41 96 128 +10780 PF10948 DUF2635 Protein of unknown function (DUF2635) Pollington J, Finn RD anon PRODOM Family This is a family of phage proteins with unknown function. 20.10 20.10 20.40 20.10 19.70 19.70 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.29 0.72 -4.66 20 259 2009-01-15 18:05:59 2008-07-29 12:43:16 3 1 235 0 33 149 2 46.20 45 73.56 CHANGED lKPAp.G....hsV+DPptGchLss-GcpVscsu.aWhRRLpDGDVlhspsps .........................lKPst..G......+sV.DP....sp........uchL..PsEGc...pVscss.aWhRRhssGDlhplspp.s.... 0 4 11 25 +10781 PF10949 DUF2777 Protein of unknown function (DUF2777) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Bacillus cereus. 25.00 25.00 69.00 68.20 22.20 18.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -10.88 0.71 -4.61 10 135 2009-01-15 18:05:59 2008-07-29 12:51:55 3 1 134 0 21 80 0 182.20 56 96.18 CHANGED Ms....pR..pphLhppsRpashGTl.hIc-pasFh--EEs-thLlEslspsslElhcsscWppuphh.-pshlppusEphsLpsGEpIRhcKpLthuhhpLLs-LsDssFhtFlspLpSLGaSlYDClaCaNtLsF.......spppsspGVNFlpFsN--tlCulQHHasRtpsps......DRFEaTtusGcRhlhpps ..............M...hQR..KHILYNQPRAHTlGNVEYINNEWlFF.DDEN-EAFLLE-IAEDGFEILYNNNWLPARFY.EQslL.QIAsEQHpLQNGEMIRIRKKLLLSYpEWLEELPDSlFsLLTEsLQSL+YSLYDChYCHNaLSFL......PcEEssEGVNlLLFDN-EMICoLQHHFVR+soSNK.....NhFcFTKsNGEcLHI-u.T... 0 4 13 15 +10782 PF10950 DUF2775 Protein of unknown function (DUF2775) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 
22.50 22.50 25.40 22.70 21.80 21.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.09 0.72 -10.75 0.72 -3.83 8 136 2009-01-15 18:05:59 2008-07-29 12:54:08 3 9 11 0 35 126 0 76.40 38 100.80 CHANGED pDlt.YW+hlMKDpDMPEtIpuLLs.p......phcs.pph..t..........pctulc-FEPRPpho..sYssN-lchpEKKpslc-FEP+PNlSAYGDN-IctpEK.KsslpDFEPRPNlSAYs ...................................................................................pph-sh....sslo...tYs-spl.cspp.ppp..hscDFEPRPNlotY.sDt..hchcpp.pshs............cDFEPRPslotYs.................. 0 0 31 35 +10783 PF10951 DUF2776 Protein of unknown function (DUF2776) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.90 20.90 21.10 83.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.46 0.70 -5.39 7 355 2009-01-15 18:05:59 2008-07-29 12:55:53 3 1 344 0 11 142 0 329.40 81 98.46 CHANGED MNYGISlLFRAIPLlMulhChuYGuFlhp.Gss..usthVAG.VlhSLuhICIALFsTAATIIRQlI+sYNphhpahLPllGYLuAslThlhGhsh.hspusssusFVAGHVlhGVGhIsACVATsAsuSTRFoLIPtNucss.ssthPtsAFopttuhhLIslsllholluaIWuahLLuco-ppss.aVAGHVhhGLAsICsSLIALVATIsRQlRNsaoc+ERhhWsthVllMGSIolLhGlaVL.usosssphusGYIhIGLGLVCYSISSKVlLLAtlWR+chKLANRIPLIPVhTALACLFLuAFLhEhusscssYFIPARVLsGLGAICFTLFSIVSILESGTS ......MNIYIGWLFKLIPLlMGLICIALGGFVLESSGQ..SEYFVAGHVLISLAAICLALFTTAFIIISQLTRGVNTFYNTLFPIIGYAGSIITMIWGWALLAGNDVMADEFVAGHVIFGVGMIAACVSTVAASSGHFLLIPKNAAGSKSDGTPVQAYSSLIGNCLIAVPVLLTLLGFIWSITLLRSADITPH.YVAGHVLLGLTAICACLIGLVATIVHQTRNTFSoKEHWLWCYWVIhLGSITVLQGIYVLVSSDASARLAPGIILICLGMICYSIFSKVWLLALVWRRTCSLANRIPMIPVFTCLFCLFLASFLAEMAQTDMGYFIPSRVLVGLGAVCFTLFSIVSILEAGSA......... 2 5 7 9 +10784 PF10952 DUF2753 Protein of unknown function (DUF2753) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
27.10 27.10 27.10 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.88 0.71 -4.19 7 99 2009-09-11 03:22:03 2008-07-29 13:02:31 3 2 99 0 15 59 2 134.70 69 95.28 CHANGED WE+HTLLA-pAhppss.htoIlHYQ.ALu.Sppl...psspt-L-DhlslpVlSCHNLApFWRttGDscYELKYLQLASEpVhsLlPQCPpppC-uFlsoLGCCpuALl-FhKRHPNPtIA+plpcIsosspCELIApF+Lp ....................................WE+HTLLADhAhQ-sDHLRSILHYQQALTlSppl..sEu-EI-hEDRhhISVISCHNhApFWRshGDspYELKYLQLASE+VLTLIPQ...C.pssCEAFlDSLGCC+KALlDFMKRHPNPcIA+hVQ+IDTAopCElIApFRLN.............................. 0 1 4 8 +10785 PF10953 DUF2754 Protein of unknown function (DUF2754) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Enterobacteriaceae. 25.00 25.00 91.90 91.80 24.10 19.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.57 0.72 -3.92 3 443 2009-01-15 18:05:59 2008-07-29 13:05:01 3 1 440 0 15 74 1 70.00 90 92.22 CHANGED MpLosKIRRDWHYYAVALGLIFILNGVVGLLGFEAKGWQTYAVGLVTWVISFWIAGFIIRRRPEEopsAE MNLPVKIRRDWHYYAFAIGLIFILNGVVGLLGFEAKGWQTYAVGLVTWVISFWLAGLIIRRRDEETENAQ....... 0 1 1 8 +10786 PF10954 DUF2755 Protein of unknown function (DUF2755) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated as YaiY however no function is known. The family appears to be restricted to Enterobacteriaceae. 25.00 25.00 90.60 90.60 18.60 17.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.33 0.72 -4.29 6 443 2009-01-15 18:05:59 2008-07-29 13:09:54 3 1 440 0 15 59 2 100.70 89 98.89 CHANGED MADFTLSKSLFuGK.RssSSTPGNIAYALFVLFCFWAGAQLLNlLVHAPGVYE+LMQsQ-TGRPRV-IGLGVGTIFGLlPFLsGClIhuVlAlhLRWR+R+ ............MADFTLSKS.LFS.GKYRNASSTPGNIAYALFVLFCFWAGAQLLNLLVHAPGVYERLMQ..VQETGRPRVEIGLGVGTIFGLIPFLVGCLIFAVVALWLHWRHRR. 
0 1 1 8 +10787 PF10955 DUF2757 Protein of unknown function (DUF2757) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as YabK however currently no function is known. 22.50 22.50 23.10 34.70 22.30 22.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.75 0.72 -3.79 8 173 2009-01-15 18:05:59 2008-07-29 13:10:27 3 1 173 0 32 85 1 72.70 56 99.25 CHANGED MAlaYaCRHCGhcVGoLspsslpo.-pLGFacLT-cE+sDMIoYcpNGDlHVKTICEDCQEuLpcNP-YHpacpFIQ .....MthaYYCRHCGspVGSlsu..pp....Vho.-sL..apLTEpEhs-MIpa+E.NGs.I.alKTICEpCQEsLss.PEYaEY-pFlQ.......... 0 10 22 24 +10788 PF10956 DUF2756 Protein of unknown function (DUF2756) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins are annotated yhhA however currently no function is known. The family appears to be restricted to Enterobacteriaceae. 20.70 20.70 21.40 31.10 20.60 20.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.61 0.72 -3.78 5 476 2009-01-15 18:05:59 2008-07-29 13:14:17 3 2 469 0 27 111 1 101.90 81 74.37 CHANGED MKRLLlLAALLPFuuLAQPINThNNPNQPGYQIPSQQRMQTQMQTQQlQQKGMLNQQLpTQTRLQQQHLQoQlNNNsQRVQQGQPGphN.sRQQVLPNTNGGMLs .......MKR.LLl.LTALLPF...VG...FA..QPINTLNNPNQPGYQ..IPSQQRMQTQMQTQQIQQKGMLNQQLKTQTQLQQQHLENQINNNSQRVLQSQPGERNPARQQMLPN.TNGGMLN........ 0 1 3 15 +10789 PF10957 DUF2758 Protein of unknown function (DUF2758) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 20.50 20.50 20.60 20.60 19.80 20.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.93 0.72 -4.15 7 170 2009-09-11 11:48:29 2008-07-29 13:15:39 3 1 166 0 16 74 3 59.30 56 94.78 CHANGED MlKVtVFDcEHEKDLpp-lNpFLKplc-spllDIKYsVAAh.p-s-tEQlYCFSAhIlY+p ..............Ml+VKVF..DEpH.E...K...DL..E...DAVNsFL.KK.IcDs..phVDIKYQ..Vuso...hss...--sQIY.CF.SAMIlY+s................. 
1 1 8 10 +10790 PF10958 DUF2759 Protein of unknown function (DUF2759) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillaceae. 20.70 20.70 21.30 23.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.57 0.72 -4.38 11 150 2009-01-15 18:05:59 2008-07-29 13:16:40 3 1 147 0 29 71 0 51.80 62 84.06 CHANGED IFuLVolLAlaGslRuLKcKNlLullFuuuohhVFGWFoIMTllasGaP.ssH .......IFsLV.TLLAVFATLRTL..RE..KNhLAsGFAlATVLVFGWFTIMTVLasGYP.sA.s........ 0 8 20 23 +10791 PF10959 DUF2761 Protein of unknown function (DUF2761) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as KleF however no function is known. 25.00 25.00 26.40 92.20 20.20 19.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.07 0.72 -4.36 5 34 2009-01-15 18:05:59 2008-07-29 13:22:50 3 1 21 0 4 19 1 92.10 78 70.28 CHANGED PYPPGFVCPsTGRVAVLVRDYAsSDLNGDAPAYWYSAQSEEWGLDPWRLVEGVDPHTpGGSaDVCFAsGSoRTVGPLMTFFL..uAuDAARLsA+cGc .PYPPGFVEPoTGRVAVLVR-YAsSDLNGDAPAYWYSAQSEEWGLDPWRLVEGVDPHstGGSFDVCFAsGuoRTVGPLMTFFL..SAucAApLsstc.s... 0 0 1 3 +10792 PF10960 DUF2762 Protein of unknown function (DUF2762) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as holin-like protein BhlA however this cannot be confirmed. 22.60 22.60 22.90 22.70 22.50 22.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.44 0.72 -4.27 14 187 2009-01-15 18:05:59 2008-07-29 13:23:53 3 1 139 0 26 134 0 71.30 32 92.17 CHANGED M.Ep-llplhhoQGhaAlLFshLLFYlLKpsc......................EREp+YQplIpc....lT-pLshlpshtccVp-hhp+ ...................tlhphhhoQGsaAsL...FlhLLF....alhKpsc..........................cREc+hQphI-+....hs.-phpslp....c....hhp-lpch.t............................... 
0 11 24 26 +10793 PF10961 DUF2763 Protein of unknown function (DUF2763) Pollington J, Finn RD anon PRODOM Family This eukaryotic family of proteins has no known function. 21.60 21.60 21.90 22.30 21.10 21.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.16 0.72 -3.59 10 138 2009-09-10 16:34:48 2008-07-29 13:24:32 3 3 107 0 90 143 0 89.80 34 86.04 CHANGED VYlSs.GsVh-.ppRSPWRLShlsDFFhGllsFIshFFpTlls.ssscstppssuusp+ascGtGssGsssGtpthGpIs+GuGPsssP..hGG ......................sYlsp.GpVh-....pposWRLuhIsDhFaGIhpFlhhF.FpTlls..............ssscp......tp.h.s.s.......s..s.sp.....c....a.....s.s.....Gt..Gs....sG.s.....s........stthGths.th..tGst...................................... 0 27 43 67 +10794 PF10962 DUF2764 Protein of unknown function (DUF2764) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 20.50 20.50 20.90 20.50 20.30 20.10 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.56 0.70 -5.17 10 168 2009-01-15 18:05:59 2008-07-29 13:25:16 3 2 164 0 42 131 10 218.90 28 88.05 CHANGED MopYYaLluGLPslslEDsKLsYols-FcsELtssLSccDpKLIDLhhhcFDNtNlLtahcss-stscscGp...lop-Elsshlush+-u-....sscaPsYFp-FlppYht..................psucEp......lsaEDcLuAhYYsYAMcsuNcFlpsWFpFNLslpNILsAhsuRKhphDlu.hl....VGcs-Vs-sLRTSsARDFsLu-ElshLpsllcluEcp-lslREKpLDll+WcWlE-ss.F.cYFslEplhAaLLpLtMIERWlpLDKc+GpphFRcIIsslK+ul ........................................................sp.YYhL.uhLs..t.-s..l.aohtphps.lh.thotpD.p.hslhhh.hD.tN.hthh..httcs......u...............hs.ppl.thht....t.t...........th.sah.pFl.ta.......................spp..............h...stLht.ahta.h.p..tspFlpsaapFp.slpslLsuhpuRhht.h...D...hu....l....su.ss.lhphLhpp.su..satLst.p.h..lppl....hp...tppsh..ht.....EcpLshhcW....phlE-hs.hhcYFsh-tlhuallpLthlpRhhphcp-+Gtp.F+pllpphcpt................ 
1 17 30 35 +10795 PF10963 DUF2765 Protein of unknown function (DUF2765) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 25.00 25.00 25.50 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.52 0.72 -4.03 23 119 2009-01-15 18:05:59 2008-07-29 13:28:37 3 1 113 0 21 93 0 81.90 40 88.01 CHANGED IsLslsG..sDhpFssTsssYNcalNshspssKVsPA.+NaLhpsVcsEpK-sLccLl....psPGu..shplsutlhcEasPclpIsVKc ..................IsLslsG..s-lsF.pPs.suYNpalN-hshsNKVuPA.+sYLhRhVs.s.EsK-ALpcll....chPGs..uhQlsupVs-.YuPcl-IpVKp........... 0 4 11 14 +10796 PF10964 DUF2766 Protein of unknown function (DUF2766) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 21.20 21.20 21.30 86.00 21.10 19.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.66 0.72 -4.09 3 372 2009-01-15 18:05:59 2008-07-29 13:29:13 3 1 371 0 20 76 0 78.70 90 95.56 CHANGED MScPLNsDQELVSDLVACQLVIKQILDVIDVIAPsEVRDKMSSQLKNIDFoTHPAAADPVTRRAIEKAIALIEMKF...TPp MSQPLNADQELVSDVVACQLVIKQILDVLDVIAPVEVREKMSSQLKNIDFTsHPAAADPVTMRAIQKAIALIELKF...TPQ... 0 1 6 12 +10797 PF10965 DUF2767 Protein of unknown function (DUF2767) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 20.70 20.70 20.80 21.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.27 0.72 -4.13 4 459 2009-01-15 18:05:59 2008-07-29 13:29:51 3 1 445 0 22 84 1 64.50 87 98.89 CHANGED MGNcsK--tLYQEMCRVVGKVVLEMRDLGQEPKHIVIAGVLRTuLANp+IpRStLphQAMEpVI+uLst .........Msp.sK.-D-LYpEMCRVVGKVVLEMRDLGQEPKHIVIAGVL.RTALANKRIQRSELE...KQAMETVINALVK...... 
0 1 1 13 +10798 PF10966 DUF2768 Protein of unknown function (DUF2768) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appear to be restricted to Bacillus spp. 20.00 20.00 22.20 22.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.73 0.72 -3.83 11 157 2009-01-15 18:05:59 2008-07-29 13:30:25 3 1 157 0 28 87 0 57.20 58 86.96 CHANGED KMWhulGuMGhMFluVssIYLSRaKlps+FL+hlsuhlAYhhMllSGlIlhhVVhSGP KMWhALGAIGFMFhAVuhIhLSRaKlKNKaLKuIsALVAYsLM.IVSGIlIFlVVFSGP.......... 1 9 19 22 +10799 PF10967 DUF2769 Protein of unknown function (DUF2769) Pollington J, Finn RD anon PRODOM Family This family of proteins have no known function. 20.70 20.70 23.30 21.20 20.30 19.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.90 0.72 -3.67 16 59 2009-01-15 18:05:59 2008-07-29 13:34:54 3 3 24 \N 52 56 1 55.10 30 45.26 CHANGED Ch..CssCPSastp.....tsctlFChpGpSptphhpc..GChCsp.CsVhpphpLpst.YYCh ..........Ch..C...Csoas.p..........................t.ttlaCspG.p.S.p.sphppcp.sChCss.CsVapc..a..pLpps....YaC......... 0 12 31 43 +10800 PF10968 DUF2770 Protein of unknown function (DUF2770) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as yceO however currently no function is known. 25.00 25.00 34.60 34.40 19.90 19.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.54 0.72 -4.28 6 426 2009-01-15 18:05:59 2008-07-29 13:36:04 3 1 421 0 15 52 0 36.00 77 92.93 CHANGED MRRLhshLlNNlREHhMLYlhLWhLLAllDllalaa .MRRLLHYLINNIREHLMLYLFLWGLLAIMDLIYVFY 0 1 3 10 +10801 PF10969 DUF2771 Protein of unknown function (DUF2771) Pollington J, Finn RD anon PRODOM Family This bacterial family of proteins has no known function. 
25.00 25.00 28.00 27.70 22.70 22.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.08 0.71 -4.96 13 173 2009-01-15 18:05:59 2008-07-29 13:37:06 3 1 173 0 40 114 0 161.60 33 94.94 CHANGED KR.....llulLhAslslllsuusGhhsWhLsRcsuPppPEISAYSHG+hsRVGPYpYCslls.c-.......Cpsspsp..GELsVst+tPlQLSlPpsIucAPWpLLplY-DPssssssh....aR..PsophAVTIP.oVDP.....pRGRLsGltVQLlTlVhDc..sGE.pslPHAEWSlcss ............hlAhl.sllVlllus..ss..uts.h.Wh.h.s.....ts.p.s.P.ppPcIosh.S..cGchpcVsPa.aCsls.s.p............................C......p.s...s.....p.st......spLs.......Vstc.tslpLSVPctIucsPWpL.Lpl.Y.pDPussspsh.........ap..ss.s.phu.VTls.......olDP.......pRsRLssltVph.l..........sl.s..........h.Dc..sG-.pshs.spWSVph.h............................................. 0 8 28 37 +10802 PF10970 DUF2772 Protein of unknown function (DUF2772) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as spore germination protein GE however this cannot be confirmed. 25.20 25.20 26.00 66.70 24.10 23.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.61 0.71 -4.04 10 140 2009-01-15 18:05:59 2008-07-29 13:38:11 3 1 138 0 20 94 0 122.30 52 94.64 CHANGED sShVpsh+lsSlGISSVlQlGDopcIsh+o+sLAVQRphslFassEG...ctcpFplFs.cPIPhPtsposVps.shhHEsPsIcVpul+lhGlSuSSllpIGSTstVpu-SRlKHIRQLhs.....Psup ...lSlVQNVsIlSLGIuuVFQVGDuNQhELKSRALAVHRElPsYl+sEG...+hDAFcIFTDEaITIP+.R.T.....TD....V+l.NIlNE.CPFIEVNNVclRTlLNSusFQIGNVDYVFsNSRIhQIRQaIT..P...up...... 0 4 12 14 +10803 PF10971 DUF2773 Protein of unknown function (DUF2773) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 
25.00 25.00 40.00 40.00 24.40 17.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.69 0.72 -4.01 3 466 2009-01-15 18:05:59 2008-07-29 13:38:37 3 2 290 0 3 167 0 76.90 66 18.48 CHANGED AL+NAHTPsuLLTTLTEPcaRSLAMNNPQLAADVKTAWLKEDPSLLLFVEQPDLSLLRDLVKTGATRKIRSEARH+LEEKQ ...........ALpNtHTPsulLss.h-Pp.h.lAhNNPphssDVhpAhLKcDP.LhLhlspP-LsLlRpLshsGtTRtIRppAh++L-Eh.. 0 2 2 2 +10804 PF10972 DUF2803 Protein of unknown function (DUF2803) Pollington J, Finn RD anon Pfam-B_1049 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 33.30 25.00 19.10 22.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.32 0.70 -4.83 51 345 2009-01-15 18:05:59 2008-07-29 13:47:24 3 2 277 0 85 277 98 147.80 25 71.11 CHANGED hhtllsLlhlhhus......sAhA..................pphacVEVllFcpss......tpssEpWs.tps..hpsspshslhs..............................................................................................................................................................................ttt.hhLspsphpLsshtpcLpppssapsLlHhuWpQshh.scssu..........sl+lhuGcphsppa.............................................................................................................................................................hpl-Gslplalp+.....YLal-ssLplpp 
...............................................................h....l.llhh..hhu.......suhA..................tt.aplElllFcpss............t.ssEpWs.tt...ht.pt...h...........................................................................................................................................................................................................hhs.pt.tL.......ss.hppLpppssapsLhHhAWpQs.t....ppu..........s.htl.uGpchttpa.......................................................................................................................................................................tl-Gplplhht+.....alhh-s.h.lp.......................................................................................... 0 20 38 65 +10805 PF10973 DUF2799 Protein of unknown function (DUF2799) Pollington J, Finn RD anon Pfam-B_1111 (release 23.0) Family Some members in this family of proteins are annotated as yfiL which has no known function. 25.00 25.00 31.90 31.10 23.20 19.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.42 0.72 -3.96 46 616 2009-01-15 18:05:59 2008-07-29 13:48:22 3 4 513 0 80 303 17 85.20 45 64.46 CHANGED ssWtphGhpDGhpGpstps.sphtctssp.sht..sshttYppGYpcGttpYCs.s.uYplGhp........................................GptYhGlCs.s.....pFtppYppGpppa .........DWYslGhcDAloGss.h..+.sscuau-s......p..ADpuhYhcGYtEG.pcchCpsDhsYthGLSG+sa...usCsssE.pAsph+csWQ+GtcE.p........... 0 14 32 52 +10806 PF10974 DUF2804 Protein of unknown function (DUF2804) Pollington J, Finn RD anon Pfam-B_1045 (release 23.0) Family This is a family of proteins with unknown function. 
25.00 25.00 25.30 26.60 23.80 23.20 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.95 0.70 -5.59 56 302 2009-01-15 18:05:59 2008-07-29 13:50:15 3 3 259 0 114 274 21 317.20 26 94.31 CHANGED GpspaGthpu.spslshtchshhp.hcpshssht+hh+hKpapahslhssc..ahlulAluDlpYlusuFsYlaDhcssp..................hhppshlpPh.............shthphsspshpu..ps...papptphplph..p......................t...psthclp..............hsspslps-lplp..sststslslssPsuhs.G..a.saTpKpsulslpG.pl..plsscs................hs...hs.................puhAshDaotGah++cT..sWpWAohsuhhpt..........G..plGlNL.uuGhs-..TussENslWlsGchahL.sslpFcas+......ps.h.......t.........sW+lpS.....p.cscl...-LpFpPhspRp-+hNLhlltSsF+QhhGpFsGpl.hsss...GpplplcslhGhsEDHaA+ .............................h.s.hpt.spplshpch.hhp.hppslpu.......+hh+hKcapahslhssc..ahlulsluDlsYluhuhsYlhDh.pss.p.........................hh.ppshlpP...h......................ththphsps....shpu..ps.........ph.pp.tphplph...p...................................................p..puthplp.................hpst.slphchplp....ssstpsl...slssPhuhss....apYTpKps.slsspG..pl...phsspp.......................................hs....hs......tpuhAshDaupGhhphcT..tWpWu.uhsuhsp.....................G...tlGlNL.usGhs-....os..tsENsla.lsG...c..hphL.ssVpFchsp........................ts.h...............p................sWplpo.....t.sspl...-LpFpPhhpRp.pc..hNlhl...ltSsh+QhhGpFsGpl.thss...GpplplcshhGhsEcphA+.............. 0 52 83 103 +10807 PF10975 DUF2802 Protein of unknown function (DUF2802) Pollington J, Finn RD anon Pfam-B_1126 (release 23.0) Family This bacterial family of proteins has no known function. 
25.50 25.50 25.50 25.50 25.10 25.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.14 0.72 -4.22 44 303 2009-01-15 18:05:59 2008-07-29 13:52:17 3 1 299 0 84 222 66 70.00 42 48.15 CHANGED lshGpcltclcpplp.plpp+lpcl.......p.ppDPpsthYscAsKhVphGAsl-ELhppCsLs+AEAELlhpL+pp .....................................luhGp+lpEhpchlp.pLs-+l.pcL...........E.ppDs.suphYo+AsKhVpLG.AslsELhpcCcLs+AEAELhhpLpp.... 0 22 43 66 +10808 PF10976 DUF2790 Protein of unknown function (DUF2790) Pollington J, Finn RD anon Pfam-B_1206 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Pseudomonadaceae. 20.70 20.70 21.20 32.40 19.30 18.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.86 0.72 -4.19 48 247 2009-01-15 18:05:59 2008-07-29 13:56:06 3 1 62 0 76 187 5 82.00 36 89.77 CHANGED hhlsshussAhApssssps.......................s.lppYcYGMpLDlA+Vluhos.ss..ssC.pVVPupMsYcDSpGch+tlpYpthGs.uCpp ..........................h.hlhshu.hAhAtpstptstt.....................stlcpYpYGMpLDlA+Vluho...sss..ssC.tVVPupMsY-DSpGph+slpYpshGs.uCs...... 1 5 19 46 +10809 PF10977 DUF2797 Protein of unknown function (DUF2797) Pollington J, Finn RD anon Pfam-B_1162 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 25.50 25.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.62 0.70 -4.73 54 434 2009-09-11 12:03:15 2008-07-29 13:57:29 3 2 422 0 141 419 288 227.10 37 82.58 CHANGED G+plslpasGtI..pClsCG+cTKKSasQGaC.........asChpcLA.....pCDtCIM+PE...tCHactGTC.R-PpWucptChpsHhVYLA..NoSulKVGITRpoQ..lPTRWIDQGAspAlsIhcVssRhhuGllEstLpcp..luDKTNW...RpMLKups-.slDLhspcspLhphlspplppl.......................h.thst.......ht...p..thhplsaPV.paPpKlpShNhDKsPplp....GsLhGIKGQYLIhDsG....VlNlR+asGYpls .............................................................GpplplpahGtI..hCspCG.c+TcKSau.QG.aC..................asCh.p.c.lA......pCDhCI.h+PE.....pCHact..GTC.R-spaucphChp...sHhVYLA....NoSslKVGITR..pop..lPTRWlDQGAsQAlPIhcVsoRhhuGhlEstLtpp..luD+TsW...RphLKGc.sp...slDLhthpppLhtt.h..sp.tl..tpl............................ttphu..........slp..l..pt...t.hplpYPV...pa...PpKl..p.Sh.N.l-..KsPhlp....GhLhGIKGQYLlh.DsG.......VINlRKasuYpl............................................................................ 0 34 85 122 +10810 PF10978 DUF2785 Protein of unknown function (DUF2785) Pollington J, Finn RD anon Pfam-B_1219 (release 23.0) Family Some members in this family are annotated as hypothetical membrane spanning proteins however this cannot be confirmed. The family has no known function. 
25.00 25.00 25.20 25.10 21.60 24.50 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.76 0.71 -4.82 29 640 2009-01-15 18:05:59 2008-07-29 14:24:14 3 3 450 0 63 382 3 163.20 34 63.61 CHANGED LhhcIspttsD.ulFpRSFos...LllAhllptcpp.....h.hLosp......phpplhpphlpYhthEpDhRGalppcGWAHuhAHsuDhLspLsppsph.spschhhl..lpslh.phlpp.sthahssE--RLupslhshlpps.lspcplhthlpph.....tshsshpt..t...s.hhthhNhppFLpsLhlp ............................Lh.pl...p.tpt.ushpRoFss...LlhuhlLpscpp.....h.hLopcphpslhsQhlpYlshE+Dh+Gas.p.phGWsHuhAHuADhLsEllppsph.sppphcEl..hssLt.phh.+h.hohhaps.sED.RLspslhphl.ps.lpp-plhshl+sl......phs..cp......p.hhh.htNh+shLpplahp............................ 0 24 37 46 +10811 PF10979 DUF2786 Protein of unknown function (DUF2786) Pollington J, Finn RD anon Pfam-B_1231 (release 23.0) Family This family of proteins has no known function. 22.20 22.20 22.30 22.20 21.90 22.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.99 0.72 -4.24 127 678 2009-01-15 18:05:59 2008-07-29 14:24:43 3 4 601 0 146 563 51 41.60 35 15.60 CHANGED pKhLp+ItKLLthup.u..ss.pEAtsAhppApcLMtcaslcts ....pKhLp+Ip+LLthAp.u......ssttEAtsAhppApcLMtcaulst.......... 0 43 88 127 +10812 PF10980 DUF2787 Protein of unknown function (DUF2787) Pollington J, Finn RD anon Pfam-B_1249 (release 23.0) Family This bacterial family of proteins has no known function. 19.70 19.70 20.70 21.70 19.30 19.10 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.85 0.71 -4.19 38 284 2009-09-11 12:53:47 2008-07-29 14:26:16 3 1 121 3 38 170 13 107.90 36 89.71 CHANGED sppLtplLsphls......t.pt.ssuptlslNFRD.sYSA-pGGaHPVEIplp+.....psspWplsYITDFuYhG.shasELp+-LDFcFpst.sapthtG.hhslp...psp-LaplWppNFlsY.hshssac.lplos ............s...hpthLtthlp......p.ph.tpuctllhNhRD.sY.tcptGhHPVEltlpp.....pps.W.l.alssFuY.s..s.hspL-hpL.Fchtpt.haps.tGhhs.ltts.-st-LaplWpssFhta.lshpsap-lplT............. 
0 12 15 27 +10813 PF10981 DUF2788 Protein of unknown function (DUF2788) Pollington J, Finn RD anon Pfam-B_1255 (release 23.0) Family This bacterial family of proteins have no known function. 25.00 25.00 39.30 39.10 22.70 18.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.23 0.72 -4.17 33 319 2009-01-15 18:05:59 2008-07-29 14:26:59 3 1 319 0 80 181 25 51.90 46 72.55 CHANGED lhlsslhhaMuFIIaDLuK+SpAG+FGphllFhsLulGshGFlhKsllphhh .....hlsulhhFlGhhIhDlhKpupss+FGphIlaLVLhLGshGFlsKslIphhh... 1 16 42 63 +10814 PF10982 DUF2789 Protein of unknown function (DUF2789) Pollington J, Finn RD anon Pfam-B_1269 (release 23.0) Family This bacterial family of proteins has no known function. 20.40 20.40 21.50 69.20 20.20 19.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.49 0.72 -3.96 60 290 2009-01-15 18:05:59 2008-07-29 14:30:47 3 2 263 1 93 240 24 74.30 46 91.22 CHANGED M-psppshssLFpQLGLsssppuIcpFIspHp..LssslpLt-AsFWosuQtsFL+EplpcDA-WuEllDpLsstL .....M-psppshspLFpQLGLsusctuIcpFIspHp..LsschpLt-AsFWosuQusFL+EplpcDA.-WutllDpLshtL. 0 19 43 70 +10815 PF10983 DUF2793 Protein of unknown function (DUF2793) Pollington J, Finn RD anon Pfam-B_1370 (release 23.0) Family This is a bacterial family of proteins with unknown function. 21.20 21.20 21.30 26.60 21.10 20.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.98 0.72 -4.18 50 230 2009-01-15 18:05:59 2008-07-29 14:32:52 3 6 199 0 76 196 45 87.10 41 24.90 CHANGED HNEALphL...DuLVQLuVtutshssPPusPupGspalluuu.A.oG..A.WuGps.GplAhapsGu..WtFlsPpsGWtsaltsc....uthhl..ac..GusWt .......................HNEALphL...DulVpLuVpuhshssPPuoPA-GDRYIVuus.A.oG....A.WuGps.GplAta.s..Gu.....WhFlsPts..GWhuaVssE..............utlhl..ac..GusW............. 
0 19 52 62 +10816 PF10984 DUF2794 Protein of unknown function (DUF2794) Pollington J, Finn RD anon Pfam-B_1384 (release 23.0) Family This is a bacterial family of proteins with unknown function. 21.70 21.70 23.70 42.40 19.70 16.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.81 0.72 -4.12 33 238 2009-01-15 18:05:59 2008-07-29 14:34:32 3 1 235 0 81 190 1084 84.10 62 68.45 CHANGED VtFcRpELspILsLYGRMVAuGEWRDYAIDhh+DtAlFSlF....RRuuEhPLYRIEKcPcLup+QGtYuVlutsG.pILKRGp-LtpVL ..sFcRpELstILslYGRMVAsGEWRDYAIDaL+D+AVFSlF....RRuuEhPLYRIEKsPKLtpKQGtYuVlussG.hILKRGH-LppVL................. 1 21 51 61 +10817 PF10985 DUF2805 Protein of unknown function (DUF2805) Pollington J, Finn RD anon Pfam-B_001474 (release 23.0) Family This is a bacterial family of proteins with unknown function. 20.80 20.80 20.80 55.90 19.30 19.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.45 0.72 -4.03 58 227 2009-01-15 18:05:59 2008-07-29 14:35:38 3 1 217 0 75 214 1538 70.80 51 81.03 CHANGED lsRIIEMAWEDRTPFEAIctpFGLsEppVIpLMRppLKsuSF+hWRKRV.oGRpTKHtth+s....ths...RhhsssQ..h ....sRIIEMAWEDRTPFEAIctpaGLsEs..pVIplMRppLKsuSF+hWRKRV.oGRpTKHtphRs......pht.....Rhhs.pQ................ 0 28 57 68 +10818 PF10986 DUF2796 Protein of unknown function (DUF2796) Pollington J, Finn RD anon Pfam-B_1354 (release 23.0) Family This bacterial family of proteins has no known function. 
25.50 25.50 25.80 29.10 24.30 25.40 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.00 0.71 -4.27 46 343 2009-01-15 18:05:59 2008-07-29 14:37:18 3 2 245 0 71 281 191 133.20 31 85.86 CHANGED tsAH.HGtuclslAh-G.ssL.lplpuPuhslVGFEHAspocp-+ttlspAhspL.ppstpLFslssuAsCplppsplctshhtt.t.t...................+-+csHD....................+pp........................................pHu-hpupYpapCssssp...Lpplshs.hFppFPuscclpVphlsssuQpuscLosssspl ..........................tAH.HG.splsls.-G...ppL.hchpuPuhslVGFEHsspsstpcttlttAht.L.tps.tlFsls.tAtCphtp..lptshhsc.ptpc.t.c.....................+-+.c.tHD.............Hptc...................ccptpccttt.........................................pHu-hpspYphpCtt.t....Lptlpht.hFphFPshpplpsphls.ptQtuhplt.tps.................................. 0 19 39 58 +10819 PF10987 DUF2806 Protein of unknown function (DUF2806) Pollington J, Finn RD anon Pfam-B_001505 (release 23.0) Family This bacterial family of proteins has no known function. 24.50 24.50 25.10 24.80 24.40 21.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.22 0.70 -4.85 32 239 2009-01-15 18:05:59 2008-07-29 14:37:38 3 1 199 0 53 155 23 210.80 36 73.28 CHANGED tpRutpRtptpttppQpNlEsIhthAhsh...s.s-ssucs..hDsDWltpFhphAccIpsppMQpLWu+ILus.ElppPGoaSh+oLpsL+phTp+EAphhp+ssuhusphss-tp.cll.uhh.t.s.hphhpc.sspslsluphGlsauslLpL.-lGllptsE.....hEouplshsptlphphpspshpLps+pss.........lhhsYY+hTssGsELsp........Llss.........cssppYhctLts .......................................h.pRAhpRtpppptp+QpNlEpIhthAtsp......scs-..ssu-s...DpDWlh+FF-hApcIpNspMQcLWAplLtp.ElsNPGshSh+sLchL.psMT.KEApllp+ssuhusphGu.-pph+ll.Ghh.t.....................uhhshsc+hsspsl.slupatLsYuSLLhLh-lGLlpssE......hEoG.cl..phcssl.hls.Y.pGpsh...p.....Lpspscs.........lplhYY+FTssGsE.Lsp........LlGs.........KsstpYa-tl..s............................................... 
0 10 23 39 +10820 PF10988 DUF2807 Protein of unknown function (DUF2807) Pollington J, Finn RD anon Pfam-B_001516 (release 23.0) Family This bacterial family of proteins shows structural similarity to other pectin lyase families. Although structures from this family align with acetyl-transferases, there is no conservation of catalytic residues found. It is likely that the function is one of cell-adhesion. In PDB:3jx8, it is interesting to note that the sequence of contains several well defined sequence repeats, centred around GSG motifs defining the tight beta turn between the two sheets of the super-helix; there are 8 such repeats in the C-terminal half of the protein, which could be grouped into 4 repeats of two. It seems likely that this family belongs to the superfamily of trimeric autotransporter adhesins (TAAs), which are important virulence factors in Gram-negative pathogens [1] [2]. In the case of Parabacteroides distasonis, which is a cmoponent of the normal distal human gut microbiota, TAA-like complexes probably modulate adherence to the host (information derived from TOPSAN). 
21.90 21.40 21.90 21.50 21.80 21.30 hmmbuild --amino -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.03 0.71 -4.75 118 860 2012-10-03 02:33:51 2008-07-29 14:38:36 3 13 349 33 304 1003 203 162.60 17 72.10 CHANGED sFsplp.lssshslhlp.p...u.spp.plplpuscsl.hsplpspl..c...sspLpIphc......cshsssp..............h.......hlplshss.Lptlshsuu.uplpsps......................lpspphplph............sGuGslplp.lpspplpsphsGuuslp.ls....................Gpspphpl.pls......GuGplcupsLp.sppsplplsGuGshplt.sspplcupls..GuGsltht..GsP ......................................................................................................................................................aptlt.ht.s.sh.pl.hh..p....u....st...pl...l....p..s...t.p..p.h..hp.l....p.hth...p......sstLh.lthp.........................pthp..t..................h........lhl......t...h...p...l.ptlp.h....s.u...s..uslp..hps...............................................................l.p.s..s.p..h.p.lph................................sGu......usl...p.h...p...l..p.s.s...p.l.php.h..s.G..u.uslp..lp.......................................G.p..s.p.phph..phs..........Gsu...pl..ps...t.p..lt....sppsp.h.ph.pusuphph.....s...p...t..p..l...p..hp...ht....usupl.h.s........................................................................... 0 98 229 290 +10821 PF10989 DUF2808 Protein of unknown function (DUF2808) Pollington J, Finn RD anon Pfam-B_001529 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 
22.20 22.20 22.30 23.00 22.00 22.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.66 0.71 -4.58 57 175 2009-01-15 18:05:59 2008-07-29 14:42:16 3 6 79 0 83 185 160 141.50 22 69.84 CHANGED uhh.F.......s.ps.pplphhpspsttp..ptspahhhlhspcttpultplslsh...Pctacs..thpspplplpthp.s.........tpplP...stsphsp...s....spslplh.spPlssG.sslslshpslh.N..PptsGhYpFsspuhssGss..PhstalGohplsl .................................................F....sps.phlphhts.stht..tsphYhhhhhspstspsltplslsh...spthcs....hphptlpspsh..t....................spsls...tpsphsp...c....spslslhhspPlssG.sslslslpslp.N...PphsGhYhFsspshssGps..shshalGshhlph............ 0 16 57 79 +10822 PF10990 DUF2809 Protein of unknown function (DUF2809) Pollington J, Finn RD anon Pfam-B_001533 (release 23.0) Family Some members in this family of proteins are annotated as yjgA however currently no function for the protein is known. 23.50 23.50 24.70 24.30 22.90 22.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.96 0.72 -3.58 49 416 2009-01-15 18:05:59 2008-07-29 14:43:19 3 4 395 0 103 363 9 94.20 33 68.37 CHANGED GLhs+p........salpsahGDlLhshhlahhlthlhst............shhhluhhsllhuhhlEhhQlapsshl.sl+ssplutllLGssFsWhDlluYslGhh ..................................h.s+c........shlpsYlGDsLaslhl.ahhhthlhsph...................pstpluhhuLhFshhlEhuQLapsshl.slRss...slutllLGp..sFsWtDlluYslGs........ 0 38 73 87 +10823 PF10991 DUF2815 Protein of unknown function (DUF2815) Pollington J, Finn RD anon Pfam-B_002212 (release 23.0) Family This is a phage related family of proteins with unknown function. 
21.70 21.70 22.70 22.30 20.90 20.60 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.13 0.71 -4.74 20 443 2009-01-15 18:05:59 2008-07-29 14:50:00 3 2 386 0 55 299 47 172.00 42 92.86 CHANGED TKVlTsp.VRLSYs.plaEPcuh.sGp.....csKYSloll.......IPKuDspTlpuIcpAIcsAhcEGh.uKh...GsKhs..sslKhPLRD.GDh-ps.D-..tYss......saFlNAsS+p...+Ptll.....D+sspP...............l...h-psEVYSGCYuRsSIsFYAasssGNKGIusGLsNlQhl+DGEsLG.G+s.sAEDDF-slp..s-DD .................................................................................................hKVlTGp..VRhSYs.plaEP+oh....pGp.....csKYSloll.......IP...Ko.D...sp...Tl.....p.......tIcp.AI-sAhc-..Gh..uKh......Gtpls.........usLK.h.PLRD.GDh......E...Rt.....-..........D....sY....ts......saFl..NAsSpp.....tP.tll.........Dpstp....................l....h-puclYSGsYsRsSI..s..h..au...a.N..o..NGN....KGIAsGLsNlQhlc..DGEsLG.Gts...uA-DDFsshsps............................................. 1 28 46 52 +10824 PF10992 DUF2816 Protein of unknown function (DUF2816) Pollington J, Finn RD anon Pfam-B_002257 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.60 20.60 37.80 36.10 20.40 17.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.24 0.72 -4.38 12 70 2009-01-15 18:05:59 2008-07-29 14:53:14 3 1 3 0 60 73 1 63.00 55 23.64 CHANGED EYEppppp..VPhp+hlTDYYsVEYpTEYIPQshhEK.lEYVPV-+ht-RV-YhsVERpsshp .EYEEpppp..VP+E+hVTDYYAVEYQTEYlPQVh.EKhsEYVPV-RhpERVEYhsVERQVV+p...... 0 60 60 60 +10825 PF10993 DUF2818 Protein of unknown function (DUF2818) Pollington J, Finn RD anon Pfam-B_002280 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 25.20 25.20 22.80 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.20 0.72 -3.49 21 217 2009-01-15 18:05:59 2008-07-29 14:54:45 3 2 214 0 65 167 17 94.20 50 91.41 CHANGED uulWlllLLALluANLPF.lspRlhslh.Phts......KshhhRLlELllhYhlVGhluhhLE.pphGpltsQGWEFYAIThsLFllhAFPGFVYRYLh++ .....................uhahlllLALlsANLPF.LspRLFull.Pl+p............Ksh...hh+lhELlshYhlVGsLuahLE.uRuGshasQGWpFYAlThsLalVFAFPGFlaRYhh+................................... 0 12 36 51 +10826 PF10994 DUF2817 Protein of unknown function (DUF2817) Pollington J, Finn RD anon Pfam-B_002258 (release 23.0) Family This family of proteins has no known function. 19.60 19.60 19.60 19.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.95 0.70 -5.36 32 141 2012-10-02 19:46:12 2008-07-29 14:55:34 3 3 121 0 72 727 173 323.40 32 89.81 CHANGED F.up.oYspARp+FLsAAcssGs.....plpsashsh..GssGEsLuhDlAhhGsscApp.lLlloSGsHGlEGasGSusQlshLp-t..hspshssssul.LhlHAlNPaGFuahRRssEcNVDLNRNFl.......DaspshssNssYsclpshLlPsphsss...tt..tptslhphhtp+GhtshppAlssGQYpaPcGlaaGGptssWSstsL+pllpcah.sss..c+lsaIDlHTGLGshGtsphltts..pcssshp+spphaGst....ltshhss......sususshpGhhhpu...h..hphhsshp.hsulslEF.....GThsshpVhpALps-pWLah.................asc.sss.sptttl.+pplt-AFYPssspW+ptVltp 
..................................................................................................................FupsYtpARp+FlsAA...pstut.....tlppa.pst.....G.....ssGEsLsh...Dl.Ah.......hGs.t.c..A..p+..lLllsS.GsHG...V...E..G......asGS.us..Q.....l.....s..h....Lpct..........hs...p...th....s........s...ss...ul....L.....hl.HAl..NPaGa..A..a..h..R..Rs.sEcNVDLNRNal........Da.s.p..shss....N...ssYtpl.p....shLhPsphsss...tt...stttl.tphhtpcG.suhppAlstGQYphPcGlFYGGptssWSptslcpllpcah..uss.......pclshIDlHTGLGshGtsph...lhts...psssthpcAt..p..h.aGss.............ltshhsu.......sususshpGhh...hps...h....hphh.s...ptp...hsslslEF.....GThss.......p...h..hpALp......s-pWLah.................a.sc.sts..sttttl.+ptlt-uFYs.ssssW+thll......................................................................................... 1 19 37 62 +10827 PF10995 DUF2819 Protein of unknown function (DUF2819) Pollington J, Finn RD anon Pfam-B_002304 (release 23.0) Family This bacterial family of proteins has no known function. 19.10 19.10 19.80 20.60 17.80 17.50 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.60 0.70 -5.67 20 554 2009-01-15 18:05:59 2008-07-29 14:56:58 3 1 539 0 47 328 5 305.80 70 59.78 CHANGED hstp+tlu.GLAsLpp.spsphtapVsaWpscpGVsuspphhLptsssshhhsssppsstpst.......tsDEcRlhsspssl-tus.lPppWplh-sNssLhstups..spAATllFuhsppspl-tLApplapLRRppGstLKIlVREs.pssLRtsDEpLLLssGANLllstssPhSRhLThIEulQGQhFoR.lPpDlcsLLsh.spshth+GYlssssFsptVpshhssohhsp.lptsLVpLphlPGlpstpsLpLC+hRRsGDlsTsssspLYLFLpACRhNDl-sALspIFclPlsDlFsschlatsptpIpuElppl .....................hpEYRSLF.GLASLRF.QGDQHLhDIAFWCNEKGVSARQQLslpQQsshWTLsppEEstIQP.........RSDEKRILSsVAVLEG.APPLSE.HWpLFsNNEsLFN-ART..AQAATlVFSLpQNsQIEPLARsIHTLR..RQRGS..AhKIlVREN.sASLRATDERLLLuCGANMVIPWNAPLSRCLThIES.VQGQpFSRYVPEDITTLLSM.TQPLKLRGFQp...WDsFCsAVpsMMsNsLLPAcGKGVLVALRPVPGIRVEQALTLCRPNRTGDIMTIGGNRLVLFLSFCRlNDLDTALNHIFPLPTGDIFS....NRMVWFEDcQISAELVQM...................................... 
1 5 11 29 +10828 PF10996 Beta-Casp Beta-Casp domain Mistry J, Wood V anon Manual Domain The beta-CASP domain is found C terminal to the beta-lactamase domain in pre-mRNA 3'-end-processing endonuclease. The active site of this enzyme is located at the interface of these two domains [1]. 20.30 20.30 20.50 20.80 20.20 19.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.83 0.71 -4.19 183 2338 2009-09-11 08:10:03 2008-07-30 13:33:19 3 44 1375 45 1245 2305 359 124.90 25 21.06 CHANGED sQEllhh.......Lpchhp...ptt...........................hslalDSshuhcssplap..pa..chhs...p.h....pphht.tt...................................h.ph.phh..............cshcpscp.........l..p..t.t.P........s...lIluuoGM.hsuG....hlhcalcphhscs+Ns.llhsG...aQupGThG......+pl .......................................................................................sQELlhh.......Lcphhpptt..............................................tphPlals..SshAtcssplap..pa..hp..hhs.ppht............cphhttpt........................................F...p..h..phl.........................................pshccppp...........l...tt.t...t..P............slllAusGM.h.puG...hshcah.c.ph....t....s....c.t+....Ns.llhsGYpspGThG+pl................................... 0 446 759 1049 +10829 PF10997 DUF2837 Protein of unknown function (DUF2837) Pollington J, Finn RD anon Pfam-B_002349 (release 23.0) Family This bacterial family of proteins has no known function. 
20.40 20.40 21.80 21.10 19.60 19.60 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.45 0.70 -5.22 23 271 2009-01-15 18:05:59 2008-07-30 15:56:18 3 3 243 0 64 198 15 241.70 42 92.79 CHANGED LThlIHlIsTLAYusRluGVRTt+lAsAhSLFNlhsLlSRsuNhhQuPLlupll-pulp.....................tpthusLtsphRh.llhuATlGTllGhlLlPTFlplFs+AIhth.cpsGSVP+Lhh+sh..ohpslcphcpplplPphp.lcpht...hppIP+cllllNhllouIYTlGVLuALYAGhLsPE.aRsTAutLSullNGhATILhslhlDPphSllTDcslcGcpspt-l+phshhlsho+llGTLLAQhlFlPuAhhIsalsc .........................hThlIHsl-TluYulRLuGVRsp+IAlAlSlhsllhLlSRTuNhlQuPLlGtlVDpuhh......................sss....hsL.t.hRl.lLluAolGTlluhlLhPThh+lFuhsIpph.-ssGS.h.+hhhpsh..sh.p.tl.+hh+c..hl+hPphphlcplp...htsIPK+lhllNhhsTuIYTsG.VLSuLYAuhLh.P-.atosAsshSGlINGhATILLslhlDPplullTDcslpucpstpshpphhshLhhoRlhGT.LLAQllhlPuAhhIhalh.p....................... 0 24 47 53 +10830 PF10998 DUF2838 Protein of unknown function (DUF2838) Pollington J, Finn RD anon Pfam-B_002381 (release 23.0) Family This bacterial family of proteins has no known function. 25.20 25.20 28.20 28.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.86 0.72 -10.55 0.72 -4.18 23 224 2009-01-15 18:05:59 2008-07-30 16:17:06 3 5 177 0 166 216 8 108.90 41 24.04 CHANGED -Klsashulhslh....hsualhGthPphFhhaYTshhhhhMPlRaYoYpKpsaHYFLsD....hCYFlNhLsLlaI.....W.........laPpStpLFlssasluhGsLuhAlIsWRN.SLVhHSlDKlTS .........-KlsahhGVh..slh......hsualhGthPphhhhaYol.hhhhhPlRa.as.Y++..p...saHYFLhD....................hCYalNh...Lhllhl......W...................haPp..SppLFhs.sasluhGsluhAl.lhWRN.SLVFHShDKlTS.................................. 0 67 116 150 +10831 PF10999 DUF2839 Protein of unknown function (DUF2839) Pollington J, Finn RD anon Pfam-B_002396 (release 23.0) Family This bacterial family of unknown function appear to be restricted to Cyanobacteria. 
21.90 21.90 22.90 23.50 21.40 21.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.45 0.72 -3.94 37 107 2009-01-15 18:05:59 2008-07-30 16:21:36 3 2 77 0 38 103 267 65.90 40 86.86 CHANGED MGEAKRRppp.GLsP+ptcppp.........hlshlPlocpQsp.hhtloh+usWlGIuhLslhWlslphl....GPssuWW ........MGEAKRRcph.sLss+ttpppp...........llsWlPlT+sQsc.hhphos+uuWlGIuhhshhWlslRhlGPshGWW........... 0 6 25 36 +10832 PF11000 DUF2840 Protein of unknown function (DUF2840) Pollington J, Finn RD anon Pfam-B_002399 (release 23.0) Family This bacterial family of proteins have no known function. 25.00 25.00 25.50 55.60 21.70 19.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.87 0.71 -4.66 34 168 2009-01-15 18:05:59 2008-07-30 16:25:56 3 1 109 0 89 180 13 146.90 51 87.02 CHANGED LTcVELsWlEKRlEpWIRFG+sspEpllDRpRRlsuFtPuulFAFVRWAuNDaGTllSRlDIlRAlsPG-uaQTlPaV+PGG-lLL+lpGWPKVEcVLptIDAlEAlGIDPs-suPDaWRHVHNRLuAut.PRsYTt-RHtAWLtRRpl ................................LT+VpLsalEcRlEhalRFGcsucpphL.....DRpRRlssFtPGulFuhVRWtANDaGTlhSplDIlpAssPs-shQTlPhVcPGu-lLL+hcGWs+VcpVLptIDAIEAlGIsPs-VuPcaWRcltNRLuAs.s.psYTtERHtAWLtRRt.h...................... 0 9 54 74 +10833 PF11001 DUF2841 Protein of unknown function (DUF2841) Pollington J, Finn RD anon Pfam-B_002409 (release 23.0) Family This family of proteins with unknown function are all present in yeast. 
23.50 23.50 24.30 30.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.87 0.71 -4.27 34 181 2009-09-11 16:39:17 2008-07-30 16:29:38 3 2 115 0 128 184 0 134.50 39 28.24 CHANGED L.l.uss.ptlhpaYppsFcplQQhsC+tIAKAaIKllEP+KQspa.PYsttc..............poKPsWWP........ssVpH+EPDHLpK....................-RlpLLlaIlpphh.............hhsst+Lcpsstcstpt.hps.......t+hplLcEIacVtchE .........pluspptlpsYYcpuFcshQQhsC+tIAKAaIKllEP+KQspa.PYsGuc..............poKPsWWP........psVhH+EPDHLpK...................s-RlcLLlHIlpplht............htl.ssc+Lc-sstsspp..p...hps.......c+h.lLcEIacVtc..................................... 1 19 56 103 +10834 PF11002 RDM RFPL defining motif (RDM) Mistry J, Bonnefont J anon Manual Domain The RDM domain is found on RFPL (Ret finger protein like) proteins. In humans, RFPL transcripts can be detected at the onset of neurogenesis in differentiating human embryonic stem cells, and in the developing human neocortex [1]. The RDM domain is thought to have emerged from a neofunctionalisation event. It is found N terminal to the SPRY domain (Pfam:PF00622). 27.00 27.00 45.00 58.70 22.70 20.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.76 0.72 -4.29 23 72 2009-01-15 18:05:59 2008-07-30 16:34:42 3 3 20 4 24 79 0 42.00 65 13.91 CHANGED VVSQKsDI+PshQLGpLVS+IKELEPpL+slLpMNPRM+KFQ .VVSQKNDI+PshpLttLVS+IKELEPKL+plLpMNPRMRKFQ 0 6 6 7 +10835 PF11003 DUF2842 Protein of unknown function (DUF2842) Pollington J, Finn RD anon Pfam-B_002411 (release 23.0) Family This bacterial family of proteins have no known function. 
25.00 25.00 30.20 30.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.13 0.72 -4.11 51 214 2009-01-15 18:05:59 2008-07-30 16:46:55 3 1 213 0 71 172 148 61.50 37 83.93 CHANGED RKhluhllLllhlslYhllusslsshhh....p.shhlphlhYlllGllWl..LPhthlhphhsps- ...RKhlGsllLlshlhlYullAsslAsthlh....ssshWscLlaahlsGllWl..LPAhslltWMupP..... 0 17 44 53 +10836 PF11004 Kdo_hydroxy DUF2843; 3-deoxy-D-manno-oct-2-ulosonic acid (Kdo) hydroxylase Pollington J, Finn RD, Eberhardt R anon Pfam-B_002426 (release 23.0) Family This is a family of 3-deoxy-D-manno-oct-2-ulosonic acid 3-hydroxylases, which catalyse the conversion of 3-deoxy-D-manno-oct-2-ulosonic acid (Kdo) to D-glycero-D-talo-oct-2-ulosonic acid (Ko). It contains a potential iron-binding motif, HXDX(n)H (n>40). Hydroxylation activity is iron-dependent [1]. 25.00 25.00 38.70 74.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.69 0.70 -5.41 13 168 2009-09-10 20:14:51 2008-07-30 16:52:13 3 2 164 0 50 124 11 277.40 55 93.40 CHANGED cWsspss.sc.ttshhstLEpGKVLaFPcLsFsLsscEc.shLcPsh........sDsKpKNIShcPppGsl+G............ssscssstttlpullsRapppstsLlppLLPpYsssL+husTShR..PsphtsRssSWRKDDoRLHVDAFPSpPshGcRILRVFoNINPcstPRlWRlGEPFcshA+RFl.Ppsps.hP.hpuhL...........LptLtlTKph..........RSpYDHhMLpLHDtMKuDh-YQ+suPQ.pphsFPPGooWlsFSDQssHAsMSGQaMLEQTaaLPscAhtcsppSPLplLE+LsG+sL ..........t.Wstp...ss.tpphltuLEpGKVLaFP+LpFslpstEc.tLLDPsl........sDsKRKNIShcPptGsLpG............VsG.D...uss..utlRsLlsRatppAtsLVssLhPpYp...stL+sAsTSLR..PhpltsRpTSWRKDDSRLHVDAFPSRPNYGERILRVFTNlNPpGtPRlWRVGEPFpslA+RFL.Pplcs.sP.hsAWL...........hchL+lTKp...........RStYDHLMLpLHDtMKADL-YQKsuPQ.QshsFPPGSsWlCFSDQssHAsMuGQFMLEQTFaLPVpuMtpsppSPLtILE+LpG+sL... 0 9 26 37 +10837 PF11005 DUF2844 Protein of unknown function (DUF2844) Pollington J, Finn RD anon Pfam-B_002433 (release 23.0) Family This bacterial family of proteins has no known function. 
21.20 21.20 21.90 40.70 20.30 16.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.63 0.71 -4.36 22 138 2009-01-15 18:05:59 2008-07-30 16:55:01 3 1 72 0 46 122 8 136.00 42 85.09 CHANGED ApAsLGusssossusstshst.........................sttssssssuYol+phs.usGT..........slREYsssuGhVFAluWpGPshPDLssLLGoYhspahsuspt....tttsttshtVpssDlVlcouG+htsasG+AalPshlPuGVsss-l ........................ApAtLGGsshossAsststss.........................ssuussss..suYTl..RphshuuGT..........sl+EYsosuGsVFAluWpGPshPsLssLLGuYFPpYpuuspssc....tA+uspssstVssuslVlcoGG+MtuasGpAWLPsALPuGlossDI... 0 5 18 29 +10838 PF11006 DUF2845 Protein of unknown function (DUF2845) Pollington J, Finn RD anon Pfam-B_002437 (release 23.0) Family This bacterial family of proteins has no known function. 21.00 21.00 21.30 21.10 20.80 19.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -9.88 0.72 -4.15 40 206 2009-01-15 18:05:59 2008-07-30 17:05:35 3 2 125 0 59 181 22 83.40 32 79.66 CHANGED sssuohRC.s.sslVohGDsth-VLtKCG-Ps...............p+s.hststst...............tthsttspsplE......cWsYshGP.sphhphlpFcsG+LscIcstt ............tssohRC.G.spLVstGDsth-Vl....t+CG...pPh...............s+cshshshsh...........................sthh...ptptsplE...........cWlY..GP....sshhhhLpFcsu+LhcIcst.t........................... 0 21 34 50 +10839 PF11007 CotJA Spore coat associated protein JA (CotJA) Pollington J, Finn RD anon PRODOM Family CotJA is part of the CotJ operon which contains CotJA and CotJC. The operon encodes spore coat proteins. Interaction of CotJA with CotJC is required for the assembly of both CotJA and CotJC into the spore coat [1]. 
25.00 25.00 26.10 25.90 22.80 20.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.52 0.72 -4.30 50 348 2009-11-09 12:01:11 2008-07-30 17:14:02 3 2 319 0 62 234 1 37.80 39 49.19 CHANGED htlAhAYVPhQpap.s..hY.ssccALppGTlFP-Lst.........Pa ......p.sphYlsaQ..hs.s...a.sPc-ALp+.GTlaPsLhcPY........... 0 31 48 53 +10840 PF11008 DUF2846 Protein of unknown function (DUF2846) Pollington J, Finn RD anon Pfam-B_002451 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as lipoproteins however this cannot be confirmed. 34.30 34.30 34.30 34.30 33.90 34.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.35 0.71 -10.54 0.71 -4.26 42 198 2009-01-15 18:05:59 2008-07-31 09:09:09 3 2 175 0 66 183 26 121.40 24 62.95 CHANGED hloGC...............tstsssphpphtssp.....sp.utlYlY..R.suhhu......s..uhphsl.alsuphlGp.htsssahah-lssGp.aplus...cpthts................splslss-uGcsYalc.p.phss...............hsGsssl .............................................hsGC.............hhstt.utp.htthpssst..........sp.AslYlY....Rssohhu......t....shthsl.a......lsu..ch....lGp.htsssahah-lssGp.aplus......ppthts................pplslsscuGpsYal+.p.p.sh...............hsG................................................ 0 17 33 52 +10841 PF11009 DUF2847 Protein of unknown function (DUF2847) Pollington J, Finn RD anon Pfam-B_002453 (release 23.0) Family Some members in this bacterial family of proteins with unknown function are annotated as YtxJ, a putative general stress protein. This cannot be confirmed. 
23.10 23.10 23.10 55.60 22.50 23.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.18 0.72 -4.44 34 477 2009-09-11 05:12:55 2008-07-31 09:20:33 3 2 462 1 92 238 54 101.40 49 90.37 CHANGED hp.LpohpphcplhcpotppssllFKHSTsCsISphuhppacphhstppp..lssYaLclhptRslSNpIAccasVpHcSPQllllcNGpslacsSHhsIstssLp ............KLooI-paEpllEc...Nch..hhlhKHSpTCPIStsAY...cpFppah.Ecc...hcuYYLhVQppR-lSshIAcchsVKHESPQshYhhsGchVWNssHtsIshsuL.s..... 0 34 68 84 +10842 PF11010 DUF2848 Protein of unknown function (DUF2848) Pollington J, Finn RD anon Pfam-B_002463 (release 23.0) Family This bacterial family of proteins has no known function. 23.40 23.40 23.40 48.00 23.30 22.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.13 0.71 -4.83 37 154 2012-10-02 17:33:27 2008-07-31 09:27:34 3 2 143 0 64 156 32 191.50 44 83.57 CHANGED AGWTGRDpsAlp+HIsELstLGVs.PussPlaYRVusshLopuspl.pV.GscoSGEsEhlllps...puplaluluSDHTDRclEsauVuhSKQhCsKPlupphWchs-Vts.HWDpLhLRua.ht.s..G.ppsLYQ-GoLuulhsPs-Llsths...sps.......hss...GsuMhsGTlssh.Gsl...psAspachELpDPstsRolcHuY .............AGWsGRDtsAlpcHIcELt.t.LGVstPussPhaYRVusshLTQuspl.pVh.Gs.coSGEsEhl.Llps...sGchhVuluSDHTDRclEsauVshSKQhCsKPluppsWch.s-Vts.HWDplhLRoa.hs.s..G.-.csLYQ-GsLuulhsPp-Llpphs...tts.........thss.GsuMhsGTlus....h.Gul...csuspFchELpDPlhsRolcHsY............................. 0 14 28 48 +10843 PF11011 DUF2849 Protein of unknown function (DUF2849) Pollington J, Finn RD anon Pfam-B_002560 (release 23.0) Family This bacterial family of proteins has no known function. 
22.50 22.50 24.30 33.10 22.20 17.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.99 0.72 -3.74 52 228 2009-01-15 18:05:59 2008-07-31 09:33:50 3 2 227 0 69 182 40 89.40 38 80.45 CHANGED sKVlTANcLh-GsVVahsusspWoccls-AtlhscctpuphhLttu..ttpsspVVGsYls-sp.sssG.scPs+..hREphRspGPoshhctp ...KVlTANRLhDGpsVWLsAs...GpWscslspAhlhccs-ssssL.ttu...sspsstVVsshllDVc.pp.sG.....lhP..h+...lRE+IRt.pGPTlhs...s.... 0 19 44 52 +10844 PF11012 DUF2850 Protein of unknown function (DUF2850) Pollington J, Finn RD anon Pfam-B_002587 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Vibrionaceae. 21.00 21.00 21.30 21.20 20.70 18.30 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.48 0.72 -3.80 16 208 2009-01-15 18:05:59 2008-07-31 09:42:15 3 1 111 0 20 110 1 78.70 42 55.31 CHANGED plYGpWlE.ssssYAs-phsLospGVhhNsRlluTsF-FDGphlpapsGsshhhYphssstps.pl+phpPuaY.shFhh ...clYGpWlE.ssssYts-plplsppGVhhNsRLloTpF-FDGstlphpsGsthaha-l.sptps..QhKpppss.s...hFh......................... 0 2 5 15 +10845 PF11013 DUF2851 Protein of unknown function (DUF2851) Pollington J, Finn RD anon Pfam-B_002589 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 40.10 35.00 18.50 18.30 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.19 0.70 -5.73 27 226 2009-01-15 18:05:59 2008-07-31 09:46:59 3 2 217 0 67 232 79 357.70 37 86.43 CHANGED MpEchLHalWpa+hFstps....LpTTpGc.lpllssGhhNpp.uGPDFFNA+lcIsup.hWsGNVEIHh+SSDWahHpH-pDsAYDsVILHVVaEpDs...pI.hRpsss.lPsLhLpthlspplhcpYppL.....htpcp..hlsCtspltslsshhhpsW....l-+LhhERLEpKsphlppLLppspscWEslhaphLs+sFGhplNucuF.plApslsFpslcK.......ppps.hplEALhaGpAGLLp.....tc...pD....pYhtpLpcEapaLp+KapL..pshssp.hpFhRlRPsNFPolRLAQLAsLap+p........psLFSplh.pspohp-ltplhp.lps.............SsYWcsHYsFs+topp+sKp...lucshl-lllINTllPlhasYupptupcphp.phhphlpplpsEpNs .....................EphLHYlW+a+hFshps....LpTT....sGpslcllcsGhhNps.AGPDFFNA+l+I.ssp.lWsGNVEIHh+uSDWatHtHcpD.sYcsVILHVVh....pt..Ds.....pl.h.c......p.sGp.lPpl.Lp..l.....s.pplhppY..c..pL......htspp.......a.sC..hph..lss.lsphhhpsW....hstL.hERLEpKsptIpphLppspssWEpshFhhLA+sFGaslNu-AFcphAppl.Ph.pslpK.......p+s...sh..hQlEAlhhGpAGLLp..........pp.....pD.......sYhtpLp.cEapaLp+KapL...pshssphW+FhRLRPtNFPplRlAQLAtLatpp..........psLhSplh.pspslpplpplhp.sps................SsYWpsHYpFsp.St.ppp.Kp...lupstlslllINTllPhhasYupppspcth.p...pcshphLcplpsEpNp............................................ 0 32 62 67 +10846 PF11014 DUF2852 Protein of unknown function (DUF2852) Pollington J, Finn RD anon Pfam-B_002611 (release 23.0) Family This bacterial family of proteins has no known function. 
22.50 22.50 23.20 41.40 22.30 22.40 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.60 0.71 -4.13 35 197 2009-01-15 18:05:59 2008-07-31 09:51:09 3 1 178 0 58 151 19 110.90 47 81.94 CHANGED shLDscG+sAWIAuMVLGFIhFWPlGLALLuYhIWS....+RMh..sp................uspscptp.t.t+hth.ht...............hpoSGNsAFDsY+t-TL+RLEEEQc.......pF-sFLcRLRcAKDKpEFDpFMs-Rpp ...................s.hlcsthpsAhIAhMVLGFhl.FWPl....GLAhLAYhlau......p+ht.thpc................spttcph.h.th...cptptp.s..................hsooGNsAFD-aRp-pLcRL-EEp+.......-F-sahpcLR+A+DK-EFDpFMs-Rc.s.............. 1 11 31 41 +10847 PF11015 DUF2853 Protein of unknown function (DUF2853) Pollington J, Finn RD anon Pfam-B_002619 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 31.80 31.70 21.30 19.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.36 0.72 -3.56 29 185 2009-01-15 18:05:59 2008-07-31 10:25:34 3 1 180 4 55 137 20 101.30 48 93.30 CHANGED scYhsDlKccsut..sDh-llpKlstuhGPuIYNpDuuhVuuSDtpEL-pV+pNalhKKLGL.sD..sscLcsuIppVhEphGtopRsKaRsshYYhLsK+hsKpSs ...........s.cYhsDl+chsus..sD.sllpKlsptLG.AlhNpDuShVSuSDscELcpV+sNalhKKLGl.pD..sschDcuIscVsEshtss.RNKaRsshYYLlAKphGK.t.h.......... 0 12 40 48 +10848 PF11016 DUF2854 Protein of unknown function (DUF2854) Pollington J, Finn RD anon Pfam-B_002643 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 26.30 35.40 20.00 24.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.95 0.71 -4.67 23 116 2009-01-15 18:05:59 2008-07-31 10:33:03 3 2 100 0 54 115 127 144.00 44 69.27 CHANGED lGhlAYhs........ssssLSLsshFYGlPlLLGGLALK......uuELpP.shptssssplhtlR-p.ATsp.splhpDVTRaRYGQcAHLEsSLctLtL..h....s--p.PpLhtlcEhptpusYuLhLcF-ssuVsl-cWp-K.p-+lu+FFu.GLc............Ac.lsp ..............................hGFsAYhs........ssusLslsshFYGlPlhLhGlALK......suEL+PlP..psots-hhsLR-ppATsp.splRpDVTRaRYGpcsHL-cuLcpLtL.s.....sccphPhLptlcEp.s.h................-.G.pYsLlLcF-us.tlsLppWpc..+..QpKhspFFGPGlpAclt........ 0 12 38 50 +10849 PF11017 DUF2855 Protein of unknown function (DUF2855) Pollington J, Finn RD anon Pfam-B_002665 (release 23.0) Family This family of proteins has no known function. 27.00 27.00 27.70 27.10 25.90 25.50 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.78 0.70 -5.28 35 108 2009-01-15 18:05:59 2008-07-31 10:38:43 3 2 98 0 56 115 238 310.10 33 86.50 CHANGED FAlTANNlTYAshG-th.......sYWpFFPs..........ss...................uaGhlPVWGFAcVlpSpssslssGERlYGYaPhAoc...LslpPscVsssuFhDsusHRpsLsPlYNpYtRssuDstaps..sp-shphLlpPLFhTSaLlsDaLt-ps.......................a.aGA.......ppllloSASSKTAhGLAasLpt....pssuhcllGLTSssNhs.........FVculGsYDpVloYDclssLss........sssslhVDhAGsspllssLHp+hu-pLt....hsshVGsTHhcp.....ts.ssslPusp...sphFFAPsphp+Rhc-WGsssapp+hspuWpsFh..ppu......psalplpchpG.cAhppsYpchlsGclsP 
.................FulTANNlTYAhhG-..t..h.......tYWpFFPs..........pp...................saGhlPlWGaApVltSppssltsGpRlaGYaPhuoa...Lhl....ps.sclsttuFhDs...usHRtsLsslYNpYtp..sss....-s..ha..ps..pp-shphLhRPLFhTSahlsDalt-ps............................h.auA.......pplllsSASSKTAhuhAahLp....................tptshcllGLTSssNhs.........FscuLGhYDpVlsY..-plsslss...........ss.slhVDhuGsssl....hspL+p+huc..plt........hss.hlG...hoc.pph...........................ts.stshsusp.......sphFFAPsphpcRhp-hGsst.htp+hstuWppFh.tts......tsalplpphpG.puhppsatplhsGpssP............................ 0 16 35 47 +10850 PF11018 Cuticle_3 Pupal cuticle protein C1 Mistry J, Coggill P anon PRODOM_PD021041 Family Insect cuticles are composite structures whose mechanical properties are optimised for biological function. The major components are the chitin filament system and the cuticular proteins, and the cuticle's properties are determined largely by the interactions between these two sets of molecules. The proteins can be ordered by species. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.70 0.71 -4.12 14 170 2009-01-15 18:05:59 2008-07-31 10:41:37 3 7 37 0 121 192 0 118.90 26 69.21 CHANGED sssAVuuoppsllRSas..usVSpYSKuVDTPaSSV+KsDoRloNsshp.uhutshh........................................aAAPshs...........huAPshsphAaAAPs.hhttts.......................tPsht..ths.huAPs.........spssYAAsAshhtt.h..................................stslsY.....SPAssV...SHhoas.GhGspYua ............................................................................t.t.t..sshS..ph..p..h...h..........................................................................................................hhh.sAPsht...............hsAPs..ht...t...s.a.u.sPs.hh...tt..........s....................uPsht....p.h..httPs..............h.st.s.....................................u.............................................................................................. 
0 31 42 102 +10851 PF11019 DUF2608 Protein of unknown function (DUF2608) Mistry J, Coggill P anon PRODOM_PD862984 Family This family is conserved in Bacteria. The function is not known. 22.10 22.10 22.10 27.30 21.60 22.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.52 0.70 -5.39 27 271 2009-09-11 09:18:18 2008-07-31 10:42:32 3 3 120 0 52 180 6 238.10 23 89.29 CHANGED hhpspohpcV.tchlppt......pssLllhDlDcoLl.psp.pslups.ahphphpcl......thhts.pcshcplh.chl....hl.phhphchl-sshsphlsp.lpppphslhulophs........shpthphcpLpp.hslsFssss.........h.pps.h...hh.sthspsshahcGIlhosshs+u....tsLphhLsphsthPcpIIalDsspcsltshtpthpp..tsItahGhpYsstpt......s.hsphsps.h.pptphlpschsthhhpth.............sp ...............h.pspshpcl..chltpt......pshLllhslDpsll.ps.........t.p.htppshhph.hppl..........h.s..ppshp.phh.tchl....hh.pptphchlEs..phsshlsp.hpppthslhulophs................shpthhhctLpp.hslsFspps...................pp..........stttptshahpGIlaous..hs+u....tsL.hhLpphshhPc.....pllalssppcsl.shpphhpp...hsIsahGhcYsshph.........h.hspltp.hhh.p..tchlps-hthh..p......t........................................................... 0 12 17 27 +10852 PF11020 DUF2610 Domain of unknown function (DUF2610) Mistry J, Coggill P anon PRODOM_PD199303 Domain This family is conserved in Proteobacteria. One member is annotated as being elongation factor P but this could not be confirmed. This domain is related to the Ribbon-helix-helix superfamily so may be a DNA-binding protein. 20.70 20.70 20.90 21.40 20.60 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.63 0.72 -4.20 9 66 2012-10-02 18:44:02 2008-07-31 10:43:35 3 2 66 0 20 44 4 81.70 67 73.33 CHANGED hKKF....hlsC-FGGQpuPFs.lYIGpP+s-sHPlpaQssWLScERGGsIPpcVh-SLp+LacLAccNslsFt-LCVYALs............sApps ...............YKcF....EFDCDFuGQRAKFK.FYIGTPQEGHHPLQFQAKWLSDERGGTIPD-VMcAISQ.LNDLAKKNGVPLsDL....CVYALGsAQE.s............... 
0 5 9 10 +10853 PF11021 DUF2613 Protein of unknown function (DUF2613) Mistry J, Coggill P anon PRODOM_PD383784 Family This is a family of putative small secreted proteins expressed by Actinobacteria. The function is not known. 21.70 21.70 23.50 23.30 21.40 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.60 0.72 -4.26 8 144 2009-01-15 18:05:59 2008-07-31 10:44:20 3 1 135 0 33 86 0 57.40 51 94.35 CHANGED Ms........RhlsPAlASAVlGlsLGusAlhGlThhsppsopPsh.p.ussuDsulLspVEYGuR ..................M.RhluPAsASsVVGllLGuuAlFGlTLhsppDspPsl..s.uscssSS.VL.s..cVEYGsR. 0 5 20 29 +10854 PF11022 DUF2611 Protein of unknown function (DUF2611) Mistry J, Coggill P anon PRODOM_PD055124 Family This family is conserved in the Dikarya of Fungi. The function is not known. 20.40 20.40 26.30 25.80 20.20 19.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.52 0.72 -3.89 20 135 2009-01-15 18:05:59 2008-07-31 10:45:16 3 3 114 0 102 127 0 71.80 35 85.81 CHANGED MGssYpIhG+pVsSHhLAluTLGolhu.slshsstGss.....ps....ssPsIsASSc........DEEKFIppaLtch.............ppcp ..MsshYpIhG+pVssHhLAhuTLGshhu..sshh.ussGsp.....................ptt......ssPPIsASSs.........................DEE...c...FI...pcFLcphctt.......pp................... 1 27 58 90 +10855 PF11023 DUF2614 Protein of unknown function (DUF2614) Mistry J, Coggill P anon PRODOM_PD355753 Family This is a family of proteins conserved in the Bacillaceae family. Some members are annotated as being protein YgzB. The function is not known. 
19.90 19.90 20.00 20.20 19.80 19.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.66 0.71 -4.29 9 167 2009-09-14 14:03:03 2008-07-31 10:46:40 3 2 166 0 33 113 7 109.80 67 95.57 CHANGED thKYSsKINKIRTFALSLVFlGhllMYlGlFF+sp.llMslFMlLGhLslIASTsVYFWIGhLST+AV.VlCPsCtK.TKhLGRVDhCMHCcpPLThD+sLEGKEFDEpYNp.....K .............................u..KYSsKINKIRTFALSLlFlGhhlhY...l...G....l...FF...+...pshllMThFMhlG.hLulIASTVVYFWIGML.STKs....VQll.......CPSC-KsTKMLGRV..D.tCMHCNpPLTLD+sLEGKEFDEKYNKK.shp............ 1 10 23 27 +10856 PF11024 DGF-1_4 Dispersed gene family protein 1 of Trypanosoma cruzi region 4 Mistry J, Coggill P anon Pfam-B_187 (release 23.0) Domain This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. Other domains on this protein include DGF-1_N, DGF-1_2, and DGF-1_5. This domain is just downstream from the C-terminus, but not the C-terminus of proteins, also annotated as being DGF-1, that constitute family DGF-1_C. 20.90 20.90 23.60 23.10 19.90 19.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.89 0.72 -3.59 20 257 2009-01-15 18:05:59 2008-07-31 10:47:44 3 4 2 0 34 257 0 73.20 79 3.14 CHANGED VDGCNRTPuMP.LSHTATLTETRShTPT......W....TPuh..STs+YSPTp.....YusTETLQVTETVALsPTRTPTA..SVSSTLWWSD ......VDGCNRTPuhP..LSHTAT..LTETRS.......T..P..T......W........TPSh.....SssHYSPTp.....YGPTETL......QVTETVALsPTRTPTA..SVSSTLWWSD...... 0 0 0 34 +10857 PF11025 GP40 Glycoprotein GP40 of Cryptosporidium Mistry J, Coggill P anon Pfam-B_197 (release 23.0) Domain This family is highly conserved in Cryptosporidium spp. Many members are annotated as being a 60 kDa glycoprotein. 
19.70 19.70 19.70 19.70 19.50 19.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -10.91 0.71 -4.68 11 884 2009-01-15 18:05:59 2008-07-31 10:48:02 3 1 25 0 1 890 0 141.60 80 54.38 CHANGED EoTPKEECGTSFVMWFGEGTPVATLKCGuYTIVYAP.KDpTDPAPRYISGEVpoVTFEK..SDNTVKIKVsGpEFSTLSosSSSPTENsG.Sus.QspSRSRRSLoEEsuETsATVDLFAFTLpGGKRIEVAVPssc-suKRsKYSLVADDKsFYTGuNSGsosGlY+L ....EsTPKEECGTSFVMWFGEGTPsATLKCGsYTIVYAP.KDpTDPAPRYISGEVpoVTF-K..p-sTVpIKVsG.-FSTLSssSSSPTENsG..Sus...QspSRSRRSLo.EE....su..Es.ATVDLFAFTL.cGGKRIEVAVPssE-soKRscYSLVAsDKs.FYTGuNSGsosGlY+L.................... 1 1 1 1 +10858 PF11026 DUF2721 Protein of unknown function (DUF2721) Mistry J, Coggill P anon Pfam-B_520 (release 23.0) Family This family is conserved in bacteria. The function is not known. 28.30 28.30 28.60 28.50 26.80 28.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.53 0.71 -4.51 95 398 2009-01-15 18:05:59 2008-07-31 10:48:42 3 1 349 0 183 381 997 124.60 29 85.49 CHANGED shssPshLlsuIuhLLhshosRhspl...ssphRpLp....sp..hcs.pps.th......ptpl....ppLc+.RhpLIppshhhushohllsslshhhlFl...........shshhsshlFshuhlhLhhuLlhhLhElpluspuLclph .................hssPuhLhsAluhlLhuhoNRhhplsshlRpLp.............sp....hpp....stsshh..............ttp.lpsL++Rhpll+th.hhushShhlsslshhhlal............shphhsshlFshullhLlhoLhh.lhElploscuLclp........ 0 49 119 161 +10859 PF11027 DUF2615 Protein of unknown function (DUF2615) Mistry J, Coggill P anon PRODOM_PD288703 Family This small. approximately 100 residue, family is conserved from worms to humans. It is cysteine-rich with a characteristic FDxCEC sequence motif. The function is not known. 
20.80 20.80 21.20 34.70 20.70 20.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.84 0.72 -4.09 13 97 2009-01-15 18:05:59 2008-07-31 10:55:39 3 2 82 0 60 106 0 96.60 47 97.95 CHANGED MuDs......FDsCECIaSHEtAMRRLlsLLRQSQuYCTDoEC.pDlPu.P..ptsusus...shhhlhhsWhllAhsLYlhRPs.....ohRssp......sssKPpsspsssuss...PPsPsls ......Mu-s..FDPCEClhSHEtAMRRLIslLR.QSQuaCTDoEChp-.lsG.P.......uu..sss.........shhhlh..hsWhllAhhLallRPs.....shRssp.........hss.KPssspssps...........PPsPs..s............................ 1 23 28 42 +10860 PF11028 DUF2723 Protein of unknown function (DUF2723) Mistry J, Coggill P anon Pfam-B_590 (release 23.0) Family This family is conserved in bacteria. The function is not known. 24.20 24.20 24.30 24.90 23.80 24.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.09 0.71 -4.79 45 364 2012-10-03 03:08:05 2008-07-31 10:55:57 3 5 295 0 152 370 354 175.40 39 18.99 CHANGED DsGEaIusuhpLtVuHPPGAPLahhlGph.Fohhs...huplAhtVNhhSuluSAholhhhaaslshLltc.h..............tphstsphhshhuuullGALAauFSDoFWFsAVEuEVYAhuuhh.sAllhalhLpWccph.ps.............css+WLlllualhGLSh.GlH...hhsLLslPAluhlaaa+ca.tploh+s .....................................DsuEaIssuhcL.pVuHPPGAPhFhLl.up.......l..F.o.h.F....s.....pss....plAh...hV.N.hhSuLhSAsslhFLFaolo+Ls+c.hh.................t.pths.hsphlhlhuuGllGALsaoFSD.oFWFsAVE..uE.V.YAhuuhF.sAllhWL....hL+.....W.-c.tssps................cusRWllLIAalsGLSl.GVH...hlsL....L.s...lP...AIshlaa.a+ch..p.p...t...................... 1 64 106 129 +10861 PF11029 DAZAP2 DAZ associated protein 2 (DAZAP2) Pollington J, Finn RD anon PRODOM Family DAZ associated protein 2 has a highly conserved sequence throughout evolution including a conserved polyproline region and several SH2/SH3 binding sites. It occurs as a single copy gene with a four-exon organisation and is located on chromosome 12. 
It encodes a ubiquitously expressed protein and binds to DAZ and DAZL1 through DAZ repeats [1,2]. 25.00 25.00 33.60 31.60 23.60 23.40 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.06 0.71 -3.71 9 94 2009-01-15 18:05:59 2008-07-31 11:08:40 3 3 68 0 56 93 0 128.90 44 53.94 CHANGED YssAPPu...YpphYpsuashPPsA..ths.hsAsa....Pss.hhhP..stshslushuppsPMtaYPhG..........sVYPsGu..........TVhVpGG.aDAGARFGsGsu.s.oIPPPPPGpsPNAAQlAAMQGtNVlhTQRKsNaFhGGSsGGYTIW ...................................................................................................................PPs....Ypth..Ypss...as.h...ssss....thsthsush....Pu..ss.ha..hPh...spshsl....u..sh.up.s.....h.P..h...u.YY.PlG..................s....hY..PP...Gu..................olhVpuu..a.DAGARF.ss.u.us.sssIPPPPPGCsPNAAQLAsMQGtsVlloQ+KssFFhGGosGGYThW..................... 0 16 20 36 +10862 PF11030 Nucleocapsid-N Nucleocapsid protein N Pollington J, Finn RD anon PRODOM Family This is the N protein of the nucleocapsid. The nucleocapsid functions to protect the RNA against nuclease degradation and to promote it's reverse transcription [1]. The NC protein promotes viral RNA dimerisation and encapsidation and initiates reverse transcription by activating the annealing of the primer tRNA to the initiation site [2]. 
19.60 19.60 22.00 26.80 17.30 16.70 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.20 0.71 -4.44 2 41 2009-01-15 18:05:59 2008-07-31 11:15:03 3 1 5 0 0 33 0 133.30 84 100.00 CHANGED MNSMLNPNAhPhQPpPQVVAhPhQYP.GFpPtFRRpRNPGFRPMFpRR..NNuNQNRuRQsR.RlQNppRG..hNhpsstQRuNRRQ.NQ.S.slPFEQQLLMMANETAhuATaPPEhQslAPTKLVKIAKRAAMQIVSGHATVElSsGspDos++lATFTIKVshN ...MLNPNuhPFQPQPQVVAMPIQYPMGFQPRFRRRRNPGFRPMFQRR..NNSNQNRSRQsRsRIQNQRRG..lNoSRTQQRANRRQNNQQSLSLPFEQQLLMMANETALSATFPPELQSLAPTKLVKIAKRAAhQIVSGHATVElSsGppDos++lATFhIKVshN 2 0 0 0 +10863 PF11031 Phage_holin_T Bacteriophage T holin Pollington J, Finn RD anon PRODOM Family Bacteriophage effects host lysis with T holin along with an endolysin. T disrupts the membrane allowing sequential events which lead to the attack of the peptidoglycan. T has an usual periplasmic domain which transduces environmental information for the real-time control of lysis timing [1]. 19.80 19.80 19.90 22.40 18.40 19.40 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.30 0.70 -5.19 5 43 2009-01-15 18:05:59 2008-07-31 11:27:23 3 1 38 0 0 44 0 200.40 49 98.42 CHANGED scVStuh+u-lLhslLDRLFKDssoGclLlpRVhlllLLFlMAllWYsssElFuaYKco+YETYsEIlQsERsc+FEsAApEQLQIVHVSScADFSuVaSFRPKNLNYFVDlIAYEGKLPoTlsEKsLGGaPVNKTS-EYpVHLsG+HYsScp-F...AYLPo+ccohE...lsYMYSCPYFNLDNIYAGoIoMaW++KPc.Is-.....E+LssICNQAARILGRA+ .........................s-lLFGlLDRLFKDsuTGKVLhSRVhllllLFlMullWY+s-plhshYKpopapsYschlpp-+ss+F-ssAhEQLQIVHlSStADFSAVaSFRP+NlNYFVDllAYEG+LPsolscKsLGGaPlDKTSpEYpsHLsGppa.Sspch......saLPo+..c.s.c...hsYMYSCPaFNLDNlYAGoluhaWhc.c.Pp.lsp.......-+LpslCuQAuRhLGRs+... 1 0 0 0 +10864 PF11032 ApoM Apolipoprotein M (ApoM) Pollington J, Finn RD anon PRODOM Family ApoM is a 25 kDa plasma protein associated with high-density lipoproteins (HDLs). ApoM is important in the formation of pre-ss-HDL and also in increasing cholesterol efflux from macrophage foam cells [1]. 
Lipoproteins consist of lipids solubilized by apolipoproteins. ApoM lacks an external amphipathic motif and is uniquely secreted to plasma without cleavage of its terminal signal peptide [2]. 20.60 20.60 21.00 20.70 20.00 20.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.31 0.71 -4.72 4 77 2012-10-03 08:47:39 2008-07-31 11:35:15 3 2 40 5 37 102 0 164.30 37 93.66 CHANGED MhHplWsaLLYLYulhhsSlu.CPu.s.LsssGlctppFPp.aLGpWYFlAuAA.PspctLATFcPlDNlhFshttuuss.pL.LRAsIRhKsGhCVPRcWhYhLscGsT-LRhEG+Pch+TpLFS...usCPcsIILKET..spuYpRhLLYuRpPpsstcsVp-FpshsuChsao.hL.hPppQ-ACploS .......................................h..hh.hh.lhhts....C.....s.lss.shstpph...hLGpWaFlAuAu...spppLtphpshD..shhhshssuoss.pLplptslRhcpGhClscpW.h..Y.pLs..p..s..o.ss..L.p..h...E..G+.sc...h+spLas...ssCPss.IhLp.Eo.........spsapR....hL.LYsRsscss-cslEEFcuhspCLshc..thh.hP.ppthC.h........................... 1 1 3 18 +10865 PF11033 ComJ Competence protein J (ComJ) Pollington J, Finn RD anon PRODOM Family ComJ is a competence specific protein [1]. 25.00 25.00 25.90 25.40 24.70 24.30 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.37 0.71 -4.49 7 123 2009-01-15 18:05:59 2008-07-31 11:38:10 3 1 121 0 16 69 0 128.10 58 96.00 CHANGED M.......ELoISYpQhhlhph-upPPslDWTDEshE+GYApuDsAV.FEAlpshpspltlpLssphphtuh.RplTVPFpVtp-slhIpSlhSp+lphsIPpGcYpLsh.osP...spcs-Lat.pY.l.Fpsh .............MELTISYSQLMLMNYDGEQPYVDWTDEDFERGYAcADGoV.IFEA.LSDY..T..CEVKVTsGKH..I.E..K.EEV.lRTloVPFTV.c.N.E.sIslTSILSN.K.FpIPIP.NGEYTVVLQAsPLEEPTDDELYKIQYEFFFES.K....................... 0 4 10 12 +10866 PF11034 DUF2823 Protein of unknown function (DUF2823) Pollington J, Finn RD anon PRODOM Family This family of proteins are possible glucose repressible proteins however this cannot be confirmed. Currently, no function is known. 
25.00 25.00 34.90 32.90 24.20 24.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.23 0.72 -3.85 11 69 2009-01-15 18:05:59 2008-07-31 11:43:39 3 3 46 0 56 63 0 67.40 55 89.33 CHANGED METlK....NAuNYVuEoVQuAsusASKEANKpVAKDSsAuluTRssAAtDAlsDKt-EppHDAKA-sHKpu ..................M-olK....pAuNYVoEpVQuAousASKEANKpVAKDS...sA...ulGTRsoAAtDAluDKhcEppH-ucA-saKpt........... 0 18 32 49 +10867 PF11035 SnAPC_2_like Small nuclear RNA activating complex subunit 2-like Pollington J, Finn RD anon PRODOM Family This family of proteins is SnAPC subunit 2-like. SnAPC allows the transcription of human small nuclear RNA genes to occur by recognition of the proximal sequence element [1]. 21.50 21.50 22.00 23.60 20.40 21.40 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.03 0.70 -4.96 3 65 2009-01-15 18:05:59 2008-07-31 11:57:22 3 2 32 0 36 49 0 234.00 43 98.19 CHANGED MKPPQRRRAlPARYLGEVTGPAAWSAREKRQLLRLLQARQGQPEPDAAELARELRGRSEAEIpcFIQQLKGRVAREAIQKlHPGGhcGPRRpETQsPAPIEVWMDLAEKITGPhEEALTVAFSQVLTIAATEPVSLLHS+PuKPTQARGKPL.LLSAPGGQEDPAPEuSSPAPtAPu.........sscssGSsP+TPG.......PAPEAPSESLAGsSTEcDFAVDFEKIYKYLSSVSRGG+GPELSAAESAVVLDLLMALPEELS+LPCTALVEHMTcTYt+LTAPQssLAGGuLu.PGTEDuGAGS+GPEETsQASPQAoEsAtpSEP+SuWQAAGICPLNPFLVPLELLuQAsoPAR .................................................................................................................................................................ltsalp.LKt.sspEslpp...th..t.Rpppsp..APIElWhDLAc+lTGshEEulssAFSQhLTIuAsEPloLhaS......hPs+s.spsps+.h..hp.s.t.ppp.ss..sts...s......................................................................u.ss.s...hp....ph..........VDFEKIYKYLSphu+sspuspLS..s.s..ESAVlLcLLMuLPEELshLPCssLhcHhhpsYhpL..hu..Pp.s....ust....s.s....sss...Essssssp.s....pps.....s....s.......st.s.................p.....hptht......hs.PLNPFhlPLphLtp......................................... 
0 1 3 9 +10868 PF11036 YqgB Virulence promoting factor Pollington J, Finn RD anon PRODOM Family YqgB encodes adaptive factors that acts in synergy with vqfZ , enabling the bacteria to cope with the physical environment in vivo, facilitating colonisation of the host [1]. 20.10 20.10 22.90 50.00 18.50 15.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.18 0.72 -4.45 2 382 2009-01-15 18:05:59 2008-07-31 13:08:21 3 1 379 0 6 24 0 43.00 84 96.41 CHANGED MKKKPVAQhE+Q+.LLENPhsYGLLSphphAIVVNCFTLsph. MKKKPVAQhERQHoLLENPCAYGLLSQFQAAIVVNCFTLNKII... 1 1 1 3 +10869 PF11037 Musclin Insulin-resistance promoting peptide in skeletal muscle Pollington J, Finn RD anon PRODOM Family Musclin is a muscle derived secretory peptide which induces insulin resistance in vitro. It encodes a 130 amino acid sequence including a NH(2) terminal 30 amino acid signal sequence. Musclin expression level is tightly regulated by nutritional changes [1,2]. 20.70 20.70 22.40 21.60 18.20 17.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.61 0.71 -4.53 3 49 2009-09-11 15:35:46 2008-07-31 13:09:12 3 1 36 0 27 34 1 123.70 65 94.62 CHANGED MLDWRLASVHFILAlTLMLWuSGKVLSsDlAocsF-.StslulpuPPTAoEEKSAT-LAAKLLLLDELVSLENDVhETKKKRSFSGFGSPLDRLSAGSVDHKGKQRKVVDHPKKRFGIPlDRIGRNRLuNSRG ...........MLDWRLsusHFI....LAlo.LhhWSSGKVlSs-ss.o.E.A...FD....SullclQS.sPTsp..EEKSATDLsAKLLLLDELVSLENDVIETKKKRSFSGFGSPLDRLSAGSVDHK.GKQRKsV-hPKRRFGlPhDRIGhNRLssoRG.......................... 1 1 3 9 +10870 PF11038 DGF-1_5 Dispersed gene family protein 1 of Trypanosoma cruzi region 5 Mistry J, Coggill P anon Pfam-B_157 (release 23.0) Domain This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. Other domains on this protein include DGF-1_N, DGF-1_2, and DGF-1_4. 
This domain is just downstream from the C-terminus, but not the C-terminus of proteins, also annotated as being DGF-1, that constitute family DGF-1_C. 25.00 25.00 39.10 39.10 17.90 16.70 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.88 0.70 -5.19 3 286 2009-01-15 18:05:59 2008-07-31 13:12:31 3 5 2 0 52 286 0 248.30 89 11.79 CHANGED AGGSLT.sDIRso.....GuAVPc+LhVALPPPFR..WARDPQLGTHLoF.shsSpuQPsGauGPWGE.MLRNATWVRNATNPSTVLELAVPVHRGYFIGADETIVIRCDAVAVaGGCKGVLLGuFTIsSNTPPAlASALSAITGVVAGAAAVAVVVTGGLGSILEMQALGVFARMSCASAQERASTVALPYFLSVFAALDPLWMVVGNALLAAVFGCVHYGVTAAFQRWRGVDAASAWAAMRFPSLTYVVAHAMHLGIFFGSVFALAMPGARVQHYVIGVVGVLYG ....AGGSLTQNDIRGG.....GSAVPThLMVALPPPFR..WARDPQLGTHLSF.VPVSTAQPpGFGGPWGA.MLpNATWV.RNATNPSTVLELAVPVHRGYFIAADETIVIRCDAsAV.GGCKGVLLGSFTIRSsTLPAAASALSAITGVVA.GAAAVAVVVTGGLGSlLEMQALGVFARMSCASAQERASTVALPYFLSVFAALDPLWMVVGNALLAAVFGCVHCGVTAAFQRWRGVDAASAWAAMRFPSLTYVVAHAMHLGIFFGSVLALAMPGARVQHRVIGVVGVLYG............... 0 0 0 52 +10871 PF11039 DUF2824 Protein of unknown function (DUF2824) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. Some members in the family are annotated as the P22 head assembly protein gp14 however this cannot be confirmed. 25.00 25.00 25.00 25.20 24.20 24.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.87 0.71 -4.49 3 167 2009-01-15 18:05:59 2008-07-31 13:13:44 3 1 163 0 4 76 0 148.80 65 99.31 CHANGED MITFpPTRNIDLIEMVGNHPDIIAGSNNGDGYDYKPECRYFEVNVHGQFGGIVYYNEIQPLTFDCHAMYLPEIRGFSKEIGLAFWRYILTNTTVQCVTSFAARKFRHGQMYCAMIGLKRVGTIKKYFKGVDDVTFYSATREELIDFLNHGR ........................MIpFpPTRNIDLIEhVGNHPDIIAGSNNGDGYDYKP-CRYFEVNVHGQFGGIVYYpEIQPL...TFDCHAMYLPEIRG.FSKEIGL.AFWRYILTNTTVQCVTSFAARKFRHGQMYCAMIGLKRVGTIKK.YF.K.G..VD...DVTFYSATREELIDFLNHGR...... 
0 1 1 1 +10872 PF11040 DGF-1_C Dispersed gene family protein 1 of Trypanosoma cruzi C-terminus Mistry J, Coggill P anon Pfam-B_30 (release 23.0) Domain This protein is likely to be highly expressed, and is expressed from the sub-telomeric region. However, the function is not known. This is the very C-terminal part of the protein. 25.00 25.00 49.90 48.00 20.30 19.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -9.84 0.72 -4.09 26 256 2009-01-15 18:05:59 2008-07-31 13:20:46 3 4 2 0 31 256 0 80.50 80 3.82 CHANGED WYAEDRHWQELREPRRGGLEALLRDDEESDE-TQKPH-hTSSSYASGTTsASSYRPPAP.....................QPMAGDTRSDALSLhDRASSASspIs .WYAEDRHWQELREPRRGGLEALLRDDEESD.EETQKPH.-.MTSSSYASGTTVAS.SYRPPAPP.....................QshAGDTRSDAhSLhDpuSoAut.l............ 0 0 0 31 +10873 PF11041 DUF2612 Protein of unknown function (DUF2612) Mistry J, Coggill P anon PRODOM_PD048079 Family This is a phage protein family expressed from a range of Proteobacteria species. The function is not known. 22.90 22.90 23.00 23.70 22.70 22.80 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.80 0.71 -4.72 18 287 2009-01-15 18:05:59 2008-07-31 13:21:32 3 2 240 0 38 254 0 165.80 26 78.34 CHANGED shtshllhQYpspP+hhuhlpthtshh.sshhphlss.lsslaDlDsAsGhtLDllGchVG.luRhlps....t.aFuF..shtuhsa.......stu.ahs.hcstsssstLsD-sYRhLI+sKlhpNhhcsTlsslsshhp.lFuss...sallDshDMo..hsssVssphhoshhhtllpphsllP+PsGVplpa.ll ..........hhthlhspatspPph.shlphhsp.h..sstshhps.h.phaslDoA.GhtLDlhGphlG..hsRhls...s...t.ahua.....s...t..s.hsa..................spus..a...s.s.p.s..s.ss.......htLsD-tYRhll+sKhhtNhh.cuossslsthLchhassp...phhlh....Dst-Mo....hhlh.p....t..hssh...ph...thlpp....h.....lh..s+ssGV.h.....h.......................................... 0 3 17 27 +10874 PF11042 DUF2750 Protein of unknown function (DUF2750) Mistry J, Coggill P anon Pfam-B_609 (release 23.0) Family This family is conserved in Proteobacteria. The function is not known. 
21.60 21.60 22.10 21.80 20.90 20.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.36 0.72 -4.01 71 710 2009-09-11 13:47:04 2008-07-31 13:22:01 3 2 507 0 130 400 30 105.20 33 68.29 CHANGED RhphFl..pcltpppplWsLpcccG..alhhsos-t-s........hPlWsscchApt..asss-WpchpstsIsLscah-pWlssLpcDslhlul...stsp.pGhhlpPp-lsppLt .........RhphFl..p-ltcpppVWuLpccpu....alhlsss-p-p.........hhPlWsp+ptApt..hss..-..-...Wt.-h.cshsIshshFhEtWLssLc-Dsht.Vu.lsh.sssh..GhllpspcLsp-L.................. 0 27 54 99 +10875 PF11043 DUF2856 Protein of unknown function (DUF2856) Pollington J, Finn RD anon PRODOM Family Some members in this viral family of proteins with unknown function are annotated as Abc2 however this cannot be confirmed. 25.00 25.00 71.00 70.90 24.30 23.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.19 0.72 -3.87 3 202 2009-01-15 18:05:59 2008-07-31 13:23:26 3 1 181 0 3 78 0 94.10 88 99.85 CHANGED MPAPLYGADDPRRCSGNSVSEVLDKFRKNYDtIMSLPQETKAER-FR+sIWLAEKpEKERIpQTSIRPFRKATYTKFIE.IDPRLRNYRSRYGAISNN ................MPAPLYGADDPRRCSGNSVSEVLDKFRKNYDhIMSLPQETKEEKEFRHCIWLAEKEERERIYQTSIRPFRKATYT+FPE.IDPRLRNYRSRYGAISND 0 0 0 0 +10876 PF11044 TMEMspv1-c74-12 Plectrovirus spv1-c74 ORF 12 transmembrane protein Mistry J, Coggill P anon PRODOM_PD019016 Family This is a family of proteins expressed by Plectroviruses. The plectroviruses are single-stranded DNA viruses belonging to the Inoviridae. Except that it is a putative transmembrane protein the function is not known. 20.80 20.80 20.80 22.00 20.40 20.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.45 0.72 -4.41 4 35 2009-01-15 18:05:59 2008-07-31 13:24:13 3 1 6 0 0 33 0 48.60 76 87.95 CHANGED MPTWLTTIFSVVIlLulFhahGL.IYQKIRQIRGKKK-KKEIccKEspc ....MPTWLTTIFSVVIlLGIFsWIGLSIYQKIKQIRGKKK-KKEIE+KEspK... 
0 0 0 0 +10877 PF11045 YbjM Putative inner membrane protein of Enterobacteriaceae Mistry J, Coggill P anon PRODOM_PD079046 Family This family is conserved in the Enterobacteriaceae. It is a putative inner membrane protein, named YbjM, but the function is not known. 25.00 25.00 31.30 30.70 20.50 20.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.78 0.71 -4.36 6 477 2009-01-15 18:05:59 2008-07-31 13:25:27 3 1 474 0 30 104 2 118.90 78 99.49 CHANGED MtppphWhuhhsCFlLF.lVhL..thtspGuFc.uuG+sElGLLhFlLPGAVASahSpR+RlL+PLhGAllAAPlCLllh+LahsPsRSFWQELAWlhSAVFWCuLGALCaLFlpoLhpthRp+..pR ..........MKHKptWAuslCCFVLFIVVCL.Ls..hHM..KGAFR.AAGHPEIGLLFFILPGAVASFFSp....RREVLKPLFGAMLAAPCSMLIMRLFFSPTRSFWQELA.WLL.SAVFWCALGALCF.LFISSLFKsQHRKN.Q...... 0 1 5 17 +10878 PF11046 HycA_repressor Transcriptional repressor of hyc and hyp operons Mistry J, Coggill P anon PRODOM_PD091544 Family This family is conserved in Proteobacteria. It is likely to be the transcriptional repressor molecule for the hyc and hyp operons, which express, amongst others, the protein HycA. This protein may be harnessed for the reduction of technetium oxide, an unwelcome product of radio-nucleotide bioaccumulation. HycA produces formate hydrogenlyase, one of the key proteins necessary for metal compound reduction [1]. 25.00 25.00 91.40 91.30 21.50 19.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.85 0.71 -4.26 4 465 2009-01-15 18:05:59 2008-07-31 13:26:04 3 2 462 0 21 85 0 147.90 88 96.20 CHANGED MTIWEISEKADYIApRHRpLQ-QWHhYCNSLVQGITLSKARLHHAMSCAPpc-LCFVLFtHFtIaVsLA-GFNSHTIpYaVEsK-Gp-+pLIAQAQLshDGhlDG+Vs.RDR-QVLEHYL-KIAsVYDsLYsAlEpDhPVcLSpLlhuc ..MTIWEISEKADYIAQRHRRLQDQWHIYCNSLVQGITLSKARLHHAMSCAPDKELCFVLFEHFRIYVTLADGFNSHTIEYYVE..TK...DGEDK...Q.R...IAQAQLSIDGMIDGKVNIRDREQVLEHYLEKIAGVYDSLYTAIENNVPV..NLSQLVKGQ........ 
0 1 3 12 +10879 PF11047 SopD Salmonella outer protein D Pollington J, Finn RD anon PRODOM Family SopD is a type III virulence effector protein whose structure consists of 38% alpha-helix and 26% beta-strand. 25.00 25.00 27.20 26.00 24.00 23.00 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.79 0.70 -5.47 2 272 2009-01-15 18:05:59 2008-07-31 13:26:32 3 6 136 0 9 152 0 274.40 60 89.82 CHANGED MPVTLSFGN+pNYplNcSRLA+LhSsDKEcAlaMGsWD+hpDpFRscKKpcsLEVLaolIHGptRtp.uEhpVslpshpKIaAFppLpchAsPupQDhFsh+hDhsQTQhLh.lsspVIspsNl+clLNlS-sslhcsMpc-EcpLFLpIC.hhGtKhohaPELLpt.hspL+ctVstssplKstVYchMRsuEs.ch.hVEWpsoLTE-EKshLtClphGsFp.TTQFhKIGY.ElpGEVhFsMhHPslSYLLpsYpP...u-hh.TNoh.F.chLNpDYsDYpspKh.IDsILc+lYhoHppoLaIuccussRNhLl ..............MPVTLSFGN+p.NYplNcSRLA+L..hSuD.K.EcAlaMGsWD+hpDpFRscKKpcALEVLaolIH...Gp.GRtc.uEh-Vsl-shs..K....IaAFc+LpphAsP...up..QDhFsh+hD.ho..Q..T...QhLhhlsspVISpsN.l+..clLNlSDssVlcsMsc-EcpLFLpIC.hhGtKhoh+PELLQp.hspLRctVstsspIKstVYchMRPuEs.chshVEWpsoLTtDEcshLsClphGsF-.oTQFhKIGY.El.pGEVhFsMhHPsl.YLL+uYpP...s-Fp.oNoh.F..chLNpDYsDYpspKh.IDsILc+lYhoHppoLaIucsussRNhLl..................................................................................... 0 2 3 5 +10881 PF11049 KSHV_K1 Glycoprotein K1 of Kaposi's sarcoma-associated herpes virus Mistry J, Coggill P anon Pfam-B_38 (release 23.0) Domain This is a highly glycosylated cytoplasmic and membrane protein similar to the immunoglobulin receptor family that is expressed as an inducible early-lytic-cycle gene product in primary effusion lymphoma cell-lines. This domain would appear to be the cytoplasmic region of the protein [1]. 
20.90 20.90 21.30 43.00 20.40 16.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.59 0.72 -3.99 13 1340 2009-09-11 12:24:34 2008-07-31 13:28:52 3 4 3 0 0 1052 0 67.90 71 41.93 CHANGED YTLTCPSsASLPISWYCNsTRLhRL.Tsp.TlTl.ss.lTCNFTCVsQSGHpHSIWIpWasQPVLQTLCAQPSNT .YTLTC.SssSLPISWYCNsTRLhRLTpp.olTV...so.l.s.CNFTCVpQSGHRpSIWITWasQPVLQTLCAQPSNT................ 0 0 0 0 +10882 PF11050 Viral_env_E26 Virus envelope protein E26 Pollington J, Finn RD anon PRODOM Family E26 is a multifunctional protein. One form of E26 associates with viral DNA or DNA binding proteins, while a second form associates with intracellular membranes [1]. 25.00 25.00 69.30 69.20 20.30 20.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.53 0.71 -5.13 8 16 2009-01-15 18:05:59 2008-07-31 13:29:20 3 1 13 0 0 16 0 186.80 49 99.87 CHANGED MEosp.....lsssaAs.K..tuAlss.....hV+TVVTTTssSspstsp......cs+IsQlIAQLp+TRLsFsKLopLQ+KRVRNMQ+LlRKKNplIAsLAApLpsppp........+...sKaFAVshsKNllhThSGSEpFVRpRVA-LCAh.GGEQVFCuRRADCARDRpRlAcALssSLGuGVlspusNKRFEIh-s-KlVSAKLIlQQVLHDGhcuDssAa .........MEosp....h.sstaAs.K.pthAVss....ohs+TVsTTTssSshscp.....-ppp+IsplIupLppT+LsFsKlpplp+K+lRphQpLlRKKNpIIAsLstpLpstpc................hKaFuVshscNslhTh.GsEpFVRpRlA-LCsh.suE.VFCttRsDss+DRptlAphLssuhGutVlshtsN+RFEhlps-clsSuKhll.phLpDt.puDhsAa..... 0 0 0 0 +10883 PF11051 Mannosyl_trans3 Mannosyltransferase putative Mistry J, Coggill P anon Pfam-B_379 (release 23.0) Family This family is conserved in fungi. Several members are annotated as being alpha-1,3-mannosyltransferase but this could not be confirmed. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.95 0.70 -5.05 117 725 2012-10-03 05:28:31 2008-07-31 13:29:41 3 19 160 0 543 759 22 250.80 23 44.84 CHANGED u+GIVhsu.......Gs...th.hh.h.thl+hLRp.hG.....spLPlEllh.s..s-h..spchppplls............................................hls....spsllhsshhsp.hh...........................s..ap.......hKhlAllhSSFEcllhLDADslslps.P-p.hFp..scsappsG.hlhW...-hhppshsshhhplhs.....................h.....tt...t.......................th.hpph.sth.ptssESG.llls..Kp....pHhpsLLLuhaaN.hasss.......hha.hhs.G......D.....KEoFhhAshhh............................s..psYa.spp.ssslGhhst..........................................p.phs.usthuphD ............................................................................................................tcGIVhss.........us....phh.hsh..thl+hL.Rp.hG.....spLPlElhhhs..s.-h.......spphpptlhs................................................h.ls...........sp..s.l..hh..p......phhsp..hh....................................s.ap....h.KhhAll.hSoF-c.llhLDuDslshp....s.Ps.........h.lFp................sp.sap..ps.G.hlha........s.....hh..p..p...p..h....tshhhp.lhp.........................................................................................tth.tth...phptESG.llls..Kp....p..H.h..ts.Lllshahs..hht.p........h.h...h.hs.G.......DKEhFhhuhhhh............................s...psahhspp..s.ssh.Ghhpt.......................................................thpu.thhphs................................................................................................................................................................. 1 117 294 503 +10884 PF11052 Tr-sialidase_C Trans-sialidase of Trypanosoma hydrophobic C-terminal Mistry J, Coggill P anon Pfam-B_103 (release 23.0) Motif This is a highly conserved sequence motif that is the very C-terminus of a number of more diverse proteins from Trypanosoma cruzi. 
All members of the family are annotated putatively as being trans-sialidase but this appears to be a diverse group. 20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.89 0.72 -7.06 0.72 -4.06 45 797 2009-01-15 18:05:59 2008-07-31 13:30:14 3 9 3 0 143 798 0 24.70 62 3.66 CHANGED uuos+GsshL...LLLL..LLGLWG..hAAl ...uuTs+G..us..lL..PLLLL..L.L.GLWG.FAAL..... 0 0 0 143 +10885 PF11053 DNA_Packaging Terminase DNA packaging enzyme Pollington J, Finn RD anon PRODOM Family Phage T4 terminase functions in packaging concatemeric DNA. The T4 terminase is composed of a large subunit, gp17 ad a small subunit, gp16. The role of gp16 is not well characterised however it is known that it binds to double-stranded DNA but not single stranded DNA [1]. 25.00 25.00 30.30 29.60 19.80 18.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.91 0.71 -4.61 7 72 2009-01-15 18:05:59 2008-07-31 13:33:08 3 1 69 15 0 59 837 144.80 36 88.29 CHANGED MsD..L.DhspLhDlsu..lPG.lpuE-spV.YpPlVLp-VcSpPpsRshDL-cDYslVRcNhHFQpQMLMDhAKIhLEsAKNu-SPRahEVFusLMsQMTssNKElL+lHK-MK-IT.pphtp...uupsshpsplQNssl.....FhGSPs-Lh-.ElGD..p. .................................................htt....G....s.p.....apP......l..lpSp....P...p.s...cssDlcpDYphsRpshH.hQpphhh-AuchhLEsA+so-uPRthEVFupLhpphssssccLlcLpKcMK-los-p..t.p..ts.ssspsshpNs.Tl.....FhG....Sss-Lhc.plts.......................... 0 0 0 0 +10886 PF11054 Surface_antigen Sporozoite TA4 surface antigen Pollington J, Finn RD anon PRODOM Family This family of proteins is a Eukaryotic family of surface antigens. One of the better characterised members of the family is the sporulated TA4 antigen. The TA4 gene encodes a single polypeptide of 25 kDa which contains a 17 and a 8 kD polypeptide [1]. 
25.00 25.00 63.50 63.40 18.30 17.90 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.83 0.70 -5.00 12 29 2009-01-15 18:05:59 2008-07-31 13:51:46 3 1 2 0 0 29 0 240.70 41 97.62 CHANGED hshhsllpsSLLhlu.s..GputssttsspYTAshGtslpCLuElNuARcAAGLssFt-A.ossppLscPssp-h....psuopW+.clCcaLlP..ps-ssstusshpPFptGTYAFKsLTsspssCK-sV-YWKuAacNFoG.LPPocspuss..lYssQpNVSFVALYNPpusAoADCpVVTCTpssssss..........ussplpu-stpt.........spsGaAlICKThPuAF.s-sosPFTp-QWcKIVsSLTGS........sSsshPSlsshhIsshuhhuL ......................h.h.sllphSLLhl.s.....upptsptttsphTAs..tslcCLsEhNssRcAAGLssFtp.A.sssthLspsuspph.....psso.Wp.clCptlls..pscss.spssshs.F..GTaAatslTsspssCK-sV-YWKuAacsFsG.LPPohptsss..lYscppsVSFVALYNPpssss.AsCtllTCTpssss........................ussplpu.-sttt.........sppuhAllChT.PsAh.sssosPFTp-QWcKIlpSLoGS........sSsssPolsshhlsshuhhuL..... 0 0 0 0 +10887 PF11055 Gsf2 Glucose signalling factor 2 Pollington J, Finn RD anon PRODOM Family Gsf2 is localised to the ER and functions to promote the secretion of certain hexose transporters [1]. 
25.00 25.00 46.40 46.30 17.70 17.40 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.29 0.70 -5.94 9 45 2009-01-15 18:05:59 2008-07-31 14:01:25 3 1 44 0 29 42 0 367.10 46 95.97 CHANGED hElYlRhNDDhE+DYsFQlcpscThcs.lhKIFs.......oh.hsLRPSlFacscPlGFpKSspPGYLTEsGsLlFcY-Ascpcahcpls..cchlh-plWPGQLILPhWchsphshasFlslhlsWLYTDLPDhISPTPGICLTNQLSRhhh.lAppash.clAppLht-lp.s.sulsAQhLFFlhHllKllhIhhhhYhGhhNPlphN.aphhsh+pph..................hKcpLhslGWhGARRAThD-Yp-hYhpYtIccaGGhlpAaRA..Glhcphts.GlpLusGEGFpTPL-c+hThsThpshcpct.....KFhLS.-YFspLtpshcp.l-cpsG..uchsspI+pFRRaGlhcss-clpclVphRKthsspcc..............p.KhE .MEIYlRhNsDhE+DYsFQVss-cThps.lhKIFs............u..hsLRPSIFacpcPltFhKShpPGYLTEsGsLlFcY-Aspcc.lppls.csc.lh-plWPGQLllPcWchschshasashlhLsWLYTDLPDhISPTPGICLTNQlSRhh.h.lAcp.hshsclAs+LtpElp.s.sulsAQhlFFlhHlhKlslItLFLhhGlhNPISFNshphhs.l+s.s.......t.............s.hKppLpolGWhGA+RAThD-YpspYhsYhIcKhGGhVtAa+A..GhhcphtssGlpLssGEGFpTPL...-c+.aTtsTFpsl.cp-t.......KFlLS.EYFlcLppsL+c......l...-ch-G..-l....uchNtpI+cFRRaGlaEss-+ltplVphR+phscp.p.............pppt................................. 0 4 15 26 +10888 PF11056 UvsY Recombination, repair and ssDNA binding protein UvsY Pollington J, Finn RD anon PRODOM Family UvsY protein enhances the rate of single-stranded-DNA-dependant ATP hydrolysis by UvsX protein. The enhancement of ATP hydrolysis by UvsY protein is shown to result from the ability of UvsY protein to increase the affinity of UvsX protein for single-stranded DNA [1]. 
22.80 22.80 24.80 27.40 22.20 22.70 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.45 0.71 -4.01 9 67 2009-01-15 18:05:59 2008-07-31 14:03:14 3 1 65 0 0 59 954 129.50 31 92.29 CHANGED phccDhhID........sopLppEuhc.PhLauKWLphaoshpp.hhclEhpt+pslK-Rh.aYoG+u-..sEVsh-h.....hcpS.ElKhslsuD-cllclssplpYhphlhcFhppuLctlpsRGFsIKshIEhR+hEuGt ........................hcpD.hID........sspLp.-uhp.shLasKahchasshpphhhplEtchKphhKp+hpYYsG+us..sphhh-p.....hppo.ElchhlsADc-ll+lssplpYhphllcFhcssLctIpsRuapIKNhI-h++FpsG... 0 0 0 0 +10889 PF11057 Cortexin Cortexin of kidney Pollington J, Finn RD anon PRODOM Family In the middle of cortexin protein there is a single membrane-spanning domain which indicates that this protein may be a membrane protein involved in intracellular or extracellular signalling of the kidney or brain, since it is expressed specifically in the kidneys and brain only. The protein is highly conserved among species [1]. Cortexin is also thought to be important to neurons of both the developing and adult cerebral cortex [2]. 25.00 25.00 47.50 47.30 19.80 19.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.90 0.72 -4.38 5 92 2009-01-15 18:05:59 2008-07-31 14:10:38 3 1 36 0 63 63 0 80.10 57 97.67 CHANGED Msosasl.PSPss.............................................sSussluusSLoLEQKTuFAFVGlLhlFLGLLIVRCFRILLDPYSSMPoSSWsDclEGLEKGQFDYALs ........................Msus.sh.spsh...............................................ssssssssul....oLEQKTsFAFVhlLhlFLGlLIVRCFRILLDPYSSMPoSTWsDphEuLEKGQFDYALs...... 0 3 8 22 +10890 PF11058 Ral Antirestriction protein Ral Pollington J, Finn RD anon PRODOM Family Ral alleviates restriction and enhances modification by the E.Coli restriction and modification system [1]. 
25.00 25.00 84.80 84.70 21.70 16.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.53 0.72 -4.60 2 90 2009-01-15 18:05:59 2008-07-31 14:13:30 3 1 82 0 1 27 0 53.70 89 99.79 CHANGED MTTTIDTNQWCupFh+CpGCKLpuECMVKPEEMh.VhEDGKhVDKWAIRTTtMIARELtK...KAh ...........................MVKPEEMFPVMEDGKYVDKWAIRTTAMIARELGKQNNKAA..... 0 0 0 0 +10891 PF11059 DUF2860 Protein of unknown function (DUF2860) Pollington J, Finn RD anon Pfam-B_002667 (release 23.0) Family This bacterial family of proteins has no known function. 21.30 21.30 21.30 22.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.60 0.70 -5.37 29 262 2012-10-03 17:14:37 2008-07-31 14:23:33 3 1 230 0 25 207 14 288.20 40 91.00 CHANGED GFSGplolhsGhsuspSsh.....ss.psssphssh..ssuupocosslshPLGslpYTFupt.spQlFhGsscsDlhsGphthElGYcpphssssslshShlPslhp.sEsWpDPYhssssRppTDhsspuhRlphpplhs..usasl-hAaucp-lDpEpou.................ts.LcRsuphahhchsYphsL..spshhLpPulsYtppDADGcAhSaspaGsplohhhhhscHplslThuhupppYDu..sNPIFs+s.p--sphuhhhsYpYpshasacshuhsuhsGaspscSNIsFYDpsphllSlGlsYp ...............................................................................................GaSGsLSlssGhtcsKSNh.....sT..ssssh..losh...sus.sS-oshlsh.huplhYs.h.s.NsplFhts.sscslsG...htLGYc+ta.cthshohShlsSLh..pcsatsPYhhss.RppTDlst.GaRlu.hh..s..stFoh..sYsau..cpKlDc-p.u.................pupLpR...-upYaplchsYshuL.......Ls.ulsYs.pDAD...GcAp..SaopaG..aplGsph.h.F.u.p...s.hh.lT..suhuhpcY-s..scP...IFsKp.pDu..shhph.hthV...hpphhGapsl.hhu.hGlpcpsSs.I.s.FYDpp.pll.TGluYp.............. 0 6 14 20 +10892 PF11060 DUF2861 Protein of unknown function (DUF2861) Pollington J, Finn RD anon Pfam-B_002683 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 26.00 48.60 21.50 19.80 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.57 0.70 -5.04 43 183 2009-01-15 18:05:59 2008-07-31 14:27:32 3 1 112 0 17 107 0 251.20 41 89.45 CHANGED WFcs.TPLppuaQtLlEschsphappLlcshppsP...lppph.pLltpul..pspCG+uLsspshsDhh.uVThh+phsQo.tt.ph+Luh-shTps.lscls.sp..ssplluu-.uhs...........ttpYs.scphsLhuPhuAGlYpLolsspc.t.......lllusssupphlc.o...Spc-apIchsAh.supsP....SluhapahDtNap.l...............aSpThcsD.Psuh.ssshPSptahlolSlIppcaQGsItlEQhQplohshD ...WFcp.TPLppuaQpLLpschpphappLlcshphp....lpspL.phlhpul..pssCG+uLs.sp.shPD.WlpuVT..lhRp.lQSssp.th+lul-stop...s....l..sclphoph.sspllus-suhs..........ts.Ys.scphsLhsPhuAGlYplslsstctt...a.tWllluss.upphlcWo...Sc..c...-.apI.pp..sshhsupCPhPphSlula.sahDsNYp.l...............WSpoh...cs.DhPs..s..L...ssslPSspahloVShIppRaQGsltlEQ.QhIohshD................. 0 3 6 13 +10893 PF11061 DUF2862 Protein of unknown function (DUF2862) Pollington J, Finn RD anon Pfam-B_003005 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 26.50 33.20 23.50 21.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.90 0.72 -4.26 26 91 2009-01-15 18:05:59 2008-07-31 15:02:59 3 1 73 0 34 85 165 61.70 46 76.61 CHANGED IGpKV+..l..s+l+DRlspcllctltppshGplpsaKhsDGpGlGllVcLssGppsWFFEDElp. .IGpKV+..V..pRlRDRlsspllcpLsp..hGslpsFKhTDGp.GIGllVchs.D.ssssWFFEDElc... 0 5 22 32 +10894 PF11062 DUF2863 Protein of unknown function (DUF2863) Pollington J, Finn RD anon Pfam-B_002981 (release 23.0) Family This bacterial family of proteins have no known function. 
25.00 25.00 26.60 26.30 24.10 23.00 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.10 0.70 -6.38 12 116 2009-01-15 18:05:59 2008-07-31 15:34:25 3 1 112 0 44 96 40 389.90 56 97.80 CHANGED t+hRsppps+Lu.-ApcLlsLAhuLstSGSRlEDtaWEppLsttls+LL+suspssL-AALDHLaptsssAY-.LA-tsEohuESh.lppsGtcaDlLLlAAPlLAWoRYpIPoGsl.sshhpsLtspLQuHVLAsss+lALssaLaS.DQLPRoaspThpLspcLupAALsup.sl+lphpshPETushLuDsRYLlAulusPpGpPlFRWQEpsps............pR-tsLtpWppQutssLusLLPGCthElLLPDAYasusRpAD+psRPhSlRAuVsaLpssLshssspL+AVluuFsEc....plEEYRIGFTh+sps-VlYGlVWPLhGp-......................u-s-sssslppIpAlL+EsGVp-lhphsthFs.EYC-DCGAPLaPs.pGElVHAEhPE-.u.sss.ph ...............................................h...Rp+tupRLsPDA-+LVuLuLALhASG.SRlE.DpaWEs+LsshLuKll+NGsQosLDAALDHLtpscs-.AYssLA-hAETtSEShslE....+.-...G..p..YD........ALLlAsPlLAWTRY..hI..P..SGslKs-lspsLpsHL.QAH.VL.AssspVAlAPaLYSIDQLPR.pHs.......-TapLsp.........pLApA.ALusp.ss......KlshsDhPETuPILA.D....P.....RaLLAlVuAPt.GtPLFRWQE-pcs.........p+..hERspCLEQWssQusssLushL..PG.......CEFEs..LLPDAYasACR-ADcRlRPhTVRsAlpYLhsTlu.ssPpcLRAVlAGFGEc....RIDEYRVuFTtRGS.sDVIYGlVWPLYGREsGps............p.puEss.suPLEEIsuLL+EsGVoDlRRHuu+FEPEYCDDCGsPLYADPhGEIVHAEMPED.AssuQPHF...................................................................................................... 0 6 22 32 +10896 PF11064 DUF2865 Protein of unknown function (DUF2865) Pollimgtom J, Finn RD anon Pfam-B_002953 (release 23.0) Family This bacterial family of proteins has no known function. 
19.90 19.90 38.80 33.00 18.00 16.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.87 0.71 -4.22 28 141 2009-01-15 18:05:59 2008-07-31 15:47:56 3 1 83 0 72 148 4 114.00 35 34.84 CHANGED GuspshCVRoCDGhaFPlsttsssush......ptCpuhCPuucsclah....sssshctAsuhsG.csYuchPsAFtYRpphs...ssCoCpsts.s....uhu.hshpscsohct.GDllsopsuh ......shpshCVRoCDGtYFPluhssssuc..h.t....ptCpuhCPuucsplYh....s........upshctAsuhsG.csYssLPsAFtYRpphs...ssCoCpsts.s.....u.u.hs.hppcs.s.h.ct.GDllsstptt......................... 0 12 29 41 +10897 PF11065 DUF2866 Protein of unknown function (DUF2866) Pollington J, Finn RD anon Pfam-B_002950 (release 23.0) Family This bacterial family of proteins have no known function. 25.00 25.00 29.80 32.10 20.40 19.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.43 0.72 -4.05 9 112 2009-09-11 09:26:37 2008-07-31 15:52:26 3 1 56 0 28 74 3 63.30 54 73.21 CHANGED hslpuCRVSsPlppPWGtuhRlVEW.hctsuphpRRVVss-uTstElstsltpHV.GR+athsss ..slpuCRVS.shppPWGtshRlVEW.hchDuphtRRlVss-sTEAElstsltp.V.GR+Yhh...... 0 2 4 14 +10898 PF11066 DUF2867 Protein of unknown function (DUF2867) Pollington J, Finn RD anon Pfam-B_002931 (release 23.0) Family This bacterial family of proteins have no known function. 
20.90 20.90 20.90 21.00 20.70 20.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.65 0.71 -4.48 57 1086 2009-09-11 16:38:42 2008-07-31 16:01:40 3 3 983 0 243 790 164 137.50 35 34.25 CHANGED ltshhssscahDsasltls....s..ss.ph.htchhs....psPsWlstLhtlRstlVp.h.GLcs.....................shtsusp.........luhFslhspssp......ElllutcDpH.Lshclslthpstpt........lphoThV+h+Nhh.GRhYhhsltPhHt.lIV.shL ......................................hhhs.................................................................h..s..h..LWpsRuhhD+hl..Gtch............t+G.Rsptp..........hLpsGDs......................lDsW+Vl.hl-Pc.................p.L.L.hG.h..c.....ss...t......L.....G....R..L..s..h...s..lc..-cG..chcp..............lclp.....Aaa....HP.+Gh..GhhYWhhhhPsHh.aIF+GM.............. 1 87 158 214 +10899 PF11067 DUF2868 Protein of unknown function (DUF2868) Pollington J, Finn RD anon Pfam-B_002930 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as putative membrane proteins. However, this cannot be confirmed. 
26.50 26.50 26.60 27.60 26.30 26.40 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.05 0.70 -5.29 35 236 2009-01-15 18:05:59 2008-07-31 16:14:17 3 3 228 0 62 233 20 301.60 30 66.01 CHANGED phhhhht..p+htpsspss..hs.AL......hthhtptplt+WhlutlsHthWlshhhuuLhsLLlllsspcauFsWpTTL..LusssahpLspsLuh.ss...hlGhulPssphIpASchs.............sssssustttWusaLlusllsYGlLPRLlLhlhshhth+psttp..LslshPtYtpLppRhtssshphus.t-s-ssthspspsssshttts.....................................sslhsulElssp..............hs.shuttpsls-............sccthpplhtthsttsssplllsscsppsPDRGs.lshlspLsppssssphllLhsssu......................shssp+lppWpptlpphsls ...........................................................................tphh+stpss.phs.tL.......ht..hcp.hsRWhluthsHuLWLhsLlusLlslLhLlhsRpYsFsWEoTL..Lus..sshlplsphL.u.hlPu...hL..G..Fs...l..PDsphlhuoph...................ss.sstpuWushLlGsllsYGlLPRLLhhhhChhhh+puppt...LDLptPhYptLhcRhtsphtctss.p-ss..ssshspht.hsss................................................................................................................................................................................sthhlslEhc.p..............h.sthsptlhDts.....................hssRcpht..tL.ppLpph.P.....up..lll..us.csppsPDRGs.LthlscLuc..sAtust..lhhL.s.tu.....................ps..sp+ltpW+ptLpphth...................................................................................................................................................... 0 17 34 54 +10900 PF11068 YlqD DUF2869; YlqD protein Pollington J, Finn RD, Eberhardt R anon Pfam-B_002915 (release 23.0) Family The structure of a representative of this family has been solved (pdb:4dci) and found to form a tetrameric structure of prefoldin-like architecture with the beta-barrel core and helical coiled coil tentacles. This suggests that this family may act as molecular chaperones. 
27.00 27.00 27.20 27.60 25.90 25.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.52 0.71 -4.11 29 228 2009-01-15 18:05:59 2008-07-31 16:23:28 3 3 221 8 66 190 120 129.00 34 95.09 CHANGED lplhRslsVKslVT.phKc-htpplptplspl-pplpQL-hpsp+hlp-hppps.........pQltplppQhspc+schhEp+ppllpQlpQlppLplspEVtpGplEuhhclplGDslhppM.pspIll+DGllpEIR ....hplhpsVsVKtllT-p.Kpclhpphppphppl-p-hpQLchptp+h.pc..p...................pQ.pplppphspchschhEp+cpl..QhpQlchLpLGsElpptplEshhcVplGDshpcph.sspIll+DGllhEIR............. 0 20 49 60 +10901 PF11069 DUF2870 Protein of unknown function (DUF2870) Pollington J, Finn RD anon Pfam-B_002904 (release 23.0) Family This is a eukaryotic family of proteins with unknown function. 21.50 21.50 21.50 21.80 21.40 21.40 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.13 0.72 -3.96 21 183 2009-01-15 18:05:59 2008-07-31 16:36:42 3 2 128 0 101 164 1 83.20 47 31.03 CHANGED cApLWaAGKpL.tcscpLs-Y.lG+NEKTKllVKlpt+spGAPuREPhlsp-pQ+phMthha++QEEhKcLEps--csalsSpWusspuL+pphpGhts.l ................ApLWWAuKEL.tcsKpLsDY.lG.+NEKTKIIlKlpp+GpGsPuREPhlsp-pp+thMhaha++Q............EEhKcLtps-.-cshhsu.WusspsL+pphpGht............................................... 0 32 46 78 +10902 PF11070 DUF2871 Protein of unknown function (DUF2871) Pollington J, Finn RD anon Pfam-B_002884 (release 23.0) Family This family of proteins has no known function. 
26.10 26.10 26.50 26.70 25.30 26.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.59 0.71 -3.99 17 570 2009-01-15 18:05:59 2008-07-31 16:50:41 3 1 524 0 40 228 0 122.30 48 84.36 CHANGED MKKlh.suhhYhIlGLluGlFYREaTKhpsFs..GpTpLsllHTHhLVLGhlhFLIhLhL-K.FtLops..phFshFFllYNlGLllTlshhhh+GlhpVhGhsh....ssuluGlAGlGHIlloluhlhFhllLp+ul ....................M++Lh.uhhhYhIIGLhSGhFY.REhTKshsas..G.sTpLslVHTHoLlLGhhhFL...IlLsL-.K.lFpLoph..hh.FshFFhlYNlGlllTluhhss+GhhQVs..Gts.h...........spuhuGhAGlGHshhhsuLlhhhhLL+pu................ 0 19 28 37 +10903 PF11071 DUF2872 Protein of unknown function (DUF2872) Pollington J, Finn RD anon Pfam-B_002883 (release 23.0) Family This bacterial family of proteins has no known function. 23.20 23.20 23.90 41.70 21.20 23.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.70 0.71 -4.31 31 147 2009-09-10 21:27:12 2008-07-31 16:58:52 3 1 144 0 38 146 453 138.40 54 92.64 CHANGED YLSGEIHTDWR-cIhpGupt.sLslpFouPVTDHsuSDssG.sILGsEcspFW+D+KuAKlNuIRT+phI-cuDlVVVRFG-KYKQWNAAFDAGaAuALGKslIllHsc-htHPLKEVDAAAhAVscTP-QVVclLcYVhp .YLSGEIHTDWR-cItcuuct.hsLslsFsuPsTcH-uSDssG.sILGtpsssaW+D+puuclNuIR...........T+phlpcADlVVVRFG-.KYKQWNAAFDAGYAuALGKPlIll+sc-lpHPLKEVcAuA.AsscTscQlVclLpYVh.p......... 1 18 31 35 +10904 PF11072 DUF2859 Protein of unknown function (DUF2859) Gunasekaran P, Mistry J anon Pfam-B_001915 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
25.00 25.00 25.60 25.20 21.50 21.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.64 0.71 -4.73 34 268 2009-01-15 18:05:59 2008-07-31 17:10:28 3 1 208 0 65 232 4 133.20 41 84.96 CHANGED ltDhG.GtsshPaapuls.pssp............s.stss.sssspushLPVpSscLo...PGsVptRslp.......hPGh...pPlFLlGDDshSppWLpp+tspLcphpAlGLVVNVsohtsLppL+plA.PGLslhPsoG-DLAcRL...sLcHYPVLITsTG.lp ..........................ltDhG.utss.Phhpsls.p.p..............s..ssst..stushLPVposcLo...PGpVt.tR.slp.......hPGh...tPlFllGDDshSppWLpp+tstL+phpAlGLVVNVpohptLptLpphu.sGl.LhPssuD-LApRL...tLpHYPVLITssul....... 1 8 30 52 +10905 PF11073 NSs Rift valley fever virus non structural protein (NSs) like Gunasekaran P, Mistry J anon Pfam-B_001643 (release 23.0) Family This family contains several Phlebovirus non structural proteins which act as a major determinant of virulence by antagonising interferon beta gene expression [1]. 25.00 25.00 25.80 25.50 22.00 21.20 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.44 0.70 -5.11 15 202 2009-01-15 18:05:59 2008-07-31 17:12:38 3 1 47 0 0 155 0 225.20 55 91.26 CHANGED PsIspssuhschhsoVsYlsFsp.tspslSsapshEIPlcpaR.uhcpRspLScFhspuEhPtpWGsu...SpVpptssphFDstIpcLuchslcshpR.shPNlccALSWPLGaPohcFFphus.h-sappshppKsshuTpllRh.....Gs.upsLD-slVpsH++lLtEuppRGlsp-hhsGaDlhKEIAhlQhlRllsAlshDhss.....................................sssssslhshlhpp+tshpsp..thlGNppWhPl....s .........................................VSVEYI+GDGPPRIPYSMVGPCCVFLMHHRPSHEVRLRFSDFYNVGEFPYRVGLGDFASNVAPPPAKPFQRLIDLIGHMTLSDFTR..FPNLKEAISWPLGEPSLAFFDLSS.TRVHRNDDIRRDQIATLAMRS.....CKITNDLEDSFVGLHRMIVTEAILRGIDLCLLPGFDLMYEVAHVQCVRLLQAA.+EDISN.....................................AVVPNSALIALMEcSLh..................h...................... 0 0 0 0 +10906 PF11074 DUF2779 Domain of unknown function(DUF2779) Gunasekaran P, Mistry J anon Pfam-B_001581 (release 23.0) Domain This domain is conserved in bacteria. 
The function is not known. 23.10 23.10 23.10 23.60 22.80 23.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.99 0.71 -4.10 49 211 2012-10-03 01:22:09 2008-07-31 17:15:01 3 3 194 0 74 206 203 136.70 28 22.99 CHANGED halDFEThssAlPhacsspP.YpQlPFQaSlHl.ppss...tp...hpHh.paLs.t.s......hDP+pthlcpLhptlspp................................................................GsllsYNpu.FEpopLpE.huph.........................hs-htphlppIh..............pphlDLhchFppt..................aYcsphpGShS ...............hahDFEoh.p.ulPhacsspP.ap.QlsFQaSlcl.ppss...ts...h.pHh.talt.p.h......tDPRpt....hhppLhphlspp................................................................ushlsYNp.u.FE...ps..pLpc.lAph......................................................hschpptlppIh......................pphlDLhshFppt..................hhpsphpGshS........................ 0 34 61 70 +10907 PF11075 DUF2780 Protein of unknown function VcgC/VcgE (DUF2780) Gunasekaran P, Mistry J anon Pfam-B_001695 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.10 22.10 24.10 22.70 21.60 21.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.94 0.71 -4.19 32 234 2009-01-15 18:05:59 2008-07-31 17:15:52 3 1 220 0 62 185 18 154.60 35 91.16 CHANGED llhhhslhossupA.h.hsshstt.......................tt.htttsspssssspsssLlsslhupLsloppQAtGGhGuLLulApssLuss-aopLuptlPGh-sLhuus...............s..su.suhLsph...LG................sslpuhsslpsAFptLGl...ssshlstFssllhsY.LsppG...sushLhpuLu .......................................h.hhhhslsussuhA..as..tssts........................ss..ss.ss..tphsps.ttsssLlstlsoQLslospQAsGGsGuLLuhA....p....NpLsusphSpLsphIPGlssLtuss......................s..s................t....Lu................................sltshspVspAFssLGl...DsuMlppFsPllhpYLspQG....ASpsLLtSLu............. 
0 13 26 45 +10908 PF11076 YbhQ Putative inner membrane protein YbhQ Mistry J, Coggill P anon PRODOM_PD083760 Family This family is conserved in Proteobacteria. The function is not known but most members are annotated as being inner membrane protein YbhQ. 25.00 25.00 26.40 26.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.86 0.71 -4.55 3 433 2010-01-12 15:19:43 2008-08-01 11:37:37 3 1 432 0 22 82 1 131.30 87 98.04 CHANGED MKWQQRVRVATGLSCWQIMLHLLVVALLVMGWMSGoLV+VGLGLCALYuVTVVhMLsFQRHHEuRWREVGDFLEELTTTWYFGAALIALWLLSRVLHNNLLLALAGLVILAGPAVVSLLAKDKKRastsFuSKHG ..MKWQQRVRVATGLSCWQIMLHLLVVALLVVGWMSKTLVHVGVGLCALYCVTVVMMLVFQRHPEQRWREVADVLEELTTTWYFGAALIVLWLLSRVLENNFLLAIAGLAILAGPAVVSLLAKDKKLHHLo..SKH.RV...... 0 1 1 12 +10909 PF11077 DUF2616 Protein of unknown function (DUF2616) Mistry J, Coggill P anon PRODOM_PD264505 Family This cysteine-rich family is expressed by the double-stranded Nucleopolyhedrovirus, a member of the Baculoviridae family of dsDNA viruses. The function is not known. 25.00 25.00 81.60 81.50 22.40 21.80 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.54 0.71 -4.77 14 34 2009-01-15 18:05:59 2008-08-01 11:38:50 3 1 33 0 0 29 0 179.10 33 94.21 CHANGED ME...LIKPFlKYS+hYRsss.ssst..+phlacpWhp-hpppphth.......pshphts.th..........................CpFChs......sppps.......thhCppChFPL..s.......s.cpEhthYsLLSVCYaEssspss.sp.............................+sVWRp................Rl+hsW.p.cpp.+........lYplhhs..................p.ClQCpp......scpsssp.hhpFshchFCppChFPLFpI ...MELIKsFlKYS+tYRsss.sppt...+phlacpWsp-ltspphth.......pphhhpshtt..........................CpaChs........sspps.......thhCcpChFPl..h.p.....s.cpEhthasLLSlCYaEps..sst.....spst...........................+hVWtp......Rl+hsW.st-hstp........hYplhts...............p.ClQCppt.....sppsstp.hhpFshphFC.pChFPLFsI..... 
0 0 0 0 +10910 PF11078 Optomotor-blind Optomotor-blind protein N-terminal region Mistry J, Coggill P anon Pfam-B_595 (release 23.0) Family This family is conserved in Drosophila spp. Optomotor-blind is one of the essential toolkit proteins for coordinating development in diverse animal taxa, and in Drosophila it plays a key role in establishing the abdominal pigmentation pattern, in development of the central nervous system and leg and wing imaginal disc-formation of Drosophila melanogaster. This is the N-terminal region of the protein and does not include the T-box-containing transcription factor that plays a part in DNA-binding. 21.00 21.00 21.80 81.10 19.60 20.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.11 0.72 -3.63 3 125 2009-01-15 18:05:59 2008-08-01 11:41:54 3 4 24 0 11 132 0 86.40 95 25.50 CHANGED suPPsPPYFPAAALAALuGSsAGsHP.GLYPGsLlPKhPPH..huHPHP...HHPL.GuAYTTAEDVVLAAVAAHQHHPAM.RPLRALQ ....Q.QPPPPPY.FPAAALAALAGSPAG..PHH.PGLYSAAGGLRFPPH..PGHPHPHSHPHAHHPL..GSAYTTAEDVVLASAVAHQLHPAM.RPLRALQ 0 3 3 8 +10911 PF11079 YqhG Bacterial protein YqhG of unknown function Mistry J, Coggill P anon PRODOM_PD123329 Family This family of putative proteins is conserved in the Bacillaceae family of the Firmicutes. The function is not known. 
25.00 25.00 41.30 29.50 21.40 22.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.56 0.70 -5.57 11 186 2009-01-15 18:05:59 2008-08-01 13:12:26 3 2 165 0 35 146 0 223.40 52 94.96 CHANGED MpQp-IHpFLpRaFpANsCsIlEcSPuahTVQLTlEMDKcLMNRPFYWHYLEKTGGlPNPMpLTLITcppcss-slcGEhIHFGSPRLaQIFpus+chGualRLYEphssssstpsPLcPWLGhNlKlSYQCDRKKDhllSlGLpLIsGpllEsFa-+LpphsLoscIPDYCFTlSPlIKPcSGlpRlcphlcshhcs-sc-WAcpAhcRWpcDLcLL-pFYEcsEEKPEsYclEKpALcc.YEP+IplsllNGGLFYLp .................M..tpl.pah.pahts.ts.hhptt.thhpVQLoh-MDK.LMNRPFYWHYlEKTGGsPNPM+LTLITssEs.tps.-....GEhIHaGSPRLHQIFposKchGuaIRLY...Ecl.....c.......ps...u........us...c...sPLc..PWLGlNlKlSYQCDRKKDhLhSlGlHLISGThhtsFH-pLpplcLTP+IPDaCFTlSPlIKPpSGlpRlEshLcshlup-DHsWAcEA+hRWpcDLsLLs+FYE.....-.........s.........-Eh.....P.........Es....YclEKpA.Lpc...QYEP+IslpIINGGLFYl.p....................................... 0 10 23 25 +10912 PF11080 DUF2622 Protein of unknown function (DUF2622) Mistry J, Coggill P anon PRODOM_PD066031 Family This family is conserved in the Enterobacteriaceae family. Several members are named as YdiZ, a putative cytoplasmic protein. The function is not known. 20.80 20.80 21.10 20.80 19.70 19.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.00 0.72 -4.13 7 726 2009-01-15 18:05:59 2008-08-01 13:13:26 3 1 473 0 30 125 0 88.60 52 98.26 CHANGED Musu-lTRYVlTVpaHE-oLTEINELsNHLTRsGFlLThsD--GslHELGTNTFGLlSu.St-ElptLsuGLupuALsKcs-IsVsTaE-WtKs.p ......ups-lspYVlohhhpEssLTElNELsNhLTRsGFhLThsD--Gs.HELGTNTFGllSop.ut-EI+-LlsuLspsAss+Ds-loIsTa--a.pt..p.................. 0 2 3 17 +10913 PF11081 DUF2890 Protein of unknown function (DUF2890) Mistry J, Coggill P anon Pfam-B_629 (release 23.0) Family This family is conserved in dsDNA adenoviruses of vertebrates. The function is not known. 
23.30 23.30 23.60 24.10 23.00 23.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.72 0.71 -4.23 18 154 2009-01-15 18:05:59 2008-08-01 13:32:05 3 4 79 0 1 180 0 167.20 35 88.93 CHANGED MsP+tps.KpLtsp.sPsc.........D.EEpWD..SQA.......tEEthE-W............DSL-E-.pEtE.EVEEtsssp.....phssoSsu.utSpsstSsPspsst...h+sp...pRWDpTuphssPossusss...................................................thstpc.suhRph+NpIhssL....QpSpGp.............tShTRp.hLYH+u.s..p+sLc.hctLaspYCuhs ....................................................................................................hp.s...p.............-.EEpW-..SQA.......pE-phE-.............tS.t--tE.E.hE..El-Etpssp..............ss..ss...t..s..sssspssh.osPstsst.......+ss...pRWD...pstt..ssst...t..ssttt...........................................................thp.tpshpShRth+stIhssL.....QpstGp................hShTRp.hLaH+u.s..ppsLc.hctLas.aht.................................................................... 0 0 1 1 +10914 PF11082 DUF2880 Protein of unknown function (DUF2880) Pollington J, Finn RD anon Pfam-B_001492 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 25.00 93.20 20.10 19.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.71 0.72 -4.09 3 5 2009-01-15 18:05:59 2008-08-01 13:35:43 3 1 5 0 5 5 0 78.80 60 65.78 CHANGED lMAAulhhASuAsAAsslEhP+PRGKDEAPEAPVACMKAVKAALPNPDpFKWVuGTsRKVAEDAYSVVADVEYLupDGA ...lMALSlhsASuAsAAussEAP+PRGKDEAPEAPVACMKAVKAALPNPASFKWVGGTsRKVAEDAYSVVADVEYLAQDGA 0 0 1 3 +10915 PF11083 Streptin-Immun Lantibiotic streptin immunity protein Coggill P anon Pfam-B_43518 (release 22.0) Domain Streptococcal species produce a lantibiotic, streptin, in a similar manner to the production of nisin and subtilin by other lactic acid bacteria, in order to compete against competing bacteria within the environment. 
The immunity protein protects the bacterium from destruction by its own lantibiotic. In general, there is little homology between the immunity proteins of different genera of bacteria. 20.70 20.70 20.90 23.50 20.60 20.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.12 0.72 -4.01 12 173 2009-01-15 18:05:59 2008-08-01 13:38:42 3 2 152 0 7 87 3 96.80 46 22.38 CHANGED IAplDl+LsphpEKIATLNKMAEVLlNLpSc-.po++LA+Y-FSKLNLTEoloLEpVpcEIphLQppLshhlDcYEphlR+L-pFVclLN..hsctht.ca .............IAElDhcLppsQEKIATLNKMAEVLINLKS-sppo+KLA+Y-FuKhNhTESIpL-plscEIhcLQpELup.lscYEclsR+L-pFlKllN..hsK................... 1 1 3 4 +10916 PF11084 DUF2621 Protein of unknown function (DUF2621) Mistrey J, Coggill P anon PRODOM_PD086666 Family This family is conserved in the Bacillaceae family. Several members are named as YneK. The function is not known. 25.00 25.00 25.50 25.50 24.70 24.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.83 0.71 -4.50 12 164 2009-01-15 18:05:59 2008-08-01 13:46:17 3 1 163 0 32 112 0 136.90 71 96.97 CHANGED M....LpG..WFhhFILhWsllLlsLhuIGGFFMFRKFLKRLPKEDGKS.LDWp-aYI-pT+HLWs-EpKpLLpELVpPVPELFRDVAKpKIAGKIGELALcEcAspIsp-LlIRGYIlATPKRDHKFLhK+LpE+pIDhuPYEpLh .......LpGWF.WFIlhWsVlLlGLMSIGGYFMFRKFLKRLPKEDGhShLDWpEaYIsKTRHLWsDEpKQLLEELVSPVPELFRDVAKuKIAGKIGELALpEpAopITpDLII+GYIlATPKRDHKFLlKKLpEKcIDaosYpsLL........ 0 11 23 26 +10917 PF11085 YqhR Conserved membrane protein YqhR Mistry J, Coggill P anon PRODOM_PD101610 Family This family is conserved in the Bacillaceae family of the Firmicutes. The function is not known. 
26.00 26.00 38.90 37.50 25.80 25.80 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.25 0.71 -4.73 12 156 2009-01-15 18:05:59 2008-08-01 13:48:14 3 1 155 \N 28 109 0 166.00 51 98.23 CHANGED M......sppppphcpppppp.hohhs+slhhGFsGGVhWShluYlsahFsFoEluP.NhlLpPaslG-WKcshlGshluIlhIGllSIusAFLYashL+KlcuhWsGllYGlhLWhlVFalhNPlFPsl+slp-LshsTllTTlClYILYGlFlGYSISaEhNEhp..ppp.t....tppp .......................pphts.....................pphl.pIGhFGGlFWGuIhYhhalFsFTEsuP.NalLhPFAhGuWK-GshGNllGIVshGLLSIllAFLYpAhLtKFcGlhPGllYGLhWWuLLFauhG.lhPslKosh+Ls+-TIVTTICIFILYGVFIuYSlSatsNsp+tcpEt.t+s.....c.......... 0 6 18 20 +10918 PF11086 DUF2878 Protein of unknown function (DUF2878) Pollington J, Finn RD anon Pfam-B_001539 (release 23.0) Family This bacterial family of proteins has no known function. Some members annotate the proteins as the permease component of a Mn2+/Zn2+ transport system however this cannot be confirmed. 20.90 20.90 23.00 23.00 19.10 18.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.18 0.71 -4.22 59 319 2009-01-15 18:05:59 2008-08-01 13:54:18 3 1 313 0 84 276 229 151.30 30 86.58 CHANGED hthllNhlhFQhsWhhsVlhs.sphhhhhhhh.....lhhHhhh......s..phts-hphllhlsslGhhlDulhhthGlhpF.............ssshhPl..WLhhLWhhFuhslspuL.saltphsh.lhsllGuluGshSY.hAGt+luA.VphshshhhohhlLulhWullhPll ....h.hhlhsslhFplhWhhuVlut..p...p.hhhlhsll.........llhphhh......s..pttsphphhlhhsllGlhlDohhhhhGlhsF.............ssshhPl....WLlsLWhsFuhhhs+.L.shlpphsh.lhslhGulhGslSY.asGh+.huA..VphshsshhshlsLsltWsslhsl........... 0 18 34 64 +10919 PF11087 DUF2881 Protein of unknown function (DUF2881) Pollington J, Finn RD anon PRODOM Family This viral family of proteins has no known function. Some members are annotated as p34 however this cannot be confirmed. 
25.00 25.00 115.60 115.30 24.60 24.60 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.65 0.72 -4.49 2 14 2009-01-15 18:05:59 2008-08-01 14:59:55 3 1 6 0 0 2 0 54.00 75 88.52 CHANGED Ms-FstsllTllTAIIGVAIlAVlVSppSNTAGVIpuuouGFSshLtoALuPlh Ms-FstsllTllTAIIGVAIlAVlVSppSNTAGVIpuuouGFSshLtoALuPlh 0 0 0 0 +10920 PF11088 RL11D Glycoprotein encoding membrane proteins RL5A and RL6 Pollington J, Finn RD anon PRODOM Family RL5A and RL6 are part of the RL11 family which are predicted to encode membrane glycoproteins. Two adjacent open reading frames potentially encode a domain that is the hallmark of proteins encoded by the RL11 family. 25.00 25.00 25.20 33.50 24.50 24.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.38 0.72 -4.18 4 33 2009-01-15 18:05:59 2008-08-01 15:20:20 3 1 5 0 0 28 0 87.50 44 92.09 CHANGED hpKLpsopGcNlTIscccD.hoTcWcph..ssGst.LCNVTupGssllNs..TlCVSSCoHTSLsLCNhTpts-ulaslG+hhs...DE.sGELWhloVS .................hp+LpsopGcNlTlscc+c.hootWcpa......D.sG..st.LCNVTupsssslNo..ohCVosCuHooLsLCNhTpts-uhaslu+hhs...DE.sG-lWhlpVp............ 0 0 0 0 +10921 PF11089 SyrA Exopolysaccharide production repressor Pollington J, Finn RD anon PRODOM Family SyrA is a small protein located in the cytoplasmic membrane that lacks an apparent DNA binding domain. SyrA mediates the transcriptional up-regulation of exo genes involved in the biosynthesis of the symbiotic exopolysaccharide succinoglycan. It does this through a mechanism which requires a two component system [1]. 25.00 25.00 25.50 25.50 24.70 24.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.81 0.72 -4.15 6 37 2009-01-15 18:05:59 2008-08-01 16:07:23 3 1 26 0 17 35 0 37.80 45 41.27 CHANGED AlAsYFsotShhsAhVsTLsCulLLQluYFluVLFLla .AlAoYahstShhsshlpTLhCuVLlQlGYFhuVLhLVh.... 
0 1 6 9 +10922 PF11090 DUF2833 Protein of unknown function (DUF2833) Pollington J, Finn RD anon PRODOM Family This family of proteins with unknown function are found in the bacteriophage T7. Some of the members of this family are annotated as gene 13 protein. 25.00 25.00 26.70 26.30 24.60 24.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.78 0.72 -3.82 5 41 2009-09-11 15:53:06 2008-08-01 16:15:47 3 1 38 0 0 34 35 84.80 42 57.97 CHANGED ssVshslsGhVLAIGGNpGDpVWFVTSchVa+LocKpKREFRKLIhEYRDtML-Q.YsoIWNYVWVGNKSHIRFLKoIGAVFHcEaT ................lsh.hhGhslAIGGss....u.spsWFlTSspV...h....phstpt+hcFR+hlhcahDphLcp..Ysp.LWNaVWsGNpuHIRFLKolGAh..Fcp-........ 0 0 0 0 +10923 PF11091 T4_tail_cap Tail-tube assembly protein Pollington J, Finn RD anon PRODOM Family This tail tube protein is also referred to as Gp48. It is required for the assembly and length regulation of the tail tube of bacteriophage T4 [1]. 20.20 20.20 20.30 20.50 19.80 19.90 hmmbuild -o /dev/null HMM SEED 348 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.01 0.70 -5.74 8 48 2009-01-15 18:05:59 2008-08-01 16:25:33 3 1 47 0 0 41 153 313.60 32 90.19 CHANGED +VK-.Is.csschhtuh.uupssAGtsocsc..ssophhsAQFPspRAuGNDsst.a.lsDLYKNGLLFTAYshouRsos....sLRshRp.....ssssIhSptsusVpsphstho...........tshhsppAlANILLPRSpSDVDssSH+FND.....ls-SLls+GGuouoGsLSs.......hASTAlaGuLESITpGhhAD.........suEQIYssoRoMYuGu-sRTKsasWpLTPRShpDLhpIlpIYchFshaSYGpoutSphAtElKuplDshY+sThhc.hss.sshpNpT........LhEtIT.uFLoNVhVVSNPslWhI+NFGsooua.....-shp-sFGPsQIpSlRFDKTPDGpFNGLAluPNLPSoFsLElTFREIIsLsRuo 
........................................................................................h...hhs.htt..stt.suG..o.tt....pthhsAQaPstRsuusDss..h.hssLYpNGLLFoAashsup.os....phRs.Rp.....ttp..phhp.tttssl..th.t.s..............hspps.l...ssILhPRupoDs-ssSH+FND.....Vt-SLls+Guuouo.GhLSN.................hAST...AlaGulE.......Slo.......p.GhhAD........................pGEQlhssu+uMYsGs-sRTKsFoachoPRshpDLhpIlpIYchFp.....hhSYGpsGpSp..hAt-..l+s.lDshY+sThhp.h.....s.s.tsps...............hhEthT.uhloNV..hVlosP...slW...hl+sFu...pssph.........-shp-hF...GPstIpSIRhsKoP-GpFsuLA...huPNhPS...o...hsLElThpEllsLsRu....................................... 1 0 0 0 +10924 PF11092 Alveol-reg_P311 Neuronal protein 3.1 (p311) Pollington J, Finn RD anon PRODOM Family P311 has several PEST-like motifs and is found in neuron and muscle cells. P311 could have some function in myo-fibroblast transformation and prevention of fibrosis [1]. It has also been identified as a potential regulator of alveolar generation [2]. 25.00 25.00 56.20 56.10 20.10 16.20 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.27 0.72 -4.01 4 38 2009-01-15 18:05:59 2008-08-01 16:31:11 3 1 24 0 16 33 0 62.60 68 93.40 CHANGED MVYYPELhVWVSQEPFPsK-MEGtLsKGRLPVPKEVNRKKpsEstAASLsPlGusEh+SPtIuYLHsF .MVYYPELhVWVSQEPFPNK-MEGRLPKGRLPVPKEVNRKKssETsAASLTPl.GSs..EL+SPpISYLH.F...... 0 1 1 3 +10925 PF11093 Mitochondr_Som1 Mitochondrial export protein Som1 Pollington J, Finn RD anon PRODOM Family Som1 is a component of the mitochondrial protein export system. The various Som1 proteins exhibit a highly conserved region and a pattern of cysteine residues [1]. Stabilisation of Som1 occurs through an interaction between Som1 and Imp1, a peptidase required for proteolytic processing of certain proteins during their transport across the mitochondrial membrane [2]. 
This suggests that Som1 represents a third subunit of the Imp1 peptidase complex [2] 21.40 21.40 22.70 22.10 21.30 20.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.04 0.72 -4.18 10 77 2009-01-15 18:05:59 2008-08-01 16:33:09 3 1 76 0 57 72 0 83.00 31 76.66 CHANGED MAPPTPVhotcElppphssthpsstch......cCpLKSLTQaECsF+so.tts....sEhICLPFKRLFpcClhP.............c+hlNIElTDppTN .............................MuPP...sslhstppl.tphtp..pt.tph..............pCp.LhplsQapCsh..css.......t.......stll.ChPhpRLFc+C...........................ts.thslEsTshpt.................. 0 9 30 48 +10926 PF11094 UL11 Membrane-associated tegument protein Pollington J, Finn RD anon PRODOM Family The UL11 gene product of herpes simplex virus is a membrane-associated tegument protein that is incorporated into the HSV virion and functions in viral envelopment [1]. UL11 is acylated which is crucial for lipid raft association [1]. 20.30 20.30 20.50 43.60 19.80 19.20 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.80 0.72 -4.14 11 49 2009-01-15 18:05:59 2008-08-01 16:39:40 3 1 32 0 0 40 0 39.80 52 42.96 CHANGED MGQusStutss....CCR..pNhLlTcsGEsluLsA-sF-sF-L....- MG.uhSsupss....CCR..pNhLlTcsGEVVoLsAcsF-shDlE........... 0 0 0 0 +10927 PF11095 Gemin7 Gem-associated protein 7 (Gemin7) Pollington J, Finn RD anon PRODOM Family Gemin7 is a novel component of the survival of motor neuron complex which functions in the assembly of spliceosomal small nuclear ribonucleoproteins. Gemin7 interacts with several Sm proteins of spliceosomal small nuclear ribonucleoproteins, especially SmE [1]. 
21.50 21.50 23.30 30.60 20.90 19.20 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.50 0.72 -4.24 7 68 2009-01-15 18:05:59 2008-08-01 16:52:05 3 2 59 2 45 74 0 78.10 46 57.14 CHANGED popEQ+tRusLRERaL+SL.sMss+sssFThHEtsp..VsApFtAoDlsltNFhVSpLpTPIGlpsEAlLRsoDllSaTFcs ............pEQcARuhLRERaLRo...LluMs...G+p.VsFs..LHEslc..VsA.cFsAoDlDltNFhVSpLpTPlGV.Q.sEALLRssDIIuaoFc...... 0 15 19 34 +10929 PF11097 DUF2883 Protein of unknown function (DUF2883) Pollington J, Finn RD anon PRODOM Family This family of proteins have no known function but appear to be restricted to phage. 25.00 25.00 30.40 30.30 19.00 16.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.67 0.72 -3.86 3 35 2009-09-11 15:42:54 2008-08-01 17:04:17 3 1 34 0 0 22 0 73.20 86 100.00 CHANGED MLNNNVVYLGYPGLPPNKLEGLMLELRTVARCSGLEFRFQDTPRRGKNYTQMHILKQRSKTGAFVMHYKPRKEKF MLNNNVVYLGYPGLPPNKLEGLMLELRTVGPSSGLEFRFQDTPpRGKNYTQMHILKQRFKTRAFVMHYKPRKEKF 0 0 0 0 +10930 PF11098 Chlorosome_CsmC Chlorosome envelope protein C Pollington J, Finn RD anon PRODOM Family Chlorosomes are light-harvesting antennae found in green bacteria. CsmC is one of the proteins that exists in the chlorosome envelope. CsmC has been shown to exist as a homomultimer with CsmD in the chlorosome envelope [1]. CsmC is thought to be important in chlorosome elongation and shape [1]. 26.30 26.30 26.70 126.80 26.10 26.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.47 0.71 -4.47 5 13 2009-01-15 18:05:59 2008-08-04 09:12:01 3 1 13 0 11 12 0 138.80 73 99.18 CHANGED MSESYQKLRKDFKDLEFTDRLTFLAEGsLLTGQSAVVGGLELAGSVVETVAGTVGSLlDATGIG+LLGsTGGVVGETIDRVAITVKDVSRSAGELYSDAVKNVENVTDNAA+AIGDAGVSASEAVKNlsGSFQKosGKK MSESYQKLRKDFKEL-FTDRLTFLAESlLLTGQSAVVGGLELAGSVVETVuGTVGSLlDAoGIGslLGsTGGVVGETIDRVAITVKDVSRSAG-LYsDAV+NVENVTsNAAKAlGDAGVSASEAVKNlAGSFQKssuKK.. 
0 1 2 8 +10931 PF11099 M11L Apoptosis regulator M11L like Pollington J, Finn RD anon PRODOM Family Apoptosis regulators function to modulate the apoptotic cascades and thereby favour productive viral replication. M11L inhibits mitochondrial-dependant apoptosis by mimicking and competing with host proteins for the binding and blocking of Bak and Bax, two executioner proteins [1]. 25.30 25.30 25.40 42.40 24.70 25.20 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.96 0.71 -4.51 9 129 2009-01-15 18:05:59 2008-08-04 13:01:09 3 1 37 6 0 96 0 160.50 52 77.66 CHANGED +sllh........................hYlsctslsc....LsshEpslLshIppsC-hIppsYppshs.lss.hlchsshSh.sIpcIKsplhpsLhsDspPSVKLAolSLlShIhc+hh..scslhh.shlhs-IhstIotptcplIsFIpcppc.sss...h...cphlplhshhshh.hlsYhhlKahh .p..hVY........................YYhsKpRLD-hYRpLsopoRSalDlIshhCDKlNNDYs+DhNlMYD...hASscSa.slhDIsNEV.soILhsspulGVRLATISFIopluKRshNslcTI+MhoLLScsIsD......-pFlDYIschs..ssss..s.hpT.Rchl+lhGlssIh.FsTYtsLKYh....................... 0 0 0 0 +10932 PF11100 TrbE Conjugal transfer protein TrbE Pollington J, Finn RD anon PRODOM Family TrbE is essential for conjugation and phage adsorption. It contains four common motifs and one conserved domain [1]. 25.00 25.00 74.40 74.20 22.10 19.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.32 0.72 -4.12 4 124 2009-01-15 18:05:59 2008-08-04 13:04:03 3 2 103 0 2 68 0 65.30 77 78.99 CHANGED +llsFLlRLolTlIVISPslYWSWDsVKsTTA-DhlhAsllIhhsGlhhhlLYhFhslLTKlhptD ..RFIDFLIRLLITAIVISPVIIWSWDTVKETTADshLAAAFVILYSGVL.LFILYFCFSALTDLQKs.... 0 0 0 1 +10933 PF11101 DUF2884 Protein of unknown function (DUF2884) Pollington J, Finn RD anon Pfam-B_001481 (release 23.0) Family Some members in this bacterial family of proteins are annotated as YggN which currently has no known function. 
30.60 30.60 31.30 32.70 29.20 29.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.31 0.70 -4.98 39 793 2009-01-15 18:05:59 2008-08-04 13:22:51 3 1 717 0 94 390 15 209.20 48 88.49 CHANGED CsVslpt-lhlsspplplhpsss.......pphhIspsspLalsGcplsLsscQpptlppYppslpptlPplhplAp-ulplApsAlsplhsshhu..spshsplpphhsplptplpphhhppssphhhsspthst....hpp.appchEpthcphlppShGolhhslusph.....tpGs....phsshtpphssltppl-pphcppupsl-t+AcplCsplpsLsp.EppLpttlPpLtshplhp ........CSVoP+DDVIlSPQoVQV+GcNG........NLVIo.PDGNVhhNGKphoLsAAQRcQA+DYQucLRSsLPWID-GA+sRVEKARlALDKlIspphG...psSphRuRLTcL-AQLKpQMNRIIEpRoDGLTFHacAIDQ.............VcA....-GpQLVsQuMGGILQDSlNEMGsK..tshcuGG......NPLQ.slhGSLGGLQsu.IQsEWKpQE+DFQQFG+-VCuRVsoLE-sRKAL..............sst............ 0 12 31 60 +10934 PF11102 Cap_synth_GfcB DUF2886; Cap_synth_GfcC; Group 4 capsule polysaccharide formation lipoprotein gfcB Pollington J, Finn RD, Eberhardt R anon Pfam-B_1366 (release 23.0) Family This family includes lipoprotein GfcB (YmcC), involved in group 4 capsule polysaccharide formation [1]. 
20.20 20.20 20.20 21.10 20.00 19.90 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.13 0.71 -4.76 39 859 2009-09-11 10:59:46 2008-08-04 13:41:14 3 1 602 2 69 362 161 193.80 52 91.11 CHANGED LsuCopp.........hpshssolphuh.............hsstssslospplps.sshsshhhplsstspshhlLshs-p...............hpWhosDpthlshcsGhllpTpuLss..sLhus..............pssshhthhpt.....ssstshphphphsst........chshh.hpsphph.upcsltlstt.shpshchpEpsphs........stp....apNpYWl-sssGp....llpScQhluPshshlphphL ...............................LpuCoto.............ppplssolhsSL................FGssslploDp.p.IQs.hPYAS.Yhp.LNsGsplFVVLAasEs...................sQpKWlo.QDpAhLVTppGRLVKTl.hhss..NLlEVs...........N.usDPLhpshpI.......hDGusWTRshsWoEt.......pph..RhATspSsFp.asGs-Tlpluuc...cshspVhsEEVoos............ptpWpNpYWlD.S.pGQ....lRQScQhLGAshhPVchTh..................................................................................... 0 8 27 49 +10935 PF11103 DUF2887 Protein of unknown function (DUF2887) Pollington J, Finn RD, Bateman A anon Pfam-B_1330 (release 23.0) Domain This bacterial family of proteins has no known function. These proteins may be distantly related to the PD(D/E)XK superfamily. 22.40 22.40 22.70 22.60 22.10 22.30 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.13 0.71 -4.71 29 232 2012-10-11 20:44:46 2008-08-04 14:02:13 3 5 44 0 85 297 45 160.80 36 68.55 CHANGED TDslFY+LFpp.PshlFELls.pssspAcsYcFsSVElKppsFRlDGVFlPhtst.stPlaFsEVQFQpDpphYpRlFAElaLYltppp.p.scWpuVlI....YssRsl-.sshhsacplLsuspVpRlYLsELsshpp.slsluLltLh.lhspppssppA+hLlppsp.....pphhsttppppll-lIpTIllYKFsplS+cElcAML .........................TDplFYplFtp.PphhF-L..ls..sstpsp.YpFsShplKphtFRlDGlFhP.tp...s.PlahsEsQhQ.DtthY.RhFsElalYltp.p...psWpslll....ass+ph-......t.......apthlp....tp....l......p.RlYL....s....-.....Ltp.....t.s.lslu.h.l.pLl..h.sptps.ppuptLlppsp.....pp........ppllpLIEoIllYKhPphoccElptMh...................................... 
0 5 66 85 +10936 PF11104 PilM_2 Competence_A; Type IV pilus assembly protein PilM; Pollington J, Finn RD, Eberhardt R anon PRODOM Family The type IV pilus assembly protein PilM is required for competency and pilus biogenesis [1-2]. It binds to PilN and ATP [3]. 66.50 66.50 66.50 67.00 66.40 65.90 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.87 0.70 -5.56 37 952 2012-10-02 23:34:14 2008-08-04 14:34:40 3 4 881 1 318 846 348 315.90 25 86.32 CHANGED GlDIoSouVKllELoc.ps..spa+lEsaAhtslPcsuls-tsIt-h-uVucsl+plhcctssps+psAhAVsuSuVITKhI.hsusLs-pELEsQl.chEAspaIPasL-EVslDFpllG.s...tsssscVcV..LLuAsR+EsV-sRlssl.....-tAGLpscVlDVEuaAlpR.Ahphlhpp........Lsssspsp.....sVA.............llDIGAshTslsVlpsGphlYsR-QsFGGpQLTppIt+pYGhohEEApttKppGsLPc.............sYp.-lLpPFhpslsQplsRuLQFFhoooths.pVDtllLuGGsAsl.GLschlppclGhsThlsNPFtsMplusc.lptptLpp-usuhhlAsGLALRuF .............................................GlDIuspulKhlpLp......t....pt.....sp....hplppauhhslPts....sl.s....-s.p.....lh-h.pt...l........sptlpchhpph...s.h.p..s+.pss.h..ul..ss.s.s.V.Is+.hlp.hss.sh....s-.c-.lEt.t.l.phEs..sphl..P..a..s.l-E..lsl.Daph.ls.p..................s..ss..s...p..h.....pV..LlsAs++-.l-shhssh.....ptA..GLpshll..D.V..csaAlt.p..sh.p.....h....h..tp.......................hs.t.t.....t..tt.....hls...................llDlGu.shoslslhpsup.hl.a.pR....ph..s.hG.u.ppls.ptlt......pt..h.....s..l.s.h...ppA...........t.......h.h...t.....p.....t....s....l.....sp.....................................s..h..........p....s....lp.....h...hp.p...lspplpRsl...paa..h...s...s...s...t..tp...p................lsp..lhLsG.G.sutl...GLsphl.ppp.hshss.hhs.Ph..t....h.th.s.p...p..p.....ht.ptst..hhhuhGLAhRt..................................................................................................... 
0 121 226 283 +10937 PF11105 CCAP Arthropod cardioacceleratory peptide 2a Pollington J, Finn RD anon PRODOM Family CCAP exerts a reversible and dose-dependant cardio-stimulatory effect on the semi-isolated heart of experimental beetles. CCAP also increases free hemolymph sugar concentration in young larvae and adults of the meal-worm beetle [1]. 20.20 20.20 21.20 35.90 19.70 19.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.81 0.71 -4.43 4 47 2009-01-15 18:05:59 2008-08-04 14:41:47 3 2 33 2 22 50 0 102.10 32 82.57 CHANGED hpsohshLLhLlshlhC....l-CuhssppPRsacthssEs...ss......pKRPFCNAFTGCG+KRSpsssss.......PsshhpRp.........-.lppc..sNE...EuLusLlDLNoEPAVE-L.RQIMSEAKLWEAIQEAs+EIahQKptpK..p ..........................................h.hhhhhh.....h.ss.s...h..p..p..p...tp..............pKRPFCNAFTGCG+KRop.......................................p....tshtsh..hps-.t.-pl.+QhhopsKlaEsIpEAphEl..pppt................................................... 0 10 12 19 +10938 PF11106 YjbE Exopolysaccharide production protein YjbE Pollington J, Finn RD anon PRODOM Family YjbE is part of a four gene operon which is involved in exopolysaccharide production. The expression of YjbE is higher than the rest of the operon yjbEFGH. It appears to be restricted to Enterobacteriaceae [1]. 25.00 25.00 26.40 26.00 21.40 21.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -9.89 0.72 -4.01 2 379 2009-01-15 18:05:59 2008-08-04 14:48:45 3 1 368 0 17 71 1 79.80 92 99.09 CHANGED MKKlL.GlFAIsALuAsSspAAPVpVGEAAGSAATSVSsGSSSATusSTVuSsVGVALAATGGGDGSNTGTTTTTTTSTt ......................MKKVL.YGIFAISALAATS.AWA.....APVQVGE...AAGSAATSVSAGSSSATSVSTVSSAVGVALAAT.G..GG....DGSNTGTTTTTTTSTQ...... 0 1 1 13 +10939 PF11107 FANCF Fanconi anemia group F protein (FANCF) Pollington J, Finn RD anon PRODOM Family FANCF regulates its own expression by methylation at both mRNA and protein levels. 
Methylation-induced inactivation of FANCF has an important role on the occurrence of ovarian cancers by disrupting the FA-BRCA pathway [1]. 21.10 21.10 22.50 22.50 19.70 20.40 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -12.16 0.70 -5.19 7 69 2009-01-15 18:05:59 2008-08-04 14:58:34 3 3 50 1 39 61 0 285.20 27 85.35 CHANGED M-slLcplctFl-lLslu....postVtsWD.tsl+RAhpWAtYhcpla++hpspsslcpuLppcLpst.pp.uss..hPs.hp.hsFpsLupscpllhhpLLpN.sltstph.hll.pL.......s.pt-t-sLpsshsphssp+ushphL......................phsutppssphppsohhpsp.................uclLhchLpchhp..sps.cpstphLspLh..cth.pssahpllAssLLpssss.pp.p.............spchLlpWLht...psshhssFCp.l.utLLsploh+hsphpshYhshLpphupph..........................ph-l.cuhhlusEsp.hsa-tLhp+hpuL..hpus.sl..pptshssLcshptQD....Gs.pl.GhSlWsDlhLtLt .................................MEtlLpplctFs-lLslu....postVpsW-stslcRAlpWApYhcclac+htpp.stlR.pulpccL..cst.tp..sss.....hs...t.h......shpsLupsc.ll.hpLLpN.sL.....tstththll..ph..........s.p.p.ptl..tphsphhppc.s.phL.....................................ps.htsp.................uplLhppLpphhp.....hps.p.stphL..splh...pth.ps..s..hhp.hAssLL.........s.....p.........................pp.llpWLht.................thhtshCh...shlLs.hs.+hstht.hhhshhpphhphh...........................h-l..cthhlss...tsp....appL.hp+h.sL..hpus.sl..ppts.ptLpthptpD.......ps......pDhh.th............................................................................................................................... 0 7 13 24 +10940 PF11108 Phage_glycop_gL Viral glycoprotein L Pollington J, Finn RD anon PRODOM Family GL forms a complex with gH, a glycoprotein known to be essential for entry of HSV-1 into cells and virus-induced cell fusion [1]. 
It is a hetero-oligomer of gH and gL which is incorporated into virions and transported to the cell surface which acts during entry of virus into cells [1] 25.00 25.00 42.00 41.60 20.70 20.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.70 0.72 -4.09 9 36 2009-01-15 18:05:59 2008-08-04 15:34:02 3 1 25 16 0 30 0 109.10 32 71.22 CHANGED PCCpl.sLstsp.lPulasIssIalssspo.CsGhslApL...+ptsspsTh...phCuNGFNlhSFhlulLp+ls.ss.tEphcLLstLpp.hsuF.lsshpsssssu.thp....uhpG .PCC+I..psh..sspp.hP..thasIssIaLssspp.CsGhslApL...+ppssphsh...phCsNGFsLhuFhlullp+h.s.ss.t--lcLLstLpp.hssF.hpsFpssssNuSth.....hss..... 0 0 0 0 +10941 PF11109 RFamide_26RFa Orexigenic neuropeptide Qrfp/P518 Pollington J, Finn RD anon PRODOM Family Qrfp/P518 has a direct role in maintaining bone mineral density [1]. Qrfp has also found to be important in energy homeostasis by regulating appetite and energy expenditure in mice [2]. The c-terminal 28 residues are the functional 26RFa [3]. 25.00 25.00 39.20 39.20 23.70 21.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.78 0.71 -4.26 3 30 2009-01-15 18:05:59 2008-08-04 15:46:31 3 1 27 0 16 31 0 123.40 60 97.11 CHANGED hRPYsLlYhLFLPLGACFPLLDRREPTDAlGGlGAcMsWADLAcG.RPas.WGSPsWlRAPQPQALLVlARELQASGREHAGhpFRLGRQD-GSEATGFLPA-uEKsSGPLGTLAEELNGYSRKKGGFSFRFGR ...........tsasLshL.LhLPLGsCFPLLDR+pPsDshGshGu.thsWAcLAtG.+..P.as.W..Gus.pWhRAspPpALLVlA+tLQsSGRE+AGhpFRFGRQD-GSEAsGFLPAs.uEKsSG.PLGsLAEELNGYSRKKGGFSFRFGR 0 1 1 4 +10942 PF11110 Phage_hub_GP28 Baseplate hub distal subunit Pollington J, Finn RD anon PRODOM Family These baseplate proteins are also referred to as Gp28. Gp28 is the structural component of the central part of the bacteriophage T4 baseplate, which possesses a hydrophobic region and is membrane bound [1]. Gp28 forms a complex with gp27 which is another structural component of the baseplate [1]. 
25.00 25.00 36.10 35.50 23.00 17.40 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.90 0.71 -4.53 6 31 2009-01-15 18:05:59 2008-08-04 15:49:07 3 1 30 0 0 24 0 148.30 46 84.65 CHANGED IPKLGlKHh+LLKDh+GsD-sh+lLlDSIpPGLoAAEsDhVhLHLLtFNsKlpohpphDGashclsDlYlCp+hEFpapGpTFaFKsPthh.-pFlohsDhLo+...phsD-ps..-hsFh-hPAFVlcWA--IhoTIAlssPsGsIpGhusIlGll IPKhGLKHapllKDhKu.P-csL+lLlDSIpP.sLosAEsDFVslHLLEFNGKlpsppplDGasYclsDlYlC.Q.+LEFpapGpTFhF+sPthh.-pFhsls-hLpp....lplsD-sh....c..sFh-MPAFVhcWAs-IhoTlAlsGPNGsIpGlhsIlsl.......................... 0 0 0 0 +10943 PF11111 CENP-M Centromere protein M (CENP-M) Pollington J, Finn RD anon PRODOM Family The prime candidate for specifying centromere identity is the array of nucleosomes assembles with CENP-A [1]. CENP-A recruits a nucleosome associated complex (NAC) comprised of CENP-M along with two other proteins [1]. Assembly of the CENP-A NAC at centromeres is partly dependant on CENP-M. The CENP-A NAC is essential, as disruption of the complex causes errors of chromosome alignment and segregation that preclude cell survival [1]. 25.00 25.00 25.10 25.40 24.00 24.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.95 0.71 -5.04 6 65 2009-01-15 18:05:59 2008-08-04 15:53:17 3 1 51 0 36 61 0 158.40 50 95.51 CHANGED MA...lLRPaDKLPpLNsATlLLVGsE-uhppQLApuML+ccpsFplplHLApSLPLPs-ppphRPRIDLIVFlIsL+SKhSLpsVctSLsHLDssFFLGKVCFLlTGAGpssHCsVchsoVhKLAcoapSPllhs-hchEDhpsAhApRLLphLQICAGhVP.GVSALhLsoLhRsots ...................................MulLpPhsKhPtLN...sAolLLVGsE-thhppLA-uML.+..E.-ss...p..lpVHLAp.SLPLP..sp.ss....RPRIDLIVFVlNL+SK...a..SLpssEpSLpHVDusFFL.GKVCFLsTGAGptspsSl+hssVhKLApoYpSPlLas-Lcsc..shRsshAQRLl+hLplsAGhVP.GlSAL.L.shh+so..s................ 0 9 12 20 +10944 PF11112 PyocinActivator Pyocin activator protein PrtN Pollington J, Finn RD anon PRODOM Family PrtN is a transcriptional activator for pyocin synthesis genes [1]. 
It activates the expression of various pyocin genes by interaction with the DNA sequences conserved in the 5' noncoding regions of the pyocin genes [2]. 18.40 18.40 19.60 18.40 17.50 17.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.26 0.72 -4.15 26 221 2009-01-15 18:05:59 2008-08-04 16:51:16 3 2 194 0 36 165 0 74.50 34 80.92 CHANGED TsahLhApa.upsllPl-cVsp-YF.pl.oscphhcKlpsGcIsLPlh+hc.sSpKus+hVplpDLAsYlDc+tptAc ...............phhhLhtca.ssshlsLptVspcYF.tl.ospshppKssuscl.slPshRls.sSp..Kuth...hVplpDLApYlDc+ppp............ 2 6 10 24 +10945 PF11113 Phage_head_chap Head assembly gene product Pollington J, Finn RD anon PRODOM Family This head assembly protein is also refereed to as gene product 40 (Gp40). A specific gp20-gp40 membrane insertion structure constitutes the T4 prohead assembly initiation complex [1]. This protein in T4 stimulates head formation [2]. 25.00 25.00 26.50 29.70 24.20 23.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.70 0.72 -4.08 10 33 2009-12-03 14:07:40 2008-08-04 16:51:44 3 1 32 0 0 25 0 55.70 46 51.23 CHANGED VlQEllIh.c-GpsHLVYIaclpac...DGplplDauTss-t....K-ELtPHVccslphQI .......VlQEIlIpLcDGssHIVYlpclcas...cG+lslDFuT.s--c...KsELuPHVEKClshQl... 0 0 0 0 +10946 PF11114 Minor_capsid_2 Minor_capsid; Minor_capsid-2; Minor capsid protein Pollington J, Finn RD anon PRODOM Family Most of the members of this family are annotated as being minor capsid proteins. The genomes carrying the genes usually have three similar proteins adjacent to each other, hence this one being named as No.2. 
26.40 26.40 26.50 26.40 26.10 26.30 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.51 0.71 -4.21 5 120 2009-12-03 14:36:31 2008-08-04 16:51:54 3 1 116 0 15 97 1 109.60 29 94.67 CHANGED M.h+IpVDLuGhKcKlSspuh+RG+lAlsNQhhhDMEQYVPhR..-GhLRuSu+lsSsGptIsYoTPYARAQFYGss....staph+NYTTPGTGKRWDhK..AKuhahuDWp+....AFlK.GMG ...........................tlhlcLs...t...hct...pl.p.p.uhp...+u...phtlsspshtshs.YVPhc.......sGpL.+s.o..u...p...l.sss..G...t..lhasssYARtQaYGss..................hphpphssPs...sGt+WDp+..ApsphhppW.c....sh.p............................................................. 0 10 13 15 +10947 PF11115 DUF2623 Protein of unknown function (DUF2623) Mistry J, Coggill P anon PRODOM_PD065638 Family This family is conserved in the Enterobacteriaceae family. Several members are named as YghW. The function is not known. 25.00 25.00 33.80 33.50 21.90 21.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.03 0.72 -4.05 6 450 2009-01-15 18:05:59 2008-08-04 17:16:34 3 1 449 0 21 54 0 94.90 86 99.75 CHANGED MNNHFGKGLMAGL+AspAcoAsclspFCuDYKRGFVLGYoHRMaEpTGDRQLSAWEAGILTRRYGLD+EMVMDFF+EssSshAlRFFhAGYRLEs ....MNNHFGKGLMAGLKA............THAD........SAVNVTKFCADYKRGFVLGYSHRMYEKTGDRQLSAWEAGILTRRYGLD.KEMVMDFFRENNSCSTLRFFMAGYRLEN................... 0 1 4 12 +10948 PF11116 DUF2624 Protein of unknown function (DUF2624) Mistry J, Coggill P anon PRODOM_PD060869 Family This family is conserved in the Bacillaceae family. Several members are named as YqfT. The function is not known. 
20.80 20.80 20.90 21.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.73 0.72 -3.47 11 138 2009-01-15 18:05:59 2008-08-04 17:17:48 3 1 137 0 21 76 1 84.00 58 88.95 CHANGED M.hlhQplVpQKLNplTsc-LL+YuKQYGlslTpsQAcplhsll+GKsINIFscsERp+llKclppITuPpTAppVNcLFpQFs.........s ......M.NLI+QlVNKKLNpIosKELLKYSKEY-VPITsuQA-QIVhLMKGKNINIYDssERLcLLKQIAKVTSPuTAQQVNsLFQQLl..K.... 0 3 12 15 +10949 PF11117 DUF2626 Protein of unknown function (DUF2626) Mistry J, Coggill P anon PRODOM_PD060869 Family This family is conserved in the Bacillaceae family. Several members are named as YqgY. The function is not known. 25.00 25.00 50.50 50.20 20.60 20.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.77 0.72 -3.57 8 157 2009-01-15 18:05:59 2008-08-04 17:19:28 3 1 157 \N 28 77 0 79.60 73 98.46 CHANGED MDRMFRVLuFWTGIFAVMFYlGDMpssuLLFFGQTuhFlhLuYLpLSERMYlYIFGAYLTVFFlGFTYYSTFlhVPGhGc .M-RMFRVLGFWTGIFuVMFYlG.............DM........ps.....sALLFlGQTGFFVLLSYLKLTERMYIYlFGAYLTVFFlGFTYYTTFlhVPGuGc..... 0 9 19 22 +10950 PF11118 DUF2627 Protein of unknown function (DUF2627) Mistry J, Coggill P anon PRODOM_PD058321 Family This family is conserved in the Bacillaceae family. Several members are named as YqzF. The function is not known. 21.70 21.70 22.20 39.70 21.40 18.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.50 0.72 -3.88 13 153 2009-01-15 18:05:59 2008-08-04 17:19:58 3 1 153 0 29 89 0 77.20 61 95.06 CHANGED MtRllALllLLIPGslAAlGIKLMRDolFGIlhsPFshL.......WLQFLuGllhFshGlhhlAGFILaRDRKRNKVssRF++ ..MpRhlALLlhLIPhulAshGIKLMRDTlFGILh.s.....P.h.u...hL.......WLQFLlGhlhFulGhYlhGGFlLHRDRKRNKVQsRFR+......... 0 10 20 23 +10951 PF11119 DUF2633 Protein of unknown function (DUF2633) Mistry J, Coggill P anon PRODOM_PD053432 Family This family is conserved largely in the Bacillaceae family. Several members are named as YfgG. 
The function is not known. 20.90 20.90 21.50 20.90 20.40 19.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.91 0.72 -4.25 7 453 2009-01-15 18:05:59 2008-08-04 17:21:01 3 3 451 0 33 99 7 54.10 73 72.81 CHANGED hR++hssphT+IlLLISFlhhFGRhlYuuIsAh.HHQp+.pu.phs.olp........pthpp ...KRHRFNoRMTRIVLLISFIFFFGRFlYSSlGAWQHHQsKKE.AQ.QSoLSVE....oP.......sQR................. 0 1 4 19 +10952 PF11120 DUF2636 Protein of unknown function (DUF2636) Mistry J, Coggill P anon PRODOM_PD053231 Family This family is conserved in the Enterobacteriaceae family. Several members are named as being YhjT, but the function is not known. 21.90 21.90 21.90 23.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -8.99 0.72 -4.47 8 456 2009-09-11 14:18:15 2008-08-04 17:21:37 3 1 452 0 25 86 0 61.90 80 97.67 CHANGED MslSDIlQLlllCALIFFPLGYLs++shRRlRsThRhhhh+PRYVKPAGsL+Rs........o+V+uscp ...MTISDIIEIIVVCALIFFPLGYLARHSLRRIRDTLRLFFAKPRYVKPAGTLRRT........EKARATKK......... 0 2 4 15 +10953 PF11121 DUF2639 Protein of unknown function (DUF2639) Mistry J, Coggill P anon PRODOM_PD049467 Family This family is conserved in the Bacillaceae family. Several members are named as being YflJ, but the function is not known. 21.70 21.70 21.70 25.80 19.80 18.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -7.90 0.72 -4.45 8 197 2009-01-15 18:05:59 2008-08-04 17:22:01 3 1 123 0 16 79 0 41.90 69 77.77 CHANGED HaGSKGWYVcELKKhGIppaE..GRKLESYKsHhLuNLL..cph ............YaGoKGWYVtELKKLGlRhaE..G+KLESYRsHlLpsLLt........... 0 2 9 10 +10954 PF11122 Spore-coat_CotD Inner spore coat protein D Mistry J, Coggill P anon PRODOM_PD057197 Family This family is conserved in the Enterobacteriaceae family. CotD is an inner spore coat protein that is expressed in the middle phase of mother cell gene expression. 
Along with CotD, CotH, CotS and CotT it is assumed to assemble into the loose skeleton of the matrix, between the shells of SpoIVA and CotE. Coat proteins do not share much sequence similarity between species, but this does not imply they do not share secondary, tertiary, or quaternary features [1]. 19.60 19.60 20.50 20.40 18.60 18.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.48 0.72 -3.67 15 144 2009-01-15 18:05:59 2008-08-04 17:22:21 3 1 131 0 25 95 0 100.70 52 82.33 CHANGED sslVHPT+pssscshscslVPHIHPsHTTpVN+phhcHhHYaPpTpSshspss..................ppahts..sss............................................G+sss ......APVlHPTKQCVsHoFSsTVVPHIaPTHTTHVaHQplK..sQpaF.PQTsSNVNsVs.........................................................................HsHpluPhs..P..ssssushGs..s.................h...........sss............................................................................... 0 6 15 20 +10955 PF11123 DNA_Packaging_2 DNA packaging protein Pollington J, Finn RD anon PRODOM Family This DNA packaging protein is also referred to as gene 18 product (gp18). This protein is required for DNA packaging and functions in a complex with gp19 [1]. 21.00 21.00 21.60 50.20 20.30 19.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.62 0.72 -4.10 5 41 2009-01-15 18:05:59 2008-08-05 09:05:58 3 1 31 0 0 31 0 78.70 63 93.67 CHANGED DKoLIKFLEMLDTEMAQRMLuDLpDDERRSPQLYNAIGKLL-RHKFQISKLpPDEsILGGLAAuLEEYsclVGssGLTDD-h ....hsL.phLEMLDTEMAQpMLtDLpDcE+RoPQLYNAIsKLLDRHKFQIuKLQPDhpILGGLAuALEEYpphVGssGLT-D-............................................ 0 0 0 0 +10956 PF11124 Pho86 Inorganic phosphate transporter Pho86 Pollington J, Finn RD anon PRODOM Family Pho86p is an ER protein which is produced in response to phosphate starvation. It is essential for growth when phosphate levels are limiting [1]. 
Pho86p is also involved in the regulation of Pho84p, a high-affinity phosphate transporter which is localised to the endoplasmic reticulum (ER) in low phosphate medium. When the level of phosphate increases Pho84p is transported to the vacuole. Pho86p is required for packaging of Pho84p in to COPII vesicles [2]. 23.70 23.70 29.40 133.80 20.00 23.60 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.85 0.70 -5.43 10 45 2009-01-15 18:05:59 2008-08-05 09:16:28 3 1 43 0 28 40 0 287.50 41 91.77 CHANGED QKDssLNcPLDt-APPTltpoSLpPELApAuLsLpuDah+QtQuhhN+alFaHPlslollslslsshluhpL...as.hs.hSsols-.LYphhlhsK+-hlhsllhslsssuhlFuhluh.sahVoDthtchsschltpspsEpIFGhNL+cau............................spchsscs+cl.t........pscNTaIIlYR-oPIAllolssshspSoc-shlh+ITGltlR+ValKushh-sLIDWAhlRoRplhp-a.psK........upSlplllDsYSFDpphcKhLtp+GFphlp.ShcLss............clL.....ssLFGIo+-TaGlph .QhDssLscPLDh-APPTIhsssLcPEhuoAALNLsuDal+QpQulsNKalhaHPlslsllslslhlals.+l...shPlp...o.s.Slst.hYplhhhNK+shlsullhohhssuhlFollut....loDsahppp.s.lstspuEplFGhsLpchs............................tpcpspcsh.............spNTcIIVYR-TPIAlISLssshs..lSoc.-shVhslTolGsR+VYlKSGIlEDLIDWAhl+o+slhpp...sK.......hupoh+LLlDlYSFDsshcchLcc+GFshlp.Shcls-.......................+LL..............GuLFGVp+-hWGlph.. 0 3 14 25 +10957 PF11125 DUF2830 Protein of unknown function (DUF2830) Pollington J, Finn RD anon PRODOM Family Several members in this viral family of proteins are annotated as lysis proteins. 
25.00 25.00 46.90 46.40 17.40 15.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.76 0.72 -4.26 3 70 2009-01-15 18:05:59 2008-08-05 09:18:49 3 1 15 0 0 56 0 52.50 83 73.16 CHANGED FKHEEYPCpcQQRSSTLYVLIsLAIFLSKFTNQLLtSLL-LLIRIVcTLQQLLT .FKHEDYPCRRQQRSSTLYVLIFLAIFLSKFTNQLLLSLLEAVIRTVTTLQQLLT 0 0 0 0 +10958 PF11126 Phage_DsbA Transcriptional regulator DsbA Pollington J, Finn RD anon PRODOM Family DsbA is a double stranded binding protein found in bacteriophage T4 which is involved in transcriptional regulation. DsbA, along with other viral proteins, interacts with the host RNA polymerase core enzyme enabling initiation of transcription. DsbA acts as an enhancer protein of late genes in vitro. The protein consists of mainly alpha helices [1]. 25.00 25.00 65.90 65.40 20.20 18.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.21 0.72 -3.89 7 36 2009-01-15 18:05:59 2008-08-05 09:23:25 3 1 35 0 0 26 10 68.50 52 76.26 CHANGED hIpEASs.+hp.Euat-hlK-I+stAKpEhGl-GKhFNpLh+lYH+QpR-pFEsps-ElsplYDplFpt .hIKEASDpKhplEuYs-hIKDI+p+AKcELGVDGKhFN+LlsLYHKpsR-pFEsEs-EllELYDslFs.t 0 0 0 0 +10959 PF11127 DUF2892 Protein of unknown function (DUF2892) Mistry J, Coggill P anon Pfam-B_604 (release 23.0) Family This family is conserved in bacteria. The function is not known. 21.10 21.10 21.20 21.20 20.90 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.31 0.72 -4.14 172 1924 2009-01-15 18:05:59 2008-08-05 09:28:24 3 11 1526 0 602 1414 277 64.10 27 55.37 CHANGED Mp...............NhGshDRhlRlllGlsllshshhsh.................thhhhhlGhs..hLhTulhuaCPhYtll.G...lsT.....sthp .............................hpRhlpl.s.u..Gslll..huslh.uh..hst.......................hhhlsuhlGss..LlhsGloGaCsh...ttlL.p....h......p.h........... 
0 191 418 532 +10960 PF11128 Nucleocap_ssRNA Plant viral coat protein nucleocapsid Mistry J, Coggill P anon Pfam-B_645 (release 23.0) Family This family of nucleocapsid proteins is from ssRNA negative-strand viruses of plant origin. 25.00 25.00 107.60 107.30 20.00 19.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.13 0.71 -4.80 5 129 2009-01-15 18:05:59 2008-08-05 09:33:54 3 1 10 0 0 80 0 178.00 68 65.42 CHANGED RsLSKFIRETlF+-uDlcossLC-aLSSADPShFPASVFLKIsLDNLPTEVSSRCKMuIAGNKAIRYAlFApKF-.KDplusPTsussEslpEYlpKpEKLEKA+AIV-hLCSLuSNF-AQKKMHPLSPERoSRKNFTLQLTCAIVaSLSpcGRlDMRctI-ocKIEAFKRDENlYGctNu .RKVPEFVKSKLY.DGDVSLSQISEELSHAPTKKFPARVFLKIDIDNLPSAVCSRCKLNIAGNRSVRYAGFASSFQTKQKLSPAVGATPESLMPLLETNQKIEKSIAIRDFLKTMEGQWKNQKRLHPLSDEKPTIKNFTLKLTCAIIYSLTPDGRIDMAERIITDKNKGFQNDRNFFGDGE.G.. 0 0 0 0 +10961 PF11129 EIAV_Rev Rev protein of equine infectious anaemia virus Mistry J, Coggill P anon Pfam-B_124 (release 23.0) Family The sequence of this family is highly conserved and carries a nuclear export signal from residues 31-55, and RNA binding/nuclear localisation signals of RRDR at residue 76 and KRRRK at residue 159. Rev is an essential regulatory protein required for nucleocytoplasmic transport of incompletely spliced viral mRNAs that encode structural proteins. Rev has been shown to down-regulate the expression of viral late genes and alter sensitivity to Gag-specific cytotoxic-T-lymphocytes (CTL). Equine infectious anaemia virus (EIAV) exhibits a high rate of genetic variation in vivo, and results in a clinically variable disease in infected horses. 
25.00 25.00 121.80 121.70 18.40 19.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.76 0.71 -4.17 2 322 2009-09-11 15:12:54 2008-08-05 09:34:35 3 2 4 0 0 217 0 132.90 91 96.22 CHANGED PQtPL-sDpWCRlLRQSLPEEKIPSQTCIAR+pLGPGPsppssuRRDpWlRtQl.pAEsLQEpLEWRIRGVQQsAKEL-cVN+tIWRELpapccQ+GDauuassYpRtpEc+WGE.SSPRVL+PGDSKRRRKHL .PQGPLESDQWCRVLRQSLPEEKIsSQTCIARRHLGPGPTQHTPSRRDRWIREQILQAEVLQERLEWRIRGVQQsAKELGEVNRGIWRELHFREDQRGDFSAWGsYQRAQER+WGEQSSPRVLRPGDSKRRRKHL. 0 0 0 0 +10962 PF11130 TraC_F_IV F pilus assembly Type-IV secretion system for plasmid transfer Mistry J, Coggill P anon Pfam-B_678 (release 23.0) Family This family of TraC proteins is conserved in Proteobacteria. TraC is a cytoplasmic, peripheral membrane protein and is one of the proteins encoded by the F transfer region of the conjugative plasmid that is required for the assembly of F pilin into the mature F pilus structure. F pili are filamentous appendages that help establish the physical contact between donor and recipient cells involved in the conjugation process [1]. 
21.80 21.80 21.80 22.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.23 0.70 -11.46 0.70 -4.93 71 713 2009-01-15 18:05:59 2008-08-05 09:37:15 3 7 493 0 136 633 29 242.30 22 29.40 CHANGED stsphschLPa.....tpYsscsplal.....sspS..hGhhaElsPlsstscp......htctLpshlpp..sh..ssss..slQhhhhsssslsphlpp.hpshh...................ts............hhpphhtpphpaahcuht.p.t....sshsh......plRchRlhlhhphs.......tspsshpplpphpcpltusLpssGlt.spphsspslhsalhchh..NPpsshthss............Yst.p.................slspplhhssschcs.pp.shhhhs.....................t.scphlhshsl+phPct ......................................................s..sphschLPa.....hpY.......pp-..........splah.........sspo...hGhhh-hhPlsssscs......lh-slpphLps...chPcss....slQhhhh.s.s..s..plsshlcp.hhphhp.p................up................hhpchhttphtaahcuss.phth......pshsh........plRchRlhl.......hphs..............sthps.....pss...ht..phpplpcplp..u..uLtssulp.spphsupshhshltchh..N.sssthh.ptt..............phs..p..................slspQlh.ss.ch....cl.ct...shhhhs...............................t.spthlhshplpc.Pp........................... 0 28 67 108 +10963 PF11131 PhrC_PhrF Rap-phr extracellular signalling Pollington J, Finn RD anon PRODOM Family PhrC and PhrF stimulate ComA-dependent gene expression to different levels and are both required for full expression of genes activated by ComA, which activates the expression of genes involved in competence development and the production of several secreted products [1]. 
25.00 25.00 26.10 43.40 23.60 17.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.64 0.72 -4.38 4 36 2009-01-15 18:05:59 2008-08-05 09:44:28 3 1 21 \N 6 17 0 37.00 62 95.28 CHANGED LKSKLhlhCLAhusVFsussl.ApAspppFcVApRGMh .LKSKLFVICLAAAAIFTAAGl.AsA-tt-FHVsERGMT 0 2 2 2 +10964 PF11132 SplA Transcriptional regulator protein (SplA) Pollington J, Finn RD anon PRODOM Family The SplA protein functions in trans as a negative regulator of the level of splB-lacZ expression in the developing forespore [1]. 25.00 25.00 29.20 29.10 21.70 20.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.57 0.72 -4.23 7 78 2009-01-15 18:05:59 2008-08-05 09:46:18 3 1 57 0 13 43 0 66.80 50 92.81 CHANGED Mph....psapsGD.VYVIYRNPHstsVApIpEAtlVpHP.c.sELALFLaETYaPlspD.AlFso.-pAEphYpphFc ...............p.apsGD.VYlhYRNPHstsVApIppAElVsHPp+cGELALFLaETYHPLu--DAlauo.-EApphYp.hat...... 0 3 7 9 +10965 PF11133 Phage_head_fibr Head fiber protein Pollington J, Finn RD anon PRODOM Family This head fiber protein is also refereed to as Gp8.5. Gp8.5 is a structural protein in phage. It is a dispensable head protein. 
20.60 20.60 20.80 20.80 20.00 20.10 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.77 0.70 -5.20 3 66 2009-01-15 18:05:59 2008-08-05 09:51:36 3 3 62 1 12 64 7 123.30 22 66.00 CHANGED MhsSFTAhANSsIlAY+LLsalEuE-pIEISaAcEcTIP-YVul+DLcsGDhTslshYPLAAWpVIAsSDIslG....D+lTTGKNGTlKhTcDspssFGYAVupApcGQLVTlI..IS+uFDplIpsDDlG-ssDsGphLhchssssGspshlI...DsKAhlpuNoTssNKKpLcD.LLlSsLsVKAF......LsusToD-NKANL............cpLhVSNPclLuhLsGsPSoEsKssLRoMIGAGsPYTLPAATTTTLGGVK+uAAVusSTATDVssuVKDFNuLLTALKNAGIIu ........................................................................................................................................................hs...................................................................................................................................................................................................................................................................................................................K..ph.st..........ss...uo.u.ssstshhtphNsllstL+suGhh................................................ 0 3 7 8 +10966 PF11134 Phage_stabilise Phage stabilisation protein Pollington J, Finn RD anon PRODOM Family Members of this family are phage proteins that are probably involved with stabilising the condensed DNA within the capsid [1]. 
19.70 19.70 21.50 21.10 17.20 16.60 hmmbuild -o /dev/null HMM SEED 469 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.71 0.70 -5.92 3 145 2009-12-03 14:24:52 2008-08-05 10:02:23 3 2 137 0 6 107 10 449.10 82 97.02 CHANGED hQLPLhKGLGKDhKsADYIDALPVNMLATPKEVLNASGYLRSFPGIsKKsDVsGVSRGVcYNT+pNAVYRVCGNpLYKG-KsVADlAGpGRVSMAHSRsSQAVsssGKlpLYRYDGTVKTLSNWPKDKtYTQYDLGsVRDlCRLRGRYlWsKDGoDpFGVTDLEDESHPDRYpAhYRAESQPDGIIGIDSWRDFIVCFGSSTIEYFSLTGAADsuuALYlAQPALMVQKGIAGTaCKTRauDuYAIISHQATGAPSVYLIGuGQAosIATATIEKIIRSYTADELAouVMETlRFDSHELLLIHLPRHVLCYDASASQNGsQWSLLKTGFYD-PYRAIDFMFtDNQITCGDKsEuLLGQLpFsuSuQYEpQQEHLLYTPLFKADNARlFDFELEASTGVAQIADRLFLSATTDGINYGREQMIEQNuPFsYDKRILWRRlGRVRKNIGFKlRVITKSPVTLSGCQIRME ...........................................QQLPLMKGVGKDFRNADYIDYLPVNMLATPKEILNSSGYLRSFPGIAKR.SDVNGVSRGVEYNMAQNAVYRVCGGKLYKGE....S.....E..V....GDVA..GSGRVSMAHGRTS...QAVGVNGQLVEYRYDGTVKTVSNWPs...DSGFTQYELGSVRDITRLRGRYAW.SKDGTDSWFITDLEDESHPDRYSAQYRAESQPDGIIG.IGTWRDFIVCFGSSTIEYFSLTGATTVGAALYVAQPSLMVQKGIAGTYCKTPFADSYAFISNPATGAPSVYIIGSGQVSPIASASIEKILRSYTADELADGVMESLRFDAHELLIIHLP..R..HVL..VYDASSSANGPQ..WCVLKTGL..YDDVYRAIDFlYEGNQITCGDKLESVTGKLQFDI..SSQYDKQQE.....HLLFTP.LFKADNA..R.sFD.LEVESSTGVAQY....AD.RLFLSATTD..GIN.YGREQMIEQNEPFVYDKRVLWKRVGRIRKNlGFKLRVITKSPVTLSGCQIRIE...................................................................................................... 0 2 2 2 +10967 PF11135 DUF2888 Protein of unknown function (DUF2888) Pollington J, Finn RD anon PRODOM Family Some members in this family of proteins with unknown function are annotated as immediate early protein ICP-18 however this cannot be confirmed. 
21.50 21.50 21.60 262.10 21.20 21.30 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.87 0.71 -4.48 2 13 2009-01-15 18:05:59 2008-08-05 10:27:34 3 1 13 0 0 11 0 143.20 81 92.36 CHANGED GcPapppGshh.lPFupsFTIDLVNh.hpoEhpV+lpMoPphslGTFVVtPKphFSI+RAspGDAuFKVtRutGW.spT.QsLohhhYcR...l-hcsGs..pplETDG..GTVlVPGcsTGQRFupAhAh.hhFLap+.FllpGV GEPYTCKGDLCEIPFuRNFTIDLVNLSVSTEFQV+ITMTPHHDLGTFVVEPKKVFSIKRAsKGDAAFKVtRAAGWLPDTPQVLoLFVYERLpPVEWHStChYENLETDG..GTVIVPGEATGQRFGTATAVPThFLFKRMFVVKGV 0 0 0 0 +10968 PF11136 DUF2889 Protein of unknown function (DUF2889) Pollington J, Finn RD anon Pfam-B_001473 (release 23.0) Family This bacterial family of proteins has no known function. 20.80 20.80 20.90 25.00 20.00 16.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.27 0.71 -3.89 49 270 2009-01-15 18:05:59 2008-08-06 10:06:50 3 2 200 0 113 274 147 120.40 34 54.07 CHANGED HpRplphpuat+.p........DGLa-l.EuplpDsps.s.s........h.sspslHchplplTlD.sshpIhsspAphctsPas.hCspsssshppLlGhslu.Ga+cplpcpluGspGCTHLpELLt.slussAhQs ..................HpRtlphcuYtR..s........DGLa-l.EApLpDpKsh-hs..t...thh.sutslH-hhl+lTlD.pchslhcscA.shchsPas.pCssussuhptLlGlslt.tsaR+plpctLuGssGCTHLsELht.slsTsAhQs......... 0 25 62 89 +10969 PF11137 DUF2909 Protein of unknown function (DUF2909) Mistry J, Coggill P anon Pfam-B_764 (release 23.0) Family This is a family of proteins conserved in Proteobacteria of unknown function. 28.30 28.30 28.40 32.20 27.00 28.20 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.07 0.72 -4.23 51 339 2009-01-15 18:05:59 2008-08-06 10:14:11 3 1 327 0 118 287 343 64.30 35 88.64 CHANGED hKl.llllhllsIlsSLsSALaFLh+D...pu...........cupRhs+uLshRVuLSlsLhllllluhhhGhl....pP ...h+l.llllhhlhIlsSLsuALaahh+D..cG............popRhlpuLshRVuLSlsLhlhllhAhhhGhI......ss..... 
0 22 62 92 +10970 PF11138 DUF2911 Protein of unknown function (DUF2911) Pollington J, Finn RD anon Pfam-B_001491 (release 23.0) Family This bacterial family of proteins has no known function. 20.90 20.90 21.80 23.40 19.60 18.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.90 0.71 -4.57 69 234 2009-01-15 18:05:59 2008-08-06 10:24:24 3 10 92 0 137 269 303 145.40 35 62.05 CHANGED SPtspstt..........pl.uhsc......lplpYSRPuh.....+GRp..IF.......Gs...LVPYG...........cVWRTGANpsTplshscDltlsGcpltAGsYuLaTIPsccs.WslIhNp......sts.pWGs.........pYc..ppcDll.Rlplssp.phs..pshEphohshsshpsst.uhlpltW-pshVslsl .......................SP.spspt......ph.uhsp...lplpYSRPuh......+GRp..IF.......Gs.....L..VPYG..............clWRTGANpuTplsFscDVpluGctltAGsYuLaoIPscsp..WslIh.Np.......shs.pW.Gs........hpYc..pspDllRlpVtsp.phs..p.hEsholsh...sshssst...upltltW-pstVslsh.......................... 0 68 117 136 +10971 PF11139 DUF2910 Protein of unknown function (DUF2910) Pollington J, Finn RD, Eberhardt R anon Pfam-B_001487 (release 23.0) Family Some members in this bacterial family annotate the proteins as cytochrome C biogenesis proteins however this cannot be confirmed. Currently no function for this family is known. 
31.00 31.00 31.00 31.20 30.90 30.90 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.33 0.70 -5.20 46 566 2012-10-02 18:22:22 2008-08-06 10:24:54 3 2 322 0 120 408 26 215.20 21 94.89 CHANGED lLshAlslulsPhplshslLlLpp.....s+PhhphlsFlsGhhlsshslsslhlhlhcsls......shpts.......sstthshlplllGssllhlushhhhpt..................................................tststhsphhsphp.....shs.stuhhhuhlhslhps.tshl.ahsAhuslsuuuhssssphsAllsFsllussslhlPLlualluspR..spshLtpl+sWhpsppphlluslhsslGhhllhp.Gls ....................................................llshAlshulsPhhlhs..s..l...l..h.....lpp.........scsh...p..h..hsFlsGhhlshsu..lshsslhhhsshs........shstt.........sphssshlp.lllGllLllhushhhtpp.................................................................................httsthsph..hpphp..........sht..sss.s.hh.huhlhsl.sps.tshh..hluAhshIsuushshssphhshlhFsh.l.shss.shlP.llual..ht..sp+....h..pthhtthp..sW..hp..p..p.pth...hh.shlhhhlGhhhlhpul......................................... 0 35 83 107 +10972 PF11140 DUF2913 Protein of unknown function (DUF2913) Pollington J, Finn RD anon Pfam-B_001499 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Gammaproteobacteria. 
20.40 20.40 20.80 20.50 20.20 19.70 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.19 0.70 -5.03 24 383 2009-01-15 18:05:59 2008-08-06 10:47:32 3 1 273 0 51 246 3 190.00 28 91.67 CHANGED sYsptlhclspsuLspLptpp.psupshpsssscsHaLspWlspALKppRFs+hluc..sLptW.+puRShGssApLctlhp+Ipt.Yt....sstp.tpshstsplcuhLsplcptsWhVtT....-p.lss..KlplpoDGpsSLl.lsupphpppFss..spLlKPlohalRG...scpthhptAhppGhhlaphschpShVKaHt-YhlaPtNptstLspL..s ..........hp..lppll.sALstLpttp.cst.p.t.h.s.u.stpscaLlcWlspAlKpp+Fp+hlsc..cLpthhctu.Rsh..ust..upLtshhph..........sspp.tt.hshhclcuhLsphcptsW..p..lts......sp..lsp....plphh.ssGpss.Lh.lptsph-psFss.....schlpPlshhlpu...ph.t.thhpthhtsu.hh.p..c.t..lhhthph.h..tt.......l........................... 0 3 12 32 +10973 PF11141 DUF2914 Protein of unknown function (DUF2914) Pollington J, Finn RD anon Pfam-B_001640 (release 23.0) Family This bacterial family of proteins has no known function. 20.70 20.70 20.70 21.90 19.80 20.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.48 0.72 -4.45 39 174 2009-01-15 18:05:59 2008-08-06 10:47:56 3 2 158 0 77 178 90 68.50 36 22.50 CHANGED pu..pt.lhHhWh......pssc.ssclsLslp.GsR...aRsaSpKphh..stsGcWcVcVhspsGpllushcFpls ..................ppplhHlWh..........tsGcphs+Is...LsIp.GuR...uYRsWopKpshs...sssGc.WpVcVloE..s..Gp.hIuslcFpV.... 0 29 52 69 +10974 PF11142 DUF2917 Protein of unknown function (DUF2917) Pollington J, Finn RD anon Pfam-B_001647 (release 23.0) Family This bacterial family of proteins appears to be restricted to Proteobacteria. 
25.00 25.00 25.10 25.00 24.40 24.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.97 0.72 -4.78 40 253 2009-01-15 18:05:59 2008-08-06 10:49:20 3 3 129 0 98 236 21 65.20 30 55.56 CHANGED tapLs.sGpshsh+stpssp.LpVt..sG...plWl...Tt.-u.......s.....spDaWLpsGpoLpLtpGp+lhluut...hssu ..................atls.stpshsh+..ssp.ssp...LpVp...sG..........tlWl...Tp.ss........c.....scDaaLpsGpslcltcGp+..lhlout..h...ss............... 0 9 40 68 +10975 PF11143 DUF2919 Protein of unknown function (DUF2919) Polllington J, Finn RD anon Pfam-B_001684 (release 23.0) Family This bacterial family of proteins has no known function. Some members are annotated as YfeZ however this cannot be confirmed. 22.70 22.70 23.00 23.60 22.60 22.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.71 0.71 -4.37 29 662 2009-01-15 18:05:59 2008-08-06 11:06:50 3 1 644 0 64 283 15 137.30 54 96.12 CHANGED as.ls.aDc+GplKsPlhhahsLlaLARuWslFlhusssRpstssLLplFYP-+ssFaluLssGlsAlLhh.llhucRp....ct.phlt..laphh+hhLlhslllphshhhhtlhhpthhashshulphllhhWhhlYLhpS++Lpthhpshpp ..............at...pYDspGhL+hPhhhWh..sLLh.ARsWVLF.VIAGuS.REQGosLLNLFYPDHDNFWLGLlPGlPAVLAF.LLSGRRs....shPp......lW+hLh.hLLLLAQlV.LCWpPa.lWLsG....-....uVoGlGLALllADIVALIWLLTNRRLRACFst...c.................... 0 6 16 41 +10976 PF11144 DUF2920 Protein of unknown function (DUF2920) Pollington J, Finn RD anon Pfam-B_001778 (release 23.0) Family This bacterial family of proteins has no known function. 
24.70 24.70 24.70 24.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 403 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.35 0.70 -5.45 20 738 2012-10-03 11:45:05 2008-08-06 11:26:01 3 2 129 0 22 553 3 187.50 29 98.00 CHANGED NpTapIDSCDDVELsIKR..pSKLEaRloYDDpK-IKAIVhIIsGhGuDss.sahcahpcalA+pacVsslsVsYHChssR....sphuAphhh-c.DhhllcssLcslsl....shtslss.cphpphhphLspplpphKppshlspsapLp.LSsohhPscsEYQNFGIMtAlDllNAlhalh+c..........hsphus......lPpIhsGuSYGGYLApLsAKIAPWhlDuVlDNSuhAh...shhchI.hG+El-a.ph.ptush...hp..slplthasKTaWTp.scsSsaaFussphhIRslLNpcHLpIQupa..pshYloYHShpDphs.Psc-KpphachLcpLGFDssLplIpcEspIDG+FIKsL-HGhGlohKtLh+KcLPhlLEKl.shpsch..pc.cpISYPCc-hlYpFc-pscKlpLcI ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 10 14 19 +10977 PF11145 DUF2921 Protein of unknown function (DUF2921) Pollington J, Finn RD anon Pfam-B_001920 (release 23.0) Family This eukaryotic family of proteins has no known function. 
18.80 18.80 19.30 18.90 18.70 17.90 hmmbuild -o /dev/null HMM SEED 909 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.63 0.70 -13.66 0.70 -6.83 7 137 2009-01-15 18:05:59 2008-08-06 11:43:06 3 5 21 0 96 115 0 572.50 24 90.89 CHANGED Y......pp+Cps..stus.h...s.sststhhuppl.h.pssah..uGssslhshssspsh...........+.soFssppl.....t+s+sst.hlpVsuhLsLt....uspsshhshsht.pp.ph.hlst.........ph.hpGhYoE............upt.LCMVGsu...................sshus-ssslhh.tshlLhLphPKs.oLssphVpGpLcSssthsp...F-slpLluh.pp..pY.a.....pLs.tt..C.....cPhs.t.cthh.tupusps..h.tthCclLcch..t...shpl..shc.Csuoc.hsshhsshph............tshthhhpsl+Cps.t.......sss+s.lsFpshos.Eph..hs...t.t....ptoLsAEGhWcsosupLChsAC.lup.s.....sthhsts.CphRlSLpFPssaSIRsRShllGplhssss......sptshuFcplh.s.....pshppph.phh.h+YsYTcl-cAtchhtps.c.ush.....++...spaPts.t..........ShSDht.choh+hsut........upu.........sshslGshhhp....h..hutsphsshss.h....scshslpppp..LLNVShcIoh.................ossh.hppstlShEGlYDpcsGphhhlGCRcl.ph..p...s......tsuhDCpI.lplpasslsu+..spsph+spIpSpR.tcsD.LaFcthclpspshhh....c.ph.-sl.RhslEslhsllohThShshhshQLhah+pps.-shPhlSlsMLsl.ALGahhPLlhshEALFh....stppp.h...........hptsthhps.-hhl+llTllsFLLpLRLhQhVWpuRtps.pppstc.t..su-++sLhlsLslYlhGhlluhhlphstsph.hstt.ps..............u.p.tshhp-ltpYsGLhhDhFLLPQlIhNuh.pscs+.PLushFYhGsThlRhhPHsYDhhRupshsshhp..ssahaAssph....DaaupAhDVllPlsAhhLAhllFlQQRashthlh.t 
...................................................................................................................................................................................................................................................................................................................................................h.s.h.s............................ta.a..............t...............................................C....h.......................................................................................................................................h..s-G.ap...ts.h.h.uC..h..................t.Ct.thph.h....hohptts..hG.h.s.t...............h..........................YpY..o.hp.stt.h.t......t......t......aP..................h...h..t.......................................h..l...h..................................................................hth..sh.h......................................h.hEGha....G.h.hluC..h...................t.DC.l...l.hths..t.........h..h.ItS.R..t.sD..hh...hth....h............................t..t.h.+...h-.hh.hh..sh.hhh.h.Qlhahpt.s..p..s...hSlhhhhh.shGh.h.Lhh.s.t.hhh........................................t........p.hh+h.hhhthhh.hplh.hsht.+.......t.......................sc.t.shhh...hahhhhhhhh..p........................................h.tahGlh.DhFLlPQllhNh.h.....t..p.p..slt..aYhGhThhRhhP+hYchhp........................s.t.....shash..hhDlhlshhsh.huhhlahQQphs.......p.................................................. 1 4 55 82 +10978 PF11146 DUF2905 Protein of unknown function (DUF2905) Mistry J, Coggill P anon Pfam-B_542 (release 23.0) Family This is a family of bacterial proteins conserved of unknown function. 
22.20 22.20 23.90 23.90 21.70 20.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.18 0.72 -4.03 79 498 2009-01-15 18:05:59 2008-08-06 11:50:21 3 1 488 0 217 396 105 63.30 39 91.19 CHANGED hu+hLlhhGll.llllGhlh....hl.t+.....hs.l..GRLPGDIhlcctshsaYFPlsTsIllSllLSllhhlh ........hs+hLlshGll.LlllGlhh....al.t+..........hs..h...GRLPGDIhlc.+s..NhsFYFPlsTslllSllLSl..lhhlh.... 0 80 152 189 +10980 PF11148 DUF2922 Protein of unknown function (DUF2922) Pollington J, Finn RD anon Pfam-B_001999 (release 23.0) Family This bacterial family of proteins has no known function. 22.70 22.70 22.80 23.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.89 0.72 -4.32 49 773 2009-01-15 18:05:59 2008-08-06 11:52:37 3 1 650 0 145 467 0 68.00 28 90.76 CHANGED KpLpLsFpsssG...Kphslslsssp-slotsplcssMspllspslFtssuG.sL.sshpuA+lV-psssslh. .........KsLcLsFpssts....Kssplplspspss......lT....pp.lcssMssllstslhpss....s.u.sl..sphpuApll-+ssosl................................... 0 71 119 135 +10981 PF11149 DUF2924 Protein of unknown function (DUF2924) Pollington J, Finn RD anon Pfam-B_002010 (release 23.0) Family This bacterial family of proteins has no known function. 21.80 21.80 22.00 24.10 21.30 21.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.59 0.71 -4.15 31 178 2009-01-15 18:05:59 2008-08-06 12:38:05 3 5 96 0 88 183 81 132.10 37 82.95 CHANGED lhs....clAtLsshshscL+thWpclas.stssphs+saLppRLAYRlQEhsaG....GLsppscp+L-plucptp.tphht......................ppsspphssGTpLlREWpGhpHpVtVhsDG.F-apG+pa+SLSAlA+tITGTRWsGPtFFGL+ ..........................................................................................h..tplAtLtshshscLpthWpclas.ptssp.hs+paLpp+lAYRlQEhshG....GLstpspp+Lctlucphsttthht..............................ttsstt.hsGThLlREWpGppHpVhVhs-G.F-apG+hacSLSAlA+tITGT+WsG.hFFGL+............... 
0 43 68 79 +10982 PF11150 DUF2927 Protein of unknown function (DUF2927) Mistry J, Coggill P anon Pfam-B_739 (release 23.0) Family This family is conserved in Proteobacteria. Several members are described as being putative lipoproteins, but otherwise the function is not known. 20.40 20.40 20.90 21.30 20.20 20.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.15 0.70 -4.65 65 243 2009-01-15 18:05:59 2008-08-06 12:55:41 3 1 191 0 58 212 104 206.30 34 67.68 CHANGED sssplscsFhclAhttEYsps............hstlpRacsPl+ltlp...usssuspstDhstl...lsRL...uclsGhsI.shsss..........pANlplhhssccc.......l........pchlPpsustsls.h.shsctthChststsshsp.hptspsllhl.s-psshhhtsClHEElAQuLGLhNDo.clhsSlFNDD.s.aulLTsaD.lLL+hLYcPcL+sGMohs-ltshL.tl .........................................................................................................s.ss.ltcsFhplAhpsEYstu...........tphlp+Wc..tP.lRlhhc...........ttssc+shctphltsalp+L...uplTGhsI.phsss...........pANlhllhsppsc...ht.tl............................cchhspsustsh........csshChsshps.s.s.s.s.phstuplllsVcpspsct.chluClhEElsQslGLsND..Stp...s...hPSIFNDc....s.pslLoshDhlLL+lLY-PcLcsGMotsplpslLp..h.... 1 11 31 45 +10983 PF11151 DUF2929 Protein of unknown function (DUF2929) Pollington J, Finn RD anon Pfam-B_002101 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 21.90 21.90 22.90 22.30 21.70 21.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.78 0.72 -3.89 28 624 2009-01-15 18:05:59 2008-08-06 12:57:35 3 1 618 0 61 231 0 57.20 33 90.30 CHANGED M+Y....llshFWohlLsphlsalluuLsus.sas...hhsssIluVlhulllhllss.llstcss .........M+a....llohhWuhlLsphlsalluSLsGG..oas.........hsps.hIhuVlhsllhhllus.slsspp.s......... 0 14 33 49 +10984 PF11152 DUF2930 Protein of unknown function (DUF2930) Pollingtin J, Finn RD anon Pfam-B_002135 (release 23.0) Family This family of proteins has no known function. 
23.10 23.10 23.40 23.20 22.60 22.90 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.10 0.71 -5.15 30 160 2012-10-02 14:34:25 2008-08-06 13:06:36 3 4 101 0 74 152 122 170.80 32 78.48 CHANGED lsLhsGsLshsLhllNp.lss....ssloPu.pRA-VLuslhussLlLlulLWpphpP+.ss-tVsLpG..cpGhtlsssLspul+pELAWuSchLLTsTsssolLlaa.cGplLLRRGll............sss....phpPGsIspRshcppp........sl.LVNhtLYPGRs....EF-...hlPsNh.uVllQPLGsc...........GhlllGuhosRsFo+pDEpWlsGhA-KLc ...............................................LPlhsGshshhhlLlN+..lss..........sphos.uQsRu-lLulhLuss.lLsullWp.plp..........P+.u.......s....ssVsL.G........ppsh.hsssl..sp..shppELAWuo.hLLpsT.sstulllhh..ps..phllphGhh......................s...........phh.sthhppshpp.tt..................Lssh.haP...u+.........Eht........hlP.sh.ull.hQ..Plspp............Ghllluups.RuaoppDctWltshupKl........................................................... 0 20 50 66 +10985 PF11153 DUF2931 Protein of unknown function (DUF2931) Pollington J, Finn RD anon Pfam-B_002146 (release 23.0) Family Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. Currently, there is no known function. 
22.20 22.20 23.10 23.70 22.10 22.10 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.28 0.70 -5.00 30 307 2009-01-15 18:05:59 2008-08-06 13:16:21 3 2 183 0 64 250 1 193.90 24 84.36 CHANGED hphlhhll.hsL...lsuCssssh...............p.hpWphusshPpthsshVspsthhstscps..h.ts.sshs.....pp.ttt..tWsst........t..suh.sscspslPchlhlpWhSlh-pKtYppplpl..PcslpppMtpstphpspts.h......hRsslllGLAPGGhVpVWLps...tstsslhls+hpApplsss..phh.......phshshsph......pcpppsalcpcslPhG ......................................................hh...hh...h.uCtt.......................tW.hsh.hhPphhP.s.VThshh..hsspt.h....h.p...s.......p..tt...hspasth.............t...hstspphPpplhhhW.sSlhDpKhYpTchtl..sps.ltphhhs......hshsptss.hh............ahsphlhGLsPsGplpVWLps....stpslhl.s.pht.......pplpscc.shsK........t.p.h.h..............tthtshlc.cshPhs................................................ 0 9 34 45 +10986 PF11154 DUF2934 Protein of unknown function (DUF2934) Pollington J, Finn RD anon Pfam-B_002301 (release 23.0) Family This bacterial family of proteins has no known function. 19.80 19.80 20.50 20.20 19.60 18.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -7.88 0.72 -4.59 64 414 2009-01-15 18:05:59 2008-08-06 13:46:20 3 11 258 0 181 424 25 39.60 35 34.70 CHANGED tpp.p-cpIRcRAYplWEpcGpP.pGpspcaWhpAEpplpup ..........ttcccpIRctAYplWE.pcGpP..pGc..sp..caWhpAcpplct...... 0 44 82 116 +10987 PF11155 DUF2935 Domain of unknown function (DUF2935) Pollington J, Finn RD anon Pfam-B_002056 (release 23.0) Domain This family of proteins with unknown function appears to be restricted to Firmicutes. The structure of this protein has been solved and each domain is composed of four alpha helices. A metal cluster composed of iron and magnesium lies between the two domains. 
21.20 21.20 21.60 21.20 20.80 20.40 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.68 0.71 -4.03 73 699 2009-10-15 12:33:13 2008-08-06 13:51:02 3 4 223 52 132 510 4 123.20 30 85.59 CHANGED hhppslcpchFWhcIhtEHuhFIpssLsspE....pcllppAcpFtpp.F-pLhtcuhphssth.............................................................tlpplspcshp.tspplpsFKpplhcthlss+l.hshhhPLhhDHhlREupaYlphLpp ..............................................................ptlppphFWh+lht-HuhFlptsLcspE..........pcLlcpAppFtpp.F-.plhtpuhphps.h.................................................................tlpph.spcspt.tspplppFKpplhphh.lps+l...tshhhPhhsDHhhREsphalphLp.h.................................... 1 53 112 117 +10989 PF11157 DUF2937 Protein of unknown function (DUF2937) Pollington J, Finn RD anon Pfam-B_002314 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.30 25.30 44.30 30.20 25.20 25.20 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.92 36 166 2009-01-15 18:05:59 2008-08-06 14:10:53 3 1 162 0 56 163 20 162.70 29 95.05 CHANGED hlhchLclhhhs....sGslhusQhPtFsppYtQRLsuplpEhppslssFptsAppa.ssshpphltpappss-.hhpscupshpphhsRhptLppshpth..pus.ap+hhhhhphschclhppThpsap.ulsLospulsaulssullhs.hlhthhhtlhthhht...+pppp ....MltphlcLllFs....hGhlhulQlPtFhspYtQRlsutl.EsppslpuaptoAppahtsshpthlp+apsssDsshpscupshpphhpRhphLppphtsh..pushat+shahhpsschclhppThssap.tl.Lss-ulsaGllsuLlls.hlhchhhhlhshhhp..tt............. 0 12 28 39 +10990 PF11158 DUF2938 Protein of unknown function (DUF2938) Pollington J, Finn RD anon Pfam-B_002317 (release 23.0) Family This bacterial family of proteins has no known function. Some members are thought to be membrane proteins however this cannot be confirmed. 
23.00 23.00 23.10 23.10 22.70 22.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.96 0.71 -4.87 36 258 2009-01-15 18:05:59 2008-08-06 14:19:11 3 1 249 0 69 220 189 145.00 41 88.88 CHANGED lhIGlGATllMDLWullh++lhGlsshsaAhVGRWlsHhh+G+hhHpsIupussV..tuEphlGWhsHYhlGlhFAslLlslhG.sWltpPTlhPAlhhGllTVssPahlhQPuhGhGhAAS+TPpPhpsRlpS.....LlsHslFGLGLYluAhhl ...................................lhlGlGATllMDlWuhl...hphh.Gh..sshsauhVGRWhh.al.h+Gp....lhHs..sIupusPh....ttEhslGWhuHYulGllauhlhhhls.Gss...WhspPshhPAll.hGl.l.T.lsAsaFlhQPuhGhGhAAS+sPpPsts.R..lho......LluHssFGlGLYhsAhh.h................... 0 16 37 50 +10991 PF11159 DUF2939 Protein of unknown function (DUF2939) Pollington J, Finn RD anon Pfam-B_002321 (release 23.0) Family This bacterial family of proteins has no known function. 21.50 21.50 21.70 21.50 21.30 21.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.87 0.72 -3.81 35 245 2009-01-15 18:05:59 2008-08-06 14:27:23 3 2 210 0 84 227 37 94.70 26 44.55 CHANGED slsshsshhhuSPYl....ulaplcsAlcspDstsluphlDaPuLRpSL+sQlssthhpph....ssphtssshusLuthhus.ullssh....VDshlTPpGltslh .....................lhssh.hhh..hu...oPah.......sLtplppAlcs+Ds....sslspaVDaPu.lRtSL.+...pQlsstlhpph.....ssphpsssh....s.tlut.hlus.....slsssh.......VDshlTPpulsth......................................... 0 24 48 66 +10992 PF11160 DUF2945 Protein of unknown function (DUF2945) Pollington J, Finn RD anon Pfam-B_002448 (release 23.0) Family This family of proteins has no known function. 
20.60 20.60 20.60 20.70 20.30 20.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.68 0.72 -4.15 31 280 2009-01-15 18:05:59 2008-08-06 14:57:41 3 4 259 0 131 297 20 61.40 36 61.29 CHANGED GD+VpW......sScsGcspGplhchpTc-schpG..+phcASp--PQYclcSDKTs+hAsHKssuLp+h ..........GDcVpW.............sSpsGpspGplhc....h....h..T..c..cs....c..h..p..G..+..s.....h..c..ASpD-PpYp...lcS-+os+.AsH+spsLpc.h............... 0 35 77 107 +10993 PF11161 DUF2944 Protein of unknown function (DUF2946) Pollington J, Finn RD anon Pfam-B_002487 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 21.60 21.60 21.60 22.80 21.20 21.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.50 0.71 -4.62 41 142 2009-01-15 18:05:59 2008-08-06 15:05:40 3 1 141 0 58 133 19 185.90 47 96.43 CHANGED MD-IV+pAlAKWPNVPsCaGWLuLDtRGpWhhRD-t.sQ.....ttG............h.Gs.lpHssLlsFIsRNYpsDtcGpWFFQNGPQRVYVEL-hTPalhRlps......t.s........hslpsHTGt..sht.spusalDEpGplhLs...................sshtluLlHD+Dlshhustsp...t...s................................ttplslps.ltpu-lstRFGFVtsPtst ...............MDDIV+QAlAKWPNVPpChGWLhLDcRGpWRhRD-tAQ.....AsG.............t.GsPlRHsuLlsFIsRNYppDtcGpWFFQNGPQRVYVELshTPWllRLsst......ss.......................huLssHTGt..sh..p.sstsaLD-tGsllhs...................ss.plAhLHD+DLshhuctscht.t....ut.s........hhth.s........................stshsl.psltps-VstRFGFVssPAtt................................... 1 7 30 46 +10994 PF11162 DUF2946 Protein of unknown function (DUF2946) Pollington J, Finn RD anon Pfam-B_002520 (release 23.0) Family This family of proteins has no known function. 
23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.01 0.71 -3.91 87 687 2009-01-15 18:05:59 2008-08-06 15:45:01 3 3 389 0 224 685 20 122.20 19 86.11 CHANGED suW....lul..hAl...Lhthl.APhlupuhh........thstssh...ssshCssssst....tsshstststptsss.t.................................sC...saCshhstss......sLssshsshhshhhthtths.......sshhtshsstthassups.RAPP ..................................................h..luh...hAl...l.h.h...hl...uPhlupshs..............................stssh......ttsh.C.ssssst.............tsshstsss.t.pts..st.t..t.tt.....................................sC...saCs..h.hst.s......sLs.ssh..ssh.h.s.hh.hhhst.hs.......sshhhshh..tt.hast.sps..RAPP................................. 0 35 85 163 +10995 PF11163 DUF2947 Protein of unknown function (DUF2947) Pollington J, Finn RD anon Pfam-B_002524 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 26.30 25.80 23.40 23.30 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.96 0.71 -4.64 22 169 2009-01-15 18:05:59 2008-08-06 15:50:15 3 1 168 0 34 105 7 151.10 56 96.65 CHANGED YlPL-pYpRKWIFsHpshPVss-DhApIKPhoptRAsplWpcaISspSscs-pFscsDWss+sssWhps...cWpuAWDS.--ssLP-tlhtalc.WpD-ssVYFCYEKYplIET+WsVFhRsWKNFLFaDDGPlLlG+K+pQAlhFppsGphpLGp ........YlPLDpYpRKWIFTHtSMPVP-tDLApIKPMsptRAAQhWKENISspSPDAERLSSpDWPtK..sssWst-..ssWhutWES.D.-.spLPEtlssals.WQDDVTVYFCYEKYNVlETKWuVFKRaWKNFLFY.D.DGPhLlGRRRKpALWFsocGpVKhG......... 0 5 12 25 +10996 PF11164 DUF2948 Protein of unknown function (DUF2948) Pollington J, Finn RD anon Pfam-B_002527 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 
25.00 25.00 47.70 47.50 23.70 22.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.48 0.71 -4.55 42 207 2009-01-15 18:05:59 2008-08-06 15:56:21 3 2 205 0 71 164 1063 138.90 46 92.73 CHANGED L+LhAhDs-DLtVISAhlQDAVh.su-hpWctpp+RFuLllNRFcWEss........tptpssERVposLth-sVhsVpSpGlc+sctDt.VLuLLulsFcP....uEssuGplhLshAGsGslRL-VEslEspLpDlotsatusuc..PsH .........LKLlALDtEDLpVlSAHlQDAVlcluDlpahscc+RFsLshNRFsWEcs.......................hcptshcRhpouLpFscV.ts+upGIs+p.spD.sVLSLLulcF.s....ups.....PuGslpLsFuus..u..AIRL-VEClEspLsDlGssWpstut..PcH.......... 0 20 43 51 +10997 PF11165 DUF2949 Protein of unknown function (DUF2949) Pollington J, Finn RD anon Pfam-B_002571 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 22.20 22.20 22.70 24.70 21.90 22.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.68 0.72 -3.79 37 156 2009-09-10 15:59:59 2008-08-06 16:07:02 3 1 70 0 69 147 81 57.80 39 82.18 CHANGED ssspLlpFLpp-LuLspsulslul+........ptctppuPLPhlLWpYGLloLpQL-plhDWlpp ....h..ppLlpFLpp-LulsssulslAlR........ppp..tp.......us...LPMlLWQYGLloLpQL-plaDWL-.t.. 1 10 50 67 +10998 PF11166 DUF2951 Protein of unknown function (DUF2951) Pollington J, Finn RD anon Pfam-B_002585 (release 23.0) Family This family of proteins has no known function. It has a highly conserved sequence. 24.90 24.90 25.40 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.39 0.72 -4.14 6 299 2009-01-15 18:05:59 2008-08-06 16:12:50 3 1 192 0 5 56 0 95.60 72 98.34 CHANGED ht.hpR..hEp-hRIpRLE-NDKphFsoL-cI+cGQ+sQEhVNpKhDhTLDuIpRERELDccsKEcNpKNI+DlKMWlLGLlGTIhuSLlIAlLRTlFGI ......FGFTKR..HEp-WRlpRLEENDKTMFpph-cIc-u.+TQEpl.pKLD+sh-p..lpR-...+E.DEKNKccNsKNIRDlKMWILGLIGTIhSTlVIALLRTlFGI........................ 
0 4 4 5 +10999 PF11167 DUF2953 Protein of unknown function (DUF2953) Pollington J, Finn RD anon Pfam-B_002617 (release 23.0) Family This family of proteins has no known function. 21.70 21.70 22.30 22.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.47 0.72 -4.06 60 397 2009-01-15 18:05:59 2008-08-07 09:14:46 3 1 368 0 104 330 4 50.90 29 23.20 CHANGED sphGhtDsAhTullhGhlaultuhlhshh........pp.plplpPsFpcphh.cs .......hphGhsDAAhTGllsGhhaulhuhhhshl.phhph...ptsphplsPsFppphh.t......... 1 45 85 92 +11000 PF11168 DUF2955 Protein of unknown function (DUF2955) Pollington J, Finn RD anon Pfam-B_002614 (release 23.0) Family Some members in this family of proteins with unknown function annotate the proteins as membrane protein. However, this cannot be confirmed. 21.90 21.90 21.90 21.90 21.60 21.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.72 0.71 -4.51 42 253 2012-10-02 19:04:43 2008-08-07 10:34:55 3 2 195 0 82 281 23 138.00 25 40.93 CHANGED +sLRluhGssluhsluhhhuashuhhsslhshhlL.shssshshphhhplhhsslhssh.shllsshlppaPlhhslllulhhhhthhhhhcssthLhushhllsholl.thushs..sssh.sllhulhhuhllulhlshls ...............psLRIuhGssluhslsh.hhshshuhahslhPhhlL.uhssshshcshhphlhssslsslpssllsshhtpaPhlhs.l...llhh..ha..hapat..h..h..scs.shhLFGshs..lls..ho...lhltFu...o...as...ss..shpslhhu.hhusllulhlshlh....................................... 0 7 21 53 +11001 PF11169 DUF2956 Protein of unknown function (DUF2956) Pollington J, Finn RD anon Pfam-B_002632 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 82.50 82.20 22.90 22.20 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.57 0.72 -4.07 27 155 2009-01-15 18:05:59 2008-08-07 11:07:38 3 1 154 0 34 105 7 105.30 60 91.04 CHANGED SsETQpEAh+lAKuTQKPGQTKEQTKLIAQGIEKGIApYKKQQKsKuR-tDKt+Kpph+sKpp........sppppspp..hspsp.shLPWs...LLsLSWlGFhuYlhh ....SppTQQEAlKIAKATQ+PGQTKEQTKLIAQGIEKGIA.YKKQQKEKpRQADKhRKKslKAKppsspt.....ts--.s...sph.s.ssspsppupLsWl...LLsLSWlGFluYhh.h. 0 5 13 24 +11002 PF11170 DUF2957 Protein of unknown function (DUF2957) Pollington J, Finn RD anon Pfam-B_002671 (release 23.0) Family Some members annotate the proteins to be putative lipoproteins however this cannot be confirmed. Currently no function is known for this family of proteins. 25.00 25.00 36.40 39.30 17.70 17.50 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.21 0.70 -5.83 12 197 2009-01-15 18:05:59 2008-08-07 11:20:59 3 2 60 0 48 160 10 358.20 48 79.41 CHANGED TaQlpalESsVPposGoVsPTR...sG..hshoGohsp.Ts.LPTsc.NpCAFhLpsuo.........lDsscPshlFlGpGVsGGuIPGATIpFsGl.....lGlGslPspTFPaYPFIuFupTETDhoKlAGsYNtlGaH.lPSt..........satPsssshp.TLNADGS...Csst...ssuoCpsoGssas.+....ususssF.Sssh..tGp.shPohupsts.h......A+GlhIVGKLpstLVPllIRsGYApsssssh.....sssADDEsGIulLuPssAlAssSlpGtYIGssSshsYtsoslsGs......pushLDPh.sspssLsusaphDaoQss.sGslossshsus.sushTGplhFsGusauaL-..osusoP.Fslus .TaQlpal-SPVPtoTGTVsPTR....AG..ssloGTlspt.Tu..LPTsc.NpCAFhLs..suS...........................ls.scPshlFlG.GVsGGuIPGATIQFsGl..........lslGpVPspoFsaYPFIGFospETDloKlAGsYNpLGYHpVPSt..........Naussulsup.TlNADGo...Cssosh.......suG....uCh....soG....ssas.p....ususssFpSpsh...tsQ....lhPohuphs..h....t.A+GhhIVGKLpsQLVPllIRTGsAN..s..sssssh....sssADDESGISlLuPtsAlAuGS.sGtYhGsDSsFcYpsTulsGu......puThlDPh.sspsuLssuhsLDYsQss.PGslTsspssuo...us..ss...TGphlFoGGlauaLD..ssssss.Folsh................................ 
0 4 8 22 +11003 PF11171 DUF2958 Protein of unknown function (DUF2958) Pollington J, Finn RD anon Pfam-B_002712 (release 23.0) Family Some members are annotated as lipoproteins however this cannot be confirmed. This family of proteins has no known function. 20.30 20.30 20.30 20.30 20.10 20.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.44 0.71 -4.23 21 220 2009-01-15 18:05:59 2008-08-07 11:26:14 3 10 148 0 82 208 67 105.00 36 22.38 CHANGED LlTsp.RspLLANGptp........pchDPhPVVKLFsPsusuTWLlTELD.tDGD.phFGLsDLGhGhPELGsVSLuELtul+GPhGLslERDLaFpuppsLSsYActActsGuIls ....................................................hs.t.+t.Lhtpstt..tt......thD.hPV.V+hFsP..ussTWLLsEhD..P.t.Ds..........D...tuFGLs..D.l..GhG..h.PELGhhuLsELtul+...s.hs...ls........lcRDlaFp...........sp+.l..a.t.u...t....s........................ 1 9 48 70 +11004 PF11172 DUF2959 Protein of unknown function (DUF2959) Pollington J, Finn RD anon Pfam-B_002747 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 31.50 31.50 36.30 36.00 31.10 30.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.31 0.71 -4.56 27 148 2009-01-15 18:05:59 2008-08-07 11:40:00 3 2 143 0 54 137 29 200.00 51 92.68 CHANGED llhLoG..CQSAYYuAMEKVGlHKRDIhVDRV--A+-oQp-ApEpFpSALEpapullsa-GG-LEctYspLscpYEsSpsuAccVpsRIcpVE-VAcALF-EWppELcpYsssoLRcsSpp+LcpT+ppYppLlpuM++AEsKMpPVLssh+DpVLYLKHNLNApAIuuLpGEhsslps-lspLlp-MppuIsEuspFIpphp .....................h.hhLoGCQ.S....A...YYuAME+.V.G.hHKRDI..hVDRVE-A+-SQp-AQcpFsSALEphpuLssaDGG-LEssYsplNDcYEsSppAAp-VpsRIsslEDVA-ALF-EWpsELstYosA.oLRRsScpKLccT+ppYppLlpuM++AEsKMsPVLssh+DNsLYLKHNLNApAIuuLpGEFsoLcpDIstLIppMNpuIsESscFIppL.......................... 
0 16 29 45 +11005 PF11173 DUF2960 Protein of unknown function (DUF2960) Pollington J, Finn RD anon Pfam-B_002756 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 20.60 20.60 21.10 99.90 18.50 20.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.66 0.72 -3.68 20 165 2009-01-15 18:05:59 2008-08-07 11:57:25 3 1 165 0 35 76 9 78.50 62 89.53 CHANGED MARpItYTaKspsKcIsFSY-caHslaEAVAAAEGIDLTsaLtMEpQlthsS.+uspAV+saRcscFt+hGFocIpal+- MARTIlYTYKsQ-KsLsFSY-KHHNIaEAVAEAEGIDLT-FLKMEtQlEulS..DTKuVRNFRDNaF+KLGFupITLt.... 1 5 12 24 +11006 PF11174 DUF2970 Protein of unknown function (DUF2970) Mistry J, Coggill P anon Pfam-B_713 (release 23.0) Family This short family is conserved in Proteobacteria. The function is not known. 20.20 20.20 31.30 31.00 20.00 20.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -8.58 0.72 -4.38 68 334 2009-01-15 18:05:59 2008-08-07 12:00:31 3 1 266 0 122 275 95 55.70 35 81.05 CHANGED hhpshpuVlhuhhGVpppps+p+Dhsphs...PlhhIlsGllhshlFlhsLlhlVphVl ....hhpsl+uVhhuFFGVp.pscs+ppDhsphs...PltlllsGllhsslFlssLlhlV+hVl... 1 19 56 89 +11007 PF11175 DUF2961 Protein of unknown function (DUF2961) Pollington J, Finn RD anon Pfam-B_002770 (release 23.0) Family This family of proteins has no known function. 
20.30 20.30 21.60 20.30 18.20 20.20 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.64 0.70 -5.05 37 285 2009-01-15 18:05:59 2008-08-07 12:01:15 3 5 193 0 83 254 31 241.70 34 57.17 CHANGED hhRp.lllRhYWDspptPoVpsPlGDFFusGau......hsthsShslss..ssssu........hssYa.MPFp+pA+IplpNpsstshtthaaplsY.s.h....ttlsccssYFHApW....+cppsh.......................shspsashl-.lcG+GpYVGshl...ulps.............hpss.....WaG..EG-.+halDG-..phPolpGTGoEDYFsuuWs...........h.hppassPatGhshhppsssh.t.....................hhuhYRaHlhDPIhFpcsL+lTlp...........ctch.p....phpsDhuSsAYW...YQs ......................................................................................................hRc.lll+hYWDspppPoVpsPlGDFFssGau.............................tsphsSlslss.....sPstu..........hNsYa.MPFp+pA+IslpNppspsh..t....ha..apIs....Y..s.h.......plspc......shYFHApW+Rpp.s.....................................phtps.a.sllD...lcG....+GpYVGshl...ulps......................pph........WaG..EG-.+ha..lDG-.....phPolpGTGoEDYFsuuWs...............................hppassPahGhsh...hpppssh.t.....................hhuhYRaHl.DPlhFpcsL+lolp................csph......ptpsDhuSsAYWYp................................................................. 1 33 56 69 +11008 PF11176 DUF2962 Protein of unknown function (DUF2962) Pollington J, Finn RD anon Pfam-B_002773 (release 23.0) Family This eukaryotic family of proteins has no known function. 
20.60 20.60 23.30 23.80 20.50 19.80 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.81 0.71 -4.65 31 261 2009-01-15 18:05:59 2008-08-08 09:11:59 3 6 220 1 177 255 0 150.00 30 79.84 CHANGED KlpKplu....tthcslHPpSRKsppLsRsshR-cKlpptKttptc+ps..hhp+ltaFpctlpp......................pppsaohp-hpplIc...................palsRp..DsELcplcpc....RRtuRP.ssRpphLcpphctEpc-ac..sGa.lPDLocscslchL+p.W....s.Gshss..lsslKhl+lop .........................ptht....tpppslHstSR+utpLpRtshRpc+hpch+p.t+t.t+pph.......hh......c.........+ltaFppplss......................pptshotp-hpplIp...................p.alpRh..cpEL-plchc.......+Rp...GR.....ssRcsllcpph-pEpppac..sGh.lPDlssspslchl+p.W.......s.Gshtt..lssl+hh+ls.t.................. 1 51 86 131 +11009 PF11177 DUF2964 Protein of unknown function (DUF2964) Pollington J, Finn RD anon Pfam-B_002804 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 20.80 20.80 20.90 21.70 20.60 20.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.97 0.72 -4.04 8 102 2009-01-15 18:05:59 2008-08-08 10:27:27 3 1 52 0 27 87 3 59.70 48 91.07 CHANGED MlRtchRlVLATlAVFIALAGlssslHGlLFD.ssshhYGlluLllGlAuFVlhLNPsPsDc ......MlR.phRlVLAsIAlFlsLAGhhsAl+GLLFDtsssltYGshAlslGVssFVlhLNstspD...... 2 2 4 9 +11010 PF11178 DUF2963 Protein of unknown function (DUF2963) Pollington J, Finn RD anon Pfam-B_002790 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Mollicutes. 
21.70 21.70 22.40 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.96 0.72 -4.54 42 215 2009-01-15 18:05:59 2008-08-08 10:44:50 3 8 18 0 139 213 4 48.20 33 56.38 CHANGED Yp..DGc.pIphI..p....EYsspTGphIKcThY.p.DGcsIphIp..EYs..pssphIKpT............h ...............................a..cGp..pIphI..p........Eassp.TuchIKpThY.p.D.G.p.sIp.Ip..-as...ptp.h.p..................... 0 33 35 98 +11011 PF11179 DUF2967 Protein of unknown function (DUF2967) Pollington J, Finn RD anon Pfam-B_002838 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Drosophila. 25.00 25.00 37.20 37.20 21.70 21.10 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.88 0.70 -4.89 2 40 2009-01-15 18:05:59 2008-08-08 11:05:57 3 2 15 0 8 40 0 242.90 79 53.62 CHANGED PSTAPPPTRhREHFTFDPPQSPKSARoSEKupSHFoFK....EsAppAthuSsphhuuG...................AAoEusEtp-ER...uluNRsKKLRsRERDs...................N+ISPSVSPSpSpRsSPKRE++RTTP.sSTGAIuKlsSAPPTMKDuNFFGSStpQKQR.SQp.Ps...pQLSPSS....QQRKYSSSSSSGSS-RCLR-sTusGTMFPFDREALDYERIQRECFAPSSsTASsSSDS.EAENCSVYERK.uADIFQ ...................PSTAPPP.TRGREHFTFDPPQSPKSARTSEKARSHFTFK..........EDAQQARRASNSYY.AGG..........................................AAoEAAE.AQEER......AVANRNKKLRARERDSMAGNANANANANGGS.NSoRNRISPSVSP..SSSNRTSPKRERKRTT....PSVSTGAIAKINSAPPTMKDGNFFGSSQNQK.QRPS.QQ.QPSPQQQQLSPSSQA.AAQQRKYSSSSSSGSS-RCLRD.V...A.AAGTMFPFDREALDYERIQRECFAPSSA.TASTSSDSDEAENCSVYERKLSADIFQ............... 0 1 2 5 +11012 PF11180 DUF2968 Protein of unknown function (DUF2968) Pollington J, Finn RD anon Pfam-B_002850 (release 23.0) Family This family of proteins has no known function. 
27.00 27.00 27.40 79.90 25.60 25.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.24 0.71 -4.92 12 142 2009-09-11 05:56:16 2008-08-08 11:12:26 3 1 95 0 44 135 4 192.10 43 81.57 CHANGED pPsst.......sstsssss.ssAsutuu.....spusVsELpphlpsppLoELRTTYNGuYGASLLFpsc-hTYYVALFQpKsFWRVIKTsshspAEtlY+sFucQospLA-lElRRspLpAQKA.hER.lAhupsRAppLQADlpltppQcttVsscQpQsRpEsssLptp+ppspsQLcphQRQlctLQtptpp...ulP .....................s..........t.ssssss..ssAsusus.........upusVsELQphlpu+cLoEhRTTYNGoYGASLLFssp-hTYYVALFQpKsFWRVIKTtscs+AEtlYcsFucQotpLAssElc+scLEuQKAhh-RtIAhsppRAppLQADLslsRpQpAtVusRQcsspsEssALpsp+pttQsQLRpLQpQlRpLQ+Qsps......s........... 0 4 14 28 +11013 PF11181 YflT Heat induced stress protein YflT Pollington J, Finn RD anon PRODOM Family YflT is a heat induced protein. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.90 0.72 -3.87 12 479 2009-01-15 18:05:59 2008-08-08 11:33:15 3 3 229 0 85 298 1 95.40 28 57.40 CHANGED .hlctapNppclhpslppLptpGhspcDIYVluHDc-Roccls-sT.....csssluhcE.pGhhsslhshFp.cpGDELRs+hpclGlocsEAcpaEccLDcGKlllhV .......hVt.apscpEshpslpcLpp..cGhppc-I.hVlo.+....-...c.......cch........-cl.t.-so................ssN.....s.h.sscc...pulhsphhshF......p........tpt...D...p.....c.sth.....pp....hs..hscpEhp.YcpclppG.+hllh....................... 0 28 65 76 +11014 PF11182 AlgF Alginate O-acetyl transferase AlgF Pollington J, Finn RD anon PRODOM Family AlgF is essential for the addition of O-acetyl groups to alginate, an extracellular polysaccharide. The presence of O-acetyl groups plays an important role in the ability of the polymer to act as a virulence factor [1]. 
25.40 25.40 25.40 26.20 24.60 25.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.81 0.71 -4.60 13 100 2009-01-15 18:05:59 2008-08-08 11:43:13 3 5 83 0 31 89 3 173.80 41 71.82 CHANGED slhLuluuhsAtA..u-ssLYsssAPpsSAFVRhhNuosuplslsl.GssshpslusspsusahahssGuts.....hplGupslsscl..suspahTlVspssu.....pshLlE-PshpscpKA.LthhNLoss..ssoL+TADGcs-VVcsVussupucRpINPVKlsLAlhsusppVushcsl.sLc+G-sho .............u.huLulhuht....A.A..u-...uALYussAPcGSsFVRhhNAusup..lss.o..V.GssslppVushu.uosa..pFhPtGsYo.......splGups..l.PVcL...ss-cYYTlVspsuG......pspLlE-.P.s.FcN+pKuLlRV.NLosp..pLoLKTADGKT-VVcsVuspupGcR-I.NPVKVsLALasGs+KV..ucl+.sV.sLcRGEsh................................ 0 4 12 24 +11015 PF11183 PmrD Polymyxin resistance protein PmrD Pollington J, Finn RD anon PRODOM Family PmrB forms a two-component system (TCS) with PmrA that allows Gram-negative bacteria to survive the cationic antimicrobial peptide polymyxin G [1]. The TCS is linked to another one via the polymyxin resistance protein PmrD. PmrD is the first protein identified to mediate the connectivity between the two TCSs. It binds to the N terminal domain of the PmrA response regulator which prevents its dephosphorylation, thereby promoting the the transcription of genes involved in polymyxin resistance [2]. 21.00 21.00 22.10 27.10 20.20 20.00 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.93 0.72 -4.17 3 441 2009-09-10 18:00:43 2008-08-08 11:46:37 3 1 427 2 10 87 0 79.60 76 92.99 CHANGED MEWLVKKSpsNKpcu.CHVllLCDuGGALKMIAElcS.cltL+sGDlLSPLpDApYCINREKpQTLKIlsAoCYSsDEWpRtsK MEW..LVKKSppsKpcs.pHVLMLCDAGGAIKMIAEVKS.DFAVKVGDLLSPLQNALYCINREKLHTVKVLSASsYSPDEWERQC+.... 0 1 3 5 +11016 PF11184 DUF2969 Protein of unknown function (DUF2969) Pollington J, Finn RD anon Pfam-B_002861 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Lactobacillales. 
25.00 25.00 29.40 28.90 20.80 19.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.39 0.72 -3.89 19 584 2009-01-15 18:05:59 2008-08-08 11:53:44 3 2 582 0 56 191 0 74.00 46 97.69 CHANGED MSKK-KcIElplp-tcs....ss.sshplhI.GK+hIGpItph.-pcF..sshpssssphhhKsh-pAlptllcpaNLp ....MS.KK-KcIEIplsDsKsplstcsh-ua..pLhI.GKKlIGEIs-l..DspF..AIlpsuss-uFaKpLEcAlEhlIcsYNLp........ 0 10 25 40 +11017 PF11185 DUF2971 Protein of unknown function (DUF2971) Pollington J, Finn RD anon Pfam-B_002776 (release 23.0) Family This bacterial family of proteins has no known function. 21.80 21.80 21.80 21.80 21.60 21.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.31 0.72 -3.27 145 706 2009-01-15 18:05:59 2008-08-08 12:00:33 3 24 537 0 185 600 22 90.70 22 28.45 CHANGED MWspYuss.t.....pGlslta.....................................................tttt............thhhtpVpYsspp..tht.hphhttt....................................................pthhhh.......KppsapaEpEhRl ........................hWu.tYuss.p..........pGhslta..........................................................................t..pt........................tthhh..t..p..V..p..Ypsp....pht..hhph.h..t..........................................................................................h.pthhhhKspsWpaEpEhRl................ 0 76 122 149 +11018 PF11186 DUF2972 Protein of unknown function (DUF2972) Pollington J, Finn RD anon Pfam-B_002895 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as sugar transferase proteins, however this cannot be confirmed. 
21.90 21.90 23.40 23.30 21.20 20.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.51 0.71 -4.42 21 237 2009-01-15 18:05:59 2008-08-08 13:53:51 3 1 88 0 9 179 0 178.40 36 43.56 CHANGED alDhpEIhsc+sFpThppLucta..pFs.Pp..s-cphappth.huch.hslLPlsLhlp................sshslhh...........s.h.h.......................hhphpcphhsIspplh....pps.hh-pl.......hlhlcpp-achlhp...spplhpplKpYLpcF...l.tLccplchccs+hlpEcDlLpYL..Kcp+plph+hKplLDcE.LsaIKppRPDIVsSWKYYQEFE+hCcELc ..YlDhp-IptppAapThphLuhpa..sFs.P....pp.phFp.p....hs.l..hhlh..PlpLhhsp................phpI.h...........p.l.l..s....................h.phppp.hlcIh.pplh.....pp.hhssh..hlhIcppcap.Lpp...sphLhppl+pYl.cF...l.tlcppls.pcpphh+EcDlLsYh..pcspslhhphppIh-pE.LpalKpp+PcIlsSWcYYpEFEchhpt.................... 0 3 8 9 +11019 PF11187 DUF2974 Protein of unknown function (DUF2974) Pollington J, Finn RD anon Pfam-B_002933 (release 23.0) Family This bacterial family of proteins has no known function. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.36 0.70 -4.98 24 1013 2012-10-03 11:45:05 2008-08-08 14:07:40 3 19 778 0 142 1032 52 195.30 27 47.94 CHANGED hupSsRFpslcltsalsplstc..ppQFuAhsapls..........ssohhluFRGTDsollGWKEDFpMuap..lsAQctAhcYLpplhpphss........s.lhLsGHSKGGNLAhYAAhp.hpsslp.......................pRItclYoaDuPGh.cphhp..sstapplts+lpchlPpsSllGhhLEpspph....hlVpSss..ulhQHcsaoWpl.psspFlpt.cslsssSphhppslppWlsslst-pRcthh-sLFs 
................................................................................h.....Ratthph.thhp.th.....p........p......tp...Q.Fu.A..h.s.a.pls........................s.s.h.hl.s.FRG.....T.D........s........o.....l...l......G.............W.....+..E.....D.h.....p...h.....s......a.h..t......p......h...s..u......Q........c.......p..A......h.....p.Y..L..c.p...h.h.t.p.a....ss...........................................p.lhlsGHSKGGNL.A.h.YAAsp....hp.tp.h.p...............................p.p..I..t....t.lasa........Du.P....Gh...p..p..h..hp.......p...t....h..p..t..h...h..t+.h.h.hl.Pp.tul.lGhhhp....t..............hl...V..p..S.....p......hh.....Q....H..hp.Wph....t......ttph..........t..s.....u...htt..h.th.tths.tpht.hht.ha.t....................................................................................................................................................... 0 35 74 101 +11020 PF11188 DUF2975 Protein of unknown function (DUF2975) Pollington J, Finn RD, Bateman A anon Pfam-B_2875 & Pfam-B_3379 (release 23.0) & JH:B0MX27 Family This family of bacterial proteins have no known function. These proteins are likely to be integral membrane proteins. The proteins contain a highly conserved glutamic acid close to their C-terminus. 
26.10 26.10 26.10 26.10 25.90 26.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.65 0.71 -4.45 99 988 2009-01-15 18:05:59 2008-08-08 14:12:43 3 1 759 0 233 831 18 140.00 17 82.57 CHANGED hhhsthhtth...........sphhsthshhhhshh.................................lh...shhsslshhhulhphh+Llpplpcs.psFot..pslctL+hIshshlshsllhhhshhhhhhh.st..t..................hhhhhhshsshhlu.llhhllpclhtpAl.......cl+pEpDLTI ...................................................................................................h................................h.hh....................................lh.....hhhssls...h...h..h...slh....ph.h+....Ll..ph.l..pc.......s..psFop..tslptl+plths..hl...s...hu....ll....h.h....lh....h....sh.h..h.hh.....ht........s...h................hhhhsh.h.s.shllu.lhh..hllp.c.lhppAl.......clcpEsDhTl...................... 0 90 171 199 +11021 PF11189 DUF2973 Protein of unknown function (DUF2973) Pollington J, Finn RD anon Pfam-B_002929 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently they have no known function. 22.20 22.20 22.30 30.30 20.10 21.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.18 0.72 -4.20 34 106 2009-01-15 18:05:59 2008-08-08 14:16:03 3 1 95 0 32 106 363 66.70 34 72.64 CHANGED hhsLlYhhsFhhLhh.AapshhpGhhsssht...............p.p.tt......................s+ssphTsHPELLDpsGclscE-LLsVR ...........................hh.LlYhhAFsllhhlAhtshh+uhhsssht...............ppp..s......................sppsphssHPELLDcsGpllpEsLLslR. 0 4 18 29 +11022 PF11190 DUF2976 Protein of unknown function (DUF2976) Pollington J, Finn RD anon Pfam-B_002963 (release 23.0) Family This family of proteins has no known function. Some members are annotated as membrane proteins however this cannot be confirmed. 
26.10 26.10 29.80 40.90 25.30 25.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.89 0.72 -4.46 22 218 2009-01-15 18:05:59 2008-08-08 14:22:53 3 1 179 0 63 180 9 86.90 41 72.80 CHANGED LPshpsPosG.......sGsshhpplpsYhhDGhhLlGLlluAhuFlsVuhsulpsasEl+s..GKtsWucFGshllVGllLllhslaLlscAssI ......LPshEsPopG.......sGsGlhpTlpsYuhDhlhLluLllsAsuFlsVuhpuluoYsEl+s..G+tsWupFGhslsVGslLlllsIWLlTcAssI...... 0 6 27 50 +11023 PF11191 DUF2782 Protein of unknown function (DUF2782) Gunasekaran P, Mistry J anon Pfam-B_001700 (release 23.0) Family This is a bacterial family of proteins whose function is unknown. 21.20 21.20 21.60 21.60 20.80 21.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.61 0.72 -4.06 41 297 2009-01-15 18:05:59 2008-08-08 14:24:31 3 2 290 0 102 233 41 111.00 24 91.26 CHANGED p....llhss....hLussshAtsst.pss........ss..................p.-P.............-lsI..hpcss.pplpEYRlsGplhhlKVpPcsG...hsYaLlcs-Gp.upascpst....s.shhsPtW.............................plapW ..........................................................................h.....h....hhsh.s...usss......ps.s............s..................................cs..................-hhl......hp-ss..splpEYRlsGphaslcVpPtpG....tsYallcssus.ssascppt.....schhhstW...............................plapW........................... 1 20 64 87 +11024 PF11192 DUF2977 Protein of unknown function (DUF2977) Pollington J, Finn RD anon Pfam-B_002980 (release 23.0) Family This family of proteins has no known function. 20.60 20.60 20.80 25.10 19.50 17.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.05 0.72 -4.14 7 236 2009-01-15 18:05:59 2008-08-08 14:27:35 3 2 170 0 4 81 0 61.20 58 49.41 CHANGED MplhlN...........D+stIhGasshGuh-pu...l-lc.splPtpFpcpFcPctahh.su..pIlhNpsYhc.p. .........MpIhVN...........c+NElIuYs.hGGhcpu...lDl-...NlP.pFpp.FcP+tFhaoNG......EIhaNpsapcEc.s..... 
1 4 4 4 +11025 PF11193 DUF2812 Protein of unknown function (DUF2812) Gunasekaran P, Mistry J anon Pfam-B_001697 (release 23.0) Family This is a bacterial family of uncharacterised proteins, however some members of this family are annotated as membrane proteins. 22.10 22.10 22.20 22.30 21.90 22.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.43 0.71 -3.94 34 805 2009-01-15 18:05:59 2008-08-08 14:30:02 3 2 587 0 77 554 3 113.20 26 57.20 CHANGED hsh-+.-EpaLpcMupcGatLpchshhh......YpFc+.scPpchhYclDap.........sppcpp-Yhphac-sGWchl.....sph..sphp.......................aFpcttsptt.....-IaoDppo+hpphpp.hhphhhhhhhhhl ............h.sh-+-EpaLccMth+GWphpp..h.phhh.........apFcp.sp.Pccs...h..Y+l-hp............p.ppchpcYhphac-pGWchl.......s.h....ushp................................ha+K....psssh......plao..-.pts+....hphh+p..hhp.h.hhhhhh...h................................. 0 35 59 69 +11027 PF11195 DUF2829 Protein of unknown function (DUF2829) Gunasekaran P, Mistry J anon Pfam-B_001848 (release 23.0) Family This is a uncharacterised family of proteins found in bacteria and bacteriphages. 25.00 25.00 25.10 27.30 24.70 24.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.81 0.72 -3.50 24 638 2009-09-11 11:01:00 2008-08-08 14:31:38 3 2 599 0 49 240 8 75.90 53 85.74 CHANGED MsFpEAl..tlKpGcKhsRp.GWsGtphaltl...........................psssh..psalhlpss....psph....hs.WsPossDlLA-DWhlV .............MTFEEILP.uL.KAt+KhVRs.GWuGsEpYVpL................................................hDsl.ctsGhsLp.hTPYFLIsso.........cGEG....FSMWuPTsCDVLAsDWl.V.. 0 13 27 37 +11028 PF11196 DUF2834 Protein of unknown function (DUF2834) Gunasekaran P, Mistry J anon Pfam-B_001850 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
21.90 21.90 22.50 23.10 21.70 21.60 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.33 0.72 -3.83 47 198 2009-01-15 18:05:59 2008-08-08 14:32:12 3 1 185 0 55 167 264 99.00 30 76.31 CHANGED lYLhLAlhGslhPhht.ltal..tpsG.......h-ltshlss.hhuN.suupulshDLhluAlslslallsEu.+hph+thah...slhsohslulusGhPLaLaLRp+p ............................lYhllulhuh.slsahhslpal...pp.u............hshst.alph..hhsN.sAusu...huhDlhIuslslh.lahhs-uhR..hsl.+phWl.....hhluohhluhuhuhPhYLhhRER..... 0 14 32 46 +11029 PF11197 DUF2835 Protein of unknown function (DUF2835) Gunasekaran P, Mistry J anon Pfam-B_001851 (release 23.0) Family This is a bacterial family of uncharacterised proteins. One member of this family (Swiss:A4VM42) is annotated as the A subunit of Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV). 25.00 25.00 56.50 56.40 21.60 19.00 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -8.89 0.72 -4.20 42 218 2009-01-15 18:05:59 2008-08-08 14:34:21 3 1 218 0 59 150 18 64.60 43 91.10 CHANGED hhFslslShpcals.aYpGtAppV.VhspsGcplplPApphRPFlTppGl+GpFclphspps+alsLp+ ......hhhplslShpphls.aYpGhAspVhVpocpGh+lpLPAp+LRPFLop.GV+GpFRLshDpss+FlpLE... 0 15 31 47 +11030 PF11198 DUF2857 Protein of unknown function (DUF2857) Gunasekaran P, Mistry J anon Pfam-B_001886 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
31.00 31.00 32.70 38.20 30.70 30.50 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.00 0.71 -4.76 32 266 2009-01-15 18:05:59 2008-08-08 14:35:22 3 2 219 0 73 222 8 177.30 33 90.10 CHANGED sLNpAlLsphlpsL+pGp.lppCcshGhs.--lptLpphshpcl..LssusVshss....lslsp-sht+llppsccppp....cpphl-RhLcLGAShpMhppaFGL.ospEVutRRclL.ulstppGRhshhsE.ppcptlWctWpt..pptslt.h-ssphL-hhMhlAEphsl.............sLoslWshlppW.pps ........................lNpulLsphLhsL+sGp.l++CcshGhs.-ElshLppholpcl..Lu.susVshss....lsls+-sLp+lLspuccppp....c.ppl-RALcLGAShphhpca.FGL.ospEVutRRcll.Gls...h...+.pG.RpsshsE.-p-stlWcpWp.....ctslpt.DssphL-hhM.lAE....phsl.............sLosVWstlppW.pps...................... 0 7 33 54 +11031 PF11199 DUF2891 Protein of unknown function (DUF2891) Gunasekaran P, Mistry J anon Pfam-B_001921 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 45.90 45.60 22.40 21.90 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.14 0.70 -5.85 33 328 2009-01-15 18:05:59 2008-08-08 14:37:19 3 1 324 0 81 223 116 319.90 46 94.86 CHANGED As+FApLsLssls+EYPNKlsHshsussDlp.sP+pLHPsFYGsaDWHSuVHGaWhLV+LL+pa...P-hspsspIpusl-cplTsEslAsElAYhppststu.FERPYGWAWLLpLstELppac.........sstupcWupsLpPLsphlspRhpsaLP+LsYPlRlGsHsNTAFALuLAhDaAcsstcpsLpphItppupcaahpDpssPstaEPuGcDFLSPsLtEAcLMRRVLs....sspFspWhctFLPplsstt.sphlpPusVoDRoDGKluHLDGLNLSRAWshculAsuLPps..ttstlpssApcHlsuulsplsut.cYhGsHWLuSFAlhA ......................ppaAplsLsslpREaPN+hsHphsussDlp.sP+tLHPhFYGsaDWHSSVHuaWhlV+LL+pa.....schspt..-pIhssLcsphTcEsstsEhtYLph....t..p.+........GFERPYGWuWhLtLuspls.hs..........stuppWApslpsluphhhcphhpaLP..KhoYPlRsGTH.NTAFALuLsLDaARt...hp...csp.lttsIlppA++aahsDts...h.u.........aEPuGs....-FLSPuLhEAcLMpRVls....ss-FssWhcsFLPpLspp-.s.sLhsPs....pV.....o..D+..oDGK...l.....A.HLDGLNLSR.AWsh+uLAshLsEs..t+.ttlcpAAscHlspulsplss..DYhGsHWLuSFALhA......... 
0 24 50 66 +11032 PF11200 DUF2981 Protein of unknown function (DUF2981) Pollington J, Finn RD anon Pfam-B_003040 (release 23.0) Family This eukaryotic family of proteins has no known function. 21.00 21.00 23.30 23.20 20.20 18.50 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.15 0.70 -5.35 2 8 2009-01-15 18:05:59 2008-08-08 14:59:40 3 2 5 0 8 8 0 120.40 34 56.25 CHANGED YLILFsFSLVVVSMVALDMAElVHFFPKAPELHshoLLGhTuPFVTRFKsPNASVLFPFNFFKLPNDTTGVLPQL.H...............................DphVVTKFThoplDpHsHKHCNKhsot....RFDDDlSuVLDNhDssDDLpsLRNALKQRLtspN.......TstssoGssD...NYNAYDNPDGKV.uuKDlNS.................spsSsNsssSsspKpptsDSFYDHLPFMPHPGETEGESEEVSKDEFPPETNDLTPEGKSEVVVLYKLQKRs.phhhS.....alhhhlY ................................................................................................................t...........................................................................................................................................................................................................................................................Y-clPFhPH.spTEGEoEElStsEhPPEspsLT.pt.pEVsslhKhQKR............................ 0 4 6 7 +11033 PF11201 DUF2982 Protein of unknown function (DUF2982) Pollington J, Finn RD anon Pfam-B_003059 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 25.70 38.90 19.50 17.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.78 0.71 -4.86 24 166 2009-01-15 18:05:59 2008-08-08 15:05:09 3 2 160 0 37 120 8 148.40 48 67.74 CHANGED SlpLotptlpYaH+pGsWhlcWpNIpRlsls+hppshpphpLPaIGIKLKphsshLssIsPRLAotLLhEQRsLLhhusppcp.shpshEshLhs-tsalsspGppY+GllAMaupRhphLpchLGYclalstssLDRsspEFluLLRcaptts .......................s.aTLTsoHlQ.HahpGGWVl+WsNIppIGlsphc.p-GWaQPLPWlGI+LKcYsPaLsuIsPRLuTpLLlpQRuLLhLuApQps..c...tpp.......aEDhllDspsYls.scGcpYpGL.AMLANRMpYpRcahGYDlFISpp-LDRsu-EFVGLhRRYLAAA......... 0 6 14 27 +11034 PF11202 PRTase_1 DUF2983; Phosphoribosyl transferase (PRTase) Anantharaman V, Pollington J, Finn RD anon Anantharaman V Family This PRTase family has a C terminal RNA binding Pelota domain [1]. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 31.70 31.70 37.40 37.30 26.20 25.20 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.58 0.70 -5.41 68 350 2012-10-10 14:25:38 2008-08-08 15:38:30 3 7 335 0 73 275 5 247.50 50 60.57 CHANGED sl...p...........GSYtssDVpFLL+sl.s..hpp.sss--+EchIQSG.phHYSEMLshEhpPoptYhcLFcpALppsutRlAtsVuhlActlhp......c.ps.ssllLVSLARAGTPlGlLl+Rhlpp........Ghs.....ssHYuISIIRDRGIDpsALcaI..hp+Hs.spslVFVDGWTGKGAIopELpcAl.t.p.h.ppth.............s.......hss.cLsVLADPutCutlhuTc-DaLIPSuhLNuTVSGLlSRTVLssch.l.....ssssF...HGshaYccLtstDlSphFlDslssth ............................h..GSYtPsDVpFLLKsl.s..hp.sssEc+EclIQSGptHYS-MLs.E.tPoptahcLFccALcpsupRlAptVstLAptlhp.......c.ts...p..slVLVSLsRAGsPlGVLl++hl+t.........Ghs...s.HYuISIIRDRGIDtsALcaI..hp..+..H..s..spsIVFVDGWTGKGAIotELtcAL.phpsth.........................s.....hpscLsVLADPsuCuhLhuoc-DaLIPSuhLsusVSGLlSRoVhss-hl..........tthHGshhaccLpphDsSphalDsVsph.h........... 
0 20 44 61 +11035 PF11203 DUF2984 Protein of unknown function (DUF2984) Pollington J, Finn RD anon Pfam-B_003006 (release 23.0) Family Some members in this bacterial family of proteins are annotated as membrane proteins however this cannot be confirmed. 21.30 21.30 21.30 21.40 21.20 20.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.14 0.72 -3.89 22 356 2009-09-11 11:43:42 2008-08-08 15:46:26 3 2 151 0 77 242 0 95.90 31 21.98 CHANGED PAsupRpTWlsLRhcsppshtAlptR..............sts..ultpslssuspRlAspLtspGhsApshpusEhspssstlstt....................................+EsWcthcstsu.assuYtls ............ssupRcTWlsLRhs.spcs..htAlptR.............................sts.hultcshhsuupRlAscLcspGhcAps.s.sus-l.schssshu..s..........................................................cEpW+sl+.ttsu.ahsuYth........................................ 1 18 50 70 +11036 PF11204 DUF2985 Protein of unknown function (DUF2985) Pollington J, Finn RD anon Pfam-B_003090 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.00 20.00 21.00 27.60 19.90 19.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.85 0.72 -4.23 20 240 2009-01-15 18:05:59 2008-08-08 16:59:53 3 12 109 0 186 235 0 78.80 38 14.21 CHANGED h+alpTPhGFhlTlYuh.lshWGuhlhLhLlshlsh........ssup+chWIElssQILNALFslsGhGLAPaRh+DhYahlhht ..................h.palppPhGhhlslYuhhlssaGuhlhLlLhGhls.h................ssup+phWlElssQlLNALFslhuhGlt.PaRhhDhahhhph............. 1 43 96 150 +11037 PF11205 DUF2987 Protein of unknown function (DUF2987) Pollington J, Finn RD anon Pfam-B_003120 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 25.30 25.30 20.30 24.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.78 0.71 -4.54 25 171 2009-01-15 18:05:59 2008-08-08 17:07:43 3 1 169 0 42 124 3 144.00 43 67.67 CHANGED stAps.lslpYssFYs+LKpssKtsaphVcluFhlscsps.t..CpIpsuplssEccptsLslosspcL.hlPhDcsL+ss+ALlsls.s..spuppCsluhQVhAKp.....hptphotscLtplhspMcshLschuGhhs+....aFtPclsGlshcFs ..............................................App.hhhoYScLaophKsNs.KEsHsDV+huhaFsDspopp..CsIcKAaMEKE.-HYEpLslssupEL.hlPlDssL+pANPLVhVs.o..sp-ccCDhSMhVhAKcs....hpuclohpclpsLlsQMpshL-pLGGMFuu....aFsPsVpGVTlEFs............ 0 6 16 29 +11039 PF11207 DUF2989 Protein of unknown function (DUF2989) Pollington J, anon Pfam-B_003139 (release 23.0) Family Some members in this bacterial family of proteins are annotated as lipoproteins however this cannot be confirmed. 20.90 20.90 20.90 54.50 20.50 20.50 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.47 0.71 -4.87 26 162 2009-09-10 14:55:00 2008-08-12 09:52:50 3 1 160 0 37 114 8 199.60 47 74.36 CHANGED hLsGCtp..stsssplCcssPpL.CsDLs.cDuaCRhcRs-LIhpRaplpc..sPospppYppLphlEsYpKClELAutIpaIppp-+pscRhcAhhpShpplpcLpppT+sSp-PtllYY+WophsDpsAhpphLthEspspl.pssElQhuLAoYYh+pDspKshpLLh+sLpLhpts................phN....................s-llpuLAolappppshcpAYlWuhl .....LsGChE..pcsT-plCtsNPpL.CppLN.cDGQCRl.RT-LIW+RF-lhK..sPoDcpplcEhphlpsYcKCLELASQ.IQsIcpochKpsRhsALlpSscE.cRlst-L+pSpsPpsLYahWSQhGDpsAp+tFLplEGpscL.cT.AEMQaALATaYo.sRDp.KTlpLLp+sLcLsstp................slN....................s-lLcuLAShapthpch-cAYlWAhl...... 0 6 13 25 +11040 PF11208 DUF2992 Protein of unknown function (DUF2992) Pollington J, Finn RD anon Pfam-B_003142 (release 23.0) Family This bacterial family of proteins has no known function. However, the cis-regulatory yjdF motif, just upstream from the gene encoding the proteins for this family, is a small non-coding RNA, Rfam:RF01764. 
The yjdF motif is found in many Firmicutes, including Bacillus subtilis. In most cases, it resides in potential 5' UTRs of homologues of the yjdF gene whose function is unknown. However, in Streptococcus thermophilus, a yjdF RNA motif is associated with an operon whose protein products synthesise nicotinamide adenine dinucleotide (NAD+). Also, the S. thermophilus yjdF RNA lacks typical yjdF motif consensus features downstream of and including the P4 stem. Thus, if yjdF RNAs are riboswitch aptamers, the S. thermophilus RNAs might sense a distinct compound that structurally resembles the ligand bound by other yjdF RNAs. On the ohter hand, perhaps these RNAs have an alternative solution forming a similar binding site, as is observed with some SAM riboswitches [1]. 23.80 23.80 27.80 27.60 22.90 22.30 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.82 0.71 -4.25 29 465 2009-01-15 18:05:59 2008-08-12 10:41:15 3 3 432 0 61 366 9 127.70 37 93.45 CHANGED LTlaFDs.FWlGlhEphp.sschcls+lsFG.sEPpD.ElhcFlhpch.s.Lphspshtsph......pp++hNPKRhQRpsp...Kchcp.tsluTKAQpAlKhphEppKpc+KptsKpcKcptpcp+aplKppK++pKH+GH ..........LTVaa-s..tFalGlhEpp.c...p..........s.......p..........hpss+hsFG.sEPpDtElhcFlppp.h......hp.Lpassthtsph...........pp+phNPKRlQ..RpstKp..hpp....tthu.TKAQpAl+hphEtpKpc+Khpp+pc+-tp+p++htlKppK.t+pKH+GH................. 0 24 41 48 +11041 PF11209 DUF2993 Protein of unknown function (DUF2993) Pollington J, Finn RD anon Pfam-B_003144 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 
27.30 27.30 27.40 27.40 27.00 27.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.45 0.70 -4.44 148 794 2009-09-11 14:20:34 2008-08-12 10:49:44 3 4 452 0 220 612 29 216.40 16 79.06 CHANGED sssts...................hsthhpptlsptl.psph..ph........spss.pVplsuhP.h..pllpGcls.............plplsspsshh.....sht.....ht...phplpsps.lplshsth...........h.st..hphhp.....sspssspltlstsslsptlss..t.........hpth...................................hspsplplsu.....................................lshpsph......pl......tpt.............h.lphsshp.ht.s........h..............hsthhpths..h..sht...t..........hshsl.plp..plp.lpssslplps.p ..........................................h....shthhscptlsppl.ppts.......t..........ssss..pVslt.s.hP.hlh.plh...sGcls.............plslpstssph.....sht.......ht...........phphshcs.lclshssh..........h........phhp......ssplsusstlst.splschlst.........t...........htth..........................................sts.s.plpltu....................................h.lslpssh..............pl...............ssst.............thhlphssss.huhst.....................h.......................................................................................hpslhpshs.tph.....shtp.............hshsl.phs..slp.sssssl.hph............................................................................................................................... 0 56 159 205 +11042 PF11210 DUF2996 Protein of unknown function (DUF2996) Pollington J, Finn RD anon Pfam-B_003176 (release 23.0) Family This family of proteins has no known function. 
21.90 21.90 23.60 28.90 21.20 20.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.57 0.71 -3.83 21 102 2009-09-11 16:37:34 2008-08-12 14:04:55 3 1 88 0 45 104 110 115.30 41 66.98 CHANGED tlE-KPFsEFlppchlPuLppuLpsc.GlsshsLshtcsphPlsG....spCWhVhGchsts....RpFWLsFspcsIsusKshuLA-sGscPShlESFLIDE++lTLsLLlsRllQRLNGQKWLGs .....hEcKPFsEFhppchlPuLppuLppp.GlsslcLsFpcs.hsh.h....t.h.plpGsa.pst...+pFhlaFsptsIpG..KuFohussGspPSTlEsFL.IDE+...+lTLcLlVhhllQRLNGQKWLs.h................ 0 7 30 41 +11043 PF11211 DUF2997 Protein of unknown function (DUF2997) Pollington J, Finn RD anon Pfam-B_003181 (release 23.0) Family This family of proteins has no known function. 20.80 20.80 21.90 21.30 20.60 18.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -7.91 0.72 -4.58 41 147 2009-01-15 18:05:59 2008-08-12 14:10:29 3 1 124 0 60 136 151 48.10 38 65.72 CHANGED lcapIpsDGpVcEpVpGlpGpuCpchTctlEssLGsVp.pcchTuEaat ...........lcahItsDGcVptpVpGltGpuCpchTptl.EstLGsVh..spchTsEaa.......... 0 22 47 58 +11044 PF11212 DUF2999 Protein of unknown function (DUF2999) Pollington J, Finn RD anon Pfam-B_003194 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 26.70 134.00 22.90 20.20 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.78 0.72 -3.94 10 145 2009-01-15 18:05:59 2008-08-12 14:26:43 3 1 145 0 26 66 7 81.80 78 97.98 CHANGED MNPIIAlLKEHNVSDEQIsELFQsLT-NPhhAMAhIsQLGIPsEKLQtLMulVMsNPuLIKEAVEELGLDFuKVEAAKA+Lp MNPILAhLKENNISsEQINELFQTLTQNPLAAMATluQLGLPQDKLQhLhAQVMQNPALIKpAVEELGLDFuKVEAAKAKLQ. 0 3 7 17 +11045 PF11213 DUF3006 Protein of unknown function (DUF3006) Pollington J, Finn RD anon Pfam-B_003197 (release 23.0) Family This family of proteins has no known function. 
21.50 21.50 21.50 21.50 21.40 21.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.21 0.72 -4.22 42 334 2009-01-15 18:05:59 2008-08-12 14:37:12 3 2 303 0 107 256 3 68.80 33 87.57 CHANGED psllDRF.E....schAVlEhpscp.....hsls+shLPtsu+EGD.VLpl...........plplD.cpTcpt+...cclpchhccLhc ..........+sIlDRh.E.....sphAVlchcspp.......hsls+spLPp.ss+.E.GD.Vlhl......................ptsphplDtp-T-cc+...ccIpchhccLh........................... 0 52 85 101 +11046 PF11214 Med2 Mediator complex subunit 2 Wood V, Coggill P anon Wood V Family This family of mediator complex subunit 2 proteins is conserved in fungi. Cyclin-dependent kinase CDK8 or Srb10 interacts with and phosphorylates Med2. Post-translational modifications of Mediator subunits are important for regulation of gene expression [2]. 21.10 21.10 21.50 22.00 21.00 21.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.42 0.72 -4.00 10 58 2009-01-15 18:05:59 2008-08-12 14:40:30 3 1 35 0 39 58 0 122.50 45 41.27 CHANGED M.........................shcsKLssshDDIL+sSu.lh..p.l+sschpSsllTGhNspL.....ppsLsc+IspFauILD-s.spLNcocahl....Dslhp.+pcpc................EhK...EEEE+p++...........E-EE+K ..............NLps+Lp...ssLD-ILKoSGalh..ElIcpN++QSNlIousNNpLIp..lpppLsscIppFHuILDpTloKLNDAcWCL....sshlEp..KKKh-...........................EhK.lKEEtt...+K+c........EpEt..tcp.tct......................................................................... 1 18 27 38 +11047 PF11215 DUF3010 Protein of unknown function (DUF3010) Pollington J, Finn RD anon Pfam-B_003238 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 77.50 77.40 19.00 18.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.52 0.71 -4.25 31 135 2009-09-11 10:24:04 2008-08-12 16:16:03 3 1 126 0 48 117 14 136.00 43 95.65 CHANGED M.+lCGVELKGsEAIlsLLshcsshaslP-sRsp+hslscsssspsl+pFphsFscLhpDYpV-pVlI+cRtpKGKFAGuAsuFKlEAAIQLl..sslpVsLlSssslKppLK+sPh.lsFptsGLKpFQ.csAFpsAauhL ..M+lCGVElKGuEAllsLLo.hcsssasls-sRsp+hslscsssspsl+cFphsFtKLhpDYpV-cVVI+cRppKGKFAGuAsuFKlEuAIQLh..ssls..VpLlSPssIKpphK+sPh.lsFcpssLKpaQ.cpAFpsAYAh..... 0 8 17 36 +11048 PF11216 DUF3012 Protein of unknown function (DUF3012) Pollington J, Finn RD anon Pfam-B_003254 (release 23.0) Family This family of proteins with unknown function is restricted to Gammaproteobacteria. 20.40 20.40 21.70 21.00 19.90 19.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.32 0.72 -7.56 0.72 -4.27 16 235 2009-01-15 18:05:59 2008-08-12 16:33:21 3 3 142 0 40 118 13 31.90 62 64.02 CHANGED VGSEsWCccM+-KPKGDWTANEAs-aAKHClh .VGScuWCEDMcEKPKG-WTAp-As-aAKHClh. 0 7 14 29 +11049 PF11217 DUF3013 Protein of unknown function (DUF3013) Pollington J, Finn RD anon Pfam-B_003257 (release 23.0) Family This bacterial family of proteins with unknown function appear to be restricted to Firmicutes. 25.00 25.00 35.40 35.30 18.90 18.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.98 0.71 -4.50 12 479 2009-01-15 18:05:59 2008-08-12 16:37:24 3 2 473 2 34 218 0 155.00 58 98.53 CHANGED MuKhshLshL-Epl-Kp.FsaDhtlsWDKKNHAlElsFhLpApNssulchsDspGpsSsE-Il.FEDtlLFYNPsK.SpFD.t-DYLsslPY-sKKGlu+phLshFspaLp-sh-pG.sDLMDFL.sDsss-pFtLpW-spsFEpshsp..LpEs-...aYsYP+ ...MApaGFL-lLEEELDKp.FsaDaplsWDK+NHAVEVoFlLEAQNs.u.uVEhlD--GElSs-DIl.FE-uVLFYNPuK..Sshs.tEDYLssIPY.PKKGhSREFLAYFAhFLKDsA-.GLcsLMDFL.pDs-AEEFshcWspEsFEcG+ss..LcEsE......FYPYP+........... 
0 4 14 23 +11050 PF11218 DUF3011 Protein of unknown function (DUF3011) Pollington J, Finn RD anon Pfam-B_003246 (release 23.0) Family This bacterial family of proteins has no known function. Most members belong to Proteobacteria. 25.00 25.00 25.00 26.20 24.10 24.00 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.02 0.71 -12.00 0.71 -4.54 5 96 2009-01-15 18:05:59 2008-08-12 16:42:20 3 3 52 0 30 104 9 167.40 36 75.04 CHANGED VVRCES+DpcRVHCDhsscsGlQLVRQLSuoSCI+GSpWGoDRuGVWV-pGCRAEFsuu.....supGuuaPusst....hRRlVRCESNGs+p.SCPVtLRGAPVRLLRQLSlhPC+EGRoWGp++sEIWVScGC+GEFElAc--GpsFVDs...........PaoLTCESKsR+RRoCGsSlcpG.sVLppQLSuTPCEEG+oWGWS..RssVWVssGCRAEF ................................................lpCpSpc.tphcCsh.httts.LVcplStspC.cGppWs.c..ptt..lWVppGCRucFstt.......ts...shst............ttslRCcSs.stt...C..sshh.Rts..pLlRQhS.hPChcspsWs...ctsplWVscGCcGEFt.ut.....ts.G.....s.hssh.......................shhlsCpScppphptCshss.p..ht..shlhpQhSpo.C.EspoWGas..pstlWVssGCRAcF.............................................................. 0 8 20 25 +11051 PF11219 DUF3014 Protein of unknown function (DUF3014) Pollington J, Finn RD anon Pfam-B_003267 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 127.80 127.20 19.80 19.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.68 0.71 -4.35 31 126 2009-01-15 18:05:59 2008-08-12 16:46:51 3 1 121 0 57 133 68 160.20 37 55.15 CHANGED -llR+hVVhlDNLApGphsp+htPlpsPptpFsVpc.......sspth....IssssYpRYDsasshlsulsspphsshYpchpPLhppAYsELGYPcttFsspllpAI-plLss.PhscsPlpls...........psuVhYpFADPpLEuLssuQKhLlRMGPcNs++lKutL+cl+p ..cllR+hVVhVDNLupup.lsp+h.Plpsst.....ppFpVsph................ssphh....lss.ssapRYssYVshlsulDspshsphYpchhPLhppAasELGYPc.tt.FN-RllpAI-hLLss.PtspsPltlh...........pspVpYpasDPpLEuLssuQKhLlRMGP-Ns++lKutLRcl+.s.. 
0 12 30 46 +11052 PF11220 DUF3015 Protein of unknown function (DUF3015) Pollington J, Finn RD anon Pfam-B_003292 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 26.80 26.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.81 0.71 -4.59 16 196 2009-01-15 18:05:59 2008-08-13 09:16:49 3 1 165 0 52 142 74 138.20 38 88.21 CHANGED AhAsustGs.GCGhGs.MlFpGpsGhssHlLAuTTNGTSGNpTFGMTSGTsGCsosu..slshsupshlhhss.h-pLucDMA+GpGEsLsuhAslLGlpspD+s+Fstlh+cpFspIFsst-s.TucclhsuhhslhppDspLuKYst ....................................h..A.Assss.GCGhGs.hl...FpG...Q..cGhss+lLAuTTNGT.GNt......TFG.hToG.TsGCpssu....tl.shsucshhhhss.h-pLucDMAhGpGEuLsshAslhGltspD+scFstls+ppFu.pIFsssss....outpVhssh.slhtpD.hLttYs............................ 0 17 31 44 +11053 PF11221 Med21 Subunit 21 of Mediator complex Coggill P anon manual Family Med21 has been known as Srb7 in yeasts, hSrb7 in humans and Trap 19 in Drosophila. The heterodimer of the two subunits Med7 and Med21 appears to act as a hinge between the middle and the tail regions of Mediator [2]. 28.80 28.80 28.90 29.10 28.40 26.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.78 0.71 -4.22 30 284 2009-01-15 18:05:59 2008-08-13 10:00:02 3 4 250 3 206 269 0 134.00 30 86.37 CHANGED MuDRL.......TQLQssLDQlspQFsuolsalspspsss.hsssp........................phustpsss...........................................................sspsFpsphcELupDlllKs+QI-hLIsSLPGlssStccQh.cpIccLpcELpplEp-+tctl+ct-pLLccl-pllsslspu ...............................................MuDtLTQLQssls..p...lAppFssslshlpppts.sphss.p...............................................thsptps.................................................................spshppttp.hApslhhps+..pI-...hLIsoLPu....-..s..otptQh..pplpcLpc.E.cpstpphpcslpct-.tLLcclpshltslsp.s................................ 
0 67 112 168 +11054 PF11222 DUF3017 Protein of unknown function (DUF3017) Pollington J, Finn RD anon Pfam-B_003304 (release 23.0) Family This bacterial family of proteins with unknown function appear to be restricted to Actinobacteria. 25.00 25.00 25.30 25.60 23.30 24.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.54 0.72 -4.19 44 367 2009-01-15 18:05:59 2008-08-13 13:05:21 3 1 325 0 76 203 15 71.80 41 71.31 CHANGED PILlVshlhssuhsLssusaWRRGALlIGIGVGlAAlLRLsLo--RAGLLsVRS+sIDFlTTsTVuAAMlYIAhTI ...............hhhhh.hhhhuhl.ls....h....s..s....aW..RRGshllGhullhAAlLRlVL.s-pp..AGLL.uVRSRhlDhlhhss....lGAsMVhlAhTl..... 0 25 60 73 +11055 PF11223 DUF3020 Protein of unknown function (DUF3020) Wood V, Coggill P anon Wood V Family This family of fungal proteins is conserved towards the C-terminus of HMG domains. The function is not known. 25.00 25.00 27.60 27.60 16.50 17.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.42 0.72 -3.56 11 47 2009-01-15 18:05:59 2008-08-13 13:24:13 3 3 35 0 35 50 0 48.80 43 5.16 CHANGED NRcRKK+.WRctNuc+NK-NDL+SRlh.....KKApthaGpcso.tcKpsWh--EF .....N+cRK++.WR-tNsp+N+-sDLRuRlp.....KRAsthFG.cp-o.tcKppWl--EF.. 0 12 25 35 +11056 PF11224 DUF3023 Protein of unknown function (DUF3023) Pollington J, Finn RD anon Pfam-B_003319 (release 23.0) Family This bacterial family of proteins with unknown function appear to be restricted to Alphaproteobacteria. 
20.30 20.30 22.90 22.20 19.80 17.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.58 0.71 -4.49 35 63 2009-09-10 17:53:36 2008-08-13 13:36:20 3 5 5 0 38 59 0 122.90 21 52.07 CHANGED shCIGpT.s.ssspLplhlsps.....pppphhsh...spGpSLFhlpsplssphlppst..pLpphhthsppp............spslphshYhLVsppslppFhp.stp..................ptphhshpslspaGsllhs+.....tpsshhch............FsEpcsLp ...........................hhCIGpT.t.ssspLhlhhsps.....p.pphh.....spGpSLFhlpsplssphltps...tLpp..hhthsppph...........pthlphphYhLVpppplppFhp.h.p...................tthh.hpslsphuphlhsp.....httt.hph............hsEtphL.................. 1 8 8 8 +11057 PF11225 DUF3024 Protein of unknown function (DUF3024) Pollington J, Finn RD anon Pfam-B_003325 (release 23.0) Family This family of proteins has no known function. 21.20 21.20 23.00 22.60 20.00 19.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.80 0.72 -4.09 30 192 2009-01-15 18:05:59 2008-08-13 13:40:33 3 1 180 0 35 131 2 57.50 36 48.42 CHANGED thpsslA+lpaspspstWplahhcps......hcWp.Yp...cspsLcthLchlpcDspusFW ......hppslAKlhapcu.....pppWtLYhhcpp......ttWh.Ys.hspspsLoshlctVccDPcuhFW.. 0 14 20 32 +11058 PF11226 DUF3022 Protein of unknown function (DUF3022) Pollington J, Finn RD anon Pfam-B_003318 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 25.30 25.10 21.70 19.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.35 0.72 -4.19 13 102 2009-01-15 18:05:59 2008-08-13 13:42:54 3 1 53 0 25 82 4 109.60 39 85.85 CHANGED -hsEL-pslsD.Fs-posssVHsDspoGRlplpluWltss.suss+thRpuVsLsh-ssslcRYAsLDsusRhRV+AhL+DhVcssL-p....LEDtptc.psVsl-lTss.LDsA ....chsELs+llsDhFP-Q.ophs.+sD.psGR.l.lpVpWluhRhuuos+..RpolslRhssssLtRYhuLcshtRuRs+AhL+AhscAsLsu....LE-p...cAsupsVsh-sT.htLs-t......................... 
0 2 4 12 +11059 PF11227 DUF3025 Protein of unknown function (DUF3025) Pollington J, Finn RD anon Pfam-B_003327 (release 23.0) Family Some members in this bacterial family of proteins are annotated as transmembrane proteins however this cannot be confirmed. Currently this family of proteins has no known function. 25.00 25.00 59.80 47.00 18.40 18.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.46 0.70 -4.83 21 157 2009-01-15 18:05:59 2008-08-13 13:46:56 3 3 155 0 60 158 30 208.90 45 74.55 CHANGED PlRFVsQst..LPsGpAYEsaIapTGpVPTR.-NLHDFFNALlWLpFPcsKstLNtlputsIupt..Glt.sh..RGslRDAlTlFDENGllll.uss.....sLtss.................Lcs+pWppLFlcpRstWt........ppspshhFGHALhEKLlpPaKulTAHshhl.l..sst.h..shsp.puhL-tpLs.....httushhss+sFuPLPlLGlPGWas-NpssuFYcDopVFRPtRR ...........lcFlsQss...LP.sGhAYEsaItpTGpVPTR.cNL...HDFFNALlWhtaP+hKutLNApQuss...Istt.....Gl....u..ss......RGslRDAlTLFDENGslhssu-.s.....s..Lt..sA.................Lpu.a-WppLhl.ppRstWs.........................s.pscshlFGHALhEKLl.sPaKulTAHuhhVpV..ssshh.th.....stsppp...............uhLDtpls......................tspthss+sasPLPlLGlPGWhstNtssuFYsDstVFRsuRp............................................. 0 10 35 48 +11060 PF11228 DUF3027 Protein of unknown function (DUF3027) Pollington J, Finn RD anon Pfam-B_003334 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
25.00 25.00 26.80 25.90 24.90 18.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.32 0.71 -4.63 20 441 2009-01-15 18:05:59 2008-08-13 13:56:36 3 3 428 0 110 316 124 189.60 48 66.63 CHANGED G-HLGspsEu-+lsTHhFtsphPGYpGWpWAVTVu+sscucplTVsElsLlPGssALlAPcWVPWp-RlRPGDLGsGDLLPsssDDPRLssGa.......................................ssssDstl-.................psshELGLGRpRVLSttGRppAAcRWY-G-tGPcothA+sAP..ssCsTCGFalPLuGuLsphFGVCANEhuPsDG+VVuhDHGCGAHSEs.l ...........................................G-al.GshsE...s-plsTHhFtsphPGYcGWpWuVslupss...c..u..cp.sTVsElsLlPGt..cALLAPpWVPWc-RlR..P..GDl...us.....GDlhssss-D.PRLtPGa..........................................................................................s.s.stD.st......................tsstE.lGLGRp+lLSt.GRspsApRWhsGphGPcsshu+...pAs....tsCso.CGFalsLsGoLst.hFGVCuNEaS.uDG+VVphcaGCGuHS-s.s........................................................... 0 32 80 102 +11061 PF11229 DUF3028 Protein of unknown function (DUF3028) Pollington J, Finn RD anon Pfam-B_003337 (release 23.0) Family This eukaryotic family of proteins has no known function. 
19.00 19.00 20.50 25.00 18.20 18.40 hmmbuild -o /dev/null HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.87 0.70 -6.24 3 60 2009-01-15 18:05:59 2008-08-13 14:00:20 3 4 42 0 35 71 0 530.00 60 38.40 CHANGED MNKLRLLVENSQQTSGFALALGNIVHGLSVCGHGKAEDLGNRLLPAWIKlVLTEGAPTMLCLAALHGMVALVGSEGDVMQLKSEAIQoSHFQGRLNEVIRTLTQVISVSGVIGLQSNAIWLLGHLHLSTLSSNQSRTSVPTDYSYLPEuSFIRAAIGFFITGGKKGPESVPPSLLKVVMKPIATVGESYQYPPVNWAALLSPLMRLNFGEEIQQLCLEIMVTQAQSSQNAAoLLGLWVMPPLIHGLSLNIKKYLLlSsPLWIKHlSDEQIlGFVENLMVAVFKAASPLuSPELCPSALQGLSQAMKLPSPSHHLWSLLSEATGKIFDLLPNKIRRNDLELYISIAKCLSEMTDD-ANRVAQITKSNIEKAAFVKLYLVSQGRFPLMuLTDlLSVAVQHREKETLAWMILHSLYQARIVSHANTGVLKRMEWLLELMGYIRNVAYQSTSlQNVALDEALDFLLLIFAAAVVAWADHsAPLLLGLSASWLPWHQENGPAGPAuSLLGRSPMHRVTLQEVLTLLPsSMsLLLQKEPWKEQTQKFIDWLFSIMESPKEALSAKSKDLLKATLLSLRVLPEFKKKAVWTRAYGW .....................MNKLRhLsEssQQTSGFALALGNlVHGLSVCGHGKAEDLus+LLPuWI+lVLsEGsPTMhCLAALpGhVALVGSEuslhQLKS.EsIQoSpFQuRLNEVIRTLTQVISlSGVIGLQSNAhWLLGHLHLSoLSSsQSRsSVPoDYSYLPEsSFIRAAIsFhlsG...GKK..G..PEuVPPSLlKlVMKPIAoVGESYQ..YPPVNWAALLSPLMRL.NF..G...EEIQQLCL.EIhVTQA.....QSSQNAAsLLGhWVhPPLIHuLSLshK+YLLlShPLWhKHVSDEQI..uFVEsLhVslF+suS....hssPELC.SALQGLSQ..AMKLPSPu+HLWSLLs-ATtKIFDLLPNKIRRsDLELYIulAKCLSEMTD--.ssRlsQlTKuNlEKA.AFV+LYLVSQG......RhPLhsLsDlLosAlp...HpEKEsLAWMlLHSLYQAR.I.VSHA.NTGVLK....RM.EWLLELMGYIR.NVAY..QSTShQNs.sLcEALDFLLLlFAsAVVAWADHs.APLLLGLSASWLPWHpEsGPuG...ss.suhLG+SPhcRVTLQEsLTLLPsSM.LLLpKEPWKEQTQK..FIDWLFSIMEuPcEuLSApS+DLLKATLLuLRsLPEFKKKAVWTRAYGW......................... 1 6 8 17 +11062 PF11230 DUF3029 Protein of unknown function (DUF3029) Pollington J, Finn RD anon Pfam-B_003347 (release 23.0) Family Some members in this family of proteins are annotated as ykkI. Currently no function is known. 
23.00 23.00 23.10 23.00 22.90 22.70 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.88 0.70 -12.60 0.70 -6.31 19 631 2012-10-01 23:28:04 2008-08-13 14:10:42 3 1 584 0 51 368 5 448.40 70 94.14 CHANGED sLoh+QKpphLAhEAEssLshhplsstspcth....cpGlICDhFEGaAPa+PRYlLPDYA+FlppGScaLELsPspDlDEALNsLhIlY+HVPSVTshPVYLGpLDsLL.PFlpths--plYpKLKhFhlhLDRTLPDuFhHsNIGPsDshhsRhIL+l-uELKQssPNLThhYD.-lTPD-hlp.AspshhpCSKPahANashaspsas.psaGIVSCYNuLPluGGu.TLVRLNL+plAc+Apu.h--FhsplLPcYsphhhplI-sRtpFLhEESpFF-o.FLV+EGLIccsRFssMFGIaGhAEAVNhLlpp-shpp+..............YG+scpANpLGhpI.ppLsclVsspsstY..uhssRhLLHAQuG..I.shDpsVTPGsRIPhGcEP.-.lsHlpssAtaHpYasSGISDIhslDcTl+pNP-AhlslsKGAhshGhR.FTsNlussDLlRVTGYhVKhSDltKacp.cuuRpNTThLGutAscNoplhpRps.R ...........................................................V.LoPEQKRHFLALEAENs..LPY.Pp.LPs-ARcAL....DEGVICDMFEGHAPYKPRYVLPDYARFLANGSEWLELEGAKDLDDALSLLTILYHHVPSVTSMPVYLGQLDALLQPYVc.ILTQDcIDlRIKRFWRYLD.......R..TLP.DA.....FMHANIG....P....uDoPlTR.....AI.....LR.....ADAEL....KQVuPNLTFIY......DsEIT......PDDLLLEVAKNICE.CSKPHIAN..GP.V.aDKIF.......T......K...G....tYG...........IVSCY.........N.........S..L.........PLuG..GG..STLVRLNLKAlAERS.c........S..lD...DFFo.RTLPHYCpQQIAIIDuR...........CE...........FLYp....pSHFF..ENSFLV.......KEGLIsP..E......R..F.......sP.......MFGMYGLAEAVNLLCEpEG.lsA.R..............YGKpssANElGYRIS....AQLA-FVsNTPVKY.................GWppRAh..L..H.AQS..G...I...SSD.I.G.TTPGARLPYGDEP.D.PITHLQTVAPHHAYYaSGISDILTLDETIKRNPQALVQLCLG..AFK.AGMREFTANVSGNDLVRVTG..YMVRLSDLEKaRA.EGSRTNTTWLGEEAARNTRILERQPR...................................................................................................................................................................................................................................................................................... 
0 14 27 38 +11063 PF11231 DUF3034 Protein of unknown function (DUF3034) Pollington J, Finn RD anon Pfam-B_003362 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 27.80 26.70 19.90 23.60 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.44 0.70 -5.02 28 135 2009-09-11 03:37:16 2008-08-13 14:21:12 3 1 128 0 44 144 8 247.80 49 87.40 CHANGED ATGGloslEGuAGGGIsPWAlluGYuocsEhGuouhhTcVslsDYsLsshGAuluhtsRlELShA+QsFDlss....hust........................s.clpQDIhGsKlRLhGDllYsp...hPQlSlGlQaK+.c-.ts...............lspulGA.p-DpGoDhYluAoKLaLsuhhGpNlLlNsTlRuT+ANQhGLLGFGGs...pps.....sapl.hEuSlAllLs..+phAlGsEYRpKPDNLu..........hspE-DWtDlFlAaFPNK+LSlTuAYsDLGsIAsh.......ccQpGhYLSlQu ......ATGGsopIEGSuGGGIsPWAsluGYuopsEhGs.oAasT+lsls.DY..pLsVtGsul..uhsNRlELShA+QpFDlss..........htt.........................................s.plsQDlhGsKlRLhGDllYsp...hPQlSlGlQaK+.pD.hs................lPphlGA.ccDpGsDhYluAoKLahsuhhGhNlLlNuTlRhT+ANQhGLLGFGGs...pps............uhplhhEuSsAllls.....phAlGsEYRpKPs.NLu........................hupEsDWtDlFlAaFPNKHlSlssAYscLGpIAsh........csQpGhYLSlQ................ 0 10 27 38 +11064 PF11232 Med25 Med25_PTOV1;Med25_PTOV-SD2; Mediator complex subunit 25 PTOV activation and synapsin 2 Coggill P anon manual Domain Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-active part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [4]. 
The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA domain, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This family is the combined PTOV and SD2 domains. the PTOV domain being the domain through which Med25 co-operates with the histone acetyltransferase CBP, but the function of the SD2 domain is unclear [3]. 25.00 25.00 29.30 41.30 21.40 24.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.99 0.71 -4.54 7 184 2009-01-15 18:05:59 2008-08-13 14:21:37 3 11 64 4 76 182 0 147.60 62 31.02 CHANGED .ppssp+.LhWSGsLEWpEps+s....sps+lp+oL.Cplhls.t-.p..lcT-pWPpKLhMQLIPppLLsslGshF.+sS+hV.F+.T..ssEsLcuLhphM...usGFAGClHF...PppssC-l+VLhLLYSsc+phFlGhIPpDQusFVptlRpVlspp+ps ...............ptlsNKhLAWSGVLEWQEKs+P...-sssKlpRoLPCQVYVNp..GE.s...L+T.-QWPpKLIMQLIPQQ.LLT.TLGPLF..RN.S..+h.VQFHF.......T...+.....Dh.EoLKuLhRIM....sNG.F.AGCVHF...Pap...u...sCEVRVLML...L..YSScKKIFhGLIPaDQSuFVNuIRpVIss+KQ.s......... 0 20 23 39 +11065 PF11233 DUF3035 Protein of unknown function (DUF3035) Pollington J, Finn RD anon Pfam-B_003372 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Alphaproteobacteria. 
25.00 25.00 25.30 28.10 23.40 22.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.91 0.71 -4.18 39 170 2009-01-15 18:05:59 2008-08-13 14:37:10 3 2 168 0 49 144 739 125.90 27 84.80 CHANGED slsL...uuCut.....shpslttspssPDEFsllsptPLshPssas..LssPsPGssp.sD...sPpspAlsALsG..tt.......shssu-tuLlspAuphussssIRphlssE-tphtc+ssthsphhhhspsshhtshtpt..lcs.t-htthcp.sGstssuusP ......h.hhlouCut.....phtcsluhspssPDEapVhpptPLphPPsat..LssP..ssGt.s...p.p-....ssttpuh....tsl.s..............tshstupthllttu.st..t.tsshp......phthhtttt.h...hhh..t....s.....................................t........................................... 0 18 37 39 +11067 PF11235 Med25_SD1 Mediator complex subunit 25 synapsin 1 Coggill P anon manual Domain The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA, domain, this SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This The function of the SD domains is unclear [3]. 25.00 25.00 53.10 52.60 21.70 21.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.54 0.71 -3.94 3 61 2009-01-15 18:05:59 2008-08-13 14:48:58 3 6 30 0 22 54 0 153.70 77 22.21 CHANGED LPVGGu.SVPGsh.sKQ.VSLPPhsVlPPQluuAPQpPLPPV..pPPQMQVPQNsoLoAAHsAAQsAVEAAKNQKsu.uNRF..sslsPh.p.APuVGPPFSQsPAPsLPPG...P.hsPKPS....PASQsSLVTTVoPGsG.APV.hQQQssPQQPssPSMssTsAlGGV..QuPQPSQ.Q .................LPVGGG.SAPGPLQsKQPVPLPPAsPSG.AoLSAAPQQ..PLPPV...PPQYQVPGN...LSAAQVAAQNAVEAAKNQKAGLGPRF.....SPIsPLQQAAP.GVGPPFSQAPAP.L.PPG...PPGAPK.PP.......PASQPSLVSTVAPGuGLAPs........AQP.GAPS.MAGTVAPGGV..SGPSPAQL.G........................................ 
0 1 3 7 +11068 PF11236 DUF3037 Protein of unknown function (DUF3037) Pollington J, Finn RD anon Pfam-B_003382 (release 23.0) Family This bacterial family of proteins has no known function. 21.30 21.30 22.30 21.70 21.10 19.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.27 0.71 -4.04 37 274 2009-01-15 18:05:59 2008-08-13 15:41:15 3 1 266 0 105 270 22 115.80 32 66.30 CHANGED YAllRlVP+VERpEFlNlGVlLastpptaLts+h.plDc.pRLpuhssclDhchlpttLpuhptlCsGspsuGs..........luthshtcRF+WLoAsRSollQsStsHsGhss.DsstpL-+LhcphV ........YAllRhVP+VERpEFlNlGVllast.p...ppalps+h..plcp..sRLpshs.s.ch......D.hsslcttLcuhcplstGsppuGs.............hut.s.h...spRF+..W.LsAsRSo..llQsS.sHsG..hss...D.ttpLc+LhcphV..................................................... 0 46 87 101 +11069 PF11237 DUF3038 Protein of unknown function (DUF3038) Pollington J, Finn RD anon Pfam-B_003387 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 25.00 25.00 58.10 58.00 20.30 19.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.74 0.71 -4.45 26 109 2009-01-15 18:05:59 2008-08-13 15:46:52 3 1 71 0 43 118 119 162.30 35 84.26 CHANGED hs++spp+LDLLLLAlEuLslsuuEuhLtsuppLsLpshhssRVpLW+hRssNPlR+us.pRpsLshtEscALlhllsthAcphpshlRplL......supp.s.hspthhupahcRFpshhpcRMN.+Rutl.hh.s.spphspLthcLLhpLhhsoGssGhpRLhhSLhD ......cphpppLDLlLLAlEuLshsuuEshLtsuppLsLpshlssRlsLW+hRssNPlR+ut..tRppLslp-A+uLlhIlsahA+phpshIRphL.h.pQhpppphs.tpshhLupYl-+FpshappRMN.c+stl.hh..s.p-plspLAlpLLhpLLFsoGssGhpRLWhoLhD... 0 6 30 41 +11070 PF11238 DUF3039 Protein of unknown function (DUF3039) Pollington J, Finn RD anon Pfam-B_003408 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
21.80 21.80 34.50 32.20 20.90 20.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.87 0.72 -4.52 19 435 2009-01-15 18:05:59 2008-08-13 15:55:13 3 1 428 0 112 273 89 58.20 56 62.78 CHANGED psGDp-RFuHYV+K-KIhESAlsGpPVlALCGKlWsPsRDPc+.aPVCPcCKEIYEshp ....s.suDs.+FhHYVpK-KIs-SAVsGs.VVALCGcVassoRsPc.....PVCP-CKcIY-uhp..... 0 33 82 104 +11071 PF11239 DUF3040 Protein of unknown function (DUF3040) Pollington J, Finn RD anon Pfam-B_003409 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. 20.90 20.90 21.00 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.67 0.72 -3.93 28 478 2009-01-15 18:05:59 2008-08-13 16:00:21 3 1 392 0 158 361 191 86.20 37 67.41 CHANGED MPLSEcEpRhL-plERuLhs-DPcFAsslpu........sshptsstRph....lhulsshVhGlslLlsGlshphhh......lulhGFllMhuuslhu ..................MPLS-cEQR.hLcplEpuLhs-DP+FAsulps..........................sthct.ss.tRph.........lhu..s...s.h..h.l.lG.LuhLVuGVshspsh.........LuVhGFllMhuushh............................. 0 67 123 152 +11072 PF11240 DUF3042 Protein of unknown function (DUF3042) Pollington J, Finn RD anon Pfam-B_003420 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 25.00 25.00 32.60 31.20 20.00 18.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.51 0.72 -4.17 17 543 2009-01-15 18:05:59 2008-08-13 16:08:46 3 1 531 0 49 136 0 53.90 62 94.27 CHANGED h+pFspGhlsGsluTluAlAGulhshKKslI-P.EcKtshIEENRK+AsRKphu ..KsFuKGllTGVsuTVAAVAGAVauhKKpVIEPEEp....KsAFIEENRKKAAR+RVS...... 0 8 22 36 +11073 PF11241 DUF3043 Protein of unknown function (DUF3043) Polligton J, Finn RD anon Pfam-B_003428 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins. 
This cannot be confirmed. 25.00 25.00 79.90 79.80 23.10 22.40 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.16 0.71 -4.67 28 434 2009-01-15 18:05:59 2008-08-13 16:14:09 3 1 431 0 114 317 91 175.10 37 78.01 CHANGED sKGRPTPKRcEA-s..................ARpR.......shs.p.-RKsu+ptpRsptpcccp+.ptuMhsG-EpYL.sRD+GP.RRalRDaVDuRhsluphhhPluLlllllhhl....ss.......splphh..sshshhllh.llhllDuhlLuR+lp+tlsp+FPc...psthulshYAhtRAhQhRRhRhP+PpV.cRGpp .....KGRPTP+Rp-Acu.....................sph+.............pLs...p..-+Kts++ppR...tpt+c+ppc........uM.sG-...-...taL.sRDRGP.RtYlRDhVDSRh.slupahMPlALsllllhhs........s...........stlthh....hoh.shl.lll.llhlIDuhhLsR+lp+tlcp+FPs.......t.s.thul.shYAhsRAhQhRRhRsP+PpV.cRGc.t............ 0 35 84 106 +11074 PF11242 DUF2774 Protein of unknown function (DUF2774) Pollington J, Finn RD anon PRODOM Family This is a viral family of proteins with unknown function. 26.50 26.50 26.50 29.10 26.30 26.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -9.02 0.72 -3.98 7 26 2009-09-10 16:41:49 2008-08-13 16:41:06 3 2 25 0 0 18 0 60.70 49 66.55 CHANGED hc.cVpVahh.+EtGhsFl-IA+th.slpup-suclWspV-pA+...tcFcs+EKVVYRK+hlN.Kh+p ...Mc.cVhVahL.HEoGhuFIEIA+cl.slpA+-sAhhWscscsAK...t+FcsREKVVYRKRhlNpKlKp.. 0 0 0 0 +11075 PF11243 DUF3045 Protein of unknown function (DUF3045) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as gene protein 30.1. Currently no function is known. 21.40 21.40 21.90 73.90 18.50 17.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.02 0.72 -3.75 2 40 2009-01-15 18:05:59 2008-08-13 16:59:00 3 1 39 0 0 16 0 88.30 84 99.19 CHANGED MFVVHsIh-sc.NTTRDYGHVN.FFRspP.FRu.KDtcIFKcCVEQGFIYl.caahpG.thR.TTYp+oLppL.DElhYNRsVsph.c.c MFVVHTIYENEGNTTRDYGHVNQFFRCNPE...F..RAQKDERIFKKCVEQGFIYV....KHWM.....QGNKVR.TTYHRSLTELNDELIYNRAVNQTLKDE... 
0 0 0 0 +11076 PF11244 Med25_NR-box Mediator complex subunit 25 C-terminal NR box-containing Coggill P anon manual Domain The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA, domain, an SD1 - synapsin 1 - domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and this C-terminal NR box-containing domain (646-650) from C69-747. The NR box of MED25 is critical for its recruitment to the promoter, probably through an interaction with pre bound RAR [3]. 28.40 28.40 28.60 28.90 26.10 28.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.34 0.72 -3.53 7 67 2009-01-15 18:05:59 2008-08-13 17:52:11 3 7 31 0 14 67 0 76.00 58 13.24 CHANGED pppptstsshs...pQsht..p.............h......sstp...pQh.Q.hus..GQhhhpusspus....thp...ttpshtst....shhh.hh .......................HLQPPGAPALLPPPHQGLGQPQ............LGPPLLHPPPAQSWPuQLPP.RAPLP.GQhhhuGGsRuPlspsGLQP....SVMED.....DILMDLI......... 0 3 3 4 +11077 PF11245 DUF2544 Protein of unknown function (DUF2544) Pollington J, Finn RD anon PRODOM Family This is a bacterial family of proteins with unknown function. 
25.00 25.00 25.50 70.20 19.10 18.20 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.63 0.70 -5.02 8 347 2009-01-15 18:05:59 2008-08-14 09:01:38 3 1 334 0 4 148 0 222.80 56 84.50 CHANGED FaLsVsTPcuVtYGpYK..uhhhps-pLsllSWoG..uusAPolsLpDhsp.lspSsCPGLps.......osWs.CshhshsVhV-uDl..a.GCPWlVShassSpsPh......uhssYsGPss+sSlCPoVsVAsYDlSWsENYVsHsKsLpLQSTGGslEpTLSTYLMEsGKLCDGSpa...D-RGAYCRaVuQhlTFoooGCDsAcVTVTPscHPITDKQLHDMVVRVDTSS..RQPIDSTCRFQYlLNEL .....F.loVlTPppVtYG.p.Yp...o.h..s.hpG-sLpllSWS...G...sssAPoLsLpDacs..lspSsCPGlst.......phap.CuhhThclTV.so.D.s..Y.GCPWlsSh..ashosss.......uhsoYouPTs+solCPolPVuoYDISWsENYVsHsKsLpLQSTGuslppTLSTYLMEuGKLCDGSpF...DsRGAYCRhVSphlTFoo.GCDpupVTVTPsRHPlTD+pLHDhVVpVsTSS..pQP.IDSTCRFQYlLNEL............. 0 1 1 2 +11078 PF11246 Phage_gp53 Base plate wedge protein 53 Pollington J, Finn RD anon PRODOM Family The baseplate of bacteriophage T4 controls host cell recognition, attachment, tail sheath contraction and viral DNA ejection. The structure of the baseplate suggests a mechanism of baseplate structural transition during the initial stages of T4 infection. The baseplate is assembled from six identical wedges that surround the central hub. Gp53, along with other T4 gene products, combine sequentially to assemble a wedge [1]. 
20.60 20.60 20.90 67.30 20.10 19.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.21 0.71 -4.84 12 70 2009-01-15 18:05:59 2008-08-14 09:05:47 3 1 68 0 0 63 1097 187.90 33 87.24 CHANGED M.lFoaFsslcYtup.s.................ssIFRsa+hYastshhsaphcsYhlsGu.RPEplApplYGNspLYWllLMhNslhDPaasWlpsp-ssYphAtp+Ysps....splhaHhstpuc+a...aslVphPcsstsWYDh...............GspsppalpapGslts............VsshEtphhcNEp+RpItIlsssslpsFls-hhR.MEts ...............M.hFsaFssl.Ysspss.........tp....h.hpsIFRph+hhhchhthshhhcsYhIpsu.RPEplApplYGsspLaWllLhhNslhDsahsW.psp-shaptstp+Ysss....splhYalspc.spca...hsllphscs.tshY-h...............GshspphhphpsuLss...........................VssaEttlhcNEpKRpIpIlsss.lssFls-hh+.hEh.s............ 0 0 0 0 +11079 PF11247 DUF2675 Protein of unknown function (DUF2675) Pollington J, Finn RD anon PRODOM Family Members in this family of proteins are annotated as Gene protein 5.5. Currently no function is known. 25.00 25.00 30.30 43.10 20.40 20.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.22 0.72 -3.93 5 48 2009-01-15 18:05:59 2008-08-14 10:08:16 3 1 22 0 0 46 0 89.70 54 82.40 CHANGED MAMTK+FKVSFDVTAVlsSEsEcphc-sLl-LAKKAGAGEcl.oPh-REhLVQuLTHGsEGAsuFslKpGlREuIK-tasEhS..Dps.hKlSPATVREVF ...MshsKph+VoFphthhh.pp..tlLcK-hL+LsKpVGuGthV.Nu+Q+EhlVQhLTHGhEGlhoFVVRoShREAIKDMHEEYu..-KcoFK.SPATVREVF........ 0 0 0 0 +11080 PF11248 DUF3046 Protein of unknown function (DUF3046) Pollington J, Finn RD anon Pfam-B_3651 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
25.00 25.00 25.70 25.20 21.10 16.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.82 0.72 -3.99 23 439 2009-01-15 18:05:59 2008-08-14 11:27:04 3 2 408 0 109 273 72 60.60 40 73.10 CHANGED MRLTEFaphlp-cFGsuYucslspDHVLusLGGRTsspAl-sGs-sRpVWpALCs-a..DVPtcc .......MR.TpFaphls-tFGssaup.lstDpVLsuL.uuRTstpAl-cGl-s+tVWpAlCssh..-lPtp...... 0 33 80 100 +11081 PF11249 DUF3047 Protein of unknown function (DUF3047) Pollington J, Finn RD anon Pfam-B_3654 (release 23.0) Family This bacterial family of proteins has no known function. 21.30 21.30 22.90 22.20 19.40 17.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.15 0.71 -4.90 39 127 2009-01-15 18:05:59 2008-08-14 11:30:24 3 2 110 0 59 135 779 187.20 27 71.98 CHANGED ssuWp.hthss...tpTcYplh...pp....sspt.sl+AposuuASsLhpchslc.....ppssh..LpWpW+lsphhs..ssDtp............p+suDDhsuRlhVsFc....thshh.pht...............sLsYlWus.phshGsh..hsNPao.spsphlslpoGss.psGcWhscpRsltsDYc+sFG.........cc....sstlsululhoDoDNTsspupAaYGDIpl ..................................................tsWp.hth.t...ttopYphs..pp.......tsttsLcupucsuuSsLhp...p.hs.lc......hppssh..LpWpW+lpphhs..ssD..tp............p+suDDhshRlaVsFc.......thshh.pst................sLsYlWus.p.....tshGsh..hsNsas...s+sphll........lcoGss.psGpWhsccRslhtDac+sFG.........cc....sstlsululhoDoDNTtspupAaauDIth............. 0 18 40 53 +11082 PF11250 DUF3049 Protein of unknown function (DUF3049) Pollington J, Finn RD anon Pfam-B_3659 (release 23.0) Family This eukaryotic family of proteins has no known function. 
25.00 25.00 29.00 27.60 24.70 21.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.83 0.72 -4.20 24 167 2009-09-11 15:04:28 2008-08-14 11:41:50 3 2 18 0 118 149 0 55.40 38 19.82 CHANGED psFPPPlsolsttt......sthhhRscRc-GRLVlptVpsss.tp..hhpApRpsGRLpLphs ..........sFPPPlsolu..pts.........s.lhhRspRp-GR.L..l..LptVp...lss.pp...hhcApR.psGRLpLphh........ 0 11 68 95 +11083 PF11251 DUF3050 Protein of unknown function (DUF3050) Pollington J, Finn RD anon Pfam-B_3661 (release 23.0) Family This bacterial family of proteins has no known function. 22.20 22.20 25.70 23.60 22.00 21.30 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.61 0.70 -5.12 32 145 2012-10-02 21:56:19 2008-08-14 11:51:00 3 1 136 0 65 178 375 227.80 46 88.88 CHANGED pLhpHsLYpplpol--LphFMEpHVFAVWDFMSLLKuLQppLTssssPWlP.stssphtpLINEIVluEEoD.shpG.....thtSHFEhYLcAMccsGAsTptIcpFlphlppG.sl.tAlppsslPtsstpFlphTaclIpsupsHplAAAFsaGREcLIPsMFppllcph...shsppphspF+YYL-RHIELDuD-HGPhAhpMlppLCGsDspKapEstpsuppALcpRltLWDuI ...............................................LhpHPlYsplpolcsLphFMEpHVFAVWDFMSLlKpLQpcLTCsplPWlP.ssss.pstRLINEIVLuEESDpt.sG.......tthSHFELYLcAMc-sGA.sTptIcpFlphlp......pGhsl............tpALpp.s......s......lstssppFlptThchltputsHp.......lAAAFsaGREclIPsMFpsllcch...shs.spphspFpYYL-RHIElDu--HGPhAhphlscLCusDsp.+hpE.shpsuhpAlpsRlpLWDul......... 1 30 51 58 +11084 PF11252 DUF3051 Protein of unknown function (DUF3051) Pollington J, Finn RD anon Pfam-B_3671 (release 23.0) Family This viral family of proteins has no known function. 
21.30 21.30 21.90 299.10 19.20 21.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.33 0.71 -4.81 5 73 2009-01-15 18:05:59 2008-08-14 11:55:00 3 1 1 0 0 64 0 189.00 94 84.08 CHANGED WLEHIRLIVVTMVPILLLPDTTIGRHIHIKRQYLQSVLLWLARNKLYYRVKRCRSKRFVLEQDHIGPELFRTQLLQYAGLDVFQDVARLQDFLERGYDTFRSVLGVHLLEHERFVSRILEQPIMEPDQAVRLCFHYHNTLDHDDDDSGLETDFNNLSLGRPYINGLPPNEKIIFIHSNLVGHSTsSHDI WLEHIRLIVVTMVPILLLPDTTIGRHIHIKRQYLpSVLLWLARNKLYYRVKRCRSKRFVLEQDHIGPELFRTQLLQYAGLDVFQDVARLQDFLERGYDoFRSVLGVHLLEHERFVSRILEQPIMEPDQAVRLCFHYHNsLDHDDDDSGLETDFNNLSLGRPYINGLPPsEKIIFIHSNLVG+STYSHDI... 0 0 0 0 +11085 PF11253 DUF3052 Protein of unknown function (DUF3052) Pollington J, Finn RD anon Pfam-B_3674 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 20.90 20.90 20.90 20.90 20.70 20.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.37 0.71 -4.54 19 439 2009-01-15 18:05:59 2008-08-14 11:58:01 3 1 432 0 121 286 61 125.90 48 86.46 CHANGED Ap+LGhpsGtlVQElGaD-DsDp-lRcAlE-shGs-LlDEDs--VVDsVLLWWR.--DG...DLsDuLVDAhssLu-sGhlWlLTPKsGRsGaV-Pu-IuEAAsTAGLsQTousslGstWsGoRLlt.P+u+ ......................................................Ap+LGhpsGplVQEhGWD-DsD--lRpslE-hhGs-LlDE.DsD-slDlVlLWWR.--DG...DLsDsLVDAh.ssLu.-cG.hIWllTPK.sG+..s..GplpPu-..luEuAsTAGLstToohsluptWsup+LVp.+..t.... 1 38 85 108 +11086 PF11254 DUF3053 Protein of unknown function (DUF3053) Pollington J, Finn RD anon Pfam-B_3678 (release 23.0) Family Some members in this family of proteins are annotated as the membrane protein YiaF. No function is currently known. 
20.70 20.70 21.00 20.70 20.50 20.60 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.55 0.70 -5.10 8 635 2009-01-15 18:05:59 2008-08-14 12:50:38 3 1 566 0 51 216 3 224.70 65 95.30 CHANGED sRhFtP.....LLAlLlsh.LuuC.GDKEPEQRKAFI-FLQoplhcS.Gs+lPsLTs-QKcsFGsYssDY.AILosFSQphspAlsuu.pssltQhssIRospDhls+RDsLcpupsuLshlupplpssKupADsA+usLKQP-DLKsVaDpAas+TVosPAssLhsslPhssuhlpsLlpVGDaLpsQGsQVpFsuuu......lQFpsspQsspaNphlus.....LssQtpshhsAQpplsslt .................SRWFAP.....lsALLMVlSLSGC.FDKEGDQRKAFI.DFLQNTVMRS.GERLPTLTADQKKQFGPFVSDY.A.ILYGY...SQQVNQAMDSGLRPVVD.SVNAIRVPQDYlT.QpsPLRphNGSLGVLAQQLQNAKLQADAAHuALKQ.s.DDLKPVFDQAasKVVTsPADALQPLIPAAQsFTQQLVhVGDYIAQQGTQVSFVANG......IQFPTSQQASpYNpLIuP.....LsAQHQAFNQAWTsAVss.s............................................ 0 2 11 30 +11087 PF11255 DUF3054 Protein of unknown function (DUF3054) Pollington J, Finn RD anon Pfam-B_3684 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 25.00 25.00 27.60 26.90 24.90 24.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.56 0.71 -4.23 34 368 2009-01-15 18:05:59 2008-08-14 12:51:26 3 2 352 0 112 243 15 107.70 39 76.76 CHANGED hlluDllslllFsslGRtSHups....hsssslhtTuhPFLhGhhluhhls...tuapt........pthhssulhlWlsslslGhlLRsh..putusshoFll...VuhsssullLlGWRulst ......lsuDVlsVllFuslGR.t.SHucs....LsssGlhpTuhPFlhGhslGWlls........tshpt......s........sthh.sGlllWlsTlllGMllRth..oupGsu...s...s...Fll...VAsshsAlhLlGWRAls.u........... 1 33 85 106 +11088 PF11256 DUF3055 Protein of unknown function (DUF3055) Pollington J, Finn RD anon Pfam-B_3685 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Firmicutes. 
25.00 25.00 26.80 54.90 23.30 18.10 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.61 0.72 -3.85 9 448 2009-01-15 18:05:59 2008-08-14 12:57:36 3 1 339 0 39 150 0 80.60 55 88.16 CHANGED LYDDsEps+sRFVuFhGEppRaDLullaTsRaaGKsLVlshQosRFAIlGtDDlcEsGYLp+lapl.sEE-ApELppFLtEl ..LYDDsEpopVpFVGFhG.E.c.oRYDLhLlaTsRHYGKTLVLsMQTNKFuIIGsDDLcE.GYltHlhul.stEEu-ElppaLpEl..... 0 12 23 32 +11090 PF11258 DUF3048 Protein of unknown function (DUF3048) Pollington J, Finn RD anon Pfam-B_3658 (release 23.0) Family Some members in this bacterial family of proteins are annotated as YerB. However currently no function is known. 25.00 25.00 72.10 30.40 24.20 19.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.65 0.70 -5.27 32 284 2009-01-15 18:05:59 2008-08-14 13:08:54 3 3 253 1 96 284 560 276.90 28 78.69 CHANGED PLTGhss...pch..ptRPlAVhlsNt.ssApPQuGLspADlVYEslsEGulTRhhAlat.....ophsctlGPVRSAR.Yalchu.ta.cuhasatGuS.....spAhshlcsss...lsslsu.....h....sushF...hR.sssRpAP....HNhYsohcpltpsspp.puhphptt.t..........t.ssssssGptspplplpa....stspssapYD.psptYhRhpsGpsplDtsT....GppltspNlllhcsphpsh...DstG+hs..h-lh......GuGc..uhlhpsGphhclsWc+..sstpshhthhstsGpplshssGpoWl .........................LTG..ht..tth..ptRslAVhlsNp.....ssA.RP.QoGLspADlVYEhh...s...EG.s.l.......TRhhAlap.....sphscplGPVRSuRsYalplupta.culhlatGuo.....st.sh.shlppss.....lsplss..........hp......sushF............aR..sss.RpuP...............HN..hYsshpplt.pshpp..pGh.......ph..stp.phh...........................tts......sss...s...s..uptsp..plplpa............sttps.papYst.psptYhR......h..t.s........G......p......s......p........hD.tps...........uptlsspNlll.tsshpsh.......DstG.p....hs.....lchh..................uuGp..uhlhpsGchh.cspWp+........ss..sps.hhhhh..s.ts.G.p.......l.lssGpTWl........................ 
0 51 81 89 +11091 PF11259 DUF3060 Protein of unknown function (DUF3060) Pollington J, Finn RD anon Pfam-B_3702 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. 20.70 20.70 21.10 20.80 19.90 20.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.06 0.72 -4.35 29 317 2009-01-15 18:05:59 2008-08-14 13:35:01 3 2 141 0 67 209 0 62.90 38 39.12 CHANGED sVuGssNslslsGsCssVTVsGssNpVss-s...lcp.lolsGhsNoV...................oYcsG.....sPpl.sspGus....Ns ....................sVsGssNolsls.GsCtsVTVsGssNpVssDs...lc..s.lslsGhsNsV...................sa+s.G.....sPpI.sstGssN................................................................................. 0 13 34 55 +11092 PF11260 Spidroin_MaSp Major ampullate spidroin 1 and 2 Pollington J, Finn RD anon Pfam-B_001419 (release 23.0) Family Dragline silk is composed of two proteins, major ampullate spidroin 1 (MaSp1) and major ampullate spidroin 2 (MaSp2) [1]. MaSp1 contains five alpha-helices [2]. Only the C-terminus of the proteins are shown. 25.00 25.00 25.90 25.20 22.30 21.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.79 0.72 -3.90 26 173 2009-01-15 18:05:59 2008-08-14 13:44:16 3 9 43 3 0 180 0 82.40 52 17.94 CHANGED susSRLuSsuAuuRVSSslSoLlSu.....GssssuuLussISslsSploASssGLSuC-llVQsLLElloALlplLuSAslGpVNhuus .......suuSRLSSPsAuSRVSS...AVSsLlSu.......GssNsAALuNsISslsSQlSuSsPGLSuC-VLlQALLElloALlpILuSSsIGpVNhuus.................. 0 0 0 0 +11093 PF11261 IRF-2BP1_2 Interferon regulatory factor 2-binding protein zinc finger Pollington J, Finn RD, Bateman A anon Pfam-B_1430 (release 23.0) Domain IRF-2BP1 and IRF-2BP2 are nuclear transcriptional repressor proteins and can inhibit both enhancer-activated and basal transcription. They both contain N-terminal zinc finger represented in this family and C-terminal RING finger domains [1]. 
25.00 25.00 25.20 39.60 24.60 23.90 hmmbuild --amino -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.75 0.72 -4.51 11 146 2009-01-15 18:05:59 2008-08-14 13:46:33 3 2 75 0 86 140 0 53.40 76 10.11 CHANGED o+RQaCYLCDLPRMPWAMlaDFSEsVCRGCVNYEGADRIEhVI-sARQLKRsHG ...SRRQpCYLCDLPRMPWAMIWDF.....o..EsVCRGCVNYEGADRIEhV.I-oARQLKRuHG................. 0 21 31 55 +11094 PF11262 Tho2 Transcription factor/nuclear export subunit protein 2 Pollington J, Finn RD anon Pfam-B_002604 (release 23.0) Family THO and TREX form a eukaryotic complex which functions in messenger ribonucleoprotein metabolism and plays a role in preventing the transcription-associated genetic instability [1,2]. Tho2, along with four other subunits forms THO [2] 23.10 23.10 23.10 23.50 22.80 22.80 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.84 0.70 -5.37 35 353 2009-11-24 09:31:40 2008-08-14 13:48:43 3 7 271 0 252 345 7 279.40 35 17.64 CHANGED pshshlsss.hassFWpLSLhDlthP..pchY-p..plp+lcpplpp..............tpsthpppp+chcplp..pphppLtsEtppH.hp+sppspppLpcppsp.Wh......ttsspplp........pFlppClhPRhlhSshDAlasu+FlphLaphpsstFshhshhcp...lhpsphLhshlhssTppEApNlGhFhsclLphlppWppcpp.hacc.s.......................hp.......p.pphhsac...papphla+Waphlhpslhp.........................................................................................................................sL..ppp-ahpI+.NulhhLppllshFPhlpphupplhptlcpltpp-............pRcDlplsusullupLp 
........................hWpslosp.FYsTFWpLohYDltlP..pptY-c..Els+L+tphpsl................................................p-.pphs.s++c+..-+..cchp..sh.c+LhpEpccp.hc+hppshp+Lpp.c..Kc.p.Wh..................................tsscspplp..............phLQhClhPRslhSshD....AlYCu+Flc.hlH.p.pTPsFs.TlhhhDp.................lFp..s..lhhhlhsCTppEApphGRFLsphLcplt+W+s..-cs..ha-+Es...................................tshPGFhhhhc.................p.ss.lsa-...sF..++lhaKWHhplspuhhp.........................................................................................................................CL..pss-YhpIRNslhlLpcll..s..haPh..l...tph...uptlppplpclpppE.............pR.DlhhhAtuhhupL......................................... 0 96 149 211 +11095 PF11263 Attachment_P66 Borrelia burgdorferi attachment protein P66 Pollington J, Finn RD anon Pfam-B_003349 (release 23.0) Family P66 is an outer membrane protein in Borrelia burgdorferi, the agent of Lyme disease. P66 has a role in the attachment of Borrelia burgdorferi to human cell-surface receptors [1]. 25.00 25.00 29.50 39.60 23.50 19.50 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.44 0.70 -5.33 7 107 2009-09-11 05:11:33 2008-08-14 13:50:47 3 1 47 0 3 91 0 182.80 58 57.03 CHANGED SDFSIhGHISKKAN........Tsccsp.FsPpsN+LpFsppRosNFAFSlGsGIGhAWNpD-GEKESWuIpGusSYSpRIFGpQDKKSGIGlGIoYGQNLY+PTSSN.plIQpIAtKoFpTLNAEISTYEDNKKGIIPGLGWIASIGlYDLLK-pPpSD..sIIssLTssTssp...sssps....loFscAsKl.GGALYIDYAIPlESIS.sTaIlPYVGsHhLG...SLpuScKolYLKsGLEL-pLIKLT ...............................................P.tNtLpFspppp.shtFShGsuIGhAWNpDpGEcESWtlpGupSYspRlFGtQDKKSGluhGIsYGpsLY+s..psop.phlppIutpuFQoLNsElSoYEDNKKGlIsGLGWIsSIGlYDlLRpKShEN...shhos.hosNppst.....oTssps.......loFp-AhKL.GhALYLDYAIPlcShSspsYllPYlGAahL.G...plp.sspplYLKsGLpL-pLI+hT..... 
0 1 1 1 +11096 PF11264 ThylakoidFormat Thylakoid formation protein Pollington J, Finn RD anon Pfam-B_003380 (release 23.0) Family THF1 is localised to the outer plastid membrane and the stroma. THF1 has a role in sugar signalling [1]. THF1 is also thought to have a role in chloroplast and leaf development [2]. THF1 has been shown to play a crucial role in vesicle-mediated thylakoid membrane biogenesis [2]. 19.80 19.80 20.10 55.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.27 0.70 -4.72 27 122 2009-01-15 18:05:59 2008-08-14 13:52:47 3 3 108 0 59 137 144 204.00 37 80.16 CHANGED TluDoKRsFapsaP+sIsulYRRll-ELLVElHLLShpssFphDslFAlGlspsF-pFhpGYpPppchsslFsALCsussh-sppl+psApphtphspspshpplhpaLppt...stts.t.lts.htthttp.pa+YSRLhAlGLasLLppups...ttc.pphpchlcclupsLsLstc+VcKDLslY+SNL-KhpQAhELh-EhltuER+K+Ecptt .........TVuDoKpsFhptap+PIsolYppllpELLVphHLlphppsFpYDslFALGlVTsa-phMpGY.Ppp-+suIFpAhhpA..lstDPcpYRpDApplpphA+ut.sssslhphhsp....ttphps.LtshhpthtspspFpYSRhaAlGLapLL-.uss........pcsphlcc.lspuLslspc+lp+DL-lYRusLpKhsQAhcllcEhlptE+KK+cppt.t........................ 0 16 42 54 +11097 PF11265 Med25_VWA Mediator complex subunit 25 von Willebrand factor type A Coggill P anon manual Family The overall function of the full-length Med25 is efficiently to coordinate the transcriptional activation of RAR/RXR (retinoic acid receptor/retinoic X receptor) in higher eukaryotic cells. Human Med25 consists of several domains with different binding properties, the N-terminal, VWA domain which is this one, an SD2 domain from residues 229-381, a PTOV(B) or ACID domain from 395-545, an SD2 domain from residues 564-645 and a C-terminal NR box-containing domain (646-650) from 646-747. This VWA or von Willebrand factor type A domain when bound to RAR and the histone acetyltransferase CBP is responsible for recruiting Med1 to the rest of the Mediator complex [3]. 
20.40 20.40 20.50 21.40 20.00 19.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.45 0.70 -5.32 5 158 2012-10-10 16:07:06 2008-08-14 13:59:46 3 13 86 0 84 159 1 182.70 42 27.72 CHANGED Msss.Scu.hpt...VuDVVFVIEGTANLGPYFEoL+ocYILPsIEYFNGGPlsEo-..FGu-hGuTQYuLVVFNTsustPEshVQsauPTpssa-FlpWL-uIpFsGGGuESCSLlAEGLusALQhFDDhpcMRpplGQTshHRHCILICNSPPYlLPo..............sESsoYsG+TsDsLsslhs..ERuIaLSIIuPRKLPALRtLF-KAsssssl.s.possDYAK-PpHMVLl+Gls ...................................................s.lADVVFVIEGTAsLGPYapsL+ppYllPslEYF..suGs.s-pc...h.tp.h.sss.YulVVaposs.sh..spshlp.s.hu.Tpss.phlphlct......l.........pFhG.GGhEssu.lAEG......LusAL.thF-...-hpph.R.pt..h.......sps..p+hC.lLIsNS....PP..Y.hPs..............s-.sh..pa.pu.t..os....-p..Lst.ht..cpsIphSlluP.R+lPsLhhLap+Asss..h.......hshups.pahVLlps..s...................................................................................................... 1 24 44 62 +11098 PF11266 DUF3066 Protein of unknown function (DUF3066) Pollington J, Finn RD anon Pfam-B_3735 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.60 25.60 26.20 89.80 25.10 25.50 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.40 0.70 -4.64 15 78 2009-01-15 18:05:59 2008-08-14 14:55:13 3 1 74 1 27 84 174 214.60 68 92.46 CHANGED DFsS-sYKDAYSRINAIVIEGEQEAHDNYIsLucLLPDct-ELp+LAKMEtRHhKGFpACG+NLpVsPDM-FA+cFFutLHuNFQpAhAEGKlVTCLLIQuLIIEuFAIAAYNIYIPVADPFARKITEGVVKDEYoHLNaGEpWLKtNF-osKtELpcAN+pNLPLVW+MLspVssDAclLtM-KEuLlEDFMIAYuEALssIGFoTREIMRMSAtGLs .DFsS-sYKDAYSRINAIVIEGEQEAa-NYIsLupLLP-pp-ELt+LAKME.RHhKGFpACG+NLsVssDMsFA+cFFusLHsNFQpAhtE..GKlVTCLLIQuLlIEuFAIuAYNIYIPVADPFARKITEGVVKDEYoHLNaGEcWLKsNF-sSKsEL.cAN+tNLPLlhpMLsQVssDApVLuMEKEsLlEDFMIuYtEALssIGFooR-IhRMuAhuL.s..... 
0 5 18 25 +11099 PF11267 DUF3067 Protein of unknown function (DUF3067) Pollington J, Finn RD anon Pfam-B_3740 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 29.80 29.00 21.50 18.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.22 0.72 -3.92 16 113 2009-09-11 02:36:25 2008-08-14 14:58:43 3 1 93 1 61 110 105 98.80 42 66.27 CHANGED ucElhpLLhc+WGtSYDlQLp+p+sR.....laLQVMWtYLEQtSFPLoEt-YhtHLsplsphLsthGtuptV+salpsT+-+PRL....GKAVSlPLcl...spRhsEal ...........................p-LtpLlhpKWG+SYDlQLh+p....c..tp......lalQ.......VMW.+YLEQpSFPL.oEpEYht+LsslAphLpsWGssppV+salpp.......o.......+-.......R.PRl.....G.K....A..VSI.l-h.....s.t+.sEah................ 0 19 46 57 +11100 PF11268 DUF3071 Protein of unknown function (DUF3071) Pollington J, Finn RD anon Pfam-B_3805 (release 23.0) Family Some members in this family of proteins are annotated as DNA-binding proteins however this cannot be confirmed. Currently no function is known. 24.10 24.10 24.70 53.90 24.00 24.00 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.98 0.71 -4.52 27 435 2009-01-15 18:05:59 2008-08-14 16:07:47 3 1 433 0 113 326 73 167.20 38 47.77 CHANGED Mp-L+lVulssDGppllhpss....sG..ccaplslDDcLRuAlRts.............................................................hst.......splchEhss...................tLpPR-IQu+IRAGtosEcVAptuGhslp+VcRFttPVLtERs+sAchAptstssps............tsPu........TLuEllsptlss+Ghs.ssspWDAWRcs-spWhVplpapsutpsps.......AcWpas ......................M.cLplls.lpssuppllhpss.................u....ppahlslD-pLRsAltts...................................................................................................................phphphcs................................................sLsPREIQuRIRuGAosE-VAttuGlsls+VcRFAtPVLtERs+hsEhApsstsh+s............s.u.Push..........TLuEllspshsA+Gl.s.sslsWDuWRc-Ds...pWtlplpW.tuut..s..s..pp.........Acapap............................. 
0 34 83 105 +11101 PF11269 DUF3069 Protein of unknown function (DUF3069) Pollington J, Finn RD anon Pfam-B_3783 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Gammaproteobacteria. 25.00 25.00 71.90 71.70 21.30 18.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.76 0.71 -4.03 12 138 2009-01-15 18:05:59 2008-08-14 16:08:23 3 1 138 1 25 67 4 120.10 63 83.47 CHANGED Vlsh-clPEpLh....shhsplhEssEtphpcuWsuLPASApslLsp.EpFHuhh.lupAahtlshlup.phs-hs-s.ss.pcpEYpu.lhs+lhcsslK-hlKpLKKARpDtshhpsh+pVht VlEFDpVPEuMa....cMVTSIHEVSEEsVREAWDoLPASAQNlLDNFEQFHALlSVSQAFAGLNVMEEFPTLNLPcpMoEE-K-tYRAQLLDQVLHNCVKDMVKQlKKARRDPILKR-FK-VF.A.. 0 2 6 17 +11102 PF11270 DUF3070 Protein of unknown function (DUF3070) Pollington J, Finn RD anon Pfam-B_3804 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.60 20.60 28.60 28.60 16.90 15.70 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -7.18 0.72 -4.18 20 37 2009-01-15 18:05:59 2008-08-14 16:09:35 3 2 1 0 37 37 0 32.60 43 23.32 CHANGED hAPsccPP...sE-sHEAP.sschssustpssssu hAPscpP....sE-sH.uP..Schtsu.tsstspu. 0 37 37 37 +11103 PF11271 DUF3068 Protein of unknown function (DUF3068) Pollington J, Finn RD anon Pfam-B_3769 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. 
25.00 25.00 25.60 25.40 23.90 21.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -12.03 0.70 -5.36 22 301 2009-01-15 18:05:59 2008-08-14 16:10:06 3 3 204 0 84 243 5 319.50 26 82.78 CHANGED Rh...hushLhulGshhllhAlllshalsutltclPlDlssss..lstsssssh.s.stss.pt..thps..s.............ppphsstcstssDhlshpsspslhc.............scspphlpshssphslDRpouhslss..............psthssssh..shpGl.a+FPFcsE+coY.aaDshhppshshsasup...-slsGh..psY+FpQpls.sshs.................sh.hh.tt................sstssshshhYsssR.shW........V-PhTGslVphpEp.pchhsssstp................hshl-h.chshs-colps.lspAcctpsplthhschlPhshhhlGllshlsGlhL ....................................................................hushhluLG.ssLlh.hAlllstasssc.ltclPL..DlstTh....hop...s......s...s........s........shss......s..h.h.st....p...h..s.s...ssshsp.............................ppphsl.tssus...sD..s.lslplus..oltRtp.................................tpssssll.Ahs-phs.lsR.couhslsssst......................stpsss..ssl.....t+-Glp.Y+FPF.cTE.+.+..o.Y.aFDshspcshs....hsass-......ccl..sGh...ssY+FpQs....l.s.sslut.............................................tst.sh.hshssth.........................................sspphshshaYs..spR..shW........V-PhoGslVctpEchpcaaupsstp..........................chshhph.phphs--TlpptlstAcst..........pcp..lthhuchlPhshtslGllhLlsGhh........................................... 0 20 56 75 +11104 PF11272 DUF3072 Protein of unknown function (DUF3072) Pollington J, Finn RD anon Pfam-B_3823 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 25.60 25.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.83 0.72 -4.31 13 81 2009-01-15 18:05:59 2008-08-14 16:19:39 3 4 79 0 40 88 5 56.90 53 46.16 CHANGED NsEKDPc-WsTGDEPMTGAQASYL+TLsEpAG..EshsssLoKA-ASchIDcLpppsGR .............hpKDPs-WsTGDEPMTGAQtSYL+TLuccAG....Eshss..sLTKA-ASc+IDcLpppoG+...... 
0 15 28 34 +11105 PF11273 DUF3073 Protein of unknown function (DUF3073) Pollington J, Finn RD anon Pfam-B_3852 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 25.00 25.00 30.80 30.50 21.70 21.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.42 0.72 -3.44 29 430 2009-01-15 18:05:59 2008-08-14 16:25:40 3 1 424 0 111 261 50 67.00 49 84.81 CHANGED GRGRAKAKQTKVAR-LKYpSPsTDhspLpREL..uG.........sspspssss-shsDc.st......................cpYuc.s-t.c .GRGRAKAKQTKVARcLKYpSssTDh-pLpREL..uu........tstssp.sc.s.....-.-...s..hs..D.-.h.s-.................................tDcYsc.st..p...................... 0 34 82 103 +11106 PF11274 DUF3074 Protein of unknown function (DUF3074) Pollington J, Finn RD anon Pfam-B_3858 (release 23.0) Domain This eukaryotic family of proteins has no known function but appears to be part of the START superfamily. 27.50 27.50 27.50 27.50 27.30 27.20 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.31 0.71 -4.66 28 152 2012-10-02 19:24:03 2008-08-14 16:30:32 3 2 107 0 125 153 0 197.00 26 41.03 CHANGED WhuR+SlHc..................uhsapcacptLpsp.......+s-sEcpa.............lcslsutchlpph..................tst.spsphplhplpapFPh..sho...sRsFsslllshchs.................................ptsscpahllShPlsc....................pss.tpshlpGpYtSVEhlp.hs.................................................................................tpssslEWhMsTpSDsGG..slP+Whpcp....uhPpuIspDlstFLcWs 
..........................................................WhuR+SlHc..................shsapcappsLppp.........+s-sEppa.......................htslsuhcplpph..........................................tssphplhplpaphsh.....shssRsFssLllosphs...................................ts.cpahllShPhpt.........................................ts.s.ppshlhGpYtSVEhlcpl........................................................................................................................................tpsstlEWhMsTpSDsGG...slPcaltcp....uhPsuIspDsshFlpWh................................. 0 33 68 106 +11107 PF11275 DUF3077 Protein of unknown function (DUF3077) Pollington J, Finn RD anon Pfam-B_3820 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 20.40 20.40 20.40 20.40 20.30 20.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.61 0.72 -4.02 19 142 2009-01-15 18:05:59 2008-08-14 17:03:34 3 1 33 0 59 171 1 73.60 36 84.52 CHANGED TsGtssFhpttt.pst.sLFRlpPGlPhpcAh-puShLhGhl+cLThcu.M...-sc.hhhhAu+YLSuMAKALhDDhElGhp ................t.Fh...t...tt..sLFplpsGlPhccAhppAStLhssspcLshcuhh....cs.ppp.hha....AuhaLutMAKALlDDhths........................ 0 0 0 15 +11108 PF11276 DUF3078 Protein of unknown function (DUF3078) Pollington J, Finn RD anon Pfam-B_3846 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 29.80 28.90 20.60 19.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.09 0.72 -3.81 36 294 2012-10-03 17:14:37 2008-08-14 17:08:44 3 2 198 0 84 291 141 93.90 33 25.50 CHANGED hsQsuFs.NWtuGGsssluus...hslshptNYc.+cchpWcNplphpaGlsptc..spc.......h+KosDplclsSphGh..c.ttspWYYShhhsF+TQFssGY ..............hsQstho.NWh.tGGps.shuhh...sslshptNYp...+p+htW-NplphchGhspspucs........................h+posDtlplsSphGh...p..ssppWYYohthpFpTQFssGY............... 
0 31 75 84 +11109 PF11277 Med24_N Mediator complex subunit 24 N-terminal Coggill P anon manual Domain This subunit of the Mediator complex appears to be conserved only from insects to humans. It is essential for correct retinal development in fish. Subunit composition of the mediator contributes to the control of differentiation in the vertebrate CNS as there are divergent functions of the mediator subunits Crsp34/Med27, Trap100/Med24, and Crsp150/Med14 [2]. 25.00 25.00 25.80 28.00 23.60 23.60 hmmbuild -o /dev/null HMM SEED 990 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.89 0.70 -13.65 0.70 -7.05 6 199 2009-01-15 18:05:59 2008-08-14 17:12:07 3 4 83 0 82 183 0 558.40 40 94.03 CHANGED hKsspLKQhILpAWKERWoDhQWuINlKKhhP+GVohDlhNLA-ALLpQAhIGsusNPLlLSYLKaulsuphVSauuVLssISKaDcFsRchClpALLEhh-.hsDsloC+GKuEEsI.Ls+AhlShVaWLLQhhstss-+hp-st-h..ssttEphLptshpsL-+llpSsh.hullaIuKhEEsp.asslccphLclsp.lss...........lsssplcsphccssoL.psh.pMhslcucpl.cshhsolpsLIhlEshhN.ssDTQ.hVtQLhhlKRhp+hPhshhhhEIh+AChluL.pss-socEhhWsAFTFlKlPQlLtpL+thspstp.pD.......ao.DVspAFEhLLp.TPLLDhhDp+CsCsslphLLpEhsKhtLLoEspscpLsAKRsu...hsspLKps-.Nus.pP.NsshILRAEPslosILKTlsuDaSKs.EuLLGVLsphLSGpSLDLlLusAuspGKLKoFsp+hIphNEaoKp.lsGEhuKsAslRAhLFDlSFLMLsalsQTYGSEVILSEuusS....FFEpWlpsCMsEcsKshNPcp.shp.sDssKVEpLlshLNsSs....phKhsphKWcElCLoIPAslhcVL.AWENtsLSstsIpKIhDNIKu+lCShAVCAsuWLsAahpMlt.DEp.KPhsMIpQLsoPlsu...EphhQ..ah+ERlslMspIlc+MptDVhppsssp.Khtsth.p.....ppLsu+tPlcEthc-sa+sVLEKGWlss+uhahL-oLLphGGsaWhsspLVccLL+pphhcchsRsh-llaAIhpLDhppsTlsLLuallPhLLhcpuphcslsDP.uRsLAKLsVaChloohpop.tstsSutptp++RpchcDhspL.sLDshpsSthMRhl.....ussp--sshhuusGsc.....uhsSShSASpL+s........lsh+EPLppsLtsLFhlhSphluocphuP+s.FV.pFlp.hVEsucp.phuslLphhP.uhVppLlKlsuhsc.KV..lLplhDLslPhGRphAspsls 
.............................................................................................................................................................h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 26 32 56 +11110 PF11278 DUF3079 Protein of unknown function (DUF3079) Pollington J, Finn RD anon Pfam-B_3866 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
21.50 21.50 24.90 42.80 20.70 19.20 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.91 0.72 -4.42 10 130 2009-01-15 18:05:59 2008-08-15 10:29:55 3 1 129 0 38 95 4 50.40 70 69.48 CHANGED MAKKFPLHPuHPERICWGCDRYCuADALACGNGS-RTQHPsELFG-DWhtau ....MAKKFPLHPpHPERICWGCD+YCssDuLuCGNGSsRTQHPsELhG-DWYch.... 0 5 18 29 +11111 PF11279 DUF3080 Protein of unknown function (DUF3080) Pollington J, Finn RD anon Pfam-B_3870 (release 23.0) Family Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. Currently this family has no known function. 25.00 25.00 48.50 48.30 20.40 19.90 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.10 0.70 -5.28 24 161 2009-01-15 18:05:59 2008-08-15 10:30:32 3 1 160 0 29 132 67 301.60 36 90.76 CHANGED hhcsYhpRLAslLDss...s.sh.shsshsl...Ps+Rp....hhhslsclsluLL-h.hpLppC.pLtpLIuE+NS.LGKVtsshpchpYphphlpuhppCl......p.t.tspslpspLpphhppKpppLshthhNslhsscphcpthohusphLshst.tsphuchhsAlpplsthhtt.........tttpsphssppltph.cpLpppchhGpLhhShpptsthLsssTphLppp.stpslCssspssschphLpNlFhphYltplQPYlApLspthppLts.ltpltpphs.h...hss.hptah................................sshappa+pustpHVphWQpLacpC..uhssu ........s.F-cYlpRlANV.pts................shs.ss..hss.sL.......PcKR-....Lhls.lssloIuLLDu.YpLRpC.uLFpLIAERNShLGKVQDthpcacYQlsLLpulpsCL..................ss..splspsL+spLhslps.KpppLsspaaNhLasS-shRpQLo.uopahstph....shu-ll.ALcpLsslppp.................hhsp...pslsopsLs..stQEsLcKp+llGclhaSLspuosaLcssTptLcsp.sssllCusp+ss........s+hpYL+NVFpp.YlcclQPYLApLDptYhQLsstLshhpp.ps.....hs......................................hpssappFRhAsppHspaWQpLFtRCtlslG........... 0 7 14 23 +11112 PF11280 DUF3081 Protein of unknown function (DUF3081) Pollington J, Finn RD anon Pfam-B_3884 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
25.00 25.00 64.20 64.10 22.50 19.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.60 0.72 -4.45 22 143 2009-01-15 18:05:59 2008-08-15 10:44:01 3 1 139 0 27 89 11 79.40 54 89.94 CHANGED cl-hpphLpla-pIpppGcp..p-st.....YhhpGlpAapDaDGYTlaLpspsVsLslhFHNoYcl-Y..................-pccphspFh++lpslh ...L-ssKlLpAYEsVMpNGoP....TEaG.....KlYEGIEAauDYDGYNlahRGNGVELKlGFHNTYHLsY..................EQEHL+DSFLKKLuhL.A.. 0 6 15 22 +11113 PF11281 DUF3083 Protein of unknown function (DUF3083) Pollington J, Finn RD anon Pfam-B_3898 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 21.00 21.00 22.70 22.40 19.80 17.00 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.09 0.70 -5.31 12 45 2009-09-11 05:40:26 2008-08-15 10:44:43 3 1 40 0 16 36 16 301.00 55 87.65 CHANGED ssYQpLSctlFpLsEcc-LpNlHlIANDKLPhVRapsEuYshpTscQllFFYNPtYHEupphahsssh+A+KlRllFLATG--lRuNSApFHp+VpphlpcLtspLslpp.plKlRDHQHLoYDlFAKuKGsKpoYGaKLRuIssRYpuRpCsLPpsaouloYsTlslPlsR+l+ppl.sp.psp-.assLYpplpDsFhpAspsppLs+sAMlANGlsPlVRNSch-plssssElQMlGFDPptppsphls+WsuscLV-olpFllsAuppDps-tGaGRFhNpVEpAl+shss-lsl-tp+p-lhlRFHQHISYp ......................sCYQsLSRhlFsLu-pa-L+NVHVIsNDKLPVVRaHsEAYChpTsEQlLFFYNPtYHEAppLasp-saRARKlRIlFLATG-DIRuNSAsFHh+VpclLscLhspLPlpp.plKlRDHQHLoYDLFAKuKGsKEoYGYKLRuIsPRY+ARpCpLPcshuuLTYVTVoLPLSR+LKptlhs-.sssD.FsPLYQ+LpDsFlcAsss+pLs+lA....MlANGLTPLVRNSKa-+l-upsElQMlGFDPsssEpQllp+W-uspLVEssHFsIVAuscDpcDtGaGRFMNpVEsAL+sFssElulD.-R-DLlVRFHQHISYp................ 0 4 6 10 +11114 PF11282 DUF3082 Protein of unknown function (DUF3082) Pollington J, Finn RD anon Pfam-B_3896 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 25.90 25.60 24.20 24.30 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.65 0.72 -3.92 20 104 2009-01-15 18:05:59 2008-08-15 10:45:45 3 1 98 0 52 104 104 81.20 36 44.12 CHANGED cpuPLshLSGuloSuhlualsahLopplsshFAt+PsphSsslsQsIuoAl+TLllGhshLATFsFAFlulGLhllhlpsLh ...............................sPlpsLhGulhuusluhhhYtLosslsssFut+sls...s.s.h.sppIusslRTLlhGlshLATFlFuhlulGLhllslQlhh...... 0 13 36 48 +11115 PF11283 DUF3084 Protein of unknown function (DUF3084) Pollington J, Finn RD anon Pfam-B_3912 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 50.80 49.90 19.60 19.30 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.63 0.72 -4.12 16 146 2009-01-15 18:05:59 2008-08-15 11:08:44 3 3 145 0 52 150 102 78.60 44 18.38 CHANGED uGalLIlulLlLGGlIATlGDRLGoKVGKARLSlFsLRP+pTAVLlTllTGolISAhTLulLhssS+pLRpGlFpl.-pI .....ualLllslllLGGlIAhlGD+lGo+lGKpRLoLFsLRP+pTAlllTllTGslIuuhT.lullhhsScslRpuLFph-p........ 0 16 36 50 +11116 PF11284 DUF3085 Protein of unknown function (DUF3085) Pollington J, Finn RD anon Pfam-B_3922 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 25.10 25.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.92 0.72 -3.79 24 126 2009-01-15 18:05:59 2008-08-15 11:13:49 3 1 107 0 43 108 2 88.70 37 66.35 CHANGED sslhLVtDcGVYlhusst..hs..sGp...hlsYApGCsPcts..--WachtRpthGGDDhsEhl-s..shlpphlpss.c...Lplphossplplhs ...stllLVKDpGVYlhuptu.cts......sGp.....hlAYAtGCsPcsD...--Wa-hucpphGGDD..FuEhlcs..shhppllpssts...Lhlph.osoplplt.s......... 0 4 22 34 +11117 PF11285 DUF3086 Protein of unknown function (DUF3086) Pollington J, Finn RD anon Pfam-B_3929 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 
23.60 23.60 24.20 96.50 23.00 23.50 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.55 0.70 -5.59 18 73 2009-01-15 18:05:59 2008-08-15 11:19:56 3 2 71 0 28 88 163 274.90 53 76.67 CHANGED llplALp-LppRRpuLptEIEcLEpRKcplEpEl+ooFuGpSDsIA+RVKGFQDYLsGALQDLApSsEQLELVsQsshVpPSPLD....ppusssssss......tssssusupsFps-pcLIRptLppapppPDaYAsPWpLRRSlEshcsEhl-DWFFNQGGRGA..ShGSR.+NlLVuuAlIuILu-LYGDpFQsLVLAupPERLGEWRRGLQDsLGLuREDFGPsSGIVLFERuDALlERADRLEEcGElPLIlIDsAEcsV-lslLQFPLWLAFAusPp..Ehhp--c ..................................lp-uLp-LppR+psLphplEpLE+R+ccIcpEh+ooFAGtSpslAhRVpGFpDYLsGuLQDLutuAEQLELls..psshhpPu..Phs...........................ptttsstt............ssss.hsspsFpsppchI+phL-papppPDaYusPWpLRRohEshcsEhlpsWFFsQGGRGAlpShGSR.pNlLluSAlISILscLYGDchpsLlLAssPERLGEWRRGLQDsLGluRpDFGPspGIVLFEps-ALlp+ADRL.ccsplPLIlIDsuEcplslslLQFPLWLAFAssPpph.p..t........................... 0 3 18 26 +11118 PF11286 DUF3087 Protein of unknown function (DUF3087) Pollington J, Finn RD anon Pfam-B_3938 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 22.60 22.60 22.70 22.80 22.50 22.20 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.82 0.71 -5.01 25 105 2009-01-15 18:05:59 2008-08-15 11:24:09 3 1 103 0 37 106 10 163.40 38 96.62 CHANGED MpLppIDKppYR+phNhlhlshlusLslLSLshushLIuLFG.................stusuNF+aNLlGVlluhllsuslLp.ph+s+saMpElhYVWcLKQhhN+IhRKL++lKsAAspsD...hsAlhlLpFYYsu.+QlapLDsNTlTlsslpp-lspLppthtphuLslstcpa-sphL ..MpLppIsKphYR+phNhlhlshlssLslhSLshushLIsLFG.................spusuNFHhNLlGVllulhhssullp.plKs+PaMpElhYVWcLKQhhN+IaRKlpplKAAAp.psD...ssALhhLpFYYsu.+QlapLDsNTlTh.s.sls..p-hspLpphhtphuLsLshspacsshL.............. 
0 4 14 26 +11119 PF11287 DUF3088 Protein of unknown function (DUF3088) Pollington J, Finn RD anon Pfam-B_3952 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 30.90 64.70 24.60 24.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.28 0.71 -4.61 19 111 2009-01-15 18:05:59 2008-08-15 11:28:02 3 1 87 0 36 105 3 111.40 45 85.00 CHANGED s+DhLFLLcPGFtDsut.PGt.FaCscssslEGLLusaPsLtsplDVc+lsasRPRtslltLlG-spQSlPVLlLusspssssss...tstsspRFlsDsccIhchLupRaGhPcs ...........+DhLFLLcPGFtDstp.Ps..FhCsculslEGLLushPsLpsplDVc+lsasRPRcsllAhlGEscQShPVLVLus..stss...ssu...pshsspRFlsssccIhchLuppaGhs+.h............... 0 10 17 28 +11120 PF11288 DUF3089 Protein of unknown function (DUF3089) Pollington J, Finn RD anon Pfam-B_3953 (release 23.0) Domain This family of proteins has no known function but appears to have an alpha/beta hydrolase domain and so is likely to be enzymatic. 23.40 23.40 23.40 23.60 22.80 23.30 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.58 0.70 -5.17 25 100 2012-10-03 11:45:05 2008-08-15 11:32:33 3 2 78 0 52 106 398 198.30 29 55.10 CHANGED tssVFaVaPToa...hstu..tWNuslsssps...thtchhltspAusF.spssclaAP+YRQAoltAa.....h.sspsp..ustA...hchAYsDVtcAFctaLsphssuRPllLAGHSQGuhhLhcLL+ccIuGcP.lpcRllAAYlIGhs.lsl...-ts.sshtslPsCpsssQsGClloasoa..t........ttt..s.su.c..upsts.spphlCsNPlshsssss .............sDVFalaPTs....h......ssss......h..Ns....sl...ssttt..tthspthlttpAusF.ssssplaAPhYRQssltsh...................h.tttss......spps.....hp.hA.YsDVtpAFchYLpph.NpG.RPhILAGHSQGuhhlhcL.L+..c...phssps.lpcRlVAAYllGhs.lss.....pth.tth.slshspssspoGClloasoh...s........................s.t..ths...........ttthhChNPhshtss.s...................................... 
0 23 42 45 +11121 PF11289 DUF3092 Protein of unknown function (DUF3092) Pollington J, Finn RD anon Pfam-B_3988 (release 23.0) Family This viral family of proteins has no known function. 18.70 18.70 20.00 19.80 17.60 17.50 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.92 0.70 -5.18 4 77 2009-01-15 18:05:59 2008-08-15 13:33:13 3 1 69 0 0 37 0 267.00 88 99.20 CHANGED MDLFMSIFTLGuITRQPuKIENASPASTVHATATIPLQAShPFGWLVlGVALLAVFQSASKVIALHKRWQLALYKGlQLVCNLLLLFVTIYSHLLLLAAGMEAQFLYIYALIYILQIlSFCRFIMRCWLCWKC+SKNPLLYDANYFVCWHTpNYDYCIPYNSVTDTIVlTSGDGhopPKLKEDYQIGGYSEDWHSGVKDYVVlHGYFTEVYYQLESTQloTDTGhENATFFIasKLVKDsspVQIHTIDGSSGVVNPAMDPIYDEPTTTTSVP ................MDLFMRFFTLGSITAQPVKIDNASPASTVHATATIPLQASLPFGWLVIGVAFLAVFQSATKIIALNKRWQLALYKGFQFICNLLLLFVTIYSHLLLVAAGMEAQFLYLYALIYFLQCINACRIIMRCWLCWKCKSKNPLLYDANYFVCWHTHNYDYCIPYNSVTDTIVVTEGD.GISTPKLKEDYQIGGYSEDRHSGVKDYVVVHGYFTEVYYQLESTQITTDTGIENATFFIFNKLV.KDPPNVQIHTIDGSSGVANPAMDPIYDEPTTTTSVP................................... 0 0 0 0 +11122 PF11290 DUF3090 Protein of unknown function (DUF3090) Pollington J, Finn RD anon Pfam-B_3954 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 19.50 19.50 19.70 19.70 19.00 19.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.18 0.71 -4.79 15 303 2009-01-15 18:05:59 2008-08-15 13:34:29 3 1 301 0 84 177 117 172.20 51 88.62 CHANGED asFDsP-RFVsGTVG.PGsRTFaLQARsGuRlVSVuLEKsQVulLAE+ls-LLDElsR+tG..sslPss.ssshsDscPL-tPl-EEFRVGThuLuWDscsppVVIEshuls-..............-sDsc....shs-ss-ssDsLRVhLssssARAFAcRApcVVuAGRPsCPLCGpP.LDPcGHlC ........hacpPDRFVsGTVGQPGsRoFaLQA.tc.s.uRVVSVhlEKpQVshLA-+lspLL-Elsp+hG..s.VPPs.ssp.hcDLu..PLcs...Pl..-tEFRVGTMuLuWDsEspplllEh..hAlo-s.................thDsu........llhs-sEpuPD.ulRVhlTsppARpFusRuppVVu.AGRPsCPLCupP.LDPE.GHlC.................... 
2 30 66 80 +11123 PF11291 DUF3091 Protein of unknown function (DUF3091) Pollington J, Finn RD anon Pfam-B_3979 (release 23.0) Family This eukaryotic family of proteins has no known function. 25.00 25.00 31.00 31.00 21.10 21.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.25 0.72 -4.05 11 57 2009-09-11 07:39:49 2008-08-15 13:35:06 3 1 1 0 0 77 0 93.00 33 58.13 CHANGED IcpLp-EIc+DYssaSccslEclp+ppspLcphasp.pSctpNhTCs+PcNIstcDlpsLpshItcpppshhshshacL++caLhplpcsLpNsscc.SE ........hcphccEIc+cYssasccVhEclhppsspLcphasphQSctpshTCs...........cPcNIstc.DlssLpshItp.phshh.hshhpLcpphh.plhcphpN.spp.u.............. 0 0 0 0 +11124 PF11292 DUF3093 Protein of unknown function (DUF3093) Pollington J, Finn RD anon Pfam-B_4007 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. Some members are annotated as alanine rich membrane proteins however this cannot be confirmed. 21.30 21.30 21.30 22.20 21.20 21.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.73 0.71 -4.39 23 347 2009-01-15 18:05:59 2008-08-15 13:41:23 3 2 338 0 90 226 64 143.40 38 85.77 CHANGED shYcERLWsPhWWWlhushl...sulhAh.lshus.slhsh.lshAllhslusslhLhh.....upscIcV.......ssspLhsGcA+LPsshlucstslsssstputhGppLDstAallhRuWlsshVhlsLsDPsDPTPYWLlSTR+P-cLluALp ......................hYpERL..WsPhh..WWlluhul...hALh..sh.shh.uhs.u.Lssh.lshsll.ssl..ssssllth.....Gp..s+IpV.......ssttLhsGcAplPhshluRstsls.upstpushGpphDsuAFllpRsWltshlhlhLsDPsDPTPYWlVSTR+Pc+LhuAl..................................... 0 26 66 86 +11125 PF11293 DUF3094 Protein of unknown function (DUF3094) Pollington J, Finn RD anon Pfam-B_4017 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 
20.90 20.90 20.90 21.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -8.57 0.72 -4.50 13 47 2009-01-15 18:05:59 2008-08-15 14:02:50 3 1 47 \N 13 42 25 53.60 58 92.01 CHANGED oSRLSPEDQp+V-pYLuuPQHQVERpPFRPWhLhllVLAVsIGLGLLSRLLShLs ..oSRLsPEDQp+V-pYLpuPhHQVERtPFRPWhLhhllLAVVIGLGLLSRLLShLs...... 0 1 5 9 +11126 PF11294 DUF3095 Protein of unknown function (DUF3095) Pollington J, Finn RD anon Pfam-B_4020 (release 23.0) Family Some members in this bacterial family of proteins are annotated as adenylyl cyclase however this cannot be confirmed. Currently no function is known. 20.40 20.40 23.20 22.50 18.00 17.30 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.20 0.70 -5.43 25 117 2009-01-15 18:05:59 2008-08-15 14:20:26 3 2 97 0 48 132 14 331.50 38 95.68 CHANGED FYssLPs..hstFpplsDsstYpPLP-DWslsluDIVsSTtAIusGRYKsVNMsGAusIuAlhNuh....ssh-hPFVFGGDGAshAVPsshhcpuRpALussptascp-asLsLRlulVPVuslRspGh-V+VARausSsslsYAMFuGGGLuWAEsphK.......sucatlssssssst.PDLoGLSCRWspI..PucpGpIlSlllhPsssssstsattlhccllshs-p...sscuuHPls.ps.plphsspuLshEA...RhptG.....t.hhhppltllh.slhuhlhh+p....shphssasscpYhpplspNoDFRKaDDuL+MslDsss-ptcplcshLppupspGhl+YGLHtQspAlMTChVsoshpccHlHFlDGAsGGYAtAAppLKs ....................Fattlsh..hptF.ptlhDss.YpsLP-sWhlulsDIVsSTtAIupGRYKsVNhsGAusIuAlhNuh......sthchP..FVFGGDGAshAlPsshhttAcpALussttasppphsLpLRsulVPVsslRspGhDl+lARatsStphsaAMFsG....GGlsaAEtthK.......tupahl...sss.sst.PDLoGLSCRWssl..suppGtllSlllhPsssssstthttlhpcllshhct....p.ctu+Pl.st.p...t.thphs...p..slthEu......+stts......h..hh...hthhhlhhtshhhhhhhph....th...hsth..s..sppYhpplstsoDFRKaDDuL+hslDssspphpplpthLptutttGhhpYGlHppspAlMTChVsssh.pcHlHFlDGAsGGYAhAAttLK............ 0 9 22 27 +11127 PF11295 DUF3096 Protein of unknown function (DUF3096) Pollington J, Finn RD anon Pfam-B_4028 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
20.70 20.70 21.00 20.90 20.00 20.60 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.73 0.72 -4.45 19 138 2009-01-15 18:05:59 2008-08-15 14:25:07 3 2 126 0 65 135 5 38.60 59 76.26 CHANGED PlluLIAGILILlhPRLLNalVAlYLIllGLlGLhshth .....PlluLIAGILILlhPRLLNYIVAlYLIllGllGLhsh..h..... 0 12 33 47 +11128 PF11296 DUF3097 Protein of unknown function (DUF3097) Pollington J, Finn RD anon Pfam-B_4031 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 25.00 25.00 46.70 46.60 20.80 20.00 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.73 0.70 -5.38 18 374 2009-01-15 18:05:59 2008-08-15 14:29:13 3 2 369 0 98 278 7 265.80 56 95.51 CHANGED DcYG.pDVLAsstRsttht...ss-lPsEhGlVVE-suoGFsGAVVpsEput....VpLEDR+G+pRsFPLusG.FLl-GpPVsLstP..tsusAp.......PsR..TASGSlsVtutcARVApASRIaVEGpHDAELVE+VWG-DLRlEGVVVEaLcGVDDLsuhlAcFpPGPGRRlGVLVDHLVsGSKEoRIA-uVst.....sHVLVsGHPYlDIWQAVKPpplGlcuWPsVPRGp-WKpGlCptLGW.tu-pAD...uWpRlLupVcSapDLEPsLLGtVEcLIDFVTs ..............YutDlLu........st.......h..Rp......p..................ss-hPsEhGhVVEDss....oGFVGAVVthEpuh....VsLEDR+G+pRsFPluPG.FhlDGpPVsLstP....pt..usAu......................pR..TASGSlAVsutcARVAtsSRIaVEG+HDAELVE+VWGcDLRlEGVVVEaLsGlDDLsslVA-FpPGPGRRLGVLVDHLVsGSKEoRlA.-p.Vpp.....spaVLVsGHPalDIWQAVKPpRlGlpsWPcVPhspDWKpGlCctLGWPtu......stuD...uW.p+lLupV+sa+DL-PsLlGcVEcLIDFVTt................. 0 30 74 94 +11129 PF11297 DUF3098 Protein of unknown function (DUF3098) Pollington J, Finn RD anon Pfam-B_4061 (release 23.0) Family This bacterial family of proteins has no known function. 
25.00 25.00 29.70 28.00 24.60 23.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.27 0.72 -4.26 28 232 2009-01-15 18:05:59 2008-08-15 14:34:55 3 1 213 0 70 203 188 67.00 45 81.64 CHANGED hsFsKpNYlllhIGhslIAlGFhlMuGtsSss......tlauah+lplAPhlllhGasl.laAILhpP+ ......hsFsKpNallLsIGhAllllGFlLMoG.......suSs.......-.ssF.p.s.-.IFShRRI+lAPlVsllGFlhhI.YAILh+P+... 0 28 55 67 +11130 PF11298 DUF3099 Protein of unknown function (DUF3099) Pollington J, Finn RD anon Pfam-B_4064 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 23.20 23.20 23.40 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.61 0.72 -4.23 24 391 2009-01-15 18:05:59 2008-08-15 14:43:59 3 2 356 0 114 278 53 69.10 35 54.99 CHANGED hITsAstohp--h+sRh++YslhMulRhssllLAslsh.....ualpllhls..sulsLPWlAVllAN.spssppppcs ...........hITsA.t.u.t.phct.R...+cYhhhMulRsssllhA.slsh.h.....shl.ulshls..sulsLPalAVllAN.sts.+psp..t............ 0 34 89 109 +11131 PF11299 DUF3100 Protein of unknown function (DUF3100) Pollington J, Finn RD anon Pfam-B_4068 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 
20.20 20.20 21.80 22.80 19.90 19.00 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.52 0.70 -5.15 27 247 2009-01-15 18:05:59 2008-08-15 14:51:52 3 1 215 0 74 233 4 231.70 44 74.12 CHANGED llluE.hIGshplslGsu.sllLLPhlaAlllGhhlshthh........phlsccptphAusllhlulh.LlA+hGsslGPsl.pllsAGPALlLQEhGpl.GTllluLPlA.llLGh+REuIGATaSIuREPslAlIu-KYGhcSPEG+GVLulYlhGTlFGslFholLAuhlushsh.FcPhALAMuuGVGSGSMMuAusGuLsuth.P.chscpIhAaAuASNLlosssGhYhslFluLPLsphhYphl ..................lluE.hIGhhphslG.u.pllLLPhlaAlllGhhlu.hthh........phlsccphphAuslltlulhhhhAKhGhslGsslspllpuG.........ALllQEhGpl.GTllluLPlA.llLGl+REAIGATaSluREPslAlIu-+YGh-SPEG+GVLuhYlsGTlFGslahullAuhluu.hsh.FcPhALAMuoGVGSuSMMuAAsuuLsshh...P.-...hucp.lhAhAAASNLloshhGhYhslFluLPLspahYch.... 0 23 50 62 +11132 PF11300 DUF3102 Protein of unknown function (DUF3102) Pollington J, Finn RD anon Pfam-B_4016 (release 23.0) Family This family of proteins has no known function. 20.10 20.10 20.10 20.10 19.80 20.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.57 0.71 -4.12 14 260 2009-01-15 18:05:59 2008-08-15 15:05:59 3 5 129 0 69 230 14 117.60 29 44.05 CHANGED RTPhlIAuEINhI+cQopKhLLtsAlEIGRRLpEAKuLlPHGEWhcWLcESVuYSppTAspLMplacEYGphh................so.ssp-u..N..............ttshssLsYTQALlLL...GlPEEER-pFls-pDVps.MopRELpQAVcE+ ................................EIp.h.pp....stp....hh.shlEIG+RLtcsK.....p.h.l..sHG.....-atcWLcpplsaSppoAp+hMplhcca.us.....................t...........t.........................t.h.pL.hppslh.Ll........sl..s-t.-c...phh........t..................-........l.tp...Moh+ELptslpp................................................................................................... 0 28 52 58 +11133 PF11301 DUF3103 Protein of unknown function (DUF3103) Pollington J, Finn RD anon Pfam-B_4046 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 
25.00 25.00 30.40 29.20 22.30 19.70 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.08 0.70 -5.75 15 133 2009-01-15 18:05:59 2008-08-15 15:14:23 3 1 126 0 20 86 3 341.20 63 88.23 CHANGED KRpLAhpLScpYsplcssLpppIsphpLssslspLlppstsss....h.pphppA-pslRphKGlssho-s...............LLplRLADssMLsuWQpGc.sPLFAFEPsGDDcsWpYIEAYDltGplHhLDVYplP-pPVhVVDssupc-l+AGLtsM+sEhs............t...sspp.phtstusts....ppt.spupspsIsTTlLKKIRLpDDpEPWISGKAEIYAIVTGVsPSRDEPsLDlVEMPYLDYDcpsYYPNQllIaWsRYRWGAADllLMEpDDGTNYKpLAphLlcAAEplLKsIPDPEVQGYAIIsQITscIIcslPDuhhTNDDDFVDVaYTLhpspsYsDHsGAuGNAssTFsPLTIsPT ...........................................................................................KRpLApphSpsYAshtpoLKoQIospsLSlslS-Llcs.sPss-......hSpQLppA.DpslRolKGIs..paT-p...............LLQLR..LADsoMLppWQpGp.SPLFAFEPS.G.sDcsWQYIEAYDVYGQIHQLDVYQLPDVPVFVVDsDSuhELKAGLQAMRAEMp+L..........ttssplsspcSs.uhcsuspo....hspuusu-ssPISTTVLKKIRLpDD+EPWISG+AEIYAlVTGVDPSRDcPTIDLl-MPYLDYDcQDYaPNQllIHWsRYRWGAADhILMEQDDGTDYK-LAKpLVcVAEEVLKhIPDPEVQGYAIIsQITuKII-AIPDGVLsNDDDFVDVFYTLMQDTpYsDHPGAsGNAsATFEPLTI.PT................................ 0 3 5 12 +11134 PF11302 DUF3104 Protein of unknown function (DUF3104) Pollington J, Finn RD anon Pfam-B_4053 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 20.00 20.00 20.00 22.10 19.40 19.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.34 0.72 -4.60 20 52 2009-09-10 14:59:42 2008-08-15 15:35:57 3 1 25 0 24 55 153 68.90 44 75.02 CHANGED shFLuV+sGDhVlVpsss..tsspttpp.sWWMGpVlpspGG....ARsPpssoLFQVADVDoGhI+WVNADpVo+llhshc .....PhFLtV+sGchVlVpppp..........t.sWaMupVlhspGG....ARsPcssoLFQVADVDoGhIpalNAD.Vo+Il.p..s...... 
1 1 7 14 +11135 PF11303 DUF3105 Protein of unknown function (DUF3105) Pollington J, Finn RD anon Pfam-B_4062 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 25.00 25.00 25.20 38.20 23.40 24.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.80 0.71 -4.31 37 224 2009-01-15 18:05:59 2008-08-15 15:41:28 3 4 181 0 135 235 28 128.10 34 48.60 CHANGED ssH...lpts..lsYs.....psPPsuGsHsshWhsCu..hY..spsl.sppsVHsLEHGAVhltYcPs.lsssplppLpphsps.ts............hsllSP.....hss.hssPlslsAWu+pLpl.css--splppFlppa.........hpuP..ptPEsuAsC .....................t..H.hphs..lsYs.....psPPsuGsHss....hWtsCst.lY..spsl.sEphVHuLEHGAVhlsYcPs..hsssp...lppLpphlpu.ts.......................hhlhSP..........h.s.....cs..PlsLsuW...........G+p..............Lpl.cssc.....D...tclspFlppa............hpss..phPE.su.C............................................ 0 43 90 126 +11136 PF11304 DUF3106 Protein of unknown function (DUF3106) Pollington J, Finn RD anon Pfam-B_4069 (release 23.0) Family Some members in this family of proteins are annotated as transmembrane proteins however this cannot be confirmed. Currently no function is known. 23.10 23.10 23.10 23.70 23.00 23.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.83 0.72 -3.63 30 277 2009-01-15 18:05:59 2008-08-15 15:48:36 3 2 230 0 106 262 41 94.00 27 48.62 CHANGED sWscLossQppsL.uPLupcWss.hsstp+c+WlplAppasphoP--Qp+hppRMpcWspLoscQRppARpsapph+pLs....Ppp+p...ppWctYQpLssEc+ctLAtpt ...........................WtpLs..pp.hL.tsht.pWst.hs...p+p..+hh.p.hutca.phoPppppc.hptRhs.cWtphoP-pRctsRpp.ap.p.h.+pLs....spp+p......ppap.ta.ppLs.tp+pthtt..t..................... 
0 26 67 86 +11137 PF11305 DUF3107 Protein of unknown function (DUF3107) Pollington J, Finn RD anon Pfam-B_3881 (release 23.0) Family Some members in this family of proteins are annotated as ATP-binding proteins however this cannot be confirmed. Currently no function is known. 21.00 21.00 21.00 25.60 20.90 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.37 0.72 -4.10 26 426 2009-01-15 18:05:59 2008-08-15 15:52:41 3 1 423 0 109 263 78 73.40 43 94.06 CHANGED MElKIGlpsssRELslpSsQos-EVpphVu-ALssss.GlLsLoD-KGR+hlVPssplAYVEIGssssR+VGFus .MElKIGlpsusRElslsos......p...os-..-lcptlspALuss.s....ulLs.LTDcKGR+hLlPuspIAYVElGss.ssRpVGFG....... 0 33 80 101 +11138 PF11306 DUF3108 Protein of unknown function (DUF3108) Pollington J, Finn RD anon Pfam-B_3856 (release 23.0) Family This is a bacterial family of putative lipoproteins. The structure for Swiss:Q64U78, PDB:3fzx, the first structural template for this large family including several homologues in the human gut microbiome and in metagenomic datasets, folds into a beta barrel that topologically looks like a small-scale porin (such as FepA). Swiss:Q64U78 is a putative exported protein, and this fold is of the YmcC-like type, with a predicted signal peptide SpI cleavage site AGAMA|QNQDC, and a Phobius server prediction of non-cytoplasmic localisation for amino acids 21-236. The possibility of it being a membrane protein can be ruled out by the hydrophilic nature of the solvent exposed surface outside the barrels. Analysis of sequence conservation suggests that an area near Glu172/Trp206 is potentially interesting. These two residues are also conserved in Dali hit PDB:2in5, a hypothetical lipoprotein classified as a new YmcC-like fold in SCOP (SCOP:159271, with a 12-stranded meander beta-sheet folded into a deformed beta-barrel) despite large structural differences between the two structures, suggesting similarity in function. 
27.00 27.00 27.00 27.10 26.80 26.60 hmmbuild --amino -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.52 0.70 -4.71 135 1119 2009-01-15 18:05:59 2008-08-15 15:57:36 3 3 918 1 417 1035 746 234.00 15 85.25 CHANGED hlshhssssssusps........................................................stphpYplph.tsh.....thu...pushph......hpss....ttaphpspspssul.......h..hptthts....hhhst.tth....stpattptppsshppptp..hpFshp.ptpsh.................hptt.tp.sh.tsh...D..huthhhhphthtt.sh.........phshslhsscch.hphphph.........hup.Eplp.sstGp.hcslphp..................ttthhpp.ppshphWhu.......ss..hl.P.V+lcttt.........hG.shphpLsphp ........................................................................................................................h.......s..s...ttt..................................................................t................shphpYphph..tsh..........phu......pushph........tpss...................tsaplshp.sps.sth...................thhs...thth.ss..tslh...Ptpappptpt.tttpp.t.p....hpFshs.stpst................ttptpt.tphshtt.ssh.....Dh..hohhhpl.tt.htsssh.............shshtl....h.csc.ph..ph.p.hph.........hGp..Eplp..s.....s..s.G....p...hc..sl.....+hp.......................thhpp..ppp..hphW.hu........sshshl..P.V+lphhpt.......G.phph.l.t..t.......................................................................... 0 129 280 358 +11139 PF11307 DUF3109 Protein of unknown function (DUF3109) Pollington J, Finn RD anon Pfam-B_4077 (release 23.0) Family This bacterial family of proteins has no known function. 
20.90 20.90 22.80 20.90 20.60 20.70 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.26 0.71 -4.78 28 212 2009-01-15 18:05:59 2008-08-18 12:42:32 3 1 210 0 65 204 121 181.80 49 92.84 CHANGED MlplGcslVSc-llcppFlCsLssCKGsCCVEG-hGAPLcpcEstlL-clhscV+shLpcculcsIccpGs.lpstpG-h.Tshl.su+ECsassatccGhshCuIEpAYppGhlsacKPlSCHLYPIRVp+hsshsAlNY-+WplCcsAppLGcEhpVPlYcFlKcALlR+aGcsWYp-Lcph ....................MlQlscslVS.DllcccFlCs..LssCKG...tCC.lEGDAGAPl-.-ElthLEclhstlh.s.Lsscutt.sI-cQGsshpD....p.-GD.lsTslV..s.s+-ClFssat-..........c.....G.....hshCAIE.+.A.Y.cpG.cscah.KPlSCHLYPI..Rlp..ca.s.s.ap.AlNYc.RWc.lCcsAsthG+chslPVY+FLKEPLIR+FGc-WYpELc..h.............................. 0 23 50 62 +11140 PF11308 GHL1-3 DUF3111; GHL; Glycosyl hydrolases related to GH101 family, GHL1-GHL3 Pollington J, Finn RD, Naumoff D anon Pfam-B_4091 (release 23.0) Family This family of bacterial and lower eukaryote glycosyl hydrolases is related to CAZy family GH101, and is made up of sub-families GHL1-GHL3. In the example Swiss:C02A26, the substrate-binding Asp is residue 596, the nucleophilic Asp is residue 706, and the proton donor Glu is residue 747. 
25.60 25.60 25.60 28.20 23.40 24.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.10 0.70 -5.09 24 306 2012-10-03 05:44:19 2008-08-18 13:05:20 3 2 258 0 36 239 5 281.20 36 43.87 CHANGED sLpp.hGlc+halth.DsWttuha...pP-hlssuccuG................YLhusaDpYtshh....pDtahsAp......htcptulpptDGohhtthp..usGhhh...sstshsaV+pphsclhp....ththsuhFLDV.uss.sp-sa........sscHhhscppshps+ptphpaltpc.sllhGSE-GsshsspslsFsH.........asst-hhtpcpus.ahG........hP...........................lPLYphVaHDslIs..........s.........Whhsp.ph.....ss.ppchhLhslLasssPhlphs.ts.hth....p.......htphhctpphhpshHcplsppchssachlststhlQ............pop ....................................hpp.sGlp+hWlGL.ssWt.uhh...pPphVspAcphG................YLlGsYDSYpohht.u.s.csW.TAph......ssphh-pssl.pptsGph.tGFh....upGhhLN.........P..s..hths.V+pRhpcIlp....hspaNShFlDsDuTu.hh-DY........pscc...hssppp.lpAt.pRhpalsp.p.shllGSEsGNshsspslsFAHGhpo.s.htWsDpcM+ps+pSsYYlGtaa....stthPth..ah.K.s.lKt.a+......plhhsP.YpVPLYphVapDplIo..........oa.......+Wt.solKh.....ps.hssR.LhthLaNsPPhhHLs.csphpp..........+.............hpplpca.psat.hHcphhccthssFpaLscpt.VQpo.............................................................................................................................................. 0 16 23 29 +11141 PF11309 DUF3112 Protein of unknown function (DUF3112) Pollington J, Finn RD anon Pfam-B_4107 (release 23.0) Family This eukaryotic family of proteins has no known function. 
23.30 23.30 23.80 23.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.06 0.71 -4.44 32 153 2009-01-15 18:05:59 2008-08-18 13:10:15 3 2 86 0 121 150 3 176.40 30 42.87 CHANGED PhsG.pphFthhhhslYsllsslllh....llsusl..hYhLs.ptappt+plhhsuulhlhlashsslsllslua.hlPpp.......................h.IESFu.h...................................................phssphhllhhoolllhluuhh.......................RssssFp..........t.hsps.u.htutsshYlh.hshElllsllYllsRlDLRFY .............PhhG.pphhthhhhslYhhlsssllh....llsusV.thahL.s..psh.pth+sl..hhuush.lhlhuhhslsllsluh.hh...Ppc..............................lEpFu..h.......................................................................phpsphhllhhoohLlhlGAsh.......................RssssFt..........p.hsps.uhhps+ssaYlh.aslEllVshhYhluRlDhRFa.................... 0 25 60 105 +11142 PF11310 DUF3113 Protein of unknown function (DUF3113) Pollington J, Finn RD anon Pfam-B_4134 (release 23.0) Family This family of proteins has no known function. It has a highly conserved sequence. 25.00 25.00 56.30 56.30 23.10 22.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.00 0.72 -4.29 3 239 2009-01-15 18:05:59 2008-08-18 13:28:53 3 1 165 0 3 60 0 59.70 73 98.36 CHANGED MQQQAYINATIDIRIPTEVEYpHFDDVDKEKEsLADYLaNNPDELLKYDNIsIRslslEV ..MQQQAYINATIDIRIPTEVEYpaasDVDKEKEsLADYLaNNPsELLcYDslpIRslslEV 0 3 3 3 +11143 PF11311 DUF3114 Protein of unknown function (DUF3114) Pollington J, Finn RD anon Pfam-B_4178 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as cytosolic proteins. This cannot be confirmed. 
25.00 25.00 26.20 25.10 20.70 20.60 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.77 0.70 -5.02 10 265 2009-01-15 18:05:59 2008-08-18 13:51:55 3 3 200 0 27 196 0 253.50 36 72.05 CHANGED lGSstFppLWpttptpsss....KLLphlLshlcMPpELoG-LccsppL.....lscFSs-LuP+csFW+phuplVQpAFPsssLop...psp..lp++lHQFRYlISoQQAQaVRcHa+..ptGMTDupALApYLpt.............p+.sssYshhESARLHNKht...h..cssphl.YPDspsp.h.....NhKlLlsFHoEFILDppGpFLNplDs....................EthopNGllNGASFNYus.........+....NsspHtpLDVcPsphaDPtFRccshcu....F+SP..........p.pchphuahspKuhY ..........................................................................lGSssapplaphpthp.pt...........clL..hhthlth..cLuuplcppthL......lt+FusslsPcssFWc.huphVppAaPspphup...................spp...hs+plHQhRYhIspQphpa..lRsa....a+.....ppGt...TDtpALttYlp..................t..shchshtpSuRLHNKh......h........ptphh.aP-sts..h........NhKl.h..s...F..H.oEFILs.pp.GpFlsphDs.............................ps.......pppsllNGsSFNYus...............p..........N.scpHp.LDl.sPs........t.......haDsphR+psh+s....ahS.P..................chp...h.......................................................... 0 1 9 19 +11144 PF11312 DUF3115 Protein of unknown function (DUF3115) Pollington J, Finn RD anon Pfam-B_4191 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.10 21.10 21.50 21.60 20.80 20.60 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.06 0.70 -5.57 20 129 2009-01-15 18:05:59 2008-08-18 13:57:06 3 3 119 0 96 125 2 302.80 36 84.10 CHANGED LF+ssFpspL....................................tusc.......LpphlQsVKucLYNRDahuAFss..--hhtAYAsRWSPSRALuYuolFssLt...........phhp.lssstss...................................................s+VLCIGGGAGuELVALAula.........stsppttustsus.................................lslslVDIADWSsVVc+Lssslpos.....................h..........pscsFslsFtcsDlLshsps........................phhslhps................hsLlTLhFThNELFops.hucThcFL.+LospscsGoLLLlV-SsGSYSclplG..................pK+aPhpFLlDphLlussssp.......sssWEhlpppDSpWaRh-tp.....tlcYslt....LENMRF.h+LYR .....................................................h.t.h.....s.tp.......lpphlQtlKspLYsRDa.sAFss..pphhtAYAhRWSPuRALuYuulFtp..l............chht.htsstt....s............................................................................................................tpVlClGGGAuuElVALAuhh....................pph.st...tt.......................................................tlslshlDIAsWusVVppLssslpot..................h.............p....spphsspFtppDlLphspt........................phhtl.ht......................t.sLlTLhFThNELFops.hscThpFLhpLss...htsGslLLll-SsGSYSplslG..........................................p++aPhpFLlDphLlustttp................Wch.l..pp-ShWaRhstp..........LcYslt.........LENMRa.h+LYR.................... 0 15 45 81 +11145 PF11313 DUF3116 Protein of unknown function (DUF3116) Pollington J, Finn RD anon Pfam-B_4194 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Bacillales. 
23.20 23.20 23.30 23.80 22.90 23.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.72 0.72 -4.37 6 124 2012-10-04 14:01:12 2008-08-18 14:10:29 3 1 42 0 3 60 0 84.90 44 97.09 CHANGED ME+ssccLlhpVL.hs+sssssIpcLohphlphssspsaTKNELLhslYWLEhpGYlpRsppssppR.YohTtcGchLLp+lpsph .............................MEcPscpLIhpVLphscsssssIccLol.E.hlpFsslssaTKNELLaslYWLEppGFIhRs..sps..s..p.....pR...Yo.hTpKGchLLp+lcp..l..... 0 3 3 3 +11146 PF11314 DUF3117 Protein of unknown function (DUF3117) Pollington J, Finn RD anon Pfam-B_4211 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 25.00 25.00 52.10 52.00 21.10 20.00 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.65 0.72 -4.44 9 364 2009-01-15 18:05:59 2008-08-18 14:14:43 3 1 361 0 92 169 30 50.70 73 87.12 CHANGED MKPRTGDGPhEVTKEGRGIlMRVPLEGGGRLVVElsssEAtpLussLssVs .MKPRTGDGPhEsTKEGRG.IVMRVPlEGGGRLVVELssDEAstLGstLKsV....... 0 32 75 88 +11147 PF11315 Med30 Mediator complex subunit 30 Coggill P anon Pfam-B_28118 (release 23.0) Domain Med30 is a metazoan-specific subunit of Mediator, having no homologues in yeasts. 25.70 25.70 30.10 28.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.89 0.71 -4.08 6 107 2009-01-15 18:05:59 2008-08-18 14:37:18 3 4 82 0 74 96 0 135.60 50 66.20 CHANGED NslsLsRlGQETVQDIloRhhEl...FthL+shQhssssTpppusup-+huKlQEphRol+lLF++L.RllY-+Cs-.s.sshp.sslEsLIPYtsEshsph-sp.hutph+hllQE+cEllE....pV+tKNcQL+cIlD+hR.hlW-INoM..LuMpRs ..................NssoLCRlGQETVQDIV.RThEl...FQ..hL+shQ...lPNG.sT.ppsshpDRhsKlQ-pLRplplLF++L.RLlY-KCNEss....uGh-......h..ssE..........p..LIPYh-E.sts.+p.-sc....u...Rh.sspE++E..lhE......pl+.KNpQLKpIhDphRp.lIW-INsM..LshR.................................................................. 
1 24 30 52 +11148 PF11316 Rhamno_transf DUF3118; Putative rhamnosyl transferase Pollington J, Finn RD, Eberhardt R anon Pfam-B_4218 (release 23.0) Family Most members of this family are uncharacterised, but one is a putative side-chain-rhamnosyl transferase [1]. 20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.40 0.70 -5.16 30 99 2012-10-03 05:28:31 2008-08-18 14:48:30 3 4 64 0 36 148 27 203.30 22 69.03 CHANGED pllGLCRFSYsu....hG.GFp....assh.tcRtAhLYAPsRL-cRFphFEslsLPSLtuQTD.-FphlllhGcshPcta+sRLccLstshP...Qh+lhhpsPtp.pRpsh+csl.ptsppsssssslQFRlDDDDAlulDFVtRlRpsspsh.slh..sppsplulDFs+Galh.phssc.Glsht.thpsahssuLuhhhps.ssppolhsasHc+lhpphP.slohsstsMalRshpspNDSppphstp .......................................hhh.hRFsh.......s..................ttp.thhhs.thlp...pRhtlFEphsLPSlttQTs...s....FphllhhssshPt.hpp+Lpplh..tshs...phpl.h.......h....hs...sht....pt...p.....hhpphh....tt.............hh.....s..sss.hhphR..lDsDDAluhcFltpl+pth.t.th......tppp.hslsaspGhhh...t.t...s.......h....h...hhshhhuhh..............tt.......shht.h..s.Htph.phhs......hh......p..t.....shalpshpt.Nssp.....h................................................................................ 0 15 32 33 +11149 PF11317 DUF3119 Protein of unknown function (DUF3119) Pollington J, Finn RD anon Pfam-B_4223 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 30.70 30.20 20.80 20.20 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.72 0.71 -4.06 23 112 2009-01-15 18:05:59 2008-08-18 15:54:12 3 1 99 0 51 110 104 120.40 40 74.53 CHANGED VpLsPSaplPlllllhulsLLhl....sW...sulllulFGLFLLlQohoLRLcFTscsL.VaRuscplR...........RFPYs-WlsWRlFWshlPsLFYFRElpS.....IHFLPILFDsppLcppLcp+sss ...........................V.lpPsaplPllllhluhsLhh....lp........h.....sul.sl.uLhGLFLLhQosplRlpFsss.sL-VhpusphlR........................RFPYspWhNWclF.........Ws..hPlLhYF+EspS............................IHFLPIlFsscpLpspL.c+ss......................... 0 14 36 48 +11150 PF11318 DUF3120 Protein of unknown function (DUF3120) Pollington J, Finn RD anon Pfam-B_4230 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 27.60 134.60 20.60 19.70 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.65 0.71 -4.09 22 71 2009-01-15 18:05:59 2008-08-19 09:01:02 3 1 71 0 28 77 136 201.10 47 85.40 CHANGED AuhLVslPVFlQAPWVRhpPhuuhLhThlllsluh.Lthhpppp.hthhGsLLlGFShSWLAGslaWGWLRhcPlhHLPVEAlALPlAlsGLps+W.+lGssFYLuSLlGTAhTDlhhhlTGlMshW.pVlpAs..spA..s.lLppAutplhpPhulshlhhhAsllltluphhhptup....p.psWuhuuAVL.oTLlVDuLFLlsAll .AuFLVslPVFlQAPhVRhhPhhSllhThshlhlu..lhLhpp.sc.pthWG-LLlGFShSWLAGulYWGWLRhcPlhHLPlEAluLPhAlhGL.tppW.+lGshFYLGSLlGTAlTDlYhaLoGLMsaWRQlhps-..ss.A..s.lLpsAltpltTPhuluhsllLuhlLlhlGhhshp.pp.......ptchWuhuGAVL.oTllVDuLFhluAh.... 0 4 18 26 +11151 PF11319 DUF3121 Protein of unknown function (DUF3121) Pollington J, Finn RD anon Pfam-B_4233 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as phospholipase proteins however this cannot be confirmed. Currently this family has no known function. 
20.40 20.40 20.40 20.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.11 0.71 -4.78 17 451 2009-01-15 18:05:59 2008-08-19 09:15:55 3 4 419 0 48 252 42 169.90 40 71.10 CHANGED hstsppCpp.supLpRLuCFDplhsTPhphsttt.t......psstWpRAhspEtpRsscsshhhhppst.....ssshllTs.....................sAlG..ss..p.PlLhlSClssIoRlpLhLscslpps+Vplsl......t.sshsp.Whsc-sGhlLcuGRGLPAIcplKphluuppLplco.sssslDsLpFDhssLspulcPL ...................................................................................l.pthpsCRpEsusLERLsCYD+lh..s.P...hpssshssuhst..........................hutuWpRAhppE.p.cRpuss.s.tlLlTps.Gc...tsollITo.....................PAlG.....psss+.PV.LhhSClDNITRhpl.....AL...+..s..l..c..s....pcIsV..ol......cpRslcspWh..l.R.-sGsLL-SuRGLsuI-pIKpLhuucpLhlco..sssuAspLTFslcGLscAlsPL..................................................................... 0 12 22 34 +11152 PF11320 DUF3122 Protein of unknown function (DUF3122) Pollington J, Finn RD anon Pfam-B_4242 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 25.00 25.00 32.80 32.50 22.20 21.30 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.74 0.71 -4.24 21 64 2009-01-15 18:05:59 2008-08-19 09:22:56 3 1 59 0 24 73 130 133.00 37 78.06 CHANGED Atl+ppppsssphhhR.....ShpoLRDhctpoWQllha+cscs....tpslpLRlVGFPGphclsHPpsLhlpsuppphhhs....ssshh..tpssssssuEaDLsslLspLspspPL+LpLP...sshspLsVPPaVVpEWppL .........A.l+ppp-tsGphhh+.....ShpoLRD.chpoWQlVhaKcsps....spslsLRlVGaPGphclsHPpsLplpsuptphlhs....ssshh....tpsstsssupasLsslLspLspspPLcLpLP...uh.splsVPsaVVpEW+sl........ 0 3 16 22 +11153 PF11321 DUF3123 Protein of unknown function (DUF3123) Pollington J, Finn RD anon Pfam-B_4246 (release 23.0) Family This eukaryotic family of proteins has no known function. 
25.00 25.00 44.10 44.10 23.70 23.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.45 0.71 -3.79 4 54 2009-01-15 18:05:59 2008-08-19 09:36:50 3 1 4 0 31 42 0 100.10 64 58.26 CHANGED VSVRTRVGKLsss...pRpLVLWLuAVVVSsA-..E..GaLsVlYKGsFP.-DPF+sVRV.ARc-sKhhsssAAsssuss.s.................st.susAP.RPTTAGKSltlLKt.h.Et ..........................VRVRT.VG+LGTo...sh+LVMWLGAVVVSDAD..D....GHLEVIYNGNFPRDDPFRTVRV.AVKDVKL.uPRPAPTPA...........................NhAAP..RPTTAGKsLPRLKMhhLE................. 0 0 4 16 +11154 PF11322 DUF3124 Protein of unknown function (DUF3124) Pollington J, Finn RD anon Pfam-B_4248 (release 23.0) Family This bacterial family of proteins has no known function. 20.40 20.40 20.60 99.30 20.30 20.20 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.44 0.71 -4.21 30 107 2009-01-15 18:05:59 2008-08-19 09:39:30 3 1 102 0 63 113 21 125.30 39 72.35 CHANGED LspGpTlYVPlYSplYoss.....cppshsLssTLSlRNTc.spslhlsplcYaDTsG+Ll+pYlspPltLsPLuosclll-EcDssGGoGANFIVcWpuspslstPllEuVMIGstusQGlSFsopGpsI .....h..GpolYVPlYSpIYpts......cppshsLosTLSlRNTs.spslhlsplcYYDosGchl+sYlcpPltLtPluohchhlscpDspGGoGANFlVcWpusptlspPllEuVMIus.tu.sQGlSFsopG+sI............ 1 20 45 60 +11155 PF11323 DUF3125 Protein of unknown function (DUF3125) Pollington J, Finn RD anon Pfam-B_4250 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Staphylococcus. 19.20 19.20 19.20 19.40 18.60 18.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.75 0.72 -4.18 5 369 2009-01-15 18:05:59 2008-08-19 09:44:08 3 3 69 0 3 125 0 40.30 58 77.09 CHANGED MIFSQNLFRRPTPThIVCRNWESNFSLLGP+.QLAp.scasF..chhLlauP ...........MIFSQNLFRpPTPs..hhTRIEK..SLLQAHF+SVNYCQYNFlccpTLIasP........ 
0 2 2 3 +11156 PF11324 DUF3126 Protein of unknown function (DUF3126) Pollington J, Finn RD anon Pfam-B_4268 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Alphaproteobacteria. 25.00 25.00 29.60 48.30 24.40 19.00 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.71 0.72 -4.30 18 204 2009-01-15 18:05:59 2008-08-19 10:47:42 3 2 195 0 74 130 111 62.70 53 85.20 CHANGED ElcKL-sYL+cpFtssplpVpsRP+psDSAEVYlG-EFIGVla+D--EGEhSYsFpMsILDlD .....El+KL-AYhKRsFsNsclpVcARP+K.sDSAElYlu-....EFlGlla+D---G-lSYsFsMAILDhD... 0 22 45 55 +11157 PF11325 DUF3127 Domain of unknown function (DUF3127) Pollington J, Finn RD anon Pfam-B_4273 (release 23.0) Domain This bacterial family of proteins has no known function. However, it does show distant similarity to Pfam:PF00436, with proteins such as Swiss:D1W984 being similar to both families. This suggests that this family may have a DNA-binding function. 20.90 20.90 20.90 21.00 20.70 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.80 0.72 -3.97 29 271 2012-10-03 20:18:03 2008-08-19 10:48:04 3 1 225 0 71 260 272 86.50 36 67.45 CHANGED ElpGK.Iplls-ppshGp.sG..a+KpEhVlc.....TcsQYPpclplphhp.DKssh.sshp.sGpclcVuhslcuREas........s+aFsslpu..W+l- ............ElpGKlItllssppus.op.sG...a+ppEaVlE.........Tc-QYPp+lsh.....-hhs.DKlsp...slp.hG-clpVSFslcuREWs........s+aFNolpA..W+l-.................... 0 28 57 70 +11158 PF11326 DUF3128 Protein of unknown function (DUF3128) Pollington J, Finn RD anon Pfam-B_4309 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.20 21.20 21.20 21.20 21.00 20.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.12 0.72 -3.86 24 194 2009-01-15 18:05:59 2008-08-19 11:52:37 3 1 184 0 150 191 0 86.90 29 48.69 CHANGED hPs.....shSChpAFDphhhCa...................olGGQh+shYRYGphssCscphccFhaClpppo.....................cspclpEha+cchhpp.........ptpsSSEDlWchR ........................p.phSCtpsFDthhhCh...................ShsuQhpshYRYGchc..sCspphsDahhChch+s..h.........................................+tptlp-ha+c.+thpt.........pttssS-DlWchR........................................................ 0 41 82 124 +11159 PF11327 DUF3129 Protein of unknown function (DUF3129) Pollington J, Finn RD anon Pfam-B_4316 (release 23.0) Family This eukaryotic family of proteins has no known function. 25.00 25.00 25.00 28.20 23.90 23.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.35 0.71 -4.46 23 214 2009-01-15 18:05:59 2008-08-19 11:59:31 3 3 57 0 193 217 0 197.00 32 62.76 CHANGED VsAHGslhsspGssGusss.uLulssuo...............................PRDssst.sssQtDToIhRstch.sstusshGRT.uuGsscsupsspthM...........................................................................................................uussLPpl.osGGslphThHQVNsDGAGPaos.lDsTusGs..tsapphpVTpslPGhs.t.....................................................................uhopupspDFPlsVphPushoCTGTVuGtpNVClVRspNsAtAGPFG 
....................................................................................................................................tuHuslhss..pG.....s...sG..s..s.hs..ululs..ss..s...............................sRss.sp.....sshQtDoslh+ptph............s.sts......s.....s.hG+T..t.s...G..sh...s..h..sphspshh..........................................................................................................................sssslPpl.ss..sGp..lshT.hH..........QVN..t...DGA.GP..asCtlDsousGs....sap..shp..Vs.p.sl..PGts......................................................................shs.....t...u...p.sp-a.slplphPsshsCsGs.su...G....p......N....VClVRspN........sAhu.GPFG................................................... 0 84 133 176 +11160 PF11328 DUF3130 Protein of unknown function (DUF3130 Pollington J, Finn RD anon Pfam-B_4322 (release 23.0) Family This bacterial family of proteins has no known function. 20.60 20.60 20.60 21.80 20.20 20.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.96 0.72 -3.94 2 128 2009-01-15 18:05:59 2008-08-19 13:10:10 3 1 49 0 2 91 1 82.80 54 94.66 CHANGED MpEIKVcEcThppauochtppupu.sYLPhKsGNMAaSRANSIsQLRoALh-LV-sVEsFQhVscpDAoRLKphG.uaAhpDphhtphhs .MpEIKVpEsThppHATKLtScusu.pYLPhKsGNMAYSpAN..SIsphRoALh-LVDsV-sFQsVsppDAsRLKchGhuas+pDQthupchs............ 0 2 2 2 +11161 PF11329 DUF3131 Protein of unknown function (DUF3131) Pollington J, Finn RD anon Pfam-B_4335 (release 23.0) Family This bacterial family of proteins has no known function. 
20.30 20.30 20.30 20.60 20.20 20.20 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.14 0.70 -5.78 24 249 2009-01-15 18:05:59 2008-08-19 13:20:03 3 11 171 0 76 236 7 243.20 30 16.30 CHANGED hphApsAWpYFcpNhp.spTGL....VNolss...asosTMWDhuSalhALlAAccLslIsppEF-pRlp+hLssLuplPLhpspLPN+sYsTpTtphssYtspPst..lGWSAlDlGRLLlsLpllpppaPpasstlspllt+Wphsphlp.cGpLaGuphtps..thphhQ...EGR..lGY....EpYAApuapLaGhsshpAhph......tsa.p.hspl.GlslPhDsRcstph.ts.s.lso-PYlLpulEhGhD.st.............htthAcplapsQcpRacpTGhlTAhoEcslspuPYFlYssl..aupGpsWsoloc...sGpphsphp....slSoKuAFuhasLa.cssYocpLhptl.ppLhs..pcGaYpGhYEssutsscuhT.hNTNullLEuLhYptpGp ...................................h.phA+psW+YFtp.sp.tp.s.sL.........s.N...p.......ttpTo.hslGsYLhullAAR-hGhIshcEh.cRlptsLsoLs+hpha.pG.cL..phYpTpThp.......h.p....Ph.................h.SAlD.GpLhshL....hhlppt............................................................................................................................................................................................................................................................................................................................................................................... 0 16 35 58 +11162 PF11330 DUF3132 Protein of unknown function (DUF3132) Pollington J, Finn RD anon Pfam-B_4348 (release 23.0) Family This viral family of proteins are 55kDa. No function is currently known. 
19.70 19.70 20.50 268.50 18.70 18.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.63 0.71 -4.29 3 62 2009-01-15 18:05:59 2008-08-19 13:35:40 3 1 2 0 0 38 0 124.00 96 74.21 CHANGED SSSHYFFSKNITPTSVERNFGGVAQLEVERAKLSFETFGNKFLLKDVFMFSDQSLGDNILSYTLLKEEGHIDGMRTAGDDVLLEKDGEVVMILDSRDEGRMWIKDDVWAEVTEHGSKSAREYCM SSSHYFFSKNITPTSlERNFGGVAQLEVERAKLSFETFGNKFLLKDVFMFSDQSLGDNILSYTLLKEEGHIDGMRTAGDDVLLEKDGEVVMILDSRDEGRMWIKDDVWAEVTEHGSKSAREYCM 0 0 0 0 +11163 PF11331 DUF3133 Protein of unknown function (DUF3133) Pollington J, Finn RD anon Pfam-B_4400 (release 23.0) Family This eukaryotic family of proteins has no known function. 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.46 0.72 -4.39 13 112 2009-01-15 18:05:59 2008-08-19 13:43:25 3 2 20 0 70 97 2 45.10 40 5.96 CHANGED uGGAPFlsCssChELLQlPpchhlsp+p.pp+lcCGuCSplLphSls .......hGGAPFllCpsChcLLQl.Ptchhls.p+t.tp+lp...CGuCScllphsh....... 0 10 32 55 +11164 PF11332 DUF3134 Protein of unknown function (DUF3134) Pollington J, Finn RD anon Pfam-B_4408 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 25.80 25.40 24.50 24.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.53 0.72 -3.84 16 82 2009-01-15 18:05:59 2008-08-19 13:50:37 3 1 74 0 34 81 101 72.70 40 82.45 CHANGED NPuLpc.sRppPAsVlPl+pEssLLsWLcsoGRLlsc-.p-...c..osEEEElS-Lhus--sh.c..--ss--h ...NPuLpc.sRp-PAsVlPl+cEssLLsWLEsoGRLlsc..-pp-t....chsssEE..EElStLMut--sapsc.--ps-.......... 0 5 23 32 +11165 PF11333 DUF3135 Protein of unknown function (DUF3135) Pollington J, Finn RD anon Pfam-B_4409 (release 23.0) Family This family of proteins with unkown function appears to be restricted to Proteobacteria. 
20.70 20.70 20.70 23.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.44 0.72 -3.82 23 146 2009-01-15 18:05:59 2008-08-19 13:53:26 3 1 145 0 36 92 4 80.70 49 70.31 CHANGED ppLPsFD-LhtLAcssP-th-pl+cchscphIsssscphptRLcuhQh+IDthhp+sKNPhpsslhltphhpcphhchpp...sLs ........sLPsFDELstLAcccP-AF-Qh++-Ms-EMI.SAScsM.QpRLaAQQSHIDRVlup.CKNPsHsNVsLMpELppQhl+F+sAL.p.......... 0 6 16 30 +11166 PF11334 DUF3136 Protein of unknown function (DUF3136) Pollington J, Finn RD anon Pfam-B_4419 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 20.90 20.90 21.90 29.10 19.90 19.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.10 0.72 -4.55 15 46 2009-01-15 18:05:59 2008-08-19 14:02:40 3 1 27 0 17 48 116 63.50 56 84.37 CHANGED LoIGELEAsYslYCKALRhLlt-G+s.pcIcRTlCWc+LppLHpSLPppY+SP-cLhthhQ+chp .LTIGELEAGYPlYCKALRhLltcG+ohp-IcRTVCWs+LEoLpRsLPsRYKuPshLhsllpR-lp......... 0 1 6 12 +11167 PF11335 DUF3137 Protein of unknown function (DUF3137) Pollington J, Finn RD anon Pfam-B_4422 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 25.00 25.40 24.70 24.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.79 0.71 -4.51 34 227 2009-01-15 18:05:59 2008-08-19 14:23:45 3 1 152 0 68 230 31 142.20 21 43.07 CHANGED hhTlFcGllhthshsKpFpupTllhpc.........sthhspltt.ttt.......p+lcL-sPpFEctF-VYosDQlpARYlLoPshME+lltLppph.....ssslphuFhs........sclhIslsspp......shFE..sslhpslsptpplpp.hhp-lpthhsIlcpLpLs .............................................p..s.FpG.hhhhph....+p...hp...upshlhss...........tthhst.htshttth..........p+lchEs.s....pFpcpFpVausDplpARhlLoPshME.pLhplppph.....tsslphsFh.s........sclhlslsstp......shFp.......sslh...p....sl......s...p......p....tlpp......hhpclt.hhsllctLp.......................................... 
0 22 42 60 +11168 PF11336 DUF3138 Protein of unknown function (DUF3138) Pollington J, Finn RD anon Pfam-B_4423 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 21.90 21.90 22.10 28.50 21.80 21.80 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.70 0.70 -6.32 4 54 2012-10-03 17:14:37 2008-08-19 14:28:28 3 2 46 0 17 61 7 491.40 62 97.68 CHANGED MK+KhlsALlAhALPGhAs.....AuosAspI+ALQAQlssLQpQhsELpsu.LAApsu.......AAuG....QuuuAAAuusP.sD........ussshTpDDlsph+pQlANtpLKVDuLs-AAsTGPIAGLSVTGYlDPTYlaNRussTSuFQFhNH-.usYsYaNSTFGDVYLDIKKTFGVGPhAPSAEITlMPNRGsG.olhssu.GusGNNIlNTAVVsVPLSsTpTF.uGLhsSFGGYEVQQSNQMLTLTHsLLYDFSDPGShIGs.GhNa..spssWAWKFllGNEQaRTtGulspTGsNA.hGpP.TpSNpsPTFTARVDYshSSALDlGGShNlGRQTLhSusspAGG...YGhtssuuuPYGsaFFsEADATYTLuDhQYNAElDYGQQQ+uAaNGGpAQWYGlSLLuHRKasssslGRMGATLRYDaLsNsKNGGGGuuIuLsusG....hDsssGFGIDssCLAsSpANGs...ECKGAsR.AlshDLLFYPTQQlTVKlEYRHDWAsptVFLRsDGSYuKSNDLLusQhIYoF .........................MKKKLICLLVAGALPGhAh......AuSTSApIKALQAQlsALQtQhKthpst.LAup.u..............uA...suGAtutss..u..sAsuAsP.ss.............G+ApAsLTsD-VopMKQQIAN.QQLKVDuLTDAAsTGPlA.GLSVTGYIDPTYlYNRAAGTSSFLFANH..E.......s.......s.....YNYFNS...TFGDLYLDIKKTFGVGPMAPSAEITLMPNRGNGITLLQNSRGsIGsNlLNTAVVNVPloA..oT.T.LVAGLlPSFGGYEVQQSNQMLTLTHNLLYDFSDPGSYlGh.GsNY..TKGsWAWKFhLGNEQYRTYGSVTQTGTNA.LGDPITTSNKVPTFTARsDYTWS..SALDlGGShNIGRQTLs.SAh..stssus.....YGsGGtAsSsYGsFFFuEADATYTLADlQYNAElDYGQQQHAAa....N...GG.h...AQWYGLSLLAHRKFNsPVlGRMGsTLRYDhLsNsKNGGG..GGGI.AL.NGNG....MD.s.usG...FGlDADCLAsSKANGGLGFEC.KGAsRQDVALDLLFYPTQQITVKVEYRHDWANNKVF.LR.NDG.S.......YuKSND..LLATQFIYSF........................ 1 2 6 10 +11169 PF11337 DUF3139 Protein of unknown function (DUF3139) Pollington J, Finn RD anon Pfam-B_4425 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.79 0.72 -3.70 12 437 2009-09-11 16:35:17 2008-08-19 14:32:33 3 3 346 0 28 180 0 81.70 40 65.70 CHANGED MKKh.hhhhllhllllo....llluhhhaFsh...ptcpshchlssYlscptlppspl...Kphc.hphshKpG..haYttV.sFKD-PshpY.Yp ...............MKKhK.h.hhlIlllhIu........lllu.....h..saaF....uhcshpt+cs..lph..lDs.....YL.p.-.Ks.lK.ccI...Ko..cK....spYSsKcG.....ha..YpcV.sFKDEPs.lTYshp............................................ 0 12 19 25 +11170 PF11338 DUF3140 Protein of unknown function (DUF3140) Pollington J, Finn RD anon Pfam-B_4435 (release 23.0) Family Some members in this family of proteins are annotated as DNA binding proteins. No function is currently known. 25.00 25.00 27.50 27.10 21.70 20.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.09 0.72 -3.87 24 152 2009-01-15 18:05:59 2008-08-19 15:01:41 3 3 123 0 77 151 3 90.50 41 67.61 CHANGED -Fp-lVNMospELccWLcT-cSpSsGpspc....GEosGHpSGRRIV-ILc....Kc+sDLoD-Dh-HM++VVuYl+RHLuQ.....pPcuc..hpcocWRaSLM ......-FpchVNMTssELccWLcoccSpssGppptt.........uEosGHcSGR+IlcILc....K++sDLo--DhcHMRKVVuYl+RHlAQ........cPsss....sp..cocWRaSLM.............. 0 26 49 63 +11171 PF11339 DUF3141 Protein of unknown function (DUF3141) Pollington J, Finn RD anon Pfam-B_4443 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
29.70 29.70 29.70 29.70 29.60 29.60 hmmbuild -o /dev/null HMM SEED 581 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.94 0.70 -6.18 28 171 2012-10-03 11:45:05 2008-08-19 15:06:39 3 4 134 0 86 231 61 521.50 42 76.93 CHANGED DAhQRolLahDlhRpRGspah-HptpshPsVLtFcaEllhDGRcLP+PVNYuLlRIlPPss........hssDs...sK.RPFVVlDPRAGHGPGIGGFKsDSEIGlAL+AGHPCYFluFhPcP.PGQTlEDVs+AEutFlccVtchHPcus.KPsVIGNCQAGWAlhhlAAhpP-LhGPIllAGAPLSYWAGh....pGcNPMRYuGGLLGGoWlouLsSDLGsG+FDGAWLVQNFEsLNPANTLWsK.YsLYuclDTEusRaL-FERWWGGahhLNstEhpaIVDNLFlGN+LusGclhsuDG.pplDLRNIRSPIlVFsSaGDNITPPQQALsWIsDLYsss--I+upsQTIVYslH-slGHLGIFVSuuVA+KEHsEhsuslDhIDlLPPGLYEhsIs-ttssssssphh.spYhlchctRpl-DItu.hscssc-DERtFssVARlSElNhslYcpFlpPaV+AhsTs.sAchhRphHPhRlphphaSDpNPhhshlsshApplRpsRpPVusDNPFlphQpthSctl.puLDtaRDhRDshtEphFhslYuuPhlpALsGhpttpcs.t+......s.ss-cRphhphchtplpssl ..................................................................................................................DshQRulLahDshRpRGsphhpH.ttshPslLpFphEhlhDGRph.+PsNYuLlRIh...ssts.....................h.h.Ds.........tK.RPalllDPRAGHGPGIGGFKt.DSElGhAL+.s.G..H.PsYFl..sFhPpP.PGQ..Tlt..DVhpActtFlccVtt...hH..P..ps...+Ps.l..lGNCQuGWtlhhlAAhpP-.lsGPlllsGuPlSYWAG.....pGpN.PMRYsGGLhGGo..W.h...s...t...Ls.uD....L.....Gs.....G+FDGAaLVpNFEsLNPusshWsKhYsLYupl....Do..E..s..t..RaLc....FERWWGGahhLstp.EhphIV-NLFlGN+LspGplhh.......ssG.pplDLRsI+uPIllFsShGDNITPPp..QAL...sWIsDlYtssp-ltstsQ.pIVYhhHpplGHLGIFVSupVA++EHpphhsslchI-hLsPGLYEhplsp.ss........ssspt.p..........pahlphctRpl-Dlpt.....t.ps-phFtsVtplS-hN.thYctahpPhlpuhsss.sAchhcthHPhRhph.hhS.c.t.NPhht.lt.hAp.lRt..pRp.sss....tcNPa.hthpphhup.lttuL-haRchRDshhEthFhslYus.shht.hh.........................................h........................................................................................... 0 21 45 70 +11172 PF11340 DUF3142 Protein of unknown function (DUF3142) Pollington J, Finn RD anon Pfam-B_4454 (release 23.0) Family This bacterial family of proteins has no known function. 
30.00 30.00 30.50 30.00 28.80 28.20 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.33 0.71 -4.62 16 151 2009-01-15 18:05:59 2008-08-19 15:13:56 3 1 145 0 46 134 20 188.10 38 62.79 CHANGED ssuR.hhu.sVhLssphps..Lchs-plhppllp.ltcWQupGsplsGlQIDFDAsot+LssYstFLccLRppLPtphtLSITuLhsWssou-...LssLhs.VDElVlQsa......pGhashsthtpal.pls+ls.lPF+lGLsphG.............Eh-sshp..............ppLhssP......................WFR..............................................................shslaLhNs .............................s.ssR.shs.slhlssRhsp..Lchssplhttllph.hpcWpsuGsplsGlQIDFDAsTt+LscYssFLcpLRppLP..............s.shtLSITGLhDWspous...LssLht.lDElVlQsa...............QG.htsssphtpYLstlspLp....lPF+luLsphG.....................Eh-s.h...............p.LtssP..htthh..l........................................................................................t....................................................................... 0 4 16 28 +11173 PF11341 DUF3143 Protein of unknown function (DUF3143) Pollington J, Finn RD anon Pfam-B_4460 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 28.70 69.00 22.20 16.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.08 0.72 -4.09 23 90 2009-01-15 18:05:59 2008-08-19 15:17:38 3 1 85 0 41 88 79 64.50 43 57.69 CHANGED lGsp+ssp-.ssWtlppPsWsAplhL-pE-LtVpatpsG.....pcppRuFsYuLSRpDVEsAlhuG .LGspQspp-hssWhlc+PsWpApL.LDh--LtVpYhpuG.t..tp-hpRsFpYuLSRpDlEsAlhuG. 0 6 26 37 +11174 PF11342 DUF3144 Protein of unknown function (DUF3144) Pollington J, Finn RD anon Pfam-B_4465 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
25.00 25.00 25.40 25.10 21.60 21.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.52 0.72 -3.91 20 130 2009-01-15 18:05:59 2008-08-19 15:19:34 3 1 113 0 41 104 45 79.40 41 72.66 CHANGED sFachADpFIpLANcpsppp..c.......sGcVuuuhhaAAARFNAahuussststsphts-K-pAlcaFsspY+cMLc-NL-Dal .....Fa-RADpFIpLANphspst..c.........hGpVuA.AhhaAuARFNuahAAtshtpts-htscK-pslcaasppappMLp-NlD-Yh............ 0 5 20 31 +11175 PF11343 DUF3145 Protein of unknown function (DUF3145) Pollington J, Finn RD anon Pfam-B_4467 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Actinobacteria. 25.00 25.00 27.00 26.40 20.10 20.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.94 0.71 -4.83 17 330 2009-09-10 15:28:37 2008-08-19 15:23:36 3 1 327 0 91 220 102 153.90 50 92.65 CHANGED VLaVHSAPsALCPHlEWAluusLuttVsLcWosQPstsGshRAEhsWpGs.sGouApLASuLRGWptLRaEVTE-sosGsDGtRasaTPsLGlFaAsTsssGslllsEsRLRshhp.utt....ssh-lt.cElchsLGpAWD-ELEsFRaAG-..GAsVpWL.+pV .............VlalHSuPsALCPHlEWAluusL....sst........s....sLcWTsQPAtP.GphRAEssWsGs.VGTuupLAoALRuWphLRFEVTE-sosG.....s-GpRaSaTPsLGlapAshsssGsllVsEhRLRuhlstut.....sutpLssElsplLGpsWDsELEPYRtuGs..uu.VsWLpp.h...... 0 33 74 87 +11176 PF11344 DUF3146 Protein of unknown function (DUF3146) Pollington J, Finn RD anon Pfam-B_4468 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. 22.00 22.00 22.00 71.70 21.90 21.70 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.61 0.72 -4.55 18 70 2009-01-15 18:05:59 2008-08-19 15:27:15 3 1 70 0 28 68 130 79.20 60 91.76 CHANGED PpTTAalRVppQSasptpl-GEVpAGsFcWpFpWpFcpGc.LhVcPSLGRALIpDuLhRFLh+sDYpLEPGGDYsFTlRA+ .PpTTAalRlpcQSWppGplEGEVpAGsapWpFpWpFcpGc.LpVcPSLGRALIp-PLtRFLE+pDYpLEPGGDYpFTlRA+. 
0 4 18 26 +11177 PF11345 DUF3147 Protein of unknown function (DUF3147) Pollington J, Finn RD anon Pfam-B_4475 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 21.80 21.80 21.90 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.50 0.72 -4.04 11 537 2012-10-01 21:19:26 2008-08-19 15:39:40 3 2 300 0 60 163 5 109.80 44 87.60 CHANGED hlL+FllGGhAVsluhIluphlsuK.hGGIFAsFPAVaLAAllhsGhpasupp....upplSpGAlsGMlusllClLsshhhltt.ptWthullhullsWFVuussIaplhph .........................lh+FhlGGhAVhLu.IlusplshK....hGGIhAThPAVFLs.u.l.hhh.u.hpatsp..............uhpl.SpG..Alh.Ghhu.s.l..l.slhl.T.hhl+..t.+.ta..hhuh......Ihuhl.....sWFl.ulsIFphhch............ 0 24 34 51 +11178 PF11346 DUF3149 Protein of unknown function (DUF3149) Pollington J, Finn RD anon Pfam-B_4495 (release 23.0) Family This bacterial family of proteins has no known function. 20.60 20.60 22.10 21.10 20.50 19.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.89 0.72 -4.40 14 230 2009-01-15 18:05:59 2008-08-19 16:01:56 3 2 230 0 48 142 6 38.60 43 21.33 CHANGED chWhpL.FusslGLhShhVIhhslGlhuahsthFlhKhhpssp .....hhhpL.FusslGL.SMlVIlsslGlhhaahuaFhYKhhp-pp.......... 0 5 22 35 +11179 PF11347 DUF3148 Protein of unknown function (DUF3148) Pollington J, Finn RD anon Pfam-B_4488 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 34.60 33.40 18.50 17.70 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.98 0.72 -4.36 23 92 2009-01-15 18:05:59 2008-08-19 16:04:34 3 2 88 0 41 92 86 62.20 48 59.37 CHANGED slGspV+Lhst.PYLKTADPMPMLRP.PDLVslsE.GpVluh+PtshhuVRFcRGoFLlssphL ...lGspVpllthPPYLKTA-PMPMLRP.sDllplu-.GtllsR+PtshWuVRFs+GoFLl-upYl.. 
0 7 26 37 +11180 PF11348 DUF3150 Protein of unknown function (DUF3150) Pollington J, Finn RD anon Pfam-B_4471 (release 23.0) Family This bacterial family of proteins with unknown function appears to be restricted to Proteobacteria. 23.50 23.50 23.70 25.50 23.30 23.40 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.42 0.70 -5.29 25 147 2009-01-15 18:05:59 2008-08-19 16:09:22 3 1 122 0 27 96 19 250.10 31 72.16 CHANGED lLcsllllsLs.lslWoG+K+Lps.cDlth......splPPpplAoLGoK+lhDPcsLpsFsplKpcAp+hhpchGl+F..LG....GaAlPpccssplspcLcsIcscFtpt............KpsFLssY-ptlc-Whsp.s.sc.......auslI+ssss.stcplcpplsFsap..hhclpsss.......thscuLsppVsuLsspLhpElAppAcc...haccsht.........G+..pplop+sLss.L+slpsKlsuLuFlcP.plsslsphlcplLsslPc..sG.slcGsphhplhulls.hLscs ..............L-plllhcl-.hslWSGc++Lps.pDhphs....suplPPcclASLGSK+IsDPcsLpsFppLKpcApRLh.pphGlRF..hu....GaAVPps+h-plsscLscIpp-Ftpt............Kpp.FlssY-pslp-Whsc.p..Pc........auc.hIRpuh..sh-sVcpRlpFsat..hhphpPs............ttstsLsccVss..hucsLhpElAp-Apc...haccshs.............G+..splspps.lss.L+pl+-KlsuLoFlcp.chpPll-hlcphhsplPc....sG.slsss.htplhuhlhhhSD................................................................................................................... 0 10 19 22 +11181 PF11349 DUF3151 Protein of unknown function (DUF3151) Pollington J, Finn RD anon Pfam-B_4506 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 
25.00 25.00 37.00 36.80 23.80 18.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.63 0.71 -4.13 18 342 2009-01-15 18:05:59 2008-08-19 16:16:08 3 1 337 0 92 227 37 130.20 54 86.91 CHANGED LhGPPPohLPs-ssApttLssGsss......ssVAAcHPsuSluWAt....LAEpALssG.............csVpAYAYARTGYHRGLDpLRRsGWKGaGPVPWuHEPNRGFLRuLuALu+AApsIGEs-EhsRCtphLcDuDPsAsspL ............................huPpPlhLPs...D...s...ssc.thhttupss......ssVAspHPouSluWAh....LAEsALts.s.................psVsAYAYARTGYHRGLDQLRcsGWKGaGPVPauHpPNpGhLRsltALA+AAptIGEsDEhsRCpshLcDs..sPpAhtsL...... 0 29 70 88 +11182 PF11350 DUF3152 Protein of unknown function (DUF3152) Pollington J, Finn RD anon Pfam-B_4512 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 25.10 25.10 25.30 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.34 0.71 -4.80 17 313 2012-10-03 04:41:15 2008-08-19 16:23:17 3 2 261 0 93 280 119 198.50 43 61.58 CHANGED LssG.ushspsGsGTacsVsususplGpGt.+hapYhVEVEsGlsssu..sssuFAthV-tTLusP+uWo..pssphuFp+V-sus......PDFRlpLsSstTscphC...GhchthE.sSChs.......ssRVhlNpsRWlcGAhsFpGDluuYRQYlINHEVGHulGa.sHpsCstsGtLAPlMMQQThulss..................ss..CchNsWPaP ...........................................l..sssasptGssTacsV..s...u.s.s..sps..G....pGt..+lapYsVElEsGlssss...s.suFAphV-pTLss.P.+.uWs.........cssphuFpRlsuup......................PDFRIpLsSPs...Tsc..p..hC....GhchphE....sSChss.............ssRVhINtuRWl+GAss..F...p.G...D......luuYRQYl..INHEVGHul.G...a..sH-sCstsGtLAPVMMQQThulsst.....................c.....shhCp.NPWPYP......................... 0 28 67 89 +11183 PF11351 DUF3154 Protein of unknown function (DUF3154) Pollington J, Finn RD anon Pfam-B_4516 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
24.80 24.80 25.40 24.80 24.60 23.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.63 0.71 -4.37 32 105 2009-01-15 18:05:59 2008-08-19 16:45:58 3 1 98 0 38 117 312 117.70 25 71.28 CHANGED ssltcss-sFhsNsEptuttptphpptulpQhutEFts...tuhacphhsulNRlP...........RPshuhushuLhlhA.hhcPlhFustMtuhul.............VPEPLWWLhGslVuFYFGAR..p.sKspshp ...................................................sltphh-hhh..ss-.ttsttphphpt..tthppht.t...Ehth.....t.....sh.....hcphhstlNchs...............RPhluass.hul.......hlhu..hhsPlh.....hstthtuhsl......................lP.-.sL..ahLlu..ssl.hY.huu.R..p.tKtpt.................................................... 0 13 27 33 +11184 PF11352 DUF3155 Protein of unknown function (DUF3155) Pollington J, Finn RD anon Pfam-B_4534 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 21.00 21.00 24.00 109.80 19.40 18.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.75 0.72 -3.76 6 71 2009-01-15 18:05:59 2008-08-19 17:19:52 3 1 69 0 28 65 125 88.50 74 75.98 CHANGED ARRRKRKSRRR.EGRRIL-hVPQYuIESGE-KPVTAAR+FIcupGItPPAlLlV+RNEHTTDRYFWAEKGLFGAQYVEENHFLFPSL+lL ............u+KRKRhSRRR.tGpRlLtpVPpasIEoGEcKPVTAAR+aItppGIhPPALLhV+RNEHTTDRaFWuEKGLFuAQYsEENHFLFPSLRh... 1 5 18 26 +11185 PF11353 DUF3153 Protein of unknown function (DUF3153) Pollington J, Finn RD anon Pfam-B_4513 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Cyanobacteria. Some members are annotated as membrane proteins however this cannot be confirmed. 
20.80 20.80 21.10 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.28 0.70 -4.89 26 198 2009-01-15 18:05:59 2008-08-19 17:20:04 3 4 196 0 50 139 128 206.20 24 80.70 CHANGED lh.lllhLolLLoGCVchcsslshsu.c......+lpLuhplsShosphh.PWppphcpph+phtsphp.....psspsch..plpssshsup-hpphhpphhsssup..........suuls.hsss................plplpcpNaLlulcppLplslDLcsLs.lss...........LsLplsLssshshpthptss.s.st.t.......pt.plsWpLpsGclNcL-hsh.........WhhS.LGlGullIllllhluhhLpth+hph ..................................................................................................................shlLhLl.LLoGCl+lcsolslsscc.................................tpIs...hs..h..........sh...s.S.....h..s..st.s..shtsshpp+.l..p..tts.hssc.......p.s.h...thp..hsh.....s..ht-.l.phh...hs.ssp..............sssls......................lslcpss.ll.h....LphchDL.pslss.ss......................-lphslshPht.spsh.....susp....t..................p...lpW....pLpPG.lsphpsp.........................................................s................................................................................................................................. 0 12 30 44 +11186 PF11354 DUF3156 Protein of unknown function (DUF3156) Pollington J, Finn RD anon Pfam-B_4555 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
19.10 19.10 19.40 19.30 18.10 17.50 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.96 0.71 -4.86 12 216 2009-01-15 18:05:59 2008-08-19 17:21:06 3 1 212 0 21 130 0 157.70 53 85.54 CHANGED psLuu..p-t.ut...puthhphssGlplclpERscppFLhHlVSscaplpsssshsspuph+l+psGWLRRpGlsstsppGss.t..tllstLp.s.PsLtpsLtsLDa+chslstc.....supWplslE.aGASEVVsRhPuaRRYlRLssEQRhhLhuuhhthpphLp .....................RDLuGahCE+LoE...+SAlL+Lssh.spVhs+.ppKRLFMASIpSCEFcVcGsls..hPlpG+IRlHQsGWLKR.hPVlFsuuKuou....uLlsaLN.paPsLQQALSELDaRRFoLVlH.....c+cWhCSIEhaAASEVVC+MPPlRRYLRLEccQRlLLLSVlsMlsQAhs.... 3 3 4 12 +11187 PF11355 DUF3157 Protein of unknown function (DUF3157) Pollington J, Finn RD anon Pfam-B_4561 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 21.70 21.70 23.40 22.30 20.90 20.30 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.30 0.71 -5.02 15 126 2009-01-15 18:05:59 2008-08-20 09:00:52 3 2 125 0 23 84 4 187.60 46 94.64 CHANGED h+shs..llALlLls.....soAhAA-......thlTLEsGtpVpL+DDFTWEYlhhcots.................p.ss.sstssssssusssssssPsss..........ssp..hTshslupsc.ltphuKuGlcVsLssupa-us.cLsLshslospSocsVlhVclclolas-sGplLcpcclplWpuIhRMP-TYLRttppccucslhl-ss-+spap...lslcIhElco ....................................................MKphlLlAsllsh......SsaAuE.......slpLcDGR.lpLNDDFTWpYV.pETps............................tpsophuPllhusPVls...........psT..GsolsVusc+PlhQLSDSGVDVlLuuspYEpG.pLtLoouITNQSSQSVItVcltlpV.DspGsh.ccpcVTlWQSIKRMAETYLRPppuhcGKslcLsls-puQYp...lpApIppI-T................................ 1 2 4 14 +11188 PF11356 Pilus_PilP Type IV pilus biogenesis Pollington J, Finn RD anon Pfam-B_3750 (release 23.0) Family Type IV pili are required for auto-agglutination, twitching motility, biofilm formation, adherence and DNA uptake during transformation [1]. 
PilP is an inner membrane protein, required for pilus expression and transformation [1]. PilP interacts with PilQ which suggests that the two proteins may have coordinated activity in functions such as pilus extrusion/retraction [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.67 0.72 -3.75 91 1330 2009-01-15 18:05:59 2008-08-20 10:47:43 3 3 966 2 259 918 191 88.20 22 37.51 CHANGED lauphssttt..ss..............sssuspopLsl...pLhGll..ssssppuhAlIt..ssupppsatlG-plsG..ssplppltsD..+Vll.ppsGchcsL .....................................................................................................sssssp.sphs.l...sLtGll....st...sppuhAlIp...psspQp...shthG-p.lsu..ssplppIptD....pVll.pppG+hpp.............. 0 73 139 197 +11189 PF11357 Spy1 Cell cycle regulatory protein Pollington J, Finn RD anon Pfam-B_3875 (release 23.0) Family Speedy (Spy1) is a cell cycle regulatory protein which activates CDK2, the major kinase that allows progression through G1/S phase and further replication events [1]. Spy1 expression overcomes a p27-induced cell cycle arrest to allow for DNA synthesis, so cell cycle progression occurs due to an interaction between Spy1 and p27 [1]. Spy1 is also known as Ringo protein A. 25.00 25.00 29.30 25.60 19.50 23.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.65 0.71 -4.07 6 228 2009-01-15 18:05:59 2008-08-20 11:48:45 3 6 51 0 119 278 0 110.50 46 48.09 CHANGED p-hpAFh+LLEDslVQcFLuhDpsh+lSDKYLLAMVlsYFpRAGLhstpYs+IpFFLALYLANDMEEDppssKp-IFsahLGKs.WpphhPpFLKLRcphaspMsaRAhVSRcpCEEl.AhsPpHWlWsRDR ........................pchpAF.+Lh-.D.sllpcFLhh....Dhsh+lu...DKYLLAMVh..sYFp.RAt....h...p.Yp+lpFFlALYLANsM.E.EDpptsK.pIF.ahhG................+..........s...+sph.s.FhKhR.phappMsaRAhVSpcphEEl.Ahs...PpHalWtR-R.................. 
0 37 40 55 +11190 PF11358 DUF3158 Protein of unknown function (DUF3158) Pollington J, Finn RD anon Pfam-B_3964 (release 23.0) Family Some members in this family of proteins are annotated as integrase regulator R however this cannot be confirmed. This family of proteins with unknown function appear to be restricted to Proteobacteria. 21.10 21.10 21.70 21.30 20.40 19.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.98 0.71 -4.74 14 141 2009-01-15 18:05:59 2008-08-20 11:49:37 3 1 114 0 44 138 10 148.80 45 91.21 CHANGED FpsLpQssFppLp+uA....sLKGLLpPFKGKGsL-thApQCpsLRssLhsLAp.plLsQspphPFs.LLsVcLsp.QsTuAGTTFLRWR.....phDputMGVulWppllssstTPssLlcDLauhEhQRIsLNMQISLlHoluRQAt-CAsKhApAEssYhpRhtthsss ..........psLppssa.pLpHuu....sLKGLLKPF..KGKGphpthAppCtuhR-tLhsLAp..plLtQ.AptaPap..LLPlcLsp.QsTuAG...TsFLR........WR.....phspspMGVulWpplhsss......pTP.sLlp-LauhEhQRIsLNMQISLlHoluRQAtECApKhupA-ssahpRhtt...s................................... 0 5 21 34 +11191 PF11359 gpUL132 Glycoprotein UL132 Pollington J, Finn RD anon Pfam-B_4015 (release 23.0) Family Glycoprotein UL132 is a low-abundance structural component of Human cytomegalovirus (HCMV) [1]. The function of this protein is not fully understood. 
24.40 24.40 24.90 33.70 22.80 24.30 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.57 0.70 -5.03 3 84 2009-01-15 18:05:59 2008-08-20 12:50:24 3 1 12 0 0 73 0 190.80 78 86.68 CHANGED MTSSTssPooTsoosTVToATSsssosSTNlTTAa-uSTpsc.ss.lpclLuIllYCVoGoSILSFLlVLlAVLYSSCp++PGRha+FoD-EuApLLD.sDDsGupo.h...GuGSRRG.pIPAu.SSSShYQRL-stDaDE..-usSAAREoMc+DP-.NVIYF+KDGNLDTSFVNPNYG+GSPMTIESHuDDDEc..IRYYMSVYDELTASEMEEPScS.sWQIPKLlKVoTpPVTLKEPEY ......hTSSTsVPTSTSSRNoV-sATSSs.PTTuhNMTTsHESS..VH.ssRNDEIM...KVLAI...LFYIVTGTSIFSFIAVLIAVVYSSCCKHPGR.FRFA..DEEAVNLLDDTDDSGGSSPF...GtsSRRu.......ssSS.hpRLpspDapp..pptStscpp.ccDs-.N.VlYFcKcGNL.sSFVNPpYG+tSshhIESphsDsEp..I+YYhSlYDELsAptMt-PSpS..WpIPKlhKVuhp.VoL+-PEY......................................................................................... 0 0 0 0 +11192 PF11360 DUF3110 Protein of unknown function (DUF3110) Pollington J, Finn RD anon Pfam-B_4086 (release 23.0) Family This family of proteins has no known function. 20.20 20.20 20.30 20.20 18.70 20.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.92 0.72 -4.17 25 139 2009-01-15 18:05:59 2008-08-20 13:01:11 3 4 125 0 55 137 647 85.00 34 47.06 CHANGED VaVLLasAGo-sEGIHSLclsu.........cslVLMFEscDDApRYAsLLEAQDFPsP...oVEsl-c-EI-tFCpcAGY-sclVtuGFhPts..-RLLluPP-p ...............haVLlap.s.p.-sEGIa.olp.hss.........+stVLhFEscDDApRYssLL.EAp.Da..sss...sVptl-sc-ltphCppss.Yphplltts..........lhh.PPp........................ 1 15 39 51 +11193 PF11361 DUF3159 Protein of unknown function (DUF3159) Pollington J, Finn RD anon Pfam-B_4163 (release 23.0) Family Some members in this family of proteins with unknown function are annotated as membrane proteins however this cannot be confirmed. Currently this family of proteins has no known function. 
25.20 25.20 25.20 25.70 24.30 25.10 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.03 0.71 -4.82 29 387 2009-01-15 18:05:59 2008-08-20 13:48:20 3 2 327 0 139 362 262 187.60 35 79.02 CHANGED hGGhpGllcSslPsllFlss.s...........htsLhhulhuAlulAsllllhRLlpRcslpsAluGhhGVuluAhlAhhoGc........A+saFLhGIhhshsaullhhlSlllRhPllGhlhuh....lpucst........sWR.ccpthlRAashuThsWsslassRhlVQh.LYhu...spsu.........hLusARlsMGaPLhslulhVoahsl+putpt. ........hGGhpGllp.SsLPsllFVls..s...........s.tuLts..ulhsAluhAsll.hlhRLl++c.slp.s.A.luGhh..GVuls.Ahl..Aaho.G.p........A+saFLhGlhhshs...hullhhlSlllRhPllGllhuh.....lpup.st.................................sW.........R..ps......s...h...h...+A...ashAThsWshlFssRhlVQh.L.Y.u...spss.........hLGsAR...lsMGhPLhuLulhloahhl+ps...h......................................................... 0 44 101 127 +11194 PF11362 DUF3161 Protein of unknown function (DUF3161) Pollington J, Finn RD anon Pfam-B_4173 (release 23.0) Family This eukaryotic family of proteins has no known function. 25.00 25.00 25.40 26.20 23.00 23.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.01 0.72 -4.08 7 30 2009-01-15 18:05:59 2008-08-20 13:54:41 3 2 18 0 17 31 0 77.50 33 35.88 CHANGED pchLpsslc.ssh.SssAhsctRuhlss.Nt.aKalt-+shp+GusVPVhIsEhhp-GLpspsQTt...s+KlsshhpYhppoaFc..llcVls ..........slp.stF.slssLh+tRu+lsslNhlaKhlh-.....+shp.K....Gu.......s.lPlhls-hhphGLpstuQTs....tKLssh......pahc..llpl................. 0 8 10 15 +11195 PF11363 DUF3164 Protein of unknown function (DUF3164) Pollington J, Finn RD anon Pfam-B_4249 (release 23.0) Family This family of proteins has no known function. 
23.70 23.70 23.80 25.60 23.50 23.60 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.01 0.71 -4.91 30 235 2009-01-15 18:05:59 2008-08-20 14:24:56 3 1 198 0 53 219 8 187.30 33 91.65 CHANGED YhpDAcGpLlP.stlKslDph....RDchVpclVspActlppplscFKscsFsDluAFlsLSAccYus.+lGGpKGNlTLhSFDGpYKlphtls-plsFDEcLQAAKsLIDEClp-WopGucsEl+ullscAFpVDKpGplssu+lLuLRRlcIsDc..+WpcAMcAIuDSlpVsGSKsYlRhYER.ss..supapsIsLDlAs ..........................................ah.supGpLhP.t.l+shDth....cD-hVpclhspAttlpppltpFK.tpshs-lsuFlsL.A-cYus.ch..GGp..KGNlTLh..oaDGph+lphuht-plsFD-.c.l.ps.A.KsLI....D...-ClpcW.oc....G.u.c....s.c.l.psllppAFps...D.KpGplsssclLtLR+l..c..Ip...D...p...cWpcAMpAIs-ulpVssoKsYlRhacR.sp...sspap.IsLDlu............ 0 16 37 45 +11196 PF11364 DUF3165 Protein of unknown function (DUF3165) Pollington J, Finn RD anon Pfam-B_4331 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 25.00 25.00 27.50 27.00 23.50 22.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.88 0.72 -3.85 7 341 2009-09-10 15:00:51 2008-08-20 14:42:00 3 1 339 0 21 93 0 81.80 62 96.35 CHANGED MhYLIluILllhaYlFhAPcSIKsThNhluhVhllshLllLhsLuhl+IhQhPsEhFlGluMllluYaAL+DIhphspcs+. ..MVYLllGILLLLLYlFATPcSIKGTVNIVuMVslLVALLILLVLSFLKIFQLPoEIFluIAMLlLAYFSlRDIoLMPlKKp+... 0 1 4 12 +11197 PF11365 DUF3166 Protein of unknown function (DUF3166) Pollington J, Finn RD anon Pfam-B_4333 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.90 21.90 21.90 22.20 21.60 21.30 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.26 0.72 -3.53 4 340 2009-01-15 18:05:59 2008-08-20 14:45:14 3 3 45 0 193 276 0 94.50 47 14.45 CHANGED EDsuDLRCQLQFs+EEuuLMRKKhAKlscEp-chcpELpKY+ShaGDlDS...huchsuGuPcosRpttLcLcLKhsc.puNhLutKlsEL-sENR..sh+ ....Essu-L+ppLQFVcEEApLhR+phAcl-ccNcplpp...ELpK.Y+o..h...a.G.-lDu.....................sp....p.....t..u.u..ss..ss.......+p.s...t..L.p.cLKh...sc.phN.LStKlhcLphENR..hLp.................... 0 20 34 85 +11199 PF11367 DUF3168 Protein of unknown function (DUF3168) Pollington J, Finn RD anon Pfam-B_4337 (release 23.0) Family This family of proteins has no known function but is likely to be a component of bacteriophage. 21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.80 0.71 -3.96 123 1027 2009-01-15 18:05:59 2008-08-20 15:06:50 3 4 854 1 179 692 99 112.10 16 88.23 CHANGED slhttLtss.s...slssllss........la.....-tsP.......ts...s.....shPalslutspspst.ssh...ssts..t..phplplcl...a...u...pss.......tps.s..ppl....ssslpsAL................t.hthhthphttspsh..pcsssh.....hh+.....ss..lph .........................................................ltthhss.........la........ch.hP........ps.......s....shPal.shs.h.p.s.hss..ssh...ssps..t..phtlplcVa.....u......pss....sps.h..ppl...tptltpsl..........................t.phh.tsh..p-.pt.......hh+h............................................ 1 46 110 141 +11200 PF11368 DUF3169 Protein of unknown function (DUF3169) Pollington J, Finn RD anon Pfam-B_4342 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 
25.60 25.60 27.20 29.40 25.50 25.20 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.57 0.70 -5.17 11 562 2009-01-15 18:05:59 2008-08-20 15:09:01 3 1 550 0 31 240 2 235.20 36 99.32 CHANGED MKpt+p......hhRhlhhlLluullGGllGhhsuth.......hpplshsshtslthlphlupllllllhshshhhhhpshKapphhppphD-Dtsc..ph.ppt.+phphuoIlhslshllshlsllls..hllshhsssss...Lhhslh.hlhhlhhlhhplhhhKhhphl+sh+hsthsshc.hcchhtuhDEuE+ptphcpuachhhslNthlL.slhllLhlluhhTs.splhulLllshIal.....Ylslthhhhs++aa ............................MK.............hLhalhhllLG.G....hlGhh..lGh.hhuph-.........cplhhs...sh.s..hsh..ISh......lshlI.hh.l...shh.l.shh.hh.+.+.uhKa+pLhp...c...Eh.D.-Dhs-..pY.lph.Rp......lth...GoIhhslps..lhh...hlslhI.....Vl.hh..tsssth...............hah....h....h...l...hhl...hhhhhphh..hhKh..t....h..p.c.hs.hhAs.c..scchlp.t.h.DEGERphpLptsF+hhht..s.hlL..hhhlhlh.lhSh.hT.G..Q.hhuh.L....Llh..AIal.....Y.shh.L.hs+Raa............................ 0 4 12 24 +11201 PF11369 DUF3160 Protein of unknown function (DUF3160) Pollington J, Finn RD anon Pfam-B_4384 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 38.00 34.70 21.10 20.80 hmmbuild -o /dev/null HMM SEED 631 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -12.86 0.70 -6.20 17 97 2009-01-15 18:05:59 2008-08-20 15:47:45 3 6 73 0 39 99 5 586.80 29 79.38 CHANGED sLspsshshLp+NGFVVl.........ts..ssppcplsph........YcsLc.p.........slPlFIToDSlLHlYHIQFD-TL+plEcccFastLWclspsLLssSlcsYs.....suoGc...+EAARRNsAYFuVAhsLLp..Pc..pht........................tpaphElPuhV+p-VEAELsLI-AppGhshSPIF....p......YpE...DYSQYlPRGHYT+S-pLpsYF+AhMWaGRhShLLcss...........p....................stp.....-AplQTlp....AhLIosphc.pcpcLhccWcRlYslTAFYVGhSDDLGPYEYhcALcsVFGsp....tsshsspslpcL+scLtchcs...PcIYGGTGthh....hss-ttsppLcsTcGFRhMGQRahPDSYlhpsLl.........................s......RhaPpGLDlMulLGS-RAtphL.cphs..SsapsYstpappL-sEFsuhsst-WN+NLYWuhLYuL..pPLhpsassGYPTFMQTpAWpDKpLsTALASWTELRHDTILYAKQsYs....psushs.pE........psshGYVEPsP-FYsRhLALT+MTpsGLs-hclLDcpuc..........pchppLcshLp+LhpISpKELENKcLT-E-Y-aI+sFGpplpsh..............htsVDtcspposlVADVaTss.....tstVLEEGsGhlDhllVAYc.sDGRlhlusGPVhSYYEFhQPhu-RLTDEcWR-ML.pspsPE..+PEWs 
.........................................stt.hthLtpNuFslh.........................s...p.pplhph........Yc.ptchp.....................phP.FlToD.hLchaHlhFDphL+plEcpphhstLhclsp...shhptp.hpphp.....p..pspp..hcpsAtpshs.aFulAhtLLp....................................p.plPtthppt...sptElph.Iput....ps...t.Sshh.s............hp-.....DYS.apPRGHYT+s-..pLppYF+uMMWhGphsFhhpsp......................................tphhp....Ahlls.phhp.....p.phhchWpplapshsFhhGpoDslshhchtphlppshstt.......h...hsppth.p......phh...tpl.tp..h.tt...spI.s................................p.ppstsaR....hMsQRah.Du.lhppLs...................................................ststt..RshP.pGLDl.hAshGocpAtplL.cph.t...ppap....pYptph.pplcpphtshstp.t.hppslYtsWlhsL....tsh.hpp......hs..pshPsFMpotsWppKpLsTuLuSWsEL+HDTILYuKQshs.......EhG.sss...t..............shGY..VEPssphap+hh.......sLsphstps.....Lp....p.....hsh......ls-ptp..........tphpplpphhphLhpIucKELp.sp..pLop--YchIp.h.Gs.pl-.h.................................hps.sts.spphslVADVhTss........ts..tlLc.GsGhstplhVsh..hsGpl..hlspGsVhSYYEFhpsh.s.c.RLTDEcWpchL....ptpsst.....hPtWh............................... 0 23 28 33 +11203 PF11371 DUF3172 Protein of unknown function (DUF3172) Pollington J, Finn RD anon Pfam-B_4527 (release 23.0) Family This family of proteins has no known function. 25.00 25.00 55.60 54.20 20.20 19.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.78 0.71 -4.68 10 83 2009-01-15 18:05:59 2008-08-21 09:45:08 3 2 78 0 41 83 146 136.60 53 66.78 CHANGED FNhuTlA......lLGGVhVLGIGIGluhSSTsohsPpNVASpEhIDpuAPssElCVQaGASAhVhDhRlFlTLNPFsVaVoQPsMQPGCVLRRsNWslLEQcpLlou-QlR-CKsRMNTFGYsGsL-u.cPcIcClYQN-uApNLF ........hNhsTlAlLAGlhVLGIGlGhuhoSTsshsPpNlASp-hlDpusPsPElChQaGASAhVhDhRlFlTLNPFslaVoQPshQPGCVlRRsNW.ulLcp.ctlloscQlR-CKpRMNTFuasGsLcs.pPplcClYQs-supNhF.................. 
0 12 30 39 +11204 PF11372 DUF3173 Domain of unknown function (DUF3173) Pollington J, Finn RD, Bateman A anon Pfam-B_4543 (release 23.0) Domain This family of proteins with unknown function appears to be restricted to Firmicutes. These proteins appear to be distantly related to HHH domains and are therefore likely to be DNA-binding. 25.00 25.00 25.20 29.10 21.60 24.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.66 0.72 -4.36 20 326 2012-10-03 02:11:09 2008-08-21 10:35:08 3 1 232 0 31 166 3 61.50 41 86.57 CHANGED ptTlo+pDLIclGappppApsII+pAKplhVp.+..............GasaYsNKRLshVPsulVEE.lLGlpl ........tTlopcDLlpl.GaspppApcII+pAKplhVp.+..............GasaYss+RLshVPtplVEE.lLGlpl................................. 0 5 15 23 +11205 PF11373 DUF3175 Protein of unknown function (DUF3175) Pollington J, Finn RD anon Pfam-B_4566 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 21.00 21.00 22.40 22.40 18.10 17.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.80 0.72 -3.96 14 130 2009-01-15 18:05:59 2008-08-21 10:43:47 3 2 128 0 55 117 6 83.60 60 71.96 CHANGED +WSpcVTE+SsALDLEtsVFp.cDPccIAtSLKRSA-pScRRKuoPFpSAMSMLsFYINRAG+sLscspRpsLEcAK-ELRctFGR ...............+WSpcVTcpSD.ALDLEtslFcpcsPccIAtSLK+SA-cScRRKusPFQSAMSMLsFYINRAG+sLscsRRpsLEcAKccLRcsFGR......... 0 12 29 40 +11206 PF11374 DUF3176 Protein of unknown function (DUF3176) Pollington J, Finn RD anon Pfam-B_4567 (release 23.0) Family This eukaryotic family of proteins has no known function. 
21.40 21.40 21.50 23.40 19.70 19.60 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.31 0.72 -4.19 34 177 2009-01-15 18:05:59 2008-08-21 10:46:33 3 4 49 0 146 180 1 104.10 27 17.65 CHANGED llulLhhhcs+shss.Wsh...hloLNsllSlLoTlspushhhslupuluQLKWhaFp.ppp...p.......LsDhphhDsASRG.saGulh.lL.....hphps+.pluslGul....lhllu.luhsPFsQQllp .........................lslLhhhcspslsp.Wsh.........hlohsslluhlsslspuslhhslupuluQhKWhhap...pt......p.......LtDhphhDpASRG..shGuhh.lL..........hphps....t...t........lusluul....l.hl....hs.hhhsPhhQphl.s................... 0 21 72 113 +11207 PF11375 DUF3177 Protein of unknown function (DUF3177) Pollington J, Finn RD anon Pfam-B_4580 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 25.00 25.00 74.10 74.00 21.10 20.60 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.43 0.71 -4.85 22 74 2009-01-15 18:05:59 2008-08-21 10:54:46 3 1 72 0 29 77 136 186.90 44 95.59 CHANGED hcsLVWLsYRLAshFslulPLlLLIWAhh++tsAl.RLLsIYW+VuSLLsIolhLhhsph............PlualouhlA.lLhslSl....WFWVDLNEELtDhPs.pPLsLsh+hWRWulTsaulluhhhshssLsCshshs....tss.CpsWLEsP.........lFchlFtushotuhhuFlGhluLlhYllsLlpalllRLP+QGRsAst ......psLlWhDYRLAllFslhlPLlLLIWuhhc+.culsRLLhIYW+VASLLhITlhLhlsth............slualouhhAplLIslSl....WFWVDLN-ElcDhss.psLtLshpsWRWAlThausluhlhplshLsCuhshs....sss.CplWL-sPh........hac.hFtushssuhhuFlGhluLllYllhLh.alllRLs+QGRsAh.t...... 0 5 19 27 +11208 PF11376 DUF3179 Protein of unknown function (DUF3179) Pollington J, Finn RD anon Pfam-B_4591 (release 23.0) Family This family of proteins has no known function. 
19.30 19.30 23.00 22.10 18.70 17.50 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.62 0.70 -5.22 35 131 2009-01-15 18:05:59 2008-08-21 11:11:21 3 4 102 0 49 143 88 243.70 28 77.52 CHANGED P+DGIPAls.....sPpFhssspssh..lcss-sVlslplsGp..sRAYPh+ILsWHEIVN.Dpl.uGtPluVTYCPLCsouhs.F-cplss..tslsFGVSGpLhNSshlMYDR....pT-ShWpQhhGpAlsGsh....sGppLcplPshhpoWspa+pcaP-u.hVLupssuapRsYup........sPYss.........YDsssp..h......lassthsscc..lssppcVlGl.shtst.thAashspltpts.....t.plsspslslsacsstsS.....................thhsups....................................pclsphssF..WFAWt.....A..FaP-osl .....................ssIPuls.............pPpahs.stpsp...ltsp-.Vhsl.thsGp..s+AYPhphlhaHElVN..Dpl.ushslslTaCPLssouhs.a.cpph.ss.............t.hpFusoGhLhpushlhaDR....pTpShWpQhhGp..AlsGsh...........tGppLp.lP.shhpoWtpappt..aPcs.hVlsps......p..s.h..tRsYsp...........sPYts................Ytt.tt..............hhsst..spp..hsshphVlult.tt...th..........Aashstltptt.......plsttslhhhhpss.ts....................................h.................................................................................t.l..t...h..hFua........s..FhPps................................ 0 16 36 49 +11209 PF11377 DUF3180 Protein of unknown function (DUF3180) Pollington J, Finn RD anon Pfam-B_4592 (release 23.0) Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently there is no known function. 
24.50 24.50 55.10 55.00 24.40 24.40 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.77 0.71 -4.31 30 431 2009-01-15 18:05:59 2008-08-21 11:16:20 3 1 428 0 112 293 37 137.30 29 82.88 CHANGED LlusAlsuAlluall..lhtha.tshPs.lshhsuloLhllAllplhhAhhlRp+l.....husspptlcPltuARslsLAKAoAhsGAlluGhasGhhlalL.ptspltsuus-shsusluuluulALllAGLhLE+sC+lPs ...........lssslsuAsluahh..lhthh.sshPs.lPhhsslslhllAlsphhhAhtlRs+l...............hsssppt.lc.PhtuApsLsluKAuAhsGAllsGhahGhllhhl.tphplssstpcshsssluuluulAhssAGlaLEpsC+lP... 0 35 82 104 +11210 PF11378 DUF3181 Protein of unknown function (DUF3181) Pollington J, Finn RD anon Pfam-B_4595 (release 23.0) Family This family of proteins has no known function. 19.70 19.70 20.70 27.90 18.40 17.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.92 0.72 -3.97 23 76 2009-01-15 18:05:59 2008-08-21 11:23:09 3 1 75 0 34 78 96 87.90 43 84.25 CHANGED lccLsusIuD+lYlplupWHLYLGDAtLupsLA.Eshshl-pG..upsAAcpuL-ulpVslGGGpspLPLu+LlPsuQlp-LEEILEpa ..lcsLAupIG-plYIDlApWHLYLuDA.........+......LppsLAcchhshlpps...hscsslppsLcslpVtlGGG+pplPLh-LlPspphhcLh-lLEca.. 0 7 23 32 +11211 PF11379 DUF3182 Protein of unknown function (DUF3182) Pollington J, Finn RD anon Pfam-B_4440 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
26.00 26.00 26.70 44.20 25.40 25.90 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.98 0.70 -5.76 15 90 2009-09-11 05:33:34 2008-08-21 11:34:51 3 1 85 0 36 90 3 331.40 45 93.94 CHANGED hVlspsscstssp..HEtsTpttLucplApLhGhpauGshc......sstptsu..sYlVPscTLVs.spAppLGlpuscDLaGGlVP+uFlATKAIoHsLspssAssPsGWopsFupplpssVLPGaosFShsDARpAut+LLtsGsVRlK.spusGGpGQpllcssspLDstLuuhssspLtspGLVLEccLcpspTaSVGQlpluGllsSYaGsQpLThsssGpcVYGGScLhVVRGua-ALLtLsLsspsRpAVppApsYDpAAttsY...PuhhASRRNYDVA....pGhDupGtsR..sGVLEQSWRlGGASuAElAALcuFppDPslptVpAuT+ElYG.-stlPssApllYRGsDspsG.LoKYsp ............h.Vlsasscsts..sp..HEhtspttlAcplApLhGhpauGthc........sshttsu..hYhVPscTLss.tpAtpLGlpuspDLhGGlVPHuFlATKuIsHPLstss.ussPtGWsttFAppltsslLsGaosFShsDAppAutpLLt.pGsVRlK.spusGGpGQhllpDhspL-thLsuhs-ptLtppGlVLEcsLppshTaSVGQlplsGhhhSYaGsQp.TtsspGpcVYGGScLhVsRGuapsLhsl.sLsp.thRtAlppAphaDpAsttsa...PuhhASRRNYDlA.......pGhDupGt.+..sGVLEQSWRlGGASuAElAALpAFttDPuLptV+AuohElYt.s..hPssAplhY+GsDspsG.lhKYh.......... 0 4 13 24 +11212 PF11380 DUF3184 Protein of unknown function (DUF3184) Pollington J, Finn RD anon Pfam-B_4192 (release 23.0) Family This eukaryotic family of proteins has no known function. 
19.30 19.30 19.30 19.30 19.20 19.20 hmmbuild -o /dev/null HMM SEED 691 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.37 0.70 -13.36 0.70 -6.59 4 258 2009-01-15 18:05:59 2008-08-21 11:50:57 3 9 118 0 96 260 13 177.60 23 32.61 CHANGED lssR....DRETDELRHSLRSVEQHVRWHRGRVVhVSPGHHPsWVDGAKNFLAGhCGuARVQALRSSGTHLRVTTVHQDAVMPYuhRLTVDSHsIEQpLWRVRNhTsVHVYMNDDYFVNRDVAITDLhNEYGGTIVRTE+GhltcGhpGssS.uoWsEGVtNTpLFNhhELDlpHEDaLPcsLl+pWpp......t.sQ.sspulhp.....hpc.lPs.............................luhschlssA....aspsshSpP..PTuhP+.R.R.....aYATHAPFVYCTNMaRaLssRYptEhAtsphp+RtRSApDLaVPFlYNAFIMARPWQASP+FLPYLhpL+puh+ttcscAh............................s..sPPhp...............IhL-N-DGCAPATLhts..ASEslauKFssNlctNcchlccVppssPLaFNINAGFooscAADQLRcFLHGhFPTPVYLE......................oSuuGsA...pptsLSRLFGDLMALPVVsVVSYEEGVCPLVRSLALAFAGHHRGGV+VpV-p+G....sAsLtEsRtsLsHRVhSAMPssACTYtcpVoVcsutRGESlAElARRAhsth....tGGVELPuTCGuGGAGLRVRGFVVDARTRusPlRSssALhcALAVPAQTLSLEDFRAVAVGPSEtDVVLVVSREDA-AKAVHWVNGASESDLLlTYPLPVEAYEDMsAEVRWS 
............................................................ts...............................................................................................................................t..h...s..............Th.sS.sIE..lapl..shst.alYhNDDhhh.p.l..tphh........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t................................................................................................................................ 0 54 64 83 +11213 PF11381 DUF3185 Protein of unknown function (DUF3185) Pollington J, Finn RD anon Pfam-B_4606 (release 23.0) Family Some members in this bacterial family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently no function is known. 21.40 21.40 21.70 25.70 21.20 21.20 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.83 0.72 -4.21 19 111 2012-10-03 00:20:40 2008-08-21 13:37:02 3 2 105 0 46 85 6 58.90 45 81.59 CHANGED KllGlsLllsGlsLhhaGaphhsShuSphochhTGssTccshhhhlGGsVusllGlhhl ..+hIulALlVuGlVLLYFGh....QuacSlsssloRhFT........GoPo..sKTlhllsGGsVAsllGLhtl..... 1 13 27 37 +11214 PF11382 DUF3186 Protein of unknown function (DUF3186) Pollington J, Finn RD anon Pfam-B_4607 (release 23.0) Family This bacterial family of proteins has no known function. 
24.30 24.30 24.40 24.70 23.60 24.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.82 0.70 -5.29 25 294 2009-09-11 00:08:05 2008-08-21 14:08:03 3 3 288 0 100 240 10 292.40 28 96.48 CHANGED MIshRaHllSlsAlFlALAlGlllGssh............LpsslhssLpschspLcpctspLpsptpslpppssss-satstlsstllsspLsu+sVsllphssusssssculpchLppAGAsVsuplsLp-pah..sssss-clpolsss.hhssustlsptss-sus...sthlu.slh.s........stssss-csslLssLpcss..hls..hp...sssshsusssllls.......Gsssss...tsssttstslschspuLsstGtus.....Vl...sGtptousssuhluhhRu....tssVSTVDslDsshGclsshLALtptl.sGt.sGpYGsussAsulhPs .................hIshR.HslSlsAlFLALAlGllLGush............hssslhssL...p...pphp.s..LppphspLps.ptsth.ppplssucsa.sppluspllpssLsu+sVsllph.ss.uscsDhsulschltp..AGuslsuplsLs.ppFh...sssps-pLpsllss.hhPs..GspLssphh.-tus.s..Gph..Lu.sLh.s...................................stsspspcsslLssLp..-sG...als....ap........ssthtsAsssVlls............Gss.ss........sssttstslschutuLsstus..us...................ll...sGp.t.ou.spsu....hlushRu.........sstlSTVDslDpt..sGplsshLALtp.l..sGt..sGpYGsutsApulss................................................................. 0 46 80 94 +11215 PF11383 DUF3187 Protein of unknown function (DUF3187) Pollington J, Finn RD anon Pfam-B_4660 (release 23.0) Domain This family of proteins with unknown function appear to be restricted to Proteobacteria. These proteins are likely to be outer membrane proteins. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.90 0.70 -5.31 18 149 2012-10-03 17:14:37 2008-08-21 14:09:55 3 2 139 0 32 161 75 297.70 39 89.81 CHANGED hhhllhhhsshss.sssssshshuPhhshsQoPlpshtLoPpLRSuhsLssuphEhhhotohASlWu....po-phhhDY.psphsluhcaphssphplclphpahhtusspLDuhlpsFHDhFGlsQsGRscssccpaplph..s.ptGhphpsFpGcol.tuulolh....lpaplhpstpc..ulSlGuoLhasssssupFpss.uh-.ulplsauhpts.pash.......auslGhsahssspshhs....h+stshthuhGhcaphhspasLlsphchapGhh-ss..schucsSsElshGaRYhh.pssAlElullENlhNhDNSsDluFplGhRaph ....................................................................................................h...h.hhhhhhshsshssphpaGPhhsYAQuPltutuLoPpLRsGFoLP.sp.Elauuh.ohASlWu.........cossYthDaYpNQhslGl+WQhsscWQhELsYRas.AhsNpLDulThuFHDhFGlsQNGRDpVc+cRFpIsh..P.cptlthcDFsGsTL.suuholY....spYQlasscpH.....GLSL..Gu..oLYYNp..sGhhchs.....pFE..QuLQLNYoYpp..s....sHpl....auhlGl.s.a.+sssss..s....lsh+.cs...oh...s...husGYcYplsspH.pLhspa+..a.Y..QGss.-us....pEhp...cs...us.Ehl..L.GYRYhh.ssSALEluls....ENl.hssDNSTDIAhpluYRa+.................................................... 0 10 20 27 +11216 PF11384 DUF3188 Protein of unknown function (DUF3188) Pollington J, Finn RD anon Pfam-B_4573 (release 23.0) Family This bacterial family of proteins has no known function. 24.60 24.60 25.00 24.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.14 0.72 -4.39 33 165 2009-01-15 18:05:59 2008-08-21 14:10:22 3 1 164 0 20 53 76 50.00 44 76.71 CHANGED luAPhLIhlul.luhh...pRpGsD+ltulPsllsGh..uLllpusVsRth+R++ ..NuLFLsSIGhlIlLa...ohsspsthhsL.uLsTGl..hllslGshhhh+t+K.......... 
0 5 9 17 +11217 PF11385 DUF3189 Protein of unknown function (DUF3189) Pollington J, Finn RD anon Pfam-B_4499 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes 20.40 20.40 25.90 22.30 19.40 18.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.56 0.71 -4.64 23 202 2009-01-15 18:05:59 2008-08-21 14:20:44 3 2 152 0 63 128 0 145.10 44 89.91 CHANGED hlIYpsaGGsHSSslAAuIHLspLPhc+hPspcElhplPha-php.pp-hGplhYhGpDEhGNcVYslGhts.tphltsslcshh.plhphp...ppcllllso..hlshhh+lGGahSR+htls.hGRPllshGhppsY.plsplVcpsKpp .......................................hhIYpsaGGoHSoslAAAhHLscLPsD.Rh.oKEEILslsaFsKLp.scDhG+llFhGhDEtGN.VYolGsts.u+lVlPAh+cLh.clLppp...pEcIlhssToPsVsl.MphGGhhSRRh+lshIGhPLllaGsphshcsl.cLVphsKc.t................................ 0 37 50 58 +11218 PF11386 VERL Vitelline envelope receptor for lysin Pollington J, Finn RD anon Pfam-B_1349 (release 23.0) Family VERL, the egg vitelline envelope (VE) receptor for lysin, is a giant unbranched glycoprotein comprising 30% of the vitelline envelope. Lysin binds to VERL and creates a hole as VERL molecules lose cohesion and splay apart. These proteins are important in the mediation of fertilisation [1] 25.00 25.00 26.50 26.80 23.60 22.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.65 0.72 -4.01 5 193 2009-01-15 18:05:59 2008-08-21 16:03:27 3 6 11 0 0 191 0 76.70 70 39.52 CHANGED VPITRESGINMMhIHYspNcosDSPGMCVFtGPYSVPKNDTVVLYTVTARLKWSEGPPThLSIECYMPKSPV..APEPEA ......VPITpEpGINMMLIQYoRN..ch..L.DSPGMCVFWGPYSVPKNDTVVLYTVTARLKWSEGPPT.LSIECYMPKSPs..APKPE....... 0 0 0 0 +11219 PF11387 DUF2795 Protein of unknown function (DUF2795) Pollington J, Finn RD anon Pfam-B_1395 (release 23.0) Family This family of proteins has no known function. 
21.00 21.00 21.10 21.00 20.70 20.20 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.80 0.72 -4.09 58 486 2009-01-15 18:05:59 2008-08-21 16:11:08 3 5 367 0 217 407 134 44.80 39 51.93 CHANGED L..pshcYPusKppLlppA+cssAss.c.llcsLcplP.D+c..Ysoss-V ......LpsssaPAoK--Ll-hAt+sGAs..-.Vl-sLppl..Dcs...Y-ohp-l...... 0 58 141 184 +11220 PF11388 DotA Phagosome trafficking protein DotA Pollington J, Finn RD anon Pfam-B_001493 (release 23.0) Family DotA is essential for intracellular growth in Legionella [1]. DotA is thought to play an important role in regulating initial phagosome trafficking decisions either upon or immediately after macrophage uptake [2]. 25.00 25.00 37.20 33.00 20.20 18.20 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.32 0.72 -3.99 2 138 2009-01-15 18:05:59 2008-08-21 16:11:59 3 1 11 0 2 137 0 103.60 83 27.67 CHANGED FDosTGLDcSsFDPsQLTKPFGKTCQssYuLLCsWFQNKSDKLlQIQSLIsGsPALuQDGVKQPDLSssPpRt.VpGPhSSTVYGFlNNSMMVQLPGQPGIKPLT ........................................FDSsTGLDsSsFDPsQLTKPFGK.TCQGTYALLCTWFQNKSDKLVQIQSLIN.GsPALSQDGVKQPDLSPsPpRthVEGPLSSTVYGFlNNSMMVQLPGQPGIKPLT. 0 1 1 2 +11221 PF11389 Porin_OmpL1 Leptospira porin protein OmpL1 Pollington J, Finn RD anon Pfam-B_001515 (release 23.0) Family OmpL1 is a member of the outer membrane (OM) proteins in the mammalian pathogen Leptospira. Specifically, it is a porin [1]. 
25.00 25.00 49.90 48.90 19.00 18.20 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.82 0.70 -5.34 3 83 2009-01-15 18:05:59 2008-08-21 16:12:22 3 1 36 0 4 74 1 247.50 83 87.46 CHANGED ITKDGLDAATYYGPVRSTDTCTVGPSDPTCVQNPuKPAGEGNYlGVAPRKAIPAENRLITLDRTTGGlINARSTKGAMVGGNLMVGYESDFGKYFFWRVAAEYTQKISGGITKADIAGYNIVDMTWGFSSIVIPATVGIKLNVTEDAAVYMGAGLNYFNGGWSLNGSNNIKGGHDILAAAGAGSVANLLSDGTDPITTREHVRFRTSGIAPNFLIGTQARVTDKGHVFLELETIMSAAYAVGKTQSlGGAsTLuPFPAYPIVVGGQI .........................ITKDGLDAATaYGPVRSTsTCT.VussDPTCVQNPuKPsGEGNYlGVuPRKAIsAENRLITLDRTTGGhINARSTKGAMVGGNLMVGYESD..FGK.YFFWRVAAEYTQKISGGITKADIAGYNIVDMTWGFSSIVIPATVGIKL.NVTEDAAlYMGAGLNYFNGGWSLNGsNNIKGGaDILAAA...Gss..oVANLLuDGTD........PlTTREHlRFRsSGIAPNFLIGTQARVTDKGH.VFlELETIMSAAYuVGKTQShGGAosLuPFPuY........................... 0 2 3 3 +11222 PF11390 FdsD NADH-dependant formate dehydrogenase delta subunit FdsD Pollington J, Finn RD anon Pfam-B_1352 (release 23.0) Family FdsD is the delta subunit of the enzyme formate dehydrogenase. This subunit may play a role in maintaining the quaternary structure by means of electrostatic interactions with the other subunits [1]. The delta subunit is not involved in the active centre of the enzyme [1]. 20.70 20.70 21.20 44.80 20.50 19.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.05 0.72 -3.88 52 250 2009-01-15 18:05:59 2008-08-21 16:12:50 3 2 245 0 113 242 44 61.60 45 72.72 CHANGED +LlcMANQIusFFpo..scsculsslAsHlp+FW-PRMRcpLhsh.lp......t.uuts.LsPlVhcAl .....+LlcMANQIusFFpoh.s+c..-AlsGlAsHIp+FW-PRMR+pLhshl-......s..uut...s..LsPlVhcAl.............. 0 21 62 82 +11223 PF11391 DUF2798 Protein of unknown function (DUF2798) Pollington J, Finn RD anon Pfam-B_1194 (release 23.0) Family This family of proteins has no known function. 
30.40 30.40 30.40 30.50 29.90 30.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -8.73 0.72 -4.22 84 363 2009-09-10 17:28:17 2008-08-21 16:15:59 3 2 310 0 117 347 466 59.20 27 71.18 CHANGED llMShhMuhl..hoslhohhsh.Ghs.s..sFhtpWhpuahlAaslAhshsl.llsPhlp+lstpls ............hhhuhhMuhl...hShlhohhsh.Ghss.....talttWh.p.uahlAaslAFsssl.llhPll++lsthh....... 0 24 71 99 +11224 PF11392 DUF2877 Protein of unknown function (DUF2877) Pollington J, Finn RD anon Pfam-B_002434 (release 23.0) Family This bacterial family of proteins are putative carboxylase proteins however this cannot be confirmed. 25.00 25.00 26.70 26.50 22.60 23.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.31 0.72 -3.85 21 929 2009-01-15 18:05:59 2008-08-21 16:23:37 3 3 528 0 50 363 13 109.10 43 39.83 CHANGED hlGhG.GLTPSuDDhLsGhh.uhhhh.t...pshpphht.htphlths..pTTtlStphLppAhpGphspsltpLhtslhpst..spstpslcplls.lGuoSGsDhLhGlhhuhph ................hlGhG.GLTPSuDDhLoGhl.sshahsu...........tsucphptp..htp..s..ph.p.sTTtlSsthLchAhQGhhspslh+F.....lHsl.ts......t...............ps......s.pAI-...plht.lGpoSGsDhLhGhhhGppL.......... 0 24 36 42 +11225 PF11393 IcmL Macrophage killing protein with similarity to conjugation protein Pollington J, Finn RD anon Pfam-B_002787 (release 23.0) Family IcmL contains two amphipathic beta-sheet regions, required for the pore-forming ability which may be related to the transfer of this protein into a host cell membrane [1]. The icmL gene shows significant similarity to plasmid genes involved in conjugation however IcmL is thought to be required for macrophage killing. It is unknown whether conjugation plays a role in macrophage killing [2]. 
21.00 21.00 21.10 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.11 0.72 -4.11 23 289 2009-01-15 18:05:59 2008-08-21 16:26:36 3 3 199 0 37 173 10 101.70 27 48.53 CHANGED su+Ih.l.Ph-pPthspstlhsaupcslhpuashDassYcsplsphtsp.FTppGats.ahsuLppSshLcsl+sp+hslsusss..usspllppshh..tshhtWplphPlpl ...................................shppshhspsthhpauspslspuFshDa..spacppl.ssltst.aos.cGatt.ahs.AL.p.s.SsllcslKcc+h.sls.sss..Gsullsppsph....tshhhWphphPlhh........... 0 19 23 32 +11226 PF11394 DUF2875 Protein of unknown function (DUF2875) Pollington J, Finn RD anon Pfam-B_002814 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 20.70 20.70 21.70 22.10 18.00 17.90 hmmbuild -o /dev/null HMM SEED 451 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.49 0.70 -5.92 8 205 2009-01-15 18:05:59 2008-08-21 16:27:43 3 2 67 0 20 260 0 232.10 34 83.25 CHANGED AppuppYsLElRGlGlslsp.pQpcIW+cIccKssNauSlhSQcPcDYssS.soRps-hclsspsAFKauApcuV-YWPlPVhllGPP+shccua..RAAusIsusRppAoLGVTLFLWQ-DtNTscupuMlE+LFsFFDsHPDVPpALlhSpDGslsRshhRsPG...oss..psuphVPshPDSMsAlLVoRSDRVDRhIRPYAV-psEtlspssTpaDlsKLWNFaW-psc.....uFssaYE..AspKptGscsP.ususMSosaWQupLPsLWKTIuNKGPGpFcPSPalPVRWspWQVKpFDsAPlLGYLHRPI+VpLsD-HGKPLKsAtQAcAL+AGWtQAlsTLP-GpK.PsRVFYDTTsspthtIsLsQALHsls..upul-LuDV+EGYDIGRRl.GNTGVSSsLVQIuLAlIASYcDGGsSAsVptpssGoATIhMVSPPDtApKAttsQsp.GscPF 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 4 8 11 +11227 PF11395 DUF2873 Protein of unknown function (DUF2873) Pollington J, Finn RD anon Pfam-B_002845 (release 23.0) Family This viral family of proteins has no known function. 25.00 25.00 57.00 57.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.32 0.72 -4.39 2 75 2009-01-15 18:05:59 2008-08-21 16:30:08 3 1 73 0 0 10 0 43.00 96 97.46 CHANGED MsELTLIDFYLCFLAFLLFLVLIMLIIFWFSLElQDlEEPCsK MNELTL..IDFYLCFLAFLLFLVLIMLIIFWFSLEIQDLEEPCTK. 0 0 0 0 +11228 PF11396 DUF2874 Protein of unknown function (DUF2874) Pollington J, Finn RD anon Pfam-B_002962 (release 23.0) Domain This bacterial family of proteins is probably periplasmic and of unknown function.\ There may be between one and six copies of this domain per sequence. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.79 0.72 -4.00 181 1589 2012-10-01 23:09:26 2008-08-21 16:38:52 3 9 279 21 355 1152 36 57.30 21 63.99 CHANGED psl.shsplPss....lpstlppp..asstp.lpc....hphpp........phYclclp.......tpphclhasppGphl .................h..hstLPps.......lpshlppp...asssp.ltc.....lcpcp........stYclcls.........sphclhFstpGphh..... 
0 136 252 283 +11229 PF11397 GlcNAc Glycosyltransferase (GlcNAc) Pollington J, Finn RD anon Pfam-B_002901 (release 23.0) Family GlcNAc is an enzyme that carries out the first glycosylation step of hydroxylated Skp1, a ubiquitous eukaryotic protein, in the cytoplasm [1]. 20.40 20.40 20.40 20.60 20.30 20.00 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -12.12 0.70 -5.33 25 216 2012-10-03 05:28:31 2008-08-21 16:46:00 3 11 113 0 109 246 398 269.10 25 60.42 CHANGED oIFVslASYRDsEhhsTLcsLhspAspPcRLalGVs.Qpts..s-ss...........................................................................................................ssFhsc...........................tht.........h.tsplphhslssspu+GssaARahspthYpsEcYhLplDSHpRFl.sWDppllshlppLp.........ssKsVLSsYPsGYps...tsppphpcpsssththspF.sspGhlplp..........................uphhss.pp.ttP.....l.ssalAuGFlFucGp.Fs+-VPaDPal..hFpGEElhhosRhaT+GaDlYsPs+sl..laHhYsR.....sspsKhWs-pp....................tpWhtppppScpRsphLL..................................shtsspss........hhthshYGlGspRolspa.pauGlshtp 
......................................................................................................................lFlsluuaRD...p.hh.Tl.shhppAttP.ppl.hulh.Q.....t.s.........................................................................................................................................................................................................................t..h.tsplphhphph.pupGsshARahsp....t....ha.....ts...EpahhplD.SHhpFh.tWDp.hlt.hptht.................................s.pslLotYP..s...hp...............ttt....t.p.p.h...h.hhhht.h....ttp..Gh.hphh..........................................u.t.ht..............P...........h.t..ahu.uG.a..Fupup.hhpcV.P.hDPph...hF.GEE.hhh.......usRhaTpGaDh.YsPpp..s..l..haH..Ypp.........tptsphaps.........................th....t..pphu.hpRhhhhl..................................................................................h..ht.ashGp.Rohtpa..h.sls...t.................................................................................. 1 60 86 105 +11230 PF11398 DUF2813 Protein of unknown function (DUF2813) Pollington J, Finn RD anon Pfam-B_002207 (release 23.0) Family This entry contains YjbD from Escherichia coli (Swiss:P75828), which is annotated as a nucleotide triphosphate hydrolase. 
20.40 20.40 20.40 20.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.24 0.70 -5.38 21 824 2012-10-05 12:31:09 2008-08-21 16:54:35 3 5 785 0 109 1485 191 326.10 57 61.92 CHANGED MaLERIEIsGFRGIpRLSLslcp.sVLIGENuWGKSSLLDALSLlLss-scLYpFshpDFHhs.u.ppsps+cLpllhTFpEpc.s.-ppstRY+pl..pslWs.scs.GhcRIYYRlpuphs.sssVpTphuFLDtpGps..lslcch-pLsppLlplHPVlRlRDA...RRhtptph.tt.t.p........Rlp+clcphhRcLhspPtplsc..sEl+puLpAhppLl-HYFu.pstppsp.+ph+cp.ppsppsaph..hpslsphlcpsss..+phRllLlGllsshLpA+Gshp..Lc+tARPILLlEDPEsRLHPhMLulAWpLLshLPhQ+IsTTNSuELLS.VPLcpIpRLVRpoc+stuapLs.psLop-DhRRIuFHIRaNRstA ...............................................................................................................................................................................................................MhLERV.EIVGFR....G.I.....N.R.......L..S...L.h.L...-...........p.......N...s.....V...L..I...GENAWGKS...S...LL.D...A...L.......T.L.....L......L.......S.......P..........E.......s.....c....L.....Y......H......F...p..c...c......D.....F..a...a....Ps.....G....D......h.........p......u....+.E..+...H......L.p....I...I....L.........T..Fc......E.o...sG.....+.....a........+.s.....p..R.Y..R.sL...........css...W...........s.s.s.p.D.....G...hH...R..IaYRl..E....GE.....pu...t....D....G.......o....V....h....Th..R..o...F...L....D...p...-.....GpP.......lsl...-..c.....I.....s......c...s......+.cL....l..RL.h.....PVL....R....L...RDAR............FhRRlRs.us...s.s.N.sss.s-.........................sss+QLD...L...s......REL.ss.p..P.Q..p..L.oc.........GpI..R..Q..G.L.S...A.M.sQ.L.LEHYF......................SE....Quu.....u.....p......s..c.h.R...hh...Rc..+..s....p.s...c...pcu.W+h.............LD.I..N.RMI....c....cP.s..u..........RS.h..RlIL.....L.GLF..uTL......L.Q.AK...G..o.lc......L.c+...c..A...R.P....LL.L.lEDPETRLHP...I.M.......L.S..V....A.W....p....L.L......s........L....L........P......L....Q....R....I.....sTTNSGEL..L..S..L.s..PlE.pls..RLVRESu.R.V..AAaRL..G
PsG..............LS.sEDuRRIu..FHIRFNRsSu............................................................................................................................................................................................................................. 0 15 46 81 +11231 PF11399 DUF3192 Protein of unknown function (DUF3192) Pollington J, Finn RD anon Pfam-B_002991 (release 23.0) Family Some members in this family of proteins are annotated as lipoproteins however this cannot be confirmed. 21.80 21.80 21.80 22.40 20.20 21.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.21 0.72 -4.09 32 113 2012-10-01 23:09:26 2008-08-21 16:59:05 3 1 64 0 48 96 32 102.80 40 79.35 CHANGED susVlhlh-ssssphs.......W-c.........+pthN+pplucLpLGt..ohspVhslhGsuDFoEAhpps.......cpplpVLFYRTp+h+uDGhTTKDECTPLlFcNspL..luWGpsu.Ypph .........................................susVl.lh-spspths.......Wcc..ppthN+pplscLsLG...ohspVhshhGssDFoEAhtpp.......cpplpVLFYRTp+....p+SDGhTT.KDECTPLlFcNspL..luWGpsAYpph............... 0 8 20 34 +11233 PF11401 Tetrabrachion Tetrabrachion Pollington J, Finn RD anon pdb_1fe6 Family Tetrabrachion forms a parallel right-handed coiled coil structure with hydrophobic interactions and salt bridges forming a thermostable tetrameric structure. It contains large hydrophobic cavities. No function is known for this family of proteins [1]. 
20.50 20.50 21.60 114.20 18.10 17.00 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.60 0.72 -4.17 2 2 2009-01-15 18:05:59 2008-08-22 10:30:29 3 1 2 8 1 5 0 49.00 100 3.22 CHANGED IINETADDIVYRLTVIIDDRYESLKNLITLRADRLEMIINDNVSTILAS IINETADDIVYRLTVIIDDRYESLKNLITLRADRLEMIINDNVSTILAS 0 1 1 1 +11234 PF11402 Antifungal_prot Antifungal protein Pollington J, Finn RD anon pdb_1afp Family Antifungal protein consists of five antiparallel beta strands which are highly twisted creating a beta barrel stabilised by four internal disulphide bridges [1]. A cationic site adjacent to a hydrophobic stretch on the protein surface may constitute a phospholipid binding site [1]. 25.00 25.00 41.10 40.80 23.90 23.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.05 0.72 -3.77 6 27 2009-01-15 18:05:59 2008-08-22 10:34:21 3 1 23 2 9 23 0 52.00 53 56.43 CHANGED ApYsGKCaKKDNhCKYKspuGKTsIsKC.s...K+Cs+DGsKCEaDSY+sKsh l.cYhGKCTKu-NpCKYKsDpGKsslppCPphs..NKKCsKDGNpCcaDShs+...... 0 0 2 6 +11235 PF11403 Yeast_MT Yeast metallothionein Pollington J, Finn RD anon pdb_1aoo Family Metallothioneins are characterised by an abundance of cysteine residues and a lack of generic secondary structure motifs. This protein functions in primary metal storage, transport and detoxification [1]. For the first 40 residues in the protein the polypeptide wraps around the metal by forming two large parallel loops separated by a deep cleft containing the metal cluster [1]. 
25.00 25.00 93.70 93.70 21.20 19.80 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.76 0.72 -3.81 2 7 2009-01-15 18:05:59 2008-08-22 10:37:10 3 1 6 6 2 4 0 40.00 100 65.57 CHANGED QNEGHECQCQCGSCKNNEQCQKSCSCPTGCNSDDKCPCGN QNEGHECQCQCGSCKNNEQCQKSCSCPTGCNSDDKCPCGN 0 2 2 2 +11236 PF11404 Potassium_chann Potassium voltage-gated channel Pollington J, Finn RD anon pdb_1b4g Family Fast inactivation of voltage-dependant potassium channels occurs by a 'ball-and-chain'-type mechanism. It controls membrane excitability and signal propagation in central neurons [1]. Inactivation is regulated by protein phosphorylation where phosphorylation of serine residues leads to a reduction of the fast inactivation [1]. 20.00 20.00 20.20 23.80 17.90 19.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.18 0.72 -4.16 2 56 2009-01-15 18:05:59 2008-08-22 11:11:04 3 3 29 3 24 54 0 28.50 76 4.63 CHANGED MlSSVCVSSh.+GRKuGNKsssKsChpt-Ms ..MISSVCVSSY.RGRKSGNKPPSKoCLKEEMA... 0 2 7 13 +11237 PF11405 Inhibitor_I67 Bromelain_inhib; Bromelain inhibitor VI Pollington J anon pdb_1bi6 Family Bromelain inhibitor VI is a double-chain inhibitor consisting of a 11-residue and a 41-residue chain. This protein is the 41-residue heavy chain which is joined to the 11-residue chain by disulphide bonds. The inhibitor acts to inhibit the cysteine proteinase bromelain [1]. 
25.00 25.00 298.50 103.10 18.80 17.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.45 0.72 -3.91 2 3 2009-09-25 10:05:36 2008-08-22 11:27:54 3 1 1 2 0 4 0 41.00 95 50.00 CHANGED -EYKCYCsDTYSDCPGFCKpCKAEFGKYICLDLISPNDCVK EEYKCYCTDTYSDCPGFCKKCKAEFGKYICLDLISPNDCVK 0 0 0 0 +11238 PF11406 Tachystatin_A Antimicrobial peptide tachystatin A Pollington J anon pdb_1cix Family Tachystatin A contains a cysteine-stabilised triple-stranded beta-sheet and shows features common to membrane-interactive peptides. Tachystatin A is thought to have an antimicrobial activity similar to defensins.Tachystatin A is also a chitin-binding peptide [1]. 25.00 25.00 39.90 39.90 22.80 18.70 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.57 0.72 -4.10 2 3 2009-01-15 18:05:59 2008-08-22 11:36:20 3 1 1 1 0 4 0 36.30 96 82.58 CHANGED YSRCQLQGFNCVVRSYGLPTIPCCRGLTCRSYFPGSTYGRCQRa YSRCQLQGFNCVVRSYGLPTIPCCRGLTCRSYFPGSTYGRCQRa 0 0 0 0 +11239 PF11407 RestrictionMunI Type II restriction enzyme MunI Pollington J anon pdb_1d02 Family Type II restriction enzyme MunI recognises the palindromic sequence C/AATTG. It makes contact with the DNA via the major groove [1]. 25.00 25.00 85.10 84.90 18.90 17.30 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.61 0.71 -4.70 2 9 2012-10-11 20:44:46 2008-08-22 12:45:49 3 1 8 2 3 11 4 168.90 46 99.93 CHANGED MGKpELpsR.sWQthuGLhuptAEpph.sVF.A.FpGTcYVlhccPKcLKslYup............NP.......paGVS.DaAIpNpcT+KhLasEIKpQ-GaV.Gcs.PpsGRGNAHERSCKhFTPGLLKshpplusl.s.thLPFWlVapGcITRDsKRsREITaWaDcYtspaFhW+ss.Sup.LlpHFpcpL++hLD ....Mupp-LptRtsWQs.SG..h..tAtsAEpshhssFptsFc...sTcYhlpp+Pp-hKslYspV.Lstp.httIasP......p........hhp......aGlS.DaAIpNpcTtKhlasElKRQDGWVEGtp..SsGRGNAHER.CKhFTPGLhcshRphutl.s.thLPFWlVh.GDITRDPpRsREIsaWappYptpaFhWRss.stpsLlpHFpp.Lh.hL...... 
0 3 3 3 +11240 PF11408 Helicase_Sgs1 Sgs1 RecQ helicase Pollington J, Finn RD anon pdb_1d8b Family RecQ helicases unwind DNA in an ATP-dependent manner. Sgs1 has a HRDC (helicase and RNaseD C-terminal) domain which modulates the helicase function via auxiliary contacts to DNA [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.46 0.72 -4.15 5 29 2012-10-03 03:05:55 2008-08-22 12:54:14 3 2 28 1 20 63 15 77.40 40 5.87 CHANGED ElscLshuYERLRslulslGNRMNPPlssaM.PDslLKKlAshLPATE-EFlsLlGsN-ss.sRKYKYFKcTltcLRK+Rpc .......EhsslphsYc+LRElSlslGsRMsP.P.l.ss.Fh.sDslLKKhAshLPhs-ppFspLsslpcp..pc+F.KY..FKsTlhcLp+cRp.p.............. 0 1 10 18 +11241 PF11409 SARA Smad anchor for receptor activation (SARA) Pollington J anon pdb_1dev Family Smad proteins mediate transforming growth factor-beta (TGF-beta) signaling from the transmembrane serine-threonine receptor kinases to the nucleus [1]. SARA recruits Smad2 to the TGF-beta receptors for phosphorylation [1]. 25.00 25.00 27.00 27.00 20.50 18.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.10 0.72 -4.04 3 74 2009-01-15 18:05:59 2008-08-22 13:01:21 3 6 56 3 44 72 0 40.10 79 3.24 CHANGED uRSPNPNNPMEYCSTIPPaQQAssSP..uSPPPSVMVPVGV ...pSPNPNNPAEYCSTIPPLQQAQ.A..SGsLuSPPPTVMVPVGV. 0 8 15 28 +11242 PF11410 Antifungal_pept Antifungal peptide Pollington J anon pdb_1dkc Family This peptide has six cysteines involved in three disulphide bonds. It contains a global fold which involves a cysteine-knotted three-stranded antiparallel beta-sheet along with a flexible loop and four beta-reverse turns. It also has an amphiphilic character which is the main structural basis of its biological function [1]. 
21.00 21.00 21.60 21.30 20.90 20.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.43 0.72 -3.86 8 33 2009-09-11 11:38:32 2008-08-22 13:09:29 3 2 21 2 11 33 0 33.30 43 33.95 CHANGED CIsNGutCpsDuu.ssCCSGFCap.p.PG..hshGhC+sR .......IssGtsCptDGShG.CsSGaChp.t..s...s.GhC+........ 0 1 4 9 +11243 PF11411 DNA_ligase_IV DNA ligase IV Pollington J anon pdb_1ik9 Family DNA ligase IV along with Xrcc4 functions in DNA non-homologous end joining. This process is required to mend double-strand breaks. Upon ligase binding to an Xrcc4 dimer, the helical tails unwind leading to a flat interaction surface [1]. 21.20 21.20 21.20 21.90 20.80 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.59 0.72 -4.21 7 68 2009-01-15 18:05:59 2008-08-22 13:42:40 3 9 55 4 47 66 1 35.80 55 4.08 CHANGED ST+cHFAcEYDpYGDSYhsDTsttpL+-VFpplpss .....ST+cHFA+EY.DsYGDSYasDTDh.sQLKEVFstI+s...... 0 14 18 30 +11244 PF11412 DsbC Disulphide bond corrector protein DsbC Pollington J anon pdb_1jpe Family DsbC rearranges incorrect disulphide bonds during oxidative protein folding. It is activated by the N-terminal domain of DsbD, a transmembrane electron transporter. DsbD binds to a DsbC dimer and selectively activates it using electrons from the cytoplasm [1]. 
21.60 21.60 21.70 21.60 21.20 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.96 0.71 -4.27 187 2597 2009-01-15 18:05:59 2008-08-22 13:54:41 3 21 2012 10 617 2080 1106 118.10 24 21.34 CHANGED pschLssppA.h..........tsssp...pltlphp.lssGaalYhcph..th.....ph.....pssssh....hsphpaPss.p...hhpD.th...ps.sYc.spltlsls..lssss............tth...plplsaQuCs-..slChPspsphhhhhs ....................................t...hl..ppA.F.hph.........ppspp....plslphp..lp.s.Gaa.lYp.cph.........h.......................ph.ssspsh.....hup.h.p.h.P.tu..p.....hh..p...D.....pha.u...p...s...phYc.sp...l.slsls....lptss......................ssh...plplsa..QGCu-...u..hCYPPpsphh....s............ 0 170 352 492 +11245 PF11413 HIF-1 Hypoxia-inducible factor-1 Pollington J anon pdb_1lqb Family HIF-1 is a transcriptional complex and controls cellular systemic homeostatic responses to oxygen availability [1]. In the presence of oxygen HIF-1 alpha is targeted for proteasomal degradation by pHVL, a ubiquitination complex [1]. 17.80 17.80 17.80 18.10 16.40 17.70 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -7.85 0.72 -4.77 17 330 2009-01-15 18:05:59 2008-08-22 14:00:38 3 13 102 4 83 283 0 34.00 61 4.79 CHANGED .sspshsDLDLEMLAPYIPMD.DDFQLsslst.s. .......s.hspshs-LDLEMLAPYIPMD..DDFQLpshs..p......... 0 4 15 35 +11246 PF11414 Suppressor_APC Adenomatous polyposis coli tumour suppressor protein Pollington J, Finn RD anon pdb_1m5i Family The tumour suppressor protein, APC, has a nuclear export activity as well as many different intracellular functions. The structure consists of three alpha-helices forming two separate antiparallel coiled coils [1]. 
22.70 22.70 23.30 23.50 22.60 22.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.58 0.72 -4.20 13 215 2009-01-15 18:05:59 2008-08-22 14:08:54 3 43 72 1 114 197 0 80.70 35 6.60 CHANGED ssashL+phcpLEcE+-hLLtGL-tlE+u+-WYhsQLpslQcp.cplGphusth-hho-hppcpLshphs+lpclNcsLttLhp ...................ht.hphhcELEpE+....slLLtsL-t.E+t+-WYhs.QLpslpcRhc......pLstscs.......h...........shtoDhppcp.Lphpttplpps.cphhshh................... 0 20 29 61 +11247 PF11415 Toxin_37 Termicin; Antifungal peptide termicin Pollington J anon pdb_1mm0 Family Termicin is a cysteine-rich antifungal peptide which exhibits antibacterial activity. A cysteine stabilised alpha beta motif is formed due to an alpha-helical segment and a two-stranded antiparallel beta-sheet [1]. 25.00 25.00 41.00 41.00 19.80 16.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.18 0.72 -4.33 8 150 2012-10-01 23:31:40 2008-08-22 14:24:34 3 1 18 1 0 139 0 34.90 64 56.29 CHANGED CsFpSCWAoCQtQHGIYFRRAaCDGSpCpCValNG ..CshppCWAsCQApHGRYFRRAYC-GShC+CVFNNG.... 0 0 0 0 +11248 PF11416 Sed5p Integral membrane protein Sed5p Pollington J, Finn RD anon pdb_1mqs Family Sed5p interacts with Sly1p , a positive regulator of intracellular membrane fusion, allowing SM proteins to stay associated with the assembling fusion machinery. This allows for participation in late fusion steps [1]. 21.30 21.30 22.70 21.40 18.60 17.90 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.64 0.72 -7.24 0.72 -4.51 11 45 2009-01-15 18:05:59 2008-08-22 14:31:23 3 1 44 1 30 42 0 28.10 50 8.41 CHANGED h.sIpNRThEFQQsVsoYcKhN+Kpsspt .M.sIpsRThEFQQsVhoYcKpNKp.ppp....... 0 5 17 29 +11249 PF11417 Inhibitor_G39P Loader and inhibitor of phage G40P Pollington J anon pdb_1no1 Family G39P inhibits the initiation of DNA replication by blocking G40P replicative helicase. 
G39P has a bipartite stricture consisting of a folded N-terminal domain and an unfolded C-terminal domain. The C terminal is essential for helicase interaction [1]. 20.40 20.40 20.80 20.70 20.10 20.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.42 0.72 -3.99 2 43 2009-01-15 18:05:59 2008-08-22 14:38:22 3 1 41 3 6 44 42 67.20 27 43.94 CHANGED MI.c-slpILphlpthYP...tchpPsDhKshVphWpphLt-Y.hp.l..slpchhtsNKFPPoVu-llcA .........................Mhcp-shcllthlpssYP....php...p...s........c..t....hlslWhphL.cDhsaphlttslcpal..t..ss.p..aPPolA-lh..p............ 1 4 5 5 +11250 PF11418 Scaffolding_pro Phi29 scaffolding protein Pollington J anon pdb_1no4 Family This protein is also referred to as gp7. The protein contains a DNA-binding function and may halve a role in mediating the structural transition from prohead to mature virus and also scaffold release [1].Gp7 is arranged within the capsid as a series of concentric shells [1]. 25.00 25.00 28.00 80.70 22.70 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.27 0.72 -3.67 3 7 2009-01-15 18:05:59 2008-08-22 14:45:34 3 1 5 11 0 9 0 91.00 65 97.55 CHANGED PLchEEHE-ILNKLsDPELscSERTEALQQLRssYGSFlSEYsELTcApEKLsAEK-DLIVSNSKLF...RQlGLTcKKEE..EcKQc-lSETITIEDLEup .Phc.-pHE-ILNpL.DPELspSERTEALQQLRssYGSFlSEYsDLTcopEKLsAEK-DLIVSNSKLF...RQlGlTccpEE..-hKpt-lSETITIEDLEtp 0 0 0 0 +11251 PF11419 DUF3194 Protein of unknown function (DUF3194) Pollington J anon pdb_1pu1 Family This family of proteins has no known function however the structure has been determined. The protein consists of two alpha-helices packed on the same side of a central beta-hairpin [1]. 
21.50 21.50 21.90 22.00 20.40 21.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.77 0.72 -3.78 4 34 2009-01-15 18:05:59 2008-08-22 17:01:13 3 1 34 1 25 37 2 83.60 34 86.78 CHANGED IGLPcLoEE-LIElG-luQ+lIIcaIF-+Lu+SEV+DlEVTsRINpGETLDLELEVYlEVPlFV+VDVEuLIDEAlDKAYculEcaL ..................h.cLsptpl.clu-hAtcsh.thlFs+l.spS-VcDl-VTlplpc...s.psLsLEl-VYlpsP.hsc.sDs-pllD-AlcpAhpsV-ch.... 0 4 11 19 +11252 PF11420 Subtilosin_A Bacteriocin subtilosin A Pollington J anon pdb_1pxq Family Subtilosin A is a bacteriocin from Bacillus subtilis.The protein has a cyclized peptide backbone and forms three cross-liks between the sulphurs of Cys13, Cys7 and Cys4 and the alpha-positions of Phe22,Thr28 and Phe31 [1]. 25.00 25.00 33.90 33.80 18.10 16.60 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -7.79 0.72 -3.92 2 17 2009-01-15 18:05:59 2008-08-26 12:13:47 3 1 16 1 2 6 0 33.70 94 77.33 CHANGED NKGCATCSIGAACLVDGPIPDFEIAGATGLFGLWG NKGCATCSIGAACLVDGPIPDFEIAGATGLFGLWG. 0 1 1 1 +11253 PF11421 Synthase_beta ATP synthase F1 beta subunit Pollington J anon pdb_1pyv Family The NMR solution structure of the protein in SDS micelles was found to contain two helices, an N-terminal amphipathic alpha-helix and a C-terminal alpha-helix separated by a large unstructured internal domain. The N-terminal alpha-helix is the Tom20 receptor binding site whereas the C-terminal alpha-helix is located upstream of the mitochondrial processing peptidase cleavage site [1]. 
20.90 20.90 22.10 39.10 20.20 20.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.61 0.72 -3.30 12 41 2009-01-15 18:05:59 2008-08-26 12:47:47 3 3 21 1 19 40 0 45.40 55 8.25 CHANGED MASRRlLSSLLRSouR+p...uu+.......sPthssst.u......RsSPsGaLLNR MASRRlLSSLLRSuSRtp...uu........sPthssPp..hp.....RsSPsGaLLNR 0 3 10 14 +11254 PF11422 IBP39 Initiator binding protein 39 kDa Pollington J anon pdb_1q87 Family IBP39 recognises the initiator which is solely responsible for transcription start site selection. IBP39 contains an N-terminal Inr binding domain connected to a C-terminal domain. The C domain structure indicates that it interacts with the T. vaginalis RNAP II large subunit C-terminal domain. Binding of IBP39 to Inr recruits RNAP II and initiates transcription [1]. 25.00 25.00 184.80 184.10 21.00 19.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.34 0.71 -4.77 7 7 2009-01-15 18:05:59 2008-08-26 13:06:39 3 1 1 5 7 8 0 181.70 26 53.63 CHANGED hcslpIhths.pppppF+ppslphWpcllp.stth.shshp.FIcthsppF...................ptspQshcN........uhpsIp.lLsspssshlThsDFhpFhAtFGP.-olhhKItphL..s.t.tpWL.h..pPph.pphs....slsuaFsps..NChlhph.sGhpa+saNhPhl.sususYLhDEssppapSWcphhp.h ..cslpIhths.pppppF+ppslphWpcllp.stth.shshp.FIcthsppF...................ptspQshcN........uhpsIp.lLsspssshlThsDFhpFhAtFGP.-olhhKItphL..s.t.tpWL.h..pPph.pphs....slsuaFsps..NChlhph.sGhpa+saNhPhl.sususYLhDEssppapSWcphhp... 0 7 7 7 +11255 PF11423 Repressor_Mnt Regulatory protein Mnt Pollington J anon pdb_1qey Family Mnt is a repressor which is involved in the genetic switch between lysogenic and lytic growth in bacteriophage P22. The C-terminal domain of the protein consists of a dimer of two antiparallel coiled coils with a right handed twist, which is both stronger and has closer inter-helical separation compared with those found in left-handed coiled coils [1]. 
20.90 20.90 25.70 34.00 19.30 17.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.69 0.72 -7.10 0.72 -4.63 3 59 2009-01-15 18:05:59 2008-08-26 13:11:58 3 2 55 6 1 38 0 29.60 66 36.09 CHANGED RDDAERhADpQSElVKKMVFETLKDhY+K+ .DDAERhA-pQS-hVKKhVFDTLKclY+K.s. 0 0 0 0 +11256 PF11424 DUF3195 Protein of unknown function (DUF3195) Pollington J anon pdb_1rki Family This archaeal family of proteins has no known function. 25.00 25.00 64.20 64.00 19.80 18.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.18 0.72 -3.80 4 9 2009-01-15 18:05:59 2008-08-26 13:26:27 3 1 9 2 6 10 0 82.00 44 90.33 CHANGED alIl+TlPKKEtIVARDLCDClYYYDppVhC+slusuRVYlhTpl-hLcpCLph+YF+KLlKslElaD.Vpp.cPs.CscChllpIG-VY ...lllpTls+KEthVARDLCDCLYaa.DptVhCcsluPG+VYVpT.p.hphLcpCLsM+YFKpLlKtlElaDcVSp.pPs.ps.shh................. 0 1 2 3 +11258 PF11426 Tn7_TnsC_Int Tn7_TnsC; Tn7 transposition regulator TnsC Pollington J anon pdb_1t0f Family TnsC is a molecular switch that regulates transposition and interacts with TnsA which is a component of the transposase. The two proteins interact via the residues 504-555 on TnsC. The TnsA/TnsC interaction is very important in Tn7 transposition [1]. 20.30 20.30 20.70 23.30 19.30 18.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.19 0.72 -4.25 10 45 2009-01-15 18:05:59 2008-08-26 14:47:22 3 2 43 2 12 38 1 46.90 38 8.70 CHANGED IKps-WcTLcS-DLRFlaSQssssp.shYptLcssGLlFDlpuhh+csG ....lK.s-WcoLsssDLRalYSQppspp.sha-pLKpp.Gl.IlDhpslhpt..... 1 3 6 7 +11259 PF11427 HTH_Tnp_Tc3_1 Tc3_transposase; Tc3 transposase Pollington J anon pdb_1tc3 Family Tc3 is transposase with a specific DNA-binding domain which contains three alpha-helices, two of which form a helix-turn-helix motif which makes four base-specific contacts with the major groove. The N-terminus makes contacts with the minor groove. 
There is a base specific recognition between Tc3 and the transposon DNA. The DNA binding domain forms a dimer in which each monomer binds a separate transposon end. This implicates that the dimer has a role in synapsis and is necessary for the simultaneous cleavage of both transposon termini [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.24 0.72 -4.33 2 160 2012-10-04 14:01:12 2008-08-26 14:57:22 3 12 12 2 152 76 7 47.00 39 24.22 CHANGED RGouLSDhEpu.l.sM+..ssplpEhup+lsRpRpCIpcaLKsPspYGso ...............lo..EpAplDlM.hQLG.hSlptMS+plsRSRsslcpY..lsDPlsYG..t....... 0 8 136 152 +11260 PF11428 DUF3196 Protein of unknown function (DUF3196) Pollington J anon pdb_1td6 Family This proteins is the product of the gene MPN330 and is thought to involved in a cellular function that has yet to be characterised. The proteins has 11 helices and a novel fold [1]. No function is currently known for this protein. 
26.70 26.70 26.80 26.80 26.50 26.50 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.81 0.70 -5.11 5 47 2009-01-15 18:05:59 2008-08-26 15:32:04 3 3 47 1 11 28 1 224.10 27 86.95 CHANGED Ml....KK.sshlKphsshKppFtshppl.pshpp...tN.DpLTpFFhslLpKlcsLVKcKDFKpAh-plpEELsoPYlPhsLluaFcuhhhlIs+sL.....aEsENstLAsLs+c-llsplLssaPs..sLshlcYLLsKpcsFlcss-LphhsalLTsKclh-lcKhshhpALspIssFlsppF-YYNSKLKQpFslTLscFslhtp.sopsYFsQLlcplpphFhKEPSppEFAsEIIssllVsYFPhHPsa.slscLApsIaQYVpNslpNclsshKs.ElpKlIVcslhcpLDc .............................................h........................................ppYa--ILcplcpllcc+castAhslIspELshPYIPh.....hhppFcphhhclK+ph............h.pp.t.....sp.Lsppphhphltpshsp..plshhph.hh.+.ht.hp.....l..t..hpphL.spphtt.sKh.ll.hLt..p..IsppFphhpphhcpoh.ls.hp.s.....pt...h.pl.s.lpp..h...K.s.Pshhphsppll..hhh.haPhh..h.p.p.lAhsIhtYhppthts..hphp......p.l.......................................................................................................................... 0 6 9 10 +11261 PF11429 Colicin_D Colicin D Pollington J anon pdb_1tfk Family Colicin D is a tRNase which kills sensitive E.coli cells via a specific tRNA cleavage. It targets the four isoaccepting tRNAs for Arg and cleaves the phosphodiester bond between positions 38 and 39 at the 3' junction of the anticodon stem and the loop [1]. 21.70 21.70 22.90 23.70 21.50 19.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.24 0.72 -3.83 8 39 2009-01-15 18:05:59 2008-08-26 15:48:53 3 10 37 3 7 52 0 87.90 39 12.70 CHANGED QLpKKFK.HAsDFGlsspstNppTLscFcDsIpcHlusssTVpKGTYRptpsSKVYaNssTthsVIlcpsGsFl..SGW+lsPto-phphYlcss .....QLpKKaK.HAsD.FGlsspptNppTL.spFccuIppHlsst..sT.lp.cGTYRt...tpsSKVaaNss.TspsVllcpsGpFl..SGWKlsPtspphp.ahps.h.......... 
0 3 5 6 +11262 PF11430 EGL-1 Programmed cell death activator EGL-1 Pollington J anon pdb_1ty4 Family Initiation of programmed cell death in C.elegans occurs by the binding of EGL-1 to CED-9 which disrupts a complex involving CED-4/CED-9 and allows CED-4 to activate CED-3, a caspase. It is the C terminal domain of EGL-1 which is involved in the formation of the complex with CED-9. The formation of the complex induces structural rearrangements in CED-9 and EGL-1 adopts an extended alpha-helical conformation [1]. 20.50 20.50 23.50 22.70 19.90 15.70 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.40 0.72 -6.46 0.72 -4.32 3 13 2009-09-11 00:00:58 2008-08-26 16:23:42 3 2 5 2 13 13 0 20.90 62 19.47 CHANGED Y-IGoKLAAMCD-FDAEMMSY .a-IGoKLAsMCDDFDAcMMSY.. 0 2 3 13 +11263 PF11431 Transport_MerF Membrane transport protein MerF Pollington J anon pdb_1waz Family The mercury transport membrane protein, MerF has a core helix-loop-helix domain. It has two vicinal pairs of cysteine residues which are involved in the transport of Hg(II) across the membrane and are exposed to the cytoplasm [1]. 21.20 21.20 21.50 22.00 19.40 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.13 0.72 -4.31 13 68 2009-01-15 18:05:59 2008-08-26 16:37:18 3 2 60 3 14 54 6 45.20 58 57.94 CHANGED TPlLVILLGsVGLSAlsGaLDaVLLPALAlFIuLTlYALW++pppp ....TPlLVILLGsVGLuALsG..YLDYVLLPALAlFIGLTlYAlhR+cpt.s...... 0 6 10 13 +11264 PF11432 DUF3197 Protein of unknown function (DUF3197) Pollington J anon pdb_1wn9 Family This bacterial family of proteins has no known function. 
20.60 20.60 21.00 106.70 20.50 19.00 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.41 0.71 -4.07 3 15 2009-01-15 18:05:59 2008-08-26 16:40:03 3 1 15 2 9 15 0 113.80 55 85.09 CHANGED hQAlpA+Lscl-LoEARLlLITDRQDERsQARYAALLThG+....EALLoAPAFGPAYGPuGAcALAELVRWApupGh.RspETVLSuGDFsRVLAEPDA-EV+RLlAAoNPoDPAIY ..LcALKsAL...cGl+hsEAKVhLITDWQD+R-pARYAL.LL+sGK....+cLLssDAFGPAF.PuGEcALuELVuhLlppGAR+FYEAVVSPGEhsuLL-LPPEEll+RlhAhANPTDPuIY 0 2 5 9 +11265 PF11433 DUF3198 Protein of unknown function (DUF3198) Pollington J anon pdb_1x9b Family Some members in this family of proteins are annotated as membrane proteins however this cannot be confirmed. Currently, this archaeal family has no known function. 25.00 25.00 58.40 58.00 21.40 18.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.44 0.72 -4.09 3 4 2009-01-15 18:05:59 2008-08-26 16:42:54 3 1 4 1 4 6 4 51.00 50 41.21 CHANGED L+DTu+FEShINSsSKSVFVRNLsELERLAKRLGKSYcIQLEpAKEKWKVK L+Dph+FEphINSpSKphFVcNLsELEcluh+LGcuYp.pLEpAKcKWKVK 0 2 3 3 +11266 PF11434 CHIPS Chemotaxis-inhibiting protein CHIPS Pollington J anon pdb_1xee Family The chemotaxis inhibitory protein, CHIPS, is an excreted virulence factor which acts by binding to C5a and formylated peptide receptor (FPR), blocking phagocyte responses. A fragment of CHIPS, which contains residues 31-121 comprises of an alpha helix packed onto a four stranded anti-parallel beta-sheet. Most of the conserved residues of CHIPS are present in the alpha-helix [1]. 25.00 25.00 43.10 42.30 20.00 18.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.14 0.72 -4.37 3 98 2009-01-15 18:05:59 2008-08-26 16:52:19 3 1 97 2 1 14 0 89.50 99 60.91 CHANGED NSGLPTTLGKLDERLRNYLKKGTKNSAQFEKMVILTENKGYYTVYLNTPLAEDRKNVELLGKMYKTYFFKKGESKSSYVINGPGKTNEYAY NSGLPTTLGKLDERLRNYLKKGTKNSAQFEKMVILTENKGYYTVYLNTPLAEDRKNVELLGKMYKTYFFKKGESKSSYVINGPGKTNEYAY.... 
0 1 1 1 +11267 PF11435 She2p RNA binding protein She2p Pollington J anon pdb_1xly Family She2p is a RNA binding protein which binds to RNA via a helical hairpin. The protein is required for the actin dependent transport of ASH1 mRNA in yeast, a form of mRNP translocation. ASH1 mRNP requires recognition of zip code elements by the RNA binding protein She2p. She2p contains a globular domain consisting of a bundle of five alpha-helices [1]. 25.00 25.00 36.30 196.30 21.20 20.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.42 0.71 -4.93 6 29 2009-01-15 18:05:59 2008-08-26 17:02:09 3 1 26 2 17 23 0 208.20 61 83.75 CHANGED -hlcpllslaSsYISsYlclLNKaIshLRRVSTLRFERsTLIKaVKKLRFaNDsLhoash.......psphshcscsLpcslpslGSaFlKsLEhlDLLNYYLTQsLQsElISKTLNpDLlls--sIssl-DTYsaFVKFoQWhlESLu...lsDsLLslEllQFolKCAlEDslDls-T-sIhLQEVhPVcsppEapsLhtpWpslLssKhut ......-llEQIlsLhS+YLSSYIHlLNKFIuHLRRVuTLRFERTTLIKFVKKLRFYNDslLSYNsp......cs-h-spuDohc.........clllPIAShFlKClEThDLLNYYLTQSLQKEIlSKTLNEDLTLosEoIlAIDDTYNHFVKFoQWMIESLp...IsosLLsLEVVQFAlKCAcEDGT........slsETDNIFLQEllPVsSEEEFpoLostWpuILcuKLss.. 0 1 7 14 +11268 PF11436 DUF3199 Protein of unknown function (DUF3199) Pollington J, Finn RD anon pdb_1xn8 Family Some members in this family of proteins with unknown function are annotated as YqbG however this cannot be confirmed. Currently the proteins has no known function. 25.00 25.00 25.40 27.60 24.60 24.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.68 0.71 -4.27 6 44 2009-09-10 23:59:37 2008-08-26 17:06:38 3 1 38 2 7 46 0 124.00 47 96.62 CHANGED .hITP-ELhsYSVaEpVKsRssEhLctDILEAEs-lhplsG+cFoDtch.P.....lPEplRLALL+LAQaaAhhNsDEShhKGapSEKl..GDYSYTlusus.SlsKPDVhtLLtDYl....sphstscs+h+hR ...LIsPs-lhsYSVa.-pVKsRPppLLpQDIlEAEuEhtplsGH.cFsDpsh.P......LP-cl+LALlKLAQYaALl.Nu...D....E...oshcuYpSEKl..GDYSYTlussu..ulpKP-VapLLp-aI....sths.tpsph+hR............ 
0 2 5 6 +11269 PF11437 Vanabin-2 Vanadium-binding protein 2 Pollington J anon pdb_1vfi Family The Vanadium binding protein, Vanabin2, contains four alpha-helices connected by nine disulphide bonds. Vanadium accumulates in Ascidians however the biological reason remains unclear [1]. 25.00 25.00 39.00 38.40 19.20 18.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -11.94 0.72 -12.75 0.72 -4.00 10 14 2009-01-15 18:05:59 2008-08-26 17:19:14 3 2 3 1 8 11 0 92.40 33 55.68 CHANGED CtspCposCssl+sC.+tpCtpsCtuspstp......hCp+sCt+spChss.......sCcsChpc.C.ht.hctCRsssCuppCP.t............tKshKsssC+cCMcpNC ..ChspCpssCssl+sC.ptpChpsCtuspp.t......hCp+sChhspC.ss.......sCcpChts.C.ht.hctC+pspCuppCP.t..........t.tpth+sssC+pCMhpNC 0 5 5 8 +11270 PF11438 N36 36-mer N-terminal peptide of the N protein (N36) Pollington J anon pdb_1qfq Family The arginine-rich motif of the N protein is involved in transcriptional antitermination of phage lambda. N36 forms a complex with boxB RNA by binding tightly to the major groove of the boxB hairpin via hydrophobic and electrostatic interactions forming a bent alpha helix [1]. 25.00 25.00 27.90 36.70 17.60 16.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.82 0.72 -4.44 2 67 2009-01-15 18:05:59 2008-08-26 17:27:41 3 1 62 1 0 41 0 34.90 97 32.86 CHANGED DAQTRRRERRAEKQAQWKAsp.LhsshptpsspR. DAQTRRRERRAEKQAQWKAANPLLVGVSAKPVNRP.. 0 0 0 0 +11271 PF11439 CesA DUF3200; Type III secretion system filament chaperone CesA Pollington J anon pdb_1xou Family This family represents a chaperone protein for the type III secretion system - TTSS - translocon protein EspA, to prevent the latter's self-polymerisation. The TTSS is a highly specialised bacterial protein secretory pathway, similar in many ways to the flagellar system, that is essential for the pathogenesis of many Gram-negative bacteria. 
The twenty or so proteins making up the TTSS apparatus, referred to as the needle complex, allow the injection of virulence proteins (known as effectors) directly into the cytoplasm of the eukaryotic host cells they infect; however, the injection process itself is mediated by a subset of extracellular proteins that are secreted by the needle complex to the bacterial surface and assembled into the type III translocon - EspA. EspB and EspD. EspA polymerises into an extracellular filament, and, as with other fibrous proteins, is apt to undergo massive polymerisation when overexpressed. CesA is the secretion chaperone protein that binds to EspA. CesA is dimeric and helical, and it traps EspA in a monomeric state and inhibits its polymerisation. 25.00 25.00 28.10 140.30 22.90 20.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.30 0.72 -4.06 2 64 2009-11-23 17:28:21 2008-08-27 09:06:07 3 1 63 3 1 15 0 91.60 95 88.57 CHANGED MsIVpQT+NKELLDKKIRSEIEsIKKIIAEFDVlKEsVN.LSEKAKTsPQAAEpLNKLIEGYTYGEER+LYDSALSKIEKLIEThpPsRStSQ.T M....SQTRNKELLDKKIRSEIEAIKKIIAEFDVVKESVNELSEKAKTDPQAAEKLNKLIEGYTYGEERKLYDSALSKIEKLIETLSPARSKSQST. 0 0 0 1 +11272 PF11440 AGT DNA alpha-glucosyltransferase Pollington J anon pdb_1xv5 Family The T4 bacteriophage of E.coli protects its DNA via two glycosyltransferases which glucosylate 5-hydroxymethyl cytosines (5-HMC) using UDP-glucose. These two proteins are the retaining alpha-glucosyltransferase (AGT) and the inverting beta-glucosyltransferase (BGT). The proteins in this family are AGT. AGT adopts the GT-B fold and binds both the sugar donor and acceptor to the C-terminal domain. There is evidence for a role of AGT in the base-flipping mechanism and for its specific recognition of the acceptor base [1]. 
25.00 25.00 31.00 30.10 18.90 16.90 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.14 0.70 -5.57 2 14 2009-01-15 18:05:59 2008-08-27 09:18:47 3 1 11 9 1 15 23 335.00 57 79.25 CHANGED CGVTKFSLEQRDWFIKNGHEVTLVYAKDKSFTRssuHDaKSFSIPVlLAKEYDKsLKLVNDCDILIINSVPATSVpEsTINNYKKllDNIKPSlRVVVYQHDHSsLSLRRNLGLEETVRRADVIFSHSDNGDFNKVLMKEWYPETVSLFDDIEEAPTVYNFQPPMDIsKVRSTYWKDVSEINMNINRWIGRTTTWKGFYQMFDFHEKaLKPAGhSTlMEGLERSPAFIsIKEKGIPYEYYt.+plDphplAPN.PsQILDpYlNSEMLERMSKSGFGYQLSKLspKYLQRSLEYTHLELGACGTIPVFWKSTG-NLKFRVDNTPLTSHDSGIIWFDENDMESTFERIKELSSDRA .......CGVTKFSLEQRDWFIKNGHEVTLVYAKDKSFTRssAHDaKSFSIPVlLAKEYDKTLKLVN.DCDILIINSVPATSV.EE-.TINNY.KKIIDNIKPSlRVVVYQHDHSsLSLRRNLGLEETVRRADVIFSHSDNGDFNKVLMKEWYPETVSLFDDIEEAPTVYNFQPPMDIsKVRSTYW.KDVSEINMNlNRWIGRTTTWKGFYQMFDFHEK.aLKP.AGhSTIMEGLERS.P.AFIsIKEKGIPYEYYph+plDphKlAPNhP..sQILDpYVNSEMLERMSKSGFG...YQLSKLsKKYLQR...SLEYTHLELGACGTIPVFWKSTGENLKFRVDNTPLhSHDSGIIWFDENDMESTFERIKELSSDRA................................................................................................................................................................................................................................. 0 1 1 1 +11273 PF11441 MxiM Pilot protein MxiM Pollington J anon pdb_1y9l Family MxiM, a Shigella pilot protein, is essential for the assembly and membrane association of the Shigella secretin MxiD. MxiM contains an orthologous secretin component and has a specific binding domain for the acyl chains of bacterial lipids [1]. The C terminal domain of MxiD hinders lipid binding to MxiM [1]. 
25.00 25.00 259.70 259.40 19.40 17.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.70 0.71 -4.01 2 8 2009-01-15 18:05:59 2008-08-27 11:11:16 3 1 8 3 0 4 0 115.00 99 80.35 CHANGED uSSNSEKEWHIVPVSKDYFSIPNDLhWSFNTTNKSINVYSKCISGKAVYSFNAGKFMuNFNVKEVDGCFMDAQKIAIDKLFSMLKDGVVLKGNKINDTILIEKDGEVKLKLIRGl SSSNSEKEWHIVPVSKDYFSIPNDLLWSFNTTNKSINVYSKCISGKAVYSFNAGKFMGNFNVKEVDGCFMDAQKIAIDKLFSMLKDGVVLKGNKINDTILIEKDGEVKLKLIRGI 0 0 0 0 +11274 PF11442 DUF2826 Protein of unknown function (DUF2826) Gunasekaran P, Mistry J anon Pfam-B_001753 (release 23.0) Family This is a family of uncharacterised proteins that is highly conserved in Trypanosoma cruzi. 20.20 20.20 22.50 22.50 18.60 17.40 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.43 0.71 -4.57 2 60 2009-09-11 10:16:18 2008-08-27 11:20:43 3 1 2 0 2 60 0 131.00 79 68.08 CHANGED pRERtRE.LLL.LASLLPhVASHADYh..ADHGWCASTSDVVCRHFTAPVKHTSRRMLWLWIWCRtCSRHhFARLLFTALR.IDASP.ESFsAAPCVVLstphohll................Csh.LTV.tpsVhhp+s...+pRhasshhtCspaspuphlt ...t..cRERE.LL.LLLAS.LLPFVASHADYCLSADHGWpASTSDVVCRHFTAPVKHTSRRMLWLWlWCRcCSRHYFARLLFTALRQIDASPPESFTAAPCVVLPAQ.ShVVhRLsDupsPLRWsMQGCGLPLTVLGTAVWMR+P...HERMYCGphKCVKYAESQhLQ... 1 0 0 2 +11275 PF11443 DUF2828 Domain of unknown function (DUF2828) Gunasekaran P, Mistry J anon Pfam-B_001814 (release 23.0) Domain This is a uncharacterised domain found in eukaryotes and viruses. 
29.00 29.00 29.20 29.40 28.00 28.80 hmmbuild -o /dev/null HMM SEED 534 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.85 0.70 -6.30 19 303 2009-01-15 18:05:59 2008-08-27 11:21:35 3 3 139 0 139 295 228 343.40 21 87.77 CHANGED TENsusTahoS....ssssLDhFhplss.pss..........cplhsLhttAWs....cDs.hslKllhplRslR.sG+u-+pu...FhpshhaL.tcp..aPpTlttN....................................................hpplspaGha.KDlhpllhphlc.....................................................................................................................................................pshcLauctLtpDhp.lt..tt..............hpplSLAAKWsPo.spphsctshhsptlut..........................hthsccchRK.sLsPLR+tLplsEhhMuA+pWsplsYs+VsSlAMtpYpchFh++DspR.............FptYLpslppGc.......sKlsAuulhPa-llpphhs.sspst......................A-hQWcshscplpppGp..lpNslAlsDVS.....GSMsu............PMcVsluLGLLlSEh..sssPa+sclITFSpsPphcpl..pGc.sLt-+sphlpphsWG.hsTshptVF.-hILpsAlcspLs.-cMl+clFVFSDMEFDpAssttt................................hcTsaEslpc+apcsGYt..lPclVFWNLpsssu...sPVsuscpGVALVSGFStslh+hhLps...........................................phsPhshMh..................................culs 
.....................................................................................................................h......t.....t.h...h.....s................h.....................................................................................hGhh.c.h...........................................................................................................................................................................................................................................................huKh...........................................................hpt.....hp....h.-..ht.t...........h..h.a...st.s.........hF..p...t.....................h...a..th..t...................h..t........h...h.............................................................................................................................p.hsh.shs.....tp................h..s.uhshhhst....t..att.hh.att.......h..................t.........h..........thph.tsh.p.ll.........uht..th......t.hhlhoDh.hp.s............................................................t.h...a.t.t.s.......h.........PphlhWslt.............................h........sh.hhsG.s.thht.h.t........................................................................................... 0 40 86 114 +11276 PF11444 DUF2895 Protein of unknown function (DUF2895) Gunasekaran P, Mistry J anon Pfam-B_002001 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
22.60 22.60 22.80 50.90 21.60 22.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.18 0.71 -5.10 26 239 2009-01-15 18:05:59 2008-08-27 11:22:14 3 3 191 0 62 214 10 196.20 50 89.80 CHANGED +ptlsspcpHIhoLRlshuhLsllhlshhhuhhpAPpcLslHlPPDLRu.GSTRph.....W-VPspsVYuFuaYIFQQlNRWspsGcpDYtpsIt.tLpsYLTPuCpshLppDhc.RtpsGELppRsRsVa.EIPGRGYusp....+VhlhopcsWsVpLDlsscEhap...uEsVK+sl.lRYPL+VVRhDlDsE+NPaGLAlDCYsus.PpRl ................ptlsptpsHIpTLRlusu...hLsllhlshshGWhpAPccLTIHlPPDLRS.GSTRhW....WEVPPpoVYuFuaYIFQQLNRWPpsGEpDYspNlp.pLusYLTPuCpsaLppDachRpssGELRpRVRslY.EIPGRGYu-s..........+VpshSpcsWhVpLDlsuDEYat........uE.VKRAL.lRYPl+VVRh-lDPppNPFGLALDCYsus.PQRl................. 0 8 29 50 +11277 PF11445 DUF2894 Protein of unknown function (DUF2894) Gunasekaran P, Mistry J anon Pfam-B_001968 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 59.20 59.10 22.50 21.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.78 0.71 -11.41 0.71 -4.05 44 142 2009-01-15 18:05:59 2008-08-27 11:23:51 3 2 136 0 46 157 12 178.40 44 83.45 CHANGED LDAW....REpGADRLDPVRF+hl-ALpRRAAApsGsARcLLDsRLAsLl-uYAsplpcApsssssspss......................................................sussstPA+GsLAuLlchlu...+p.......A.scpRus..............sth..hsE.c....hL.DY...................FRcT...WS+lss-pQlRQSL-QVP+NAGPLNSSsLVHRSLoLMRELSPcYLpQFLSYVDALSWLEQhsuss ................................L-AWRppGADRlDPVRF+hl-ALt+RAAupsGtsRplL-sRLusLlcuYAttlppstsssts.t...............................................................ss......ssstsspusLusLlp.lu.................u.sthht............................................ssth.hhsE.t....hL.Dh...............................FRco...Wo+lps-pQlRQSL.ppVPcNAGPLNSssLVHRuLsLMRELSPtYLppFLSYVDALuWLEQhsuts................. 
0 7 21 34 +11278 PF11446 DUF2897 Protein of unknown function (DUF2897) Gunasekaran P, Mistry J anon Pfam-B_002015 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 21.70 21.70 22.50 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.79 0.72 -4.14 32 221 2009-01-15 18:05:59 2008-08-27 11:24:52 3 1 220 0 49 148 6 61.10 35 93.54 CHANGED .shpsWlIIllVlGVIsuNLAsLKaoA+hKhsphsc.hpphppps........t.cppsst- ..........apsWlIlllVluVIVGNlAsLKaTAp.hKhsph.-cppcpsc.pt..........t.........t.............................................. 0 7 14 34 +11279 PF11447 DUF3201 Protein of unknown function (DUF3201) Pollington J anon pdb_1yb3 Family This archaeal family of proteins has no known function. 25.00 25.00 26.60 38.10 24.00 22.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.01 0.71 -4.80 4 14 2009-01-15 18:05:59 2008-08-27 11:29:45 3 1 14 1 12 17 0 150.90 53 88.93 CHANGED EFLN+hWE-sFcLREEL+EELc..GFcVEEVsEVFNAYLYlDGcWcEMKYPHPAFsl+PGGEVGATPQGFYFVFAFsKE-LocEFlccFlcsFcK.SFIYGhENFLEDFYN...ssPhSh-EVac+IhpScEchINFEVDhshs+EELK+cLh+.F .....paLN+hWt-lFcLpEELKEELp..GFcVE-VEEVFNAY.IalDGEWccMcYPHPAFplKPtGEVGATPpuaYFVhAlsKEc.loc-FlptFlctF.+.SFIYGuENFLpDhYN..ccsPhsscEVhccIccScEclhpFEsshs.sh-cLKctLh+............... 0 2 2 7 +11280 PF11448 DUF3005 Protein of unknown function (DUF3005) Gunasekaran P, Mistry J anon Pfam-B_3492 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
25.00 25.00 45.50 45.50 19.40 18.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.45 0.71 -4.47 11 108 2009-01-15 18:05:59 2008-08-27 11:30:58 3 1 68 0 27 98 3 119.10 48 72.35 CHANGED usssRRAssR.lpLDNssTc..DsTVDTDGKshEAA+hAotht...DplhhSNASLsNAls-ss-G..lAGhDSRPGGNpPtlAhRsGapVlcpGhlsss..........................sspGpRsp+VIplpcs ..........ssstRAtsR.lpLDNssTt..DsTVDTDGKshEAuRhASth....DplhhSNASLsNAhPEssDG..hAGhDSRPGGN+PtlAhRsGatVlccGhssss............................st.s.pRst+llplp.s.. 0 2 5 15 +11281 PF11449 DUF2899 Protein of unknown function (DUF2899) Gunasekaran P, Mistry J anon Pfam-B_002023 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 24.40 24.40 25.80 24.50 22.80 24.00 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.94 0.70 -5.24 19 285 2009-01-15 18:05:59 2008-08-27 11:31:55 3 3 211 0 48 243 397 219.80 29 79.21 CHANGED AAlhGAlPGCGGAllVsotassGpluFGAlVAVLsATMGDAAFLLLAspPpsulh..llsluhllGlloGhlVshhatcch..h+st.tptsttspts........................pthphthh........ha.hlllPuhllulh..tuhpl-hsphh.th.thslthhGshh.uh..hhlWAhop.htshpshsspssth....p....lp+ssp-TsFVosWVlsAFLha-lslthsGl-..LtshatthuhhhPLIullIGLlPGCGPQIlVToLYlsGslPhSulluNuISNDGDALFPAIAluPKuAlhATlYoulPAllVGYGaYal .............................uullGslPtCGhulhsss.ahpthlshGsllAlhhuT.s-Ah.lllutt..s...hshh..lhslthlhuhlsGhllshh.hht.h...hp............................................................................................................................................................................................................p.p.ptth.......t...htpuh.tcT.hl.salhhshhhhphhhhhhG.-..lss.htthuhh.PhhuhhlGhlPsCusplllTpLY.lpGslshuu.hushlossGsuLhshht.hs.....ct.t.h..hpl..hs...l.....Ahhluhshh........................................... 
0 18 29 42 +11282 PF11450 DUF3008 Protein of unknwon function (DUF3008) Gunasekaran P, Mistry J anon Pfam-B_3521 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 21.50 21.50 22.00 45.70 19.00 18.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.69 0.72 -3.83 21 166 2009-01-15 18:05:59 2008-08-27 11:37:14 3 1 163 0 47 105 274 57.50 69 82.60 CHANGED AKSKAQQ+AAGAALuAKRG-scpS-L+GAS+pMh-SMSEKELE-hAST++KGKPcHts .AKSpAQQKAAGAALuAKRGEhKhSELpGASKpMa-SMoEKELEEhApTKRKGhPpHhS........ 0 12 24 32 +11284 PF11452 DUF3000 Protein of unknown function (DUF3000) Gunasekaran P, Mistry J anon Pfam-B_3481 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 41.60 41.50 18.10 18.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.02 0.71 -5.03 38 408 2009-01-15 18:05:59 2008-08-27 11:38:31 3 1 407 0 108 282 65 181.80 45 89.57 CHANGED upPstF+cAVsAMpAsslRPEIcLGsIRPP.QRLAPaSaALGA-Vp....cs-...t.hstpu-ssAFGRLILLaDPsGs-AW-GT.hRLVAYlQADLDss.AsDPLLPEVAWSWLs-ALcuR..ssphsALGGTVTuTTSVRaG-IuGPPcAaQlELRASWTAos.....s.DLuuHVpAFs-VLppsAGLP..PtG....VTcL ..........h.Ps.FppAlpuhpu..s....p...hRs-lcltpI.sP.pRLAPaShA..luu-..Vs.....ts-.................................-s.AhGRLILLHDPsGp-AWp......G........T.hRlVAalpADlDsuhAsDPLLPcVAWSWLsDuL...............-u+.............sss.......apAluGTVTussStpFGsluus.....PcupplElRASWTsss.............................DluuHlpAas-lLtpsAGLP...Psslss........... 0 35 80 100 +11285 PF11453 DUF2950 Protein of unknown function (DUF2950) Gunasekaran P, Mistry J anon Pfam-B_002484 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
20.20 20.20 22.20 23.30 20.00 19.50 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.56 0.70 -5.41 42 260 2009-01-15 18:05:59 2008-08-27 11:39:24 3 3 231 0 69 187 20 245.90 54 89.42 CHANGED QpsFsoP-tAspAhssAlss..sDpssLtplLGsshccllssss..hsppshtcFlptapppH..pl.tc.....scscAhLtVGsssWshPlPlVKpusG.WpFDstAGpcEllsRRIGRNELsAIpshhAYlDAQp-YApp...ttstshcYAQ+llSosGK+DGLYWssss.GcstSPLGPhhupus.tpsh..........spsYaGYaaRILs.uQGssAsGGthsYllpGphhtGFALlAWPAcYGsTGVhoFhVspcGpVYp+DLG.cTsphApshppFsPD..tsWphV ........................................................................................QppFsoPEsAAsAFusA..lAs..psEspLssLLG-DWRpaLPs-G..sDs-uVsRF.RDW+cuHcIVpc......-ssAaLsVGc-sWpLPlPhVKcssG.W+FDhAAussEILTRpIGRNELuslQAh+AYVDAQpDYhhp.......sp+aApRIISS-GpKDGLYWPocs..G.-...........sPSPLGPsFSsAu.................ss-GYHGY+FRIlo.spssc...................GhALlAWPh+YG-TGVMSFMVNQDcclYQuDLGc-Tcu+spAITcFsPD..spWQs.............. 1 12 25 46 +11286 PF11454 DUF3016 Protein of unknown function (DUF3016) Gunasekaran P, Mistry J anon Pfam-B_3517 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 27.30 73.70 23.20 19.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.70 0.71 -4.60 32 121 2009-01-15 18:05:59 2008-08-27 11:41:40 3 2 116 0 55 121 21 139.00 37 77.50 CHANGED sG....tVpVpapcPcpapDlc.tusthpsphcpphhppLscplschAp+hLsssQpLclplTDlDLAGchcP......ttss.s-lRlV+-laPPRIshsYpLh.psGpVltp.Gcc+LsDhuF.hppsshhss...csh.tYE+phLsDWhccp ..............G.VpVpWp-PppFpDl+..osptpsthcpchhppLspphpc.As+.LssGQpLclslTDlDLAGchcP...........htusshsDlRlV+.DIYP.PRIshsYpLh.tsG+Vltp.G-c+LsDhuF....h.s.hs..thss...-sh.tYEKphLsDWh+cp.. 0 13 29 42 +11287 PF11455 DUF3018 Protein of unknown function (DUF3018) Gunasekaran P, Mistry J anon Pfam-B_3532 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 
21.30 21.30 21.40 21.80 21.20 21.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.16 0.72 -4.04 24 130 2009-01-15 18:05:59 2008-08-27 11:42:13 3 1 117 0 50 103 9 63.50 43 76.54 CHANGED stpRV++HR-+LRstGLRPVQIWVP.DsRsPpFtsEsRRQsthlsp.-tp-sclhtFl-cs.sDh.......-sW .........s.ppRVp+aR-pLRttGLRPlQIWVP.DsRpPcFstEs+RQutLVAt.ctt-s-..sFlDts..sDh.-s........................ 0 10 21 37 +11288 PF11456 DUF3019 Protein of unknown function (DUF3019) Gunasekaran P, Mistry J anon Pfam-B_3539 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 25.00 25.00 50.70 50.60 24.20 19.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.55 0.72 -3.70 30 88 2009-01-15 18:05:59 2008-08-27 11:47:29 3 1 49 0 36 69 8 100.50 29 77.97 CHANGED LploPchCls.ccupsCphslplpWQsss.tshClh.sppsh.hhCapssppsphslphcsspslpFhLhcpcssps....LAssplcVs.s..hpsR+RhRssWsL ..hploPchCls.ccspsCphslplpWQtspstphClh.sppph.hhCapstpptphshphcsppshpFsLl....s.pcsspsLApsclcVs.s..hpsR++hRssWsL... 0 4 13 24 +11289 PF11457 DUF3021 Protein of unknown function (DUF3021) Gunasekaran P, Mistry J anon Pfam-B_3526 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 23.10 23.10 23.10 23.20 23.00 22.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.96 0.71 -4.28 33 949 2009-01-15 18:05:59 2008-08-27 11:48:11 3 2 811 0 85 479 2 136.00 30 95.57 CHANGED M.....h+phlptshhGlslGhhl.llhhhh.ts.....psshss..p.hlshh....................llGhlhuhhohIFcp..-chSlhtphlhHFhsshhshhshshlss.Wh.ht...shlhhhlh..Fl.llYllIWhhhahphppclc+INppL .................................................M.....Kphhp.s.hh..h....G.lhIGhhls....llhs..hlhsst.....hs.hsshos.........hhp.hh......................hIGhlash.us.hlFch....pc.aS...h.hp.t...slhHFhlh...hssals...l...uhlsG.WF.Phph....hhlhhhlh..Fl.llY.ll.IW.hIhahpptpplcpINp.L.............. 
0 17 41 63 +11290 PF11458 Mistic Membrane-integrating protein Mistic Pollington J anon pdb_1ygm Family Mistic is an integral membrane protein that folds autonomously into the membrane [1].The protein forms a helical bundle with a polar lipid-facing surface. Mistic can be used for high-level production of other membrane proteins in their native conformations [1]. 22.30 22.30 22.40 134.60 20.30 18.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.58 0.72 -3.58 3 23 2009-01-15 18:05:59 2008-08-27 11:50:52 3 1 23 0 3 19 0 84.00 77 88.55 CHANGED MKVTDQEKEQLSsAID+MNEGLDsFIpLYNESEKDEPLIQLEDETAELl+cApEpYGp-clNpKLNAIIKEILS.lSLc--GcEc MKVTspEKEQLSsAIDRMNEGLDAFIQLYNESEhDEPLIQh-D-TAELh+QARD.YGQEpLNEKLNTIIKQILS.ISLScEGcc.... 0 1 1 2 +11291 PF11459 DUF2893 Protein of unknwon function (DUF2893) Gunasekaran P, Mistry J anon Pfam-B_001947 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.00 22.00 23.60 38.50 20.70 19.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.01 0.72 -4.18 32 244 2009-01-15 18:05:59 2008-08-27 11:53:01 3 2 227 0 72 189 24 65.70 41 26.50 CHANGED SsPERAlLEhLsclscppoh.ccscplhpGLssLRPchLppLLppCpulKspRLFhhhAcctsHsWtp+L ........SsPEhAhhEllssls....pthSF.EcsspLhpGLssLpPctlppLLppspSVpspRLFLahAchhsHtWhpcl...... 0 13 39 62 +11292 PF11460 DUF3007 Protein of unknown function (DUF3007) Gunasekaran P, Mistry J anon Pfam-B_3514 (release 23.0) Family This is a family of uncharacterised proteins found in bacteria and eukaryotes. 
25.00 25.00 33.60 40.10 21.90 21.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.21 0.72 -3.95 30 109 2009-01-15 18:05:59 2008-08-27 11:58:10 3 1 100 0 54 107 95 98.30 43 74.84 CHANGED MsRhcVlh....IGlulhllGGluYhshp.hhGl-uhsAGIhupulLVlsl.luWsuSYla..RVloGcMTahpQR+cYccsa............LQcRh-uLosEEhptLht-lpp .MpRhDVlh....IGlGlhlhGslhYhsLp.hhGlDuhpAGhhuQs.lLVlG.l.luWluoYlF..RVsspcMTYtpQh+-YEcth............lpKRl-pLo.-ElptL.t-lE.......................... 0 16 38 50 +11293 PF11461 RILP Rab interacting lysosomal protein Pollington J anon pdb_1yhn Family RILP contains a domain which contains two coiled-coil regions and is found mainly in the cytosol. RILP is recruited onto late endosomal and lysosomal membranes by Rab7 and acts as a downstream effector of Rab7. This recruitment process is important for phagosome maturation and fusion with late endosomes and lysosomes [1]. 21.20 21.20 21.30 21.30 21.00 20.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.03 0.72 -3.82 14 188 2009-01-15 18:05:59 2008-08-27 12:46:05 3 2 72 1 109 162 0 59.20 43 18.62 CHANGED TLQELR-VLpERNcLKuplhllQEELthY+st.hppcpt......sss..t.....t.hsphppEsst+ ...TLQELR-VLpERNELKuplhlLQEELthYK..s.....p.hppcpt............st....h.....p.hsp.p.tp................................. 0 21 29 59 +11294 PF11462 DUF3203 Protein of unknown function (DUF3203) Pollington J anon pdb_1ywy Family This family of proteins with unknown function appears to be restricted to Gammaproteobacteria. 25.00 25.00 87.60 87.40 20.00 17.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.28 0.72 -4.07 5 34 2009-01-15 18:05:59 2008-08-27 13:11:16 3 1 34 1 9 28 1 74.00 55 97.07 CHANGED Msl-IDs...sopsCslslEss+c+usssslcItTDuEtRMSVlsl-GcRlaITEsEADALTVAGAsDuRRHLKADD MoIEIDs...ppthColplEGsRc+u..sls..sl+IsTDsEtRhSVlhlDGcRlaIoEp-AptLsVAGApDpRRHLhADD. 
0 2 2 7 +11295 PF11463 R-HINP1I R.HinP1I restriction endonuclease Pollington J anon pdb_1ynm Family Hinp1I is a type II restriction endonuclease, recognising and cleaving a palindromic tetranucleotide sequence (G/CGC) resulting in 2 nt 5' overhanging ends [1]. HINP1I has a conserved catalytic core domain containing an active site motif SDC18QXK and a DNA-binding domain [1]. 25.00 25.00 27.40 27.00 23.00 22.40 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.22 0.71 -4.92 3 24 2012-10-11 20:44:46 2008-08-27 13:12:27 3 1 23 6 3 21 5 187.10 54 83.72 CHANGED MGYDL-cIEpVKAsVLSG.YKADVNVlILVphK..csLDlcNIQVKLVSNp+GFNQIDKRWL+oYQEMWNFPcNIYcILpYFTGELcPp+pcsK.DKRRMFhsEFSQEEQsclLDWL--NKlLILoDILKGRG-FAAEWVLVIQKlsNNh+WlLKNINEVlQHY.suG-VplSP+GSLKIGRVTIQRKGGDNGRESANMLQFKIDPTcLL ...............MGYsLspIcpVKAslLsG.YKADINl.lhlhhK..pslDlcNIQVKLVS.N.p+GFNQIDKRWlKsYpEhWphscsIhplLpYFTGELp......t.sp+RhhhsEFoppEQ.hlLsWFppNKlLlLoDIL+GRG.-FuAEWhLVhQ.Klsp.N.+WsLKNINEVlQHY..ssG-V.ISP+GSL.KIG+...lThQRKGGDNGR.oANMLQFKIDPsELh....................... 0 1 2 3 +11296 PF11464 Rbsn Rabenosyn Rab binding domain Pollington J anon pdb_1yzm Domain Rabenosyn-5 (Rbsn) is a multivalent effector with interacts with the Rab family.Rsbn contains distinct Rab4 and Rab5 binding sites within residues 264-500 and 627-784 respectively [1]. Rab proteins are GTPases involved in the regulation of all stages of membrane trafficking [1]. 20.60 20.60 20.80 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.88 0.72 -4.39 6 155 2009-01-15 18:05:59 2008-08-27 14:31:52 3 5 98 4 111 146 0 41.40 48 9.63 CHANGED LLQQIsNI+uYI+QA+tutRhDEVcsLpcNLRELQcEhtcQQ .........LLQQIpNIcuaIcpA+pssRhDEVcsLpcNLRELpcchtpQp...... 0 25 38 74 +11297 PF11465 Receptor_2B4 Natural killer cell receptor 2B4 Pollington J anon pdb_1z2k Domain 2B4 is a transmembrane receptor which is expressed primarily on natural killer cells. 
It plays a role in activating NK-mediated cytotoxicity through its interaction with CD48 on target cells in a subset of CD8 T cells [1]. The structure of 2B4 consists of an immunoglobulin variable domain fold and contains two beta-sheets. One of the beta-sheets, the six-stranded sheet, contains structural features that may have a role in ligand recognition and receptor function [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.05 0.72 -10.36 0.72 -4.13 4 44 2012-10-03 02:52:13 2008-08-27 14:39:07 3 2 25 6 18 121 0 106.40 44 31.50 CHANGED CsDSuEEVVGVSGKPVpLRPSNIQTKcVSlpWKKph..SHsphc.IlsWsNsuspssshsSSDIYGF-scsFALSIKSAQLQDSGHYLLElTspsGpVCTKNFQlLIhD ........................C.sSu-cVVulSGpPlpLp.Ps..slQTK.h.h.S..lpWKhph.....Spsphc.I..LpW.cNsus...sp........s...s............s...c....pau..F.p....hc....shuL.....IKuAQ.QDSGhYhLElTs.psGpV.pstpFQV.lFD................ 0 1 1 3 +11298 PF11466 Doppel Prion-like protein Doppel Pollington J anon pdb_1z65 Family Dpl is a homologue related to the prion protein (PrP). Dpl is toxic to neurons and is expressed in the brains of mice that do not express PrP. In DHPC and SDS micelles, Dpl shoes about 40% alpha-helical structure however in aqueous solution it consists of a random coil. The alpha helical segment can adopt a transmembrane localisation also in a membrane [1]. The unprocessed Dpl protein is thought to posses a possible channel formation mechanism which may be related to toxicity through direct interaction with cell membranes and damage to the cell membrane [1]. 
25.00 25.00 27.40 27.40 18.20 17.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.19 0.72 -7.10 0.72 -4.38 7 74 2009-09-11 05:32:31 2008-08-27 15:40:05 3 1 51 1 14 79 0 29.70 73 16.75 CHANGED MRKHLGshWLAllChLLhScLSsVpARGIK ....MRKHLGGCWLAIVCVLLFSpLSuVKARGIK 0 1 1 2 +11299 PF11467 LEDGF Lens epithelium-derived growth factor (LEDGF) Pollington J anon pdb_1z9e Family LEDGF is a chromatin-associated protein that protects cells from stress-induced apoptosis. It is the binding partner of HIV-1 integrase in human cells. The integrase binding domain (IBD) of LEDGF is a compact right-handed bundle composed of five alpha-helices. The residues essential for the interaction with the integrase are present in the inter-helical loop regions of the bundle structure [1]. 24.00 24.00 24.30 24.80 23.50 23.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.24 0.72 -4.34 9 180 2009-01-15 18:05:59 2008-08-27 15:40:40 3 4 78 27 106 162 0 106.50 42 20.50 CHANGED EccLQ+LHsEIK.uLKlcssDlcKClcAL-ElusLpVTophLQKNs-slsTLKKIRRYKus..................QslMcKAstlYs+FKshFl.scsEohhshs.phsppEpc.p-pscp .........-p+LQ+lHu-IK.uLKlDssDVp+CLpAL-ELusL..QVTpQhLQKpo-hlsTL.KK......IRRaKus.......................................p.slM-KAshlYs+aKshFLhscs-sh.st..shs.ts.t-pttcc.t..p..................... 0 19 25 69 +11300 PF11468 PTase_Orf2 Aromatic prenyltransferase Orf2 Pollington J anon pdb_1zb6 Family In vivo Orf2 attaches a geranyl group to a 1,3,6,8-tetrahydroxynaphthalene-derived polyketide during naphterpin biosynthesis [1]. In vitro, Orf2 catalyses carbon-carbon based and carbon-oxygen based prenylation of hydroxyl-containing aromatic acceptors of synthetic, microbial and plant origin [1]. 
22.00 22.00 175.70 175.40 21.40 20.90 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.80 0.70 -5.20 12 25 2009-01-15 18:05:59 2008-08-27 16:02:56 3 1 21 8 8 28 0 292.20 28 93.44 CHANGED hDhp+hhuslctsAthlssPhu+cpshslLssFp-sFsp..Gslla+sss..phssplsYRahs..sh.DshuhAlstGLl.scssHPlssLlschssLhs.ssp.usDhssstGhpKhWsaFss..hpsluclhulPuhPtulttptchFtchGL.-+VphlulDYpp+ThNlYF....tutGslstppstuhh+.sGhssPup....thltaspc.....uaslhsTLsa-oucI-Rlsahshph...sssphPA.lpsclc+F...lcssP.t..tph...shuhsausptcYlKhps.Yphshsphhsh.s.tshs ...hshschhuslccsAthlssshu+cpVhslLssFp-sFsp..uV.lhaRsTo..psscsLsaRFh.s..ssh.DPhuhAlspGLl.scssHPlusLls-lpshhP.hsttulDFulspGhpKsWsaFss..hpsluclhulPuhPtultsptchFt+aGL.-+VphlulDYpp+ThNlYF.......hssGshstcslpuhlp-sGhs...sPop....phlthspc.....uaslYsTlsWDSscIcRlsasshsh...sst...slPupl..pspl-+F...scsAPhtspscp..hlhuhuhus....pscYhKlps.Yphp.h.h.........s.............. 0 2 4 7 +11301 PF11469 Ribonucleas_3_2 DUF3204; Ribonuclease III Pollington J anon pdb_1ztd Family This is a family of archaeal ribonuclease_III proteins. 25.00 25.00 155.20 155.00 23.90 17.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.47 0.71 -3.90 3 14 2012-10-03 08:45:47 2008-08-28 09:06:36 3 1 14 2 12 17 0 118.40 62 88.42 CHANGED KGLAKLGDSLINFLaSLALTEaLGKPTG-RVPNASLAIAL-hoGLSKll+PRsDKHAKGDhAEALlAYAWLcGpISpEEAVEILppNLss-VhcFoRKKEsIG+ALAsLh-hIuERLsuc KGLuKFGDSLlNFlaSLALoEaLG+PTGsRVPNASLAIAL-hAGLp+hl.PRsDKHGKGDhAEAlIAYAWLEGtITlEEAVEIl+pNhopDVhcFSRKKEAIGpAhA.LL+lIuERLs..h. 0 2 2 7 +11302 PF11470 TUG-UBL1 TUG; GLUT4 regulating protein TUG Pollington J anon pdb_2al3 Family TUG is a GLUT4 regulating protein and functions to retain membrane vesicles containing GLUT4 intracellularly. TUG releases the GLUT4 containing vesicles to the cellular exocytic machinery in response to insulin stimulation which allows translocation to the plasma membrane [1]. 
TUG has an N-terminal ubiquitin-like domain (UBL1) which in similar proteins appears to participate in protein-protein interactions [1]. The region does have a area of negative electrostatic potential and increased backbone motility which leads to suggestions of a potential protein-protein interaction site [1]. 23.30 23.30 23.30 24.30 23.20 22.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.69 0.72 -4.05 28 216 2012-10-03 10:59:06 2008-08-28 09:53:01 3 9 197 1 158 213 3 62.90 38 12.64 CHANGED sls.ss+ptplKVosusslppVLppuCcKaslcss.....casLp+ps.Kh....lDLSLsaRhusLsssu+LEL ...........slssstRRtplKVoPsp.hLhpVLp-u..CpKhs..lsss.......pasLK..app..Kh....l.DLSlsaRhusLsssA+LEl...... 0 41 75 126 +11303 PF11471 Sugarporin_N DUF3205; Maltoporin periplasmic N-terminal extension Pollington J, Coggill P anon manual Domain This domain would appear to be the periplasmic, N-terminal extension of the outer membrane maltoporins, Pfam:PF02264, LamB. 21.60 21.60 21.60 21.60 21.50 21.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.00 0.72 -4.32 19 821 2009-01-15 18:05:59 2008-08-28 09:54:01 3 6 505 0 61 363 7 58.20 36 11.80 CHANGED Mp.phh.lslhslLhhssh.....tAhAstLTlEQRLtLLEccLppscpELppscpchpc.hcp ..................pp..luhhlhL.hssss....shpu.hA.p...p.L..T......lEQRLtLLEpcLppsc.pchpcscsch+p.hh.p...................... 0 4 15 34 +11304 PF11472 DUF3206 Protein of unknown function (DUF3206) Pollington J anon pdb_2au5 Family This bacterial family of proteins has no known function. 
23.60 23.60 23.60 243.30 22.70 23.50 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.73 0.71 -4.36 2 13 2009-01-15 18:05:59 2008-08-28 09:57:28 3 1 13 1 2 3 0 128.00 91 94.76 CHANGED IlSTpKtPNFpYp-.hcpFL.NhLAFohGhhTtDhSpFsP.VLt.MEc-PsWLpEusuhhQullVtSLl-stNassstpLhsEhspLlpLY.thtpcpLTpNp-sLalslaDKFhhLLLssDEhIh.L IISTNKAPNFQYTDEMDRFLMNTLAFSVGLVTEDYSTFDPEVLKIMEEEPDWLQESVAWCQSLVVGSLVDSGNYDDTGELMDEFNCLLNLYDRARQRELTSNEDNLFLNIHDKFLALLLTDDELITNL 0 1 2 2 +11305 PF11473 B2 RNA binding protein B2 Pollington J anon pdb_2az0 Family B2 is expressed by the insect Flock House virus (FHV) as a counter-defense mechanism against antiviral RNA silencing during infection. In vitro, B2 binds to dsRNA as a dimer and inhibits the cleavage of it by Dicer. B2 blocks cleavage of the FHV genome by Dicer and also the incorporation of FHV small interfering RNAs into the RNA-induced silencing complex [1]. 20.50 20.50 20.70 21.20 20.40 19.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.56 0.72 -4.39 2 10 2009-09-11 16:34:22 2008-08-28 09:58:13 3 1 6 6 0 12 0 74.50 55 62.45 CHANGED M.SKLALIQELPDRIQpAVEsshuMSYQ-APNNVRRDLDNLpACLNKAK.TVsRMVTSLL-KPShsAYLEGKs ............SphtLlQ-LPc...pIQpAVcsAhuhshpssPspVt+DLDNh+ACLsKhctTshRhssSLLsKPpVVAhLcGcA.. 0 0 0 0 +11306 PF11474 N-Term_TEN Telomerase reverse transcriptase TEN domain Pollington J anon pdb_2b2a Family This is the N terminal domain of the protein telomerase reverse transcriptase called TEN. The TEN domain is able to bind both RNA and telomeric DNA and contributes towards telomerase catalysis. The TEN domain has a structure that consists of a core beta sheet surrounded by seven alpha helices and a short beta hairpin [1]. 
25.00 25.00 417.10 417.10 19.70 18.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.42 0.71 -4.91 2 2 2009-09-11 09:41:36 2008-08-28 10:51:22 3 1 2 3 1 3 0 188.00 100 16.91 CHANGED INNINNNKQMLTRKEDLLTVLKQISALKYVSNLYEFLLATEKIVQTSELDTQFQEFLTTTIIASEQNLVENYKQKYNQPNFSQLTIKQVIDDSIILLGNKQNYVQQIGTTTIGFYVEYENINLSRQTLYSSNFRNLLNIFGEEDFKYFLIDFLVFTKVEQNGYLQVAGVCLNQYFSVQVKQKKWYKNN INNINNNKQMLTRKEDLLTVLKQISALKYVSNLYEFLLATEKIVQTSELDTQFQEFLTTTIIASEQNLVENYKQKYNQPNFSQLTIKQVIDDSIILLGNKQNYVQQIGTTTIGFYVEYENINLSRQTLYSSNFRNLLNIFGEEDFKYFLIDFLVFTKVEQNGYLQVAGVCLNQYFSVQVKQKKWYKNN 0 1 1 1 +11307 PF11475 VP_N-CPKC Virion protein N terminal domain Pollington J anon pdb_2bai Family This is the N terminal domain of a family of virion proteins which contains a zinc finger domain. Currently no function is known. 21.10 21.10 81.60 80.60 20.50 17.00 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.72 0.72 -4.41 5 30 2009-01-15 18:05:59 2008-08-28 11:34:25 3 2 7 1 0 33 0 32.00 93 1.67 CHANGED MATTMEQEICAHSLTFEECPKCSALQYRNGFY MATTMEQEhCAHSLTFEECPKCSALQYRNGFY 0 0 0 0 +11308 PF11476 TgMIC1 Toxoplasma gondii micronemal protein 1 TgMIC1 Pollington J anon pdb_2bvb Family TgMIC1 is released as part of a complex by Toxoplasma gondii prior to invasion. The complex which consists of TgMIC4-MIC1-MIC6 participates in host cell attachment and penetration and is critical in invasion. This is the C terminal domain of TgMIC1 which has a Galectin-like fold which interacts with and stabilises TgMIC6 providing a mechanism for an exit from the early secretory compartments and trafficking of the complex to micronemes [1]. 
25.00 25.00 29.50 252.00 19.10 17.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.57 0.71 -4.64 2 5 2009-01-15 18:05:59 2008-08-28 11:35:37 3 1 3 2 3 5 0 137.00 66 29.94 CHANGED chclhGDShpAhLctGQQLhlTa.SspLcVuVGSCHpLssNF.DhaLpFpTsSpSu.D.VEl--sAGsu.LTIGLGppGRlsVVhpYsp.suu.t.sAYsVtDSGCpo.EtV.hpGlssGApLVhsTlG-sssu.St KTEIHGDSTKATLEEGQQLTLTFISTKLDVAVGSCHSLVANFLDGFLKFQTGSNSAFDVVEVEEPAGPAVLTIGLGHKGRLAVVLDYTRLNAALGSAAYVVEDSGCSSSEEVSFQGVGSGATLVVTTLGESPTAVSA 0 2 2 3 +11309 PF11477 PM0188 Sialyltransferase PMO188 Pollington J anon pdb_2c83 Family PMO188 is a sialyltransferase from P.multocida. It transfers sialic acid from cytidine 5'-monophosphonuraminic acid to an acceptor sugar [1]. It has important catalytic residues such as Asp141, His311, Glu338, Ser355 and Ser356 [1]. 25.00 25.00 28.50 39.90 21.00 20.80 hmmbuild -o /dev/null HMM SEED 381 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.21 0.70 -5.64 4 21 2009-01-15 18:05:59 2008-08-28 11:52:36 3 3 19 18 3 32 0 347.70 37 81.47 CHANGED TlElYlDhAoLPoLpQhhchIQpp--h.ss.RlhuhuRasIssshlsc..hNhpFa..hsN+spstllstL.DphstspphhplplahNhhHulsll.sIhshh.pt.cKloh.cLsLYDDGStEYVsLaphpchs-hppp..Iptpcs.LtphLssspsphsNshhsRYsWpplasopYHhLptDaF-pp..LpsLKchLupshppMcWstasphsspQpshahsllGFssEh.ppQhhts.pssFIFTGTToasus.-hh-hhAQQQlslLNctppssSshalG..psYcLaFKGHPsushlNchIhsph.shIpIPApIsFElLhMTshLPDpVGGhASSlYFolPs-plN+llFhoSDp.p.+p-hLsss....LlpVMlpLsIlsEs ............................................lplYlD.AoLPsLpQhhchhpppc-h.pp.RlhuhuRatls-s.lpp...Nh.phh.hhsN+ss.....ptLhshl.cphsts...lplclahNhuHShplltPIhthh.pths+lpIppLsLYDDGShEYVcL.p.pcstshptp.............lptucppLpphL.ssp.phsN.shhspYsWpphaPspYHhLppDYF-pt..LpsL+cYLupshppMcWssapp..LospQpshahslVGFs.sEh..pphhpsppssFIFTGTToapus.c.h-hhAQQQlNllNchhptpushalG..spYclaFKGHPp..uu.lNchIhssh.shhpIPApIsFElLhMTGh.LPDpVGGlASSLYFSlPpE.KlsaIlFToscp.p.+cDhLpss....hlpVMhpLsIlccp......... 
0 0 1 2 +11310 PF11478 Tachystatin_B Antimicrobial chitin binding protein tachystatin B Pollington J anon pdb_2dcv Family Tachystatin B is an antimicrobial chitin binding peptide and consists of two isotopes B1 and B2.Both structures contain a short antiparallel beta sheet with an inhibitory cysteine knot motif. Tyr(14) and Arg(17) are thought to be the essential residues for chitin binding [1]. 25.00 25.00 106.00 105.90 18.70 18.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.35 0.72 -4.23 2 2 2012-10-01 22:06:18 2008-08-29 08:54:07 3 1 1 2 0 2 0 42.00 95 100.00 CHANGED YloCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF YloCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF 0 0 0 0 +11311 PF11479 Suppressor_P21 RNA silencing suppressor P21 Pollington J anon pdb_2cwo Family P21 is produced by Beet yellows virus to suppress the antiviral silencing response mounted by the host. P21 acts by binding directly to siRNA which is a mediator in the process. P21 has an octameric ring structure with a large central cavity [1]. 28.90 28.90 29.00 351.60 21.10 28.80 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.10 0.71 -4.53 2 5 2009-01-15 18:05:59 2008-08-29 08:55:10 3 1 2 4 0 6 0 173.80 91 100.00 CHANGED MKFFhpDGETSRAlSRSESLLRRVKELGTNS.QSElSEClsEFNELApFNHLLVTVEHREWMEpHPpQSScLRsPSRlGEMLKEIRAFLKVRVVTPMHKETAS-TLNAFL-EYCRIsGLsREDALREKMRKV+SsVLFHHSELLKFEVTENMFSaTELLKLNLSLRVISSQILGhAl MKFFhpDGETSRAISRSESLLRRVKELGTNSPQSEVSECINEFNELARFNHLLVTVEHREWMEKHPKQSSELRsPSRLGEMLKEIRAFLKVRVVTPMHKETASETLNAFLEEYCRITGLTREDALREKMRKVRSTVLFHHSELLKFEVTENMFSFTELLKLNLSLRVISSQILGIAV 0 0 0 0 +11312 PF11480 ImmE5 Colicin-E5 Imm protein Pollington J anon pdb_2dfx Family Imms bind specifically to cognate colicins in order to protect their host cells [1]. Imm-E5 is a specific inhibitor protein of colicin E5. It binds to E5 C-terminal ribonuclease domain (CRD) to prevent cell death. 
The binding mode of E5-CRD and Imm-E5 mimics that of mRNA and tRNA suggesting an evolutionary pathway from the RNA-RNA interaction through the RNA-protein interaction of tRNA/E5-CRD [1]. 25.00 25.00 31.30 31.30 20.60 17.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.64 0.72 -4.16 5 26 2009-01-15 18:05:59 2008-08-29 08:55:51 3 1 21 2 5 23 0 78.30 50 81.21 CHANGED MKLSsKAAIEVCpcAAK+GLsIu+IEGGIWHpP..GFEARlDsIWDGhDsPlDh.oclscNNppAtEsI+-D.SsGHDAFIVTlsu ..........MKLSsKAAIEVCpcAsK+GLhIhtI-GGhWhsP..GFchc.ss.Ws..hD.P.ch..splscNNplAhEsI+-DhpsGasAFIlTlt... 0 0 2 4 +11314 PF11482 DUF3208 Protein of unknown function (DUF3208) Pollington J anon pdb_2ebe Family This bacterial family of proteins has no known function. 25.00 25.00 104.60 104.30 16.80 16.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.52 0.72 -3.92 4 19 2009-01-15 18:05:59 2008-08-29 09:07:12 3 1 19 6 13 20 2 107.40 58 95.15 CHANGED htAVRLhQGYLWHP+uhslDLcuhLPtEls..........sA+lLWDpVsPPhsFFEsGpPTtoQpFYQhTlLhlh-E.P..EuL+s.AEtAupALG.lLEGhPPGVGW.LhEDLRsL ....hpAVRLhQGYLWHP+-hslDLcshLPtEls...........sA+lLhDpVsPPhsFFEDGTPTtoQpFYQlTlLhls--tP..-uL+PhAptsupuLsslLEuhPsGVGW.LhEDLRsL. 0 3 8 13 +11315 PF11483 DUF3209 Protein of unknown function (DUF3209) Pollington J anon pdb_2ehw Family This family of proteins has no known function. 25.00 25.00 25.70 82.10 23.90 22.20 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.55 0.71 -3.78 5 29 2009-09-11 16:33:41 2008-08-29 10:10:54 3 1 29 4 14 28 0 122.20 47 99.94 CHANGED MoCHEIEALRLGLMNVLGTsD-uARpHAccELEGcL..cGPIEALAEA-oLAAlcRHLDAALVDLEEElAAT-c--PEYDYLRGRLVAVRDAEpALpRLsscGEAlLsDLGEoHcsLHEsFPVDE MuCaElpALRLGlMNlLGhcD-ssppHtcpELtstL....pGPIcuLA-AcoLsulpRaL-uALlDLEEclAst-s-DPchsYhRGhLlAV+csEtpLppLptpu-ulhcDLsEhHchLHEhFPscc. 
0 4 10 13 +11317 PF11485 DUF3211 Protein of unknown function (DUF3211) Pollington J anon pdb_2ejx Family This archaeal family of proteins has no known function. 21.60 21.60 22.30 22.80 21.20 21.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.62 0.71 -4.13 11 43 2012-10-02 19:24:03 2008-08-29 10:24:31 3 1 17 1 16 38 2 130.40 35 98.28 CHANGED M.chplpIpTsH-h-ulhpILSDPpFslPplhPshKplp.hpssoFpu.Gc....ahhhsach+Gslalu.sclsYsaplsss.................sstGsG+Lphsh.ppsclplplEY-GhhE+h.u.shlc+hlpchtccl-EcIRhERIKRKI ....M..phsh.TpHDhpslhcILSDP.Fhl.tlLss.cplp.spsspFcs.ss.....huhhsllhpGshYlGss.ploYshphttt...................ssuG+lphph.pcs...cIplhl.-a-Ghhtphst.hhlpp+lpchhcph-EclRLERIKRKI.................................................................... 0 2 2 14 +11318 PF11486 DUF3212 Protein of unknown function (DUF3212) Pollington J anon pdb_2euc Family Members in this family of proteins are annotated as YfmB however currently no function for this protein is known. 21.50 21.50 24.40 23.40 19.90 19.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.69 0.71 -4.35 4 24 2009-09-10 16:55:32 2008-08-29 10:36:29 3 1 24 2 5 22 0 116.00 57 76.49 CHANGED MaYFSPEQQaNAWlVSDLVKQlFp+cstppssh+cLssFAEc+F+IsIDaVFSIIhNIGDIEp.hlsp-hEchLSSYLssLhPaVTtDMhcsS+pNAppYL.+E+ssDVY+LFhs.sshhph .........hpYFSPEQQaNAWlVSDLVKQlF+++stCssGI+ELssFAE-+FHIsIDFVFSIIhNIGDIE..sLspEIEspLSoYLouLhPhlTADMhcoSKsNAatYLp+E+ss-lY+LFh..sshhph 0 1 2 3 +11319 PF11487 RestrictionSfiI Type II restriction enzyme SfiI Pollington J anon pdb_2ezv Family SfiI is a restriction enzyme that can cleave two DNA sites simultaneously to leave 3-base 3' overhangs. It acts as a homo-tetramer and recognises a specific eight base-paid palindromic DNA sequence. After binding two copies of its recognition sequence, SfiI becomes activated leading to cleavage of all four DNA strands. 
The structure of SfiI consists of a central twisted beta-sheet surrounded by alpha-helices. 25.00 25.00 206.60 206.30 18.30 17.30 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.74 0.70 -5.22 2 6 2012-10-11 20:44:46 2008-08-29 10:52:14 3 1 6 4 3 7 0 235.80 54 95.61 CHANGED hcpcYhc.s.-clEplEKpTLRhlVQAl.pYppcAppIFEppss.sSs....luEDITp.ALEhhtha.IspRhhGhIDYKpspaL.pPchhl.QsLhVDuKAppttsps.TLQhSQlsM..chRpppSGcslshputl.P...L.phNDs...hlTToIFV+a.YR....VpuR.+ELpSIhVhulPpuhLpppYNPDsssohahAG+cuPsRtEhhRlRl.FDRLKctssWRlQ.l.hssDs.a....W..........st ..........LppsL-+LEEIEKsTLRLVVQAIYDYRcpAlEIF+cEuDLsSD....IuEDITREALDRLGMsRIDpRLFGKlDYKRACYLFHPDYAl+QALFVDSKAEKsuspu.ATLQhSQLSM..tV+QpRSGpsVslpGchPT...lIolcsp..+YLTTTlFVKYNYcc....-uts+pLKSIlVAAVPNGhLQDRYNPsPpDTIWtAGRNAPohGE-FRVRLSFSpLKcKAAWRVQsIPMPP-s.a..................... 0 2 3 3 +11320 PF11488 Lge1 Transcriptional regulatory protein LGE1 Wood V, Coggill P anon Charwan C Family This family of proteins is conserved from fungi to human. In yeasts it is involved in the ubiquitination of histones H2A and H2B. This ubiquitination step is a vital one in the regulation of the transcriptional activity of RNA polymerase II. In S. cerevisiae, Rad6 and Bre1 are present in a complex, also containing Lge1, that is required for H2B ubiquitination. Bre1 is the H2B ubiquitin ligase that interacts with acidic activators, such as Gal4, and recruits Rad6 and its binding partner Lge1 to target promoters [1]. In S. pombe the equivalent protein to Lge1 appears to be Shf1. 
21.80 21.80 21.80 22.20 21.10 21.30 hmmbuild --amino -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.64 0.72 -3.79 20 157 2009-01-15 18:05:59 2008-08-29 11:04:03 3 1 85 0 81 150 0 100.90 37 32.56 CHANGED sPal.............tlhplc.............Dpspppc..............lcptap-hs......plDpcLcpLptphh+hE.hphshLpspsp+-uLpVQLT.pEK........L-shhhh ...........................................................................................................................s.ah..............h.ch-ttsp............Dpsppsc.....................lpsRsKAIs......oKsKEIEpVY+QcsETFGMVVKMLlcKDPsLE+sIQhsLcpN........L-Elhh..................... 0 7 20 48 +11321 PF11489 DUF3210 Protein of unknown function (DUF3210) Wood V, Coggill P anon Charwan C Family This is a family of proteins conserved in yeasts. The function is not known. The Schizosaccharomyces pombe member is Swiss:O94497 and the Saccharomyces cerevisiae member is Swiss:P40563. 17.50 17.50 22.90 20.60 17.00 16.60 hmmbuild -o /dev/null HMM SEED 711 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.48 0.70 -5.84 15 113 2009-01-15 18:05:59 2008-08-29 11:05:14 3 5 100 0 90 116 0 597.80 28 77.07 CHANGED 
P+IPPRP.s+Rh-RSsSP....ssaAPSPLN-.s.............ptp.s.p...sPpRPsSV.slPSlGEEGtEYpslsssshoss........soPsp..oRsVusDL+LHAP+PSlPsuoAKu+lpAVTRTDSppAsAsGhG.tsuuPttcshtcpsocSLpspsosopssSusspstp..Sht..s-EHGI.PEIGQpVPM.hsNAGDVQAPSPuPhp.............pt.tptHtRppSuRt..uLPPGSYGLHGHGl.ss-+FEKAaYEKHP--hs+E-pupatsslGpsRs.DaAlSuDDLN+IV+toAspGsGLus..thsuTPpEplGahAs-EhopRhuoPsPcSsp.................hcsPl+.p..s.t...............................hIHlDpPh+p...............tp..tt.sttt.s.......-pPILAuD.....EVphcsuhpp...PAVSPohc+c............p...ptp..SsscSRssS+ssshptshst.hc...........psPLEDVcEYEPLFPED-pcsh.ctshspss..........................+.c.+sp.hp++FPSpDlWEDoPsShphp.usVSTP-..................tpcshEsPcQpssRcoptsshcs.....................pps.....psc.....ptpcstsRsph.pQRFPS...RDlWEDuPESppLsTTlpss...................................--hpspSPtss.sKPslPu..RPp...+.............s.csttps.s.spcc+pPPslPsRP..................KPQIPARPuKstsppsu-.......pstp.s............sKsKPsVPuRPtGSKIAA..L+AGFluDLNuRLpLG........PQuPp+.....ppppct...............ppptsssEKuPLSDA......RKGRARGPtRR+PAst...............sst+hsshsplshspsassWpls 
.............................................................................................P.lPsRP..pt..ps..s............SPhs.......................................+..s...h..hs.p..-ht.....................s.p...t.htp-..h.AspPslPstpsptph.sVTps-sp.utthh.....s.s...t.........................................t..t...................tpcpuI..plu.pVPh...phGsVQAPoPus.t.................t.p.p..St......Psuohuh+sHt......psth-pshhpKHPc.hth.pts...........c..phuhSspcLNclV+pst......s...t..usPp-pluh.t.-chspRhsss.sts................................................................lHlc...p.........................................................t.............................ptPlLAsD.....Elt.pst..t...PAlsPthtpp...........................u.s...ps...+.ps+s....t.....s....t........................ttslcclcEhEPLF..---tp....p...tttp.................................................................p....p.........pcFPS..pDlWE...D.sPsuhphp..spVoosc...........................t..psPt....pt.tppppt.t.p...................................t.......t.t.p...tt.c..t........pp+..FPS...+DlWEDs.P-Stphtsplp.s.......................................pp..t.pss..t....pp.P..tlPt...RPp..................................................s.p.s.....t..s...ttp+tsPslPc+P..................KPp..lPsR.sp.tpppstp.........p....s...................s+sKPslPsR.P....s..GuK.....IAA..lpAGFhscLNs+LpLG............Ptssp...........tpp.t....................................pttp.tc.+..sPLu.DA......RKGRARGPtRRtP..sh...........s...ssstc..hstssp.hph..shshWpl............................ 0 15 45 72 +11322 PF11490 DNA_pol3_a_NII DNA_pol3_alph_N; DNA polymerase III polC-type N-terminus II Finn R, Coggill P anon Pfam-B_853 (release 23.0) Domain This is the second N-terminal domain, NII domain, of the DNA polymerase III polC subunit A that is found only in Firmicutes. DNA polymerase polC-type III enzyme functions as the 'replicase' in low G + C Gram-positive bacteria [1]. 
Purine asymmetry is a characteristic of organisms with a heterodimeric DNA polymerase III alpha-subunit constituted by polC which probably plays a direct role in the maintenance of strand-biased gene distribution; since, among prokaryotic genomes, the distribution of genes on the leading and lagging strands of the replication fork is known to be biased [2]. It has been predicted that the N-terminus of polC folds into two globular domains, NI and NII. A predicted hydrophobic surface patch suggests this domain may be involved in protein binding [3]. This domain is associated with DNA_pol3_alpha Pfam:PF07733 and DNA_pol3_a_NI Pfam:PF14480. 22.10 22.10 22.10 22.10 21.90 22.00 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.30 0.71 -4.48 63 1162 2012-10-02 15:09:17 2008-08-29 11:09:41 3 24 1157 0 139 793 71 115.20 23 8.07 CHANGED sYappllpphsp.tssshpshLpppphph.cssclh.lhstschhtshl+ppthshlhpphhphGFs....hphpsplpp.p.spp.h....ppappp+tpc-p...phspp...shpphpppppptpppppp ............pYappslppsst..to..s..hh+uh.hppppsph..ps.spLh.l.sssslppsahccpt.hsslhcpapphGFs.....phphphchsc.t.s....ppph.............psactpppppsp...ptspp...........Ahcthcphcttts..tp.p........................................ 0 44 85 113 +11323 PF11491 DUF3213 Protein of unknown function (DUF3213) Pollington J anon pdb_2f40 Family The backbone structure of this family of proteins has been determined however the function remains unknown. The protein has an alpha and beta structure with a ferredoxin-like fold [1]. 20.30 20.30 21.30 86.90 19.80 18.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.01 0.72 -4.11 2 12 2009-01-15 18:05:59 2008-08-29 11:11:46 3 1 12 1 9 16 0 87.70 47 91.88 CHANGED h.lKF.ssls.EcApIhQYELppc-chaRlFlNsYA+su.VlhD-phhshEEL...hEcLKsEVltEKplTlQELIEtShSWNNVh.S+u .l.pl+F.GpIs.EEAphhQYELppctuVaRlFlNGYA+sGhVlFD-pplscEclLchLcchcPEVlpE+clTlpELIEpShSWpNlh....t.. 
0 1 1 6 +11324 PF11492 Dicistro_VP4 Cricket paralysis virus, VP4 Tate JG, Finn RD anon pdb_1b35 Family This is a family of minor capsid proteins, known as VP4, from the dicistroviridae. The dicistroviridae is a group of small, RNA-containing viruses that are closely structurally related to the picornaviridae. VP4 is a short, extended polypeptide chain found within the viral capsid, at the interface between the external protein shell and packaged RNA genome[1]. 22.60 22.60 23.40 40.80 22.30 20.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.83 0.72 -3.96 11 50 2009-01-15 18:05:59 2008-08-29 11:12:18 3 7 19 1 0 54 0 57.20 45 6.17 CHANGED uusEhpps....GsIScsAosVuplAssls..plPllGshA+s....spWlustVuslAplFGa ......soEssp.....GsISclAouVssVANsl-..pIPllGpIAKP....lpWVushVusVAulFGa 0 0 0 0 +11325 PF11493 TSP9 Thylakoid soluble phosphoprotein TSP9 Pollington J anon pdb_2fft Family The plant-specific protein, TSP9 is phosphorylated and released in response to changing light conditions from the photosynthetic membrane. The protein resembles the characteristics of transcription/translation regulatory factors. The structure of the protein is predicted to consist of a random coil [1]. 25.00 25.00 43.60 43.40 21.10 20.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.78 0.72 -3.43 5 29 2009-09-11 09:35:24 2008-08-29 11:52:16 3 1 16 1 15 31 0 77.20 43 68.38 CHANGED AT+G.GoAss+EEKuhhDalhGhlhKcDQhhETDPlLpKVDtKuPSGSTsu+KAsu+uPAsSA.....Ac-EGGsGGFsLGuLFAK ..........stt.tuusucEEKGlhDaIhGtlhKc-Q.h.lETDPlLpKV-pKs.u.Gs.....osup....Kus..ssustpts......tcccGGsGG.....hG...GLFuK... 0 1 7 11 +11326 PF11494 Ta0938 Ta0938 Pollington J anon pdb_2fqh Family Ta0938 is a protein of unknown function however the structure has been determined. The protein has a novel fold and a putative Zn-binding motif. 
The structure has two different parts, one region contains a beta sheet flanked by two alpha helices and the other contains a bundle of loops which contain all cysteines in the protein [1]. 22.80 22.80 22.80 23.00 22.70 22.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.33 0.72 -3.92 4 21 2009-01-15 18:05:59 2008-08-29 13:09:29 3 1 20 1 8 17 1 99.50 61 93.18 CHANGED M...KIlVNG+EAGTKEKGCALCGuTWGDYYE-lDGE+LFFCCDICAhEFlNMlsEVKK+TuWs+lDELlINGNY.pGRsC.uKsGs+ch+FYVKFs--AsIcTF+.hs .....MKIlVNG+EAGTKEsGCALCGGTWG.DYYEEIDGEKLFFCCDlCA.LEFVNMlNEVKKRTsWSRIDELlI..NGNYYTGRTCsAKsGs+EYKFYVKFNDDAsIETFKEls.................. 0 3 4 7 +11327 PF11495 Regulator_TrmB Archaeal transcriptional regulator TrmB Pollington J anon pdb_2f5t Family TrmB is an alpha-glucoside sensing transcriptional regulator. The protein is the transcriptional repressor for the gene cluster encoding trehalose/maltose ABC transporter in T.litoralis and P.furiosus [1]. TrmB has lost its DNA binding domain but retained its sugar recognition site. A nonreducing glucosyl residue is shared by all substrates bound to TrmB which suggests that it is a common recognition motif [1]. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.31 0.70 -5.39 30 611 2012-10-02 13:01:53 2008-08-29 13:10:22 3 5 362 2 163 568 67 147.60 19 52.42 CHANGED tl.hl+uhpsllpphp-lIpsAcpElhluls.chLcplpctLhsuhccGVpVhLllhs.....sss.s.hchhsssstlRhpcsss.shl....lluDtppulhsspp........pscscpYulhhpcpsLhhhlsthFhsshWtpuphlh...spshshPhpasshRhulpslpphhppuhslpuplpGhshp.oscphslpGcl..ss.............hsthst.hsshhlcoscu.plsVGGhsAhlEDlEupplplptt ....................................h.lhsl+uppsllpphpplIppAcpcl.hl....u.....s....h....p....-.....l.....p.p.....l....c...s....t.....LppstccG..Vplh.hl..hhs..............hpssh.p..h.c...h....h...t.h....s.p..............s..c..ht..tt..t..c...hh.l..................lssDs.cchlhu.s......................ppp..u..hhs...p.....hhhhh.............................................................................................................................................................................thhh..................................................................................... 0 39 99 140 +11328 PF11496 HDA2-3 Class II histone deacetylase complex subunits 2 and 3 Wood V, Coggill P anon Chahwan C Family This family of class II histone deacetylase complex subunits HDA2 and HDA3 is found in fungi, The member from S. pombe is referred to as Ccq1 in Swiss:Q10432. These proteins associate with HDA1 to generate the activity of the HDA1 histone deacetylase complex. HDA1 interacts with itself and with the HDA2-HDA3 subcomplex to form a probable tetramer and these interactions are necessary for catalytic activity. The HDA1 histone deacetylase complex is responsible for the deacetylation of lysine residues on the N-terminal part of the core histones (H2A, H2B, H3 and H4). Histone deacetylation gives a tag for epigenetic repression and plays an important role in transcriptional regulation, cell cycle progression and developmental events. 
HDA2 and HDA3 have a conserved coiled-coil domain towards their C-terminus [1]. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -12.01 0.70 -5.60 26 146 2012-10-05 12:31:09 2008-08-29 13:22:22 3 6 101 6 102 1604 22 269.30 22 34.08 CHANGED sssu-ahlPssMsphQK-Lh-pllslatpsIlca.h...cspspppsh.................................pphchhhpphphlusHP..L.Ll-HaMP+ph........hht-.stchttoSuKFtlLscLlshl..............pppp......................hpllllscssKph-LlEulLhG.....+tlphpRhsuphlhscs+chs............................................................................................t.thusslaLhsocslhpp..sss..tp.phshlIuhDssl-spsPslpplRppptp................................................t............sPllRLlshsSsEHhtLpasp.....tt.p.h.th..t.......................LRchlGslssD...shslhpp..lchlssal .........................................................................................................................t..stcahlPhshsthpK-hh-pll.s.h.ctps.lhca..h...........csp.s..pps.h...................................................................................................pph..ph..h..hpplp.lssHP....L...l........l.....-....H.h..h..Ppph......................................................h.t..c..........s.....t......p....h..t...t..s..SuKFphLppLl.shl.........................pphp......................hcllllscs.s+phDLlEuhl..lG...............pphp..hpRhsu..p..p..h.hsp.ppths............................................................................................................................................................................thshtlp.L.h.ss.p......thhp..........p.s.....phsh.lIshDs.s..h.......-spt..ssl...phlcppt.pp...................................................................................t.hPll+Lls.soh-HhtLhhsph.........pp.p..l..phhtt.......................hps.hGpl.ss....h....p.................................................................................................................
................................. 0 21 53 89 +11329 PF11497 NADH_Oxid_Nqo15 NADH-quinone oxidoreductase chain 15 Pollington J anon pdb_2fug Family This protein, Nqo15, is a part of respiratory complex 1 which is a complex that plays a central role in cellular energy production in both bacteria and mitochondria. Nqo15 has a similar fold to Frataxin, the mitochondrial iron chaperone. This protein may have a role in iron-sulphur cluster regeneration in the complex. This domain represents more than half the molecular mass of the entire complex [1]. 25.00 25.00 26.50 114.90 19.00 17.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.51 0.71 -4.40 3 17 2009-01-15 18:05:59 2008-08-29 13:32:55 3 1 17 15 11 16 2 127.70 62 96.40 CHANGED MAsAs-ccLYcQWVELLGWLc-EApoRGLuFEKVADFPDYIYRMERPYDLPTTVMSVuLSsG.GQPLLLAAVSPRHVDLKGISLRLMGGSKHWHLHA...Gu+GLLEGKRPFTRERLuVLLDGAhRGlAsV ......Mutup-ctLYcAWVELLuWM+EYApt+.GVpFEKEADFPDFIYRMERPYDLPTTlMoASLSDuhGEPFLLAsVSPRHAcLK+IuLRL..s+tHhHLHA..EsG+G.L.l.sGKlPLTKERhasLADRARculuh..... 0 3 7 11 +11330 PF11498 Activator_LAG-3 Transcriptional activator LAG-3 Pollington J anon pdb_2fo1 Family The C.elegans Notch pathway, involved in the control of growth, differentiation and patterning in animal development, relies on either of the receptors GLP-1 or LIN-12 [1]. Both these receptors promote signalling by the recruitment of LAG-3 to target promoters, where it then acts as a transcriptional activator. LAG-3 works as a ternary complex together with the DNA binding protein, LAG-1 [1]. 
23.30 23.30 23.70 24.90 22.50 23.20 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.93 0.70 -5.74 2 17 2009-01-15 18:05:59 2008-08-29 13:48:28 3 5 8 1 16 20 0 259.40 31 60.52 CHANGED MKPSTSK...ospSPPPEEPsusaVNspLPsP-DEPhlu-hssappGpE.tR.RSp.AhtpapKsR.E.IusQRAVTApLapRaTEDEERKRhEpQKNKEAMNASsu..ouSRNG..pl-NRKRRND..Vus.ouEEEW+RA.QQQHWMGQttP.h.paQMQQQYH.QQQ....httQHHphh................u.+SVPTP.uShHpPSPutMpssC...................PhsDENsLsVPpGEWFDKLAlhVAEpYsssTILGPDTYDsaLsELDh.ps.u.ThpoP.Ehs.....poAs.P..NPQp.tQhtQQQNKMRhhQQQp.....hEQQRpQQhMpQpQQQ....HppQQMLL.QQQQ.pQhQQppQMN.GGQF.TQApQpAsYhQpMQ+M...pQ.ppQQQQAQpHQQA.QQHQQ.tQpt.MGYulPNGY.tp.phasP.YG.HHMPp.TsFANIN .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..ppQ...QpphhhhQQQp............p.hppQR.pQQh.Qtp...Q....p.pp.QQh....p.......ppt..........pt...psshhp.....h..pph...p................................................................................................... 0 4 5 16 +11332 PF11500 Cut12 Spindle pole body formation-associated protein Wood V, Coggill P anon Chahwan C Domain This is the central coiled-coil region of cut12 also found in other fungi, barring S. cerevisiae. The full protein has two predicted coiled-coil regions, and one consensus phosphorylation site for p34cdc2 and two for MAP kinase. During fission yeast mitosis, the duplicated spindle pole bodies (SPBs) nucleate microtubule arrays that interdigitate to form the mitotic spindle. 
Cut12 is localised to the SPB throughout the cell cycle, predominantly around the inner face of the interphase SPB, adjacent to the nucleus [1]. Cut12 associates with Fin1 and is important in this context for the activity of Plo1 [2]. 21.50 21.50 22.80 22.80 21.30 20.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -10.99 0.71 -4.52 12 71 2009-09-11 11:20:04 2008-08-29 14:19:16 3 3 70 0 60 71 1 150.90 34 21.61 CHANGED -ups+PRo+LTpALhDSR......cp.....st.t.....stpct...css.sscss..........t.pshp..tc-sD.TINLs-PRSpSGKYWKuEF...DsY+s+optEl+KLIp...............Y+phAKsYA+cKDpEApcLucKLKEEE...tKVupMEcclTpLsSsMsscsspss.....+EpLhp- ..............................................t..p.p.Ro+LopuL.c.uRp.............t..p........sp.................shppp.ppstpscp.................................pspspc..ps-sDsTlsLs-P+SQSG.KYWKuEF.........-sY+scsctEhcKLlp...............Y+plAKSYA+cKDsEAhcLs...cK.LKcEp...tKVtpMEc+lsc.hAuphssptsp...ts.....p.............................. 0 10 27 46 +11333 PF11501 Nsp1 Non structural protein Nsp1 Pollington J anon pdb_2gdt Family Nsp1 is the N-terminal cleavage product from the viral replicase that mediates RNA replication and processing [1]. The specific function of the protein is unknown however the structure has been determined. The protein has a novel alpha/beta fold formed by a 6 stranded beta barrel with an alpha helix covering one end of the barrel and another helix alongside the barrel [1]. Nsp1 could be involved in the degradation of mRNA. 
25.00 25.00 41.00 38.30 19.90 18.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.49 0.71 -4.25 3 172 2009-01-15 18:05:59 2008-08-29 14:20:12 3 4 67 2 0 150 0 115.00 98 1.95 CHANGED HVQLSLPVLQVRDVLVRGFGDSVEEALSEAREHLKNGTCGLVELEKGVLPQLEQPYVFIKRSDALSTsHGHKVVELVAEMDGIQYGRSGITLGVLVPHVGETPIAYRNVLLRKNG ..................HVQLSLPVLQVRDVLVRGFGDSVEEALSEAREHLKNGTCGLVELEKGVLPQLEQPYVFIKRSDALSTNHGHKVVELVAEMDGIQYGRSGITLGVLVPHVGETPIAYRNVLLRKNG 0 0 0 0 +11334 PF11502 BCL9 B-cell lymphoma 9 protein Pollington J anon pdb_2gl7 Family The Wnt pathway plays a role in embryonic development, stem cell growth and tumorigenesis. BCL9 associates with beta-catenin and Tcf in the nucleus when the Wnt pathway is stimulated leading to the transactivation of Wnt target genes [1]. 25.00 25.00 28.60 38.20 24.40 24.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -7.95 0.72 -4.43 7 124 2009-01-15 18:05:59 2008-08-29 14:29:02 3 1 64 6 72 108 0 39.90 55 3.04 CHANGED GLSpEQLEHRERSLQTLRDIQRMLFP-.-+shuhts.st.p ..GLSpEQLEHRERSLQTLRDIpRhLFP-.Ep-.shtt.................. 0 9 15 39 +11335 PF11503 DUF3215 Protein of unknown function (DUF3215) Pollington J anon pdb_2grg Family This family of proteins with unknown function appears to be restricted to Saccharomycetaceae. 25.00 25.00 26.30 46.00 22.00 19.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.32 0.72 -4.35 4 29 2009-01-15 18:05:59 2008-08-29 14:51:24 3 1 19 1 17 22 0 80.50 36 91.50 CHANGED hslsEhlssslGolsFDENtNll-ooGlG....ppRlpDIscLSpscLDppGaulhpDssl.spla+css+TlsVYTs..............up .........tEhlPcslGTLsFD-NtNll-ooGlG.....cs+lpDIhclSpscLsppGa.ulhpD...splllplaKc..ss+TlslYTs...pp.............. 0 2 9 17 +11336 PF11504 Colicin_Ia Colicin Ia Pollington J anon pdb_2hdi Family Colicins are toxic molecules secreted to kill other bacteria in times of stress. 
Colicin Ia kills susceptible E.coli cells by binding to the colicin I receptor leading to the formation of a voltage-dependent ion channel. The protein can be divided into three domains, a translocation domain, a receptor binding domain and a channel forming domain [1]. 25.00 25.00 25.90 53.30 24.00 16.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.35 0.72 -4.08 3 56 2009-01-15 18:05:59 2008-08-29 15:50:56 3 2 40 2 0 53 0 70.40 98 12.45 CHANGED KNTPDGKTIVSPEKFPGRSSTNDSIVVSGDPRFAGTIKITTSAVIDNRANLNYLLSHSGLDYKRNILNDRNP .....KNTPDGKTIVSPEKFPGRSSTNHSIVVSGDPRFAGTIKITTSAVIDNRANLNYLLTHSGLDYKRNILNDRNP 0 0 0 0 +11337 PF11505 DUF3216 Protein of unknown function (DUF3216) Pollington J anon pdb_2hjm Family This family of archaeal proteins with unknown function appears to be restricted to Thermococcaceae. 25.00 25.00 72.60 72.20 24.10 23.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.17 0.72 -4.19 2 14 2009-01-15 18:05:59 2008-09-01 09:09:30 3 2 13 4 11 16 0 95.10 49 60.44 CHANGED MDhsEKVKtLs.cLtE-pLtcAI-RFlTLscGIEKTRGEtFAKAuIYGFLEGILTTLKhKapsEcIppLLs.lKpARE.pEAhLRKspPPlh.ppsL ........lE-VKuLscELGE-sLlpsIDuFlsLNcGLEsK+GE-FlcVuILGFLEGILTTLKhKa.p--+lscLL-cV+ppRtEL-thFRKs+sPlh-......... 0 1 1 6 +11338 PF11506 DUF3217 Protein of unknown function (DUF3217) Pollington J anon pdb_2hql Family This family of proteins with unknown function appears to be restricted to Mycoplasma. Some members in this family of proteins are annotated as MG376 however this cannot be confirmed. 23.60 23.60 23.80 108.70 21.90 23.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.19 0.72 -4.33 2 7 2009-01-15 18:05:59 2008-09-01 09:20:58 3 1 7 6 3 5 0 99.70 69 100.00 CHANGED MLNpVFLEGEIESopWShKKTGFLVTIKQhRhFGER.FTDaaVhYANGQLAaELEtaTpKaKTISIEGILRTYLE++StIWKTTIElVKIhpPpsclhIDYpE. 
MLNpVhLEG.I-S.pWShpKTGFhVTIpQhRhFGpphFTDYYVIYANsQLuhELEKashcachluIcG.LpTY.-+KSclWKTpI.h.KIh................ 0 2 2 2 +11339 PF11507 Transcript_VP30 Ebola virus-specific transcription factor VP30 Pollington J anon pdb_2i8b Family VP30 is a nucleocapsid-associated Ebola virus-specific transcription factor [1]. It acts by stabilising nascent mRNA in Ebola virus replication. The C terminal domain of VP30 folds into a dimeric helical assembly. VP30 assembles into hexamers in solution by an N-terminal oligomerisation domain which activates the transcription function of the protein. The oligomerisation is mediated by hydrophobic amino acids at 94-112 [1]. 20.00 20.00 22.50 22.30 19.30 17.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.62 0.71 -4.32 3 31 2009-01-15 18:05:59 2008-09-01 09:42:50 3 1 20 4 1 29 0 130.00 60 46.56 CHANGED LTLclLlcIAEaWAsp-IsclDDoKLRALLTLoAVLlRKFSKSQLupLCEoHLRHENLGQDQA-SVLEVYQRLHSDKGGsFEAALWQQWDRQSLoMFISAFLaVALQIPCESSoVVlSGLcpLhPPQsNus ...LTLcsLs+IhpYhpRp-IsclD-opL...RAhLoLosshlRKh..s+S.lsshsEhHlpHENLsQDQssslhpsYptlH.DKGGpFEAALWQtWD+pSlohFlpAhLaVhpsIPCESShsV.uuhcphl.Ppspu....... 0 1 1 1 +11340 PF11508 DUF3218 Protein of unknown function (DUF3218) Pollington J anon pdb_2igs Family This family of proteins with unknown function appears to be restricted to Pseudomonas. 
25.00 25.00 30.30 134.00 21.50 17.40 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.31 0.70 -4.83 2 9 2009-01-15 18:05:59 2008-09-01 09:51:37 3 1 6 8 1 8 0 187.90 63 96.74 CHANGED pINIYQNPGQSluslYKGhApQCsPG.sFPEsQhlEAWDIPLhLHPEFlPsGDlSKhDppYuTlLAtE.ApslhlthpMsp-Kt+sCs.ElhsLloShupNLspIKuRaGusYLshhKtSPNhYPTsVGhph.uuGu.sQ-SGl.VSYGssLtpLT.uphQAMsLPtslKtLls.GlGl+LstPpa.ssaNsItoGlRYTTuVslhLAYaAsl ..pINIYQNPGQSluslYKGhApQCsPG.sFPEAQhlEAWDIPLhLHPEFlPsGDlSKhDppYuTlLAtE.AphlhlthpMsp-Kt+sCs.ElhsLloShupNLspIKuRaGuuYLshhKtSPNhYPTsVGhph.uuGu.sQ-SGl.VSYGsNLtpLT.uphQAMsLPtslKtLls.GlGl+LstPpa.ssaNsItoGlRYTTuVslhLAYaAsl....... 0 1 1 1 +11342 PF11510 FA_FANCE Fanconi Anaemia group E protein FANCE Pollington J anon pdb_2ilr Family Fanconi Anaemia (FA) is a cancer predisposition disorder. In response to DNA damage, the FA core complex monoubiquitinates the downstream FANCD2 protein. The protein FANCE has an important role in DNA repair as it is the FANCD2-binding protein in the FA core complex so it represents the link between the FA core complex and FANCD2 [1]. The sequence shown is the C terminal domain of the protein which consists predominantly of helices and does not contain any beta-strand. The fold of the polypeptide is a continuous right-handed solenoidal pattern from the N terminal to the C terminal end [1]. 
22.20 22.20 24.00 23.50 20.40 20.00 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.86 0.70 -5.25 2 95 2009-09-11 15:02:58 2008-09-01 11:06:15 3 2 65 1 56 91 0 198.80 32 49.62 CHANGED u....LupShpLPcthpstLPhlppLLcs.pE....h-DussstLplLHECsPsph-lLCu.LpLsplS-.sL.phCohLLtLuPDLShSsAosLhppLhLt+ILSLspsASRhLsTAlTShCu+YshPsCpALl-PlLQtsthGsAQs-LLCpLV.h-sLEPctplLhhtphLthsWcEtsh.VlpuLL-pplEho.EcFslhhE+LCppu.thopSMtaAKhhLoVhTKYQuNls.sp+hsLu.sLp.NpTFL+KSLpAALK+lu ............................................................................................................................ht.lst.Lph.phs-.slhplCs......lhs..L.o...sshSh....ss.....u.ss.lh.psLhLs+lluL..sp.sASRh..LhsAlsshC.t..+ashssspullhP.l.L.p....ts.t.......h.Gss.Qs-LLspll...t-..sLc...............sch.....p...h..hh...l....t...p..h.....L................p........h..s...........W..pEth.h.VlpslLppp.....hs....p.ht.hhp.l.....t.....tt..u.t....hspShtauphhhshhsphttplt..p.....l..hlt.stThhp+sh.shl....t.......................... 0 15 22 37 +11343 PF11511 RhodobacterPufX Intrinsic membrane protein PufX Pollington J anon pdb_2ita Family PufX organises RC-LH1, the photosynthesis reaction centre-light harvesting complex 1 core complex of Rhodobacter sphaeroides [1]. It also facilitates the exchange of quinol for quinone between the reaction centre and cytochrome bc(1) complexes. In organic solvent, PufX contains two hydrophobic helices which are flanked by unstructured regions and connected by a helical bend [1]. 25.00 25.00 30.80 30.70 23.10 22.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.92 0.72 -4.58 5 18 2009-01-15 18:05:59 2008-09-01 12:54:39 3 1 18 3 3 14 33 66.20 38 84.78 CHANGED MscaNDhLss..NsKoRLRADlshLMLKGAGYAAVFVlulWFlIuuhtlIG+hLPEQSRpTPDPsppuA ..........ho..sDaLss..ssKspLRlWlshQMhKGAGaAussFhuslhllsshtslGphLP.ppppAPuPs.h..... 
0 1 2 2 +11344 PF11512 Atu4866 Agrobacterium tumefaciens protein Atu4866 Pollington J anon pdb_2jmb Family Atu4866 is a protein with unknown function from Agrobacterium tumefaciens however the structure has been determined. Atu4866 adopts a streptavidin-like fold and has a beta-barrel/sandwich which is formed by eight antiparallel beta-strands [1]. Atu4866 has a potential ligand-binding site where it has a stretch of conserved residues on the surface [1]. 25.00 25.00 25.70 25.30 24.50 23.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.57 0.72 -4.24 14 114 2009-01-15 18:05:59 2008-09-01 13:19:10 3 2 95 1 47 122 1 77.40 61 65.27 CHANGED tpp+sYlGMWVTuDGaIRHELLPsGRYDEARGsRcSAYpGRYclTGsHI-YhDDTGFTADG-Fh.ssVLaHuGMVLaRc ..................tHPYVGMWVTsDGpIRpELLPsGRYDEARGsRcSAYpGRYpVsGsHI-YhDDTGFTADG-Fh.-slLaHuGMVhYRp..... 0 11 26 33 +11345 PF11513 TA0956 Thermoplasma acidophilum protein TA0956 Pollington J anon pdb_2jmk Family TA0956 is a protein from Thermoplasma acidophilum which currently has no known function however the structure has been determined. The protein has a two-layered alpha/beta-sandwich topology and is a putative Elongation factor 1-alpha binding motif [1]. 25.00 25.00 181.50 181.30 22.20 18.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.27 0.72 -4.01 3 3 2009-01-15 18:05:59 2008-09-01 13:27:54 3 1 3 2 3 7 4 110.30 51 97.07 CHANGED MsLCAMYNISMcspHPTTICVVMDKFLDSFuELlDVl--sDpDE.LMDFISRYARTDEIMPEDKTVGFVVINuDKKlMSVSFSDIDENM...KcsI+EIlKKYKDpGYKVEsDh MsLCAMYNISMcspHPTTICVVMDKFLDSFuELlDVl--sDpDE.LMDFISRYARTDEIMPEDKTVGFVVINuDKKlMSVSFSDIDENM...KcsI+EIlKKYKDpGYKVEsDh 0 1 2 2 +11346 PF11514 DUF3219 Protein of unknown function (DUF3219) Pollington J anon pdb_2jn9 Family This family of proteins with unknown function appears to be restricted to Bacillaceae. Some members in this family of proteins are annotated as YkvR however this cannot be confirmed. 
25.00 25.00 82.10 82.00 16.80 16.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.37 0.72 -4.10 2 29 2009-01-15 18:05:59 2008-09-01 13:38:47 3 1 29 1 7 31 0 92.60 55 97.71 CHANGED KplhLNsspl-hhsYpEEo...cscptI..uFshpVopppYHDIssLLY-hhFsVpVPEcsLtFRG.hspY.TSLTNLYE.sAVuEFalElpEhDtptD ..................lhLNsVplchhsYpEEs.......cstRpI..uFsLcVTSETYHDIAVLLYEKTFcVcVPE+sLsFRGpITNYSTSlTNLYccspVu-FalElTEl......... 1 1 5 5 +11347 PF11515 Cul7 Mouse development and cellular proliferation protein Cullin-7 Pollington J anon pdb_2jng Family The Cullin Ring Ligase family member, Cul7, is required for normal mouse development and cellular proliferation. Cul7 has a CPH domain which is a p53 interaction domain. The CPH domain interaction surface of P53 is present in the tetramerisation domain [1]. 25.00 25.00 25.10 28.80 20.10 18.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.81 0.72 -4.08 8 206 2009-01-15 18:05:59 2008-09-01 13:50:29 3 43 71 2 103 188 0 76.00 52 2.97 CHANGED RcDFpSsDsYAhYVR-slpsGMhVRCCc-YEElppGDhGpVh+l-s-GlpDLNVQVsWps+GpTYWV+ahHVEllGsss .........Ru-FtSsssYAhYVR-slpsGMpVRhhc-YE.Els.tGD.Gc.h+.sssGl.s..VQV.WpspGpTYWV+ahhlEllGh.s............... 0 19 23 51 +11348 PF11516 DUF3220 Protein of unknown function (DUF3120) Pollington J anon pdb_2jpf Family This family of proteins with unknown function appears to be restricted to Bordetella. 
25.00 25.00 240.50 240.50 20.50 19.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.34 0.72 -4.06 2 4 2009-01-15 18:05:59 2008-09-01 14:06:47 3 1 4 1 1 4 0 106.00 98 16.14 CHANGED KQQLpEpAPSHANLDVKWLDGLRAGSMALQGDVKVWMQNLEDLHTRRPDEFsARLQQSTDALYSHLEAQWAKQHGTPPTASDVsGMPQWQEYTAMLRERFAGLDTI KQQLHEQAPSHANLDVKWLDGLRAGSMALQGDVKVWMQNLEDLHTRRPDEFAARLQQSTDALYSHLEAQWAKQHGTPPTASDVAGMPQWQEYTAMLRERFAGLDTI 0 1 1 1 +11349 PF11517 Nab2 Nuclear abundant poly(A) RNA-bind protein 2 (Nab2) Pollington J anon pdb_2jps Family Nab2 is a yeast heterogeneous nuclear ribonucleoprotein that modulates poly(A) tail length and mRNA. This is the N terminal domain of the protein which mediates interactions with the C-terminal globular domain, Myosin-like protein 1 and the mRNA export factor, Gfd1 [1]. The N-terminal domain of Nab2 shows a structure of a helical fold. The N terminal domain of Nab2 is thought to mediate protein-protein interactions that facilitate the nuclear export of mRNA [1]. An essential hydrophobic Phe73 patch on the N terminal domain is thought to be an important component of the interface between Nab2 and Mlp1 [1]. 25.00 25.00 32.40 30.40 23.10 22.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.44 0.72 -3.96 4 26 2009-01-15 18:05:59 2008-09-01 15:41:18 3 4 26 4 17 26 0 101.20 57 19.60 CHANGED MS..p-pIspsLKslVAEKLpsl.NFsEDVpYVAEYIVLLhSNGGoh-SlVQELsoLFDoVSppsLssVVQTuFhAL-hLppG-slpslhuKlp...hhuupssupusos .......MS..pEQhoENLKVIVAEKLssl.NFNEDlcYVAEYIVLLIsNGGTlESVVpELuoLF.DoVSp-sLssVVQTAFFALEALQQGEosEsIVuKIR...hM.supuhupt...s.............................................. 0 2 8 14 +11350 PF11518 DUF3221 Protein of unknown function (DUF3221) Pollington J anon pdb_2jqo Family This family of proteins with unknown function appears to be restricted to Bacillus. Some members in this family of proteins are annotated as YobA however this cannot be confirmed. 
YobA is a protein with unknown function. 20.80 20.80 21.00 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.45 0.72 -4.39 9 132 2009-01-15 18:05:59 2008-09-01 15:47:07 3 2 109 1 15 95 1 105.10 40 84.54 CHANGED lhL.lhhhlhotsssptsE...spphohEGYlIl+.NspshhlsDcshpsK.-hpphhEpplhpcasusIVL....thpshsshcpLpsGpKIKVWasplhES.Pu+hhlpKaEll ..............................hh.lhLhlhu..s.sssppsE..........sccsshEGYlIl+.N-.s...s..a...hlsDcshpsK.ELppY.hEp...php..p-aPucIlL.....hc-c..-.u..acpLKsGDKIKVW..S.p...h.hESYPu+hhVpKaEl.......... 0 2 9 11 +11351 PF11519 DUF3222 Protein of unknown function (DUF3222) Pollington J anon pdb_2js3 Family This family of proteins with unknown function appears to be restricted to Rhodopseudomonas. 25.00 25.00 141.40 141.30 19.50 17.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.56 0.72 -4.02 3 7 2009-01-15 18:05:59 2008-09-01 15:50:14 3 1 7 2 6 7 0 74.70 74 100.00 CHANGED MT-FAAEDVRKIAAALVKTAIETVSEEDGGARNQCKLCNASVPWLQTGDEIpHpPDCAVAIAQRIL.AKs+LHSV MTDhAAEDVRKIAsALlKTAIEIVSEEDGGAHNQCKLCsASVPWLQTGDEIpHAPDCPVVIApplLuu+P+LHuV 0 0 1 2 +11352 PF11520 Cren7 Chromatin protein Cren7 Pollinton J anon pdb_2jtm Family Cren7 is a chromatin protein found in Crenarchaeota and has a higher affinity for double-stranded DNA than for single-stranded DNA. The protein contains negative DNA supercoils and is associated with genomic DNA in vivo. Cren7 interacts with duplex DNA through a beta-sheet and a long flexible loop. The function has not been completely determined but it is thought that the protein may have a role similar to that of archaeal proteins in Euryarchaea [1]. 
39.00 39.00 60.50 60.20 38.80 37.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -8.87 0.72 -4.33 7 51 2009-01-15 18:05:59 2008-09-01 16:18:15 3 1 43 5 34 42 0 58.70 53 95.26 CHANGED M....sscc..shcs+ss.sGKEhcLhPhKsW.LsP+GRKGVhlGLF+sPp.GKYFRt+Vs-t.P ............tcc..sl+l+ss.sGKEh-LhPpKVWsLsPKGRKGVKIGLFKsP-oGKaFR++lP-sYP.. 0 10 19 24 +11353 PF11521 TFIIE-A_C-term C-terminal general transcription factor TFIIE alpha Pollington J anon pdb_2jtx Family TFIIE is compiled of two subunits, alpha and beta. This family of proteins are the C terminal domain of the alpha subunit of the protein which is the largest subunit and contains several functional domains which are important for basal transcription and cell growth. The C terminal end of the protein binds directly to the amino-terminal PH domain of p62/Tfb1 (of IIH) which is involved in the recruitment of the general transcription factor IIH to the transcription preinitiation complex. P53 competes for the same binding site as TFIIE alpha which shows their structural similarity. Like p53, TFIIE alpha 336-439 can activate transcription in vivo [1]. 21.10 21.10 21.20 21.40 20.90 20.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.99 0.72 -4.04 8 111 2009-01-15 18:05:59 2008-09-01 16:44:00 3 5 89 3 80 101 0 84.30 38 19.99 CHANGED SsSDTSESDcD.ssscsPstt..........spch--p--DD-.E-ssD-PsVhVuGRPasYcEVSQpPcLVuQMTPQEKEsYIphGQchFpclY- ......................................spS-oS-s-c-.s....sc.ss.ss...............phc.tc-p-----hE-s...sD..-P..sVhVAG+Pashs-V.o...P-LV.A.QMTPpEKEsYIp.hsQchFpcha-...... 0 28 35 58 +11354 PF11522 Pik1 Yeast phosphatidylinositol-4-OH kinase Pik1 Pollington J anon pdb_2ju0 Family Pik1 is a regulator of membrane traffic and participates in the mating-pheromone signal-transduction cascade. The protein is localised to the nucleus and cytoplasm in the Golgi. 
Pik1 is thought to have an actin-independent role in membrane transport [1]. 21.40 21.40 21.40 24.60 21.10 21.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.62 0.72 -4.22 20 95 2009-09-10 18:11:33 2008-09-01 16:58:07 3 4 83 1 64 94 0 50.50 41 5.27 CHANGED SauFQlsRRllN+lQpllFsss...................pps+ps+h+ENltPALVLsuhVhoSlA ......ShuFQssRRlhN+lQpIlFsss....................sts+pp+h+ENlhPuhVLuShlhuSlA................. 0 16 39 60 +11355 PF11523 DUF3223 Protein of unknown function (DUF3223) Pollington J anon pdb_2k0m Family This family of proteins has no known function. 21.00 21.00 22.60 21.90 20.10 20.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.61 0.72 -3.69 30 196 2009-01-15 18:05:59 2008-09-02 09:24:51 3 16 68 1 129 193 9 74.50 34 13.45 CHANGED h+thL+..+YssGcpls..sp-tpsllptlLpaHP.....cs-pKlGsG.l.cthpVtpasthp.t....S+CFalVRs..DGop.....-DFSYpKCl ...........h.ptlL+..catssppls..tp-pphllcplLpaHP................csccKIGsG....l.ctlpVshHPtap..t.....SRCFallRp..DGot......pDFSYpKCl.............. 0 38 90 114 +11356 PF11524 SeleniumBinding Selenium binding protein Pollington J anon pdb_2jz7 Family Selenium is an important nutrient that needs to be regulated since lack of the nutrient leads to cell abnormalities and high concentrations are toxic.\ SeBP regulates the level of free selenium in the cell by sequestering the nutrient during transport. SeBP acts as a pentamer and delivers the selenium to the selenophosphate synthetase enzyme [1]. Each subunit is composed of an alpha helix on top of a four stranded twisted ss sheet, stabilised by hydrogen bonds [1]. 
27.90 27.90 28.00 58.30 27.10 27.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.74 0.72 -3.85 3 13 2012-10-01 20:55:33 2008-09-02 09:31:10 3 1 13 5 6 11 0 83.00 52 98.99 CHANGED MlFEDKFIITTADEIPGLpLYYlGIVSslSD..NVDcIVEsL+EKVKAKGGMGLIAFRITs...ADG.KaLGYGTAVKADEGQFTMA ...MlFEscFIITTAc-IPGlpLYhhGIVSssSD..NVDpIlcsLcEpVpAKsGhGLluFRITs....uDG..KhlGYGTAVKADEuQFsMs. 0 0 1 4 +11357 PF11525 CopK Copper resistance protein K Pollington J anon pdb_2k0q Family CopK is a periplasmic dimeric protein which is strongly up-regulated in the presence of copper, leading to a high periplasmic accumulation [1]. CopK has two different binding sites for Cu(I), each with a different affinity for the metal. Binding of the first Cu(I) ion induces a conformational change of CopK which involves dissociation of the dimeric apo-protein. Binding of a second Cu(I) further increases the plasticity of the protein. CopK has features that are common with functionally related proteins such as a structure consisting of an all-beta fold and a methionine-rich Cu(I) binding site [1]. 19.90 19.90 19.90 69.40 19.80 19.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.62 0.72 -4.33 5 22 2009-09-11 07:29:11 2008-09-02 09:51:31 3 1 19 9 13 18 18 71.80 56 78.22 CHANGED VDcuslcKSl-LKDGSTVHlFKDGKMuMEDKhG+uhpMKcGpVMET+DGQKIhM+GDEVhRLDshL+KcH.p+G ..A..spuscchl.LtDGuTlYIFKDGKMA.Es+aGRAVhhphGsshpTKDGppIshpusEVARLsSLLp+cH....Gs.. 0 1 10 13 +11358 PF11526 CFIA_Pcf11 Subunit of cleavage factor IA Pcf11 Pollington J anon pdb_2npi Family Pcf11 is a subunit of an essential polyadenylation factor in Saccharomyces cerevisiae, CFIA. Pcf11 binds to Clp1, another subunit of CFIA whose interaction is responsible for maintaining a tight coupling between the Clp1 nucleotide binding subunit and the other components of the polyadenylation machinery [1]. 
21.10 21.10 21.30 21.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.13 0.72 -3.84 10 54 2009-01-15 18:05:59 2008-09-02 10:47:12 3 4 53 2 37 53 1 83.30 29 14.42 CHANGED s...............ps.spNIQSRNWYLDD.-WVpFKDD-llphoossss.t....................pphppshss.p.ts..hs.pph+spYVVVPsssosMs.+ ...tsp....ssstKNlQSRsWYLcDpcWlp...F..+-..--Isuss.pssst.p.ts...................................pptspstss..........................hptpYVlVPpstpsMs.p......................... 0 6 20 34 +11359 PF11527 ARL2_Bind_BART The ARF-like 2 binding protein BART Pollington J anon pdb_2k0s Family BART binds specifically to ARL2.GTP with a high affinity however it does not bind to ARL2.GDP. It is thought that this specific interaction is due to BART being the first identified ARL2-specific effector. The function is not completely characterised [1]. BART is predominantly cytosolic but can also be found to be associated with mitochondria. BART is also involved in binding to the adenine nucleotide transporter ANT1 [2]. 20.90 20.90 20.90 21.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.62 0.71 -4.32 32 294 2009-01-15 18:05:59 2008-09-02 10:47:57 3 6 116 4 198 272 4 113.80 29 40.86 CHANGED hs..cpchlhpplhpaltSs..WpsslpsFh-ppC..hhF--p...-EsplphpplapcYppll-phlpphlp........phuls.cpatpsl........................p.tpppphtpslhp.lhshpDFphF+chMlppNh-l-hpu ....................................tt....c.chlhttl.shlhsst.aph.hpsFh-pps..............hF-.cp...............-E....sKLpYopIapc.YppLlEchl-phLp........t.Ghs..ppFtpsh.....................................ttcschst...s.lhp..llshsDFhhFKpMMlphphEhph.......................................... 0 84 104 150 +11360 PF11528 DUF3224 Protein of unknown function (DUF3224) Pollington J anon pdb_2ooj Family This bacterial family of proteins has no known function. 
25.00 25.00 27.50 29.70 21.10 18.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.47 0.71 -4.65 11 250 2009-01-15 18:05:59 2008-09-02 12:59:52 3 1 238 4 66 200 15 131.80 34 97.93 CHANGED Mphp..hhGsFplohWsE..oshs-ssss..hspsplspsa.pGDlcGpSplcaLhsY......pu.tuuAsaVGhEphpGslsG+pGoFVLQHpGphspGssssshs.lV.sSGTupLtsLsGshshshss.ssp.u.cFphshtsu ...............................hp.......puoFoVspWsE..............ps.l..ss...t...s.......c..s.h.s...........l...........spAslshph.sGsLpG.p.u.ps.EYLhsY.......su..hpusAphl..Ga.+....FcG.shtG.+pGoFs.hpcp..Gsa.s.pG.t.h...c.s..s..hp..ll..puTG-LtGL...sGshshphsp..spc..hhpath.....h............ 0 28 48 56 +11361 PF11529 AvrL567-A Melampsora lini avirulence protein AvrL567-A Pollington J anon pdb_2opc Family AvrL567-A is a protein from the fungal pathogen flax which induces plant disease resistance in flax plants [1]. The protein has a novel fold [1]. 19.30 19.30 20.30 69.80 18.00 19.10 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.57 0.71 -4.16 3 13 2009-01-15 18:05:59 2008-09-02 13:09:21 3 1 2 2 1 14 0 126.90 81 84.57 CHANGED MEDVPAELTGVSEGYTRFYRSPTASVTLSGLVcVKWDNEQMTMPLFKWIGGEQAEELHFCVHIAHSSGRRLNpARTLGTVNSNMDQHWVEAYRSSGVTRCTIQDCHLFANDIPNFPDYIKIKLVPKT .........MEcVPAELTGVSEGYTRFYRSPTASVsLSGLVcVKWDNEQMTMPLFKWIGGEQAEELHFsVHIAHSSG.+LNpARTLGTVNSNMDQHWspAYRsSGsTRpTIQDpHLFANDI...PNFPDYIKIKLVPKT..... 1 1 1 1 +11362 PF11530 Pilin_PilX Minor type IV pilin, PilX Pollington J anon pdb_2opd Family PilX is a protein from Neissaria meningitidis which is crucial for the formation of bacterial aggregates and adhesion to human cells [1]. The structure of PilX is similar to all pilins as it has the common alpha/beta roll fold. PilX subunits have surface-exposed motifs which are thought to stabilise bacterial aggregates against pilus retraction. 
It also illustrates how a minor pilus component can modulate the virulence properties of pili which have a simple composition and structure [1]. 25.00 25.00 35.30 34.70 23.90 23.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.47 2 59 2009-01-15 18:05:59 2008-09-02 13:32:49 3 3 58 6 2 56 0 126.50 71 79.05 CHANGED SYIEKGYQSQLYTEMVGINNl.KQFILKNP.DDNpTlKSKLcIFVSGYKMNPKIAcKYsVSV+FVstEKPRAYpLVGVPpsGTGYTLSVWMNSVGDGYKCRDAsSApAa.-TLSuDsGCEAFSNRKK .SYIEKGYQSQLYTEMVGINNlhKQFILKNPh.DD.NQsIKoKLEhFVSGYKMN..PKIAcKYsVSV+...F..V..s..tE........KsRAYpLVGVPKsGTGYTLSVWMNSVGDGYKCRDAASAcAapETLSuDsGCEAFSNRKK.......... 0 1 1 1 +11363 PF11531 CARM1 Coactivator-associated arginine methyltransferase 1 N terminal Pollington J anon pdb_2oqb Family CARM1 is an arginine methyltransferase which methylates a variety of different proteins and plays a role in gene expression. This is the N terminal domain of the protein which has a PH domain, normally present to regulate protein-protein interactions.A molecular switch is also present on the N terminal domain [1]. 20.40 20.40 20.50 21.20 19.90 20.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.60 0.71 -4.35 3 78 2009-01-15 18:05:59 2008-09-02 14:17:24 3 4 42 10 39 69 0 91.60 59 17.53 CHANGED M...AAVSVFPGVRLLSIGDANGEIQRHAEQQsLRLEVRAG.DuAsIALaNsE-VCVFKCTVoRETECSRVGKQSFIITLGCNSVLlQFATPADFCSFYNILKoCRGpcuERSVFSE ................................................................................................h.stE-V....CVFKCSVSRETECSRVGKQSFIITL.GCNSVLlQFATPsDFsSFY.N.ILKsCRGHshE+SVFSE.................... 0 8 12 22 +11364 PF11532 HnRNP_M Heterogeneous nuclear ribonucleoprotein M Pollington J anon pdb_2ot8 Family HnRNP M is a splicing regulatory factor that binds to the auxiliary RNA cis-element ISE/ISS-2 which promotes splicing of exon IIIb and silencing of exon IIIC in the fibroblast growth factor receptor 2 (FGFR2) [1]. 
By binding to ISE/ISS-3, HnRNP M plays a role in the regulation of alternative splicing in FGFR2 as it induces exon skipping and promotes exon inclusion [1]. 20.70 20.70 22.70 23.10 20.30 18.30 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.02 0.72 -7.44 0.72 -3.85 3 73 2009-01-15 18:05:59 2008-09-02 14:30:45 3 4 35 2 30 54 0 29.50 80 4.60 CHANGED -psoQNEKRKEKshKR.GGNRFEPYSNPsKR ........ERPsQNE..KRKEKNIKR..GGNRFEPYuNPTKR. 0 1 3 11 +11365 PF11533 DUF3225 Protein of unknown function (DUF3225) Pollington J anon pdb_2owp Family This bacterial family of proteins has no known function. 20.80 20.80 20.80 20.90 20.70 20.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.69 0.71 -4.45 24 251 2012-10-03 02:27:24 2008-09-02 14:42:48 3 5 240 6 86 307 27 123.10 51 80.95 CHANGED hpINtPcVlAEVsAAFtcYEpALssNDVssLDtLFWssPcTlRYGsuENLYGh-tI+AFRtsRsusuLsRplh+TslTTFGcDhAssssEFpR..cGus.RlGRQpQTWlRh.s-.GW+lVAAHVSlh.ps ..........................................................pIshPtllAEVoAAFhcYEpAL....luNDlssLDtLFWpss+.TV...R.a.G..........s.u..E.N.LYG.h.-sI.+.A.F...Rt.u.R...sus.......G...h...s..R...p.L.......t........c....T..sITT......FG..c.....D.h.....A....s...s..o..T..E..FpR........cG..os....+l.G..R..Q........Q.....T..WlR....h......ss....G........W+lVAAHVSLh..s...................................................... 0 14 41 60 +11366 PF11534 HTHP Hexameric tyrosine-coordinated heme protein (HTHP) Pollington J anon pdb_2oyy Family HTHP is from the marine bacterium Silicibacter pomeroyi and has peroxidase and catalase activity. HTHP consists of six monomers which each binds a solvent accessible heme group and is stabilised by the interaction of three neighbouring monomers [1]. The heme iron is penta-coordinated with a tyrosine residue as proximal ligand [1]. 
25.00 25.00 25.70 32.10 24.00 23.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.56 0.72 -3.91 9 36 2009-01-15 18:05:59 2008-09-02 15:55:30 3 2 34 8 21 45 5 69.40 51 80.69 CHANGED hs-sWLPoLhTsTPpEGapLAVKhARluVKhTQPDs-lRppLRssYucDAsuLIAsSpVlAsaFATlAAANsYW+ ..........hsoLhTsTPpEGhpLAlKluRhulKhTQPDs-lRcpLRs.YupDustLIAsSQVVAhcFtTlAAANNYW+...... 0 6 12 18 +11367 PF11535 Calci_bind_CcbP Calcium binding Pollington J anon pdb_2p0p Family CcbP is a Ca(2+) binding protein which, in Anabaena, is thought to bind Ca(2+) by protein surface charge. When bound to Ca(2+), the protein becomes more compact and the level of free calcium decreases. The free Ca(2+) concentration which is regulated by CcbP is critical for the differentiation process [1]. Calcium signalling is widespread in bacterial species, and prokaryotic cells like eukaryotes are equipped with all the elements to maintain Ca2+ homeostasis [2]. 21.50 21.50 21.90 21.50 20.10 20.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.32 0.72 -3.79 17 53 2009-12-03 17:50:01 2008-09-02 16:15:08 3 2 45 3 25 55 5 97.80 26 66.48 CHANGED cRIthEIlVDAY........stcEpthGWYpaL--sLphPFpAhhht...............tpVpVluhu.p-ps.tphhVplph....spcphslsLppLtshcsDspopp........AltDW+YWls ..................................................th..chhVDsY........stpEphhuWhtaLp-plphPFpuhhht.......................s....tcpVpVluhssppsp.tthhVplch....sccphslsLs-Lcsh-ssspspp.............slsDapaWh.................. 0 7 19 22 +11368 PF11536 DUF3226 Protein of unknown function (DUF3226) Pollington J anon pdb_2p62 Family This archaeal family of proteins has no known function. 
20.80 20.80 20.80 21.00 20.00 20.70 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.51 0.70 -4.92 4 17 2009-01-15 18:05:59 2008-09-02 16:23:43 3 2 15 2 12 23 0 205.50 28 76.19 CHANGED h+lLllEG+TDsuFFhsllKKLYGF+Eu+..scsl.hhE...Kht-hs+slsLcKDshtLlVhHupGKsplh+sLpshLcAlch.hh.slchlGlARDVDp-c-lhsWspShl+puuFEs+hsssalllp..slKIhPhGIGpl...sFst..l-hKK.ELEhlhshLAKt-ulLEKh+sSlpuLppDhGRKLpPKDlMHlLuIApsasGDshSGLYcpFItc.I+cN+chV.chLs.hslL.hLshhh ....plLhlEG+TDspFFpslhKKLatF..+Eucthstpl...hh-.............phhEhsph.hL..c+..-sshlslhsupGpsslh+sLtshlcu.hcl.hthplptlGlshDlDcsc-s.shtp.hhp.ttap.t.tshhhhl...th.lhshhhG.h...shp.t..l..-.+K.plE.hhhhLhctpuhLp+hc.ulp.Lp.shtcKLpPK-lhalh..A.tahGs.hpGhYc.al.h.hh.php.l.p.hs..t.........th........................................................................................ 0 2 2 8 +11369 PF11537 DUF3227 Protein of unknown function (DUF3227) Pollington J anon pdb_2p9x Family This archaeal family of proteins has no known function. 21.20 21.20 23.90 23.80 20.80 20.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.19 0.72 -3.91 2 9 2009-09-10 21:06:52 2008-09-02 17:19:47 3 1 9 4 8 11 0 96.40 31 95.49 CHANGED hp.s+sllstThhphLpphuPth.ssLEAaLpAphNtslElAhEDPtKFhcAVpcLFGEauAphh.hsll.cLp.h.sKp....shEpllt.lKKhh....G .............p.sc-llspslpthl+cluPsLcslLEh+L+uphsK.Gh.ElAhEDPpcF+-AVS+LFGEa.SA+LlthhllscL+.hlt.p.phpsLEpllp.l+.............................. 1 2 4 6 +11370 PF11539 DUF3228 Protein of unknown function (DUF3228) Pollington J anon pdb_2pd0 Family This family of proteins has no known function. 
25.00 25.00 207.50 207.30 21.00 19.10 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.37 0.71 -5.21 11 64 2009-01-15 18:05:59 2008-09-02 17:22:04 3 2 62 6 40 71 54 192.10 49 89.79 CHANGED hplsL-sFsh+Qacpp.hpsshIs.hspEpFlp+lNchhc.sshcllDGYAPFCKHlFlcNFT-sp......stslcITscNc+LL....+ouY.ARs-pELPVLsRWFshpsVp.ppl..ApYLDlILYS+EQlpKEssthtp...........ssDauIlSIKsQspsaElPMtPITMhRNAL.l-EGGSGVsLDREcYhcSVcaWpcaAslhs .........plsLssFAhpph.sps....hpsstIp.hs.-pFlp+lN-t.....s...shcll-GYAPFCKhhhlcN.aT.us+......shslsITscNcHLL....RSGYpARsspELPVLsRWF..cs..Vc.ssl...............ApYLclILYSREQlsKEsssh...............-ucWGIlulphpsEs.ElPMsPITMhRNALGlEEGGSGVPLDR-tYpcSVtaWcppAshhs. 0 21 28 36 +11371 PF11538 Snurportin1 Snurportin1 Pollington J anon pdb_2p8q Family Snurportin1 is a novel nuclear import receptor which contains an N-terminal importin beta binding domain which is essential for its function of a snRNP-specific nuclear import receptor [1]. Snurportin1 interacts with m3G-cap where it enhances the m3G-cap dependent nuclear import of U snRNPs in Xenopus laevis oocytes and digitonin-permeabilized HeLa cells [1]. 20.80 20.80 20.80 21.00 20.40 20.60 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.49 0.72 -8.09 0.72 -4.18 6 77 2009-01-15 18:05:59 2008-09-02 17:22:04 3 3 55 15 45 67 0 40.00 63 12.48 CHANGED HPRLSpYKsK.tsuh-QucRRR+hLElQKp+R..LDahs+uR+ ....HPRLS.Q....YKuK..aSuL.EQSERRR+LLELQKsKR.....LDYVNHARR......... 1 12 17 28 +11372 PF11540 Dynein_IC2 Cytoplasmic dynein 1 intermediate chain 2 Pollington J anon pdb_2pg1 Family Intermediate chain IC 2 forms part of the complex cytoplasmic dynein 1 along with a heavy chain (HC), two light intermediate chains (LICs) and three light chains (LCs). The complex is responsible for hydrolysing ATP to generate force toward the minus end of microtubules [1]. 
IC binds to the HC via the N terminal binding domain on the HC and ICs contain binding sites for the LCs. The ICs are responsible for binding to kinetochores and the Golgi apparatus through an interaction with the p150Glued subunit of dynactin which is another complex [1]. 25.00 25.00 26.60 26.90 21.00 20.10 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.25 0.72 -4.58 10 218 2009-01-15 18:05:59 2008-09-03 09:13:31 3 4 80 7 86 257 0 32.80 60 5.67 CHANGED u++sh+LuhScVsplDFsPKEsVoYSKETQTss .....tRtshKLGhuKlTQVDFPPRElVoYoKETQTPs.. 0 24 30 53 +11374 PF11542 Mdv1 Mitochondrial division protein 1 Pollington J anon pdb_2pqn Family Mdv1 is a component of the mitochondrial fission machinery in Saccharomyces cerevisiae. The protein is also involved in peroxisome proliferation [1]. Mdv1 along with Fis1 is also involved in controlling Dnm-1 dependant devision, a GTPase involved in the mediation of mitochondrial division. In this role, Mdv1 is the linker between Fis1 and Dnm1. Mdv1 plays a key role in the regulation of Dnm1 self-assembly [2]. 25.00 25.00 28.00 27.00 23.40 22.30 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.48 0.72 -4.04 2 12 2009-01-15 18:05:59 2008-09-03 11:17:30 3 2 12 3 4 11 0 49.60 90 7.29 CHANGED DADGKLLTEGGENENLRKNASKKETSLFQGFKSYLPIAELAIENTERLNY ..DADGKLLTEGGEDENLRKNASKKETSLFQGFKSYLPIAELAIENTERLNY... 0 1 1 2 +11375 PF11543 UN_NPL4 Nuclear pore localisation protein NPL4 Pollington J anon pdb_2pjh Family Npl4 is part of the heterodimer UN along with Ufd1 which is involved in the recruitment of p97, an AAA ATPase, for tasks involving the ubiquitin pathway. Npl4 has a ubiquitin-like domain which has within its structure a beta-grasp fold with a helical insert [1]. 
21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.37 0.72 -3.70 9 177 2012-10-03 10:59:06 2008-09-03 11:20:48 3 14 147 2 124 200 2 78.60 30 14.25 CHANGED MstphllRVQSs-GhKRlphopppThsphhcKVtcphuFspp.tFulah-RNposEl.uSpspplp.ltl+HGDhL.aLh.s .................hllRlpSs-GhcRl.p.s..s.t..pp.T....huplh....ccltcp.h..s.hsss...u...h.sl.a..hs........c........s...........t......s...........sc.......l.t........u.......s...s.......s.p..o....L....s........LtlpHGDhl.aLh............................... 0 46 72 100 +11376 PF11544 Spc42p Spindle pole body component Spc42p Pollington J anon pdb_2q6q Family Spc42p is a 42-kD component of the S.cerevisiae spindle body that localises to the electron dense central region of the SPB [1].Spc42p is a phosphoprotein which forms a polymeric layer at the periphery of the SPB central plaque. This functions during SPB duplication and also facilitates the attachment of the SPB to the nuclear membrane [1]. 21.10 21.10 21.30 21.50 20.90 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.69 0.72 -4.05 5 30 2009-09-11 12:37:10 2008-09-03 12:45:58 3 3 26 2 21 27 0 72.90 38 16.96 CHANGED NcLIKQNKELpsKLcEKQ-EI-RLNlLlGSLRAKLIKYTELNKKLpc-tQstQpp.sslscsto-stsDulhhsc+u .......NchlpQNK-LphKLc-KQsEI.cLpplspoLRuKL.KYs-lsKK..LEcpshshphp.sshppphs-..ss........................................ 0 3 9 18 +11377 PF11545 HemeBinding_Shp Cell surface heme-binding protein Shp Pollington J anon pdb_2q7a Family Shp is part of a complex which functions in heme uptake in Streptococcus pyogenes. During which, Shp transfers its heme to HtsA which is a component of an ABC transporter. The heme binding region of Shp contains an immunoglobulin-like beta-sandwich fold and has a unique heme-iron coordination with the axial ligands being two methionine residues from the same Shp molecule [1]. 
Surrounding the heme pocket, there is a negative surface which may serve as a docking interface for heme transfer [1]. 25.40 25.40 26.10 31.60 23.20 25.30 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.96 0.71 -4.40 3 63 2009-01-15 18:05:59 2008-09-03 13:00:48 3 2 61 2 4 59 2 148.50 42 44.61 CHANGED ADKGplYoClIpRsYRHPVSGQIEDSGGEHuF-IGQGMVEGTVYSsGMLEVTDAG-lYLTFRMSLADYSGNYQFWVQPGGTGuFQAsAYulTpsGTDTNGTTtDIAIuLPoVNoVVRGSMYVEPMGREVVFYLSPSELpEGYSGDMlAohVT ......................tuplYsshlppsYcHPloGpIED...uG...Gptuh....sIGQGMVEusVausuhLEsoDuG+lhLThRhuLADa...sushp...FhlQ.s.sGsG...uFpuVshslTppGoDs...NGTT........tD.ltIplPohNslIRuSMaVEPMGR-VlFYl..ssuph...pt.ossh.s.h........................ 0 1 2 3 +11378 PF11546 CompInhib_SCIN Staphylococcal complement inhibitor SCIN Pollington J anon pdb_2qff Family SCIN is released by Staphylococcus aureus to counteract the host immune defense. The protein binds to and inhibits C3 convertases on the bacterial surface, reducing phagocytosis and blocking downstream effector functions by C3b deposition on its surface [1]. An 18 residue stretch 31-48 is crucial for SCIN activity [1]. 21.30 21.30 22.10 21.80 21.10 21.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.37 0.71 -4.23 4 473 2009-01-15 18:05:59 2008-09-03 14:17:44 3 1 163 22 5 62 1 111.40 56 97.12 CHANGED MKhKKYIlAGTLAlLLuoTulssl-tNcAsASo..ph.hpspapcc+lA-EL+oLLsposVNcLAsGSLNsYYKRsIhhspY+AKuALKopsFspMu-AKhtLEpIYcEIDEhlpp ......................MKIKKYIlAGTLAlLLuoTulssl-KNEAsAST..ph.hpspYQcc+LA..-EL+oLLsp..osVN.cLAsGSLNsYYKRsIhhupY+.AKuALKoKsFcpMo-AKhpLEpIYsEIDEsL+S....... 1 4 4 5 +11379 PF11547 E3_UbLigase_EDD E3 ubiquitin ligase EDD Pollington J anon pdb_2qho Family EDD, the ER ubiquitin ligase from the HECT ligases, contains an N-terminal ubiquitin-associated domain which binds ubiquitin. Ubiquitin is recognised by helices alpha-1 and -3 in in the UBA domain. 
EDD is involved in DNA damage repair pathways and binds to mono-ubiquitinated proteins [1]. 25.00 25.00 46.70 45.50 23.00 22.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.53 0.72 -4.47 3 94 2009-01-15 18:05:59 2008-09-03 14:33:28 3 7 74 4 61 100 0 52.30 84 2.12 CHANGED suIPAuhVPEELIuQAQVVLQGKSRNVIIRELQRTNLDVNLAVNNLLSRDDED .........slPAoslPEELISQAQVVLQGKSRsVIIRELQRTNLDVNLAVNNLLSRDDED.. 0 21 26 44 +11380 PF11548 Receptor_IA-2 Protein-tyrosine phosphatase receptor IA-2 Pollington J anon pdb_2qt7 Family IA-2 is a protein-tyrosine phosphatase receptor that upon exocytosis, the cytoplasmic domain is cleaved and moves to the nucleus where it enhances transcription of the insulin gene [1]. The mature exodomain of IA-2 participates in adhesion to the extracellular matrix and is self-proteolyzed in vitro by reactive oxygen species which may be a new shedding mechanism [1]. 20.50 20.50 21.20 21.10 20.40 19.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.85 0.72 -4.07 6 114 2009-01-15 18:05:59 2008-09-03 14:53:12 3 6 52 8 47 134 0 83.70 52 9.80 CHANGED -EpaGYIlT-pcPLSs-cGl+LhElLAchl+lsoosFhslpVlGPAVTF+lRsNtQNloTADVsctAsssKspLEppoGL+ILQoGlup+s .............t.EcaGYIlTcpcPL.oh.ttGh+LlEhLAchlchsousFhsISVVGPAlTFRlRpNp.QNlohADVsppAs...s...KscLEspTGLpILQTGVGpR.......................... 0 6 10 24 +11381 PF11549 Sec31 Protein transport protein SEC31 Pollington J anon pdb_2qtv Family Sec31 is involved in COPII coat formation as it forms through the sequential binding of three cytoplasmic proteins: Sar1, Sec23/24 and Sec13/31. Sec13/31 is recruited by the pre-budding complex and polymerisation of Sec13/31 occurs to form an octahedral cage that is the outer shell of the COPII coat [1]. 
Sec13/31 is a hetero-tetramer which is organised as a linear array of alpha-solenoid and beta-propeller domains to form a rod in which twenty-four copies assemble to form the COPII cub-octahedron [1]. 25.00 25.00 25.30 48.00 23.70 19.20 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.58 0.72 -4.32 6 24 2009-01-15 18:05:59 2008-09-03 15:43:24 3 6 22 1 15 26 0 50.20 66 3.95 CHANGED sssPssssNVhSGQTPHLN+KANDGWNDLPLhVKEKPoRAKPVoVAPsuhh ......ss.PPh..NshSGQTPHLN+KANDGWNDLPLcVKE...KPSRAKAVSVAPssl... 0 1 7 14 +11382 PF11550 IglC Intracellular growth locus C protein Pollington J anon pdb_2qwu Family IglC protein is involved in the escape of F.tularensis live vaccine strain [1]. It has been shown that the expression of IglC is essential for F.tularensis to induce macrophage apoptosis [2]. IglC adopts a beta-sandwich conformation that has no similarity to any known protein structure [3]. 25.00 25.00 55.40 55.30 21.20 17.70 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.27 0.70 -5.02 2 33 2009-01-15 18:05:59 2008-09-03 16:03:29 3 1 32 2 2 17 0 203.70 95 100.00 CHANGED MIMoEMITRQQVTSGETIpVRTDPTACIGSHPpsRLFIDSLTIAGEpLDKNIVAIEGG-DVTKADSATAAASVIRLSITPGSINPTISIshGsLIKSsVRsKlpEKlSsILQASATDMKIKLGNSNKKQEYKTD-AWGIMIDLSNLELYPISAcAFSISIEPTELMGVSKDGMpYHIISIDGLTTSQGSLPVCCAASTDKGVAKIGYIAsu ..MSEMITRQQVTSGETIHVRTDPTACIGSHPNCRLFIDSLTIAGEKLDKNIVAIEGGEDVTKADSATAAASVIRLSITPGSINPTISITLGVLIKSNVRTKIEEKVSSILQASATDMKIKLGNSNKKQEYKTDEAWGIMIDLSNLELYPISAKAFSISIEPTELMGVSKDGMpYHIISIDGLTTSQGSLPVCCAASTDKGVAKIGYIAAA. 0 1 1 1 +11383 PF11551 Omp28 Outer membrane protein Omp28 Pollington J anon pdb_2r2c Family Omp28 is a 28-kDa outer membrane protein from Porphyromonas gingivalis. Omp28 is thought to be a surface adhesion/receptor protein. Omp28 is expressed in a wide distribution of P.gingivalis strains [1]. 
23.90 23.90 24.50 24.70 23.80 23.80 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.15 0.71 -4.74 4 100 2009-01-15 18:05:59 2008-09-03 16:20:42 3 2 61 2 16 103 108 175.20 19 45.94 CHANGED Dshhhc.tsshpptatloGhPsshls.RcthhhS.shsh..hsashslhp....pstssslAIsSthsGpplsVTVpVth-pGso.sh+lV.lYlLENGLlhsQss............hGssl.sasHNcVLRtuhouhh.GDhhsshtshtthohuVslhhstuaNuENhslsAhVsDsss.pshss.+stlsspsDa ............................................................................................................sshPpshhs.R......p........th..h.......p...................t...tsh....t...t.pl...hp..............h.ss..s.....s..lsl.p..s.p..hs......s..s....p..hpls..lps.p...h...t.p.s.s...s.....t..h.+Lh..laLlEDulhu.Qt.s...................sssh..h..ps..Y...sHNHVlRs.....sl.s.u.sh....G-c.l.s.h..ts..ss....p..pshp..hshsls..p..s.h.s.....s......ps...hp...lVAalh....s..sss.....s.pshptth.......................................................................... 0 8 15 16 +11385 PF11553 DUF3231 Protein of unknown function (DUF3231) Pollington J anon pdb_2rbd Family This bacterial family of proteins has no known function. 27.70 27.70 28.10 27.90 27.60 27.60 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -4.75 51 448 2009-01-15 18:05:59 2008-09-03 17:01:37 3 3 139 2 115 456 0 159.20 22 94.95 CHANGED hthhh.....ppsLsusElutlassh.ssshshslhptFhpsscDc-l+phlpcuhcl.upcalctlpplhpc-slslPpua..-s-Vssssss....FoDphhlhalphhspsulssYuhuhuhshRpDlthhatchhtchhphhpcshclhlc+Ghh.pPPhhsspccl ............h........sppcsLpssElhplWshhhssphshshhphFhppspDc-l+p.hlp.p.s.h.c.h.s.ppplcplpplLpppsl..sl.Ps...s...s...cssssh.s.s.ss.....asDthhhthl.shhh.t.tulsshutuhuts.hRpD.l.thhasphhhcphphspchhclhhcK.GWl...PPhh..................... 0 45 112 113 +11386 PF11554 DUF3232 Protein of unknown function (DUF3232) Pollington J anon pdb_2rdc Family This bacterial family of proteins has no known function. 
20.90 20.90 21.00 21.80 20.00 20.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.72 0.71 -4.30 3 14 2009-01-15 18:05:59 2008-09-04 09:07:04 3 2 14 2 7 16 0 125.40 28 65.53 CHANGED MhQ.p..usVlSaVupahKuoEpsM-RYK+VlsIsKuDEVAl+LLEGLIDAuTRYFuKVVEMEpRLQTARFRLEGEELR-LTE-LDRSRRhAHEAhISSLHVFNRYlVKEYGE-LpEAGapGGIFP+PEAsRDRIAIAD..WAGELLoGIYE.sR++ .........................................t.h...........................................llpshhpphpcYshhVlch..-spltsh+.chsGp-YR-hsEpLD.+pRoshHssslSsl+ILNRhA-ppt........atss...ps-hsc.t.sIsc..at...............h................. 0 3 4 5 +11387 PF11555 Inhibitor_Mig-6 EGFR receptor inhibitor Mig-6 Pollington J anon pdb_2rf9 Family When the kinase domain of EGFR binds to segment one of Mitogen induced gene 6 (Mig-6), EGFR becomes inactive due to the conformation it adopts which is Src/CDK like. The binding of the two proteins prevents EGFR acting as a cyclin-like activator for other kinase domains [1].The structure of Mig-6(1) consists of alpha helices-G and -H with a polar surface and hydrophobic residues for interactions with EGFR. A critical step for the activation of EGFR is the formation of an asymmetric dimer involving the kinase domains of the protein. Since Mig-6 binds to the kinase domain it blocks this process and EGFR becomes inactive [1]. 25.00 25.00 25.20 25.20 22.60 21.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.65 0.72 -3.58 6 119 2009-01-15 18:05:59 2008-09-04 09:35:41 3 9 40 6 49 99 0 64.70 64 8.47 CHANGED +PPpVPPR-PL.SpssSRTPSP+u............................Lso..stlMPsTQSFAuDPKYsosKslQtQus-uu ...................pPP+VP..P...RE.PL.S.psSRTPSPhs.....................................LsS..stsMPsTQSFAsDPKYsosps..lQt.uscuu........... 
0 2 8 18 +11388 PF11556 EBA-175_VI Erythrocyte binding antigen 175 Pollington J anon pdb_2rji Family EBA-175 is involved in the formation of a tight junction, a necessary step in invasion. This family represents the region VI which is a cysteine rich domain essential for EBA-175 trafficking. The structure is a homodimer that contains a five-alpha-helical core stabilised by four disulphide bridges [1]. 21.50 21.50 22.80 22.80 19.60 18.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.18 0.72 -3.88 11 128 2009-01-15 18:05:59 2008-09-04 10:54:31 3 10 18 2 19 138 0 77.90 58 8.31 CHANGED lccTREcIIphSppNhCsNchS.cYCshhccchss.uTCSc-cpKsLCCSISDYCLKYFshsSpcYYsCh++EFpDPsYcCF .lKsTREpIIhhSph.pKCNNslSlcYCsolcDKISS.sTCS+E+oKNLCCSISDaCLpYF-h.ShEYasCMK+EFcsPpYpCF. 0 5 7 16 +11389 PF11557 DUF3233 Protein of unknown function (DUF3233) Mistry J anon Pfam-B_5068 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.50 22.50 22.70 43.60 21.40 22.40 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.98 0.70 -5.61 6 107 2009-12-15 10:43:27 2008-09-04 10:56:04 3 2 103 0 13 62 1 315.40 57 98.66 CHANGED Mlphsh.....+l......s...huhlLlhstuspAcsh.ch.....lpsclEpsFooulVLoDS-sIThGIsDFDPNchlchcpps....hGos-SIphRpphoVYoLPa...ThsLo--.su.............apHpLpsRLShlpt-s-lplhsssu.oDsh+-psaGuhltYshpYploEsWTlssuhGsHLMaY+NsaoYNsch.pphtslLDGhhVNsSAhAhllEPNlchpYppcpsWGKWcasSoh+YFhGpuaGsAss.upsuNPcGWhlsNulphaYshschtctsQulYsphKRVDlGGDss-sLGTcHYYEhulGWLhssPphhphlDNlGIGlohNhGSAL+GGSIVLaFNc 
..........................................................h.Sho...uoA...cS...h.Dh.....IQcaLEQAFSSSVVLSDSDVFTsGFNNFDPN-WFcsDN-N....LGTsESIEpRK+aKSSTLPh...TluLSE-.-A.............haQHQLFFRLSAsVID--LsIus..h.s.u...co-+aRpSVLGGulhYRYQY+LT-HWTLTPAIGTHLlYYRNohTYNNPphKh.L.hSsLDGLLVNThAWAsLlEsNlKlQY-EEKSWG+W+ASSuWHYFsGaGWGcANN.G-VGNPEGWYlANoLTGhYDF..TQlGRSVQSIYuSIKRVDVGusspEPLGToNYYEAShGWLMTPPFEh-hVDNIGlGLTFNYGSAhKGGSIVLFFNQ.......... 0 2 4 9 +11390 PF11558 HET-s_218-289 Het-s 218-289 Pollington J anon pdb_2rnm Motif This family of proteins is residues 218-289 of Het-s, a protein of Podospora anserina. Het-s plays a role in heterokaryon incompatibility which prevents different forms of parasitism [1]. This region of the protein is the C-terminal end and is unstructured in solution but forms infectious fibrils in vitro which has a structure consisting of a left-handed beta solenoid which contains two windings per molecule [1]. 25.00 25.00 30.00 28.90 20.20 19.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.86 0.72 -4.00 5 15 2009-01-15 18:05:59 2008-09-04 11:24:25 3 1 9 18 14 19 0 63.00 33 22.32 CHANGED KlssIsVRN.s+cIpopEsAKV+LGNsaosuALusAIthsDRToNcs-oVpuKGsSsVHIGN+YG ...KlpsIsu+NpA+cIpsE-pA+lclGNsaSpssLspuht...lsDpTpNpV-sVsA+ssS+VpIGNpYG 0 0 7 13 +11391 PF11559 ADIP Afadin- and alpha -actinin-Binding Wood V, Coggill P anon Pfam_B017401 (23.0) Family This family is found in mammals where it is localised at cell-cell adherens junctions [1], and in Sch. pombe and other fungi where it anchors spindle-pole bodies to spindle microtubules [2]. It is a coiled-coil structure, and in pombe, it is required for anchoring the minus end of spindle microtubules to the centrosome equivalent, the spindle-pole body. The name ADIP derives from the family being composed of Afadin- and alpha -Actinin-Binding Proteins Localised at Cell-Cell Adherens Junctions. 
29.90 29.90 29.90 30.10 29.80 29.80 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.66 0.71 -4.31 24 277 2009-01-15 18:05:59 2008-09-04 11:38:10 3 9 197 0 184 254 1 144.40 26 27.58 CHANGED cpshpaINppLhohGasp.tthhsss............................phshstllNslasLlttpc+shctpEslppphpphpu-hpphpsshp+Lc.sphpthp+Ehs.thpppc+plpppl+shpppl+sp+--lp+hpshlpshcoQhsp-h+++-pEhpKLKc+Lpp ...........................................ps.pYlsppLh.shGh.p.hp..p.t..............................thshspllNsl.cLl.htp..c......+shptpEslpsphpplpu-hs+hpsphp+L........c.pplpptcRchs.thppp-pphpsph.....+..shpppl+pt+-...Elp+hps......hlpphps...Qhsp-h+++-pEhp+LKc+Lpp......................................... 0 61 101 146 +11392 PF11560 LAP2alpha Lamina-associated polypeptide 2 alpha Pollington J anon pdb_2v0x Family LAPs are components of the nuclear lamina which supports the nuclear envelope.LAP2alpha is a non-membrane-associated member of the LAP family which is unique. This family of proteins is the C terminal domain of LAP2alpha which consists of residues 459-693 and constitutes a dimeric structure with an antiparallel coiled coil. LAP2alpha is involved in cell-cycle regulation and chromatin organisation and preferentially binds to lamin A/C [1]. 
25.00 25.00 31.50 30.70 21.90 19.00 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.61 0.70 -5.18 2 25 2009-01-15 18:05:59 2008-09-04 11:50:26 3 3 17 2 9 47 0 209.20 60 33.77 CHANGED AKoVVSHSLTTLGlEVuK..sQHDKI-ASE.SFPhHESILKVlEEEWQQlDRQLPSlAC+YPVSShEAspILSVPKVDDEILGFISEATPhuuhQAuSTESCsppLDLALCRuYEAAASALQIAsHTAFVAKuhQADISQAAQIlsSDPScspQALtILs+TYDAASYlC-AAFDEV+MuApsMGsuThGRRYLWLKDCKIs.ASKNKLsssPFKGGTLFGGEVpKVIKKRGNKp ..............AKTVVScSLTTLGlEhSK.QSQHDKIDASE.SFPhHESILKVIEEEWQQlDRQLPSLACKYPVSS+EATpILSVPKVDDEILtFISEATP.sGlQAuSTESCsKpLDLALCRsYEAAASALQIAoHTAFVsKAhQADISQAAQILSSDPo.cttQALsILS+TYDAASalC-AAFDEV+MuA+oMGsSTsGRRaLWLKDCKls.ASKNKLsssPFKGGTLFGGEVpKVIKKRGNK....... 0 1 1 1 +11393 PF11561 Saw1 Single strand annealing-weakened 1 Wood V, Coggill P anon Pfam_B03980 Family This family of yeast proteins is involved in single-strand-annealing, or SSA. SSA entails multiple steps: end resection and ssDNA formation; annealing of complementary ssDNAs; removal of 3' single-stranded non-homologous tails; gap fill-in synthesis; and ligation. Saw1 in combination with Slx4 catalyses the 3' non-homologous tail removal during recombination. Saw1 interacts physically with Rad1/Rad10, Msh2/Msh3, and Rad52 proteins, and works by targeting Rad1/Rad10 to Rad52-coated recombination intermediates [1]. 
20.50 20.50 21.80 21.00 19.20 18.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.34 0.70 -4.95 9 45 2009-09-11 06:58:03 2008-09-04 13:11:48 3 2 41 0 30 45 2 210.00 28 91.69 CHANGED Mssslshl+lusshllPlRlFlpR+plLt........sptshhp...lsppsIlpLpp.shplhLSpsDlpuLlspl+c-Ll.llhp.............................hppss.......hhshplpshcchs+hchpl+hhhphclslhl.shcclthlpph..h.h...................hsptstsLhhhspph.h............................p.ppstcp-...cK..lpaph+ssh...sltcsIclYVh ...MssplshlplspshlLPlRIFlNR+plLp.....................sp.o.suohhpt......P.lsspSIIsL+s.ss+IhLSppDhps.Lss-I+c-LLhIla.-hss....t.....................................................................................t...phl.c...l.hssspsp...phhssplpshpchsKhphpL+hctphclcIh.lsshccl.s.plRchlhh......................shs.ps.pL.hh.pph.h.....................hl.-stp.t..................t.pts.pp-.....K.....lpahhps.h....slschIclal.............................................................................. 2 7 17 28 +11395 PF11563 Protoglobin Protoglobin Pollington J, Eberhardt R anon pdb_2veb Family This family includes protoglobin from Methanosarcina acetivorans C2A. It is also found near the N-terminus of the Haem-based aerotactic transducer HemAT in Bacillus subtilis (Swiss:O07621). It is part of the haemoglobin superfamily. Protoglobin has specific loops and an amino-terminal extension which leads to the burying of the haem within the matrix of the protein. Protoglobin-specific apolar tunnels allow the access of O2, CO and NO to the haem distal site [1]. In HemAT it acts as an oxygen sensor domain [2]. 
25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.62 0.71 -4.57 104 1086 2012-10-01 21:46:00 2008-09-04 14:07:44 3 53 846 19 390 1089 31 155.20 20 36.20 CHANGED tpsclpthhpalshs...pcDhphLpphpshlpstlssll-taYpclhph.sphschhss......................psplp+h+pthppalpcl......hsus...hcttahcthpc.lGthHs............clslc.pahhushshlhptlhptlhpp............hshpchtthlpAlsKllhl-.slhhpsYhcstpp ...............................h.........thhths...ttphthltp.ht.thh.ts.phctlsppFY.ch.lt.p.p..Pc.h.tp.hlss.............................pps.hpcLK.pshpp..al.h.p.l......hsup........hD..p..c.a...l.p.h.p...pp...luphHs..............+IGls....schhhsuaphlhchlhthlhsp........................................................hs.htchhphhpshh+hl.lshplhhpsY.t....h.......................................................... 0 140 270 329 +11396 PF11564 BpuJI_N Restriction endonuclease BpuJI - N terminal Pollington J anon pdb_2vla Family BpuJI is a restriction endonuclease which recognises the asymmetric sequence 5'-CCCGT and cuts at multiple sites in the surrounding area of the target sequence. This family of proteins is the N terminal domain of BpuJI which has DNA recognition functions. The recognition domain has two subdomains D1 and D2. The recognition of the target sequence occurs through major groove contacts of amino acids on the helix-turn-helix region and the N-terminal arm [1]. 
25.00 25.00 112.60 112.60 24.50 24.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.03 0.70 -5.46 2 14 2009-01-15 18:05:59 2008-09-04 14:28:20 3 2 13 1 3 15 0 256.30 42 61.35 CHANGED M..sPEc..FRhph.Rs+hKN.l-NlLshhAs.Is.ls.hsKtpFspphNshlhphhGp.-pTcKTlsNaRTEIu.pLFGMhhEc-thlasusRspchhEDpD..tFFKshhapFpaPsGh.K.scllchsthtlQh+.h.YlLplhh...pA-ppsI.Loc--lAahlhNsLpshps+..slElhp+IhEsRusclshcs+..Gc.hpYu....+-.LsYh.lAsLlplpGshhKlN.hEtpsIN...pFhtscspFsuYt..h....To.-Dh+sFhpsW.pY...VNpchu ...Y.sP-cYaaRlHH.RPRFKs-lEsVLlahAspIS.ls.LscppFspphNphlppah....s.N....scps.pKTIsNWRTEIu.uLFGhhh......EcpG.hlh.suspApcLs-sQDL.cFFKpFhasFpYPuGHhKsppIlchhp.slpFK.....PspalLpLht...pu.cp...+..shhLTc-EluahlaNDLRsspc+pssh.Elhp+IhcNRtsclta-sp................GDVhRYA....pDILDYM.lAsLl.plpG.s..p....ahlNs.EppuIs...cahpssshFcsYsphht.tpsohc-l+phcptWF-Y...VNpt......... 0 2 2 2 +11397 PF11565 PorB Alpha helical Porin B Pollington J anon pdb_2vqg Family Porin B is a porin from Corynebacterium glutamicum which allows the exchange of material across the mycolic acid layer which is the protective nonpolar barrier. Porin B has an alpha helical core structure consisting of four alpha-helices surrounding a nonpolar interior. There is a disulphide bridge between helices 1 and 4 to form a stable covalently bound ring [1]. The channel of PorB is oligomeric [1]. 21.10 21.10 21.10 30.60 20.90 19.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.33 0.72 -3.78 4 21 2009-01-15 18:05:59 2008-09-04 14:58:00 3 1 13 16 6 20 0 103.70 35 77.37 CHANGED AtsAslsSo..sp-lsshhDhhsCsILcsuLstsGLscEspppsELAAsLcspus.........lG-hs.......suhuuclADRAQTCGIVcsD......TtLppLSSNLSS ...................u..Aslsss..spthsshlsshsCulLcs...uLtss.....G..lhcEsoTRsELAtsLcspus...ss..hsplsshs.......sshuuplADRA.TCGIVKsD.......o.Lp...tLSSNhSs.................. 
0 3 4 6 +11398 PF11566 PI31_Prot_N Inhibitor_PI31; PI31 proteasome regulator N-terminal Pollington J anon pdb_2vt8 Family PI31 is a regulatory subunit of the immuno-proteasome which is an inhibitor of the 20 S proteasome in vitro.PI31 is also an F-box protein Fbxo7.Skp1 binding partner which requires an N terminal FP domain in both proteins for the interaction to occur via the FP beta sheets. The structure of PI31 FP domain contains a novel alpha/beta-fold and two intermolecular contact surfaces [1]. This is the N-terminal domain of the members. 18.50 16.60 18.70 17.10 18.20 16.50 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.94 0.71 -4.63 42 340 2009-01-15 18:05:59 2008-09-04 15:45:36 3 5 214 2 203 338 1 146.10 22 42.76 CHANGED hh.htshpsslpsstDslshhlHhhhhps.sFchh...t........................lscspphpppp..........................t..lPttWst.s.ssYshpYtpsts..shpalLpshthssphllps...lshs.spphsshplsspcalst..................pptsshsshhpp.......hpcLhshhcpp................................................llp.lhsshpptth ..............t.......psshpsspDulhhhlHhhhlps.Gahhh...s........................hscssphpstp...........................................hhP.ppW..st..spssYshpYtcs...........s...stphllpshh.hss.tlllss.........lsht..scplpplpl.psccalsp..............................pp.sshsph.hts........................hpcL.phhcsp................................................ll..lhsthpp..................................................................... 2 59 93 148 +11399 PF11567 PfUIS3 Plasmodium falciparum UIS3 membrane protein Pollington J anon pdb_2vwa Family UIS3 is a membrane protein essential for sporozoite development in infected hepatocytes. This family is 130-229 of the Plasmodium falciparum UIS3 protein which is compact and has an all alpha-helical structure.PfUIS3(130-229) interacts with lipids, phospholipid lysosomes, the human liver fatty acid-binding protein and with the lipid phosphatidylethanolamine. 
The interaction with liver fatty acid-binding protein provides the parasite with a method to import essential fatty acids/lipids during rapid growth phases of sporozoites [1]. 25.00 25.00 118.90 118.00 21.00 20.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -3.86 2 9 2009-01-15 18:05:59 2008-09-04 16:22:46 3 2 8 6 6 11 0 97.60 57 46.07 CHANGED INKlNlpt.lhENpNslDlslKRaN.FhD.s+LuhQ+HFpcLSN-Q+c.hlNsh-YhpKhVQsLpEsRslslSKhQEshAVhslcaaLpc.Y.....Qpc. INKVNlKG.LhENsN-lDVPlKRFNhFhDNs+LAhQHHFscLSN-QpcYhlNDhDYlRKlVQoL-EsRNlslS+hQEDhAVLslEaFLpc.Ytp............ 0 1 2 5 +11400 PF11568 Med29 Mediator complex subunit 29 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-active part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Med29, along with Med11 and Med28, in mammals, is part of the core head-region of the complex. Med29 is the apparent orthologue of the Drosophila melanogaster Intersex protein, which interacts directly with, and functions as a transcriptional coactivator for, the DNA-binding transcription factor Doublesex, so it is likely that mammalian Med29 serves as a target for one or more DNA-binding transcriptional activators [4]. 
25.00 25.00 27.50 27.10 24.50 23.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.99 0.71 -4.15 7 108 2009-01-15 18:05:59 2008-09-04 16:40:37 3 2 83 0 61 99 0 135.60 45 69.25 CHANGED p-ch....Dsls+VKsLl.ssLR-Sl.phhKpuAphLpQNp..Ds...pptpst-ssh.RF-KsLE-FaAlCDQlElsL+TAhpChpQtss........Ss+alPs.Vhss.....spsssh..sslsYspYLssVpspIppAKDIHcsLlssupplsst...........- ..............................................pch....Dslp+hKhLl.s.L+ESLp...shhKsAApsLhQNsslDNG.....tKuuD.s.slp........RFDKsLEEFYAlCDQlE.....LsL...+hAhECLo.Qsss........Ss+a.Ps.Vsss.....spsssspsssLsYsQYLssl+uQIssAKDIHssLlssupplssp...................................... 0 19 24 45 +11401 PF11569 Homez Homeodomain leucine-zipper encoding, Homez Pollington J anon pdb_2ys9 Family Homez contains two leucine zipper-like motifs and an acidic domain and belongs to the superfamily of homeobox-containing proteins. The presence of leucine zippers suggests that Homez can function as a homo or heterodimer in the nucleus [1]. It is thought that the first leucine zipper and homeodomain 1 (HD1)of Homez is responsible for dimerisation and HD2 has a specific DNA-binding activity. Homez is also thought to function as a transcriptional repressor due to the acidic region in its C-terminal domain [1]. Homez is involved in a complex regulatory network [1]. 20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.89 0.72 -4.91 9 166 2012-10-04 14:01:12 2008-09-04 16:47:08 3 12 50 1 72 289 0 52.90 41 7.20 CHANGED sh.ssscsl....Lp-YYhpH+hLpEpDLDsLspKSpMShpQV+-WFAp+.pcpscs .....................h...sspth.....LccYY.h.p.H.+...h...L...pE.pDLspLss+SpMShpQV+-WFup+..c.s..t.......... 0 6 10 31 +11402 PF11570 E2R135 Coiled-coil receptor-binding R-domain of colicin E2 Pollington J anon pdb_2ysu Family E2 is a DNase which utilises the outer membrane receptor BtuB to bind to and enter the cell. 
This family of proteins is E2R135 (residues 321-443) which is the part of E2 which is responsible for binding to BtuB in a coiled coil formation [1]. 22.80 22.80 23.00 23.30 21.90 22.70 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.88 0.71 -4.07 2 37 2009-12-15 10:45:41 2008-09-04 17:03:58 3 11 21 6 3 46 1 126.60 56 19.71 CHANGED HP.EuhcRpY-+A+AELsttscslAp.ppR.Aps.pshsut+utlptAsKplt-..AEhpp.DhhsasP.tthtphWQ....psphhppDlpNpptthcAAtpphs-hs.......AALSuA.EpRKQKEpKtKDAcsKLs ...............HP.EuhcRsY-+A+AELspAsc-lApsQpRpApAlps.......hsuR+SELDtAsKslt-h.AElKph-RFA+-PMAuGHRMWQMAGLKAQRAQTDVNNKpAAFDAAAKEKuDAD.......AALSuA.EpRKQKEsKc+DAcsKLs....................... 1 1 1 2 +11403 PF11571 Med27 Mediator complex subunit 27 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species {1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Mediator exists in two major forms in human cells: a smaller form that interacts strongly with pol II and activates transcription, and a large form that does not interact strongly with pol II and does not directly activate transcription. The ubiquitous expression of Med27 mRNA suggests a universal requirement for Med27 in transcriptional initiation. Loss of Crsp34/Med27 decreases amacrine cell number, but increases the number of rod photoreceptor cells [4]. 
25.00 25.00 25.70 26.20 22.90 24.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.38 0.72 -3.86 18 176 2009-01-15 18:05:59 2008-09-04 17:06:26 3 3 146 0 120 170 0 91.90 36 25.59 CHANGED shpths.apsSc............hplapclospsppAlhphhs.sp.tthsltphhsaIpSY...pslFssPCs+Cs+hl.....ptt.LP.....PshR..shss........h-saHpsCh ........................ts..thshastSp............apVFp+lo-+ApsAl.laah.....sphPc...hs.....l+shhsWlpSY...hpLFpsPCp+CG+hL......pstLP.....PsaR..chpo................................hEAaHpsCp................................ 0 38 55 93 +11404 PF11572 DUF3234 Protein of unknown function (DUF3234) Pollington J anon pdb_2z0r Family This bacterial family of proteins has no known function. Some members in this family of proteins are annotated as TTHA0547 however this cannot be confirmed. 20.20 20.20 22.30 45.80 19.40 18.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.39 0.72 -3.90 2 11 2009-01-15 18:05:59 2008-09-04 17:08:03 3 1 11 12 5 9 0 103.50 64 98.53 CHANGED MAPDLSGTWYVLEGDPGEHLVVEALGERLSGIWTSRELAEAFLAHHPHLGMRVSALESRALKEAaLRALGMLQVEAVMVDYRPGTHRAQVARVKDLLEEVRRA .....MtPDL.SGTWYVLEG..-..PGEHLVlEALGpRLSGIWTScELAcuFLA+.HPc.L.GMRVSsLESRALKEAFLRALGMLpVEAVhVDYRPGoHRAplARVc-LLEEVRRA......... 0 1 3 5 +11405 PF11573 Med23 Mediator complex subunit 23 Coggill P anon manual Family Med23 is one of the subunits of the Tail portion of the Mediator complex that regulates RNA polymerase II activity. Med23 is required for heat-shock-specific gene expression, and has been shown to mediate transcriptional activation of E1A in mice. 
20.60 20.60 28.40 20.80 18.80 20.00 hmmbuild -o /dev/null HMM SEED 1341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.02 0.69 -14.13 0.69 -7.23 7 204 2009-01-15 18:05:59 2008-09-04 17:13:51 3 8 102 0 133 209 1 793.90 37 84.96 CHANGED Ms........ppllch.-p..........hlcsp..p....s......ps.psphtphst.....hsshhuthtpcp+pphl+.hlhhltthsp.p.........phchhhp.Lhchsttthl.uphLCtphh..pp..h.ppphh.Epa+hl+csIstlDYKGlRpIh+hhhE.phhphP.slSsphlsplhtlp-ll.+Ih-RsusLLPAYhslsEIh+shshps.h...W+Lsphlush.pcFRslAphhoIhG+sshLPIVpHs.....uatsthhs.Wclc.sshph.hpup..LPYcP-h.hpsQhhLLpaVLcQPYS+DMlsshhshp.pp+pppsshEp.LshllhpAMEpoEspspp.................pstspalW.HLos.lIYhlhapasshtshlpsL......+pKlstpplpRuRDclMWllLQalSuuhphstIss.hh.lhcLas.LYs-c..psl.lP-.sssphs+thAssClWhHL.KcAtscp.phscsIPpsLKhpaEhLpp.....ss.sh..ts-aplAlLsNAYSTsschFspsMssLh-sl.usscssps......uh.h.As.shsshshphLDuLTh+uKhSLl+pIlshhhp.upsKtssP....husALlETYsRhLVYpElESLuh+uhhuphh.......P.....pshpspAhs.LahLhEhhuYRhHHlpsah+hQh.....LoahpsLsulspssphQlapslEsssLRLI.............ptlGSsphpsp.shhlpc.c.PtslsSs........sEELN+sLIholARuh+lT....stss.upshsc-hLtsIhphTPasWsp+oLppFP.shp-hhpQpshsp.tsp.t.hhos--EhtphsoMpsEN-lIt+Fups...sssPlFLCllFKhl..h....pTpsIoshhYclLE+hus+slssplRphsDallhElusotsst.plpKsl.cslspMlW+aNllshDRllLsLhhRst-us-AQlsh.IhQLLLLp......ss-FppRlp.Fhpp.ss.-HappssahctHlsaHppFPEph.h-tssppsp.........LPlYFGNVChRhLPVlDlhlHRaLEh..hplsp....LtslLDplu.LYKFHcpPITalYsTLahacthhps+s..KKpLVsslhuplc-sRP..WsLoEtYpta.hppppsp..W.P-.sYahpLlcRhssolDuhs.....sFsupDWRasEFsNPsApALYloClELhuLPsuPptVsNulIsVllps.sllP.t.l.sahNslGLlhsALPpsYhssla-chhpslsssphtshp..h..sF-.FsFcshcpuhL-....pshssllslupuhapHhulupLsshscphh.plh.hV+TEhpLlYlh+lVGPaLpRh..........hpptcshuslslhhh.h.c.sshsp.uh.l.Y.s.lCDhhYphKYhFsG 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................W..h.s.hh.h.hh.t..hs..phh..l.......thh................p.p.lhWhhhphht....h.....................................h..sp.t..........h..h...s.....h.....h...t........................................................t..................t...hhhh....p.........h.hh.p...h..........................h.shshthls.....shohp.p..h...h.......h..........t...s............s.thh.Thsh.hhh.....phh.+.hhsth...............h.p.t...t..hhlhEhhsaRh...........h.....+.ph......L.h..hp.h..s..h...s..p.ht.p.pl.hhhEshshpLh.......................shhss.h...ts....hh.p...stthhp..................pLNRhhlholAhsh+hp.........t......hh..phLtplht.o...at....Ws.pohthFP..h..pt.hh..t...........p..........s................hts..p..p.hh.p.h.................tt.thl.pa.......................hLChhhhhh..h....p........s.....h.h..phl......pt..........hssptls.tl.hhsDhll.....ch....t.........t.t.........h....psh....t....ls.hlaphpllshDhhlLsLh.+s..cs..............ps.h...lhp..llp.......s-h.p.Rlp.ahp...s.cc...a......h.s..ph.pp..th.t..s-....p...t................................................................hPhaasslshRhLPlhslhlaRhlE.....h.p.......lLthhu.hhta.H.s..hoalhshL.hha...h...th..............Ll.thh.s.hp...p........hop.a.......t..........................pahhtLl.th.
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 62 75 111 +11406 PF11574 DUF3235 Protein of unknown function (DUF3235) Pollington J anon pdb_3b4q Family Some members in this family of proteins with unknown function are annotated as RpfA however this cannot be confirmed. 21.10 21.10 21.90 22.10 20.90 20.10 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.28 0.72 -3.75 4 59 2009-01-15 18:05:59 2008-09-05 09:48:56 3 1 57 2 13 35 0 89.20 47 42.43 CHANGED GLNSAPTPRslsAsst.PtPAsAAPA.stEYsAssshNoN.l.......sVcuhYsAlsspLAphGlsVPsElpuaYNA..................hlu GLNSAPTPRDss.Ass..PA.......Ps.pssss..sQphA.Asuu.su-cL...........................AVDAlYsAlcsRLA...uhGluVPsplcuaYpAN+s.sFsuFYtANRtsIDhhh.h.................. 0 2 8 13 +11407 PF11575 FhuF_C FhuF 2Fe-2S C-terminal domain Moxon SJ, Bateman A anon Pfam-B_11690 (release 9.0) Domain This family consists of several bacterial ferric iron reductase protein (FhuF) sequences.\ FhuF is involved in the reduction of ferric iron in cytoplasmic ferrioxamine B [1]. This domain is the C-terminal domain that contains 4 conserved cysteine residues that are found to be part of a 2Fe-2S cluster [1]. 20.50 20.50 20.50 20.60 20.40 20.30 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.89 0.72 -7.19 0.72 -4.27 60 1049 2009-01-15 18:05:59 2008-09-05 10:52:59 3 4 988 0 178 640 12 22.30 54 8.86 CHANGED RcsCClhYpl..sus.ph..Css.CP..hh ..RRoCC.cY+l....Pss..pp......CGD.CsLh.... 
0 35 93 140 +11408 PF11576 DUF3236 Protein of unknown function (DUF3236) Pollington J anon pdb_3brc Family This family of proteins with unknown function appears to be restricted to Methanobacteria. 25.00 25.00 121.40 120.20 21.90 20.40 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.79 0.71 -4.81 7 27 2009-01-15 18:05:59 2008-09-05 11:05:31 3 2 27 2 19 28 0 154.70 53 91.72 CHANGED lE-hIppAahEShpstRhGDctEElctIpphIpsA++lVVsspNpcKhcVlpcllpchs.scsphLpIsTN.ADLTRhPAloKuLhAlDlocADllIARGRLGlPGSGShLlIMDp+GRlLTuuhSPSplIHppslc-ulppEhpcALpRIGhp ..hEchIKpAYhESlpspRhGD+hEElctIpphIhsAK+IVVsTpNpKKFcVlp-llpcl..........s.......splphLpIsTN.ADLTRMPAlsKuLhAlDhscADLlIARGRLGlPGSGShLlIMDsKGRILTuulSPSplIHKpslEcsVcpEhhcALcRIGl.. 0 4 9 15 +11409 PF11577 NEMO NF-kappa-B essential modulator NEMO Pollington J anon pdb_3brt Family NEMO is a regulatory protein which is part of the IKK complex along with the catalytic IKKalpha and beta kinases. The IKK complex phosphorylates IkappaB targeting it for degradation which results in the release of NF-kappaB which initiates the inflammatory response, cell proliferation or cell differentiation [1]. NEMO activates the IKK complex's activity by associating with the unphosphorylated IKK kinase C termini.The core domain of NEMO is a dimer which binds to two fragments of IKK [1]. 21.00 21.00 21.00 23.10 19.90 20.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.28 0.72 -4.22 9 160 2009-01-15 18:05:59 2008-09-05 11:12:20 3 2 60 4 67 138 0 66.90 46 14.09 CHANGED .tpslpphpcLlpENppL+EAlKQoNptMK-RaEELttWpE+Q+EER-Flpp+hcEA+phlptLshEN ......pth.pphpchLpENppL+...-Al+QsNQhh+cRhEEL.taptpQ+EE+-Fh.p+FpEA+chlppLohEp.................... 
0 9 17 37 +11410 PF11578 DUF3237 Protein of unknown function (DUF3237) Pollington J anon pdb_3c5o Family This family of proteins has no known function 21.60 21.60 22.10 22.20 20.70 19.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.79 0.71 -4.91 66 476 2009-01-15 18:05:59 2008-09-05 11:23:30 3 11 281 12 270 484 95 145.20 24 71.09 CHANGED PsL..chhasl........pl.......clsssh.....slGp..................sstG........pRpl.lsls..GGphpGs......plp.......................GplLs.sGuDathlp.s-G...........................hscl-sRasl..cTc..........DG......shIalpspGhhpss......thhp.thtt...upslss......s.phah................+ss.pFETu.s.s+Y.pWLsptlhVG....pupptss.t.....VhhcsapV ...........................................PtLphhhph........pl.........pl...ss.sh.....tlGp...............................ss.t.G........pRph..lslh....GGphpG..........tlp.....................................G..plls...sGuDath...hp..sss............................hscl.cs+Yhl....pTs................DG.....shIhlpspGhhp.ss...t.hht..htt.......spshs.................s..phhh..................pss...pF..ETu..s...tcY.paL.sp.tlalG..puphh.ss..t.....Vhhchap........................................... 0 46 135 217 +11411 PF11579 DUF3238 Protein of unknown function (DUF3238) Pollington J anon pdb_3c5p Family This family of proteins with unknown function appears to be restricted to Bacillus cereus. 
19.80 19.80 20.00 19.90 19.40 19.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.29 0.71 -4.67 2 116 2009-01-15 18:05:59 2008-09-05 11:29:37 3 2 59 6 10 87 1 181.60 58 86.79 CHANGED MspIVKlRuSVFhPhuhhEPhKDstTG+lhpatGDuREFTPaAsNshRSRlEQEVslDFYKcElFoYAsssIsT.KlTNPDGSsphppGchSTENIVCTsIsWspDtVpFcMpASASNPLNshAPssDYLLslpVNKsGolclpG.HDGFPCaEFYKQVDFGsFEpIYhHDFRETsDTPtALuGEM-YSFpppl ..................................MspIVKlRuSVFhPhuhh.....E.shKD.t.TGplhpatGDuREFTPaAVNo..hRSRlEQEVllDFYKcElFoYAsssIs..T.KlT......N....P.DGSlphcpGcs.STE.N.IVCTsIlW.ss-.tVpFcMpASASNP.....LNshA.PssDYlLslpV.p+s.GolclpG.HDGFPCaEFYKQVD..FGs.FEpIYTHDFRETsDTsAALuG-M-YSFpppl......................... 1 1 7 7 +11412 PF11580 DUF3239 Protein of unknown function (DUF3239) Pollington J anon pdb_3c8i Family This bacterial family of proteins may be membrane proteins however this cannot be confirmed. Currently there is no known function. 25.00 25.00 25.30 27.10 23.90 23.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.57 0.71 -4.11 7 82 2009-01-15 18:05:59 2008-09-05 11:49:43 3 3 81 2 19 66 0 124.90 42 54.56 CHANGED P+phGshppLYsphpLsPAlluElsPRslVLLuLVsssssssspspaALssRslsplsGhs...++lGpRVPuVAlp....utcshcst-pW-plSPMPIsWGTsDssVlcRAEpsIsps.WspLppslspl-c ......P+pVGsAppLYssYsLsPAhIAEVNPRDhVlhALVNsssD.spss.P.....paALssRsloslsGhc...RpVGpRlPsVAVs....Gppospsp-pWppISPMPIuWuTsDssVlp+AtcsIPpcpWphLp+sls+l-.p... 0 3 13 18 +11413 PF11581 Argos Antagonist of EGFR signalling, Argos Pollington J anon pdb_3c9a Family Argos is a natural secreted antagonist of EGFR signalling which functions by binding growth factor ligands that activate EGFR by forming a clamp like structure using three disulphide-bonded beta-sheet domains [1]. 
25.00 25.00 28.00 26.70 18.80 23.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.92 0.71 -4.19 3 39 2009-01-15 18:05:59 2008-09-05 12:56:11 3 2 23 8 25 50 0 93.60 46 41.30 CHANGED HSlKDlRILYQVGsSEcDLPV...........................CAPNAV.....................CSKI....................DLYETPWIERQCRCPcuNRsP.plIlHHHc+ssuo ......................................................................................................Cp.asVpp+..-.hh-ps.Ipp.CpCPcuaRCP......pHHopsu......... 0 7 8 19 +11414 PF11582 DUF3240 Protein of unknown function (DUF3240) Pollington J anon pdb_3ce8 Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.13 0.72 -4.07 11 148 2012-10-01 21:59:08 2008-09-05 13:09:45 3 2 146 1 47 234 49 95.10 29 92.06 CHANGED hopphh.LsLlhs+-l+s-LsDhLhph.DhlSGFTlhcssGaupchphhshuEpV+GttctltlpllhsppshpplLspL+pthscssltYWltPVtshGpls ...............................h.....h.LplhhshsL+DsllDh.Lh.cp..s.h.ssFhhhpshuaus..p...p...h...h....hS.......pE.QVpG+...p......chsphclhlscptstsLlpsLcpph..sspth...hah...........t.................... 0 10 34 40 +11415 PF11583 AurF P-aminobenzoate N-oxygenase AurF Pollington J anon pdb_3chh Family AurF is a metalloenzyme which is involved in the biosynthesis of antibiotic aureothin by catalysing the formation of p-nitrobenzoic acid from p-aminobenzoic acid. AurF is a non-heme di-iron monooxygenase which creates nitroarenes via the sequential oxidation of aminoarenes [1]. 
25.30 25.30 25.30 25.30 25.10 25.20 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.88 0.70 -5.50 33 580 2009-01-15 18:05:59 2008-09-05 14:47:50 3 1 308 10 194 533 184 282.80 21 88.21 CHANGED M...........................spppshppLhcu.sc+uh...cPh....p..........sss+ahhPsphssLhssshapphspppphclsppchhshhssslhFEshlh....ptlh+shhstsssssptphshptlsDEshHslMahchhppls............ss.hphpRhhphlhphhsshh.pshstahsshluEphlssh.+shhcDsp.lpPhl+plhplHlh-EuRHhpFuc-hh+phhsph..sttpRthhushlshshthahss.lp.thhtts......slcstcultpsh..sssptt.phh+shhuslhphhccsGlh ........................................................................................................phhtpL.cu.usp.csh...sPhh.-lDWpss..........sssphhhssph.s...Lhs.pshapphsccp+hclspachushhshslahEp.h.Lh....phlhpphht.t.s...s.s.s.s.php.a.sh.s.p.hsDEspHshMFtch.l.p.+hs................sshhth..chhp..h.l..ht.h.............h.s..s......sh..hs......hshass....h.ll...uE-h.ls...ph...p..+...ph....hc....Dsp..l.pPhh+plhplHlh-EARHluFu+....chl....ccth.sph......sphpRt....h.hphh...hsh.sht.hh.hps.h..hs.st.hhhhh......shs..cshtpth...tssp.p..phhpt.hhtplh.hhcchGlh.................................................................. 0 49 126 168 +11416 PF11584 Toxin_ToxA Proteinaceous host-selective toxin ToxA Pollington J anon pdb_1zld Family ToxA is produced by particular Pyrenophora tritici-repentis races and is a proteinaceous host-selective toxin. It is necessary and sufficient to cause cell death in sensitive wheat cultivars [1].ToxA adopts a single-domain, beta-sandwich fold which has novel topology. The protein is directly involved in recognition events required for ToxA action. It is thought to be distantly related to FnIII proteins, gaining entry to the host via an integrin-like receptor [1]. 
25.00 25.00 25.10 256.70 18.40 17.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.72 0.71 -4.25 2 16 2009-01-15 18:05:59 2008-09-05 16:00:25 3 1 6 4 2 14 0 117.80 97 67.67 CHANGED QGSCMSITINPSRPSVNNIGQVDIDSVILGRPGAIGSWELNNFITIGLNRVNAsTVRVNIpNTGRTNRLIITQW-NTlTRGDVYELFGDYALIQGRGSFCLNIRSDoGRENWRMQLEN QGSCMSITINPSRPSVNNIGQVDIDSVILGRPGAIGSWELNNFITIGLNRVNANTVRVNINNTGRTNRLIITQW-NTLTRGDVYELFGDYALIQGRGSFCLNIRSDSGRENWRMQLEN 0 1 1 2 +11417 PF11585 Stomoxyn Insect antimicrobial peptide, stomoxyn Pollington J anon pdb_1zrx Family Stomoxyn, localised in the gut epithelium, is an insect antimicrobial peptide which functions in killing a range of microorganisms, parasites and some viruses. Stomoxyn has a structure consisting of a random coil in water however in TFE it adopts a stable helical structure. Stomoxyn is thought to have a similar function to cecropin A from Hyalophora cecropia due to structural similarities [1]. 25.00 25.00 26.20 102.30 23.30 18.70 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -8.13 0.72 -4.16 2 2 2009-01-15 18:05:59 2008-09-05 16:08:59 3 1 1 1 0 3 0 42.00 81 62.69 CHANGED RuFRK+FN+hlKKlKHTISETAHVAKDsAVIAGSGAAVVAAs RuFRK+FN+hlKKlKHTISETAHVAKDsAVIAGSGAAVVAAs 0 0 0 0 +11418 PF11586 DUF3242 Protein of unknown function (DUF3242) Pollington J anon pdb_1vr8 Family This protein from Thermotoga maritima is a hypothetical ORFan protein, TM1622, whose structure has been determined. The protein is composed of seven beta strands and three alpha helices [1]. 
25.00 25.00 134.20 134.00 22.70 20.60 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.81 0.71 -4.60 4 11 2009-01-15 18:05:59 2008-09-05 16:31:12 3 1 11 1 5 12 0 129.20 42 81.71 CHANGED PpuYSlcoAlhlLps.pYhLssVtcl.-uYGsl..ucGhsA..lF-shsGhFalFKYhsEp.AKp.WKKlsKchGhs.phsYh..shhshGhFos+h-upcIlsWWKDNWLFllpGcs.s--FspaV.clYtclK PcuYSl-oAIhlL-s.cYhLsDlcEI.DuYGDV.phKG+VA..lF-scpG.hlalatYcuE-.AKphWKKlsK+hGhsShRohL..-LsshGhFSThh-GKcIluWWKcNWLFllEG+sslE-FVcaVhcVYpclK. 0 2 3 5 +11419 PF11587 Prion_bPrPp Major prion protein bPrPp - N terminal Pollington J anon pdb_1skh Family This family represents the N-terminal domain (1-30) of the bovine prion protein (bPrPp). The proteins structure consists of mainly alpha helices. BPrPp forms a stable helix which inserts in a transmembrane location in the bilayer, with the N -terminal (1-30) functioning as a cell-penetrating peptide [1]. 20.80 20.80 20.80 21.60 19.00 20.10 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.96 0.72 -7.02 0.72 -4.28 8 461 2009-01-15 18:05:59 2008-09-05 16:47:14 3 6 190 1 19 408 0 28.10 83 11.54 CHANGED Mu+..luCWlLVLFVAsWSDVGLCKK.PKP ..MVKSHlGuWILVLFVAhWSDVGLCKKRPKP. 0 1 1 4 +11420 PF11588 DUF3243 Protein of unknown function (DUF3243) Pollington J anon pdb_3d0w Family This family of proteins with unknown function appears to be restricted to Firmicutes. 20.00 20.00 20.00 21.90 19.50 19.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.58 0.72 -3.99 17 247 2009-01-15 18:05:59 2008-09-05 16:59:24 3 1 189 4 63 156 0 80.70 49 91.39 CHANGED MslL-s.a-pWKsaLucplppupptGhsccslschAhplG-YLAppV-PpNpcE+lLp-LWcVAs--Ep+sLAshhVKlVpp ...........................MoVL-N.FDpWKsFLG-RlcpApptGLsptsluchAaclGD.YLAscVEs+NcpE+LLtELWcVADEpEQHsIAslMVKhVpp.... 
1 25 49 53 +11421 PF11589 DUF3244 Domain of unknown function (DUF3244) Pollington J anon pdb_3d33 Domain This domain adopts an immunoglobulin-like beta-sandwich fold and structurally is most similar to fibronectin. 22.00 22.00 22.00 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.26 0.72 -4.33 10 185 2010-07-04 00:38:26 2008-09-05 17:09:39 3 9 75 3 38 154 25 101.30 21 65.17 CHANGED s.ohpphs+......slshcGcW....t-cttRSI.sslP.oASI..DGslLsIcFpsslsslTIslpc.ppGsVlYEsslssusuptholSlsshssGcYplclopstG.aLhGpFhlE ..................................t......................h.....t..t....RSl....hPhp.s...l...-..s..s..h..l.pl.pF.t.p.shss.l.s....lplps...psGpll.Ypsshss.s.ss.t.h.sIsL.ssh.s.sGpYpLplpssps.hh..hGpFp............... 0 18 33 37 +11422 PF11590 DNAPolymera_Pol DNA polymerase catalytic subunit Pol Pollington J anon pdb_1dml Family This family of proteins represents the catalytic subunit, Pol, of the Herpes simplex virus DNA polymerase. Pol binds UL42, making up the DNA polymerase. UL42 is a processivity subunit which binds to the C-terminal of Pol in a similar way that the cell cycle regulator p21 binds to PCNA [1]. 20.00 20.00 21.90 34.10 19.90 19.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.79 0.72 -4.28 8 108 2009-01-15 18:05:59 2008-09-08 09:10:44 3 2 21 4 0 170 0 36.70 78 3.03 CHANGED c-sAtRLsuAGFsslpuGA....spEEETRQ+L++AFcILA .DDVAARLRAAGFGslGAGA.....TAEETRRMLHRAFDTLA 0 0 0 0 +11423 PF11591 2Fe-2S_Ferredox Ferredoxin chloroplastic transit peptide Pollington J anon pdb_1fct Family The structure of chloroplast ferredoxin in water is unstructured however in a 30:70 molar-ratio mixture of 2,2,2-trifluoroethanol, residues 3 to 13 form an alpha-helix. The rest of the peptide remains unstructured [1]. This family is the N-terminal of the [2Fe-2S) ferredoxin from C.reinhardtii. 
This protein catalyses the final reaction in a pathway which allows the production of H(2) from water in the chloroplast [2]. 25.00 25.00 62.90 62.20 19.80 19.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.64 0.72 -4.31 2 6 2009-01-15 18:05:59 2008-09-08 09:34:00 3 1 4 1 3 4 0 33.00 80 25.98 CHANGED MuMAhRSoFAARV.GA+PAVRuARPuuR.hSs.A MAMAMRSoFAARV.GA+PAVRuARPuuR.hSs.A 0 1 3 3 +11424 PF11592 AvrPto Central core of the bacterial effector protein AvrPto Pollington J anon pdb_1r5e Family This family of proteins represents the bacterial effector protein AvrPto from Pseudomonas syringae. This is the central core region of the protein which consists of a three-helix bundle motif. AvrPto is part of a type III secretion system from P.syringae which is involved in the bacterial speck disease of tomato. In resistant plants, AvrPto interacts with the host Pto kinase, which elicits an antibacterial defense response. In plants lacking resistance, the Pto kinase is not present and AvrPto acts as a virulence factor, promoting bacterial growth [1]. 21.80 21.80 24.10 61.20 20.40 15.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.24 0.72 -4.26 2 21 2009-01-15 18:05:59 2008-09-08 09:57:52 3 1 19 2 2 19 0 105.30 53 65.34 CHANGED DNVTSsQLLSVR+QLAESAGLPRDQHEFlSSQAPpSLRspYNNLYSHTQRTLDhADMQHRaMTGASGINPGMLP+ENVDDMRSAIoDWSDMREALQHAMGIHADI .....s.sVTusQLLsVRHQLA-uAGLPR-QHcFIoNp.APpoLRsRaNsLYs+TQRTLchADhQHRYMTGsSulNPGMhPHENVssMRoslScWSDMREALQHAMslHssh... 0 0 0 1 +11425 PF11593 Med3 Mediator complex subunit 3 fungal Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species. 
It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [1]. Mediator subunit Hrs1/Med3 is a physical target for Cyc8-Tup1, a yeast transcriptional co-repressor [2]. 27.60 27.60 27.70 27.70 27.20 27.50 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.49 0.70 -5.22 5 30 2009-01-15 18:05:59 2008-09-08 10:35:47 3 3 26 0 19 34 0 319.90 39 85.24 CHANGED DsILsssloL--Lc-hLu.csEu.o+DsVsc+IpcA+DuILPLRLQFNEFlpllSsI..EchuspTsQEKFLhIRSKLL-Lpc+lQsLSpDFcpLQPLFsTVsEYScTh..+-KKFQlLETL.............................GoYscsussuASsShspsSs+SsAATTuSTAsTPuA..AslssApoAus.........PssTssl.............Gos.ssss.h.soTsssshstsKKPRKPRQTKK.....sAsAAKsQ..............ASAsAsAsAuA.sp............................................ssh.tss.NuuMtsslPN...sTPs.Mt.....lsusSP.........s...............NuMuSPL......NsMSPMpNhsQ....MGt.s.h...GQhs.sss.GstsspsspsshpussTPStSM...hN.NNITPANILNMS.........s.uF-p.sQsQsPQQttsQsQ.pshNMsM.sDsNN....hD.lDLNNLDLuSLNMDFL ....................................spllssslpL--LpshLA.p..........s-s.s+DpVsppIpcsccpILPLRLpFN-FlphMusI.......-t.ppssp.tKaLhIRsKlLpLss+hQsLSpchptLQPLFsTlsEY.coh..ps+pFp.LEsL.............................uohscsusAssSsu...tshsos+SsAAo.TsoTssTPtA....sshsp..upohss.........Psoossh.............uos.sTss.stsoTs.ssshsssKKPRKPRpTKK..............ApsQ......................spApApApAQs.tpp......................................ss..psshsuuMsuslPN...sTPs.M........lNusSP..........p..............NsMuo.PL......NhMSPMsNs.p....hGt.s.t...uphp..t...us.hss.s..sshpts.ss.ps....hs.NslTPANILsMs.............................shpp.pQ............p...p...th...s.Ns.......lDLNsL-LuuLNMDFL......................................................... 
0 2 7 16 +11426 PF11594 Med28 Mediator complex subunit 28 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants to fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function [3]. Subunit Med28 of the Mediator may function as a scaffolding protein within Mediator by maintaining the stability of a submodule within the head module, and components of this submodule act together in a gene-regulatory programme to suppress smooth muscle cell differentiation. Thus, mammalian Mediator subunit Med28 functions as a repressor of smooth muscle-cell differentiation, which could have implications for disorders associated with abnormalities in smooth muscle cell growth and differentiation, including atherosclerosis, asthma, hypertension, and smooth muscle tumours [4]. 23.40 23.40 23.40 26.50 23.30 23.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.50 0.72 -3.80 3 97 2009-01-15 18:05:59 2008-09-08 11:35:36 3 1 79 0 66 90 0 102.40 48 51.22 CHANGED EIRstVDQsopKF..LDIARQpEsFFLQKRhpL..SVhKP-plLKEEsp-LK...pEl..........QRKDpLl.............pKHhoKI-aWcNL...LoDtpshaKshs-lPs-GRtslsE.uo...............uslPus.c ..........EI+stV-Qshp+F..LDlARQhEsFFLQKRhpL..Ss.KPEpllKEDlsEL+...sEL.........................QRK-sLl....................QKHhsKLcpWQpl...LpDlpsttc..tp..h...h..t..............................h................................... 0 17 22 45 +11427 PF11595 DUF3245 Protein of unknown function (DUF3245) Wood V, Coggill P anon Chahwan C Family This is a family of proteins conserved in fungi. 
The function is not known, and there is no S. cerevisiae member. 26.40 26.40 26.60 27.20 26.10 26.30 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.03 0.71 -3.93 12 68 2009-09-11 06:21:26 2008-09-08 13:01:24 3 3 59 0 51 64 0 134.70 31 67.63 CHANGED KssVuLu+oQ+LluSWLss.osstpups.ps-pELQp...c.hpsVPppLGlGAslPppuscu..o.pp.cLsS.....sDcL++QLLGKshp+hhutttt.....s.s..pppuss.ssptsspsstht--sD-DD--.EuRouhhu+pt...........+KR+ ........................tsVuLu+up+LluSWlsshossc.tsss.csEpEhp...tthh..ps.hPc+LGLGAs.lPppttcu....s..t...tS.....sDcLc+pLLGK.............................pppsst.tttppppst..sscps---DE-.EuRouhhu+pp....................t........................................................................... 0 13 25 40 +11428 PF11596 DUF3246 Protein of unknown function (DUF3246) Wood V, Coggill P anon Chahwan C Family This is a small family of fungal proteins one of whose members, Swiss:A3LUS4 from Pichia stipitis is described as being an extremely serine rich protein-mucin-like protein. 27.80 27.80 96.00 30.60 27.70 27.70 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.24 0.70 -5.08 5 14 2009-09-10 21:41:12 2008-09-08 14:01:14 3 3 10 0 9 15 0 190.90 37 28.88 CHANGED DDDDDETDC--..ET.............llPTsosslTTlsGoGuVTlTsGGSGu..sshttou..........oILPTtoup.......DDDD..sD--TDsEssT.sh.ssGoVTpsPTGoTotTllsoc...uTTssDDDDssssEooIs..DsTssTssT.hTssGsPTs.TVTTNusATTTs+Tpscss.....lTYTuTGpspTTtsTp--ED...CDEThshThThhsPoTTVh.sstlhsNsVTVIupho.h.pshEsDutpssGSGSGSsGSGGS..GSGSS ..........................................s.h.s..s....h.h.sss...hssssto...........s.lsThssp..................ssD-...D-ETDCETsh.sh.PsGss........ThhsTsosohTllsTc...sTTos----psssEssh...psTs.sp.T.hT.sts.TT..lssphssssp.....chs.....lTYTusGpshTThhoppsE....C-EThhhThThhsPpTTVh.sstl.sssVTllsp.o.h.ps.psDsh.......pGpssuuuS..uSGuS....... 
0 3 5 9 +11429 PF11597 Med13_N Mediator complex subunit 13 N-terminal Coggill P anon manual Domain Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function. Med13 is part of the ancillary kinase module, together with Med12, CDK8 and CycC, which in yeast is implicated in transcriptional repression, though most of this activity is likely attributable to the CDK8 kinase. The large Med12 and Med13 proteins are required for specific developmental processes in Drosophila, zebrafish, and Caenorhabditis elegans but their biochemical functions are not understood [4]. 
25.00 25.00 34.70 33.50 24.70 24.70 hmmbuild -o /dev/null HMM SEED 401 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.33 0.70 -5.79 18 237 2009-01-15 18:05:59 2008-09-09 14:16:19 3 6 144 0 154 218 0 328.70 28 18.84 CHANGED Ms..st.......................sthoNhhsl........sthssIha+ha..............s...s.hhp.hp+h.phc.LplhhRp.ss...................................tthtLWlF.hssp.p.ht...............ptts.pstlshpstoLhhpA.tsl.tpsLhcs.hsphspttsss.sssp+s......ssssp+........................................hushashFlousosslot.hl+ppssI.LsoRohlshspp..........................................................phh.sPhslssshou.sh..Los........................................................................................................................................................hpsplsusGcLsVuhpshshPulhsLss.s-sst.h..h..tsslhhuPouuhuphhusssssssssssssssssttsssps...............sshpphphtshphLpthGhs.sss.pspsWlplpshpshhsplssphhptsppsppss....shchlhWPtphCF .......................................................................................................................................splsuIpW+ha....t...................s..s..tsshh.u.h.cshps.phltshhRp.t........................................st+pLWlF....ah..s.p.-..ssh.t.ht.................pp..s.hp.st..lsh...ps.p.oL.lhcA.hts.lhp...+.s.Lhpc.shs+hGcahl..pP.hp..psc+.......hspSp+..................................................LSsuFsFFlHG-SsVCoSl-lppHpslhLlscpHlph..Apps............................................................tshpVhluPaGls.GsLTGpuh.chos....phhtc....................................................................................................................................................p....hc..ss..ssVcVlVu....u.s+hhaPuthlLl....stsD.s....................ssssuth.......u....t...h...s..spps..sus.psss.ps....s...hssss.s.t......................................p..ptpth.ph.s........t....tt...p...t.ts.....thh.hsht.C................................................................................................
........ 0 30 56 106 +11430 PF11598 COMP Cartilage oligomeric matrix protein Pollington J anon pdb_1fbm Family This family of proteins represents the five-stranded coiled-coil domain of cartilage oligomeric matrix protein (COMP). This region has a binding site between two internal rings formed by Leu37 and Thr40 [1] 20.80 20.80 20.90 21.10 20.60 20.40 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -8.27 0.72 -3.89 12 203 2009-01-15 18:05:59 2008-09-09 14:26:53 3 22 60 15 94 168 0 44.50 45 5.26 CHANGED DhscpLl...spLpphsplltEL+-.l+p.......................Ql+EhshLRsTItcCpuCG ...........spthl...pplpphNplLsEl+-hlRp..................................................QVKEhoaLRNTIhECpuCG... 0 10 19 44 +11431 PF11599 AviRa RRNA methyltransferase AviRa Pollington J anon pdb_1o9g Family This family of proteins represents the methyltransferase AviRa from Streptomyces viridochromogenes. This protein mediates the resistance to the antibiotic avilamycin. AviRa methylates a specific guanine base within the peptidyl-transferase loop of the 23S ribosomal RNA [1]. 26.10 26.10 26.60 132.50 24.60 26.00 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.73 0.70 -5.35 2 11 2012-10-10 17:06:42 2008-09-09 14:39:40 3 1 11 2 6 13 1 242.50 39 98.23 CHANGED Mp.Y+a.scp.s.pDhAsGhVLapssG.PsFPVRLAhEIapRsLth..tcu.loLaDsCCGuuYhLolLGLL+ppoltplhuSDlsstslpLAscNLuLLo.uGlstRc.Eh+t..ppatKsSal-AhpuhcRlcEhLTtE.......I+Th.lFcspsLpss.hs.sPDlllTDlPYGphscWEt..sutslsthhpuLuushsucslIsV.hD+ppKI.ss.hptLER.hlGpRph.lh..+Au-lh ....p.Y+atosptsYpDhASG+VLasAsGhPuFPVRLASElhQRshphL.p...p...uPhTLaDPCCGuGYLLTVlGLLHscplpplluSDlsscsLplAt+NLuLLotpGLppRpcpL+chhcpasKPSah-AlcSAcRLcchlsts.ss.s.tshtpADlhcsps.tsh.stshss.cIVlTDLPYG-hosWpG...suss...PlsphLsultslL.P.spuVVsV.os+ucKlp.p.sch+tlc+lKlG+Rpssla..h......... 
0 2 6 6 +11432 PF11600 CAF-1_p150 Chromatin assembly factor 1 complex p150 subunit, N-terminal Pollington J anon pdb_1s4z Family CAF-1_p150 is a polypeptide subunit of CAF-1, which functions in depositing newly synthesised and acetylated histones H3/H4 into chromatin during DNA replication and repair [1]. CAF-1_p150 includes the HP1 interaction site, the PEST, KER and ED interacting sites. CAF-1_p150 interacts directly with newly synthesised and acetylated histones through the acidic KER and ED domains. The PEST domain is associated with proteins that undergo rapid proteolysis [2]. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.26 0.70 -4.96 13 305 2012-10-03 14:45:15 2008-09-09 14:49:06 3 48 200 0 206 304 4 187.60 19 26.44 CHANGED ssEucshEsss-s-.SVLSpSSh.SShSso.SSPEussss.chpss..........sSPhsuSTslc+hopKpl+pptEKpKh+hp+....ttth..............hctE+pE+ccl+...pEtKtttc+tK......EEt++cKcEEK-l+E+ER..+E..KKE+-EKEKAEK.RlKEE+RKE+p..Es.t.....AKhEEKRK....KEEEKRL+EEc.......cRIKAE..KAEITRFFQKPK.TspAPK.........TLAuuCGKFAPFEIKEsMlL ..........................................................................................................ts........................................................t...............................ps.....ps.....hp.p.h..p..pp.hp...t.t..t...cp.p..p..p...pc............t.tt.........................pp.pc+p.c.c.c.ct+..ccp..+p.p.c....c....p..pK.......cpcccc..+.c.c....c+.c...tcc....cE.+......cc.....c+p..ccE.c-+..tc.....+pc.....+cccc.cc.cp...............t+..cp.cpt.......+ccp....cphtpct.............................ptp.....t.....h.t.h.................................................................................................................................................... 
0 66 109 166 +11433 PF11601 Shal-type Shal-type voltage-gated potassium channels Pollington J anon pdb_1s6c Family This family of proteins represents Shal-type voltage-gated potassium channels which interact with Kv channel-interacting proteins to modulate cell surface expression and function of Kv4 channels. The interaction of the Shal-type protein Kv4.2 and the Kv interacting protein KChiP1 forms a structure which is like the structure between calmodulin and its target peptides when they interact. Interactions of an N terminal alpha helix in Kv4.2 and a C terminal alpha helix in KChIP1 are essential for the modulation of Kv4.2 by KChIPs [1]. 20.50 20.50 22.60 22.10 20.10 19.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.43 0.72 -4.43 4 194 2009-01-15 18:05:59 2008-09-09 15:44:02 3 6 79 11 112 179 0 29.00 79 5.01 CHANGED uuVAAWLPFARAAAIGWhPlAspPLP..P ...AGVAAW.LPFARAAAIGWhPlAssPMP.AP... 0 19 28 59 +11434 PF11602 NTPase_P4 ATPase P4 of dsRNA bacteriophage phi-12 Pollington J anon pdb_1w44 Family P4 is a packaging motor which is involved in the packaging of phi-12 genome into preformed capsids using ATP. P4 is located at the vertices of the icosahedral capsid. ATP drives RNA translocation through cooperative conformational changes [1]. 
20.40 20.40 20.90 28.80 20.20 20.30 hmmbuild -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.02 0.70 -5.44 3 7 2009-01-15 18:05:59 2008-09-09 16:09:10 3 1 5 60 0 10 1 299.70 30 90.31 CHANGED s+LhDAKPsAl.+lApPAAlcTsussoah...AAVhE.Sl+DGGs.......LhAVGlc...PlVlDKDAtphlAsKtKSuDsESs..........uVshVsVpNssl+..........osPLLus+PVsu..usHRWsscshhSGlslVsGuTGSGKS.......csLNuK.cPDVTIRWGEPuEuYDp.DouVaVuDLsEMLhVslhLAsLsaRV.VIDSlRNLlFulsGAAouGGISsuhYuLLTDIuNlsAphGClVVAulNPMSsDDK.VELVassltASosGhllssDussVSQTlhRTGcGRIFsG.tPLsRsTHslphccPcHopLs-HTShuSsu+LESG....oVDsDDEN-SsPRRGA ..........................................................................................c.hDApshuh.hhut.sshpsss.ts.....uuVhc.u.cpGs........lhuls.......Vh.Kss..hht.t.huscs.ss..............V...sshhh..........ss.hluppPVs.....HRas.....SGlhslhGtssSGKo........tLstc.cs.shIRaGEshpuYsp.Dhsltlsslsc...sslhLu.shsapV.slDSlRsllhpltGsAouGGIstshauhLTDluslhsphsC.VVhslNPhsss-K....lE.hVatpshASssuhhlssDss...pshhRTscGhIhsu.tsLstssc..phphspphphh.+oo.susu.ltps....oV.ssDc..shsRRhs.................. 0 0 0 0 +11435 PF11603 Sir1 Regulatory protein Sir1 Pollington J anon pdb_1z1a Family Sir1p interacts with the BAH domain of the Orc1p subunit of the origin recognition complex (ORC) resulting in the establishment of silent chromatin at HMR and HML in S.cerevisiae [1]. The amino acids from the ORC interaction region of Sir1p are presented on a conserved, convex surface that forms a complementary interface with the Orc1 BAH domain, critical for transcriptional silencing [1]. 25.00 25.00 25.70 25.10 24.80 24.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.64 0.71 -3.99 2 55 2009-01-15 18:05:59 2008-09-09 16:23:57 3 3 21 4 21 61 0 126.40 31 27.48 CHANGED h.p....lssRhhVhDGaLlDhsccKPIN.+sPclhhLL.pDcphthlsQ.NLVcWschKK.pss..lslKuhpLFKahK.lh+cFhhhtDh+lhhh.hcupL+Yc.sc+hT..CpCs.l.Lsc.ss.Ylas. 
......hh........lSsRasVl.DGaLlDhsc......+Ks.IN.c...sPclhp....hL.sDc-tcplsphsLlDWschK...c.s...pss..lslcuppLFchlcthh+cFhpstD.G+l..hlhsscsph+hc..scchT..p+Ch.lhLschsshYla................................ 0 2 12 19 +11436 PF11604 CusF_Ec Copper binding periplasmic protein CusF Pollington J anon pdb_1zeq Family CusF is a periplasmic protein involved in copper and silver resistance in Escherichia coil. CusF forms a five-stranded beta-barrel OB fold. Cu(I) binds to H36, M47 and M49 which are conserved residues in the protein [1]. 20.50 20.50 20.50 20.50 20.40 20.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.30 0.72 -4.12 186 1350 2009-01-15 18:05:59 2008-09-09 16:47:19 3 19 804 6 364 993 141 68.20 33 32.48 CHANGED Gplppl-.tssplTlpHuslss..lsh.P.uMTMsFs.ls...sss..lsslpsGspVcFphpp.ssss.hs..lsplp..p .......GhVcslDhpspplTlpHsPIss.........ls...W..P.uMTM.sFs.ls...sss..thsplKsG-cVpFshhp.psst.hh..lpslp...s............................. 0 72 191 266 +11437 PF11605 Vps36_ESCRT-II Vacuolar protein sorting protein 36 Vps36 Pollington J anon pdb_2cay Family Vps36 is a subunit of ESCRT-II, a protein involved in driving protein sorting from endosomes to lysosomes. The GLUE domain of Vps36 allows for a tight interaction to occur between the protein and Vps28, a subunit of ESCRT-I. This interaction is critical for ubiquitinated cargo progression from early to late endosomes [1]. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.79 0.72 -4.17 22 246 2012-10-04 00:02:25 2008-09-09 17:02:47 3 7 224 4 174 261 0 86.50 33 17.27 CHANGED phssSspPlLppsEp.lhlpcsVGLYpGcp..KhhsppsG+laLTo+RlIYsDstcspp.ulsltLphlpps....chsuuFh.ppSsKIhlhLcp ..................t.hpshL....sEshlhhp..csVsL.Y...-Gct..........Kh.............p.........apsGplhLToHRllah.Dpp.p...s....p..p.........s..lulsLppltph.........Eht.uuhl..tpSsKIhlalc............................... 
0 52 88 142 +11438 PF11606 AlcCBM31 Family 31 carbohydrate binding protein Pollington J anon pdb_2cov Family This family of proteins represents the family 31 carbohydrate-binding module of beta-1,2-xylanase. This protein is from Alcaligenes sp. strain XY-234. The AlcCBM31 module makes a beta-sandwich structure with an immunoglobulin fold and contains two intra-molecular disulfide bonds. AlcCBM31 shows affinity with only beta-1,3-xylan [1]. 25.00 25.00 35.20 108.70 19.20 16.30 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.18 0.72 -4.02 3 5 2009-01-15 18:05:59 2008-09-10 10:58:01 3 2 3 6 0 6 0 92.00 48 19.81 CHANGED EPPENCoDDFNFNYVSDsEIEVYHVDK.GWSAGWNYLCLDDYCLSGTKSNGAFoRoFSAsLGQTYKlTFKVEDhsGQGQQIlD+slTFTsQVCN ...psPEsCo-DFGaNYVSDoEIEVFHpDp.GWSAGWNYlCLDDYCLsGsKSNGuFsRoFsAsLGQTYKITFKVED..GpGQaIlDKslTFTNpsCs. 0 0 0 0 +11439 PF11607 DUF3247 Protein of unknown function (DUF3247) Pollington JE anon pdb_2e12 Family This family of proteins is the protein product of the gene XC5848 from Xanthomonas campestris. The protein has no known function however its structure has been determined. The protein adopts a Lsm fold however differences with the fold were observed at the N-terminal and internal regions [1]. 25.00 25.00 66.70 66.40 21.00 20.00 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.17 0.72 -3.77 7 26 2009-09-10 23:21:42 2008-09-10 13:05:27 3 1 26 2 6 28 0 98.30 64 94.18 CHANGED MPKYAPHVYoEQsQIATLEtWVsLLDGQERVRIELDDGSMIuGTVAVRPolQTYhDEpccEGsNGQLRLDpLDASQEPQWIWMDRIVAVHPhPLGA.PpVMP ...MsKaAP+VYoEQsQIATLEpWVsLLDGQERVRIELDDGSMIuGTVAVRPolQTYhD-pcsEGlNGQLRLDpLDASQEPQWIWMDRIVAVHPL.LGAsPpVMP...... 0 1 3 4 +11440 PF11608 Limkain-b1 Limkain b1 Pollington J anon pdb_2diu Family This family of proteins represents Limkain b1, which is a novel human autoantigen, localised to a subset of ABCD3 and PXF marked peroxisomes. 
Limkain b1 may be a relatively common target of human autoantibodies reactive to cytoplasmic vesicle-like structures [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.92 0.72 -4.23 5 78 2012-10-02 20:46:34 2008-09-10 13:06:52 3 15 49 1 42 105 2 86.10 62 6.32 CHANGED CpSLLaVaNLPsN+D...uKAIuNRLRRLSDNCGGKVLuISGsSAlLRFuNQEuAERApKRMENEDVFGNRIoVSaoPt.R-ss-scschp.ss .........................CHTLLYVYNLPsN+D...uKSlSNRLRRL....SDN.C..GG.KV.L.....S....I......o...G..s.S...AILRFlNQ-SAERAQKRMENEDVFGNRIhVSF.o.P+s+Ehs-scsp....shs............ 1 12 14 25 +11441 PF11609 DUF3248 Protein of unknown function (DUF3248) Pollington JE anon pdb_2e6x Family This family of proteins is thought to be the product of the gene TT1592 from Thermus thermophilus however this cannot be confirmed. Currently there is no known function. 20.20 20.20 20.20 73.70 18.20 17.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.83 0.72 -4.27 2 19 2009-01-15 18:05:59 2008-09-10 13:13:55 3 1 19 4 13 17 0 63.20 63 84.51 CHANGED L-tLGtpLVWRhG+sE.pDslVVRlGhASATPRFtcLPRLhshs-AEhpchsQpGRlhlEWV- L-tLGpHLVWRIG+uEsE-l...LVVRVGLASATPRFtcLPRLhNlsDAElccLlpcGRVRVEWVE. 0 3 8 13 +11442 PF11610 Ste5 Scaffold protein Ste5-Fus5 binding region Pollington J anon pdb_2f49 Family This family of proteins represents the Fus5 binding region of Ste5. Ste5 functions in the yeast mating pathway and is required for signalling through the mating response MAPK pathway. Ste5 has separate binding sites for each member of the MAPK cascade. This region of Ste5 allosterically activates autophosphroylation of Fus3, a mitogen-activated protein kinase. Auto-activated Fus3 has a negative regulatory role, and promotes Ste5 phosphorylation which leads to a decrease in pathway transcriptional output [1]. 
20.60 20.60 21.60 68.50 19.20 18.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.06 0.72 -7.31 0.72 -4.42 2 10 2009-01-15 18:05:59 2008-09-10 13:47:16 3 1 9 0 1 10 0 30.00 98 3.34 CHANGED TPVERQTIYSQAPSLNPNLILAAPPKERNQ TPVERQTIYSQAPSLNPNLILAAPPKERNQ 0 1 1 1 +11443 PF11611 DUF4352 TRF2; Domain of unknown function (DUF4352) Pollington J anon pdb_3cfu Family Members of these family are poutative lipoproteins that fall into the Antigen MPT63/MPB63 (immunoprotective extracellular protein) superfamily. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.52 0.71 -3.98 49 1093 2012-10-03 03:07:29 2008-09-10 15:00:56 3 22 747 2 203 850 26 123.50 19 50.75 CHANGED hplu-ssp.h.sshplsVsss.....ths.s.sphhp............pssspalllslolcN.puccshshss..pFpLh.cspupphc.hshsspptt......s................h.hsslsPGppssGplsFclspssp....hpLpa.....ssh.asst......tl ...............................................................spssp.......s.s.h.cl.....olsss..........hs.s.sp..h.t..ps.....................pstspa...lhls..lsl.....c.N...puccsl..s...h....s...s...t...pa.pLh....s...sc.sp...p...a..c...hs.ssst.t......t................hhhpplsPspshpG.p.......l.sF.-l..sp...ssp.....hplph....ssh.......t.................................................................. 0 69 147 184 +11444 PF11612 T2SJ GspJ; Type II secretion system (T2SS), protein J Pollington J, Desvaux M anon pdb_3ci0 Family The T2SJ proteins are pseudopilins, which are targeted to the membrane in E. Coli. T2SJ forms a complex with T2SI (Pfam:PF02501) and T2SK (Pfam:PF03934) which is part of the Type II secretion apparatus involved in the translocation of proteins across the outer membrane in E.coli. The T2SK-I-J complex has quasihelical characteristics [1]. 
20.40 20.40 20.40 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -11.27 0.71 -4.52 87 934 2009-09-11 07:38:19 2008-09-10 16:15:32 3 4 804 11 192 656 242 157.00 26 74.61 CHANGED c-tspppspclppLQRuhshl-pDlpQh..................................ss....RssRst...Gt..sshhhtssp.................ltFsRsGWtNP.th............PRusLp+VsY..pl............p........ssp..LpRhhashhDs.stst.tPthp...slLss.........Vpshplca..h..........sss..............pWpcp...Wss.............sptLPpulclplphpsh....G.plpRhahlssts ..............................................................................................p..hsttcspclstlQpshshlppDlpQh..................................hs............R.sRss....ut..sthh.htss...h...t...............hltFsRtuhhNP..h..............sRusL.p.VsYcl.................c......................ssp.......Lp.R.hsa.s..hsDs....sss....pPthp............sLlst..................Vcuhplpa.....a.................................sss.........................pWppp.....Wss...........................sptlPh.AVclsLphpph....G.plpRhahl.t.s................................................ 1 54 108 151 +11445 PF11613 UCN2 Agonist of corticotropin releasing factor R2, Urocortin-2 Pollington J anon pdb_2rmg Family This family of proteins represents urocortin 2, a member of the corticoliberin family which is a selective agnonist of corticotropin releasing factor 2. The backbone of the protein is mainly alpha-helical but it contains a helix-loop-helix motif [1]. 22.00 22.00 40.90 40.50 20.70 20.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.74 0.72 -4.00 4 62 2009-01-15 18:05:59 2008-09-10 16:49:57 3 1 35 5 36 68 0 37.80 66 26.12 CHANGED lsLSLDVPhslhplLh-.A+AKshRsQAAsNA+lLA+l hTLSLDVPTNIMNILFslAKAKNlRApAAANA+LhAQI. 
0 2 3 10 +11446 PF11614 FixG_C Bre5; IG-like fold at C-terminal of FixG, putative oxidoreductase Pollington J, Coggill P anon pdb_2r39 Domain This domain is part of a transmembrane protein, FixG, itself part of the FixGHIS operon closely associated with the FixNOPQ operon that is the symbiotically essential cbb3-type haem-copper oxidase complex. FixG expression is induced by oxygen-deprivation. This C-terminal domain adopts an E-set Ig-like fold. 25.60 25.60 25.60 25.90 25.50 25.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.69 0.71 -3.99 345 1156 2012-10-03 16:25:20 2008-09-11 09:45:12 3 18 966 1 346 998 117 120.90 25 25.60 CHANGED hlhslspRsslclsVl+DRssLah.phscGp....IcNsYsl+lhNpsppspp.aplslp...G.............shpl...............s...........pplplsssps......tphslhlps..sspt...........ht...pssp.slphpl....p........t........s.stthp.sp.....pcopFhsP .............................hhhslspRpshtlsVl+DRs.hhh..pp..ssGp....IcNsYsl+lhNpsppspp.aplslp..G..hs............slpl.............................t.....sppslpl.tsuchhplsVhlps..s.pt............hp...pssp...slphplp.......p.....s.ssphs.hp......pcopFlsP................................... 0 95 216 281 +11447 PF11615 DUF3249 Protein of unknown function (DUF3249) Pollington J anon pdb_2pqr Family This family of proteins represents the gene product of the protein CAF4, the yeast protein YKR036c. This protein contains seven WD40 repeats in its C terminus. The function however is unknown [1]. 
20.70 20.70 20.70 151.10 19.50 17.00 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -8.99 0.72 -4.23 2 13 2009-09-13 21:11:59 2008-09-11 10:25:08 3 1 13 2 4 10 0 60.00 100 9.25 CHANGED QKGQVGIFSFQNNYADSATTFRILAHLDEQRYPLPNGAAEKNLPSLFEGFKATVSIIQQR QKGQVGIFSFQNNYADSATTFRILAHLDEQRYPLPNGAAEKNLPSLFEGFKATVSIIQQR 1 1 1 1 +11448 PF11616 EZH2_WD-Binding WD repeat binding protein EZH2 Pollington J anon pdb_2qxv Family This family of proteins represents Enhancer of zest homolog 2, (EZH2) a 30 residue peptide which binds to a WD-repeat domain of EED by residues 39-68. EED is a component of PRC2 complex which is involved in gene expression [1]. This interaction is required for the HMTase activity of PCR2 [1]. 25.00 25.00 32.00 30.80 18.50 17.20 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.14 0.72 -7.19 0.72 -4.08 6 144 2009-01-15 18:05:59 2008-09-11 10:59:13 3 3 38 1 55 146 0 29.80 68 4.73 CHANGED KuhFsSNRQKI.E+T-ILNpEWKphRIQPl ...KoMFsoNRQKIhERT-ILNQEWKpRRIQPV. 0 2 6 21 +11449 PF11617 Cu-binding_MopE Protein metal binding site Pollington J anon pdb_2vov Family This family of proteins represents a unique protein copper binding site that involves a tryptophan metabolite, kynurenine in the protein MopE. The production of kyneurenin by modification of tryptophan and its involvement in copper binding is an innate property of MopE [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.57 0.72 -4.17 36 222 2009-01-15 18:05:59 2008-09-11 11:40:36 3 34 44 0 161 228 684 29.10 42 8.14 CHANGED DCsDsssslpPGA..sElC.DulDNsCDGtsD-s ...........DCsD...tsstlhPuA....sE....lC.s..G.l.DssCsGthDp....... 0 78 87 128 +11450 PF11618 DUF3250 Protein of unknown function (DUF3250) Pollinton J anon pdb_2yrb Family This family of proteins represents a protein with unknown function. 
It may be the C2 domain from KIAA1005 however this cannot be confirmed. 27.70 27.70 28.70 36.30 27.60 27.20 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.27 0.72 -3.85 11 173 2009-01-15 18:05:59 2008-09-11 13:07:17 3 8 74 1 82 129 0 107.00 44 9.07 CHANGED TFCThsFaDFETpoTPlspGhpshYsFTopYsVphDsLhlpYLposulhl-LHpsh.upE..acTlAuutlshctll.....-psu.+ltuosshsususp..shGsl-Yhh+lphP .............TFCTYuFYDFEhpsTPllp.G.pPtY.sFTSQYlVcscsLFLpYlQ...............psol.pLElHQAh..us-...acTlAAsplphcclL...................Epss........+.lauos...sL..s.Gsp......G-.....saGslEYWhRL+hP.................................... 0 23 31 51 +11451 PF11619 P53_C Transcription factor P53 - C terminal domain Pollington J anon pdb_2rp4 Family This family of proteins is the C terminal domain of the transcription factor P53. While the rest of the protein is quite conserved between the different transcription factors such as p53 and p73, the C terminal domain is highly divergent. The DM-p53 structure is characterized by an additional N-terminal beta-strand and a C-terminal helix [1]. 25.00 25.00 56.50 55.20 21.80 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.46 0.72 -4.23 2 16 2009-09-11 05:01:43 2008-09-11 13:16:24 3 1 12 4 7 17 0 70.20 66 16.58 CHANGED DDSAAEWNVSRTPDGDYRLAITCPNKEWLLQSIEGMIKEAAAEVLRNPNQENLRRHANKLLSLKKRAYELP ........A-WsVSRT.DGDYRLuITCPpKEWLLQSIEGMIKEAAAEVLRNPNp.-NLR+HAN+LLSLKKpAaELP...... 0 1 1 4 +11452 PF11620 GABP-alpha GA-binding protein alpha chain Pollington J anon pdb_2juo Family This family of proteins represents the transcription factor GABP alpha. This alpha domain is a five-stranded beta-sheet crossed by a distorted helix termed an OST domain. The surface of the GABP alpha OST domain contains two clusters of negatively-charged residues suggesting there are positively-charged partner proteins. 
The OST domain binds to the CH1 and CH3 domains of the co-activator histone acetyltransferase CBP/p300, a direct link between GABP and transcriptional machinery has been made [1]. 21.90 21.90 21.90 34.20 21.80 21.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.92 0.72 -4.00 5 88 2009-01-15 18:05:59 2008-09-11 13:47:53 3 5 69 1 55 81 0 86.80 60 19.93 CHANGED s--lIVQcIDI+EPIusLKKLLEsRLplS.LctY-IaLQDhQ.L-PD+SLFDQGVKs-GoVQLSVQVQTt.s.ccKLNIlEIVKPs--Vcs .......s.s-hlsQsIDINEPlGNLKKLLEPRLQsS.LDua-ICLQDIQ.L-P-+SLFDQGVKTDGoVQLSVQlh.oht.................G..-.KLNILEIVKPsEsVc....... 0 14 18 35 +11453 PF11621 Sbi-IV C3 binding domain 4 of IgG-bind protein SBI Pollington J anon pdb_2jvg Family This family of proteins represents Sbi domain IV which binds the central complement protein C3. Sbi-IV interacts with Sbi-III to induce a consumption of complement via alternative pathway activation [1]. When not interacting with Sbi-III, Sbi-IV inhibits the alternative pathway without complement consumption. The structure of Sbi-IV consists of a three-helix bundle fold [1]. 25.00 25.00 34.20 34.10 19.60 19.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.36 0.72 -3.82 2 163 2009-01-15 18:05:59 2008-09-11 14:00:45 3 2 160 4 2 45 0 68.00 96 16.01 CHANGED VSIEKAIVRHDERVKSANDAISKLNEKDSIENRRLAQREVNKAPMDVpcHLQKQLDALVAQKDAEKKVA ...........VSIEKAIVRHDERVKSANDAISKLNEKDSIENRRLAQREVNKAPMDVKEHLQKQLDALVAQKDAEKKVA.. 0 1 1 2 +11454 PF11622 DUF3251 Protein of unknown function (DUF3251) Pollington JE anon pdb_2jwy Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. Some members if this family are annotated as putative lipoprotein YajI however this cannot be confirmed. 
19.90 19.90 20.00 22.50 19.50 19.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.77 0.71 -4.84 7 846 2009-01-15 18:05:59 2008-09-11 14:18:11 3 1 485 1 36 261 2 165.70 41 85.84 CHANGED CA.QpEl.ph+pslSpLNQchTpLsppssKlpQQNtLNAcSspGVYLLPuApTsuRL-SplGpLphSLtsIpstAsGopssL+Ips.SscsLPAFsApVEaGQlpuTotshtpsssQsQ.hsAPsSlLssS-ssIsLpLsGloP-QLGFlRlHslQP......lsp .............................................................................................Qh+ppluTLppphTp.......lpppTspI...Tpps+.L......s......tcppps.la....L.P...utpp.hhLp.upIGsLthpllsIsPsssGoslpLcl.s..ssPls.shuhsspaGphpsTpsph.th.hQ...spL...h....N....ushplLsso.sDIsLpLKGlSPspLGal+Ip................shp.................................... 1 1 6 20 +11455 PF11623 DUF3252 Protein of unknown function (DUF3252) Pollington J anon pdb_2jz2 Family This family of proteins has no known function. Some members are annotated as Ssl0352 however this cannot be confirmed. Currently there is no known function. 20.80 20.80 21.10 69.80 19.80 20.40 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.56 0.72 -4.56 16 94 2009-01-15 18:05:59 2008-09-11 14:25:39 3 1 88 3 43 88 112 52.90 62 50.51 CHANGED hILPGssV+VpNssshYatapGhVQRloDGKAAVLFEGGNWDKLlTFpLsELE ...ILPGssVpVpNssssYatYpGhVQRloDGKAAVLFEG.GNWDKLlTFcLsELE. 0 7 28 39 +11456 PF11624 M157 MHC class I-like protein M157 Pollington J anon pdb_2nyk Family This family of proteins represents M157,a divergent form of MHC class I-like proteins which is the protein product of the mouse cytomegalovirus. This protein is unique in its ability to engage both activating (Ly49H) and inhibitory (Ly49I) natural killer cell receptors. M157 is involved in intra- and intermolecular interacts within and between its domains to form a compact MHC-like molecule [1]. 
20.60 20.60 21.10 21.20 20.20 20.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.79 0.70 -5.27 5 132 2009-01-15 18:05:59 2008-09-11 14:50:58 3 1 6 1 0 116 0 237.50 23 63.71 CHANGED TVTRoGaHKRTISVsNGRPVVVWDsGDKNPKICKICPAVoSINsEYlFLDIQKMRLsNLLuQuLh-sQRICVRYsCLFL..+FDV.......lCDVYHTTD+VRlTYp+QTsKINIQGSGTFshSDAKshGlaMLpsNVcEIKp+WRsTVQpLKQLGYMNcTEVEFWYNT...TGLTTCVVTSRSNsPFTVELSLNTNSSAIVT.....EESTVDsQsVTVKAPGSasQRCYVTSSLGWKGVVT..PPSQYRTK......RsPVN.ISSSKhTGI .........................................................................................h.......................................s.so-h...ssEhhFhtpQppaLpp.hhphl.tt.stt.........l.slcYpCpah....hhsh........tCpV..hhsss....psp..l.....hhh...........hs....sp.t..s.......ss..ps..........s.s........s...............h........s....s......scuhulphLpspsppIppRWpssspclhphup.scs-hpFhhsh......pGhspCslpSpuslhaplpl...s..s...s..s..s....s..tshss........shssh..hps.ss....sps.s.s..s..t....s....tChlpSohG.Wpsslp.........................................t............................. 1 0 0 0 +11457 PF11625 DUF3253 Protein of unknown function (DUF3253) Pollington J anon pdb_2ns0 Family This bacterial family of proteins has no known function. 20.60 20.60 21.10 22.00 20.00 20.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.83 0.72 -4.10 28 140 2009-01-15 18:05:59 2008-09-11 14:57:13 3 4 137 1 72 146 38 83.60 37 82.99 CHANGED hsssspplctsILcLlupRusspolCPSElARAl.......us-s..WRthMssVRpsAtcLsptGclplppKGc....sV.DPssh.+GPlRlth..ts ...............................................h....ttplctsIlpLlspRus....spolCPSElARAl......................................us-s....WRsLMtslRcsAhcLspsGclpIhQ+Gc......sV...DPssh....RGslRlph...tt......... 
0 26 49 61 +11458 PF11626 Rap1_C Rap1_C; TRF2IP; Yippee-Rap1; TRF2-interacting telomeric protein/Rap1 - C terminal domain Pollington J anon pdb_3cz6 Family This family of proteins represents the C-terminal domain of the protein Rap-1, which plays a distinct role in silencing at the silent mating-type loci and telomeres [1]. The Rap-1 C terminus adopts an all-helical fold. Rap1 carries out its function by recruiting the Sir3 and Sir4 proteins to chromatin via its C terminal domain [1]. Rap1 is otherwise known as TRF2-interacting protein, as it is one of the six subunit components of the Shelterin complex. Shelterin protects telomere ends from attack by DNA-repair mechanisms [2,3,4,5]. 20.10 20.10 20.10 20.20 19.90 19.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.93 0.72 -4.10 81 175 2012-10-03 10:46:08 2008-09-11 15:24:49 3 12 150 7 118 164 1 88.60 26 15.59 CHANGED lpp.hpphGh...spphls.pALtsoot-hthstp.llph.........hpt......sps.....................P.....sh.GlWTpcDDphLpss...................................sspshcp.............LhpKH...G.ppplptRhcFLpt ..............................................h....hpphsh...ststls.pALhpsSu-hthssphlhsh.................hpt..........sps.................................h..........P..sh.GlWTccDDpsLpts...................................-pcshcp.............LlcK+...G.spplptRhcFlp.t................................ 0 25 52 88 +11459 PF11627 HnRNPA1 Nuclear factor hnRNPA1 Pollington J anon pdb_2h4m Family This family of proteins represents hnRNPA1, a nuclear factor that binds to Pol II transcripts. The family of hnRNP proteins are involved in numerous RNA-related activities [1]. 
25.00 25.00 25.00 25.00 24.40 24.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.23 0.72 -3.79 8 294 2009-01-15 18:05:59 2008-09-11 16:01:28 3 6 38 0 124 141 0 36.70 76 13.49 CHANGED GssYNDFGNYsuQ.pSNYGPMKuGsaGGh.R..sSG.PYGGG ............GGuY.N.....DFG.N.YNN....Q.SSNFGPMKG.GN.FGG..R......SSG.PYGGG............. 0 1 3 7 +11460 PF11628 TCR_zetazeta T-cell surface glycoprotein CD3 zeta chain Pollington J anon pdb_2hac Family The incorporation of the zetazeta signalling module requires one basic TCR alpha and two zetazeta aspartic acid TM residues [1]. The structure of the zetazeta(TM) dimer consists of a left-handed coiled coil with polar contacts. Two aspartic acids are critical for zetazeta dimerisation and assembly with TCR [1]. 25.00 25.00 30.20 42.30 19.50 18.80 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.46 0.72 -4.49 43 108 2009-01-15 18:05:59 2008-09-11 16:17:06 3 4 42 2 36 124 0 32.80 62 22.10 CHANGED DPKLCYlLDGILFlYGlIlTuLaLRhKFp+uts .DP+LCYlLDGILFlYGlIlTALalRhKhpcut.... 0 2 4 7 +11461 PF11629 Mst1_SARAH C terminal SARAH domain of Mst1 Pollington J anon pdb_2jo8 Family This family of proteins represents the C terminal SARAH domain of Mst1. SARAH controls apoptosis and cell cycle arrest via the Ras, RASSF, MST pathway. The Mst1 SARAH domain interacts with Rassf1 and Rassf5 by forming a heterodimer which mediates the apoptosis process [1]. 22.70 22.70 22.70 35.10 22.50 22.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.58 0.72 -4.14 11 152 2009-01-15 18:05:59 2008-09-11 16:32:35 3 4 93 2 86 143 0 48.80 64 9.94 CHANGED -a-FLKshoh-ELppRLtSLDspMEpEIEEl+pRYpuKRQPILDAI-AK ....Da-FLKsholE-LQpRLtuLDPMMEpEIEElRQ+YpuKRQPILDAh-AK.. 
0 25 30 54 +11462 PF11630 DUF3254 Protein of unknown function (DUF3254) Pollington J anon pdb_2job Family This family of proteins is most likely a family of anti-lipopolysaccharide factor proteins however this cannot be confirmed. 25.00 25.00 26.50 45.80 21.50 17.00 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.16 0.72 -4.04 10 69 2009-01-15 18:05:59 2008-09-11 16:40:43 3 1 21 1 0 84 0 94.50 46 83.19 CHANGED CpAQuhpsL.lsullsKlssL..Wcsucl-hLGHpCpapVcPsl++acLYa+G+MWCPGWT...sIpGEu+TRSRSGslp+ul+DFVpKAhppGLITEE-ApsWLs ..............CpAQ.apsl.suulssK.ls.sL..W+ssps-hLGHpCpashpPplKRapLYa+G+hWCPGWT...sIcGc.upT.RS+S.Gsstcss+DFVpKAhppGLITpp-Ap.WLp. 0 0 0 0 +11463 PF11631 DUF3255 Protein of unknown function (DUF3255) Pollington J anon pdb_2joz Family Members in this family of proteins are annotated as YxeF however no function is currently known. The family appears to be restricted to Bacillus. 25.00 25.00 32.10 223.80 18.90 18.50 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.64 0.71 -4.50 2 18 2009-01-15 18:05:59 2008-09-11 16:44:32 3 1 18 1 3 17 0 121.40 77 86.84 CHANGED lMloGCpQpK.-EsPFYYGTWDtGhtPGPhDGV+StTVTFTcDpVlpppVhpGRGEVthP..sYKVISQsTDGoIEIpYLG..aPlKSTLKRGcNsTLIWc..GppKTMTRI..KTGtE-tcEK lMloGCQQpK.-ETPFYYGTWDtGLpPGPMDGV+StTVTFTKDcVlpppVIcGRGEVphP..AYKVISQsTDGoIEIpYLG..aPlKSTLKRGcNsTLIWc..GpTKTMTRI..KTGtE-tcEK.. 0 1 1 1 +11464 PF11632 LcnG-beta Lactococcin G-beta Pollington J anon pdb_2jpk Family This family of proteins is LcnG-beta, which with LcnG-alpha constitute the two-peptide bacteriocin lactococcin G (LcnG). This family of proteins represents the N terminal domain which has an alpha-helical structure and is amphiphilic. Both peptides have a GxxxG motif which they use for interaction through a helix-helix structure [1]. 
19.90 19.90 78.50 78.00 18.90 17.00 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.68 0.72 -4.21 2 5 2009-01-15 18:05:59 2008-09-11 16:57:19 3 1 3 2 0 5 0 35.00 70 63.87 CHANGED t.htWLsWlpPAh-FlpGhuKGhhKEGNKsKWKNl KKWGWLAWV-PAYEFlKGFGKGAIKEGNKDKWKNI 0 0 0 0 +11465 PF11633 SUD-M Nsp3; Single-stranded poly(A) binding domain Pollington J anon pdb_2jwi Domain This family of proteins represents Nsp3c, the product of ORF1a in group 2 coronavirus. The domain exhibits a macrodomain fold containing the nsp3 residues 528 to 648, with a flexibly extended N-terminal tail from residues 513 to 527 and a C-terminal flexible tail of residues 649 to 651. SUD-M(527-651) binds single-stranded poly(A); the contact area with this RNA on the protein surface, and the electrophoretic mobility shift assays confirm that SUD-M has higher affinity for purine bases than for pyrimidine bases. 22.40 22.40 23.10 83.20 21.70 22.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.65 0.71 -4.91 6 195 2009-01-15 18:05:59 2008-09-15 09:39:56 3 10 84 11 0 175 1 142.70 83 2.41 CHANGED ssp.lhss.ussssssllsslshuapuMlpauKcpGhLhPlshDYsAhsKlLKR....hcspEGlhTs-GhcFYhYSpccPLp-V.pALNuhs+sllhhPFGalspGhsLA.SAssMRsLTVPasVllsScssVPlYpuYhsus .........S.AFYVLPSEAPNAKEEILGTVSWNLREMLAHAEETRKLMPICMDVRAIMATIQRKYKGIKIQEGIVDY.GVRFFFYTSKEPVASIITKLNSLNEPLVTMPIGYVTHGFNLEEAARCMRSLKAPAVVSVSSPDAVTTY.NGYLTSS.. 0 0 0 0 +11466 PF11634 IPI_T4 Nuclease inhibitor from bacteriophage T4 Pollington J anon pdb_2jub Family This family of proteins represents IPI from bacteriophage T4. This protein is a nuclease inhibitor which is injected by T4 to protect its DNA from gmrS/gmrD CT of pathogenic Escherichia coli into the infected host [1]. The structure of this protein consists of two small beta-sheets flanked by N and C termini by alpha-helices. The protein has a gmrS/gmrD hydrophobic binding site [1]. 
25.00 25.00 25.30 93.50 23.90 18.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.57 0.72 -4.10 2 6 2009-09-11 14:10:27 2008-09-15 09:51:27 3 1 5 1 0 5 0 75.70 85 79.37 CHANGED ATLTSEVIKANKGREGKPMISLVDGEElKGTVYLGDGWSAKKDGATIVISPAEETALFKAKHISsApLKIIAKsLL ATLTSEVIKANKGREGKPMISLVDGEEIKGTVYLGDGWSAKKDGATIVISPAEETALFKAKHISAAHLKIIAKNLL 0 0 0 0 +11467 PF11635 Med16 Mediator complex subunit 16 Coggill P anon manual Family Mediator is a large complex of up to 33 proteins that is conserved from plants through fungi to humans - the number and representation of individual subunits varying with species [1-2]. It is arranged into four different sections, a core, a head, a tail and a kinase-activity part, and the number of subunits within each of these is what varies with species. Overall, Mediator regulates the transcriptional activity of RNA polymerase II but it would appear that each of the four different sections has a slightly different function.\ Med16 is one of the subunits of the Tail portion of the Mediator complex and is required for lipopolysaccharide gene-expression [4]. Several members including the human protein, Swiss:Q9Y2X0, have one or more WD40 domains on them, Pfam:PF00400. 
25.00 25.00 25.60 25.10 22.40 24.80 hmmbuild -o /dev/null HMM SEED 753 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.19 0.70 -13.32 0.70 -6.57 18 264 2009-01-15 18:05:59 2008-09-15 10:38:09 3 7 176 0 170 275 0 534.40 21 81.27 CHANGED pphts..scsssssIluhpWLssp+.slhhpptshhs........................tt.sssshappplp.h.PhGshHPsssKtuhlslp+sGtlc.laYQ........K.ssplsshp.p..............spphlsaAuIs.ssssp.llls....saSthspplphY+lplsW.............s.....sptsp....hcs........p...........PSlplpchps.h...csh...s.............................p.lhpLo+Lcllstss.......ts.susp...I..Llsass.s..............t.sSllpRapLspps............phlpshFtpls.ppsss....t..p.h.phchhsclhhpppltsIssthhsh...............................................................hlshhacDGoIphhcpsohplhsss................scssshloshhssGFpa.........Ppls.......................htlSPohsullhhctpGp........th.....hthhpsshss.pp.t...h....susshAhta..shuhphshuuDDlslllphpl..lpplsccp...........tppahssllsthhshhsh.......shDt.scthlDKlls.ppsl.KshsLQhtLG..pht......sscsss.c.....luhllLpL+shsh........h...h..hs.....................hsc.-hlhSLlssh+Wll-hhsYlhppLh.l.........h.p.t.............p.s..polshhlhhuphsRthlhphh.ptlppl....pthltth........phaslh.spus...........p.....hh...h....phh..sss.Plshsha.....EpaLsclsshhpt.......................................................................t.pttsp.thE.pLllpuplPsp......................ltthsphllsps.......sssltschchsslahhDhshLplupsccsp..hhth....ttt.lh....................hDslpph.lthp................................................................tsplRRCsRCs..slotsspss........ps.......tshtt...Whhha....hRpClCGGhWh.p 
.....................................................h..........t..hth.ah...t.t................................................s.hh..ht...s.hs.as...h.uhlhlp.sG.lt.h.h................................p.p..ph.p.......................thlthushs..tts......hlhh....s.s....tp.....lphh+l.lphs...................p..............................s..sh.h..h.................................................tlsplphl............t...t.....l...lh...hhp................................tohl.pap.ltpt...............lp.hh.phs.........................hph.t.h...t..l.tlt...hs................................................................hlhhhhpDuslphhpt.shp......................p....h.p..tsuhpa.....t...........................hS.s.h.hh.hp.ptp...............h.hht.........p..............................hthhh.......hthh........s.-.h....hhh.............p.hs.p................h.tphht.h.p.hph.................-h.........tl.p.....l....+s.h.lt...t...t.............tp.hs...phshlh.lpl.+p..h...............................................c....hl..l.t.hpWhhchhhhhhtpl..........................................................tp.....h....s.t..h...h...lt..h........th.h...............................t.t.................................................h.........h.....ps....ht.h......phL.th....................................................................................................................................................................t....-..h...ts..s........................h......hh.....................................h..............l.t....t.....................................................hD.hpp..h..............................................................................................................................................h+tCs.RCs....s...............................hh....h..h........C........h............................................. 
0 32 68 132 +11468 PF11636 Troponin-I_N Troponin I residues 1-32 Pollington J anon pdb_2jpw Family This family of proteins represents the cardiac N-extension of troponin I. This region of the protein (1-32) interacts with the N-lobe of cTnC and modulates myofilament calcium(2) sensitivity [1]. 25.00 25.00 26.70 25.50 18.30 17.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.93 0.72 -4.26 5 46 2009-01-15 18:05:59 2008-09-15 10:53:55 3 2 30 1 15 41 0 32.20 66 15.79 CHANGED EA..EEEEsVsc.PKP.....PPsssPP.lRRKSSANYRSYAT ..................sc....pp-sstps.PAP.....uP......lRRRSSANYRAYAT. 0 1 1 3 +11469 PF11637 UvsW ATP-dependant DNA helicase UvsW Pollington J anon pdb_2jpn Family This family of proteins represents the DNA helicase UvsW from bacteriophage T4. The protein is a member of the monomeric SF2 helicase superfamily and shows structural homology to the eukaryotic SF2 helicase Rad54. UvsW is thought to have a role in recombination and the rescue of stalled replication forks [1]. 25.00 25.00 71.20 70.40 19.10 18.00 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.62 0.72 -4.29 7 32 2009-01-15 18:05:59 2008-09-15 11:21:53 3 2 31 1 0 24 0 54.20 63 54.58 CHANGED ltuCpTl-GLpELEKYYsKRhKEl..-lpsoDDISlRDAluG+RpEhEsps-stcE IuSCQTlEGL-ELEtYYcKRsKEs..ELKDoDDISlRDALAGKRsELEspD--pEE... 0 0 0 0 +11470 PF11638 DnaA_N DnaA N-terminal domain Pollington J, Bateman A anon pdb_2jmp & Jackhmmer:B3CS04 Domain This family of proteins represents the N-terminal domain of DnaA, a protein involved in the initiation of bacterial chromosomal replication. The structure of this domain is known [1]. It is also found in three copies in some proteins such as Swiss:B5V0X4. The exact function of this domain is uncertain but it has been suggested to play a role in oligomerisation. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.12 0.72 -4.40 116 3333 2012-10-02 15:09:17 2008-09-15 11:28:02 3 9 3077 2 743 2291 1830 62.70 28 14.06 CHANGED sshWp.pltppLcppluppp..asoWlpslph.th.sss...............lhltsPspFhtsWlppp..........YhcpIpphhpp ...............plWppsLp.pLp..c..c..ls..tsp...........assWl+.s.lps...pl.pcss...............lhlhuP....Np.Fs..h.-Wlcs+..................YhshIpphh..t....................... 0 255 491 630 +11471 PF11639 HapK REDY-like protein HapK Pollington J anon pdb_2jdj Family This family of proteins represents HapK, a protein of unknown function, with two homologues PigK and RedY. The monomer structure of the protein contains a four-stranded anti parallel beta-sheet, three alpha-helices and a short C terminal tail which it uses for dimer formation [1]. The surface of HapK has a deep cavity with consists of a kinked helix and a beta-four strand. HapK could be involved in prodigiosin biosynthesis, specifically the binding of a bipyrrole intermediate such as HBM or MBM [1]. 25.00 25.00 30.20 32.60 21.90 21.80 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.38 0.72 -3.40 4 20 2009-01-15 18:05:59 2008-09-15 11:51:59 3 2 19 2 9 20 14 102.90 37 68.24 CHANGED McsIlHKIRLhDlucssAFEsWVpssDYATCPcLPSVRuFDVaRVSspt-APFHYlElIplou.ctFc+DMpTssFtsLVpsFspMAEVVEEluGEpLusGYpA .....MpsIlH+I+L+DusssssFEpWVcsoDYssCspLsSVpuFpVh+l.S....s...s.s-....A.......P.aH...YlElIcVsuhctFs.p-MpTpsFpcLsscFsphA-VVpEhsGc.ltsGYt............... 0 6 7 7 +11472 PF11640 TAN Telomere-length maintenance and DNA damage repair Wood V, Coggill P anon Pfam-B_6865 (release 23.0) Domain ATM is a large protein kinase, in humans, critical for responding to DNA double-strand breaks (DSBs). Tel1, the orthologue from budding yeast, also regulates responses to DSBs. 
Tel1 is important for maintaining viability and for phosphorylation of the DNA damage signal transducer kinase Rad53 (an orthologue of mammalian CHK2). In addition to functioning in the response to DSBs, numerous findings indicate that Tel1/ATM regulates telomeres. The overall domain structure of Tel1/ATM is shared by proteins of the phosphatidylinositol 3-kinase (PI3K)-related kinase (PIKK) family, but this family carries a unique and functionally important TAN sequence motif, near its N-terminal, LxxxKxxE/DRxxxL. which is conserved specifically in the Tel1/ATM subclass of the PIKKs. The TAN motif is essential for both telomere length maintenance and Tel1 action in response to DNA damage [1]. It is classified as an EC:2.7.11.1. 21.40 21.40 21.40 23.20 21.30 20.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.79 0.71 -4.49 24 182 2009-01-15 18:05:59 2008-09-15 12:47:21 3 6 154 0 117 188 0 151.90 24 6.10 CHANGED Msshth....t.hhshLpSsKl+-Rsculpp.....Lppllpss...................pplssKshctlhcuLhphlppE+pta.........ppsssttuss...tpRLspsupslRhhlcpulc............ph+h+shpsllspIhchhhsss......tslhpslshDas+sL.sslLphssahccLs.tppWpp ..............................p..l...shptLpss+hp-Rpctl-c..................hcpllppspt...................pphppt..sa..cslFc.sLh+hlppEppsh.....................htpsssssust..ttp+lpchuuhlRhhl....cpusc............+htphpsptLlsalh-slhsss........hhtshut.DasphL.pslLshphahpclu.pppW..p................................... 0 24 52 89 +11473 PF11641 Antigen_Bd37 Glycosylphosphatidylinositol-anchored merozoite surface protein Pollington J anon pdb_2jo7 Family This family of proteins represents the core region of Bd37, a surface antigen of B.divergens which is GPI-anchored at the surface of the merozoite. The structure of the protein consists of mainly alpha folds and has three sub domains [1]. 
25.00 25.00 180.60 180.40 20.70 20.20 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.60 0.70 -5.06 4 14 2009-01-15 18:05:59 2008-09-15 13:13:05 3 1 2 1 0 15 0 205.90 60 73.07 CHANGED VKoLE....slRpELKGQREsFLScIIcSsGshTILQLVsaLRVlDTDLLLpVDusKV-cAGtKVKAYL-pIGI+GpsVEEuLDNLMhKVh.clT+GTVEuSsQuTD.SEELKsLLLKFSEDLKAEQEhHu-Kctoc.-LLp...sLpsp+DELlKKFsthsPoFLToEDISuFLTVP-YGsPhsuuKWKpVE+KIpD...KLESSDlsT..-LKoLlAcLIEQREpMMDLLYGPhGca ..VKTL-....-LRpEL+uQREphLSpIIcSDGPFTILQLlsaLRlIDTDLLLKVDpshVccAGcKVKsYLEpIGIsG-SVEtuLDpLMhKVY.clT+GTVcSsspuTD.SE-LpoLLLKFSEDL+AEQEhHupKctuc.cLlp...ohtpp+-ELlKKFsslsPTFLTsEDlSuaLsVP-YGhPhsuschKpVEthIpt...KLEoS-l.s..pL+sLlAclI..REphMDLLYG..Gc.. 0 0 0 0 +11474 PF11642 Blo-t-5 Mite allergen Blo t 5 Pollington J anon pdb_2jmh Family This family of proteins is Blo t 5, an allergen protein from Blomia tropicalis mites. This protein shoes strong reactivity with IgE in asthmatic and rhinitis patients. The structure of the protein contains three alpha helices which form a coiled-coil [1]. 20.70 20.70 20.70 21.70 20.30 18.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.66 0.71 -4.34 7 41 2009-01-15 18:05:59 2008-09-15 13:22:55 3 2 9 8 0 46 1 113.40 44 85.15 CHANGED hststhsspD-aRpEFDRLLlpphpc+hcclE+tLhpLScQlpELEKoKSK-lKtpILRElolu.sFlcGApGahERELKRTDLNhhE+aNFEtALuTuplLhKDLppLtK+VKulcoc .........s.....spcDc.aRNEFD+LLhpphpcphcchEctLLtLocQls-LEcoKoKEhK-pIL.RElsls.shIcGu+sahpRELKRTDLNlhE+aNaEtAlsosclLhKDLcchpp+VKslc.... 1 0 0 0 +11476 PF11644 DUF3256 Protein of unknown function (DUF3256) Pollington J anon pdb_2p3p Family This family of proteins with unknown function appears to be restricted to Bacteroidales. 
25.00 25.00 40.40 39.40 20.80 19.40 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.07 0.71 -4.81 9 96 2009-01-15 18:05:59 2008-09-15 14:03:42 3 1 96 2 13 81 0 195.30 36 92.44 CHANGED hhsshulpAQEhKTlFlsMPDSlsPLLopVNRcDhI....DFlpSpMKApVcNpFGtpSEhpcLosDYlplQhospSohpMKlLsLsDoTpVlCsVoTVCuPACDSpl+FYTTDWK.LssssFl.olPshsDFlt.ssDoss.Ysap-ApppADM.LMKscLSc-sspLohThTTP-YhuKEstEKLcPFLR+.PlVYpWcsG+Fp .......h....hulpAQ-h+slFlsMPDSlsPhLo+ssRpDhl....Dalp.uc.MKApVcNpFGspS..chpcLos..DYlplQhospSohpMK.lLslssosp........lICsVsTssu..PssDSclcFYos-Wc.Lssspah...shPshssF..lt.....ssDohs..cap-hppphDhhLhpscLSt-sspLohohoTPpYhucEstE+lcsaL+c.PllYpWpsspF.................. 0 5 11 13 +11477 PF11645 PDDEXK_5 DUF3257; PD-(D/E)XK endonuclease Pollington J anon pdb_2ost Family This family of endonucleases includes a group I intron-encoded endonuclease [1]. This family belongs to the PD-(D/E)XK superfamily [2-3]. 21.50 21.50 21.50 21.60 21.20 21.40 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.92 0.71 -4.52 2 21 2012-10-11 20:44:46 2008-09-15 14:40:57 3 1 20 4 4 13 7 133.70 33 94.51 CHANGED STKLKGDIAEQAAILRALKLGWGVLKPLGDRLSYDLVFDVEGILLKVQVKSSWFSEKTGNYVVDNRRTRTNRRNIVRSPYRGNDFDFAVAYVEELELFYVFPVDVFISYGSEIHLVETDKRQRKPRSFGYREAWHLILQKGAAQKETSA .........p.TK.KGDlA.ttAhlchlchGasVLpPhuD+.sYDLVh.............-.t-GlLh+VQVKouhhsppsGhh.VcpRsoh.oscpslsppphpsp-hD.ahlsYstph-.hYhFssclF.............................................h...h..pt............................. 0 2 4 4 +11478 PF11646 DUF3258 Protein of unknown function DUF3258 Pollington J anon pdb_2oq8 Family This viral family are possible phage integrase proteins however this cannot be confirmed. 
25.00 25.00 27.20 70.10 19.70 17.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.31 0.72 -3.88 4 7 2009-01-15 18:05:59 2008-09-15 14:47:15 3 2 7 4 2 11 0 101.90 39 21.51 CHANGED P..hhuclYhchL.tph.K.sLpshpppDYARa.llWstls.DttI--..........ah.+DIuh.lDpsapLsK.hphcP.Yp+huWsEc.NhslsEDDlhhscolt+Y .......hScVYh-FLpaKlpKtsLssKhppDYARhallWphlscDtsI-p..........Yps+DIGhFIDpCapLPK.hNhtP.YsKMuhsEclshDVsE-DhlsPKoVp+Y 0 0 1 1 +11479 PF11647 PMT_C C-terminal region of Pasteurella multocida toxin residues 569-1285 Pollington J anon pdb_2ebf Family This family represents the C terminal region of Pasteurella multocida toxin (PMT) which displays a Trojan horse-like shape with three domains, C1, C2 and C3. The C3 domain possesses the Cys-His-Asp catalytic triad. PMT is an enzyme toxin carrying the cysteine protease-like catalytic triad which functions on the cytoplasmic face of the plasma membrane of target cells [1]. 25.00 25.00 26.40 25.10 23.20 21.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.00 0.72 -4.36 14 184 2009-01-15 18:05:59 2008-09-15 15:09:40 3 27 97 6 23 185 1 66.70 40 3.18 CHANGED osppLhcp..ups.GhsIGESapplhuchtllcsh..L....tpppVpsha.....l-chls-p.sushssh .........oVsELl-s..ApVsGKlhGESYQpVlDtLs.hH....suhh.....t.-sVcpLh.pLp+pIEuYlhsHPsSGRspA...... 0 1 5 16 +11480 PF11648 RIG-I_C-RD C-terminal domain of RIG-I Pollington J anon pdb_2qfb Family This family of proteins represents the regulatory domain RD of RIG-I, a protein which initiates a signalling cascade that provides essential antiviral protection for the host. The RD domain binds viral RNA, activating the RIG-I ATPase by RNA-dependant dimerisation. The structure of RD contains a zinc-binding domain and is thought to confer ligand specificity [1]. 
25.00 25.00 26.20 25.00 24.60 24.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.84 0.71 -4.25 14 227 2009-01-15 18:05:59 2008-09-15 15:56:30 3 13 67 45 119 217 0 117.10 32 14.61 CHANGED cphpLLCtsCpshsshusDlRpVEsoHaVslsssFp.chapspppPhs.tKhac-ap.supIpCtp..CucsWGhthhYKusp.LPsLKIcuhl....hEs.stsphphpKWpclsFphspF.Dhs-hstts ..............hpLLCp..pCpshsCpusDI+h.l.EpsH+Vsl.sss.Fp..phah.spt.p...p..s.t+pat..-ap..hs..upIhCpp.....Cup.sWGhhhha+s..hc...L..P..sLKI+sFV..h.hps....sts+p.ph.+KWpcls..hph.s.h.Dh.phs..t................................ 0 33 40 72 +11481 PF11649 T4_neck-protein Virus neck protein Pollington J, Finn RD anon PRODOM Family This family of protein represents gene product 14, a major component of the neck in T4-like viruses along with gene product 13. Gene product 14 is rich is beta-sheets. The formation of the neck to the head of the bacteriophage is crucial for the tail attachment [1]. 25.00 25.00 26.90 26.50 18.40 17.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.39 0.70 -5.09 12 78 2009-01-15 18:05:59 2008-09-16 09:11:18 3 2 77 0 0 70 1199 236.50 35 80.71 CHANGED M......................s.psphhNPYl..........NhppasspQsLt-sLVsESIphpGl-hYYlPREaVshD.lhsEDhpoKFp+uaphtAYlsoaEGYpGpssahSKFGhplsDEhThsls.chFppps........DGp..........cPppGDLIYF.....Ph...sNsLFEIsaVEs.cPFYQhGcshhhclpsppFhYSuE-l...pPt.....lpc.-sl..-ph.t.slt.lhslsGhsDhsltp....htcssphpscstchhpph.slsscu...................osFssh ..............................................tpp.hssYl..........Nh..stapspQpLh-sLVsESIphhGh-lYYlPRphVshDhlauE-.psKFspA..aphtAYlssaEGapGttshhSKFG.hplpDElThsls.phFccpl............ssp................cP.pEGDLIYF.....Ph...sspLFEIsaVE.....s.cP.....FYQhG+shlhclpsphF.YS.sE-l...sss.....lpc.-sI......p....ph.t.slp.lhslsGhs-hshsp....htpssphpscstp.h.....l.sts........................s............................................... 
0 0 0 0 +11482 PF11650 P22_Tail-4 P22 tail accessory factor Pollington J, Finn RD anon PRODOM Family This tail accessory factor of the P22 virus is also referred to as gene product 4 (Gp4). The proteins structure consists of 60% alpha helices. Gp4 is the first tail accessory factor to be added to newly DNA-filled capsids during P22-morphogenesis. In solution, the protein acts as a monomer and has low structural stability. The interaction of gp4 with the portal protein involves the binding of two non-equivalent sets of six gp4 proteins [1]. Gp4 acts as a structural adaptor for gp10 and gp26, the other tail accessory factors [1]. 19.40 19.40 19.40 22.70 18.60 18.10 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.79 0.71 -4.68 6 137 2009-01-15 18:05:59 2008-09-16 09:28:17 3 1 135 24 8 65 4 158.50 51 97.54 CHANGED MsplhTKGDlVLhALRKhGlASNATLTDVEPQShEDGlNDLEsMMAEWh....upGI-lGYtFAssEps..PsPDD-oGLssphpsAVuapLhlRlhsDYulEPTs+lluoActuY-sLhhshlplPulcRRs..........DMPsGpGNpassasss+YYsc+t.....hssDss......ss .......................M.plhTKGDlVLtALRKhGlASNATLTDVEPQShEDuVNDLEsMMAEWh....upGIsl.GYhFAssDps..PhPsD-pGLsspAhsAVhapLAsRlssDYuLEsospllosAchuh-tL....hKlsuhc.Rtp..........cMPsGSGNphssast.+Ya.tct......stD.sss............ 1 3 3 5 +11483 PF11651 P22_CoatProtein P22 coat protein - gene protein 5 Pollington J anon PRODOM Family This family of proteins represents gene product 5 from bacteriophage P22. This protein is involved in the formation of the pro-capsid shells in the bacteriophage. In total, there are 415 molecules of the coat protein which are arranged in an icosahedral shell [1]. 
23.70 23.70 24.30 23.80 23.50 23.60 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -11.94 0.70 -5.87 13 302 2012-10-03 06:22:39 2008-09-16 10:33:18 3 5 281 0 45 236 443 327.50 22 91.19 CHANGED MANshLT..hslIscEslcslpspsshupslc+tYsspF....suhphGsolslhhPsphpststsshs.........sspslsEtpVslslsp.csVshphoucEhtLcls-hpcRhl+PAhppLAspl-uslht.............shtppsoNhVup...ssush..s.hsshtsAsstLspttlPps.sRpshhsPpspschuss.sshhtsu.ptspsAaccusl.sphuGhDhhhspslhstssGu.usuhTVsGusphss...................Tlslsuhss..........lptGDhhohAGVhtlp.lTKpsh...sp.ppFpVst.....usuusslpIhPshlssssss.sp....atsVssssAsssslohlsssssshh.NLhapcsAhsLsssPL.hPhtuuspshssoa....cGlSlRlhsthDtpscsspsRlDlLYGs.sslcP-h.ultlss .........................................................................................t....h.hhpthl.th..t.hs.hsp..t..lp+.htsst......s.p.tssssl..hh.Pst.ps.....p....tp....shshss......ssssl.ptps.s.sphsp.cps.hphps..-....l..p.....t....p.hh.p...phl....p....sutptLA....ssl-ptl.u...p.......................hhhp.t.u..u..h.s.l..s.................s.....s.....s..sst.ps.......as.l.upstphlpstslsps..sphhlh..sPtshtp..ht.ts.st.h.ths..s...phsppA..h.c.supI..spl..uGhchhho.....sshh.....h.pts...u.s..s.hhlpst.p.t....................hlslssss.........hthGp.hphss..................................................................................................hht...h.hh......s...........................sh.h...................thth..u.................................................................. 0 14 29 36 +11484 PF11652 DUF3259 Protein of unknown function (DUF3259) Pollington J anon PRODOM Family This eukaryotic family of proteins has no known function. 
25.00 25.00 34.20 48.80 21.00 18.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.89 0.72 -3.83 4 92 2009-01-15 18:05:59 2008-09-16 13:06:44 3 1 42 0 59 80 0 82.20 64 46.83 CHANGED MplQDpQLAtQLhRLRu-Ip+LKl-QsC+hH+EMLsDATaELEEps-.uDL.CDlPhuuuhuLSTPLKhlGlTKMNINoRRFSLC ....MphQDpQLARQLhRLRu-Ip...+LKlEQsC+LH+cMLsDAoaEL.EEp....sE........uDL..hC-h...Phs...u.uhuL..SsPL+hIGlTKMNINSRRFSLC. 0 6 12 28 +11485 PF11653 VirionAssem_T7 Bacteriophage T7 virion assembly protein Pollington J anon PRODOM Family This family of proteins represents the gene product 7.3 from T7 bacteriophage. The protein is localised to the tail and is thought to be important in virion assembly. Particles assembled in the absence of the protein fail to adsorb to cells [1]. 25.00 25.00 26.00 25.60 20.40 19.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.30 0.72 -3.23 5 48 2009-09-11 12:45:27 2008-09-16 13:30:53 3 1 31 0 0 44 0 92.00 51 99.01 CHANGED MG..KKIKKAVKKVsK.VpKl....s+.sttAuGGhLG..GGcstsp.V.......EAssP.As....AQlVEVP.cDcV-oED-uQTESu+KKARAuGKKuLSVARSSGGGINI .......................................K.lpKshKpVsK................ht.st.pssGGhhG....GGpscsphs.............psPQAA.....AQlV....-VP.cc-VssED.-AQTESG+KKARAGGKKSLSVARSSGGGINI............. 0 0 0 0 +11486 PF11654 DUF2665 Protein of unknown function (DUF2665) Pollington J, Finn RD anon PRODOM Family Some members in this family are annotated as Non classical export proteins, however no specific function is known. 21.90 21.90 23.10 22.40 21.30 20.80 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -8.15 0.72 -4.39 8 66 2009-01-15 18:05:59 2008-09-16 13:38:40 3 3 66 0 48 65 0 46.30 35 57.78 CHANGED YLISRslDPlLAlulGssSYYLaEp..Rs..GRPpGHoLNELlt++ascps ....YLISRhhDPlhAlhlGssAaalhpp..Rp....t+.pG+.plpp...ll..h.h................................. 
0 16 28 44 +11487 PF11655 DUF2589 Protein of unknown function (DUF2589) Pollington J, Finn RD anon PRODOM Family This family of proteins has no known function. 21.00 21.00 22.30 21.90 20.90 20.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.98 0.71 -4.12 34 199 2009-01-15 18:05:59 2008-09-16 14:19:00 3 3 97 0 60 181 27 153.20 21 77.55 CHANGED shssLIsG.hpAsspAQthluppthchlpphh.............pssp.............hshphphslssppt.............php.lplPLlsllshssLtlcclslcFshclss..spsppsppstshph......psp.t................sspppssp......psssssphplplphpppssPEGlt+ll-hhsps ....................shppllsu.htAsspAQthhup.pthshlpphhh............psst..................hhhthphphslsspp.........................p.t.lplPLlsllshssL.tl-cssl..sFphcl..ss.spsppspsshphph......................thp.thh................hthpsphuspppssp..........psspssphp..lplphptpt.PEult+ll-hhsp............................................ 0 9 25 47 +11488 PF11656 DUF3811 ComReg_Spx; YjbD family (DUF3811) Pollington J, Finn RD anon PRODOM Family This is a family of proteobacteria proteins of unknown function. This family is unrelated to Pfam:PF03960 which contains a set of transcription factors that are also named YjbD. 21.50 21.50 48.00 47.90 21.40 21.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.98 0.72 -3.92 6 505 2009-01-15 18:05:59 2008-09-16 14:21:29 3 1 502 0 25 113 1 86.90 83 97.82 CHANGED M.+LT.KDMTESEQRElKTLLD+ARhstGRsLTNuEsN+IK-EhI-KlMspREthAKtAR.......tE+KppphpP.opsTFSW..SAss.usRu+R ......LPRITQKEMTEREQRELKTLLDRARIAHGRsLTNSETNSIKKEYIDKLMsEREAEAKKAR.......QLKK..KQAYKPDsEASFSW..SANT.STRGRR......... 0 1 6 14 +11489 PF11657 Activator-TraM Transcriptional activator TraM Pollington J, Finn RD anon PRODOM Family TraM is required for quorum dependence. It binds to and in-activates TraR which controls the replication of the tumour-inducing virulence plasmid. 
TraM interacts in a two-step process with DNA-TraR to form a large, stable anti-activation complex [1,2]. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.66 0.71 -4.71 11 77 2009-01-15 18:05:59 2008-09-16 14:26:03 3 1 62 0 11 52 8 141.40 40 96.89 CHANGED MsDcI-ElI+-IAsKHGIAVuRDDPILlLQTINsRLMpDSp+AQQEhL-pFKEELEuIApRWGsDAKsKAERlLNAALuASKEAMs+shp-uspuuAcAl+pEl-suLuc.lsu.lpcuR+luhhNllAuuhslhAAslslhshL ........sDcl-ElIcEIAsKHGIAluRDDPILlLQTINp+LhpDo....tpAQ...p-lL.-pFKpELEuIupRWu..-D....AKsKAE+hLNAALs......ASK-A...Mscs.hp...-us....ptuuc..ul+c...E...l.-sshsp.l..ts...lccu..R+luh...hNhlAushslhAuslslas...................................................................... 1 2 5 10 +11490 PF11658 DUF3260 Protein of unknown function (DUF3260) Pollington J anon Pfam-B_003054 (release 23.0) Family Some members in this family of proteins are annotated as YhjU however this cannot be confirmed. Currently this family has no known function. 
24.80 24.80 38.30 38.30 24.70 24.70 hmmbuild -o /dev/null HMM SEED 518 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.78 0.70 -6.36 23 576 2009-01-15 18:05:59 2008-09-16 14:50:18 3 1 549 0 49 313 9 502.60 71 95.59 CHANGED hWNlYFllKFuLaatGhlsFpsLhNLsFsshLll..Pls...pptl+hlRphlAlslulsLhaaDoaLPPhsRLhuQsupltsFShuYLlELluRFlshphlluhhllllsYhhlupWlRlosaV.........lsuLlhlslhshsssshshhsusss..........................sstspssstsssssssshsshLssFappEupRpssFss..sssupsFDlLVLsICSLuWDDL-sstlpsHPLhs+FDllFcNFNSATSYSGPAAIRLLRASCGQpuHssLYpPAs.pQCaLFsNLApLGFpspllhNHsGcFDsFLp.l+cp.GshpsPlhspsulssshpuFDGSPIacDhsVLspWhp.pR.psssstsAhaYNoIoLHDGNRlssu.th.sohsSYthRtp+Lh-DlspFhcpLcpSGR+lhVllVPEHGAAL+GD+hQluGLREIPoPsIsHVPVGlKllGh..tssppGsslpIspPoSYLAlSpLlu+llssssFpsss.shtpLspsLPpTphVuENssslVMphsu+.Yl+LsG.usW .................G.WNFYFLVKFGLLWAGYLNFHPLLNLVFAAFLLM..PIP...+YpLHRLRHWIAlPlGFALFWHDTWLPGPpSIMSQGoQVAuFSosYLlDLlsRFINWQMIGAIFVLLVAWLFLSQWIRlTVFV.........VAILlWLNVLTLuGPuFoLWP.....AGQPTsTVTTTG............GNAAATVAssGssPV.lGD.hPAQTAPPTTANLNAWLNsFYsAEuKRKoTFPusLPu.DAQPF-LLVINICSLSWSDlEAAGLMSHPLWSHFDI.FKNFNSATSYSGPAAIRLLRASCGQsSHTsLYQPAs.N-CYLFDNLuKLGFTQHLMMsHNGpFGGFLKEVREN.GGMQoELMs.QosLPVhLLuFDGSPVYDDhAVLNRWLs.spE.+-p.....NSRSATFYNTLPLHDGNHaPGV....SKTADYKsRAQKhFDELDAFFTELEKSGRKVMVVVVPEHGGALKGDRMQVSGLRDIPSPSITcVPVGVKFFGM..KAPHpGAPIlI-QPSSaLAIS-LVVRllDGKlFTE.D..s.......VsWpKLTSsLPQTAPVSENuNAVVIQYQsKPYVRLNG.GDW.................... 0 6 14 32 +11491 PF11659 DUF3261 Protein of unknown function (DUF3261) Pollington J anon Pfam-B_003077 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 
25.00 25.00 26.00 25.70 23.40 22.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.91 0.71 -4.63 32 248 2009-01-15 18:05:59 2008-09-16 14:52:44 3 1 246 0 60 186 11 157.50 39 79.73 CHANGED lhsL.lLsuCup.p..pps.s.....lsls.ssplsLssssths............................................hshsQhlohphssp....p.......+pLhspLcl-sp.clsLsGhushGptLhsLsYDspslpsppt..hLsssl...scplLusl.LshWPhpuhpstLs...GhpLtsps............ppRp.lhspssph.............llcIcYpp ...............................................h.hsL.hLsGCSp..p..ppps.....pshLpsss..plsLPsPuhsss...........................................lstQQLlousasuc.....o.........poLLVhL..psDsp..+lsLAGLSulGhRLFhlsYD.u.pslcsEQu..llsspL.PPApQVLADlMLoaWPhuAWpspLs..sGWpLpDsu....................spRp.LhsssGpl.............ls-IpY..t............................................ 0 9 19 40 +11492 PF11660 DUF3262 Protein of unknown function (DUF3262) Pollinton J anon Pfam-B_003096 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 23.50 23.50 23.50 23.50 21.90 23.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -9.76 0.72 -3.88 24 210 2009-01-15 18:05:59 2008-09-16 15:02:28 3 2 173 0 64 172 6 74.10 33 90.38 CHANGED MouAQtsAFpAu.SG.lsPsthsslhlGhllulLhLWusWAlhssYpGWusp.plsptshhtsslRhslLLlVlsaahh ......MssuQhuAFpAu..oG..hsPushshlhlGhlhulLLLWusWAhhosYpGWusp..plsptphhthslRhlhLhllhsFhhh....... 0 4 30 52 +11493 PF11661 DUF2986 Protein of unknown function (DUF2986) Pollington J, Finn RD anon Pfam-B_003109 (release 23.0) Family This family of proteins has no known function. 
21.30 21.30 23.70 49.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.27 0.72 -3.63 42 221 2009-01-15 18:05:59 2008-09-16 15:04:49 3 1 191 0 59 167 4 44.50 59 68.34 CHANGED MNRKKKIsphLpp+sK+sNAKlps..ss...KP+YISKAER....tKltupsp .MNRKKKIsQhLpp+tKKtsAKLcs...SN.....KP..+YISKAER....AKLsAcpt....... 0 6 14 39 +11494 PF11662 DUF3263 Protein of unknown function (DUF3263) Pollington J anon Pfam-B_003189 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Actinobacteria. 22.30 22.30 22.30 22.40 21.80 22.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.33 0.72 -4.52 18 368 2009-01-15 18:05:59 2008-09-16 15:51:34 3 1 342 0 112 259 77 76.40 49 76.45 CHANGED ssLocR-ppILcFERQWW+asGA..KEpAIR-pFulSuTRYYQlLNsLIDsPtALttDPMLV+RLRRLRssRp+sRuAR ......s.tLop+-+sILsFEcpWa+.......hu.Gu...KEpAIR....-phGlSusRYYQhLNtLlDcscAlstc..PhLVpRLRRlRu...pRp+uRuAp..................... 0 39 89 107 +11495 PF11663 Toxin_YhaV Toxin with endonuclease activity YhaV Pollington J, Finn RD anon Pfam-B_003231 (release 23.0) Family YhaV causes reversible bacteriostasis and is part of a toxin-antitoxin system in Escherichia coli along with PrlF. The toxicity of YhaV is counteracted by PrlF by the formation of a tight complex which binds to the promoter of the prlF-yhaV operon. In vitro, YhaV also has endonuclease activity [1]. 
22.80 22.80 23.20 51.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.86 0.71 -4.44 17 374 2009-01-15 18:05:59 2008-09-16 16:06:56 3 1 357 0 48 161 27 138.30 65 91.19 CHANGED plaAHshFl-pL-tLhspVEph+t+DPpsYtp+sssKhLAAlh+Lhh-sIPtDPs+spaRQGsTLGs.c++HWhRA..KFhp.paRLFFRa..copu.KlIlhAWVNDEsThRsYGu+sDAYsVFp+MLspGpPP-DWssLLptupsp .........................A.LYAHPCFQETYDALVAEVEsLKGK.DPENYQRKAATKLLAVVHKVIEEHITVNPSSPAFRHGKSLGS...GKNKDWSRVKFGA..GRYRLFFR.Y..SE.KE..KVIILGWMNDENTLRTYGKKTDAYTVFSKMLKRGHPPADWEoLTpETEEs........ 0 12 30 42 +11497 PF11665 DUF3265 Protein of unknown function (DUF3265) Pollington J anon Pfam-B_001494 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Vibrio. 20.60 20.60 20.80 20.80 20.50 20.50 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.72 0.72 -6.86 0.72 -4.10 16 241 2009-01-15 18:05:59 2008-09-16 16:28:22 3 5 69 0 28 244 0 26.70 45 50.19 CHANGED +sAhHFhaAlsFsscssstphulushsP .+sAWHFaYALshVhKlVCuuhGIAhlTP........ 0 0 0 15 +11498 PF11666 DUF2933 Protein of unknown function (DUF2933) Pollington J, Finn RD anon Pfam-B_002197 (release 23.0) Family This bacterial family of proteins has no known function. 20.30 20.30 20.30 20.30 20.20 20.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.08 0.72 -4.43 40 286 2009-01-15 18:05:59 2008-09-16 16:47:17 3 2 243 0 91 187 35 55.20 43 68.32 CHANGED hlshhslAuaaLlsEHpAHl...hGhL...PaLLL.LACPLMHlFM.HGGHGsHupccsssps.s ..........................lshhulAuahLlpEHhuHl....hGhh...PaL..LL.LlCPL....MH...L....Fh...H...G.GH.GsHpccsspsp.p.............. 0 15 45 74 +11499 PF11667 DUF3267 Protein of unknown function (DUF3267) Pollington J anon Pfam-B_002418 (release 23.0) Family This family of proteins has no known function. 
27.10 27.10 27.30 27.30 27.00 27.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.50 0.72 -3.86 44 858 2009-01-15 18:05:59 2008-09-16 16:53:05 3 1 553 0 83 438 6 109.10 28 60.12 CHANGED lslllhl.hlHEhlHhlhahhhttp..pht.hthphthhhhash.sspsls+ppahlhhlsPhlllohlhhhlhhh....h..htsshhhhlhs....hpsuhsssDhhhhhhllpp..spsshlp ....................h.hhllhh.hlHEhIHhlhFhhapcs....Ksp.hth.phchl...h.hats.ssp..shsKhpFp.IhhlhPh..llloshhlhLhhh.................hhh...p..hhh.F.lhu....lpsuhshsDlhhltlllpp..spsphl............... 0 30 56 74 +11500 PF11668 Gp_UL130 HCMV glycoprotein pUL130 Pollington J anon Pfam-B_002736 (release 23.0) Family This family of proteins represents pUL130 from Human cytomegalovirus, a glycoprotein secreted from infected cells that is incorporated into the virion envelope as a Golgi-matured form. The protein promotes endothelial cell infection through a producer cell modification of the virion [1]. 25.00 25.00 54.70 54.30 21.20 20.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.03 0.71 -4.71 4 61 2009-01-15 18:05:59 2008-09-17 09:14:16 3 1 11 0 0 55 0 141.30 64 69.22 CHANGED aCP.haPSPPRshu.FouYcphssGPECsNpTLYlLaNRpGQpLIERPSsWVpKlsaYLSGRspsVFQ+Fs+sAoosoc.slpIos-DtKlFGAHMlPKpoKhLRalVpDGTchphC.M+lpTWA+shpsY.luFQsRlphTpAspps.ohCT+PNLlV ............YCPFlYPSPPRSP.QFSGFQpVsTGPECRNETLYLLYNREGQTLVERSSTWVKKVIWYLSGRNQTILQRMP+TASKPSDGNVQI.SVEDAKIF.GA....HM..VPKQTKLLRFVVNDGTRYQMCVMKLESWAHVF...RDYSVSFQVRLTFTEANNQ...T...YTFCTHPNLIV............... 0 0 0 0 +11501 PF11669 WBP-1 WW domain-binding protein 1 Pollington J anon Pfam-B_003402 (release 23.0) Family This family of proteins represents WBP-1, a ligand of the WW domain of Yes-associated protein. This protein has a proline-rich domain. WBP-1 does not bind to the SH3 domain [1]. 
22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.04 0.72 -10.85 0.72 -3.94 9 225 2009-01-15 18:05:59 2008-09-17 10:46:17 3 5 71 0 128 194 0 96.00 42 32.88 CHANGED YhC.-oGaCCGEotCC........................oYYYELWWFWLVWsllIlLuCCCsh+HRRsKpRL.....QpQQRQ+EINLlAYptApp......os.sh.hRhLssahLPsYEEVss+PsTPPPPYo .........YhC..-s.GaCCG..p.o.....tCC.....................sY..YYELW.W.F.......W.......L...lWsl..l...I..l...L...S.CCCsh....+HR...Rs.KhRL.......QpQQRQ+EINL.hAYptApph........ss..h.....h......ph.hssah.PsYE-Vspp..P....s..oPPP.PYo........................................................ 0 21 33 76 +11502 PF11670 MSP1a Major surface protein 1a (MSP1a) Pollington J, Finn RD anon Pfam-B_001532 (release 23.0) Family MSP1a is part of the A.marginale major surface protein 1 (MSP1) complex and exists as a heterodimer with MSP1b. The complex has adhesive functions in bovine erythrocytes invasion [1]. 21.30 21.30 21.30 23.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.44 0.71 -4.38 3 306 2009-09-10 23:09:43 2008-09-17 11:25:55 3 4 4 0 1 364 0 102.80 79 77.43 CHANGED MS.EYVSSQPADSSSAGGQQQESSVSSQSDQASTSSQLGsDSSSAuGQQQESSV.SQSDQASTSSQLGsDSSSAuGQQQESSV.SQSsQASTSSQLGTDWRQEh+SsVASVEYhLAARALISVGVYAAQupIApSpGCASLSVAEVEEI ............................sDSSSAuGQQQESSV.SQSsQASTSSQLG..sDSSSAuGQQQESSV.SQSsQASTSSQLG..sDSSSASGQQQESSV.SQSsQASTSSQLGs.D............................................................ 0 0 0 1 +11503 PF11671 Apis_Csd Complementary sex determiner protein Pollington J anon Pfam-B_4678 (release 23.0) Family This family of proteins represents the complementary sex determiner in the honeybee. In the honeybee, the mechanism of sex determination depends on the csd gene which produces an SR-type protein. Males are homozygous while females are homozygous for the csd gene. 
Heterozygosity generates an active protein which initiates female development [2]. 25.00 25.00 28.30 27.90 22.10 22.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.21 0.71 -4.40 5 496 2009-01-15 18:05:59 2008-09-17 11:44:59 3 6 18 0 8 464 0 126.60 68 61.76 CHANGED TSRKRYSRSREREQKSYKNENSYREYRETSRERSRsRptRERSRE+KIISS.ss..NNYNYNNNYNN.N..pYNKh...........YYNINYIEQ..IPVPVYYGNFPPRPIMVRPWVsMQEQVPRFRYIGPLTPFPPRFIPPNtYR.RPPLNPRFRPMY ............TSRKRYSRSREREQKSYKNENSYRc.YRETS+ERSRDR.pERERS+E.+.....I....SShs.......s.......p..h....N...................N........h...p..N....s.....Y......Np.....p............p.ph................YY.N..IN.Y...IEQ......IP..V....P....l...YYG.NFPPcPh..tPWls.M.QcQlP...R.FR.a...IGP..T.sF.PRFIPP.NhYR..RP.P.LNPR..FtPh......... 0 2 2 8 +11504 PF11672 DUF3268 Protein of unknown function (DUF3268) Pollington J anon Pfam-B_4693 (release 23.0) Family This family of proteins has no known function. 24.70 24.70 26.00 25.70 23.80 24.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.39 0.72 -3.94 13 115 2009-01-15 18:05:59 2008-09-17 12:50:20 3 2 104 0 24 97 7 101.90 37 68.03 CHANGED tPlpCPYCuupspLsssphlYG.+phsshPhl.....YlC..ss...CcAYVGsHP......tTshPLGTLADtshRptRppAHcsF-pIWc..sppho.RocAYtWLApcLsl.s.ccsHlG .......hCsYCGu.tuhL.sp.u....sth.Ys.hp.s..p...s..h.....alCss...CsAalGsHs.......ps..c..hPLGpLAsspLRpt+ppsH.tsh-slhp...............ss...thp...RscuYpWL...A...ppLGl.sht.sHhG.............................. 0 4 10 18 +11505 PF11673 DUF3269 Protein of unknown function (DUF3269) Pollington J anon Pfam-B_4706 (release 23.0) Family This family of proteins has no known function. 
25.00 25.00 25.70 25.40 24.50 24.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.40 0.72 -3.54 3 181 2009-01-15 18:05:59 2008-09-17 13:34:48 3 1 153 0 5 62 0 72.80 80 98.93 CHANGED Ms.hEKYYLYRsDGTE-IKVtKcKDNVNsV+oLTGAHFS-EcKhMTDS-LK+FKAsHGLLYEEELGLQuTIFDI ....MP.KEKYYLYREDGTEDIKVIKYKDNsNEVYSLTGAHFSDE.KKIMTDSDLKRFKGAHGLLYEQELGLQATIFDI.... 0 2 2 5 +11506 PF11674 DUF3270 Protein of unknown function (DUF3270) Pollington J anon Pfam-B_4714 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Streptococcus. 25.00 25.00 25.70 25.60 23.70 21.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.14 0.72 -3.97 8 347 2009-01-15 18:05:59 2008-09-17 13:38:56 3 1 344 0 25 115 0 89.60 53 94.35 CHANGED hsL++hpsa.c..DhpYQc...cphPcYQ-aQshsppss..KLcELlFFupIAsFClhTVLhoFlhLuhpLsohaAFslAhslS....LulhpshcphIK+ .............MPVRKLQS..Y.....Es-YQE...ppQhP+YQsYsPEApocA..sLKElLFFlNIAVF.CIshslFSFIFLALKLuTsLAFshAIuhS....LllLpltRslIK+....... 0 2 6 15 +11507 PF11675 DUF3271 Protein of unknown function (DUF3271) Pollington J anon Pfam-B_4697 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Plasmodium. 
20.40 20.40 20.60 21.50 19.50 19.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.75 0.70 -5.25 12 39 2009-09-10 14:53:48 2008-09-17 13:49:26 3 1 5 0 39 40 2 169.30 39 85.08 CHANGED NIILSFFILVIFSNVK....AATFQDs.NsssPKsIuYsSVuQPhssFs+pccpHspYLDlINslh+DpScNhKYAYpGuNYHWlITDFDISIDNSSptLK+phScKtpEuLhhGosYFIuYIKDpIKaLlSpaMHKYDFEpNYtssLphLucDLKsLIYDcF-pchKpDLIKYEstPEscKh+cpAKchhcsLVpNSuhplpGYFIKlpcDusYhcLscspslYFsIsIsKs.uphsathKh.cs-lsEhlss ................NIIlSaFILsl.sNV+....uAoFQst.Nssps.........p.IuY.slspPhshhph.cppahpaLshINphhpcpscs.h.cYsapsssaHalITcFcl.IsNus..lpthh.ppt.p.h..us...h.hh...h.......h.pYsF.....t....h.ppLKs.h.spF..ph...hlp.p....p...................h.hps........................................................................................................ 1 0 6 38 +11508 PF11676 DUF3272 Protein of unknown function (DUF3272) Pollington J anon Pfam-B_4726 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Streptococcus. 25.00 25.00 41.60 41.50 22.90 22.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.78 0.72 -3.96 7 324 2009-01-15 18:05:59 2008-09-17 14:07:26 3 1 321 0 18 94 0 56.30 61 95.59 CHANGED shpQFlhhAlhTshETYFFNtSlhoGpahhAhFauhLLhRslphsYhls+hsctl.K.....p+K ..hpQFlhhAlhTAhETYFFN-uhMTGRYIMAAFWAILLFRNFRVSYVMGKIVDlIDQ...chs+K.......... 0 2 5 9 +11509 PF11677 DUF3273 Protein of unknown function (DUF3273) Pollington J anon Pfam-B_4727 (release 23.0) Family Some members in this family of proteins are annotated as multi-transmembrane proteins however this cannot be confirmed. Currently this family has no known function. 
25.00 25.00 25.40 25.20 20.10 19.90 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.80 0.70 -5.13 9 59 2009-01-15 18:05:59 2008-09-17 14:11:00 3 2 17 0 51 59 0 236.90 35 81.42 CHANGED ssEsspsRGPpapPhhsFF.sh.LRhGFhlQhluhh.LhhluhashGt..huIhsF.LpAhPE...phussha.hlhhhhuhaLlGTLhIh.FplhssDDo..KtsRGaRAGoKhLttAoolshlShhLphVpa..lhusaahstpWhschttstusWlhahhupllsuFuLhlYGuuhFaLEsYHsEGTuEphuWlshhhFhhAGlsElL..hsFhssGshhoLhhL......lulhsuolWAhhFEPlhc+asspLpposlpN-.....hhKscpshuY .........................sps.psRG.ph.PhhsaF......ssh.LRhGhhlQhhuhh.hhhh.hhshGs..hulhsF.Lpuhs-...phupuhhhhhhhhhuhallGslhlhsFQ.hlsDDo...p.sRGaRuGoKhLphAshlshluhhLphl.a..hhs.hahstpWhtchttstSpWhhahhuplhcuhuLhhYGhuhFhlEsYHsEGsuEtauahshhhFthuGlsElh..hhFhshush.sLhhh......huLhssslWAhhFEPlhp+hsspLpppslps-.....h.+.pph..Y............ 0 17 27 43 +11510 PF11678 DUF3274 Protein of unknown function (DUF3274) Pollington J anon Pfam-B_4733 (release 23.0) Family This bacterial family of proteins has no known function. 
21.00 21.00 21.00 21.10 20.90 20.90 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.94 0.70 -5.19 10 137 2009-01-15 18:05:59 2008-09-17 14:28:57 3 5 81 0 29 183 0 241.10 28 36.78 CHANGED sohPAMssLtshRFaQRMWT+RpRsG......csVtVG..sPPtphsLRtcsEs+YPGuGss...........sshhupuslsps-pRhINGEsLpPPacPcM.asGEs........tphsGsso...........tuG....hDsPDDVstslALGNstAch+hhtlssp....ssstchppsctp...tuaN.pG+s.sDQTpsaRssssss.s............sthllhRcETPsEsRtRMppsscuhp-NSYHSulL+SsENHRWVTAMDVAIGQAcoLDDPsWRclLlAhADW+h........stcphcphp-Lss......ap+LStcsQcLlcAospYYcpGsFPupp ..............................................................................................................................................t..h.sh..L...tFaQRhaThh.R.t.........hlG...sst...hh...p.hh.ssth............hshh.pssh...p.hhlsu.tL..shtsph..tGp...........h.Gpss...........huG....h.t.Dshshshulus..h..p.....h..t......................t...h.t.tht.....hN...t...tppo...hph...tt...................thhhlpRpcT.Pp..EsRtphtp..s.ssthttsoaHSull..pssEspRhVsAhDlAIGpup.olDDs..s.ahphLhthADW+h..............s.t......p..t........at.t.p..tsp.hhpts..Yh.pG.hP...t........................................ 1 3 12 18 +11511 PF11679 DUF3275 Protein of unknown function (DUF3275) Pollington J anon Pfam-B_4743 (release 23.0) Family This family of proteins with unknown function appear to be restricted to Proteobacteria. 
25.00 25.00 58.10 33.20 19.80 22.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.49 0.70 -4.70 5 108 2009-01-15 18:05:59 2008-09-17 14:35:44 3 2 82 0 33 99 2 189.60 49 92.06 CHANGED IsLPG.pLslRTIs.GRNG-FsVG+LtTsIGEFcVKDALLDQacEGKY+G-FVIocIaP+pYsuGG+hVsEIRApLDuMsLDuhspLScD-scchussEsDPLDE.Ptl.usossuSAPsPuPSPpsspPuSpRAo+D.h.chuPFGhcs.SsPAppsspASssc-sDA.......ELFGslWP...lGEsVKLDoTVDR+pLRpQssRLGpLG.YcLDF+pQhWshc .........IslPG.pLslRTIp.GRNGsFsVGRLuTsIGpFslKDs.L-QY.EGKYcG-FlIppIhPpsYss.GGthhhEIRApLDuMTLsslDpLScDEA.cph.usp-lD...PLDE.....t..t.ssssuts..ttsp...t........tss.pussD.....sPFGhss.stPstts..s.ss..p-sDA...................tLFG.hlWP....LG-sVKLDoTlD.R+...tLRtQhsRLGp.LG.YthDhppQpWph.p........................... 0 4 20 28 +11512 PF11680 DUF3276 Protein of unknown function (DUF3276) Pollington J anon Pfam-B_4744 (release 23.0) Family This bacterial family of proteins has no known function. 22.20 22.20 22.20 23.90 22.00 22.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.66 0.71 -4.32 27 215 2009-01-15 18:05:59 2008-09-17 14:53:19 3 1 206 6 67 189 126 117.10 36 92.57 CHANGED M..p-pc.h......-pE-lFS.Ksl+AG+RTYFFDV+uT+usDYYLTITESKKhspcDG...phpacKHKIaLYKEDFppFtEsLp-hhsaIhpcph.....p-Vho-capcs.pccppp.tt...pt...........spthpDlsF ..........................t......-+--lFS.Ksl+AG+.RTYFFDVKcsRt.sDhaLsITESKKhs...ss-G.....pF-KHKIFLYKEDFpcFhpsLp-slsa.Ipppph.....pcshpcp.tpps.pcpt..t.................................................................. 0 32 60 65 +11513 PF11681 DUF3277 Protein of unknown function (DUF3277) Pollington J anon Pfam-B_4749 (release 23.0) Family This family of proteins represents a putative bacteriophage protein. No function is currently known. 
25.00 25.00 25.10 25.10 24.50 24.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.80 0.71 -4.71 7 159 2009-01-15 18:05:59 2008-09-17 15:46:29 3 1 142 0 23 117 4 138.20 38 96.58 CHANGED Mu.TYSFhDVoASlsGPsGuhsLGYGsAsAEEGIslAhussKNTMTlGADGEsMHSLHADKSGpITlpLLKTSPhNpKLMAhYsAQSlsStLWGpNlIslpppsuGDlssuRusAFpKtPDhp.AKDGshlpWVFDssKIDphLGoa ......................................shthsh.ts..h...l.s.Gtusu-p.sIslshsuspsTMTlGADGEVh+SlpADKSGTlTlsLLpTS...PsNppLSh.hYNs.Q.p.t.Ss.s.saGs.sslsI+spsuuDlh.TApsstFpK.PD.stupsusshsW..sFcshclD........................................................... 0 1 9 16 +11514 PF11682 DUF3279 Protein of unknown function (DUF3279) Pollington J anon Pfam-B_4753 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Enterobacteriaceae. 25.00 25.00 26.60 25.90 24.60 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.57 4 650 2009-01-15 18:05:59 2008-09-17 17:06:46 3 1 218 0 13 267 1 116.50 50 91.63 CHANGED phlKsaLA.sups+hlsAcpAtptshtchhC.hSCGssLpLp.tsDsQ.sWFEHDphslsE+thtpCsalcPEc+E.phl.+LpphshsshslscstpWaCVhCppcY.GcKhCsuCtTGlYShssspp .........................hhhpsahA.DupG+hssA+pAtpt..shpchhC.h.CGssL....php.thDsp.PWFEHsp....sl.sE.+t..pCsalcPtc+Ehphl.....+LQphsss.shPVs...+htpWaCh.CccDY.G.E...+hCs+CtTGhaScths.................... 0 0 1 4 +11515 PF11683 DUF3278 Protein of unknown function (DUF3278) Pollington J anon Pfam-B_4766 (release 23.0) Family This bacterial family of proteins has no known function. 
24.40 24.40 24.90 25.00 24.30 24.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.74 0.71 -4.15 9 428 2009-09-10 23:48:43 2008-09-17 17:07:30 3 2 313 0 30 252 1 124.50 34 72.54 CHANGED MK.KEThs-KlIKpFYGIoGPLDEaKRppss+lGNpsFIhLahhhlhuslluhhLutpaPp..lAhhh...shlhlhshhsusYlhaps++ptlsslEl-htsp.ptp+phhhtulKhGlaaslhhahlhsL ................MK..KEshopKLI.K+hYGIsGPLDEa++pEs.s+lGNpsFIhLFalhhhusLIsLl..L...uh..ca...sp..plAhhYs...llhllsh.lh..usYl.s.hph.+.+ptl...st...hE.....p...h...h.s.t.....Kpp+.p....h....+....h.....u.lphGlhaulhhahh...h............................... 0 2 12 17 +11516 PF11684 DUF3280 Protein of unknown function (DUF2380) Pollington J anon Pfam-B_4754 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 27.70 27.70 29.30 34.20 26.20 27.60 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.39 0.71 -3.98 26 121 2009-01-15 18:05:59 2008-09-18 09:19:21 3 3 79 0 53 129 9 129.80 30 79.71 CHANGED s..tplAlFshphlDoSspssh.Gspss-tuRLshlsctlRctlsppGtapllDluPss...tpls+hsshtpCsGCctchApcLGA-hulsGpVQKVSNLILshNlhl+Ds.coGchlputsVDIRGNTD-SWpRGhpYl...l+splh ................plAlhshphhDsotpst...s.pss-ttRLthhpstl+ptlsppG.apl..lsh.s.sht...tthtp.......s.....s....htpCsuCthphAcphGA-hslsGhVpKlSsLIlslslhltDs....coGchltstss-lRusoD-oWpRuhpal...lcp.h.......................... 0 15 30 36 +11517 PF11685 DUF3281 Protein of unknown function (DUF3281) Pollington J anon Pfam-B_4757 (release 23.0) Family This family of bacterial proteins has no known function. 
21.10 21.10 25.00 23.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.81 0.70 -5.22 6 95 2009-01-15 18:05:59 2008-09-18 09:36:49 3 3 24 0 4 63 1 178.40 40 67.00 CHANGED KKKlLIGuslISSssLLuSCGKoETAsELRIVDpCNssNDLCcFELsDAlVSRYTNlLGKTIERVESQTPL+..cIpGTITWNsPAGAoLADNo-VpTcLGsuCQsDsCTANSNPTAYNLssGoNoISVSGhVTVsGKplDLAo-V.PslIDTttVtsS.HVFPTGsLPsGLTLpsLVssLNhsSc.....sAHGTFSusGSNL+ITCsoGYEWLDstsPsYG.phThucssRuVAhspW.p.....-psshhstptDshs.hTpNG.hsss..sshsWpAGCWs .........................................KcKlLIuhsllSusslLtuCuKoETsp-LRIV.pCsss......pD..........hCchEhsss.sppYss.LsKpl.h.psppslp..tlptolpW...tsuphhs.ssl.sphs..tCpsssCssstNPsAaph...Gpp.lo..V.GphTVssK.......phslspph...s.sh..h..o..hhF................................................................................................................................................................. 0 4 4 4 +11518 PF11686 DUF3283 Protein of unknown function (DUF3283) Pollington J anon Pfam-B_4778 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 26.00 35.80 24.90 24.30 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.77 0.72 -4.17 10 120 2009-01-15 18:05:59 2008-09-18 09:52:53 3 1 119 0 13 56 3 60.80 64 84.46 CHANGED NLSLLPssEKN+IELDKQASFhVW+lKpAKsGPEsItcphpKlsDssE+saFcQulEKYK+ NLuLLsAsEKN+lELDKQASaLVW+LKpAKuGPEcIscQhuKIusEuEKpWFQQSlEKYKR. 0 1 4 9 +11519 PF11687 DUF3284 Domain of unknown function (DUF3284) Pollington J anon Pfam-B_4781 (release 23.0) Domain This family of proteins with unknown function appears to be restricted to Firmicutes. 
23.70 23.70 24.80 24.20 23.50 22.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.34 0.71 -4.37 15 343 2012-10-02 19:24:03 2008-09-18 10:54:25 3 1 293 0 39 178 1 119.40 28 84.05 CHANGED M.cIspp...LshsscchFsplhpSshhDlcptTG+clthtpL.sGhsYpKpasstt....puplc..IT..chttsphYphpTsos+ssapscYplpsls-spoclpasEphpspshhtphNshlsshlhuh .................................................McIspp...LplsucphasplhpSllhDl+psTG+p.l.ph.cpL.pGapY..hKph....up.st.....puplc....Is..chhtsphYphphsoscsp.hslpYplps.l.s-s.p.sclpYsEphpshs..hhp+hNshlhuhhhs..................... 0 11 25 28 +11520 PF11688 DUF3285 Protein of unknown function (DUF3285) Pollington J anon Pfam-B_4791 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 82.00 81.70 19.20 17.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.12 0.72 -4.34 13 68 2009-01-15 18:05:59 2008-09-18 11:37:32 3 1 68 0 29 68 90 45.00 62 71.33 CHANGED ssPsPSaVKLAMRNMVRKGupSLhHFuLTslGLLGhllhlAaLsR ...pPsPSaVKLAMRNMVRKGupSLhHFsLTslGLLGhLlslAaLsR. 0 5 19 27 +11522 PF11690 DUF3287 Protein of unknown function (DUF3287) Pollington J anon Pfam-B_4801 (release 23.0) Family This eukaryotic family of proteins has no known function. 27.00 27.00 27.70 27.70 24.60 24.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.42 0.71 -4.00 15 46 2009-01-15 18:05:59 2008-09-18 12:48:28 3 5 6 0 16 44 0 106.90 28 35.22 CHANGED sp-h.s............t.........sPtah.sscsFpPu.........sVhhPpslh...phssc-+ccshsFhcpVtspH+chscc+pcLthKhQsl......hERch+tLEScPtcW.chshsplAcIPchLh.alctpspss ......................ps.sss.............ss.a..sscsF.Pu.........slhhPpshh...phssc-pccshsFhcplsspH+chpcc+pcLthKhppl.......ERch+tLEucPhpW.hhshsphA.lP..hh.hh..tst.s................... 
0 15 15 15 +11523 PF11691 DUF3288 Protein of unknown function (DUF3288) Pollington J anon Pfam-B_4815 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Cyanobacteria. 25.00 25.00 28.00 37.00 22.50 16.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.11 0.72 -4.25 18 73 2009-01-15 18:05:59 2008-09-18 13:06:01 3 1 73 0 29 74 110 93.60 43 93.36 CHANGED p-QsHPhappDRphVDpLLsps..P....s-.sLsELARLhIRYcsFPGAcDlppDL-+lLppWpLoc-cLap+TRplaucG............tVhsutssp-p-Dh ....-QpHP.appDRshVDpLLspc....P....s-hsLs-LARLhIRYpsFPGAc-IQpDL-+lLppWpL.Tc-cLFpKTRpIaupG............tVapstss.p-pps............ 0 5 19 27 +11524 PF11692 DUF3289 Protein of unknown function (DUF3289) Pollington J anon Pfam-B_4824 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Proteobacteria. 25.00 25.00 38.70 29.90 21.40 21.40 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.97 0.70 -5.13 6 334 2009-01-15 18:05:59 2008-09-18 16:00:26 3 6 270 0 35 192 3 246.80 54 93.64 CHANGED Muss.......LuFPhslFpTQ++hNDYuADDM+sGDLo-ppLppsFsLpcVSs+lD......PY...pLp+hssFs..p.pah....st+hchlT+pcCscILFDEhRshShsFShaGsY+pLIs+hIsHMQpssGusFpDhpLspAh+-hIhsDpopsSolttIKpIlsctlDacptlaP.phpsphthsls+.shLPKFsRhpDpFNGLGIoVHDlaATpIoIcSLplppcpYpAlV+YpsQDHFGLDcpDI.ph+F+sFpFFRlWFVLQRaNcFGaKPFMTNMcAslpIsGs 
........................................................h.s...........lshPCTLFETlshFDDaSADDMQYGDMsEpDF.LSLGLSDISAKVDPY...RLIK.Y.c..h....uP...s.shuh.....ssoSGsKISppECsDILFuEMK-LAK.MF.SFF.G...pYKoLIc-LI-HFRYG....NG.ssF+.Sp.................pLNhu.F+E...+Ipp..h....s.h.N.......S..slp....lIK....csI.-ss.I...sps.s.s.....h.Y..p......s...h....hhpp..I+......s.pLL.....u....SRL.......sKF........N.......cF.cDs.FNGLGISVHDIsAp+IoLhShQ.h........Y......s.h.uao.ATlHFhuQDHFGLDsTDI....KN......ph......Ys.+a....RF.FRIWFhLQRa+c..F..A.........FKPFhTNhpsh......ct......................... 0 10 16 23 +11525 PF11693 DUF2990 Protein of unknown function (DUF2990) Pollington J anon Pfam-B_4826 (release 23.0) Family This family of proteins represents a fungal protein with unknown function. 21.10 21.10 21.40 22.00 20.70 21.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.96 0.72 -4.11 6 16 2009-09-10 22:19:26 2008-09-18 16:02:46 3 2 16 0 12 19 0 63.40 49 26.64 CHANGED lusLsAsShAAPshFDNhYDYSDDLAEFLGRVSKaI-su.KDlhsuosTCDTSpIALPAaASGLP ..usLsAsuhAAPs..hF-slYDaSs-LAEFhu+VSKaI-su.+-lhs..uosoCDsSKIuLPuaASGLP.... 0 1 3 9 +11526 PF11694 DUF3290 Protein of unknown function (DUF3290) Pollington J anon Pfam-B_4838 (release 23.0) Family This family of proteins with unknown function appears to be restricted to Firmicutes. 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.74 0.71 -4.47 10 692 2009-01-15 18:05:59 2008-09-18 16:08:16 3 2 519 0 48 304 4 107.30 33 97.65 CHANGED sFYoYsYlpoQtsppsahthlLhllhllshlhFshhYlRc+hsTKYRDLuIIulLhLlLhlGIQIssapsspsscsQspphlsFIcsVAcDhsVccs-VaVNoouhsDGhIV+Is...sphYplshss-sssYhLpKscLhs....chV.VcK ..........pFYshpYl.sp.s..pshlhhhhhhhhlhhhhhhhhhhhpt..pht.caR-l.hh..h..h..hhhphppap..pstpsp....pt.htFlc...lupp.tVs.pplhlNoot.t-G.hll+ls...p.aYch..ss-.p..pYhLpch.Lhp..p.lchl.................................................... 
2 11 24 31 +11527 PF11695 DUF3291 Domain of unknown function (DUF3291) Pollington J anon Pfam-B_4872 (release 23.0) Domain This bacterial family of proteins has no known function. 20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.99 0.71 -4.76 34 150 2012-10-02 00:20:33 2008-09-18 16:51:27 3 2 130 0 59 154 71 131.50 34 84.44 CHANGED LAQlNlu+.ltu.shDsPpls-FhssL-plNuLA-puPGFVWRhp...s-susu...T.shpshsDPt.....hllNhSVW-sl-uLpsFlacoh.Htphh+RR+EWF.cphscsphVLWWV.PsGHp.PolsEAhcRLpaL+ppGsostAFsa+psa ....................................LAplNluh.hht.Ph-s..P.....ht-FhssLs.lNuhA-pusGFlWRhp...s-.s.ssu.........sslphh.s-st.....................hlsNhSVWcsh-uLpsFsapsh.Htphh+..R+p-WF.pph..t.csthshWWV.ssGch.P.ohpEAhpRLphLcppGsoshAFsapp.......................................... 0 17 43 53 +11528 PF11696 DUF3292 Protein of unknown function (DUF3292) Pollington J anon Pfam-B_4874 (release 23.0) Family This eukaryotic family of proteins has no known function. 
23.70 23.70 24.70 23.70 22.30 23.60 hmmbuild -o /dev/null HMM SEED 642 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.75 0.70 -6.65 8 115 2009-01-15 18:05:59 2008-09-18 16:53:43 3 4 60 0 98 119 2 426.50 31 86.56 CHANGED GPTDSHsLSQl-s.-E..KGLAQKAGsos..cVoDlGWs.pSDpl-EclVsGLSNEDLWMLIRRFNKQIYaVKAVP-uPLQcLDLNRAEDEpFSPDKLRATLERFYTTVlVGLTsFhKHIARLRSWREPcRTssFCu......VYFlAWLLDhLlPTIFGhLlsLVlaPsoRpaLFPPAPlALVD..osTGGVQKPKAGVLGSHDSlTGAPEKYKGEAAEQEASNLlsSlAoVAVGSAAGKHDQGsP-s.....APMEuSVPDAhDlsu+uADAQuAAtG-sPs-sHDKTRcPMKcTVhNuANpuMRVlSDIsDsaERFuNALSPTPPFsusTPpLRLuGlLuuGhLsSLlTSSYVFlKhuTFhlGhuFFGDPllQRslsYLN+caPpWp+LL-LQNoLLKGVPTNAQLTLTLLRIGEuNuAPLPPPPSS.......SL+KsPopPAolsccplsL.sASppEIspAAAP-Ppsssscpcppsc..K...KKshGuRlluFF+GTTATGIEoKLAlDRsRAtAGSpHAKN+lGlL++KGccTL.PhGPlcFDARYKGKRGsVVIDSSpcPPlLYFTTD.oshLsDhRLEsRKKGoVLFsIPVTDIpEhRKIGGLGWKGKLlsGWAVG.oKEVVDGLVIsG.ccscQpYQLTAMpsRNpLFNRLVAIDGQVWESa .................................t....................................................................hh.sh.sp...ph........a.h...................s.tthDh..up..ppphs..spplRuplERhYhslh.thhthh..pclhRLhoW..p..-..RTthhhh...................sY.huWhhshlhshh.hhhhhLl.h.s...R.hhFPsss.uhhs....psu.shtps.ts.huspsshT..Gus..cphc.GEusEp...EA...pshltuh.sslsh......tshs.up..s.....t.s.....t.........................t...tt....t......t.............................................tscsc..h.........tth.........h.t.th.t..hphhtth.D.hE+hus.................hh.....................a..........h.h....hh...hh.h..hhhs..hhh+..shhhGhshFG.pPll.......hhthlsp.hspWpp.h.....hphpp..o....l.h+GlPTsuQLslTLLRlGEhstsPlP.sPP..............................................................t..t..........................s.tt.....httt.t.........t................t......t...............................t.t...phhthh+t.s..htts..hpsththsphht..h.s.....t........s+p.hluhl...................t..............................................GPhp.......F.s+hptppGhhhlst..t....t............s...l.a...............stt.........................
...........................ts................aphslt-l.phpKht......G...hGh.pt+hh...sths...tp.......chhDGh.l.....p..tpth.......hsuh..RptlFNRLhuhst.ph.Wp.......................................... 0 30 49 83 +11529 PF11697 DUF3293 Protein of unknown function (DUF3293) Pollington J anon Pfam-B_4879 (release 23.0) Family This bacterial family of proteins has no known function. 25.00 25.00 26.50 25.30 24.20 24.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.37 0.72 -3.86 40 218 2009-01-15 18:05:59 2008-09-18 16:58:22 3 6 213 0 70 177 62 70.30 36 47.65 CHANGED ITAaNPhS.phhosppNphtsppLtpcL...tphshshh..sshG....tssst...sW.hEcSahl.slshp....pAhpLGpcFpQNAI .....ITAaNPtS.....phlSsp-NphpsppLtppL...hph.shs..hh..ps.s........sssph...pW.hEcSasV.s..hshs.......pAhpLupcFsQNAI.................... 0 17 37 57 +11530 PF11698 V-ATPase_H_C V-ATPase subunit H Mifsud W anon Pfam-B_2481 (release 6.5) Family The yeast Saccharomyces cerevisiae vacuolar H+-ATPase (V-ATPase) is a multisubunit complex responsible for acidifying organelles. It functions as an ATP dependent proton pump that transports protons across a lipid bilayer. This domain corresponds to the C terminal domain of the H subunit of V-ATPase. The N-terminal domain is required for the activation of the complex whereas the C-terminal domain is required for coupling ATP hydrolysis to proton translocation [3]. 
21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.44 0.71 -4.18 34 388 2012-10-11 20:01:02 2008-09-24 15:29:57 3 8 307 1 258 379 5 116.70 42 25.79 CHANGED +phooFDcYhuElpSGpLcW.SPsHps........ppFWpENsp+h.c-ssacll+pLsclL..............ppop..DshsLAVACpDlGchl+h.aPpG+sll...pphssKtclMpLhsc..sDppV+hpALhulQ+lhspsWc ................t.pLooFDEYssElpS.G+L.cW..SPsH+s........ppF..W+ENAt+h..p-csacll+hLsclL................psSp....DsplL.A..V.As..aDlGcaV+..............p.....a.Pc.G..+pll...-pLG...uKphVMpLhs..c..c..DppVRapALhAlQ+lhhpsap......................... 0 97 150 216 +11531 PF11699 CENP-C_C Mif2; Mif2/CENP-C like Mistry J, Wood V anon Manual Domain CENP-C_C is a C-terminal family of fungal and eukaryote proteins necessary for centromere formation. CENP-C is the inner-kinetochore centromere (CEN) binding protein. In the budding-yeast, Mif2, the yeast homologue, binds in the CDEIII region of the centromere, and has been shown to recruit a substantial subset of all inner and outer kinetochore proteins [1]. Mif2 adopts a cupin fold and is extremely similar both in polypeptide chain conformation and in dimer geometry to the dimerisation domain of a bacterial transcription factor [1]. The Mif2 dimer appears to be part of an enhanceosome-like structure that nucleates kinetochore assembly in budding yeast [1]. This C-terminal domain is the region via which CENP-C localises to centromeres throughout the cell cycle 2,3]. 29.00 29.00 29.00 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.83 0.72 -4.00 11 190 2012-10-10 13:59:34 2008-09-25 10:52:40 3 10 165 2 133 209 11 84.50 36 12.58 CHANGED F+htKhhsp.s.FhuoGhlcLPs.uu.KpspsS+csphlFaVhpGtlpVolpcspFhhupGshFQVPpGNpYsltNht.sc-ApLFFsQ ..................h+htKhhsp.s.FhuuG.hlcLPP.sutKp.ps.oppsp.h.lFaVhpGplpVTl.........pc.....spFh.lspGshF.VPp.GN...hYslpNhh.spputlaFsQ............. 
0 37 73 106 +11532 PF11700 ATG22 Vacuole effluxer Atg22 like Wood V, Mistry J anon Pfam-B_14077 (release 22.0) Family Autophagy is a major survival survival mechanism in which eukaryotes recycle cellular nutrients during stress conditions. Atg22, Avt3 and Avt4 are partially redundant vacuolar effluxes, which mediate the efflux of leucine and other amino acids resulting from autophagy [1]. This family also includes other transporter proteins. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 477 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.52 0.70 -6.02 50 1392 2012-10-03 03:33:39 2008-09-25 16:14:42 3 4 1200 0 527 2826 2253 417.40 25 93.84 CHANGED so++ElhuWYhYsauspsaslsshuoalPhhlpphucp.tGh.hssp..sC.........................................................tsspCllhhhG..hplsssSashYshulSshlQullhlsluuhAD.....ausaRKplLlsFuhlGuhsshhahhl.ssppa....hluulLhllus.ssa....us.shVhhNSaLPhLscspsphptttt..............................................tt.......t..t..h........................................................oplSuhGhulGaluullltllslhllhshpss...........shsl......plslhlsGhWWhlaolPshhhL+s+su.......................tthpshhsh..lhhua+pLhpsh..+psppL+pshlFLsuWFlluDulsTlsususlaupspLshssss.....LshlsllshhsuslGuhhas.hlsc+hslpsp.........psllshlhhhtllslaG........hlGhh.t......hhGhppsWE...........hashushYGlshGulpShsRShaupllP......GpE..utFFuLauls-KuSShlGPhllGhIhDtT..G...................slRsu..........FhFLslLhllslslh.hhlsl-+G+c-ut 
...........................................................................................................................................................................................t......huWhhaDauspsa.shhl.h.o.h.h...h.shah.t.s.h.s.......................................................................................................s.....h.ps.s.u..hh...a.shu....luslllul.luPllGshAD..........ht.uh.++...h...hl.h..h...h....s...h.l..s...s..h...ss..h.h...h.....h....hl.....s..ssth...................hhsl..sl...h....h..l..u....t...luh........ss..u.l..hYs....uh.L..splsssc..ph........................................................................................................................................................................................s+lSuh.Ga.uhGY...l.Gu.hls.....h.l..l.s..l.h...h.h.h.t.spts.............................t.shs.h..............R.ls.h.hh.sAlWa...h.l..F.s.lPh..hh..hl..cp.t..........................................................................................h.t.h.tph.....lt.tu.a.p...c...L.....h...p.......o.....l.......p..p...l..p....c...h..+..s........l....h...h....F..L..l.A...h..hh...a..p..DGl...s...s....l..hs....hu.s.ha.u.t.........t..t..h..........G.hs...ssp...............lll.h...h.l.l.s...p....l..s..A...h......h.uul....l...hG....hl.sc...+.h.Gs..+................ph.l....h..h...s.l....h..hh..h.h.l.s..h.h....u..............h.h..............................h.p..s.s.ht.....................F.a.l.h.u..h...l...l....G.h.h........h..G..s...h.Q...u...h...S...R...Shhu...cl.hP..........spp.......upaFGh.Ys.ls.G..Ks.suhlGPhl..h...uhh.sp..ho.....G.............................s.h..p.....h.u....................hh..s...l..h.l..l.h.l..l..Ghhlh...hhl...........t................................................................................................................................ 
0 195 351 455 +11533 PF11701 UNC45-central Myosin-binding striated muscle assembly central Wood V, Coggill P anon manual Family The UNC-45 or small muscle protein 1 of C.elegans is expressed in two forms from different genomic positions in mammals, as a general tissue protein UNC-45a and a specific form Unc-45b expressed only in striated and skeletal muscle. All members carry up to three amino-terminal tetratricopeptide repeat (TPR) domains towards their N-terminal, a UCS domain at the C-terminal that contains a number of Arm repeats Pfam:PF00514 and this central region of approximately 400 residues. Both the general form and the muscle form of UNC-45 function in myotube formation through cell fusion. Myofibril formation requires both GC and SM UNC-45, consistent with the fact that the cytoskeleton is necessary for the development and maintenance of organised myofibrils [1]. The S. pombe Rng3p, is crucial for cell shape, normal actin cytoskeleton, and contractile ring assembly, and is essential for assembly of the myosin II-containing progenitors of the contractile ring. Widespread defects in the cytoskeleton are found in null mutants of all three fungal proteins [2]. Mammalian Unc45 is found to act as a specific chaperone during the folding of myosin and the assembly of striated muscle by forming a stable complex with the general chaperone Hsp90. The exact function of this central region is not known [3]. 
25.00 25.00 25.10 29.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.77 0.71 -4.58 31 280 2009-01-15 18:05:59 2008-09-26 10:22:29 3 10 207 3 188 266 1 194.20 28 23.10 CHANGED sspplcslLppLs.......................................................................................phstssRshAslhhs+h...L...csppcshpchhscalpshl.....pcsch-..................................shlhshsslsslaPsss-lsuslhhsc.....shhthlhshss............tcscphphssLELLsuAChscssRphlscphhphLcphhppspppt..........................hphhAslsLsKl .............................................................................................................t..cplchllspLh...........................................................................................h.lssssRhhAolhls+l...................L.....-st+-p...a...pchhpcalpsph.....ppts.h-..................................splpAhpsloslh.ussDlusplhttc.....Glh.ph.llsLss............scpcp.phsslEhLhtAssctsptshIhppslslLcclhppscs-p...........................l+lhAhVuLsKL.................. 0 46 82 138 +11534 PF11702 DUF3295 Protein of unknown function (DUF3295) Wood V, Coggill P anon Wood V, Chahwan C Family This family is conserved in fungi but the function is not known. 
21.40 21.40 23.90 23.90 21.30 21.30 hmmbuild -o /dev/null HMM SEED 507 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -12.92 0.70 -5.51 9 124 2009-09-14 14:02:36 2008-09-29 10:57:28 3 5 88 0 107 121 0 354.50 33 70.17 CHANGED pptuuDlPELSuSV-SssS-pApchppphcp.......................Phs.th+PslhRpDSh.uShuRG+E+HlTShsLE+MVloIp..EKKsLEPLusshssluP....ht..sppspPp.ssSpoospc.s...........p.hpsoESsooousssNsS-.uu..slssusTSsouothhsp.......sSllRGF..SPS.lSoShRSpsphussPsPs.......psusshK.t..KKKtuhFTLGGSSGs-.EsSF-DRh...............shpss++poL..............Ss-hp+usssp................KKssoFccpV.sh+shp-ps.-sD.sthps---.VsESAI..-DD-DSDW.EDSlpESGpuSl--+phFQRVDS+PN.LsS..RRSLLThMhHQss+h....s.sSRSosALpRSR.ToPNGPSlsASPs-sDEtsLhM.RG...........splPRScshlh+sos.pSsuH....SPRTTRRNMLATELTESLRRpLLWERpQKuAT..ANAhLKRRHTupD.hANLppYPp......................sssscDK......Dst..ssSaN+Ys.chG.hpYHspGW ..............................................................................................t......hP..StShtS..s............................................c..h.t.............phhss.thtphh..l......pt........h.................................................................................................................................................................................................................t....p.p...hasl.GsS.ss..t.........t......................................................ott..tps...............................................h.sshtp..p............pshpt......p............p..spp-.l..s...ESAI..t...-D-....ss-W..EDShp.-SGpuSh--c..hFpRV.-S....+s..s.LsS.............R.SLlT.hhht.psp...+............s.sSpSosAl.p.s.R.....sssshssSP....c..s--...ssLhM..+u.................chP+Sps..hhshs....tthtt...........SPRTTRRNMLATELTESLRRpLLWERQQK..sss.....ss...AhhKRRHTupD.hssLppaPt..................................s.ht.tsp......p........thsphh...p...s..tYHspGW........................................................................... 
0 21 50 91 +11535 PF11703 UPF0506 UPF0506 Bateman A, Ranganathan S anon Ranganathan S Domain This uncharacterised family is found in Schistosoma genomes. Although uncharacterised it appears to belong to the knottin fold. The sequence is composed of two repeats of a 6 cysteine motif. 21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.82 0.72 -4.01 11 31 2012-10-01 22:06:18 2008-09-30 15:03:58 3 2 6 0 18 81 0 56.50 41 53.64 CHANGED CRo.GQhCSRThF+RCCGNLVCQL+GaFNGsCVpCLAEt+FChhSSECCScRCRhFRCR ...........C+phGp.Cs+ThF.p+CCG.shlCpLpu.FpGpCV..p........CLstsphChpss-CCSt+Cphh+C+....... 0 17 18 18 +11536 PF11704 Folliculin Vesicle coat protein involved in Golgi to plasma membrane transport Wood V, Coggill P anon Pfam-B_3276 (release 23.0), ADDA_17305 Family In yeast cells this family functions in the regulated delivery of Gap1p (a general amino acid permease) to the cell surface, perhaps as a component of a post-Golgi secretory-vesicle coat complex [1]. Birt-Hogg-Dube (BHD)4 syndrome is an autosomal dominant disorder characterised by hamartomas of skin follicles, lung cysts, spontaneous pneumothorax, and renal cell carcinoma. Folliculin is the protein from the BHD4 gene and is found to have no significant homology to any other human proteins. It is expressed in most tissues. These same symptoms also occur in TSC or tuberous sclerosis complex, suggesting that the same pathway is involved, and it is likely that the target is the down-stream Tor2 - an essential gene. Folliculin appears to bind Tor2, and down-regulation of Tor2 activity leads to up-regulation of nitrogen responsive genes including membrane transporters and amino acid permeases [2]. 
21.10 21.10 22.40 21.40 20.70 20.70 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.94 0.71 -4.67 20 292 2009-01-15 18:05:59 2008-10-02 11:05:11 3 3 213 0 195 274 0 170.90 24 32.06 CHANGED stsaVSopaPssspcYshLpphsh+oLSsEs..............shsusPlhFGDshcGasluhsFKlpDspARGscR+Yullllscsc........hp.LhpsWsalsptFsclIshIpppspp...tppptpppppsspps...................................sth.ss..........hhRtpshpps+SLsELTsD-tIFl+lHtahshlLpslst ....................................h...alShp.s..st.athlpp.shR........sLStEh.............................................tsscsuslhFGDs.ppG.a.shuatFplhD.pARGhpRhYullhlsscc.........t.lhppashlhsthp.c...lhpplpstsc+hh..tpppttppp.ts.p.s...........................................................t...s..............p........t.....t.........tsRsLsplssp..hahplH..hs...lL.th.s........................................................................ 0 45 80 143 +11537 PF11705 RNA_pol_3_Rpc31 DNA-directed RNA polymerase III subunit Rpc31 Wood V, Coggill P anon Pfam-B_203281 (release 23.0) Family RNA polymerase III contains seventeen subunits in yeasts and in human cells. Twelve of these are akin to RNA polymerase I or II and the other five are RNA pol III-specific, and form the functionally distinct groups (i) Rpc31-Rpc34-Rpc82, and (ii) Rpc37-Rpc53. Rpc31, Rpc34 and Rpc82 form a cluster of enzyme-specific subunits that contribute to transcription initiation in S.cerevisiae and H.sapiens. There is evidence that these subunits are anchored at or near the N-terminal Zn-fold of Rpc1, itself prolonged by a highly conserved but RNA polymerase III-specific domain [1]. 
28.90 28.90 28.90 28.90 27.40 28.60 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.97 0.70 -4.42 17 327 2009-01-15 18:05:59 2008-10-02 11:50:45 3 7 238 0 222 301 0 213.80 25 92.96 CHANGED MS.......hRsuutsshs...........shsslsashs..tth..stssss.aPph.slPls..t.ssscEchhsthhhsatst.h+-uPaYTssh..t...............t.tt.............t...pcGlpRYSDKYppp++hssolcp..cP.aphchFPcELatshthsppt.pth.......s+h+pcssl.ph.......p.D.s.thphLE.Khpp..sp-.-ctpptct...........---cp-E-.D-Da....D---D..sDY.AEpYFDsG-t......DDhsD...................................tssa ....................................................................................................Mu........RGtu.tut.hsh................shptls.h.sts...t...................sh...pP.......s....slFP.........sh...hP.ls.....h.hsp.p-chhlthttphppt.h.+puPaahtst...t.....................................................................ppslcRYSc.+.Y.pt..t.p............t.....ph.ph........ps..hs..h..chhPp.ELhtphph...pp.p.t.........t.h..h....................sphttp..sh..t.............................t-....p..ch...p...t....L..EtKt..p.p...t.s.....p.-p-ptpptpp......................cccpc--...t..t---h.....-.--.p.-..tsDY.hppYFD.sG-s......D.D..-..............ts.................................. 0 59 105 164 +11538 PF11706 zf-CGNR CGNR zinc finger Bateman A anon Pfam-B_19432 (release 10.0) Domain This family consists of a C-terminal zinc finger domain. It seems likely to be DNA-binding given the conservation of many positively charged residues. The domain is named after a highly conserved motif found in many members of the family. 20.80 20.80 21.10 20.90 20.50 20.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.56 0.72 -4.45 80 961 2009-01-15 18:05:59 2008-10-03 09:25:53 3 5 409 1 366 972 15 43.60 41 22.67 CHANGED Rl+tC...ssssCshhFlDpS+sssRRWCSM.......spCGNRsKsspaRtRp ............RL+tC.....ts.ssCphhFlDpS+stpRRWCoh.......psCGNRt+sttaRsRp........ 
0 143 279 331 +11539 PF11707 Npa1 Ribosome 60S biogenesis N-terminal Wood V, Coggill P anon Pfam-B_2493 (release 23.0) Family Npa1p is required for ribosome biogenesis and operates in the same functional environment as Rsa3p and Dbp6p during early maturation of 60S ribosomal subunits [1]. The protein partners of Npa1p include eight putative helicases as well as the novel Npa2p factor. Npa1p can also associate with a subset of H/ACA and C/D small nucleolar RNPs (snoRNPs) involved in the chemical modification of residues in the vicinity of the peptidyl transferase centre [2]. The protein has also been referred to as Urb1, and this domain at the N-terminal is one of several conserved regions along the length. 22.10 22.10 22.40 23.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.91 0.70 -5.44 42 265 2009-01-15 18:05:59 2008-10-03 11:36:46 3 8 231 0 187 273 0 302.70 22 18.51 CHANGED sths-lhpsWshhspsss..shhshlsplLsthlthlssphp....hpphG.sl................s+hlLpp..c......hchl.h+uLsutc.............plhp..ssLcLLsplVp.suuthAcclhssh..ch..shphh.phh..pppphtp.ptt..............olRpshlcFhLuhLphs.ssth+pclLpp+ch.hsshhctlpp.DshpslhplLpslcscllp-p.sls+stKsclasphsLp+lss.LYthcsp........................slschs+chLhtlsTssphGlhhssstha.tt......................................h.ppphplpNphL.hpllp.tL+PtpsshptpL.......llpIlcssPELlusYhsch.tphp....P+hossWhuts 
......................................................h....phhphaphhtt.tpp.....h.hl.thLthhlhhhssphp......ht.hu.tl....................s+hllpp.c......hchl.csLsupp...............phhp...ssLpLLsth...Vs.pusthAcplhpph..ch..shpslsphhphpctphtt....................................slRpshlpFhLuhLth.....s..ssthhpplLpp...+ph...hsshhct.lpp.Dshphlh.lLpsLcppll.............p..........s.............p.sls+..stKsphFspt..sL.pplss..LYshpst...................................................................t..tltchsHphLhtlC...o...s.p.pGlhh.ss....................................................t.tp.th.tht.NthL.hphlp...hl.+.shp...ss.hptcL.......llpIhcssP-LlssYhpph.hpht....P+ho.stWhs........................ 0 56 98 151 +11540 PF11708 Slu7 Pre-mRNA splicing Prp18-interacting factor Wood V, Coggill P anon Pfam-B_999 (release 23.0) Domain The spliceosome, an assembly of snRNAs (U1, U2, U4/U6, and U5) and proteins, catalyses the excision of introns from pre-mRNAs in two successive trans-esterification reactions. Step 2 depends upon integral spliceosome constituents such as U5 snRNA and Prp8 and non-spliceosomal proteins Prp16, Slu7, Prp18, and Prp22. ATP hydrolysis by the DEAH-box enzyme Prp16 promotes a conformational change in the spliceosome that leads to protection of the 3'ss from targeted RNase H cleavage. This change, which probably reflects binding of the 3'ss PyAG in the catalytic centre of the spliceosome, requires the ordered recruitment of Slu7, Prp18, and Prp22 to the spliceosome. There is a close functional relationship between Prp8, Prp18, and Slu7, and Prp18 interacts with Slu7, so that together they recruit Prp22 to the spliceosome. Most members of the family carry a zinc-finger of the CCHC-type upstream of this domain. 
22.90 22.90 24.80 23.10 21.90 21.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.99 0.70 -4.61 41 344 2009-01-15 18:05:59 2008-10-03 11:54:42 3 8 276 0 246 345 2 231.90 39 50.39 CHANGED phsaDuKRDRWsGYDsspacpVl-ca-ph-pt+cphptpp.................................sctttcpspst-ptpthhccs-h...................spppps..ssRsLRlREDTAKYLhNLcssSA.......hYDPKoRsh+-ss........t.ssthhts-sFlR.toG-....utchpchptaAWEstc+Gs...............................slclpAsPTphElhtKchppc.+....-pt+ppp+pslhcKYGspp...h.t..hp.hlspoEphhchpt.......httpths+SpatEDlhhpsHspVWGS .......................hsaDuKRDRWNGYDs.p.-a.p.p.ll.--.Ypch-tt++ph+tpp.......................................................................................sptpt-p.spps-p.cpphs-c.-hs...........................phssppphssRNLRIR.EDsAKYLhNLDs.sSA...........hYDPKTRuM+-ss..h..............tts.sts.asu-NFhR.hoG-....shphtphQhaAWEu.c+Gs..........................................................................clHlpAsPTphEhLhKphc.c+.......-ph+pp....p+psll-KYGupc...thp.s.s.pc.....h...hl.spoEpaVEYsctGt.....................lKGt.+thspSKYtEDlhhsNHTslWGS.................................. 0 88 139 206 +11541 PF11709 Mit_ribos_Mrp51 Mitochondrial ribosomal protein subunit Wood V, Coggill P anon Pfam-B_4972 (release 23.0) Family This family is the mitochondrial ribosomal small-subunit protein Mrp51. Its function is not entirely clear, but deletion of the MRP51 gene completely blocked mitochondrial gene expression. 
21.80 21.80 22.10 21.80 20.10 19.80 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.85 0.70 -5.21 20 145 2009-01-15 18:05:59 2008-10-03 12:51:24 3 4 133 0 108 143 0 327.90 26 79.88 CHANGED Mutt.......husLLRsSRlAplspsh.sh.psstp.............hPTHQlItT.sSShtR.u-WGLKpsLPs+hso.....palphN-lDThcclsDaEssu.saphsph+FQEhGlslsts................sppsPlFpstsspo..sppsshpshhs...........ph.hhsshosuchpchLpp....lssh+scFppaLhcptsct.................................................hsspsltchltcFLs..............................tsspphsspusschpuouGLSYs.pGpL.........psoPNGVptphlsPGRhl.....psscp.hsAlGGhVAsssss..................tsshhpthsschsRptph.aclpcAplsp.sGplhltssss .....................................s.......p.hupLLRpSRlhtlspsh...ststtp................aPh+QsItT.sSohtR.u-WGLKpslPtK.ss...............shlpls.p.lDThEpl.T-Fcssu.spthsht+aQEhslPlshsh..................................................................................spp.pshFcsp.css...sts.shts.ht..................................................................phsh...hsshots-hpchLp+....l+ph+scFpphLhcphspp.....hp....................................................................tssttltsh.....lhcFLsh.s.................................................tsps.hs..ppssspspsouGLSY.pssth.................ppp.....psl...s+hl......hPtphh......tttpt..hhuluGhlsps..t........................t.stht..th.ssh..tttph.hps.pAplsp.pGpl.ltst.s.................................................................................................... 1 29 59 91 +11542 PF11710 Git3 G protein-coupled glucose receptor regulating Gpa2 Wood V, Coggill P anon Pfam-B_11657 (release 23.0) Domain Git3 is one of six proteins required for glucose-triggered adenylate cyclase activation, and is a G protein-coupled receptor responsible for the activation of adenylate cyclase through Gpa2 - heterotrimeric G protein alpha subunit, part of the glucose-detection pathway. 
Git3 contains seven predicted transmembrane domains, a third cytoplasmic loop and a cytoplasmic tail [1]. This is the conserved N-terminus of these proteins, and the C-terminal conserved region is now in family Git3_C. 20.60 20.60 20.60 20.60 20.50 20.40 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.37 0.71 -4.76 19 259 2012-10-03 04:04:29 2008-10-03 13:01:14 3 4 124 0 213 414 3 195.20 24 38.37 CHANGED sht.RlluIsuSshSlhusllulYhhhsIs.RR+.............lFRHp............LIhhLlhhDhlKAllLllYPshlLs...psssYssssFCpllGFFTAhuIEGu.................DlAIlsFAlHhulLIF+Ps............pssphEGGLY+aRhalYslsh.llPlllASLAFIs.............................tsGYpshssWCYLPscPhWYRLVLSWlPRYhIhlsIhsIYhsIYhaVp+chcpl ............................................................................................................hh.......l..shu..shu.lh.u...h..h..h.s...h...hh..h.h.h.h.....p.h.h...................haR.+...p.............................................................LIhs....Ll.lu...Dhhpulsh.l.l........sh..hhlt....ps..t..l..h..s..s...s..s..h.C.hpGahhp...hu.s....uo.................s.hhllsIAlH.T.a.lhlhps.......................ht.h....p....hh.l....as.h.sh....hl......lh..hs..s.L.s..h.ls.............................ttts..Y...s..s.s.Gs.WC..Wl..ss.....c....h...................h....h...RLh..lpal...h.alh..hhs.hl.hl.Yshl.ahhl++pht..h................................................................................... 3 53 120 185 +11543 PF11711 Tim54 Inner membrane protein import complex subunit Tim54 Wood V, Coggill P anon Pfam-B_3533 (release 23.0) Family Mitochondrial function depends on the import of hundreds of different proteins synthesised in the cytosol. Protein import is a multi-step pathway which includes the binding of precursor proteins to surface receptors, translocation of the precursor across one or both mitochondrial membranes, and folding and assembly of the imported protein inside the mitochondrion. 
Most precursor proteins carry amino-terminal targeting signals, called pre-sequences, and are imported into mitochondria via import complexes located in both the outer and the inner membrane (IM). The IM complex, TIM, is made up of at least two proteins which mediate translocation of proteins into the matrix by removing their signal peptide and another pair of proteins, Tim54 and Tim22, that insert the polytopic proteins, that carry internal targetting information, into the inner membrane [1]. 32.50 32.50 32.70 33.40 31.70 32.00 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.24 0.70 -5.87 17 146 2009-09-10 21:07:40 2008-10-03 14:14:01 3 5 135 0 114 155 0 370.20 34 85.84 CHANGED NPAL+hhGl.Ps....h..+LPSRNWMIFaolsuuhsuulhYD+tEp+chppKWschVc+lucEthsssp....hsRKlTVFlAsPPs.DhLcsuhchF+cYVKPlLsAAuLDY-llptcRpG-IRAtlAERIRclRRcth......................................tspst.sh-c..sppccslhshRphhGlh.....................-.ssstGslsIGRtsaKEYlpGLHEGhLGPL-.P.........................tspsttssst...pss.............tt.t.c..tssp...ptp......-s.-ccpp...........ssstP.aIsss-YssssLsPph..................P.hh.pPshslPhPpllGFLNhPh+lYRFhs+RhhA--huccsAulV.ss.hRPap.ts.................................tphhpcEEp-W.Kp............V+ptcE.........KcpEWlp............-lVhDsRlssRM+hap 
....................................................................NPAh+..hhGl.Ps.......h..+LPS....RNW..hIFholsuuhsuulhYD++cp++hpp+atchVtplucEsl.ss.sp............hPRKlTValusPPu.D.t.LcsuhcaF+cYVKPlLsAuuLDaEllpuc+pG-lRstlAp+IRchR+phtt..............................................................-.....ptpp....h.phR.ph.hGlh.....................................-..hstGslslGRtsaKEYlpGlHEGh..LGPLp.P............................................................t.s.t..............................................tt.t.p........t...tp....p.tt....pt.pccpp....................p.pp.s.stP..alpsspYsst.pls.sph..............................................................P..h...pPshslshPpllGFhs.hPhRlaRF.hs+RhhA-plucpssslVhss...hRPaptsst..............................................................................................................................................................tphhp.pEEp-.W.Ks..............Vcptp-............................+pppWhp..............lshDsRlsp+Mpha............................................................................................................................ 0 32 64 97 +11544 PF11712 Vma12 Endoplasmic reticulum-based factor for assembly of V-ATPase Wood V, Coggill P anon Pfam-B_2410 (release 23.0) Family The yeast vacuolar proton-translocating ATPase (V-ATPase) is the best characterised member of the V-ATPase family. A total of thirteen genes are required for encoding the subunits of the enzyme complex itself and an additional three for providing factors necessary for the assembly of the whole. Vma12 is one of these latter, all three of which are localised to the endoplasmic reticulum [1]. 
25.50 25.50 25.90 25.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.75 0.71 -4.48 35 291 2009-01-15 18:05:59 2008-10-03 15:33:25 3 9 252 0 209 275 2 136.80 23 54.40 CHANGED ppLLcuo..cl.alPs.s.psc.............................................osEahApht+L+tttcpccYp+hh........t................................st.t...sp.stthcpl+pQlssllNlllSVlusshulahhsp..h.............................shshssRlLLulhsullVhlAEVslYst.....Ylp+lccA+ ..................................................................pLLcsocl.hlP.........s.c.............................................ssEhhAphp+L+tptppccYpchs+ssss.pp.........................................p...sp..s.pphcsl..+p...plhhlhNhllolhushhssahhsp.h.h................................hshssRlllulhsuhlVhlAElhlhst.hhpp.t................................... 0 66 110 171 +11545 PF11713 Peptidase_C80 Peptidase C80 family Rawlings ND, Bateman A anon Rawlings ND Domain This family belongs to cysteine peptidase family C80. 19.70 19.70 19.70 20.60 19.50 19.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.92 0.71 -4.42 23 303 2012-10-03 02:24:44 2008-10-03 15:44:17 3 64 136 14 38 286 6 155.10 33 7.28 CHANGED spastplIlQL..................psDslstcAAt....sLhuK+PssSlllQhss-......Gsh+slaus...............sstL.....sGpl+hQlVGHGcs....t.ssspLuGhsscpLAstl.....pphppthsssh......pPc..+IoLVGCshsus......ptpsuaspchhptlpsp.Gl........pssVSAppucVpV............-tpG+Kcsl ............................+achQlIlQh..................psDslst+.AAs....sLAuKHsp..sSlllQhDs-..........usaRlhYGs.....................sspL......sGK.lRWQlVGHG+s.........ptspspL.u.Ghss-pLAscL.....pphppttphss......pPc..+I..slVGCulsus..........spppuFupphhpthptp.uh........phslsu.ptcltl............stt.G++.h.h................................................................ 
0 18 28 35 +11546 PF11714 Inhibitor_I53 Thrombin inhibitor Madanin Rawlings ND, Bateman A anon Rawlings ND Domain Members of this family are the peptidase inhibitor madanin proteins. These proteins were isolated from tick saliva [1]. 25.00 25.00 26.30 26.10 19.10 18.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.75 0.72 -4.00 3 5 2009-01-15 18:05:59 2008-10-03 16:12:31 3 1 2 0 0 5 0 78.20 59 98.99 CHANGED MKHFAILILAVVASAVVMAYPERDSAK.DGNQEQERALhVKVQERoDs.DADYDEYDEDGTTPTPDPTAPTARPRLRGNQ MKHFAILILAVVASAVVMAYPERDSAK.DGNQEKERALhVKVQERs-G.DADYDEYDpDuT..TPTPDPTAPTARPRLRtNQ.... 0 0 0 0 +11547 PF11715 Nup160 Nucleoporin Nup120/160 Wood V, Coggill P anon Pfam-B_1841 (release 23.0) Family Nup120 is conserved from fungi to plants to humans, and is homologous with the Nup160 of vertebrates. The nuclear core complex, or NPC, mediates macromolecular transport across the nuclear envelope. Deletion of the NUP120 gene causes clustering of NPCs at one side of the nuclear envelope, moderate nucleolar fragmentation and slower cell growth [1]. The vertebrate NPC is estimated to contain between 30 and 60 different proteins. most of which are not known. Two important ones in creating the nucleoporin basket are Nup98 and Nup153, and Nup120, in conjunction with Nup 133, interacts with these two and itself plays a role in mRNA export [2]. Nup160, Nup133, Nup96, and Nup107 are all targets of phosphorylation. The phosphorylation sites are clustered mainly at the N-terminal regions of these proteins, which are predicted to be natively disordered. The entire Nup107-160 subcomplex is stable throughout the cell cycle, thus it seems unlikely that phosphorylation affects interactions within the Nup107-160 subcomplex, but rather that it regulates the association of the subcomplex with the NPC and other proteins [2]. 
19.20 19.20 19.20 19.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 547 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.63 0.70 -6.38 25 339 2012-10-05 17:30:43 2008-10-03 16:27:08 3 28 235 9 234 658 13 407.00 17 36.15 CHANGED lslss....................tstsst.pssspsEcshtpchhsosu.....hhppshsRhlhWRhh.....ssscsLplpslslspsh.p..........sl+lpFspslh...ssslshs-ppsp......slhlhshssohplasLpltsshapp...............sshs-pshp-hspphsssshshptsthhsss........sssplhluhssGullclp+ps......pustapphphsstsh.ttlpshh.h........tttpht.ssssuhshss..htspsalh...slshcppL+lWslsotph....h..h............................st....tp.hphhhsssp.....sshhplh.....h.p.tpsstphhslhhus.psupFthaplp.ss....sss.hhps.hst........t.........slhshphp.hchs.sshclWsLW.cssssh.lpplphs.p................................tshpsshs.pssss.thhps.t..sDst.-palchlhpsu+aotpsl......cpALphapcsht.hpsps.s........sLpcclssslsuplphct..pt.h..........s.ppatptssppWp+FathstchccptpcPluLsl.sh....sss......shllpsssluhl..RsssshEpl .....................................................................................................h.....................................................................h.hp..........tphltl....shtht..t........................................lphph.ts.h.....sh.h....tp.ttt...................hh.lhhhs.ps....h.hhhh.h....th..t.....................................th...t..p..p......shsh..s..hhh....................................s.t...hhls..h.t.....sGslhhl.p..h.......................s.hh......hhptt.h...t.lp..shh.h.........................tt....shshshsh..p..........p.p.s............a.........lh..............sls.sppL+hWshpstph...............................................................................................h.s..t....................t...h.h..........................................hh.hhh.h........t...tu.p.hhhhth..s................t..............................sl.t..t...t.....hp..........t............p.t.....p.....lasLW..p...s.pst...h..l..h.hph...t.p............................................................
.......................t.................t.............cs..phahp.lhts.sh..as.thl....................tpuL....hap.p...........................................ltpplh.slttph..p.p.............................tth.t...t.W.pahthh.php.thtts.h..ul.h..............th........................................hllhtt..huhh..h..t.h-................................................................................................................................................ 1 67 116 193 +11548 PF11716 MDMPI_N Mycothiol maleylpyruvate isomerase N-terminal domain Bateman A anon Bateman A Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.85 0.71 -3.75 100 2030 2012-10-02 14:44:17 2008-10-08 12:55:26 3 16 384 2 776 2315 367 128.60 18 55.18 CHANGED ltsttpplsshlssLs.csp..hssPos.ssGWTltclluH...lststphhsth.............................h....ts...sh.t...........................stthpttsstssssllsthcpstsphtsshtshsspthhhhh....hssts......hhttphhEshlHthDls ..........................................................h.....tthsshlssls..ss..p.......h...s.s...P.....os..s...s.G..........W..........o..........lpcllsH.....ls.t.s...t.....p..hht..s.h..........................................h...........ts.............................................................................ttthp..t..t..s..s..h..s.s.s..p..l...h......st..hp.p...s..h..t...t...h.......h..t.t....h.....tt..h...........s...t..t.....................h.............s..h.........s..........hhhhhhh-hhhHt.Dl.................................................................................................................................. 0 295 606 732 +11549 PF11717 Tudor-knot RNA binding activity-knot of a chromodomain Wood V, Coggill P anon Pfam-B_4165 (release 22.0) Family This is a novel knotted tudor domain which is required for binding to RNA. 
The know influences the loop conformation of the helical turn Ht2 - residues 61-6 3- that is located at the side opposite the knot in the tudor domain-chromodomain; stabilisation of Ht2 is essential for RNA binding [1]. 20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.87 0.72 -4.26 56 1173 2012-10-02 16:56:36 2008-10-08 14:59:09 3 25 331 25 766 1153 19 57.70 30 10.62 CHANGED ltlGs....+lhsp+..............csph+hAcIlpt+h.....ppsp.....................hcYYVHYhsaN+RLDEWVstcRlsh .............................................................................h...Gp..+lhshc...........sshhacAcllp.hc..........ppsp.....................................................hpYa.VHYh.GaN.+R..........hDEWVstsRl..h...... 0 255 376 601 +11550 PF11718 CPSF73-100_C Pre-mRNA 3'-end-processing endonuclease polyadenylation factor C-term Wood V, Coggill P anon Pfam-B_2254 (release 23.0) Domain This is the C-terminal conserved region of the pre-mRNA 3'-end-processing of the polyadenylation factor CPSF-73/CPSF-100 proteins. The exact function of this domain is not known. 
19.90 19.90 21.50 21.50 18.80 17.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.29 0.70 -4.98 35 365 2009-01-15 18:05:59 2008-10-08 15:00:35 3 12 290 0 246 354 0 216.70 26 31.03 CHANGED cGp..hl...SGlLVpps..FshpLluPsDL+E.YosLoToslsp+Qslplsus.h-LlpatLcphF.G....tl-.lsccc......................................thlVMsslslhhcppp........lplEWpu.NhhNDslADoVhAlLlsl-SSPAuVKtouppp..p.............................ptp.s.ppphcplhhhLcspFGss.hs.hppsc.....................hhlphspppuslsh......pshss.-sp.......scsLcpclpshlpRhpphstPl .....................................................................................................sp.hlSG.l.L.Vpcs..Fshp.lhuPpDLpp..YosLsso...slp.......Q+Qslshsus....hs..L...lp..a..tLppha.G....slEplpppc...................................................................................................t.shhVh.t........s.lslphpsst...........lhlEWtu.s.hNDhlADuVhslllplpus.PtssKtssp...t...t........................................................................................t.tp.....p.p.....hhcpl.hhLps.FGcs..h..htpsp.......................................................................................................h.lpl.stppAplsh.................pshpl...csp........spsLcphlp.hhp+hhphhhP............................................................................................................................ 0 86 140 205 +11551 PF11719 Drc1-Sld2 DNA replication and checkpoint protein Wood V. Coggill P anon Pfam-B_1966 (release 23.0) Family Genome duplication is precisely regulated by cyclin-dependent kinases CDKs, which bring about the onset of S phase by activating replication origins and then prevent relicensing of origins until mitosis is completed. The optimum sequence motif for CDK phosphorylation is S/T-P-K/R-K/R, and Drc1-Sld2 is found to have at least 11 potential phosphorylation sites. Drc1 is required for DNA synthesis and S-M replication checkpoint control. 
Drc1 associates with Cdc2 and is phosphorylated at the onset of S phase when Cdc2 is activated. Thus Cdc2 promotes DNA replication by phosphorylating Drc1 and regulating its association with Cut5 [1]. Sld2 and Sld3 represent the minimal set of S-CDK substrates required for DNA replication [2]. 28.00 28.00 28.20 29.10 27.80 27.90 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.87 0.70 -5.39 23 196 2009-09-11 16:31:55 2008-10-08 17:09:09 3 11 170 1 145 183 0 323.50 21 46.24 CHANGED hspL+sELKpWE+sFtspN.GRcPsRsDIKpsPEIutKYKpYscLKs..supts..............pspKRcp...............sh....psspTPpKs.............................t.p...t....p....ps.................shcstlGPTPQtsGKsLuLF.Dhh.ps.t......................ssuoPoss+hsospssss...........................pTPoKpctsh...............tt.tsptc.ssp...tcTPhssscchhlst.h........p...............................................hpTPsaL+R..............pp.sh.s.t.................................hSPpPl+..p+.hs....KuLSplsp.....Ec-plc--h-lLcEhEsEpt.................................p.tppphsDspss..................................................tsp.spshKKK..sQKRoTR+V+M+Psh.tcspspsphspsDhccE...........h..................................................................shhcpcsho-hhsph...t.p..sppcctpcscsptcpp.............tpspt.+K.............lpssup...Na+RLKlps+..tptp..hpuRa.pRR 
......................h.tpl+hcLKpWE+sFtppp.GR+Ps+pDI.K.p.s.s.c.ltthYKp.Ypp....L+p..ttps...............................................t.pctpt..............................s.psPtpt......................................................................................hss..s.........................................................................................................os.p................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 39 70 114 +11552 PF11720 Inhibitor_I78 Peptidase inhibitor I78 family Bateman A, Rawlings ND anon Bateman A Family This family includes Aspergillus elastase inhibitor and belongs to MEROPS peptidase inhibitor family I78. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.87 0.72 -4.24 8 389 2012-10-01 19:32:51 2008-10-13 17:23:26 3 4 345 0 161 477 26 62.50 29 58.57 CHANGED CsApshQsLVGQPhssspssph.....spslRVl+PGDhlTMDYpssRLNlhlDtsssIosl+CG ........................C.st.hpthlGp.s..h...os...t.h.p..ph..........hsptsR..llp...P..G...sh.lTMD.......apscRLNlplD.s.s.spl.pplpCG.......... 0 32 86 132 +11553 PF11721 Malectin Di-glucose binding within endoplasmic reticulum Finn R, Coggill P anon Pfam-B_783 (release 23.0) pdb_2jwp Domain Malectin is a membrane-anchored protein of the endoplasmic reticulum that recognises and binds Glc2-N-glycan. It carries a signal peptide from residues 1-26, a C-terminal transmembrane helix from residues 255-274, and a highly conserved central part of approximately 190 residues followed by an acidic, glutamate-rich region. Carbohydrate-binding is mediated by the four aromatic residues, Y67, Y89, Y116, and F117 and the aspartate at D186. NMR-based ligand-screening studies has shown binding of the protein to maltose and related oligosaccharides, on the basis of which the protein has been designated "malectin", and its endogenous ligand is found to be Glc2-high-mannose N-glycan [1]. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.30 0.71 -4.49 62 826 2012-10-01 23:47:32 2008-10-15 13:30:42 3 160 245 3 487 1418 44 162.90 26 22.05 CHANGED hshtlNsGGsph....tsssGhhYpsDpt..suss..ph..t................................ht.stshhtspsp.....pLYpotRh.......us..o..hpYhh...........hlpsGs..YslpL+FAElhass......tthGcRlFDlhlpu.phllcsaDIhp..cusGhs.tsshcpa.ss.sl.ssstLp.......Iphhhhtpsp..l....ushs.Ph.IuAI .................................................................................htlNsGutth........s.ts...h...hapsDt.....ssst.......h..t...........................................htp.st.th..s.stss.......tL..apTt...Rh..........u.t...o.....hp.Y..hh................h..ht..sG..s...YsV.........pLpFAElhass................tthGp.........R..l.........FDlhl...p.........G..p.............h.....lhc-...h...D..I.....hp.......psG.....uts......sshcp.h.....ss..sV..ss.s.t.Lp.......lc.hh......ht..st........uhhs.PhlsAl.................................................... 0 154 331 413 +11554 PF11722 zf-TRM13_CCCH CCCH zinc finger in TRM13 protein Andreeva A, Bateman A anon Andreeva A Domain This domain is found at the N-terminus of TRM13 methyltransferase proteins. It is presumed to be a zinc binding domain. 20.90 20.90 20.90 22.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.61 0.72 -4.57 27 181 2009-01-15 18:05:59 2008-10-20 16:55:17 3 11 158 0 116 172 2 30.90 40 6.89 CHANGED thpCpaal.+K+.R....hC+Mtt.ttGspaCspHts ........pCpaalt.+K+.R....aC+Mss..tsGp...paCGEHt.... 0 38 62 97 +11555 PF11723 Aromatic_hydrox Homotrimeric ring hydroxylase Radauer C, Mistry J anon Pfam-B_24837 (release 22.0) Domain This domain is found on aromatic hydroxylating enzymes such as 2-oxo-1,2-dihydroquinoline 8-monooxygenase from Pseudomonas putida and carbazole 1,9a-dioxygenase from Janthinobacterium. 
These enzymes are homotrimers and are distantly related to the typical oxygenase [2]. This domain is found C terminal to the Rieske domain which binds an iron-sulphur cluster. 20.50 20.50 20.60 20.90 20.40 19.00 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.59 0.70 -5.07 7 55 2012-10-02 19:24:03 2008-11-07 17:10:43 3 3 44 49 16 58 6 216.30 38 59.09 CHANGED aVGDu-...PsPLtcDlPPs............FLD-Dhtlh.G.+RhlpuNWRLulENGFDsGHlaIHpsS.hVhusDhAlPLGF..tP.uDcsth..shs-s.suPKGlhchh......scHh.PlFEusl.GpsshtGphh..pthVu...............lplSlWLPGVLKV-sFP.PshhpaEaYVPIDEsoa.YaphLu+hssocE-tcsFcsEa.cpcW+chAlpsFNDDDlhAR-uMpsFYusDpGWscEhLapsDtsIltWR+LsSp+sRGIQ ......................alGDt-....PPPLscDlPsp..................hLD-shslh.GhpchltuNWRlusENGFDsuH.l.alH+sShllcspDhsLPLG..h..hP..sspst...ts..-t..stPKGlhshh......s-phhP.lh-shh.G..shtss.t.....hhs...............hp....hSlWLPGVL+VpsaPtPchsQaEaY.VPID-spH.Yaphls+hssscc-tpsFctca....cpt...ahsh...sLpGFNssDlaAREuhpsFYs-spGWspE.LhcsDhuIltWR+LAScasRGI........................ 0 7 13 15 +11556 PF11724 YvbH_ext YvbH-like oligomerisation region Bateman A anon Bateman A Family This region is found at the C-terminus of a group of bacterial PH domains. This region is composed of a helical hairpin that appears to mediate oligomerisation based on the known structure. This elaboration of the bacterial PH domain is only found in Bacillales. 21.50 21.50 22.50 26.10 20.10 21.40 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.92 0.72 -4.29 7 123 2009-01-15 18:05:59 2008-11-09 20:00:03 3 2 123 6 16 66 0 60.40 65 30.21 CHANGED -aAcpSLshAsslhsphR.t.csslsppFKchschAFsWLssshppashKDFG.VF-KYIpN ........QYAHKSLDMASNAFSR.ISNA.QVNLAEQFKEMNEIAFNWLVDTKKQYNVKDYGFVFEKFINN... 
0 9 12 14 +11557 PF11725 AvrE Pathogenicity factor Mistry J, Coggill P anon Pfam-B_735 (release 23.0) Family This family is secreted by gram-negative Gammaproteobacteria such as Pseudomonas syringae of tomato and the fire blight plant pathogen Erwinia amylovora, amongst others. It is an essential pathogenicity factor of approximately 198 kDa. Its injection into the host-plant is dependent upon the bacterial type III or Hrp secretion system [1]. The family is long and carries a number of predicted functional regions, including an ERMS or endoplasmic reticulum membrane retention signal at both the C- and the N-termini, a leucine-zipper motif from residues 539-560, and a nuclear localisation signal at 1358-1361. this conserved AvrE-family of effectors is among the few that are required for full virulence of many phytopathogenic pseudomonads, erwinias and pantoeas [2]. 18.10 18.10 19.80 19.70 17.80 18.00 hmmbuild -o /dev/null HMM SEED 1774 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.70 0.69 -14.34 0.69 -7.55 9 200 2009-01-15 18:05:59 2008-11-11 16:24:21 3 3 58 0 14 220 0 959.80 27 91.93 CHANGED 
Mp....h.slspspsssltssssssssssssLpQ.pspsssQpuutSLsu.G.+shtphsplpQ..........pspussppsAs.....................sPps.ss.tts......tt.phtp.ht...p....p...st....p.t.........................................ph.ts.lsp.u................t.Phsptpssssshstpt.ssst.ts.stssQpsssuc.ct.pppthttpcsssPt...........t.hpstLstss...tuphchpspph.pshpspss............IoLDscGK.pFushpsPAlssLLcppLGKssQsYLAHpusssup....pHhLL-spGHLhcltps-suhull+SSpssshh...utscsssVpLppcssplpls...pupspts.sLPGcAHhAhLoGl.......phsssG-plRlHDDKlaphss.huhWp...ssc-ssaSpLospGDGplYA.+sscsLsNLSSsp..hschhlcclpSFSVu.cGpVAlLospDops.pLs.Mssl.us.ts+hphoLcLss.......Gcu..pApuVuLos-RLFlADo-G+LYSAsRsphcss.-s.LchhPpps.....spthGtccploGFhosDcGQlpALlKsp.GphHupsLs.pssphcsGWNLoDuLVl-NphGLssh.sPssccpLsLsRtGplGLp-G+lphaDusTcsWc-Aul.DhcpLpRGhDusAYlLK-Gcl+cLsls.cpsshsaucssshA.sthpsKsEhGsAlpGL-.scslpAhAhlusppFlALDDpGclpuHp.KsGp......PhsLo+cGLsG-lKslsLDccpNLaAlTpsGcLFphsREsWQssc.us.....................................................upWptlshP.sppslcuLtssscpplssplcD.......uhhpL+utpWpshps+.....sstpsuhpslFsRLspuhKshpIPGTGlTl+hssphhG+oGhEppp.+SphuEhlRAalapsThpsPRPhKNsu.plQHRapGR-GLptVYchpuhLhKQL-...pssupssssupDLpo+lspLD..LG.cGtpLlc-LcpFR-ELEspuppAlhhlG..........QppGVLNppGplscptK.....tt.tthspthNhppSG+DLsKtLpsAlppsuPSspssstpLLpphhssGlchSHpKuDhPhGppRDssDcpuLoKuRLsLDlVTLs-LtcLlDKAchlospss..stlppLcpcLssLR-ppYG-NPVKphTDMGFTcNtALEusYDuVKuFlNAF+KtcHAVSVshRsshtopsQu-LAcKhKssLhpL-pG.-pIuhoRSYGssLoTsFlsh..schsssshPuAGhTssRNYsLshpRs-GGlsVhhhR-GusssosuVuuG+DlhPhhsscspsAc..Ss.lusc++hsPshRlGusloushptoppsulsFsls---lsuFlDsLhpGpLsPLplLcKGh-Hps+pu++hsFslssuuss-LRsGlslo-.......ssScPhoAssRsusGhssosNLhohoctp.TQcs-pophppuScNRPRFhNssshGAphpsplusuHss.pts.............sootst.psGo.sAhssssVssslulDsRTspplphchKpApPlTspslscLopoLGcsFKDsAopthLucLtc.psuc.....Pc-....pLcuLpchFus+sspsDcQYcALRsLK+hsscpcAu.scpusLDsARapTsYTNLS+LsppulhshIhsplpuAhsPoNAsRluchMspDPtLKullKphQuSsuT.ApVpLE.KDsLhD+l-cG.h-GplspcElusLhpDRNNhRI+ulsV.posspoEGFsoPTsLlSsssSAulShsKsLGpINFhYGpDQ-pPpuaTl-GElu+ssssltsAhscLKK-GhEhKS 
........................................................................................................hh.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sth.............t..tt.hp..tttla.hp........Wt.......t..t..hs.l.hthpGphh..psst.h.p.ss...............h..s..stssh..t..............................................................t..........h.thsh...t.t..p..lhhhstpsthh.h........................................................................h.hsh.........s.......t.t.pstWphpp..hh..p.Gh....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................... 0 0 4 8 +11558 PF11726 DUF3296 Protein of unknown function (DUF3296) Mistry J, Coggill P anon Pfam-B_768 (release 23.0) Family This family is expressed in Gammaproteobacteria. One of the E coli members is annotated as yagK, but otherwise the function is not known. 22.30 22.30 22.30 23.00 22.10 22.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.31 0.71 -4.71 36 1241 2009-01-15 18:05:59 2008-11-11 16:48:00 3 3 492 0 89 791 27 153.40 27 81.94 CHANGED ppltpslcpslscasRlhslRlDL+hPp..ph.sp.....s..........sslocFhpsLcsplptp.................phhpspltYlWs+Epsp.ss..phHYHlsLhlN.p.a.t.Gphppps.slsspIppAWtpuht.hth....cspsllphs....cps.....tYhls...htpts.ppshc..slhhRlSYLAK..tcoKph.spt..t+sFGsSp .........................................hpphlpthhp.as+......lhshRlDlth.c....s.ssh......st...........................tsls..phhptLps......clp......................p.h.shltYhWshEasp..pt..thHhHhhlhls...............Gph.p.t..........s.........pl..uthIppsWp.psht.....sh..........phtshsp.hs...tps......................................thhlp..............t.s.stpshp..pl.h...p.h.sYLAK......tc..s....Kth...s......psaGpS.................................... 0 20 37 61 +11559 PF11727 ISG65-75 Invariant surface glycoprotein Mistry J, Coggill P anon Pfam-B_785 (release 23.0) Family This family is found in Trypanosome species, and appears to be one of two invariant surface glycoproteins, ISG65 and ISG75. that are found in the mammalian stage of the parasitic protozoan. 
the sequence suggests the two families are polypeptides with N-terminal signal sequences, hydrophilic extracellular domains, single trans-membrane alpha-helices and short cytoplasmic domains. they are both expressed in the bloodstream form but not in the midgut stage. Both polypeptides are distributed over the entire surface of the parasite [1,2]. 22.70 22.70 22.80 23.80 22.60 22.60 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.08 0.70 -5.35 20 223 2009-01-15 18:05:59 2008-11-11 17:19:18 3 4 11 0 71 209 0 254.70 29 57.37 CHANGED thlhhhhssllhhslshApttph.......pus+KLsp-GApuLCph+cLucpV.scpuDplhccspphsstlptthcplp.hhpplt............c.s-ucpccl+phhpcAKpclpctltp.ucphsccsccptpclKcAApcAht.t.p..tt.pSsGLpcVLsWaCtsp....scst..s..spNC.-slshppch......ssst+ssIsC....sttspss.hpssosssMcpAl-pWspsKP.......K.....c.t.psssstCpp.ts.stspPCTlhEp.WpscYcsuhpclccLEcstcpuppstct .....................................hshhlhsshhslslshupp.s.hs.t.......hss+pLstpuAtsLCpMK+Ll-sV.sspsDph.c.ppshpa.l.ssspt.thp+l..............................S-pcphpsshsps..+hpthpthpp.usphhctht...cptccAKpAAssA..................Ss.u.LpclLcWHChsc....tcs..t.s...ssNC.-sssa+cca........cspphsI....C....phpshssspspsThu.sMctAL-tWsps.KP..........K........hph...susstssp.pusssscPCTlhEs.WhhsYcsstphltcLEpshp.uhtshc.h............................. 0 45 71 71 +11560 PF11728 DUF939_C DUF939 C-terminal domain Bateman A anon Bateman A Domain This region is a presumed intracellular domain found in a set of bacterial presumed transporter proteins. The region is about 160 amino acids in length. 
21.50 21.50 21.60 21.50 21.20 21.40 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.95 0.71 -4.44 30 916 2009-01-15 18:05:59 2008-11-13 17:09:13 3 3 912 2 73 425 1 166.60 34 52.19 CHANGED h-cclccapppIEpph+pILhchuthLpssctp..hpstplscLcphlccA+slAa+chcNph.h+pcshYh+YF-MRcpQh-lLc+MhsplsslphsscQsphlAchhcclusslppsNsuphhLccLpch+cpFcchsLPpTREEFEsRAsLaQhLp-hEpaLhlKsp ........-cclccY+pplEpph+cILpchs.aLpptps.........c....sp........u.......t.htcLcph....lccAppluap-hcNa.h.h+.....p.s......sYah+...YF-.MRp..pQsclLcpMtt.lsslphss...pputhlApLhpclut....slspsNsusphLcc.l.pchhpha+ph.sLPpTRcEFEsRAsLlQlLpEhcpalplKs..... 0 25 44 59 +11561 PF11729 Capsid-VNN nodavirus capsid protein Mistry J, Coggill P anon Pfam-B_805 (release 23.0) Family The capsid or coat protein of this family is expressed in Nodaviridae, that are ssRNA positive-strand viruses, with no DNA stage. These viruses are the causative agents of viral nervous necrosis in marine fish. 35.30 35.30 35.30 92.30 35.20 35.00 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.16 0.70 -5.48 3 443 2012-10-04 01:49:40 2008-11-17 14:19:14 3 1 133 0 0 217 0 193.10 74 99.75 CHANGED MVRKG-KKLAKsATTKAANPQPRR..RsNNRRRsoRsDAPVuKASTVTGFGRsTNDVHL.SGMSRIuQAVLPAGTGTDGaVVlDpIIVP-LLPRLGHAARIFQRYtV-TLEFEIQPMCPANTGGGYVAGFLPDPT....DuDHTFDALQATRGAVVAKWWESRTVRPQYTRTLLWTSoGKEQRLTSPGRlILLCVGNNTDVVNVSVLCRWSVRLSVPSLEpP.EplAsPILTQGsLYNDSLostYN...................HFcSlLLGSTPIDlAPTGTIF.QLDRPLpIDYSLGTGDVDRAsYWHF+KhuGNls+PAGYhpWGlWDuFN+TFTsGlSYYSDsQPRQILLPV......GTlpsRhDSEN 
...............................................................................................................................ANTGGGYVAGFLPDPT....DNDHTFDALQATRGAVVAKWWESRTVRPQYTRTLLWTSo...GKE.QRLTSPGRLlLLCVGsNTDVVNVSVhCRWSVRLSVPS.LETP.E-TTAPIhTQGsLYNDSLuT..N...................DFKSILLGSTsLDIAPDGAVF.QLDRPLSIDYSLGTGDVDRAVYWHLKKFAGsAuTPAGWF+................................................................................ 0 0 0 0 +11562 PF11730 DUF3297 Protein of unknown function (DUF3297) Mistry J, Coggill P anon Pfam-B_797 (release 23.0) Family This family is expressed in Proteobacteria and Actinobacteria. The function is not known. 19.10 19.10 19.10 100.60 17.40 19.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.18 0.72 -4.29 30 320 2009-01-15 18:05:59 2008-11-17 14:20:50 3 1 318 0 106 230 31 70.90 61 82.61 CHANGED PDRLSssPcSPaass-lLpRslGIRFpGpE+TsVEEYClSEGWVRVsAGKAhDR+GpPhTIKL+GsVEsaa .PDRLSsNPRSPaastEsh-+..sIGIRFNGcERoDVEEYCISEGWlKlsusKAhDR+GNPhhlpLKGsVEAaY. 1 15 52 79 +11563 PF11731 Cdd1 Pathogenicity locus Mistry J, Coggill P anon Pfam-B_826 (release 23.0) Family Cdd1 is expressed as part of the pathogenicity locus operon in several different orders of bacteria [1]. Many members of the family are annotated as being putative mitomycin resistance proteins but this could not be confirmed. 25.50 25.50 25.50 25.70 25.40 25.40 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.91 0.72 -4.06 25 195 2012-10-03 02:11:09 2008-11-17 14:34:06 3 3 188 0 48 189 25 90.70 35 88.25 CHANGED M..........s+s.scs..................................thpcLpcLPNlGcAsApDLphLGIppsppLtGccPhchYpcLschoGpppDPCVlDVFhulspFhpGp..-....spPWWpaTcERKpt ...................................................................................ppLpcLPslGcuhstDLp.hlGIcplssL+GcsPh-lYcch...Cphp.G...pphDPClL.latssVhahpspp......hp.WWpap-cp+t............................. 
0 20 33 40 +11564 PF11732 Thoc2 Transcription- and export-related complex subunit Mistry J, Coggill P anon Pfam-B_819 (release 23.0) Family The THO/TREX complex is the transcription- and export-related complex associated with spliceosomes that preferentially deal with spliced mRNAs as opposed to unspliced mRNAs. Thoc2 plays a role in RNA polymerase II (RNA pol II)-dependent transcription and is required for the stability of DNA repeats [1]. In humans, the TRE complex is comprised of the exon-junction-associated proteins Aly/REF and UAP56 together with the THO proteins THOC1 (hHpr1/p84), Thoc2 (hRlr1), THOC3 (hTex1), THOC5 (fSAP79), THOC6 (fSAP35), and THOC7 (fSAP24). Although much evidence indicates that the function of the TREX complex as an adaptor between the mRNA and components of the export machinery is conserved among eukaryotes, in Drosophila the majority of mRNAs can be exported from the nucleus independently of the THO complex [2]. 20.40 20.40 22.10 24.10 19.10 20.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.41 0.72 -4.19 22 297 2009-01-15 18:05:59 2008-11-17 15:09:49 3 5 248 0 207 292 3 75.90 44 4.48 CHANGED luKlsasNPhhshpshLpQIcsYDNlhphVV-ssKYhosLuaDlLsasllppLs.ssRsphppDG.hs.uhWlpuLus ...luKluauNPsllhch....lLsQIpsYDNLIssVV-.sh.KYh.TsLsYDVLsa..........sl..lpuLu....p.s......p.....+p+hpcDs.hshSpWLQuLAo............... 1 67 114 171 +11565 PF11733 NP1-WLL Non-capsid protein NP1 Mistry J, Coggill P anon Pfam-B_837 (release 23.0) Family This family is the non-capsid protein NP1 of the ssDNA, Parvovirinae virus Bocavirus of cattle and humans. 
25.00 25.00 96.70 96.00 21.50 19.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.32 0.72 -3.73 3 376 2009-01-15 18:05:59 2008-11-17 17:06:10 3 1 14 0 0 133 0 93.50 90 83.10 CHANGED MSScsTKD+HRSpKRpsSPhR-ERKRsWc.p++SRSRSPIRR+GE+ShGSaRppsQ-s+QSSsTASKsSDpATKTKEsTSG..sTRTNPYTVFSQHR MSSGNMKDKHRSYKRKGSPERGERKRHWQTTHHRSRSRSPIRHSGERGSGSYHQEHPISHLSSCTASKTSDQVMKTRESTSGKKDNRTNPYoVFSpH..... 0 0 0 0 +11566 PF11734 TilS_C TilS substrate C-terminal domain Sammut SJ, Bateman A anon pdb_1ni5 Domain This domain is found in the tRNA(Ile) lysidine synthetase (TilS) protein. 21.00 21.00 21.10 21.10 20.80 20.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.34 0.72 -4.75 140 2601 2012-10-01 21:04:40 2008-11-18 16:14:21 3 9 2576 4 480 1909 354 70.80 26 16.01 CHANGED lp.lRh+...tuGpchp...hs....+stsRpLK+laQEhslPsWhRs+lPLlah....s-.pL.lsss.Glhlspt.....h.s...........ttt..h....plp...W ................lplRh+...psG-hhph..hG....pssp.+KLK+la.-tplP.ht.R.c.phPl.lh........s.p..pl..lsl....s..Glhhspt.....h................................................. 0 178 312 403 +11567 PF11735 CAP59_mtransfer Cryptococcal mannosyltransferase 1 Mistry J, Coggill P anon Pfam-B_916 (release 23.0) Family The capsule of pathogenic fungi is a complex polysaccharide whose formation is determined by a number of enzymes including, most importantly, alpha-1,3-mannosyltransferase 1, EC:2.4.1.- [1]. 
25.00 25.00 25.00 25.00 24.70 24.20 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.54 0.70 -5.15 37 382 2009-09-11 16:30:58 2008-11-19 09:33:18 3 7 108 0 245 384 4 212.70 30 50.86 CHANGED +lFIAhsLa.pstplltstastull....pLhchLGs.....-NVFlSIYEssSsDsopssLp.tLc..LcplslppsIhh..s.tschs..th......................h+RIsaL.............AclRNhALcPL.p....................ttpshtaD+lLaLNDVhFsspDllpLLhsss.................sp..........................................YstsCulDF...sp..s.pF.................YDoassRDhp.Gpth............shshaPaFpss....sspcshstppsl.VpSCW.........sGhsuhcAp.Fhtsp.........................slRFRu....sth....h-uSECsL....IpsDh.....ss ...............................+hahAhsla.pstpll.shhu.sllplhphLG.........................cN.lalS.lhEss.SsDpotshLp..hc..hp.....phsh.ph.lhh.......ppht........................................cRItaLAplRNtAlpP.L.p........................................t.t.s.ta...cpllF..lN.Dlh..hsst..DhlpLlhppp....................tp..........................................................hthsCu..hDa..............t..s.hF.........................................................YDsashRDhp.G.th.......................h.has..hh....t.st.............tppphh...t.t.sh.VhuCW.........sGhsshsst.h.....................................................................lpFRt.....t.......h.tuEspLhh.D.....s....................................... 0 69 141 197 +11568 PF11736 DUF3299 Protein of unknown function (DUF3299) Mistry J, Coggill P anon Pfam-B_876 (release 23.0) Family This is a family of bacterial proteins of unknown function. 
21.20 21.20 21.30 21.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.30 61 436 2009-01-15 18:05:59 2008-11-19 10:02:20 3 3 323 0 135 407 201 144.10 36 78.53 CHANGED lsWp-Lh.P..ts.s................t.hspltsts.................thpps.sth.......................................pp.hsuslsppLsGptV+lPGalVPL-.sssttlTEFLLVPYaGACIHVPPPPPNQIVaVphscG.htlps.........LasPlWVpGpLpscphss.-lA........psuYshpusplpsY....c .................................................................sW.-Lh.Pts..p.....h....................p.ts.s.................h.ppt..th.......................................p...suslsppLsGp..pV+lPGallPLE.....s-psplTEFLLV..P.YaGAC.IHVPPPPPNQIlaV+hs.cG...ht.lp.p.........las.slaVpGpL+scph.ss..-LA........psuYphpusplt.Y...................... 0 38 74 105 +11569 PF11737 DUF3300 Protein of unknown function (DUF3300) Mistry J, Coggill P anon Pfam-B_886 (release 23.0) Family This hypothetical bacterial gene product has a long hydrophobic segment and is thus likely to be a membrane protein. 
25.00 25.00 26.80 30.50 20.70 24.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.72 0.70 -5.10 71 636 2009-01-15 18:05:59 2008-11-19 10:38:50 3 1 598 0 129 443 35 275.10 57 50.70 CHANGED hopupL-QhlAPIALYPDsLLoQlLhASTYPL-VVpAsRW...psNspLpG-.........shhpAlpsps....WDPSVKuLlAFPplLphMu-clsWTppLGDAFLuQpp-VMsulQpLRppAppsGsLposppQpVsppp.ps...................................IhIEPAsPpVVYVPhYsPslVYG.sW.a.ssYP..Ph.Ya......s.Psh..........sshs..suhuaus.Glsl.uhsh.......a...uu.......hc..Wssc.....................................plslshpphpphstp...........sstpWp..Hs ....FSTAQIDQWVAPVALYPDuLLSQVLMASTYPsNVsQAVQW..S+..D..NPhhQGD.........AAIQAVu..sQP....WDsSVKSLVAFPQLMALMGENPQWVQNLGDAFLAQPQDVMDoVQRLRp..LAQQTGSLKSospQKVlossKpsssst.ssssss...........sshstsssTVIsIEPANP-VVYlPNYNPsVVYG.sW..sN....TAY..P..PV.YL......PPPsGpsF......sDSFV+GFGYSh.GV..ATTYAL......F...SS........ID....WDDDDHcHHccDpcs.t...............htpsussapHNGDNINIsVNNFNRIoGppLsspshs.........W...pss..................................................................................................................... 0 28 54 94 +11570 PF11738 DUF3298 Protein of unknown function (DUF3298) Mistry J, Coggill P anon Pfam-B_854 (release 23.0) Domain This family of bacterial protein C-terminal regions is highly conserved but the function is not known. Several members are annotated as being endo-1,4-beta-xylanase-like, but this could not be confirmed, and the structure can be defined as a heat-shock cognate 70kd protein 44kd ATPase. 
22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.83 0.72 -3.47 347 1368 2009-11-03 10:59:44 2008-11-19 11:12:14 3 22 951 4 301 1129 32 88.50 19 30.96 CHANGED LpD.........l.hps..t.................hhptl......pphlppph.......t.....................pptt..t.................................ptht.shtt.......................................spsah..lssc......u.ls...lhas....Y-l....u........Pauh......G.hh....phplPhpp ................................................................................................................................................sl.hps...s............hhphl.pphlpppl....t.....................pp.pt..............................................................thh..tht....t................................spsFh......lssc........u.ll...lhas.YElu............Pauh...............G.hsphtlPht................................ 0 106 205 258 +11571 PF11739 DctA-YdbH Dicarboxylate transport Mistry J, Coggill P anon Pfam-B_935 (release 23.0) Domain In certain bacterial families this protein is expressed from the ydbH gene, and there is a suggestion that this is a form of DctA or dicarboxylate transport protein. Dicarboxylate transport proteins are found in aerobic bacteria which grow on succinate or other C4-dicarboxylates [1]. 
23.00 23.00 23.10 25.50 22.80 22.90 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -10.98 0.70 -4.88 59 772 2009-01-15 18:05:59 2008-11-19 11:34:00 3 4 734 0 150 542 34 200.70 45 24.22 CHANGED lplsplss.Glslsshphphphphst..........t.lplp.................................ssssshhuG.pltlsphshsh.pst..pshlplpslsLscllpl.shpsltuoGplsGslPlhlsss.thplpsGpLsucss.Gplphsssss..cshu..tss.shphshssLcshcYspLsuslshsssGchhlslplcGpN.....ss...........htsspPlplNhshc-NlhpLh+SLphu..splpptlc ............................................LRIAplhN.h.lss+NITADLQGpYPa...................oEppPLhLo....................................-VSlDVL.GGpl.hhcpLRhPQ.HD......PAlL..RLpNlSuSEL..loAl..s.P..KQ..FAMSGshSGuLPLWLNNc.KWll+DGWLsNsGP..h..TLRlDKDsA...DA....ls...............+s................N.h...oA...GsAI....s....WLRYMEIo+SpTcI.NlDNLGh.LThpAsIsGpS.+s........................csKsssVNLNYpHEENlFsLWRSLRFG..DNLQsWLE......................................................................................... 0 31 66 113 +11572 PF11740 KfrA_N Plasmid replication region DNA-binding N-term Mistry J, Coggill P anon Pfam-B_844 (release 23.0) Family The broad host-range plasmid RK2 is able to replicate in and be inherited in a stable manner in diverse Gram-negative bacterial species. It encodes a number of co-ordinately regulated operons including a central control korF1 operon that represses the kfrA operon. The KfrA polypeptide is a site-specific DNA-binding protein whose operator overlaps the kfrA promoter. The N-terminus, containing an helix-turn-helix motif, is essential for function. Downstream from this family is an extended coiled-coil domain containing a heptad repeat segment which is probably responsible for formation of multimers, and may provide an example of a bridge to host structures required for plasmid partitioning [2]. 
24.10 24.10 24.30 24.30 23.90 23.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.43 0.71 -3.66 107 609 2009-01-15 18:05:59 2008-11-19 13:15:57 3 4 377 0 187 552 51 116.60 23 36.79 CHANGED lsppplhpA.ustLhspGp.......pP...........TlcsVRptlG......pGShsTlsphLccacppptsts.........stst...hP.ssltpshsphhtplhppApptAppplpptcpphptpttphptptppttpphpphppphpp .................thpplhpA.sptLh.st.Gp.......pP.................ospsVRptlG.......sGShosl..sphlccapppppsts.................tss...lP.tsltp.....thsphhstLhpp...........Ap...p...tA....ppt..........lstt.c..........p....t...hptph...tp...hpp.phppt...ttphpphptpht.t......................................................... 0 27 74 132 +11573 PF11741 AMIN AMIN domain Finn RD, Coggill P anon Pfam-B_11438 (release 23.0) Family This N-terminal domain of various bacterial protein families is crucial for the targetting of periplasmic or extracellular proteins to specific regions of the bacterial envelope. AMIN is derived from the N-terminal domain of AmiC, an N-acetylmuramoyl-l-alanine amidase of Escherichia coli which localises to the septal ring during division and plays a key role in the separation of daughter cells. The AMIN domain is present in several protein families besides amidases suggesting that AMIN may represent a general targetting determinant involved in the localisation of periplasmic protein complexes [1]. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.31 0.72 -4.03 172 2653 2009-01-15 18:05:59 2008-11-19 13:52:53 3 59 1806 0 715 2042 505 99.40 24 20.71 CHANGED lclhss..ttth.clhlpos.tshphpshtl......ssPsRlVlDltssphshshp.........phpsssss...lpplc.ssp...pssssRlVlcl.ct...ssps........h..tppsptlllsl ...................................s...sp.s..+lslcts..p.....h...p....h.ptFsL.......ss.P.s..Rl.V.l...Dlpssshssshp.....................phph.s...ssh...........lppl..R..suph.......sss...s.sRlVl-L.cp.........ssps..........ss..tt.tpplhhs.................................................... 0 182 458 606 +11574 PF11742 DUF3302 Protein of unknown function (DUF3302) Mistry J, Coggill P anon Pfam-B_953 (release 23.0) Family This family of unknown function is expressed by proteobacteria. 21.00 21.00 21.00 21.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.85 0.72 -4.13 8 755 2009-01-15 18:05:59 2008-11-19 18:01:27 3 1 490 0 69 251 11 74.40 78 65.64 CHANGED LDYFALGlLlFVslVlFYGlIlIHDIPYEIAK+RNHPHQDAIHVAGWVSLFTLHsIWPFLWIWATLY+P-RGWGhssh .........LsYFALGVLIF.VFLV.IFY...G...I..IhlHDIPYLIAKKRNHPHADAIHsAGWVSLFT.LHVI.WPFLWIWATLYpPERGWGMQs.t............... 0 8 18 42 +11575 PF11743 DUF3301 Protein of unknown function (DUF3301) Mistry J, Coggill P anon Pfam-B_952 (release 23.0) Family This family is conserved in Proteobacteria, but the function is not known. 25.00 25.00 39.00 47.80 21.60 21.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.07 0.72 -4.46 55 323 2009-01-15 18:05:59 2008-11-19 18:02:21 3 1 323 0 92 257 40 94.80 33 82.77 CHANGED sLh...hlhll.shsshhaWpt+phpEhAhtts+ptCcchslQLL-tuhshp+hphtt...pGphphpppatFEF.SssG-sRYpGplhhhGt+ltplcLssa .....sLhlllhl.shsshhaWpt+phtEhAhthscppCpchclQLLs.uhst++l.phsh...sGphphpphYtFEF.SssG-spYpGplhMhGt+ltplplsPa.... 
0 26 52 73 +11576 PF11744 ALMT Aluminium activated malate transporter Bateman A anon Bateman A Family \N 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.26 0.70 -5.82 18 320 2012-10-02 19:04:43 2008-11-20 17:12:55 3 13 54 0 205 1758 29 327.50 31 66.11 CHANGED achG...+cDPRRVlaulKVGlALoLVSllYahcs.acslGssAlWAlhTVVVVhEFSl................GATLsKGLNRuLGTLhAGuLAluhphlu.shoGc...hEsllluhslFlhuhhATah+haPthKs.Y-YGhhlFlLTFsllsVSuYRscph.lphAhpRhhpIslGuslClhlslhlhPIWAGEDLH+LlspNhpplupuLEG..........................CVspYhcshEh-........hps.s--slapGY+olLsSpupEEoL............hsFApWEPsHGta+.a+aPWppYlKlGuslR+CAaplhAL..........HGCl..poEIQuP....p-lRstF+csht+luhEuuKlL+-Lucsl+pMc+sos..p.llpcl+hAscpLpppI.cspshLhlsop..t........t..........................................................................st...p.................hcshE.upuLslATFASLLIEhVARL-sll-ul-ELu .................................................................................utpDs++hhauhK..hGl..uL.s.L.l...S..hh..h.h.h...p..........h.p.t...h....u.......p..u.h..W.Al.h.T.V.V.VV.h...E..a...o..l...........................................GuTlsKGhNRuhuTlh.AGhl..u.hs.h.t..lu...th.s.Gp.......h.ps.h.l..l..s..h...l..F...l....h............uh...h.....so.......a.h.+..h..h.Pt.......h.K.......a.-...Y...G...h.h.l..F.lLT.a.s.....l......l....s...V.......S........u.....a..........p.........s......p........p...............h.........h...p....h..A.........h......p...R..h.............hI.slGsshslhlslhlhP..l....WA....G...p-....LH....p.ss...tshpt..l....Ap.l.-u............................................hsppY.hp.t.t.t..............................ttp..hhp..sa+...ss.lpSp..s.p.--sl.............hthA..hWEP..HG.at...hpaPW.p.pY.+..l..............us.hhRp.h..u......h.Ah.............tu.hl.......s.p....p....ss......php....h....htt.h.phu.p.supsLct.hu.t....lpphph........t.hh.th.t.Ah.ppLp..l.p.....................................................................................
..........................................................................................................................t.h.hh.hs..l.ch...hp.l..th..h.................................................................................................................................................................................................................... 1 50 144 177 +11577 PF11745 DUF3304 Protein of unknown function (DUF3304) Mistry J, Coggill P anon Pfam-B_956 (release 23.0) Family This is a family of bacterial proteins of unknown function. 20.60 20.60 21.10 20.70 20.20 18.60 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.67 0.71 -3.89 44 431 2009-01-15 18:05:59 2008-11-24 11:52:26 3 5 195 0 98 376 5 114.60 29 62.93 CHANGED sLuuCspst...................sshtuhs........lhuhNaTshsItpFhl..sGt......hGusshss.......huGGGGtsCChslPppW+.GhplpVcWp.......................ptphtp.p.tsphtptt.VslPphsth........sshlpV+FLPscclcl ........................................................................h...sssps............................sshsuss...lpshNahc.h..s..IhtFsV...sG..............tGusshu.........sGGGuhsCChsls.....GhsscVpWp......................................hpptphtt...thph...pp...pphs.lslPchpht........pshLpVHFhPsc+Vc.................. 0 10 27 46 +11578 PF11746 DUF3303 Protein of unknown function (DUF3303) Mistry J, Coggill P anon Pfam-B_958 (release 23.0) Family Several members are annotated as being LysM domain-like proteins, but these did not match any LysM domains reported in the literature. 
21.10 21.10 21.40 21.90 20.50 20.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.92 0.72 -3.61 33 128 2009-01-15 18:05:59 2008-11-24 11:53:10 3 1 72 0 67 134 549 85.20 24 86.48 CHANGED hallpapF.sspsp..s.pshtcahcsG..t.t-p..-GacllsRhHsPtsupGhslscA-sspslacahtsWpcthslph-lsPsloDpEhls .....................halhphpF.stppp....pshtcahcsG.....tsh..-GhchltRhtssssupsahlscu-ssptlacaht.Wtch..hch.-lsPVlosp-hs.... 1 7 33 49 +11579 PF11747 RebB Killing trait Mistry J, Coggill P anon Pfam-B_983 (release 23.0) Family RebB is one of three proteins necessary for the production of R- bodies, refractile inclusion bodies produced by a small number of bacterial species, essential for the expression of the killing trait of the endosymbiont bacteria that produce them for attack upon the host Paramecium. R-bodies are highly insoluble protein ribbons which coil into cylindrical structures in the cell and the genes for their synthesis and assembly are encoded on a plasmid. One of these three proteins is RebB. 18.80 18.80 19.50 20.70 16.40 17.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.23 0.72 -4.10 32 294 2009-01-15 18:05:59 2008-11-24 15:51:45 3 1 94 0 104 256 12 66.90 41 67.18 CHANGED ITDuVT.QsNscVlGpAPAhAMGslaQusup.uh.....uluhpNAVsuQQptphhupAusspGVstlhohsosus ..............ITDuVT.QsNsKVlG.-APAhAhGsLYQuhup.uh.....uhshcNAs..suQQQtphhupAuostGltplhulssss.s.......... 2 31 51 92 +11580 PF11748 DUF3306 Protein of unknown function (DUF3306) Mistry J, Coggill P anon Pfam-B_984 (release 23.0) Family This family of proteobacterial species proteins has no known function. 
25.00 25.00 42.50 41.60 20.80 20.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -3.43 58 273 2009-01-15 18:05:59 2008-11-24 16:08:42 3 3 268 0 74 236 87 114.20 32 54.64 CHANGED uFluRWSR.RKtss..................ts-psst.t......................ssssstssssptsptsphsspchht...................sLsDs-sl.s.ssDhssFhsc.sVsppl+psALR+Lap.sPtassh.DGLs-Ys-Da.ossssls .........FhSRWSp.RKhps..................ps-p.hpst.....................................s.sstss..s.ts.tspsshp.sppshp...................sL.ssscsl.stpsshusFlss..slspplK+AALRKLFh.sscFsth.DGLc-YstDYossssl.s........ 0 13 36 54 +11581 PF11749 DUF3305 Protein of unknown function (DUF3305) Mistry J, Coggill P anon Pfam-B_976 (release 23.0) Family Several members of this family are annotated as being molybdopterin-guanine dinucleotide biosynthesis protein A; however, this could not be confirmed. The family is found in proteobacteria. 25.00 25.00 26.40 31.60 23.20 23.10 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.91 0.71 -4.31 51 269 2009-01-15 18:05:59 2008-11-24 16:10:00 3 3 264 0 73 213 96 134.40 34 80.71 CHANGED htluVlhc+ps....ssRWsshpWphsullP.......sts.......phLppc...st..pahhsshslcLa+s-spuYhhNLsupsPslaVlh+tspss................hpshhlTAS.t-Atsa....hDsG-p.V-plPMP.sltuWlpAFlspHa..E...hKR+...Rc .......................htlhhpct.....suRWtshpWplpuhh.........................pshtps...p.......sshsLpLa+DEpssYRhNLoopsPpLFllhcpst-s..................hpsstlTAS.s.Aupa....hD.G-phV.sssMPhslpAWhcAFlscHt..E..th+RK+c.... 0 12 35 53 +11582 PF11750 DUF3307 Protein of unknown function (DUF3307) Mistry J, Coggill P anon Pfam-B_1017 (release 23.0) Family This family of bacterial proteins has no known function. 
20.60 20.60 20.90 21.00 19.70 20.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.07 0.71 -4.40 37 380 2009-01-15 18:05:59 2008-11-24 16:32:56 3 1 364 0 129 369 88 125.40 23 62.06 CHANGED hlllhLllAHlLuDFhhQspphlppKpp.....+sphLhhHhhIhhlhhhllhhhhth............hhhhlllsloHhlID...hhK.hh.pt......................shhhFllDQlhHlhl..llslhhhh..................h..h.hhhhh.phlhh ............hhhhLl.luHlluDFhl.Q........opphu.ctKpp..................pht.hLhhHhhl.hsls..hhll.h.h.h.h.s....................hhhhhll...hlo.HhlID.........hhKsthtpph.............................phhhF...l....l....DQhlHlls...lhhlhhhh............................hhhh............................................. 0 40 87 112 +11583 PF11751 DUF3308 Protein of unknown function (DUF3308) Mistry J, Coggill B anon Pfam-B_993 (release 23.0) Family Some members of this family of bacterial proteins are annotated as being one of the several TonB-dependent siderophore receptors, but this could not be confirmed. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.76 0.70 -5.10 103 599 2012-10-03 17:14:37 2008-11-24 16:45:15 3 4 144 0 319 828 889 278.90 23 82.68 CHANGED stuppcspaoQYhh.NhhhlNPAaAGspss.hplthhhRsQWsGl-......u..APpT.shohss.lsc..ps..............GlGlslhsDphGs...hspsshtssauYplpl....ucpsp...........L........oFGlpuGhh.hsl...chsphs..........sssDs.thtt.......sphhsslusGhhahs....c......paahGlSsssll....ppphspsstt.........sp.thpahhhuGYsFshs................phphcPuhhh+hstss....sh......plDlssphhhpt..............phhhG...suYR..................hs....pulushlGhphs.....shhlGYuYDhs..hss.lst..hssGoHElhlta .............................................................................................h..uQpsspaoQYhh.N.hhlNPA.....h..uG.t.....p..ss..hplthh.aRsQWs.Gl.c..................s..uP...pThshuhstslsp...pp................................GlGlpl..hsD.phGs.....hspsshtssauYp.lpl...........spp...tp................LuhGlpsGhh..phsl...Dhsclsh........................sssDs.hhsss..............sphh.s.husGlhahs........s...............paah..GhSs.plh........s.phs.sstt......................sp.thpahhtuGYhhsls....p...................phph.c...Puhhh+hs.tss.sh..........thDlssp..h..hhpp..............phhhG.....suYR....................hs........sulsshhGhphs...........phtlGYuY-hs..hss..lst......hssGo..HElhlta............................................................................ 0 176 283 319 +11584 PF11752 DUF3309 Protein of unknown function (DUF3309) Mistry J, Coggill P anon Pfam-B_1113 (release 23.0) Family This family is conserved in bacteria but its function is not known. 
20.90 20.90 21.10 20.90 20.70 20.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.76 0.72 -4.18 41 288 2009-09-10 20:38:52 2008-11-25 14:50:30 3 1 240 0 131 241 23 49.00 56 91.77 CHANGED uTILLIlLILlLlGuLP..sWsaSRuWGYuPSGhlGllllllllLlLhGRI ................GTILlIlLILlLlGulP..sWPaSRuWGYuPSGhlGllllIlllLlLhGRI.... 0 22 65 91 +11585 PF11753 DUF3310 Protein of unknwon function (DUF3310) Mistry J, Coggill P anon Pfam-B_1078 (release 23.0) Family This is a family of conserved bacteriophage proteins of unknown function. 20.10 20.10 20.10 20.40 20.00 19.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.28 0.72 -4.02 30 564 2009-01-15 18:05:59 2008-11-25 15:45:59 3 7 485 0 27 404 1185 61.10 40 51.67 CHANGED sVsp..PsHYp..psslEsI-hI.t.ht..hpphhuah.hGNslKYlhRhp+K.NG...hEDL+KAtpYlch ..................................lpp..PuHYs........h.......ssl...EhIDhI.p.hs.........h.tthuFs.hGNulKYl...sR..s....st....K..sG...................pEDLpKApaYlp............. 0 6 17 23 +11586 PF11754 Velvet Velvet factor Mistry J, Coggill P anon Pfam-B_963 (release 23.0) Family The velvet factor is conserved in many fungal species and is found to have gained different roles depending on the organism's need, expanding the conserved role in developmental programmes [1]. The velvet factor orthologues can be adapted to the fungal-specific life cycle and may be involved in diverse functions such as sclerotia formation and toxin production, as in A. parasiticus [2], nutrition-dependent sporulation, as in A. fumigatus [3], or the microconidia-to-macroconidia ratio and cell wall formation, as in the heterothallic fungus Fusarium verticilloides [[4]. 
25.00 25.00 25.80 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.52 0.71 -4.72 28 414 2009-09-11 07:54:36 2008-11-25 15:51:15 3 6 103 0 314 445 0 200.10 30 48.62 CHANGED shpYcLpVhQQPhRARACGtGs................tDRRPl..DPPPllpLplh-tss....ppt-hshlpssha.hlhssLhssc....shspsph.t.sssss.................................................................................................pshLoGshVuo....hthLc-ssst..................................usaFlFsDLSVRpEGpYRLpFpLhchstts..........................stsssshhs-lhS-sFsVaoAKcFPGht-STsLS+slA-QG..s+l+lR+....-sRt ..................................................................................................................................................t...apL.lhQpP.tARhsG.st..................................t-R+..Pl..sPPPllpL.h...lhc...tp...................tt.s.s....h.ps......a.hhhssL.ssp.......s.tp..sth...............................................................................................................................................................................................................................................................................................................tpsLhGshsuu....hhhL..pc.ssst......................................usaFlFsDLSVRpEGpaRLpFsLhchst.t............................................ttsshhtpshSpsFpVasuKcFP...Ghhp.S...T.Lo+thupQ........G..h+l.lR+.cs....................................................................................................... 0 113 182 260 +11587 PF11755 DUF3311 Protein of unknown function (DUF3311) Mistry J, Coggill P anon Pfam-B_1042 (release 23.0) Family This is a family of short bacterial proteins of unknown function. 
30.00 30.00 31.30 31.00 28.80 28.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.18 0.72 -4.44 56 487 2009-01-15 18:05:59 2008-12-03 10:22:49 3 2 384 \N 172 357 10 61.80 30 83.95 CHANGED hhllhlPhlshlhs.PhhNcscPhlhG.hPFhhaathhWlhloullhhlsaph.................................ttttpc ..........h.hLlllPhlsllh.s.shhNclcPhlhG.hPFhhaa.llWlhloullhsllYhh.........................................................tttt..................... 0 36 94 133 +11588 PF11756 YgbA_NO Nitrous oxide-stimulated promoter Mistry J, Coggill P anon Pfam-B_1030 (release 23.0) Family The function of ygaB is not known but it is a promoter that is stimulated by the presence of nitrous oxide [1]. It is regulated by the gene-product of the bacterial nsrR gene. 25.00 25.00 26.90 26.50 21.30 18.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.83 0.71 -4.05 25 846 2009-01-15 18:05:59 2008-12-03 11:49:24 3 3 813 0 106 453 8 107.60 52 93.30 CHANGED LsG+LtpEacTlpAMlclYC+pH...Ht..............tsthCp-CpsLhcYAcpRLDRCPYGc.sKPTCs+CPlHCYKPp.+cph+pIMRauGPRMLLpHPIhAIRHLlsE++sVPppsstpsS .................ss.+RIsREKhTIccMIcLYptcp....p................................uuu.spchcuLhsYApKRLD+CsFG..E...-..KPuCKpCP..VHCYpPupREcMKpIMRaAGPRMLa+HPl.LolRHLlD-..+.+sVPchPpc..+.................... 0 26 60 83 +11589 PF11757 RSS_P20 Suppressor of RNA silencing P21-like Mistry J, Coggill P anon Pfam-B_1073 (release 23.0) Family This is a large family of putative suppressors of RNA silencing proteins, P20-P25, from ssRNA positive-strand viruses such as Closterovirus, Potyvirus and Cucumovirus families. RNA silencing is one of the major mechanisms of defence against viruses, and, in response, some viruses have evolved or acquired functions for suppression of RNA silencing. 
These counter-defencive viral proteins with RNA silencing suppressor (RSS) activity were originally discovered in the members of plant virus genera Potyvirus and Cucumovirus. Each of the conserved blocks of amino acids found in P21-like proteins corresponds to a computer-predicted alpha-helix, with the most C-terminal element being 42 residues long. This suggests conservation of the predominantly alpha-helical secondary structure in the P21-like proteins. 23.30 23.30 23.50 24.40 21.70 23.20 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.62 0.71 -4.04 3 159 2009-01-15 18:05:59 2008-12-03 13:32:31 3 1 7 0 0 135 0 143.10 82 81.60 CHANGED +VGAVs+R........TDsLcphIp-FNsSLAllsAMKsDANoD+hsG+aS.AccKL+lLssIEAplRILLcllRR+llRDcLGlRShpDTFcFhlpKYSSL.ocVPaSEVhRpKlKhVuptVIc-LSREHKLDlSERuFPGa...GIL ............................................KVGuVVERLCDPSVTLTEVMDE.INDFNSFLALVHSMKSDMNGDHQDGHHEMGEHKSRLLCNIEAKLRlLLDIIRRRFTR-KLLCTSATDVhGFFVhRYMSS.SHTSFEoVMRTELKLVV.KAVLS-LSRsHKLDFSERAFsAY...GIL.... 0 0 0 0 +11590 PF11758 Bacteriocin_IIi Aureocin-like type II bacteriocin Coggill P anon Lonsdale D Family This is a small family of type II bacteriocins usually encoded on a plasmid. Characteristically the members are small, cationic, rich in Lys and Try, and bring about a generalised membrane permeabilisation leading to leakage of ions. The family includes aureocin A, lacticins Q and Z, and BhtB as well as an archaeal member. 25.00 25.00 25.90 52.70 24.50 23.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.57 0.72 -4.35 5 21 2009-01-15 18:05:59 2008-12-04 11:40:09 3 3 20 0 3 13 0 48.30 59 3.59 CHANGED MA.aL+ll+alAKYGoKAVpWAWcNKGKlL.-WLNlGpAl-WVspKI++IhGl ..AKYA.......IRYGAKYGKKAVKSGWDYGKKVAKSGWNKGKS...IAQKIPRIHKV. 
0 0 2 2 +11591 PF11759 KRTAP Keratin-associated matrix Coggill P anon Lonsdale D Family The major structural proteins of mammalian hair are the hair keratin intermediate filaments (KIFs) and the keratin-associated proteins (KRTAPs). In the hair cortex, hair keratins are embedded in an inter-filamentous matrix consisting of KRTAPs which are essential for the formation of a rigid and resistant hair shaft as a result of disulfide bonds between cysteine residues. There are essentially three groups of KRTAPs, viz: the high-sulfur (HS) and ultra-high-sulfur (UHS) KRTAPs (cysteine content: 16-30 and >30 mol%, respectively) and the high-glycine/tyrosine (HGT: 35-60 mol% glycine and tyrosine) KRTAPs. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.58 0.72 -3.72 22 198 2009-01-15 18:05:59 2008-12-04 14:46:09 3 2 24 0 66 249 1 53.40 52 89.57 CHANGED Ms..YYusY.aGGhGYGh........GaG.GLGhGY.GCGhu................G.u...........................sGYG.....GsGYG.Ga..GYGs........hpP.sYGtY .......Ms...YYusY.YGGhGYGh...hG.GLGhGY...G...CGhG........h..G..hG.s......t.GaGY......Gs.GaG..GY..GYGs........hts.hhtth........... 0 17 17 17 +11592 PF11760 CbiG_N Cobalamin synthesis G N-terminal Enright A, Ouzounis C, Bateman A anon Enright A Domain Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process [1]. 
Within the cobalamin synthesis pathway CbiG catalyses the both the opening of the lactone ring and the extrusion of the two-carbon fragment of cobalt-precorrin-5A from C-20 and its associated methyl group (deacylation) to give cobalt-precorrin-5B [2]. The N-terminal of the enzyme is conserved in this family, and the C-terminal and the mid-sections are conserved independently in other families, CbiG_C and CbiG_mid, although the distinct function of each region is unclear. 25.00 25.00 25.40 26.30 24.90 24.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.57 0.72 -4.28 150 1206 2009-09-10 21:45:10 2008-12-09 13:07:16 3 21 1181 2 319 1016 221 83.40 43 18.54 CHANGED htshlpphap...p.hcsllhlhAsGIsVRtlAPllp...cKpp.DPAVlVlD-sGpalIsLLuGHhGGANpLApplAphLGupsVITTAo- ............h..phlpphap...p.hcslIhlhAsGIsVRsl.....AP......l.lp...sK.ts.DPAVlVlDEpGpaVIsLLuGHhGGANpLu+plAth...L.....Gu.sPVITTAoD..... 0 108 216 280 +11593 PF11761 CbiG_mid Cobalamin biosynthesis central region Enright A anon Enright A Domain Members of this family are involved in cobalamin synthesis. The gene encoded by Swiss:P72862 has been designated cbiH but in fact represents a fusion between cbiH and cbiG. As other multi-functional proteins involved in cobalamin biosynthesis catalyse adjacent steps in the pathway, including CysG, CobL (CbiET), CobIJ and CobA-HemD, it is therefore possible that CbiG catalyses a reaction step adjacent to CbiH. In the anaerobic pathway such a step could be the formation of a gamma lactone, which is thought to help to mediate the anaerobic ring contraction process [1]. 
22.70 22.70 22.80 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.90 0.72 -3.75 143 622 2009-01-15 18:05:59 2008-12-10 15:34:47 3 14 614 0 166 505 48 88.40 20 21.68 CHANGED hhulDhhupt.hGapl...........................sshpshpplsutllsGctl..tlht-........................hh.thphhssp.stp................................................stlhlo....................thttstlhh+P ..........AlDhlApp.hshph.........................................................................................csh.ps.sKplsuhlVstcpVulh.-................................th.ht..h..h..h..p......p.s...phc................................................shlhlo.................h...........h.............................................................................. 0 63 122 149 +11594 PF11762 Arabinose_Iso_C L-arabinose isomerase C-terminal domain Bateman A anon COG2160 Domain This is a family of L-arabinose isomerases, AraA, EC:5.3.1.4. These enzymes catalyse the reaction: L-arabinose <=> L-ribulose. This reaction is the first step in the pathway of L-arabinose utilisation as a carbon source after entering the cell L-arabinose is converted into L-ribulose by the L-arabinose isomerases enzyme [1]. This is a C-terminal non catalytic domain. 25.00 25.00 26.90 31.50 19.90 19.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.35 0.71 -4.52 38 1016 2012-10-02 11:40:13 2008-12-17 11:58:04 3 3 992 9 186 624 33 114.70 58 23.32 CHANGED sKPplEVHPLuIGGK-DPARLVFsutsGsAlssSllDhGsRFRLllN-V-sVcPppshPpLPVA+sLWcPpPsLcsuupAWIhAGGAHHTsaShulosEplpDaAchsGlEhllI ........-KPhL-Vp.LGIGG.K.-.DP.ARLlFs.sp.s.G.P.AllASLIDL.G.-RaRLLVNslDsVcss+sLPKLPVApALWKspPsL.TAuEAWILAGGAHHTVFSaALshsphcpFAEhtsIElsVI.... 
0 60 122 157 +11595 PF11763 DIPSY Cell-wall adhesin ligand-binding C-terminal Wood V, Coggill P anon Pfam-B_85585 (release 23.0) Family The DIPSY domain is characterised by the distinctive D*I*PSY motif at the very C-terminus of yeast cell-wall glycoproteins. It appears not to be conserved in any other species, however. In fungi, cell adhesion is required for flocculation, mating and virulence, and is mediated by covalently bound cell wall proteins termed adhesins. Map4, an adhesin required for mating in Schizosaccharomyces pombe, is N-glycosylated and O-glycosylated, and is an endogenous substrate for the mannosyl transferase Oma4p. Map4 has a modular structure with an N-terminal signal peptide, a serine and threonine (S/T)-rich domain that includes nine repeats of 36 amino acids (rich in serine and threonine residues, but lacking glutamines), and a C-terminal DIPSY domain with no glycosyl-phosphatidyl inositol (GPI)-anchor signal. The N-terminal S/T-rich regions, are required for cell wall attachment, but the C-terminal DIPSY domain is required for agglutination and mating in liquid and solid media [3]. 25.00 25.00 71.40 71.40 23.40 18.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.72 0.71 -4.33 4 5 2009-01-15 18:05:59 2008-12-18 10:57:59 3 1 2 0 5 5 0 126.40 31 15.91 CHANGED NDsIsPSYV.Yh-oN...........G.hhASopGss-G..NlFYYDSTlcRllTCss.RPhYplhp-Ds......souFpIhKssDGp..Fphs.usssE.hcl+l.ssGplahTo..hDscsushss.pshRAssVhLpA NDSISPSYV.Yh-SN...........GauhASosGss-G.lNlFYYDSTlcRIlTCCsVRPlYplhp-Dst.....uoSFsIhKNuDGs..FTasEuusuEPhcL+lLsDGRlahTSs.hDscs.ushs-.cshRAsNVsLpA 0 4 4 5 +11596 PF11764 N-SET COMPASS (Complex proteins associated with Set1p) component N Coggill P anon Pfam-B_7375 (release 23.0) Domain The n-SET or N-SET domain is a component of the COMPASS complex, associated with SET1, conserved in yeasts and in other eukaryotes up to humans. 
The COMPASS complex functions to methylate the fourth lysine of Histone 3 and for the silencing of genes close to the telomeres of chromosomes [2]. This domain promotes trimethylation in conjunction with an RRM domain [4] and is necessary for binding of the Spp1 component of COMPASS into the complex [3]. 25.00 25.00 26.00 25.40 24.60 24.80 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.14 0.71 -4.35 31 286 2009-01-15 18:05:59 2008-12-18 13:08:07 3 12 199 5 190 257 0 151.20 32 11.68 CHANGED cpphsthhsL.sl..pshlhDcEDhphL+pshpch..t-hs.......NhphWsW+ppphpt.p...tp.s................spp..cpp......h.sthlpssTGsARoEGa+KIscp-KucYLPH+h+sppshpphphcspt.................................ssttp.suppsSRssRussRRhlusIsspp..h...us-SDlL ......................................................................p.pthh.Lhsh...shshDpEDhpaLphsh....cch.......h.tpc.s...s..h...hNsohWla+pt.hp.th...........................................pp..ppp......hpsthhppsTGsARoEGah+Iscp-Ks+YLspph.hspptt-p.tspspt...................................sspsp.ssppsupppRuppRRLluslssp.......usD..SDLL.................... 0 43 78 141 +11597 PF11765 Hyphal_reg_CWP Hyphally regulated cell wall protein N-terminal Coggill P anon manual Domain The proteins in this family are all fungal and largely annotated as being hyphally regulated cell wall proteins, and several are listed as the enzyme EC:3.2.1.18. This enzyme is acetylneuraminyl hydrolase or exo-alpha-sialidase, that hydrolyses glycosidic linkages of terminal sialic acid residues in oligosaccharides, glycoproteins, glycolipids, colominic acid and synthetic substrates. 
25.00 25.00 28.60 35.30 22.80 24.60 hmmbuild -o /dev/null HMM SEED 332 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.93 0.70 -5.68 8 149 2009-01-15 18:05:59 2008-12-19 16:57:30 3 11 20 0 117 153 0 315.40 33 36.77 CHANGED llslhhllssshAlsITps+VDRGslsLslGDITIpuGuaWSIIDNulosFsGsLpVpsuAGLYIoSTSsLLALQVTLsShLuSIpNsGlIoFNShsSLTuosYsLlGlSFsNsGEMYLuASG.shssshulTuuoWoNsGLLsFYQNpRooGsVSLGsPhGoIsNNGQICLaNclYpQTTpIsGoGChTANpsSoIalSNsLLslsosQsFYLADSpSSlhspAlSsspTFNVYGFGNGNpIGLsLPLlu.s.hssAYsYDsoTGILTLR.sGhLuQcFNIGhGYssuhFpIVTDsusGLsSs.hGuloYSGPVPsRsLPusCQ.PCKPlPsuP ..........................................................................h..hhhhht..hh.u...h.pls.p.spl..shGsh..shsh..G.Dl.pIpsG.uhaSIl.sN...s...h....ssahGsl..pVppsuuhYIooss.s..h.l.uLpVs...l.h......s.hhts..lpNsGllsh.suh.p.ohs.ussasl.hGh..o.FpN..sG..-hah..uu..sG....hs..ss.s.hs...l...s.u.s.s.W...pNs..GLlsa.aQ..s.p.+..o.s.....Gs.....l.p.L..G.t........s.......h....s.....oI...sNsGpICLpNps.apQ.s.o.sIpG.oGClo..sspsSsl..a..l....sN..s.hh...s..lsssQshYLssspS..Sl.h............s..p.uh..u.s....sp.Ta..pVhGFG.....N.....G..N+..I...GL.......shsL..h...s......h.................ssasYsssoGILTL.......p......s..sh.........h..s.pp...FsI....GhGY-sshF.p.l.s.o.D.s.ut.Gl.sshhGu..lpYsGssPs.t..sh.Ps..Cp.sCpshPthP........................................ 0 12 44 117 +11598 PF11766 Candida_ALS_N Cell-wall agglutinin N-terminal ligand-sugar binding Coggill P anon Candida_ALS Domain This is likely to be the sugar or ligand binding domain of the yeast alpha-agglutinins. 
25.00 25.00 28.10 27.70 20.00 18.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.73 0.70 -5.47 24 165 2009-01-15 18:05:59 2008-12-22 09:42:18 3 36 42 5 84 174 0 240.80 38 24.30 CHANGED sWsl.DGsssssGDTFTLpMPsVFKFpss..psolpLss....ssssYAoCsh.sG-hhssaSpLpCosssslss..spps.GolshPlsFNsGGSusssDLpsuppFsuGsNTVoFsDGspplSsoVsF-uus.......ssssshlhtuRhsPolsphphahluspCsNGYsSGs...lGaossss..tlDCSslpstIosplNDWhaPpoucs.hsaohoCousu..lhIoapN.lPAGYRPFl-uhh...sssssshphsasscYpC ......uWsl.DGossssGDTFTLsMPCVFKFpos.....psolsLss......sussYATCshpsG-.hssaSoLpCTlss....slss..stpAhGolohPlsFNlGGSusssDLps.SpsFsuGsNTVoFsDGs...pplSssVsFpsss..........ssssshl.h..tuRhhPolsc.hps.hhluspCsNGYpSGs......lGas..ss..ss....sssID.CSslclu..Io..NslNDW.aPh.oucs....hoaohoCousu..lhIoapN.lPAGYRPFlDAhl....sss..ss..shshsYpNpYsC........... 0 10 23 82 +11599 PF11767 SET_assoc Histone lysine methyltransferase SET associated Coggill P anon Pfam-B_8752 (release 23.0) Domain SET domains are protein lysine methyltransferase enzymes. SET domains appear to be protein-protein interaction domains. A subset of SET domains have been called PR domains. The SET domain consists of two regions known as N-SET and SET-C. SET-C forms an unusual and conserved knot-like structure of probably functional importance. Additionally to SET-N and SET-C, an insert region (SET-I) and flanking regions of high structural variability form part of the overall structure [2]. This domain is found in fungi associated with SET and N-SET domains. 21.00 21.00 21.10 45.10 20.10 20.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.16 0.72 -4.66 20 114 2009-01-15 18:05:59 2008-12-22 13:49:53 3 4 113 0 88 118 0 69.40 38 5.99 CHANGED aIsccaVPspssosp-lK+pL+pYsWsclhs-+oGaYIlFsshc....EAc+CapspsspthFsac.....hhMc ..FIu+paVPlhssolsHlK+RLKtasacclR.sD+TGaYIlFpsoc.G...p.EsERCacssstpthFsYp.....MsMc........... 
1 15 43 75 +11600 PF11768 DUF3312 Protein of unknown function (DUF3312) Mistry J anon Pfam-B_5984 (release 23.0) Family This is a eukaryotic family of uncharacterised proteins. This family shows similarity to WD40 repeat proteins. 26.20 26.20 26.20 26.20 26.10 25.20 hmmbuild -o /dev/null HMM SEED 545 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.81 0.70 -12.64 0.70 -6.28 7 103 2012-10-05 17:30:43 2008-12-22 14:09:06 3 6 72 0 71 106 0 404.70 39 68.62 CHANGED KppasEuRGh.Wol+N...K+Pp+LRDsLKcLEEhLppppslhs+W+sphhhQLhhusGlLlp..ls..sspLp+VhhD+.LVGKLhu-pIoDAllTcoaIllSa.spNplshVpLpK.sshs....ppLEKlShhD.KIpas.I...PGsssR+lsR+LslNupQ-lslsWhtssps-V.aPWpPssp-pDRANlhlhphsus.pLElLsYhhTEhDPlsVpFSh.psaQlhTVEpplutps-...shhDsCIYEhs+sKhcRVuVTpIPLpoplsssAhsssc-KLhLGC.DuSllLaDttRtlThLs.pAslhPshluWHssuullhVusppGpLQsFDlALuslphQLluE..-stPpusLpLppaF....sss.sLsclpWu.tP.lspptcshspssshLhLhF-pGPLuslphhhGshh+GpltsstLhu.....pYI+hsQV-cAlslLtShNWsThGt.ChhuhptIsNalh+t.Lss-REsQLEtsLGoFhsPs+PLs-pThLEapD.VschsRRFFHaLLRhppFpKAF.LAlDIsstDLFMDlH.lAtshG-lsLAc.A .........................................................................................K.ph.pt...Rsh.hh.cN...+pst+L+csL+.lE-hhpppphlhspWpst....h..lhhssGllhp..ls..sspl.phhh-+.LhGKL..p.lsDshhoppallhsa.tpsplshl.hp+..t........pt.-Klu.h-.+l.hh.l...sG.s..s+phtR+LslNt.tDhh..lsWh..sp..s-s.aPWtP.h.p-p-RANlhlhthtth.pLElLs.hhTEhsP..lsspFs.hpps.plhTlEppluhct-..........s.s-.ChYEh...........s......p.s...K.hp.p.h.......ulTpl....Pltopshssshsssc-+LlLGC..D.uolhLa-............hpp..sTh...hs.ps.......t.ll.........PshluhHssuullhVusppGpLQhFDhALuslphQLluE...sh.Ppph...Lphs..phF......thp.sLhphpW..ts.h...t.t.sph..shL..hLhacpGP..luslhhhh.GhhhpG..plt..tlh.......pYl+hsph.cAlslLtuhNWsT....h.Gt.ChhshptIs.NaLh+t.LsspREsplEtsLGsFasPs+PL.-ssh.EapD..lpchsRRFFHaLLRhphatcAF.lA.DltshDlFhp....................................................................... 
0 29 34 52 +11601 PF11769 DUF3313 Protein of unknown function (DUF3313) Mistry J anon Pfam-B_1303 (release 23.0) Family This a bacterial family of proteins which are annotated as putative lipoproteins. 20.70 20.70 21.50 20.70 20.60 20.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.00 0.71 -4.95 64 738 2012-10-02 17:45:13 2008-12-24 10:50:44 3 4 658 0 152 414 39 195.50 44 87.44 CHANGED lhluGCuus.sp.......uuhlss...astLppsp...sstsshpahssshc........hspYsplhl-Plph...............h.t.spsshp.lsp.psh....pplssahsptlppplupp..hpls..spP.uPsslhl+sAlTslssss.slpsh.cllPhuhlhsssptu....sGtpstssplshEsclhDusoscllutsVc+ttupsh...stsspsh.shsss+ssl-paApchsp ...........................LALoGCASKlspP-p..............YSGFLss...YScLpEoTS.AoG+PlLRWVDPsac........tSp.YD.s.IlasPlTY......................YPs.PKPoo.Q.V.uQ...psL....-clLsYsNschKcAIupR....pPLV.....TTs..GP+oL..IhRuAITGVDTSK......E....G....LQFY....EVlP....VA.LlVAGTQhA.............TG+RTMDTcLaaE..uELIDAATNKPVlKVVRpGp.GpcL.................sNpoTPh.u.h-slKpVlD-hAsDs..p.............................................................. 1 30 68 109 +11602 PF11770 GAPT GRB2-binding adapter (GAPT) Mistry J anon Manual Family This is a family of transmembrane proteins which bind the growth factor receptor-bound protein 2 (GRB2) in B cells [1]. In contrast to other transmembrane adaptor proteins, GAPT is not phosphorylated upon BCR ligation. It associates with GRB2 constitutively through its proline-rich region [1]. 
20.80 20.80 22.60 21.10 19.90 19.60 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.96 0.71 -4.55 3 26 2009-01-15 18:05:59 2009-01-05 11:29:25 3 2 23 0 15 20 0 149.50 60 96.31 CHANGED MLKsCGNosVAVSVGISLLLLLVlCGIGCVWHWKHRsoTRFTLPKFLQRRSSR+KDsTKTFSLSPplIGPRHKsSVETQDHKSAuKcNNhHDNYENVEVCPPKAKGcTDKcLYENTtQSNhEEHIYGNETssDYYNFQKPSPSEsPQDEDIYILPDSY .....MLKSCGNs.sAISVGISLLL.LLVlCGIGCVWHWKH...+suT.RFTLP+FLQRRSSR.+K.ssTKTh.LuPclIG.+HKhSVcTpDH+Sus+ssslHcNYENVEsGPP+sKtcTDKELYENTpQoNFEEHIYGNETuS-YYNFQKPpsScsPQDEDIYILPDS... 0 1 1 3 +11603 PF11771 DUF3314 Protein of unknown function (DUF3314) Coggill P anon SwissProt (UPF0575) Family This small family contains human, mouse and fish members but the function is not known. 25.00 25.00 150.50 42.80 19.80 19.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.41 0.71 -4.48 3 44 2009-01-15 18:05:59 2009-01-05 12:59:39 3 2 29 0 22 33 0 150.10 65 57.07 CHANGED MFASFuFssLEE.sPLSVSHFuIGQssIc.uh+VSIFRYCsPTPYLASuhTG.LYK+MRWNVEtssEusGcGcsaDu..........SlsEYYFLCYEDThEsAc........EutsossDSsuclsRlWSIGRWVpotPpssTDDllDWlLCPlPsGsYKQLLsLG.EEPSSshATDLLVQlL ...MYASFGFVsLEETsPL....SISCFFCGRFSIS.SH.-VSIFRYCsPsAYTAS+FPRYLYKKMRWNLEsTs.E.ssupGp-.............ShVDYYFLCYRDTWE............DsGpoPAN..SCPQIQKLWSIGRWVPLGP...s.-DDLhSWILCPQPsGDYQQLLTIGFEEPSphLATDLLVQlL............. 0 1 3 7 +11604 PF11772 EpuA DNA-directed RNA polymerase subunit beta Mistry J, Coggill P anon Pfam-B_4675 (release 23.0) Family This short 60-residue long bacterial family is the beta subunit of the DNA-directed RNA polymerase, likely to be EC:2.7.7.6. It is membrane-bound and is referred to by the name EpuA. 
21.30 21.30 21.60 23.40 20.50 19.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.24 0.72 -4.52 20 529 2009-01-15 18:05:59 2009-01-07 11:27:24 3 1 523 0 56 207 0 47.00 48 72.40 CHANGED lll.VllLhllsLslGLMlGYullGsG.sPhslLp.cpWpcIlshhptp ........lll.VllL.uhLsLslGLMlGYGllGcG.sPauILSPspWpcllpKFTGp... 0 14 30 42 +11605 PF11773 PulG Type II secretory pathway pseudopilin Mistry J, Coggill P anon Pfam-B_4690 (release 23.0) Family The secreton (type II secretion) and type IV pilus biogenesis branches of the general secretory pathway in Gram-negative bacteria share many features that suggest a common evolutionary origin. Five components of the secreton, the pseudopilins, are similar to subunits of type IV pili. Pseudopilin PulG is one of the secreton pseudopilins, and is found to assemble into pilus-like bundles [1]. PulG interacts with proteins H, I and J within the multi-protein complex as well as blocking extracellular secretion and reducing the amount of PulE protein as well as the amounts of PulL, PulM, PulC and PulD when G is over-expressed [2]. In Klebsiella the pilus-like structure is composed largely of PulG [3]. 25.00 25.00 28.90 28.90 22.30 20.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.44 0.72 -4.25 9 340 2009-01-15 18:05:59 2009-01-07 11:56:40 3 1 333 0 23 146 0 79.20 58 88.03 CHANGED ILLESLlAlAlLssIsoLlLoplspsRpctspphpppElLslApMAlQTpQccLolNGlsIplpcopptlhlacpucEllcl .ILLEAlVALAIFASIATLLLGQIQ+NRQEEAclLQKEEVLRVA+MALQTGQsQloINGVEIQVhuSEKGLEVYHGoEpLLuI.... 0 1 5 13 +11606 PF11774 Lsr2 Lsr2 Mistry J anon PD026347 Family Lsr2 is a small, basic DNA-bridging protein present in Mycobacterium and related actinomycetes. It is a functional homologue of the H-NS-like proteins [1]. H-NS proteins play a role in nucleoid organisation and also function as a pleiotropic regulator of gene expression [1][2]. 
21.00 21.00 21.30 21.00 20.30 20.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.38 0.72 -4.14 69 720 2009-01-15 18:05:59 2009-01-07 13:29:32 3 16 402 5 243 584 10 101.80 37 66.89 CHANGED MAcKVhVp..LVDDlDG.utA-...ETVpFuLDGVsYEIDLSscNAs+LRpsLp.alssuRRlG...ut....c+ut....................sssps.pssts+cpsssIRcWARpNGapVSsRGRIPu-ll-AYctA .............................................................................................MAp+hhl..pLlDDlDG..s.A-....-oVpFuL..D..GhsYpIDLospNA.pcLRp...s...Lt.a..lttuR+su.tp.......................................ttsts...pssssptpsstlRpW....A+p.sGap......V.....SsRG..RIss-lh-AYctA........................................................................................................................ 0 94 195 223 +11607 PF11775 CobT_C Cobalamin biosynthesis protein CobT VWA domain Bateman A anon Pfam-B_10956 (release 9.0) Domain This family consists of several bacterial cobalamin biosynthesis (CobT) proteins. CobT is involved in the transformation of precorrin-3 into cobyrinic acid [1]. 
20.10 20.10 20.10 20.10 20.00 20.00 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.29 0.70 -4.92 2 462 2012-10-10 16:07:06 2009-01-07 13:44:22 3 6 430 0 134 465 1894 195.60 50 32.61 CHANGED sFh+cc-tphhsssVp.llD.SGSMtGR.IplAhssA.hlAcsL-RhsV.s.IhGFTTh...Gu.shEsh.ttGhst..uhh.slh+.h.cpAsuP.hRAR..hGhhhcph.LhpNlDGEuls.htphhhGR.Ep+KIhhhhSDGAPs.......sAGshhcpHLRplhcEIEThS.l-LhAIGlhpDssR.YY+phshlsssEELGsulspcLuclh.t ...........................................saKpE+-.scF+DTVVTLLlDNSGSM.R......G......RPI....slA.AhsADlLARTLERC.G.VKlEILGFTT......+....A......W.K.G...G.p.u.REtWltt....G..+..P......t.t..PGRLNDLRHIlYK..s..ADsPWRRA....RpsLGLMh+.EGLLK.ENIDGEAL..WAapRL.huRsEpR+.ILMlISDGAPVDDSTLSsNsus..YLEc.HLRtV.I....ptIEs.....+u.sVE..L.lA..IG.I..G..H....D...V...TR...YY..p..+.AVTIscs-pLuushhppLutLF..p........................................................................ 0 34 83 104 +11608 PF11776 DUF3315 Domain of unknown function (DUF3315) Mistry J anon PD456999 Family This is a Proteobacterial family of uncharacterised proteins. Some of the proteins in this family are annotated as being putative membrane proteins. 23.30 23.30 23.40 23.60 23.20 23.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.49 0.72 -4.44 112 1010 2009-01-15 18:05:59 2009-01-07 13:51:14 3 4 799 1 169 569 18 51.00 42 38.50 CHANGED Gc+lP.spYRsc.pYslcDW+tapLssPs+Gh+Wlths.GcYlLlshuoGlIlpl ................G-plP.phYRsc.cYsIcpWp.h+.s.LPAPstG..p+WshhG..GsYVLIssssGpIlc......... 0 21 61 115 +11609 PF11777 DUF3316 Protein of unknown function (DUF3316) Mistry J, Coggill P anon Pfam-B_4718 (release 23.0) Family This family of bacterial proteins has no known function. Several members are, however, annotated as being putative acyl-CoA synthetase, but this could not be confirmed. 
24.40 24.40 24.40 24.50 24.20 24.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.35 0.71 -4.31 23 216 2009-01-15 18:05:59 2009-01-07 14:36:55 3 1 189 0 28 150 0 126.10 28 67.67 CHANGED MKplhlls.....hhlhhussuhA...............shtpp.pspolpssshso+psAYsh........Ghshlpshptpos....ppLstcLplhsp..hshpsh+lpD.upVoVpchhpss...............GplpYpshlclcac.....................YphRsuN ..............................................................Kphllhh.....hhlhhu.ssuhA...............th..t.pscol.p.us.hhsoppphY.us........Ghspl.ph.h.Pp....phhup-Lpl..hpp...hs.+s.s+lps..spVs.VQphhpts...................GhlpYpshl+hsap................................YphRpoN............................................................. 0 4 14 24 +11610 PF11778 SID Septation initiation Wood V, Coggill P anon Pfam-B_41015 (release 23.0) Family This family is required for activation of the spg1 GTPase signalling cascade which leads to the initiation of septation and the subsequent termination of mitosis. It may act as a scaffold at the spindle pole body to which other components of the spg1 signalling cascade attach in pombe [1,2,3]. In S.cerevisiae it is both required for the proper formation of the spindle pole body outer plaque and may also connect the outer plaque to the central plaque embedded in the nuclear envelope [4]. 25.00 25.00 27.80 25.80 21.30 18.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.72 0.71 -4.27 9 50 2009-01-15 18:05:59 2009-01-08 14:46:36 3 1 38 1 31 52 0 130.50 28 20.46 CHANGED slpplpGNssphlT.-sltpph.phs+Ep..YssLtl-plDplshVphQNhlKslllhLcIPasKLpp+lPLluIpLpaEpphLhpFANpLHhhlYsc.lshKphTp.Ahs-alps+shtplcHPLc.CL-sLacpltpp ........................htpl.Gssuptlshcsltpsh..clspcp...YspLtl-plDslshlpLQNllKslllhltIPatplhcplPhlsIpLphEhtlltpFsscLHhplYs..pphshK...p...hss...Ahppahps.pp.hsplcH.Lc.CLptLacplh..t................................... 
0 6 18 30 +11611 PF11779 DUF3317 Protein of unknown function (DUF3317) Wood V, Coggill P anon Pfam-B_3618 (release 23.0) Family This is a short family of proteins conserved from fungi and plants to human. One each of the human and mouse members is annotated as being androgen down-regulated protein expressed in mouse prostate, with a potential signal transduction function, and all appear to be membrane proteins. 25.00 25.00 25.00 25.50 24.20 24.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.71 0.72 -4.53 27 282 2009-01-15 18:05:59 2009-01-08 14:49:58 3 3 177 0 194 250 0 56.70 29 53.89 CHANGED thtcalphhhapYplThulYhls.PhE+hlhNshl....hhllthllhuhhha....lPspltthhp ........h..hpalphhhYpYplshulYML-.PWE+hlF......Noll....lslluhlla.ssala....hPpalhhhh.t................ 0 44 78 133 +11612 PF11780 DUF3318 Protein of unknown function (DUF3318) Mistry J anon Pfam-B_1341 (release 23.0) Family This is a bacterial family of uncharacterised proteins. 22.30 22.30 24.30 23.80 21.50 21.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.67 0.71 -4.63 18 198 2009-01-15 18:05:59 2009-01-08 14:52:58 3 1 158 0 68 175 98 138.50 29 75.94 CHANGED hp+LpshhPsuhRhplhls.Apscptpllpuhh.h....sptphhlshshWhshshspRslhhh+psuhhps.......ppa.tlshh.ulslsGhhssslphsptcsltlslAuGLuuhushplWppspu...spsplsADp.tAlchAscpGY .......h.pcLpshhPhtlRp.lhlhtusspp.pLhpstt.hR.....splphhhsLpha.shs.hspRsLhhh+p.suplps..................spaPh.uh.h.uhlhhGhstulhchhspcuLthshAlGLuuhtuYRlWppspu....ctshtADt.tA.t.u.c......................... 1 11 37 56 +11613 PF11781 RRN7 RNA polymerase I-specific transcription initiation factor Rrn7 Pollington J anon Pfam-B_4705 (release 23.0) Domain Rrn7 is a transcription binding factor that associates strongly with both Rrn6 and Rrn11 to form a complex which itself binds the TATA-binding protein and is required for transcription by the core domain of the RNA PolI promoter [1,2]. 
23.30 23.30 23.30 23.30 23.20 23.20 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -8.02 0.72 -4.45 25 186 2012-10-03 10:42:43 2009-01-08 17:32:32 3 6 161 0 119 197 6 34.60 32 6.39 CHANGED hphh.cs.t..Cuh..Cput....hhhsc-GhaaC.pCGsht- ..........h.ct....Csp..Csup....hhhtc-GthaC.psGphhE... 0 24 50 90 +11614 PF11782 DUF3319 Protein of unknown function (DUF3319) Coggill P anon Pfam-B_4745 (release 23.0) Family This is a family of short bacterial proteins, a few of which are annotated as being minor tail protein. Otherwise the function is unknown. 25.00 25.00 28.40 46.60 19.40 22.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.06 0.72 -4.18 9 129 2009-01-15 18:05:59 2009-01-09 10:19:37 3 1 128 0 20 64 2 77.90 55 71.29 CHANGED MtpslY.RGa.LpsusussshW+spIKs+llpGsLsAVKKSIDWWhDTuolIDP+EFsulsppp...tsouspoEsapGapIKNDTGEPNtW .Mth.hY.pGF.Lpsstspsp.WplpI+sp.hsGsLuAVKKSIDWFCDTASIIDPKEFpSlGpK+...psuuuspEpFNGaTIKNDTGEsNcW..................... 0 2 5 14 +11615 PF11783 Cytochrome_cB Cytochrome c bacterial Mistry J, Coggill P anon Pfam-B_4681 (release 23.0) Family This is a family of long bacterial cytochrome c proteins, found in Proteobacteria and Chlorobi families. 
21.00 21.00 21.10 22.90 19.30 20.80 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.46 0.71 -4.70 41 134 2009-01-15 18:05:59 2009-01-09 14:18:37 3 13 96 1 78 132 23 171.90 31 31.52 CHANGED KhhWDWSpAGp..........................p.....t.sp.sahppKGsF....patc.sltPpYtWaNG.phphhhhs-tlc.s.............sps...ltlstPhGshsDst.u+IaPFKlapG+QPYDtptppllsscha..G.................tuhWss.....aDaspAlptGhc.............................................ts......Glta.....SGca.....................................sFlcTphaaslsHhVuPK-cA.L..pCs-CHspsuc .........................................ch.WDWupAGp.t................t....................tt.sp.sY.thKGsFtatc.slhPtYtWaNG.phph.hhhs-ths..s...............sts....splstPhGshsDsp.u+IaPFKlapGcQsaDttpppllss..cha.................t...suaWps.....aDaspAlptGhc...................................................ts......Gl.a.....SGca.................................................................................................sFlcTphaaslsHhVuPKc.cA.L..sCs-CHssss.... 0 23 48 71 +11616 PF11784 DUF3320 Protein of unknown function (DUF3320) Coggill P anon Pfam-B_4770 (release 23.0) Family This family is conserved in Proteobacteria and Chlorobi families. Many members are annotated as being putative DNA helicase-related proteins. 25.00 25.00 25.20 26.00 22.10 24.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.52 0.72 -4.44 41 216 2009-01-15 18:05:59 2009-01-09 14:23:31 3 10 211 0 81 219 9 51.20 28 3.04 CHANGED ssthhpts.pshLtphlpcllphEGPlpcshLscRltpuaGhpRsG....sRIppt .........t.hptp.tshLtphlppllcsEuPIppshLspRltpuaG.lpRsG....s+lcp........... 0 28 46 61 +11617 PF11785 Aft1_OSA Aft1 osmotic stress response (OSM) domain Mistry J, Finn RD, Wood V, Wahls W anon Manual Domain This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. 
The OSM domain has been shown to be involved in the osmotic stress response [1]. 21.60 21.60 22.00 22.00 20.80 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.01 0.72 -3.43 10 122 2009-01-15 18:05:59 2009-01-14 09:32:12 3 6 114 0 86 120 0 51.10 40 9.37 CHANGED sosu-.sspuLAPPsRsusptp.......ssTPDYFuulps..shuLEPNPFEQSF......Guuss-......TP ...................................s.............s.p.s.............ssss.shhst.ts.....shsLEPNPFEQSF.........Guss.-...s................................ 0 23 45 72 +11618 PF11786 Aft1_HRA Aft1 HRA domain Mistry J, Wood V, FinnRD, Wahls W anon Manual Domain This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The HRA domain is involved in meiotic recombination. It has been shown to be necessary and sufficient to activate recombination [1]. 25.00 25.00 25.00 26.50 24.60 23.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.77 0.72 -3.72 5 90 2009-01-15 18:05:59 2009-01-14 09:44:44 3 6 87 0 65 91 0 79.20 61 14.87 CHANGED PuuusP..asW.usuSLRoG....PLSPAMLsGPTG......SsDYFSssu+hpu........GFPTPNESSLRTGLTPG.GuG.............SMFPAPSPNTQAlLsp ............sso..suss..asW.........ssSLRoG....PLSPAMLuGPst......ssDYFssht+..............GFPTPNESSLRTG....LTPG...GuG........................SMFPA.P.SPNoQAlht................................................. 0 16 33 52 +11619 PF11787 Aft1_HRR Aft1 HRR domain Mistry J, Wood V, Finn RD, Wahls W anon Manual Domain This domain is found in the transcription factor Aft1 which is required for a wide range of stress responses. The HRR domain is involved in meiotic recombination. It has been shown to be necessary and sufficient to repress recombination [1]. 
25.00 25.00 34.60 33.50 19.20 22.40 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.47 0.72 -2.93 9 76 2009-01-15 18:05:59 2009-01-14 09:46:02 3 4 72 0 57 73 0 75.40 49 14.56 CHANGED uATPuTI-FHRTAlsAA...................cpss..usTSpP.pstsptsp....sshshpPsps....PFs.HDssDAANGLaMLApGs ....GATPoTlDFHRTAlsAA.........................pp.....s.....t.............u...sT...SpP.p-.sst.hpt......sph-hcssss.....PFs.HDssDAANGLFMLApGt. 0 9 26 45 +11620 PF11788 MRP-L46 39S mitochondrial ribosomal protein L46 Wood V, Finn RD, Coggill P anon Pfam-B_1897 (release 23.0) Family This is the L46 subunit of the mammalian mitochondrial ribosome, conserved from plants and fungi. 21.90 21.90 22.70 22.10 20.90 21.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.41 0.72 -3.40 37 276 2009-01-15 18:05:59 2009-01-15 13:02:55 3 6 247 0 192 272 0 110.20 24 36.02 CHANGED hplpuullLpR.PllotchsshEppa..a.........pcL.c+l.hpasphhhachsohs-h.ca.t.ptp.hppp.t..h..sh.ph.tpphpcp.cpplt.........................................pPssRlT..-AD ..........................clhuullLpR.PllopplsshEppa............pcL.c.plphphsth..achts.t-......ch.....ptph.h.pp...p..............s.tth.h..hp.......pht.p.c.thtt..............................................................pPtsRhTcAD....................................................... 0 54 98 153 +11621 PF11789 zf-Nse Zinc-finger of the MIZ type in Nse subunit Wood V, Coggill P, Finn RD anon Pfam-B_1696 (release 23.0) Domain Nse1 and Nse2 are novel non-SMC subunits of the fission yeast Smc5-6 DNA repair complex. This family is the zinc-finger domain similar to the MIZ type of zinc-finger [1]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.23 0.72 -4.45 22 399 2012-10-03 15:03:13 2009-01-15 16:14:09 3 17 271 2 265 1659 117 59.30 29 18.81 CHANGED D-ltlpp..sphsLpCPlThpshcpPVps+.........pC.sHsFE+puIhphl.....................pptppscCPl.uCu ........................................-l.hpt....t....hshpCP.lT...tt.......h...c...p...P..Vpsp........................pC...sH..s..a.-+.c.uI..hphl.............................................p.p..p..pp.h..pCPhhGC............................................ 0 102 155 226 +11622 PF11790 Glyco_hydro_cc Glycosyl hydrolase catalytic core Wood V, Coggill P, Finn RD anon Pfam-B_1680 (release 23.0), IPR013781 Family This family is probably a glycosyl hydrolase, and is conserved in fungi and some Proteobacteria. The pombe member is annotated as being from IPR013781. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.93 0.70 -4.94 51 454 2012-10-03 05:44:19 2009-01-15 17:13:44 3 34 246 0 280 584 106 211.60 20 50.29 CHANGED Gluas......sss.........spth....ttsusloWh.YNWs.hssushsst.........EFVPMlWGsp.st.......shhsslpss..............tsspalLuFNEPDh..ss...QushsP.psAAphahphhpP........tss+lsuPul..ssu..........sh......WhppFhss...Cs..................sC......plDalulHaY.......ss.sss.hpsalsphhstas........cPlWlTEFu..t..........sssssppp....pppFhppslsal-sp...shVpRYuaFs.............s..ssssstLlstp......G.sLTslGphY 
....................................................................................................................................................................thsWh..Ysat.......................palPh..has.t..........t.htt.h.th.....................t.phlh..sa...NEPDh....ss..........tush.ss..ttAstha...phh.t.s.................thpls....uPuh..sss.........shs.........W.l.p..p.Fhps...sp.....................ss......phDalslHaY..............................tss..hps...h..hs...hl...pph....h...s...t.as......................cPlWlTEau.......................................stsstpp.............ttpa.hppshs..hh.-t..t............shV.t+Y.uaFt..........................hhp...t...........u...tho.hG..a.................................................................................. 1 99 179 242 +11624 PF11791 Aconitase_B_N Aconitate B N-terminal domain Bateman A anon Pfam-B_2605 (release 10.0) Domain This family represents the N-terminal domain of Aconitase B. 21.70 21.70 21.70 25.90 21.50 20.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.91 0.71 -4.40 94 1508 2009-05-07 16:09:31 2009-05-07 17:09:31 3 5 1476 2 324 1082 761 149.10 63 17.65 CHANGED sY+pHltER.us..GlPPhPLs.A-QTupLlELLKsPPsu.-cc..................................aLL-LLppRVPPGVD-AAaVKAuFLsuIspG-sssslIottcAlcLLGTMhGGYNlpsLl-hL....cDsc.................lAstAAcsLppTLLlaDA.FcDVt-hucs.NsaAKpVlcSWAcAEWF .....tYR+HVtER....A.A...GIsPhPLs.ApQsAsLVELLKN.PPsG...EE-..................................FLLDLLpNRVPPGVDEAAYVKAuFLAAlAKG....-spSPLlo.........sc+AlELLGTM.GGYNIcPLI-hL....DDsc..................LAslAAcALp+TLLMFDs.FaDVpEKAKAGNta....AKpVlQSWADAEWF..... 0 75 179 264 +11625 PF11792 Baculo_LEF5_C Baculoviridae late expression factor 5 C-terminal domain Bateman A anon Pfam-B_5141 (release 7.6) Domain This C-terminal domain is likely to be a zinc-binding domain. 
20.30 20.30 20.30 20.30 20.20 20.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.38 0.72 -4.57 24 63 2012-10-03 10:42:43 2009-05-07 17:12:19 3 2 61 0 0 99 2 42.90 50 16.91 CHANGED pLpslNGholct.C.pHcal..shE+QhRAGDEhVSFI+YCphCshh ................L.slsGhoL+t.C.pHcFl..T.lE.+QhRAGDEhVSFI+YCphCsh.h. 0 0 0 0 +11626 PF11793 FANCL_C FANCL C-terminal domain KOGs, Finn RD, Coggill P anon KOGs (KOG3268) Domain This domain is found at the C-terminus of the Fancl protein in humans which is the putative E3 ubiquitin ligase subunit of the FA complex (Fanconi anaemia). Eight subunits of the Fanconi anaemia gene products form a multisubunit nuclear complex which is required for mono-ubiquitination of a downstream FA protein, FANCD2. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.86 0.72 -4.01 10 241 2012-10-03 15:03:13 2009-05-07 17:18:36 3 10 152 2 176 501 25 65.60 32 9.50 CHANGED tph-CGICaAYRL.........sGplP-hsCDNP+Cup.FHpsCLhEWL+oLhsSRQSFslhFGpCPYCsc.lolcho ....................-CsIC....a.uh.h.h..............................ss..sl..P.c...hs..C...cs.pCsphFHpsCLhc...Wh...pu.....t....s...o...p...p..........h.........h..G...p...CPhCpp.lt....h.................... 0 50 77 128 +11627 PF11794 HpaB_N 4-hydroxyphenylacetate 3-hydroxylase N terminal Bateman A anon Pfam-B_3148 (release 6.5) Family HpaB Swiss:Q57160 encodes part of the 4-hydroxyphenylacetate 3-hydroxylase from Escherichia coli [2]. HpaB is part of a heterodimeric enzyme that also requires HpaC. The enzyme is NADH-dependent and uses FAD as the redox chromophore. This family also includes PvcC Swiss:O30372 may play a role in one of the proposed hydroxylation steps of pyoverdine chromophore biosynthesis [1]. 
30.00 30.00 30.50 43.10 29.10 29.60 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.52 0.70 -5.00 80 1009 2009-05-07 16:38:44 2009-05-07 17:38:44 3 4 755 14 223 742 285 248.40 39 52.63 CHANGED TGc-Yl-SL+.ct+..pVYltGE+V.t.-VssHPsh+sslpuhAphYDht...a...........cschp-......hhThhospsup.tssphaplscot-DLht+pchhcthuchotG.hh..sR..usDhhsuhhsshtst..chaus.............atcshpcahchsp-pDLhhstAhssPpsDRu...pss......pps.Dlhl+VVccs-c..........GIlVcGAKhhsTuushocplhlhshts......h.stt-...c-aAlsFulPhssPGl+hlsRpuhpsstt....ssaD.Pluu+a-..EtDulllFDcVhVPWE+VFh .................TGp-YlcSLp..-s+..clYlhGE+Vc..DVosH........P......uFRsuhtolAplYDhh.a..........csph..p-..........hhshs..ospsut.hsp+.a.FchscSs-DLhppRcuhtpWu+ho.h.G.hhGR....oPDahsuhhsshtsss.thaGp.............atpNhcsaYpchp-ssLhhsHAllsP.hDRp...css.........pcstDlal+l.cEs-s..........GIlVSGAKslATsusloc..hlh.sss.........hhscs....tDaAlhFssPhDu.GlKlIsRtuap.sssh...tosaDaPLSSRF-..EpDAllVhDcVhIPWEpVhl.................... 0 83 142 186 +11628 PF11795 DUF3322 Uncharacterized protein conserved in bacteria N-term (DUF3322) COGs, Finn RD, Sammut SJ anon COGs (COG4924) Domain This domain, found in various hypothetical bacterial proteins, has no known function. The family represents just the N-terminus. 
21.60 21.60 21.60 22.00 21.20 21.50 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.05 0.71 -4.65 30 182 2009-09-11 10:44:22 2009-05-07 17:43:26 3 3 174 0 63 162 20 178.50 24 46.76 CHANGED PsDl+tpl.pRhWccuplhtuhhts..pshaPhpLsL+sPsupplupchs.pVRcWl.pplpss........ups.......clcW+plsaRh.hGpsplPsclhlcohpcAlsllG+p.....pphpp.acthlphspsphPpLlshltpcPhpsl..chustW........spllsllcWhppHscsGlYLRplslsGV.coKFlE.p++ulLscLLDl .................................................plttpl.t+.apptphhtthhhs.......tsh.aPh.p..lslthP.sstthstphp..sl+pal.pshpph...........utt......plcWcphph+h..hu.ssplPsclhlss...s....phlshhGcp......tchpp..hp....phhtt.httphs.t...Lht.h..h.........thh....phtpth................thtpllsllpalpt.p..ss.sGh.h...lRplslsGl.DoKalE.p+pullspLls.................. 0 22 46 60 +11629 PF11796 DUF3323 Protein of unknown function N-terminus (DUF3323) TIGRFAMs, Coggill P anon TIGRFAMs Family Proteins in this entry are encoded within a conserved gene four-gene neighbourhood found sporadically in a phylogenetically broad range of bacteria including: Nocardia farcinica, Symbiobacterium thermophilum, and Streptomyces avermitilis (Actinobacteria), Geobacillus kaustophilus (Firmicutes), Azoarcus sp. EbN1 and Ralstonia solanacearum (Beta-proteobacteria). 
25.00 25.00 28.50 27.90 23.70 19.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.32 0.70 -5.02 17 132 2009-05-07 16:45:11 2009-05-07 17:45:11 3 5 126 0 50 132 0 205.00 23 50.04 CHANGED sphsGslpLss.ot.--+puLusLhGRsht..stshplslsch-ss.Lpss...thsshsLtcsL-thhGshlpppppctttppphtphaht.....t.p.hht......psttttWhpplhsttt.........lpphhtss.ctstpllppsspsLttL........s......th..lulhAuplsG..DsHuhDsspssupLlLpAL..............httsptsssss-t..................+pplatpsGllhD-lSsssLshulhshs ...................................................................................................................hsGtlpLp.shst.pE+psluthhG..+sht..tpphplslsph-ps.Lppo....thtshsltplLpthhG...l..p.p.pccpttp.ttppptaht............th.th.ht......pshh..tp.Wlppl.hptth..........ltphh...tps..pt.hp..phlthshpslsp.L...........................Ph.t......p...LslhAspls.u..DsHuhDpsp..supLllpuL..................thhhst.p..s.psucp..........................ppcLahpsGllh.D.-lSshlhshsLhs..t..................... 0 22 39 46 +11630 PF11797 DUF3324 Protein of unknown function C-terminal (DUF3324) Moxon SJ, Bateman A anon Pfam-B_7106 (release 9.0) Domain This family consists of several hypothetical bacterial proteins of unknown function. 24.10 24.10 25.10 24.20 23.90 23.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.67 0.71 -4.46 23 905 2009-05-07 16:47:03 2009-05-07 17:47:03 3 3 246 0 66 605 3 142.20 31 41.83 CHANGED sspslsIpNcYuYsIullLppsssp...lpPcLcLscVpsuphNt.+ssltAsLQNspsshlsplsl-ucVhppsscchLapscccslphAPNSsFsa..slsh.psp.......pLcsGcYpLchsspuscp..................pWcas+cFTIsucpAKclNcp ......................p.ppuhulpNcY..u..YslulhLpps.cpt.......lpPpLpLtcVpssphNu..+sslpsslQNspsshlsplplpuplhccspp.cslhpppppshphAPNSsFsa...slsh.ptp.........................tlcsG.pYplphpsp..sspp....................pWpap+.-FTIsscp.AcclNp.................................................................................................... 
0 26 53 57 +11631 PF11798 IMS_HHH IMS family HHH motif Bateman A anon Pfam-B_1349 (release 2.1) Motif These proteins are involved in UV protection, eg (Swiss:P07375). 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.25 0.73 -7.56 0.73 -3.60 92 4693 2012-10-03 02:11:09 2009-05-07 17:52:39 3 18 3144 158 842 3371 466 32.10 35 7.97 CHANGED .psh.tphl..tsl.slpcl.GlGpphtpp.Lpp.hGl .........tch..phl....tsL.PlpclaGlG+toscK.Lpp.hGI........... 0 246 500 691 +11632 PF11799 IMS_C impB/mucB/samB family C-terminal domain Bateman A anon Pfam-B_1349 (release 2.1) Domain These proteins are involved in UV protection (Swiss). 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.64 0.71 -4.20 106 7945 2009-09-16 12:11:34 2009-05-07 18:00:12 3 45 4183 220 2026 6312 2268 123.60 19 27.86 CHANGED sp.lp...................pttt+olupppoh.st..sh...pshpplp......th.ltplspclsp+Lp....ppphh...............spslslp................l+........ht..........shp...phsc....phslsh.ssss.spp....lhph...shplh.......tph.hp.....t.....lch.lGlphsplt.....ptsptphsla ...........................................t............pt.pKSlusppo..a.sc.....cl.....pshp.phc.........th.l.p.p.ls.....pclsp+Lc...............ppp..hh......................................scpl..s..lp.................l+.............hs.............................................................................sap........s.ho+................phs..l......t...s....o....ps...sps............lhph........Ahplh.............tch.hp.............sts........lRh..lGlp..l..spl..........t.................................................................................................................. 0 644 1225 1687 +11633 PF11800 RP-C_C Replication protein C C-terminal region Finn RD, Bateman A anon Pfam-B_4463 (release 6.6) Family Replication protein C is involved in the early stages of viral DNA replication. 
20.80 20.80 21.50 20.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.29 0.70 -4.62 53 475 2009-05-07 17:07:56 2009-05-07 18:07:56 3 4 187 0 99 454 17 181.40 26 51.77 CHANGED GsWpslpppapslhsplP.Rpsshsp..LcslhsclphLpp-ltshLctttp................spp.suN........-sps-+HhQNSsP-..uh.E.E......Ps..ppppt...........................spss..psppspspPh..............................+sh...PLuhVLcACP-ItsYussG.It...sWRDLhsAAshVRsMLGlSPSAap-AscsMG.csAAlslAsILpR..uspI.sSsGGYLRsLTcRAppGcFSlGPMlMALL..+sputs ..................................................................................................................................Wtthpthattlh.ttls..R..p...sshtp..Lt.lhsphpt.l+...ppl...phLctp.p.......................spphssN..........tspscp.phpsSpsc.....th.-.p..st...ppppt.....................................................................................................h...sl..shlhp.uCPpht.t...as.....s..t...ht........tWp-lh.tsA..hlR.hlGls.psaptAtphhG.ptAuhsluhl.hp+..........htpl..posGGYLR.hst+uttGthpht..hhuhhtt...s.............................................. 0 12 45 63 +11634 PF11801 Tom37_C Tom37 C-terminal domain Wood V, Coggill P anon Pfam-B_30563 (release 22.0) Domain The TOM37 protein is one of the outer membrane proteins that make up the TOM complex for guiding cytosolic mitochondrial beta-barrel proteins from the cytosol across the outer mitochondrial membrane into the intramembrane space. In conjunction with TOM70 it guides peptides without an MTS into TOM40, the protein that forms the passage through the outer membrane [1]. It has homology with Metaxin-1, also part of the outer mitochondrial membrane beta-barrel protein transport complex [2]. 
27.00 27.00 27.10 27.20 26.90 26.80 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.11 0.71 -4.39 17 336 2012-10-03 01:14:49 2009-05-07 18:10:42 3 12 206 0 199 332 0 110.30 24 36.99 CHANGED Ahhuhlpp+lpslhpYsLYlsscNYpphTR+hauphl.FPh.a.tP.ph+spApcpschlsl............................ptppp.ppc.sptsspls.Shhpchpth......Kppppp.l+p.t.shchhshLpchLsph................phlhuss.....oSs-hLhhualhlhhh.pLPss.hlhsaL+ .........................uhhuhlcpch.shh.ashalsscNY.phT+.hauphhsFPhpahhPsphpptthp+hthhth.............................................................................................................................................................................................................................................................. 0 38 77 135 +11635 PF11802 CENP-K Centromere-associated protein K Wood V, Coggill P anon manual Family CENP-K is one of seven new CENP-A-nucleosome distal (CAD) centromere components (the others being CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S) that are identified as assembling on the CENP-A nucleosome associated complex, NAC. The CENP-A NAC is essential, as disruption of the complex causes errors of chromosome alignment and segregation that preclude cell survival despite continued centromere-derived mitotic checkpoint signalling. CENP-K is centromere-associated through its interaction with one or more components of the CENP-A NAC. 
22.90 22.90 23.50 23.10 22.60 22.80 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.86 0.70 -5.16 5 82 2009-05-20 17:23:14 2009-05-20 18:23:14 3 3 60 0 52 68 0 210.10 40 81.50 CHANGED MSt.pp-L.Ps..pDssssh-scEELL+ECEslWK-ME-CQSKLoLlGsETLs-SDAQLSLLlMQhKsLTAElcQWQKRsP...................EIIsLNcDVLL..sLGKEElQKl+pDLEMVLSolQuKNEKLKEDLEREQpWLDEQQQIl-oLsslpcElKNpVsThSESRIFsELpsKhhclKEaKEKLLosLG-FLEEHFP..LP-cptSscKKRKupp.-sSlQLITLH.........EILEILIN+LhslPHDPYVclcDSFWPPYlELLLRsGIALRHPEDPoRIRLEAFHQ ................................................................................................thpt.ll.ppCEp.at.hp..c..hQpcl..h.tsEshs.....pps....t...lhh.phps..LpuEhpphpccpP...................c.hhs.s..tlLh..tlu+cchp+lpppLEhlLSshpuKpccL+csLcREQpWlpEppplhp....ulp.h.pclp...p.ph.pho-.p.phh.p.....-hppK...hhphc......phcccLh.tLuchL-cHaP......L..Pct..sspKK+...+s..hp..csssp...hhslc..................-hlE..hLlN+hhcsP+...D...PYVpIsc.oaWPPYlEhLLRsGIAlRHP-Dss+IRLpsF................. 0 14 20 32 +11636 PF11803 UXS1_N UDP-glucuronate decarboxylase N-terminal Coggill P anon Pfam-B_36254 (release 23.0) Family The N-terminus of the UDP-glucuronate decarboxylases may be involved in localisation to the perinuclear Golgi membrane. 20.50 20.50 20.90 20.70 20.40 20.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.53 0.72 -4.22 5 56 2009-06-11 15:38:14 2009-06-11 16:38:14 3 3 38 0 26 47 0 67.40 63 18.26 CHANGED Mlppthppll.oGlNRRMMKlLlALALIAYIASVWGsYlNM.RSIQEsGElKIE....QKI-EsVuPLREKIR-LEpSFTQKYP .............pt...hlh.suhNR..hhKllhulAhhA.YlAoVWG.....NFVNM.....RSIQENGElKIE....SKIEEhVEPLREKIRDLEKSFTQKYP....... 0 2 3 9 +11637 PF11804 DUF3325 Protein of unknown function (DUF3325) Assefa S, Bateman A anon PFAM-B_2004 (release 23.0) Family This family of short proteins are functionally uncharacterized. This family is restricted to Alpha-, Beta- and Gamma-proteobacteria. 
26.80 26.80 27.70 27.40 25.40 26.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.39 0.72 -4.10 44 248 2009-06-16 09:39:29 2009-06-16 10:39:29 3 2 191 0 82 247 7 104.10 29 92.30 CHANGED hlsshhL.uauuasuLuLAMp+Hacplh.s+ssss.sptphL..R....hhGWhhLslohhhsltshGh...uhGslhWhGhlohuulllll.hLsYp.......P+...hlhhhu.hsuslhu.........slhhhh ..........lhshsL.sasuassLuLuMs+Hacpl.........h..s+s..ss...tpp..phL..R.....hhGahhLslulhhsltspGh...uhGsVhWhGhLohuAhllsh...hLsap.......PR....hhh.hu.shsslhs.hh...h................................. 0 13 33 62 +11638 PF11805 DUF3326 Protein of unknown function (DUF3326) Assefa S, Coggill PC, Bateman A anon PFAM-B_2030 (release 23.0) Family This protein is functionally uncharacterized. It is about 300-500 amino acids in length. This family is found in plants and bacteria. 25.00 25.00 56.30 38.10 23.80 23.00 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.93 0.70 -5.72 25 128 2009-06-16 09:50:57 2009-06-16 10:50:57 3 3 111 0 50 129 189 315.40 48 87.33 CHANGED hs...slL.lVPTGIGCs..................IGGYAGDALPsARLLAussssLITHPN......VlNGAsLYWsssslhYVEGYuLDRFApG-huL+PV+ppRlGLlLDAuIEsEL+pRHLQlA-AsRATLGLslsshlhTDtPLtVpLptus.SGuSWGsl-pPDuLLRAucpLh.csGAsAIAVVuRFPDD.sotthptYRpGpG.VDslAGAEAVISHLlVRcLplPCAHAPALuPlsl.....sspLDPRuAAEElGaTFLsCVLlGLSRAPclls........sstspssslpus.plsAVVsPtGAlGGpuVLAsh-+...sl.P.lIuVtpNsolLpVoscsLGls..........lltlssYhEAAGlllAlRcGls.suLpR ...............hsslLIVPTGlGAt..................IGGaAGDALPlA+hluulsDpLITHPN......VlNGApLYWs.sNshYVEGYuLDRFAtGphuLpPV+p..N+lGLlLDpuIEs-Lt.RHlQsA-AsRAoLGLslschlhTDsPLplphphus.SGtShGoltsPDuLLRAscpLhppssApAIAlVuRFPDDsssthhptYRpGpG.VDslAGsEAlISHLlV+pFplPsAHAPALtPhsl.....ssplsPRuAAEElGaTFLsCVLsGLSRAPphlp...............tt.s....stsss.lhss.pVsulVlPtsAsGGpulLuhhpp...ph..s..lIsVp-NposhpssscpLslp..........shtVssYhEAhGllsAh+sGls.sulp........................ 
0 10 35 46 +11639 PF11806 DUF3327 Domain of unknown function (DUF3327) Assefa S, Coggill PC, Bateman A anon PFAM-B_2060 (release 23.0) Family \N 27.10 27.10 27.40 28.10 26.10 26.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.89 0.71 -3.81 35 840 2009-06-16 13:03:42 2009-06-16 14:03:42 3 6 641 13 81 482 3 128.80 46 31.19 CHANGED pt..................hVTF............lWR......uss..............sshtlasshsultpp..........tpphpRLsuT.........DlW...................................ahohplPusaRuS................Yshhsssss.................hcsstcpthRtlhsp......upsDPLN.pshhsst........hhpttSsLcLssAsspsths ............................................................tsp.h..cVTF............hWR....s-ptS..............sl++ValhlsGVTD++psu......spsMpR.lsGT.........DVW...................................phThpLsAsaRGS................YsFlPstp-s.......................PDct.t..LRcGWRplLsp......AhADPLNspsatsGR..........GpssSsLchPpAPhQstW.s................................................. 0 13 32 60 +11640 PF11807 DUF3328 Domain of unknown function (DUF3328) Assefa S, Coggill PC, Bateman A anon PFAM-B_2062 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in eukaryotes. 
25.00 25.00 25.20 25.00 24.50 24.50 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.96 0.70 -4.66 101 775 2009-06-16 13:12:53 2009-06-16 14:12:53 3 5 79 0 677 817 0 184.60 18 76.48 CHANGED ptptpppttahhh.............hhhhhhhhhhshhhhhhthhhht..t.h.t.........................................h.h.........................s.....aps.sss...-h.-puW.............tshhlstpt....h.phshs.........h....st.............shhhtlp.saH....pLHCL.....................phlRptha.ph....................thtpss.pt.....................htHhpHCl-hLRQslhCpuDsslh....s..h............................stshss...............................hss...p+pC+sa..-tlhc..............Wspcp ................................................................................................................t......................................h...h....h.......................................................................h.....h...................................p.......a.....t...ss................ph.-ts...W.............................t....hh.h.stpt...............h.ths........................ss...................thhhtlp.saH.....pLHCL...........................phlRptha.th.............................t.t..t..tt.t....................................hhHh..pHCl-hLRQ......slhCpuDssl.......sh.hht...........tt.hss..................................hss.....HpC+.s..aptlhp..ahtt......................................................... 0 107 349 527 +11641 PF11808 DUF3329 Domain of unknown function (DUF3329) Assefa S, Coggill PC, Bateman A anon PFAM-B_2082 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. 
27.10 27.10 27.30 27.60 26.90 27.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.06 0.72 -3.90 33 1029 2009-06-16 13:53:43 2009-06-16 14:53:43 3 10 1017 \N 175 598 65 89.50 47 20.74 CHANGED hphsap.phltcLslhhlsslllGhlhGplshhLhlushshLsWHhhplhRLppWLapc+phs.PPpupGsWptlFsGlYRLQpRsR++Rpc .......................E.RLoWK.+LlhELlLhslP..A..hl...lG....hhh...G....a.l...P...W..h..L..L...A..u..l..s..ul..L...l...W...Haa...sLl...RLShW..LWs-R..shT..PPsG..pGuWEsLhaGLaphQhRN++RR+E................... 0 29 76 127 +11642 PF11809 DUF3330 Domain of unknown function (DUF3330) Assefa S, Coggill PC, Bateman A anon PFAM-B_2077 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. 25.00 25.00 26.70 26.60 23.70 22.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.79 0.72 -4.11 9 204 2009-06-16 14:09:36 2009-06-16 15:09:36 3 2 158 0 20 76 4 67.30 73 44.19 CHANGED sssMNssssosTSCCVCCKEIPLDAAFTPEGAEYVcHFCGLECYQRFpARA...pssscssstPs.ss.....sspP...os ................s.ssMNAN-PS.TSCCVC.CKEIPLDAAFTPEGAEYVEHFCGLECYQRFQ.ARA...pTATETslcPs.ACDS.PSs......... 0 6 10 15 +11643 PF11810 DUF3332 Domain of unknown function (DUF3332) Assefa S, Coggill PC, Bateman A anon PFAM-B_2104 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. 
25.00 25.00 43.10 43.00 19.10 18.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.03 0.71 -4.61 25 244 2009-06-16 14:14:27 2009-06-16 15:14:27 3 1 223 0 47 189 10 171.90 36 95.19 CHANGED +pphhtssshshssstLoGClGphuloshlh.chNl.....puVDNRYuRuGla..hlluPVYGlsssADhllFNSIEFWoGpNPl.......stc.PulsDs.s.cshh....clNsp.lctsLscsPlshh......pphcpuphphlDspshphplshssGpptpLhGh+.psspVshYl.DGchlsh............sohspLtshtpss .........hhhhshhhhhuss..houChGphuloshlt.caNh......psVDN+as+EhlF...hlluPVYGlss.hADhhlhNSIEFWTGpNPl.......stt.ssVsDh.h..cslh....clNsp.lshphpcsshphp........+.hEpush..c.lpscshph.lshssGppph.Lhshp.ssspVshaL.sGchhss....lstptltuhhpss................................................................ 0 10 21 37 +11644 PF11811 DUF3331 Domain of unknown function (DUF3331) Assefa S, Coggill PC, Bateman A anon PFAM-B_2106 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family vary in length from 96 to 160 amino acids. 25.00 25.00 37.00 37.00 20.20 16.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.37 0.72 -4.41 24 164 2009-06-16 14:22:22 2009-06-16 15:22:22 3 2 48 0 66 160 2 97.10 39 75.80 CHANGED ttpsts.hsutssss...........thtlpllER.osoolhVpWsDss+C+YuEQpW+hshARpsGhCALSGpsIchGDsVa+Pph.R..shPsNusAMILAuslsphhst ..........................htss.........sshp........tsplpllER.SsoslsVpWs-ss+C+YGEQcWRtthAcpsGpCALSGpsIptGDsVa+Ptt.R..ssPuNusAMIhAusltt...st.... 0 1 7 30 +11645 PF11812 DUF3333 Domain of unknown function (DUF3333) Assefa S, Coggill PC, Bateman A anon PFAM-B_2108 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. This presumed domain is typically between 116 to 159 amino acids in length. 
26.50 26.50 26.50 26.50 26.20 26.10 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.59 0.71 -4.23 62 456 2009-06-17 11:52:16 2009-06-17 12:52:16 3 4 432 0 116 348 598 144.50 29 36.69 CHANGED hctpl++R.tAE+RF+haGluAIslulhhLslLLsoIlupGhsAFppThlplslplstpt.l-.ps.t.......ptltstsYtsllpsAltp....hshp.tst.st+thtt.llSpsAttpLR-hlhssPpllGpThshhlhAsucl...DshhKGps.s..tpshtps ............ctthK+R.tA-+pF+hhuhhAlhluLhFLsllLsSlhspGasAFpQThlhl.lphspts.ht.........................shlttu..sh.hlhpsultc....hsss..s.sp...t.p.chsp.hlS.pptthtlcchlhts.sthlu.pspshhl.usuch...D.hhKut..........s.......................... 0 37 80 95 +11646 PF11813 DUF3334 Protein of unknown function (DUF3334) Assefa S, Coggill PC, Bateman A anon PFAM-B_2118 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family are typically between 227 to 238 amino acids in length. 25.10 25.10 25.70 45.10 22.00 25.00 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.52 0.70 -4.81 19 187 2012-10-01 19:50:22 2009-06-17 13:02:51 3 1 184 0 41 122 9 223.30 68 99.13 CHANGED hppsplloT-DILLpLCpSVopVLosATpSpVpYSuMVQ+Is+TsLKPDlGCFVLFDGGFSGLVVINFSApAAhElYppYMLsMGMPcpELAhSHTSDEVuNVMGELMNQlVGDFTuKlp+ELQTsITQNQPKMLslNKQlhLSVDTNLDcPpARRVoFpTtpNpIFYLEhAMDKTEFIpLp-FEhcEc..DPDsLltppt......ttttspssssts..ttc............................s-sDDLLcpLGl .........MpKsplVTTEDILLhLCpSVSsVLoSATsS.lpYSAMVQKIsKTSLKPDhGCFVLFDGGFoGLVVINFTAcAAlElYssYMRNMGMPE-ELAl.HTSDEVGDVLGELMNQlVGDFTNKVRKELQTsITQNQPKMLoLNKQVhLSVDTNLDRPQARRVTFoTsNNNIFYLELAMDKTEFIQL-EF-hp.E-p.sPD-ILttsp......ppp.p.spssssps..tpc............................ssusDLLDpLGI............................. 
0 6 15 29 +11647 PF11814 DUF3335 Peptidase_C39 like family Assefa S, Coggill PC, Bateman A anon PFAM-B_2152 (release 23.0) Domain \N 25.00 25.00 25.20 29.10 24.80 24.40 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.17 0.70 -5.19 25 210 2012-10-10 12:56:15 2009-06-17 13:04:35 3 5 202 0 49 172 23 201.10 47 55.40 CHANGED lPaYpQTT-FTCGPAsLhMAMusLcspht.spppELplWREATTIFMTSGHGGCuPaGLALAAh+RGacsclalsssusLFlDuVRsppKKpVhpLVccsFtpplpppsVslphpshohpclcptlspGttslVLISoYRhsGcKsPHWVllouhD-calYlHDPcl-.tpppcsshDstalPIu+p-Fs+MupFG+s+.LRAAVllppp ...hPaYhQTTsFTCGsACLLMAhus..L......c....t..shp.oRspElpLWREATTIFMsuGHGGCuPpGLALAAtR......R..Gac..V-lhsssp.u....s.FlD...uVRsssKK.......pllcLVHpcFsppLsppsVshh.tssstspLcphlpp.GupsLlLI..SoYRhsu..c.K..tPHWVllouh....s-.cFhalHDPc............s-................pc.....cpsh-stalPVu+ushsphhsFG+p+.hpAsVllt.p............. 0 15 30 42 +11648 PF11815 DUF3336 Domain of unknown function (DUF3336) Assefa S, Coggill PC, Bateman A anon PFAM-B_2157 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in bacteria and eukaryotes. This presumed domain is typically between 143 to 227 amino acids in length. 
25.00 25.00 27.90 27.00 24.80 24.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.68 0.71 -4.68 82 500 2009-06-17 12:07:04 2009-06-17 13:07:04 3 5 249 0 353 491 21 145.30 26 20.75 CHANGED thhcpp...hhp...........pLhcphcs........................upoYc-WhpsAtpLDch.....hGtstW+.psspss..hYDaphlppplppLcpsRtps-h.........................................ppLhhllcsslpRNhuuh...ssspLYp+oah..GTKpLI--YlcElttslphlscs....pplssp...phhthhppspps ..................................h.............php....pLpp.phcs............................Apo..Yc-WtpsAtpLDch.....hGt.stW........+.....psspos.......hYDhpllppplppLcctR...p.ps...-h............................................pplh...h...ll+s.s....lhRNhu..sl......sss.pL.Ypcoah....G.T......KpLI-cYlsE..lt..psLchlspt............pphs.p...thhthhppht................................ 0 100 200 307 +11649 PF11816 DUF3337 Domain of unknown function (DUF3337) Assefa S, Coggill PC, Bateman A anon PFAM-B_2058 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in eukaryotes. This presumed domain is typically between 285 to 342 amino acids in length. 
25.00 25.00 26.00 25.30 21.20 24.60 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.13 0.70 -5.04 30 297 2009-06-17 12:16:39 2009-06-17 13:16:39 3 23 247 0 215 309 1 296.90 26 35.92 CHANGED ptshssppphhspp...........sssssshhshsps..tthpssupss.ptt.t...........stsPst.shs..tppsphssohhs.cchch....s.h.tc.ttt.pt......................ptstppspppp.hc.shuuhltpshppa..pphtssst........................sphpPs.ttcss.hh......plPscoslllpE..sstusspslaRt..plsshspct..........-hlp...cslPhWluchLLcNphP.K.-s.sKlsFhLpPa..............c.....................sthPshscs-.................................sssRLsAscMLRs+KIhsYVsE+l......sps.....pp.pspt...............................................................hcPc-aLELhCps..................pl..Lss...sMTLuTlRshlWKouuDllLpY+tpsc .......................................................................................................................................................tpp...........stttsthhsh.ps......hhhps.up.ss.ppp.pp.....................s.hPshhsh-....cpuhhshsh....cc.ph......h.s.h..........................................................ttst.sstptp..s..c.Nhsshl.phhhca....pht.ps.p................................................................................tsphp...s.ss.....pp......tss...hh..................plP.....scosllhtE..........uust.slaRh..hlpshss-t.......................-...h.........cshP.Wlh-hl..l..psp.h........P...c.....sKlsFhLpPa................................s.............sshsphh.pts................................................RLsAs....cMLpl+K.lhtaVhE...+l.....st.ps............................tp.ptst..........................................................................................................hhs--hlELhCps....................pl..Lss...sMsLtTl+palWKs..us..DlhLpY+tp..p.............................................................. 
0 69 121 179 +11650 PF11817 Foie-gras_1 Foie gras liver health family 1 Coggill P anon Pfam-B_4417 (release 23.0) Domain Mutating the gene foie gras in zebrafish has been shown to affect development; the mutants develop large, lipid-filled hepatocytes in the liver, resembling those in individuals with fatty liver disease [1]. Foie-gras protein is long and has several well-defined domains though none of them has a known function. We have annotated this one as the first [1]. The C-terminus of this region contains TPR repeats. 23.30 23.30 23.30 23.30 23.20 23.10 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.44 0.70 -4.90 22 379 2012-10-11 20:01:02 2009-06-17 18:05:52 3 16 226 0 279 399 0 215.80 20 20.99 CHANGED +h.-s+hluDhlsh+I................hRhhLhsupss..sAlppaptHhp+hp-h......lsp.h.ut......t..tatuW.uR....htlhA-Llcpsshsthsshp...................................sGaahppAAc.ahhtR+phtpph.............P........pssuuplhsphahhcsh............sspsppthshsts..pthsHSt.IlshLppAhtpFpphpp...........sRhsptLsh...chAcEYh+hs.sascAhphlcsl..shsaRp-sWhsLhpchhhtL+cCAhphtDscshlpsshELhsh 
......................................................................................................................................................................................................................................................ph+.hhthhth+h.................h.chhh.....tps....tAhtphptHhphhtph.........................hs.....p............t..th.sW.up....h.hhuplhpps..................................................................................................suhhh..uA..hh....+pthht.h........................................................................................................................t.h..............................................t...th..............t......h..psthhlphhp....pAhtpapphtp.........................Rhtphlhh.....phucpa.ht.ht..pap...pAhphhp.h....hhpa+.........pEtWh.tlhpphhhthhcCthhhsphtshlphshchhs.t............................................. 0 81 141 219 +11651 PF11818 DUF3340 C-terminal domain of tail specific protease (DUF3340) Assefa S, Coggill PC, Bateman A anon PFAM-B_2330 (release 23.0) Family This presumed domain is found at the C-terminus of tail specific proteases. Its function is unknown. This family is found in bacteria and eukaryotes. This presumed domain is typically between 88 to 187 amino acids in length. 
28.60 28.60 29.40 29.20 28.30 27.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.63 0.71 -4.23 98 1144 2009-06-18 09:33:45 2009-06-18 10:33:45 3 5 1125 0 222 767 468 142.30 45 20.88 CHANGED ohl-st-hGEss.-NALPWDpIssApYsphsshssh...lspLpppHppRlspss-Fthlpcclphhcpp+-c.pslSLN.spRctEpcpt-pp..pLpth.......NpRhp......t.tG..p....cslpsh.........................-.........................................-h..sc-h.........tt.Ds....aLcEuspIhhDhh.......phpp .....p....tsEsGEphEDNALPWDoIc.u.As..Y.s+s...sc..ls...sh....hscLhccHpsRIAcDPEFphIhcDIAcapsh..KD+...shVSLNhutRccEssccDup..RLsRl..................N-Rh.K.........p..cG......K...s.LKcL........................D..............................................................................Dl....PKDY...............pcPDs......YLDEolpIslDhscl..p............................ 0 60 117 178 +11652 PF11819 DUF3338 Domain of unknown function (DUF3338) Assefa S, Coggill PC, Bateman A anon PFAM-B_2474 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in eukaryotes. This presumed domain is about 130 amino acids in length. 34.60 34.60 35.40 35.30 34.10 34.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.59 0.71 -4.72 8 236 2009-09-11 14:35:59 2009-06-18 10:36:45 3 8 64 0 127 216 0 132.30 48 17.22 CHANGED hEuKsclIsuSSGolluSGStsu-sS.csKKE+lpsLKcKpcsLc-+Lpt+LcELKKlCLREA........................ELTG+LPsEYPLpPGE+PPpVRRRIGTA......FKLD-..lLs.sE-stLpsLEschAlQQQIsEAA++LusEscLSKsl .................................tuKsplI.sSs.GolluSG.u...pu-s.ut.psK...+.-hltsL+p+pcsLpEpLpp+lEELK+lCLREA........................ELTGcLPtEY.....P..LcP.G...Ec.....PPpVRR.RlGTA.................FKLD-...pl...L.....s.t..tE-...stLp...pLEpchulQppIsEAA++LAs-PsluKp...................................... 
0 17 30 65 +11653 PF11820 DUF3339 Protein of unknown function (DUF3339) Assefa S, Coggill PC, Bateman A anon PFAM-B_2694 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in eukaryotes. Proteins in this family are about 70 amino acids in length. 25.00 25.00 36.80 25.60 24.40 23.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.39 0.72 -3.68 25 199 2009-06-18 09:40:15 2009-06-18 10:40:15 3 5 29 0 126 175 1 66.80 52 81.93 CHANGED MsDWGPVlluVlLFVlLSPGLLFQLPG.+sRhVEFGshpTSGhSIlV.Hslla...FullsIhllAlplH....lYs .......MuDWGPVllulVLFlLLSPGLLhQlP.G..+.sRhV.EFGshpTSGhuIlV.Hulla...FuLlsIhllAlslHlY.................. 0 16 74 100 +11654 PF11821 DUF3341 Protein of unknown function (DUF3341) Assefa S, Coggill PC, Bateman A anon PFAM-B_2731 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in bacteria. Proteins in this family are about 170 amino acids in length. 25.00 25.00 25.20 26.50 24.60 24.50 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.02 0.71 -4.79 37 213 2009-09-11 12:21:10 2009-06-18 10:43:59 3 3 197 0 114 226 320 170.60 31 77.66 CHANGED tptslhA.Fscs-sLlpAs+plRt...tGa..phh-saoPaPlHGLDcAhGl.t.oplshhshhhGlsGsssuhhhphahshhD..........aPhsI.GGKPhh..Sh....PAFlPlsFElTVLhAAhuhlhuhh.hhstLs.hp+Phh.ss...+socD+Fhltlsssss...-tpchpphLcphGAhclphlp ......h..hslhA.Fsss..-sLlpAs+pl+p...tGa..chh-...saoPaPlHG..L-cA.hG....l..tco+....lsh..hshhhG.lhGhssuhhhthashhhD.................aPhsl.GGKPha..Sa....PuFlPlhFElTlLhA.Ahshllshh.hhspLs.h.t+shh.ss.....R.s.ocD+Fhltlpspss....stpphpphLcphGAhclp...t................................................................................................................. 
0 54 88 104 +11655 PF11822 DUF3342 Domain of unknown function (DUF3342) Assefa S, Coggill PC, Bateman A anon PFAM-B_2751 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is found in bacteria. This presumed domain is typically between 170 to 303 amino acids in length. The N-terminal half of this family is a BTB-like domain. 26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.32 0.70 -5.29 9 124 2012-10-02 01:20:04 2009-06-18 10:46:24 3 4 99 0 82 233 4 273.00 41 42.62 CHANGED s.IpVpDcspNppRsFpCspcLLhocMpYFssllph.................................................................sspch............lsIpV+CDlpIFsWLMpalct.................ctPplsPsNVVSIhlSSsFLpMppLl-psLtYh+t+LsplVtousNhsClsspLlsRLusMhscs-Lst....l+Dc+schps..+lhspLIp+.LC-..sps..............................ts..uph.....suLhhCthCtpLhsppp.p+lpp.........sPu....................+hslspRGclhhoHsts+..................shsspta..lsshpcELcuWt...WRlhGuhpalhCpRCpphhslh-ho..pCphtPtshsassssup.Dstu.suhashC.tppshca- ................................................hlIHVCDEsK.shpcDFhCPpcLLlscMpYFA-hl...................................................................s.u.Q+h-E..........lDIS.VH.C.D.lpI...FsWLhpal++s...............................................................tttchPp.L..-ssNVlSILlSSpFLpM-.sLl-pClpYC..Hpp.hs.....tIVt..ossNhsClsssLloR...........lAshFoptEl-t....l+DKK.DKhpS..+LasKhIpp.Lh-sp..s...................................cu..ssh.....usLa..RCshC..tpllspphpp..plsC..........hPu....................phplsp+Gplh..hHh.RD.p.............................................................sWslppY..l..sLacEL+SWRc.....VYWRLWGshpaL.hCspCtphF.sh-hs..aCpaHscss.a.ssssp........s.sGhYsCC.sppshRa-............................................................................................................................................................. 
0 32 41 62 +11656 PF11823 DUF3343 Protein of unknown function (DUF3343) Assefa S, Coggill PC, Bateman A anon PFAM-B_2956 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 78 to 102 amino acids in length. 25.00 25.00 25.10 25.00 21.70 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.04 0.72 -4.72 58 857 2009-06-18 09:51:36 2009-06-18 10:51:36 3 6 709 0 160 521 6 70.90 27 76.57 CHANGED ptphlllFpospcuhpsE+.....lLKcpsl.shcllPsP+pl.psu..C.Glulchst.pp.tptltphlpctslthpt....laphp .....................calhhFpoTstslpsc+.........................hLps..t..uh.sh+lh..slPR.cl..puG...C.Glslhhs.....s.ss.t-clphll.ttt..hpsla..................................... 0 90 133 147 +11657 PF11824 DUF3344 Protein of unknown function (DUF3344) Assefa S, Coggill PC, Bateman A anon PFAM-B_3041 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 367 to 1857 amino acids in length. 
27.20 27.20 27.50 27.50 26.20 25.90 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.88 0.70 -5.02 42 118 2009-09-11 16:28:30 2009-06-18 10:53:44 3 42 17 0 94 122 4 276.20 22 40.86 CHANGED ssshsss..a.Gst.....s....Lsohtp..ssV.....pGslhhss..........hhGhssps..............spsh....slP.su........slchA+LYls.sWst.........ppsh.sphslsFNG..........ppht.........................ssts.Ysshps......assh.p....shas.YDVTs...hlss....G..psssslss...........sshcG.....plhussLlllYc.ssstsphp.YWlN-GsDhlph...............sscpssups.Fsus.ss.s..lpsAsLhohhhoustt......sslhFNu............sslhsus............................pushhshp.tasVsshlpsspNpshh.s............tsssYhpsshulLslch ......................................................s.....tt..a.us......s....lsohtp..ssl.....pGslhhss............h.uhssps...............spsh.....slP...ss......s.....slphA..+LYls.sWst.........pssh.sshslsFNG...........................pth.................................sssstYhshss...........ass.hhs.sh.shhh.YDVTshlps.....u.pssh.slss............tsshcu......plhshsLllsYp.ss.sssphp.halN-GpDhlsht.t...............tscpssups.sFsss.hs.t..tlpsApLhshhhuu.s..s.........ushhFNu....................pslhssss.................................pus.hshp.taDV..ssh...lps..ss..spshhts................tsushhpsh.uhLslp................................................... 0 50 62 72 +11658 PF11825 Nuc_recep-AF1 DUF3345; Nuclear/hormone receptor activator site AF-1 Assefa S, Coggill PC, Bateman A anon PFAM-B_3322 (release 23.0) Family Nuclear receptors (NRs) are a family of ligand-inducible transcription factors, and, like other transcription factors, they contain a distinct DNA binding domain that allows for target gene recognition and several activation domains that possess the ability to activate transcription [1]. 
One of these activation domains is at the N-terminal, although there are two distinct motifs within this domain, between residues 20-36 and between 74 and the end of this domain, which are the binding regions. One of the co-activators is TIF1beta, which appears to bind at the first motif [2]. 27.30 27.30 27.30 27.30 27.10 27.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.83 0.72 -4.15 14 198 2010-01-08 15:29:35 2009-06-18 10:54:52 3 5 47 0 73 155 0 100.60 44 24.06 CHANGED uoSlssssths.hss........Hssh......sshss.t.h....osluSP.lNulGSPaSVIo.SolGssShulPuT.PulGassh.SPQhN...shs...uVSSSEDIKPP.GLpsl......phsspusGuh .........................................ss.ts......s.HPSh......sshss.t.l.....SsluSP.lNulGSPasVIo.SuhGs.uh...slPuo.PuluaushsSPQl...NS.....shN.........uVSSSEDIKPP.h.GLpGl.....hphsupssGsh............. 0 2 10 30 +11659 PF11826 DUF3346 Protein of unknown function (DUF3346) Assefa S, Coggill PC, Bateman A anon PFAM-B_3462 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 231 to 659 amino acids in length. 25.00 25.00 37.30 36.50 20.00 16.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.47 0.70 -4.57 4 170 2009-06-18 09:56:55 2009-06-18 10:56:55 3 2 126 0 11 121 0 220.10 88 43.69 CHANGED FTDFQLHELTGRWLSEN.MPEGFKSDRFRFLARTITASEEAPpEGpDGEIRIKPNLYILVWEPSFa-ELLTRDYFF.LFPPEILKQHTLVFQLYSFFRSRMuRRhTDsMLLSELNQKLARNI-WRRFShDLIRELK+LuEGKsoE-lFlVNLWGYHLTIpuh-pstKlsDYQIDI+CDs-E.VlRYSRA+TTNsGKRsM.APThPNPLRNElhsKQcL-pLSuIIDGEF .............................................FTDFQLHELTGRWLSEN.MPE...G.FKSDRFRFLARTITASEEAPsEG.SDGEIRIKPNLYILVWEPSFaEELLTRDYFF.LFPPEILKQHTLVFQLYSaFRSRMuRRHTDsMLLSELNQKLARNIEWRRFSMDLIREL+R........LS-G.KGoEDLFVVNLWGYHLTIcoh.EcGKVhDYQVDIKCDVEE.VLRYSRA+TTNAGKRNM.APTLPNPLRNEhVoKQpLsELSuIIDGEF............................................ 
0 1 3 8 +11660 PF11827 DUF3347 Protein of unknown function (DUF3347) Assefa S, Coggill PC, Bateman A anon PFAM-B_3580 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 169 to 570 amino acids in length. 25.00 25.00 38.70 38.70 22.70 22.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.35 0.71 -4.27 46 213 2009-06-18 09:58:49 2009-06-18 10:58:49 3 7 83 0 98 245 40 172.30 25 54.87 CHANGED hshh...hhhsusppppcpp.h.....................pthtttt....................tphthsphtspphpplhssYhplKsALVss-sppApssAppLtpshpshshp.......phhspttpphtphh...............slcpQRctFptLSpphhshlcts...hsssslYhpaCPMA.sscGA.WLSpsccI+NPYaGcpMLsCGpVpc ....................................................s............tt.t........................................................................................tt.thstttppphptlhssYhplKsALspsDsptApssApplhpslpt.lshs................thts.ptt..pphhphhpp.h.tt................tclcpQRp.tFptLSpshhsllcth.....tsspslYhpaCPMsp.sspGu.WLSp..sc..c...l+NPY.......aGspMLsCGplp......... 0 47 84 95 +11661 PF11828 DUF3348 Protein of unknown function (DUF3348) Assefa S, Coggill PC, Bateman A anon PFAM-B_3615 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 244 to 323 amino acids in length. 
25.00 25.00 25.10 98.20 23.90 24.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.52 0.70 -4.95 19 133 2009-06-18 10:49:21 2009-06-18 11:49:21 3 1 131 0 44 153 14 243.60 46 94.84 CHANGED MsQsPpRsshuGPsLlRLLARLsss-sspSppuLu-RLSQWLsWoDAlALSuA......LsupsPssssuspssusstc.....-tARlRusLspuIsscts.ts.tRtts....h.hsshssssp.stsDaAsaRQpYLuhQpsMcssIusLRGRLRstLAuposs.hARLAhlDAlMEpsLusRERsLLAslPsLLts+FERLRpsccts..ststss...........................hstsGuWLssFRpDMQSVLLAEL-lRhQPV-GLLAALRsp ..........................Mhpss.RssLuGPsLlRLLARLscs..Dls....tStpuLuDRLSQWLuWTDAIALSuA......LsussP.us..s..s.....u...s..cst.utstc....tpsARVRsuLApAIssssshhs..sRtts.........pssshss.s..ss...sss....DaAsFRQ+YLuhQQsM-sslGpLRGRLRptLAsposs.hARLAslDAlMEpsLusRERoLLusVPsLLus+FERLRcAcptsh.t.ts.t.st.sts............................................................sssPsuWLDsFRcDMQSVLLAEL-lRFQPV-GLLAALRsp. 0 6 19 32 +11662 PF11829 DUF3349 Protein of unknown function (DUF3349) Assefa S, Coggill PC, Bateman A anon PFAM-B_3716 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 99 to 124 amino acids in length. 25.00 25.00 28.80 28.80 24.70 23.70 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.25 0.72 -3.56 24 263 2009-06-18 10:51:17 2009-06-18 11:51:17 3 2 128 6 68 170 2 97.70 43 87.82 CHANGED hsshlsslluWLRAGYPpGVPssDhhsLLALLpR+.Lo--EVptVAppLhc..pup.s.......spsDIsshIsplTcc.PsP-DlcRVpA+LAutG.....WPLssscp .........spalpSllsWLRAGYPEG.VPssDphsLLALLpRp.Lo---lppVs..pcLhc....pGs.s...................Dp.-IushIoclTcc.PuPEDlpRVtu+LAAtG.....WPLsss.c.............................. 0 14 42 58 +11663 PF11830 DUF3350 Domain of unknown function (DUF3350) Assefa S, Coggill PC, Bateman A anon PFAM-B_3789 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. 
This presumed domain is typically between 50 to 64 amino acids in length. 25.00 25.00 29.50 30.70 22.00 18.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.84 0.72 -3.94 11 178 2009-06-18 10:57:16 2009-06-18 11:57:16 3 7 70 0 79 164 0 57.80 56 5.47 CHANGED -lsPpusLSPshttt..........tssttct+RoscELRcLW+pAI+QQILLLRMEKENtKLp .............................EL.PhSPLpPshE-t..............ss.t..pcc+RsScELRpLW+KAIcQQILLLRMEKENQKLp. 0 13 18 42 +11664 PF11831 Myb_Cef DUF3351; pre-mRNA splicing factor component Assefa S, Coggill PC, Bateman A anon PFAM-B_3985 (release 23.0) Family This family is a region of the Myb-Related Cdc5p/Cef1 proteins, in fungi, and is part of the pre-mRNA splicing factor complex. 27.00 27.00 27.60 27.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.63 0.70 -5.14 29 298 2009-06-18 10:58:48 2009-06-18 11:58:48 3 12 252 0 217 288 1 227.90 29 30.07 CHANGED TPNPlhos.hppst...............thGhTPhpsh..........GtTPh..pTP...RDphslN...........tt.....htpospph+hp.pppt+ppL+suLuuLPpP.cN-aEl.lP-pt......pp-.scptEp.....h...EEDuu-t-tRc+ttcptpcptchc+RopVlQRsLPRPs.lsht.Lh..............phssp..s.sccllpcEhhtLlppDsh+aPh...sssp..ttp............................t.th-phs-stlppA+thlptEht..h.................t..sppppphc............sasps .................................................................................TPNshh..os.hRp.s...................t.uhTPtts............utTPh.....pTP..........hRDphsIN............ppt...........stss......P.p....+.t..ppph+pp....L+tuLuuLPtP.cN-aElslP-pp...............ppE...pptct..................h....EDsu-h-tRppttc-tpcttEh++popslQ+sLPRPst.ls.phLh...................................................p.s...st..hppu-cLIpcEhhphltaDsh+.a.Ph...sssp...ttp.................................h..h-phscppLp..pApphltpEht.h............................t.t...................................................... 
0 74 117 174 +11665 PF11832 DUF3352 Protein of unknown function (DUF3352) Assefa S, Coggill PC, Bateman A anon PFAM-B_2160 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 538 to 575 amino acids in length. 26.80 26.80 27.40 26.80 26.60 26.60 hmmbuild -o /dev/null HMM SEED 536 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.62 0.70 -6.20 32 156 2009-06-18 11:54:13 2009-06-18 12:54:13 3 5 102 0 59 174 55 465.90 18 84.34 CHANGED slhuushlLlshusuhaahhtpp.psl.hhpps.hshPhuAphlPpsA.lslplhssPscl.shtphsss.pp+pthppthpp........h+cshhAtsGlcapp-.lpsWlGsplolAllss....s.sp..................tpsuhlhsLssc-sptA+pFLpph.appcshsG.sslphpsY+Glplhpspsshht.............shusAll..s-p.hlLlAss.tllcpAlDshQhss.shtus.thppslppL..scs.lAhlhh.s.ssht.......pa........hpl.spslsphp.slpullsulshpspuLthcuhhhhpsph.....sss.sps.....Lpphsusshhhlu..........sLpp.appls........psss.hsphlp.lhpphhpthshslspslhsh...pGchhh.hlsts..........suWlhsspppsssss............p..LDphhp.spG.hphsslshsspslssWop.........................................L.h..t.t.........ltsp........lsss...........+uthsspthaussLsulspthss.....tpsLtss.phppslssh.....tsstt.hhLshptstshLsp.hhPh........hhh.hthsupslhsslpululss...........................tss...ssshhphchhLplt 
.......................................................................................h...hsshhlhshuhushhhh.tt........ts..p....sshhlPppA.hhhph.sss..pphtth.....tphhs.......ppppt.......hppthpp........hppph...hst.ss....lsapp-..lpsWlGp.clslA..lhs.shpt..........................tpsshLlslshp..ctptucphl.pphhppps.ts.hplp.pppYpGlsl..hthpsstst......................h.ssAll......scp..allhuss.pl.lcpAI-shpsss.slsps..sappshppl......sps.luhlah.shssl....................................phh................hsh.sp.hs.ph.p...thp......shs.....hulshptpulthcshh....hhpss.......................s...st.......hlph..hsspsh.hh..hs........................................slsp.a.pt.htp.h......ttss.h..phhpphhtphpp.hslsltpslhsW..hpGEauh.hl.pt.t............................tsshlhhsptp..stt.............t..lcthht..ptt..h.htph.h.tt..l..Wp....................................................................tt.....................h.hh...........hs..st..hhht.htshtth..........sl.tp.th.p.h.............t.hhlshp.....h..t.....s...................t.hh.thpulshs...............................ttt..t.phhh........................................................... 0 17 45 57 +11666 PF11833 DUF3353 Protein of unknown function (DUF3353) Assefa S, Coggill PC, Bateman A anon PFAM-B_2231 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 205 to 258 amino acids in length. 
28.40 28.40 29.30 28.60 27.70 26.70 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.22 0.71 -4.85 34 209 2009-06-18 11:57:17 2009-06-18 12:57:17 3 4 100 0 110 207 132 176.70 25 71.90 CHANGED sp-ASFEElQpARsphLpph.usDtpspsplEAAYDulLMppL+pRQpG+ltlsptlphscp...........tsssstsstsssshlpch................s.lshPpspslhhphsh.GsLullhllh.........s..ssssspLhLuluhhuslhhhh++...tp+hhpuhhholssLhlGlllGullsshl.s.hls..hshospplpulsshllLaluuhhL .............................................psASa-EIp..tA..+shllpph..sscppshtp.........lEuAYDtllMppLpp.RppG....Kl....plspcl+hsc......................stssssWlpph......................hp..h.Pstp.sl.hhphhhauhlsshslh............s...stssssh...lA..luhh...uslY.Flp.c.+............tp.phh+uhhhshssL.hhGhhhGshl..shl.s.........hs.p.h.uhhshhhhalss.aL................................................................................ 0 26 73 98 +11667 PF11834 DUF3354 Domain of unknown function (DUF3354) Assefa S, Coggill PC, Bateman A anon PFAM-B_2265 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 60 amino acids in length. 29.60 29.60 29.70 29.90 29.20 29.10 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.41 0.72 -4.36 43 290 2009-06-18 12:00:32 2009-06-18 13:00:32 3 39 100 0 145 279 0 67.50 36 10.74 CHANGED hhRVoIa...sspc....spts.GKLlhLPsSlcELlcIuupKFGh.s.s.ocVhsp-.GAEIDDlclIRDGD+Lall ......................tRVola..tsspp.......t.....GKl.lhlP..s.olp-LlplAupKhGh..t..s....opl.hstc.GucIDDIslIRDsD+Lal...... 0 25 77 113 +11668 PF11835 DUF3355 Domain of unknown function (DUF3355) Assefa S, Coggill PC, Bateman A anon PFAM-B_2268 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 111 to 177 amino acids in length. 
26.70 26.70 49.10 49.10 23.00 22.60 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.13 0.71 -4.43 5 57 2009-06-18 12:04:15 2009-06-18 13:04:15 3 2 3 0 21 51 0 152.20 44 28.56 CHANGED TPTKCSTPVPSsAITKuVAESTPTslEHVFPATsusSsPSIuSAAtsTPVSLTtTKEAEADMsKVE-Ko-cTlpDLCsK....INpMLEsp.....E.hhusDSTtsVslhussTsssshsLEhsp-..App..h.phsppp.hs.hps.............ssc..hspc-hltthclsoK ...............hPTKCSs.sPSsATT+ssAcSosss.E+VFPAThus.sPSssSushsTsss.sETccs-usMDKs..ps-cThQDLCs+...............IsphLEshRshK.-.ohShD.stslsshSsNsssss.........hhhEVSsE..AsshphVsoschshssshps...tp..p....ssst....sMsp--hhchhcVsoK......................... 0 0 0 5 +11669 PF11836 DUF3356 Protein of unknown function (DUF3356) Assefa S, Coggill PC, Bateman A anon PFAM-B_2406 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 104 to 119 amino acids in length. 26.90 26.90 29.00 28.20 26.30 25.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.07 0.72 -4.21 42 205 2009-06-18 12:07:32 2009-06-18 13:07:32 3 2 175 0 72 166 41 99.10 39 86.19 CHANGED apGEV...........slslsGpppsh+LTLGALAELEsthus.....ssL.......suLspR...Fcsup.hSu+DlltllsAGLRGGGhsssps.-lhpspltGGsh..ss.ucsAApLLspuF..shs ................hRGElshplsGccasLp.L.TLGALAELEsshts.....ssL.......sALltR...FssG+.hSuRDlhplIsAGLRGGGpsss-c..-lushps...cG.G.ss..uh.AplsupLLsssF.ss..s.......................................... 0 17 48 58 +11670 PF11837 DUF3357 Domain of unknown function (DUF3357) Assefa S, Coggill PC, Bateman A anon PFAM-B_2464 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 96 to 119 amino acids in length. 
27.80 27.80 27.90 27.80 26.20 26.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.59 0.72 -4.20 35 152 2009-06-18 12:11:01 2009-06-18 13:11:01 3 4 62 6 22 158 0 103.30 29 16.65 CHANGED hsshshp.YsP.LPssstsss.s...........+RRsh...pshsslhsullhLhhllull.............sspssssst...sss.ssspssss..............SRGsspGVSEKSsss........hsus.ssuFsWoNuML ...................s.....t.YsP.LPsssssutt............++R.......tshsslhsushlLhslsshh................stssss.ps............tss.pssts...............SRGsspGVSEKosss............hhuu.ssuFsWoNuML........... 0 2 11 17 +11671 PF11838 ERAP1_C DUF3358; ERAP1-like C-terminal domain Assefa S, Coggill PC, Bateman A anon PFAM-B_2558 (release 23.0) Domain This large domain is composed of 16 alpha helices organized as 8 HEAT-like repeats. This domain forms a concave face that faces towards the active site of the peptidase. 23.90 23.90 23.90 24.00 23.70 23.80 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.01 0.70 -5.19 123 3372 2009-06-18 12:17:03 2009-06-18 13:17:03 3 33 1415 15 1516 3073 300 295.80 18 35.56 CHANGED hlhlNssshuah+lpa.sscphsplhp............tltpht....sshsRshlhusshshsps..........GphsspshlsLlp.....th.....ssEs-.......hsVhspllsplsshttshh.hpspttpp.hpp...h..plhst.hhcpl.uhp.s...........sshphthl+shhhssus.........ssts.hphhpshls...G.sps............slss-LRhslhsslstt.Gs..tpt.....hpplhp.hppss....................o.....sstcptAlpuhuthsssplhpc.shshlhs.....s....s.lp....sps.lthshtGh.............t.sspp..phlhsahpppa.-pltph......hsstss..hhshhltlhssth.......s..........stctlpplcpah..p...........sct...stuhc.Rsltpsh-slp 
..............................................hlhlNssph..uaY+lpY..cspth.pt.lhp.........................tltt.p.................l.s.s...h..sR.stlls-hhslscs..........upls.hsp.hlsl.lp.....hh......tp.Ess..........hhsh.ps.hh.....p..t.l......s..h....ht.hhh......t....s....s..t..hpp.......hpp...........................hh.p.l..hpt.....hh...p....p.....l...uhp.st.................................................psh..t.pt..hhR..sh.hh..shss....................t..ts...hppu..tph.h.p.t...ahp...........s.....s.......................................slss.s...lR...h..l.htshhpp...us....tpt..........hshl.hpph.p..p....ss...................................s.....ss.t+pph.htAL.....u....s.s........p...........s............st.h..l.pp..h.......l..p.hhhs......s............p.....lp.....sp-...h.hthh..t.s.h..................h...ssths.phhhsah..p..p.pa.p...h.l.ph........h.stts........hsthl......t.h.hsph.h.t...................stp.p.htphctah...p............t........................................................................................................................................................................................................ 0 434 694 1236 +11672 PF11839 DUF3359 Protein of unknown function (DUF3359) Assefa S, Coggill PC, Bateman A anon PFAM-B_2625 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 80 amino acids in length. 28.40 28.40 28.40 28.40 28.00 28.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.26 0.72 -3.65 2 169 2009-06-18 12:27:18 2009-06-18 13:27:18 3 1 117 0 29 87 0 84.90 74 98.60 CHANGED MpplL.hSulshuulLAsGCuSlocp..............sEttlouspssAspu.uRAsEAYtKA-EALAAA.tAQpsAsEANE+AhRMLE+AShK ..................MNNVLKFSALAL.A.A.VLATGCS.S..sS..K..E........................TEARLTATEDAAARuQARADEAYRKADEALAAAQKAQQTADEA...NERALRMLEKASRK.............. 
0 8 13 23 +11673 PF11840 DUF3360 Protein of unknown function (DUF3360) Assefa S, Coggill PC, Bateman A anon PFAM-B_2754 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 489 to 517 amino acids in length. 25.60 25.60 26.40 27.10 25.00 25.50 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.75 0.70 -6.16 11 257 2009-06-18 12:32:27 2009-06-18 13:32:27 3 2 199 0 48 167 14 447.20 65 98.72 CHANGED MSss........................ppsSYpclH+PuSEFpTR--YL-HELpIMpPKRW+lNLPhRDaRFEhEDhVPAhAATIGKlVMVuAlsAAFAush.......GLusEFVlENVRYELLIAulLFVILhSGFLNPsANLAGTHGPhIPLIPlIsAAGGHPLALGIhlGlFGLlLuloKGGShLspLTScGVsGGLLlYLGFlGhhSQlpKLhuWAsuh........uMualuFVVIhsTIllYAhLt+hpKRWLAIPLsullAullAFuMGAPF-..FsTpPGLPNhsPhYWWGE.sTGWpLGLPslpHFIAVlPFAlLAVAMWSPDFLGHRVFQcLNYPK+u-+VLMslDDTMTssSlRQhVGSlLGGGNluSSWGTYMIPAAIAKRPIPGGAlLTGlhCllAAlhGYPMDLAlWpPVLsVALlVGVFLPLLEAGMQMsR-sKsoQSAuIslFuSAlVNPVFGWALTMLLDNhGLIGsKERuspLohhcRhlIPsssFllhsuuMuhVGMLP...GIPAlL ..........................-h.cspoYc-LHRPuSEFtoRs-YL-HELQIMpP+RatlNLPhRDaRFEhEDhlPAh.AGTIGhlsMhuAlhhuaAssL.......sLu.cpFslEssRhEhLIsAl.FsllhSGFl.PpANLAGsHGPhIPLIshlshAGuHPLAhuILlGVFGLlLuhhKGGShLssLTSpGssGGLLlaLGFsGThuQlppl.pWAsGl........sMuYIuhlVlhlsIllYAhLt+hpKRWLAlPlsshhuhllAhALGAsF-..FpTp.GLPNhsPsYWWGp..ppGWhLGhPshpHFlsslPFAlLAVAMWSPDFLGHplFQclsYP++TEKVLMslDDTMThCSlRQhVGohLGGuNhTSSWGTYhlPAAIAKRPIPuGAlLhuhLshhhAlhGaPMDlAlW.PVhpsALlVGVaLPLLEAGMpMs+csKs.oQuAuIslFuSAlV..NPVhuWALTMLLDN.GLIGsKERuupLShhs+llIPusshlIhhhAMhAVGML....Gl.Ahl........ 0 10 18 34 +11674 PF11841 DUF3361 Domain of unknown function (DUF3361) Assefa S, Coggill PC, Bateman A anon PFAM-B_2780 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 154 to 168 amino acids in length. 
27.20 27.20 27.60 28.50 27.10 26.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.92 0.71 -4.52 9 228 2009-06-18 12:35:40 2009-06-18 13:35:40 3 3 88 0 122 212 0 153.90 50 23.12 CHANGED hTFAtEFIpccGLthLhpllEsusc............hu............-hLsasLoAFlELM-HGhVSW-hLosoFI++lhuaVs.psth.csollppuLuILEshV.sSsslathVtpElTltpLlphLps.sspplQspAIALlNALFlKAs-s+Rp............phAsslupKpl....RthIhssllt ........hTFApEFIsh-...GltlLsp..hVEsGoc..............hu.....................................-hL.uaoLTAFlE.LMD..H.G.IVSW.D.hlSlsFIK+IAuaVs....pshh..DsSIlQ....pSLAILEShVL.N..........SpsLYppVup.EITlspLIsHLQs....osQp..IQThAIALINALFLKA.s-.s+RQ........................-MAshluQKpL....RslILsplI+................ 0 26 35 68 +11675 PF11842 DUF3362 Domain of unknown function (DUF3362) Assefa S, Coggill PC, Bateman A anon PFAM-B_2839 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 117 to 158 amino acids in length. 27.60 27.60 29.10 28.30 27.20 26.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.01 0.71 -4.30 9 1200 2009-06-18 12:37:54 2009-06-18 13:37:54 3 3 1181 0 232 851 107 136.70 46 19.49 CHANGED DQVQsFYPSPMATATAMYHSsKNPL+KVsRc.....sEsVDIV+GEKpRRLHKAFLRYHDPsNWPlLR-ALKpMGRADLIGsGKpHLIPsaQPh.sDG......uYQSAR+KNSo.......................................ssG..shps...........spspKG+hLTQHTGLPPRsss .DQVQsFYPSPhAsSToMYYTGpNPLt+lt.a.c......oE.cVhV.PKG-+QR.RLHKALLRYHDPsNWPLlRpALctMG+c.cL....IGsp+cCLV...Ps..........ss....hc.......................ph.pp...u..R..+..p.spp...........................s.t.st...h.ttt.................................................................st.sttttttt........................................................................... 
0 66 143 189 +11676 PF11843 DUF3363 Protein of unknown function (DUF3363) Assefa S, Coggill PC, Bateman A anon PFAM-B_2310 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 323 to 658 amino acids in length. 28.40 28.40 42.40 30.30 25.60 24.80 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.27 0.70 -5.82 40 413 2009-09-14 15:03:38 2009-06-18 13:59:04 3 5 192 0 169 399 41 221.20 32 58.09 CHANGED LhlGRLp+LEchGLApphuPGpWplpscsEssLRcLGERGDIIKpMHRAh.stcGh-..Rssusaslcspt.s..sPllGRLlsRGLc.DELpspuYsVVDGlDGRsHHlclsch-ssucsss.GuIVElRsh.s.......................................................................................Dsptppp..lslssRSDLsLppQlsAsGATWLDRphluc-s.sLut..uGFGtEVRpAhcpRs-+LhspGLAcR.pupRllasRsLlsTLRcRELsssupcLuucTGLsaptussG-pVuGsYRp+lsLASGR.FAMI.....D.....cGLG.FpLVPWpPsLE+pLG+cVoGls+ssGGl-WshGRcR.GLGl ......................................................................................................................................................................................................................................................................................................................................................................t...h.lthho.hslttQht..usTWLDpphlttt....shs......GF....Gt..psp..tAhttR.thh.tpt.App...ptttl....hhttshlthLctpEltthutphu.t.u..h..sttGp.ltGhhptph.LuSG+.aAhl.....-.....puht.FpLVPWps.l-pplGppl.uhhp.ssulsWph..uRt+.u.............................................................................................................. 0 24 107 141 +11677 PF11844 DUF3364 Domain of unknown function (DUF3364) Assefa S, Coggill PC, Bateman A anon PFAM-B_2336 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 60 amino acids in length. 
27.40 27.40 27.40 27.40 27.10 27.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.77 0.72 -3.94 40 333 2009-06-18 13:02:42 2009-06-18 14:02:42 3 3 292 45 92 315 8 54.70 45 14.47 CHANGED MsQss-.......+lhDHhsLF+cPEYp-hhpsKpcpFEssh.ssccVscltEWTKTWEYREKN ............MsQ.osE.......+lLDHspLF+-sEYpchFpsK.ppFEsst...sspcVpclhEWT+ohEYREKN. 0 24 57 72 +11678 PF11845 DUF3365 Protein of unknown function (DUF3365) Assefa S, Coggill PC, Bateman A anon PFAM-B_2563 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 198 to 657 amino acids in length. 27.80 27.80 27.80 28.00 27.50 27.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.67 0.71 -4.44 153 684 2009-06-18 13:07:17 2009-06-18 14:07:17 3 87 394 0 307 670 98 181.10 17 39.40 CHANGED lhhshlh........hhhhhhhhh.........................h...p..pts..tppshpp.Acshhpph...............shRp..........asspphtsh..h...................................shthshhs...........P.uhhsppl...tc.......thsp.tt.s..hhph+hsoh..pspNPp.NpsDs.aEppsLcpFcpss............tphhph....su..pphh+hhp..P..lhh.pps..CLpCHu......t................t.thsaclG-lpGuhslsl.Ph .............................................................................................hhh......h....h.hhh...h....................p.....t.h.....t..hh..pp..uphhhpth...............t.h+p..........ahsphht.h..h........................................................................s..hth.s....s.shhshpl...tp........hsppt.s.......hph+hsuh...ps...pNsp..stsc.....s..aE.pchlcpFppst.................t........ph.p.....ss...pshhRhhpP..lhh..pps..CLpCHu....t.t.......................t.saphG-ltGshslphs..................... 
0 130 249 287 +11679 PF11846 DUF3366 Domain of unknown function (DUF3366) Assefa S, Coggill PC, Bateman A anon PFAM-B_2678 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 200 amino acids in length. 27.90 27.90 27.90 30.10 27.70 26.80 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.36 0.71 -4.43 29 415 2009-06-18 13:09:44 2009-06-18 14:09:44 3 5 380 0 90 371 29 192.40 21 34.03 CHANGED lhhhullhslslHShLEYPhaYuhFhlshhh.hLhhh.s......sth+...hshphs......hsLt...shsllsshhhshhhhp..shppsphL.ha.....tspsss.....hppsssshhatp..sas.hsshh.hsh.p.tssshhhshhphuhclh...P.....psshhpph..hhuhshhGcpscA..............pphhppsphlas......................hppapsh...hpphstssss ........hhhhulhss..lhlHuhLEYPhaashhhh.hhl.lLhhhss............pth+..........hhs.hsth..................hhLh...s.shhlhsl.lshhhhhp....sh...p...p...s.phL.st..ap....tspshs.ph.....hppsp...shlh.t.pp..tashaushh.l.sh..tpppsphhps.hhchuhphlp.pP.....psshhpph..hlhhth.GcpscA............................cphhpphphlaP..........................................pth........................................................................................ 0 15 42 65 +11680 PF11847 DUF3367 Domain of unknown function (DUF3367) Assefa S, Coggill PC, Bateman A anon PFAM-B_2726 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 667 to 694 amino acids in length. 
26.00 26.00 26.00 33.70 25.90 25.40 hmmbuild -o /dev/null HMM SEED 680 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.20 0.70 -13.27 0.70 -6.58 14 220 2009-09-11 09:45:00 2009-06-18 14:11:38 3 6 191 0 63 201 18 617.40 44 51.49 CHANGED llhhhlsFtQsPGplusDTKlDLslNPhtFLsRAhahWssshshGQlQNQAYGYLFPpGsFFhlschLtlPsWVsQRLWWulLLsluFhGsltLscsL......slGuss.sRVlAAlAaALSPRlLTsLGulSuEshPhhLsPWVLLPlltsh+.....................................ssRRhsA....hSuLAlshhGuVNAsATlsullsAsLalh..stt..pRhhph.huh.Wh.....hulhlAohWWllPLLlLG+YuPsFhsaIEouusTTshhohsElLRGsssWssals....sthsAG.tpLVssshhlhsTshVAulGLhGLshhthPtRthhsshLhsGlslluuuahst.suPhup.VpsaLDG..sGssLRNlHKh-sllRLPLslGLApLL.stl........slsh.ushtth...hhp.th...........t+hAsslhslhllssusuPAaoGclhss..GsapcVPsYWp-AA-WLssc...ssu.o........RsLllPGusFup.sWGpo.hDEPhQsLh-sPWuVRDulPLssPtsIRhLDul-ctLpsGtsssGLussLtRt.GluhVllRsDLsscsstsshsh.hh+pslt..coPGlspsts.FGsshtsssh........hlsDsshcs..hh.ul-lap....Vsss..........sssss..hthssscshhhVsGusEuLhpLsusshhtst.....................usslhsu-.t.ppss...ssss....ssoDsstspcssaGp.....sssssushhsss..................-....spsshshlss 
................................sshhlsFsQsPG.lusDTKhDLssNPhpFLucAhphWssphshGQhQNQAYGYLFPp...GsFFhluchLt...l.PsWlsQRLWWulLLsluFhGhlRlA.ctL........ulGusu.oRlluAsuaALSPRlLTT.LGuISSEshPhhLAPWVLL.Phlhsht...t............................................sstthAAtuulAVAhMGAVNAsATlAuslsAslahh....................s+t...........s.Rh....hhRh...huh...Wh......huhsluohWWlhsLhhLuthSPPFLDaIE..S...usVTTpWhSLsElLRGTsSWTPFVs.....sptsAG.tsLVTsshhl..luTshlAA...hGLsG.L...s...h.t............hPtRt...hhlhhLhlG.lllhss.uahut...suPhut.lpsaLDu..sGssLRNlHKhsPllRLPLsLGlAphL.u+l...................sl.stssst.t....hhpspps............................+plAssllslhslhsuso.AWoG+lhPs..GsastlPpYWpcAA-WLspc..ssss.................RsLVlPuAsFApQsWGho.+DEPLQsLhs.s...PWuVRDulPLsPPtsIRuLDuVpphhssGtsss.....GLus.sLtRt.GluhVllRsDLDscsspostsh...lh+pslt..tsPGlt+l.spFGsshssssl.ts......hl.-sshcs...thsAl-lap...Vsss...............sssus...shhssscthshVsGuP...EslhpL..sp..p..tth...u............................uPslhsuD.u...ptsuh.....ssst.....sloDsshsR-s-aGp......lsspsSshhuss......D....hptshsh......................................................................................................... 0 14 44 58 +11681 PF11848 DUF3368 Domain of unknown function (DUF3368) Assefa S, Coggill PC, Bateman A anon PFAM-B_2745 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is about 50 amino acids in length. 27.30 27.30 27.30 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -7.93 0.72 -4.40 50 241 2009-09-14 14:59:35 2009-06-18 14:13:08 3 2 142 0 94 302 28 47.90 28 32.24 CHANGED plpGTLGlLltAKpcGlIs..pl+sl..l-pL.ppsGaalupplhpplLppss ....lhGTlGlLltAKppGlIs..pl+sh..lcpL.ppsG.ahlupplhpplLphh............... 0 32 74 88 +11682 PF11849 DUF3369 Domain of unknown function (DUF3369) Assefa S, Coggill PC, Bateman A anon PFAM-B_2927 (release 23.0) Domain This domain is functionally uncharacterised. 
This domain is found in bacteria. This presumed domain is about 170 amino acids in length. The domain appears to be related to the GAF domain. 25.00 25.00 25.10 26.00 24.40 23.40 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.79 0.71 -4.32 63 346 2012-10-02 14:34:25 2009-06-18 14:16:17 3 20 234 0 123 289 44 170.90 27 29.85 CHANGED lhouLRSYcsIpsI-psRp.......GLE+llcAousLh..phcolppFusGlLsQlusLLsls.ssulhs.................sspss..........tpp......tt...hpllAuoGp.asshhsps......htt....hsspltphlppulpp.+psla.pcsthlhYhssp..s.spphllalcst.psLs-h-ppLl-lFspNlulua-NltLhpclccs ............lhouLRoYcplptI-ps+p.......GLp+llcASuslh..p.t+olppausuVLsQlsuLLslp..ssulhs............hshpss....................spp.....tt..hplluusGp.apt.hhsps........htt....lstcl...tptlcpslsp.....+pshh..tssthlhahpsp..p...uhps..llYl...pst...p..pLs-..h.-.cpLl-lFstNluluF-NltLhpcLcc............................................. 0 38 66 86 +11683 PF11850 DUF3370 Protein of unknown function (DUF3370) Assefa S, Coggill PC, Bateman A anon PFAM-B_3037 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 452 to 532 amino acids in length. 
25.00 25.00 67.30 67.30 22.70 22.50 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.38 0.70 -6.02 31 109 2009-06-18 13:19:48 2009-06-18 14:19:48 3 3 55 0 43 130 73 441.60 43 92.32 CHANGED shhlsss.......................shhtsstssssthss....p........QclRPLPGpLssVPVlNSNsPElVpu-GILlSTFPs......ttptpPtspL......sasFsGRFDlFuHHlatspsspch.....cs.LaLullstNPuspPVTlplhpGuopLS......ps-APFlsLPshhscshG..slhuGPGsRsus-lL.RGcpstcLPsp.lsIPPtptphLlsLPIPlpGls.......................................NGRshhlRhcSsGPlhhAslAthu........................psPo.pcWhslLpsGpLu.sR-ahPoPh.....thpsthIYSRVAGVuhGopWpAploD...ttt.L..shsstslSaPloSLp+GphGTsQlQoA.LhsRhPDTAasAHGNYGVcYDLsLPLpNsusp..slpLsLpoPlKssc....tttLhF.t..ps.shFRGolclcht-sttt.....+hlHLs.RpGQ.G.sLsplsLpPGcp+pVcVsLlYPADATPPQVLoVhPlp .........................hhhhhhh...........s...hssp.ls...psQplRPLPGpLDslPVFNSNSPElVpsEGILLSTFPs......pscthPsAHL......NashpGRFDlFuHHlspspsspch.....co.LYlGllstNPu.spPVTlclLpGuSaLS......psDAPFlsLPshsc.sspG..sVauGPGSRlssDlL.RG.......pppsphPsp.llIPPspschLlshPIPlpsls.................................................NGRSshhRLp..Ss..G......s.......lYhAsLAhau................................p..sst+sPoLp-WpsLLppGpLu.PRDhhPTP......tsssphIYuRVAGVuhGSpWpAplsD.s.ttp..hLt..lPtsGpuhSYPlSolptGshGTsQlQoAshhsRYPDTAYpAHGNYGVcYsLoLPLhNsospspoVslsLpTPlKpsp....pstLhFhpsssstlFFRGoVclpYpDcput.pp..RahHLVQRpGQ.GpsLlplslpPGcpR.VpVchlYPPDATPPQVLTVpT..p..... 0 2 25 39 +11684 PF11851 DUF3371 Domain of unknown function (DUF3371) Assefa S, Coggill PC, Bateman A anon PFAM-B_3115 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 125 to 142 amino acids in length. 
25.00 25.00 26.60 26.70 23.30 24.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -11.07 0.71 -3.46 20 373 2009-06-18 13:21:43 2009-06-18 14:21:43 3 4 107 0 112 312 0 132.00 43 32.44 CHANGED hQARAHGL.sshsSsuls...os-Lss+l..lKQpssh-c.............................................ssp....c......lhs.p.tppPphssss.............sLDls...........................-GshsFsDsLss.sspshshu..hptpp.cL--lLMDD..........sLSPl..uoDPLLSosSPs..uSKsSSRRSShShEEs- ......................................................................................................................hQARAHGL.shhs.SsGls......os-Lss+l..lKQEsslEp...............................................sspp.................hh...p....ts...shssss...........................sLDLs.................................-.Gshs..Fs.ssLu.stss....s...s.hshu...p.tp.cL--.lLhDD...............oLSPl..soDPLLSohS....Ps..ASK.s..S.SRR...S.Sh.ShEEs.................... 0 4 14 44 +11685 PF11852 DUF3372 Domain of unknown function (DUF3372) Assefa S, Coggill PC, Bateman A anon PFAM-B_3259 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This presumed domain is about 170 amino acids in length. 29.70 29.70 30.50 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.88 0.71 -5.11 37 294 2009-09-14 08:08:02 2009-06-18 14:23:36 3 29 264 8 103 277 30 163.90 36 13.66 CHANGED ohpsNN.WsVGLPhtp+stssWshIpsll..sssshcPsts-IttspphFpElLpIRpSSsLFRLsous-I.pRVsF+NsGssQhsGLIVMSI-D.G.sts...........tslDsph-ulVVlhNuosppho.hsls....shs.u...apLHslQtsus..Dshltpu.sasss......pGpFTVPAhTsAVFVpsp ......................t.hpsNNaslGLP.tscst...ssWslhcsll..sss..shcPsss-Ittssshap-LLplRpSSsLFpLsouscl.pRVsF+...NoG........s.s.p.hP.GlIVMoI.....-D..u.tts...........sslDsthsulVVVhNAospsho.hsh.........shs..u.....apLHslQs..suu....Dsslp.tu..shssu......sGphTVPAhosAVFsp...................................... 
0 29 62 92 +11686 PF11853 DUF3373 Protein of unknown function (DUF3373) Assefa S, Coggill PC, Bateman A anon PFAM-B_3442 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 472 to 574 amino acids in length. 28.00 28.00 28.50 40.80 27.70 27.90 hmmbuild -o /dev/null HMM SEED 489 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.81 0.70 -5.96 18 144 2009-06-18 13:25:15 2009-06-18 14:25:15 3 2 135 0 39 127 11 496.60 45 99.04 CHANGED MKphhphhlsush....hshohshtutss-hsh.p.cl-pLppclppLcpQht.................thppplpchEc..............+ohuchLphuGDaRa+lDulch+..s.....................................................h.th...s.....................................................ssutcscN-slaTNRhtLsh+AKsocclohpuRLsMYKsaG.....t.ssssspsaauspsh.hhDGshscsPo.sshLpV-cAYhsWpN....IuDpP.hahSlGRRPSTsGsPupLRpNpp+sus..sPuhLlcasFDGhTlGas....s-.lsulsG.sasKlCYGRGF-su........assst......NshcDTDhhGhsllPhDssshplphpa......s+uaN..lhshPs.hss.F.................stsssssplGsl.sahGhuhh.phcslGsG......shshFsShuhSpTpPsss.........h.hssh.....puhLsus..............ss+TGausYlGscash...ssssKhGhEYN+GSKaWhoFs.PAtDDhhsSKluTRGssaEsYhIpEL........hs+hFh+LuapYYDa-YTGSNsalGA....PhKIs-lpus.h...hhs.......................................slcpApDlhuohcl+F 
..........................................................MKThhSLlLuAsL....LooSLhAc.ss-DElspLQpQLAplpuELApI+cEp-spscpsp...............................ssptclADLNDR.s.DcTEh..............pAALs+lKFGl-FpTuVsNhsYK.......................................................................................................................................................................................VsGQDhssN..shahNcL+LNMsAclNDcscFaGRLSMsKNWu............QhGaSussh..sLDuspNspo.S.GssLhVDRAYhDYh.....Issp.WhsolGRpPuTDG..PsSNLRsNuLRpST..hPA.LsINshhDuAslsYc....PEsL.p...-ac......sslRhsYG+saphspt...............hhcWhusppsuD......sNlhhsss.....GtLsI-uhsDshlhh..slsa......hssFs.......l.shsshh..ss.h............................hpsssssNLGDl.sluslpFp.shc..uhGs.......NFNaFsSLGaSpusssph...............sshh......puh.Lpsp............t.s-cDGYAVaVGuRYDh.....ocuhKlGaEa.aGS+YWhTMo.PuhsDPlsh+h.TRGsAa-hYlIapL........-chtFlRLSYTpIp..auspGhPF.Gu.....sKh...DcucAssh......MhMh.......................................sVK........................................ 0 14 32 36 +11687 PF11854 DUF3374 Protein of unknown function (DUF3374) Assefa S, Coggill PC, Bateman A anon PFAM-B_3548 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 665 to 712 amino acids in length. 
20.70 20.70 20.70 20.80 20.60 20.50 hmmbuild -o /dev/null HMM SEED 637 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -12.94 0.70 -6.27 30 127 2012-10-03 17:14:37 2009-06-18 14:29:26 3 2 73 0 59 117 34 615.40 28 90.44 CHANGED Gas-sDDh+..tuNsa.......Gs.psGhsutlsADlpapscs...GYpsplpAcpLGh-suphplpsG+.GpYplsL.sYcpls................pYpsssshos..............a..............WhssG..........hsthpssl....ssh-LslcRc........+hulGhp....Yps........shapsalp....Yp+EcKpGp+...p.......uShshh.s.ps...................s......lucPlD.toTcplsAGlphpG-s.....WhsslsYpGShacNchssLs.....hssshssh...............................s..AhsPDNpAHplulsGQYphs.pophsGRlshGpMoQDpshlshs......................ssPh..pshDGpVDTlshslphoo+lspcL+LsuuaDYsDRDN+oss.hpasQhph..........ssls.Gtst.....NsshDpp+pphclsusYRls+sh+LpuGY-acps-Rott-............REpTsEsslau+hph+shsshshtlKuuausRsGScYpss.phou........upsNsLLRKYY...L.ADRcRsplchcssa.....ssh-sLolshsscYupDDYsc.Tp.lGLTEucDhGYDlsluatls-clslpAFhspQaI-.......SsQu..GSsshusss..........W...puslcDchsslGsGhsYs...sLh-s+LsLGhDYoaSsSpScTpl.............spshsssYGDYaupsHs....lphaupYplo-phu..........L+LsYpaE+YpDsDaup..lss......suIs.....slhohGslsHsYsAHhlhLohSYpL 
..............................................................................................................................................a.st-s.p..tsNth.......us...psthhutlsuDlp...h...pspp.......GYpsplpAppLGh-sshhplpsG+.Gpaplpl.sYppls................pap....sssshos..............a....tt.......h..........hhss..u..........t.....................hshh.ssl.............ssh-LulpR-........+hslGhp...aps........tshapshls....Yp+Ec+sGt+......p...........sShshh....s..ps...................h..........lscPlD.toTcplpAGlphtGcp.....W.sslsYp.s..Sha+N.chs..s.Ls.....apsshsss...............................h................st.uhsPDNpuHplu...lpuph...shs...psphsu+lhhupMoQD....pshlshsh.........................thPs.....pshDGcV-hhths...lphsu+lop..slplsuuac...YpDRDNposh..tasp.h.h................................................sshs.Gtst......NssaDhp.ppphc..lsusYRlspshpLpuGYcacps-Rsht-................................Rc..pTcEsslau+hphpshsshshhlKspaupRcGSpYpss..thop.........................upssshLRKa....L.ADRcRpplchcssa........................sshpsLolshssphs.DDYsc..ot.....l.GLocscsh.u.YDlshsa.lssslphpAahspphh-.............ScQu...Gussh.usss............................................................W....hsphc.D.cs..sslGhGhsap...sL..h-s+LplGhDY....oYSsup.SsTpl......................ttuhsssYsDhhup.Hs......lphaupYphs-phu..........l+lsapaEcYp-sDatpp.hs.......ssls......sllshGs.hspsYsAphlhlohsYph.................................. 0 9 23 38 +11688 PF11855 DUF3375 Protein of unknown function (DUF3375) Assefa S, Coggill PC, Bateman A anon PFAM-B_3589 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 479 to 499 amino acids in length. 
25.00 25.00 26.50 30.10 23.60 22.90 hmmbuild -o /dev/null HMM SEED 478 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.43 0.70 -5.75 28 246 2009-06-18 13:31:11 2009-06-18 14:31:11 3 2 231 0 95 249 38 458.70 24 95.80 CHANGED lptlR..pcpPAWRLLtuppAPllluhLcphFh.ssspslPps-LtptLccpLtsLccpttp.........tthPps....ApsYLs-Wst..p...GWL++hhsp.sss.-spa-LTsssccAlpalssLpppp..hsuTESRLtolhptLcpluptspsDPctRltpLccc+sclctEIsclcuG.clshhDssplt-RhpplhphucpLhuDFRcVccsa+pls+plRcclhs.pus+G-lLcplhsphDs.ltsS-pG+SFcuFachLhssspppclsphlcplhsh...stpLcsctR..Lpplhtchhctu-cVpRshp+hucpLR+alcspshhEsR+lhplLpphpstAhsh.....pct...sthhhshsthusshphshpp.hhpsPs.......psthsshsh.sssssch...shsshhststlDhpclttt.....lppsLt.pps...pholucllpphP.hppG..LuclluaLplAt.........ptsh......................lscppppplpapst..sGh....hRpsplPplhhsc ...........hR..pcpssh+LLtuppuPhlluhLpphF..stpttlspscLhppLpcpLttLpppttt............t.hsps.......ApshlpcWsp..p...G...WLp+hhst..sss..c.........hacLTstucpAlcalpsL...p.ppp..hsuTtSRlpslhptLcplutpsssDPppRlttLccchpclctEIpclct....G..p..h....shl-spphh-chpplhphucpLhsDF+cVppshcpls+plRcclhs.....sct....spG-lLcplhsth-t.ltcS-pGRoFpuFaphLhssppppplsptlcplhph..hht.tLssp.Rt.l+plhtclhctuppVpcshpphucpl+palpspshhcpRplhplLpph.stAhsl.....ttt........t.thhh.ht.ths.ssh.h..s.ltp...hhpsss.......pst.h.s.sh.h..tsssssl.s...shsslhspstlDhtpLtpt.......lpphlt..pps........lolupslpphP..pps..LuclluaLslut....ptsh.................................sspsp.tphlpaps......-Gp.........RphplPtlhh................................................... 0 24 63 86 +11689 PF11856 DUF3376 Protein of unknown function (DUF3376) Assefa S, Coggill PC, Bateman A anon PFAM-B_3667 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 770 to 1142 amino acids in length. 
25.00 25.00 25.90 25.90 21.90 21.90 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.73 0.70 -5.13 23 113 2009-06-18 13:33:06 2009-06-18 14:33:06 3 3 102 0 35 96 6 449.00 32 49.25 CHANGED hsslhuuluslPRppsh+spLttlpt.spphp+hcplhsuhRschppthpp...........thu...shhtsp.pstcLpthcscstptt.....shttshsatshhpsclst...hlctlspllsths.ttsssp.tttthpshhtph.t.........thsslsshps.sstsshtthhpttc.....hsh+hRRLphlsccLs.................csh......csssstc..........ssslsttcpslhshl.t.ulhpstcthtt...............hucshpc.lsssshsh...........ss......plhchlsphth..............t..pshtthctpu..-phLus.hltthsp........sh+tpl....LhsYL.u....asaaDlsshPhh..tstsh..sphp.lplsRISP-DAso..lpsssst........spLtGhphssFGAFhsRuaRcNDalWGRLcGA-Rllcllhsstc......................................................................................................stls......-hshhs......................lc+..ptFhslLcEEhspLh...........................................................................................................phs..............shlusLcsp.....ss 
.....................................................hpsIhup.ssh.tcpss+.cLtplttp.c....pthphlssuhhschcpp.sp...........t.u...slh.shhphhphss.cstssppu......h.tuauhphhst.schst............lscp.hshhh.hhsh.....ttsssp.shsp...stltph................stsshshs....pu..sshsshtshhphtc......shclttLt.lsctlp............................psh......css.ssts............sslst.hhssh.shht..sl.phtcthtt...........................................usshpp.lssshhss............s......sltphhsshss....................................................t.sL..hthcths..st.Ltp.hlshLss........p.t.ph.......hhthh.s....hsh.hphshhPh.....tt.c.......slclsplSs-stsh..Lssshps........pKLsGhpLtpFGAFapRuWRtNDahWGRLDGAshLVclLLsspc......................ht.................hhtthspptt....t.ttphhhs.hpp.tt...h.t.t.................................hhs......-hshhs....................st..ltt...th..plht-Ehstlh................................................................................................................................................p.............................lupssstst.hht.h......................................................................................... 0 13 30 34 +11690 PF11857 DUF3377 Domain of unknown function (DUF3377) Assefa S, Coggill PC, Bateman A anon PFAM-B_3829 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 70 amino acids in length. 25.50 25.50 26.00 29.40 25.10 25.40 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.53 0.72 -4.35 20 249 2009-09-11 16:26:36 2009-06-18 14:39:46 3 11 47 0 110 190 0 73.20 44 12.91 CHANGED sssscccccthspDs..s-lllcl..D....-s.suossAlAVVIPhlLsLClLsLlYsllp.F++KGTPR.................+lLYCKRSlQ-WV ..................s...ttc.-tttspDc.....scIllcl..D....-s...uuoVsAlAVVl.P.slLhLClLsLlYslhQ.F+RKGsP+.................plLYCKRSlQEWV. 
0 4 15 44 +11691 PF11858 DUF3378 Domain of unknown function (DUF3378) Assefa S, Coggill PC, Bateman A anon PFAM-B_3989 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 80 amino acids in length. 27.00 27.00 27.00 27.70 26.90 26.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.64 0.72 -3.92 33 937 2009-06-18 13:41:57 2009-06-18 14:41:57 3 2 910 4 78 516 5 78.90 34 26.11 CHANGED sshVl.pssppplpphpppYtshhssphsssshFtAKhsssoIosYpSGKVlFQGpsAEp.Au+a.th.spppttsssssspsh .........................sIll.phopcsIpshhppY.ps..h.s.s.sts.shhpahuKhsssTlolY.pSGKVhFQGp...pAEthAspa..th.st.ppptt.t.....s..................... 0 18 35 60 +11692 PF11859 DUF3379 Protein of unknown function (DUF3379) Assefa S, Coggill PC, Bateman A anon PFAM-B_2469 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 234 to 251 amino acids in length. 26.50 26.50 26.80 29.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.45 0.70 -5.19 25 166 2009-09-14 14:58:51 2009-06-18 14:53:41 3 2 162 0 38 115 10 236.60 48 98.12 CHANGED MDELEFRR+hhuDPpspDs-hLpthppsspcppFls-lppLDt+lppALc.VDVPDDLADKlLLpQ.....phppcp..........spR+p.s+h.tlAlAAS...VAFssGlhh..u.lphus...............ssLu-pALuHVaHE.s.hshphDcslshppVNAKLushusphstcF...PG+VaYssaCcFpG.s+uLHLVhQGcpG.KVTLFIVPlcschsh.t..sFsDsphpGhshtsssAshlLVGEpupDLshlpcclppshh ............................................MDELEFRR+lhSDPKp+Ds-hLshhsuS-uNsKFlDDlLpLDppIspAhK.VDVPDDLADKILF+Q....lc-.c+.................llRPpFsR+.AMAlAAS...VAFsAGLll..GQlpWGNhhl........sPApASLu-hAlpHVhHEcs..FVpcl.DEpsshpQINAKMtPFuhphpucF...PYHVYYLNHCGFGc.sNAlHMVFQGE+G.KVTLFlsPIcStpss......tFppcGMsG...llpPl.usASlILVGEcsEsLsslAp+Lhshlp................................ 
0 6 14 26 +11693 PF11860 DUF3380 Protein of unknown function (DUF3380) Assefa S, Coggill PC, Bateman A anon PFAM-B_2757 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 194 to 284 amino acids in length. This protein is found associated with Pfam:PF01471. 41.90 41.90 41.90 42.20 39.00 40.60 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.24 0.71 -4.23 35 230 2009-06-18 13:56:43 2009-06-18 14:56:43 3 11 203 0 58 211 23 167.30 36 58.52 CHANGED lphAslhAVscVESsGt.GFh...s....ss+PtILFEtHlFaRpLtt...........t..stphssphssls...s........p.tGtYts................usuca..............pRLcpAtulsp.....cuAhpSsSWGhaQlMGa.paptlGY.sSVpshscsMpt.uEspQLcuhl+Flct....ss.LhcsL+s+c......WssFA+tYNGPuatcN..pYDtKLspAYp+as ............................................................-.AslhAlscVESsGp.uah........sstcPtILFEtHhFhRplss............................................thhs.pts..sls...ss................p.tGtats..................tstca...............+LppAhslst.....cuAlcSsSWGhhQlMGh.pa....phhG.Y.sSVpshVsthpp.u-stQlchhl+FIct...............st.LhpAL+..s+c......WssFA+tYNGPuascN..pYDt+l..scAap+a................. 0 12 32 44 +11694 PF11861 DUF3381 Domain of unknown function (DUF3381) Assefa S, Coggill PC, Bateman A anon PFAM-B_2792 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 156 to 174 amino acids in length. This domain is found associated with Pfam:PF07780, Pfam:PF01728. 
25.00 25.00 34.50 33.80 22.80 23.20 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.10 0.71 -4.73 39 321 2009-06-18 14:02:02 2009-06-18 15:02:02 3 10 278 0 238 322 5 161.80 31 20.48 CHANGED lhps....c+K++pRpGY--GDhhl.a+phslh-Fl+s-..cPl...shLuphNclsh............cD.thchlpchcpTTsElhpChcDLKVLG+K-F+tLL+WRpplRchluhspt................ppcpt.......phElcthsEEppl-..cElp...p..hhE+pptct+.+c++cpscpKp.....K-ll+.Qhphh ......................ppKKpKt-GY-EG.D.hsl..a+phsss-Flpss..sPl...shLup.hsclsh.....................................................sD..t....ptlpcp.tTTcEI+tCCcDLKVLG+K-h.+t..LL+WRhcl.Rchhuhphcpptt................pttpp................................tt-s.pshc--t.plp......c-lp.......p....hhccp..t...tc.hK..+..cc++..c.pc+Kp.....Kchh+.php................................................................................................. 0 84 133 200 +11695 PF11862 DUF3382 Domain of unknown function (DUF3382) Assefa S, Coggill PC, Bateman A anon PFAM-B_2882 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 100 amino acids in length. This domain is found associated with Pfam:PF02653. 28.90 28.90 29.40 29.40 28.70 28.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.22 0.72 -4.10 45 897 2009-06-18 14:04:04 2009-06-18 15:04:04 3 5 808 0 148 478 130 101.60 41 23.71 CHANGED hspsL+pAlhuAllshllhssllGlpLp.t.GspLslp..sp.s.hhhhVuhuslshFlhpLa......+shls.tshsssp..hsslss......shsptpRhllhsLllhAlla .......................h...plt.ALhSAshhhlLAulhhGlQL-...LDGT+LVV-sAuslpWpaVhIusAlVFhFQLl......RPhhp..+ulKsVS..us+alLPu........h-Guos.+QKlalhALLVlAVsW.............................................. 0 18 50 96 +11696 PF11863 DUF3383 Protein of unknown function (DUF3383) Assefa S, Coggill PC, Bateman A anon PFAM-B_3017 (release 23.0) Family This family of proteins are functionally uncharacterised. 
This protein is found in bacteria and viruses. Proteins in this family are typically between 356 to 501 amino acids in length. 27.00 27.00 27.20 27.00 25.80 26.90 hmmbuild -o /dev/null HMM SEED 481 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.33 0.70 -5.82 26 463 2009-06-18 14:05:44 2009-06-18 15:05:44 3 3 282 0 49 410 14 282.30 18 97.36 CHANGED slsclVsVslslsstuusst.shsshLlhs......psshssspphpsaoohpsVus.sFGssoscYpsAthaFuQ.....sspPsplhluRasp......s..s.l.u..ht......s.hpth.sGshplshsu...h..stlshst.hshsssAt.hpstht...........ss.hs..tttahltssssGs.....................................................................................................................hs.s.sss.huthlthpss.t....sh.s.uhss-ohspAlsshts.hsssWashthust..hsssphlAlAsahp.....ttshp+lashsspcssslsss....ssslhttlhs.sshppohshYsss........ssY.sssuhhuthhossasuspstlohta+p..sGlsu......-.slssopusALcspssNhasph......ssshuhhpcGhhsuG....pahDphtshsWLpstlpsslhslLhss..sKlPhs-sGhshLhusl.psshspulsNGhlusGh..........stshGshssGchlt.tGYalhs.ss.hsp.upssRpsRptsshphshphuGAIHpl-ltssls ...............................................................................................t....hh............................h..h....t.h...............h.hs..hh.............h.h...........................................................................................................................................................................................................................................................................................h.t..t......tah.h.........h.................tth..huth.t..............t.thh...hh..st..t...........................t....h..h.............t........hhuhh.h.sh..tt.................s..hhhpt...ul.........t....h....s.sthtth.............t...t..h.hh..........tth....httGhh.ss.....ahD.hh...ah.t.hp.th.th...h.p.....tpls.sttG.shl..s.h...ttshtthht.Ghh....................................s...a.h.........t.s.t.httR....h.h.h..usulp.h.h........................................... 
0 9 28 40 +11697 PF11864 DUF3384 Domain of unknown function (DUF3384) Assefa S, Coggill PC, Bateman A anon PFAM-B_3114 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 422 to 486 amino acids in length. This domain is found associated with Pfam:PF02145. 28.10 28.10 28.50 29.10 27.30 26.70 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.63 0.70 -6.02 26 254 2009-06-18 14:07:29 2009-06-18 15:07:29 3 12 181 0 161 269 0 408.70 28 26.91 CHANGED pps.lspRhcthcplscu.lppa....sh....ssl.tlWpsup...DLlpsppsscs.R+suhpLLptslpppct...............................ssstRthaFcsI...........sss.sps-s..ls.......hpLpuLpsLT.....ccG+cl..sa.hppslhshlspal.shh...............t.h..t+...tpt.........ts.s....-sshlsslLphls...sll+astphhsppplttllpplhpl.CppToss.............................................................................................................................................s-l....pssLplhDulIp..YuslPspohsshlplLC...................................................................................................................................................................................................................................................................................................oh.hshs..............slspssh+.....slcNLhco+htpphlpsLpshL.......tssppp....hp........shslLR.........................GAltllphllhss...tppthst...............lphs.......lhsuhhsslc....pssstlsh-llphhpsll......tphsph.hhppsWs....hhslhsp...................................hhsphtshttstss.tp............................pplhsphpplhsp.lEpLhcp..hphp.p+c.chhpFlhpstphlssussphllthhpspphssPsss......tWlpshppllcpFh.spspssslRlpslpslpcs 
.......................................................p..s.lspRhchhtplsch.hpph....ph....psl.t..lWpssp...DLl.ps.pp..s.s...-s.RpushpLLpsllptQs.p...............................hs.hhR....thaFpsI.............ps...sss-D..lp...................pLcshpsLT.....-pG+cl..sh.h-pclh.h..lhpWh.............................................................................................................................sh.sphl.hll...sllKFNtshhc.phlsth.lp..lhhl.ChpTsss.............................................................................................................................................sDl....css...LpllDAlls..YsslP.scsLs.hl....sLC..........................................................ph.hsht......................................plsps.sW+.....hh+NLht.o+hG..pssl.hshpplh...........................psps.........hc.................sssllR.........................GAVhhlth.hLhus........cth.s......................lp.s.......llsuhhp.Ahp....ssst.h.Vsh.Ell.hslppLl......tchtpp.ltth.sWs...hll.slhpp.....................................lhpplpsh.pt.p........................................................lp.s.lpcllss.lEpLhpp..sphp.sspcchhpllpps.tcphP-ushh.ll.....hctpphpPscs.tWlpsl...phlhcpFa.pppp...psslRlpsLpsl...s................................................................................ 0 46 76 129 +11698 PF11865 DUF3385 Domain of unknown function (DUF3385) Assefa S, Coggill PC, Bateman A anon PFAM-B_3188 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 160 to 172 amino acids in length. This domain is found associated with Pfam:PF00454, Pfam:PF02260, Pfam:PF02985, Pfam:PF02259 and Pfam:PF08771. 
27.60 27.60 27.60 30.90 27.50 26.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.31 32 388 2012-10-11 20:01:02 2009-06-18 15:09:47 3 34 278 0 259 390 7 159.30 37 7.12 CHANGED VlpPYhcYPpLLslLhslL+sE.pp.hslRpEsl+llGlLGALDPa+a+....plppstp........s......tppss.ssshhLhh.uhts......ss--aassVsl.psLhp..ILpDsSLspa+s.sllpuI.......hpIF..ps.hulcC.lsaLspllPshlpslR.ssss.shhEh.happLupLlslV+ ............................VlpPYhcYPpLLslLlshLKoE..ps..tslR+Esl+lLGlLGALDPYKa+........lpppts.......................................................stp..t..tspss..s.ss..hL...l...u.h.s.....................st-EaYPsV..sI.ssLhc..IL+DsSLupaHs.tV...lpAI.......hhIF...+o.LGl+...C..VsaLspllPshlsVlR....ssss.....shhEh.hFpQLuhLlslV+.................................. 0 97 151 223 +11699 PF11866 DUF3386 Protein of unknown function (DUF3386) Assefa S, Coggill PC, Bateman A anon PFAM-B_3390 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are about 220 amino acids in length. 25.00 25.00 30.80 30.60 24.70 21.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.15 0.70 -5.14 27 98 2009-06-18 14:28:42 2009-06-18 15:28:42 3 3 89 0 35 100 142 208.80 36 90.83 CHANGED hssssssc-lFRsAYENRYTWDssFsGYpucshap....ps-cphpGphclu.sDLKspVpGI-Dpclt+ultuQLaEVsIHRVRRoFEpsHGcNoFsh.GcsspsG.lEl.lVuGKutGD+Y+l+sshlshVaR+IHGsllTIpThsspDTGpG.YLS+pYsS.YtDPpTG-tpuu+spFcDpat.l....ushWlLspR.sIcs.cspuppss....pp.......FpFss..........lphL ...h.hpphsAc-lFRsAYENRYTW.DtsFPGYpAclthp....pssphapGphpls..sD..hcscVp...sl-D..EclpculpsQLh-lslHRlRRsFE.psHG...cNs.Fsh..GcscpsG.lEl..lV.uGc..u.GspY+l+ssplshVpR+lcusslTIpTtssh-TGpG.YLS+pYsuhapDPpTs-h+s.....s+ppFcDpatpl....GsYalLsp.R.sIcs.pspupp.h....pp.......FtFpslphL.............. 
0 9 24 33 +11700 PF11867 DUF3387 Domain of unknown function (DUF3387) Assefa S, Coggill PC, Bateman A anon PFAM-B_3465 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria and archaea. This presumed domain is typically between 255 to 340 amino acids in length. This domain is found associated with Pfam:PF04851, Pfam:PF04313. 26.80 26.80 27.50 27.10 26.40 26.70 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.89 0.70 -5.48 52 900 2009-06-18 14:30:44 2009-06-18 15:30:44 3 11 772 0 213 818 195 283.60 26 33.78 CHANGED osutG+GcsslDtpcAl.slhhEKh-ll+shh......+GFDYpsahsuss..pplphlssAhsalL..............u......c..........s..uK+RFhcsVhsLoKAauLsssp-E.Atsl+-EluFFpAl+utLhKhssssp..t..ssp-...hphsl+QllspAlso-t.VlDIFssuGlc+P-ISlLSD-FLt-V+phc.cKNLAlElLcKLLs--I+sRp+sNlVps+pFoEhLcpslp+YpN+uI...........soApVI-ELIphAK-hppstpRu-cLGLop-ElAFYDALAs.N-SAlc.hG--pL+pIAtELspplRpssol.DWs..tREolRA+lRlhVKRlLR+YsYPP.DhpppAscpVLcQA.Eh...lu....ppas 
...................................................................................................................................................h...h.pth.ph.ht..thh.........thsh..t......h.......t............t.h...thh.......t...s.phlh.........................t..........p..hc...ppFht.shthtpuhsls...................s............p...t.......tt.......h.pp-.l.ta...httl+shl.h+h......ttst...........s.....tp............hpttl....ppllspul..h.........u.p..t....l........p.....l........h.t....h..........t............p.....l..sl..h.s............p.c.F..l.t.c....l.pphp.....pps..hthph..Lc+h..lppplp.hhcpN.sptppap-hlpphlpcYpspt..l.....................pstphlpc.Llp.h...u.p.chp.p.t.t.p.p...s...p...p.h..G..L....s..t..-.E...h..AFY....-.hLsp...s.............c..sh.....h......p...h...s..........c..........c..........p...L...p..t...l...Ac-ls..ptl....+...p...s...h..s..l...D.Wp....p+-ss+A+h.R.hhl++..l....L+...+...a...t..Y..P..P.....-h......t..ctAhptVlpQAE.ht....t................................................................................................................ 0 79 150 185 +11701 PF11868 DUF3388 Protein of unknown function (DUF3388) Assefa S, Coggill PC, Bateman A anon PFAM-B_3650 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 261 to 275 amino acids in length. This protein is found associated with Pfam:PF01842. 
25.00 25.00 39.00 38.20 20.70 19.70 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.15 0.71 -5.16 8 391 2009-06-18 14:32:00 2009-06-18 15:32:00 3 2 390 0 45 120 0 192.80 73 72.19 CHANGED VTKhRpPKLRDRLAlRHGRYIpRDADDKKTFRFVR-ELGLLVDFMAELFK+-GHKLIGIRGMPRVGKTESIVAASVCANKRWLFlSSTLIKQTVRSQLhcDEaSc-sIFIIDGIVSsRRusE+HhQLlRElMRLPusKVVEHPDlFVcpSEYThDDFDYIIELRssssEEIpY-hsEcsphtstssF.......uuFsF ...lTKL+pPcLRDRLAVRHGRYI-pDAcDKKTFRFpR--LGLLVDFLAELFKcEGHKLIGIRGMPRVGKTESIVAuSVCApKRWLFlSSTLIKQTVRSpLIcsEYs.sN.pVaIIDGhVosRcuN.+H.pLVpElMsLPohKVVEHPDlFVcsSphTh-DFDYIIELRcs.spEIpYEchcc.p.ht.opNNh.....s.......... 0 14 28 37 +11702 PF11869 DUF3389 Protein of unknown function (DUF3389) Assefa S, Coggill PC, Bateman A anon PFAM-B_3739 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 80 amino acids in length. 25.00 25.00 28.30 48.20 21.40 17.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.39 0.72 -4.09 17 137 2009-06-18 15:30:29 2009-06-18 16:30:29 3 1 136 0 26 81 4 69.60 55 99.37 CHANGED MllsFSpGKlIso.pElhlRLssss.VsLQApsDslpLlu.sAsVllAsGuss+WSlKLDs-pQLpslupplGlslt .........................MVIpFStGKlIsT.+ElVlRLsttt.lTLQAps-sIpLhu.GANVhlANGSEsKWSlKLDsE-QLpsIAppLGhDl...... 0 2 7 18 +11703 PF11870 DUF3390 Domain of unknown function (DUF3390) Assefa S, Coggill PC, Bateman A anon PFAM-B_3832 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 90 amino acids in length. This domain is found associated with Pfam:PF02589. 
26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.22 0.72 -4.06 34 1109 2009-06-18 15:32:31 2009-06-18 16:32:31 3 7 1031 0 194 665 49 93.20 28 19.55 CHANGED +LRpEshpps....tp..hpupsuptsphEphsaKsauhhsosPslY+.........hhsahssch...pshhP..s.ltsWTpsRshPcPAtcoL+-lh+t+tptp .................................................shs.ttpuhpsts..EphAhKhFuhssopPs..la+huhth.................sthhsa..h..h...ssh.........tsh..sP......u..l...psWscsR-hPpsstco..FRpWa+c+tt..p...... 0 51 126 163 +11704 PF11871 DUF3391 Domain of unknown function (DUF3391) Assefa S, Coggill PC, Bateman A anon PFAM-B_2190 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is typically between 122 to 139 amino acids in length. This domain is found associated with Pfam:PF01966. 27.80 27.80 28.70 28.00 27.60 27.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.31 0.71 -3.93 121 695 2009-06-18 15:45:03 2009-06-18 16:45:03 3 4 415 0 262 656 53 130.20 21 32.31 CHANGED hhh...........+IslscLplGMaVptls..sWp..scsal.h......sphhlcspppIpp.lpppGlppVhlDsp+u......s.s.......................thptssss....ph...thttpph.............t..............hshppch.......ppAp......phhscupshhpplhschptG.pslshpsspt .......................h+lslspLp.GMalpths..sWh..c+PFh.h.......ssFhlcsppplpt....l+p.Glp.pValD..ss+u....t.shs............................................s.ttt.sts...th.......t.tspth....................................................t.sshcccl...........ppup......phhpcutshhpshhsch+.s..thshtth.......................................................................................................... 0 57 149 212 +11705 PF11872 DUF3392 Protein of unknown function (DUF3392) Assefa S, Coggill PC, Bateman A anon PFAM-B_2322 (release 23.0) Family This family of proteins are functionally uncharacterised. 
This protein is found in bacteria. Proteins in this family are about 110 amino acids in length. 25.00 25.00 63.30 63.20 21.60 21.30 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.19 0.72 -4.07 21 196 2009-09-10 17:12:07 2009-06-18 17:11:28 3 1 195 0 46 106 9 104.90 45 96.17 CHANGED -hllsLlsplupalpPaLs-IulAlVAClLVlaGs-IN+hL+phlushsFllRThsFlLlsAFGYGllllaloPhlspsLtplsshaLsslllssFllIGhhApRp .....lhshLAshuphltPaLSEISlALVAChLVlhGu-INuaL++tLpshpFllRTlsFlllsAFGYGLllVhAoPaluRsLuphssthhhslllssFllIGlhApRp.. 0 8 16 33 +11706 PF11873 DUF3393 Domain of unknown function (DUF3393) Assefa S, Coggill PC, Bateman A anon PFAM-B_2361 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is typically between 188 to 206 amino acids in length. This domain is found associated with Pfam:PF01464. 31.20 31.20 34.20 31.70 28.70 28.60 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.26 0.71 -4.81 30 814 2009-06-18 16:15:39 2009-06-18 17:15:39 3 2 791 0 95 402 21 192.00 64 53.05 CHANGED pKhlhlhhhlhl.....LsuCo............................tp.spptsshsssh............h.+Dppu....lctLhsphu.....pplcchWGpcEhhhAu++cYVKYoDsYpoRuclsF-cGtIhlETlus..pP...pppL+pAIlpTLLhssDPstlDLaS..s.pclslsu+PFLhGQVlDpcGcsIpapWRAsRaAcYLlpN+Lps+plpptp.laaVpIsMVssHhchRut+ .........................................................................KKhLALAlIAPL.....LlSCS........................................ooKKG..ssYNEAa......................VKDTNG.........FDILMGQFA.....HNIENIWGh+EV.lI.AGPKDYVKYTDQYQTRS..HINFD-GTITIETIAG..T-P.......sAHLRcAII+TL.LMG.DDPuSVDLYS..DVcDIpI..SKEPFLYGQVlDNTGQPIRWEuRAosFADYLLpN+LKoR..os....G....L+....l....I..YSVTIsMVPNHLDKRAHK...................................................................... 
0 13 40 70 +11707 PF11874 DUF3394 Domain of unknown function (DUF3394) Assefa S, Coggill PC, Bateman A anon PFAM-B_2758 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 190 amino acids in length. This domain is found associated with Pfam:PF06808. 28.20 28.20 28.50 28.50 28.10 27.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.02 0.71 -4.74 44 269 2009-06-18 16:26:19 2009-06-18 17:26:19 3 4 259 0 78 243 664 173.50 39 20.60 CHANGED culllhlsAslAMLlFuAATQGaFls+o+haEoslLLLlAFoLFRPGFWhDhlhPsapphsssclhphhtphssGpslRlplpG.sh..pGc.hppTlhLsls........-su.sutpRLpsh.GLpLhp..........-ssphllDtltFGSs.A-psGl-FD.a.......pIstlphs.s-RPsKEhhaIPALLLLullshhQ..RRRtpps .....phhLshloullAMLlFoAATQGWalT+s+aWEslhLLllsFohFRPGFWhDhlhPsh..hhsuscltphspphssGpslphhVsG.sh..pG-.h.p+...TVtLPht............-tu...sup..-...Rl.....t.uh.GLt.Lh..p..........ps.s+.h.ll-hV.pFGSP.ActuGl-FDa............cItpVhhs.s-RP.KEa.halPAlLLhhllsh.Q+RRhpp.s.................... 0 24 51 66 +11708 PF11875 DUF3395 Domain of unknown function (DUF3395) Assefa S, Coggill PC, Bateman A anon PFAM-B_2767 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 147 to 176 amino acids in length. This domain is found associated with Pfam:PF00226. 
27.10 27.10 27.60 28.30 22.80 23.10 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.84 0.71 -4.58 28 266 2009-06-18 16:27:41 2009-06-18 17:27:41 3 6 206 0 180 262 3 142.70 35 24.75 CHANGED Phpcppppcthtcp+cppcpplspc+pcAppslplhpstsp+phppEpp+sGLlIhpAhYGphss..................................................thstppshpst...........slDVTlslQshV.ccSpLhlsp.....................ssKupLhGFaDP..........................s....spcKhL+IpYpa+sphHplhlsDs-slplP ......................................Ph.ptppcpphtcp+cpttppltp.++.pEAppAlpLMpps..spRhhptE..c.p+...tGLlIlpAhYGphss................................................................................................ptptppps....................pslDVTlPLQsLV.......cc..S......p......LhLsc........................................ssKus..L.GFaDP...................................................ss...G-pKpL+.VhYpF+uhhHpVhstDp-sltlP.................................................. 0 63 99 146 +11709 PF11876 DUF3396 Protein of unknown function (DUF3396) Assefa S, Coggill PC, Bateman A anon PFAM-B_2995 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 302 to 382 amino acids in length. 
25.00 25.00 26.80 26.40 24.30 23.70 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.41 0.70 -5.21 23 202 2009-06-18 16:28:56 2009-06-18 17:28:56 3 1 102 0 71 241 2 193.20 27 58.49 CHANGED utYpFphh..........uhpp..hp.thspc.ulsslsFolPhtalpp.s.Psh..aptLhhphAppLssppGaAGhuhsl.shshctspspEahhupRa.sGlDVssstps.....tshpls..sc.......................IKsVsWLThlssshlcpLG...GspuL+stL..s.s.hslpshss.GllIpAGsh.PphGsspcss.....hPssYlhlN+sL+slhs..pphstLphtshsupu.h..sptusptWL+RF .....................................a.h.hh..........s.pt.....t.stp.shssLphslPhtalpp..t..ssh..atthh...hthsppLps.pGauGh...uhs...l...sts..hp......p.....t.s...h...E..ahlu.p...+a.sGLpVs.sssth.............tshphh...sp.......................I+slsWlThlupthlppLG................Ghstl+ttL...s.s.hshpsa..........ss.GllIpAGth.PphGshsts.......hP.sYhhlNphL+PlRh..pph..ttLp.h..hsuts.h...sptsottWhtRF........................... 0 8 24 48 +11710 PF11877 DUF3397 Protein of unknown function (DUF3397) Assefa S, Coggill PC, Bateman A anon PFAM-B_3446 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 114 to 128 amino acids in length. 25.00 25.00 26.60 26.40 23.20 22.60 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.81 0.71 -4.09 22 609 2009-06-18 16:30:47 2009-06-18 17:30:47 3 1 607 0 69 290 1 112.90 36 95.07 CHANGED hhhhshhhllls.lhshllsth...hplp+p....th.ph...sDluh.hhhlulhhl.tphaspShlshlllhl.llulhlslhhhhpppphha++hhKhaWRhsFLlshhhYlsL....llhshhh ..............hlh.hlhllLshllshIl.sph.....FpLp+h.....th.pF....sDLAhPhLlhthalloscsascsh.L..Ph.l.hL..sl.lLuI.l.ls.h.ahhhKccs.....h...hYs+FhKhFWRhsFLLThlhYlth....llhhhh.h........... 0 18 39 50 +11711 PF11878 DUF3398 Domain of unknown function (DUF3398) Assefa S, Coggill PC, Bateman A anon PFAM-B_3712 (release 23.0) Family This domain is functionally uncharacterised. 
This domain is found in eukaryotes. This presumed domain is about 100 amino acids in length. 28.30 28.30 28.50 28.90 28.10 28.20 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.29 0.72 -4.16 20 472 2009-06-18 16:32:43 2009-06-18 17:32:43 3 9 84 0 212 372 0 93.00 36 5.31 CHANGED hp+tsplpssPLR-LlcFPsDDlplphlsRctRTlpssVPc-s.pph..sha............V+-ClcsYsp-WplVph+Y.cphSushp.hhsppphchp+.....LspQsFE ...............h.p+.splps.ssLR-Ll.FP.DDlplshlsRctRTlpsoVP--u..pch..sha.............................................................V+-..Cl+oYspDWtlVshKY..cph.Su.s.a.p..p......h.s.scphchpK.....LPppsFE................................. 0 39 55 115 +11712 PF11879 DUF3399 Domain of unknown function (DUF3399) Assefa S, Coggill PC, Bateman A anon PFAM-B_3857 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 100 amino acids in length. This domain is found associated with Pfam:PF02214, Pfam:PF00520. 25.00 25.00 30.60 30.00 24.50 24.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.69 0.72 -3.80 11 201 2009-09-10 20:21:33 2009-06-18 17:34:18 3 7 78 0 106 192 0 103.70 45 19.28 CHANGED LppK+NG.LsptLp.sustt-p.hhsKspShhEpQHHHLLHCLEKTT...................sH.EFlDEph.a-pshhEsuhtshs.S+SsSlSSp.....ulosoCCoRRsK+..shpLsNushsu ........................lppK+NG.h.p.uLp.s.....Gs....t--p..hhsK..s.pS.h...FEpQHHHLLHCLEKTT...........................sH.EFlDEph.FppsshcsuhtshsoSRSsSlSS...ps.........u.lsooCCuRRtK+..sh+lsNushs...................................... 0 18 27 59 +11713 PF11880 DUF3400 Domain of unknown function (DUF3400) Assefa S, Coggill PC, Bateman A anon PFAM-B_3996 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 50 amino acids in length. This domain is found associated with Pfam:PF02754, Pfam:PF02913, Pfam:PF01565. 
25.00 25.00 33.90 32.40 23.40 22.10 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -8.31 0.72 -4.74 20 251 2009-06-18 16:36:00 2009-06-18 17:36:00 3 7 245 0 83 236 35 45.40 65 3.53 CHANGED SRYs-Ds.s..l-ADYIVVEMA+HlLGENWhs-YVt+ANsGGIERVLl .SRYs-Ds.s...lpADYIVVEMA+HlLGENWhs-YVp+ANsGGIERVLl.. 0 15 52 69 +11714 PF11881 DUF3401 Domain of unknown function (DUF3401) Assefa S, Coggill PC, Bateman A anon PFAM-B_2478 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is typically between 231 to 250 amino acids in length. This domain is found associated with Pfam:PF02145, Pfam:PF00595. 25.00 25.00 40.40 34.60 24.90 24.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.96 0.70 -4.97 11 199 2009-06-18 16:41:43 2009-06-18 17:41:43 3 5 38 0 83 145 0 232.40 45 15.83 CHANGED K+hpusussh.upsRhRAoLRDlc.SPptstKSol--DlKKLIh..DSPsstpp+ch................s..sssSs.RRSLaRTLSDESlh.SGpRpsSauoScu.hL-QuLPsDlLFo......sSTLP..........Pp..shPhpp.uht.uhpsl+u-hSASDsSLsDh.-p.+th.h.DPGLMPLPDTAus..L-WSpLVDAA+AFE...................sQRss.Fsohs-s.ppupshpsstp......spphsspsspsh........hsuccSPss.....................LsGKVsQLEshLK ............................Kphpossppl.upspLRASlRDl+.SP+ts.hKSTlE-DLKKLIh.hDSPssEpp+ch................................s..ss.Ss.RRuLpRTLSDESlh.SupR-sSFuSstS.hL-puLPsDlLFo......sSThP...............Pp...shPhpp.u.ht...Ght..sl+...u-hSAS-sS.LsDh.-p.+....h.DPGLMPLPDTAus..L-WSsLVDAAKAaE.........................................................sQRus.h.su.s-s..pps.sh..tssps............ptssss.+sh..........ssu.c-u.Pss.....................LsuKVsQLEshL+............................................... 0 3 11 31 +11715 PF11882 DUF3402 Domain of unknown function (DUF3402) Assefa S, Coggill PC, Bateman A anon PFAM-B_2702 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. 
This presumed domain is typically between 350 to 473 amino acids in length. This domain is found associated with Pfam:PF07923. 25.00 25.00 26.30 26.90 23.10 24.90 hmmbuild -o /dev/null HMM SEED 409 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.35 0.70 -5.42 22 336 2009-06-18 16:43:09 2009-06-18 17:43:09 3 9 231 0 228 328 0 384.80 36 46.72 CHANGED PhulhEAsclaus+l+hohuh+QLWcER...-cFh+h-RG..........tsspp..s.s.-lpp.h.p.p.p.hhphst.p...pt..........ssppplp......hlptVEshYppsLspLpohlhVLL+hllushs.........................sphptts......shhsss.scp.s.t...................sh..pl-hhRt+EIssKAlSulLlLLLKWFKlSHlLKFEYhoQLLhDusalsLlLKhFsh.............................pshsphlss+sch...phsFaphC...........hh...........................................................................hs-.lsphuhssst.sptsht...a.shRNF.FSsINhL+IhpK.lsKpKspRhhhLspaKSSsILK+hL.KlspspLphYsLKlhKpQlPYpGRKWRp..uNMclIouIYLpsR.cL+D-WLsGsDl-u-hp-uhspEpALRuLlcaaNhRRY................................c.M........................sht...thtthhp....................................................................................Ept-hFhpph 
....................................................................................tu..ass+l.+.pchpp.L.ppR...p+FhtaphG..........................................tssps..ssh..s...psl..p.cthpphppch.hhshschphp..pcth.................................................ttpc-lp........sssEhhYps....hLPsLsphhIsLLKllL..usss............................................................sspscspu................hNhhscshspphs...psh.......................psh..p.tlDlsRpKEIhsKAlSulLLLLLKaFKlsH........lhpFEYhuQhLl.uNhlPLlLKhFs..............................................................................Qsl.phlssKssh.........s.hsa.ptshtp.....................................................................................................hsE.lst.uh.sssssp........a.saRNh.FSsIN.hLRILpK.loKhKppRs.h................h.....LV.aKSusILK+sL.KV................p.shhpLYlLKLlK.Qs.YhGRpW..Rp....SNM+shoAIY.psRhcLpD.DWh...hG..........s...D..l.....Ducs...-h.s.p.EpsLRu.lcha....N.RRY.......................................................sc.h..................................................................shp......thpthht........................................................................p....................................................................................................................... 0 68 106 173 +11716 PF11883 DUF3403 Domain of unknown function (DUF3403) Assefa S, Coggill PC, Bateman A anon PFAM-B_2739 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00069, Pfam:PF08276, Pfam:PF00954, Pfam:PF01453. 
27.30 27.30 27.40 27.30 27.20 27.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.48 0.72 -3.69 31 401 2009-09-14 08:07:09 2009-06-18 17:45:01 3 35 35 0 184 445 0 45.40 38 7.07 CHANGED SEsssIPQPK.PGaCltRush-s-sSSSpph..s-ohTVNQhTsSVIDAR .......o-ssplPpPKpPGaslt+s..sh-s.....s..sSSopp.....-shoVNphTholl-uR......... 0 18 139 156 +11717 PF11884 DUF3404 Domain of unknown function (DUF3404) Assefa S, Coggill PC, Bateman A anon PFAM-B_2879 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 260 amino acids in length. This domain is found associated with Pfam:PF02518, Pfam:PF00512. 25.00 25.00 27.80 26.90 19.90 19.10 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.61 0.70 -5.50 10 181 2009-06-18 16:46:17 2009-06-18 17:46:17 3 3 113 0 17 103 2 254.20 48 54.22 CHANGED hlhus.AaAD..SLPE..RIDsFpphFshpsAspoYDlRplQusYPTpLLoPsShLPQTucYPLK-IQpLYplApTCpGphPL..SPLlTEPLVFTRAlC+GopLsscWFuRSGLIHPGGGSYAsRYlpKaP-pcppLtpaMHI+ERslAspspLLu+L.QpMss-uIsALluGushFls.s-ELWLR+GstYalaspssWpssssstsLshslhopsssCalppGNICWcl.ED+SclLpauhIhLllANlhLllG...WulYRWNsKRcEM+ .........................hlss.sphD..sLPE..RhslFhphh..ppuos.hs.polp.pYPptLLoscShhPphopYshpDIptLaphAppC......pu+hPh..SPhls-sl.FphALC+t.osLuscWFsRsuhlHPuGGoYAt.RYl.E+aPsp.tsL....hsahHlpcps.Aucs.....plL.ap.L.QphupsulsALluGhphalu.ss-LWLppss.......taalhspppWps.ssphslohs.hp.Ac.psChhphuNlCas...cspSclL......hhShllL.lhh.slhLlhG...hSlYphppc+pEhR........... 0 4 6 13 +11718 PF11885 DUF3405 Protein of unknown function (DUF3405) Assefa S, Coggill PC, Bateman A anon PFAM-B_3057 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 636 to 810 amino acids in length. 
25.00 25.00 27.20 25.40 24.80 23.70 hmmbuild -o /dev/null HMM SEED 496 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.97 0.70 -6.26 25 217 2009-09-11 13:24:11 2009-06-18 17:47:24 3 5 82 0 180 215 1 433.40 36 68.33 CHANGED lCF-RauRaGPY.....GhG..ht...................................................ps-+puhppsht......................pVD..W.cpV...........sWuphQpcChp+NppR.Fts..................pssphpshshsp...ttt.t.....tt...........................phsRTAlllRsapsapaT-cDlhhlRuLIsELSLpS.GGEYsValLlcVKD.sslt..Iaus..--sYp+sLc-s.lPtEF+uMssLWsEpphphhYPs.lpc.h..................+tsaputh.hPlQhFuhpaPEaDaaWpWEhDsRaTGHaY+hhs+lspaA+pQPRKuLWERNuRFYlPuhHGsa-.sFpphVcht...............................................................tu.ccslWGP.......p....tshss.PPss....cD..cYcWGVGEEADLIshsPlFDPp.sTsWlhcs-lpGY.stp...s.tsPRRAulITtuRlSR+LLtsMHcEshtt+HthhSEMaPsTsALa........HG..............hKAVasPHPVahD+pWssctlsphaN...uG.sssoGGs+sSsau.s+Epsh........+..GsoWaYsusaussLacRWLGa.chss....sGGc...t........hc.................stcGRhCLPsMLLHPlKc 
.......................................................................Ca-RhsRhGPY.....Ghu.................................................................ppts.tt.htt..............................lc..a..ppl...........sWuphQppChp.tN.ttRFt...............................................t.t..h..ht.......................t..............................................................................................sRoAlllRsassh...pasppshh.lRuhIsELSLtS.GGEYpVahLlcV+-psh....I.asD..pcsYpphlpcs.lPtEFpuhshLWs-tththhYst.l.c..........................+slaputa.hshQhFu..hpHPEaDahWpWEhDhRaoG+aYchhs+ls.pau+pQPR+tLWERstRaYlPshH.Gsa-.sFpphlc...................................................................s..ppslWGP....................p...........t..shPPs..s.....pD.....papWGVGE-ADhIshsPlacP...sosWlhps.clhGY.ptp.....................sPRRssIlTtoRhSR+LL.sMHpEshhtpH.hhoEMhPsosALh........HG......................hKAVasPHPlahD......RpW.ss..p...htphaN.............sG.su...touu.tsSsau.tp....Epph..........p..GhoWaYputhsspLappWhGh.p.ss...tGGp.t........hE..............................ttpGRhCLPshLlHPlKp............................. 0 44 95 152 +11719 PF11886 DUF3406 Domain of unknown function (DUF3406) Assefa S, Coggill PC, Bateman A anon PFAM-B_3286 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in eukaryotes. This presumed domain is about 270 amino acids in length. This domain is found associated with Pfam:PF04548. 
25.00 25.00 25.60 25.10 20.30 19.70 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.23 0.70 -5.55 9 103 2009-06-18 16:48:28 2009-06-18 17:48:28 3 3 27 0 72 111 2 243.70 45 26.62 CHANGED sstPssVuVPhPDMsLPsSFDSDs.PsHRYRhL-ssspaLsRPVL-sHGWDHDsGaDGlslE+thslppphPu..osssQVoKDKK-sslph-uuhShK.HsEstoohsGaDlQolGKcLAYolRGET+FKNh++NKTsuGlSsTaLGDslusGlKlEDplhlGKplpLlsSsGsMpupGDsAYGuslEApLRcKDYPlupphooLGLSlhcW+uDLAlGsNLQSQh.lGRsSphss+hsLNN+toGQlolRssoSEplpIALlullPlhtulhpphps .............s...ttssVPhPDhsLPsSFDSDs.PsaRYRhL-s.ssphLsRPVL-scGWDHDsGaDGlslEpshsltsp......hPu..uhssQloKDK+-hslph-susuhK.as-stoohsGhDlQo.lG.+pLuYslRuET+F+Nh++NpTsuGlShThlGcphssGhKlEDplhlGKRltLVsssGshpup....GDsAY..GushEspL+t+DYPlsps.uoLuhSlhpW+.t-huluuNLQSQhplGRsoph........ss+ssLNN+tsGQlol+ssoSEplQIALlullPlhttlhpph......... 0 13 41 56 +11720 PF11887 DUF3407 Protein of unknown function (DUF3407) Assefa S, Coggill PC, Bateman A anon PFAM-B_3559 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 360 to 454 amino acids in length. This protein is found associated with Pfam:PF02470. 
26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.60 0.70 -5.11 43 1339 2009-06-18 16:50:23 2009-06-18 17:50:23 3 3 164 0 385 1189 0 215.70 19 52.68 CHANGED psLsslLcplDPsKLNuTLoAlApALcGpG-clGpulsshNslLtplNPphPslpcDlpsLsslu-sYu-AAsDllssLcsuoTTSpTlssppssLDulLlussGhusoGs-lLssspssLlcusssLtPTspLLscYSPthsChlpGh.tthssthtcshG.G.NGhultlsss...............................................hlhGss.PYpYP-sLPplsA.+G.G.tt.PuCh...sLPc..sscs.....a......Ps.hLV.ssTGh.........ssssa.p..............s.sl..................GpPhhtpal................h.Gst.sGsss.........................................................................ssP .........................................................p......lsthlssh....upul.s...G.p.G.spls.psl.......sphsplhspLs..s...ph..s.s..lt..p..s...lcsLsslssshucsssslhphlsshs.s.sspslssppspLsshLt.......shsshuss.s.s......shlspspspLhp....sls..pL.ss.sh.phLsphs.s.pl.p...h...lt.h.......h................................................................................................................................................................................................................................................................................................................................................h................................................................................................................................................................................ 0 62 221 326 +11721 PF11888 DUF3408 Protein of unknown function (DUF3408) Assefa S, Coggill PC, Bateman A anon PFAM-B_3594 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 128 to 160 amino acids in length. 
50.10 50.10 51.50 51.20 49.70 49.70 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.72 0.71 -4.24 32 653 2009-06-18 16:51:41 2009-06-18 17:51:41 3 1 126 0 52 483 10 133.40 20 94.25 CHANGED sppplccctlhchhttts............................spsssptsssscpsppppssts......................t.ptpppcts.p-YcppFL.pssplssRps..lYlsc-l+-+ls+llpsluttcholuual-NlLpcHhcpap-pIpphhppphp...........a .....................................................................................t.................................................tt.tt.....t.p...tp.tttt.................................tp.tttpp+tphp-YcptFL..p..ssc..hp..sRps..lYlsp-h+c+lppllpslGp..pcholuual-NlLccHl-pap-cIpphhtpp....ph...... 0 20 47 52 +11722 PF11889 DUF3409 Domain of unknown function (DUF3409) Assefa S, Coggill PC, Bateman A anon PFAM-B_3824 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in viruses. This presumed domain is about 60 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF05550, Pfam:PF05578. 32.40 32.40 56.90 39.50 28.30 27.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.84 0.72 -4.27 8 239 2009-06-18 16:53:22 2009-06-18 17:53:22 3 16 46 0 0 263 0 55.50 75 2.50 CHANGED Dcsus+p+cKKPDRl+KGsMKIsPKEoEKDSKTKPPDATIVVEGVKYQVKKKGKV+ ........-cGAsspKppKPDRlcKG+MKIsPKEsEKDSKTKPPDATIVVEGVKYQVKKKGKVK........ 0 0 0 0 +11723 PF11890 DUF3410 Domain of unknown function (DUF3410) Assefa S, Coggill PC, Bateman A anon PFAM-B_1956 (release 23.0) Family This domain is functionally uncharacterised. This domain is found in bacteria. This presumed domain is about 90 amino acids in length. This domain is found associated with Pfam:PF02826, Pfam:PF00389. This domain has a conserved RRE sequence motif. 
27.20 27.20 27.40 27.40 27.10 26.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.84 0.72 -4.33 64 939 2009-06-19 10:12:49 2009-06-19 11:12:49 3 4 934 10 136 570 33 80.40 53 21.55 CHANGED slLPss.lsplslstph-..ps.sLtpLs+hlYDlRcDDuhh...R.csh............tpssuFDtLRKpY.....RREaSuLpl.t......spspss..hphLptLG .....................t.LLPsPphu+lsLc..u..sLD....ps.TLK+LsHLVYDVRRDDAsL...R+su...................uhPGpFD+LRKNYh...-RRE.WSSLhV.h......s-ctss....AulLscLG................. 0 28 58 100 +11724 PF11891 DUF3411 Domain of unknown function (DUF3411) Assefa S, Coggill PC, Bateman A anon PFAM-B_1986 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 168 to 186 amino acids in length. This domain has a conserved RYQ sequence motif. 25.00 25.00 30.40 26.00 24.80 24.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.01 0.71 -4.58 14 218 2009-06-19 10:19:12 2009-06-19 11:19:12 3 6 31 0 146 212 4 165.50 34 39.91 CHANGED sRhLADPpFLaKluhEpslshsssshuEhppRt-p.Fh..sEh-hshsshlsuslsshhlValhAPshuhsssus..su................thpthhtslP.sNsFptuh.shppaoltpRluolhhKGsphusVGhsuGllusuluNsLhst++thp.....pp......sspsPPlhpouhsaGsFhGlSuNlRYQllsGl..- ........................tRhLADPp.FlaKlshEpslslsssshu-hppRtcp.Fh...pEhDhshssllhuslsshhlVaL.APshuhtssss.......................thtthhtshP..sshFptuh....shp.p..asltpRlushhh+GsphusVGhsuullGs..............uloNsLhsh+.+php.....ps.....................sspssPlhpsAhsausahulSuNlRYQllsGl....................... 0 38 100 128 +11725 PF11892 DUF3412 Domain of unknown function (DUF3412) Assefa S, Coggill PC, Bateman A anon PFAM-B_1106 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 120 amino acids in length. This domain is found associated with Pfam:PF03641. 
25.00 25.00 32.00 58.30 22.50 20.80 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.63 0.71 -4.29 49 901 2009-06-19 10:27:43 2009-06-19 11:27:43 3 3 892 10 132 465 51 123.00 71 27.74 CHANGED uMppV+paR+spuDAYpFNWSL+Ip.-FQhPF.PTHEsMAsLsLHhsQsscpLAANLRRAFSGIVAGNVKp-GI+tIEcaGPFcl+GDspLMcthDpLLpuFVpQpRMKLP.GotY.PCYcI.ss .......MPhVKEpRR-TGDAYSFNWSh+IsPDLQhPFEPoHENMANLcLa.D.QPsElLAAsLRRAFSGIVAGNVKEsGIRAIEcaGPYKIpGDt-lM++MDcLLQuFVAQHRMKLP..GSAYlPCYEIs.s............................... 0 23 53 95 +11726 PF11893 DUF3413 Domain of unknown function (DUF3413) Assefa S, Coggill PC, Bateman A anon PFAM-B_1403 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 250 amino acids in length. This domain is found associated with Pfam:PF00884. 27.10 27.10 28.60 27.80 26.60 25.80 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.56 0.70 -5.50 39 977 2009-09-14 14:01:40 2009-06-19 11:32:37 3 2 864 0 128 567 33 247.20 52 42.40 CHANGED Mlpptp..pa+-clSphlsWGHWFuhFNlllAhllGsRYl.hhssa........PsThlGhh...YhhlShlGHFoFLsFhhaLlhlFPLoalls.p+hhRslusllATlulslLllDT.laspashHLsshVacLlhsstp.spls....tpWthhal...shPl.Illlphhhupah...Wc+lcplp+p.+.hG+hluhhhhhsFluSHllaIWADAshY.pPIThQcssaPLSYPhTA+oFhcK.aGlLsppphppphptptpsps.........plpYPlpsLphs ..........................................................................MVsppp..pYRE+VSQhlSWGHWFALFNILLu....hllGSR.YL.FluDW...............Ps.TL..sG+lY.hl.S.hlGH.F.SFLVFAsYLLlLFPL.TFI...V....h.....Sp.....R...LhRF.LSsIlAT.AGhTLLLlDoEVFsRF+LHLNPlVWpLllNP-p.sEhu.....RDWQ..L..hFI....ulPl..IL.LlEh..lFAsWu...WQ.K..LRSLsR+.....+...auR..P..L..A..Ahhhlu...Fl..A..S...H.llYIWADAsaY.RPITMQRANLPLSYPMTAR+FLEK.HGL..LDuQ....EYpRRLhE..QGsP-Al.........ulpYPLspLca.p........................................ 
0 26 53 93 +11727 PF11894 DUF3414 Protein of unknown function (DUF3414) Assefa S, Coggill PC, Bateman A anon PFAM-B_1638 (release 23.0) Family This family of proteins are functionally uncharacterised. The family is found in eukaryotes and has a conserved LLG sequence motif. 26.40 26.40 27.00 26.90 26.30 26.30 hmmbuild -o /dev/null HMM SEED 1691 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.42 0.69 -14.40 0.69 -7.68 32 424 2009-09-11 06:03:47 2009-06-19 11:35:43 3 13 249 0 291 403 2 1089.40 19 83.73 CHANGED hWss..hcpLapslp....hhsppp.psl..ppL..pLcpht.shhsLLcpPs+sspsRpplp..............pGtlphuctschplsp-hlppulhLuDtLsLcEltAs-Llh..supppp....th.uhshh.sAllhYaptRphlLssL+hlhptt.t....phs.....p-l.pchhphh..........................pahcphltshshlcphLshls-.ls..............tsplh..sphp...shs..cphchh+s..........hlhcp+chLuthLashspp...shhstsshhpllptLpp..l.......sshDtlhlthlsulltsh.............................................tss.pthshhsscphtpslpsclpspt........................Wth.shpuslhhhahshhtuhsptssstts...shshpccs-...phhppAlp.............sGuhcaLhthshcs........p..t.....thh..L.pt.s.................................................................sshh.....cp....................................hhhpphcshlpshI.sshs.hlpcL+scE--sths..st.p.s.s.s....................................................................shptcLEcFhlhluhhYpscP...-huhp.aWssp-..ssh...............................htFlpaAuchtssslh.ssahcMLsuLos.G.psAptsaphLppss...........tt.tsloWcphFpsLphYhpplp...........p..spspsl.hspss.psh.................stcphhhLsuaLpLlsplspp..s.pssR.tl.......hcpssap..ssllhsLhps..........slP.tL+As.....................................lhpsLuulh..........s+s.ptutplWptlDphhhtsshtsss.t.....................pshshptc............hpthpshtppaststuFlpLlpsLlpst.....t.......h.aPpsLGsu..........................hR...hsGlpPYlc....Flhsplhhc..spshtcsspphplt.ssLchhhpsLpsas.s.....hl.thspsts.......................sh.salt.p.Puhtlh.phLhspchhphlhsll..ppussplsphst.....p.ltpslhhuLpllshsLphQssahchlpsslp.........................
................s.hhtshshhhp..ulssh.cth.hplsllshh.LahusspspluhsSl+ILpplupp.phs...............s+llslhsp...tspupcIptuFlppl-sp.sp.tt.................................................................tt..hplKhpILshL.ssLstts..pPslAHhLLGFcsp.tshhphs.p..shhsstpohL+ulLslLpphhsshps..........lshh.scLsphshpllhpLCp..sshoSthsLpalRs.....ssalhphlpp.Phls..hhhps...............................ussshhshLphRu....hlhphhulEl+s..huhpuphoptpplhshLls.............tphspsstsh..sp.........................................................lhslLDhlshsh....pphs..plshacs.hslp.shpp..........hpspt..uht.......lhs.hphLppllphchpshps.h..........p.sptp.l.....pEhptllpahsphNptpphttsphphLcSWsQLlplllssss...hsssp+pshIl-slphllsKlp...h-....shthu.plsplshsLhstlcpshh........................................ttphsshhss.............+La...lhpshlcuIhpss.us.pLRsshYshhhpYLptlhcsc...................................................hpcpshpllcshGccLl-slCsDAhsGcshsRhtAlhlL-sLlpl....s..............psshllphlspssaLplllcSl+pp-ttLps................hhs.sshhhtLh...........hacuplshLhRlApo..+pGAptLlpsslhphltpsphhshcP.Dlslc..hhcp.................................sss.hppaaplLhPsLplhssllhSh.................G.p.....scpshtpshpaLt...pppchlhull+csshht.........................................t..tthspht..pptl.plspthhllpshpp 
...................................................................................................................h....l..p............t....h........p.....tp.+t.l..........................tt.h............................h.ls...t....sh.lus.hplsEh.shthhh...........tu.tt...........................s.....hh.a.a...tpt.hht.h..hh............................t....................................hh.thh.t.t.h.t.h..htt....ht...........................tt.........................t...................................................h.p....ls..lh...............h..hht.htt...................................................t..p.h.......h.hh......hh..h..............................................................h...h.............................................................h...h.h.h.h.h..h.....................t.............h...h........................tsh..h...............................................................................................................................................................hh...............ht..c...t................................................................................................................................ht........hh...h..ha..................p.s.....hh.......................................................t.Fh.............t...............h..shh.hltsls.........s..su..haphh..t...........................hs.atthht.h..a...hp.........................................................................................t..............................................................tp..hl.shhtlhttlh.............t...................................hhthht................h...lhut.....................................hh.hltth.................t..hW....l.c..................................................................................................................t..h..p...pth....shhthh..Lh...........................t...........................................t.......h..hhp.....hh..h.h...................h...t..p.h...........l....hphh.....l
..h.....................................................................................................hh.ph..tt...hht.hh.h..........h...................................................sh.hh........hh.hp....h.t.h..........................................................................................h.....................h...shtl.h..h.........................................hh..h..........tl..sh..t.h.............................................................................................................................................ht..lhthh..sht........shshhLLthp..........................t.......................ohhpsll.h..h................................h.t..htlh..Lh...p...ss..hh..hp........thh...h.........................................................h......t.....hhphhsh.h...............t................hht.l......................................................................................................................................................h.phhp.h...........................h.....ht.....................................................................hp...h..h...hh.........................................p.....h....h..................hht.....sW.phh.h.h....................................hh.p.h...h..........................h.t....h..h...h............................................................................................................................................h....hh...h..h..................h....+...Y..h....hh.hh..................................................................................t....h..t..s.tthhphl.pDs...stt....ph..uh.hlt.l..h........t...................t.hh..h.p.shh..hhp..l........h...................................................................hh.s.h.hh.phst.....G...lht.thh...h...thht.......p...........t......................................................................................................ph.thh..hhplh...shh...................................................................................
...................................thh.hh..........h..hhp...................................................................................................t.................................................................................................................................... 0 108 169 246 +11728 PF11895 DUF3415 Domain of unknown function (DUF3415) Assefa S, Coggill PC, Bateman A anon PFAM-B_1962 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00141. 25.00 25.00 25.00 27.40 24.50 23.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.76 0.72 -4.11 35 127 2009-06-19 10:41:03 2009-06-19 11:41:03 3 2 39 55 5 183 0 77.90 45 21.94 CHANGED sKhpssFpsshpKLulLGpstssLlDCS-VlPhPtPsssp...AaFPAGpohpDlE.tACsssPFPoLsoDPGPsToVsPVPss ....chtssFpsshtKLulLGpctssLlDC..SDVlPsPhshssp....uaFPAGhohsDlE.pAC..sp.oPFPoLsoDPGPtTolsPlPs......... 0 3 4 5 +11729 PF11896 DUF3416 Domain of unknown function (DUF3416) Assefa S, Coggill PC, Bateman A anon PFAM-B_601 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 190 amino acids in length. This domain is found associated with Pfam:PF00128. 
25.00 25.00 25.00 26.10 24.40 23.90 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.28 0.71 -4.34 88 711 2009-06-19 10:45:08 2009-06-19 11:45:08 3 10 684 18 232 658 95 186.60 32 25.33 CHANGED stsRlsIEsVpPtlDsG+assKtlsG-.lsVpAclFpDGHDtluAsLhaR...........tsss......ps....Wp...................pssMp....h......uNDRWpupFsssp.Gpapapl-AWhD.auTW++slp+KlpAGt...-lsL-Lt-GutLlpcAst.pst...us.....ppttLpp.hsstLps..t..ssspp...luhhLssclsplhspts.R.shsTp.sp.hslhV-Rc ..............h..sRlsI-sVpP.l-s.....GpaPsKtllGE.lsVpAslat-GH-tluAsl.h.h+...............................sstt......pt....hp..............................phsMp..............sh......GsDpWpuphs..s.................cp.GpapapV-uWsDsauTW++shptKlsAG.............-lpl-LtEGstLlpcAsp.pss.....ts......................stp...sL....p....t...s.......sssLps..ts..........ssss+...luhhlssplsplht.pt....s.........R.phlop.utshs....lhV-R...................... 0 62 140 191 +11730 PF11897 DUF3417 Protein of unknown function (DUF3417) Assefa S, Coggill PC, Bateman A anon PFAM-B_724 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are typically between 145 to 860 amino acids in length. This protein is found associated with Pfam:PF00343. This protein has a conserved AYF sequence motif. 25.00 25.00 25.60 43.50 22.70 21.70 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.79 0.71 -4.15 79 640 2009-06-19 10:53:20 2009-06-19 11:53:20 3 8 576 0 243 588 45 116.40 36 14.10 CHANGED shpphs........Vhsp.LP....cpLptL....p-LAhNLaWSWpscspsLFcplDspLWcpss+NPVthLtplsppcLcplupDtsalschctlhpchcpYhss..spWapp....sptt.spslAYFShEaGlp-oL ..............t.pphsVpspLPcpLptLs-LApN..LaWSWs.cspcLFpplD.spLW.c.p..ss+NPVtlLtpls.pRLcpLupDpsFlpchcplhschcsYhst....hWapp..................sstt..........ssslAYFShEaGlscsL....... 
0 92 186 230 +11731 PF11898 DUF3418 Domain of unknown function (DUF3418) Assefa S, Coggill PC, Bateman A anon PFAM-B_1028 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 582 to 594 amino acids in length. This domain is found associated with Pfam:PF07717, Pfam:PF00271, Pfam:PF04408. 25.00 25.00 25.60 25.50 23.30 23.10 hmmbuild -o /dev/null HMM SEED 586 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.63 0.70 -6.12 105 1852 2009-06-19 11:18:30 2009-06-19 12:18:30 3 22 1658 0 364 1658 366 534.70 42 45.93 CHANGED K+pGpVhAhE+lTLYGLslVscRpVsYupIDP...ttuRElFIRcALVpG-h.....pT+t....sFhpcN......pcLlc-lEcLEcKuRRRDlLVD--sLasFYDpRlPsclssspsF-pWaKptp....cppPc...LLhhs+-pLhpcpustlostpaP-thph.uslcLsLoYpFEP......GpscDGVTlpVPlslLNQlsspth-WLVPGhhcE+lhALlKuLPKslRRphVPsP-aA...cuhltp..........h........pst..................ps.....sLhcuLscpLp+ho.Gl.....plst-sach..sp....LPsHLphsF+V...lD-cG.+.......h.............lupGRDLstL+pph.tspsppsls....psu.......................................................................s.s.....hcppulps.........W..s.FGsLPcphphc...p.u.G.....hpl.puYPALV....Dps...s.......uVulclF-sttcAppsp+tGlpRLlhLpls.s.l..KhLc+...pLPst............................scluLha............ssh.........Gps.ptLh-D.............hlssulcphhh....pt...s............................hs....psppsFpphhccsR........ucLsstspclsphlpplLsthppl...........p+pL+....u.phshsh.shuhs..............Dl....c...pQLspLlhtGFlspsshppLtchsRYL+AlphRL-KlstsP.sRDptphtclpsl.ppap.phhsp..h.t.up.h......sspl.pchRWMlEELRVSLFAQpLGTshPVSsKRlpcthppl 
.........................................................................tpGuVhAhE+VTLYGLPlVstRtVsYupIDP.........s...huRElFIRpALVEG-a.................................pT.+a......sFapcNh+LhtElE-LEcKoRRRDlL..VDD-sLFpFYDpRI....sp-lhSu.........ppFDsWWKpsp............................cc..s....Pc......LLshp+s....Lh....p...c.s.A..p..p....lop.saPshWcp.....G..........s.....l......cLtLoYpFEP................Gs.s..s...DGVTVclPLslLNQl....p.t....psF-W.lPGLRcELlhuLIKSLPKslRRshVPAPsaAcAhLspl...................ssh..................ph.................sLlcsL.t+pL++hT...GV..............plst--W...p...h....sp....lPsHL+hsF+l...lD-...+s.+.........t............................................Ltcu+sLttL+ppL...ts...p.s...p.pols......tsA............................................................................................s..s...h-pp.s.l.ph........W..........s...F..GpLPc...p...hptc.....+.u..s.....hpl.puaPALV..........Dct.....c...............uVul+.lF-s.h......-t..ppuhhpG.l+RLlhLsls.sPl..KaLpc....pL..Pst.......................................................s+Lu.Lha.......sPa.................................Gpl..pLl-D.............slss.ulDpllt....tp..u.............................................................l.hsctsFttLt-c.lR........ucLsssshclu+plppILsthhsl................................................p++Lc.....u...phshsh....shuls.....................................................Dl...+...sQlssLlapGFlstsshpp..Lsch.RYLpAlphRL-KL.s.s..s....P...pRDpt...ph...hclcp...ltp...tap...phhsc........hs......stt..........stcl....p-lRWMlEELRVShFAQpL..GTshPlSsKRlhpshpp................................................ 0 97 210 297 +11732 PF11899 DUF3419 Protein of unknown function (DUF3419) Assefa S, Coggill PC, Bateman A anon PFAM-B_1329 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 398 to 802 amino acids in length. 
28.20 28.20 28.30 28.30 22.10 25.20 hmmbuild -o /dev/null HMM SEED 380 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.08 0.70 -5.74 42 261 2009-06-19 11:23:06 2009-06-19 12:23:06 3 16 242 0 167 284 73 367.10 30 64.16 CHANGED tshhcchashhap........shlYsphWE.........DPclDhpsLpls....sscpllsIsSuGsNhLuY..LstsPsclpAVDLNPspstLlcLKlAAh+sLs.apsFachFGcupp.......ps.pplhtppLsPpLssputpYWpp+......t+phs..hhs+GhYcpGhhGp.hlths+.hls+lhGl....plcpLhpApolcEQRphapp+.lpslh.s..pllphls.........sphhhhhuL....GlPssQhshl.................................................tsss..lhphltc+L-tlhssh.l.p-NYFha.slstpYst.........sshPsYLptcsappl.......+sss...-plclassslp-hLtphsssolsthlLhDu.DWhss..............................splss.hpp.lsRsspsGuRVlaRoAutp....................shh.tph............cphsapsctusthpst.......DRsshYuuhalhp..ct .................................................................................h......hat.hhs.......thlYs.sWEDPclDhchLpls....ss-..pllsIsSuGsNhLsY..L.....psP.t+lcAVDLNPsQstLL-LKlAuhp..tLs.ap-ha..............chFG..cupp................s.shpplh.p.c.LuP..pLsspuhpYW.p+......hphhs......hhsculYcpGh.tp.h.lphs+..hlh+lh..G..l...........plcclhpup.olpEQRphapp+......lpshhhp.........hhlphhh............sphhhhhuL....GlP.sQhs.hl.........................................tsts..lhphlhspL-.lhpph.l.p-NYFhahsltG..cYsp..........pshP....sYLp.csatpl............+sss...-plclHsssls-h....lt....p.h.ssso....lshhllhDshDWhss....................................pphsp.hpt.ls+s...h.p.s.GuRVlhRoAuhp....................s..hh.tph............cp..hsapsctssthtst..............D...RsshYuuhalht..h............................................... 0 55 102 139 +11733 PF11900 DUF3420 Domain of unknown function (DUF3420) Assefa S, Coggill PC, Bateman A anon PFAM-B_1362 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00023. 
25.00 25.00 25.90 25.50 22.90 24.80 hmmbuild --amino -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.33 0.72 -3.86 26 221 2012-10-02 12:10:21 2009-06-19 12:25:11 3 20 51 0 50 251 0 49.20 42 12.95 CHANGED DLDsloLEKpLP.-VlcpIcplRhp.t..........phsth.........sshp-K+l+RIH ..slDsloLEKpLP.EVlcKIctLRtpst...............sppsphsth......................sshc-K+IRRI+............ 0 6 27 37 +11734 PF11901 DUF3421 Protein of unknown function (DUF3421) Assefa S, Coggill PC, Bateman A anon PFAM-B_1420 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 119 to 296 amino acids in length. 26.20 26.20 26.80 26.60 25.80 26.10 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.51 0.71 -4.37 49 531 2009-06-19 11:28:17 2009-06-19 12:28:17 3 18 87 0 352 549 3 104.40 31 58.48 CHANGED huGpD.su..tslYVGRuh+pGshlPuKVlPs.+ttsalsaGGpEhttps..aEVLssssh........pWl.sssGslP.....sAl.sGpotp.GEsLYlGRupapGslssGK.lp.SHtshYIPasGtElp.hs ................................h.......t...haluR.s.h.ap.s.sh..lPu+lhsp.pt.hsahsa.s.utEht.....hpp..aElLsstth................pWl..sss.s.G.p.l.Ps.....................sAl.uG.....pss.s.....G..E.sLYlGRuh.a...p.G..slhsGK..lp.s...u.+.t...shalsasGtEht........................ 0 119 159 290 +11735 PF11902 DUF3422 Protein of unknown function (DUF3422) Assefa S, Coggill PC, Bateman A anon PFAM-B_513 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 426 to 444 amino acids in length. 
25.00 25.00 38.00 37.70 22.70 22.30 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.31 0.70 -5.60 78 308 2009-06-19 11:40:50 2009-06-19 12:40:50 3 3 279 0 144 330 270 407.80 35 95.89 CHANGED hpsHPL..RtsLtsElHARPa.hlssPtplsalAhh.......sspppss.-tspLssLtpphGhs.P.........sssus+aths.hGt...hpL+WEpHTEFsTYThht......sssspsFssss.....hshhP.....tsWhtphPG.....phlsusclplt...stssssptt.....h.ph.F.........................sspoLsuSpltsGsAhlhoDFRl.cscGasRhlV..hspslsstphGRlVQRLlEIETYRhhALLuLPhA+plssplsph-ppLuplspphsss...ssss-.............plLscLoplAAclEshsAposaRFuAopAYttlVppRlppLREp+ls.GhpThsEFhpRRhsPAM+TCpuspcRhcsLScRlsRAusLLRTRV-lphppQNppLLpSMs+RuclQLRLQpTVEGLSVsAISYYsluLluYlhculptht.....ls.l.s..hslhsul..ulPlVlhsVWhslRRl+++ht ....................................................................................................................h..tHPhRttLhsElHARPa.hlssPtplhalAhh.......................sstptst.cpstLtpLspphGhs..P.........ptsusHathp.hGt....hpL+WEpHTEFsTYTahts...........sssstsFssss.....hthhP...tsWhtphPG....phlsulclplh....stsssstth........ph.F.............................ssssLssSpV....t.....sG.....sA.........tlhoDF+l.cs-.....GasRhLl.....hs.pshsspphGRllQRLlEIETYRhhALLuLPhA+phsspLsph-pp..L...sp..lspphsss...ssssc.........................pLLscLopLAAclEshsApotaRFuAopAYtplVppRlppL+Ep+lt.GhpThs-FhpRRhsPAh+TCpuspcR.psLopRlsRAssLLRTRV-lphcpQN.......p.......pLLpSMscRAclQLRLQpTVEGLSVsAIoYYsluLhuYhhculpt....ht.......hs..l.s.splhsul..hlPlllhsVWhslRRl++ph.h................................................. 0 33 80 106 +11736 PF11903 DUF3423 Protein of unknown function (DUF3423) Assefa S, Coggill PC, Bateman A anon PFAM-B_670 (release 23.0) Domain This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 73 to 118 amino acids in length. This protein appears to be related to ribbon-helix-helix DNA-binding domains, suggesting these proteins may also bind DNA. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.31 0.72 -4.22 57 485 2012-10-02 18:44:02 2009-06-19 12:44:36 3 2 458 0 138 370 49 72.40 32 83.02 CHANGED huhVKIsD-La-plRpsupshsRSIsuQhEaWh+lGhlsE..hsPsLsas.............................tltphhhpts........thphsph ....sll+Iu-pLa-slRttupshSRSlNuQhEaWh+lGhhsE...pPsLsap.............................tltphLhptt..............sh.................................................... 0 33 66 106 +11737 PF11904 GPCR_chapero_1 DUF3424; GPCR-chaperone Assefa S, Coggill PC, Bateman A anon PFAM-B_942 (release 23.0) Domain This domain, and the associated ANK family repeat Pfam:PF00023 domain, together act as a chaperone for biogenesis and folding of the DP receptor for prostaglandin D2. 25.00 25.00 26.20 25.90 22.10 24.80 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.10 0.70 -5.28 39 534 2009-06-19 11:46:30 2009-06-19 12:46:30 3 13 131 0 299 479 1 286.90 32 59.12 CHANGED RlDoTLhGF-.sh.......phpRucpSalF........cG-ssss...........plh.ls+cc+hlhsthhph............sttttcpplssh..............hpsslhsstlcspphshpcs................huW..Rp-+sEhl.s...sacu+lYsh.psV.l.h+pR..............................p-+Loc-pptphcs......................tpsshpshhs....................................-pph..........................................s.phpp.....shssss.sshs.c.phhsst...pht..spshG+...............sh.c.......sppppph+sslWloc-FPLp...............h.-pllPllDllA..sspthp+L+-h........................................loh...cLPs.GFPVKl...............................pIPlh.slpAtlTFspapthp..........................................................................schppshFpIPssYph 
.......................................................................................................................RhDsTLhuFp.ph..........phpRGphSalF........pGpssss..............slh.lsHcp+hlhsthht............................sttp.ctplshh......................hsos.lhsstlcscp.l.sFp+s.....................huW...Rp-Ks.............EhV.s...sac.AK...........VYsh.s..sV.l.s+pR...........................................................p-HLocc-pt+.cu.........................hpshpphhs.......................................ppph...s...........................................................................................................t..hpt...........sh...ssss.s...slo.-..-Yhsst...........p.t...s.phG+............................h..th.........ppppp.pF+..AslWhsp-a..P.Ls..................................l.-plhPll-lhA...sstths+LR-F........................................lph...cLPs.GFPVKl...............................-....IPlh.slsAplTFtphphh.................................................................................t.p.p.FtlPtsY......................................................................... 0 80 134 207 +11738 PF11905 DUF3425 Domain of unknown function (DUF3425) Assefa S, Coggill PC, Bateman A anon PFAM-B_1128 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 120 to 143 amino acids in length. 
27.10 27.10 27.80 27.50 26.40 24.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.10 0.71 -4.27 62 451 2009-06-19 12:01:03 2009-06-19 13:01:03 3 17 80 0 392 468 1 130.20 22 31.52 CHANGED sthlu.a................h.sstts.tp.lPs.tLpPTt.Qh.....plPHtsh.lDhl.PaPphRDpLlpthpp...............h-ps.....chhp-hhss........................................hsttshtt.........................................psshlhh...tc...hc.psWEloptFhc+as..............hlhcut...phh.........ospa...Rt ..............................................................................................................s...t...tp..lP..tLpP.T..hQh.....................ph.PHtsa.lDhl.PaPphRDpllt....thst.....................hsps.....phhtshhss.........................................................................st..................................................................................................pss.hhh...tc..shchpsWclsttFhc+as..............hlhpst.................................................................... 0 75 180 332 +11739 PF11906 DUF3426 Protein of unknown function (DUF3426) Assefa S, Coggill PC, Bateman A anon PFAM-B_1212 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 262 to 463 amino acids in length. 
27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.78 0.71 -4.54 92 481 2009-06-19 14:23:17 2009-06-19 15:23:17 3 4 476 0 190 502 91 144.30 22 40.45 CHANGED hsshhlhss.LhuQhsah..RsplsphhPphpshhpt..h...C..lGCpls...hslpslp..pt..shcphs.......sssslhlpuslhNpuchstshPtlcLslpDtpsp.lhp+sh..............P.tpalt.t........pst.....lsssps....hplpl......plp...ssss........pAssaclphh ..............................................................hshhhhhsLssQh.saht.ts..plstt.Pthp.shhpt...h........C...l.s..Cp......lss..hshptlp.h..ps...slcphs................ttsshlhlpuhlhNpushs.shPtlcLsltDtssphl.sp+hh.........................................tP.tpYltst.....s.......pst...................lsssps....hphtl.......plt...ssss........psssaplph.................................... 0 50 121 156 +11740 PF11907 DUF3427 Domain of unknown function (DUF3427) Assefa S, Coggill PC, Bateman A anon PFAM-B_1236 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 243 to 275 amino acids in length. This domain is found associated with Pfam:PF04851, Pfam:PF00271. 
27.70 27.70 27.80 28.50 27.10 27.40 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.81 0.70 -5.45 60 723 2009-09-11 10:09:40 2009-06-19 15:27:28 3 13 675 0 139 602 88 256.20 25 28.44 CHANGED ph.lsppl....st.p.hth.hlt.hh.........................t.hthtph.p.lpp........t.........tphhphLphpaasspt......................hpht........hhplspphpphlpss...........thpp.lt-llphsl......pp.......t.......sslplapcYoRc-lhthls.hspspss.....htGhhhhps....pspslFlThpKs-c..hSsospYcDhhlspphF+WpSpsspshcSscspphlpppcp...Ghp...lhLFVRcps....uhsts.FhaLGps.phss......pp.................uppslshpacLc...pPlssslachhsp ....................................................................................................hhotphhsuh+.h-.hlLc.lh...........................................pp.p.hs..h.pp.h.p.h.pt...h.hspt.........shp...s...shphLshsFasss...........tph...........hlphpt........hhplspthpph..Lpsp.................tFpthlpDllchuh....pt........p..ttp.....ssLhLap+YoRc-hs+lhs...asps.tsus.........hhG.Yhhtpp.........phslFlTacKp-c..hssss.......pYcDpals..ppph+Who+ss+.slc.Spcsppllppctp...shp....lhlFV++pc.........sput..FaYLGps...phlp...ts..................tupssVshshpLc...pslpsclachh................................... 0 49 95 121 +11742 PF11909 NdhN NADH-quinone oxidoreductase cyanobacterial subunit N Coggill P anon Lonsdale D Family The proton-pumping NADH:ubiquinone oxidoreductase catalyzes the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. 
Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 subcomplexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1]. The cyanobacterial NDH-1 complex contains additional subunits, NdhM and NdhN, compared with the minimal set of the bacterial enzyme and these seem to be specific for thylakoid-located NDH-1 of photosynthetic organisms [2]. 
25.00 25.00 60.10 59.90 21.10 16.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.59 0.71 -4.89 20 98 2009-06-19 15:22:00 2009-06-19 16:22:00 3 1 89 0 45 90 139 149.60 50 87.57 CHANGED M...sLLh....oGppFh+........DLEpsGuLAlasPLEGGhEsRhLRRLRAuGYpohhhSARGLGDPpsaLhplHGVRPPHLG+pslGpsuAlGcV.hV.P.ls....pL..ssuKuLVLWlLEGpVLSpuELphLssLsppEPRLKlVlEhGGsRslRWpPLpph .......................MsLlhoGptFlcDLEppGuLAlYsPLEGG....aEGRYhRRLRusGYpshplSARGLGDspsaLhplHGVRPPHLGKpslu.ptuAlG.laalPP.lshpLppLsssuKGLVLWllEGpVLScuELpaLstLsph-P+lKVVlEhGGtRpFcWpPLpp.h. 0 8 29 40 +11743 PF11910 NdhO Cyanobacterial and plant NDH-1 subunit O Coggill P anon Lonsdale D Family The proton-pumping NADH:ubiquinone oxidoreductase catalyzes the electron transfer from NADH to ubiquinone linked with proton translocation across the membrane. It is the largest, most complex and least understood of the respiratory chain enzymes and is referred to as Complex I. The subunit composition of the enzyme varies between groups of organisms. Complex I originating from mammalian mitochondria contains 45 different proteins, whereas in bacteria, the corresponding complex NDH-1 consists of 14 different polypeptides. Homologues of these 14 proteins are found among subunits of the mitochondrial complex I, and therefore bacterial NDH-1 might be considered a model proton-pumping NADH dehydrogenase with a minimal set of subunits. Escherichia coli NDH-1 readily disintegrates into 3 subcomplexes: a water-soluble NADH dehydrogenase fragment (NuoE, -F, and -G),the connecting fragment (NuoB, -C, -D, and -I), and the membrane fragment (NuoA, -H, -J, -K, -L, -M, -N). In cyanobacteria and their descendants, the chloroplasts of green plants, the subunit composition of NDH-1 remains obscure. The genes for eleven subunits NdhA-NdhK, homologous to the NuoA-NuoD and NuoH-NuoN of the E. coli complex, have been found in the genome of Synechocystis sp. 
PCC 6803 which has a family of 6 ndhD genes and a family of 3 ndhF genes. Two reported multisubunit complexes, NDH-1L and NDH-1M, represent distinct NDH-1 complexes in the thylakoid membrane of Synechocystis 6803 -cyanobacterium. NDH-1L was shown to be essential for photoheterotrophic cell growth, whereas expression of NDH-1M was a prerequisite for CO2 uptake and played an important role in growth of cells at low CO2. Here we report the subunit composition of these two complexes. Fifteen proteins were discovered in NDH-1L including NdhL, a new component of the membrane fragment, and Ssl1690 (designated as NdhO), a novel peripheral subunit [1, 2]. The three nuclear-encoded subunits NdhM,NdhN and NdhO are vital for the functional integrity of the plastidial complex [3]. 25.00 25.00 31.40 47.00 22.00 19.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.12 0.72 -4.34 21 95 2009-06-19 15:36:07 2009-06-19 16:36:07 3 2 89 0 43 93 99 69.00 52 67.02 CHANGED lKKGuLVRVsREthpsSlEAtASDsphP.....sYlFEusGElLsl+....GDYAQVRa.phPsPsVWLRlDQLEsh ...lKKGsLVRVs+EphpNSlEAtASDs+hP.....sYlFEspGElL-l+.....G-YAhV+a.tlPTPslWL+lDQLct............... 0 7 27 39 +11744 PF11911 DUF3429 Protein of unknown function (DUF3429) Assefa S, Coggill PC, Bateman A anon PFAM-B_1072 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 147 to 245 amino acids in length. 
39.10 39.10 41.90 41.30 34.60 37.40 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.95 0.71 -4.21 83 410 2009-06-19 16:34:30 2009-06-19 17:34:30 3 3 382 0 189 393 578 145.90 27 71.46 CHANGED tpsPphshh...LGhuGLlPFlhsshhhhhs....shtths...........................shhhhlsYuAlILSFLuGlhWGhuhptt..............st.shhthshullsuLhu......W.....sshhh....sshh......uhhhl.hhGalslh.hhDhthhpt....shsPsWahpLRhhLTsllshslhlshhh ...................p...Pp.shh..LGhAGLlPFlssslhhhhh.....sh.h.t............................hh.h.lsYGAlILSFLGGl+WGhuhstt........................sphsh.hphshu.llssl..hu......Wssllh.........ssth......ullhl.hhuFshlh..hhDtthhtt................thhPsWa.hthRhhLThlsshslhhslh.s........................... 0 58 110 145 +11745 PF11912 DUF3430 Protein of unknown function (DUF3430) Assefa S, Coggill PC, Bateman A anon PFAM-B_1305 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 209 to 265 amino acids in length. 
28.60 28.60 28.60 29.00 28.30 28.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.36 0.70 -4.51 51 133 2009-09-11 05:55:51 2009-06-19 17:37:34 3 6 4 0 133 133 0 208.80 16 78.26 CHANGED MKl.....hh.llll....llhlhshh.................hssph...........lshh.Pap....sspCs....................................sss.GhG........as........hsp.Chths..................hhhhptssssshhphphhp..........ttsCt.....ts.hss..hshp....hssCht..................................................st..s.............hshhhhsss.ps.h.....sssoh....lthhh................................ss................pCsss.......shhhhtahssspp....l........ssshohpahCss......sh.shhphC............sssCpstsh......ph.sCs..tssshhtp.spt ...........................MKl......llll.lhhhhshh.........................ssspa......................lshp.sap...sspCs....................................sss.GhG..............ash...............hsp.Chsht................................h.hhtss.sssshh.php.hs............sCp.....ss.hss..psap....hspChp......................................................sst...........hhhhh.s.ss...ps.h......Pssoh....lhsha...............................................ss....................pCsss..................thhhhpahsssps.ht............ss.s.p.hhCss...s..sh..hs..........sssCps.sh........h.pCt............................................................ 0 98 133 133 +11746 PF11913 DUF3431 Protein of unknown function (DUF3431) Assefa S, Coggill PC, Bateman A anon PFAM-B_1346 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 291 to 390 amino acids in length. This protein has a conserved NLRC sequence motif. 
33.10 33.10 33.30 33.20 32.70 32.80 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.70 0.70 -4.90 48 316 2009-06-19 16:41:07 2009-06-19 17:41:07 3 6 70 0 237 321 32 220.90 36 63.07 CHANGED spslVlAphpsED.ssWlpp...l...s-....appsIYsVD...........cs..sush........psPtNKG+EuM...........sYLoYIIDaY-pLPss.hlFlHup+....hsWHNDs..saDssshl+pLphs.hVpppGYsNLRC.....pasPG.CPspl+...Php.....t.s.........ppsscthhsp......uappLFssss.................................VPcslussCCuQFAVSR-plpcRP+p-YlcaRp.......Wllc.......................TcLsDpl.SGRVhEYlWHllF........utps..........laCPctppCYCcs ...............................s.tlVlAphpp-.s.ssWlpp...h.......s-.......aptsIYsVD.................ssps.h..........psPtNKG+EuM....................sYLoYI...IDpYDs....LPsh..hlFhHucc....................htWHNDs....thc.ss....hl..pp.Lphp..hlpc...pG.....YsN..LRC........pas.PG....C.P.spl+......Pht............t.t...........................pttpp.th...hsp..............satcLFssss.........................................................lPch.lussCCuQFAVoR-pl..pp+P+pcYhchRc........Wllc.......................TpLsDth....SGRlhEYhWHllF.................stps.................lhCPpt.t.CaCp.h...................... 0 33 85 167 +11747 PF11914 DUF3432 Domain of unknown function (DUF3432) Assefa S, Coggill PC, Bateman A anon PFAM-B_1326 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 100 amino acids in length. This domain is found associated with Pfam:PF00096. This domain has two conserved sequence motifs: YPSPV and PSP. 
26.70 26.70 28.00 27.10 25.10 26.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.43 0.72 -11.15 0.72 -3.97 6 489 2009-09-10 16:59:27 2009-06-19 17:47:26 3 11 427 0 32 454 0 94.20 81 24.48 CHANGED A-KAussSosSs.l....ouYsssssoSYPSPl.TSYPSPV.TSYSSPsuSsYPSPVHooFPSPSlATTYPSsosTFQTQVhTSFPoSsVTNsaSS.VoTuLSD ...............V.EKAAPVSTASP.l....PAYSSSVTTS.Y.PSS...I.ATTYPSPVRTsYS.SPAPSSYPSPAHTTFPSPSIATTYPSG..TATFQTQVATSFSSPGVTNNFSSQVTosLo... 0 2 4 10 +11748 PF11915 DUF3433 Protein of unknown function (DUF3433) Assefa S, Coggill PC, Bateman A anon PFAM-B_1502 (release 23.0) Family This is a family of functionally uncharacterised proteins. The family is found in eukaryotes, and represents the conserved central region of the member proteins. 20.40 20.40 20.90 20.80 19.00 20.30 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.05 0.72 -3.82 166 434 2009-09-14 14:57:32 2009-06-19 17:52:00 3 9 77 0 373 464 0 90.50 22 15.65 CHANGED .pahhsalPsllushlthhapsl-hphptlpPahpL...pp.......susAppSlhls...Ysuphs.hhsshpAlp......pp.Hahlshsohssll.sh.hlsllsu.ul ..............................................h..ahhpalPsllushlthhaphl-hplphlpPahtL.upt..................sssAppolh..ls...Y..huths.hhsshpAlp......pt...Hahlshsshssll..s..hlsllsuu.................................................. 1 57 180 310 +11749 PF11916 Vac14_Fig4_bd DUF3434; Vacuolar protein 14 C-terminal Fig4p binding Assefa S, Coggill PC, Bateman A anon PFAM-B_1661 (release 23.0) Domain Vac14 is a scaffold for the Fab1 kinase complex, a complex that allows for the dynamic interconversion of PI3P and PI(3,5)P2p (phosphoinositide phosphate (PIP) lipids, that are generated transiently on the cytoplasmic face of selected intracellular membranes). 
This interconversion is regulated by at least five proteins in yeast: the lipid kinase Fab1p, lipid phosphatase Fig4p, the Fab1p activator Vac7p, the Fab1p inhibitor Atg18p, and Vac14p, a protein required for the activity of both Fab1p and Fig4p. The C-terminal region of Vac14 binds to Fig4p. The full length Vac14 in yeasts is likely to be a protein carrying a succession of HEAT repeats, most of which have now degenerated. This regulatory system is crucial for the proper functioning of the mammalian nervous system. 25.00 25.00 36.10 27.40 19.00 19.00 hmmbuild --amino -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.02 0.71 -5.13 38 357 2009-06-19 16:55:15 2009-06-19 17:55:15 3 16 279 0 254 343 2 162.60 47 22.35 CHANGED -cpLLcpRushIIRpLChhLss.........E+lY+sluplLpp........c...sDlpFsshMVpsLNslLlTusELtpLRppL+...t.t.pp....tshsLFssLapsWC+NsVuslSLCLLupsYEhAasllphhu.....-h.ElslshLlQlDpLVQLlESPlFshLRLQLLEPc+a....PaLhKsLYGLLMlLP.QSsAFphL+sRLpsVs .....................................................ppLLEtRGshII..RpLChhLss.......EpIa+shuslLtp..............................-.pDlcFAShMVpsLNhlLlTusELh...........pLRppL+....................shpspp....upslFssLa+SWCHNsluslSLCLLs.QsYcpAhsllphhu.......-l..EloVshLhplD+L.....VQLlESPlFsh.................LRLQLL..-sp..p..h.........PaLhKsLYGLLMLLP..QSsAFthLppRLpsVs................. 0 94 148 215 +11750 PF11917 DUF3435 Protein of unknown function (DUF3435) Assefa S, Coggill PC, Bateman A anon PFAM-B_1788 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 435 to 791 amino acids in length. This family is related to Pfam:PF00589 suggesting it may be an integrase enzyme. 
25.80 25.80 26.50 26.00 25.50 25.50 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.21 0.70 -5.75 20 446 2012-10-02 14:09:14 2009-06-19 17:59:53 3 11 52 0 340 472 0 248.00 16 52.15 CHANGED lEDlhphh+shlsTsctpFhhGhpRlQLsLhhhLushTusRPsAlLp...........Lpa+DltloLh+sPcG.usshhhl-lpscasKpah.Gtpph....Ns...............FhlPElIa...-PoLlLsP+saLlulLFttpAFps......slsosccLhp.LpVsssptph.L.L+schhDhalFp+s.hpstthcI...spshohsshpshl+phGEIsGFppshpsYphRhGuuctlspSt.lo-upcNllhpH.AsscTF.caYhspplcpDhQulhpGhsspctlhRhssphSRolDsRRPpcLosppctslccpPclpchtccpccL+p..................paGpstcsptss......................hhpchp+tppclpssRpRhpcch++ch+ccFspcQsllDIERQLS..Gpslcp-.hppshppsphhPPpph+LlcpLhshP.upol--E..hpRRhpulcAVsth ....................................................................................................................................................................................................................................................................................................................t..hh............t.t..h......................l.....hc.ph...ph.lhp........t...........h..t.......t.s...s.....h..h.hp...h..h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 20 91 252 +11751 PF11918 DUF3436 Domain of unknown function (DUF3436) Assefa S, Coggill PC, Bateman A anon PFAM-B_18 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. 
This domain is found associated with Pfam:PF03572. This domain has two conserved sequence motifs: DPRL and SYEP. 27.00 27.00 27.20 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.67 0.72 -3.73 47 2173 2009-06-19 17:05:20 2009-06-19 18:05:20 3 4 1540 0 25 2173 0 53.40 58 14.49 CHANGED ApVLTAGVQoSLNDPRLhISYEPohlp.P.ttsPthssLo.EpLlAhLQpuI+a-l .ApVLTuGVQoSLNDPRLhISYEPSsl.E.sP...p.Q.s.PsLosLTpEELLAhlQcsI+aEV.... 0 1 3 10 +11752 PF11919 DUF3437 Domain of unknown function (DUF3437) Assefa S, Coggill PC, Bateman A anon PFAM-B_1910 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 142 to 163 amino acids in length. 21.20 21.20 21.50 23.10 21.10 20.60 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.99 0.72 -4.27 32 338 2009-09-17 10:53:11 2009-06-19 18:08:46 3 8 258 4 240 340 5 89.90 40 5.22 CHANGED pphhphHuulLGLuAlVtAFPYtsP.................WlPphLsp.Luphup.c.ss.ltpos+cslu-FK+s+...pDoW.phcpptFoc-..pl-sLc.sl.............lh.oYas ..........t..lhp+HuuVLGLuAhl.uhPYssPs................WhPplLhp.Lus+s......s..D...P........s....sltpolKcslu-F++TH.pDsW...p.cpptFTp-..QLpsLp.sl....l.ssYaA....................... 0 75 131 205 +11753 PF11920 DUF3438 Protein of unknown function (DUF3438) Assefa S, Coggill PC, Bateman A anon PFAM-B_1942 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 276 to 307 amino acids in length. 
25.00 25.00 25.30 25.20 24.90 24.70 hmmbuild -o /dev/null HMM SEED 288 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.84 0.70 -5.56 25 270 2009-06-19 17:10:54 2009-06-19 18:10:54 3 3 195 0 68 269 20 253.70 42 93.79 CHANGED h+hhlhhhslhhshsss.ApAsElh+WERlPLslPLpVGpERlVFlD+NVRVGhPss..lsu+LRVQSsGGAlYL+AsEshssTRLQLQDscoGElILLDIsApsttsspsshEPVRIVhusssss.p.t..........sssussusssuss......ts.pts+hpsPlPVVLTRYAAQsLYAPLRTVEPVsGIppVsl+hshcLoTLh..PohPVcAssLuuWpL-shhVTAl+LpNpuspplsLDPRtLQGpFluATFQHshLGPtGsscDTTsLYLVTcG+uhspALlP.pht.hc..........shstp..................sspp .........................................................hh.....hhhh.hhh.hhs.hu.pA....sElh+WERhPLslsLpVsQERlVFlD+...NV.RVGhPss..Ls..s+.L.R.lQSs.G.GAlYLpAppsh..sTRLpLQsspsG-lILLDlsApt....spt.hE.PV+lV.sspsss...t...............ttttttt.st..t.............t..ps..sphpsPlPVsLTRYAAQpLYAPLRTVEPlsGltpVslchshslsTLh......PshPlpussLuuWp....lsshhVTAl+LpNpu.uptlsL...........D.PRtLp....G....pFhsATFQHth..LGstGs.spDTTslYLVTcG+s.spuhls.t...................h............................. 0 8 32 55 +11754 PF11921 DUF3439 Domain of unknown function (DUF3439) Assefa S, Coggill PC, Bateman A anon PFAM-B_1105 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 46 to 94 amino acids in length. This domain is found associated with Pfam:PF01462, Pfam:PF00560. 27.10 27.10 27.10 27.10 27.00 26.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.87 0.71 -4.36 20 546 2009-06-19 17:20:45 2009-06-19 18:20:45 3 27 7 32 0 565 0 67.30 55 33.02 CHANGED oCPGIcYLScWIpKNSull..h.ts.s..ssPDSAKCSGSGKPVRSIICPTTTTTTTTTTTTMPTTTTLPTTTKMSMVKVPLVPPEAFGRVMNACAYFPSYIFLHLVHGLAAVPLVYLVCHASQLL ......uCsDIhYLScWIupHsGlV...hp.t...s..sh....ssPDSA+CSGTNTPVRAVh........ps....oh..............................................................................s.............. 
0 0 0 0 +11755 PF11922 DUF3440 Domain of unknown function (DUF3440) Assefa S, Coggill PC, Bateman A anon PFAM-B_1674 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 53 to 190 amino acids in length. This domain is found associated with Pfam:PF01507. This domain has a conserved KND sequence motif. 25.00 25.00 34.30 26.10 24.30 21.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.13 0.71 -4.72 18 1399 2009-06-19 17:24:49 2009-06-19 18:24:49 3 7 655 0 74 628 8 100.50 38 40.47 CHANGED GVslppMRVspPFtsptpcuLpLY+lIEP-TWu+hluRVsGsNFuulYGsopu..hGa+s.lpLPcua.TW+pYshFLLcTL...Pcph+ppYhcKlpl.lpaW+p+.G...GsLs-...................................ch.cDl.tl.phR-lPSaKRhChsILKNDahC+hhuFu.TKp-ttc+ .............h..t..RlspPa...t.psL..ht.l-sthW.thhtRVtGs..sshYstp......huh.t...h.hP.th.sWppa..hLLpsh...s..h.tt..Y.p+hth.htaatpp.......h.......................................................sa+thshsl.ppDh.hp.htas.sp.......................................... 0 15 36 47 +11756 PF11923 DUF3441 Domain of unknown function (DUF3441) Assefa S, Coggill PC, Bateman A anon PFAM-B_1795 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in archaea and eukaryotes. This domain is typically between 104 to 119 amino acids in length. This domain is found associated with Pfam:PF05833, Pfam:PF05670. This domain has two conserved residues (P and G) that may be functionally important. 
26.30 26.30 28.40 26.80 26.10 25.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.55 0.71 -4.49 39 309 2009-06-19 17:27:43 2009-06-19 18:27:43 3 16 254 0 231 326 2 108.90 35 11.46 CHANGED p.stsppstsphhphlspLsupPpssD.plhtslPVsAPasAlp..caKYKlKlpPGss.KKGKuspphlphF....................................hpstp.....................hspEtcll+sl+...sp-lstsls.ucl+....lshs .........................t....tttttpphshlssLsGpPhspD.plLhAlPVCAPasulp..pYKYKVKLpP.G.ss.KKGK..A...sKpslshF................................................hpspc..................h.ss+Ep-Ll+ul+...-p-lhpsls.GKVKlsh.................... 0 87 138 194 +11757 PF11924 DUF3442 Protein of unknown function (DUF3442) Assefa S, Coggill PC, Bateman A anon PFAM-B_890 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 334 to 948 amino acids in length. 29.20 29.20 29.20 29.20 29.10 29.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.79 0.70 -4.82 50 2024 2009-09-11 14:47:36 2009-06-19 18:35:19 3 70 725 2 124 1347 552 264.00 39 30.81 CHANGED phhspts..pphssthpsp.....ssptspphths............hhsspsssplpphL...sthups....plslshss.chphpssplshl.....hshh...-sspp.LhFsQhuhpp....p.sscsssNlGlGhRp...h.t...ssah.lGhNsFaDh...-hsps.ppR.hGlGuEh.apcahchsuNhYhslosh..+sstshp.thp......................E.RsA.sGaDlphphhLPthPp.....husplhappahGcp..VslFsp...sphppsPpuhshulsYsP.lPllolss....ph.ppupuspspsphs....lplsaphGt.shppQls.stls.......tcsl.tspRa.-hVcRsN 
.......................................................................................................................hAp.u..tshGphhtsp......su-tA+shthG...........hsospsspplpsWL....utaGsA..........pVsLplsp..phphcs.S.ph-hh.......hPhh...Dspp......LhFo..Qhuhpp.....p.DcRhhuNlGlG.Ra..h.t.......ssWh.lGhNsFhDp...............Dhsps..ppR...hGlGuEh..WpDYL+hSuNhY..h...houW...+cu.shc...ap....................................p..RsA.pGaDlcspuhL..Ps.aPp......Lu.uplhaEQ..YaGDp.....VsLFsp...............sshp....+....sP..tAlohGl....s....Y.......T.....P.lPLlTlss........pa..+pGpuupscsphu....lplsYphGp..PhpcQLcsppVs......t....tcoL..tGSRY.DhVpRNN............................................................... 0 27 46 86 +11758 PF11925 DUF3443 Protein of unknown function (DUF3443) Assefa S, Coggill PC, Bateman A anon PFAM-B_1634 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 400 to 434 amino acids in length. This protein has two conserved sequence motifs: NPV and DNNG. 
25.00 25.00 25.60 28.20 23.20 24.60 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.10 0.70 -5.78 24 153 2009-06-19 17:37:27 2009-06-19 18:37:27 3 2 74 0 49 152 4 356.40 44 86.77 CHANGED ssssssuNssslTVs.sussssh...NhPhVSVTlCsPG......o.o..pCpTlDsVLVDTGShGLRlhuoAl.sul.ssLPtpos......uuuslA.ECspFso.uaTWGsV+pADVplGuE.p.........AuslPlQlIuDss.ssslPs..sCsssGs..stsosssLGANGILGIGshstDC..GssCss...ss..hsusYYsCssu..soCsssplPlupQVsNPVstF.usDNNGVll.phPslu.ssGtsSsoGpLlFGIGTQsNNslsu..sshlsoss...sG..hos.sapGp....shss..FhDS...GSNuhFFs.ssulss.....Cu...s.ssa..YCP....soshsloAolsussG.s.ousssFslu.NAssLhuss..shAassLuGs.h....s.sshaDhGLPFFY.........GRsVYhulcpss.ssG......sGP.alAF .......................s.sssuuNshslTVs...sGssssh.....NhPhlSVTlCsPG.........T.o........pCQTIsNVhVDTGShGLRllsoAl.uul..uLPssss........uGusls.ECutFso.ua..TWGsVRpADVpIuGE.h..........AuslPlQlIuD.u....osssPs.........sCosuuu..shsTsusLG.ANG........ILGIGsh.shDC..GssCss.........os....h.usYYuCssu.....soCss..TosPluQ...QVsNPVstF.AsDNNGVIl.phPslu.ssGuu.SAoGoLlFGIsTQuNNsLsu..ssll..soso...sGs....lou.shpGp.....shss.......s....FhDoGSNuhFFs..su...u...hst..............Cu....stuaYCP....uSopohouTlsutNG.u.ssssohslu.sussLFush..shAhssLuGsh......s.ssh....hDhGLPFFa.........GRslYhuh-tsu.suG......suP.YhAF........ 0 6 21 32 +11759 PF11926 DUF3444 Domain of unknown function (DUF3444) Assefa S, Coggill PC, Bateman A anon PFAM-B_1267 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 210 amino acids in length. This domain is found associated with Pfam:PF00226. This domain has two conserved sequence motifs: FSH and FSH. 
26.10 26.10 28.70 28.70 25.40 24.30 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.60 0.70 -5.13 29 318 2009-06-19 17:39:28 2009-06-19 18:39:28 3 14 19 0 231 311 0 192.20 32 39.01 CHANGED ppssssssphssPDs-FaDF-c.RsppsFtssQIWA.lY.DspDGMPRhYAhIc+Vhss.sF......+L+loWL-sps..psE......pWhstshshu.CGcF+l..upspthcshshFSH.h..hshcpsp+sthpIaP+KGElWAla+sWs.sWsss...t.scphacY-hVElLs.sassp.tGlsVs.LsK........lcGFpolFppptpps.....hhhIPpcEh.hRFSHplPua+hoGcEtp ..............................tt........hshs-s-FasF-ppRs..ppFtssQlWA.hY..D.s.DsMPRhYuhIp+Vhs.....sF.................clploaLcsps..psc....h..........pW.hpp.shshu.CGpFch....sp.p.p.........h.ps.hs...h.FSH.l...............thpts..t............p............t............h............hpIa............P++G-lWAl....Y..+s........Ws...p.Wstp.......s.....phhhcY-hVElls.saspp...tGlpVs.LhK.............................lp.Ga..tolFppt.ptt............hhhIsppEh.hRFSHplPuaphotpc..t............ 0 30 117 172 +11760 PF11927 DUF3445 Protein of unknown function (DUF3445) Assefa S, Coggill PC, Bateman A anon PFAM-B_501 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 264 to 418 amino acids in length. This protein has a conserved RLP sequence motif. This protein has two completely conserved R residues that may be functionally important. 
25.00 25.00 28.70 28.40 22.90 24.20 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.64 0.70 -4.99 75 510 2009-06-19 17:41:17 2009-06-19 18:41:17 3 8 251 0 334 538 505 230.50 26 64.60 CHANGED hulps.....hc..h.ppWlcl....D.ppYhsphsp+pclLpc.cscpshsh..........................PcupsAshELh-hlhp.aLsp+YP.phFph.......ptss.............hpNphsGpphshshs.........................s......................P.....Lphluc.lpEDhsllhpc.................................sspahLpAuslsFsus.WslspKlGhs......LstIH..sPVPtYc..pclppuh..................p+aFp+lpsscPlpRhNWslps...cs.pLhhss.......tphtphs.t..........ttptpssscphalRsERQoLpRLPcSsAllFoI+TYhhsls-...lpp ...............................................................ul..ht....pphlpl...D.ppYhppht.+tplltp.csppshth....................................Ppsh.A..shEl.hchl...........hp..aLst+YP.ph.Fph.............pts.s.......................hpNphhupp.hshs.t..........................................p...............................................P.......Lphluc.lp-Dhsllhpp...........................................sspahltAuslshsus.Wshpp+hGhs......ltpIH..sPV.Ptac..pplttu..h..................p+Fh.p+lps...sp.slpRhN.....Wslps.................ss..pLhhs...................tthtp.s..................p.ptp.shpphalRsEhQoLhRL.Pp...SsAllFsl+sahhslpcl...................................... 0 69 177 275 +11761 PF11928 DUF3446 Domain of unknown function (DUF3446) Assefa S, Coggill PC, Bateman A anon PFAM-B_833 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 80 to 99 amino acids in length. This domain is found associated with Pfam:PF00096. This domain has a single completely conserved residue P that may be functionally important. 
25.00 25.00 28.80 28.10 22.90 22.30 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.15 0.72 -3.79 13 977 2009-06-19 17:42:26 2009-06-19 18:42:26 3 16 615 0 79 862 0 79.10 48 22.95 CHANGED G+Fol-Pss..ssusShW..sEsllSLVS.GllGhsssPuo................ossSSSS.......oSuSotSssL.......Ssolppopss..sIY.S...usPsYSossuDIas- .........G+Fuh-ss......us..ohW...PEslhSLVS.GllGMsssPso.....................oosSSSS.........sSuSs.t.SssL.........SCo.sp.tS.-ss.....sIY..S....AsPsa...sss.suDIaPE...... 0 3 10 30 +11762 PF11929 DUF3447 Domain of unknown function (DUF3447) Assefa S, Coggill PC, Bateman A anon PFAM-B_10 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00023. This domain has a conserved SHN sequence motif. It seems likely that this region represents divergent Ankyrin repeats. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.96 0.72 -4.17 607 1243 2012-10-02 12:10:21 2009-06-19 18:47:56 3 305 1 0 1242 1262 1 73.30 47 16.44 CHANGED Fs.s.c.....IT.p.pCL.phSFLGGNs-IhsECL.K.....h..p..cPs....p.......pC.McaAI.lSHNIDF.VoF..L........hN..E..Y....sl.....c...I.s..........lp....Cs...h......apNLc.....uF....llY .....................ap.scIT.ppCL.phSFLGGN.-IhsECL.K..............h.p....cPs.........p..pC.McYAIISH.NIDF.VoF..L........................hN.E........Y...sl.........c....I..s........................Lp...........Ct...t.apNLc.uFlla................................................ 0 1242 1242 1242 +11764 PF11931 DUF3449 Domain of unknown function (DUF3449) Assefa S, Coggill PC, Bateman A anon PFAM-B_769 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 181 to 207 amino acids in length. 
This domain has two conserved sequence motifs: PIP and CEICG. The domain carries a zinc-finger domain of the C2H2-type. 49.60 49.60 73.20 72.20 44.10 43.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.47 0.71 -4.80 37 347 2012-10-03 11:22:52 2009-06-19 18:56:47 3 14 298 1 237 326 6 185.00 48 37.88 CHANGED uhtEatlpphsphLspphppThpplcRKpuhTtcEppt.................................................tt..spssppss............................pscsc.p-p-p.laNP.hpLPLGaDGKPIPaWLYKLHGLshpapCEICGNhsYhGR+AF-+HFsEhRHsaGh+CLGIs..sottF+pITpIc-AhpLac+lp.............ppppppph..........csppt..EhEDscGNVhscKsYpDLK..+QGL ........................................................................................................................u..hEhplhc.hschLs.cphptT+pNVpRKQuhTtcERpt.........................................................................pp.pppts.............................pscs-..--..--chlYNP.hpLPLGWDGKPIPYWLYKLHGLslpasCEICGNasYhGp+AFc+HFs.........EhRHsaGhRCLGIP.....NTspFtsITpI--Al.p.LWcKlc...............pppppp..ch...............ps-p-..EhEDupGNVhscKsYpDLp+QGL........... 0 83 133 197 +11765 PF11932 DUF3450 Protein of unknown function (DUF3450) Assefa S, Coggill PC, Bateman A anon PFAM-B_773 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are about 260 amino acids in length. 
26.90 26.90 26.90 26.90 26.60 26.60 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.38 0.70 -5.11 59 359 2009-06-22 08:13:35 2009-06-22 09:13:35 3 6 256 0 117 325 524 242.50 31 91.02 CHANGED hphsh...........sll....sshhhssus.sst.........................sssls..pshpttsptspsutpSQp+I-phs-pspshht-hcpltccl-sLcl...YssplpchlssQppclsuLpppIsplpphcptlsPLMhcMl-sL-pFlphDlPFht-...cRtpRl.ppLcshMscu-losu...EKaR+lLEAYQIEh-YGpplpsYpupl.sl..sGpphpV-hL+lGRluLlhpohDuppshhWsspsp....pWps.Lss.phtsslscAlclAccQtsssLlpLPl..................ssE ............................................................hhh......hhhhhhsss...sst........................As..sLs..psp.shpsp.tspuu.sp.SQp.+IDphs-pstphhsEhcplpcElcsLpl....Y.pspLpp..hVs..sQpp-hsuL.......ppQlpplcpocpslVPLM.hcMl-u.L.cpaVt.Dh.Phphp.......pR....ppRl....pcL.cphhscADVosu...EKaR+l.LEAYpIEh-YGpplssYpupl.sl....supplpsDhLpLGRluLlhpohDupphhhWssppp......tWpt..lss...s.tpslspuhplApcQhsPsLlpLPl......t.................................................................................................. 0 40 72 94 +11766 PF11933 DUF3451 Domain of unknown function (DUF3451) Assefa S, Coggill PC, Bateman A anon PFAM-B_877 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 199 to 238 amino acids in length. This domain is found associated with Pfam:PF06512, Pfam:PF00520. This domain has a conserved ADD sequence motif. 
27.50 27.50 27.80 27.80 27.30 26.30 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.66 0.70 -4.55 27 544 2009-06-22 08:19:43 2009-06-22 09:19:43 3 20 95 0 211 695 0 216.20 37 12.31 CHANGED SchSphS.cshpppc..............ptppcppccchp.tpEcscs-ph.+pcspss.pp.t...........ph.++h.......pps.LSlsGSsFssRRsS+u.SlFo..hR....sRt+psGs-s..................-aADDEpSsst.....S-p+........GulhlPhh...p+poutsShs.....S+uS+l....hsspG.....chh.u.sh-sNGhsshhttsthhs.......s.tphlPt...t.......sshp-sphpp....p.psh.hs..shlpp..sttc..p.pshstssh ................................................................S-hScLSSKSsKEcR....................st.pc..c.++.p+c.ptt.pEc.t.-s.-.c.h.K.S-S..--uh++psah...........................................p.sht++hs......................pQo.LSl.....GS....F..ss...R.RsS+s.SlFS...FR.....GRs+DlGSEs......................................-FADDEaSsht..........-sEuR.......RsSLFlPhth.tcRRsSp.....S.s.........S+uS.+hh..........hshsG......Khp....S.sVDCNG.VVSLlsussshs....................ssupLLPt..............................ssos-sph+p....h.sshtho.h-hhpc...ttRp.pshp....h................................................................................ 0 15 25 102 +11767 PF11934 DUF3452 Domain of unknown function (DUF3452) Assefa S, Coggill PC, Bateman A anon PFAM-B_1048 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 124 to 150 amino acids in length. This domain is found associated with Pfam:PF01858, Pfam:PF01857. This domain has a single completely conserved residue W that may be functionally important. 
26.30 26.30 26.30 26.40 24.90 26.20 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.69 0.71 -4.85 35 334 2009-06-22 08:24:09 2009-06-22 09:24:09 3 6 142 2 170 321 0 135.60 30 14.91 CHANGED .stssh..........t..ttss......loLoplL+us..+lslhcFFcclppas.chssl.sshass................plccLcpsashhtsLh++acchapplFhss.............stp..p.............psssphscla+FuWlLFLhh+sc.shphhcDLVsshpLLlClLslllhp ...................................................ttsss.......t..psss...loLTclL+ss..clS............lhpF.Fpc.h+ca....chssls.phhpp..............................................................phccLc+sasV.slla+.Kac..hFpc...lFtss.......................................pp..........................hptssssp.clacFsWhLFlhsKup..hh.htcDLV..sS...apLLLCsL-llhtp.......................... 0 42 67 113 +11768 PF11935 DUF3453 Domain of unknown function (DUF3453) Assefa S, Coggill PC, Bateman A anon PFAM-B_1335 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 239 to 261 amino acids in length. 
25.00 25.00 28.40 26.70 24.00 23.30 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.46 0.70 -4.96 30 316 2009-09-16 16:45:01 2009-06-22 09:26:07 3 9 245 7 222 311 2 219.90 24 24.22 CHANGED Lhcsp..DssVhKplI.sssslYtllhcal........hssssspphWsphstlKspIlsh.acos.hP......................GlKlsslKFlpplllsQopsss.st...........p.t.ps-hSLupVPssHsllphp.sLEtEuppLL-.tLLshhtpss.....................hsusllsAslNs...............................Luhlh+pRPph...ss+llsulhsass.sh......tss..h+lph+sV-+sl+hhlhphLK.pphs...phsuplpphls...chupshtclscps.p..cpsh..sccspcshc .........................................................pp..sstVhKpsI.shsplY.hshphh.....................ss.tpthWphhsthKspIlph...hcss.ss......................Gl+hpslKFlptllls.o.tssss............................p.pp-hSLs.hlP....t.s.H..shlpht.tLptEup...t.hL-..hLLphhhpss.........................hsuh.lhsslss..........................................................................Luhlh+pRP.h...hs.cllp.ul.shpssh..........ss..t+splpSlc+sh+hhlhpllK....psss..........hpspl..pphlt.............clutstt-..lhcph.p...tpth...tp......p.......................................... 0 68 123 184 +11769 PF11936 DUF3454 Domain of unknown function (DUF3454) Assefa S, Coggill PC, Bateman A anon PFAM-B_1847 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 60 amino acids in length. This domain is found associated with Pfam:PF00066, Pfam:PF00008, Pfam:PF06816, Pfam:PF07684, Pfam:PF00023. 29.10 29.10 30.00 30.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.79 0.72 -3.77 19 235 2009-06-22 08:32:58 2009-06-22 09:32:58 3 173 80 0 93 184 0 64.00 58 3.08 CHANGED sppa.TPPSQH..ss..ut.-sT...PsH.lp.....VP-.HPaLTPSPESPDpWSSSSPHS.hSDWSEGloSPss ....................stpa.TPPSQH.....uuuh....-sT........PsH..p.lp..........lss.HPYLTPSPESPDpWSSSSPHS.sSDWS..-us.oSPs.. 
0 12 21 51 +11770 PF11937 DUF3455 Protein of unknown function (DUF3455) Assefa S, Coggill PC, Bateman A anon PFAM-B_1386 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 174 to 251 amino acids in length. 25.00 25.00 27.70 25.20 24.40 24.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.63 0.71 -4.36 40 358 2009-06-22 12:19:23 2009-06-22 13:19:23 3 4 227 0 214 360 21 143.70 28 72.58 CHANGED Puup.plthsshGcGsQsYpCps.....sssssh.............................shl..GshAs................................LaDs...uupt.....................................................................................lG+Ha....st....sP........sachp.....sG..SplsGpphu.........sAPsss.........uslsaLhLsupst........Gshssls.V.RlsTsGGsuPs..sCsst...spphpVpYsAcYhFas .....................................t.phhh.shuhGhQsYpC.......stssth.............................hhh..us.As................................Lhct....s...............................................................................................lGpHa.........s....sP.............sachp.....ss......st..hhupths.........................sAPsss............sslsaLhLpsps.........Gshsslphl.Rl..sTtGGs.sPs.....sCsst......s....tthtV.YtApYhFa................................ 0 52 118 175 +11771 PF11938 DUF3456 TLR4 regulator and MIR-interacting MSAP Assefa S, Coggill PC, Bateman A anon PFAM-B_1799 (release 23.0) Family This family of proteins, found from plants to humans, is PRAT4 (A and B), a Protein Associated with Toll-like receptor 4. The Toll family of receptors - TLRs - plays an essential role in innate recognition of microbial products, the first line of defence against bacterial infection [1]. PRAT4A influences the subcellular distribution and the strength of TLR responses and alters the relative activity of each TLR. 
PRAT4B regulates TLR4 trafficking to the cell surface and the extent of its expression there [2][3]. TLR4 recognizes lipopolysaccharide (LPS), one of the most immuno-stimulatory glycolipids constituting the outer membrane of the Gram-negative bacteria.\ This family has also been described as a SAP-like MIR-interacting protein family. 20.60 20.60 20.60 20.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.16 0.71 -4.22 23 696 2009-06-22 12:34:13 2009-06-22 13:34:13 3 19 141 0 392 667 1 101.80 23 50.13 CHANGED ssKCpsC+hlssELc.ulscss.++c...l..tuh+lsscGcpcs+ph................ppSEhRLhEllEslC-+hp-Yslpp.csspp+hhhths.s.tphtt..s..th..............ss.sssLcttCcpllEcaE-plt-aatpcp.t..tslpchLCpcp......sptCp ....................s...Ct...sCph....ls.ph....t.thtph....t.pp...................thp.t.p.pt.th...................t.SEhclhEhh-t..lCp.p...............................................................................................................h.tCpthl.Ep..E-..l.phhhp.....tt.........pl.phlC.p........t.C................................ 2 109 147 264 +11772 PF11939 DUF3457 Protein of unknown function (DUF3457) Assefa S, Coggill PC, Bateman A anon PFAM-B_1822 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 162 to 197 amino acids in length. This protein has a conserved CSL sequence motif. 
25.00 25.00 25.20 26.50 21.70 24.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.77 0.71 -4.71 38 586 2009-06-22 12:36:17 2009-06-22 13:36:17 3 2 578 1 62 233 16 150.70 64 90.57 CHANGED thsshsttLtusaccltpspM+-lPlhN.suLsVcAlGFp.........hap...G.ph......lGlllTPWFMNLVhlP.....s..sspsass.hssGs+tsls.hPuGsa-Flsuphs.......slGsapuCSLFSPMhcFssptsAhssAcAsLssLhs........Ptppss...................................................s.........st.hsR.RshLpGth.s ................Fp.TuPtApVQAAFEEIA+RSMHDLSFLH.PoMPVaVSDFT............................LFE.G.QW.......TGsVITPWMLSAVIFP..............G.PDQlWPh.RcVuEKlGLQ.LPYGTMTFTVGEL..-....................GVSQYLSCSLMSPLS+S.hShEEGpRLsDDCARMlLS.......LPVoNPD............................................................s..PcsuR.RALLFGRR.S............. 0 12 32 45 +11773 PF11940 DUF3458 Domain of unknown function (DUF3458) Assefa S, Coggill PC, Bateman A anon PFAM-B_160 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 402 to 419 amino acids in length. This domain is found associated with Pfam:PF01433. This domain has a conserved FSAPV sequence motif. 
20.90 20.90 20.90 20.90 20.60 19.90 hmmbuild -o /dev/null HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.24 0.70 -5.61 177 1678 2012-10-11 20:01:02 2009-06-22 13:40:18 3 12 1582 20 408 1377 673 363.90 42 42.45 CHANGED sspp....asLslpQpsPsTssQ......s..p....KpPhhIPlsluLl.ss.s.G..pt....hshp..........................p.pVLplsp....scQoFsFp..sl...s.......pcPVsSL..LRsFSAPVpLch.shosc-LhaLhtaDoDsFsRW-AuQpLhtphlhphlsphp...........tt..t......lss.....t.hlsAhpslL...tc...ssl..DsAhhA.hLsL...Pocsplupth....p..ssDPsulatuRctlppplAptLpsphhtlYp..ph......tt..sap......sspshGpRuL+NhsLsaL....sttss.sts...phshpQapsA....s....NMTDphuALssLsps..p........sp..ppp.....tLssFhp+Wpc-sLVhDKWFulQAsssp..s.....ss...lppVcpLh.pHPsFshpNPNRlRuLlGuFu.sNsssFHstDGuGYpFlu-hllcLDshNPQlAARlh ...............................................p.sppYoLTlpQ...p........s...P...s.....T.....s.sQ......s.....-.........KpP...LHIPhsltLh.ss.c....G.cs...lsLphsG........................pssssV..LslTpuEQTF..sFc....sV.......h.........t..p...P.V.PSLLRpFSAPV+L-a.....c.aS-ppLsFLhpHspssFsRW-.AuQpLhsphl...ttslschp....................pG..ps..........lsl.ss..........c.lh-Ah+sVL...h.D........cpl......DPAhtAplL....oL....PSts.-lA-hh...............chlDP.AItpsR-sl..pcsLAs.pLt.--hhulYp.....st............t..ps..tYp...........s-ttshu+RsLRNssLpaL.......uh.s.-...........s..t..us.............s.LspcQacpA...........s.............NMTDplAALuAhlsu....p......hss......R-t....hhtcFpc+W+..pDuLVMDKWFhLQ.AosPs....s............s.s.....L-.pV+.tLh.pH..u.F..ohpN..PNR.................lRSLIGuFusuNPssFHAtDGSGYpFLs-hl...hcLsshNPQV...AuRLl............................................................................................................................................... 0 111 235 329 +11774 PF11941 DUF3459 Domain of unknown function (DUF3459) Assefa S, Coggill PC, Bateman A anon PFAM-B_897 (release 23.0) Domain This presumed domain is functionally uncharacterised. This domain is found in bacteria. 
This domain is about 110 amino acids in length. This domain is found associated with Pfam:PF00128, Pfam:PF02922. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -10.14 0.72 -3.86 120 2445 2012-10-02 20:10:03 2009-06-22 13:42:40 3 16 1643 30 652 2023 110 89.70 22 15.94 CHANGED haRpLLsLR+pc..PtLsss....phsssp.s.hhussslhspa+h.......sss.......tLt.lhhNLus...pssshs...........ssuclLausssstt..................sspLss.....tusshh ......................................YpcLlsLR+pp.......PhLs.tu............php.s..lt....s................s........s....s...s..h....l..hhhcch.............psp...............sLl.llsN...los.............pshshs.................tsstp.l..l...h.ss..ttst.....................shpL.s.hphhh.h............................................... 0 172 395 542 +11775 PF11942 Spt5_N Spt5 transcription elongation factor, acidic N-terminal Bateman A anon Bateman A Domain This is the very acidic N-terminal region of the early transcription elongation factor Spt5 [1]. The Spt5-Spt4 complex regulates early transcription elongation by RNA polymerase II and has an imputed role in pre-mRNA processing via its physical association with mRNA capping enzymes. The actual function of this N-terminal domain is not known although it is dispensable for binding to Spt4 [2]. 24.20 24.20 24.30 24.20 23.90 23.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.89 0.72 -3.37 23 279 2009-06-22 13:40:14 2009-06-22 14:40:14 3 21 236 0 194 277 0 95.10 31 9.27 CHANGED FlDhEAEVDD-E-E--.-p-t..s-..ptt.hhtph.............pcc+pp+ph-cccppppctssccls-hl.+..cRY.......tcp.....sstpstpsssshlspp...hLLP ..........Flt.EA-VDDE..-E-E-..-..E-.tsp-l..hp.t....tt..lcpst..............................................................hppD-pppR.chs..pphpp.pp.c.stEcluchh..+..c+Yucpp...........shttuust.usslsQp....hLLP.................... 
0 53 97 162 +11776 PF11943 DUF3460 Protein of unknown function (DUF3460) Assefa S, Coggill PC, Bateman A anon PFAM-B_2362 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 70 amino acids in length. This protein has a conserved WDK sequence motif. 25.00 25.00 52.00 51.90 22.90 22.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.00 0.72 -3.89 22 216 2009-06-22 14:40:06 2009-06-22 15:40:06 3 2 207 0 69 136 18 60.20 49 91.95 CHANGED s.YpS-hTpFlspLKtc+PpLtpcQ+tGRALLWDK.slDh-tptchcsu+VsQ+PYVYpsp ....YpS-hTQFLspLKppKPpLEpcQpcGRuLLWDK..slDhE.Eppp.hptA+VsQ+PYVY.pp.. 0 9 37 53 +11777 PF11944 DUF3461 Protein of unknown function (DUF3461) Assefa S, Coggill PC, Bateman A anon PFAM-B_2125 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 130 amino acids in length. This protein has two conserved sequence motifs: KFK and HLE. 25.00 25.00 58.30 50.50 24.60 24.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.65 0.71 -4.28 11 664 2009-06-22 14:41:40 2009-06-22 15:41:40 3 1 661 0 62 158 6 125.00 83 97.46 CHANGED MYssLcolGIssscpIE+YTLRpEuspDILKIYa+KpKG..-hFuKSlKFKFPRQpKoVlsDuGu.psY+ploEIussLphll-ELDplsp..+p+p-sDlKpclLcDL+HLE+VVssKIsEIEccLEcL ..MYDNLKSLGITNPEEIDRYSLRQEANNDILKIYFQKDKG..EFFAKSVKFKYPRQRKTVVAD.GlG.QGYKEVQEISPNLRYlIDELDQICQ....RDRS.....EVDLKRKILDDLRHLESVVoNKISEIEADLEKL.. 0 6 18 40 +11778 PF11945 WASH_WAHD DUF3462; WAHD domain of WASH complex Assefa S, Coggill P, Bateman A anon PFAM-B_2071 (release 23.0) Domain This domain forms part of the WASH-complex of domains and proteins that activates the Arp2/3 complex, see Pfam:PF04062. The Arp2/3 complex regulates endocytosis, sorting, and trafficking within the cell. 
The WAHD domain attaches to the FAM21 proteins via its N-terminal residues and to the microtubules via its C-terminal residues. 24.20 24.20 24.20 25.80 23.30 24.10 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.69 0.70 -5.12 6 199 2009-12-01 10:25:16 2009-06-22 15:43:54 3 8 109 0 106 172 0 237.80 44 56.81 CHANGED EuQsYSVPlIpPDLRREEul+QlsDALQYLppVusDlFsRIspRVEcNRs+LQAIssRlplAQAKIcKI+GSKKAhKVFSSAKYPAsDcLQ-YsSlFsG.ss-shp.K+s+a+lQoKhhsLD-+ulQEKLpYasVplNs+pccc.....Dss-EGLGSLP.RNlsSVSSLLLFNTsENhYKKYVhLDPLAGsVTKT+sslEsEcEEKLhDAPloITpRpQLERpsAENYFYVPsLGpVPEIDVPshLPDLPGlADDLhYSADLGPGIAPSsPus..IP-LPohs..T-sl.-sutscppttt.sPP ...............................hh..t.t...sh.phhpsLt.Lpth.tplF..p.plptplcps+pplp........sIsp+lshAQAKl-.pl...pG.S..+K..AIpVFSuA....KY.P.A...s....cph..p.c..asul..F.........s...s...spc.....s...h...........p.p....s.........p.h.........c....l.......p...s.............c..t......p......hs.c..psl...p.Ecl...p.......ha.......Vpspp.p..p.t........sptccGLGsLP.ps.lsSloSLLLFNTsEN.h.Y...........cc.....Y.....s..h....l.D...sL...t.....u....s....hs.c..oc..hhlt....p...p..c.....c...cLh-APhSIs.+R.-pLcpps....s-s..Yh.Y..lPsLG..pV.P-I..D..VPshLP..DLPGIAsDL..hYs.s.Dh.s.....su..IAPSssss....lP-LPsht..sp............................................................. 0 41 51 78 +11779 PF11946 DUF3463 Domain of unknown function (DUF3463) Assefa S, Coggill PC, Bateman A anon PFAM-B_2277 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 140 amino acids in length. This domain is found associated with Pfam:PF04055. This domain has two conserved sequence motifs: CTPWG and PCYL, plus a highly conserved CxxCxxHC motif. 
25.60 25.60 25.80 27.60 25.40 24.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.04 0.71 -4.58 67 326 2009-06-22 14:45:43 2009-06-22 15:45:43 3 8 316 0 145 297 55 138.20 51 37.89 CHANGED hhlSPGYuYE+AP-..........Q-pFLsRpcT+pLFRplht....tspp++.....WpFNpSPLFLDFLtG...pp.sYcCTPWGsPohslFG.WQ+PCYLl.sE.GYspTFc.......ELMEpTcW-pYG....supsP+CusCMlHCGYEsTAVh-shtp.hphhcssht .................hhlSPGYAYE+APD..........Q-HFLs.RccT+pLFRclFt....tGc.t.++.....Wp.FspSsLFLDFLsG....pp....sYcCTPWGsPshslFG.WQ+PCYLl...uE.GYspTF+.......ELMEsT-WDpYG....sGp..KCAsCMsHCGaEsTAVhcohspPhcshps...ht.......................... 0 44 98 125 +11780 PF11947 DUF3464 Protein of unknown function (DUF3464) Assefa S, Coggill PC, Bateman A anon PFAM-B_2676 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 137 to 196 amino acids in length. 25.00 25.00 52.10 52.10 24.70 24.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.06 0.71 -4.76 29 127 2009-06-22 14:52:24 2009-06-22 15:52:24 3 4 99 0 66 129 115 147.80 32 82.77 CHANGED .tss+pshsFc....Pcppppt.tt............................pstptptppppp........................tttt........psIPchVusRMhRRluhhoGlPohlGhusFlhuYhllspslh-.lPshlsllsShhhFhlGllGLSYGlLSASWDpp.sGSL...........LGhpphp.Nl...................sRhhpuh+stpp .................................................................................................................tp........................t......................................tptp.ptttttt.........................................tpppptsIPpsVupRMh+RhuhhsGlPshlGhusFss.Yhlhsp.thh-.lPshsshhsShhhFGlGllGlSYGlLSASWD.pp.GSh...........LGhpEhptNh...................schhpuh+tt........................... 
0 16 44 59 +11781 PF11948 DUF3465 Protein of unknown function (DUF3465) Assefa S, Coggill PC, Bateman A anon PFAM-B_2827 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 131 to 151 amino acids in length. This protein has a conserved HWTH sequence motif. 25.00 25.00 25.30 25.20 22.70 21.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.63 0.71 -4.33 31 258 2009-06-22 14:56:44 2009-06-22 15:56:44 3 2 187 0 36 166 4 137.10 50 92.21 CHANGED hpthhhh.h...hslhss...................huhs......htus-s.............................tlppAFpscQSslQVpG..........pGsVh+lLPDDscGSR..HQ+FIL+L...so............GQTlLlA....HNIDLA..PRlssLphGDpVpFhGEY...........EWNs.KGGVlHWTH+DPp..s+H..sGWLKHsGphY..p ...........................................................................................................................ssss.............................sLpQAYps.....pQ.SDl..QVQG..........hGpVsKV..LP..DD.N-GS+..HQKFIL+L...sS...............GQTLLVA....HNIDLA........PRIPsLchGDpVEFYGEY...........EWNc.KG.GVlHWTH+..DPp..sRHs.cGWLKHNGplYp............. 0 10 25 32 +11782 PF11949 DUF3466 Protein of unknown function (DUF3466) Assefa S, Coggill PC, Bateman A anon PFAM-B_2541 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 564 to 612 amino acids in length. 
25.00 25.00 25.60 26.10 24.90 23.60 hmmbuild -o /dev/null HMM SEED 599 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.22 0.70 -5.95 28 183 2009-06-22 14:58:19 2009-06-22 15:58:19 3 3 170 0 42 145 7 542.80 34 98.04 CHANGED hspplpLsslulLsusu....ssusA.........slYcllpls..............shssshpss....YuhAlssssp....................................................hsssucs+hph..t.........csssGlsh...t--s.ash-.ppapY.......hpshstsph..sasssps.aAph........................t.sWhs.h..pslssssssss...ssh..sps...................hsohsshlpul..sssGht..VGstosstp................t..h.hpsFppRuhspstst.........lsP...............sptspsGuassshs......hNG..s..lsVG.sSpshusss.....sp............hptsh.s.......ts.s...sh..............................sh......ap.s+AhlW.hDsss.............lsp...pph.shs.psssDssh..hhpupuhulsssG.........hshGsuassh.sscssh.......hpsulahspp..sps.sht.....sWssp..............DpshpsShAsDINcN.....................sllVGpt+p.......sGshss+hFhhDss...sss..spal.........s.....ss.FasGu...uocstuINNpsplVGhl-spssp-.....+sRt+pGFlYshss................pp.hsh.sstsWhhc..-lpsu.s........sshspssca+IlpAssINDsGsIsATAhhppsu......Yc...spup.sh..........susGt..s.phsVsVKLhPhsu....us.pss.lpscss-s.pshERp........GAu.hu..WhsLhsLs.Llhh......RR+ 
.......................................h.shphpllAhulLlusp......sAsA.........AlYpVhEls..................ssstshtss.......aGlAIpssss...............................hshsucs+hphsss.........css-GlsY..........p-Es.s.F.uh-..psasYs....-....hpsacpYChsph..hY..o..o..Cps...WAssph..........................s.tWps.......-.lsuspsssu....hAa...lts.........t.ssh........spStssllNul...sssups.......lG.pststphs........................t.hh.spshptR...ps........lsP.............t.spsl.stuWp...........ssu..s..hsVGShSps.sssp.....u....................................................................................sh.........ato+sslW.hsusu................ssp......hsh..stu.s..s.scs.sp.....hhpuSht..s..lspsG........splhuVGYsoa..ssssph.......hpAul.F.hss......sss.s.ts........sWssp.l...sshp.p.ss....DhhaoNSlsoDlNcN.....................hlslGsAK+tt.............sGus..sN+hFlh-ss.....oss....usal.........o..........sslFFoGu.....uucusuINshNplVGpl..Dssss+E.....tGKPRRKRGFIYshusst..............................ss.u+..hthhss+uWhLD..DLT.sss........sssssNspaRIl-AoDINDu.....GlIuATAh+sssu.......Y-..osupsuh.........................sussp....-plVAVKLhPIss......us..sss.Ip....p+....us-p..pssERp........Guu...hGhhhLhsLu.l.l.GF.....RR+............................................. 0 8 18 30 +11783 PF11950 DUF3467 Protein of unknown function (DUF3467) Assefa S, Coggill PC, Bateman A anon PFAM-B_2299 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in bacteria, archaea and viruses. Proteins in this family are typically between 101 to 118 amino acids in length. 
25.00 25.00 25.50 26.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.17 0.72 -3.97 52 326 2009-06-22 14:59:57 2009-06-22 15:59:57 3 3 305 0 126 294 210 88.80 37 81.29 CHANGED Mppppttt...........................................p.....lplc.lsp.......pshpus.....................YuNhshlsp.SspEFhlsF....ht....hh.ssts.psc..........lpsR.....llloPppAK+LhpsLpcslpca....EptaG.......................................................................................pIct .....................................p.................................................ptp....lp.I-..Lsc...........-sApGs.....................YuNlAlIsH.SsoE.FllDF...............lp.......hh.PGhP..cup..............VpSR.....llloPpHAKRLlpALp-Nlt+YEptaG.Ip........................ 0 59 106 118 +11784 PF11951 Fungal_trans_2 DUF3468; Fungal specific transcription factor domain Assefa S, Coggill PC, Bateman A anon PFAM-B_2291 (release 23.0) Family This family of are likely to be transcription factors. This protein is found in fungi. Proteins in this family are typically between 454 to 826 amino acids in length. This protein is found associated with Pfam:PF00172. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -12.30 0.70 -5.86 90 3818 2012-10-01 23:57:08 2009-06-22 16:07:12 3 72 166 0 3117 4021 1 286.50 11 52.41 CHANGED hll.paa...hsplss..hhsshsps................tssatphllshuhpst.................slhpulluhuuhah.......................pth...p........pppthpattp..ulptlppslpphpppts.........................pphlss....l...Lhlshh................-.....lspus....tt....Wth.HLpssppllpphtt...........................t.ptsp..............thpahhphhsah-llushsts...........................................................................ptshhshpthpphhps...........................st.t...........hc....hhGhsss......lhhl.....luclspLspptcptptp...............................................phtppspplppplp.php.p.sshptthssttthtpht...................................hph.aphushlYlh..phl.................hshsss.........................s.plpphlpplhphlptls....sssh............tsLh...a.........PlFlsG...spshss................ppRphh..........pph.hppht....pph.....ushppshpl..lpclWcppc..................................st............................phc..W..hclh..............c................................................hshslhlh 
...............................................................................................................................................................................................................a........h.......................................p...h.t..t.h.h..........h.shpp...........................................................................hlhp.u.ll..u.h..u..u..h..ah........................................................t.....t......................tttsh.pahtt.......u.l.ph.l.p.pt.l.t..p.ptt.p...................................................................tt.h....l.ss....h........hhL.hhh..................................................c...............hht.sp.....tp....................h.....H.h.pu..s...h.tllp...h.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................h...................................................................................................................................................................................................................................................................... 1 361 1347 2518 +11785 PF11952 DUF3469 Protein of unknown function (DUF3469) Assefa S, Coggill PC, Bateman A anon PFAM-B_2159 (release 23.0) Family This family of proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 108 to 439 amino acids in length. 27.60 27.60 28.00 28.50 26.30 27.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.77 0.72 -4.29 18 183 2009-06-22 15:08:59 2009-06-22 16:08:59 3 7 89 0 117 169 0 88.40 34 28.79 CHANGED l-shRshhEo-cpWphR+cFlhpphssa........................thDcLlsLuhlauNhsFLGC+Ysp.clh-+lhcMAEGIsltcs.ppp...pRsplhtppp ....................l-phRs.tEo-cpWchR+cFlL+phsca......................t.ph..-pLluLShlWuNtlFLGC..pYsp.clh-KlhpMu-GItlpptsphp...pRs.l.tp..t..................... 0 33 45 80 +11786 PF11953 DUF3470 Domain of unknown function (DUF3470) Assefa S, Coggill PC, Bateman A anon PFAM-B_2503 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 50 amino acids in length. This domain is found associated with Pfam:PF00037. This domain has a single completely conserved residue N that may be functionally important. 
23.20 22.70 23.20 22.70 22.60 22.20 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.01 0.72 -4.36 133 1106 2009-09-16 09:23:58 2009-06-22 16:12:12 3 11 970 39 322 784 1301 43.00 43 39.76 CHANGED ppp...alcLNu-LAc..tWP....sITp+K..-shsDA-cWc.....shpsKlp.h.Lpc ............ppalcLNAELAc..tWP....sITc+K..sshsDA--Wc.....GhtsKlp.h.Lpc........ 0 86 190 252 +11787 PF11954 DUF3471 Domain of unknown function (DUF3471) Assefa S, Coggill PC, Bateman A anon PFAM-B_2961 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 98 to 114 amino acids in length. This domain is found associated with Pfam:PF00144. 26.60 26.60 26.60 26.60 26.40 26.50 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.28 0.72 -4.16 67 608 2009-09-14 14:36:04 2009-06-22 16:15:38 3 30 431 0 263 599 75 102.60 19 21.22 CHANGED thpttttsshsstsLspYsGpYpps......h.........Gshslshcssp....Lhl.phssps.....t......hpL.hths..tspFhhchss...hp................hpFphs.ssGplsphshp.....ts..................hpshthtphs ...........t...t..tssssstsLpsYsGsYpps....h................h.......GslpV...p..t....c.s.sp.........Lhl.phssps....t............hpL.pa.hs...tcp.F.hhp..hss......hp.......................htFph..s..s.Gp.hsphphphs.............................t............................................ 0 85 167 217 +11788 PF11955 PORR Plant organelle RNA recognition domain Moxon SJ, Barkan A, Coggill P anon Pfam-B_1780 (release 8.0) Family This family, which was previously known as DUF860, has been shown to be a component of group II intron ribonucleoprotein particles in maize chloroplasts. The domain is required for the splicing of the introns with which it associates, and promotes splicing in the context of a heterodimer with the RNase III-domain protein RNC1. 
All of the members are predicted to localise to mitochondria or chloroplasts [1]. It seems likely that most PORR proteins function in organellar RNA metabolism [2]. 20.50 20.50 20.70 23.50 19.70 20.40 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.02 0.70 -5.69 27 336 2009-09-11 12:40:23 2009-06-23 17:10:07 3 12 23 0 211 323 0 298.50 30 73.17 CHANGED hh+-.sLDph..lp+p+cl+hshpl+cllhupPs...ph...........lslptLsch.pcpLsl........+sh.sFlc+aPplFclah...........tspt.hthpLTststsLhpcEtplhpppp..sshss+LpKLLMhohs+.plsLcpltpl+t-LGLPcDFcssll.caPshF+lsp.......tsstphLELlsa...cssLAV..oslEctt..p................ttthppthsFs.lpaPpGaplp+phpptl.........pcaQchPYlSPYp...ssp.pls....sso..tchEKRsVuVlHElLSLTlEK+sphc+LscF+ccaslsp+hpchll+HPGIFYlStK...spppTVhLREAYccs.pLl-+..sPlhhh+-KhhcLhphsthtppt ..............................h+s.thDth..hppp+ph.phhhtlhphlhppss...th................hslp.ltph.tp.ltl......hthhtalc+aPplFphh..............tt..hhplTsphttLhtcEtthhpp.pp...s.hsp+LpKLLMhoh.s+.plslpplttl+h-..LGLPpDF.pss.ll.pa.Pph..Fplsp...........pttthLcLssa....sspLAl..oshEpttt..........................t..t.tthsFs...hphP.pGap.hpcphpph.l.........pcaQ.clPYhSPYp........csp...tlp.....so....tphEKRsVullHElLSLTlcK+h.hcplspF+cthsls...pp...hpphlh+.HPslFYlSh+......spppoVhLREuY..p........p.........s..p..Ll-p..sP..lhhh+c+hhpLht.sh....t......................... 1 21 125 171 +11789 PF11956 KCNQC3-Ank-G_bd Ankyrin-G binding motif of KCNQ2-3 Cooper E, Coggill P anon Pfam-B_10256 (release 23.0) Motif Interactions with ankyrin-G are crucial to the localisation of voltage-gated sodium channels (VGSCs) at the axon initial segment and for neurons to initiate action potentials. This conserved 9-amino acid motif ((V/A)P(I/L)AXXE(S/D)D) is required for ankyrin-G binding and functions to localise sodium channels to a variety of 'excitable' membrane domains both inside and outside of the nervous system [1]. 
This motif has also been identified in the potassium channel 6TM proteins KCNQ2 and KCNQ3 [2], that correspond to the M channels that exert a crucial influence over neuronal excitability. KCNQ2/KCNQ3 channels are preferentially localised to the surface of axons both at the axonal initial segment and more distally, and this axonal initial segment targeting of surface KCNQ channels is mediated by these ankyrin-G binding motifs of KCNQ2 and KCNQ3 [3]. KCNQ3 is a major determinant of M channel localisation to the AIS, rather than KCNQ2 [4]. Phylogenetic analysis reveals that anchor motifs evolved sequentially in chordates (NaV channel) and jawed vertebrates (KCNQ2/3) [5]. 20.50 20.50 24.00 67.00 18.60 18.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.45 0.72 -3.96 4 127 2009-06-24 10:53:54 2009-06-24 11:53:54 3 9 37 0 50 111 0 100.90 62 12.78 CHANGED s.pDpsushshpussR-SDTsLSl.SVsHEELERS.SGFSISQs+-sh.shGss..............Auuus..ps+sRP.alAEGEoDTDoD.aTPsGs..PlSuTG-G.hu-ssWsu .............pDp.usR.pcushRDSDTslSl.SVsHEELERS.SGFSISQs+Esh...shss.s.................uusu...hschRP.YlAEGEoDTDoD.hTPsGs..PhSuTG-G.huDssWs.... 1 2 6 19 +11790 PF11957 efThoc1 THO complex subunit 1 transcription elongation factor Wood V, Coggill P anon Pfam-B_2059 (release 23.0) Family The THO complex plays a role in coupling transcription elongation to mRNA export. It is composed of subunits THP2, HPR1, THO2 and MFT1 [1]. The THO complex is a nuclear complex that is required for transcription elongation through genes containing tandemly repeated DNA sequences. The THO complex is also part of the TREX (TRanscription EXport) complex that is involved in coupling transcription to export of mRNAs to the cytoplasm [2,3]. 
21.00 21.00 34.60 25.00 19.80 20.60 hmmbuild -o /dev/null HMM SEED 492 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.76 0.70 -6.00 21 319 2009-09-11 16:24:37 2009-06-24 18:08:11 3 16 247 0 226 332 6 432.10 28 70.86 CHANGED tplhsLLDllhhhsc......pEp..spsuhlFa.Llc-llDh.TlssCcclFs.YlESRh-phpptphhp..+sh..............lLRpCN-LLRRLS+.....s.ccssFsGRlhlFLsphF....Plu-+SulNl+G-aps..ENlTsas.........................c.s.pppssssshclDhs..........................................................hYshFWuLQpaFssPspl...asst.phssFcpslcsslssFpphpp....-h.tp.ttptpt...-sptusc+ptss.-s..................p-hssu....FssKYLTuccL.....h-LplsDss.............FRRplLlQhLIhhpalhslsppppsp..............psh......hhLspccspahpphcptltphLppsh..p.......GthFhRhVcplLsp-csWlpWKsEGCP..shccs......hshtsh.ssccs......hpKhhhs...h............h.hGstsLshLhp.cspsulcpLKctpR.hpl......Pshcsahpslh-schchDhupsp....ppp.thcsctspsW+sLRlhu+ppLttFs+ls-s.pslcslh.........-t.sspcsshh.pshsp.ppcch ......................................................................................h.thlslh..hsp..................pt...spssh.ah..Llt-lh-s.s..lstCcplFs.alEpphph..hpt..p.hh...p..Kph.................................................lLRhCN..-...LLRR.LS+.............s..psslFCGRl.lFLuphF....PLu-+S.u............lNlp......upash...-NhThas...............................p.tptptsss..pth.lDhs................................................................................................................hY.hFWuLQchFpsPsph...apt...........patsF...t..pth..ptsLssFp..phph.........p..........................ptpt.pcch.tt..t.....................................................t..tsp........a.s.KaLTopcL............hcLp....Ls...Dss.............FRRplLlQhLIlhpaLhs.sphpptp....................................................hLsppp............ppah..pphpp.tl...hphLppss...c............GthFhchlcplLp.p-csWspW.....K.t-.u.Cs..shp+.............ts......t.s.ppt.................hpKhh.........t........................................h.hGs...pLspLhp....p.sp....s..
..hpthcs......pR....h................Psl.ppahpth.h-...phD.tp..........................cpphth.sp....tshsW+sLRhhu+p.....p..thF..p...h...s....p.....ptl....hh......t...............p.........................t................................................ 1 81 129 187 +11791 PF11958 DUF3472 Domain of unknown function (DUF3472) Assefa S, Coggill PC, Bateman A anon PFAM-B_2598 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, eukaryotes and viruses. This domain is typically between 174 to 190 amino acids in length. This domain has a single completely conserved residue G that may be functionally important. 25.00 25.00 26.50 25.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.28 0.71 -3.94 23 181 2009-06-24 17:14:14 2009-06-24 18:14:14 3 13 133 0 72 188 25 176.30 25 40.36 CHANGED YhGhQssu.......pocRplL.FSVWustsocss..h..-.........upsshss..pFGsEGo.Gspsh.hsYPWpuGpsY+Fplpst....psGtohasuahpDss..s...p..cahhIAShphP+ssshlsssto...FlEsFs....scpGshsR.puaas.spauhstts.phhclhpsshosssssspt..phsa.suGsss.sthalpssGhh .............................YhGhQsps............pspcplL...FSlWsshsssss.....h....h..........ussspss....pF.GsEGo..Ghpst.hsas.Wpsu.p.tY+hhlpst......tss....pTpa...ssahpDss..s............s..phphIuohchPpsssshstshu..FlEsas......ppGptt...R..tuhhp..stauh..shss.....ph.phpptp....hohs.ssp......pa.thuss..s....thhhhpssG..t................................................. 0 35 53 61 +11792 PF11959 DUF3473 Domain of unknown function (DUF3473) Assefa S, Coggill PC, Bateman A anon PFAM-B_3065 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is about 130 amino acids in length. This domain is found associated with Pfam:PF01522. This domain has two completely conserved residues (P and H) that may be functionally important. 
29.60 29.60 30.30 31.30 26.20 29.30 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.83 0.71 -4.73 51 224 2009-06-24 17:18:14 2009-06-24 18:18:14 3 3 209 0 97 225 89 131.50 30 46.29 CHANGED SSlaPlp.HD...hYG....hPcu.PRhP.aphsssst.....................lhEhPloThplh....stplPsuGGGaF..RlhPYs.....lh+hslpplNpps.pPslFYhHPWEl..DPsQPR....l..shshcs+FRHYhNLspsptRLcpLL.....p-Fcasphsplhtptt ............................SSl.Psh...s...pYG.....h.ps..PRhs.athtpt.........................lhElPsoss.hh.....hplPhuGGhah..RhhPht......lh+hhlppl.cp-.ts..hlhYFHPWEh....Dst.pP+......h...phshh..p..phRpassh.pphppRLcpLl.....psapasphpphh...th......................................... 0 34 79 92 +11793 PF11960 DUF3474 Domain of unknown function (DUF3474) Assefa S, Coggill PC, Bateman A anon PFAM-B_3095 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 126 to 140 amino acids in length. This domain is found associated with Pfam:PF00487. 27.80 27.80 27.80 27.80 27.70 27.70 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.02 0.71 -4.15 30 528 2009-09-16 09:00:21 2009-06-24 18:21:57 3 3 223 0 164 530 0 79.10 35 20.44 CHANGED MAoWVLSEC.GL+PLP.+lY.s+PRsuh....tpssss..plRhhssspshs.s.th....hs.s..t.....RN....WuLpVSAPL.Rlsolsc--cc...................ppspcFDPGAPPPFpLADIRAAIPKHCWVKDPW+SMSYVVRDVAlVFGLAA ......................................................................th................................................................................................................................................s....P....t....s.P.PF.olu-I+sAIPtHCap+sshRShSYVl+Dlsls.....h........ 0 36 99 143 +11794 PF11961 DUF3475 Domain of unknown function (DUF3475) Assefa S, Coggill PC, Bateman A anon PFAM-B_3098 (release 23.0) Family This presumed domain is functionally uncharacterised. 
This domain is found in eukaryotes. This domain is about 60 amino acids in length. This domain is found associated with Pfam:PF05003. 29.70 29.70 32.00 30.90 23.70 23.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.49 0.72 -4.14 20 207 2009-09-11 16:23:54 2009-06-24 18:24:45 3 4 18 0 134 194 0 55.90 43 11.06 CHANGED ILAFEVAsshoKsssLhc.SLS-pslppL+ppsLpS-GVcpLVSsDpstLLpLusAEh .....ILAFEVAsshsKsss.Lap.SLS-pplp+L.+pclLpS-GVppLl.....SsDps.LLpLAsA-h...... 0 11 78 109 +11795 PF11962 Peptidase_G2 DUF3476; Peptidase_G2, IMC autoproteolytic cleavage domain Assefa S, Coggill PC, Bateman A anon PFAM-B_3119 (release 23.0) Domain This domain is found at the very C-terminus of bacteriophage parallel beta-helical tailspike proteins. It carries the enzymic residues that induce autoproteolytic cleavage to bring about maturation of the folding process of the helix in a chaperone-like manner. The domain thus mediates the assembly of a large tailspike protein and then releases itself after maturation. These C-terminal regions that autoproteolytically release themselves after maturation are exchangeable between functionally unrelated N-terminal proteins and have been identified in a number of bacteriophage tailspike proteins [1]. 
25.00 25.00 25.30 25.40 24.60 24.90 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.53 0.70 -5.24 19 176 2009-06-24 17:27:32 2009-06-24 18:27:32 3 11 141 9 16 159 78 206.70 42 34.28 CHANGED ssstIhuStuspss..tsYslshGhGsssss..stslKhp.....uhsGslphsGslssuu..uDYAEhFEShsGpsIcs...GhlVTL-..G-KIRhAptuDp...hlGVlSpTuullhssushpWpsRYLpsEFGshlYcphp......D-........pus........hhchPh.NPcacPshc..YhsRpcRsEWslVGLlGQlhVRsDpTVpsG.chlp..ups.G......IuTpss..pG..hpVMclTssastp+GhulALs....hl+ ....................usptIhup.u.pu-....puYShhhutGsspssp...shssKW.......upsGphhhsGAshsuS.hoDaAphFEohDsphI-s...GYhVThs...t-KIRhAsssDs...ILGls.....SuTsAlIusuuuhpWpcRYhpDpFGth.Y.psp.....................................up...........phtPllNs-aDPspc...YluRc-RsEWhsVGLIGQhhVRsDsTspst.sYhc..sss.G........IATKu-..sG..ahVhKhTut.......s.h.....h........................ 0 7 9 10 +11796 PF11963 DUF3477 Protein of unknown function (DUF3477) Assefa S, Coggill PC, Bateman A anon PFAM-B_3147 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in viruses. Proteins in this family are typically between 246 to 7162 amino acids in length. This protein is found associated with Pfam:PF08716, Pfam:PF01661, Pfam:PF05409, Pfam:PF08717, Pfam:PF01831, Pfam:PF08715, Pfam:PF08710. 
25.00 25.00 30.80 30.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 355 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.19 0.70 -5.46 4 151 2009-09-16 08:59:33 2009-06-24 18:29:47 3 10 56 0 0 167 0 295.00 47 7.14 CHANGED MuKhuKYGLGFKWAPEFPWMLPsAuEKLuNPp+SEEDGhCPSsAQc.cstGhshsNHV+VDCpRL.u.ECCVQSulIRDIFVcEDPpcVEs.ThhALQ.GoAVLVKPshthSlpAhtsLGshPKshsMGLF+RhslCNTtcCssctHVAapLahlpPsGVChGsGcFlGWhVPlshhPpYu+pWlQPWslhLRKsGpKGuhsScHhRpuhphsVYDFsVEDAht.VHsEPKGKaSpKAYALl+sYRGlKPlLaVDQYGCDYoGsLAcsLpsYG.-hTLp-M+th.slWtssh-h-VsVAWHVsRDsRhsMRLQohATlpulsYVApPTEDlVDGsVVl+EPsHlLussuIlL+hPs ............................M.KhsKYGLth+WAPEF.Whh.sAtEcLssP.pS-.shhCsosuQt.cshGhs..NHVhlDCppl.t.EhpVQSslIR-Ihhppc...lEs.h..Alp.tpAlLlpssh.......hshGs.PpshsMGLF+ph........tsptaVsapL.hhpss.sshGt.pFlGWhlPhshhP..s+pah..h.hhlcctGpc.sh.ps+hhtshp..sYsFpVEDAh-.V.HDEPKGKaScKAYALl+sYRGlKPlLaVDQYGCDYoGsLAcsLpuYu..-hTLp-M+th.slWtpshsh-VsVAWHVsRDsRhVMRLQohATlpulsYVApPTEDlVDGsVVl+EPsHlLussuIlL+hPp. 0 0 0 0 +11797 PF11964 SpoIIAA-like DUF3478; SpoIIAA-like Assefa S, Coggill PC, Bateman A, Eberhardt R anon PFAM-B_640 (release 23.0) Family These proteins adopt an alpha/beta SpoIIAA-like fold, similar to that found in STAT (Pfam:PF01740). They adopt open and closed conformations arising from different arrangements of their alpha-2 and alpha-3 helices. They may be membrane associated and may function as carriers of non-polar compounds [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.48 0.72 -3.76 151 602 2009-09-14 13:55:16 2009-06-24 18:34:40 3 3 394 6 265 584 59 107.30 20 83.94 CHANGED sll.slphs..GcloppDhcp.lhstlcph.hp..ppsplplLh-hs.sh..puhsh.tAhhpph...h.....hp.phpc...hc+lAllussp.ahphhsphhsh.h.....hssph+hF..pppspAhsWLpp .............................................hlshthp..Gcls.pp-hcp....hhsh.lctt.hp....ppsplp.lLhchs..ph....pGh..sh..sA..hhcsh.......pht.....hp..phpc....hc.+l....A.llu.ssp.Whphh.s..plhsh..h.......hstcl.+hF..scppcAhtWLp.t..... 0 106 181 232 +11798 PF11965 DUF3479 Domain of unknown function (DUF3479) Assefa S, Coggill PC, Bateman A anon PFAM-B_1065 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is about 160 amino acids in length. This domain is found associated with Pfam:PF02514. 27.30 27.30 27.80 30.40 26.90 27.20 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.66 0.71 -4.44 61 302 2009-06-24 17:37:06 2009-06-24 18:37:06 3 6 209 0 139 323 268 160.50 33 12.73 CHANGED h+lVhloh-sphsuultpAstpLppc..hsuLclshasss-htcsstshpphppslspADllhuohlFhE-hlpslhstLpstp.pscshlsh.Shsplh+LT+lGpFsh....spspuuhhshlK+hts...pppusspspt.......phphlRplP+lLKalP.GKAQDlRsah ..................+lVhlsl-spapuulppAsp..pLspp...h.ul-lssahspEL...........ccstsh...pphpp-lspADlhluSLlFhE-hsp.hlhpslpspRpphcAhllF.ShPElM+LsKlGsFsM...........upspuuhsphhK+ht.......p.p..p...u..s.shtsu....................hlKlL+plP+lLKalP.sKAQDhRsah............ 0 38 82 118 +11799 PF11966 SSURE Fibronectin-binding repeat Pallas J, Coggill P anon Pfam-B_5998 (release 23.0) Repeat Streptococcal surface repeat domain - SSURE - is a protein fragment found to bind to extracellular matrix protein fibronectin but not to collagen or submaxillary mucin in Streptococci. 
Anti-SSURE antibodies recognised the corresponding protein on the surface of streptococcal cells. The full-length proteins are thus fibronectin-binding surface adhesins. 25.00 25.00 29.60 47.50 20.30 19.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.76 0.72 -4.39 6 697 2009-06-24 17:48:00 2009-06-24 18:48:00 3 17 253 0 24 516 0 80.90 74 34.13 CHANGED DNGsAKNPAL.PLcG..LTKGKYFYpVsLNGNTsGKcGQALLDQlRANGo+oYpATV+VYGs.KDGKsDhsNlVATKpVsIsIN .......DNGsAKNPALsPLtp..hTKGpYFYpVsLsGNssGKpcQALIDQhRANGTQTYpATVpVYGN.KDGKsDLsNlVATKpVTININ....... 1 6 6 12 +11800 PF11967 RecO_N Recombination protein O N terminal Mian N, Bateman A anon COGs Domain Recombination protein O (RecO) is involved in DNA repair and Pfam:PF00470 pathway recombination. This domain forms a beta barrel structure. 21.00 21.00 21.10 21.30 20.40 20.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.43 0.72 -4.12 154 4019 2012-10-03 20:18:03 2009-06-26 14:16:15 3 8 3998 7 873 2670 1561 78.10 27 31.53 CHANGED M....h...pspulVL+spsasEsstllslhTccpG+lssls.+Gu.+p.+..S+htu.hhpPhshlchhht......tpps.Lts.lsps-hlpsh ......................ppculVL.+sp.sa..sEsshllslhTcctG+lphl.A.+Gu...+p..t+......Sphtu..hlQPFshlphphp.........t....p.....p.....s.lps.lpps-hlp............................ 0 293 578 740 +11801 PF11968 DUF3321 Putative methyltransferase (DUF3321) Wood V, Coggill P anon Pfam-B_6141 (release 23.0) Family This family is broadly conserved, being found in fungi, plants, arthropods and up to primates. it may be a methyltransferase. 
19.80 19.80 19.80 20.20 19.60 19.60 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.65 0.70 -4.72 25 219 2012-10-10 17:06:42 2009-06-26 16:42:37 3 2 191 0 159 216 23 192.80 31 60.29 CHANGED upIcucIcphGGL-uYQhASphGQspcRGGDSSKlLVcWLpsthhphctp..p...................................L+sLElGALSspNshSpsshFc.VsRIDLNS..QpsuIhcQDFM-RPLPps-sE+FclIShSLVLNFVPsstpRG-MLpRhspFL+ssp......................t..s.LFLVLPLPCVsNSRYhspppLppIMsSLGFthspp..+popKLhYaLach...........psp........hpspha+KcplpsGss+.NNFsI ...................................................................................................t.l.tthtt.GGhptYQ.uS...up..p+t...ou+hlhphlt.......................................................................lphL-lGults..............N.hs.p.......thhp.shtIDLps..pp.tlhppDFhc................p........P....l..............s........p......t....p...............s....-.......p.................FcllshSLVLNaVPsstpRGpMh++spchLp.suh................................LFll....lPts.....C.l..p..NSRahs......pphpthhpuLGaphhp...cpss+lh.ahhach..................t................................................................................................... 0 50 81 129 +11802 PF11969 DcpS_C Scavenger mRNA decapping enzyme C-term binding Moxon SJ anon Pfam-B_9894 (release 8.0) Family This family consists of several scavenger mRNA decapping enzymes (DcpS) and is the C-terminal region. DcpS is a scavenger pyrophosphatase that hydrolyses the residual cap structure following 3' to 5' decay of an mRNA. The association of DcpS with 3' to 5' exonuclease exosome components suggests that these two activities are linked and there is a coupled exonucleolytic decay-dependent decapping pathway. The C-terminal domain contains a histidine triad (HIT) sequence with three histidines separated by hydrophobic residues. 
The central histidine within the DcpS HIT motif is critical for decapping activity and defines the HIT motif as a new mRNA decapping domain, making DcpS the first member of the HIT family of proteins with a defined biological function. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.73 0.71 -3.86 99 1192 2012-10-01 23:45:21 2009-06-26 17:03:18 3 21 724 20 661 5559 3144 113.70 25 50.60 CHANGED phla...shlhptpp..sppl....la..cD........-thlshpDhp........Pp.........ushHhLsIP.++..................cIpolpD......L.....spc.chsLLtchhphupclhppph.t..............h.ps.hphshHhtP..ohaHlHlHllu......h.sptht .............................................................hla.s.hl...ttp.........ssph......l.a..cD................-phlshp.Dlp......Pp.......................AshH..hLllP..+.c.........................................cIto.lp.c.......L....................st.c....c.h....s....L...L.t.c.h.h..p...h..u...p...c..l.h.p..p.p.h.t............................................s..p.p...h..ph....G.h....H....h.ts.............oh.h.H.LHlHllu..............t............................................................................... 0 218 363 534 +11803 PF11970 Git3_C G protein-coupled glucose receptor regulating Gpa2 C-term Wood V, Coggill P anon Pfam-B_11657 (release 23.0) Domain Git3 is one of six proteins required for glucose-triggered adenylate cyclase activation, and is a G protein-coupled receptor responsible for the activation of adenylate cyclase through Gpa2 - heterotrimeric G protein alpha subunit, part of the glucose-detection pathway. Git3 contains seven predicted transmembrane domains, a third cytoplasmic loop and a cytoplasmic tail [1]. This family is the conserved C-terminal domain of the member proteins. 
21.00 21.00 21.00 21.10 20.80 20.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.48 0.72 -4.22 19 170 2012-10-03 04:04:29 2009-06-26 17:08:30 3 3 114 0 132 185 0 74.00 30 13.66 CHANGED +cRRppIp+Qh+uIFIYPhuYlhlWlFPhlhcshpas.ap.......h..pptPhhWlshlushhpPhsshVDshVFhhRE...+PW+h .........t..php+lpR.l+hhhlYPluYlllhL.Plsstthths.............p.spsPshhhhhluushhshsGhVDsllaslpc...+....h...................... 0 28 68 110 +11804 PF11971 CAMSAP_CH CAMSAP CH domain Bateman A anon Bateman A Domain This domain is the N-terminal CH domain from the CAMSAP proteins.i 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.84 0.72 -4.33 27 382 2012-10-03 10:10:54 2009-06-26 17:12:11 3 57 114 0 173 3124 9 79.90 31 5.82 CHANGED hspscpsshhs.lcslhcDhsDGssLstllHaYpPphlplcslshppshShtcslhNlpllpphspcpL..psh.h.sh-Dllhs. ...................stpssh..t.s.tV..psLs..p.shpDGshL.C.tLlpp...Y..pPp.hl.s...L.....c...-Is.........h..+sp.h.S............h..slp.Nlphh...s..hs.ppth..p...h.p..Dhh.h................................................... 1 40 54 97 +11805 PF11972 HTH_13 HTH DNA binding domain Fenech M anon Pfam-B_8688 (release 14.0) Domain This is a helix-turn-helix DNA binding domain. 24.20 24.20 24.20 24.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.56 0.72 -4.12 12 171 2012-10-04 14:01:12 2009-06-26 17:21:52 3 4 128 0 53 149 5 53.90 53 15.22 CHANGED LPpLl-LlloRPlVSuuMlscpLclTspuAlpLVpELGl...REhTGRGRaRAWGll .........LPpLhELlLppPLVSuthltKcLpVT.puAh+...L...l...sE...LsL....RElTGRGRFRAWGll...... 0 7 28 39 +11806 PF11973 NQRA_SLBB NQRA C-terminal domain Bateman A anon Pfam-B_3622 (release 8.0) Domain This family consists of the C-terminal domain of several bacterial Na(+)-translocating NADH-quinone reductase subunit A (NQRA) proteins. 
The Na(+)-translocating NADH: ubiquinone oxidoreductase (Na(+)-NQR) generates an electrochemical Na(+) potential driven by aerobic respiration [1]. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.20 0.72 -3.63 74 761 2012-10-03 10:59:06 2009-06-26 17:24:09 3 6 729 0 165 657 475 51.30 46 11.49 CHANGED VlALuGPpVpp..PRLlRTplGAslspLssGc..lpss.psRlISGSVLsGcpstu .....VlALAGPpVpp..PRLlRThlGAsLsp..Lss..u-....ltsu....-......sRlISGSVLoGppup.s..... 0 44 94 137 +11807 PF11974 MG1 Alpha-2-macroglobulin MG1 domain Bateman A anon PDB_2a73 Domain This is the N-terminal MG1 domain from alpha-2-macroglobulin [1]. 28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.78 0.72 -3.43 168 1084 2012-10-03 16:25:20 2009-06-26 17:28:18 3 42 1060 0 261 942 44 99.60 36 5.76 CHANGED lsslu.lt.spps.....................phhlps.ss...upshss...pVpl......hs.pps.t.........sht..psp..os......spuhhph.........................................................lph....psshshs.......phslsupss ....................lSDlG.lo.s+ph........cspl.........clFspSLcs..Gts.sGl.cVpL..ls.cpGp.............sLs..pus..oDupG+lplcss.................................cssslllA+.....c...................csphohLcL....phsuLDLu...-FsluGt..s................................................... 
0 75 154 208 +11808 PF11975 Glyco_hydro_4C Family 4 glycosyl hydrolase C-terminal domain Mian N, Bateman A anon IPR001088 Domain \N 21.90 21.90 22.80 21.90 20.90 21.60 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.45 0.70 -4.57 8 2959 2012-10-02 13:39:36 2009-06-26 17:33:48 3 5 1463 33 464 1786 410 216.50 29 50.24 CHANGED -clclphhGlNHhsWhpclphp.GsshhspLh-tluphucss.phcs..h.tssascshh+utsh.ths..hlssoatpahhhs...cahlchphssh...........hsRsppllcchpKplhpp...........htchhKppp.hlpspcl-c+.uuhhu-tthshl...aNsccthhhlNl.NsGtIsNlPssusVEVsClls+NGspP.hhlGsLPppltu.LhcpplsspchssEAhlTtchpplapAhhhsPpV ..............................................................................................plphchhGlNHhsahppl..hc..Gps.h....h.s.....p.......Lhc...tl....t...p............t...t........................h.........s..........c...........s............hh............t...s..........h..s.............p.....s..h..l.+.........h....t....................hlP.s..s..Y.h+Yahhs.............c...h..lp.....th..tc.h...............................hsRu.....ps...l.p.........chc+p.l.h.............................................ph..h..c.s.......p.....h..t...h...p....s..p...c..l.-..t.....+....s...us..h..Y...u.c...........s..shphl...................as.s.c.t..s..h.h.h.lNl.N......p............G..........t.......IsNlP...s-ssVElsClls.tsGspP..htlG.pl..Ps.t.htG....Lhps.hshEpLsl-Ah....lstshpcshpAhshsPhs................................. 0 162 297 368 +11809 PF11976 Rad60-SLD Ubiquitin-2 like Rad60 SUMO-like Wood V, Chahwan C, Coggill P anon Wood V, Chahwan C Family The small ubiquitin-related modifier SUMO-1 is a Ub/Ubl family member, and although SUMO-1 shares structural similarity to Ub, SUMO's cellular functions remain distinct insomuch as SUMO modification alters protein function through changes in activity, cellular localisation, or by protecting substrates from ubiquitination [1]. 
Rad60 family members contain functionally enigmatic, integral SUMO-like domains (SLDs). Despite their divergence from SUMO, each Rad60 SLD interacts with a subset of SUMO pathway enzymes: SLD2 specifically binds the SUMO E2 conjugating enzyme (Ubc9)), whereas SLD1 binds the SUMO E1 (Fub2, also called Uba2) activating and E3 (Pli1, also called Siz1 and Siz2) specificity enzymes. Structural analysis of PDB:2uyz reveals a mechanistic basis for the near-synonymous roles of Rad60 and SUMO in survival of genotoxic stress and suggest unprecedented DNA-damage-response functions for SLDs in regulating SUMOylation [2]. The Rad60 branch of this family is also known as RENi (Rad60-Esc2-Nip45), and biologically it should be two distinct families SUMO and RENi (Rad60-Esc2-Nip45). 23.70 23.70 23.70 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -4.29 42 1068 2012-10-03 10:59:06 2009-06-26 17:36:11 3 36 345 50 689 8130 273 69.50 32 32.96 CHANGED lplhl+stssc..clthclctsoplpcLh.ptappcpslst.....pplcLlFDGcplpsspThp-hslcDsDh..l-lll ................................................................plpVt.s..p.-ss...pl.p.F..+.l...K..c....s....T......Lp...KLh...cu....Ys......c....+.....p...G...l..sh..................psl+Fh...F.D........G..p......p.l......s....t...s....p...T.P...ppL.-.ME.D.t.Dh..I-V............................... 1 230 363 528 +11810 PF11977 RNase_Zc3h12a Zc3h12a-like Ribonuclease NYN domain Bateman A anon [1] Domain This domain is found in the Zc3h12a protein which has shown to be a ribonuclease that controls the stability of a set of inflammatory genes [1]. It has been suggested that this domain belongs to the PIN domain superfamily [1]. This domain has also been identified as part of the NYN domain family [2]. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.90 0.71 -4.39 28 659 2012-10-03 20:43:45 2009-06-29 14:20:43 3 19 191 6 414 616 25 146.30 36 23.62 CHANGED phRhlVIDGSNV.Ah......sH......sppch......FSs+Glthslpa..FhpRGHcslslFlspa..phpt...................ssthp-pchLpcLhc....tshlshsP.....Spph.Gpp..saDDRaILchApcpcGllV.SNDpaRDlhp.......cpsch+chlcp..........................RlLhaoFs.........tDthh.ssD.PhGRpGssLpc ............................................................................s.h+.lVIDGSNV..Ah.................sH............Gppch.......................FSs.+.GItlAVpa...Fh.c..+....G..+.p.s...l..s..VFlPpa..Rpcp...........................................ss..hs-..pchLp...cLpc....ptl..Lsh...TP..........o+ps.G.pc..hs..s.aDD.Ral..l..clA..h.c..s..c............G..lIV.SNDpaRDhhs..........-pspa..+.....c.....hlcc...........................RLL.aoFs....................sshFM.PcD...PhGR.GPpLp.p.................................................................. 1 101 165 282 +11811 PF11978 MVP_shoulder Shoulder domain Bateman A anon Bateman A Domain This domain is found in the Major Vault Protein and has been called the shoulder domain [1]. This family includes two bacterial proteins Swiss:A6FXE2 and Swiss:A1ZGE7. This suggests that some bacteria may possess vault particles. 25.00 25.00 25.50 30.60 22.90 21.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.49 0.71 -4.21 20 152 2012-10-01 22:02:33 2009-07-05 15:15:55 3 8 82 41 76 140 4 117.60 58 14.85 CHANGED GPDFhTDllpVETuDHARLpLpLuYNWaF-V.stsc.t...tsp+hFoVsDFVGDuCKsIASRlRuAVAuhsFDcFH+NSu+lIRpAVFGhcpt..........Gps+...spLpFssNsLVlTsVDlQSVEPVD .........GPDFhTDlIslETuDHARLpLpLuYNWaF-l..spp.csp...psu+LFSVPDFVGDACKAIASRVRGAVASlsFD-FHKNSu+IIRsAVFGh-ps..........scsR...cphhFstNsLVloSVDlQSVEPVD............. 
0 41 49 61 +11812 PF11979 DUF3480 Domain of unknown function (DUF3480) Assefa S, Coggill PC, Bateman A anon PFAM-B_2031 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 350 to 362 amino acids in length. This domain is found associated with Pfam:PF01363. 25.00 25.00 29.40 28.90 19.40 23.10 hmmbuild -o /dev/null HMM SEED 356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.16 0.70 -5.83 8 179 2009-07-05 15:08:20 2009-07-05 16:08:20 3 9 82 0 101 161 0 292.60 50 28.68 CHANGED slPssPYLhGlLIpRhEVPWAKlFPLRLMLRLGApYRYYPCPLhSVRtRcslYGEIGpTIhNLLsDFRNYpYolPsVpGLlIHhEsp+ToIpIPpsRYs-lhKslNsS.s-HlLAhGusF...ochADuHLVClQN.-u......pYpTQAIsIcsQPRKVTG...ASFhVhsGuLKoSSGhhAKsSIVEDGLMVQIsPEpMptLRpALRsMKDapIsCG.l......-usDsQ.EhVsIpWV-s-pshNhG..VlSPIDs+uh-GlsSh+l..apuo-Ypsss.+IIRWTEVFhlps--c.pstsusstshoRLuEplA+usCtALsPHLcLLtpsGhsKLGLRlolDo-pVuYpAGSpGp.LPspYhNsLDstLlPll ...........................................lPssPaLhGlLIp+hEhPWAKVhPhR...LhLRLGAcY+.hYPsPLhSlRhRcslaG.EhGc.T.IhNlLsDhRNYpYoL.slptLhlchE.tposI.p.IPpp+as-h...............h.Ks.hNpS.N-HVluhGAsF...spcADSHLVClQN.sG........................sYpTQA.S..h.pspPRK.VTG..............ASFhVFsGALKoSSGalAKSSIVE...DGlMVQIssE..sM-uLRpALR-..KDFp.IsCG+s........Dut-.....p..Ehl.pIpWVDs.-.cp.hN.t..G..VhSsIDGpSh.....-.....u.....lssh+l......ht...ts.-a..c.s.s..t..+hlRhTEV..Fahtp-....pp.....hu......ssttphscplApAhshALsPHLchL+psGhsKlGLRVolD.oDp.VtapAGS..pGp.LPtpYh...NsLDssLlPVl................................................... 0 22 32 63 +11813 PF11980 DUF3481 Domain of unknown function (DUF3481) Assefa S, Coggill PC, Bateman A anon Pfam-B_2819 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 80 amino acids in length. This domain is found associated with Pfam:PF00754, Pfam:PF00431, Pfam:PF00629. 
This domain has two completely conserved residues (Y and E) that may be functionally important. 28.50 28.50 35.10 48.00 25.10 24.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.08 0.72 -4.16 6 219 2009-09-16 09:17:33 2009-07-05 16:10:11 3 9 43 0 64 168 0 83.80 51 9.77 CHANGED -sos-T.lShpPsshLhoLDPILlTIIAMSulGVLLGAVC.GVVLYCsCh+suhocRs..hSsL.............ENYN......................FELsDGlKLKK-.hssQssh ...............t.............ps........LDPILlTIIAMSuLGVLLGAlC.GllLYCsCtasGhSpRs....hosL.............ENYN......................FELhDGlKhKhc.hNpQpsh........ 0 2 8 23 +11814 PF11981 DUF3482 Domain of unknown function (DUF3482) Assefa S, Coggill PC, Bateman A anon PFAM-B_3168 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is typically between 289 to 301 amino acids in length. This domain is found associated with Pfam:PF01926. THe central region of these proteins contains a hydrophobic region that is similar to Pfam:PF05433. 
26.40 26.40 26.40 27.20 25.40 26.30 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.61 0.70 -5.18 27 234 2009-07-05 15:13:17 2009-07-05 16:13:17 3 4 229 0 64 219 15 277.10 36 60.69 CHANGED RcALAchsLHAhhp..FDoVAssh-sEt+LacslutLLcp.tps.LpcLlsspppptptRpptAsplIA-LLl-sAuhR..tplssptst....ppshpphppplRppEppshcsLLpLYpFcppDhpsscLPLhsttatpDLFsP-sL+thGlchus....GuAuGAusGsGlDLhsGGlTLGsuAslGAlsGGs.......hpsspphup.cl......huKl.......pGp.+cLsl....DsssLpLLhlRphpLlsuLppRGHAupcslpLts..sp....p..............pth.....cp..spLschLpcA....Rs+PpWSsLs.....................st..chtptpRpphlppLsspl ..................................................................................................................phLuRhsL.HshstFDoVA.sh-GEh+LacsLAhlL..p.hc.stLcRLhstpppphpphpppAtchIA-hLlDsAAhR..........pplspstsst..............................pshpphppplRphEpph...ptLhtLYpFhcs-hsssc..hhsttatpD.hFssEhL+phGl+hGs..........GsAsGAhhGhGl....DlhshGsoLG.hushlGulhGGh........h.ssps.......l..hsKl.............pGt.ppLps.....DstsLpLLAhRth.pLltsLppRGHAAps.slcLps...p...............tsW.............pt..s+LPp.Lp+A.Ru+..pWS.oLs.....................st...c.pptcRtthhptL....hh........................................................................................... 0 19 35 56 +11815 PF11982 DUF3483 Domain of unknown function (DUF3483) Assefa S, Coggill PC, Bateman A anon PFAM-B_3204 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 230 amino acids in length. This domain is found associated with Pfam:PF02754. 
26.80 26.80 27.40 43.00 23.40 26.70 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.62 0.70 -5.05 14 145 2009-07-05 15:16:29 2009-07-05 16:16:29 3 4 143 0 52 139 9 221.60 55 35.17 CHANGED hLstlLPhLlahuLuLsslGAsRRsthWRpGpsopVsalu..GLhAhP+RYLVDLHHVVtRD+YhS+THVATAGGhVluhsLslLsashuLtsp.....hLsahLLsAshs.MhlGulFVupRRh...sPPuRLS+GPW.RLP+SLhuFuluhhlsoLssA.G.hlssshGGWsLsslLuhul.hhGssEhhhGhshGGPMKHAhAGALHLAaH.RsERF...........sGGRSTuLKslDLs......ss.L .......shLlssLLahulAhhshuss+RsshWRhGRuopsshhG..sLhAhPKRYhVDLHHVVARD.YlA+THVATAGGhluAhsLshlsaGhulau......hLshhlLlAshs.MhVGAlFVa+RRh.....ssPARLS+GPW.pLPh.LhAFAluhhLhsLssA..G.....t....h.G.....uaslhshLhlGl..GshphhhGhshGGPMKHAhAGhLHLAaH.RtERF...........uus..sTuLKs.DLpc...h..... 0 11 23 37 +11816 PF11983 DUF3484 Domain of unknown function (DUF3484) Assefa S, Coggill PC, Bateman A anon PFAM-B_3216 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 65 to 81 amino acids in length. This domain is found associated with Pfam:PF02491. 27.40 27.40 27.40 27.80 27.00 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.35 0.72 -3.14 15 535 2009-07-05 15:18:48 2009-07-05 16:18:48 3 4 530 0 32 179 0 75.30 36 16.84 CHANGED V-hLAQtAVsG-phlc++P.s...........phcpsssstpsss.ppPtppsstsppp...........spcscpcls-Rl+uhFGshFD ....................VshLAQsAl+GEpsLp+QPlsh............hs.pa.spss.sspss.s..s...-.t.....s..hts.scs.s-................tsspsKsKls-RhRulhGsMFD. 0 3 11 20 +11817 PF11984 DUF3485 Protein of unknown function (DUF3485) Assefa S, Coggill PC, Bateman A anon PFAM-B_3236 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 223 to 526 amino acids in length. This protein is found associated with Pfam:PF09721. 
26.10 26.10 27.60 26.10 25.90 25.70 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.23 0.70 -11.15 0.70 -5.24 59 191 2009-07-05 15:21:24 2009-07-05 16:21:24 3 2 160 0 96 206 48 204.30 17 52.21 CHANGED thhhssslllh..hussshhhp.t......ptssssppshthhPtp.husWptssss............stht.phhsssphlh.ppY..tssp.....spsVsLaluYapsQ...pssphlHpPps.Ch...sus..GWphhspp..ptslshsst...shplschhhptts.....ppphlhYWathssph.h......ssshps........KhhthhstLhspts...Duull+lsss............ppsstsAtptlppFl.pphhsslsthl. ...............................................................hh...hhhhhhlh...hsshshhhtst.........tt...thss....tp.......ht.thPtp.husWpttstsh...........stht.phhsssphlt.psY......tsss........sptVsLalu..aapsQ...ptspt.lHsPps.Ch.....sus...GWphhspp..tt.s..lshsst.......thplschhhpt....ss.......ppphlhaWathsu.ch..h......ssthtt...........Khh.hhstlttpts...-ushlplss.............tts.ttu.thhppFh.pphhs.l............................................................. 0 36 81 91 +11818 PF11985 DUF3486 Protein of unknown function (DUF3486) Assefa S, Coggill PC, Bateman A anon PFAM-B_3271 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are about 190 amino acids in length. 
25.00 25.00 25.90 25.00 24.70 24.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.95 0.71 -4.22 34 265 2009-07-05 15:23:11 2009-07-05 16:23:11 3 2 219 0 56 231 5 173.50 25 95.02 CHANGED Ms++SpV-hLPts...lRphLpphLpspshothclLppl.phh.t.......sh..........plS+SulpRYup+hEphhtpl+pupEhAchhspphu-.spschuchlhphlpohhFcshhsht-stcpp........hchLsplApslsplppAustpc+hpt-lRpphttpsss........ts-phsppsGl......Ss-shspIRcplL...G .............................huRtSpl-.hLPp...s....l+p...hLpphLp-pshoth......c.hl.ppl.phh...................sh.......plS+SulsRY...up.......+h....-p....hhtpl+pspEh...uchhspphu.-...spscho.chlhphlps.hacthhphpEstpss..............thlsplutsh.tpL...ppAuh.tpc+hcp-.....l.......+tphttthsp.........s-ch....s...tp..sGh......ot-hhptl+p.lLG....................................................... 0 24 45 51 +11819 PF11986 PB1-F2 Influenza A Proapoptotic protein Mistry J, Gavin OL anon pdb_2hn8 Domain PB1-F2 is a protein found in almost all known strains of Influenza A virus - a negative sense ssRNA Orthomyxovirus [1]. It originates from translation of the viral polymerase gene in an alternative reading frame [1]. PB1-F2 consists of two independent structural domains, two closely neighboring short helices at the N terminus, and an extended C-terminal helix [1]. Although the protein has originally been described to induce apoptosis, it has now been shown that PB1-F2 more likely acts as an apoptosis promoter in concert with other apoptosis-inducing agents [1]. PB1-F2 promotes apoptosis by localising to the mitochondria where it destabilises the membrane. This will cause release of cytochrome C which activates the caspase cascade of apoptosis through the endogenous pathway [1]. In this way it acts like the Bcl-2 protein family which are physiological apoptotic regulators in cells [1]. 
19.50 19.50 19.50 24.30 19.30 19.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.24 0.72 -3.95 24 10057 2009-07-06 12:30:56 2009-07-06 13:30:56 3 1 10020 2 0 3113 0 83.60 71 99.95 CHANGED MEQEQDTPWTQSTEHINIQKRtsGQQTQ+LEHPNSTQLMDHYL+TMsQVsMHKQTVsWKQWLSLKNPTQsSLKTRVLKRWKLFsKQEWTN .MEQEQsTPWTQSTEHhNIQKRGsGQQhQ+LEHPNSTQLMDHYLRhMSQVDMHKQTVsW+.WhSLKsPTQtSL+T+sLKpWK.FNKQtWTN. 0 0 0 0 +11820 PF11987 IF-2 Translation-initiation factor 2 Mistry J, Gavin OL anon pdb_1z9b Domain IF-2 is a translation initiator in each of the three main phylogenetic domains (Eukaryotes [1], Bacteria [2] and Archaea [3]). IF2 interacts with formylmethionine-tRNA, GTP, IF1, IF3 and both ribosomal subunits [2]. Through these interactions, IF2 promotes the binding of the initiator tRNA to the A site in the smaller ribosomal subunit and catalyses the hydrolysis of GTP following initiation-complex formation [2]. 20.20 20.20 21.20 20.30 19.20 20.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.32 0.72 -4.18 243 5391 2009-07-06 13:34:25 2009-07-06 14:34:25 3 51 4886 6 1587 4288 2608 108.80 42 13.02 CHANGED s.....oL-shhsphp...pu..p...hcpLslIlKADVQGSlEAlpsuLpKls..s--VclpllcuGVGuIoEoDVtLAsA.S..s......AlIlGFNVRssspA.+chA-pcs...V-IRhYslIYcll--l+t ......................................spL-shhpphp.....pG..-.....hcp..lslllKADV.QG.SlEAlpsSLtKls......s..-....-.............V....+V....pllcuuVGuIoEoDls.LAsA.S.s............AlIlGF.NV.R.s..s.s..p.A.+c...h.A...-......p....-s......V-lRhYslIYclI--lc........................................................ 0 547 1002 1339 +11821 PF11988 Dsl1_N Retrograde transport protein Dsl1 N terminal Mistry J, Gavin OL anon pdb_3etu Domain Dsl1 is a peripheral membrane protein required for transport between the Golgi and the endoplasmic reticulum [1]. 
It is localised to the ER membrane, and in vitro it specifically binds to coatomer, the major component of the protein coat of COPI vesicles [2]. It is comprised primarily of alpha helical bundles [1]. It complexes with another subunit of the Dsl1p complex called Tip20 which forms heterodimers by pairing the N termini of each protein [1]. A central disorganised region between the N and C termini of Dsl1 contains binding sites for coatomer [1]. The C terminus of Dsl1 contains a binding site to the Sec39 subunit of the Dsl1p complex [1]. 25.00 25.00 43.40 43.20 20.30 20.10 hmmbuild -o /dev/null HMM SEED 354 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.97 0.70 -5.58 7 25 2009-07-06 14:57:28 2009-07-06 15:57:28 3 2 24 3 13 24 2 341.10 49 47.20 CHANGED plhsspspllptltpDP.lh.csss+.psp.....hDhpslLpp-spLsc-LptLppLKhlusLlhEapsNh-lhELENCYYSLQNL+KKh+ssss.htpuhpFQQSlsoalDsLHlpLls+lhcllsphFWpIstsohpFppplphtcDcsp...h-Y-shhpFlppsaFscshlD.ppWhItshshu-.p-pVpppLsoIhpsYlphsplhctlKphlFscphphohps....p+LplspS...Gt.hlscplpSatsLssFl.sslo.cDpttlhhcLGsslssEhhKhlKpNupplLpppss.hKp.ls.lN-pLppLSpcot..sWsYsGp-lpcLLNDcplapsLhLDplhpppl.cIR ..u.LhsNKsEIIcpLhKD....PLll.cs-o+..coNt............phcLDss-LLpREuhLusELphLssLKTh.sLIcEhKTNlplLELENCYYSLQSLRKKh......R..N.NuuaL.KQSFsFQQSluTYVDTLHL-LVooLYcILTstFW+ITEN.SIpFsPoVEaG+D...+V+...IEYDTFM-FVsQQYF..PcGsLDspuWFIhDMs.uDuQEpVRuKLNTIhK-YhsLSpllshIKsuIFhsG+cISYEs..ppshLlFuKS..SS..+GpcslossLsSFcuVsDFhLDsLuh+D++oLuhcLGPLhsTEFTKFVKsNASlILcshc.SPLKsLVSsINsKLs+......Lsu+Sc...sosWoauGcEIpDLLhNKplYaNLLLDKlLEuHIo-IR. 0 1 6 12 +11822 PF11989 Dsl1_C Retrograde transport protein Dsl1 C terminal Mistry J, Gavin OL anon Manual Domain Dsl1 is a peripheral membrane protein required for transport between the Golgi and the endoplasmic reticulum [1]. It is localised to the ER membrane, and in vitro it specifically binds to coatomer, the major component of the protein coat of COPI vesicles [2]. 
Binding sites for coatomer are found on a disorganised region between the C and N termini of Dsl1 [1]. The C terminal domain is involved in binding to the Sec39 subunit of the Dsl1p complex [1]. The N terminal complexes with another subunit of the Dsl1p complex called Tip20 which forms heterodimers by pairing the N termini of each protein [1]. 30.00 30.00 30.30 30.50 29.70 29.90 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.78 0.70 -5.25 5 58 2009-07-06 15:01:51 2009-07-06 16:01:51 3 4 55 1 39 61 0 287.90 29 38.41 CHANGED DGWDEEWDID..ID-lcppsspt...........p-cIpITpLPctFtpIhpcFEpuscslucupV-spYhtYKFNLLQTuFFAMsosKasN-WaQLYpDMRYlhoc..NscLhRLpELstRhhEsNLshp+KhVppllpcQLspL+cNE+sPsWDsTIcsLLPFIccEllssLp+Itt.-up+aLLsFLsFLaNDCllsNILpWcIISEKNSENLSELIpLllNuT-IsuLsspPcYR+hREKluIluKlLPLHLKDIMEMFYNGDFYLFuTEEIIQWIlLLFADTPLRRDAIDDIpEIRpEupD ....................................................................................sWs-..s.Ws.-......h-.ctt.pptppt........................t...ppplplTplPp.hhtl.ppap...p..sh.sp..s....t.hp.p..p....h.h......ths.......l..L..t..o..hhAh....sps........pa..s....p..s...h.LasDhpYlhp..c......spp..L.chp.......-hs....h......p.h....lp.............pphp.phchV..h........pl..lpt..p..hp.p.hp...c.s........-cp..s...sh..ct.shppl...l.shl.h..ppl....h.p....h...ppl.pt...ppt.....phlhp.hlshl..hNshllppIlphp.ISEhpSppL.......ucLls.......ll..h..s.........s..o..c..l....t..tL..s....p...ps....s....Yh...c...s..hpKFshltplLssHLK-IM-hFYpG-ha..FuT-ELlphIchLFA-osLRcssIs-Ih-hRt........... 0 10 22 38 +11823 PF11990 DUF3487 Protein of unknown function (DUF3487) Assefa S, Coggill PC, Bateman A anon PFAM-B_2242 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 121 to 136 amino acids in length. This protein has a conserved RLN sequence motif. 
25.00 25.00 29.00 28.80 23.90 22.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.62 0.71 -4.72 25 230 2009-07-06 16:44:55 2009-07-06 17:44:55 3 2 188 0 64 200 9 119.40 36 94.45 CHANGED -sTlsFLP..cRLNp-PlVhRGhTssEhhlssshuhshG.lllGlsLuhl..suhlt..hlPshhllsslhslhlGushLpRlKRGRP-saLaRp.lph+lstph......lGsppLIh+SGhWolRRo....................tpt ..............tTlpFLPpRLNpcPl.Vh+GLTssEh.hlsshh.us.ssG.hllGlP.luhl..hs..h.h.h.hhPss...s..llsshlslhlGu....th..LpRl...KRG+P-saLaRp.Lp.h.p...lsphh.........lGsppLIh..+SGsWohRRo........tp.................. 0 7 28 51 +11824 PF11991 Trp_DMAT Tryptophan dimethylallyltransferase Assefa S, Bateman A anon PFAM-B_2054 (release 23.0) Family This family of proteins represents tryptophan dimethylallyltransferase (EC:2.5.1.34), which catalyses the first step of ergot alkaloid biosynthesis [1]. Ergot alkaloids, which are produced by endophyte fungi, can enhance plant host fitness, but also cause livestock toxicosis to host plants. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 390 to 465 amino acids in length. 
25.00 25.00 28.60 25.50 23.80 22.30 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.41 0.70 -12.09 0.70 -5.24 53 313 2009-07-06 17:00:22 2009-07-06 18:00:22 3 10 118 12 161 321 0 311.10 24 78.37 CHANGED t-pch..WWcpouPhluplLpsAs.YslcpQapaLthapphllPhLGPaP.........ttttpahShl...optGhPlEhShNa........ppstp.sVRhshEPls...hsGTs.tDPhN.ptustchlpcLsph.hssl.DlphacaFtpplslspp..-pttlpppt..................h+s.....pthluhDLc....sup..hslKsYh......hPthKuhsoG.h.ssppllhcul+pls.t......thtsuhphlcsalssps........................sthpsth.luhDh.lsPs...coRlKlYltp.p.sohsplc-laTL.GGRhs..s.ssshcGLc.hl+cLW.pllsl..........sh......................ps.thstss..stphs.......hhhsa-l.pP........Gp.shPp.......splYlPl.h..............uhsDttlApuLspF.acphG..apchApp.YtssL ........................................t.Wh.ptsushhsphhttus.Ys.ptQhphLhhhtphl....lPhLGshP............tt..hahShl...ottusPhEhShsh.............spst..hlRhshEPls..hsG..st..tDshN.ptshtphlppltt................ssh...chphapthtpplhlstp...ptthh.tt...................hhs...............p.hluhDhp.......ssp........hshKsYh...........hPth+uhssu.............s.......tplh...hpulcpl.........thtsuhphlppahtpts...............................ththth..luhDh.lsss...........coRlKlY.hhpt..p.sshsplpchhTL.GGRh.p....s....ts.culp..hlcplW.tlhsh.....t........................t.....htt..t.ths..........hhhsapl.ps........sp..hPp........splYhssh..............s.sDhtlspslspahpphG..ht.t.sp..Y............................................................................... 0 28 75 132 +11825 PF11992 DUF3488 Domain of unknown function (DUF3488) Assefa S, Coggill PC, Bateman A anon PFAM-B_3123 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 323 to 339 amino acids in length. This domain is found associated with Pfam:PF01841. This domain has a conserved PLW sequence motif. This domain contains 6 transmembrane helices. 
25.00 25.00 25.10 25.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.05 0.70 -5.79 60 397 2009-07-06 17:02:56 2009-07-06 18:02:56 3 6 379 0 159 408 140 317.90 25 47.85 CHANGED ptlsR..psh..ha.LL.hshshllhPhhtplPhWhsu...lsshsllWRshlhhtph............sh...PsR....................allsslsls...ussslhhshsp.hhuh-sslsLLllhhuLKhlEh+spRDshlllhLuaFllhssFlhsQulhhshhhllslhhlhsuLlsl....pt.stsp.......................h..tpsh+huuplhL.ulPLhllLFllFPRl.sPLWtlP.sstsupTGLSDsMsPGsIupLspSsplAFRs.pFsus............P..spspLYWRGhVLppaDGcsWpts...........tttthtss.stsss.................t.tupslc...YplhhEPopppWLauL-......hshsss....sssths..sDhp.hhsp+P..lspph+YpspShsph .........................................t.........h..hh.lL..hshhhsl.hPh....h.hplPh...hh...hs...lh.s.h...shhhp.h.h.h.hht.ph.............h..P.sp....................hl.h.shlhls...shhsl...h...hp..h.ss..h.......h.......u...h-susslLlhhhsLK...hlEh+st.RDhhhllhlu.h..FhlssshlhsQu.....h....hhshhhl....ls..l....hh..hhss..L.l..tL.......pt..sptp.....................................................h...ttsh+h.ust.lhhhu.l.P.L...hl...lLFlhhPRl...s.....PL.Ws.l........P.......s.s........s...p.....u.p...T..G.LSD.p..hsPGslupLspssplAFRs.pF.sss...............P..s.p........ph.....YWRuhVhppa...D.....G.p.pWpts...................................thts..sts.p....................................................tspslp...Yplh..hEPs.p.ppa....LhuL-........hshsts........ts..s..tht.....s-hp.lhtpps..lsp.h.hpYphpSh...................................................................................... 0 48 110 140 +11826 PF11993 Ribosomal_S4Pg Ribosomal S4P (gammaproteobacterial) Assefa S, Coggill PC, Bateman A anon PFAM-B_3290 (release 23.0) Family This family of proteins are ribosomal SSU S4 p proteins. This protein is found in gamma-proteobacteria. Proteins in this family are typically between 162 to 178 amino acids in length. 
25.00 25.00 28.70 33.10 19.60 24.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.91 0.71 -4.47 19 166 2009-07-06 17:08:16 2009-07-06 18:08:16 3 1 165 0 38 120 7 163.90 44 97.59 CHANGED MQlcslt...LINEhQhGspLNpAVcpuRRu-.FuLlLuMLSpDsRDhs.hch.pstps.......pcptLRppFpls...psQsLtustsshphustpAphap....ptGhsuh+LpptLsPEsLshpsp-stsls-plhpNhShpsR++ltsp............pshphs.tphYspLssspptsplttp ........................................MQlHTLDKAslIsElphGpulspAV.pc..GRRAD.FALLLuhhSsDVRDsoPl-plcssss.......oEpsLRpcFELu...pPQ.tLcsDpSSYpluAcQAslFH.........puGhsSAKLuHYLpPEsLsaRPpDTpsLPEEVYtNLSGHpRR+Lus+............psspl.stchYcpLssAhRpsplps.t....................... 0 6 14 26 +11827 PF11994 DUF3489 Protein of unknown function (DUF3489) Assefa S, Coggill PC, Bateman A anon PFAM-B_3545 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 84 to 211 amino acids in length. This protein has a single completely conserved residue W that may be functionally important. 28.30 28.30 29.20 30.50 28.20 27.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.47 0.72 -4.03 26 127 2009-09-11 08:24:30 2009-07-07 12:41:14 3 4 70 0 54 132 26 73.80 42 46.18 CHANGED scsspsRp...................soKQsplIshLp+PcGA...TlspIscsTGWQsHTlRGsluGsl+KKLGLslso-Kss...u.sph..YRIs ............................................tsRt................soKQAplIsMLp+PpGA...TlspIspATGWQsHTVRGshuGshKKKL...GLslsScKss.......uttRs..YRI................................. 0 20 40 48 +11828 PF11995 DUF3490 Domain of unknown function (DUF3490) Assefa S, Coggill PC, Bateman A anon PFAM-B_3558 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 160 amino acids in length. This domain is found associated with Pfam:PF00225. 
This domain is found associated with Pfam:PF00225. This domain has two conserved sequence motifs: EVE and ESA. 25.00 25.00 31.50 28.70 24.40 23.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.13 0.71 -4.68 12 129 2009-07-07 11:50:29 2009-07-07 12:50:29 3 9 24 0 81 127 0 153.30 50 17.22 CHANGED -Fc+QpppII-LWpsCpVSLlHRTYFaLLF+GD.uDpIYhEVELRRLoaL+pohupss....A..ss.pslolsSSlKALpREREhLuKhhsp+hotEER-pLYhKWGlsLsoK.+RRLQlsp+LW.osspDhpHVcESAslVAKLlGFsEsGpts.KEMFsL.................sFss.s .............pFc+pppcII-LWcsCpVSllHRTYFFLLF+GDsuDsIYMEVELRRLoaL+poaupss............tsh...ss..sh.....ohsSSh+sLpRER-hLu+pMp++LotE.EREpLYhKWGlsLsoK.pR+LQlsp+LW.ocspDhpHlcESAslVAKLlGahEsGps..KEMFuL.sFss.s.................. 0 9 48 65 +11829 PF11996 DUF3491 Protein of unknown function (DUF3491) Assefa S, Coggill PC, Bateman A anon PFAM-B_3393 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 286 to 3225 amino acids in length. This protein is found associated with Pfam:PF04488. This protein is found associated with Pfam:PF04488. 
25.00 25.00 25.50 25.40 24.20 23.60 hmmbuild -o /dev/null HMM SEED 936 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.47 0.70 -7.05 8 259 2009-07-07 11:54:01 2009-07-07 12:54:01 3 4 123 0 7 238 0 528.00 32 36.34 CHANGED AVh-ssSRaacPPLospNcpuTVIAGcoPLTVIPlRLL---osERlcpAhuYKDYKIplpGGpGGLTVQIGGAGaYslTusP.uscNsISFRAIPtsFuVsFsLS.+tpQsVPLh+PNG.TclplLKIpQKGFsTIlGSuuGpDpLTGN.+DT+FYlSoGGGslaSGuGpNpYaIP+.LpssLsIsLosNSssHplhLs.po....hEh+ssussLoLI..h.thtsssIal.shDspsphspasssFpV+hsDGITlpAlc+tsst...........spLuVpoCD.ptWptpaPEEsuas-sIlcaL+chsWhLAPcVplhtpcupssYhshp+pLVYp.Pc.aSElplpupcsYpTtVpGssGsoYIl.s..sssspspslcIhLA-DsspPQTlDLSsllPoLVpG+hss....ssSIsLpl..SSs+YslsLolSWpscs..lPppThlplpPpcphpLG-ha+hL..ppssupWssLa+suhlIPcch.slLSlNNTshLMls+spps..sEHlLulENpuslshKlhGpLhSGaIKGu.....Wcs.spshsslp+hsloIPsHshpYLsFcGc.......cNlLF+ShLcutsLcs+spsphplS++pWppYD-IpVp....AToLpLpcFpRYpIuotscsLSRpLMYAQshVpIssRDlslKLFYl.REssGIGAlRLlFKNFFpESM-shscpTLEKEsKPlLsusPcphIssuY+sHLclhLG-cpLNLAplVpEFusop+IlshpcDhssHtllh.psppspsLsllTaTlssps-ssptt.p.p.hahDsh.pcY+.LPhsThs-s.YYLsPsoGDLYlTplls..scspsQAFll+LKsaKppWtcapphllSusHpphpp.l..ssTsLpFsGPElp+hEIDaspsss.h.h.sscllShSshlF.ss-QVlpYsP+hupQFaSh.-YMLW-L+-Rsptupc......A+saDsYLh-ushphhc+NspWKIssslLcaAlG.YYR 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 1 1 5 +11830 PF11997 DUF3492 Domain of unknown function (DUF3492) Assefa S, Coggill PC, Bateman A anon PFAM-B_2107 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 259 to 282 amino acids in length. This domain is found associated with Pfam:PF00534. This domain has two conserved sequence motifs: GGVS and EHGIY. 
23.30 23.30 23.40 23.40 23.20 23.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.73 0.70 -4.85 39 322 2012-10-03 16:42:30 2009-07-07 16:23:34 3 7 290 0 121 316 12 260.00 28 41.01 CHANGED s-lsLllEGTYPYVpGGVSoWlcplIpshP-hpFsllhlGuptcchtphpYplPsNVsclcphaLhst.........t..............tppup..ppshpthcplpchhcpsst.......stthhpph......hhtltpss..shs.psFLtScps.......WchlschYpcht.....spsSFsDaFWolR.hhtPLhtl..lupsl.PtsclaHslSTGYAGhLGulhptppspPhlLTEHGIYT+ERcl-lhpupWI...............hshhRchWl+aFctlu+hsYptA-pIluLactNRphQlptGAss- ............................................................................hclsLlhEGoYPYVpGGVSuWspplIpu.hs-hcFslhhluuptpp.s.phtYplPsNVsclcphhL.st..............................................tpttpptthphhpphhphhpp.hts...................stt.h..pt.h......h.tLtptt..thshtshL.tScph..........................ac..hl..pc.....hhpphh.....................spssFs-...haWsh...Rp..hht..sl.h.l..................lspsl..Pps..clhHulSTGYAGllGshhp.t.ppstPhllTEHG.....I.....YsRERch-lhpupWl......................shh+phWlpaFttluchsYptAc.lhsLap.t.spthQhphGAs........................................................................ 0 35 73 104 +11831 PF11998 DUF3493 Protein of unknown function (DUF3493) Assefa S, Coggill PC, Bateman A anon PFAM-B_3788 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 79 to 331 amino acids in length. 25.00 25.00 25.20 25.40 24.00 24.50 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.27 0.72 -4.09 21 125 2009-07-07 15:28:10 2009-07-07 16:28:10 3 7 83 0 70 127 107 77.70 35 29.61 CHANGED hssptcs+LhsEspsPaRGLR+hlalAhuASuslGhhlhhhRlhu.......Gssl.tslsNLulQlGuluLhshLhhhEpp .....h...pptt+LhuEspuPaRulRpFhYlAFsASuslGshlhls+Llu............ussl.....pslsNhulplGulslhshLahh-pp........ 
0 22 52 66 +11832 PF11999 DUF3494 Protein of unknown function (DUF3494) Assefa S, Coggill PC, Bateman A anon PFAM-B_3080 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 243 to 678 amino acids in length. This protein has a single completely conserved residue G that may be functionally important. 25.00 25.00 25.80 25.80 24.50 24.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.50 0.71 -4.69 43 181 2009-07-07 15:31:23 2009-07-07 16:31:23 3 33 84 5 76 193 13 198.30 33 42.53 CHANGED lLutoul.TssGs..osls.Gsl.GsSP.susu..lsGFs....hss..............Gslasssh.............shspAhsDhtsAYs...........sAsups.ssshstl..ssGsl.uGhTLsP....GlY.....ch.suu...lslos.slTLDutG....sssuVaIFQlu...usLosu.uuu.......plhLs....sGApApNlFW......Vuuu..solGssosFpGsllups...u.Iol....sTG.....uols.GRhLAps..AVTLsssslsp ...............................................................hLutsul.ossss....osls.Gsl.GsSP..uou..lTGFs...shss...........................................sGplauush....................hhspAhsDhtsAYs...........sAAups.......s............s.s..httl......ssGpl..GGhT......LsP....G..lY.....ph..ssu...lslou..slTlDutG......ssssVaIhQhu...usLssu.uus.......pVhLs......sGAp.ApNlFW......pVuuu....solG...s...suphpGslLuts....u.lshsTG..........uo.ls.GRhLups..AVTLssssl..t........................ 0 20 68 75 +11833 PF12000 Glyco_trans_4_3 DUF3495; Gkycosyl transferase family 4 group Assefa S, Coggill P, Bateman A anon PFAM-B_3335 (release 23.0) Family This domain is found associated with Pfam:PF00534. 
25.00 25.00 25.10 27.10 24.50 22.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.98 0.71 -4.63 53 208 2012-10-03 16:42:30 2009-07-07 16:34:35 3 4 180 0 79 213 126 168.00 33 40.39 CHANGED ppGHclshls.tpsptshs.....GVcllpYps..pctsstssas.ahpshEspsh+GpushcusppL+pp.GapPDlIluHsGWGEsLFL+-laPcupllsYhEa.aYpspGs.DssFD..PEa....sss...psth+........lRh+Nhs.hhphppuDhuloPTpW.......QpupFPs.a+s+IpVlHDGlDTsthp ..........................................t..upclshls.ppsptth.......GVphltYps...scts...sspsa....hhpshEptsh+GpushcsstpL+pp.GFtPDlIluHsGWGEsLFl+-laPcs.llsYhEa.aYcspGs.DssFD..P-h....shs..pshhp........lRh+Nss.llslpt....sD.hGloPTpW.......Q+spaPs.h+.s.+IsVlH-GlDTshh.t... 0 17 57 68 +11834 PF12001 DUF3496 Domain of unknown function (DUF3496) Assefa S, Coggill PC, Bateman A anon PFAM-B_3407 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 110 amino acids in length. 30.70 30.70 30.90 30.90 27.10 27.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.24 0.72 -3.81 7 174 2009-07-07 15:42:55 2009-07-07 16:42:55 3 17 36 0 67 132 0 100.90 43 13.79 CHANGED h+oQMELpIKDLESchu+hKTuQtDhspoELEcYKpLYl-ElKlRcSL..SscLsKosE+LAElsTKLhlE+cQ..scShhoohsTRPslEsPCVusLp.shshNRhhIPR...-slhl ....................psphELplKDLEscl.S+hK......T......upc-s.............poE.LEcYKphahcELKsppSL..SpKLs...K..o..sc+lA-lsTKLLhEKpp..p+shhoolssRPs.Es.PsVtNLs.ShslsRphhP+t.................................. 0 8 10 19 +11835 PF12002 MgsA_C MgsA AAA+ ATPase C terminal Mistry J, Gavin OL anon pdb_2r9g Domain The MgsA protein possesses DNA-dependent ATPase and ssDNA annealing activities [1]. MgsA contributes to the recovery of stalled replication forks and therefore prevents genomic instability caused by aberrant DNA replication [1]. Additionally, MgsA may play a role in chromosomal segregation [1]. 
This is consistent with a report that MgsA co-localises with the replisome and affects chromosome segregation [1]. This domain represents the C terminal region of MgsA. 21.00 21.00 21.00 23.60 20.70 20.20 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.93 0.71 -4.57 278 4662 2009-07-07 16:02:56 2009-07-07 17:02:56 3 19 4329 28 1089 3504 745 165.20 43 37.81 CHANGED GSDPDAALYaLARMl-uGEDPhaIARRLlhhAuEDI.GhADPpALtlAhuAhpAhctlGhPEuclsL..AQAllYLAsAPKSNusYtAhspAhpsl+p...ssshsV.PhHLRNAssphh+clG....aGpsYcYsHDhs....sua.ssQpY..LP...-p.lps......t.......aYpPs.ppG..hEpclpccLpplcp .............................................GSDsDAALYa.hARhlcA.GtDPhalARRLlhhAuED...IGhA-PpAhtlAluAh.puh..pc.lG.....hP.E.u..c..l.sL..Ap.All.aLAhuPKSNusYpAhstAhssl.+p....s...s....s...h....sV.PhH..LRsA..hph.h...K....c.LG...................h.G.ps..YcYsHDhs.........sua...ss...........QpY..hP-clts.......pp....aYpPs..spG...hEtpltpphthlt.p....................................... 0 359 698 920 +11836 PF12003 DUF3497 Domain of unknown function (DUF3497) Assefa S, Coggill PC, Bateman A anon PFAM-B_3419 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 213 to 257 amino acids in length. This domain is found associated with Pfam:PF02793, Pfam:PF00002, Pfam:PF01825. This domain has a single completely conserved residue W that may be functionally important. 
27.20 27.20 27.60 27.20 27.00 27.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.51 0.70 -11.31 0.70 -4.98 32 930 2009-07-07 16:18:38 2009-07-07 17:18:38 3 137 93 3 416 750 0 222.40 24 14.93 CHANGED tp+hpsscss..p.lspcLtchT.....ps.shauuDlhssscllpplsphhs.pttthhsspccst................................................................pshVpsssNLLc.pstppWcplppscp....suopLLcslEchshhlApsh...hth..pphhhsssNlhLplthhssps.hps..hFPp..........hphstsplplstpshptssp........................................sG....shlshllY+sLGtlLsspssshth...tp.....hpsssp....hlsosllssslspp ..........................................................phhsGcsss..plsppLtchT......ps..sh..auuDlhsolclhppls..s.hhs...p....htth....h....su....s.p-sthp..............................................................pshVpslssLLpscshptWc....chppsct........sushLLcslE-huhsLApsh............hth.........p.ht.....hso...sN...l........h....L.pl.t.h..lssps..hps..hpFPp.................................stsstspl..plst.sshp.ssp...................................................................su......hhlshllY+..sLGthL.ss..p..p..ss..hth........t...............t.stp..................hlso.llsssl..................................................................................................................................................... 0 73 107 227 +11837 PF12004 DUF3498 Domain of unknown function (DUF3498) Assefa S, Coggill PC, Bateman A anon PFAM-B_3438 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 433 to 538 amino acids in length. This domain is found associated with Pfam:PF00616, Pfam:PF00168. This domain has two conserved sequence motifs: DLQ and PLSFQNP. 
29.50 29.50 31.40 31.40 28.80 29.20 hmmbuild -o /dev/null HMM SEED 495 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.91 0.70 -5.36 7 291 2009-07-07 16:20:56 2009-07-07 17:20:56 3 9 66 2 120 241 0 410.40 38 42.45 CHANGED LpDlotALsNPsslppps.......spc....+sss........P.susulSoGlQp.hh.pDls..............u.sDhsRLPSPT...........................tsKDlFaVo+sshhpsSPuhsS.S.S-hsEs-h..thssG............+SlShhDLQ-...............spstp........uhs......s...........hstpsspushsss..lpp..........ssph....css.sp..thLhPLSFQNPVYHhusshP...ssst.....cuuopscuosSSH.........................SssE-h.h..........h..pt......c-hst+ss...-hoccphshst...ttp.....slPppsosG.t..R+h.............sututpuph......Pssh.....t.h..........pssshhss..................p.spsG.u+.+QpopSppt-sss.+stuh..tts...........SPss......ssl-RTAAWl.N.N....-...........c...spc-hppsEK.....YppEIshLpE+LRhSsp+L-EYEtRLhsQ-pQhQKhL.EYQtRL--SEpRLRp.Q.-KD.QhpuIIsRLMuVEEEL++DHu-MQtVl.-uKQKIIDAQc+R...hsuLpusss............................RlhsuLs.lpE+a ................................................................................................................................................................LtDlphuLpsPt..p..............t-..h..s............s.h.tu.SuthQt.hh..pshs.........................Dht+L.SPo...........................tspD.aahs+s....hh........psu.po.o.S-hsE.-...ph...ss............+SlShhDLQs.............................stshph..ts..s.......s..s.................................h..t..ss......thsth.....plpp..........ss.h.......css.....sp..thh.PLSFQNP.laphsssh.P...h.Ptu............cuu......t...sh..us.SS+.......................................S.pSE-hth..........................h..ps..................--hspp.s.s.......-hsR.R..phs..p.....ph............h.Pt...Q..so.sG.t.....Rpht............................ss.....s.s...pups............Pssh.........tph............................SoGs.h..u.......................................................t.s......t.G..sR..R.QQ.SpSpc.........p.ss....p......p.....s.h.tt.tts................
........................oPss................ss..-RT...s..AWl.s...st...-................ch.p.pcph.p..psc..........................................hpp..-IthLp...-+L+hSs++LEEYEpchhsQEp.phpKllhpYp..AR...LE-...uE.....cRLRpQQt-KD.QhKuIIuRLMsVEEEL++DHs.......c.Mptsl.-sKQ+lIDA......QE+p...IsuLcuANs............................RLhsAlttlpt..................................... 0 16 27 63 +11838 PF12005 DUF3499 Protein of unknown function (DUF3499) Assefa S, Coggill PC, Bateman A anon PFAM-B_3439 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 125 to 163 amino acids in length. 25.00 25.00 27.20 30.20 23.10 22.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.86 0.71 -3.95 18 376 2009-07-07 16:22:24 2009-07-07 17:22:24 3 1 375 0 101 250 99 115.10 47 91.41 CHANGED ss.RRCSRsuCucsAVATLTYVYADSTAVlGPLAshuEPHoYDLCspHAcRLTAPhGWElVRlsst.t.....sstPssDDLhALA-AVREAup..stt.s.s.........................ssshssP........pssR....RGHLRVl..........s-s ....R.C.+suC.t.AsATLTasYu-STAVlGPLAs.t.EPHuaDLCspHAp+lTAPpGWEllRhsu......................sscPssDDLhALAsAVREuGhstst.hhsts.....................................................................ss.....tstthsR........RGHLpVlsD.s..................................................................................................................... 1 33 78 97 +11839 PF12006 DUF3500 Protein of unknown function (DUF3500) Assefa S, Coggill PC, Bateman A anon PFAM-B_3479 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 335 to 438 amino acids in length. This protein has a conserved GHH sequence motif. This protein has two completely conserved G residues that may be functionally important. 
27.20 27.20 44.20 29.00 27.00 23.30 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.11 0.70 -5.49 36 209 2009-07-07 16:26:39 2009-07-07 17:26:39 3 7 142 0 112 226 111 297.20 27 81.26 CHANGED stpspsssssshhsAApshLssLsscQ+ppshhs..........h..Dssptps.........W.s.h....sh......RsGlslsphsscQ+phshslLpssLS...spGapcstslhth-.p.......hL...........phtttt.........................hs.tpYalslFGs...Pus...s..psWGapatGHHlulNhsh..ssspl.shoPhFhGupPsthpp........hsG..hp.....sLtpEcshuhpLhpuL....sscQpppAhltpth...............................................sc.hphsushtc.....................h..pGltsupLsssQpchLhsllpp.Ylshhspphsc..tphscl..ptthspsaFuWhGs....htsscsaY..aRlpuPshllEassps..................sstsHlHolhRssss.DaGts ......................................................................h....t..htthhtAupshlssLsspQ+ttsth.......h......ss.phpp..............................................W.s..........h.............+tGlpl.sp..hss..t..pppsshplLpssLS....spGYpcshshhths.p.....hLt..........ph.st.......................................................................hsttsY.hslFGp.....Pus....................s...psWuhphtGHHlslNhhh...sspl.shoPhFhGspPshhst..................htG...hc.....hhttEpphuhpLhpuL....ssppptpAhl.tth...............................................p-.hphsus.tc.....................hs.pGlhsu..phospQpphlhsllpp.althhspphtp..tphppl..ptthscTaFuWhGs....htssc..s.hY..aRlpuPslllEaspps..................sst.HlHolhRs.ss.DaG..t................. 0 49 81 103 +11840 PF12007 DUF3501 Protein of unknown function (DUF3501) Assefa S, Coggill PC, Bateman A anon PFAM-B_3488 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and archaea. Proteins in this family are about 200 amino acids in length. The structure of protein Swiss:Q63J81 from B. pseudomallei has been solved. 
This protein contains two domains, domain I (1:31, 46:81) is a helical domain, domain II (32:45,82-193) is a mainly beta protein with a beta barrel. According to crystal contacts the proteins probably functions as a dimer. The gene neighbourhood analysis suggests that this protein may be functionally related to rubrerythrin and ferredoxin. The wedge surface between the two domains might be functionally important. The fold of this protein could best be described as a circularly permuted C2-like fold (details derived from TOPSAN). 25.00 25.00 104.10 103.70 23.20 23.20 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.09 0.71 -4.63 22 127 2009-09-11 15:56:04 2009-07-07 17:34:23 3 2 125 2 46 112 664 190.20 41 94.10 CHANGED hplshsslhs.hpsYu+hRtph+spllthK+pRpVplGstlslhFEsptTlRaQIQEMl+lE+hhc-tsIpcEl-sYssLlPsuusLpATlhIEhsc.s-RcptLscL.hGlccplalclsstt.tlhuls-tDhtRpss-.+sSoVHaL+F-lsss.hsth+s...uss......ltlGsDHssYshpst..lsssstpuLhsDLt ........................plTRssLLo.lEsYuKlRtph+s+llshK+cRtVplGs+lpllFEscsTlpYQIQEML+lE+lh-cpsIppEL-AYsPLlPcGssLKATlhIEhps.scR+ttLs+L.hGIE-claLcVssct..VhAIA.....-EDh-R-su-.KTSuVHFLRF-Lssshhsth+s...Gss...lplGsDHPsYshpst..lsssltsoLsuDL..... 0 14 30 40 +11841 PF12008 EcoR124_C Type I restriction and modification enzyme - subunit R C terminal Mistry J, Gavin OL anon pdb_3evy Domain This enzyme has been characterised and shown to belong to a new family of the type I class of restriction and modification enzymes. This family is involved in bacterial defence by making double strand breaks in specific double stranded DNA sequences, e.g. that of invading bacteriophages. EcoR124 is made up of three subunits, HsdR, HsdS and HsdM. The R subunit has ATPase and restriction endonuclease activity. This domain is the C terminal of the R subunit [1]. 
21.60 21.60 21.70 21.70 21.40 21.50 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.93 0.70 -5.20 79 1157 2009-07-08 08:20:11 2009-07-08 09:20:11 3 9 1042 10 158 990 58 263.60 21 28.79 CHANGED hlLhcsapchhcpapc............slp-Lpphhsss.......thspl..ts-psccpFlchFpch.+htstLpsas-.as.........................sptpht................hsppphp-acutYhsltpcl+cpp...ppppt.........s....pDlcF..El-LlcsscI...........NhDYIhpLltch..........ppspppppt..hcplhchlps..s.thcsccclltpFlpplph...........ph.sppslpctappatppcppcplpplspppsL....stctlpphlsshh...tptph..ptsplsc......hls.hphth......tpphp.....hcpplhpcltshlc+a ..............................lLhcsac-hhptapc...........................shpcLpphhsss....................hspl...tsEppp+cFlchFtph.+hhphLpsa--.ap...............................s.ttht........................hsppph..p-Yc..utY.slhcpl+...cppt..pppp.................sh...sDl..sF..El-h.h+stp.I...........NhsYIhpLltph.....................pppt..pppp.hc.....plcc.....hlpp........s.p.+tcccllppFl.p.php..................ph.psp...s.ltpta.pah.ppcpppchpthhpcpsl.....sppth+phhsphh...tt..hp.p.tt.tsplsc..........h...phhh.............phhp........h+ptlhptltthhcc...................................................................................................................... 0 53 104 135 +11842 PF12009 Telomerase_RBD Telomerase ribonucleoprotein complex - RNA binding domain Mistry J, Gavin OL anon pdb_2r4g Domain Telomeres in most organisms are comprised of tandem simple sequence repeats [1]. The total length of telomeric repeat sequence at each chromosome end is determined in a balance of sequence loss and sequence addition [1]. One major influence on telomere length is the enzyme telomerase [1]. It is a reverse transcriptase that adds these simple sequence repeats to chromosome ends by copying a template sequence within the RNA component of the enzyme [1]. 
The RNA binding domain of telomerase - TRBD - is made up of twelve alpha helices and two short beta sheets [2]. How telomerase and associated regulatory factors physically interact and function with each other to maintain appropriate telomere length is poorly understood. It is known however that TRBD is involved in formation of the holoenzyme (which performs the telomere extension) in addition to recognition and binding of RNA [2]. 21.20 21.20 22.20 22.60 20.60 20.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.70 0.71 -4.25 54 413 2009-07-08 09:22:28 2009-07-08 10:22:28 3 9 237 6 156 402 1 125.40 26 14.05 CHANGED ppVstFlhshlpp....ll........P.ph.aG.........spcNppthh.pplppalph.t+aEphslpclhptl+lschp.WLtttttsppht.......................chpppppllt.......palhWlhp.pllhsll+shFYlTE....p..ttppsclhYaR+slWpplsp.slsp..htp ...................................pVhtFlhthlpp....ll..............P.ph.hG..........................sp.pN.pphhh.pplppalpl.t+atphslppl.hpth+l..p.........sht.WLp.t.th..tt.p.h.............................................................................phphpppllt.........palhW.lhssh.ll.LlpshFYlTE....s..thp+p..plhaaR+slWpcL.p.tltph..t.......................... 2 52 83 123 +11843 PF12010 DUF3502 Domain of unknown function (DUF3502) Assefa S, Coggill PC, Bateman A anon PFAM-B_3448 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 140 amino acids in length. This domain is found associated with Pfam:PF01547. 
47.30 47.30 47.90 47.30 47.00 46.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.51 0.71 -4.19 18 679 2009-09-11 16:21:44 2009-07-08 11:40:33 3 5 494 0 91 398 5 134.70 42 27.71 CHANGED GIEGpHY-+lscs................pI+hhss..psY.shss.Wsh..GNhhl...phshEs-s..D+W-pacchsppApsSPhLGFpFDsssVcopIuulsNVhpcapssLhTGoVD.P-ctlschhpKLcsAG.lDKVhcEhQ+QLD-a.ptpsp .....................G.EGcpaEKlssp................+l+....lL.....c....u...h...p..s..s.......h.....c.h.u.u....Wst.....GNshI....l.Yhp...Es...s...s-....p...plcp.p.c....c......h.tc....A.+..p.S.P..hLG.FhFsocsVKoEIoulsNlhppatsulsTGTV...D..P-cs...lPcLhcKLKsuG..h-KVhpEhQKQhDEa.lpsp........ 0 56 77 82 +11844 PF12011 DUF3503 Domain of unknown function (DUF3503) Assefa S, Coggill PC, Bateman A anon PFAM-B_2686 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in viruses. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF00271. 25.00 25.00 54.30 52.00 21.40 21.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -11.18 0.71 -4.53 12 81 2009-07-08 10:43:55 2009-07-08 11:43:55 3 4 41 0 0 79 0 166.00 60 25.25 CHANGED M-NcLPsIFaFPNCVslFPacYSQpEh-cMppp-+ctFShAVFPlIKHRWpcuallhc.spsaKLssE.pp.ph..p+ls.ssl.s.....Psplshph+pYhhss.h+IoFECYSYLpCpphs.clpshs-...llRGLlEGGNpLpIFSssh.....G.phssoIGIhGNspPFsKlPLtSLpP .......MEKNLPDIFFFPNCVNVFSYKYSQDEFSNMSc....sER-uFSLAVFPVIKHRWHNAHVVKH.KGIYKVSsE..A.RG..+KVSPPSLGK......Ps+INLouKQY.IYSE.+sISFECYSFLKCITNs..EINSFDEY..ILRGLLEAGNuLQIFSNSV.....G.KRsDTIGVLGNKYPFSKIPLASLTP................ 0 0 0 0 +11845 PF12012 DUF3504 Domain of unknown function (DUF3504) Assefa S, Coggill PC, Bateman A anon PFAM-B_2196 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 156 to 173 amino acids in length. 
29.70 29.70 29.70 30.20 29.60 29.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.91 0.71 -4.49 23 439 2012-10-02 14:09:14 2009-07-08 11:46:23 3 21 79 0 263 356 1 158.10 35 17.41 CHANGED uhlh.Scl-E-hLWps+p.LGspSPhsLLpoLhaasT...KYFsl.+Tl-pHtcLuFuplh+ps+pt.........tsc.shlRahssp.t.pstpsh...ucppc...................ptt.h.hp.h-ssssPh+CPVpha-hYL.Kps..slcptpssFYLpPc+sssssuslWYsppslucpsLpphls+lhhs .......................................................................lh.S+l-E-hLWcs.KQ.LGsaSPhsLLsTLhaFNT...KaFsL.+Ts-pHhcLuFuplh+pp+p....................tsc.shlRah...s.s.h....ttp.s..t..p..t........scpp+...................ppt.h...hE.hENsp.NPh..RCPV+.....la-hY.LuK.sP..psl+pRs..DsFYLpP......E...p.....s.s...........ss........s.S.....P........l....WYospslscspLppMLsRlhh.......................... 0 90 109 169 +11846 PF12013 DUF3505 Protein of unknown function (DUF3505) Assefa S, Coggill PC, Bateman A anon PFAM-B_2856 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 247 to 1018 amino acids in length. This region contains two segments that are likely to be C2H2 zinc binding domains. 26.60 26.60 26.60 26.60 26.50 26.20 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -10.86 0.72 -3.82 16 197 2009-09-11 05:54:46 2009-07-08 11:50:44 3 15 33 0 172 211 0 103.00 23 12.06 CHANGED pLFha.spapVhICRp..CcauVhP..pplpsHL+t+H+ph........psttt.....plppsl.........psas..tpcspssphPs.s.s.PlPtLPla.sGhtC..s.spCpYlspshpsh+cHhpppHs .........................................h..ph.llICpp..Cpaulhs.....splpsHLpp.cH+th.............tt.tp..tltptl..............ppht......l..htss....p...s...l....p...h..Ps....t.stPlstL..shh....sG.ht.C.................stC.ta.hstshpshppHhpppH................... 
0 42 80 167 +11847 PF12014 DUF3506 Domain of unknown function (DUF3506) Assefa S, Coggill PC, Bateman A anon PFAM-B_3293 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 131 to 148 amino acids in length. This domain has a conserved KLTGD sequence motif. 39.70 39.70 41.00 39.80 38.80 39.00 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.79 0.71 -4.35 12 152 2009-07-08 10:53:22 2009-07-08 11:53:22 3 10 108 0 101 146 1 130.20 41 25.46 CHANGED pslosaspl-...hoPoppcPapGlalGsausaGsEhlhLpp+.Gp.ptscssppp.................psh.hhthlEAlKLTGDPNVPtGploFhAc.IGcpshlp..cc.sha....Gs..thh+upG+lAs.GF+sscal-u-Llhlssc ...................................p.t.otaspl....hsssspcPapGlaVGsausaGsEhl.lp......p+.sph.......tps.................................................................ssh.ahthlEAlKLTGDssVPtGploFhAc....lGp....s.uhlp..pc.shas.........Gs...thhKupG+lApsGF..+ss+al-ucLlllssc................... 0 23 58 84 +11848 PF12015 DUF3507 Domain of unknown function (DUF3507) Assefa S, Coggill PC, Bateman A anon PFAM-B_3482 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 180 amino acids in length. This domain has a conserved ENL sequence motif. 
25.00 25.00 25.30 25.80 24.60 24.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.04 0.71 -4.78 10 67 2009-07-08 10:56:31 2009-07-08 11:56:31 3 2 65 0 45 66 0 169.00 29 11.77 CHANGED lsslph.htols.sphhstps.hp.tpp.ppcWspl...FspuslFpGhD-llaGphlshVYccscos+lssh.lsKaGlosacNlslsspSRFaPAsENLpPcYpcSsVRRsLAlohLKp.......YsLLsssshphl.sphs...saDpTaAG-LAsshpLlss.psPp-hGppLlphGLLQs..+slpShllDVV .............................h......................................tphshapu.Dsll......aG.hl.....sss.pssp.o.....s+lpuhllsphGhpsasplslSssStaYsAlpNLs.ccQpspVp+uLAVshLKh.........aspL.ssst...hppl.....t....s.........phs.....ptaDEhaAG-LAspM.phlps.t.........s.........chspplh..GhhQp..+h.l.sh.lDVl....................... 0 7 20 39 +11849 PF12016 Stonin2_N Stonin 2 Mistry J, Gavin OL anon pdb_2jxc Family Stonin 2 is involved in clathrin mediated endocytosis [1]. It binds to Eps15 by its highly conserved NPF motif. The complex formed has been shown to directly associate with the clathrin adaptor complex AP-2, and to localize to clathrin-coated pits (CCPs) [1]. In addition, stonin2 was recently identified as a specific sorting adaptor for synaptotagmin, and may thus regulate synaptic vesicle recycling [1]. 
96.90 96.90 125.00 124.30 96.60 96.30 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.26 0.70 -5.29 2 50 2009-07-08 10:57:23 2009-07-08 11:57:23 3 3 30 1 27 46 0 287.10 77 35.28 CHANGED MTTLDHVIATHQSEWVSFsEEP.FPs..pGGTEEHhPGLSSSs-pSESSSGENHVVDtGSQDhSHSEQDDSSEKMGLISEAASPPGSP.QPsPDLASAISNWVQFEDDTPWuSTSPPHp...ETALsLTMPCWTCPSFDSLtRCPLTSESSWTTHSEDTSSPShusSYTDLQLINsEEQsSGpASGsDSTDNSSSLQEDEEVEMEAISW.AuSPAMNGHP.APPVToARFPSWVTF-DNEVuCP.PPVsSPhKPNTPsuAoshPDVPaNShGSF.KRDRPKSTLMNhsKVQKLDISSLNRsPSVhEAPPWRATNPFLNETLQDVQPSPINPFSAFFEEQER ................MTTLDHVIATHQSEWVSFsEEP.FPs.SpGGTEEHLPuLSSSsDpSESSSGENHsVDGGSQDhSHSEQDDSSEKhGLISEAAS.PPGSP.QPsPDLASAISsWVQFEDDTPWuSTSPP.Hp.s.E.TALsLThPCWTCPSFsSLtRCPLTSESSWTTHSEDTSSPShusSYTDLQLIsAEEQsSGpASGADS.TDNSSSLQEDEEVEMEAISWQASSPAMNGHP.AsPVTSARFPSWVTFDDNEVSCPLP.PlTSPLKPNTPPsASVhPDVPYNShGSFKKR-RPKSTLMNFSKVQKLDISSLN+s.PSloEAPPWRATNPFLNETLQDVQPSPINPFSAFFEEQER.... 0 1 2 6 +11850 PF12017 Tnp_P_element Transposase_37; Transposase protein Assefa S, Coggill PC, Bateman A anon PFAM-B_3357 (release 23.0) Family Protein in this family are transposases found in insects. This region is about 230 amino acids in length and is found associated with Pfam:PF05485. 
34.00 34.00 34.30 34.10 33.30 33.90 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.61 0.70 -5.13 6 118 2009-07-08 11:00:53 2009-07-08 12:00:53 3 6 45 0 59 139 0 205.90 34 36.85 CHANGED uNuSTQTEssllsp....ENcoLRpKIRsLEpEl+pLRQQLE-uppLEpSLspIFT-TQIKILKsGGKRusFNSsDhSsAICLHTAGPRAYNHLY+KGFPLPSRsTLYRWLSDV-IpTGsLDVVIDLM-N--MD-ADKLCVLAFDEMKVAAAFEYDSSADllYEPSsYVQLAhVRGLKKSWKQPVFFDFsTtMDsDTLpsIIpKLH++GYsVVAIVSDLGsGNQ+LWpELGISEp.K ..............................................................t....p.hcp....plp.p......hchchp....pLc...p....pl.....c.p...........p....p...lc.....c....sL...pph......Fop.s..Q.l.+..h...L.......p..ss.s...p.+..s.p.ass--hutAlsL.+...s.s.uP+uYpaLh.c.+.saPLP....SppTLh+a.lsslplp.G.hh..c.s.llcl....h....c..s..c...t..h...s...ptD+..lC....lLsaDEMp......lssth-YD...............s..........s.......t..........D.....h........l............h...........c.........s....u...sa.....lt.....ls...hlRGlpcsWKQPl...aa.....sF...s.o...t.Ms....s..ssL....p....p....I....l....p....+....LpphGh.VlAlVSDhGssN.phhpcLGl................................................... 0 23 28 57 +11851 PF12018 DUF3508 Domain of unknown function (DUF3508) Assefa S, Coggill PC, Bateman A anon PFAM-B_3527 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 280 amino acids in length. This domain has two conserved sequence motifs: GFC and GLL. This family is also known as UPF0704. 
25.70 25.70 26.20 26.10 25.40 25.60 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.70 0.70 -5.42 10 142 2009-07-08 11:03:06 2009-07-08 12:03:06 3 5 95 0 95 138 2 241.40 28 45.54 CHANGED ulLccslPsosp+I-ppLpsspctshcYTulLEchst....ssthspcl...h..LK-ALYNlRQaEsFLphlLoDlhssApp......V.EhhppchtuplEpLKpsl+sKoAVPTsQVF.....PhFsALupLWsuLp-EphLlssLsNLhspLpsFlssacLhhP..tpsh.slLsstsV+oDtsRhcp.........shtcclslu-apspEa...LhPEsTAsFpcL.lQYpGFCuaTlss+DGLLLPGNPulGlLKa+-KaYsFuo+cAAh.cFupc.P-pYIstltEpA++ssELIpLLcLcQpFpol ............................................................s..lp.s...h.ptlptplp...tstp.shpa.sullcphht...............sshhttph...h.....hp-tLhphRQaphalphlhpsl.thtpp.......l.p.hhpphtsthpplp............s..........l..........p...........+s..........ulsotpVa..............PhFhtLuplWs.shpc.Eh.hls.lsplhtplpsh..ht.s..pt.hhs....t.h.h.thL.p.....th.lps.c..p.chpc..................................p..h.....hphs-h...pp....ph.........hhs..ps...sssh...p..c...h..l.papGFCuhohstp....s.G...LL......l...........PGs............P...sl..Gllpa....pp....+hYsFsoc-ush.pFupp.P-palthlhchs+ppscLlpLlpLppph...h................................... 0 37 49 74 +11852 PF12019 GspH Type II transport protein GspH Mistry J, Gavin OL anon pdb_2qv8 Domain GspH is involved in bacterial type II export systems [1]. Like all pilins, GspH has an N terminus alpha helix [1]. This helix is followed by nine beta strands forming two beta sheets, one of five antiparallel strands and one of four antiparallel strands [1]. GspH is a minor pseudopilin; it is expressed much less than other pseudopilins in the type II secretion pilus (major pilins) [1]. The function and localisation of minor pseudo-pilins are still to be fully unraveled [1]. It has been suggested that some minor pseudopilins may assemble either into the base or the tip of pili, or both. 
They function as initiators or regulators of pilus biogenesis and dynamics, and/or as adaptors between various pseudopilin component and other members of the T2SS [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.61 0.71 -3.93 214 1652 2009-07-08 11:47:00 2009-07-08 12:47:00 3 3 953 3 399 1257 143 122.00 15 68.53 CHANGED ssppLtsslphARscAlp..p.spslslsstss..stth................tt...Wssuhhlhht..tssstt....t...........................thssshphshs...........................................ssplpF..s.ssGhs..........................tshs..ht.......hsssspspt..............l.hl.ss.....sG .................ApclhstlphA+scAlh..p.s...ps.lslphs...ss..uhph..........................tsp...W..pss.htshhs....ssstt..........tth.h........................h.ssssththp.t.....................................................sspltF..p..spG.s................................sshs....lp.........htssstt.....................th........................................................................................... 0 100 222 319 +11853 PF12020 TAFA TAFA family Assefa S, Coggill P, Bateman A anon PFAM-B_3899 (release 23.0) Family This family of secreted proteins are brain specific and thought to be chemokines [1]. These proteins are found in vertebrates. Proteins in this family are typically between 94 to 133 amino acids in length and contain a number of conserved cysteines. 25.00 25.00 25.90 41.70 16.90 16.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.24 0.72 -3.88 3 241 2009-07-08 13:15:59 2009-07-08 14:15:59 3 2 38 0 140 188 0 84.20 68 71.79 CHANGED +sGTCEVIAAHRCCNKNRIEERSQTVKCSCLPGQVAGTTRA+PSCVDASIVlQKWWCcMEPCLEGEECKVLPDpSGWSCSSGNKVKTTRV ..............sGTCEVlAhHRCCNKNRIEERSQTVKCSChPGQVAGTTRApPSCVD..............A.SIVhpKWWCcMpPCL-GE-CKlLPDhoGWoCoo.G.N.KlKTT+l..................... 
0 5 20 68 +11854 PF12021 DUF3509 Protein of unknown function (DUF3509) Assefa S, Coggill P, Bateman A anon PFAM-B_2180 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 92 to 110 amino acids in length. This protein has two completely conserved residues (G and R) that may be functionally important. 25.00 25.00 25.20 25.00 24.50 24.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.93 0.72 -4.24 19 201 2009-07-08 13:16:45 2009-07-08 14:16:45 3 1 60 0 39 130 2 93.10 31 94.84 CHANGED cph.phls-sFss.YpVshu..csDGullLTLpsssG.hlh+RhlotsQhs-tppLppllsul+R-LAlctGchs.plluthppt..............schts..........httt .....p..phls-sFss.YpVshp.sRsDGulLLTLpsppG..hlhcRslossQLs-.ppLppllpulRR-LAlptGch..plluthppt...........tp.........h.................... 0 2 6 26 +11855 PF12022 DUF3510 Domain of unknown function (DUF3510) Assefa S, Coggill P, Bateman A anon PFAM-B_2857 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 130 amino acids in length. This domain is found associated with Pfam:PF06148. 29.10 29.10 29.70 30.10 23.90 28.80 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.45 0.71 -4.02 21 198 2009-07-08 13:21:08 2009-07-08 14:21:08 3 8 161 0 133 191 2 126.70 34 17.21 CHANGED lpplhppsh.ptLcplpslsphYRhTN+psPop......sSsYVssllpPLpsatptt......thlspthhcchlpcllsplocpYhptss-llsoVcKpp-SLpRl+ptptpsuss...............shsD-DKI+hQLhlDl ..............................ppLscpCh.p.L+p.spslPphYR.tTNKp..lPop.............sSsYVssh..LcPLpp.hhstp............................pshltpthh....pphlptslspsTccYhphls-VLsoV+KhEESL..........+..RLKpspppssussss................................sssuhSD-..DKIRlQLhLDV............................. 
0 56 76 109 +11856 PF12023 DUF3511 Domain of unknown function (DUF3511) Assefa S, Coggill P, Bateman A anon PFAM-B_3314 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 50 amino acids in length. This domain has two completely conserved residues (Y and K) that may be functionally important. 25.00 25.00 33.70 32.60 17.70 17.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.32 0.72 -4.64 14 157 2009-07-08 13:24:58 2009-07-08 14:24:58 3 4 20 0 99 137 0 46.60 52 35.52 CHANGED hthsDPEhKR+RRVAuYKsYuVEGKlKuSlR+ua+WIKs+ho....p........llcG .......hsDPEh+R++RVAuYKsYuVEGKlKuSlR+uF+WIKs+ho....plhaG................. 0 12 58 82 +11857 PF12024 DUF3512 Domain of unknown function (DUF3512) Assefa S, Coggill P, Bateman A anon PFAM-B_3525 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 231 to 249 amino acids in length. This domain is found associated with Pfam:PF00439. 
33.70 33.70 37.20 34.40 28.00 30.30 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.52 0.70 -5.08 13 193 2009-09-11 13:40:57 2009-07-08 14:46:16 3 5 85 0 107 170 0 213.90 36 37.66 CHANGED ppKc.s....+p.h+ht........cscspspshssshstE+lLtplcpsucEAps+ls++hssuKhGFLR+psDGoTohslltss-sputEt..sp+sVsLGshsGKLpsGsssLtGFKED+RNKVTsVs.LsYGsaoSaAPpaDSsFuslup--oDLlYuTYG--oGspsAhSlp-FlcssspaupthscsLLDhlTsG-HS+oltplppppppp.p.sc......................pspstttppusshlDatuL+oloslGlDsphL .........................................................p.p....+p...phh........ps.hptp.hsssh...E+..t.lcphscE.uts+ls.phhssuphta.++psDGohh.hsllp..ss-s.st-..........thpsVcLu.hou+L.s..Gh..s.oL...G.FK..-D+RN+VT...lp.h..YusaoSaAPpaDSsFuslsp--o-LlYSsYG-.......-oulps.uhS..lpEFltss..ssYsht.hsDsLLD.lTsG-HS+ol.plcppp..s.t...c.................................pht.s.h....tss...s..chhuhcshs.shulshphh....................................................... 0 26 34 66 +11858 PF12025 Phage_C Phage protein C Assefa S, Coggill P, Bateman A anon PFAM-B_3530 (release 23.0) Family This family of phage proteins is functionally uncharacterised. Proteins in this family are typically between 68 to 86 amino acids in length. 25.00 25.00 61.90 61.70 24.50 21.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.26 0.72 -4.02 3 67 2009-07-08 13:51:34 2009-07-08 14:51:34 3 1 54 0 0 34 0 65.40 71 83.07 CHANGED sLSL+sSRSSYFATFRHQLslLoKTD.ALDEEKWLNMLGsLLKDWFRYE-HFVHG+cSLlDILKERGLL sLSL+sSRSSYFATFRHQLslLoKTD.ALDEEKWLNMLGshlKDWFRYEsHFVHG+cSLlDILKERGLL 0 0 0 0 +11859 PF12026 DUF3513 Domain of unknown function (DUF3513) Assefa S, Coggill P, Bateman A anon PFAM-B_3541 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 192 to 218 amino acids in length. This domain is found associated with Pfam:PF00018, Pfam:PF08824. 
This domain has a conserved QPP sequence motif. 25.00 25.00 28.20 28.20 21.60 20.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.38 0.70 -4.78 14 329 2009-09-16 12:09:40 2009-07-08 15:01:52 3 10 100 3 138 289 0 197.70 44 27.92 CHANGED ssst.p.phsttthhEDYDYVHLpup-php+pptplhc+t..............................................t.pp.EpLcppstcstpps...............oPsp.hs.........lsspD+pLLhaYucQCptahssLhpAlsAFhuSlsssQPP+lFlsHuKhVIluAHKLVhlGDTLsRpspst-lRscVhpsSstLCphLKslVLuTKsAALpYPSsuAlp-MsscVpcLsppsppF+shLt ....................................................................................s......p.sttshh-DYDYV+LpG..+-c..hp+ppcplh-pt.sh.....t.......................................phppachLcp.phs.p.s.h-ps..h.tt...........................................pPspshs..............sssluspD+QLLhF....Yh..-QCcsahssLhsAlDAFao.........sVussQPP+...........IFVAHSKFVILSAHKLVFIGDTLsRpsput...DlRscVhptSshLC-hL+slVhsTKtAALpYPSssAhQ-MVcpVp-LuppsppF+psL.h........... 0 22 34 72 +11860 PF12027 DUF3514 Protein of unknown function (DUF3514) Assefa S, Coggill P, Bateman A anon PFAM-B_3570 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 368 to 823 amino acids in length. 
25.00 25.00 27.30 33.70 16.20 20.30 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.71 0.70 -5.14 26 49 2009-07-08 14:03:17 2009-07-08 15:03:17 3 2 7 0 32 48 0 222.90 18 45.07 CHANGED lhh+s-l.shLpph.ssphchsshllKluhlhp.hhhhhst.ph...sptlhhs.t.phhc.phlD.lLhclhssspss......thshpthhphhhshptpph.spshthasssF..appllslhpcll.ts+hsssh......h...pllshlllSCslplGsMY......t.hthhsptsss.tp......................ptthhshhshplhpshtahYssssp...slphl..sslshPphllchCSpph.hsplpppltpsph..sFslhhpclspplsphlspps ......................hhp.ph.phl.p..hpp.phthhllKhshl.p.hh.hhsh.th...sp.lhhs.t...h-..hl-.lLhplhsh.pps.......h.hphhhthhhshhtpth..tshthhsssF......Ypplhplhtplh..h+.sps.......h....plhshlllussh.lGtha........hhhhtp.tht.tt..........................t.h.hshhshplhhshhahYssssp.....l.hl..sshshPthllphsSpth.hs.lpp.htpsph..sFslhhpslspplsphh.p........ 0 9 9 9 +11861 PF12028 DUF3515 Protein of unknown function (DUF3515) Assefa S, Coggill P, Bateman A anon PFAM-B_3590 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 166 to 214 amino acids in length. This protein has a conserved RCG sequence motif. 
25.00 25.00 37.50 32.40 20.20 19.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.28 0.71 -5.00 26 415 2009-07-08 14:05:35 2009-07-08 15:05:35 3 2 352 0 114 292 45 149.20 30 92.10 CHANGED tssllussslsslLssAusht.....tslulushPAPpAssst.CpsLhssLPppLushpRuthh-PsstGsAAW...Gss.tsllLRCGl-p.Psp.hsssush.sV..cs...VpWhtsssts...sus.........................so.has.VsRsshVtlTls.sss.......Gs.oshpsLoslhsss......hthtshpsss ..........................................................h.........hhhhhhhsh..........................................s..sh.s.sss....ust............Assst...Ctslh..ssLPpp.l.....u.....s....ht+....t...s..hh...ps...s...ss.usA.......uW.....Gss...sl.l.LRCGV-p.Pu.....t....h.........p.........s..s.......ush.ps.........V.........ss...........VsWFp.psss.....G..............................................ts.hao.lsRtshVtlTlP..ss.t.......us..tshss.Ls-lhsshhths..t.......................... 0 34 87 110 +11862 PF12029 DUF3516 Domain of unknown function (DUF3516) Assefa S, Coggill P, Bateman A anon PFAM-B_3601 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 460 to 473 amino acids in length. This domain is found associated with Pfam:PF00270, Pfam:PF00271. 
25.00 25.00 31.60 30.80 21.60 20.50 hmmbuild -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.60 0.70 -5.68 23 417 2009-07-08 14:07:34 2009-07-08 15:07:34 3 5 397 0 120 382 22 445.70 48 54.35 CHANGED APEH.lENt+thAKA....GDDP...KKpRKlhRKKsPE.GaVsWucsTF-RLlpAcPEsLsSpFcVopuMLLNVl..........uRsGDsasuh++LLp-NH-sRspQp+hl+RAltlaRuLlsAGVVEc.l..-pPD....spGR.psRLTlDLQtDFALNQPLSPFALAAL.ELLDsESsoYALDVlSVlESTL-DPRQVLhAQQcpARGEAlAtMKA-GlEY-ERMtlLEEVTaPKPLtElLcsAa-hYRpuHPWlsDapLuPKSVVRDMaERAMTFs-aVspYGLuRSEGlVLRYLuDAY+AL+pTVP--tRTEELpDlI-WLGELVRQVDSSLLDEWEpLssPss...stssp........shsspsPpslTANpRAF+VhVRNAhFRRVELsAtcch.stLupLDs......tsG....hssssWp-AL-sYa-EH-cIGTGPDARGPpLhhl-cs..........s......................c......hWpVRQllcDPsGDHDWuIsApVDLsASDEAGcsVlcssslsp ..................................APEH.IENtKAlAKA........GDDP.....KKh+K...l....tRKKsPE.GFVsWuEpTFp+LlpupPEsLpu+h+lTcuMLLNll............sRs....G.....D...shtshc+Llcsspp......shtppp+hhcRAlpIacoLlsusVVc+l......-pP.s...........ssG.R.....phtlsh-LpcDFALNQPLSsFALAAl.-LL......D.....P......-S......s.................o......Y..ALDVlSVlEATL-cP+plLhAQp+pARGEAlAtMKA-Gl-Y-ERMttL--loaP+PLc-hLpsAF-hYtpupPWls-htLpPKSVVRDMhE+AMTFs-alupYtlsRSEGlVLRYLoDAYRuLcpTVP-st+o.-ELpDlIpWLGElVRplDSSLlDEWEpLssPts........ttspp.........................spsspslTuNpRAFpVhVRNAMFR+Vp.LhAh-ch.-tLGt...L-s.......................thu...........hsss.sWp-sLssYaDEa..--lssGs-ARuPtLhhl.-pp................s.....................................c...hWpVRQI..lcDPsG-HDWuIsApVDLsu.o.D-sGcsVlcshsh.s.................................. 0 42 88 112 +11863 PF12030 DUF3517 Domain of unknown function (DUF3517) Assefa S, Coggill P, Bateman A anon PFAM-B_3933 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 340 amino acids in length. This domain is found associated with Pfam:PF00443. 
25.00 25.00 39.90 27.20 23.30 22.90 hmmbuild -o /dev/null HMM SEED 337 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -12.04 0.70 -5.89 7 66 2009-07-08 14:28:27 2009-07-08 15:28:27 3 3 65 0 54 70 0 333.30 29 14.32 CHANGED DYFEhLh.LAsFGshEsthlLppGFLLKCLEIlWLD+EDSKRL+RQYhsYh+LlEKGR+FSHRKLh-LLuhLLs+IDhTssPos-DcR+sLPsG+aoLThoEsshlRPL.GRspELslL+KlLpQ.ssPpAsRsIVulhlDAEPEAGLhDPICKVLE-GLRluP.AcLCAPFLEATLIFCpRSPDc-RIVuLIDaVAKGV-SINDSGG+EHLAFFTulhssRNERLuL-EsWFLSpllD+IPDWAPTLLhaPDRsVRNMTh-hL+pILFosEAp-hs..-DaQsRau-lAKELV+ASl-+LRKsaLssPGssVEs+sl..Eol+sVI-HCLsoYF.sDSE-DQ- ......................................................................................................DYFthlltlAphGthEsthlLpc.GFLh+.sL.cllhhD..pcs......o.p...cLpc.pYssh.h+.ll..p...K...sRphS.....atp.LhpLLshLlsp....l....D....h....sh..s.........P....s......s....s....s..pp.....c.....p....h....t.....s.....s..+...a..........sl...........Ths..E.spll.p.s.h.s+p.p..s.h.hhlcKlLp.ppsstso+....pIlshllst..p...sph..sL..t..-sl.h+sLccGlpspP..upL.ssPFLc.A..sLlFCptusshsplpsLIcalscsscS...lss...u......s......GcpaLsFapslh.ss.c.Npp.h..ttsc..t.hhhstl.l-plPcWAPsLLtasDc...sVRssT.caLpplLF...........s........t.....p.h......p....-.h...s...-.....-.......h......pp......h..h....pc....h....u+pL......upsClp.....hL+csals..s.s...s..p..l.pspsh.pslphVlppChthYF.sptp....................................................................... 0 7 24 41 +11864 PF12031 DUF3518 Domain of unknown function (DUF3518) Assefa S, Coggill P, Bateman A anon PFAM-B_3830 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 260 amino acids in length. This domain is found associated with Pfam:PF01388. 
40.20 40.20 58.30 43.20 29.50 23.90 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.77 0.70 -5.58 6 191 2009-09-14 14:01:10 2009-07-08 15:30:20 3 4 82 0 96 173 0 241.10 64 14.63 CHANGED DSLAKRCICVSNIlRuLSFVPGNDsEMu+HPGLlLILGKLlLLHHEHPcRpptPtTYp+EE-c-pGluCS....K-EWWWDCLpsLRENsLVTLANISGQLDLSsYsEoICL..PlLDGLLHWhVCPSAEAQDPFPTsuPsSsLSPQRLVLEsLCKLSIpDsNVDLlLATPPFSR.EKLassLVRalGsRKs.VCREMuVsLLSNLAQGDstAARAlAlQKusIGsLluFLEDulshAQaQQStHSLhH.Mt..sPsh-PsSlDMM .......................DSLA+RClCVSNIlRSLSFVPGNDhEMSKHPGLlLILGKL................lLLH.HcH.....PE....RK...pAP...TY...E.KE..E...-....p..DpGVuCs.........................KsEWW...W.DCLEhLRENTLVTLANISGQLDLSsYs.ESICLPlLDGLLHWhVCPSAEAQDPFPT..lGPNus..LSPQRLVLETLsKLSIQDNNVDLILATPPFS..R..EKLYuoLVRaluDRKssVCREMulsLLuNLAQGDoLAARA.IAVQKGS.IGNLluFLEDulshsQaQQSQpuLhH.MQ..sPshEPsSVDMM.......................... 0 25 33 60 +11865 PF12032 CLIP Regulatory CLIP domain of proteinases Mistry J, Gavin OL anon pdb_2ike Domain CLIP is a regulatory domain which controls the proteinase action of various proteins of the trypsin family, e.g. easter and pap2. The CLIP domain remains linked to the protease domain after cleavage of a conserved residue which retains the protein in zymogen form. It is named CLIP because it can be drawn in the shape of a paper clip. It has many disulphide bonds and highly conserved cysteine residues, and so it folds extensively. [1] [2] 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.19 0.72 -3.87 120 622 2009-07-08 14:42:14 2009-07-08 15:42:14 3 13 68 4 294 670 0 53.10 30 15.22 CHANGED CpsP..ssp.sGpClslppCpslhpl.hpp.....p.hsspctpaLcp..upC..Gh.....tss.pshVCC .............ChsP..s..sp.sGpCltlppCs..lhpl..lpp.....p.hss..pptpaLpp...upC....Gh...............tss.pshVCC.................. 
0 55 100 249 +11866 PF12033 DUF3519 Protein of unknown function (DUF3519) Assefa S, Coggill P, Bateman A anon PFAM-B_2444 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 117 to 1154 amino acids in length. This protein has a single completely conserved residue Q that may be functionally important. 27.50 27.50 28.20 27.70 26.70 26.70 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.53 0.72 -3.85 10 158 2009-09-11 06:57:10 2009-07-08 16:12:16 3 3 48 0 3 164 0 96.40 26 22.41 CHANGED ll-phsKl-cpst.hAhcapN....l+lslpcppsspclhsaa..puacchh-ss.hhpsSsh.......hsttphpshsuscsNPTsKPLTSQEsL....LKsoENLNEsTsEsspLSP ..........................llcph..+.h.cp.tht.hAhchtN.....h+l.s.lpsphssppL.saa..puac....hp.-sphhhphpsh..........htttt.hps..hs.scs..NsTpKsLpoQEsL.........hpppct.tp............................................. 0 2 3 3 +11867 PF12034 DUF3520 Domain of unknown function (DUF3520) Assefa S, Coggill P, Bateman A anon PFAM-B_3604 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 180 amino acids in length. This domain is found associated with Pfam:PF00092. 
25.00 25.00 34.10 31.40 19.50 17.70 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.16 0.71 -4.16 49 418 2009-07-08 15:16:07 2009-07-08 16:16:07 3 13 396 0 115 369 71 179.40 45 30.57 CHANGED uTLhTIAKDVKlQVEFNPApVucYRLIGYENRhLscEDFsNDpVDAGEIGAGHsVTALYElsssusts........h-sLRYtt.......stts..........sstssELAhl+lRYKtP....suspSpLlphsl...t.tsshspuSs-h+F..AuAVAuFGphL.+sucah.......sshsasplh.sLApsutGp..DthG.........YR.sEFlpLlc...hApuLp .............................tTLhTlAKDVKhQlEFNPuhVsEYRLIGYEsRtLpsEDFN.NDpVDAG-IGAG+pVTALYEls.sGppup.........lDcLRYt..................sptss.........sspssELAalKlRaKtP.............pup..cSpLl-.hPl......s.ssh.spsS.cDhRFuAAVAuaGQhL.RuScah........sshoasplhphAppupGp..DstG.........YR...uEFl.cLlchAcsh.s.......................................... 0 48 77 96 +11868 PF12035 DUF3521 Protein of unknown function (DUF3521) Assefa S, Coggill P, Bateman A anon PFAM-B_3612 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 42 to 74 amino acids in length. 27.20 27.20 27.30 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.57 0.72 -4.11 23 1098 2009-09-11 15:50:18 2009-07-08 16:24:58 3 4 252 0 1 353 0 30.30 39 57.64 CHANGED MPDAhphuu..hpthspllsh...ttaVGphRRs+php.+ ...........MPDAhhhsu...hpsh...p...phhp.......ht.hRps+thp.c...... 0 0 0 0 +11869 PF12036 DUF3522 Protein of unknown function (DUF3522) Assefa S, Coggill P, Bateman A anon PFAM-B_3665 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 220 to 787 amino acids in length. 
25.00 25.00 27.10 26.90 22.40 24.20 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.25 0.71 -4.50 24 294 2009-07-08 15:30:14 2009-07-08 16:30:14 3 4 111 0 167 260 2 162.00 32 30.55 CHANGED .ttthhtslhhslSslshlPslhhsh++pahhEuslhhFshhhShhYHuC-shss.hh.....hshhpachLp.hshluuhhuhaVplhshsthsp.hcpslphhshhhhhlhttts.hshh.hlsPlhhulhhhlstahhc.t+tpphhstphh...........................hhhhhhssulhhhhhuLs.....-spDsYthhHo..hWHh ..........................h....hhtsLlLsLSNLhFlPslhlul+ppa.hhE...uslYhaoMhFSshYHACD.tss..h..........hChhpachL......p....a......hshh...uohhuhaVol.h..shup..hpthhctshhhhshhhhuhhhthst.h...s.hhshhhPh.hhuhhlhhs..t......ahhc..hc..p.+phassphh..............................................................................................................................hhhhhlhs.Ghhhshhu.l.h...hp..sp.....-sYhhhHS..hWHh........................................... 0 45 67 110 +11870 PF12037 DUF3523 Domain of unknown function (DUF3523) Assefa S, Coggill P, Bateman A anon PFAM-B_3746 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 257 to 277 amino acids in length. This domain is found associated with Pfam:PF00004. This domain has a conserved LER sequence motif. 
25.00 25.00 29.40 29.30 23.30 23.00 hmmbuild -o /dev/null HMM SEED 276 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.84 0.70 -5.42 14 270 2009-07-08 15:33:07 2009-07-08 16:33:07 3 5 136 0 167 233 5 216.40 41 43.44 CHANGED PsPPussussssssp.............t.tshsusFDPoALERuAKAL+pL-pSsaA+cAhELhKhQEpT+QtEhpschcchpAthuphpsE+tRl-t-E+RKslpppsppcptpupYcDcLuRcRhpccLppQcppNpE.L+pQEEush+QEthR+pT............ccEtcLc+cNhht+lcAEscuRh+tcRcNcDlphchl+.+usEcRcThlEuIpTshuhlGsGhpshLoDpsKlshsVGGlThLAhGlYTs+pGstVshpalEppLG+PSLlR ..........................s............................t.t.t..tFDspuLERuAcAh+plppS.pA..+c.............shpl.phQEtTh.QhE.pschpchcsthtphc...-ph+hptEEc............RKshpppspppptpApYpDpLuRpRhp.............c...........phtt.............Qphh.pp-.l+hQE.ESs.+QEthR+tT......................hccchcLc+cs.hh+hpAEscu+A+tpRpNtDl.hcpl..+h+AtEcRpThl-uIpTshsh.hGt............GhpshloDhsK.lhssVu.GlThLAsGlYou+pustVstpalEttLG+PSLVR.......................... 0 62 89 134 +11871 PF12038 DUF3524 Domain of unknown function (DUF3524) Assefa S, Coggill P, Bateman A anon PFAM-B_3749 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and eukaryotes. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF00534. This domain has two conserved sequence motifs: HENQ and FNS. This domain has a single completely conserved residue S that may be functionally important. 
25.00 25.00 25.20 25.10 24.70 24.50 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.07 0.71 -4.70 31 155 2009-07-08 15:36:23 2009-07-08 16:36:23 3 6 115 0 73 154 105 154.80 47 44.42 CHANGED M+ILlL-sahuGSHppatctLhpp...pa-hplloL.PuRtW+WRhRGuAloau...pp.hhtpppa...DlllAToMlDLAslhuLpP.pLup....lPtllYFHENQhsYPhss......spp+chpashlNlhSALAADtVlFNSsaN+cSFLsulpshLc+hPDhts.tshlcpIttKupVLsssl ....................MpILllEsFaGGSH+plh-hLtcpl....t....-hslhTL.PA++W+WRhRsuALhFu....pp..lshsppa......clLhAoShLsLspLhuLpP.cL.up....hcpllYFHENQLsYPVpc......spcRDhpauasplhSsLsADhVlFNStFNh-SFLsulspFh+hhPDa+P..psltphIcsKspVlahPl.................................. 0 26 37 53 +11872 PF12039 DUF3525 Protein of unknown function (DUF3525) Assefa S, Coggill P, Bateman A anon PFAM-B_3833 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in viruses. Proteins in this family are about 360 amino acids in length. 25.00 25.00 334.60 132.90 18.50 17.70 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.57 0.70 -5.98 2 191 2009-07-08 16:05:37 2009-07-08 17:05:37 3 4 4 0 0 133 0 144.50 43 73.73 CHANGED LlFEPVTRGKaTF.YPFGHWCLRDTNSMIlYEG+FVs.ctTSlGs.FKLoKShRPl+sGtshHLVPFHlQKLLDSMD-cs.PYSAsHNCTTVILcuIMYRSslGFlFAYulSWAVYhVLRPPQhAATsYpWhaPERoWDpS+hYphLGFAAGGTlPMEhlDpE........P.E-c.S................................DsuRo...s-ND.................................................cQ+cs-h.pEWWhSpDSlcsVpNDlhYhLSFL+sTsIPE-l+L-lVELsYsQhscDEccRIPEP.GT+ILsMPsW+PsNWA+LIDETHRVLSQFopYsPRlLNEhssWL+GLupNLYRVsEPIL.LLlRAMRAAhoVSpRAsRSlYpChCHWLDVMYGGSAPpRlKTVWGLTGhIsSGMTSQ 
.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 1 0 0 0 +11873 PF12040 DUF3526 Domain of unknown function (DUF3526) Assefa S, Coggill P, Bateman A anon PFAM-B_3851 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is typically between 149 to 170 amino acids in length. This domain has a single completely conserved residue P that may be functionally important. 25.00 25.00 35.80 34.00 22.00 21.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.69 0.71 -4.08 38 152 2012-10-03 10:13:34 2009-07-08 17:13:32 3 4 74 0 62 180 55 155.60 23 34.11 CHANGED oph-hshsl+...csh......csssDhthpthtpphlppas.ptscs......LPh.saphhh...hhtscphssphhsphhcphhpthtpQpplschhuhlSPslulpphspslAGTDhtpahcFhppsEpa+tphtphhst.................stshpptphssspaptlPpFpap. ...............................................th-hshthc...csl......pss.D..htphhpphhtpas.ptscs......Lss..sachhh...hhttcphssphhsthstphtpthtpQptlspthuhluPslAlpph.hpLAuTDhtsphpF.pps-ta+tpltphh...................................................sthhpcpthss.scapplPpFpap............................................ 0 24 41 48 +11874 PF12041 DELLA Transcriptional regulator DELLA protein N terminal Mistry J, Gavin OL anon pdb_2zsh Domain Gibberellins are plant hormones which have great impact on growth signalling. DELLA proteins are transcriptional regulators of growth related proteins which are downregulated when gibberellins bind to their receptor GID1. 
GID1 forms a complex with DELLA proteins and signals them towards 26S proteasome. The N terminal of DELLA proteins contains conserved DELLA and VHYNP motifs which are important for GID1 binding and proteolysis of the DELLA proteins. [1] 20.70 20.70 20.70 26.70 20.20 18.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.64 0.72 -4.32 20 463 2009-07-09 08:13:55 2009-07-09 09:13:55 3 4 232 2 35 475 0 60.40 62 14.67 CHANGED DELLAVLGYKVRSSDMA.-VAQKLEQLEhVMuss.p-DG.lSpLuoD.TVHYNPSD..LSsWlpSMLoELN.P.ssssstss ......................s-hh.-VAQKlEpLE.sh.s....................t-DG...lS.......pLASD....TVHYNPSD..LSoWLESMLSElN.s.ssp....s.s....... 0 7 24 29 +11875 PF12042 RP1-2 Tubuliform egg casing silk strands structural domain Mistry J, Gavin OL anon pdb_2k3n Domain Spiders use fibroins to make silk strands. This family includes tubuliform silk fibroins which are used to protect egg cases. This domain is a structural domain which is found in repeats of up to 20 in many individuals (although this is not necessarily the case). RP1 makes up structural domains in the N terminal while RP2 makes up structural domains in the C terminal. [1] 21.10 21.10 24.00 21.30 19.30 20.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.27 0.71 -4.60 17 225 2009-07-09 10:00:34 2009-07-09 11:00:34 3 18 29 3 0 228 0 156.70 38 69.36 CHANGED susupuuSuuuuuus.......uSAFAQu.hSuuLusSSsFusAFuSsoSsSuuuslAhpluhssApoLGl..ususALAuAlupAVuuVGs.GASusuYAsAlusAluphLuspGlLs..uuNAu.ulASShAsAlououuosu................utuputuuuSuhtpuuopouupoAus .....................................sutuuutuuu.tuu.......uSuFupu..uuuLusSosFs...osFuSu....sS.tSssusluhphuhssAsoLGl..ssusulAsAluQA.VuuVGs.GASutuYAsAlusAhuphLuspGlLs..suNAu....oLASShASAlouSAuSsuush............ssu..tusupu....tuuuu.uhtpsuopSuutut..t............................... 
0 0 0 0 +11876 PF12043 DUF3527 Domain of unknown function (DUF3527) Assefa S, Coggill P, Bateman A anon PFAM-B_3945 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 120 amino acids in length. This domain has a conserved CDCGGWD sequence motif. 25.90 25.90 60.30 44.70 25.40 25.20 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.49 0.70 -4.91 12 128 2009-07-09 12:13:05 2009-07-09 13:13:05 3 4 19 0 91 118 0 219.60 24 44.32 CHANGED SsophpuhLphshK.NGhPhF.Fslcs.c....-lhsAshh+ss.......sshshhYTFaohtt..++psuuhhsttp...p......lVGQMpVSs.hs.p.ttpt......hhpEFVLas...t.upp................p..ps.h.pt.t............p.....t...p.hptp..........sh.ps-LpssLElAAlVl.p.s..hpppps...................................h.tpSstsspV..llPuGhHGhPpst..uPSsLIpRW+SGGuCDCGGWDhuCsLtVLssttpps..pp.h.......tppsh-LFhpGspcps...PuLshsslc-GhYtVcFcupLSsLQAFSIClAhlHspc ....................................s..tuhlph..c.ps..hF.h.htt.p.....t.hsAp..p.s........tt.p..Yhha......p+tpt...t.............hluphpsSs....p.t...ttt.......h.pEaVLhs.......t....................................................................................................................................p.Eh.Ahl.t............................................................s..phpl..lhPsG.Huhsppt....tPssLlpRW+pGGt.CDCGGWDhuC.lhlLts...tt......t...................p.hcLhh............puttpt....shhphh.htpG.a.lpFpup.lo.LQuFuhslshlpsp......................... 0 12 57 73 +11877 PF12044 Metallopep Putative peptidase family Assefa S, Coggill P, Bateman A anon PFAM-B_3942 (release 23.0) Family This family of proteins is functionally uncharacterised. However, it does contain an HEXXH motif characteristic of metallopeptidases. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 625 to 773 amino acids in length. 
25.00 25.00 25.90 25.50 24.90 22.80 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.25 0.70 -6.02 28 198 2009-07-09 12:24:12 2009-07-09 13:24:12 3 6 159 0 150 210 5 375.90 31 58.75 CHANGED shclhNlp-sph.VppsslllcGpssstp.....tssslpVppssp...t.aPshs..a.....................sVssup.........FKAllhLs..PGt.Nplphp.........h..........sssspsphlslpYpPhhps.Pl+Lslllu+DSstpaDusstphp..pptsssL-tAI+KlRhuuhLhQAFTsEpMhcsGaGpRoFpFpEEhphssh..........ps.h.+sph....KlHllRSc+TltElRc.slAQQNspupcsstLFshsh-slcphst......psspthpsushhLDo+aDs...ptp.............hIpGHAALGGGssp.lpLAIFGSHuLaSWPsshEpl.sssFpDsTpsssscVANDsNcsGThWEshslslGAahHElGHhhGsPHQps..GlMLRsYsp.h...NRoFhs+EshuhRocopG.....s.hhPpscC.....pWpRLDhLRFhhHPsFpLPtD...................sh.s.ssssha......shssst..shlsussGIh ....................................................t..pl.shppsph.l.p........hlllpGphsp.p................hsthlplpp.ps..........t.hPshs...a.......................Plp..s..up.........FKAllhLs..PG...Nplphp............h..................ss.psphlslpa.h.Phhps.....PlpLslllu+DSstp..aDss.tp.t..pp.sssL-sAl+KhRhuAhL.hQAaTuEpMhcss...............hGpRsFphpEEhphssh...............ps.h.+pph.....+lHllRop+TlsE.lRs.p....hsQQt...p.....upsps.tLaphshcslcphht......tstpp.hsushhLDo+aDsptp.............hlpuHAALGuussp.lpLAl.FGSasLaSaPssh-cl.sssFhDsT.pssss...VuNDsscsGo.WEshslslGAahHElGHhFGsPHpps...GlMhRsYsh.h...NRoFhs+EshshRspppG.......................t.hh..tpps............tWpRLDhlRFh.hHPsFplPtD....................h.sstt.pha......shtsst..hhhhs.sGl...................................................................................... 0 38 74 127 +11878 PF12045 DUF3528 Protein of unknown function (DUF3528) Assefa S, Coggill P, Bateman A anon PFAM-B_3981 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 185 to 298 amino acids in length. This protein is found associated with Pfam:PF00046. 
29.50 29.50 37.60 37.00 27.60 26.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.12 0.71 -4.06 29 255 2009-07-09 12:25:28 2009-07-09 13:25:28 3 3 89 0 82 221 0 140.80 51 50.90 CHANGED sDFSSlPSFLs..psSSpPhTYsYS.SNL.PQVQPVREVsFRDY.GlDsSsKW.HaR......................GshupCY...........uuE-.lhHRDsLsssssh...uEhlhKN...su..ssh......Hsuos....uo.osFYusVGRNGVLPQuFDQFF-TAYGsu-s.sss-.hsssKsss+h...sssss ...............sDFS.olsSFLP..psoSp.hTYsYS.SNL.sQVQPVREV..o.FR-Y........ul-s.usKW.HaR.........................G.NhupCY...........S.uE-.lhH.RDCLsss.os................u-hLhKN.su..shh............H.uust.......so.osF...YosVGRNG...VLPQuFDQFF-oAYussps....sss-...stpKsttph.....sst...................................... 0 3 11 38 +11879 PF12046 DUF3529 Protein of unknown function (DUF3529) Assefa S, Coggill P, Bateman A anon PFAM-B_3346 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 176 to 190 amino acids in length. 25.00 25.00 27.80 27.30 22.60 21.60 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.88 0.71 -5.24 26 113 2009-07-09 12:27:30 2009-07-09 13:27:30 3 2 104 0 56 112 148 165.90 36 87.39 CHANGED .ssshhSTLlLTlLLuIGLhFFlRAusKDRTshhclh....Ssps.......shplhstlppahcpRuaphsssDscppllpFcGtVtsShhLAlhLohLuulGhsCLGLVlp.LhPphuhW.lh.LslLu.PLAGhaYhp+ApR.Eplcl+Lhsss.pss.............sSplplcAHRDELh....pLtcsLpLpucG ..........ssh.oTLhLslL.slGLaFFl+uSsK-Rhpphphh....ssts..........tplhsplpsaFppRuaplss.p.ppphlsFEGhVtsShhlAlaLohLsslGhssluLVLshhhPs.....hu.......hhhh.LshLu.PLAGlaYWp+AuRhEp..lpl+lhsss...sts.............tStlplpuHRDElh....pLpcpLpLppp........... 0 15 40 52 +11880 PF12047 DNMT1-RFD Cytosine specific DNA methyltransferase replication foci domain Mistry J, Gavin OL anon pdb_3epz Domain This domain is part of a cytosine specific DNA methyltransferase enzyme. 
It functions non-catalytically to target the protein towards replication foci. This allows the DNMT1 protein to methylate the correct residues. This domain targets DMAP1 and HDAC2 to the replication foci during the S phase of mitosis. They are thought to have some importance in conversion of critical histone lysine moieties. [1] 20.70 20.70 21.20 20.70 19.80 20.60 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.64 0.71 -4.50 38 305 2009-07-09 13:01:57 2009-07-09 14:01:57 3 35 122 5 168 317 0 136.40 27 11.89 CHANGED tcshPp+pLssasla-.scuchpsl-hh.hc.sh-l....ahoGhh.tsht......-sp..............sp.pt.............tuhpspu....lup......IcpWtIshh-..sut.shlhlsTshApYcLhKPuppYt.ha-hhhc.+splshtlhchLpcss....choh--llstltpt .....................................................pthPp+plssaslas..pc.............u.chhsh-hh.hc..sh-l.....ahSGhh.pslh..........cpp..............sp.pt.................Ghps.cs......hus................IppWh.I..shhc........sut.sllsloTs.....hA.t.YhLh........cPoppYtsha..shh.c.+hhluhhllchLpcss.....choac-lls+ltp.s............................. 0 57 94 132 +11881 PF12048 DUF3530 Protein of unknown function (DUF3530) Assefa S, Coggill P, Bateman A anon PFAM-B_2450 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 272 to 336 amino acids in length. These proteins are distantly related to alpa/beta hydrolases so they may act as enzymes. 
29.80 29.80 31.40 30.30 29.30 29.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.88 0.70 -5.44 36 175 2012-10-03 11:45:05 2009-07-09 14:03:12 3 2 157 0 59 189 50 266.50 22 94.97 CHANGED hhhthhLhhhhhhshh.......tt.........stptss.tss........................tt...............hpchLPs...................sEhptLps...sscphLsLhcstpsup.pGsllllsstspsADhsshlu.LRcpLsctGWsolSlo.Ps..........t...shh...ttsttsssssstp.ss.spp...........s..t.t..........................................phhptpp.......pplhsplsshhs..hspptsup.llllupGpuAshshchLupppssp........ss...................sLlhlssh.......hPsptt......tsLsphlup.lplPlLDlatss.pshshpsAt.....pRcptup+tpp.sYcQhtLht.....ss.ssppp...lhpplcGaLpphs .................................................hs...................................................................................hpc.lPt............................................pp.p.lps.....ssppalsLapstpssp..GslIllsstspssDhsthlu.LRcpl.sshGWsoLolshPs.................t....h..........tsttsssssttt.ts.ttt..............................t...............................................................................................p..pspt.......ptlhsplpushs..hut.ppt.sc.llLlupGsuAhhshchluppp.st....p...................sLlhlss.........stptt.........sL..ph.lsp.lplshhDlh...hts.tsh.....stpsAt.....tRhphup+tptssYpQhsl.sh........s..s..psppp...lhpplcGWlpt.................... 0 14 27 45 +11882 PF12049 DUF3531 Protein of unknown function (DUF3531) Assefa S, Coggill P, Bateman A anon PFAM-B_2583 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 149 to 199 amino acids in length. 
25.00 25.00 34.50 32.00 21.20 20.70 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.73 0.71 -4.53 21 130 2009-07-09 13:05:26 2009-07-09 14:05:26 3 3 98 0 64 130 123 144.70 44 69.87 CHANGED McVpFREhsPFssWIWLcFussPopsE+pYl-plFcSWalLG+LGGFNAENLQlpEsGsDlSaMsYDs-ps....sushsALMHNMG-hEYpspWARCWhDLGTSDulALDVLINALpQlsp-hVpIccllIGG.N.EDWPlp..-pt-sh......Fsp ............McVpFR-FsPhs.sWIWlEFts.sPoptEhphlcplhpSWallG+LGGFNupNLQlp.p.suhD..h..uahsYDs-pu.....ssshsuhhHNhG-lEaQssWuRsWlDLGTSDhluLDVLINuLpQlSs-aVsIcplllGGpN..pDWshp.spp-....s.............. 0 13 42 57 +11884 PF12051 DUF3533 Protein of unknown function (DUF3533) Assefa S, Coggill P, Bateman A anon PFAM-B_2348 (release 23.0) Family This family of transmembrane proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 393 to 772 amino acids in length. 27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 382 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.16 0.70 -5.71 39 601 2012-10-03 10:13:34 2009-07-09 14:10:01 3 6 468 0 205 969 15 280.00 21 43.58 CHANGED shllLslhhlulhu.lYWGuhYppss+h+slphllV.tDs....s........tt..shlGssltshhsph.s............thss..aplhs.scat...........hspclhchlacp+YWuAlhlpsNATpsLhsul..ssusssasso..shhphlapouRc.oshsohlhs.lptlpphhht..hssphl..plhpshsshspsshht......hlssshsashhDh+PhsssshhussplGlIYhlIloFFp.........hshhssl+tph.....tphl....+htph......llaRhlsShlshFhlSLhashlo.huFplsF...................shsa...G+.uGFlVaWMhsaLsMtAlGhssEshshll....sP.aluhaLlhWlIlNlus..shhPhsLs....PsFYRaGYAh.PlHsshclh+sI...aasspp.splGhshGlLhAWlslshh 
.........................................................................................................hh.................h.s...........h...s.....u....h.....a.....c....s...h.....s...phpp...l.lAVVNpDpG.............tt....s...s...lG..s..sh..s.shl.pp.ps..............hcaphh.......................................spccs.pctlp.pucYauulh..IPs.s.........hopplts............sl.......pst...p....s..s....hphp..................sssph.l...h..s.sup........t...s.s...h.......p.....p.........h.h.t.....p.h.h....p....slppshhp.....phsp..th..h.........s....l.psh..ss..................................................................................................................................................................................................................................................................................................................................................................................................................h.................................................................................... 0 50 115 181 +11885 PF12052 VGCC_beta4Aa_N Voltage gated calcium channel subunit beta domain 4Aa N terminal Mistry J anon pdb_2d46 Domain The beta subunit of voltage gated calcium channels is coded for by four genes 1-4. Gene 4 can produce two types of beta4A domain (beta4Aa and beta4Ab) according to how the gene splicing is carried out. This family is part of the beta4Aa N terminal domain. It is made up of an alpha helix and a beta strand. It is thought to regulate the channel properties through protein-protein interactions with non Ca channel proteins. [1] 25.10 25.10 32.50 35.50 25.00 25.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -8.10 0.72 -4.09 8 450 2009-07-09 15:44:57 2009-07-09 16:44:57 3 7 87 11 136 407 0 42.20 78 8.59 CHANGED GSADShsS+PS-SDlSL-E-REu.................LR+EsERQAtsQLE+AKsK ......GSADSYTSRP.S.DSDVS.LE..E.DREA.................lR+EsE+QAtsQLE+AKoK.. 
0 16 28 69 +11886 PF12053 DUF3534 Domain of unknown function (DUF3534) Assefa S, Coggill P, Bateman A anon PFAM-B_2753 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 150 amino acids in length. This domain is found associated with Pfam:PF00595. This domain has a conserved GILD sequence motif. 25.00 25.00 25.20 25.00 24.50 23.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.82 0.71 -4.32 7 242 2009-07-09 17:29:17 2009-07-09 18:29:17 3 9 78 1 93 240 0 108.90 49 11.13 CHANGED MKV.TVsFG+TtlVVPht-up.pVppL.........lppAs.RYt+spsptst.WlpVa+LE.asDGGILD.DDlltDVs-.D+DpllAlaDE...Q-s..tpGG-usSusSs......sppSP-.apsEhusp..huAFpPh..tuEI-VTsos.L+h...uhPLhVR..RSSDP .....................................................................................st......hlpl.p+Lp.....spG.G.ILD.DDhltDVs-.D+-.......p.L.lAVF-E.....Q-P........ppuGDu.sS..uSSo.......GTQSP-h..FtsElusp..........huA.FpPh.......suEIEVTsSs..L+h...shP..LhVR..RSSDP............................................. 0 25 34 62 +11887 PF12054 DUF3535 Domain of unknown function (DUF3535) Assefa S, Coggill P, Bateman A anon PFAM-B_2858 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 439 to 459 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF02985, Pfam:PF00176. This domain has two completely conserved residues (P and K) that may be functionally important. 
26.20 26.20 26.30 27.90 25.70 26.10 hmmbuild -o /dev/null HMM SEED 441 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.27 0.70 -5.86 32 289 2009-07-09 17:31:25 2009-07-09 18:31:25 3 20 241 0 212 284 4 412.50 26 24.30 CHANGED VWpsLlpph.....s.pslssthssalpshlpLhhsPhGh.h.shsh-sslll+PSut...................thpstcpcptpptptt.....s.ts...ph.DsthhtG-lpllGhD......sllRsRltAA+AlGhhhuhh...spssltshhps...........hLhstLsSstusp+hluulllscaspstt.ppsts.................phhsttLhphLp.....t....p.shYcElsshl.ptlRspCpsLlsohtssuh.h....pls.lshhspupspssspuFol-sAcclssph....apchtpthsssp+l........uhpsLE-sRpplhsulpps+p..scpphcsplhuuhAuAllshpsLPcKLNPlI+sLM-SlKcEEsthLQp+SApulupLlp.hst...st+ssPssKll+NLCsalCsDooETPchs........................ssphppsILoLt......+cpspts..........................hssssphpcpscpApIpR+GuphulpplsppFGsplhsplPpLtshh .........................................................................lW.tLlpph.....s.thl.....t..th.tsa.htshlpLhhpP.tl.....shchs.hhhcssutsh........................................t.tstphp.t.pttttt................t.....stp......h.h.ush......p.hl..s.-...........slh+uRlhAAcALGhlhshh.......spsshtshhp...........................................hLl.tL........sS.uuhp+lssuhlls-aAp.tttppshs............................................................................hlpttLhp.hLp.....................................c..hYcElss.h.pthpspCppLlsshtstsh.........................psp...sps......sss...sholc..pAp.clssp.........apchppshs..s..+h................................hp.LpspRpp...l.hslppspp...ppthphplpushAuAlls..........h..p..........tl..P.cK...LsPlI+slM-olKcEEs.t..LQphuAp.slApLlp.hs.........spp.sPssKllpNLsshhCsDsspTPphs................................................hpsthppsILoLh..............+cppttt......................................................................ts..sp.h..pctt.ct..ut.lpRRGAp.ALpplsppFGsplhpclPpLhphh.......................................................... 
0 67 112 174 +11888 PF12055 DUF3536 Domain of unknown function (DUF3536) Assefa S, Coggill P, Bateman A anon PFAM-B_3129 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 274 to 285 amino acids in length. This domain is found associated with Pfam:PF03065. 25.00 25.00 39.30 39.10 18.70 17.90 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.78 0.70 -5.24 23 127 2009-09-11 10:41:18 2009-07-09 18:40:01 3 5 114 0 74 140 17 241.30 31 32.54 CHANGED WsQp........WRsPLRcALDWLR-pLAthaEppusthhpDPWtARccYIcVlLcR.s.-shpsFltc+stc.lstc-plpsL+LLEhQRpuhLMaTSCGWFFDElSGIEoVQllcYAuRAlpLAc-lss.sh.EscFlp+LppAhSNlsphusGsclYcthV+PuhlsLhc.VuAHaAlsSLapshscps.hasYplt.cphchhtuGpsplAl...G+htlpSphThEptphhasll+hGsashpsGVp.apsppsatphhpplhttappushstll+hlsccFutpsYoLccLFp .........WpQp........WRtPLRpAL-hLR-plstha-ptutth..h.p...D.PWtARc...cYlpVl........hsc......s........p......shppFltc.attp.Lstpcph..psL+LLEhQRtuhhMaTSCGWFF--lStlEslQhlpYAtRAlp.Lupch.......ss...........p...l....-tpFlptLtpA.SNh.p.htsGtplapphVhsshhsh.p.luuphA.lsslhpt...h................p..................t..thhs...apht..thp.....st..thhh..Gph.h.s.hT..pt..h.hssh.hGt.p..s.h................h.....t................................................................................................................. 0 33 59 68 +11889 PF12056 DUF3537 Protein of unknown function (DUF3537) Assefa S, Coggill P, Bateman A anon PFAM-B_3199 (release 23.0) Family This family of transmembrane proteins are functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 427 to 453 amino acids in length. 
25.00 25.00 32.10 26.00 21.30 24.60 hmmbuild -o /dev/null HMM SEED 398 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.39 0.70 -5.76 9 121 2009-07-09 17:42:07 2009-07-09 18:42:07 3 3 18 0 83 119 2 332.00 35 86.54 CHANGED psscELt....pFcshLpWhshDpSs.hssslSW.lFhlLullVPhlu+hLLsCt....sC-.hppRsa-shVQlSLShhAuluFlsLSchhR+aGlR+FLFlD+LhscSp+VRhsYssclppSh+lLuhalLPCFsscusY+.IaWYhou.sphPalhpshhupslAChlph.sSWhYRTolFllsClLF+LlCaLQlLRh--FA+hF.....ptps-ltslLtEHl+IR+pL+hISHRFRtFILhsLllVTASQFsuLLhTT+sputlslhpuGELALCSlsLVoGLhICL+SAsKITH+AQulTulAo+WHsssTlsSh-ph........poPsus........hss..shp.ssstsssssEpsst....-DshcsTcI..hP.asp.shSaQKRQALVoYLENNpAGITVaGFhlDRoaL+TIFhl ..........................cpLp....pFcthLpWhsh-p..Ss....hshhhSahhFhhlulslPhhs.t.hhl.s.............s...p.csaph.Vphs.ohhAsluahsLophh++...........h.GLR+h..LalD.p.ltsc.otpV+tsYttp......l....ptuh+lLuhhllPshhscssa+.laaa...........t..h.P.........a....h.p....s.......h...pslAhhhth.sSWhYRTslahhsClLF+LlCpLQlLRhcsau+hh.....ptp.u-ltsllpEHhRIRcpLphISHRaRhFllhsLlhlTuSQhsuL......lhsst.........ps.hshhpuG-LAlsShs.lsGlhlsLpuAsKITH+AQulsulAu+WHshsThss.st.........tsPpss............t.......t.......s..pp..ps.-..t......ts.hs.ssph..hs.h...h.SapKRQAL...........VsYLppN.uGITlaGahlDRshLpslFhh................... 0 18 54 70 +11890 PF12057 DUF3538 Domain of unknown function (DUF3538) Assefa S, Coggill P, Bateman A anon PFAM-B_3373 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 120 amino acids in length. This domain is found associated with Pfam:PF00240. This domain has a conserved SDL sequence motif. 
25.00 25.00 34.40 32.80 22.60 19.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.42 0.71 -10.57 0.71 -4.49 9 155 2009-07-09 17:48:44 2009-07-09 18:48:44 3 2 64 0 55 144 0 110.50 61 11.73 CHANGED LsEVlQchRpVppRLpPFlpRYa-ILpsssTh-.pssTptREpsQRlhshVuEuLHhLupA.hAlSDLhhsLpsssPRHL.ssRPh.........S.ahssPshhssuhh.lPh.hslus......sVs...housGs ....hsEVLQELpRlEsRLpPFLQRYaElLssAsTs-Y.sNN.p.-GREEcQRllNLVGEuLRLLGNshVALSDLRCNLussPPRHLHVVRPM.........S.H.YTTPMVLQQAA.....IPIQINVGT.......TVTMTGNGp..................................... 1 11 16 35 +11891 PF12058 DUF3539 Protein of unknown function (DUF3539) Assefa S, Coggill P, Bateman A anon PFAM-B_3564 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 90 amino acids in length. This protein has a conserved NHP sequence motif. 25.00 25.00 45.40 45.40 18.90 17.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.07 0.72 -4.16 17 75 2009-07-09 17:50:22 2009-07-09 18:50:22 3 1 72 6 29 75 90 86.10 51 97.33 CHANGED ssEpYLNHPTFGhLYtVsPss.....-u+-laATLYAQRhFFLVshpspG.........hpFEsIshtDARhhsEhpLpphRRss.opEhpphppl..FcQTFl .s.sEpYLNHPTFGhLYpVs.ss.....-sp-lasTLYAQRlFFLVshpscu..........spFEsIshsDARhhsE.+LpphRRsG.sp-apphppl..apQTF..... 0 4 19 27 +11892 PF12059 DUF3540 Protein of unknown function (DUF3540) Assefa S, Coggill P, Bateman A anon PFAM-B_2948 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 212 to 238 amino acids in length. This protein has a conserved SCL sequence motif. 
27.10 27.10 27.10 28.30 26.70 26.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.19 0.71 -4.68 14 188 2009-07-09 18:02:22 2009-07-09 19:02:22 3 2 93 0 49 145 5 162.70 28 85.87 CHANGED phs.upVtt...tts-shhlhpssGh.hps++AsSCLltPpsGDhV.Ll...........ssss-spphYllAlLpRssssss.tLphsG.clsLcs.tttlpl.usstlcLputp.plshpspchphpupphththpchph.upthpsphtpsphlucphcshhpRlhQpscpuhRpVcpl-pscAtplshpscpshph+u+psslTAcsllKlDupQIHhG ........................h..hupVht...hhss.hhhhht.ts...hpsc+AsSCLltPphGDhV..l............ss..ctpphallAlLtRssssts.tlth.sG..tlsLch........tsttlpltutcplslcst..phshpupp..........................hphlucphpshhpplhptstps.RpVpss-pl+Auphphtscpphth+u+pshlsAptllKIDutQIHhG........... 0 18 30 39 +11893 PF12060 DUF3541 Domain of unknown function (DUF3541) Assefa S, Coggill P, Bateman A anon PFAM-B_2172 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 230 amino acids in length. 28.20 28.20 32.50 31.90 19.80 17.50 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.47 0.70 -5.06 17 126 2009-07-10 10:48:53 2009-07-10 11:48:53 3 1 122 0 23 93 2 222.40 66 59.53 CHANGED sssp.oappsAphI+sshEspLYoLPshhpGHYGlRMYR.ThDsKYusAshsDhhpVssp.shaAsplcpPc.Itthuppclssht.....cs.RuptRhpAhcshPEaLahussLLt.huRhDEaGLpt.scs+LpplL++hDhcphhTD.tMIcAWAAQLAN.VYWh+QLG.tDhVpcahpAFpcsYPDspDpcLsppQatNKlYGMTHhIFAsSEYYQH.Vcpp-aQW .......sss...hoaptoAsLI+pTYEoQLFTLPuFKEGHYuLRMYRQTLD-KYAAAIhSDLuRVAStLNaFAAEVsTPEQIppYupKRLppYp...cs-DERoQRRasATpsMPEYLYLGlDLLGsMARANEYGL..pH..Kp....DsKLRpVLRRYDFo.YATDcsMIEAWAAQLANQVYWLRQLGEQDVV-sFIpAFRcTYPDp+DscLscQQYGNKLYGMTHIIFADSEYYQHsVSpppHQW. 0 1 6 18 +11894 PF12061 DUF3542 Protein of unknown function (DUF3542) Assefa S, Coggill P, Bateman A anon PFAM-B_2032 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes and viruses. 
Proteins in this family are typically between 516 to 1283 amino acids in length. This protein is found associated with Pfam:PF00931. 27.20 27.20 55.50 30.90 26.60 25.80 hmmbuild -o /dev/null HMM SEED 402 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.61 0.70 -5.50 6 85 2009-07-10 10:50:28 2009-07-10 11:50:28 3 7 5 0 0 85 0 331.00 57 41.90 CHANGED hcEhIppTK.EhRApYS.FPpssLsuNcV........shcSPcFVMEhIDsVVtNLssLVKIsDssSh.FV.t..KcpIppVhKELKLLR.FVCFVSN+s.IEPQ.p+pTFas+sLhtAS+hAMVsWLY..hP....uNss.DLsPuEsspLLSDahcMKIKsIpPsIp+..IYIDVLQALKSTh.PpsQpcH......As..uGhVET.pHsL...........hVuLoDQMAshpEMLsLLRDNLIHLPILDLEFHLQDhDoVIlDuGLLlYSLYDhctp.EDsoLE-lNptLthDLP+sIE.IKhhlYLVhQKAFQsNLPRIHGLGYVDFLL+NLK-FQDRYSDS.LAFlKsQLQVIQpEhESLQPFLKsVsEEsHNKa-+..EcCss.lItKAYEVEYlVDAC...IsKcVPcWCLcRWLlDIIcEIssI.KtKlpE ............................................................................................................................................WKclIWKTKQEFRApYS.FPKosLAsNKV........s.tsPcFVMEhIDshVtNlNVLVKINDPsShhFVPG..KEQIEQVLKELKLLRFFVCFVSNKC.IpPQYppoTFYoHALIEASHIAMVVWLa..LPl.YGNtNQDLsssE..VSpLLS....DFhEMKIKsIpPslsp..IYIDVLpALKSTI.PpAQpKH.....sh...uuhVEhPsHsL...........hVGLSDQMAsLQEMLCLLRDNLIHLPI...LD.LE....FH.LQDMDSVIlDAGLLIYSLYDhcGphEDTsL--hNptLGFDLPRNIE.sIKsMVYLVMQKAFp...sNLPRlHGLGYVDFLLKNLcDFQsRYSDS.LuFLKNQLQVIQpEFESLQPFLKsVsEEPHNKhKpLNEDCAsQIIRKAYE..VEYVVDAC...INKphPpWClERWL.DIIEEITCIKAcIQE........................... 0 0 0 0 +11895 PF12062 HSNSD heparan sulfate-N-deacetylase Assefa S, Coggill P, Bateman A anon PFAM-B_2134 (release 23.0) Family This family of proteins is are heparan sulfate N-deacetylase enzymes. This protein is found in eukaryotes. This proteinenzyme is often found associated with Pfam:PF00685. 
25.00 25.00 31.30 26.80 18.20 23.00 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.48 0.70 -6.10 7 310 2009-07-10 10:56:38 2009-07-10 11:56:38 3 9 87 0 174 264 0 383.60 61 55.88 CHANGED hshsllSlhh.saaLaos..s+t.tpstcsst...phsC..s..hh......P.p.h.s+shc.....sss+TDPpVLVFVEShYSpLGp-IltILpSpRFpY+sElAsG+.slPsLTsps+G+YsLIIaENlhKYlNMDpWNRpLLDKYChEYuVuIlGFhpu.sEcsLhshQlKGFPLalasNltl+DhslNP.SPlL+lT+suchcpGsLPG.sDWslFQhNHSTYEPVLhuph+oscp..s..s.......h.hATVlpDLGLcDGIQRVLFGpNLsFWLHKLlFlDAluaLoGccLoLsLDRaILVDIDDIFVGccGTRhpscDV+ALlsTQNplRshlsNFTFNLGFSGKaaHsGT-tEDtGDDhLLpsscEFhWFPHMWpH.QPHLaHN.ohLhppMhLNKtFAl-HsIPsDhGYAlAPHHSGVYPVH.QLYcAWKKVWslpVTSTEEYPHLKPARhR+GFIHsuIMVLPRQTCGLFTHThFacEYPGG.pcLDKSIpGGELF ......................................................................................................................................................................................................h..hshh.hhh.saal..s......p........................ps....t...........................................s.p.......t..+o-..PhVLlFV.ES.YSpLGp-IltlLEusRFp..YphplAst+sshPsLT.-.....ps....+G..+asLlIaENlhKYlshDuWNRpLLD+YCh-.Y.uVGlI...G.Fh.+s.sE....poL.u..h...Q....LKG.FP...L.lasNl.tL.+DhplNPpSPLLhlT+sschc.GsL...PG..pDWTlF...Q.NHSoYpPVlh.Aph.......pstc.....s...h..s..t...........shhsTV.lpDhGLaDGIQRVLFG.ssLsFWLHK...LlFlDAlu.aLosc+LsLsL..-RYILVDIDDIFVGKcG..TRMpspDVcA...LlpTQp.LR..s...lssFTFN.LGFSGKFaHs.GT-t.EDtGDDhLLp.hcEFWW..FP.HMWSHMQPHLFHNt...S.....sLs-QMhLNKpFAlEHGIPsshGYAVAPHH.SGVYPVH.QLY-AWKpV.WsIpVTSTEEY.PHL+PARaRRGFIHpsIM........VLPRQTCGLFTHTIFYpEYPGGspELD+SIpGGELF.............................. 0 34 48 107 +11896 PF12063 DUF3543 Domain of unknown function (DUF3543) Assefa S, Coggill P, Bateman A anon PFAM-B_2213 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 217 to 291 amino acids in length. This domain is found associated with Pfam:PF00069. 
This domain has a single completely conserved residue A that may be functionally important. 26.30 26.30 27.90 27.00 25.50 25.40 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.41 0.70 -5.15 32 333 2009-07-10 11:05:01 2009-07-10 12:05:01 3 7 232 0 217 306 0 231.20 29 26.39 CHANGED pss..cE-pc..slpplEphAo+u..cslhtFA-VKapQLhPhsP...............ts.s.spssh............psssLsscshhol........ScEuLVLYVKuLuLLuKuMclAusWWppppp......................sshts..Ss+lNplVQWlRsRFNEsLEKAEhl+L+L..pspphLspccsspsp........................sssloAEKLhYDRALEMSRsAAlsELsscDLtG.......CEluYsTAlhMLEAlL-s-..........................stlss-D+phlp+h......lsuIppRLssL+pKht .................................................................................pEcschlptLp.hhths..cslhthAth+hsth..shss...................................................thttt.s......................................ss.hsphshph...............psEtLVLYlKuhpLLupuh.p.lAtt.hpp.................................................sphss.....Ssplpp.V.Vphlpp+appslppschlph+L....pphh.ccp..hsp.............................................psloAEKLlYs+Al-MspsAAl-Ehhp...p..h..ps........C.hpYppAlhhLEuLhcph......................................................................hctpDpp.lpKh.........................hpsIcpRLssLpp...t.............................. 0 55 95 166 +11897 PF12064 DUF3544 Domain of unknown function (DUF3544) Assefa S, Coggill P, Bateman A anon PFAM-B_3553 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 198 to 216 amino acids in length. This domain is found associated with Pfam:PF00628, Pfam:PF01753, Pfam:PF00439, Pfam:PF00855. 
27.00 27.00 91.00 58.30 19.00 19.00 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.52 0.70 -4.53 4 139 2009-07-10 11:09:03 2009-07-10 12:09:03 3 9 40 0 32 136 0 190.30 79 18.22 CHANGED uKPVLuGus....GRRISLTDMPRSPMSTNSSVHTGSDlEQ-sccK......soSSHaSAuEESMD....STASPASsKsG.uGshusSPKPFpspsus...oKQE+ousTGSILNLNLDRSKAEMDLKELSEoV......QpQussssLhSPKRQIRSRFQLNLDKTIESCKAQLGINEIS-DsYsuVEHSDSEDSEKoDSSDSE.hSD-EQKsKNpp-ssps+Eu ...........SKPlLSGGs......GRRISLSDMPRSPMSTNSSVH..TGSDVEQDsEKK......AsSSHFSASEESMDFLDKST......AS....P..ASTKsGQAGSLSGSPKPFS.PQhssP..lsoKt-KT..S..TTGSILNLNL.DRSKAEMDLKELSESV........QQQSs..PVPLISPKRQIRSRFQLNLDKTIESCKAQ.L.GINEISEDVYTAVEHSD....SEDSEKSD...SSDSEYlSD-EQKsKNEPEDsEDKE.G....... 0 1 5 13 +11898 PF12065 DUF3545 Protein of unknown function (DUF3545) Assefa S, Coggill P, Bateman A anon PFAM-B_3270 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 60 to 77 amino acids in length. This protein has two completely conserved residues (R and L) that may be functionally important. 25.00 25.00 30.70 45.40 21.60 21.10 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.12 0.72 -4.22 22 162 2009-07-10 11:21:43 2009-07-10 12:21:43 3 1 162 0 38 93 1 59.60 52 94.82 CHANGED MDphph....D-llth.-ps...ppo+u+.s.pKRKWREIEAlKD+pRLpKELp-hDhsh-.hsl--...lch ......MDpLca....DEllch.Ehsp.hcpSRoK.P.sKRhWREIEAI+DR+RLcKELh-hDsshD...hD-Ic.............. 0 7 14 26 +11899 PF12066 DUF3546 Domain of unknown function (DUF3546) Assefa S, Coggill P, Bateman A anon PFAM-B_3237 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 93 to 114 amino acids in length. This domain has two completely conserved Y residues that may be functionally important. 
25.00 25.00 26.10 26.10 24.40 24.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.46 0.72 -4.05 19 273 2009-09-11 14:59:35 2009-07-10 12:24:31 3 13 212 1 194 287 0 104.60 30 13.50 CHANGED oaKpFhts..-Dslscs-uhp+YscYKh-apppQhpcFF.tHK-E-WF+pKY..HPcphsphpccppphhpsphpsFhp.hcsGhhsplpl-ht.t....chhsssllch-sGspp- ................hKtFh.p..--shscs-s.p+YscYKt-apppphpsFFptHKcEE....................WF+pKY......cP-..p..hs....thppctpph...hpsthp.Fhp.lcsGhhDph.l-h..t.....p.hssshlchEtGpt................................... 0 67 109 159 +11900 PF12067 Sox_C_TAD DUF3547; Sox C-terminal transactivation domain Assefa S, Coggill P, Bateman A, Eberhardt R anon PFAM-B_3310 (release 23.0) Family This domain is found at the C-terminus of the Sox family of transcription factors. It is found associated with Pfam:PF00505. It binds to the Armadillo repeats (Pfam:PF00514) in Catenin beta-1 (CTNNB1), which is involved in transcriptional regulation [1]. It functions as a transactivating domain (TAD) [2]. 
25.00 25.00 25.30 27.10 22.80 24.20 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.68 0.71 -4.14 12 166 2009-07-10 11:26:27 2009-07-10 12:26:27 3 4 45 0 80 139 0 182.20 34 53.20 CHANGED hsshua.aR-htshG..spa-s...YuLPT.PEhSPLDsh-...s-suFFss............ss.pE-sphh.......s.hsYpssas.........pp.pssshh+phshsps...ssst.............shhss.sssthY..uphhss.pst.has.......phG.QhSPPPEut...ph-sl-plpps.-Lhu-lDRsEFDQYLs....ts+s-ht....hsaps..............ss.ssspp...ssL.SlLSDAooAsYYsshss ........................................................................s...tpaR-h.shG......sph-s...YuLPT.P-hSPLDslE......s-.sFFss..........................sh..--pth.............................s..sa..p..s..t...hs......................p..tss....h.tt.hshs.s...ss..........................h.ts..s.ss..thY..sthht............s..............tst..hps..................thG..QLSPPPEs.........sh-uh-php.s..ELLu-hDRsEF-QYLs....tpp..s-hs..........Lshps..............tp............................ss.ssspp.....sul.SlLuDAouA.YYsshs.s................ 0 5 10 32 +11901 PF12068 DUF3548 Domain of unknown function (DUF3548) Assefa S, Coggill P, Bateman A anon PFAM-B_3247 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 184 to 216 amino acids in length. This domain is found associated with Pfam:PF00566. This domain is found at the N-terminus of GYP7 proteins. 
27.00 27.00 27.00 27.40 26.90 26.80 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.34 0.70 -11.31 0.70 -5.04 9 226 2009-07-10 11:34:16 2009-07-10 12:34:16 3 4 118 0 123 196 0 178.70 30 28.80 CHANGED ssshKVlFEK-GValHosst+..p-pDshIsGhlRll.EKcu-lhl-WpPlE-.shssstlhhttcsupus............ctscassuYEs-WshloTVshcc+.ttpt....stssspu+W.uFohsLs-L+Sl+ps+.uh...GasYLlhss+cGssLPsLHFHpGGo+thL+sLp+alhLspSPpDu+lhLVhsps.pALSpSF-pL.....--sohsllp+ahpD .....................................................s....pllapppsValHssstc.....cs.IsGhlpll.....p.....cc......s......p...sh.lcat.....Ph-.-.....s.s..ssthhhs.tps..sps.h.............................................p...sph.ssh-s-W...shlso.....sp.....p..pp.t...p....st.......s.....s.....s..t....s...s..t.u..pa.....uF.s.hsLs-L+Sl+ppc.uh...GW..sY..LVhsh+.sG.ssL.PALaFHpGso+.hlcsLppal.hLspSspDtphhLV.sps......puLspSFppL....pps..s.shh............................... 0 31 49 85 +11902 PF12069 DUF3549 Protein of unknown function (DUF3549) Assefa S, Coggill P, Bateman A anon PFAM-B_2034 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 340 amino acids in length. This protein has a conserved LDE sequence motif. 
57.80 57.80 86.90 86.80 27.30 20.30 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.11 0.70 -5.66 29 187 2009-07-10 11:52:14 2009-07-10 12:52:14 3 1 184 0 44 152 13 331.00 50 98.51 CHANGED MspIsTLophLpsussQaplaDlGRRVpcIstppFtpIEptptPYPaPlQpHAphAIsF.W..sts..........pQsaIWFLKLPLDE+GLLp.AsRspFIchllEALGppLspsho--.QQ..ppLusNPYsFKPsp-KhAhFpAhl+tpLspssSpaYEhAtpYLSuph....s......h-sWQslGLQGIADlssRlpppsp.tshltcAlsp....hPtpVhhALstsLEHhs.lscpLu-tlhpphppt......cshhhshhlRALuu..usspshpsh.h.slLsptphhcs-..lLlsIAu...RsWpsL.p-spthphaL-pLAppp..Q....shFsQlFADLVtlPtLRstlLshLRpsspSstLupAIupLFp ....M-sIpTLopLL......psossQYQlFDLGRRVptlDsppFtplEtuQpsYPaPlQpHAphAIsa.W..spp..........+QPaIWFLKFsLDERGLLpQAshupFIphllEAhGscLsp........slo.EE...QQ.....pKLANNPYTFKPo--KlAhhHuplRtpLsLssSQYYEHAQp.YFSGsLG..........W-pWpoVGLQGIsDlCAR............Lpp-pN.ushLc....KALs+.............LPspPLaAlhusLEHls.Ls-cLAc+L.EhspsplttppsDlhLLuAllRALAu....ussshhpsh.hsulL.............sS...stls+sE...VLIulAG...RsWhsL.psssptppFLhRLApsts.Q.....sLFNQLFADlVhlPsLRsshLsLL.cussSspLApAltpL.p................... 0 8 18 32 +11903 PF12070 DUF3550 Protein of unknown function (DUF3550/UPF0682) Assefa S, Coggill P, Bateman A anon PFAM-B_2472 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in eukaryotes. Proteins in this family are typically between 249 to 606 amino acids in length. 
25.00 25.00 27.50 26.70 17.50 20.60 hmmbuild -o /dev/null HMM SEED 513 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.96 0.70 -12.76 0.70 -5.76 6 181 2009-07-10 11:59:48 2009-07-10 12:59:48 3 4 103 0 130 194 0 376.60 40 82.71 CHANGED CaLL-KS+pLFNuLRDLPQaG++QWQsYFG+TFDVYTKLWKFQQQHRphLs.chGL+RWQIGEIASKIGQLYYchY..LRTSETsaLsEAFsFY.pAIhsRuYaspss+E-..s-Lhs..K+LRaYARFllVCLLLs+h.chVt-Lspclcphl--hppsapscDptEWpLVlpElsuFlcADs.ltVLpsDphslhhsp.....c.sPhst.Pshtpsshpp..LSLp-uLlhusspsplKFoELTlDhFRMLQsLEtEPssuh.ph...............htpth...t.stpsu.....ltEsuccPshRsNPHKYLLYKPThSQLhsFLusuFKELPsNuVLLlYLSAsG...........................shsos+pst-uPY-hGGlhTusp.+shssGsslphRspu...tKEs.HCLYPGDLhPFTRKPLFlllDSssShAFKshs.shFGQPLlhLLSP..sohPsuhp-p..sp+GSLFThFLpuPLhAFsalsGlu..clctcLW-+sQchLc+hhs-huplLsc.SRs.........lD.salQFhGD-FLRhLlLRFVFCpush+l.H+sF+ .....................Ll-+ucphFstlRDLP.aG....ptp.....pta.Ft+sFcsYT+LWKaQQppR...thL...s.c..hGL+RWpIGEIAS+IuQLYYthY..hRTS-spaL.Eua.FY.tAIhtRtYapp.......s...t.....t...........................cLhl..KpLRahARFllVsLLLp+h.chVppLlpclpt..lp-hpp.pap...p..-.hEWphVlpElttFlcA-s.hh..hps.p....hhhsp....................................t..sh...s.htp...h.t...LpLt-hllhu.tp.pp......lKFoELTlDhFRMLQsLEhEP.t.h.p................................................t.t...........h.p..s...pp...sh..NP+KhlLY+PohophhshLus.hc.....E..Ls.suhlLlYlSAsG.....................................................................................s..pt.p..t..p....s.......p...s..t......t...p................t......tsp.t..........tt....p.laPtDLhPFTR+PhFlllDSpsShsap.................t..h.....t.hGpPhhhLhSP.............t.h..s..hts.........ttpGs.FThFLpsPh.Ahh.h.sls.......ph..thappspthl..tp..t-htt..hlhp.st............l..p.sahphhsD.FLRhLlhRalFCpsshth.............................................................. 
0 47 68 99 +11904 PF12071 DUF3551 Protein of unknown function (DUF3551) Assefa S, Coggill P, Bateman A anon PFAM-B_3610 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 79 to 104 amino acids in length. This protein has a single completely conserved residue C that may be functionally important. 26.50 26.50 34.20 28.80 21.60 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.19 0.72 -4.43 37 127 2009-07-10 12:00:59 2009-07-10 13:00:59 3 2 24 0 54 129 3 83.00 30 89.04 CHANGED MRphhhsshuh......................................shshhhsssssAp.A+D.YPaCLp...usthuas.GcCsasoapQCpsoASGpsApC.......shNPhhs..h..spsppttptp ...........................................hhhhsh.....................................hssshhhss.sssAp..A..t.-..a....saClp........utthuhs..ssCsasohtQCpAoASGp.sAtC.......shNPhas....h..st.tt.....t................. 0 4 14 28 +11905 PF12072 DUF3552 Domain of unknown function (DUF3552) Assefa S, Coggill P, Bateman A anon PFAM-B_3508 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. This domain is about 200 amino acids in length. This domain is found associated with Pfam:PF00013, Pfam:PF01966. This domain has a single completely conserved residue A that may be functionally important. 
29.00 29.00 29.00 29.00 28.70 28.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.70 0.71 -11.39 0.71 -5.10 58 2218 2009-07-10 12:04:02 2009-07-10 13:04:02 3 6 2166 0 441 1381 168 199.80 32 38.76 CHANGED hhllhs.llull.VGhslGahl.....RKpluEp+lssAcppAcpIl--A.....................c+-AEshKKEAlLEAK-Eh++hRsEhEcEl+-RRsElQ+.EcRLlQ+EEsLD+Ks-sL-c+EppLpcKEpplttppp.lcppcpcl-pllpcQppcLE+ISGLopEEAKphllcpsEpElp+EtAhhIKEhEpcAKEcADK+A+cIluhAIQRsAADa .............................................................................................h..llhhllshl..lGh.s...lG....ahh..........................t+...ph....t....pp...p....htpAcppA.c.pI..lppA..............................c...+...EA-....sh....K...KEt..lLE.......AK...E.E....h...........p....chRp.......-.......hE...p.......E.......h....+.......p+...Rp......E....lp....c........E.p....R...Lh....p.......+.......E....c.s....L.-+....Kp-....p....Lcc+.......EppL.......-p+.......cpplt.......p.......c.......tpt.......l.......-p..+cpclc...........clhpppppE......LE+I.......uuLop-EA+phlLpplEc-lpc-hAhhl+-hEp-AK-.cu-KpA+cIlspAlQRhAu-h............ 0 198 340 403 +11906 PF12073 DUF3553 Protein of unknown function (DUF3553) Assefa S, Coggill P, Bateman A anon PFAM-B_3361 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 60 amino acids in length. This protein has two conserved sequence motifs: GQVQS and TVNF. 20.30 20.30 20.30 20.40 20.20 20.00 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.59 0.72 -4.65 22 88 2009-07-10 12:12:10 2009-07-10 13:12:10 3 2 88 0 32 90 325 50.90 50 47.26 CHANGED LEPGhhVRHPspPDWGlGQVQSNIuuRlTVNFcctGKlVIDus+VsLhhVh- .......LtPGhhV+HPspP-WGhGQV...QS.....slus+l.TVNFcctGKhVI-uspVtL.hV............ 
0 10 24 28 +11907 PF12074 DUF3554 Domain of unknown function (DUF3554) Assefa S, Coggill P, Bateman A anon PFAM-B_2029 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is typically between 287 to 356 amino acids in length. This domain is found associated with Pfam:PF02985. 25.00 25.00 25.10 25.40 24.50 24.60 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.88 0.70 -5.01 30 248 2009-07-11 10:07:29 2009-07-11 11:07:29 3 46 204 0 185 264 0 312.50 20 13.21 CHANGED s-p+slhsphLttl.s.tt.............lSpclspulhsllsKE....uNEsuLsphlsshspahhtl.......psclscclsstltK...Ghs-K+.s..lR+hWhhphsshhh............pstshshsspFhsthhphhccssssPhssspssslsuAalllu.........t..........t...h..hhhs.+sshLhss+haoK.ssps-lpahhcsLpslhsstp.p.ttpt....saupAhlaslsus..phssRcpAhphLpplhtppsth.......lucsllsulhshltshchh............stssshchlphlhpsls.s.sshst..........hcp.hhpLLllup.thls.t.....sWhuLs.+s...plDPupllpcpsc ....................................phl.tls..ts.............lutplsphhhshltcE....spEssLhttlpshshahshh.........pspls..cplhphhtK...GhspKpss..lR+salhphhthhh........................ss.shthhts.llshLhphhccshsp.sh..t.ss..t..ss..th...suAhllhs..............ths..htt.........t...h..pl.h.....h.s...K..ph.hhs..+hhpp.....ssp-shhhh......hchhp.tlhhph.tph.ttth.....pthtpAhlhhlhut..shp.lRcpA.hp...slpplh.tp.sth...........lups..ll.ptlhph.lpshchh..pt...............ttssptt..p.h.hh...s.pslp.s...s.hpt...........t......hcp.hhphLllu+p..l..sts........Whs.Lhh+h....tlDPtphlpc...t....................................................................................... 0 49 92 149 +11908 PF12075 KN_motif KN motif Assefa S, Coggill P, Bateman A anon PFAM-B_3795 (release 23.0) Motif This small motif is found at the N-terminus of Kank proteins and has been called the KN (for Kank N-terminal) motif. This protein is found in eukaryotes. 
Proteins in this family are typically between 413 to 1202 amino acids in length. This protein is found associated with Pfam:PF00023. This protein has two conserved sequence motifs: TPYG and LDLDF. Kank1 was obtained by positional cloning of a tumor suppressor gene in renal cell carcinoma, while the other members were found by homology search. The family is involved in the regulation of actin polymerization and cell motility through signaling pathways containing PI3K/Akt and/or unidentified modulators/effectors [1]. 19.50 19.50 20.50 19.80 17.80 17.40 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -7.95 0.72 -4.16 15 233 2009-07-11 10:36:48 2009-07-11 11:36:48 3 29 67 0 116 195 0 39.40 60 4.24 CHANGED PYSVET....PYGYpLDLDFLKYV-DIE+GsTIKRl...slpRRs+ ...........PYSVET....PYGacLDLDFLKYV-DIE+G....s....Tl+Rl.......slpRRs+............. 1 15 25 59 +11909 PF12076 Wax2_C WAX2 C-terminal domain Assefa S, Coggill P, Bateman A anon Pfam-B_3756 (release 23.0) Domain This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 170 amino acids in length. This domain is found associated with Pfam:PF04116. This domain has a conserved LEGW sequence motif. This region has similarity to short chain dehydrogenases [1]. 
27.00 27.00 36.20 36.00 26.70 26.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.95 0.71 -4.51 10 156 2009-07-11 10:43:14 2009-07-11 11:43:14 3 5 27 0 77 166 5 157.20 47 28.21 CHANGED VlLpGssoKVARAIAhuLC++sl+VsMh..sKE-acpLppcl.sschpcNLVhSpo...aos....KlWLVGDGLoscEQh+ApcGTLFlPFSpFPPpchRKD.CsYpsTPAMtlPKohpNlcSCENWLsRRVMSAa.Rsu..GlVHALEGWscHECG.sph...ul-+l.....WEAAL+HGFpP ....................VhLpGsh..SKl..upAlAhhLCp+tlpVhhh..sp-cappl.ppch.ss-hpphLV.......s.op...aps.........KsWlVGchlsscEQthAPtGThFh.Fs....P.h....p.hR+D..CsYtphsAMtlP.cs.hpslcoCE.hhsRtVhpAh.+su..GllHsLEGWstHEsG.....sl...cl-hlWpAALpHGFpP........................ 0 10 46 66 +11910 PF12077 DUF3556 Transmembrane protein of unknown function (DUF3556) Assefa S, Coggill P, Bateman A anon PFAM-B_2567 (release 23.0) Family This family of transmembrane proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 576 to 592 amino acids in length. 
25.00 25.00 238.70 31.50 23.10 22.90 hmmbuild -o /dev/null HMM SEED 574 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.75 0.70 -6.50 14 188 2009-09-11 14:10:01 2009-07-11 12:04:22 3 2 108 0 38 132 5 566.10 57 98.52 CHANGED MGFlpPchPsVDhsEWpctsRu-Rl+sMAcHWAEhGFGTPhhlHlhYllK.IhlYlLGuWlls.htTsGlsGhscluuWWsEPIVaQKlVLaThLFEVlGLGCGaGPLssRFaPPhGuILYWLRPGTIRLPPWPc+VPhTpGspRTshDVsLYuulLssllsALhosGs.....GPlsth.....sstlGllssWplssllslLuVLGLRDKlIFLAARGEhYushhlsFhF......G...usDh......IlAsKllhlslWhGAATSKLN+HFPaVlusMhSNNPlh+s+...hlKRpha+caPDDLRPStLuchlAH.huTslEhLVPLlLhFupuGas.TslAAsl.MlsFHLsILSshPMGVPLEWNVFhlFullhLFlua...uslGls-lpsPh.lulLhAslssssllGNLaPc+lSFLPuMRYYAGNWsTolWCh+P.uA--Klcp.sllthuuhstsQLp+h.Yus.EsAplhhahuhAFRuMpoHGRALhoLhhRAl.ustDED-YslpEGEhlCusslGWNFGDGHLHNEQLIAAlQpRCsF-PGElRVllL-uQPIH+QpQpYRlVDAATG.hERGYVpVuDMlscQPWs-...........-...lPVcVt.s ..MGFLpPcLPD.V.DhspW.ptsRpp+lpshupHWAEhGFGTPhhlHLhYlsK.IhL.YlluGhhIl.hhTsGlsGh...oclusWWspPIVaQKVVlaThLFEVlGLGCGaGPLssRFaPPhGuhLYWLRPsTIRLPPWPD+VPaTpGspRTsVDVALYAllLhhllhALhos.Gs.....sPlsth........GstVGLlssh.lVshlllLuVLGLRDKsIFLAARGEhYhshhlsFhF.............s...hsDh......IlAhKllhLslWhGAATSKLN+HFPaVlusMhSNNPLlRsR...hIKRhha+caPsDLRPuhLu+lhAH.suTslEhlVPllLhhu..psGaP.ohlAssh.MVsFHLsILoslPMGVPLEWNVFhIFullhLFlua......uslulsDlcsPl.LAlllAVlAhlVIhGNLhPcKISFLPuMRYYAGNWsTolWCh+P.uAEsKlpp.ulVt.uuhsssQLt+h....Yss.-pApI.h.hs.hAFRAMpoHGRALhoLhhRAh...cDEscYsIp-GEhlCushVGWNFGDGHLHNEQLIAAlQcRCsFpPGElRVllL-uQPI..HpQpQpYRlVDAATG.FEcGYVpVuDMlsRQPWs-...-.lPVHVt..p.............. 0 7 22 33 +11911 PF12078 DUF3557 Domain of unknown function (DUF3557) Assefa S, Coggill P, Bateman A anon PFAM-B_2154 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in eukaryotes. This domain is about 150 amino acids in length. 
26.20 26.20 26.30 26.30 26.10 26.10 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.91 0.71 -4.50 33 542 2009-07-11 13:46:58 2009-07-11 14:46:58 3 7 5 0 539 545 1 145.30 20 38.35 CHANGED Kl+-Ah+YLlp+Lh..uuRss.lplpp...Lplss....tslL.RlP...psl+l+lppLcl...sspsspllcsl..psllcsoohPLpolplsss........phscsllpouchLhltsthh.............ptLhplpsp+Vclt......hshpphhcL.lcpW.lcpu......+p.lGThaoFshpp ...........................................pshchlhphhh....ss.R...p.t...l.pVpp...lt..h.t.t..................tt.h..h..phP......sh..+h.c.l.p.pLph..............hp..........phhptl..........s.hl..c.s.ohPLcp..l.phtht..................tth.cp....sl.lp...s.AcpL.hlpt.h.................................phltpL.p.Npp..lhlp.......ththsh...p..s..h.htl....lcpW.hpst......+p.lGTpaph....t............................................... 0 42 43 539 +11912 PF12079 DUF3558 Protein of unknown function (DUF3558) Assefa S, Coggill P, Bateman A anon PFAM-B_3489 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 177 to 195 amino acids in length. 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.49 0.71 -4.61 27 517 2009-07-11 13:51:31 2009-07-11 14:51:31 3 3 154 0 133 423 2 165.90 21 85.88 CHANGED sssluGCu..s.sGsssstsssss....sss.....phss........hhtt..CssloDspl.schh......Gsssh.h....ssshsus......sCpWptss......h....shs.loh.aacsuslspERphtpththslp-h....sIsG+puh.hupssss.....sssCslsl.stusshls.....W.l.phps....tshs..Ds.C...shApcLspholsps. 
.............................................h..hlsGCs....s.s..Gs..s.ss..ussss...................sps.ts.sptp.ss...............................hhc...C.ts.losptl.sc.hh.................Ghssh....h........psshsus...........tCpWpuss......h................ths..loh.....h.hp.....ts.s...l....s...p...p...c....p.....ssctht.hp...s.psh..........sIsGp.......su..h..hhtssps.......susC.tVsh.....tsu.s.....s...h..lp.....hsl....shps......sts....Ds..C...s.Atclhp.sltp..p............................................................................................................... 0 28 94 120 +11913 PF12080 GldM_C GldM C-terminal domain Assefa S, Coggill P, Bateman A anon PFAM-B_3275 (release 23.0) Domain This domain is found in bacteria at the C-terminus of the GldM protein. This domain is typically between 169 to 182 amino acids in length. This domain has two completely conserved residues (Y and N) that may be functionally important. GldM, is named for the member from Cytophaga johnsonae (Flavobacterium johnsoniae), which is required for a type of rapid gliding motility found in certain members of the Bacteriodetes [1]. 29.00 29.00 35.80 34.80 28.40 26.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.00 0.71 -4.33 26 164 2009-07-11 14:11:47 2009-07-11 15:11:47 3 3 148 0 57 170 155 176.40 31 35.45 CHANGED sIuuDKMNVlYRGlsNPhoIulPGlssspVs...uous..G..Lp+tuss.tahhpPs..p.Gpc.lpIsVoup.ss.Gpsls.sppsFRlKslPsPh.........Gslpspt....ssphs+psLpsus.lsAsh.pDFsF-lshpVsuFplplssp.....ssltlsGschsups+shlp+sc+G-tl.ItcIcsphss ........sluADhMNVLYtGh..sNPloIShPGlssspVs...sohs.....GupLsppGsu...palsp....Ps....s.Gpc.lsIsVoup.....Gpshp...uphsFRV+plPcPs.........ushpthp.....ssthsKssLhsss.lsAsl.-Dh.hD.lsapVsuFch.hh..hcp.......sslsssGsphos..pt+stl+php+GcphhIsclpspss.s.................. 
0 28 51 57 +11914 PF12081 GldM_N GldM N-terminal domain Assefa S, Coggill P, Bateman A anon PFAM-B_3275 (release 23.0) Domain This domain is found in bacteria at the N-terminus of the GldM protein. This domain is typically between 169 to 182 amino acids in length. This domain has two completely conserved residues (Y and N) that may be functionally important. GldM, is named for the member from Cytophaga johnsonae (Flavobacterium johnsoniae), which is required for a type of rapid gliding motility found in certain members of the Bacteriodetes [1]. 25.00 25.00 29.70 25.60 24.30 24.10 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.17 0.71 -4.78 30 171 2009-09-11 04:40:41 2009-07-11 15:15:05 3 4 147 0 54 178 163 186.50 24 38.17 CHANGED EVLsuFshhN..cpLcposssscppNcpshssL..ssKss-sstchtphhppAppl+ptoschhsalpslKpplhpsss..ctp....hpshc+sstl-..pthhhGst.hsscGp..clhpclspY+stlhthh..stphpp.hh.ttlptphssss......ppps.tppcahphpFps.hPllAulshLTphQs-l+psEu-llspLluut .........................................................................................-VLsuFshlp..csLppostsssp...pNpthhpsl..ptp..hppsst..+htthhp.cAppl+pho.cslhsalppLKtplscp...u-st..csp...........htslc.pp-sl-.s.......sthhh.us....hsscGp..cLhpplssY+..pplhphl.............t..hpt..hh..phlp.sphsTpss....................ppps.ts+sW.phhFcs.hPssAulThLoplQuDl+tsEu-llpsLlss.h........ 0 26 48 54 +11916 PF12083 DUF3560 Domain of unknown function (DUF3560) Assefa S, Coggill P, Bateman A anon PFAM-B_2138 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 120 amino acids in length. This domain has a conserved GHHSE sequence motif. 
27.20 27.20 27.40 27.40 21.90 21.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.68 0.71 -4.08 24 275 2009-09-10 16:38:42 2009-07-12 10:48:23 3 5 224 0 27 224 3 124.50 54 30.84 CHANGED sshtpRtpsRspRapshAs+Asscucptaptucphs-slPh.GQPILVGHHSEpRcR+hh-RhcsshtKAlthp-KAc.......aapp+A.tusspsshspcsP-s.lc+l+thhAsh+pspcph+sts+hl+pt .....os..LsERQEARAERFouYSuKRAuESsQAl-cV-RLAuhIP..GQPILVGHHSERRARRDAQRIENGMKRA.VMLaERAE.......YWEERAc..S.AL.hHAKYKERPDV..RaRRIKKIEADLRKAEKoIApSpKYLsha............ 0 6 17 26 +11917 PF12084 DUF3561 Protein of unknown function (DUF3561) Assefa S, Coggill P, Bateman A anon PFAM-B_2401 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 110 amino acids in length. 25.00 25.00 27.40 27.20 22.90 21.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.51 0.72 -4.20 10 531 2009-07-12 09:49:57 2009-07-12 10:49:57 3 1 525 0 37 121 1 104.80 72 98.87 CHANGED MRNopNl.hlopsDs.hppcDEsTaShsGAVVGFlSWLLALuIPFLlYGuNTLFFh.LYTWPFFLALMPVuVlVGIALasLLcG+LlYSlhuTlloVsslFuhLFhWLhG ......................MRNSHNI.TlT.s.NDu.hsEDEETTWS.LPGAVVGFlSWLh..ALAhPh.LlYGS.N.TLF.FF.lYTWPFFLALMPVAVVVGIALHSLhcGKLhYSIlFTLlTVGlMFGALFhWLLG...... 0 1 8 21 +11918 PF12085 DUF3562 Protein of unknown function (DUF3562) Assefa S, Coggill P, Bateman A anon PFAM-B_3549 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 62 to 84 amino acids in length. This protein has two completely conserved residues (A and Y) that may be functionally important. 
26.30 26.30 32.40 32.20 25.00 23.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.04 0.72 -4.15 14 97 2009-07-12 09:51:43 2009-07-12 10:51:43 3 1 71 0 22 65 2 69.50 39 94.74 CHANGED hs.ssh.............-tlcphApcpuhss-.Vcphhs-phcsLuu-ARVHDYlplFAhR+VRcphR....................spsstt+ ................MhQssl.-.lpulApcpshss-tVpchh--plspLusGARV+DYLpVhshRRVREplR........sR.....st................... 0 2 4 13 +11919 PF12086 DUF3563 Protein of unknown function (DUF3563) Assefa S, Coggill P, Bateman A anon PFAM-B_3639 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 50 amino acids in length. This protein has conserved AYL and DLE sequence motifs. 26.90 26.90 26.90 27.20 26.50 24.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.92 0.72 -4.22 5 136 2009-07-12 09:54:44 2009-07-12 10:54:44 3 1 79 0 42 82 23 51.90 45 93.77 CHANGED MYL...lS+LFLFLTKshDpusKEcp...DAYLAEATDLYDLEFRMRKID.R-Ash+pPSshss+ .....................huhlh..hLop.hE..p..s.....t+ERp...-AYLAsuoDlhDLEhRhRpL-...pshu..........h................ 0 1 10 22 +11920 PF12087 DUF3564 Protein of unknown function (DUF3564) Assefa S, Coggill P, Bateman A anon PFAM-B_3736 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 118 to 142 amino acids in length. This protein has a conserved WSRE sequence motif. 
25.00 25.00 44.20 99.20 21.90 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.71 0.71 -4.53 7 93 2009-07-12 09:56:13 2009-07-12 10:56:13 3 1 57 0 26 75 2 119.20 52 95.53 CHANGED MRlTl+lsT.-sss.tuaAlLWLDpsp+tWSREuHtGl-LPpWGsltsspGsstl...suADsspslCpLpGLclsshpGshEu.EpGpAhWh..ut+sPhhGtW+lQsVDpssspPEat.Fsttp MRLTI+ls.u.-uss...puaAVLWlDTDEphWSREuHpGIDLPsWGcV+sspGsstL...CuADsscuLCpLcGLshust..............pt...Es.EpGsAhh......us+sP..sGAWRLQsVDpssspsEHc.FTsht.. 0 2 5 17 +11921 PF12088 DUF3565 Protein of unknown function (DUF3565) Assefa S, Coggill P, Bateman A anon PFAM-B_3033 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 30 to 78 amino acids in length. This protein has two conserved sequence motifs: WVA and CGH. 25.00 25.00 29.40 29.30 20.40 19.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.18 0.72 -4.20 19 114 2009-07-12 09:58:00 2009-07-12 10:58:00 3 4 112 0 40 105 12 56.00 50 70.94 CHANGED lGaHpD-EsHWVApLuCGHsQHVRHpPPWpsRsWVlT.pGRpshLGt.LsCtKC-puustD ..hsacpD-EsHWVA.LuCGHsQHlRHpPPWpsRsWVlottGRpphlGphlsCt+Csputs........ 0 10 19 32 +11922 PF12089 DUF3566 Transmembrane domain of unknown function (DUF3566) Assefa S, Coggill P, Bateman A anon PFAM-B_3331 (release 23.0) Domain This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 136 to 304 amino acids in length. This region represents a transmembrane region found at the C-terminus of the proteins. 
45.90 45.90 52.10 51.60 38.50 38.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.53 0.71 -4.09 24 434 2009-07-12 10:02:00 2009-07-12 11:02:00 3 1 432 0 113 310 175 118.90 40 54.26 CHANGED ssRRscLplsRlDPWSshKloFllSlulhllhlVAsslLahlLsuhGVFsplsshlsslsssssu......sltshlohGpVhuhusllullsVVLhTALuTluAhlYNlsusLlGGlcVTLu-c ......psR+ApLplsRlDPWSshKlShlLSlAhhllhhlAsslLahlLsuhGVasplNpslsslhsssuu......phsp..hlohGpVluhssLlGslNsVLhTALuTluAFlYNlsusL..lG.GlEVTLu-c.. 0 34 83 105 +11923 PF12090 Spt20 Spt20 family Assefa S, Coggill P, Bateman A anon PFAM-B_3386 (release 23.0) Family This presumed domain is found in the Spt20 proteins from both human and yeast. The Spt20 protein is part of the SAGA complex which is a large cmplex mediating histone deacetylation. Yeast Spt20 has been shown to play a role in structural integrity of the SAGA complex as as no intact SAGA could be purified in spt20 deletion strains. 32.00 32.00 34.90 35.10 30.30 28.50 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.21 0.71 -4.93 32 305 2009-07-12 10:47:42 2009-07-12 11:47:42 3 4 214 0 208 305 0 201.60 29 23.78 CHANGED ssppl.L++at.................................................p.......sSlhl+lasspa+a..................................................................s.ppshhhsYss.h.+thLcalcptplP.pthh-lL..........................pp..s...............slpFY-GCl.IlplhDa+s...................................................................t..............tt...........t..t.tp.tt...............pPcsapslL+PTstolhhDlhhhsss...................................................tphssp.hhphEucllt.sTsts................LsL-ss.s.......stpttshLp.p 
.......................................................................h...tl.LcKh............................................................ssLllpLaPsphtas.....................................................................................s.scshphPYcp...pplL-alcscplP.s.Ll-lL................cc..s......................sl..Fas............GCl.Iscl+DaRp.......................................................................................................................................................................................st.psp+llL+PT....poLhsDlp.h.sss..............................................................................ttpaopc-...tLtlESpllh.ATstP................LsL-Pshs.........stpstshh...hh................................................... 0 58 93 145 +11924 PF12091 DUF3567 Protein of unknown function (DUF3567) Assefa S, Coggill P, Bateman A anon PFAM-B_3056 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 90 amino acids in length. This protein has a conserved EIVDK sequence motif. 40.90 40.90 50.00 47.60 32.30 30.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.88 0.72 -4.01 11 117 2009-07-12 10:52:43 2009-07-12 11:52:43 3 1 116 0 45 74 12 87.40 57 98.69 CHANGED MQMlYsS-sasVVch.........sPDssh................ttLsptGaEIVDKpup+ElaLDGuhAEhFppclpshhpspPo.EEVDDhLspYsuLhppPVllH ..MQMIYNSsNYCVVEF................sPpss+..............tshsuGGYEIVDKNupREIFlDGplAE+FRccVcpLIps.EPol-EVD-FLGpFDoLMpQPVVlH. 0 2 20 33 +11925 PF12092 DUF3568 Protein of unknown function (DUF3568) Assefa S, Coggill P, Bateman A anon PFAM-B_3573 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 130 amino acids in length. 
37.00 37.00 39.00 38.70 32.50 32.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.51 0.71 -4.43 8 164 2009-07-12 10:55:55 2009-07-12 11:55:55 3 1 45 0 26 90 3 129.80 34 95.07 CHANGED +KlhllhLhsh.slsLsuC.hlsAllsGsus.tuGsstY..lsGpYhs-l.stsacsVYsATlpAIpsspsaslpspshs.....p..psAsIsus........sKsss-slplclpKlscNsTclpIKhGshGDpttSusLhspIpcNl .............Khhlhhlhsh.s..lsLsSC...ssullsGhu..sssuu.......s.stY..lsG...pYhhpl.stshpsVYsATLpAlpssssaslp.spshs.......s..pcApIsus........scssssshplplp+lssssT+lpIKaGshG..DpttSusLhspIppsl.......... 0 15 19 19 +11926 PF12093 Corona_NS8 Coronavirus NS8 protein Assefa S, Coggill P, Bateman A anon Pfam-B_2038 (Release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in coronaviruses. Proteins in this family are typically between 39 to 121 amino acids in length. This protein has two conserved sequence motifs: EDPCP and INCQ. 27.00 27.00 52.50 52.50 18.40 17.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.79 0.71 -3.74 2 88 2009-07-12 11:00:27 2009-07-12 12:00:27 3 1 70 0 0 20 0 68.90 56 96.56 CHANGED MKLLIVhshlo.s.CI..csslQcCspNpPa.lEDPCPhtY.s-W.I+htoR....poAhLstL.....GKhlPhH..hatshs.oCpP.lpINCQsPssGuLIsRC.Y.a-...hsta+DVhVVL ....MKLLIVhshlo.s.CI..psslQcCspNpPa.lEDPC.lp....................................................................................... 0 0 0 0 +11927 PF12094 DUF3570 Protein of unknown function (DUF3570) Assefa S, Coggill P, Bateman A anon PFAM-B_3745 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 396 to 444 amino acids in length. 
25.00 25.00 170.60 170.20 19.50 19.50 hmmbuild -o /dev/null HMM SEED 420 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.54 0.70 -5.74 41 141 2009-07-12 11:02:17 2009-07-12 12:02:17 3 1 118 0 71 143 85 412.60 24 92.13 CHANGED tlstsLshs..........ssullussstsps.....................................cl-...hthhhYpE............ps.psssstshsshc...............pshu-cp.slshchslDsloGASssssss..................h.tspt...sssapstuGtssh.t...................DTRhphssshsp..hspshphshusphSpEhDYhShuhsh.ulspshspcNTolohuhuht.Dpl....thshthpt.....t....................................s-s+pshshhlGloQlls+phlhphshshstssGYLosPY.+hlphhDss..............s.s.............................................t.hh.hEpRP-sRh+tul...hhpspatls..sssh....chpYRYasD-WuIsSHTh-scahhtlssph..h........lpPphRYYpQssAcFYp......shh.psp.s..h............s..........pahouDhRLushsuhohGlp...........hs...........hphtsthplshphthYppssshs.........G.....................................................thpshhlplshpF ......................................t......hhhshshhsts.t..sts.th.tct......................cls...hthhhYp-....................ps.psssssshsshp......................pshucct.slshchslDslouASssssss...................................t....ssths...h.uG.ts.......................................................DsRhphshshsh..hppstphshGsshSpEhDYpShGhsh.uhspshspcNTololGhuhhhDpl.psh..hssshps...................h........t......................scs+pohshtlGloQllspphhhphshshstpsGa........LosPY.+hlp..hhDss..................s..........................................................t.hh.hEphPDsRh+hul...hhpsphhls....sssl..chpYRYapDDWGlsoHTh-hchhhhlss.pa..t..........lpPphRYYsQ.ou.AcFYp.........shh.ts...s.....h...........................................................s............pahouDacLusasuhshGlt.................hs........hphttthphshphtaYppssshs........................................................................h.h........................................................... 
0 28 54 65 +11928 PF12095 DUF3571 Protein of unknown function (DUF3571) Assefa S, Coggill P, Bateman A anon PFAM-B_3506 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 85 to 97 amino acids in length. 25.00 25.00 25.10 59.60 21.60 17.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.79 0.72 -3.75 23 90 2009-07-12 11:05:05 2009-07-12 12:05:05 3 2 87 1 41 92 116 83.50 41 82.07 CHANGED MuDP...LlRspDpYVVLE.PspsEphLTssEhLtaLpphLpp..h-sL.PtDLpchsolsstAp+Ll-TAC-LEl.uPGhslQWaAVRLE ......Msss...lhh.ppDpaVlLE.PspsEpFLospELht+LcshLpp..tssL.P.DLpchsolsstApaLl-osCEL-l.GsGt.hlQWYsVRLE. 0 7 27 37 +11929 PF12096 DUF3572 Protein of unknown function (DUF3572) Assefa S, Coggill P, Bateman A anon PFAM-B_2130 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are about 100 amino acids in length. 35.20 35.20 46.70 61.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.82 0.72 -4.14 41 202 2009-09-11 14:10:56 2009-07-12 12:08:04 3 1 199 0 63 161 66 88.30 42 89.90 CHANGED hst-sAEslAlpALuWlAus--LLshFLutTGhusssLRpsAsDPsFLuuVLDFlhhDDAhVhAFs-utGls.ppshtARtsLPGu....st. .........sptsApslAlpALuWLAsD.c-lLsRFLAhTGlpssslRpuAp-PuFLuuVLsFlhscEssLltFC-psGhcPsthtpAhthLsGu..s..... 0 14 37 46 +11930 PF12097 DUF3573 Protein of unknown function (DUF3573) Assefa S, Coggill P, Bateman A anon PFAM-B_2286 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 372 to 530 amino acids in length. 
26.60 26.60 26.60 27.30 26.40 24.90 hmmbuild -o /dev/null HMM SEED 383 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -12.37 0.70 -5.85 5 128 2009-07-12 11:17:35 2009-07-12 12:17:35 3 1 33 0 12 96 0 354.20 51 73.37 CHANGED hFpKphLIloslLshuhhsahspS.thhs...........................s.QsssSpplD+psIocLQpQIppLQuQINpL-pp.......psoNtsupFsTYSSKVsss....olsulGpoKsLussphNNphsoDIhsNlss-ssIIsLuupslGGlFspcGuIDVGNAPAITTQGQlTYLGSYSGNNTIPIGQISSNLFASTIlGQR-KFDsYSIFFGGaIEADAQsWFGSuIo+u....ssuosFsSNGQNIYLToAsLYFLANlGHYVTAsaDFsTsEsNNFuLGNAFVIFGNLDTSPaFVTVGKs.RlSVGoFGGGGPWTSGITtNFLsPGRVTNluLNYKoDThNANVTVFuopNpHAsFSsAlFYAsKhTsNIA.VGFNhGYlaDlAGAsNsSls+hh.phsahsKoVGpFN .....................................h.Kh.llh.hhL....h.aht.S.th.s.................................Qsstup..-pptl.pLppQIppLQupIspLcpp............p.s....p.pFsTYSSKVssp.....hh..t.Gpspsl.sp.s..ss..tsclhsN.s.pps.lsLusp...tlFspsGuIDVGsAPAITTQGploYLGSYSGNNolPIGpIsSNLFASTlltQRsKFDsYSlFFGGaIcADAQhWFGosIs.s....psu.sshsuNGQNIYLToAsLY..FLuNlGHYVTAphDFsTs..-sNsFsLtsAFVIFGNLDTSPaFVTsG+s.+lSVGoaGG...GGPWTuGITtsFL.sPs+VTNl.SlNYKssshNANl.sVFuop....sp+AsFSsulFYAsp........hTsslA.lGFNhGYlaslAGAsNs.Slsp.h.phsh.scslGshN............................................. 0 7 7 7 +11931 PF12098 DUF3574 Protein of unknown function (DUF3574) Assefa S, Coggill P, Bateman A anon PFAM-B_3542 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria and viruses. Proteins in this family are typically between 144 to 163 amino acids in length. This protein has a conserved TPRF sequence motif. 
25.00 25.00 25.00 27.10 24.70 24.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.34 0.72 -4.44 21 194 2009-07-12 11:23:53 2009-07-12 12:23:53 3 1 194 0 67 196 59 103.70 42 69.59 CHANGED lps-LhFGhshssss.......sVS-A-appFlcc-VTPRFPDGLTVhDApGQW+s.ssGp..lsREsoKllhllasssssp.csslpsIRpuYKp+FpQpSVhhsspPsCVSF .....................popLaFGhsps.sGs.......sIoss...-WppFlDcpVT..PRFsDGLTVaDAcGQWhs.ssGp..lsREpSKslhllas..s.s..s.ss..cssl-AlRpsYKpcFtQpSVhhlppsVCVuF................ 0 14 27 46 +11932 PF12099 DUF3575 Protein of unknown function (DUF3575) Assefa S, Coggill PC, Bateman A anon PFAM-B_2229 (release 23.0) Family This family of proteins are functionally uncharacterised. This family is only found in bacteria. Proteins in this family are typically between 187 to 236 amino acids in length. 27.90 27.90 28.50 28.80 27.40 26.20 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.18 0.71 -5.21 39 605 2012-10-03 17:14:37 2009-07-12 12:36:53 3 4 114 0 65 504 70 197.20 28 78.01 CHANGED h+phhhlh................................................hhllhhsshh............................................tuQplAlKoNlLYDA.sss.NlGsEhslu++WolslsspYNsWp..a.ucs........+ph+tahspPEhRYWh.......scpa.sGHFlGlauphGpYNh..ush...hs............phpcpR.YpG..hhaGu.GloYGYpahLuc+WslEhslGlGYsphcYcKYsssc.Cush.hspscp.sahGPTKAulSLlY.ll 
......................................................................................hhhh..........................................hpspphAlKoNlLa.u.s.t.........s.........sNlGlEhs.l....u....c....+....h...olsls..s..sas.s...Wp....h....s.c...s.........+..p..h+hahl.pPEhRYWh............................................scph..sG..aFlGlau...tsup.aNh.......................ph.p.cp+Y.p.G.......h..h...Gu..GloYG.....YphhL.....u..c....+..Ws.....lEhslG...lG....YhphcY.......c.+....Y.tsh.......s......sst...h......htpt.pp...s...a....h....G....P....TKsulSllYhh.......................................... 0 21 53 65 +11933 PF12100 DUF3576 Domain of unknown function (DUF3576) Assefa S, Coggill P, Bateman A anon PFAM-B_2102 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria. This domain is about 100 amino acids in length. This domain has a single completely conserved residue G that may be functionally important. 25.00 25.00 25.60 42.20 24.50 16.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.37 0.72 -3.98 34 167 2009-07-13 12:12:39 2009-07-13 13:12:39 3 1 167 0 52 147 1293 101.70 45 60.38 CHANGED ssluV.NpYLWpASL-sLsFhPltosDPFuGVIlTs.YusPsusscpa+.sTVaIhDssLcAcuLpVul..QspsG......tsVsssTspslEsAILoRA....RQLRlp ....s.huVNpYLWpAoL-TLSFhPlsSsDs.GGVIlT-WYusPsssscRaK.lslaIhDssLcuDulcVslhcphpps.ttWh.ssssusTspslEDtILoRA....RpLRl.s............ 0 21 39 42 +11934 PF12101 DUF3577 Protein of unknown function (DUF3577) Assefa S, Coggill P, Bateman A anon PFAM-B_2116 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. Proteins in this family are typically between 143 to 307 amino acids in length. 
25.00 25.00 27.00 26.40 24.10 17.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.51 0.71 -4.28 18 264 2009-09-11 09:40:19 2009-07-13 13:15:57 3 2 181 0 85 234 9 132.20 44 83.97 CHANGED ssssspspYFNLHssGlGYLNcIRcVp.........s++GpsFhACsIAALpGsoDsspYsaFDssVsGpEApcLIcRCpcAVcs-+KVLluFpluDLasDsFTasp.....Gc+pG..-suVSLKuRLlhlsWIKVDGchlYpApspssssss .........p..tsspppYFsLpTsGIGYLsRlR-Vs.....s+cGpsFLuCsIAALs.Gss...Ds...sp.....YphFDspVuGsEApcLlpR.C.pA........l.....-.t.....c+KV.LluFRL.uDlhsDsFhhsK.....G-+tG..-....susSLKuRLl+lshIKlsGphVYpstttptt..s........... 0 8 42 68 +11935 PF12102 DUF3578 Domain of unknown function (DUF3578) Assefa S, Coggill P, Bateman A anon PFAM-B_2328 (release 23.0) Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea. This domain is typically between 177 to 191 amino acids in length. 25.00 25.00 26.40 26.40 24.40 24.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.06 0.71 -4.69 18 298 2009-07-13 12:20:20 2009-07-13 13:20:20 3 10 279 6 54 186 75 180.50 34 35.53 CHANGED pclhspYhpt+spp.Fppp......thsphlcpcl.cplpppshlspptah.VcGSsGQGsWAclPWlulhscplTsospcGhYlVYLFssDhsslYLoLsQGhTchppph....tspsplcphsphh+shlpp..ppt..hsspIpLtsph.........hspsYEpusIthKtYshs.sl..PsccpLhpDLcpllshYpphhsph ...........................................plhp.hhpt+Tpp.http...........htpll+p.cls...cp..lps......alccc..cY...Vs.GplupG.N.aupVPWIulhDcsITp.......pTpcGaYlVYL.Fp..s-h-sl.YLoLsQGhochsc.a.........psKsthc.phu..plpspl.sp.....spp.hsssphhhupspt........hspGYspusIta+hYDls..ch.............s-..-chlpDLcchLchapplhs..s............................................... 0 17 37 44 +11936 PF12103 Lipl32 Surface lipoprotein of Spirochaetales order Mistry J, Gavin OL anon pdb_2zz8 Domain Lipl32 is an outer membrane surface lipoprotein of Leptospira like bacteria. 
25.00 25.00 88.20 87.90 19.70 19.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.28 0.71 -4.82 4 121 2009-07-13 14:59:56 2009-07-13 15:59:56 3 1 47 9 6 90 0 179.70 90 77.15 CHANGED shcltlPYssslsYaGYlcsuspPDuhlcG.KcAYYLYlWVPAVlsElGVRMISPsu..upPscuDhVpcuFpsshpu-.p..+aFDTaIplERhsllps-cIs.KuAKsh.hptLsssDDus-h.cp.ptupYNSLhRhcSplusPsKA............LVRGLYRIuFTTYKoGcVcGSFlAoVG....sslPGVsh .TVKThLPYGSVINYYGYVKPGQAPDGLVDGNKKAYYLYVWIPAVIAEMGVRMISPTGEIGEPGDGDLVSDAFKAATPEEKSMPHWFDTWIRVER.MSAIMPDQIA.KAAKAKPVQKLDDD.DDGDDTYKEERHNKYNSLTRIK..IPNPPKSFDDLKNIDTKKLLVRGLYRISFTTYKPGEVKGSFVASVGLLFPPGIPGVS.P... 0 1 3 5 +11937 PF12104 Tcell_CD4_Cterm T cell CD4 receptor C terminal region Mistry J, Gavin OL anon pdb_1q68 Domain This domain is the C terminal domain of the CD4 T cell receptor. The C terminal domain is the cytoplasmic domain which relays the signal for T cell activation. This process involves co-receptor internalisation. This domain is involved in binding to the N terminal of Lck co-receptor in a Zn2+ clasp structure. 20.60 20.60 20.60 20.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.87 0.72 -7.13 0.72 -4.36 5 89 2009-07-13 15:23:31 2009-07-13 16:23:31 3 15 45 9 23 90 0 27.50 74 6.42 CHANGED RRRAcRMSQIKRLLSEKKTCQCsHRhQK .........RRpAcRMSQIKRLLSEKKTCQCP..HRhQK.... 0 1 1 5 +11938 PF12105 SpoU_methylas_C SpoU, rRNA methylase, C-terminal Assefa S anon PFAM-B_2024 (release 23.0) Family This domain is found in bacteria. This domain is about 60 amino acids in length. This domain is found in association with Pfam:PF00588. This domain has a conserved LFE sequence motif. Some members of the Pfam family SpoU_methylase, Pfam:PF00588, carry this very distinctive sequence region at their extreme C-terminus. The exact function of this region is not known. 
25.00 25.00 25.60 26.50 24.20 21.30 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.72 0.72 -4.41 53 711 2009-07-13 15:25:21 2009-07-13 16:25:21 3 2 708 1 104 324 122 58.50 70 25.78 CHANGED RQRpsAGhYspp..tLss-php+lLFEhuaPhlAchC+c+GhPYPtLc.-pGpIlsctsa .....RQRQNAGMYLRENSMLPEsEQQRLLFEGGYPVLAKVAKRKGLPYP+VN.QQGEI-ADA-W............ 0 14 41 76 +11939 PF12106 Colicin_C Colicin C terminal ribonuclease domain Mistry J, Gavin OL anon pdb_2dfx Domain Colicin is a protein produced by bacteria with Col plasmids. Its function is to attack E. coli through actions on its inner membrane ion channels or through ribonuclease or deoxyribonuclease actions. The C terminal domain is the ribonuclease domain. It specifically cleaves tRNA anticodons which recognise codons in the form NAY (N:any nucleotide, A:adenosine, Y:pyrimidine) which corresponds to Tyrosine, Histidine, Asparagine and Aspartic Acid. E5-CRD can be referred to as an RNA restriction enzyme that specifically recognizes and cleaves single-stranded GU sequences. [1] 25.00 25.00 37.10 29.80 22.10 15.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.21 0.72 -4.37 12 86 2009-07-13 15:32:11 2009-07-13 16:32:11 3 14 21 10 69 94 0 34.60 49 16.18 CHANGED GsVssshscpssscT.PDulsppsSusVhs.psssY .GsVlhVls-ss-scThPDSVoo.PSuoVVV.pVVVY... 0 3 69 69 +11940 PF12107 VEK-30 Plasminogen (Pg) ligand in fibrinolytic pathway Mistry J, Gavin OL anon pdb_2doh Domain Pg is an important mediator of angiostatin production in the fibrinolytic pathway. Pg is made up of five subunit kringle molecules (Pg-K1 to Pg-K5), of which the first three make the protein angiostatin. VEK-30 is a domain of the group A streptococcal protein PAM. It binds to Pg-K2 of angiostatin and activates the molecule to mediate its anti-angiogenic effects. 
VEK-30 binds to angiostatin via a C terminal lysine with argininyl and glutamyl side chain residues known as a 'through space isostere'. [1] 20.80 20.80 20.90 21.10 19.20 20.60 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -5.87 0.73 -6.24 0.73 -4.10 8 126 2009-07-13 15:37:18 2009-07-13 16:37:18 3 14 3 12 0 125 0 16.50 71 9.92 CHANGED DDsELcRLKNERH-HDc ..-sELERLKNERH-HDc 0 0 0 0 +11941 PF12108 SF3a60_bindingd Splicing factor SF3a60 binding domain Mistry J anon pdb_2dt7 Domain This domain is found in eukaryotes. This domain is about 30 amino acids in length. This domain has a single completely conserved residue Y that may be functionally important. SF3a60 makes up the SF3a complex with SF3a66 and SF3a120. This domain is the binding site of SF3a60 for SF3a120. The SF3a complex is part of the spliceosome, a protein complex involved in splicing mRNA after transcription. 19.10 19.10 19.40 19.10 19.00 18.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.79 0.72 -7.02 0.72 -4.40 20 244 2009-07-13 15:39:18 2009-07-13 16:39:18 3 8 212 1 178 228 0 27.60 47 5.54 CHANGED sulu..ss-.asEFYcRLcpIK-aH++aPN ..........slo..uss.FsEFYsRLKpIK-FHR+aPN.... 0 61 94 144 +11942 PF12109 CXCR4_N CXCR4 Chemokine receptor N terminal Mistry J, Gavin OL anon pdb_2k03 Domain CXCR4 and its ligand stromal cell-derived factor-1 (a.k.a. CXCL12) are essential for proper fetal development. CXCR4 is also the major coreceptor for T-tropic strains of human immunodeficiency virus 1 (HIV-1), and SDF-1 inhibits HIV-1 infection. Additionally, SDF-1 and CXCR4 mediate cancer cell migration and metastasis. The N terminal domain of most chemokine receptors is the ligand binding domain and so the N terminal domain of CXCR4 is the binding site for SDF-1. 
[1] 20.30 20.30 20.50 26.60 18.90 19.70 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.39 0.72 -4.15 4 102 2009-07-13 15:43:43 2009-07-13 16:43:43 3 2 77 15 21 91 0 32.40 82 9.34 CHANGED I.huDNhSEE.lGSGDY-shpEPCFpcENucFNR .....IYTSDNYTEE....hGSGDYDShKEPCFREENAHFNR. 0 1 2 6 +11943 PF12110 Nup96 Nuclear protein 96 Mistry J, Gavin OL anon pdb_3bg0 Domain Nup96 (often known by the name of its yeast homolog Nup145C) is part of the Nup84 heptameric complex in the nuclear pore complex. Nup96 complexes with Sec13 in the middle of the heptamer. The function of the heptamer is to coat the curvature of the nuclear pore complex between the inner and outer nuclear membranes. Nup96 is predicted to be an alpha helical solenoid. The interaction between Nup96 and Sec13 is the point of curvature in the heptameric complex. [1] [2] 25.00 25.00 29.20 28.40 23.50 22.20 hmmbuild -o /dev/null HMM SEED 290 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.65 0.70 -5.30 36 380 2009-07-13 15:55:46 2009-07-13 16:55:46 3 13 307 11 219 392 9 259.90 30 20.01 CHANGED s.hEphhhhLsupclpcAschAlpupsh+LAsLlo.l.usssss+p.hppQLppW+ppss..s.lspshhclYcLLuGsshhspss.t....................tlsWhpshGL+LaYupssp....slccslppatcslsp............s.....................pt..hpshhatLL+lY......sspst..........shpth...lts.shssssLch+LsWhlhplLpshshhpt.t..........................................t........DpLshsaAspL...cspGhap.AlFVlhHlssssp....Rcpsl+plLtRphptlss.t..............sp.palhpcLplPpsWIapApAlhs+sps 
........................-thh.hLsspclpcAsphuhpstsh+LAhlluph..s.........s..s.......t.h+p.hptQL.tWpptph...p.............lpp..hhpla...tLLuGpsh.hptuptt.....................h.tlsWhpslul.pLWYhhssss...............olscAlptapcshps.......................t.st.PhP...Yhpt................................sstpsh.DlhapLL..pLa.................upppt................................slpph......LpP...h...sh...o................sssh..D..aRLuWh..LhpsLpulshpphs..............................................tt...s.lphsaAtpL................ps..uhap.A..lFVlh..H.lp.pttt..............................RppslpphLt+p.s.t.hhtp........................................t.phl..hppLtlPtpWlhtAtAlhtt...................................... 0 79 126 184 +11944 PF12111 PNPase_C Polyribonucleotide phosphorylase C terminal Mistry J, Gavin OL anon pdb_3gcm Domain PNPase regulates the expression of small non-coding RNAs that control expression of outer-membrane proteins. The enzyme also affects complex processes, such as the tissue-invasive virulence of Salmonella enterica and the regulation of a virulence-factor secretion system in Yersinia. In Escherichia coli, PNPase is involved in the quality control of ribosomal RNA precursors and is required for growth following cold shock. This family contains the C terminal protomer domain of the PNPase core. The function of the C terminal protomer is to catalyse phosphorolysis through its two active sites. [1] 25.00 25.00 27.20 27.20 24.70 23.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -7.79 0.72 -4.76 16 611 2009-07-13 16:01:04 2009-07-13 17:01:04 3 3 604 16 52 368 0 38.90 67 3.77 CHANGED Essphu-WQcspahFcGKGuAGtHSAsspspussT+sps ...EuP+HSDWQ.RPoFsFEGKGAAGGHoAT+HASAsPsRPQP.. 0 2 13 34 +11945 PF12112 DUF3579 Protein of unknown function (DUF3579) Assefa S, Coggill P, Bateman A anon PFAM-B_2027 (release 23.0) Family This family of proteins is functionally uncharacterised. This protein is found in bacteria. 
Proteins in this family are typically between 98 to 126 amino acids in length. This protein has a conserved FRP sequence motif. 25.00 25.00 28.40 28.10 19.70 19.10 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.98 0.72 -4.08 19 284 2009-07-13 16:43:08 2009-07-13 17:43:08 3 4 249 1 88 221 35 93.20 46 62.29 CHANGED hpsp....scchhIpGlTppGKsFRPSDWAERLsGlhupF....psst........+hsYSPastPss.hsGlKCVhVctcL+-hEPhAacFlhNFA+DNcLplh-.tshh ...........h...pspchhIpGlTp.sGKpFRPSDWAERLsG..l..husF....psss...........+lpYS.aV.+Psl..lssl+CVhVDp+LcshpP.AFcFlhsFApDNsLpVh-s..h............ 0 18 51 68 +11946 PF12113 SVM_signal SVM protein signal sequence Assefa S, Coggill P, Bateman A anon PFAM-B_2829 (release 23.0) Motif This region is presumed to be a signal peptide sequence found in Sequence-variable mosaic (SVM) proteins [1]. This domain is found in phytoplasmas. This presumed signal sequence is about 30 amino acids in length. 31.70 31.70 34.10 34.10 28.70 27.50 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.46 0.72 -4.12 32 70 2009-07-13 16:58:34 2009-07-13 17:58:34 3 1 15 0 53 69 0 32.70 50 22.02 CHANGED MFKLKsphhll..pIhLFlhLGLhhIsNN.ppVMAM ..MFKLKNphpll..sIhLFhhLGLhhIsNN.ppVMAM.... 0 16 17 31 +11947 PF12114 Period_C Period protein 2/3C-terminal region Assefa S, Coggill PC, Bateman A anon PFAM-B_2037 (release 23.0) Family This domain is found in eukaryotes. This domain is typically between 164 to 200 amino acids in length. This domain is found associated with Pfam:PF08447. 
25.00 25.00 49.90 27.10 19.20 19.20 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.43 0.71 -4.70 24 250 2009-07-14 11:56:36 2009-07-14 12:56:36 3 7 74 0 85 198 0 189.60 38 17.03 CHANGED ssDuhSsSS-LhshlLpEDutS............usuSusSsS..s............tpsS................sussoSpoSpYFGShD.SSEsscps+pstssp.....................ptphh+hVhpDPlWlhhssss-plMMTYQlPsRsh-pVL+ED+EKL+php+.QPpFoc-QKcELucVHsWlpptslPptlslpsClsCpspsss..............shctchpphphsch ...................................................................................ppDu.SsSS-LLplL.......Lp..E..DupS....................uT.GSusSuShssstst....................................uoG.Sts.sssost.............................ssuSpSSpoSKYFGShD..SSEssppsppststp....................tcphl+h.VLQDPlWLlhAss-ccVMMTYQlP.s.RshpsVL+ED+E+L+shQ+pQP+Fo--Q+cELupVHsWl...ppssLPptlslp.sClsCpspsps.............................s..................................... 0 10 19 41 +11948 PF12115 Salp15 Salivary protein of 15kDa inhibits CD4+ T cell activation Assefa S anon PFAM-B_2039 (release 23.0) Family This is a family of 15kDa salivary proteins from Acari Arachnids that is induced on feeding and assists the parasite to remain attached to its arthropod host. By repressing calcium fluxes triggered by TCR engagement, Salp15 inhibits CD4+ T cell activation. Salp15 shows weak similarity to Inhibin A, a member of the TGF-beta superfamily that inhibits the production of cytokines and the proliferation of T cells. 
27.80 27.80 28.00 30.10 27.60 26.70 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.39 0.71 -3.87 44 93 2009-07-14 12:43:31 2009-07-14 13:43:31 3 1 6 0 7 99 0 128.00 23 94.54 CHANGED lphhh..........lhlhhslsh..................................sptpspsss.........................slttphsphhsssc...phtpplhphCpp.......................ptlssphls..apsCoahCt.........ssttsphphp....Ls-GhPCG....pspsC.ppGpCss..........h.sC ......................................lhhhhshsh........................................s...ttpp................................s.shphPshhsshp....shhspltptCspt...................p.htsIsshphc...hpsCphtCp.......tpspp.ssplohs.............Ls-GpPCu....sppsC..pscCstp.......ssC.............. 0 7 7 7 +11949 PF12116 SpoIIID Stage III sporulation protein D Assefa S anon PFAM-B_2045 (release 23.0) Family This stage III sporulation protein is a small DNA-binding family that is essential for gene expression of the mother-cell compartment during sporulation. The domain is found in bacteria and viruses, and is about 40 amino acids in length. It has a conserved RGG sequence motif. 20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.64 0.72 -4.05 19 411 2012-10-04 14:01:12 2009-07-14 14:22:57 3 3 388 1 93 317 10 77.20 61 88.65 CHANGED +-Y....IE-Rsl-lupYIlEs+ATVRpsAKsFGVSKSTVHKDlTERL.pINPpLApcV+plL-hNKAERHIRGGpAT+pKYpctc ..............+-Y..IcERslclupYI.l-s.+..tT.VRthAKtFGVSKSTVHKDlT..E..RLPcINPcLApEVKcILDh.....p.....Ku.RHlRGGcATK.KY+c..p................................ 2 48 76 82 +11950 PF12117 DUF3580 DUF_B2046; Protein of unknown function (DUF3580) Assefa S, Bateman A, Coggill P anon PFAM-B_2046 (release 23.0) Family This domain is found in viruses, and is about 120 amino acids in length. It is found in association with Pfam:PF01057. 
25.00 25.00 56.30 56.20 22.50 17.60 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.97 0.71 -4.16 6 117 2009-07-14 14:37:12 2009-07-14 15:37:12 3 3 31 0 0 122 0 114.40 62 17.73 CHANGED t+WGKVPDWSENWAEPKlpTPINSlGSh+Ssppos+STPLSQNYALTPLsSDL.tDLALEPWSTPsTPVAsTlpspNTspuGspu.QsuQtSPTWSEIEsDLRACFopEpLcoDFp-..sLD ........HHWGpVP-WsEpWtEPKlQTPINoPss.pslpTos+ooPtspNYshTPlppDL.lsLALEPWSpPsTPshpssptpsst.sssh+....psSPTWSEIEsDlRAhFsppplpps........s.. 0 0 0 0 +11951 PF12118 SprA-related SprA-related family Assefa S, Bateman A, Coggill P anon PFAM-B_2057 (release 23.0) Family This protein is found in bacteria. Proteins in this family are typically between 234 to 465 amino acids in length. There is a conserved GEV sequence motif.Most members are annotated as being SprA-related. 27.30 27.30 27.40 27.40 26.80 27.20 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.48 0.70 -11.99 0.70 -5.09 40 267 2009-07-14 15:26:22 2009-07-14 16:26:22 3 4 261 0 76 193 40 227.00 38 76.53 CHANGED sl.ssuhsshsstssss.sttshtt.t.......................................................................sststsssssspsss..........................................................................s...........ssssstpscp.......................spppsppppttss...........................pp.s..tppphp.p..plpcLppRDpEVRsHEQAHsulGGpaA.GuPoasYppGPDG++YAluGEVsIDlS.lsGDPpATIpKMppVpAAAhAPA-PSutDhpVAApAtpphspA+uELsppcpppssptppspt...........ttpsp.pshp..pppp.phsspsptst................t.lp 
............................................................................................................................................................shshhsspsthsp......................................................................................................................................................................................................................................................................................................pppsppppp.p.s.pp.ppspppps.pps.....................p.hs..t.tscL.s.p.pphpQVcELpuhD+pV+AHEtAHtAAG.Gsh.A.GusSFoYp+GPDsphYAsuGEVsIchpp.ussP..EtTIspucQltAAAhAPADPSsQDh+VAAs.AtphphEARAE.ht..c...+s..pcsp..cppppsp.........ttppp.p.ps.t.hpp.p..............s.p....................................... 0 31 54 66 +11952 PF12119 DUF3581 Protein of unknown function (DUF3581) Assefa S anon PFAM-B_2081 (release 23.0) Family This protein is found in bacteria. Proteins in this family are about 240 amino acids in length. 25.00 25.00 47.40 47.10 20.70 18.60 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.34 0.70 -5.12 22 187 2009-07-14 15:30:18 2009-07-14 16:30:18 3 1 183 0 36 129 22 211.70 61 90.92 CHANGED MFLssYaspps..splslospQAScFAKplAsDFNPIHDsDAKRFCVPGDLLFAllLscaGLSppMpFcFsGMVucsltLpF.spssssphslsDppsKsYLcVpRuG-sopspshIEshl+sYVsFSGhNFPHlLlPLMcp+pVMINPsRPLVIYESMSh-Ls...pLDhs.pssLcLsssslcl-GKRGsVsLpFpLhsssclVGoGhKcLllSGLRsY-pcs ...MFLosYaSpps.....p.....phpFTRpQAS+FAKtVAGDFNPIHDcDuKRFCVPGDLLFAllLpK.GlSQKM+FcFoGMVuDGVsLplps.c.spcE.uVsDssGKpYL+hSRcG-Vs+ssAhIEpllpsYVpFSGhNFPHIMVPLME-pQhMINspRPLVIYESMpl-Fo...R...LDls..cP-V-FsGAoh-V-GKRGhVTLsFsh+-sGplVGcGlKRMVsSGL+PYDQt.t............... 0 6 15 27 +11953 PF12120 Arr-ms DNApol_Rpb2_rif; RNApol_Rpb2_rif; Rifampin ADP-ribosyl transferase Mistry J, Gavin OL anon pdb_2hw2 Domain This protein is found in bacteria. Proteins in this family are typically between 136 to 150 amino acids in length. 
The opportunistic pathogen Mycobacterium smegmatis is resistant to rifampin because of the presence of a chromosomally encoded rifampin ADP-ribosyltransferase (Arr-ms). Arr-ms is a small enzyme whose activity thus renders rifamycin antibiotics ineffective [2]. 25.00 25.00 41.70 41.30 22.50 20.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.17 0.72 -4.07 15 184 2009-11-19 18:25:00 2009-07-14 16:38:57 3 4 154 1 54 158 11 99.60 62 64.61 CHANGED PFaHGTKADLpsGDLLpsGapSNYpcs.+hhNHlYFTuTLDAAsWGAELAs....G-G+sRIYlVEPTGsFE-DPNlTDKKFPGNPTRSYRSpcPLRlVGElTcW ..PFaHGTKAcLtlGDLLssGahSNacss.+hhsHIYFTAhh-uAsWGAELAh............u-G+.sRIYIVEPTGsFEDDPNlTDKKFPGNPT+SYRopcPLRIVGElpDW.......... 0 20 40 51 +11954 PF12121 DD_K Dermaseptin Mistry J, Gavin OL anon pdb_2jx6 Domain This protein is found in eukaryotes. Proteins in this family are typically between 30 to 76 amino acids in length. This protein is found associated with Pfam:PF03032. This domain is part of a dermaseptin protein which is used as an antimicrobial agent. The full protein is almost completely defined in an alpha helical domain. It creates high levels of disorder at the level of the phospholipid head group of bacterial membranes suggesting that it partitions into the bilayer where it severely disrupts membrane packing. 20.90 20.90 20.90 22.10 19.80 20.60 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.06 0.72 -6.41 0.72 -4.01 14 62 2009-07-14 15:43:20 2009-07-14 16:43:20 3 2 15 2 0 61 0 24.50 56 52.07 CHANGED uLWssI....Kphu.pA...AGKAALsAVs .uLWpslh...KphG.cA........AGKAALGAVs....... 0 0 0 0 +11955 PF12122 DUF3582 Protein of unknown function (DUF3582) Assefa S, Bateman A anon PFAM-B_2087 (release 23.0) Family This domain is found in bacteria, and is approximately 130 amino acids in length. It is found associated with Pfam:PF01694. There is a conserved ASW sequence motif. 
This domain has a single completely conserved residue F that may be functionally important. 22.00 22.00 22.00 22.20 21.50 21.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.48 0.72 -4.03 32 749 2012-10-01 21:59:08 2009-07-14 16:54:12 3 2 745 15 96 340 17 98.80 56 35.72 CHANGED MlclhslsNsRhAQAFlDYhtsps.Ichplhs.....pspu.hslalh.ccpphspsptElppFlpsP...tcs+Y.tASW-sGsTps.phc.Yps..o......hlst..h...hspAGPlTLs .............MhhITSFuNPRlAQAFVDYMATQG.VlLTIQQ.................csQo...DlWLA.DESQAERVRuELARFLENP...uDPRYLAASWQuGcTsS.GL+.Y+Ra.P......FhAsL.....RcRAGPVThl..................................... 0 16 36 66 +11956 PF12123 Amidase02_C N-acetylmuramoyl-l-alanine amidase Mistry J, Gavin OL anon pdb_2ir9 Domain This domain is found in bacteria and viruses. This domain is about 50 amino acids in length. This domain is classified with the enzyme classification code EC:3.5.1.28. This domain is the C terminal of the enzyme which hydrolyses the link between N-acetylmuramoyl residues and L-amino acid residues in certain cell-wall glycopeptides. 21.40 21.40 21.70 26.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -7.91 0.72 -4.32 16 260 2009-09-10 17:34:38 2009-07-14 16:57:45 3 4 95 2 9 151 0 44.60 42 18.36 CHANGED phshpu+llhps..sGLsYh.Tcshss.spLc+hptWLDp+...GWaYEh ...pLthsuphIlps..-GLsYhlT-spss.spLcthTsWLDp+...GWaYEh. 0 0 4 4 +11957 PF12124 Nsp3_PL2pro Coronavirus polyprotein cleavage domain Mistry J, Gavin OL anon pdb_2kaf Domain This domain is found in SARS coronaviruses, and is about 70 amino acids in length. It is found associated with various other coronavirus proteins due to the polyprotein nature of most viral translation. PL2pro is a domain of the non-structural protein nsp3. The domain performs three of the cleavages required to separate the translated polyprotein into its distinct proteins. 
25.00 25.00 29.60 100.60 24.50 18.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.02 0.72 -4.41 5 173 2009-07-14 16:00:46 2009-07-14 17:00:46 3 5 66 2 0 152 0 65.90 97 1.12 CHANGED SEEHFVETVSLAGSYRDWSYSGQRTELGVEFLKRGDKIVYHTlESPlEFHLDGEVLPLDKLKSLLS SEEHFVETVSLAGSYRDWSYSGQRTELGVEFLKRGDKIVYHTLESPVEFHLDGEVLSLDKLKSLLS. 0 0 0 0 +11958 PF12125 Beta-TrCP_D D domain of beta-TrCP Mistry J, Gavin OL anon pdb_2p64 Domain This domain is found in eukaryotes, and is approximately 40 amino acids in length. It is found associated with Pfam:PF00646, Pfam:PF00400. The protein that contains this domain functions as a ubiquitin ligase. Ubiquitination is required to direct proteins towards the proteasome for degradation. This protein is part of the WD40 class of F box proteins. The D domain of these F box proteins is involved in mediating the dimerisation of the protein. Dimerisation is necessary to polyubiquitinate substrates so this D domain is vital in directing substrates towards the proteasome for degradation. 25.00 25.00 26.20 26.30 22.50 21.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.00 0.72 -4.35 6 176 2009-07-14 16:02:24 2009-07-14 17:02:24 3 10 86 3 91 159 0 39.60 79 7.41 CHANGED hcYFspWSEu-QVDFVEaLLSRMCHYQHGpINuaLKPMLQ .....lKYF-QWSESDQVEFVEHLISRMCHYQHGHINSYLKPMLQ 0 20 27 54 +11959 PF12126 DUF3583 Protein of unknown function (DUF3583) Assefa S anon PFAM-B_2092 (release 23.0) Family This domain is found in eukaryotes, and is typically between 302 and 338 amino acids in length. It is found in association with Pfam:PF00097 and Pfam:PF00643. Most members are promyelocytic leukemia proteins, and this family lies towards the C-terminus. 
22.70 22.70 23.50 24.40 22.20 22.60 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -12.04 0.70 -5.36 3 98 2009-07-14 16:17:51 2009-07-14 17:17:51 3 7 36 0 23 120 0 236.60 58 40.35 CHANGED LDTMTQsLQEQDRTFGAAHAQM+SAIGQLGRARADTEEhIRERVRQVVAHVpAQERELLEAVsARYQRDYEEIAGQLGRLDAVLQRIRTGSALVQRMKLYASDQEVLDMHuFLRKALCpLRQEEPQSL+AsV+TDGFDEFKVRLQDLlSCITQGpDAAVS++ASPEAASTPRD.....PlsTDsPE-l.p......sQhQA.tL......AhVpsVPGAHPVPVYAFShQsPohREEuSNTTTuQKRKCSQT-CPRKlIKMESEEGcEuRLApSSPEQPRPSTSKAVSPPHLDGPPSP+SPVlccElhLPNSNHVTSDsGETEERV..................VVISSSEDSDAENss ................LssMTQsLpEQ-tsFsusHAQM+uAluQLs+sRu-TEELIRtRVRplVsHlpAQE+ELL.EuVpsRYQRDYcEhAucLuRL-AVLQRIRsGusLVpRMKpYASDQEVLDMHuFLRpALs+LRQ.EEPQsLpAsV+TDG..F-EFKlRLQDL.SCIT..QGpsuuss+p.AsPEusssPp-.....shss-....................................................................-sosTssspKR.Ksspsp..CsRKlIKMESpEt.pEsRLApSSPEQPtPSTS+AsSPP+LDG.sss.cSss.tp-h...sssNHssu.s.s...u.Estcpl..................hllusSccsct....s................................................................. 0 3 3 7 +11960 PF12127 YdfA_immunity SigmaW regulon antibacterial Assefa S anon PFAM-B_2114 (release 23.0) Family This protein is found in bacteria. Proteins in this family are about 330 amino acids in length. The operon from which this protein is derived confers immunity for the host species to a broad range of antibacterial compounds, unlike the specific immunity proteins that are linked to and co-regulated with their antibiotic-synthesis proteins. 
26.70 26.70 26.90 39.30 25.10 26.60 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.00 0.70 -5.53 22 608 2009-07-15 08:13:42 2009-07-15 09:13:42 3 2 588 0 119 350 208 312.90 69 96.50 CHANGED lhhlllsslhllhlhlhFs.FlPhs..LWIoAhuuGVclulhsLlGMRLRRVsPppIVpshIpApKAGL.....c.lssspLEAHYLAGGNVspVVcALIAAp+As.IsLsFcRAsAIDLAGRDVLEAVQhSVNPKVI-TP........slsAVAKDGIpL+sKARVTVRANIc+LVGGAGEETIIARVGEGIVooIGSussHKpVLENPDpIS+sVLsKGLDuGTAFEILSIDIADlDVGcNIGAcLQsDQAEADKclAQAKAEcRRAMAVApEQEMpAhlpEMRAKVVEAEAEVP+AhApAhRcGpl.GVMDYYphpNlpADTpMR-uIu+tu......cs .......ls.hlllslllllhL.llh.Fo.F.V..P..lG..LWISALAAGV+VuIh.TLVGMRLRRVsPp+llsPLIKApKAGL...........s.losNQLEuHYLAGGNVDRVVDALIAApRAs.I-LsFERAAAIDLA....GRDVLEAVQMSVNPKVIETP........hluuVAhsGIE.VKAKARlTVRANIsRLVGGAGEETIIARVGEGIVSTIGSScpHpcVLENPDsISKTVLsK.GLDuGTAFEILSIDIADVDIG.....KNIGAcLQT-QApADKpIAQAKAEERRAMAVApEQEMKA+VQEM+AKVVEAEuEVPlAMAEALRsGNl.GVhDYYNhKNIpADTsMRsuIuKts.ct........................... 0 66 99 112 +11961 PF12128 DUF3584 Protein of unknown function (DUF3584) Assefa S, Bateman A anon PFAM-B_2142 (release 23.0) Domain This protein is found in bacteria and eukaryotes. Proteins in this family are typically between 943 to 1234 amino acids in length. This family contains a P-loop motif suggesting it is a nucleotide binding protein. It may be involved in replication. 
28.70 28.70 28.70 28.70 28.40 28.50 hmmbuild -o /dev/null HMM SEED 1201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.17 0.70 -13.81 0.70 -7.23 10 142 2012-10-05 12:31:09 2009-07-15 13:32:20 3 7 123 0 45 146 29 971.80 22 91.34 CHANGED ILIDoa..lp.GpV.ELsLsGHTplsGTNuuGKTTLhRLlPlFYGEtPS+lVsps.tRcsFs-aYLP+-SShllYEYpR.sG.QLCpsVhspt.uDG+GVpYRFIcuGYcpshFlsupptthhtshossElt+phRpsGhssSphLs.hccYRuIIQN-Rohh...GpcppELRsLAppFSLssu-pslpHIDKlssushsKptch-slKsMlsAIhp-...Dsssssppp...lsppclpsWlpDhpthpulpplpschcclpp-aspLpSlhh+LptL+stlhucppplppcQp-ppstppchcpplcpL--phpEspcchsp-lSstcuclstpcscL-tl-cp+tsapDsDlEphptDh-plPphcs-hpplptplctlpsptpclppca-cppptlcppppcclpshcsc.tpslREp+sct...+sstccshpuLEuphppQhcttppchsccphclcpplschctphsssshosEphtslpts-pclccAcEphppupsshppLpscppph+pcRDpApctLpcusppLpch+sphp-lcpQLhPpusSLlcFL.RsE..AssWccsIGKlIssELLcRTDLsPphspus..s.s..sLaGlpLDLpsl-sP-astsEp......pLRp+Lsth-csLpoApspppptEcpLsplNspl-ptpcslscAppuhcpscpcLpRLpsppcshpc+lppAlpc++ppsppplspLcpplKtLpccppsalcphc-chpEtphppptthpsV.ushDsQlspLpptlputcsss+Aclcphccthcs-LpspslDspsltcLcpclcsLcppIppspsccpclpcYcsahpcsWhp......c+spLttplpslcsuhp-LppcLsptppsh+pctpcLppppcAs-cthsplsEslp.pl+shhcpL.AplphstssspAs....hplu.phcth--hpppccRLtsslcpalE+Fcsll....sc+SGSsLhEsWp+h.ppcsshlu-+ul...p.hch..l.hlpplhsll..ssQpppulh-hsslhusslscFYpsLsshsRRIcohS++Lo+cVssptphEulu-lsVclpo+l-EL-aWssLctFsc...sFcpWpcp..-Ghs-lPc-phssuMpRlsshLss.uuhshulssLl-lElsLpEsGpclhIRN-+QLpcsSSpGhoYLIlhphhlGFsphLRG..cusspIpWPVDELGpLctsNlc+LhshhssNpIolLSAhPssDsclLsLFp++Yhlcc 
...................................................Eltlc.GpsphhGTpusGKoTl.Rhl.hFY.st..p+l.l.pp..+cuFpcaYhPhssSaIlYEh.+tsG.....hhpslht.t.spu..tlp.aRFls.tsap.p.....alpppp.....hshs.tphhpphh.t..t.slthothls.hpcYRsII..sstp............t.chh.h..tpaulhp..utpphppI.+.hh.psl.hspchch-hlKp.hlht..h.p-.......sh......ppp.........p.pchptWhp...c.p......h.....p..t..h...........hp.ph.ppltpt........hppl.th.p.plt.thttthtts.p.ttpp...hppp.pchptphpphpcp...hp...p.ppchppchsthptslsthpppLctlpppctta.tpts.Ipphhtc.h.pph.....phppch....pplppphphhspphp-lpstapt...h...hppltpph....p.......p........hpt....hptp.....sph....ppphtpt........tpttcpphpthppph.......ppphp..ttht.ph...p..ppp.ph.......p..p.htph..ch.....p...h....p...phph..pp-th..phth.......hc......ttl..pp....t.p.p....ptp....ph.ptt...h..p...p...h..p....tp.....pph.ptp....hp........pt.t...p....tl.pt....h....p.phpp........h....ptpltplpthL.sp..psoLhpaL.ppp....t.sWcps.lGKVls.c.l.Lhp..p-LpPphstt.....s....t.........shaGlp....l..s....Lpsl.....p.....h.....s.....phhtpcp.......phptp...hpthpp.th.tp....h.p.....p.....t....pp.h....cppl.tt....h.pt....p....lpp....hpp.......ph....s.tp.......t...thc.p..p.t....ch....pphhspp....pp.ppchppthptphpphtpplpphptplpphppphpphhp.t....ccphtphc.....phpt.hpph.sthcpphttlptphpp....tppphppphpthcp.hp...p-L.pupGsD.ttpltphcppltplppplphhcppcs.lhc....Ypc.hppha.........tp.phppphtth.cpthpphppphpthtpphptphpph.ppphpt.ctt.tphp..p.hp.......t.p.hh.p.....tp...h.........s....pt...t.php..........ht...phh.pphpph...h.ph...tt...hhtp...hpphht.phpt.h.......st...th.c....h.t...........t...sct...........hph........h..l.p.hhp.........sp..p.l.p.hp..h..h.pl.tt.hthhtphptcltt.stclsp...th.t.....hthlt....p...thp.h..sp.p..h.t.....h...h..lp.h.t.......th.....t.h..t.......t..thst........hh..htphhp..h.t..tt.....thtltphhclphplhEssp.c.hhhh..cpltpsuSpGhshLlhshh.lhhhphh+t.....ts.....phtlph.hDElGpLp.pNlptlhphhppppI.llsu.P.s.................................................................................................................................. 
0 8 23 38 +11962 PF12129 Phtf-FEM1B_bdg Male germ-cell putative homeodomain transcription factor Assefa S anon PFAM-B_2140 (release 23.0) Domain This domain is found in bacteria and eukaryotes, and is typically between 101 and 140 amino acids in length. Phtf proteins do not display any sequence similarity to known or predicted proteins, but their conservation among species suggests an essential function. The 84 kDa Phtf1 protein is an integral membrane protein, anchored to a cell membrane by six to eight trans-membrane domains, that is associated with a domain of the endoplasmic reticulum (ER) juxtaposed to the Golgi apparatus. It is present during meiosis and spermiogenesis, and, by the end of spermiogenesis, is released from the mature spermatozoon within the residual bodies [1]. Phtf1 enhances the binding of FEM1B -feminisation homologue 1B - to cell membranes. Fem-1 was initially identified in the signaling pathway for sex determination, as well as being implicated in apoptosis, but its biochemical role is still unclear, and neither FEM1B nor PHTF1 is directly implicated in apoptosis in spermatogenesis. It is the ANK domain of FEM1B that is necessary for the interaction with the N-terminal region of Phtf1 [2]. 21.10 21.10 21.10 21.20 20.70 20.30 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.92 0.71 -4.88 3 198 2009-07-15 12:34:29 2009-07-15 13:34:29 3 2 71 0 83 159 0 143.60 60 22.68 CHANGED S+lcDAluWYQKKIGAYDQQIWEKSIEQ+pIKGL+NKPKKsGHlKPDLIDVDLVRGSTFAKAKPElPWTSLTRKGlVRVVFFPlFu+WWlQVTShRIFVWLLlLYhLQVIAlVLYsh.......sQp.HIVslSEVLGPIsLMLLLGTVHCQIVSTphs+PssssG ..................p.p-hlsWYQKK..IGAYDQQIWEKSlEQ.ppI.........K......Gh....+NKPKKhuHlKPDLIDVDLlR..GSsFAKAKPE....PWTSLTRKGlVRVVFFPhF.pWW.lQVTSh.hIFhaLLlLYl..hQ.VhAlVLah...........................................hs......lsloEVlGPlhLMLLlGTVHCQIVSTphs+sshss............... 
1 14 21 46 +11963 PF12130 DUF3585 Protein of unknown function (DUF3585) Assefa S anon PFAM-B_2156 (release 23.0) Domain This domain is found in eukaryotes. This domain is typically between 135 and 149 amino acids in length and is found associated with Pfam:PF00307. 23.20 23.20 23.30 23.70 23.10 23.10 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.74 0.71 -4.33 25 600 2009-09-11 08:12:16 2009-07-15 13:54:22 3 17 92 0 329 593 0 135.50 35 15.02 CHANGED hhsspplpp-LpplEpp.csLEtc..GVtlE+pLRst.t.............sc......p--pLhh-WFpLlpcKshLlRREuELhhhtpphpLEccpspLcpELRthhuht-tpKTpt-cp+EctLlpcllplVppRstlVcsL-pcRlREpcEDcplcp ..................................................tp.lptplptlEpp.ctl-tc..ustlEctLRstht................................spp......p--plht-WFpLlpcKssLlRc-sE.............Lhhh.t.c..-..hcLE.c+.tpLppELRphh.sh..t.......-.......t.p..KTptc....cp+Ep.tLlpcllplVppRDtLVppL-p.....pRh..p.-tEEDp.ht.t............................................. 0 63 93 196 +11964 PF12131 DUF3586 Protein of unknown function (DUF3586) Assefa S anon PFAM-B_2164 (release 23.0) Family This domain is found in eukaryotes. This domain is about 80 amino acids in length and is found associated with Pfam:PF08246, and Pfam:PF00112. 25.00 25.00 27.50 27.20 24.00 21.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.91 0.72 -10.06 0.72 -3.93 16 127 2009-07-15 13:00:10 2009-07-15 14:00:10 3 3 28 2 21 137 0 77.40 43 21.85 CHANGED supuPoPsP.oTts......PtssslhQhhChDhtCpcGCpcsslPsspCl.hsGGGSshspCssppVhhpsY.pSscCoGs ....................s.s.usTPsPoTpopppA....Pp.sshhsQhhCTDttCppGCcpssls.TspCl.ssuGuSuhssCGspplp.psY.oospCoGs........ 0 16 21 21 +11965 PF12132 DUF3587 Protein of unknown function (DUF3587) Assefa S anon PFAM-B_2181 (release 23.0) Family This protein is found in viruses. Proteins in this family are typically between 209 and 248 amino acids in length. 
25.00 25.00 40.90 40.40 23.50 23.40 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -11.48 0.71 -5.08 52 74 2009-07-15 13:11:26 2009-07-15 14:11:26 3 1 7 0 0 73 0 182.50 31 81.44 CHANGED ppstsss.s.s...........pt.hlPhchl..hhhucFlpFt-atphlpshhPptt.s....lptpLap.hST+phpssFlNGKpLpIcYNFDspRhcc-RlLIsl-oLlPlFGGlh...ss.shcpFsolscLpsFVchcVHLstCS.shpa.ASCsC..HLhsspt.t..........pthspP....sssCpps.HFHHaCupHVttWLphaLtssIlh+Esp.ph.ascphsc .....................................htt....................s..phh..h.hsphhph.ph.phht.hhsptt.s....lptpLap..hST+phpssFlNGKpLclcYNFDssRhcc-Rl.LIslcoLhPlFGGlh.......ss...shcpFsolsclpsFlctcl+LstCo.shpa..A.oCsC..+hhspptp....h.....pthhps..........sstCpht.HFHHaCspHVttWLphaLtshIhhpEsp.thhtp................... 0 0 0 0 +11966 PF12133 Sars6 Open reading frame 6 from SARS coronavirus Assefa S, Bateman A, Coggill P anon PfamB-2188 (release 23.0) Family This family is found in Coronaviruses. Proteins in this family are typically between 42 to 63 amino acids in length. 27.00 27.00 63.50 63.30 17.90 16.70 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.93 0.72 -4.29 6 86 2009-07-15 13:35:43 2009-07-15 14:35:43 3 1 82 0 0 15 0 57.60 96 98.29 CHANGED MFHLVDFQVTIAEILIIIMRTFRIAIWNLDVlISSIVRQLFKPLTKKNYSELDDEEPMELDY MFHLVDFQVTIAEILIIIM+TFRIAIWNLDVIISSIVRQLFKPLTKKNYSELDDEEPMELDY. 0 0 0 0 +11967 PF12134 PRP8_domainIV PRP8 domain IV core Mistry J, Gavin OL anon pdb_3enb Domain This domain is found in eukaryotes, and is about 20 amino acids in length. It is found associated with Pfam:PF10597, Pfam:PF10596, Pfam:PF10598, Pfam:PF08083, Pfam:PF08082, Pfam:PF01398, Pfam:PF08084. There is a conserved LILR sequence motif. The domain is a selenomethionine domain in a subunit of the spliceosome. The function of PRP8 domain IV is believed to be interaction with the splicosomal core. 
25.00 25.00 34.00 33.00 19.10 18.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.53 0.70 -5.01 14 350 2009-07-15 15:36:11 2009-07-15 16:36:11 3 45 291 12 240 338 7 220.60 76 10.05 CHANGED EsaLNSpNYuELFuNps.paFVDDTNVYRVTlHKTFEGNlsTKPlNGslFILNP+oGQLFLKVIHTSVWAGQKRLuQLAKWKoAEEVsALlRSLPhEEQPKQlIVTRKuMLDPLEVHh.LDFPNIsI+sSELpLPFtuhhKl-KluDlVLKAsEPpMVLFNlYDDWLc..slSsaTAFSRLILlLRuL+lNtE+s+hIL+PshsllTpspHlWPoh.oDppWlcVElpL+DLILsD ........................EPaLsSQNYuELFSNQI.hWFVDDTNVYRVTIHKT......FEGNLT.T.KPINGAIFIFNPRTG.....QLFLKIIHTSVWAGQKRL.GQLAKWKTAEEVAALIRSLPVEEQPKQlIVTRKGMLDPLE...VHL.LDFPNIlI+GSELQLPFQAshKlEKhGDLILKATEPQMVLFNLYDDWLK..oISSYTAFSRLILILRALHVNs-+sKlIL+PDKTslT.psHHIWPoL.oDE-WIKVElpL+DLILsD............ 0 89 137 200 +11968 PF12135 Sialidase_penC Sialidase enzyme penultimate C terminal domain Mistry J, Gavin OL anon pdb_2vo8 Domain This domain is found in bacteria and eukaryotes, and is about 30 amino acids in length. The protein from which this domain is found is a sialidase enzyme which is used by virulent bacteria as a toxin. It is the penultimate C terminal domain. 20.40 20.40 21.20 21.80 20.00 18.50 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.65 0.72 -6.69 0.72 -4.22 23 64 2009-07-15 15:38:49 2009-07-15 16:38:49 3 6 14 1 18 69 0 24.80 37 4.57 CHANGED hsDsElass-pphppNcEIYsLspI ....sDsElYtsEphhphNpElasLspl. 0 2 6 18 +11969 PF12136 RNA_pol_Rpo13 RNA polymerase Rpo13 subunit HTH domain Mistry J, Gavin OL anon pdb_2waq Domain This domain is found in archaea, and is about 40 amino acids in length. It has a single completely conserved residue E that may be functionally important. It is found in the archaeal DNA dependent RNA polymerase. The domain is a 'helix-turn-helix' (HTH) domain in the Rpo13 subunit of the RNA polymerase. 
This domain is involved in downstream DNA binding, and the entire subunit has also been implicated in contacting transcription factor II B. 25.00 25.00 33.50 33.50 18.50 17.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.13 0.72 -4.26 7 23 2009-07-15 15:39:56 2009-07-15 16:39:56 3 1 23 4 11 17 0 46.10 56 46.09 CHANGED cEtth.EE-EEsEhss....lslpslEhhh+sTElW-pLlpGcl ...............t.EEpcVE.....EssE-EhPp....LSIQDIELLMKNTEIWDsLLsGKI 0 5 5 9 +11970 PF12137 RapA_C RNA polymerase recycling family C-terminal Assefa S, Bateman A, Coggill P anon PFAM-B_2201 (release 23.0) Domain This domain is found in bacteria. This domain is about 360 amino acids in length. This domain is found associated with Pfam:PF00271, Pfam:PF00176. The function of this domain is not known, but structurally it forms an alpha-beta fold in nature with a central beta-sheet flanked by helices and loops, the beta-sheet being mainly antiparallel and flanked by four alpha helices, among which the two longer helices exhibit a coiled-coil arrangement. 
26.80 26.80 27.80 27.80 26.40 25.90 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.95 0.70 -5.62 50 1007 2009-07-15 16:36:59 2009-07-15 17:36:59 3 5 973 2 150 632 98 352.20 58 38.01 CHANGED pDIpIHVPYhcsoAQphLhcWYccGLNAFpcTCPsGpslacpatppLpshLt..ss....spssh-pLlppopptpppL+spLEpGRDRLLELNSsus-pAppLs-sItppDs.sspLssFhhplFDhlGlcQE....Dpu-sullLcP.....o-+Mhsss.aPGL..s...E-GhTlTF-RcpALuREDhpFloWEHPhlpsuhDllhou-hGsoululLcNcuLPsGTlLLEhlasl-upAP+sLQLsRFLPsTPlRlLlDppGssLusplsa-shscpLpsls+phApclVpthpsplcpllppu-phupsphptllppAppphpppLssElpRLpALpsVNPsIRp-EI-tLcppppphhshLspAplpLDAlRllVss ................+DIQIHVPYLEcTAQulLsRWYHEGLsAFEcTCPTGRslYDuhhscLIsh...LAuP......pps-uaD-LIcpsRcp+-uLKAQLEQGRDRLLElHSNG.GE+AQtLA-u......Ip.................EQDs...DTsLlsFAhsLFDIlGIsQ-....DRG.-N...hIVLTP.....SDHMLV..PD.FPGL...s..EDGhTI..TFDR-sALuREDsQFITWEHPlIRsGlDLILSGDsGooslSLLK.N..KALPVGTLLlELlYVVEAQAP.......K.........pL.........QLsRFLPPTPlRh.L.L.Dc.....NG.NNLAuQVEFEoFNRQL..su..VNRHouSKL.....VsAV.Qp-VHsllphGEsplEppAcuLI-sA+p-A-pcLouELuRL..c.AL..+.AVNPNI.R.DDElsAl-spRpplhpsLsQ.AuaRLDALRLIVVs.......................................... 0 35 65 113 +11971 PF12138 Spherulin4 Spherulation-specific family 4 Assefa S anon PFAM-B_2198 (release 23.0) Family This protein is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 250 and 398 amino acids in length. There is a conserved NPG sequence motif and there are two completely conserved G residues that may be functionally important. Starvation will often induce spherulation - the production of spores - and this process may involve DNA-methylation. Changes in the methylation of spherulin4 are associated with the formation of spherules, but these changes are probably transient. Methylation of the gene accompanies its transcriptional activation, and spherulin4 mRNA is only detectable in late spherulating cultures and mature spherules. It is a spherulation-specific protein. 
28.50 28.50 28.60 36.00 25.90 28.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.97 0.70 -4.95 44 221 2009-07-15 17:01:16 2009-07-15 18:01:16 3 11 122 0 141 236 0 229.90 25 69.40 CHANGED tplllPlYl.YP....tssss.Ws.L.................t......................lsshPsltaslllNP.ssGPGtt.....sDssYssslscL....ptss...spllGY..............................VpT....sY..usRsls-Vtp-lspY.......................................ssW.t....................................................................................................................................................................................................................................................................................................................................................................................................thslcGIFlDE..ssssht...sphsYhpplsshl+s.................................htssthVlhNPGs.hP......tssahs...h..u.DhhVsFEssassa...sshsst....t..hlsshs..ts+hst......llaul..................P.............ssspltshsp.phpptu..uhlalT........shsss....Ypshs..shWpp.........hlsslss 
...........................................................h..lhlPhYh.aP...........hsss..Ws.L...........................................hss..ssh..hhlllNP.ssGP........Gtt......sDssahpslspL.................phss....scllGY..............................VpTsY............usRshsplhp-lppY.......................................tsW..................................................................................................................................................htlcGlFhDE....sssph...............stl.sahpplsshl+s.................................hts.s..t.hVlhNPGs.hs..........ssahs........h....uDhh.Vs..FEssassa........tts.sst.........t...hhsshs.....p+hstllasl.....................P.............................t.pphttlhp.thtp.ts..uhlahT......................s....sss.....atshs..shapt.ht....t.................................................................................................... 0 41 82 114 +11972 PF12139 APS-reductase_C Adenosine-5'-phosphosulfate reductase beta subunit Assefa S, Bateman A, Coggill P anon PFAM-B_2202 (release 23.0) Family This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 112 to 142 amino acids in length. This family is found in association with Pfam:PF00037, and has a conserved FPIRTT sequence motif. The whole beta subunit has the enzymic properties of EC:1.8.99.2. 25.00 25.00 25.50 39.80 23.00 18.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.87 0.72 -3.84 43 261 2009-09-11 09:55:26 2009-07-16 09:44:11 3 10 227 18 53 254 806 83.70 48 62.12 CHANGED lRGYADFsPhGuoshPhRuo-c..IMWTlKFRNG.slKRFKFPIRTTsEG...ohsPhtGhst...ssDL.cophLhoEst...................tthclsssc .lRGYADFsPhGuolhPhRus-s..IMWTlKFRNG.slKRFKFPIRTTPEG...oh.ssa.s.uhst...sscL.csp.lLasEst...................ht...................... 
0 25 44 50 +11973 PF12140 DUF3588 Protein of unknown function (DUF3588) Assefa S, Bateman A anon PFAM-B_2205 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 866 amino acids in length, and the family is found in association with Pfam:PF02820. The exact function of this family is not known. 25.00 25.00 25.40 30.90 24.30 24.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.58 0.71 -4.02 15 369 2009-07-16 09:36:54 2009-07-16 10:36:54 3 15 82 0 178 312 0 113.30 42 17.18 CHANGED ssssVslYlNK+sssGPaL-+p+ltpLPsaaGPGssslVLppslQuslssAYpsppVhphLp..ps...GGE..slpApa-GKsaplpLPs...VsosshVh+FLcplCppLp....C-NLFSspPho ..........................................ssp.tlYlN+pstsGPaLs+p+ltpLPpphGPupsslVLpps.lpthIssAacsppVhphLp....ps.....sGp....slpAp.acGcpa...p...hslss...VposshVhcFlcplCppLp....CsNLFusp.h.................... 0 23 35 88 +11974 PF12141 DUF3589 Protein of unknown function (DUF3589) Assefa S anon PFAM-B_2240 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 541 and 717 amino acids in length. 
The function of this family is not known, 25.00 25.00 50.60 44.50 17.60 17.10 hmmbuild -o /dev/null HMM SEED 498 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.05 0.70 -12.94 0.70 -6.27 31 145 2009-07-16 10:10:52 2009-07-16 11:10:52 3 2 27 0 113 176 2 461.20 29 74.19 CHANGED ppCpp.lphptslplopspshs....ssLcplhspllpp.p....shap-lpshF.tclpcphccphl.cK+Wa+FuGSSVWL-pYGVHaMlSRllYS.pGh+spPhlSLhYsQlFDcNWpElps.hpLllP...................h..sctsac.hp...FPsFLPIPhaashc...ppppaYGPEDPRllLhc.....NphGh-EPllVaNua+cc.ls......................p........ahRoMFhsaPap.p................tp..as+shEL+lcsppctppEKNWTPF..ls..........pt.pshDpalaFlYpassLclL+CsL....ssus......Ccaha+......phsspsplGsLRGGTpLlslsphh...........................thspsRclWlGaARuHlcpCGCGpsMYRPNhllLs+s..................spaclualSShlsFslsl.sWshp.............csh....Cts..tsVLIPNuISpWslpp.........................................t.t..hpDhhTlolShADposcllal+GlLppIhsls................................t...htppppttttsshtlcCAlptSpca ...........................................pCpp.lphptp..lpl.op.t.h...h.p....ssLtplhpplhpp.p...........hhpclpshF....clp.p.............p............pphl.....cK+Wa+FuGSSVWLcpatVHhMlSRllYS.pGt+spPhlSLlauQlFDcsWpElp.s.hpLh.lP................................................................tphphp..hp......aPphLsIPhhhs.p.....tptpahGPEDPRllLhc.....Np.Gh-EPlllFNhhcpp.hp.......................................hRuMFhhhPap.......................................sclhcLplpsp.pht.phEKNWoPFhs................t.tpttcsplaFlYpapsLclLKCsL....ssu.................Cphhac.........t.pppspsGslRGGTpllslsphhs.........................................h.ps+plWlGFsRsHlppC.GCGpphYRPphhllscs......................p.spaplshlSs.lsFslslhsWs.t.................pshCss..tNlLIPNuIu.Wpltp........................................................................p.thpDYhslolStuDtsspllpl+GlLphlhpl...........................................pttpt...pp..hpCsh..u.pa....................................................... 
0 17 53 113 +11975 PF12142 PPO1_DWL Polyphenol oxidase middle domain Assefa S anon PFAM-B_2219 (release 23.0) Domain This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length, and the family is found in association with Pfam:PF00264. Most members are annotated as being polyphenol oxidases, and many are from plants or plastids. There is a conserved DWL sequence motif which gives the family its name. 20.60 20.60 20.60 21.00 20.30 20.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.59 0.72 -4.45 39 355 2009-07-16 12:13:27 2009-07-16 13:13:27 3 8 124 8 85 375 0 52.10 52 10.25 CHANGED DhTDsDWLsupF.lFY.DE..NsphVRV+V+DsLD.sc+LGYsY.pcVs..lPWlsuKPssp ....DhTDsDWL-u.uF..hFY.DE..pucLVRV+VRDsLD.spp..LtYsY.Q-Vs..lPWLss+Pss............... 0 16 67 78 +11976 PF12143 PPO1_KFDV Protein of unknown function (DUF_B2219) Assefa S anon PFAM-B_2219 (release 23.0) Domain This domain family is found in eukaryotes, and is typically between 138 and 152 amino acids in length. and the family is found in association with Pfam:PF00264. Many members are plant or plastid polyphenol oxidases, and there is a highly conserved sequence motif: KFDV, from which the name derives. This is the C-terminal domain of these oxidases. 20.60 20.60 20.60 20.60 20.20 20.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.70 0.71 -4.26 32 337 2012-10-01 19:33:20 2009-07-16 13:34:11 3 12 111 0 86 438 1 119.10 40 22.90 CHANGED FPl.sL-cs.lp.spVpRP+.poRoc+E.K--EEEVLVI-GIEh-psh.h.lKFDValN.D-scsss..ssspsEFAGSFlslPHt.+pp.......t+p...hpTsh+LGlo-LLEDLsA-sD-.ollVTLVPRsGs..s.l.pIsGlcI-h ......................FPh.sL-ps.lp..ssVpR.Pp.tsR....occE....K--E..EEVLV.I-GIEhccc..h..VKFDValN..s-sstss..sssssEaAGSFsplPHt..ppt..........ttp......hpoph+huls-LL.-Dlss-s...Dc..ollVolVP+hGs..t.l.slsulpIth................................. 
0 9 70 79 +11977 PF12144 Med12-PQL Eukaryotic Mediator 12 catenin-binding domain Assefa S anon PFAM-B_2250 (release 23.0) Domain This domain is found in eukaryotes, and is typically between 325 and 354 amino acids in length. Both development and carcinogenesis are driven by signal transduction within the canonical Wnt/beta-catenin pathway through both programmed and unprogrammed changes in gene transcription. Beta-catenin physically and functionally targets this PQL (proline-, glutamine-, leucine-rich) region of the Med12 subunit of Mediator to activate transcription. The beta-catenin transactivation domain binds directly to isolated Med12 and intact Mediator both in vitro and in vivo, and Mediator is recruited to Wnt-responsive genes in a beta-catenin-dependent manner. 25.30 25.30 25.90 25.90 19.70 17.40 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.72 0.71 -4.94 4 106 2009-07-16 15:03:35 2009-07-16 16:03:35 3 6 38 0 42 87 0 181.20 53 11.28 CHANGED PP-LhQsts.G...RlsYtQpuMNMYTQNQPLPPGGPGLEPPYRPsRsP.MNK.MPsRPNYTuhMssMQ.usMsslMGL-K...QYsMsaKPQPsMPQGQhLRpQLQsp..sQuhIGQ.QlRQMTPNpsYouMQsS....QGYToaGSHMGMQQHsSQsGGMsPsSYGsQsFQuTHPAsNPslVDPpRQLQQRPSGYVHQQAP.uYsHshQsTQRFs.HQs ............................hhpp..........phsY....t..t.p.GhasQNQPLPs..GGPRlD...s..aRP.sRhs..hpK.hssRP..sYsuhh.P.............ssM...su.......lhul-s...p..pp..h.hp.ppQPshsQGQp......L.R.pQ......LQs+..............pQ.......GhhG.Q...p.......l....+Q.hsPs...s...p.Y.GLQsu.......Q....GYTsYsSHhuLQQHs..u...uus..hV..sPSYsupsY.usH..oNPsLhDshRplQQR.PSGYVHQQAP...sYsHsLsuoQ.Rhs.HQ.... 0 2 5 16 +11978 PF12145 Med12-LCEWAV Eukaryotic Mediator 12 subunit domain Assefa S anon PFAM-B_2250 (release 23.0) Domain This domain is found in eukaryotes, and is typically between 325 and 354 amino acids in length. The function of this particular region of the Mediator subunit Med12 is not known, but there is a conserved sequence motif: LCEWAV, from which the name derives. 
20.30 20.30 20.30 20.40 20.20 20.20 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.94 0.70 -6.01 7 175 2009-07-16 15:05:44 2009-07-16 16:05:44 3 10 89 0 108 157 0 397.50 46 22.82 CHANGED s-FVQSthLuRRLAYhso++Lu........Lls...............................usppspsIpspsssshsssssspsssppsPhphsFo-ahpCspHRslVhtLSsILQsITLpCPoALVW.............shutscopsph.GSPLD+LPlAPSsLPMPst..ssthNppVRtpLhpsEppIK.RuptsEsRWshDKhQpu......AG.shs+VLpTL-sLDsHsF-RhDpsNSlDoLYs+IFsstpscstcpsp.........................................ppD-ulVplLCEWAVSppR.GcHRAMVVApLL-KRQs-l.sphss-S.....ps.D-K-SlsSGuhsssuhPVFQcVLhpFLDspAPsLs-..soppp+spFsNLVhLFsELIRHDVFSHsAYMCTLISRGDLhs...................Tushop.sususpsssstthc-pshtss.chc..ths-S...D..sp.................hhpchpEctpsS.-tssssp..cstpp-.shphshstshs.sRHh.YspHFPlsQ--s..upH..-CNQRhllL ..........................................................t.EFVQSthLSRRLAYhCsR+Lu.......h.Ls...............................................................................................................sspssch.h.s.........s.ss..shsssss.....stssss.sP.phsFsD.hh....CspH.+.s.lVauLSChLQslhLsCPSALVW...................pYuhscs+............ts.GSPLDhLP.lAPSsLPMPtG..sosh...sQQVRs+lhElEpQIKpRGpAVEsRWS...FDKCQEuT........A.GhTIuRVL+.T.LEVLDpHsF-..R..sD....oNS..l-oLhp+IFh.s.s.K.....DspEhs.........................................ssD.-AV.VsLLCEWAVSsKRsGcHRAMsVAKLLE.KRQuElEAE+.sG.-.S..................EshDEKtSl.uSuSluusoh.PlFQsVLlpFLDT...QAP.Lo-...spo.-..p...E+sEFhNLVLLFsELIRHDVFSHshYhCTLISRGDLuh.....................................................sus.spP.oP....sc..ss--...............K....-.t.cht.........t...t...t..........hs..p....h....h-..sp..............................................h..scs....pspsS.pp..s..pp...c.s..c-p........h.p.hh....s....hs.sRH..lpYsoH.FPI...P...--s.....sSH...ECNQRhllL........................................................................................................ 
0 35 41 73 +11979 PF12146 Hydrolase_4 Putative lysophospholipase Assefa S anon PFAM-B_2243 (release 23.0) Family This domain is found in bacteria and eukaryotes and is approximately 110 amino acids in length. It is found in association with Pfam:PF00561. Many members are annotated as being lysophospholipases, and others as alpha-beta hydrolase fold-containing proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.62 0.72 -4.13 137 1506 2012-10-03 11:45:05 2009-07-16 16:11:07 3 13 1223 0 398 8635 1896 80.60 30 22.07 CHANGED GhpLh.hpp.Whs...s..ss......+uslll.h.H.Gh.s.EHus..+..Ypc.luppLst.pGaslauhDpRGHG.pS.....s......G.....p..Ru...h..hssas..ph..l.c..D.lpph...lp .........................................htLahpp.W.s...........ps........s+u.l.lhl.h.H...G.....h....t.....E..H..uu.....R..........Y...p......c.....l....s..c....pL...s..p.....s..G......a..s..V..a..u..aDtRG...HG.+S......s........................G...........+G......h......h..s..s.h.s......ph....V.p.Dltphh.t........................................ 0 130 239 317 +11980 PF12147 Methyltransf_20 Hydrolase_5; Putative methyltransferase Assefa S, Bateman A, Coggill P anon PFAM-B_2243 (release 23.0) Family This domain is found in bacteria and eukaryotes and is approximately 110 amino acids in length. It is found in association with Pfam:PF00561. The family shows homology to methyltransferases. 
20.10 20.10 20.10 20.20 20.00 20.00 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -11.83 0.70 -5.55 16 478 2012-10-10 17:06:42 2009-07-16 16:16:36 3 6 461 0 55 393 49 296.00 64 53.99 CHANGED s+hRcFI.ppFspssppsuLhcAD+tGsop-Eu-tLuuPhst.S.psLaWphsRtul+hGGhhScGl+LGhcTGFDSGSTLDYVYRNpspGpG...slG+hID+sYL-AIGWRGIRpRKhHlpELlptAltRL+upGpPVRllDIAAGHGRYVL-ALsststt.....s-uIlLRDYSslNVppGpsLItp+GLssIA+FEpGDAFDtsoLAulpPtPoLAlVSGLYELFsDN-hVtpSLuGLApAlpPGGYLlYTGQPWHPQLEhIARALTSHRsGpAWVMRRRSQAEMDpLVctAGFcKlsQRIDpaGIFTVSlApR ...............................................-KMpsFIsRLYAsps......p+FDYpHEDRsGPSADcaRhLuusPsPhSPhDLsYRhhRtuMKL.hGsaSsGl+LGhsTGFDSGSSLDYVYpNQPQGSs....uFGRLIDKhYLN.SlGWR.GIRQRKs.HLQhLI+Q.AVAcL..+u..cG...hsVRVVDIAAGHGRY...VLDALsscPu.......ls.-I..LLRDYSELNVsQGQtMIAQRGhSups.RFEQGD....A....Fs...sE....-L.uAL......o.P.RPT.L..A.IVSG.LYELFP-.N-.t.V.KNSLAGLAsAIEPG.GlL.IYT..GQPWHPQLEhIAtVLTSHK.D.GKP.WVMRVRSQuEMDoLVccAGF-..KCTQRI..DEWGI..FTVShAVR.............................................................. 1 7 25 38 +11981 PF12148 DUF3590 Protein of unknown function (DUF3590) Mistry J, Gavin OL anon pdb_3db3 Domain This domain is found in eukaryotes, and is typically between 83 and 97 amino acids in length. It is found in association with Pfam:PF00097, Pfam:PF02182, Pfam:PF00628, Pfam:PF00240. There are two conserved sequence motifs: RAR and NYN. The domain is part of the protein NIRF which has zinc finger and ubiquitinating domains. The function of this domain is likely to be mainly structural, however this has not been confirmed. 
20.90 20.90 21.60 20.90 20.20 17.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.05 0.72 -4.37 8 130 2009-09-11 05:19:17 2009-07-16 16:52:31 3 11 53 7 70 130 0 88.80 47 12.11 CHANGED hlDARDsshGAWFEAplVsVo+cssscs............-sllYHVKYDDYPEsGVlplssKslRPRARTlLpWc-LcVGpVVMVNYNsDpPKERG ...............................VDs+DpphGAWFEAplspls..pcs.sst-........................................EDlIYHlKYDDYP.EsGl.lphss+..DlRsRARTll+Wp-LpVGpVVMlNYNs-sPcERG....... 0 14 18 36 +11982 PF12149 HSV_VP16_C Herpes simplex virus virion protein 16 C terminal Mistry J, Gavin OL anon pdb_2k2u Domain This domain is found in viruses, and is about 30 amino acids in length. It is found in association with Pfam:PF02232. This domain is the C terminal of the HSV virion protein 16. This protein is a transcription promoter. The C terminal domain is the carboxyl subdomain of the acidic transcriptional activation domain. The protein binds to DNA binding proteins to carry out its function. Such proteins include TATA binding protein, CBP, TBP-binding protein, etc. 20.10 20.10 20.60 61.80 17.70 18.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.05 0.72 -7.38 0.72 -4.27 2 19 2009-07-16 15:54:40 2009-07-16 16:54:40 3 1 10 3 0 46 0 30.10 85 6.14 CHANGED DsssYuuLDhADFEFEQMFTDALGID-FGG .DuAPYGALDMADFEFEQMFTDALGIDEaGG 0 0 0 0 +11983 PF12150 MFP2b Cytosolic motility protein Mistry J, Gavin OL anon pdb_2bjq Domain This domain family is found in eukaryotes, and is approximately 50 amino acids in length. These proteins are found in nematodes. They complex with MSP (major sperm protein) to allow motility. Their action is quite similar to the action of bacterial actin molecules. 
18.80 18.80 22.30 21.50 18.50 16.00 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.36 0.70 -5.75 3 68 2009-07-16 15:56:21 2009-07-16 16:56:21 3 4 10 3 60 63 0 286.80 43 80.89 CHANGED NPPAKEDTWAFQPIGSPFP-uPVKshGQQNMYVALWYKHGKPIHGRAWNNGGVVECSFPYNKAELTGKpDLGGQIQVLQYKGDHNSLGYWYEWIKYKDRaEKhDct+ElVRCGDSFPILWpcRsGGsLLGYVDNKTElAhFSasGKV-KhpGuALANMhIIsRNLhGGPPaC.Ct.CtusPPsP..llRVpcDEWhDlRpGDPWPTptssV+ALGRoLDTLPGsNPDQYVALWYppGEPVMGRlW.N-GGKlAAsFGWsNpEYs.pcVGSIQLLhhLP-SlRGFDYcWhPFPEAAQFGsKtWpPVHVspHKGNISPuVLplscGKEILGKlDlRNEKAohGYGGKEKlhTGsA.VcsshVLCRKAKA ................................EDpWAatsIGuPFPssPV+s.sppNhYVALWYKHGK.PIHGRu.W..NN.sGVVE.CSFsYpcs..ELosttpLtGQIQlLpY...h.....Gsaps.GFWY-WlpaKc..R.hp.p..-tttph.l.RCGsShPl.hh.p+.ttslLGalD.pTE.AhFu..sGps.p..Gst.hsshhllsRNhhst.st.s.p..t.t......sPPs.s.................+lh..hD-WhDhR.GDsaPstt....V.Ahs+sLsT.sGsp..QYVALWYh.pG.c.P.VhGRsW.sps.GKltAsFuh.spEas.tplGSlQlLhplsspshGa-YsWhsa...............s-uspast..K.ahP.VHl....G.hussll.....tG+plLuplDh+pEphs.huatGKEp.h.Gss.stshhVLCR+th................................. 0 22 28 60 +11984 PF12151 MVL Mannan-binding protein Mistry J, Gavin OL anon pdb_1zhq Domain This domain family is found in bacteria, and is approximately 40 amino acids in length, There is a single completely conserved residue G that may be functionally important. The domain occurs in two types of proteins. In mannan binding proteins [1], it forms a homodimeric molecule which complexes into a homo-octamer. In thiamidases it occurs without repeats but in the presence of other domains. MVL is distinct amongst other oligomannoside binding proteins in that it exhibits specificity for certain tetrasaccharides. Each molecule of MVL has four distinct carbohydrate binding sites. 
25.00 25.00 28.20 27.20 21.50 19.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.29 0.72 -7.89 0.72 -4.29 16 75 2009-07-16 15:56:42 2009-07-16 16:56:42 3 5 57 32 26 83 0 38.30 38 13.12 CHANGED sspApussPshCushGup..WsGQWssTsssuh.....oVsshp .......tApAssPohCushGGp..WsGQassssssu.t...puspshp.... 0 5 13 21 +11985 PF12152 eIF_4G1 Eukaryotic translation initiation factor 4G1 Mistry J, Gavin OL anon pdb_1rf8 Domain This domain is found in eukaryotes, and is about 80 amino acids in length. It is found in association with Pfam:PF02854. This domain is part of the protein eIF_4G. It binds to eIF_4E by wrapping around its N terminal to form the eIF_4F complex. This complex binds various eIF_4E-BPs (binding proteins) to regulate initiation of translation. 18.80 18.80 19.40 19.70 18.10 17.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.59 0.72 -4.08 20 155 2009-07-16 15:57:30 2009-07-16 16:57:30 3 6 127 1 110 160 0 75.70 36 6.16 CHANGED stpstsspsss.p.ohSphLppLcpApsI...-DlhshpYPpslpuPDschppss....hKYcYsPsFLLQF+cllphpsDsp ...................t..t..sspssp.sspsSthhpsLpoA+hl...-.DltphsYPt..uIpuPsstlNtss....tKa+YstpFLLQFpsVhp-+Ps..h.... 3 33 64 96 +11986 PF12153 CAP18_C LPS binding domain of CAP18 (C terminal) Mistry J, Gavin OL anon pdb_1lyp Domain This domain family is found in eukaryotes, and is approximately 30 amino acids in length, and the family is found in association with Pfam:PF00666. CAP18 is a protein which is derived from rabbit granulocytes. It has two domains, an N terminal DUF and a C terminal Gram negative LPS binding domain. This domain is the C terminal domain. 20.50 20.50 20.70 22.80 20.40 19.90 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.98 0.72 -7.10 0.72 -4.30 11 48 2009-07-16 15:57:40 2009-07-16 16:57:40 3 1 33 6 9 49 0 28.10 49 16.48 CHANGED uchLpKutEKIuctlK+IGQKIKDF..Ftp .GshhRKupEKIGcclK+IGQ+IKDF..htp.. 
0 1 1 1 +11987 PF12154 HCMVantigenic_N Glycoprotein B N-terminal antigenic domain of HCMV Assefa S, Bateman A, Coggill P anon PFAM-B_2260 (release 23.0) Family This domain is found in viruses, and is approximately 40 amino acids in length. The domain is found in association with Pfam:PF00606. There are two conserved sequence motifs: SVS and TSS. This family is the amino-terminal antigenic domain of glycoprotein B of human cytomegalovirus. 25.00 25.00 25.40 35.10 17.70 16.50 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -7.85 0.72 -4.05 3 111 2009-11-24 12:46:55 2009-07-17 10:42:19 3 2 5 1 0 89 0 36.30 76 11.90 CHANGED HHSSHTosAA+SQSGSVS.QHVTSSQTVSHcsNETIY .HpuSHTopsspuQotSVSSQ+VTSSEAVSHRANETIY. 0 0 0 0 +11988 PF12155 NADHdh-2_N NADH dehydrogenase subunit 2 N-terminal Assefa S, Coggill PC, Bateman A anon PFAM-B_2270 (release 23.0) Family This domain is found in eukaryotes, and is approximately 90 amino acids in length. It is found associated with Pfam:PF00361. All members are annotated as being NADH dehydrogenase subunit 2, and this region is the N-terminus. 25.00 25.00 26.10 26.10 18.40 18.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.08 0.72 -3.81 2 221 2009-07-17 10:13:53 2009-07-17 11:13:53 3 1 205 0 0 181 0 85.30 93 47.06 CHANGED MELTLGLIILIVLhYGLKAPTLRLAhlhhGhl....LLhtspLLshTQAIKMLVhLSGLAlLCMLDHphSaRSSSLLILLVILGNLLL MELTLGLIILIVLTYGLKAPTLRLAMLLAGA......V.....G.AAGLL.AEPHLLCWTQAIKMLVMLSGLAILCMLDHRTSHRSSSLLILLVILGNLLL 0 0 0 0 +11989 PF12156 ATPase-cat_bd Putative metal-binding domain of cation transport ATPase Assefa S anon PFAM-B_2287 (release 23.0) Family This domain is found in bacteria, and is approximately 90 amino acids in length. It is found associated with Pfam:PF00403, Pfam:PF00122, Pfam:PF00702. 
The cysteine-rich nature and composition suggest this might be a cation-binding domain; most members are annotated as being cation transport ATPases. 27.00 27.00 27.30 27.00 26.80 26.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.55 0.72 -3.45 88 725 2012-10-03 05:12:49 2009-07-17 13:04:01 3 7 717 0 192 664 62 83.30 30 10.60 CHANGED tsCaHCGtsl.s..tstphpsplpGpp..........+thCCsGCpuVspsIhcsGLssYYchRspsutpspt......tp.hp...phphaDps..-lpppFVppp..ssts .......pCaHCs.sh......tp..t.hphchtGpt..........+.hCCsGCpuVhphlh-u.GLssYYctcsssuhp..stt....h.pp..hp....phthhD....pl.tpalp.t.t..tt......................... 0 60 126 165 +11990 PF12157 DUF3591 Protein of unknown function (DUF3591) Assefa S, Bateman A, Coggill P anon PFAM-B_2298 (release 23.0) Family This domain is found in eukaryotes and is typically between 445 to 462 amino acids in length. Most members are annotated as being transcription initiation factor TFIID subunit 1, and this region is the conserved central portion of these proteins. 
25.00 25.00 37.20 25.90 16.40 24.30 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.52 0.70 -6.09 20 386 2009-07-17 12:30:32 2009-07-17 13:30:32 3 36 271 0 294 397 1 387.00 32 30.51 CHANGED FNI...SNDctY-hLKcsapsKVRuTluslslEHShPAh+LQhPaYKlcLs+p-hRuFHRPshshp.......Pssphs..Fs+lKhpK+K+c+......uKslpElFp..sopDLohsD.susllLhEYSEEhPhhLSNFGMuoKlINYYR+pstcDssRPKhc..lGETplLtspD+SPFh.FGpV-PGchVPTLaNNMhRAPlFKHcscsTDFLllRSos...sssuscaYLRsIsplFsVGQpaPs.sEVPGPHSR+VTshsKNRL+MllYRllp....+stp++.....lplcsls+HF.PDps-hQNRQ+LK....EFhcY.pRcu..t..pGaWclKps-slPsE.cslRsh.....loPE-lChl-SMpsGtppL.cDuGYspssch............tc-tct............thslEppLAPWssT+NFlsAopGKAMLplHGtGDPTGpGEGFSFl+T.SMKGGFh.psscsspspstpshstpch.p.puuH.YNVupQQ+hY...--EIc+lWptQppSL ....................................................................aNl...SNDphY..h..pp........t...l....+...s...shu.s....lpHShPAlcLp.Paa.s.chs..chRpFHRP.hphh....................st.t.h......h.thhc+.Kt.p.........................................upsstphFh....pspD.LohpD..usllLhEYsEEtP.lhsphGMus+lhNY.........Y...........+...............+c.s.s..cD.............s........s.....h..s.chc...............h..GEs.hh..ht..........s...SPF..hG...p...lcPGph..l.......slpNshaR.A...........Pla................H................c...............p.....TD..FLllRopp......................................taa...lRpls.....pla.slGQph......P...hEVPuP.pS++ssshh+shL..ph.hhYRlhh...........ps..p.pc..............lphp-lpct..F...Ps..p...s..-.p.Rp+LK................hhpa.....p+..s.s......t..........pshWhl+.s..s....plPsE.-plRsh...lsPEp..........sChh.SM.suppcL.cDsGasppt..h............................................pp...ttpp......................................ths.E.phAPWssT+sFlt.....Ah...p...u..........K....s...h...LplpG.uDPT.G.p..G..pG..FSal+h.s.Kss........t...t..tp.....p......................t.............t........l...ss..pchc.tthp..sstp..pha...--pIp+l.phph.s..................................................................................
.... 0 118 175 242 +11991 PF12158 DUF3592 Protein of unknown function (DUF3592) Assefa S, Coggill P, Bateman A anon PFAM-B_2016 (release 23.0) Family This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 150 and 242 amino acids in length. 29.00 29.00 29.00 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -11.43 0.71 -4.41 88 745 2009-07-17 13:18:40 2009-07-17 14:18:40 3 7 584 0 211 607 46 147.30 21 83.87 CHANGED hhhhhhh......shhhhshuhhhhhhshhhh.......................................hth.psh................................................................................hpspGplhshp.....................t.tptppsstapsh.lpaphs......sG..........p.......php....atps.......ssp.st...........ht.....hGp.pVsVh.........YsPssPpp..................uhlp..............................................................................................h...........thashshlhhh.........................................................................................................................................................................................................................................................................Gh........hhh.....hsGlhhhht 
.....................................................hh.......hhhhh..sl.G...lh..l.llhu.shhhhtp.........................................hphscss................................................................................hcspG.hl.l.css................pp.pp.ps.s.....p.ss.sah..P....s..ltapss......ss.............p.....plt...........asss...........hsushhp.........................................s......tGc.pVs.VaYsPscPpp..................Ahls.....................................................................................................................psh....h.............htashhslhhh..............................................................................................................................................................................................................................................................................................................Gh..........lhh...hhGh.....hh.............................................................................................................................................................................................................................................................................................................. 0 73 134 179 +11992 PF12159 DUF3593 Protein of unknown function (DUF3593) Assefa S, Coggill P, Bateman A anon PFAM-B_2028 (release 23.0) Family This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 98 and 228 amino acids in length. There is a conserved LHG sequence motif. 
25.00 25.00 43.70 43.70 18.80 18.60 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.10 0.72 -4.20 34 137 2009-07-17 13:37:51 2009-07-17 14:37:51 3 4 116 0 75 140 115 91.60 48 58.03 CHANGED hs.ssLFuLSLlPYLsFLaaL....p+spp.hPpLslhGFthTLlFVulTIssulhApphaG.spLusVDhLHGuAEuFLTLSNhllllGh..hpthpptp ......h..ssLFAlSLhPYLuFLaaL.....s+opp..hPpLslhGFahlLlFVusTIsAuIhAphtYG.psLusVDaLHGuAEuhLTLoNlllVLGhppultp.h..................... 0 24 48 68 +11993 PF12160 Fibrinogen_aC Fibrinogen alpha C domain Mistry J, Gavin OL anon pdb_2jor Domain This domain family is found in eukaryotes, and is approximately 70 amino acids in length, and the family is found in association with Pfam:PF08702. This domain is the C terminal domain of fibrinogen in mammals. The domain lies in the C terminal half of the alpha C region in these proteins. The function of the domain is that of intramolecular and intermolecular interactions to form fibrin. 20.50 20.50 22.10 20.90 20.20 20.00 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.46 0.72 -4.51 12 67 2009-07-17 15:11:56 2009-07-17 16:11:56 3 4 38 2 24 74 0 68.70 47 10.34 CHANGED usEKV..TSu...uToTT++SCSKTlTKTVl.GPDG++EslKEVlsS-DGSDCu..cs.cls.....hsHsF...pGuLD-hp ...........upEKVTSu...uToTTR+SCSKTlTKTVl.GPDG++EVsKEVlsS-DGSDCs..-sh....-Ls.........hhcsF...tGshDth...................... 0 1 2 8 +11994 PF12161 HsdM_N HsdM N-terminal domain Bateman A, Assefa S, Coggill P anon PFAM-B_2036 (release 23.0) Domain This domain is found at the N-terminus of the methylase subunit of Type I DNA methyltransferases. This domain family is found in bacteria and archaea, and is typically between 123 and 138 amino acids in length. The family is found in association with Pfam:PF02384. Mutations in this region of EcoKI methyltransferase Swiss:P08957 abolish the normally strong preference of this system for methylating hemimethylated substrate [1]. 
The structure of this domain has been shown to be all alpha-helical. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.20 0.71 -3.74 142 4604 2009-07-17 15:36:28 2009-07-17 16:36:28 3 25 2890 12 944 3684 555 134.60 20 25.85 CHANGED lpstlWshA.-hLRss..h...ssscY...tchlLsllFl+hls-hhptp....ppph................................................hh...t............hh.......ss............phpa...sp............lh.p.....................ss......ltptlpp.............hhptlppp........................lpslh..p...ph.t.t...........psphLtpllpth ...............................................ppplWphs.-hLRus.....h.sssca...pphllsllFlKhls-phppt...........hpch..................................................................................................ht.th.t.........................ah..l......st...................phpa.......ps.....................htpp......................................ttps......lhphlsp................hhptlcpt....s.............................s.........hculh..c...ph.p.....t.t..........tps..phLtpllph............................................................................................................................................. 0 338 667 826 +11995 PF12162 STAT1_TAZ2bind STAT1 TAZ2 binding domain Mistry J, Gavin OL anon pdb_2ka6 Domain This domain family is found in eukaryotes, and is approximately 20 amino acids in length, and the family is found in association with Pfam:PF02865, Pfam:PF00017, Pfam:PF01017, Pfam:PF02864. This domain is the C terminal domain of STAT1. This domain binds selectively to the TAZ2 domain of CRB (CREB-binding protein). In this process it becomes a transcriptional activator and can initiate transcription of certain genes. 
21.60 21.60 24.80 28.00 19.40 19.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.35 0.72 -6.82 0.72 -4.48 8 87 2009-07-17 15:38:57 2009-07-17 16:38:57 3 8 48 1 30 65 0 24.20 65 3.35 CHANGED SRLQ..-NhhPMSPD-acELcRhVu ..SRLQoTDNLLPMSPEEFDElSRhVG........ 0 5 7 12 +11996 PF12163 HobA DNA replication regulator Mistry J, Gavin OL anon pdb_2uvp Domain This family of proteins is found exclusively in epsilon-proteobacteria. Proteins in this family are approximately 180 amino acids in length. The structure of HobA is a modified Rossmann fold consisting of a five-stranded parallel beta-sheet (beta1-5) flanked on one side by alpha-2, alpha-3 and alpha-6 helices and alpha-4 and alpha-5 on the other. The alpha-1 helix is extended away from and has minimal interaction with the globular part of the protein. Four monomers interact to form a tetrameric molecule. Four calcium atoms bind to the tetramer and these binding sites may have functional relevance. The function of HobA is to regulate DNA replication and its does this by binding to DNA-A, but the exact mechanism of how this regulation occurs is purely speculative 20.20 20.20 20.80 79.20 19.80 19.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.12 0.71 -4.99 15 195 2009-07-17 15:52:56 2009-07-17 16:52:56 3 1 194 6 22 95 3 176.60 56 99.59 CHANGED Mp-FlpWoLEsIRc-..puhhu....WhEE+RhEWsPLlAspLcaLL-.GtohIVlTD-cRcWFpsYhlspINpsp.psRP.hLPhhsL+ulas.hcslpss..EsIsLlpDMLsIuFsNsYhaaYIGK.usct+upIAKs+-sSahWlFDEphQNSFhLsSpD-tLDhKLLpLa+LF-cSLsAsLFu+VsL ..................MsDFLoaTLEsIRss..Gs.FMA.....WLEpRRlEWAPLhAuRL+aLLE.G+TFlLhsDEpRuWaEEYhLpNIN.o+.ssRP.hLPFVS.LsSLhc..++.lpsp...E..DIALLsDhL-ISFPNGalaFYIGp.uoDs+ShIAKS+DDShLWLFDEQ.LQDSFYLsS+Dc.cLDlKLIoLY+LFDsSLDAlLFuKVpL.... 
1 5 18 22 +11997 PF12164 SporV_AA SporV_proteinAA; Stage V sporulation protein AA Mistry J, Gavin OL anon pdb_3g74 Domain This domain family is found in bacteria - primarily Firmicutes, and is approximately 90 amino acids in length. There is a single completely conserved residue G that may be functionally important. Most annotation associated with this domain suggests that it is involved in the fifth stage of sporulation, however there is little publication to back this up. 25.00 25.00 28.10 27.70 24.10 23.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.96 0.72 -4.00 17 224 2009-07-17 15:54:56 2009-07-17 16:54:56 3 1 222 8 32 178 4 92.40 35 44.56 CHANGED MppplYl+h++clpls.ppplpLtDlApltusp.slhp+Lpshslh+l...opcc+phhVlslhclIptI+pp.hPsl-lQslGtspsIVElphpK+ps ......ppslYl+hRpRlcVp.shcVpLGDlAQltuss.slhcpLpsh....hlY+l................sscD+s..+lVI.DlhKlIchIp....pp.hsplplp..hlGsupTlVElhhcK+ps................. 0 9 22 24 +11998 PF12165 DUF3594 Domain of unknown function (DUF3594) Assefa S, Coggill P, Bateman A anon PFAM-B_2040 (release 23.0) Family This presumed domain is functionally uncharacterised.This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00628. 21.00 21.00 22.10 21.20 20.60 20.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.94 0.71 -4.54 8 251 2009-07-17 16:40:26 2009-07-17 17:40:26 3 8 37 0 129 263 0 126.50 65 52.68 CHANGED RTVE-lF+DFKuRRuGIl+ALTsDVE-FYpQCDPEKENLCLYGaPNEsWEVNLPAEEVPPELPEPALGINFARDGMpEKDWLSLVAVHSDuWLLSVAFYFGARFuFs+sDRKRLFsMINDLPTVFEVVTG.AcKQsK .........................RTVE-lF+DF+uRRuGll+ALTsDVEcFYp...CDP.E.K.ENLCLYGhPsE.sWEVsLPAEEV.PPELPEPA.LGINFARDGMpcKDWLSLVAVHSDuWLLuVAFYFGAR........hs+s-R.....KRLFsMIN-LPTlFEVVTGt.pt.......................... 
0 20 83 110 +11999 PF12166 DUF3595 Protein of unknown function (DUF3595) Assefa S, Coggill P, Bateman A anon PFAM-B_2166 (release 23.0) Family This family of proteins is functionally uncharacterised.This family of proteins is found in eukaryotes. Proteins in this family are typically between 578 and 2525 amino acids in length. 26.90 26.90 27.10 28.80 24.60 26.80 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.30 0.70 -5.66 24 389 2009-07-17 16:45:50 2009-07-17 17:45:50 3 9 130 0 250 313 3 308.60 33 22.84 CHANGED ptlPFLaELRslhDWshocToLslhpWhplEDIaspla.hpsthps.cph.t.pGpphshhsKhhhGshhlhhllhllahPLhlaSshsss.spsN.lhssshplslssh........shaph........ssphphhsssthtpl.....tt.htppptthp........hhtshstpslptsplsspSsshWslSssuhppllppLpssst.....slphphsap...hpRshsttsshtths..p...............hths.stptsLtphLps..............................sstsl.lsphaspalps.s...sstsp.hpt..............ttpshhshshthhpsptst...............sstaWsls.tpss...t....................t.sttsppsslhhhlhs-cssss.h..uhlsuh..GIlulYsohlLslG+hlR.uhhsstspplhhpchsps-cllplCpsIhlsR......p...ss-htLEptLahcLlplhRSPEpllchTtst ............................................hhlPFLhELRslhDWshos.Tolsl.pWhphEDlaspla...hc..s..ht...pp.......t............h...u.........h.K.hh.Gh.hhhhllhhlhhPlhhhu.hpss..s..N.shpss..hplpltsh.................shath.............t.pl...hp.p.hpph.................h........shp...................altt....a..t.p.-l.h........hphpspSsthWtlssssptphhppL.pst.................th.hhhpap...hpRph...s...tt...h..ht..........................h....t........ttt.lhp.hltt.....................................................ttshhl.phhP.alps.s...tspsp.hp.........................t.thh.shph.....h.tt.........................................taW.hp............................................tttt.l..hhhhs-cs.ss.h....shlssh...GIhuLYhoh..VLslu+.hlR.thhst.shpIha--LPssDcllplCp-IalsR......p...tt-htlEctLahcLlhlaRSPchhlchTp................................................ 
0 95 131 192 +12000 PF12167 DUF3596 Domain of unknown function (DUF3596) Assefa S, Coggill P, Bateman A anon PFAM-B_2234 (release 23.0) Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00589. It is likely that this domain acts to bind DNA. 21.90 21.90 22.40 22.50 21.20 21.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.93 0.72 -4.44 60 686 2012-10-03 08:51:45 2009-07-17 17:52:51 3 6 484 0 96 500 13 62.00 47 16.03 CHANGED hsulcsR.....ssp.....LplsFpYpGhRsREph....s..L.sTstNhKpApplhspIptcIphGsFs...YucaF....PpSp ...............PsGVEs+.....Gup.....LRIhF...hY+G.....tRsREsL......G..lsD..Tsc......N......RKhA.G.c.LR.splsatIc.....tGsF-...YucpFPsS.............. 0 16 51 80 +12001 PF12168 DNA_pol3_tau_4 DNA polymerase III subunits tau domain IV DnaB-binding Assefa S, Bateman A, Coggill P anon PFAM-B_2296 (release 23.0) Family This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. 25.00 25.00 29.50 28.00 21.50 20.70 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.21 0.72 -3.58 16 635 2009-07-20 12:56:15 2009-07-20 13:56:15 3 5 631 0 50 312 0 82.10 63 13.02 CHANGED Rsthp+p.........ptsossKKucsAuup+ht...sSsLERlApl....oppspsp..uspssE.ppstccEsYRW+sopss-.ppspsl.sTPpsLK .........RpQLQRs............QGuTKsKKSEPAAAoRARP..............VNNuALERLASV..............T-RVQARP.....sPSALE..cAPsKKEAYRWKATTPVh....QpKEVV.ATPKALK.......... 
0 2 12 31 +12002 PF12169 DNA_pol3_gamma3 DNA polymerase III subunits gamma and tau domain III Assefa S, Bateman A, Coggill P anon PFAM-B_2296 (release 23.0) Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.51 0.71 -4.44 52 3954 2009-07-20 12:56:32 2009-07-20 13:56:32 3 19 3800 30 939 3190 2267 136.90 21 21.86 CHANGED losspV+sMLGLuD+uplhcLh-slhcGDsssuLsthcstYstGADPhslLpsLh-hhHhlohhchss.s.sps.t...hspsEpp+htshAppLuhtsLsRhWQhLLKGhpElppAscPhtAAEMsLlRLsasusLPsPc-hl+pl ..........................phptlpphLGslspshlhpllcAl.hp......s-s..t...pshshlpplh.spG.h-hpphlp-L.......lph.l.+clhl..hph.......s....P....s.ht..................t..s.ttp.p...h.p.ph........A...pp...l...s...s.ppltthh.p.h...l..h...pu.p.p...-lp.h........u..s.s......+hhlEhsllRhht........................................................................................... 1 304 617 796 +12003 PF12170 DNA_pol3_tau_5 DNA polymerase III tau subunit V interacting with alpha Assefa S, Bateman A, Coggill P anon PFAM-B_2296 (release 23.0) Family This domain family is found in bacteria, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00004. Domains I-III are shared between the tau and the gamma subunits, while most of the DnaB-binding Domain IV and all of the alpha-interacting Domain V are unique to tau. The extreme C-terminal region of this domain 5 is the part which interacts with the alpha subunit of the DNA polymerase III holoenzyme. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.67 0.71 -4.25 5 839 2009-07-20 13:12:31 2009-07-20 14:12:31 3 7 835 1 114 517 50 140.40 57 22.10 CHANGED KALEHEKTPELAAKLAAEAIERDPWAAQVSQLSLPKLVEQVALNAWKEESDNAVCLHLRSSQRHLNNRGAQQKLAEALSsLKGSTVELTIVEDDNPAVRTPLEWRQAIYEEKLAQARESIIADNNIQTLRRFFDAELDEESI .......................................................+ALEHEKTPELuAKLAsE.Al...ER...D.s.WAApVupLuLP..KLVEQlAL.N.A.a.K.Ec..s.s.s.s..l.s.LH..LRSoQRHL.N.spuA.QQcLucALSpL..pGssVE..LTI.lED.DNP.As.+TP.LEW.RQAIYEEKLAQ.ARc.SIIADsNIQTLRRFFDAELDEESI...... 1 17 44 81 +12004 PF12171 zf-C2H2_jaz Zinc-finger double-stranded RNA-binding Assefa S, Bateman A, Coggill P anon PFAM-B_2302 (release 23.0) Family This domain family is found in archaea and eukaryotes, and is approximately 30 amino acids in length. The mammalian members of this group occur multiple times along the protein, joined by flexible linkers, and are referred to as JAZ - dsRNA-binding ZF protein - zinc-fingers. The JAZ proteins are expressed in all tissues tested and localise in the nucleus, particularly the nucleolus. JAZ preferentially binds to double-stranded (ds) RNA or RNA/DNA hybrids rather than DNA. In addition to binding double-stranded RNA, these zinc-fingers are required for nucleolar localisation. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.24 0.72 -3.71 140 2095 2012-10-03 11:22:52 2009-07-21 10:15:49 3 234 343 2 1311 3181 32 26.70 31 6.11 CHANGED paaChhCs.+hFpsppsh..psHhpo..KpHK .....aaCthCs..+tFpsppph..psHhpo.....+pHp...... 0 368 611 979 +12005 PF12172 DUF35_N Rubredoxin-like zinc ribbon domain (DUF35_N) Bashton M, Bateman A, Krishna SS anon Pfam-B_1390 (release 4.2) Domain This domain has no known function and is found in conserved hypothetical archaeal and bacterial proteins. The domain is duplicated in Swiss:O53566. 
The structure of a DUF35 representative reveals two long N-terminal helices followed by a rubredoxin-like zinc ribbon domain represented in this family and a C-terminal OB fold domain. Zinc is chelated by the four conserved cysteines in the alignment. 23.20 23.20 23.20 23.20 23.10 23.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.06 0.72 -3.94 158 1253 2009-07-21 15:56:27 2009-07-21 16:56:27 3 17 452 2 560 1184 271 36.80 28 19.70 CHANGED hpustcscLhhp+CssCGph....h.aPPc.shC..spC.t.ut...shp ............tthtpscLhhp+C..ssCGph....haPP+.shC......PpC.t.ut.p.............. 0 123 340 478 +12006 PF12173 BacteriocIIc_cy Bacteriocin class IIc cyclic gassericin A-like Coggill P anon PfamB_170026 (release 23.0), manual Family This class of bacteriocins was previously described as class V. The members include gassericin A, acidocin B and butyrovibriocin AR10, all of which are hydrophobic cyclical structures [1]. The N- and C-termini are covalently linked, and the circular molecule is resistant to several proteases and peptidases [2]. The immunity protein that protects Lactobacillus gasseri from the toxic effects of its bacteriocin, gassericin A, has been identified. It is found to be a small positively-charged hydrophobic peptide of 53 amino acids containing a putative transmembrane segment [3] - a structure unlike that of the more common immunity proteins as found in Pfam:PF08951. 
20.40 20.40 91.60 91.40 20.30 20.30 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.07 0.72 -3.95 3 5 2009-07-23 10:40:23 2009-07-23 11:40:23 3 1 5 0 0 6 1 89.00 48 98.02 CHANGED MVTKYGRNLGLsKVELFAIWAVLVVALLLsTANIYWIADQFGIHLATGTARKLLDAVASGASLGTAFAAILGVTLPAWALAAAGALGATAA ............hs.....huhNhsLsKIEpFsIaAlLV.VALLlssuNIYWIADKFGIHLAPGWYQDlVDaVSAGGSLuTAFAlIlGVTLPAWIlsAAuAFGAsSA 0 0 0 0 +12007 PF12174 RST RCD1-SRO-TAF4 (RST) plant domain Overmyer K, Coggill P anon Pfam-B_8550 (release 23.0) Domain This domain is found in plant RCD1, SRO and TAF4 proteins, hence its name of RST. It is required for interaction with multiple plant transcription factors. Radical-Induced Cell Death1 (RCD1) is an important regulator of stress and hormonal and developmental responses in Arabidopsis thaliana, as is its closest homologue, SRO1 - Similar To RCD-One1. TBP-Associated Factor 4 (TAF4) and TAF4-b are components of the transcription initiation factor complex TFIID. 21.20 21.20 21.20 25.00 21.10 20.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.01 0.72 -4.18 16 135 2009-11-10 14:39:24 2009-07-24 10:38:49 3 7 24 0 80 130 0 69.30 31 11.44 CHANGED scsPp...SPahPFshLhsslspplspsch.llpppYpch+cp+IoRp-hl+plRpIVG.DpLLtpslpplppp .................t..hPp...oPhhs..FshLhshlppplspsc...hphlpphYpch+.....cp+IoRcchl+plRtIVG.DclLhpslpplp....... 0 10 44 65 +12008 PF12175 WSS_VP White spot syndrome virus structural envelope protein VP Mistry J, Gavin OL lg7 pdb_2edm Domain This family of proteins is found in viruses. Proteins in this family are approximately 210 amino acids in length. There is a conserved NNT sequence motif. These proteins are structural envelope proteins in viruses. This is the beta barrel C terminal domain. There is a protruding N terminal domain which completes the proteins. 
Three of four envelope proteins in white spot syndrome virus share sequence homology with each other and are present in this family - VP24, VP26 and VP28. VP19 is the other major envelope protein but shares no sequence homology with the other proteins. These proteins are essential for entry into cells of the crustacean host. 20.40 20.40 23.30 143.10 20.20 19.60 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.30 0.70 -5.24 3 37 2009-07-27 13:17:05 2009-07-27 14:17:05 3 1 2 13 0 26 0 186.50 49 100.00 CHANGED M-hu.hhuVsuAILAILol.IlVlIVIhVhpIslNK+l-p+s+slhsNhDEhlRlPIsucA+usaF+hs-sSasTcsLGKVtl+.NGpSDu-MK-ccADLVIToVpG..sRTLsVssGpspohEhThKlaNNTuRDIsIsulolsPsl.......NssS+sFsuSKslpSFoPsllccDcluTFVsGATF-AsIssTssupLlsMhhcss.s..clccK .Mch....uVsuAILAhhsl.IhVhIsIhhhp.slsKpl-p+scsl.sNhDE.lRlPlsucstusaF+hs-sSasocsLGKltl+.NGpSDupMK-ccADLVITsVpG...RsLpVssGpshohEhThKVaNNTuRcIsIsuhphsPpl........ssS+sFsuSpsspSFoPsslccDcluTFVsGsTFsAsIssTssup.h..hhp......thtp.................. 0 0 0 0 +12009 PF12176 MtaB Methanol-cobalamin methyltransferase B subunit Mistry J, Gavin OL lg7 pdb_2i2x Domain This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 460 amino acids in length. MtaB folds as a TIM barrel and contains a novel zinc-binding motif. Zinc(II) lies at the bottom of a funnel formed at the C-terminal beta-barrel end and ligates to two cysteinyl sulfurs (Cys-220 and Cys-269) and one carboxylate oxygen (Glu-164). The function of this protein is to catalyse the cleavage of the C O bond in methanol by an SN2 mechanism. It complexes with MtaA and MtaC to perform this function. 
25.00 25.00 184.40 28.20 21.40 21.00 hmmbuild -o /dev/null HMM SEED 461 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.49 0.70 -6.07 9 44 2009-07-27 13:17:44 2009-07-27 14:17:44 3 2 29 8 32 42 3 442.20 49 99.39 CHANGED ulK+aTcMAYsSADDMlFGpuhaPVKsGhslcIGAGhshPElNhA..PRstutpSh-cLl+EYc+IssDshpRhlplGhPulllEsEHVpQMopsP-WGuclA+sQ+shME-aHDcYGlKsuLRpTluDIREsc-hhtLR.GDphsshhEuFEpCApNGADhLSlEShGGKEVaDYuIlRNDlsGlLauIGlLGolDMEalWscIucIAKKpssluuGDTDCAQANTAMFlAGGLhsKNluHTlAslARAISAsRSLVAYEsGAlGPGKDCGYENsIlKAIuGVPIutEGKoSTCAHSDlMGNLshpsCDlWSNESVEY+tEFGGTTstsWsEoLuYDCALMNTAhphGppKsL..RDhhhlSD+YRDPQuYlLAYDNAY+lGcAIVK-GcDIYLRuKsAAlcsspIlcEGh.cG+LcLoRFE+cuLsssppslEuLsDDpDcFhp-ClpKYtpcVKsF+PEsYs ..h.hp+aTcMAYssuD-MlFGpuKpPVKsGhGhpIGuGhshPElNhA..P.t..tutpSh-KllpEYccIspslhpRhlplGhPulllEpEHVtQhopNPcWGu-lspsp+slME-YHDcYGlKsuLRpT.uDlREpcch.pLR.G-passhhEuFEtCApsGADhLSlEShGGKEVaDaullRsDhtulLauIGVLGohDMEhlWp-IscIAcKssslsuGDTDCuQANTAMFlAGGLhcKsluHThAslARAIuAsRSLVAaEsGAhGPsKDCGYENsIlKAIsGlPIutEGKousCAHSDlhGNLshtsCDlWSNESVEa+tEhGGsTspsWhEsLuYDCuLMNsAlppGppK.L..RDhhshSD+YRDPQuYlLuYDNAY+IGpAIl+sGcshYLRuKsAAlc.shpllc-us.sG+LpLoRFEpcsLspsppslEuLsDDp-cFhs-slp+YppcVchFcPpsYs.......... 0 8 24 27 +12010 PF12177 Proho_convert Prohormone convertase enzyme Mistry J, Gavin OL lg7 pdb_2ke3 Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF01483, Pfam:PF00082. There are two completely conserved residues (Y and D) that may be functionally important. This protein is the C terminal domain of a prohormone convertase enzyme which targets hormones in dense core secretory granules. This C terminal tail domain is the domain responsible for targeting these dense core secretory granules. The domain adopts an alpha helical structure. 
25.00 25.00 28.20 59.10 19.90 18.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.96 0.72 -4.37 3 51 2009-07-27 13:18:22 2009-07-27 14:18:22 3 1 38 2 24 52 0 41.00 76 5.49 CHANGED phKDsEDSLYNDYlDsFYNsKPYKHRDDRLLQALl-Ils-D ..QLKDSEDSLYsDYVDVFYNTKPYKHRDDRLLQALVDILsEE.. 0 1 3 10 +12011 PF12178 INCENP_N Chromosome passenger complex (CPC) protein INCENP N terminal Mistry J, Gavin OL lg7 pdb_2qfa Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. INCENP is a regulatory protein in the chromosome passenger complex. It is involved in regulation of the catalytic protein Aurora B. It performs this function in association with two other proteins - Survivin and Borealin. These proteins form a tight three-helical bundle. The N terminal domain is the domain involved in formation of this three helical bundle. 20.60 20.60 21.90 20.80 18.30 18.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.57 0.72 -7.65 0.72 -4.41 7 80 2009-07-27 13:21:03 2009-07-27 14:21:03 3 2 45 1 33 72 0 37.40 46 4.94 CHANGED spuhtpLhplhspKlt-FlsplDshchlWL-EIppEAt ..t.Gsh+LLElC-pKLhEFlsshDNKDlVWLEEIpEEAt.. 1 2 6 17 +12012 PF12179 IKKbetaNEMObind I-kappa-kinase-beta NEMO binding domain Mistry J, Gavin OL lg7 pdb_3brt Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00069. These proteins are involved in inflammatory reactions. They cause release of NF-kappa-B into the nucleus of inflammatory cells and upregulation of transcription of proinflammatory cytokines. They perform this function by phosphorylating I-kappa-B proteins which are targeted for degradation to release NF-kappa-B. This kinase (I-kappa-kinase-beta) is found in association with IKK-alpha and NEMO (NF-kappa-B essential modulator). This domain is the binding site of IKK-beta for NEMO. 
25.00 25.00 25.90 25.70 24.50 18.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.65 0.72 -4.13 9 110 2009-07-27 13:23:03 2009-07-27 14:23:03 3 3 39 6 48 84 0 37.60 47 5.40 CHANGED SE-LltEuhsLCspLEoslp-sh+EQ-pSlhuLDWSWL .....tplltEshshhupLpohlp-....sscEQspShMsLDWSWL.... 0 2 5 14 +12013 PF12180 EABR TSG101 and ALIX binding domain of CEP55 Mistry J, Gavin OL lg7 pdb_3e1r Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. This domain is the active domain of CEP55. CEP55 is a protein involved in cytokinesis, specifically in abscission of the plasma membrane at the midbody. To perform this function, CEP55 complexes with ESCRT-I (by a Proline rich sequence in its TSG101 domain) and ALIX. This is the domain on CEP55 which binds to both TSG101 and ALIX. It also acts as a hinge between the N and C termini. This domain is called EABR. 25.00 25.00 25.00 25.00 19.50 23.70 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.68 0.72 -4.60 6 110 2009-07-27 13:23:54 2009-07-27 14:23:54 3 1 43 2 55 98 0 34.90 50 8.56 CHANGED cppl+cs.-hNppW.pYDppREsYV+uLhs+lKEL ...cpplpc.s.-hNtpW.pYDtpR-tYV+GLhAplhEh.... 0 4 8 21 +12014 PF12181 MogR_DNAbind DNA binding domain of the motility gene repressor (MogR) Mistry J, Gavin OL lg7 pdb_3fdq Domain This domain family is found in bacteria, and is approximately 150 amino acids in length. MogR is involved in repression of transcription of the flagellar gene in Listeria bacteria. This allows a phenotypical switch from an extracellular bacterium to an intracellular pathogen. MogR binds AT rich flagellar gene promoter regions upstream of the flagellar gene. These regions follow the pattern 5'-TTTTNNNNNAAAA-3'. This domain is the DNA binding domain of MogR. 
25.00 25.00 83.10 82.90 21.60 20.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.88 0.71 -4.43 4 125 2009-07-27 13:24:16 2009-07-27 14:24:16 3 1 120 2 5 94 0 143.50 71 47.21 CHANGED LLQNMSNsKpNDhpLEu.EFKKIEKQFQVcYEELlDLYNRMVLFQIDIEKpGGMcsYEKSsITWLKSELELLYtVYQFsQRHsLNIhNISKYlSKpELNLF.KTESQLQNTYYKLKKpEIPFENIcKQKPGRKRKYssVKET.sEhKpE .LLQpMSNNKhNDhpLEs.EhKKIEKpFpVcYEELlDLYNRMVLFQIDIEKHGGhRAYEKSsITWLKSELELLYEVYQFsQRHGLNIlNISKYVSKNELNLFPKTESQLQNTYYKLKKcEIPFENIEKQKPGRKRKYhPVKETlVphKpE............. 0 1 3 3 +12015 PF12182 DUF3642 Lipoprotein; Bacterial lipoprotein Mistry J, Gavin OL lg7 pdb_3ge2 Domain This domain family is found in bacteria, and is approximately 60 amino acids in length. There is a single completely conserved Y residue that may be functionally important. This domain is from a bacterial lipoprotein, a major virulence factor in Gram negative bacteria. 22.70 22.70 23.90 23.00 22.30 19.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.60 0.72 -3.55 10 315 2012-10-03 08:47:39 2009-07-27 14:26:22 3 1 274 1 16 161 0 78.20 62 47.50 CHANGED spsoslDGoYsusD.-s....DQlTLploGsoGThTclEsDG-cElcQVss-usNQ+LII...GD.DsKpYphsGNQLsl-DhDc-s ......AQPT-IDGTYTGQD.DG....D+ITLVVTGTTGTWTELESDGDQcVKQVTFDuANQRMII.....GD.D..VKIYTVNGNQllVDDMDRDP................... 1 1 3 7 +12016 PF12183 NotI Restriction endonuclease NotI Mistry J, Gavin OL lg7 pdb_3bvq Domain This family of proteins is found in bacteria. Proteins in this family are typically between 270 and 341 amino acids in length. There is a conserved CPF sequence motif. The type IIP restriction enzyme, NotI, is a homodimer that recognizes the 8 bp DNA sequence 5'-GC/GGCCGC-3' and cleaves both strands of DNA to create 5', 4 base cohesive overhangs. 
25.00 25.00 62.30 62.10 19.30 18.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.90 0.70 -5.30 13 53 2012-10-11 20:44:46 2009-07-27 14:38:24 3 1 49 4 17 50 1 226.60 26 80.31 CHANGED ElFGaps..ss.u.hsspppupct.CPFh.spps.KsoKspst.shGVCSl...h..........scsupslIlCPhR.atpchpIhs-suchhhs...................h.tuhhhthtpththssshGcs...sGsEh.ls...t.....lDalLsphss....G+lhDaVulElQslshoGsh+shhpph.............hppspuphuhsW............osstKRllPQLhaKGsllc+.....hstKhshllpcshathlsc.h................lspht.hpsushsa.hlh-hc.sss....t.thpL ........................EhhG.th..s..t..h..hthhtp..CPFh.sppC.K.spKsps....GsCol.....h...................tpssp.hIlCPhR.hhp....chphhhpshphhhs............................................t+p...sGsElpls......t.sth.lDalLsshss....sclhDaVulElQslspTGss.ts.ppphht...........hppstsphuhs.W............psstKRllsQlhhKsplhcp.....hstKhshllppthathhsp.h................htphp..hpttshsh.hhhshc.stp............................................................... 0 3 7 16 +12018 PF12185 IR1-M Nup358/RanBP2 E3 ligase domain Mistry J, Gavin OL lg7 pdb_1z5s Domain This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00638, Pfam:PF00641, Pfam:PF00160. There are two conserved sequence motifs: TFFC and EDF. Nup358/RanBP2 is a nucleoporin involved in ubiquitination of many different protein targets from various cellular pathways. It complexes with Ubc9, SUMO-1 and RanGAP1 to perform this function. This is the ligase domain which binds to Ubc9. 25.00 25.00 25.00 25.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.07 0.72 -4.25 4 96 2009-07-27 15:58:16 2009-07-27 16:58:16 3 30 40 4 59 87 0 62.00 51 4.32 CHANGED Ds.IVaE+cPTsEpcAhAcpLpLPPTFFChhspsD.ospDpscsEDF-otV+KlptcLhspssEt ...-slIVaEtpPTsEpKAhAcpL+LPPTFFChhsts...D..ssE-p..pssEDFpotl+Klptt.h.pspc.t........ 
0 8 12 29 +12019 PF12186 AcylCoA_dehyd_C Acyl-CoA dehydrogenase C terminal Mistry J, Gavin OL lg7 pdb_2oku Domain This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF02770, Pfam:PF00441, Pfam:PF02771. There is a conserved ARRL sequence motif. The C terminal domain is an alpha helical domain. The flavin ring of Acyl-CoA dehydrogenase is buried in the crevice between the two alpha helical domains and the beta-sheet domain of one subunit, and the adenosine pyrophosphate moiety is stretched into the subunit junction of a neighbouring subunit, composed of two C terminal domains. 19.80 19.80 20.50 21.20 19.30 19.50 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.37 0.71 -4.22 7 120 2009-07-27 15:59:57 2009-07-27 16:59:57 3 2 119 2 18 101 7 112.90 46 19.94 CHANGED TsGoYlApIR.EapslsssPEhpsLps+LtcMss+a-AssstVpEspDQELlDFhARRLhEMAAchlhuHLLlQDAsKs.D.FtcSApVYlpYupAEl-KapsFIppFst-DLAh .............TNGoYlupl+.E.apphss..usE.hpsLhsRlpcMss+aEsssstVK..EspsQE...l...hDFhARRLhEMAAclIMuaLLlpDAo.KusELFuKSApVYlpaAEuEVpKHtsFIcsFcsEDL........... 0 9 16 18 +12020 PF12187 VirArc_Nuclease Viral/Archaeal nuclease Mistry J, Gavin OL lg7 pdb_2w8m Domain This family of proteins is found in archaea and viruses. Proteins in this family are typically between 211 and 244 amino acids in length. These proteins are nucleases from fusseloviruses and sulfolobus archaea. 
19.70 19.70 19.90 20.90 19.60 19.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.27 0.71 -4.89 4 13 2012-10-11 20:44:46 2009-07-27 17:00:09 3 2 13 2 3 15 1 174.20 44 76.43 CHANGED sFK.+YWuNps.-sa.lPs.aLGREYLlhGhLhIpLtpW+.KGLlcFDlYl+.TGlGTLTsVINcpYYpsl.DKYDLTlYh+hK....spYaPhlaIDlTGSSWTEEQSccR.....YGEulYAILSsKVpVApph-VhG.RshFIHYsDpEDKLKhIsALpILNLE+QGK...I+h-caEhsAtSpYYlIPlphWKNls-LRV ....................................FKLKYWGspt.-sYlLPs.hLGREYLllGKL.hIpLupW+.KGhl-aDlYl+.oGlGTLT.NsIN.pYYcslpDKYDhTlYl+sK....cpYaP..LlaIDITGSSWTEEQSccR.....YGESlYAILSsKV-VApcas..VhG.RVaFlHYsDsEDKLKsIoALQILNLE+psK...IKhDcFEpsAtSpYYLIPlphWKNlhELR........... 1 1 1 3 +12021 PF12188 STAT2_C STAT2_Cterm; Signal transducer and activator of transcription 2 C terminal Mistry J, Gavin OL lg7 pdb_2ka4 Domain This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF02865, Pfam:PF00017, Pfam:PF01017, Pfam:PF02864. There is a conserved DLP sequence motif. STATs are involved in transcriptional regulation and are the only regulators known to be modulated by tyrosine phosphorylation. STAT2 forms a trimeric complex with STAT1 and IRF-9 (Interferon Regulatory Factor 9), on activation of the cell by interferon, which is called ISGF3 (Interferon-stimulated gene factor 3). The C terminal domain of STAT2 contains a nuclear export signal (NES) which allows export of STAT2 into the cytoplasm along with any complexed molecules. 20.00 20.00 76.60 75.30 19.70 16.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.65 0.72 -4.17 5 42 2009-07-27 16:00:45 2009-07-27 17:00:45 3 5 23 1 11 33 0 56.00 62 7.09 CHANGED SQPlPEPDLPcDLQQLNT--M-IFRNslsIE-IMPNGDPLLAGQNo.VDEAYlSppS ......SQslPEPDLPpDLRHLNTE-ME...IFRNs.h+IEEIMPNGDPLLAGQNT.sDEsalsp.S... 
1 1 1 1 +12022 PF12189 VirE1 Single-strand DNA-binding protein Mistry J, Gavin OL lg7 pdb_3btp Domain This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved IELE sequence motif. VirE1 is an acidic chaperone protein which binds to VirE2, a ssDNA binding protein. These proteins are virulence factors of the plant pathogens Agrobacteria. VirE1 competes for the ssDNA binding site of VirE2. 25.00 25.00 26.10 101.90 17.80 16.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.00 0.72 -3.85 4 9 2009-07-27 16:02:35 2009-07-27 17:02:35 3 1 5 1 3 6 0 62.30 61 95.08 CHANGED MAIIKLpsNKsRsssssEcPpEl.tEEhShsH.ssGFTsLDLcMIELEsFVL+CPLPE-NLAG MAIIKLpANKNRsssssEcPpcl.hEEhSssa.ssGFTSLDLcMIELEsFVL+CPLPE-NLAG 0 1 3 3 +12023 PF12190 amfpi-1 Fungal protease inhibitor Mistry J, Gavin OL lg7 pdb_3bt4 Family This protein family is found in eukaryotes, and is approximately 50 amino acids in length. These proteins are fungal protease inhibitors. 25.00 25.00 25.20 29.30 21.60 21.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.44 0.72 -3.48 2 9 2009-07-27 16:04:19 2009-07-27 17:04:19 3 1 6 1 2 11 0 85.00 68 91.29 CHANGED Csht..YGsLVCGosYCcppPChpP.h...C.pso.aRA+HAGKCACCPACVThLGEsAsCKhYSKELGETPSAlC+-PLKClptVCT+ls .Clhs..YGsLVCGTsYCcpNPChpPshs..C.pso.aRA...pHAGKCACCPACVTLLsEsAsCKhYSKELGETPSAVC+EPLKClptVCTKls. 0 1 2 2 +12024 PF12191 stn_TNFRSF12A Tumour necrosis factor receptor stn_TNFRSF12A_TNFR domain Mistry J, Gavin OL lg7 pdb_2eqp Domain This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 184 amino acids in length. This is the stn_TNFRSF12A_TNFR domain from the tumour necrosis factor receptor. The function of this domain is unknown. 
21.40 21.40 21.40 21.50 21.20 21.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.79 0.71 -4.10 2 45 2009-07-28 09:54:12 2009-07-28 10:54:12 3 2 30 4 23 46 0 108.70 54 87.92 CHANGED MsP...RsL.phhV.hhhLVL......ppAsupu.CspGpuaStDLsKCM-CusC.sp.+SDFC.sCsuts.t..F.hlWsIhhuAhulhLllsllShhlhhp+CRR+pKFTTPIEETGucus.ul.LIp ..........................L.thLVhshhLsL.tusuG.ppsPGsuPCucGoSWSuDLD.KCMDCuSC.uRP+SDFChGCuAAPPAs..FpLLWPILGGALSLs..lV.LuLLS.GFLVWRRCRRREKFTT.PIEETGGEGCPuVALIQ...................................................................... 0 7 7 10 +12025 PF12192 CBP Fungal calcium binding protein Mistry J, Gavin OL lg7 pdb_2jv7 Domain This domain is found in eukaryotes, and is approximately 60 amino acids in length. There is a single completely conserved residue C that may be functionally important. This is a calcium binding domain from the fungal protein CBP (calcium binding protein). This protein is a virulence factor with unknown virulence mechanisms. CBP complexes as a highly intertwined homodimer. Each monomer is comprised of four alpha helices which adopt the saposin fold, characteristic of a protein family that binds to membranes and lipids. 25.00 25.00 59.60 58.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.86 0.72 -4.02 5 12 2009-07-28 09:54:30 2009-07-28 10:54:30 3 2 11 2 8 14 0 57.20 47 25.99 CHANGED QPuVscAhs.aNpsVssFpKAouAAuCsWlsCluSLuuoSAACAAAluELGLcPPsDLA .......tPultcAhspaNpsVssFpcAouAAuCsWlsClpSLuuoSAACAAAluELGLs..hDhA...... 0 1 2 5 +12026 PF12193 Sulf_coat_C Sulf_coatprot_C; Sulfolobus virus coat protein C terminal Mistry J, Gavin OL lg7 pdb_3f2e Domain This domain family is found in viruses, and is approximately 70 amino acids in length. It is the C terminal of a coat protein in sulfolobus viruses. 
25.00 25.00 38.40 136.40 22.50 17.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.44 0.72 -4.16 3 5 2009-07-28 09:55:13 2009-07-28 10:55:13 3 1 5 1 0 5 0 69.00 86 54.25 CHANGED VSAVLTQYGITGPNRAIYQGFGLKVARALNRIGSGPALVNMINGLKuYYISAFNANPQVLDAVVNIITG VSAVLTQYGITGPNRAIYQGFGLKVARALNRIGSGPALVNMINGLKAYYISAFNANPpVLDAVTNIITG 0 0 0 0 +12027 PF12194 Ste5_C ste5minscaffold; Protein kinase Fus3-binding Mistry J, Gavin OL lg7 pdb_3fze Domain This domain family is found in eukaryotes, and is approximately 190 amino acids in length. This domain is the penultimate C terminal domain from the protein ste5 which co-catalyses the phosphorylation of fus3 by ste7. It is involved in the MAPK pathways. This domain is the minimal scaffold domain of ste5. It binds to the mitogen activated protein kinase fus3 before it is phosphorylated. 25.00 25.00 185.80 185.00 22.50 17.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.08 0.71 -4.67 5 22 2009-09-14 13:25:02 2009-07-28 10:55:46 3 2 21 1 12 24 0 192.70 54 22.33 CHANGED hTSISSILSLKRE+P--LsLlLQIDppKlp.-DshI..lItNoLpALshKFsshphClVDusGhVlshGols+hI.sLsSIus.hc+ssusp+FSPshL+splY..spsIpcNLGIVllSNSuMEpuKSlLFpDY+sFsShGRRRPNELKIKVGYLNsDYSDpIsELVEIuoWsalLEALCYSFuLSFDDDD..DD-Es h.TTISSILSLKRE+PDsLsIlLQIDFpKLKp-csll..llYNSLKALolKFscLphChVDpsshVlsaGslhctIssl-ulss.LcppsuuppFSPhWLKNoLY..PcsI+cpLGIlslSNSsMEscKSlLF.DY+sFsuhGRRRPNELKIKVGY..LNVDYSDKIsELVEsuSWshlLEoLCYSFuLuFD-DD..DD--..... 0 1 6 12 +12028 PF12195 End_beta_barrel Endosialidase; Endobetabarrel; Beta barrel domain of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is approximately 80 amino acids in length.This domain is the beta barrel domain of bacteriophage endosialidase which represents the one of the two sialic acid binding sites of the enzyme. 
The domain is nested in the beta propeller domain of the endosialidase enzyme. The endosialidase protein complexes to form homotrimeric molecules. 26.50 26.50 28.40 34.90 24.10 26.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.85 0.72 -4.25 3 30 2009-07-28 13:31:40 2009-07-28 14:31:40 3 9 26 18 2 27 19 83.00 73 8.81 CHANGED SRSLHlTGGITKAANQpsATI+IsDHGLFAGDFVNFSNSAVTGVSGNMTVATVIDKNTFTVTTsNsQsSDlNNAG+sWsFGTS .........SRSLRlYGGITKAANQQVAYIRITDHGLFAGDFVNFSNSGVTGVTGNMTVTTVIDKNTFTVTTQNT.....QDVDQNNEGRYWSFGTS....... 0 0 0 0 +12029 PF12196 hNIFK_binding FHA Ki67 binding domain of hNIFK Mistry J, Gavin OL lg7 pdb_2aff Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00076. There are two conserved sequence motifs: TPVCTP and LERRKS. This domain is found on the human nucleolar protein hNIFK. It binds to the fork-head-associated domain of human Ki67. High-affinity binding requires sequential phosphorylation by two kinases, CDK1 and GSK3, yielding pThr238, pThr234 and pSer230. This interaction is involved in cell cycle regulation. 25.00 25.00 29.00 27.90 20.40 19.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.64 0.72 -8.11 0.72 -4.57 5 73 2009-07-28 13:32:34 2009-07-28 14:32:34 3 2 40 1 31 53 0 39.30 63 14.58 CHANGED SVsSQDPTPVCTPTFLERRKSQlhEl..sDD-cDDEIVLKLPl ......oVDSQGsTPVCTPTFLERRKSpVA-h..NDDDcDs.EIVFKpP.... 0 1 2 9 +12030 PF12197 lci Bacillus cereus group antimicrobial protein Mistry J, Gavin OL lg7 pdb_2b9k Domain This domain is found in bacteria, and is approximately 40 amino acids in length. This domain is found in bacillus cereus group bacteria. It is an antimicrobial protein. 
20.30 20.30 20.70 20.40 19.90 20.20 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.12 0.72 -4.37 4 37 2009-07-28 13:32:52 2009-07-28 14:32:52 3 1 31 1 9 30 0 38.90 38 34.23 CHANGED s.hhlpSssGsFANSFspsGsTWYhKGK.hpsspGsWsuaYE..s+ ...............pssG.FAssF.t.s....G..hpWYhKG...t.hc..scGpWsGaYE....t.... 0 2 4 5 +12031 PF12198 Tuberculin Theoretical tuberculin protein Mistry J, Gavin OL lg7 pdb_2g6r Domain This domain family is found in bacteria, and is approximately 30 amino acids in length. This protein is a theoretical model of the tuberculin protein from Mycobacterium tuberculosis. 25.00 25.00 83.90 64.50 18.90 16.30 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.46 0.72 -7.30 0.72 -4.06 2 6 2009-07-28 13:34:07 2009-07-28 14:34:07 3 2 2 0 0 6 0 34.00 77 21.36 CHANGED ohtplspsuGutS.u+NGsQhRhlAssGPtSuTh SLASVSSVSGGGSVSRNGNQARFVAPTGPASSTF 0 0 0 0 +12032 PF12199 efb-c Extracellular fibrinogen binding protein C terminal Mistry J, Gavin OL lg7 pdb_2gox Domain This domain family is found in bacteria, and is approximately 70 amino acids in length. There is a conserved VLK sequence motif. It is the C terminal domain of bacterial extracellular fibrinogen binding protein. It contains a helical motif involved in complement regulation. This motif binds to complement and changes its conformation to a form which cannot activate downstream components of the complement cascade. 21.40 21.40 21.50 21.80 21.20 19.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -9.08 0.72 -4.12 2 315 2009-07-28 13:34:20 2009-07-28 14:34:20 3 1 161 12 2 47 0 64.70 69 47.90 CHANGED TshphphppKllpAQphVp.Fc+T+TVusHRKAQ+AVNLlpFpaphcKhhLQcpID.VLK.shl+ ..TshphphppKllpAQphVp.Fc+T+TVusHRKAQ+AVNLlpFpashcKhhLQcpID.VLK.shlK..... 
1 2 2 2 +12033 PF12200 DUF3597 Domain of unknown function (DUF3597) Mistry J, Gavin OL lg7 pdb_2gqb Domain This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 126 and 281 amino acids in length. The function of this domain is unknown. The structure of this domain has been found to contain five helices with a long flexible loop between helices one and two. 23.50 23.50 23.50 23.70 23.40 23.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.75 0.71 -3.75 24 206 2009-07-28 13:34:35 2009-07-28 14:34:35 3 7 187 1 69 167 11 119.50 50 74.91 CHANGED IFupIhstIF..........ucutsAsPsusssssssssuss...................suP...ussPsu................ssVDVuslL-thssp.pup+LsW+TSIVDLMKhLslDSSLsuRKELApELGYoGD.psDSAsMNIWLHKQVMpKLAtNGGKlP ...................................................IFspIhstIa.............ucups..A.s.ss...utssss.s...sss.s.sssss.....................uAs......ussPsu............................ssVDVsAlL-shstp...suptL...NW+TSIVDLMKhLGLDSSLptRKELApELGYo.GD..hsDSAoMNlWLHKpVhpKLt-NGGKlP........ 0 8 33 51 +12034 PF12201 bcl-2I13 Bcl2-interacting killer, BH3-domain containing Mistry J, Gavin OL lg7 pdb_2ipe Domain This is a family of pro-apoptotic Bcl-x proteins, B cell leukaemia/lymphoma 2, or BIKs. BIK proteins rely for their activity upon an intact BH3 domain lying between residues 48 and 80, as in UniProt:Q13323. 
25.00 25.00 41.10 40.70 21.00 19.40 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.01 0.71 -4.76 2 21 2009-07-28 13:35:07 2009-07-28 14:35:07 3 1 19 0 10 21 0 135.00 52 98.40 CHANGED MSEsR.huRDl.hcTl.a-Ql.pPPsh.Es.uMp-......PhcDhD.hEChEGpstlALRLACIGDEMDlsLRuPRLsQLstlAhHp..LAhhYspT.slRslhRShhcuhTsL+ENIhp.WRs.sPGuWVSs-Qs.htLh.hlhLlh.LLuGuhaL.L ....MSpsRPloRDhhh-TlLaEQl.pP.sh.Es.G.ho-.....tEshsPhc..Dhs.hEChEssDtlALRLACIGDEMDVsLRuPRLAQLstlAMHS..LAhhYsQT.slRsVLRShhcGhTsL+ENIhp.WRs.sstuWVo...........p.s...h.lh.hlhLlh.hh........................................... 0 1 1 2 +12035 PF12202 OSR1_C Oxidative-stress-responsive kinase 1 C terminal Mistry J, Gavin OL lg7 pdb_2v3s Domain This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00069. There is a single completely conserved residue F that may be functionally important. OSR1 is involved in the signalling cascade which activates Na/K/2Cl cotransporter during osmotic stress. This domain is the C terminal domain of OSR1 which recognises a motif (Arg-Phe-Xaa-Val) on the OSR1-activating protein WNK1. 25.00 25.00 26.60 26.10 23.10 22.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.49 0.72 -4.32 16 453 2009-07-29 08:07:16 2009-07-29 09:07:16 3 6 84 3 231 395 0 35.70 50 2.76 CHANGED lpLhLRlc-sKKhtt.+.K-NpAIcFpFslt+DssE-VA .......lpLhLRlcDsKK.tt.+.+-NpuIcFpFslt+.DssE-VA.... 0 35 56 140 +12036 PF12203 HDAC4_Gln Glutamine rich N terminal domain of histone deacetylase 4 Mistry J, Gavin OL lg7 pdb_2o94 Domain This domain is found in eukaryotes, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF00850. The domain forms an alpha helix which complexes to form a tetramer. 
The glutamine rich domains have many intra- and inter-helical interactions which are thought to be involved in reversible assembly and disassembly of proteins. The domain is part of histone deacetylase 4 (HDAC4) which removes acetyl groups from histones. This restores their positive charge to allow stronger DNA binding thus restricting transcriptional activity. 25.00 25.00 26.20 26.20 22.70 24.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.60 0.72 -4.05 6 235 2009-07-29 08:07:40 2009-07-29 09:07:40 3 4 46 8 78 202 0 86.40 69 9.97 CHANGED ssDPslREpQLQQELLLlpQQQQIQKQLLIAEFQKQHEpLTRQHpAQLQEHLK.QQELLAhK+QQELhE+cRKLEQQRQEp........ElE+HRREQQL ........sDPslREpQLQQELLhlpQQQQlQKQLLIAEFQKQHEpLTRQHpAQLQEHl.K................QQ..Eh.........L.A.........hKpQQ.ELL.E..+ppKLEQpRQEQ.........ElE+p+REQpL......................... 0 5 12 36 +12037 PF12204 DUF3598 Domain of unknown function (DUF3598) Mistry J, Gavin OL lg7 pdb_2o62 Domain This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 230 and 398 amino acids in length. These proteins are formed entirely from B sheets which form a barrel structure similar to those seen in the lipocalin superfamily. 
21.20 21.20 21.30 21.20 21.00 21.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.69 0.70 -5.15 18 117 2009-07-29 08:08:10 2009-07-29 09:08:10 3 3 73 2 52 123 46 228.00 22 76.76 CHANGED spW-shh.pNLGE...WpGSFTchSPpG...-llpshsSlloLpuhsc.sphl+hslthas............pts.sp-hspcasulu......pphlaFss.GAFSpGshQLuPaucFuuEhGFlpcsRRhRlV.lascsGpLscloLIREahuus....susEpPsLps-p....LLGpWpGcshohhPDh..p.sph..hph.l.......................tsuslpusthha.tspp..spl.hhLPDGsusssPpplph.....cpsFhlElGWLlpssp...hpRllRpYsspGpWhuloLls.p ...............................................................................................................................hh..ph...tt....W.G.ah.hs.tt....p....pph..shhphp..........t..hp.....h.h...............................p........pth...........thhhh.s.Gsh.s.s...s.tht.....t.....................h.hh.....E.hhh..h..pt..s..pRhRh.shh..a........stpG...p.........LpplshlcEppsst..............sstpps.lp...pp.......h.hGpWpGpthohhs.sh.........h...............................................psthpsphh.hh.tt........ph.hhLPsGh.hssP....tplth.....tpsF.lphsWl.pssp...hpRllRsYsspG.hhu.ohhh..t................................................................................ 2 11 33 45 +12038 PF12205 GIT1_C G protein-coupled receptor kinase-interacting protein 1 C term Mistry J, Gavin OL lg7 pdb_2jx0 Domain This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF01412, Pfam:PF00023, Pfam:PF08518. GIT1 plays an important role in cell adhesion, motility, cytoskeletal remodeling and membrane trafficking. To perform this function, it localises p21-activated kinase (PAK) and PAK-interactive exchange factor to focal adhesions. Its activation is regulated by interaction between its paxillin-binding C terminal and the LD motifs of paxillin. The C terminal folds into a four helix bundle. 
25.00 25.00 25.20 26.70 22.70 21.40 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.54 0.71 -4.28 5 199 2009-07-29 08:09:01 2009-07-29 09:09:01 3 11 78 1 86 202 0 120.70 62 17.57 CHANGED -u-sDssLPSTEDVIRKTEQITKNIQELLRAAQEsKH-SFVPCSE+IalAVsEMAuLFPKRPssEsVRsSLRLLsSSAsRLQsECKKAsP.-..tssDhphlTQQVIQCAYDIAKAAKQLVTlTT ...............s....sPsLPSTEDVIpKTEQlTKNIQELLRA...AQ...E.s...K.HD.S.al.P.CSE+IHlAVTEMAuLF.P...K+.P..t..p.-.sVRoS...LRLLsuSAYRLQSEC....+Ksl...Ps-s........usssDhQLl..TQ...QVIQCAYDIAKAAKQLVTITT............................................ 0 21 28 49 +12039 PF12206 DUF3599 Domain of unknown function (DUF3599) Mistry J, Gavin OL lg7 pdb_3f3b Domain This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. This domain is the phage-like element pbsx protein xkdh. 25.00 25.00 37.60 119.80 24.20 22.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.54 0.71 -3.83 7 38 2009-07-29 14:17:48 2009-07-29 15:17:48 3 1 33 1 5 32 0 117.30 49 97.21 CHANGED MSYpphLs+RCDIYHLtppcsptt.+aGIPucclp..hpYsDTPDlpslsCYFhEKsp....ollQpEPppslhpSaLlHFPhssDIRhpDKslhc..GhsahLppP.+tl+sHHhEVhAlRcpp .MSYpphLsHRCDIYH.ttptspsG.+FGIPu-clpP.hoYP-oPDhpclPCYFsEKsQ....plIQpEPspslYQpaLVHFPhsADlRlNDKllWc..shsYhLchP.+pl+s.HHhEVhAVR--.p. 0 2 3 4 +12040 PF12207 DUF3600 Domain of unknown function (DUF3600) Mistry J, Gavin OL lg7 pdb_3fgg Domain This family of proteins is found in bacteria. Proteins in this family are approximately 230 amino acids in length. This domain is the C terminal of the putative ecf-type sigma factor negative effector. 
25.00 25.00 25.70 40.90 23.30 22.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.06 0.71 -4.70 6 85 2009-07-29 14:18:26 2009-07-29 15:18:26 3 1 85 4 6 63 0 158.60 80 75.11 CHANGED SIMADGIYGSFENLKKHAGsMTLEuYMRFsAKLSEAKDEMGTKEYE.FTKELKKLTNAKLAYGDSNGNIDYDuLSPAKREEMKKVSMuLQPYFDKLNGHKSSKEVLTQEEFDRYMEALMTHEIVRVKTKSTGAIKVEEVPEAYKERFIKAEQFMEYVDEKVR ..SIMADGIYGSFENLKKHAGsMTLEAYMRFsAKLSEAKDEMGoKEYE.FTKELKKLTNAKLAYGDSNGNIDYDtLSPsKREEMKKVoMsLQPYFDKLNGH..KSSKEVLTQEEFDpYMEALMTHEIVpVKTKSTGuIKVEElPEAYKERFIKAEQFMEYVDEKVR........ 0 1 4 5 +12041 PF12208 DUF3601 Domain of unknown function (DUF3601) Mistry J, Gavin OL lg7 pdb_3g1j Domain This domain family is found in bacteria, and is approximately 80 amino acids in length. 21.00 21.00 21.00 24.30 20.50 18.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.55 0.72 -4.04 6 50 2009-07-29 14:46:14 2009-07-29 15:46:14 3 1 32 2 6 48 0 82.00 55 54.34 CHANGED s..phLhsGshV+VIKsF+DYhGhTapsGEpaaFAsshahPY-sGaTLYI..up-K....sIhLpsps-sptcIhsps.cEYFE.lp ..sF.cYLYSGDYlKVIKsFKDYYG.FTHpsGEcaYFAstYaL.aEsGhsLYI...SpDKlN...........lssIYLpDRt-s........p.clhsHs.EEYFEIlE................. 0 6 6 6 +12042 PF12209 SAC3 Leucine permease transcriptional regulator helical domain Mistry J, Gavin OL lg7 pdb_3fwc Domain This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF03399. This domain is a helical domain in the middle of leucine permease transcriptional regulator. 
25.00 25.00 32.80 31.80 23.10 20.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.49 0.72 -3.84 10 37 2009-07-29 14:46:34 2009-07-29 15:46:34 3 1 36 5 26 41 0 77.90 28 6.27 CHANGED llpQlhsplhp-sIcsplpshlpshlpctpt+pp+ppl.IsoLScELacAFl+EplYhshLES+A-.ahs+hlKK+hhc+ .........h.hcplhspllppsVpsplpsllpp..hlpctptc..pt+tpl.IcoLucELYsAFl+EplYhhhl-o+A-.hhppplK++hhp..... 0 4 15 26 +12043 PF12210 Hrs_helical Hepatocyte growth factor-regulated tyrosine kinase substrate Mistry J, Gavin OL lg7 pdb_3f1i Domain This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00790, Pfam:PF01363, Pfam:PF02809. This domain is the helical region of Hrs which forms the core complex of ESCRT with STAM. 22.40 22.40 22.70 34.20 22.30 22.30 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.09 0.72 -3.73 5 116 2009-07-29 14:46:51 2009-07-29 15:46:51 3 7 82 1 70 114 0 93.20 61 13.22 CHANGED EcFlsNlRouVElFsNRM+SNpuRGRSIuNDSAVQoLFQolosMHPQLLchlcpLDE+RhaYEuLQDKLuQI+DARAALDuLREEHcEKLR.RtAEE ......-pFlcuLpsuVosFVNRMKSNphRGRSIoNDSAV.oLFpoIssMHPQLLchlppLDE+RlYYEGLQDKLAQIRDARuALsALR-EHcEKLR.RtAEE....... 1 23 27 52 +12044 PF12211 LMWSLP_N Low molecular weight S layer protein N terminal Mistry J, Gavin OL lg7 pdb_3cvz Domain This family of proteins is found in bacteria. Proteins in this family are typically between 328 and 381 amino acids in length. There is a conserved LGDG sequence motif. Clostridial species have a layer of surface proteins surrounding their membrane. This layer is comprised of a high molecular weight protein and a low molecular weight protein. This domain is the N terminal domain of the low molecular weight protein. It is a structural domain. 
25.00 25.00 48.70 42.20 20.60 20.20 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.82 0.70 -4.54 19 115 2009-07-29 14:48:39 2009-07-29 15:48:39 3 2 9 4 1 110 0 234.00 34 61.87 CHANGED spsYTVVQscYcKslKplQcGlpcssIosIsVhFD.Gp.ls..sVssttsssp.......AAspLhshV-sKLDsLGDGcYVDFpIoYssssphhT.ps-ls...shtstls....sclllssAss..sss...Gllh...tssssssssuAssslphSDlhchsh...ssspsuhpLohsspt.sshphGpl...........sssshssupslohsss..htlshucu.cplDlspShhhssssu.........hhsspssssssssspspVRVINAKEpTIDlDuSShpoAE ......................................sYTVVQscYcKslKplQcGlpcssIopltV.F-.Gp.lu..pVsstsssuc..t....AAcpLhshVcspLDsLGDG-YVDFpITYs....s....tschhT.pu-hc...shtspls....s+llIssAos.....ost.....GhVp...tssssspt.uAssslphuDhhphsh..sssspsuhploh.sstt.sshphGpl............tss.os.u.tslshsss.th.hthucs.chlDhs.....pShhhsssss.........hhsstsss.sssssspspVRVINAKEpTIDlDuSShpoAc..... 0 1 1 1 +12045 PF12212 PAZ_siRNAbind Piwi/Argonaute/Zwille siRNA-binding domain Mistry J, Gavin OL lg7 pdb_3da5 Domain This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. There is a conserved LKDIL sequence motif. There is a single completely conserved residue L that may be functionally important. This domain is part of an Argonaute protein. It is an siRNA binding domain. 21.10 21.10 21.10 21.90 20.80 19.80 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.15 0.72 -4.34 10 42 2009-07-29 14:51:18 2009-07-29 15:51:18 3 6 8 5 16 37 0 45.80 56 46.23 CHANGED AYAIlp+DSpsIEElL+luKENuILKDILsATtTIKasDucEtThTP ..sYAIhpKcShslEchL+luKENuILKDILsATsTIKYsDupEtThTP.... 0 1 15 15 +12046 PF12213 Dpoe2NT DNA polymerases epsilon N terminal Mistry J, Gavin OL lg7 pdb_2v6z Domain This domain is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF04042. There is a single completely conserved residue F that may be functionally important. 
This domain is the N terminal domain of DNA polymerase epsilon subunit B. It forms a primarily alpha helical structure in which four helices are arranged in two hairpins with connecting loops containing beta strands which form a short parallel sheet. DNA polymerase epsilon is required in DNA replication for synthesis of the leading strand. This domain has close structural relation to AAA+ protein C terminal domains. 21.50 21.50 21.80 22.30 21.00 20.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.10 0.72 -4.06 10 117 2009-07-29 14:53:03 2009-07-29 15:53:03 3 4 92 1 78 118 0 70.90 35 13.88 CHANGED csc+LRp+lsusFKl+GLhLRuEAoKYLs-sLtulscsEhEDhl-+Il-sVcKQPLSSshlEcullEsAVQEC ..........s...tlRp+lhssFKL+GhhLRsEAhcYL...sctL............psh.s..ct.Eh-chL-pll-sl-KQs.LsSshl-+sslEtAlpEC....... 0 24 32 57 +12047 PF12214 TPX2_importin Cell cycle regulated microtubule associated protein Assefa S, Gavin OL lg7 PFAM-B_2368 (release 23.0) Family This domain is found in eukaryotes. This domain is typically between 127 to 182 amino acids in length. This domain is found associated with Pfam:PF06886. This domain is found in the protein TPX2 (a.k.a p100) which is involved in cell cycling. It is only expressed between the start of the S phase and completion of cytokinesis. The microtubule-associated protein TPX2 has been reported to be crucial for mitotic spindle formation. This domain is close to the C terminal of TPX2. The protein importin alpha regulates the activity of TPX2 by binding to the nuclear localisation signal in this domain. 
20.30 20.30 20.50 20.30 19.50 19.70 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.40 0.71 -4.19 10 167 2009-07-31 14:33:35 2009-07-31 15:33:35 3 9 75 0 104 164 0 147.30 34 25.41 CHANGED s+t+lTsP+EPsLpTupRsRslRsKSsA-LEpEplppl..YKFKAR.lN++IhE.sussl.KKsssK.Ppch.uF+LcTpcRApcRuSsspppspphphpp+ss...hh.DssssPphpshssssP+sstsstcs+pchspp.cc..Kt+PLs.KI...........hcs+tp.u.hsct++ppssPhpFcs .....................................s...hlTpPppP.LpTppRsRss...p.....sK..Ss.u..E.hEtEpltcl..........KF..K..AR.lsp+IhE..su......h.p+sss+..s.p..tFcL.chp....cRhp....p+pspt.p..t..ppt..........p...p....sp.s..............hh.s.st.s.h..h.hs.sppsthh.p.p.c...tt.......c.t.pPh.....hh............htst.........pt...hs.tF.................................................... 0 26 56 72 +12048 PF12215 GBA2_N beta-Glucocerebrosidase 2 N terminal Assefa S, Gavin OL lg7 PFAM-B_2416 (release 23.0) Family This domain is found in bacteria, archaea and eukaryotes. This domain is typically between 320 to 354 amino acids in length. This domain is found associated with Pfam:PF04685. This domain is found in the protein beta-Glucocerebrosidase 2. It is found just after the extreme N terminus. This protein is located in the ER. The N terminal is thought to be the luminal domain while the C terminal is the cytosolic domain. The catalytic domain of GBA-2 is unknown. 
20.70 20.70 20.70 21.20 20.60 19.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.20 0.70 -5.27 38 367 2009-07-31 14:39:48 2009-07-31 15:39:48 3 14 220 0 196 361 64 289.80 26 35.26 CHANGED GhPLGGhGuGsIuhus+GchshWpl..csGc.......aha..t.h.........pFula.p.psspstshsLssps.pss...................LssWpa.h........susYpAtYPhuahpac.s.shtsplphEtFSPllPtshpcSShPVAVFpaplpN.ssspslplSlhlSapNslGh.....hs.sptssthhhts.....................h.pssthpGllhps.pstp......spupGphslAs.ss..th...clhtpspWsssu.........sst-lWpsFst...DGpl.....................ptsttscphuuAlulchsltPGpscclsFsluWchPs..h...tFussspth...........+tYocFF..uttsp.sAhslAshALpphppa .....................................................................GhPLGG.......lGuGsIspu.hcGpFppapl...psu......................................h................sspFslhhp..ps....tt.t.hhh...hs.sps.p.t................................................sltsWpa.hst......................tpspYpuLYPpuWh.Yc........s.....tlplshcthSPllPpsYpcS.ShPsulFhaplpN...s.s.p.hplolhhohp.N.slG...........t.stts.......................................................ttst.hpGllhpp.p........s.............s.......shsluspps...sh..........plshpst.as.su.............supphW..pphtp.....sGph......................................ttsstts..ppl....uuAlssp.htl.........Ptt.spp..lpFsLuWshP......h.......tFststpha............................RpYTcFa..upptp.su.pl....schALpphtph................................................................ 0 63 110 161 +12049 PF12216 m04gp34like Immune evasion protein Assefa S, Gavin OL lg7 PFAM-B_2496 (release 23.0) Family This protein is found in archaea and viruses. Proteins in this family are typically between 265 to 342 amino acids in length. The proteins in this family are or are related to the m04 encoded protein gp34 of pathogenic microorganisms such as murine cytomegalovirus. m06 and m152 genes are expressed earlier in the intracellular replication phases of these microorganism' life cycles. 
They function to inhibit MHC-1 loading and export. gp34 is theorized to prevent immune reactions from NK cells which would ordinarily recognise and attack cells lacking MHC. 25.00 25.00 29.40 29.10 23.20 24.70 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.87 0.70 -5.04 11 182 2009-07-31 14:41:40 2009-07-31 15:41:40 3 1 5 0 0 165 0 261.40 22 87.96 CHANGED hShssR......................hhLlhllhhsshlhssssssspp..................Cp....p.ppthpphhphtps............htCahKctss...sphsss..sss.plhsCpLP...sVhVNAoWolEWll.sslpuSlsshuYapSossSsPpFptshhsah..phh.h....pp.ltsKsGF+VDpSs.....GNLaVasNAo.......ssscuV+C+LphChW..Tossshs..ss.........DcshhpshSsVLsLPDYssP.ths....h.+....ssasasptpps.sss..............lssLoVlVsLlaVsshslLhYhas..........spLhRRhhopDssspa .............................................................................h.hhhhs.h....hs...h.ssspt..................ss......p..htt.....p...s..........htC.htppss......pphspp..sss.p.hasCpls...slplNAoWpscWll..sshss.......lsstshapSTsoSsPpFpthhsshhs.............sssssGFpVDpss.....GpLalhssss..........tsstslpCpLplClh...........sps.shh..ss..............................................................ppth.pshothhpLssat...t..........................s.ts.h.s.s...tp..thp........................t..hsslslhlshlh..ssshhh..LhhhYt..............s.hhcph............................................................................................................. 0 0 0 0 +12050 PF12217 End_beta_propel Catalytic beta propeller domain of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is typically between 443 and 460 amino acids in length. This domain is the highly conserved beta propeller of bacteriophage endosialidase which represents the catalytically active part of the enzymes. This core domain forms stable SDS-resistant trimers. There is a nested beta barrel domain in this domain (Pfam:PF12195). 
The endosialidase protein complexes to form a homotrimeric molecule. 20.30 20.30 21.10 308.50 19.80 17.60 hmmbuild -o /dev/null --hand HMM SEED 367 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.39 0.70 -5.77 3 30 2012-10-02 00:45:24 2009-08-03 10:19:26 3 7 24 18 2 29 0 420.90 83 47.65 CHANGED NGELaKITDTPaYNAWTQDKTFVYDNVIYAPFMAGDRHGVsNLHVAWVRSGDDGQTWSTPEWLTDLHsDYPT......VNYHCMSMGVCRNRLFAVIETRTLSsNKLpsAELWDRPMSRSLHlTGGITKAANQQsATI+IsDHGLFAGDFVNFSNSAVTGVSGNMTVATVIDKNTFTVTTsNsQsSDlNNAG+oWsFGTSFHcSPWRKT-LGpIPS.ssushsVTElHSFATIDDNuF..AVGYHNGDVuPRELGlLYFSDAFsSPGsFVRRpIPuEYEsNASEPCVKYYDGVLYLTTRGTLuTpPGSSLHRSoDlGQoWsSLRFPNNVHHSNLPFAKVGD-LIIFGSERAFGEWEGGAPDsRYcGSYPRTFMsRVNVNsW..SLDDVEWVNITDQIYQGcIVNSAVGVGSVCVKDuWLYYIFGGEDFFNPWSIGDNsuKhPYKHDGHPADLYCYRlKIc ..QGELFKITDTPWYNAWTQDKTFVYDNVIYAPFMAGDRHGVNNLHVAWVRSGDDG+TWTTPEWLTDLHENYPT......VNYHCMSMGVVRNRLFAVIETRTVSGNKLQVAELWDRPMSRSLRlYGGITKAANQQVAYIRITDHGLFAGDFVNFSNSGVTGVTGNMTVTTVIDKNTFTVTTQNTQDVDQNNEGRYWSFGTSFHSSPWRKTSLGTIPSFVDGSTPVTEIHSFATISDNSF..AVGYHNGDIGPRELGILYFSDAFGSPGSFVRRRIPuE.YEANASEPCVKYYDGILYLTTRGTLSTQPGSSLHRSSDLGTSWNSLRFP.NNVHHSNLPFAKVGDELIIFGSERAFGEWEGGEPD...NRYAGNYPRTFMTRVNVNEW..SLDNVEWVNVTDQIYQGGIVNSAVGVGSVCIKDNWLYYIFGGEDFLNPWSIGDNNRKYP..YVHDGHPADLYCFRVKIK.. 0 0 0 0 +12051 PF12218 End_N_terminal N terminal extension of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is approximately 70 amino acids in length. This domain is found in the bacteriophage protein endosialidase. The two N-terminal domains (this domain and the beta propeller) assemble in the compact 'cap' whereas the C-terminal domain forms an extended tail-like structure. The very N-terminal part of the 'cap' region (residues 246 to 312) holds the only alpha-helix of the protein and is presumably the residual part of the deleted N-terminal head-binding domain. The endosialidase protein complexes to form homotrimeric molecules. 
22.20 22.20 22.40 27.30 22.10 22.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.26 0.72 -4.50 5 30 2009-12-16 16:06:22 2009-08-03 10:28:11 3 9 25 18 2 27 0 66.00 69 7.08 CHANGED AlGDGVTDDTsAlousLuuossGpKIDGpGLTYKVSoLPDlSRFKNARFVaERIPGQPLaYVSE-FI AlGDGVsDDToAlSshLusuosGpKIDGtGLTFKVSTLPDVSRFKNARFLFERIPGQPLFYsSEDFI...... 0 0 0 0 +12052 PF12219 End_tail_spike Catalytic domain of bacteriophage endosialidase Mistry J, Gavin OL lg7 pdb_1v0e Domain This domain family is found in bacteria and viruses, and is approximately 160 amino acids in length. There are two conserved sequence motifs: VSR and YGA. This domain is the C terminal domain of the bacteriophage protein endosialidase. The endosialidase protein forms homotrimeric molecules and this domain complexes into a tail-spike stalk. The stalk region folds in a triple beta-helix that is interrupted by a small triple beta-prism domain. The tail-spike is a multifunctional protein device used by the phage to fulfill the following functions: (i) to adsorb to the bacterial polySia capsule (ii) to de-polymerise the capsule to gain access to the outer bacterial membrane, and finally (iii) to mediate tight adhesion to the membrane, a prerequisite for the initiation of the infection cycle. 
25.00 25.00 26.40 113.20 24.00 22.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.86 0.71 -4.39 5 36 2009-08-03 10:56:41 2009-08-03 11:56:41 3 8 24 24 2 34 2 136.90 78 17.33 CHANGED sDsRVSRDFsYGATPNRAIPTFMGTDGVRTVPAPLcFosplss.DlsVsHLTl+ASTSuNIRSEhhMEGEYGFIGKoVPoDsPTuQRLIlSGGEGTSSooGAQITLHGSNSSTuRRAVYNAsEHLFQuuslhPYlDNVsALGGPSNRFTTlYLGSsPIlT .p-paVSRDFsYGATPNRTlPTFMuTsGVRTVPsPlcFoD.....DlsVpuLTl+AuTSupVRAEVphEGsYulIuKpVPoDssTuQRLIVSGGETTSSADGAMITLHGSsSSTPRRAVYNALEHLFENGDVKPYLDNVNALGGPGNRFSTVYLGSNPVVT 0 0 0 0 +12053 PF12220 U1snRNP70_N U1 small nuclear ribonucleoprotein of 70kDa MW N terminal Assefa S, Gavin OL lg7 PFAM-B_2533 (release 23.0) Family This domain is found in eukaryotes. This domain is about 90 amino acids in length. This domain is found associated with Pfam:PF00076. This domain is part of U1 snRNP, which is the pre-mRNA binding protein of the penta-snRNP spliceosome complex. It extends over a distance of 180 A from its RNA binding domain, wraps around the core domain of U1 snRNP consisting of the seven Sm proteins and finally contacts U1-C, which is crucial for 5'-splice-site recognition. 21.90 21.90 21.90 22.60 21.50 20.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -3.66 37 328 2009-08-03 10:59:28 2009-08-03 11:59:28 3 7 272 0 222 311 0 93.50 35 26.69 CHANGED MsptLPPsLLsLFtPRPPLpalsPh-psscc+ppst..loGlupaL..sthpchpc......p.s....ps..sEohhp++tRc+cEKppphppplpcplppasPppDs .......MTphLPPNLLuLFsPRPPl.alPPh-+hP..c..c++pst............hsGlAsal......sph..cc.p-........................sss.......st..sEoppp+pcRc+.cEKh-ptp.pcl.cpplpp.acPppDP................................. 0 74 122 185 +12054 PF12221 HflK_N Bacterial membrane protein N terminal Assefa S, Gavin OL lg7 PFAM-B_2550 (release 23.0) Family This domain is found in bacteria. This domain is typically between 65 to 81 amino acids in length. 
This domain is found associated with Pfam:PF01145. This domain is the N terminal of the bacterial membrane protein HflK. HflK complexes with HflC to form a membrane protease which is modulated by the GTPase HflX. The N terminal domain of HflK is the membrane spanning region which anchors the protein in the bacterial membrane. 20.50 20.50 20.60 20.60 19.90 20.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.22 0.72 -4.21 70 1238 2009-08-03 15:45:40 2009-08-03 16:45:40 3 3 1224 0 245 716 1088 56.70 53 14.02 CHANGED MuWN-Ps......ss.......p..pDPW....................Gp..sp..G....PPDLDElhRchpc+lsuh...FG....Gp...u ................................MAWNpPG................NNs......pcpDPWGssp..........................pss+GG+...-QG...............PPDLD-lFRKLs+KLGGh....hG..GKG............. 0 46 115 184 +12055 PF12222 PNGaseA Peptide N-acetyl-beta-D-glucosaminyl asparaginase amidase A Assefa S, Gavin OL lg7 PFAM-B_2578 (release 23.0) Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 558 and 775 amino acids in length. There is a conserved TGG sequence motif. PNGase A is a protein which cleaves glycopeptides. 
19.70 19.70 26.80 21.00 19.00 18.70 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.53 0.70 -5.61 9 253 2009-08-03 15:52:16 2009-08-03 16:52:16 3 6 146 0 168 266 3 347.50 26 62.87 CHANGED EsFplh.P.ssTs.......hp.hlhhstVFs.ShhpPaVs..lYhPPs....sashlhlNloV.pSpGpQaDRhAhhalssl.VFhsSTtE.ss.....Th.pDhohapsLhpts.ph.hsLuNhhscs..hTG.assslThhh..t...ssphsshhlPl.spto.shNh..uall..Psssshsp.h.lPssT.Rull.l.tpG.us.-EFWaSNl...................shREIQLYhDGlLAGVV.PaPlIaTGGls.haWRPlsuIsshsh+p..pIDlTPhLslhpcus...hplpVssLpsut......lososu..WsloGslhLals..............pus.lhuspPhhshsRhl...sh.ssGhsushpYp.suphplsh..........u.hpacpulcaSssu.hsp.shsQs.....hhptshlspphtt.s.....hpssplhash..Pl ............................................................................P.......................................sthhlhptsF....u.oh.s.p.P.hh...............hs.P..s.s............s.........s...a.....s....h...lllphps..p....spGpQaD.Rl..shla...ls......ss-lh....RoSTsEP...p.s.t..u...l..h.Wsh.KDlopY..sLh.....p...t...s..t..p..lhh.L.sNllssp........hTG.apsslohtha................................t......t................................................................t................................u.........Dhll................P...lstt.............s..........h.l.................s.tpp..h.......t...ph....lP.....p.NshRshlplhs.ssp..us..-.EF.W..Ys..Ns.................................................u...u.......saREV.lh.l.........Du....plsGshhPaPlIaT.GG..l...s.....Ph.hWcPlsulsuFsl.s.hcl-lTPaLsh.L...hD.up.......HphthpVs....s.h....................................................................Whlsutlhla.s......................t....h..........................t..h..................t...t.........h...........h.........h...t...h........................h.p..thth.....p....tt...h...................................................t................................................................................................................................. 
0 42 102 147 +12056 PF12223 DUF3602 Protein of unknown function (DUF3602) Assefa S, Gavin OL lg7 PFAM-B_2582 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 78 and 89 amino acids in length. 20.60 20.60 21.60 20.60 19.80 20.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.41 0.72 -3.59 33 384 2009-08-03 16:04:58 2009-08-03 17:04:58 3 4 131 0 289 349 0 70.60 22 57.83 CHANGED NIh..ssp......tspsssttls..pt..osshpsst......a.oTGRGGuGNht.....ssp..thsRphpDh-..h.css.cpsph..............sGRGGsGN ..................................................................t...p..ss.tsst...............a.soGRGGAGNht........t.p..st.pR.t.psh-.....p.t..t...........................hs................................... 0 77 157 240 +12058 PF12224 Amidoligase_2 Putative amidoligase enzyme Bateman A agb Iyer L Family This family of proteins are likely to act as amidoligase enzymes [1] Protein in this family are found in conserved gene neighborhoods encoding a glutamine amidotransferase-like thiol peptidase (in proteobacteria) or an Aig2 family cyclotransferase protein (in firmicutes) [1]. 
23.60 23.60 25.70 23.60 23.50 23.20 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.69 0.70 -4.77 53 463 2012-10-02 17:21:26 2009-08-04 10:24:11 3 7 264 0 248 457 70 231.20 19 63.15 CHANGED pppphGlElE.hs.......GlshppsuphlsphhGs.psppt..............................thtthpl...................tssph............spaclhhDuSlhppppttt...................................................hshElVoPslphpp....lpplpclhctL+csGAhs.ssS..sGhHlHlsspshs...spslpshlphhhthpchlhcthplsh...R+hs.as..cshspphlcp..........hh....ssshpplhchah..tsscsps.......sth.ttphhpt.lshp.hhp.+s...TlEaRh.s.........uslcsschpthlphshtls .................................................................................................................t...phGlElE.hh..........................................shs.tp.ph.sp..htphhss...ph.................................................................hth..............................................................................................t..ph..................................tpWplhtDu.Sltstpt.........................................................hshElVSP.hlthpp......hp.plp.pl............hp............tL....cpt.s.shs.spo..........CGhHlHl...s..........s..............s.s...............h.....s........hps..l+s.lhph...h...hhhE.sh...lhphhshs............c+...p..a.....s.......ps.hs.pt.hhpp............................h......sht..tl...p.....hhh........p....t...................h.tt......ptath...lNhpthhp.cs...........TlEaRh.s..............ushptsphpthlthshhh......................................................................................................................... 0 90 169 223 +12059 PF12225 MTHFR_C Methylene-tetrahydrofolate reductase C terminal Assefa S, Gavin OL lg7 PFAM-B_2600 (release 23.0) Family This family is found in bacteria and archaea, and is approximately 100 amino acids in length. There is a conserved NGPCGG sequence motif. This family is the C terminal of methylene-tetrahydrofolate reductase. 
This protein reduces FAD using the reducing equivalents from reduced FAD, subsequently reduces tetrahydrofolate. The C terminal of MTHFR contains the FAD binding site and is the catalytic portion of the enzyme. 20.20 20.20 24.20 23.50 19.60 19.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.58 0.72 -11.03 0.72 -4.49 33 210 2009-08-04 10:43:33 2009-08-04 11:43:33 3 6 173 0 95 202 48 93.30 36 30.57 CHANGED lhsuhssh.hhusppththhhpp..CpsCGpChLspTGslCPhs.CPKuLhNGPCGGs.psG.+CEVss-.tcClWphIYcRlcphsph-p.lpplhPPt-appp ..............h...t.sphhhthppththhh.t..CpsCGpChLspTuhlCPhs.CsKsLhNGPCGGs..psG.pCEls......s.....p..tcCsW.hhac+hcthsph-t.htpl.hsshshp.h................ 1 52 74 83 +12060 PF12226 Astro_capsid_p Turkey astrovirus capsid protein Assefa S, Gavin OL lg7 PFAM-B_2608 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 241 and 261 amino acids in length. These proteins are capsid proteins from various astrovirus strains. 25.00 25.00 29.10 28.40 18.70 16.40 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.45 0.70 -5.12 3 174 2009-08-04 10:47:39 2009-08-04 11:47:39 3 3 7 4 0 161 0 208.40 83 61.93 CHANGED YFVYDFQGcRVSTTETGVFCLuSIPAADSKSRYNNQIToPSIGYRN-SGTGTSFALDuASWWNILDVTQTGVIFGQPRLGVGVIQTMKTLKQHIKDFTEPAVKKYYPGTTNLDQpLKsRLNLAEGDPVISMGDTTGRRAALFYRTSDERFILLFSTTDDPGuQYpSLKMLsFWNWSLSDsKsAFLA+LRTVQFANLs....-SEsupCDSDDDDLSDVTSLFEQADLGDETDFK ......................YFVYNFQG-RVSTTETGVFCLAAIPAATTTSRYNNQITTPSIGYRNASGTGTSFLLDAASWWNILDVTQTGVLFGQPRLGVGVhQTMKTLKQHIKDYTEPAIQKYYPGTTNLDE..Q..LKQRLNLAEGDPVISMGDTTGRRAALFYRTSDEKYILFFSTTEDPGAQYQNLKMLYFWNWSYSDTKQQFLDHLRTVQFANLD....DSQPAPYDSDDDDLSDVTSLFEQADLGDETDFK.. 0 0 0 0 +12061 PF12227 DUF3603 Protein of unknown function (DUF3603) Assefa S, Gavin OL lg7 PFAM-B_2609 (release 23.0) Family This protein is found in bacteria and eukaryotes. 
Proteins in this family are about 250 amino acids in length. 23.70 23.70 52.10 51.80 23.50 23.50 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.37 0.70 -5.20 9 135 2009-08-04 10:58:43 2009-08-04 11:58:43 3 1 135 0 20 77 0 214.10 77 86.22 CHANGED LLDQVPLLKVss.LacYIEN-LS-LPppLLp-VaQKuYlRKNHERhQL-YCFVVTDGpsIlAVDTlGYpIPIRKSRLIPRQEQLVYEMlcstcs.pYphp.cphp..KEYHILSPsPphhsGLTRKERQLKQLLFMALDQL+oopNpAEIRYWhTEWsPppYspIQpMsFE-sWppLYcEs+pGWS-+H.phCEsLIKGQPFFEKLWEhEptsKVN .LLDQVPLLKVDuTLYHYIENELLELPQKLLEDVHHKAYIRKN..HERL..QQEYCFVVTDGKG..IIAIDTIGYNVPIRKSRLIPRQE.....QMVYEMVENVQAE..KYEFQlEEhE...KEHHIL....SPSPalMNGLTRKERQLKQLLFMALDQLHTTKNTAEIRYWaTEWDPSAY..GhVQHM-FEDIWs+LY-EAKsGWS-KHEQLCERLVKGQPFFEKLWEMENEpKVN.............. 0 3 12 14 +12062 PF12228 DUF3604 Protein of unknown function (DUF3604) Assefa S, Gavin OL lg7 PFAM-B_2610 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 621 and 693 amino acids in length. 
20.40 20.40 20.80 20.60 19.80 20.30 hmmbuild -o /dev/null HMM SEED 592 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.16 0.70 -13.00 0.70 -6.35 34 171 2012-10-03 00:45:34 2009-08-04 12:52:57 3 2 97 0 20 168 1789 367.20 26 62.25 CHANGED ssscpsYaGDhHlHTshShDAhhhGs.phsP--AYRaA+G-sl......tps.sG....h.spLscPLDFhsVoDHA-hhGhhpthhssssth.....tpshtp.h.thhhpss.pssttshhhhsshsss.hspsht...tss.t....................htpssWpphlpsA-paNcPG...pFTshlGaEWTutss..usNLHRsVIaRsssphs.phlPFos...hpSssPccLWsahcshcppsuup.....hLAIPHNuNhSNGhMF..thsshs...GpslDtsYAcpRt+hEPlsElTQlKGsSE....sHPhLSPsDEaAsFEh..achushshssttssphh........sYsRsALpcGLplEpphGsNPYKFGhIGSTDoHTuhso.s-EsNFaGKhus.spss..ptphs............tt.............ppthph......uASGlAuVWAc-NTREAIaDAhcR+EsYAToGPRlpVRFFuGasaspsshpss-hsptuYucGVPMGusL........ssup.APsFLlhAh+D.....P.puAsLDRlQllKGWl-.ssGpspEpVYDVAh..Scspphssssph.sslGsoVDlsssoaosslGAsELpslWpDPDFcssQpAFYYlRVLElPTsRWosaDAl+hGhp...........ssPt..TIQERAaoSPIWYsP ............................................s...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t...........huuGLsuVaApcpoR-ulh-Ahtp+csYAToGsRlh..lpF.............t...............................................................................................................................................................................................................................................................................................
.................. 0 8 10 14 +12063 PF12229 PG_binding_4 Putative peptidoglycan binding domain Bateman A agb Bateman A Domain This domain is found associated with the L,D-transpeptidase domain Pfam:PF03734. The structure of this domain has been solved and shows a mixed alpha-beta fold composed of nine beta strands and four alpha helices. This domain is usually found to be duplicated. Therefore, it seems likely that this domain acts to bind the two unlinked peptidoglycan chains and bring them into close association so they can be cross linked by the transpeptidase domain (Bateman A pers. observation). 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.30 0.71 -3.85 117 1823 2009-08-04 15:04:53 2009-08-04 16:04:53 3 16 849 4 342 1540 49 115.10 15 32.20 CHANGED hshsspcls.hph.shsphlppshshstssshh.phhtphht.........phs....hplshccpt....lpphlspl......spphsptsp..sAphph......ssss...hsls.s.pphG....hpl.ctcph...hpplhpslpss.ppshp ...........................................................................................t.pp.lt..hth..phpphlpt.hhsh..pht.t.ph.h....phhtp...phtt..............phs.........hphshDp.pt....l.p.sh.ls.pl......spph.sstsp...suphph......ss.ss......hslt..s...tp....G....hpl.Dtcph.tptlhpslpst.pt................................. 0 165 288 322 +12064 PF12230 PRP21_like_P Pre-mRNA splicing factor PRP21 like protein Assefa S, Gavin OL lg7 PFAM-B_2642 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 212 and 238 amino acids in length. The family is found in association with Pfam:PF01805. There are two completely conserved residues (W and H) that may be functionally important. PRP21 is required for assembly of the prespliceosome and it interacts with U2 snRNP and/or pre-mRNA in the prespliceosome. This family also contains proteins similar to PRP21, such as the mammalian SF3a. 
SF3a also interacts with U2 snRNP from the prespliceosome, converting it to its active form. 24.50 24.50 24.60 25.90 24.20 24.40 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.53 0.70 -4.85 16 364 2009-08-04 15:43:10 2009-08-04 16:43:10 3 14 282 1 262 366 5 219.60 28 38.44 CHANGED spshppplcpt..ps.....lLccshpRu-atcpp+ppcpcpc-tpEcc+hpaAuIDWpDFslVtTl-Fsct-.ht-LP.P.lshspLhttoLpp+ppshp..tpss......................pstpcccs-psspsssss..........pthphtsuspuplKlpcshpspspp.ts...........hhhCPIoGchIPtschspHh+l.LlDP+...a+cQ+cph.t+ppp...ophssspVhpNlKpluc+ ...................................................................h......plppp..ps.hp...lL-psh.Rs-Wt+apcpp+p+c--ctEpE+ltaAp..IDWHDFVVVET.lcFp.........s-...psphPs...........P.ho..p..-l.ttt.L...p....ct.......h........t....t..tptht.....t.......t............................................................................p..p.pt..c..ct.tpspt.t..................................t..h...s..s..s....s....t...s.th......hlpcsYssptpp.h..............ss.thhlsPlsuppIPhschpcHMRItLlDP+...WhEQ+c+....tch.tp....s.hss.-ltpsL+pLAp...................................................... 0 89 148 218 +12065 PF12231 Rif1_N Rap1-interacting factor 1 N terminal Assefa S, Gavin OL, Eberhardt R lg7 PFAM-B_2647 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 135 and 146 amino acids in length. Rif1 is a protein which interacts with Rap1 to regulate telomere length. Interaction with telomeres limits their length. The N terminal region contains many HEAT- and ARMADILLO- type repeats. These are helical folds which form extended curved proteins or RNA interface surfaces. 
23.00 23.00 25.60 23.50 19.20 20.50 hmmbuild -o /dev/null HMM SEED 372 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -12.00 0.70 -5.49 34 226 2009-08-04 15:44:22 2009-08-04 16:44:22 3 4 185 0 157 234 1 323.80 22 19.96 CHANGED QLAGsscsS+lDAYhp..LhsuLpuhsslPsptsltpKhsLhspaIpRDls............tp.spus..-spLsppALpLLshhlapspIuuplssDFsh.....Fll-+slsshpssshPKslspchhpllupQsFss+lhTssRls+llssLcsIspp.lpGpullhpRLsIYp+LlpQstphMhscus.Whs......pllsshLpshK-lRpp.AlsLuhphuhslG.....................pppplu+slh-lhspslpsp...phhp.htp+LppMhss..cs...ushVPpIWulllLLL..Rsth..lcpWpahppWlhllQtCFNss-...sKhpAhhAWs+hla.....shpssppsspchlphLhpPl.hsQLc++pssc.t....phpphslsuhtsLL..YYuF+Pss..s.phLDhhWsphl ...........................................................................................................hcsa.....h.t.hhph..pt.......tl...pphshhhphhpp..clt..................s........s.pLsptALphlshhlhpsplsstlspp.h...................l.pslp...sh..ts......sKslhpthhhl.....luh.........QpFs....sc........lh..s.pt..h.....sp..h.lhshhh.h.pp...hputsl.hptlt...............lhh.......pLlpph.t...Mhpp...ss...hWht............................llshhl.p....s..spclp.p.....A.....hslt....hth.s.h.hlt...............................ppt.l..uphh.t.phhspt..h......sp............lpphhts..ps...tshs.plWshhlhLL...tpph.........hcshsahsphLpl..phsFps.s...ss..h+t.AhhuWctll......................shp.....s.h..s..s....t..chlch.LhpPl...p.lcpct.s.h.t....phh.hllh.....pltshl.....h..sF...c...ss...s....t.h.................................................................................................. 0 31 70 124 +12066 PF12232 Myf5 Myogenic determination factor 5 Assefa S lg7 PFAM-B_2654 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00010, Pfam:PF01586. There is a conserved CSD sequence motif. Myf5 is responsible for directing cells to the skeletal myocyte lineage during development. 
Myf5 is likely to act in a similar way to the other MRF4 proteins such as MyoD which perform the same function. These are histone acetyltransferases and histone deacetylases which activate and repress genes involved in the myocyte lineage. 21.70 21.70 21.70 21.70 21.30 21.40 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.87 0.72 -3.37 28 259 2009-08-04 15:48:27 2009-08-04 16:48:27 3 5 107 0 76 227 0 63.60 47 25.66 CHANGED .paYuus.....S-uoSPp.SsCSDGMsDhsu.P.shopcpssa..sus.Yhspsss.sspssKsssl.SSLDCLSSIV-RI ..............................paYShs.....S-soSPp...SNCSD.G.Mh..........-hsu...P.shspcpssa......cus.Yhspsss...s....tss.+sssl..SSLDCLSSIV-RI.............. 0 9 13 35 +12067 PF12233 p12I Human adult T cell leukemia/lymphoma virus protein Assefa S, Gavin OL lg7 PFAM-B_2655 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are approximately 100 amino acids in length. p12I binds to the immature beta and gamma-c chains of the interleukin-2 receptor retarding their translocation to the plasma membrane. p12I forms dimers which bind to these chains. 25.00 25.00 138.40 138.20 18.10 18.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.66 0.72 -3.72 2 51 2009-12-01 16:56:24 2009-08-05 16:42:23 3 1 3 0 0 51 0 97.90 96 100.00 CHANGED MLFRLLSPLSPLALTALLLFLLSPGDVSuLLLR.PPAPCLLLFLPFQILSNLLFLLFLPLFFsLPLLLSPSLPITMRFPARWRFLPW+APsQPAAAFLF MLFRLLSPLSPLALTALLLFLLsPGDVSGLLLRPPPAPCLLLFLPFQILSNLLFLLFLPLFFSLPLLLSPSLPITMRFPARWRFLPWRAPSQPAAAFLF 0 0 0 0 +12068 PF12234 Rav1p_C RAVE protein 1 C terminal Assefa S, Gavin OL lg7 PFAM-B_2692 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 621 and 644 amino acids in length. This family is the C terminal region of the protein RAVE (regulator of the ATPase of vacuolar and endosomal membranes). 
Rav1p is involved in regulating the glucose dependent assembly and disassembly of vacuolar ATPase V1 and V0 subunits. 20.80 20.80 20.90 20.80 17.50 20.30 hmmbuild -o /dev/null HMM SEED 631 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -13.08 0.70 -6.36 13 405 2009-08-05 15:44:38 2009-08-05 16:44:38 3 26 270 0 281 395 1 467.30 29 21.95 CHANGED Ghl+oac.Ahl.....sscppclcWhhs.pplpTGIpNsoLhoGSS.hsKsulV...Dps+ppLTIWDs+puh....LEYcccF......c-pIpDLDWTSTscsQSIlulGFsp+VlLhoQLR.....YDYsNpsPoahsI+clsIpshTsHsIGDSsWhssGslVVuoGNQLFlhD+pl-........spsolsophhhss......DLhclsShLNGPL.PVYHPQFLhQslluGKlsLV+cILLpLappL+hhp..ps.hc-l-SsLsls.pcFh...pss..htth.........................sc.hpshscsluusLsEpLo.chsLPhLopHpQhpLhsllEsVspVpKpccshD.NuhRFlLsh+.....ah.++sp....psSlohR-lsWAhHScsp-ILhshlspphp..h.WpcAREstlhhWhp-.ssLhspFEs.lA+tEaoKs-c+DPscCulaYLAL+KKpVLpuLWRhAshH.EQsphh+FLuNsF.sEsRW+TAALKNAaALLSK+RYhhAAuFFLLADsL+DAVNVlhpQLcDlsLAIAVsRVYEGD.sGPlLtElLcpclLPpAhp-usRWhsSasaWhL+++-hAlRALloshhsLhpspshssp..s....hsKSFLs-DPALllLYppLRpKsLpplpGu...cVss+hEa-hlL+suclYsRMGCDhLALsL 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................th..................................................................................................................................................t....t.hh.s......ph.s.t.lpt.Lh..p..tLP.LophpQh.Lhslh....-sltt.h..........p............p....p...........p...........c.........o.....lD-sGh..R.al.....Lsh+...................ah........hhpsp.....................h.p..p..s.....l..shp.....c.....hsWAhHSp..u.p.-.L.......ls.hl.st.tcs.............p.pWpp.hRthGlsaWlcs..ps.L..................Rpph......Ep...lA+....st...ap.....c..........................t....p....DP..l.D.s........ulaYLA.h+.K.K.sllhGLaR................spcp...p+htpF.h.u.psF....s-sRWRpA.ALK.NAauLLuKpRFc...................huAAFFLLA..s..s..L+DAl........pVCl..pplpDlQLAlsIuRl..YE...................u......-......s.................u.......s.......s...h..p...........p...lLppc.lL........s.......................s.............p............p...................s...........s...........ahtShsaWhlpchs.A..lcsLlp......p.........p....................................................................................................................................................................................................... 
0 95 142 225 +12069 PF12235 FXR1P_C Fragile X-related 1 protein C terminal Assefa S, Gavin OL lg7 PFAM-B_2701 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 126 and 160 amino acids in length. The family is found in association with Pfam:PF05641, Pfam:PF00013. This family is the C terminal region of the fragile X related 1 protein FXR1P. FXR1P contains two KH domains and a RGG box that are characteristic motifs in RNA-binding proteins as well as nuclear localization and export signals. FXR1P is thought to regulate mRNA transport and translation. 26.60 26.60 28.30 27.70 23.80 26.40 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.45 0.71 -4.11 9 307 2009-08-05 15:45:56 2009-08-05 16:45:56 3 10 42 1 87 266 0 115.40 58 24.25 CHANGED VEQLRLERLQIDEQLRQIGhG.RssssRs......-+E+GYhoD-u....ssol+soR....sYssRGRGRRGsshsp...GTNSEhSNsSETES-++cE.Sshuluup-.-..Rps..ptt.stRR..tGtGRG..uGRGR.Gss+ssssohuSshcs.DsNPY.SLhcssEss ...........V-QLRhERLQIDEQLRQI.G.u.....ps.st.........tptth...-ts........p.u.oR....sYts.R.G+GR...RGss..hs.o..........................GTNS-..hSNsSETES-++-ElSDhSlusp-.-....R-....s....h.pRs.stRR.....GGtGRu...uuRGR.....G...s.+sspsp...Shhcs.spNPh.uhhcss-s.p.................... 0 6 12 33 +12070 PF12236 Head-tail_con Bacteriophage head to tail connecting protein Assefa S, Gavin OL lg7 PFAM-B_2709 (release 23.0) Family This family of head-tail connector proteins is found in bacteria and viruses. Proteins in this family are typically between 516 and 555 amino acids in length. This protein is found in Phage T7 and T3 among others. 
19.90 19.90 20.20 20.70 17.70 19.80 hmmbuild -o /dev/null HMM SEED 480 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.11 0.70 -6.12 28 301 2009-09-11 16:20:59 2009-08-05 16:50:28 3 2 253 0 33 273 923 417.40 20 87.51 CHANGED appl+scRpsacs+hc-hucaslPthhshtss......ptsppphpphapususculpsLAupLhtulhPsspsWF+Lslss......pt.spttsplcthLsplp+h...hhphhps..ssahssltpshpcLlssGsuhlhhtp......p..shsthpshslspaslpcDs.pGs.l-hlh+.......t..pchuh..cslspplppthpst........cspcplclhptlh.c..............spsh.asshh.spstts..hlpcushcphPalssRapchsGE....sYGcu..st-hLsDl+sLpplpcthlcutphsspshhllsssuhhpstsltsuutshh.stsstsshshhsl.pptsc.hssuttslpslcpRlppuahhs.h.......psupchTApElphpspEhtphLGslhuthspEhlpPllpRshthht..+suh...lPs.....hPp...th.......lcsshhosLstht+tt.shsulpphhshlutluthtsslhstlshsphhctlusthGhs.ssllpoppp .................................................................................................................................................................tthcstR.psacsphpchtchhhPphh.h.pt.......................tppp...pphhpssustuhp.Luutlhtslhs.sp.ahclph.p...............p..tt..lc.hlptlpph...h.phhpp.....sshh.th.phhhplhshGsuhh.hl.p..........t.th.thp.hslsphhltpss.pu.p.lshlh+..........pc.hsh..stlscphp.pthpp..........................p.pcpl...cl.hphlh.p.................tpshsatshh.......tshp........hhpp.sshpthPhhssRa.hhss-....sYGpu..s.phlsshctLp.hppthhphhthshp.s.hh.sssshhhp...tsltsushshh...s........s....ttt...thh.h..........ss.hsssht.hpphcpplppshhhs.h.........tsspphTApElh.htpEhtthLGshhphhpp-hhtPllpphhthh...cts.............ls........Pp.......h..............h...c.ph.oshstht+t..thssltphh....th...h...utl..hth...s.p..h.h.s..tlshsthhpthsth.Ghs..thhhs.t................................................................................................ 
0 12 23 30 +12071 PF12237 PCIF1_WW Phosphorylated CTD interacting factor 1 WW domain Assefa S, Gavin OL lg7 PFAM-B_2805 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 180 amino acids in length. This domain is the WW domain of PCIF1. PCIF1 interacts with phosphorylated RNA polymerase II carboxy-terminal domain (CTD). The WW domain of PCIF1 can directly and preferentially bind to the phosphorylated CTD compared to the unphosphorylated CTD. PCIF1 binds to the hyperphosphorylated RNAP II (RNAP IIO) in vitro and in vivo. Double immunofluorescence labeling in HeLa cells demonstrated that PCIF1 and endogenous RNAP IIO are co-localized in the cell nucleus. Thus, PCIF1 may play a role in mRNA synthesis by modulating RNAP IIO activity. 25.00 25.00 25.20 25.20 23.90 24.20 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.32 0.71 -4.98 14 172 2009-08-05 15:54:26 2009-08-05 16:54:26 3 8 108 0 110 164 19 167.70 39 24.91 CHANGED KLcpLYcps.......s.c..stc.phFhtplaslLhRY.pshh....st...usGhQuALstsVF-sL+ppasVohECFASPLNsha.....tpaCSAF.......sDsDtaFGShGsFhs..apPspG....SFEsNPPFscElhsthsp+htplL...........ss.uppsLSFlVllP.tWtpssh..hpphppStappps.hllsutpHtYhcG ........................................................t.....stt.chF.h.+lasLLhRY...pshh........Gs...t.............t...upuhQuulPspVF-sL+ctFs.........Vs..hECFASPLN..saa...........................ppYCSAF.......sDTDsaFGShGs...hh-......FpPhuG....................SFEsNP...........PFscELhcthssHh.c.cLL.................ts.ospPLSFl..VhlP.pWcps.s...........hpphcpShapRpp....hll.shp.HtYhpG....................................................................................... 1 49 62 89 +12072 PF12238 MSA-2c Merozoite surface antigen 2c Assefa S, Gavin OL lg7 PFAM-B_2755 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 263 and 318 amino acids in length. 
There is a conserved SFT sequence motif. MSA-2 is a plasma membrane glycoprotein which can be found in Babesia bovis species. 20.60 20.60 56.90 20.60 20.20 20.20 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.41 0.71 -4.65 14 159 2009-08-05 15:54:32 2009-08-05 16:54:32 3 2 1 0 4 148 0 182.00 36 75.26 CHANGED VsMPSDsSpDALsAhh-ILcslK-clPFcTSLFDptVLpsL-hQ-sDplFcSLl.RVsLIKphLotFNuFLN..DNPt+hLssppsEMTKYYKKHIsscDspVKDYshLVKFCNDFLDSESPFM+hYKthNpY-EL...VcKtPupssSPs.SSsQusoss.pPupsssu.soossstsus.s.............psscPAcospss.............uuSFTFGGLTVA .....................................VsM.ussupcshcALhtlhtllKpcsPFpTS.FDs.slc.hshQss--lFKhLl-ulhhhchhlpchNuFls...pssch.sphcschpcYYhcpIhscpup...h.shstLsphhpsFlsspushhhh..........cth.........ppa..........-...........s+.Kt.tps..p.h.sSspspsps....pPupspso............................................AspPs+PApospss.................GSSFTaGGLTVA....................................................... 1 4 4 4 +12073 PF12239 DUF3605 Protein of unknown function (DUF3605) Assefa S, Gavin OL lg7 PFAM-B_2795 (release 23.0) Family This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 161 and 256 amino acids in length. 
23.30 23.30 23.60 23.90 21.80 23.20 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.13 0.71 -4.39 21 242 2009-08-05 15:54:41 2009-08-05 16:54:41 3 9 139 0 185 240 3 143.70 31 61.71 CHANGED hsWppl+cIlpsNcL-hhpR.PStLc+YlcapccltupY.GSlhcallpc+Lt..W.s.sp.cth........tstPFt.spDh+lLhNDaPYulE.slsHLVVWoKhtL..sDsso..sDlsscuRpcI-cFVccsFhp+h........ss-pVlWF+NWpuLpSV+ulEHh.HVhlhp .................................sWpplppllt....p..sp...LphhpRtssphc+Y.ht.aptplctp.a.uolhpalltp+Lt..W.t..s..t......................................shs.F.tp....s......sD...h+lLhNDWPYul..p.sl.sHlVVWoKh.l....s.c.ts................scho.psRt.lppalpcp.Fht+h...............stpplhWF+NahsLpSV+ulpHhHVhlhs.............................. 0 53 101 154 +12074 PF12240 Angiomotin_C Angiomotin C terminal Assefa S, Gavin OL lg7 PFAM-B_2808 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 197 and 211 amino acids in length. This family is the C terminal region of angiomotin. Angiomotin regulates the action of angiogenesis inhibitor angiostatin [1]. The C terminal region of angiomotin appears to be involved in directing the protein chemotactically [2]. 
23.20 23.20 23.30 24.20 22.50 23.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.51 0.71 -4.82 8 207 2009-08-05 15:55:09 2009-08-05 16:55:09 3 5 58 0 103 167 2 196.00 58 24.48 CHANGED YVEKVERLQpALsQLQAACEKREQLEhRLRTRLEpELcSLRsQQ+Qupssuuosus.....hssssLpEpLREKEERILALEADhT+WEQKYLEESsMRpFAMDAAATAAAQRDTT..IIsHSPssSh.soSh.......pEElhsAN+RsQEMEsRIKsLaAQIlEKDAhIKVLQQRSR+-suKs-psS............LRPA+SlsSI.su...tsoshpu+spoLoss .....................YVEKVE+LQQALsQLQAACEKREQLEhRLRTRLEpELcuLRhQ.Q......R......Q...u........sst.ss..s.ssp................hsAssLhEh.LREKEE+ILALEADMTKWEQKY.LEEssMRpFAhDAA...........ATsAAQ.R..DTT.....lIsHSsssSh..soSh...........pEEllhAs+Rpt-MEsR...IKsLHAQIlEKDAMIKVLQQRS.....RK-s.uKss..psS............hRPA+Sl.SItsA.....ssGh.uhpoo.p..s............................. 0 15 25 53 +12075 PF12241 Enoyl_reductase Trans-2-enoyl-CoA reductase catalytic region Vella Briffa B, Coggill P pcc Pfam-B_10602 (release 10.0) Family This family of trans-2-enoyl-CoA reductases, EC:1.3.1.44, carries the the catalytic sites of the enzyme, characterised by the conserved sequence motifs: YNThhhFxK, and YShAPxR. In Euglena where the enzyme has been characterised it catalyses the reduction of enoyl-CoA to acyl-CoA in an unusual fatty acid pathway in mitochondria. the whole path performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. 
30.10 30.10 30.30 31.70 30.00 30.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.32 0.70 -5.38 49 655 2012-10-10 17:06:42 2009-08-06 14:20:28 3 4 531 5 162 543 120 236.20 56 59.63 CHANGED ++sGoAGWYNssAFcctAcptGLaA+SlNGDAFSsEhKppsI-hIKp-h.Gp.lDLVlYSLAoPhRpcPc....TGclapSsLKPIGpsaospslD.hccc...plt-sol-PAo--EItsTlpVMGGEDWphWlcALpcAsVLA-Gs+TlAYSYIGs-lTaPIYhcGTlGcAKcDL-psAtslspcLus.hsGpAaVoVhKAlVTQASuAIPlhPLYluhLaKVMK-cGhHEGCIEQhpRLFpcpLY ......t.KsGTAGWYNsAAFc+hAcptGLYA+SlNGDAFSsEhKppsI...-hIKp.....DL...G...p.VDLVVYSLA....SPhRpp.Pc.........TGEla+SsLKPIGcs......h.oupulD..Ts+-....slhEsolEPA........T.-p.EIpsTVsVMGGEDWchWI-ALtcAGVLA-GsKTlAaSYIGs-lTaPIYacGolG+AKhDLDpsupuLsp+Lus.hG....GsAhVuVLKAVVTQASS..AIPsMPLYluhLFKVMKEcGsHEGCIEQl.RLFp-+LY............... 0 43 84 122 +12076 PF12242 Eno-Rase_NADH_b NAD(P)H binding domain of trans-2-enoyl-CoA reductase Vella Briffa B, Coggill P pcc Pfam-B_10602 (release 10.0) Family This family carries the region of the enzyme trans-2-enoyl-CoA reductase, EC:1.3.1.44, which binds NAD(P)H. The activity of the enzyme was characterised in Euglena where an unusual fatty acid synthesis path-way in the mitochondria performs a malonyl-CoA independent synthesis of fatty acids leading to accumulation of wax esters, which serve as the sink for electrons stemming from glycolytic ATP synthesis and pyruvate oxidation. The full enzyme catalyses the reduction of enoyl-CoA to acyl-CoA. The binding site is conserved as GA/CSpGYG, where p is any polar residue [1]. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.64 0.72 -4.19 44 648 2012-10-10 17:06:42 2009-08-06 14:30:23 3 3 537 5 165 901 265 78.60 60 19.95 CHANGED lIcP+l+GhICposHPhGCptsVppQIsYl+s...pstl.sG...PK+.......VLVlGASoGaGLASRIssAF.GssAsTlGVhFE+sso.- .....................................IIKP+lRGFICsTsHPsGCEtsVccQIsYlKs..........pGsI..tsG............PK+...............VLVlGASoGYGLAuRIuAAF.GuGAsTlGVFFE+suo......................... 0 43 86 123 +12077 PF12243 CTK3 CTD kinase subunit gamma CTK3 Wood V, Coggill P pcc Pfam-B_12814 (release 23.0) Domain The C-terminal domain kinase (CTDK-1), is a three-subunit complex comprised of Ctk1, Ctk2, and Ctk3, that plays a key role in regulation of transcription and translation and in coordinating these two processes. Both Ctk2 and Ctk3 are regulated at the level of protein turnover, and are unstable proteins processed through a ubiquitin-proteasome pathway. Their physical interaction is required to protect both subunits from degradation, and both Ctk2 and Ctk3 are required for Ctk1 CTD kinase activation [1]. The mammalian P-TEFb is mirrored by the combined complexes in yeast of the CTDK1 and the Bur1/2 [2]. 33.70 33.70 33.70 33.70 33.50 33.50 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.55 0.71 -4.54 9 129 2009-08-07 14:05:10 2009-08-07 15:05:10 3 5 116 0 105 123 0 134.20 37 32.69 CHANGED hDPFEVRhpFoshLp+LsAShpShpKAApaALKa.pDh-EDLasCILEpLEcs..shNsRsNIhaFl-p.hsE.shpps......pssYlchlpRDlh+lVDsVsPcsssst.ANlpsVR+lLpslpp+phls.pplp-l..........tsLcsRc ........hDPFEVRhpFos.Lp.+LsAShpShQKAApaALKa..+-hsEDLasCILEpLE+s..................shNs..RsNIhYFI-p.lh-huppps.......................p.sYlchl....pRDlh+lV.-uVsPsssu........u...h....sNl+ps++.....VLpslps+phLss.phlpcl..........t.lpt+.............................. 
0 32 62 90 +12078 PF12244 DUF3606 Protein of unknown function (DUF3606) Assefa S, Gavin OL lg7 PFAM-B_2813 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 85 amino acids in length. There is a single completely conserved residue G that may be functionally important. 20.80 20.80 21.30 23.80 20.50 19.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.19 0.72 -4.34 28 202 2009-08-07 15:46:44 2009-08-07 16:46:44 3 2 117 0 106 199 9 55.80 27 86.33 CHANGED sssps+tsstDpsplsls-phElpYWs+chslopspLcpAVcpVGssspsVcpcLsp ..................tpppttstD+splshs-paElpYWs+chslotppLcpAVcpVGss.sppVctaLt........... 1 18 48 69 +12079 PF12245 Big_3_2 DUF3607; Bacterial Ig-like domain (group 3) Assefa S, Gavin OL lg7 PFAM-B_2816 (release 23.0) Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 27.00 5.00 27.00 5.00 26.90 4.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -8.71 0.72 -4.41 11 1694 2012-10-03 16:25:20 2009-08-07 16:47:49 3 301 461 0 392 7792 931 42.20 24 4.75 CHANGED sustps.....tpsstSshhPsushc-.aolpFhlhDpAGNpsphshpphhhDshhss.....Pstsas ............................t................................................................t..........a.....p....lp.....h..h..s..sDpAGN....p........s.s..p...t.h...t.h.h.h.cs................h........................ 0 165 256 330 +12080 PF12246 MKT1_C Temperature dependent protein affecting M2 dsRNA replication Assefa S, Gavin OL lg7 PFAM-B_2862 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 231 and 255 amino acids in length. There is a single completely conserved residue P that may be functionally important. 
MKT1 is required for maintenance of K2 toxin above 30 degrees C in strains with the L-A-HN variant of the L-A double-stranded RNA virus of Saccharomyces cerevisiae. MKT1 is a 93 kDa protein with serine-rich regions and the retroviral protease signature, DTG. This family is the C terminal region of MKT1. 19.40 19.40 20.60 20.40 18.50 16.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.29 0.70 -5.37 24 169 2009-08-07 15:47:53 2009-08-07 16:47:53 3 8 150 0 118 167 0 233.90 25 31.59 CHANGED RhLpLhGals..........cppphTtaGcsLtpsssh.............htEshllhlELlRhslLssp..............s....sshpss..sc-pphlhLlSRlholhplpppshsYpGPls+plLsF+shlphl+pslppLlEsllsshlhpGcssp......stsphtplstpLPFhhss.ssshGlhschaLpchhpptpsppps.p...h.hst.......................a.pssslpc-LspuhpFWcslhpsspplsppphhsts..........hphascAscalppth ...........RhLpLhG..als..........pppphTtaGpsL.pshp...stp..........hpEshhlhlELlRhslLssc.................................s....tss.hpss....s--pp.hlLlSRlholhpl........pp.....c......s.....hsYs.GPls+pLLsF+shlphlppsLRpLhEs..llsuhllsGcssR............................spschtplshp..LP.Fhtss.ssshGlhs+haLpch.p..p.tpsp..s.pt..ph..hh.tsp...........................a.ps..shppsl.chuhpFac.tlhpshpplsppth.stp...........t.appAspahpt................................... 0 42 69 102 +12081 PF12247 MKT1_N Temperature dependent protein affecting M2 dsRNA replication Assefa S, Gavin OL lg7 PFAM-B_2862 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 231 and 255 amino acids in length. There is a single completely conserved residue P that may be functionally important. MKT1 is required for maintenance of K2 toxin above 30 degrees C in strains with the L-A-HN variant of the L-A double-stranded RNA virus of Saccharomyces cerevisiae. MKT1 is a 93 kDa protein with serine-rich regions and the retroviral protease signature, DTG. This family is the N terminal region of MKT1. 
19.40 19.40 30.80 29.30 18.90 18.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.99 0.72 -3.66 25 157 2009-08-07 15:51:14 2009-08-07 16:51:14 3 9 139 \N 111 153 0 88.80 33 11.26 CHANGED PsDhH-llGp+LPtElYaYhStGLlusplhsslopG.lh-psP..Lsst.....ocpYR+Llt.cplt.l....+spslsLlsppL...s+haphKpIpshhWF ..PsDlH-llGp+LPpElYaYhSpGLlusclhsslosGplh..p...P.............Lsst....tSppYR+Llt.cslh.h....+spslsLLsp.L...pRaaphKpIphhhWa................. 1 36 64 96 +12082 PF12248 Methyltransf_FA Farnesoic acid 0-methyl transferase Assefa S, Gavin OL lg7 PFAM-B_2872 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 110 amino acids in length.Farnesoic acid O-methyl transferase (FAMeT) is the enzyme that catalyses the formation of methyl farnesoate (MF) from farnesoic acid (FA) in the biosynthetic pathway of juvenile hormone (JH). 21.40 21.40 21.70 21.50 20.90 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.15 0.72 -4.00 23 286 2009-08-07 15:52:06 2009-08-07 16:52:06 3 50 107 0 180 312 1 98.80 28 18.05 CHANGED HltLoss.......hPh-ss.hhEIVlGuapNTtSsIRppht........ssslscspoPslL.sshc.phahlp.apsGplpVsps...G.cstPFlpap..Dsp.....thslpYhuF.osWssssha ....................ltLoss........sp-..ssshhElhlGGapNspSsIRpsp..................................spslsp.s.pTssl.l.s.spEh+pFWIs..hpsGhlpVGps.....u...c..t.pshhpap..-sp.....shsl.pahGh.ouWss.s............................................................... 0 77 104 162 +12083 PF12249 AftA_C Arabinofuranosyltransferase A C terminal Assefa S, Gavin OL lg7 PFAM-B_2900 (release 23.0) Family This domain family is found in bacteria, and is typically between 179 and 190 amino acids in length. This family is the C terminal region of AftA. 
The enzyme catalyses the addition of the first key arabinofuranosyl residue from the sugar donor beta-D-arabinofuranosyl-1-monophosphoryldecaprenol to the galactan domain of the cell wall, thus priming the galactan for further elaboration by the arabinofuranosyltransferases. The C terminal region is predicted to be directed towards the periplasm. 25.00 25.00 86.80 86.20 19.80 18.60 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.08 0.71 -4.85 9 161 2009-08-07 15:52:31 2009-08-07 16:52:31 3 2 159 0 34 126 0 178.10 55 27.40 CHANGED sIPcsLcssIslAYTD..TDGpGpRAD+cPPuAspYYsplDcslpcpTG+sRscTVVLTsDhoFLSaYPYaGFQuhTSHYANPLA-FspRAstI-.....sWSpLpssschlAALcpsP....WpsPssFlhRpuu....ps.....uaTlcLApDlYPNpPNVRcYsVpFssulF..-sPtFslpplGPFVls.sRp .....................DIP-hLps-lslAYTD..TDG.GpRuDRRPPGuppYYspIDtsIpchT.G.+.tccTVVLTADauFLSYYPYaGFQuLTSHYANPLApF-cRAspI-.....SW.....up...L...p...os--FltALDphP....WpsPsVFlhRtuu....ps......................uYoLRLApDVYPNpPNVRRYsVphssslF..sDP+.FsVpp..lGPFVlslRp....... 0 6 22 31 +12084 PF12250 AftA_N Arabinofuranosyltransferase N terminal Assefa S, Gavin OL lg7 PFAM-B_2900 (release 23.0) Family This domain family is found in bacteria, and is typically between 430 and 441 amino acids in length. This family is the N terminal region of AftA. The enzyme catalyses the addition of the first key arabinofuranosyl residue from the sugar donor beta-D-arabinofuranosyl-1-monophosphoryldecaprenol to the galactan domain of the cell wall, thus priming the galactan for further elaboration by the arabinofuranosyltransferases. The N terminal region has been predicted to span 11 transmembrane regions. 
22.80 22.80 116.60 36.00 22.70 22.70 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.58 0.70 -5.76 5 168 2009-08-07 15:53:10 2009-08-07 16:53:10 3 3 160 0 37 136 0 421.50 50 67.31 CHANGED LGQMllulVVAulVAsVuLhAIARVEWPAFNSSNQLHALTTVGQVuCLAGLsuuGLLWR+..G....R.R...plARLGohshlSAFoVVTLGMPLGATKLYLFGISVDQQFRTEYLTRLTDTAGLRDMTYIGLPPFYPAGWFWLGGRlAALTGTPAWEMFKPWAIlSIAIAlsVALVLWusMI.RFEYALVVolATTAlsLAYuSsEPYAAlITVLlPPsLVLAWuGLuu....pcpGGWAAVVGsGlFLGlAAoFYTLLlAYAAFTlslMALLlAuu.......hRRuaDPLlRLlVIAVlSGAIALlTWuPYLLtAhRGsPA-SGTAQHYLPc-GAcLsFPMlpFSLlGALChLGTlWLVsRARoSsRAGALAVGVlAVYLWSLLSMLsTLuGTTLLSFRLQPTLTVLLuAAGAFGFVEssts.....lutRhps..uRRVlusAsAVGAlGAl .....................................htlssAlllAssVulVuhhAlspVpWPAFsoSN.h+ALTTVGQlssL..sslsuhshlW++..s..............................R..R......hLspl.suhlhsuu...hsVsTLGhPLuATKLYLaGISVDQpFRTpYLTRLTDosuLpDMoYlsLPPFYPsGWFWlGGRhAsLhGhPuWEhFKPWAIhSlAlAsslulsLWtRhl.phchAhhlslATs.AlsLsauusEPYAA.lIs.lh...lPPhLV....LshpuLpu............................ttuWuAllGsGlFLGhuAoaYTLhsAhsAholllhAlllAuh.......................hct..u..h..cP.L........h.....RLsVlGsluhAIAhhsWhPYLltshps.s.susouoApHYLPs-GAtLshPMlphSllGslCLlGhlWLVhRu+sss.AsALulGVlulYLWsLLSMlsTLutTTLLuFRLpPsLolLLssAGshGhsEhsts.......hstp.......uR...t..lh.shsusl.uhhGul............................... 0 6 23 34 +12085 PF12251 zf-SNAP50_C snRNA-activating protein of 50kDa MW C terminal Assefa S, Gavin OL lg7 PFAM-B_2919 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 196 and 207 amino acids in length. There is a conserved CEH sequence motif. SNAP50 is part of the snRNA-activating protein complex which activates RNA polymerases II and III. There is a cysteine-histidine cluster which contains two possible zinc finger motifs. 
23.30 23.30 23.30 25.70 23.10 23.20 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.89 0.71 -11.46 0.71 -4.93 20 210 2009-08-07 15:54:04 2009-08-07 16:54:04 3 7 163 0 144 212 4 183.30 30 46.58 CHANGED tpcahlLuoQsLs-L+DshhChsstph.h..............spsphpuuaFFIcGsFYsDhR.....sscshDhSpsIhpa......sppps.............shuc.hpstpM-csphsDLplplGpPhhahHQGsCEHhllhoslRllsptc...shccstYPhhshcsphppptC.hC+httAphllhs.sphtscsPuahCssCFcha+asssGccls......pFpsatYhcc ...............................................................................h.tphhlLusQpLs-L+Dtl.C.s-h.h................................................................hstphhpSuaFalcssF..Y..sDhR................spshDhSpsIhcW......tpppp............................................thsp..hp.....st..pMcps..pasDL.p.l+l..G...P..YlYhHQ.............GsCEHhllhsDl..R...h.......hp.pc.............s.s........pt.YPhhh..h.p.........h.t.tppC.lCphh.A...phls.s.cphtspsPsahCc.Cachh+hs.tpGphlh......pa.sh.h.................................................................... 0 62 85 120 +12086 PF12252 SidE Dot/Icm substrate protein Assefa S, Gavin OL lg7 PFAM-B_2926 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 397 and 1543 amino acids in length. This family is the SidE protein in the Dot/Icm pathway of Legionella pneumophila bacteria. There is little literature describing the family. 
18.50 18.50 18.50 18.50 18.40 18.20 hmmbuild -o /dev/null HMM SEED 1439 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.27 0.69 -14.42 0.69 -7.26 7 111 2009-08-07 15:56:26 2009-08-07 16:56:26 3 4 15 0 19 89 0 795.40 33 87.88 CHANGED MPKYVEGVELTQEGMHAIFsRMGHusIoSGoIYNG.PTIDptALspQGFMPVLTGVGP+pDSGHWIMLIKGPGNQYYLFDPLGKoSGEGYQNILAsQLPhGSTLSVIPNsssLNhGLCGYWVASVGLRA+AtLNpcsPPsLhNLGQTITsEMRNELscDGYcKITsWLRAVuDEFP...cGDsQP.DAKALREsTEKs.L+I-IPs.V.PsKDpoPKElslcPTsPQ.s.VPsWNGFSLaTD-sV+sAApYAYDNYLGKPYTGsVEusPAshGGph........h..RQpHGLAHTLRTMAYAElIVEEARKAKLRGETLtKFKDGRTlADVTPEELKKIMIAQAFFVsGRDDEtSstcp......YtKYHEQSRDAFLKYVcDNcSsLIPDlFKDpc-VsFYApVIEDKsHDWsuoPAHVLINQGHMVDLVRVKQPPESaLEpYFsohpsWIGopAsEAVFuhQRQFFHATaEVVsGF......DS-NpEPHLVVsGLtRYVIGEDGpPIREt.cpupK..cG-LKsFsQsYKLK-NERFMRVDEaLKLPEVQpTFPGuGK+LEGGhPGhs.hsYapRLNShpRARCENDVsFCLtQLQpAHcKsKIDPIKpAFQSSpcKsRRQPNhDEIAAApIIQQIhANPDCIHDDHVLINGQKLEEpFFRDLLAKC-MAVVGSLLNDTDItNIDTLMRHEKDTEFHSTsscAVPVK.IG-tW..cpRIpspusslT...QhKHDLIFLMQNDAWYFSRVNAIAQNRDKGSoFKEVLITsLMTPLTsKuLhDTS+u.........sPp+LaRGL.sLsEEFsptLINQANshIANTTppLFTDhSsEAFKQIKLNDhSphSuRTsASTTTshpLspphWs.....SNVIFEMLDPDGLLHPKQVGpHssGoEsEFSVYLPEDVALVPlKVThD.GKTppGcsRalFThVAVKSPDFIPRHESGYAVEPFLRMQssKLsElpsuIEK..sp.t..................PphEsIFs.LQscluh.ph.sclSstYKsFLpppVsPVLEpCLsulhpsssshLsKALAsFPoDpQWSAFN.sEAhpAKtQMDAlKQMltpKVVL-.....ALTQCQ-ALEKQNIsGAL-ALKpIPuEKEhup...IupELREQIQus+Q-....LESLQRAssTPlVoDccKV+.RY-sLIpssoK+lT-LEKupLssLDslKKuIusLsNLpQElTlLRNEKlRMHosoDK.VDFSDIEpLEpQlQ.lcTKLsDAYLlplTKplSAL-p.hPKs.o..-lKohlupFhshhs-IEhLRNERIKKHGuSKDPLDhSDLDKLpGpLQtlNQSLVssLlpsIRsSlsQMc.spTFchQcctIppNh-hLscLEKoLDcScsucK.+EDlsKLpsLLlsKQ.KAYPpMlQLQh+SEshIpQLRElCpsHaDsLsKsRpARLQEL-+...puGI...lGNlhasl...TshlGLTsDEpl-I+hKpQoLARFKs-LhNDK.DhDpLIspLAcKpPSELQEuLGISc- 
.........................................................................................................................................................................................................................................................................D..VhtshpYsapphLtpsYs..Gs.....h..c.t..ss....t...t.h..........................R..HGLuHThRThhhuplhhEtu+.........tthtsGpolADlo.p-l+Kl.IAQhFFVsGR-sEtS.hs....................YtcYH..utptF.cYsccp.....sclFp.pc-lphYuthI.Dc.tp.atsostt.hLlp.uHMlDLhRsKtP.Eshlt.........h.ph...hG..ss.slhthtRthFhAThtsVs.h......ssp.........sa.s..s................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pp.p...p.Ll.th.pplh..tp.t....t..pp.h......................................................................................................................................................................................................................................................................................................................................................................................................................... 0 8 8 19 +12087 PF12253 CAF1A CAF1B; Chromatin assembly factor 1 subunit A Wood V, Coggill P pcc manual Family The CAF-1 or chromatin assembly factor-1 consists of three subunits, and this is the first, or A [1]. 
The A domain is uniquely required for the progression of S phase in mouse cells [2], independent of its ability to promote histone deposition [1] but dependent on its ability to interact with HP1 - heterochromatin protein 1-rich heterochromatin domains next to centromeres that are crucial for chromosome segregation during mitosis. This HP1-CAF-1 interaction module functions as a built-in replication control for heterochromatin, which, like a control barrier, has an impact on S-phase progression in addition to DNA-based checkpoints [2]. 21.20 20.90 21.30 20.90 21.10 20.70 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.90 0.72 -3.86 32 308 2009-08-11 15:32:32 2009-08-11 16:32:32 3 13 258 0 216 310 1 80.70 39 11.05 CHANGED hKhlpFtEssRPPYhGTao+..........hh.h..+sPhppchsshDY-YDSDtEW.....EE..-E-G...E...-l...-.s........----.....---...-.-...p--...........-hDs..F ........................KhLpF..p.EshRPsYaG..Tas+................p...sthlhsRsPhsp.....-.......s....lDY......-hDSDtEW.....EE......-.E.G...E.....sl.c.s........--.--.....-.--......c..-....s--.......-.Ds.......................................... 0 70 115 177 +12088 PF12254 DNA_pol_alpha_N DNA polymerase alpha subunit p180 N terminal Assefa S, Gavin OL lg7 PFAM-B_2966 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00136, Pfam:PF08996, Pfam:PF03104. This family is the N terminal of DNA polymerase alpha subunit p180 protein. The N terminal contains the catalytic region of the alpha subunit. 
20.60 20.60 21.30 20.60 19.80 20.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.36 0.72 -4.16 37 311 2009-08-11 15:36:44 2009-08-11 16:36:44 3 14 261 0 202 307 3 65.20 42 4.65 CHANGED LpcL+thRputppttsth..csc...-tp...pIYDEVDE--YcclhcpRhtp.DDFlVDD...s...G.GYsDpGt--.W-c ................................LtpL+thRputpppt.ph.........-V-......-.p...slY-EVDE-pYp+lVcpR.pp.DDFlVDD..................s...G..GYsDDGcE..a-.............. 0 61 103 161 +12089 PF12255 TcdB_toxin_midC Insecticide toxin TcdB middle/C-terminal region Assefa S, Gavin OL lg7 PFAM-B_3032 (release 23.0) Family This domain family is found in bacteria, and is approximately 150 amino acids in length. The family is found in association with Pfam:PF03534. This family is the C-terminal-sided middle region of the bacterial insecticide toxin TcdB. 25.00 25.00 29.90 28.90 23.60 22.50 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.77 0.71 -4.54 22 147 2009-08-11 15:37:11 2009-08-11 16:37:11 3 14 112 0 49 132 6 142.80 38 8.34 CHANGED hhtRALKGplLRoElYGhDso.........tptshPYoVopsRhpVR.lpstss..p..slhhshslEshoapYE.RlssDPpsspplsLpsDpaGpsLcolslsYPRRspst.........s.a....ss.hspthhss..ShD-pQptL+lstsppohpHlspsps.......hhLGLP .....htRALKGplLRoElYGLDso.........p.sshPYoVspsRYpVR.lQssss.....p......................s...VhhshslEploYpYE......R.ls............sD.PpsspplsLpsDpYGpsL+oVsIsYPRRspss.........ssa.....s.hspthhssphDppQptl+lspppppahp..ssp.....ahLGLP................................ 0 15 27 41 +12090 PF12256 TcdB_toxin_midN Insecticide toxin TcdB middle/N-terminal region Assefa S, Gavin OL lg7 PFAM-B_3032 (release 23.0) Family This domain family is found in bacteria and archaea, and is typically between 164 and 180 amino acids in length. The family is found in association with Pfam:PF05593. This family is the N-terminal-sided middle region of the bacterial insecticide toxin TcdB. 
This region appears related to the FG-GAP repeat Pfam:PF01839. 24.80 24.80 24.80 25.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.20 0.71 -4.82 29 294 2012-10-05 17:30:43 2009-08-11 16:37:22 3 55 209 0 101 298 21 169.80 24 8.84 CHANGED spsGstau..pshplshPsssphs.t.pplphADlpG.Ghsslll.ohsth..........ts...tpahhthstt......p....PhLLsslsNshGspsplpYt....oSsphhLc-ptt.t.t....ssspLPhshpllpchshpDtl....s.Gsp...hsppapYppGhaDshEREFpGFuplpppDssss...................ssh.spsaatTG ......................................................................................................hs.hht.s......phplsDlpGpGhssllh...sssp.............p.......phhhh.shsss...............................+PhLLsslssshGupsplpYc............................s.Ssph....h..hs.-p....tt.tt...........................sspLPh.s.lpllsp.......s.pp..p.Dtl...........s..Gss.........hsppapYpp.G..haDstpR.-FhGFupVpppDhsst...........................psssh.spphah..................................................................... 0 40 59 86 +12091 PF12257 DUF3608 Protein of unknown function (DUF3608) Assefa S, Gavin OL lg7 PFAM-B_3083 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 280 amino acids in length. The family is found in association with Pfam:PF00610. 
25.00 25.00 25.30 25.30 23.20 24.10 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.71 0.70 -5.50 9 344 2009-08-11 15:40:12 2009-08-11 16:40:12 3 8 247 0 223 351 9 242.40 40 18.31 CHANGED VElalKDs.LsRssMWshSopLlsoCVahspRlsaLsshpusVKuIY+NGKKlhSGYIs-NTKIlFRSESA+llFllQlocEMWHFEEsGEhhFHKlVNSLFPKIF++W+-hsTHHoITIVhhsSlDhos.sassLs.GER.pNpcDYFRlVVDQVslhaW-cIMtsLRhEFhphp+Dlh.pp....pDssthshcGphhPslKuNlLEsINluhTllsD.F+ssDL+HTssHlIlloPGoGLaDV-YDLLh.To+KhhSl-hulDlICLSpPPLHlVPLFRY ..................................................................VElsh+DpalsRuDMWRl.t.p.p.LsspssYhs..p+l.......a.h....s...l+.sps.......t.pla..h...p.....s.........c.+V........h........sGah.s.ppT+.lFRSpSA..hhhlFIQ.........hSpEMW-....F.........-......................G................-lhFpK...slNuFLsc.LF.........p+Wtphs.....s..p..H.lT.lVL.....F.......s........p...h.....t...........................s...................t.............................t.............h........t.........p.........l.........t.......................s...................p.........t..........p.................h........c........DFY+V.V.Vp..p.t.p..t...cW.s...s.lLhpl++.F.hpa..h.............lhh..........p.............................................................................................h......t......ups.......Ss.....u.h.....p.GNhLEAl..........Nluhs....hhsccalsRshsRTGp..l..VITPGsGlF..-V....D.hpLh....hlTpp+hl.sp.Gl.G.lDLlChuc.PLHsVPLF+a.............................. 0 86 125 187 +12092 PF12258 Microcephalin Microcephalin protein Assefa S, Gavin OL lg7 PFAM-B_3105 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 384 and 835 amino acids in length. Microcephalin is involved in determining the size of the brain in animals. It is a protein, which if expressed homozygously causes the organism to have the condition microcephaly. 
Organisms expressing the mutated form of this protein in a homozygous manner develop a condition called microcephaly - a drastically reduced brain mass and volume. Microcephalin is predicted to contain three BRCA1 C-terminal domains, the first of which is the probable microcephaly mutation site. 25.00 25.00 26.10 26.10 19.30 19.20 hmmbuild -o /dev/null HMM SEED 391 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.34 0.70 -5.48 6 148 2009-08-11 15:40:18 2009-08-11 16:40:18 3 5 90 0 19 138 0 368.30 56 71.39 CHANGED ESFAuGLpSSFDDLCGtouCGsQEcKLtt.ls-hpoDsChSSPVLKsuSlaSSAussaLsQLoPQKssusLSKpElstQRDsAGclVTPDpKQutGlupEshDEKasLSPThSuoKs+.hsHSps+uSSAKRKRsu-hSpSuPcE+LK.KRspt+ushPplQLaKS-spLppsstsAscu.usttSSYDDYFSPDNLKERsSEsL.PtsQ.suuPAtFpC.RuLSKpERpsIL-MuDFSClGK+sRols.loslhAKosSSLpKPupscusAshuClhS.tTuAs--oPGsCsQAGs.tt-DspPtGsutspTh-s.Ahs.....cG.cGDloPLcGuSpEh+EssDs+STQKEGssscspsSutGEsQs-sc.uFusDsss-pSsp-KEslupGhSt ....-.FAGsLHSSFDDLCGsSGCGsQERKLGG.ls-hKSDsChSShVLKsssl+sSsS.saLsp.oPQKhhusLSKEElshQR.shAGclVTPDpKQAtshSptsF-EKhpLSPThSuTK.s+hhh+S.p...PpSSSsK......RKRVStsSpS.PPKE+hK+KRSlR+shhPRLQLh+SEsuhppsuusA.lEsLusGESSYDDYFSPDNLKERs...SEsL.Pt.sQ.Po.uPA.hpC.R.SLSK+ERTSlhEMuDFSClG+psRol-.hTs.hTAKohsS..QKsuN..scucsshSsVTSccTsAsE-o.tsCtQAsspt+-DA.pP.tGssh.saTl-s.sh.............KGpcGDhTP.LcGS.cE....h....KEhlslKST.....Q.p.c.GTs.SKhsNSuEGE.....AQS-cc...sFlsDsshEpSsE.E+EsLPtG......................... 0 1 1 4 +12093 PF12259 DUF3609 Protein of unknown function (DUF3609) Assefa S, Gavin OL lg7 PFAM-B_3173 (release 23.0) Family This domain family is found in eukaryotes and viruses, and is typically between 348 and 360 amino acids in length. 
24.10 24.10 24.30 25.60 23.80 24.00 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -12.01 0.70 -5.78 7 125 2009-08-11 15:40:30 2009-08-11 16:40:30 3 5 70 0 28 124 0 295.00 28 53.39 CHANGED LthLGspL+popsAIhEAlhSAaQspLSPhVLolpQLptch..h.ucLspthtL.hpp.oISDIYplAols.stQhsNalVFpIpVPLlDsEpFNlYRLTPIPR.lsNGpIQLl-TETPYLGIsDHLDRYFPLQN..LDDClcLssERalCc.spITYGssDcohsCoLAAIRNpoSpsCThRpVpcpSlWT.hlAPNSWMVALoKELoLhGVCSs-cQEL+INGSGILoIpSDClVRSssVTLQGpspKthPS+puYASLQhssposccsshh-SFspLhpIlspLchpQcp.psltsh.hshlsVCPsllLIsLLlShsWhY+saRp+t.....sQpPh..Vssh.sspNcspTsshPLLEKpEl .................................................................................lpcAl..sA.pp.s+Lsshllo.ppL.sch..l...s.cLs..................t..p.....s...h...+sl...h.h..lss.....sh.....h.pt....ss.plhF.IpVPLl-s..pp.FslY+lsPIP..p....hsNs.....p.........h.p..ll.sco..Y..lG........l..ssc..h..c..p..Yh.L..ps..hs..sChcls.p.cphlChps.p.lhassssssh...s...Cs..l.......t......hh..+.s.....p....s.....s.......psCs.l.R.t.sc..p.p.p.lahphss..sN.sWh.h.slsc.-lsLhss..Cu..s..p...p.....p...pl.p..l..s.GoG.lL..o.l..pssChl+.......ostss......lp...............................................................................................................................................................................................................h.......................................................................................................................... 0 14 17 25 +12094 PF12260 PIP49_C Protein-kinase domain of FAM69 Assefa S, Gavin OL, Coggill P lg7 PFAM-B_3196 (release 23.0) Family This is the C-terminal region of a family of FAM69 proteins from Metazoa and Viridiplantae that are active protein-kinases. The family members have a short transmembrane helix close to the N-terminus, and thereafter are highly enriched with cysteines. FAM69 proteins are localised to the endoplasmic reticulum. 
Many members also have a short EF-hand, calcium-binding, domain just upstream of the kinase domain. The exact function of the more N-terminal family is uncertain. 25.20 25.20 25.20 25.40 25.10 24.90 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.47 0.71 -4.88 36 397 2012-10-02 22:05:25 2009-08-11 16:45:28 3 7 89 0 254 354 0 186.10 26 48.34 CHANGED lWsLLp....ps.Eal...lh.lhpc.p.-...hhP+lLGsCGchassEplshsphht..................hpphhpssWtt+u+lAltLL-hl-cl.pps..p..hahCDlshpsFGls.schc...lp..hlDhcplhscsphcphlpp.ppCpp.....s........pDCsa.hDCh.o.tCsh.pppCssphh..psN..LthlCp.p.l......hshh..........................LpGs........Ps...plppc...LpchLppCsp. ............................................hasLh.....s.E.l.lh..h.p.pc.......hs+hlG.C......G....c....hh..ssphls..p.h........................................................h.ph.h.s.ss.Wpp+s.clAlpLl-hlcpl.tpsshs..hhhsDh.....s.....hcsFuls....sc....hc.....lhhhDhcplhscsp.pphlp..t.ppCps.....s.........................pDCsh...hsCh....u...h...C..s...ptpCss.sh..ppN..LhtsCp.h.l...hshh...............................L..ss........Ps......plppp...Lpp.lppCht..................................................... 0 58 79 148 +12095 PF12261 T_hemolysin Thermostable hemolysin Assefa S, Gavin OL lg7 PFAM-B_3198 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 200 and 228 amino acids in length. T_hemolysin is a pore-forming toxin of bacteria, able to lyse erythrocytes from a number of mammalian species. 
20.70 20.70 21.40 21.10 20.10 19.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.92 0.71 -4.73 33 192 2009-09-11 08:17:19 2009-08-11 16:45:50 3 2 185 0 57 157 33 174.30 36 83.54 CHANGED hthhpsscstRpplEpFIpppatpsasAclppFhPtLLul....sspupl..hAssGhRhAsppsLFLEpYLDpPlEphluphhsts.lsRspllElGsLAoh.s.sGsuphlhhslsthLhspGhcWllFTuTctLpshhpRLGLpspsLusAcss+L.scssspWGoYYcppPpVhuuslsp.uhptL ............................h.hhtsscsphscs.phlcpcYppsasAclptaMPshLuL....hs.cst...h..tussGhRh.A....p....p.pPLFLEQYLDpPs-pllupthups.lsRspllEhGpLASh.u..pGhu...thhFhhhsphLsshGa-WslFTATcsL+shhp.RhGLp.phlApAsssRl.ssssphWGoYYpppPplhAGsLspGhpt.h............ 0 13 26 43 +12096 PF12262 Lipase_bact_N Bacterial virulence factor lipase N-terminal Assefa S lg7 PFAM-B_3205 (release 23.0) Family This domain family is found in bacteria, and is typically between 258 and 271 amino acids in length. There are two conserved sequence motifs: DGT and DGWST. This family is the N-terminal region of bacterial virulence factor lipase. The N-terminal region contains a potential signalling sequence. 
20.10 20.10 20.20 20.10 20.00 19.80 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.89 0.70 -5.17 8 221 2009-08-11 15:51:27 2009-08-11 16:51:27 3 6 188 0 50 183 24 250.40 33 33.22 CHANGED MK+p.hhhhllsSALhLuGCGD-opooGssTpss...pltpuLptETpIsFsLpu..ussslshPoYLhMDspDGTLpls...os..sssuloNPcsAMGphDGWSTo.PIslsF..pGssLssushsuulhlIKloschT....D.toss.p.lLs.ssDaslssoGsS..LhIlhLKsLssSSpYIFAVTsclpDhNGsPVGTSsSYAsLKSsshs.ssuuLsssQKVTQuoEulFutu...GVspssIVYSoWFoTQSlGDsLaAsKusTAsul..us...phssVW .............................................MKph.lhhshlsS..AlhLuGCG.s....pos.s.p.usssp..........t..l..t..op.lpFshtu.....tsup....lPlPs.hLh....s...sp...DG....T...LplP..........ss.........sss..s.......h.sNP.h.sA.hu..t..l..DGWus.o.hP..l.l..sh................pG....s......sL...s......ss....h..............h....s....u......u.....lhlhE..l.o...s.hs........................ss..s....s..lt........t......h.......Ls.G...sD.as.......s.s.u....Su.ss........lh.IhPhKsLssuSpYlhAlTsp....lpDusG.pslusSsoYuslK..s..c............sh.....h..p..pu.......s....t...slQ.tlsp.u....hEp.l....h.t.hu..............uV.s.p..spIlYSsaFoTQS.lussLh..As+ushAssh.......................................................................... 1 14 24 39 +12097 PF12263 DUF3611 Protein of unknown function (DUF3611) Assefa S, Gavin OL lg7 PFAM-B_3207 (release 23.0) Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 180 and 205 amino acids in length. There are two completely conserved residues (W and G) that may be functionally important. 
24.90 24.90 25.20 25.00 24.80 24.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.13 0.71 -4.89 21 164 2009-08-11 15:51:34 2009-08-11 16:51:34 3 4 109 0 74 156 131 175.60 31 80.30 CHANGED ssss.slp+hApslRhhGWIuFWhQlVLulVou.llLlFuslhsspstsspps.Gh...s.GlhhshhullsLhhSlahuatYsRlucpLp.t..ssstpPs+u-ss+hl+hGLllsLlGhhhullGhpAhsGhLluKulo.pssGh.....shssssphlp.uLDlhllhANssslhAHFlGllsSLWLLpplpp ................plpphutth+phGhluFWhQllLulVuuslLhFu.s...h.hsspsss...............st...........s.................s.ulhhsssGllslhhSlahuatY.hRlucpLp...ts..ssst.p.P...s+u-sh.+hl+hGlllNllGhhhullGhpAhlGsLluKulo..pssuh............shssssp..lp.uLDlhlV..AsssslhuHFlGllhSLaLLppl......................... 0 18 50 69 +12098 PF12264 Waikav_capsid_1 VSL_protease; Peptidase_C03; Waikavirus capsid protein 1 Assefa S, Gavin OL, Eberhardt R lg7 PFAM-B_3208 (release 23.0) Family The rice tungro spherical waikavirus polyprotein is cleaved into 7 proteins, including three capsid proteins, by the tungro spherical virus-type peptidase Pfam:PF12381. This family represents the capsid protein 1 [1,2]. 24.40 24.40 25.50 80.00 24.30 23.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.29 0.71 -5.02 3 54 2009-08-11 15:52:48 2009-08-11 16:52:48 3 5 5 0 0 49 0 192.40 90 27.27 CHANGED pEAFQDsEsRssDPNhSDMaNAlpuEYLVKSFoWKVSDGQDKVLuslsLPpDLWsoNSRLsDIMSYFQYYcATGlTFRITTTCIPMHGGTLhAAWDAsGCATRQGIATsVQLTGLPuhhIEAHSSSlTTloVcsPsIQSsICLSGSEHSFGRlGILKICCLNVLNAPpAATQpVuVNVWVKFDGVKFHaYSL+KsPV .QEAHQDSSVAAAGP..TDEHNAMLQKIYLGSFKWKVSDGGGSILKTFSLPSDIWAANDRMKNFLSYFQYYTCEGMTFTLTITSIGLHGGTLLVAWDALSSATRRGIVSMIQLSNLPSMTLHASGSSIGTLTVTSPAIQHQICTSGSEGSIANLGSLVISVANVLCADSASAQELNVNAWVQFDKPKLSYWTAQHoI.. 
0 0 0 0 +12099 PF12265 CAF1C_H4-bd Histone-binding protein RBBP4 or subunit C of CAF1 complex Wood V, Coggill P pcc Pfam-B_318 (release 23.0) Family The CAF-1 complex is a conserved heterotrimeric protein complex that promotes histone H3 and H4 deposition onto newly synthesized DNA during replication or DNA repair; specifically it facilitates replication-dependent nucleosome assembly with the major histone H3 (H3.1). This domain is an alpha helix which sits just upstream of the WD40 seven-bladed beta-propeller in the human RbAp46 protein. RbAp46 folds into the beta-propeller and binds histone H4 in a groove formed between this N-terminal helix and an extended loop inserted into blade six [1]. 22.70 22.70 23.80 23.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.65 0.72 -4.08 93 1002 2009-08-11 15:53:19 2009-08-11 16:53:19 3 13 342 12 648 938 11 71.70 33 16.24 CHANGED p-chc.lWp....cssshlY-hlhstslpWPoLohcalPDth........sssp........tsppllhGTps......ss.......ppNhlhlhclphssp .................EcaclW+....cssPhlYDhl.hspsLpWP.oLospalPDhp.....................sspt...........t...tsphLllGTpT.....ust..........cpNpLhlhplph.p.p............................ 0 212 360 531 +12100 PF12266 DUF3613 Protein of unknown function (DUF3613) Assefa S, Gavin OL lg7 PFAM-B_3240 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 126 amino acids in length. 25.00 25.00 25.10 25.10 24.90 22.80 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.34 0.72 -4.46 18 138 2009-08-11 15:54:42 2009-08-11 16:54:42 3 1 135 0 44 132 2 67.80 34 59.51 CHANGED Asps........ssuuc.hscuTcuWLtLQuss+AAu.ssQshsusptsLuhQRYL-SaKacIP-.haspptu .........................ss..........ssssssp.hscuTcshLslQuS.G+tAu.ssQPhpGspusLuYQRYL-SFcpcIP-.aatpt..h...... 
0 6 15 29 +12101 PF12267 DUF3614 Protein of unknown function (DUF3614) Assefa S, Gavin OL lg7 PFAM-B_3244 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 162 and 495 amino acids in length. 25.00 25.00 35.60 35.50 20.10 19.80 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.03 0.71 -3.98 7 151 2009-08-11 15:54:47 2009-08-11 16:54:47 3 3 3 0 0 100 0 156.20 75 20.85 CHANGED sFsuRhP......+ht+chsAohhcsuhuhVspuph....s.sps.shhp+Scsu....phP.sphssspsssAh.uss...VchsssSsssss...Rts.S.Pps........l+sush.lsVuuss+c.hsKlR-shhp.phGt.hpFS.F.SuT.....VhNsRssppsah...................hVhhLR.souupF.hlLh ...hsuRhP......RYEHHYDASMKGAGLSWVRKSQF....PNVEV.IRRRRSRASGSSASFPDANLQVSSDRP...SES...VQVVPMLDEGSSPP.RGVsS..PRR......DPVEGSGW.YSVGSPSR+.SSTsRGQRSA.ERGKIGDFS.FPSGT.....VYNARVDERAYK...................RVLKLRDTSASSFLRIV... 0 0 0 0 +12102 PF12268 DUF3612 Protein of unknown function (DUF3612) Assefa S, Gavin OL lg7 PFAM-B_3220 (release 23.0) Family This domain family is found in bacteria, and is approximately 180 amino acids in length. The family is found in association with Pfam:PF01381. 25.00 25.00 91.20 91.00 20.00 16.80 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.28 0.71 -4.49 6 144 2009-08-11 15:56:56 2009-08-11 16:56:56 3 6 142 0 29 92 29 177.60 80 36.16 CHANGED TsVSPYPHWHYFDAYuPGKLKAVYRGNGIPLPWGNMRtVcDPCQHWAVFRMIupsssGSSAQISILsVuspPRIYCCESIKVpD.AGNsHVLCAGIDLNPAI-AQGsDAlulAcpLKshCVusGGSusIPppIKp-LhSlA+ILNINWIERGIpspARLICSRGAVCPRpPSCYt....sCs ...TVVSPYPHWHYFDAYG.GKLKAVYRGNGIPLPWGNMRKVNDPCQHWAVFRRLSpPp....pGSSAQISILsVGDEPRIYCCESlNVhDPAGNNRVLCAGIDLNPAIsAQGGDALuIApELKhuCVppGGoosIPtsIK+DLpTIAKILNINWlERGI-s-ARLICSRGAVCPRpPpCYucC.G..... 
0 4 9 21 +12103 PF12269 zf-CpG_bind_C CpG binding protein zinc finger C terminal domain Assefa S, Gavin OL lg7 PFAM-B_3253 (release 23.0) Domain This domain family is found in eukaryotes, and is approximately 240 amino acids in length. This domain is the zinc finger domain of a CpG binding DNA methyltransferase protein. It contains a CxxC motif which forms the zinc finger and binds to DNA. 21.70 21.70 23.40 23.40 20.80 21.30 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.66 0.70 -4.88 6 134 2009-08-11 15:57:41 2009-08-11 16:57:41 3 9 86 0 79 130 0 213.60 55 40.04 CHANGED AspRIhplLPQRlQpWshoPshAsEps+ctL-cIR++Qp.sRspLtELE++apcL-slIpRA+cpolspppsp......-ss-psD.-.sIaCVTCGHpIssRsAl+HMEKCatKhESQsSFGShaKTRI-G.ssLFCDhYNsQu+TYCKRLRVLCPEHoK-PKlPsT-VCGCPLVpNVFE.T.....G-FCRAPK+pChKHYsWEKlRRAEIDLERVRQWlKhDELhEQERtl+pAhosRAGlLGLh .........................................................................................................................AssRIaplLPQRIQpWp.o.PClAEEps++hLE.cIR+cQptsRhcLp-hE+RacEL-slI.+AKpptlp..pp.c-s......s-s-.scD...s-hplaCloCGH.slss+sAl+...HME+CasKYESQsSFGSha.TRIEG..splFCDhYNPpSKTYCKRLpVLCPEHo+DPKV.........s.s.sEVCGCPLV....p...s....V.....F-hT.................G-FCRhsK+pCs+HYsWEKLRRAElDLERVR...hWhKLDELhEQERslRpAMosRAGLLuLM............. 0 27 32 57 +12104 PF12270 Cyt_c_ox_IV Cytochrome c oxidase subunit IV Assefa S, Gavin OL lg7 PFAM-B_3280 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. This family is the fourth subunit of the cytochrome c oxidase complex. This subunit does not have a catalytic capacity but instead, is required for assembly and/or stability of the complex. 
23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.94 0.71 -4.42 24 381 2009-08-11 15:57:58 2009-08-11 16:57:58 3 3 366 0 107 227 117 134.00 43 95.16 CHANGED M+spu+lFhhlssFFhhsullYshhoth..........EWsGosALhLusuLuhhlGsYLthsuRRl....shtPEDpp-AEIsDGAG-lGFFSPtSaWPlhlAsusulsslGlA.hhh.WllhlGsshllhulsGhlFEYapGscp+ .....................MKhEu+lFthluhFallsullYhhhop...................EhAGTsuLsLouuhuLhluhYlpFsA+Rl.....-sRPED.c-AElsDGAGplGFFSPpShWPlhhAlusAlhslGls.hhh....WLhhlGls.h.llhussGhsFEYahGsptH........ 1 34 84 102 +12105 PF12271 Chs3p Chitin synthase III catalytic subunit Assefa S, Gavin OL lg7 PFAM-B_3298 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 288 and 332 amino acids in length. This family is the catalytic domain of chitin synthase III. Chitin is a major component of fungal cell walls and this enzyme is responsible for its formation. 
25.10 25.10 35.60 35.00 23.80 23.40 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.81 0.70 -5.44 15 224 2009-08-11 15:59:44 2009-08-11 16:59:44 3 4 136 0 173 213 0 274.80 39 90.17 CHANGED MsFGsFsoICccAuLPLCulVGs.s.p.ssspGIhPsCYuRsIELANTh.IFQhusshlHlsALlholIhIl+VRuKaTAVGR+ElloFFYlYhhLplsSLllDuGVsPPuSssasYFsAVQhGlsSAlChCLLlNGhVGFQLaEDGTslSlWhLRlsShlhFlloFhIuLsTFKuWsG......LusspTlGLFVlhYllNAlhLslYlVhQllLVlpsLp-pWPLGtIuhGlFFFlAGQVlhYsFSssICcussHYlDGLFFATlCNLLAVMMlYKaWDSITc-DLEFSVGshpssa ...........................tFGsFpshCpc.....s.....sLPlCsll.............ss........t.................................................t......t......................Gh.ssChhR........sIplus.sh...lhphushhlphhAlhhsllhlh+scpKhsAV.GR.......+EhhhFhhhYhllplhplhlsuGl.P.suusshsaFsAlphGhhuAsshsLLlNGhVGFQ..lhEDGTslSlhLlphouhshFlsohhluLsThh.uWsG..........husspsluLaVlhhllshlhLhlYhlhQllLVlpsLp-hWPhu......tlshushhFshGQVhhYshSspICpusp+YlDGhFFuTlhsLLuVhMVYKaWcSITc-DhEhsVu.................... 0 48 94 146 +12106 PF12272 DUF3610 Protein of unknown function (DUF3610) Assefa S, Gavin OL lg7 PFAM-B_3173 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 146 and 160 amino acids in length. There are two conserved sequence motifs: FNN and IDS. 25.00 25.00 55.00 50.50 22.40 21.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -4.57 10 53 2009-08-11 16:00:08 2009-08-11 17:00:08 3 2 17 0 4 53 0 156.20 59 30.09 CHANGED MpupohhFlLlslLI...uLSHuosVcp-css..uPMuM+GFNNSLGTFVEYuGQASlAocDWplpsSFNL-SLhsuIpsFp..plYKsLhDhCchp.pshCP-hh..DlTcFADoILpDGLlcsccsL-a+.....ltRLS.u---hup.hpNsoSsIDSol..NllsVulhE ...MKAIuLsFILlPlLl...ALSHGSPVsR.-AIG..GPMAMRGFNNSLGTFVEYSGQASLAS+DWcLCASFNLESLYsAIhAFN..uVYKslVD.C-cQ.ps.CPEIh.....sITcFA-SILHDGLhDLEsALcaR..........................AGRLs.u.-.DDVupELtMuTSCIDSSI..NVINVuL........ 
0 1 1 2 +12107 PF12273 RCR Chitin synthesis regulation, resistance to Congo red Wood V, Coggill P pcc Pfam-B_2935 (release 23.0) Family RCR proteins are ER membrane proteins that regulate chitin deposition in fungal cell walls. Although chitin, a linear polymer of beta-1,4-linked N-acetylglucosamine, constitutes only 2% of the cell wall it plays a vital role in the overall protection of the cell wall against stress, noxious chemicals and osmotic pressure changes. Congo red is a cell wall-disrupting benzidine-type dye extensively used in many cell wall mutant studies that specifically targets chitin in yeast cells and inhibits growth. RCR proteins render the yeasts resistant to Congo red by diminishing the content of chitin in the cell wall [1]. RCR proteins are probably regulating chitin synthase III interact directly with ubiquitin ligase Rsp5, and the VPEY motif is necessary for this, via interaction with the WW domains of Rsp5 [2]. 25.10 25.10 25.10 25.10 25.00 24.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.25 0.71 -3.59 29 193 2009-09-11 15:13:23 2009-08-12 12:50:30 3 4 119 0 142 179 1 129.30 23 66.82 CHANGED RWlhaslhlhslllhhhhhhphNpRRR+p.GhpPhhGTuWhs...PPsY.tQppp................pYst............hsP..sttss.t.s.hGY.Yspputa.ss.p.................h.lt.P.psh........ths.tth..hts..............................PsGPP...P ................RWhhhslh.l.lshllllhhhhph.s+RR++....p...Gh...p.....PhhG..TuWhs........sPsa..tps..pp..........................pYsts.t..st...............thsPt.Ystsss...tts.......hGh.Yst.p.sphtsssps.................................ht.P.....ts....................................................................................................................................... 
0 27 69 121 +12108 PF12274 DUF3615 Protein of unknown function (DUF3615) Assefa S lg7 PFAM-B_3306 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is typically between 86 and 97 amino acids in length. There is a conserved FAE sequence motif. There is a single completely conserved residue F that may be functionally important. 21.00 21.00 21.90 21.00 20.90 20.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.74 0.72 -3.65 18 304 2009-08-12 14:43:54 2009-08-12 15:43:54 3 10 8 0 163 247 0 88.00 22 24.14 CHANGED Yspsps.......shpY-Lhclhppshhh-ptth..apHhNFhupp+s...pt.shs.phFFAElc........stpsppVhsCs.lcsscs.....GpCauCtspsh....LcHP ...............................................t.thch.ph.ht..p..h..........p....p..tth.....a.tHlNFhApscs.............s......ss.........pLFFAElp.........................ptppt.th....hCs..lpssps.................stCh.hC..tt.....hhHP.......................................... 0 0 25 96 +12109 PF12275 DUF3616 Protein of unknown function (DUF3616) Assefa S, Gavin OL lg7 PFAM-B_3338 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 335 and 392 amino acids in length. There is a conserved GLRGPV sequence motif. 
20.60 20.60 22.80 21.50 20.00 19.60 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.12 0.70 -5.43 18 148 2009-08-12 14:49:12 2009-08-12 15:49:12 3 15 126 0 70 162 13 258.00 25 53.17 CHANGED -hSusshssctpLWl.uuDEs.sslERLshhsss....hhssH.ppFsLu-hh-LPss..st.....EhDlEGL..Al.usGYLWlsGSHSlKRKss+spcsp.cshpcLuclph-sNRtlLuRlPl.spsssh.psspshcstp.......htAthlthpptt.s.LtphLusDPpLusFl...sIPuKDN..GFDIEGLAVps.p....RlhLGLRGPVLR.GaAlllElcsppssss..hLtLtshttpGp...hY+KHFLDLsGLGIRDL...phsGD.DLllLAGPTMc..L-Gsl+la+Wpssss...tp......tstlpphhsLPatpGsD+AEGlshhs......tsssplLlVYDSPstpRcss.ssslhuD ................................................................................uh....t..hhl.stD-...th.plh...t..........h..p....h.h.t...........tt.........DlEuh..sh..tt.......shlahhsSHu.p+pt.c....................p.sR..hlh.ph.l...t..............................................L.hphlttc..tht.hh...........thPs...ccs....G.hsIEGl..s..hss..p....plhlGhR....uP....l....h...............c....G.....hAh.ll.lps.t..ph.p...........htht.h................hchhhLcL.s.G....h.....GIR-l...phs..ss..chh.llAG.P....shs........s..Gs..hplapWsus.s.........t...........t........h.l.....s.......t.t......psEuls.h.......tt..phllh.Dts....................................................................... 0 20 49 63 +12110 PF12276 DUF3617 Protein of unknown function (DUF3617) Assefa S, Gavin OL lg7 PFAM-B_3343 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 155 and 179 amino acids in length. There is a single completely conserved residue C that may be functionally important. 
20.70 20.70 21.00 21.10 20.00 20.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.24 0.71 -4.78 62 339 2009-08-12 14:49:20 2009-08-12 15:49:20 3 2 307 0 107 263 20 153.30 24 93.08 CHANGED Mpphhhhhhhhhhh........................sssssssstpl.....csGLWEhsspht.........................................sth.....shptClotppsct.pt.........hspsppsCphpshppsusphshshsCsssth....ssspstssshssssassphshpsssst.t...hshphsh..pu+alG.sCt .......................................................Mpphh.shhhsshh..........................huhssshA.sl.......................pPG..Wchpshph.....................................pth....t.shhs...........p...shssthChTsctscssct........hsssssuCops..hscp..o.ssplhhchsC...sssss...t..upuh..spssthSss..cFshshpusssusutt......tsst.ch..cupalG.sCs........................... 0 26 70 91 +12111 PF12277 DUF3618 Protein of unknown function (DUF3618) Assefa S, Gavin OL lg7 PFAM-B_3350 (release 23.0) Family This domain family is found in bacteria, and is approximately 50 amino acids in length. 21.70 21.70 21.80 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -7.93 0.72 -3.93 61 612 2009-08-12 14:49:29 2009-08-12 15:49:29 3 6 483 0 202 459 7 48.20 30 32.03 CHANGED hscs.ssplcp-I-psRscLupTlDpLss+hs.Pcplsccttsps+spssp ...........t+s.sspIcp-I-psRspLAsTl-pLsp+ss.Pppls....c-stspspshht.t......................... 0 71 147 182 +12112 PF12278 SDP_N Sex determination protein N terminal Assefa S, Gavin OL lg7 PFAM-B_3366 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 168 and 410 amino acids in length. This family is the N terminal end of the sex determination protein of many different animals. It plays a role in the gender determination of around 20% of all animals. 
25.00 25.00 34.60 34.40 23.70 23.70 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.22 0.71 -4.62 3 123 2009-08-12 14:53:08 2009-08-12 15:53:08 3 6 14 0 16 123 0 157.10 69 42.76 CHANGED MKRNhSSYhHRDEKFKQLRSEDSESGLRSR...TEDERLQHRREEWhIQQEREREHEKLKKKMILEYELRRtREhchhLSKRS+TKSRSPESQDRNNAS..NTSKTlILFEKLESSDDTSLFRGPEGTQINATELRKIKl-IHRVLPGKPTTTTuEVKRDIINPEDVlLKRRT ....................MKRNhSsYSH+DE+F+QLRsE..DsE..h..sLRSR...TcEERLQHRREtWhIQQEREREHE+LpKKMILEYELRRt...R.....EhpK.......hh.......Sc.......R........S........KSRSP.-.sR.DRsN.s.S..NTSKThILS-KLE..SS..D.shS.LFRGP.EGhQINATELRKIKl-IHRsLPGKsT.......T.TTsElKRDIINPEDVhLpRRT....... 0 5 5 16 +12113 PF12279 DUF3619 Protein of unknown function (DUF3619) Assefa S, Gavin OL lg7 PFAM-B_3353 (release 23.0) Family This protein is found in bacteria. Proteins in this family are about 140 amino acids in length. This protein has two conserved sequence motifs: AAR and DDLP. 21.90 21.90 22.10 22.40 21.80 21.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.54 0.71 -4.07 24 136 2009-08-12 14:53:12 2009-08-12 15:53:12 3 1 134 0 61 127 16 128.70 42 91.66 CHANGED hpEcca.A+clpttLccuss....sLPssls-R..LtAARptALup+Kspss...hhssthuhsGtuutsh............ttsahp+luhshPLhALl.s....GLlsIshapsppphsElA-lDAAlLoDDLPssAYhDpGFhtaLcps ............pEhcF.Ahcl+pALsEsuu....sLPssss-R..LAAARctALA++Ks-ss...lhss.....AhuGsuushh...........sp.tsuhh+RL...u.hshPLhsLl.s.....GLlGIshaps.pRs.sELA-lDAAhLoDDLPlsAYhDpGFstYLpp.s........................................ 0 7 34 48 +12114 PF12280 BSMAP Brain specific membrane anchored protein Assefa S, Gavin OL lg7 PFAM-B_3398 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 285 and 331 amino acids in length. BSMAP has a putative transmembrane domain and is predicted to be a type I membrane glycoprotein. 
25.00 25.00 27.30 27.40 21.90 20.50 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.63 0.70 -4.44 11 161 2009-08-12 14:53:24 2009-08-12 15:53:24 3 2 62 0 76 147 0 172.00 43 59.27 CHANGED RGCRLFSICQF.VscspchNsTchEC-uACpEAY.scssEQhACshGCtsQ...spsEh...cpcpLtuL.s+.t..LpslphVpsahsDlhsSAQsaI..oSoWTaYLQsDsGKlVVhQocsplphhs.........p.p..p...pt...pt.t.pss.ttshsphRs.u.p.+s...cpspsu.ht.p..t..tpsFLpChS.......+psGhspWlLsssLhLSVllhLWl ...............................................................RGCRLFSI..CpF..Vs.cu.chNpT+..hECEu.ACsEAY.sppsEQhACphGCtsQ.....P.sEh.............ppcplhsL..sp.+h.L.sLsLlpohhsDhhsSAQSFI..oSoWTaYLQADsGKlVlFQopPplphhs.........php....shpt............p......hh..sh.p.h+p..s.t.hRs...ctppps..................FLpChS.......hp...stWI......LsssLhLSVhVhLWl...................................................................................................................................................................................... 0 14 18 38 +12115 PF12281 DUF3620 Protein of unknown function (DUF3620) Assefa S, Gavin OL lg7 PFAM-B_3384 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 281 and 358 amino acids in length. There are two completely conserved residues (G and P) that may be functionally important. 
20.70 20.70 21.50 21.30 19.80 20.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.43 0.70 -4.62 29 119 2009-08-12 14:56:28 2009-08-12 15:56:28 3 5 97 0 66 129 4 203.20 28 62.83 CHANGED sphsucllcAlupuGhFRhtuhllGTtAapsYsuhLGl+l.tssh.tTGDlDlAtt.tplSlsl...scshpsslhsl.Lps.h-hsFpslsphsttttsptpps....ahV-FLTspptssch.....st.hsuh.sssApsLcaLcaLlscP.lcslsltcsG.....lhVplPsPcpaAlHKLI.lus+Rp..tts.sKppKDhtQAshlhchl.spcpsppL.tp.uatcAhsR ................supllpslspsGhaRhtssllGThAatsYtshhGlch..s.sh.tTsDhDlA....phshtl.....tsph..slhpl.Lpp...t.sFc...lsphst.t..t...tspttps.........hhV-FLTsptussp...........p.h.ssh.sssApsLcaLs.aLltpP..lps.sh....ltcsG.....l.l...plPsPtpaAlHKLl.luscRp..t.s.hKptKDhtQAthlhchl.tpph.t...t.s............................................................. 0 23 43 52 +12116 PF12282 H_kinase_N Signal transduction histidine kinase Assefa S, Gavin OL lg7 PFAM-B_3401 (release 23.0) Family This domain is found in bacteria. This domain is about 150 amino acids in length. This domain is found associated with Pfam:PF07568, Pfam:PF08448, Pfam:PF02518. This domain has a single completely conserved residue P that may be functionally important. This family is mostly annotated as a histidine kinase involved in signal transduction but there is little published evidence to support this. 
22.90 22.90 25.00 23.00 22.10 20.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.45 0.71 -4.58 64 599 2009-08-17 12:16:09 2009-08-17 13:16:09 3 6 591 3 127 400 134 145.90 30 30.13 CHANGED sLs-ll..tppTsLsss-l-aLcpllu-WQllADLoFADlllhl.........sscus.........hlsVApsRPsTusTsahsDl.VGphs...............tspcPtltcshpsGphsRstcsthtssh....slc....ppslPV+ps......s...cl...IAVls..cc..sslsss+psutLEhsahpsAs ...........lpclhtppTsLs-sDl-aLcplls-WQllADhuhADlllhV....................sccss..........hlsVAps.RP..s..TssolhpsDl.VGphs.....................................................htssc.PhVtcshpouthscs.tp...sthptsh....slc...pshPlppp..........s..cV.....IuVlh....pc....ssltstcpsuphEpsaht..................................... 0 48 97 117 +12117 PF12283 Protein_K Bacteriophage protein K Assefa S, Gavin OL lg7 PFAM-B_3455 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are approximately 60 amino acids in length. This family is a protein expressed by bacteriophages which has an unknown function. There is evidence that it is non-essential for in vivo production of a mature phage. 25.00 25.00 78.10 78.00 19.80 19.00 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.72 0.72 -4.09 2 51 2009-08-18 10:28:34 2009-08-18 11:28:34 3 1 36 0 0 29 0 55.50 78 100.00 CHANGED Mp.KhhLlhQELLLLsYELNRSGLLsENEcIps.LtpLEhlLLpsLSPSopcusKs MSRKIhLIKQELLLLVYELNRSGLLsENEcIRsILApLE+lLLCcLSPSopcosKs. 0 0 0 0 +12118 PF12284 HoxA13_N Hox protein A13 N terminal Assefa S, Gavin OL lg7 PFAM-B_3464 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 149 and 306 amino acids in length. The family is found in association with Pfam:PF00046. This family is the N terminal of the Hox gene protein involved in formation of the digital arch of the hands and feet as well as in correct genital formation. 
Mutation of the protein is associated with hand-foot-genital syndrome. 19.90 19.90 20.50 20.20 19.10 19.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.86 0.71 -3.95 12 395 2009-08-19 10:03:12 2009-08-19 11:03:12 3 4 191 0 88 358 0 114.00 42 47.88 CHANGED hD-hsKNMEGFs.GGNF....uAsQCRNLhAHPssLAP..SusYsSSEV.ssuGhuEP.uKQCSPCS.AsQuSu.uASLPYGYF.GuGYYPCRMSHH....sulKSCAQ..s..uSa.u-KYMDTSso.GE-F.oSR ..................................................h..................s.ssh...........sAspC.RsLh.......uH......P.........u......s...hs.s...........u..u.....u......h.....s.....o.......u..p.............s...s.....s...s..u....h....................uE..........s....sKQC.....u..P..Cs...A.s.....Qu..SS...s.A......uLP...YGY..F...GuGYYsC.R.huHp.......sulKSC.uQ.............su...uua.u-KYMDTS....s....s.....s...uE-a.sSR....................................... 2 5 13 40 +12119 PF12285 DUF3621 Protein of unknown function (DUF3621) Assefa S, Gavin OL lg7 PFAM-B_3468 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 49 and 62 amino acids in length. There are two conserved sequence motifs: QPLDLS and EQQ. 25.00 25.00 30.70 30.70 24.60 16.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.60 0.72 -3.81 3 129 2009-08-19 10:07:37 2009-08-19 11:07:37 3 6 13 0 0 96 0 49.00 78 16.18 CHANGED KPCPEPs......TEsQPLDLSQKKEKQoEHEQQVsKPlKsQKNEPQPYSQTYGK ......KssPEPE......sEuQPLDLSQKK.....EKQPEHEQQVVKPhKPQKsEPQPYSQTYGK. 0 0 0 0 +12120 PF12286 DUF3622 Protein of unknown function (DUF3622) Assefa S, Gavin OL lg7 PFAM-B_3490 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 72 and 107 amino acids in length. There is a conserved VSK sequence motif. 
20.30 20.30 20.30 21.10 19.30 19.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.21 0.72 -3.67 15 92 2009-08-19 10:16:05 2009-08-19 11:16:05 3 1 91 0 24 68 4 70.60 58 53.13 CHANGED MocsKKa-aRlsppcsuWsAEITR+lTuR+TlVSKpcsGFuoEAEApsWuEpELpuFLpN.tcRNcRKA-p ...MocsKKFshRlopc+sGWsAEIT..RphTSRKTlVSKRcsGFsTEAc....AQsWuEpELtuFlps.s.RNERKucp................. 0 2 7 16 +12121 PF12287 Caprin-1_C Cytoplasmic activation/proliferation-associated protein-1 C term Assefa S, Gavin OL lg7 PFAM-B_3504 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 343 and 708 amino acids in length. This family is the C terminal region of caprin-1. Caprin-1 is a protein involved in regulating cellular proliferation. In mutated phenotypes, the G1 phase of the cell cycle is greatly lengthened, impairing normal proliferation. The C terminal region of caprin-1 contains RGG motifs which are characteristic of RNA binding domains. It is possible that caprin-1 functions through an RNA binding mechanism. 
25.00 25.00 25.80 25.80 20.60 19.40 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.29 0.70 -5.05 6 155 2009-08-19 10:17:39 2009-08-19 11:17:39 3 3 40 0 55 136 0 264.70 48 39.45 CHANGED QGsYNFMQDSMLEF-GQsLDPAIVSAQPMKPsQsMDh.QMVCPPsHsESRLuQPsuVPVQPEsTQVPhVSsss-saoooPPlYQPSHTsEPRPQs-slD.IQASMSLsoEQoPssSSLPuAsQPQVh.....KPLHSSGINVNAAPFQSMQTVFNhNAPVPPsNEs-oLK.psQYQuSYuQuFSSQstHsVEQoElQpEQLQoVVGuFHopDQshppus....uuHQQPopQGsGFsRpuQSFYNSRG.MuRGGPRNsRGhlNGYRGsSNGFRGGYDGYRPPFuNTPNoGYG.QsQFST.PRDYSNssYQRDGYQ.NaKRGuGQGsR ....................................................................................................................................QGshNFhQ.............-ShL-F-p.s...........sAIs..SuQ..P.pss...............ssss......p.....tE..ppL.up.sph....p..s.psTp..ss.hsss.s..s...s.h.......s.ss..................h.ps................ocs..t..p...p...tt..s.......-..p..ss...h....s...spp..o....shuSs....P..sh.u..p...QsFQ..........pP...p.S.S.uls..lN..sA.PFQ.uMQT..VFNhNAPlPPhpE...Ehhc.ps.pYpuuYNQSFoo...........ts...........ppspQs..pL............p.....p.pl......Qo..s.....V.......ss.....hcs.....s.....ssp..p...ss....Gsht..PtQsssFPRssQPahNSRG.ssRGssRGuRulhNuYRuP..sGa+..tGaDuYRs.......shs.sss.Nus..Ys.Q.QFsA..R-YSu..Y.QRD..s.aQQsaKRGuspus.s.......................... 0 3 7 19 +12122 PF12288 CsoS2_M Carboxysome shell peptide mid-region Assefa S, Gavin OL lg7 PFAM-B_3507 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 430 amino acids in length. This family is annotated frequently as a carboxysome shell peptide, however there is little publication to confirm this. 
20.00 20.00 21.10 20.00 16.80 19.90 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.81 0.70 -5.31 13 104 2009-08-19 10:28:48 2009-08-19 11:28:48 3 3 69 0 42 110 359 315.70 37 60.72 CHANGED NsSRAlVLARREA.SK+GKoA.......upQsoouAuVARQuNPDlSSRElAQ+VRELRSKsGAs....ssppsGsoRPsGPNRs.....GuKQ.uuAuDAHWKVGtSETsoGQsVTGTQANR.ShKTTGNEASTCRoITGTEYLGAElFpoFCQ.SpPthtQPuKVsVTsTSHGN+VTG.NEVGRSEKVTGDEPGTCKslTGTEYISANQSssYCGs.sssSP+KVGpShTpsGppVSGVhVGRSp+VTGsEuGus+pLTGDQYLGu-P.s.sGRuspKVuShpTL+GsGVTGTsVGRo-pVTGsEsGSC+pVTGDEYIGuQQYcuFCus+PpPEAsKVGhSlTN+uQhVSGTpTGRSphVTGDEPGTCKAVTGTPYAGLEQuspaCsssulp-l+pRTPpphuTsu.sphTGlQPGIGGVMTGA-KGACEslTGTPYVGuDQhutAC ................................sR.hs..hRtA.stttt.t......................ths..........t.......t........h.ss+phu.t.hRt..s..Gt.......tt....s+Psu..+t..................................h..KVt.upT.tGphVTGTpspc.s.+sTGsEsuoCRslTGTEYhG..sE.hp.p.hCp.ttPt...tP...+..VtlotTsp..uppVoG.splGRSp+VTGDE.GsC+tlTGTEYluupp...hC....s..s......t.s.t.plh.s.ottsp...loGs..tps..........ttVTGsE.....Gs.spplTGspYht.............t................tup...s.....stK.Vu..pTlpGps...VTGT.Vu+usplTGsE.GuC+.VTGspYluspphpshC...ss.pP..s.p...stKVuhshT.csp.VoGTtsGRSttVTGsEsGsCpslTGTsYh.G..ppht..thCss....tt.t...h...t........h.oG..p..G.ss...hTGstpGtsp.loGo.Y.G.tt..................................................... 0 11 25 38 +12123 PF12289 Rotavirus_VP1 Rotavirus VP1 structural protein Assefa S, Gavin OL lg7 PFAM-B_3516 (release 23.0) Family This domain family is found in viruses, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF02123. VP1 is a structural protein of the inner core layer of the rotavirus virion. It complexes with VP2 and Vp3 to form this layer. 
19.40 19.40 20.80 35.70 17.50 15.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.57 0.72 -4.34 2 470 2009-08-19 10:32:54 2009-08-19 11:32:54 3 2 231 10 0 377 0 51.00 94 5.17 CHANGED ScGItlTEKAKLNSYAPlhLEKRRsQhShhlohLQpPsoFKSpp.lTINDlL ..SKGIAVTEKAKLNSYAPVYLEKRRAQISALLTMLQKPVSFKSNK.ITINDIL. 2 0 0 0 +12124 PF12290 DUF3802 topoisom_IIA_B; Topoisom_IIA_B; Protein of unknown function (DUF3802) Assefa S, Gavin OL lg7 PFAM-B_3547 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 114 and 143 amino acids in length. There is a conserved KNLFD sequence motif. 20.80 20.80 21.00 94.70 20.60 20.70 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.39 0.71 -3.77 17 157 2009-12-04 10:35:14 2009-08-19 11:36:19 3 1 156 0 31 85 9 112.40 60 96.12 CHANGED MVs-pDGYhpLIpYLT-pLuLFE.tspssssus-TVh-lhE-plAsplhhVCsQNspLshspR.hllRElDAIVhDLEElLuuVhsppsTsEQhtFIsEFsuLlKNLFDptluch MVVETDGYLALIEHLo.NhslFs...ppGDTGsEoVEDVlTDMlASNIMuIFEQNPELHSSVRFpLLKEADuVVtDLGEVLAGVWt+sATNEQIuFLDEYIuLVKNLFDoAVAKY. 0 4 9 20 +12125 PF12291 DUF3623 Protein of unknown function (DUF3623) Assefa S, Gavin OL lg7 PFAM-B_3578 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 261 and 345 amino acids in length. 
18.90 18.90 19.70 19.40 18.40 18.10 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.72 0.70 -5.42 30 93 2009-08-19 10:44:53 2009-08-19 11:44:53 3 4 86 0 33 90 126 256.20 37 85.67 CHANGED MsshhlssLhAlFlWWFSTGllLhll+h.sc+....sathuhlhussllshGlhGlssotsssoluuuYlAFhuALhlWGWhEluFLsGhlTGPppssCPssspGhcRFhpAhtTlhaHElsLlssslslhhlohGusNphGlWTFhlLahhRlSAKLNLFLGV.plNsEFLPstLtaLtSaF+ppshshlFPlSVoshTsssshhhppAhss.ssstptlGasLLusLsALAlLEHWFhVLPLPstpLWpWhLPutpssspsst ........M.shhlssLhAlhlWWFSTGhlLhLsph.sc+.....sathohlhusslhshulhGlssoupssossuAYhAFssulhlWGWhEhuFLsGhlTGP+ppsCPssspuhpR.FhtAltThl.aHElullsssshlhhlohs..usNplGhWTFhlLahhRhSAKLNLFLGV.slNs-aLPppLtaLpSYFp+cshNhlFPlSVTlsThssshhhppAhss.ssshptlGhsLLusLhALAlLEHWFhlLPLPsttLWpWsl.sptt.....s....... 0 8 20 23 +12126 PF12292 DUF3624 Protein of unknown function (DUF3624) Assefa S, Gavin OL lg7 PFAM-B_3603 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There is a conserved GRC sequence motif. 21.30 21.30 36.80 36.80 20.60 19.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.66 0.72 -3.58 18 129 2009-08-19 10:46:14 2009-08-19 11:46:14 3 1 129 0 25 82 4 77.20 60 92.95 CHANGED MuCpsC.....ppshFppKIGRCpRCMhQ..LTlLSlluWslWaahatDsPpslpSIALLhhshAFoGLLhLHL....llhsaRphps ..MACs-C.....ppchFWpKIGRCpRCMDQ..LTVLSVlsWIlWaauF+-cPtSIESIuLIhAGFAFNGLLFLHLWh+aVILPWR+Rp.G............. 0 2 7 16 +12127 PF12293 DUF3625 Protein of unknown function (DUF3625) Assefa S, Gavin OL lg7 PFAM-B_3621 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 327 and 360 amino acids in length. 
18.30 18.30 18.50 18.70 18.20 17.90 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.41 0.70 -5.25 15 185 2009-08-19 10:51:41 2009-08-19 11:51:41 3 4 145 0 18 138 16 235.40 47 72.93 CHANGED PLoP-Q.IcpLRphh--sp+Atutssss...ssPRlSSlsVNLSPGAo.PllRsusspsoolsFoDuTGuPWsluAssl............suss....ssFslpahtsSsllslQAtp.YssGNloVhLcGLssPVllsLpoGc......sptVDhRlDLRlPtRGPsAps.h..shspspluhhsssLpuFLDGlPPcc..A+RLKssGs...-spsWths-cLYlRTphs.lpstFcpohSSsDGT+lY+LPhT.PhVshShpGpsssLpI-h .....................Lss-p.I+pLRplhs-s-+Ahsushso...ssPRlo.SloVNLSPG.A.SlPllRsuhs.lSsloFsD.sGuPWP.ussPh.............................susP....phFsl..Q.a........s.....sNhlslpsh+sausGNloVaL+GLusPVlLslsSG-sss............s.opphDuRLDLRlPppGPsuss..s...uhs...............ss........+.......luLaDssLQuFLDG..lPPc-..s+RLKhoGss..sDTplWQp.GD....-....LhVRoRsh.LpspF-pTloSA....DGTHla+LPsT..PlLoFS.l..sGpslplpsE.h..................... 1 8 11 14 +12128 PF12294 DUF3626 Protein of unknown function (DUF3626) Assefa S, Gavin OL lg7 PFAM-B_3627 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 294 and 374 amino acids in length. 
20.10 20.10 20.60 20.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.89 0.70 -5.38 7 296 2009-08-19 10:53:28 2009-08-19 11:53:28 3 4 251 0 60 210 3 194.40 35 57.46 CHANGED RlALHFHPDRlDs+GhoVsEGLLc-GlY+sQFETplSNGplSs..GGsRDaWEppLFGsuYp....shcpRPKYGAL-LhhhPsGPAPRFGSCYhlh+spVhuRsTFoahDSa..psP+-KGTlppFDslLAALLsEoFpRpaA...........LGppslpPspllc+LsppLppsh.sth-tssutNLDHYIEAQIHGslSLscDlthLVADPSF+GTsIGc.LpplCc+YtIsLpWHsGhpLsVpcVPsDFRGsuMPsLApRI.....AhstslDAhhIGtAAtcLp+pPppWS-RGshtcshp-LKhLWHVLV+a .......................................................................................................................................................sspRPhYuALshh..t.sGsA..s..taGpsaF.LsspV......pspsTao.hDoa......p.sh-puphpp..F.h.........h.p.hA............spss.hhs......hphh....ppLht.....hpt.hh.......h.hs.p....ss.t..spYIEApIHG-lsLhcDlctl...l.-sSa.pto.....plht.........................................................................................................................h............................ 1 20 37 49 +12129 PF12295 Symplekin_C Symplekin tight junction protein C terminal Assefa S, Gavin OL lg7 PFAM-B_3634 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 180 amino acids in length. There is a single completely conserved residue P that may be functionally important. Symplekn has been localized, by light and electron microscopy, to the plaque associated with the cytoplasmic face of the tight junction-containing zone (zonula occludens) of polar epithelial cells and of Sertoli cells of testis. However, both the mRNA and the protein can also be detected in a wide range of cell types that do not form tight junctions. Careful analyses have revealed that the protein occurs in all these diverse cells in the nucleoplasm, and only in those cells forming tight junctions is it recruited, partly but specifically, to the plaque structure of the zonula occludens. 
25.00 25.00 36.20 31.70 24.40 21.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -11.17 0.71 -4.37 34 216 2009-08-19 10:55:50 2009-08-19 11:55:50 3 7 161 0 151 211 2 171.50 39 15.68 CHANGED Dlchll.PllstLcKc-l.hpllPcllph.......s.phhppsls+lLp..............stspts.slossElLlhlHsl........sshKpl.lcAhshCFspps........h..asp-llutsLspls-p.....ssl..PhLahRTllQshptaPsLpsFlh.plLs+Ll.pKplWcp.splWcGFl+CspphtP.....pSatlllpLPstplpphlpch. .......................................................Dl+hLIPllsuLpKcEl.lthLP+llpL.......s..hlcpshsRlLt........................tsspshssLsPsElLlA.lHsI.....-stc..ssh..........Kpl...lcAsshCFsp+s..........laTp-VLusshpQLh-p..........sPLPhLhMRTVIQuls.....haP.pLssFVh.slLsRLl.hKQ.............lW+....splWcGFl+CspphpP.......pSatl.lL.Q.LPstQLttshpp............................................ 0 65 92 126 +12130 PF12296 HsbA Hydrophobic surface binding protein A Assefa S, Gavin OL lg7 PFAM-B_3635 (release 23.0) Family This protein is found in eukaryotes. Proteins in this family are typically between 171 to 275 amino acids in length. Although the HsbA amino acid sequence suggests that HsbA may be hydrophilic, HsbA adsorbed to hydrophobic PBSA (Polybutylene succinate-co-adipate) surfaces in the presence of NaCl or CaCl2. When HsbA was adsorbed on the hydrophobic PBSA surfaces, it promoted PBSA degradation via the CutL1 polyesterase. CutL1 interacts directly with HsbA attached to the hydrophobic QCM electrode surface. These results suggest that when HsbA is adsorbed onto the PBSA surface, it recruits CutL1, and that when CutL1 is accumulated on the PBSA surface, it stimulates PBSA degradation. 
22.40 22.40 22.40 23.40 22.20 22.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.36 0.71 -4.27 71 311 2009-08-19 10:58:29 2009-08-19 11:58:29 3 9 66 1 199 305 0 119.30 19 53.41 CHANGED +cssslhsslss.IssplsslssslpsYs.....GG.......shslhsstsslhsslppuhpssps.s..ssloss-uhslhsslt...sLpspltsslssllsKKsthss.......uGhushlhssLpp.pssssshusslpsK ...............................thsshhpslss.lssplp..plssslp.sas.....Gu........sh.tlhssssslhsslppu.hpslps..s...ssL.ost-uhslhss.lp...sLp.splpsslssllsKKs.hsp.......sGhsshlhtsLpp.psssppLuptl.tK............................... 0 28 86 153 +12131 PF12297 EVC2_like Ellis van Creveld protein 2 like protein Assefa S, Gavin OL lg7 PFAM-B_3668 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 571 and 1310 amino acids in length. There are two conserved sequence motifs: LPA and ELH. EVC2 is implicated in Ellis van Creveld chondrodysplastic dwarfism in humans. Mutations in this protein can give rise to this congenital condition. LIMBIN is a protein which shares around 80% sequence homology with EVC2 and it is implicated in a similar condition in bovine chondrodysplastic dwarfism. 
25.00 25.00 50.80 30.80 20.60 19.70 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.23 0.70 -5.84 5 63 2009-08-19 11:04:06 2009-08-19 12:04:06 3 2 35 0 29 54 0 376.80 49 36.14 CHANGED GEpauVSYTApL-u+cltsGES...LcLPApLTFpSuSQNcTs...QL..........ossFTITlEEpspVhPNHGlHAAGFllAFLVSLVLTsAuhFlLsRTpsLpsshLoRsRpp+cE....SKLEsusFoSu-sluEDluhNDQllDILThEEPGsMLQALE-LEIATLsRADADLEAsRhQIsKDlIAlLLKNLlSuGHLSPQlE+RMuulFKKQFLsLEsElQEEY-RKMlALTAECsLEoRKKMEuQ+QREpsApEEAEELlK+ssE+sAlECSsLL-pLHuLEQE+Lp+pLtLcQEE-FAKA+RQLAVaQRsELHsIFaTQIcsAspKGEL+.EsAKpLlc-YSKsQ-slEELMDFhQAoKRYHLu+RFAaREYLV+sLQupEo+lpuLLNTAATQLssLIpKhERAGaLcE-Qh-MLLERAQsElhSlKQKLDcDLK ..........G-tasVsYsApLputclh.sG.Es...LpLPApLTFpSsSpNco.......pL..........pAhholsspEphpVhPsHGlHuAGFhlAFllSlVLThsshF.hhs+hpslptshls..cpp...h.p.-........sKL..E....FsSu-uVsED..luhsDQhlDILo.E-PGsMlQALE-LE.....lATLsRAD.....ucLEAsRhQIsKD..lIulLLK.sLsSs.GHLSsQsE++huulFKKQFLhhEsElQ.EEY-RKMlALTAECsLEoRKK.hEsQaQREhsAhEEAEELhK+.suEcsAhECs.sLLcpLHtLEQ-cLp+pLhlppEEshAKApRQLAla........pRsELHpIFasQlcsAlhpGEL+.EsAKhLlpcYuchQ..-slEELMDFhQAsKRYHLu+RFuaREYLVpslQS.-o+lpuLLSoAusQLspLIpKpERAGaLcE-Qh-hLLERAQsElhSlKQKLDpDLK................... 0 7 7 12 +12132 PF12298 Bot1p Eukaryotic mitochondrial regulator protein Assefa S, Gavin OL lg7 PFAM-B_3672 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 168 and 381 amino acids in length. Bot1p localizes to the mitochondria in live cells and cofractionates with purified mitochondrial ribosomes. Bot1p has a novel function in the control of cell respiration by acting on the mitochondrial protein synthesis machinery. Observations also indicate that in fission yeast, alterations of mitochondrial function are linked to changes in cell cycle and cell morphology control mechanisms. 
21.90 21.90 22.10 22.00 21.50 21.80 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.94 0.71 -4.32 23 175 2012-10-04 14:01:12 2009-08-19 13:06:09 3 4 149 0 139 193 1 150.20 25 45.85 CHANGED slpPFP.NPphhoshllSE-L+pcIaccltppuh..SlppVSpcaGlcl.RV-AlV+LhclE+cW.......pschps.hpsaucslhcMhPh..........htsptthtcE.................sls-lPV.ptshsphFhsluEScsFs.lDAA+lhph..tPAs-php+hpshstctpstpp..tpp.pp...hchlhsptpcuE ...............t..pPFP.Ns.hhop.lLS-ph+ppIacplh........ppsh.............olctlStcaslchpRVtAll+LhplEcpa............tpptp.hhhshscshhpMhPh...........tst..h.tE................................................sls-l.s.ph.....s..phFhslsESc.Fs..DAu+hht....ss..t....p.ht.....t..tt.......................p................................................................................................. 0 51 86 122 +12133 PF12299 DUF3627 Protein of unknown function (DUF3627) Assefa S, Gavin OL lg7 PFAM-B_3687 (release 23.0) Family This domain family is found in bacteria and viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF02498. 20.90 20.90 20.90 22.00 20.80 20.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.74 0.72 -3.94 56 290 2009-08-19 12:19:57 2009-08-19 13:19:57 3 9 69 0 2 291 1 90.20 23 28.40 CHANGED hhp+h.ph.spcslthPsssppp.pLslhpp......s.spathlpuQcpplcpphp+hp......shsllh-.shcPNshsshNplp-pl....ppc..phchp+p .................h..p+hs.h.sp-tlhhPs-spppppLslhpp..............tsspashl+sQcpthcpphc+hp......stcllhc.shpPNshshhNplK-pL....tpc..phphpt................. 0 0 0 1 +12134 PF12300 DUF3628 Protein of unknown function (DUF3628) Assefa S, Gavin OL lg7 PFAM-B_3692 (release 23.0) Family This domain family is found in bacteria, and is typically between 153 and 183 amino acids in length. The family is found in association with Pfam:PF00270, Pfam:PF00271. 
25.00 25.00 110.40 109.60 20.40 19.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.37 0.71 -4.24 3 35 2009-08-19 12:23:46 2009-08-19 13:23:46 3 1 34 0 8 33 1 174.10 59 30.83 CHANGED EQKIPVEPVTsELLTPLPRPER.AsstGEEu-DssGDSVGsIFREAREsRAA-EtRRGGGRSGPG..uRSGust.G+GsRDG.cGG-GRsRsP.RKPRlEGEpDuAAsuAsTPVsuAuAscsP........ulsAA-GE+sPRKRRRRR+GRPVEGAEuV.sus..sssAuPhpPsQVVAKPVRsAAKoGupPSLL .EQKIPVEPVTuELLTPLPRssR.ssstGEEA---.sGDSVGsIFREAREQRAA-EpRRGGGRSGPG...uRSGu...GtGRR-....G..sGu-G+sRs..RKPR.VEG....Et..s.uAAsustsPVsuAuAs.psP.........usssA-GERsPRKRRRRRpGRPVE..GAEsu.Aus..sssAuPhpPsQVVApPVRsAAKsuupPSLL........................... 0 1 4 6 +12135 PF12301 CD99L2 CD99 antigen like protein 2 Assefa S, Gavin OL lg7 PFAM-B_3714 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 165 and 237 amino acids in length. CD99L2 and CD99 are involved in trans-endothelial migration of neutrophils in vitro and in the recruitment of neutrophils into inflamed peritoneum. 
23.50 23.50 23.70 23.70 21.90 23.40 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.23 0.71 -4.57 13 175 2009-09-13 06:10:09 2009-08-19 13:25:43 3 7 56 0 65 166 0 141.50 29 62.51 CHANGED FDLuDAh......................................ccssssss..........pcPssPKKPusG...............t-FDLtDAlssssshs.t.+sss+spspst............uGuhSDsDLtDssspsshpsDtstsGuus................................sscsstpppstsGhIAGIVuAVusAllGAlSSaluYQKKKLCF........ppu.st.........+ssssEP ...................................................................................................................sppst........................ps.hssp.tPsss................ssFD..Lt.DAlssps.s..............t.s...........................sp.p.................suGsauDpDLtDhhtssth+scts..tussst...t..............................ppss..sttsstsGhIuGls.uAlshAllGAlSSYluYQpKK.hCF..........tt.............................................. 0 5 10 25 +12136 PF12302 DUF3629 Protein of unknown function (DUF3629) Assefa S, Gavin OL lg7 PFAM-B_3723 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 256 and 292 amino acids in length. 
25.00 25.00 28.20 25.20 19.90 19.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.73 0.70 -5.00 3 44 2009-08-19 12:28:29 2009-08-19 13:28:29 3 3 6 0 2 44 0 239.70 74 86.97 CHANGED NSWKNSGFElRSGVAuACRLPKCPTLQRIPTN+DVNcK.EKPYSTFKIDHD..RIQ-MWQRSRIGAKEQNSLRGYQPANSDVNTDS+EVISALDKVRRIEERRKIQNKMRALMEEQHM+DERIQEEpDRVIRKEVLVERDPARKQTspEYSPVRNREKCKPsoWVSATVLAATGKMQQHRYMQTPP.KhNPIKVEAEVQpPSKSVP-NPAoGTTSGQPNQT+T..DMoSRAKWAKSlLELHKTRAKEMDDILSLsGYSN ..........................NSW+pStFt.RsGlAusscLPpCPTLQRIPTNcDVNcK.-pPYSTFKIDHD..RlQpMWQ+SRIGAKEQNSLRGYQPANSDVNTDScEVISALDKVRRIEERRKIQNKMRALMEEQHM+DERIQEEpDRVIRKEVLVERDPARKQTspEYSPVRNREpCKPsoWVSATVLAATGKMQQHRYMQTPP.KhNPIKVEstsQpPspSs.tpsAssTTSGQPNpTpt..sMspRscWsKuhLELHKpRAccMDclhSLtGYSN........................... 0 1 1 1 +12138 PF12304 BCLP Beta-casein like protein Assefa S, Gavin OL lg7 PFAM-B_3744 (release 23.0) Family This protein is found in eukaryotes. Proteins in this family are typically between 216 to 240 amino acids in length. This protein has two conserved sequence motifs: VLR and TRIY. BCLP is associated with cell morphology and a regulation of growth pattern of tumor. It is found in adenocarcinomas of uterine cervical tissues. 21.00 21.00 21.10 32.60 20.60 20.90 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.26 0.71 -4.87 9 123 2009-08-19 12:34:45 2009-08-19 13:34:45 3 2 38 0 53 105 1 160.80 47 83.93 CHANGED MKhGLsLllVGHVNFlLGAlVHGsVLRHlus.ppulshpYsluNlluVsSGlLuIhsGIsAIlhSRNLtphsLpWsLLVlSllsuLLSsACslGLllulshTluspG+sLlusCs..hsss....cphslus-CPFDPTRIYuToLsLWsPslLhSssEushSsRChhlshplhtLtPCtccthptphppts- .............M+sGLsLlllGHVsFlhuAllHGoVLRaVusspsAls.pYsssNllSVsSullslssGlsAllhSR.L.....p..L+WslhshulhssLLShsCulGLLhululThAstG+tLlusCp..h.ss......c.hshss-CPFDPTRIYsosLsLWh.ullhsssEsshushCs.hshpLhtltPhhtct.pt.h.....p........ 
0 2 7 22 +12139 PF12305 DUF3630 Protein of unknown function (DUF3630) Assefa S, Gavin OL lg7 PFAM-B_3766 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a single completely conserved residue D that may be functionally important. 25.00 25.00 27.60 26.90 19.20 19.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.18 0.72 -4.10 23 171 2009-08-19 12:35:54 2009-08-19 13:35:54 3 1 171 0 37 112 8 94.20 45 93.84 CHANGED hplsphphscssL.Lshsu.lDF-sFtthAEsLlttLsspVlE+QWuADRHpWLL-FEGspLhLpaEaYu-hsWLsstpts-.hEVLsaLAsLhpp ....................................FuLs-YhsscG+Lllpsps.FDF-sFsphAppLlslLuApVlEKQhDADlHoWLlDFEGspLhL+....uEHYSpulWLEsLsss.p.ucEpLsalApLhp............................. 0 7 13 26 +12140 PF12306 PixA Inclusion body protein Assefa S, Gavin OL lg7 PFAM-B_3786 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 173 and 191 amino acids in length. PixA is thought to be specifically produced in Xenorhabdus nematophila. It is an inclusion body protein. 20.80 20.80 22.30 21.60 20.10 19.50 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.19 0.71 -4.41 33 128 2009-08-19 12:40:52 2009-08-19 13:40:52 3 2 61 0 31 100 3 167.10 25 93.80 CHANGED hsphIDlllslDs-p....Ihp.......thsphSpsssp..PTsI.s..ppt.......lYhlsspsphh............supusucLplpsplGDslRWRsoSlot.ss-hslllYchhthss.......hhs......hpsshhphsssspP..sPss......sshsspppshp.haapusltspGp.tsathsFulasp.....pppLhGYapWDPhIslt .....................phlDlllslDs-t.Ihp.........ph.sssSpsPst...PTsl.s.....pct..........lYhlssss.hs..................supusucLplpAslGDhlpapsTSlst.su-pusllY..phhttsu.......h.s......hsssh.phssu.spP..sPss.....ppshsstppshp.hhapupVtp...sGp...tsathsFulash..................ppsLhGYahWDshIpl................................. 
0 4 9 16 +12141 PF12307 DUF3631 Protein of unknown function (DUF3631) Assefa S, Gavin OL lg7 PFAM-B_3787 (release 23.0) Family This protein is found in bacteria. Proteins in this family are typically between 180 to 701 amino acids in length. 20.80 20.80 20.80 20.80 20.50 20.70 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.46 0.71 -4.38 22 155 2009-08-19 12:47:29 2009-08-19 13:47:29 3 10 132 0 41 145 32 176.80 39 38.04 CHANGED +MRRRsssEpVtsaRhRpststhpsL+p+LspWhpsphsplcs.Ac.........PshP......t.lpDRAADsWEPLlAVADhAGGsWPp+ARtAulslss.ptspstt....sShsscLLtDlR.ssFtt.tst...ptlsos-LLstLpt.pEuPWus.hs.spsLssppLuphLpc.Y......sI.....cSpslRh..susp.sKGYp+...ssFtDAWsRY .............................+MRRRtssEpV.csaR.RhstsphctL+-+LAsWssshtstlpp.uh...............PsMPtsVpDRsADsWEPLlAVADsA.GGcWPcpARsAshphss...u.scstp.....sSlGlpLLpDIR.clF.....sst.........-ph.Tu-lLstLpp.-EuPWus.hp..GcsLssRtLuphLtc.Y......uI.....psp.h+...uups.hKGYpp.........spFtDAWsRY............................................................................. 0 13 30 35 +12142 PF12308 Noelin-1 Neurogenesis glycoprotein Assefa S, Gavin OL lg7 PFAM-B_3817 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF02191. There are two conserved sequence motifs: SAQ and VQN. Noelin-1 is a glycoprotein which is secreted mainly by postmitotic neurogenic tissues in the developing central and peripheral nervous systems, first appearing after neural tube closure. It is likely that it forms large multimeric complexes.It has a divergent function in neurogenesis. In animal caps neuralized by expression of noggin, co-expression of Noelin-1 causes expression of neuronal differentiation markers several stages before neurogenesis normally occurs in this tissue. 
Finally, only secreted forms of the protein can activate sensory marker expression, while all forms of the protein can induce early neurogenesis. 25.00 25.00 49.30 49.00 23.20 21.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.33 0.72 -4.21 8 216 2009-08-19 12:51:08 2009-08-19 13:51:08 3 4 46 0 85 186 0 96.70 65 23.41 CHANGED asuPEEGWQVYSSAQDsDGRClCTVVAPsQshCSRDsRSRQLRQLhEKVQNlSQShEVLDLRTtRDLQYVRshEslhKsL-sKL+ssp-s.+sLsAKuaQE ...........hsPEEuWQVYSSAQDs-GRCICTVVAPpQshCSRDARo+QLRQLLE.K.VQNMSQSIEVL-hRTQRDLQYVc+MEsQMKGL-uKF+QlE-s++phhu+pFp..................... 0 5 16 38 +12143 PF12309 KBP_C KIF-1 binding protein C terminal Assefa S, Gavin OL lg7 PFAM-B_3821 (release 23.0) Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 365 and 621 amino acids in length. There is a conserved LLP sequence motif. KBP is a binding partner for KIF1Balpha that is a regulator of its transport function and thus represents a type of kinesin interacting protein. 
20.10 20.10 20.30 20.50 19.90 20.00 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.16 0.70 -5.49 21 192 2009-09-12 21:52:06 2009-08-19 13:56:09 3 6 137 0 122 185 3 298.00 31 58.76 CHANGED .h-WAlNAATLSpaYhuptpatpARHpLuAAshlh........sp..p.........t...stptct-pp..............-shcpphAclApsWsKYsLplLpsup.p.....hhc-s.sc.t.php.phpthp........h.....tp...................c..................h.tl........c.ltuhEsplssphshsF--A+tlFhhuppalscAK-aaph-GasT-HlcIlQDhupLa+tLAFFE.psh-.................ppsKMHKRRschLEslhs-LNspaYhtlsRplhaElucsat-Mh-l...................+lt.tc....stptsss+slpK.lNplsppulcaappFl-oh+..............pss.tchPcp..l-pD.h+PhhhA+F+lARLatKllos-spppl-slssSLphYchhscaspppsp...stt.h.psElpls+EMlpLLPhKlpclp ................................................................................................................................-auhNshpLupaahs......pa..put..ppLsuuphlh.....................................................................................t................p.hppt.uplshsah+atl.hhptsp.h........................htp.p.t...p.t.t.......thtt.......................................................................................................................h..h................lts.hcp.pl.sh.hhshsFc-ARplFhhupcalp......cAKcaapl..........-uasT-HlcllQDhStLa+hLAFFE.psh-.................+psKMHKRRlshLEs...lh.s.pLN....s.paYh....h....l.....sRp.l.aElucsah-hh-l...............................+lthhp....p......ppsssch...lpK....lNplsppulcaaphFlcohp.....................................psp..t.h.tp...htp-.hcshhhAhFplu+lh.Kl.hss.sstpplcpltpulphYphh.t....hht.................h......hth..ph..h...p.............................................................................. 0 47 57 98 +12144 PF12310 Elf-1_N Transcription factor protein N terminal Assefa S, Gavin OL lg7 PFAM-B_3835 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. 
The family is found in association with Pfam:PF00178. There is a conserved PAVIVE sequence motif. Elf-1 is an immune cell specific transcription factor. It is found in T cells, B cells, megakaryocytes,and mast cells and is involved in the control of transcription for various immune proteins. These include IL-2, GM-CSF, IL-5, IL-2 receptor alpha chain, and CD4 in T cells, IgH, blk, and lyn in B cells, TdT in T and B cells, IL-3 in megakaryocytes, and SCL and Fc-epsilon-RI alpha chain in mast cells. 22.90 22.90 23.90 22.90 20.50 19.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.56 0.72 -4.11 11 183 2009-08-19 12:57:33 2009-08-19 13:57:33 3 4 40 0 81 167 0 91.20 44 17.08 CHANGED MusslQss-LlFEFAS......ssh.-ctpQhs-sssaPAVIVEpVPpAcLhphYuG..Lsh--p.shhhpDss...tEcpl.....-slhLo.......spsp.ssscpT.cTlpAAEALLsM-SP .................shslp..st.lhEhsS................ssh.-p.pp.t....csu.aPAVIVE.VPsuc...l...p.Yuu.....Lshs-t.s.hhp-ss....tEp.pl.p...ssh.s.lp.................spsps......ssscsTscTIEAAEALLpM-SP...................................... 0 5 12 36 +12145 PF12311 DUF3632 Protein of unknown function (DUF3632) Assefa S, Gavin OL lg7 PFAM-B_3839 (release 23.0) Family This domain family is found in eukaryotes, and is approximately 170 amino acids in length. There is a conserved ALE sequence motif. 
20.70 20.70 22.50 22.40 20.20 20.40 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.35 0.71 -4.48 45 228 2009-08-19 12:58:27 2009-08-19 13:58:27 3 6 62 0 179 234 0 178.10 17 56.69 CHANGED WphlhphApphshsp.t.........Q-+LVplltplpphssh......................t.hhspptplWp..-LPhhu.......php-tWs................ssssspptpcalNhsuFhA+Lsupshssh.............shaulhsl+suLE..................c.sss.phpstls...sAuhWlhhuGpplhphstth.t.t..tt.st...............hpup...sshshcRWphWccRh ............................................................................................a..hhphupths.pp...........pscLlphlhtlpphssh................................................hh.pttplWp......-.LP.hhs.......thp-h.at...............................................tttsstptppahNhsuFhA+Lhstshhsh..............shhulhhh+puLE................................................................p..ss..th..sshl.sAstWlh..hu..uptlhp.s...tt.t...t.......t........................ts....tsh...s.pRW.hWppph.............................................. 0 22 73 140 +12146 PF12312 NeA_P2 NeA_polyprotein; Nepovirus subgroup A polyprotein Assefa S, Gavin OL lg7 PFAM-B_3843 (release 23.0) Family This family of proteins is found in viruses. Proteins in this family are typically between 259 and 1110 amino acids in length. The family is found in association with Pfam:PF03688, Pfam:PF03689, Pfam:PF03391. This family is one of the polyproteins expressed by Nepoviruses in subgroup A. 
25.00 25.00 159.40 25.30 19.80 19.40 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.88 0.70 -5.15 4 141 2009-08-19 13:02:51 2009-08-19 14:02:51 3 6 6 0 0 143 0 229.50 76 29.65 CHANGED MsKFYaSsRRLssashups.+..GohEQWLtshE.DsuFRtpVKpcV.hsRth.ss.RhFpa.lGhGPl-sPthhsWtallpG.....s..........PTpPVKAcEVVssPQPK.VVIPSPPPsPp.PYFRsVGAFAPTRSGFIRATVERLoREREESRAAALFAELPLEYPQGAPL+Lolu.KFAMl+...HTTWR+WYDTSDERlhcsHPGGPsL...PPPP.IQpPPSFpERlpphCphpsCuRAFALETSLGLNhAWVshVsIPSISlCC .........................MsKFYaSsRRLAsas.ups.HLGGSVEQWLAAIpTDPSFRQTVKEDVQtNRpcPsAIRMFSWKlGaGPIDNPppCsWHFVLTGE..RPsQ...........PocPVKAcEVVVsPQ.........sK.KVVIPSPPPsPs.PYFRsVGAFAPTRSGFIRATVERLTREREESRAAALFAELPLEYPQGAPLKLSLAMKFAMLK...HTTWRKWYDTSDERLhcAHPGGPCL...PPPPPIQpP.PSFpERVREhCRhKSCARAFALETSLGLN+AWVGlVDIPSTSVCC.. 0 0 0 0 +12147 PF12313 NPR1_like_C NPR1/NIM1 like defence protein C terminal Assefa S, Gavin OL lg7 PFAM-B_3871 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 251 and 588 amino acids in length. The family is found in association with Pfam:PF00023, Pfam:PF00651. There are two conserved sequence motifs: LENRV and DLN. NPR1 (NIM1) is a defence protein in many plant species. 
25.00 25.00 33.80 33.20 19.80 19.20 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.27 0.70 -4.90 21 173 2009-08-19 13:19:32 2009-08-19 14:19:32 3 15 50 0 40 169 0 153.10 51 34.03 CHANGED +RLTRstDYpspTEpGp-osKsRLCI-ILEp-..hR.....sPhsu.-sSso..shhuD.DL+M+LLYLENRVAhARLLFPtEA+lAM-IAps-uTuEFssh.s...spsosuphppVDLNETPhh.pc+hhsRhcALoKT............VEhG+RaFP+CS-VLDKhh....pDDLs...DlhaL-+sTsEEpp.KRhRahELp-sVpKAFoKDKt.Ehs+SuhSSSSSSo ..............+RLTRstDYpspoEpGpcusKDRlCI-lLEpE..h+.....p...h..p.s..s..hhs-.phphcLL.LENRVuhA+lhFPhEAchsMpIAphcsT.EFsh...s....t..sttphppsDLN.sPhh.....pcchhsRhpAL.KT............VEhG+RaFPpCStVLs+hh....t-...D.s...phh.htpsTs-cp..K+.RahELp-...l.+AFscDpt.p.sh....tss.......... 0 5 24 32 +12148 PF12314 IMCp Inner membrane complex protein Assefa S, Gavin OL lg7 PFAM-B_3921 (release 23.0) Family This domain is found in bacteria and eukaryotes. This domain is about 120 amino acids in length. This family is the inner membrane complex of parasitic organisms. This is a cytoskeletal structure associated with the pellicle of these parasites. 21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.72 0.72 -3.84 22 202 2009-08-19 13:20:31 2009-08-19 14:20:31 3 6 25 0 158 206 7 80.50 33 25.71 CHANGED sK.hhpE+lcpVsKhlhc..EKlVEVPcl+aV-KIVEVPph.hhpK..hVPKh.lpEpll.hP+...h.pc.h.....+hV-lspl+-ls+ac-hEtsEElhchl .............hhpEplhc..V..PK.hhp..EKlVE........VPp.lch....V-KlVE..VPphhhhpK.h.......VP+s.lp....Eplh.hs+...h........h........................................... 0 62 92 140 +12149 PF12315 DUF3633 Protein of unknown function (DUF3633) Assefa S, Gavin OL lg7 PFAM-B_3949 (release 23.0) Family This domain family is found in bacteria and eukaryotes, and is approximately 210 amino acids in length. The family is found in association with Pfam:PF00412. 
20.30 20.30 20.70 21.80 20.10 20.10 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.48 0.70 -5.02 17 151 2012-10-03 04:41:15 2009-08-19 14:21:26 3 12 38 0 112 165 9 180.60 46 40.97 CHANGED uLsMKl-QplPhLLVERpALNcAh-uEKpGpa+h...sETRGLCLSEEQhlsol......ccs+hG.sppll.shhTEPp+LsR+.CE..VTAILlLYGLPRLLTGuILAHEhMHAWLRLpG.apsLss-VEEGICQVluHhWLEuphhsuss...........ttuuussssuusoSKKu.tpSpaEKKLG-FahHQIcoDsSssYGDGFRpu.tAVp..caGL++TLcHI+hTG .....................................................................shsM+l-Qpl...PhLLVERpALN-AhpsEKpu....h.ah...h................scTRGLCLSE.E.Q.hlpol............+pPphu.st..p...hh.shhops.+l....s.Rc..CE...VTAILlLYGLPRLLTGSILAHEhMHAaL..RL..p.G...at...sLss..cVEEGICQVluahWLE...uclhssss..........................................ssssss.sssssso...++u..thophE++LucFhhcQIcsDsSssYG-GFRsu.pAVt..p.aG..LcpTLcHl+hT............................................ 0 34 77 95 +12150 PF12316 Dsh_C Segment polarity protein dishevelled (Dsh) C terminal Assefa S, Gavin OL lg7 PFAM-B_3969 (release 23.0) Family This domain family is found in eukaryotes, and is typically between 177 and 207 amino acids in length. The family is found in association with Pfam:PF00778, Pfam:PF02377, Pfam:PF00610, Pfam:PF00595. The segment polarity gene dishevelled (dsh) is required for pattern formation of the embryonic segments. It is involved in the determination of body organisation through the Wingless pathway (analogous to the Wnt-1 pathway). 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.98 0.71 -11.52 0.71 -4.21 8 185 2009-08-19 13:22:41 2009-08-19 14:22:41 3 12 44 6 82 166 0 185.80 54 29.69 CHANGED NMAsLSLNDpDGSS.GASDQDTLAPLPHPuAAPWPh.h.uFPYQY.PsPHPa.P..PuYp-..uaSYGuGSAGSQHSEGS+SSGSsRStuttp+pt..cp...............uucpsuGSGSES-aoo+uuhtRt.tccuPstptusssScsSh+u...................uhupppopsShusPGlP..YssshLh.hsssssss..........PPGAPP.VR-LASVPPELTASRQSFphAMG ..................NhAsLsLNc.-GSS..GASDQDTLAPLPp.P.uA.uPWPh...h...uFP...........Y.Q.Y.PsPh.Ph.P..Pu.Y...p-..ua..SY..G.uGSAuSQHS.....E....GS+SSG.SsRSsuttp+ptc.ct.................tspppuGS..G.S.ES-hos+uu...p.s.hcpsssp.ptu..sp....u.ts......................................thpp.ttpssh.ssPGhs..Yss.hhhh.sss..s..s...............................PPGuPP..sR-LuuVPPELT..uSRQSFphAMG........................................ 0 7 14 35 +12151 PF12317 IFT46_B_C Intraflagellar transport complex B protein 46 C terminal Assefa S, Gavin OL lg7 PFAM-B_3974 (release 23.0) Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 298 and 416 amino acids in length. IFT46 is a flagellar protein of complex B. Like all IFT proteins, it is required for transport of IFT particles into the flagella. 
21.10 21.10 31.90 31.90 20.40 19.70 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.51 0.70 -4.97 13 166 2009-08-19 13:23:45 2009-08-19 14:23:45 3 3 118 0 110 161 7 199.20 49 58.91 CHANGED GuYcPt-YspLsVos..El+-LFpYIs+YpPpsh-LssKLKPFIPDYIPAlGDlDAFIKVPRPDucP-sLGLpVLDEPsspQScPsVlsLpLRp..........hucptststsshVsslccA-pN..PKtI-sWIsslpElH..+sKPssTVsYo+sMPDIEsLMQEWPt-hE-lLps.ltlPou-lDhDLppYschlCuILDIPVa.....sshI-SLHVLFTLYpEFKsNQHF ...........................tYsPs-YccLsVos..ElKELFpYIs+....YoPQhl-..L-tKL+PFIPDaIPAVGDlDAFlKV.....PRP..............D...........s........c.....s......D......sLGLh.lLDEPu.spQSDP.oVLsL.Lpp.........................ss+pt.s.h.s..p..phhV....+SlE.s..A-..+.s..PKsIDpWIpsIp................-LH..+oK..PssoVpYs+.sMPDI-sLMQ-Ws.cF.....EE.....hLsp...lpL...Ps.u.....clDssLscYlchlCsl..LDI..Plh..............pshIpSLHlLFoLY.EF+sspHF............. 1 50 61 87 +12152 PF12318 FAD-SLDH Membrane bound FAD containing D-sorbitol dehydrogenase Assefa S, Gavin OL lg7 PFAM-B_3647 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 168 and 189 amino acids in length. There is a conserved ALM sequence motif. This family is a membrane protein (FAD-SLDH) involved in oxidation of D-sorbitol to L-sorbose. 
23.00 23.00 23.00 23.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.03 0.71 -4.57 25 189 2009-08-19 14:35:08 2009-08-19 15:35:08 3 3 141 0 74 162 5 156.60 31 88.15 CHANGED uloRRplL......hsshussushsstshs.t.hsss.sssss.tsFhslSptLTs...+psL-stlupRlasALhppss.........pFssplspLtslls......ssp.htphpsts.s..lpssspsIlsAWYpGsVtcs.......cuplluYcsALMapsspDsLhlPTYCsssPhaWssp.Pstlss ............................hoRRphL..........sshuhs....us..shsss...h...sht..shs..ss.ssssslssFhsLSptLTu......+psLs...tslGpRlhpuLtcsss.........phsspLsp.Ltshls...............th..sss.s...psluhpIlpAWYhGlV..................cstll....sYcpALMapslpDsLll.oYssstPsaWspp.P.t..s.............. 0 12 28 55 +12153 PF12319 TryThrA_C Tryptophan-Threonine-rich plasmodium antigen C terminal Assefa S, Gavin OL lg7 PFAM-B_3676 (release 23.0) Family This protein is found in eukaryotes. Proteins in this family are typically between 254 to 536 amino acids in length. This family is the C terminal of a surface antigen of malarial Plasmodium species. It is currently being targeted for use as part of a subunit vaccine against Plasmodium falciparum, the main species involved in causing human malaria. 22.10 22.10 22.10 24.00 22.00 21.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -12.44 0.70 -5.15 52 134 2009-08-19 14:41:50 2009-08-19 15:41:50 3 3 9 0 89 162 0 199.00 31 46.96 CHANGED cWpsWhpcLEp.-WcpFptpl-pcK.pcWlppK-c-WppWlcphcpKWh+Yscphcp.-apsslhcpu.sWs-ppWcpW.lco-s+phhct-accWlppscpplspahhccW.pWKspK...lhpWLhp-WKpcEschWpcacptph.hchhphtc..+cpWhcWpcRlpcEpp-WppWlctK-phhlps.chspWpcWKs-KpthaspWhcsFlpcWlscKpWphWh ......cWppWhppLEp.-WctFptpl-pc+.pcWhcpK-c-asp.Wlpph-sKWhpaspsh-p.Eapsshh+cs......sWs-.p.pWcpW.lpTcuKphhct-accWlpppcstlspahhpcW.pWKspK...hhpWlhs-WKpcEpphWtcapptph.hphhphtc..+ppahpW+pRlp+Etp-WppWlphK-phhlps.chspW.cWKp-KphhaspWhcsFlspWIpcKpWp.Wh................ 
0 4 9 65 +12154 PF12320 SbcD_C Type 5 capsule protein repressor C-terminal domain Assefa S, Gavin OL, Bateman A lg7 PFAM-B_3790 (release 23.0) Domain This domain is found in bacteria and archaea. This domain is about 90 amino acids in length. This domain is found associated with Pfam:PF00149. SbcD works in complex with SbdC (SbcDC) which is a transcription regulator. It down-regulates transcription of arl and mgr to inhibit type 5 capsule protein production. It acts as part of the SOS pathway of bacteria. 24.30 24.30 24.30 24.30 24.10 24.20 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.26 0.72 -3.97 203 1897 2009-09-13 17:34:05 2009-08-19 15:46:33 3 3 1883 8 311 1271 34 98.00 23 25.01 CHANGED sshp.sltplc........usl...cclhptl..........tt...........t.ttpt...................salclpl..pscphh..schhpc.lcphhssh.....lclphphtp.pptt.......tpptpslpcl.ssp-lFpchhpppptp ............................shpshthlc.......Gsl....splhpph............tp.......................................tpps........................saLclcl...pscphl..hDhh..p+.lcplhssl..........Ltlcp.sptp..ppp..........pppptslp.c.h..ss.-lFpchhtt.t..s................................. 0 103 188 252 +12155 PF12321 DUF3634 Protein of unknown function (DUF3634) Assefa S, Gavin OL lg7 PFAM-B_3607 (release 23.0) Family This family of proteins is found in bacteria. Proteins in this family are typically between 103 and 114 amino acids in length. 25.00 25.00 40.00 39.90 20.40 20.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.24 0.72 -4.10 19 141 2009-08-19 14:56:50 2009-08-19 15:56:50 3 1 140 0 30 92 5 101.20 51 97.32 CHANGED hlhhIlluhh.llahLlhss+...slhch+Fc-G+lsppKG+lP.pFp+ss+cIA+pp.h.oGpl+s.+.psss+LchStsVscplpQRIRNlF...Papta..Kstssp..+pu ......MLYVIllAsh.lIFWLlhlDR...PlLhVpFKDG+lspsKGHhPPoF+HNlh-IAp+-PF.oGElKVYppRoG.sKLsFSKpVPKKlQQRIRNVF...PHQGFs+pSsshKKt.t.......... 
0 5 10 21 +12156 PF12322 T4_baseplate T4 bacteriophage base plate protein Assefa S, Gavin OL lg7 PFAM-B_3861 (release 23.0) Family This protein is found in viruses. Proteins in this family are typically between 208 to 249 amino acids in length. This protein has a single completely conserved residue S that may be functionally important. This family includes the two base plate proteins in T4 bacteriophages. These are gp51 and gp26, encoded by late genes. 24.50 24.50 24.50 24.90 24.40 23.50 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.21 0.71 -5.07 18 130 2009-08-19 15:13:34 2009-08-19 16:13:34 3 2 86 0 0 114 1244 193.00 25 91.02 CHANGED hchKlssGs+..+a+sFTVpEYpsll......pspt.pc.pphlscll.sph......htclsppppEalFlplastSlGKs.lchphpCspCuc.ptph.lslpptpl......ps.hhpsuslpIph+aPch................cshtchlscCI-plhts.cp.ashcDLs-pEpppll-hlohcshccIhcthps.s.lhhsltlpC..sspppshslpGLtshFc ................................hchcl.sss+.lph+sFTlc-...aptLl........pscp.tphpphlppll.ssh.......hpcLspt-tEhlalplhspSlGcs.lchpapCs...................p...................Csp.ch.h.lsLpph.pl.....t...cs.hh..h.ss.lpIph+aPp.h.t.................................cshtchlsssIcslhss.sp.hs.hc-ls-............p............-....hpclh....chls.pphppIt.c.hhpp..Pplh.hsl.lpC.psGpppshslcGLtphFt................. 0 0 0 0 +12157 PF12323 HTH_OrfB_IS605 HTH_14; Helix-turn-helix domain Bateman A, Gavin OL lg7 Pfam-B_1210 (release 3.0) & Pfam-B_4602 (Release 7.5) Family This is the N terminal helix-turn-helix domain of Transposase_2 Pfam:PF01385. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.29 0.72 -4.63 231 3827 2012-10-04 14:01:12 2009-08-25 09:44:16 3 12 1203 0 846 3330 126 44.80 31 12.81 CHANGED M.........h+...uaKaRlaP.spcQpphlt+phGssRaVYNhsLstppctYc..p.stc ...........M......pua+h+lhP.stpQpphlp+thGssRalaNtsLshppctacttt............. 
0 214 582 720 +12158 PF12324 HTH_15 Helix-turn-helix domain of alkylmercury lyase Bateman A, Gavin OL lg7 Pfam-B_3505 (release 6.5) Domain Alkylmercury lyase (EC:4.99.1.2) cleaves the carbon-mercury bond of organomercurials such as phenylmercuric acetate. This is the N terminal helix-turn-helix domain associated with Pfam:PF03243. 21.30 21.30 21.30 21.90 21.20 20.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.43 0.72 -4.32 6 128 2012-10-04 14:01:12 2009-08-25 11:23:53 3 4 102 13 18 80 9 73.50 39 34.60 CHANGED ptpplusplspthspGptspuhshLaRPLLp.LApGcPVohEslAstTG...+ss-cVtpVLpptPusEhD-cG+IlG.hG ..............................phus.l.pphspsp.ssuht.LhhP...LL+.LAc.GcPVohpsLAuthG...hPsEcVtpVLpph..s..u..sEYDcpGpIlG.aG........... 0 10 14 17 +12159 PF12325 TMF_TATA_bd TATA element modulatory factor 1 TATA binding Wood V, Coggill P pcc Pfam-B_97264 (release 23.0) Family This is the C-terminal conserved coiled coil region of a family of TATA element modulatory factor 1 proteins conserved in eukaryotes [1]. The proteins bind to the TATA element of some RNA polymerase II promoters and repress their activity. by competing with the binding of TATA binding protein. TMF1_TATA_bd is the most conserved part of the TMFs [2]. TMFs are evolutionarily conserved golgins that bind Rab6, a ubiquitous ras-like GTP-binding Golgi protein, and contribute to Golgi organisation in animal [3] and plant [4] cells. The Rab6-binding domain appears to be the same region as this C-terminal family [3]. 
28.70 28.70 28.80 28.90 28.60 28.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.48 0.71 -4.38 78 241 2009-08-27 10:02:35 2009-08-27 11:02:35 3 5 216 0 179 241 2 116.50 33 13.47 CHANGED hsshhuhsssstussspllE+huuslRpLEuElssh+pElu+LpspRsphppElVcLhcEsEch..+ptspclppLcpplcsLcpRapssLphlGEKoEpVEELcsDlsDLKchY+pQV-phl ................................h......pssthussspllEphpupl+phE.............uElsphpp.ElupLpppRsphppElVc....Lhpps-ch.........cptspclt.pLcpplccLcp.RYpohLphhGEKsEclEEL+tDltDlKphY+pQl-phl.......... 0 52 92 144 +12160 PF12326 EOS1 N-glycosylation protein Wood V, Coggill P pcc Pfam-B_29822 (release 23.0) Family This family is not required for survival of S.cerevisiae, but its deletion leads to heightened sensitivity to oxidative stress. It appears to be involved in N-glycosylation, and resides in the endoplasmic reticulum. 21.50 21.50 22.70 21.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.14 0.71 -4.56 19 118 2009-08-27 10:17:50 2009-08-27 11:17:50 3 6 105 0 85 125 0 130.70 34 35.05 CHANGED psRuSEahLsulWClVSuYLoYulLDuLMVRWIlpYSTsAAIlRhhSMSh..lhIThEhhLluuhSsp.tcYhLHsWIlISChLThsYIhQsalTSNLphh.......................................................................................thpppRphDlYpIsVFsVVPVGlASFlTMlGLLRsLh .......................p....sEhhLs.lWChsohYLoa.hhDsLMsRWllpYos.AsIlRlLohss.......h..h..lThplLhLouhS..s-..chhL.sWIhIussLThhYhh..........................................................................................................................................................................................................................................................................................................h........................................................................................ 
1 20 45 71 +12161 PF12327 FtsZ_C FtsZ family, C-terminal domain Bateman A, Sonnhammer ELL, Griffiths-Jones SR pcc Prosite Domain This family includes the bacterial FtsZ family of proteins. Members of this family are involved in polymer formation. FtsZ is the polymer-forming protein of bacterial cell division. It is part of a ring in the middle of the dividing cell that is required for constriction of cell membrane and cell envelope to yield two daughter cells. FtsZ is a GTPase, like tubulin. FtsZ can polymerise into tubes, sheets, and rings in vitro and is ubiquitous in eubacteria and archaea. 21.70 21.70 21.70 21.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.94 0.72 -3.92 26 5666 2012-10-03 12:11:42 2009-08-28 16:26:11 3 7 4977 45 1192 3761 2518 93.00 47 24.25 CHANGED GhAMMGhGpAsG...-sRAt...cAscsAlssPLL-..lslpGA+GlLVNITGG.DhoLtEs-ssuptIp-tlD.s-AslIaGsslDsphpsplRVoVVATGlc ........................GpAhMGhG.A.sG.......EsRA.h...cAActAIuSPL.LE...ss.lpG.A.c..GV..LlN.I......T........GG..........D..........l.....oL..hEhppAus....h.lp..pt.s.....s........p........s.....A..s.l.....I.aG.s.s.l.D...p...s.h...p....D..c..lRVTVlATGh.................................... 2 392 791 1014 +12162 PF12328 Rpp20 Rpp20 subunit of nuclear RNase MRP and P Wood V, Coggill P pcc Pfam-B_33537 (release 23.0) Family The nuclear RNase P of Saccharomyces cerevisiae is made up of at least nine protein subunits; Pop1, Pop3, Pop4, Pop5, Pop6, Pop7, Pop8, Rpr2 and Rpp1. Many of these subunits seem to be present also in the RNase MRP, with the exception of Rpr2 (Rpp21) which is unique to RNase P. Human nuclear RNase P and MRP appear to contain at least 10 protein subunits, Rpp14, Rpp20, Rpp21, Rpp25, Rpp29, Rpp30, Rpp38, Rpp40, hPop1 and hPop5, although there is recent evidence that not all of these subunits are shared between P and MRP. 
Archaeal RNase P has at least four protein subunits homologous to eukaryotic RNase P/MRP proteins [2]. In the yeast RNase P, Pop6 and Pop7 (the Rpp20 homologue) interact with each other and they are both interaction partners of Pop4 [4]; in the human MRP Rpp25 and Rpp20 interact with each other [2] and Rpp25 binds to Rpp29 (Pop4) [3]. 24.60 24.60 24.60 24.60 24.40 24.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.70 0.71 -4.54 44 133 2012-10-03 06:24:24 2009-08-28 16:42:55 3 3 129 1 93 213 7 130.40 29 71.76 CHANGED phtpslYVsopTPahSulKRlpKhLcptp+..........................pt.........tt..................tt..t...............c.Vhl+uhG+AIpKslslutaF...pcp......shcVclpTuoVpVlD-lh...........................................................................................................s-tc-cpscsRpRhVSsVElp .........................................p...pslaVsopTPahStlKRlpKhLctst+.....................pt..............................................t...............tppt.Vhl+GhG+.AIpKslslutaF.......pppp......shpVplpTuTVplVD-lh.tt........................................................................................................................................................scscscpspsRhRtlSslclt......................................................................................................................................................................................... 0 19 39 65 +12163 PF12329 TMF_DNA_bd TATA element modulatory factor 1 DNA binding Wood V, Coggill P pcc manual Family This is the middle region of a family of TATA element modulatory factor 1 proteins conserved in eukaryotes that contains at its N-terminal section a number of leucine zippers that could potentially form coiled coil structures.[1]. The whole proteins bind to the TATA element of some RNA polymerase II promoters and repress their activity. by competing with the binding of TATA binding protein. 
TMFs are evolutionarily conserved golgins that bind Rab6, a ubiquitous ras-like GTP-binding Golgi protein, and contribute to Golgi organisation in animal [3] and plant [4] cells. 35.00 35.00 35.00 35.20 34.90 34.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.44 0.72 -4.17 51 411 2009-08-28 15:49:32 2009-08-28 16:49:32 3 20 270 \N 165 357 0 74.30 39 10.97 CHANGED sshpcplpEKDppIspLhcEGppLS+p-hcppspIKKLRtchp-t-..pplsph.p...p+hpch-pclpplcpchcpt- ...................t..htppLttKctplAph.pE...hEKL.cs...cupo.ccLcpcscEsE..+phscLp....pKls-LEcElssLchhLsus-............ 0 41 75 128 +12164 PF12330 DUF3635 Domain of unknown function (DUF3635) Wood V, Coggill P pcc Wood V Family This family may be a potential Haspin-related leucine-zipper. A leucine zipper was proposed to be present towards the C-terminus of human Haspin, (up-stream of the current family) [1]; however, as this domain would appear to span several helices and be largely within a loop structure [2], the actual zipper might be further downstream, and be this family, which is the very C-terminal part of the Sch. pombe sequence. 21.60 21.60 21.70 21.60 21.00 21.50 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.47 0.72 -3.92 6 219 2009-09-03 10:35:38 2009-09-03 11:35:38 3 7 187 7 156 232 1 85.70 24 13.60 CHANGED c-LFsGcG.........DYQF-IYRLMRp.hps..sWupFcPhTNlLWLaYLocplLpKhshKohs..sssshRphhcpI...........pch+phhhshcSto.hpCuc.Lh ...........................................hFpG.p.s..............chQa-lYRhM+p.hps.............sWp..papPhTNllWLpYLhcpLlpphp.hpph......pt.........p............................................................................................................. 0 61 91 130 +12165 PF12331 DUF3636 Protein of unknown function (DUF3636) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 160 amino acids in length. 
25.00 25.00 75.40 46.00 21.60 17.20 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.78 0.71 -4.63 17 75 2009-09-03 15:34:27 2009-09-03 16:34:27 3 2 70 0 58 70 0 145.80 35 19.84 CHANGED suTcsLpLLYhhAsuChp................pspslppFW+plpaDFlLhMLssp.QslsDIhLhLslLSTSlhssSFGsl.so.......cpcQhssEN...aIlDRlusLLsEhPpsD.......EGttPaoshclhphRLEslthLTpluFssh.....tptuuhhlApHPoALuRLlRshaDEL ................ssTcsLplLYhhAhuChp.......................spps.ppFW+phphDFlLhMLssp.Q.hsDIhhhLpLLsTShhssShGPl.ss.......cspphsspp.............hIIDRVushLsEhP...........sptshoshclsslRltAhphLsshAhosh..........uuhhlApHs..sAlsRllpslpspl...... 0 8 25 44 +12167 PF12333 Ipi1_N Rix1 complex component involved in 60S ribosome maturation Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 91 and 105 amino acids in length. This family is the N terminal of Ipi1, a component of the Rix1 complex which works in conjunction with Rea1 to mature the 60S ribosome. 21.50 21.50 21.50 22.40 21.40 21.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.26 0.72 -4.05 43 316 2009-09-03 15:39:04 2009-09-03 16:39:04 3 6 274 0 202 292 0 102.20 27 18.54 CHANGED sssslpsahphllhalpuAMTHls..scIRtsohchLshLlchsspp.....sptsahK..hLpsahslLuhs.....tst.........s.shs....hstst............pstcthsphLpsLtpFL .......t..ppltsahsllhsalpuAMTHls..tsIppDSlclL-lLLcthPs.................hs.s..thsp........lLps..Flplluap.......tpp..................ht.shs.ttp....................pstphphphL.tLtpFL...................................................................... 0 59 101 160 +12168 PF12334 rOmpB Rickettsia outer membrane protein B Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 220 amino acids in length. The family is found in association with Pfam:PF03797. 
This family is the middle region of one of the outer membrane proteins of Rickettsia which is involved in adhesion to eukaryotic cells for uptake. 25.00 25.00 115.00 115.00 19.70 18.80 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.40 0.70 -4.97 9 158 2009-09-03 15:40:00 2009-09-03 16:40:00 3 2 90 0 6 136 0 200.30 77 15.45 CHANGED INFAu.s.ussDTlLNVGcGVNLYATNITTsssNsGSFsFsAGGTNIVSGTVGGQQGNKhNsVpLDNGTTspFLGNAhFNGtTTIcuNSTLQIGGNYTsDalp..SADNTGIVEFVNscsITVTLNKQAssVssLKQITVSGPGNVVINpIGNAuNshGhsTsTIuFpN....ASLGAsLFLPSulPhDs......LTIKSTV...GNGssGs.FNsPhVlVSGlDSsIAsGQl .IpFus.u.spsDolLsVGcGVNLYAsNITTssANsGSFhFpuGGTsIVSGTVGGQQGNKFNTVsLDNGTTVKFLGNATFNGpTTItANSTLQIGGNYTADFVA..SADGTGIVEFVNTsPITVTLNKQAAPVNsLKQITVSGPGNVVINEIGNAGNsHGAsTDTIAFEN....SSLGAsLFLPSGIPFNDAGNhhPLTIKSTV...GN.cTAsG.FDVPtVIV.GVDSVIADGQV 0 1 1 1 +12169 PF12335 SBF2 Myotubularin protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 220 amino acids in length. The family is found in association with Pfam:PF02141, Pfam:PF03456, Pfam:PF03455. This family is the middle region of SBF2, a member of the myotubularin family. Myotubularin-related proteins have been suggested to work in phosphoinositide-mediated signalling events that may also convey control of myelination. Mutations of SBF2 are implicated in Charcot-Marie-Tooth disease. 
22.30 22.30 22.30 22.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.67 0.70 -5.03 11 260 2009-09-03 15:43:16 2009-09-03 16:43:16 3 40 96 0 156 235 0 186.70 35 14.02 CHANGED PhGPsls.......slh-cpsssh.NSA...RRLEVlRNCISaIF-NKhhEA+KohPAVLRuLKu+sARlsLscELsh+sQpNRs.lLDHQQFDhVVRhMNsALQDsSuhDEauVAAALLPLoouFCRKLusGVpQFAYTClQDHsVWsN.QFWEssFYp-VQspI+uLYL...........................t.t.sppp.pstsstscstEpoAl-lAA-QhRhWPTlsc-cQpELlppEESTlaSQAIHaANpM ...............................................................hht.....h.sSA...pRLEVlRsCl....shlF-sK.....hh-scKhhPAshRsL+s+sAR.sLspELth.a........lp.p.N.......+A..hLDcQQF-..hllRhMNssLQ-..sos..h-Easl..AAuLL.PLs.osF...hR........K.L....us...G...l...pQ.FAY..o.slQ........-H.slWps.QFWEuhFYs...sVQp.pl+...sLYLp...............................................pp.t......t.........t..t.p...t.t.......sp.tp.os.h-lss...pQ.t.....s..ht.t.ptphhppEEshlhuQAhHaAshM............................................................... 0 45 61 105 +12170 PF12336 SOXp SOX transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00505. There are two conserved sequence motifs: KKDK and LPG. This family is made up of SOX transcription factors. These are involved in upregulation of nestin, a neural promoter. 
22.10 22.10 22.10 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.16 0.72 -3.37 6 314 2009-09-03 15:46:08 2009-09-03 16:46:08 3 3 110 1 118 281 0 77.80 43 26.18 CHANGED YRPRRKTKTLLKKDKYSLPGsLls..ssusss..sVusG..hDsY.sHMNGah..NGuYShMQ-.QLuhspHsshss..............ppl.HRY ............YRPR..RKTKTLhK.K.D....KY...s...L.P....u..s..lls...................ssu...sh...t..........sVu.....u....................h.s........th..shh.NGa.......suu...t............hhtc..Lu......Qh.u.hsu........................................................................... 2 18 32 68 +12171 PF12337 DUF3637 Protein of unknown function (DUF3637) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00073, Pfam:PF08935. 25.00 25.00 134.10 133.10 20.30 19.30 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.32 0.72 -3.99 2 82 2009-09-03 15:48:34 2009-09-03 16:48:34 3 4 13 0 0 84 0 67.00 90 4.17 CHANGED MACLKhFSLKsKsKsHSYSPRtlELKaN.DFsFKPRPLAsLLRLEPSDoKhRRVECsEVEhDSWYPN MACLKIFSLK+KsKSHSYSPRtlELKYNSDFAFKPRPLAPLLRLEPSDTKTRRIECAEVEYDSWYPN 0 0 0 0 +12172 PF12338 RbcS Ribulose-1,5-bisphosphate carboxylase small subunit Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00101. There is a conserved APF sequence motif. There are two completely conserved residues (L and P) that may be functionally important. This family is the small subunit of ribulose-1,5-bisphosphate. 
25.00 25.00 28.00 26.30 22.50 20.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -8.37 0.72 -4.19 46 352 2009-09-03 15:49:56 2009-09-03 16:49:56 3 4 116 0 42 353 0 40.00 53 24.28 CHANGED ASShh.So.AsVAo....RsssAQA.oMVAPFsGLKSsuuFPlTRKsNss ..............uuhh..so.sshss............ssupA..ohVAPFsGLKSoA.uFPVo.RKsss.... 0 6 22 34 +12173 PF12339 DNAJ_related DNA-J related protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 130 amino acids in length. The family is found in association with Pfam:PF00226. There is a conserved YYLD sequence motif. Mostof the sequences in this family are annotated as DNA-J related proteins but there is little publication to back this up. 25.00 25.00 42.80 29.00 24.20 15.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.72 0.71 -4.27 41 208 2009-09-03 15:51:57 2009-09-03 16:51:57 3 2 207 0 51 141 15 125.50 47 61.22 CHANGED tpsPLlhsllplLpp.pspuhp.apLhppLpppshh..splc.ssptsLF+ppFLLhNALYpLQppLhscp..hLplpshsIpLh..sh..tsssts..h.....t....sDsLRcYYLDWpsa.csspspVccLLssFWpcap .......................................-NPLlWPIhplL+c.pPpGWKlHsLAscLpchGhh....ssL..D.pPccDLFKRNFLlMNALYQLQ-pLaP-p..WLQVcAMsIhLhshhp......sp.tp..l......-....sDPLR-YYlsWpNY.-ssEu-V+RLLspFWoRYp............. 0 13 25 40 +12174 PF12340 DUF3638 Protein of unknown function (DUF3638) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 230 amino acids in length. There are two conserved sequence motifs: LLE and NMG. 
25.00 25.00 25.60 27.30 23.00 21.80 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.41 0.70 -5.37 26 218 2009-09-03 15:54:27 2009-09-03 16:54:27 3 22 110 0 186 233 13 197.50 30 6.43 CHANGED GHsNWcPh-aP-WLLLEIEusIhIRpsQhpVAtphIsPsuGsNuVLQhNMGpGKoSVIlPMVAusLAssspLlRllVPKsLhpQstQlLpo+LGGLlsRclhHlPFSRcTpsststlpthpplaccshppuGllLs.PEHlLSFKL.GLpplh-u+hs.ApshlchppWhcphuRDllDESD.sLulKhQLIYs.GoQpsVDGpPcRWplsQsLLuLVppashsLppca ..................taps......PcaLlhEh-sslhlRttQh....p....lApp.h.h.s.....s...t..ps..pss.lh.QhpMGpGKooVIhPhluh..h..L......A....ss..p........p...LlpllVscs.Lhpph.hphLpp+l.u.u.l.l.sR.plhplPF..........sRph..p..h.s................p.......t.h..ph....h.t..phhppsh.p...ptG.ll.l.spPE..p...lhSh.....p......Lh.s.....l......p.p.h.....h......p...t..p..........t..............s..........pt........hhp....htp.hh.p.p...sp......D...llDEsD..ht.h+hpLlYs.Gtt.slp.t..s.RW.hhptlhthh...........h.................................... 0 70 126 164 +12175 PF12341 DUF3639 Protein of unknown function (DUF3639) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00400. There are two completely conserved residues (E and R) that may be functionally important. 21.60 21.60 21.60 21.60 21.40 21.40 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.76 0.72 -6.62 0.72 -3.94 35 247 2009-09-03 15:55:19 2009-09-03 16:55:19 3 17 204 0 161 240 3 26.90 40 2.85 CHANGED GEplpulAsussalsVsTStsalRlFo ....sEplpAlulupu...alAssTSspalRlFo. 0 38 75 125 +12176 PF12342 DUF3640 Protein of unknown function (DUF3640) Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 25 and 211 amino acids in length. 
25.00 25.00 45.10 44.30 17.50 16.00 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.59 0.72 -6.63 0.72 -4.41 5 29 2009-09-03 15:56:06 2009-09-03 16:56:06 3 4 2 0 0 24 0 25.10 79 5.26 CHANGED MSLLTNRhCRRVDKDQWGPGstGRTP MSLLTNRhsRRVDKDQWGPGshG+sP. 0 0 0 0 +12177 PF12343 DEADboxA Cold shock protein DEAD box A Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 68 and 89 amino acids in length. The family is found in association with Pfam:PF00270, Pfam:PF00271, Pfam:PF03880. This family is the C terminal region of DEAD box A, a protein expressed under conditions of cold shock which is involved in various cellular processes such as transcription, translation and DA recombination. 22.50 22.50 22.90 22.90 22.30 22.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.90 0.72 -3.72 6 500 2009-09-03 15:56:51 2009-09-03 16:56:51 3 4 494 0 21 135 1 63.20 88 10.05 CHANGED GGERR..GG...GRs.FuGERREG.....GRp..G-RREGuss........uGERR.u...RDGQR..APRRDDuAA.......ssRRRFG.DA .........GGERR...GG.....GRG.Fu.GERREG........GRNFS.GERREGGRGDGRRFSGERREG....R........APRRDDSTG............RRRFGGDA.. 0 1 5 13 +12178 PF12344 UvrB Ultra-violet resistance protein B Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00271, Pfam:PF02151, Pfam:PF04851. There are two conserved sequence motifs: YAD and RRR. This family is the C terminal region of the UvrB protein which conveys mutational resistance against UV light to various different species. 
21.20 21.20 21.30 26.80 20.90 18.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.14 0.72 -4.43 509 4463 2009-09-03 16:03:57 2009-09-03 17:03:57 3 9 4402 11 984 3212 2153 43.90 56 6.48 CHANGED hYADc.lTcSMpcAI-ETpRRRphQhtYNccHGITPpo.lpKplp- .......hYAD+..ITsSMpcAI-ETpRRRchQhtYNccHGIsPps.lpKclp-..... 0 335 664 843 +12179 PF12345 DUF3641 Protein of unknown function (DUF3641) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF04055. This family consists of proteins which are commonly annotated as Radical SAM domains but there is little annotation to back this up. 21.50 21.50 21.50 24.70 21.40 20.90 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.86 0.71 -4.31 83 232 2009-09-03 16:04:57 2009-09-03 17:04:57 3 10 220 0 99 252 228 133.40 46 40.40 CHANGED LPPsQtpLEt-YKctLtpcaGIpFNpLaTITNhPIuRFtphLpppGpapsYhphLpssaNssslpslMCRsplSVsWpGplY..DCDFNQMLslPs.......pspt.hplp-Lhp.ss...ltsppItsusHCYGCTAGsGSSCGGu ...........LPPsQtsLEt-YKcpLtccaGIpFNpLaTITNhPIuRFtphLt........ppsphpsYhphLtssaNssslpslMCRsplSVsWpGpLY..DCDFNQhLslsh...................supt.hplp-lhpts...htsppItsusHCYGCTAGsGSSCGGu.................... 0 36 73 93 +12180 PF12346 HJURP_mid HJURP_repeat; Holliday junction recognition protein-associated repeat Coggill P pcc manual Domain Vertebral Holliday junction recognition proteins carry an SCM3 domain at their N-terminus as do the eukaryotic fungi, but they also carry this central, conserved region. The function of this family is not known. Further downstream there is also a repeated domain, also of unknown function. Investigation of Scm3 and associated proteins is likely to be directly relevant to understanding the mechanism of HJURP-mediated CENP-A chromatin assembly at human centromeres. 
25.00 25.00 68.70 48.10 19.10 19.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.46 0.71 -3.96 4 36 2009-09-04 12:32:37 2009-09-04 13:32:37 3 6 24 0 14 39 0 105.50 61 17.80 CHANGED MSRLLSTKPSSIISTKTFIMQNWNSR+RHRtKSRM..NKTYC+GuR+SQRSuKEsFlPCSEPVK-TGALRDCKNlLDVus+KTGLKLEKAFLEVNKPQIHKLcPSWKELKVTPSKYSS .MSRLLSoKPSsIISTKT.FIhQNWsSRRRH+pKsRM..N+TaC+Gu+hSpRSu+-s.ssCSEPsKEsGsLRDCcNlLDVSs+K...TGLKLEKAhLEVNKsQlHKLsPoWKELp....shPpK..s.......... 0 1 1 1 +12181 PF12347 HJURP_C HJURP_repeat; Holliday junction regulator protein family C-terminal repeat Coggill P pcc manual Domain Although this family is conserved in the Holliday junction regulator, HJURP, proteins in higher eukaryotes, alongside an Scm3, Pfam:PF10384, family, its exact function is not known. The C-terminal region of Scm3 proteins has been evolving rapidly, and this short repeat at the C-terminal end can be present in up to two copies in the higher eukaryotes. 21.60 21.60 22.10 22.10 21.20 20.80 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.97 0.72 -4.39 13 404 2009-09-04 15:55:35 2009-09-04 16:55:35 3 7 88 9 135 350 1 63.80 39 14.80 CHANGED +phs.s..ppsscP-hph...s+sEc+Y+EIcEEFDphaQpYpLust..PtphshsssssVSshpuus ................G....C-SP.-PD.sp.sl....oPpoE-KY+KIsE-hDhhhpch.p.h..ss........lP.....s.N.......F......sMPVolPVos.suh.s............................ 0 16 26 59 +12182 PF12348 CLASP_N CLASP N terminal Mistry J, Wood V jm14 Manual Family This region is found at the N terminal of CLIP-associated proteins (CLASPs).\ CLASPs are widely conserved microtubule plus-end-tracking proteins that regulate the stability of dynamic microtubules [1][2]. In yeast, Drosophila, and Xenopus, a single CLASP orthologue is present. In mammals, a second paralogue (CLASP2) exists which has some functional overlap with CLASP1 [2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.32 0.70 -5.00 57 1343 2012-10-11 20:01:02 2009-09-07 16:10:40 3 32 291 1 840 1391 6 203.10 18 24.51 CHANGED l-pphpphhshFpt.+EoEtNWthRppslhplRpll+Gs.sspsh.sshlssl+..th.puIh+shsSLRTolsssuCpLlp-luhhhtsp.l-sh.s-hlh.sLl+hsussKclsupsAshslssllssss..ss+l.hphlhhAhp-KNspsRthustWLphhlp+hs.............pppthptss.....hh....chhccslt+sLsDAsssVR-shRssaWpa..................hphaPs+Acplh..spL-sps++hlc .....................................................................................t.tp.Wp.+.p....sl.tl....pt..h...h...............tt...........................tt.hh.ttl+.........shht.p.hp.shp..st...ls....ppuh....thl....s.p.Ls....p...t..........h...tp..........p...hcph....sct....ll..s............sLlphh...s..s.....s..p.c.h...lppsuptslpt.lhp...p.....ss......s...pl.....h.thl.ts..s.h...ps.K..s.stlRpp........sht.aLtt.hlpphs.................................h..t...pph........h...........phlht...tl...t...ptlpDsssplRptAppshht.h..........................hp.h..hs.t............................................................................................................................... 0 270 417 646 +12183 PF12349 Sterol-sensing SREBP-CAP_SSD; Sterol-sensing domain of SREBP cleavage-activation Wood V, Coggill P pcc Wood V Family Sterol regulatory element-binding proteins (SREBPs) are membrane-bound transcription factors that promote lipid synthesis in animal cells. They are embedded in the membranes of the endoplasmic reticulum (ER) in a helical hairpin orientation and are released from the ER by a two-step proteolytic process. Proteolysis begins when the SREBPs are cleaved at Site-1, which is located at a leucine residue in the middle of the hydrophobic loop in the lumen of the ER [1]. Upon proteolytic processing SREBP can activate the expression of genes involved in cholesterol biosynthesis and uptake. 
SCAP stimulates cleavage of SREBPs via fusion of the their two C-termini [2]. This domain is the transmembrane region that traverses the membrane eight times and is the sterol-sensing domain of the cleavage protein [4]. WD40 domains are found towards the C-terminus. 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.75 0.71 -4.76 34 952 2012-10-02 18:57:54 2009-09-07 17:54:24 3 33 349 0 545 2789 1133 148.90 25 14.06 CHANGED hlcS+hsLulsGllhlhholssohGhhuhhG..lshohlshpVhPFLlLulGlDNhFlllpuhppsspshs.....cpRlucuLucsGsSlhloulo-hlsFh.lGshoshPAlpsFslaAulAlhhDFhhQlThFsAlLulDh+Rp.tpschchhsphps ..........................................................h..tS+hhL.uluulhhl.hhS...h...hhu.l..u..l.....h.....s...h.....l....G.......ls..hs.h...............l.h.t.ps.l..P.F.......L...l....L...............s..l......G.......l..-...p...h..h.....l.L....s....p....s.......h......h.....p....s.......s...p.p.tph.........................ppp...l...u......c.ulsp...hGso.lhh.s............sls.....ch..ssh.............h....l...G......s...h..o..s.............l..s.u..l..c.tFs......hh..A.uh....sl.lhs.ah.....l...........h.o...h...F.s.u.l.L.o...l.clchp...ttt..........t............................................................... 0 161 245 407 +12184 PF12350 CTK3_C CTD kinase subunit gamma CTK3 C-terminus Wood V, Coggill P pcc Pfam-B_12814 (release 23.0) Domain The C-terminal domain kinase (CTDK-1), is a three-subunit complex comprised of Ctk1, Ctk2, and Ctk3, that plays a key role in regulation of transcription and translation and in coordinating these two processes. Both Ctk2 and Ctk3 are regulated at the level of protein turnover, and are unstable proteins processed through a ubiquitin-proteasome pathway. Their physical interaction is required to protect both subunits from degradation, and both Ctk2 and Ctk3 are required for Ctk1 CTD kinase activation [1]. 
The mammalian P-TEFb is mirrored by the combined complexes in yeast of the CTDK1 and the Bur1/2 [2]. It is not clear what independent function this C-terminal domain has. 25.00 25.00 33.20 37.90 18.30 18.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.78 0.72 -3.95 31 130 2009-09-09 12:11:31 2009-09-09 13:11:31 3 4 127 0 99 127 0 67.90 39 23.79 CHANGED lsKppI.hpRhE-DRERHKR.+EshWslst................psEF.cphWschsshs-sp.psshc-hpEhpphspt .....sK+QI..pRlEEDRERHKRhREshWslstss...........................................psEa.cphW-cso..shs--D.htsh--ht-hpphh..t.................. 0 26 54 84 +12185 PF12351 Fig1 Ca2+ regulator and membrane fusion protein Fig1 Mistry J, Wood V jm14 Manual Family During the mating process of yeast cells, two Ca2+ influx pathways become activated. The resulting elevation of cytosolic free Ca2+ activates downstream signaling factors that promote long term survival of unmated cells. Fig1 is a regulator of the low affinity Ca2+ influx system (LACS) [1], and is also required for efficient membrane fusion during yeast mating [2]. 
25.00 25.00 25.10 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.38 0.71 -4.40 32 158 2012-10-03 00:20:40 2009-09-10 11:12:12 3 2 113 0 119 187 0 182.80 33 65.11 CHANGED hhppssss......sstp.lplRsGYhGlClspss..............ss....hhC.....ptshssLsshlp..................ssss.sLNLltlApsFpspllaPhllhssllLshlsFlllsh...............h..lhPh.st...lpphshsluhls..slhhhlushapHhuss.AusshstshohGh...lpuphGspAhshsW..huFsh.lhl.sslulhhhhhp ..................................................................s....ssh...sssp.lplRlGYhGlClpsss......................ushlC.........spsussLup.lp................................spp-.sLNL..lhlAppF+-tlVFPYLl.hs..ulllsh..ls..hllLshhs...................................h..lpPFssp...VsphsLulhhlu..slhhhl.ushWQHsuus.Ausplspshu.Gs....lp.utsGssAhlhuW...huFsh.lll....lslGLhhhhLp...................... 0 22 58 98 +12186 PF12352 V-SNARE_C Snare region anchored in the vesicle membrane C-terminus Coggill P pcc manual Domain Within the SNARE proteins interactions in the C-terminal half of the SNARE helix are critical to the driving of membrane fusion; whereas interactions in the N-terminal half of the SNARE domain are important for promoting priming or docking of the vesicle Pfam:PF05008. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.74 0.72 -3.90 20 1258 2012-10-01 21:14:52 2009-09-11 17:34:43 3 20 319 2 844 1215 13 65.10 23 27.89 CHANGED -tlh+pss+lcsSpRhhsEs.shGtullpsLppQRpsLpssps+lpssssslstosplLpshpRRh ..........................h.cppp.plpp.op.p...hhs-s.p.hGtsshpsLtpQRppLppspp+....lhssss...plstuppllptlp+R....................... 0 290 470 692 +12187 PF12353 eIF3g Eukaryotic translation initiation factor 3 subunit G Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 130 amino acids in length. The family is found in association with Pfam:PF00076. 
This family is subunit G of the eukaryotic translation initiation factor 3. Subunit G is required for eIF3 integrity. 22.00 22.00 22.00 22.00 21.90 21.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.80 0.71 -4.22 47 378 2009-09-14 13:49:11 2009-09-14 14:49:11 3 11 284 0 262 367 3 119.60 34 40.69 CHANGED sPssplhps.sGsKslhpY+hs--GcKlKlspph+..hhpc.+Vs+slAcRKpWsKFGsspsssP.GPsstTT.luE-lhhphspshcppppccppp.......ttpsssshlpCRlC.p.G-HaTs+CPaKsths ........................................s..phhss.p..GhKslhpY+...hs-.-.G.c.......K.lKl.s.+ph+..h.pc.+ls+tVA...c.R+........s.W..sKFGpsptc.s.s.GPs.ss....TT.tl.uE.-lhhp.........hssshcptpppcppp...........httthspphlpCRlC..p..G-HaTs+CPaK-sh.s........................ 0 86 144 213 +12188 PF12354 Internalin_N Bacterial adhesion/invasion protein N terminal Gavin OL lg7 Prosite Domain This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00560, Pfam:PF08191, Pfam:PF09479. There are two completely conserved residues (I and F) that may be functionally important. Internalin mediates bacterial adhesion and invasion of epithelial cells in the human intestine through specific interaction with its host cell receptor E-cadherin. This family is the N terminal of internalin, the cap domain of the protein. The cap domain is conserved between different internalin types. The cap domain does not interact with E cadherin, therefore its function is presumably structural: capping the hydrophobic core. 24.10 24.10 24.50 24.30 23.70 24.00 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.70 0.72 -4.35 32 845 2009-09-14 13:50:09 2009-09-14 14:50:09 3 116 41 39 12 749 0 55.90 39 10.30 CHANGED h++p.hl+thlhhhLlhhlsh.hlshs.ssclpA.ssIspPsPINpIFPDssLAcsl+p ...........++pshhK.hLlsh.Llhhhul..WIsh.usGscsQA..to...IspPTPINQIFsDsALA-slKs.. 
0 12 12 12 +12189 PF12355 Dscam_C Down syndrome cell adhesion molecule C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00047, Pfam:PF07679, Pfam:PF00041. The Down syndrome cell adhesion molecule (Dscam) belongs to a family of cell membrane molecules involved in the differentiation of the nervous system. This is the C terminal cytoplasmic tail region of Dscam. 25.00 25.00 29.90 37.40 19.60 20.20 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.78 0.71 -3.86 2 83 2009-09-14 13:51:33 2009-09-14 14:51:33 3 25 26 0 21 106 0 122.90 86 5.83 CHANGED tGuuhass.uPEYDDPANCAPEEDQYGSQY.G.YGtPYDHYGSRGShGRRSlGSARN.PssGSPEPPPPPPRNHD.sNSSFNDSKESNEISEAECDRDphsptNYGsstRu...KDthTTEEMRK ........QGGSLYSGPGPEYDDPANCA....PEEDQYGSQYGGPYGQPY.DHYGS....RGSMGRRSI..GSARN.PG.NGS...........PEPPP.PPPRNH.D...MS......N...SSFNDSKE...SNEISEAECDRDHGPR.GNYGAVKRSPQPKDQRTTEEMRK............ 0 7 9 18 +12190 PF12356 DUF3643 Protein of unknown function (DUF3643) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 217 and 4852 amino acids in length. There is a conserved TLA sequence motif. 
19.40 19.40 19.70 19.50 17.40 15.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -11.05 0.71 -4.91 5 94 2009-09-14 13:52:16 2009-09-14 14:52:16 3 6 62 0 49 100 0 143.50 50 4.10 CHANGED pVApL+coushsaRCs....s.S.hE.sL.sPSPuHlHCVAAILW+SYEL.VpYDLPsLLop-LFEolYcWSMsL-pspPLKKSVDSLLCSLCYI+PcYFohLLsWMGIpLpssus.......phs.TDDsKcpp...ulTDDSKsAp...hsu.h.p.sspshosLlLsESaLsTLAhACQ ..........................lhth+.s..hthhss..............sh..l.PusualHCVAuILW+oach..lpYDLssLLs....p-LFEhLapWohoLssN.......sLKKAlDSLLCShCaI+Ps.hFolLhsWMGIhsP.s.lpsc.....hphShoDDsKcp-.......uhTDDSKssp.........................usLsLo-SpLuTLAuuSQ.................................................. 2 10 14 31 +12191 PF12357 PLD_C Phospholipase D C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00168, Pfam:PF00614. There is a conserved FPD sequence motif. This family is the C terminal of phospholipase D. PLD is a major plant lipid-degrading enzyme which is involved in signal transduction. 21.30 21.30 23.80 27.40 19.20 18.10 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.45 0.72 -4.44 66 288 2009-09-14 13:53:00 2009-09-14 14:53:00 3 8 64 0 147 296 0 73.40 46 9.38 CHANGED l--sFhcPESlECV++VNplA-c.WchYuu--l...s-h.G.HLL+YPlpVspc.GpVssL.PGtEhFPD.stu+llGspo .....--sFhcPESlECV++VNplA-cpW...chYuu...--l......p-l.G...HLLpYPlpVs.......p-.G.cVstL.PGtEt..FPD.stu+lLGs+..... 0 22 86 118 +12192 PF12358 DUF3644 Protein of unknown function (DUF3644) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 65 and 80 amino acids in length. 
21.80 21.80 21.80 22.00 21.50 21.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.46 0.72 -3.71 43 219 2009-09-14 13:53:48 2009-09-14 14:53:48 3 5 217 0 50 180 6 70.60 22 23.26 CHANGED sl.hhspuWphhL+uhhhpcthc......................hhth.tsGss+shpLppslcp.......s.sps..lt.pNlptLh-hRcp.h ......h.h.ssuaphhh+uhhhpctss...............................hh...ppss.s+shpLppslcc...........scsph.hptsLptlhchRspu...... 0 12 31 42 +12193 PF12359 DUF3645 Protein of unknown function (DUF3645) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. There is a conserved HPD sequence motif. 25.00 25.00 27.30 26.20 22.40 17.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.35 0.72 -7.53 0.72 -4.82 27 194 2009-09-14 13:54:36 2009-09-14 14:54:36 3 20 105 0 170 208 11 34.80 53 1.11 CHANGED ls.sR...phLAVPahuKDsPotpSEFuHPDVlIsLT ...........p.sR......shlAVPacAKDsPStpSEFuHPDVsIlLT.. 0 65 114 150 +12194 PF12360 Pax7 Paired box protein 7 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00046, Pfam:PF00292. Pax7 belongs to a family of genes that encode paired-box-containing transcription factors involved in the control of developmental processes. Pax7 has a distinct role in the specification of myogenic satellite cells. 21.60 21.60 24.10 33.20 19.00 17.70 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.31 0.72 -3.68 12 179 2009-09-14 13:55:18 2009-09-14 14:55:18 3 6 54 0 53 178 0 44.40 70 9.66 CHANGED uuuDuuSAYsLuosRHuFSuYSDoFhu.ssusuNtMNP.luNGLSPQ .....suDoSSAYsLsSsRHuFSSYSDSFhs.PouPSNtMNP.luNGLSPQ. 0 2 8 22 +12195 PF12361 DBP Duffy-antigen binding protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 449 and 1061 amino acids in length. The family is found in association with Pfam:PF05424. There are two conserved sequence motifs: NKNGG and QKHDF. This family is part of the Duffy-antigen binding protein of Plasmodium spp. This protein is an antigen on these parasites which enable them to invade erythrocytes. 25.00 25.00 274.00 273.10 18.50 17.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.99 0.70 -4.90 4 71 2009-09-14 13:55:58 2009-09-14 14:55:58 3 7 5 0 3 82 0 294.20 88 38.23 CHANGED SpshhQsVcSSpuE.KVQGDSAHGNlNsGttpSTTscAssGsGQNGNQTPAcSsVpcoDhsEuhSAKNVDspK.VoERusDTsuVTsIsEAGKEsLssopupsSEoTV-ususGDGhVpsAshPVhsSENPLVT.aphht.SKDNSD...ssGS......spssEohAssDSsuK..GEstcsQcstpsKus+DoSspSsuTSu...uoGD....shssl-pt.stusP.pt.......s..VGu.......KctpsE-opspKDutTV..........AGGoTss.pcN...DT-NsNlPsssNKQsEGATALStsESLESNESVH+ThssTTHuLENKNGGsEKDhQKHDFhN SNPISQPVDSSKAE.KVPGDSTHGNVNSGQDSSTTGKAVTGDGQNGNQTPAESDVQRSDIAESVSAKNVDPQKSVScRSDDTASVTGIAEAGKENLGASNSRPSESTVEANSPGDDTVNSASIPVVSGENPLVTPYNGLRHSKDNSD...SDGP.........AESMANPDSNSK..GETGKGQDNDMAKATKDSSNSSDGTSS...ATGD....TTDAVDREINKGVPEDR.......DKTVGS.......KDGGGEDNSANKDAATVVGEDRIRENSAGGSTNDRSKN...DTEKNGASTPDSKQSEDATALSKTESLESTESGDRTTNDTTNSLENKNGGKEKDLQKHDFKS 0 0 0 1 +12196 PF12362 DUF3646 DNA polymerase III gamma and tau subunits C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00004. The proteins in this family are frequently annotated as the gamma and tau subunits of DNA polymerase III, however there is little accompanying literature to back this up. 
23.60 23.60 24.20 25.00 23.00 23.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.25 0.71 -10.10 0.71 -4.15 77 299 2009-09-14 13:56:33 2009-09-14 14:56:33 3 3 296 0 103 270 1082 115.70 35 19.43 CHANGED sthsoFc..cllpLhpppRDltLtsplcpslRLVpa..pPGRIEhp.sssAspsLsscLuptLppWT.GpRWhVolssp..sGt.PTltEpcpstcpthpscAppcPhVpAlLstFPuAcIs-.lR ................s..hsoap..-lVsLsccpRDhtl+htlcssVRLVph..pPGR..l-lphss..sAPpsLss-Lup+LppWT.Gt.RWsVslu..p-....sGt..TlsEtctttcpphhscAcscP.hVtAlLstFPGA+Ih-VR............................ 0 30 66 78 +12197 PF12363 DUF3647 Phage protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 119 and 164 amino acids in length. The proteins in this family are frequently annotated as phage proteins, however there is little accompanying literature to back this up or to describe the nature of these phage proteins. 21.30 21.30 22.30 22.90 21.20 20.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.52 0.71 -10.32 0.71 -3.97 39 387 2009-09-14 13:57:27 2009-09-14 14:57:27 3 1 317 0 24 178 4 110.40 28 83.37 CHANGED plpIsGK....ph-hKFshtFlpclsKphuh..ptt...t..h....phssGhs.tlhsslhp.tss.slschlcsuhhptppph.opccl-phlcp...h-pp..p.hpclhpcllpcl.ppushhKptscphhp ..............lcINGK....ph-lpFshtFlREl-c+hsh....cst....t..h.....phu.G.ls.tlh.ssLhp.tssssls-lhpsAsut.ppph.oppcI-chl-p.....p....pt...p..lcpLhs-lLpEl.sppshs+thlcph.................... 0 8 16 21 +12198 PF12364 DUF3648 Protein of unknown function (DUF3648) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 53 and 3115 amino acids in length. There are two completely conserved residues (A and F) that may be functionally important. 
20.60 20.60 22.90 22.90 18.40 18.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.59 0.71 -4.18 9 103 2009-09-14 13:58:36 2009-09-14 14:58:36 3 3 12 0 3 85 0 108.20 64 10.89 CHANGED hSKltuSahSSuhAssusYohSRhWRphsshtsht.............haGs............sthspll+-hsAshs.sshRco.....lFPaolcAhh+t.sthupshspstpsshshhsl.pspusspPphhsu.shhsLCcPPL..........ossA++ulRpuK .................MSKLGAsF+SSoFAVRSDYhISRIWRshNTlGVLH.............aFGo............tshTpVlKEaSAshPIVPLRKS.....sFPFSVRAAVRAIcAAu+PhssP+usshsshPVhssuGsspPp.lAGh.RhuLCcPPL................................ 0 1 2 3 +12199 PF12365 DUF3649 Protein of unknown function (DUF3649) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. 21.20 21.20 23.20 23.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.84 0.72 -7.01 0.72 -4.41 39 161 2009-09-14 13:59:17 2009-09-14 14:59:17 3 1 142 0 52 149 2 28.00 39 24.75 CHANGED sFusRSAhRA.WhGlllsuslLuslhhhs ..sFAsRSAWRA.WhGlhlsuhlLuulshh........ 0 11 23 37 +12200 PF12366 Casc1 Cancer susceptibility candidate 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 216 and 263 amino acids in length. Casc1 has many SNPs associated with cancer susceptibility. 
21.30 21.30 21.80 21.30 20.80 21.20 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.44 0.70 -4.87 25 204 2009-09-14 13:59:59 2009-09-14 14:59:59 3 4 60 0 109 201 0 216.40 19 39.55 CHANGED hsp.c.lphsphshph+slphchD+LuhauhuFcphhchs...............hpcWthQ.s.ps..Ellhsl-.....oh+sch.haIo..........spuh+uhspchtKphsu+shKhlcI.cP.p-hppLpcthhscslslFsEp-sphYl-ph.hs...KHluh...E.hshcthslpstlhphhRschs+hs.....s+tsIlL............th+..........ssKs.pcss.sp...........h+loP-sssFlcVo.tss-plslht.shts ...........................................................................................ph.h..hp.shth+hlphchD+lu.h.shu......h..p.h.phs.......................hppaphp...s.pp......phl.hphp........pt.hhphp.hhp........................ttht....t.tt........thsst..c..p.h....c............p.t.L.pthhssshplF.pp-t..hhph...h....cphth...p.hshcphshlGulaphshhchs.ps......t+tphll.............h.................................p.ht.t....t.sh......................hh.hs..s.t...h.s.....hltlT.phspplhhht.shh..................................................... 0 25 32 75 +12201 PF12367 PFO_beta_C Pyruvate ferredoxin oxidoreductase beta subunit C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF02775. There are two completely conserved residues (A and G) that may be functionally important. PFO is involved in carbon dioxide fixation via a reductive TCA cycle. It forms a heterodimer (alpha/beta). The beta subunit has binding motifs for Fe-S clusters and thiamine pyrophosphate. 
21.40 21.40 21.40 21.50 21.30 21.20 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.45 0.72 -4.05 69 635 2009-09-14 14:37:33 2009-09-14 15:37:33 3 2 596 0 197 390 60 70.40 42 24.41 CHANGED CVoFNKlNTapWa+E+lhpL...-.saDPoD.+ttAhppuhE....s-p.lPlGllYcsc.+soaE-plshh.....ppsPL ..................................................CVTYNK.lNTYDWF............K...........E+Lsplp...........shE..sY.....D.....s...........o...........D....+p....hA.....h..c......plhE........p-s..LspGllYQsc.ppPoYEpplpth...pcps.................... 0 74 139 176 +12202 PF12368 DUF3650 Protein of unknown function (DUF3650) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00581. There is a single completely conserved residue N that may be functionally important. 20.90 20.90 21.70 21.40 20.10 19.10 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.73 0.72 -6.96 0.72 -4.45 23 786 2009-09-14 15:50:37 2009-09-14 16:50:37 3 2 782 0 48 308 0 27.80 55 8.66 CHANGED ECRsHPRNRYVpcHsLS-EElpcRLpsl .ECRsHPRNRYVpcppLoppEhtcRLssl..... 0 13 23 35 +12203 PF12369 GnHR_trans Gonadotropin hormone receptor transmembrane region Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00560, Pfam:PF00001. There are two completely conserved C residues that may be functionally important. This family contains the transmembrane region of Follicular stimulating hormone and leutenizing hormone - the two major gonadotropin hormone receptors. These receptors are G protein coupled receptors involved in development and maturation of germ cells in both fecund genders. The transmembrane region is conserved between the two different receptors while the extracellular ligand binding domains are less well conserved. 
22.10 22.10 22.60 22.10 20.40 21.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.73 0.72 -3.53 14 158 2009-09-14 15:52:14 2009-09-14 16:52:14 3 24 102 0 24 139 0 60.60 56 13.56 CHANGED +.+.QhotLHPICN+S.h+pD.D-hspspt....pRposh.D..ssaups.s...s......E..a.DYDL.CpEVVsVsCSPc ...........h..p..othpslCNtS...hpp-hs..spspt........pc.ShAEDt..SuYupGFDhhhs.......E..F.DYsL.CNEVVsVsCSPK.. 0 1 3 6 +12205 PF12371 DUF3651 Protein of unknown function (DUF3651) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. This family is frequently annotated as a membrane protein but there is little associated literature to back this up. 21.70 21.70 21.90 22.40 21.60 20.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.99 0.72 -3.97 26 305 2009-09-15 14:06:27 2009-09-15 15:06:27 3 5 99 0 183 275 1 75.30 24 8.61 CHANGED slsFlPp..phGthcuslhlhT.shGs.hhhtl.........pu.uspsPa+L.....pshhthplshssshs..lslaNPaspsLplscl ..........llFlsp..ppGsl-sslhlpT.shGh.hph.V............................................huhssp......pl..........hhlshsss....lslhNstst.ltl.p.............................................. 0 32 49 98 +12206 PF12372 DUF3652 Huntingtin protein region Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF02985. This family is in the middle region of the Huntingtin protein associated with Huntington's disease. The protein is of unknown function, however it is known that a polyglutamine (CAG) repeat in the gene coding for it results in the development of Huntington's disease. 
20.90 20.90 21.10 21.30 19.90 20.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.85 0.72 -4.35 16 142 2009-09-15 14:07:37 2009-09-15 15:07:37 3 5 63 0 66 142 0 41.40 43 2.12 CHANGED hcslhths+.....lh.chlsGphsuuppsssht......psllpDhhhhp ..u.+tllthPK.....Ih.pLs-GlMASGpcusThAl..sshpsllcDLhVlp.... 0 11 13 36 +12207 PF12373 Msg2_C Major surface glycoprotein 2 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF02349. This family is the C terminal of major surface glycoprotein 2 of virulent bacteria. It is a virulence factor antigen. 25.00 25.00 29.60 28.60 21.50 20.00 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.94 0.72 -7.17 0.72 -4.73 17 82 2009-09-15 14:08:36 2009-09-15 15:08:36 3 7 3 0 0 84 0 29.70 54 3.13 CHANGED ph+LV+Rsh.....ssstVoEtElKAFDhsA.Ahp .+ltLV+Ruh.....scsuVoEsElKAFDhsuhAhp. 0 0 0 0 +12208 PF12374 Dmrt1 Double-sex mab3 related transcription factor 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 61 and 73 amino acids in length. The family is found in association with Pfam:PF00751. This family is a transcription factor involved in sex determination. The proteins in this family contain a zinc finger-like DNA-binding motif, DM domain. 21.20 21.20 21.60 21.50 21.00 19.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.45 0.72 -3.62 46 292 2009-09-15 15:40:47 2009-09-15 16:40:47 3 4 110 0 38 292 0 63.10 31 39.59 CHANGED ELGIspPlsL.uus-shVKsEssus.shh...h-.ups.o.s.....ssuSsusssu+.shpssP.uhsuRsas-usSD .........ELGIspPlsL.suu-hhlKpEssus.sph..hh-..u.......sS.ops..........s.ssssoss..ss....sh..hps.s.uhssph...tt.s............................ 
0 1 3 18 +12209 PF12375 DUF3653 Phage protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 112 and 194 amino acids in length. 22.20 22.20 22.20 22.30 21.90 20.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.72 0.72 -3.76 20 109 2009-09-15 15:41:35 2009-09-15 16:41:35 3 5 94 0 19 77 0 76.70 28 50.50 CHANGED GFph.....ptsc.LlTPEG+plpPp-lthhulhhs.Apc.acphhc.ttts............h....................usllthscshh+p+ ........GatF..cpu+.LlTPpG+clsstplcshphphs.sRt.acp...hhp.tcts.............th......................h.lhhlt.shhph.h................................. 0 5 13 17 +12210 PF12376 DUF3654 Protein of unknown function (DUF3654) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 193 and 612 amino acids in length. 25.00 25.00 30.80 30.80 23.80 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.72 0.71 -4.35 15 56 2009-09-15 15:42:41 2009-09-15 16:42:41 3 2 7 0 27 53 0 127.60 42 32.99 CHANGED IpphPusLScEEK++ccchLppIKEYGEpLsTcEKQ-cIlcAQKIlCDACc.IWt+.cE-RhthlhttaSp+LpLKhhtus....sch.chchsLhthlDHchLlsAacEYt.hDVsuELV+QhhL..csc-IDcc.lscs...V ..........t.ssthsp-E+ch..c.hpphKcaGtpLpspE+hcpIlEsEKIVCsAC+cIChsL+EEELhGLhAEGuh+KsLKtph.u-......cchpcttY.....LEhthlDpplLLDAH.+EaG.t-Vs+ELV+QhLLGKcG+-ID++Yls+VAssV............ 0 27 27 27 +12211 PF12377 DuffyBP_N Duffy binding protein N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF05424. This family contains the N terminus of the Duffy receptor binding domain. 
25.00 25.00 28.70 73.30 21.90 21.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.99 0.72 -3.76 3 77 2009-09-15 15:43:41 2009-09-15 16:43:41 3 6 5 0 3 84 0 61.50 92 9.21 CHANGED cEEKDGEHKTDSKTDNGEGsNNLVMLDYDTSSNGHPAGTlDNVLEFVTEHEGNSLcNSSKGGN.PYD ...........GEEKDGEHKTDSKTDNGKGANNLVMLDYETSSNGQPAGTLDNVLEFVTGHEGNSRKNSSNGGN.PYD 0 0 0 1 +12212 PF12378 CytadhesinP1 Trypsin-sensitive surface-exposed protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 67 and 79 amino acids in length. This family contains trypsin-sensitive surface-exposed proteins called cytadhesins. Cytadhesins are virulence factor proteins which mediate attachment of bacterial cells to host cells for invasion. 25.00 25.00 25.10 31.60 23.50 22.30 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.01 0.72 -4.03 8 115 2009-09-15 15:45:13 2009-09-15 16:45:13 3 3 12 0 13 113 0 70.30 42 13.09 CHANGED hpoTshas.ct+PsW.........NshsGFssssoppptpu.pcps-..sSuPIuLPa-sYhsNlGNlhhhspuVllFGGN ...oTsphs.stcPpa.........NshsGFGlsGs.....uPpcasE..cupsPlpls.psa.sNlhsllhlspsVlhatt...... 0 11 11 11 +12213 PF12379 DUF3655 Protein of unknown function (DUF3655) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF08716, Pfam:PF01661, Pfam:PF05409, Pfam:PF06471, Pfam:PF08717, Pfam:PF06478, Pfam:PF09401, Pfam:PF06460, Pfam:PF08715, Pfam:PF08710. 25.00 25.00 56.30 55.20 24.80 23.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.47 0.72 -4.05 3 173 2009-09-15 15:46:42 2009-09-15 16:46:42 3 5 67 2 0 150 0 68.80 93 1.17 CHANGED MYCSFYPPDEEEDCEEYE-EEElsEETCEHEYGTEDDYKGLPLEFGASTEIlQVEEEEEEDWLDDAlEAc MYCSFYPPDEEEE.DDAECEEEEIDETCEHEYGTEDDYQGLPLEFGASAETVRVE.EE.EEEDWLDDTTEQS..... 
0 0 0 0 +12214 PF12380 Peptidase_C62 Gill-associated viral 3C-like peptidase Coggill P pcc MEROPS_C62.001 Family a positive-stranded RNA virus of prawns, that has been called yellow head virus protease and gill-associated virus 3C-like peptidase. The GAV cysteine protease is predicted to be the key enzyme in the processing of the GAV replicase polyprotein precursors, pp1a and pp1ab. This protease employs a Cys(2968)-His(2879) catalytic dyad. 18.80 18.80 19.60 643.90 18.20 18.70 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.85 0.70 -5.58 2 13 2012-10-02 13:45:52 2009-09-15 16:57:05 3 3 2 0 0 15 0 284.00 95 6.70 CHANGED FLVGLVTHEVpTGNsTpVEDLNKHPYNKYRpNIVRVYGERGDLNGFLSGK.LaFPRHIFDoCTDNTLTRHIRVTKGEtTHDIELLSEEYDATPFIKs-SPFAEAT.LKFuKLQRTQasYFVTADDIRlGSMSsDGYHNISTKDGDCGSllFDHLtNVVGAHIVGIusIPPVNGALTWNsEpEhLCGPNsDYDaDPpKVsPPKVWPVEslTALSTlLNQLNYVTGDAFTTPKLPTNYQLlGCETLDQYVNApNLVTGQFPQIKEALDDFINGYVANLQRGsEAYN FLVGLVTHEVNTGNNTRVEDLNKHPYNKYRSNIVRVYGERGDLNGFLSGKFLYFPRHIFDSCTDNTLTRHIRVTKGEETHDIELLSEEYDATPFIKI-SPFAEATVLKFGKLQRTQYAYFVTADDIRVGSMSsDGYHNISTKDGDCGSLLFDHLHNVVGAHIVGIASIPPVNGALTWNAEKEMLCGPNDDYDYDPEKVGPPKVWPVESITALSTILNQLNYVTGDAFTTPKLPTNYQLIGCETLDQYVNARNLVTGQFPQIKEALDDFINGYVANLQRGTEAYN 0 0 0 0 +12215 PF12381 Peptidase_C3G Tungro spherical virus-type peptidase Coggill P pcc MEROPS_C03.024 Family This is the protease for self-cleavage of the positive single-stranded polyproteins of a number of plant viral genomes. The protease activity of the polyprotein is at the C-terminal end, adjacent to the putative RNA polymerase. 
20.70 20.70 22.20 21.20 20.10 19.30 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.40 0.70 -5.11 3 29 2012-10-02 13:45:52 2009-09-15 16:57:36 3 4 11 0 0 48 0 225.10 36 9.79 CHANGED cKGLDKDPNMVoILos+LssISCVIlNlTPsRpAYhNVlRLCGTaVVCPAHYLEALEE-DTLYFICFSllIKLpF-PcRVTLVNoaQDLVVWDLGNSVPPSIDsLcMIPTl-DWD+FQDGPGAFuVTKYNu+aPTNYINTLDMIERIRANTQNPTGhYcMhGSpHTITTGLRYQMaSL-GFCGGLILRAsTsMVRKIVGlHVAGSpNHAMGYAECLlQEDL+pAIs+LuhD ..........................................h......D.sh.pLlss+.hssh.pl.hlst.ssh.h.hhsGLFlpushhLhPuHhlsh.lsppcp..I...hh.h..c..sl..h+....V.h.cs.+K.V.slV.N.....u.......p.Esslhsh.s.p.Vst+pDllc.aFtsu-shspa+t..s.ps...sLssh+a...Stc..s.spalps.L.ts-+h..-A.....D+Phohs-sppGpYhlRQGLEYphsohsGDCGuPllls-opshRKIsGlHVAGhst..p...u...+..uYAEoITQcDLhpAhppl...sso.................. 0 0 0 0 +12216 PF12382 Peptidase_A2E Retrotransposon peptidase Coggill P pcc MEROPS_A02.051 Family This is a small family of fungal retroviral aspartyl peptidases. 21.10 21.10 21.60 22.40 20.50 21.00 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.47 0.71 -4.37 2 4 2012-10-02 15:32:34 2009-09-15 16:58:21 3 1 2 0 3 7 0 137.00 90 10.28 CHANGED KpNsTpsRp.+KTNVSRIEYSSppFhpap++RYEMVLQAcLP-FKCSIPCLIDTGAQsNlITEETVRAHKLPTRPW.pSVIYGGVYPsKINRKThKL.IsLNGISIKTEFLVVKKFSHPAAISFTTLYDNNIEISSS ........................KNNTTNSRNLRKTNVSRIEYSSNKFLNHTRKRYEMVLQAELPDFK.CSIPCLIDTGAQANIITEETVRAHKLPTRPWSKSVIYGGVYPNKINRKTIKLNISLNGISIKTEFLVVKKFSHPAAISFTTLYDNNIEISSS.............................. 0 3 3 3 +12217 PF12383 SARS_3b Severe acute respiratory syndrome coronavirus 3b protein Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 32 and 154 amino acids in length. This family contains the SARS coronavirus 3b protein which is predominantly localized in the nucleolus, and induces G0/G1 arrest and apoptosis in transfected cells. 
25.00 25.00 59.30 59.30 18.50 18.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.11 0.71 -4.29 3 70 2009-09-15 16:01:23 2009-09-15 17:01:23 3 1 66 0 0 16 0 118.70 96 99.22 CHANGED MMPTTLFAGTHITMTTVYHITVSQIQLSLLKVTAFQHQNSKKTTKLVVILRIGTQVLKTMSLYMAISPKFTTSLSLHKLLQTLVLKMLHSSSLTSLLKTHRMCKYTQSTALQELLIQQWIQFMMSRRRLLACLCKHKKVSTNLCTHSFRKKQV MMPTTLFAGTHITMTTVYHITVSQIQLSLLKVTAFQHQNSKKTTKLVVILRIGTQVLKTMSLYMAISPKFTTSLSLHKLLQTLVLKMLHSSSLTSLLKTHRMCKYTQSTALQELLIQQWIQFMMSRRRLLACLCKHKKVSTNLCTHSFRKKQV 0 0 0 0 +12218 PF12384 Peptidase_A2B Ty3 transposon peptidase Coggill P pcc MEROPS_A02.022 Family Ty3 is a gypsy-type, retrovirus-like, element found in the budding yeast. The Ty3 aspartyl protease is required for processing of the viral polyprotein into its mature species. 21.20 21.20 21.40 21.20 20.80 21.00 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.05 0.71 -4.94 2 18 2012-10-02 15:32:34 2009-09-15 17:03:42 3 6 11 0 13 32 0 140.50 27 17.31 CHANGED VLsDhELESKDpppL.IpohPIVHYIAIPEMDpTAEKpIKIpNTKlKTLFDSGSPTSFIRRDhVpLLph.ha-TPPLRFRGFluTcSssTSEAVTlDLplssLQIslAAYlLDpMDYQLLIGNPILRRYPKlLaTlLNT+psssu.KPKsY+SEsVN.V+shSAGNRGNsRNhp.SF ...................t........................ll..h....htpsh.hplphpssplcs.LhDoGSPTSFIRpDllpcLcLphapsPshch+...Ghlus.p.st.ospAsplslphsshphslsAYVh-t..hchpllIG.PhLc+aPpl.........h.s.hhp................................................................................ 0 3 6 13 +12219 PF12385 Peptidase_C70 Papain-like cysteine protease AvrRpt2 Coggill P pcc MEROPS_C70.001 Family This is a family of cysteine proteases, found in actinobacteria, protobacteria and firmicutes. Papain-like cysteine proteases play a crucial role in plant-pathogen/pest interactions. On entering the host they act on non-self substrates, thereby manipulating the host to evade proteolysis [2]. AvrRpt2 from Pseudomonas syringae pv. tomato DC3000 triggers resistance to P. 
syringae-2-dependent defence responses, including hypersensitive cell death, by cleaving the Arabidopsis RIN4 protein which is monitored by the cognate resistance protein RPS2 [1]. 18.80 18.80 18.80 18.80 18.70 18.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.07 0.71 -4.55 4 20 2012-10-10 12:56:15 2009-09-16 12:43:07 3 5 19 0 6 44 0 158.90 28 25.46 CHANGED sPhhGGhF.........-ouGSSuopuuSphh.............VPYlsQtspR.GCWYAsspMlua.hpsGP..RLGLPtLY-u.pG.PptLpt.pDl.chh+sEsLtplslPsscpaos-cLuALLscHGPIhhu...hcoP..sDsh..Hh.VLTGID+.sssIpaHDPctGPshsMsLshhNpthsWp.sH ...................................................s................................................h.Qpspt..uCWhAsssMlhtahc...s.....t..s..........+lG.s.L..Ycu.p...G.hPtthpt...p-h..........tchhpstuL.ps.ls.P.......tsps....aospphtsLLpcaGPlhhu...hcoP..uss.a....HshVl..TGIcp...-...s......c..ul.hhDP.....pG.p.h...shp.h..spt.................................... 0 1 2 4 +12220 PF12386 Peptidase_C71 Pseudomurein endo-isopeptidase Pei Coggill P pcc MEROPS_C71.001 Family This peptidase has the catalytic triad C-H-D at the C-terminal end, a triad similar to that in thiol proteases and animal transglutaminases. It catalyses the in vitro lysis of M. marburgensis cells under reducing conditions and exhibits characteristics of metal-activated peptidases. 20.80 20.80 20.80 22.20 20.70 20.70 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.09 0.71 -4.75 3 4 2012-10-10 12:56:15 2009-09-16 12:58:37 3 2 4 0 1 24 6 145.50 33 55.48 CHANGED ssssc-TT-l.....p-KlGsF+DATSLYs+V++RCKYKYYYNDQsPN+EAlpKMsT.sGINCTDACQLFp+VlEuLGYsV+IEHVRVRCNDGKWYGHYhLRVuGpELss.......GTlWDY..VSATKTGRPLGuPCCosGh...QHLGWGIVSPlYD ...................s..hsp.spch.....hchh.Gthp.shsphhshlp+RptYpaYYNsQhss+cslp+hhs.pGlNCTDusQLFh+lhcuLGYsVph.HV+.sR...ssGp..GHhh..LR..l..ttp.chs...........GssW.Y..suAstsGps..luushCpsGh....c.uW.IhoslYp.................................. 
0 0 1 1 +12221 PF12387 Peptidase_C74 Pestivirus NS2 peptidase Coggill P pcc MEROPS_C74.001 Family The pestivirus NS2 peptidase is responsible for single cleavage between NS2 and NS3 of the bovine viral diarrhea virus polyprotein, a cleavage that is correlated with cytopathogenicity [1]. The peptidase is activated by its interaction with 'J-domain protein interacting with viral protein' - Jiv. [2, 1]. 20.90 20.90 20.90 79.00 17.90 20.10 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.23 0.71 -5.15 6 178 2009-09-16 12:02:53 2009-09-16 13:02:53 3 22 28 0 0 196 0 181.60 71 6.17 CHANGED WLG+lsYKRVsoVY-lDpStEGVYLFPS+QKssutsuhlLPLL+AlLISCISSpWQhhYLhYLllElsYYhHRKIIEEIAGGTNhlSRLlAALIElNWuhD-EEoKGLKKFFlLSuRV+NLIlKHKVRNEsls+WatEEElYGMPKllollKAATLSKsKHCILCTVCEsR-WKGssCPKCGRpG.PIoCGMTLADFEE+ ...............WLhthsYKplssl.Y-lDpuGEGVYLFPS+QKssp.huhhLPLl+AhLISCISsKWQhlYhhYLhh-h.YYhH+KlI-EIuGGTNhlSRllAALIElNWuh-pEEsKGLKKFaLLSuRl+pLIIKHKVRNEsVspWaG-EElYGMPKllolIKAuTLSKNKHCIlCTVCEuR-W..+Gus..CPKCGRaG+PITCGMTLADFEE+. 2 0 0 0 +12222 PF12388 Peptidase_M57 Dual-action HEIGH metallo-peptidase Coggill P pcc MEROPS_M57.001 Family The catalytic triad for this family of proteases is HE-H-H, which in many members is in the sequence motif HEIGH. 
21.00 21.00 21.00 21.00 20.90 20.80 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.37 0.70 -5.00 5 69 2012-10-03 04:41:15 2009-09-16 14:03:59 3 10 40 0 35 188 15 198.00 31 60.86 CHANGED IhIEuDIshTcKELNcLp..u.ss-sKQYRTsNLVssusRTITIlGYTGssQ.uLoupupTALscAVNNYNsl.GhsIsF.RLTFGTNaQNuDMVVYsNolNNPSGo....GGuAGFPsusGcPsKFVQI....YsLcssSTNVsEH.VITHEIGHSIGFRHTDYasRp.SC..GpsGNEGoGGlGAVaIPGTPTGpDsoSlMQACFSsGsDGsFNoNDITALLsMY ........................................................................................................lttDhhho.pt.pp...h............t.........s.....t..cQYRTsslV.....s.t.s.p....sI.slhs.s.............uhs.s.p.hpsuLpp....AlspYNsl..sLslsF...pht.h.....ss.s....h.t....s.t.sh..s..l.h..p.s.sss...sGs.....G..us..A.G.F.P........o..s..Gp..P.....a....p..h.lpI...........h.u.h..s....s....h....s....s....s....s..h..p+....VlsHElGHslGhRH.T.D....ahs.Rp...SC....Gps..s.sEGsu...ssGAltIPGTPos...h..s..s.sSlM.u.Chss.s..psstFsssDhhALphhY....................................................... 
0 14 22 31 +12223 PF12389 Peptidase_M73 Camelysin metallo-endopeptidase Coggill P pcc MEROPS_M73.001 Family \N 21.40 21.40 21.40 21.40 21.10 21.30 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.35 0.71 -4.85 2 432 2009-09-16 15:08:38 2009-09-16 16:08:38 3 3 167 0 49 281 4 171.20 43 87.42 CHANGED M..oLKKKLGMGlASAsLGhuLlGGGTFAaFSDKEVSNNTFAsGTLDLsLsPpTlVslpsLKPGDoVcKEFhLcNpGoLsIKcVhLtTcYsVcDVKtDNt.-DFGKHlKVpFLhNhDKppp.VhETsLscLptss.shlspDlhA..WsEK.GlpAGppDhhhVpFEFVDstKDQN.FQGDpLpLpWTFsApQtsGEpK .................M...olKKKLGMGlsoAs.L...Glu..L.l..GG...GTaAaF..S.Dp.E.s.S.s.N..TFAAGT..LDL.sl.................s...P....p.....s.....l.....l..s.............l.........p.........sL..K.......PGDpl.cK.pFpLpN.pGo.Ls.I.c.cV.hL..tTcY...s.V....pD..sK...t..s..N.t.........-DFGcHlcV.pFLh..Nh......D..K...........p......p......p.l..hpTT..Ls...cLpt..s.p...h...h..tp....cl......t......h..t.E.t....G.ltsGpp.DhhhV.FpF...DstpD......Q.......N.FQGDpLpLpWTFpu.QttGpp+................................................................................................... 1 12 34 41 +12224 PF12390 Se-cys_synth_N Selenocysteine synthase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF03841. There is a single completely conserved residue P that may be functionally important. This family is the N terminal region of selenocysteine synthase which catalyses the conversion of seryl-tRNA(Sec) into selenocysteyl-tRNA(Sec). 21.70 21.70 21.80 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.20 0.72 -7.68 0.72 -3.86 134 925 2009-09-16 15:44:25 2009-09-16 16:44:25 3 5 916 0 183 611 49 39.90 44 8.67 CHANGED hRplPulDplLppsthtsh......................lpphuRshVhcslRphl-phR ...appLPulD+LL+-s..shhsL......................h-pYG+otVl-hLRphLDcAR................. 
0 66 115 155 +12225 PF12391 PCDO_beta_N Protocatechuate 3,4-dioxygenase beta subunit N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00775. There are two completely conserved residues (Y and R) that may be functionally important. This family is the N terminal region of the beta subunit of protocatechuate 3,4-dioxidase. This enzyme utilizes a mononuclear, non-heme Fe3+ centre to catalyse metabolic cellular reactions. 21.60 21.60 21.90 24.20 19.80 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.66 0.72 -4.52 79 491 2009-09-16 15:49:27 2009-09-16 16:49:27 3 3 474 172 150 420 62 35.90 41 14.71 CHANGED ahtRDtsh.HPPthtPsY+oSlhRuPppsLlsl....ssol ...............tRDhsh.HP.sthsPsYKoSVLRuPppsLlul...ppo......... 0 29 77 115 +12226 PF12392 DUF3656 Collagenase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF01136. 
22.10 22.10 22.60 22.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.29 0.71 -4.05 199 1248 2009-09-16 15:51:28 2009-09-16 16:51:28 3 4 1215 0 283 989 60 120.40 32 17.09 CHANGED la.+spDpphtcpl.cp..........stp++lslph.plp..hc.....hsps........hplplp..............s.tp.u...h.....psp..........................sps....shshptApppshsp-plccpLu+lGsTsFph..............pp..lplp.h..............stshFlPsStLNplRRcul-pLppthhtsh ..................................................................................................................RshDpphpptL.+p..........sucR+lsVcl..pls......t........ht-t...............LhLols................s...-c.G....s.......slo..............................................h.s.h....-.s...h-pApssths....hssL....cctLu.KLGpT..ahs..............cc..lpls...l..............ss.slFlPsShLNphRR-Al-hLctsRltt................ 0 108 198 246 +12227 PF12393 Dr_adhesin Dr family adhesin Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF04619. This family is the Dr-family adhesin expressed by uropathogenic E. coli. 25.00 25.00 28.00 28.00 20.40 19.80 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.39 0.73 -6.50 0.73 -3.65 5 56 2009-09-16 15:52:41 2009-09-16 16:52:41 3 1 2 0 0 56 0 21.00 67 13.14 CHANGED MKKLAIMAAsSslhssSTAHA MKKLAIMAAsShlhAsSoAHA 0 0 0 0 +12228 PF12394 DUF3657 Protein of unknown function (DUF3657) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF05057. 
21.80 21.80 22.40 22.70 21.50 21.10 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.98 0.72 -3.90 39 350 2009-09-16 15:53:31 2009-09-16 16:53:31 3 8 98 0 200 384 1 62.30 29 8.76 CHANGED lpl-Lhas-hhpssp.............................hptlustshpl..pstptl+ca.sslhF......DhhHhshlss.slassll ............................lpLhasph.psss.................................................hhus+sLtlp.+hphG...l++a.sslhF......Dh.Hhshlul.slatsLh...... 0 52 75 126 +12229 PF12395 DUF3658 Protein of unknown function Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF08874. There are two completely conserved residues (D and R) that may be functionally important. 20.60 20.60 22.30 22.10 19.60 20.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.13 0.72 -4.51 40 322 2009-09-16 15:54:13 2009-09-16 16:54:13 3 3 272 0 72 276 16 110.90 28 38.04 CHANGED lhcthpptp.lo.pctppLtpEWpplpppsu.LRlWcss.plpSspc.saaDshIlc.........psss-atpus+llGplhup....h...pph.lu......DsFltaRl+pLlppGhlchcGsht.shcphpl .....................h..tth.tht.lostchtphttEWpplpppss.LRlWpss.plpSs.pc.shaDphIlc..........ts.s.p-ahp..AscllGpshut....s.......cth...lu......DsaltaRl+pLlppGhl-hpGshs.thp.hp............................ 0 20 41 54 +12230 PF12396 DUF3659 Protein of unknown function (DUF3659) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 70 amino acids in length. 
22.10 22.10 22.20 22.90 22.00 22.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.23 0.72 -4.30 116 553 2009-09-16 15:55:19 2009-09-16 16:55:19 3 17 61 0 480 572 2 65.10 35 43.00 CHANGED husLcG.hpVs+tGpllD.ssGpslG+llEG..Dscc...Ls..G+p...V.D-cGcIlDcsGsllG+sEhl..s........pptp .........ssLpG.hpVs+sGpVlD.psGphlG+llEG....Dscc...Ls.....G+p.......l.D-cGcllDc.sGsllG+sEhl.t....tt..p............ 0 80 213 374 +12231 PF12397 U3snoRNP10 U3 small nucleolar RNA-associated protein 10 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF08146. This family is the protein associated with U3 snoRNA which is involved in the processing of pre-rRNA. 22.00 22.00 22.00 24.40 20.80 21.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.48 0.71 -4.16 67 281 2009-09-16 15:56:45 2009-09-16 16:56:45 3 8 238 0 201 271 1 121.50 21 6.72 CHANGED -shl.plL.Phl.cult.ppss........DhphuuYhllshLus+ssLsspllpuLhcslspshptpst........pp..............uLhsLs..............hlh.....ppp.sstp.............lsppshctlhph.s.....hsphLspl.spphpl..sphlhslltuh.l ...............................sllspllPhltcuLp.sphs........-hphusYMlls.Lus+ssLpcsllsuLhcplhpshsppst..........ps..............uLhs.Lh..............hLh............Qpp..p.sp.p................Lspcshpt.Lh..ph.s.......lhshLppl.upphsl....sphltshh.sh........................................................................ 0 62 106 165 +12232 PF12398 DUF3660 Receptor serine/threonine kinase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00954, Pfam:PF01453, Pfam:PF00069, Pfam:PF08276. There is a conserved ELPL sequence motif. 
25.00 25.00 27.20 26.10 23.60 22.00 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.94 0.72 -4.09 34 150 2009-09-16 15:59:03 2009-09-16 16:59:03 3 12 24 0 15 168 0 41.50 50 5.90 CHANGED ussIs..ppRsQsLl.MNthVl....SS++p...lS.tEN+o.E-LELPLhEa ......ATsIss.ppRsQsLl.MN.....thVl....SS+Rp...lS.tEN+s.E-LELPLhEh. 0 4 8 10 +12233 PF12399 BCA_ABC_TP_C Branched-chain amino acid ATP-binding cassette transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00005. There is a conserved AYLG sequence motif. This family is the C terminal of an ATP dependent branched-chain amino acid transporter. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.19 0.72 -6.41 0.72 -4.21 827 8057 2009-09-16 16:01:50 2009-09-16 17:01:50 3 14 2852 0 2574 6299 2877 22.90 46 7.99 CHANGED chIAcGsPp-lpssPcVlcAYL.G ........llAcGoPp-lpssscVhcAYLG..... 0 633 1531 2091 +12234 PF12400 DUF3661 Vaculolar membrane protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 123 and 138 amino acids in length. 
22.00 22.00 25.00 24.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.85 0.71 -4.05 49 275 2009-09-16 16:04:10 2009-09-16 17:04:10 3 5 208 0 206 258 1 124.00 35 39.14 CHANGED CsaYhlNlllDo...TlGlhllah....hlphlsplhph....hthpplc.SGpY.....................................t.PphpsahpQhhlYhhslhhhKhhlhllhhhh....hlthluphlLsa..hcttsp.lplhhVMhlhPllhNslQaallDshI+pp .....................................................................CshYhlNhllDs........TlGl.llal....hl+hlstlsph.................thp..s.....lc..GpY.................................................................................GsPPpstsWltQshlYhhhlhhhKhslhlllhhh....hhtpluphlLsa..hc...s..Pp.lplhhVMllhPhlhNslpaallDshlpp..................... 0 66 107 164 +12235 PF12401 DUF3662 Protein of unknown function (DUF2662) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00498. 25.00 25.00 27.90 27.50 20.70 19.70 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.46 0.71 -10.26 0.71 -3.93 76 515 2009-09-16 16:04:50 2009-09-16 17:04:50 3 3 496 1 146 383 83 114.90 32 36.99 CHANGED Ls+hEptlEphlpusFu+sFpup..lpPsElsptLcREh-spsthluts+hl.sPNtaslpLussDacplss...h.tssLspElsshlpcaupppsashs.GslpVphpp..sssLcsG.pa+lpu .............lp+hE+plEssVsssFA+sFp.up..lpPsElsutLpREh-spsphlups.+sl..sPNcahlpLSspDa-+Lssh...sctLssEluspLpcaspcptashh..GslhVca-t..sssLpsG.paclp............. 0 54 113 138 +12236 PF12402 nlz1 NocA-like zinc-finger protein 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 42 and 57 amino acids in length. There is a conserved GAY sequence motif. There is a single completely conserved residue G that may be functionally important. Nlz1 self-associated via its C terminus, interacted with Nlz2, and bound to histone deacetylases. 
21.20 21.20 26.30 24.40 19.80 18.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.77 0.72 -3.62 7 173 2009-09-16 16:05:48 2009-09-16 17:05:48 3 2 89 0 44 126 0 51.50 68 10.89 CHANGED AuhsYPGSlsGAYAGYPppals...uLDssK..uSLVuuQh.....uohGss.K.suuushsG ...AGMoYPGSLAGAYAGYPppFLPHG.shD.tK..uSL.VsAQL....AuSLGC.S....AGSSPLAG..... 0 3 7 18 +12237 PF12403 Pax2_C Paired-box protein 2 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00292. This family is the C terminal of the paired-box protein 2 which is a transcription factor involved in embryonic development and organogenesis. 28.10 28.10 42.50 29.80 21.90 20.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.73 0.71 -4.43 11 295 2009-09-16 16:06:57 2009-09-16 17:06:57 3 5 51 0 109 199 0 101.30 53 28.58 CHANGED suspsssshsuhuchusshuphpS.slhsGR-hu.STTLPGYPPHVPPTGQGSYsoSoLAGMV.PGu-FSGNPYSHPQYooYNEAWRFoNPuLLuSPYYYSsAuRss.PPTuATAYD+ ...............................st......s.sssschGushst.p.oh.sl....loGRDMA..STT...LPGYPPHVPP...oGQGSYso.SoLu.G...MV..PGS-FSGsPYSHPQYoo.Ys-u....W...RFs.NP..u..LLu.SPYY..YSussRsuss....sssAs..AaD............................................ 0 5 11 57 +12238 PF12404 DUF3663 Peptidase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00883. There is a conserved WAF sequence motif. 
25.00 25.00 33.40 31.60 21.20 20.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.62 0.72 -4.20 55 805 2009-09-16 16:07:38 2009-09-16 17:07:38 3 3 792 0 98 412 11 76.90 63 18.01 CHANGED MplpLSppsAsupWGcpAllSFsssuAsIH.....L....ssscsLppIQ+AARKLcuQGIppVpLsG-sWcLEppWAFhQGFtss ..M+ITLSTQPADARWGEKAshShNs.DGlTlHL.....NGsDD..LGLIQRAARKIDG.GIKpVpLoGEGWDh-+CWAFWQGY+uP.. 0 9 32 66 +12240 PF12406 DUF3664 Surface protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 131 and 312 amino acids in length. 23.00 23.00 23.50 121.10 22.90 22.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.71 0.72 -3.40 5 53 2009-09-16 16:09:15 2009-09-16 17:09:15 3 3 4 0 1 50 0 100.20 81 54.80 CHANGED DRQLNPIDFDPNDDQQPLDPNQLIDQtEQSQEPTQQEPIEPQQPTQPuT.EPEELEPETVTVEVP.EPVTSEE...PKE.........SoQTEEsTETQDsKpE.PTpQPVDEPP DRQLNPIDFDPNDDQQPLDPNQLhDQhEQS....QEs....TQQEPIEPQQPTQPST.EPEEL-PETVTVEVP.EPVTSEE...PKE............SDQTEE.QKHEEPEAS.PsPEPVDEPs 0 0 1 1 +12241 PF12407 Abdominal-A Homeobox protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00046. This family is a homeobox protein involved in differentiation of embryonic cells to form the abdominal region. 25.00 25.00 26.20 33.70 19.80 18.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.06 0.72 -4.42 5 126 2009-09-17 12:02:55 2009-09-17 13:02:55 3 2 103 0 24 91 0 23.90 83 12.42 CHANGED ELRAVKEINEQARREuc..EcE+c++ .ELRAVKEINEQARRERE...EQDhMK+........ 0 8 11 21 +12242 PF12408 DUF3666 Ribose-5-phosphate isomerase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF02502. 
There are two completely conserved residues (D and F) that may be functionally important. 25.00 25.00 31.10 30.20 21.00 16.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.34 0.72 -4.86 37 553 2009-09-17 12:03:37 2009-09-17 13:03:37 3 2 544 19 66 225 3 48.30 52 22.79 CHANGED sssh+.shlssL+slDQ-LlKsAluGc+FQ-hFFsNCpspcIssYV+sl ....thsh+.shlshL+sID.-hlKsslSGt+FQchFFENCQ-cEltAal+pl.. 0 14 33 52 +12243 PF12409 P5-ATPase P_ATPase; P5-type ATPase cation transporter Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 110 and 126 amino acids in length. The family is found in association with Pfam:PF00122, Pfam:PF00702. P-type ATPases comprise a large superfamily of proteins, present in both prokaryotes and eukaryotes, that transport inorganic cations and other substrates across cell membranes. 21.70 21.70 21.90 24.20 21.50 21.60 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.38 0.71 -4.25 66 485 2009-11-19 17:32:02 2009-09-17 13:04:37 3 23 233 0 321 463 0 118.50 21 10.19 CHANGED -D..hhl.sIsGY+sshh+..hhlahhhslhThGlhaLlh+WhP+a+lphhsptssLtcA-..aVl..lE..........spas...phplhpVpp.....phascshSshhs.spp...................h.hspppss....tlsplRhhpYR ..........-.ph.plhGYcpshh+..hhlhhhhslhohG......lhh.L.lh+WhP.cWpVphpsp...s.....s.Lpc.A-.....hVl..lp......................................spap...............phhh.tpVpp................h.h.sp....th....t.......................................................................................................................... 0 115 158 251 +12244 PF12410 rpo30_N Poxvirus DNA dependent RNA polymerase 30kDa subunit Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 193 and 259 amino acids in length. The family is found in association with Pfam:PF01096. There are two conserved sequence motifs: GIEYSKD and LRY. 
This family is N terminal of the 30 kDa subunit of poxvirus DNA-d-RNA-pol. It has structural similarity to the eukaryotic transcriptional elongation factor SII. 25.00 25.00 53.40 52.80 21.10 19.60 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.51 12 114 2009-09-17 12:05:32 2009-09-17 13:05:32 3 2 83 0 0 76 0 133.60 70 60.24 CHANGED pDlccllp+YVs-pscspcLlpWAp-pAs+aYl+NIsNTK.SNIEETKF-PtNNIGIEYSKDsKNKLSYRNKP.It..TNh-YpDlCshI+sTNGsEK-hLRYlLFGIKCl+tGVEYsIDclpDhsY.ccYFNVLDcKaN ...DIc-llh+YVc-.uplc-llcWAh-KuSKaYI+NIhNTK.SNIEETKFEs+NNIGIEYSKDSKNKLSYRNKP.Is..TNh-Yp-lCshI+sTsGTEKEhLRYlLFGIKClpKsVEYNIDcI+DlsY.p-YFNVLDcKYN. 0 0 0 0 +12245 PF12411 Choline_sulf_C Choline sulfatase enzyme C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, eukaryotes and viruses, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00884. There are two completely conserved residues (R and W) that may be functionally important. This family is the C terminal of choline sulfatase, the enzyme responsible for catalysing the conversion of choline-O-sulfate and, at a lower rate, phosphorylcholine, into choline. 21.30 21.30 29.60 24.20 21.00 20.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.74 0.72 -4.14 37 292 2009-09-17 12:09:02 2009-09-17 13:09:02 3 6 284 0 123 283 111 54.00 51 10.51 CHANGED cpcVlpSQpRR+lVas......AL..ppGphtsWDaQPh.pDuSppYhRNHhc..LsslEttuRa ...cppVltSQRRRRlVap......AL..pp..Gpht.sWDaQPh.pDASpcYMRNHh-..LDsLEppuRF.......... 0 18 54 88 +12246 PF12412 DUF3667 Protein of unknown function (DUF3667) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. There is a single completely conserved residue P that may be functionally important. 
21.80 21.80 21.80 21.80 21.50 19.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.59 0.72 -7.99 0.72 -4.65 78 186 2009-09-17 12:10:03 2009-09-17 13:10:03 3 5 129 0 78 203 27 46.20 33 13.02 CHANGED Du.+hh+TlhtLhh+PGplsccYlsGc..Rt+YlsPhphalhsuhlh........Fl ...Dsphh+TlhtLlh+PGplsccYlsG+..RhcYhsPhphalh.huhlhh.......... 0 39 65 76 +12247 PF12413 DLL_N Homeobox protein distal-less-like N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF00046. This family is the N terminal of a homeobox protein involved in embryonic development and adult neural regeneration. 25.00 25.00 26.50 25.60 23.50 22.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.41 0.72 -2.94 20 283 2009-09-17 12:10:48 2009-09-17 13:10:48 3 2 148 0 84 168 0 74.10 45 33.67 CHANGED p-SPTLPESTATDsG.YYSspts......HsYhss.....psY.upslN.sY.......Qa.phsGlsus.usYu.sK.sYs....Y.suuY...+QY.GsYsRssssssps .....p-SPTLP.SosTD.s..YYss..........tasuu......uPY..tpls.sY........QY.phsuhuu.........ssYs.AK..uYs.h...a.suoY...t.pY.GuYupssSsss..s............. 0 4 11 37 +12248 PF12414 Fox-1_C Calcitonin gene-related peptide regulator C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 69 and 99 amino acids in length. The family is found in association with Pfam:PF00076. This family is the C terminal of Fox-1, a protein involved in the regulation of calcitonin gene-related peptide to mediate the neuron-specific splicing pattern. Fox-1, with Fox-2, functions to repress exon 4 inclusion. 
25.00 25.00 35.30 30.60 22.20 16.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.47 0.72 -3.58 6 257 2009-09-17 12:11:22 2009-09-17 13:11:22 3 4 42 0 88 228 0 87.20 67 26.35 CHANGED sVPuFPYPs...sss...suuAaRGutL.RGRuRsV....YsshRAAsP...PsulPuYsGVV.YQDGFYGA-.lYGGY.AAYRaAQPAo........AssAAYSDuYGRVY.sA.DPY ............................................................h.lPGFPYPs...Ass.......sAAAaRGAHL.RGRGRs.V....YsshRAA.P...PssIPAYsGVV.YQDG.FYGA-...lY....GGY...A........AYRYAQPss.............AsAAA......YS...........D.uYGRVY..sA.DPY................ 0 4 11 37 +12249 PF12415 rpo132 Poxvirus DNA dependent RNA polymerase Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF04566, Pfam:PF00562, Pfam:PF04567, Pfam:PF04560, Pfam:PF04565. This family is the second largest subunit of the poxvirus DNA dependent RNA polymerase. It has structural similarity to the second-largest RNA polymerase subunits of eubacteria, archaebacteria, and eukaryotes. 25.00 25.00 26.40 40.80 19.80 17.10 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.71 0.72 -7.02 0.72 -5.00 13 82 2009-09-17 12:12:17 2009-09-17 13:12:17 3 5 49 0 0 76 0 32.80 64 2.89 CHANGED -FI+psLoYDMPsEllYLVNulIESTKpllss.p .EFIRRSLSYDMPPEVVYLVNAIIDSAKRlsES.I.. 0 0 0 0 +12250 PF12416 DUF3668 Cep120 protein Gavin OL, Bateman A lg7 Bateman A Family This family includes the Cep120 protein which is associated with centriole structure and function [3]. 
21.80 21.80 22.20 22.40 20.90 19.40 hmmbuild -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.11 0.70 -5.24 16 109 2009-09-17 12:13:09 2009-09-17 13:13:09 3 8 80 0 73 121 0 295.20 39 33.78 CHANGED hlVlsIlEG+sFspc.......+pplllpAphs........ucsLpTD...............Plspt-sstFsocLsWE........hD+psl+..........+h+sp+sPlKLpCaAsc.tsst+EsIGYllLslRus....ptsp.p....ps+WapLLus...capphKPEl.hlhlslEscsh.t.ts.p..h........psppsPsppu.....ss.h..s.tplhshhhs-pGhhQlGsssh.ssDhallslpltpsppLspL.hs.s..l.t+..t.sshhhhYslLGNDVTsc.pFpp.hsssa.hpcs..lRl+SSlpsL+hahspp.stL.Iplpp....t.......ppulusoplshssLlstssh.th.....sp+hsshpGsFshp..stscs...............ss-.htPplslsloLchc .............................................................hlVlpllE..G+pFspp.......cp.lllpApFsGEpLtTD.....PlpHs-pPpFsTELAWElD++sL+ppRhQRTPIKL......pCFAlc.....sho.st+EslGYllLDLRss....pps..p....ts+WapLLus...+.......Ys+aKsEl..lulslEs-sp....ssc.th................KuptsPPcpu...ps.s.....tlsspsl..hshLtp-tGaaQIGP..s-h..ssD.FlhSloluaAp.pLcpL.lsss....Lstc....spFaFhYs..LLGN.-VTs-.sFpsLhs....P......sFpsERsS.lRlRSSlcsL+hYLshp.stLpIaLCs....G...........spu.LGssclsLssLLtpssh.ph.......sp+sssh-GuFhlpsss+scp...............sh-..htPplslolsLph.p..................................... 0 29 34 53 +12251 PF12417 DUF3669 Zinc finger protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 64 and 80 amino acids in length. 21.60 21.60 22.30 21.90 21.40 21.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.22 0.72 -4.25 23 311 2009-09-17 12:13:48 2009-09-17 13:13:48 3 38 80 0 172 301 0 75.70 43 14.96 CHANGED hWhlDFstsps......hphs.css.ltp......hlsAahcNDsaaPRPt.......t..stpLWssFcppYl.suptlhpshhh...hcs .....................h-hshhsshss.h.slE+K.h-up.us+............LlsLEGRTGssEKKLA................-CEKTusEhuNplEuKWsVLGTLLQEYGLLQRRLEN.............. 
0 16 39 68 +12252 PF12418 AcylCoA_DH_N Acyl-CoA dehydrogenase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF02770, Pfam:PF00441, Pfam:PF02771. This family is one of the enzymes involved in AcylCoA interaction in beta-oxidation. 21.20 21.20 21.20 21.30 21.00 21.10 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -7.23 0.72 -4.26 232 1684 2009-09-17 12:14:12 2009-09-17 13:14:12 3 9 805 0 589 1505 340 32.30 33 5.44 CHANGED ssYpuslRDhpFlLpElhshcthh..shssas-hs ........pYpAPlRDhpFlLpElLshcthh..shstas-hs................. 0 150 332 482 +12253 PF12419 DUF3670 SNF2 Helicase protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00271, Pfam:PF00176. Most of the proteins in this family are annotated as SNF2 helicases but there is little accompanying literature to confirm this. 27.60 27.60 28.10 31.00 26.50 25.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.51 0.71 -4.74 105 583 2009-09-17 12:16:04 2009-09-17 13:16:04 3 5 516 0 181 587 58 139.80 27 14.45 CHANGED L.psstPpshp..Lssp-shpFLppsu.tLpsuGlsVlLPsthpt.t...tp+lplclps............................sss.sst......uhlulcsLlsFcWclulGs....psLoppEhppLsppppsLVcl+GpWVplDspclppstchhpp.ttpt.........loht-sLchshs .............................shp..Lssp-shpFLspsuspLpsuGlsVlLPshWppht...ts+hplclps..........................................ttsss.ssp........uhhGhss..LlsFcWclulGs.............tsLocpEhpcLs...c....pppsLlcl+GpWltlDsptlccspchhpc.ttp.t..........lsht-hLphth.t.................. 
0 61 131 163 +12254 PF12420 DUF3671 Protein of unknown function Gavin O lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 96 and 116 amino acids in length. 22.10 22.10 22.30 23.40 21.80 22.00 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.60 0.72 -4.05 42 76 2009-09-17 12:17:00 2009-09-17 13:17:00 3 2 4 0 39 97 1 108.20 37 46.74 CHANGED KKcGL..................c+LDsYCEKKlFsplcplpclscshshcKKpaKphlhKKYG...hhlll.sLhs.llGlIlslLht.ht.th......................................h..hphhFhhlhsslhlLhllYlhhKllKY- ................KKcGLcKLDCYCE+KIFsslD+lcKhtcstNhDKKsaKpl.llKKYG...htlIhssLhs.LlGlIlsILsh.tpsth.......t...........................h.thlsslshsFah.assIhlLsllYIhhKllKY-........................................ 0 0 0 34 +12255 PF12421 DUF3672 Fibronectin type III protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is typically between 126 and 146 amino acids in length. The family is found in association with Pfam:PF09327, Pfam:PF00041. There are two completely conserved G residues that may be functionally important. Many of the proteins in this family are annotated as fibronectin type III however there is little accompanying literature to confirm this. 
23.60 23.60 23.90 23.70 22.80 23.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.65 0.71 -4.02 25 1261 2009-09-17 12:19:22 2009-09-17 13:19:22 3 19 403 0 22 1470 7 135.60 56 15.88 CHANGED lspsGphphpNuslpG..............slsAsSGshsssshspssphtGslpAppIc.G.....Dll+..........hh.tt.................h.hpsh..ss.sasRplhl..........................ttt.p.....hhh.pshshtlppstthlhstssssh....hssh.......hshPs.sp ..............LTPDG+LTAKNADISG..............sVNANSGTLN.....NVT.....INENCplhGKLSANQIE.G.....DlVKTVuKuF...........ss........................pasSGTlTVpl.DDQ....sFDRQIlIP....................ustapstpppppps..shYSoC+L.VpKNGsEIaspsshDsp...hlaSuV.......IDMPAG............................................................................................................................................................................................................................................. 0 3 5 12 +12256 PF12422 Condensin2nSMC Condensin II non structural maintenance of chromosomes subunit Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 150 amino acids in length. This family is part of a non-SMC subunit of condensin II which is involved in maintenance of the structural integrity of chromosomes. Condensin II is made up of SMC (structural maintenance of chromosomes) and non-SMC subunits. The non-SMC subunits bind to the catalytic ends of the SMC subunit dimer. The condensin holocomplex is able to introduce superhelical tension into DNA in an ATP hydrolysis- dependent manner, resulting in the formation of positive supercoils in the presence of topoisomerase I and of positive knots in the presence of topoisomerase II. 
21.70 21.70 22.50 21.80 21.30 20.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.94 0.71 -4.47 26 201 2009-09-17 12:21:28 2009-09-17 13:21:28 3 7 99 0 119 184 1 148.20 29 21.30 CHANGED Llpshhssthlps.-Gp+hluhlhs.hpt.hhcchtshl+spl...phs+uhlptau-lhapuWKpu...............ptshppplEpshlpshhpsulcs.....ts.hhusphRphLpsFtpp+.hpttV-phL..hclhcPlLaRuLpssNspV...RtNuhtlhhp.hFPlpss ......................hLtsFhssshlts.p..u.phlShLhs.hphshh+thpshlcs.l...tths+slhphhu.-lhacuWppu................tphhpplcpspIQDhh.tulcL...............cp..sas+sRcsLthFhppK..h..tlcchL...hcl.hcPlLa+uLpssNupV...Rssuthlhlc.AFPlcs.................... 0 35 52 84 +12257 PF12423 KIF1B Kinesin protein 1B Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00225, Pfam:PF00498. KIF1B is an anterograde motor for transport of mitochondria in axons of neuronal cells. 22.20 22.20 22.20 22.40 22.10 22.00 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.43 0.72 -3.93 42 516 2009-09-17 12:24:20 2009-09-17 13:24:20 3 40 167 0 305 465 1 46.90 37 3.03 CHANGED hpNRlhtMR.-hYpph........................................hptt............pp...........h.ptpDPFa-s..-paslIGs ..........................LcpRL..MR.-hY.pch......................t............................pps.sst......................cp............hhptuDPFY-p..c.apLlGs........... 0 90 120 213 +12258 PF12424 ATP_Ca_trans_C Plasma membrane calcium transporter ATPase C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00689, Pfam:PF00122, Pfam:PF00702, Pfam:PF00690. There is a conserved QTQ sequence motif. This family is the C terminal of a calcium transporting ATPase located in the plasma membrane. 
22.90 22.90 23.70 23.10 22.00 22.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.28 0.72 -3.61 19 534 2009-09-17 12:25:56 2009-09-17 13:25:56 3 22 95 2 238 453 0 56.10 47 6.66 CHANGED GQILWhRGLsRlQTQIRVV+AF+SsL..tcshcpspSpsu.l+shhs.sph..slpcphpp.shlsppc ................GQILWhRGLNRIQT.QI....+VVpAF+Suh....h.p.....u..l..c+..pS.hsS....lHshhst.ph...h.t............s.................................................. 0 32 49 134 +12259 PF12425 DUF3673 Protein of unknown function (DUF3673) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. 22.30 22.30 23.20 23.60 22.20 22.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.47 0.72 -4.12 19 52 2009-09-17 12:28:21 2009-09-17 13:28:21 3 4 3 0 29 47 0 51.70 42 28.82 CHANGED Rhssc-hhssspthshl-sc+t.h.......pc-suHApssssaspcchs.h....sspsph.+ts ......hhsscDhhToscchuYlEccKGYs.......E-EsuHAQsssVYANKKhl.Y.....usKsphs+..h......... 0 0 0 3 +12260 PF12426 DUF3674 RNA dependent RNA polymerase Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 40 amino acids in length. There is a conserved MFNLKF sequence motif. There are two completely conserved residues (E and P) that may be functionally important. 22.00 22.00 22.80 47.90 19.90 21.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.02 0.72 -4.55 8 125 2009-09-17 12:29:39 2009-09-17 13:29:39 3 2 47 0 0 100 0 40.30 63 3.25 CHANGED LIKs-ERuALEAMFNLKFHlus.KspsYhIP-Y+slp.spP ..........LIKsDERuALEAMFNLKFHloGsKsRsFsIPsY+PQsLCpP....... 0 0 0 0 +12261 PF12427 DUF3665 Branched-chain amino acid aminotransferase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 23 and 35 amino acids in length. The family is found in association with Pfam:PF01063. There is a conserved TRT sequence motif. 
25.00 25.00 43.40 42.30 23.80 20.50 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.30 0.72 -6.73 0.72 -4.51 3 14 2009-09-17 12:33:33 2009-09-17 13:33:33 3 1 12 0 3 14 0 26.30 72 7.06 CHANGED M............L-YTVTRT-sPTSP-RLKEILA ..............L-aTVTRTENPTSP-RLKEILA. 0 1 2 3 +12262 PF12428 DUF3675 Protein of unknown function (DUF3675) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF00097. There are two completely conserved residues (R and L) that may be functionally important. 21.10 21.10 21.10 21.10 19.40 18.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.46 0.71 -3.84 30 175 2009-09-18 09:02:28 2009-09-18 10:02:28 3 3 21 0 96 161 0 110.10 34 44.96 CHANGED PGYTAPs.phsphscsslslp....usWp..ssth-.c-schlAhstuppphhpsth..s-hssssspuusaCRSlAllhMsLLLLRHsLslsh..sus-chuhsl.ho...LhhLRsAGILLPhYlhh+ulo ...............................PGYTAPP...hhp....schslslp............ssap....hsth-.c..cschlAhstuppphh.ps-a.s-hssssssuushCRSlAlI.......hMsLLLLRHslslsp...sss-c.shs...l..Fo...lhlLRsAGhLLPhYlhhhul....................................... 0 8 52 75 +12263 PF12429 DUF3676 Protein of unknown function (DUF3676) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 230 amino acids in length. 
25.00 25.00 34.40 34.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.53 0.70 -4.75 27 92 2009-09-18 09:05:25 2009-09-18 10:05:25 3 8 2 0 40 93 0 193.10 63 27.25 CHANGED TDsSVASES+SEESs.sSaEcLsEsDTDcQ.EEpsVcs.VPAAssSTVsAGSSVsEPAhAAESAtNS+sEDNAQLSEGcTuQQuT.sEspcSMQRDSDVQsQD.QSpELTEVsDVEtSSES.DsEpPEEEGcANDRSGGoTSsVuASLSM-TATusVsGEHQVQQSsELuAENcDVRSTGTGTTGAEpSLSLEAGDuNSERTMuSDSSLTPSKSDAEPTSAEsTDslSRTEG ..TcsSVASESpSEES..sSaEcLsEsDs-cQ.EEpsVcs.VPAAsSSTssuGSSVsEPAhAsESAtNSh.-DNAQhSpGETuQQsT.pEspcSMQRsS-VQsQD.QS...tc.TEhsDVEtSuES.Dpp.PEE-ttss-tSGtoTSsVuAS.sM-TsstsV.uEHQVQQSsEhusENsDVRSTGTGTTGAEpSLSLEAGDtNSERTMsSDSS.TPS+SDAEsTSAEsTDslShTEt.................. 0 0 0 40 +12264 PF12430 ABA_GPCR Abscisic acid G-protein coupled receptor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 177 and 216 amino acids in length. This family is part of the abscisic acid (ABA) G-protein coupled receptor. ABA is a stress hormone in plants. 
22.10 22.10 23.30 30.10 21.10 21.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.22 0.71 -5.17 44 301 2009-09-18 09:06:13 2009-09-18 10:06:13 3 10 234 0 216 311 1 172.90 36 36.74 CHANGED p.ptthupThhG+hhphh.shhhulYCla+lhhshlphlhhh...............................ssssDslstslshhlp...........hhshpl...Dhp.hhspplShlLsGslhlsShpsllhohpp...hhphhsushs..ss.....................................hhsLlluplhGhYhlSolLLlRssLPt............ph.pshlsc............lLu............sslch.tFhcpWFDhlFllSulhohlhlhhtcphs ..............................t.thupThhG.+hhsh.h.GahFSlYClaKlhhshlsllhpp................................supsDPlopslplhlp....................ahs.hph.......Dlt.hhoppIShlLlGhlllsSlRulLhTlp+...hhhh.luushu..ss.....................................hllLlluplMG.hYFlSolLLlRhshP..........................-h.+shloc......................lLG..............plpa.sFac+WFDslFll.......Sulhollhlhltc+..s.................... 0 82 125 181 +12265 PF12431 CitT Transcriptional regulator Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00072. There is a single completely conserved residue G that may be functionally important. CitT is a transcriptional regulator which allows transcription of the citM gene which codes for the secondary transporter in the Mg-citrate transport complex. 21.60 21.60 21.60 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.40 0.72 -3.80 48 854 2009-09-18 09:08:13 2009-09-18 10:08:13 3 6 644 0 81 293 7 29.40 49 13.06 CHANGED lsQptVDphhptts....pspst..sp..LPKGIDpl ..AsQcplDcMFNshA......+s-sp...sc..LPpGIDt... 0 17 32 57 +12266 PF12432 DUF3677 Protein of unknown function (DUF3677) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. 
25.00 25.00 28.00 28.80 24.90 23.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.66 0.72 -3.54 19 104 2009-09-18 09:13:42 2009-09-18 10:13:42 3 3 83 0 79 115 0 81.20 45 4.34 CHANGED l...L+hLstssGhtplRhhss.....p+lEhWlpNsKlp+.sp-L.LhhlhhNsss.......pstpDh-.slstLl+lth+sps...........lhshahtsl+cl ......lL+hLosoCGhtEVRhhul.....pRLEhWLpNPK..Ls..........R...sAQ-L.LhplChNCso.......+uscDh-.VlupLlKlRLKsKs..........LlNaahhCl+El....................... 0 29 37 60 +12267 PF12433 PV_NSP1 Parvovirus non-structural protein 1 Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 109 and 668 amino acids in length. Parvoviral NSPs regulate host gene expression through histone acetylation. 25.00 25.00 67.70 67.00 22.30 19.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.73 0.72 -3.91 5 227 2009-09-18 09:15:31 2009-09-18 10:15:31 3 3 33 0 0 240 0 65.90 56 16.25 CHANGED lLTYpHKQTKKDYsKsVHFGNMIAYYFLNKKKIsT-..+-cGYFLSoDSGahsNFLKppER+lVSKLYTDEpKPETVETTVT .hpYhHKQTKpDYsK.VphtshlhhYhhNKcKIsc-..p.cGYahuusuGhh................................. 0 0 0 0 +12268 PF12434 Malate_DH Malate dehydrogenase enzyme Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00390, Pfam:PF03949, Pfam:PF01515. There is a conserved AAL sequence motif. There is a single completely conserved residue R that may be functionally important. Malate dehydrogenase is one of the enzymes involved in the citric acid cycle in mitochondria. It converts malate to oxaloacetate using NAD as a cofactor. 
21.50 21.50 21.50 21.50 21.40 21.00 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.85 0.72 -7.06 0.72 -4.24 5 120 2009-09-18 09:25:20 2009-09-18 10:25:20 3 5 117 \N 46 104 24 22.60 67 2.97 CHANGED s......osSc-DLK...-QQRAALRKAALEYHEFP .............ss.................t+tpLRcAAL-YHEFP 0 3 20 33 +12269 PF12435 DUF3678 Protein of unknown function (DUF3678) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. 20.40 20.40 22.10 21.80 19.40 19.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.70 0.72 -4.39 16 44 2009-09-18 09:28:31 2009-09-18 10:28:31 3 4 4 0 30 39 0 35.00 34 14.18 CHANGED hhso-lhlpusus..........soSSushsHpppc+hFlchsol .....hho-lhltAsus..........sooSsshhHpppc+hFlcasoh. 0 0 0 5 +12270 PF12436 USP7_ICP0_bdg USP7; ICP0-binding domain of Ubiquitin-specific protease 7 Gavin OL, Coggill P lg7 Prosite Family This domain is one of two C-terminal domains on the much longer ubiquitin-specific proteases. This particular one is found to interact with the herpesvirus 1 trans-acting transcriptional protein ICP0/VMW110. 
27.00 27.00 27.40 27.50 26.80 26.80 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.54 0.70 -5.14 52 419 2009-09-18 09:30:54 2009-09-18 10:30:54 3 25 258 2 276 415 4 231.30 28 22.05 CHANGED phRhWhhsp.RpN..........+ThRP.......sp.l..p-.shTlpp..ltsp.ss.pt.th+lalElh.......................tt.....ss.pspp..slLlFlKhaDsppppLphlG+lalptspKls-..llPhlpchhua....sssTplpLaEEIK......sMh-.l...........c.ctohpp.....uElpDGDIIsFQ+shs......................t.psphthssshpaa-aLhNRl.VpFtsh...tpPpc.s...........sFsLpLSc+hoYDplupcVup+Lsh.DPp+LRFsssts.h....stpP+...h+...s...hspoLp-l..L ...................................................RhW.h.p.RpN..........tThRP..........sh....t-....s....oh..tp........hhp.....p...tp....................hplalEh..................................................................t.t.h..ss.s..ppp.....slllFlKh...a..Ds.p....p...p...p.LphsG+lal.ptss+.lp-..lhshlpchhGa.......sss..spl.LYEEl+.............................shh-.l...........c.c.ohpp.................sEltDGDIIsFQ+ssst.........................................psps...phssstpaachLhpRh.........Vh.Fpsh.p.spc.s.........................................................tFsltLSpphsYp..p..lscpVup....pLs.........h..DP.....hl..phhthts........pttPt...h+.......p....tpl.ph............................................................................ 0 88 151 228 +12271 PF12437 GSIII_N Glutamine synthetase type III N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 160 amino acids in length. The family is found in association with Pfam:PF00120. This family is the N terminal region of glutamine synthetase type III which is one of the enzymes responsible for generation of glutamine through conversion glutamate to glutamine by the incorporation of ammonia (NH3). 
25.00 25.00 31.10 30.80 19.30 18.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.83 0.71 -4.85 106 750 2009-09-18 09:35:47 2009-09-18 10:35:47 3 3 601 6 222 705 101 162.80 58 23.02 CHANGED ppls-hFGppVFscpsM+cpLPKssY+pLpcoIcpGptL.DhplA-sVAsAMK-WAl-+GATHYT.HWFQPL....T..GhTAEKHDoFls.sss-.G....p.slhcFSGKpLlpGEPDASSFPsGGLRuTFEARGYTAWDsTSPAFIhcp...s...sLCIPTsFsSYoGEALDpKTPLLRS ...........p.hs-hFGppVFscpsMccpLPKplYKplpcsI-pG.ptL.DhplADsVAsuMK-WAlE+GATHYT.HWFQPL........T.GhTAEKHDuFls.sssc.G.......p.sltcFSGKpLlpGEPDASSFPsGGlRuTFEARGYTAWDsTSPAFlh-........sTLCIPTsFhSYTGEALDpKTPLLRS............... 0 108 185 212 +12272 PF12438 DUF3679 Protein of unknown function (DUF3679) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. 20.90 20.90 21.30 23.10 19.80 20.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.61 0.72 -4.61 14 131 2009-09-18 09:40:58 2009-09-18 10:40:58 3 1 130 0 16 88 0 51.90 54 47.82 CHANGED +sllhshlhhhGVLhGMQpANcGhppMKGYcDsohpsshplscscssphEAulLGp ...hh.hhlhhhMVlAGVuLANHGLKSMKGYpQhSYEQIAHMTGTcupssEoEILGp....................... 2 2 8 10 +12273 PF12439 GDE_N Glycogen debranching enzyme N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is typically between 218 and 229 amino acids in length. The family is found in association with Pfam:PF06202. Glycogen debranching enzyme catalyses the debranching of amylopectin in glycogen. This is done by transferring three glucose subunits of glycogen from one parallel chain to another. This has the effect of enabling the glucose residues to become more accessible for glycolysis. 
21.30 21.30 31.50 23.50 19.50 15.40 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.36 0.70 -5.11 94 351 2009-09-18 09:42:53 2009-09-18 10:42:53 3 6 336 0 118 343 25 219.70 31 32.83 CHANGED +EWLlTNGlGGYAuuT.lsGs.TRpYHGLLlAulps..PhsR.hlllocL-Eplth.ssp...........pasLuspca..........sss.htspGap.aLppF.ph-.shPhWpaph....sssh.lcKclhM.pu..............................................................pNsshlpY..p......hhpu..sp......shpLplcPhlshRsaHshspss..h.........................thphpht.h.tp..s.............lplpshs..........thssltlh..hs.t.....u..ph.httss............WahshpYshEpc.RGhssp-DhasPGhhphsLps...Gpslhl.s ............+EaLhTNthGuYusuT.lsssNTR+YHGLLlsslss..sss.R.alLLSp.L-Eslh..pGt...........................pasLuhp.....+a..........tss.hpPpGa+..alpcF..ph-..tlPshhYcl.....usll.lcKclhh.pt..............................................................pNpl.hlc.Y.p.........lhsu......ps............sspLcl+PhlsaRsh+tho+ps.th...........................shph..s..ps..G..............lphshhs..........shspLhhp.....hsp.......s..ph..hptss...........WYpshpYstEp-.RGhshpEDLasPGhFchslpt.G-slhh.s................ 0 48 89 109 +12274 PF12440 MAGE_N Melanoma associated antigen family N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 82 and 96 amino acids in length. The family is found in association with Pfam:PF01454. This family is the N terminal of various melanoma associated antigens. These are tumour rejection antigens which are expressed on HLA-A1 of tumour cells and they are recognised by cytotoxic T lymphocytes (CTLs). 
21.90 21.90 22.00 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.18 0.72 -3.70 33 530 2009-09-18 09:45:57 2009-09-18 10:45:57 3 8 23 0 267 572 0 89.70 29 28.27 CHANGED +uQKSptpstEcchQAptEspsL.ssQsssu-cp..tusuSs.sh..ss.ppssu.uGssssPQ.usQtAsossoshsu.s.........stspSsEuups.ptEE.ssuso ...............................tQ+SphpthEcp.hpup.....sEsp.......uL..suQs...sssccp..........pssS....S.......u...s...s.......hht..ss.......ppss..A.uss.ss.....PQ.u.PQtuss.ssoshss..s....................shs.pS.sEuupu..QpEc.ssss................................ 0 24 24 36 +12275 PF12441 DUF3680 Protein of unknown function (DUF3680) Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is approximately 40 amino acids in length. 27.70 27.70 28.20 28.90 26.70 27.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.80 0.72 -4.11 33 91 2009-09-18 09:47:12 2009-09-18 10:47:12 3 2 76 0 37 92 16 40.70 32 46.20 CHANGED hp.KplP.cFco-tEtcpFW-pHDso-YhD......phpplpFs.h+. ..........ctlP.phco-pEtccFW-o..pDhT-Yhs.......phpslph............ 0 7 24 32 +12276 PF12442 DUF3681 Protein of unknown function (DUF3681) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 112 and 212 amino acids in length. There is a single completely conserved residue G that may be functionally important. 
24.30 24.30 24.50 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.59 0.72 -3.85 22 90 2009-09-18 09:49:35 2009-09-18 10:49:35 3 6 6 0 54 67 0 96.20 30 49.03 CHANGED uspsh.ph.hALhshGlsTsusAhhshh.........hpsP..sGhhs.t.....hhYhlh...LsushlsGlstshsuhWVu.ssstt.....RRusG..+pllhsulsPLlh.s..........sh........uuhs.......l ..................s...s..th.tALhshGlsssuhAhslAh.......apsP..uGhht.h....hh...hYals...LsushlsGluplssu.h.WVu...sssts...........R+usG..+pllasulsPLlh.s.hulss...l............................... 0 0 10 27 +12277 PF12443 AKNA AT-hook-containing transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. This family contains a transcription factor which regulates the expression of the costimulatory molecules on lymphocytes. 26.20 26.20 26.60 32.90 25.80 26.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.37 0.72 -4.02 4 80 2009-09-18 09:52:05 2009-09-18 10:52:05 3 1 26 0 32 83 0 103.60 40 11.37 CHANGED QpQIps..pl+ssptsLpu.E..puCpc.pPssQhssSpuoshhFphhpchct.h.+Ltpph-pLKs+lcphp+chc.susspLQDpp.shppL..psspLstP.Gss ................................ppQlph..pl+htpthLpt.E..puhpc.psspQhssSpuoshhFp.hpchct.h.+Ltpph-pLKs+lpphppc.c.sussph.....Dpp..sL.pLp.p.phL.ts.ht.s.................................. 0 3 3 5 +12278 PF12444 Sox_N Sox developmental protein N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 69 and 88 amino acids in length. The family is found in association with Pfam:PF00505. There are two conserved sequence motifs: YDW and PVR. This family contains Sox8, Sox9 and Sox10 proteins which have structural similarity. Sox proteins are involved in developmental processes. 
22.30 22.30 22.80 24.80 21.60 22.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.27 0.72 -2.87 24 321 2009-09-18 09:55:34 2009-09-18 10:55:34 3 2 129 0 110 284 0 74.70 53 17.54 CHANGED ssSPuhS--.sshSPs.s.usuGu....-o.......psst.tp........s.............................phcp-s-......D-+FPssIREAVSQVLcGYDWTLVPMPVRV ..........................................s..SPshS--.....pshSPs.s..Susuu........Do.................psststp..t......t.ts.............................chcp-s-.......--KFPlCIR-AVSQVLKGYDWTLVPMPVRV..... 0 14 22 59 +12279 PF12445 FliC Flagellin protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 125 and 147 amino acids in length. The family is found in association with Pfam:PF00669, Pfam:PF00700. There are two completely conserved G residues that may be functionally important. This family is the flagellin motor protein which confers motility to bacterial cells. 21.90 21.90 21.90 22.50 21.70 21.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.99 0.71 -4.02 27 245 2009-09-18 09:56:18 2009-09-18 10:56:18 3 4 177 0 2 121 0 140.00 37 25.15 CHANGED shpssuss......sussuoIThss....Gsohshsuu.........sss.......hsstssolSA-shtusspssu....aTs...ssus...tsassu..sssVshsu....................sssYscsDG.pLTTsss..s..sYahpsD.GsVTs....................ssGpslYhsADGclTT-Aso ......hh..tsuusuucsuolThs........GTpaohusus.........s..s............ssshsAoVSt-slhutsKusu.....sh.....ssuo.....hTa.ssGh.otslshsust..................sssYlDscG.slTsssshss..sY.lpcD.GoVTsst.................ssusGssVYVsusGKlTT-sTS... 0 0 1 1 +12280 PF12446 DUF3682 Protein of unknown function (DUF3682) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 125 and 136 amino acids in length. 
25.00 25.00 97.10 97.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.07 0.71 -3.86 33 59 2009-09-18 10:02:37 2009-09-18 11:02:37 3 1 2 0 1 59 0 133.60 61 40.95 CHANGED usGG..pGoG.GsuSuuuu.......ss...Puusssusussss..sAsuVDSSAGSSsGpAGSSGoNsSNTTGDSsTGDQos.AAAAApsSSPPEGPAGTTSGTGHTRQEEE......EEE-pEKQQQSDEsQVQ.QHQQHEHPAEsGE..ESA ................................................................................sspusGGsuuGuus.............sus..suusssus..u.sssss.......sAssVDoSAGSSsGpAGSSGoNsSNTTGDS.....sTGDQTs.AAAAApsSSPPEGPAGTTSGTGHTRQEEE......EEEEpEKQQQSDEsQVQ.QHQQHEHPAEsGEESA.... 0 0 0 1 +12281 PF12447 DUF3683 Protein of unknown function (DUF3683) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF02754, Pfam:PF01565, Pfam:PF02913. 21.40 21.40 59.10 57.90 21.00 20.30 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.46 0.71 -4.26 42 271 2009-09-21 10:06:30 2009-09-21 11:06:30 3 6 267 0 96 256 43 122.00 63 9.53 CHANGED .ssRlREIPYNYTSFSDREIVlRLLGp-sWphLs-LRsERpTGRSARMLaEVLGDIWVVpRNPYLpDDLL-sscRRptLl-AhpHRLspIccRps.............ssp+VtpLlsuA+pAVptFppp ...s.sRlREIPYNYTSFSDREIVIRLLG--AWplLs-LRucR+TGRSARMLaEVLGDIWVVpRNPYLpDDLL-NPKRRthLlcALpHRLsEIcKRRs...................csppVphLlsAAccAVccFspp....... 0 19 55 79 +12282 PF12448 Milton Kinesin associated protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 143 and 173 amino acids in length. The family is found in association with Pfam:PF04849. This family is a region of the protein milton. Milton recruits the heavy chain of kinesin to mitochondria to allow the motor movement function of kinesin. 
22.30 22.30 27.40 23.70 21.80 20.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.31 0.71 -4.05 16 296 2009-09-21 10:09:17 2009-09-21 11:09:17 3 7 65 0 163 225 0 164.70 29 31.66 CHANGED sssptpsSsssssts..su....u..........u........h.hs...u..hhLpp+slSsts..sspS......st.tp.s...............................PuoPssp-LptAL+pLo.......h.......pRcNaLup+.hhshppEtphpthupt..tst.........P.....s-SlhSsss.....sth..............tphhLP-KLQ.IVKPhEuS .....................................................................................................................p..........s...s..GSspo.Ss..o...............................hossc..S....ohphos..u...shlLpp+s..hSstlh.psts........tst.pp.us..............................P.uTPssschpsALppLo..........h.......tp-NaLu.p.....+..hhp...pEh+hpphs..................P......o-S..hh.Slus.................................t+hhLP-KLQ.IlKPhEu............... 0 13 27 87 +12283 PF12449 DUF3684 Protein of unknown function (DUF3684) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 1072 and 1090 amino acids in length. 
19.50 19.50 32.70 23.20 18.50 17.90 hmmbuild -o /dev/null HMM SEED 1093 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.96 0.70 -13.85 0.70 -6.98 22 109 2009-09-21 10:18:36 2009-09-21 11:18:36 3 7 94 0 90 113 0 917.70 39 61.55 CHANGED LTFVu.LcsIElalDDasll+LpKKsuPshslsIP+clcT+TspGLM+lpslpppssQlDAshMphV..thpssust.h..tt.c..sss.......uoL+SFFS+housussp...cstpsppss..pttsptD...........ltphspuolFL+IsTAsIpsslupuhusELERATKKPPPKpoplAlLTsuYsph.AStts.................tspss..IFuulLPo+..uG..RlFIGFPTpQTTGlsuHlSAPSlIPTVERESIDLNsRalupWNhEhLRsAGllCRlAapsEMuslpscl..t.....t..ttuthccpcltsllscAlHshppFsF+-STPSotlGphIE-uFWoCs+sss.IElLSTpGVlsScpVRhsscsLS.FhcuIPVlPc-hhssApcFlp+Lp-hGLls-lTlsDIhpELcupsLspsQlh-FLpWl.......uccusuupl-.sshppLLssAVAs.ps.s.us.ss+llsLusIssalNPp+I.....Ps-LPlPPsllPapaoKsLspppLpul.GWpELpllsWLpaLlp...susscs.....-pDlTpSssFuspVLsVLSKpW-s.LusuoKpsVlshLpspTlIPTKhGM++PsEsYFsoV+LFDDLPlV.....pGlsulKEKhLsALGVRKTV-LslIF-RLLtssststt......ptcWSHVDLI+YLuSVcsDIPspDlcRL+pTslCsAE........ssppsptpRYKlS-LaEPc-uLRsLtLPllcW...PGcapssSsEu+FLhtLGL+saPss.-llclhu...tusDhpL+sKAhsYalspat.NsYusach..utsshsFLPl.p.....spp...pLusPppCFTscsAulhGFslLRp-L+s..HAsKhGVppHPshspClshLlppPPpocp-A+tlFpYlAuRlu-lsssc..lc+lupAtIVPV.pcphs.........pptpsh..phl......sPppCYL....Gcup-YpcIFDFVDFGtcANhFLhusGuKcEPTptElAphLl+EPARl.uthQSs-................+YLpLLRslA-shshL++c..+pLhpcM+pusFLLuS+-lspptpptst................pps-----psl+EWsLspApDhVlVDDhpSapLF+-plLu...APQEEhL.EsFYhsLGu.sLSuLVcEchchGs..hssDQc.AtcLcKlIhERo+LFLH 
.....................................................LTFVu.LppIEhalDsapllpLpKKsuPshplslP..+slpo+Tt..pGlM+ltslpppssQlDAshMpsl.....tWps.sss..tt..ttt..s.t...................soL+o.FFS+housusts.................psts.p.t.t..p.t.tc-...............lhthpsuolFL+lsoAplpsplspuautELERATKKPPPKpTplulLTsS.....asph.uStss....................tpstclFuullPs+....uG..RlFIGFPTtQTTGhshHlSA.SlIPTVEREuIDLNsRalppWNhEhLRsuGIlsRlAassEMuslpsch..t.........t.tttst...hptctltthhscAlHhhppFsFpcSTPoutVuphlE-uFWsC.pp.ss.l-lhSopGll.sppVRlss...c-lo............FhcsIPVlPcphhsss..Flp+Lh-hGLlp-lTlsDlhpELcs+sLsppQhhpFlpWh.........sppuhssplc.sshppLLpsA.......V......As..........pp......................s....ss.....sspl...lsLuslpsalssp+I.....PsclPlPssslPhth..o+slsttpLpul...GW.p.tLplssWlpaLlp....sstsps.....-pslTpospFuhpVLsllSKpW-s.ls.ss.+psl....hshL.pshsslPTKhG........M++PsEuaFsoV+L..F-DLPsl.........puhpslKEKFLsALGVRKTV-LphIFpRLls...t..t...........ptcWSH..h-LI+YLsSVpsDIPspDhp+L+pothCsAE......................t......tstpsptphY+ss-LaEPp-slRsLt.L.PllpW...Pupaps.sS.Eu+FLh..LGL+paPss.pllphh........supc.....tL+spAhsYalspah.s.....tYssaph..ut.shshLPl.p.........sp.....t....tLssPppCFTs.tAslhGFtlL+p-Lps....HAsKFGVtpcPshttClshLl....tpP...............PpspppAthlFtYhusRlu-ls.sp..ht+ltpu.IlPl.ppt.s................ts.h...thl......sPppCYl.......GpuppYtsI..FDFV.DFGtpANhFLhtCGuKpEPTp.ElAthhsp-PsRl.shhpos-................+YLpLLRslA-shshl+..+s+tLhpcM+pusaLLus+-lssttpt...................tts.----tsl+papLspApchlllDDh.oapLF+-plls...APpE-hL.EshYhtLGu.tLSslVpEchph.Gs..hstcpp.AhpLp+hlhERo+LFLH.......... 0 34 55 75 +12284 PF12450 vWF_A von Willebrand factor Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00092. There are two conserved sequence motifs: STF and DVD. There are two completely conserved residues (E and N) that may be functionally important. 
In hemostasis, platelet adhesion to the damaged vessel wall is mediated by several proteins, including von Willebrand factor. In solution vWF becomes immobilized via its A3 domain on the fibrillar collagen of the vessel wall and acts as an intermediary between collagen and the platelet receptor glycoprotein Ibalpha (GPIbalpha), which is the only platelet receptor that does not require prior activation for bond formation. 25.00 25.00 42.90 41.80 20.80 17.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.15 0.72 -4.29 102 412 2009-09-21 10:23:44 2009-09-21 11:23:44 3 14 391 0 114 363 52 102.50 46 17.29 CHANGED ssshss.....spEpatphppNshhssuppPlSTFSlDVDTASYuslRRhLspG.pLP...PssAVRlEEhlNYFsYcY.stPs.........s.s.pPFulssEhussPW.NspppLl+ .................sstt.....s.sspcYpphs-Nsl+psup..sPLSTFSlDVDTuSYuNlRRaLN.p.G.pLP...PsDAVRlEEhlNYFsYDa.shPs..........s..s.tPFulphElussPW.NppppLl+......... 0 48 76 95 +12285 PF12451 VPS11_C Vacuolar protein sorting protein 11 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. Vps 11 is one of the evolutionarily conserved class C vacuolar protein sorting genes (c-vps: vps11, vps16, vps18, and vps33), whose products physically associate to form the c-vps protein complex required for vesicle docking and fusion. 21.70 21.70 21.70 22.30 21.20 20.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.37 0.72 -4.10 39 245 2009-09-21 10:32:19 2009-09-21 11:32:19 3 9 213 0 176 248 0 49.00 36 5.09 CHANGED ssc.pslcsl+csQpc.......stcpa-lFpptL...cpopDpFpllo-ahGRGlhpp ..........scscplhshh+sQcp.......st-pHDhFpppL...cpSpDpFullA-aFGRGVhs....... 0 48 91 145 +12286 PF12452 DUF3685 Protein of unknown function (DUF3685) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. 
There are two completely conserved residues (L and D) that may be functionally important. 25.00 25.00 47.00 27.40 20.20 19.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -10.89 0.71 -5.14 37 133 2009-09-21 10:33:30 2009-09-21 11:33:30 3 3 97 0 60 145 115 156.80 29 37.26 CHANGED l.hsclPhs.pLhshLlhppsLhlDsthhssssscAhtphEhLLpNhlIplANslhp.lLNphuchppl.....+phlappphlSoR-lERhRNpLsWphRhppahpcPpsIaESpapLahlsspuIpph.lasPRppELppLsulthhVTLlLEhRDAlAPplcullthlGpslValLTpVlGRuIGLlGRGIlQGlG ........................................................................thllpshllphAshlht.hlp.hu...pl.....pp.hhp.ph....lSoRplp+hRNplshp.hhpphhppshtlaEsphpLhhlptt.t..Isph.l..sRppELppLphhph.lolhLEhpDshsP.lpshlphlGshhsahLspllGRulGLlh+GIhQuhG.......................... 0 10 38 52 +12287 PF12453 PTP_N Protein tyrosine phosphatase N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00041. There is a single completely conserved residue L that may be functionally important. This family consists of various protein tyrosine phosphatase haematopoietic receptors, e.g. CD45, which dephosphorylate growth stimulating proteins. This limits growth signalling in haematopoietic cells. 21.90 21.90 21.90 22.50 20.20 19.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.56 0.72 -6.82 0.72 -4.23 7 91 2009-09-21 10:35:08 2009-09-21 11:35:08 3 16 27 0 5 93 0 27.30 48 5.26 CHANGED LKLLAFGF.AFLDstsaVsGpussssssG ..LKLLAFGF.AhLDo-sFVTGpospsssos... 0 2 2 2 +12288 PF12454 Ecm33 GPI-anchored cell wall organization protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. Ecm33 is an essential cell wall component and is important for cell wall integrity. 
23.20 23.20 23.40 23.40 22.60 22.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.26 0.72 -7.88 0.72 -4.08 18 53 2009-09-21 10:36:24 2009-09-21 11:36:24 3 3 53 0 38 48 0 40.10 45 10.10 CHANGED MthlKYhLPA.LAAusushAs....sCu..tsTtTIpsQuDAsuLu .........M.hhKYlLPA.LAsAGsAhAtt...sCs..tuohTIpsQuDAsuLu. 0 7 19 32 +12289 PF12455 Dynactin Dynein associated protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 280 amino acids in length. The family is found in association with Pfam:PF01302. There is a single completely conserved residue E that may be functionally important. Dynactin has been associated with Dynein, a kinesin protein which is involved in organelle transport, mitotic spindle assembly and chromosome segregation. Dynactin anchors Dynein to specific subcellular structures. 25.00 25.00 25.40 27.20 21.30 24.70 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.61 0.70 -5.48 30 281 2009-09-21 10:37:31 2009-09-21 11:37:31 3 11 178 0 166 278 0 267.40 33 22.25 CHANGED -L--hcsspp...pEs-ssphsspspshhslNhKLQsosuKApsKsIDlELp+hEuppuspHLpllphaLP-....ahc..u-+DulL.slLhhpRlutKusLlsstlpc+..........htpps.....hsupht-phh..hsscllppLshlsslsc+FhshlspCosEpFschsshh.EhpslE+tlDtaI-hLK+sclsEppssspLp+slshhscLtpsh.hs.....sph.shspphhtpsthhpstlDpsssshuhl+.shlpsthssssp............hhphhpslhspscusKhhspKlh...........Rpl .....................................cLp-hppp.ps..ppptusch..ppp.s.tshshKlphutoKApsK.........sI-hELRphEstpAspHlslLpuF.hP-s...Fhc.........uGD+DslL.sLLLh.RlhtKAcLltppspE+.....................htcps......hpGsss.-phs......austllhpLshLpushc+atpulspCos-.hap+hushhsEhss..pERsLDhhI-LL++....DpLDEssss-.sLp+sItahpHLhpla.ls....pp.tsps..pltsphphhposLDshuspsupl+.shlQ.ss.....ppss-............................lhhhlcsL.spspsh+phspKlpRph............................................................ 
0 61 85 128 +12290 PF12456 hSac2 Inositol phosphatase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 120 amino acids in length. The family is found in association with Pfam:PF02383. hSac2 functions as an inositol polyphosphate 5-phosphatase. 21.80 21.80 22.10 22.50 21.60 20.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.30 0.71 -4.70 32 312 2009-09-21 10:38:54 2009-09-21 11:38:54 3 8 175 0 215 308 0 111.30 27 15.56 CHANGED tst-....hphtphhppAl-ps.pphllsc.s....EchluuWhLh.....sscp.s.....s.p-plLLLTcpulalspaDats-+lspapRlsLsslppIphGhh.tsshp....sppcshshhlpaptss ..............h...c....hphtthhppslcsh.pp.hllss..s........-ph..hGuWhLh.....ps.ct.s..........s-p-plLLLTc..pulhlscYDhphc..Kl..spapRlsLpslppIphGt..a.hssht..........sc.pt.shplpap...s........................................ 1 67 94 152 +12291 PF12457 TIP_N Tuftelin interacting protein N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 99 and 114 amino acids in length. The family is found in association with Pfam:PF08697, Pfam:PF01585. There are two completely conserved residues (G and F) that may be functionally important. TIP is involved in enamel assembly by interacting with one of the major proteins responsible for biomineralisation of enamel - tuftelin. 
22.20 22.20 23.60 23.60 20.40 20.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.66 0.72 -3.98 32 238 2009-09-21 10:39:50 2009-09-21 11:39:50 3 7 182 0 141 228 0 99.20 30 12.68 CHANGED s-s-...-hE...cFEloDhDlcsE...........aNssRpRp..+poKcptlYGIaAppc-s-ppp.ttt.t...........ts++tpDYot.......PlsFVuGGlppsupc....cccpptppsp.....cpsccpst....ss .....................p...-t...-h-pa-h.s-hDhpsE.............aNPpRpR+.....hpoK-pAhYG.laA-cssD-ptst.h...............ss++s+DYot.......PV...sFlSuGlppuupcctpppcppp...pppt.......................ttt.................................................... 0 51 73 110 +12292 PF12458 DUF3686 ATPase involved in DNA repair Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 450 amino acids in length. There are two conserved sequence motifs: DVF and SPNGED. 25.00 25.00 118.90 39.80 18.10 17.70 hmmbuild -o /dev/null HMM SEED 448 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.31 0.70 -6.22 31 104 2009-09-21 10:40:47 2009-09-21 11:40:47 3 5 101 0 36 112 9 438.60 47 26.48 CHANGED cLpp+sppLNppRh-hFGus-hcLlGstRlRTENNClsRDIVpVG.................shlLFGYNVahGL+p-s.plsDVFSlach...........tpss.......sFcht.hshst....hLsDspFhpDFp-LY+YY+ss+LhpLthh.cs+LLAlFQlG-phpDlRVFRWplsscGt..pYlDsRGERDhshPspHDF-WpcsoR-s+VtG+HPHlSI..hDcVFVETlGGDLTIKlENNT-oGcGIYuEPV--.sQSLDDAclcYAclGsLILL+lhPYpEppaRYlVFNs+pppVhRlDuIGpAClpLP-DpGIIFPGGYYL.poG-hKsF-pshs..s....hcFcRslRSPNGEDVLYVFaptppGphlLhsYNlIc+plpsPlhCHGaulh-DGchllF+.up.-EPTRlHPhQlWQTPahS-pa.Aust..ssssuhLt+IGNs-LVRGlS-shulsphlpcp..ssosthYpsLhpsspclhDsYa 
............LpppsppLNtpRh-.FGusphcllGp.RlRTENNClsRDIVpVG.................chLLFGYN.V...alG.L+.pEs.plsDVFoLY+h.....................t..spta-ht.hshsss...h.LsDssFlpDFs-LYpYY+ss+LlpLthp.-u+..LLAsFQlG-chsDlRVFRWplss.....DGp..pYlDsRGERDhshPst..aDF-WpcsTR-.pV.GRHPHlsI..h.DpVFVETlGGDLTIKlENNT-sGpGIYpEPVp-.sQSLDDAplcaAclGsLlLL+lhPY+EcpaRYLVFNohTppV.RlDuIGtuClpLPEDpGIIFPGGYYL.QsG-hKsF-...pshp......s....hcFcRplRSPNGEDVLYlFap.ppGchlLhsYNlIc+plpsPlhsHGaulh-DGchllFc.u.-....s.cEPoRlHPhQlWQTPFso-ca...AApp......ss...tsuhLsRIGNA-LVRGlS-hhsls+hlppp..ssotthYptLspsspclhDsYa............................. 0 12 25 33 +12293 PF12459 DUF3687 D-Ala-teichoic acid biosynthesis protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are two completely conserved residues (L and Y) that may be functionally important. 25.00 25.00 30.80 29.90 22.20 22.10 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.71 0.72 -8.16 0.72 -4.72 33 811 2009-09-21 10:41:44 2009-09-21 11:41:44 3 2 808 0 47 183 0 43.50 43 89.74 CHANGED h.....hhp+....ssspFlh+TlFYFhILlsLlYLYuYpGhspusFIYNE ................tpphhpFlh+TllYhhIhluLlalYua.pGpspusFIYNE.... 0 8 19 30 +12294 PF12460 MMS19_C MMS19_N; RNAPII transcription regulator C-terminal Gavin OL, Coggill P lg7 Prosite Domain MMS19 is required for both nucleotide excision repair (NER) and RNA polymerase II (RNAP II) transcription [1]. This C-terminal domain, along with the N-terminal, MMS19_N, form part of a silencing complex in fission yeast that contains Dos2, Rik1, Mms19 and Cdc20 (the catalytic subunit of DNA polymerase-epsilon). This complex regulates RNA polymerase II (RNA Pol II) activity in heterochromatin and is required for DNA replication and heterochromatin assembly [2]. This domain apparently shares homology with some HEAT repeat sequences. 
27.00 27.00 27.00 27.20 26.90 26.50 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.33 0.70 -5.75 60 284 2012-10-11 20:01:02 2009-09-21 11:42:55 3 10 227 0 192 312 0 381.10 21 40.43 CHANGED lsthl...stLscsssst.........................tphpphLcsLspluspppl.....hcslsh.pLls+lshhhp.......................ssphshslLpolhplhppptppp.....shshahpp.llshh.hphshsssspsps...........psLphluplhshllppLstpcppph.hp...plhshFh.........................h..ttt....tppphlhlhstlLuuLs+sssh......ptsp....LLpplhphshspss......thh+huhhchluhLlNK.hs...psp..........hpshLpphhpph........tt.tp.p.pslcllhWlsKALllRspstusphlsp..Lls......LLss.........pph...uptsAcuFplLls-...cslhst...............pptssl+lLaKQ+hFsplhPhl...hptacssss.................K.sY...LpALStlLp.slP..psllhscLspLLPLLLpoL........shsss...p...........lph..usLpTLtshlc-s..spllpc..alp..oLlspLLplu .......................................................................................................................................h.phLphlstlshp.pl.....hp.h...hlhptl..h..........................................p.t.hhhhhtsl..hhpptt.t.........p.thhhpp.hh.hh.hthhhts...ttt.................................p.thLshhsplhshhsppls.phptp...hp..........ph.hslFh.....................................................h..t.t............t...ppphlhlhh.shl.suLs+ssphs........phtp.......................lhppllphs..hspss...............htphshhc......hhu.sLlNK.h.ssp..................thp.phlphhhpph..................................t.t..p.psh....pllhWlsKuLll....R.h..p..sh.ssplhsp..Lhp..............LLss.............................sph......u.tsAcuFslLhs-.....ss.lLsp...................................tstssl+lha+Q+hFsphhPhl.......lpthcssspt.......................................h+ssa...Lp.uLutlLp.plP....psl.lhs...clss.Ll...sL...LlpuL.........shsss.....s............................lph..usLpsLpsll.cs....sphhpt..alp....sLlsphLph...................................................... 
0 57 96 156 +12295 PF12461 DUF3688 Protein of unknown function (DUF3688) Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is typically between 79 and 104 amino acids in length. There is a conserved YRW sequence motif. There is a single completely conserved residue Y that may be functionally important. 25.00 25.00 25.70 25.70 23.90 24.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.69 0.72 -3.60 38 86 2009-09-21 10:44:18 2009-09-21 11:44:18 3 2 6 0 0 86 0 91.90 31 26.06 CHANGED DNKaYallh+spps.......ssWcIhK.FpNsppth........hh...tph............................................shhK.ulYRWsGssEP.phPs....ID.ssGpIpsW..........p ......................................................................................................................DNKaYhllh+ppps.......ssWpIhK.Fppsppt..h...........hh..t..t..h.......................................a...........shhKulYRWsGs.sEP...phPp.....ID.sTGpIpsW.....p............. 0 0 0 0 +12296 PF12462 Helicase_IV_N Nucleolin_N; DNA helicase IV / RNA helicase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 170 amino acids in length. This family is found in bacterial DNA helicase IV, at the N-terminus of Pfam:PF00580. 21.40 21.40 21.50 27.40 21.30 20.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.57 0.71 -4.60 36 672 2010-01-05 17:03:59 2009-09-21 11:51:44 3 8 665 0 62 352 3 164.40 57 24.00 CHANGED McLpuoshuphlsQpsYpplcL.ssulploucp+phhIPFsplt.slps+RGllWGcLpFthss....ppslpl+GhpWp-sppFh............cplhpsappWspchschtsphLsphhppIpchpptspalscppltslhcplcpthpsLshslschtph.sspt...apphttWLpc ......MELKAToLGKRLA.QH.PYDRAhILNAGlKVSGDRHEYLIPFNQLL.AIHCKRGLVWGELEFVLP-....-KVVRLHGTEWuETQ+Fa.....................HaLDAHWpRWSsEMS-lAutVLppQlchIupRTGcN+WLTREpssGlpppIRQuhuALPLPV..sRLEEF-sCREh..WRKC.AWLpD.......................... 
0 4 19 40 +12297 PF12463 DUF3689 Protein of unknown function (DUF3689) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 399 and 797 amino acids in length. 21.70 21.70 33.90 33.40 18.70 21.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.78 0.70 -5.64 8 115 2009-09-21 10:58:33 2009-09-21 11:58:33 3 4 60 0 63 117 1 265.30 55 34.69 CHANGED Vps.ltchtLl.sLsphF-pL.Whhptsp...hth.s.sssCosc.uLKIQFLRLlpsFsD+cpsp............tt.p..h.h..s.t..ss..s+shttcuppGLls+lLpshtp-st-ShaRFWlApuVEuFLRGssshtDQhalhpRGLlEHlLppIlcusscspcsLQhpFDLLGELhKFNpssFcRhsphlss-.KFph..FlctlsosLVDSNhFlRsVlLSL-pFcppts-hs..................tpshpcs+hlhhhp............spNph+hLpcLIshIplpslsQ-NlsCLNTuLlILhhAc+sGcL.hYLpuLRpt-hp .......................................VH+MlAEF+LIPGLNNLFDKLIW...RKposS.s.hV.lH.uHNpNCDCSP-loLKIQFLRLLpSFSDHH-s+hlLhs........................................phsphSAlshc..sslPElcullNo......cRoLV..CDGK+GLLTRLLpVMK.+EPs-SSF....R..FWQARAVESFLRGsTSY..ADQhFLLKRGLL.......EHILaCIl....DScC.+.SRDVLQSYFDLLGELMKFN........lDAFKRFNKYlN.......T-.tKFQl...........FL.........pQINSSLVDSNMLVRClsLSLDRFEsQs.DhK......................................VscVL....SEC+LLuYhu.................ps.sphoFLFRLINIIpVQTLTQENVSCLNTSLVILMLARR+t+LPhYLphLpchEh.s........................................... 0 26 33 45 +12298 PF12464 Mac Maltose acetyltransferase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00132. Mac uses acetyl-CoA as acetyl donor to acetylated cytoplasmic maltose. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.54 0.72 -3.95 351 3057 2009-09-21 11:01:47 2009-09-21 12:01:47 3 32 2089 58 602 2037 48 54.50 30 25.99 CHANGED cEKMluGcl..........Yps.t..........Ds...E.LhptRtcu+clhtca.N.........ps...p...p............p.tp........cRpplL+cLhGpsGc ..................hEKMlAGch..........Yps.t..........Dt...p.LhpcRh+A+plhtcaN...............ps...t..sp................-.tp............cRppllccLhGpss................................ 0 166 343 483 +12299 PF12465 Pr_beta_C Proteasome beta subunits C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00227. There is a conserved GTT sequence motif. There is a single completely conserved residue Y that may be functionally important. This family includes the C terminal of the beta-type subunits of the proteasome, a multimeric complex that degrades proteins into peptides as part of the MHC class I-mediated Ag-presenting pathway. 20.30 20.30 21.40 21.30 20.00 20.10 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -7.70 0.72 -4.69 63 391 2009-09-21 11:37:50 2009-09-21 12:37:50 3 3 243 90 207 367 0 37.50 38 13.88 CHANGED sNc+up+ptsY+atpGTTuVLscpl..hpl-lscp.pVps ..............sNp+sp+..ptpY+atpGTTAVLscpl..hpl-.lh-E.pVp................. 0 55 93 153 +12300 PF12466 GDH_N Glutamate dehydrogenase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF05088. There is a conserved ALR sequence motif. Glutamate dehydrogenase (GDH) is a homohexameric, mitochondrial enzyme that reversibly catalyses the oxidative deamination of L-glutamate to 2-oxoglutarate using either NADP(H) or NAD(H) with comparable efficacy. 
25.00 25.00 25.80 38.20 22.40 22.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.85 0.72 -3.98 4 31 2009-09-21 11:39:53 2009-09-21 12:39:53 3 1 30 0 6 34 2 55.10 60 3.31 CHANGED MpupKuASpSspsssKPsAEpuVsssAp..ptloLEPVFAALRKRYPAAtQuEVQtFAAD .....MsuKKuhSpSspsssKssAcpulsshAp..puVoLEPVFAALRKRYPAAtQuEVQtFAAD. 0 1 3 4 +12301 PF12467 CMV_1a Cucumber mosaic virus 1a protein family Gavin OL lg7 Prosite Family This domain family is found in viruses, and is typically between 156 and 171 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01660. 1a protein is the major virulence factor of the cucumber mosaic virus (CMV). The Ns strain of CMV causes necrotic lesions to Nicotiana spp. while other strains cause systemic mosaic. The determinant of the pathogenesis of these different strains is the specific amino acid residue at the 461 residue of the 1a protein. 25.00 25.00 43.70 42.60 21.20 20.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.12 0.71 -3.90 14 89 2009-09-21 12:27:24 2009-09-21 13:27:24 3 3 22 0 0 90 0 156.90 59 16.17 CHANGED Whusho+sh..RsFlpshlpohFPoLR....+DcpEFLsKLSshsoF.NEpspsDhucphDVhusAAsl.......sshsVpsuKphts-KcKpht.stppPV.p.s.t.............s.ssc.sPtsssssps...spssp.pslospTcss-oRlApRusAMhEYssYpppLHsNsVSNL .......WFAuhTRPl..RVFFSoV.V+sLFPTLR...PREEKEFLlKLSTFVTF.NEECSFDGGEEWDVISSAAaV.......AsQAVsDGKlLAupKAcKLA-+LApPV.ElSsp...............s.ssSsTPcDsussCG.cEpEsSELDSLSuQTRSPITRlAERATAMLEYuAYEKQLHDTTVSNL 0 0 0 0 +12302 PF12468 TTSSLRR Type III secretion system leucine rich repeat protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. There are two completely conserved residues (Y and W) that may be functionally important. This family consists of leucine-rich repeat proteins involved in type III secretion. 
21.20 21.20 23.00 21.80 20.30 19.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.31 0.72 -4.42 25 427 2009-09-21 12:29:20 2009-09-21 13:29:20 3 37 178 4 17 313 0 45.10 35 7.66 CHANGED shuhsssSh.st.......tssposs-YculWscWc+sAsss..EpRspAVp ...................t..psS...t......sssspssssYhslWs-Wc+pAsst..EpRppAVp...... 0 0 7 9 +12303 PF12469 DUF3692 CRISPR-associated protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is typically between 101 and 138 amino acids in length. The proteins in this family are frequently annotated as CRISPR-associated proteins however there is little accompanying literature to confirm this. 21.90 21.90 22.80 23.10 20.50 21.40 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.54 0.71 -3.87 86 200 2009-09-21 12:31:19 2009-09-21 13:31:19 3 1 170 3 93 221 4 122.50 24 16.65 CHANGED hllsloluPVQsFIspuR+s+DLWuGSalLShLstpshptl..hcp.............sst....llhP......t................t.................................................httsl.P.....................Nphhhhlsstt................hpthtcp......................scpshpctac..plscpl .....llhholuPVQsFIspuR+s+DLWuGSalLShLsttshptl..hcp.t...........sss......llaP.......s.hptp........h...hhpphtt......................................................tsuh.P.....................................N+hhhh..lsssp................hpthtpp...................scpthpptapplhp..h...................................................................................................................................................... 0 56 74 85 +12304 PF12470 SUFU_C Suppressor of Fused Gli/Ci N terminal binding domain Gavin OL lg7 Manual Family This domain family is found in eukaryotes, and is typically between 192 and 219 amino acids in length. The family is found in association with Pfam:PF05076. There is a conserved HGRHFT sequence motif. 
This family is the C terminal domain of the Suppressor of Fused protein (Su(fu)). Su(fu) is a repressor of the Gli and Ci transcription factors of the Hedgehog signalling cascade. It functions by binding these proteins and preventing their translocation to the nucleus. The C terminal domain is only found in eukaryotic Su(fu) proteins; it is not present in bacterial homologues. The C terminal domain binds to the N terminal of Gli/Ci while the N terminal of Su(fu) binds to the C terminal of Gli/Ci. This dual binding mechanism is likely an evolutionary advancement in this signalling cascade which is not present in bacterial homologues. 25.00 25.00 32.70 28.80 23.70 22.40 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.18 0.71 -4.72 6 111 2009-09-21 13:09:09 2009-09-21 14:09:09 3 6 78 4 67 106 0 191.20 52 43.07 CHANGED hcpl-cslE+-GSsLuGVsAchsacELs+ss.scp.sc.............cpsT-p.ppshpcshphppcus.sss.uppssshpps.............sshshspphsRspsLsGlcLphu.puAphLsLAlRsRlRHGRHFTF..pspchAlTFVopuVsGuhsoc-cPYushGsWlQILIss-hV.+Ml--hpDLooscs..LKlPhpYcWP-+sLKlhl ................................................................E+V-cGIEp-GSNLSGVSAKCsW--LocssEcc--ocshslup..........................................tp-TE.QIREsLp+GLchsscPlhP.slss.p+pNuhp+s........................ppsssshlPpELlR...TRpL-uVHLKFNtEuGuLlPLs..lRGRLhHGRHFTa.KS.IsGDhAITFVSoGVEGAFATEEHPYAA+GPWLQlLls-EhVp+MhcD....hp.sLsss-c.......hphPhpapWP-+pLpl.l................................. 0 20 24 45 +12305 PF12471 GTP_CH_N GTP cyclohydrolase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. This family is the N terminal of GTP cyclohydrolase, the rate limiting enzyme in the synthesis of tetrahydrobiopterin. 
25.00 25.00 50.80 35.80 22.10 21.50 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.23 0.71 -4.70 39 190 2009-09-21 15:39:55 2009-09-21 16:39:55 3 7 177 0 131 193 27 187.10 54 40.49 CHANGED hss+IhLToaPsptshsPlPlpWGA.ssstcRGPVlso..hsshs+RNAIGsHuGSYulY+ALAVAuGpLsscH+sDhTNTpPshsIsPpPpWuDPpKIVShDPaGHlssphFschh.ppGhDIRPTIAlT+AHlplsElp-AlppGRLtsDGcllhs.................sG-ltVTKsAlEPVWYLPGVAcRFGlsEstLRRsLFEc ................h.s+IlLToaPsp.t..uhs.PlslcWGu.ssstpRGPVlso..ssshp+RNAIG...............uHuGSYSlYpALAVAuGtLss-HRPDhTNTpPsssIGPaPpWuDspKIVShDPWGHhlsphFtc.l..ppGl..DIRPTIAlT+A...HhpLPElp-AlppGR.....LhsDG+llls.....................................sG-lsVTKsAVEPVWYLPGVA-RFGlsEusLRRsLFEp..... 0 40 75 109 +12306 PF12472 DUF3693 Phage related protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is approximately 60 amino acids in length. 21.00 21.00 21.10 21.20 20.40 20.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.72 0.72 -4.37 31 83 2009-09-21 15:41:09 2009-09-21 16:41:09 3 4 62 0 13 83 0 55.90 28 40.57 CHANGED lltl+u-+uco.pt+shWpsIhK+hsuhuhs......sluhs.hsuhuhuhststpslspssls ......LlslcA-+ucsspt+shWpsIsKKhsuhuhs......sluhs..hsuhuhshstsppslhpssl.................... 0 3 7 9 +12307 PF12473 DUF3694 Kinesin protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 131 and 151 amino acids in length. The family is found in association with Pfam:PF00225, Pfam:PF00498. There is a single completely conserved residue W that may be functionally important. 
21.80 21.80 21.90 22.20 20.60 21.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.76 0.71 -4.52 38 863 2009-09-21 15:44:23 2009-09-21 16:44:23 3 52 182 0 460 731 1 128.10 25 11.09 CHANGED pYhPV.h.pssp...s.GsFpL+QGlp+Rlslslsppsupph....hpchhtlhluth+.h..ss..h........s..h.Lpl.lspt.......ss......................................shshhupWDSShH.sShhLNRhT.sppp+lhlTlshslhhsc.hscPlhFph-lslpIh.uRsth .............................................................pahPs.h.pttt.....s.shF.L+Q...G.h.p++lslpl.pp.ps.s.ph................hcch.h.tlhl.....u.hp...........................s....h..htlhstp.................ps.........................................................................................................................................hphpusWDouhH.sp.hLNc.T..s..................s..c+l.ahtlts.l.............lpp.s.........hhhphchsh.hhsppt.t.............................................. 0 114 158 301 +12308 PF12474 PKK Polo kinase kinase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF00069. Polo-like kinase 1 (Plx1) is essential during mitosis for the activation of Cdc25C, for spindle assembly, and for cyclin B degradation. This family is Polo kinase kinase (PKK) which phosphorylates Polo kinase and Polo-like kinase to activate them. PKK is a serine/threonine kinase. 
25.00 25.00 27.80 25.70 24.40 24.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.08 0.71 -4.46 23 413 2009-09-21 15:49:36 2009-09-21 16:49:36 3 10 84 0 193 333 0 135.60 35 24.19 CHANGED +aplt+cQh+cpFh.p+ppLh++a-p.ElEplpRhppcplEchcpcQptE++chsKplRsEpcpchthF+-SL+lptp...pscp-hE+l...cpQc+cchKtc+pchppKHppp.c-hhuppcsslcp.Lpplpsc+++tLh..EpE .........KcQht+pQl+cpa..pppphh+pa-p.EhEphpRhppphlEcLcpcQsp-++c.sKphR....uEtcpchuhF+csL+hppp.....psp...........p-h-+.l....p.pQcccp.K..p.c+.p.ppK+p.p.p.+-hhtppcps....lct............Lpplps...c+p+.Ls-pE................. 0 43 63 120 +12309 PF12475 Amdo_NSP Amdovirus non-structural protein Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 50 amino acids in length. This family contains proteins of each of the four types of Amdovirus non-structural protein. 25.00 25.00 40.50 39.70 19.40 17.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.36 0.72 -4.06 7 138 2009-09-21 15:51:29 2009-09-21 16:51:29 3 3 3 0 0 147 0 46.60 63 33.50 CHANGED YhDKspcPpcsp.sL....+phspDLtlhaoshcCshps.p-scscsp .FDKNEDPKDVpKSLGWhlK+LN+DLA.lIaSNHHCD.QsIKDPEs+Ac...... 0 0 0 0 +12310 PF12476 DUF3696 Protein of unknown function (DUF3696) Gavin OL lg7 Prosite Family This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. 21.70 21.70 22.60 21.70 21.60 20.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.55 0.72 -3.99 58 201 2009-09-21 15:55:04 2009-09-21 16:55:04 3 4 191 0 67 169 38 49.90 25 12.02 CHANGED lsscclulaah.....ppspttsplpplp.lscpGcl..spWP.....cGFFDphttph.pLh ............................ppltlhah.....ppssssoplpplp.lsppGpl..spWP.....cGFFDpt.h-h..h............. 
0 26 46 57 +12311 PF12477 TraW_N Sex factor F TraW protein N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. There is a single completely conserved residue G that may be functionally important. The traW gene of the E. coli K-12 sex factor, F, encodes one of the numerous proteins required for conjugative transfer of this plasmid. 21.30 21.30 21.80 21.30 21.20 21.00 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.03 0.72 -3.91 21 169 2009-09-21 15:57:01 2009-09-21 16:57:01 3 1 137 0 29 121 8 30.20 55 14.12 CHANGED hhhhhlhshlhhussApApsLGshGssaPIu ......hpspuLlALLlaGQS.VsAADLGTWGDLWPV.p. 0 5 12 24 +12312 PF12478 DUF3697 Ubiquitin-associated protein 2 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF00627. There are two conserved sequence motifs: AVEMPG and QFG. 22.20 22.20 23.10 23.00 21.50 19.70 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.01 0.72 -7.51 0.72 -4.31 7 185 2009-09-21 15:58:24 2009-09-21 16:58:24 3 3 70 0 80 193 0 33.20 77 3.25 CHANGED .sopIPuoAVEMPGuu..slsuLslQFGAL-FGSE ..PsSKIPAoAVEMPGSA.....DloGLNlQFGAL-FGSE...... 0 15 21 45 +12313 PF12479 DUF3698 Protein of unknown function (DUF3698) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 89 and 105 amino acids in length. 
25.00 25.00 34.50 34.40 24.70 22.30 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.48 0.72 -3.95 11 45 2009-09-21 15:59:39 2009-09-21 16:59:39 3 3 1 \N 45 47 0 98.90 31 34.17 CHANGED TltsGITLRSA.IAGFFYpsEs-uh-Tppphtpsasl.upKT.....ppsDPhhNVhP+shcps.....sh+..p.t........s.hhsltEsssp-.aps.....IcssTas ............hlcssls.h+sh.hsshhahspscu.soppp.thsFuI.shKTt.h.hppu...hplt.cs.ppp..phpphR.opp.s...............hushlhht.shpp..apA......Rs.h............ 0 0 45 45 +12314 PF12480 DUF3699 Protein of unknown function (DUF3699) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. 20.40 20.40 21.20 21.70 20.00 19.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.27 0.72 -4.34 17 310 2009-09-21 16:01:11 2009-09-21 17:01:11 3 9 30 0 145 276 1 75.50 33 13.82 CHANGED tss.slpLophlPLph..VclplHstpphpLcl+hsouRsaYLpLsssscp.-slFstWl+Llp...lLp.shpthspsspl ...................t..s.plpLo...RhLPL+F..VcLplaDpppppL+l+hsT......sRsaYLp.Lsssscp.-slFshWh+Ll....lLp.shsthsps.t............ 0 10 12 26 +12315 PF12481 DUF3700 Aluminium induced protein Gavin OL lg7 Prosite Domain This domain family is found in eukaryotes, and is approximately 120 amino acids in length. There are two conserved sequence motifs: YGL and LRDR. This family is related to GATase enzyme domains. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.46 0.70 -5.15 33 220 2012-10-03 21:14:07 2009-09-21 17:03:10 3 4 52 0 86 1785 730 191.40 46 86.72 CHANGED LAlFcKulAcsP-ELpS.Psss.s.usth............................c...............ssptllpcFhSspP.suholshGs..uuhlAYotspps.....slhPRLFush.DDIaClF.GpL-NLssL+pQYGL.uKssNEshlVIEAYRTLRDRGPYPADQVV+-LpGpFAFVlaD.spssolFsAsDsDGpVPLaWGlsADGslVhSDDh-llKtuCGKSaAPFPp.GChFoSs.GGLpSFEHPhN+lKu...........hP......RlDSE...G.hC..........GAsFKV .......................................................................................................LulFpttlspsPptL.u.st.t.........p.sttlhppFhstts..suhohphGs...uhhA..a...ot....tpps.......hp..R.Fush.D.-.Ia.C.l.F.GtL-NL....u....t....L.......p....p.....Q......Y....G.....L....u...K....s...s..N.....E....sh....h.VI.E.A.Y.+T..L.R...DR.u...P.....Y..P..A...sp.......V....lpcL...pG..pFAFVl.aDsp.......s.....p..olF.sA.t................sss.G.p....VsLa..WG.l..s............u.......D..G...p...l.shS..D....-.h....-...l..l.......K.....t.....u...C.u.K..S.hAP....F.Pp.G.Ch.apot.....uG.....L.......p...S...a.Ep..Ph..N+.l.pshPth-pc..hCGusFKV............................................................ 0 12 61 73 +12316 PF12482 DUF3701 Phage integrase protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF00589. 
21.50 21.50 21.60 28.90 21.20 20.70 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.28 0.72 -4.24 38 151 2009-09-21 16:05:01 2009-09-21 17:05:01 3 5 74 0 61 147 10 96.20 37 16.67 CHANGED ptLcshs....sssPthsDslstWLssplstsLputGIp.......TLu-LssRlsRRt.+WWpulPGLGssuARpIEAFhAsHsslst+u.....................psllhhs.pusllP...h .........t..LcphssssPthsDsVshWLs..spss..tsLpAtGIp.......TLu-LssRlsRRt.pWWpulsGLGsuuARpIEAFhusHsshst+s..............puhlsh..ps.lsP................................. 0 3 11 45 +12317 PF12483 GIDE E3 Ubiquitin ligase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is typically between 150 and 163 amino acids in length. There is a single completely conserved residue E that may be functionally important. GIDE is an E3 ubiquitin ligase which is involved in inducing apoptosis. 21.20 21.20 21.30 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.87 0.71 -4.87 31 280 2009-09-21 16:07:55 2009-09-21 17:07:55 3 6 190 0 162 283 43 144.10 22 45.07 CHANGED ppphtccppp......hhpppochlspspcpsPFhLcD....s...........o.GcVhV..s.suutlshhhshcp.F....cs.ss.stsshthuhh..th.....................slGh+hhEclLPsspplhVlGps.pDstG..lpIppPppt.....FhlS.coc-pLhpphtpts+hhhhuulsh.......sllG ..............................................t....thhppppchlp.ptsp.psP.Fh...L...p..D...s.................s..stVhV..s..s.u.ss.l..s..l..p..sshcp.a......c.sst..shss.hh.hs.hhsutp.....................................hG..h..p.psEchL.sGssloslG-h..................h....tc.......s.....st....lp......l.p...sppt...........ah......l..o...s.pplhtp.tt.hphh.hhhshhh..h.......shhh................................................................. 0 44 89 131 +12318 PF12484 PE_PPE_C Polymorphic PE/PPE proteins C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 90 amino acids in length. 
The family is found in association with Pfam:PF00823. There is a conserved SVP sequence motif. There is a single completely conserved residue W that may be functionally important. The proteins in this family are PE/PPE proteins implicated in immunostimulation and virulence. 21.80 21.80 22.10 22.10 21.40 21.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.16 0.72 -3.05 86 1529 2009-09-21 16:10:05 2009-09-21 17:10:05 3 5 63 0 165 799 0 84.80 32 22.29 CHANGED V.uAu.l...GpAusl.GsLSVPsoWusu.....sPsssssus..........Lsssshssssssss.......sssh...h..GhP.........hsuhuutttsu..hs...........sRYGh........Rss..VMs.R....PP .....VuAul.Gp.Au.sV.G.sLS.VPsuWusu........sP.u....s.sssuss.........lsssshsussssus.........ssh....hs..GhP.....huuhuu..t.usuu..su..................RhGh.....+.hVhs.+........................................................... 1 35 69 134 +12319 PF12485 SLY Lymphocyte signaling adaptor protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 144 and 156 amino acids in length. The family is found in association with Pfam:PF07647, Pfam:PF07653. There is a conserved LGKK sequence motif. SLY contains a Src homology 3 domain and a sterile alpha motif, suggesting that it functions as a signaling adaptor protein in lymphocytes. 25.00 25.00 59.20 59.20 24.50 24.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.16 0.71 -3.98 14 178 2009-09-22 08:22:33 2009-09-22 09:22:33 3 9 39 0 84 140 0 148.20 42 25.16 CHANGED c.p.pRSoSFGcFct.+..sSPlps-cchsscpt......tttsscsssp...SuhsLGKK.h+u.IShTM++KMGKch.KAlSEEhs-ss-tcshssssss...t...h-Kssl+uusShESLaS.hSGQSSoSu.sVsSsSsGsSNRDSl+LE....-psP..YsGPF ......................................phtRS.oSFGsFDtp+.spS..ssps-cphpsc-s..h......htts.csssp...Su.tuLGKK.h+u.IScTM+KKMuKKY.K.uLSE-ht...-sst.tsuhssss.ssst.-s.ph-KssLKuusShESLhsshSGQSSoSu.sloSsssGo..SNR-..Sh+.E....--sP..YsGPF................ 
0 3 10 33 +12320 PF12486 DUF3702 ImpA domain protein Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 207 and 469 amino acids in length. The family is found in association with Pfam:PF06812. 25.80 25.80 31.70 31.50 25.70 25.70 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.86 0.71 -4.53 26 515 2009-09-22 08:23:51 2009-09-22 09:23:51 3 2 347 0 39 278 0 138.20 40 33.88 CHANGED hppQLspLpplsPhaslphGppllcpApplWPss.phpthsppWppplpspAhsssplsuWpputspLppLu-+Lsth-cp+Gp.hTlS.LKoslashppsh.spshPlEEhLRQLptp.tpspsss.uhhppl-p+LptLLsRYhhLpp ...............pppLpQLhchsPl.sLcpGhphh+.AcshWP-s.Q.pphsspWpctlcspAtsss.pLpGWhQsppcLcthA-hl.phEcct...lTlS.lKoslaphcpuL.spE.sP.lEpLLpQhp-s.+s.pppsss.sLpKQIspRLptlLuRahlLpp....................... 0 0 10 19 +12321 PF12487 DUF3703 Protein of unknown function (DUF3703) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 113 and 135 amino acids in length. 21.10 21.10 21.70 21.20 20.70 20.10 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.49 0.71 -4.40 51 103 2009-09-22 08:25:31 2009-09-22 09:25:31 3 5 84 0 62 114 13 106.70 36 81.37 CHANGED hsppl+thaptEhptAcps.ttushstsWpHLERAHIluQ.hshhHstsHhtMLthuh+p+DtREshGQllRllsussGShhG+hPhGNTGtusVushpPMPlPpDLtsllp ...............ppl+thaptEhptupps.ttsshptuapaLERAHIluQthshh...Hs....psHhtMLphuh+p+Dt+EshGQlhRllsus..stohhGhlPhGNTGtusVushpPMPlPp-Lttll.s...... 0 17 34 50 +12322 PF12488 DUF3704 Protein of unknown function (DUF3704) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. 
21.90 21.90 22.80 22.30 19.50 17.70 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.66 0.72 -6.78 0.72 -4.32 8 22 2009-09-22 08:30:11 2009-09-22 09:30:11 3 2 7 0 16 27 0 27.10 65 20.88 CHANGED hL.VG.......ASaGYMP+ssIAGSSsSshSNF ..hLPVG.......sSSGYMPRRGIAGSSuSoMSNF.. 0 0 1 16 +12323 PF12489 ARA70 Nuclear coactivator Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 127 and 138 amino acids in length. This family is ARA70, a nuclear coactivator which interacts with peroxisome proliferator-activated receptor gamma (PPARgamma) to regulate transcription and the addition of the PPARgamma ligand (prostaglandin J2) enhances this interaction. 22.10 22.10 23.50 30.80 21.70 21.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.96 0.71 -4.19 17 169 2009-09-22 08:56:56 2009-09-22 09:56:56 3 2 39 1 59 142 0 127.10 39 44.89 CHANGED spspLp-.hhts+sthtsp..t.h.s.pshpsWL.....lhpphpppohp.pttph....pphhGphpsL.p.L.p................spops.ssphShphp+lusLsLpspEs................ochLhpssshphhpphsshG.hpspppch+.hh .............................t.plp-.LhtsKst.tsp..t.h.s.pshps....WL......h.hQh.pEp...o......pu..pph.......lhGphpsLpp.L.p................sps+shssphSlphE+lusLpLhspDp................oshLhpsspsthhpp.sshu..cshp...+hh.............................. 0 3 7 21 +12324 PF12490 BCAS3 Breast carcinoma amplified sequence 3 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 229 and 245 amino acids in length. The proteins in this family have been shown to be proto-oncogenes implicated in the development of breast cancer. 
21.70 21.70 21.80 22.40 19.00 20.40 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.68 0.70 -5.07 19 188 2009-09-22 09:23:55 2009-09-22 10:23:55 3 6 103 0 117 200 0 227.20 26 26.76 CHANGED PsPloLoVVSRIKsus.sGW..........tsoVs...sAAuoA.oG.+sshhuGAlAusFH.....ss..hstsspsssst.sssh-pLhVasPoGpll.QYhLcPusus....pss.t.shsptsthpp.......s-l+lhVEslppWslsR+ssh.Ep--.phsshstsspts..h..hhtssssct..............p.tts.pt........phpscEppchalSpAE.lphapsp.hPlWtcschpFpsh.ssps.p.p....ts.hu........sEhEI..EclsscplEhRpK-LlP ...........................................................................hsl.slupIKps..hGh..........................................h...p.........phusu...t..oh.t.hsluushusshp.........ss....hppptsppp..p.hsl-pLhlhos.GsLl.pahLcPpshs................ssp..hs--..............................oslclhspPhtpWslsRp.ph..-hps....shstsp.hhh.t............th.......................s.....thssht....................................sscstpccc..WLSplE.lhTHsGP...t+.LWhsPQ.......FpF+shp.sss.psh.....sss.u...................................................................... 0 36 62 91 +12325 PF12491 ApoB100_C Apolipoprotein B100 C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. There are two conserved sequence motifs: QLS and LIDL. ApoB100 has an essential role in the assembly and secretion of triglyceride-rich lipoproteins and lipids transport. 21.20 21.20 21.20 38.90 21.00 18.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.76 0.72 -4.33 4 81 2009-09-22 09:27:57 2009-09-22 10:27:57 3 4 63 0 21 76 0 44.40 75 2.02 CHANGED Qh+hKLQDFSDQLSDYYEKFIAEocRLIDLSIQpYHhFL+YIhELLKcLQssTssshh ..QFRYKLQDF.DQLSDYYEKFIsESKR.................................... 
0 1 2 7 +12327 PF12493 DUF3709 Protein of unknown function (DUF3709) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 30 amino acids in length. There are two conserved sequence motifs: RCLMK and LIEL. 21.20 21.20 21.20 21.20 20.40 20.40 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.87 0.72 -4.25 9 226 2009-09-22 12:07:19 2009-09-22 13:07:19 3 2 48 0 7 117 0 29.50 76 37.05 CHANGED hRCLMKpQCVCRCKFQpaCLIEL.psCVVSpFV ...hRCLMKRQCVCRCKFQpaCLIELSp.CVVS.FV....... 0 7 7 7 +12328 PF12494 DUF3695 Protein of unknown function (DUF3695) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 157 and 192 amino acids in length. There is a single completely conserved residue D that may be functionally important. 20.30 20.30 22.00 37.20 19.00 16.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.35 0.72 -4.29 14 77 2009-09-22 12:08:13 2009-09-22 13:08:13 3 3 63 0 46 79 0 102.70 35 46.11 CHANGED .ssappss+hupp.cPapRLa.ptThuSsRRsstahs......................sphPpDSLDFpLpotYDHpc-hFhsKs-sllQpETlst...................tphRhL+Nst.h.p..pDsl....t+PL+l ................t.satpss+hspp.-PapRLpspsTlsShRRsshahc......................PpIP+DsLDFpLsulYsHHsshFpsKs-lLlppEThpc...................pp.h.p.hlp.stchh....ss............................................ 0 16 19 30 +12329 PF12495 Vip3A_N Vegetative insecticide protein 3A N terminal Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 170 and 789 amino acids in length. The family is found in association with Pfam:PF02018. Vip3A represents a novel class of proteins insecticidal to lepidopteran insect larvae. 
25.00 25.00 85.90 85.40 21.30 20.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.24 0.71 -4.59 12 67 2009-09-22 12:19:40 2009-09-22 13:19:40 3 2 11 0 0 62 0 166.50 96 26.51 CHANGED ALPSFIDYFNGIYGFATGIKDIMNMIFKTDT.GGsLTLDEILKNQQLLN-ISGKLDGVNGSLNDLIAQGNLNTELSKEILKIANEQNQVLNDVNNKLDAINTML+lYLPKITSMLSDVMKQNYALSLQIEYLSKQLQEISDKLDIINVNVLINSTLTEITPAYQRIKYVNEKFEELTF ....ALPSFIDYFNGIYGFATGIKDIMNMIFKTDT.GGDLTLDEILKNQQLLNDISGKLDGVNGSLNDLIAQGNLNTELSKEILKIANEQNQVLNDVNNKLDAINTMLRVYLPKITSMLSDVMKQNYALSLQIEYLSKQLQEISDKLDIINVNVLINSTLTEITPAYQRIKYVNEKFEELTF. 1 0 0 0 +12330 PF12496 BNIP2 Bcl2-/adenovirus E1B nineteen kDa-interacting protein 2 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 119 and 133 amino acids in length. There is a conserved HGGY sequence motif. This family is Bcl2-/adenovirus E1B nineteen kDa-interacting protein 2. It interacts with pro- and anti- apoptotic molecules in the cell. 27.00 27.00 29.50 29.50 25.20 25.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -11.01 0.71 -3.97 17 319 2009-09-22 12:21:50 2009-09-22 13:21:50 3 8 68 0 141 251 1 129.20 46 19.61 CHANGED cpp+++LsAPp..........................................lsLoL.DpS..................................................................EtShhSD-h.-oss-.........lDls.....lDDL..DTPs-sD.h-h.s...-h-............WEDDhPts.pusptssps...l.phos-.EEcpDs.RhWRslhIG-.QE+RIDM+lIEPYp+VISHGGY.YG-G 
..................................................................................................................................................................................................................................................................................................................................................p..hR++LsAPp............................................lsLoL..DtS.....................................................................................-GSlLSDDh.-o.....s...................................Dls..........lD-l..-TPsEs-ph-...s.......ch-.............WE..DDhP..ps..p.sss..tc.......hs-ho...u..-...E.E.p.cDs..RhWRshhIGE..QEpR..lDM+sIEPY++VlSHG.G...Y.YG-G.......................... 0 19 31 67 +12331 PF12497 ERbeta_N Estrogen receptor beta Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00104, Pfam:PF00105. There is a conserved IPS sequence motif. There are two completely conserved residues (Y and W) that may be functionally important. ERbeta binds estrogens with an affinity similar to that of ERalpha, and activates expression of reporter genes containing estrogen response elements in an estrogen-dependent manner. ERbeta acts as a transcription factor once bound to its ligand and it can dimerise with ERalpha. 25.00 25.00 27.50 27.50 22.20 17.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.64 0.72 -4.22 29 224 2009-09-22 12:23:36 2009-09-22 13:23:36 3 4 124 0 31 209 0 107.90 43 20.22 CHANGED sPhhhS...ssLsh-.spslCIPSPYsDtuHDas..........slsFYSP...olh...uYut.Pulo-sPo...l+poLSPSlFWPuHuH..hssLsLHs.sQshshp-t.spoPWsEhps..-pslssSppsl ...........Ps.Ys...s.LshE.ptsIhIPSsYs-spH-Ys..........sh.sFYSP...ulhsYu...h.Pu...s...sssss....s+QohSPslhWPo.GH....lSsLslHp.ppuhlY.sEs..+SPWsEs+s.h-Hs.Ls.spps.h.................... 
0 1 4 12 +12332 PF12498 bZIP_C Basic leucine-zipper C terminal Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 174 and 411 amino acids in length. The family is found in association with Pfam:PF00170. There is a conserved KVK sequence motif. There is a single completely conserved residue K that may be functionally important. Various bZIP proteins have been found and shown to play a role in seed-specific gene expression. bZIP binds to the alpha-globulin gene promoter, but not to promoters of other major storage genes such as glutelin, prolamin and albumin. 22.70 22.70 22.80 24.60 21.60 22.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.78 0.71 -2.85 28 167 2009-09-22 12:46:35 2009-09-22 13:46:35 3 3 36 0 44 171 0 111.40 34 37.25 CHANGED TLRAKVKMAE-oVKRlTGhsshh.sh..phsohshs..ss.Ss.ts...sssPhQsssppaat.pssss.shs.........................tptsst.su.shsuschsposuh....p+lA.....uLE.alQ+.RhpuGsssss .............................TLRAKVKMuEDolKRVhthss.........o.Shshsh.ss.Sss..s..sssPlpDshssaFs.s...sssssshssts....................t.psss.shsu.shsus...cMspsuu...h....p+sA.....uhE.hlQp.thtuh.sss.u.......................... 0 7 30 38 +12333 PF12499 DUF3707 Pherophorin Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 147 and 160 amino acids in length. The proteins in this family are frequently annotated as pherophorins however there is little accompanying literature to confirm this. 
20.60 20.60 20.70 20.70 20.30 20.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.70 0.71 -3.87 78 350 2009-09-22 12:49:21 2009-09-22 13:49:21 3 15 4 0 307 345 0 144.00 20 42.98 CHANGED FP...hC........ppstssoPapl....shsssstss.........................................spaCFslpss.........s.ssss....sCC.sss.....LpK..lEhhs.........s....ss..stpssl....sGhsstssssstps.s...s...........sL..................+l....ssLs.hshspssuu.....plCls.lp.................sssolsplC...............sss..sCthu.................................ps..C......CP ............................................................P....p.....pppp.t...sP.apl......s.s.tstss.........................................spaCFplpsh...................................ssssss......sCC..sss.....lpK..lEh.sss..............sC..p....sslt.sssl........sG.h..sthth.t..t.h.t...s........................sl............................+l.ssLs...hshs..pssuu..........plClp.lp..................sssolpphC...................sss...hC.hs.hhs..................tstpC..CP................................................................................ 0 97 307 307 +12334 PF12500 TRSP DUF3706; TRSP domain C terminus to PRTase_2 Anantharaman V, Gavin OL lg7 Anantharaman V Domain This domain occurs C terminus to PRTase_2 and has a highly conserved GXXE and TRSP signatures [1]. It is found in bacteria. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 
25.00 25.00 58.20 33.50 19.80 18.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.21 0.71 -4.75 67 331 2012-10-09 19:35:50 2009-09-22 13:50:58 3 6 324 0 70 262 6 151.80 33 34.43 CHANGED hstsss...............................................ts...................................hshtsptsa........................GRhGhpssp....s........h....shutp.lphtt......................................c+lLVLGTsEFhahPhhlAcpLppth.............s..VhapSTTRSPIts.....GYAIcsulsFss...s.s.sl.........NalYNVs.............tt.............................................aDc....lllssEs............ssss.s........ssLlptL ...................................ssstt.h.htsttsh.......................................GRhGhhs.t...t.......h.clucplpstt......................s.c+lLVLGTuEFhatPhhLAccLEptu................s..plhapSTTRSPIts......GYAIcssluFsssY.shul........sNalYNVs.t........tp......................................................aDc.lllssEosscshs........stLlptL.............................. 0 20 44 60 +12335 PF12501 DUF3708 Phosphate ATP-binding cassette transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 143 and 173 amino acids in length. The family is found in association with Pfam:PF00528. There is a single completely conserved residue P that may be functionally important. 
21.70 21.70 21.70 21.70 21.60 21.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.00 0.71 -4.41 84 285 2009-09-22 12:53:06 2009-09-22 13:53:06 3 3 283 0 89 250 468 152.80 29 32.47 CHANGED lll.lllhLushuahhGRpRAhsh......u..sus.t....LHSLPsYYGhhlAlasslPAlllLslWhhh....pPhllpphVhuplP.......sstshsssphsL........lhucl+slApG................................thslsups.....ssthhsAAppapshpstuphhhssllls..lAluGhsa ......h.ll.hllsluhluahlGRpRAhsl......A.t.GssstplHShPsYaGhaluLhsulPALllLslWsls....pshhlpp.lhuplP.......tst.hstsptsL........hhuplcslAsG.............................................hslusps......pshhlshApthpphpshsphhhsslsls...lAluGhh.......................................... 0 30 62 74 +12336 PF12502 DUF3710 Protein of unknown function (DUF3710) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 237 and 284 amino acids in length. There are two conserved sequence motifs: DLG and DGPRW. 25.00 25.00 54.70 54.60 19.40 18.30 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.07 0.71 -4.96 59 439 2009-09-22 13:18:44 2009-09-22 14:18:44 3 1 430 0 116 319 51 179.10 38 69.44 CHANGED GPaDhtpsp......sssssstlDLGulhlPhspGhplplEhs......ps.tt...shulplshspuplpltAFAAP+osGLWcElRtElspulppp.G...........upspt.psGsaGtElhuph.h............................sssh.tshRhlGVDGPRWhLRuVlsGsu...utssp....tAthlcclhppsVVsRGspPhPsR-sLPlp...LPpphs...pphtts ...............GPaDls-hs......ssssssRlDLGSlhlPhhcuhplplchs.........ps.Gs...spulhllpssutlplsAFA.AP+o.....s.....GlWcEVppEl..scuhptp.G............upssh.tsGPaGtElhshl.h...................................................tG..tsu...tssRhlGVDGP.RWhLRuVlsG.u.......uhscp............tuphlc...-hhsshVVcRG-sPhssR-slPlc...lPpshttt....s.................... 
0 34 84 108 +12337 PF12503 CMV_1a_C Cucumber mosaic virus 1a protein C terminal Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01660. There is a conserved GLG sequence motif. 1a protein is the major virulence factor of the cucumber mosaic virus (CMV). The Ns strain of CMV causes necrotic lesions to Nicotiana spp. while other strains cause systemic mosaic. The determinant of the pathogenesis of these different strains is the specific amino acid residue at the 461 residue of the 1a protein. 25.00 25.00 32.20 110.20 18.30 17.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.01 0.72 -4.26 11 89 2009-09-22 13:36:50 2009-09-22 14:36:50 3 3 22 0 0 90 0 88.00 68 9.08 CHANGED RlWshAGGcscsss.cS....Vh-TY+pVDshVNlHassGpWha..PpthcYoVGYN-pGLGPKh-sELYIVspsCVIuNscsLAcuoc .RIWsMAGGDsKRNSLEGNLKFVFDTYFoVDPMVNlHFuTGRWM+PVPEGlVYSVGYNE+GLGPKtDuELYIVNu-CVIsNS-uLSslT.+. 0 0 0 0 +12339 PF12505 DUF3712 Protein of unknown function (DUF3712) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 130 amino acids in length. 
21.60 21.60 21.90 22.20 21.40 21.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.67 0.71 -4.10 72 289 2009-09-22 13:43:24 2009-09-22 14:43:24 3 10 90 0 237 309 1 125.60 19 24.44 CHANGED sshupltlsthph..sst......shhh.hspphtlsshs......saspaspsllhpcp.hslslpGp...s.p..hphGsl..shpslsh.sKslshp....................G................Lsphp.uhs.................lsshplhhss....ts.........GhNh.husshl....................sNPSslol.pl..G.......slolsl ...........................................................hh.h.hs.hph..t.t........t..h.hst.lplsshs.......sFspa.....sptlh..tspp..hslpl.p.up......s..p.sp....husl...thpslsh.s+slslp....................G................h..ss.hp..uhs...................lsshclstss.....ts....................................uh.sh.pus.ssl....................sNPSs...hol.sl..Gslshp................... 1 97 152 213 +12340 PF12506 DUF3713 Protein of unknown function (DUF3713) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 92 and 1225 amino acids in length. There is a single completely conserved residue S that may be functionally important. 21.90 21.90 21.90 37.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.57 0.71 -4.02 10 36 2009-09-22 13:46:33 2009-09-22 14:46:33 3 1 8 0 10 38 0 97.20 36 13.00 CHANGED cLssYF..p.lIsc...sKVsscso.........soocpss.sclhTpst-......lcKl+-c.pscl-sKlccaV.sKLKsostPpssYSplILlsspsD....pshossu.huLhuLlslposs.LsNtl++paFssss .cLspYY..cslI.....sKVuspso.........sospsosssph..pptp......lcKL+-p.Kccl-stlKcaV.scLKlol......Sp.h.lp.p.........p....h...s..t....p.....hh..hh.ps.. 0 7 7 7 +12341 PF12507 HCMV_UL139 Human Cytomegalovirus UL139 protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes and viruses. Proteins in this family are approximately 140 amino acids in length. 
UL139 product shared sequence homology with human CD24, a signal transducer modulating B-cell activation responses, and the sequences in the G1c variant of UL139 contained a specific attachment site of prokaryotic membrane lipoprotein lipid. 25.00 25.00 25.20 35.00 24.10 24.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.53 0.71 -3.95 8 69 2009-09-22 13:50:58 2009-09-22 14:50:58 3 1 17 0 13 66 0 103.10 56 63.02 CHANGED hs.hpuppphpls+splLAu........pLhhlhohphpCahLhRKlh....................................................psspGpspEtEp+ppchtRahps+tsp.sshhshussh.Sho.s.Qssohts.sp ...Clpu.uGosWsssQLALLAASGW..TLSGLLLLFTCCFCCFWLVRKI.....................................................CSCCGNSSESESKsTH..AYTNAAFTSSDATLPMGTTG.SYTPP.QDGSFPPPPR. 0 1 7 10 +12342 PF12508 DUF3714 Protein of unknown function (DUF3714) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 200 amino acids in length. 20.90 20.90 23.00 22.50 18.40 17.60 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.85 0.71 -4.85 44 464 2009-09-22 14:34:16 2009-09-22 15:34:16 3 4 143 0 56 408 7 192.20 34 48.13 CHANGED cshshssptsppphssshtp.hs-..pshhssh.spspsttFpThstsp....ppNsIpAslcpspTl..ssGspl+lRLLEshtl.ssthlP+sThLhG.suphpGpRlplpIsSlphsGsIlPVcLsVYDs.DG.pGlaVPsShct-ssKEhuushssuh.....ssshshup.suusQlusshupuslQusSphluK+hRplKVpLKuGap ...........................................................t......ht..pcphlosLt.pshss...tthhtth.sptpshsF...pTssusst......pcNoIpAslcps....p.sl..p-GppV+LRLLEsh..pl....sshhI..P+sohlhG.su+lpG..pRlclpIsSlch.sGpIlPVcLulYDs.D...GQcGlalP..s....S.-hs.As+EluAshuuuh......soShshus..sAss....Qlus-hs+ulhQGsSphluKKhRplKVpLKuGYp.......................................... 0 18 48 56 +12343 PF12509 DUF3715 Protein of unknown function (DUF3715) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 170 amino acids in length. 
20.90 20.90 25.40 22.00 19.20 19.20 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.94 0.71 -4.39 12 131 2009-09-22 15:00:10 2009-09-22 16:00:10 3 11 42 0 67 140 0 157.60 27 12.42 CHANGED lpNphLppcastpR..pph+pct+pscEL.-shsFLhhcs.tcstsl......s.........ppGLpss...puKsshLGcsphGVhls..RhuDlhpppshpsu...shs.lhlhKlh+G+hKpl.-s..........spstlsPsPsa-sHl......uc.shsplssphh...ta.pothYhhEh...shs.shhcPpphhP ..............ps.hLppphoppc..pphthstpsspEL.Es.s..hL..hcp.hpsppl...............t...............p+GLpht...ppKhs....hLspPp.shh.s..ph..uDlhpspshpsu..........s.splhh..c...s.hptch.cplt-s.............spsslsssPsa-phl......uc.stsplsshhs..hhsattsthhhhEh...uhs.sphcP+ph.P.................................. 0 13 22 31 +12344 PF12510 Smoothelin Smoothelin cytoskeleton protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00307. Smoothelin is a cytoskeletal protein specifically expressed in differentiated smooth muscle cells and has been shown to co-localize with smooth muscle alpha actin. 21.70 21.70 21.90 21.90 20.40 18.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.58 0.72 -4.11 16 284 2009-09-22 15:02:46 2009-09-22 16:02:46 3 6 61 0 105 292 0 50.10 44 9.26 CHANGED sptps.ppspsssp.hst-l-pIhDEplLc.pLLEssosaEERRhIRAtlRcl+ppc ...............p.....tt.ptt.s..upcl.sh.D.thLc..ph..L-pss-aEER+hIRAAlR-LRppc.. 0 16 23 53 +12345 PF12511 DUF3716 Protein of unknown function (DUF3716) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. 
21.30 21.30 22.50 21.70 21.00 20.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.84 0.72 -4.36 50 170 2009-09-22 15:04:55 2009-09-22 16:04:55 3 7 56 0 133 167 0 57.30 30 11.75 CHANGED Ahhl..QspGppt..sps.CspCp.p...sp.....GP.F..psCV........hhsst.........htsuCuNChasspupp.CShpp .........AhhhpspGp.s...tpsCppCp..p....sp............GP..F.spCl.........lhsup...........htsuCuNChas....spust.Cohh.t........ 0 18 64 110 +12346 PF12512 DUF3717 Protein of unknown function (DUF3717) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 75 and 117 amino acids in length. There is a conserved AIN sequence motif. There are two completely conserved residues (L and Y) that may be functionally important. 25.00 25.00 33.00 32.80 21.80 19.30 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.28 0.72 -4.33 29 154 2009-09-22 15:21:54 2009-09-22 16:21:54 3 2 113 0 59 128 14 70.90 44 76.54 CHANGED M.ss..lpIs-lEuAINaWRs+pPS....s-thsLssEupALA-lYAlMIhp+pspls.ssLsststsAhtuahpsp ........ssIoIp-lEAAINaWRuRuPu...ssD-ltLCsEAsALAcsYALMIsp+psslsh-uLsscARsAapuah...p.. 0 3 17 40 +12347 PF12513 SUV3_C Mitochondrial degradasome RNA helicase subunit C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00271. The yeast mitochondrial degradosome (mtEXO) is an NTP-dependent exoribonuclease involved in mitochondrial RNA metabolism. mtEXO is made up of two subunits: an RNase (DSS1) and an RNA helicase (SUV3). These co-purify with mitochondrial ribosomes. 
21.60 21.60 21.80 24.70 18.90 21.30 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.32 0.72 -4.52 83 441 2009-09-22 15:25:28 2009-09-22 16:25:28 3 7 383 2 272 424 7 48.20 30 6.89 CHANGED LppLEshachlslYhWLShRa.sshFs-tphspch+ptlpphIpptLpph ...LhcLEshachlsLYlWLShRF..sh..FsD.tphspch+ptlsphIpptLpp.h........ 0 83 158 225 +12348 PF12514 DUF3718 Protein of unknown function (DUF3718) Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is approximately 70 amino acids in length. There is a single completely conserved residue C that may be functionally important. 22.50 22.50 22.70 31.90 22.40 22.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.95 0.72 -3.92 41 115 2009-09-22 15:28:03 2009-09-22 16:28:03 3 1 56 0 58 113 27 68.90 32 56.60 CHANGED lCcsstosshhpL++phK..........cp+l..............ph+plastlsCNGpSlhpFAhsps..Ascsuphl..........htphshp-lussp ..lCchltusc+scLR+plK..........-pRl..............+h+plasulsCNGpSLlcaAhtss..Ascsupal..........spplshp-Lu........................... 0 7 19 40 +12349 PF12515 CaATP_NAI Ca2+-ATPase N terminal autoinhibitory domain Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF00689, Pfam:PF00122, Pfam:PF00702, Pfam:PF00690. There is a conserved RRFR sequence motif. There are two completely conserved residues (F and W) that may be functionally important. This family is the N terminal autoinhibitory domain of an endosomal Ca2+-ATPase. 
21.70 21.70 28.50 27.80 19.90 18.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.31 0.72 -4.45 28 163 2009-09-22 15:29:38 2009-09-22 16:29:38 3 12 30 0 103 162 0 46.50 48 4.79 CHANGED hhpcsF.-ltu.........KN....uStEuLpRWRpuss..lVhNspRRFRhssDLsKcs-s ....hp-sF.-l.u..KN.............uStEsLcRWRpAs...lVhNspRRFRaTsDLcKcpEh...... 0 13 62 85 +12350 PF12516 DUF3719 Protein of unknown function (DUF3719) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved HLR sequence motif. There are two completely conserved residues (W and H) that may be functionally important. 20.60 20.60 20.70 20.60 19.30 17.70 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.21 0.72 -4.41 14 139 2009-09-22 15:30:52 2009-09-22 16:30:52 3 2 57 0 72 128 0 64.50 40 12.14 CHANGED hLaEG+sss..t..spsLppECppWss...+hPHLRlhGsQhhtPpccGapah.....susshp.pshhutssssss ............hLaEtKlus..p..TpuLppECppWss...paPHLR..lLG+QllhPtsEGapha..ussssSssp.cs..s.ttp...s...................... 0 16 24 39 +12351 PF12517 DUF3720 Protein of unknown function (DUF3720) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. There are two completely conserved A residues that may be functionally important. 25.00 25.00 25.00 31.40 22.80 24.70 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.39 0.72 -3.30 38 82 2009-09-22 15:31:52 2009-09-22 16:31:52 3 1 2 0 19 82 0 93.20 33 25.57 CHANGED KAVEAuEss..ELuussss.pc..p-sossstl.ssTpusuAPGsGG..tGVAGtsss......hP.ssPGGSsTtspptp..pslsspGscpss.cpspp-upsops..Q .....KAVEAu-uu....pLuussss.ps..pcp.sstpl.ssTpusuAsGs..GG..suVAGt...ssss.....hP.ssPGsSsTttpp.tc..pslsspsscpsstcpsppssptsts.............. 
0 0 0 19 +12352 PF12518 DUF3721 Protein of unknown function Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 30 amino acids in length. There is a conserved WMPC sequence motif. There are two completely conserved residues (A and C) that may be functionally important. 21.30 21.30 22.80 27.80 18.50 20.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.45 0.72 -7.68 0.72 -4.35 41 73 2009-09-22 15:33:12 2009-09-22 16:33:12 3 4 30 0 26 80 320 33.70 46 32.13 CHANGED asTctEAEppApc.hGCpGuHpM..Gs.pWMPCss...atp .....asT+tEAEppAtc.hGCpGsHpM...Gs.pWMPCspHt...... 0 2 10 20 +12353 PF12519 DUF3722 Protein of unknown function (DUF3722) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 415 and 473 amino acids in length. 25.50 25.50 34.20 35.60 23.20 25.40 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.77 0.70 -5.17 31 140 2009-09-22 15:34:49 2009-09-22 16:34:49 3 2 135 0 105 160 0 252.20 37 58.90 CHANGED l+lplSshuTPphtooasLushsh..lsGSloYLaoossLpp..stsSpplsLpchlpsYRhlp..sshs.ppp..p..htsh.t.......................................oLLYGRhal.PsopLEAhhl+RlSPssQlhlpslSs.t..........................................................................sssuslhshlQ+DoG+aspEalaSTs-uLhGaRsLaNFGssssptt.....................s.hssp.....................plSsGuEhaauslspSsGhSTulRasThsssTsp................................PhTlTLohNPlhGplSoT.Yol+sSsshshsSRaDFN 
........................................l+lplSshuTPphsoSa....sLushsh......lsGSlSYLaSohsLpp......stpSsplsLpchlpuYR.lp..ssht.t...tt...t...t......ts...s.tt........................................................toLLYGRhal.PsopL-Ahhl+RloPshQl.ltslSs.t..........................................................................tstuslhs.lQpDs..G+aspEhlaSTssuLhGaRsLaNFGssspp.ss.....................................p.p..............hs.....hlSsGuEhYYuslspSsGhSTGlRFsTlPsssup...................................................PhThTLolNPLhGplSoT.YolpsusshuhsoRa-FN......................... 0 27 56 88 +12354 PF12520 DUF3723 Protein of unknown function (DUF3723) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 374 and 1069 amino acids in length. There is a conserved LGF sequence motif. 21.30 21.30 21.60 21.30 21.00 21.20 hmmbuild -o /dev/null HMM SEED 511 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.35 0.70 -6.01 20 202 2009-09-22 15:35:55 2009-09-22 16:35:55 3 6 44 0 161 209 1 265.00 18 50.16 CHANGED C+RL-spN+lsAllScpsLstALppuslotssLh..sspp.PpLpFstG.plpCLHG+HRlpAup-h....Lssu.D+WWsVDLYhD.................slup-L+suLhEEYuNp+psoDGEIYR+IRpYp..........p-sNtthppRWhuRL.osppt+hhp...................h.thht.uhulspLhchhuLhss.......................EllpYLs.pltchWu.llss-.st.....ht+lDscTVctLEhhuPthSctDuptlpslltuGplFssFsp.sERpslhccLp...shcGlIPSLaTFFcDhcYLEsCAcsl++Lls.s..p.olppshpthas.tssss..pptllQooEsshpp...ppsusspph-luY+QlWLaAMRaasphspsst....pcshhs+sspppuDppsla-hAsLAp+LGFpSspIcsLhspuscRp.hApthLhpARps-tYpYsssph-slVp+IV-shssA.h.ppht.sschhsspssph+s.....RCGhPptpupcpD+chLFlDclautpssssshl.TohhVRRslaFAFFG 
.................................................................................................................................................................................................................................................................................................................................................................................h................hs..sl..lp.............h.....h.tt..hh........h..h...h.....h.......l.oh..h.tsh.hht.....ht.h..................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 12 47 136 +12355 PF12521 DUF3724 Protein of unknown function (DUF3724) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF00073. There is a single completely conserved residue Y that may be functionally important. 22.00 22.00 22.50 22.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.66 0.73 -7.06 0.73 -3.91 5 178 2009-09-22 15:37:19 2009-09-22 16:37:19 3 11 6 0 0 111 0 22.90 88 6.99 CHANGED RupGcVVcDYSRYosAosssst- .RSQAYMVKNYPTYSQTITNTATD.. 0 0 0 0 +12356 PF12522 UL73_N Cytomegalovirus glycoprotein N terminal Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 30 amino acids in length. The family is found in association with Pfam:PF03554. This family is an envelope glycoprotein of human cytomegalovirus (HCMV). 
20.30 20.30 22.80 22.80 19.10 19.10 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.95 0.72 -6.99 0.72 -4.40 5 115 2009-09-22 15:38:45 2009-09-22 16:38:45 3 1 5 0 0 82 0 25.90 67 19.32 CHANGED LGLLVLSVAAGSSGNNSSTSTSATTsS .LGLLVLSVsAuS..NNoSTuoososSS. 0 0 0 0 +12357 PF12523 DUF3725 Protein of unknown function (DUF3725) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF01577. There is a conserved FLE sequence motif. 20.80 20.80 21.10 112.00 20.60 17.50 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.48 0.72 -3.89 4 55 2009-09-22 15:39:49 2009-09-22 16:39:49 3 5 5 0 0 51 0 73.90 79 5.45 CHANGED ahoEFclScGAKILQLV.IGsAElGRsFLEGs+hlRAsIFEIl+KTMVG+LGYDF-sELWhCHsCspTS-KYFK YsSEFcISKGAKILQLV.IGNAEVGRTFLEGNRFlRANIFEIIRKTMVGRLGYDFESELWhCHNCscTSEKYFK 0 0 0 0 +12358 PF12524 GlyL_C dsDNA virus glycoprotein L C terminal Gavin OL lg7 Prosite Family This domain family is found in viruses, and is typically between 55 and 80 amino acids in length. The family is found in association with Pfam:PF05259. This family is the C terminal of glycoprotein L from various types of double stranded DNA viruses (dsDNA). 20.10 20.10 28.70 26.80 18.60 17.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.08 0.72 -4.06 10 48 2009-09-22 15:41:28 2009-09-22 16:41:28 3 1 20 1 0 45 0 70.40 44 32.42 CHANGED uuStsPVssGCVNh-hS+sRpChGpsshshFscsphhpP.susDD...............hs.uRtssc+s+ssRupss+ ..........AuSpsPVtuGCVNh-YSRsRpChGppcLGhhNtspsppPshssDDEAu.Qs.s.ss.ss.hA.ScssP+RssAs+u+pp+.................................. 0 0 0 0 +12359 PF12525 DUF3726 Protein of unknown function (DUF3726) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 80 amino acids in length. 
There is a single completely conserved residue E that may be functionally important. 22.10 22.10 22.10 22.40 21.70 20.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.42 0.72 -4.18 34 82 2009-09-22 15:42:57 2009-09-22 16:42:57 3 1 79 0 18 89 671 78.10 31 33.98 CHANGED hhShNEltuhspKAhcGhGhshG.A--sAphsthLphhG....LsGsptLsssLphhsspssssl.s.h................hpsss.hhsstpshss .....hShNEl.AhspKAhhGhthshGpA-thAphls.LphhG....LsGlpphspshphhshpsspsl.s...................tsss.h.hsh+t.S................................. 0 5 9 15 +12360 PF12526 DUF3729 Protein of unknown function (DUF3729) Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are typically between 145 and 1707 amino acids in length. The family is found in association with Pfam:PF01443, Pfam:PF01661, Pfam:PF05417, Pfam:PF01660, Pfam:PF00978. There is a single completely conserved residue L that may be functionally important. 25.00 25.00 43.00 25.30 22.40 24.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.55 0.71 -2.72 39 273 2009-09-22 15:48:27 2009-09-22 16:48:27 3 10 14 0 0 277 0 99.30 46 9.34 CHANGED HhW..........ESANPFCGESTLYTRTWSsSGFSSsFSP.Essssssssssshsps....sPP.ssslhs.PPssEpssssss.ssssspP..ss.............ssuss..sPss...PsppsspPs.......u.pRRLLaTYP ...................HhWESANPFCGEuTLYTRTWSsSGFSSsFSPPEsuhsssssssthsps....sPs.ssshhs.PPssEpstsssssssssscP..ss.............ssuss......sP.s....PspcsssPs.......uppRRLLaTYP....................... 0 0 0 0 +12361 PF12527 DUF3727 Protein of unknown function (DUF3727) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 100 amino acids in length. 
21.50 21.50 21.90 21.90 20.10 20.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.39 0.72 -3.91 34 108 2009-09-22 15:50:23 2009-09-22 16:50:23 3 2 94 0 49 109 114 97.90 35 44.90 CHANGED pcLpLh+oAh.TLTVsGELs.h----l.ph-.-c-.....tc.-sEp..hphLso.Fhh--pEYulYsPLDPhhhlA+..hss.spspLlss-Ehc....clpPll...EppLh-ch ...........................cLpLhcoAh.sLTVpGELs.hp---l.phc.-s-t....sc.-sEplElLss.Fh.h-spcYulYTPLDPlLhlA+..hsp.sps.Ll.ss-Ehp....plpsll...EppLhpt.h....................................... 0 13 33 44 +12362 PF12528 DUF3728 Prepilin peptidase dependent protein C (DUF3728) Gavin OL lg7 Prosite Domain This family of proteins is found in bacteria. Proteins in this family are typically between 106 and 121 amino acids in length. The family is found in association with Pfam:PF07963. There are two completely conserved C residues that may be functionally important. This family is frequently annotated as prepilin peptidase dependent protein C. 22.50 22.50 31.20 30.60 20.20 16.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.97 0.72 -3.59 24 542 2009-09-22 15:52:35 2009-09-22 16:52:35 3 2 530 0 39 167 1 74.80 63 69.32 CHANGED YpQsLhpuFsphaQ.RQsW+hhaQph-hhs.s...............sssWQhpphpsp......pusCsploVplhoPtsppuploRhaCssp ..aQRsLMsShASRsQYpQLWRauWQQTQLpuhS....................PPssWQVNRMQTS......QAGCVSISVTLVSPuGRpGEMTRLHCPNR.. 0 2 8 23 +12363 PF12529 Xylo_C Xylosyltransferase C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 169 and 183 amino acids in length. The family is found in association with Pfam:PF02485. There is a single completely conserved residue G that may be functionally important. Xylosyltransferases are enzymes involved in the biosynthesis of the glycosaminoglycan linker region in proteoglycans. 
25.00 25.00 25.60 25.10 23.50 16.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.04 0.71 -4.90 24 190 2009-09-22 15:54:46 2009-09-22 16:54:46 3 6 82 0 97 161 0 172.10 45 21.16 CHANGED -paLaGpYssssPsL+uYWpNlYcp..D.shsuhsDstlohhpShhRluhpp..t......tptts+hcsht...shplphYhpcDpFpG....aLV+acu............t..sstpt.pLEsaltPpsshplspts..ptupRLpsl..-..VGT-aD.KEplhRNaGtllGPpsEslhht+Wutut...........shssTllWIDPhthlAss ...............................................D.aLY.GsYPsGTP.uL+uYWENlY-p.sD.GhsuLSDlhLThYpuFuRLuL++stssht...sptpC..RatPhG..hPsSVHLYFhsD+FQ.G....aLl+pps.............ps..AsuphEoLEsWlhPppshKlssss...sphsRLQph...E..VGT-WDsKERlFRNFGGLLGPhDEPVuhQ+Wu+Gs...........NlTsTVlWIDPssVlAs....................... 0 20 26 57 +12364 PF12530 DUF3730 Protein of unknown function (DUF3730) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 220 and 262 amino acids in length. 25.00 25.00 29.80 25.10 24.80 23.80 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.49 0.70 -4.44 25 180 2009-09-22 15:55:45 2009-09-22 16:55:45 3 7 96 0 122 185 0 222.30 20 24.00 CHANGED hlhhLhht.......htcsssschhhphLpsLssLuspp............hth.hllpslssLspssstph..................................hthcLlsplatpss+ta.....shLpthLt.......tsspssppphchtluhssoh+slCptpPp............+us-llthlstsL.........pppssl.....................................spuhul-ulpslC............pscll-hhssWplltpcl......s.p.cP.lhp...........................phh.ll..hthsstphsp........hpppslpllWchhstpc.......spsphhshpuL 
...................h....hhht........tc.ppsph.hthLphLsphsspc.......................................sshshhlpslthLsspsphph........................................hh.pLlstlhptpspha.....shLpth...hth...p..p..t.s.p.....p.p...........ph....cphhuht.ssh+slCptpsp............pus-hlttlothl.........ppssts.....................................................................spuhslphlpsLh............pupllshtstW.psLtt.pl.......t.t.ps.h..............................phh...Lhsththsp.tphpt...............hp.pllphhWphstp.p.......h.s..hh..................................................................................................................... 0 30 47 81 +12365 PF12531 DUF3731 DNA-K related protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 250 amino acids in length. There are two conserved sequence motifs: RPG and WRR. The proteins in this family are frequently annotated as DNA-K related proteins however there is little accompanying literature to confirm this. 25.00 25.00 145.50 144.60 24.00 22.70 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.65 0.70 -4.85 38 220 2009-09-22 15:58:22 2009-09-22 16:58:22 3 2 219 0 66 205 15 247.40 51 26.58 CHANGED SAcHERhWLNLAGaCLRPGFGsslDsWRlpQlWslaspGlQasp-sQsWs-WWshWRRlAGGLspspQtplhc....slA.hLpss.stpptph...ts...pttuhp-MlRLuAuLERlssppKhcLupalLpR...Lp+ss...............tstptW.WALGRlGARpPhYGSsHpVlPscpsppWlstL...LphD.W+c.t......suFAssplARhTGDRsRDLs-shRppVlc+LcsspAspsWlphVpEl.spL-ps-ppRlFGEuLPsGLpLl ............StsHE+sWLpLAGaCLRPGFGcshDuWRIEQlWsLYpQGIQas.ssQsWo-WWlhWRRlAGGLsp-QQpplLs....-IApaLpPuuh+...sstts....pcu...pctGY-sMVRLuASLE+LtVEcKs.LupWhLs+...Lp+sp................ppspW.WALGRLuARsPhYGStHsllPtEpsppWLspL...LcpD......Wpcps.....hsAFAsV.hsRhTGDRsRDls--hRppVlc+L+po+ss-uWlshVpEV..l-L-cu-opRsFGEoLPsGLpL......... 
0 20 31 53 +12366 PF12532 DUF3732 Protein of unknown function (DUF3732) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is typically between 180 and 198 amino acids in length. There is a conserved DQP sequence motif. 25.00 25.00 35.00 34.80 24.80 22.40 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.24 0.71 -4.78 50 147 2009-09-22 15:59:45 2009-09-22 16:59:45 3 5 141 0 42 122 9 187.10 30 30.98 CHANGED pplppLcppls.tpshpp+h..psshsplsph.hschupplchEps...stslchshcphsl......hhsspscphhL......pchGSGuNWluhHlshaLALHcaFhpp.....ppslP..shLhlDQPSQVYFPs........tpshstppl.........................................pDpDhtuVpchFphLsphspchpt......phQlIVh-HAs.pt....p.htsshh.ppWR..pscuLI .................t.plptLctpls....pc...hpc+h...ctthsplsph.hschhtpLchEps...ssslphshc.phsl......hhspsscphhL......hclGSGuNWLuhHlulhLALHpaFspht....sssVP..saLlhDQPSQVYFPs...tttts.cp.pclt........................................pD-DlhsVcclFphlsphhpctpp......shQIIVh-HAspp.htth.....pshh.pcWR..psptLI.............. 0 16 23 32 +12367 PF12533 Neuro_bHLH Neuronal helix-loop-helix transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. The family is found C-terminal to Pfam:PF00010. There is a single completely conserved residue W that may be functionally important. Neuronal basic helix-loop-helix (bHLH) transcription factors such as neuroD and neurogenin have been shown to play important roles in neuronal development. 
25.00 25.00 26.20 25.20 22.40 22.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.83 0.71 -3.30 21 229 2009-09-22 16:01:00 2009-09-22 17:01:00 3 3 65 0 114 175 0 119.40 49 36.79 CHANGED G+pPDhluFVQsLCKGLSQPTTNLVAGCLQLNsRshLs-p.tsctusp...ssusasspsasY......psP....tLsoPs....tsoh.....susH.h+.....stsYtush-shat.....ssss-tsoPpa-G..sLoPP.lslsGsFS .....GKpPDLloFVQsLCKGLSQPTTNLVAGCL......QLNsR.sFLh-Q.st-tss+..........ssusassHs.asY.......pSP....tLsoPP........aGsh.....ssoHsh+.....sasYs.u.u.hEshat.........usss-ssSPpa-G..PLSPP..lslNGNFS................................................ 0 12 22 56 +12368 PF12534 DUF3733 Leucine-rich repeat containing protein 8 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00560. There are two completely conserved residues (W and Y) that may be functionally important. Many of the proteins in this family are annotated as leucine-rich repeat containing protein 8 however there is little accompanying literature to back this up. 25.00 25.00 50.80 35.60 24.90 24.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.23 0.72 -4.68 22 480 2009-09-22 16:01:50 2009-09-22 17:01:50 3 39 44 0 283 366 0 62.00 36 15.95 CHANGED Mhsls-hppas.tpQssY+lLKPWWDVFhsYLsllMlhlulhuushth.t.phh.ChPs.pspthppp ...........hssh-hpQas..sQssY+lhhPWashahsYLsllhhhIhlhuush.h.......phPs.psuphpp........ 0 16 46 119 +12369 PF12535 Nudix_N Hydrolase of X-linked nucleoside diphosphate N terminal Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 847 and 5344 amino acids in length. These enzymes hydrolyse the molecular motif of a nucleoside diphosphate linked to some other moiety, X. 
20.60 20.60 20.60 28.30 20.30 20.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.86 0.72 -4.51 85 803 2009-09-23 12:17:57 2009-09-23 13:17:57 3 4 787 4 107 492 28 56.60 37 27.66 CHANGED ppphLcaAtclQulAQuGLsYu+DsaDhERY-clRcIus-hhuptoslshcplcsLFs ....t..calcahpcl.ulupsGLsYo....K.DsFDpERYccLRplss-Mluphoch.sh-pltplh............... 1 39 72 90 +12370 PF12536 DUF3734 Patatin phospholipase Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF01734. There are two completely conserved residues (F and G) that may be functionally important. The proteins in this family are frequently annotated as patatin family phospholipases however there is little accompanying literature to confirm this. 22.00 22.00 22.00 24.10 21.60 21.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.29 0.72 -3.93 64 300 2009-09-23 13:23:11 2009-09-23 14:23:11 3 7 189 0 108 296 13 105.30 29 24.71 CHANGED IpYSSRTRhpo-thcphpphRcslpcLhp+lPtph+s.-Pthptltphus..ssthsllHLIYpppshEsps.KDa-FSptohc-+WpuGhcDscpsLp..c.pWhptsssspG ...........................IpYSSRTRhsT-hhpphpchRpslccllp+lPt..p.+p..c.........s.....h..pthtphus..ssphsllHLIY..ppcshEspt..KDY-Fotsohp-+WpuGhcDhcpsLs..p.phhthsp.................. 0 18 49 76 +12371 PF12537 DUF3735 Protein of unknown function (DUF3735) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved LSG sequence motif. There is a single completely conserved residue G that may be functionally important. 
22.10 22.10 23.10 27.40 21.90 21.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.06 0.72 -3.93 46 266 2009-09-23 13:55:43 2009-09-23 14:55:43 3 8 220 0 188 278 2 71.80 38 14.39 CHANGED tshhhhcphls+lullGlTlhAlLSGauuVssPYphhs.........hhhc.ss............ct..clpshpppltpotshltpK+p..clp .........shhshcphloRlGVIGVTlMAlLSGFGAVssPYshhs.........hFhRsls............-s..DItshE+pLhpTh-hlhsKK++l......................................... 0 64 103 153 +12372 PF12538 FtsK_SpoIIIE_N DNA transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 107 and 121 amino acids in length. The family is found in association with Pfam:PF01580. The FtsK/SpoIIIE family of DNA transporters are responsible for translocating missegregated chromosomes after the completion of cell division. 21.50 21.50 21.70 22.30 21.00 21.00 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.19 0.71 -3.97 18 321 2009-09-23 14:46:06 2009-09-23 15:46:06 3 4 319 1 22 218 0 107.70 40 7.71 CHANGED hLhlhYppplp+hcLss...pcsloIGsshcsslTlssL...cpsIpLchcps.......hhphptpsLthscshplshttp.hhhhahspsppsplYsluscpplsluspct..sDIslpssps ........hLIIpYscpL+hhsLcs...sKohTIuc--+ADITLpSL...uEsIcLcpNN........pGshQsscsslNK.slshcs..sh-shpL.LYopsshsuhhasuhp-ohTIGsNsY..DDhsIpuh.hs..... 0 5 11 16 +12373 PF12539 Csm1 Chromosome segregation protein Csm1/Pcs1 Mistry J, Wood V jm14 Manual Family Saccharomyces cerevisiae Csm1 is part of the monopolin complex. Csm1 forms a complex with Mde4 and promotes monoorientation during meiosis [1]. Csm1 also plays a mitotic role in DNA replication [1]. This family also contains the Schizosaccharomyces pombe homologue to Csm1, Pcs1. Pcs1 forms a complex with Mde4 and acts in the central kinetochore domain to clamp microtubule binding sites together [3]. The two complexes (Csm1/Lrs4 and Pcs1/Mde4) contribute to the prevention of merotelic attachment [3]. 
22.60 22.60 22.70 23.10 22.40 18.30 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.09 0.72 -3.50 29 121 2009-09-23 14:54:00 2009-09-23 15:54:00 3 4 118 16 91 114 0 91.60 39 22.85 CHANGED hplpcDLap...pLTGlplpssccc..-sshhFDshQoupN..G.......slca+Lsls+sts............tp...........sEhpYhPhL.......cppp-c...............pLhchLP-YLp-sloFPhpp ....................u.plpcDLYpcLTGLhl+ssccc..-sshlaDClQTGpN..G......................sl+aKLulspsps............ps................sEhpYhPhLcpsc-c......................pLhclLP-YLs-sloFPhp................. 0 17 45 77 +12374 PF12540 DUF3736 Protein of unknown function (DUF3736) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 135 and 160 amino acids in length. 22.60 22.60 27.30 23.30 21.40 19.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.72 0.71 -3.95 10 166 2009-09-23 15:32:06 2009-09-23 16:32:06 3 3 65 0 89 153 0 140.60 30 24.34 CHANGED psccshuSclshEup.........+lppPtpcauusspss+cSs...s.lhVssss+uVssoPPlsp.sss+plphlhcsstps.ppscph.cup.hhRcRhl..........scss.sp+.......psttss.........sPu...hspshshthoPs..shsQ-schQs.p.....lsLs+ ...........................................t..p.hsSclshEpp.........+hppstEc..u.h.s.ccss...s..tV.sth+sVs-pPPlsp.sso...cplphlhh.t..p.ppt-ph.pspptt.RcRhl............................csP.stp...........htst.s.................sPu.....t.sh..shthoPs..phspssphp........hsL........................................................................................................................................... 0 12 22 50 +12375 PF12541 DUF3737 Protein of unknown function (DUF3737) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 281 and 297 amino acids in length. 
22.10 22.10 22.60 32.90 21.20 22.00 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.00 0.70 -5.42 36 223 2012-10-02 14:50:22 2009-09-23 16:35:19 3 2 209 0 39 195 4 258.30 45 96.08 CHANGED MppIcpphasGERALFss+shplpsshFs...DGESsLKEo+NIplpsshFchKYPhWascslplcssphp-huRAulWYopslphpsoplpAPKtFRcspslpLcssshssAtEThWsCcslplcssps.pGDYFhMsSpNlhl-slpLsGNYuFphs+NlEl+NuclhSKDAFWNsENVTVhDShIsGEYLGWpS+NlThlNCpIpusQsLCYh-sLslcNCchl.sTcLAFEY.SsV-AsIsusIsSVKNPhSGpIpAcpIGclIhDcsplss.spspIhsp ....Mp.IcpphasGERsLFstcDhplpsssFs...cGESsLKEspNIphpsshFchKYPhWascshplcsshFpphuRuul.WYopslphpsohlpAPKhFRcspslpL-NVphssApEThWpCcslplcNlph.pG-YhhMpSpNIhlDslp.sGpYsFphs+NVEl+NuplhoKDAFWpsENVTlYDS.lsGEYLuWpS+NlphlNCpIpupQsLCYhcsLshcNCphh.ssDLAFEY.SslpAsIpusIpSlKNPhSGpIpAcplGplIhDcsthss.spspl..t............. 0 12 23 31 +12376 PF12542 CWC25 Pre-mRNA splicing factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF10197. There is a single completely conserved residue Y that may be functionally important. Cwc25 has been identified to associate with pre-mRNA splicing factor Cef1/Ntc85, a component of the Prp19-associated complex (NTC) involved in spliceosome activation. Cwc25 is neither tightly associated with NTC nor required for spliceosome activation, but is required for the first catalytic reaction. 
26.10 26.10 27.20 27.20 26.00 26.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.02 0.72 -10.59 0.72 -3.54 62 293 2009-09-23 15:39:42 2009-09-23 16:39:42 3 5 251 0 220 284 0 95.10 32 23.98 CHANGED c..cctp+l-........WMYpss.................ttttpppE-YLLGK......pclD.phlppppttptpthsptpt..............................sstp-thsKl.p-DPLhtI..KppEppthpthhpss ..........................p+pc+L-........WMYpuP...................ssttppE-Y..LLG+.....plDph.lt....ppp...pp..ctptsuspssh...h..ss....................................sottDhtsKl..+EDPLhhI..++pEppthcthhpsP................. 0 73 122 182 +12377 PF12543 DUF3738 Protein of unknown function (DUF3738) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 251 and 457 amino acids in length. 21.90 21.90 23.40 24.90 21.50 20.70 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.18 0.71 -4.73 49 107 2009-09-23 15:51:44 2009-09-23 16:51:44 3 9 14 0 98 138 12 196.00 27 60.42 CHANGED LpsllthAYslp....shQ.lsGPsWl........ssc+..aDlsA........Kh.....Psuss........pc......ph.tMLQsLLs-RFpLshH+Eo+chslYsLsluKs....GsKl+tsssssssstss.............................................tttphpspphshstLAc....hL.....u..phhs....pPV.lDpTGLsGpYDhsLpas.pt........................................................sssssslhs.AlpcQL...GLKL.cscKtPl-slVlD+sc .............................................ttllthAYslp......s.ph....h.suPsWh........ssp+..aDl...A...........+s...........ss.sss..............pp................phptMLQsLLs-RF.pLphHpEs+chssYsLsluKs....GsKLctsps.sssss.s.s..............................................tstphpspshshsplup............hL.........s.....thls...cPV.lDpTGLsGpYDhsLpassps........................................................sssssslhs.AlpcQL...GLKL.cspKssl.-slVlD+h.................................. 
1 74 98 98 +12378 PF12544 LAM_C Lysine-2,3-aminomutase Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is typically between 111 and 127 amino acids in length. The family is found in association with Pfam:PF04055. LAM catalyses the interconversion of L-alpha-lysine and L-beta-lysine, which proceeds by migration of the amino group from C2 to C3 concomitant with cross-migration of the 3-pro-R hydrogen of L-alpha-lysine to the 2-pro-R position of L-beta-lysine. 26.70 26.70 26.70 26.90 26.50 26.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.52 0.71 -4.09 14 526 2009-09-23 16:00:04 2009-09-23 17:00:04 3 8 452 4 212 447 50 90.00 46 21.48 CHANGED IEuLRGHTSGYAVPTFVVDAPGGGGKIslpPNYlISQSs-KVVLRNFEGVIToYPEP-sYhs.....tps-shFtph....tpcpp.hGluuLhs-..c.phuLsPcsLpRh-RRcth.tp.ttpohKcpRc++- ......hEuLRGHTSGhAVPTaVlDAPGGGGKlPlh.P.s.Y.l...lS..p.....u...ss+.l...lLRNaEG.hlssYs-..P..t................................................................................................................................ 0 97 161 189 +12379 PF12545 DUF3739 Filamentous haemagglutinin family outer membrane protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF05860. 20.50 20.50 21.20 48.10 18.30 19.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.48 0.71 -3.95 24 96 2009-09-24 09:08:12 2009-09-24 10:08:12 3 4 63 0 46 104 5 111.90 55 3.05 CHANGED GGDIhhWSupGDIsAG+GuKTslsssPsph..l.......hDsp.Gssp...ls.sussoGuGIusLpshss.......s.tsGsVsLlAPpGslDAG-AGIRsuGNlsluAhpllNAsNIp.VsGsssGlPss .GGDIhsWSupGDINAGRGSKTollhsPP.+t..l.......YDsh..GNVo....LsspsPuoGAGIATLsslP-........l..ssGDlDLIAPhGTIDAGEAGIRVSGNlNlAALpVlNAuNIQ..l.pGpooGlPs....... 
1 3 17 26 +12380 PF12546 Cryptochrome_C Blue/Ultraviolet sensing protein C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 113 and 125 amino acids in length. The family is found in association with Pfam:PF03441, Pfam:PF00875. Cryptochromes are blue/ultraviolet-A light sensing photoreceptors involved in regulating various growth and developmental responses in plants. 22.80 22.80 23.20 23.10 19.80 22.70 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.09 0.71 -3.42 20 114 2009-09-24 09:12:39 2009-09-24 10:12:39 3 3 39 0 22 110 0 116.80 39 20.11 CHANGED sPIuFPp-lp.MEhc+p..PlR.ss.........sshs..RRhcDQMVPoM.T......SSlhRs.p-pEhSu-h..pssu..tD.oRAEVPsph..h...........tspsppcphhspssspss.............hppppsh.p..pphhttt.htsSTuESSSShp ....sPIsFPp-lp.ME.scp..PsRsss.........hsss..RRhcDQMVPSh.T.......SSllR...s...p-pEhSsDh..pN.s...s-SRAEVPpsh..h...........psptppc-slspshspss..............ppppsh..t.phppphtttshtsSTuESSSShp.... 0 3 10 16 +12381 PF12547 ATXN-1_C Capicua transcriptional repressor modulator Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 49 and 781 amino acids in length. There is a conserved IQT sequence motif. ATXN1 directly binds Capicua and modulates Capicua repressor activity in Drosophila and mammalian cells. The polyglutamine expanded mutant type of ATXN-1 does not bind Capicua with as high affinity as wild-type ATXN-1. It is associated with spinocerebellar ataxia type 1 (SCA1). 25.00 25.00 25.80 27.40 22.90 23.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.50 0.72 -4.00 5 77 2009-09-24 09:15:23 2009-09-24 10:15:23 3 2 60 0 27 69 0 39.00 78 8.02 CHANGED GLpLGKPVSRSsShosppS.th.....p..p.tu.VsolSPHTVIQTThsuoEuLP ..GLHLGKPGHRSYA...........................LSPHTVIQTTHSASEPLP..... 
0 1 4 9 +12382 PF12548 DUF3740 Sulfatase protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 144 and 173 amino acids in length. The family is found in association with Pfam:PF00884. 25.00 25.00 30.50 30.50 21.00 19.40 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.89 0.71 -3.92 21 188 2009-09-24 09:16:27 2009-09-24 10:16:27 3 4 75 0 95 159 1 142.50 43 16.51 CHANGED hptpas+pRppRul.hch....-splhclshE....cc.hph.pshslhccH..............pccsc+-spp.ssss........sssssshhss.pplpsTh............................................RCalhsN.DpVpCssslYcstcuWKcp+spIDt.IcsLpsKIpsL+El+tpLKcp+Pppssss+t ....................................................................................................+spas+sRptR.S.lu..lEh....-GplYclsL-...ptt.....psRslsKRH..............tcpscp.ch...t..s...sGss.............hhsss.sthussssl+VTH...............................................................................+CaIL.N.DTVpC-p-LYpShpAWKDHKhaID+EIEsLQsKIKNLREVRGHLK++RPEECsCsK......... 0 12 19 56 +12383 PF12549 TOH_N Tyrosine hydroxylase N terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. There is a single completely conserved residue G that may be functionally important. Tyrosine hydroxylase converts L-tyrosine to L-DOPA in the catecholamine synthesis pathway. 21.10 21.10 23.20 21.20 20.70 20.80 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.35 0.73 -6.60 0.73 -4.23 7 141 2009-09-24 09:17:42 2009-09-24 10:17:42 3 8 42 0 40 141 0 23.40 47 13.09 CHANGED PTPshuoPphpGhRpAVSEhDsKQA ................shsSPphhGhRpul.E.stKpt 0 3 5 14 +12384 PF12550 GCR1_C Transcriptional activator of glycolytic enzymes Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 80 amino acids in length. This family is activates the transcription of glycolytic enzymes. 
21.70 21.70 23.30 27.10 21.30 21.40 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.81 0.72 -4.11 69 275 2009-09-24 09:19:23 2009-09-24 10:19:23 3 11 71 0 192 269 0 79.80 27 13.55 CHANGED paphs+shpoVt-lacEahpG..htGpPulpph......-pcaG.spWR....ssppp..phap+RKhlhchIpp............shp.cshstppslchlEphR .......achs+s.poVh-lacEahtG.......ltGpP..olcpl......-cpYG.spWRt...sspcp......+has+.....R+hlhchIpp.................s.p.pshshppslphlEphR....................................................... 0 38 111 186 +12385 PF12551 PHBC_N Poly-beta-hydroxybutyrate polymerase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 50 amino acids in length. The family is found in association with Pfam:PF07167, Pfam:PF00561. There is a single completely conserved residue W that may be functionally important. PHBC is the third enzyme of the poly-beta-hydroxybutyrate biosynthetic pathway. 25.00 25.00 31.30 31.60 21.60 21.30 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.24 0.72 -4.43 58 186 2009-09-24 09:22:42 2009-09-24 10:22:42 3 3 155 0 73 185 19 45.40 41 7.67 CHANGED ssssshptlD+th+Asl.A+hTuGlSPsulthAahDWuhHLAsSPGK ......s...s.hphlD+thcAsl.A+hTuGLSPAuLshAahDWhhHLAsuPGK........... 0 14 40 58 +12386 PF12552 DUF3741 Protein of unknown function (DUF3741) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. 21.80 21.80 22.30 35.20 21.20 20.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.17 0.72 -4.24 30 140 2009-09-24 09:27:59 2009-09-24 10:27:59 3 5 19 0 93 127 0 45.00 40 5.10 CHANGED lppKhh-u..K+Lus...DcphppSKEFh-AL-lLsSNK-LFlKhLQ-Pso ................lppKhh-u..KcLuo...cpphppSKEFt-AL-lLsSN+-LFLKhLQ-Pss... 
1 11 52 76 +12387 PF12553 DUF3742 Protein of unknown function (DUF3742) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. There is a single completely conserved residue Y that may be functionally important. 20.90 20.90 21.10 21.00 20.10 19.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.06 0.72 -3.51 20 78 2009-09-24 09:30:26 2009-09-24 10:30:26 3 1 60 0 28 76 3 51.80 29 43.65 CHANGED LALL.LsFsllAAWhspssss....................DpsEtt...sEaRpG.uGaGLYcss.hRlD .........................................hALl.lshlslAuWhspsss..............................c.s-...........phRpG.sGaGhYsss.hRlp......................... 0 5 19 26 +12388 PF12554 MOZART1 DUF3743; Mitotic-spindle organizing gamma-tubulin ring associated Gavin OL lg7 Prosite Family The name MOZART is derived from letters of 'mitotic-spindle organizing proteins associated with a ring of gamma-tubulin'. This family operates as part of the gamma-tubulin ring complex, gamma-TuRC, one of the complexes necessary for chromosome segregation. This complex is located at centrosomes and mediates the formation of bipolar spindles in mitosis; it consists of six subunits. However, unlike the other four known subunits, this family does not carry the conserved 'Spc97-Spc98' GCP domain, so the TUBCGP nomenclature cannot be used for it. MOZART1 is required for gamma-TuRC recruitment to centrosomes [1]. 19.90 19.30 21.30 24.10 19.20 18.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.04 0.72 -4.50 48 221 2009-09-24 09:35:15 2009-09-24 10:35:15 3 7 188 0 167 215 0 47.20 48 44.27 CHANGED upcsl-llaclSsLLNT.GLD+csLslslpLlEsG.VsPcALApllpcl+ ......c-shDlLaEISplL....NT.uLD+cTLSlClpLhEpG.lNPE.ALA.....sVl+ELR..... 
0 49 82 127 +12389 PF12555 TPPK_C Thiamine pyrophosphokinase C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 50 amino acids in length. The proteins in this family catalyses the pyrophosphorylation of thiamine in yeast and synthesizes thiamine pyrophosphate (TPP), a thiamine coenzyme. 22.70 22.70 24.30 26.70 22.30 22.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.42 0.72 -4.24 50 284 2009-09-24 09:39:24 2009-09-24 10:39:24 3 3 282 0 98 221 0 53.40 32 13.78 CHANGED +uVupLY+sRlSst.tlhhllLAuLlAlssslhlossu..pshhphlssphpshht ...KuVupLYRs+lSsu.ulhhLlLusLlAlhlslhlosss..tshlphlhspWsphh...... 0 44 78 92 +12390 PF12556 CobS_N Cobaltochelatase CobS subunit N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF07728. There are two completely conserved residues (P and F) that may be functionally important. This family is the N terminal of the CobS subunit of cobaltochelatase. Cobaltochelatase belongs to the AAA+ superfamily of proteins. CobS and CobT form a chaperone like complex. 21.50 21.50 21.70 23.20 21.30 21.00 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.10 0.72 -7.70 0.72 -4.52 79 346 2009-09-24 09:46:14 2009-09-24 10:46:14 3 3 337 0 109 304 959 35.70 48 11.07 CHANGED spshhstPDpplSVR-lFGIDoDhpVPAFoc.ts-+V .......shsshPDppVSVR-VFGIDoDhpVPAFop.sss+V.... 0 28 67 82 +12391 PF12557 Co_AT_N Cob(I)alamin adenosyltransferase N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 20 amino acids in length. The family is found in association with Pfam:PF02572. Cob(I)alamin adenosyltransferase adenosylates Co(I) in an ATP-dependent manner in the conversion of aquacobalamin to its coenzyme form. 
This is the third step in this process, after two steps involved in the reduction of Co(III) to Co(I). 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.64 0.72 -6.66 0.72 -3.98 89 1083 2009-09-24 09:49:35 2009-09-24 10:49:35 3 3 996 1 172 551 72 22.10 48 11.01 CHANGED tsstcspRHptRMp+hKpshDp+l ...........sERappRQQ+lK-pVDARl... 0 23 74 122 +12392 PF12558 DUF3744 ATP-binding cassette cobalt transporter Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF00005. There is a conserved REP sequence motif. There is a single completely conserved residue P that may be functionally important. The proteins in this family are frequently annotated as ABC Cobalt transporters however there is little accompanying literature to confirm this. 22.50 22.50 22.70 24.00 22.40 18.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.28 0.72 -3.53 55 750 2009-09-24 09:52:23 2009-09-24 10:52:23 3 4 746 0 68 428 1 72.30 36 12.82 CHANGED poslLpchGlREPLYloALKtuGhslsphpclsslssLshsp.hpttlppWhp.ptshhppppppc.............sLLclcsLoa ......oslLppsGIREPLYlTuL+thGlDlsptcpLusL.cslshsc.h...hppa....psp....scpp.ctp.............sLLcLcpVoa.............................. 0 19 34 49 +12393 PF12559 Inhibitor_I10 Serine endopeptidase inhibitors Coggill P pcc MEROPS_I10 Family This family includes both microviridins and marinostatins. It seems likely that in both cases it is the C-terminus which becomes the active inhibitor after post-translational modifications of the full length, pre-peptide. it is the ester linkages within the key, 12-residue. region that circularise the molecule giving it its inhibitory conformation [1, 2, 3]. 
20.50 20.50 26.20 22.20 18.00 18.80 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.17 0.72 -3.69 18 45 2009-09-25 12:15:30 2009-09-25 13:15:30 3 2 25 1 16 49 1 49.40 44 91.94 CHANGED Msps.........p..p.cAl.PFFARFL-pQ..................................t.tstsstpsp.s.h.ThKYPSDhE-s ..................psppsKAl.PFFARFLpspp...............................t.ps.psssscssh.sThKYPSDWE-.. 1 2 7 16 +12394 PF12560 DUF3745 Protein of unknown function (DUF3745) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00097, Pfam:PF10426. 21.20 21.20 21.20 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -7.87 0.72 -4.29 99 1647 2009-09-25 12:31:12 2009-09-25 13:31:12 3 7 890 0 1 1687 0 38.20 45 8.84 CHANGED ANs-KGKsAASLDKVs....DshsLhsQc.PFcpcppLNsuhQo ...ApppptcsAASLDKVsEpp.-ssslhsp..PFcoDp-LNpuhQT...... 0 0 0 1 +12395 PF12561 TagA ToxR activated gene A lipoprotein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF10462. There is a conserved GAG sequence motif. This family is a bacterial lipoprotein. 25.00 25.00 41.50 40.70 18.80 16.70 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.80 0.71 -4.17 9 172 2009-09-25 12:32:18 2009-09-25 13:32:18 3 8 73 0 7 89 0 135.30 45 17.05 CHANGED a-h.ts.p.th.spGW.passslssppltpsp...WpThplsspp.hlC+Fsa.sssGcptsFVGaV-...sssthCpuuc-hpap.suppp.h.Sp.sDYpLLS.hGc..GpV.oYsPss-lGEssLCoLs+sGss....GAGFlsss .......F-L.pPcp.EhpLcGW.Qtsssls.tp...h....Nsp......WpTMhls...sppphICRFsYhussGcphpFVGals...tpcplCoGGR-l+ahp.Dtpp..I-SphsDYELLShhGc..GpV.oYsPsspIGEspLCoLspsuss....GAGFl+s.t. 
0 3 3 5 +12396 PF12562 DUF3746 Protein of unknown function (DUF3746) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF04595. 21.80 21.80 32.70 37.40 18.20 17.30 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.19 0.72 -7.77 0.72 -4.10 9 53 2009-09-25 12:33:09 2009-09-25 13:33:09 3 1 36 0 0 43 0 36.10 65 9.39 CHANGED SoFssPpL+ssspsp....sC+KpsFsNSpaTTRTssal ....SSFPVPTIKSVTNKKK...ICKKHCFVNSQYTTRTLSHI 0 0 0 0 +12397 PF12563 Hemolysin_N Hemolytic toxin N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF07968, Pfam:PF00652. This family is a bacterial virulence factor - hemolysin - which forms pores in erythrocytes and causes them to lyse. 22.50 22.50 25.00 24.50 17.50 17.40 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.08 0.71 -4.86 7 120 2009-09-25 12:33:59 2009-09-25 13:33:59 3 4 81 15 5 93 2 176.30 68 28.55 CHANGED .KlNpptssh.lslhSulusssshAsIspPsGtAlphhSplpssptlhYhNAu.W.sEcsph.......oLsplR-pVlNQphRhhlDFStIpspspp.sphpsphRpphGluFussFllIoEHKGELLFTPhDss-D.........lsstLLEAsttpRsh..ot.sott...sossETsoLPHVAFYlNVNRsISDpECTFsNS ..........................Kh.ppt.sl.hTILSAlS.SsTlhANINEPSGEAADIISQVADSHAIKYYNAADWQAEDNAL......PSLAELRDLVINQQKRVLVDFSQISDAEGQ.AEMQAQFRKAYGVGFANQFIVITEHKGELLFTPFDpAEE.........VDPpLLEAPRTARLL..uRSGFAS..PAPANSETNTLPHVAFYISVNRAISDEECTFNNS..... 0 1 2 4 +12398 PF12564 TypeIII_RM_meth Type III restriction/modification enzyme methylation subunit Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF01555. There are two completely conserved residues (F and S) that may be functionally important. 
This family is a bacterial phage resistance protein. It functions in a type III restriction/modification enzyme complex. It is part of the methylation subunit of the complex. It binds DNA and methylates it. 25.00 25.00 29.50 29.20 19.80 19.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.66 0.72 -3.99 21 216 2009-09-25 12:35:04 2009-09-25 13:35:04 3 4 186 0 19 219 15 56.10 43 11.50 CHANGED hDcpLlphLLpspp....lKppFFpcl.sshhlFchpcFlchlc.Kca......Lss..SaT+apNKIGL ......D.pLlshLLps-p....lKcpFFhcl....sssLVFchpcFh.hL-p....+p.......lss..SYT+YsN+IGL.. 0 7 10 14 +12399 PF12565 DUF3747 Protein of unknown function (DUF3747) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 215 and 413 amino acids in length. There is a conserved DSNGYS sequence motif. 25.00 25.00 141.80 141.50 20.50 20.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.03 0.71 -5.21 38 86 2009-09-25 12:38:19 2009-09-25 13:38:19 3 4 48 0 36 105 45 180.40 40 57.40 CHANGED hthhhphsslsshslsshhs..ssupAuh.FspptlsQscalslApP...hG..sspapLLllEQI...pspptCWpEpussPs.lV-PLLLsFD...FTGICsRuoDSNGYSlRhuGpDlGhsYpLRlhppss-LlLh....uhs.t.ssssst.lllGRopGh.s...sG.FlKlpLpPGWchs+RoYpG+sLGHlYhusspsss ............hh.....hsslsshslsuhhs..ssspAuhFspptl-QscalslAtP...hG..sspapLLllEQI...ssp+.CWs-sGusPs.hV-PLLlsFD...FTGICsRusDSNGYSlRlsGpDLGhcYhLpllppsu-LlLl....usspt..ssstsp.lllG+opGhu....sG.FlKlpLpPGWphs+RsYpG+sLGHlYhsssps.s.... 0 2 21 32 +12400 PF12566 DUF3748 Protein of unknown function (DUF3748) Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 120 amino acids in length. 
21.70 21.70 22.00 27.50 19.90 19.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.70 0.71 -4.20 11 511 2009-09-25 12:39:24 2009-09-25 13:39:24 3 7 505 0 47 269 3 117.80 83 28.83 CHANGED AaVGVVTVSPspP.RYsFIHGPEpPDspWpYDFHHRRGVlVt..psGpApsLDAhDITsPYTsGALRGGSHVHVFSPDGsRLSFTYNDHV..MHEhDsphDhRNVGVAlPhpsVss.sKpHPREY ..........AHVGVVTVHPcuE.....K.YVFIHGPENPDETWHYDFHHRRGVIs...EuG....+VoNLDAMDITAP.YTPGALRGGSHVHVFSPNGERVSFTYNDHV..MH.....EhD..........PA....LDLRNVGVAAPaG......P.VNV....QKQHPREY................... 0 4 15 30 +12401 PF12567 CD45 Leukocyte receptor CD45 Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 77 and 1130 amino acids in length. The family is found in association with Pfam:PF00041. CD45 plays a critical role in T-cell receptor (TCR)-mediated signaling. CD45 interacts with SKAP55 which is a transcriptional activator of IL-2. 25.00 25.00 25.60 30.40 24.90 17.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.26 0.72 -4.12 10 83 2009-09-25 12:40:21 2009-09-25 13:40:21 3 19 34 0 13 87 0 60.40 47 7.86 CHANGED TVDYLYscpTKLFTAKLNVNEsVcCsNs.....sCTNNElpNLsECcp.toVolSHNSCTsPsKpLp .oVcYhYsppoKhFTAcLNVs.-sVcCsss......sC.sNElpNLsECcp.tsVolSHsSCssPtKpl..... 0 1 1 2 +12402 PF12568 DUF3749 Acetyltransferase (GNAT) domain Gavin OL, Bateman A lg7 Prosite Domain This domain family is found in bacteria, and is approximately 40 amino acids in length. The proteins in this family are acetyltransferases of the GNAT family. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.53 0.71 -4.54 21 619 2012-10-02 22:59:21 2009-09-25 13:41:38 3 2 602 1 54 719 54 121.30 65 95.94 CHANGED KLTIppLs....phSsQDhIDLuKIW.........Ppps.ptLpttl......sssppLaAARFN-RLLuAscVplpsp..pupLscLpVREVTRRRGVGhYLl....-EshRph..PplppWhhsttshp..chsshssFhpAsGF..stpsstWp ..........................................................................KLTIlRLE....pFS.c.QD+ID..Lt...K.I..W.................P.EYSsS.SL..p..V........D.-.s.HRI.Y.A..A..R.F..N..E...R.L.....Lu..A.V......R..V......T......L......S......G.....T........p..GA......LDSLRVREVTR.RRGVG....QY....Ll....EEV.....LRsN.....P.s.V..Ss.W.W.M.A.D..sGVE..DRuVMsAFMQALGF..TAQpsGWE....................................... 0 7 19 36 +12403 PF12569 NARP1 NMDA receptor-regulated protein 1 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF07719, Pfam:PF00515. There is a single completely conserved residue L that may be functionally important. NARP1 is the mammalian homologue of a yeast N-terminal acetyltransferase that regulates entry into the G(0) phase of the cell cycle. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 517 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.82 0.70 -5.75 32 545 2012-10-11 20:01:02 2009-09-25 13:43:38 3 42 309 0 357 564 23 365.10 30 57.91 CHANGED hEaSEhlLYpsplltEuGphEcALc+L..cphpppllD+LAlhEh+uchLlcLGRppEAptsYcsLLcRNP-NhpYYcuL.csh..........t..................hspt...p.pshhslYcphsppa.P+usAs+RlPLcFhp.................G-cFcphs-tYlcphLpKGVPSLFsslKsLYs.cspKhsllp-LlptYhp...shpssuphstps....p.ptcssoshhashYaLAQHYsa....hts.......p-..KAlcaI-pAIppo.Pohl-hahsKARIhKHuGDltcAAchM-cARpLDttDRYINoKsAKYhLRsscsccAtchhuhFTRpts..ushssLp-MQChWa.hEsucuahRpt+hGhALK+apsVp+hFsphhEDQFDFHoYslRKholRAYVchLRaEDpLRsHPaYh+AAtsAlcsYlpLaDpP...hsps.......ph..u.........shsssEcKKhtc...Kt+Ks.pp+tcc-ptct.ttppt....tp..ttttttKttsscsct.Ds...DPhGpKLhpop..-PLc-AhKFLpPL.phuspslEsplhuFEVahR+pKhLLALpslppA ....................................................................................................................................................E.SEhhlYp.s.lh......c.....t.......s.hpp....uLphl.....pp...ttphhDthth.Ehp..u.phhhpLs..ph......p-.AtthaptL..phNs.-s..Yattl.psh............................................thhth...ap...ph..t.h....s...p..s..ss+.h.ls.Lphh.......................s.p.p..F.pphhptal.....h..p+.......G.hPshFssl....+....L........Y.......p.........t........K....h....t..h..lt......plh.th.t...................p..t.pt....................p..ss..hh...s.hhLAQHash......htp..........p..hAhph...lp...tAIt..........s...Ps....h.l...-..hahhKu+IhK......+hGph.pAs.phh-pAptLDhtDRalNsKs.sKYhL+s............s....................p...........cA.phhshF.T+tt...................sshtsL.-hQshWa.hcsutua.t..............................t.p..h.................u.ALK+hht.l.pha..h.-DQaDFHsa.shRKhphRuYlphlchpD.lhtp.hah+us.....h.shph.Yhtlh-...p............ttt................t......................t......p...cch.p....p.ppt.t.p.t...tt........................................................t............p...s.t....l.......t.............t.htt.
h...p.hh..h...s......t.............s..h..hc....h..t...tt.h..h..................................................................................................................................... 0 119 195 289 +12404 PF12570 DUF3750 Protein of unknown function (DUF3750) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 175 and 265 amino acids in length. 25.00 25.00 36.40 98.40 19.70 17.50 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.90 0.71 -3.88 33 129 2009-09-25 12:44:47 2009-09-25 13:44:47 3 1 127 0 59 131 32 131.70 45 55.96 CHANGED tpAlVpVYAA+shuWRGhaAlHoWlls....KtsuuspYsRa-V........lGWGh.....sl+p.sthsPDutWaGutPcllhshcGctAcplIspIcsAlpsYPass..pYcsaPGPNSNTFlAalh+pVPpLplsLPssAIG+DY ...pAlVpVaAApshuWRGhhAVHsWIlh....KctGuspYsRY-V.............luWGt.....slRp.sshsPDuhWaGupPcllsshcGtpApplIPcIcsAlpsYPass..sY+sWPGPNSNTFlAalhRpVP-LplsLPssAlGKDY 0 13 26 39 +12405 PF12571 DUF3751 Phage tail-collar fibre protein Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is approximately 160 amino acids in length. There are two completely conserved residues (K and W) that may be functionally important. The members are annotated as being putative phage tail or tail-collar proteins. 
23.00 23.00 23.10 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.80 0.71 -4.40 47 961 2009-09-25 12:45:44 2009-09-25 13:45:44 3 33 662 0 136 906 8 148.70 39 30.54 CHANGED M...u.....sllTptGcphlAppsutGpslplsphlhAsssG.....s.hssspshsshsphVacsslsph..uhhNsNtVlhshllssslGsF.ashlGLh..sssusLhulspsPpphKhpstpGs...Gpslscshllpao..sspplTtlplssssW.hshtAch.pshD ..........Ms..cahollTshGts.+l.AsAs.A.h..G.p...l.p.lopMAVGDGsG.............shss..P.s..ssQ...T.t.Llpc.hhRuslNpl.....lDsp.N..s..s.tl...l..A.EhlI.P..p..s......h..G...GFalRElGLa....D-sG..sLIAVuN.s.P.E...o.YK...Pt.ltEGS....G+spThRhllhlS..ssssls.LplDsss....lhATpcYVD......................................................... 0 19 59 98 +12406 PF12572 DUF3752 Protein of unknown function (DUF3752) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 140 and 163 amino acids in length. 22.30 22.30 22.30 22.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.04 0.71 -4.09 41 249 2009-09-25 12:46:56 2009-09-25 13:46:56 3 8 209 0 181 244 1 139.20 25 38.15 CHANGED PPpppshtuhhsss.....RpFspu....+sut.........ssSsWT-TPt-+tcRhppphhuhpstst..........sttttttpsppstc.......ttlcpYscp.pRucSLlppHpccpppptpp..................................pppsssstRsFD+-+Dhplspp..sptpp+chlp+u.tshusRFus ................................................................................RpFpptt......tttts..............spShWT-TPt-+t+....+hp.pp..uhps.s.....................pttpp.thhpt.pcpc................htpplppascp...pRu.cSLh-hHpcchcppttp.....................................pppss.p....p+s..FDR-+Dlpssph....spsp++pllp+u.pplsoRFup...................... 0 62 95 141 +12407 PF12573 OxoDH_E1alpha_N 2-oxoisovalerate dehydrogenase E1 alpha subunit N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. 
The family is found in association with Pfam:PF00676. There are two conserved sequence motifs: VPEP and RPG. This family is the alpha subunit of the E1 component of 2-oxoisovalerate dehydrogenase. This is the enzyme complex responsible for metabolism of pyruvate, 2-oxoglutarate, branched chain 2-oxo acids and acetoin. The E1 component is a heterotetramer of alpha2beta2. The homodimerised beta subunits are flanked by two alpha subunits in a 'vise' structure. 25.00 25.00 28.60 28.20 19.60 18.50 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.06 0.72 -4.41 28 200 2009-09-25 12:48:08 2009-09-25 13:48:08 3 1 198 5 52 156 12 42.80 58 10.39 CHANGED tssLpLHVPEPssRPG-pPDFSplcIscAGul..RPslsssPt- ...h..sLpLHVPEPssRPGspsDFSal+lstAGsVR.RPsIDspPt..... 0 9 26 39 +12408 PF12574 120_Rick_ant 120 KDa Rickettsia surface antigen Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. This family is a Rickettsia surface antigen of 120 KDa which may be used as an antigen for immune response against the bacterial species. 
21.00 21.00 22.70 21.60 20.20 19.80 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.66 0.70 -5.28 7 128 2009-10-28 17:27:32 2009-09-25 13:49:09 3 1 98 0 11 123 1 227.80 70 29.18 CHANGED lL+-hhs.hs.-LAEphthc.....-cD++hcsFh....tNsspRphlspAhEssEhKKtLEsIEIsGY+NlhsoaSA........................................................................tstYtGGF+PlQWcNpl.SAusLRuTVlcNDAG-ElCTLsEpThKTpPhhlAKQDGTpVplsSYRpIDFPIcL-c.AsGoMHLShVAhcsDGppPucs+AVYFTAHYEps..PNGpPpLKElSSPpPLKFhGsss-AlAYIEHGGEIYTLsVTRGKYcEMMKEVtlNpGQuVDLSQ..tpDlhcVQGpup ...........................................................ILKDLAALTDRDLAEQKRKEI..EcEKDKTLSsFF....GNPANREFIDKALEsPELKKKLESIEIAGYKNVHsTaSA........................................................................AsGYsGGFKPVQWENpV.SASDLRATVV....KNDAGDELC.TLNETTVKTKPFTlAKQDGTQ.V.QIoSYREIDFPIKLDK.ADGSMHLSMVALKADGTKPSK.DKAVYF..TAHYEEG..PNGKPQLKEISSP.p.P.LKFAGTGDDAlAYIEHGGEIYTLAVTRGKYKEMMKEVcLNpGQSVDLSQ..A...EDlhhsQGtSp................... 0 1 2 2 +12409 PF12575 DUF3753 Protein of unknown function (DUF3753) Gavin OL lg7 Prosite Family This family of proteins is found in viruses. Proteins in this family are approximately 70 amino acids in length. There is a conserved YLK sequence motif. There are two completely conserved residues (D and F) that may be functionally important. 21.60 21.60 21.60 69.00 21.40 21.00 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.56 0.72 -4.18 14 56 2009-09-25 12:50:08 2009-09-25 13:50:08 3 1 43 0 0 36 0 71.00 54 97.91 CHANGED MDKLYsulFGVFhoSsD-.DFppFI-lV+SVLoDcps.tp.pss.s..t..hlllsllhlllllll..hFlYLKlh MDKLYAAIFGVFMuSs.-D...Dhs-FIEIVKSVLSD-Kosss.osssss.ashaaLIIlhhlVlIllLL..laLYLKVV. 0 0 0 0 +12410 PF12576 DUF3754 Protein of unknown function (DUF3754) Gavin OL lg7 Prosite Family This domain family is found in bacteria, archaea and eukaryotes, and is typically between 135 and 166 amino acids in length. 
There is a single completely conserved residue P that may be functionally important. 21.50 21.50 22.50 21.80 19.60 21.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.87 0.71 -4.55 22 186 2009-09-25 12:53:38 2009-09-25 13:53:38 3 4 103 0 108 188 16 150.90 23 30.01 CHANGED a-clllhh+...........................................hcssph.tt......................................tppphshtssplhl+lFcslPcsDL-hlFPNt+lshphhDplhlslsulhushslllphlht.hlhlsshh.hh........................hhtslhuhssLuuaha+passYKs+ph+apptlscsLaFKsLssNuG ...............................................................................................................................................................................................................................................ttpttslhlKtFKsIPhsDLEhlLPcp..c.lphs.hDtlhlslsslluhlslhsshhthh.......s..............................hhsslhllssh.s.hhsh+th.h..sFppphspap.hhlscsLY.KslssspG............................. 0 31 67 86 +12411 PF12577 PPARgamma_N PPAR gamma N-terminal region Gavin OL lg7 Prosite Family Peroxisome proliferator-activated receptors (PPAR) are nuclear hormone receptors that control the expression of genes involved in lipid homeostasis in mammals. This sequence region is found at the N-terminus of these proteins. The family is found in association with Pfam:PF00104, Pfam:PF00105. It is not clear if this region is a separate protein domain. 
25.00 25.00 27.10 26.10 21.70 20.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.07 0.72 -3.67 8 132 2009-09-25 12:56:48 2009-09-25 13:56:48 3 3 66 0 30 125 0 83.90 64 18.98 CHANGED MVDT.QhhuWP.VGFGLSulDLsEL-DcSHSLDlKPFoTlDYoSISS...................hcY-ssPsps-.hs+sMDhophYuYch.....................................+hppsQsSIKL ..MVDT.EMPFWP.sNFGISSVDLSsMDDHSHSFDIKPFTTVDFSSISo...................PHYEDIP.........FsRsDPhVsDYKYDL.....................................KLQ....EYQSAIKV....................................................................................................................................................... 0 1 3 10 +12412 PF12578 3-PAP Myotubularin-associated protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 115 and 138 amino acids in length. Myotubularin is a dual-specific phosphatase that dephosphorylates phosphatidylinositol 3-phosphate and phosphatidylinositol (3,5)-bisphosphate. 3-PAP is a catalytically inactive member of the myotubularin gene family, which coprecipitates lipid phosphatidylinositol 3-phosphate-3-phosphatase activity from lysates of human platelets. 20.40 20.40 20.50 20.60 20.30 20.20 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.57 0.71 -4.33 21 204 2009-09-25 12:59:09 2009-09-25 13:59:09 3 9 92 0 111 195 0 124.90 25 18.37 CHANGED p.pth....s.........hcslssttpph.tss..hht.h.ph....ssspsp.tstspph.ts..ths.......p.pphLpPtptshplplWsQCYhRWhPhtplpsGG.spl.hphphhhsc..l...pclpchlcppchtplsssphc ........................................................................................hP.s.p....tpt.hh..pp.h.hp.......sp......p..sp.tss.cphhcphauhP............ss..cGlLLPt...........hpGscl+lWppCY.LRWlPEsQI.ttGG.sts..p.lp.Lh-E.....l....ppLpctlcptp.t........t................................... 
0 19 30 63 +12413 PF12579 DUF3755 Protein of unknown function (DUF3755) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 40 amino acids in length. There is a single completely conserved residue N that may be functionally important. 25.00 25.00 27.80 27.10 24.90 23.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.40 0.72 -4.56 27 109 2009-09-25 13:00:09 2009-09-25 14:00:09 3 2 21 0 64 100 0 34.90 40 12.41 CHANGED ushp.lp-NlsLhppsRcNIhslLpchs...phPslMsp .....shp.hp-NlsLFp+sRcNIpslLsc.Ms...phPGIMsp........ 0 11 34 49 +12414 PF12580 TPPII Tripeptidyl peptidase II Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF00082. Tripeptidyl peptidase II (TPPII) is a crucial component of the proteolytic cascade acting downstream of the 26S proteasome in the ubiquitin-proteasome pathway. It is an amino peptidase belonging to the subtilase family removing tripeptides from the free N terminus of oligopeptides. 
25.00 25.00 29.30 28.20 21.10 21.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.14 0.71 -4.99 24 162 2009-09-25 13:02:15 2009-09-25 14:02:15 3 9 122 1 112 157 4 190.60 42 15.09 CHANGED l-lp.u.LtsEclpPslsLcphthsh+Psp.uKIpPLu.sRDllP.sG+QlaplhLTYphp.........................ls.cuuEVssphPhlschLYEucFESQhaMlFDsNKphlusGDuYsp................................hKL-KG-YTl+LQlRH-ppplLEKlK-hslhlppKL.s.s.lsLclasshssslsG..stcassttltsGpspshYl.sslss-KLPKsshPG .........csp.osLphE-.lsPsloLKs............hspsLRPsp.uK....lpPL...............u..sRDl.LP..ssRQlYp............hl.....LTYsF+..........................s.KuuEV.sPpsPlLs...-hLYESEF-SQhWhlFD.p.NKphhusGDAYPc.............................pas.h.KL-.KG....-.YTlRLQlRHEphshLE+L.K-l..shllsc+L..u.ssloLDlapshs..............tAlhG......ppKh..s.shsLsPt............hspshal.ssl.s.cD.KlPKsssP....................... 0 42 59 89 +12415 PF12581 DUF3756 Protein of unknown function (DUF3756) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 40 amino acids in length. 25.00 25.00 36.30 34.80 17.80 16.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -7.98 0.72 -4.27 18 76 2009-09-25 13:03:18 2009-09-25 14:03:18 3 4 5 0 0 68 0 40.50 88 2.91 CHANGED lASGYRTN..ALVAPQAKISIGAYAAEWALSTEPPPAGYAIVR .LASGYRTN..ALVAPQAKISIGAYAAEWALSTEPPPAGYAIVR 0 0 0 0 +12416 PF12582 DUF3757 Protein of unknown function (DUF3757) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 154 amino acids in length. 
21.10 21.10 25.60 24.40 20.60 20.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.02 0.71 -3.92 25 155 2009-09-25 13:04:14 2009-09-25 14:04:14 3 2 95 0 19 82 0 109.00 29 78.44 CHANGED sspsCPulssI.pts........GsYoA..ssssscWhGs..pG.hsptp..lppFppAhhl.tss.t.....t.Gph.pCoYpl.spstplDhhap......spshhholps.pst....Wcpppssh...h.hh.Co..sssscpCpFc .........t..ppCPshstIp..t......GsYsu....sstpWhG...pss.spts..lppF..Ahhlstst.......s.sth.pCTYtl..tstplDhhat....................hspshhholhs.psh....Wctppssh...hp.hhCo..ssusppCpFs..... 0 2 7 12 +12417 PF12583 TPPII_N Tripeptidyl peptidase II N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 190 amino acids in length. The family is found in association with Pfam:PF00082. Tripeptidyl peptidase II (TPPII) is a crucial component of the proteolytic cascade acting downstream of the 26S proteasome in the ubiquitin-proteasome pathway. It is an amino peptidase belonging to the subtilase family removing tripeptides from the free N terminus of oligopeptides. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.97 0.71 -4.04 7 69 2009-09-25 13:04:59 2009-09-25 14:04:59 3 7 30 1 23 76 0 126.70 54 20.09 CHANGED sGuuNsuususuTsAAAA.AsTssusKPKssus..ttsuh..sssAuGDGVssQo-sPsps.sssPuSPKKGKosuD-YuEuLRDFQCoaIsKs-hEhAEKIYp-VltAHPKHLtAHLhLIQNIESspLK.spLPLsFssupcsp ...........................P.PAKKoSSTANATTGAAs.AlTNsATNGN.VANA........GSNGT....GNNVITATNGAA..NGSlPNGTAVK.ENRSKW.DEYCEGLRDYQTAQISKLDA.ENA....ENVYQALLKDNPNHLA...AHLAMADHFDS.TDLK.Q..NLPYTFTAShD.u....... 0 9 12 21 +12418 PF12584 TRAPPC10 DUF3758; Trafficking protein particle complex subunit 10, TRAPPC10 Gavin OL lg7 Prosite Family This domain forms part of the TRAPP complex for mediating vesicle docking and fusion in the Golgi apparatus. 
The fungal version is referred to as Trs130, and an alternative vertebrate alias is TMEM1 [1,2]. 21.60 21.60 21.60 21.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.90 0.71 -4.70 32 265 2012-10-04 00:47:01 2009-09-25 14:07:17 3 10 233 0 196 280 1 163.40 21 14.07 CHANGED slslPslphlhsssht.hppssh........................................................................tVGp.lshplplcp.pphWss.t.t..............................p.sh....chhYcl..sss-sWhluG+++Gphph.................ppssphp......hslhllPLtsGaL.hPplplpshs...........................................................tt..sscl..psuuppllVl ........................................................................................................................................h.hP.lphhasspht....p.tt............................................................................hspsGphhshplplpp.sp.hpst.....................................tttst.......chhYEl...hsssssWhlsG+p.pGshsh................................tpssptp............lslhllPLpsGaL.shPslclhphh..............................................................t...ssps..pshu.plhV......................................... 0 49 96 159 +12419 PF12585 DUF3759 Protein of unknown function (DUF3759) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 107 and 132 amino acids in length. There is a single completely conserved residue H that may be functionally important. 
25.00 25.00 49.70 48.60 23.30 22.50 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.07 0.72 -4.08 39 128 2009-09-25 13:08:28 2009-09-25 14:08:28 3 3 85 0 89 132 0 92.90 46 72.33 CHANGED psacpVh..st........pHcu+hSHELlAGAAuFEAhKsaE-+pc+....................pGKPsSHAhAKElLAGhAGAtlD+lhET.KGLD...alD+-cAK+cAccpscchhcpcY ..............t.tapplh..st......pHcu+hSHELlAGAAuFEAhKAaE-Hpc+....................pGK..PsSHAhAKElLAGhAGAtlD+llET.KGLD...alD..+-+AK+cAccpAcchhsppY................. 0 33 54 76 +12420 PF12586 DUF3760 Protein of unknown function (DUF3760) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 46 and 64 amino acids in length. 25.00 25.00 26.80 25.40 23.90 20.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.36 0.72 -8.25 0.72 -4.12 15 44 2009-09-25 13:09:30 2009-09-25 14:09:30 3 2 3 0 13 37 0 51.60 31 15.26 CHANGED oL.................sPL.ssVpclIh-cLutlsP..lphLplS...........................................+haY.....................+cllPplY+sVsls ....................LssL.tsVpclIh-pLstlsP..lphlplS...........................................+haY.....................+chlPhlY+pVsl.. 0 13 13 13 +12421 PF12587 DUF3761 Protein of unknown function (DUF3761) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 100 and 157 amino acids in length. 
22.00 22.00 22.10 22.00 21.90 20.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.51 0.72 -3.87 9 145 2009-09-25 13:10:56 2009-09-25 14:10:56 3 3 109 0 45 146 5 71.50 43 53.39 CHANGED +tsslsuuLhAshhhhshsAaAhs.............stusLspcspYhN+DGtsVHuPA+sh..psPsGATA+C+DGoYSFSpH++GTCSGHGGVspWh ..............................................................................................t............................................................hpAPA.....p....s.P.sGs.T....A.....hC+DGoaShutp..+.+GsCSGHGGVssWh... 0 8 21 32 +12422 PF12588 PSDC Phophatidylserine decarboxylase Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF02666. Phosphatidylserine decarboxylase (PSD) is an important enzyme in the synthesis of phosphatidylethanolamine in both prokaryotes and eukaryotes. 23.40 23.40 23.40 23.60 23.30 23.30 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.88 0.71 -4.57 45 205 2009-09-25 13:12:22 2009-09-25 14:12:22 3 6 143 0 118 199 5 133.40 35 31.88 CHANGED LtPslp-hpcLI.......EscstlhhLhspMFpp.........tsssGp..plcsacchLpllNtllspAPpass...........suLlGhPlNAlLDWPMuTsuGashFhcPplNtpLK+lLstWupFLsoPcSs..pVLsss.pt........GWFussAhpphpp ..............tsslp-hppLl.......EssshlhMhhspMhc-...............tsssG.p...t..lcsapchLpllstlh.T.p.APpasp...........sGLlGhPlNAlLDW.................PMsTsuGauhFhcspVNtplKclLshW..s.p.FLp....oPcSt..tsLsss..s........GWhuspAhpph..t........... 0 34 64 91 +12423 PF12589 WBS_methylT Methyltransferase involved in Williams-Beuren syndrome Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 72 and 83 amino acids in length. The family is found in association with Pfam:PF08241. This family is made up of S-adenosylmethionine-dependent methyltransferases [1]. 
The proteins are deleted in Williams-Beuren syndrome (WBS), a complex developmental disorder with multisystemic manifestations including supravalvular aortic stenosis (SVAS) and a specific cognitive phenotype [2]. 21.00 21.00 26.60 25.20 19.90 19.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.29 0.72 -3.50 79 336 2009-09-25 13:14:03 2009-09-25 14:14:03 3 6 300 0 234 315 1 79.90 30 28.36 CHANGED LhsG...sh.pt.spsts..p..................t.pph.httpp.pttpppct+th.cp.....u.K-WIh+KKEphR++..G+.cVtsDSKYTGRKR+s+ ......................................................G.........sth.s..........................p.pph.t.hs.t.pct.ptp.+pt.c.tcth.cp......................o.+sWIlcKKEphRRp..G+.cV+s..DSKYTGRKR+s.... 0 82 131 194 +12424 PF12590 Acyl-thio_N Acyl-ATP thioesterase Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is typically between 120 and 131 amino acids in length. The family is found in association with Pfam:PF01643. The plant acyl-acyl carrier protein (ACP) thioesterases (TEs) have roles in fatty acid synthesis. 22.00 22.00 50.00 47.30 20.20 19.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.99 0.71 -3.68 24 113 2009-09-25 13:15:00 2009-09-25 14:15:00 3 2 47 0 24 123 0 119.70 58 29.72 CHANGED M.VAouAuSAFFPVs.Sssssusu..........t.....GphssSLs..GlKuKss.souuhQVKANA....pAsPKlNGopVuhpss.cshcp-s...sss...SssPRTFlNQLPDWSMLLAAITTIFLAAEKQWhMLDWKP+R..PDMLlD ....MsAo..sAs.SuFFPls.osussssu.............tp...hGphssolshtGltuKss..ssGuhQVKAsA....QAsPKlNGopVslhss..phpp-c...sss...ussPRTFlNQLPDWSMLLAAITTIFLAAEKQWhMLDWKP+R..PDMLlD......... 0 1 12 19 +12425 PF12591 DUF3762 Protein of unknown function (DUF3762) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF05533. 
21.90 21.90 22.20 51.60 21.50 17.00 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.61 0.72 -3.95 2 72 2009-09-25 13:15:54 2009-09-25 14:15:54 3 3 1 0 0 63 0 78.90 86 6.53 CHANGED .PSMVAIPVPIuFGshssTAWCS.uDAAVLRCRLsYHAAETsFpspcKHVRYVYNDVSSAANRPRTVSsRKCGpVFPStS ..PPSMVAIPVPISFGshPTTAWCS.A.DAAVLRCRLDYHAAETSFRNE.GKHVRYVYNDVSSAANRPRTVSPRKCGRlFPSGS.... 0 0 0 0 +12426 PF12592 DUF3763 Protein of unknown function (DUF3763) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF07728. There is a single completely conserved residue F that may be functionally important. 21.80 21.80 21.80 23.50 21.40 20.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.56 0.72 -4.34 26 649 2009-09-25 13:17:07 2009-09-25 14:17:07 3 2 645 1 56 296 0 56.90 57 11.46 CHANGED tssphhppLschcpcl+cpRp.......tFpppQPplFIsschLstIEuSLhplscplcphppp ...........h.PuEIKQQLEcLEsDWR+QHs.......hFSEQQ+CLFIsuDWLGRIEASLQDVGtQIRQAQQC... 0 3 14 34 +12427 PF12593 McyA_C Microcystin synthetase C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF08242, Pfam:PF00501. There is a conserved YAN sequence motif. Microcystins form a large family of small cyclic heptapeptides harbouring extensive modifications in amino acid residue composition and functional group chemistry. These peptide hepatotoxins contain a range of non-proteinogenic amino acids and unusual peptide bonds, and are typically N-methylated. They are synthesized on large enzyme complexes consisting of non-ribosomal peptide synthetases and polyketide synthases. This family is made up of the C terminal of microcystin synthetase, one of the proteins involved in this synthesis pathway. 
25.00 25.00 53.30 52.40 17.40 16.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.02 0.72 -4.20 4 77 2009-09-25 13:18:40 2009-09-25 14:18:40 3 12 60 0 1 68 0 39.10 66 5.19 CHANGED DVIFspcQspt..+hsLpsFTsTp.Qt....DWQhYANpPLQP+L DVIFs.hpsp...+tshhsFTPTH.pAKP..-WQhYANHPLps+L 0 0 1 1 +12428 PF12594 DUF3764 Protein of unknown function (DUF3764) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. Proteins in this family are typically between 89 and 101 amino acids in length. 25.10 25.10 25.20 29.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.71 0.72 -4.25 25 55 2009-09-25 13:20:22 2009-09-25 14:20:22 3 1 33 0 17 59 799 83.20 36 88.41 CHANGED hpToVhTFclossFsEWsthaDupc.pthpcphGIpsLYRGVSc-DPpKlhVlhQA.E.GshppFhpssp..-hIcuuGHlh-oT.hos .....EToVhsFclossF-EWtthaDup-.pthacphGlpsLaRGhSp-DPp+lhVlhQus-.Gshpphhpssp..chlpuuGHlh-oThho.h.. 0 1 7 13 +12429 PF12595 Rhomboid_SP Rhomboid serine protease Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 210 amino acids in length. The family is found in association with Pfam:PF01694. Rhomboid is a seven-transmembrane spanning protein that resides in the Golgi and acts as a serine protease to cleave Spitz. 
25.00 25.00 61.90 56.20 22.80 22.00 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.56 0.70 -4.73 31 130 2009-09-25 13:20:38 2009-09-25 14:20:38 3 5 50 0 58 113 0 199.70 59 25.43 CHANGED VSKDs-ST.Q+WQRKSlRHCStRYG+LKsQVhREL-LPSQDNlSLsSTETPPPLYlsspp.....hGMQKIlDPLA...RGRAFRhs--.sD..G.SsPHT......PlTPGAASLCSFoSSRSGasRLPRRRKRESVA+MSFRAAAALlKGRSlhDuThpRsp..RRSFsPASFLEEDosDFsD-LDTSFFuR-shhp..EEhSohPD-VFESPu-uAhKphtps..s-pssLTGuALD.....+ .......VScD.-up.Q+WQR....KSl+HCS.RYGKLKsps.REL-LPSQ-ssShpuTEoPsPhhls................KIlDPLA...RGRAFRhsD-.sD.....t.psP...Hs..............PlTPGshSLsSFoSsRSGas+LP.RRKRpSVA+MSF+AAAALlK..GRS..Vh-..u..T.h.pRsp...+RSFs.sSFlEEDsVDhsDphDoSFF..u.......+..hh......EEhSohPD-VFESPs.uA.h.hths...sp.ss.sGstl.................. 0 3 7 21 +12430 PF12596 Tnp_P_element_C 87kDa_TransP; 87kDa Transposase Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 78 and 110 amino acids in length. The family is found in association with Pfam:PF05485. There are two completely conserved residues (D and G) that may be functionally important. This family is an 87kDa transposase protein which catalyses both the precise and imprecise excision of a nonautonomous P transposable element. 22.60 22.60 23.10 22.60 21.70 21.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.77 0.72 -3.71 11 61 2009-09-25 13:22:20 2009-09-25 14:22:20 3 3 21 0 20 62 0 83.40 39 15.90 CHANGED ssscpspp-pals....oopp.cspspppp.........lph.................t.s-Esps.......th.ss.ss.h-hsE....pcDulEYlsGYlh+Kh+.......Lu-hsppsso ...........................................................NltpDNs-sWLN.hs...o.psppcsc.sp.......................................shscEs.-..........phhsNl-h.h-hDE.........LTEDAhEYlAGYVl+KLR.......luspsppp..hs............... 
0 3 3 20 +12431 PF12597 DUF3767 Protein of unknown function (DUF3767) Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 112 and 199 amino acids in length. 25.00 25.00 30.40 26.90 23.40 22.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.54 0.71 -4.43 31 226 2009-09-25 13:23:46 2009-09-25 14:23:46 3 2 207 0 158 219 0 110.60 25 72.96 CHANGED Psc.ss.....................sps.......tt.pc..olp-Ahcols.hsDF..tphhphPChR-uhlsGhuuhhslGulphlhtt..shhpA.sNWuVGuFhLuulsuaE.CphpRcpphpthppAhchhtc+ctcphccp.pp ........................................................................hpthp.h..sh...hsltphPChR-uhLhGhuuuhshGslpFlhsu..phtpu.ssauVGuFhlsolusW.hCphp...ptppp.thp.shchhtph..........t............................. 0 46 80 124 +12432 PF12598 TBX T-box transcription factor Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 77 and 89 amino acids in length. The family is found in association with Pfam:PF00907. There are two completely conserved residues (S and P) that may be functionally important. T-box genes encode transcription factors involved in morphogenesis and organogenesis of vertebrates and invertebrates 20.10 20.10 20.60 25.40 19.10 20.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.30 0.72 -3.61 15 119 2009-09-25 13:25:01 2009-09-25 14:25:01 3 2 42 0 52 109 0 84.90 42 13.25 CHANGED RhYEEppK.t+-susSDcSSuEtssh+..cpss..SPsuussush+l+c.............sS+D-+.tsssss........schpsss--csspssSPutpt ........RlYEE+.pK..+-susSDtSSsE.ssh+..spss..SPh.s.ussus.pL+c..................ps+-E+.stusss........sc.pppopEcsstshu.....s................................................................... 1 2 7 20 +12433 PF12599 DUF3768 Protein of unknown function (DUF3768) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria. 
Proteins in this family are typically between 108 and 129 amino acids in length. There are two conserved sequence motifs: NDP and RVLT. 25.00 25.00 37.60 37.60 18.80 18.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.91 0.72 -4.07 41 95 2009-09-25 13:25:45 2009-09-25 14:25:45 3 1 59 0 32 104 44 83.50 41 66.99 CHANGED hhTpGltuL........upp.h.....tpllptVpsFDsFss-NDPaGEHDFGsl................chtGppl..aWKIDYY..DhshpauSsDPuDsslTtRVLTlMLAsE ........................hTtGltuL........utth.........tpllctVtsF-sFss-NDPaGEHDFGsl................ch....pGppl..aWKIDhY..Dhsh.....paGSs-PuDsshTtRVLTlMLAsE....... 0 9 26 29 +12434 PF12600 DUF3769 Protein of unknown function (DUF3769) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 560 and 931 amino acids in length. 20.50 20.50 20.60 20.90 20.40 19.80 hmmbuild -o /dev/null HMM SEED 452 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.44 0.70 -5.67 39 99 2009-09-25 13:26:29 2009-09-25 14:26:29 3 5 90 0 50 103 251 383.70 26 53.78 CHANGED tssGplsRhRFpAs+lphsscG.WcApchphTNDPFoPsplclcAcssphpp.ss..sc..lpsppsRLll-p+lslP..l.pcphl..pcp...c-.......shhshGaDscDRs....GhFltRshsslp.ssshpLplpPQahlQRAlpsts.s............sppsssh..s-hFGLpucLsuphs..shphpspuslooh.sh-c.hpsshRhpsclppsls..h.sshphsh.hsYRpRlaNGSLG.psV.uuhGuhl............p.................sssh...sssulphsYp......huhthsshps-ph...........psss..........hssLhRhphhuSLspsasLWpGc.shs.TsppuhR.YoPtPllPhlsl.....sTslsushuhYu...sGss.QsoLshosG.phplGpFS+sahDYTthslshutslpsGt.SPFhFDRhlDhtsLshGloQQIYGPlhlustsulNlDs.....GchlsophtLcapRRoYslslhYNPhpplGulphRlsDFN 
.................................................................................................................................................................................................................................................................s..stlpphRhputplph.sps.WpupphthoNDPasPsphclputps.h.t.ss...c..l..ptspllh-pthshP..h..pp.h..ppp.p........h.hGhDst-+s....Ghaltpthp.lp.stsh.hplpPQhhlQ+uh...............................tt.tsh..sshaGl.schpsphs..phphp.psp.lssh.s.sp.htst..Rhphphppp...........hs............hphph.hs.YR.RhaNGoLG.psl.puhGshlt.................p......t.sshthsa.......htht.hphps-p......................pspphhshhRhphhsolspth.lWpup.shs.ssppuh+.YoshslhPhltl.....................ssslpushuhYs...sGpp..Qss....ltuolGhshQhGpFo+sahDYTt....hs........ls........h..........utshh........s.....Gs.S........P....F.hFD+hsD.......ht.........s.......L.......shulsQQlhGPlhhssposlslDs..........sp.hsop.h.lpap+...R...oYtlhhhYsPh.phGuhphRls-Fp........................................... 0 11 33 45 +12435 PF12601 Rubi_NSP_C Rubivirus non-structural protein Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF05407. The rubella virus (RUB) nonstructural (NS) protein (NSP) ORF encodes a protease that cleaves the NSP precursor (240 kDa) at a single site to produce two products. 21.60 21.60 80.10 80.10 20.80 17.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.19 0.72 -4.08 3 69 2009-09-25 13:27:13 2009-09-25 14:27:13 3 2 10 0 0 72 0 55.10 100 3.83 CHANGED VCAVGGGPRRVSDRPHLWLAVPLSRGGGTCAATDEGLAQAYYDDLEVRRLGDDAMARAALASVQRP VCAVGGGPRRVSDRPHLWLAVPLSRGGGTCAATDEGLAQAYYDDLEVRRLGDDAMARAALAuVQRP 0 0 0 0 +12436 PF12602 FinO_N Fertility inhibition protein N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 62 and 102 amino acids in length. 
The family is found in association with Pfam:PF04352. The FinOP (fertility inhibition) system of F-like plasmids consists of an antisense RNA (FinP) and a 22 kDa protein (FinO) which act in concert to prevent the translation of TraJ, the positive regulator of the transfer operon. 25.00 25.00 49.70 48.80 21.00 20.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.57 0.72 -3.68 3 274 2009-09-25 13:28:13 2009-09-25 14:28:13 3 2 214 1 3 135 1 61.60 89 33.64 CHANGED MTEQKRPVLTLKRKst.EGTAPVcuo....suPGhVpRKKlVVVoTPPAWKVKKQ.........KLsEKAARcAE.................AAARKAAPcP ..MTEQKRPVLTLKRKT..EGETPVRS............RKTIINVTTPPKWKVKKQ.........KLAEKAAREAE.................LAAKKAQARQ............. 0 0 0 2 +12437 PF12603 DUF3770 Protein of unknown function (DUF3770) Gavin OL lg7 Prosite Family This domain family is found in viruses, and is approximately 250 amino acids in length. The family is found in association with Pfam:PF04196. 21.50 21.50 21.90 66.60 21.30 21.20 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.53 0.70 -5.26 6 138 2009-09-25 13:29:41 2009-09-25 14:29:41 3 2 49 0 0 162 0 244.00 40 10.83 CHANGED ss--scEllaRaRLARslh.EL..+pl.Pth.cuDEsls+ppRElpuh.hpuIpI...DWplTEuKF.sPFo+chF-+Fss..hpsDp-Ylu+Ilpcssccuhccl-+spahs-slspptRh-+NuEpuhstlppKhsphpuapphc-hpc..HKSTVQhPshls+....sussspsLpsLpsls....spGsHPhpclWcp.lsssss.lpcIERh+-Ds.hELchAhuuloc+s.E.....RNKYHRssLshs.--+lYlAhlGVs .....ssEAEEllYRaRlAhplhscL..R........ssDpELsKoEcELLAh.lpuIph......NhshsEusF.PPho+EMa-+FhS..oPsDs-YIT+llStp.lpop-cLhsspahtcssstthRhp+Nu-Es..thpcsLpphput.c.s.+shss..pKuTlQLPPWLsh....hss-upDlsshpGh-....shtDH.PhspLWcc.slssss...LppIEchH.sDsAtEL-hAlSsst.-+s-E.....Rs+Y+Rs+LshuSc-plYhAthGVs.. 0 0 0 0 +12438 PF12604 gp37_C Tail fiber protein gp37 C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and viruses, and is typically between 49 and 166 amino acids in length. 
The family is found in association with Pfam:PF03906. In T-even phages, gp37 and gp38 are components of the tail fiber that are critical for phage-host interaction. 21.70 21.70 21.80 22.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.86 0.71 -4.42 9 109 2009-09-25 13:31:37 2009-09-25 14:31:37 3 18 66 6 4 116 1 139.60 26 22.38 CHANGED tssh.s.lp.......lpss.th.......pus.pt..a....cssstpthalu.tGussssDsoh..........hpss.h.hsc..h.sts....sGshpsscWt...ppWLsspL.p.....t.hh............................................hhs..th.t.sshs.hph...us.....sushhlsshuGhh .........................................................sthsGphplpsspsh......hpuusstutalhu.+ssstssWYlGpGussssDhs.Fashh.h..so.tlt.........lpp..shhshN.c.htlGtAh.ltssGsI.Gohht...staLsshlps......t.h................................................................................................................................................................................................................................. 0 0 2 2 +12439 PF12605 CK1gamma_C Casein kinase 1 gamma C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 54 and 99 amino acids in length. The family is found in association with Pfam:PF00069. CK1gamma is a membrane-bound member of the CK1 family. Gain-of-function and loss-of-function experiments show that CK1gamma is both necessary and sufficient to transduce LRP6 signalling in vertebrates and Drosophila cells. 
21.20 21.20 21.40 21.30 20.40 20.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.22 0.72 -3.42 17 249 2009-09-25 13:32:50 2009-09-25 14:32:50 3 3 74 6 101 230 0 69.50 50 17.15 CHANGED +...............hsTPsGshps...-sssos......sR-pp.hhpp.....................t..ppN...............................................t.QsloSTNG-LN.sDDPTuGHSNsPI ...................................hsTPVGulp....-sshSs......sR-tHthps+..............................pN...........................................t.t.............sps.QVV.SSTNGELN.sDDPTAGHSNAPI..................... 0 15 25 51 +12440 PF12606 RELT Tumour necrosis factor receptor superfamily member 19 Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 49 and 288 amino acids in length. There are two completely conserved residues (K and Y) that may be functionally important. The members of tumor necrosis factor receptor (TNFR) superfamily have been designated as the "guardians of the immune system" due to their roles in immune cell proliferation, differentiation, activation, and death (apoptosis). The messenger RNA of RELT is especially abundant in hematologic tissues such as spleen, lymph node, and peripheral blood leukocytes as well as in leukemias and lymphomas. RELT is able to activate the NF-kappaB pathway and selectively binds tumor necrosis factor receptor-associated factor 1. 28.90 28.90 29.30 29.20 28.70 28.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.41 0.72 -4.82 17 207 2009-09-25 13:33:46 2009-09-25 14:33:46 3 1 110 0 84 175 0 48.90 35 22.33 CHANGED YhhhllVslFhlhGLLGlhICplLKpKGY+Cos-s.-sp..pccppttc......p .....hhhhllVslFhlhGLLGlhIC...plLK+KGY+Cos-c.Esps.tstp......p................. 
0 6 17 30 +12441 PF12607 DUF3772 Protein of unknown function (DUF3772) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00924. 25.00 25.00 28.20 26.80 23.60 22.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.85 0.72 -4.33 69 289 2009-09-25 13:35:58 2009-09-25 14:35:58 3 3 288 0 87 297 69 63.60 29 7.86 CHANGED phActhhscAssLhspIsplhRsphsppLhpRusSPLsPshWssshpshspshpplts.ssphh ............ppApthttpApsLsspIsplRRshhpspLstRusS.LuPsFWsslhpshscDhp+Lpshtsp..h....... 0 12 37 61 +12442 PF12608 DUF3773 Protein of unknown function (DUF3773) Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are approximately 110 amino acids in length. 19.90 19.90 19.90 26.00 18.70 17.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.27 0.72 -3.95 5 53 2009-09-25 13:37:22 2009-09-25 14:37:22 3 1 51 0 6 43 6 101.50 63 75.38 CHANGED h-l++plchlAcshsAcF.................sLcuRploYDEV.FSDTGLLPA...........LsRRADQLsS.LCLGYGlGsoa--AEsALLGV+ssFDEsTP-uLRLhChhDVlsELMpuusuh ..........t.DIpKsMuhIAAuhNAKF.................YLNDRFVSa-EV.FSDTGLLPA...........IA+RADQLCS.LCLGYGLGATaDEAEsALLGlRVVFDEVTPNsLRLLCMTDVlNELIQGGPS.R.. 2 3 4 6 +12443 PF12609 DUF3774 Wound-induced protein Gavin OL lg7 Prosite Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 81 and 97 amino acids in length. The proteins in the family are often annotated as wound-induced proteins however there is little accompanying literature to confirm this. 
27.30 27.30 32.80 31.10 21.70 21.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.91 0.72 -3.13 44 190 2009-09-25 13:38:21 2009-09-25 14:38:21 3 2 24 0 125 175 0 75.20 42 81.28 CHANGED lAuSluAVEu..lKDQ...thsRWs.shR.Slpppucsph.tshststt....tsutt............sssttspptcpsEESLRpVMYLS.CWGP ................................VAhSlGAVEA..LKDQh...GlCRW.NaALR.SlpppA+s...ss....tu.h....upup+.....hsuus...............ussspcc+tcpuEEuLRsVMYLS.CWGP.......... 0 6 76 103 +12444 PF12610 SOCS Suppressor of cytokine signalling Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF07525, Pfam:PF00017. The suppressors of cytokine signaling (SOCS) family play important roles in regulating a variety of signal transduction pathways that are involved in immunity, growth and development of organisms. 20.60 20.60 20.70 20.60 19.60 17.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -9.06 0.72 -3.64 17 118 2009-09-25 13:39:11 2009-09-25 14:39:11 3 5 41 0 58 93 0 55.20 45 12.19 CHANGED pspsstp.......+p-Rppussultshstthp.....pt.st+uhusRSLRQ+lQDAVGpChPl+oppppp .......................scssLp.......ppER+ausSSl.....th......Dpssu+phsuRSLRQ+LQDsVG.CFPl+spupp......... 0 5 12 25 +12445 PF12611 DUF3766 Protein of unknown function (DUF3766) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 20 amino acids in length. There is a conserved FTNID sequence motif. There is a single completely conserved residue T that may be functionally important. 21.10 21.10 21.90 21.10 19.00 20.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.42 0.72 -6.76 0.72 -4.42 43 199 2009-09-25 13:40:01 2009-09-25 14:40:01 3 1 191 0 76 173 5 23.90 55 18.69 CHANGED TVITAhDppphc.sNVFTNIDSAVl .TVlTAhDppphK.-NlFTNIDuAVI... 
1 40 68 73 +12446 PF12612 TFCD_C Tubulin folding cofactor D C terminal Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 182 and 199 amino acids in length. The family is found in association with Pfam:PF02985. There is a single completely conserved residue R that may be functionally important. Tubulin folding cofactor D does not co-polymerise with microtubules either in vivo or in vitro, but instead modulates microtubule dynamics by sequestering beta-tubulin from GTP-bound alphabeta-heterodimers in microtubules. 21.70 21.70 21.80 23.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.23 0.71 -4.88 48 307 2009-09-25 13:41:32 2009-09-25 14:41:32 3 9 222 0 228 309 4 174.30 25 17.26 CHANGED ssphsppllsslh+QusEKlD+lRtpAspsLpplh.ptst.........................................t.Lpchhs.pp.t.............................................assssphF...splh.pL..Lsls......................happs...............llpGLssSsGu.hoE.ulh+suppALlpalpt........pppppthttlhssllpllp......cptps-Rlsl.PhlchlshLLss.thht.h..pp.t....hhppLhphlppphhp...op.....sht+ .........................................p....hpplhsslhp.usEKlD+hRttAtpshhpll.ptp.s................................h..........l..th.........ttLpp.lhs.stht...............................................asssppsF.stlh.pL...Lt.l..s........................sYpht................lltGLlsSlGu..loE.Sll+top.tuLhpahp............................ppp.pthtthspsllplhp...................pph.ts..-R.......lhl...PhlchlshLlss.shhp.........h.t.t.............h.plhthhppthht...ttsh........................................................ 1 108 145 192 +12447 PF12613 FliC_SP Flagellin structural protein Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF00669, Pfam:PF00700. This family is the bacterial flagellin structural protein. 
It is involved with cell motility. 25.00 25.00 61.00 61.00 20.50 20.50 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.70 0.72 -3.75 9 66 2009-09-25 14:21:04 2009-09-25 15:21:04 3 3 42 0 11 43 1 57.00 69 15.32 CHANGED suTuGTGTAVs.........................sLoLsouAT....uuhoAAptoAhsNulAQINAVNpPsTVSsLDIST .u.oTAGTGTAso......................TLuLSToAT....SuLSAsDQAsATAMVAQINAVNKPQTVSNLDIST. 0 1 2 8 +12448 PF12614 RRF_GI Ribosome recycling factor Gavin OL lg7 Prosite Family This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 130 amino acids in length. There are two conserved sequence motifs: LPS and LKR. Overproduction of ribosome recycling factor (RRF) reduces tna operon expression and increases the rate of cleavage of TnaC-tRNA(2)(Pro), relieving the growth inhibition associated with plasmid-mediated tnaC overexpression. 24.70 24.70 24.90 38.00 24.30 24.60 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.76 0.71 -4.33 21 130 2009-09-25 14:24:29 2009-09-25 15:24:29 3 1 128 0 22 78 3 124.00 55 96.85 CHANGED hcpsIoIsLPSLIHRIGp-ssKpApslAtphsC-LKRVRRSRNWplsGcAhplQuFtppL+sp..............pspphpaLIpKl-suLhpHuDKLEPLpsKLhRLlppNPsITLAELMptTpCTlsEARsARFsu-s .............p.cpsIsIoLPSLIHRIGu-sVK+hKh.AppacCELKRIRRSRNWQLlGEAtshQpFlhplKpp..............Ehp.shcYLI++lEsuLth.uDKlEPlEspLtRLlpQNPuITLAELMutTcCSLhQARsARFstE.... 0 3 5 15 +12449 PF12615 TraD_N F sex factor protein N terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is typically between 96 and 107 amino acids in length. The family is found in association with Pfam:PF10412. TraD is a cytoplasmic membrane protein with possible DNA binding domains. It is part of the bacterial F sex factor complex. 
25.00 25.00 26.00 33.90 24.60 24.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.00 0.72 -3.66 24 278 2009-09-25 14:28:23 2009-09-25 15:28:23 3 7 186 0 20 265 0 98.40 51 15.94 CHANGED hhhhhhhlllsslhhahchstp...shhsuhhYahsp.....hhthl..s.p.hhpltap......spphptohtphLps.ahltss....sphhptlhhuulhuhllshllhhl ....YsLFIhFWILlGLlLWl+ISWQ...TFlNGsIYWWCT.TLEGMRDLI..+.SQPVYEIQYY......GKTaRMNAAQVLHDKYhIWCG....EQLWSAFVLAulVALVICLITFFl........ 0 0 2 12 +12450 PF12616 DUF3775 Protein of unknown function (DUF3775) Gavin OL lg7 Prosite Family This domain family is found in bacteria, and is approximately 80 amino acids in length. There is a single completely conserved residue G that may be functionally important. 21.90 21.90 22.10 27.40 20.90 19.30 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.44 0.72 -4.19 44 175 2009-09-25 15:18:47 2009-09-25 16:18:47 3 1 164 0 55 130 14 75.40 46 51.75 CHANGED hsppElcshIssLs--Eps-LVALhWlGRG..-aps-EascAhppAtppt......ssssucYLlGpPhLuDaLEpG........L-u.LGh .....scpELtuhIssLsEDEph-LlALMWlGRG..DasssEW--AlspApcct........sscsAcYLlGpPhLuDaLE-G....LsA.LG........... 0 14 33 39 +12451 PF12617 LdpA_C Iron-Sulfur binding protein C terminal Gavin OL lg7 Prosite Family This domain family is found in bacteria and eukaryotes, and is typically between 179 and 201 amino acids in length. The family is found in association with Pfam:PF00037. LdpA (light-dependent period) plays a role in controlling the redox state in cyanobacteria to modulate its. circadian clock. LdpA is a protein with Iron-Sulfur cluster-binding motifs. 
20.60 20.60 24.10 23.20 19.60 19.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.22 0.71 -5.02 45 96 2009-09-25 15:24:46 2009-09-25 16:24:46 3 8 89 0 42 102 157 186.40 40 51.44 CHANGED sLltphssDAlEIHTpsG+tpsFppLWpslssstspL+hlAlSCst.u.............csLlchLhphapllp..s..............s..hhlWQhDGRPMSGDIGs.GTo+uAlpLup+l.....hstt...PGalQLAGGTNspTlshLcp.sh.........................................pthlAGlAaGuaARpLlpPlLcphcppt.p...............................Lc-hP-hltpAlphApuLVsPh ...............LlpphslDAlEIHTpsG+tptFppLWpplssshspL+hlAlSCst.s..............csllchLtphapllp.s....ls..........t...h.lWQhDGRPMSGDIGp.GTT+sulpLup+l.......hstt.......PGalQLAGGTNsaTlstLcp.sh................................................................pshluGlAaGuaARpllsPlLcphpppttp...............................Lc-aP-hlhpAlphApuLVsPh...................... 0 7 28 39 +12452 PF12618 DUF3776 Protein of unknown function (DUF3776) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. 25.00 25.00 30.90 29.70 20.80 20.80 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.26 0.72 -3.51 10 168 2009-09-25 15:27:22 2009-09-25 16:27:22 3 5 35 0 61 137 0 105.10 32 20.21 CHANGED sssppcDc.shshh.tchscK.-psoS...ucsh.slSppspscsuh.-sEs.lucpcppppGsshQhc+SRuuclTushuoc..hh.s.pssccK-cspthssssscul.S.p.p ................................hssKK-ctstsh.tss-t.K...K.-p.sos.....SEsF...ulutcsl...PKsshspsEs.locpc+pppGsuhQhc+uRhsclTushssc...hh.s.pspc+K--s.pthssh.s.pp..ul.S.p.................................. 0 3 6 15 +12453 PF12619 MCM2_N Mini-chromosome maintenance protein 2 Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 138 and 153 amino acids in length. The family is found in association with Pfam:PF00493. Mini-chromosome maintenance (MCM) proteins are essential for DNA replication. 
These proteins use ATPase activity to perform this function. 21.30 21.30 21.60 21.60 21.20 21.20 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.58 0.71 -4.24 58 331 2009-11-26 14:18:40 2009-09-25 17:02:30 3 9 280 0 227 319 0 142.50 31 16.94 CHANGED ---.-t...............-..t-l.t-h...p-h-c.t...E-.--G.DLauDshEc.DYcsp......c.DpY-ts.s..lD.D....-t-hc-hshusRRthEtpLscRD+.httt..............h..thah..s--....---Dsphp.............h.....RRRR+pa-c....-.-sh..sht..-........................hp-ElslEsL....sDlKupol .....................................ts.......................p.......h.p...t.t...pE-E-G--Lhu.Ds.h....Ec.DYRshs......-hDpY-sp.s........lD.D....-.-h.-.-lshusRctsEtphpcRDRphupt.......................phh.shhh.....ss.-...........----tp.t...........................pR+R+ph-c..........tp.st.....th-..-..........................h.-..olEsLtDhKupo............................................... 0 76 121 188 +12454 PF12620 DUF3778 Protein of unknown function (DUF3778) Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is typically between 48 and 61 amino acids in length. There is a conserved LRF sequence motif. 22.10 22.10 23.70 22.40 17.60 15.60 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.09 0.72 -4.41 19 53 2009-09-25 16:06:54 2009-09-25 17:06:54 3 4 3 0 34 41 0 51.00 35 17.06 CHANGED st.shhtsuhthtssshhh.sS.p.+sthllRVE...s.......s..LhLLRFNsELRGsh..LL ....................hh.ssh..t.s..hh.sS..shHsthslRVE...........s..LhLLRFNs-L+Gs.hL........ 0 0 0 12 +12455 PF12621 DUF3779 Phosphate metabolism protein Gavin OL lg7 Prosite Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. The family is found in association with Pfam:PF02714. There are two completely conserved residues (W and D) that may be functionally important. 
This family is likely to be involved in phosphate metabolism however there is little accompanying literature to confirm this. 22.10 22.10 23.20 22.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -10.16 0.72 -4.10 64 338 2009-09-25 16:07:58 2009-09-25 17:07:58 3 13 128 0 268 348 0 94.10 28 9.75 CHANGED FhcPchatsactl+phlPpshth......phs.cht.cpAYhpPul.supsPhlWIPRDs..hGlScpElpcspcs.....lsloD-sAthsE.....K.G+lha.......tspPP...ac ...........................................hpPth..sat.h+thh.t............phssphh.ppAYhpPul.puppPhlWIPRD.s..hGlScpElpcspch............l.loDEsutlDc.....K.s+lha.......sttPP.a.................... 0 57 138 226 +12456 PF12622 NpwBP mRNA biogenesis factor Coggill P pcc Wood V Domain The full-length Wbp11 proteins carry several copies of a PPGPPP motif throughout their length. This motif is thought to be necessary for folding of the molecule as it helps to bind the WW domain, Wbp11, Pfam:PF09429 [1]. This domain together with Wbp11 may function as components of an mRNA factory in the nucleus. 25.00 25.00 27.30 26.20 23.70 22.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.80 0.72 -3.48 27 69 2009-10-16 11:54:12 2009-10-15 17:08:00 2 4 67 0 48 62 2 63.90 30 21.31 CHANGED sp+SlYYcPphNPhGtsPsG..........hP.hh+shtt......sp.sp...............hsp............................lshPp ..sp+SIYYcPshNPhGssPsu..........hP.hh+stpt.t.....sp.ss.........................................hpp............................I.hP..................................................................................................... 0 22 32 45 +12457 PF12623 Hen1_L RNA repair, ligase-Pnkp-associating, region of Hen1 Coggill P pcc Gardner P Domain This domain is the N-terminal region of the bacterial Hen1 protein. This protein forms stable hetero-tetramer with Pnkp. The hetero-tetramer was able to repair transfer RNAs cleaved by ribotoxins in vitro [1]. 
This domain provides the ligase activity of the hetero-tetramer. 20.70 20.70 20.80 27.40 20.40 20.50 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.57 0.70 -5.60 27 120 2009-10-15 16:15:24 2009-10-15 17:15:24 2 5 118 0 59 128 2 241.90 54 50.93 CHANGED MLLTloTT.....cpP.ATDLGaLLHKHP-RlQoFshuhGpAHVFYPEAotcRCTsALLL-VDPlsLV...Rsp+ut...su.s.huLuQYVNDRPYAASShLuVALucVFpTAhsG+CcsRPELAusslPLclclssLPs.R..GGtpLlc+LFEPLGWp.VsApslsLDpsaPpWGcS+YlsLsLpG.slRLu-hLsHLYVLlPVLDssKHYWVusDEVDKLlRtGcGWLusHP-+-LIsRRYLt+ppsLscpA.....LsRLs-s ...........................MhLTloTT.....ppP.A....TDLGaLLHKHP-+sQsFshuaGpAHVFYPEAos-RCTAALLL-VDPlsLV......Rspctt........ssss..suLuQYVNDRPYAASShLuVAlupVFpoAhsGcCcsR.PELAspslPLclclPsLPs.R.........G..G.....spll..c+LFpPLGWs.VsupslsLDtpF........Pp...WGcSRYlpLsLpG.plRLuDALpHLYVLLPVLDcsKHYWVusDElDKLLRtG-GWLssHPE+cLITcRYLp++tuLscpAhpRL...h......... 0 22 46 54 +12458 PF12624 Chorein_N N-terminal region of Chorein, a TM vesicle-mediated sorter Coggill P pcc Pfam-B_PB000002 (release 24.0) Family Although mutations in the full-length vacuolar protein sorting 13A (VPS13A) protein in vertebrates lead to the disease of chorea-acanthocytosis, the exact function of any of the regions within the protein is not yet known. This region is the proposed leucine zipper at the N-terminus. The full-length protein is a transmembrane protein with a presumed role in vesicle-mediated sorting and intracellular protein transport. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.32 0.71 -4.56 121 1194 2009-10-16 09:35:15 2009-10-16 10:35:15 2 56 295 0 818 1223 15 109.70 25 4.37 CHANGED hE...ulls...plLspaLGp....Yl..cslssppL.plu.lasG..cVpLcNLcLKp-....uLcpL..pLP..lplptGhlGc..Ls.lplPWps.L..tsc.PVhlpl-slal.lssPps....tpchst-c.ppppptth....KtpplpphE .......................................thlt.lLppaLup...al..c..s..l....s......cpL.pl.u....l.h....p...G......sl.pLpNLpL+.....pc.........................s....L.p...........p...h........cLP.............lplppGhl..s..c..........ls..lp..............l.P.Ws..p..L..............hsc...Plh..lplcslhl.lhtstt.........t.p.p.t........................................................................ 1 329 455 664 +12459 PF12625 Arabinose_bd Arabinose-binding domain of AraC transcription regulator, N-term Coggill P pcc Pfam-B_PB000001 (release 24.0) Domain AraC is a bacterial transcriptional regulatory protein with a DNA-binding domain at the C-terminus, HTH_AraC, Pfam:PF00165, and this dimerisation domain which harbours the arabinose-binding pocket at the N-terminus. AraC positively and negatively regulates expression of the proteins required for the uptake and catabolism of the sugar L-arabinose 1,2,3]. 
22.60 22.60 22.80 23.40 22.40 22.50 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.12 0.71 -4.47 221 2712 2009-10-16 10:27:52 2009-10-16 11:27:52 2 8 847 0 796 2533 293 180.40 18 52.86 CHANGED Gh-spslLpps..Gl..ss...sh..L..ps..thp.lshpphtpLhptu.hptssc.sslGLchu.pphphsshGhluhshhsusTLtpAlpphh+ahtlh...hhshph......ttttt....stlth.t....................pt.......h-hhhushhphh..ph..lh...sp.hs.hplphp..assP...s.t....ap..phF.s.ss.lpFststs.s.lhhssph..Ls.tPl ..............................................................GhssttlLtts..Gl.....s......t....l..t.....p.....s...psp...lsh.tp.hhplhpth...hp.hh....s....c....ss..h....G..l.p.....hu.p.p.h..ph.s.sh.G.h.luhsh.h.s.us...oLtpAlpthh.cah....plh..ssh.hphpl..........................ptpssh..stlph..p.tt..............h..............pth...h...h-.h.h.lsshhphh.........ph.lh.....upp...ht....h..plphs.......h..stPs.....thp......tYp.phF...s....ss.lpF..s.psts...t.....lhh..stph..Lshs................................................................... 0 161 340 592 +12460 PF12626 PolyA_pol_arg_C Polymerase A arginine-rich C-terminus Coggill P pcc Pfam-B_105 (release 24.0) Domain The C-terminus of polymerase A in E coli is arginine-rich and is necessary for full functioning of the enzyme. 22.00 22.00 24.00 23.10 21.80 20.80 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.63 0.71 -4.49 70 1295 2009-10-19 10:32:25 2009-10-19 11:32:25 2 4 1278 7 247 869 313 125.80 44 27.24 CHANGED pppG..hsshsAhppAsscllspQspphAIP+RFohshREIWpLQ.RLs..+RpG+RshpLlpHPRFRAAYDFLlLRspu.G-......phtpLupWWscaQpsssp..p+pphlpphstpttt.........p+RRRRs+++ps ......................csGLs.aDAh.shAhN-VLD-ts+s..lA.IP+RhTshhRDIWpLQhRhs..RRp...G............KRA.a..+LlEHPKFRAAYDhLtLRA-l.tss.......-hpcLupW....Ws-FQsusss...pp.csMl.splspcss.s..........................+RRpRRsR+p..s............................................. 
0 50 124 193 +12461 PF12627 PolyA_pol_RNAbd Probable RNA and SrmB- binding site of polymerase A Coggill P pcc Pfam-B_105 (release 24.0) Domain This region encompasses much of the RNA and SrmB binding motifs on polymerase A. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.53 0.72 -4.40 312 6138 2009-10-19 12:08:10 2009-10-19 13:08:10 2 37 4516 22 1489 4542 2033 63.10 27 14.21 CHANGED sFpI-tcTtpuIpchus..hLpplusERlhcElhKllhus.psptshchLhctGLh.phlh..Ppl.sth .......................sFplss-Thp.sh..p..p...h...ss.......hL.p...p....l.o.....s.....E....R..lhpEhpK.l..L.h.........us..psptshphL.p.c.hslh.phl.h...Ppl....hh.................. 0 481 941 1254 +12462 PF12628 Inhibitor_I71 Falstatin, cysteine peptidase inhibitor Coggill P pcc MEROPS_I71 Family This family of peptidase inhibitors is expressed from plasmodial protozoal species. Falstatin is found to be a potent reversible inhibitor of the P. falciparum cysteine proteases falcipain-2 and falcipain-3, as well as other parasite- and non-parasite-derived cysteine proteases, but is only a relatively weak inhibitor of the P. falciparum cysteine proteases falcipain-1 and dipeptidyl aminopeptidase 1. Thus, P. falciparum requires expression of falstatin to limit proteolysis by certain host or parasite cysteine proteases during erythrocyte invasion. 
22.10 22.10 23.20 63.40 21.50 18.30 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.23 0.71 -4.60 4 9 2009-10-19 12:24:47 2009-10-19 13:24:47 2 2 8 1 6 11 3 162.40 42 31.47 CHANGED DppYpLsGsEpCDs.lKLGsIlNpTNpcTIshSLoVscshCIshEusuGsGYlWsLLGVHKpcPhINPEpFPpKhlpKsaFSpEISVTQPKthphsp.sspKNsspsspsuuQNpssos+P.KPc...pllGGss.lpSlIKuHKsGKYalVYSYYRPFsPTusANTKIlpLTVQ .........spp.tLpssE.CDp.lKLGsIlNpsNpcTIphshsVsplLCIsLEu.sGsGalWsLLGVHKccPhIsPEpFPpKhlpcSaFSp-ISVTpPht.hph..sp.sspcssspptpssspNpssp.+P.psc...pllGGsshlpShIKsHKsGcYaIVYSYYRPFsPTtssNT+IlpLsVp.................................................................... 0 1 2 5 +12463 PF12629 Pox_polyA_pol_C Poxvirus poly(A) polymerase C-terminal domain Bateman A agb Structure Domain This domain is found at the C-terminus of the pox virus PolyA polymerase protein [1]. 27.00 27.00 48.10 48.00 20.80 20.80 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.33 0.71 -4.59 13 66 2009-10-19 14:10:58 2009-10-19 15:10:58 2 2 44 9 0 62 0 197.10 65 42.22 CHANGED LLsMlKMFSQIDRLEDLscs.EKhplRhuTLLEYsR...hcauIhhsGc...psphshpssh....shspRllTVDs+pY..shsacKChlYLDEss....................LspcIhchs..uD-u.lDFEsV......oNSsaLIcsssh...................YTYFSN....TlLhpscsclH-ISs+uloAHIL...lYplLT+sshtp.sLuDllNSLlshE...KhPlaplIPRDKKsG+HGIIDIEKDIIsH .LLNMIKMFSQIDRLEDLoKDPEKFsARMAThLEYVR...YTHGIlFDGc...+NNMPMKClI........DcssRIVTVsTKcY...FSFKKCLVYLDENV....................LSSDILDLN..ADousDF.ESV......TNSVYLIHDNIM...................YTYFSN....TILLSDKGKVHEISARGLCAHIL...LYQMLTuG-Y+Q.sLSDLLNShMsR-...KIPIYShsERDKKsGRHGhINIEKDIIl.. 0 0 0 0 +12464 PF12630 Pox_polyA_pol_N Poxvirus poly(A) polymerase N-terminal domain Bateman A agb Structure Domain This domain is found at the N-terminus of the pox virus Poly(A) polymerase protein [1]. According to SCOP this domain contains a helix-hairpin-helix motif. 
27.00 27.00 38.20 85.80 24.10 23.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.44 0.72 -3.95 13 66 2009-10-19 14:14:05 2009-10-19 15:14:05 2 2 44 9 0 61 0 112.10 65 24.00 CHANGED MNp........p..hpllcpYLGR.PShsEYahLKpQh+sIp+IhtFNKDlFlu...LlKKNK++FFoDlc..sSsuEIKcRlhpYFoKQcpsp.plG+LhoIIELQolLVooaTclL........GVLTs ........ohsN....IslcIIEsYLGRlPSlNEYHMLKLQsRNIQKIslFNKDIFlS...LVKKNKKRFFSDlD..TSuSEIK-RILSYFSKQTQTY.sIGKLFTIIELQSVLVTTYTDILGVLTI............ 0 0 0 0 +12465 PF12631 GTPase_Cys_C Catalytic cysteine-containing C-terminus of GTPase, MnmE Coggill P pcc Pfam-B_102 (release 24.0) Family This short C-terminal region contains the only cysteine present in these proteins. It is proposed that MnmE is a tRNA-modifying enzyme and that Cys-451 functions as a catalytic residue in the modification reaction. 22.00 22.00 22.00 23.30 21.10 21.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.45 0.72 -3.57 534 4286 2009-10-19 15:56:53 2009-10-19 16:56:53 2 10 4170 8 1009 3196 2082 72.50 33 15.93 CHANGED ttsspsshlsptRHhpsLp.pAhptLp.ps...........hpslp.......tt.....hs...............h-l...hu.-LRtAhcsLucIT..G.c...hs.s.-.-lLspIFSpFC ..............................................................s...t-sshluppRHlptLc.pAtppLppu.............tptlp......ts......hs....................................................................h-L..lu.-L+hAhptLuEIT...............G.-....hs.sD.-LLspIFSpFC............... 0 338 634 843 +12466 PF12632 Vezatin Mysoin-binding motif of peroxisomes Coggill P pcc Wood V Family Vezatin is a peroxisome transmembrane receptor that is involved in membrane-membrane and cell-cell adhesions. In the movement of peroxisomes it binds to class V [2] and class VIIa [3] myosins to guide the organelle through the microtubules [2] and allow pathogens to internalise themselves into host cells [1]. Vezatin is crucial for spermatozoan production [3]. 
In mouse cells it interacts with the cadherin-catenin complex bridging it to the C-terminal FERM domain of myosin VIIA [4]. 22.30 22.30 23.80 23.90 21.00 20.70 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.72 0.70 -5.30 32 191 2009-10-20 13:22:29 2009-10-20 11:54:41 2 3 153 0 121 198 1 252.80 27 36.68 CHANGED sshhhlshLlphsphttsshh....h.......hhhlllhlhhslhththhc.....lphh+ppsltplpshlssspshsshhppslhhl..p..El-lhSpuhp.......................t.tpptpspph.tL+ctLppsls.hh.phpputpplh.hhs.........sssLpcYhslYsl...............s..sLt.........................h.p...................pptc-........................t.olptL+hhht.....+hphlRKhhLCpLLolpt......ts.stsshh........................................................capslhptlpsLspslsph .......................................s...hhlshLlth.sp..hhs..s.sh.hhth......................lllhlhllhhulhhatht+........LphhhpphhsplpshlssupsFsshsppultLI..Q..EsEllSRGap...................uss..shut.....pp.sp.+hltLR+slhpslpthhpshp...uphhhhhhhs...............ssslppYhslh.sl................pphsLs.....................u.pt...hs-...................ppscs.............t.SL.sL+.hLht.....p.chhR+hhL..LLuhss.........sG...sshh...............................................................................chsshhptl+sLp.hhp......................................................................................................... 
0 25 53 91 +12467 PF12633 Adenyl_cycl_N Adenylate cyclase NT domain Finn RD, Bateman A agb Ref [1] Domain \N 21.60 21.60 21.60 21.90 21.30 21.50 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.30 0.71 -4.79 18 1003 2012-10-02 22:47:23 2009-10-20 16:23:27 2 7 911 0 113 583 22 184.90 65 24.20 CHANGED Mt.clpph+p+lDtLsplRl-RALuuhssphpcVFpLlPLLLHhNHPtLPGYls.ssPpGIspFplo-hQppaLssh........................hphp.....t......hpsspssIhGlYuMGSTuSIuQospSDLDlWVCasspLosc-hpLLppKspLlppWAcpasVElNFaLhcpp+FRpppsus......hstEsCGSuQHhLLLDEFYRSAlRLAGK.LLWh ..........................................................................................M.hYlpTL+QRLDulNQhRl-RALAuMussFQpVasLLPsLLHapH..PLhPGYl-..G.....s.VPp..G.IshasPs-p.QppaLs-L........................t.t.h.s......t......spuchPIsGlYoMGSTSSlG.QSsS.SDLDIWV.CH.Q.S.WL.DuEE.RQLLQRKCSLLE.sWA.A.S.LG..VEV.SFFLIDENRFRHNESGS......LGGEDCGSTQHILLLDEFYRTAVRLAGKRILW.s................................. 0 16 40 81 +12468 PF12634 Inp1 Inheritance of peroxisomes protein 1 Coggill P pcc manual Family Inp1 is a family of peripheral membrane proteins of peroxisomes. Inp1p binds Pex25p, Pex30p, and Vps1p, all of which are involved in controlling peroxisome division. The levels of Inp1p vary with the cell cycle, and Inp1 acts as a factor that retains peroxisomes in cells and controls peroxisome division [1]. Inp1p promotes the retention of peroxisomes in mother cells and buds of budding yeast by attaching peroxisomes to as-yet-unidentified cortical structures [2]. 
20.70 20.70 21.80 20.80 19.20 18.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.87 0.71 -4.52 28 115 2009-10-20 15:46:51 2009-10-20 16:46:51 2 2 114 0 83 108 0 140.90 28 25.09 CHANGED stspcsoLFpasssKIl...........t......................h...hspspo.shpppsppllupGshcIaplpsu...........................pssaLssG...shVaPlL.P+hplhpl...ptpsspFlL.lhsPpp.YW+IElss.....p..--tpllcphcpllsplspYps ........................................................................................sLatasss+ll............php.....t.....u.pss.........hp.s.s..p.pshshpppspphlAhGslcIaplssu............................................ssFLssG...sllaPlL.PKsQsapl.....stpuspFllplhpPp..Ya+IElsst.......sp.E-pphlcphcpVlspllpac.............. 1 12 39 68 +12469 PF12635 DUF3780 Protein of unknown function (DUF3780) Bateman A agb Bateman A Family This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria. Proteins in this family are typically between 189 and 206 amino acids in length. There are two conserved sequence motifs: PEERWWL and GWR. This family is found in a very sporadic set of bacterial species, suggesting that it may have been horizontally transferred. One protein is annotated as plasmid borne. 
21.70 21.70 22.30 22.20 20.20 19.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.45 0.71 -4.78 12 34 2009-10-29 09:18:49 2009-10-29 09:18:49 2 2 34 0 15 37 6 187.40 38 93.73 CHANGED Msp................ptpshGFGhsss.ssHpFhV.IPsu+spsVhlhEpauhpuGpsup.................ss...t.RspLs+ptWptlucslcpcFNpRL+cpphpsuRW.KsGcN.V-.RLLGKELsVLsWAlE.sAss-plPsAlpNWpuL+PEERWWLashTsAtsGtspcst.pGWR+AlRhALs-sP................t...h.s+p.h.............spscppphshhcp ................tttslGFGhsssps.HHFhV.IPp.upss..sVhlhEpashpssppsp..............................................ps..h.+shls+.pWptItstlppcFNtRL+pcshpsu+W..KsGpN...sVc.RLLGKELsVLsWAlE..cs.sscplPlAl+NW.uL+PEERWWLasMTsAuTGts.pDpt.+GWRhALRaALs-sP.................sp....p................p..................................... 1 8 10 13 +12470 PF12636 DUF3781 Protein of unknown function (DUF3781) Bateman A agb Jackhmmer:Q17ZV8 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 82 and 98 amino acids in length. There are two conserved sequence motifs: GKNWY and ITA. 27.00 27.00 28.10 32.00 24.10 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.26 0.72 -4.02 19 84 2009-10-30 16:24:37 2009-10-30 16:24:37 2 6 80 0 15 75 3 72.10 47 72.53 CHANGED LLpNl-.+lHTT-LGh.RI++NLsLcss-V.l-...aCKpKIhs.ssAhIp++GKNWYsps-s..shlTlNAaSYTIITAH .....LLpNl-.+LHTT-LGhhRI++NLuL..ssp-V.lp...aCKpKIhs.ssupIpR+GKNWYlps-s..hhITlNAhSYTlITAH....... 0 10 14 14 +12471 PF12637 TSCPD TSCPD domain Bateman A agb Jackhmmer:Q17ZZ5 Domain This family of proteins is found in bacteria, archaea and viruses. The domain is found in isolation in many proteins where it has a conserved C-terminal motif TSCPD after which the domain is named. Most copies of the domain possess 4 conserved cysteines that may be part of an Iron-sulfur cluster. 
This domain is found at the C-terminus of some ribonucleoside-diphosphate reductase enzymes. 21.60 21.60 21.60 22.60 21.50 21.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.07 0.72 -4.04 149 1269 2009-10-30 17:23:21 2009-10-30 16:52:16 2 33 1088 0 416 1079 1406 91.80 27 13.21 CHANGED acsps.sC....shhlslsh-.t.s...........lhtslshsGGC.suphpulu+Ll.......pGhs...lc-....llcpLcGIpCsspss.....................SCPDtlucALcphh .................................s..sp...phYlshsth...t..c...G........hElFl.s.htptG....us.tuhhpuhuthlShsLp.....hGss......l-p....hlcphpslchtssGhh.t............t.h.SlhDhlh+tLt...h..................... 0 186 331 383 +12472 PF12638 Staygreen Staygreen protein Bateman A agb Jackhmmer:Q181L4 Domain This family of proteins have been implicated in chlorophyll degradation [1,2]. Intriguingly members of this family are also found in non-photosynthetic bacteria. 27.00 27.00 27.60 27.50 23.10 19.60 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.96 0.71 -4.67 21 155 2009-11-02 14:20:47 2009-11-02 14:20:47 2 3 98 0 61 150 2 143.00 41 65.43 CHANGED spFsssKLpVpFhsstspstPh.....hPRpYTLTHsDhTucLhLsIupshshs.pl....sch.RDEVlAEW.pc.psp..hsL+VasaVS.GuphhhshuAp.RahIFp+ELPLlLcAlhaGDpsLFppaPcL.sA.VaVaFcSs..hP.....pa............s+hEsWGslp-hu .........s.hFpssKLpV.Fhs..sspp..ppPh................hsRpYTLTHSDhTucLhLsIupshshs.pl.....sphtRDEVluEW.+csp.......sp........hsL+Vasalu.GsphhhchssthRahIFp+ELPlsLcAlhaGDpshFspaP-Ltpu.laVaFpSs..hs.ca...................N+lEsWGslp-h.s................... 0 18 48 54 +12473 PF12639 Colicin-DNase Colicin-DNAse; DNase/tRNase domain of colicin-like bacteriocin Coggill P pcc pdb_1bxi Domain Colicin-like bacteriocins are complex structures with an N-terminal beta-barrel translocation domain (Pfam:PF09000), a long double-alpha-helical receptor-binding domain (Pfam:PF11570) and this C-terminal RNAse/DNase domain with endonuclease activity. 
Their competitor bacteriocidal action is by a process that involves binding to a surface receptor, entering the cell, and, finally, killing it. The lethal action of colicin E3 is a specific cleavage in the ribosomal decoding A site. The crystal structure of colicin E3 reveals a Y-shaped molecule with the receptor binding domain forming a 100 Angstrom long stalk and the two globular heads of the translocation domain and this catalytic domain comprising the two arms [2]. 21.50 21.50 21.60 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.87 0.72 -3.81 57 525 2012-10-05 18:28:12 2009-11-02 18:40:55 2 27 407 56 43 461 2 108.50 29 25.68 CHANGED sshspsG................a.ths...sthss.ls.phtcpLp.....ucchspFcphpcthhpsl...............tpsspltppFstpphpth.psthsP...............ppaphHHppp..tG.......shpllsschHtp.....hHpG ...............................................................................................................................................................................................................c..pphsphRKphhcsV...............ucss-huup.....F...s....s.....c....s....lt..ph.+p...GpsP...........................................................hsYslHH+hslpsGGss.ch-NlhLlpsc.Hcc........................ 0 5 14 22 +12474 PF12640 UPF0489 UPF0489 domain Bateman A agb Jackhmmer:Q181H4 Domain This family is probably an enzyme which is related to the Arginase family. 
27.00 27.00 28.50 27.80 25.00 23.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.29 0.71 -4.04 34 190 2012-10-01 22:40:15 2009-11-03 13:56:57 2 4 158 0 102 176 2 174.40 25 49.67 CHANGED lall-sHccshhhWhctlppt.h...pshpllHlDpHsDhhhsh...................phshphshpt......t......phshctlsh.-saI..hsAlhsuhlsclhhlppshs................................................hh.t...h.....t....hpphptht.htshshpp.hhpt..sh...........................................................................p.ppsalLDIDLDaF ...................lalV-sHpcsl..........ahhcsltppcls..pshph..lHhDuHsDhhlPh......................................................shssssshsc...................cthhspLs.I..-NaI....hPulauGah.s.clhalcssau...............................................p..hthst...hhhspt.h.s........ttp.......hpp.h.hp.hps....h.hps......t................................................................................................................................................................................................................................s..psalLDlDLDaF.............................................................. 0 38 53 78 +12475 PF12641 Flavodoxin_3 Flavodoxin domain Bateman A agb Jackhmmer:Q180M7 Domain This family represents a flavodoxin domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.82 0.71 -4.81 44 396 2012-10-03 05:08:30 2009-11-03 16:27:47 2 6 311 0 78 1420 207 144.70 24 88.08 CHANGED hllYSShTGNT+plAcuItpslss..pshhshpcsps.....hsph..DllhlGFWsDKGssspchtcalps.L+sK+lhlFGThGhsssscahpphlppspphlsps.NpllGpFhCQGKhs.plpc+acph....tpstpptthsphlppa-pAhsHPDpsDlppucphsc ..........................llY.s.S.h.o..GNT+plAcsI....tctlss...........tph.h.....s.h..p..ps.ss.....................hpsa....D.h.l.hlG...h......W......s.........D..c..G......p......s......s...t..c.........h......p....c...a....l.......p.....p.....l......c.....s.....K....p....l.....h..lFu...T..h..G....s..t........s..p..p..a.h...p..p...h...h.p.p.h....t....p.h..hs...pt...sphh...s...th...hs..p......G...t.h.s..t........h.thh.t.......................t.h..s..cP.s..ch..h.....t......................................................................................................... 0 30 61 71 +12476 PF12642 TpcC Conjugative transposon protein TcpC Bateman A agb Jackhmmer:Q180I0 Family This family of proteins are annotated as conjugative transposon protein TcpC. The transfer clostridial plasmid (tcp) locus is part of some conjugative antibiotic resistance and virulence plasmids. TcpC was one of five genes whose products had low-level sequence identity to Tn916 proteins, having similarity to ORF13 homologues from Tn916, Tn5397, and CW459tet [1].\ This family of proteins is found in bacteria. Proteins in this family are typically between 302 and 351 amino acids in length. 
27.00 27.00 28.00 27.80 26.90 26.10 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.47 0.70 -4.81 47 781 2009-11-04 11:35:59 2009-11-04 11:35:59 2 2 512 6 62 457 7 223.70 29 72.09 CHANGED saspsFsppYho....tspcsh-pRtppLp.tYlspphp..ststhpt.......sppltssplhslcpp.........cpthhlphpVphphs..............ptcptpp....................pthsVPVth....pssshslsu.Pshss.hsppushpspttpscsshst.tt......pclpcFLpsFFchYssuspp-lshhhpsss......slsssh...th........tclsssphhpppsshpsslt............Vpah-pt.....optphsppasLpLp.cp....sspahlpc ..............................................................................FscsFscsYao..hppspcsh-pRhppLp.tYLsp-hpshs.cts+pt......suslpshplhslcpp..............cspaslpapVc.pls.......................pscptpslp............................ssapVslhh.........tssshlllp.Pshss..hPp.p..u..s.....hpsK.th.-scsss-stss.........pclspFLpsFFchYsoust..p-.LuYasssshh.....slstph....ha...........pclsss.h..hhcc.....s.sps.hlsls............VpahDpp...............oc.sTpsp.pasLsLp..Kp.....sssWhIh................... 0 30 44 53 +12477 PF12643 MazG-like MazG-like family Bateman A agb Jackhmmer:Q181R3 Domain This family of short proteins are distantly related to the MazG enzyme. This suggests that these proteins are enzymes that catalyse a related reaction. 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.18 0.72 -4.06 25 305 2012-10-01 21:36:44 2009-11-04 14:23:11 2 3 298 0 61 203 5 93.40 35 86.29 CHANGED Ih+NlKhIEtLKuELLs......pluclF+hLs+Gup...p-uIl-sluslIllsYlLucRLGhsapclDcslccKL+luIhEccc.lEK.ht-LScLtp+L......cc+c ..........................................tRNh+.hcs.Ks.hlu......-LhEL...Fphhosp.p......s...tp......t..E..cIt-ELADllIYsYhlADpLGhD....lDEhlccKLccs.t.h.chs...Ec.................................. 
0 25 45 57 +12478 PF12644 DUF3782 Protein of unknown function (DUF3782) Bateman A agb Bateman A Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 91 and 186 amino acids in length. 21.60 21.60 21.70 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.06 0.72 -4.18 54 159 2009-11-04 17:31:12 2009-11-04 17:31:12 2 7 87 0 50 145 10 79.80 20 52.38 CHANGED pchtchpc...........................................chtphpcclpph................................................................hsccphchlp........................chhstlstthshtsEpsa+pGhp-slphhtt ....................................................................................................................................................................th.c...............................phhthpccLpph...................................................................hocEs....hchlp..............................phhstlutths..sEtta+.Ghpthlp.......................................................................................... 0 17 23 28 +12479 PF12645 HTH_16 Helix-turn-helix domain Bateman A agb Jackhmmer:Q180H2 Domain This domain appears to be a helix-turn-helix domain suggesting that this might be a transcriptional regulatory protein. Some members of this family are annotated as conjugative transposon domains. 21.60 21.60 21.60 21.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.88 0.72 -4.04 52 618 2012-10-04 14:01:12 2009-11-05 10:29:19 2 3 295 0 35 325 44 61.90 36 77.88 CHANGED aplIht.AspGDspAlpplLpHYcuYIsphsh+.hhc.chGpshhtlD--l+pclct+Llps.llpFc ........a.lIhp.ApcGDspAlpplLp+YcuYIs+hshR.hhs.-hGphphhVDEph+pclcp+LIpt.ILpFc........................ 
0 17 28 28 +12480 PF12646 DUF3783 Domain of unknown function (DUF3783) Bateman A agb Jackhmmer:Q180F4 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. 27.00 27.00 30.80 30.00 25.30 22.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.59 0.72 -4.22 69 284 2009-11-05 10:33:20 2009-11-05 10:33:20 2 1 262 0 72 251 7 58.20 26 39.88 CHANGED pchllhssh.ssp..clcphlpth+ct....th..phslhAslT.sNhpWshppLh.cEltcE+chhp ...chllhssh.ssp...clpthlpsh+ct....tl...phslhAslT.sshsWshppLl.cElhcE+chhp... 0 35 57 68 +12481 PF12647 RNHCP RNHCP domain Bateman A agb Jackhmmer: Domain This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 143 amino acids in length. There is a conserved RNHCP sequence motif. 24.00 24.00 25.00 37.20 23.20 23.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.97 0.72 -10.73 0.72 -4.16 27 115 2009-11-05 10:59:04 2009-11-05 10:59:04 2 2 95 0 46 118 8 87.30 46 61.11 CHANGED spsssFpChtCGhtVsstusGos+RNHCPsCLpShHlD.phPGDRAus.CtGhMcPlulhsRpsG-WsllHRCppCGcLspN.....RluuDDN.hhLhp ..s.tpsFhChtCGh.V.s.usGotaRNHCPpCLhShHVD...PGDRuus.CtGhM-PlulhV+psG-WhllHRCppCGpLssN......RlAuDDN.hhLh................. 0 20 36 44 +12482 PF12648 TcpE TcpE family Bateman A agb Jackhmmer:Q180I5 Domain This family of proteins includes TcpE a conjugative transposon membrane protein.This family of proteins is found in bacteria. Proteins in this family are typically between 122 and 168 amino acids in length. 
22.20 22.20 22.90 22.50 21.90 21.70 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.39 0.72 -3.90 31 672 2009-11-05 14:27:45 2009-11-05 14:27:45 2 2 429 0 44 272 10 98.40 39 70.53 CHANGED sYsphaphphhlYpItc.hpLPh.s..lshpph....shFllhhlslhlhhtlhs.........htpshhhhhh.hlPhhlshhhsphch-GKphhtalhshlpahhch+hpppthhht .............sYsphaph.hVlYtIsD..hpLPh.s......lshsph....saFll..h.hhl.hlh.hpl.s.s............hhpGsh.L.pYh.slPsslTaahopKpFDGKKsasFL+uhls.Yhhc.Klshsst...p........ 0 23 33 38 +12484 PF12650 DUF3784 Domain of unknown function (DUF3784) Bateman A agb Jackhmmer:Q180M1 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 96 and 110 amino acids in length. 27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.35 0.72 -4.05 48 316 2012-10-01 21:33:42 2009-11-06 14:28:11 2 2 238 0 39 256 75 92.90 24 80.06 CHANGED hhlull....hhllulh..lhpsKushLloGaNohsccE+cchDpppls+hhGphhhhhul..lhllsul....hthhhsph........hslhhhllhllhhhhhlhhsschp ...................hlull..hhhluhh.......ltst+tshLluGaN....cE+chhDcp+Ls..+hhGhhhh.lhul..lhhltul....hthhhsph............hhhhhh.hlhllshllhhhhss...h.......................... 0 18 31 33 +12485 PF12651 RHH_3 Ribbon-helix-helix domain Bateman A agb Jackhmmer:Q17ZT4 Domain This short bacterial protein contains a ribbon-helix-helix domain that is likely to be DNA-binding. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.70 0.72 -4.23 28 354 2012-10-02 18:44:02 2009-11-06 14:36:52 2 4 316 0 62 379 17 42.50 33 41.58 CHANGED pRpphosolsp-Lhp+LcpLSccTpIPhS+LlDEAl-hLLc+Yc ..........hpphohplspElhp+LDslScsp.u.l.sKScllcEAlptaLpph........... 
0 25 39 51 +12486 PF12652 CotJB CotJB protein Bateman A agb Jackhmmer:Q181Y6 Domain CotJ is a sigma E-controlled operon involved in the spore coat of Bacillus subtilis [1]. This protein has been identified as a spore coat protein [2]. 29.00 29.00 29.00 31.20 28.80 28.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.67 0.72 -3.90 71 380 2009-11-09 12:44:13 2009-11-09 12:44:13 2 2 344 0 68 260 3 77.60 41 85.47 CHANGED cppLLppIptlsFull-lsLYLDTHPsDppAlp.apphspptpphhcpYpppaGPLs.thsss..st.............p.W.sWlpsPWP.....W- ...h.hpLLcplppl-FsllELsLYLDTHPcDppAlppaNphupppppLpppaEppYGPLppaGs.u...s.p.............ssW.pWscsPWPWp....... 0 36 54 59 +12487 PF12653 DUF3785 Protein of unknown function (DUF3785) Bateman A agb Jackhmmer:Q181X2 Domain This family of proteins is functionally uncharacterised.This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. These proteins share two CXXC motifs suggesting these are zinc binding proteins. This protein is found in clostridia in an operon with three signalling proteins, suggesting this protein may be a DNA-binding transcription regulator downstream of an as yet unknown signalling pathway (Bateman A pers obs). 27.00 27.00 186.90 186.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.89 0.71 -4.32 8 29 2009-11-09 15:44:53 2009-11-09 15:44:53 2 1 29 0 4 16 0 136.60 56 100.00 CHANGED M-.YKFsYD-KEYlLsc-NCsshFND...EpcElcGlSl-cILchLspuEEVsFupEYYp-sCs.ChsGhEEKpKhFsFLEYHFYIYTK-sKYVISsIsKEY-spSFNKLhRAsKVDcSYIVSllVCtNCGsYsIpIEpCpV ..h..aKFsaD-KEYhLsE-ph.hhFN-...t.t-VcGhsI-Klh-ILNpuEtVsFuptYYpssC..ChtGlEEKKK.FPFLEaaFaIYoKsGcaVISNIpK-YcGLSaNKLhRspKVDcSYlVslsVCcNCGsasVplEphpV. 0 2 4 4 +12488 PF12654 DUF3786 Domain of unknown function (DUF3786) Assefa S, Coggill P, Bateman A agb Pfam-B_16102 (release 23.0) Domain This presumed domain is functionally uncharacterised. 
This family of proteins is found in bacteria. Proteins in this family are typically between 201 and 257 amino acids in length. Some proteins also contains an iron-sulfur cluster. 27.00 27.00 27.70 50.20 22.80 20.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.88 0.71 -5.01 53 182 2009-11-09 17:15:17 2009-11-09 17:15:17 2 2 71 0 79 186 21 177.60 23 80.83 CHANGED hDhpplupphGs.h.t.ptpt...lpl.hhGpsaplshsshthsstcsssh..........hpllllcYLhpus.sts.ss.cWlsa+-lssGt.ah..ssFppc.s.psLschassp.hcphppssctLGGp.hstu......DhuhhhpslP+lPlhllhWpu..D--FPupuslLFDpssspaL.ssEslhsluthlsphL ....................h.s.tplupphusth...p.pt...htlpahuppatlshssspl...hstpssth...................................hplllLpYLhpup..sh...s..ss.caloa+El.s.s.G.thah..ssFppp.shpsLtchFusp.hcth.ppsspp...LGGp.hshu......Dhuh..hhpshPclPltllhWpu..D-EFPususlLFDssssphL.ssEslhslushlsth.................................... 0 61 79 79 +12489 PF12655 DUF3787 Domain of unknown function (DUF3787) Bateman A agb Jackhmmer:Q185C1 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in Clostridia. Proteins in this family are approximately 60 amino acids in length. There is a conserved TAAW sequence motif that may be functionally important. 20.70 20.70 21.80 23.00 20.00 18.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.57 0.72 -4.24 14 76 2009-11-11 17:47:45 2009-11-11 17:47:45 2 1 75 0 24 52 0 51.40 47 87.27 CHANGED pcpKcphhshPlEpHsTAAW.ANIcchKshSpVsIPoEppVcNAK-WVDsNpK .......................pppKcphhthPIEpHcTAAW.ANIpphKPpSsVsIPSE.pVpNAKEWVDsNpK.... 0 15 21 23 +12490 PF12656 G-patch_2 DExH-box splicing factor binding site Wood V, Coggill P pcc Pfam-B_900 (release 24.0) Domain Yeast Spp2, a G-patch protein and spliceosome component, interacts with the ATP-dependent DExH-box splicing factor Prp2 [1]. 
As this interaction involves the G-patch sequence in Spp2 and is required for the recruitment of Prp2 to the spliceosome before the first catalytic step of splicing, it is proposed that Spp2 might be an accessory factor that confers spliceosome specificity on Prp2 [2]. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.93 0.72 -4.13 74 304 2012-10-01 21:03:39 2009-11-12 14:11:33 2 12 256 0 228 1568 9 76.80 33 18.52 CHANGED tp.spppthcpclsstP-pssh-..-YcplPVEpFGtA..hLRGMGWc......tupshu+sptt.................ht.ppRss.tLGLGAcshtsp ............................................ht.....tt.p.chttt...s.-pssh-........-Y.c..s.....lP..V.......E......p..F.......GhA...hLRGMGWc......tGpshG+.s.t.tt..............sts.h...s.phRPt..tLGLGAc.h..s.................................... 0 78 129 190 +12491 PF12657 TFIIIC_delta Transcription factor IIIC subunit delta N-term Coggill P pcc Pfam-B_74169 Domain In humans there are six subunits of transcription factor IIIC, and this one is the 90 kDa subunit; whereas in fungi the complex resolves into nine different subunits and this is No. 9 in yeasts [1]. The whole subunit is involved in RNA polymerase III-mediated transcription. It is possible that this N-terminal domain interacts with TFIIIC subunit 8 [2]. 
27.00 27.00 27.10 27.40 26.90 26.40 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.30 0.71 -4.55 35 186 2009-11-13 16:10:32 2009-11-13 16:10:32 2 15 151 0 131 188 0 162.20 21 22.39 CHANGED pshPoshssluWSsDGclAlusuctVpl.L...........sPp.......................pt..ttspshsssshpaphsphcsshh.s.pphP.................thhspshsh.hs.t........t-.psu..........plhuluWSP.GLup...p.......RClLAVLToshtLolapsstt...pucWsclsslschLt.....................................t..t....tts.hh.........pc.plpuhsWs ........................................................................p...ssh.pslsWSpDs.pluVsssc.ltlL.............sPt..............................................................t.t..ts.t.....t........ththh...psp.h..s....st.s.......................................th..t.h.s................t.t...................thhthuWSP.G.hsss..t.........................+ClLAsLTssspLslats.t....p.pWh..pl.sslschhh........................................t....st.............p....t......thta................................................................ 0 30 65 100 +12492 PF12658 Ten1 Telomere capping, CST complex subunit Coggill P pcc Wood V Domain Stn1 and Ten1 are DNA-binding proteins with specificity for telomeric DNA substrates and both protect chromosome termini from unregulated resection and regulate telomere length. Stn1 complexes with Ten1 and Cdc13 to function as a telomere-specific replication protein A (RPA)-like complex [1]. These three interacting proteins associate with the telomeric overhang in budding yeast, whereas a single protein known as Pot1 (protection of telomeres-1) performs this function in fission yeast, and a two-subunit complex consisting of POT1 and TPP1 associates with telomeric ssDNA in humans. S.pombe has Stn1- and Ten1-like proteins that are essential for chromosome end protection. Stn1 orthologues exist in all species that have Pot1, whereas Ten1-like proteins can be found in all fungi. 
Fission yeast Stn1 and Ten1 localise at telomeres in a manner that correlates with the length of the ssDNA overhang, suggesting that they specifically associate with the telomeric ssDNA. Two separate protein complexes are required for chromosome end protection in fission yeast. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution [2]. Ten1 is one of the three components of the CST complex, which, in conjunction with the Shelterin complex helps protect telomeres from attack by DNA-repair mechanisms [4]. 21.60 21.60 21.70 21.60 21.50 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.58 0.71 -4.36 27 90 2012-10-03 20:18:03 2009-11-13 17:26:38 2 2 88 4 62 79 0 132.90 26 80.45 CHANGED Mo....sP.ssphh..............Fh.pphsshstsp+lRhLu.................sVppYshssG...pLhL-+..shst.......t.t.ss.....lsVDlshlLss.lpsc.................clplGsWlNllGYlctpt.............................................................hVcAlhlhsusulplscY..cclLp .............................Mo....sPhs.p.h..............hhpp.sthtsup+lRhLu.................CVssYshtsG...pLhLc+.......shsh..........................ptt.sp.....................spVDlpllLps..lpsp............................plplGsWlNllGYlpttt.........................................................tpsshlpAlhlhssushcltcY.pchl....................... 0 9 30 49 +12493 PF12659 Stn1_C Telomere capping C-terminal wHTH Coggill P pcc Wood V Domain This domain consists of tandem winged helix-turn-helix motifs. Stn1 and Ten1 are DNA-binding proteins with specificity for telomeric DNA substrates and both protect chromosome termini from unregulated resection and regulate telomere length. Stn1 complexes with Ten1 and Cdc13 to function as a telomere-specific replication protein A (RPA)-like complex [1]. 
These three interacting proteins associate with the telomeric overhang in budding yeast, whereas a single protein known as Pot1 (protection of telomeres-1) performs this function in fission yeast, and a two-subunit complex consisting of POT1 and TPP1 associates with telomeric ssDNA in humans. S.pombe has Stn1- and Ten1-like proteins that are essential for chromosome end protection. Stn1 orthologues exist in all species that have Pot1, whereas Ten1-like proteins can be found in all fungi. Fission yeast Stn1 and Ten1 localise at telomeres in a manner that correlates with the length of the ssDNA overhang, suggesting that they specifically associate with the telomeric ssDNA. Two separate protein complexes are required for chromosome end protection in fission yeast. Protection of telomeres by multiple proteins with OB-fold domains is conserved in eukaryotic evolution [2]. 25.00 25.00 26.40 49.70 21.90 19.30 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.61 0.71 -4.31 10 23 2009-11-13 17:32:48 2009-11-13 17:32:48 2 1 23 2 15 23 0 124.20 41 26.69 CHANGED hKuhDslctcsap-lLspLsppGLIslcupop..lcLhsLKsla-Ysp+RIusLlKLQshTGslchs+Vpp..+LphPhhopphIVDlaKEsL++hptt.splLpsWWI-hcs+sth.......hlHFpYspup ............hKShDsLcpEsFcDlls+LlspGLIsLcs+os..hDLhPLKsLa-YspKRIslLhKLQChTGTlplo+Vpc..KLclPhlTspuIVDlFKEsLK+tpKphPplLKsWWIDLcscsth.......llHLEYsts.h.. 0 2 7 13 +12494 PF12660 zf-TFIIIC Putative zinc-finger of transcription factor IIIC complex Coggill P pcc Maraia R, Wood V Domain This zinc-finger domain is at the very C-terminus of a number of different TFIIIC subunit proteins. This domain might be involved in protein-DNA and/or protein-protein interactions [1]. 
22.10 22.10 22.50 22.70 22.00 21.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.19 0.72 -11.02 0.72 -4.09 16 171 2009-11-13 18:18:58 2009-11-13 18:18:58 2 11 153 2 124 170 0 90.00 25 12.66 CHANGED +shpshps.u........EpCslC......cuslsasssppApCssGHhWhRCuLTFhulQsssh.+hCslCs.sthhstshttspp.....................hchLhpshssCh..aCuuchh ..........................hsp..................EpCshC..........pt...l..s.hps.hcp......uh.CssGHh.ahRCslThhslps.th..+hC.lss.phshp..t....ts....t..............................................................................hphL...p.t....C.aCss...h............................................ 0 30 57 92 +12496 PF12661 hEGF Human growth factor-like EGF Wouters M, Coggill P pcc Wouters M Domain hEGF, or human growth factor-like EGF, domains have six conserved residues disulfide-bonded into the characteristic 'ababcc' pattern. They are involved in growth and proliferation of cells, in proteins of the Notch/Delta pathway, neurogulin and selectins. hEGFs are also found in mosaic proteins with four-disulfide laminin EGFs such as aggrecan and perlecan. The core fold of the EGF domain consists of two small beta-hairpins packed against each other. Two major structural variants have been identified based on the structural context of the C-terminal Cys residue of disulfide 'c' in the C-terminal hairpin: hEGFs and cEGFs. In hEGFs the C-terminal thiol resides in the beta-turn, resulting in shorter loop-lengths between the Cys residues of disulfide 'c', typically C[8-9]XC. These shorter loop-lengths are also typical of the four-disulfide EGF domains, laminin ad integrin. Tandem hEGF domains have six linking residues between terminal cysteines of adjacent domains. hEGF domains may or may not bind calcium in the linker region. hEGF domains with the consensus motif CXD4X[F,Y]XCXC are hydroxylated exclusively in the Asp residue. 
18.00 13.60 18.00 13.60 17.90 13.50 hmmbuild -o /dev/null HMM SEED 13 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.16 0.73 -5.96 0.73 -3.51 521 6130 2012-10-03 09:47:55 2009-11-16 12:53:17 2 1671 227 11 3408 13199 236 13.00 50 2.13 CHANGED pCpCssGaoGspC ......pChCssGaoGtpC.. 0 1352 1597 2396 +12497 PF12662 cEGF Complement Clr-like EGF-like Wouters M, Coggill P pcc Wouters M Domain cEGF, or complement Clr-like EGF, domains have six conserved cysteine residues disulfide-bonded into the characteristic pattern 'ababcc'. They are found in blood coagulation proteins such as fibrillin, Clr and Cls, thrombomodulin, and the LDL receptor. The core fold of the EGF domain consists of two small beta-hairpins packed against each other. Two major structural variants have been identified based on the structural context of the C-terminal cysteine residue of disulfide 'c' in the C-terminal hairpin: hEGFs and cEGFs. In cEGFs the C-terminal thiol resides on the C-terminal beta-sheet, resulting in long loop-lengths between the cysteine residues of disulfide 'c', typically C[10+]XC. These longer loop-lengths may have arisen by selective cysteine loss from a four-disulfide EGF template such as laminin or integrin. Tandem cEGF domains have five linking residues between terminal cysteines of adjacent domains. cEGF domains may or may not bind calcium in the linker region. cEGF domains with the consensus motif CXN4X[F,Y]XCXC are hydroxylated exclusively on the asparagine residue. 26.60 26.60 26.60 26.60 26.50 26.50 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.20 0.72 -4.29 733 2407 2012-10-03 09:47:55 2009-11-16 13:04:43 2 940 145 0 1458 7661 32 23.80 49 2.31 CHANGED SapC..s...C..sGYp...........h.........s.-u.ps.........CpDIDE ......SapC.....p.....C...sGap...........Ls.........s.DG.+s.........C.p.DIDE........ 
0 669 767 1040 +12498 PF12663 DUF3788 Protein of unknown function (DUF3788) Bateman A agb Jackhmmer:Q189D9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 137 and 149 amino acids in length. This family may be distantly related to RelE proteins. 25.00 25.00 27.40 27.10 23.30 21.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.33 0.71 -4.48 39 174 2009-11-19 14:09:27 2009-11-19 14:09:27 2 5 124 0 34 153 9 124.80 26 87.02 CHANGED sppphPo.cplpsalGpsh...apclpphlpppYt..hph-ashsutphGWslKa+..KtsKsLCslasccshFsshlhlGp+ctpch-thh.shostlpchacpspshss.G+Wlhl-lpspshlp.DlhcLltlKt ...............pphPs.cpltthlup.sh...appLpphlpppYs...phcashsutphGWshK..Y+.........KtuKsLCslasccshFsshlhlGccctt..ph-thh.phos.spclaccspshss..G+..WLhhclpspshhp.DlhcLlpl+p......... 0 18 30 32 +12499 PF12664 DUF3789 Protein of unknown function (DUF3789) Bateman A agb Jackhmmer:Q18DB3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two completely conserved residues (V and C) that may be functionally important. 22.00 22.00 23.10 22.40 20.90 20.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.30 0.72 -7.34 0.72 -4.63 11 148 2009-11-19 14:19:49 2009-11-19 14:19:49 2 1 103 0 8 89 6 33.20 46 71.96 CHANGED pllpDhLLsohGsslGVslMCllpsGKtADccMc ......llKDhLLs.hGuhlGVslMClhpsuptADptMc...... 0 4 7 7 +12501 PF12666 PrgI PrgI family protein Bateman A agb Jackhmmer:Q187G3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 116 and 146 amino acids in length. This protein is found in an operon that is part of a Type IV secretion system. 
27.00 27.00 27.30 27.20 26.90 26.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.04 0.72 -3.84 74 644 2009-11-20 16:12:33 2009-11-20 16:12:33 2 2 377 0 81 480 60 93.20 27 64.99 CHANGED pVPKDls+.hcsKlhhsL.ThRQLlshuhushlGlshahhh.+thl........u.........sshushlhlhshlP....hhhhuhachcG.hshEchltthlchc.lps+htsapo ..........VsKDls+.hcpKlhhsL.TpRQllhhssushl.u.ls.lahhh...pthh.................s.......sshuhhh...hl.hsslP....hhhhuhach..cG..hs....hEchlthhl+hc..hpscpRsat................. 0 46 65 75 +12502 PF12667 NigD NigD-like protein Bateman A agb JCSG target Q5LAY5_BACFN Family This family of proteins is functionally uncharacterised. This family of proteins is found in Bacteroides species. Proteins in this family are typically between 234 and 260 amino acids in length. These proteins possess an N-terminal lipoprotein attachment site. The family includes NigD a protein found in the Nig operon that encodes a bacteriocin called nigrescin. It has been suggested that NigD may be the immunity protein for nigrescin (NigC) because it is directly downstream [1]. 
21.00 21.00 23.50 23.20 18.80 18.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.50 0.71 -4.51 55 331 2009-11-21 13:39:49 2009-11-21 13:39:49 2 1 107 19 39 282 1 190.80 20 78.62 CHANGED hsTspssss...t.........htsDc..GsplhPsssss.........hssssupRshss..a....p.htpt.......sshshslplhtlpsllopsst.......hssp...p.p.thusDPltlhsh........Wh...s..ttYLslhaphphs..........sptppHhhsLlhsp.............tt.tsssshlpLpLRHs.....s.u..Ds.psthtp..u...........hlSasL.spl.sppttt.....ptlcl+hpsht.suctph.p ...............................................................s....ss...t.....hhthDc.us..pl..hPsssph............hpsspspRsls...a....s.hppt..........sshshslpl.ptlpsllotsht..........t.hssc...p.c.thusDPlplhsh...............Wl.....s..ssYLslhaphphs..........pptp+..HhlsLltsp.................t..spsshlpLpL+Hs......stu.....Ds.pshhtp..u...............hlSasL.spl..sptttt.....pslplphpsht.suc.p..th............................. 0 11 33 39 +12503 PF12668 DUF3791 Protein of unknown function (DUF3791) Bateman A agb Jackhmmer:Q184Z5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 71 and 125 amino acids in length. 21.90 21.90 22.80 22.10 21.70 21.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.87 0.72 -4.36 45 302 2009-11-21 15:27:16 2009-11-21 15:27:16 2 4 144 0 37 238 2 61.40 24 67.45 CHANGED lpFslh.....sIEshAcchshssp-shphhp.chsllc.aIhppY-sLHTput-all-Dlhchlcpps ................tahhh.....hIcphAcchslsspcshphhp.chs..llp.alhppY-sLHTpupcall--lhphlppp........ 0 18 33 37 +12504 PF12669 P12 Virus attachment protein p12 family Bateman A agb Jackhmmer:Q18C01 Family This family of proteins are related to Virus attachment protein p12 from the African swine fever virus. The family appears to contain an N-terminal signal peptide followed by a short cysteine rich region. 
The cysteine rich region is extremely variable and it is possible that only the N-terminal region is homologous. 33.50 33.50 33.50 33.50 33.30 33.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.85 0.72 -3.77 134 610 2009-11-21 15:48:40 2009-11-21 15:48:40 2 2 570 0 65 310 5 50.80 31 79.85 CHANGED slllslllhshsh.hh...l.hphh.+p.....p+p..G...........s.sC...u..C.........tssCtstt.......................................................pppc ......sIlI.shl.Ihuhsh..as..l.h+hh.K+..sKp...G........pCusCthsps.C............tCssp......................................pp...................... 0 25 52 58 +12505 PF12670 DUF3792 Protein of unknown function (DUF3792) Bateman A agb Jackhmmer:Q183N7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. These proteins are integral membrane proteins. 27.00 27.00 27.90 27.70 26.30 26.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.57 0.71 -4.23 64 372 2009-11-21 16:02:30 2009-11-21 16:02:30 2 1 364 0 85 268 3 115.70 27 90.89 CHANGED sttphtsllpGllhuhhlohlhhllhulllhhss....lsEphhshhhhhlhhloshhGGhhuu++spp+GalhGhhsGllYhlllhlluhlhhss.hhs.hphhhphlhshsuGslGGhlG ........p..hspslhhGllhshllshlh.hlhu.LlLpaos....lsEsslshhlhllsllShhhuGhhuG++sppKGWlhGhhsGlhahlllhLlshlshsp.shs.sptllhhlhhhsuusLGGllG................... 
0 42 70 76 +12506 PF12671 Amidase_6 Putative amidase domain Bateman A agb Jackhmmer:Q18BM0 Domain \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.50 0.71 -4.18 83 491 2012-10-10 12:56:15 2009-11-21 16:27:14 2 11 364 0 128 483 20 158.20 30 49.09 CHANGED tYspttAlp....YAc+assst.................Nss.......YtsF.........uuD...................CTNFlSQsL....puGGh...................................W...................ttss..........Ws.......supshhpYltps.thstthuttshpth.....................GDllpachs.uss.............................sHsslVoshsspshs........llssHTss....phphshs......hhtstphpathh .................................................................YsRhpAVp....YAc+.ahsst....................................Nst.......YpsF................ssD...............................CTNFlSQsL.....cuGu.h.shsst...................................................W.............ppsthoh............u....Ws.......sA....cuhh.Yl...tss..tps..t...h.ht.tt..sht.p....t...................GDlItY-hp.scs.......th............................sHoslVsuh-spuhs...................LVssHoss......phph.as.....sh.p.ph.....h.......................................... 0 61 107 112 +12507 PF12672 DUF3793 Protein of unknown function (DUF3793) Bateman A agb Jackhmmer:Q18C06 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 211 amino acids in length. There are two conserved sequence motifs: PHE and LGYP. 
22.80 22.80 39.80 39.30 21.00 19.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.12 0.71 -4.69 77 325 2009-11-21 16:43:30 2009-11-21 16:43:30 2 2 302 0 75 276 6 175.40 30 88.86 CHANGED lshpC...APsLuGlKsusLhshs.................htstcplhphhp.hst.h...spslphhhLp..pspsphllhlY+pptLcphLpcppspphLpph..GY....pstslpphLppLppRh......ppst...............................FPHEIGlFLGYPlcDVtGFIppsGpshhhsGhWKVYss.pcuhphFppacps+pthhph.htpG.pslpplsss ........hhhpsAPsLsGlKsusLlshs.................hp.ptppl.t...phhp.apptht....tpslphhhLp..ps..ppphllhlY+cctLpphLpppcsppaLpph..GY......pstslpphLppLppRh..........ppp.p........................FPHEIGlFLGYPlcDVtGFIpppGpshhh.sGhWKVYss.pcApchFppYcps+phhhph.htpG.hsltplh..s.......... 0 43 67 71 +12508 PF12673 DUF3794 Domain of unknown function (DUF3794) Bateman A agb Jackhmmer:Q185X2 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF01476. 21.70 21.70 21.70 22.00 21.50 21.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.91 0.72 -3.83 173 709 2009-11-22 14:22:34 2009-11-22 14:22:34 2 10 223 0 199 603 18 88.90 18 45.01 CHANGED sclt.cIlpspsp.lp..lpch.cl....hps..................................+lhlcGhlphpllYl................upcs....pp.......lpshp.tplsF..sphl-l.tusptsh.psp..lpspl-pl..ssslhss.c ................................................................plt.pllp.spsp.lp....lpch..cl......hps.............................................................KlhlcGhlphpll..Yh........................up-s..........pt...............lpshp..tplsFsphl-l...tusppsh..tsp.....hphp.lcp.h..phph....t.............................. 0 122 177 188 +12509 PF12674 Zn_ribbon_2 Putative zinc ribbon domain Bateman A agb Jackhmmer:Q18AK6 Domain This domain appears to be a zinc binding DNA-binding domain. 
21.30 21.30 22.00 22.00 21.10 20.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.36 0.72 -3.64 52 343 2009-11-22 14:37:54 2009-11-22 14:37:54 2 16 275 0 77 278 17 80.40 35 52.03 CHANGED paCQSCGMPlspss...hGoptDGopsc-YCtYCYpcGpFst.phoh-EMI-hss.ahsctst.....hs.-pA+phhpphhPpLKRW+ .........aCQSCGMPhspss...hGoptDGopsc-YCtYCYcsGtFhp.phoh.-EMI-hsst..ahschst.....hst-pA+phhpphlPpLKRW+.................. 0 35 59 70 +12510 PF12675 DUF3795 Protein of unknown function (DUF3795) Bateman A agb Jackhmmer:Q186T2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 99 and 171 amino acids in length. This protein is likely to be zinc binding given the conserved cysteines. 22.60 22.60 23.90 23.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.92 0.72 -11.75 0.72 -3.80 72 322 2009-11-22 14:49:21 2009-11-22 14:49:21 2 10 173 0 78 292 7 73.50 32 51.97 CHANGED hhuhCGlsCstCstahtsp...............................................Cs.GCtstpt.........htsttCp..ltpC..stc+.slspCucCsc..a.P.Cphhpp ...............huhCGlsCs.tC.ta.ttt.................................................Cs.GCtstph..........tsppCp..l+pC..sp..p.K..sl..saCscCsc.a.P...Cchh.t................... 0 46 75 78 +12511 PF12676 DUF3796 Protein of unknown function (DUF3796) Bateman A agb Jackhmmer:Q185I0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 
23.00 23.00 23.00 23.70 22.60 22.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.93 0.71 -3.88 13 204 2009-11-22 16:47:27 2009-11-22 16:47:27 2 1 179 0 17 150 2 106.10 38 77.59 CHANGED ps+luYL.GFlGFlGF...LG.h.FhsppshFs.hhaFsFFsFFsa...uKl...lsDELFhpcV+tAsopAFhVullhssIlllhl...hlhcNlcl....Iclhl....sIshslsLsTFllsLhla-hppcc.hpD.......pp ...............s...hhhh.GFlGFhGF...LG...a..tt..hh...hFsFFuFFo.a......+pV.........ptDEhF.p.lsKu..sp.uFllsLhshhIlhhI.....hlhss.sL.......phpI....slhhusLIhsFuhshhha-+.........s.................................. 0 6 10 12 +12512 PF12677 DUF3797 Domain of unknown function (DUF3797) Bateman A agb Jackhmmer:Q18AB4 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 50 amino acids in length. There is a conserved CGN sequence motif. 21.20 21.20 21.20 23.10 20.60 20.70 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.52 0.72 -4.34 9 99 2009-11-23 09:31:14 2009-11-23 09:31:14 2 2 89 0 3 54 0 53.50 77 43.88 CHANGED M..pshcslpLhpK...YspCPpCGN-plGNGEGsL...l-DDpFKRTCKCGapIc ....MDLIIQTFPLDGKTLYYVQCPVCKNNRILNSGANVSRIISDDTFRKLCGCTCDVK. 0 1 2 2 +12513 PF12678 zf-rbx1 RING-H2 zinc finger Coggill P pcc Wood V Family There are 8 cysteine/ histidine residues which are proposed to be the conserved residues involved in zinc binding. The protein, of which this domain is the conserved region, participates in diverse functions relevant to chromosome metabolism and cell cycle control [1]. 
30.60 29.80 30.60 29.80 30.50 29.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.22 0.72 -3.87 110 903 2012-10-03 15:03:13 2009-11-23 17:28:56 2 21 332 18 628 6919 344 76.70 40 28.82 CHANGED +hplp.....c...apuVuhW.p.....Wshs....................-..........s..CuICRsph..ssC.....pCphss.................--...Csl..sh...Gt.CsHsFHhHCI..........pcWL..c..pps.......hCPlCp .........................................................................................htl+.c.WsuVuhW..s....WDl..........................................s.D..........s.....C.AICRscl...h-..hC...l...cC.pusptt............................................................t--....Csl...........sa...G...C.....s............H.s......FHh.cC.l..........ppWL....+....ppp..........sCPLsp............................................... 0 233 348 516 +12514 PF12679 ABC2_membrane_2 ABC-2 family transporter protein Bateman A agb Jackhmmer:Q189I9 Domain This family is related to the ABC-2 membrane transporter family [1]. 
26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.86 0.70 -5.54 35 2420 2012-10-03 10:13:34 2009-11-24 09:49:33 2 14 1493 0 891 9805 1242 277.90 14 89.44 CHANGED hslhcpEapchl+s+phhllsllhhll.....................................hhhshhsphssthhsshtsstpshssh.hh.........................................................................................................hhthllslh........uhlhusshlutEhcpGTlchLLupPloRtpllluKhluthsh....lllshlluhll................................shshhhhhusshshtshhhhhhhshhhhhhh.......hululhlS.slh+..osstAhssululhhhhtllhshhshh......l..h..........................................................h.hl.Ptshhphhhpthhtst................................................................................h..ht.hlhhllhhllhlsluh....hhFp+cDl .............................................................................................................................................................................................................................................................................h..lhtpEhh.p.h..h...p...p...h...h..h......h....h..h.h....h...l.h.hhh............................................................................................................hh.h.h..h....h.......t..h.........s............h...............t....................t......h...........t....h................h....h............................................................................................................................................................................................................................................................................................................................hhh.h.l..h..s..l.h............s.h..h...l..u.......h.....s....h.....l......u......t....E....h..c.p..G.T.lc....h..L....L.s.p.P.l.oR.s..pl.lh...u....K....h..luh..h..h.h...............hh..l.s.h..l.h..u..hhh...............................................................s
.h.h...h....h....h......h.......h....s.....s........s.......h.......s......h.....h......t....h......h....h.......h.......h......h....h...h....h...h...h....h..hhh..............h...h.u....l...u...h.....h..l.S...s..h....s..p......p....p....t..h....A..h..s...h....u....l..s.l.....h....h....h......h......h.....l...l..h..s..h....h...h....h.h..........h.......h.............................................................................................................................................................h...h...........h...s....h....h...p..h....h............t...h...h..t.s...............................................................................................................h...........h....h.h....h....h...h.h..h....h....l...h.h.h.lsh.......hhFt+t.............................................................................................................................................................................................................................................. 0 334 627 821 +12515 PF12680 SnoaL_2 SnoaL-like domain Bateman A, Finn RD agb Jackhmmer:Q11X43 Domain This family contains a large number of proteins that share the SnoaL fold. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -10.33 0.72 -3.61 623 6223 2012-10-03 02:27:24 2009-11-24 16:13:34 2 57 1993 66 2137 7931 2945 104.20 15 64.43 CHANGED l.pp...a.h.c....s.h.s......st...-...h....c....s.h....t........sh....h....u....sD.h.h...h....p.........ss.....sst...........hp.Gtp.....shtp........hhpthht.................hsshphp.lp.ph..........hss..G....c.........tlss..p.....hphph...........................sup.thph.................pshs..h..acl...c...c.G...+..Isch ...............................................ta.h.p....s.h..p........st...D....h.....c.....t.h....t........sh....h....s....sD..sh.....hp..............ps.....sst......................hp.Gp.p.....sltp.......hh.tphh.t........................th.s.s.h..p..h.p...h..p.ph............hss....G.....s.....................hs...hs...p......h.pht..........................................s.up.th.ph.................phhs...h..h.ph....c...s.G...+Ist......................................................................... 0 618 1329 1765 +12516 PF12681 Glyoxalase_2 Glyoxalase-like domain Bateman A agb Jackhmmer:Q187Q5 Domain This domain is related to the Glyoxalase domain Pfam:PF00903. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.39 0.72 -3.44 429 20226 2012-10-02 15:00:03 2009-11-24 17:23:30 2 76 3531 174 6614 23813 5121 112.40 17 76.73 CHANGED lhs..p..D..hpt..utpFYpph.l......Gh..p...htt...................pt...s....p..h......sh...h.ph......s................h..h.............................................t....h..tt.hh.....t......h..t...........................t...t.....ss..........ts........st........h..tl.....t..h.pl.......s.-....l..-s..h...hp...pltst.................G......sph..............h..p....s...s..t.......p...........hsh..............s........p...hhh..l..tDP...-Gshlplh .....................................................................................................................spD..hpp.u...h.pFY..p...ph...l.........G.h...phtt......................pt.s....s..h........sh..h..ph......s...........t.h..........................................................................................................................t...h...t...hh....t.......t..............................t...st....ss........ss..............st................hp.l............s.h.ts.............s..-...........l....-...s...h....hp.....c...l....tst..........................G.....sph..............................h..p....s...s..t.........p.............tsh...................................u...........p.......hh.h..h...tDP...-Gphlpl................................................................ 0 2073 4288 5609 +12517 PF12682 Flavodoxin_4 Flavodoxin Mistry J jm14 Manual Domain This is a family of flavodoxins. Flavodoxins are electron transfer proteins that carry a molecule of non-covalently bound FMN. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.05 0.71 -4.87 5 1133 2012-10-03 05:08:30 2009-11-25 13:20:28 2 15 627 3 273 1382 49 149.40 28 80.50 CHANGED KsLVAYFSso.....................GNTKhlAEhIAEtTGADLFEIcPscPYTctDLDas-ptSRSSVEMpDspuRPAluscl.shEDYDVlFlGFPlWWYsAPpIVsTFlEuYDFuGKpVIPFCTSGGSGlGsotKcLQ.pAsPcAolLEGptlsRGpl.......oRcpVs-Wlc+L ................................................................+hLlsYaSto.............................................G.sTctlAc.....hI.s......c......t.....h.......s.....u.......D......l.....h....c..I.p.....s...t...p...s...Y..s..p..............s.......a.p......s.t......h.......pp......u..p..t..............E.......h.....p..p..........s.t+..P..t....l.....p....s..p.....h.......s..h.sp....Y.......DslalGaPlW.at....p.hsh....sltoF...L....c...p....h.......-.......h.............s...G.K...pl..h....PF.s.T...p...u....G...o...u....hu..p.......s....h....pp.l...p....p.h.........t.....s.....s..s..p....h.h..p.Ghh..h.....t..s.t.....................t..t...lttWlt........................................................... 0 90 199 240 +12518 PF12683 DUF3798 Protein of unknown function (DUF3798) Bateman A agb Jackhmmer:Q183D5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 247 and 417 amino acids in length. Most of the proteins in this family have an N-terminal lipoprotein attachment site. These proteins have distant similarity to periplasmic ligand binding families such as Pfam:PF02608, which suggests that this family have a similar role. 
21.00 21.00 21.80 34.00 20.70 20.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.40 0.70 -5.31 16 148 2009-11-26 10:12:10 2009-11-26 10:12:10 2 1 90 2 26 138 5 260.70 50 68.55 CHANGED ssa+IGllTuTlSQuEDpaRuA-thhccYGs................hIhpsTaPDNFssEhETTISplluLA-DPchKAIllspuhsGshsAhpKI+EcRPDIlhluusspE.......DsshhupsuDlshssD.lspGhslsptApchGAKsFlHhSFPRHhu.hpslupR+shMccsCc-LGlcFl-sssPDPsoDsGsuusQQFlhEclPchlcKY.GK-TAaFuTNsuhpEPll+plhEtGuhasptssPSPhhuYPuALGl-lstsctGDastlhcplpcKlsctGhsGRhG .....................................saHIGllTsoVSQSEDshRGAEtllc..cYGsssp...........GGhIpplThPDNFMpE.pETTISplluLADDPpMKAIVVspulPGTstAF+cI+EK..RPDILlhsupsHE.......DPshlsssADlsls..sDtluRGYlIltsA+chGAcpFlHISFPRHhS.YEhlu+RRsIMcpsCc.DLGhcalt.oAPDPsSDVGVsGAQQFILEpVPpWlpKY.GK-sAFFsTNDApTEPLLKplst.h.GGhFlEA-hPSPhMGYPGALGl-ho.c-tGsastILc+VEcsVlttGGsGRhG........... 0 17 25 26 +12519 PF12684 DUF3799 PDDEXK-like domain of unknown function (DUF3799) Bateman A agb Jackhmmer:JCSG target 392282 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 265 and 420 amino acids in length. It appears that these proteins are distantly related to the PDDEXK superfamily and so these domains are likely to be nucleases. This family has a C-terminal cysteine cluster similar to that found in Pfam:PF01930. 
21.10 21.10 21.20 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.58 0.70 -5.02 19 347 2012-10-11 20:44:46 2009-11-26 15:13:40 2 3 303 2 52 350 410 226.40 26 57.25 CHANGED cYhSsSpaKpF......hp....CEAtAhApLpGcWp......ppsssALLVGNYVHS.YFES.csc-pFhc.......pcschhoph.....pKGpL+u-FphA-pMIpsLcsDchFtphYp.G......-KElIlTG-lhGl.WKuKlDslNhpcshFl.DLKTsc.sl+.pchWsp..t......ssFlctasY.lQhAlYpEl..lcQ....phG.cphpPhIhAVoKpssPD+sllplsp....phhctuLcplc.pslp+lhpVhpGc.cPs+CG+C-YCRus ..................................................................................................................................................hShS.hcph......h.p......s.A.hhhhc..h....pu...hp.......ppcsps.L.lGshhHs....h.h.Espc......hpp.cFhh............tppp.chh.ppst..........ppshh.tc....-.h...c..t.h...-hMh...ps..l..ht..pshh..p.....h..l..hp...G...................Epp..l..h..h..p..sp.....s.G...l..h....h+s+.Dp.l.....s.......-.......h..p....h....h.....h.D.lK..TTt..D..lp..................pa.....tshh.satYclQhAhYp-h..hct......ph..G..p......t.h.p.h..l..h.lul.Sc.p..h............shplhhhsp......-th..t..pp.....hc.pslpplhcshpsc.p.P.............................................................................................................. 0 20 40 44 +12520 PF12685 SpoIIIAH SpoIIIAH-like protein Bateman A agb Jackhmmer:Q18B63 Family Stage III sporulation protein AH (SpoIIIAH) is a protein that is involved in forespore engulfment. It forms a channel with SpoIIIAH that is open on the forespore end and closed (or gated) on the mother cell end. This allows sigma-E-directed gene expression in the mother-cell compartment of the sporangium to trigger the activation of sigma-G forespore-specific gene expression by a pathway of intercellular signaling. This family of proteins is found in bacteria, archaea and eukaryotes and so must have a wider function that in sporulation. Proteins in this family are typically between 174 and 223 amino acids in length. 
25.80 25.80 25.90 25.90 25.50 25.70 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -10.92 0.71 -4.81 84 427 2009-11-26 16:59:55 2009-11-26 16:59:55 2 2 414 3 98 318 4 186.60 23 93.45 CHANGED +pthllssLslhls...luuY...................hsYths...tsttpspsttpptp..t.t.............................................................................................................................................ssspsspsssssspsspph.....................Fsph+lpREppRucph-pLppIlsssssop-pKpcAhpphhplsphtcpEttlEsllcuK.GapDulVhlsss.p..lsVlVpspp..LocscsspIt-lVpcp.sslssppltlp ......................................................pp.hllshLslhlslu..sY.....................Yhhs..p..t...sssssts.sp....t...tttt..p......t......................................................................................s...................................spppspppssssspsssph..........................Fsph+hphEppRucphppLppllsss.ssotppKscAtcphttlsphpppEttl.EsllKup.GapDulVphss......s..p.lpVsVpusc...hoppcsspIhplVpppss..t..pl.l................................... 0 48 79 87 +12521 PF12686 DUF3800 Protein of unknown function (DUF3800) Bateman A agb Jackhmmer:Q183E5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 215 and 302 amino acids in length. There is a DE motif at the N-terminus and a QXXD motif at the C-terminus that may be functionally important. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.38 0.70 -4.72 127 700 2009-11-26 17:12:18 2009-11-26 17:12:18 2 2 610 0 183 611 68 214.40 13 82.60 CHANGED hla.....lDESGshsh...............spshasluulhhppppht..phtppht.phppchhhtt................................................................l+tscltpp...ptt.....................................pplhshltpht...............hhhhs..hlhppthhppt...........htttt...............aphhhphllcclt........................................ppsp.....phtlhhD.....pppppptppltphh..................................htthtppthhpph..lpthhhhcScp...t.....thlQlADhlss....sltpthph...............................................................pptppphhphl...cptht ......................................................................................hahDESGphs..................tp.has..luu.l.hh..t.pp..hh....ph.p.ht...phppphhh.t.................................................................................................l+hsph.pp.....tht......h.................................................................................................................pthhphhtp..........................hhhhhhhh.ppp.httt..................................tt..................aphhhp.hhlppl...................................................................tptp........phh.l.h..hD........pppptpt.p.p.l..hthh.......................................h.ttthhtth...h..p....hhhhpScp........p.......hlQlADhlss......sltphhp................................................................th................................................................................................................................................................. 0 71 127 162 +12522 PF12687 DUF3801 Protein of unknown function (DUF3801) Bateman A agb Jackhmmer:Q187F8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 158 and 187 amino acids in length. This family includes the PcfB protein. 22.80 22.80 23.00 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.32 0.71 -4.75 54 614 2009-11-27 16:03:44 2009-11-27 16:03:44 2 1 330 0 47 447 58 155.40 25 85.15 CHANGED u+hLttultthLpchcKpp..................................GKpol+pLhcp.stslpslclscp..slKpFc+hAKKYGlcaulhKDpsssss..pa.VaF+ucDs-slstAFccastcplppp..................................................................................................................................................................................................................................................c+PSl+ppLpph+.tptttp.....................php+....pps+pKc+ ..............................................p.l..hhtthht..ph....pcttt...............................tGc....plKcLh+.c..up..LpsIplscs...slKchc+phcKaGVsFulhKD+psp......pY.lFFpu+Dhcsh....ptAFcc.hhsc.phc+pcc..............tphpc.hp..................................t................................................................................................................................................................................................................................................................................................ppp............................................................................................................ 0 30 38 42 +12523 PF12688 TPR_5 Tetratrico peptide repeat Coggill P pcc pdb_3kni Family BH0479 of Bacillus halodurans is a hypothetical protein which contains a tetratrico peptide repeat (TPR) structural motif. The TPR motif is often involved in mediating protein-protein interactions. This protein is likely to function as a dimer. The first 48 amino acids are not present in the clone construct. 
This Pfam entry includes tetratricopeptide-like repeats not detected by the Pfam:PF00515, Pfam:PF07719, Pfam:PF07720 and Pfam:PF07221 models. 21.40 13.60 21.40 13.60 21.30 13.50 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.62 0.71 -4.07 8 427 2012-10-11 20:01:02 2009-11-30 13:29:35 2 50 408 0 96 763 182 114.00 31 46.94 CHANGED upAhFERAGAhDSsG+pscAlPLYRcALAsGLsGspRRRAsIQLASSLRNLGcs-EuLALLpsphsthPuDELssAlsuFhALsLsutGRscEAluhlLtAlAsHLP.....RYQRShpsYAptL .................................................h.a.hA.haDshGh.E.pcAlshY..cp.....A...l.......s.......t......s....L...s.....u...c.....p...h...p...t.....A.h..l.....s......L..u...SohR.sl.......Gp.h-pAlslL...p.s.....s...l...t....c....a.....P.....s.......................s.s....s......h..c..s.F..h...u...h..sLa..s..l.uc..pc..E...Alphh..Lthl.....s.....pps...................ap+ult.Ytt.L.......................................................... 2 34 69 88 +12524 PF12689 Acid_PPase Acid Phosphatase Mistry J, Wood V jm14 Manual Family This family contains phosphatase enzymes and other proteins of the HAD superfamily. It includes MDP-1 which is a eukaryotic magnesium-dependent acid phosphatase [1-2]. 
27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.08 0.71 -4.62 43 326 2012-10-03 04:19:28 2009-11-30 15:33:38 2 10 244 6 217 336 18 152.80 32 75.86 CHANGED .lP+LlVFDLDYTLWPhasDs+..................lssPh+tpsss.........plh........DctGpplphYscspsILt.......tLcs.........+ulplAhASRTssP...clAcphLphLcls................th.hhchF....st.lEIas....G..oKspHFp+lpc......coGl..sapcMlFFDDEpRNhpshop.LGVshhhl..ss...GlohphhccGLppapcp ......................................hP+lhVFDLDaTLWshas-sc.........................................htsPh+..tss....................thh..............................Dp..tup..ph.phascl.tlLp..tLcp.........pul..luhASRT..sts...-hApphLch.hcl.......................................phF.....sh....hpI.as......u......sKhpH....Fpplpp........................poul...a..pp.M....lFFDDEtR....N.........hps...p..............lGVs.........h.........hhl..s...Ghshp.hppGl.tatt.t............................................................................. 0 68 123 176 +12525 PF12690 BsuPI Intracellular proteinase inhibitor Mistry J jm14 jcsg_3isy Domain This is a bacterial domain which has been named BsuPI in Bacillus subtilis.\ This domain is found in Swiss:P39804, where it has been suggested to regulate the major intracellular proteinase (ISP-1) activity in vivo [1]. The structure of proteins in this family adopt a beta barrel topology. 27.00 27.00 28.20 27.10 23.40 20.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.68 0.72 -4.27 39 149 2012-10-03 16:25:20 2009-12-03 15:47:41 2 11 122 1 58 124 3 82.90 27 39.88 CHANGED slphplslpNpsscslslpFsSGQcaDlhlh...........DppGcclacaScs+hFTQAlpphslpsG-shsap.tphs.ts........sGsY.plps ..................s.lphpholpNpsspslslpasSGQ+aDhhlh...........spcscclapaS..cs+hFsQshp...spslpsG-ohsap.tphstts.........stY.h................. 
0 23 39 49 +12526 PF12691 Minor_capsid_3 Minor capsid protein from bacteriophage Coggill P pcc manual Family This family is from one of three adjacent genes, all of which are involved in formation of the minor phage capsid. 20.40 20.40 22.40 23.90 19.20 18.50 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.63 0.71 -4.47 14 101 2009-12-03 17:53:01 2009-12-03 17:53:01 2 1 99 0 8 73 1 131.10 31 98.94 CHANGED M.....DhhphLtshlps..hthPhKschshl.spp-........ululh.lPuup..sppYh-GscplshpaplshKocsp.cAppsl.tIsstLspl.thc......l.ShssSapacshslhspPthsctsspGhalYhhshssclpl...ttp ......................M....DFhssLhptIcs....thsh.....ch+hshl.spp-........slslh.hPuuc..ppcYhDGsp-hShsFplshK.oK.....sptpupsslatIsshLsph....s......L.StssSapapsh-l.spPhlssts-pGhalYslshps+l.l.c.t..p.............................. 0 3 6 8 +12527 PF12692 Methyltransf_17 S-adenosyl-L-methionine methyltransferase Mistry J jm14 jcsg_3iht Domain This domain is found in bacterial proteins. The structure of the proteins in this family suggest that they function as a methyltransferase. 18.10 18.10 18.10 18.20 18.00 17.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.94 0.71 -4.55 18 51 2012-10-10 17:06:42 2009-12-07 14:19:28 2 1 41 2 19 80 17 158.40 46 96.81 CHANGED MSRLDSFIRRhoAQRDsLNaAsspsusls.GsVLELGLGNGRTYcHLREhhPsRRIhVFDRslsuHPsSsPP--shllG-lccTLst.ltthGssAuLsHADlGsGsc-KDsspAshlSPlIAslLAsGGlhVSupPLh.a.uLpplslP-ulssGRYFhYRR .................MSRLDpFIcRhoAQRDhLsasts.......pl.ssh.s...Gs.V.hElGLGNGRTYcHLREhhPsRcIhVFDR.s.ltuHP........uS..sPsp-c..h.lhG-.lc-TLss.h.thGssAuLsHADlGsG.c-+DsspushLuPLlsslLAsGGlhVSupPL..assLptlPlP-ulstGRYFhYR+................................................................... 1 2 11 12 +12528 PF12693 GspL_C GspL periplasmic domain Bateman A agb Bateman A Domain This domain is the periplasmic domain of the GspL/EpsL family proteins. These proteins are involved in type II secretion systems. 
24.10 24.10 24.10 24.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.83 0.71 -4.47 23 933 2012-10-02 17:03:51 2009-12-07 17:13:58 2 6 819 2 175 737 69 154.50 24 39.73 CHANGED pppht.W..RhlshhhulhLllhlsttshphaphtppupthptpuppla....pphFPsppplhs....ptQhpppLpph...tutssssshlshLutLtshls..sssslclpuLcacupctcl+l..plpusshsth-......phpppsupt.aplp......uphpppssplpuphslc..s..p ..................hthW....Rhshhshhlhlls....h....sstpslphap.lp...ppssthcsp...spcha....pphF..Pppp+..ls..s...h+pQhpppLpph.......pstsss......s.ph...lshL...stlsssLt......ph.s.s.lp..l...puhsacppctpLpL......phpu...tshssh-......phpptLspp..a...lp........sth.ppps.-.sVpGthsl+..p...................................................... 0 38 86 133 +12529 PF12694 MoCo_carrier Putative molybdenum carrier Mistry J jm14 jcsg_3imk Family The structure of proteins in this family contain central beta strands with flanking alpha helices. The structure is similar to that of a molybdenum cofactor carrier protein. 23.10 23.10 23.30 25.30 22.40 23.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.80 0.71 -4.75 28 67 2012-10-01 21:16:48 2009-12-08 13:32:46 2 3 58 1 34 73 15 137.60 41 76.45 CHANGED IlSGGQTGVDRAALDsAlstGlspGGWCP+GRpAEDGslPs+Y..LpETssssYttRTchNVpDSDGTLIls.pGcLsGGTthThchAcctsKP.....hLll...phspsps..sptltpWltppslplLNVAGPRESpsPGIaptspshLppllp ............IlSGGQTGsDRAALDhAlt..hsl..shGGWCPpGRhAEDG.lss...cY.......LpEhss.ssYt........tRTchNVhDSDuTLIls......tu.t..LsGGo...thThphApchtKP.....hLhl..........phst...tps...sttltpWltppplplLNVAGPRpSpsPuIhphshphLpthh.t............ 0 17 28 32 +12530 PF12695 Abhydrolase_5 Alpha/beta hydrolase family Bateman A agb Jackhmmer:Q186B9_CLOD6 Domain This family contains a diverse range of alpha/beta hydrolase enzymes. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -11.17 0.71 -4.44 308 16512 2012-10-03 11:45:05 2009-12-10 17:24:36 2 168 4213 56 5692 81995 21732 192.90 14 59.02 CHANGED sllhh.....HG....s...........ss.....s...t..p.....s....h.......t....h.u...p.......tL.spp.G.hsllhh..................................ch..s...t.t...ss......s...........tsts......................phpphhpt....................................................................hh..........shp...pl...hlhGcSh.Gut.suh...hhAs...................................................................................p...p...t.....pl.p..ull....hh.u..s....................a...........................ss........hp........plt......p.hph..........................................................PlLllt...Gsp...Dphs.......s...p.ph..pp.........................................................hhpt..hs....sst.....p.h..........................................hhl.p.Gu.sHs .......................................................................................................................................................................................................llhh...H..G.......h...................ss...........s.......t....t...........t........h............................t.......h....u....p........................tl.....s....p........t.....G.......h.......s..........l...l.h.h.........................................................-h....t...s...h...ut.......s...................tst.........................................................sht..t.hlp..h.....................................................................................................................................tths.......s.p..p.l....s..l..h....G.......t..S.h..G.u.t....h..u.h.....t..h..us.................................................................................................................................................................................................
.....................................................................p.p....t......pl...p.....u.l...l........h..h.u..s.......................a..................................................................sp.......t...........t.ht........t....th......................................................................................................................................................................................................Ph.L..l..l...p........G..p.p.......D..p.hs............sh.....p...t...s....t.p................................................................................................h.h.p..t....ht...........t.h.......p..h.....................................................................h..h..h......s.stH........................................................................................................................................................................................................................................................ 0 1991 3619 4800 +12531 PF12696 TraG-D_C TraM recognition site of TraD and TraG Coggill P pcc Pfam-B_1146 (release 5.4) Family This family includes both TraG and TraD as well as VirD4 proteins. TraG is essential for DNA transfer in bacterial conjugation. These proteins are thought to mediate interactions between the DNA-processing (Dtr) and the mating pair formation (Mpf) systems [1]. This domain interacts with the relaxosome component TraM via the latter's tetramerisation domain. TraD is a hexameric ring ATPase that forms the cytoplasmic face of the conjugative pore [2]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.59 0.71 -4.27 232 1419 2012-10-05 12:31:09 2009-12-11 11:56:10 2 13 918 0 310 4110 322 119.20 22 19.22 CHANGED tlhhlLDEhuslspl.sphpphluhhtutGlphhhlhQshuQlcp...hYG..c...ptt..cslh.uNsss....pl.hhs.....ss..-.....pTschlSc.hlG....ppol..tp.tpptpt............t.....p..........p+sLlsssEltplsp .........................h.hhlDEhssl....h..h.....h......p.....t.....h....tp.....hls.........put.......uttltls.lhhQsh.uQlct......paG....c.....tps....pplh.sNs.sshl..hht....sp...ss..pTAchl..oc..th.G...ch..phtp..pohststps......................................psphphttp.llsssclhsh....................................................................................................................................................................... 0 90 215 280 +12532 PF12697 Abhydrolase_6 Alpha/beta hydrolase family Bateman A agb Jackhmmer:Q186D8_CLOD6 Domain This family contains alpha/beta hydrolase enzymes of diverse specificity. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.69 0.70 -4.48 785 53202 2012-10-03 11:45:05 2009-12-14 16:32:03 2 430 5271 368 19769 83722 24117 238.80 14 74.29 CHANGED l.l.l.l........H..G....h...........s....s..ss................t.t....ap..t............l.h...p...tL..s............p.....s...........a............p.........l...............hs...h.....-...h.........G.....t...G.....t......o.......s.....................................................t.....t........................................h.s....h....................t...p.h............sp................................s.l........................................t.t........h....l.....p.....p......h.sh............................p....p.....s....................................................h...l..l..GaS.h.G.u.......h.h...s.ht....h.s.......................tp.....t...s..p.............tl..ps..........l..l....hhs....ss.s...........................................................................................................................t..........t.hhp..............thh...............tth.t.........h.....tth............t......t.t.......hht..h....h...................ttpth............p..phh....................................pt........................tht.th..hp.hhtt........................................................................tttth...t....pt.h...sp........h........s.........h.P....s......hh..l.....h.....upp.Dp..............................h.h.....s..............p.th........................pph...t....p..t...h.....s..s.........s.........p...h...h......h..l.s.....t..u.u.Hh....h...h.h...p...p.....s...p.t...l...sph 
...........................................................................................................................................................................................................llhl.H...G.....h................s........s.st...............t......ap..t............................h.h.....t...tL..s.......................p.....s...................................a................................p...............l...........................l.s.....h...................D........h...................h....G.............h.....G...........t............S.......s................................................................................................................................................................................................t.........................................................h..s....h....................................................p...p...h...........................s.p...................................................................s...l................................................................................................................................................t..t.......l.....l.......c....p......hsh...........................................p..p...h........................................................................................................h...l....l...G.a...S..h.....G...G............h.l.....u...h.t.....h.u.........................................................................tp.......h..P.p..................................pl..p.s.......................l......l.......lhs......ss.s....................................................................................................................................................................................................................................................................................t.h.h.t.........thh.........................t..h......t..............h..............tth.......................t....t.......hh...t......h.
h...........................t.t...t..h.............t.t.h.h..............................................................................pt..........................................t.t.h.h.t.hh................................................................................................................................ht.tsh....h.........t..t..h.....tp............l............p...............h.P......s...............ll...l.......h..................G..p..p...D...........................................................................................................h.hs..................p.hh...............................................pt.h.......t........p...h......h......s.....p...........................s.........................p....h........h.........h...h............................t...s..u.Hh..........h...p.......t......t...h...h........................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 5860 12066 16584 +12533 PF12698 ABC2_membrane_3 ABC-2 family transporter protein Bateman A agb Jackhmmer:Q17ZU3_CLOD6 Family This family is related to the ABC-2 membrane transporter family Pfam:PF01061 [1]. 
31.60 31.60 31.60 31.60 31.50 31.50 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.58 0.70 -5.41 220 16168 2012-10-03 10:13:34 2009-12-15 10:08:38 2 60 3863 3 4148 18062 2565 336.20 12 63.90 CHANGED +t.h....l..h.tll..h..P...ll.h.h..h...l.hs.......hh..h....h....htht....p..............pshpl.................sl.lsps......................hspp....hh......................................................ptlp........s...p..s..p......hp....................thss....hp.psp..pt.l....ps.t.......p.hp..u.hh.h.......l.spshttth................................................................................................t...phphhh....ssps.h.....pts............................................ttltpt..lp..............................................................................p...l........hpt...................................h..pthshsh..p..h.hsh.................t.ptt.....p..sh....tt.......h.............hhs......hl...h..hhh....hhhs...sh....h..h.sh.......t....l....s....pE..+p.s.ph.h.c...hhh...so..l....shhp.......hhhu+.......h....lshhlh...th...h....h......h..l.h.h......h.......l.......h...hu..................................................lshs.....sh...........h.h...h........l.....lhh.llh.hh...shss..........luhhl....utl....h.p...st.......tp..s.......hh...hshhhh...........h.h.hsh....hh.sh...h..h.......h.s..t.........s...........sh......h..t.h....l.....hp.hl..P.hhs.hhhh..h..hp.l.h................hss.....hp............................................lh.....hslhhl.h...lh...sll....hhh...lshh 
..........................................................................................................................................................................................................................................................................................................................................................................................................................................................h.........h..hs..hh.h......h...h...................h.h.............................................................tth..h............................sl..hs.p........................................stt...hh......................................................................................................................................................................pt.ht.............t.................ht.........................h.s.....t.pht.......tt..h......................pp.t............p..h......h...hl...............................h..s.sht.tth.............................................................................................................................................................................................................................................................................h.t.hhh.....stt.........h.....................................................h.th......lp................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.....h........ht..
..............................................................t.........hph...p....h.h..............................................hs.t.......th.....t.............h............................hhs........hh.....h..hhh....hh.hs.....sh......h....h.sh..............l..............s.....pE....+....p..p....t...h...h..c...............hh.h................s.s..s...l.....s......hp............h.h...hu+..........h.......ls....hh..h..l.......sh.......lt.......h...........h...l...h.h.....h.......l..........h..hs.......................................................................lsht.......s.................h...h......h...........h.......hhh....hlh..hh....s.hhs...................huh...h...l.......us.h........h.p...s.................tt..s.............h..hh.......hhhhhh.................................h.hh.h........hu..sh.......h.s..............h.p..t...............h.s...................................th..........h...p..h................l............................t.........hh....P..hth..h.h.ph.....h...tph..h............................htt........p.........................................................................................................................hh.......thh.hl..h...hh..sh.h.hhh.h...h.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 1433 2455 3318 +12534 PF12699 phiKZ_IP phiKZ-like phage internal head proteins Hardies SC, Coggill P pcc Hardies SC Family Phage internal head proteins (IP) are proteins that are encoded by a bacteriophage and assembled into the mature virion inside the capsid head. The most analogous characterised IP proteins are those of bacteriophage T4, which are known to be proteolytically processed during phage maturation, and then subsequently injected into the host cell during infection. The phiKZ_IP family consists of internal head proteins encoded by phiKZ-like phages. Each phage encodes three to six members of this family [1]. Members of the family reside in the head [2] and are cleaved during phage maturation to separate an N-terminal propeptide from a C-terminal domain. The C-terminal domain remains in the mature capsid. The N-terminal propeptide domain is either mostly or completely removed from the mature capsid. In one case, an unrelated polypeptide is embedded in the propeptide and also remains in the mature capsid. The phiKZ-like IP proteins are not discernibly homologous to the T4 IP proteins, and it is not known if the phiKZ-like IP proteins are injected into the host cell, or have some other function within the head. The alignment and HMM model exclude most of the propeptide region, but include the cleavage sites. The first 100 residues, including the cleavage sites, constitute the most conservative part of the seed alignment. 
22.30 22.30 22.30 22.60 22.20 22.20 hmmbuild -o /dev/null HMM SEED 339 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.87 0.70 -5.52 14 27 2010-01-13 14:45:13 2009-12-15 10:40:50 2 2 9 0 0 28 0 299.70 13 68.77 CHANGED tus.tho........ppstpshhssLEslsppl.thcthshu..hEsasss.pst.tthh.....slE..............slpctlppsscphhchlp+lhphlpphhsphpsulpplpc+hspLhc+hpsh..........tp.s.tpplslp.s..pclhhsGpFl..............shhhhsh.phsshhsptasp.hhshlpphschlsshphscp.hsphppthphhsc....................................htstthhhpustlsGNptlhhssspht.s..................hpa..lpsputsss-........hsl-s.sssp.lppplctlpphlptlpchppttpchp........-plcphh-shpp.sth..scps......hcshp-sstclhcsstshlpshssplsshl ..........................................t....ths.pththh.sshcphptph...pthshu.......hEshs.ss.p....th......sh-..............slppthpphhcchhchlpplhshlpphhpphtsuhpplpc+hspLhpchpph..............................tp.s.tpphplt.s..thlhhsupah.................hhs..phsp.htpshsp.hhphhpphtthhpsh.hppp.h.sthtphhphhtt.........................................hh.ts..lsGNthlhhs..tsthh..........................ttpt..st.p........hplch.sstp.ltphlptlpt......hh....phhtthpthpppht........ctlpp.hhpthtt.ttt..spt.......hphhtps.tchhps..thht.ht..ht............................................................................................................................ 0 0 0 0 +12535 PF12700 HlyD_2 HlyD family secretion protein Bateman A agb Jackhmmer:Q182V7_CLOD6 Family This family is related to Pfam:PF00529. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.97 0.70 -5.28 70 15970 2012-10-02 20:27:15 2009-12-15 12:46:01 2 62 3553 31 4798 25329 5154 298.90 18 77.75 CHANGED plppsslt..pshshsGhl.......l..+.pEphlpuptsGhl...phhlp-Gp+VppGshlhslsssphtpphhpt.tt.t.....hpphp....................................t.h..................h..thppp.apthht.ppphpthh.phpp.ptt...........................tthptphp...................................................lpushsGlls...hDuhEth......................t.hp.....................ppp..hpss....stlpsusslh+llss.pp........hplsh.l..s.....ccphpplp......p....sps..l...................plphts.spshpup..lp.ltpt......sp....t.hshlphsst.htchhspRhlclplhhpp.psGLpIPpoAlspcs..........................................................................................................stpGVYsl.ppu..hspF+plc.....lltp..sccahlV.................tpG.Ls.hDplll ....................................................................................................................................................................h............h.h.G..p.l...................t.....s...p.p...s...t...l...s.....s...p.....s....s........G........p....l.......tpl.........h........l.......p..............G..pp.......V.......c.p.G.p.h.L.s.....p...l...c...s.......s...p.......h....p....t....t.....h....p......p......s...p...u...t....h...t...t.......p.t...t...h..tp.h.p........................................................................................................................................................................................................................................................................................................................................hp..h.....t.....c.........p.....p...l.....h.......p.......p........t..........h.......h.......s.......p.......p.....p.......h.....c.......p.......s..p...s..p..h..p.t.spsph...........................................................
..............t..p...th.tp..............................................................................................................................................................................................................................................................................................lp.A.P.h.s...G.....h...ls....................................................................................................................................tht.....s.p....G............ph....l.......s......s........u.......p..........s.........l......h....p...l.....s...s...h....ss..................................h.hl.p...h...p...l......s...................Ep..p....l....s..p....l..p...........................h..................Gp.p...h........l.....................................................................p..h....t...s......h......s..........s.....p......p....h......p...up........lp.t..lsstss................................tst..........s.h..t..s...p...h.....p.........l............s.........s............s.......s........t..........t.............l.........h.......s..........G.....h.......h......s.......p.....s.........p.......l..................h.............t...........p......p.......s............s..........l.....h............l....P.t...p......A..l.....h.....t....p..s...................................................................................................................................................................s.t...t...h.....V...h...l.......h....p....ps..................ph....p.h......p.............V.p...................l..u..t............ss.t.....s...l........................hpG..Lp.G-pll.................................................................................................................................................................................. 0 1643 3132 4033 +12536 PF12701 LSM14 Scd6-like Sm domain Anantharman V pcc Anantharaman V Domain The Scd6-like Sm domain is found in Scd6p from S. 
cerevisiae, Rap55 from the newt Pleurodeles walt, and its orthologs from fungi, animals, plants and apicomplexans [1]. The domain is also found in Dcp3p and the human EDC3/FLJ21128 protein where it is fused to the the Rossmanoid YjeF-N domain [1,2]. In addition both EDC3 and Scd6p are found fused to the FDF domain [1,2]. 25.80 25.80 25.80 26.80 25.00 25.40 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.07 0.72 -4.18 156 564 2012-10-01 22:42:21 2009-12-15 16:50:39 2 12 284 5 358 507 3 94.00 40 21.39 CHANGED ..ssssaIGSpISLISKu-.IRYcGh.LasIss...ppST.......luLps.....................V+SaGTEsRtps.......lsspsplY-aIlFRGSDIKDLpVt-ssp........shssDPAIlpsph ..................hssalGSpISLIScsp.IR...........Yp.GhLhsIss...cpuT.......luLpp.............................................................VRSFG.TEsR.sst..........lsPpcpla-.YIlFR..GSDIKDLpVt-sst...........shspDPAIlt...h............................................ 0 104 170 266 +12537 PF12702 Lipocalin_3 DUF3803; Lipocalin-like Coggill P pcc JCSG_target_392987_3hty Pfam-B_17140 (release 24.0) Domain This is a family of proteins of 115 residues on average. The family has two highly conserved tryptophan residues. The fold is very similar to the lipocalin-like fold from several comparable structures. 20.20 19.40 20.20 19.40 20.10 19.10 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.13 0.72 -3.96 17 79 2012-10-03 08:47:39 2009-12-15 17:12:47 2 3 61 16 8 74 2 90.60 30 55.05 CHANGED sptsllGsWlpPlss.s.sthQGhpLctsGpAoSINMsTLhYcp....................Wc..ppGspLlLoGpShGNst..s..hp-ThpIcpLTscoLlL......cptshplpYs .....................pllGsWspPhshps.sthpGhplcpsGsAS.SI.NMsoLpYcp.........................Wc..hpGs..pL.lLpGpShGstt..s..hssohpIcplopcoLhL......pstt........................ 
1 6 7 8 +12538 PF12703 plasmid_Toxin Toxin of toxin-antitoxin type 1 system Gardner P, Coggill P pcc Gardner P [1]) Family This family is the toxin of a type 1 toxin-antitoxin system which is found in a relatively widespread range of bacterial species. The species distribution suggests frequent horizontal gene transfer. In a type 1 system, as characterised for the plasmid-encoded E coli hok/sok system, the toxin-encoding stable mRNA encodes a protein which rapidly leads to cell death unless the translation is suppressed by a short-lived small RNA. The plasmid-encoded module prevents the growth of plasmid-free offspring, thus ensuring the persistence of the plasmid in the population. Plasmid-free cells arising after cell-division will be killed because the stable mRNA toxin is present while the comparably unstable anti-toxin is rapidly degraded. Where the system is transcribed chromosomally, the mechanism is poorly understood [1]. 19.40 19.00 19.40 19.00 18.80 17.60 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.39 0.72 -4.16 6 28 2009-12-16 11:43:50 2009-12-16 11:43:50 2 1 28 0 8 18 0 70.20 59 92.47 CHANGED Mss.p.hpstpslHpsAh.LuuLcalDQcsARpLu.hAEAVANhFMVVFYQAETGRATptDFpEAMsAlRQuhp .......................MsTtHsIE..spQAlHQAAhpLAALEaIDQcsARplhPlAEAVANhFhllYYQAETGRAT.pDFpEAhsslRQsh.p.. 1 3 4 7 +12539 PF12704 MacB_PCD MacB-like periplasmic core domain Bateman A agb Jackhmmer:3ftj Family This family represents the periplasmic core domain found in a variety of ABC transporters. The structure of this family has been solved for the MacB protein [1]. Some structural similarity was found to the periplasmic domain of the AcrB multidrug efflux transporter. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.22 0.70 -11.73 0.70 -4.77 1392 21380 2009-12-16 11:56:13 2009-12-16 11:56:13 2 26 4469 3 5700 17502 5573 229.30 13 48.61 CHANGED Ah+slhpp......+hRoh...Lohl......G..lsl..G..luu..llsh.hu.ls.pu...hppp.htpp..h.....tp....hus..sh.......lh...lpssts..................................................t.tt.t..sl.s..h.ps.hpt....lp............ph.s.t.lpsl..ss....th..s....................hplph..............sspstss...............ph.G..................ss.s.s.hhph............h......s..h..pl.hpG...chhstt.-...............tpss...llu.p.sl.Acp.las.pt.t..........s.....lGcp..lpl...............................s.....s......s........................hpVl...Glh.....p.............sp....st....................t..s................ttlhlPhsshp..........................phhtt...................................tthsplhlp.........hpsss..s.hsp..................s.ppplpp....h .....................................................................................................................................ttlh..t.p.......ph+.....s.h......l...s...h...l..........u.l..sl...u......l.sh...l.l..s..h...h.u..l.h..pu...h..p..p..t...hppp....h..........tt....hss....sh.........lh......lp.ttt.................................................................................................t.th..s.....p..p..hpp..lp..................ph.s..s..l...p......s.......s...ss....hh...s.................................s.lph..........................sspp.tsh............................................................p.hhG..........................ls..s..s..h.hph....................hs.h....pl.....hp..G.....c..h......h.s...t.......t...........................................pptl.................l...l..s..p......p.......l....A.............cp...hth..p.................................................lGc.p...l.pl...............................................................................s..t.....s...............
........................hpl.....s......Glh....p......................st......t........................t.s......................................thlhh.s..h.sshp.........................................................phht..............................................tthsth.hlt........hp.ss...p...tt.h.......h.................................................................................................................................................... 0 2422 4192 5077 +12540 PF12705 PDDEXK_1 PD-(D/E)XK nuclease superfamily Bateman A agb Jackhmmer:Q18AP1_CLOD6 Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily 21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.99 0.70 -5.01 258 8235 2012-10-11 20:44:46 2009-12-16 13:18:01 2 43 4486 8 1945 8551 4890 244.70 14 25.38 CHANGED phSsSp.lcpatp.CPhpaah..ph.hh............plpttp...........th.pshs...............................hGslhHp......slcp..............hhp......................................................p............hptp.................................th...t.............................................ptt..thtpt.h..pt...................h.h..pp...htp..hht.........................................................................................tttt.......hp...hth.....Ehphp........hph.s..........................................sl...pl...pGplDRl........-tt....ssp.......................lpllDYKTupp..................sh....shp..........................................ch.....h.....h...ph..QLhhY.hhshtptt..........................s.pstsss....hhah.p.hppstht.................................................................................................................hp.ths....ppp..hpt..hp...p...pl....pphh.p.pl.t....p......httt...............................................h..t...psppsC.....p....a....CsapslCt 
................................................................................................................................................................................................................................................t...............................................................................tshp....................................hGs.hhH.p..........hh...c.p..............h..t.....................................................p.......h..t..................t.h..............................................................................t.t..h..t..t.t..h..pt.......................h.....h....pp..........ht.p..hh..t....................................................................................................................................................................t.t...........hp......h..th........E.ht.ht.......htht.......................................................sh..hl.....pG...h..l...Dtl...............................pt.....ssp.............................................hhllD..YK..osph...................st...s.t...................................................ph.....t.......h.....ph.Qhth.Y..t....hslpphh.................................shp.h.t....s..t.....hh.a.h.h...h.t..t.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 700 1353 1685 +12541 PF12706 Lactamase_B_2 Beta-lactamase superfamily domain Bateman A agb Jackhmmer:Q189N7_CLOD6 Domain This family is part of the beta-lactamase superfamily and is related to Pfam:PF00753. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.84 0.71 -4.76 268 12584 2012-10-02 15:46:01 2009-12-16 14:03:32 2 97 4382 48 3955 21211 7413 209.10 16 60.92 CHANGED pt..lLhDss.s........................................tht.thhtt.t....h........tlctl.......lloH.........sHhDHh....hsh..hhpshhth.t..........................hlhss..................sstptlp.................................................................thsh..................p..........ht.hsht.........................................................................lps....h.sspHtsst.h...............................................ahlp........................................stplhau.uDssh........................................h........tth....................................pp...............hDlhlhssshh......................t....hHhsh........ppulphhp.phssc...p....hlhhHh ..............................................................................................................................................................................................................................................................................h..hLhDss..s..........................................................................h......................plc.t.l..........hl.oH.........hHh..DHh..........hs.h........h..h..t..hh..thhsp.................................h.laus........................................ts.s..t...p.h..lp.............................................................................................................................................t.th....................................................h....h...h..p......t.p.........................hh.p.h...t..s.hp................................................................................................lps..........h....ss..p....H.s.s...s.s..hs.....................................................................................ahlc................................
....................................s.sstp..l..h.as..GDoth.........tp............................................................h....pth..............ps..................sDl.hl...h....-s.s..hh.....t......................................htH.hss.........................pp.s...h..p.h...t...p...p....h...t.sp.......p.......llhhHh....................................................................................................................................................................................................................................................................................................................... 0 1384 2524 3317 +12542 PF12707 DUF3804 Protein of unknown function (DUF3804) Coggill P pcc jcsg_3hzp Family This family is approximately 130 residues. Dali search indicates this protein carries a NTF2-fold with a hydrophobic cavity as a structural homologue to 1JB2, 2R4I, 3FSD and 2UX0. In this hydrophobic cavity, Arg 118 provides the H-bonding force to hold a PEG molecule from crystallisation. The interface interaction suggests that the biomolecule of Swiss:Q46KI2 is a dimer. Two members of the family are annotated as putative EF-Tu domain 2 but there is no match to this family so this is likely to be a false assignment. There are two highly conserved tryptophan residues towards the C-terminal end of the family. 22.50 22.50 23.00 44.60 22.40 22.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.65 0.71 -4.56 5 18 2012-10-03 02:27:24 2009-12-16 16:10:09 2 1 11 1 8 19 129 107.30 53 96.55 CHANGED SDocpIEuLIpGFAsscc.uSFLlsNsTsDFLAIRPSGNPIoAKGLsGMasSuDLVlEsSELlKIHRLEhhuushuaAlFTLsEpFSYKGspNcDLSTYTsIFKKlDGsWKISWMQRSoGTTDLSTWN ........tl.shlpthss.p..toFhhsNsTsDFLhIRPSGNPIsAcGhttMh.osDlV.EtuElsKIH+hEh.ss.shuhshFTLtppFoYKGs.NcDLsohT.IFKKlcssWKlsWMQRSoGsoDhShWs... 
0 1 7 8 +12543 PF12708 Pectate_lyase_3 Pectate lyase superfamily protein Bateman A agb Jackhmmer:Q184L0_CLOD6 Family This family of proteins possesses a beta helical structure like Pectate lyase. This family is most closely related to glycosyl hydrolase family 28. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.85 0.70 -4.23 90 2481 2012-10-02 14:50:22 2009-12-16 16:15:15 2 119 1227 26 988 3936 234 234.90 16 41.55 CHANGED aRN.V+D........aGAcGD..Gso....DDTsAIppAIt............ts..ssusosssAlVYFPsGTYlloss.....................Ih.hhhT.........pllG..sstshssl+...usssas...Ghsllsu.s.hht.........................pssFh.ht.....l+N..hhlDhsshsh.st..............sulcWts.........uQuoolpNl..........hhp........hsss....ssspp.Glah-s.....................uu........sshh.sclhhsGGshG...........h..hhus...........Q................QaohcNhp..........hss.spsuIthhhshs..hshps...hslsssts.....................G..ls ..............................................hs.l.p.-.........a.GAt.GD..........Ghs..........DD.T......t...A.lppAl...............................t.s.ss..ss.ss.h...V..a.h...P...s......G..s....Y.....h...lsps......................................................................................lh.l....h..s....s.s.....................................................p.l.....h.G.......s....s...h..........s..................s...h...l...t.......................s..s...s..t...h.t........................t....h..t....h.h.t.s...p....t...............................................................................................................................................................................................................lps.........hhh....s..h.s..sh...............................................................s.hpht.......................................................................sps....s.t..l.ts...l..........h....................................h.......................................s....
..........t...........................s...h.hpt.........................................................................................h........s....................s.....................................................................p...........................................................h..htp.........................h...................s..........................................................thh................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 293 584 826 +12544 PF12709 Kinetocho_Slk19 Central kinetochore-associated Coggill P pcc manual Family This is a family of proteins integrally involved in the central kinetochore. Slk19 is a yeast member and it may play an important role in the timing of nuclear migration. It may also participate, directly or indirectly, in the maintenance of centromeric tensile strength during mitotic stagnation, for instance during activation of checkpoint controls, when cells need to preserve nuclear integrity until cell cycle progression can be resumed [1]. 23.00 23.00 23.80 23.80 22.90 22.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.16 0.72 -3.91 14 100 2009-12-17 14:51:34 2009-12-17 14:51:34 2 2 99 0 76 98 0 82.20 38 10.97 CHANGED shpcphpppcp-sscplphlA-DLYsQYSSKHEpKVphLKKuYEs+apc+lcclphcspslpcEl-pLcspLp.ERcEKppLlplL- ............tt....phppss+EVpcAVE+VARELHsLYKuKHEoK..VsAL...KKSYEsR...WEK+l+-lcpclcshpcEsEcL+pph...............t................ 
0 11 35 63 +12545 PF12710 HAD haloacid dehalogenase-like hydrolase Bateman A agb Jackhmmer:Q18AI1_CLOD6 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.83 0.71 -4.14 272 9618 2012-10-03 04:19:28 2009-12-17 17:10:43 2 102 3541 13 3687 38047 3600 244.60 16 47.47 CHANGED shFDhDuTL.........hsss..h.............................hhhhhtth..........................hhh............................................................................................................................................hsth...........thhch..hh..............................tthtshttp...hhp..tht........................................................................pshh.................phl...t...............ppt....Gtp..lhllous.........p.....hhcsh............ht....thshst..llusph.....................................................................................h.....ttth...ssp.htt..........tKhp.....tlpph..................................................hhhGD..uh.s..Dlshlp 
..................................................................................................................................................................................................................................................................................................................................................................................................................................................hhhDhDuTL..........................hppc.......h..................................h...hsth.h..h..................................h.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h.h..pt...............sht.p.....ht....................................................tt.h..t...s.h....t..t.t.......hhp......tht................................................................................................................................................................hhtshh...............................chl...........pp..................lp..pt........Ghc.........lhll....oGu.................hp........hspsl.......................................sp..............phs.l..s...t...............h.h..us.ph.......t..........................................................................................................................................................................................................................................................................................................................................h.h.t.sch......ssp.....................t.Ksp...........hlpch..................................
..........................th.thttshuhG.D..us.N..Dlshl...................................................................................................................................................................................................... 0 1113 2065 2970 +12546 PF12711 Kinesin-relat_1 Kinesin motor Coggill P pcc manual Domain This family is closely related to Kinesin-related, Pfam:PF06548. 26.90 26.90 30.40 30.40 26.80 25.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.79 0.72 -3.58 20 117 2012-10-05 12:31:09 2009-12-18 13:10:42 2 3 54 0 61 122 0 86.50 42 5.74 CHANGED RE-KIpRLEsLhsGsLss-salh-Es..psLpcEIclL+ppl-+sPEloRaAlENhRLpEpl+phppFh.-pGEREhllpElspL+spL .......REDcItRLE+Ltcpuhsu.hsppp-p...ppL+-EIphLR-QlE+pP+ls+YAhENppLREEs+RL+thpsVcpspEh.spplupLEctF..................... 0 11 24 38 +12547 PF12712 DUF3805 Domain of unknown function (DUF3805) Coggill P pcc JCSG_target_3hlz Domain This family represent the N-terminal domain of the structure. In two related Bacteroides species the gene lies immediately upstream from a putative ATP binding component of an ATP transporter and a putative histidinol phosphatase. The structure of this domain is strikingly similar to the N-terminal structure of 1tui, also of unknown function. The domain carries four conserved tryptophan residues. 
21.80 21.80 21.80 21.90 21.30 16.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.89 0.71 -4.43 6 50 2009-12-18 14:33:49 2009-12-18 13:56:41 2 2 50 2 7 40 1 153.60 68 57.55 CHANGED M...KKaISPGuWFShpYPuDWsEFEDuEsSFLFYNP-+WTGNFRISAYK.....ssussYGp-slcpEL+ENsSApLVKVGcW-CAYSpEhFQEEGsaYTSHlWVTGt-sluhECSFTVPKGEsl+tAEcIIASLclRK-GsKYPtEIIPlRl.EIhpIN ...hQG.KKFISPGuWFSMpYPuDWsEFE.DGEGSFLFYNP-p.WTGNFRISAaK......GsAsYGK-sl+QELKENsSAoLVKVGph-CAYSKEMFpEEGsYYTSHLWlTGh--lAFECSFTVsKGssVcEAE-lIAoLElRKEGpKYPAElIPlRLSEIYpIN.................................................................... 0 1 5 7 +12548 PF12713 DUF3806 Domain of unknown function (DUF3806) Coggill P pcc JCSG_target_3hlz Domain This family represent the C-terminal domain of the structure. In two related Bacteroides species the gene lies immediately upstream from a putative ATP binding component of an ATP transporter and a putative histidinol phosphatase. The structure of this domain is strikingly similar to the N-terminal structure of 1ma7 whose C-terminal domain is a phage integrase, Pfam:PF00589. 20.70 20.70 21.40 20.80 20.50 20.00 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.62 0.72 -4.52 25 109 2009-12-18 14:36:53 2009-12-18 14:36:53 2 2 100 2 30 98 23 85.00 28 39.15 CHANGED pDlsslQpllDpuhlssccpp......s.hpuhGlslGslLssEh.tGh-WhsltDstscs.uLphtsoc.thlh..Phshlhc+hcsGptsslschY .........................DlpplQpll-puthssccpc......t.htuhGlslGslLssEh..Gh-WhslhDsttcs.sLpht..ssp.hhl...Phpllhp+lcsGp.sslhp.a............. 0 10 26 30 +12549 PF12714 TILa TILa domain Bashton M, Bateman A pcc Pfam-B_897 (release 5.2) Domain This cysteine rich domain occurs along side the TIL Pfam:PF01826 domain and is likely to be a distantly related relative. 
24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.61 0.72 -4.21 42 957 2012-10-01 23:42:56 2009-12-18 15:44:45 2 141 60 0 540 797 0 55.90 33 10.67 CHANGED GCpDspGshhPsGcoWhoss.CoppCsC.ssGtIpCpsapCsssohCp.p..sGspsCt .............GCh.hpG.p.Y.a.s..G.c.p.ah...s.ss..CsppCpC.t.s.s.u.tl....pCps..t.p.Csssp.h.Cplp..sGhhsCh.............. 0 58 113 320 +12550 PF12715 Abhydrolase_7 Abhydrolase family Coggill P pcc jcsg_3g8y Family This is a family of probable bacterial abhydrolases. 20.70 20.70 20.70 20.70 20.60 20.20 hmmbuild -o /dev/null HMM SEED 390 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.29 0.70 -5.54 4 102 2012-10-03 11:45:05 2009-12-22 11:09:37 2 2 85 7 21 133 18 320.60 36 84.06 CHANGED ppapPp-Hsll+SsRsDGRaLSSYGlVHsMLcchcPphAFpsDMSs+EFspWQctVR+AMpEIMKFPp.pcp.PuPVClKT.pREGYRLEKWEaYPhPcsVSTFLVLlPDsl..ppPVPulLCIPGSGtoKEGLAGEPGlssKLs-.cYpsP.KloMAhNhVKpGYlAVAVDNsAAGEAuDLE+YstGpNYDYDllSRFLLEhGWSaLGYsSYLDMQVLsWMKopsaIRKDRIVlSGFSLGTEPMMVLGsLDssIYAFVYNDFLCQTQERAhVMThPDKpGpRsFPNSIRHLIPsFW+pFNFPDIVAuLAPRPlIhTEGGLDRDFpLlppAYthuGtP-NschaHYPKFADPspRKcl-pLPEGLDRcpYFchVNVDsPsHYFKsELVIPWL+Kl 
..................................................................................................................hhtsh...........................................................ppatpWRpp...s.Rchlp.p.hh........h.hP..s.ssh....s.....a...ssch........l..c.pp...c.Rs....u.Y.hsE.Klth.slo..sc..SR..VsuhLLsPcth..........cGPaP..A..llhL..H..G.p....s..h....s....K....EK....hl............t.....h.G.s....s..sc...lss.......sh.sct.........tph..........hG..........c.p.........LA.....K+G....YVVlulD....u.luWG-.....R.....G....s...h.....p.....h.....pp..t................p..sLAsshh.....p.....LG.pShuGhh....uY-....D.M.R.slDFhsoLPpVD+cRIGlhGFSMG.uaR.uW.LAALsDpl.tAss....hhuahsph.....thhhhsssps....htu.tsuhhhLhPGlhp.phDaPDlASlAAPRPhLhh....N......G.........u.....cD.........+.........L.....F..........h..V....c..pAYt.......................................................................shthhhttt............................................................................... 0 4 14 17 +12551 PF12716 Apq12 Nuclear pore assembly and biogenesis Wood V, Coggill P pcc Wood V Family This is a family of conserved fungal proteins involved in nuclear pore assembly [1]. Apq12 is an integral membrane protein of the nuclear envelope (NE) and endoplasmic reticulum. Its absence leads to a partial block in mRNA export and cold-sensitive defects in the growth and localisation of a subset of nucleoporins, particularly those asymmetrically localised to the cytoplasmic fibrils [2]. The defects in nuclear pore assembly appear to be due to defects in regulating membrane fluidity [3]. 25.00 25.00 26.60 25.90 22.80 21.80 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.53 0.72 -4.27 34 86 2009-12-22 13:18:28 2009-12-22 11:51:49 2 1 85 0 63 74 0 52.00 27 30.96 CHANGED hPslssllhlllllalsh+llchhhRhhhhhlhhll+lsaassllshuhhlYhp ....Pclssllhlllhlalsh+lLshhhRhhhhalhhll+lhaassllssshhlYhR... 
0 11 31 52 +12552 PF12717 Cnd1 non-SMC mitotic condensation complex subunit 1 Coggill P pcc Pfam-B_410 (release 24.0) Family The three non-SMC (structural maintenance of chromosomes) subunits of the mitotic condensation complex are Cnd1-3. The whole complex is essential for viability and the condensing of chromosomes in mitosis. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.06 0.71 -4.43 77 571 2012-10-11 20:01:02 2009-12-23 16:26:08 2 33 298 0 412 1562 22 172.30 27 14.18 CHANGED sshlRsNhllsluDLshRassll-.a....sstlh......ppLp.....Dp..s.......................shVR+sslhsLo+Llhp-hlKh+Gpl.hchhhsL...tDssppIpshAchhhp-ltp+....s.shl....hNhhs-hls....tLsp..ttt............................hsppphpplhpallshlst..........-+ppcsls-Klst+httsp...............................................stp.hpclhhhLshhp .................................................................................hlRsNhlluluDlslpa..s.sll....-.h.....sstlh......tpLp...........Dp..s..................................................................V..R+...s...slhsl.......op....L.....I...h....p....s..h.....l..K....l.K....G...p.l........s.c.hs.t.s..............L......Ds..spc...I.sshAc..hhFscLs..pK........sssl...........................aNt...h.s-.hls....pLss..................................................................................................................................thpccphppIhcaL..l..p.h..lp..............................-.c.pt.pl.s..pKlstch.ts..........................................................stp.hpDhhhhLs.h..................................................................................................................................................................... 0 152 231 339 +12553 PF12718 Tropomyosin_1 Tropomyosin like Coggill P pcc manual Family This family is a set of eukaryotic tropomyosins. 
Within the yeast Tmp1 and Tmp2, biochemical and sequence analyses indicate that Tpm2p spans four actin monomers along a filament, whereas Tpmlp spans five. Despite its shorter length, Tpm2p can compete with Tpm1p for binding to F-actin. Over-expression of Tpm2p in vivo alters the axial budding of haploids to a bipolar pattern, and this can be partially suppressed by co-over-expression of Tpm1p. This suggests distinct functions for the two tropomyosins, and indicates that the ratio between them is important for correct morphogenesis [1]. The family also contains higher eukaryote Tmp3 members. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.97 0.71 -4.34 41 292 2012-10-03 05:16:33 2009-12-23 16:36:48 2 14 225 0 176 1155 1 117.90 33 58.23 CHANGED KhpsLKlEs-sAt-+sEphcpchKphEpcshcpEpElpSLp+KsptLEs-l-clEppLp-s+sth--uppttopsE....uLsR+lQLLEEELEcu-cpL...+ETsEKLccs-hpA-chERphpsLEpcppp.EcKhE-hpcphccu+t ...................................KhptL+l-t-su.p+.s-phctchKphEpc.sh.........p....p...Ep..ElpuLp+K...phlEsEl-...Kl.c.p....pLp-scpthc.p....s...pp..t..t.sps-...........shp.++.p.lE....pph-psc....tpl.......pps.p.clppsphtutp.t..Rt..h..hp...p.....t...-t+..t.......t........................................................................ 0 56 93 142 +12554 PF12719 Cnd3 Nuclear condensing complex subunits, C-term domain Wood V, Coggill P pcc Pfam-B_484 (release 24.0) Family The Cnd1-3 proteins are the three non-SMC (structural maintenance of chromosomes) proteins that go to make up the mitotic condensation complex along with the two SMC protein families, XCAP-C and XCAP-E, (or in the case of fission yeast, Cut3 and Cut14). The five-member complex seems to be conserved from yeasts to vertebrates. This domain is the C-terminal, cysteine-rich domain of Cnd3. The complex shuttles between the nucleus, during mitosis, and the cytoplasm during the rest of the cycle. 
Thus this family is made up of the C-termini of XCAP-Gs, Ycg1 and Ycs5 members. 25.80 25.80 25.80 26.60 25.70 25.70 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.86 0.70 -5.43 57 330 2012-10-11 20:01:02 2009-12-23 16:51:05 2 9 263 0 236 335 0 307.10 23 30.42 CHANGED h+CLtlspthLpplp.tslppshpl...sllssLlh...Pulpsp-s.slRchulcCLGLhsLLsc..........plAp-sh.lhhpth.pp.........s.ssplphhAlpslhDllhhaGhphhsspsp........................................stshslhclhh+hLcss......psc......lpshusEGLsKLhLsshlss........................................................spllpsLlltYFsPpopssp.................tLpQsLuhFhPlYsaSphpp..Q..pphtpshhsslhplhph.tch.........ssts.lp...........ssplhphllchTDscphsshsptt...................ttssH.tlu....hplLptl.......t..pscc...p+hlhphLs+Lhls..ps.s...ppphpth ...............................................................+CLhlstthLpphp...ts.hp.shsh........sllpsLllPulpstcs.slRphulhCLGlssLlsc......................phApcp.hsl.hhp..hh..pp.........s..ptslph.sA.....Lpsl..hDllh.ha...G.....hp.hhssptt..........................................................................................................s.ttsllcl.hhch...Lcsp..........ssc....................lpshuscGluKLhLsshlss........................................................sclLp.pL.llh.aFsP..to...pp.s...........................p.L+QsLu.hFhPsas...........h..u.....ph......pp.Q.........pthp..cshlss.lps.lhss.ts....................ss.s.ls...........hsplsphllchTcspths..s.sptt.................................................t.ssH.slu....hclhppl............ht...spt..tt+hhhphLspL.ls...t........th...................................................................................................................................... 0 86 133 200 +12555 PF12720 DUF3807 Protein of unknown function (DUF3807) Wood V, Coggill P pcc Pfam-B_6113 Family This is a family of conserved fungal proteins of unknown function. 
25.00 25.00 30.50 59.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.88 0.71 -11.46 0.71 -4.10 19 65 2010-01-04 17:41:02 2010-01-04 17:41:02 2 2 65 0 54 64 0 172.50 31 78.96 CHANGED DLpuFpAcHFssps.................sssshst................tppshsE-h............DDsLGYYPDGsKRTLTDEQIpIFRHSEIcuLhRc+chpc.-p....t.............hp.ttchps..t..s..tp.tt......................ttpppssptpc+ptpstpcpsspps...........h.ss.ttstssLcYs-csstttpt.......psssshuRRllS..........Y-D ...................................DL.sFaspHFsspsh.................................ptp.ttt................tpsshpE-.t...........DDuLGYYPDGVKRTLTDEQIpIFRHSEIc..uLhRp+chpcppp................................tpstspssspt.pts..stssptpttps............................sppppstppppKp.ppshpcppspts.........tpphsssptuh...ssL.-hsppt...............sp..tR+hls................................................................................................................................ 0 8 23 41 +12556 PF12721 RHIM RIP homotypic interaction motif Coggill P pcc Masci AM Family RIP proteins are receptor-interacting serine/threonine-protein kinases or cell death proteins [1]. This interacting domain is involved in virus recognition. The RHIM domain is necessary for the recruitment of RIP and RIP3 by the IFN-inducible protein DNA-dependent activator of IRFs (DAI), also known as DLM-1 or Z-DNA binding protein (ZBP1). Both the RIP kinases contribute to DAI-induced NF-kappaB activation. RIP3 undergoes auto phosphorylation on binding to DAI [2]. 
17.20 5.30 17.20 5.30 17.10 5.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.52 0.72 -3.55 28 243 2010-01-08 12:49:32 2010-01-08 12:49:32 2 18 70 0 89 231 2 48.60 23 10.48 CHANGED PsPpsss.sssssspss....................ssssshslsIp.sssulQIGspNhMslp .........................................................................................................s...................................tss...ltIp..su.pslQIGssNhMph....... 2 23 32 39 +12557 PF12722 Hid1 High-temperature-induced dauer-formation protein KOGs, Finn RD, Coggill PC pcc KOGs (KOG2226) Family Hid1 (high-temperature-induced dauer-formation protein 1) represents proteins of approximately 800 residues long and is conserved from fungi to humans. It contains up to seven potential transmembrane domains separated by regions of low complexity. Functionally it might be involved in vesicle secretion or be an inter-cellular signalling protein or be a novel insulin receptor [1]. 20.50 20.00 20.70 20.00 20.20 19.90 hmmbuild -o /dev/null HMM SEED 895 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.58 0.70 -13.63 0.70 -6.87 28 348 2012-10-01 19:21:38 2010-01-08 16:35:52 2 7 227 0 260 575 7 599.10 26 91.93 CHANGED 
MGso-SKLs..F+pulhcLspp.............................................pth..ss...............--s...aWppFW....chspospDlFs.Lloss-IRplRDps.sNltoLlhtlsp+lhhhspp.ssh..............shs.pp..plLNClRlLTRlLPalaE....tppWcs..taFWusp.c.hh..t...............................tt.psspPLAtpLlcuLlDLLFhssFTlssst..........pshspspa.IWEuGVGsssshsp.......stch-uNRsElL+LLLTlhScshYhsss.......sspss+alsahsosss+p.lLsLhsSLLNsss+Yssss....hslPY.splhapDs+p..............................hLVphsL................QlLllhlsashssssph...................................................shscNhahpYLu+lHRcpDhpFllcGhs+lLppPlp...........................................................................................pohlPsss+.lpa..t.EhLhLhWchhphNK+Fhpalh-opcshDlllhhlYalhpa+sssu+hG....................................ll+hssalLLhLSu..-+sFsh+LNcsass........ppLP.........h+lsshsG.....TauDalIl.hppll.............sosptp.p.lhssLlpllhNlsPY.......................................hpsLShsuuspllpLlsshSoPtFLhu................................sssNacLLthLL-shNshlpapFcs................NtpLlYullRp+clacpLtshsh-.upp.hptpsp.............p.............................................t...tshtsspppsstps.ptss..pssps.t.sphs.p.ssshshssspsptsp..cspss.ssss....................t.s.t.pt.p.tsttp.sthp.thspppsspp.p.sssh..psptpatsossWhcSWtscL..sLpoIhpll....pslhspl.phshppuhss-u.hl.....chhpcsplpulls.......sPpPlpl+paphsshohtWacolhWGhIaspph..............................................................................................................t............................................................................................ssslWssTclKLFcl 
.................................................................................................................................................................h..tL.t............................................................................................................................s........................sp.........hatthh.....t...p.p-lht.hhs.tpl+.ht.............ptt...NhtsLhhhhs.plh.hh..pp........................................p....p.....hlNs.Rl..Ls+llPhhh.....E..........thpt..................hhWt..........................................................................................................................t..sLuppLl.slhcLLFh.sF.....Tl.......................................................t.tp....h.lWpsG..luhsts................s..ht.sR...hElL+LLlshh.upshY.s.t................................s.hl.hhss.............sp......hhshhsSllNssh....ths.ss...................tls..a..s.hh.h..t..s.pp..............................................................hl..sh................phL.......h.hl.a...........................................................................tNhhh.a.hsp.lp+.t...Dhp.....hl.....hpuh..plht.sh...............................................................................................thhst......p...h...h.phhh.hhaphhphNppFh.hh.hp...t.s.c....hll...hl..h..hh...ctp..t.p.s.....................................................................lhphs.all.hLSs....c.th...h.p....L.p..hp..........plP.....hc..ls..h..s.......................oat.Dhhl.h.hhp.ll.........................hss......t...............l..h..shhhthl...N..l.sh.............................................hptlu..ss..plhpLhtthSs..a..Lhs......................................................................................s..ph.....hl..l..Lc.hsshlth..phpt....................s..Llasll..+ppphhp..tlts..h..p.st.........t............................................................................................................................................................................
......................t............t.................t..t............................................................................................................t..............................t.........................................t.........th..s..a.............h.sW..ph..sLphlh.pll.....p.l...l.....t.....t.t....t.............t................h..h.........................sh..t...hp...............s..h.hhWh.thhWu.la.tt................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 101 146 222 +12558 PF12723 DUF3809 Protein of unknown function (DUF3809) Bateman A agb Jackhmmer:NP_295729.1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in Deinococci bacteria. Proteins in this family are typically between 117 and 157 amino acids in length. 25.00 25.00 42.00 73.70 23.40 17.00 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.94 0.71 -4.28 6 19 2010-01-08 17:04:36 2010-01-08 17:04:36 2 1 19 2 13 19 1 127.90 38 90.81 CHANGED oFsLphPus.ApuLs....Pthshuth..h+sLpt-uEtlcGELVspsPlLGElDLPFpSRLphpspGt....pLhPhsLsGE.s..WlEVuGpupssEsG.lsaphphRsHLsLPpuEsWGGtAFc+MlcAAhuRTLERVhppL ........................................shs.h...h-Lch....-upplcGpLhtpsPlLGElcLPFtSRL....-Gt....pLpPlPL...ss..hlEVpGpucss....u.....tu....lththclcl+LpLPpGcsWGtRAFt+hlcAhFtRsLERsLst.... 0 3 8 13 +12559 PF12724 Flavodoxin_5 Flavodoxin domain Bateman A agb Jackhmmer:Q186N5_CLOD6 Domain This is a family of flavodoxins. 
Flavodoxins are electron transfer proteins that carry a molecule of non-covalently bound FMN. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.72 0.71 -4.19 70 1747 2012-10-03 05:08:30 2010-01-11 11:17:35 2 13 1363 0 401 2543 510 137.00 25 68.21 CHANGED LIlYuSpcGpT+cIup...hlu...pplptt....sphsslps..h...t.th..slspaDp..VllGAul+hG+apstlhpFlppa...tstLsst.suhFsVsLs........uccspc.........h...hc.+h......l.p..sh..a..p.....PphhtlFu..GuL.pYs+Ysah-+hhlphI.hph...stup...s-ss .............................................lIlYu..o.p..p..G.pT+clAp...hl...A....p...pLpp.................sths.....clps.......s..............................p.......p....................s......h.p..sY.......Dp.....lllG.u.u.l.h.h..G...+.h..p...s..t..h..p.c.Fl.c.+.p.....t.s.p.L..s..s.....h...s.u..h..as.l..shs............upc.s.pc..............................h...hp.ch......l.p...p.....h.....p...........st..hthhs..Gtl...h..phthhc+hhhphh.ht...tt.....t................................................................................................ 0 140 253 344 +12560 PF12725 DUF3810 Protein of unknown function (DUF3810) Bateman A agb Jackhmmer:Q185R6_CLOD6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 333 and 377 amino acids in length. There is a conserved HEXXH sequence motif that is characteristic of metallopeptidases. This family may therefore belong to an as yet uncharacterised family of peptidase enzymes. 
26.10 26.10 26.30 61.80 25.40 26.00 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.94 0.70 -5.53 60 206 2012-10-03 04:41:15 2010-01-11 13:16:19 2 1 203 0 62 211 151 314.90 30 87.94 CHANGED hYSpslYPhluthlphlhuhhPFSlGD..lhhhl..hllhllhhlhhthhphhpphtph...............h.hphhthls....llYhhFhlhWGlNYaRhshtpph..........slphtp.Yos-cLhphsptllppsNphptplsp..........spshhhths..s...pclhcpsh.puYpplsppashhp..hp.hspsKsslhShhlShhGlsGhhsPFTsEAplNsplsshphPhThsHElAH.lGaupEsEANFIuYLsshpsss.ha+YSGahhuLhYslspltc.hs.ctapclh.pplssslhc.shppsppaWppa.cs.sl......spltphha-tYLKuNsQpsGhcSYuchVsLLlu .............hYupslYPhluthlshhsuhhP.FSlG-.lhhhl....hllhll.hah.hht.hh.phhpphpph......................lhphhthls....hlYhhF.....hlhWGlNYhRhshhpph..............................pl.phtt......ao......s-cltphspphlpphNphtsplsp............ssp..h..............ppltpcsh.puYpplsppashhp...sp..hspsKsh.lhS.lhShhGlsGhhsPFTsEuslNspl.shphPhThsHElAH.hGaupEsEANFluYlsCppusshth+YSGYhhsLtYslssltc.hst-tapclh.pplpst........lhc..shp.ppptaWppaps..hl.........spl.tshhactYLKuNp.psGhcoYuchVsLLls........................................................... 0 31 54 59 +12561 PF12726 SEN1_N SEN1 N terminal Mistry J, Wood V jm14 Pfam-B_2547 (release 24.0) Domain This domain is found at the N terminal of the helicase SEN1. SEN1 is a Pol II termination factor for noncoding RNA genes [2]. The N terminal of SEN1, unlike the C terminal, is not required for growth [1]. 
20.60 20.60 21.70 21.70 19.40 18.90 hmmbuild -o /dev/null HMM SEED 727 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.10 0.70 -13.27 0.70 -6.48 34 152 2010-01-11 14:18:12 2010-01-11 14:18:12 2 9 140 0 119 164 0 637.60 21 33.42 CHANGED Wlpp+hsppLpsCspClpsatpuKsphhpphh..cphstpp.lppFhphlspW-hpplhtsLcphppphpss..s..p.s........h.hulaEsLsssshL+sssthcth...FsthFchl.sppphh...h.........pphl.PGhhhhLF-s.ss..ppppWApphhpphtpp...hsppphs.slhptlshtlhpl...........s..ts.hs.shhtp.......FWpshthll..phlsp-hlppthps............htlps.lh+lhhsHLtss....stsLthlLcsLphlLc+hsssF.Ws.thpsho.p.........sll-plFssstFsphLhcsppss.hpppt................hpshh...uWhhsFhpSLsssp.p.................psscpls.hLlpphpp.pp.sp.......t+tsshtsuhshLlpslhs....hpcpcps.......shpsslhsls-shshlssth........shh.sssth..h.tpstslsphshpllppslsh.DhhtLppppttLhps........sth.sss....phhs.......lWpplhpthh.psshs...LuptlLtuht..........slsul.hh........spp..............phsstpppaNphhpphtphhspll............p+lu-h....sPspLpplhs.cp..ssspulhSsl.hSss.plhpAAhsllppsh.s.ssuRhEulpclLppphssslpulshslpplpphcs....apssP+.hl+shhDllssLsDs.sGlL.....pspphht....pssttplt....paWchtWphLshIFppThtWusp..actp.hh-FsRDTL-huchLh-paplhsssls.t.sss...........pssps.............lhpsshpshpshlhWLRLsDc.LLsssVpL 
.......................................................................hhp+hpt.LppCs.Cltpaapu+tthhpphh...cphsppp.lpph.phlsph-hpRlhpsLppspthhpph.s.ptt.....................................thhhul.aEsLss.thLppsp.hpth...Fs.thFphlpsppphh..h.........................pphl..PuhhhhLF-t..st........ppppWAhp.hppht.p......hst.pp.h.......s.slhp.l..tlhth.........s...tt.hs.t...hltp............................hWpuhthll..phhsp-.lhpplps.............htlp...s..hhcl.hhsHLthp....s.sL...lL.....p...slphhLc+hsppF.Ws..shtshssp..........sll-.lhtsstapthLhp..psp...pt................................................................h.shh...uWh.sahpSLptsp..p.................psschlhh.Lhpp.hpp..t..t.......sphtChhtuhshLhpslhs....hhptpts...........phpsthhhhsph..h.thltp.h........................phh..ss.h.......t.tlst.shtlltpslth.-ht.htpph.hlhpt....................sh.............ph.t.lWptlhpth...pssht.....LupthLhuhh.............slhul.hh.spt....................tttppaNthhtphtphhspll............t+hu-h..........psppLpplhp.st..ps.tuhhuhl.hssp.phhpuAhplltth..s...tuRh-ulpthlpp.ht.sh.uhshshppl.phth.......atshs+.hl+hhhDllpsLs-s.sGhL.....pstph......ptphhtl........phWp..WphlshlappT.tWt.h..hcht.h.pFsRDshphuc.Lhsphtlhtsslt..t.t.t..................................s.tpp................lltsshtshpshhhWLRLpDp.Lhpshlt................................................................................................... 0 35 71 106 +12562 PF12727 PBP_like PBP superfamily domain Bateman A agb Jackhmmer:Q18A58_CLOD6 Family This family belongs to the periplasmic binding domain superfamily. It is often associated with a helix-turn-helix domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.88 0.71 -5.35 186 867 2012-10-03 15:33:52 2010-01-12 08:16:29 2 14 708 0 360 5148 918 188.80 29 45.74 CHANGED spthtpt..ss.hp..lt..hthsGShsGLtuLt.cGcsclAuhHL..h.......................cs.css..p..........aNhs.hl.......pch.h.ssh.s.hsl.....lphspRppGlhlttG...NPt......s...............................lpshpDL....sc....tslphlNRpp.GuGoRhLlDphL....pp.t....slss......................splpGYsppttoH.hsV..........................AssVssGp...ADsG.............l.G.l......pss.....At..ph.uLcFl.......Pl.sp...........EcYDLll.+phh.pc..stlptllphlp .............................................................................................................................t............t..ht..lt..ht.....a.hGShsu.Lh.s.Lt....p....G....c....s....c.l.....A...uh.H......L...h...........................................c.s..cs..s..p.........................hN..hs..hl....................c.ch....l.sst....s..hsl........................lph.h.......p....Rp..p..G.l....h....Vt.pG....NPp...................p........................................................................Ips....h...t...D.L.............sc................ssl.+...aV..N.Rpp....G..S..G..o.R....h.Ll.D.p.h...L........pt..t........slss..................................pp.l..s.G..Y.p.....p..p...t...h........o....H..hu....V......................................................Ast.Vu.sGp.......A.D..l.G..............................................l...G....l............c.s..s.......At.......ph.....u...L...-...F..l........................P..l.tc............................................EpY...D.l.ll...+.s...t.h....p..c...h.lptllphh.t....................................................................................................................................... 
0 130 258 307 +12563 PF12728 HTH_17 Helix-turn-helix domain Bateman A agb Jackhmmer:Q18A58_CLOD6 Domain This domain is a DNA-binding helix-turn-helix domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.41 0.72 -3.71 461 7911 2012-10-04 14:01:12 2010-01-12 09:58:31 2 67 2491 0 2062 7447 1041 50.90 22 40.39 CHANGED hlospEsA.chLs.l..uppo.lh.c.h...h.....c....pu..pl.sh.......t......t......hthpcp-lppalpppp ...................hospEsA.chLs..l.....SppT.lh.c.h.......h.........p.............pG.....pl.sh..............+.....hGpp....hhhpps-lppalpt..t.................................. 0 792 1556 1892 +12564 PF12729 4HB_MCP_1 4HB_MCP_2; Four helix bundle sensory module for signal transduction Coggill P pcc Ulrich L Family This family is a four helix bundle that operates as a ubiquitous sensory module in prokaryotic signal-transduction. The 4HB_MCP is always found between two predicted transmembrane helices indicating that it detects only extracellular signals. In many cases the domain is associated with a cytoplasmic HAMP domain suggesting that most proteins carrying the bundle might share the mechanism of transmembrane signalling which is well-characterised in E coli chemoreceptors. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.64 0.71 -4.93 65 4710 2012-10-02 01:04:29 2010-01-12 14:21:04 2 84 1259 0 1579 4943 147 176.50 14 30.92 CHANGED sh..plpsKLhhh..hhlhslhhhllG..hlGhhshpphspshpshYpcpLlslphlsplcsshtphcstlhcllhs.p-psc...ppplhppl.pphppchsphhppYcpshhssc.E..+c.hhspacpphppY.pptpppllsLsppsphc.....cAhphhpspst..shhpphhpslpcLhphstphAcpttppspsph ................................................................h.pltp+Lh..hu...Fu.llhll.hl.hlu...shul..h..p...l..sp...ls..ssh...p..pl...h....p...s...p...h...s..sh....phh...s...p....lp...s...s..h....t....p....h..p...hs......h...h...p.............h....l......h....s....p.....s....s...pp..............hp...p....h.t...pp...l....pp....t.....p.....p....p....hp...p....t...h........p....p.....h......p......p.......h........h..........h.....s......s...p.....-......cp...hh.p..p..h.p...pt...h....p.p.a...h...s.....h...h....p..p.....h...h...p.....h..h.....p....p.....s.p..h...p........................p.A..h...t..h.......h.h...s...p.h.t......shh...p...t.h...t..p.tl...p.p...l.h.p.h...pt...p.stt.............................................................................................. 0 404 885 1233 +12565 PF12730 ABC2_membrane_4 ABC-2 family transporter protein Bateman A agb Jackhmmer:Q18D57_CLOD6 Family This family is related to the ABC-2 membrane transporter family Pfam:PF01061 [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -12.05 0.70 -5.20 293 7890 2012-10-03 10:13:34 2010-01-13 08:24:29 2 18 2165 0 1696 13517 1344 250.10 12 82.58 CHANGED psEhhK.h...t...+...s.t..hh....hl.hhlhsll....hsh..hhh......h..hh.t...........................................................................................................................h...t....tt..h.......t....hh..........h..........h...............................................t....hhhh..........hhhs..l...hlsl.hs.uh.hh..s.tEh........psst........hp....hhhstshs+tplhhuK....hlshhl.h.hh....l.hh.ll.hhhhh.hl.h..shl.hs....h.........t.s.h.sh.s.................................hht..h.h...hhhhhh.s...l.hh.h.hh.......hhhl..h.h.....l......uh....hhc.....s....h.hhs...ls...l.sl.h....hh..h...hsh..hh........st..h.h.......................hhh.sh..s......................hthl..h.......h..th..........h.......t.s....h........h.t........s......h..s..h..........hh.s....lh.hh...hhhhllhhh 
.......................................................................................................................................................................................................................................................................................................................................-hh+.h.....h...p...p.h..hh....h.h...h..h..h..h..h..h.l.........h.s.h...hhh.........h.h.h..t........................................................................................................................................................................................................................................................................................................h...........tt...h........t.....hh.............................h..........................................................................................................................hhhh...........hhhh.......l......h.hsl.....hs.....s..h.....h...h........s...pEh.............pp.ss...........hp......hhhs..t.s.h.s...+..h.ph.hhuK.......hl...s..h...hh.h..sh......l...hh....l...l....h.h....h.h.s.....h.l..h....s.h.l..h.t...h...........t..s.h.shs.......................................................................h.ht...h...h...h...h..h..h...h.h..h....h...lh..h..hh..................hhhl......hh.........l..........uh......hhc..........s.............thhs.........lh........l.sl..h...........h.h..l......hsh....hh..........sh..h.....................................................hhh..sh.t...................h.hh.....................h....t............h..................................................................................h......h.............................hh......hh..hh...hhhhh....h..................................................................................................................... 
0 700 1290 1510 +12566 PF12731 Mating_N Mating-type protein beta 1 Coggill P pcc Pfam-B_4610 (release 8.0) Family This domain is found in some fungi and is the C-terminus of a homeodomain-containing transcription factor protein involved in mating. 21.30 21.30 21.50 22.00 21.00 20.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.76 0.72 -4.11 24 64 2010-01-13 13:49:11 2010-01-13 13:49:11 2 4 19 0 5 61 0 90.90 21 18.04 CHANGED hs.ts......DpplppsLsslcpcFhuuLcs-st.s.LssFhouaspFcshlpShpspLss-TlshlhsFussluslosshl-hpupppsht.cchss .........hs.........t.DtpltpsLpshcps.alsuLpssst.s.lpsFhsphpp.h..psthp..u.t..p.s..pL...sspThphl.hsFushltslosshlclcsppsplp.sch..t......... 0 5 5 5 +12567 PF12732 YtxH YtxH-like protein Bateman A agb Jackhmmer:Q18C91_CLOD6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 100 and 143 amino acids in length. The N-terminal region is the most conserved. Proteins is this family are functionally uncharacterised. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.56 0.72 -3.61 235 1575 2010-01-13 15:49:26 2010-01-13 15:49:26 2 9 1168 0 352 993 72 104.10 22 81.49 CHANGED hlhuhllGuslGussuLLhAPcsG+-sRcclpcpsp....chtcphpch.t.....................................lppphpp...........................................spcpspchhsc ........lhuhlhGussGAssuLLhA....P.cpG+-hRpclpcthc.............chpcps..pchp.pp.sppp...............................................................................................hpcphpp.........h.tt....................................hppthp.....ptttt......................................................................................................... 0 141 267 317 +12568 PF12733 Cadherin-like Cadherin-like beta sandwich domain Aravind L, Coggill P pcc Aravind L Domain This domain is found in several bacterial, metazoan and chlorophyte algal proteins. 
A profile-profile comparison recovered the cadherin domain and a comparison of the predicted structure of this domain with the crystal structure of the cadherin showed a congruent seven stranded secondary structure. The domain is widespread in bacteria and seen in the firmicutes, actinobacteria, certain proteobacteria, bacteroides and chlamydiae with an expansion in Clostridium. In contrast, it is limited in its distribution in eukaryotes suggesting that it was derived through lateral transfer from bacteria. In prokaryotes, this domain is widely fused to other domains such as FNIII (Fibronectin Type III), TIG, SLH (S-layer homology), discoidin, cell-wall-binding repeat domain and alpha-amylase-like glycohydrolases. These associations are suggestive of a carbohydrate-binding function for this cadherin-like domain. In animal proteins it is associated with an ATP-grasp domain. 22.10 22.10 22.20 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.67 0.72 -3.70 218 661 2012-10-03 16:25:20 2010-01-25 14:57:29 2 169 264 0 277 708 92 93.00 22 12.69 CHANGED LssLsl......................s.s......Fs..ss..sts.Ys.s..sVsp...sss.s..lslssss.p..............................lplsG...t.........................s...slsL..s............G.ss...lslpV......sup.......sG......s...........................s..pp.Ys.lsl....pRt .....................................................................ltlp................s.s......Fs...ss....tts.Ys.s..pVst...sss.s....lslsspst....................sss...sp.l..p.l..sG..hts............................s..tslsL...s............GtNp....lslpV.........sup.............cG......s.............................s...pp.Ys.lslpR................. 0 145 224 258 +12569 PF12734 CYSTM Cysteine-rich TM module stress tolerance Aravind L, Coggill P pcc [1] Family The members of this family are short cysteine-rich membrane proteins that most probably dimerise together to form a transmembrane sulfhydryl-lined pore. 
The CYSTM module is always present at the extreme C-terminus of the protein in which it is present. Furthermore, like the yeast prototypes, the majority of the proteins also possess a proline/glutamine-rich segment upstream of the CYSTM module that is likely to form a polar, disordered head in the cytoplasm. The presence of an atypical well-conserved acidic residue at the C-terminal end of the TM helix suggests that this might interact with a positively charged moiety in the lipid head group. Consistently across the eukaryotes, the different versions of the CYSTM module appear to have roles in stress-response or stress-tolerance, and, more specifically, in resistance to deleterious substances, implying that thes might be general functions of the whole family. 21.90 21.90 22.00 22.10 21.80 21.40 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.59 0.72 -3.88 88 306 2010-02-01 16:08:54 2010-02-01 16:08:54 2 4 97 0 189 281 0 50.20 33 55.31 CHANGED s................P.ssYsp.......................t....tppp.sss....................hhpGCLAALCCCClh-hsh ....................................P..uYsp................................ts....sppp.tsu..........................hlpGClAAL...CCCCll-tCh...... 2 33 98 150 +12570 PF12735 Trs65 TRAPP trafficking subunit Trs65 Coggill P pcc manual Family This family is one of the subunits of the TRAPP Golgi trafficking complex [1]. TRAPP subunits are found in two different sized complexes, TRAPP I and TRAPP II. While both complexes contain the same seven subunits, Bet3p, Bet5p, Trs20p, Trs23p, Trs31p, Trs33p and Trs85p, with TRAPPC human equivalents, TRAPP II has the additional three subunits ,Trs65p, Trs120p and Trs130p [3]. 
While it has been implicated in cell wall biogenesis and stress response, the role of Trs65 in TRAPP II is supported by the findings that the protein co-localises with Trs130p, and deletion of TRS65 in yeast leads to a conditional lethal phenotype if either one of the other TRAPP II-specific subunits is modified [4]. Furthermore, the trs65 mutant has reduced Ypt31/32p guanine nucleotide exchange, GEF, activity [3]. 29.30 29.30 29.40 30.00 28.90 28.50 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.72 0.70 -5.11 38 136 2010-02-01 16:49:17 2010-02-01 16:49:17 2 4 134 0 104 139 0 297.20 24 49.38 CHANGED cssshhs.......hphshtphslshpsG..plp.hsp...h.....hPlphpspDslohhYKLss..................ss.st.s..................lplplphphh.......tsspspIt.hpWpTtlDFuh.h......P..spslppsspss...o.sh.......ssstssts.h........................................................sshsshtttspstst.ulphoFtGs...sVplGcsFsWpl.llNpSs.........................pshcLslhs.s..................ph...p..t..spsts.t........................spslh.pshhthhpp.st.p..........pssllsLosDl+lGPL.Psssaps-lchlsltsGhh.sL-u....l+lhDhpos-sh..-htclsplls 
.............................................................................................................................................................................................................hsshhp.hph.hpph..pLthtsu..plcslsps..h....................thPlpshspDplohhY+Lss......................ss...........................s.tlpIslphp..l.l........s.ssp.splp.hpWpTplDFshsh..........................s..sp.slp.p.sppss...phsh...........sss.ssts.h.s....................................................................................................................................................................................s.ssss.t.htsss.s..hGlphoF.us....sVpsGc.FsWplhllNpSs...........................................................................ps+clslhs.s.................................ppph..tsp..ht.sps.ssst.t.tt......................hspslhs..-Nhlpthp...+psthp.............tstllsLSs.DhRlGPL.Psssats-LchlultsGhh.sl-u....l+llDlsosEth........-ltclsslls............ 0 30 59 90 +12571 PF12736 CABIT Cell-cycle sustaining, positive selection, Aravind L, Coggill P pcc Aravind L Domain The 'CABIT' domain (for 'cysteine-containing, all- in Themis') is found in a newly identified gene family that has three mammalian homologues (Themis, Icb1 and 9130404H23Rik) that encode proteins with two CABIT domains and a highly conserved proline-rich region. In contrast, Fam59A, Fam59B and related proteins from mammals to cnidarians, including the insect Serrano proteins, have a single copy of the CABIT domain, a proline-rich region and often a C-terminal SAM (sterile-motif) domain. Multiple-sequence alignment has predicted that the CABIT domain adopts an all-strand structure with at least 12 strands, ie a dyad of six-stranded beta-barrel units. The CABIT domain contains a nearly absolutely conserved cysteine residue which is likely to be central to its function. CABIT domain proteins function downstream of tyrosine kinase signalling and interact with GRB2. 
20.30 20.30 20.30 20.40 19.70 20.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.67 0.70 -5.37 28 405 2010-02-01 17:20:35 2010-02-01 17:20:35 2 12 102 0 234 418 0 231.50 21 49.09 CHANGED lPpll+.lp.puphptst............sphlhlpsstp..hpplh.....Apslpppp......hu.plpIPhsYpGpFchhsp..........FpoVp-l........uphhstplhspcshphp.....................s.t................phhlpp...G-plplh......................ttttctttthhp.......................slhshs.pssc....hlpLPhptcGpF.phhspp........paTlc-llc..phcLPhpVclssss.......sh.....t.hstl+Lhshhp-shllsssltpppps..............hclPhp...hplclhtscs..t.ststhhpthhp..p ......................................................................................sp.hhlpptt.........phhh.......hp..htp..tp..........ph.lP.s.Yt.G..hF+hhsp...............asos.-l........upths...c.lhsh..cshpht................................................phslpt...G-plplh.................................t.t.p.t..hhp............................................C.hh..shp..pppc.........tl.LPh.ptcG..pFs.phtstp..........................t.aolpplhp....hcLP.hsVpls.sts..........h...p.ht..h.t.hph..phh.cshlh..hpsh.p.t.h................hch..t...h..ph...pt.................p................................................................... 0 47 66 127 +12572 PF12737 Mating_C C-terminal domain of homeodomain 1 Coggill P pcc Pfam-B_4610 (release 8.0) Domain Mating in fungi is controlled by the loci that determine the mating type of an individual, and only individuals with differing mating types can mate. Basidiomycete fungi have evolved a unique mating system, termed tetrapolar or bifactorial incompatibility, in which mating type is determined by two unlinked loci; compatibility at both loci is required for mating to occur. The multi-allelic tetrapolar mating system is considered to be a novel innovation that could have only evolved once, and is thus unique to the mushroom fungi. 
This domain is C-terminal to the homeodomain transcription factor region. 22.50 22.10 23.80 23.80 22.40 21.30 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.39 0.70 -5.59 20 42 2010-02-01 17:28:21 2010-02-01 17:28:21 2 4 13 0 7 43 0 402.90 27 65.73 CHANGED sstlEsEFAuItupAcpLYp-KFspSpLAs+LDsAV+DhTPsLKtplpsc+p+++ppsp.pp..sp+ut+sYPoPEpSPss...uphhhSPsssslp.shspssspss++RcsSh...to-sccpsppssKRsRsssspsp...............p....uLPSPusSsh--Lsp.ssss.sosph.Pstsssss.................osKRKR+LSDu....thPAsKRPp.......RspslSDPhPh..tt.ss.spWhpthh.sssplhhhhslPsPVolhsPDss.........sP.............LDlplasFslh..hs.pssssssPsus...................ssspshsssslssssh.ssssssLD.shS.atsssasss......Lpsss.s..h..shP.s.ss......hsts..sh..........................hsshsssshshosLhspPssssssss....ulhssupso.ssssuLs...puphcAKp+ELcELcA+stALcAElA ..........shlEtpFAsltspAppLYspKFppSpLAs+LDsuV+DMTss.......l+tp.............hpp...c+tc..........ccpptp.pt........t.............p..........+A....t............csYPSPp..tSPsu...sphhhoPs.........s..ss.p.....ht.sts.h.......sp+RtpSh.....ssp.ptpsptssKR.Rp.shppp......................ssttsLPS...PssSt.-p.....s...ss.....ssh...sss.s.................shKRKRpLSDu....thPusKRsp......sRspssSDPhPh...tstpp..pa........sspl..hhslPsPVos.ssDss.........sP.............lDlplas.ashh.phs.psssh.sssut...................................s.pshplsuhsp.s.....s.tsssLD.shs.a.ssshs.s.............................lpssp.......shP.s.os......spts..sh...................................ssshshsslhspssssshsst........sl.s.upss.ssshsls........ppphpAKpccLctLpApstAlpAElu......................................... 0 6 7 7 +12573 PF12738 PTCB-BRCT twin BRCT domain Wood V, Coggill P pcc Pfam-B_181 (release 24.0) Family This is a BRCT domain that appears in duplicate in most member sequences. BRCT domains are peptide- and phosphopeptide-binding modules. BRCT domains are present in a number of proteins involved in DNA checkpoint controls and DNA repair [1,2]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -8.90 0.72 -4.11 49 1637 2012-10-02 11:51:29 2010-02-01 17:45:22 2 88 308 12 1119 4819 1157 64.10 25 10.14 CHANGED lhlslouassp-....RptlpphlpthGupastsL.s+.....psTHLls...tpspGpKYctAppas.ltlVshpW ....................................hlsh.os.h.tspp.......+pplt.phspthGu.......p.h..p..ps..l..sp.........csT.H.L.ls....................tp..s...p......u.....p..K....ac..t...A...p........c......h........s...l..........lVsspW.......................................... 1 356 574 889 +12574 PF12739 TRAPPC-Trs85 ER-Golgi trafficking TRAPP I complex 85 kDa subunit Coggill P pcc manual Family This family is one of the subunits of the TRAPP Golgi trafficking complex. TRAPP subunits are found in two different sized complexes, TRAPP I and TRAPP II, and this Trs85 is in the smaller complex. TRAPP I, but Not TRAPP II, functions in ER-Golgi transport [1]. Trs85p was reported to function in the cytosol-to-vacuole targeting pathway, suggesting a role for this subunit in autophagy as well as in secretion [2]. The overall architecture of TRAPP I shows the other components to be Bet3p (TRAPPC3), Bet5p (TRAPPC1), Trs20p (TRAPPC2) , Trs23p (TRAPPC4), Trs31p (TRAPPC5), Trs33p (TRAPPC6a and b) and Trs85p. 
25.20 25.20 25.30 25.40 25.10 25.10 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.55 0.70 -5.82 60 382 2010-02-01 17:51:56 2010-02-01 17:51:56 2 9 271 0 274 400 4 394.30 22 43.06 CHANGED sop......s.s...sshpshtphhpphpphs..............P.ahpss..t..hh.halLlaDs....stsshppspplhpph+p.....phGh.psplLpl...............................tsppsh.s.t.........................s.assh.cph.t.t.t........................................................................................................t.tshhlshs-h.psl+shlp-hlspsllPa.........................ME+clphhs-plss.RKGl..ss+hhphs...+..+a...as........susssssssssts.............................................................................t.hYshsSsEttlR+LADhuFhLpDa.chAhosYchl+p..DapsD+AWpah.....AuspEMsu....luhhhss.p.s..............................tpp.hp.hl-s........Ah.sYhp........................................................................hp.....huhR.shllssEl....L...................pshsth.sust.hhchs......sc......pht........................pAllhEpsuhsa................................................................hps.ts.hh..thR.....KtuhahlLAu...................ccatpssphppAhpshppAhtlYs..........................................ttsWsthp-a............ltt 
.....................................................................................................................tshpth.ph.tp.p.....................P.aht.s...ph..lc.halllHDt.....ttss.pp..........spthhcph+p.....phuh.pshlLpl............................t...sp.....pss.p.............................s.h.ph...pt......t...................................................................................................................................................................................................t.hssh.ls.pDh.sslc.shlp-hshptllPa.........................hE+plphhs-p.lss..........++ul...ssphh.hs.....+..+a...as...................ssptssts.ss.t......................................................................................................................................................................t.hYt.posE.hthR+luDhsFhlpca.clAhstYchh+p..Da..pDpAhhah.....Au..sh.EMsu....lohhhtsts.................................tp...t.hh-p........Ah.sYhs.........................................................................t......huhR.ssllhsEl..............l........................+spst..h..puss...hh+ht.......pp........tlt....................................................................suLlhEpsuhsa...............................................................................................................................................................................................hp..h...h...hR.....KhuhahlLAu...................ppatptsphppuhpshppAhtlap............................................................................................................................................................... 0 83 136 219 +12575 PF12740 Chlorophyllase2 Chlorophyllase enzyme Coggill P pcc manual Family This family consists of several chlorophyllase and chlorophyllase-2 (EC:3.1.1.14) enzymes. Chlorophyllase (Chlase) is the first enzyme involved in chlorophyll (Chl) degradation and catalyses the hydrolysis of an ester bond to yield chlorophyllide and phytol [1]. 
The family includes both plant and Amphioxus members. 20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.87 0.70 -5.50 21 87 2012-10-03 11:45:05 2010-02-01 17:54:43 2 7 51 0 43 2038 336 210.10 26 60.91 CHANGED sPPKsLllshPsptGs..YPVllFhHGah.lhNsh.YoplhpHluSHGaIVlAPQ.................hYs........lhs...ssspcElpssAplhsWLsp...GLpshLP.....ss...VcPshp+luluGHSRGGKsAFAlALGh.ss...................plpaSAllGlDPVs.....Ghspstps.P.lLTa.pPpSFchs.hPshVIGTGLGs.t+.s.hhPsCAPsGVsHp-FasEC..psPshHFVApDYGHhDMLDDs.......shhu.thshshCKsGt...s+pPMRRFlGGlhVAFLpshlpGc.tscLpt .......................................................................................t..............p.....p.....t..tt....a....PVllFh.p......G......hh......h........s....s.....h.....Y.....s.....p....l.....h.....p..H.......l..A...S.....a.G.a.l.V.l..usp..............................................................ht........................hts.........s.s...p.....p...l..p......h..h...t....l......h....s..W.....h.sp...........sL...p...s.......h.s...........................t....s...p..s......D........h......s..+.....l..ul..u.GHSpGGc...s.u.h.s.hs.htt....................................................................hp..s.l..h.hl-....P...s.s....................t............h....................h.sh....p.......h..t...s......h.Ps.hl..h..GoG..hu..............C....sstuhsat....pFapth..tssthth........h..tphG.HhDh..lcss.........................hC.ts..........hh....s..huh..............s.................................................................................................................................................................... 0 17 35 39 +12576 PF12741 SusD-like Susd and RagB outer membrane lipoprotein Coggill P pcc JCSG_target-390164 Family This is a family of SusD-like proteins, one member of which, BT1043 (Swiss:Q8A8X4), is an outer membrane lipoprotein involved in host glycan metabolism. 
The structures of this and SusD-homologues in the family are dominated by tetratrico peptide repeats that may facilitate association with outer membrane beta-barrel transporters required for glycan uptake. The structure of BT1043 complexed with N-acetyllactosamine reveals that recognition is mediated via hydrogen bonding interactions with the reducing end of beta-N-acetylglucosamine, suggesting a role in binding glycans liberated from the mucin polypeptide. Mammalian distal gut bacteria have an expanded capacity to utilize glycans. In the absence of dietary sources, some species rely on host-derived mucosal glycans. The ability of Bacteroides thetaiotaomicron, a prominent human gut symbiont, to forage host glycans contributes to both its ability to persist within an individual host and its ability to be transmitted naturally to new hosts at birth. 20.50 20.50 21.00 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 529 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.94 0.70 -5.92 5 684 2012-10-11 20:01:02 2010-02-01 18:05:50 2 5 145 12 136 1575 477 436.30 24 89.00 CHANGED MKhpNIKshhph...LSlusLlLGusuCTuNF-.DINoNPhtlTc-DhchDshslGuhhssLpuuVl....s.pssocsNsYQlppsLsuDsauGYau.psTsFsuosNhoNYshsssWss..hlachlaosVYsshpclcphS..-socsPshYALApIlKVAAMHRsTDhYGPIPYSKl....Gp.sohsIPYDSQE-VYsuFFKELDEulplLs-plssupsu.....hpuhDhV.YpG.....cVpKWlKFANSL+LRLAIRIVpVcPuLAKEhAEKAVs.ppuGVI-sNsDNAch...sssshoNPLthIussWs......DTRMuADlhSYMsGYQDPRtAsYF-c.........s.thsssYKGlRsGIsl.ppK-sapsYS.......+PslosocPlhWMNAAEVsFLRAEG.ALRGW.NMGGTAp-LYEpGI+LSF-QaGl.sSuAsoYlADsTssPAsYTDPssscssAsAlSuITV+W-EGAopEEK.LERIITQKWIAhFPpGQEAWSEYRRTGYPKLlPVlsN..NSGGsIsos...sGlRRLPYPpoEYsuNupsVpcAV.uhLGGPDNGAT+VWWDKK 
................................................................................................................................................hh.........................h......h...h................uC...t.....t..t...ap..phNp...s.........t.......s.............................t............t...........h.t..h....t...hh...................................h..Q.....h.......s.h...s.s....h...u...s..a..ht...........t.............s..t..p......s....s..a......s..............p....s.....a..t........h.a..p......t.......h......h..t....t...h..h...s.....t..h...p..l.......t.......................t......t....t......s.............h.....h......u...h.s..p......lh...cs...hhh...tp...h.sDhaG..s.lP..Y..s.ph............ut.....t..........t.....h....t....s....................Y.....D..s.pcp.....l..Y....p........t....hhp-.....L.sp...A.hsh...lp...t.......p.t..s.....t...............h.t...ph..D..h.l...a.t...G..............shppWh+aANSL+LRhAh....R...ls..........s....s.....s......t.....h....A.....p......p...........sp.....p..A...l.....p.............h......G...l.....h....p..s.s.s.........-........s...s.hh........................s....h....p.....N.....s.....h..........h....h..............t....as...............................-.....t....h....s....u...s...h.........s......h....h........t.....u......h....p............D........P.R...h.......h.h.....at.............................................................t.t....t....a.........G......h..................G......................................t....t.......t...........h...t.......t......h.S.............................t.........h....t......t.....s.....s...........h....h....h....h.psu..EshFLpAEu...ul..p....sa...sh.s...s...s.......A...p..s...h.Y...cp...Gl.........p........h.........S.......h.......p......p................h...........s...........h.........t..t....h.t.t...Y.h...t.............p.....t.....................t............................t.a........s..s.......................t.................................................s..t...h....s...............t
......a......pt........s....s......s...............p...p..p..L-pIlT.QK..al..A......a.P.....u....E.u...Ws-h...R...R...T...G...Y.......P......p....h........h........s....s...h...s............s....s....s........l.sst.......................p......R.h.as.......s..t..p.t..s....t..t.h.t..t.uh....t..h.....L........s.........G...............D......p.h..sTclWWsh.......................................................................................................................... 0 64 120 136 +12577 PF12742 Gryzun-like Gryzun, putative Golgi trafficking Coggill P pcc manual Domain Members of this family are involved in Golgi trafficking. 21.30 21.30 21.80 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.59 0.72 -4.16 7 7 2012-10-04 00:47:01 2010-02-02 09:42:44 2 2 5 0 6 116 1 58.60 26 9.76 CHANGED th.pcs-lh....htV-...ScsFhhpG.pplphplhsGpcpch.asFhPLhsGh.hLPplsI .............t....tscLl....lpV-p..sctFhlsGhs.phphpl..supphpl.apFlsLpsGhhhLPpIpl.......... 1 4 5 5 +12578 PF12743 ESR1_C Oestrogen-type nuclear receptor final C-terminal Coggill P pcc Willis S Domain This is the very C-terminal region of a subfamily of nuclear receptors that includes oestrogen receptors and other subfamily 3 group A members. The actual function of this region is not known, but the domain is absent from all the other types of nuclear receptors. Oestrogen receptors modulate AP-1-dependent transcription [1] through two distinct mechanisms: via protein-protein interactions on DNA; and via non-genomic actions. The mechanism used depends on the cellular localisation of the receptor. In addition to the more extensively studied cross-talk on DNA, additional non-genomic actions might be very important in target tissues in which membrane-associated ERs are found. These non-genomic actions probably contribute to the overall physiological responses mediated by ligand-bound ERs [2] and might possibly be mediated via this C-terminal domain. 
22.00 22.00 22.60 23.70 19.20 21.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.09 0.72 -4.42 17 117 2010-02-02 09:45:35 2010-02-02 09:45:35 2 7 69 0 25 113 0 42.60 53 8.76 CHANGED Pss+sus.hEEpspSQL.TouSTSuHSLQsYYls.pEtEsh.sTl ...PsuRuuushEEssQSpLuTsuSTSSHSLQsYYIs.tEsEuhPsTl..... 0 1 1 7 +12579 PF12744 ATG19_autophagy Autophagy protein Atg19, Atg8-binding Wood V, Coggill P pcc Wood V Domain Autophagy is generally known as a process involved in the degradation of bulk cytoplasmic components that are non-specifically sequestered into an autophagosome, where they are sequestered into double-membrane vesicles and delivered to the degradative organelle, the lysosome/vacuole, for breakdown and eventual recycling of the resulting macromolecules. In contrast to autophagy, however, the Cvt pathway is a highly selective process that involves the sequestration of at least two specific cargos that are resident vacuolar hydrolases, aminopeptidase I (Ape1) and alpha-mannosidase (Ams1). These proteins are sequestered within a double-membrane vesicle, termed a Cvt vesicle. The Cvt vesicle is fairly consistent in size, and is much smaller than the autophagosome, being 140-160 nm in diameter. The prApe1 is sequestered within either Cvt vesicles or autophagosomes, depending on the nutrient conditions, and delivered to the vacuole. Autophagy and the Cvt pathway are topologically and mechanistically similar and share most of the same machinery. The Ape1 complex is ultimately enwrapped within either Cvt vesicles or autophagosomes at the perivacuolar PAS. The receptor protein Atg19 binds to the Ape1 complex through the prApe1 propeptide to form the Cvt complex in the cytosol. In the absence of Atg19, prApe1 can form an Ape1 complex, but does not localise at the PAS. Atg19 is a peripheral membrane protein with differing binding sites for both Ape1 and Ams1. 
The Atg8-binding region in the yeast proteins is this very C-terminal residues [3]. 25.00 25.00 34.90 32.90 21.30 21.20 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.57 0.70 -5.11 10 35 2010-02-02 09:48:03 2010-02-02 09:48:03 2 2 24 6 16 35 0 228.10 40 50.46 CHANGED tsIplPED+PELlsFFo.....clcTscQLp-VaptY+sYE+LhQchDupch+h..h....T.huoscuhKphsIphE........ssPN-cLLplphup+DNSLaFpLaNpTNplluGNCpLcF.....psooQIp..h.IcMGPHEIGIKpaKEhha....FPpsho.hussTh-lVNQDGElIhlGKhusSs.IsL+sP.uphSstShQsu.....Q-Ps.sFcsDoLsp.D-SSIlSTshshphD.....Gss.p+shTWEEl ......................................................s.sIplPED+sELlsFFo.....clcTspQLp-VaptY+sYE+...L.QchDuc.............T..huuscuhKptpIp.E.........sPN-tsLplshsp+DNSLaFpLaNpTNpllsGNCpLcFp......spsooQI......IcMGPHEIGIKphKEhpa..............FP.thsh.us.Tlcl.N...Q.G-VIalGKh..u...s..S.s..IsL.+....sP.uphS....sp.ShQsu...........Q-..Ph.sFphD.sLsp...D-S.SIlSTohohphD.......Gss.p+sh.TWEEl............ 0 2 7 12 +12580 PF12745 HGTP_anticodon2 Anticodon binding domain of tRNAs Wood V, Coggill P pcc Pfam-B_20896 (release 24.0) Domain This is an HGTP_anticodon binding domain, found largely on Gcn2 proteins which bind tRNA to down regulate translation in certain stress situations. 
21.00 21.00 21.10 21.10 20.80 20.80 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.66 0.70 -5.25 30 206 2012-10-02 17:25:11 2010-02-02 10:11:00 2 13 179 0 150 351 11 226.00 26 14.78 CHANGED W+spRCDVLVuSFssslLcosGlcllppLWupsISADl.hcss.ShE-llsphppDGhsWIVllKQ.................................t.csp.+sLKVKslsppc..DsDlc.h-EllsaLcsEl.....pt.+p..pcptssspsp.hppsspptsshss..........................pppslhl.sstsRuK.KsN++spW.Ehc.tAptuoppllcshh.....suPIhAlDs.RDElL-hIphToLu.pt-pW.R+Vhts.ssss.RpYsspIastLt..+pts+up+...........hAhlYshRTGcsslhDLp ......................................p+CDVLV.sShss...ss.L.o.pulpllppLWstsIoA-lhh-..s...s.......S....E-lhptsp..ccs.hsallll+p....................................sut....lK.VKslppcc.....-h-..l..p...ts..-Llsalppcl...........cp.+......pctt..t...t..t.....t..s...s..s...........................t.....l.l.sstp.h..ut.p.p.+R..h....p....h..t.tplhpp.......p.h...lhsl-h.p-plh.phl..p.....p....hs.p.pta....pplht.......hs.h.+.p.ahttlhp.l.......p.tt...c....t.............hhlas.h+ss............................................................................................................................................... 1 41 80 119 +12581 PF12746 GNAT_acetyltran GNAT acetyltransferase Coggill P pcc JCSG_pdb_2jlm Family Many of the members are annotated s being Zwittermicin A resistance proteins, whereas others are listed as being GNAT acetyltransferases. The family has similarities to the GNAT acetyltransferase family. 
20.60 20.60 20.60 20.60 20.50 20.50 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.64 0.70 -5.57 2 486 2012-10-02 22:59:21 2010-02-02 13:52:51 2 2 325 2 48 433 12 231.20 31 95.29 CHANGED Mh.ph.....cphsphFtDaspsllhohLpGhMGshaVsD.p.PpsA.hhhG...hFsFhAGps+....c-LL+.h.s+.hllV.ps.pWpchlEshYtctIcpFhRYthK+ssE.FDhu+LQpLlssLPcua-h+RIDcNlhpsshlcchS+Dhsupa.sVEpalshGlGYsILaKGpVVsGASSYu.YssGIEIEVsTccsYRthGLAphsuAtLILsCL-+GlYPsWDAtshTShKLAEKLGY.hDKsYpsY...........p.p .........................................................................................................................................................................................hlh....hl...p.u...h.Gp..lah....ss.......p..s..ssh..h.....h.G.......aha..h..uG....p.....s...........p...............p....c.h...h.......p....t.........h.......t.....p.......t.....h....h.............l.............l.....s....ps.......p...p.W..pphl......c.p..h.....h.p.p......t.l...pp...............hs..Rht...a...p.p..psp........F.p...p.t...hpp...h....s.p.l..p.sYp..........lp.ID.p.ch.hpp.........t.....p..c.....a....o...p....-...h........hs.p.a..p.S.h.c.s.F.....l.p..........t.G.hG....as...I.l..h..s..s..p..l.l...u.s.ssS.hh.s..a.p.st..h..EI-.ls..T.c.sa.pscGLA.ptluuth...I....h....-CL....p.....p...s...lh...P...t...W...D..s..c..N..h..s..St+lAcpLGaphstsYpsa..............h.................... 2 21 36 42 +12582 PF12747 DdrB DdrB-like protein Bateman A agb Bateman A Family This family includes the Deinococcus DdrB protein which is a ssDNA binding protein. This family also includes some possibly distantly related cyanobacterial proteins. However, these are not strongly supported. The structure of DdrB is known. 
25.00 25.00 33.70 33.50 21.40 20.00 hmmbuild --amino -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.64 0.71 -4.41 6 17 2010-02-03 15:48:23 2010-02-03 15:48:23 2 1 11 5 15 19 0 133.50 37 80.75 CHANGED M............pl.ahss.hsKsThsl..tshLhDVpcthsR.s..................hosuE..sssYQaPL-p.hsFDWshIGAR.chossEGtplV.aRG+uacRRc.pssD....KhsAAIhFSpusKs.........sDG-hc.YlpLhhFRstp .....................................pl...ss.hscsohpl...s.Lh-Vpphhup.s..................hoss-.sssGaphPLspttsFDWshIGAR.hhossEGtphV.a+G+sa+RRcLpssD....cLPAAlKaSRGAKs.-.....chsDGchc..YVoLhhFRst........ 0 1 9 15 +12584 PF12749 Metallothio_Euk Eukaryotic metallothionein Wood V, Coggill P pcc Wood V Family This is a family of eukaryotic metallothioneins. 21.80 21.80 22.50 21.80 21.20 21.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.49 0.72 -11.35 0.72 -3.94 4 26 2012-10-05 18:33:37 2010-02-03 17:50:25 2 2 15 0 5 61 0 67.00 50 92.47 CHANGED Mtt.sp...TpsChCsss.....CpCG..csCsCp.....stCGCssCKs......................uC+CSusssssCKCT...SCpCpp....p ......Mss.spsh.TssChCs.ssps..t.hC+CG..cACpCt.....ssCuCs.sCKs....uC+Css.s.ps.ssC..cCoustsC.Ctp.t....... 0 4 4 4 +12585 PF12750 Maff2 Maff2 family Bateman A agb Jackhmmer:Q187G1_CLOD6 Family This family of short membrane proteins are related to the protein Maff2. Maff2 lies just outside the direct repeats of a tetracycline resistance transposable element. This protein may contain transmembrane helices. 27.00 27.00 29.30 29.30 26.70 26.20 hmmbuild --amino -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.41 0.72 -4.45 7 359 2010-02-04 13:16:13 2010-02-04 13:16:13 2 1 171 0 29 229 33 58.10 69 94.26 CHANGED MtFFsSAlssLpTLVlALGAGLuVWGVINLLEGYGsDNPGAKSQGlKQLMAGGGlhLIGhTLlPLLSuLF ......MtFFspAVsVLpTLVhAlGAGLGlWGVINLhEGYGNDNPGAKSQGhKQLMAGuGlhllGh.LlP.Lushh..... 
0 23 29 29 +12586 PF12751 Vac7 Vacuolar segregation subunit 7 Wood V, Coggill P pcc Pfam-B_10847 (release 24.0) Family Vac7 is localised at the vacuole membrane, a location which is consistent with its involvement in vacuole morphology and inheritance [1]. Vac7 has been shown to function as an upstream regulator of the Fab1 lipid kinase pathway [2]. The Fab1 lipid p[pathway is important for correct regulation of membrane trafficking events. 25.00 25.00 32.90 26.00 22.70 22.40 hmmbuild --amino -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.56 0.70 -12.54 0.70 -5.61 7 172 2010-02-04 17:05:01 2010-02-04 17:05:01 2 3 110 0 128 174 0 228.40 26 39.02 CHANGED IVETETVSSIPQVuLGsGsGERGsuuRsD.uGolRhKsSsETIRP+KEKKKo.RKPsA.LssGssSSKADIFEAKVASAVDEADsSDS-ETFVYESNPPDsaPsRp.RYHSRTPSATSMASQsDQhuGRoRhuhRDs.HulTGKRSMKFTNs.sY..uol-GDh.spcsu+upuRss.Gp.sHTsRHHHIGR.HGR.us...aPSLFDs-SPFspSQ.p.+SsRHalusuhRQupp....R.sssNYRoh.susKKsG-.YsYDFD.uEGADDERTPLVGSs.RssRSRpG.RRPNSASLRQMEYMppRpRShFuRYGhChllhlLlllllGGAToFllulh+PLlDVpVhtIQNVLASEQEIMlDLpVpAlNPNLhsloIsDMDVNIFAKSRYVGoDph.WR 
......................................................................................................................................................................................................................................................................oKsDhF.tA+lAsA..Vs-sp.SDStE.....T....FlY-sss..p..............................................................................................................................................................................................................................................................................................................................................................................................................................................................h........hh..h.h.hshhhu..p.L.thtl..hpp.lsSpt.lhhsh.htAhNsshhslsltthphplFA+S.a.............................................................................................................................................................. 0 24 64 111 +12587 PF12752 SUZ SUZ domain Aravind L agb Aravind L Domain The SUZ domain is a conserved RNA-binding domain found in eukaryotes and enriched in positively charged amino acids. It was first characterized in the C.elegans protein Szy-20 where it has been shown to bind RNA and allow their localization to the centrosome. Warning- the domain has a compositionally biased character. 27.00 27.00 27.30 27.30 26.80 26.80 hmmbuild --amino -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.07 0.72 -3.44 57 527 2010-02-10 14:08:56 2010-02-10 14:08:56 2 8 208 0 305 471 0 58.00 34 9.30 CHANGED ptsphpIh+R....................Pstsssps...............tssspsttsptscolEEREtcYpcARpRIFusss .................................phhIL+R.................................sttsssss.........................tp..pss.pss..ptsKSlEEREpEYpcARcRIFupt.s............. 
0 78 133 211 +12588 PF12753 Nro1 Nuclear pore complex subunit Nro1 Wood V, Coggill P pcc Pfam-B_4826 (release 24.0) Family In fission yeast, this protein is a positive regulator of the stability of Sre1N, the sterol regulatory element-binding protein which is an ER membrane-bound transcription factor that controls adaptation to low oxygen-growth [1]. In addition, the fission yeast Nro1 is a direct inhibitor of a protein that inhibits SreN1 degradation, Ofd1 (an oxoglutamate deoxygenase). The outcome of this reactivity is that Ofd1 acts as an oxygen sensor that regulates the binding of Nro1 to Ofd1 to control the stability of Sre1N [2]. Solution of the structure of Nro1 reveals it to be made up of a number of TPR coils [3]. TPR proteins are composed of three to 16 tandem peptide repeat motifs of 34 amino acids with degenerate sequence. The helical pairs adopt a helix-turn-helix anti-parallel arrangement with interacting helices. In general, TPR motifs are stacked together so that helix A from TPRn is packed between helix B from TPRn and helix A from TPRn+1. In Nro1, the 12 alpha helices forming the six TPR motifs are organised as follows from N terminus to C terminus - TPR1A, TPR1B, TPR2A, TPR2B, TPR3A, TPR3B, TPR4A, TPR4B, TPR5A, TPR5B, TPR6A, and TPR6B with the C-terminal helix (hC) running above the sixth TPR motif with an angle of approx 45 degrees with TPR6A and TPR6B. The corresponding TPRs structural motifs are longer (50 residues) than are canonical ones (34 amino acids) and are organised into two subdomains - Nro1-N (residues 55-225) and Nro1-C (residues 226-393). The Nro1/Etti protein plays a role in nuclear import suggesting that it is residues 4-19 that are interacting with Ofd1 [3]. 
20.80 20.80 21.00 20.80 20.50 20.20 hmmbuild --amino -o /dev/null HMM SEED 404 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.31 0.70 -5.52 18 53 2012-10-11 20:01:03 2010-02-10 15:57:53 2 2 46 5 37 55 0 357.50 36 97.70 CHANGED M.A..KRsLGLGKts+tKKpKh.............psspsscpsss....ssplpVELsEthDs-DEluQL+uLWcsYhcS-+DsEl.....llNGIlHECDRLLRpp...............pp-cth........pLsc.FHuIYALALuELshF+st-pp.............plppaF.DsALERs-lG.hpphscS.hLhlspuKIllp+IPLpYISpLss-Spsp....pl.p.L-pAhpsaphsppphp............sh-lLphlDDLLDIl-NFG+c.pp.pEs.DsD-......t-t.-..plcLscpHPLYtlpps.t.N.pWhR-phlthLsslpc...........................spLhRpls++LGQsYLpcAE.PosVFsoLpYD-..t.ps.cp.hph..tcpAQchAppLhccAlcYLccAcs........---P-TWVslAEAhIsLGNLa-s-ScEQEchYpcAEcILpKANpATpGKYcDlL-NLL ........................................................MA..KRsLGLGKts+tKKpKhpp...........ppppsstpsss......pspholEL..s--sDh-DEluQL.......cGLappYhpS-+DsEh.................lLNGIlHECDRLLR.p...............-ppp...........pLss.hFauIYAlALuELshF+st-pc.............plppaF.-sAlERl-hG.Lpphscs..LhlshuKIlhp+IsLpa.ISpLplcScsp.....phcl..p.L-puhctaphh.cpcsp..........................sh-lLp...hlsDLLDIl-NFG+c.pp.p-s..Dp-s......................---.c...plcLp.pHP..la.lpps.phs.pWhRpph.phlcshpp...............................plhtplspplGp.YLpcAE.PsplahsLpY-c..t.tt.pp......pshptppuQchA.chhppAlcYLcpAp.........c--P-TWVplAEAhIsLGNLh-.-StEQEchYppAEcILt+ANpuopGKap-lL-Nl.............................. 0 12 24 36 +12589 PF12754 Blt1 Cell-cycle control medial ring component Wood V, Coggill P pcc Wood V Family During size-dependent cell cycle transitions controlled by the ubiquitous cyclin-dependent kinase Cdk1, Blt1 has been shown to co-localise with Cdr2 in the medial interphase nodes, as well as with Mid1 which was previously shown to localise to similar interphase structures. 
Physical interactions between Blt1-Mid1, Blt1-Cdr2 and Cdr2-Mid1 were detected, indicating that medial cortical nodes are formed by the ordered, Cdr2-dependent assembly of multiple interacting proteins during interphase. Q5KIH8.1/10-346; Q5KIH8.1/10-90; 27.00 27.00 37.10 29.50 21.10 26.60 hmmbuild --amino -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.04 0.70 -4.83 29 75 2012-10-03 10:59:06 2010-02-10 16:13:33 2 2 70 2 62 73 0 210.10 38 91.22 CHANGED Fs.....................................KSFLuuLDS..........................RPlKLs....................................................uDaVhDPcshshpsP.................YTLPRLps.PHP......................................MPKKh...............................KpstsPGSSKSIolp....................LKSARNPsLcloLsNs.sluo.............................TSVp-LK-sVppRl.ssp...........ssplPlDKIK.....ILaK+KPV...sucTlu-lLus..-sthluGGcElEh.GVMIhGGA.pls.s...........................................tttttttp.pp.hss.....ss.u......so...................uppVltTEtFW-D...............................................................................................................................................LpuFlpp+lK.DttpApplpsl..F+tAWpus 
.....................Fs.KoFLusLDS..........................+PlKLs....................................................uDaV.DPcsasspsP.................ahLPRhss..+........................................................MsKth...............................ppshsPGo..s...+S...IoVp....................LKSs.RNP.sL.clsLssh.slso.............................TSl.-lKpsVpp.................psplPlDKlK.....lLap+KPV...suKolt-lL........us.......su.spplEF.uVMlhGGAssh..ss....................................................t...t.........................................s.tllts-tFWtD...............................................................................................................................................Lpsah..+l+.s...utph..h..F+tuW.t.t................................................... 0 10 28 48 +12590 PF12755 Vac14_Fab1_bd Vacuolar 14 Fab1-binding region Wood V, Coggill P pcc manual Domain Vac14 is a scaffold for the Fab1 kinase complex, a complex that allows for the dynamic interconversion of PI3P and PI(3,5)P2p (phosphoinositide phosphate (PIP) lipids, that are generated transiently on the cytoplasmic face of selected intracellular membranes). This interconversion is regulated by at least five proteins in yeast: the lipid kinase Fab1p, lipid phosphatase Fig4p, the Fab1p activator Vac7p, the Fab1p inhibitor Atg18p, and Vac14p, a protein required for the activity of both Fab1p and Fig4p. This domain appears to be the one responsible for binding to Fab1. The full length Vac14 in yeasts is likely to be a protein carrying a succession of HEAT repeats, most of which have now degenerated. This regulatory system is crucial for the proper functioning of the mammalian nervous system. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.18 0.72 -3.60 35 386 2012-10-11 20:01:03 2010-02-10 16:22:32 2 28 266 0 281 523 5 95.30 42 10.54 CHANGED NtRpGGLIGLAAsuIALGpc......sspalcpIl.PVLsCFsDpDsRVRYYACE......SLYNIuKVu+uplLh..aFN-lFDsLs+.....LsADsD.sV+sGAcLLDRLlKD .....................tRpGGLlGLAAsuluLupc..........sstYLccll.PVL.sCFsDpDsRV.RYYACE......uLYNI.s.KV.u...+.G..c....l.Ls....aFNclF.DuLsK.....................Ls.uDs-.s..V+suA.EL.LDRLlKD................................... 0 99 154 226 +12591 PF12756 zf-C2H2_2 C2H2 type zinc-finger (2 copies) Wood V, Coggill P pcc Pfam-B_88 (release 24.0) Family This family contains two copies of a C2H2-like zinc finger domain. 21.70 10.00 21.70 10.00 21.60 9.90 hmmbuild --amino -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.23 0.72 -11.09 0.72 -3.90 112 2196 2012-10-03 11:22:52 2010-02-10 17:43:06 2 245 346 2 1419 5276 41 77.00 18 19.96 CHANGED p..CLFCspp.....ss.hc.......pslpHM......hppHuhalP...-pcaLs..DhpGLlpYLtcKlsh......tphClhCppp.tp...ohpul+pHM..psK.sHs+lsh..csptp....h-hscFYDa.psshs ...........................................................................t...................................................................................................................................................................h.t.........................................hpChh......Cspp...ap....................ohp..s..lptHM....p.p..p...t...Hh.t.........................................t..................................................................... 2 388 637 1011 +12592 PF12757 DUF3812 Protein of unknown function (DUF3812) Wood V, Coggill P pcc Pfam-B_8029 (release 24.0) Family This is a family of fungal proteins whose function is not known. 
25.00 25.00 31.90 31.90 20.00 19.60 hmmbuild --amino -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.35 0.71 -4.15 39 146 2010-02-10 17:51:15 2010-02-10 17:51:15 2 1 115 0 106 150 0 123.70 26 14.39 CHANGED spssLhpsApcNscsplpsh-pcshtp..............shphppchpcpAlttAppp.....tptppppssplslG......GGhals.s-l-slApphlpPsLc-Isc+A.......-tpRsp-h-t+hcpcctccptppt+pcEcct+p ..................................pttlhttApcssptpLpsl-pcshtp....................th.p-apppAhthAppp......................tptpppspsplslG......GGhahs.p-lsslApp+lpPlLs-Is-+A.......cppRtc.......-tph+hcpcctccptppt+pc-cphc................... 1 17 53 90 +12593 PF12758 DUF3813 Protein of unknown function (DUF3813) Mistry J jm14 PfamB_1273 (release 24.0) Family This is an uncharacterised family of Bacillus proteins. 25.00 25.00 45.30 26.00 24.10 24.70 hmmbuild --amino -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -9.17 0.72 -3.83 4 128 2010-02-11 10:51:27 2010-02-11 10:51:27 2 1 128 0 17 59 0 62.90 68 92.96 CHANGED MtNpLaQpA+shVpphlSpuuus...EQQpsl.+AKNAlpSAYANSosAE+pQL+phQ-QLppls ...MGNLLFQQARDAVtsAVSCS.S.Gs.......EQQ-L....VYRAKNALpSAYANSSTAEKVQLREMQEQLQsIp... 0 1 9 11 +12594 PF12759 HTH_Tnp_IS1 InsA_C; InsA C-terminal domain Bateman A agb Bateman A Domain This short domain is found at the C-terminus of the InsA protein. This domain contains a helix-turn-helix domain. 21.70 21.70 21.70 22.00 21.60 21.60 hmmbuild --amino -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.20 0.72 -4.53 4 1328 2012-10-04 14:01:12 2010-02-11 16:39:48 2 7 456 0 75 676 9 42.40 62 42.85 CHANGED YoYcAppPGhKEpIl-MAhNGAGsRhTARsL+IGINTVlRTLKNSR ..........aTYsAsQP...Gs+p.KIIDMAM.NGlG........sRsTARl.ht.VGlNTll.RpLKNSt............ 
1 16 31 61 +12595 PF12760 Zn_Tnp_IS1595 Zn_ribbon_3; Transposase zinc-ribbon domain Bateman A agb Pfam-B_3 (Release 24.0) Domain This zinc binding domain is found in a range of transposase proteins such as ISSPO8, ISSOD11, ISRSSP2 etc. It is likely a zinc-binding beta ribbon domain that could bind the DNA. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild --amino -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.85 0.72 -4.15 73 764 2012-10-03 10:42:43 2010-02-11 17:07:24 2 7 279 0 235 786 170 47.70 34 15.88 CHANGED ssEcpstphLpphRWssG.....hs.CP+CGsp...chaplps.........tthapCpp..Cp+p ...............s-ppChphLpphRWPpG.......hs.C.P+Cusp...tthphpp...............pph..a.pCps......Ct+p.............. 0 51 128 153 +12596 PF12761 End3 E3; Actin cytoskeleton-regulatory complex protein END3 Wood V, Coggill P pcc Pfam-B_51079 (release 24.0) Family Endocytosis is accomplished through the sequential recruitment at endocytic sites of proteins that drive cargo sorting, membrane invagination and vesicle release [1]. End3p is part of the coat module protein complex Pan1, along with Pan1p, Sla1p, and Sla2p [2]. The proteins in this complex are regulated by phosphorylation events. End3p also regulates the cortical actin cytoskeleton [3,4]. The subunits of the Pan1 complex are homologous to mammalian intersectin. 
25.00 25.00 25.80 25.00 23.80 23.30 hmmbuild --amino -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.44 0.71 -4.66 21 129 2010-02-11 19:37:22 2010-02-11 19:37:22 2 5 114 0 90 124 0 172.20 38 48.58 CHANGED sKDtsLsFLHILNQRcc.GhRIPRsVPASLRAoFpKpp.sYsLs......pspsph.tssssssTss.pKtpFu-sYLs+l...Ghusp..........shp...ppGTDFSu.spsoDWEEVRL+RELA-LEshLscspptscsp.........spsspsp.uLlKcEhEQLLcYKccpLpphp...spussutsLpsl+cDlchlcpQVssLcpaLps+pppLpcLcp ............................sKDssLsFLHILNpRc-.GhRIPRslPASLRuoFppsplsYplcs.........tstpph.tsptsspTsou..pKtpFu-tYLs+lGhus.............pstGTDFSs...psp-WEcVRL++pLt-L-pclptspttsppp..........ttpstsp.sLlKc...Eh.QLLcYKccpLpchpp..ucsptutsLcplp-DlpslppQV-sLcsaL...tp+pp.LppLp.t.................................. 0 16 44 76 +12597 PF12762 DDE_Tnp_IS1595 Transposase_38; ISXO2-like transposase domain Bateman A agb Pfam-B_3 (Release 24.0) Domain This domain probably functions as an integrase that is found in a wide variety of transposases, including ISXO2. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.74 0.71 -4.38 78 1888 2012-10-03 01:22:09 2010-02-12 10:55:34 2 15 595 0 571 1721 293 125.10 25 51.91 CHANGED hLsG..........VE.........lD-sYl..GGcp....p......sp......cG+.......s........t..ssKssVlshl-ps...................t.sh+shstll.ts.hstpsltshl....pctlptsuplhTDphssYsslstt..asHpplsastp......hs.......tt.....sshshlcshhuplKRtlhGsaHtlu.scaLppYlsEhsaRaN .........................................................................tG......lp........hD-s.hh.....utpp............................tp.......................tG+.......s..............................................ttKh..lhs..hlcps.......................................pshh.hl......ss..hpptsl...h..l...............tpp.lp.s....s..uh......l.h..o.D..s..htu..Y.p.....t.L........p............t...t........a..t.......H.......h........t...l.spu...tp.............s.......................pt........phphlps.hhs.p.hKphl..t....t.aps.ht.tchh........aLtch.a+................................. 0 221 343 458 +12598 PF12763 EF-hand_4 efhand_3; Cytoskeletal-regulatory complex EF hand Wood V, Coggill P pcc Pfam-B_51079 (release 24.0) Family This is an efhand family from the N-terminal of actin cytoskeleton-regulatory complex END3 and similar proteins from fungi and closely related species. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.43 0.72 -4.29 13 2526 2012-10-02 16:17:27 2010-02-15 09:18:54 2 126 302 12 1472 3070 25 94.90 32 17.53 CHANGED MP...+LE-aEIKKYW-IFpGLpPtsNKLoGDpVuPVLKNS+LssDQLu+IW-LuDIDsDGpLDFEEFCIsMRLIFDlVNGshssVPscLPsWLVPuSKAHLIQANc ...................................hht.............p.ap.p.lF..p...s.h....s..s..h..s.....G..h..l.oG.s.p.A....+....s.h...h..h....p.....S....p....L..P..p..s.h...L...u..c.........IW....s.............L....u..Dhcp-G..tLstpEFs..l...AM.+.Ll...........h..h..p..t................................lPt.L.Ps..t.hh..............ttt............................... 0 408 686 1107 +12599 PF12764 Gly-rich_Ago1 Glycine-rich region of argonaut Coggill P pcc Pfam-B_7248 (release 24.0) Domain This domain is often found at the very N-terminal of argonaut-like proteins. 25.00 25.00 34.20 33.60 24.10 21.10 hmmbuild --amino -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.60 0.72 -3.39 12 50 2010-02-15 11:44:43 2010-02-15 11:44:43 2 2 30 0 16 50 0 101.70 50 14.75 CHANGED GGs.EYpsp...GRGsPP.Q..GGpsth.GGGpuGu.......Pss.....s.R.ssPELHQAT...pssYQA.hss.PhP........SEsusSstP.spsssh..tQQFQQLolpQtu.oSQAIQs.Pu ................GGP.EYQuR.....GRGG..Ps.Q..........GGtsta..GGGRGGu.............PSu...ssP.Rp..oVP.ELHQAT...pss.YQA.VsopPos........SEsusophP..scssss..tQQFpQLulcpt.u.sSQAI.QPhPs...... 0 1 8 13 +12600 PF12765 Cohesin_HEAT HEAT repeat associated with sister chromatid cohesion Wodd V, Coggill P pcc Pfam-B_443 (release 24.0) Family This HEAT repeat is found most frequently in sister chromatid cohesion proteins such as Nipped-B. HEAT repeats are found tandemly repeated in many proteins, and they appear to serve as flexible scaffolding on which other components can assemble. 
21.00 8.90 21.00 8.90 20.90 8.80 hmmbuild --amino -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.61 0.72 -3.95 52 424 2012-10-11 20:01:03 2010-02-15 11:48:56 2 36 262 0 276 589 36 37.80 32 2.43 CHANGED +slstllptDsslLsps....plppslpp+htDsussVR-Asl-Ll ....................+sl..hlthDsplLst................ph.phlpp+h.tDsu.ssVR-AAlpLl.... 1 90 146 220 +12601 PF12766 Pyridox_oxase_2 Pyridoxamine 5'-phosphate oxidase Wood V, Coggill P pcc Pfam-B_2486 (release 24.0) Family Pyridoxamine 5'-phosphate oxidase catalyses the oxidation of pyridoxamine-5-P (PMP) and pyridoxine-5-P (PNP) to pyridoxal-5-P (PLP), the terminal step in the de novo biosynthesis of PLP in Escherichia coli and part of the salvage pathway of this coenzyme in both E. coli and mammalian cells. This region is the flavoprotein FMN-binding domain. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.53 0.72 -3.69 60 319 2012-10-02 11:35:36 2010-02-15 12:53:17 2 6 304 4 186 1478 2504 100.10 30 42.69 CHANGED huPW+shlppulcp......p..ssphhQLATls.s.ssp.....P+sRTlVFRGFhtp..............................ssshLphsTDtRocKlpplt...............p..ssts..EhsaaascotcQaRlpGpshlls ..................................h...Wt..htpulpp......t..s.phhpLATls....sup..................P.csRTlVaRuahtp...................................................................................................................sshLphpTDtRSpKltclt.......................p.sshsEhs.a..a...h..........s...........c...........stp...QaRlpGpstll.s........................................ 0 55 111 157 +12602 PF12767 SAGA-Tad1 Transcriptional regulator of RNA polII, SAGA, subunit Wood V, Coggill P pcc Pfam-B_319 (release 24.0) Family The yeast SAGA complex is a multifunctional coactivator that regulates transcription by RNA polymerase II [1,2]. 
It is formed of five major modular subunits and shows a high degree of structural conservation to human TFTC and STAGA [3]. The complex can also be conceived of as consisting of two histone-fold-containing core subunits, and this family is one of these. As a family it is likely to carry binding regions for interactions with a number of the other components of the complex. 27.80 27.80 28.10 27.80 27.70 27.70 hmmbuild --amino -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.61 0.70 -4.71 65 356 2010-02-15 13:09:22 2010-02-15 13:09:22 2 11 232 0 254 344 0 212.60 22 55.99 CHANGED hs.....tstRl-ltpl+pplhptlG.cphpcYhptLstFlh..............s+lo+pEh.sp.sptll.................spcp.....l+LHNpLlhuILsNu....htps....ss...ss...............................tptp.hstsssh..ssptspp.pphccphht.s.csptchpth...............................................t.t.t.....ssh.....tth.chs.hpstpphpph......................ppp.............................h.tt...shsscohpLPDspsLptRh.hhshcpGL........sulstssspllshuL-saLKsllpuslshsp ..........................................................st.......t.-lt.hhppltptlG....cphppYhttlphalh.............................t+ls+pEasp.stthL.......................................stcp...........l+LHNphlhuIlsss....tp........Ps...ts.............................................ttt.s....s....s..h..tst.ts.p....hptphh........ptph.ht..............................................................h.............t....ph.....hptspp..t.....................................................................................................................................................................t....h.s...cs.htLP-..ttplptRh..hs..hcpGL..........ssss.pssphlshul-salKpllpsshshht................................................. 
0 71 137 208 +12603 PF12768 Rax2 Cortical protein marker for cell polarity Wood V, Coggill P pcc Pfam-B_2071 (release 24.0) Family Diploid yeast cells repeatedly polarize and bud from their poles, due probably to the presence of highly stable membrane markers, and Rax2 is one such marker. It is inherited immutably at the cell cortex for multiple generations, and has a half-life exceeding several generations. The persistent inheritance of cortical protein markers would provide a means of coupling a cell's history with the future development of a precise morphogenetic form [1]. Both Rax1 and Rax2 localise to the distal pole as well as to the division site and they interact both with each other and with Bud8p and Bud9p in the establishment and/or maintenance of the cortical markers for bipolar budding [2]. thus Rax2 is likely to control cell polarity during vegetative growth, and in fission yeast this is done by regulating the localisation of for3p [3]. 25.60 24.90 25.60 25.20 25.50 24.60 hmmbuild --amino -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.89 0.70 -5.19 34 157 2010-02-15 13:49:32 2010-02-15 13:49:32 2 7 131 0 124 169 15 251.50 25 23.26 CHANGED lhVGGsFppAGuLsCsulChashssspWspPuss......lpGsVsslpah..ss.spLlluG.sLTlss.sssslsoYshpspsapshtu.sp..t.ls.uslsuhshhtsDtsphhlsGp..upGssalhta...DGopWpphss.lhts..sTsIpulpllsL..s.ps+pp...sshhsssplLlloGplsl.sca..GpsSuALasGo..shhPalloo..................p...ssspsGslsplFhp..............................ssssaps.p....h....cphupG...hVVLIuhuhALGsshLlslhGlIhshhpp+ppthh.sspt.th-csp.hpplPP 
..............................................................lhVGGsF..p.p.A...G..u.Ls.CsulCha.shssspWsp.Gss.......lpG..s.V..ssltas...ss..spLlluG.shs.....lss....s..sst......lApashpsp.s..Wsshsu.s...ttls.Gs.l...suhshstpsss...p..hhhuGp......s....s....u...ss..alhta...cGs.pWpsh...st..hhts...to.lpslphhsl..p...psctt...tshhspsp.hLhlsGpl.l.ssa..G...ssuslasGs..shhPahlos..................p...ssspsGt.hstlF.p....................................pp..hpt...............phhshG...hVVLluhslALGhh.....hllslhGllhshh.+c.ppt...h..t....hhpc...hptlsP................................................ 0 45 77 109 +12604 PF12769 DUF3814 Domain of unknown function (DUF3814) Mistry J jm14 Pfam-B_10 (release 24.0) Family This is a domain of unknown function. It is often found in combination with Pfam:PF05222, Pfam:PF01262 and Pfam:PF02233 on alanine dehydrogenase and pyridine nucleotide transhydrogenase enzymes. 25.00 25.00 30.30 30.30 22.90 22.90 hmmbuild --amino -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.07 0.72 -3.82 159 2249 2010-02-16 09:34:05 2010-02-16 09:34:05 2 12 2099 0 638 1524 1931 86.90 57 21.39 CHANGED lttlslFlLAshlGa.VlhpVsssLHTPLMSsTNAISGIllVGAllth..u.............................hsplLuhlAlhlAolNlsGGFhVTcRMLpMF++ ......h..phslFsLAshVGYaVVWsVo.ALHTPL....MSV.TNAISGIIlVGALLtlGpu...........................tshsphLuhlAVllAoINIF..GGFhVTpRMLcMF+K......... 0 187 373 522 +12605 PF12770 CHAT CHAT domain Bateman A, Rawlings ND agb Pfam-B_4 (Release 24.0) Domain These proteins appear to be related to peptidases in peptidase clan CD that includes the caspases. This domain has been termed the CHAT domain for Caspase HetF Associated with Tprs. This family has been identified as a sister group to the separins [1]. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild --amino -o /dev/null --hand HMM SEED 287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.14 0.70 -5.28 82 1893 2012-10-03 02:24:44 2010-02-16 10:52:24 2 520 555 0 959 2220 184 278.30 18 28.37 CHANGED ptsuppLaphLlt.P.lttt..l.........tshp...pLllssDu......sLptlPhtu..Lhssp.........................paLlEc...asls..hhsShp....Lpphp.tshpstps..........Lshussphs......t........................................................................................................................ssLPusttE.hpslsph.............hhts..............pshhsppu..Thpslppth...........pphpllHlATHu.Fp.........................................ss.tpShlhLt..................................................ssthLshp-ltp..l....sLst...........scLllLSACpTu..........hG.........ss-uhhG....lstshhhAGspusluSLW.VsD..puTttLMppFYppLtp.......shttucAL+pAQlthhpsphh..................................................pc.PaaW...........uuF..hhhGs .................................................................................................................................................................................................t......h.p.h.h.......s..h.t....l........................................thp..........plhh.h.sps............Lt.lPhth......l.stp.......................................................thlhp.p.......h...lt.....hhss.h..p..........hh.th..t.......tt.t..................................................hhh.u..ssp.................................................................................................................................................................................................................sLs.t.st..tE...h...ptltph.....................................ht.t.........................phh.ht.p.p....s.....ot..p.plhpth.....................pphp..ll..HhAsHGths...............................................................................................tt.s...p.u..tlhlt....................................................
..........................................tsthLs...h.p.-lhp....h......plpt.................spL..llLSA.Cpou...........................................................hst..........tsc..t..h...h.u..............Lspuhl.t.u...G..s.ps.Vlu...o..h..W..t..VsD...........pss.tt.hh.......pt..FYp.p.Ltp..................shshs.p.....A...L...p..pA.phthhpt...........................................................................................s..W.......ush.h.............................................................................................................................................. 1 381 710 898 +12606 PF12771 SusD-like_2 Starch-binding associating with outer membrane Coggill P pcc JCSG structure (Target 390167) Family SusD is a secreted starch-binding protein with an N-terminal lipid tail that allows it to associate with the outer membrane. 20.20 20.20 20.20 20.20 20.10 20.10 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.59 0.70 -6.10 5 655 2012-10-11 20:01:03 2010-02-16 10:56:58 2 5 160 4 277 1675 449 427.60 19 89.34 CHANGED MKKpILlIVLu.....alClSCuNLEEMNINPspPTpTHPpLLLTslphssF+..pGToGlYAQKhllQsDGEsuDQYYKWsRGSFGuYsTLR........NVpKMsEEAERI..NsPs...................YhALuKFFRuYYFYcLTLsFGDIPYSQALKGETEp.YoPsYDuQEDVFcuILpEL+EAD-ILushsolIuG.......DIIYNGNsspWRKLINSFRLKVLlTLSN+S..GElshsSEFcuItTNSPLM-S.sDNGQLVaLDQQsNRYPpFNuuu.WSGhYMDuTFIQRM+ER+DPRLFIYSTQTsKuKs-GKsIsDFSuYEGGDPAAPYG-uh.KsucGDlS.lNDRa+pDPlsEPhhLhGYAEpQhILAEAAVRGW.IuGsAcsaYEcGV+uSFcFYEsaucDYutYLu.NAVApYLpEPLVD..................FopASSTEEKIERIIMQKYLsoFaQhsW-GFYDaLRTGYP-FRRPoGosIP..........+RWhYPQSEYssNosNVSoAIo+QFGuGNDcIspssWWtK 
......................................................................................................................................................................h........h.h...h....s......s....s...p...p...h....t..-....l...N....p..s....P..s.....t......s...p.......p.......s.....s.........s.....t.......h.......l.......h..s.......t.......h...........h........p.......h.......h..t...................s.......................t.............h....h...........h.....t...............h.............h......t...........................s..........t.......................t......p...h.......h......t.................t......t.........s....t.........h......s......h...........h....t....t..h.t.................................s.h.....p...h.....h...p....t....s.....p..p..h........t.t..........................................................................h...h...u...l...u...pl.h..p.......uah...hthhsDhaGD.l.P.Y...o..........-..........A.....h.....p.........u........t........t.........................h..p.....P.....paD..s.Q...c.s......l.Y.....t....s.....l.........hp.......p.Lc.p..A........s....h.....l........s.....s.....s......s.....s..............h.....s..u.....................Dh.l.a.....s.....G......s.........h....s..........p......W....h+huso..L+l.Rh.h....h......+...l.s.....phs.......s.t....h.s....h..t..t.h.........h...t..t.....l....h.......s.....s.....t...s...h........h...p.....s......s.-........s.......h...................h...........h..............s...t........t.....s.....s....................s........h........h........t...............t.........t.......................h...............s....s..........h.......h...h............s..........p..h...h..h......s..............h...h...........p........t............h............p..............D.P...R.l..................h........h.........h.........t..............................................................................t..........................................t......................t.......h.................t...............................
........................................s.........................................................................................................................................................................................s...h..................h........h..hshuEhtFlhAEu..h..h..+..u..h......h...s....s......s.......A...pph..Y...........p...p....ul.p.u....u...h....p........h..........s........h...................s....s....t..................................h.....h.......s......................................t...................................................................................................................t...t....t..l..p..p.Ih.h.Q.+alu.ha.....p..s..h.-sa.phRR.........o.....G............h......P..........t........h.......................s..................s...............t......hs...............................hRh...Y..P....t..p....p..h..N........th.....t................................................................................................................................................................................................................................................................ 0 119 251 277 +12607 PF12772 GHBP Growth hormone receptor binding Coggill P pcc Pfam-B_7 (release 24.0) Family Growth hormone receptor binding protein is produced either by proteolysis of the GHR (growth hormone receptor) at the cell surface thereby releasing its extracellular domain, the GHBP (growth hormone-binding protein), or, in rodents, by alternative processing of the GHR transcript. The sheddase proteolytic enzyme responsible for the cleavage is TACE (tumour necrosis factor-alpha-converting enzyme) [1,2]. Growth hormone (GH) binding to GH receptor (GHR) is the initial step that leads to the physiological functions of the hormone [3]. The biological effects of GHBP are determined by the serum levels of growth hormone (GH), which can vary. 
Low levels of GH can result in a dwarf phenotype and have been positively correlated with an increased life expectancy. High levels of GH can lead to gigantism or a clinical syndrome termed acromegaly and have been implicated in diabetic eye and kidney damage [4]. 21.30 21.30 21.30 21.40 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.00 18 1202 2010-02-16 14:01:20 2010-02-16 14:01:20 2 6 789 0 38 1084 0 236.70 57 83.11 CHANGED KGKL-ElNoILuup.....csYKPphYpDDsWVEFIElD.lD..DssEK..spGSDTpRLL......Spc.p.pcut.sh.GhKDDDSGRsSCY-PDl.-sDh................hL.ttp.sLLshctcstpp..ssspcus.....................sl.tst.cSsp.slps..Qhos..pohsNhDFYAQVSDlTPAGuVVLSPGQps+st.pspsst.............................cpE.hpp.hQh.h..s.ssuYhsEusA+phss.sP...pscst..shcPphsppc.a.ss....sssutsstoshh....uPsup..hPVsDYTsVp.VcS.puLlLNsss.....P......sscp.h ..............................................thap-DsWVEFIELDID....D..sDEK..TEGSDTDRLL......SsD.H.pKSl.sILGAKDDDSGRTSCY-PDI.L.-TDFpsuDhsDu..op.ht.ppL.KtEsDLLCLDpKN.ps....shsts..t...s.......s.csKPpsLl.utsESsp..Q...spT...hSNP...sSLA...N...IDFYAQVSDITPAGuVVLSPGQK.KAG....huQss.h.............................p.E..hss.sQtNa...hssAYFCEuDAKKCIshsP...+hEsps.phcPSFsQEDhYITTESL.TToA.hstss-h....sPsuE..hPVPDYT.olHhVQSPpGLlLNAsu.................sp.h................................. 0 1 5 14 +12608 PF12773 DZR Double zinc ribbon Bateman A agb Pfam-B_12 (Release 24.0) Family This family consists of a pair of zinc ribbon domains. 
26.30 26.30 26.30 26.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.35 0.72 -4.14 109 1392 2012-10-03 10:42:43 2010-02-16 17:13:36 2 114 823 0 495 1601 122 52.60 32 16.02 CHANGED CspCtpt........ssss..upaCtpCGstLt..............................tt....CspCst....s.ssssspFCspCG ............................................CspCGt.t.......................sss.....ucFCspCG.stls............................................................................tt...hCs..pCup...........hs.ssspFCspCG..................................................... 0 174 347 430 +12609 PF12774 AAA_6 Hydrolytic ATP binding site of dynein motor region D1 Coggill P pcc Pfam-B_14 (release 24.0) Family the 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D1 unit of the motor and contains the hydrolytic ATP binding site [1]. 
21.30 21.30 21.30 21.40 21.10 20.80 hmmbuild --amino -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.79 0.70 -4.82 3 2434 2012-10-05 12:31:09 2010-02-17 10:48:36 2 274 341 14 1635 2341 119 209.40 54 6.46 CHANGED YSYEYLGNTPRLVITPLTDRCYITLTQSLHLlMSGAPAGPAGTGKTETTKDLGRALGIMVYVFNCSEQMDYKSCGNIYKGLAQTGAWGCFDEFNRISVEVLSVVAVQVKCVQDAIRDKKctFNFLGEEISLIPSVGIFITMNPGYAGRTELPENLKALFRPCAMVVPDFELICEIMLVAEGFLEARLLARKFITLYTLCKELLSKQDHYDWGLRAIKSVLVVAGSLKRGDP .......................................................YuaEYL.G.s.s.s.R..LVITP..LT............D...R........C.Yl.....T..LspA....L...p...h...p.h................GG..u..P......tGPA.G.TG...........KTET.sKDLu+uLGh.s.............lVFN........C...S.-........thD......apu.................hG+hFp.G..L.uQ...s.....G..........AWuCFDEFN.RIplc.VLSV.......lA...p...Q..l.h............s......I........p..........p........A..........l............p........p.............p.............t....................p..................p.............a............................F........................G..............p..............p...............l.....p..L...s.....s...s...s.u...lF....I.T.M....N.P.................G....Y.................A..G.......R........o...E....L.......P-.......NL................K..s............LF.R..slAMhV......PDh.t.lIs.ElhLhSpG.F..h....p..u.c...LupK...hhsh.a.pLs.p..EpLS..p..............Qt....HYDaGhRA.lK.oVLhsAGshKR....................................................................... 0 696 866 1339 +12610 PF12775 AAA_7 P-loop containing dynein motor region D3 Coggill P pcc Pfam-B_14 (release 24.0) Family the 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D3 and is an ATP binding site [1]. 
20.90 20.90 20.90 21.50 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.83 0.70 -5.32 3 2098 2012-10-05 12:31:09 2010-02-17 10:54:55 2 245 278 14 1513 2001 100 255.70 31 7.07 CHANGED FELDPElPLQAsLVHToETIRVRYFMDLLMERuRPVMLVGNAGTGKSVLVGDKLuSLssDuYLVpNVPFNYYTTSAMLQuVLEKPLEKKAGRNYGPPGTKKLVYFIDDMNMPEVDsYGTVQPHTLIRQHMDY+HWYDRpKLTLKEIHNCQYVSCMNPTAGSFTINSRLQRHFCVFALSFPGQDALSTIYNoILTQHLAhtSVSsALQKlSPsLVuAALALHQKIAsTFLPTAIKFHYVFNLRDLSNIFQGLLFSosElLKoPlDLlRLWLHE .............................................................................................................t.l.l......V....PTh-TsR.h.p.a..llp.hh.........l.p.........p......p..c...Pl.lllGss.Go.GK.o......s.hl.........p....s...h.L.....p............p...l...........s...............p.....................p.......p......h.............h..................s....h.......t....l..s..........F...Su....t....T..o........u......t...........h..Q.......c.........h....l....-...s......h...l.......-...K......+................p...t............t...............s...........a..G..P......s....t...........u...........K.....+..h...l.l.Fl.DDlNMP.............t....h-p.YG...........sQ.........s.l.p.llRQhh-.h..s..s..a...Y......D...p.....p...c....h...s.......h..hp.l.h..........c......l...p....h.l.u.u..M...............s....P..s.......u.....G........R.....p...s................l.ss...........R...h....h..R+Fs.lhsls...hP..s..p..p..s.lp..pIa...u..s....l..........h..........p.......t....a.......h.......p.........................t..........a...............s..................p.....................l..p....p..h...s..p.....t...l...l....p.u.s.........l..p.l..ap..p..s...t...p..p..hL..P.Tss..K..HY....lFNL...RDlo+....l...hp.......................G.l..h.h....s............p.....p..h.........h....p...s................p.....ll.+LWhHE............................................................................................... 
0 634 799 1238 +12611 PF12776 Myb_DNA-bind_3 Myb/SANT-like DNA-binding domain Bateman A agb Pfam-B_16 (Release 24.0) Domain This presumed domain appears to be related to other Myb/SANT like DNA binding domains. In particular Pfam:PF10545 seems most related. This family is greatly expanded in plants and appears in several proteins annotated as transposon proteins. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild --amino -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.32 0.72 -3.36 73 836 2012-10-04 14:01:12 2010-02-17 10:57:23 2 29 65 0 554 860 0 91.40 20 25.51 CHANGED pWs..sphp+hhl-lhh-phptGsp.......tssasppuWpp.lhppapp....psttpas+pQL+s+...hcpL+cpaphhpplhppss.............huWDsppptlsAs.cchWcph ................................Ws.st.pphhlchh.h.c.phptGpp.......ssshpppuapp.lhp....phpp.......phsh.p.h......s.p.pQlps+.......hcp.h+cpaph.hppLh..p..p.su.........huWDstp.....p.h.l.sAs..c..c.h.Wpp............................... 0 85 356 454 +12612 PF12777 MT Microtubule-binding stalk of dynein motor Coggill P pcc Pfam-B_14 (release 24.0) Domain the 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This family is the region between D4 and D5 and is the two predicted alpha-helical coiled coil segments that form the stalk supporting the ATP-sensitive microtubule binding component [1]. 
24.00 23.30 24.00 23.70 23.90 23.20 hmmbuild --amino -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.04 0.70 -5.70 3 2476 2010-02-17 11:31:39 2010-02-17 11:31:39 2 258 489 19 1602 2329 102 305.10 29 9.65 CHANGED ERLENGLhKLQSTAuQVDDLKAKLAuQEVELKQKNEDADKLIQVVGVETEKVSKEKAIAD-EEpKVAVINcEVocKQKDCEEDLAKAEPALLAAQEALNTLNKNNLTELKSFGSPPsAVlNVoAAVMVLhAPsGKIPKDRSWKAAKVsMuKVDuFLDSLINFDKENIHENCLKAIKPYLsDPEF-PEFI+oKShAAAGLCSWVINIVRFYEVYCDVEPKRQALpKANAELsAAQEKLAtIKAKIAELNANLAcLTApFEKATADKLKCQQEAEATuRTIoLANRLVGGLASENVRWAEAVpNFKpQE+TLCGDVLLITAFVSYlGaFTKKYRs-LhERhWlPYL .........................................................................................................................t..hpsG.LpKLtpsttp...Vt..thptpL..t.......p.....L.t....tp.t.....p.s.p.t.h...h.t...l.....t.......h...p...p..t..t...................s..p......p..p....c..t.........s..t.......t...p.....t............t......h...t............t.t....t...h...............t..t...h.......p.......p.......p.......s....p.......t.......-L.t.pA....Ps..L..p.t..A...tA............Lp...s...l..p..t.........t.sl..sp.l+uhtp.......PP..t.h.V...t.......hl..h..p.u..V..............h........l.l..............h...............t............................................................................................p..........................s............W.......t..s..u..+.....t....h.........h......s.....c....................p..............F...L....p.s.L...h...s......a....D...K......-....s...I..s...p...p....h..h...c..t.....l..p..t...........a......l..p...p..s..-....F.s....s....c............h...l...p..ps..S...t..Astu.Ls....pWVhA...h...t.Yt.c......V.h.+.....V.......t....P....K.+..p...p.......L....t...p...s...p.......tp....L.......pt....stp.p...L.p..ph.ptp.Lpp.l.pp...pltt.Lps...pa.c..p.t..hs...c..K....p..pl...pp...........p.............h.c........h.s........p............p..+......lpp..App...LlsuLuu..E+.........R.........W...................pp....s....s...p....ph.pt.ph..ppL.sGDsLl.......uuu..hluYhGsFstp.
aRpphhpt......hhh................................................................ 0 659 838 1298 +12613 PF12778 PXPV PXPV repeat (3 copies) Bateman A agb Pfam-B_15 (Release 24.0) Repeat This short repeat is found in multiple copies in a variety of Burkholderia proteins. The function of this region is unknown. 20.20 15.50 20.20 15.60 20.10 15.40 hmmbuild --amino -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.40 0.72 -4.25 7 141 2010-02-17 11:33:22 2010-02-17 11:33:22 2 4 87 0 42 110 3 22.60 66 23.66 CHANGED Ghs...APVhs..tPAPVhl.....APtPslV ...........GsP....APVYV..tPAPVYVAP.tPslV. 0 3 14 32 +12614 PF12779 YXWGXW YXWGXW repeat (2 copies) Bateman A agb Pfam-B_15 (Release 24.0) Repeat This short repeat contains the motif YXWXXGXW where X can be any amino acid. It is generally found in 2-5 copies in short secreted bacterial proteins. Its function is as yet unknown. 18.00 5.00 18.10 5.00 17.90 4.90 hmmbuild --amino -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.18 0.72 -7.38 0.72 -3.97 88 527 2010-02-17 11:41:07 2010-02-17 11:41:07 2 11 179 0 276 526 23 23.20 37 20.84 CHANGED shhYlapsGhWah.....u.sa.sGsWhh ...........sYlWssGaWth.........taha..hsGhW...................... 3 66 118 207 +12615 PF12780 AAA_8 P-loop containing dynein motor region D4 Coggill P pcc Pfam-B_14 (release 24.0) Domain The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D4 ATP-binding region of the motor [1]. 
22.90 22.50 23.10 22.60 22.80 22.40 hmmbuild --amino -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.65 0.70 -5.42 3 2160 2012-10-05 12:31:09 2010-02-17 12:48:52 2 240 286 14 1532 2037 96 247.80 35 7.05 CHANGED YNEVNAVMNLVLFEDAMpHVCRINRILESPRGNALLVGVGGSGKQSLoRLAAYISSLDVFQITLRKGYGIPDLKlDLAolClKAGVKNlsTVFLMTDAQVADE+FLVLINDLLASGEIPDLFuDDEVENIISuVRNEVKShGLsDTRENCWKFFIDRlRRQLKVlLCFSPVGosLRVRSRKFPAVVNCTAIDWFHEWPQ-ALVSVShRFL-ETEGIcs-VKpSIScFMAYVHTSVNEoS+lYLoNERRYNYTTPKSFLEQIKLYQSLL ...................................................h..sLV.lFp-AlpH.................ls.RIsRllc.p.P.p..G.ps.L.LlG.V.GG.SG+pSL..oR.LA.u.....a.hs..s.h.p..lF..QIp.....l.........s...+..s..Y.s.hs-....a....+...-D..L+plh.tp.....u...............G...h.....c....s...p.t.h.....sFLhsD...spl.............h.c....E.u..FLE..l.Ns..l....L.so.....G..............E........lP.s............LF...s...t.DE....h.p.p.l........h............s..............t........h.....+....s.......t..s........p...................p....t..................s.....h...........s.....o.....t..........p......s.......l..a......p.aF.....l.p.+...........l+.p..NL.....H....l...V.Ls..h..S..P.....lG..c....s...h.R.s.R....h.+..........pFPu.........Ll..N..ss.sIDW..Fp..p..WPp..-ALh..p...V.................u......p.................p.................a.................L....................p....................p.................h..............p...................h...................h..................p...........................p.................h.....................c....................p..........t......l......s.....p...........h...hsh.....h..........H.t.sl.t...ph.s.t....p..........a.h......t..p......p..R.hsa.sTPpsaLchlp.hatth............................................................................................................................................................ 
0 634 798 1248 +12616 PF12781 AAA_9 ATP-binding dynein motor region D5 Coggill P pcc Pfam-B_14 (release 24.0) Domain The 380 kDa motor unit of dynein belongs to the AAA class of chaperone-like ATPases. The core of the 380 kDa motor unit contains a concatenated chain of six AAA modules, of which four correspond to the ATP binding sites with P-loop signatures described previously, and two are modules in which the P loop has been lost in evolution. This particular family is the D5 ATP-binding region of the motor, but has lost its P-loop [1]. 22.20 22.20 22.20 22.30 22.10 21.90 hmmbuild --amino -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.61 0.70 -5.50 3 2170 2012-10-05 12:31:09 2010-02-17 13:30:02 2 225 287 14 1548 2035 88 214.80 38 6.23 CHANGED MLTDDADVAsWNNEGLPSDRMSTENATILuNCERWPLMVDPQLQGIKWIKNKYG-DLRVIRIGQKGYLDlIEQAISuGDTVLIENlEESIDPVLDPLLGRNTIKKGRYIKIGDKEVEYNPKFRLILHTKLANPHYKPEMQAQsTLINFTVTRDGLEDQLLAAVVApERPDLEQLKSDLTKQQN-FKIlLKELEDSLLSRLSSASGNFLGDTALVENLETTK+TAAEIE .............................................................................................................pltpWp....p..GLPs.DphSh.-N..ul........I...l.......p...p.......u.......p............R.......a....P...L.h...ID.PQ...sQ.u..hc.W.I......K.......s................h.........................p.........s.....................p...................L......p........l...h.p.....h......s..p...p.............s...a...h..c.....p....LEpulphGpslLlEs...l..t.E...p..l....DP.sL......pPl.L..t.....+...p.....h....h.....+.....p.....G...................p.......h.........l................+..l....G...D..p..p..l-as.s.s.F+la..loT..K...L....s..N..P...c.Y.....P...E..l.p....s.....+...s.ollNFs.lT......................pGLE..............-QLLuhV..l.tp.E.+P....-L....E..c.p.+sp.....Llh..p.t.....s..................p.K.h...pL+pLE-plL.ptLs....su...............p..G...s...lL......-..D.p....pLl.psLppoKtput-l...................................................... 
0 635 802 1257 +12617 PF12782 Innate_immun Invertebrate innate immunity transcript family Coggill P pcc Pfam-B_9 (release 24.0) Family The immune response of the purple sea urchin appears to be more complex than previously believed in that it uses immune-related gene families homologous to vertebrate Toll-like and NOD/NALP-like receptor families as well as C-type lectins and a rudimentary complement system. In addition, the species also produces this unusual family of mRNAs, also known as 185/333, which is strongly upregulated in response to pathogen challenge [1]. 26.90 26.90 26.90 31.50 21.60 26.80 hmmbuild --amino -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.36 0.70 -5.20 9 746 2010-02-17 14:14:11 2010-02-17 14:14:11 2 4 2 0 6 908 0 162.80 58 101.52 CHANGED MEVKVTLIVAIVAALAISAHAQRDFNERRGpENGRERGQGtFGGRPGGMQMGGPRQDGGPMGGRRFDGPsSGAPQ.........................MDGRRQNGGPMGGRRFDGPcFGGSRPDGAGGRPFFGQGGRRGDGEEETDAAQQIGDGLGGsGQFDG.GRtHHGHRQG.PQDpsEEQPFGQRN.SSEEDGRPHPHHHt......+HHpHH...........................................+RNHTEGHQGHNETGDHPHRHHNKTtDtDQDRPMFEhRPFphN...PFGRKPFG-RPFsRRNGTEEGSPRRDGpp+PaGNRGRWGENESEEKE+PTTESlTTSSPsEVVclAhN- ..........................................................u.RQDGGPMGGtRFDGPt.GAPpM-GRRQsGGPMGGRRFDGPtFG................................................................................................................NpTEGHQGHNETGDp.............DQD+.h.-.RPFhhN....FGRKPFGs+.F................................................................................... 0 6 6 6 +12618 PF12783 Sec7_N Guanine nucleotide exchange factor in Golgi transport N-terminal Coggill P pcc Pfam-B_13 (release 24.0) Domain The full-length Sec7 functions proximally in the secretory pathway as a protein binding scaffold for the coat protein complexes COPII-COPI. The COPII-COPI-protein switch is necessary for maturation of the vesicular-tubular cluster, VTC, intermediate compartments for Golgi compartment biogenesis. 
This N-terminal domain however does not appear to be binding either of the COP or the ARF [2]. 25.40 25.40 25.70 25.40 25.30 25.20 hmmbuild --amino -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.06 0.71 -4.82 70 1211 2010-02-17 14:38:16 2010-02-17 14:38:16 2 22 295 0 824 1186 8 143.70 24 9.89 CHANGED hshhtpDAhhlFcsLCpLs...p..sppp..shh....tthpschhuLcLlcslLpspts.l..Ftp...............chlp.sl+phLs.sLl+sssS.sh.hslhphshcIhhhLl.pph+stLKhElElhlshlhh...lL.........-.pst....shp..pKhlsLchhpplsp-Pphlh-lYlNYDC-.s..ppNlh ...................................................................................................h..shh.lFcs.LspL......................s...........t...tt...........................................t.................hs+hhuLpLlps.hLps...tss..h..ht..................................phht.hl......+p.lsh....tLhp........hs...........s...s..........................slh......t.h.shplhh.hLl..pp..h............+s.hL+..h........ph.E.............lahp.lhh.ll..............................................pspp.......sht..........p+.h.sl.p.........hlpp.......lh.t....p.s.....p.hlhphahsYDsc.t..ttslh................................................... 0 286 456 677 +12619 PF12784 PDDEXK_2 PD-(D/E)XK nuclease family transposase Bateman A agb Pfam-B_5 (Release 24.0) Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily [1]. These proteins are transposase proteins. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild --amino -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.27 0.70 -5.19 73 2282 2012-10-11 20:44:46 2010-02-17 14:49:08 2 7 653 0 358 1886 49 177.50 20 76.61 CHANGED spsDauFK+lFGsc.t.NcclLlsFLNslLp..tpppIpclc.llss.......c..t-hts-Ktull....Dlhspss....sGppllIElQhppppsFhcRsLYYhu+shsp...Ql.............p.pG...pt.Yp............pLppllsIsIls.Fshhts.........p....ch+sshplh-ppspp..hhh.........cclphhalElsKFpppppcppsp........h-cWlhalcstsphppt.h.h.....psshhccshpthcphshscc-ht..sY-p+.chhh....Dppsslcpu ......................................................hhp.hh.t.........p.thh.h.t..hlpshLt................t...tlp...lp....h...s...............................p........t.s........+..shh..............Dlhsp.p........sGp..h.lEh.Qh.p........p......p.....t.............h..h........p.Rhh....aYhsphhsp...pl...............................p...pG.......t...Yp..........................pl..t..shhI..l...hs...h.hh.t..................t.............thhp...h...h......p.......p....p.t.........h..............p...t..hh..lph..h.................................hh..hh.......................................h..h...t.h.....s..................................h.............................................................................................................................................. 0 126 250 271 +12620 PF12785 VESA1_N Variant erythrocyte surface antigen-1 Coggill P pcc Pfam-B_22 (release 24.0) Family This family represents the N-terminal of the variant erythrocyte surface antigen 1, versions a and b, of Babesia. Babesia bovis is a tick-borne, intra-erythrocytic, protozoal parasite of cattle that shares many lifestyle parallels with the most virulent of the human malarial parasites, Plasmodium falciparum. Babesia uses antigenic variation to establish consistent infections of long duration. 
The two variants of VESA1, a and b, are expressed from different but closely related genes, and variation is achieved through the involvement of a segmental gene conversion mechanism and low-frequency epigenetic in situ switching of transcriptional activity from the VESA1 gene-pair to a possible other gene pair. 21.40 21.40 33.10 21.40 19.60 20.60 hmmbuild --amino -o /dev/null HMM SEED 462 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.95 0.70 -5.52 42 227 2010-02-18 13:21:03 2010-02-18 13:21:03 2 2 1 0 141 227 0 260.00 32 38.40 CHANGED AAVTDLLQSVpLEYH.............GYQG-sK.......tssssKGAsccpVs..c+LNGLFSLVQGLGGTAVVRTYIDQLAQVLSALVGWS+I-KC......................tssspCpssststp........................................................HGppss....CcYLcDVctss.sCscCGCMKWsVsp...spscGppLGRtCTRCpsSGsstt......CpCusuust...Cou.spc.CKCAhsGKC....CKCC.....Cpspss.t........Cptpt............uChpppp..............-....sYhSAYsp...................h.ttttht..hpshWssLhp.s...............ssSp..+RHpCARILLGSVCLIWSGlTYMYWTGKYtpoSPRWNNHILDGSGLDDGTLSQWLQALGFP+-MLNNsGPtNRLDuVIWDGhpsKLaLGFscsushss...s.upDsssNThRsPsGMNYAGaIHTscRsuFsss.AsVFppsssss........ho-psppKpGALaKLYILSCAYFTGL....QKK....................puss+TPKTIREILYWLSALPYSpAY.clLcHuKc+L 
........................................................................................................................................................................................................................................................................................................................................................................................................C.tptsss.....sts...cpCpC...s.h..s.G..t.............................................................................................................................................h..hAp.hLGslsLIaSslo.h......s..........s..t....hWpt................................tahtshGas...hp......................................................................................................................................................................................................................................... 0 141 141 141 +12621 PF12786 GBV-C_env GB virus C genotype envelope Coggill P pcc Pfam-B_19 (release 24.0) Family This the envelope protein from the ssRNA GB virus genotype C. 
20.40 20.40 20.40 20.80 19.70 19.70 hmmbuild --amino -o /dev/null HMM SEED 413 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.86 0.70 -12.75 0.70 -6.01 6 568 2010-02-18 13:22:19 2010-02-18 13:22:19 2 10 6 0 0 453 0 191.40 59 45.18 CHANGED Rls.lPNLTCslcCDhcash.uhsl.....Dh.WshchlhcLPh+LW+GlsuhssLhllVlhhLlLEQRLVMsFLLLhssGpApss..............hapuCpCth.GuRsP.Pshpsh.RGNuTllC-CPFGpMhWhPsLCuGLsW+DGspc...GsspsLPhsCPcpVhGoloVhChWGSAaWhWRhG...shVcLacpLP.tSALCTFauhspucpsaPt.VssLospGsPCASCVVDpRPspCGsCVRDCWEpsGs...sFccCGlGsRlTccLpAVlVcGGsESploTPhGERP+YhuQHGsGsaauAVpthuhshTVoclGGYWHAltCPs.-assGsLPchIPGhPVNACls..scsu+shuuW.APGGaYAPlFT+CNWPpssGVcVCsGFAaDFPGc+sGFIHV+GuhQQlhuGshtssPpWLL ..................................................................................................................................................................uluWGDPIs....tGQsp..........aGusSVss....................................................................................................................................................................................................................................................................... 0 0 0 0 +12622 PF12787 EcsC EcsC protein family Bateman A agb Jackhmmer:Q186V8_CLOD6 Family Proteins in this family are related to EcsC from B. subtilis. This protein is found in an operon with EcsA and EcsB which are components of an ABC transport system [1]. The function of this protein is unknown. 
27.00 27.00 27.20 27.30 26.70 26.80 hmmbuild --amino -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.46 0.70 -4.98 47 901 2010-02-18 14:38:34 2010-02-18 14:38:34 2 5 716 0 203 677 35 219.90 21 83.19 CHANGED a.pptlp-lppWcpc...p+csthhp+hs+hs.phl....scllPptlpctlssslc....phsphlhsGupalh................................................shpslpphsLpphDplscchhpthpphAsspGAsTGsGGlhhhhuDhPllLulplKhLa-lAhhYGaDsc-hcERlFILplhQluhS.s.tt+ptlhpplpp..........acttpp....t..splp..s....W+ph.ppYRD.hshtKhh........QhlPslGhlhGAhsNhphlcclu-sAhhhY+hRhl ....................................................................................t.....................................................................................................................................................................................t...h........p....h....p..t..h...h....p...p...h..ht..p..ppthA..sspGus.....oG.......h.......G....................Gl..h...........s......h.s....s....-lP.......s....hl....s....lt....h....+....hlt........plAp.hGaD.l...s.c......cpph.hhLt.l...h.t.h...uht....s.p......tc..p..t.h.hth.h.tt...........................................htt.t.t.................t..t..hp...h.s.......................hcpltpp....h...t...s.l...s....h....p...phh.........................phlP.l.l.Gus.lGAssNhhhhpplschApttathRhl............................................................................... 0 69 145 168 +12623 PF12788 YmaF YmaF family Bateman A agb Jackhmmer:Q181M3 Family This family of proteins contain 6 HXH motifs and is named after the B. subtilis YmaF protein.\ It seems likely that these are involved in metal binding. The function of this protein is unknown. 
27.00 27.00 27.30 27.40 21.80 26.90 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.83 0.72 -10.59 0.72 -3.84 10 130 2010-02-18 16:13:56 2010-02-18 16:13:56 2 1 90 0 33 113 0 90.10 43 77.47 CHANGED +hQoHsHEFtuSTchAE-s-.-cHNHRhAGVTGpAI.h.G.sSHVHKI.pspTDFF.DHFH-lClTTGPAIhl.usGKHIHLVpGpTTlNDGHpH-ahFTTLI .........................pHsHEa.uSTphsp.t..........p.c.HNHRhuGVoupsI.h....G..ssHVHpl.p.s.pTDF.h.sHaHhIs.shTGPAIsl.uss+HlHhlsGhT..ohsD..G..HpHcahhsT.l..................... 0 15 24 28 +12624 PF12789 PTR Phage tail repeat like Bateman A agb Jackhmmer:Q181M3 Repeat This family largely contains proteins from the eukaryote Trichomonas vaginalis. These proteins contain multiple HXH repeats. Some proteins in this family are annotated as having phage tail repeats. The function of this family is unknown. 27.00 7.00 27.00 7.00 26.90 6.90 hmmbuild --amino -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.66 0.72 -4.13 16 1947 2010-02-18 16:37:43 2010-02-18 16:37:43 2 34 108 0 1744 1922 24 47.00 41 23.61 CHANGED chScKsHTHoIusITsLQETLspKuDhsHTHTIANITNLQ..ETL..pp..DVGHTHTIusITN ....................................HtHsI...usIssLp-oLsc.Ku.........Dh..s...........H..T.H.s....................................................t.............................. 0 1732 1740 1742 +12625 PF12790 T6SS-SciN Type VI secretion lipoprotein Coggill P pcc Pfam-B_27 (release 24.0) Family One of the virulence mechanisms of E coli is the production of toxins which it produces from dedicated machineries called secretion systems. Seven secretion systems have been described, which assemble from 3 to upto more than 20 subunits. These secretion systems derive from or have co-evolved with bacterial organelles such as ABC transporters (type I), type IV pili (type 2), flagella (type 3), or conjugative machines (type IV). 
The type VI secretion system (T6SS) is present in most pathogens that have contact with animals, plants, or humans. SciN is a lipoprotein tethered to the outer membrane and expressed in the periplasm of E coli and is essential for T6S-dependent secretion of the Hcp-like SciD protein and for biofilm formation. 21.60 21.60 21.80 21.70 20.60 19.30 hmmbuild --amino -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.49 0.71 -4.57 149 1326 2010-02-18 16:39:03 2010-02-18 16:39:03 2 3 848 1 228 812 20 148.70 26 83.77 CHANGED slhLuGCu........................s..h.tpssplslslpAssslNsst...pGpssPlhl+lYpLcssssFpsu-a.sL..hps...sppsLus-ll.spc-hh.ltPGpptp.ls................hshspsspalGllAtapchs..pup.W+hlhslsttt.........................pht.....lplplsspsl .............................hlLsGCu.......................................................tsu....ppspplslslhupsslNsss...puc...ssPlhlplapLpssstFpsADa...sl..tss.........tpssLssshl.s.ppchh.LhPGp.t.pp.ls................hpls.p.p.spalGVl.Atapc.s...tsp.W+hlhpltsts.........................chtlhlplpppt.......................................... 1 31 88 154 +12626 PF12791 RsgI_N Anti-sigma factor N-terminus Borovok I, Coggill P pcc Borovok I Family The heat shock genes in B. subtilis can be classified into several groups according to their regulation [1], and the sigma gene, sigI, of Bacillus subtilis belongs to the group IV heat-shock response genes and has many orthologues in the bacterial phylum Firmicutes [1]. Regulation of sigma factor I is carried out by RsgI from the same operon, and this N-terminal cytoplasmic portion of RsgI ('upstream' of the single transmembrane helix) has been shown to interact directly with Sigma-I [2]. 
21.40 21.40 21.50 21.40 21.00 21.10 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.58 0.72 -4.30 52 279 2010-02-18 16:43:53 2010-02-18 16:43:53 2 6 205 0 71 235 0 56.00 24 13.15 CHANGED lclcccpsllhTscGcFlcl+ppss..hplGpcI.hpppshhphtpthh..hhhhshshh .......h-lcccpsllLTscGcFlp.lKp+sc...hhlGcEIphsppchht..pptth...hhhh.h...sh................ 0 35 63 67 +12627 PF12792 CSS-motif CSS motif domain associated with EAL Coggill P pcc Pfam-B_29 (release Domain This family with its characteristic highly conserved CSS sequence motif is found N-terminal to the EAL, Pfam:PF00563, domain in many cyclic diguanylate phosphodiesterases. 21.60 21.60 21.70 21.60 21.40 20.80 hmmbuild --amino -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.17 0.70 -4.99 138 2685 2010-02-19 09:08:36 2010-02-19 09:08:36 2 3 774 0 211 1380 30 205.10 21 40.43 CHANGED pphppphpshuppslpph-p......llsps.ppshsph...shssps..Csps.hhtLpphshtt.al+slsll.cssplhCSShh.G.h.shshs...s.h......h......shplhhhtssshhsspshl..hht.ts....hsssushsslssthh....hhthh..phsshhlt.......h.spthlttts....t.h.ttssh..........hhhphpSsc.....ashslh.......stsststhhphhhphhhhhl ..................................p..hppchsshuspshcph-p......lhtph.ptstpth...t.htsps...Cs.A.thpLpphutth.hlcslshl...cssphhCSSlh...s...t..shs...h....s.h.ssh..........hp.......ssplhl.hs.sssh..h.sp..hl..hhh..pss..shhssusa........stl.p.shhht...l.hsh.s.phssssls..................l.spshlptts......shhhp.sth...............htpspssc.....hslslh.......lhssstth.thaat.hhh..h.............................................................................. 0 15 45 130 +12628 PF12793 SgrR_N Sugar transport-related sRNA regulator N-term Coggill P pcc Pfam-B_33 (release 24.0) Family Small, non-coding RNA molecules play important regulatory roles in a variety of physiological processes in bacteria. 
SgrR_N is the N-terminus of a family of proteins which regulate the transcription of these sRNAs, in particular SgrS. SgrR_N contains a helix-turn-helix motif characteristic of winged-helix DNA-binding transcriptional regulators. SgrS is a small RNA required for recovery from glucose-phosphate stress in bacteria [1]. In examining the regulation of sgrR expression it was found that SgrR negatively auto-regulates its own transcription in the presence and absence of stress, and thus SgrR coordinates the response to glucose-phosphate stress by binding specifically to sgrS promoter DNA [2]. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.38 0.71 -4.08 49 1737 2012-10-04 14:01:12 2010-02-19 13:52:54 2 2 824 0 137 981 7 113.50 39 20.26 CHANGED +lhppapRLapths.....spstpsTLsElA-hLaCScRHsRsLLppMpptGWLsWpspsGRG+RSpLthhhsspplptphAcchlcpGchcpAlplls..cpttltplLppphGtphppu ...................h.pQa.RLaQtss........GcsppsTlsELA-hLhCScRHhRoLLpphQ-tGWLpWpupsG....RGKRo.pLpFLh.os..sLppphsEchLEps+hppllpLlt...s......tuplpphL.sahGtpapps...................................................... 0 24 45 93 +12629 PF12794 MscS_TM Mechanosensitive ion channel inner membrane domain 1 Coggill P pcc Pfam-B_24 (release 24.0) Domain The small mechanosensitive channel, MscS, is a part of the turgor-driven solute efflux system that protects bacteria from lysis in the event of osmotic shock. The MscS protein alone is sufficient to form a functional mechanosensitive channel gated directly by tension in the lipid bilayer. The MscS proteins are heptamers of three transmembrane subunits with seven converging M3 domains, and this domain is one of the inner membrane domains. 
23.40 23.40 23.90 23.50 22.70 23.30 hmmbuild --amino -o /dev/null HMM SEED 340 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.04 0.70 -5.83 72 1372 2010-02-19 14:00:30 2010-02-19 14:00:30 2 5 800 0 153 777 52 322.70 42 30.04 CHANGED hpshlhhlhhlLlhhhll....hh+pphpptLpphspclGplppDshhhT.pulhhollhuLPhslhhhhsGhhlt.hs.hsts..tslusulhthuhh.hhlathhhphhpspGlhhtHFphsppplpphp.+hhphhhhhllsllhhhshs.....pthssphhpsslGplshllshhhlshhhh...slhctshs.hts......t...phlppllhhhlhhsPlhlhlhshhGYaaTAhtLht+L.tSlhlhhhhhllapllhRhhhlppRRLAacRApt+Rpp..hh......pEt...t.ssc..........hp.l-.Esslcl-pIspQSL+Ll+hlLhlhhlsslhhlWu-llss.huaL-s..IsL.W ...........................h....LshhshLllsGh.h....aptpahptalp+hAutVGplp.pDp.hhT.+slhhsllhAhPlsllhhslGhhLh.ph...hslushlhshspths.h.hhVhhlCh.phhcsNGlhltHFGhPcppsu+hh.R.hlh.hhhllPLhhh.lhh.............-..s..+.h.sssL.GphshllhhhhlAllsh...slp+tuh..h.s..............phhph..lhhshlhhhPlshhlhoAsGYhhTu.tLhuRh.poV.hlhhlh.llYpslhRhh.lttRR..l.....AacRAhtRRtp..hl......cE.........usp..................................uh.ss-.EsplsL-tIstQoLRlsp.llhhlhhl.hhslWS-lhos.FuaL-s..IoLW.............................. 0 21 56 106 +12630 PF12795 MscS_porin Mechanosensitive ion channel porin domain Coggill P pcc Pfam-B_24 (release 24.0) Domain The small mechanosensitive channel, MscS, is a part of the turgor-driven solute efflux system that protects bacteria from lysis in the event of osmotic shock. The MscS protein alone is sufficient to form a functional mechanosensitive channel gated directly by tension in the lipid bilayer. The MscS proteins are heptamers of three transmembrane subunits with seven converging M3 domains, and this MscS_porin is towards the N-terminal of the molecules. The high concentration of negative charges at the extracellular entrance of the pore helps select the cations for efflux. 
31.40 30.50 31.40 32.60 31.30 30.40 hmmbuild --amino -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.73 0.70 -5.05 69 1277 2010-02-19 15:07:52 2010-02-19 15:07:52 2 3 738 0 127 672 19 235.70 40 21.69 CHANGED ppLcplpp..ppssspppthlpsLppsLshLschcctcpcspphpptlsshPpphcplpppLpshpss.......sstshhsshohspLEpplsptpspLtphpcpLpphpsplhplpsp.pphpsphspsppplppIpppLpsh.sss....sslppuppshLpsE.shlpsphp.hchphhussshQ-Lh..phpp-h.hptchppL-pplQtLpstlNppRhppuEpslccspp.tpp..p.sptsshlppp ....................ppL-phpt.....hpst.pt.l.psLpssLssL-chcpstEcspQhpphlsphPchhpphpA.pLssL.cs...........EsRpl.sshShcpLppclhQs.spL.s....tppphtp.psplhplpsp.ppl.pt.hsAppQLppIcp....RLsu....h....ssGp.........ssLp.uQphhhQu-pAhLpAplDp.chu........ussshQ-Lh..............phpp-h.sptpSppL-t.LQhLpptlNSpR.p.uE+shppsp..sEs..phphs.hlhpp................ 0 14 42 86 +12631 PF12796 Ank_2 Ankyrin repeats (3 copies) Bateman A agb Jackhmmer:Q183I8_CLOD6 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.45 0.72 -3.64 620 110723 2012-10-02 12:10:21 2010-02-22 09:13:01 2 6024 2889 200 69620 109701 6937 89.50 23 36.39 CHANGED Lh.hAsp...p..s..p.........h..p.h..........l.c....hL.....................l...p....ps.......s.....shs.....................ps...............................................sL.h......h.....A................s....................t..t...up.....h...........................................................................c..hlch.........L.lp...............................................................ts...s.s..h.............stp.......................tupTs............................Lhh.............A..........st..................psp...h...c..hl.........c.h.Ll.p.p...............s..s.sh.s....hps 
................................................................................................................................................................................................................................................................................................................................................................................................h.t.........h....................................................................h.....t.......t..................s...phs..........................................spT................................................................................................sL.p.........h..........A............................u............................................t..p....G...p.....h..................................................................................................................................................................................................................................c....h....l.c.h..........L...l.p...................................................................................................................................................................................................p.G.....A....s...l...........................stps...................................ppGt.T..s........................................................................................L.p.h................................A............................s.p................................................p.u..p.......h....c.......l.....l.................c..h...Ll..p.t..........s..s.p....................................................................................................................................................................................................................................... 
0 33547 42383 56433 +12632 PF12797 Fer4_2 4Fe-4S binding domain Bateman A agb Jackhmmer:Q184V9 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild --amino -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.65 0.73 -7.77 0.73 -3.72 423 1399 2012-10-03 08:56:42 2010-02-22 11:31:53 2 39 752 0 175 6075 978 26.70 70 12.06 CHANGED hthhhs....ts..p...C...hs...C..tt..C...ht..sC........Ps ...........Y.GFFID.......SS..RCTG...CKTCEL..AC........KDh+sLss............. 0 59 106 145 +12633 PF12798 Fer4_3 4Fe-4S binding domain Bateman A agb Jackhmmer:Q184V9 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild --amino -o /dev/null HMM SEED 15 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -6.94 0.74 -7.15 0.74 -2.97 447 625 2012-10-03 08:56:42 2010-02-22 12:42:34 2 99 466 0 231 9841 2305 17.30 47 7.78 CHANGED C..ht..C.............stCh.psCP..t...ss ....................C..hp...C......................................suCVsAC..s.......... 0 69 134 183 +12634 PF12799 LRR_4 Leucine Rich repeats (2 copies) Bateman A agb Jackhmmer:Q187Q2 Family Leucine rich repeats are short sequence motifs present in a number of proteins with diverse functions and cellular locations. These repeats are usually involved in protein-protein interactions. Each Leucine Rich Repeat is composed of a beta-alpha unit. These units form elongated non-globular structures. Leucine Rich Repeats are often flanked by cysteine rich domains. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.52 0.72 -4.27 569 21961 2012-10-02 21:32:02 2010-02-22 13:07:11 2 3572 1441 188 8913 43842 1514 44.60 29 11.83 CHANGED p..sLppLp......Ls.p...N......p....l...p...s.....l..s.....s...........l..p....s........L...s....p....LppL.....s...l...s.........p.....N.p..l....p.......s..........l......s...s...l...p....p .................................pLptLs...............Ls.s....N..........p....l......s.....s.......l..s........................s..............l..u.....p.........................L....s.......s....LppL...s...L...s......................s.........N.p..l.........s.........s..........l.....s......................................................... 0 2700 5198 7123 +12635 PF12800 Fer4_4 4Fe-4S binding domain Bateman A agb Jackhmmer:Q184V9 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 24.00 10.00 24.00 10.00 23.90 9.90 hmmbuild --amino -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.47 0.74 -7.83 0.74 -3.15 187 4289 2012-10-03 08:56:43 2010-02-22 13:24:58 2 168 1372 4 663 24462 7195 20.30 39 10.05 CHANGED t...sp......C.....h.....p......Cs...t.....Ch.ps..Cs.................hp ....................tpC..........l.......G..................C+.........s.........CphA..C...sh.................... 0 214 424 548 +12636 PF12801 Fer4_5 4Fe-4S binding domain Bateman A agb Jackhmmer:Q189G9 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 
22.00 0.00 22.10 4.40 21.90 -999999.99 hmmbuild --amino -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.76 0.72 -4.17 217 6537 2012-10-03 08:56:43 2010-02-22 14:38:41 2 113 2159 0 1700 6678 530 48.00 21 19.61 CHANGED hshhhl.lhhh.hllhhshl.......hGR.haCuWlCPhGslp-hhtp.........hhppph..thp .......................h...hhh.lhhh..slhlhshh........htR..saCualCPhG.slhslhs................................................. 0 631 1249 1513 +12637 PF12802 MarR_2 MarR family Bateman A agb Jackhmmer:Q17ZV2 Family The Mar proteins are involved in the multiple antibiotic resistance, a non-specific resistance system. The expression of the mar operon is controlled by a repressor, MarR. A large number of compounds induce transcription of the mar operon. This is thought to be due to the compound binding to MarR, and the resulting complex stops MarR binding to the DNA. With the MarR repression lost, transcription of the operon proceeds [1]. The structure of MarR is known [2] and shows MarR as a dimer with each subunit containing a winged-helix DNA binding motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.62 0.72 -4.30 266 11347 2012-10-04 14:01:12 2010-02-22 15:12:01 2 65 2832 42 3627 27482 2684 60.70 22 32.41 CHANGED h.Gl..s..hsphtlLhh.ltptss....................h..ohsclsc..thtlsc.sslsphlppLtp..pGll.........ppp..ts.t.DpRt ........................................................tl.s....tph.pl.L....hh...L...t...ppss......................................................h.....shs-Luc......ph.tl......sp.sslophlpcLpp...pGLl.........p+p......ss.p..DtR................................................................ 
0 1112 2404 3087 +12638 PF12803 G-7-MTase mRNA (guanine-7-)methyltransferase (G-7-MTase) Coggill P pcc Pfam-B_40 Domain The Sendai virus RNA-dependent RNA polymerase complex, which consists of L and P proteins, participates in the synthesis of viral mRNAs that possess a methylated cap structure. The N-terminal of the L protein acts as the RNA-dependent RNA polymerase part of the molecule, family Paramyx_RNA_pol, Pfam:PF00946. This domain is the C-terminal part of the L protein and it catalyses cap methylation through its mRNA (guanine-7-)methyltransferase (G-7-MTase) activity [1]. 25.00 25.00 31.00 29.90 21.10 21.00 hmmbuild --amino -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.86 0.70 -5.29 29 388 2012-10-10 17:06:42 2010-02-22 16:14:03 2 3 87 0 1 397 0 319.70 39 14.47 CHANGED phuaplYYhRlpGcpphh-YlhshLpRhstssLsslusslSHP+laRRhhssGllpPhpuP.Lso.sahthsh-hlhtuhcpaLs.lhsG..hchphllspsspshls-RhtslhAR+LslLssLassspphPpI+GLsshEKCtlLTcaLpptshts......hsshphslps.plssasssLhYlpRpslppIRhc...s.....s.hhltphhcshsh.ptshtttps..........tsshppssph..hp.....l.spspthpph.hsssttsshp................pHhhRhlGlsSTShYKAlplsphlpphh..pGspLaLuEGSGAhhohhchhlsss .........shuYQlYYLRlpGhpsIl.Yhsshh+pMsshllsslusTlSHPhIapRhhssGllp.stu.pLAspDahchuscllhoCscphlssLhsG..sca-lLhsss.-DsLs-+h.plluRhhClhssLauss+chPpIRGLospEKCulLT-aLhu-Ahps......tsppspsIhsPpIhsaPusLaYhpR+SLshIRtRp........csshls.hh.sp.sllc.p.spshhuphpt.....shs+pssuhl.ths....Lssss.phcthh..psps.psh.............t..lpahFRsIGhsSSSWYKAsclhol.chpts..pcGsuLYLuEGSGAhMolhEhhlstp.. 0 1 1 1 +12639 PF12804 NTP_transf_3 MobA-like NTP transferase domain Bateman A agb Jackhmmer:Q188E0 Domain This family includes the MobA protein (Molybdopterin-guanine dinucleotide biosynthesis protein A). The family also includes a wide range of other NTP transferase domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.11 0.71 -4.16 571 9345 2012-10-03 05:28:31 2010-02-22 16:22:27 2 87 4058 75 2565 24779 11854 170.00 18 55.35 CHANGED ullLAGG......p...u......p.....Rh...........G.......s...........s.........Ks..l........l.h.......s.G.....p...........s...llp+sl.ptl............psh.......h..s.............................plhls....s..st....p.............p..h.h.....t..................................h............t..th........s.....ht....h.....lt-........t......s..t.........GPh....uu...ltsul.....................................p....t....s........p.....s...sh.............ll..lhssD.hPh...lsssh.lppLhpth...............t......p....s...........s..s.......h..ss..st.t.p.........s..p..h.......................................................hp.P.l......l..h.....stp...h...h...........st.lp....p.h........t..s....s.........p..u......h.......+p...ll.p...............p.h.....t....ps 
................................................................sllL.A.u...G.......p......u.......s.....RM....................................t.........s.......s.................Ks..L........hpl........sG.....c......................................s...hl.p+..sl.....cs.h.................p.p.h.....t...s..s....................................clh..ll......s......sp......c.................p....h.t....t.......................................................h..h...............t..t......................s.......lp.....h...........l.h.s.................s..........p.....h.................G.s..h........t..u....l...h...s....u..h............................................................pt......h............t.s.pt........................h.l....l...h....s......u.D....h.......P..h.............l.....s.....s....p.....h.......lp..p.L...hp.th........................................t..........t..s........h....h..h.......h...............................................................................................................................................................................................................hhthhh...................................................................................................................................................................................................................................................................... 0 846 1696 2182 +12640 PF12805 FUSC-like FUSC-like inner membrane protein yccS Coggill P pcc Pfam-B_45 (release 24.0) Domain This family has similarities to the fusaric acid resistance protein family. The proteins are lodged in the inner membrane. 
20.60 20.60 20.60 20.90 20.50 20.50 hmmbuild --amino -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.66 0.70 -5.48 75 1796 2012-10-02 19:04:43 2010-02-22 17:07:20 2 5 1161 0 248 1140 37 272.10 36 38.74 CHANGED sLhhoLlsFhluohslplhhsa..shhFslslhhsoFshshLGAlGpRYuoIuFuoLllAlYThlshs.........ts.thatpP..llllsGAlWYslhollhphlaPppPlQcsLApsappLusYLcsKusLF...cPs.ppppspphphtLAtpNspllsslNps+psLhpRht.us+tp.ssp..+hLphYahAQDIHERsoSSHh.pYppLpcthp.pSDlLaRhp+llphQupuCpplApulhhpp.Yp......asschptuhpplpsuLph.tpp...psttpt.hhtsLptLhpNLps....l-ppL ......................................LIlshhsFhluShss...pLL.h.s.h..shhhslsL.oh..T.hs.hhlluslGt.+a.Aplu.huuL.l.lA...IaThhssu.........h.....h.apps....llllhGulWYslhshhhahlas...pPlp-sLupsYcpLAc.YLEs.Kuph.a.....s..pc.....s-....sptslhsLs.h.tp...t.pl.hst.lsQs+tp..lhhp....us+sppss+..RhL+hahhA.D.laE+hS....uSph.p.Ypplp.ch.hc..+S-lh.hh..hpphhph.ut.shppL....ucsIL.tp.Yp......+tsphc...hshp.ch.uuLE+htpp.....ssst.hhhpluhlhpsL+s.......h..................................................................................................................................... 0 38 107 183 +12641 PF12806 Acyl-CoA_dh_C Acetyl-CoA dehydrogenase C-terminal like Coggill P pcc Pfma-B_46 (release 24.0) Domain this domain would appear to be the very C-terminal region of many bacterial acetyl-CoA dehydrogenases. 
26.00 26.00 26.00 26.00 24.90 25.90 hmmbuild --amino -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.56 0.71 -4.28 470 2011 2010-02-22 18:15:35 2010-02-22 18:15:35 2 6 866 0 708 1823 1503 128.00 25 21.59 CHANGED thhsclpshhppttt.....t....th.t.shsptLtpu.hpplppssthlhtpst.ss..........s...pts..hAuussY..LchhGhlshuahah+hAhsA.tth......................tt...................................t....ts......Fa.puKltsA+FahpplLPcssuhhstlps..Gsss.lhshst ...........................h.hhtplpphsppttt.........st..th..t.thtp.Ltpu.hpplpp...hTthlht...psh..ss............s...ppssAuussYLphhGhlshuahat+.hAtsAtppl...............ss...............................t.....ts...ts.....FYpuKlssA+FahpplLPcssuhhstlpu..ussslhshs.............. 0 183 408 579 +12642 PF12807 eIF3_p135 Translation initiation factor eIF3 subunit 135 Wood V, Coggill P pcc Pfam-B_2213 Family Translation initiation factor eIF3 is a multi-subunit protein complex required for initiation of protein biosynthesis in eukaryotic cells. The complex promotes ribosome dissociation, the binding of the initiator methionyl-tRNA to the 40 S ribosomal subunit, and mRNA recruitment to the ribosome. The protein product from TIF31 genes in yeast is p135 which associates with the eIF3 but does not seem to be necessary for protein translation initiation [1]. 
21.30 21.30 21.40 21.50 21.20 20.80 hmmbuild --amino -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.30 0.71 -4.32 65 500 2010-02-22 18:22:24 2010-02-22 18:22:24 2 76 258 0 376 485 1 166.80 22 13.01 CHANGED tsppssPhDGpoLophLHpRGINhRYLGc.......lup......htp....................pLttlppLslpEhlsRuhKHlhpphlp.....................................sl.ss.................sthuusluHhLNCLLGst.ss............................pspsphcpph..................................................t.....spss.satplTs...............ppLhppIppplpp+acasLp...............schhs........p.lp+hpLLRslshKhGlQlhs+-YsFs .....................................h......PhDG.toLsch...hHp+GINhRY.LGclhph.......htp....................................................................................plpt.l......pplslpEhlsRuhKHlhpshlp....................................................................................................................................................................................................sh.....................hs..ss.luc.hLNslLus..ss......................................................p................................................................................................................h..hs........................ptl.h.p..lp..p.hh.hapapl................tp.hp.........t.hppht.llRths.+hGlplh.+.ca.h.................................................................................................................................................................................................................................... 0 144 232 330 +12643 PF12808 Mto2_bdg Mto1_bdg; Micro-tubular organiser Mto1 C-term Mto2-binding region Wood V, Coggill P, Eberhardt R pcc Pfam-B_28820 (release 24.0) Domain The C-terminal region of the micro-tubular organiser protein 1 (mto1) is the binding domain for attachment to Mto2p.The full-length Mto1 protein is required for microtubule nucleation from non-spindle pole body MTOCs in fission yeast [1]. 
The interaction of Mto2p with this region of Mto1 is critical for anchoring the cytokinetic actin ring to the medial region of the cell and for proper coordination of mitosis with cytokinesis [2]. 21.40 21.40 21.50 21.40 21.00 21.30 hmmbuild --amino -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.08 0.72 -8.55 0.72 -3.86 9 126 2010-02-22 18:30:33 2010-02-22 18:30:33 2 5 97 0 91 132 0 50.50 39 5.17 CHANGED +WlpRLcELEppLKAER.EARhhD+oGA+cRLp-hptENccLpApLp+p+.pp ........+hl.RLcELpp+L+uER.E.....tRhhDpsuAcpRlpclcsENcpL+ucLph.p.t........ 0 13 35 67 +12644 PF12809 Metallothi_Euk2 Eukaryotic metallothionein Wood V, Coggill P pcc Wood V Family This is a family of eukaryotic metallothioneins. 80.20 80.00 80.20 80.00 78.60 78.40 hmmbuild --amino -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.27 0.72 -3.59 2 16 2012-10-05 18:33:37 2010-02-23 09:22:44 2 1 13 0 2 9 0 75.50 58 72.34 CHANGED sl+.pDCEstCCpcus.pCtSpCh.psStGphCpsph.hGp.pspsChcphpC.........ohs..pCpK.u+pphtKp ClRNQDCEoGCC+cAPspCESHCsEKGSEGSLCQspsFFGQYRsCPChcNLTChY.KstKW.oIsaGpCQKlu+ppLtKp... 0 2 2 2 +12645 PF12810 Gly_rich Glycine rich protein Bateman A agb Jackhmmer:Q183Y9 Family This family of proteins is greatly expanded in Trichomonas vaginalis. The proteins are composed of several glycine rich motifs interspersed through the sequence. Although many proteins have been annotated by similarity in the family these annotations given the biased composition of the sequences these are unlikely to be functionally relevant. 
24.50 24.50 24.60 24.80 24.20 24.40 hmmbuild --amino -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.36 0.70 -4.79 103 677 2010-02-23 09:42:22 2010-02-23 09:42:22 2 34 107 0 572 663 155 230.70 22 43.92 CHANGED sCTs.YplpLs.sGhY+hElaGAuGGs.t.............................hpp......ss....h.G......GtGGYssGhlplppppphYlhlGupG............p.s.....tp............h..h...GGasGGGpust..........................t...hu.uuGGG..uTcl+hh...........................ssLtsRl....lVAGGG.GG.u.ss.......st..........t.sp.............................................................................G......GsGG..Gltu....tssh....tt....sp.........ss.s...........uu.sp.puuG..u.............hp...p...........s..t.......sGshshus......ss.....sp.................u.......GGGGGaaGGhuu.............p.....t.s........uu............G.GGGSSal...hsp..su..hh................s..tsslps.......sshhapssthhpss...................tp................pp.................GsGhhpITlLs.h..t.hs..s..phh ................................................................................Y.hph..hG.AtGG...................................................t....uhGs.hh.uhh......l.t.p..t....ha.hhlGtpG.......................p...................................tsh..sG.s.s...s........................................u.uuGGG..uT.l.hh.....................psh..phl....lVA...u..GG..GG.s.s........t.............sp.................................................................................u.....GtGG..uhtutssh.......tt.................st........................susp....h.tsu..tu........................h..p...........t.t.....pGshuhuu.........ss....ss..................................uGGGuGahGGtuu.............................t..ts.....ss...........G..uGGSSal...ts...h....................................................................................................................................................................................................................................................... 
0 505 515 542 +12646 PF12811 BaxI_1 Bax inhibitor 1 like Coggill P pcc manual Family The Bax-inhibitor-1 region of the receptor molecules is conserved from bacteria to humans. 22.90 22.30 23.10 23.10 22.80 22.20 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.90 0.70 -5.24 8 694 2012-10-01 20:22:31 2010-02-23 10:16:08 2 3 645 0 171 575 640 247.90 32 92.43 CHANGED M...RcoSNPVFRSL.......PK.ppG.GYA........pFGou.Auuhus..QphstpsYtst.....ppssssRshTIDDVVsKTGITLuVLshsAVVSYFLVssNssLuhPhshlGulGGLslVLlATFGRKtDNPulVLsYAuhEGLFlGulSalhuNlhsuGusu..hIsQAlLGThGVFF..GMLVVYKTGAIRVTPKFTRMlluuLaGVlsLhLGNllLAhF.sGss....LpsGGsLAIhFSLlCIGlAAFSFLlDFDAADQMIRAGAPEKAAWGVALGLTVTLVWLYlEILRLLSYhpN- .................................................................................................................................................................................................................................................ttt.ss..p..p.s....MTlssllsK.T.u.l..h..L..t.l.l..h.s.s...u...s...h.....u....a.....h......h...h.....h......s......p.............s...h....s...h.........h.......h....l.....l...G.u.....l.s.u.hl....l.u.h.lss...F..t..p...ph.sPsh..s..l....h..YAhhEGlhLGulSh.ha.p.......t.......h.su........llhpAl..Lu..Thushh.sh.Lh.lY+.sthI+VTs+FpphlhuuhhGl.hlhhLs.s.hlLshF....s...ss...................hlts.....u...G...s.....l.G.Ilh....Sll..s.lslAAhsLl.lDFDh.h-pulct......tAPcp.......h.tWhsAhGLhVTLVWLYlEILRLLShhp..p........................................ 0 46 106 140 +12647 PF12812 PDZ_1 PDZ-like domain Wood V, Coggill P pcc Pfam-B_17100 (release 24.0) Domain PDZ domains are found in diverse signalling proteins in bacteria, yeasts, plants, insects and vertebrates. this is a family of PDZ-like domains from bacteria, plants and fungi. 
28.00 28.00 28.00 28.20 27.90 27.90 hmmbuild --amino -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.45 0.72 -4.09 34 382 2012-10-02 11:12:46 2010-02-23 10:45:31 2 15 206 0 258 391 21 76.80 30 13.96 CHANGED sITPsRaVpVuGAoFHcLSYQ.ARpaslsl+..GVaVucuuGShph......ss...tspuallpplsspsTPsLDsFlcVh+plPDp ..............lpss+hlphsGAsh+clsaptsRph..s..h..sht...shhs.u.c.u.s.GShth.......ph...lssshlIppVsspsTssL-sFlclh+pIPDp....... 0 56 147 227 +12648 PF12813 XPG_I_2 XPG domain containing Wood V, Coggill P pcc Pfam-B_10579 (release 24.0) Domain This family is largely of fungal proteins and is related to the XP-G protein family. 29.20 29.10 29.20 29.10 29.10 29.00 hmmbuild --amino -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.69 0.70 -4.88 26 211 2012-10-01 19:52:02 2010-02-23 11:52:00 2 8 149 0 162 230 5 227.40 18 35.47 CHANGED tssFhVsslhEsL+sp.p...................................aushs.llsGEADsaCAhhA.+phGs.sVLTsDSDLLlHDL.G.pGullFhsol-hss...................................................t.hplpAhphpssslsc+Lul....sl.hlAa.........El.ppcsctshspllcpu+t..spssspppspYhsFlcpY........ts...........ss.thptLDs+loElhhph....................tcptsplYLshLl-stuRpsAWt..pupphR.lAYulhshpt.......hspptssltEhhRp...upchsspplsl 
...............................................................................................h.s.hhh.shhpsLpph......................................ss.hhhs...u.EADt.hA....slA..pph.s............C.......sVLo..sDSDahla...cl......psualslsslp.hcs..............................................................................................hl.psphap.sphsp.p.hsh..............Ls..hh..Ah................-h.t..s..h..tp.h.....ph.h.th..stt........t......t...tph.thhp.h...................................................................................................................................................................................................................................................................................................................................................................................... 0 65 86 127 +12649 PF12814 Mcp5_PH Meiotic cell cortex C-terminal pleckstrin homology Wood V, Coggill P pcc Pfam-B_1220 (release 24.0) Domain The PH domain of these largely fungal proteins is necessary for the cortical localisation of the protein during meiosis, since the overall function of the protein is to anchor dynein at the cell cortex during the horsetail phase. During prophase I of fission yeast, horsetail nuclear movement occurs, and this starts when all the telomeres become bundled at the spindle pole body - SPB. Subsequent to this, the nucleus undergoes a dynamic oscillation, resulting in elongated nuclear morphology. Horsetail nuclear movement is thought to be predominantly due to the pulling of astral microtubules that link the SPB to cortical microtubule-attachment sites at the opposite end of the cell; the pulling force is believed to be provided by cytoplasmic dynein and dynactin. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.76 0.71 -4.20 26 320 2012-10-04 00:02:25 2010-02-23 11:54:51 2 30 183 0 235 344 1 121.30 30 9.25 CHANGED sllsAIsQTMlGEahaKYsR+p..p..h..............................tpsRHcRahWlpPYppsLhWSsppPtsussptspspp.lhIpuVtsVcDsNshPpGh.......ap+SIlIhTssRslKlTAsoppRHplWhsALpaLhpp .............................................t.hhpAIspshlG-ahaKYsR+t...t..............................ts+HcRaFW.ls..P..a..pps..LhW..Ssp.pPp.pss....t......hpsp...................p....ltIpu.Vps.V..pD...s...s......s..hP.uh................ap....c..Sll..ll.os.p....R..sl+hTssotp++psWhsuLpaLh.................................... 0 67 124 195 +12650 PF12815 CTD Spt5 C-terminal nonapeptide repeat binding Spt4 Wood V, Coggill P pcc Pfam-B_197031 (release 23.0) Domain The C-terminal domain of the transcription elongation factor protein Spt5 is necessary for binding to Spt4 to form the functional complex that regulates early transcription elongation by RNA polymerase II. The complex may be involved in pre-mRNA processing through its association with mRNA capping enzymes. This CTD domain carries a regular nonapeptide repeat that can be present in up to 18 copies, as in S. pombe [1]. The repeat has a characteristic TPA motif. 
25.40 24.80 25.60 25.10 25.00 24.40 hmmbuild --amino -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.74 0.71 -12.09 0.71 -3.70 33 296 2012-10-02 12:52:19 2010-02-23 14:16:34 2 28 170 1 213 294 3 130.10 32 17.37 CHANGED uu+TPAasu..........sGu+TPAW...tsGupTssh.h................GuRTPsass....Gu+TPh..................htsGu+TPAauusss...........upTPuast......................................................ssshhuusoPush......sss.TPuu..tusTPu.....................s.uusoP ...............................................................upTPhass...........GuRTPta.....GSpTPh...s...............................GuRTPsat........GopTPh...............................tsGSRTPAauus.s...................spTPu.hstt....................................................................s.....s...ss.....................................................................ss......................................................................................... 0 76 113 178 +12653 PF12816 Vps8 Golgi CORVET complex core vacuolar protein 8 Wood V, Coggill P pcc Pfam-B_90 (release 24.0) Domain Vps8 is one of the Golgi complex components necessary for vacuolar sorting [1]. Eukaryotic cells contain a highly dynamic endo-membrane system, in which individual organelles keep their identity despite continuous vesicle generation and fusion. Vesicles that bud from a donor membrane are targeted and delivered to each individual organelle, where they release their cargo after fusion with the acceptor membrane. Vps8 is the core component of the endosomal tethering complex CORVET (class C core vacuole/endosome tethering). Vps8 co-operates with Vps21-GTP to mediate endosomal clustering in a reaction that is dependent on Vps3. Vps8 is the only CORVET subunit that is enriched on late endosomes, suggesting that it is a marker for the maturation of late endosomes. 
Late endosomes form intralumenal vesicles, and the resulting multivesicular bodies fuse with the vacuole to release their cargoes [2]. 19.90 19.90 20.90 23.50 18.90 17.20 hmmbuild --amino -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.19 0.71 -5.06 42 312 2010-02-23 17:00:45 2010-02-23 17:00:45 2 13 243 0 230 320 2 182.20 31 13.50 CHANGED pslFL-sLEsaIlpGplpslPPtlhKsLlpaassppphpplEclIspLDssoLDlDpsspLCccasLYDAlIYlaNpslsDYloPLh-Llphltp.tp..............hsts...........ssuhKlasYLuhlLoG+tYPoGcth.spp..cshpuKpplaphLFSupshshs.ssst............cssFPYLphlLcFDspsFLshLspuF ............................................................................slFL-sLEPaIlpsplp.s.l.sP..tlh+sLlsaa.t.spt.........h...h.........pplEphIh+hDhsoLDl.sQ..llpl.....CpcapLYD..AhIYlaNcu.hsD...YloPl...cLlp..llts.hptt................t.hstt........................................s.u.KlhsYlS..hsLs.GRsYPh.Gc.h..sp.p...............h..s.psKppl....apaLhptp.shphs.t................................c..aPal+hLLcFDsppFLssLs.sF......................................................................... 0 86 132 195 +12655 PF12818 Tegument_dsDNA dsDNA viral tegument protein Coggill P pcc Pfam-B_48 (release 24.0) Family This is a family of tegument proteins from double-stranded DNA herpesvirus and related viral species. 
20.70 20.70 20.70 20.70 20.50 20.10 hmmbuild --amino -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.51 0.70 -5.53 11 214 2010-02-24 16:15:47 2010-02-24 16:15:47 2 2 24 0 0 95 0 223.90 74 46.53 CHANGED lppuhhHlHSslpshup.QLpshlFpsoLhPtspspuushGhY.sTssu.sushstshpcplttsspspucsLspsGVPVluGFl+sl....pptuthstsslhpsShLsosspstLshsR.thGQhlVsLGsFpPssGsDssPahYp-SshshN+lhpsLcLFtphhsssClSuhtRshG.soslcHLhuLl.tsGhpLalSpLPp-lhstLtussssp.st..lcphVpsaFLNshss.lFLllspcs......spsps.psL-hLppAuclsGCshhlLGcTsspsGl ................................................IHGAYTHVHSSVQRGIR.GLGNLLFHSTLFPGGQTQGALTGLY.ATEPA.LGPRAHSRFRRIFAKGVQ.....QAEMLQGAGVPTLGGFLKTV......RTIATTP...G.NALAVCS.....IS....TTTSKE....CIS..LRRMI.P...QQ.T.VVC.LGR..FEPTD.GPDTYPNLYRDSSDNAVRILETLKLVQRLA..KG..P..IF..SGL..NRSHDPAPVVRHLQALAP.R.TGLELFVSKLPDEV+s+LsAcPuAsssu..VcusVu-HFLNVYCSlVFsVVu-oG....ulPuDhGcTPLElLQRAARLCuCQlsVLGRToEpPGI..... 0 0 0 0 +12656 PF12819 Malectin_like Carbohydrate-binding protein of the ER Coggill P pcc Pfam-B_41 (release 24.0) Domain Malectin is a membrane-anchored protein of the endoplasmic reticulum that recognises and binds Glc2-N-glycan. The domain is found on a number of plant receptor kinases. 
20.80 20.80 20.80 20.80 20.70 20.70 hmmbuild --amino -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.84 0.70 -5.36 155 1073 2012-10-01 23:47:32 2010-02-24 17:09:37 2 67 112 0 617 1235 5 284.20 22 44.21 CHANGED IDCGhsssps..a.-..s.slsasoDs.sahs..sGt...hth....t.t.t.s..h.p.htslR.FP...tsp..+.sCYsl.sh...ttt.+YLlRspFhYGNhp.........s..t.ph....ss.F-LhlGsshWssVph................hhtE.llhss.....t..splslCLlssu....ss..PFISuLElRsL..psshY.................h.ps.sts..LphhtRhsh.uss.................................lRYssDs..aDRhW........s..............ssthpphss.....s......hs..t.st.t..tsPptVhpTAhss..ssss................lphsa......sh.ssss..p............ahlhhHFuElpp...ps.........RpFslhlNst.........shps.hhhstsh..hpshhhph.tst.....hhh.sLttsssS....sl.sPllNAhElapl ............................................................................lsCG................sh.ahsDt..thht......ss.........s.pl.......................h.shR..Fs.............t....p..sY.sh.s....t....pahl.RhhF..h.......sshs............t.............s.a..l.hh.sh....s.hh...h........................hhhE.hh.ss...............s.lplsh.hsps........ss.sFlsslE..lhsh...ss.p.ha.............................ts..Lphh....hRhsh..Gspt................................lphssDs......hsRh..W.........sh...........................ssthtt.hss.............t.p........hp....ststh..hsP..t....V.hpT.......Ahsh...ssst..................hp.hsa........ph...ssss.h.p.............ahlhhaFsElpt...t.................tR.FslhlNsphh......p......sht.s....h..h.t.s...s.h.........ht..hhh.t.h...st..........h.lslt..ss.to........th..ssllNuhElhp...................................................... 0 88 348 486 +12657 PF12820 BRCT_assoc Serine-rich domain associated with BRCT Coggill P pcc Pfam-B_51 (release 24.0) Domain This domain is found on BRCA1 proteins. 
21.30 21.30 21.80 21.30 20.00 19.40 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.13 0.71 -4.48 83 1101 2010-02-25 13:43:55 2010-02-25 13:43:55 2 15 761 0 20 1126 0 159.50 55 27.54 CHANGED ADsLstRKEhscQKs..CS-sPR.coQD.lPWITLNSSIQKVNEWFSRSD-hLTS.DsopDttsESNuElAuslEl..Ps-lDGaSuSSEKlDLhAS.-Pcsu..llscSERlpSKPVES.NIEDKIFGKTYRR.KuSLP..NLoHl..TE.sLIlGAh.AsEPQIsQEpPh....TNKLKRKRRT ...........................A-PLpGR+chpKQKssCS-SPR..DoQ-.lsWITLNSSIQKVNEWFSRSD-l.LTS.Dco+DtssE.SNsEsu.sslEl..PspsDGhSuSSEKhDLhAS.-sp.............s.A..LhpcSERspSKPVES...NIEDKIFGKTYRR.KuShP..NLs..+s..TE...sl.llGus......shEPp.lspEp.Ph....TNKLKRKR+h................................. 0 1 2 5 +12658 PF12821 DUF3815 Protein of unknown function (DUF3815) Bateman A agb Jackhmmer:Q183G2 Family This family of membrane proteins is functionally uncharacterised. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.52 0.71 -4.21 94 3677 2012-10-02 11:53:07 2010-02-25 14:38:39 2 9 2742 0 637 2738 47 132.40 26 47.57 CHANGED lphlhualAsluFullhsl..P..+..+tlhhsulsGulGWhlYhlh.h.p...hs.hs..h.hh.ushhuulslulhuphhA+hhKsPsslFhlPu.llPLVP......GstsYpshhthl...................p............sp............httu...........hph....................................hlpslhluuAIAlGlhlsshl ...............................h.phlhuhl.u.sluFu..l..lh..N.s..sh...+t..L..hhsull.G.u..lu.a.h.h.h....hlh..h.p...........hs...hs....h....h....h.uoF..h.u.uh.llGh...l..........u....t.........h..h......u..+...h...h..+.........s........P..s....h..l..a..........hl.su..llPh.VP......Gh.huY.p.uh...hshl...............................................p............ts........................hspu...........h.sp............................................................hhpshh..lssAlu...lGlhlsph.h..................................................................................................... 
0 196 382 532 +12659 PF12822 DUF3816 Protein of unknown function (DUF3816) Bateman A, Iyer LM agb Jackhmmer:Q188H8 Family This family of proteins is functionally uncharacterised but are likely to be membrane transporters. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 177 and 208 amino acids in length. A subset of this family is associated with the TM1506 proteins. In this context, transport through the channel is predicted to be regulated by the TM1506 protein by either regulating redox potential or modification of substrates [1] 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild --amino -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.66 0.71 -4.37 172 3851 2012-10-03 02:46:00 2010-02-25 14:48:37 2 6 1995 2 522 3039 57 168.30 20 85.15 CHANGED +pl..............shhullsAluhlLh.........hh.hs....l.........p.hshsplssl..lu.....uhhhGPhsGhhluhl...psllphhh...h.................................................s.hh.hh...................hsshlsthhhslsss....hla..tppppptth.............................................................hhuhhluslhhslssslh......shhlh...h.ha...................h...............................................shhthlhhshlshsllcsllssllshhlh ..................................tlshlulhhAlshllt...........hhs.hs.....l...........p..lshsp..lssl...lu...........uhlhG......PhhGhlluhl....psllph.....l....l.....s..sh....................................................s.......................................hhshlsthl.hu.lssu.........hl.a.p..h.h...p..p..p.s....h....................................................................................................................hhu.l.llu..s..l.hh..o.ls.h....ol...l.............shhhh...........h.las................h.....................................................................slhthl..h..s.s.ll.s.hs.llctllsullhhhl.h.............................................................. 
0 216 361 451 +12660 PF12823 DUF3817 Domain of unknown function (DUF3817) Mistry J jm14 Pfam-B_123 (release 24.0) Domain This domain is of unknown function. It is sometimes found adjacent to Pfam:PF07690 and Pfam:PF03176 which are both transporter domains. 27.00 27.00 29.10 28.80 26.10 25.90 hmmbuild --amino -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.16 0.72 -3.55 149 830 2010-02-25 16:16:58 2010-02-25 16:16:58 2 6 649 0 275 668 179 92.30 31 73.20 CHANGED sslpt...aRllAhhEulohllLl.luM.slKY...h...........h............shs....h..slpllG........l.........HGhl.FllYllhshtlshch+Wsh.tchlh.shlAullPFuoFhhE.+hlp...+c ........s.slttaRlhAhlpGlshllLl.luM.slKY...h.....................................s..................shs.....t...sVpllG.......h.s..................HGhl.allYllss.h.luh.ch+Wsh.tph...............lh.shlAuslPFsoFhhEphhp+.p............ 0 92 210 255 +12661 PF12824 MRP-L20 Mitochondrial ribosomal protein subunit L20 Wood V, Coggill P pcc Pfam-B_1364 (release 24.0) Domain This family is the essential mitochondrial ribosomal protein subunit L20 of fungi. 30.10 30.10 30.10 30.20 30.00 30.00 hmmbuild --amino -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.02 0.71 -4.28 34 157 2012-10-04 14:01:12 2010-02-25 16:34:37 2 4 148 0 105 147 0 159.90 28 73.35 CHANGED hhps+spuohp+s+pphp..lpPs.o...................llaNPPuSuPoshp....TPttFL...PtsDsR+phh.ttt.t.s............................spphPslhptcsp.....+p..YpLos-sIpEI++LRtpDPhpao+tpLA+cFssSshFlshls..pss...p++cthcph.Lpslcs+W.us+RthAR-..DRp+R+Eh.W ................................................................................h....p.pss.tptt.thp..h.Pt..................................hhapPssStsos.h....sPh.hFL...PtsDsRRth..........tt.................................................tphPslhpsppt.................+p....apLssp-lpEh++LRtpDPhpaohppLA++Fss...SthFVthls........psst.....p+pp..cph....hpthpppWs.p+phARc...-RphR+phh...................... 
0 23 53 87 +12662 PF12825 DUF3818 Domain of unknown function in PX-proteins (DUF3818) Wood V, Coggill P pcc Pfam-B_972 (release 24.0) Domain This domain is found on proteins carrying a PX domain. Its function is unknown. 21.70 21.70 22.70 21.80 20.90 21.60 hmmbuild --amino -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.93 0.70 -5.77 27 228 2010-02-25 17:01:20 2010-02-25 17:01:20 2 11 134 0 189 245 0 268.10 26 34.73 CHANGED sLtclFpEl+ppsslp-LSP.hpsFsEWs+lplAuTlYphFlusD.NS.Ehasp.....s++lHpLhPYslhtpllRhoNPhulMpshlDLFL......................AQP...FG.u+SLLQphFshhLs--l+shcctlcpLcp.plts.....phhhp+lcpal..ss-.....s....hhstl+c-utp.....pshslllsILpssp......p...stlss.....splpclhpSYhsapshhpp..........................sptppstuhhasplKplhplhh+cRDKchMppLhp-.PthspLlKsllolFY-PLl+la+sAslp.ulpsFppFhsDLIphl-phppt..hhhssshpsVpthhsLls+HpssFacFlHclah+Dst.....lFpslhpWlcpllphL+psh .......................l.plhtplttptplppLs..hpphh-ahp...l...p...lAuhlaplFlsp-.su.thhtt..........h+clHplhPYthhpphl+hsNPhshhpuhlclhL......................ups....hG..upsLhQ......phhuhhlp-...s...h..pp..hpct.lctl.tt.plts............hhhpplctas..tscp..........pptl+tc.utt.....pshsll..hslLpsp.....t..s.ls..............tthtpl.ps...a.......................................tt.ha..hpphhphhhppRD+t.hhpl...hp........c...s...h.hphh+s.hh.s..hhYpPlh.c.laptusl..slpshttFhsDhIt......h.........lcth...........p................h.....................t........s................ps.........VpshhsLht+HppthapFlHplhhpsst.....latthhtWhpthlthh+p..s...................................... 1 59 117 167 +12663 PF12826 HHH_2 Helix-hairpin-helix motif Bateman A agb Jackhmmer:DISA_CLOD6 Motif The HhH domain of DisA, a bacterial checkpoint control protein, is a DNA-binding domain [2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.82 0.72 -4.09 72 5697 2012-10-03 02:11:09 2010-02-25 17:13:14 2 57 4306 6 1394 5279 3189 61.40 32 9.52 CHANGED clh..hh...LGIRaVGppsA+hLApcFt.ol-sLtpAohE-Lhplc-IGshlApSlhpFFps.psppll ........................+hlhu..LG.I+pVGtpsActL.A.p.....c.Fs..o.......l.......-....s............l..........t....p..........A............o..........h............E..............-..........L...t.....p.....l...s...slGthlA.pslhsaFtptt..t.................... 0 461 920 1195 +12664 PF12827 Peroxin-22 Peroxisomal biogenesis protein family Wood V, Coggill P pcc Pfam-B_15020 (release 24.0) Family Peroxin-22 is a integral peroxisomal membrane protein family. The N-terminus is in the matrix and the C-terminus is in the cytosol. The N-terminus carries a 25-amino acid peroxisome membrane-targeting signal. It interacts with the ubiquitin-conjugating peripheral peroxisomal membrane enzyme Pex4p anchoring it at the peroxisomal membrane. Both Pex proteins are involved at the same stage of peroxisome biogenesis. 21.70 21.70 21.70 63.60 21.40 20.80 hmmbuild --amino -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.56 0.71 -4.39 17 42 2010-02-25 17:17:23 2010-02-25 17:17:23 2 1 41 3 28 39 0 120.70 32 66.90 CHANGED ptsptp.pp+hsscSlslslSpolhssph..ls.hhhsscshlhllsPsls.t...............phphppp.tthtssaKllpCsshp...................GhapllKpL+schLL...................................lssD-ls.s.......lspD...............ls+Flppllsl ........p..tppspcp+hspKShsIl.lScSl.u....Plph..hss--lVhllPPsho.ssh..............t.thtps.pppts.paKlIpCcohp...................GhauslKpLptpphl...................................lsocDhssu.......lPcD...............ls+Fl+plls....... 0 5 15 25 +12665 PF12828 PXB PX-associated Wood V, Coggill P pcc Pfam-B_972 (release24,0) Domain This domain is associated with the PX domain. 
20.90 20.90 21.00 21.50 20.70 20.40 hmmbuild --amino -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.67 0.71 -4.56 28 179 2010-02-25 17:31:20 2010-02-25 17:31:20 2 11 129 0 143 195 1 138.40 37 14.82 CHANGED M......................tLossppHaLK+.LlchpLtpElpthspP.....ssLppaGhPFps.t.......s.ppppsp...............................hPlLpahhppFlh..oFPhl..........ptstppcFWps+lQsFlEphsptslSsS.D+p....ploKR+plspKhpshlhLhhsos ..................................s......p.LTspQpHYLK+pLlptQlp.tElppLss.P.......sALppaGhPFpss.......sph..ss.pt....t.p...........................................................................................................................lP.l.L.+alFhcaVh..sFPhL...................ppstpc-.....FWQsKlQs........FlEsFssppl...SsSt...DRt............c..oKR+pLutKhp+lltLhhsS.u........................................... 0 42 84 126 +12666 PF12829 Mhr1 Transcriptional regulation of mitochondrial recombination Wood V, Coggill P pcc Pfam-B_7788 (release 24.0) Family This family is involved in the transcriptional regulation of recombination in the mitochondria, 20.50 20.50 20.50 41.20 20.00 18.70 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.78 0.72 -4.16 24 112 2010-02-25 17:49:44 2010-02-25 17:49:44 2 1 111 0 86 104 0 90.70 37 29.53 CHANGED ausplalF+NlposQVlYSpsPshspppl.pQh.hsshcs+.......sshR+DhW+sMsVlsFs...shphulpsYpsLtchRhhR.pls.tcps.ph++ ........aG.plalacNlcosQVlYS.s.tlcpppl.pQhsh....upcsh......PssLR+DhWpPhsslpFs....ssphG....lpsYptLREh+hhR.pls.sc-h.ph................... 0 13 41 71 +12667 PF12830 Nipped-B_C Sister chromatid cohesion C-terminus Wood V, Coggill P pcc Pfam-B_443 (release 24.0) Domain This domain lies towards the C-terminus of nipped-B or sister chromatid cohesion proteins. 
26.40 21.50 27.30 21.60 25.70 21.00 hmmbuild --amino -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.88 0.71 -4.65 57 312 2010-02-26 10:34:20 2010-02-26 10:34:20 2 9 257 0 213 314 3 182.20 28 10.29 CHANGED hsuulsQ+ahpplLc.hshsppsplphhAlcllphll+pGLlpPhpslPslIALpss.spshlpphAhphhpplppKapuhlpsp...hspGlphuapapppl.tp....h...............................ssshhsplapllp.ss+..psRpcFlpslh+hhp.p..............................t.lphhhalspslAhlsaps.-Eslhllppl.....ptl.l ..........................huoulhQhaLcplLc.hhhpppsplphhAlp...........llthhlp.QGLlHPhp....slPhLIA.....LtTs.spsthpphA.p.hpplccKatuhlct.c...hht.G.lphuaphQpsl.ss.pssh.t..................................................................ssuhhutLY..sl.l+..ss+..ppRctF...lpslhpha-tp...................................s.splphhha.lu-NLAhhsYps.-EsLhllppl-hh................................................. 0 73 120 180 +12668 PF12831 FAD_oxidored FAD dependent oxidoreductase Mistry J jm14 PfamB_47 (release 24.0) Family This family of proteins contains FAD dependent oxidoreductases and related proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.48 0.70 -5.40 240 2577 2012-10-10 17:06:42 2010-02-26 14:20:32 2 89 1337 8 853 18895 7151 241.30 22 42.20 CHANGED DVlVhGGGsuGlsAAlsAARtGt....pVlLlE..ppshLGGhsouuhs.sshhu...........h..t........................hhsG.lhpEhhpcl..........tt...t.......................t..........................a-Pcshptlhcphl......tc..s.........slplhhpstl............sslt....t.....p...s.......................c..lpulth..................spt.u....thplpAchalDuTs-GDLsshAGssaph.GcEsps...hsps.....ssh........................hh.......phssh..............................................sptht........h.....hph......thssps.tpth..sh..................stsas.tphph..................thhpt.............................hsup..........hsh...................h-hh.........shscuph..psR..cplh......p.hhtslhpal.....t..................Ghtp....splt..................thu.thhlREoRRlhGpaslTppDl.........hs............t.........ppas......D..uluh.usas.lDhH....ss.pth....................tsh..thtsss..........Ys.I........PaRuLlP.ps....hcNLlluG+slSuoHhAhuohRl.ssshshGpAAGsAAA...l...u....lcpst..........s..s.ppl.......s.....hppLpppL 
.........................................................................................................................DllVlG.uGsuG..h.sAA.l....s..A..A..c..t....G..t..........cV..l.L.l..E..........c..........p..........s...........h..........l......G.......G......t.....h...s....t....t....h..h............................................................................................................h............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
............................................................................................................................................................................... 0 296 586 747 +12669 PF12832 MFS_1_like MFS_1 like family Wood V, Coggill P pcc Pfam-B_20770 (release 24.0) Domain In fungal members this domain is found at the C-terminus of putative transporter proteins. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.31 0.72 -4.19 39 546 2012-10-03 03:33:39 2010-02-26 14:36:54 2 10 360 0 338 2373 283 74.60 24 14.28 CHANGED .hhhLushYFhaFuslGslhPYhulahc.p.hGassppIGhlhsllhhoclluPslauhluD+huptthllhhushhsh ......................................h.lpsh.Y.Fh.a.a.u...sh....Gs..h..h....P..a..h....slah.....c.....p...hG.....hs.ss.p......l.....Ghlh....u.l.h..........h.....h.ch....lu....ss.l.aGhluD+.h.t.pt.thllhhuhhh..s........................... 1 135 186 282 +12670 PF12833 HTH_18 Helix-turn-helix domain Bateman A agb Jackhmmer:Q17ZT7 Domain \N 27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild --amino -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.61 0.72 -3.88 512 57814 2012-10-04 14:01:12 2010-02-26 15:54:55 2 267 4045 19 12899 44029 2241 79.60 26 23.35 CHANGED lApthshSt+pLp+hFp.phh.G.hostpalpphRlppApphLh..p.............s...s...h..s.l.pc..lAhphGFsshupFschF+chhGhoPsph...+pp ..............................................lApphshStppL.p...+hF+.p.h......G..ho...htpalp.phRlpp....Ap.ph..L...p...................................s....s.......h...s..l..sc..lAhps..G.a.s.s...s.tFs....+tF+.+h..h..G.hoPspaRp.......................... 0 4087 7939 10539 +12671 PF12834 Phage_int_SAM_2 Integrase_l_N; Phage integrase, N-terminal Coggill P pcc Pfam-B_50 (release 24.0) Domain This is a family of DNA-binding prophage integrases. It is found largely in Proteobacteria. 
21.90 21.90 21.90 22.00 21.80 21.70 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.84 0.72 -3.60 22 357 2012-10-02 14:21:04 2010-02-26 16:41:55 2 6 239 0 81 331 30 87.30 33 31.33 CHANGED MscLsh-hKpLA.+.p.s.u.GSapThpsRh+hhpphuccL.ttLshplpssppLKs+HlEuhlpphhupsIottTLpNcMutlRhhhppAGRspls .................................t.Lttphh.tLA.+..p.s.u..GSa.KT..hpDRh+lhpRhsccL...ht.h.sl..p..lpps..pplKs+HIcsYlpphh.s.p.G......luh+TlpN-MuslRslhptsG+ppl.......... 0 14 32 61 +12672 PF12835 Integrase_1 Integrase Coggill P pcc Pfam-B_50 (release 24.0) Domain This is a family of DNA-binding prophage integrases found in Proteobacteria. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild --amino -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.13 0.71 -4.53 21 412 2012-10-02 14:09:14 2010-02-26 16:45:15 2 7 286 0 101 918 314 159.80 31 55.67 CHANGED GluuASRs.GTKhAIss.-capplLspsctpD.pGlAsslpLuRhhGLRocEAVQsspSL+TWppuLtp..G-s+lpVVFG...TKGGRPRcTpll..-p-plhpslppAlthupppsG+LIc..+PsL....+sAhcpa+shspc.uGLsG....pauPHSLRYAaAp-AhpaahppG.hSp+EAhAhsSMDLGHGDGRGcYltpVYs .......................................................................................................hpthhpt..sp....t......p.....c......t......l...A......s....u.....l..p....Lu.+hh...GLRspEu.......lph.......s.........t.......s.h.......t....................................h....p...........spp...p.l...t...l.h.h.G...........TKG..G.Rs....Rps.......lh..........pp..th...h.p.s........l......p...p.....A.......h...........t..........h....s....t........p...p....s.........s.p...L.Is.......c.s.l...................+ps..h..p......h.....a.c..p.t.h.pc.....h....G....l.s.t............t.h..s...s...HuLRauaAQ.c.th....phh..................tt......s..h......s..........................................ht.thtH................................................................................................. 
0 14 48 78 +12673 PF12836 HHH_3 Helix-hairpin-helix motif Bateman A agb Jackhmmer:Q182I3 Domain The HhH domain is a short DNA-binding domain [2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.84 0.72 -4.03 335 8248 2012-10-03 02:11:09 2010-02-28 11:38:54 2 52 4067 5 1906 6460 471 64.20 37 14.92 CHANGED VNtVGV.....-lNTASssLLp.p.l.u.Gl..s.sslAcsIVpaRcp.p..G.sFpsRcpLh.cV.tlGsKsaEQsAGFLR ..........................tstV...sl.NTA.....Ssp......Lp..p.l...s..G..l..u..tphApsIls.YR-c..p..G.sFps..h..c-Lp...cV..stlG.sK.shEphtshlp................... 0 618 1220 1609 +12674 PF12837 Fer4_6 4Fe-4S binding domain Bateman A agb Jackhmmer:Q180F8 Domain This superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. 25.00 24.00 25.00 24.00 24.90 23.90 hmmbuild --amino -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.38 0.73 -7.55 0.73 -3.87 236 737 2012-10-03 08:56:43 2010-02-28 11:44:55 2 86 681 0 169 11950 1256 25.10 45 8.00 CHANGED hsphc.tspC.h.......p...C..tp..C..hts..CP.....t....t.....sl ........hthD.cpKC.s.......p...C.....h-.....ChcV..CP....E..................... 0 70 132 154 +12675 PF12838 Fer4_7 4Fe-4S dicluster domain Bateman A agb Jackhmmer:Q182B4 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.21 0.72 -3.58 464 18155 2012-10-03 08:56:43 2010-03-01 13:25:32 2 690 4323 11 4999 30460 7729 57.40 30 15.10 CHANGED pC.......h.....s...C......s.....tC..h..ts..C.................................................P.......hp..s.l.p...........h...p................p...............t......................................t..................t...h....................................t...h...p....s...p...p..........C......h.............t................C.........................s.........t...........Chs.........s..CP..p..ss ............................................................pC.l....s...C.....s.....tC..s..ps..C.........................................................................................................................................P............ts...s.....l.p...........................h.p...........................t....................t.................................................................tt....................t...h........................................................p...l....s...........p....p...............C......h.........................t........................C......................................................................G...........t.............Chp............sCPsth.......................................................... 0 1801 3427 4346 +12677 PF12840 HTH_20 Helix-turn-helix domain Bateman A agb Jackhmmer:Q182F8 Domain This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.71 0.72 -4.17 108 9773 2012-10-04 14:01:12 2010-03-01 16:12:49 2 62 2904 43 3329 15478 1487 61.20 27 39.10 CHANGED sphh....ps.....Lus.ssRhpllptL.................ttpshossplucth.....s......hstss.lphHL.ctLpc.uGL.lpspcptc .........................................hhpA.......Luc.PsRh.p.llph..L.........................tpts.hs..s.s.....cl..spth......s...............lspss.lSpHL.phLpc.uGL.lpsc+pG................... 0 1109 2238 2834 +12678 PF12841 YvrJ YvrJ protein family Bateman A agb Jackhmmer:Q189G4 Family This family of short proteins are related to B. subtilis YvrJ protein. None of the members of this family have been functionally characterised. 27.00 27.00 28.10 27.80 24.50 25.70 hmmbuild --amino -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.72 0.72 -4.66 46 323 2010-03-01 17:10:41 2010-03-01 17:10:41 2 2 270 0 80 239 0 37.80 44 70.26 CHANGED phIuNlGFPIsVulYLLlRlEpKl-pLspuIpcLsptl .........hIuNlGFPIsVolYLLhRlEsKL-sLhpuIpcLspt........ 0 33 65 71 +12679 PF12842 DUF3819 Domain of unknown function (DUF3819) Mistry J jm14 Pfam-B_986 (release 24.0) Domain This is an uncharacterised domain that is found on the CCR4-Not complex component Not1. Not1 is a global regulator of transcription that affects genes positively and negatively and is thought to regulate transcription factor TFIID [1]. 
21.50 21.50 25.10 24.60 21.10 19.20 hmmbuild --amino -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.89 0.71 -4.51 61 378 2010-03-02 15:41:44 2010-03-02 15:41:44 2 10 282 0 261 393 2 143.20 37 6.88 CHANGED psp.L+pll.hAls+A......l+EllsPVV-RSVsIAshTTcpllpKDFAhEsDEs+l+pAAphMlpsLAuSLAhlTC+EP...L+ts....lpspl.+sl......lp..tssts............h........-phlphlssDNl-LusslIE+sAhE+Alp-I..-ctltsththR.+ppRp .......................p.L+phlp.Al-RA......lpE..ll.p.PVV-RSlpIAshTTcpllpKDFA......h-sDEs+hRpAAppMs+sLuuuhAhlTC+EP...Lphu....lsspl.+sh....ht.tthtts...........phh.......-pththlssDNl-LuCshIcKsAhEKAh.-l..-ctltsphphR+ptR...................................... 0 98 150 222 +12680 PF12843 DUF3820 Protein of unknown function (DUF3820) Mistry J jm14 Pfam-B_72 (release 24.0) Family This a bacterial family that is functionally uncharacterised. 21.10 21.10 21.20 21.40 21.00 21.00 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.25 0.72 -4.36 77 1116 2010-03-03 15:03:50 2010-03-03 15:03:50 2 5 1053 0 236 562 60 64.00 55 65.46 CHANGED hspctLlcLsptpMPFGKYpG+hLhDLPEtYLlWFtpcG.FPpGcLGphLtlhhElKlNGLEsLlcPL ............M-pEpLlclAssh.MPFGKY.......KGRhLlDLPE-YLLWFARK..s.FPtGcLGcL.hplsL.IKhpGLppLlpPL.......................... 0 53 126 190 +12683 PF12844 HTH_19 Helix-turn-helix domain Bateman A agb Jackhmmer:Q180H4 Domain Members of this family contains a DNA-binding helix-turn-helix domain. 
28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild --amino -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.93 0.72 -3.92 149 14559 2012-10-04 14:01:12 2010-03-03 16:22:39 2 171 3576 20 2358 33000 4379 62.70 22 31.97 CHANGED Gcpl+chRppp..sl.o...hpplup.......phth.....p.....pstlsphEps....pp.phs.....sptl.......hplsphhsls.hp....phhpthpp ..............................ppl+plR.p.p.p.......sl.o.......ppclAp...............pl.sh............s................pstlS.p..hEpG..............pp..pss............hppl....................hpls.p.hh.s.ls.hs.......hh.....p........................................................................... 0 826 1594 1964 +12684 PF12845 TBD TBD domain Bateman A, Masci AM agb [1] Family The Tbk1/Ikki binding domain (TBD) is a 40 amino acid domain able to bind kinases, has been found to be essential for poly(I:C)-induced IRF activation [1]. The domain is found in SINTBAD, TANK and NAP1 protein. This domain is predicted to form an a-helix with residues essential for kinase binding clustering on one side [1]. 21.40 21.40 21.70 22.40 20.70 21.20 hmmbuild --amino -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.66 0.72 -4.41 10 157 2010-03-03 16:48:16 2010-03-03 16:48:16 2 2 39 2 71 128 0 55.70 39 12.77 CHANGED .Ltc-RsNlppAahELKEEhuRlphLupsQs-hLpKLshssssspp...QsssPIpCs-+ ..........lhccpsNlppsahELKEEhp+lphLopsQs-hLpKLphssststt......................p.t.ssPlpCp-c..................... 0 3 6 21 +12685 PF12846 AAA_10 AAA-like domain Bateman A agb Jackhmmer:Q180M9 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null --hand HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.84 0.70 -4.98 149 6551 2012-10-05 12:31:09 2010-03-04 08:58:08 2 39 2755 0 1413 9633 1186 297.10 16 39.53 CHANGED ssshhlhGtsGuGKSshhp.thh..hthhttG............................splhll......Dht.scats.....hsph.......................hsuphlths...........hsss.........htlN.....Ph.....pht.........................................................hppptpthhshLtthhpths.stpt.................pshlsps..lptha...................ptths...............................................................................................................................................sls-lhch..lpp...t..........................................ptphpph.tp.........tlpthh..........sthutlFs....t.oshph..........ssplls..hclsslpts.............tphhshhhhhhhshhh..pphht.............................t...........ptppphlh...........lDEs.athhss.......s....thhphlpphhRps.R+hssshhhsoQsh.sDhhs...t.............pslhstsshthhhptss..pphtt......ltphh...sls.s 
...............................................................................................................................................................................................................................................phhlhGsoGu.G.K..o.h.....hhp..hlh.......tt..h..h.h.h..s......................................................................sph.hhh.......D.....t.....sp.h.tt.................hsph........................................................hsuphh.p..h.s...............................ssp...................hthN.............Ph..........................th.................................................................................................tpphph....h..h.....s..h..l...t...h..h.h.t...htstthpt...........................tshl....p..ps.......lpth.h...........................p............................................................................................................................................................................slpp..lh..ph.....lp..pt..................................................................p.thtph..tt........................................................tl.p.hh..................st.ht..h..las................................ttss.h.ph...................psph..hs.......h..-..l...s...p..ltpp..........................................thhs.h.s...h...h..h...l..h.ph.hh.....pphht.................................................................psp.thlh...........lDEs.ath.lps....................t.........htp.h...lt.ph...h..+ps.R....K.hssth.hhsTQ...s...h...sD.lh.tt..................pslhpss...s.hh.lh.h.pp.sp..tp.t.....h.t.h......t.......................................................................................................................................................................................... 0 450 933 1222 +12686 PF12847 Methyltransf_18 Methyltransferase domain Bateman A agb Jackhmmer:Q18BX6 Domain Protein in this family function as methyltransferases. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.66 0.71 -3.49 195 8520 2012-10-10 17:06:42 2010-03-04 15:04:54 2 133 3477 116 2843 74852 20479 109.50 19 36.69 CHANGED sst..pl.lDlGsGsG..phs...lthsp..........stp..lhul.........Dh......stphlphsc..........................pps.........................................tht....splp.....hhp....................uch........hsthst....................................hDh..............lhhss...............hhph......................lp..phh....ptLp.................................s.uGh........lllssh ................................................tpl..lDlG.s.G...s...G......p..h.s......l..th..sc.................................t.u.tc.......lhu..l.....................................Dh.........................u..s...t...h...l...p..h...s.c...........................................................................................................................pph........................................................................................................t......hths.......sp.lp................h.lp...........................................................u.-h..........t.h.s.h.h.tp..........................................................................................hD.h............................l..hh.st.....................................................hhsh....................h...........................lp.....t.hh...............ptLp................................................s...u.Gh...llh...hh.......................................................................................................................................................................................................................... 0 981 1879 2441 +12687 PF12848 ABC_tran_2 ABC transporter Mistry J jm14 Pfam-B_38684 Domain This domain is related to Pfam:PF00005. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -9.97 0.72 -4.14 246 15857 2012-10-05 12:31:09 2010-03-04 15:58:16 2 25 4624 0 3551 11380 1770 86.00 24 14.83 CHANGED l-l-puclp.tYpG.NYspahpp+ppchptptptacpQQcclp+hppaIcRh+.s...pu........s+up....pAp.SRhKtL-+h....-p...lcts.....p.c....tt..p..hpF ......................h-l-pGcl.p.Y.....p.....G...NYspahcp+ppph...pppt.......p.p....pcppcplpc........p.sa.......lc..+ht.t.........................................u+sp........pu..p...uR....h+tl-....+h........cp....hp.................................................................................. 0 1158 2264 2991 +12688 PF12849 PBP_like_2 PBP superfamily domain Bateman A agb Jackhmmer:Q180B2 Family This domain belongs to the periplasmic binding protein superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.99 0.70 -4.94 148 7526 2012-10-03 15:33:52 2010-03-05 19:02:32 2 29 4026 39 1757 5461 2312 251.70 21 78.69 CHANGED sssssstsh................................................lhsssooshtshhh....p.hhts..............Fppp.ssh..............................p...........lp................h...tusGoupulpthppG....................................sDlshss..t....thscpch..ts.......................hshpt..........hslshssllllss.ts........................ssst.t.....hsspsltpIht.t............................................................................sstslthhh..RucsSG.....h.sphtpt.............................................t.ht...................ttsh...................................................stsht.s................ps.sGhhsslshssth....t.........shshss.....tshhhsh................ts.hph..............................t.....hhs.........spt.....GsaPlhpsh...hhlhhssthh..........tstsps.......Fhsahh.Ssc 
................................................................................................................tst.........................t....................lpss.G.o.....osh.t.s..lhp..t..hspt.......................................atp.p...s.s.s................................................p.................ls................................h......pusG...S.us.Glp.p.h.h.sGs...................................................................sDhusus.....t.......................shpp....p..ch.....................................................thhp..h...l.........uhs.........u.....lslslN..hs.................................sslp..t.......Lo.t.pp.lt....p..I....a...t..Gc.....................l........................................ppWpp.........................................h.thss.ppIt.llp....R.ss.s...S.G..TpthF.pphlhp.t...........................................................................................................................................t.ht..........................................t...............ssshtth.........l............pp..suulua.lphu..h...........................................h......................t............................................tpthph.....................................................hts.....................h.tpsht..sssa.P..l..hp.h...ahhsp.p.p...................sttspp.....Flcahh.s..t...................................................................................................................................................................................................................... 0 550 1144 1508 +12689 PF12850 Metallophos_2 Calcineurin-like phosphoesterase superfamily domain Bateman A agb Jackhmmer:Q180F1 Domain Members of this family are part of the Calcineurin-like phosphoesterase superfamily. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.18 0.71 -4.39 139 12839 2012-10-02 19:15:56 2010-03-08 14:03:49 2 60 4473 32 3683 17996 3872 206.20 16 75.47 CHANGED M+lullSDoHs..................h.c............h...p....................pll.c..th.....p..p....s-hllHsGD...........................h.....s.............s..............p..l....hp.h.........pp.h...s........lhsVpGNs.............................................D........................tt......h....s.pph................................................................hhpl.s......................sh...................ph.hhsHG.........pt......ht......................................................................h..........ht....p..t.......lh....ttsp.pt.sscll.hhGHoHtsth........................p..p......hs..s...hlhlNPGSsshs+.............ht.........t...ohsllclpss .......................................................................................................................+lhhlSDl...Hh...............................................s.h..p......................t.......h...p................................................p.ll...p..th...............p....p...............sD..h.l.lhs.GD.............................................................l....hs..............................t.........................................................................pph...........hc.tl.................pph..........s.............lh.h.l....t..G..N..c.................................................................................................................D.............................................t..........h...t.......h.....................................................................................................................................................h.......h..p.h.t..........................................st.........................................p.l...h..l..s.H...u..............p........h.s.............
............................................................................................................................................................................................ht........................h.s.....p....t....................h.......t.hht....p......t.....s....s....c.....h..h.....l......h..GHs..H....t....s..th.......................................................................h............t.t............h..h...h........s....G...s..................................................t....................................................................................................................................................................................................................................... 0 1293 2430 3172 +12690 PF12851 Tet_JBP TET_DSBH; Oxygenase domain of the 2OGFeDO superfamily Bateman A, Zenonos ZA, Iyer LM, Aravind L pcc Manual Domain A double-stranded beta helix (DSBH) fold domain of the 2-oxoglutarate (2OG)-Fe(II)-dependent dioxygenase (2OGFeDO) superfamily found in various eukaryotes, bacteria and bacteriophages [1]. Members of this family catalyze nucleic acid modifications, such as thymidine hydroxylation during base J synthesis in kinetoplastids [2], and the conversion of 5 methyl-cytosine (5-mC) to 5-hydroxymethyl-cytosine (hmC) [3], or further oxidation to 5-formylcytosine (5fC) and 5-carboxylcytosine (5caC) [4]. Metazoan TET proteins contain a cysteine-rich region inserted into the core of the DSBH fold. Vertebrate TET proteins are oncogenes that are mutated in various myeloid cancers [5]. Fungal and algal versions of this family are linked to a predicted transposase and show lineage-specific expansions [1]. 
26.00 26.00 26.00 26.00 25.90 25.30 hmmbuild --amino -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.99 0.71 -4.97 25 327 2012-10-10 13:59:34 2010-03-09 14:18:29 2 10 97 0 200 301 87 374.70 27 38.59 CHANGED uhshhhsshhpsp............h.s.hht.t.p.......htphlpphsshlsslhctlsPphapttsphhsshh................hh.hhupsaoshols...............psptst.H+Dptsh.tshslhsshtts.........hcGGtltlsp........................................................................................................................................................................................................................................................................................................................................................................................................................................................huhslsspsGslllhsuphhhHussslps..................................................................stRholVaah+c ...........................................................................................................................................................................................................................................................................................hYashpt.sp.......p..sR+hch.hpp.tt...................................lcpphpplushluslYcpluPptaps.Q.h.phpphs..c..scl..............sh.p.GpPFSGlTss...............hcFss+sH+Dh+shts.G.hol.lsTLscpp..............p--phal..LPh.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.........................................................................................................................................................................................................................................................hGluls.spGolll.su+h.hHusTs.lc.p............................................................s...p..pspRlolVaY.+c........................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 64 106 140 +12691 PF12852 Cupin_6 Cupin Mistry J jm14 Pfam-B_918 (release 24.0) Domain This is a family of bacterial and eukaryotic proteins that belong to the Cupin superfamily. Some of the proteins in this family are annotated as being members of the AraC family of transcription factors, in which case this domain corresponds to the ligand binding domain. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild --amino -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.12 0.71 -4.68 212 1675 2012-10-10 13:59:34 2010-03-10 15:02:53 2 5 1105 0 494 1685 112 171.80 25 57.25 CHANGED DsLSc.lLstl+hpushhhpsphsusWul....ph....s...ss...............hphahlhcGpshlp..ss....t.....s....lpLss.GDllllPps...p....sahl....s...ssss..................................................................ts.....................s.s...............................stllsGphphcssts....p.LlssLPsllhl...t...............t..tshlsshlphlttEstpspsGpphllsRLh-lLhlpsLRtalp .........................................................................................................DsLoc.LLphhp...pusl..tps..h.su....sWtl......th......sst.................t..s........hpaH.h.......lhpGs.shLp..hss..................s......p.....hpLpsG-ll.llPps...s..sHpL.....s...ssss..................................................................................................p..............................................................sspllCGphphp.ps.st.....t.llsuLPchlhl........................ss.tshLpth.lthLtpE........upp.....s.t.sGsssll.spLssslhshslRtal..................................................................................................... 0 123 263 390 +12692 PF12853 NADH_u_ox_C C-terminal of NADH-ubiquinone oxidoreductase 21 kDa subunit Coggill P pcc manual Domain This family is the C-terminal domain of NADH-ubiquinone oxidoreductase 21 kDa subunits from fungi. 25.00 25.00 49.60 48.50 20.50 19.90 hmmbuild --amino -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.13 0.72 -4.28 10 102 2010-03-10 16:33:19 2010-03-10 16:33:19 2 2 99 0 80 97 0 77.00 57 40.60 CHANGED hhEhhcKlpcGcs.Y..GpSpLsshhQslAsRNSpYStLhlaVlPWFNhsNHs.HGVDhsKYYpptEtEhE................Acpst+tt.p ............M+EMVcKlKcGcPLY..GpSpLosahQGVAARNSRYStLFhpllPWFNhVNHsQHGVDTAKYYpQAEcELE...........................sEp......... 
0 20 44 68 +12693 PF12854 PPR_1 PPR repeat Wood V, Coggill P pcc Pfam-B_105542 (release 24.0) Repeat This family matches additional variants of the PPR repeat that were not captured by the model for Pfam:PF01535. The exact function is not known. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.49 0.72 -4.47 433 3378 2012-10-11 20:01:03 2010-03-11 14:22:50 2 1621 145 0 2261 22733 138 33.10 30 7.47 CHANGED pGhpPsslTYssLIsGhC+sGclccAhclhccMp ..........................shtPssloY.....ssL.ls.u.h..C..+...s...G....c.lccA.hc.lhcpM................... 0 277 1399 1896 +12694 PF12855 Ecl1 Life-span regulatory factor Wood V, Coggill P pcc Pfam-B_42039 (release 24.0) Family This family is involved in the chronological life-span of S. cerevisiae. Over-expression leads to an extended viability of wild-type strains, indicating a role in regulation. 26.40 26.40 26.50 26.40 26.10 26.30 hmmbuild --amino -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.91 0.72 -4.66 23 165 2010-03-11 15:50:29 2010-03-11 15:50:29 2 2 112 0 132 156 0 43.20 35 21.05 CHANGED Mu....AFssYChsC-+lh........................sssplYCS-pC+ppDpppshsttp ..............F.paChsC-+Qh.....................................ssstlYCSEsCRhtD.pps......t............ 0 28 66 113 +12695 PF12856 Apc9 Anaphase-promoting complex subunit 9 Coggill P pcc manual Family Apc9 is one of the subunits of the anaphase-promoting complex, or cyclosome [1], which is essential for regulating entry into anaphase and exit from mitosis. The APC is a ubiquitin-protein ligase complex. All APC subunits are members of the cullin family proteins, which bind to a ring-finger subunit via a conserved cullin domain [2]. The APC is made up of four parts, the third of which is a tetratricopeptide repeat arm (TPR) that contains Apc9 [3]. 
25.00 25.00 28.70 26.80 24.30 24.30 hmmbuild --amino -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.73 0.72 -3.92 15 42 2010-03-12 10:19:30 2010-03-12 10:19:30 2 2 40 0 25 35 0 117.30 28 33.36 CHANGED ssslcs+lhslppscKpt..............................h.tpstp.spYDYulF.scpss.............l+ESpIcuapsuE+hspsllFcps........s.psD.....s..............pssc.ctsp..........tptshl...slPGYTp ...........................................................h....hsh.hslppsph.t..............................slpc...tp.tppYDYSsF.sccss.............l+ES+Icua.pAE+ss+sLlFc+st..................................S.csDh................s..........ppssc.-psp............tssshll..SlPGhsp.................................. 0 4 14 24 +12696 PF12857 TOBE_3 TOBE-like domain Mistry J jm14 Pfam-B_1384 (release 24.0) Domain The TOBE domain [1] (Transport-associated OB) always occurs as a dimer as the C-terminal strand of each domain is supplied by the partner. Probably involved in the recognition of small ligands such as molybdenum (eg Swiss:P46930) and sulfate (Swiss:P16676). Found in ABC transporters immediately after the ATPase domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.57 0.72 -4.11 101 1134 2012-10-03 20:18:03 2010-03-12 10:22:48 2 2 1112 0 221 704 35 57.20 31 16.08 CHANGED sssp...sslsu.plpplp.shGshs+l..El...phh....ssp.........hlElclspcp......t.thht.GpplhlpP+ ...........ps..hsuslPs.pVlcssPhGthspl...l......psh.....hhs-..........lpl.hst-c........s.pt.G-plals................ 0 46 110 166 +12698 PF12859 Apc1 Anaphase-promoting complex subunit 1 Coggill P pcc manual Family Apc1 is the largest of the subunits of the anaphase-promoting complex or cyclosome. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1,2]. 
Infection of human fibroblasts with human cytomegalovirus (HCMV) leads to cell cycle dysregulation, which is associated with the inactivation of the anaphase-promoting complex [3]. 25.00 25.00 27.00 26.70 20.50 20.20 hmmbuild --amino -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.48 0.72 -3.81 57 240 2010-03-12 11:41:32 2010-03-12 11:41:32 2 3 221 0 182 249 0 101.60 27 6.01 CHANGED csLsVlh..psphplahh...s........GcsaslsLPFp.Vsssashsp..GLlLpRchs...................ssphtttt.............................................................................sshs+l.aoL...scPLsEhs.Vshps ................................slsllh..psphplahh...s.........GcsallsLPFp.Vsslass.th..GLLlpRpts........................................spphs.sss............................................................................................................p..........pshPph.aoL...hcPLsEhs.llhp....................................................................................................................................... 0 57 95 148 +12699 PF12860 PAS_7 PAS fold Mistry J jm14 Pfam-B_21375 (release 24.0) Domain The PAS fold corresponds to the structural domain that has previously been defined as PAS and PAC motifs [4]. The PAS fold appears in archaea, eubacteria and eukarya. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.19 0.71 -4.08 164 2444 2012-10-04 01:10:46 2010-03-12 13:50:07 2 265 808 0 913 2460 395 113.00 23 15.73 CHANGED LcphsQGlslaDs-h+LlsaNp+atclaslPsphl.psGts.hpcllca...spcGphu....sc......h-th....lppcl....p.thpp...tpsphhE+pp.ssGpllclpusPh....Ps.GGaVsoaoDITp..p+cu-p .....................l-plspGlslaD.sctRLlhhNppa.t.cl.a..s.l..s..s.shl.........p.s.G..hs...hp.p.l.lc.ht....tppu.hhs.........tp........................hpph..........h.pph.h................t..t.hpp.....sps...p..p..h..p...h.p.h..sD.G..+............h..l..c....l..psp..s.h............s...s.....G......G......h....V.....h....s.ap.DlTphpptc............................................ 0 225 493 644 +12700 PF12861 zf-Apc11 Anaphase-promoting complex subunit 11 RING-H2 finger Coggill P pcc manual Family Apc11 is one of the subunits of the anaphase-promoting complex or cyclosome [1]. The APC subunits are cullin family proteins with ubiquitin ligase activity [2]. Polyubiquitination marks proteins for degradation by the 26S proteasome and is carried out by a cascade of enzymes that includes ubiquitin-activating enzymes (E1s), ubiquitin-conjugating enzymes (E2s), and ubiquitin ligases (E3s). Apc11 acts as an E3 enzyme and is responsible for recruiting E2s to the APC and for mediating the subsequent transfer of ubiquitin to APC substrates in vivo. In Saccharomyces cerevisiae this RING-H2 finger protein defines the minimal ubiquitin ligase activity of the APC, and the integrity of the RING-H2 finger is essential for budding yeast cell viability [3]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild --amino -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.35 0.72 -4.15 5 247 2012-10-03 15:03:13 2010-03-12 14:30:41 2 9 213 0 172 3252 290 81.50 49 56.19 CHANGED MKVKIpEW+uVATWpWDlPs.................DDVCGICRVuFDGTCPsCKaPGDsCPLVlGp.CsHsFHhHCIh+WLcspoSKGLCPMCRQoFphp- .........................................................MKVpIp.p...Wpu.....VAsW.pW.t..hsp........................................D-sCG..IC..R..hs.....F.....-.....G..s......C.....P......s..C..............K........h........P........G........D.......D.............C..........P....L......l..h.....G............p........C............s..Hs......FHhHCIh+W...l.p....p...p......p.....s....p....t........CPMCR.ppaph........................... 0 56 98 141 +12701 PF12862 Apc5 Anaphase-promoting complex subunit 5 Coggill P pcc JCSG Domain Apc5 is a subunit of the anaphase-promoting complex/cyclosome (APC/C) which is a multi-subunit ubiquitin ligase that mediates the proteolysis of cell cycle proteins in mitosis and G1. Apc5, although it does not harbour a classical RNA binding domain, Apc5 binds the poly(A) binding protein (PABP), which directly binds the internal ribosome entry site (IRES) of growth factor 2 mRNA. PABP was found to enhance IRES-mediated translation, whereas Apc5 over-expression counteracted this effect. In addition to its association with the APC/C complex, Apc5 binds much heavier complexes and co-sediments with the ribosomal fraction [1,2]. The N-terminus of Afi1 serves to stabilise the union between Apc4 and Apc5, both of which lie towards the bottom-front of the APC [3]. This region of the Apc5 member proteins carries a TPR-like motif. 
21.00 3.90 21.00 4.60 20.90 -999999.99 hmmbuild --amino -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.05 0.72 -4.15 26 461 2012-10-11 20:01:03 2010-03-12 16:08:58 2 64 292 0 278 1604 191 89.20 21 16.11 CHANGED aalpYLpulpsp-Y.sAl-sLHpYF...DY..hhppsscs............................................tYpaA...LLsLAhLHspF...GctccAlpAlpEAlplARENpDpssLsahLsWlhphhcpp .........................................................................................................................................................................h..t.ph.tuhp.Lhphh.....Dh.......t.tpt.........................................................................................hp.h.A......hl.s..L.A..t.l.ps.ph...Gph..ppA......hts....lpEAl...pl....Ap..c....sp....D.......s.Lthhh.hh.......t.............................................................................................. 0 95 155 224 +12702 PF12863 DUF3821 Domain of unknown function (DUF3821) Coggill P pcc manual Domain This is a domain largely confined to sequences from Methanomicrobiales found on putative lipases. The function is not known. 
20.80 20.80 21.00 30.30 20.50 19.40 hmmbuild --amino -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -11.47 0.70 -4.66 20 31 2010-03-15 09:40:29 2010-03-15 09:40:29 2 7 7 0 23 31 0 210.70 28 29.50 CHANGED sGusVFlGEpGLDlosshs......ssspIuWassuss................................ssssPsphhslss..spsFaVsPshFssps....GsWYthssp.t.........sssFpVc-PslulclhDtsssp-lo..spslspGshlsFpI..cTNLtuh...tpRsusssss....lsIplp........sPsGssassLhss.sGssps................Lssls.....Vss.....................ss.tss..suWsTush.ssust.Y.psGsYslhAcsslNsht- ..GssVFlGEpGLDlosshs.......ssspIuWassGsss...............................ssssPsphhslss..sssFa.VsPshFsscs.....GsWYthsss.t..............ssuFhVp-Pslulcla.Dhs...s..s...pDlo..spplspGshlsF+l..-oNL.ul...hpRsusssss....lsIpVp........sPsGsshosLhss.sussts...........lssls.....lss......hah................tt..suasossh.spssp.Y.psGsYpVhAcsslNsMtD.......................................... 0 11 23 23 +12703 PF12864 DUF3822 Protein of unknown function (DUF3822) Coggill P pcc JCSG_pdb_3hrg Family This is a family of uncharacterised bacterial proteins. However, structural-similarity searches indicate the family takes on an actin-like ATPase fold. 
20.40 20.40 21.60 21.00 19.90 19.30 hmmbuild --amino -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.50 0.70 -5.15 46 193 2010-03-15 14:40:07 2010-03-15 14:40:07 2 1 190 1 54 196 147 238.40 24 89.79 CHANGED Mt.t................phshpptp.hp....LSIplshcGhSFsl.....hss.spphhhhpphphp....sstpltpplcphhcpp.phLppsacpVpllhssshaThVPpsLF.-ccphssYLpa..s..hhps-hltach.ltp.shssVash.pslpsalhcpass...hcahHpsosllcthh..pts.pstppcplalplpcpph-lhlhcppcLlhhNsFp...hpo.......scDhlYYlLashcQLsls.-psplhlhGp..l..sc..csphaphlppYl+plp ................................................h................s..pppthp..LoIclutsuhSFsl.......hss.ppp..hhhpphphp....sshsl.tssLcphhppp.phLpp.s..acclpllhsss.hsllPhph.F.cc..cpt..pphhpa..shptpps-hlhhsh.Ltp.sssslashscslpphlp-pasp...hcahptsssllphhh...ppu.ttuspcclYsphcccph-lhsapps+LlhsNoFp...hps.......spDhlYYlLalacQLshst-cscLhLsGp..l....sc...c-plhppLcpalcpl............................. 1 22 47 54 +12705 PF12866 DUF3823 Protein of unknown function (DUF3823) Coggill P pcc JCSG_target_3hn5 Family This is a family of uncharacterised proteins from Bacteroidetes. It has characteristic DN and DR sequence-motifs. The function is not known. 
21.80 21.80 21.80 22.20 21.70 21.70 hmmbuild --amino -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.61 0.70 -4.84 28 92 2012-10-02 19:08:27 2010-03-15 16:51:00 2 1 54 3 14 85 2 225.90 25 92.24 CHANGED hllhhhhs...hsuCp...h.DNY-.tPcutlpG+l..hhsGEslthch....s..usplplappG..a.s...ph..sshslhlppDGoa.sshlFsGsY+llhhpGsh..............stDolp...lslp.Gs.sphDhcVpPYhhl..cssshshsGsplsAoFplppssss.....slccltlalspophVs............s..thsptt.shsshhs.sp.hohplslsps...............pshhasRlGspsssstph..YSpll+l .....h.hlhhhhhhsSCp...h.DN.YD.tPp..uslpGcl..hhsGEhlthch....s....usplplhphG..au....pp.....ss.shhlppDGoa.sshlFsGsYclhh.pGsh.............hstDTlp...lplp.Gs..sph-hcVpP...Yhpl..pssphph.s...u...splsApaplppssss..................slpcltlalspsphVs....................spphspts.p.hssths.ss.hshphs...lsss....h.s..........tpphasRlGspss.s....s...p....ph.....YStshcl................... 0 6 13 14 +12706 PF12867 DinB_2 DinB superfamily Bateman A agb Jackhmmer:P72629 Domain The DinB family are an uncharacterised family of potential enzymes. The structure of these proteins is composed of a four helix bundle [1]. 
22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild --amino -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.64 0.71 -3.79 220 3722 2012-10-02 14:44:17 2010-03-16 10:45:46 2 34 1437 16 1256 5855 770 137.90 14 54.56 CHANGED th..ppsh.p...phhph..lp....sl.....s.p......pphphp..s...................................s.sst................slt.hltHlsh.....spp..hhh....tp.ht................................st.................s.....................................h........tht.ht.h.......................................s...........shs.pl........h...phhp.phppphhphlps..h....st..tp....h...pp.h.....hh.....................................................t.h......s........htphlth.hhtHptpHhtpl ..........................................................................t.th.ph..hp....sl.....s..p.......pp..ht.hp...s..............................................................................t..stt......................................olt.hlsH.l.sh........hpc...hhh....hp.ht.....................................s.p...............t.s..........................................................h........t.htp.t.p.h.............................................shp..pl...........h...p.h...h....p.p.s....p.pph...h...p...hl....ps...h.........st....tp.......h.....tp.h.......................................................................................................................h.............s..............h.t.t.h.l.h.h..hhtHptpHhtph.................................................................................................................................................... 0 533 952 1146 +12707 PF12868 DUF3824 Domain of unknwon function (DUF3824) Coggill P pcc manual Domain This is a repeating domain found in fungal proteins. It is proline-rich, and the function is not known. 
22.70 7.90 22.90 7.90 22.00 7.80 hmmbuild --amino -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.29 0.71 -3.24 11 83 2010-03-16 14:05:24 2010-03-16 14:05:24 2 6 42 0 63 89 0 114.80 25 27.75 CHANGED +pRSRoRDLApAALAAsG.lGYAAHKYo.QRp-RKKt-+ER-+.+aDcD.hppSYt-sY.sPhPYssoP........sssspYYPpoNaFPPPPGosPs..ssssstP.sYNPADYPP.......P..PuAs.sP.psYsY....Pss..PusDsYAPRPRRADENV ......................+SR...sRp........huthul..AAAG....hG...hA..A..tp..hp..p++-+K.........cp-..+cR...ccpcpccc.hppuhpp.s..h..ss.ts..ssss...............st.t.h...st....ssh.as.PPs.t........................................................................................................................................................ 0 9 22 47 +12708 PF12869 tRNA_anti-like tRNA_anti-like Coggill P pcc JCSG_target_390051_3f1z Family This is a family of bacterial, archeael and viral proteins that is related to the tRNA_anti family Pfam:PF01336. The major characteristic of families like tRNA_anti is their OB-fold, and many of them bind DNA. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.93 0.71 -4.71 25 205 2012-10-03 20:18:03 2010-03-16 14:53:54 2 8 185 10 48 201 46 146.90 15 77.05 CHANGED Mt............pphlhhhlhl............hhluhlhs...............................sh.spsphsstptpssh...............shsu.ppLhspapcNpspAsppYh.....sKhIpVpGslssI.pc.tttsss..llhtpp.ts.sslhCphsssp.............ts..lppGpplplpGhspGht...........hsslhLcssh .....................................................................................................................................................................h................hh.hhh...............................................tttt..h..............................................hss.s....plhpsa.p.p.Nt.h.......sA.......s......p.pY.p......sKhlpl.s.G.pl....t..sl..pp..ttssph......hsh.h.s....s..t..t..s........h..sl....p.sthsssp....................t..t.......lppGpplslpGhhpsh.....................h............................................... 1 17 31 40 +12709 PF12870 Lumazine_bd Lumazine-binding domain Coggill P pcc JCSG_target_391926_3k7c Family This is a family of putative lipoproteins from bacteria. Many members of the family are defined as having a lumazine-binding domain. Lumazine is a fluorescent accessory protein having 6,7-dimethyl-8-(1'-D-ribityl) lumazine (DMRL) as its authentic chromophore; it modulates the emission of bacterial luciferase to shorter wavelengths with increasing luminous strength. The family is related to the NTF2-like transpeptidase family. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.32 0.72 -4.02 26 650 2012-10-03 02:27:24 2010-03-16 16:51:30 2 9 571 4 86 427 52 96.70 23 43.48 CHANGED suC..ssssPpps....scpahcsltcGchcchhchhs.pspp..........th.thhcthspphppphpphtuhc.hcl.ppp.....sspApVplpssatsupppppshpllK.pc.spWplp ...................................................hs...............................t...p.tth.t....................................................................pl....cp.l...cVh-+s.........sDsAcl..+Vch...s......h........s......s.....s........s..s..p......p......p..p.......l.........L...+...c-...upWcl.h............................ 0 35 67 79 +12710 PF12871 PRP38_assoc Pre-mRNA-splicing factor 38-associated hydrophilic C-term Coggill P pcc manual Domain This domain is a hydrophilic region found at the C-terminus of plant and metazoan pre-mRNA-splicing factor 38 proteins. The function is not known. 22.40 22.40 22.50 22.50 22.30 22.30 hmmbuild --amino -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.35 0.72 -11.34 0.72 -3.46 72 139 2010-03-16 17:36:41 2010-03-16 17:36:41 2 24 85 0 82 113 0 102.60 25 19.68 CHANGED s+sssh......c...pc.h..p-tp..ps..p..-...cpt.ccs....s......p......cs....s..ts.hs..ttp............p.s.s..cppsRspp.........................ps..pc.p.ppp...pp...p+p.cs.......cs..pp..R..p+spt......p....pc+....-+....p...Rp.+..sRs...R.......sRsR.D..Rpcc ............................................................................h..shc...ts.hpp.ppt..p..-....-pt.pp................ps....s....s...sp..htp...........................p...tspsRpps...........................................................ph+p.ptp.s.......cp.......sRc..Rs..................+s......pcR.c+sp.......p.....+-R....-R....s.Rc...+.cRc...R...sR.sR.DRcc.t........................ 
0 19 37 59 +12711 PF12872 OST-HTH OST_LOTUS; OST-HTH/LOTUS domain Bateman A agb Aravind L Domain A predicted RNA-binding domain found in insect Oskar and vertebrate TDRD5/TDRD7 proteins that nucleate or organize structurally related ribonucleoprotein (RNP) complexes, the polar granule and nuage, is poorly understood [1][2]. The domain adopts the winged helix-turn- helix fold and bind RNA with a potential specificity for dsRNA [1].In eukaryotes this domain is often combined in the same polypeptide with protein-protein- or lipid- interaction domains that might play a role in anchoring these proteins to specific cytoskeletal structures. Thus, proteins with this domain might have a key role in the recognition and localization of dsRNA, including miRNAs, rasiRNAs and piRNAs hybridized to their targets. In other cases, this domain is fused to ubiquitin-binding, E3 ligase and ubiquitin-like domains indicating a previously under-appreciated role for ubiquitination in regulating the assembly and stability of nuage-like RNP complexes. Both bacteria and eukaryotes encode a conserved family of proteins that combines this predicted RNA-binding domain with a previously uncharacterized RNAse domain belonging to the superfamily that includes the 5'->3' nucleases, PIN and NYN domains [1]. 25.00 11.50 25.00 11.60 24.90 11.40 hmmbuild --amino -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.54 0.72 -4.14 234 1690 2012-10-04 14:01:12 2010-03-17 11:05:37 2 67 816 6 797 1608 60 71.30 17 19.68 CHANGED pt.ppltphlpphl..tpspcp....sG.hspluplssphppph..ssFcs+sY..........GappLscLl.p..s.hsch.hcl....ppttssphhl ......................h....thtp.lhpll...t..pttst........t.hh.lupltsphpcph..shs.psY..........GapplpcLl.p....u..hssl..hcl.....tpttt.....hh....................... 
0 219 388 594 +12712 PF12873 DUF3825 Domain of unknown function (DUF3825) Aravind L agb Aravind L Domain Potential uncharacterized enzymatic domain associated with bacterial Pfam:PF12872 domains. Has conserved residues suggestive of an enzymatic role probably related to RNA metabolism. 27.00 27.00 30.20 29.50 23.50 21.40 hmmbuild --amino -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.72 0.70 -5.22 20 92 2010-03-17 11:31:21 2010-03-17 11:31:21 2 5 85 0 15 78 3 224.80 31 63.68 CHANGED ppLuplAts.E.........sWthsps........pshsILcsYlphTFpRlhcpsp..............hhhssssspusFNTGLlopptcsIaAlFppsp.........................................tsttsWhhpuFs............tsptphhhpshsplP.shusYac.csp-llF..Dsc.tclpls...h-HI..hs-ph..-RhPsphppht....................................cptltshlpuAlchstp+spRNY+hAVPpaYp.......s+lphLLPlpL...spssps-hALslp+..scph....YcupTILTL.chAYpNARLls+P.-scW ....................................................................LtplA....E...Wt.tss...............ILhsYlhthFpplhcppp..............hhhstcps.hAsFNTGLhopth.psIauhFpppp...........................................pttahFpt....Fh.............pptt.hh....plP.phusY..s...t..p-..llF......Dsc...hshpls...h.Hl..hppp...cRlPphlpphs...........................................cphltphlpuslc.......hhtcphphs.+hslPtaYp.......p+l.lhlPLpL...........ppppsDhslVlc+....ppst....YtspTILs..c.sYpsARllshs.-spW................ 0 7 11 15 +12713 PF12874 zf-met Zinc-finger of C2H2 type Coggill P pcc manual Domain This is a zinc-finger domain with the CxxCx(12)Hx(6)H motif, found in multiple copies in a wide range of proteins from plants to metazoans. Some member proteins, particularly those from plants, are annotated as being RNA-binding. 
21.10 13.30 21.10 13.30 21.00 13.20 hmmbuild --amino -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.71 0.73 -7.09 0.73 -3.60 388 5348 2012-10-03 11:22:52 2010-03-17 11:37:56 2 907 368 3 3248 16868 203 24.20 29 5.85 CHANGED hh..CplC.phphsspsshpsHhpu++H ............h.CplC..phphsopsp.hptHhpuppH............ 0 719 1232 2244 +12714 PF12875 DUF3826 Protein of unknown function (DUF3826) Coggill P pcc JCSG_target_393061_3g6i Family This is a putative sugar-binding family. 25.00 25.00 170.90 28.90 20.00 19.80 hmmbuild --amino -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.20 0.71 -4.60 7 68 2010-03-17 11:40:38 2010-03-17 11:40:38 2 2 39 2 14 57 0 174.70 46 54.17 CHANGED tYlcsl.pRupKIVspLslsDssttppVsplIAN+YhcLsDIaptRDs+lKtlKcsh.LststpppAlcttphctDAsLh+sHhpa.ApLShhLs-cQI-tVKDGMTYGsl.lTYcuh.DhlPoLTEEEKtpIatWLsEAREaAMDAcsScKKHAhFsKYKGRINNYLuK+GYDLpKEpc-WhcRhcu ........t.stSl.p+utcIVspLtLsDsptuppVtslIsN+ahclpDhapt+st..ppl.Euh..ssstt.ptLctt.htcsAthhcsHhta.ssLshhLs-cQIEtlKDtMThGhVthThcua.-hlPsLpEEE+upIhtaLhEAREhAlDhcNspph+AhFsKYKG+INNYLsc+Ghsh+pttc-ahc+hKA..... 0 9 14 14 +12715 PF12876 Cellulase-like Sugar-binding cellulase-like Coggill P pcc JCSG_target_394744_3gyc Family This is a putative cellulase family. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild --amino -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.91 0.72 -3.54 18 341 2012-10-03 05:44:19 2010-03-17 13:25:12 2 18 274 2 102 613 180 89.30 29 19.74 CHANGED llspatp-sRIlhWDLaNE.Ps...s...............h.s.ttsst...hct.hthLppshthhRslcPspPlTuGh......sss..........shss......hpplp..hpsl.DlIoaHsY ...........lspatsDsRlLhWDLaNE.Ps..s.t..............................t....ptst........hp...t....s....tp.Llppsh.pasR.slcP.s.Q.P.l.T.u..G.s..................Wps..................phss...................ls.thp......h.p.ps..Dl..loaHsY............................................. 
0 39 77 90 +12716 PF12877 DUF3827 Domain of unknown function (DUF3827) Zenonos ZA, Mistry J zz2 jackhmmer:Q9HCM3 Family This family contains the Swiss:Q9HCM3 protein which has been found to be fused fused to BRAF gene in many cases of pilocytic astrocytomas. The fusion is due mainly to a tandem duplication of 2 Mb at 7q34 [1-2]. Although nothing is known about the function of Swiss:Q9HCM3 protein, the BRAF protein is a well characterised oncoprotein. It is a serine/threonine protein kinase which is implicated in MAP/ERK signalling, a critical pathway for the regulation of cell division, differentiation and secretion [1-3]. 19.40 19.40 20.60 20.00 19.10 18.60 hmmbuild --amino -o /dev/null HMM SEED 684 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.40 0.70 -13.18 0.70 -6.42 5 150 2010-03-17 13:51:21 2010-03-17 13:51:21 2 4 46 0 75 128 0 484.60 41 45.39 CHANGED PhpPsP-upFQV+TVLQFVPuuVDlRhCsFoQphEKGLhpAa...tEVR+sopushNlTVpIlNIT...slSspsp.p+sPVsIlFAV+sspGFLNGT-VSuLLRQLSAVEFSFYLGYPsLQIAEPFcYPpLNlSpLLRuSWV+TVLLGVh-pplss+sFptohER+LAQLLuElhshu.ppRhRFRRAToVGNsoVQlVpsoRLsGPDNPlELlYFVEttNGcRLsAsuoAclLNolDlQRAAIlLGYRVpGllApPV-+QAcPss-opPNNLWIIlGVVlPVLVVslIIIILYWKLCRo-KLDFQPDTluNlQQRQKLQ.PSVKGFDFAKQHLGQHuKDDlhlIp-PsPL.ss.hccsoPuEsu-lPoPKSKus.pcuoopssR+RGclSPS-uDSpsS-suSsRuus-cusRsussPusssQQ+so+ccupptssPuSGhDEsLSSuSIFEHVDRlSRsuuDus+RlSuKIQLIAMQPMPAPPl.s...s.csStsD+As-NuKVNKEIQsALRHKSEIEHHRNKIRLRAKRKGHYDFPusEDlpuuhGD....spEp-RlYppAQ.QIcKILcPs-cp..SsasEP+KSsRu+RSP+sRR++QsLNGsu...T-tD+DRLIpoDRDGTYR+sPGVcNpAYlusss.plP-spu.S.ocp........spGHsPS.PusLsoQPSIEEuRQQMH.LL--AFALsSPuS 
..........................Pt.hhplpT.......VLpFVs.sssht.CpFsQhhEptL.hAh......thhh...ptp.s.hpl......................p.sVslhasVt.stpshLNGo.sS.pLLppLSs..huaYLsYPsLpIAEPhp.YPpLshSphh+s.WVh...T...V.....l.G.......Vspp.lt.p..ptphtRhhtQhLupl.......hht...t......hhh+tssshGs.sVQhVphpR..l.GscsPspLhYas...sGc.L.ustuucllsplD.QRhAlhLtahl..........h.tcPsscs.ssNLWlIsuV.l.hPlhVV..hlIllIlhhhLC....RpsK.-FpPDshhNl.QRtK.............VpGFDaAKQHLGQpstD-.hl.hp.ts.......l.................shP.csu...sp.+phh.......shphtppschp..S.cusSt.sup..Spcput.p.................s.tsht...pt+.pct.......tpt.s..............s..ss..pE...................ths.hs.....................u+s.s-s.cp..suplQLIuhpPhshPss.......p...tspsh.ssclN+.................AL+pKS-IEHaRNKl..RL+AKR+GaY-FPss-s.ps..tc....p...p+h.p...........chh...ssst...usahcs+p....R..phpsp.hRp+Q.lsuss...s-h-..h.hhhppspcGhhpps.ss...ps.hh.....tss....................s.st...........s.......s...s..hhsspPoI-csRppMH.LL--AFuLsuss.................................................................................................................................................... 1 8 17 34 +12718 PF12878 SICA_beta SICA extracellular beta domain Finn RD rdf Manual Domain The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. There can be between 1 and 10 copies of this cysteine-rich domain [1]. 
21.70 13.00 23.40 13.10 21.60 12.90 hmmbuild --amino -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.91 0.71 -4.46 262 470 2012-10-01 20:19:39 2010-03-17 14:32:34 2 34 2 0 456 482 0 170.50 28 62.21 CHANGED oLCsplpCAup+.a..........hpppptp....ssss.................s...sa.W..pp....hpsclppLhpphspstp....pps......sss.hCsphtsss...............tspspKpACpalsAGLc+l.h......................pt...ssus..................pssp.FcQTMGClhL+tYAKpl+c..pu...............hCsI.-p.......GIc+A....Fpps.p..shhpss......Cs.......sssssCh.Cph......csc..ac...s.CpIs ...........................shCsplpCsspp.a....hppp.ttt...tsss......................ssh.Wpp......hpsclppLhpplspstppss.....sss..hCsshssts..............tspspKpACphlsAGLcclh................ph.......ssss.....................ss..acQTMuChh.LptYAccl+cpu............tC....sI.-p.....GIccA....Fpps.sph.hpss........Cs.......sssssCh.Cph......cps...ac...sCpI....................................................... 1 0 0 0 +12719 PF12879 SICA_C SICA C-terminal inner membrane domain Finn RD rdf Manual Domain The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. The C-terminal domain is thought to remain in the erythrocyte, found juxtaposition to the single transmembrane domain. To date, all full length proteins contain a single copy of this domain [1]. 
21.90 21.90 32.80 21.90 18.30 21.40 hmmbuild --amino -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.66 0.71 -4.30 3 127 2010-03-17 14:33:42 2010-03-17 14:33:42 2 24 6 0 123 132 0 131.10 61 13.13 CHANGED KYFG.LhR+tRRY...+RuPspAssPSVQEQlLDHVEEAG......PHEYRLVKERKPsSsPsRTKRS.......GtVNRRTIIcIHFEVLDECQKGDTQLNQKDFLELLVcEFMGSEhM.EEEQVPKEEVLMESlshclVPhE-.VPSLSSGFMV ..............................................................KYFGPLGKGG.RF...RRSPs-IP.GsSVQEQVLDHVpp.su........sHEYpLVKERKPRS..u..PTRTKRS...........GtVNRRTIIEIHFEVLDECQK.....GDTQLNQK.DFLELLVpEFMGSEh..M...EEEQ..........VPK.....E-..VL.....M.....E.....ul.....P..........h.....E......VPhE.....c.....VPsLGSshh.......................... 0 12 12 17 +12721 PF12881 NUT_N NUT protein N terminus Zenonos ZA, Mistry J zz2 jackhmmer:Q86Y26 Family This family includes the NUT protein. The gene encoding for NUT protein (Nuclear Testis protein) is found fused to BRD3 or BRD4 genes, in some aggressive types of carcinoma, due to chromosomal translocations [1-2]. Proteins of the BRD family contain two bromodomains that bind transcriptionally active chromatin through associations with acetylated histones H3 and H4 [1-2]. Such proteins are crucial for the regulation of cell cycle progression. On the other hand, little is known about NUT protein. NUT is known to have a Nuclear Export Sequence (NES) as well as a Nuclear Localization Signal (NLS), both located towards the C-terminal end of the protein [1-2]. A fused NUT-GFP protein showed either cytoplasmic or nuclear localization, suggesting that it is subject to nuclear/cytoplasmic shuttling. Consistent with this possibility, treatment with leptomycin B an inhibitor of CRM1-dependent nuclear export resulted in re-distribution of NUT-GFP to the nucleus [3-4]. Inspection of NUT revealed a C-terminal sequence similar to known nuclear export sequences (NES) which are often regulated by phosphorylation [3-4]. 
18.10 18.10 18.10 18.90 17.30 17.30 hmmbuild --amino -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.29 0.70 -5.30 4 167 2010-03-17 15:11:40 2010-03-17 15:11:40 2 10 28 0 77 136 0 260.30 49 37.96 CHANGED sh.GPsMohpPGsuLSsFsALPFhPPsPsPscpP.hEPss.PllsushSPuNPLlLSAhPSs.LVTtpGGsu.SuAGsupVhVpl+T-sGPscsuQsQNllLTQssLshpsPGs.CGGltsP.PP.alTAusVpsllsupsVGsoQ..EG....GLPh..ssPPsAQLsPIVs.tpuhPsPpGspGEGGPsus.ppsS.sDhupts+uVYENFRpWQ+YKsLARRHLsQSPDsEALSCFLIPVLRSLARhKPTMTLEEGL.RAlQEWp+TSNFDRMIFYEMAEKFhEFEAtEEMQIQpsQlMpGsQsLsPsss.+LDP.Gs.APEVspQPVY ...............................t.shshpPusu.ss.ssLPh..PsssPscts.........s.h.ssshsPssPLhL.sshPps.LVstpsu...s...sus..Gssp.lhlph+o-stsspsspsQshlLTQssLshpAPGs...CG.Gs.sP.sP.hhlsAuss..shhsups......lG.soQ..s..-G..s.G....LP.....sPPPsAQLsPIl.s..puhPhPpGspu...EGu.sss.pt.u.sDpSspsKSVYENFRhWQ+aKsLARRHLPQSPDsEALSCFLIPVLRSLARhKPTMTLEEGL.hAhpEWp+pSNFDRMIFYEMAEKFhEFEAtE.......E.......MQhQ.......p........Q..hhpGspshsPsss....+h-P.GssAsclspQPs................................... 1 8 9 20 +12722 PF12882 NUT_C NUT protein C terminal Zenonos ZA, Mistry J zz2 jackhmmer:Q86Y26 Family This family includes the NUT protein. The gene encoding for NUT protein (Nuclear Testis protein) is found fused to BRD3 or BRD4 genes, in some aggressive types of carcinoma, due to chromosomal translocations [1-2]. Proteins of the BRD family contain two bromodomains that bind transcriptionally active chromatin through associations with acetylated histones H3 and H4 [1-2]. Such proteins are crucial for the regulation of cell cycle progression. On the other hand, little is known about NUT protein. NUT is known to have a Nuclear Export Sequence (NES) as well as a Nuclear Localization Signal (NLS) [1-2], both located C-terminal to this domain. A fused NUT-GFP protein showed either cytoplasmic or nuclear localization, suggesting that it is subject to nuclear/cytoplasmic shuttling. 
Consistent with this possibility, treatment with leptomycin B an inhibitor of CRM1-dependent nuclear export resulted in re-distribution of NUT-GFP to the nucleus [3-4]. Inspection of NUT revealed a C-terminal sequence similar to known nuclear export sequences (NES) which are often regulated by phosphorylation [3-4]. 18.10 18.10 52.90 20.10 17.60 17.90 hmmbuild --amino -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.77 0.71 -4.59 5 230 2010-03-17 15:28:47 2010-03-17 15:28:47 2 8 26 0 96 163 0 127.70 39 25.81 CHANGED PRL+ssRPQ+PPps+sPpEIPPEuVKEYlDIM-.L.G....hTGEScs.+EE-G.spssQEE--LPuDstLLSYpc+LCSQKlFVSpVEAlIpPQFlu-LLSP-sptD.LALppsLEQEEGLTLAQ...LlpK+pPsLcccusAEAsPs ...............................................th...+sp.p.PspsctPp.hPs.ssp-hhDIMp.h.......sG-s.s.pppct..p.tpp.E.E.-.th.sDssLLSYhccLCSQc.FlopV.EAlIcPpFlppL.LSP-sphD...hA....LppcL..EQE..EGL.olAQ..............lhpK.+hh.hcp.t.sct.sP......................................... 1 14 14 23 +12723 PF12883 DUF3828 Protein of unknown function (DUF3828) Coggill P pcc JCSG_target_392985_3kzt Family This is a family of bacterial proteins of unknown function. 24.40 24.40 24.50 24.50 24.00 24.30 hmmbuild --amino -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.51 0.71 -4.19 23 492 2010-03-17 15:29:03 2010-03-17 15:29:03 2 4 451 2 44 225 5 115.80 42 47.98 CHANGED P-s.sspsFYpaY..lpphsp......spsshtssss.....lccYVupsslpcLpt..ph...pp-hh-uDYFhpsQDass.-Wlsplplspuph.sssshVtVphG.p..spshplhlhlp+EsGtWKIhcVpsss ...............................s.pQsV+phYpsY..hosh..........ssshhs-Tsp...........ct..hsStcl.ptLsLsssL........sshsh..lDhD...PhscsQDauc.hh...l.c.s.lslsp..scs.spu.cAsV.chpl.......h.....ps-cc+ps.....slp..hltE.s.GRWhIs-lsss.c................................. 
0 6 14 28 +12724 PF12884 TORC_N Transducer of regulated CREB activity, N terminus Zenonos ZA, Mistry J zz2 jackhmmer:Q6UUV7 Family This family includes the N terminal region of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-depended transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) [2]. The proteins display a highly conserved predicted N-terminal coiled-coil domain and an invariant sequence matching a protein kinase A (PKA) phosphorylation consensus sequence (RKXS) [1]. The coiled-coil structure interacts with the bZIP domain of CREB [2]. This interaction may occur via ionic bonds because it is disrupted under high-salt conditions [3]. In addition to CREB-binding, the N-terminal region plays a role in the tetramer formation of TORCs [2], but the physiological function of the multimeric complex has not been clarified yet. 25.00 25.00 26.30 30.90 23.70 24.10 hmmbuild --amino -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.42 0.72 -3.44 13 158 2010-03-17 15:32:13 2010-03-17 15:32:13 2 5 63 0 75 136 0 64.30 56 11.44 CHANGED NPRKFSEKIALppQKQAEETAsFEcIM+-Vpuo+hspppspppp........................s..s.u.thhGGSLP .NPRKFSEKIALHsQKQAEETAAFE-lMpDlssTRhpttplppsp...........................spu..paYGGSLP......................................................... 0 15 25 48 +12725 PF12885 TORC_M Transducer of regulated CREB activity middle domain Zenonos ZA, Mistry J zz2 jackhmmer:Q6UUV7 Family This family includes the region between the N and C terminus of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-depended transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) [1]. 
Although the C- and N- terminal domains of these proteins have been well characterised [1-2], no functional role has been assigned to the central region, yet. 21.90 21.90 22.50 24.70 21.30 18.60 hmmbuild --amino -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.19 0.71 -4.45 10 208 2010-03-17 15:34:46 2010-03-17 15:34:46 2 7 51 0 92 168 0 147.20 52 26.60 CHANGED RTNSDSALHQSshsPsPQDsFsG.GuQslpspch...lLho................sPstE-s..ph-KcpsKQhW-tKK..ss.ouRP+SC-VPGINIFPSPDQphosSlhPuAHNTGGSLPDLTNIQFPPPLPTPLDP-Ds.sasohSuusSousLssshTHLGIouuup ..................RTNSDSALHpSsMsPssQ-sasG..usps....hh...p+h........hh.s...................................sP..s..h.EEsh..ps-+sh.cQ.W-sKK...os.uSRP+.SC....E....V...PGI....NIFPSs-Qp.sss.sh.hPushNTGGSLPDLTNlHFPsPLPTPLDPE.-s...sas...uLS..uusSTu.NLssshTH..LGIuus..t................................. 0 11 20 40 +12726 PF12886 TORC_C Transducer of regulated CREB activity, C terminus Zenonos ZA, Mistry J zz2 jackhmmer:Q6UUV7 Family This family includes the C terminal region of TORC proteins. TORC (Transducer of regulated CREB activity) is a protein family of coactivators that enhances the activity of CRE-depended transcription via a phosphorylation-independent interaction with the bZIP DNA binding/dimerisation domain of CREB (cAMP Response Element-Binding) [2]. The C terminus region is negatively charged, resembling the transcription activation domains. When this domain, from all three human TORC proteins, was expressed as fusion proteins with the DNA-binding domain of GAL4 (GAL4-BD), and tested for induction of a minimal promoter linked to GAL4-binding sites (UAS-GAL4), UAS-GAL4 was potently induced by GAL4-BD fusions containing the C-terminal portion of all three human TORCs [1]. 
21.50 21.50 21.50 22.10 20.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.59 0.72 -3.80 8 195 2010-03-17 15:36:07 2010-03-17 15:36:07 2 8 60 0 94 170 0 73.50 55 12.73 CHANGED PNIILT...........sDSsP..uLSKDluuALAGVsshsaDu-s....FsL-DEL+..l-PLuLDGLpMLS.DPDhhLsDPAlEDSFRuDRL .....................................PNIILT...........s-.SsP..uLSK.-lsuuLAGVs-hsh...s..s..-s....FsL-..D.EL+..l-PLoL.....DGLpMLs.DPs.h.lLsD.PulEDoFRtDRL.... 0 13 21 43 +12727 PF12887 SICA_alpha SICA extracellular alpha domain Finn RD rdf Manual Domain The SICA (schizont-infected cell agglutination) proteins of P. knowlesi, one of the variant antigen gene families, are associated with parasitic virulence. These proteins are comprised of multiple domains, with the extracellular domains occurring at different frequencies. This domain is typically found at the N-terminus, with 1 or 2 copies per protein. The domain is cysteine-rich domain and similar to PFAM:PF12878 [1]. 21.70 13.00 23.20 13.10 20.80 12.90 hmmbuild --amino -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.20 0.71 -4.54 71 105 2012-10-01 20:19:39 2010-03-17 16:07:03 2 21 2 0 103 131 1 180.80 26 21.68 CHANGED sssLhppWhpph........hpss...ut.s.spss.........ppIsscL.+cpLcctaccLp..shLp.p.sp.us........ElsshCus.............hsttt.s...ss..t..pp...ph.KplCKullcl+YFMuGlcsppp..........cp.tsthsthpshctYsRClVGtlALsclYG-HCclccllptlpsplc....pplpsp..tspss..........hspCp.s.lshp-lhhG+slLsspIcpW ................s.stlhptWhpph.tts.t.s.s............tplhpcl.+cpLcchaccLp..phLp..t.tpst.........ElsshCss...............hhttt.sst...t..pp....h.KplCKullcl+YahuGlcpptp............cp.tsthpshpshctYhR...ClVGtlA...Lscl..Y..G..-......H......Cclp.cllpplpsplc............pplptptspss..........hspCc..s.lshpcLhh.G+slLtspIcpW............................................................................................... 
0 0 0 0 +12728 PF12888 Lipid_bd Lipid-binding putative hydrolase Coggill P pcc JCSG_target_393040_3h3i Family This is a small family of lipid-binding proteins found in Bacteroidetes. 21.50 21.50 21.50 31.70 21.30 19.50 hmmbuild --amino -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.42 0.71 -4.25 5 26 2012-10-03 05:44:19 2010-03-17 17:49:49 2 1 23 3 6 26 1 114.50 33 67.72 CHANGED lGAGalphsTYNTAANssTEMWL-D..huNhWssKlKVsVDYsuRTFouTG....lusplsuDsKVslT.....DGKVLcGAATTPSGMPADSIVahlpFsDDssGhTYKVuGFRRTGFPADD ......................................hstthphpTYNTsssssscMWlc............hsshas..h+sKlssDhsupoFssss....hsst....h.psDs...p..lslT.........................-GKVlhsuuTT..uGsssDSIha.hpFosss..GhTYplsGaR+TGastD-.......... 0 3 6 6 +12729 PF12889 DUF3829 Protein of unknown function (DUF3829) Coggill P pcc JCSG_target_393163_3iee Family This is a small family of proteins from several bacterial species, whose function is not known. It may, however, be related to the GvpL_GvpF family of proteins, Pfam:PF06386. 
25.00 25.00 25.30 25.30 24.60 24.60 hmmbuild --amino -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.78 0.70 -5.21 47 668 2010-03-17 18:27:44 2010-03-17 18:27:44 2 4 523 3 45 381 4 246.60 42 80.92 CHANGED ssspthpKhNsYlp.hhNplt...tshhpshtpYhshhtchcpst.ptpphhh.sh.th.......cpshpthcps.......ht.psshtslDpss.....pshhsshpplh.shhschpsYappcsYpcDshs+ucpL...psplhtthppapsshpshpstlpchpccppttphpt........hcp.pscththtthphh....tpscpl...ht.h.ttpp.tthchtshp................ttlschpptlpshcphhtt.thpt.........thhpshspapssspphlpcl+s...................hh....tsshsphhpsYNphlsshNpht ...................................................tttshscKhssYIc.saNplp.......hsl.pulscYsch......hc..Dh+pGP.TGcEph..lhslhsl..........htpCpcthKps.......sALsPshpslDusA.....luYI-uAsALusTI.NEM-KYYsQ-NYKDDAFAKGKsL...HpohlKsl-sFcslucpYcsuIpchs-c+phspLKp........lE-.pE..GKohp....YYo.LslM.....loAKQI.....NslI.op-p...FDscAhh................KcluELEohls.....phKchspss......................uFlsS.AspYQhpsKKalRRlRDpV.......hpphpssss.uahs.........p-Sastsl+pYNEhVDsYNph............................................ 0 13 26 38 +12730 PF12890 DHOase Dihydro-orotase-like Coggill P pcc JCSG_target_393237 Family This is a small family of dihydro-orotase-like proteins from various bacteria. 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild --amino -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.86 0.71 -4.51 4 46 2012-10-03 00:45:34 2010-03-17 18:34:02 2 1 46 0 6 1447 801 171.40 49 50.21 CHANGED K+lllshlshLu..LsthuhpAQsLhsT...........TWsAYGloFpAP........................AulsVE-DoEE...........ualhssssaYlTlQhL-uEGhK+.u-LsptLKshAsDDpVTsQouVpsFELPQFaGs.L+G...sCEs-+ClYuhLLsKsuusGFYlSIlYocEs ................................................................................................KLlsPGLlDlHVHLREP.GsptKETIpoGohAA.................................A+GGaTTlsAM..PNT..p.P....sPDphEphp.ltpt....................IpcpuplpVLPauuI.T..V.c.psGpEhsDht..........................sLhchGAF.A.F..TDDGVGV.QsAuMMhcA.MK+AAcL.c.h.A.l.V.A.HC.ED..s..o.L.h.s.p.Gs...V.HE.G...c.hu...c..c..a....G.......L.pGI......PSlC...EoV...H...I.......A......RDl.L....L....A...E......A.A.uC.....H..........YHVCHlSTKp.................................... 0 0 4 5 +12731 PF12891 Glyco_hydro_44 Glycoside hydrolase family 44 Coggill P, Bateman A pcc JCSG_target_393205 Domain This is a family of bacterial glycoside hydrolases formerly known as cellulase family J, and now known as Cel44A. It is one of the major enzymatic components of the cellulosome of Clostridium thermocellum strain F1 and of many other Firmicutes. 
25.10 24.70 25.10 24.80 24.90 24.50 hmmbuild --amino -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.85 0.70 -4.86 27 92 2012-10-03 05:44:19 2010-03-18 13:56:41 2 21 82 14 41 96 9 206.30 32 25.61 CHANGED NtsNuGsDWhaps............sphssPutssspahcpshttGs.hshlTlphhGYVutDs.s.s..hspstshPss+asp.......................lthspssshuhsPc.............hsss.lYhsEa......Vsalhs+..hGsu..sssGl+hYuLDNEPsLWss..............THsclHPpslohsElls+slphA+slKshDPsAplhGPs...taGasuYhshss...sss.......ss..scusap.al-aYLcph..+pspcssGhRLLDVLDlHaYPpup ........................................................................h.NsusDWh.ps................t.suthhpthhpp.sh.t.ss..shhTl.hhGaVutct.........ttshsss+att.......................hh.ttss.hs.sPs.............h.ss.hYhs-a.....lphlhpp.....hGtu....stsulpuYplDNEPs.LWsp..............TH.clHPsssThsElls+shshApAlKssDPsAplhGPu...taGatu..Yts.ts......t.t........sp.....p.sshp.alsaaLcph...+ptpptt.GhRLLDhhDlHaYPps................... 0 25 33 36 +12732 PF12892 FctA T surface-antigen of pili Coggill P pcc pdb_3gld Repeat The FCT and equivalent region genes of Streptococcus pyogenes and other related bacteria encode surface proteins that include fibronectin- and collagen-binding proteins and the serological markers known as T antigens. Some of these proteins give rise to pilus-like appendages [1]. The FctA family is found in many Firmicutes and related bacteria. In S. pyogenes, the pili have a role in bacterial adherence and colonisation of human tissues [2]. 
22.30 12.50 22.30 12.50 22.20 12.40 hmmbuild --amino -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.20 0.72 -3.18 46 1757 2012-10-02 19:08:27 2010-03-19 11:32:17 2 125 375 11 237 1642 91 84.80 22 26.02 CHANGED YpYslpEp.p..ssh.sG..lsYDs.p.phplsVp.Vs-ssps.thhhs.thhhsh...pt.s.p..pt..sh.tFsNo.a.......sss...s.....Lplp..KplsGps.......cscpF..sFslphp .................................................apYslpE......h......t....s......s.......su.....lsYDs...t..p.hpl.sVp..Vsc.......p......s..pG.....th.....h......s....h..sp........................tt.t..............tt...................tF.sNs.Y.........................sss....p............hphp.....K.l...pG...t...t.........tttp...a....pF.l...t............................................................... 1 65 125 164 +12733 PF12893 Lumazine_bd_2 Putative lumazine-binding Coggill P pcc JCSG_target_391417 Family This is a family of uncharacterised proteins. However, the family belongs to the NTF2-like superfamily of various enzymes, and some of the members of the family are putative dehydrogenases. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild --amino -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.59 0.71 -3.55 61 306 2012-10-03 02:27:24 2010-03-19 16:13:43 2 10 241 22 137 612 378 116.00 21 75.47 CHANGED s-hpulpsslpp.Yh-Ghhpu.DsstLcpsFHscA...tlhsht.pup...hhthshcpa.hshlpsp......tssusshptpIhsl-lsG.ssAhs+lcsphh..sh............papDaLsLl+.h-GpWpIlsKsap .............................................s.p.pslppslpp.Yh-u.h.....tpu..DsstlcpsFpssu.............th.hshp..psp.............lp..t...h...s.h.sp.a...hshlppt...........ts..tt....t...p..t.p........tpl......p..lcls..u..shAhsclphphh.....st..............................pa....s....Dh........h....s..Ll.K.h-GpWpIlsKha................................................. 
0 49 89 114 +12734 PF12894 Apc4_WD40 Anaphase-promoting complex subunit 4 WD40 domain Coggill P pcc manual Domain Apc4 contains an N-terminal propeller-shaped WD40 domain.The N-terminus of Afi1 serves to stabilise the union between Apc4 and Apc5, both of which lie towards the bottom-front of the APC, 21.10 13.40 21.10 13.40 21.00 13.30 hmmbuild --amino -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -8.08 0.72 -4.46 29 282 2012-10-05 17:30:43 2010-03-22 10:40:44 2 24 200 0 193 362 3 47.10 26 6.94 CHANGED ph.p.ls-Kslssp........lp...hhsasPpMDLlAluoccsplhlaRL.NhQ+........l.......as ...........................hspp........lp...hhsasPp.hD.LlAlus.........p.cGp...lhlaRl..shp+........lh....................... 0 65 98 152 +12735 PF12895 Apc3 Anaphase-promoting complex, cyclosome, subunit 3 Coggill P pcc manual Domain Apc3, otherwise known as Cdc27, is one of the subunits of the anaphase-promoting complex or cyclosome. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1, 2]. The protein members of this family contain TPR repeats just as those of Apc7 do, and it appears that these TPR units bind the C-termini of the APC co-activators CDH1 and CDC20 [3]. 
26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild --amino -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.20 0.72 -3.85 111 1548 2012-10-11 20:01:03 2010-03-22 11:07:53 2 603 877 14 798 4682 1088 82.70 18 13.86 CHANGED pptpacsAlFhu-+lhshss..........p...psh........ahh....Apshappspa....ppA.hpll..............tphtt.ps...htspY.....Lh.A..pshh.cLpc...ac-AlssLtps ...........................................................................t...tappAlh.hhc+h.hp.sst...................p.....pst....................................hhh........Apsa.app.spa.......pcA.hphhpp...................................hphps..pp.............ps.ta...................lh...u.......pshh..phpc....hc-Alphlp..h..................................... 0 291 477 677 +12736 PF12896 Apc4 Anaphase-promoting complex, cyclosome, subunit 4 Coggill P pcc manual Domain Apc4 is one of the larger of the subunits of the anaphase-promoting complex or cyclosome. This family represents the long domain downstream of the WD40 repeat/s that are present on the Apc4 subunits. The anaphase-promoting complex is a multiprotein subunit E3 ubiquitin ligase complex that controls segregation of chromosomes and exit from mitosis in eukaryotes [1,2]. Results in C.elegans show that the primary essential role of the spindle assembly checkpoint is not in the chromosome segregation process itself but rather in delaying anaphase onset until all chromosomes are properly attached to the spindle. the APC/C is likely to be required for all metaphase-to-anaphase transitions in a multicellular organism [3]. 
21.40 21.40 22.60 22.00 20.90 20.00 hmmbuild --amino -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.18 0.70 -11.14 0.70 -5.03 36 264 2010-03-22 11:31:27 2010-03-22 11:31:27 2 10 228 0 186 273 1 197.50 24 26.91 CHANGED pLphlthshshh.p..ttphlhplAppssplpsLlpYlppshpplppta..cshhphh....cchhsshtpt..pt................p.tsshs.sphhcllhoGhss..slc-aLh..spLuE+GhK+hp+ssssuapslpcllhppLlsAhERhhllLscLpGlu+ap.t...........ltl.sspplsch......lpsspslhhhsacllhplspEhctF......pt.FhpWlph.hlchhss-..sp ..........................................t...hh.hchphl.p..th.tlhtlApphsplpsLlpYlppshpphpptW......cshh..h......spplsphst...pt......................................................sspsslt.schhcLllhGpsss.tLpp.aLh.......spL.s-..........+GhK+hspulpsuapslpcLlhppL.s...u...hE.......phhhhLu........cLpGhupap.ph....p.........lGL.sss..tlpps...............................lpsssshhlhspcllhslspphppF.......ps.FhpWLhh..l.hhtpp....t....................................................... 0 59 98 150 +12737 PF12897 Aminotran_MocR Alanine-glyoxylate amino-transferase Coggill P pcc JCSG_target_390749_3ez1 Family These proteins catalyse the reversible transfer of an amino group from the amino acid substrate to an acceptor alpha-keto acid [1]. They require pyridoxal 5'-phosphate (PLP) as a cofactor to catalyse this reaction. Trans-amination reactions are of central importance in amino acid metabolism and in links to carbohydrate and fat metabolism. This class of aminotransferases acts as dimers in a head-to-tail configuration [2]. 
20.30 20.30 20.30 20.30 20.20 20.20 hmmbuild --amino -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.33 0.70 -5.92 5 444 2012-10-02 18:26:03 2010-03-22 16:02:22 2 1 437 8 94 3944 814 413.70 53 97.54 CHANGED MTssAhsDALspAcAAY-sFKARGLKLNMQRGQPADADFDLSNGLLolLGEsDsRh.DGoDLRNYPGGVAGLPSARALFApYLDVKuENVlVWNNSSLELQGhVLTFALLHGlRGSsGPW..lu-..KPKhIVTlPGYDRHFLLLETLGFELLTVDMQsDGPDlDAVERLAAsDASVKGILFVPTYSNPGGETISsEKARRLAulpAAAPDFTIFADDAYRVHHLhuEu-+DsPVNFVuLuRDAGHPDRAFVFASTSKITFAGAGLGFVASSEDNIuWLuKYLGAQSIGPNKVEQARHVKFLp-YcsGLEGLMRDHAcLIAPKFcAVsEsLcsELG-.GGcYATWToPRGGYFISLDTlDPVA-RVVcLA-cAGVSLTPAGATYPuGpDPHNRNIRLAPTRPPL-EVcTAMcGVAsCV+LAoEEYRAGp+ .............................................................................................................................................................................ho.--Lssh+pplppcYscLpA+sLsLDlTRGKPSscQL...DL.SssLL.s.l....s.s.s.s....hss...-G.s.D.s.RNY.GGhcGlPElRplFuElLG......ls.....s-plIAusNS...S.L.slMa.D.llsau.hla.Gsssut+....PW.......spp....tpV..KFLCP.VPGYD..R.H...FA.I.oE.ph..G...I...EMIs..V...P..M.s...p.....c.....G....P....D....h....D....h....V.....c....c........L.......V.......A..........t..........D......P.........s.....l........K.........G...................h......W.s........VPhY.uN.PsGsoYS--T.VR+Ls.p.M.....p.s..A.A.P.D..F..R..I..a..W..D.N.A.Y..........A..l...H...p.....L...........t....D.t............p.........s.......h............h............s.......l.....L....t........h....u..p.......c...A.....G.N......P.........s.....R...s..a....h..F..A..S.T..........S.K.I.T.F..A.....G...u..G..Vu..F..l..A..u......S...p.........s.......N.........l....s...........W...........a...h...p...a...h....u.h.c...o.I..G...P..D.K...l.N.Q...L...R..H...s.+.F.F....t.....D..s..-.....G....V....+...t.H...M...c...+..H...t...pI...L....A.P....KFt...h....V.h-.lL...-...c....+....L....u.....s.t...s..l...A....o.W..T.c.P.p..G..G..Y..F..I......S....L...D........V...h.......-....G.........T......A..p....R....l...V....u.
...L....A..K...-...A...G...lu...L..T.s..A...G...A..o.a..P.....Y...t...+...D....P...c...D...+..N..IRlA.PS...a.Ps.l.-ELcsAh-ul.u.s.CV.hLAAsE+LL...pt............................................................................................................................................................................................ 0 32 71 88 +12738 PF12898 Stc1 Stc1 domain Bateman A agb Jackhmmer:O94276 Domain The domain contains 8 conserved cysteines that may bind to zinc. In S. pombe this protein acts as a protein linker which links the chromatin modifying CLRC complex to RNAi by tethering it to the RITS complex. The region is reported as a LIM domain here, but has a slightly different arrangement of its CxxC pairs from the Pfam LIM domain Pfam:PF00412, hence why it is not part of that family [1]. The tandem zinc-finger structure could mediate protein-protein interactions. 24.80 22.00 24.80 22.30 24.50 21.80 hmmbuild --amino -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.53 0.72 -3.92 21 74 2010-03-22 17:08:22 2010-03-22 17:08:22 2 3 71 0 62 73 5 83.60 29 26.08 CHANGED KC.tsC+Kh+spssFSp+QLcchp+shhsp..hpshsspshspCppCs..us.pssEL....................pCs.hCs+sKuL-tFSKsQR+c.-ss..+ChsClpt ....................cC.thstchpshstFSppQlcphppthhpp......thhstpshhtCppCs..up.pssEl..............................pCs.hCs.+s+sL-tFSKsQR+p.scss.........+ChsCsp........ 0 16 33 49 +12739 PF12899 Glyco_hydro_100 Alkaline and neutral invertase Coggill P pcc manual Domain This is a family of bacterial and plant alkaline and neutral invertases, EC:3.2.1.26, previously known as Invertase_neut Pfam:PF04853. 
23.60 23.60 23.60 23.60 23.50 23.40 hmmbuild --amino -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.66 0.70 -6.09 20 347 2012-10-03 02:33:51 2010-03-22 17:10:24 2 7 135 0 171 342 258 358.00 51 75.19 CHANGED pAWc.lL-colVaYpGpPlGTlAAhDs.ss...csLNYsQlFlRDFVPSuLsFLlpG..csEIVRNFLhhTLpLQupc+plDsap.GpGlMPASFKVh....ctt.....cEhLhADFG-pAIGRVsPVDSuLWWIILLRAYs+sTGDhohucsP-sQpGl+LILcLCLsssF-haPTLLVsDGusMIDRRMGlaGaPLEIQuLFYuALRsAppLLp.ss.....pss.pp..hlppls........pRLpsLshHlRpYYWLDhp+LNpIYRaKTEEYGc.s.u.hNcFNIhP-SIP.sWLh-.WLPpcGGYLlGNluPuRhDFRFFuLGNhlAIlouLAotpQupAIhcLlEp+W-DLlGcMPhKICaPAlEscEWRIlTGsDPKNpPWSYHNGGSWPsLLWhhsAAslK.............sGRsp.lAc......+Alpl.AccRLs+DcWPEYYDG+sGRhlGKQARpaQTWoIuGaLlA+tLl-.sP ..........................................................................................................................................................AhchhcpulV.apGpslGTlAA.D..sp...p.LNYsp.lF.lRDFlPSulsaLhpG..c.-IV+NFLhhoLpLQ..................u.........hc+......................hD.ap.upGlhPASFKl.........p.....tp-hL.sDFGppAIGRVuPVDSuhWWIILLRAYsK.oG.....Dhsltcp.-sQpGh+hILpLCLsc.uFDhaPoLLssDGssMIDRRMG..........laGaPlEIQ.uLFa..AL+suh.phLt...p.....t.ts..pp...hlptls..................pRLpALoaHhRpYaWlDhpplNpI.Y........Ra+TEEYSp.s.A.hNKF.NlhP-p.IP.sW.l.hD.ahPpc..G.G..YhIGNlpPA+MDFRaFsLGNhhuI.lSSLuTscQ.spuIhcLlEt+Wp-L.l..up......M..PlK.I.....C...YPAl..E..s..c..E..W...+...Il...T...G...sDPKNT..WSYHNGGSWPs..L..LW....h..ssAslK.........................hGRsp.....lAc.......+....A....ltl...uEpRL.cD..p....W..P..EY.Y..Ds+pG+a..lGK.QuRhaQTWoIAGaLlu+hhLcsP...................... 0 28 99 142 +12740 PF12900 Pyridox_ox_2 Pyridoxamine 5'-phosphate oxidase Mistry J jm14 jcsg_3fkh Family Pyridoxamine 5'-phosphate oxidase is a FMN flavoprotein that catalyses the oxidation of pyridoxamine-5-P (PMP) and pyridoxine-5-P (PNP) to pyridoxal-5-P (PLP). This entry contains several pyridoxamine 5'-phosphate oxidases, and related proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild --amino -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.85 0.71 -4.36 153 1956 2012-10-02 11:35:36 2010-03-23 10:43:58 2 14 1393 22 640 2144 210 137.20 22 77.98 CHANGED cppphhplLc.sshlu+luhss....s....st..PhllPssashs......ssp......lhh+.sus...uu..+hhpshp............t.....sshsstp....hDs...hshsps.........s.a.SVllpGp.uchl...p-spEctpshpthhpph.hPt....ap..thc.......tthtt..sthlclphsphou+t ............................pphhplLp..ps.p.h.s......+luhss....s........st........Phl..lPlsashp.................ssp....................lYh..H..sut.....su..+hhchlp...........................tt....ssh.shsp.....................h.cs........hh...spp.......................tht.a.p....SVl.spGp.sphl.................p..c..t.p....E.p..t..t...ulchl...hp..p....h...sPt.......hp...h..............tt.htt.....ssll.+lphpphou+.................................................................... 0 204 440 569 +12741 PF12901 SUZ-C SUZ-C motif Bateman A, Aravind L agb Aravind L Domain The SUZ-C domain is a conserved motif found in one or more copies in several RNA-binding proteins. It is always found at the C-terminus of the protein and appear to be required for localization of the protein to specific subcellular structures. It was first characterized in the C.elegans protein Szy-20 which localizes to the centrosome. It is widely distributed in eukaryotes. 21.10 21.10 21.30 21.10 20.30 21.00 hmmbuild --amino -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.64 0.72 -4.27 26 299 2010-03-24 10:52:53 2010-03-24 10:52:53 2 12 94 0 157 272 0 32.90 42 6.69 CHANGED phpthshssups.p.sVlRpP+GPD..uop.GFphpR ....................tphphssuss...sVlRpP+GPD..sop.GFptpp... 
1 27 41 91 +12742 PF12902 Ferritin-like Ferritin-like Coggill P pcc JCSG_target_390707_3hli Domain This is a family of bacterial ferritin-like substances that also includes a C-terminal domain of VioB, polyketide synthase enzymes, that make up one of the key components of the violacein biosynthesis pathway. Violacein is a purple-coloured, broad-spectrum antibacterial pigment. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild --amino -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.78 0.70 -4.75 38 153 2012-10-01 21:25:29 2010-03-24 11:50:23 2 10 120 2 80 218 6 215.90 25 39.49 CHANGED LppAl.plEhuTlP.YLsAhYSl.csssstp................................stphltslshEEMlHhslssNlLsAlGus.....Ptlst.p.....hPsYPsp.LP..tht...........shphsLtshohsslp.pFhpIEpP-............................................................oIGpFY.psl............hcul...ppLstpht...stsh..........Gc....tcpls.p.a..............stlhsVssh.soAhpAlshIh-QGEGsstss...........-..........sc.uHYh+Fpclhp ...................................LptAlplEhuTlP.YLsAhaSl..psststp...........................................shphlpslshEEMlHhslsuNlLsAl....GGs.....Pplst.th.........hssYPss.LP..th.t..............................shp..lsLtthohttlp...hFht.I...EpPp......................................................................................................t...th.o.IGt.hY.psl............hpsh.......tth.tth.........th....................Gt..........tpp.hh..t..h...........................................................s..h...lp...sh.psshtulp.IhpQGEGss.ss.........................................t.uHa.pF.pl..h.............................................................................................................. 1 40 50 67 +12743 PF12903 DUF3830 Protein of unknown function (DUF3830) Coggill P pcc JCSG_target_392654_3kop Family This is a family of bacterial and archaeal proteins, the structure for one of whose members has been characterised. 
PDB:3kop from Swiss:A0JVT3 probably adopts a new hexameric form compared to previous structures. The putative active is near the domain interface. 3kop is most closely related, structurally to PDB:1zx8, where the potential active site is located near residues E51 and Y53 (conserved in 1zx8). Beyond the two residues above, the other residues are not conserved. Also the shape of the active site differs from that of 1zx8. PDB:1zx8 belongs to family DUF369. Pfam:PF04126, which is part of the cyclophilin-like clan. 21.30 21.30 21.30 21.90 21.00 20.60 hmmbuild --amino -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.04 0.71 -4.82 13 130 2012-10-02 15:38:38 2010-03-24 14:34:14 2 1 126 6 52 135 16 142.90 36 93.05 CHANGED lTL-KRGVossA+LLD-cAPcTCsAVW-uL..PhuGplaHuKYARNEIYsLlPsFs......sscPstENsTVTPIPGDlsaFsFps..lss.uaGYcssstststss.lsDLAlFYGRNNLLlNGDsGWVPGNVFATIsEGL-....chApACpDlWhsGspGEoLoasRA .............plsttuhshhAchh--cAPpTstAhhchL...PhpsphhHs+auGptlas.Lss....as.......hts.GhENsTspP.sGDllaa.........................sG.ho-htIhhuhsshhhsuchG.lsGNhFhTI.s.-.Gh-....pltphspclhhcGAps.shp................................................................................ 0 12 33 42 +12744 PF12904 Collagen_bind_2 Putative collagen-binding domain of a collagenase Coggill P pcc JCSG_target_393067_3kzs Domain This domain is likely to be the collagen-binding domain of a family of bacterial collagenase enzymes. It is the C-terminal part of the PDB:3kzs structure determined from Swiss:Q8A905 (information derived from TOPSAN). 
21.70 21.70 22.30 21.80 21.40 21.30 hmmbuild --amino -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.15 0.72 -4.20 12 120 2012-10-02 17:35:21 2010-03-24 14:49:05 2 4 80 4 53 113 4 92.40 34 19.50 CHANGED lPDQSllstsNGcchc+hhATRGpD..YhhVY.sasG+shplchuKIuGcchcAhWasP+sGctphIGpFcN..sstsFpssut...pGsDWVLllcs .....................PDpSlls.s...t..s......sp+.hs+hlAoRupD..YhhVY.shs.GcshplsLscl.uG......sp..t.sAhWasP+sGchp...h.....l..G...p.h.....s.....s.......s.ht......sFps.Pus...spGNDW.lLllp... 0 28 43 51 +12745 PF12905 Glyco_hydro_101 Endo-alpha-N-acetylgalactosaminidase Coggill P pcc Jackhammer Domain Virulence of pathogenic organisms such as the Gram-positive Streptococcus pneumoniae is largely determined by the ability to degrade host glycoproteins and to metabolise the resultant carbohydrates. This family is the enzymatic region, EC:3.2.1.97, of the cell surface proteins that specifically cleave Gal-beta-1,3-GalNAc-alpha-Ser/Thr (T-antigen, galacto-N-biose), the core 1 type O-linked glycan common to mucin glycoproteins. This reaction is exemplified by the S. pneumoniae protein Swiss:B2DRU5, where Asp764 is the catalytic nucleophile-base and Glu796 the catalytic proton donor. 
25.00 25.00 25.30 25.30 19.50 19.30 hmmbuild --amino -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.58 0.70 -5.85 3 455 2012-10-03 05:44:19 2010-03-24 15:08:58 2 39 408 3 19 333 4 408.30 56 27.05 CHANGED DsKlDWQDGAIAYRKIMp-PlGuE-VsNpVGYRIsMNFGSQAQNPFL+oLDsVKKluLsTDGLGQuVLLKGYuuEGHDSGHPDY.GDIGpRhGGsKDhNTLIccGKEYNApFGVHINAoETYPEAKtFssDhl....DsssuhGWGWLDQuYsINpctDLpSG..pRFKRL.DhLtscsPDLDFIYVDVWssNQ......WsS+QLu-cINDhGWRLuTEaGGuhEpaSTWQHWAuDhsYGG.p.KGINScIhRFIKNHQ+DSWluNaPcVGGsAD+PLLGGaphtsFEGWQoR+Da-ua..I-shFssNLPTKFLQHYpITpWoNsEulc.Ksshpp.......hEssLKDsSsssslslpRK........DshRpRVhTLNGNsIh-Gt.GDcpYLlPWs.cQshKssoA-SEKLYHWNspGGTTTWTLP-GWpussTVYVYELTDLGRTcVKEV ...........................DpKVDWQDuAIAaRs..IMN......N...PpGhEcVt-lsshRIshNFuSQApNPFLhTLDslK+IsLtTDGLGQtlL...L.KGYsuEGHDSuH.sY.us.IG+RhGGhcD......hcs......LIEc.u+KYsAchGIHVNAoEoYPEuK...aF.NE....cIL..............sss..Y..ph...GWsWL..D..QuhpI.Dt.uhDLupG.....cLhch..c.h.K.....+....h....u...........s..s.LD...aIYVDVWGNutp....suWso+hLA...KEINpp.GWRhuhEWupuhEYDSsapHWAADhsYG...G..........h.TNKGh.NSsIsRFIRNHQKDsWlsD...hts.....s.PLLGGhsh..c-FEGWpG+....oDYNuY......lsNlFs+sl.TKahQHaslopWEs...G..s.ss.......ssp.....................hpsplVsssss...c.Vs...Voct.................Pphp.RTlTLN....Gpsl..........cG...............................................uuYLhPWs................sssp.-.KhYYaNspsGuoTWoLPssWAt...opVaLYcLTDQG+sc.t..ph................................................. 
0 9 12 17 +12746 PF12906 RINGv RING-variant domain Bateman A agb Bateman A Domain \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild --amino -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -9.35 0.72 -3.92 26 1986 2012-10-03 15:03:13 2010-03-24 18:10:39 2 31 326 2 1289 1950 106 50.00 37 10.43 CHANGED CRIChpstspss........slhpPCpCpGolphVHpsCLppWlsp........psspp.....CclC ....................CRICh...pp.spppp............................Lh.p.PCp.Cp...........GolcalHp........sC....LpcWlpp......................psspp...............CElC............................. 0 402 693 1007 +12747 PF12907 zf-met2 Zinc-binding Coggill P pcc manual Family This is small family of metazoan zinc-binding proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild --amino -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.03 0.72 -3.99 13 251 2012-10-03 11:22:52 2010-03-24 18:12:56 2 6 164 0 171 237 2 36.30 39 40.41 CHANGED slhCclChpTFhsTsststLpEHA-sKHPK..sshpsCFPph .....hpCplC+...hphss.tp.....h+pHhEuKHPK..ss.................. 1 59 101 143 +12749 PF12909 DUF3832 Protein of unknown function (DUF3832) Coggill P pcc JCSG_target_391895_3k6q Domain This is a family of proteins from bacteria and archaea of unknwon function. The N-terminal part of the structure from Swiss:Q0AZ30 shows remote homology to the N-terminus of the bacterial toxin/antitoxin 'addiction module', and the C-terminus is distantly related to the TTHA1013/TTHA0281 superfamily. 
21.80 21.80 22.40 22.30 21.50 21.20 hmmbuild --amino -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.86 0.72 -3.92 12 47 2012-10-02 16:06:15 2010-03-26 16:28:33 2 2 38 4 18 50 1 91.30 29 62.79 CHANGED L.pshphpsphht-csGpl.olslspl-..lhssusTh-pAhpcLl-sLh-YupDYhschphahp.uPNRppHaPYlh.................plhhps.sp-clppll ........hh..pshphssplhpEsDGol.olsLspl-..lhususoh--AhscLlcsLl-YAcDahs-hphahs.uPNR+cHhPYlh.................plhlpc.sc-plppll........................... 1 11 14 17 +12750 PF12910 RelB_N Antitoxin of toxin-antitoxin stability system N-terminal Coggill P pcc JCSG_target_391895_3k6q Domain This domain appears to be the N-terminus of the RelB antitoxin of toxin-antitoxin stability system or prevent-host death system. Together RelE toxin and the RelB antitoxin form a non-toxic complex. Although toxin-antitoxin gene cassettes were first found in plasmids, it is clear that these loci are abundant in free-living prokaryotes, including many pathogenic bacteria, and these toxin-antitoxin loci provide a control mechanism that helps free-living prokaryotes cope with nutritional stress [1,2]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.21 0.72 -4.60 97 364 2012-10-03 00:18:00 2010-03-26 17:37:23 2 5 297 4 132 2153 384 44.10 28 52.34 CHANGED M......pslsho-ARspLppllDpsspscpsl...IsR.p..sppsslllSh-php ...............M...tphNhp-A+spLopLl.-ps.t.p.Gc.p.sl...I..s..+.t....scssshllshp..................... 0 38 80 111 +12751 PF12911 OppC_N N-terminal TM domain of oligopeptide transport permease C Coggill P pcc Pfam-B_1473 (release 24.0) Domain Oligopeptide permeases (Opp) have been identified in numerous gram-negative and -positive bacteria. These transport systems belong to the superfamily of highly conserved ATP-binding cassette transporters. Typically, Opp importers comprise a complex of five proteins. 
The oligopeptide-binding protein OppA is responsible for the capture of peptides from the external medium. Two integral highly hydrophobic membrane spanning proteins, OppB and OppC, form a channel through the membrane used for peptide translocation. This N-terminal domain appears to be the first TM domain of the molecule [1]. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild --amino -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.74 0.72 -4.59 398 11398 2010-03-26 17:47:28 2010-03-26 17:47:28 2 16 3658 0 2635 7363 2094 54.70 23 17.25 CHANGED shoh.hp..-sW..+Rh++NKhAlluLhlllllllhA.lhu.Phl..uP....a...................sp.s.hs.......ht...hsPu .......................hp.....phh.......cphh...+...s+...hA.h.h.ulh.l.l...llll.l.hu...l...h...u...Phl........us........a........s.........st.s....hs............h.......sP.................................................... 1 740 1563 2104 +12752 PF12912 N_NLPC_P60 N-term_NLPC_P60; NLPC_P60 stabilising domain, N term Coggill P pcc Pfam-B_845 (release 24.0) Domain This domain, at the N-terminus, appears to be the stabilising domain for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The next domain is an SH3b1, the third an SH3b2 and the last, the C-terminal region, the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN). 
21.90 21.90 22.00 22.00 21.30 21.50 hmmbuild --amino -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.57 0.71 -4.02 25 228 2010-03-29 10:40:00 2010-03-29 11:40:00 2 6 225 2 40 175 4 120.00 31 26.33 CHANGED hhhhlhsslLh..suCus+ss.s......................h.....tDlphhPQ-hpsYsps..........lhsttpptt.spcapp+aFuPWppspsphstp-lhhshpth...tpppu...YuENhp.hs.shhcshhpsAshcsa .................................................................hahshhllhh..suCupKshs...............................................pslShLPQ.spshshs..............................sccYp+haF.......uPWcss.hh.....s.....hps.....ps......lFWs.ashh...........hssps.......Yh.Ntp.hshuaacphIpNAslpca............ 0 14 33 38 +12753 PF12913 SH3_6 SH3 domain of the SH3b1 type Coggill P pcc JCSG_target_405735_3m1u Family This domain appears to be an SH3 domain of the SH3b1-type, and is just C-terminal to an N-terminal domain that is probably the stabilising domain for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. The next domain is an SH3b2 and the last, the C-terminal region, is the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN). 20.90 20.90 21.00 22.50 20.70 20.20 hmmbuild --amino -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.41 0.72 -4.73 53 435 2012-10-02 18:48:24 2010-03-29 13:38:20 2 10 430 2 70 327 10 52.70 44 11.45 CHANGED PhFtsPp.sGpGaPF.DhhQpSslhsGoPVhlsHhSpDt....sWhaVpos.hshGWlcu .............hapsPp.tsEGhPF.DhhQpShLpsGTPVhlhHhS+Dp....cWtaVhos.sshGWVcS... 0 19 49 61 +12754 PF12914 SH3_7 SH3 domain of SH3b2 type Coggill P pcc JCSG_target_405735_3m1u Domain This domain appears to be an SH3 domain of the SH3b2-type, and is the second SH3 domain to be found, downstream of an N-terminal domain that is probably the stabilising domain, for the structure from Swiss:Q72DN3, PDB:3m1u, which is a four-domain protein. 
The last, the C-terminal region, is the catalytic domain of the cysteine-peptidase type, ie family NLPC_P60, Pfam:PF00877 (details derived from TOPSAN). 21.30 21.30 21.70 21.30 20.60 20.40 hmmbuild --amino -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.72 0.72 -8.20 0.72 -4.19 41 382 2010-03-29 12:38:44 2010-03-29 13:38:44 2 5 381 2 47 261 5 49.80 32 10.79 CHANGED scphlppapsh..p.ahsllp-psslh...-ppGpahhpu+lGslhPlhspstst .........spchlp.atpL...p.hhs.lpsplsla....stsGsaaFpuRhGslhPhhcpcts........ 0 12 30 39 +12755 PF12915 DUF3833 Protein of unknown function (DUF3833) Mistry J jm14 Pfam-B_259 (release 24.0) Family This is a family of uncharacterised proteins found in Proteobacteria. 25.00 25.00 25.50 25.40 20.80 19.70 hmmbuild --amino -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.69 0.71 -5.01 105 426 2010-03-29 12:42:56 2010-03-29 13:42:56 2 1 405 0 121 374 1121 163.30 40 91.88 CHANGED hhs.hhLsuC..uu..slsD.Ytsp.sPphcLcpaFsGclpAaGhlpchsGcVhRRFsVclsusW....c....GsphsLcEcFhYsDGp....pppRlWpLstsus....GpapGoAsDVlGpAp........GptsGs.AlpWpYslcLPl.....-usshpVshDDWMYLhc-ssllN+ophpKFGlcVGclsLshcK .....................h.hhhLsuC....uu..pls-Y..psp.pPphDLtpaFsGphpAaGhlQDhoG+lsRRFpVslpGph....-....Gs..phsLs.EcFlasD..Gc....pppRlWplp....+t.u-......spYpGs....AsDllG...pAp........GptsGN.AlpWpYshpLsl.....-usshpVpFDDWMYhhD-pplhNcophpKFGlcVGclTLhFcK........ 0 24 63 93 +12756 PF12916 DUF3834 Protein of unknown function (DUF3834) Coggill P pcc JCSG_target_400673 Family This family is likely to be related to solute-binding lipo-proteins. 
20.80 20.80 20.80 21.10 20.60 20.30 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.34 0.71 -5.31 10 19 2012-10-03 15:33:52 2010-03-31 17:06:15 2 2 13 1 18 29 4 188.30 31 82.80 CHANGED M...........+llsAP..GPVSYPLIAu.hhcpcDl-I..hFuK.cGpuDV..VLDShVSLsKp...Gl+Ishsll+cLhsIhPclup.+IulWRKGSAADlLsRsllDlpsh+uElVYu-D.ppllcMLscGclsoAllu.ushupGcsF.E-Lhpphsl.lPGSCGAhlpsp..-cFIsAYpcGI-hh+pcPEpsA-YIucpLPhphspcFIcsll+pscas ...................................................llsAP..GPVSYPlIsu..hcpcDlcl...FsK...pspuDl..lLD...ShssLsKh...Gl+lshss...lpcLhhlhPplsp.......+IuVWR.+GoAADlLhRhllchpshc.......u...El.VYs-D.tplhcMhppGclpoAV.....l........s...u.hpc..G.th.E-hhs........hPGuCGs.s.ps.....p....pthlssYpcGI-hh+pcPpssA-hluppL..Phhhspchltslhcpsph........................................................................................... 1 4 7 14 +12757 PF12917 HD_2 HD_1; HD containing hydrolase-like enzyme Coggill P pcc JCSG_target_394139 Family This is a family of bacterial and archaeal hydrolases. 21.70 21.70 21.70 21.80 21.60 21.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.47 0.70 -4.55 3 447 2012-10-01 20:28:14 2010-04-01 14:05:03 2 2 442 3 51 577 15 210.00 60 97.87 CHANGED MGIHQYFQSLSDLENIYRCPGKFKYQEHSVAEHSYKVTSIAQFLGsVEEQAGNEVDWRALYEKALNHDYSELFIGDIKTPVKYATPELREMLSEVEESMT+NFIEREIPEcFQslYRahLKEGKDsTLEGKILAVSDKVDLLYESFGEIQKGNPENVFlEIYoEALATIYEFREMASs+YFLKEILPDMLAEKGIEKTELPQLTscIsScuL+.-- .........................................MGlHQYF.ppLSDhEplhRhPGcFKYhEHsVAAHSFKVTcIAQaLusV.E.....E.h...p...Gp.c.INW....K.uLYE.KALNHDauElF.hGDIKTP.VK.YAosEL+chhupVEEcMs.-sFIc.-...EIPtp..ap.-lY+p.R.L......p.....E....G....K.D..Do.L....E..G.....Q....I...L.SV..ADKID.LLYEoFGEIQKtNPE.plFhEIYc.S.L-TIhp.F.-.c.Ls..SVQ.FIppIlPEM.LsEs..F..hs+sc.LpchThsIlp......pcpt.................................................................................. 
0 16 27 42 +12758 PF12918 TcdB_N TcdB toxin N-terminal helical domain Bateman A agb Bateman A Domain This is a short helical bundle domain found associated with the catalytic domain of the TcdB toxin from C. difficile [1]. The function of this domain is unknown, but it may be involved in substrate recognition. 27.00 27.00 27.20 27.80 24.40 24.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -9.05 0.72 -3.90 20 211 2010-04-04 12:49:17 2010-04-04 13:49:17 2 17 131 14 9 218 0 60.70 39 3.43 CHANGED scYsLhh-slcpc..l.slo-ppp.cphpplpclpctIspYpsh..pcKNS..ppGpsLLppQuplLppl .s.p.sLh.-slhpt..l.sho-p.hsKhh.pIp-lpclhspYpth.....sKNS..RpGlcLLpcQu-LLctl...... 0 3 3 6 +12759 PF12919 TcdA_TcdB TcdA/TcdB catalytic glycosyltransferase domain Bateman A agb Bateman A Domain This domain represents the N-terminal glycosyltransferase from a set of toxins found in some bacteria. This domain in TcdB glycosylates the host RhoA protein. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.83 0.70 -12.84 0.70 -6.02 14 376 2012-10-03 05:28:31 2010-04-04 13:55:39 2 24 176 14 22 503 17 327.40 31 26.98 CHANGED 
-KplHhlWIuGs.ss-sshsYIphahcs...sDYsahlWhDssAahssphspslhc.Ahstulpplccshs....sspcFhcch.clphch.................................................QcsFhNYshhps.pshs...D-hRhpYLppth.phsp-clppY.............l.cslschhupspscI+shc.h.phpctp.hphYppEhlhRWNhAAASD.lRlhhLKEhGGlYhDlDhhP.......uhsp-lhcsIpc.u...shh.-ssphccshs-ulh+htph..p.......hpshchspLsppspsplppllsphpphs..............clFpslsshhlp-h.s.....p.h.ppuhhssphlNshhlo+KsStssshlIpt.pptYp.Lpchhp...............................................................................................................................................................................................................................................................................................................................................................................................NshsspFhcolsshpshcp...hsh...p.hhhhuhhssYhpDuh...hPcAhSTLslSGPslhstshhchhp.ht..sc.hlhppth+..........................thsahpPpolhupsss.-cpSoWshsct+s ....................................................................................................................................................................................................................................................................lHhlWluGt...s-...pYhp.ahth.......pash.hahD.pAhhh.phpt.lhp.u.p.s.h.th.pt...................h..h..t...........................................................p.h.............D..hh.aL.p.h.th....p..tth.........................t....t...t......l..t.t........h.....................Y.......thh....R.h....N.hsuAoD.lRh.hLpphGGlYhDhD..hhP.........sht.pl.....h............................t......p.h..c.lhphh..........................t.p.s.l.tt...t.h.p.hhtphp................................................................................phF..ht....p........................hth.NthhhshtsS.h.phl..t.p.tY..l.t.ht.....................................................................................................................................................................................
...........................................................................................................................................................................................................p.......h.................................................................................................................................................t......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 3 4 13 +12760 PF12920 TcdA_TcdB_pore TcdA/TcdB pore forming domain Bateman A agb Bateman A Family This family represents the most conserved region within the C. difficile Toxin A and Toxin B pore forming region. 
27.00 27.00 33.70 34.40 26.00 25.80 hmmbuild -o /dev/null HMM SEED 663 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.01 0.70 -12.88 0.70 -6.61 18 84 2010-04-04 13:21:11 2010-04-04 14:21:11 2 19 42 0 12 90 0 620.80 33 30.49 CHANGED hupchssAspcLppcppLsppWlslhsolccpspstYplpFlN..cssspopalpTp-ppFhcapcahpcph.sslspthhh..psGclhtc...tsls-spsVssLNAuFhIQoLIpa......p.suppts.ssLusALKlpsYlshsQhuhGslpDssclspLlppAhptphphhsTt.......hpsh........hupssspGlullhsGsslGhsIaELupAcsslp+shhuTpLuhsusuhssusuulshuhhuu............ussuulLGshuV.luGlulGlsuLspsauhluEcA+uVucYFsplspuhppsua..phpsppp...llhPhssuVlsplDhpsNplpasoptIaps....tpGpotsGphsaFF...........ssssh-cpp.ulsIppulGhsppphphsh.spupshlLPssPpp.hhpYEYshlPGspopcssGhclL...................................++lcpsp.p......F.acFYsFPu-a..sIspl+.cYhsTslclhLDpcsRpLlVPplsc-h.+s+LpYclpGsGGpYpLsLsst.splplss.......ss......psspWIIDsspl...cssoIp..pssplhlGslhhcl..shs.ppshlhhspp-Ip..........................pl-hsspphpl......llutctpphps...............sspslpp....+lppLuhsppLpsphsh......spsh.....ptptpshspsaaDssppchla...h.DssstpsptshhsslssshAahhststtslWtlD 
..............................h.hcplssuhhcLpppppLs.cpah.sFpslpc..ss..p...taplpFIs..cpstcshaVpTccphFscatpalscpl.uplppphh...ssGclhtp...hslspsp.......tVssL.NAAFhIQoLIpa............suspts.ssLusAhKVpsYspLhphuhsslpDus+lVpLlpsAlppshsllsTh........pul.........................s..slusllsGl...slGhsIhELucspssl.+t.htspluh.usshssussulh...suu............hshuushuhhhVPLAGlusGl.suLlps.hhl.t-cAppVscY......Fsclshu.ppGsa.....ph.-.cp.......lhhP.sslVIocIDh....sss..p...lhhsp..ppIa+h....soGpTsosslsaFF...........susshshpp..tlsI........hssltlpppphsh.......sp......clhlLPssPpp.hhsaEhsh.hPGh..pohcssGhclL...................................cRlc-ph.c......FhacaauF.u-h......hIspL+.cY.-TsIcIpLDspsRphIlPhlsp-..........h...+pKLSYphhG...uG..GpYtlsLsph.hslslph.........s......csshWllDscpl.....cssoI....pssplphGsll.sl..pl...shpcstlllsppcIp..........................cVDhhs.p.phpl......llus-hphhh................sspplpp....+lshluhssclptshsh......spp......spppt.hspth.-ssh.phla...hpssp...t..hs.hu..hpshpshsh..hthhsuhahhD.............................................................................................................................................................. 0 2 4 9 +12761 PF12921 ATP13 Mitochondrial ATPase expression Wood V, Coggill P pcc Wood V, manual Family ATP13 is necessary for the expression of subunit 9 of mitochondrial ATPase. The protein has a basic amino terminal signal sequence that is cleaved upon import into mitochondria [1]. 
21.00 21.00 21.00 25.30 20.90 20.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.38 0.71 -4.78 14 101 2010-04-06 09:10:04 2010-04-06 10:10:04 2 3 99 0 74 100 0 128.60 24 20.08 CHANGED spphhssllhuhua.GplphlcphlcplWsIsspsp.tt...........hstssshaPopclLhullsua.shspslpsAlpll-pF.cpYs.lcls....cphWccLhpWuthhhsh+tssttphhttshthhtph.pss ............sEphhsslhhuhu+tGplptl.cphl.c.plW.sIsssth.ppst...............ht.ssslaPos+lLhAlspuF.usN.scl.tAlpllDhluppYs..lsls....pphWpcLhpWoalhos.chs.t.ptp....t..........h....................... 0 11 33 62 +12762 PF12922 Cnd1_N non-SMC mitotic condensation complex subunit 1, N-term Wood V, Coggill P pcc Pfam-B_15091 (release 24.0) Domain The three non-SMC (structural maintenance of chromosomes) subunits of the mitotic condensation complex are Cnd1-3. The whole complex is essential for viability and the condensing of chromosomes in mitosis. This is the conserved N-terminus of the subunit 1. 21.50 21.50 23.10 22.60 21.20 20.50 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.85 0.71 -4.63 44 291 2010-04-06 09:13:56 2010-04-06 10:13:56 2 14 238 0 204 298 1 161.40 25 13.28 CHANGED LssphppplhpLlsSulsshupplps.lpss.................-shspa+ptLEhYuaLlhhllphlp.......................t...ts..tt.hssttppppsspssstpWchs.splpphLpslsplLc.lcLs+lFhTos-+DpFls.LFoRsha.hlhEspth..hKspsl+hhlF+lluhuVK+Hupuhssp ......................................................thhpphh-hlhpslushuptlts.lpss..............p..........tsshspat..phLchhs.aLLthhlpshE.........................t.stps..sp...hstt......sp..ppptps....spspsa......chp.sphpthLphls...plLp...L............cLp.+l.atsos..c-pFls.LhTcssY.tlLE.s..ph......hKspsl+.tlh+lLshul++asHhhus.................... 
0 68 113 170 +12763 PF12923 RRP7 Ribosomal RNA-processing protein 7 (RRP7) Mistry J, Wood V jm14 Pfam-B_1057 (release 24.0) Family RRP7 is an essential protein in yeast that is involved in pre-rRNA processing and ribosome assembly [1]. It is speculated to be required for correct assembly of rpS27 into the pre-ribosomal particle [1-2]. 27.00 27.00 31.00 29.20 26.60 26.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.93 0.71 -4.13 61 327 2010-04-06 11:06:40 2010-04-06 12:06:40 2 9 269 0 220 324 2 124.30 31 42.70 CHANGED GhpcahppYcpp.hss..ptLppplcpahpta-...pccppppcpt.........tp.s.sDE.DGassVs+s..t+t.............sshspsp...htppttttptppc+c+Kphps..FY+FQhR...Ep++pclt-Lh+KFE-DKc+lpthKtpR.+F+Pa ........................................................ttahp.paptp..h.s......ptLppplcpaMpta-pcptpppcct.........ttp..shsDE.DG...alpVs+p...s+p..............................sshspst...........tspt.pht....tptpp.++..c+Kthts..FYpFQhR...Ep++pclspLh+KFEED+p+lp..hh+tpR.+F+Ph...................... 0 71 119 181 +12764 PF12924 APP_Cu_bd Copper-binding of amyloid precursor, CuBD Coggill P pcc manual Domain This short domain, part of the extra-cellular N-terminus of the amyloid precursor protein, APP, can bind both copper and zinc, CuBD. The structure of Cu2+-bound CuBD reveals that the metal ligands are His147, His151, Tyr168 and two water molecules, which are arranged in a square pyramidal geometry. The structure of Cu+-bound CuBD is almost identical to the Cu2+-bound structure except for the loss of one of the water ligands. The geometry of the site is unfavourable for Cu+, thus providing a mechanism by which CuBD could readily transfer Cu ions to other proteins. 
26.40 26.40 28.10 28.10 24.80 26.30 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.24 0.72 -3.46 17 340 2010-04-07 17:32:00 2010-04-07 18:32:00 2 18 84 23 111 306 0 57.50 64 8.54 CHANGED pCpF.Hhcph...shCpsapcW+ssApcsCps......cshpL+SauMLLPCG.lDhFpGVEaVCCPs .............KC+FhHQERM...DhCEoHh......HWHTVAKEuCup......cuhsLHs.....YGMLL.......PCG.lD+FRGVEaVCCP...... 0 20 27 61 +12765 PF12925 APP_E2 E2 domain of amyloid precursor protein Coggill P pcc manual Domain The E2 domain is the largest of the conserved domains of the amyloid precursor protein. The structure of E2 consists of two coiled-coil sub-structures connected through a continuous helix, and bears an unexpected resemblance to the spectrin family of protein structures.E 2 can reversibly dimerise in solution, and the dimerisation occurs along the longest dimension of the molecule in an antiparallel orientation, which enables the N-terminal substructure of one monomer to pack against the C-terminal substructure of a second monomer. The high degree of conservation of residues at the putative dimer interface suggests that the E2 dimer observed in the crystal could be physiologically relevant. Heparin sulfate proteoglycans, the putative ligands for the precursor present in extracellular matrix, bind to E2 at a conserved and positively charged site near the dimer interface [1]. 
21.90 21.90 22.30 23.00 21.50 21.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.24 0.71 -4.73 11 385 2010-04-07 17:34:15 2010-04-07 18:34:15 2 22 91 16 118 340 1 175.40 59 27.13 CHANGED sssssPsss.sul.DsYFppsssp........sEH..........ppFpcA+pcLEp+HRc+hscVMK-WpEAEcphcsL.+uD.......+pplhp+FQpplpoLEpEusuERppLlETHttRVtAhLN-+RRtAlEsYhsuLpssPPcs++lLpAL++YlRAEpKDRpHol+HacHlpps-P....E+Auph+splhpHLclI--RhNQSLsLLh ....................................h...sTsts.T..DsV.DhYhEsPuD-........NEH..........A+Fp+AKEpLEt+HRcRMspV..........M+EWEEAE...pQAK.NL.P.KAD.......+pslIQHFQph...VcuLEpEuAsE+Q.QLVETHhARV.EAhLNDRRRhAL.ENYlsALQ.u..s..PPRP++Vh.phL++YV......RAEp.KDRpHTL+HapHVhhVDP....cK.AAQh+s..QVh..THL+VI-ERhNQSLoLLY............. 0 21 30 66 +12767 PF12926 MOZART2 Mitotic-spindle organizing gamma-tubulin ring associated Coggill P pcc [1] Family FAM128A and FAM128B proteins have been re-named MOZART2A and B. The name MOZART is derived from letters of 'mitotic-spindle organizing proteins associated with a ring of gamma-tubulin'. This family operates as part of the gamma-tubulin ring complex, gamma-TuRC, one of the complexes necessary for chromosome segregation. This complex is located at centrosomes and mediates the formation of bipolar spindles in mitosis; it consists of six subunits. However, unlike the other four known subunits, the MOZART proteins, both 1 and 2, do not carry the conserved 'Spc97-Spc98' GCP domain, so the TUBCGP nomenclature cannot be used for it. The exact function of MOZART2 is not clear [1]. 
23.30 23.30 23.90 30.60 23.20 22.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -10.09 0.72 -4.40 12 71 2010-04-08 11:58:30 2010-04-08 12:58:30 2 2 50 0 43 73 0 86.80 47 58.46 CHANGED Mu...............................s.h.ph.hpptpsLss-Es..ELaELsphAGlslDs-VF+..........................lllDLl+hNVuPhAlhQhLKShCuupths.t......sssspusuh ....................ss..............s.........p....ushQKhul...++KKVLosEEh..ELaELuQAAGsslDP-VFK..........................ILVDLLKLNVAPLAVFQhLKSMCAGQRlusp......st-ssuhs.h..................................... 0 13 17 27 +12768 PF12927 DUF3835 Domain of unknown function (DUF3835) Wood V, Coggill P pcc Pfam-B_14632 (release 24.0) Domain This is a C-terminal domain conserved in fungi. 21.00 7.90 23.20 8.20 20.60 7.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.90 0.72 -3.02 55 188 2010-04-13 08:34:32 2010-04-13 09:34:32 2 10 114 0 149 197 0 71.90 25 17.68 CHANGED lluDtllE+s..............ssstchspphhpcpls.pahch+pthh.t.t.....................................pssPtKhSRFKuu..Rhs ................................................................................................lhcp.......................thp.thh.ppls.....pahch+pphlppps.t........tttpt.....................sl-c................psss.c+...lSRFKuuRh.... 0 35 78 128 +12769 PF12928 tRNA_int_end_N2 tRNA-splicing endonuclease subunit sen54 N-term Wood V, Coggill P pcc Pfam-B_644 (release 24.0) Domain This is an N-terminal family of archaeal and metazoan sen54 proteins that forms one of the tRNA-splicing endonuclease subunits. 
21.10 21.10 21.10 21.40 21.00 20.80 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.29 0.72 -4.51 49 270 2012-10-01 20:02:48 2010-04-13 10:50:06 2 9 232 0 196 267 0 78.90 30 17.41 CHANGED MassL.shsRhpph+shshuhap..s....................................................ppptspVsps+GpaapohG..............hs...pts+ha..LhsEEALYLlERGoLplhhss ..................................................phL.s..Rhp..p....p.shs...huh.ah..P...................................................................cpthspl..h.ps.+GpaapohG..............................hs........ppG+ha...LhPEEALYLlEpGolplha..s..... 0 58 101 158 +12770 PF12929 Mid1 Stretch-activated Ca2+-permeable channel component Wood V, Coggill P pcc Pfam-B_1244 (release 24.0) Domain MID1 is a yeast Saccharomyces cerevisiae gene encoding a plasma membrane protein required for Ca2+ influx induced by the mating pheromone, alpha-factor. Mid1 protein plays a crucial role in supplying Ca2+ during the mating process. Mid1 is composed of 548-amino-acid residues with four hydrophobic regions named H1, H2, H3 and H4, and two cysteine-rich regions (C1 and C2) at the C-terminal. This family contains the H3, H4, C1 and C2 regions. suggesting that H1 is a signal sequence responsible for the alpha-factor-induced Mid1 delivery to the plasma membrane. The region from H1 to H3 is required for the localisation of Mid1 in the plasma and ER membranes. Trafficking of Mid1-GFP to the plasma membrane is dependent on the N-glycosylation of Mid1 and the transporter protein Sec12. This findings suggests that the trafficking of Mid1-GFP to the plasma membrane requires a Sec12-dependent pathway from the ER to the Golgi, and that Mid1 is recruited via a Sec6- and Sec7-independent pathway from the Golgi to the plasma membrane. 
28.60 28.60 29.10 29.00 27.80 28.50 hmmbuild --amino -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -13.04 0.70 -5.76 34 145 2010-04-13 13:17:33 2010-04-13 14:17:33 2 5 138 0 108 145 0 402.80 31 66.37 CHANGED hhlalSsNhCppPs.........tssss.spLplYhS....hspss.tstssuphph....hcsGYhpshhp.sss................................slYIuVpu...............sssos................upasYclusSp.sshhapa-s..csalhhVDoDssuALLsTsNLTssstssts.pphht...............s.Yolasa..shs.cs.ths..uLppSaCAlpssspl.......hsstshps.......................ShTsRus......u....+pQFYlsGLNsSosYhuaLs..............stshsssGGtVapshpFsT+s.................sssCuLlasLsFCspVAYuVPusssh...............stspLsphYDshAtulYpNFohuLpQlsCsssscupYS.h+sCsDCssuYKsWLCAVoIPRCsshsss.salp..........................................tspssoRNshIschIpP.ssYhElLPCh-lCaslVRsCPushGFsCPs....s.hlstSYshcs...sss.hTCNYlGssh ..............................................................................................................................................thlalSuNhCtpPt...................t.stssspLplYhS............hpps....s.ts....ts....sp.sh......Fp.pGahth.hs..sss................................slYlultu...............sss.sp...................................................................................hsu.asaclusS..sshhaphcs.....pshlhhlDoDssuALL.TsslTss.s...st.........pph....s...............s.ashash....shp.ss.thp..GlppShCulpststl........htstshps................................................................uhTpRG........................s.....+pQFalsGLNsSosYhuhLs............pt.....tss.shsssGGtVapshpFpT+s........................sssCtllasLsFCspVAYuVPussph...............ststLuphYDshApshYtNFopuLpplsCsssspshYShspsCsDCtpAYKpWLCuVoIPRCs-h.sss....salt.............pt...........................................hspssoRNs.hI.s..phIpP..usYhElLPC.-lCaslVpsCP.ushGFsCPp....sthhp.SYshtt.....sss..oCsY.Gss................................................................................. 
0 27 59 91 +12771 PF12930 DUF3836 Family of unknown function (DUF3836) Finn RD rdf Manual Family Family of uncharacterised proteins found in Bacteroidales species. Test. 25.00 25.00 26.80 25.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.66 23 146 2010-04-13 15:35:07 2010-04-13 16:35:07 2 1 61 1 16 108 1 125.20 32 86.98 CHANGED MKspslhKsllhsuhhhsoslsssAsu....p.ssaIYNpEppsGhlsucTlaKh-.G..phLppah+YsYpYD-psRlopKcAhKWNss+spWpshaplsYs.Yssp..-lsssYucWNp++psYshshppoVY.phDs ..................MKspshhKsll.h.sAhlhssshssssps....t.sshIaNp-.p.psshllupslY+h-..G....ptLspahKYsYpYD-ppRhspKcAhKWsuscppWts..hplsYs..Yssp...plshsYscWNsc+psashsschoVh.h.s.s............ 0 2 14 16 +12772 PF12931 Sec16_C Sec23-binding domain of Sec16 Wood V, Coggill P pcc Jackhammer:O14029 Domain Sec16 is a multi-domain vesicle coat protein. The C-terminal region is the part that binds to Sec23, a COPII vesicle coat protein. This association is part of the transport vesicle coat structure [1]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.99 0.70 -4.67 37 648 2010-04-13 16:54:12 2010-04-13 17:54:12 2 27 269 2 424 664 2 240.20 24 17.51 CHANGED plhshL.s....Gc+-+AlhhAlcpR.WuaAhllu......Ssls+sh....WppVlp-Fl+.c-h.p.suss.......h...p........LuslaplFuGNhcpul-E.Lsss..................s.t..hs.ts...............W+-sluhlLsN.....+ss.p....sp..pulhs...........lGcLLsphGphtAAHlCalhA...tssh........s.sts..h...h..s..ss...s...shh.hlG...........h.ps.cuhLho........ElYEauhol........ss...h.....s...hPHLhs....aKLhHAhsLA-hGhpscAppYCDsIuusLK.uts+pS........h.hph...pLh..ppLp....c.lusph....p....s...s....s...o...SWl....SKPsh-KV.Gpl.tpFspFluGD ......................................................................................................h.hpphLlhGph..c.pAlphshcpphaucAlllA...pth...spch...atp.shp....pahppp........t..................p................l.pslht.....lhusph...ps.h.ps...hs..................................................hts...........................................W+ppLAhl...lo.s.........tss..s........p.t...ps.l.ss...............LGc.pL...t...p...c.......G.........h...h...p...uAphCYl....h..A.....ts..sh...................t........h.....h...s...p..p......sphshlu.......................................ph.hs.....csl.hs.........................................E.hhEaspsl............ss.p.h....hlssh.s...hK.h...YAphL.A-hG.hhs..pAhpYhphl.t..h..............................................................................................................................................h............................................................................................................... 0 106 230 333 +12773 PF12932 Sec16 Vesicle coat trafficking protein Sec16 mid-region Wood V, Coggill P pcc Jackhammer:O14029 Domain Sec16 is a multi-domain vesicle coat protein. 
This central region is the functional part of the molecules and thus is vital for the family's role in mediating the movement of protein-cargo between the organelles of the secretory pathway [1]. 23.00 23.00 23.30 23.90 22.80 22.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.54 0.71 -4.14 58 342 2010-04-14 13:53:08 2010-04-14 14:53:08 2 8 218 2 231 328 0 110.60 28 6.82 CHANGED stPlhpaGhGGpllsshP...s.................st.sshhpsssuplclpslpsllsssp.................thppFPGPLhst.................KoKKKDlhpWLpspIsthppt...............................stpcpc-chLLWclLclhlc.psGs .............................ashhpFG.GGpllpshP..........................................s.stp.spsu..Vclcshcsllppst..t...................phpsF..PGPLhpt.................cs+KcDlhpahppphsphtps...............................phtcpcsphLLWplLhlLscpsG........... 0 52 104 174 +12774 PF12933 FTO_NTD FTO catalytic domain Bateman A agb Jackhmmer:Q9C0B1 Domain This domain is the catalytic AlkB-like domain from the FTO protein [1]. This domain catalyses a demethylase activity with a preference for 3-methylthymidine. 
27.00 10.00 27.20 15.80 26.00 9.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.79 0.70 -5.24 8 102 2012-10-10 13:59:34 2010-04-14 15:13:07 2 3 51 1 40 84 3 255.40 57 61.02 CHANGED Ds-.FtpshpssYsGFllDsPssLPscLHc-VppAFcoht+cGpFh+DlVpsGsK.lohT.VSRsLlG-pGhTY+Y.cLRLFAhPWss--p.......................chttshcshpRLN-hLspcopphLcchsssph.ssshpupCE..........................aNVTLINhM-P.ptpspstLK-EshFGMGK...hSVSWHpDSuLp-pSTVAVYppost..cs-...............cssWpVAL+.uhDttT.......PuLtlPLcstssYYMhcDFNuTHcHAVLsGsos.RFSSTHRVAhs ...................Ds-.Fap.Wph+YsKLllRcusolPE-LHccVQcAFLTL+KHGChF+DLVR...IpGK..DlhTPVSRlLIGsPGCTYKYLNTRLFTVPWPscGsphKYsp........................................s-ItsACpshlKLNDYLphEolpALct.s....ht-p...ppss.s..s.t....thh.ts.............................tp-.hshpsRsuYNlTLLNaMDP...tpMsYLK-EPYFGMGK...MAVSWHHDE..NLV-RSsVAVYsYSCE...s.-tpspp.....................................................phcGRDPshWHVGhKluWDIET.....................PGLAlPL+QGDsYaMLDDLNtTHQHCVLAG.pP.RFSSTHRVA-C....................................... 0 8 11 20 +12775 PF12934 FTO_CTD FTO C-terminal domain Bateman A agb Jackhmmer:Q9C0B1 Domain This domain is found at the C-terminus of the FTO protein which was shown to be associated with increased BMI and obesity risk in humans. The N-terminal domain of this protein is a DNA demethylase and this domain is found to associate with the N-terminal domain in the crystal structure [1]. This domain is alpha helical with three helices that form a bundle [1]. 
27.00 27.00 31.20 31.10 18.20 23.30 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.23 0.71 -4.69 8 94 2010-04-14 14:20:04 2010-04-14 15:20:04 2 3 55 1 41 75 2 134.40 57 31.48 CHANGED sTapYI+sRCppALpslsshpsps..........s.thpsh-ssslphhtElHpEVEFpWlR.FWlQGsRHAppHc.YWpptIAELspuWcpMEtshphlLsc.l+pusp.......os-ppscsasslLhsLpEpp-LRcEastRspusAatsLPsDQ+PsshP........taD-ss...PLPFDLcsVIspLcppp .......................GTL-YIhpRCplALQNlps-scss..........shSLKShEsuVlKQsEEIHN..EVEFEWLRQFWFQGpRYpKCTD.WWspPMspLEthW+KMEthophlLpp.l+pct...........................................s.cppschhsslL..LptRppLRpEWttRppsphhptlP.pptP.shP........happ.pp...PLPhsLp.hl.tlpt..h.......................... 0 7 10 20 +12776 PF12935 Sec16_N Vesicle coat trafficking protein Sec16 N-terminus Wood V, Coggill P pcc Jackhammer:O14029 Domain Sec16 is a multi-domain vesicle coat protein. The overall function of Sec16 is in mediating the movement of protein-cargo between the organelles of the secretory pathway. Over-expression of truncated mutants of only the N-terminus are lethal, and this portion does not appear to be essential for function so may act as a stabilising region [1]. 
20.30 20.30 23.60 23.60 17.00 16.60 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -12.08 0.70 -4.50 14 66 2010-04-14 15:49:59 2010-04-14 16:49:59 2 2 59 0 47 69 0 218.80 25 12.80 CHANGED sshWss...tussp-sstcFFsQlpoQTKPI.............ahPsE.sE....SRaEEGhPLlDpsst.........ssPs-pst.pssplDslFstDcs...............s-sutFFsphpps.s.p.tpsssplpRKsToQVLsShpsst..s...uPhS................sPsup..asphLs..ssstp..ptp..cs.S..--sLutp...E.....h.p...p...sE--..............LAtRWpAhL-sD..D.DLLl-D-h.t........t.tph.stss...stssttsls.....SPhsosp..s.spPhhts..sYTP..............H .........................................t.....t....s...ptt..F.sp.psQshPh.....................................hs.E.s-....uRaEEuhPLlssspt...........shppst.ttstl-shFptDts...............s-pusFFsp.t.t......p.....pp..s.sshp+KsT.QVLsohphpp.....p.ushs...............ttst.sp.....p.sp.l....ststp...hp....p..p..t-phttt..........................tp...................sE--..............LAp.+....W.p....AhLts..D..D..DlLlDD-h......................ttp..stss.....ss.s..h.s.....p.hsspp..s...tPh.t.......YsP......................................... 1 6 15 32 +12777 PF12936 Kri1_C KRI1-like family C-terminal Wood V, Bateman A pcc Pfam-B_8372 (release 7.7) Family The yeast member of this family (Kri1p) is found to be required for 40S ribosome biogenesis in the nucleolus [1]. This is the C-terminal domain of the family. 
25.00 25.00 37.90 37.70 20.90 19.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.13 0.72 -4.04 58 317 2010-04-14 16:07:41 2010-04-14 17:07:41 2 6 275 0 236 318 6 94.60 36 14.27 CHANGED hccph-chl-EhapL-h--hl.........ssthssR........FKYRcVsPps.FGLosc-ILhA.DDppLNpaluLKKhAsYRscctch....+-p+chp++t+hcch++csh .................p..cphEphl-c.hhpLDhE-ll........sc.hssR..............F+YRcVsPss.FGLosc-ILhA.DDcpLNpasuLKKhusYRscctch...........c-p+phpcKt..c.pph+cph......................................................................................... 0 84 133 196 +12778 PF12937 F-box-like F-box-like Wood V, Coggill P pcc Pfam-B_22368 (release 24.0) Domain This is an F-box-like family. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild --amino -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.40 0.72 -4.23 369 8154 2012-10-02 00:56:31 2010-04-14 17:42:23 2 360 366 19 5916 10186 51 47.30 25 8.94 CHANGED hspLPsElllpIh.................s....h.....L.s..sp..-.........Lhp.s.u.....tVC+pWppls..tcs.tL.W+chhhp .................................................tLPsE..l.l.h.p.Ih................................................................................................s.........a.......................L..s.....sp.......s..............................L.hp..s..u.................tVC...+p.Wp.pls....tss...l..Wpph...t................... 
0 2058 3220 4628 +12779 PF12938 M_domain M domain of GW182 Coggill P pcc Pfam-B_71410 Domain \N 21.90 21.90 21.90 23.90 21.50 21.40 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.84 0.70 -4.55 11 119 2010-04-14 16:45:07 2010-04-14 17:45:07 2 12 69 0 70 104 0 239.10 38 16.86 CHANGED sHsu.u.tsshussc..........asuGuu..suhsFspNs.....sshuusssssGsG..ssNssls...ulssh......caLs..ps..............ushss.usuGupssGuushsustss...t.s.....tpu.QPSspQlRhLVQQIQhAVpsGaLNsQILNQPLsPQTLhLLNQLLs.pIK.......................pLQtsQpsLpRttsu...........................pshplsltIsphKppIssLQNQIss......pQulalK....t.....t.....................t..hupuusshhtspsshssLpsph ..........................................................................................s.tts...t.sa..p...........uuhh...ttsssu..hsssshp......u..suhs.s.tth.thhst.thQs.........................................Ghhusuuuupupsh..p.p...ss..t...PsstslsusQPs.........hpupsPphh....S.ph.AQ.LQhAs+shtLNsuLLTpP.Is.PQphshLNQLhQ.Qls.......................+LQl..Q.QQhLQs.ptNsot.............................cppppQVuhsIsshpQQIQQhQpQlA........QALhhKQ............................p.shpssuu+SuhDsF....s.s+sphsuL.s...p.................................................. 0 16 24 51 +12780 PF12939 DUF3837 Domain of unknown function (DUF3837) Finn RD rdf JCSG_target_388609 Domain A small, compact all-alpha helical domain of unknown function. This domain is currently only found in Clostridiales species. 25.00 25.00 28.70 31.00 23.50 21.80 hmmbuild --amino -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.31 0.72 -4.02 6 18 2010-04-22 13:29:43 2010-04-22 14:29:43 2 1 18 0 3 18 2 102.20 34 95.09 CHANGED MlohIsKQAlslKsphp..AshsuNYEhYYAlGlhuKuhslshsctchs.hEL+-pL-pclct.p..PtDEp.phLhtlLpcac.sDDsaDtQMhELhpaGhpscp MlopIA+QulhIKsp.hp.puslTuNYEhYYAsGlluKhhGlshs-......chpshELp-pLppclcphp..PpDEp-+hLhplLpsYc..s--shDpQMhELlpaGhp-p......................................... 
0 1 3 3 +12781 PF12940 RAG1 Recombination-activation protein 1 (RAG1) Mistry J jm14 jackhmmer:Q6U1Q1 Family This famiy contains recombination activating protein 1, which is the catalytic component of the RAG complex. The RAG complex is a multi-protein complex that mediates DNA cleavage during V(D)J (variable-diversity-joining) recombination [2]. RAG1 mediates DNA-binding to the conseved recombination signal sequences (RSS) [3]. Many of the proteins in this family are fragments. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.50 0.70 -6.13 3 12026 2010-04-22 13:54:52 2010-04-22 14:54:52 2 13 8629 0 36 11047 0 335.50 71 76.21 CHANGED LLPGYHsFE........WpPPLKsVSsSTDVGIIDGLS.GhssSV-Easl.-sIu+RFRYDAALVSTLKDLEEDILEGhpcQGL--su.StlFTVVlKESCDGMGDVS.EKHGSGPhLPEKAhRFSFTIMsIoVK+cDGsslpVFEEsKPNSELCCKPLCLMlADESDHET.TAILGPllAEREAMKsScLhLEIGGloRsaRFIFRGTGYDEKLVR-VEGLEASGSsYICTLCDATRsEAuQNLVhHSITRoHoENL-RYEhWRoNPYNESsDELRDRVKGVSAKPFlETpPS...IDALHCDIGNAsEFhKIFQcEIGEVY+..NsNsTKEERK+WQotLDKpLRKKMNL+PlMRMNGNFARKLMTcETVEAVCELVpsEERREALRELlcLYlpM+PVWRuspPAKECPDLLCpYSa+SQRFAcLLST+FKYRY-GKITNYLHKTLAHVPEIIERE ...................................................LPGaH.FE........WpPsLKNVSsshpVGIIsGLS.Ghs.SlDDhPs.DTIs+RFRYDsALVsALKDhEE-IhEGh+tpsl-D.h....susFoVllKESCDGM.GDVS.EKHGuGPAVPEKAVRFSFTlMsIo.........l.....................h.......t.s..............p............-.............s.................l....p.............I.FpEsKPNSELsCKPLCLMhsDESDHETLTAILuPllAERcAMKpScLlLphGGl.RoF+FhFRGTGYDEKhVREhEGLEASGSsY...lCTL..........C.DuTRhEASQNhVhHSITRSHsENLERYElWRoNPapESsDELRDRVKGVSAKPFhET.Po...lDALHCDI....GNAsEFY+IFQ..EIGElYp.......pss...so+EER+pWpssLDKpLRKKhpLKPhMRMsGNaAR+LMotEsV-sVCELl.sEE.Rp.ALpcLMpLYlpMKPVWRuosPs+-CP-.lCpYSapSQpFA-lLuo.FpYRYtG+ls.................................. 
0 8 10 17 +12782 PF12941 HCV_NS5a_C DUF3838; HCV NS5a protein C-terminal region Mistry J, Bateman A jm14 jackhmmer:A1YSL9 Family This is a family of proteins found in the hepatitis C virus. This family contains the C-terminal region of the NS5A protein. CC The molecular function of the non-structural 5a protein is uncertain. The NS5a protein is phosphorylated when expressed in mammalian cells. It is thought to interact with the ds RNA dependent (interferon inducible) kinase PKR, Swiss:P19525. 25.00 25.00 29.00 28.90 18.90 24.90 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.77 0.70 -4.80 17 15967 2010-04-22 15:12:44 2010-04-22 16:12:44 2 30 112 2 0 9643 0 185.50 77 38.13 CHANGED SHITAEsApRRLuRGSPPSLASSSASQLSAPSLKATCTspssHPDAELIEANLLWRQEhGuNITRVESEsKVllLDSF-PLsAEh.DDRElSVuAECaRPsRPKFPPALPIWARPDYNPPLlEsWKtPDYcPPsVpGCALPPts.PPVPPP.RRK+sVpLDESsVSpALApLAcKoFspsssssct.SssGhsssosssssssstsDssSDssSaSSMPPLEGEPGDPDLSSGSWSTVSpE-D....VVCC ...................SHITAEsAtRRLARGS..PPSlASSSASQLSAPSLKAT....C.....TT...+....H....D....S.....P....DA....DLI.EA....N....L....L.....W....R....Q....E.M....GGNITRVESENKVVILDSFDPLpAEE.DE.RElSVPAEILRKSR.+FssAhPlWARPDYNPPLLEoWKcPDY.PPVVHGCPLPPspuPPlPPP.R+KR.TVVLTESTVSoALAELATKoFGSSpoSu..............hs.sssssss.......................................................................t.S.-.s.t......................................................................................................................................... 0 0 0 0 +12783 PF12942 Archaeal_AmoA Archaeal ammonia monooxygenase subunit A (AmoA) Mistry J, Finn RD jm14 jackhmmer:A7U5N3 Family This is an archeael family that contains ammonia monooxygenase subunit A. Ammonia monooxygenase is an enzyme that oxidises ammonia to nitrite and nitrate, thus playing a significant role in the nitrogen cycle. Ammonia-oxidising archaea (AOA) are widespread in marine environments [1]. 
25.00 25.00 33.70 33.50 20.80 18.20 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.03 0.71 -4.50 8 13679 2010-04-22 16:07:26 2010-04-22 17:07:26 2 1 283 0 2 13456 21 177.80 85 90.34 CHANGED VAsNSTLhTINAGDYIFYTDW.....AWTSFVVFSISQShMLsVGAhYYhsFTGVPGTATYYAhIMTlYTWV...AKGAWFA.LGYPYcFllsPsWlPSAhLlcLuYWAT++NKHuhILlGGsLhGLSLPLF....NMVNLLhVpDPLEsAFKYPRPTLPPYMTPIEPQVGKFYNSPVALGAGAGAVLoVshAALG ..............VAVNSTLLTINAGDYIFYT.DW.....AWTSFVVFSISQohMLsVGA.....sYYL.hFTGVPGTATY.YAhIMTlYTWVAKGAWFA.LGYPYDFIVsPVWIP.SAMLLDLsYWA.T++NKHu.LILhGGsLVGhSLPLF....NMlNLlhVtDPLEsAFKYPRPTLPPYMTPIEPQVGKFYNSPVALGAGAGA..VLoVshAALG................. 0 1 1 2 +12784 PF12943 DUF3839 Protein of unknown function (DUF3839) Mistry J, Finn RD jm14 jackhmmer:A2DDZ9 Family This is a family of uncharacterised proteins that are found in Trichomonas. 20.00 20.00 22.40 21.80 18.60 18.00 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.70 0.70 -5.21 3 2523 2010-04-23 10:20:04 2010-04-23 11:20:04 2 6 1 0 2523 2523 0 188.50 85 78.54 CHANGED TTuPNSLVMNP.TSMLVEMKsFIPSSFTFETEIQKIKQELLTSsLDCoAKDETNEQYLYEMQDIIDHLPKLPEIQQQKLTIPEFDEItVKsTDSAEIKKFIRKVNsEFLGFHCNHKVMDKDCDMVYKNISDIYKSEEFKTYDNFVSLVAcCVWEIRDKDKRGKVWNEQIKPTsSELKKTIDALVVLAGQISMYNAKMNPQCSKCKAAIRKYNYSVKEIERMRNDYADLKKEAEKPAEDKMNML ....................TTuPNSLVMNP.TSMLVEMKsFIPSSYTFETcIQKIKQE.LLpusLDCoAKDEpNEpYLYEMQDlIDHLPKLPEIQQQKLTIPEFDEIEVKsTDSVEIKKFIRKVNYEFLGFHCNHKVMDKDCDMVYKNlSDIYKS.tEFKTYDNFVSLVAcCVWpIRDKDRRGKVWNEQI+PAhFEhK+sIDALVVLAGplS.YNAKMNPQ.C.SKCKAAhRKYNYSVKEIERMRNDYADLKKEAEKPAEsKMDML................. 1 2523 2523 2523 +12785 PF12944 DUF3840 Protein of unknown function (DUF3840) Mistry J, Finn RD jm14 jackhmmer:Q1H8S6 Family This is a family of uncharacterised proteins found in hepatitis A viruses. 
25.00 25.00 37.80 37.70 23.40 19.10 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.39 0.72 -4.18 2 2908 2010-04-26 15:16:45 2010-04-26 16:16:45 2 5 19 0 0 947 0 80.40 91 47.07 CHANGED LSFSCYLSVTEQSEFYFPRAPLNSNAMLSTESMMSRIAAGDLESSVDDPRSEED+RFESHIEsRKPYKELRLEVGKQRLKYAQEELSNEVLPPPRKhKGlFSQA ....................FYFPRAPLNSNAMLSTESMMSRIAAGDLESSVDDPRSEEDRRFESHIECRKPYKELRLEVGKQRLKYAQEEL.................... 0 0 0 0 +12786 PF12945 YcgR_2 Flagellar protein YcgR Mistry J, Auchincloss A jm14 jackhmmer:Q9KNC3 Family This domain is found N terminal to Pfam:PF07238. Proteins which contain YcgR domains are known to interact with the flagellar switch-complex proteins FliG and FliM. This interaction results in a reduction of torque generation and induces CCW motor bias [3]. This family contains members not captured by Pfam:PF07317. 21.60 21.60 21.70 22.10 21.40 21.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.85 0.72 -4.10 119 585 2010-04-27 15:34:41 2010-04-27 16:34:41 2 3 451 5 179 481 9 88.40 21 37.67 CHANGED lphGpplp.l....ph.....tss....ttt.....hp.opl..luhpps.thlhlshP.....hhsstthhlh..pGptlpl+hhspss....lhsFpoplhp..hh..ppPh.hl..hlshP ...................phGpclp.l....pl......psss........tpp.......hhhp..oplluh.....cps..thlllshP.....hpsspthhhp..pGhtlplchhssps......lhtFpoplhp..hh..pcPhshlhlphP........... 0 70 123 158 +12787 PF12946 EGF_MSP1_1 MSP1 EGF domain 1 Bateman A agb Jackhmmer:P04933 Domain This EGF-like domain is found at the C-terminus of the malaria parasite MSP1 protein. MSP1 is the merozoite surface protein 1. This domain is part of the C-terminal fragment that is proteolytically processed from the the rest of the protein and is left attached to the surface of the invading parasite. 
27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -8.15 0.72 -4.17 15 1331 2012-10-03 09:47:55 2010-05-02 12:25:03 2 13 39 10 19 696 0 36.90 71 6.05 CHANGED sCls.sssPpNAGCFRassGpEEWRCLLGaKK..-usp..Cl .......pClc..ppsP-NuuCaRaLDtpEEh+..CLLsaKp..EGsK..CV............ 0 8 9 15 +12788 PF12947 EGF_3 EGF domain Bateman A agb Jackhmmer:P04933 Domain This family includes a variety of EGF-like domain homologues. This family includes the C-terminal domain of the malaria parasite MSP1 protein [1]. 28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.66 0.72 -3.99 174 4024 2012-10-03 09:47:55 2010-05-02 12:53:04 2 685 140 8 1803 4138 79 37.60 36 6.42 CHANGED Ctssss..sCcspApCsNosG.......s.......asCsCp...sGapGDGhs.C .............Ctps.s.s...sCc..s..s.A.pCp.pssu..........p.........................hsC.p.Cp......sGah..G..-.Gh.C........... 0 749 845 1435 +12789 PF12948 MSP7_C MSP7-like protein C-terminal domain Bateman A agb Jackhmmer:Q95VZ1 Domain MSP7 is a protein family the malaria parasite that has been found to be associated with processed fragments from the MSP1 protein in a complex involved in red blood cell invasion. 27.00 27.00 28.70 33.70 21.00 25.80 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.71 0.71 -3.89 21 213 2010-05-02 13:01:12 2010-05-02 14:01:12 2 1 10 0 31 191 0 128.40 38 35.71 CHANGED lKalDpLaD............................-lLss.pspKsplcsspa............+sKYNpF+ccY-...shNppEYcIlK+Llssahppsstss.ttsslh.......clFKKuLpDccapccF+NFlaGlYuFAK++NYLs......................sp+hps.pc.YpplFcNslsL .................lKhlDcla-............................-VLpp..spcsclssspa............psKYs-FKKca-.F.slNppEY-IIKpLIhsFhpcsspspptpsclh.......plFhKsLcDcca+cpFKNalYGlYuaAKp.+sYLp..................................tc+hcs.cc.Y+plh-pshsL.... 
0 8 11 27 +12790 PF12949 HeH SAP_2; HeH/LEM domain Mistry J, Sazer S, Wood V jm14 Manual Domain This is a HeH domain. HeH domains form helix-extended loop-helix (HeH) structures.\ This domain is closely related to Pfam:PF03020 and Pfam:PF02037. 28.90 28.90 29.10 28.90 28.60 28.80 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.09 0.72 -7.38 0.72 -4.40 47 275 2012-10-03 03:04:30 2010-05-04 14:05:07 2 13 241 1 131 254 0 34.40 38 7.90 CHANGED ssoLpVscLRplLsp+slpaPusAKKupLlpLhpc .....psLTVscL+slLsppsl.sasusAKKu-LltLhp....... 0 35 72 115 +12791 PF12950 TaqI_C TaqI-like C-terminal specificity domain Bateman A agb Bateman A Domain This domain is found at the C-terminus of the TaqI protein and is involved in DNA-binding and substrate recognition. 27.60 27.60 27.60 27.70 27.50 27.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.61 0.71 -4.35 40 635 2012-10-02 00:09:28 2010-05-04 17:03:46 2 18 482 22 163 675 44 141.90 19 16.42 CHANGED hslLpG.....cs.lp+Yplpas..................spYlsat.cthppsp...................................cchac...........p.KIll..Rplus.pls....usaDpcshhshsshhhlhhpstt..................lshchllulLNSclhp.aaapphh.p...tphh.+lphppLpplPl .................................................................................h..hhcG......p.s..Ip+a.t.h.p.h.s.....................................t.al...h.h.....pthpptp.................................................................................................................tphac.................ptKIlh...tphss...cst.........FshD.s...p.....s...hhh.....p.......ss........sah..lh..tt...............................hs.h.ca.L.lu.lLNSclhp.aah..cph.htth....tssh..h..chptp.lp.plPl...................... 
0 70 132 152 +12792 PF12951 Autotrns_rpt Autotransporter-associated beta strand repeat Bateman A agb TIGRfams:TIGR02601 Repeat This model represent a core 32-residue region of a class of bacterial protein repeat found in one to 30 copies per protein. Most proteins with a copy of this repeat have domains associated with membrane autotransporters (Pfam:PF03797). The repeats occur with a periodicity of 60 to 100 residues. A pattern of sequence conservation is that every second residue is well-conserved across most of the domain. These repeats as likely to have a beta-helical structure. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.49 0.72 -4.24 713 7020 2012-10-02 14:50:22 2010-05-12 12:53:43 2 164 932 0 1415 6703 697 31.50 41 9.95 CHANGED uLsKsGsGTLs.L..o...G..sNT..YoGuTsl.suGoLplu ..................LsKsG.s.G.o..Ls.L...o............G....sNo..........Y.......o..G.s...T.....sl.s.u.G.oLtl................ 0 364 771 1068 +12793 PF12952 DUF3841 Domain of unknown function (DUF3841) Bateman A agb Jackhmmer:Q189I4 Domain This presumed domain is around 190 amino acids in length. As yet no function has been given to any member of the family. 
27.00 27.00 27.40 27.30 25.70 26.80 hmmbuild --amino -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.31 0.71 -4.81 31 186 2010-05-13 13:05:00 2010-05-13 14:05:00 2 1 139 0 28 161 0 172.10 25 88.45 CHANGED plaTtQscpshcplccsGhhhs+cE...Ylchchtp.u.hahpsYcWhlccupctl.shPpsscaPlWh..............uhpsctsh.p.ssssllLpLclPp-pllhh-hpcWshlLNhtYlspc-pDcttacchl....cphGltpphphh.o...thYPpl+pclpcSWcR.............................lF.........sttshs.ptlpuslWcl++E.Wlh .................hlaThQsppshcplccpGhh.hsppc....alp............p.hhhuY..cWhV+ph.cp.+l...s.....pspaPIWsh...............ssppctsh...p...s.......pchVlLpLclPcchllhoshchWshhhspt.hhs...ppp..-ppthcchh....pp.th............hh.t........hsphh.phpcSac+................hF......................................t.tt.t.....ptlpus.Wp.++E.hl................................................................. 0 11 21 23 +12794 PF12953 DUF3842 Domain of unknown function (DUF3842) Bateman A agb Jackhmmer:Q189S9 Domain This short protein is found mainly in firmicute bacteria. It is functionally uncharacterised. 25.00 25.00 31.40 31.30 24.00 20.90 hmmbuild --amino -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.54 0.71 -3.99 37 142 2010-05-13 13:13:52 2010-05-13 14:13:52 2 1 128 0 53 126 4 128.70 46 92.09 CHANGED +IsVIDGQGGGIGppllcpl.+cphsc..l-IlALGTNuhATusMLKAGAscGATGENAllhsst+..ADlIlGPluIlhAsuhhGElTPtMApAlupStApKlLlPls.+ssh.ll...GspspPLscll-phlp.clp .pIsVIDGQGGGIG+pllcpL.+cphsc.......lcIlALGTNulATusMLKAGAscGAoGENAllhsspc..sDlIlGPluIlhssuhhGElTPtMApAlupSpApKlLlPl.s.+ssh.ll...Gsp.stPLschlcphlp.l.t... 0 36 50 51 +12795 PF12954 DUF3843 Protein of unknown function (DUF3843) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 
25.00 25.00 119.70 116.80 18.30 17.40 hmmbuild -o /dev/null HMM SEED 387 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.32 0.70 -5.43 12 103 2010-05-17 00:57:42 2010-05-17 01:57:42 2 2 61 0 10 91 1 236.70 27 97.99 CHANGED h+IYhKsWLplHshspshsTDpaYlshAN+lhslhcpo.Lh.shpt...pKplslhhAhYhEDsIushGhW+tFhptHppLYG+YLPFYshs-sYhsDEINhEDIpFlLWshhp..thht.tctsh.sPh-tslhchAphhYslh-cpaEpAP.s-phsphahs.sphhpt...........................................................................................hss..stpstpchscphcpFhpuopG+.LlYFssYc-LppFhlcsLpWpsccsphLPphppp+pFllaAss.KGlLlutslscYhsDccNPhYsucpAtppuachFs..hhCP.DLL+Yshp+slLPDAQhP......suKcll+cNWDFIARaaLtEY ............p.Whthp.h.t...sD.aahthAsplh.hh............l............thsl.hshahpDhlst.G.Wp.F...h.t.Ysp.LPFY..t....t............Yh.sElN.EDl.FllWth.p................P.s.s..thu..hathhpt.ap.AP.st..t..h...................................................................................................................................................................................................................................t...t.ht.hhthstst.hhahtt.tth..Fhhp..th..t....t..hst....tthhhhss..p.Gh..l...p.s.hhtp..Ns.Y...s.t.Attpuh..hhh....h.s..h...h.tpthls-h.h.........tpthhpp.hcFlsc............................................................. 0 2 8 10 +12796 PF12955 DUF3844 Domain of unknown function (DUF3844) Bateman, Wood V agb Jackhmmer:O74728 Domain This presumed domain is found in fungal species. It contains 8 largely conserved cysteine residues. This domain is found in proteins that are thought to be found in the endoplasmic reticulum. 
27.00 27.00 29.80 38.70 25.40 25.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.56 0.72 -3.92 32 106 2010-05-17 16:38:42 2010-05-17 17:38:42 2 1 104 0 83 109 0 102.90 41 23.96 CHANGED CasSpcuCspuTssCSGHGpChcp.tst..............cCauCpCpsoh.pp..t......Koh.pWuGssCpK+DlSs.FaLlshsollllhhlsuuIthLaulGp-cLPuVluA .CasSpcuCppuTssCS.GHGpChpp.tst..............tssCauCpCpsThspptt..t............+sspWuGssCpKcDlSs.FaLlssholsLlhhlshuIshLaulGpEcLPGVluA..... 0 25 47 72 +12797 PF12956 DUF3845 Domain of Unknown Function with PDB structure Ellrott K kellrott JCSG structure PDB:3GF6 Family Member PDB:3GF6 has statistically significant similarity to TNF-like jelly roll fold may indicate an immunomodulatory function[1] or a bioadhesion role[2] 25.00 25.00 479.30 479.10 17.60 15.20 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.56 0.70 -4.97 3 18 2010-05-17 17:26:20 2010-05-17 18:26:20 2 1 18 2 1 17 0 243.60 89 95.08 CHANGED hIMNKIIGhAVLLLCLoGCVRDNDAIYYPVGNVDIERGGPALEVGpculLVARSYNEEDYVLDTLAQYPGDPTLGKLTFMINLKNQSuDQEVA-FNGVGKSKLTMSLGYKDGNYPVESQVPVYTSuDVTASYAIKLRLKGELTLTGDEWMIDYVYAQLAGLFQPYPPASFPEVFMCKGGEQsFuTFDSFRRTWTFDITYDRS-LSFSQLYFNLFVNLAGQKRE-RVRLRIDKESYFEIYKpKEEM .hIMNKIIGLAVLLFCLSGCVRDNDAIYYPVGNVDVERGGPALEAG.KGDLIARSYNTEDYVLDTLAQYPGDPTLGKLTFMIsLKNQpADQEVsGFNGVG+SKLTMSLGYKDGNYPsESQVPVYTSSDVTASYAIKLRLKGELTLTGDEWMIDYlYAQLAGLFQPYPPTSFPEVFMCKGGEQsaATFDSFRRTWTFDITYDRSNLSFSQLYFNLFVNLAGQKRE-RVRLRIDKESYFEIYKEKEEM... 0 1 1 1 +12798 PF12957 DUF3846 Domain of unknown function (DUF3846) Ellrott K, Bateman A kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain is found associated with an Pfam:PF07275 like domain. This suggests that this family may also be involved in evading host restriction. 
24.60 24.60 24.80 24.60 24.30 24.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -10.11 0.72 -4.26 53 201 2010-05-17 20:05:53 2010-05-17 21:05:53 2 10 147 0 52 192 71 95.20 25 44.35 CHANGED psLllpPsptsts.....hplsss...........LcslQphV......GG.IEslhh..........sc.s....sslhlN-EG+hpuLP.hNpthst...............tth.hhDhlsGshllsG...sspss.hs...sLssctl ...................................sLhlpPtp.s.h.....hpltss...........LcshQphV......GG.IEs.lhh.................pc.s....ssllsN-EGKhpGLP..lNcthts...................t...hh-hlsGshhlsG.....ppptp.hh..sLs.t................................................ 0 30 42 50 +12799 PF12958 DUF3847 Protein of unknown function (DUF3847) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 40.00 39.00 40.30 40.10 39.70 38.90 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -9.91 0.72 -4.22 40 356 2010-05-17 20:46:12 2010-05-17 21:46:12 2 1 216 0 14 212 18 83.20 36 80.67 CHANGED cp.LEpLppEhE+uEp+Lccsppc.KtLcpQhKpLp...............RKcRTHRLCTRGuhLEShl.csccLTD--lh.LLchIFppp-sp-hL+ ...........................pph-cplcpt-cclKpLpNpp+pLcp....t....-....RKpRs+RLIp+GAlhESlhpE...sp-LTc-EhhpLlctl.t....................... 0 6 13 13 +12800 PF12959 DUF3848 Protein of unknown function (DUF3848) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain frequently seen with DUF3849. 
30.00 30.00 30.00 35.10 26.40 29.10 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.39 0.72 -4.05 3 58 2010-05-17 21:43:24 2010-05-17 22:43:24 2 2 36 0 3 54 8 98.70 65 39.48 CHANGED -LNTALYEKMoAEQ-KFRDWLKSQPPEEILNHAYEYTVREDIVMAMEELELTDAQAQALLDSPSPLADVYRYFEKVETDYMDsIRDCIEsRADDVCRAQcE .........-LNTALYEKMAAEQDKa..RDWLKSQPPEEILpHsYEYTlREDIVMAMEEL.E.LTDuQApALL-SPSPLADVYRYFEKL.ETGYMDs.IRDSIEsRADDVCRApEE.............. 0 2 3 3 +12801 PF12960 DUF3849 Protein of unknown function (DUF3849) Ellrott K kellrott JCSG-Joint Centrer for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain frequently seen with DUF3848. 25.00 25.00 26.10 28.60 21.40 20.10 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.64 0.71 -4.21 8 86 2010-05-17 21:44:30 2010-05-17 22:44:30 2 8 49 0 8 81 12 131.00 42 25.96 CHANGED PlYpHsAsYApE+sEL-tYRsSppsNhuCKEAIEsAIp-HYssNRLc.cuAVcpVlEpFGhERshaVLAsTlQpp-aDGRaSpsNK-WA+slshPss.ssh....sctshhhVssspPGLlDLFhcpsR+shtppQp ...............................................................PlY.aSAuYApEcGEl-pYRASppsNlpCKcu...IEtAIppcacs....pLs.csAs+sVlEpaGhERVpaVLANTlQpp.-aDG.RhSpcNK-WAKoIssssspsst....pths.ht.hh.ssspsGllDLFhcptR+phptpp.c......... 0 6 8 8 +12802 PF12961 DUF3850 Domain of Unknown Function with PDB structure (DUF3850) Ellrott K kellrott JCSG structure PDB:3IUW Family The search results from NCBI sequence alignment indicates a conserved domain belonging to ASCH superfamily [1]. Dali searching results show that the protein is a structurally similar to the PUA domain, suggesting it may be involved in RNA recognition. It has been reported that the deletion of PUA genes results in impaired growth (RluD) and competitive disadvantage (TruB) in Escherichia coli. Suggestions have been put forward that, apart from their usual catalytic role, certain PUS enzymes (e.g. 
TruB) may also act as chaperones for RNA folding. The interface interaction indicates that the biomolecule of protein NP_809782.1 should be a dimer. 30.00 29.00 32.20 37.80 29.20 26.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.37 0.72 -4.01 10 184 2012-10-02 17:37:24 2010-05-18 18:54:59 2 3 161 0 15 153 9 73.00 46 53.09 CHANGED HsLKItPEaFpAVlEGpKsFEIRKNDRNaQVGDlLlLpEassG..pYTG+tspuEITYITD....YuQ.p-sYVVLSh+ ......HcLKIhPcYFc..sVhpGpKpFElRKNDRsapVGDhLhL.......pE..a..p.p.G.............pYT.Gcp.......htscITa.l.T.D........asp...p-.....GYVlLulp...................................... 0 2 8 12 +12803 PF12962 DUF3851 Protein of unknown function (DUF3851) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 23.00 23.00 210.20 210.10 22.40 21.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.62 0.71 -4.13 4 33 2010-05-18 21:24:42 2010-05-18 22:24:42 2 1 27 0 1 22 4 124.70 69 99.32 CHANGED MKspILppcpMMFFDRAL-sQRotLLTsMADAVSECRTAADQAsELNEsGEsGLhRLsEIWsshhstcGhu..hllEGopsplLu-VVAQlYAYLotp.hhDPlGLAlYsELpaMMuSLMLGEWFE MNPNILNpNPLMFFDRAVNAQRSQLLTVMADAVSECRTAADQAAELNETGQlGLLRLAElWSsIRAKEGMG.GLlLEGTEAKILSDVVAQFYAYLSGCMFNDPVGMAIYAELHYMMSSLMLGEWFE 0 0 1 1 +12804 PF12963 DUF3852 Protein of unknown function (DUF3852) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This domain frequently seen with DUF3848. 
25.00 25.00 25.70 27.70 23.30 23.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.65 0.72 -4.02 10 46 2010-05-18 21:55:23 2010-05-18 22:55:23 2 1 31 0 11 44 3 106.80 63 97.58 CHANGED KpKKhhhl.hlsVLlLshhFsssAYAuss.GDVAGAIEGTWpsASsQIKTVVNpVVFPAIDLILAVFFFuKLGTAYFDYRKHGQFEWAAPAILFACLVFTLTAPhYIWQILGM ...................KhK+hhhhlsllLlLshhFsssAaAAs.s.GDVAGAIEuTWssASsQIKTVVNpVVFPAIDLILAVFFFuKLGhAYFDYRKHGQFEWuAPAILFACLVFTLTAPhYIWpILGh.. 0 9 11 11 +12805 PF12964 DUF3853 Protein of unknown function (DUF3853) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 22.10 22.10 22.10 22.50 21.90 21.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.19 0.72 -4.18 8 97 2012-10-04 14:01:12 2010-05-18 23:13:38 2 2 62 0 5 95 5 90.60 54 87.14 CHANGED sLpcLLpKPVWpMTGEELlFLh++u....sppEscos.psspss-++aVYGIpGIARLFGCSlPTANRIKKSGKID+AITQIGRKIIVDA-LALELAs+c .....................lppLLtKPlhQMTGEEhlFLtp+u......ppsc..sp.s.tssscs-++YVYGltGIA+LFGCSlPTANR....IKcS.GKID+AITQl.GRKIIVDA-LALELAG+K.............. 0 2 5 5 +12806 PF12965 DUF3854 Domain of unknown function (DUF3854) Ellrott K, Bateman A kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. This domain is likely to be related to the Toprim domain. 
21.60 21.60 22.10 21.80 21.20 21.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.67 0.71 -4.50 54 235 2012-10-01 21:47:57 2010-05-18 23:31:30 2 15 148 0 78 233 13 113.00 26 15.36 CHANGED FWpWVhpp.pIPlhITEGuKKAuuLLStGasAIuLPGIhsGYR..+cph....h.t.pL.lPpLthhApsGRclhlsFDp.DoKscThtsVppAlp+hGpLlpptGCpVpl..lpWs..t.tKGVDDhI........sspGtcsa ...............................................................................lhlsEG.hKs...th...h............p.........sh..slul......Gl.sh............t..................................l...hs.Lt...h.s.......s..+plhlsFDt..D...h.pp....ppVppAl.hchu....phL.pp.t.G.h..p..Vhl...........hs..Ws......s.sKGlDDhl........hst.....h................................. 1 16 59 75 +12807 PF12966 AtpR N-ATPase, AtpR subunit Dibrova DV, Galperin MY, Mulkidjanian AY, Finn RD rdf [1] Family Membrane protein with three predicted transmembrane segments, two of which contain conserved Arg residues. AtpR genes are found in the N-ATPase (archaeal-type F1-Fo-ATPase) operons and are predicted to interact with the conserved Glu/Asp residues in the c subunits, regulating the assembly and/or function of the membrane-embedded ring of 'c' (proteolipid) subunits (PFAM:PF00137). 26.00 26.00 26.00 26.00 25.70 25.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.04 0.72 -3.84 38 110 2012-10-01 19:47:47 2010-05-19 14:12:51 2 1 103 0 51 134 7 85.70 30 84.35 CHANGED hshhhulhsGhlLGhhaFuuLWhTl+phhsuppPshhhhhShLhRhulslushhh...l.....sssthttLLsshhGFlluRhlhlp......h...hpsp........t ...h..hhluhhhGhshGshaFsuLhhssR..........hhl..sup...sshhlhho.lhRlulslushhll.....upu..shttLlush.hGFhsARhlsl+hh...st............ 0 16 35 44 +12808 PF12967 DUF3855 Domain of Unknown Function with PDB structure (DUF3855) Ellrott K kellrott JCSG structure PDB:1O22 Family Family based on orphan protein (TM0875) from Thermotoga maritima that has been structurally determined as PDB:1022. 
The TM0875 gene of Thermotoga maritima encodes a hypothetical protein NP_228683 [1] of unknown function. Analysis of TM0875 genomic context reveals the presence of MMT1 (a predicted Co/Zn/Cd cation transporter) and an inactive homolog of metal-dependent proteases. 1O22 shows weak structural similarity with the phosphoribosylformylglycinamidine synthase 1t4a (Dali Z-scr=4.6), the yggU protein (PDB structure:1n91; with DALI Z-scr=3), and with the thioesterase superfamily member (PDB structure 2cy9 - found using FATCAT), even though they have very low sequence identity. 27.00 27.00 307.40 307.20 25.10 23.00 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.04 0.71 -4.68 2 5 2010-05-19 16:59:37 2010-05-19 17:59:37 2 1 5 1 1 5 0 157.80 81 100.00 CHANGED M+LMDSLEIhYh+KsK-htsLE+Kh+EIhpETGloL-sVNSE.hGRIFL+IsVLE-tEplPSFhlKALhPcpsAscLPLG-WssLp.VFVEEhsYL-sYs.MKIhS-tNhYTlYVPaSuVKpKNRsElVt-FMKYFFEoKGWsPGpYpF.VQElDslh MRLMDILEILYYKKGKEFGILEKKMKEIFNETGVSLEPVNSELIGRIFLKISVLEEGEEVPSFAIKALTPKENAVDLPLGDWTDLKNVFVEEIDYLDSYGsMKILSEKNWYKIYVPYSSVKKKNRNELVEEFMKYFFESKGWNPGEYTFSVQEIDNLF 0 1 1 1 +12809 PF12968 DUF3856 Domain of Unknown Function (DUF3856) Ellrott K kellrott JCSG structure PDB:2HR2 Domain TPR-like protein. The 2hr2 structure belongs to the SCOP all alpha class, TPR-like superfamily, CT2138-like family. A DALI search gives hits with the putative peptidyl-prolyl isomerase 2fbn (Z=16), the SGTA protein (Z=16), the PLCR protein 2qfc (Z=16), a putative FK506-binding protein (PDB:1qz2-A; DALI Z-score 15.3; RMSD 2.9; 16% sequence identity within 132 superimposed residues), and with the tetratricopeptide repeats of the protein phosphatase 5 (PDB:2bug; DALI Z-score 15.1; RMSD 2.5; 19% sequence identity within 117 superimposed residues). 
27.00 27.00 27.00 28.70 26.90 26.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.75 0.71 -4.37 10 11 2012-10-11 20:01:03 2010-05-20 00:48:56 2 1 11 6 10 16 0 144.00 56 82.41 CHANGED KPL+EVAtAYhALS-AE+pLp-Gta-EAAtSsR+AM-hSRTIPsEEAFDHsGFDAhCHAuLSuAhutLGRY-EuLpSA-cAL+YFNRRGELpQDEGKLWIAAVFSRAlAL-ulGRp-EAlptFRhAGEMIAERKGEhsGKEpLh ....................KPL+EVAtAYhALS-Ap+pLpsGta-EAAtssR+AM-hSRTlPsEEAFDHsGFDAhCHAuLusAhutLGca-EuLpSA-cAL+YFNRRGELpQDEGKLWIuAVaSRAlAL-ulGRt-EAlttF+hAsEMIsERKGEhsGKEph.h.... 0 1 1 7 +12810 PF12969 DUF3857 Domain of Unknown Function with PDB structure (DUF3857) Ellrott K kellrott JCSG structure PDB:3KD4 Family This family is based on the first domain of the PDB structure PDB:3KD4(residues 1-228). It is structurally similar to domains in other hydrolases, eg. M1 family aminopeptidase (3ebi, Z=10, rmsd 3.6A for 152 CA, seq id 12%), despite lack of any significant sequence similarity. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.74 0.71 -4.68 94 529 2010-05-20 21:30:11 2010-05-20 22:30:11 2 34 361 2 180 535 31 170.10 15 23.93 CHANGED plp.ssGshph.thphshplhsppGlcph.uphpl..sa...ssphpp..lplhpuplhpssGp..h.clsss..t.hhphpt.tshtt.hasstpphh.lshsslpsGs.hlchpaph...............psps..shhsstasshhthpht............PstchphplphPsshslphpth....ts.stsp.pppsst.pha...paphcpl.shh..Es ...........................................................................................tst.s..hhphtlplhspsulcph..uphpl..........a........stphpp..........lplhpsps.hp...s..sGp...h..p..plsss...p..htshpt.t.st..stshasst.....ct....hs.l....shPslcsGs...h..lchp.aph...............ptpp...shh.sha..h.t.h.h..h...hp..t............PhtptphplphPtp..hslphphh.......s.st.p...p......tsst..phh..phphpph........t............................................................. 
0 81 124 159 +12811 PF12970 DUF3858 Domain of Unknown Function with PDB structure (DUF3858) Ellrott K kellrott JCSG structure PDB:3KD4 Family This family is based on the third domain of the PDB structure 3KD4(residues 410-525). It is structurally similar to part of neuropilin-2 (Z=4.6, rmsd 3.6A for 83 CA, 7% seq id). This domain and the second domain appears to be part of peptide-n-glycanase (1x3w, 2g9f). 27.00 27.00 27.60 32.50 26.80 26.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.32 0.71 -10.45 0.71 -4.31 6 27 2010-05-20 21:56:00 2010-05-20 22:56:00 2 3 27 2 6 31 0 115.80 41 20.73 CHANGED YthhsLPpA+sGhAuhs..hshlNocRssNLLLPtLsDEsYTYhVcssssMcssTssppKcIsNsVGplslTV+ppucchcVsRoL+LpKQhITPAEYssa+pLMoEWtDssspoLLF ...............YhlhsLPstcsGlus.t..hsthNScRosNLLLPuLs-EsaTYsVssspGMc.sTsshcKcIsNslGplslolpspusphcVhRoLcLpKQhITPu..-YssaRpLhsEWtsscspoLla.............. 0 3 5 6 +12812 PF12971 NAGLU_N Alpha-N-acetylglucosaminidase (NAGLU) N-terminal domain Moxon SJ, Bateman A agb Pfam-B_6295 (release 7.7) Domain Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate [1]. Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations [2]. The structure shows that the enzyme is composed of three domains. This N-terminal domain has an alpha-beta fold [3]. 
25.00 25.00 27.20 25.40 23.40 23.70 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.93 0.72 -4.31 33 349 2010-05-23 13:20:51 2010-05-23 14:20:51 2 28 248 5 145 351 5 87.10 28 10.19 CHANGED ptsuspsllpRllstcupp.Fphclhss.....stDtFpl..ss.......ssuc...lhlpGsssl...ulusGLpaYLKahC......tscloW..Gsp..lcL.....PpsLPhl ............................ssspsllpRllstpspp...Fphp....lhps.t.........spDhFpl.ss..............pss+...lhlpGsssl...ulAsG........LpaYLK.as......tsploW....s.usp..lpl.......PtsLP................ 0 53 90 122 +12813 PF12972 NAGLU_C Alpha-N-acetylglucosaminidase (NAGLU) C-terminal domain Moxon SJ, Bateman A agb Pfam-B_6295 (release 7.7) Domain Alpha-N-acetylglucosaminidase, a lysosomal enzyme required for the stepwise degradation of heparan sulfate [1]. Mutations on the alpha-N-acetylglucosaminidase (NAGLU) gene can lead to Mucopolysaccharidosis type IIIB (MPS IIIB; or Sanfilippo syndrome type B) characterised by neurological dysfunction but relatively mild somatic manifestations [2]. The structure shows that the enzyme is composed of three domains. This C-terminal domain has an all alpha helical fold [3]. 
25.00 25.00 27.20 26.60 21.70 20.90 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.04 0.70 -5.01 34 392 2010-05-23 13:23:40 2010-05-23 14:23:40 2 27 266 5 168 383 11 256.40 28 31.60 CHANGED pWlppYucpRY.....Gt.pstplppAWphLhpolYssst.ssttps....pslhstRPsls.........................................spsphhYsspclhpAhclLlpsus....phpssssacaDLlDlsRQsLu.ptupphhhphhpAYppcDhtphpttuschl.pLlpshDcLLuocspFhLGpWlps.A+uhu...........ssstE..................+chYEaNARs.lThW.....uspGslh.DYAs+pWuGLlpsaYh.RWphahstlppslpts..............pshsttpa..thh.phEhtW.sp....sschass.....pstG-slphupp...lhp+h .....................pWlppYsppRY......Gs...ts....tphtpAWplLhpolYssst.ttttps.......cslhstRPs.hs............................................tpsp.h.h.Ysssc.lhcAhclh.lpsss....phpss...ssacaDLlDlsRQsLu.phspthhpphh...puappp...............c......h..p.............t.h..ptt..upphl.cLl.shDplLuopppFhLGpWlpp.ARshu............s.stp-.........................cc..hYEhNARs.lThW........usp..G...s.lp..DYAs......+pWuGLl.......p.....cYYh.RW.phahstltpslpt.s..........................t.hsttp.a...thh.th.EptW..sp....sp.phass......psts.sslphupp.lh..................................... 0 58 102 140 +12814 PF12973 Cupin_7 ChrR Cupin-like domain Bateman A agb JCSG target 403193 Domain Members of this family are part of the cupin superfamily. This family includes the transcriptional activator ChrR. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.93 0.72 -4.14 88 1076 2012-10-10 13:59:34 2010-05-23 15:59:43 2 8 611 11 370 1097 835 89.10 27 55.05 CHANGED sWhsSPhsGVpRt.Lc.Rhu.s.E.u.+sTolVRasPGopFssHsHsGGEEIhVL-GsFsDEp.GcY..PuGoYlRNPsGSpHsPa.S.cpG..CsIhVKLt ................................................hsGlthh.Lp....t.s..t...p.......tssLl+hs.s.G...s...p...h..s...p...H...p...H..tG..sEp.h.h.V.L..cG....s....F.....p....D.c.....p......G.....c....a.........ssG..salh.ps.s.s.s...p.H.s.P.h...s..p..p..G...slhh............................................ 0 92 205 288 +12815 PF12974 Phosphonate-bd ABC transporter, phosphonate, periplasmic substrate-binding protein Bateman A agb JCSG target 416811 Family This is a family of periplasmic proteins which are part of the transport system for alkylphosphonate uptake. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.33 0.70 -4.99 178 3450 2012-10-03 15:33:52 2010-05-23 16:08:34 2 64 2087 10 931 7437 1283 236.80 20 68.71 CHANGED hullPt.p.sssphhpp.....apslhshL...scpl......G.h...p...lchhssssasshhpsht.sGplDluahs.sh..shlh..h..p..pp.s.s..h.pslsph............ht......pG...p...s.........s...................apul..llspp.-.u..s..l...............................p.......sl............................p..DL.......cGcpluhssssSsoGhlhPthhL..............pp...t.G.l...........s.....p...........p.......h........h.p................h.h..a...s....u..sH-ssh.h.uVh.pGpsDuus.................sts............t............sh..pphh...................pps.....h...............ppl+l...........l...........h...........p.o.s...h.Pst.......s.lss.p.ss....ls....sp....h....ppplppsllshs..p.s................................sps.pphlpsh....s...h.....s...uFhsss.spsY 
............................................................................................................................................................................hhs.p..s.tp.h.tp.....hp.s.hhph..l....p.cpl...........G..h......p....lp.h..h.h....s..s.s..a....s...s.h......h...p.u..ht...ts.p....l.D....l..u......h..h...s......s...h....s.h.s..........s....t......pp...s...s.......s...ps.l...spt............................ht........ts.....p.....s.............s..................................................a..p...S...h......l..l...sp.p....-....S....s...l......................................p............sl............................p...DL.........+G.+...p...h..u..h...s.s...s.s.Ssu...G..a..lh.Pt.h.h..L.........................................t.c...t..G...l........................s....sp.................................................p.........................h..p........................................................t.s...h..........s....u...u.....H......-.......s....s.l..h.s.lh.s.Gp........s........D.......s..us...........................................ss.s...........................................s....s.h..pp.ht...........................................................pp.t....h...................pcl.+.l...........l................................................h.................................p..o.....s.......h.....h.....P.s.........................s...h.s.s...p...ts.......l..s.............tp........h........tp..plpp..sh.hshst..s.......................................pt.tth.h.p.h.....t...h......t...th...p.......................................................................................................................................................................................................... 0 290 614 788 +12816 PF12975 DUF3859 Domain of unknown function (DUF3859) Bateman A agb JCSG target 416836 Domain This short domain is functionally uncharacterised. 
21.00 21.00 21.00 21.80 20.90 20.10 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.61 0.71 -4.46 33 110 2010-05-23 15:32:20 2010-05-23 16:32:20 2 7 98 2 33 109 13 130.10 29 51.56 CHANGED hsthpshlshhpaGIasphsscspphsp..................hpptTphVPAclslpFGhhh.slpcucspp...............lphplhHPtI...hsspGpshpsasuplaspss-hshahh-ph..acsls......GpWph.plthcscllA-KoFpV ............................h.p.hlshhpaGlasphspc.p.h.p..................hhp.TspVPAclshcFGhhh..slp+s.cGpp..................lphhhhHPsl.....spcGphhpsapsplhssssDhthYhhsp...a-sls........GcWRh.tlhhs.scllA-KoFpV... 0 5 14 24 +12817 PF12976 DUF3860 Domain of Unknown Function with PDB structure (DUF3860) Ellrott K kellrott JCSG structure PDB:2OD5 Family A protein family created to cover PDB:2OD5. 2OD5 is a hypothetical protein (JCVI_PEP_1096688149193) from an environmental metagenome (unidentified marine microbe). 27.00 27.00 32.50 138.50 21.90 19.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.98 0.72 -4.19 2 2 2010-05-25 22:37:37 2010-05-25 23:37:37 2 1 1 0 0 5 106 88.00 26 100.00 CHANGED McoppLRphI+paLsER..tNThEI.talsppMc.sosPpplsNlLptDcsIl+luTs++uGhh.sch.Is.Ws....lR....sta.-tcp. McoppLRphI+paLsER..tNThEI.talsppMc.sosPpplsNlLptDcsIl+luTs++uGhh.sch.Is.Ws....lR....sta.-tcp. 0 0 0 0 +12818 PF12977 DUF3861 Domain of Unknown Function with PDB structure (DUF3861) Ellrott K kellrott JCSG structure PDB:3CJL Family The 3cjl structure is likely a representative of a new fold with some resemblance to 3-helical bundle folds such as the serum albumin-like fold of SCOP. No significant hits reported by a Dali search. This protein is the first structural representative of a small (about 60 proteins) family of proteins that are found among proteo- and enterobacteria (REF http://www.topsan.org/Proteins/JCSG/3CJL). 
27.00 27.00 30.10 30.40 24.00 20.30 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.99 0.72 -3.88 13 132 2010-05-27 00:12:12 2010-05-27 01:12:12 2 1 130 2 37 96 3 95.10 40 92.80 CHANGED pppYRITlEpLsstpspstc...sLpFEhpsHDDlFsIVE+lKp+psh-..sppuspFuVGLKLhGEVMhpsRKHPLFt-FtPpFtsFMpsLKsts........p ............a+YRITlp.LpctcGcshsps.slpFEspNHDDIhpIlE+Lcs+csls..p-posuFuVGLKLFoElMhcsRc..HP..LFpshtstFppFMppLKpts.sp...... 0 6 20 28 +12819 PF12978 DUF3862 Domain of Unknown Function with PDB structure (DUF3862) Ellrott K kellrott JCSG structure PDB:3D4E Family PDB:3D4E shared structural similarity to beta-lactamase inhibitory proteins (BLIP) which already include 1XXM, 1S0W, 1JTG, 2G2U, 2G2W, 2B5R, and 3due. All of structures are involved in beta-lactamase inhibitor complex. (REF http://www.topsan.org/Proteins/JCSG/3d4e) 25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.76 0.71 -4.59 9 255 2012-10-01 23:09:26 2010-05-27 22:46:43 2 2 218 1 31 175 0 130.40 26 74.29 CHANGED ppp-lRtpF-KIKlushpspFcGGoSl--LcplFG.cPspppppsAGsVpLcsYTWshDsVolslp.LhpNSolsKoIoNFpFs.RD.ploLK-YsslpcGhoYc-VschLGEPDshSpAsSS-cpplQAlWlSGlKuc.spuusloLsFENstLosKoQssL .....................................................t........................p..usp.tplhthhG.p.tpp..........t..........W............s.t........s.....h.h..h.pst..s.....h.pt..s..h...t......+ptp..lshp.passl.........ppGh.oY..ccVpchlG.-P.-..s.h..spssh...s..s.ppphph.lahpshpus....sushsloF.pssplpsKsQ.sL............................. 0 10 20 27 +12820 PF12979 DUF3863 Domain of Unknown Function with PDB structure (DUF3863) Ellrott K kellrott JCSG structure PDB:3LM3 Domain Domain based on 1-364 domain of PDB:3LM3 which is encoded by the BDI_3119 gene from Parabacteroides distasonis atcc 8503. 
27.00 27.00 27.80 198.70 26.60 25.10 hmmbuild -o /dev/null HMM SEED 352 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.12 0.70 -5.85 3 9 2010-05-27 23:42:17 2010-05-28 00:42:17 2 2 9 1 4 10 0 350.70 77 74.19 CHANGED oslTL-GNRFVTLCIMIRTTPWEVSRDVKLHPRDEsSWHTLEGVRALREAFAKNNP-GRLTWGFTLNALEDpR-NY+QIR-YVVECQKKYGDEVSYFPGYFPAMYLPRERVNREMSEAIQLISKMVGNGYRPQSIMGGFLSADNLRYLAEKENIHVAHAVIWSQHNIDGGGADGSPSYPFYPSTEHFCKPAQGKSDFIDCVNLDGWTMDFICARRSGAMGHGIEGYNSRRGVGPIETYTGWGLDLGpREVMHTQAIHFDKGlELNGFGWVTNIWEAQMVHEFGQEFICKAMETWVTETKERWPDT+FVTFGEFGMLWRcpHKoN-DWNYRFVERGSGLGDSYNNLEIKWFMN ..EsLTl-GNRFVTLCIMIRTTPWEVSRDVKLHPRDEssWHTLEGVRALREAFAoNNPNGRLTWGFThNALEDGRcNYR-IRDYVVECQKKYGDEVTYFPG.YFPAMYLPRERVNREMSEAI-IISKMVGNGYRP.QSIMGGFLSADNLRYLAEKENIHVAHAVIWSQHNIDGGGADGSPSYPFYPSTEHFCKPAQGKSDFIDCVNLDGWTMDFICARRSGtoGHGI-GYNSRRGVGPIETY+GWGLDLGHREVMHTpAIHF.DKGlELNGFGWVsNIWEAQMV.HEFGKD.LICDAMchWVTGTKERWPDTHFVTFGEFG-LWRcQaKoN-DWNYRFVERGSGLGDSYNNLEIKWFMN. 0 1 4 4 +12821 PF12980 DUF3864 Domain of Unknown Function with PDB structure (DUF3864) Ellrott K kellrott JCSG structure PDB:3LM3 Domain Domain based on 366-449 domain of PDB:3LM3 which is encoded by the BDI_3119 gene from Parabacteroides distasonis atcc 8503. 27.00 27.00 106.30 105.40 25.20 24.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.62 0.72 -3.78 3 8 2010-05-27 23:42:58 2010-05-28 00:42:58 2 1 8 1 3 9 0 81.60 80 17.10 CHANGED RLALLRDWHpKNAPAYVIDFTRYDLKA+EPADPSPcKPAKDWSLINVINQKGLRPQDKPVLLscL-sEcQ-LIRKYYPELFK RLALLRDWHTKNSPAYVIDFTRYDL.AHEPADPSPpKPAKDWSLINKINQKGLRPQDKPVLIDKLEKEDQDLIRKYYPELFK. 0 1 3 3 +12822 PF12981 DUF3865 Domain of Unknown Function with PDB structure (DUF3865) Ellrott K kellrott JCSG structure PDB:3B5P Family Family based of PDB:3B5P encoded by ZP_00108531 from nitrogen-fixing cyanobacterium Nostoc punctiforme pcc 73102 is a CADD-like protein of unknown function. 
Superposition between protein structures encoded by CT610 from Chlamydia trachomatis (PDB code 1rwc), pyrroloquinolinquinone synthase C (PqqC, PDB code 1otv) and ZP_00108531 revealed that putative active sites in CT610 and ZP_00108531 are identical. ( REF: http://www.topsan.org/Proteins/JCSG/3B5P). 31.60 30.90 31.60 30.90 28.60 27.20 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.70 0.70 -4.57 3 20 2012-10-02 21:56:19 2010-05-28 01:00:09 2 2 12 2 4 21 2 140.20 31 89.32 CHANGED hK+lScpLsphLspDaluFSlNNNPll..SpISstSFuQhh.VhpQYSlFPKplluhh.hAthph.htsWsGVuEELLpNlNEEMGushGtIo.....HYTlLR+uLc-ulGlsVsNshPSVATppFlcoVcuLl-+ps.-hVLGusYAlEooAIPELhLl+ELV...tEuApcKcLsappohl.pFF-hHLD-lElEH+DcLtshluuYIpsEE.......QatEFt-GFpAsIDsM-sWWo-LspEth ......................................................................................................................................s.lstEl.tNh....EE.s.....................Hh.hhtpuh.p..h...t....hs...lpshh..P...uTpthh.pl.tlh.hps..phshGshYAhEstul.Eh.lhhcls...tchs..ct..ht........F.aphHl.DthE.tHpstL......tp.hsthlt.tp.............hhtGhhshlshhpsaWptL.....s... 0 2 2 4 +12823 PF12982 DUF3866 Protein of unknown function (DUF3866) Bateman A agb Jackhmmer:Q18B83 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 352 and 374 amino acids in length. 
25.00 25.00 112.90 28.00 22.90 22.40 hmmbuild --amino -o /dev/null HMM SEED 320 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.92 0.70 -5.46 30 119 2010-05-28 08:24:37 2010-05-28 09:24:37 2 3 117 0 62 121 2 317.00 43 87.68 CHANGED tRAlsYspLsGpscsGDcVlLNsTAlthsLGTGGYcaVlus.sphs.s........ssGHIMKhRYTPhQhpVLusEEpcSPaHchhpssc.oLcGhPVlVu-LHShLsPlssul..+pts.....Ps...........hRluYlMTDGGALPltaSpsVtpL+cpGhlsuTlTsGpAFGGDhEAlNlaouLlAA+pllcADlllVu.GPGllGTGT+aGFSGlptGchlsAlssLGG+PlulsRlShADsRtRHpGlSHHolTslsclsLsssplslPhh.tt-.............ttshlpcQlpsh...sp+Hp....ll..hl.sssthtpsLpphslploTMGRuhp-DssaFlAsuAAuh .......................+AlsYsplsGpspsGDcVlLNsTAlthsLGTGGYshVl..us.sths....-ss...................ssGH.lhKhRYTPhQhtVLus-EppSPaHshhpp.sc...sLcGhPVlVu-LHShLssls.uul.+........p...t.s.....Ps...........h+lsYlMTDGGALPlhaS+sVttL+cpGhlsu.TlTsGpAFGGDhEuVslaouLluA+cVlcADlslVu.GPG.lGTGT+aGFSGlptuchlsAlstLGG+Pluh.RlShuDtRpRHpGlSHHolTshsclshsssslsl.Ph.h.ss-...................htthlppQlpt.......tp+....H.p..ll..hl...ss....sth....tp...h...L....c....p....h....s.lploTMGRuh-pDssaFlAAuAAu.h.................. 0 36 54 59 +12824 PF12983 DUF3867 Protein of unknown function (DUF3867) Bateman A agb Jackhmmer:Q18BR7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 190 amino acids in length. 
27.00 27.00 131.90 131.60 22.60 22.60 hmmbuild --amino -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.29 0.71 -4.31 10 47 2010-05-28 12:03:52 2010-05-28 13:03:52 2 1 46 0 9 31 0 187.50 55 98.74 CHANGED D+IIDFNELKNKVKDKDlDKFEsYIYSLYYclApGKLoMuDFS+cIhcYMEENNISQEKFhNIQKKhhER....YGhDsu-IEcQhKshGlDssshu...psssY...........................Esl+KohuFQEKYKs+IpsKosopYaIKN-KNDlclllEpEcVlLpSsKKIDLsDsELNEFLCSYKKllcDKKLcIslCEslKpY-Y .D+IIDFNELKNKA+DKDlDKFEpYlYsLYaSV.spGphoMu-FS+cIhcYMc-NNISQEKFlNIQKchhER....YGhD.....hp-lEcQhKshG..lDhsslG...psssY...........................Esl+Ks..lSFpEKYpuKlpsKslopYaIKN-KNDlclll-sEclhLpSsKKIDL.pDoELNEFLCSYKKhhpsKpLpIslCEslspYsY..... 0 5 9 9 +12825 PF12984 DUF3868 Domain of unknown function, B. Theta Gene description (DUF3868) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_1065 Family Based on Bacteroides thetaiotaomicron gene BT_1065, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 27.40 27.30 26.60 26.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.45 0.71 -4.57 11 235 2010-06-17 22:02:38 2010-06-17 23:02:38 2 7 74 0 30 212 2 109.50 21 24.44 CHANGED +phhhlhhhLhlhsh...sh.AQst...apGtlslssspLcQcG-olhlshslslpslplcSppulslsPhLhS....uspphpLPplllpG+pchKsYcRplAltstpccspath......llht ........................................h.....hhh.h.lhhhsh.......sstAQph.....tstlplssh.p.l.pp..pGc.p..lhlshslsl.s.s.l.plsoscslhLoPlLts.........uscpht..LPslhlsG+p.+.phh..p..Rphuh..ptp................th.h...................... 
0 7 25 30 +12826 PF12985 DUF3869 Domain of unknown function (DUF3869) Ellrott K, Bakolitsa C kellrott JCSG structure PDB:3KOG Family A family based on the N-terminal domain of 3KOG, which shows weak but consistent remote homology with adhesive families such as immunoglobulins and cadherins, suggesting it might form an attachment module. 20.40 20.40 22.30 28.10 20.20 18.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.42 0.72 -3.60 10 29 2010-06-17 22:50:21 2010-06-17 23:50:21 2 2 22 2 4 27 2 110.80 32 32.71 CHANGED shFTSC.EKEEFNVs.lcsssApATIusTVhDhsoG..sslT............oussTluuuu.....ssplAupotshsA..ostsYho.uospVplPALscGQaAslsVoIhLQchtuAscsss ..shFTSC.EKEE....h..sl.....s..spsssAphhIssoVhDssTG..pslo............oApsTlusuuh........hssplA.tpu.h...slss...ssssYhs.sstsVplstlpsGQhushsVslhLpp.tsst..s.......... 0 0 3 4 +12827 PF12986 DUF3870 Domain of unknown function (DUF3870) Ellrott K, Bakolitsa C kellrott JCSG structure PDB:3KOG Family A family based on the C-terminal domain of 3KOG which shows structural similarity to pore-forming proteins [1][2], suggesting it may have a lytic function. 22.00 22.00 23.00 28.10 21.10 17.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.17 0.72 -3.74 19 200 2010-06-17 22:53:45 2010-06-17 23:53:45 2 2 158 1 42 130 0 94.70 40 81.03 CHANGED lYIlG-A+os.sNsITcpYphFFIuFllcccsscIl..Dh-souTlpLTpsFl+plFlG+shhc.p-tlhp.ElcpRYaGSSQKAllVAY+cAhpKYpp ................YlsGcAKsPpsNsITchacoaaluhlls+cTGcIl..DA-CosoltLTppFV+pLFls+slpD..stLlt.ElcsRYFGSSQKALlsAlKDAp++Yp.... 1 18 33 35 +12828 PF12987 DUF3871 Domain of unknown function, B. Theta Gene description (DUF3871) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2984 Family Based on Bacteroides thetaiotaomicron gene BT_2984, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231). 
It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 34.00 33.90 20.90 19.80 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.83 0.70 -5.73 13 110 2010-06-17 23:00:13 2010-06-18 00:00:13 2 1 53 0 17 109 3 294.40 52 93.83 CHANGED opNhG.EaAEEAsII..psspspcpspFIEANTpElTLpHLKNDCIlPVFAKDNElTISHpsFIEsVa-AAsoFFuGEpIspP-IRVSHlIKGRIPEAIpKsAspLLEuDKThYYERhAFsI-IPTIYETVsGN+LsLoIGGVRAYNchNLY.SKKusE+FKlFIGF+spVCoNhCloTDGYpsslEVoNopELYpulLELFpsYNPAKH..lHLMQoLusThloE+QFsQlLGRMRLYQsLPpuhQKplP+hLlTDoQINsVAKAYlsD-NFGuh..Gs-lSMWchYNLLTGANKSSYIDSFLDRulNATElusGIssAL.+G.D-+..YpWFl ...........................p.hE-AsIl..ps.t...ppps.FIEANTpElTlpaLcp-CIlPVFuKDNEhTISH.sFI-sV.-AApsaFsGEplppP-IRVSHlIKGRlPpAlpK.sspLLEsDKTIYYERhAFshcIPTIhEslpGN+LsLoIGGVRAYNc.NLY.SKKus.E+FKlFIGFpspVCsNhCl.oTDGapsplcVhssp-LYpusL-LFppYNsAKc..l+LMpsLuso.hoEpQFsQllGRhRLYQsLP.u.pKp..lP+hLlTDoQINsVA+uYhsDcNFush..ssslSMWchYNLLTGANKS.SYIDoFL-RulNATElusGIspAL.pG.Ds.c..YpWFl....... 0 4 16 17 +12829 PF12988 DUF3872 Domain of unknown function, B. Theta Gene description (DUF3872) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2593 Family Based on Bacteroides thetaiotaomicron gene BT_2593, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231). It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 
27.00 27.00 29.40 32.30 26.30 23.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.61 0.71 -4.63 21 300 2010-06-17 23:03:44 2010-06-18 00:03:44 2 1 109 2 24 255 3 133.40 51 87.93 CHANGED hlsplhshss.lshuhlsloAC..sc-LDlQQsYPFoVETMPVsKcIspGETsEIRCpLpR-GcFssTpYTIRYFQPDGcGpL+h-cGTVhhPNDRYPLscEpFRLYYTStS.s-pQoIDlYlEDsa..GphpQloFsFNNcs ........l...phhsh.sh.lshsshsLsuC..-c-.LDlQQuYPFoVEoMPV.cclspGpTsEIRCpLKRpGcFssTtYTIRYFQ.DGcGpL......+h.DsGhsFhPN...........DRY.Lp...........c...........-.pFRLYYTStS.s-pQslcVaVEDNF..GphhpLsFsFNNc....... 0 7 20 24 +12830 PF12989 DUF3873 Domain of unknown function, B. Theta Gene description (DUF3873) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2286 Family Based on Bacteroides thetaiotaomicron gene BT_2286, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 27.60 27.30 26.50 26.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.39 0.72 -3.85 9 114 2010-06-17 23:08:19 2010-06-18 00:08:19 2 2 63 0 6 83 0 67.20 61 90.38 CHANGED tsphshNGVSlCpssGcEKYpKaphslt.t+tpphYQYDYRcs.ssELFSCVusTL-ECRcpRDcWLppKp .....ppMTlNGVSTCppuGpEKYE+FQ.GlG.RR+RThlQYDYRHs.DGELFSCVKPTLDECRptRDcWLstKp......... 0 3 5 6 +12831 PF12990 DUF3874 Domain of unknonw function from B. Theta Gene description (DUF3874) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4228 Family Based on Bacteroides thetaiotaomicron gene BT_4228, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 
27.00 27.00 27.00 27.20 25.90 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.35 0.72 -3.86 12 222 2010-06-17 23:12:49 2010-06-18 00:12:49 2 4 61 0 19 204 0 72.40 34 14.28 CHANGED Y+hss.EEVhtsCFR..hstt.sEcsh.LSAusIFphLpctpPAAMRGssshphuplLhuhGlcRpHTcaGNVYpV ........hsshEplFhphFR..sApp...tE-uchLoss-IhphLp+pssh.s.hps.s.plspFGRlLpph.Glpp+HTppGslYpV.......... 0 9 18 19 +12832 PF12991 DUF3875 Domain of unknown function, B. Theta Gene description (DUF3875) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4769 Family Based on Bacteroides thetaiotaomicron gene BT_4769, a conserved protein found in a conjugate transposon. As seem in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231). It appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 27.00 27.00 27.50 27.20 25.40 26.50 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.69 0.72 -4.28 17 294 2010-06-17 23:17:18 2010-06-18 00:17:18 2 4 123 0 37 265 8 53.20 67 7.11 CHANGED RNlhKssTLESKFPLLAVEpsCIlSKDADlTVuFcV-LPELFTVTuuEYEAhHS ...........RNh.KhoTLEsKFPLLuV..E.p..GCIlSKDADITVAFcVELPELFTVTuuEYEAIHu... 0 13 32 37 +12833 PF12992 DUF3876 Domain of unknown function, B. Theta Gene description (DUF3876) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0092 Family Based on Bacteroides thetaiotaomicron gene BT_0092, a conserved protein found in a conjugate transposon. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or other bacterial species vs when in culture [1,2]. 
27.00 27.00 28.00 28.00 25.40 24.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.09 0.72 -4.05 19 215 2010-06-17 23:20:33 2010-06-18 00:20:33 2 1 93 0 21 156 1 93.10 42 80.70 CHANGED hpsspp..shchcplsGsWcSlst+PsVhla+-ucpYplolhthsthstphpPpTY.l.p.ccsGshFIsTGa.Rl.lsYcptpDlLohSstG...-YlRs ...........h..oCp..sspch-tlsGsWcSVp.G.+.PsV.hIY+-....Gcs....Y+VTlhp+SshpRph+PcTY.l.p..E-sGsLFhsTGa.RlsluYscusDlLohSPsG...DYlR..................... 0 6 14 19 +12834 PF12993 DUF3877 Domain of unknown function, E. rectale Gene description (DUF3877) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_0237 Family Based on Eubacterium rectale gene EUBREC_0237. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 27.00 27.00 129.70 129.40 26.10 25.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.23 0.71 -4.71 14 33 2010-06-18 21:49:19 2010-06-18 22:49:19 2 2 33 0 4 31 2 175.20 45 90.95 CHANGED acRLE+sLIDlIKEEQAKLGYRKEpIRLYYPLSSLNHFFpscsss-cMpctLpp............Fschhcp+LGclpVopKs-RFCFpIPEpGu-YVHEphptNEFI+-LIELlu+HGCTh--IhsLF+paS-plhhEcMsNGEFDhhIpFpp-s-DsYYYCFKDEGCHIIYHRFLPEDYsDFGF ..ap+LE+slIDlIKEEQhKLGYc+EsIRLYYPloSLN+Fhps..-s..s.tccMpptLpp............FscphpppLG-lclo.pKs-RFChpIPtcGupYVHEpsscsEFIK-Llplluc.HGCThE-lhclFc....pYS-.plhh....EchpsG.EFDhllpFp-ss.DsYYYCh+DEGCHlIYHRFLPEDYtDFsF... 0 3 4 4 +12835 PF12994 DUF3878 Domain of unknown function, E. rectale Gene description (DUF3878) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_0973 Family Based on Eubacterium rectale gene EUBREC_0973. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737). 
it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 27.00 27.00 63.70 63.50 20.50 19.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.05 0.70 -5.24 3 29 2010-06-18 22:00:08 2010-06-18 23:00:08 2 1 28 0 2 27 1 283.00 34 86.14 CHANGED pISRAFphLAElFEYQVFELCVuE-....pYlIPYMMNDAVECYLol+GsploGcYp+D-ElEsuupLttcEcRYGLIVHQGEENVFTLWFDELcEHpsCF+YHEIGHFWV-GQEQWRQLVYMIGTItEKYpYLGEEYCNEsEptIMSLIEFAPFRhWoPVsEDLEEa.YPAThEGIDCMEcLARcAuD+DYLKWIc+YR+aPo++hE+LLuR+LtDPKRQcLYEhICccVpsAS-sYPpRNYGEcINpRIQ+KRcpl-KKLLEcGFTGpYPpYpKKphoVsVTEEHPFTluhLEaEDFKFKlQ ................................sh.hLtplh-.p.FE.lh...h.s-p................phhls..YhMNDAVEsaLsFc..su+hoGpYpp..-h........E...sthtApl...ptp......-s..tYsLlV+Q......t-.sVh.T...laFcclp.chphYpYtcIGHFWVcGhE.hRpL.YhluhlpDKacYLGcchCs-pEtcLhpLh-FsPhp..h..YssVsEp.........c-.....asso..t..EulshMpclAtcssDcshl+hLchYc+pPs+hhp+hluthLpcs++tplhchlhcclppAopsYspRsaG...pp...hc...p+hp...c...h...tcphcp...cLhc...pGhpsh.................................................................... 0 1 2 2 +12836 PF12995 DUF3879 Domain of unknown function, E. rectale Gene description (DUF3879) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_1343 Family Based on Eubacterium rectale gene EUBREC_1343. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 
27.00 27.00 27.90 32.60 25.00 26.70 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.20 0.71 -4.70 3 33 2010-06-18 22:08:48 2010-06-18 23:08:48 2 1 13 0 7 33 6 165.70 48 87.36 CHANGED plNSSSVQcQLKAAGIDTNSKQYKAAlSEMM+sGNGAMYTNIQAIKNLMS+YDKDGDWINPsTGLAGLLVTDENcNS+KRIISIPESS+EEMFELTKKEFLpENGVpNGDTTKRo-VYNNLYRKM-KcDRLAAGYTLEcYERQYRQAFsDAAKsADPsWEAGKPIhAGALDcITRESAEoG.........RKSs- ................................sGIsTNSKpYKA.........s.........l..pp...........MMpstpthhapsh...tu.....IKNLMpQaDpsGDhlss.sGlsGhsVTscs.sSap+IlSlsEs.+pcMF-.sK+EFlpENGh.NGDTTKRpslapsh.hphsK-cRLuAGWTLEQYEtQYRpAhhtAsKsAsPsW+sGpshssuhLDslTRESsEus....................... 0 4 7 7 +12837 PF12996 DUF3880 DUF based on E. rectale Gene description (DUF3880) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_3218 Family Based on Eubacterium rectale gene EUBREC_3218. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), It appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 21.70 21.70 21.70 21.80 21.20 21.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.75 0.72 -4.07 48 247 2010-06-18 22:21:15 2010-06-18 23:21:15 2 10 156 0 76 245 24 76.70 27 17.81 CHANGED WhsDsPhhtlas.h.sl.hsshN.hIFhFDpsphppa+shGhpplaaLPLAssssRhs.hhtp..................t.ppapsDlSFVGs..hYpp ...............................WhsDsPhah..hs...h.....pl..hs..hs.hlFThDt.s.slpha+.s.h.GhppVa.aLP.LAsssph.apPhhtp.....................ppaps-lsFlGssa.................... 1 35 64 67 +12838 PF12997 DUF3881 Domain of unknown function, E. rectale Gene description (DUF3881) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: EUBREC_3695 Family Based on Eubacterium rectale gene EUBREC_3695. 
As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE14737), it appears to be upregulated in the presence of Bacteroides thetaiotaomicron vs when isolated in culture [1]. 27.00 27.00 106.10 105.20 21.80 20.00 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.72 0.70 -5.40 28 81 2010-06-18 22:31:02 2010-06-18 23:31:02 2 2 75 0 8 77 13 258.60 41 95.97 CHANGED LRAIGFSslpp+c-lcclLcplhcphsppphl..p.pcspcasEhp+-ausshGIsVpG-hDc.....sspFch-YYaPYapGssloopt-lslE++s-+EuYsGlCDDh+lGloLIFYLQNsh-Yhcp+....................thsphshphpulsLSGLuspGpILLPlpKscpppcp....pcppscs.RppLlsAA+sGDpsAIEoLTl-DIDhYStlSRRlhsEDlaSIVDThFMPaGlECDpYSllGEIl-lcpppNphTsEplY.hcL-CN-lpFclCINcpDLlGEPtVGRRFKGpIWLQG ............L+ulGFuplpp+p-lcclLcpl..c.p.hsp.pphl..p.pc....tpphsEhpp-h........usshGIslhG..ch-c.....pspFph-YYaPYhpusslooht-sslE++h-+EsYsGls-Dh+lGloLIFaLpNshEYhppc.....................htt..hpspulsLoGLuhpGpILLPlpKsppphcp....p.pptscsRppLlpAA+sGDpsAhEoLTl-DhDhYSplS+Rlh..pEDlaSIV-ohFMPhGlECDpYSllGEIhplcpppNthTtEplY.hplcsN-l.FcVsIscpDLhGEPtlGRRFKGplWhQG................ 0 6 8 8 +12839 PF12998 ING Inhibitor of growth proteins N-terminal histone-binding Wood V, Coggill P pcc Pfam-B_205 (release 24.0) Domain Histones undergo numerous post-translational modifications, including acetylation and methylation, at residues which are then probable docking sites for various chromatin remodelling complexes. Inhibitor of growth proteins (INGs) specifically bind to residues that have been thus modified. INGs carry a well-characterised C-terminal PHD-type zinc-finger domain, binding with lysine 4-tri-methylated histone H3 (H3K4me3), as well as this N-terminal domain that binds unmodified H3 tails. Although these two regions can bind histones independently, together they increase the apparent association of the ING for the H3 tail. 
23.70 23.70 23.80 23.70 23.60 23.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.25 0.72 -3.76 79 827 2010-06-28 12:29:44 2010-06-28 13:29:44 2 12 279 6 538 851 2 102.20 26 28.97 CHANGED shL-cal-sl-sLPtElp+hhs.l+-lDtp...hppthppl-pphccalppstt..............................ht.p....pcpphhppIpcphpcsppht-EKlplusphh-ll-++h++LDtchcph .......................................................hl-cal-s.....l-sLPh-lp+phs.h+-lDtp..........hpshhp.p.l...-phhpca..hppspp...................................................................................t.p...pcpphhppIppshtcstchuDEKlp.lupphh-l.l-++l++LDtclth................................................. 0 154 245 401 +12840 PF12999 PRKCSH-like Glucosidase II beta subunit-like Coggill P pcc Wood V Family The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing [1]. The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum [1]. 
26.30 26.30 28.10 27.00 26.20 26.20 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.49 0.71 -4.69 5 445 2010-06-28 12:42:30 2010-06-28 13:42:30 2 22 268 0 317 453 13 145.60 32 32.92 CHANGED lhlPlLuluLhlusA.lscLRGVuPDcLcLYpPD.ENGN..WKCLNcscIhLSFDQVNDDYCDCPDGSDEPGTuAC.pNG..KFYCsNcGaIPuYIPSF+VDDGVCDYclCCDGSDE..plG+CPN+CsElAcpacchpsE+Nspl+sGLKIKccllltup+KsDElpp+hcELccoLhAcppct ........................................hhhh............................hGl..t..thY.p........t.......a.pC..l.ss..s....th......lshsp..l..ND-aCDC.s.D.GSD.EPGTu.A...C....s...........ps....................tFaCp.....N...t.G..a..hs...h.....h.....lspsp..VNDGlCD...CCDGSDEh.............st....s...t..C......spCtpht.pt..pt..p..t.....p.hp...tu....ht.h.+t..hh....t.....t.h....t....t..............tp.tt.t...............tht........................................................................ 0 105 178 269 +12841 PF13000 Acatn Acetyl-coenzyme A transporter 1 Coggill P pcc Wood V Family The mouse Acatn is a 61 kDa hydrophobic protein with six to 10 transmembrane domains. It appears to promote 9-O-acetylation in gangliosides. 
102.60 99.30 109.50 100.10 102.50 99.20 hmmbuild -o /dev/null HMM SEED 544 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.79 0.70 -5.99 2 464 2012-10-03 03:33:39 2010-06-28 15:33:01 2 7 252 0 348 478 3 242.70 26 72.83 CHANGED phYhLIhLYLhQGlPhGLshGolPahLKs...psSaosLuhaShAsYPYSLKllWSPIVDohYs+phGRR+oWllPs.hl.u.sLhhhuasl..........Dsahs+GsuhlpSho....TW.F.LLVFlCATQDIAVDGWuLshLs.EpLSYASTAQTlGLNhGaFhSFTIhLshsSs-FANpahRsIP.scGhIoLuGYhKF.uhhhhlholhlhF..............................aD-us...pQphusIp.ha+shhtsLpLKshRpLhhlHhluKhuF.sNEshT.LKhhE.GhppE.LulhlLIshPhtlhhGhYss+hSs.+s............................Ls.WLhGahGRlsuhlLsohlV.......+pFP........hFh.lhhpahLsu.hsTlQFlulusFHo+luDPhlGGTYMTlLNTLSNhGGoWPphlhhpMhshhTV.pC.ThP...+lhsotsuphptC.phLsGTshhhRDGYYlTsllsIhlul.lhhuhlp.hlh+LpphPISSW+h........T ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p............................................................... 
0 113 181 282 +12842 PF13001 Ecm29 Proteasome stabiliser Wood V, Coggill P pcc Pfam-B_682 (release 24.0) Family The proteasome consists of two subunits, and the capacity of the proteasome to degrade protein depends crucially on the interaction between these two subunits. This interaction is affected by a wide range of factors including metabolites, such as ATP, and proteasome-associated proteins such as Ecm29. Ecm29 stabilises the interaction between the two subunits. 20.50 20.50 21.00 21.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 501 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.69 0.70 -6.01 45 308 2010-06-28 15:32:17 2010-06-28 16:32:17 2 15 231 0 217 302 1 426.60 25 27.72 CHANGED ls+V-LRl.ALA-sDccLcphLspaLsPlLLKLuSspsuVRpKVlcllpH.lNpRlp.u.tsIpLPVssLLcQacpss.....ssshV+pFsllYlppGl-RL.sssc+hpllPpllpGI...........S......ptshsptuhhFtllh.+lL..h+h..P.cssp-ppphcsp..htl.t-...p.sthLh.hhsph.hh.......t..................................ss.sssulssppsshhoh.t.....stsssh.s..pLpcsKhslLcFLsuuhh...............................s-tphhlshLlASuD............ss.ppluchu-phLK+hpss........hEsspllppLapLah...........................s.t.tptt.PspssLph+ILshLsKShtAssphspshplhptuLhos.h..ss+lcshshphh.................................spshpphssphlptlpshlhspshshh.......spsp.t......tpshplRshsYcslGhLsp+tsphh.c-......hsllpaLFcuL..cspss-ltsuIpcALuulhsuhsp..p.............................t.thphhh.lh.ttpp...........p.ttssRasAl+aANpsaPasDssuRalsllu 
......................lp+l.hRl.uhA-sD.ppLpphlppaLsPll....L....KLuS.s.pt.uVRp..............K.........V...............h...pllt...H.lspRlp.s.splpLPltsLl.Qappss................sshl.ppFsl.lal..phuhsRl.sspcphplhPtllpsh............................p......p.t.hhphhh.h..hhh...+h............p....s.p..p.t....pp.pph.....hl.s....h.phhLh.hh.hh...............................................................tsth......t...shhoh............st..ssh.h....p..pL.pphK....lsllpFl.tut.h...................................................s-.phhh.hlhAu.uD............sp.pp...l.ushu-..L..K+hts.....................hp..s.sllppLaplah..............................................s...t.th.Pspstlph+llshL.+.Sh.Asp....th..st.hpll.t..slh...us.........pt.+hcthshphh................................................h...ths..hlp..ltshlhsthh.hh......................tps.phhshuYpslGhL.upphsp.h...h.ppc..............hsllt.LFpuL.........ppp.tsphthuI.ppALsthhsshtt..t........................................t..hhh.h..lh.t.............................................................................p..htsR.sul+auspshs.pch.uRal.llu.............................................................................................................................................................................................................. 0 63 112 179 +12843 PF13002 LDB19 Arrestin_N terminal like Coggill P pcc /wood V Family This is a family of proteins related to the Arrestin_N terminal family. 
21.60 21.60 21.90 21.60 21.30 21.30 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.25 0.71 -4.80 5 121 2012-10-02 22:29:00 2010-06-28 16:40:07 2 3 103 0 96 133 0 181.80 37 30.53 CHANGED VpLsLlQKVHFHKPFhsshpuIQTCpNCpoKTTcl+SWDIQsNTs-LsVGoasaPFSaLIPGSlPAoooLGusuETplKYELpAsVsYhD............PctthSs.uKcplLpLsMPIsVTRSlhRGPDKNSLRVFPPTELTAsAVLPNVVYPKSoFPLEMKl-GlSpGD....RRWRMRKLoWRIEEpTRI+uHAC-pHKH-L ...................................hphhtplphpKP..............s..........hpsCtsCpsphs-LppWph....L.spst..s.LppG.p.HsaPFSaLlPGpLPuSss.......s.s.l..sp..lpYcLpAp.uph.....................ss.up...t..lphphPlslpR..ol.h.t.G.s.D..+..sSlRl....FP..P.T.slsAsssLPsVlaPtu.o.FPlph+LcGl..s....s..ts......pRWRlRKlsWRlEEpt+lhu.ACs+Httc........................................................................... 0 18 48 84 +12844 PF13003 MRL1 Ribosomal protein L1 Coggill P pcc manual Domain This family includes putative ribosomal L1 and L10 proteins and fragments. 21.90 21.90 23.10 22.40 21.40 20.80 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.91 0.71 -4.34 7 64 2012-10-01 21:21:48 2010-06-29 16:43:58 2 1 40 0 28 58 2 142.90 51 46.98 CHANGED M..................................................hs..pshc.st.htscppthsplph..............................................................slYh..............................................................................sslhhsp.hhsphppssVs ...................................hhtt.......................h...hs.RpaAAA.....pKs..sKKsKKss.KcKss-EKp.D-lEKhKsYsaMEuEPEDDVYLKRLYPRpIYEVEKAlcLLKKFQhLDFTsPKQ..............sVYLDLTL.DMsLt....K.K...KKVEPFs............................................................SslpLPYPFsSElNKVsVF. 
0 2 5 11 +12845 PF13004 BACON Bacteroidetes-Associated Carbohydrate-binding Often N-terminal Coggill P pcc Rigden D Domain The BACON (Bacteroidetes-Associated Carbohydrate-binding Often N-terminal) domain is an all-beta domain found in diverse architectures, principally in combination with carbohydrate-active enzymes and proteases. These architectures suggest a carbohydrate-binding function which is also supported by the nature of BACON's few conserved amino-acids. The phyletic distribution of BACON and other data tentatively suggest that it may frequently function to bind mucin. 20.00 13.30 20.00 13.30 19.90 13.20 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -10.03 0.72 -3.75 432 1404 2010-07-09 08:33:15 2010-07-09 09:33:15 2 119 153 0 197 1347 35 73.70 18 20.89 CHANGED sls..............ls...............................................ssss...W...l..........sls.tst.....................tts.lsls....lstN...ss.tp...t....Rp.upl...pl.....................t........tp........ts.......l.sl....p..Qtu .......................................................................hp...ss..................................spssss.....W........l...........sls.sst...........................tpsplsls..ss.t.N....ss.sp...t..Rs..upl...pl.............pss....shs........tp......l..sl..p..Qt....................................... 0 119 177 197 +12846 PF13005 zf-IS66 HTH_Tnp_IS66; zinc-finger binding domain of transposase IS66 Coggill P pcc IS-finder Domain This is a zinc-finger region of the N-terminus of the insertion element IS66 transposase. 20.00 20.00 20.00 20.00 19.90 19.80 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.54 0.72 -3.99 190 3219 2011-09-20 07:06:31 2010-07-12 16:03:04 2 18 1017 0 565 2843 332 45.40 39 10.71 CHANGED psptCss..CGs.plp..plGc-.l..sEpL-hlP.uphc.VhcahR.+auCpp..C.c ..................ppsCPs....CGG.pLp...hlG--.s...uEpL-lls..ush+.VI.cphR.KhACsp..C-............ 
0 111 286 407 +12847 PF13006 Nterm_IS4 Insertion element 4 transposase N-terminal Coggill P pcc IS_finder Family This family represents the N-terminal region of proteins carrying the transposase enzyme, DDE_Tnp_1 (that was Transposase_11), Pfam:PF01609, at the C-terminus. The full-length members are Insertion Element 4, IS4. Within the collection of E.coli strains, ECOR, the number of IS4 elements varies from zero to 14, with an average of 5 copies/strain [1]. 21.80 21.80 22.00 22.10 21.20 21.10 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.07 0.72 -3.97 36 1119 2010-07-12 15:15:56 2010-07-12 16:15:56 2 6 317 0 125 971 61 88.10 61 26.57 CHANGED sshsslusLschlPh-hl-pslptTGpushR+.RcLPuchsVahVluhuLapcps..hp-VhppLshsLss................hphsusSAlspARpRLGscPlctLFcp .............SL..RNPLTSLGDYLsPELISRCLAESGTVTLRK.RRLPLEMMVWCIVGMALERKEP..LHQIVNRLDIMLPG...............sRPFVAPSAVIQARQRLGuEAVRcVFs.c..................................... 0 19 55 80 +12848 PF13007 LZ_Tnp_IS66 Transposase C of IS166 homeodomain Coggill P pcc IS_finder Family This is a leucine-zipper-like or homeodomain-like region of transposase TnpC of insertion element IS66. 22.90 22.90 22.90 22.90 22.80 22.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.91 0.72 -3.32 254 3061 2010-07-12 15:45:03 2010-07-12 16:45:03 2 20 942 0 489 2621 269 73.50 31 17.94 CHANGED ppLcpplthhcRphFGp+SE+h..s.......tQhpL...h...p..-h-sststht.........sphcptsttsss.............ttt+pc..ss.RpsL.P.scLP.Rtch .................cLptpltpLpRhhFGppSEKl....sc...............tQhch.........h..p....clptptsphp.....................tp.tc..ts......................t..+pp...ppR+PL.P.tpLP.R-p.p..................................... 
0 97 233 341 +12849 PF13008 zf-Paramyx-P Zinc-binding domain of Paramyxoviridae V protein Karlin D, Coggill P pcc manual Domain The Paramyxoviridae, which include such respiroviruses as para-influenzae and measles, produce phosphoproteins - protein P - that are integral to the polymerase transcription-replication complex. Protein P consists of two functionally distinct moieties, an N-terminal PNT, and a C-terminal PCT [1]. The P gene region transcribes proteins from all three ORFs, and the V protein consists of the PNT moiety and a more C-terminal 2-zinc-binding domain. This conserved region consists of the two-zinc-binding section sandwiched between beta sheets 6 and 7 of the overall V protein. It is the binding of this core domain of V protein with the DDB1 protein (part of the ubiquitin-ligase complex) of eukaryotes which represents the key element of the virus-host protein interaction [3]. In the Henipavirus family which includes Nipah and Hendra viruses, the V protein is able to block IFN (interferon) signalling by preventing IFN-induced STAT phosphorylation and nuclear translocation [2]. The P gene of morbillivirus is co-transcriptionally edited leading to a V protein being produced. 25.00 25.00 33.20 47.40 21.50 16.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.16 0.72 -4.77 26 152 2010-07-13 14:16:56 2010-07-13 15:16:56 2 5 76 3 0 147 0 46.90 50 16.38 CHANGED SlsWs..supsplpcWCNPtCuPlsstspptpCsCGpCPphCspCtpDs ......SlsWs..ssclhlpcWCNPhCS.lTsssp+tpCpCGpCPphCcpCcpD... 0 0 0 0 +12850 PF13009 Phage_Integr_2 Putative phage integrase Coggill P pcc Pfam-B_5288 (release 24.0) Family This family is found in association with IS elements. 
22.80 22.80 76.30 41.10 22.70 22.20 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.14 0.70 -5.49 22 56 2012-10-02 14:09:14 2010-07-21 11:59:58 1 14 53 0 20 60 8 318.40 30 34.49 CHANGED PphtpWtphupcalptpsp..uhsp+hpuLspFltpYlh..shPh...sPtsahp........sppph.shlpth...........ssspspphhshlppFlcalLpphho.pD...Dp.Gp.lh.stapNPlsph..shpsh.tt...sETs+ssLPhpaIpph+phLsss..........................papDatWspp..............ssWh.Vs.p..lscsDscClaRshth.p................htplWSPVpshsLashLpLPlRohQlRhLDSGEuDshhY....ppt.spWlhN.ssphA........ppshpcGhh.++hp-st................t.sGLalsTNKTup.............sp.ppGYsIPWt.....pp-lhaWLh+LRpWQpKY.NPIscsTsWp- .........................................................................................................................phttWtphht-alpsppt..shsp+hpuLstFhppYlhtpshsh...sPttahp............hstpph.phlcpt...........sssptpphhshlpsFlcallpchhoppD...-s.Gphlh.sta+NPls+h.....shps..p.....sEos+ssLPYpYIpchRphLsst..........................pFpDapaApp............ssDWhtVs.p..IDcs..........DPDC..VaRtpps.t................hhplWSPV+hhslashLpLPLRshQlRhLDSGEADshha........pt..spWhhN.psthA........t..ppshppGlh++hpcpt...................sGlalsTNKT...............st.tpGYhIPW........sp-lhYWL.KLRsWQpKY.NPIspPTsWs.............. 0 5 9 14 +12851 PF13010 pRN1_helical Primase helical domain Bateman A agb Jackhmmer:Q54324.1 Domain This alpha helical domain is found in a set of bacterial plasmid replication proteins [1]. The domain is found to the C-terminus of the primase/polymerase domain. Mutants of this domain are defective in template binding, dinucleotide formation and conformation change prior to DNA extension [1]. 
27.00 27.00 142.00 142.00 23.10 20.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.86 0.71 -4.54 9 9 2010-07-21 11:16:13 2010-07-21 12:16:13 1 3 5 4 2 12 0 132.40 48 16.02 CHANGED tG+cEcp-...tc-hEKL+cEhsKYs+a+GKTlEAIRpElCpclK+pl..........ppKhpthhphshpVlC-uKoYu-lGIDRSRGDW+VlphLhoHGVTDlDhlhQLLPpDSKVas.PKWD..KYFlHTLtKAWphVK.aLch ..t.scpEcp-..s-c-hEKL+cEhsKYs+a+GKTlEAIRpElCpclK+pl..........ppKhpthhphshpVlC-uKoYu-lGIDRSRGDW+VlphLhoHGVTDlDhlhQLLPpDSKVas.PKWD..KYFlHTLtKAWphVK.aLch. 0 1 1 2 +12852 PF13011 LZ_Tnp_IS481 leucine-zipper of insertion element IS481 Coggill P pcc IS_finder Family This is the upstream region of the conjoined ORF AB of insertion element 481. The significance of IS481 in the detection of Bordetella pertussis is discussed in [1]. The B portion of the ORF AB carries the transposase activity in family rve, PFAM:PF00665. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.80 0.72 -3.64 2 445 2012-10-04 14:01:12 2010-07-21 13:08:03 1 5 189 0 209 1294 169 81.90 41 27.81 CHANGED M.sHtpARLTlhGRsLLVpRVhppphshtpsupthGVShpsua+WlsRFRuEGLcGLhDRSSRP+psP+tsuPEp.cthhptRtQ ........................................M.sHpNAh.LT....htRh...chs..pp..ll..p.p.th.s...lscAAc...taGVS.htT.sp+WhsRaRspGt.s.Gh..s...D..R..S...S........R...P..p.p..o.P.p.p..h.s.s.th.tp.tIlplR..h................................................... 0 121 173 198 +12853 PF13012 MitMem_reg Maintenance of mitochondrial structure and function Wood V, Coggill P pcc [1] Family This is C-terminal to the Mov24 region of the yeast proteasomal subunit Rpn11 and seems likely to regulate the mitochondrial fission and tubulation processes, ie the outer mitochondrial membrane proteins. This function appears to be unrelated to the proteasome activity of the N-terminal region [1]. 
22.10 22.10 22.20 22.70 21.80 21.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.52 0.71 -4.00 148 1224 2010-07-21 12:18:05 2010-07-21 13:18:05 1 19 336 4 844 1138 8 118.00 23 35.91 CHANGED pEsEc...........l......ulctLl+............slpcpt........ho.sl....s.p+hptptpuLtsLpp+lhphstYLpsl......ttphshspplhpplpslhsLlP..sh.......................spph...........pcphphpssD..plhlhYluplspsph...slpsllssp ..................................................................Escp....h..........ulptLh+...................slpcpt.........hu..sl....s.p+lptptpuLtsLpp+ltplttYlpcl.................ttph..s.h....s....ppl...hpplpsl..hsL.hP..sh................................spph....................pc.thphpssD.....pLhlhYluplspsph.......slpphlpp........................................................... 0 315 475 695 +12854 PF13013 F-box-like_2 F-box-like domain Wood V, Coggill P pcc Wood V Family The F-box domain has a role in mediating protein-protein interactions in a variety of contexts, such as polyubiquitination, transcription elongation, centromere binding and translational repression. 21.60 21.60 21.80 21.70 21.50 21.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.53 0.72 -4.34 7 39 2012-10-02 00:56:31 2010-07-21 15:07:26 1 2 22 0 34 54 0 97.80 19 19.17 CHANGED sostos....huosps..tp.l..tlhDLP-ElLphIhphChctphh..hshs.shRpppch.......................................................hl..spsCptl+pl.............p.hpshhsh.s.hhhpshph...stp ...............s..............p..phtsl..plhD.LP.-LLphlhcpCspsphhtl.ppsp.shh..shtph.....................................................................hl....spppphhpt..............................................hh.......................................................................................... 0 24 26 31 +12855 PF13014 KH_3 KH domain Coggill P pcc Wood V Domain KH motifs bind RNA in vitro [1]. 
This RNA-binding domain is required for the efficient anchoring of ASH1-mRNA to the distal tip of the daughter cell. ASH1 is a specific repressor of transcription that localizes asymmetrically to the daughter cell nucleus. RNA localisation is a widespread mechanism for achieving localised protein synthesis [2]. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.68 0.72 -7.98 0.72 -4.24 992 970 2012-10-02 00:34:43 2010-07-21 15:11:14 1 161 261 0 679 13179 2478 42.40 26 10.20 CHANGED hhutlI..G+s.Gp.s...Ipplp.ppo.u.sp.Iplsp..............sss.spchlplpG ...........hGtlI...G+s..Gp.s.......Icplp..ppo.G.sp.Iplsp.............................sss.spchlplpG.................................... 0 314 459 633 +12856 PF13015 PRKCSH_1 Glucosidase II beta subunit-like protein Coggill P pcc Wood V Family The sequences found in this family are similar to a region found in the beta-subunit of glucosidase II (Swiss:P14314), which is also known as protein kinase C substrate 80K-H (PRKCSH). The enzyme catalyses the sequential removal of two alpha-1,3-linked glucose residues in the second step of N-linked oligosaccharide processing [1]. The beta subunit is required for the solubility and stability of the heterodimeric enzyme, and is involved in retaining the enzyme within the endoplasmic reticulum [1]. The beta-subunit confers substrate specificity for di- and monoglucosylated glycans on the glucose-trimming activity of the alpha-subunit [2]. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.89 0.71 -4.75 5 406 2012-10-02 14:19:22 2010-07-21 15:42:45 1 19 259 0 274 570 7 146.00 26 30.43 CHANGED Lp+clDEhcc-IcsI-p-lotlhEsLNpcaGhDDIaRAl-GppsscKlGGYsY+lsFhuSlaQ.....-DIpIGsFcctE...........Gs+LhY-cGuKCWNGP+RSAIVcVECGcsN-LlSVuEPEKCEYplpV+SPAuC...sss.hhKSLscE..EphAsF+I........s-hDEL ...................................................................................................................................tp..................t.......................t.......th...G....p.t.....at.....Lts..p...Ch.p..h.ptsp.Y.hYc..hC.........a...pp...spQp....................sttps.......p.........lGp...a...p....p..a.ph.................................................tph..h..s...h..h..ap..s..Gs.pC......W....N.....G.P........s.R.o.sp.......Vp.LtC....G...p.....p....s....c.....l....h..p.......V...s......E..Ps+..CpYthphpTPtsC...p................................................................................ 0 110 156 224 +12857 PF13016 Gliadin Cys-rich Gliadin N-terminal Coggill P pcc manual Family This is a cysteine-rich N-terminal region of gliadin and avenin plant proteins. The exact function is not known. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.13 0.72 -3.78 28 2134 2012-10-01 19:46:35 2010-07-21 17:26:44 1 3 145 0 43 2573 0 79.20 44 32.15 CHANGED QQQ..........hp..l.ppQLsPC+sFLhQQCsP........hhhsahpSphhQpSsCQVhQQQCCQQLtQIPcQsRCpAI+slVpAIIh ....................................................................................................................QQQ............hlQspl..pQQLsPC+sa.LhQ.Q.Css.......sh..sh..s...p.Sp.hhtpS.uC.p.lhQQQCCQQLsQIPpQsRCpAI+ullauII.......... 
1 0 3 21 +12858 PF13017 Maelstrom piRNA pathway germ-plasm component Zhang D, Coggill P pcc Zhang D Family Maelstrom is a germ-plasm component protein, that is shown to be functionally involved in the piRNA pathway. It is conserved throughout Eukaryota, though it appears to have been lost from all examined teleost fish species. The domain architecture shows that it is coupled with several DNA- and RNA- related domains such as HMG box, SR-25-like and HDAC_interact domains. Sequence analysis and fold recognition have found a distant similarity between Maelstrom domain and the DnaQ 3'-5' exonuclease family with the RNase H fold (Exonuc_X-T, Pfam:PF00929); notably, that the Maelstrom domains from basal eukaryotes contain the conserved 3'-5' exonuclease active site residues (Asp-Glu-Asp-His-Asp, DEDHD). However, the animal and some amoeba maelstrom contain another set of conserved residues (Glu-His-His-Cys-His-Cys, EHHCHC). This evolutionary link together with structural examinations leads to the hypothesis that Maelstrom domains may have a potential nuclease-transposase activity or RNA-binding ability that may be implicated in piRNA biogenesis. A protein function evolution mode, namely "active site switch", has been proposed [1], in which the amoeba Maelstrom domains are the possible evolutionary intermediates due to their harbouring of the specific characteristics of both 3'-5' exonuclease and Maelstrom domains. 
18.90 18.90 18.90 21.20 18.70 18.80 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.26 0.70 -4.85 27 133 2012-10-03 01:22:09 2010-07-21 17:42:00 1 4 76 0 89 141 6 189.70 29 46.12 CHANGED +......sschalPAEluhscaSLcp.G.lhs.apshIsPGpl.hG.hthcsppHop.sTHp.lPlsssshGcpshsplhpplhpalptp.........p........c.hs......................laopscplshVpsChcaLt............spsp...t..............lpVhslp.Lhhsl+ppshpht...p.h.shpls..sshhppsha-apsshuCpaHEc...spspaCshSh.VpRauahhschh...CtDluIphhss+H..........lP.ptcs .................................................p...spptalPsEluhscaSLpp.G..IhspaHphIs.......P.G.plshG.hthcspttop.soHp.lP.lss........t.huc.shsplhpplhp...alpss.................t.........phs.......................laspscphshVp.Clcahs............ptsthppp..................lplhslppLhhtlhpp....th.........pp.stphs...pshhshsha-asssh..pCcaHEc...Dh.haCuhuh.sp+hu..aslss.h...sp.hulplp..tH...lP.p..s..................................... 2 26 30 64 +12859 PF13018 ESPR Extended Signal Peptide of Type V secretion system Coggill P, Desvaux M pcc Desvaux M Domain This conserved domain is called ESPR for Extended Signal Peptide Region. It is present at the N-terminus of the signal peptides of proteins belonging to the Type V secretion systems, including the autotransporters (T5aSS), TpsA exoproteins of the two-partner system (T5bSS) and trimeric autotransporters (TAAs). So far, the ESPR is present only in Gram-negative bacterial proteins originating from the classes Beta- and Gamma-proteobacteria. ESPR severely impairs inner membrane translocation, suggesting that it adopts a particular conformation or it interacts with a cytoplasmic or inner membrane co-factor, prior to exportation. Deletion of ESPR causes mis-folding of the TAAs passenger domain in the periplasm , substantially impairing its translocation across the outer membrane [3]. 
20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.46 0.72 -6.70 0.72 -4.30 290 2495 2010-07-21 16:49:15 2010-07-21 17:49:15 1 530 854 0 281 2182 23 24.40 44 1.62 CHANGED MN.+.lY+llWNcspss......alsVSE....l..u+ .....MN.K.lY+llWNcspus......alVsSE....l...A+........ 0 52 130 207 +12860 PF13019 Telomere_Sde2 Ubiquitin-like; Telomere stability and silencing Wood V, Coggill P pcc Pfam-B_2457 (release 24.0) Domain Sde2 has been identified in fission yeast as an important factor in telomere formation and maintenance. This is a more N-terminal domain on these nuclear proteins, and is essential for telomeric silencing and genomic stability. 26.40 16.00 26.60 16.00 26.30 15.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.01 0.71 -4.55 20 270 2012-10-03 10:59:06 2010-07-21 17:50:03 1 9 236 0 202 1794 27 145.00 31 39.30 CHANGED lNVLlooFsGhsLPsTLulsLPsooolo-Lhcclss+lPtohsp.....hhLTTsSN+tl.sssssslSsLlussss............shLsLRLss.LpGGKGGFGSQLR.AAGGRMSS++KpN..t-sNuSsRNLDGRRLRTVsEAKALAEYLAlKPEM-..........+K-KEtRR..................cRWcslVctsE+ .........................................................................................................................................................................hhshpLthR...LpGG..KG...GFGShLR.AhGuph...cKosN.......p-uC...RDL.s.GRRLRsVNctKtltE.altppsEc-..............+c...p+.ct....c.+..................pphcth.t................................................................................. 0 73 113 166 +12861 PF13020 DUF3883 Domain of unknown function (DUF3883) Mistry J jm14 Jackhmmer:O25123 Family This is a domain is uncharacterised. It is found on restriction endonucleases. 
22.10 22.10 22.20 22.10 22.00 21.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.28 0.72 -4.25 140 729 2012-10-11 20:44:46 2010-07-22 11:22:32 1 17 639 0 217 541 130 96.80 24 14.27 CHANGED sEphshphhhpt.ppt.........s.............ahsccs.t.........ps...hGYDItuh...........p..........sGp......p+a..IEVKupt............tttssht.lopNEhthAp..pps-..pYhLhhVhss.........ssps....phhhl ......................................hEphshphhpph.ptp.................s........................hphscspt..........pG.tGYDlhuh.........s...............psp....................pha..IEVKupp..........ptpsshh.lotsEhthup......ph..p..p..cYhlhhVhsh...scts..ph...h.......................... 0 88 154 191 +12862 PF13021 DUF3885 Domain of unknown function (DUF3885) Mistry J, Aldam G gba Pfam-B_1173 (release 24.0) Family A putative Rac prophage DNA binding protein. This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved YDDRG sequence motif. There is a single completely conserved residue D that may be functionally important. 21.80 21.80 22.40 24.00 21.10 21.30 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.82 0.72 -4.25 21 280 2010-07-22 13:00:10 2010-07-22 14:00:10 1 2 244 0 30 151 2 37.40 48 20.14 CHANGED hHlYDDRGsDllusspcpLpsLYccascWILD..YDRpcI .aHlYDDRGh-lhssNsEshtchhcKYpDhI.-.aDpEcI....... 0 9 21 26 +12863 PF13022 HTH_Tnp_1_2 Helix-turn-helix of insertion element transposase Coggill P pcc pdb_2ao9 Domain This is a family of largely phage proteins which are likely to be a helix-turn-helix insertion elements. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.96 0.71 -4.39 4 30 2012-10-04 14:01:12 2010-07-22 14:02:37 1 1 29 9 7 63 4 133.90 38 93.68 CHANGED Mu+.hKpLEupLohcQRcAA.LLVtN...ELh.Ess.EKKTQ-ElApELGlsRhsLacWRTQspsFIsYhNhlADchLup+RspVYsQLM+hIpGsQ...PSVKAlDLFM+RaGLLTDKpVIE...Dsssssp+ost-IcpElscLsALl...K.c ...................Mu+.hccLctpLThpQhpAA.lLspN.....Elh.ps...s..cK+..TQ-ElApElGlsRoTLac.W.Rs.cNpsF.I.sa.p...s...clADshLup+RppV.....YspLh.phIhGsQ.......PSVKAhpLah.pRaGLLTD+.p.llc......sc.hssus+oN.t-IccplpcLpclht..c.......................................................... 0 3 4 4 +12864 PF13023 HD_3 HD domain Mistry J jm14 Jackhmmer:O25415 Domain HD domains are metal dependent phosphohydrolases. 21.60 21.60 21.60 21.60 21.40 21.50 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.12 0.71 -4.50 143 2536 2012-10-01 20:28:14 2010-07-22 14:13:12 1 13 2105 12 718 2045 280 160.20 26 71.76 CHANGED h-+LKplhRpshhtss.p.RtEssAEHoW+lAlhuhlluths..s....ttlDhs+slpMh...LlHDlsEl.sGDhhshs....t.hsptt....pptcEppAtc+lash....LPps......tu.........pchtsLWp..............EFEs.......spos-A+a.............A+slD+hpPlltshtspspshtth.plphsphhsp.sptltps.tspla ....................h.s+LKhhpR..Wsh.....h.t...p........p...s...EsluEHohplAh....hA....h...h....l....s....t...hh.....s...........pl.s..h...p...+l...h.hhs.............lhHDhsEsl.sGDlssss.....................s...h...tpp......h...c...t...hE..p.t.A...pc.lh.sh.....lPpp..............hp.............................cpht.s.L.hp..............................Eapt............tps...-.u.ph............................s+thDtlpshl...ps...h.p.t.t..ts...tth...ht.t...................hh.............................................................................. 
0 211 416 587 +12865 PF13024 DUF3884 Protein of unknown function (DUF3884) Mistry J, Aldam G gba Pfam-B_1352 (release 24.0) Family This family of proteins is functionally uncharacterised. However several proteins are annotated as Tagatose 1,6-diphosphate aldolase, but evidence to support this could not be found. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 106 amino acids in length. There are two completely conserved residues (Y and F) that may be functionally important. 21.70 21.70 22.20 22.00 20.20 17.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.88 0.72 -4.57 12 338 2010-07-22 13:17:09 2010-07-22 14:17:09 1 1 312 0 12 116 0 66.60 42 86.39 CHANGED lacpVYllsFpchsp..phss..cLppLGcWhssoGppWhCHS...cLshs-FcptFhp..pLsssphthhpht.sahPap ........lY.lpF.chs....hsu..ccLKpLGcWhsoTG+.WhCHS...chph-EFKphFLp..hlss-chch.s.sssah.hp....... 0 1 3 7 +12866 PF13025 DUF3886 Protein of unknown function (DUF3886) Mistry J, ALdam G gba Pfam-B_1536 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two completely conserved L residues that may be functionally important. 25.00 25.00 31.80 31.10 20.30 19.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.50 0.72 -4.01 17 157 2010-07-22 13:20:39 2010-07-22 14:20:39 1 1 157 0 25 101 0 70.30 53 82.87 CHANGED p......ptppcthoLKDtLss-lhtpLKttKcpLctcEpcRccccctc+tccp+c+EK..sKSFEELLsESpL.DW+c ...............QsccEslTLuDpLN-sLhppLKsKK+ELpscEE+KcAsEhcR+RpEpKEREK..NKSFEELLsESsL.sWKc.......... 0 6 15 17 +12867 PF13026 DUF3887 Protein of unknown function (DUF3887) Aldam G, Mistry J gba Pfam-B_1534 (release 24.0) Family This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 128 and 167 amino acids in length. The proteins in this family contain an N-terminal lipid attachment site. 24.50 24.10 24.60 24.20 23.10 23.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.40 0.72 -3.73 5 144 2010-07-22 13:28:10 2010-07-22 14:28:10 1 1 102 0 6 78 0 101.00 50 66.99 CHANGED KEsKEcVcQSAE-pEsM...+sY+-VHcK.YDpKMNKELNpulpLaEpAKEKGGKsIspssaKEDVQKITsSMLEDID...HlRTEIRVPKSKEQEHclYlGFLNEoEQA ...KpsKEKVcpusEps.-h...+cY+tVHEK.aD..KMscpls.....hEsuKEKsucpIspAs.hhE-lpKl.Ts.shlEDhD......c.lpptIpVsK...cEpc+plhluFhsEsEpA... 0 1 4 4 +12868 PF13027 DUF3888 Protein of unknown function (DUF3888) Mistry J, Aldam G gba Pfam-B_1080 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 111 and 149 amino acids in length. 22.20 21.80 22.20 21.80 21.80 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.76 0.72 -4.13 23 254 2010-07-22 13:30:48 2010-07-22 14:30:48 1 3 127 0 20 172 0 85.40 40 59.20 CHANGED hpDslhshLtP.IscslpcaYuc........h.a..ppscllclc+hts.spa...Fhlslclpsa.....Gs+ssshupDplThpls.....stscllcacHhc .....hEsALhshLashIppslEcQYt.........h.a..pCsclI...slK+lh.p.spal..Fplolphpsap....GspsPP.-KVTlThpsp......sclclpKlppc.... 0 4 15 16 +12869 PF13028 DUF3889 Protein of unknown function (DUF3889) Mistry J, Aldam G gba Pfam-B_1146 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two completely conserved residues (A and Y) that may be functionally important. 
25.00 25.00 46.20 45.40 23.20 17.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.09 0.72 -4.33 19 153 2010-07-22 13:45:40 2010-07-22 14:45:40 1 3 127 0 20 104 0 96.20 45 78.51 CHANGED llhshhhsh.hstsuhspsp...sshuKWt+lAhpcsKc+YPtAcllDYhalGRpptssp.oh-pF+lhL+cssK-FGVaVclpFcshTp+lhpIsl.Ep ..............lLhhsosushhsssulVcAp...PsYAKWGKLAVcKTKEpYPcA-IlDYLHlGR+s+TsphTVEKFKLWLREcGKEaGVFV-VpF-opT-KhIKloap+... 0 6 11 14 +12870 PF13029 DUF3890 Domain of unknown function (DUF3890) Mistry J, Aldam G gba Pfam-B_1148 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 70 amino acids in length. 22.80 22.80 24.40 26.40 21.00 19.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.24 0.72 -3.93 6 114 2010-07-22 13:48:49 2010-07-22 14:48:49 1 1 28 0 9 75 0 76.90 64 51.96 CHANGED Mspppsss-s.................lsclYpcIltLLslsccclSFpcFp.ahpLLEhIL.oRGIslpsLNhSphhLLlYYaI ..MSEQKsLQsQltuEEE.............LLVTKLHSEVLLLLGIDchALSRQNFLLHLSLLQAILVTRGIDASSLTYEQIFLLTFYHM. 0 6 6 6 +12871 PF13030 DUF3891 Protein of unknown function (DUF3891) Aldam G, Mistry J gba Pfam-B_1216 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 250 amino acids in length. 
21.40 21.40 21.40 21.90 21.20 19.60 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.36 0.70 -4.87 29 151 2010-07-22 13:55:02 2010-07-22 14:55:02 1 2 146 0 32 130 3 223.80 44 89.22 CHANGED MIlp....pptp.sahlIpQpsHuhluGplAtpWpps.hts.sphhtpslhAltpHDcGWhch-..tsPhls.....psutPhsFhchP...sph+lthappulspstpps.YuuLLsShHhstlhpppt...............spphppFlspphp+............QcplhpsLs..................hp........pt.hcppYtLLphsDpLSLhlChsc.....Puspctt-lshhtsu...........tpphpsp.tsssplplsPaPFppc.hslslph+tlspp ........MIhR....Ecs-.c.lLIcQHDHGaLAGEIA++h+-chF..........Es.c..........sahcETlsAIYEHDRGWIELD..+sPILND....ucshPYTFhDsP...........oslRhlFYolGLsEl.EssNPYuALLCS+HahSFshNc-.........................D-EhhuFaptEL-RQKRlLpsLo..................................p-........phshhD+HY+LL+FCD-LSLYVCMNc.........PGscKcpEIslFK-GFctoc...hssppcc.lpAcWlD-pTI+IoPFPFps-.FpshV+YKslsK........ 0 13 26 28 +12872 PF13031 DUF3892 Protein of unknown function (DUF3892) Aldam G, Mistry J gba Pfam-B_1252 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 87 and 104 amino acids in length. 22.30 22.30 22.30 23.60 22.10 20.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.94 0.72 -3.58 62 309 2010-07-22 13:57:37 2010-07-22 14:57:37 1 1 292 0 68 216 6 75.70 33 78.93 CHANGED plsslp+ss.tus....Isplt......hssspth...ohppslphlcs..G..............thssVtVh.......tspsG.pcYl+op..sDuspssNL.sL...P ..............................pIssl+Kss..cGs....lhthp......tssGpph...sh.pulp.hcs.G.....................plstVsVh.........p+pG.pcalRop..sDGtppsNLtsLP............. 0 27 50 58 +12873 PF13032 DUF3893 Domain of unknown function (DUF3893) Aldam G, Mistry J gba Pfam-B_1590 (release 24.0) Family This presumed domain is functionally uncharacterised. 
This domain family is found in bacteria, and is typically between 123 and 144 amino acids in length. There is a single completely conserved residue E that may be functionally important. 22.10 22.10 22.10 22.80 21.70 21.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.97 0.71 -4.06 15 111 2010-07-22 14:00:18 2010-07-22 15:00:18 1 2 106 0 18 96 1 127.70 47 17.45 CHANGED lIRINss.....-VPsahsh-..p-pshsps....GLat.........cppslYYSsGphshstss.....p........................hhpppllEIlslGs..c.pcEt-plAchl...Hhhp..pushhhpcslphPhPhHhsKh.lKcYlss..Dshthcp.--p.c.-h ............................................lRI..Nsu....FDVPpYGVIE.....sD.....-sLDss................uLYh..........DQKGMYYSTGEYS...hNsSt...............................................hhp+YILEIhPLGV.............K.sVERsYIAKMl...HYMC.CNS.SMLhcKN..lHMPYsMHMAKV.IKsYhTD.IDAREFKEFDDELDVDl............. 1 1 13 16 +12874 PF13033 DUF3894 Protein of unknown function (DUF3894) Mistry J, Aldam G gba Pfam-B_1594 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 66 and 79 amino acids in length. There are two conserved sequence motifs: FNIC and MALLNLT. 25.00 25.00 67.90 67.90 20.90 18.00 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.92 0.72 -3.96 3 82 2010-07-22 14:03:13 2010-07-22 15:03:13 1 1 82 0 3 25 0 52.10 94 74.74 CHANGED LATFNICSYLVAIVCMALLNLTFVlGAFQQKQYTSFVhALhM.FSFSIVAIlLl LATFNICSYLVAIVCMALLNLTFVIGAFQQKQYTSFVIALVMSFSFSIVAIVLY.... 0 0 1 1 +12875 PF13034 DUF3895 Protein of unknown function (DUF3895) Mistry J, Aldam G gba Pfam-B_1598 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 
There are two completely conserved residues (Y and L) that may be functionally important. 25.00 25.00 25.30 30.80 20.40 24.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.47 0.72 -4.50 11 104 2010-07-22 14:05:47 2010-07-22 15:05:47 1 2 87 0 5 53 0 76.10 69 54.50 CHANGED L.sphppchlp.alpsp.....ptIoAp-lCEhLIcpsussscpYoTsKsKIYs.VC.aL-aLspcGhlhhlcshshpDRlYtp ...........LSPLQKDILE..LlsS.....EEISALELCEpLIRuGKIsDERFTTNKPKAYGQVCLlLEGFV+EGKLIFVKsDEKRDRVYK.L............... 0 0 3 4 +12876 PF13035 DUF3896 Protein of unknown function (DUF3896) Mistry J, Aldam G gba Pfam-B_1603 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 122.80 122.60 20.70 20.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.15 0.72 -3.92 5 83 2010-07-22 14:08:07 2010-07-22 15:08:07 1 1 83 0 3 28 0 61.00 84 96.53 CHANGED MK+TYDYsATKKHLELKKQpLCKKLSNh+LSEKEREQIKhEIDNYEYILNLVEMNHYERGF MK.TYDYsATKKaLE.KKQQLCpKLssh+LScKEREQlKhEIDNYEYILNlVEMNHYERGF 0 0 1 1 +12877 PF13036 DUF3897 Protein of unknown function (DUF3897) Mistry J jm14 Jackhmmer:O25995 Family This is a bacterial family of uncharacterised proteins. Some of the proteins in this family are annotated as putative lipoproteins. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.84 0.71 -4.66 74 1082 2012-10-01 20:48:06 2010-07-22 15:45:39 1 3 1047 0 176 569 152 184.90 36 90.44 CHANGED hhhsul...hluGC.............us.s.sphh-..s...............sss.shosshssp........DhphsApcMspshlssPhltphhtpss.t..sl..lhlss..lpNcTsp.pIso..cphssplpptLhpo..G+hph.....ssssphs.shppphp.pppsuhhspsostthGp.luApahLpGplssh..lc...pstpp.hhYphshpLhslpoGhllWsscppl ..............................................................hshAh...hLuGC...........................s...........s.....................sslsp.ssslppp...........s.tst..spchs.shhhpPh.VuphhtssGsp......sVlhV-s....V.pN+Tst..pl......ss.....schT..cs.lp.suL.tss...GKFsL.......Vusppls.hs+pQ.Lshpsp.D.uL.sspS....p....AIuhu.............+tVGA.pYsLYushSus....Vp......................................t.p....hpMpLM.lpTGcIlWoscssl................................................ 0 49 94 138 +12878 PF13037 DUF3898 Domain of unknown function (DUF3898) Mistry J, Aldam G gba Pfam-B_1179 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. There are two conserved sequence motifs: DFG and FEKG. 25.00 25.00 40.70 40.50 17.50 15.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.75 0.72 -4.11 9 132 2010-07-22 15:16:23 2010-07-22 16:16:23 1 2 128 0 19 80 0 88.90 74 25.58 CHANGED ElQEpaosEQVhEAoAtIlEppPElELKhKLschplKuhLADFG-olHlAKVNGRYVlLIEuDslpFEKGsSPVEFLKP-sLp-Vlc+Ipp ..EIMEQFSTEEVMEATAQIVEHAPEVELKLKADHISVKALLADFGDpIHIAKVNDRYVLMIEADTLTFEKGFSPIEFLKPDELQDVIERIEN. 0 6 12 13 +12879 PF13038 DUF3899 Domain of unknown function (DUF3899) Mistry J, Aldam G gba Pfam-B_1174 (release 24.0) Family Putative Tryptophanyl-tRNA synthetase. 
21.70 21.70 21.80 22.20 21.60 21.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.11 0.72 -3.83 37 490 2010-07-22 15:18:45 2010-07-22 16:18:45 1 2 484 0 38 179 2 87.20 36 73.50 CHANGED hlNlhF.llullhlllGhhlhlhpsGh..FcshtauF++hppphtppp.tphhpp......p.....p.hhtshhhshhlss..hlhhlhslhltah .....alNIhF.hluLhhhIhuhhlhlhQpGh..FDss.pauF+Rlph.........phppp.c.pc.Ipcs.......spc...........ch.phhlupa..hhslllhs....ILhhlholhlohh................ 0 11 22 32 +12880 PF13039 DUF3900 Protein of unknown function (DUF3900) Aldam G, Mistry J gba Pfam-B_1279 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 360 amino acids in length. 21.00 21.00 26.90 25.80 18.10 16.00 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.41 0.70 -4.83 8 130 2010-07-22 15:34:20 2010-07-22 16:34:20 1 2 127 0 19 78 0 246.30 79 69.68 CHANGED DFTIpaLSFYVlpVEGcuEpssKpaKHaQTLDpspYEcSsLKDFLDGELtKIuKRKV-RHPKSEpsPTKIGRFlVEPGH-LDSNPNYNLFpRhRhAEThEsFp-tSEphV+oYLDTSAVRGGVFLVspA+L+KYaDDsFVFlhKCDFEpKVAsIoDpSTLI+cVEMAITTKNMKSIQYPYMPEEGMVEEuELKIHQuSHARYFEDFLKFVEYscSMPEIlKsQVhsMsp-HIt-Tap-pSEERppFEEslElW .DFEINYLSFYVVQVEGKGEuVDKRYKHFQTLDAEEYEDSSLKEFLsGELLKISKRKVERHAKTEQAPTKIGRFIVEpGHELDSNPHYNLFNRIRFAETKENFKDMSEPLVYTYLDTSAVRGGVFLIAQAKLRKYFDDPFVFVMKCDFEPKVASISDESTLIRNVEMAITTKNMKSIQYPYMPEEGMVEsGELKIHQASHARYFEDFLKFVEYERSMPEIMKT.QVMDMVYDQIEDVFEE..GT.EEREQFDQAMEVW 0 6 12 13 +12881 PF13040 DUF3901 Protein of unknown function (DUF3901) Misrty J, Aldam G gba Pfam-B_1316 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a single completely conserved residue L that may be functionally important. 
25.00 25.00 43.40 43.40 21.00 20.30 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.92 0.72 -4.67 32 262 2010-07-22 15:36:13 2010-07-22 16:36:13 1 1 134 0 36 125 0 41.30 48 84.45 CHANGED +K....p++.....hoFc-LVpENKcpLLsDpcth-cIEc+l-c+attp ..........R+.p+R......KSFcpLlpENKQpLLss+-shccIEERIEKRaph....... 0 8 23 26 +12882 PF13041 PPR_2 PPR repeat family Coggill P pcc Wood V Repeat This repeat has no known function. It is about 35 amino acids long and is found in up to 18 copies in some proteins.\ The family appears to be greatly expanded in plants and fungi. The repeat has been called PPR [1]. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.51 0.72 -3.99 436 33080 2012-10-11 20:01:03 2010-07-22 16:48:00 1 3913 396 0 23020 36618 476 49.20 26 28.42 CHANGED P.....s....l....l...o.YNsllsu.hs..c.pu.c.h.....cc.Ahclap-Mp..c.p...G.h...p.Pss.hTYshLlpu.hCc .......................shh.oasslIsu.hs.....c...p.....G...p..h...................cc...A.h.p..l.a....p....c..Mt.....p...p............G...l............p..P..sh..hTa.ss.llsu.hs............................... 0 2730 14486 19222 +12883 PF13042 DUF3902 Protein of unknown function (DUF3902) Mistry J, Aldam G gba Pfam-B_1357 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. There is a conserved LGI sequence motif. 
25.00 25.00 92.50 92.40 20.40 20.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.24 0.71 -4.95 10 107 2010-07-22 15:54:01 2010-07-22 16:54:01 1 1 79 0 3 76 0 158.30 69 95.33 CHANGED KuVLKsIlIShIFulsGhhhLLFsLh....hGsuDWlLsWlGVLMAYLSLahLIsLYs+sTYcKphsKlLlKosllSFshAVLGIhFGIlapLLssWSLolMhWYWLLlLlLaLhTIIoLVILVFVNppspsashlY+hLILLNlhLTLGPVLWPlhloIl.GNGM ......KSVLKSILISFVFSAVGMCWLLFlLF....+GDGDWLLSWlGVLMAYLSLYTLIDLYCKsTYDKKlsKhLIKTsVTSFSFuVLGISFsIIHELLTPWSLSLMVWYWLlMLlLFLTTIIoLV.LVFVNRKNHNFTssYRlLILLNlhLTLGPVLWPLLLoII.GNGM........ 0 0 1 1 +12884 PF13043 DUF3903 Domain of unknown function (DUF3903) Aldam G, Mistry J gba Pfam-B_1600 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. 25.00 25.00 51.40 88.80 22.50 21.70 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.48 0.72 -7.85 0.72 -4.45 4 84 2010-07-22 16:14:07 2010-07-22 17:14:07 1 1 84 0 3 25 0 40.00 89 61.79 CHANGED hsATSphLAIpKVRsECKRRFGKoLLLQTEIpEElhhcQK VPATTQLLAIEKVRAECKRRFGKsLLLQTEIKEEIVFEQK 0 0 1 1 +12885 PF13044 DUF3904 Protein of unknown function (DUF3904) Mistry J, Aldam G gba Pfam-B_1386 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in viruses. Proteins in this family are typically between 437 and 448 amino acids in length. 
25.00 25.00 190.40 190.30 16.50 16.30 hmmbuild -o /dev/null HMM SEED 436 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.50 0.70 -6.06 2 198 2010-07-22 16:22:22 2010-07-22 17:22:22 1 1 2 0 0 189 0 398.50 96 99.09 CHANGED LVLFLhKEVLCEPChC-NPTCLGlTIPpsGaVRSAPGGVLLTETITEpPtLsEWTTSRs+LE-ohW.sG-sKsGKVSQTLFEAIQGTQMENCAVKAVhDToFVNLT+pDlVLG+lKVS.FGG-pDISKCGRKGLKVFICGGTsGYVTRGCPPEEC+G+KGRMMuLEPTsDCGVEKGhTT-RIKTGhlDlsSCCTQHGCTKGIRVEVPSPVLVSuKCpEloFRVVPFHSVPD+LGFARTSSFTL+ANhsNpHGWSKYsFNLRuFPGEEFIKCCGFTLGlGGAWFQAYLNG.VQGDGAASA-DVKEKLNGIIDQINKsNhLLEGEIEAVRRIAYMNQASSLQNQVEIGLIGEYLNISSWLETpTLTKTEEGLMKsGWCpSssHCWCPPcsshlPTIGYVDsIKEVTGTSWWMVMIHYIIVGLIVlVlVVhGLKLWGCl ..................PChC-NPTCLGlTIPpsGaVRSAPGGVLLTETITEpPtLsEWTTSRsRLEDSsWpGGEVKSGKVSQTLFEAIQGTQMENCAVKAVFDTSFVNLTRHDVVLGRVKVSPFGGEHDISKCGRKGLKVFICGGTTGYVTRGCPPEECRGRKGRMMSLEPTADCGVEKGFTTERIKTGKVDLDSCCTQHGCTKGIRVEVPSPVLVSAKCNEISFRVVPFHSVPDRLGFARTSSFTLRAsLANQHGWSKYNF........N...........LR..........A...........FPGEEFIKCCGFTLGIGGAWFQAYLNGEVQGDGAASAEDVKEKLNGIIDQINKVNLLLEGEIEAVRRIAYMNQASSLQNQVEIGLIGEYLNISSWLETKTLTKTEEGLMKDGWCRSSNHCWCPPDTVGIPTIGYVDNIKEVTGTSWWMVMIHYIIVGLIVVVLVVLGLKLWGCI.. 0 0 0 0 +12886 PF13045 DUF3905 Protein of unknown function (DUF3905) Mistry J, Aldam G gba Pfam-B_1447 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 19.70 19.70 21.80 31.80 17.60 16.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.95 0.72 -4.21 7 99 2010-07-23 08:51:41 2010-07-23 09:51:41 1 1 99 0 10 51 0 82.80 83 71.87 CHANGED csPhLDtThPHQIsh.PSFKGoGhphppPFVNpaGVVIGDSpYsS.NSPLppWSDEhDPulMAGDEWlHPTNDIGWpotENR-LL .......QSPILDETLPHQMNF.PSFKGTGKpMQQPFVNQYDVVIGDSKYNSENSPLpNWSDEVDPAIMAGDEWIHPTNDIGWISEENQELL............. 
0 4 7 8 +12887 PF13046 DUF3906 Protein of unknown function (DUF3906) Mistry J, Aldam G gba Pfam-B_1532 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved EKK sequence motif. 25.00 25.00 51.20 51.00 18.80 17.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.20 0.72 -9.01 0.72 -4.36 5 108 2010-07-23 08:53:15 2010-07-23 09:53:15 1 1 108 0 10 48 0 64.00 72 94.54 CHANGED MaLYRFEAsl.cschlsIVIlApsEEpAF+LAElELEKaaLKlPslpElSLaEKK+Is+GAGYVl .MDLYRFEAVL.sNSIVPIV..VVApSEEQAFKhAEIELEKHFLPLPEVKEIuLFEKKKIRKuAAFVI... 0 1 4 5 +12888 PF13047 DUF3907 Protein of unknown function (DUF3907) Mistry J, Aldam G gba Pfam-B_1274 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 160 amino acids in length. There is a conserved AYTG sequence motif. 20.30 20.30 20.80 145.50 19.80 20.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.88 0.71 -4.59 16 138 2010-07-23 08:55:27 2010-07-23 09:55:27 1 1 138 0 20 93 0 147.50 63 90.38 CHANGED Qs-pstphLpcslpplpsaLNppTLspLhpEp.su-c..sYYcslLusLR+LLVFsE-Gh-uCtllLpppsFccsAAEKsLYclYHQCIpEFFpPKpDsWYEDSRuAYTG+suIcF+ppsPpulcpLhhSlEutFpphREEL-YYETDYpT ..QTEQVuhFLE-sIslIosYlNaHTLPSLLEEossGsE..pYYKGlLuohRRLLVFCEEGtDAChVLLsSQPFRKTAAEKsLYKIYHQVIAEFFSPKuDpWYENSRSAYTGKNSIVFQQTPPASlEpVM+SLEGKFQlMREELEYYETDYQT.. 2 5 11 13 +12889 PF13048 DUF3908 Protein of unknown function (DUF3908) Mistry J, Aldam G gba Pfam-B_1533 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 140 amino acids in length. 
There is a single completely conserved residue Y that may be functionally important. 22.90 22.80 22.90 22.80 22.70 22.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.84 0.71 -4.17 9 99 2012-10-04 14:01:12 2010-07-23 09:58:18 1 1 97 0 5 46 0 121.50 72 89.42 CHANGED shpphcchltpsphhpppsh...clhchh+clp.hL-..pshphhYPKNlFss..scclElhhhhpDs.hhhI.hppppc.thphchhph-slpcsplcpp.s...tshpLplhFssst.pIhhsSht..Ds..pp.ah..asc ...................NMKTIEEWIAESNARcEEDF..GpVVEEMKEVCl..GLD...NATLIYTKNVFCF..GKKVEVhFFFQDH..VVI.GQEK-E.YlEIEKLKYDsITpSNLKT..N..DKNTTLELKFANGQ.SINLDSLN..DNYGTKNWL..FAR............. 1 2 3 3 +12890 PF13049 DUF3910 Protein of unknown function (DUF3910) Mistry J, Aldam G gba Pfam-B_1539 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 21.50 21.50 22.00 72.10 20.80 21.00 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.13 0.72 -3.90 6 84 2010-07-23 09:00:14 2010-07-23 10:00:14 1 1 84 0 3 46 0 92.40 90 91.92 CHANGED MNlQAKVDWIGTPKPYIYK.DDVTYDAToIDFSLppDDNRYKLIVLKHEpssHYKhVQYGlKPGSQKPFPIDIPFcpsMLPLlEpILpDPYVQA MNVQAKVDWIGTPKPYIYK.DEVTYDATSIDFSLAGDDNRYKLIVLKSEcNTHYKhVQYGIKPGSQKPFPIDIPFEQNMLPIIEQILHDPYVQA. 0 0 1 1 +12891 PF13050 DUF3911 Protein of unknown function (DUF3911) Mistry J, Aldam G gba Pfam-B_1540 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 
20.20 20.20 22.30 40.20 18.00 17.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.47 0.72 -3.87 5 86 2010-07-23 09:01:18 2010-07-23 10:01:18 1 1 85 0 3 28 1 76.10 94 94.08 CHANGED MACVQIKGTRQEVVEMLQLFDLMDTKGFCKFDNYVEVEPNsccHNNFTASIDIQSNssSAQDp.LNDQFVSQMLTGVY MACVQIKGTRQEVVEMLQLFDLMDTKGFCKFDNYVEVEPNsKEHNNFIASIDIHSNTSSAQDT.LNDQFVSQMLTGVY 0 0 1 1 +12892 PF13051 DUF3912 Protein of unknown function (DUF3912) Mistry J, Aldam G gba Pfam-B_1615 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 21.80 21.80 21.80 148.90 20.90 21.50 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.29 0.72 -4.23 5 84 2010-07-23 09:02:45 2010-07-23 10:02:45 1 1 84 0 2 28 0 68.00 93 90.32 CHANGED NFDIVGQKAYIKDGPHRNRIGIVKKNEcQLESpFAIVIGEQsIDVELKDIVLVGVDVGQFHcWCEQNG NFDIVGQKAYIKDGPHRNRIGIVKKNETKLcSQFAIVIGEQsIDVELKDIVLVGVDVGQFHKWCEQNG 0 0 1 1 +12893 PF13052 DUF3913 Protein of unknown function (DUF3913) Mistry J, Aldam G gba Pfam-B_1619 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 28.80 106.80 18.30 17.20 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -8.76 0.72 -4.03 2 84 2010-07-23 09:04:03 2010-07-23 10:04:03 1 1 84 0 3 13 0 56.80 97 93.66 CHANGED KIhFYEKTAQ.DDLLGIWDNVPTIPRIGEKVEILKTlRTVTDIKYlKpGNNF+VEIl KIWFYEKTAQLDDLLGIWDNVPTIPRIGEKVEILKTVRTVTDIKYVKNGNNFRVEII 0 0 1 1 +12894 PF13053 DUF3914 Protein of unknown function (DUF3914) Mistry J, ALdam G gba Pfam-B_1562 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 110 amino acids in length. There are two conserved sequence motifs: KFDIR and DLW. 25.00 25.00 42.10 41.60 21.30 16.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.34 0.72 -3.83 5 85 2010-07-23 09:05:22 2010-07-23 10:05:22 1 1 80 0 3 51 0 88.00 89 78.88 CHANGED MQIGLNlHTluQsTKlTPlslE+NTpusssppuKEssND.sIKFDIRSSEKEhKQucHKFTELDLWKMLKDKGVPLWIILEMLpKFRKEKEp MQIGSNIHTLSQPTKITPSNLEHNTISSTKLESKK.lND.PIKFDIRSSEKEMKQPEHKFNELDLWKMLKDKGVPLWIILEMLQKsRKEKE.A 0 0 1 1 +12895 PF13054 DUF3915 Protein of unknown function (DUF3915) Mistry J, ALdam G gba Pfam-B_1549 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 25.00 25.00 146.30 145.90 18.90 18.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -11.07 0.71 -4.14 5 84 2010-07-23 09:06:43 2010-07-23 10:06:43 1 1 84 0 3 45 0 119.00 86 97.62 CHANGED MFGSFGCCDNF..RDCHH.....HEREp..HR...hcREKERc..hcspRsAVCNVLANISIGTEISLLolKGNGTFRNVIFEGFCNGVALFSALsh.ssDKD...sNKDDKNNpNpN..+FTGILRVCP-DIVAI MFGSFGCCDNF..RDCHH...........HEREp....cR...-HREKERE..V+PQQPAVCNVLASISVGTELSLLSl+GsGoFNNVIFEGFsNGVALFSALAR.NssD.KD......NNKDDKpN..QN..R..N..TFTGILRVCPTDIVAI 0 0 1 1 +12896 PF13055 DUF3917 Protein of unknown function (DUF3917) Mistry J, Aldam G gba Pfam-B_1608 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 
25.00 25.00 149.20 148.60 21.90 20.60 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.43 0.72 -3.73 4 84 2010-07-23 09:41:22 2010-07-23 10:41:22 1 1 84 0 3 28 0 71.00 95 82.96 CHANGED MTLKQNGLKRFVPGSILAGIALITYVsSIFlESlSl-hSTShhFIGITLFAGSlMVLMVAGIIlFIHMNSE MTLKQNGLKRFVPGSILAGIALITYVISIFIESVSVDMSTSLMFhGITLFAuSIMVLMVAGIILFIHMNSE 0 0 1 1 +12897 PF13056 DUF3918 Protein of unknown function (DUF3918) Mistry J, Aldam G gba Pfam-B_1567 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There are two completely conserved residues (G and R) that may be functionally important. 21.30 21.30 21.70 21.30 20.90 18.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.60 0.72 -7.98 0.72 -4.64 11 120 2010-07-23 10:07:22 2010-07-23 11:07:22 1 1 120 0 13 31 1 42.20 66 98.98 CHANGED MN+shTSLlAlGsGsAAYphAp+sDhMNsRsMKKhR+Rlh+hh .MN.LRNSLIALGVGAAAYQYARKQDVFSKRNMKKARKMIKSYL 0 1 7 9 +12898 PF13057 DUF3919 Protein of unknown function (DUF3919) Mistry J, Aldam G gba Pfam-B_1479 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 251 and 262 amino acids in length. There is a conserved YLNG sequence motif. 
25.00 25.00 76.40 76.00 21.90 20.60 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.55 0.70 -11.43 0.70 -4.89 13 118 2010-07-23 10:09:05 2010-07-23 11:09:05 1 1 111 0 10 94 0 220.90 57 86.08 CHANGED cDKpsVlpphspSIPsKlclhsc+WGchslsDcspLppIhshlcpIspspoptss....csss.sIsGslaYLNGcKcoFplushLplssphYuss.hsshIstL+shLlshhYoPssLssllssssclIlhcp.ppspclssssKphIhspIcph+ploDsc-lt+hlspppcs.hhHI+lYhcscpt........cscspsllsIDVY-s.YhVVQYhGDENGpshYhKGsLsslhhK ..NDKQsVLQRVNESLPsElKVRHEKWGEIVlTDEVRLHTIVSFFDRIpIpPs-s.......KsQEQVFTGEVTYLNGpKRTFAVGDLFQYGsshYGKNGtDPMISAhQTYLLSLYYTPERIuDFF.ASApDVlV....RQGDVhRshNLTHILDSIRYAKQITDYGEIQKLLQSQNEP.IAYITAYKTGK........+lKN-REDILTISVYPS.YFVVQYLGDNNGNVMYMKuSLAplFVK.............. 0 3 8 8 +12899 PF13058 DUF3920 Protein of unknown function (DUF3920) Mistry J, Aldam G gba Pfam-B_1595 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. 25.00 25.00 26.20 83.30 21.40 18.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.74 0.71 -4.32 3 86 2010-07-23 10:10:41 2010-07-23 11:10:41 1 1 86 0 5 41 0 125.50 85 87.16 CHANGED VLDSE.LPWDlQRl+sDlFSLIEKpchPVIFCDTCDANpVLhsLGEEEEEFLFPlGGFYHKEKQhIFVCMWEpYEQVLKTLLHEFRHuMQHEp-VLYVGpEsYEERhIEKD..ARtFAERKlDEYtcRs VLDSE.LPWDVK+LR-DLFSLIElC.KTPVIFCDTCDAN+VLLSLGEEEEEFLFPlGGFYHKEKQLIFVCMWEEYEQVLKTLLHEFRHAMQ.....HKp-lLYVGSEpYEERWIEKD..ARKFAERKLDEYKNRK.. 0 1 3 3 +12900 PF13059 DUF3922 Protein of unknown function (DUF3992) Mistry J, Aldam G gba Pfam-B_1628 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 87 and 98 amino acids in length. 
25.00 25.00 161.50 161.30 19.40 18.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.80 0.72 -3.79 9 83 2010-07-23 10:11:49 2010-07-23 11:11:49 1 1 82 0 3 40 0 78.90 94 86.70 CHANGED FGMDKSKKLG-YVNALQALCEQYNVETDKIAIIEATEEYYLFLVKQE-CYDVVKVETVDTNI-YYTKAYKISSFNHTAY FGMDKSKKLGDYVNALQALCEQYNVETDKIAIlEATEEYYLFLVKQE-CYDVVKVETVDTNIDYYTKAYKISSFNHTAY. 0 0 2 2 +12901 PF13060 DUF3921 Protein of unknown function (DUF3921) Mistry J, Aldam G gba Pfam-B_1624 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 29.40 120.80 21.80 19.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.23 0.72 -8.93 0.72 -4.09 3 83 2010-07-23 10:12:38 2010-07-23 11:12:38 1 1 81 0 3 28 0 57.70 95 91.77 CHANGED MDGFQLSMIQKAIHRTYDELGKEIDLQGVlADEIQKAQEEYLSALSHETLIDKRYLKS MDSFQLSMIQKAIHRTYDELGKElDSQGAIVDEIQKAQEEYLSALSHETAIDKRYLKS 0 0 1 1 +12902 PF13061 DUF3923 Protein of unknown function (DUF3923) Mistry J, Aldam G gba Pfam-B_1586 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 26.60 26.20 22.10 20.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.00 0.72 -4.12 19 152 2010-07-23 10:14:53 2010-07-23 11:14:53 1 1 147 0 23 92 0 65.20 41 83.99 CHANGED h+hahlsslhhlllFlhhuhhIahRcVDGuGlhQThph+hlslhlhslhhlllllhplIWhllh++ ...ahsWWluNlhWlIlFshhAslIWlRcVDGAGVhQTsclK.IoLlVllIsallslhhQlIWLlIsh+........... 0 3 10 15 +12903 PF13062 DUF3924 Protein of unknown function (DUF3924) Mistry J, Aldam G gba Pfam-B_1601 (release 24.0) Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 60 amino acids in length. 20.90 20.90 22.30 125.80 19.60 19.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.94 0.72 -4.10 2 84 2012-10-02 18:44:02 2010-07-23 11:16:03 1 1 84 0 3 14 0 62.00 96 96.87 CHANGED MsTLTIELPp-hAEKLDLLKQsYpKKTGAoIS-S....TLISKEFlQtITPFDLQQal.tKE MNTLTIELPKETAEKLDLLKQAYEKKTGASISESTLVQTLISKEFIQAITPFDLQQFIsGKE 0 0 1 1 +12904 PF13063 DUF3925 Protein of unknown function (DUF3925) Mistry J, Aldam G gba Pfam-B_1644 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. 25.00 25.00 92.40 92.30 20.90 19.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.22 0.72 -3.94 2 82 2010-07-23 10:17:35 2010-07-23 11:17:35 1 1 81 0 4 27 0 65.50 95 94.02 CHANGED MKTAQ+EhISNREFYFVLYMMLLaVhGWVhDVNGLFLSpYFsLAG.I.LPlVGGlVGhFlMSIsKE MKTAQHETISNREFYFVLYMMLLYVhGWVIDVNGLFLSSYFNLAGEIMLPLVGGIVGLFVMSINK.Q 0 0 2 2 +12905 PF13064 DUF3927 Protein of unknown function (DUF3927) Mistry J, Aldam G gba Pfam-B_1668 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 50 amino acids in length. There is a conserved SVL sequence motif. There is a single completely conserved residue D that may be functionally important. 18.60 18.60 19.80 21.50 16.30 18.10 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -8.12 0.72 -4.48 10 463 2010-07-23 10:19:14 2010-07-23 11:19:14 1 1 177 0 1 147 3 44.00 64 83.66 CHANGED hchRLslAslLLFLVVhVDFTSRIMSVLADGsLVuulVllhWPllK ...........hKLpLhssslLLalsVMlDFTSRIMSVLADGsLVCGIVVLLWPllK.................. 
1 0 1 1 +12906 PF13065 DUF3928 Protein of unknown function (DUF3928) Mistry J, Aldam G gba Pfam-B_1675 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 25.00 25.00 194.40 194.30 20.30 19.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -10.17 0.72 -3.70 6 81 2010-07-23 10:20:22 2010-07-23 11:20:22 1 1 81 0 3 16 0 95.00 97 97.76 CHANGED MYTLKIVSDREAlYQFASYV+VVQGVEDVYVEVGEPLYEHPLMKFYVHIsIcETY-QpKALQEIARLVELGRFTYVHYRN-EIEcAFEAVKYESF MYTLKIVSDREALYQFASYVRVVQGVEDVYVEVGEPLYEHPLMKFYVHIKLEETYEQHKALQEIARLVELGRFTYVHYRNDEIEEAFEAVKYESF 0 0 1 1 +12907 PF13066 DUF3929 Protein of unknown function (DUF3929) Mistry J, Aldam G gba Pfam-B_1716 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. 25.00 25.00 151.10 150.80 23.80 18.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.23 0.72 -4.20 3 81 2010-07-23 10:22:33 2010-07-23 11:22:33 1 1 80 0 4 17 0 65.00 97 95.14 CHANGED MVYHLENGETIKDIKEFCYRDpGKMLERVAHRVMDN+EVTAIDKQGTIISIAC-DIVKVELDYIp MVYHLENGETIKDVKEFCYRDQGKVLERVAHRVMDNREVTAIDKQGTIISIACEDIVKVELDYIT 0 0 2 2 +12908 PF13067 DUF3930 Protein of unknown function (DUF3930) Mistry J, Aldam G gba Pfam-B_1721 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 51 and 67 amino acids in length. 
25.00 25.00 63.80 58.90 18.30 17.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.38 0.72 -4.11 4 87 2010-07-23 10:23:47 2010-07-23 11:23:47 1 2 80 0 3 25 0 52.00 82 95.06 CHANGED M.......p+ppcahaE......IhKhlFlFhpshslshsthhlVQhIhph .MEYQYE....V....G....QTKEEFMHEDQWADSLIKWLFIFLIIVGIPYTAYVVVQFILSF. 0 0 2 2 +12909 PF13068 DUF3932 Protein of unknown function (DUF3932) Mistry J, Aldam G gba Pfam-B_1731 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 173.90 173.80 20.30 18.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.78 0.72 -3.89 2 80 2010-07-23 10:52:45 2010-07-23 11:52:45 1 1 80 0 3 20 1 81.00 93 97.49 CHANGED MKEsFRLQTDFSSSFDRWVSSFVSsaPsQLcWoTLKELIHEYTooHTNpolPpYISSuhTYYAQRlSTANNoEIlIapN.T MKEsFRLQTDFSSSFDRWVSSFVSDHPAQLEWTTLKELIHEYTToHTNDoLPTYISSAlTYYAQRVSTsNNSEIVIFEN.T. 0 0 1 1 +12910 PF13069 DUF3933 Protein of unknown function (DUF3933) Mistry J, Aldam G gba Pfam-B_1720 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 25.10 98.90 22.90 16.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.53 0.72 -4.27 4 81 2010-07-23 10:53:53 2010-07-23 11:53:53 1 1 81 0 3 24 0 52.50 93 95.15 CHANGED MKQYVICQlIsGsKYLAAYAETKQEAIEKAELLGLRTGsRYlVITAEEA-GLp .MKQYVICQIINGEKYLAAYAETKQEAIEKAELLGLRTGNRYhVITAEEAEGLT 0 0 1 1 +12911 PF13070 DUF3934 Protein of unknown function (DUF3934) Mistry J, Aldam G gba Pfam-B_1719 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 
There are two conserved sequence motifs: GTG and SKG. 25.00 25.00 31.30 31.30 19.40 15.40 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.29 0.72 -3.73 5 100 2010-07-23 10:55:23 2010-07-23 11:55:23 1 1 99 0 10 25 0 41.50 85 88.81 CHANGED MSKoK..sKuGTGpGTGKKGWNRWQuSAK+.KKuAKPY..pSKGT ..MSKTKAKPKKGVGQGTGSKGWNRWQSSAK..K..KKAAKPY..KSKGT...... 0 2 6 6 +12912 PF13071 DUF3935 Protein of unknown function (DUF3935) Mistry J, Aldam G gba Pfam-B_1715 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two conserved sequence motifs: FVF and LGV. 25.00 25.00 37.30 78.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.19 0.72 -3.59 3 83 2010-07-23 11:43:06 2010-07-23 12:43:06 1 1 83 0 3 36 0 70.70 95 76.27 CHANGED MTRLKQIFGIhISFFVFWFSMLGVQM.FAEFLDI-SLKFluGKTEsARAFLFsYPaFIlFLl...ShYh.FlI .MTRLKQVFGIIISFFVFWFSMLGVQM.FAEFLDIESLKFVAGKTEAARAFYSPYPFLIVFLIT..LLSLYF.FVI.. 0 0 1 1 +12913 PF13072 DUF3936 Protein of unknown function (DUF3936) Mistry J, Aldam G gba Pfam-B_1705 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved GKAW sequence motif. There is a single completely conserved residue G that may be functionally important. 21.80 21.80 21.80 29.00 21.20 21.70 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.70 0.72 -4.50 7 122 2010-07-23 11:44:47 2010-07-23 12:44:47 1 1 122 0 13 56 0 37.40 68 82.30 CHANGED MKlahhscullLsGKAWEIRtKLKcYsppaphVp-Wls MKVYILPNRVTLVGKAWQIRHKLKQYGKEYTTVQEWIT...... 
0 2 8 9 +12914 PF13073 DUF3937 Protein of unknown function (DUF3937) Mistry J, Aldam G gba Pfam-B_1711 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 22.50 22.50 22.50 75.40 21.70 20.70 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.25 0.72 -4.09 7 81 2010-07-23 11:45:39 2010-07-23 12:45:39 1 1 73 0 3 31 0 72.30 81 90.49 CHANGED MFpNKKLIRhGLoLFlhLslIsFTIuYFQoYLcSAssIcWllschW+TILlDAPcGILVlLGAIALY-FTKcs .MFTNKKLIRFGLTLhVhLhlIsFTISYFQTYLESAAGIKWVIsEhW+TILLDsPEuILVILGAIALYDFTKET. 0 0 1 1 +12915 PF13074 DUF3938 Protein of unknown function (DUF3938) Mistry J, Aldam G gba Pfam-B_1607 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. 25.00 25.00 209.70 209.20 24.40 19.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.36 0.72 -4.17 6 83 2010-07-23 11:46:34 2010-07-23 12:46:34 1 1 83 0 3 21 0 102.00 96 80.04 CHANGED NlQLGEpIIFNGIEoLVSASILGGYIaFLFNPEENAQKTMLLTMIGIVGGCISYSMTNYTLPLQLSSAFFHGLWTWFIAFCLADVFNLLQDsEE-sGRpIES .NTQLGENIIFNGIETLVSASILGGYIaFLFNPEENAQKTMLLTMIGIVGGCISYSMTNYTLPLQLSSAFFHGLWTWFIAFCLADVFNLLQDNEEENGRQIES..... 0 0 1 1 +12916 PF13075 DUF3939 Protein of unknown function (DUF3939) Mistry J, Aldam G gba Pfam-B_1535 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. 
21.30 21.30 21.90 21.40 20.90 19.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.70 0.71 -4.45 6 100 2010-07-23 11:48:02 2010-07-23 12:48:02 1 2 96 0 9 42 0 135.70 75 85.22 CHANGED pcpt.ctlcVTlDEVR+AlpcatpshscGIshpsLlpsspcIDhctLtsaLGGhPcQhFYMS+ETaEIF...tEE+-lsh.lDhVQlAVDpYlp-ptchPlhpsspshpVshpKL..ptYLcEhPpa.sLYls-pphlVohcPcp ........F.+TGKEEREITKDELEQAMApFLEp.NANIVYTVLVNDDYTVNYDLLKPYLPAFPTNhFLITKETLEVFEHTEENLNLVKE.IDlVQKAVDQYVTEKEMFPIVEGS......ED......RLICGMKL..GPYLsRlLKR.DLYISEKHYLVSSKPDR..... 0 4 6 7 +12917 PF13076 DUF3940 Protein of unknown function (DUF3940) Mistry J, Aldam G gba Pfam-B_1673 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 50 amino acids in length. 21.70 21.70 21.90 22.80 21.10 21.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.48 0.72 -4.41 29 195 2010-07-23 11:49:13 2010-07-23 12:49:13 1 1 129 0 24 102 0 37.80 50 66.79 CHANGED pcKphLIppLIppGlaKtpc..RpLaEhohpELcc.Ycph ......+KcaLI-cLIssGlaKhcD..RQLYELSLpELE+EYcs.h......... 1 1 14 16 +12918 PF13077 DUF3909 Protein of unknown function (DUF3909) Mistry J, Aldam G gba Pfam-B_1537 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 
25.00 25.00 211.60 211.50 20.00 18.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.59 0.72 -4.18 11 84 2010-07-23 11:57:32 2010-07-23 12:57:32 1 1 84 0 3 30 0 107.80 93 97.97 CHANGED MDLQKFDtMIDAVQRATClpIN-KQKEAFKQKYDFEPpFEYGRDEKGHYVIRTSKKMLEEMEFYLALKYDRDGlDLYMcAEIDGlCHVSVSYSEDALHLQELFQFLEE MDLQKFDEMIDsVQRATClQINEKQKEAFKQKYDFEPcFEYGRDEKGHYVIRTSKKMLEEMEFYLALKYDRDGVDLYMQAEIDGIhHVSlSYSEDALHLQELFQFLEE 0 0 1 1 +12919 PF13078 DUF3942 Protein of unknown function (DUF3942) Mistry J, Aldam G gba Pfam-B_1722 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. 25.00 25.00 99.80 99.70 21.30 19.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.67 0.71 -4.03 5 85 2010-07-23 12:01:07 2010-07-23 13:01:07 1 1 81 0 4 34 0 124.20 88 93.83 CHANGED LDEFsp+VKEYL-sEK-EKIIK-GHRDVIFpYLYcLEscIGVV+NP-FsFFTSGcRSHIVlENlEFKTEVcsE+NIIEITKIVDpVA.TsLDTIIlQDGELFALGRNEKFTppILp-YLpEsFuEhL R.FEFTTKlKEYLDDEKDEKIIKDGHRDIIFpYLYsLESEIGlhKNPNFTFFASGRRSHIVLENIEFKTEVNVKSNIIEITKIVDNVV.IPLDTIVAKDRELFALGRNEKFSVQILEQYLFDTFGEKL... 0 0 2 2 +12920 PF13079 DUF3916 Protein of unknown function (DUF3916) Mistry J, Aldam G gba Pfam-B_1564 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. There is a single completely conserved residue S that may be functionally important. 
21.30 21.30 21.80 21.80 20.60 20.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.73 0.71 -4.55 14 109 2010-07-23 12:11:47 2010-07-23 13:11:47 1 1 107 0 11 80 1 146.20 59 82.08 CHANGED hhcphptph.shPsshasstY..Wph+lPVstualpup+sspplKphClQsLlspAppLhphKsssppphRVsshIslssLasSpIllF..cs..-cYFcsFhsRss.chppWlsLsspcsltpcWsLplssshpphGapElhpDp-...s..tcpElWaIGE ................MIKRIEEHTKsFPSTFYND..EY..W.M.LPVSQsFI-S+KTPRKVKRLCIQTLlspsNHLIph.K.PoDTHTYRVVsLISIpNLWcSQIIlF..KN..-DYFHNFFNRss.EFQKWIhLSNElDFWETWtISIssohphL+FQElIYDED...t...EKEIWFIGE............ 0 0 4 7 +12921 PF13080 DUF3926 Protein of unknown function (DUF3926) Mistry J, Aldam G gba Pfam-B_1663 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 46 and 63 amino acids in length. There is a single completely conserved residue P that may be functionally important. 25.00 25.00 26.10 82.60 19.10 18.50 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.06 0.72 -8.23 0.72 -4.42 8 78 2010-07-23 12:15:49 2010-07-23 13:15:49 1 1 77 0 3 35 0 44.00 80 80.29 CHANGED MpILcELPsPIQQSAKpMLNILQEELSSYspEpsQspsNLKsII MHILEELPsPIQQSAKQhLNILQEELuuYPpEQsHHcsNLKNII 0 0 2 2 +12922 PF13081 DUF3941 Domain of unknown function (DUF3941) Mistry J, Aldam G gba Pfam-B_1728 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 30 amino acids in length. There is a conserved YSK sequence motif. 25.00 25.00 28.70 43.80 23.80 24.00 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.99 0.72 -6.85 0.72 -3.95 5 93 2010-07-23 12:21:47 2010-07-23 13:21:47 1 1 93 0 8 35 0 24.00 82 47.55 CHANGED RccKNpt+..cKNtQpGK+AYSKKTD .RsQKNEQE..QKNIpQGKRAYSKKTD. 
0 1 4 5 +12923 PF13082 DUF3931 Protein of unknown function (DUF3931) Mistry J, Aldam G gba Pfam-B_1734 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 40.30 161.30 21.50 18.70 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.32 0.72 -4.11 4 82 2010-07-23 12:31:39 2010-07-23 13:31:39 1 1 82 0 3 10 0 66.00 98 78.64 CHANGED MDNNEKKCNVISIDGKKKKS-TYSYPKLVVEsKTYEFSSFVLCGETPDGRRLVLTHMISTDEFAGF MDNNEKKCNVISIDGKKKKSDTYSYPKLVVENKTYEFSSFVLCGETPDGRRLVLTHMISTDEFAGF 0 0 1 1 +12924 PF13083 KH_4 KH domain Bateman A agb Jackhmmer:O25768 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.30 0.72 -4.34 211 3577 2012-10-02 00:34:43 2010-07-23 16:26:45 1 7 2027 3 775 2186 412 73.20 25 47.89 CHANGED cch...l...cplhcsllpp...p...plplptp.....p.c.ppthlplpl.sspD.hGplIG+cG+slpAlphllsss.s.s+...p.s..p...c...h..plpl .....................................thlpthlcsllst..s.c.....sl.p.lphp......psp..pttplpl.pl...s.s.p.D....hG+..lIG+pG+slpAlc.t.llpsss..s.+....t..s..p....p.h.hl..................................... 0 331 604 701 +12925 PF13084 DUF3943 Domain of unknown function (DUF3943) Bateman A agb Jackhmmer:O25483 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. 
27.00 27.00 27.90 81.00 22.90 21.70 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.20 0.72 -4.48 33 236 2010-07-23 16:14:36 2010-07-23 17:14:36 1 2 221 0 52 187 8 110.00 50 30.54 CHANGED puhha................DsDsahhNhhuHPYtGuhYassARssGashapShhaohsuShh.WEhusEs..EhPSlpDllsTsluGhllGEhhac.huphl....tsutphhsppshu....hllsPhs ..................p.GPsWDcDpahhNhlhHPYFGulYYsAARpAGas.acShhYShshSThFWEYGlEAFsEVPSWQDlhlTPhhGullGEhhac.hpptI.....hssGGclhGS-hhGthslhhlsPl.G.............. 0 21 33 45 +12926 PF13085 Fer2_3 2Fe-2S iron-sulfur cluster binding domain Bateman A agb Jackhmmer:O25214 Domain The 2Fe-2S ferredoxin family have a general core structure consisting of beta(2)-alpha-beta(2) which abeta-grasp type fold. The domain is around one hundred amino acids with four conserved cysteine residues to which the 2Fe-2S cluster is ligated. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.45 0.72 -4.23 52 5265 2012-10-02 17:47:23 2010-07-24 12:26:47 1 25 3727 84 1318 3398 2510 108.10 39 42.50 CHANGED plclhRhss................................................................tt.psahpsaplshc..ptholL-sLptIcpp..........-ssluac..tuCppulCGoCuhhlNGcs......pLACpshlpshhpt...............................................lplpPL..spaPVl+DLlV........Dhsshhc ............................................................................................................................................................................hplaRasP....................................................................p.ss....pPphps.Y..pl...c...hp...pshhlLDuLh.hlKcp...........DssLoFR...pS.CR.E.GlCGSCuM.lNGps...........pLACpsh....lcshs.p..t...........................................................................................lp.lcPL...ssh.PVl+DLlV..DhopFh.t..................................................................................... 
0 387 813 1105 +12927 PF13086 AAA_11 AAA domain Bateman A agb Jackhmmer:O25195 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.79 0.70 -4.79 118 5668 2012-10-05 12:31:09 2010-07-24 13:20:17 1 234 1842 10 3094 6877 709 222.70 20 21.82 CHANGED pLNpsQtpAl...ppslsppt......................hsLlpGP.............P.............GT........GKT...polstlltth...................................................................................................................................................................................tttsp+lLlsusSNsAVDpllp+...Lhp.................hph...............pllRlGpsp.........psslpphs....hpt.h.........................................................................................................................................................................httphtphppthpth.pthtt.ptth............ppllppupllhsThsuu.upthhpph..................pF-hlIlDEAuQ.............usEsssLlPl..hhu.............c+hlL...................lGD.pQLPPslhSp 
.............................................................................................................................................................................................................................................................................................................................................................................................Q..uh...........h.h.t.............................h.l...G..P.............P.......................GT...............GKT...........h.hh..t..hh...h...................................................................................................................................................................................................................................................................................................................t..hl.hh..s.s..s......A.hs...hh.t...h.t...............................................................................................................h.h.R..hh..t.....................................h...........h...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..h...t..t...h......p.....l......l........h..s..T....h.....ss..........p............h.t.t..h.............................................................
.th..c..hl.l...lD..E.....A....uQ................................h...h..t.......p......s..hh...s...l.......hhs...........................pphl...l...................................lG...........D..pQLsPhh................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 1106 1755 2545 +12928 PF13087 AAA_12 AAA domain Bateman A agb Jackhmmer:O25195 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.41 0.71 -4.94 125 5685 2012-10-05 12:31:09 2010-07-25 09:30:25 1 268 2107 10 3041 5759 458 189.40 23 15.53 CHANGED hspSLF-RLhptt.................ttshhLshQYRMHspItp...FsSphFYp...Gc....Lpsus......shtppph..............................................................h.tshtP.lhFhcs................................sssppppppt...............sShhN.sEAphlhpllppLhpptstt..................pcI..GlIoPYpuQlphl+phlp.........p..............................pht.t......................................lclsTVDuFQG+E+-lIIhSsVRo...............s......pps.......................................................s....lG.FLsDhRRlNVAlTRAKptLlllGssp 
...........................................................................................................................................................ohh.hh............................................hhLphp.a.R.h.p...p.lhp...h...s.s.....p.hh.Y.p...........s.p.......L..sts..................phtp.t.........................................................................................................................................................................................s...hhh.hss........................................................ts......t..ttt...t...............................tuhhN...tE.u........p.h..l.h.p.....hl...p.t.hh....pt..thtt.................................ppl......u.l..l....o....P...Y..p..u..Q...h..p..h...l..p..ph..l..p..........p..................................................................................................................................t..h......t..t.................................................................................................lp..l.sT.V.-s...a..Q............GpE.p-.l.lll..S..h.....V..cs.........s..................tpt............................................................................................................................s........hG....F..l.......p....s.............c.....R........l....N..VAlo.RA+pt.lhllGs..p.......................................................... 0 1103 1793 2543 +12929 PF13088 BNR_2 BNR repeat-like domain Bateman A agb Jackhmmer:O25303 Domain This family of proteins contains BNR-like repeats suggesting these proteins may act as sialidases. 
28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -12.30 0.70 -5.11 78 2868 2012-10-02 00:45:24 2010-07-25 11:16:42 1 108 1391 46 640 2427 442 267.90 18 47.16 CHANGED Gp...LlAhWFu.........GscEG..ssDlsIhho......ppp.ss.....p.......W..ussthlssst......................................htshtNPllht...ts.supLhLF...atssssss...............tWh...........uhh......hpSsD.......sGt..o.Wotsp....cLssu..........hsGsl+s....................s....lhhp....sG.pllhP...s.pE.......tt....ashhlthosDsuts.........Wppsphh.................................ph.stlQPoll..ss.......u..plhhhhRs..tps............plhtotSpDsGpoW..ss.......sp..........hslPNss....Sulsuls.LtsG..phlLshN........................................................RssLsl.....th..........SpDt.GpsWp.thhsL-sss......................................................phSYPsllpssc......s..plal ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s...h..........................t....s..lh......h......h..................................................................................................................................................t..................hpSsD..................sG.p.....T.....Wotsp..................pls.t.....................hh.h..h...s..s.spu..............................................................................................l..hp.............sG..cllhs...................shtp..........................ttt........ps.h.h..h....hS...c..DpG.pT.........Wphu.pss.................................................
........................................ph..sps...ps...s....l....s..s........G.....slh.h..h.....h...Rs....ss.....................phhh.u.tSpD..s.......Gt.....TW....st............................................sp................................h.s..h...s..s......s...............u..sh..p.....h.h..p.....h.t...ss.........phhlh.hs..........s.pt....................................................................Rt...h..s..l..th...............s..t..Ds...uh.....sW.....hh.....l...p..s......................................................................huYsshh..ss................................................................................................................. 0 246 392 511 +12930 PF13089 PP_kinase_N Polyphosphate kinase N-terminal domain Bateman A agb Splitting PF02503 into domains Domain Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. 21.90 21.90 21.90 21.90 21.80 21.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.49 0.72 -3.96 174 3094 2010-07-25 13:24:08 2010-07-25 14:24:08 1 9 2787 6 696 2356 835 105.30 41 15.31 CHANGED hahNRELSWLpFNpRVLpEAtD.p.p.hPLLERl+FLuIhoSNLDEFFhVRVAuL+p...............plptuhsphs..sGho.............Ppcp.LptIpppspphhccptphapp.lhstLt.cpuIpllp ......alsRELSWLsFNpRVLppAtD.p.s.hP.....LLERh+FLuIaosNLDEFahVRVAuLKc...............plphu..h...pp....tu....ssu.h.s.............spc..LspIppcsp.chhpcp....tplas-.lhspLt.cp.sIhll.p......................................... 0 209 457 597 +12931 PF13090 PP_kinase_C Polyphosphate kinase C-terminal domain Bateman A agb Splitting PF02503 into domains Domain Polyphosphate kinase (Ppk) catalyses the formation of polyphosphate from ATP, with chain lengths of up to a thousand or more orthophosphate molecules. This C-terminal domain has a structure similar to phospholipase D. 
23.90 23.90 23.90 24.70 23.80 23.80 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.90 0.70 -5.85 174 3665 2012-10-02 13:01:53 2010-07-25 14:32:09 1 10 2915 6 706 2979 1325 303.40 49 47.92 CHANGED slFssIcc.pDlLLHHPYcSFssVlc.FlcpAApDPsVlAIK.TLYRsu.scSsIlpALlcAAcsGKpVTVlVELKARFDE-sNIpWA+pLEcAGs+VlYGlsGLKTHuKlsLlVR+..Ets.t.....l++YsHlGTGNYNspTA+lYTDhuLhTscpplupDlsclFshL.oGh...uc.s......phc+LhluPhsl+ppllchIccEhppActGc.sutIhhKhNSLsD.plIctLYcASpAGVcI-LIVRGICsL+PGlsGlSENIcVpSIVGRFLEHoRlahFtN.s...........G..........csclYluSADhMsRNLc+RVElhhPlhDspl+pclhch.LphhLpDNspuhtlpsDGsYpphps..p..........pp.h.suQphhhpth..p ......................................................................slFcuIRc.p.D.lLLHHPYcSFs.s.Vlc.hLcQAAtDPpVlAIK.TlYR.su...p.D.....S.IlpuLlcA...Acs.GKpVTVlVEL.p..A..RFD..E...EuNIpW.A.+cLEcAG.sHVla...G..hs..G...l....K.sHuKlhL.lsR+..E......ss....p..............lhcYsHlGTGNap...tTA+lYTD.hulhTss.plsp-spplFs...l..p.s........p..............................p....hp.....pL...hhu...P.sh+ptlhchIcpE.htt..t.p......tG....h......u........tIhhKhNsLsD.....tllctLYtASpuGVplcLll.R......Gh.CsL.hP..s.l.Gl.......S-....N....IclhSIl..sRaLE.HsRlahFts.s.................s..........................p.....plaluSADhMtRNlpp.RlEshhPlhs.plptplhp..hph.htDshpuh.lpt.-hp.thh.......t...........p.t..h.puQ.hhhp..h........................................................................... 
0 212 466 607 +12932 PF13091 PLDc_2 PLD-like domain Bateman A agb Jackhmmer:O26029 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.17 0.71 -4.38 205 18675 2012-10-02 13:01:53 2010-07-25 14:56:24 1 85 4401 14 4237 15098 1180 139.20 19 41.80 CHANGED lhphlpp...up..cplpl.ssh..ah.............ppp.....l....hpsl.ht.ttp+G.lcl+llhsp..............................................p..h.thtthpthtph.hptshp..........h..........................................HsKhhll.D.......sp......................h.sh.l..GSsNhotpuh.........ptNh......Ehs......lhlp.sp........thspphpp..hF...pt...a ....................................................................................................................................................................h..phlpp..Ac.....cplhl.ts..ha.......................................s.spp....l........hpsL...hp....sspc.....G...V.......c..V..+..l..lhs.s..s...............................................................................................................h.....h.ts.hph.h.t.ph....h.p.s.Glc.lhhh..........................................................................tts..hhHpKhhll..D.......sp.........................................................................hsh.l...G.o..h.N....h.....s..s..c.uh...............thsh........................E.hs.............lh..lp...s............t.stthtt..h........................................................... 0 1240 2500 3500 +12933 PF13092 CENP-L Kinetochore complex Sim4 subunit Fta1 Coggill P pcc Wood V Family CENP-L is one of the components that assembles onto the CENP-A-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. 
CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. Fta1 is the equivalent component of the fission yeast Sim4 complex [1]. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. 21.70 21.70 21.90 22.10 21.50 21.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.85 0.71 -4.46 30 154 2010-07-26 14:40:08 2010-07-26 15:40:08 1 5 141 0 110 144 1 169.30 25 45.25 CHANGED pshTpLPLLLsRhPssLRpshhoFLupsFDspsSsLRLsophLssshEpalsslspssss.....................plh+-hpLTLuFs.sssss.......................uL+olslsIPtpslssFhptu.....................................tt.psp.s....ssFhsuLusYhcpHLAhcLs.................tttl+LoKlusuuFllusEGRlKlluss ..............................................................................................phspLPLhLsphspshp.phlhsalppsFD...shho..sL....p....lsspsLshhhthahsshsppths..........................................phplhas.sstss.................................slpslsltIs.cchpshhpps......................................................t.s.t.p..pp.pps.....s.FhpsLtsahcc.Hhtlc.Ls.................................................................ss+Ls+lusushsht..s....-G+lKlhs..t.................................................. 0 24 43 77 +12934 PF13093 FTA4 CENP-U; Kinetochore complex Fta4 of Sim4 subunit, or CENP-50 Coggill P pcc Wood V Family Fission yeast has three kinetochore protein complexes. 
Two complexes, Sim4 and Ndc80-MIND-Spc7 (NMS), are constitutive components, whereas the third complex, DASH, is transiently associated with kinetochores only in mitosis and is required for precise chromosome segregation. The Sim4 complex functions as a loading dock for the DASH complex. Sim4 consists of a number of different proteins including Ftas 1-7 and Dad1 [1]. 24.80 24.20 24.80 24.80 23.50 23.00 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.38 0.70 -4.90 31 102 2010-07-26 14:45:24 2010-07-26 15:45:24 1 3 88 0 82 102 0 200.70 30 81.33 CHANGED Msto.p.......olhplKpsFlcsQl...+ILSpsL.........pssccW+shst.ts.pp...............p.............Ls-.......+slpclLpKlNstL+pHsRtlaSsQAlpHVApQItpLYhpphtpsspp.sphpthlc.............................cssDLos...........ptsIppLPp......................................................................ph.t.....ps...ssppptp......................+YpcLppcLlpLsppppptpc+LsphppLpplL...........................EPacsspp........................slQsNLl.....o+supLspEl..p+MRhLls+Vuu+ .................................t......pl.phKpsFlpsQh...+lLSpsL.........tPsc..sW+shst..stpp.........................s........lsp.......+slpcsLt+lNthl.ppHsRtlassQAhp+VApQIppLYhpphpttst.p..p.t.hlp.............................ctsDLss...........tt.IppL.Pt.......................................................................ph.t............t.p.tptp......................+YtplhtcLhpLspp+pphpp+ltph+pLpphl...........................cPapsspp............................................slQ..NLl.....T+su.lttEl..p+MRhLlu+Vss+......................................... 0 14 40 68 +12935 PF13094 CENP-Q CENP-Q, a CENPA-CAD centromere complex subunit Coggill P pcc Wood V Family CENP-Q is one of the components that assembles onto the CENPA-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. 
CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENPA nucleosomes directly recruit a proximal CENPA-nucleosome-associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENPA NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENPA-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. Fta7 is the equivalent component of the fission yeast Sim4 complex [1]. 28.50 28.50 31.00 29.90 27.80 28.20 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.05 0.71 -4.21 38 159 2010-07-26 14:57:46 2010-07-26 15:57:46 1 3 124 0 96 153 0 156.00 22 39.12 CHANGED pLtp+Lsp.hshPsts...................pcs.hsh-plhpppptLEppLu....s.hcplphLppElccp-sthpp-tcpLpchccsscttcsphccppcp......+.lLpt.ppssspp..tsppp...............tp.ssph.................-p-ltsllpplp......pclcshpsstt.......lpslpctlpcspst.....Ls ................................plhpphtp.h.hPstp.........................................cs.hsh-pl.lptppsLEppls....ss.culthLpp....Elc.ctptphpppptplppLcpsspshppphccppcch..................+plhph....s...pp.ss.p..hsthsppp........................................sssh.................ppElhslhtplp......pchcshpsstp.......hpshhphlpcshttLp....................... 0 14 34 62 +12936 PF13095 FTA2 Kinetochore Sim4 complex subunit FTA2 Coggill P pcc Wood V Family Fission yeast has three kinetochore protein complexes. Two complexes, Sim4 and Ndc80-MIND-Spc7 (NMS), are constitutive components, whereas the third complex, DASH, is transiently associated with kinetochores only in mitosis and is required for precise chromosome segregation. The Sim4 complex functions as a loading dock for the DASH complex. 
Sim4 consists of a number of different proteins including Ftas 1-7 and Dad1 [1]. The equivalent higher eukaryotic protein is CENP-P. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. 20.70 20.70 21.40 21.40 20.50 19.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.49 0.70 -4.73 13 63 2010-07-26 15:12:01 2010-07-26 16:12:01 1 2 35 0 56 62 2 191.60 25 60.04 CHANGED Ms...........................h.p.L.hspPscpss...............stPpLchFht...Hs..............psIphlpalstss-..........uhVaphclpu+.pYALKl...h..asapss..hh.cltt+th.......hahs...PhssEsRAauRLspltcpGhh....AV+CHGWhhLo..cpphpph................sp.hphhspWAIVKD...alssph........s.pplsplhpchplh+cshlhspDlp.pNYRsuhlVDLGssh .................................th..................................tPpLt.F.h...pt..............tslphlphLststp..........uhVa+sclpup.....YALKl.............a....htts.....t...phttp.h.............h.ahs..................PFssEsRAauRLpchscpsh.....AV+CaGah..hLs....c...pphp.h....................................tpttphhshhAIVKD...hlssth..................p.psh.c.lhpchphh++htlhstDlpspsYh.suhllDhups..................................................................... 
0 3 27 48 +12937 PF13096 CENP-P ShortName; CENP-A-nucleosome distal (CAD) centromere subunit, CENP-P Coggill P pcc Wood V Family CENP-P is one of the components that assembles onto the CENP-A-nucleosome distal (CAD) centromere. The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [2]. Fta7 is the equivalent component of the fission yeast Sim4 complex [1]. 25.00 25.00 30.30 29.40 20.60 18.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.99 0.71 -4.96 5 51 2010-07-26 16:04:31 2010-07-26 17:04:31 1 1 35 0 27 49 0 127.20 51 60.61 CHANGED Q+aRLuGsCpuLsFQLEFQlLElQsc-slssslTDLsIlhEssEapDLScFVSRsEEc+sLLLFFRSLpoFuEWCEaR+pTFcHFKEKYP-lVsLPEGspuEpMtlRNPQpPGhELlIVWKIHID-EG.sVlPlLDLLsKlPpQALELDcKuslEsuussFRoLLtlhGIEuoIEsLI .....................+a+LSGsCp.lsFpLEFplLEh.psp-phSusloDLsIlhEssphuELScFloRsE-ctsLhhFFRuhp.asEWhcaRcpTF.HhKt.KYPphV.LscG.t.up.h.lpssp..sGFELhIVW+lplsE-G.pshPhLDLLsKhPppsLt.ppptsl-suP.tFRshlhhhGlEssl-pLI................ 0 6 7 11 +12938 PF13097 CENP-U CENP-A nucleosome associated complex (NAC) subunit Coggill P pcc Wood V Family CENP-U is one of the components that assembles onto the CENP-A-nucleosome associated complex (NAC). The centromere, which is the basic element of chromosome inheritance, is epigenetically determined in mammals. 
CENP-A, the centromere-specific histone H3 variant, assembles an array of nucleosomes and it is this that seems to be the prime candidate for specifying centromere identity. CENP-A nucleosomes directly recruit a proximal CENP-A nucleosome associated complex (NAC) comprised of CENP-M, CENP-N and CENP-T, CENP-U(50), CENP-C and CENP-H. Assembly of the CENP-A NAC at centromeres is dependent on CENP-M, CENP-N and CENP-T. Additionally, there are seven other subunits which make up the CENP-A-nucleosome distal (CAD) centromere, CENP-K, CENP-L, CENP-O, CENP-P, CENP-Q, CENP-R and CENP-S, also assembling on the CENP-A NAC [1]. FTA4 is the equivalent component of the fission yeast Sim4 complex. 19.50 19.50 19.50 19.50 19.10 18.80 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.18 0.71 -4.33 9 57 2010-07-26 16:13:52 2010-07-26 17:13:52 1 2 39 0 30 56 0 158.90 39 44.36 CHANGED p-s-spcpVtssEp.......pspp.pshssss....thsEcPupsVTsppsuspsupsSstctss.AspoppcTQK..p...t+Rp+scptsptocsu-shplW...C.cth+tS..DIpELDVlLutFE+hhLEY+QclEScsC+pAIscFasplKcpLhchlcElQhLKsLK+KNsKlluslEKKRQRL ........................................s-sppcltssc+......hsspppcsh.sss.......phsEcPupsVssppsus.ss..psssEcpsh.....s.pspp.....csQKp......p.t.s++p+s....c..opshs.scs..S-.......ssplW...C.cshKpS..DIpEL-lVLstFEKhhhEY+QplES+sC+pAIspFasshKEpllchlpEsQhLKNLKRKNAKllusIpKKRQRL.................... 
0 3 7 13 +12939 PF13098 Thioredoxin_2 Thioredoxin-like domain Bateman A agb Jackhmmer:O25140 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.89 0.71 -3.87 144 3289 2012-10-03 14:45:55 2010-07-26 17:16:55 1 21 1889 26 693 17334 6561 121.70 21 46.68 CHANGED s..p..s..p...s...Kh.hlhhF.ssss.Cs..aCcp..hc.pplhp..sp...ltth......h....................pt.p..hth.hhl................sh...................t.....psp................t.........hht.....................ttt...............h......tpp.......................p..........................................hu..pph.s..........lpu.TPsllh.hs.t.p........G........p.......p....hs.G.hh.s.s.p.phh.phL .......................................................ts....p..t+h.hlhVF.s.D..ss.C.s.YC.+c....h.c....p...p.h.t.s........st.......l.s.h.t...h...............................................p.....t....hhh....h..h.l.....................................ss...............................................................................t.......s.s.p........p.......tt..t............hps..............................................tttp..................................t.......................................p...................................................................................................Ls...p.ph...G.............................l...s.u...TPsl...lh...s................G............p.........h.......hs..G..h....s..s..pph.th............................................................................................................................................................ 0 189 415 577 +12940 PF13099 DUF3944 Domain of unknown function (DUF3944) Mistry J jm14 Jackhmmer:O26108 Domain This short domain is sometimes found N terminal to Pfam:PF03981. 
21.70 21.70 22.30 28.00 20.80 21.00 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.27 0.72 -7.32 0.72 -4.46 19 557 2010-07-27 11:58:35 2010-07-27 12:58:35 1 3 517 0 12 169 3 34.80 69 14.48 CHANGED Y+.hDsDLEFLscCoscDLpsLVphLT+D.KDGphRh ..YL.pDsDLDFLQHCSEEQLAsFARLLTHN.EKGKsRL... 0 3 5 8 +12941 PF13100 OstA_2 OstA-like protein Mistry J jm14 Jackhmmer:O26089 Family This is a family of OstA-like proteins that are related to Pfam:PF03968. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -10.95 0.71 -4.90 30 372 2012-10-01 21:43:16 2010-07-27 14:41:27 1 9 305 0 104 461 481 129.20 26 25.21 CHANGED Ko+VhL.HuDpLphDp.t.pPDAQlLhG...sVsh+HDushMaCDSAhhaEpoNSlEAFGNV+MpQGDTLhlYG-YLaYDGsTQlAhlR.cNVRMcN+ssTLhTDSLNYDRlhslGYYF-GGolsDpcNsLTScaGEYSPuTKhAlFspsV+LhNPcFsLsSD ...........................................................l...us.h.....hsp......s......s.....s.....h..h..G.....pVth..p.a.pu......hh..hsD....p...shh............p........p...p.....p..........hp...Ah..Gs.....V.p.....h......p....s..D.s.h...p.....lhu-hh.Yss..pp.s.hh....tp.V....h..h.h.p..p........t......p..L..h..s..-..pL.Ysph...p..h...uh......a.p.s.s..pl..h.s..p..t..s.hlh..u....p...G.Y..tsc...h..ts.h..p.t................................................................................... 0 41 89 102 +12942 PF13101 DUF3945 Protein of unknown function (DUF3945) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This is a C-terminal repeated region. 
21.90 11.10 22.10 11.20 21.80 11.00 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.98 0.72 -8.53 0.72 -4.67 53 731 2010-07-27 21:34:35 2010-07-27 22:34:35 1 9 130 0 73 590 11 54.20 26 21.58 CHANGED chhssalslDctTNclhsh.........ss....p.......lpIP.......sclps.lcLosppppsLppGcslhlc.hhspcs .......................................hlphD..Ttph..h.........ht....p...................ltlP.......pclpG.lcLosppppsLppGcslhlcshhscp.......... 0 26 64 72 +12943 PF13102 Phage_int_SAM_5 DUF3946; Phage integrase SAM-like domain Ellrott K, Bateman A kellrott JCSG - Joint Center for Structural Genomics Domain A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. This family appears related to the N-terminal domain of phage integrases. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.17 60 914 2012-10-02 14:21:04 2010-07-27 22:57:47 1 2 196 1 136 888 67 145.40 17 38.17 CHANGED lplRpKpL.ssGph.SLYLDhY...........suc.....RpaEaLplYlhsc.psttc.........................+cpNppslphAcsI+ucRhlElps.......pptsh.tspp.csctshl-ah...cphtcpppppsp.t........phpsshp.aLcpasttp.......lsFp-lDtcahpsFhcaLh .............................................................................................................p.h.tpGph..slhl..chh..............................hptp..pph..h.t..lh.l..hsp..hstpp..........................................................pp.t.pp.h.h.t...ht.p..t..l.p.....t...p.chh.p.h.p............................tp.th...hp..p..t....ptp.......ts..hh....sah....cphhpphp....t.....p....s.p.......hp............papss...hp....p.....lppFhppp.........lshp-lshpalpsFcpaLh.......................... 1 45 110 134 +12944 PF13103 TonB_2 TonB C terminal Mistry J jm14 Jackhmmer:O25752 Family This family contains TonB members that are not captured by Pfam:PF03544. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.84 0.72 -3.94 102 1325 2012-10-03 21:09:15 2010-07-28 10:51:51 1 14 909 1 457 2915 858 85.30 19 30.22 CHANGED hssYhspltptlpp.....p..W...s..st......ssshpshlplplss.sGp.l...shplhcsSGsptaDpuVtcAlp....pspshP..s....uth.....phthsF ........................................tahstltpplpp..........p.........a.....s.....ts............tstshp..s..plp......l..p........ls.s..sG.........p.l....h..s...s....pl...s.c.....o.S...G.....ssthD.....p.u.....shpulc........pssshP..s......tt...................................... 0 136 268 371 +12945 PF13104 DUF3956 Protein of unknown function (DUF3956) Aldam G, Mistry J gba Pfam-B_1228 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 22.20 22.20 23.90 93.00 20.90 19.80 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.12 0.72 -4.40 3 98 2010-07-28 10:22:01 2010-07-28 11:22:01 1 1 64 0 4 13 0 43.80 89 99.68 CHANGED M..shssFVNGQPhLVVSVAGIEIARLEISLQVALTLIALGIPICA ..M-SCVlFVNGQPhLVlSVAGIEIARLElSLQVALsLhsLGIPICs. 1 0 2 2 +12946 PF13105 DUF3959 Protein of unknown function (DUF3959) Aldam G, Mistry J gba Pfam-B_1424 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 260 amino acids in length. 
25.00 25.00 25.40 25.30 20.20 19.80 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.67 0.70 -5.04 5 82 2010-07-28 10:32:12 2010-07-28 11:32:12 1 1 77 0 5 56 0 229.50 81 95.47 CHANGED MLLSGLFPIAGIhKQIPLEQSLYIGGLLFFTSFGSYFAKKhYSRICSWIAYAPFITLLLlIWcQDITTuSlLANAKIAAC..IALlPsIaRFRTYGlTFGLlALWGALLWDlKEVQSLVILERMoSLMTScalYLLLLlGGLIlGGLLAshIHRKEKD-NKENINLapQKKKRK+LSFKI.LPRLPKL+MKLFKFGtK.oppK..c..+c+pYEEsh.....ch.pp.s.acpp..ol.GQTRMERRRN ..MLLSGLFPlAGlhKQIPLEQSlYIGGLLFFTSFGSYFAKKhYSRICSWIAYAPFITLLLVIWHQDISTSSIIANAKIAAC..lALIPCLFRFRTYGLThGLFuLWAALLWDhKEVQSLVILERMoSLMTSpahYIhLLlGGLIlGGLLAMhIHRKEKDsNKENINLFpQKKKRK+LSFKI.LPRLPKh+MKLFKFGGK.SKpKpPEKI+E+pYEEsss....TYEMpEQIc.YKE.s..slQGQTRMERRRN............. 0 0 3 3 +12947 PF13106 DUF3961 Domain of unknown function (DUF3961) Aldam G, Mistry J gba Pfam-B_1483 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 40 amino acids in length. 25.00 25.00 63.10 63.00 21.50 19.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.95 0.72 -4.39 11 103 2010-07-28 10:37:55 2010-07-28 11:37:55 1 1 84 0 4 33 0 39.10 76 52.23 CHANGED pslNcaFGI-tstSDpIWFYGhaululhlhhhsYllStll .QSVNKFFGLD.TKEDCVWFYGFYGVAVSILLFMVFTSNIF.. 0 0 2 2 +12948 PF13107 DUF3964 Protein of unknown function (DUF3964) Aldam G, Mistry J gba Pfam-B_1516 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. There are two conserved sequence motifs: FYF and AFW. 
21.60 21.60 24.10 179.00 20.10 20.10 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.39 0.72 -3.81 4 117 2010-07-28 10:39:55 2010-07-28 11:39:55 1 1 117 0 4 44 0 107.70 71 96.81 CHANGED TRpEpIhpLsFFEDKPtLAEQIL+lE+cEplaLPsQFEIKQsssYphGEKpsllGRlcpFYFlul.to.Espa+hQAFssEhcs+tFFlsLssIpcp.lAFWhNplELl .TRQERILQLPFFENKRELAEQVLKhEREEHlYLPDQFEIKQVPPYSFGEKpuIIGRIHEFYFlSl.GS.EusWKYQLFKDEMKCREFFVpLPsIsDQQIAFWFNNIELL. 0 1 2 2 +12949 PF13108 DUF3969 Protein of unknown function (DUF3969) Aldam G, Mistry J gba Pfam-B_1576 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 25.00 25.00 40.40 40.30 20.10 19.60 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.15 0.72 -4.39 9 294 2010-07-28 10:43:29 2010-07-28 11:43:29 1 1 290 0 11 74 0 100.10 49 88.47 CHANGED htspppl-pahhhhhlGhhpuh+hshIols.hEthlFp.hhhcllpchslcccLl-IItpGhpLEDl........t.lhspcLpcsIcslpspslphLhphp.php..cptlphl. ........htp...LEKhhLhhhhGlhppLKLtllSlDpAc+hlFs..hhEhLtshulccsllDLIHpGsELEDh.............tshshoIE-hlslCLQhhcEh.pphcsVEhp.c.l.l............. 0 5 6 8 +12950 PF13109 AsmA_1 AsmA-like C-terminal region Coggill P pcc Jackhammer-O25308 (H pylori) Family This family is similar to the C-terminal of the AsmA protein of E. coli. 
22.50 22.50 22.70 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.24 0.70 -5.06 21 213 2012-10-03 05:41:17 2010-07-28 11:44:00 1 5 211 0 30 230 477 212.80 47 23.87 CHANGED splpssupacsGphsh.hppsp..lplc..ucshsD-FlNplhs.....p.phhptGpFsl..p...u.t.s.s.s.shapGclplpsThl+shphlpNlluFIcTIPSLlsF+sPsFsscGaplcpGpllFthpc-hLslcslpLpGsShDIhGpGhIsLcspplslsLpLpTlKshoshIsKIPlls....YIlLGK-tpISTslplpGsLDsPchpTplspDlLhuPFsllKRll .........................................................................................s.tlchcushtNAphslh.h.sssc...LpLp....s.p.N.h.s.DpaLNp.hLQ.....K.puVps....GlFsLp......h..t.G....s..s....chF.......cG..pl..cF..p..NTalKDL+sls.p.LISFI..s..T..........lPSLL...........hF.KsP.........sFN.p.KG.lsl+cG+l..lFst..KK....D..lLulpsIsLsGsSlDIhGhGohsL+hNslDhsLELKTLKohScsISKlP..IlN....YlIL.G...Ks...p.c..I.S.T.N.l.+lcGolDcPKh+T...........plloDsLpsPFNllKpIl............................... 0 10 25 29 +12951 PF13110 DUF3966 Protein of unknown function (DUF3966) Aldam G, Mistry J gba Pfam-B_1525 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 86 amino acids in length. 25.00 25.00 111.50 110.90 18.80 17.40 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.50 0.72 -4.16 2 85 2010-07-28 10:46:03 2010-07-28 11:46:03 1 1 84 0 4 20 0 49.80 97 63.60 CHANGED VIhYISRKFSQERpLEKSEITAEL-MLscEsaKKQKIKEDHEtsHHLNsp .VIVYISRKFSQERELEKSEITAELEMLADESYKKQKIKEDHEASHHLNAN 0 0 2 2 +12952 PF13111 DUF3962 Protein of unknown function (DUF3962) Aldam G, Mistry J gba Pfam-B_1505 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 233 and 796 amino acids in length. There is a conserved FSY sequence motif. 
25.00 25.00 25.30 32.70 24.30 23.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.56 0.70 -4.95 5 80 2010-07-28 10:48:59 2010-07-28 11:48:59 1 2 78 0 5 63 0 214.20 84 34.62 CHANGED QLLAFKslV-PLhpEsVaYlYFPcEWlsLLctHh+sacLssKLKtLNERLYhMFSDILFIQHNPYsLsEsoPWIVuKEPl+QEQLEYIs+uWY..ElIH-WK...PschlsPscLEWQsshISNLPlLHDNcTaaKWIPALITHlFCEcPLHLslsNcs-EElhFsPLRSQ+lsEAMSEPI+DccTQDYFSYVYRFEsITRGGE.NtPLLKVSIGIRRFYQp ................................................+LLTFENIVEPLLNEpVSFIYFPIEWLDIVEIHYKTFLLTSKLKRLNERLYDMFSDILFIQHNPYVLNENTPWIVSKEPIRpEQLDYIFQSWY....EIIHDWK...PNKLIEsPKYEWpYDLISNLsVLHDpEs....YSKWVPALISHIFCERPlpLE..NhNEE-IYFSPLRSQNICEAMSEP.....I...KDEKTQDYFuYVaRFEhITRGGE.NIPLLNVSIGIRRFYQE. 0 0 2 3 +12953 PF13112 DUF3965 Protein of unknown function (DUF3965) Aldam G, Mistry J gba Pfam-B_1524 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 380 amino acids in length. 20.10 20.10 20.50 21.20 18.90 19.50 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.01 0.70 -5.25 3 85 2010-07-28 10:52:17 2010-07-28 11:52:17 1 2 83 0 3 38 1 284.80 92 75.80 CHANGED QEIGWYKEAYTFMVs+sLssFVHTSlEYETWDlLTQAVAWNYLIIKYRIGcLEDuDlhIWERIKFNEECIEcCcsLLSHKEVLEFTFFYlCKQAKpLSKEcLNp-MMsLAIYCNTYVYDLYoYDLL+KYRKCTDFLSYYGPSpuVLACQRAVlAQISDRLNPLKTTHVDDYLYVMKEMMEHMoapFMcRY-HFIGKLLSYVPFFEMIQVPQHAYYCEELMYICKGIuYKEEILRNYlFIQLHDCLPSFIKlFLKNKRYATIHDILFYWCDcEQRMuLERKYNLSFIYEKYA .......QEIGWYEEANsFMlsQGLAEFVHTSLEYETWDLLTQAVALNYLIIKYRIGELTDtDVEIWDRVKFNEKCITDCKHLLSHKEVLEFTFFYMCKRAKSLSKEQLNSDMMSLAMYCNTFVYDLYTaDLLRKYRKCTDFLSYYGPSQAVLACQRAVLSQISDRLDPLKTTHVDDYLYVMK-MMEHMTIGlMDRYsHFIGKLLSYVPFFEMIQVPQHAYYCEELLYICKGIcYKEEILRNYIF...IQLHDCLPSFFKLFLK..NKRYATIHDILFYWCDDE..QRMSLEKKYNLSFIYEKYA............ 
0 0 1 1 +12954 PF13113 DUF3970 Protein of unknown function (DUF3970) Aldam G, Mistry J gba Pfam-B_1596 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved NPKY sequence motif. 21.10 21.10 21.10 21.50 20.60 19.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.85 0.72 -4.28 7 111 2010-07-28 10:55:07 2010-07-28 11:55:07 1 1 110 0 11 35 0 61.30 65 99.96 CHANGED Mhp..lRlpGpcE.EI.chlpshsc..taEloa...spcshttsNPKYchSKslhsYlclKh.....pK MIR..VRIEGTEE.EMLEFMcKMPDIPGFEKTH...hREPR.KGNNPKYDSSKNVLAYLSYKKIE..VANK. 0 2 6 7 +12955 PF13114 RecO_N_2 RecO N terminal Mistry J jm14 Jackhmmer:O25605 Family This entry contains members that are not captured by Pfam:PF11967. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.08 0.72 -4.26 24 198 2012-10-03 20:18:03 2010-07-28 14:46:01 1 3 198 0 22 850 192 71.10 66 34.33 CHANGED MQGaILpsp+V+DEDLIVplLTpsplhphY.....RFYGARH......SsIplGaKIDFElEpst.ph.ls+LRslhHLGatWh ..........MQGFIL+TQK..VK..DEDLIVaILSPctL..lKsY......RF.....YGh.RH.........................SoIhsGYKIDF..t..LEcss..sF..LPRLKDVLHLGFlWI........................................ 
0 6 19 22 +12956 PF13115 YtkA YtkA-like Coggill P pcc Jackhammer_O25884 Family \N 24.40 24.40 24.40 24.60 24.30 24.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.28 0.72 -3.63 90 656 2012-10-01 19:30:51 2010-07-28 15:05:13 1 14 358 0 179 609 32 85.10 22 36.73 CHANGED ptpsps......hplpls.ssp...ph....psGp.s.s..l...hs....t...ss...psGc..sVs.sA.s.l.phphhhst.t..uhtph.pshh...........p.spp...tsGhYphp.ssh.sh....sGpWplplp .......................................tshplplt.ss......h....psGcs.s..h...pl...........p...hs...pssc......sVs.cA.p..V...p.hphhps........Ghtph.pths.......................p.spp...tsGhYpsc.tsh.sh....sGsaplplp............... 0 58 121 145 +12957 PF13116 DUF3971 Protein of unknown function Coggill P pcc Jackhammer_O25308 (H.pylori) Family Some members of this family are related to the AsmA family proteins. 22.50 22.50 22.50 22.60 22.30 22.40 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.69 0.70 -5.03 104 1538 2012-10-03 05:41:17 2010-07-28 16:08:13 1 12 1521 0 320 1280 259 286.00 26 24.21 CHANGED htssspshc.YhPt..hhhupsLpsaLssAlp.......u.GpspsuplhapGslsp.aPat.....ppp..G.FpshsslcsuphpFpss..........WPslpslshslhFpNsslthpsspupltsss.hssspssIscl.sppsh...LpIcuchsu.pspsltchhppoPLhsslu..tsL.stlplsGplsupL.pLsIPLp.....st.......tspGphsl.pssslhls....shplpslsGplpFs.s.......sslsupslpAphhspPlslshss........ppptpshtl.....slslpGph....phptl.t......hst.....ltt...hlsGpssapsslslths...........ptshphpl 
....................................................................s.ssspshR.YhP......hhucs.LhcaLssAlp........u...Gpscs..uplh.....h.t...G...s....pt...F.....Pap..........tsp.....GpF..plh..s..sl..csupa.tapss...............................................................W.Pulpslsh-Lsa....s.ss.....L......hh.....p.....ss......pu..............p.....l......s.....s......l.....p.....s.o.......s......l.............s..ss.............I.P..Dh.sccp.......LhIcuc.....hpG...sups.l.tsh...h.s.p..o.P...L.t..s.hu..ssL.sth..p...l.s.Gs.ls.u..cL.cLsIPLp.....................st...s....sspG.plsL...psssL.lt...s...s......sLpslsGp.hpFs..s........sslpup..slp..AphhstP..lslc.hss...........pp.tsp.shps........tl.slsush.......ps.tth.s.........hls.t.......l.p...hlsGsssaphpltlt............................................................................................................... 0 70 171 243 +12958 PF13117 Cag12 Cag pathogenicity island protein Cag12 Mistry J jm14 Jackhmmer:P97245 Family This is a Proteobacterial family of Cag pathogenicity island proteins. 25.00 25.00 26.20 25.90 21.70 19.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.41 0.71 -4.17 13 185 2010-07-28 15:30:54 2010-07-28 16:30:54 1 2 135 0 10 136 1 109.40 36 56.11 CHANGED hhhshlLsuC.SSsPcPscl-tsp.tlslNsplhppp.hshVPKss.hlss.sWsYplhlpsh..c-chlssc.hsphFhlAHsuc+IlllGptsphtpYKpahppNGspusIp..lQP ......h.lLsllLoAC.Ss.sc.s.h.pcs+shpsl.Np.Ll-p..aSplPhss..plpshsahhplhl.sa...cDhllcsc.hslpFtLsHpo++IsllGcusphhpYKsYhptNGApusI-..lQ................... 0 2 5 9 +12959 PF13118 DUF3972 Protein of unknown function (DUF3972) Mistry J jm14 Jackhmmer:O25162 Family This is a Proteobacterial family of unknown function. Some of the proteins in this family are annotated as being kinesin-like proteins. 
22.70 22.70 22.70 23.00 22.50 22.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.49 0.71 -4.05 47 197 2010-07-28 16:11:16 2010-07-28 17:11:16 1 3 197 0 21 106 3 123.20 63 65.04 CHANGED EFs+LspLsc-hlhthhspGtLpsKp.EcGKlhI-AspGT.ulV.sstpshhuMspshshht.s..FVEKTIGTILNLHEKVLsAKDETlpAlKNENpFLKDALhSMQElY-ED+KTI-hLppEL+pAR ..................EFCKLVHLsE-VVcuMhssGsLshKE.E-GKIYIEAppGT.h.........SVVPuu..spshsuM.......s.........sShslsupS..FVEKTIGTILNLHEKVLDAKDETL-ALKNENKFLKDALYSMQELYDEDRKTIETLspELK+AR... 0 8 18 21 +12960 PF13119 DUF3973 Domain of unknown function (DUF3973) Aldam G, Mistry J gba Pfam-B_1636 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved YCI sequence motif. 25.00 25.00 53.00 52.30 24.50 23.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -8.27 0.72 -4.03 3 87 2010-07-29 08:33:42 2010-07-29 09:33:42 1 1 86 0 4 44 0 41.00 92 51.35 CHANGED MYYCIsCSEIHHEKusNDKVFKNGFYIDPFLGERYHLGMCK MFYCINCSDIHHEKHPNDKVFKNGFYIDPFLGDRYHLGMCK. 0 0 2 2 +12961 PF13120 DUF3974 Domain of unknown function (DUF3974) Aldam G, Mistry J gba Pfam-B_1643 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length. 25.00 25.00 38.30 38.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.86 0.71 -4.30 2 82 2010-07-29 08:38:48 2010-07-29 09:38:48 1 1 81 0 3 41 0 124.00 94 45.69 CHANGED MuFIphVLLLlGTLLLIuFTlVVLlVYFGRKhYhSWsKPYKRAp-Sl-KLSNKShPFLQEFTQHPLFYRWIRTEGKKEQpshNTLFCsusQRTREQVFSMLPK-+QKKVHsMAKoTKKlTNE..DI MSFIQTVLLLLGTLLLIAFTVVVLVVYFGRKLYFSWTKPYKRApDSL-KLSNKSlPFLQEFTQHPLFYRWIRTEGKKEQ+TLNTLFCASuQRTREQVFSMLPKEKQKKVHVMAKTTKKLTNEDIDV.... 
0 0 1 1 +12962 PF13121 DUF3976 Domain of unknown function (DUF3976) Aldam G, Mistry J gba Pfam-B_1743 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 40 amino acids in length. 25.00 25.00 83.50 83.00 18.60 18.20 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.02 0.72 -4.11 3 82 2010-07-29 08:43:40 2010-07-29 09:43:40 1 1 82 0 3 20 0 40.00 94 67.13 CHANGED MYLFVRKDITKDNTLTKRGFYKLIGCLVVMFIGIIVMIVLl VFLFIRKDV.QGGTLTKRGFYKMIGCLVVMFIAIIVMIVLI 0 0 1 1 +12963 PF13122 DUF3977 Protein of unknown function (DUF3977) Aldam G, Mistry J gba Pfam-B_1744 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 25.00 25.00 61.30 61.20 20.00 17.60 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.67 0.72 -4.17 4 85 2010-07-29 08:51:07 2010-07-29 09:51:07 1 1 85 0 2 40 0 76.40 87 91.52 CHANGED MKYIEhGIGN+WhVRTETEpEDGTEFEQKGIlKPIYFESlYlRlWFRKTClIhDoKEGFKKh+KpRsEYKFIhGIVS .MKYIEIGhGNRWFVRTETENKDGTEFEERGIIKPIYFESLYVRlWFRKTChIFDTKEGFKKVKKRRIEYKFIlGIVS 0 0 1 1 +12964 PF13123 DUF3978 Protein of unknown function (DUF3978) Aldam G, Mistry J gba Pfam-B_1745 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. 
25.00 25.00 27.00 26.70 19.00 17.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.72 0.71 -4.80 3 84 2010-07-29 08:52:49 2010-07-29 09:52:49 1 1 83 0 4 30 0 143.90 88 98.04 CHANGED MEAYNMHNFINTNIESpPcETsFNLHICEoNEFDsNLTKSTTLSFIVTK+NIKIlTKKWINScpESMIGKSYIIPTKAFHYlLPIIsEoEEEMsIQVQSFGlsGELLLNERLLIcKNN+hNS.KIsuFFEALNENI+QALRTLQIp .......MEAYKMHDFINTNVESHQNETVF.NLcICE.TsEFDVSLTKSTTLSFIVSKKNIKIVTKKWINSNQESMIGKSYIIPTKAFHYFLPIISETEDELNIQVQSFGLHGELLLNERLLIDKNNKHNu..KIToFFETLDENVNKsLRGLQI.H.... 0 0 2 2 +12965 PF13124 DUF3963 Protein of unknown function (DUF3963) Aldam G, Mistry J gba Pfam-B_1512 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 85 amino acids in length. There is a conserved DIQKW sequence motif. 25.00 25.00 84.10 84.10 18.40 17.50 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -7.98 0.72 -4.16 4 53 2010-07-29 08:58:40 2010-07-29 09:58:40 1 1 51 0 2 29 0 40.00 86 64.97 CHANGED MlhINshFIERYFcDIQKWIRNIThCFALLVVsLVuLWIG MLSIYTAFIEKYFcDIQKWIRNITFCFALLVVVLVALWIG. 0 0 1 1 +12966 PF13125 DUF3958 Protein of unknown function (DUF3958) Aldam G, Mistry J gba Pfam-B_1404 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. There are two conserved sequence motifs: RLF and TWH. 
22.80 22.80 24.30 24.20 21.30 21.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.42 0.72 -4.11 9 95 2010-07-29 09:08:48 2010-07-29 10:08:48 1 1 45 0 3 73 0 92.20 64 82.10 CHANGED VFEEQDRNQpAIQpQEpAEtDFaEh+sRssRLFsRILETWHsDKElSpFFhNhpQEuQaIERKLTFELENQKETLlKE+RcLpDLENDLoYppQpLt+E ...............................lFEEQscNp.AlQtQEpAEAsFaEW+sRspRLFsRILpTWHGD+EhspFFhNhhQEspplERKlTFELENpKETLLKE+RcLS-hENDLSYpQQ.QLtRE....... 0 0 0 1 +12967 PF13126 DUF3975 Protein of unknown function (DUF3975) Aldam G, Mistry J gba Pfam-B_1736 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 25.00 25.00 43.10 43.00 18.90 18.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.04 0.72 -3.83 2 81 2010-07-29 09:50:29 2010-07-29 10:50:29 1 1 81 0 2 42 0 78.80 90 93.89 CHANGED MWKEKGpQllshIhlGIVlLLQhSFHhIE.LFHKslSILTFhPNMsLEllSIVWSIIASIhIlIIW.....uIthLhpplhhKcS MWKEKGKQlLAWITLGIVILLQISFHIIEWLFHKVlSILTFLPNMTLEhISIVWSIIASIAIVIIW.....SIAKLWNKLFKKDS 0 0 1 1 +12968 PF13127 DUF3955 Protein of unknown function (DUF3955) Aldam G, Mistry J gba Pfam-B_966 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 68 and 87 amino acids in length. There are two completely conserved residues (G and E) that may be functionally important. 20.60 20.60 22.50 22.20 19.40 19.40 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.88 0.72 -4.43 30 317 2010-07-29 10:35:54 2010-07-29 11:35:54 1 6 248 0 29 152 36 61.90 33 75.10 CHANGED hppahluhlhhl.lGlhChhhashhsShVsssGhLpEP.FaLIPlualhlhhu....llshlhshlp.ph .......hpphhluhhhhl.hullhhhIpshs.ohlsssGhLcEP.hF...ahlPlGalhllhu....hhshlhshlpp.h........ 
0 13 21 25 +12969 PF13128 DUF3954 Protein of unknown function (DUF3954) Aldam G, Mistry J gba Pfam-B_934 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 60 amino acids in length. 22.20 22.20 22.60 22.80 21.60 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.33 0.72 -4.54 12 144 2010-07-29 10:39:09 2010-07-29 11:39:09 1 1 89 0 4 82 0 48.70 54 84.47 CHANGED MK+.EIshtpNtlYlV.pcGclphl.pPPpoGFGEQslhWpsGKVs+scsppT ......MKt.EIDlppNtIYlV.KNGpVphl.pPPtoGFGEQshsWpsGKVsRs-sphT.......... 0 1 3 3 +12970 PF13129 DUF3953 Protein of unknown function (DUF3953) Aldam G, Mistry J gba Pfam-B_875 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 47 and 76 amino acids in length. 21.80 21.80 21.80 22.10 21.60 21.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.76 0.72 -4.33 36 454 2010-07-29 10:41:40 2010-07-29 11:41:40 1 1 294 0 22 219 1 41.90 39 68.35 CHANGED hhhhL...GhhhlhhGlp-h+...ccc....KshGhhshlsuhaslhVul.p ........hLL.GlhslpIGhpphK.....Kcc........KhhuIlshLAGshlllVulh..... 0 5 13 13 +12971 PF13130 DUF3952 Domain of unknown function (DUF3952) Aldam G, Mistry J gba Pfam-B_704 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. There is a conserved VMSAS sequence motif. 
25.00 25.00 52.60 52.10 18.20 17.50 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.35 0.72 -4.07 19 114 2010-07-29 10:44:54 2010-07-29 11:44:54 1 1 37 0 2 107 0 104.40 60 41.53 CHANGED usLLSGCuFGE......TKIEYERhVKALDEGDMKTVMSASDDGYAaVcEcsI....aSsaEcKEDGpHp+slYQTT-GlYNhK-KsLYGpToQplsoclcsccp+cp.psYKcEpl .....SLLSGCuFGE......TKIEYE.hVKALDEGDMKpVMSASDDGYAYVKpcsI.....STaEpKEDGcHppsIYQTTcGlYNsK-KsLYGpToQclsoslcsccp+cc..sY+pp................ 0 0 0 0 +12972 PF13131 DUF3951 Protein of unknown function (DUF3951) Aldam G, Mistry J gba Pfam-B_698 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 56 and 71 amino acids in length. There is a conserved YTP sequence motif. 25.00 25.00 39.60 39.30 20.60 20.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.69 0.72 -4.19 4 137 2010-07-29 10:49:09 2010-07-29 11:49:09 1 1 86 0 5 52 0 50.70 61 82.10 CHANGED MILhTIGhlLhTlFIFFIIGFlTFpMFVsKATPQIYYTPC-shTsQohpctpp ...hlLhTIG.l.LTlhIhhIIGF..ahhFlcKto.p...aYTPh-slTspohuca+.c 0 0 2 2 +12973 PF13132 DUF3950 Domain of unknown function (DUF3950) Aldam G, Mistry J gba Pfam-B_688 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 30 amino acids in length. There is a conserved NFS sequence motif. 25.00 25.00 36.80 36.30 23.60 23.00 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -7.39 0.72 -4.42 8 501 2010-07-29 10:51:02 2010-07-29 11:51:02 1 1 257 0 7 125 0 30.00 81 50.68 CHANGED LL-QIstuhtpEp....ouNFSAWVh-ACRcKLp .MIEQINIAL-pKG....SGNFSAWVIEACRRRL.. 
0 2 2 7 +12974 PF13133 DUF3949 Protein of unknown function (DUF3949) Aldam G, Mistry J gba Pfam-B_636 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 69 and 87 amino acids in length. 25.00 25.00 61.30 61.10 17.30 17.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.04 0.72 -3.75 5 147 2010-07-29 10:52:29 2010-07-29 11:52:29 1 1 88 0 5 77 0 60.90 70 78.40 CHANGED MIPIQYtYIcuLK...Ec++KpGlSQcELY-NMSFEEEQLHYHsQGNlFsIPuuhVAShIY+lKp .hlPIQY.YlphL+...EKpKKhulSQpELY-pMSatEpQlHaHhQuNsFsIPuuhVA.hIhKVK.. 0 0 3 3 +12975 PF13134 DUF3948 Protein of unknown function (DUF3948) Aldam G, Mistry J gba Pfam-B_550 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 25.00 25.00 51.80 51.60 20.80 20.10 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.47 0.72 -4.48 8 175 2010-07-29 10:54:03 2010-07-29 11:54:03 1 1 80 0 13 52 0 34.70 69 93.11 CHANGED MpN.cQVLQVTKhDFLGSASGAslLTAhIVFLusVL ...Mps..EQVLQVTKsDhlGShuGAVVLTuhIlFLusVL 0 0 4 4 +12976 PF13135 DUF3947 Protein of unknown function (DUF3947) Aldam G, Mistry J gba Pfam-B_493 (release 24.0) Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 
25.00 25.00 26.90 26.90 21.60 19.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.97 0.72 -3.78 13 175 2010-07-29 10:55:40 2010-07-29 11:55:40 1 1 80 0 4 91 0 77.00 55 98.40 CHANGED hssYa.sphplt.......suIThuuAQuTlpAVpQAhQ.....MQQQh..th..p.......uh..aYsshtY.......hYPs........sFoTIPaGssY.L .........h.pYFhstttht.......suIThuGAQuTlpAV.pQAlQ.....MQQQhQ.....Q.......G....YSph.h.......hYPs........oFhoIPYGusY.L.......................... 0 0 3 3 +12977 PF13136 DUF3984 Protein of unknown function (DUF3984) Aldam G, Mistry J gba Pfam-B_3236 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 393 and 442 amino acids in length. 21.30 21.30 40.40 30.50 21.10 20.70 hmmbuild -o /dev/null HMM SEED 325 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.36 0.70 -5.21 20 72 2010-07-29 12:40:09 2010-07-29 13:40:09 1 2 68 0 59 69 0 298.50 34 77.17 CHANGED RSRRSasSLpHlSLAPLTsRFPlDDDss..........s.tst....t.hstsspsspTSYLuShSVPsTPslL..ScSRssSpsRtp.....p+ppoopptphS-.....osLcupsstpshHHppp.............pcptpppsssst............p.ss.tppc.pDsEWLLRAGlALASSTREEKGQSWLVKR-SSTSLV..................uEss.s..ptht+ppc............tppptoRRuR......................SGhSTP...sAhSRRsSpSRsuSRtu.SRs-L..sMTuhch............sttsttths...ss..p-sp...thlPDFVDcclRsEMt.h................tp.cp..........s.............st....u-sDsE-.......-.hDEtEhQRLTRc.cGhGLGuWI.DRlVEWTLFuVE- 
.............RppRS.ssLpHlSLAPLTs+hPl-D-s..............................thtsh..poSYLpuhSsPsTPs..lL..Sp..Stssupsphp.......tpss.tt.hSp.......o.h.puhsss..p.hppppt......................pt.tppphspt..........................p....shsppc.tDs-WLLRsGhsLuSpsREpKGQSWLVpRpSS.TSLl.....................spsp.s....cthtcppc......................tspptuRRup.........................Ssh....uoP...hshSRhs....S+s.sS+....hu.SRspL......MTshph.........................tt................psh...hhsPDFV-.s.+.Eh.......................................................................t-p-spp...-..DEtclpRLsRc.pshG..lGuWl.-plltWoLFuVE-................................................................................... 0 12 27 46 +12978 PF13137 DUF3983 Protein of unknown function (DUF3983) Aldam G, Mistry J gba Pfam-B_2658 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 40 amino acids in length. There is a conserved AWRN sequence motif. 25.00 25.00 39.50 39.50 19.90 18.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.56 0.72 -4.27 15 83 2010-07-29 12:42:07 2010-07-29 13:42:07 1 1 55 0 3 48 0 34.70 68 79.89 CHANGED KK+Kl+KAIsRRuKsl...EK.cRVcpAWRNIFVQAGI .KK+KlRKAIARRsKsV...EK.aQVsKAWRNIFVQuGI. 0 0 2 2 +12979 PF13138 DUF3982 Protein of unknown function (DUF3982) Aldam G, Mistry J gba Pfam-B_2022 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 47 and 73 amino acids in length. There are two conserved sequence motifs: EKL and EIP. 
25.00 25.00 34.50 33.60 19.40 17.60 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.28 0.72 -4.09 20 72 2010-07-29 12:45:00 2010-07-29 13:45:00 1 1 40 0 0 43 0 54.10 58 87.08 CHANGED M.....hGI.IshtVshTEIlAPAsNVSIVVNESESPI+hEEKLSVATAPLIEIPTPsGAHPGAVV-hDTLIT .......hGI.IshtVshTEhhsPAspVoIV.pEot.PIph.EKLpVushPh.EIPsPsGtcPGtVV-h-pLIo 0 0 0 0 +12980 PF13139 DUF3981 Domain of unknown function (DUF3981) Aldam G, Mistry J gba Pfam-B_1754 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. 25.00 25.00 186.40 185.60 20.20 20.20 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.41 0.71 -4.09 2 80 2010-07-29 12:48:28 2010-07-29 13:48:28 1 1 79 0 3 48 0 114.00 96 19.90 CHANGED hIPWTRuuptLRsVDKcGscKVlKtKpusILhIPlLhWIGIAIYEYhWLIDDRVDSIlTHYSVslAlLIGlVLhSQsphG+LEspLKullMhILLsSYGYFGYLHDIVISQKKY LIPWTR.SGSKLRAVDKKGDEKVVKGKKSSILVIPVLFWIGIAIYEYFWLIDDRADSILTHYSVAVAILIGLVLFSQDQIGKLEGTLKGLLMFVLLASYGYFGYLHDIVISQpKY 0 0 2 2 +12981 PF13140 DUF3980 Domain of unknown function (DUF3980) Aldam G, Mistry J gba Pfam-B_1748 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. 25.00 25.00 77.20 77.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.06 0.72 -3.71 3 79 2010-07-29 12:54:34 2010-07-29 13:54:34 1 1 79 0 2 48 0 86.60 85 67.47 CHANGED MEQEQTSYLSIKILKIMSVIYLIVSILsAlSTGuFIpss..GFs.uISlSGoG.uAlGllhLGSIFQSVLVFCGIWVFILLVETVIKIYEK .VEccQTSYLSIKILKIMSVIYLIGSILMAFSTGPFIHNl..GFD.EISISGSELGLISIVMLGSIFQSVLVFCGIWVFILLVETVIKIYEK 0 0 1 1 +12982 PF13141 DUF3979 Protein of unknown function (DUF3979) Aldam G, Mistry J gba Pfam-B_1747 (release 24.0) Family This family of proteins is functionally uncharacterised. 
This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 22.50 22.50 24.50 216.30 20.40 19.10 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.38 0.71 -4.19 3 81 2010-07-29 12:55:49 2010-07-29 13:55:49 1 1 81 0 3 37 6 113.70 90 95.87 CHANGED hoLFQsAPhEDtKGGWKYIIQEQNGKYpIsN-IussHMSVELYFNEYDElRITLYKDGpPITTMQRIsIlKlELEEDEEGIQFVLERMPSRMIRLQLKPYLAlEMGLYWEVCED .MTLFQAAPKE-sRGGWKYIIQEpNDKYEIVDEMLKNQMSVELYFNEYDEVKITLYK-GhPIoTMQRIAISKVELDEEEEGIQFVLERMPSRMIRLQLKPYLALEMGPYWEVCDD 0 0 1 1 +12983 PF13142 DUF3960 Domain of unknown function (DUF3960) Aldam G, Mistry J gba Pfam-B_1431 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 72 and 89 amino acids in length. 21.50 21.50 22.10 22.60 20.40 20.20 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -9.98 0.72 -4.21 4 84 2010-07-29 12:59:48 2010-07-29 13:59:48 1 2 82 0 3 37 0 88.60 94 23.50 CHANGED MhAsp..PNWsLVhDsYhEPNNFADLFSLLVPs+PKGEuKERTILsWKEKEFYKEENLhPFILYGMNKuK-LPQFHKDEIPTLVRIVRL ......MKAVQtDPNWNLVTDTYIEPNNFAELFSLLVPCHPKGEGKERTILVWKEKEFYKEENLAAFIVYGMNKAKNLPQFHKDEIPTLVRILRL. 0 0 1 1 +12984 PF13143 DUF3986 Protein of unknown function (DUF3986) Aldam G, Mistry J gba Pfam-B_362 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 
20.30 20.30 22.30 22.30 19.90 19.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.11 0.72 -3.53 12 206 2010-07-29 13:04:43 2010-07-29 14:04:43 1 1 96 0 12 100 1 86.80 50 91.36 CHANGED hpaDsspHLHlGYY-cshDl...EulAYKphscslWslahsatthshhhpph.pth...hh-thGhpl..aolcspDLs.-tusthFEcWLhcNp ......cYDsShHhHlsYatsthD..l...EshAYKRhNEsVWDlYhsahthss..hcclEpt+.....h.-.hGhhV..aSlcspDl..s..E.upt.FEcWlh+Np....... 0 1 6 8 +12985 PF13144 SAF_2 SAF-like Coggill P pcc Jackhammer:O26012 Family The members of this family are similar to those in the SAF family, and include flagellar basal-body proteins and pilus-assembly proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -10.97 0.71 -4.82 119 1927 2012-10-01 20:51:14 2010-07-29 14:46:06 1 4 1667 0 431 1918 464 178.10 24 76.94 CHANGED lpptlpp....hlppp...hsth........ph.........plpshplc.....s.php..hss....s....ps.hp.....h.plss...spt.tspsslpl....p....s.p..........ssp...sa...p..lalssplph....h....sphl............VAs+sLs+GphlsssDlshpphslsplt.s.shh..s...-..p..llGtps+RslpsGpslptsplt.shlVp+GppVplhAps.suhplpspGpALpsGshG-pl+V+N.pSs+llsupVpusGpVcV ........................................................................h......t.h.tp........h............p.....h.....hp................hs....s...p........ht..ss....ss.t....h...up...h..s..h.....h....p....s................s....p....haltsplp.s............p...ts.hl..................................Vuspsls.+..Gphlss....s.slp..hcp......hcl.s.p...ls..p..shl...s....hs........p...ll..st..h......ut..+.s.lssspslphstlpp..shhVptG.pp..V.lls.ps.suhslsspGpALpsushuppl+V+.....sSs....p..l....lpuhVsu.sGpl............................................... 
1 114 243 333 +12986 PF13145 Rotamase_2 PPIC-type PPIASE domain Bateman A agb Jackhmmer:O25628 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.39 0.71 -3.48 206 2673 2012-10-02 13:30:10 2010-07-29 16:26:11 1 27 2057 2 569 6738 2215 123.40 20 28.69 CHANGED l....s-..p-lcphYc....p..pts..pa....t.ps.pt...p.h.hp.........hhh.hs...sp....st.s...p..tt.....tpt.....hpt..t.........sh...pp.h..ts..h.tp..t..p..shs......hps..hs...........htp.ts.....................p...hst..................................t.htptl..h...sh.....ps..G..p....h.s..s.s..l.......ps..st......u.h..hlh+lssh.p.s..sp.shs.h-..c..s..+.s .............................................................................................................................................................o-t-lpphY-..............p....pps....pa................t..t....pt...h.t.hp...................hlt..hp............sc.........sp..A.........c....ts..............hpp.........hpt.t...............................t.......ts.F...ss...l..uc...c.....p...uss...............sss..hs................hhp.ts.............................................s..lPt.....................................p.ltp..As...h.......p.h..........cp..G..p.....l.S..s..s....l........ps..ss...........u..a.hll+lsch.p..s...tp..sts.hpcs+.......................................... 0 194 373 478 +12987 PF13146 TRL TRL-like protein family Bateman A agb Jackhmmer:O87326 Domain This family includes the Swiss:O87326 TRL protein that is found in a locus that includes several tRNAs. The function of this protein is not known [1]. The proteins in this family usually have a lipoprotein attachment site at their N-terminus. 
25.00 25.00 27.90 26.10 24.80 24.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.31 0.72 -4.00 22 210 2010-07-29 16:26:30 2010-07-29 17:26:30 1 1 172 0 33 102 11 77.10 51 73.76 CHANGED sGhlaspsshPstss.......sspssKpGcACspo..lLslVuh.GDuSl-sAtcpusIsclspl.....Dapspshl..slasphCslVpGp .........................u.huLYTcVpsPlouT.......slsuoKoGKACApo.....VLGlVsT..GDASI-oAK....KuGcIShVoSV.....DYETTGsa...sh..YGKsCVVV+Gp. 0 9 26 27 +12988 PF13147 Amidohydro_4 Amidohydrolase Bateman A agb Jackhmmer:O25045 Domain This family of enzymes are a part of a large metal dependent hydrolase superfamily [1]. The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source [2]. This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit [3]. Dihydroorotases (EC:3.5.2.3) are also included [4-5]. 
25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.94 0.70 -4.37 111 10639 2012-10-03 00:45:34 2010-07-29 18:01:37 1 79 4086 62 2860 18795 7896 309.00 16 70.36 CHANGED Dsp...G.thlh..PGhlDhHs+.....................hstthsst.ht.hh.........tuhhshh............tsshpssthttthtp........................................hhh.sththtsttthhtthpphhthhtttshsh.th...............................thttsthtthhp...tstpts.hhhhhsttstt.........tt.httththhthh........................hthstshplhpthssthshththhttttsthh.t.............hthstttslssssht.......hhp.h.th................shthhhhssh..................tptshtsltphhpsG...hhhhluoDth.........................sssshhtthhhhhht...hshs........pslphsotssuphhsh........tphGtl..t.GtpAchl ........................................................................................................................................................................................................................................................................................................DspG.thlh....PGhlD.hHsH...............................................................h..t.t.s.s.h...h..h.p....s..h.................................h........t..h.suGhoshhp...............................hssspsthh.ttth.hp.h.................................................................................................................................................hhh.h.h..t..h..h..h..t...t.........h..h...t......th..p...p...h.t....t....h....h..t.......h..t...h...h...s.h....t..h..............................................................................hh.sst..h..h.t.hht..............tt..........t.ths................................hhhh..t.tp..t.......................................t.h.h..ttt..t.h.hh.................................................................sts..hp..h..h..h..h....h..t..t....t.......h....s....h...th.ph....htt..ph.st.......t.ht.................................hths..cttulssst.t
.....................................................hhtpth.th...........................................sshhhh..sPsht................................................pppshtsl.h.......p..h.lp.sG.................shs.s.lu..oDa.ssts....................................................................................shts.hl.hh..h.t.hhht....................htl.oh......t...............................phlph..h.o.ts.sA.c.hhsh..........................tptGpl.......tsGtpADls......................................................................................................................................................................................................................................... 0 927 1826 2398 +12989 PF13148 DUF3987 Protein of unknown function (DUF3987) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 27.00 27.00 27.00 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.88 0.70 -5.90 117 899 2010-07-29 17:03:48 2010-07-29 18:03:48 1 19 543 0 189 834 169 314.80 20 61.84 CHANGED ppspsshshhshusLsslusshtst.sclph....sshht..sssLahhslupuGpt.Kosstphhhtslpphcpphtpphppphppactpt....th..hphctpshcpphtt...t......................ttppPphP......t.pllss-sTspulhptLtpsss.shhlhssEusshhsuhs.h......phhpphshhpcsacGss.lshsRpspsp..ht.lpp..spLolhlssQPshl.phlhthps..hpspGhhuRhL...hshPsstttp...c............shsp....tthpsatpclpplhpt...............tttp.hhlphoscAppha.phasplcpch.t.suph.....hpshsuKhsttssRlAullphhc..............................................ttstpIss-shptAlp.lscahhpcutclhs.hhsssp 
..................................................................t....h..shhhhuhLsshuh..shts..........hplth.....sshht.sssLahhhlu.supt.Kos.h.t.h.h.h.t...........hh.hcpthhpphtptht.t.aptt.......hp.p.tth.pt........................................tpts...P.....hhhhlhs.s.sT.pulhp........h...tps.t...shhlhp.sEhss.lhsshs...........pt...sh....hpphas.......Gs.....s...lshsR...ps...pst.....hh..lp.p....splolhhhhQPshh.phhhptps.........sp.....GhhuRhL...hshP.s..s...tphh..p...................................p.....thhptht...p.phpclhph.h..........................................ttp.hs..lpho.sp.Appha.ph..hp.p..ltpch.t.....s.......hps.hhs.+h.st.hhRlAhlhthhp....................................................tph...Is.pshptAht.ls.p.ahh.p.chhhl.s......t............................................................................................ 0 73 131 165 +12990 PF13149 DUF3988 Protein of unknown function (DUF3988) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 
29.90 29.90 29.90 29.90 29.20 29.80 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.44 0.70 -4.89 252 1094 2010-07-29 20:05:34 2010-07-29 21:05:34 1 66 98 7 150 1009 2 288.40 14 64.34 CHANGED hhlsuCspsc......................t.t.stststs..........lplsss..................ststTRusss..............tts....-plG........lash..............................t.ssshsstthhsshhhhtss..............stshssss.thaa........s.spphsh.hA....Ya......P.........hpssssss........ht...ssssp......................t......................Dh....lhApsps.........sssssshs........................tFpHthopl.plplp.t..ssshsst.....hh....plpshtsp.................ushs.....h.....sGshsssss............................ssshshts...sssshsh...........hhll..P.......................tsst.........hplphshssp......stphhhsstttt..........................sGppasas.........lslsps...........hhhsssslssWss ..................................................................................................................................hhsuCspcp........................t.....tstpts...............l.phsss.....................ssstTRusss............thpss..........cplG...............lash.................................ts.sst.hhssh.hh.h.tss..........................................shshssss...hha.........s.ssph.sh..hA.............Ya..........P.........................asssssss............shs...hs..........hsspp...................................t......................Dh........hhupsps.....................tsssssls.....................................h.....pF..pHthopl.plplp.s...ssshsss.......................h....hplps..h..tsp...................................ushs...............hs.........sGshsssss..................................................s.sshshtt......sssstsht..................hllh.....P......................tsss...........................hpl.phsh..sst......................shthhhsttth...........................................sGppaths.lplstt......................t.ph..W............
.......................................................................................................................................... 0 23 126 150 +12991 PF13150 DUF3989 Protein of unknown function (DUF3989) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 27.00 27.00 27.00 27.30 26.40 26.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.60 0.72 -4.53 17 195 2010-07-29 20:28:02 2010-07-29 21:28:02 1 1 80 0 16 135 3 83.90 32 85.32 CHANGED +phlcthpcth-scLRthhstLsPctRlhllLshhshFusLulYhhssuIacl....G+p-tpphpIcHIcpl-L....pp-ohNhhp.hp ............h.hlpchpchhss+L+thhstLsPcpRhtllLsMhshFusLulYhhspulYcI....G+..p-G.pc..hphcHltplcL.........s-ohp.hp.................................... 0 4 14 16 +12992 PF13151 DUF3990 Protein of unknown function (DUF3990) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterised proteins found by clustering human gut metagenomic sequences [1]. 27.00 27.00 27.10 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.86 0.71 -4.84 47 384 2010-07-29 21:37:02 2010-07-29 22:37:02 1 9 247 0 57 326 11 140.90 29 74.19 CHANGED hhLYHGSsphlc.pPclttu+.ppDFGpGFYsTp.h-QApcWApph...........tpsshlNhYpl-hsthp...LclhcFp.thspcWLsFlhpsRpstt....................csYDllhGshADD.phashlptahsstIoh-thhctL+htp..spQhshpop+Als.pL+ah ...........................hlYHG....S......s.....h.......l.....c......p.......P..ph.ths.......+...p..h..DF...G....p....GFYhTs.hcQApcWAtph...........................ttpshls..hYphs......phh.......hphhhF........p.....ths.cWlpFlhpsRpt..pt....................ppYDllhGs..hAsD...phh...phlp.ahp...s.......h...........l.......sh...c....p...hhpthp..htp......spQhshtoppshp.hLph.h................................................ 
0 28 42 49 +12993 PF13152 DUF3967 Protein of unknown function (DUF3967) Aldam G, Mistry J gba Pfam-B_1529 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 173 and 249 amino acids in length. 21.60 21.60 21.60 22.20 21.30 21.40 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.43 0.72 -4.31 15 228 2010-07-30 08:43:20 2010-07-30 09:43:20 1 2 107 0 8 152 0 35.30 50 17.38 CHANGED RtppRDppLMpsIRElQEoK+LIAAScpK..hpFW ...+.ppRDppLMpsIREIQETKR....h.......lA....Ao..KE...p...p..hhp.a................ 0 1 3 3 +12994 PF13153 DUF3985 Protein of unknown function (DUF3985) Aldam G, Mistry J gba Pfam-B_3329 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 25.00 25.00 82.80 82.60 20.40 19.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.38 0.72 -4.15 8 55 2010-07-30 08:48:10 2010-07-30 09:48:10 1 1 55 0 2 11 0 44.00 94 95.65 CHANGED MEILoIILIVLLIYVVFKVAYVALKILAILLIIFLIVEFGSKLL MEILTIILIVLLIYVVFKVAYVALKILAILLlIFLIVEhGSKLL 0 0 1 1 +12995 PF13154 DUF3991 Protein of unknown function (DUF3991) Coggill P pcc Jackhammer:O25192 Family This family of proteins is often associated with family Toprim, Pfam:PF01751. 
22.20 22.20 22.20 22.40 22.00 22.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.75 0.72 -3.79 113 763 2010-07-30 16:16:39 2010-07-30 17:16:39 1 15 512 0 102 654 29 82.50 24 16.66 CHANGED sYLpppRtlstpllpthhp.....pshlhp.........sph............t......Nhl........FsttD.............p.pGpspGhphRGs...................ttsa.+uhspG.......sshhashsh.........sps .............tYLpppRslsppllpthhp.....pshlhp.........sph................p..................sll........Fhth-.....................ppsphtuuphpGh....................ptsa.KthspG.....sshthuhsh.......s.......................... 0 36 72 92 +12996 PF13155 Toprim_2 Toprim-like Coggill P pcc Jackhammer:O25192 Family This is a family or Toprim-like proteins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.64 0.72 -3.67 328 1811 2012-10-01 21:47:57 2010-07-30 17:17:53 1 42 948 0 254 5018 2583 95.50 22 20.72 CHANGED hlhEuhhDslShhp....hthps...................slushGsshp.........pthphL..pph...tp............lllshDsD...........pAGppAsp+hh..phhtp.....................hphphhphstt.c.....D...hs.........-hl .......................................hlhEuhhDhlSahp......lthpp..t..........................lsl.susu..s..h....t..............................phhphL.....pth........pp......................................lhhs.hDsD.............cAGccsspclt.....pthttt...............................................psth.h.h.tt..K......DhN-hL.............................................. 0 97 201 234 +12997 PF13156 Mrr_cat_2 Restriction endonuclease Coggill P pcc manual Family Prokaryotic family found in type II restriction enzymes containing the hallmark (D/E)-(D/E)XK active site. 
Presence of catalytic residues implicates this region in the enzymatic cleavage of DNA [1,2] 22.00 22.00 22.10 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.61 0.71 -4.24 8 260 2012-10-11 20:44:47 2010-08-02 13:31:32 1 15 227 0 57 253 94 120.90 35 9.82 CHANGED csDPshpspYscVpTas-WAc..pGhsppDTGIDLVA+hRcsDuasAIQCKFYsssHpIpKsDIDSFhsASGK........c.FopRlIlsTTc.cWScNAEshLcsQplPlsRIsLscLEpSpIDWstats+ucllLp .............................-Phhtppa.c.pVahWtD......Ws..........p.....t..s.....pp..DsGIDL.VAp..pp..s..........s...s.p.......a..hAIQCKFY....pss..t..pltK.s-.IDSF..hs....s....SG.+...............sh..Fsp.R.lIlo....T...Ts.cWu....p...NA-cs.l....ps....Q.t...hslp+Isht-ltp.....S.IDWshh..t......t............................................................. 0 17 32 42 +12998 PF13157 DUF3992 Protein of unknown function (DUF3992) Aldam G, Mistry J gba Pfam-B_480 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 98 and 122 amino acids in length. There is a single completely conserved residue T that may be functionally important. 22.00 22.00 22.00 22.90 21.50 21.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.14 0.72 -3.79 10 220 2010-08-02 13:24:37 2010-08-02 14:24:37 1 1 84 0 11 114 0 90.20 46 77.59 CHANGED VCssWshs..suAsshllYssNlsQsIsGTGaVchDsGsu.....sITVshl...ssGTsVsThTVsPGoStSFThRcFsoIpIls...ssuGoppGcaClTh .............VCosWSh...ssAhspllYTNNIsQpVhGTGaVKYDVGsu.....PITV-hL...suGTVlDThTVpPGoShoFThRcFsolpIss......Tssss.QGEFCITs......... 0 0 5 5 +12999 PF13158 DUF3993 Protein of unknown function (DUF3993) Aldam G, Mistry J gba Pfam-B_782 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 160 amino acids in length. 
22.40 22.40 23.60 63.80 21.20 20.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.37 0.71 -4.31 5 94 2010-08-02 13:25:49 2010-08-02 14:25:49 1 1 94 0 5 39 0 119.70 79 72.48 CHANGED hIhLlthlsVAFLVsaulTpl.uK-csKlDRcEVFpTlQouaEsQFSLoEKcRoMs-MhulL-PYFT-uFtslFl-ENups-cpG..haGoDhGEYhIPs....FSFou-TKluhD-E+..lYVYEass ..................GIWLVLFVCVAFLVGYSVTTVLGKEEVKlDRKEVFTTIQKGYETQFSIRGKHLPMNKMIETLSPYFT-NFLQVFTDENSRSDKQS........GEYLLPAKEAPFSFNSETKMSYDEEHK.LYVYERs... 0 0 2 3 +13000 PF13159 DUF3994 Domain of unknown function (DUF3994) Aldam G, Mistry J gba Pfam-B_903 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 97 and 111 amino acids in length. 25.00 25.00 25.40 27.40 21.60 24.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.65 0.72 -3.79 21 98 2010-08-02 13:26:55 2010-08-02 14:26:55 1 1 44 0 2 81 0 109.20 40 32.91 CHANGED psKphs-cupchlccGschWppsacslcsch.tt.p...............sthptsohossc.p..p......................sspsslspDGpELlGsWGh.pss.tFphulsh+p..DsTFpsYss.uphs .......TDKEll-KupphlcEucphWt.sFccLcu-h.t..tt...............phssuosSspchpphptKsGl.ps..............NsppNl.KDGTELlGsWGhptus.GaphoLlLKu..DKTFEoYus.GpYP... 0 0 1 1 +13001 PF13160 DUF3995 Protein of unknown function (DUF3995) Aldam G, Mistry J gba Pfam-B_958 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 138 and 149 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important. 
22.70 22.70 24.00 23.10 22.60 22.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.83 0.71 -4.08 35 198 2010-08-02 13:28:22 2010-08-02 14:28:22 1 1 181 0 58 169 10 119.90 35 82.04 CHANGED hLssluhlH....lYWAhGGpWshps.shsspss.......tsshtPuhsushhVAssLhsu..Asllhhpsh.h.ht.hh.ttlh..phushsluslhhlRu.....lushtaluhh+chts..shFuchDshlYsPLCLhLuluhh ............................hLhhluhlH....lYWAhGGpWussu.slPscss..............chshpP.sshh...TL.hlAlhLshA..Ahllltpss.h..hththsshll.phuuhls...hllFhlRs..............lG-FpYhGh.FK....+h+s..opFA....RhDThlY.PLChhLuluh......................... 0 16 35 46 +13002 PF13161 DUF3996 Protein of unknown function (DUF3996) Aldam G, Mistry J gba Pfam-B_998 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 172 and 203 amino acids in length. 21.90 21.90 21.90 29.80 21.40 20.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.18 0.71 -4.58 16 146 2010-08-02 13:29:55 2010-08-02 14:29:55 1 1 31 0 17 100 1 156.00 34 82.20 CHANGED ths+pptsppphGhGhulssPIsNhhlphshhsl-IGaGsaNGlpss.........sFhshlhhul......DhlFhpphhcphs..lssulGhGh....GphhhSshpsst................psssphulGhRlPLhlpaslh.Ksl-IhhKssPuIt.shahsss........h.Gh+aphFuuhhl+h ...h.tKcppsppthGhGhulG.PlANhhlshsasshDlGaGuasGlpss.........sFhshlhhul......DhIFpp.lhcshs..lshulGhGh....GslhhSs.ppp.................o.ttclulshRlPLslpYshh.+slpIhhKhsPolt.shhhsss........h.Gh+aNFhushhl+h.............. 0 5 7 7 +13003 PF13162 DUF3997 Protein of unknown function (DUF3997) Aldam G, Mistry J gba Pfam-B_1597 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. 
25.00 25.00 27.20 25.40 24.70 24.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.45 0.71 -4.26 10 103 2010-08-02 13:32:12 2010-08-02 14:32:12 1 1 101 0 15 57 0 109.60 67 75.10 CHANGED pls-tYcLlNsussshtlhs.psslhpopa.h.l...sApVs..-IuaD-paIIAKpp..ph+sDshNscsslhscpsE...YWIIDlK......pschaGPasccpFpcKpcphpIscplphlpsahp ...............................................lNDEYELIRTSGNAFELFPoQDAVYATQY...I...PAKIT..DIAWDDKYIIAKQT..EEKSDPNNPDAAIANKKSE..HYWIIDVK........HNKRFGPYNEKQFpEQKDAFKIKVPFQslDuYI.K... 0 4 12 13 +13004 PF13163 DUF3999 Protein of unknown function (DUF3999) Aldam G, Mistry J gba Pfam-B_2128 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 440 and 470 amino acids in length. There is a single completely conserved residue D that may be functionally important. 29.00 29.00 29.30 29.00 28.40 28.60 hmmbuild -o /dev/null HMM SEED 429 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.35 0.70 -5.53 34 171 2010-08-02 13:36:19 2010-08-02 14:36:19 1 2 165 0 56 185 19 389.10 29 87.18 CHANGED stDaupthsLphsuu.usaYpLsLPtsVYttutpsD.LsDlRVFNusGcslPaALhtspst..s.ss..phpslphFsL.....sssuss.spssspltlptsssGsl....hthsssstssptpt..t.aLlDhoth......cs.slptLhLch.ssstps..clsVcASDDLpcWpsl.ucutlhcLspssppltpccIt.....Lss..hpuRYLRLhW..s..susthpssplpttsssshss.........shpaptsltsppsss.....spahaplstshPlsclclsLsQs..Nslssspl.u.......................Rssspt.......sWpslusuhL.aRLttsu..tc.tsssltls..spsscthRlpssp...uGhGsssPsltsuhpstpLsFlApGssPapLAhGssssts........sslshssLlsshpstp........ls.Aphst.....ssssussshhs..sssps...h++hsLWusLllG..VssLuhhAapL 
.................................saspphsLphsut.usaYplpLs.sV.htutpss.LtDlRVhsutGpslPaul.s.pst..ttts...p.htlphFsL......sstts.ttsss.hhlph.sssGsl.......tststsuttth......saLlDhSth........ct..slptLhlca......ssshps.tphsl-uSDDLpcWpsl.ucuplhcLshsspplppccls..........................Lsu....tpARYLRLhh.ts.tsuP.tLtsscltsts..sss..ss.............shpWptshtsptsss............schhaph.ss.s.hPl..sclclslsQs...Nsls.spl.tu..............................Rssspt.......sWp.slusshL..YRLtts.s....tphpsstltls....uphs+thRLhlcp.s..sGLus.t..sP.pl...s...l.uh...pstp..LsFl.....ApG.ssPasLuhGssssps........s.slslssLlsshs.pp........ls.Aplus......htsussshh..s..sssps....h++hhLWusLllu...VhlLuhhAhpL....................... 0 10 27 41 +13005 PF13164 DUF4002 Protein of unknown function (DUF4002) Aldam G, Mistry J gba Pfam-B_3350 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes and viruses. Proteins in this family are typically between 112 and 125 amino acids in length. There are two completely conserved C residues that may be functionally important. 21.00 21.00 24.50 23.90 18.20 17.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.51 0.72 -4.00 26 48 2010-08-02 13:38:04 2010-08-02 14:38:04 1 2 18 0 22 50 0 74.50 36 63.43 CHANGED CC......p.tppltaphtpst.......Ctshsupts.....tsC.phsICsDGpsl.hGs.aCGpGsCNlFGCsCcGGCl...pGshhpsFhcpssth ............................CCh.ttpltaphttus.......Cthssupts....shsC.chsICsDGpsl.hGo.YCGpGuCNlFGCpCcGG..Cl...pGsh.psFhchst......................... 0 5 5 15 +13006 PF13165 DUF4001 Protein of unknown function (DUF4001) Aldam G, Mistry J gba Pfam-B_3337 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are at least two pairs of cysteine residues in this short family of proteins. 
21.70 21.70 23.10 23.00 20.80 17.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.58 0.72 -4.29 32 277 2010-08-02 14:58:05 2010-08-02 15:58:05 1 2 276 0 55 184 1 43.90 66 92.19 CHANGED KHIKTlsppsLpcohppGG.CGECQTSCQSACKTSCTVuNQsCEp ................KHIKTlNp.psLp..po..lp+GG.CGECQTSCQSACKTSCTVuNQsCEp... 0 36 51 53 +13007 PF13166 AAA_13 AAA domain Bateman A agb Jackhmmer:O25761 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. This family includes the PrrC protein that is thought to be the active component of the anticodon nuclease [1]. 45.20 45.20 45.30 45.40 45.10 45.10 hmmbuild -o /dev/null HMM SEED 712 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.59 0.70 -13.53 0.70 -6.07 43 601 2012-10-05 12:31:09 2010-08-02 18:32:18 1 5 517 0 126 556 33 537.10 15 85.43 CHANGED shusa.s..tp........ssp..lss.......hp+lNlIYGtNGoGKTTlSplhtshp.....s........cap..............ps.ph............phps.............s....p.............s..plh..VaNp-Flcc..Nh....s.....p...sp..lpul.asLGccsh-tpppIpphcpplpphppp..hpphp....pplpp..tpp.........phpphpsphp..cpshcph.pp...hppthp.pshpu..hpt.cpph........tpphlp..c.hps....hsts......shhs.cclpp.p....hptl.hssshpp.lshl.s....hs.h.s.hpt.l..-p....ssllpppllup...s.slscLlppls.s-WVc.............pGhc.ahp.......p..s...ppCsFCpp..lspphtppLppaFc-.sa...ppphpplpphhpph...psp...hpt..hhpp.lpphh.s.h..............t.hthp......hpplpp....ph.pt........lppt....l....pt...p....ppLppKhpcPsp.slp.l.csh.sshl......s............................plpshlsphNphIpcpNc....hspNhppcppphp.pplhtah.h.........t.chp..pslp..thp....cphp...shppslsshppplsptppclpshppc....lpcLcpplsshp.slcpINchLpsa...GhsshpLp.h....ps.c....c.st.....YcIhR.....p..s....s.p.ss...psLSEGE+ThIsFLYFhptlc...sss..sh..t.cspllVIDDPlSSLDsshlahlsuLI+shh......tp.sp.............h+QlFlLTHNlhFa+clp..h......hhp........p..........p..pts..p..p........spaa.h...............
........lc..+...sss....tSpl...p.............sh.p.p.....pshpopYphL.appl.......pct..............t.........pps.....ssh.slsNshR+lLEsY..Fph...sph.....p...........c.plh.phhps..........pcp.th...tpu.lhcal.NctSHst.s.s-l.hshst.shp.chhplhcpIFp .........................................................................................................................................................................h..............................h..shhaG.NhtGKoshsphh..................................................................................................................................................th...ha.tphht....h...............t...t................h........h.t...............t.h..t..t...h.......................................................................t.h...................................................................t..........................................t.......t.................h..................................................h.........................t...........................................t......................t.a..ht........................................s.ht.h...........t.t......t.C.hCtp.........................h..ht....h...tt...h...hp...p......t.t...........hp..t.....h....t...................h....................................................t.h...........h.t...........h..t.....h......t...........t..h....t...c............t..t..t.........................t....................................................................................................th....t.h.t.thtt..h.t.pp.......t.t.....h..t....t.....t...h....t........h..............................t............................ht.......t........h....t..t..t....t.h....t.t....p......t.htt.hp..t.t.....h..tth.p.t...ph.............p........h.....t.....t.....hst.lpth......t..th..p..lt..h.......ptp......ptt................ht.l...t............t..s.............t.p.........pplScGEcshluhhaFhtphp..............ptp.sh...........ptthlllDDPlSSLDppphhh...lh.sh...ltphh.................sp...............h
p....p....lhlhTHsh..Fh.p.lp..p..hp..........................t..tp...............phh.h..........................................................lp.p......ptp.......p.h..................htt........p.h.p...a.t.h..h...ht.l.hph..............t.........t.t.........h..h.N.hRplhE...h..hth....t.........t.........................h.....t.h.t..................................h..phh.p..S..H..................................................................................................................................................................................................... 0 35 78 107 +13008 PF13167 GTP-bdg_N GTP-binding GTPase N-terminal Coggill P pcc manual Family This is the N-terminal region of GTP-binding HflX-like proteins. The full-length members bind and interact with the 50S ribosome and are GTPases, hydrolysing GTP/GDP/ATP/ADP. This N-terminal region is necessary for stability of the whole protein. 21.90 21.90 21.90 22.30 21.80 21.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.10 0.72 -3.86 6 4196 2010-08-04 10:27:35 2010-08-04 11:27:35 1 5 3967 0 960 2981 1188 93.80 42 21.38 CHANGED DcpFNF-STMEELpuLSpTCQL-VhuQITQNR-pVDcKYYVGKGKl-EIKuFlEh+DI..DVVVsNDELTTAQSKoLN-sLsVKIIDRTQLILEIFA ..............................tpt.......ttsh.pELtsLuco.A.G..s.cl.l.t.s.l.sppRppsc...sphalGpGKl.cEltp.h...........l.......c........t........p...............s..............s....................s.lVlhs.c.c.LoPu.QpRN....LE......ch.h..............p.........s+V..IDRTtLILDIFA............. 0 319 633 820 +13009 PF13168 Poxvirus_B22R_C Poxvirus B22R protein C-terminal Mifsud W pcc Pfam-B_3510 (release 7.3) Family This is the highly conserved C-terminal region of poxvirus proteins from eg, Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. 
25.00 25.00 211.20 210.90 20.70 20.40 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.08 0.71 -5.04 19 94 2010-08-04 13:07:16 2010-08-04 14:07:16 1 2 34 0 0 88 0 196.20 58 10.66 CHANGED cspslhpsVSs.uLoplGuslusuG........hsuuPplAhAGhulpuIuGLIDhupsIYallSGpp...P.DPllcpFssYupalu.oscuGsRhChMPsS-lhlhlAYRp.....cs...p.uhEKhthaahDslsShlhYLpTStIshshplpVsCP.IGtLR.h-sDIsuYshLhhssc-sV+aYhhstlhshLSpaPsVphTCGp-.sLh ..K-EKIFEAVTh.oLSTIGSTLooAG........hhusP.LMIAGlGIohIoGlIDsuKDIYYLhSGpE..+PVDPVIKhFNTYAuLVSDosKhGVRKCLhPGpDTlIYlAY+N.....Do.SFKpssEthuLYFlDVIsScIhYLNTSNllL-YpLKVACP.IGsLRSlDlDITAYTlL.Y-TsDslK+Y+FlRhusLLSKHPVlRLTCGhssTL... 0 0 0 0 +13010 PF13169 Poxvirus_B22R_N Poxvirus B22R protein N-terminal Mifsud W pcc Pfam-B_3510 (release 7.3) Family This is the highly conserved N-terminal region of poxvirus proteins from eg, Fowlpox virus, Myxoma virus, Lumpy skin disease, Variola virus and other members of the Poxviridae family of double-stranded, no-RNA stage poxviruses. 24.70 24.70 25.10 57.90 23.60 24.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.13 0.72 -3.80 18 90 2010-08-04 13:12:09 2010-08-04 14:12:09 1 3 32 0 0 83 0 90.90 44 5.09 CHANGED pcpChRKhulYHshspshpt.+-phDhpup..ush+YLslscptEppphhpsFNWopIppsl+cpFlppCsssss.......YhYNYolshslolsspssh ...ETChRKoALYHD.phscs..c-NpDssAS..lsYKYLpVVpcRERoRLluoFNWTsIuEuV+N-FI+hC-lsus.......YLYNYTIslShhIcupc..ch.. 0 0 0 0 +13011 PF13170 DUF4003 Protein of unknown function (DUF4003) Bateman A agb Jackhmmer:Q188C9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 327 and 345 amino acids in length. 
27.00 27.00 30.40 29.50 22.80 19.10 hmmbuild --amino -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.73 0.70 -5.16 20 197 2010-08-04 15:11:32 2010-08-04 16:11:32 1 1 189 0 22 159 1 290.40 39 89.43 CHANGED csYcplcps.thphssshlphhhAhhashpscphs..hschcclpphIKppouhFSshRuphphsluuhLslcts..psccthpphlplYspLppttFpcsp.ahhLuAhhlhpp..pppshcpplp+spplYcthKccH.FLTus-DhshssL.LAt..sspsl-plh-phEphYphLpc.hsht+uNsLQhLSplLsLtpspsppp.st+shplhptLcccclKlpp.a.hshlGlLullpssc.c.lcslpplh-pLpccctht...hp+chphhlAlsLhhschhsppp........hhcpsLttslphlltspps .............sYtpLKsp.caphpDsRhthhIAthaAuss+lhs..ht+FhEIspaIKpQlGhFShL.......+.uhpRaslAuhLslc.s..sh+cuhcphlclYcpLlpuGFpRoh.aTYLAAhlLLpp......pp-phsppIp+uhplYctMKK-HhFLTuopDhshAVL.LAs..psEsl-pLh-csEhhYpcLAp.tGF+KGNsLQFLSpILoLtps.c-phhlupsssIhp.LKpc.clKlKphH.YPuIGlLALlpDu-.K.lculpslI-cLptc+hFR....app-tslhlAIpLassppu-c.p........tpspGLts.lclLItAQQA...................... 0 9 17 19 +13012 PF13171 DUF4004 Protein of unknown function (DUF4004) Bateman A agb Jackhmmer:Q183T3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 210 amino acids in length. 27.00 27.00 27.00 49.70 21.90 20.70 hmmbuild --amino -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.30 0.71 -4.15 9 172 2010-08-04 15:48:56 2010-08-04 16:48:56 1 1 169 0 26 122 0 196.20 57 94.62 CHANGED EpLISKKELL-hssISYGQLYRWKRKNLIPEEWFIRKSTFTGQETFFPR-KILpRIspIpphK--lSLDELAchFSsp.hp-lplsppcllpcsllScsslchatp.hs.ptpsas.pcllslal...LEcLLpSG.lSl-EuptlstsLcpp.tp.ppKpspLhlhRKLGlshahllussscIhh-sssKVlp+lslsch....hEc ..p.-LISKKDLLELTGISYGQLYRWKRKNLIPE-WFlRKSTFTGQETFFPKEKILERI-KIQoMKEDLSLDELAsMFSPs.lp-lhLT+--lL+KGIsScsVLphFhEpps....css...pFpFs-lLhlYh...LEcL..LQSG-ISLEE.GKhlhpVLc-N.Ypuh.ccKss-Lll..lRKLGloTChLl.uss--llFEcGsKlVlR.slh+h.oE.t............................................................ 
0 8 16 20 +13013 PF13172 PepSY_TM_1 PepSY-associated TM helix Coggill P pcc Jackhammer:O25020 Family This family represents a conserved TM helix found in bacteria and archaea. 22.90 3.00 22.90 4.30 22.80 -999999.99 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.31 0.72 -4.10 183 2589 2012-10-01 23:59:14 2010-08-05 10:40:52 1 41 1548 0 728 5088 380 32.10 26 8.70 CHANGED htphhtphHhahuhhshshlllhulTGhhlsapp .............hhhphH.hahulhssshlllhulTGlhhha.............. 0 180 437 605 +13014 PF13173 AAA_14 AAA domain Bateman A agb Jackhmmer:O25880 Family This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.88 0.71 -4.24 476 3483 2012-10-05 12:31:09 2010-08-05 13:21:57 1 24 1307 0 833 5752 2489 125.60 23 29.71 CHANGED sptslllpGsRpsGKool...l.hph.hpphh........pphlal.s.h-c.....ph................ht....ht.....t...h.................phhhp.h....h...............tt.h..lhlDElQps......................s.s.h.phl+tlh-.......pt...t..h....clhlTGSsshhl...p.phs.........splsGRhh............hclhPlSFpEah ...........................................t...hlllpGsRpsGKosl...l....tph...hpphh..................................tph..h..h..l..s...h-s......ph......................................................tp...ht.....t..................h....phhhp.t.......t......................................ttth....lhl...DE....l....Q..ph..................................s..p..h...p.h..l.+thh-..........pt.......p.....h..........ch.h.loG..S...s..s..h...h..h......p.....ph.s.............p...h..s..G.R.hh..........................h.p.lhPhoatEh.............................................................................................................. 
0 327 580 714 +13015 PF13174 TPR_6 Tetratricopeptide repeat Bateman A agb Jackhmmer:O25749 Repeat \N 25.70 10.00 25.70 10.00 25.60 9.90 hmmbuild -o /dev/null HMM SEED 33 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.27 0.73 -7.87 0.73 -3.13 402 6355 2012-10-11 20:01:03 2010-08-05 13:32:42 1 1348 2675 10 2337 34545 12501 32.30 19 8.58 CHANGED pAh.hphu.hshh.p.t........t.pts..pAtp..hhppllpp.aPso ........................................................sh.hphu.hhhh.p.t......................................s.php.....pAhp....happllpp.aPp................................... 0 982 1643 2054 +13016 PF13175 AAA_15 AAA ATPase domain Bateman A agb Jackhmmer:O24997 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.61 0.70 -12.52 0.70 -5.43 19 1024 2012-10-05 12:31:09 2010-08-05 14:14:22 1 20 787 0 211 6175 1983 226.90 14 45.90 CHANGED l+plpIpNa+shpsh.............................clshsc..........sls........lllGpNssGKoslLcul.........t.h.s.p.....tp...............................................t.tph....pp....hp...................h.tt.ptlplpthlppssschhus.....h...p..h....ppspsp..hhhh.p.lc.thchhh.sphpchchhpphst..................................................ph..h....t...p.......phph.ph.hhtth.p.....h..pt.h.tt.phhpchhssts..phpphhp.shhcthct................ttcthhpshspplpphhpcths................tshthp.spphts...........................st.hp.phplsps.lphhhp.........psp...p......lslpppupGhp.hhhhslhhsphptps.sp..........IllIDEPEsaLHsshQpphlchLpphsp..........sh..QlI.................lTTHSPall 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................h.....p...t......t...u.......G....hp...............h.h...h.............h......h........h..................h..t......t..t.....h..t...t.......................................lhhl-EPEsphHsphQh.....h.h.p.h.l....p.h.ht..................................th.....Qhl.................lo.THSshl............................................................................................................................................................................................................................................................................................................................................................................................. 0 67 124 176 +13017 PF13176 TPR_7 Tetratricopeptide repeat Bateman A agb Jackhmmer:O25374 Repeat \N 25.00 14.00 25.00 14.00 24.90 13.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.64 0.74 -8.06 0.74 -3.73 162 2738 2012-10-11 20:01:03 2010-08-05 14:29:41 1 891 1597 5 1042 30248 7638 35.70 24 5.94 CHANGED sl.ttLuph.app...tGchccAlphacp.......l.tht........ps.tstp ...............h.hsLGpl.app.......pG.ch-cAlchapp...................h.t.t..................ph............................................... 0 404 612 833 +13018 PF13177 DNA_pol3_delta2 DNA polymerase III, delta subunit Bateman A agb Jackhmmer:O25829 Domain DNA polymerase III, delta subunit (EC 2.7.7.7) is required for, along with delta' subunit, the assembly of the processivity factor beta(2) onto primed DNA in the DNA polymerase III holoenzyme-catalysed reaction [1]. The delta subunit is also known as HolA. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.08 0.71 -4.58 82 9575 2012-10-05 12:31:09 2010-08-05 15:02:19 1 36 4883 47 2332 8351 6088 159.20 34 34.48 CHANGED tpsphhphLpptlpps...+luHA..YL.FpG.pGsGphphAhhhupt.lhCpptt.t...............sCtpCpsCp.pltp.........................tsaPDlhhl...........ps.cst.....................................................................................................................................sI+l..-pIR.pLppphshpshc.up.hKlaIIcsA-phstsAuNuLLKhlEEPs.spslhlLlopshspl....LPTIhSRCpllphpshs ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................pt.hhptLtp.u.l..p.p.s......+l.sH.AaLFs.Gs.cGsGKsshAphhA+.sl.Cpps.ss.......................................psC.spCp..s.Cp.....tlpp..............................Gp.asD...l...h.tl...........................cs..sup................................................................................................................................................................................................................................................................................................................................................h..s...l........-......c...l.........R.......c.......l..h....-..p....s.....p..h..s.....P....s......p.....u....p.....aKValI--sc...h...L.....o...p.sAh......NALLK.T.LEE.P.P.sp.s.....h.FlLs..T..s..c..s.p..+l...........sTIhSRCppaphp...s.........................
... 0 792 1517 1986 +13019 PF13178 DUF4005 Protein of unknown function (DUF4005) Coggill P pcc Pfam-B_2171 (release 24.0) Family This is a C-terminal region of plant IQ-containing putative calmodulin-binding proteins. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.33 0.72 -11.05 0.72 -3.79 69 399 2010-08-05 15:10:13 2010-08-05 16:10:13 1 6 28 0 239 398 0 120.00 20 26.44 CHANGED soPp...............s...t.ttt.t.sss.p..tt.........................hPsYMAsTpSu+AK..lRSQSuP+.QRsps............tppssp............+RtShss.ss.s...............tttts.+Sss ..................................................................................ts....................t..t.......p.....p.ttppp.h..s...........................................sshPsYMAsTpSA+AK..s.R.....u..p..S.u...P+..pRsp.s..........................pppssp........................+RhShs..sssstt...........................tthtt............................................................. 0 31 151 199 +13020 PF13179 DUF4006 Family of unknown function (DUF4006) Coggill P pcc Jackhammer:O24959 Family This is a family of short, approx 65 residue-long, bacterial proteins of unknown function. 20.90 20.90 21.00 21.50 20.20 20.10 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.12 0.72 -4.42 21 189 2010-08-05 15:41:21 2010-08-05 16:41:21 1 1 189 0 21 73 0 65.00 57 90.90 CHANGED M.ps.pRslFuLNGloGhLlAsVLLLuILssLThhuItsQpssAspsYp....lpcssulKMh..uspNsc+hh ...MENsNRsVFuLsGVTGMLIATVLLLuILVsLTlWGlKsQQEVhQpPYo....LKDl....puVKMh..uSccQDHh.s... 
1 4 17 21 +13021 PF13180 PDZ_2 PDZ domain Bateman A agb Jackhmmer:O24877 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.68 0.72 -4.09 52 19233 2012-10-02 11:12:46 2010-08-05 17:07:01 1 162 4727 36 5564 18896 7404 86.90 22 21.84 CHANGED uslGlphptpps..................sshl.p........VtpssPutcss.lph..............GDhIhplssptl...psh.......spLh......phlh.ptp.GssVplplhR......sscppslplpl ............................................................................................Gh..h...t.............................................................tGs.h.Vsp.............Vh.s..s....u...P...At.......cA..G...lcs......................................GD..lI..l...p..l....s...Gps..l............psh...................p.clh................p.hl...t.....p......t....p....s..G..p...p...l.p.l..p...l..h..R.......s..G...p.p.hphpl........................................................ 0 1926 3646 4675 +13022 PF13181 TPR_8 Tetratricopeptide repeat Bateman A agb Jackhmmer:O25864 Repeat \N 25.00 10.00 25.00 10.00 24.90 9.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.62 0.74 -7.70 0.74 -3.77 158 8299 2012-10-11 20:01:03 2010-08-05 17:20:12 1 2662 2423 16 3530 83745 27261 32.40 18 6.01 CHANGED h.....ps.....hh.thGpl.ahp.hsph..cp...Ahphhpculplss..s. ........................................t...hh.tlup.h..Yhp..hs.ch........cp..............Ahphhpcshph.....t................... 0 1467 2293 2991 +13023 PF13182 DUF4007 Protein of unknown function (DUF4007) Bateman A agb Jackhmmer:O26600 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 284 and 326 amino acids in length. This domain is found associated with Pfam:PF01507 in some proteins, suggesting a functional link. 
27.00 27.00 27.90 27.70 25.30 24.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.58 0.70 -5.58 34 141 2010-08-06 09:59:40 2010-08-06 10:59:40 1 6 122 0 41 146 81 259.60 24 81.04 CHANGED pFuuHE...........TFslRhGWLpKuhctlp.ps....sssFh..ccsuhsphGVGpNMVculRYWhpAsplhc-s.tt.........phTphGchlh.......shDPYlE-.uoLWLlHatLsss....pstAssWYhhFN.hshsc..Fo+s-lhphlpchhsp......tpphsppolpcDlssllpsYs...............ptsphssE-.hssPhsELuLlphs..ttpt.taphshss+ssLsstllhYAllcah...s..pstpolulscLhtps....sSPG+lFpLscpslschLcplpph..stlplscTAGLcplhhpp.......phtshchLcpaY .............................FutHETF.hRtt....WLpKuhpth..tt..........phF....tppuhs.phGVG+NMVpu.l+aWhtAssllcpstt..........phTth.Gphlh..........................shD.ahEp.solWLlHah...Lsst............tptsssWahhFN.hp..t...p..p....Fscpplhphlpphhtp.........tpt...hupsolpc.DlsshlpsYs.................ttts.E-.hpssls-Ls..Llpth..........tpt.haphtpstp.slsstlhha..ullchh...t........tstpolshpplh.t......suPGclFpLscpslhchLpplpph.....shlp.hscouGlcpl.h.t...........thlt.................................. 0 18 36 39 +13024 PF13183 Fer4_8 4Fe-4S dicluster domain Bateman A agb Jackhmmer:O27906 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.92 0.72 -3.94 344 10837 2012-10-03 08:56:43 2010-08-06 11:04:20 1 158 3616 30 2967 16116 5623 77.90 24 15.69 CHANGED p.hp...pChpC.u..tCt........ss.....C...........................P...................................................hhht.................................ht....h....tttth..t...................................................................t..h.........................................................htC...stCtt.....Cs..phCP.t.....sl ....................................................................t.htpClpC.G...hCh.............ss........C....................P................................................shtt...........................................................................................................................h.s......p.h...ht..ss.pstsht......................................................................t....htt.............................................................pthhsC...ssCts............Cs.psCPhpl................................................................ 0 989 2028 2572 +13025 PF13184 KH_5 NusA-like KH domain Bateman A agb Jackhmmer:O27285 Domain \N 27.00 27.00 27.00 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.93 0.72 -4.17 54 4542 2012-10-02 00:34:43 2010-08-06 11:35:28 1 31 4481 7 1040 3009 2319 69.80 50 16.41 CHANGED sGpRsKlAVtops.....plDslGuslGhpGsRlpslspELp..............sEKIDllpascD.......ppaltsALsPAcVhsVpl .....................sGsRuKIAVpo..p-.....ppl...DPVGACVGh+GuRVpuVssELt..............G.E+IDIl......W.....s-.-......PApFlhNA.Lu...PAcVssl..l.............. 
0 356 687 885 +13026 PF13185 GAF_2 GAF domain Bateman A agb Jackhmmer:O27394 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -11.13 0.71 -3.71 408 8344 2012-10-02 14:34:25 2010-08-06 13:27:49 1 1430 3034 23 3170 13907 1709 140.50 14 25.15 CHANGED sts...h.pclhpt...h....l.p..tl.hp.......hh..sphuh.lh.......lh..cppp...................thh.hs......htsht..........tht..ht.t.....................................hpptcshhhs......................tt......t.h.htt..htshl..........sl..Plh.tps...p.lhGll.s....lts....tt...................tasp.p..chphlptlupthuhulp ............................................................................................................................................t......hht......h...h...p.....h..hp......hhs..hphuh..lh.............................hh........-tpt...................thhhhs........................h.t.uh................t...........h...t...........................................................................................hpppcshhls........................................................tt.h...........t.phspptsh..p..uhl................sl....Pl.........h..t........p.....s.......p.llGlL.s...........l.su.........tph...........................ttas.p..p....-..hphLptlut.hu.sh................................................. 0 1082 2191 2819 +13027 PF13186 SPASM DUF4008; Iron-sulfur cluster-binding domain Bateman A agb Jackhmmer:O26216 Domain This domain occurs as an additional C-terminal iron-sulfur cluster binding domain in many radical SAM domain, Pfam:PF04055 proteins. The domain occurs in a number of proteins that modify a protein to become an active enzyme, or a peptide to become a ribosomal natural product. The domain is named SPASM because it occurs in the maturases of Subilitosin, PQQ, Anaerobic Sulfatases, and Mycofactocin. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.70 0.72 -3.63 441 4320 2010-08-06 12:57:53 2010-08-06 13:57:53 1 60 2271 0 1134 3391 1345 67.60 24 17.48 CHANGED C.........h....hpphs....lp.........ssGsVhs....C....................s..hhtph........tlGN.l......p......p....s.lt-.......l....a.pu......pphp...ph.....+pthhpt...............................hC........p..p.............Cs .....................................stpt.hs..lp.......ss.Gcl.hs....C..................pth.....shsph........plGN.l....pp.........p....s.lpc..........................l............h...pS......tt.hp.ph.................ppt..phtp..............................t.C...p..p..C.......................................................................... 0 484 821 978 +13028 PF13187 Fer4_9 4Fe-4S dicluster domain Bateman A agb Jackhmmer:O27418 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.23 0.72 -3.70 188 5796 2012-10-03 08:56:43 2010-08-06 14:12:30 1 300 2244 20 1131 20878 5640 56.40 33 18.14 CHANGED pChs.C......t....t.....C..hps.C.........Ptthh.hhtthtthhhh......................................................C.ht..Cst.........................Ch.psCPp.ssl .............................................................ClG...C.....t.....h.....C..hsu..C............................P.h.s..lh.....ph...s...s...t.s...s.t...h.t...............................................................................................................................cC.sh.......Csp..........................................Cs....psCPs.sA....................................... 
0 435 821 1007 +13029 PF13188 PAS_8 PAS domain Bateman A agb Jackhmmer:O26558 Domain \N 21.00 11.00 21.00 11.00 20.90 10.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.55 0.73 -9.73 0.73 -3.93 488 8124 2012-10-04 01:10:46 2010-08-06 15:34:09 1 1381 2453 5 2746 38264 3152 67.80 15 10.70 CHANGED pphps....l...hc.....shst.ulh...h............h..c......t......................lhh......sNpshtchh........G.......ht...............hhsp.h....t...........thhsthptttt..thtp....hh.........ptht ........................................hps....l.........hc.......shsp.ull.....l...............................l..D......tts...................p.................................lhh.............sNp.sst.c...hh...............G.......hs................hhsp.h..t.........thh.........................tthtthttt...................................................................... 2 951 1890 2360 +13030 PF13189 Cytidylate_kin2 Cytidylate kinase-like family Bateman A agb Jackhmmer:O26138 Domain This family includes enzymes related to cytidylate kinase. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.85 0.71 -4.37 174 1110 2012-10-05 12:31:09 2010-08-06 16:01:58 1 17 600 2 300 3886 1461 168.30 26 77.44 CHANGED lITIuRpaGSGG+pluctLAccL.Glsh....YD+.-llphsAcc.tGlscphh.pph-Epss.........psh.h.thhht...................................shshs-plaphpp...c......lIpcl.A.p.ct.ssVIlGRCAcalL+-..ps.ss...lplFlt.Ash-tRl...............cR.lhcp....h.sls.c.c.cApchlccpD+pRppa.apaYT..spcW.....GcspsYDLslsoup ......................................lITIuRphGSGGppluctLAccL.Glth...........Y..D......c....c...l....l....p.........h.....A.....c....c.....p.....G.l......s......t......p....h.....h.....pph.sEpts..............tsh....h....hh..........................................................ph.hscpl.a..ph..pp....c......hI....p........c...l...A.....p...c.....t...s...s....V..l......l.G.R.s.A.s......a.l..Lcc........hs...ps........l..p....l..a.l..h..A.s...h-hRl........................cR.....lhcp.............t...s..hs...t....c...cA.....pc..hl....p..c..p..DcpRtpa...Yphas..sp.c.W.....sc.s.psYDLslsou................................................................................................................................................. 0 141 243 275 +13031 PF13190 PDGLE PDGLE domain Bateman A agb Jackhmmer:O27741 Domain This short presumed domain is usually found on its own. However, it is also found associated with Pfam:PF01891 suggesting it may have a role in cobalt uptake. The domain is named after a short motif found within many members of the family. 
27.00 27.00 27.30 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.17 0.72 -4.30 127 379 2010-08-06 15:35:55 2010-08-06 16:35:55 1 2 367 0 183 378 172 89.10 25 37.34 CHANGED llsulllushluhhAS............usPDGLE.shtth..........pstp-hsh.pps.....t.........u.hsDYul.s....uhsp.........................................................hGssluGllGshlslsluhuluhl ......................h.lhulllusslu.hhAS............ssPDGL..E.shpph.....................tstp.ch...sh.ttt.....t.ht...................u.hsDYul..s......Ghss...................................................lGshluGllGshlslslshslsh.h.... 0 65 130 163 +13032 PF13191 AAA_16 AAA ATPase domain Bateman A agb Jackhmmer:O27636 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.44 0.71 -4.30 217 4712 2012-10-05 12:31:09 2010-08-06 16:51:29 1 525 1445 1 2447 16457 2610 171.70 16 17.45 CHANGED plhGR-ppl.....ppLtshl..t..thtpups..............thllltG.sGsGKosl......lp........p..lhpthtpp...................thh.hhpspttph.......................shtsh.tphhpplhpp...hhst...........................................................................hh....................................sshsshsssptp......................h....phlp..........ph.l.............................tthspt........pp.......................................lllhlD-lp..............hh-.....pss.........hph.lpt..L.....hpt.........hpt.....................slhll 
........................................................hhGRptpl...ttl.t.phl..........t....tht.ts.ps..............................p.s...l.l...l.t...G....sGsGKosL...............lp..............................p...l...hpp..htpp.........................thh...h..hp..s.p.sspt.....................................................sh.ts..h.....tph.l........p..p...l...h..pt...............hht................................................................................................................................................................................................................................................................................................tt.hs..s.t.tht.......................................................................h................thht.........................ph...l................................................................................t.t.httt....................tt.............................................................................................................................................................................................................................................lll...h....lD....-....hp....................hhc..........tts.................hph..lt.....l...hp.............................h........................................................................................................................................................................................................................................................................................................................................................................................ 
0 925 1663 2175 +13034 PF13192 Thioredoxin_3 Thioredoxin domain Bateman A agb Jackhmmer:O26981 Domain \N 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.70 0.72 -4.16 112 2658 2012-10-03 14:45:55 2010-08-09 14:50:14 1 25 2065 41 788 3102 540 77.70 30 21.59 CHANGED hcI+l..l..GsG.C.spCp...pltphscpulpch.u.lsu....plp+....l.pDhpcI..hpaGVhsTPul.lls....sclhhsGp.lPstcc.lpplLp ............................h...cs.hh...Sho..C.psCP.......sVpsh.s...h.h.u...l...h...ss....s...lpt...........shl-........s....sh..h...p-l.....pp..h.....s.lMuVPul..hl....s........G...c.........h...s..p..G+....hsh.tc..ltp................................. 0 267 506 666 +13035 PF13193 AMP-binding_C DUF4009; AMP-binding enzyme C-terminal domain Mistry J, Eberhardt R jm14 Jackhmmer:O26318 Domain This is a small domain that is found C terminal to Pfam:PF00501. It has a central beta sheet core that is flanked by alpha helices. 21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.82 0.72 -3.03 574 45096 2012-10-03 01:00:17 2010-08-10 11:53:00 1 1760 5076 96 14992 41390 10507 76.60 27 9.63 CHANGED ElEssL.ssH......P....s.V..sEu...AV..lG...hs.Dp...........hpG.ptl.hAaV..lh.........................................t...pc....lpph.......l.....pp......p.lu..........shthP.p...lhhl...s...LP+TpoGK ..................................................ElEssL..hp.a............P......s..V.....t-u.......s.V................lu............hs..ct...................................hhG..ctl...hAal..lhpt...........................................................tshpt......pp........lppa..........l........pp............p..Ls......................sat..hP.........hta...l.......s.........LPhTssGK............................................................................... 
0 3967 8666 12279 +13036 PF13194 DUF4010 Domain of unknown function (DUF4010) Mistry J jm14 Jackhmmer:O27500 Domain This is a family of putative membrane proteins found in archaea and bacteria.\ It is sometimes found C terminal to Pfam:PF02308. 30.00 30.00 33.10 33.10 29.80 29.50 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.37 0.70 -5.03 138 426 2010-08-10 12:49:28 2010-08-10 13:49:28 1 3 381 0 187 409 49 207.30 28 50.08 CHANGED VlluuloasGYluhRhhGs+tGlhloGlhGGlsSSTAsThshucpu+p.pss......hsth..hsuulllAsssMhlRlllls.sllss.s....lhh.tLh..hPhhs..hslsshssuhhh..h+p.......tttttt.ssshs....pNP...hpLpsALtFuhlhsllhlhsphhpphhG.ssGlhhluhluGlsDlDAhslSlsph.hsss.plshssAshulhlAshuNslsKssluhhhGs .....VlluuluahuYlshRhhGscp..GlhloulhGGlsSSTAsshshucpu+p..pss..........sth.hsuuhllAss.sMhlRhl...lll.sllss..s........lhh....tlh...hshhs..hslssh.shuhhh....h+p...........ttttts...stphp.....pNP.hpLpsALhFuslhsllhll....sphspphhG.suGlhhluhluGlsDlcAhslolsph.tsss..plssssushulhlAhhuNslsKsshuhhhG..................... 0 54 131 166 +13037 PF13195 DUF4011 Protein of unknown function (DUF4011) Mistry J jm14 Jackhmmer:O26588 Family This family of proteins is found in archaea and bacteria. Many members are annotated as being putative DNA helicase-related proteins. 
22.20 22.20 22.70 22.20 21.70 21.70 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.10 0.71 -4.25 100 425 2010-08-10 13:28:15 2010-08-10 14:28:15 1 28 396 0 147 436 75 172.30 23 10.16 CHANGED cLLDLohR.NpLLNhp...tspp.slplhsss.....ssplt-h..L.sp......upshphhs......hsp.t.....................................ts.p...stthhptpshp..........chttph....htpth......................pcpplh....................................................stlstppL...pp+..LhpLappA+ohh-EsGsNsLYLAlGhLcWhc.spps....cp.hhAPLlLlPVpLpRp...sspp.....sapl..phps--.hthNhoLhphL ......................................................................................................................pLLDhsh.R.Npllsh........stp..sl.lhs.t..........s.l.ph..l.tp......tpthphhs....h.t...................................................................................................t.........t.p.....................p...t.......tth...................................................................tpppl.................................................................................s.h.st.ppL.....ppp...LtpLhcpu+shhcEsGsssLaLAlGhLcWhc...spss......scshhAPLlLlPV.pLp+p..ssts.....tapl...phc.p--..sthN.oLhphL........ 0 62 95 120 +13038 PF13196 DUF4012 Protein of unknown function (DUF4012) Mistry J jm14 Jackhmmer:O26332 Family This is a family of uncharacterised proteins found in archaea and bacteria. 
28.00 28.00 33.00 31.40 26.20 26.20 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.70 0.71 -4.04 44 176 2010-08-12 09:26:21 2010-08-12 10:26:21 1 3 119 0 73 178 61 143.00 31 24.84 CHANGED cGsRsYLlhsQssAEhRusGGlsGuhullps-sGplslsc.h..tsssch....htpssss.hss-pts..has.....t.shhhpDsshsPDFspuAphhpphWppps...Gt.slDGVlulDPVsLshlLpss.GPVsl..s.DGp..........slsusNsschlhsssY ....sGsRsYllhsQssuEhRuoGGllGuhuhlss-sGplslsc.h..tsssch.......hspsshs....hssc..ptp...lau..............hthsh.hpssshsPDFspsAphhpshWptps...sp.slDGVlulDPVsLptlLtss.GsVsl..s..DGp............slsusNssphhhsssY........................ 0 22 51 61 +13039 PF13197 DUF4013 Protein of unknown function (DUF4013) Mistry J jm14 Jackhmmer:O27260 Family This is a family of uncharacterised proteins that is found in archaea and bacteria. 26.90 26.90 27.10 27.30 26.10 26.80 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.40 0.71 -4.92 99 205 2010-08-12 09:39:34 2010-08-12 10:39:34 1 1 98 0 137 208 21 168.40 18 63.89 CHANGED lllssll....hlsllshhllhGYhlclh+ps....hst.pshPc...a.s..sasphhhcGlhh....hllsllahllshllhhlhhhhshh...........................s..hh.........slhshhhsh....lhhllsllhuhh.hsh....uhspautps.phssuFphsplhphlp...hhhhta......lhshll.hhllshlhshlsslhhhl ......................h.hlhhll....hlsllshhllhGYhhclh+ts....hs..sthPc...a.p...sasphhhcGlph....hllsllYhllshllhhlhshhhhh.................................s..hh................hl...hsh..hhhl......lsh.llsllhshh.hsh......uhsphutps.phtuuFphpplhphlp.t..hshhpa.......llshll.hhllshlhshlhhhh...h.............................. 0 37 99 126 +13040 PF13198 DUF4014 Protein of unknown function (DUF4014) Mistry J jm14 Pfam-B_4873 (release 24.0) Family This is a bacterial and viral family of uncharacterised proteins. 
23.00 23.00 23.70 23.50 22.20 20.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.41 0.72 -3.57 5 132 2010-08-12 10:49:27 2010-08-12 11:49:27 1 1 102 0 1 55 0 70.70 74 91.89 CHANGED Ms+ha+K+YPRKSRsTEFLFLILFIVLM..lPISPLIllWlIG+..........................IlEhVIELYsDVVWuSFNsLHNKINPY.......KEN .....MsphhcKpYPRKSRsTEFLFhILFIVLM..IPISPLlhVWhIGK............................IIEPVIELYsDVVWASFNTLHNKINPY.......KEN.................... 0 0 0 1 +13041 PF13199 Glyco_hydro_66 Glycosyl hydrolase family 66 Bateman A agb Pfam-B_3959 (Release 24.0) Domain This family is a set of glycosyl hydrolase enzymes including cycloisomaltooligosaccharide glucanotransferase (EC:2.4.1.-) and dextranase (EC:3.2.1.11) activities. 23.70 23.70 23.80 23.70 23.60 23.60 hmmbuild -o /dev/null --hand HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -12.87 0.70 -6.11 25 143 2012-10-03 05:44:19 2010-08-16 13:58:58 1 19 112 3 25 168 3 475.00 26 70.15 CHANGED sTDKApYpPG-.sVphslshpss.......ussplchpHLs.psltpp.phshtht.spshs.....WpsPssDF...pGYhVclhs.ppssstlspsshAlDVSSDWs+FPRYGalus..Fsss...........s.pphpppl-pLsc.aHINulQFYDWta+HcpPlstss..tp..psWsDhu......s.RplttpsVKshIstsHphGhtAMhYNhlYGuhpshtp............sGlp.-....Wtlaccsspt......p.-pasL.....sp.h.ss..lhlhs.PsNssWQsYlhsptscshcphsFDGaHlDQlG.sRusl.a.......shsGpsl..........p..................LsssausFlpshKptl.sstp.........LVhNsVssautpplus.ushDalYsElW............tspssYssL+phlcp.....scph.sps.....upssVlAAYMsa...........................................................stus.ssGt.................................................................FsTsuVLLsDAsIFAuGGsHlEL......G-........phLspEYFPspsLphs.ccLpcphhsYYDFlTAYENLLR.....DGtsp....sssshsssstph.Shs....................sptspVWshuK.cssstc..llHLlNhhGss..shsW+c........ssuspspPpphpslplpl........ptss.plppVahASPDh.tsGsspcLsFpp....sGs.V..pholPpLpYWsMlhl 
................................................................................ssKuhYtsup.pV.hphp.t..............hthphphhhht..phltp...ht.....ttph...........hp.Ptpph...pGYhspl.h..ttspth......tshulsVssshtpFPRYGalus..a.pp................tp.tttht.hpp.hplNsh.FYDhh.ctp............Phsts................................tpa.sh.......t.ppl.hpsl+phlpth+phGhhuhhY.shhh.us.ps.h.p.............tsst.p....hhlapsssp...............t..thsl.....st..h..s.....hh...hhs.Psstp.WQpYlhpp.ppshpphuFDGaphDplG...ppssh..h.......shs..up.h.............................lppsassFlpshKpth..sh..........lshNsVst.s.pplst.....up.DhhYsElW.............t.pstatsLpthltp....stth..............thshlhuAYMphtttt...............................................................................................................................ttt.hppsuhlLssAshhA.GG.Hhpl............Gp.........thLtptYaPsps.hphs.pphppt.hhpY.pFlsuYpNlLp.....Dsthp....t.s.phshsshth...uhs...................sptsplhshsK..ps......sshp......hlphlNh.shs....s.sW+s................us...pPth.pphtlph.........htt...spplahsSPD..t........tut..plthp......pst.l...hslPpLphWsMlh........................................................................................................................................ 0 12 19 24 +13042 PF13200 DUF4015 Putative glycosyl hydrolase domain Bateman A agb Pfam-B_597 (Release 24.0) Domain This domain is related to other known glycosyl hydrolases suggesting this domain is also involved in carbohydrate break down. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.04 0.70 -5.52 62 311 2012-10-03 05:44:19 2010-08-16 14:08:57 1 12 292 0 87 459 246 298.50 31 62.60 CHANGED GlYlouhsuus.pt.h-cllchlcpTcLNuhVIDlK....-DpGplsapp.psstspthssspsh....hDhpthlccL+ccsIYsIARlVsFK.DshLAct.....+P-huh.ppssGslWpstpu.....u.........alNPapc-VW-YNlslAccAAchGFDEIQFDYlRFPs.....pt.tcpLpastsshpp.......................................................................+ssAIssFlthA+-cL.pshsV.lSsDlFGhssts....cssuIGQshppluppVDhISPMlYPSHassG................aGlc.pPDtcPYclltpuhpptpptlsth........tshhRPWlQsFTssaltt..hhpYGspplcsQIcAltDs..GhspalLWNAuNpY ...................................................................................................ulYlouhshss..p.h.hcphlchhcpotlNuhVIDlK....sDpG.lsa.s.pst....h..s...p.t..h.....s..s....spsh..............hD...h.tt.l...lcph+cpsIYsIARl....V...s.F.......K......D.sh.Lu..pt................pP..-.hul.....pptsG..p.h....Wp..s..tps..........s.....................W.lsPapc.csWcYN.l....s.lA.c.E....A.A.c.h..GFDEIQFD.YlRFPs..........tt.sppl.pa.s...tsptp...p...................................................................................+ssAIssFLphA+.cc.L..t...h..s..l..lSsDl.F.Ghsshs....sstu...IG........Qphpt..l..u..p.....VDhISPMlYPSHas.uh...............huhp..PstcP...Yphlhpuhtcstphh..tt..................tshhRPWlQsFpsshhtt...h.Ys.pplctQlcAhp-t..shstahLWNusNpY..................................................................................... 0 43 66 74 +13043 PF13201 Xylanase Putative glycoside hydrolase xylanase Coggill P pcc JCSG_target_393205 Family This is a family of putative bacterial xylanases. Comparative structural data from TOPSAN indicates there to be a C-terminal carbohydrate binding domain similar to those of carbohydrate enzymes such as glucanase and xylanase. 
There is also structural similarity of the N-terminal domain, according to TOPSAN, to endo-1,4-beta-xylanase (from Streptomyces sviceus) and beta-xylosidase (from Magnetospirillum magnetotacticum MS-1). The N-terminal domain fold is an immunoglobulin-like beta-sandwich. 20.90 20.90 20.90 20.90 20.60 20.80 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.04 0.70 -5.62 30 518 2012-10-03 05:44:19 2010-08-16 14:49:09 1 35 171 3 105 458 18 216.30 18 43.09 CHANGED oSCI+-EA.NAEADIp....u..s......psssshLhp...........ps.hssspIplhV..pp.ssDloplAPpFsLTsGATI........sPs...u......G......os.p......D......Fos...Ppp..............YTVTSEDGpWc+sYpVsh....hsspl....s.o..pacFEslpht.........p.t.cYplaY.-h.psuspp.....hp..WASGNsGFplouhu.p.ssp-YPTsQsss.G.hpGKClK.LsT+STGshGuh...l..tMP.IAAGNLFIGoF-ls.sAlsssLKAT+FGhPFp+..cPhpLsGYYKYKAGcpap....-.s.Gp..l....ss+KDpssIYAVhYEsscsst..........h.LDGs....Ns..hT..usp..lVulAcl..s..-s+E.........T..DpWTcFslPF.h.p.GKsIDts+LppGcYsLAIVFSSStcGupFcGAlGSTLaIDEVcLI 
.........................................................................................................................................................................................................................................................t....t..t..p..l.h.l.............ts...s...l.pt.h....s.h.t..hs.......s..Aol............t.Pt..s......s......s.........................sasp.....t..............apVsuts.....t.th...p...Yp...lph................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 40 93 105 +13044 PF13202 EF-hand_5 EF_hand_3; EF hand Bateman A agb Jackhmmer:O26668 Domain \N 25.30 11.50 25.30 11.50 25.20 11.40 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.82 0.73 -7.13 0.73 -4.38 149 5306 2012-10-02 16:17:27 2010-08-17 09:27:41 1 515 903 68 2797 37229 2719 23.80 30 8.51 CHANGED lp.s..hFp.thDhstDGpIshpEh...pp..lh ...........h....hFp.thDt.s.tD..Gp.lohcEh.........h............. 0 1041 1668 2149 +13045 PF13203 DUF2201_N Putative metallopeptidase domain COGs, Finn RD, Sammut SJ, Bateman A agb COGs (COG4900) Domain This domain, found in various hypothetical bacterial proteins, has no known function. However, it is related to Pfam:PF01435. 
23.20 23.20 23.30 23.30 23.10 23.10 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.95 0.70 -5.24 16 492 2012-10-03 04:41:15 2010-08-17 13:36:00 1 4 403 0 173 512 550 212.40 15 53.68 CHANGED phppRhstAlt+hlEstPhhuuLsLhhphp-..ucsh......................tsstTDGc.plaYsPsF....-sLslsppVGllAHEVhHlALcHstRttsht......hD.plaNhAuDuhINssLlpsGa.tLPtsslh.pp.............csshtsasAEclYptl.........................tt.ttttuttptt..sputppstpppscu............t.hthssGtps-hsstsputussEttstput..cWpppltcthptG+usGph.utLtchls-hspsthP.WcplLRshls+slpc.s...chSWpRPsRRalu...........pssa.Pu.hp ......................................................ht.........................hhs..h..h.th.........s.th.........................s.h....ts....c...uh....hlhhsPthh......th...s........p...p...h..t...t...l...l....hH-lhHhhhtH.h..hRt.tt....................................hst.hhshAsDhs...lN...p.h....l.......p....t.h...........h..Ptshh..........................................................th.sEt....h.h.t.l...............................................................................................................................................................................................................................................................................................................................................................................................................t.............................................................................................................................................. 0 70 126 158 +13046 PF13204 DUF4038 Hydrolase_6; Protein of unknown function (DUF4038) Coggill P pcc Jackhammer Family A family of putative cellulases. 
22.60 21.70 22.60 22.00 22.10 21.50 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.75 0.70 -5.08 28 345 2012-10-03 05:44:19 2010-08-17 13:38:46 1 8 222 4 120 308 23 264.50 24 58.13 CHANGED spRaLhppsGpPFFaLuDTuWthhp+lsp..--hchYLcpR+ppGFNVIQhssLsph-uh....sh.sphsh..tpp.th.....thN..........sYa-HhDthlchAspcGlhlulVslWssts..st.h..........sslhsh-pAcsYu+alspRYcch.NlIWllGGDs............csspthchWpulupslpptsst.......LhTaHPpGp...pssschFcsp.WLDFpMaQSGHpp..psp...............s.hphl..phshpppPsKPllDuEPsYEshshshps........passhDVR+tuYhulhuGu.sGhTYGspuIWQah ...............................................................................t..+hht..tsGpPahhhu.-TsWthh.....p.phsp.............p-...h....phY.L.psppcpG.FNslphslls......p...h.s...h......................sh.s.....h...sh....t..................................hshtthN.........................................sYFc+h-hhlptstphGlh..s.s.l..l.h..a..s.s.h....t...tWh.....................shhs..-p...sptYscalspRatsh.Nl.l..Wh.luGDh.............................sptshphap.thsp..slp.........ph...sstp..............LhohHspsp.........ts.sph.......h...........psp..shlshshhQo.uHtt...tt..................s.hth.l.....t....p.ptt...hKPVlssEs.tYEs.h...........hshts..............hss.-lR+t.sa.ulhuGu...s..GhsYGtpslhph.......................................................................... 
0 57 94 107 +13047 PF13205 Big_5 Bacterial Ig-like domain Bateman A agb Jackhmmer:O26670 Domain \N 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.86 0.72 -3.44 269 1557 2012-10-03 16:25:20 2010-08-17 15:32:03 1 286 641 0 798 1779 1039 104.70 19 16.89 CHANGED DsssP.sls..us.sPsss....us...sl..ss..ss..............slslsFsE.sl....pssshstshhh..............ssstsssls....s.s...................hsu.psl..slsPs..............ss.Lsss.TsYslsl.....ssu......lp.....Dhs...G..s..........h...............shs...a....sFoT ..............................................................................................P..tlh....sh.sPsss......us......sV.....ss..st...................sls...ls.Fsc..sl.........shssspts.l..hh.....................................ssssshslshp.........................................hsu..ppl..sls.Ps..............ss.Lpss.....o.....s...Yslsl.........ssu.....lp......Dhs..G.....s..................................shs...hsFoT.................................................... 0 256 541 758 +13048 PF13206 VSG_B Trypanosomal VSG domain Bateman A, Jackson A agb Jackhmmer Domain This family represents the B-type variant surface glycoproteins from trypanosomal parasites. This family is related to Pfam:PF00913. 
25.00 25.00 25.00 25.00 24.70 24.80 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.42 0.70 -5.32 132 958 2010-08-18 11:04:25 2010-08-18 12:04:25 1 4 12 0 708 958 0 291.60 15 75.39 CHANGED hhlhlhhhs................ssusssssp......................Ntt-FpsLCp...llp.....luptt.....hp.st.sp...psssthpplt....tlNhohusss....ahp.h.ptttttp.ttt..tt.t..................ttWppsttthpp.ttp..........................................................t................thtphshpthsstttpthp................pltppAtplhpphp..ptttpttsss......thppt....lpp...AlYGsssssssst.............hssssspsss............C...........stsssssu.polssslhCLCssss...........................................................ssssphCspt............sssssshtssss...sspssap.pltstC...sttsssplT....ssplpsslt.h..........sthppttsssssphhLGp..........tssssCsG.s.....ssuhC.VpYsshhsst.....shsslsWlppLppAsppLpptppsptptpp.........htp.plptl .....................................................................................hh.....................................................................................................................................................................................................................................................................................................................................h...h..h.tt.t...........t...tt.......hcpt...............htp.....sla.Gptssts..p...................................................tththsss....+sts......................C..............tsttsp...t....s..G...colssDhhCLCssss...................................................................................tpstpphCstt...................tsttt.s.h....t.s.s.t.t...................t.h.......pp.....s.....Wp..p.........lp.ptC....pttpp..t..h.p.................tplpphl.tph.........................tthhpt..tp.p.p.t.p.........t.....t.......h.h..LGt..................tptts.....CsGpp..................ssshC.V.Yttp...t.....................ttttlsWhpcl.ppuh.p..p.hpp.hpp..ttp.
.pt...................................................................................................... 0 622 708 708 +13049 PF13207 AAA_17 AAA domain Bateman A agb Jackhmmer:O26135 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.06 0.71 -3.29 412 3187 2012-10-05 12:31:09 2010-08-19 09:37:34 1 91 2065 29 1073 22418 10222 153.40 15 55.53 CHANGED hlhls.G.ssGuGKoTluptL.....uc.p...............h..........s........h......h...l..phss.....................ht..shh.tht..pt....................t..p.....ph.......lpph.......................hhhcs.........................................................h.cht..tss......phlhhs...................hsh...........sttt......hhp..c.............................................cshsp .........hlhlh.G.ssGoGKo....TlA....ppL....................up..p..............................................h..................................s..................................hs....................h.............l......chDs....l.h................................................ht...sht.....tht.....pt.........................................................................t..h.p.......................ph..............lpt................................................................hll-G..........................................................................................................ht.............................hhhh.............................h..................................................................................................ttthtt.h...................................................................................................................................................................................................................................................................................................................................................................................... 
0 356 664 893 +13050 PF13208 TerB-N DUF4016; TerB-N Anantharaman V, Aldam G, Mistry J gba Anantharaman V Family The TerB-N domain is found N terminus to TerB, and TerB-C containing proteins [1]. It has a predominantly alpha-helical structure and contains an absolutely conserved glutamate [1]. The presence of a conserved acidic residue suggests that it might chelate metal like TerB [1]. These proteins occur in an two-gene operon containing an AAA+ ATPase and SF-II DNA helicase suggesting a role in stress stress response or phage defense [1]. 25.00 25.00 38.00 38.10 24.80 23.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.56 0.70 -4.89 152 214 2012-10-09 17:40:07 2010-08-19 12:01:53 1 12 206 0 57 217 8 206.60 24 30.87 CHANGED Psc.hhphtshth..s.....................hh......u....................ht...sp.....-tsalptuh.h.As....sshsht.hshh.sYaPoYpslospp++uYhsWhupsRps.........ssssluYlFlahYtL.pplhl-ss......pcsht.tlhp.hpch............lshY...........hppah.pshl...hhhsl....s..........................hpph..........hhh.p...s...............................................pp.....hutc.shthhhsLuphhhc..pssl.........................schshhhst.sh.pltt....+ps ...............................tphhthtthhh.s.......hlu...............ht..sp...-tsFlp.uh.lAs.....sphsht.hshh.sYa.PoYsslospp++uYhpWhupsRps.........ps.sluYlFlahYtL.pplhl-ss......pcphstlhp.hp+h............lshY......hppahpshl...hhhsh.st..............hpch.........hhhp.s............................................pp..tutc.shthhhsLuphhhcppsl..........................schshshshsh.tlt..................................... 0 16 35 45 +13051 PF13209 DUF4017 Protein of unknown function (DUF4017) Aldam G, Mistry J gba Pfam-B_2009 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 
21.10 21.10 21.30 91.40 20.20 20.50 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.05 0.72 -4.18 15 75 2010-08-19 11:03:21 2010-08-19 12:03:21 1 1 75 0 4 26 0 56.90 82 94.07 CHANGED MKNIlPALllYIIVClIAhIlPAS-GYNpVGWKLFVGQAYAIPIFlITAIITFYINKKKS MKNIhPALllYhIVClIuhIhPASpGYNalGWKLFVGQAYAIPIFlITAIITFYINKKKS.. 1 1 3 3 +13052 PF13210 DUF4018 Domain of unknown function (DUF4018) Aldam G, Mistry J gba Pfam-B_2010 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 190 amino acids in length. 25.00 25.00 162.40 161.90 20.00 19.90 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.26 0.71 -4.92 6 74 2010-08-19 11:48:07 2010-08-19 12:48:07 1 1 73 0 3 66 0 186.50 77 56.30 CHANGED hpKKsTGFllLhllQIIuCShFLsFSllGoILLPLFFFIVHAhssGhPVQKSLGuIlWFVVSAIFYA.PFPPLWKLlLLsLHIhlTFWLTGuNRNQQLlRFlSIITIGlhShLll.lFPYIRLlhSYlhphVALGFGYAlpPLhSAApLKDT--hWpNKGpLtcspIc-ss.p...FDPhllNSITIIlhTsIAl ...................................MpK+TTGFVILLlhQIlhhSlFLPFSLFGTIhLPLFFFIVHVVGPGYPVQKSLGGIVWFsVSAIFYA.PFPPLWKLLLLllHIMITFWLTGANRNQQLLRFsSIITIGlMSILIVQVFPFIRLIFSFIsEVVALGhGYAlNPLlsAAELKDTEDVWANKGHLLKPpIEDs+t...DFDPTLINSITIIsCTAIAI....... 0 0 2 2 +13053 PF13211 DUF4019 Protein of unknown function (DUF4019) Aldam G, Mistry J gba Pfam-B_2012 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 130 and 183 amino acids in length. There is a single completely conserved residue E that may be functionally important. 
22.10 22.10 22.10 22.40 22.00 21.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.24 0.72 -3.77 34 143 2010-08-19 11:48:56 2010-08-19 12:48:56 1 4 124 0 45 144 12 100.90 29 57.03 CHANGED puAppalpLlDsGcas-sWcpAushh+stlspcpWsstlpssRts.LGsltsR..phhstpthsshsssPcGpYsslpapTpFsstts..shEplohhh-pDutW+lsGY ................sA.phhphlDus.....phsphWp.sAushh+stlsp.s.tasspltstRtp.LGslhsR..t.lsth.phs..sh.sshPsG..YsslpasTpFspssp...shEplohph-pDstW+lsGY............. 0 10 24 33 +13054 PF13212 DUF4020 Domain of unknown function (DUF4020) Aldam G, Mistry J gba Pfam-B_2014 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 176 and 195 amino acids in length. 21.60 21.60 22.60 21.60 21.20 20.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.00 0.71 -4.70 10 80 2010-08-19 11:50:41 2010-08-19 12:50:41 1 2 80 0 5 73 0 175.10 70 16.29 CHANGED VFIKCVLYWE..........................QKTDWLYPLLlpEsEEsKIKFMpaLCYYVKTLSs+EQQKFWsAWLusFLRERP..KMGsI....TsREYVMlLRhlLaMDEVhEKGLpll.ppF.sVpGcssptEhKQLahchLcKcEphKtahEhYAslFFhLLQsspEAshhEsEIIQIK-hLspapV-+clLshIcNEh ......................................VFIKCVLYWE..........................QKTDWLYPLLIpENEENKIK.FMQFLCYYVKTLSsKEQQKFWsAWLSlFLRERP..KMGtI....TAREYVMLLRIILaMDEIlE+GLsIls+uFSsVpGK.ssppEhKQLahEhLcK.pEShKhaKElYANVFFhLLQThQEAsLhEsEIIQIKELLltYcVEcpVlphI-NEI.................................... 0 2 3 3 +13055 PF13213 DUF4021 Protein of unknown function (DUF4021) Aldam G, Mistry J gba Pfam-B_2025 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved YGM sequence motif. 
25.00 25.00 29.30 29.30 19.20 18.30 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.47 0.72 -4.33 5 82 2010-08-19 11:51:21 2010-08-19 12:51:21 1 1 81 0 4 36 0 45.50 80 77.05 CHANGED K-NspcNsTsIpNNNTsNLslEEQAMNGLYGMPETsIEDADHAcT- .............KENVTENsTsIQNsNTANLsIEEQAMNGLYGMPETsIEDADHAts... 0 0 2 2 +13056 PF13214 DUF4022 Protein of unknown function (DUF4022) Aldam G, Mistry J gba Pfam-B_2027 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 85 amino acids in length. 25.00 25.00 79.30 79.20 21.10 19.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.78 0.72 -3.88 9 74 2010-08-19 11:52:15 2010-08-19 12:52:15 1 1 74 0 3 36 0 75.30 92 97.18 CHANGED MLLSHIMtMDaIMSIlTLALLLLAElLVAIILIGVSIEICSYGWKKSNGIKYSCLLLSLLLGTASILGLhAAPAYFFIQLTEK ...............MSIlTLALLLLAEILVAIILIGVSIEICSYGWKKSNGIKYSCLLLSLLLGTASILGLhAAPAYFFIQLTEK. 0 0 1 1 +13057 PF13215 DUF4023 Protein of unknown function (DUF4023) Aldam G, Mistry J gba Pfam-B_2030 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved KLP sequence motif. 25.00 25.00 34.40 34.10 19.90 19.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -7.66 0.72 -4.18 6 106 2010-08-19 11:53:06 2010-08-19 12:53:06 1 1 106 0 13 35 0 37.50 69 92.19 CHANGED MpsTp-FV-Kl+EsQuKsc+N+c+QGpGsPu+KLPNKQ ...MSNSN-FLDTLHEKQAKDEQNRKRQGNGNPAKKKPNKT.. 0 3 10 10 +13058 PF13216 DUF4024 Protein of unknown function (DUF4024) Aldam G, Mistry J gba Pfam-B_2031 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 40 amino acids in length. There is a conserved RDE sequence motif. 25.00 25.00 93.50 93.40 21.20 17.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.68 0.72 -4.15 6 71 2010-08-19 11:53:56 2010-08-19 12:53:56 1 1 71 0 2 7 0 35.00 97 94.59 CHANGED MVGLSVTKlHLFRDENVNFLFCIEFMQKNELLLTH MVGLSVTKlHLFRDENVNFLFCIGFMQKNELLLTH 0 0 1 1 +13059 PF13217 DUF4025 Protein of unknown function (DUF4025) Aldam G, Mistry J gba Pfam-B_2033 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved EGT sequence motif. 25.00 25.00 33.60 33.40 23.30 22.40 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.76 0.72 -4.12 17 126 2010-08-19 11:54:36 2010-08-19 12:54:36 1 1 126 0 18 64 0 51.10 49 81.61 CHANGED Kpppppt.p.lAscpasssshcsss.h.tuLAhTHEQVSDsYtEGTI-shlpcpsp ..........................KQpNKQslp.stpQs.YTScssssup......SVhcEQISDTlAEGTIDsKLs+tS........ 0 5 10 12 +13060 PF13218 DUF4026 Protein of unknown function (DUF4026) Aldam G, Mistry J gba Pfam-B_2037 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 450 amino acids in length. The family is found in association with Pfam:PF10077. 
21.30 21.30 21.30 29.50 19.50 20.70 hmmbuild -o /dev/null HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.87 0.70 -5.56 11 104 2010-08-19 11:56:37 2010-08-19 12:56:37 1 3 104 0 8 85 0 295.70 54 65.82 CHANGED EKcsSpMVAlPuoclTht.LEQRLEp.QThaT-G-IsYhs-t..suFFapC++s-c-L+FalpLsEsDs-.pI.pPYauTDslosELhAcAsAssQ-lhlEsLFps..cPLssYhQQLphlphLsPDLLLulD.SAAGKVhTREWlpFQLEs.DLhP-I-SLYVIHAVYDs-.......E-ssPThYWFHTHGLsRCGLoEsElIIPp.IuSYYGIPDLFpoFVNNuIpNGQIsFNEPIhIGQTpsGhEYLVAVPFEEGLcHVGpSTPlDsL+PLEEMpachpsss.pspFLGDhsDRDEhHQcPSVMLFRsspEpPhLESFFKGaEEQsAhMFhRT ............................................ERHFSDMIAVIPTRI...T..I.EQLKQRLEsIATKVD.-LKI..V....Y....SDE........TSLIVEL.....HMc...-plIPYELHIDEs.s.D..PE.EY...KhYNRQDoT.IVDRsFED.A.AaGTEIFTRT..LFVG....DVL-CFFQQLQFLWNLAP.DLLFVIDSSAAMKVISRsYIE.YHVEN.ELLPDIPDLYVIHSVYEDD.......K-uEPTQYWF.HTHGLLRAGVTEIELI.IP..N..R...ISSYYG..IuDLFQTFANNAVE.NGQVP.MNEPIVIAHSQQG.SIH.TVA.VPWEKGLSYIGHKTshDQLSSIE-EEVKLQPIsAQNTFLGGMDsRD.EYHQSPSVLLFKhsTSEEh.IESF.FKEHEEATGLMFYKT..................................................................... 0 2 5 5 +13061 PF13219 DUF4027 Protein of unknown function (DUF4027) Aldam G, Mistry J gba Pfam-B_2038 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved CLGGF sequence motif. 25.00 25.00 30.40 69.60 24.90 16.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.28 0.72 -7.64 0.72 -4.52 13 86 2010-08-19 11:58:14 2010-08-19 12:58:14 1 1 75 0 2 42 0 36.00 77 90.50 CHANGED MKuhQNLSYSQGVoLICLGGFsuSVsLAllIKhhpQ MKuhQNLSYSQGVoLICLGGFAASVTLAVlIKlhHQ 0 0 1 1 +13062 PF13220 DUF4028 Protein of unknown function (DUF4028) Aldam G, Mistry J gba Pfam-B_2040 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 67 and 93 amino acids in length. There are two conserved sequence motifs: IVKI and YVKKWF. 25.00 25.00 88.70 88.60 21.20 20.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.19 0.72 -4.33 12 74 2010-08-19 11:58:51 2010-08-19 12:58:51 1 1 74 0 2 39 0 64.00 92 87.78 CHANGED MIVKILKDSSNSFLCTVQNKNG-pYVKKWFpKpcNpEELGRPTFKEVEKDWKENRESFMYPNlKA MIVKILKDSSNSFLCTVQNKNGDQYVKKWFRKHENNEELGRPTFKEVE+DWKENRESFMYPNVKA 0 0 1 1 +13063 PF13221 DUF4029 Protein of unknown function (DUF4029) Aldam G, Mistry J gba Pfam-B_2041 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 119 amino acids in length. 25.00 25.00 170.80 170.60 20.40 20.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.16 0.72 -3.61 10 73 2010-08-19 11:59:44 2010-08-19 12:59:44 1 1 73 0 2 37 0 94.80 90 89.30 CHANGED MTKIELMALFLGYVFIFSNLNRIQEQSILEICIFSISIELFSIVSIVLLNELFphIHSFELhKFGNlVLQVICAYIVFVVLDKIlGQQTVFQDpRK MTKIELMALFLGYVFIFSNLNRIQEQSILEICIFSISIELFSIVSIVLLNELFpWIHSFELMKFGNlVLQVICAYIVFVVL-KIVGQQTVFQDNRK 0 0 1 1 +13064 PF13222 DUF4030 Protein of unknown function (DUF4030) Aldam G, Mistry J gba Pfam-B_2044 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 164 and 197 amino acids in length. 
21.60 21.60 21.60 45.10 21.50 20.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.71 0.71 -4.48 6 71 2010-08-19 12:00:24 2010-08-19 13:00:24 1 1 65 0 1 48 0 136.00 73 72.54 CHANGED sDpsRpVDLEIADT.s.oSclKp-INpQLKNQsI+PYTINlsQRsMcIVKpEpRWscVhuoIh--lFsKNGYKGFuIp.hNhEusQPhslsIpTpIssuDsGAKEFGcKIEKElsslLKTcclpKWI-sDSYTIEIYSpDpQK ..D-KpRhVDLEIADSEN.uNEIKKEINKRLQIQGIhSYKVNISQRN+EIVNAE+RWpLVFGQIFDDVFRKNGYEGFGIQQINYKKNQPVTIDIKTKIsDDEVGAREhGQKIEKEVEsVLKTEAVKKWIENDSYAIGIYDI-sR..... 0 0 1 1 +13065 PF13223 DUF4031 Protein of unknown function (DUF4031) Aldam G, Mistry J gba Pfam-B_2059 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 91 and 130 amino acids in length. There is a conserved HYD sequence motif. 25.00 25.00 25.10 25.10 22.90 24.60 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.73 0.72 -3.83 53 158 2010-08-19 12:01:13 2010-08-19 13:01:13 1 2 152 0 76 163 23 81.10 42 63.48 CHANGED VYlDsstaP....h.+GphWuHLluDo.....h-ELHAFAs.plGlscRsFpp......-HYDlstpp+scAl.shGAlsls..sR-....ls+RLpsuG ......VYlDsstaP.....h.+GphWuHLluDs.....h-ELHAF.As.plGlsR.RuFpp........................-HYDlPspchs.cAl.thGAltls..p+c....Ll+plht..s............... 0 22 48 68 +13066 PF13224 DUF4032 Domain of unknown function (DUF4032) Aldam G, Mistry J gba Pfam-B_2062 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 170 amino acids in length. The family is found in association with Pfam:PF06293. 
22.30 22.30 22.30 22.40 22.00 21.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.95 0.71 -4.54 44 312 2010-08-19 12:02:02 2010-08-19 13:02:02 1 7 304 0 87 228 135 157.30 46 36.80 CHANGED cpLWsELTspEsFsssEp.WRlppRIcRLN-LGFDVuElplpost..sGsplplpP+VVDAGHHpR+LhRLTGLDVpENQARRLLNDL-saRAs.......stpshs-phsAHcWLs-VFEPslculPt-L+uKLEPAplFHElLEHRWaLSEptG+DVshtEAlpSYlcsVL. ..............................pLWsELTst-pFsss-h.apl-pRlcRLNsLGFDVuELclpTss.........-GpclplpP+VVDAGHHpR+LLRLTGLDspEtQAR........RLLNDL-saRAp...................sshpsh-.plsAHcWlsElFEPsVptIP......E.....l..p....p...+....h....Es...AQhaHEVL-HRWYhSp+tt.+-VshsEAspuYlcslL........ 0 35 64 80 +13067 PF13225 DUF4033 Domain of unknown function (DUF4033) Aldam G, Mistry J gba Pfam-B_2072 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 80 amino acids in length. 21.40 21.40 23.30 31.30 20.50 18.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.13 0.72 -4.37 27 109 2010-08-19 12:02:47 2010-08-19 13:02:47 1 3 33 0 82 111 5 84.10 49 32.97 CHANGED WLhGPsc.lpph-..................tptsuVhlcKCRYLEpSsCsGhClNhCKlPTQsFFccchGlPLpMpPNF-DhSCphhFGppPsshp-D ....................................WLhGPsc.V.ps-.......................ssGhpppoGVhlcKCRY.LEpSsCsGhClNhCKlPTQ.sFFpcchGlPLpMpPNFEDhSCphhFG.tPPshppD.......... 0 29 63 75 +13068 PF13226 DUF4034 Domain of unknown function (DUF4034) Aldam G, Mistry J gba Pfam-B_2075 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 280 amino acids in length. There is a conserved PRW sequence motif. 
21.30 21.30 21.40 21.30 21.10 21.10 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.87 0.70 -5.31 8 236 2010-08-19 12:03:56 2010-08-19 13:03:56 1 9 217 0 29 143 1 257.00 50 39.77 CHANGED +ppph.clcphLp-pca-ELDchh-pthst.apu+puEpc...Yshshss.s..hhDhsollus...t.ptLAhL+AWppApPcShHAalspupYWp+pAhchRohuWApcVTcstWlsAttss-hslhAtLpAlsLcPR.hhAuhhhhssoshFGpPsWLusllpGpcstspslhts..thctph.pEspAhhuppGLpshsph...st.hPssLPsts-.cchpcsh.YWLpssLuIaPptFhshp-Yl.ahhPRW.GGSac-IccFlsSslCcpLSptE+spLchhlhWD .........................................................RcWpIsDIsuLLREcRY-ELDEpYsQAL..TcSFTS.R-AE+R......YFhAWspMc...FYDMcTLV-A....GP...pGLALI.KsWQ....+ARP+STHAWL...AEAQYWsHRAWLYR.SYGWA+-TT+AMW..lCAAACNEpMVlAsLpAIDp-PRQWMAAuLhp..TsSpsFGpPsWLsthLsGscssu.PLht-Lt-YaccoPQElsALMAaSGL.uascAlsPslshPulLPcpsD.D..uGpK...YWLtVsLsIFPTsFYlhsEYIPF+MPRW.tG.SH-EIp-hL-SssC-HLSstE+-+LclLIWWD......................................................................................... 0 3 14 21 +13069 PF13227 DUF4035 Protein of unknown function (DUF4035) Aldam G, Mistry J gba Pfam-B_2076 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 67 and 93 amino acids in length. 22.10 22.10 23.20 22.80 21.00 20.40 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.69 0.72 -4.59 18 233 2010-08-19 12:04:48 2010-08-19 13:04:48 1 2 184 0 14 135 5 52.90 61 63.23 CHANGED htFDRpsPlGDhRsDh+sAQIup..AshsuQGs+..sslsDhM.tWspc.....-cctpDDu ..hEFDRlSPLGDERGDIRNAQIV+..AVFGAQGhs..VuLcDAMLsWGED.....EDcsEsDP............................. 0 0 5 11 +13070 PF13228 DUF4037 Domain of unknown function (DUF4037) Aldam G, Mistry J gba Pfam-B_2110 (release 24.0) Family This presumed domain is functionally uncharacterised. 
This domain family is found in bacteria and eukaryotes, and is approximately 100 amino acids in length. There is a single completely conserved residue P that may be functionally important. 22.10 22.10 22.70 22.10 21.60 21.70 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.11 0.72 -3.78 37 317 2010-08-19 12:59:41 2010-08-19 13:59:41 1 17 297 0 68 282 5 100.10 31 26.79 CHANGED WLplPpppLups.TsGpVFtDsh.GphophRppL..paYP-DlhhthlAsphhphuQtGQaNhsRshpRsDthuutlshscFlcsshpLlaLLN+....pYhPYa...KWh .......................................pphlush.psGtlhaDt..Gchspl+ccl..tsYP--LtcphIucpLhhh...sp..........a....Nht....+sL+RpDhluhhtsls-hhcshhsllFhLN+....hatPhh...KW.............. 0 36 54 60 +13071 PF13229 Beta_helix Right handed beta helix region Bateman A agb Jackhmmer:O26996 Family This region contains a parallel beta helix region that shares some similarity with Pectate lyases. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.18 0.71 -12.28 0.71 -4.48 178 5365 2012-10-02 14:50:22 2010-08-19 14:58:07 1 525 1536 7 2617 9515 1881 170.30 13 29.99 CHANGED sGl....hlpssss.................hp.....lpssplpss......................tssGlhltssss....................hhlp.s......splps.........t..ul.h.t........................spsslpsstl......psss.........................uhhh...sssshlpssplpss........ts..............Gl...h..s................sss....spl....psNplpsss.........................tsGlhltsss....................sh..psNplp............sst........................tsGl.hhssss...............splpsNph 
................................................................................................................................................................t...................................lt..t..p.h..t......................................tt..tG.lh..ht..ssst..................................................hhlp..s.........................splts.............................t.tu.l..h.ht.ts.................................................................sss.tlpss.pl......................psst..............................................tGl..hh....t.....s......s....s.....s...s......h..l.p.s......N..plt...ss.....................sss......................................................................Gl..hlt.s..............................sss.....................stl............ps.N.p.lp.s..st.....................................................................tsG...l...h...ltsss..................................ph..h.ps.N.h.lh...................s.st...............................................................G.h.......................................h...................................................................................................................................................................................................................................... 0 1057 1738 2217 +13072 PF13230 GATase_4 GATase_II; Glutamine amidotransferases class-II Mistry J jm14 Jackhmmer:Q7LYB4 Family This family captures members that are not found in Pfam:PF00310. 
20.70 20.70 20.70 20.70 20.60 20.60 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.65 0.70 -5.71 97 1953 2012-10-03 21:14:07 2010-08-19 15:05:10 1 8 1663 4 535 6397 3754 248.90 35 89.16 CHANGED MCQLLGMNCAsPTDlTFSFoGFusRGGlT-cHuDGWGIAFFE..........DKACRLFlDpQuuusSPlAEhVKcYP.IKS+NsIAHIRKATQG+lhLENsHPFhREL.WGRHWIFAHNGDL.ps..asPpL....uGsYp..PVGsTDSEpAFChLhptLRctF.s.ts.pPsLsELFc.tlu-Lo+cIsc+GlFNFLhSNGQALFAHCS....T+......LaYlVR+a..PF.usAHLl.DtDlslDFuch..TTPEDRVAVIATpPLT.....csEsWTshpPGELlhFpsG...clstp.hplsss-tlhcchpss .............................................................................MCpLhu.h.s...s..s...s...P.s...c..l..s..F..S...h...p..u...h.h.......p......R.u...G.....t......T.s...s....H..tDGa...G.I.u...FY-..............................s+.u.s.R..h..F..+....D..s...p...P....u...a......s.S.s..l....A.....c...h...l........p......p.....a.....s......I.......K......S.......p....s....V.......l..A..H.IRp...A..s.p..G.....p...V...s..l.p.N.o.HP.F........s.....R.....Eh...hG......c........p.W.s.a..AHNGpL...ss......a...c...slp................ssha.p........s......l.......G.....p.......T......D..S..Et.....A...F...C..h.l...L..p.....p..L....pp..........c..........h.....s.....t.........s....................ss...........h..s...........t........l.......hc.......hlt...........p.......l.....s............p..........p........l............p...........p.................t.................G.........s.........F.....N.h..l.L..S..D....G.c.h..lhA..as.s..............sp...............................Laals...R.c.s.........P...F...s...u......p.L.....h....D..p..D....h..p....l...c....h....t......p................sss..p..Dh..VsllATpPLT.................t.s.E.s..Wpth.sGphhhhp..Gp...................thh.................................................................................................................................................... 
0 134 273 423 +13073 PF13231 PMT_2 Dolichyl-phosphate-mannose-protein mannosyltransferase Mistry J jm14 Jackhmmer:O26471 Family This family contains members that are not captured by Pfam:PF02366. 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.14 0.71 -4.33 106 4872 2012-10-03 03:08:05 2010-08-19 17:21:47 1 112 2057 0 1921 6160 2870 159.90 15 29.25 CHANGED c+P.....PhhsW..lh....tlhstlh..G.s.sthulhlssslhsslshhhlatls+ph.hs.ppsulluslhhss.sshhsssuh...ths..Dshhlhhhhhshahhhpshp..csp.h..phh...lhsulhhGluhhoKattshllhs.hll..aLlhs........th.hhpp..thsal.sshlslllh.s.......lhW .........................................................shhhh..hh................th..h.h...t...l.....h.............G.............s..............h...............s........h.....+......l....h...s....h...l...h....s.h.h....s....s....h....h.l......ah....l............s.............c......c...........h.......h...........s.....p.............p..............s............u.........h.........h.............u.......u....l.l....h....sh....h.s..h.....h.....h.h.........hut.......hsp.....-...sh...h.........h..h...h..h...h.....h...u....h.....h....h.............h.....h...........p.....h......h..p....................pt.......p....h...........t.......h....h..............hh...h..u...l..h..h......u....l..u..h..h...s...+....h....h....s...h...h...h..l...h...s...hhl........hl.lhp.....................pp...hhtp........hhhh...h.......s.h.hh.h..h.h.hh.......hh............................................................................ 0 764 1422 1713 +13074 PF13232 Complex1_LYR_1 LYR-motif-like; Complex1_LYR-like Wood V, Coggill P pcc Manual Domain This is a family of proteins carrying the LYR motif of family Complex1_LYR, Pfam:PF05347, likely to be involved in Fe-S cluster biogenesis in mitochondria. 
20.80 18.60 20.80 18.80 20.70 18.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.07 0.72 -3.76 236 283 2012-10-01 20:54:40 2010-08-19 18:36:27 1 8 162 0 201 1849 13 65.40 24 34.50 CHANGED pplLpLYRpl....LRpu.pp......hsshshp........................h...ppp...lRspF+.........................cs+s......hpD......................p.......pIpph.lp.pupc......................pL...phlp .................................................tshpLYRpL..................LRpu..pp........................hs.s..hshR...................................pas...pc+........l+..ctF+..............................................cppp..........hpc.................................p.........clpph..hp..cuhp......................pLphh................................................................................................................................................. 1 61 104 165 +13075 PF13233 Complex1_LYR_2 Complex1_LYR-like Coggill P pcc manual Family This is a family of proteins carrying the LYR motif of family Complex1_LYR, Pfam:PF05347, likely to be involved in Fe-S cluster biogenesis in mitochondria. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.91 0.72 -3.55 66 376 2012-10-01 20:54:40 2010-08-19 18:48:37 1 4 227 0 259 553 1 103.20 18 73.89 CHANGED lhplYR..................plL.+pL.p....h................................................s.hcppl...........ppp...hpp.ppphs....................................................................httthpchpphhpalpsp+...tahph...hppY........p.Ght.hsp-cp..l...................chsuphVshp .....................................................hplYR....................plL..+th.t........h......................................................................shhcphl.......................+pp...Fpp.pptss.................................................................ttthptthpchpp..hhphlpppp.....ph.ph....htph..................s....hs...pp.....................t............................................................................................................................ 0 63 125 203 +13076 PF13234 rRNA_proc-arch rRNA-processing arch domain Wood V, Coggill P pcc Pfam-B_8473 (release 24) Domain Mtr4 is the essential RNA helicase, and is an exosome-activating cofactor. This arch domain is carried in Mtr4 and Ski2 (the cytosolic homologue of Mtr4). The arch domain is required for proper 5.8S rRNA processing, and appears to function independently of canonical helicase activity [1]. 
21.30 19.20 21.30 19.20 21.20 19.10 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.62 0.70 -5.23 86 683 2010-08-20 10:00:23 2010-08-20 11:00:23 1 18 387 9 414 671 13 215.90 22 22.40 CHANGED EaMLc+.SFaQFQsst.........................ulPtLEcclpclcpchsshpl......s-.........Esslp-YYcl+ppLpphpc-hRpll.......s+PpasLsFLQ.sGRLl+lpt...........ssp-aGWGl.....Vlsap.++p.st..pt....................................hsspppYlVDVLlps..s..cssstttps.................stshcPspts.-..cuch....pVVPlsL.sslpuISslRlhlP..cD.L+st-s..+pslh+sl.........pElp+R..F.P-G...lPlLDPlccMpIcDssFp+ll+KI-sLEs+LhssPLpsu..scLp..clYppapcKhclppcl+plKcclppu ......................................................E.Mlc+SFhQFQspp................................................................................slsthpc..plp.p.h.pp.phstlth...............c............................................pts...ltpY...aplppplp.p.hp.p.....p..h.p.phl.......hp.st.....hs....l..h.Lp...sG....Rlltlpt...............................................tt.p..c.hs.hGl.....llpht.ppt....................................................................................hhh.shh.........................................................................................hh.h.h....lttlsthhh...h...........p..h.....tt.............ht.....t..hhttl.........................tcl.ph........h..sts....shlcPhps..htl....p.s.thhp.............hh.php.hpphh.t.t.h.hp........s....ph..............t.h..htt+.tht.phpthc......................................................................................................................................................................... 0 147 240 346 +13078 PF13236 CLU Clustered mitochondria Fey P, Coggill P pcc [1] Domain The CLU domain (CLUstered mitochondria) is a eukaryotic domain found in proteins from fungi, protozoa, plants to humans. 
It is required for correct functioning of the mitochondria and mitochondrial transport [1,2] although the exact function of the domain is unknown [4]. In Dictyostelium the full-length protein is required for a very late step in fission of the outer mitochondrial membrane [2] suggesting that mitochondria are transported along microtubules, as in mammalian cells, rather than along actin filaments, as in budding yeast [1]. Disruption of the protein-impaired cytokinesis and caused mitochondria to cluster at the cell centre [1]. It is likely that CLU functions in a novel pathway that positions mitochondria within the cell based on their physiological state. Disruption of the CLU pathway may enhance oxidative damage, alter gene expression, cause mitochondria to cluster at microtubule plus ends, and lead eventually to mitochondrial failure [3]. 21.40 21.40 21.60 21.50 21.30 20.90 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.51 0.70 -4.92 42 412 2010-08-23 12:34:58 2010-08-23 13:34:58 1 64 248 0 310 412 5 207.90 35 16.22 CHANGED DWN-EaQuh+ElPppolp-RlhR-RhltKlhp-FspsAsc..sAhsllpGplhPlNPp-spp........sphalhNNIFaShu.s-shspapphGG........DcA..AptAuspDLpulphlsph.....D.lsslppLsTsllDYtGpRllAQohlPGlhp....................p...slhYGus-.....................supplhscppFtphl.cphuchh+lKpHpV...h.....t.....hplhsSh-sKGlhGsDuR...pYlLDLhRshPhDlsah 
.......................DWNEEhQss....+E....lP.pp.....slp..-R............lhR.-..R.......h.l.K..lhsDFssAAs+..GAhtllc.sp......lhs.lNPp-..pp......................hphalaNN.....IFFShu..hDs.h..spapphGG......................................Dp.A.....AhsAsspD.LpGl+shs.p.h........D.lpG....Las.sosllDYtGhRl.sAQ................SllP..GIhpt.........................................................ptpp.slhYGu.h..-.....................tsc...slh.s.p.......c.a.h.phh.pchuc.L+lptHtVh.................................spps..thcLhuos-sKGllG.s.DuR...pYlLDLhRhhP.Dhta...................................................................... 0 124 189 271 +13079 PF13237 Fer4_10 4Fe-4S dicluster domain Coggill P pcc Jackhammer:O26799 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 25.50 24.60 25.50 24.60 25.40 24.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.32 0.72 -4.16 189 7356 2012-10-03 08:56:43 2010-08-23 16:55:13 1 391 3234 1 2292 29622 8087 55.40 30 19.67 CHANGED thhhs.pt.C.....h.tCt.....tChp.....s.C......P...................................h.httthttthth.ss.p.......................Cht.........................C......s.......tChps.CP ......................hh...hs.cp..C........l...sCs.......hChp..s..C............Ps............................................................................................tshp.t.t.p.p.h.h....pl....ss...s.p.........................................................C.h.p................................................C.....................G...............pChps..CP............................................................... 
0 833 1631 2006 +13080 PF13238 AAA_18 AAA domain Bateman A agb Jackhmmer:O27656 Domain \N 25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.82 0.71 -3.83 124 2477 2012-10-05 12:31:09 2010-08-24 11:57:14 1 35 1523 30 881 16173 6513 139.40 17 66.84 CHANGED IhlsGssGsGKoTluctL.....tct......h.......................thhltchhhptshhhthsph..............tpttthshptht.hhpthtpt.........................tp.h...ll-shhsthh..tphh.hh...........................lhLpss..chhhcRlpp..Rshpt.p.............cptpschhp ..................................IhltGssGsGKo....Tls...ppL...............pcph.............................................................................thhh.h..p.........h...h..p..t...s...h.h..t...t.h.ppt.....................................................t.p.t......t.h..s.t.s...t...h..........h.h.p.thtp...t.......................................................................................................tt.t..t...l..........l....l..-....s....h..h...s...h..h.........t.p.h.h.hh.........................................................................lhLps..s...c..hh.h...cR.hps.....Rs.tp.p.t.p.......................t.................................................................................................................................................................................................. 0 254 472 688 +13081 PF13239 2TM 2TM domain Bateman A, Galperin M agb Jackhmmer:O27758 Family This short region contains two transmembrane alpha helices that are found associated with a wide range of other domains. This domain may be involved in cell lysis or peptidoglycan turnover. 
24.80 22.90 24.80 22.90 24.50 22.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.96 0.72 -3.92 128 506 2010-08-24 13:02:44 2010-08-24 14:02:44 1 11 339 0 207 516 126 80.20 23 53.61 CHANGED phppApc+l...cch+tFat.HLhsYllVssh..LhhlNh.hs.s.sh.................Ws...........l..a....sh...lhWGlGLhhH..uh.p...s.a......hhhup..pWcc+clp.chhp+ .......................pApc+l...pph...psFhh.Hlhsalllssh....Lhhlsh....hs...sssh.h........................Wh.............l.....a......sh.hsWGluLlhH.ulp...s..a.........hhst....pWppp.php.p.h......................... 0 49 140 187 +13082 PF13240 zinc_ribbon_2 zinc-ribbon domain Coggill P pcc Jackhammer:O26621 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR. Pfam:PF12773. 24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.03 0.72 -7.62 0.72 -4.35 442 1062 2012-10-03 10:42:43 2010-08-24 14:40:20 1 99 704 0 242 1992 209 23.00 39 6.97 CHANGED hCspCGp.pl.s.c.s.spF......CspC..Gsp...l ........hCscCGp.pl.p.-.s.spF.........CspC..Gpt.......... 0 98 197 231 +13083 PF13241 NAD_binding_7 Hydrolase_like; Putative NAD(P)-binding Coggill P pcc Jackhammer:O27094 Domain This domain is found in fungi, plants, archaea and bacteria. 
22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.27 0.72 -3.77 422 3188 2012-10-10 17:06:42 2010-08-24 15:53:40 1 33 2756 11 874 5098 3558 109.30 30 31.68 CHANGED lhl.clps..+cllllGGGpVAt..c+lpsLlp.ts..A.c.l..sV........l.u...Pp.............htc.......................................ph.lp..............h........t........p.........+p.................................................a...........c.sD..................................l.p.....s..........sh..........lVl...uAos-..splNcp..ltp....ts..+.....ph..LsN.ssD........ss...ppss...........hhhPu .................................lhhpLps+plLl.VGG..G..c.V..A...t....RKsphL..lc..sG...A.c......l..pV..........l.u..sp..............hp...................................................sh.lp...............h...........h........p.........c.t....................................................................a.........c.sp.....................................................L.c.s...............sh........Lll...A.AT...s...-....s..t.....l.....N.p.p....V.tp........sA.....ct......cpl.....h..sN.llD.....ss...ptssFhhPu................................................................................................................................... 
0 272 556 743 +13084 PF13242 Hydrolase_like HAD-hyrolase-like Coggill P pcc Jackhammer: Domain \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.81 0.72 -4.27 437 8522 2012-10-03 04:19:28 2010-08-24 17:11:02 1 48 4225 45 2343 23735 6396 74.00 26 28.24 CHANGED s.GKP...s.shhhpt......Ahtth..............................shp..............................pp..........shh.lGDp..sDlhuuppsG..h.ps...lLVho.Gh...pst............pthtt.........h..pssh.l..hss.l...s-h ......................................................................................h.htKP..p..sh.h..hpp........Ahpph.................................................s.h.c................................................hpp............shh.V.GDphhoDl.t.u.ut......p......s.......G.............h....p.o.........l.L....V.h..o....Gh.....tp.......................tth........................ssh..h...h.sl.................................................... 0 726 1394 1948 +13085 PF13243 Prenyltrans_1 Prenyltransferase-like Coggill P pcc Jackhammer:O27751 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.50 0.72 -4.04 26 810 2012-10-03 02:33:51 2010-08-25 13:25:01 1 65 544 6 312 4104 349 105.50 17 18.46 CHANGED lpshlphllspQpp-GuW....s..hs......hs......s........s.hssssthh.tshttt.....t.......ss...sstpul.c+us-alhc..pQps..-Guat...tp...stp...............sh....sp.ssh.hs.sh.hht.thht.........scpp.lc...+ulcalhpp....p.sc 
..........................................................................h...hhth.lh..ptQ.....p......-....G.u..W.............h...hp..............hs...............s......................................s...h..s..o...s..h...sh....u.Lt.th.............s.....................ss......ssp.s..t.l..p.....+..u..s.p....aLhs...........p.Q..p.t...........D.........G..u...a.u.........ts.........stt.................................................................h....th....h.....h.....h...........................................................tt......................................................................................................................... 0 121 211 269 +13086 PF13244 DUF4040 Domain of unknown function (DUF4040) Coggill P pcc Jackhmmer:O27316 Family \N 30.00 30.00 30.30 30.30 29.90 29.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.25 0.72 -3.99 434 1807 2010-08-25 15:48:25 2010-08-25 16:48:25 1 9 1353 0 544 1494 224 71.00 34 10.23 CHANGED hhlllluulss.lh..t.+s+lsAllhhGlhGhslulhFhhhuAPDlALTQhsVEs.loslLhllslppl..............sphtp ...........h.llhlssu.l..h..h.lh..t..+pRLsullhhGshGhslulhFlhhpAPDLALTQh.lVEs.loslLhlL..shp+LPp................... 
0 159 338 455 +13087 PF13245 AAA_19 Part of AAA domain Coggill P pcc Jackhammer:O26587 Domain \N 25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.74 0.72 -4.22 209 2781 2012-10-05 12:31:09 2010-08-25 16:49:56 1 100 1622 0 906 20876 4566 80.60 23 8.66 CHANGED cAlt.uht......ssl...hllsGGPGTGKT.sslptlstlh........................t...sttlLll..uPoscAsc.pl.tpth...........................h..upolcplh .......................................t.....t.t......psh.......hll.pGsP.GTGKT.s....s.h....l.p..t.lAhLl.............................................................................tt.......s.p..p..l...Ll..l.....u...s...p.s.h.hc...hl.pphL.........................................t................t............................................................................ 0 336 668 820 +13088 PF13246 Hydrolase_like2 Putative hydrolase of sodium-potassium ATPase alpha subunit Coggill P pcc Jackhammer:O26582 Family This is a putative hydrolase of the sodium-potassium ATPase alpha subunit. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.28 0.72 -4.03 225 1746 2012-10-03 04:19:28 2010-08-25 17:23:33 1 71 1074 0 530 10781 261 97.10 34 14.14 CHANGED sLCNpup.....h..p...tt..............................................hhG-.soEsALlph....scph..hts..............................................hpt.....h+...pphp+ltp..lPFsSspKhh..ssl.......................t..............................................sst.......hh.hhKGAPEplLc+...Co.pl .............................................................................................................................................hLCNcup..h..sptpts.......c..............................................hsG-.soE.uA..L.lcs.....s.c.hh.....hts..........................................................................................................................shs..........h+.......pc..........c...+...l...sp.......lP.FsS...s.p.Kh.....ssl...........................t..cs..............................................ssss....t..hllhhK...GAP.EpI..L-R..Coph............................ 0 204 318 445 +13089 PF13247 Fer4_11 4Fe-4S dicluster domain Coggill P pcc Jackhmmer:O26500 Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.29 0.72 -11.02 0.72 -4.11 103 12420 2012-10-03 08:56:43 2010-08-25 17:37:53 1 136 2648 60 1943 6770 905 93.70 38 31.84 CHANGED shhhYLPRlCcHCLNPsCluuCPuu....AlYKRpE...........DGlV.LlDpcpCR....GaRhClouCPY+psaaNapoG..+uEKCs.hCaPRlE....sG.s....ssCscoCsu+hRYhGsh ..........................................h...hhshtC.pH...C..p......c..s...sClssCPsu................Ah.h+cp....................s.GlV..h.ls..p-p.Cl.....GCc.hChhACPas........s.....h...............p.........h................s...............p....ss.....................ps.p.K.Cs..hC......h......t......R.ht...................G............................PsClc.sCs.s...p...AlhhG..h............................................ 0 603 1190 1602 +13090 PF13248 zf-ribbon_3 zinc-ribbon domain Coggill P pcc Jackhmmer:O26570 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR. Pfam:PF12773. 23.80 23.80 23.80 23.80 23.70 23.70 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.63 0.72 -8.29 0.72 -4.63 487 1966 2012-10-03 10:42:43 2010-08-26 11:58:27 1 152 1261 0 583 3186 312 25.60 36 8.81 CHANGED t.hhCspCGp.p.h...sss.......spFCspCGspl .........hhCPpCGp.p.h......pss...................spFCspCGppl..... 
0 221 418 509 +13091 PF13249 Prenyltrans_2 Prenyltransferase-like Coggill P pcc Jackhmmer:O26856 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -11.04 0.71 -3.67 105 3084 2012-10-03 02:33:51 2010-08-26 14:46:15 1 123 1058 183 1552 4407 373 121.60 19 29.92 CHANGED lchlhs..t.Qp..sDG...u..as..................h.................tp................tsssshTu.hAlh...uL....t.shspps.......s...............hcch................................hpal.p.p.ppp...sGu..ast..........sst............s..tss....hps.shh..sl.t...sh.......phh..spp......................phhpp....s.....lp.........alhs.hQ....s.......s....s.G.Gaph ...........................................................................thlht.h.Qp....t.s.....G....u...au..............................................s..................tp.........................spsssTs...ts..lt....sL.......s..hhuppp......s....................tpph............................................................hpaL..hs..hQ..........p....s.D....Gu..ahs..................p...........................................t..s.s..t...h.h.s...oh....h......ul..s..ul............thh.s.h.tt........................hpc........u.........hp..........al.hs..hQ......p............c.G.Gat.t.............................................................................................. 0 523 955 1309 +13092 PF13250 DUF4041 Domain of unknown function (DUF4041) Aldam G, Mistry J gba Pfam-B_2162 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and viruses, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF10544. 
21.00 21.00 21.30 22.50 20.40 20.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.39 0.72 -4.41 57 210 2010-08-26 13:56:30 2010-08-26 14:56:30 1 3 206 0 39 173 8 55.90 33 12.28 CHANGED KhhlRuFNuEsDsslsKVphsNlsphccRIp+uFcplNKlspt..slpIstpYLpLK ....KhhlRuFNuEs-shlsKVshpNlpshpp+ItKua-tlNKlhcss.slcIsppaLclK...... 0 15 26 32 +13093 PF13251 DUF4042 Domain of unknown function (DUF4042) Aldam G, Mistry J gba Pfam-B_2172 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 180 amino acids in length. 21.60 21.60 21.60 22.20 21.30 20.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.07 0.71 -4.82 19 156 2010-08-26 14:01:37 2010-08-26 15:01:37 1 11 113 0 95 151 1 167.50 31 16.22 CHANGED KVRluALthlpsls+sh-++shaGYWpslhP-s.......ttsttpsoLhshlLpDPss+sRssAlpsluthLpGS+.aLsQAsptc..ssptuFTsFSsoLAshlhplHcsLhhhLppEsssshLsQllKCLulLlpsTPYpRLphGllschlppl+.hlcc...pDsslpVuuLhshshLlu.s.t.hsEh .......................+lR.uAlhsh..shhcp.c.p+.sl..auYWsshlP-s.............................hs.pstss...........oLhT.hhLp.DP.ssKsRssAhpsLuslL-G..u..+paL.......sh........Ap-sp....................spptuFTsh..Ssplus.lhpLHcsLlhuL............tEss...st..........sLs.pllKsLu.....sLlpssPYpRL.....p.............sLlsplh...ppl+shlpc.........pDsslpsssl.hhthlhus...h................................................... 0 38 54 75 +13094 PF13252 DUF4043 Protein of unknown function (DUF4043) Aldam G, Mistry J gba Pfam-B_2174 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 369 and 424 amino acids in length. There is a single completely conserved residue G that may be functionally important. 
21.10 21.10 23.50 22.80 19.30 18.90 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.73 0.70 -5.46 33 188 2010-08-26 14:03:29 2010-08-26 15:03:29 1 2 154 0 29 134 71 315.30 39 84.63 CHANGED supsusI.chs-LpK.ssGDp.lsasLhspLsucs.shGspplEGptEsLphhspplpIsphR+sVcss..GpMspQRohaslRctA+stLssahschhDphhhlpLuGsp.s...s......s.ttpsphsthtsNslpAPossRhhhuusssstt.............slsu..sDhholchl-phsthuct......s.thhpsh..hs.sucsh..YVhhlsPtQhssL+sssshtp...W.phtpsshsuspu.psPlFp.GshGhasslll+cht.hshphssuss...............................usssslsRAlLLGuQAlshAaGp...........tuuttatasE.......EphDasschtlusstlhGhKKsRFss..st.....pDaGllslDT ........................................t.sssuPlsRhsDLsK.puGDc.lsFslhppLotcP.shGDpclEG..+GEsLpass.sL+INQuRHhVcuG....G+MopQRohasLtppARshLssahschtDpshllHLAGAR.shhss..phhlshstcscapchhhNsVhsPTpcR+hhuussoshp.............plcu..sDlFolsllDshshhl-phshs....slc.h.sst.....hh..sp-sh....YVlhloPtQhschhososs+p...W.phhstssstu+u.ppPLFc.GpsuMapslll+KhsthPIRFhpusp..hstsp.st........................stphsussslcRAhLLGAQAls.AaGp..................cuGtpFphsE.......cphDhsN+sElulshIpGlKKhRFspcsGph......pDaGVIslDT........................... 0 5 12 21 +13095 PF13253 DUF4044 Protein of unknown function (DUF4044) Aldam G, Mistry J gba Pfam-B_2177 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 56 amino acids in length. There is a single completely conserved residue M that may be functionally important. 20.30 20.30 21.00 21.00 19.60 19.20 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.34 0.72 -7.48 0.72 -4.80 32 476 2010-08-26 14:04:45 2010-08-26 15:04:45 1 1 473 0 39 139 0 34.90 49 77.51 CHANGED tp++KKosFpKlThlhVhlMlllTluullhuAluu .....NGpRKKThFEKlTlhlVllMLlsolhGlhAoAluu.... 
1 5 18 26 +13096 PF13254 DUF4045 Domain of unknown function (DUF4045) Aldam G, Mistry J gba Pfam-B_2180 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is typically between 384 and 430 amino acids in length. 21.50 21.50 22.00 21.50 20.90 19.60 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.75 0.70 -5.19 12 108 2010-08-26 14:45:42 2010-08-26 15:45:42 1 8 72 0 88 107 0 241.30 29 23.98 CHANGED RSoSPTKGLGGFVQSAMMKRSDSVSKRWSAQ.........PsshsRs.sShhSsRsShtustt.ss..s...........sp...........................sRPuSSHSEATlV+psp.cs-h.so.ss.........s-shs+ssLs.+stSpSsssss.....sushs.shssosS+TMD.+RWSP..TKuoWLESALN+P-SP+pKtQ..spp..spWhK-.....RQuRuSVDLGRssShK-......htp.s.Gscspssohouhssh.spp-spsscttps.............s.ts...pt...css.ps...csp.ph.pssEtssppss....................tstthtsPs.hsssststssss.....lsu.....pDsl.s+sKPpoP.l.hDFRANLR+RElsp-posp-E.PEFKNVFGKLRKsEopNYVAPD.LK-NIL+GKAALNuTGGPKKop+VDEhKESILK ...........................................................................K..s.................t...s.....................................................................t..................................................................................s.sso+s.p.+RWSP..oK.u.oWL-sALp+.s.-.Pp..................t............t.....................................................................................................................................................................................................................................s........saRusL+.R............t.....t.t...p..s..EhpslhGpL++scsppahsPD.hKtNIhpGKssLs.osGP..s.hhD-h+-ul............................................ 0 16 41 71 +13097 PF13255 DUF4046 Protein of unknown function (DUF4046) Aldam G, Mistry J gba Pfam-B_2182 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 64 and 331 amino acids in length. 20.90 20.90 20.90 20.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.31 0.70 -5.67 13 108 2010-08-26 14:47:10 2010-08-26 15:47:10 1 4 56 0 23 94 4 180.70 41 86.05 CHANGED IE-IYQEILDGKRpRFPsNTWp-DpcNELARRVT+YLIEslLKW-c--I+psWNopLIlKY+LpGlLpp+YsNSPY+MlNDlYPscFKEWEFpMTPLNFWTKEKALEsLKWTIEEKEpLSspcLLclYupKWLccpKLuuPLphaWsGSPYsMIN-LYPsRFKEWEFpMTPNpFWTKEKALEALKWTIEEKEpLss-QLhplYsl+WLpppsLposCplaWssSPYuMIN-LYPspFKEWEFKhTPssFWTKEKALEALKWTIEEKEKLo-EQLLpVYoh+WllKp+LhTPLhRYWpGSPYA .................................................pIYpplL-GKpp+FP.shW..tpc..s.p..c.h.t+R...s...hpYLlpphLchp.....pp...I....php..hlhpY+L.hhl....pt.s..thlp-haPp...............................................................................hapsSP...athlssLYP.t.+apc..h.................................................................................................................................................................................................................... 0 3 21 23 +13098 PF13256 DUF4047 Domain of unknown function (DUF4047) Aldam G, Mistry J gba Pfam-B_2183 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 130 amino acids in length. There are two conserved sequence motifs: TEA and FPKT. 22.00 22.00 30.30 30.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.75 0.71 -4.17 5 74 2010-08-26 14:53:30 2010-08-26 15:53:30 1 1 73 0 3 62 0 125.00 72 47.63 CHANGED VVTYTEAAFVsETKVQuoISTAIVFPKTIDTLscpAcQHEclIL+sYEsMKpElcs-.SlElLEQplssW+pQREKVssEREALQ+IYTEIEsYYsQlpEsl+sccS-SsKcVLpYVNAGF...ppVK- .hMTYTEAAFIHETKVt.A.TISTA.IFPKTVDpLhEQAcQHKclILHEYcpMKuKLpsp.SspEIEQAlslW+QGREKIsAEREuLQ+VYppIEpPYNQlQEELKs.NpoESsKQVhsYVNtGF+hVKE....................... 
0 0 1 1 +13099 PF13257 DUF4048 Domain of unknown function (DUF4048) Aldam G, Mistry J gba Pfam-B_2186 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is typically between 228 and 257 amino acids in length. 25.00 25.00 29.60 29.60 20.70 19.30 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -12.12 0.70 -4.74 14 71 2010-08-26 15:06:14 2010-08-26 16:06:14 1 2 69 0 56 72 0 205.90 30 36.84 CHANGED HTRTLSLLSPsssss.........ss.sssssssssRsoRpPRSATLP.SV-Rosssps.t......uspso--.hspWRRohPP....PopEALMRTG+QMASDLREGLWTFLEDIRQATVGEEGINATcSRs.......ss...+pssspusSRu..............tsGpsupSsoupSupupssssup.......psop...Sss.-sSFWSEFGIDossQp............tsppspsossssp.......pppppsp.....pss.LDl...D.DNWDsWDTPQP.pKoHTPSSSpS ..................................................HhRsLSLLSs....s..................tps...t....Ptssp.P.Sh-ct....................p.hs.WptshP.....spt-tlhpsG.+QhApDh+pGLWTFlEDIRQATVG-EuI....N..u..s..p..sRs.tt..............tpssspuss+s......................ttsssstspoupu.sp.p.s...stt.............sppsp.......stt.p..sF.Wp-hGhss......................sptstss..st..............................t.pt..................-....................-.ps..Wp.WD.oP............................................................................... 0 8 24 43 +13100 PF13258 DUF4049 Domain of unknown function (DUF4049) Aldam G, Mistry J gba Pfam-B_2191 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 310 and 324 amino acids in length. 
25.00 25.00 40.40 40.00 20.60 19.00 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.06 0.70 -5.46 3 501 2010-08-26 15:31:45 2010-08-26 16:31:45 1 1 233 0 2 271 0 263.30 60 58.68 CHANGED pNsNTAYFGDTDGRVGAVLYALLVSGHIGIRuEGWSLLCQLLKHEDMASsAYc+K........NlKsLaoLLNTRDMILNELHQHVFLKcDAITPCIFLGDHTGDRFSTIFGDKYILTLLNSMRNMEGNKDSRINKNVVVLAGNHEINFNGNYoARLANHKLSuGDTYDLIKTLDVCNYDSEpKVLTSHHGIIRDEE+KCYCLGALQVPFNQMKNPlDPEELANIFNKKHKpHMDD+LFHLIRSNol+STPVYsNYFsNTTDFRPKhEcIFtCGQTLKt......I+QKYGHaGsGVDcsQphDNslMGLNShKpA+scRschhhsSGLSCF .......psspssYFGDTDGpVGAVLYALhsoGHlGIhtcG.shLspLLphED.s.sshh+c........NsphlhslLNpRDhlLppLp.alhlpcDAlTPChFLGDpTGDRFSsIhGDpaIlsLLpphhs........INcNVhVLAGNHEhNhNGNYhtphsphK..stDTYshIKshsVC.YDsch+lhssHHGIhhD-ppKpYhlGslpVshspMpNshDP.ELAsIhNKKH+thhss+hF+h.Ru.ohts...aspYFssoTDaRPK.EslhtCuQhLt.......IpQhhuHpG.Gscpp.t.ssslhGLNuhctthstph......ShhsC..................... 0 2 2 2 +13101 PF13259 DUF4050 Protein of unknown function (DUF4050) Aldam G, Mistry J gba Pfam-B_2193 (release 24.0) Family This family of proteins is functionally uncharacterized. This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 173 amino acids in length. There are two conserved sequence motifs: IPL and FLVD. 
22.80 22.80 23.60 25.10 22.60 22.50 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.79 0.71 -4.19 61 243 2010-08-26 16:15:28 2010-08-26 17:15:28 1 5 148 0 159 242 0 146.50 25 51.54 CHANGED hssushch-sssshspp.........................................tcpptt...pp.hpp...p......hhNpGLtlWppRRptWsGscppp................................................................................................................................................................pspthRp.......s.hsas.shYcpLlspspshs..pPIsLu-hl.........chLVsGWcp-.Ghas .......................................................................................................................................t.t...................................t......t.p.p.pp.p...tptspp..pt.......hhNpG........LhlWpppRptWsGsppp.p................................................................................................................................................................p..ppth+p.........s....hsas.sh..Y-pL.ls.........s..scs.hs..pPIsLu-Ml................chLVssWcp-Ghas............. 0 35 92 135 +13102 PF13260 DUF4051 Protein of unknown function (DUF4051) Aldam G, Mistry J gba Pfam-B_2194 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 90.10 90.00 24.70 23.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.93 0.72 -4.63 5 389 2010-08-27 08:24:20 2010-08-27 09:24:20 1 1 389 0 3 31 2 54.00 83 94.76 CHANGED MFIAWYWIVLIVLVVlGYFCHMKRYCKAFRQDRDALLEARNKLhRRssEEsStp MFIAWYWIVLIsLVVVGYFLHLKRYCRAFRQDRDALLEARNKYLNSTREETAEK.... 0 0 0 1 +13103 PF13261 DUF4052 Protein of unknown function (DUF4052) Aldam G, Mistry J gba Pfam-B_2197 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 220 amino acids in length. 25.00 25.00 61.60 61.50 20.30 20.30 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.49 0.70 -4.90 7 72 2010-08-27 08:59:02 2010-08-27 09:59:02 1 1 71 0 2 52 0 212.20 66 97.78 CHANGED LMKQLKLHINaHYKAILIFWhVALLIKGssoshDlKsI+luFLpDIhNNPSIAIhhFIVlSsFlIQ.DlFRLAVSFGVTRLQFFIGSlCYIlLQSAhFSFLQllhLQshhYpscshSLGupSlcQFFVQFLhYVTlAshFQssVIFppRFpWIGhulGuhFFlGLsSVhYutsGlKtLshpsotsLlsIP.FIhISIsLhllYhllSuIhIRKVSFE LMKQLKLHIpapYKAILIFWhVALLIKGshsAscLpGlKluaL.-IhNNsSIAIhhFIVsSVFlIQ.DlF.hsVSFGVTRlQaFIGuICaIlLQSAlFShLQllhLQshhYph.plshGppulpQFhlQFlFYsTlAChFQssllFppRFpWlGLhhushhhhuhsSshYutlGIKtLlFhsstsLl-IPaFIslSIsLhhlYIlhSulFIRKVSFE...... 0 0 1 1 +13104 PF13262 DUF4054 Protein of unknown function (DUF4054) Aldam G, Mistry J gba Pfam-B_2204 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 120 and 152 amino acids in length. 22.40 22.40 23.20 22.80 21.40 19.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.39 0.72 -3.79 59 275 2010-08-27 10:40:30 2010-08-27 11:40:30 1 1 248 0 36 227 20 106.50 27 80.09 CHANGED F+ttaPpFss...hP-stlphhls.A.phhlssp.........phuc.hhppuhtLhsAHhhsLsttstt........sus.ssu..hhoStssGplSlS.hssssssss.shh..hspTsYGppahpLhp ............................................F+ttaPpFss...............hscsplphhls.A.s.hlsps..........thsc.hhthhhtLhsAHhhsLtstsst......................................ssutssu...shoS..cosuplSlS.assss....s.....tss.ts..a.....aspT.YGtpahpLh........................... 0 6 22 28 +13105 PF13263 PHP_C PHP-associated Coggill P pcc Jackhmmer:O27523 Domain This is a subunit, probably the alpha, of bacterial and eukaryotic DNA polymerase III, associated with the PHP domain, Pfam:PF02811. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.61 0.72 -4.46 143 736 2012-10-03 00:45:34 2010-08-27 12:03:46 1 15 540 9 305 732 62 49.30 25 16.25 CHANGED spsNccAtphAcchslPhhuGSDAH.hspplGpuhThh.......stshp.....s...c-llpul+cGps ..................s.phApphs.lPhlsuSDAH..thp..plG..psas.h........................s....tthhthlht...................................... 0 98 213 275 +13106 PF13264 DUF4055 Domain of unknown function (DUF4055) Aldam G, Mistry J gba Pfam-B_2501 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 140 amino acids in length. 25.00 25.00 27.90 27.70 23.70 22.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.59 0.71 -4.08 30 180 2010-08-27 11:19:37 2010-08-27 12:19:37 1 2 157 0 27 167 145 136.70 29 31.58 CHANGED PLLsLAplNltHapsSADhcp.lahsupPphhlsGh..ssp..........tslpl.Gupush.LPp.sus.htalchousuls..+pshcchEspMhphGA+llppssss.cTuspupt-pssppSsLtshssslp-AlspuLcasApalG ....PLLsLAplNltHapspuDhpp.lahsupP.hhhpGl...spp........................pslsl.Gupssh.L....sp..s...us.ht.....alptsusult..+pshcchcspMhphGA+..l.lppssss..couspspt-pssppSsLtthssslp-AlspALphsAcah............................. 0 4 12 19 +13107 PF13265 DUF4056 Protein of unknown function (DUF4056) Aldam G, Mistry J gba Pfam-B_2502 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 355 and 380 amino acids in length. 
21.10 21.10 22.10 71.80 20.70 19.50 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.70 0.70 -5.40 27 134 2010-08-27 13:34:41 2010-08-27 14:34:41 1 1 128 0 22 109 0 263.80 52 72.18 CHANGED hsshs.ls.sPpGLRPCCAFGYsL+sclhGlPVPFYplsNVl-s-sLGpH+YNDuhhus...susLlG..lusEpNGLlYTp+GGFIDlAHVRDTADhThYLFopIhs+LGppaplsLssELusRpIpa.ps...ssPhssp-RasLoAaLAApLAFpLAtWHEIAQWYGapSVsGFsEtlSAFSPEDLYSNhLGA+LAhslILpupshShptaspuhsphL.puLppLtAhscspT+ptFcplDGhWWsSp+RlP-KaLVL+RcYclupsRhPshsstp ..................s..ssP..GLRPCCAFGYsL+splhGlPVPFaplsNVl-lDsLGtH+YNsus.uh...ssuLlG....LScEpNGllYTcRGGFIDhA............HVRDTADhThYLFpplhspLGp.thplsLssELtsRpIpa..ps..o..sslsscERhpluA.lAAalAFpLApWHEIAQWaGhpSVsGFsEtsSAFSPEDLYSNhLGA+LAhsllLs............s.stspppaspshsphLcptLpcLtAps.pshTptphppLDGhWWsSpRRlP-KaLlL+RcYcLu.shLPs.ss.s-....... 0 2 7 16 +13108 PF13266 DUF4057 Protein of unknown function (DUF4057) Aldam G, Mistry J gba Pfam-B_2503 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 279 and 322 amino acids in length. 
21.00 21.00 33.20 23.10 18.90 18.80 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.16 0.70 -11.98 0.70 -4.95 13 114 2010-08-27 13:36:17 2010-08-27 14:36:17 1 7 22 0 65 120 0 222.50 44 87.59 CHANGED RuTPVRKPH..TSTADLLoWs..EsPPssus.....usus.sutRsHQPSDGISKVVFGGQVT-EEAESLsK..RKPCSuaKhKEMTGSGIFussucs-uuEsuuuss.ss.KTslRhYQQussuI.SQISFupEESVSPKKPTSlPEVAKQRELSGTLcoEuDsKhpKQlSsAKsKELSGHDIFAPPPEIpPRsh.ss..Rthth+tshshucsss+shppushhsssAGu.Sph.hsp-sVlKTAKKI.asQKFsELTGNsIFKGDs...sPuSAEK.sLSsAKL+EMSGSsIFADGKu...poRDYlG.GVRKPPGGESSIA ....................................................slRtsH..tsTusLL.Ws.................................ss.ssh.sh.........pPu.tslp.hhh.Gu.lopcEspsLsK......+K.CSs.KhKEhTGSGIFsttupstss-su...sss....+Ts.+.aQth.ssl.SpISFut-tslSPKKPoolsEVAKQRELSGThpo.-s..-sKhp.+.QhSpAKsKELSG.psIFuPP.-..s+s......................t.t....t.........s..ss.....hs.p-sshKT.uKKI..spK..ht..-L.oGN.........sIFK.tD....ssuoA.E.K...LSpAKL+EhoGssIFADG.Ks....sRDhhG.GhRKPPGG-SSIA.............................. 2 13 45 55 +13109 PF13267 DUF4058 Protein of unknown function (DUF4058) Aldam G, Mistry J gba Pfam-B_2520 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 244 and 264 amino acids in length. 
25.00 25.00 39.30 33.10 16.40 15.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.76 0.70 -5.25 21 75 2010-08-27 13:37:48 2010-08-27 14:37:48 1 3 27 0 25 78 0 216.80 37 96.42 CHANGED MtsPFPGMsPYLEpPslWP-VHppLIssluDtLsPQLpP+YcssI-cRlYhtsspps...lllu..l..PDVsVhcpp..........ssssssuss.ssspPlsVslshs-.l+psaLElR-Vss.ppVVTsIElLSPsNKRsGcGRt.sYp+KRpclLsStTHLlEIDLLRuGp.....shPh.hsshstucYpILlSRuscRPpA-lYshsLp-PlPshslPLpssDs-shlDLpsllpplYc+AuY-htIDYp...ppPsPP.Lssc-ssWlcphL ......M.sPFPGMsPYLEpsthWs-VHptLIsths-hLtspltPpYhstlp.hlh.tt..pts.......hhht..lPDlsVhppp.................ssssssss.s.stP.hslth.h...s..h+pthlElRplts.tplVTsIElLSPsNK+sG.spt.tYppKRpplhtS..sHLlEIDLLRtGp.................h...ss...ssYhlllSRupcRPt...s-las..hsLppslPshslPLp..Ds-shlsLptllpplYccutYch.tIDYp...p..s.Ps.Ls.p-.tsWlpth....................... 0 7 23 25 +13110 PF13268 DUF4059 Protein of unknown function (DUF4059) Aldam G, Mistry J gba Pfam-B_2521 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved DKT sequence motif. 25.00 25.00 43.60 43.50 21.20 21.20 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.41 0.72 -3.79 11 344 2010-08-27 15:00:50 2010-08-27 16:00:50 1 1 341 0 20 91 0 69.40 64 96.52 CHANGED MLlpIFuLYlpGLlLuslhllllullWlhaRAhp+hDKTu+ERQuaLYDhLMIuIhTIPILSFAhMuILLVl ..MLlplFSLYhcuLILToILVLIhLGIWIGLRAhSGVDKTA+sRQAHLYDMIMIGVLllPVLSFAVMSLlLVF. 0 1 3 11 +13111 PF13269 DUF4060 Protein of unknown function (DUF4060) Aldam G, Mistry J gba Pfam-B_2524 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There are two conserved sequence motifs: VEVV and SYVAT. 
20.10 20.10 22.70 25.00 20.00 19.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.38 0.72 -4.05 7 128 2010-08-27 15:01:51 2010-08-27 16:01:51 1 1 118 0 14 64 0 72.90 57 90.07 CHANGED hI.RucpsPlthhAscsALstHhptYG-.GRQth.ssYolthcus+lsVEVVsR+pSYVATsMsGsR+LppLPG .hI.+.uc.ps..thhsscsAlstHpcpaG-.uRQtasosYpVhhcss+VsVEVVsRppSYVATsMIGsRpLppLsu...... 1 0 3 9 +13112 PF13270 DUF4061 Domain of unknown function (DUF4061) Aldam G, Mistry J gba Pfam-B_2526 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 90 amino acids in length. There is a conserved AFG sequence motif. 21.90 21.90 21.90 22.90 21.80 20.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.92 0.72 -3.89 14 156 2010-08-27 15:03:06 2010-08-27 16:03:06 1 3 79 0 89 134 0 85.20 58 39.73 CHANGED VsDV+cME+uLLsLLsDFHSGKLpAFGpsCohEQMp+lRE.QEpLA+LHF-Lsspt-chs-c...........ttpppspuppNMc+LlppLEpLS.SIpK ..........VoDVpEMEpGLLsLLNDFHSGKLQAF.G............p-C..S..hEQMEHVR-MQEKLARLHF-L.sphE-....sE-...............p++stuDpNl-pLLssLEpLs.SIpp................................ 0 20 27 54 +13113 PF13271 DUF4062 Domain of unknown function (DUF4062) Aldam G, Mistry J gba Pfam-B_2536 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 80 amino acids in length. There is a conserved SST sequence motif. 
21.90 21.90 21.90 22.10 21.80 21.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.65 0.72 -3.96 28 368 2010-08-27 15:04:52 2010-08-27 16:04:52 1 88 282 0 170 346 35 85.70 28 9.79 CHANGED plFlSSTapDLp-ERpslhpsl.hchsahPhGMEh..F...sAu.DccphchhpchI.DcsDhYlLIlGsRYGShs....csu...hSaTctEY-aA ................................plFlSSTap..D.hptE......R...p........tlhc........s...l........hp..........h......s...........h.hs.h..th.Eh.....h.................ssp..sppp...hc.ls...h....cpl.cpsD......ha.....lh.llGsRYG.hs.......t.ps...................hSh.sp.Ea..A................................ 0 73 110 140 +13114 PF13272 DUF4063 Protein of unknown function (DUF4063) Aldam G, Mistry J gba Pfam-B_3026 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 95 and 123 amino acids in length. There is a conserved RRA sequence motif. 22.10 22.10 34.40 31.50 19.10 21.40 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.47 0.72 -4.24 16 129 2010-08-27 15:06:18 2010-08-27 16:06:18 1 1 114 0 24 92 1 88.90 52 81.40 CHANGED sRLssallsullLLhslullSPQQ...LPVllYKLSLlsLAAlsGYWLDRsLFPYARPuuYL....p......ctssh.hupu-aPls...pGYphlFsAAhLRRAlIVu.ushLuVuLG ..........PRLouWLlsollLhulIu.hsSPtQ...lPVVlYKLoLloLuAVLGYWLDRSLFP.a.A.RPsuah....h............................................................................... 
0 6 15 20 +13115 PF13273 DUF4064 Protein of unknown function (DUF4064) Coggill P pcc Jackhmmer:O26620 Family \N 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.74 0.72 -3.90 50 1109 2012-10-02 01:14:40 2010-08-27 16:08:03 1 4 408 0 62 435 1 101.60 24 57.04 CHANGED KRTsEhlLulIGs...Ilu....ll...huhhhlh...l..u.h.h......u.s...........tp....h..........................................hhhshhullhsIl...uI......luull..lp+....cs..p...luGllhlluullslls .....................RhsEhlLuh...Iuh...llt....ll...hslhshh...h.h.hl......us................sshtpphttphs..t..................sp.hhshhpsh.h.hslhlhh.l.l.shll....uh......luhl.t...hpt....pp.+..luGlLhlIuuIlsh..s................ 0 17 32 58 +13116 PF13274 DUF4065 Protein of unknown function (DUF4065) Bateman A agb Jackhmmer:B5ZAK8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and viruses. Proteins in this family are typically between 155 and 202 amino acids in length. 25.70 25.70 25.80 25.70 25.50 24.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.83 0.72 -3.35 264 905 2010-08-27 15:57:42 2010-08-27 16:57:42 1 8 726 0 195 777 25 106.70 22 56.07 CHANGED LpKLlY.a.upsht....lth..s....p............s.......lh.s..-ph.pAapaGPV.......h.....p...lYpph+.........tht.....................................................lspppt..c....ll-...pl....hppau....ph...su.hpLpch.oH.....................t.ts.W.........pps ...................LpKLlYaupshtlth.hs...c...................s........Lh.s...pph.pAWpaGPVhsp..........lapth+.........tht......................................................................p.pplspppt.....c.........lls...pl....hppau.......ph..............ss.hpLtch.oH.tp.ts.W...h............................................................................................ 
0 85 142 174 +13117 PF13275 S4_2 S4 domain Bateman A agb Jackhmmer:B5ZAQ9 Domain The S4 domain is a small domain consisting of 60-65 amino acid residues that was detected in the bacterial ribosomal protein S4. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.95 0.72 -4.38 202 2674 2012-10-01 23:15:27 2010-08-27 17:10:54 1 5 2633 1 435 1579 178 65.30 38 73.24 CHANGED lpl.ps.-aIpLsQlLKhssllsoGGpAKhhlt-s..tVtVNG-sEsRRG+KlhsGDhVph......s..spphplh ......................l.p.paIpLsphLKhtGlscSGGpAKhhls-s..pVhVNGp..lEoR..RG+KlhsG.DhVph...s.stphpl.............. 1 129 267 362 +13118 PF13276 HTH_21 HTH-like domain Bateman A agb Jackhmmer:B5ZBQ5 Domain This domain contains a predicted helix-turn-helix suggesting a DNA-binding function. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.02 0.72 -3.96 515 11498 2012-10-04 14:01:12 2010-08-27 17:18:47 1 58 2749 0 1982 9551 1161 59.50 26 23.43 CHANGED p....h.c.p.t..h.h.pt.............Ipplap...pp...pt...........pY...GhR+l.pt.tL.c.....c................p....h..t...h........ls+++VtRLM...+.p.hGL....pu......h.h...+..++t ...............................ttp......l.p.pt......ltplap....cp...pt...........sY...GhR.+lt......t..tL.p......c................p..........t...h.......ls++.+VtRLM...+.phuL....ps.....p..h..+.tc..................................... 0 514 1189 1570 +13119 PF13277 YmdB YmdB-like protein Bateman A agb Jackhmmer:B5ZC04 Domain This family of putative phosphoesterases contains the B. subtilis protein YmdB Swiss:O31775. 
27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.62 0.70 -5.36 140 1315 2012-10-02 19:15:56 2010-08-27 17:30:23 1 3 1256 8 347 881 1521 250.70 47 94.97 CHANGED LFlGDlVG+sGRpslpcpLPpL+cch.plDhllsNGENA.A.uGhGlTtchsccLhssGlDslThGNHsWDp+Elhsal-p.ps.+........llRPhNaPp....ssPGcGht......lhcs..sGp+ltVlNlhGR.lFM......pshs....sPFpsh-pllp...ph.......ttp..sshl......lVDhHAEATSEKhAhGaalDGRlS......sVlGTHTHV.TADppILPsGTAYlTDsGMsGsa.cSVIGhcp-tslc+FlothPp.R...FpsApG..ps..pLsGlhl-hD-pTGcAtpIphlp .................................................LFlGDlVGcsGRcslpphLPpL+pca.c.shsIlNGENA......A......s.GpG...lT.....ccIhcpll.csG.lDslThGNHsWDp..+.E.lh-.FI-c.t.p.+........llR.......PANaPc......ssP....Gp..Ght........hlps.....Ns....tcl..uVlNL.GR....sFM.........s.sl-..sPFc.ps-pllpph..........pcpsshI......hVDFHAEsTSEK...A.........hGaaLD.....GRsS......AVVGTHTH.VtTADpRIL.....P................pGTAYlTDlGMTGs.Y.DullGhc+-tllc+FlTslPp...+...hps.s.p.G...c..s..hLsGVll-lDc.p..oG+ApcIcpI................................................ 0 143 256 301 +13120 PF13278 DUF4066 Putative amidotransferase Bateman A agb Jackhmmer:B5ZBC5 Domain This domain contains similarities to other amidotransferase families such as Pfam:PF00117. Some members of the family lack the likely catalytic residues. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.82 0.71 -4.98 51 5657 2012-10-03 00:28:14 2010-08-27 17:42:27 1 16 1638 20 2096 9111 779 160.80 25 54.62 CHANGED Lsa-.GFs-lDhhhshslLspsp........t.t...aplplsusssp.Vp..........Sh.sGlsl.tsptsLpp...sspsD.sVL....lsuGhp.....pthssc.stlhspL.phc................stt...phluutCoGshlLAchGLLsshsAsTchpspshlppttspVh...sp.asssG.........slsTAG.Gsluuh.Luhallt ......................................................................................hhsshphhshsssh-.sh.thss..................................t....ha...p.h...p...l..h...u......s......s.....s......s...l.p.........................................s.s....s.....G.....l....p.....l......s..s....t.s..h..ss................hs..p.....s...D....h.....ll.................V..s.G.uhs....................t.t.h..ts...t....tl....l.....s.....h....l...p.p.tt......................................tpu.......thlsulC.o.G.u..a.l.L.At.A.G.L.L..c...G...+..c.A..T.....T.......H.......W............t..........h............h.......s.......t.......h...t..p...t......a........P....p....l....p....h...t...............s...t.....h...a...l....p...D.u............................................................s.lhTu.u...Gss...A..ulDhsLtll.................................................... 0 461 1105 1633 +13121 PF13279 4HBT_2 Thioesterase-like superfamily Bateman A agb Jackhmmer:Q7P2C3 Domain This family contains a wide variety of enzymes, principally thioesterases. These enzymes are part of the Hotdog fold superfamily [1]. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -11.44 0.71 -3.66 179 4845 2012-10-02 20:54:35 2010-08-31 15:04:51 1 19 2807 37 1526 6021 3343 126.80 18 76.66 CHANGED Vt.s-.hDh................................t...Hh....s..sspYl......................t.h....hpt.up..............phht...............hsh.....thhttt...............th..................uhhhsps....plpah..p........................plp.hs-p.h.........lp..............hpl..hs........hsppphthhhph..............hp..............tsp...h...............Apspthhhhh.shpp.................................................................................ssshP......pt............ltptl ..............................................................t-.hD............................hhs.Hl....s....NupYh......................pa....h-p.ARh...............................................................phh.p..................phsh..........hp.hptp....................................s.h.............................shl.lsch........phpYh..p..........................lt..hs-p..l..............plp.................spl.....hp....................hssp..php.hp..hp..l............hp.................................ssph.....s.........................................up..uph.h..hlhl..-hps...p................................................................................................hs.....................h.h.......................................................................................................................... 0 432 883 1254 +13122 PF13280 WYL WYL domain Bateman A agb Jackhmmer:Q7P457 Domain This presumed domain is around 170 amino acids in length.\ \ It is found to the C-terminus of a DNA-binding helix-turn-helix domain. This domain may be involved in binding to an as yet unknown ligand that allows a transcriptional regulation response to that molecule. 
There are a number of proteins that contain two tandem copies of this domain such as Swiss:Q47P13. This suggests that this domain may form a dimeric arrangement. 25.80 25.80 25.90 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.14 0.71 -4.39 746 6059 2010-09-01 09:50:02 2010-09-01 10:50:02 1 38 2680 0 1655 5249 711 156.10 17 51.55 CHANGED p.h...l...p.........t.......lt....pAltppc...tlph.....pY..ps..................................pptp.....p................R.p....l..........c.........PhtL.h..h..p..p..s..p..WYL..hu....h........s..p.............................p.......c...........p.........s................h..R...........tF+..lcRl....p..s..lp....hh..s.....p..................................ph..p.......hs...sh....c.......l......p........................p..hh.p......p........h..h.....t..hht................t...hp..l......pl............c..h.........s...sps...sph........lh....p....p.......hh.......t....s.............t.p..........................lt...............p.....ps..........ss.t.h........h.....hp.h...p.....h..t.s.....p.....t.....h.h.thlhua.G..s.p..lcVlp....Ptp..L+ppltpphpp.hh 
..............................................................................................t..httltpAltppp....hlph.....p.Yps.................................ttptp......p......................................R.p...lp.......PhtL.hh........t...s..s...p.....WY....L..hu..a........st............................................................p......c.............p...........p........................h..R.........sF+..l..sRI....p..s...lp....hh..s..p.............................................th.t..............t......s.........h..t.................................p..h.h.p......p...................t.....................ph..hl............h.h.........t....th......h..h........h....t.............h..................t......................................................h................t.......t..........pt.......h..........h.....hp.h........h.....t....................h..h...h...l...ht..hu...............t..htllt.....P.t....lht.hht.ht....................................................................................................... 0 652 1199 1458 +13123 PF13281 DUF4071 Domain of unknown function (DUF4071) Aldam G pcc Pfam-B_2008 (release 24.0) Family This domain is found at the N-terminus of many serine-threonine kinase-like proteins. 
24.10 23.40 24.60 24.10 24.00 23.30 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.23 0.70 -5.61 14 275 2010-09-01 12:57:33 2010-09-01 13:57:33 1 13 141 0 144 258 14 309.10 37 31.02 CHANGED LhYcLGVRESFphppNIlLas-ss.p...-sh.tl+..h........ushphlPYhlsspu.......thhps.....pttcthh..s.th..sh.shsLsstlhpLlpss.phppstah+Ephhs-lRpA...R-phsu..cpLpctLpclct+LDssplLos-IlhsLhLSYRDlQDYsuMlcLV-cLpsls.h..lssss.lpapYAFALNRRNpsGDREKALpllhph.......lppccshusDhhClhGRIYKDhFhcSshpcpso.....LcpAIcWY+KuFEsp.PstYuGINhsTLLhhsGpcFpps.ELpplu..hhLssLlG+KGsLsphpDYWDVATahElulLApDhtKulpAuEpha+L+sPsWYL+SThpNIpLlcch++phpt....s.c.phhpFWh-h .....................................................LhYHLGVRESFsMtpNll.Lhppts......th.uL+..h..........usYhhlsYhlssps...............phhsspst.h.....pthsphh..s.........t........hhsL.sRhhp..lLpsh.php..S..p..t..ah+.E.phhpD.lRpA....Rphapu..........p....pLtttLtclptRlD.s..c.lLos-llh..slLLS.YRDlQDYsuhlcLV-sLphlPshc.hsp....p.pl..pFpYuFALNR...R....N..sG.DR...t+ALplhl.h............lpp.p...tp.....sssD..hhCLsGRIYKDh...Fl.cS..sh..p..Dsps..............h-pAh.pWY+...KuF-hp..s..........sh...auGINhAs..L...Ll......huG.......p.....p...Fc..............so.E.............Lp.p.lG........hpL.ssLlG......+...KG.....s..........lpph.ppYW-VuhahtsslL....As...Dhhc.slpAuEpha.KL.p.sPhW..........Y.Lc.S.....hhpslhlhp+Ftt..t......s...phhpFWhc.................................................................................... 0 29 48 89 +13124 PF13282 DUF4070 Domain of unknown function (DUF4070) Aldam G pcc Pfam-B_2005 (release 24.0) Domain This is a bacterial domain often found at the C-terminus of Radical_SAM methylases. 
21.60 21.60 21.60 21.80 21.50 21.50 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.74 0.71 -4.66 55 231 2010-09-01 13:08:55 2010-09-01 14:08:55 1 4 185 0 116 262 53 141.90 28 27.44 CHANGED TsLWcRLc+EGRLh.......tpssssQ.sshhNFlPshPh-clsptYhcshhpLY-PcpYhcRshphhhphss...sptph..........shtsL+AlhplhW+.Glhp.cpRhpFW+hLhshlh+pPpsl.hhlslsshucHFhcap.phlpcplcppl ...................TsLacRLc+EGRLl.................pstsssp..s..sthNFlP.p.hPh-clhstYhcshtplYsP..ctahcRhhphhtphtss....t..h.t..................s.htsl....puh....hpl.....ha+......G.l....ht..ctRhtFW+hhhthlh..+...s...P..t..h.hthhlshshhscHahpat.p.shpt.t...h................................................... 0 33 79 103 +13125 PF13283 NfrA_C Bacteriophage N adsorption protein A C-term Aldam G pcc Pfam-B_2015 (release 24.0) Family The function of this domain is unknown but it is found at the C-terminus of bacteriophage N4 adsorption protein A, in association with an N-terminal region of TPR repeats. 25.00 25.00 34.10 45.50 24.90 23.80 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.88 0.71 -4.91 14 355 2010-09-01 13:23:58 2010-09-01 14:23:58 1 21 350 0 31 204 0 175.00 65 19.09 CHANGED sps.susTGhsTsQGulGsRaKPlpstNLllssp...RhhtlG.shutsDWLLRhuYS....ss.Gs..DL+VstPsWhohplYsEuuaalppsphh.sssEuchG+oaRlsuhss+LslhPassluusaDostssp...hAlGuGsGlshRaWFREscYsAPtSalDlolQYRhpLsss-.RucGlhhcAs .................G.ENGVMMPVKNPMSGTGLRWKPLRDQIFFLAVE................QQLPLN.GQNGASDTMLRASASFFNGGKYSD......EWHPNGS..GWFAQNLYLDAAQYIRQD.IQA.WTADYRVSWHQKVANGQ...TIEPYAHVQD.N.........G....YRDKGTQ.....................GAQLGGVGVRWNIWTGETHYDAWPHKVSLGVEYQHTFKA..IN..QRNG......ERNNA.... 0 2 9 18 +13126 PF13284 DUF4072 Domain of unknown function (DUF4072) Aldam G pcc Pfam-B_2021 (release 24.0) Family This short domain is normally found at the very N-terminus of Hyrdrolases Pfam:PF00702. 
21.90 21.90 21.90 23.70 21.60 21.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.11 0.72 -3.78 23 106 2010-09-01 13:49:34 2010-09-01 14:49:34 1 2 105 0 40 85 2 47.80 41 16.54 CHANGED LllQSss.LusschcsLssLupusplpp.lsspAhRltsA..ssspRs-lc LVlQSsuPLSsuHt+sLsALucusclst.lsspAhRltsA..ssupRsDlD...... 0 5 19 30 +13127 PF13285 DUF4073 Domain of unknown function (DUF4073) Aldam G pcc Pfam-B_2039 (release 24.0) Family This family is frequently found at the C-terminus of bacterial proteins carrying the family, Metallophos Pfam:PF00149. 25.00 25.00 25.30 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.92 0.71 -4.80 8 102 2010-09-01 13:49:58 2010-09-01 14:49:58 1 6 101 0 3 91 0 136.80 62 22.17 CHANGED LNLPDWAGKKKIsG.GDcKGFTVVNTGGIETGWMSAGPNGGEKTAPDGhSFKQGLQVKAYGNDVVVTAYDYKRDKsIKKLLISDuKIAQMAPDVTADDsKNVIVGATEYMEYoVEGTNEWpTYs.usPPKFDGDKlVYVRHKGEMNLEPGLTQLLRFSs ..................LNLPDWAGKKKItG.G.Dc...KGFTVVNTGGIETGWMSAGPNGGEK.TAPDGYSFKQGLQVKAYGs.DVhVTAYDYKRDK-IKKLLISsSKIAQMAPN.VTADDoKNIIVGATEYMEYSl-GTsEWhTYs.uNPPKFDGDKhVYVRHKGEMNLEPGLTQLLRFS.s.............. 0 1 2 2 +13128 PF13286 HD_assoc Phosphohydrolase-associated domain Aldam G pcc Pfam-B_2016 (release 24.0) Domain This domain is found on bacterial and archaeal metal-dependent phosphohydrolases. 
22.20 22.20 22.30 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -10.30 0.72 -3.71 344 2449 2010-09-01 14:47:52 2010-09-01 15:47:52 1 4 2227 6 684 1967 1173 91.00 22 22.24 CHANGED lshssp.httthptLKpFlapplYcpsplpphpt+uppllpcLFph....a...hs......c...sp.hLPspaptth....ppssp.........tptRllsDYIAGMTDpaAlchapc .......................................................................................h..hs.thttththL+phhhphlhpp..p.h..th.p.h..p..s.ppllppLhch....h.....hs...........................................c...sp.hLs..t..p..h.t..thh...............................pttt...................t+hRllsDYIuGMTDpaAhc.app............................ 0 235 468 581 +13129 PF13287 Fn3_assoc Fn3 associated Coggill P pcc Jackhmmer:O26995 Repeat \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.10 0.72 -3.88 342 282 2012-10-03 16:25:20 2010-09-01 16:25:02 1 69 219 0 75 1442 337 63.70 33 7.78 CHANGED VsLo.o..............spsA.....sIYYTlDGosP....T.s........pSsh.......Y...........s.sPIhl......sp.........ss.s...............................l..KAhAhcpu....hssSslsohsa ....................pLp..o..t...stsu......pIaY.Tl.DG.S.sP.......s..p.............................pSht.........Y...........................s..ss.I.hl......sc......ss..s...................................................................................................l...KAlAhcpu.....hppSslsoh........................................................... 0 29 55 68 +13130 PF13288 DXPR_C DXP reductoisomerase C-terminal domain Bateman A agb Jackhmmer:Q7P3U3 Domain This is the C-terminal domain of the 1-deoxy-D-xylulose-5-phosphate reductoisomerase enzyme. This domain forms a left handed super-helix. 
27.00 27.00 27.20 27.30 26.70 26.50 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.49 0.71 -4.10 236 3405 2010-09-01 15:25:30 2010-09-01 16:25:30 1 7 3273 79 826 2642 1652 119.10 39 30.61 CHANGED LGsPDMRhPItYALsaPcR..h.........s..s......ss.ptL.Dhs..pl.up.LsFc.pPD....hc+FPsLpLAh-Ahct..GG..shs.slLNAANElAVpuFLpt+IsFh-Iscllcpslpp..........hps.t..................ssL-sllpsDphARphA ............LGsPDMRhPIuaulua.PcR...l..............s...s......s.s.psL.D..hs......cl....u........s.Ls..Fp.tPD....hc+FPsLcLAh-Ahct......Gs..shs.slLNAANElAVsAFLsp.cItFh-Isplltpslcp...............hsht........................sssl--lLp.hDt.ARch....................................... 0 289 560 710 +13131 PF13289 SIR2_2 SIR2-like domain Bateman A agb Jackhmmer:Q7P447 Family This family of proteins are related to the sirtuins. 24.50 24.50 24.50 24.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.94 0.71 -4.31 164 1624 2012-10-03 09:55:27 2010-09-01 16:29:03 1 38 1214 0 458 1490 86 152.60 17 27.46 CHANGED ptllTTNYDsllEpuh.....tptstthh...........................hhspshtt.....................pssphhlhKlH.Gsl............t....s......................psl.llopsc...........Y...tphh......................psh.htphlp...shh.t...sp.....sllFl..GaS..hsDs.slppll....pplhpphss..............pt....ahlh.pst.................tttpt......hhpphslphl 
.............................................h..llTTNYDphl.E...psh...........pptshth..........................................h.hpsht........................tpthhtlhKlH.Gsh................................................................................................psl...ll..o..p..s.c.................................Y....tchh........p..........................................................ps...ht..phlp.........shh..p.....sp......sl.lFl..Gao...h..s..D...plp..t..lh...pphhpphtp................h.........ahhh.tt........................................................................................................................................... 0 165 285 360 +13132 PF13290 CHB_HEX_C_1 Chitobiase/beta-hexosaminidase C-terminal domain Coggill P pcc manual Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -9.68 0.72 -4.19 268 225 2012-10-03 16:25:20 2010-09-01 16:33:34 1 57 161 0 67 1534 350 68.50 26 8.39 CHANGED sssG.php................st..plslps....sssss.....sIhYThD.GosPoh.........t...S......hY...............t...............s..l.l......t.ss.....ss....l+shuhsss.tps..u.tlh..o ....................t.............pstlplss............sss.ss.....pIaY.ThD.G.o.pPoh..........................p.......S...........hY..................p..t..................................P....lpl..................s..ts........................ss...............lcshshp.s.sp................................................................ 0 33 48 57 +13133 PF13291 ACT_4 ACT domain Bateman A agb Jackhmmer:Q7P344 Domain ACT domains bind to amino acids and regulate associated enzyme domains. These ACT domains are found at the C-terminus of the RelA protein. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.22 0.72 -3.64 182 5583 2012-10-02 00:29:19 2010-09-01 16:38:22 1 27 3945 4 1173 4363 2094 78.80 25 11.21 CHANGED sspp.....apsslplpu.hDcpGlLs-lspslupp.psslps.lshps.........pthsphplslpVpshppLpplhppl+plpsVhpVpR ..........s..tta.splclpu.hs.R.pGlLs-lspslusp..psNlhu.lsscsp..p....sphushplslp...l..p..s....h..p....p.L..spllp+l.+plss.VhpVpR........................ 0 347 739 993 +13134 PF13292 DXP_synthase_N 1-deoxy-D-xylulose-5-phosphate synthase Mistry J jm14 Jackhmmer:Q7P481 Family This family contains 1-deoxyxylulose-5-phosphate synthase (DXP synthase), an enzyme which catalyses the thiamine pyrophosphoate-dependent acyloin condensation reaction between carbon atoms 2 and 3 of pyruvate and glyceraldehyde 3-phosphate, to yield 1-deoxy-D- xylulose-5-phosphate, a precursor in the biosynthetic pathway to isoprenoids, thiamine (vitamin B1), and pyridoxol (vitamin B6). 
21.20 21.20 21.20 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.84 0.70 -5.46 331 4453 2012-10-02 16:07:47 2010-09-01 17:03:14 1 13 3512 8 1043 7165 5251 238.80 43 43.01 CHANGED L-pIssPpD.L+pLshppLtpLApElRphllcsV.Sp.....oGGHLuuNLGVVELTlALHhVFsoPc.D+llWDVGHQuYsHKlLTGR+.-papolRphsGlSGFPcRsESt.aDsFssGHoSTSISAALGhAhA......pc.l.p....G...p.........s.....ppVlAVIGDGAhTuGMAaEALNpA..Gph.p.p..sllVlLNDN-MSIu...........NVGulu.paL...sclhs...sth...Ypph+ptscphl.p.....hh.......hhphhc+h....ccshKshh...h....s......slFE-hGhpYlGPlDGHDlppLlpsLcps+......s.....h....cG.PlllHVlT .........................................................................................................................ttlpsP.t-L+.tLsh......ppL..pLu...p...ElR.p.h...l.l.p...s.V...S.p.....o.G.G..H.....huusLGs.........V.........E..L.........T.........l......A.....L.........H.........h.........V.........a..........s..........o..........P.........h........D.p..llW..D.....V.....GH....Q....u.......Y....s.....H.....K...I......L....T.....G.....R......c........c....p.....h.....t.....T......l....R......p.h.....s....G.......l....s....G...F....s....p......R.......s......E...S.......p..a.D......s......h.....s.....s.......G.H.....o...S..T.....S....I.SA......u.l.GhAh.A......tc..hp....G..p....................p.....cps.l.uV..I..GD...GA............l.TuGM.AaE.A.h.Np.s....G..t.h...c.t...........s.hl..l.l.L.ND.N...-...M..S..Iu.....................s..V..G...u...l..s.........p..a..L........u..p.l.h.......s.......s...ph...........Y.p..p..h.+...p.th..c..phh..p.................tl.......h.hphhc+h..............cc.t.hKsh..hs........s..sslFE-L..GhpY..lGPlDGH.slttLlpsLcph+...............p.........h.....p.G..PhllHlhT....................................................................................................................... 
0 343 706 895 +13135 PF13293 DUF4074 Domain of unknown function (DUF4074) Aldam G pcc Pfam-B_2050 (release 24.0) Family This family is found at the C-terminal of Homeobox proteins in Metazoa. 22.30 22.30 22.60 43.30 22.00 19.10 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.13 0.72 -3.75 14 230 2010-09-01 16:25:41 2010-09-01 17:25:41 1 5 74 0 87 199 0 64.30 67 16.92 CHANGED PsoGPulFGLsHL....PHssossMDY.sGAssMusupH.H........GPCDP..HP.TYTDLosHH.sSQGRIQEAPKLT .............PsSGPulaGLsHL....sHssSushDY.sGAsshuuspH.H........GPCDP..HP.TYTDLouHH......sSQ...GRIQEAPKLT....... 0 3 10 35 +13136 PF13294 DUF4075 Domain of unknown function (DUF4075) Aldam G pcc Pfam-B_2028 (release 24.0) Family The members of this family are putative mature parasite-infected erythrocyte surface antigen protein from Bacillus spp. 25.00 25.00 65.40 27.80 17.80 17.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.81 0.72 -3.94 12 77 2010-09-01 16:37:33 2010-09-01 17:37:33 1 2 76 0 3 33 0 77.00 93 46.08 CHANGED KETKEIFSKKKlEstEKPETIEIQAVSPKVDELKAEEEPVVAEDGGMKEARELFMKDSNAEEKKTEAYIELKQDKEEKK ..KETKEIFSKKKVEPtEKPETIEIQAVSPKVDELKAEEEPVVAEDGGMKEARELFMKDSNsEEKKTEAYIELKQDKEEKK.... 0 0 1 1 +13137 PF13295 DUF4077 Domain of unknown function (DUF4077) Aldam G pcc Pfam-B_2043 (release 24.0) Family This is the N-terminal region of methyl-accepting chemotaxis proteins from Bacillus spp. The function is not known. 
25.00 25.00 373.30 372.70 21.40 18.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.39 0.71 -4.55 4 74 2010-09-01 16:52:32 2010-09-01 17:52:32 1 1 74 0 2 55 0 175.00 95 34.95 CHANGED MEWLKRTCFSNLEKESQKNHLLLFITICSFFLGIIAIGYYGYIFTERAIAFWhCGISVVVFGTLhTFIcSMEuhYKYIMTFMLLhMSFIMVQAFNESPAVFQMVYFTLAVSLIYLSERLllILGGVAVVlTFILCSYWPEQFFAYTAuSEAANFASLLAIVTIAMWGVTKIGSNL MEWLK+TCFSNLEKESQKNHLLLFITICSFFLGIIAIGYYGYIFTERAIAFWlCGISVVVFGTLhTFIcSMEohYKYIMTFMLLhMSFIMVQAFNESPAVFQMVYFTLAVSLIYLSERLllILGGVAVVITFILCSYWPEQFFAYTAuSEAANFASLLAIVTIAMWGVTKIGSNL 0 0 1 1 +13138 PF13296 T6SS_Vgr Putative type VI secretion system Rhs element Vgr Aldam G pcc Pfam-B_2052 (release 24.0) Family This is a family of putative type VI secretion system Rhs element Vgr proteins from Proteobacteria. 22.40 22.40 22.50 24.90 22.10 21.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.20 0.72 -4.19 159 943 2010-09-02 08:37:27 2010-09-02 09:37:27 1 14 320 0 249 1034 26 106.20 38 12.58 CHANGED S+phtG.ss..hNpLhhDDssGQ.pspLuo..sa.upSpLsLGaLsct..tsptctphRGpGFELRTDuaGAlRAupGLhloocspstAt....up.LDhspshppLppAtphhcsLups.A ............o+phtuss..hNpLhhDDssGp.+hpLuo.....sa..upSQLNLGaLsct..tsp.ptp.RGcGFELRTDuaGAlRAupGlhlSocspsp....Ap....uphL....Dhstuhp.....LpputphhcuLupsA.................................... 0 24 77 152 +13139 PF13297 Telomere_Sde2_2 DUF4076; Sde2_C; Telomere stability C-terminal Aldam G pcc Pfam-B_2053 (release 24.0) Domain This short C-terminal domain is found in higher eukaryotes further downstream from the Sde2 family, Pfam:PF13019. It is found in all Sde2-related proteins except those from fission yeast, fly, and mosquito. Its exact function in telomere formation and maintenance has not yet been established. 
21.80 21.80 21.90 23.80 21.20 21.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -8.83 0.72 -4.70 33 282 2010-09-02 08:39:50 2010-09-02 09:39:50 1 10 133 0 179 257 4 59.10 58 13.20 CHANGED lDLssasSscELE.sLGl-RLKptLtuhGLKCGGTLpERAsRLF.lKupsh-clD++lhAK .....lDLssFoShE.....ELt....sLGL-RLKsALhALGLKCGGTLpERApRLFSsKGpsh-plD.sLhAK....... 0 62 92 138 +13140 PF13298 LigD_N DNA polymerase Ligase (LigD) Mistry J jm14 Jackhmmer:Q05W82 Family This is the N terminal region of ATP dependant DNA ligase. 25.00 25.00 34.50 27.40 21.80 20.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.55 0.72 -3.93 134 675 2010-09-02 12:44:19 2010-09-02 13:44:19 1 11 606 8 296 710 139 106.30 50 16.03 CHANGED QcH.....cAp+..LHYDFRLEh-GV.LtSWAVPKGP...oh-PssKRLAlp.sEDHPlsYssFEGsIPpGpYGuGs...VhlWDpGsac.sh.............pstp............th..............pcG.clphpLcGc+Lc.G.casLlR ...........QcHcApR..LHYDFRLE....hDGV...LtSWAVPKGP....ShcPs..............sKR..LAVc.....sEDHPL-YusF....EGsIPp..GpYGuGs...VhlWDpGsap.s......................s.tp...........sh..................................cpG.clphpLcGc+LpG.pasLlR...................................................... 0 77 177 244 +13141 PF13299 CPSF100_C Cleavage and polyadenylation factor 2 C-terminal Aldam G pcc Pfam-B_2065 (release 24.0) Family This family lies at the C-terminus of many fungal and plant cleavage and polyadenylation specificity factor subunit 2 proteins. The exact function of the domain is not known, but is likely to function as a binding domain for the protein within the overall CPSF complex [1]. 
22.30 22.30 23.40 22.50 20.80 20.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.07 0.71 -4.05 58 300 2010-09-02 13:14:05 2010-09-02 14:14:05 1 16 262 0 216 303 3 149.10 29 18.79 CHANGED V+LscsLlppL..+Wppl......cshpVutlsGpLt.............................................................................ht..t.p..pttsptptpsthchlss.....p.s..ssthss.spslalGDlRLuDL+chLts.....pG..hpAEF+.u-GsLllsst...........VuV..RKsus...............GcltlEGshs.....................ssaatV+chlY-tL ..................................................VpLpcsLlppL..papps......cshplshlsG.Lt..................................................................................t.....ht...............t.t..sppp.....shlss................h.s..ssphss..ppslal..G-lRLuDhKphLhp.....pG....h.pAEFp..tGs..L.l.s.sst...........................................VuVR+sss...............Gc.ltlEGshs..............................psaYplRchlY-th.................... 0 72 116 178 +13142 PF13300 DUF4078 Domain of unknown function (DUF4078) Wood V, Coggill P pcc Pfam-B_3305 (release 24.0) Family This family is found from fungi to humans, but its exact function is not known. 25.00 25.00 26.80 26.80 22.30 18.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.01 0.72 -3.86 34 194 2010-09-02 13:35:35 2010-09-02 14:35:35 1 4 176 0 149 186 0 87.80 36 21.56 CHANGED ppMpplAc+RD+s.T.P..phHY-ust..ElRs+GsGFYuFS+DEcpRccQM-cLpptRpc.TpccRc.pRcct+tcRcchhc-Rhcclcpc+ .......................h.phpphtcccccshp.P......sshHY-s.t....EsRs+GsGaYuFS.p.DE-pRpcQMcpLcphRc.p.TpcpRp....cR......c....p....h......+ccR....cthlcpRhtclcp++............................. 0 49 73 117 +13143 PF13301 DUF4079 Protein of unknown function (DUF4079) Mistry J jm14 Jackhmmer:Q05X76 Family This is an uncharacterised family of proteins. 
25.40 25.40 26.10 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.20 0.71 -4.26 90 277 2010-09-02 14:02:26 2010-09-02 15:02:26 1 3 104 0 136 293 83 147.20 27 80.26 CHANGED ahth...........lHPllMhll.aslsshshhLGhphRppRstps.......................................................................tphtpLl...pschpt....hHaphu.hlLshhslhslGGhsssahpssp....lFhus.HhasGluhssLhhhSsulsPth..p....up.p.thRplHlsLNslslhLFhhQulTGhcllhcl ................................................hhHPhhhhhh.hshshh.shhhGhphRphR..tt...........................................................................................h..hh.......hhph+ptHhphushhls.hhslhhlGG.hssshhpssp......la.us.HhhsGhshssLhhhusulssth..psp...p.hhRt.....hHlhLNshhhhLFhhpuhoGhpll.ch........................................................... 0 30 94 126 +13144 PF13302 Acetyltransf_3 Acetyltransferase (GNAT) domain Bateman A agb Jackhmmer:Q7P3G3 Domain This domain catalyses N-acetyltransferase reactions. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.03 0.71 -4.02 406 20736 2012-10-02 22:59:21 2010-09-02 16:53:55 1 153 4229 65 4844 27191 2993 139.30 19 68.41 CHANGED RLhLRs.h.p....s...Dhsslh.phh.s..s.....sc.lh....ca.....hs.....shppspphl....t..............htt.http....s.hhh............aslt..........................t+s....s...........t.................p.....h.lGh.hul.................ht.............................tspsE.lG.ah.....ltps.hh........Gp.GausEAspshh......pas.....h.p....phs......h........................p...................pl.huhhts..pNhsStpl.hc+hGhp ......................................................................................................................................hhL+...h..p.....p.....D....h....t....t...h....h....phh...s.......s.................p...hh........pa...........h..h.................sh.....p.p....h..p..p...hl.....p...........hhtt..ppt..................hh......................a.s.lh..............................................................................................pcp...s...................t..............................................p..................h..lGh..lsl...........th....................................................tppps-...l.G....a.h........l.s...tp...at.....................Gp...G.h.u......s....-Ah.pth.l................cau......a..p......................phs.................l.................................................p..................+l..hh.ps..t.s........pNh....sS...t+....l..h.c.+.hGF........................................................ 0 1518 2985 4028 +13145 PF13303 PTS_EIIC_2 Phosphotransferase system, EIIC Bateman A agb Jackhmmer:Q7P503 Family The bacterial phosphoenolpyruvate: sugar phosphotransferase system (PTS) is a multi-protein system involved in the regulation of a variety of metabolic and transcriptional processes. 
The sugar-specific permease of the PTS consists of three domains (IIA, IIB and IIC). The IIC domain catalyses the transfer of a phosphoryl group from IIB to the sugar substrate. 27.00 27.00 27.00 27.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -11.97 0.70 -5.58 67 1977 2012-10-01 19:13:17 2010-09-02 17:03:21 1 3 895 0 191 1006 7 317.10 29 92.29 CHANGED hcsLsGhAhullsuLlsusllpplsphlhh...........s...................hlhplsslspthhusulGsslAhthphsslhshusshAuhlGussh.h.....................tth...hhuhGs.lsshlsuhlAshlsphls....thsslclllhPllshhluuhlGhh.ltPhls.hhphlGshIsssoshpPllMullluhlhulllsoP.lSSsAlulAluLsG.....................luuGAAslGssushh..shshhoh+.NshGshlAhhlGosKlQhsNll+pPh.lhlPshlsuAlsGslushh........slp..ssssuu...GhGhsGLlGPlssh.................s.us..ss.hlhhllhhhllPhlluhlhthlh.........................+phthh+ss-.hpl ................................................+sLuulAsuhls.uLlssAlLttlhth..hhth..........p..........................hLhp.lush.h..t..uhsuhhlGsLlAhthshsPlhshllusushhus.......................................htGhGcllsshlsuhlushlhphlp....pt.sul.-....lIlhPllsss.l.ushluhl.lhPhlpshhptIGshIpshTshpPllMuIlluslhullhhoP.lSosA..lshhluLoG.....................luuuu.AslGssussh..shlhtsh.+lNs.hG......sslAlslss...shhp...hsslhpp.Pl.Ihlss.hlsuslsGllsshh........slp..ssstuu..............GhGhsGhlu.lshh.....................ss.hs..lllhllshhll.Phlsualsphlh.+hhtlhppt.h................................................................................................ 
0 71 123 178 +13146 PF13304 AAA_21 AAA domain Bateman A agb Jackhmmer:Q7P3F0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.96 0.70 -4.45 220 6048 2012-10-05 12:31:09 2010-09-02 17:17:35 1 63 2772 13 1685 97673 27953 227.70 14 53.08 CHANGED lsslhGsNuoGK..Ssllculthh.......................................ththh.hhhptpthptshphpht.hhtpshpaphthphpppth..................t.htthhh.ptt.t.th.th..t.ht.tt.ht..hhh.hthtt...hh.hthhh.hht.h..h........................thphhphhpphhhtpphhphhphhhptht.hththtth................................................................hhhh..ttth.h.h.t......SsG...ppph.h..slhshlhpshtt.....................sl...ll....lDEh.-ss.LHP....phh....ppll.....phh.............pptpp...............................tsQl.lhoTHs.shl.ls ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................h....htth..............SsG.........ppph.h..........hlhhhlh.p...t.t.........................................................................................sl.......ll.......lDEP.Ess.LHP.....................ph.........pplh....phl................pphtp...................................ts.Q.l..llsTHu.s.l.l.............................................................................................................................................................................................................................................................................................. 0 627 1112 1447 +13147 PF13305 WHG WHG domain Bateman A agb Jackhmmer:Q7P3F5 Domain This presumed domain is around 80 amino acids in length. It is found to the C-terminus of a DNA-binding helix-turn-helix domain. This domain may be involved in binding to an as yet unknown ligand that allows a transcriptional regulation response to that molecule. The domain is named WHG after three conserved residues near the C-terminus of the domain. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.76 0.72 -3.41 239 2407 2010-09-03 08:44:01 2010-09-03 09:44:01 1 6 1383 8 710 1956 289 84.90 20 41.89 CHANGED tshutuYlpFAhppPshaclhF...........ttp.h...................ps...sphpp.h....h...........p..............hhtph...lpp.h..h..tt........hthst......pc...sp.phshs...hW..uhlHG.husL...hhs......sth .............h.thuhuYlpFAhccPshachhF............ttshs.................................................tt......tphpp.s....h..............p.................................thtph...lpp.h..h..tt....................hth..ss.........................pp...st..thsht...hWuhlHGhss.L.hhss..h............................ 0 225 496 610 +13148 PF13306 LRR_5 Leucine rich repeats (6 copies) Bateman A agb Jackhmmer:Q7P2P7 Family This family includes a number of leucine rich repeats. This family contains a large number of BSPA-like surface antigens from Trichomonas vaginalis. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.90 0.71 -12.10 0.71 -4.44 205 12012 2012-10-02 21:32:02 2010-09-03 10:37:11 1 746 915 12 6389 30082 1356 113.30 19 48.31 CHANGED spIs..stsF.hs.s.sLp.s.ls.lP..s..s..lp..p....IuptuFtsC.s.Lp..sl.p..l..Ps..s..lppIustuF.ts.C.s.L.psl.sl.....s.s..s.lppIs.pt.sFptCs..s..Lp.sls..l..s..s..s..lp...pIupt.s..F.ts.s.s.L.p.sl.s.ls.s..slp...pI..ss.ps...Fps.C..sLp 
..........................................................................................................................................................................................................................................................h.p....p.....l......l..s........p.....s.........l..p...p...............I.....s.......p........t........s........F..........t.........s....s..........................s.....Lp...........p......l....p.........l......s..s........s..............l...p......p......I........s........p........t..........u.......F......t...s.....s.....................s......L.........p....p......l.......s.l.............s....s....s....l........p........p...........l..........s........p.......t..........s........F......t...t........s.s..........p................L........p..........p........l.....p.....l..........s.....p...........s..........l.p.............t..l...s.......t.................s............F.....t........t....l........t...........h.........h................................................................................................................................................................................................................... 0 5510 5860 6131 +13149 PF13307 Helicase_C_2 Helicase C-terminal domain Bateman A agb Jackhmmer:Q7P4K4 Domain This domain is the second of two tandem AAA domains found in a wide variety of helicase enzymes. 
25.10 25.10 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.04 0.71 -4.30 152 5901 2012-10-05 12:31:09 2010-09-03 10:41:35 1 61 3620 5 1731 4652 739 165.30 27 22.05 CHANGED lhplh.p.t.hs..ussllhFsSaphhpplhp.t.h...pp............p..h..................th.........t.lh..h..p................s...pt..s..tppll.................ppap....p......s....................................psu.l.Lhust....ph...EGlDhs...sct..hpsVll..stlPa..........ssspsshhpt+tpahc.p.....pt.....................................................tss........hp..p.hhhspAhtplpQuhGRlIRpps...DhGsllllDpRht...p.....pp.htpt.l.....phltst ..............................................................................................h...hhphtt..sthllLFsShphhpplhc.h.h...pp..............................................p.h..........................ph..............ll.hQ.......................................s...pt..sptpll.................pp.Fp..p........s.........................................................................................psu..lL..l.uss...u.....FhEG....lDlt...G....c...t......lpt....V....lIs+lP.F...........................sssssP.....lhps+h.chhc.p.....ps.....................................................................................................................tss...........Fp.p.h.tlsp.Ahh.p.lpQ..ulGRLIRs.ps...D...pG.s........l.l....lh.............Dp............Rlh...p........pp...Y.s...pth..psls..h............................................................. 0 576 1031 1435 +13150 PF13308 YARHG YARHG domain Bateman A agb Jackhmmer:Q7P768 Domain This presumed extracellular domain is about 70 amino acids in length. It is named YARHG after a conserved motif in the sequence. This domain is associated with peptidases and bacterial kinase proteins. Its molecular function is unknown. 
21.30 21.30 26.20 26.10 19.10 17.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.68 0.72 -4.44 105 463 2010-09-03 15:16:03 2010-09-03 16:16:03 1 33 361 0 77 396 21 81.60 28 22.53 CHANGED sphhh.sp...sssphlspsplpshs..sp-LplhRNpIYAR+GhpFps.tplpsYFs.spsWYpssh..........................pshLsshEppsl...p.hlpphEp ...................................t....h.t....s.phhs...pplpths....ppcLclhRNEIYAR+GahFps.t-hp.pYFs...pps..WYpssh.........................sshlochEppNlc.hlcphp....................... 0 38 61 67 +13151 PF13309 HTH_22 HTH domain Bateman A agb Jackhmmer:Q7P8J8 Domain This domain is a helix-turn-helix domain that is likely to act as a DNA-binding domain. 29.60 29.60 29.70 33.10 29.30 29.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.77 0.72 -4.21 187 1886 2012-10-04 14:01:12 2010-09-03 16:51:08 1 5 1465 0 294 997 44 63.90 37 28.64 CHANGED -hlpphl.ppslp.ph.stss....shhsppc+hpllptLpcpGlFtlKsusstVAptLslS+hTlYpYLc .............................-llsphl.cpslc.pl..sssh....sl.spscp+pI....VhpLa-+GlFplKcAlspVAcpLsIS+tTVYhYl+... 0 75 143 221 +13152 PF13310 Virulence_RhuM Virulence protein RhuM family PIRSF, Bateman A agb Jackhmmer:Q7P417 Family There are currently no experimental data for members of this group or their homologues. However, these proteins are implicated in virulence/pathogenicity because RhuM is encoded in the SPI-3 pathogenicity island in Salmonella typhimurium [1-2]. 
27.00 27.00 27.10 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.72 0.70 -5.38 118 1101 2010-09-03 16:18:37 2010-09-03 17:18:37 1 10 730 0 253 1002 83 187.10 38 65.40 CHANGED cusl+cathstp-Gp+.....tcplpaYNLDsIIuVGYRVpShRuTQFRpWATphL+EYllKGFlhDDERLKsss......hstDYF-ELLERIR-IRuSERRFYQKlpDIYA.sSlDYDssuptTppFFupVQNKLHaAlpGpTAAElIhpRADupKspMGLToW+su.....+.lpKsDVslAKNYLsccElcpLNRlVohaLDaAEhpAcR+hsMsMpDWtp+LDpFLphs-pclLpsuG+lSpctAcppAcpEa-cachpp......php.sDa- ............................ushpphhhstt-Gpp........hp...hpaYNLDsIIuVGYRVpS.+u..T.pFRpW.A.TphL+-YllKGashsc..c.RL+pss.......paacchLtclR..p...IR.SEt.hh.a..pl.h-lat.hu.D....Yp...p...s..Fat.hQ.Nh..hhauhtt.....Tus-ll..pss.....huh.thttt.........-h.hu.KNYLt..ph..ht.....hht.hp..h.tt...h.ht.h...hp.hh...t..hL.t...Gthp...h...s................................................................................................ 0 95 189 225 +13153 PF13311 DUF4080 Protein of unknown function (DUF4080) Ellrott K kellrott JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. 
22.90 22.90 23.10 23.40 22.50 22.70 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.56 0.71 -11.24 0.71 -4.27 54 385 2010-09-03 18:13:29 2010-09-03 19:13:29 1 5 375 0 101 364 15 171.30 22 30.98 CHANGED uccaGhlYpstsPYEVLpTcaLSYc-ll+LKplE-hlEhYYNSupFspolcalhppa..soPFchYppLucaa-ccGahphs+uptphYclLh-Fh.........p......-pthc..t..hh+-lLpaDahhppp..t+shPpFh.tppppt.cpp.hpth.hpctpthph.....ht......pphh+hsHlEhFp........hs.................ppp.hlLF ........................ccaGhhYpshPPYElLpochlSap-lhpLKplEchl-tYYNSspFppolchl.h.p.ph.....tssFcFapphu.p.a.apppshhphshuhpchaclLhcFh..........................................p........pph.t......p.phhppllphDahhh.pp..hc.h..Ptah.tt.......p.....p.t...tpt...hhth....hpp.......................t...................................................................................................... 0 48 88 94 +13154 PF13312 DUF4081 Domain of unknown function (DUF4081) Aldam G pcc Pfam-B_2088 (release 24.0) Family This domain is often found N-terminal to the GNAT acetyltransferase domain, Pfam:PF00583 and FR47, Pfam:PF08445. 21.70 21.70 23.30 21.80 19.20 21.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.31 0.72 -4.11 49 307 2010-09-06 12:54:03 2010-09-06 13:54:03 1 4 306 0 85 197 16 108.80 45 38.43 CHANGED Gu-lWGhhtt....GtLsulCauGANLlPltu........sssu...lcAFA-+ApcpuRRCSSlVGPA-sVhsLWcpLcs....sWGsA.REVRssQPLhshsssP...tlssDPt........VR.lR.cEl-hll .............uplaGhh.t....GtLpuLCasGAN...LlPltu......................................sssslcAFA-+Atct.RRsSSlVGsAcsVLsLWcRLp.....sWGsA....REVRssQPLhAhss.pP....plssDst..........VRplp.c-h-sY.................................... 0 28 65 80 +13155 PF13313 DUF4082 Domain of unknown function (DUF4082) Mistry J, Aldam G pcc Pfam-B_2054 (release 24.0) Family This family appears to be a parallel beta-helix repeated region that sits between successive Cadherin domains, Pfam:PF00028. 
21.50 7.00 21.50 7.00 20.80 6.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.85 0.71 -4.76 61 169 2010-09-06 13:08:15 2010-09-06 14:08:15 1 38 53 0 108 198 22 144.50 38 27.55 CHANGED lasssssPssss.ssD..ssulELGs+FpusssGplTGlRFYKu...suNTGTHoGoLWousGslLAosTFTsE.ouS..GWQpssFuoPVslsAGTTYVsSYaussGpYusossaFsss.hss.GsLpA.....husu.....N..GVYsYuuushFPs..sSapuoNYWV ...................................lasssssPssss.ssD..ssslELGh+FpusssGploGlRFYKu...uss..TGoH.sGsLWou...s...GshLAo.sTFo.sE....ouS...G.WQpssFusPVslsAsoTY.Vs.SYpsssGtY.ussss.aFss........s.hss..usLpu.usu............NGVYtYuus....hFPs..soa.psoNYWV......................................... 0 31 55 84 +13156 PF13314 DUF4083 Domain of unknown function (DUF4083) Mistry J, Aldam G pcc Pfam-B_2061 (release 24.0) Family This is a family of very short, approximately 60 residue, proteins from Firmicutes, that are all putatively annotated as being MutT/Nudix. However, the characteristic Nudix motif of GX(5)EX(7)REUXEE is absent. 21.90 21.90 22.10 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.84 0.72 -4.14 10 78 2010-09-06 13:30:24 2010-09-06 14:30:24 1 2 65 0 3 58 1 57.20 60 92.23 CHANGED M......ltshIYhh..hhllllhussFshhIRhllpsSstKKQc..slEQKLD+IIpL...LEKp .........l.shIYTCLVIGLIVLFFlSFT..L..F.IRRVLQSSsA.KKppshsMNQKLDRIIEL...LEKD......... 0 0 1 1 +13157 PF13315 DUF4085 Protein of unknown function (DUF4085) Aldam G, Mistry J gba Pfam-B_2570 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 101 and 269 amino acids in length. 
24.10 24.10 27.30 37.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.44 0.70 -11.20 0.70 -4.96 7 89 2010-09-06 14:00:34 2010-09-06 15:00:34 1 2 79 0 6 74 1 167.80 52 85.01 CHANGED KYFs+-aYccMQlpsalpa.Eo.cEWEph.........-ahppLKEEhc.h.p.-LL+aLPcolaPhlpsso.lsothssscLKchhhEWsp-aEtchpplppuYh-paspItc+LPusVtQLa.phSLHDuhIpslc.+scsoLpIhLDsSGsFSpFsKLplTFhslT+sphsENF.-GuWWLYaEItLT-cG.FEhtVLFDsPhc..EloIhA ......................................KYFs+-WYKcMQl.tFlpF..EolcEWpEh...........hpSL+--hE-+.K.DLLKFLPcSlashIps.T...lsS-.YPStcLKKhhhcWopDYEKRhspLppuYh-.as..SIcc+LspNVhQLa.chSLHDu.lpslc+cS....cDol.IsLDCSGoFsEFDKLplTFhGV.oKCShsENF.EuAWWLhHEIsLs--G.FELGVL..FDsPFc..EVhIsA.................. 0 3 6 6 +13158 PF13316 DUF4087 Protein of unknown function (DUF4087) Aldam G, Mistry J gba Pfam-B_3066 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 140 and 280 amino acids in length. There is a conserved RCGW sequence motif. 25.00 25.00 34.60 34.40 19.20 17.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.35 0.72 -3.83 11 83 2010-09-06 14:02:00 2010-09-06 15:02:00 1 1 79 0 19 63 0 99.90 52 52.17 CHANGED suspsEpRCGWh-NPoPANhoLpDc-GpWpIusQG.Ga..pscG...hPs..hssspaVpT.susaGYuCAChslssDscptplspIpps+phPLupCRpDhuL..pc. ...t..spAEpRCGWhpNPTPu....NhhLsDR-GpWpIGTpG.Gh...pscG......Ps..hu.s..sQaVcT....N...GsaG..Y.uCAClTssT.D.pppRlsplhKApthPLStCRpDKpLKEP.................. 0 3 10 14 +13159 PF13317 DUF4088 Protein of unknown function (DUF4088) Aldam G, Mistry J gba Pfam-B_3345 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 258 and 300 amino acids in length. 
24.10 23.70 214.20 214.10 24.00 22.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.33 0.70 -11.52 0.70 -5.01 4 61 2010-09-06 14:03:01 2010-09-06 15:03:01 1 1 61 0 20 51 0 229.00 80 80.43 CHANGED RLR+-F-sFlpVSTGLDtpFlPPpFsDFLRARLLQpDGPLTERAV.RLLuuGEYGWA++VFDKQLPNALAuLMRDApRFGFGLAVQs-WoPpQRhcHAR-WAAQlLuEsGADAAaT-ALAuQluASApDlRsLEERM+TPAWRLAESLRQRAYDlMYALQTEssEshGRu+VGELRuhLsLALpYGShQh-EAsRVLEQlcRs+PcLFpEAPDDVFARLAAWLRRhFsps S.LRKDYDAFVRVSLKLDPQFsTPSFEDFLRAKLLDsMVPLTEHAVQRhLQGGQYAWAKRTLDKEFPDVVuILhRQAG-FGFGFASRSEWTPDELAKACRDWAAAlVupAQGDAuLVDPLAAQIKSAVpDIQTLEEpMQTPAWRLAESLRQRVYEAKLACEMSVGSsA.REKLGELRGLLRLGlAHGSFQKQEAQQIMEYLRLLKPEIFlEEPYDlFsRlAAWLRShFhsu.. 1 1 4 11 +13160 PF13318 DUF4089 Protein of unknown function (DUF4089) Aldam G, Mistry J gba Pfam-B_3700 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 21.60 21.60 22.40 22.30 20.80 20.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.21 0.72 -3.66 33 163 2010-09-06 14:03:51 2010-09-06 15:03:51 1 2 162 0 52 131 2 49.60 39 77.12 CHANGED YlpphutLLuLslcsphRssVlsphp+IsshAp.lhsFPL.s-chEsAsVF ...YlpphtplLuLsLD-spRsslhtpFp+IAshAp.lhsaPL.s-chEhAuVa....... 0 5 23 35 +13161 PF13319 DUF4090 Protein of unknown function (DUF4090) Aldam G, Mistry J gba Pfam-B_3702 (release 24.0) Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 
22.70 22.70 24.60 31.70 20.80 16.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.88 0.72 -4.04 15 61 2010-09-06 14:04:41 2010-09-06 15:04:41 1 1 60 0 23 58 101 82.80 63 88.35 CHANGED GsDAVDpAIusGlDLDGoPIPssMLsLYpcVMsLEupRpRSGVpKSMRNRIVRoGAKHhsp-sLNQhLl-AGW-GLK-KEIuFF ....GsDAlDtAIssGlDLDGSPIPsshL-LYpcVMsLEupRQRSGVppoMRsRIVRhGAKHhsQ-pLNQhLlcAGassLK-KEIAFF. 0 2 13 21 +13162 PF13320 DUF4091 Domain of unknown function (DUF4091) Aldam G, Mistry J gba Pfam-B_3704 (release 24.0) Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 70 amino acids in length. There is a single completely conserved residue G that may be functionally important. 22.50 22.50 23.10 22.60 22.20 22.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.46 0.72 -4.07 63 346 2010-09-06 14:05:22 2010-09-06 15:05:22 1 22 286 0 79 284 31 71.00 34 11.20 CHANGED hshsGhL+Wuas.ha..sp.......sPap..ss.phth..a.ssGDsallYPGps.........ttshsSlRhchhpcGlpDhchlchLcp ...........................t.phsGhL+WuhN.hW...sc........................-Pht.........Dsphch.......a..suGDshllY.Pusc...........tt.hs.SlRhchLtpGhpDhchl+hlc.p.... 0 29 54 74 +13163 PF13321 DUF4084 Domain of unknown function (DUF4084) Mistry J, Aldam G pcc Pfam-B_2026 (release 24.0) Family This family of Firmicute proteins is frequently associated with the EAL, GGDEF and PAS families, Pfam:PF00563, Pfam:PF00990, and Pfam:PF00989. The exact function is not known. 
22.00 22.00 23.40 23.20 20.80 20.50 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.97 0.70 -5.18 8 77 2010-09-06 15:42:46 2010-09-06 16:42:46 1 5 76 0 2 60 0 194.50 89 25.51 CHANGED MINQKKYVHFVhhYIIIFSLWIFLIPK-LNIKEIGILFLFCFAsLFSCYCLYKAIKKMKRGDKLFWVLlLCTCLCGLTMEITLFLHSLSIYDQVIFSYKALPFFIlQYILLFSGFAIKFIKHYSIRGLAQFSFDSIF.IlIMNIYFTLTFILDlSSFRMLTpDTWVLIGYFIAQSLVIYAVISLYRREQYSSSRISLIIGFTIILVYGYIHLFQLNAGhKTSSEVSYLIHTASILLIGLSSILYILDKPMQHETKTKYYRFDYVRFILPYFSIIITFSFIIhQPWDDKFMLIGLVLSLILLFLRQ ..............................................................................................................................................................................................MLTpDTWlL..IGYFIAQSLVIYAVISLYRREpYS..SSRIuLIIGFTIILVYG.YIHLFQLNtGhKsSuElSYLIHTASILLIGLSSILYILDKPhQ...HETKTKYYRFDYVRFILPYFSIIITFSFIIhQPWDD.KFMLIGLVLSLILLFLRQ............ 0 0 1 1 +13164 PF13322 DUF4092 Domain of unknown function (DUF4092) Mistry J, Aldam G pcc Pfam-B_2068 (release 24.0) Family This family is found in Proteobacteria. The function is not known. 22.20 22.20 23.40 69.80 21.00 20.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.00 0.71 -4.52 16 289 2010-09-06 16:06:56 2010-09-06 17:06:56 1 3 274 0 9 193 2 170.60 77 11.51 CHANGED lAtpIDppLptp.sthspth.......hsshsssuuplpsslp+Lhsh..t.....a+sVspFHVFHDsosFYGuoGsARupsslNIoNpAFPVlMsRNDpNYWlsFGpppAWD+....suhAYIT...........................-APS........hlpP-+Vop-TATFNLPFlSlGclGcGKVMVMGNu+YNSlLsCPssYSaNGul ..............................................................Q.AKEIDTAICAK.TsGCNEARWFS....LTTRNVNDGQIQGVINKLWGVDss.....YKSVoKFHVFHDSTNFYGSTGNARGQAVVNISNAAFPILMARNDKNYWLAFGEKRAWDK....NELAYIT...........................EAPS........lVcP..ENVTRDTATFNLPFISLGQVG-GKLMVIGNPHYNSILRCPNGYSWNGGV. 
0 2 4 7 +13165 PF13323 HPIH N-terminal domain with HPIH motif Mistry J, Aldam G pcc Pfam-B_2165 (release 24.0) Family This family is found in fungi on proteins carrying the PAS, Pfam:PF00989, domain. There is a well-conserved characteristic HPIH motif, but the function is not known. 20.30 20.30 22.40 30.70 18.80 18.60 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.02 0.71 -4.52 40 137 2010-09-07 08:27:00 2010-09-07 09:27:00 1 7 118 0 92 146 0 149.80 28 13.89 CHANGED p+plTssLptlu+puCpHPIHTIllsALLASooYlull-sslhc...sspshstps..phsshltGu+sLhhGpsouW....cWphh-stsstsps..........spHhALhTLsFPco...ssssshPthcslshssNhStphLsposshhoshsp......-sulsaslPasps ..............+plotslttlu+huspHPIHTIVlsALLASsoYlull-tslh-....ssss..ssps.........chssh.ltGupslhhupsouW.....pWpshssppststs..........spHhALhTlsFscs...ssssssPthpsl.hssshothhLspo.sshhoshsp.......-sulsaplsasp.................................... 1 13 44 76 +13166 PF13324 GCIP Grap2 and cyclin-D-interacting Mistry J, Aldam G pcc Pfam-B_2169 (release 24.0) Family GCIP, or Grap2 and cyclin-D-interacting protein, is found in eukaryotes, and in the protein Swiss:O95273, residues 149-190 constitute a helix-loop-helix domain, residues 190-240 an acidic region, and 240-261 a leucine zipper domain. GCIP interacts with full-length Grap2 protein and with the COOH-terminal unique and SH3 domains (designated QC domain) of Grap2. It is potentially involved in the regulation of cell differentiation and proliferation through Grap2 and cyclin D-mediated signalling pathways [1]. In mice, it is involved in G1/S-phase progression of hepatocytes, which in older animals is associated with the development of liver tumours. In vitro it acts as an inhibitory HLH protein, for example, blocking transcription of the HNF-4 promoter. 
In its function as a cyclin D1-binding protein it is able to reduce CDK4-mediated phosphorylation of the retinoblastoma protein and to inhibit E2F-mediated transcriptional activity [2]. GCIP has also been shown to have interact physically with Rad (Ras associated with diabetes), Rad being important in regulating cellular senescence [3]. 21.40 21.40 21.40 21.40 21.20 21.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.82 0.70 -5.18 12 246 2010-09-07 13:57:06 2010-09-07 14:57:06 1 4 169 1 171 242 1 213.70 19 65.93 CHANGED Wpclschu-pVS+pAThluhlW.pG.hPcs-shpcsh-saasuLpGhlLssHGpslGAGsTLppsl+sulKplVDuohpLhptsVS..h.tS.ppshcsslsplsGsVWEACsshc+lPpoNhsAIGpuhoplushlKDlLcEMcchh...............hhspupsstsssp.s-ss..s...spD..hS.E-hcVsp.shullppohsslKclIpsIsshh.......t-spshVspLEcLLclspclusplD-LGsSlY.PP.-hspl+tslp+ltuslcchhp.l ............................................................................................................................................................................................................................................................................................tt.p...t.......t..........sutlWt...sCpth....p.hs....p.p.sh.h.u.h.h.h.h.hp.p....hshlcDsh.cEhcph................................................................tpptp.....s.......t-p..p......t....t.s.......pD.................hp-.-..st.tlhp..sh..sllphsh...s...hlcpl....h........t....h.h.t.t..h......................t..p.......pps....s....p.L-cllshs.pcIssp..lD-lstolY...PPhs......lc.p..ttl..................................................................................... 0 50 78 117 +13167 PF13325 MCRS_N N-terminal region of micro-spherule protein Mistry J, Aldam G pcc Pfam-B_2099 (release 24.0) Family This domain is found in plants and higher eukaryotes, and is the N-terminal region of micro-spherule proteins which repress the transactivation activities of Nrf1 (p45 nuclear factor-erythroid 2 (p45 NF-E2)-related factor 1) [2]. 
In conjunction with DIPA the full-length protein acts as a transcription repressor [1]. The exact function of the region is not known. 22.60 22.60 22.70 22.70 22.50 22.50 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.22 0.71 -4.80 17 183 2010-09-07 13:59:08 2010-09-07 14:59:08 1 4 121 0 119 177 0 156.60 41 30.57 CHANGED RWKPhDDLALItulpQTNDLchVHpG.lKFSC+FTLpElppRWauLLY-PslS+lAssAh+sLHPEsltulQs+ALaSptEEpLLuolpSsp.....pPsL-pFQ-LLccpssVFatuRTAKoLpsHWhLhKQYhLLPDQsVpPhtpsp.pslSFS....DAE-plsDs-Lp-scDE....uLEpELtlsDR+pKR-IRhLENELsRWsVLVD ....................+WpPtDDLhLlsuV.psssLptlahG...V+.FSp+FTlpElpcRWauLLYDPsl.....SphAstuMpp.............LcP-h..hs.u.lp..p+slaS.tEEplLtp.lts..sp.....................tsph.ppFpcLLp..ppspsFa.uRTspsL...sHW...hh+pah..LL...DQshtsh...tt...........shs....c.pt.h.p.................................................................................................................. 0 38 58 94 +13168 PF13326 PSII_Pbs27 Photosystem II Pbs27 Mistry J jm14 Jackhmmer:Q05RN1 Family This family of proteins contains Pbs27, a highly conserved component of photosystem II. Pbs27 is comprised of four helices arranged in a right handed up-down-up-down fold, with a less ordered region located at the N-terminus [1]. 
25.00 25.00 26.40 34.10 21.40 20.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.67 0.71 -4.21 61 146 2010-09-07 15:30:42 2010-09-07 16:30:42 1 2 100 3 77 153 105 141.20 29 87.46 CHANGED hhpcll.......hulsL...sh......sl...h............lsus.............................t.ssss.slo...Gs.....YhcDThsVlpsLpssl..shs.....pDsss+pcstspA+chIs-alu+YRtpsplsu...................h..sSFsshp.oALNuLAGHYssaus+.PLPccl.+pRltpEhppAEpul ....................................................hhhhh.............hshhh....h.......h...h......h.ss....................................h.sssp.sho....Gc...........Yhp-ThsVlpsL+psl.shs.....p-ssshtcstspA+ptIs-alu+YRppsslsu.....................h..pSFsshp.oALNuLAGHYssaGs.p.PlPccl+pRlhpEhspAEtul................................. 0 22 53 70 +13169 PF13327 T3SS_LEE_assoc Type III secretion system subunit Mistry J, Aldam G pcc Pfam-B_2123 (release 24.0) Family This is a family of bacterial putative type III secretion apparatus proteins associated with the locus of enterocyte effacement (LEE). 25.00 25.00 47.40 47.00 22.80 22.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.80 0.71 -4.67 16 253 2010-09-08 13:46:21 2010-09-08 14:46:21 1 2 242 0 12 81 0 129.80 46 81.76 CHANGED hhpLHpLsWpPupaAHPhWLssl.Glpsps.apYGcustLDssLsphLtphRsa.ppsLPssLsspQpp.lthttRlsshslALGLlpLpCsDYLhLppYRpsLtshLu-ssIpQLhGh.hhpGpp......sspL..sPppLsshAhplGpulhpphtsssslh+AlulhLPP .............haphh..Ph.hhhs.WLstl.Ghp..s.hthtcp.thcp.LsthL.p.ht...phl.hs..sppth.ht.h.+l.hhshu.ulhtlpCsDYhhL.cYRQhLlphhu-s-IhQLhGa..hGtps.........tt.L..sPphhppsALplGsulL......sp.tpDsslhpAlhlLLPP.......... 0 2 6 9 +13170 PF13328 HD_4 HD domain Mistry J jm14 Jackhmmer:Q05RX6 Family HD domains are metal dependent phosphohydrolases. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -11.08 0.71 -4.50 118 7076 2012-10-01 20:28:14 2010-09-09 10:34:10 1 37 4412 2 1646 5095 2697 149.10 37 23.59 CHANGED AhthApphat................tppttsGpshlsHslslAt..ll...tph....t...hD...ps.......sh..hAulLacss..........c..............-..........h.............pp............lp..cp....F...GspVApLVpuss.chtplpph..................ttp.......ptph-...slR+.hl...luh.....pD...h....RVl..Ll+LA-RlpslRth.......................tlApEshslausL.AsRLGlh ....................................................Ah.hAtptHp................sQhRp..S..G..-......P.Yl.h.HPlt.VAt.....IL....uch......p........hD........ts.................ol.hAA.LLH..D.ll...............E.................D.......................T......sss.....-c..........................lc..pp......F......Gpp......V.A...p.LV-GVo.Klsclpht..............................................spp...............ptpuE.....Nh.RK.MllAM.s.cD.l......RVl....llKLADRlHN.....M..RTL.....pth.....-...........KptclAcETh..-IYAPL.ApRLGl.t........................................................................................................................... 0 513 1040 1361 +13171 PF13329 ATG2_CAD Autophagy-related protein 2 CAD motif Mistry J, Aldam G pcc Pfam-B_2170 (release 24.0) Family The Atg2 protein, an integral membrane protein, is required for a range of functions including the regulation of autophagy in conjunction with the Atg1-Atg13 complex. Atg2 binds Atg9. The precise function of this region, with its characteristic highly conserved CAD sequence motif, is not known. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.02 0.71 -4.65 37 161 2010-09-13 10:50:17 2010-09-13 11:50:17 1 7 152 0 115 168 1 145.80 25 7.67 CHANGED slslcDCslGLNPhphsuKsllllspuphsssh.tttpsphshpl+cuolhlIDDlps.t..........p..tp.ss.tssphps....hp.shGalslupIoosplslphtp.sppttpp............lDlcl+sDhlhLEsCADSTQTLIsllssLpPPs..Ps..ppKYR.T ...............................................................p.lthhDCslsLpPh.thsu+hllslsssphsssh..tp....sp...p.hphplccuslhlIDDsps......................tt.tstph.p.....hh.spGYVslsplushpls...lph.hpss..ppsppp............l-lclpsDllhLcTCADSspTLlsLlssLt.P...ss..t.Kap.................. 1 27 56 95 +13172 PF13330 Mucin2_WxxW Mucin-2 protein WxxW repeating region Mistry J, Aldam G pcc Pfam-B_2205 (release 24.0) Family This family is repeating region found on mucins 2 and 5. The function is not known, but the repeat can be present in up to 32 copies, as in Swiss:C3Y5K5, from Branchiostoma floridae. The region carries a highly conserved WxxW sequence motif and also has at least six well conserved cysteine residues. 22.40 1.00 22.40 1.80 18.40 -999999.99 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.88 0.72 -3.55 91 678 2010-09-13 12:29:30 2010-09-13 13:29:30 1 101 53 0 385 687 1 85.80 34 12.24 CHANGED tWTsWhstD.pP....s.ssG.DhEThss.lp..tt............hC....ptPs...s.IpCR..sh.........s.tpsGpt.lpCsh.ssGhh...ChNs.-Q..s.......ChDYEVRh.C.......C .........................tWTpWhshchP........u.ssG..DhEohps.l+....tstp..........lC.........ppPhslcCRsps................sthsh.pp.hG.ps....lp..C.sh..shGLh...ChNc-Q.sth.......ChsYclRhhC.................................................. 0 168 181 262 +13173 PF13331 DUF4093 Domain of unknown function (DUF4093) Coggill P pcc Jackhmmer:Q7P4Q3 Family This domain lies at the C-terminus of primase proteins carrying the TOPRIM, Pfam:PF01751, domain. 
The exact function of the domain is not known. 25.00 25.00 28.60 30.40 22.30 21.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.77 0.72 -3.78 141 1316 2010-09-13 13:16:25 2010-09-13 14:16:25 1 5 1303 0 153 724 4 86.40 42 46.33 CHANGED slGVEpAos-sIpcALpclts...ppp..p.ppp.......................lohpDLhphGLhusssu+............pRRctLGchLsI.GYsNuKQLl+RLshatIocc-hppAlp .....slGVEHAS.EsIccA.Ltplpp.....hc.p..t.pp........................IopsDLlchGLluGssu+............cRREhLGcpL+I.GYoNGKQLlKRLphFGlTps-lccAh...... 0 59 102 128 +13174 PF13332 Fil_haemagg_2 Haemagluttinin repeat Coggill P pcc Jackhmmer:Q7P6T3 Family \N 22.00 15.60 22.20 15.60 21.90 15.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.39 0.71 -4.48 155 4185 2012-10-02 14:50:22 2010-09-13 16:16:57 1 347 516 0 992 4738 66 124.60 16 23.22 CHANGED luhs..p..op....opsp...ppss.ss...su..Ssls.uG...ss...lslpAs.......p................uslsltGopl....pus...............s..ltLp.Asp.slsl.puup...ssppppsp.s.............p.SpususGluluhus.ss.......h.sh.su..ssutucu..p..ts..u..sus..shss.opl...s.uu.ss.ls.lpS....Gp..DoslpGAplsu..pplsssl.Gs.....s..LslpShQ..Do.......s..p..h....s..sp......s....tu..u ...............................................h...........tptp...tp.p.p..pt......u..oplp.us......ss.lslpAs......p..........................slsl.p.G..opl.....pu.s..............................................p.s.ls.....l.....t.A.....s.....p..sl.sl..tu.u..p....s..p...p..p..p....p....p...p..p.............................................p....p...t.............t......s.h.........t.............................................................................................................................................................................................................................................................tt.......................................................................... 
0 79 422 731 +13175 PF13333 rve_2 Integrase core domain Coggill P pcc Jackhmmer:Q7P766 Family \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.76 0.72 -3.89 38 2406 2012-10-03 01:22:09 2010-09-13 16:55:03 1 11 1136 0 213 9884 987 54.10 42 39.33 CHANGED EuFFGhLKsEhhasp..papohcchcpslp-YIpa.Y....NpcR....L....K....G....LoPlpYRspsL ...................EsFFGhLK.sEh....a....Y..ut......pa...p..o..h..c..p..L.c.p.s.I..h-.Y.I.c.Y.Y.....NpcR.................l....K...........G....LSP.lp.Y.Rpp.................................. 0 41 116 163 +13176 PF13334 DUF4094 Domain of unknown function (DUF4094) Mistry J, Aldam G pcc Pfam-B_2504 (release 24.0) Family This domain is found in plant proteins that often carry a galactosyltransferase domain, Pfam:PF01762, at their C-terminus. 24.50 23.80 24.50 24.30 24.40 23.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.32 0.72 -3.60 46 197 2010-09-13 16:05:43 2010-09-13 17:05:43 1 6 33 0 130 187 0 86.40 30 23.32 CHANGED St+h.sllLChsSFhhGllhosRh...W...s....t..............tppppchplhpp-ssspp.......tpppclhtcVscT+psIp...sLDKoIosLEMELAuARup .......t+h.shlLshhSFhhGhh.ho.sRh...W....s.t.............................pptppchpll....pp.s.sspcp................pppchht.pVscTp.csIt...sLDKslusLEMELAuA+u.......................... 0 16 77 104 +13177 PF13335 Mg_chelatase_2 Magnesium chelatase, subunit ChlI Coggill P pcc Jackhmmer:Q7P325 Family This is a family of putative bacterial magnesium chelatase subunit ChlI proteins. The domain lacks the P-loop region present at the N-terminal of Mg_chelatase, Pfam:PF01078. 
22.20 22.20 22.20 22.50 22.10 21.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.03 0.72 -3.59 197 3113 2012-10-05 12:31:09 2010-09-14 12:06:40 1 12 3054 0 770 2544 357 95.70 39 19.76 CHANGED puEoSssl+.pRVhtARptQhpRhs.................ph.......Nupls......uptlc.ch.stLsppstphLcpulp+hsLShRuhcRlL+VARTlADLpuppplsppHluEALsYR ...........................t.uEoSssl+.pRVhtA.RchQhpRht.......................ch..............................NApls......upplc.p.a..C..t.LsspstphLcp...A...hp..+hsLSsRuacRlLKVARTIADLcs.s...-.......p....IsppHlsEAluYR........ 0 270 528 664 +13178 PF13336 AcetylCoA_hyd_C Acetyl-CoA hydrolase/transferase C-terminal domain Coggill P pcc Jackhmmer:Q7P7G0 Domain This family contains several enzymes which take part in pathways involving acetyl-CoA. Acetyl-CoA hydrolase EC:3.1.2.1 (Swiss:P32316) catalyses the formation of acetate from acetyl-CoA, CoA transferase (CAT1) EC:2.8.3.- (Swiss:P38946) produces succinyl-CoA, and acetate-CoA transferase EC:2.8.3.8 (Swiss:Q59323) utilises acyl-CoA and acetate to form acetyl-CoA. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.67 0.71 -4.55 251 2410 2012-10-04 00:26:15 2010-09-14 13:36:31 1 17 1795 21 696 2411 257 149.60 42 31.00 CHANGED GopcLY-al.....-.....p.....NPplth.tsssasN.........s...PtlIu..ppsphluINoulclDLhGQVsu-sl.G.......s+..hhSGsGGQhDFl+GAthS..pG..G+oIlslsSTs..p........s...G....p....l.S+IVPhLs.GusVossRsclcalVTEaGlA.sL+G+ohpcRAcsLIs.IAHPcFR-pLhcpA .................................................pchYc.h.....-p.....ps.cl.hh.RPt-hoN.........s..Pcl.Ip..+.hsl..lulNsulEhDlaGplNSspl..G.......s+hhsG.lGGpsDF.sRsAhh.........SIhsssSsA..c..............s...G....p.......I.SsIVPh....ls........pVspocpclcllVTE.Gl.A..D.LRGhosppRActlIs..hAHPcaRstLpch.h............................................... 
0 268 472 621 +13179 PF13337 Lon_2 Putative ATP-dependent Lon protease Mistry J jm14 Jackhmmer:B8D5Z7 Family This is a family of proteins that are annotated as ATP-dependent Lon proteases. 19.90 19.90 20.10 20.80 19.30 19.70 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.54 0.70 -6.13 53 313 2010-09-14 13:27:09 2010-09-14 14:27:09 1 3 292 0 83 269 74 438.10 48 68.76 CHANGED sphF.sGhV.....VRKDLs+plKtuss.VPsYVLEaLLGpYCAosD-ppIcpGl.ppV+chLscpYV+scEuchl+S+l+EcGp.a+lID+lsV+Lsp+cDtY.AphsNLGl..c..clhIssphV+c.a-+LLsG.GlWsllslpYp...................hs-.................spcs.......sP..FhlpsL+PIQhsshD.hcphhpuRppFTp-EWlDlLlRSlGhEPss..hsc............RtKhhhLsRhlPhVEpNYNllELGPRGTGKSHlapElSPa.uhLlSGGpsTVApLFhN.ust...plGLVGhWDsVAFDEVuGl.pFc-.+Dsl.sIMKsYMASGSFuRGc..-..plsAsAShVFlGNls..psV-thl+su..H.LFsPhPpthp.Do...AFhDRlHsYlPGWElPKh..+s.-haTspaGhlsDY.LuEhh+phRc..ps..assslpcaFcl.GssLspRDppAV+KTlSGLlKLLaPs....GphocE-.lcchLchAlEsRRRVKEQL.K+luuhEFhcspFSY .............................................ptF.sGpVVRK.D.L..s+..p..lK.tGA.N.VPsYVLEYLLGhYCuo-D-phIppGlpsVKcILu-sYVRP-EAptl+Sp.l.R.EcGp.a+lIDKloV+LspKcDhY......AphsNLGl..p....ss.lssphV+c...-.+LLsG..GIWsllslpY.....................hc.c..........................................spps.......sP.F.lpsL+P.IQhs.s.hD.h-c.lhpu.RppFop-EW..lDlLlRSlGhE.Psp..h..sp............RsKhhlLsRhlPh.....V.....E...sN....YNlsELGPRGTGKSHl.Y+EhSPp.ulLlSGGQsTVAsLF....YNhus+...plGLVGhWDsVAFDEVAGI..p....FK.D....KDuV...pIMKDYMASGSFuRG+....-.....plpupASMVFVGN.IN..po......V-.s...llKTS...............+LhsPhPpthh..D.o...AFhDRhHsYlPGWElPKh..+P..EaFTspYGhloDY.LAEhhRc.hR+....p.s....a..uDsl-caFcL...Gss......LspRDshAV++TVSGLlKL..LaPs........GpaoKE-.lc.phLphAlEhRRRVKEQL.K+lGGhEFa-lpFSY......................................... 
0 38 63 75 +13180 PF13338 DUF4095 Domain of unknown function (DUF4095) Coggill P pcc Jackhmmer:C2D0I5 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.64 0.71 -3.98 231 2088 2012-10-04 14:01:12 2010-09-14 16:14:53 1 8 1014 0 614 1837 131 118.00 17 48.01 CHANGED Gl.......s.....htlp..chh.cpGplp+lp+GlYh...........h.......sst..................hcp.........hh........hh.stt......h..............s.pu.....l.lu.htoALhhau..l..s.s..p.hP.t.thpls...sspstp...t.php.......tlphhth.p.tphhp..h.Ghpphph....stsltlhshERTlsDhh ................................................tlpchh..ppG.t.l.+l...t.+..G...lYh.............h.ssh...................................hcp.........hh...........hh.ttt..h..............................s.pu......l.l..u..t.t.oA....h.th...aG....l.......s.s....p.hs.....t...th.......p.lh....ssp.stp..................s..tht..................slph.hhh..p.....tphh..........t.h.....t.....h............sl.lhs.t+ThhDh...................................................................... 0 204 424 536 +13181 PF13339 AATF-Che1 Apoptosis antagonizing transcription factor Mistry J, Aldam G pcc Pfam-B_2199 (release 24.0) Family The N-terminal and leucine-zipper region of the apoptosis antagonizing transcription factor-Che1. 
24.70 24.70 25.70 25.80 24.60 24.60 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.99 0.71 -4.00 65 310 2010-09-14 15:30:23 2010-09-14 16:30:23 1 4 266 0 215 308 2 134.70 26 27.10 CHANGED DstKGh..AVcpQpphaDplL-sRI+LQKulssuNpLP.ptpshp.h..................tspptppslpssppsshpLhsslhsL+psLh................................tttp.spstpppKR............t.shschhpphpp....hcpphpsaRsslLpKWupK .................................................................-spKGt..AV+pQhtlaDplL-hRI+LQKulsssNpLP..pspshs.h.........................ttspphppslcsspcsht..pLhps.Lh..sLpppLh.............................................................tttpttpttpppKR....................p.phshp.ch.pthsp....hppphpsaRspsLpKWppK.............................................................................................. 0 68 115 176 +13182 PF13340 DUF4096 Putative transposase of IS4/5 family (DUF4096) Coggill P pcc Jackhmmer:C2D5Z9 Family \N 21.90 21.90 21.90 22.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.52 0.72 -4.12 381 3268 2010-09-14 15:35:10 2010-09-14 16:35:10 1 25 878 0 1120 3161 331 69.30 31 41.37 CHANGED LoDppWshlpPlL...Ps..........tp....t.tt..........t..phspRpllsuIlalh+oGssWRsLP.pc..au..sap..TlappFpcWpcsGlapc.lh..p ........................loDtpWth.lpshl....Pt..........tp....t.t...........t..thctRtlls...ull.a....l.h....+o.Gst..WR.................t.LP..pc......au.....sap.....TlappFpc...WpctGsapplh............................. 0 323 715 905 +13183 PF13341 RAG2_PHD RAG2 PHD domain Bateman A agb Bateman A Domain This domain is found at the C-terminus of the RAG2 protein. The structure of this domain has been shown bound to histone H3 trimethylated at lysine 4 (H3K4me3) [1]. 
27.00 27.00 27.10 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.97 0.72 -4.00 6 1007 2012-10-03 17:27:21 2010-09-14 16:38:22 1 3 816 14 26 965 0 55.10 80 13.56 CHANGED GYWIKCChuCpVD.NTWEPaYSTELs+PAMIaCS+G.uGHWVHAQCM-LoEohLlpLSQuNsKYFC.-HstLs+t..TP .GYWIpCCsTCpVDlNTW.PFYSTELs+PAMIaCS+G..sGHWVHAQCM-L................................ 0 1 3 10 +13184 PF13342 Toprim_Crpt C-terminal repeat of topoisomerase Coggill P pcc Jackhmmer:C2D5X4 Domain \N 43.70 43.70 43.80 43.80 43.30 43.60 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.40 0.72 -8.88 0.72 -4.34 157 1275 2012-10-03 10:42:43 2010-09-15 11:56:09 1 20 649 0 258 1109 139 62.40 33 11.70 CHANGED psCsFplh+p.lsu+plopsplccLLppG+Ts.llcGFpS.KsG+pFsAhL.hlpts...pp............lsF-.Fsp .......sCcFplh+p.lsu+plopsplccLL...ppu+Ts.ll+GF+S.K.sG+.sFcAhLhhpsc...tp............lsFpFt......... 0 74 171 216 +13185 PF13343 SBP_bac_6 Bacterial extracellular solute-binding protein Bateman A agb Jackhmmer:Q7P755 Family This family includes bacterial extracellular solute-binding proteins. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.57 0.70 -5.05 82 6234 2012-10-03 15:33:52 2010-09-15 13:11:13 1 13 2738 20 1426 11548 3747 237.10 19 69.13 CHANGED sphsDllhu......hspthhpphhppshhp..............ssthsphst......tshtDs.cGtah..hhshsshlhhhsppplssp.h....P..psWsDLh.sPpacs..plslssss........shhpslhlslhp......phG.hcsstchhcshttsh+ssp...hschhsphpssp..........lslhshahsphhtppp.........shpllhPc-.GuhlsPhhhhlpps......ptctspthlcahhusc.htphhupsth.........hPs...........hpsshtsph.pstthphlsW-alppp .....................................................................................................................................................................ADllhs....................hsss.h.t.th..t.p..p.u.l.h.t.s.hp.............................................st.t.h...s...p...l...s......................tthp..cs...p.....s..p..ah.............sh..s.......h.t....s.h....u....h..hh...N....p....c....t....l....p....p..........h.......P...........ps...a......p......D...L........h......c........s......c.........a.......+..s........c.....ls.hs.ssts..............tssshsh.l..tshhp.............................................................thG..tc...t...u..h...p...a...h.p....p....L...t......t....s..h......s.phs..........tsss.sh.p.t..h..t.pG-.................hs..l.s..h...s...a....h...h...s...h..h..t.t...ppps................sl.p.h...h....h......P.......p..........p....G.........s.....h.....h.....p.....s.......p......s.....s...ul..h+s...............upp......ct..A.cpa.l.-ahl...S.sc..sQ.p.h..h.spssh...........hPst....................t............................................th.................................................................................................................................................... 0 374 805 1139 +13186 PF13344 Hydrolase_6 Haloacid dehalogenase-like hydrolase Bateman A agb Jackhmmer:Q7P3Y5 Domain This family is part of the HAD superfamily. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.11 0.72 -4.12 179 4798 2012-10-03 04:19:28 2010-09-15 14:26:07 1 25 3106 27 1565 3385 1469 102.30 28 34.77 CHANGED hlhDhDGVLa....................p.......Gsp..s..lPGAs-slptLcpp.G........................hphhhlTNs..us+otpphtpcL..pp.l...G.ls.....h........s.t.c.....pllsSupsstphLpp..........t..............................spplhllGs..p.....s..htptlcphGhp ....................................................................................hlhD.lDG....sla.........................................................................c.........G.sp....s..lP.u.A.tchlc.t..Lppp.s........................hshlalTNs....ss+o.psls.p+L...pp.h...G.ls........s.................st..c......plhTSuhAss..c....alpc.............t...............................spp..l..allGp..t......u..ltptlpphGh............................................................................................................................................. 0 484 900 1295 +13187 PF13345 DUF4098 Domain of unknown function (DUF4098) Coggill P pcc Jackhmmer:C2D3F5 Domain This domain is a C-terminal repeat found in many bacterial species. 
24.40 7.80 24.40 7.80 24.30 7.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.39 0.72 -4.03 289 4531 2012-10-03 02:33:51 2010-09-15 14:57:46 1 38 1424 0 1491 5507 345 72.60 17 44.79 CHANGED shp..s.s..pl..plps.s..sG.s.lpl..p.s..l...pup....l.pl.p.ss..s..G.cl.plp..ss.pu.....shphpsssG.c.lp...l..p...s......phs...slph..p...s.s..sG.s.lp..ls .....................................................................................t.......hphps.s.sG..s..lpl.....p...p...h......psp........h..pl..p..os...s...G..sl...plp.....ss.ps.............slp.l.p..o...ss.G..c...lp.........l....p................t..................tht.........php.h...p...s.....sG.pl...t................................................................................................................................................ 0 607 1005 1297 +13188 PF13346 ABC2_membrane_5 ABC-2 family transporter protein Coggill P pcc Jackhmmer:C2CY30 Family This family is related to the ABC-2 membrane transporter family Pfam:PF01061 [1]. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.78 0.70 -5.03 126 1444 2012-10-03 10:13:34 2010-09-15 15:17:57 1 3 724 0 151 2276 340 201.80 18 90.65 CHANGED uLllKDhh...hh+p....h..hhlh..hh.lh.hlhhhh.hh.....ptth...hh...hshh.shhhs..hhhh..h......ss......hp..hDc.p...sch...sthlhoLPlsRcplVhuKYlhsllhh....hluhll.sh.lh.......s.h.l...h..h...htts.....hsh..t.hhhh..h.hhshh..hs....hlh....hulhlPlhacaG.c+up.h.......lhhhhhh...s.......lh..h.....hhh...th...hp...h..........p..tl..........hht...hhshss...h.....hl.hh...hhhhl.h.lh...hhhlShhlSlpIap ..........................................................................Lhhpshh....hh+t.......h...hhl..h..hh..l.h..shhhhhh...................s...h......hh...........hs.h..h....ss...h..hh.....h.hh..h....h...........ps..........hp...h-c.c.........sch...thhlhoLP.ls..Rcpllh..u+..Yl.....h...sllhh....hh..u..h...l.l.s.h..lh...........................s..h..........h...l........lpss......................h..h........h.h.h.......s.....h.h..h.hh....h.h....hhh.....hu.l..h.....hP..h.....h.a....t....h..u.h...cps.p.h...........lhhhhhh..h.......................lh...h.............hhh....sl....ht...h...........................p..th...............................hp...hhsh.s.....h..........ll.hh...hh.hhl..s.ll...hhhlShhlSlpla............................................................................................................................ 0 67 117 132 +13189 PF13347 MFS_2 MFS/sugar transport protein Bateman A agb Jackhmmer:Q7P3E5 Family This family is part of the major facilitator superfamily of membrane transport proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.27 0.70 -6.08 92 6648 2012-10-03 03:33:39 2010-09-15 15:21:27 1 29 2127 0 1465 15146 4138 393.70 22 86.65 CHANGED p+ls.YuhG...s.hupshhhs....hhsh.alhhFaTDlh..Glss.......uhluslhhlsRlhDAltDPhhGhlsD...pop..o+aG.+a+PWllhuulshulshhlhF.........................................................................................s.P.shshss...........phha......................................h...hlsYhlh.slhYohh..slPahuhssslopcsc..-RsplsshRthhusl.us.hlssslhhsll..........shhussst.................................................................................................................................................................Gahhhshlhulluhl..hhhhshhss+E....................................tpppphshtphhcslhpNc.hhhlhhhhlh..hhhshsltsuhhhYahpYslsss...s...hhshhhh........hhhlssllusslh.sh.lsc+h...u++p.hhhhuhhlhhlshhlhhhhs...........sshhhhhshhslhshshshhh.hlhhshlsDsl-Yuph+..sGpRtpulhhuhhshhtKhuhAlusslsuhhL.shsGasuss.........s..posssltslphhhshlPslhhlluh.lhhhhYpLsc 
...............................................................................................................................t..hh.YuhG......s..hutshh.hs..h.hhh..al..hh...a...Y.......T...c..l.h............G..l..ss.....................................shsGh....l.h....l..ls.+l...hD.A..l.s....D....P.h.h....GhlsD...............psp.......o+..a.....G..........+...h..R.P..........a..............l..L..h..u....s.....l.....s....h.u..l...h..h.h...l..h.F.........................................................................................................................................................................................................................................................................................................h.s.....s.......t..h.s.h.ss..........................p.hla....................................................................................................................................................s..........hlsah.l.h.....sl..ha.o..h.h.....sl..P..a.t....u....h....h..s..s..l...T...p......s.....s.p......c..R..s..p..l...s...u....a...R....h..h....h......u.....s..l...us....h...l..s.s.h.h.h...h.s..lh......................shh..u..s..s...p..t........................................................................................................................................................................u.a..h...h...h...s..h....l..h...u..l..h..u..h...l....h...h..h..h....s...h...h..s..s..+E.......................................................................t...t....t.....p.....p.......h.......s..........h........t.......p........t.......h......p.......t........l........h..........p..........N..........c.........h.........h.....h.......l........h....h....h........h....l...h......h..h......h.........u.....h...s.....l....h...s....u....h...........h..........h.........Y......a......h....p......a....h.....l..s...ps...............t.........h....h....s....h.h..hh...............................................................h.h..h....h..
..s.....s....l....l.....u...s....h...h...h......sh....l...s.c...p.h...............u+...p.....p....h.......h.....h....h....u....h...h....l...h...s...h.....s.....h.....h..h..h..h.hs...............................tsh.h..h....h...h....h.....h...h....h....l....h......s.....h....s.....t..........u.....h....h.............s......l.........h...s.h...h.....u.D...........s....l.-.as.......c.....hp.............s.G.....h..........R.......h...p...G.hha...u..h.h..s.h...h.h...K....hu..u.lu.s.h..h......h.u...hh..L.....sh.....h..G....Y.ss...............................s....Q.ss..t..s..l.s..lh.h.h....h....hh....h....P.s.hhh..hlsh..hhh.hh.a.lp................................................................................................................................................................................................. 0 457 814 1153 +13190 PF13348 Y_phosphatase3C Tyrosine phosphatase family C-terminal region Coggill P pcc Jackhmmer:C2D3Y7 Domain \N 22.00 22.00 22.10 22.30 21.90 21.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.15 0.72 -3.86 223 1604 2010-09-15 14:47:59 2010-09-15 15:47:59 1 18 1059 3 505 1321 176 65.90 20 23.01 CHANGED ph..hpph.tt.tstspthhp....h.....hhss...p..tp.al.pssh.stlcppaGuhcsYLppslGlsspplppLRpphL .........................................tht.................ht....hts.....hhss....c...tc.alpssh.ptlcppY.G..uh-sYLpcslGloppplpp.L+phhL............ 
0 153 323 426 +13191 PF13349 DUF4097 Domain of unknown function (DUF4097) Coggill P pcc Jackhmmer:C2D3F5 Family \N 23.00 22.60 23.00 22.60 22.90 22.50 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -11.41 0.71 -4.70 53 1584 2012-10-03 02:33:51 2010-09-15 17:17:30 1 18 1137 0 191 1342 30 157.20 17 50.57 CHANGED KKh.lhhuhslh...ll.....Gsll.hhh.u..htssu......h.a...spt..pltp..................ppph...cslsplsl.psss.sslpIpp.ussppl..pl..ph........ppp..........hpl........ptss.spLpl.ppp.tpp.hhhh.......shsht......t..tpsplplplPcsh....phs....slplps.s..Gsls...lp.s..l.phc.sl..p.ls.s.su...slphp ..........................................................................hhhh...lh.....uhhh..hhh...s...h...s..................................h.p....................................phsht...pshc..plpl..s............hss...t............s..lplpp..us.s.p...pl..cl..ph.................psp.................lph.................p.s.s..psLpl..ssc....tt...hh....................................hhphts.................tpppl......sltlPcph.........................lc...................plslps....s.......sGslp........lps...l..shc..sh..s.lp.s.ps..G.pl....................................................................... 0 72 121 154 +13192 PF13350 Y_phosphatase3 Tyrosine phosphatase family Coggill P pcc Jackhmmer:C2D3Y7 Family This family is closely related to the Pfam:PF00102 and Pfam:PF00782 families. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.13 0.71 -3.99 403 1860 2012-10-02 20:12:17 2010-09-15 17:39:40 1 30 1165 3 645 2255 428 158.20 28 56.25 CHANGED NhRDlGG............a...st............sGp...pl+hGhlaRSusLspl..o.s....s.DhphLt..slGlppllDLRsstEhppp....P.........c.hhh....s......usp.h.hpl..slh..sss.s.s...........................tt..h.t...thht.t...............t...t...stphhtch..Ypphlps....t..tss....appl..hchlts....s.s...s...slLaHCoAGKDRTGlsuALlLthLGV.sc.-s.IhpDYlloN ......................................................................................................................................................NhRDlGG....................................h...st......................sGp..pl+.shlaRSu.p.L...s.p.l....o.p....t.Dh.t.hL....t...p.....hsl.ptlh...DhRo..s.tEhptt......P.................................s.....h..t........ss.p.h.hth..s..lh..stttt..................................................t..h.t...t.htt...............tt.s.phhhphY.p...p.h.lpst.........pps....apph..hp..h.lhs.................ss....s.....sllhHCo.............AGKDRT..Ghs.u.A.L.l.L.t.h.L.....G......V.sc..cs.Ih...pDYlhop....................................................................................... 1 218 425 550 +13193 PF13351 DUF4099 Protein of unknown function (DUF4099) Ellrott K pcc JCSG - Joint Center for Structural Genomics Family A family of uncharacterized proteins found by clustering human gut metagenomic sequences [1]. The C-terminal repeat region of this family is DUF4098, Pfam:PF13345. 
22.00 22.00 22.50 25.60 21.90 21.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.61 0.72 -4.09 51 427 2010-09-15 16:54:58 2010-09-15 17:54:58 1 5 128 0 40 345 4 84.10 31 20.97 CHANGED ap.pclsWcpLc.phGlo+EpLEp.stp.L-plLpGt+os.llslphshsss.tl.ph-A+LulhpspsG.plsltlaslpp.pspL-p.hh .....hc.sclsWppLc.phGlo+-p.LEp.ssp.L-phLpGtKos.llslp.hshsst.sh.ph-ARLuhhpss-G.plslshHslRc.c.pL-t........................... 0 13 35 40 +13194 PF13352 DUF4100 Protein of unknown function (DUF4100) Mistry J jm14 Jackhmmer:Q05Y37 Family This is a family of uncharacterised proteins found in Physcomitrella. 21.40 21.40 21.70 21.40 21.20 21.30 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.37 0.70 -4.89 15 2840 2010-09-16 09:21:40 2010-09-16 10:21:40 1 6 1 0 2840 2906 0 132.50 52 29.62 CHANGED AYIAKSQHEALMEEKRRGNFDDTREGNSSKRQTRGDKAREAASQELPVKDTSsSLtEKTKETKDKGKSIAYKLLSDIEAATNLKGVLEE+ILNAKlEFtLKElLtIsKKEFHDVIIDSIK+KRQLMuEstMsHAIDARIY+DEEEV.D.sYKQsTNEKNuYNQRVRFED.SDKEhEshSHYTRKHWARsTTEVLVKVGDIEEPIVALVDHGSE ..............................................................................................................................................................................................................................................................................................................................................................................................................................................h.....t...hpssh.shKt..........hL-.Ep...l....Lss.KlEFhl+-hLt..IsK.+-.....Fa-lI....I..ssIK...p.Kp.................................l.h......c-.E.E.E....l...s.........h......t...p...........h.........p....p...................t...............................................................................................................hhhc............................................................................................ 
1 2840 2840 2840 +13195 PF13353 Fer4_12 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:C2CYL4 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.03 0.71 -4.06 147 15922 2012-10-03 08:56:43 2010-09-16 11:21:53 1 90 4368 5 3584 16094 3629 134.80 18 43.59 CHANGED lNGsG...s...R...s.....oLa..VSG..Cp..+..pCc..G..C...aN...pssW............shs....................tGp.a...s.....pchpcpIlppLp...s....shl......pGLol.GGEPhh....Nt..p..sllpLl.++l+pch...s...sKsIWhaTGYph-.cLt......p....p.........ppclLshlD...V.LVDGcFh.psht.D.....s..Lt.aRGSuNQ+lh ................................................................................h.h....h.p...s...Cs..h...........cCt.a......C............h.s.........p..s..h..............................................p..h..p........................................................................t.s..p.....h.........o.........p-h...h.......c..p....l...l...p..t.h.t.........p..........tth..............ts.l.s...l...o....G....G...E.....P.h..l.......th.......p...hl.h..c....l.h....p....t....h....+....p.ps...............h............p.........h....h......h....p...s....s.G...h....h....h.t..p.h...................................h.th....h....p........h...s........h........l....D...h..................................................h........................................................................... 0 1305 2427 3058 +13196 PF13354 Beta-lactamase2 Beta-lactamase enzyme family Coggill P pcc Jackhmmer:C2D5A8 Family This family is closely related to Beta-lactamase, Pfam:PF00144, the serine beta-lactamase-like superfamily, which contains the distantly related Pfam:PF00905 and PF00768 D-alanyl-D-alanine carboxypeptidase. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.14 0.71 -4.87 138 5244 2012-10-02 21:13:33 2010-09-16 14:09:41 1 27 2386 268 694 10619 1975 200.40 28 66.42 CHANGED lss..sp...hh...sh...sscph.hssASshK....l....s....l....h..........ptlp....p.....Gcls....Lsc.....plphp.p..pp.......h..s..s..Gu.....Ghhp.hh..s..s.s..p..holp-LhphMlshSDNsAoNhLlchl....G.hps...lsphhpp.hGhps.stlpph......h.........................................................shp...t.....t...p..N.....to..osp-hspllp....tlhp....s..............p..h.ls................hp.......h........l.p.p.....pth.....p.p.t.ls...ts.l.s..............ps...h...p.lu...pKoGs.......l.ss.....hppD...sGll.hh..ssps.allul ...................................................................................ssp..h...sh...ps.c.c+.ash.sSThKlh....l.s........................ttl...........t...........s.p...p....Lsp.....p.l.p..hp....p...s-........................l...s...p......ho.......s..l.hc.....+t....h.....ss...s...h...olt-..L..s..p.u..s..lphS.....D.N.....s.A..s.....N.h.....L....l.p.....pl.............G.Gs......p....t...................l..s....s...h.......h.......+.......p.......h...G...s.......p..s...s..p...lc+h.......cs............................................................................................................phs....ths....s...st...c...c...............oooPtu.hup.s.Lc....pl.ht.....s....................................p..h.Ls.tpp.......................t.lhp...h......................ht....s............s.ps......s...t..h.....lp....us.lP...........................................tsh...h....l.u..c.Ko.Gs..................t.hs.......s.ps.p......luhlhs..stts..hhls...................................................................................................................................................................... 
0 200 452 595 +13197 PF13355 DUF4101 Protein of unknown function (DUF4101) Mistry J jm14 Jackhmmer:Q05S35 Family This is a family of uncharacterised proteins, and is sometimes found in combination with Pfam:PF00226. 22.80 22.80 22.80 22.80 22.20 22.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.43 0.71 -3.69 63 153 2010-09-16 15:48:57 2010-09-16 16:48:57 1 4 101 0 70 172 95 115.10 26 17.52 CHANGED AppllppWLssKupshussas.hs.....tLppllssshlsp.hpppsp.........thpppstahpa..hphplpulph.sps...sppst...lcAplpEpsphhs.sGphpsp.ohsssh.pV+Y.Ls+p.ssp.W+Ips ............AppllppW.ssKupAhGssap.hs......tLpplLssshLpp....hppput....................thpppshaapat.hplplpslph.sss....sppAt.lcAplpEssplhs..sup.ppt.ohpssh...pl+YpLt+p....sst.W+Ip.................. 0 14 49 65 +13198 PF13356 DUF4102 Domain of unknown function (DUF4102) Bateman A agb Jackhmmer:B3CUZ6 Domain This presumed domain is found at the N-terminus of a wide variety of phage integrase proteins. 27.00 27.00 27.10 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -10.12 0.72 -4.10 255 5922 2010-09-17 08:49:24 2010-09-17 09:49:24 1 16 1625 5 949 4352 701 86.60 34 23.48 CHANGED LT.cptlc.........sh..psps........pp.......htht..D.......t.........GLh.lpVp.s.s...G...sK..sah.hcY................ths..G+..pp......p...h.sl..Gpa....Ps..................l......oLspARppspchpthl.s.p.GhD..P.ttp....+cppct .....................Loctplcsh....KPpc........K......hpls.....D.....G.........tGLh..Lh.Vp....s...s............G.......s+...hWp..h+Y.................+hs....GK........pc........p...l..ul..G.sY.......Pp.....................l......oL...u.-ARppt.p.c.h+phl.u.p..Gh.D...Ptpp+pttp.h............................................................................................. 
0 174 512 736 +13200 PF13358 DDE_3 DDE superfamily endonuclease Bateman A agb Jackhmmer:B3CQR6 Domain This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.93 0.71 -4.45 69 7080 2012-10-03 01:22:09 2010-09-18 09:34:17 1 125 1242 0 3290 6631 624 121.10 21 54.12 CHANGED pllahDE.sshphph.thttuas.pGpp...h.htstt+tp......phshluA.lshps...hhsh.hhh.......................................ps.shsuptaht.alcphltshhp.................thhllhDNss..hH+u........ptlpph..........lps...............................sshplhaL.....PsYSP-LNPIE.thWuhlKppltp..tthpshcslp 
.........................................................................................................................................................................................................................................................t...............h....hhus...hp.....tt......h......h.h......................................................................................................................................................................................tt...thst..t...h.....t...hl.p..p.h...h..........h..p..................................thhllh...D.Nss..............hHps....................................pt.lp.p.h................hpp..................................................................................................................................tp...h.p.....l.h.h.h.......P.s.a.S.P.-.L.N.P.IE..phWth.l.+.p.phhp.............h.................................................................................................................................................. 0 1274 2326 3074 +13201 PF13359 DDE_Tnp_4 DDE_4; DDE superfamily endonuclease Bateman A agb Jackhmmer:B3CSL4 Domain This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. 
24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.86 0.71 -4.63 61 3886 2012-10-03 01:22:09 2010-09-18 10:01:42 1 51 549 0 1579 4471 221 120.50 28 53.91 CHANGED lDsocl.l..ppPp...s........pstsasthKppp........olphhl..sssssGhlhh.lstuasGpssDpplhppS.shhph.......................ss.chlluDpuFs..hpp.........h.hshpt...........tttpl.sspc..tphNpplu.phRhhVERshshlK.paphL...pptlthp.............hpphsclltssssLpN ...............................................................................................................................................................hph.h....h........s.s...p..hp.hh....h.h.....h.....s...........h......pDh..p..l..h.p.......u......h................................................................s..hh.hluD.u..GYt.....sh...........tl..................sPh+t..........................................................phps......L......sspc......cthNct..l.u....p......t.............RthlEp.hhup....lK...pa+hh.....ppthc.p.....................hpphsh.h.h..h.hsslhN............................................................................. 0 715 1056 1438 +13202 PF13360 PQQ_2 PQQ-like domain Bateman A agb Jackhmmer:B3CRY3 Domain This domain contains several repeats of the PQQ repeat. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -12.25 0.70 -4.91 112 7855 2012-10-05 17:30:43 2010-09-18 13:57:24 1 358 2271 34 2581 8458 4130 182.50 15 45.19 CHANGED spupl.tuls...t....sGpplWpt.....sh.t...........................................s.............huuGl..sh.sssplhls...s.......stuplhulcsssGphlWpppl..suthhus.....................P.hlssspl.....hlhs...........sDs..plhu.....ls.spsGch.......................hWp.hpts.......s.sshs..ltussssshs....sshl.....lsshs.....uGcl...........hulshpsGph..hWptsl.utsp.............tssphp....plsDlsusPlls....ss...plhssuhpG....p...h.....ssh.-htsGp....hh.W.sps.huu................hps.shsssshlahss.p...................p....uplhslctpsGphhWpp ..........................................................................................................................................................................................................t..................................hWt............th....................................................................................ts.h.......s.h........s..s...h...l...a...hs...........s....................tp.s..t.l.h..A..l...c..s.....t...s....G......c......h....h.....W..p.h...ph......ss.t.h..tss...........................................................P...s..h....s...s...s...tl......................hlss......................................tsu..........pl.hA...............l.....s....tts..G.ph..........................................................................................................hWp......................................................t........h.........s....hh......................tthh......................h.h.s.................................ss....l..........................................sh...t....t.....p...G..p........hh...t....................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 857 1659 2192 +13203 PF13361 UvrD_C UvrD-like helicase C-terminal domain Bateman A agb Bateman A Domain This domain is found at the C-terminus of a wide variety of helicase enzymes. This domain has a AAA-like structural fold. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null --hand HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.30 0.70 -4.99 35 17009 2012-10-05 12:31:09 2010-09-18 16:32:58 1 133 4760 29 3900 14321 6091 300.30 21 37.70 CHANGED lpLEpNYRSstsILpsANplI.............................ppsppchpK...tLhsspssu.pplphhpspscpc........EAchl...spcItp...............htpp....shp.hpDhAlLhRs.NtpucslEcsLhpts.IP......Y.clsGshpFa-RtEI+DlLuhL+.llsNspDc.............hulhRllssPt.+G....IGssolppltp..hspppplshhphh.............................................................................................................................................................................................hh.ttlstcstptlpsFhphl........................pphp.p..ht...hshpphlcpllcctsh.hphlppt.....cpucp+lpNl..pcL..hshhcca..............pp.t.................p.tsLhsFlpchsLpsptpcpp.......tpt....st.VpLMTlHuuKGLEFPhVFlsGh....pcu............................................hhP.............................ptpuh..tptcplp...EERRLhYVulTRAccpLalotspp 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................LppNaRSp..tllphs.s......lh..........................................................................................t..t......t....................t............h...h..................t....t...t.....t.................t..........h............h.....h..............h........s....t...t......................................c...s....p..h...l.......st...p..l.p................................................................................h.tt.........tht..hp-....h..AlL...h.....R....s.....p.....t.........p........s...t..................h...p....p....h..h....h....p.....t.....l..P...............h...hh...s..t.......p..h..h..p..p.......p.l.p.t.h.......h.s.h....L.p.....h.l..........s..............p...p................................s.h........t.h..l.t..........sh.....h.......t.......h..s.........t.p....h..t..l.......................................................................................................................................................................................................................................................................................................................................................................................t..........................h......t..h....h...t...h.h.....................................................................................t...th...t...............................h...h....t...t....h....h..........p.......t..............t....h........p...h.h.tt.......................tst.t.ph..t..s....l......tth...
......hp.h.h.p.ph.......................................................t....................................................................................t........t....l..........t...h...l...p.....p......h....t......h........t..tt....ptt................................tt......................st..V....p.......l...h.......Tl........Hsu...KG..LEa.s..h..V.....a.l....s..s.....h.......pc...s..........................................................................h..hP.....................................................................................................................................................................................................................t.t.sh............p.tpp..l..p..........E..E.....p......R....LhYVulTRAccp.Lhlsh...t........................................................................................................................................................................................................................................................................................................................................................................................................... 0 1318 2605 3363 +13204 PF13362 Toprim_3 Toprim domain Bateman A agb Jackhmmer:B3CQ99 Domain The toprim domain is found in a wide variety of enzymes involved in nucleic acid manipulation [1]. 
25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.30 0.72 -3.75 51 1268 2012-10-01 21:47:57 2010-09-18 17:02:44 1 39 840 0 288 2429 926 98.00 25 17.03 CHANGED hlhlsEGlEouLSl....htshsshsshAshus..sslp......slths..t.thccl..hlstD.sDss...........GppAstcl........sc+hps.tuhpstlltP........pu........DaND..slpptGtc ..........................................lhluEGhtTALol................thhss.h.ss....h....A...s.h.su......s.p.Lp.....................................sl.ths.......t..ss..p..pl.........lIs.u.D.sDhs.t...............................Gpp..su.pph.....................s.pt...h...t...t.........s.....h..t..sh..h..l.P.................shs..................DaND...hhpt.G............................................................. 0 65 161 226 +13205 PF13363 BetaGal_dom3 Beta-galactosidase, domain 3 Bateman A agb Jackhmmer:Q700S9 Domain This is the second domain of the five-domain beta-galactosidase enzyme that altogether catalyses the hydrolysis of beta(1-3) and beta(1-4) galactosyl bonds in oligosaccharides as well as the inverse reaction of enzymatic condensation and trans-glycosylation. This domain has an Ig-like fold [1]. 27.00 27.00 28.80 29.80 25.70 26.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.52 0.72 -4.59 56 193 2010-09-18 17:35:32 2010-09-18 18:35:32 1 10 99 6 137 210 0 80.10 32 8.04 CHANGED WsPsl.ssss.s.......sp.sllVpGsYLVRoAslp....G..ssLpLsGDhsso.TslEVaus.p.slpslpWNGcclpsspos.hGslpu ...................Wss.h...spss.s.............sp.sllVpGsYLVRoAslp......GssLpL....pGDhsso.....TslEVh..........us..p..slpslpaNGcplptp.psp..Gshh............ 0 39 74 112 +13206 PF13364 BetaGal_dom4_5 Beta-galactosidase jelly roll domain Bateman A agb Bateman A Domain This domain is found in beta galactosidase enzymes. It has a jelly roll fold [1]. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.92 0.72 -3.80 105 562 2012-10-03 19:46:52 2010-09-18 22:23:13 1 26 198 12 334 675 18 112.50 22 20.36 CHANGED Ys-p..uW..ssss..sos.........s...sss.........hsu...hpsGsthYRspF......p....h-..hu....hp.............stsusuat.........splalNGh.hGp.as......shusp........ssaslPp....slLsh.....p........stlhsslhspsuts ................................................................as-p..sW...........sshs..sst...............ttss.........hss.....p...sG.shaYRupF................s.u.ths......hshp.............hs.spuus.s.ht.........splalNGh.hGp.ah.......sshGsp.......................ssaslPp..ulLsh...p........sslhsslhs..t.............................. 0 96 180 272 +13207 PF13365 Trypsin_2 Trypsin-like peptidase domain Bateman A agb Jackhmmer:C0AF95 Domain This family includes trypsin like peptidase domains. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null --hand HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -11.06 0.71 -4.12 318 12156 2012-10-02 13:45:52 2010-09-19 13:18:49 1 253 4752 80 3773 11497 4680 143.10 28 31.28 CHANGED GoG..h....ll...ssss............................hllT.s.................tHVlp...................................................................ts.pplthhhhsspp.ht...................................................uplhths.....t.................................hDl..Allplpsstttthshtssstthtssthhhhhh.......................................................................................httstshhhttthtstttttstphhhh.........................sssht.s.....GsSGuPl.hs...ppGp...llGl 
..................................................................................................................GSG..h...ll.....sp.s.G...............................allT.N.................sHVls...................................................................................................................suspl.p....V..p.h.scucp..hs.....................................................................................................Acl...l...G.tDs....p..................................................sDl.All+lpssphhshhhhsss....................................................................................................................................sthshsssssshh...ssss.hhs.s.h.s.t.s...s..s..t.t.t......tp...h.....................................l....QTDA.u.IN..s...............GNSGGsL...lN....hpGc...llGI............................................................................................................................ 0 1232 2423 3164 +13208 PF13366 PDDEXK_3 PD-(D/E)XK nuclease superfamily Bateman A agb Jackhmmer:C0AD98 Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily 22.00 22.00 22.00 22.10 21.90 21.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.26 0.71 -4.04 106 398 2012-10-11 20:44:47 2010-09-19 14:41:19 1 8 204 0 173 467 252 110.60 36 80.21 CHANGED p-loppIluuAhcV+ppL.GsGhLEsVYcpuLthELpppGlshcpQhtlslhY+uhhlsp.apsDllV........psplIlELKu.VcplsshHpAQllsYL+hos..........hclGLLlNFss...h+.t.......h+Rl .............................t.plshtIlssuhcVappL.GsGhLEslYcpsLhhEL.pppGlthpp...Qh.ls.lhY+s...hh..ls..p.a+hDllV........psplIlElK.u......Vc...p....l....t......s....hHpuQllsYL+hos..........hclGLLl..NFss....p.thpR....................... 0 96 155 171 +13209 PF13367 PrsW-protease Protease prsW family Coggill P pcc Jackhmmer:B0SDU2 Family This is a family of putative peptidases, possibly belonging to the MEROPS M79 family. 
Swiss:B7GHM8, PrsW, appears to be a member of a widespread family of membrane proteins that includes at least one previously known protease. PrsW appears to be responsible for Site-1 cleavage of the RsiW anti-sigma factor, the cognate anti-sigma factor, and it senses antimicrobial peptides that damage the cell membrane and other agents that cause cell envelope stress, The three acidic residues, E75, E76 and E95 in Swiss:B7GHM8, appear to be crucial since their mutation to alanine renders the protein inactive. Based on predictions of the bioinformatics programme TMHMM it is likely that these residues are located on the extracytoplasmic face of PrsW placing them in a position to act as a sensor for cell envelope stress [1]. 21.50 21.50 21.60 22.00 21.10 21.10 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.73 0.71 -5.11 184 1339 2012-10-01 21:07:14 2010-09-20 14:09:30 1 22 1111 0 303 941 360 189.90 23 55.85 CHANGED pPhthllh.sFlhGshsuhh......shhlp.......thht..............................hh..hhts.hlhusllEEhsKhlslhhhh.......h.htppph..........sp...hD.Gl.lhusssuhGFAhhENlhYh....h.............................................shhts......hlhR.slhu.shuHsla...ouhhG..huluhst...htt...........t.....hhhhhuhl...huhhlHulaNhhhhh..........................hhhhshhhhl .................................................................................hhhllh.shhhGshhuhh.......shhlp......................................phlh..h.........................tth.s.tshtsslsusllEEssKhlsllhhl..................................h...tthch..........................................spl...hs..sl.lhG...sss....GhGFAhhEslsYhhp.h...........................tt.s.........................sslsh............................hlhR..shhu.lsuHhla.oul..sG...hulslsh................pptt.............t.hp....hthhhhhl...hAhslHhlass.hsh.ts..h...............hl...hhhh.h....hhhhh.................................................. 
0 113 225 277 +13210 PF13368 Toprim_C_rpt Topoisomerase C-terminal repeat Coggill P pcc Jackhmmer:B0SHF2 Domain This domain is repeated up to five times to form the C-terminal region of bacterial topoisomerase immediately downstream of the zinc-finger motif. 22.30 22.30 23.10 22.30 21.90 22.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -9.05 0.72 -3.49 42 3284 2010-09-21 08:44:24 2010-09-21 09:44:24 1 25 1144 0 1046 2994 3373 67.30 30 21.53 CHANGED cPtsG.c.sh.V.+-GRFGPYV.....TcG...csN.....s.oL.+s..sss-slT...hccA............hp.LLA.-+.cA....+usu...+...K....p..s.Ap.K ............cPtsG.c.l..l..psGRaG.PY.l.....................p.c..G...........pss....................................u.oL.+...s.......ps......s..tslT........L-cA..................................lc.Llu...t.....+t...........tu.t........................ts............................................................................. 0 338 755 931 +13211 PF13369 Transglut_core2 Transglutaminase-like superfamily Coggill P pcc Jackhmmer:B0SCJ3 Family \N 23.50 23.50 23.60 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.91 0.71 -4.72 183 1446 2012-10-10 12:56:15 2010-09-21 10:38:47 1 21 1341 0 427 960 170 155.40 32 47.73 CHANGED pthhtpLcplstcscpp.....lstp........................................................p........plp.tLtchhapchG.....FpGspp....sYhssc..........Nualsp.......VLcpRp..GlPloLullaltlAc+.lslslhuVsh.PscFll+ht.........................hhl-Pas.G.c....hlstppl.pthLpthhs............hp....hpsp.....hLp...sssspplltRhLs 
......................................................................................................p..hhscL-pLsp.scpp.............lsp.......................................................................................................................................................hs.cp.............plc..tLhplFY..t-..h.G......................................Fpu..sps.......sYh.s-.................sha.Lcp....VL..c..p..Rp..G.slSL.......uslhLh......lAp+..LsL...PLhsVh.F.....Ps..p...h.lLRhp.....................s.-sp.........................hhIsPFs.G.c........pLspctL...chhLcsphus............sc...lh.p............Lp...tAsshpllt+hL.s............................................................. 0 135 239 341 +13212 PF13370 Fer4_13 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:B0S9J0 Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.09 0.72 -3.58 110 1093 2012-10-03 08:56:43 2010-09-21 10:44:49 1 12 947 18 263 2183 451 59.60 39 58.75 CHANGED V......D..ssCIsCssCtthAPclFphssctttshl.h......c..........p....sp..s..........pp-.....p.....ppshpAhtuCPssuIts .............VD..-pCIA.CG...tCsshA...P..-...l....FD..a.-.D.-...s...ht..hll.h..........-.................c...sp..s....................-c............h........-shh-A.hc.uCPTcuIc............................. 
0 91 192 235 +13213 PF13371 TPR_9 Tetratricopeptide repeat Coggill P pcc Jackhmmer:B0SCJ3 Repeat \N 21.00 8.30 21.00 8.30 20.90 8.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.09 0.72 -4.12 103 4976 2012-10-11 20:01:03 2010-09-21 13:12:18 1 1320 2289 0 1951 43963 9522 67.30 16 14.49 CHANGED LKthh.hppcchppALpsh-hlLhl.....tPc...ssh-hRDRGllhtpLsshpsAhpDLptal....ppsP.....pssssphlctplp ...................................................h.........ttp.h...p.A...h...t....h.h..p.....t..h...l..t..h.........................sP.p.................sst....h.....h.......h.......p........p..........u........h.....h....h.....h.......p.........h........s........p.......h.....p....t...A...h..p..s......l...p.t..hl............p.p.s......................pt..................................................................... 1 652 1138 1559 +13214 PF13372 Alginate_exp DUF4104; Alginate export Coggill P, Eberhardt R pcc Jackhmmer:B0SIJ8 Domain This domain forms an 18-stranded beta-barrel pore which is likely to act as an alginate export channel [1]. 
25.00 25.00 25.00 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.22 0.70 -5.85 23 682 2010-09-21 12:51:04 2010-09-21 13:51:04 1 7 487 4 258 716 125 365.20 17 76.18 CHANGED shlshGhslRtRhE.hssssthGstsstcDshllpchplaA-hchss....phphhlphpDsRsaspp.................sssssD..pNtlDlcpAals.......as.ssLs.stshc.lRlGRQchshstpRa....Iusp-h.NsppuFDulhhcac..s....spaRhpuhhspPsphhssph.s........DspscshpFsusphphps.hsssslpsYasthpcpsup..........................hhcssssc+hpshulRhsGpt.......sthDa-hEuhhQoGps....Gspshcua............AhuucsG..YTh.shshpPRlulphDhASGD........pcssDGslpoFssLFssut....aauhAshsuhuNLhpspsplolpP....ssplsh.suhthhWRtsssDulhspstss..............l.ssuGpuu.casGpthclR.............hc.ahhssphthtlcuuaFpsG-sl+susspssshsssp ...........................................................................................................................................................................................................t...phshphR.cht........p.t..........h............................t..t..s..t...t......h......h.........ph...thh.h..hphtt....................thphhhph..ss.h.h...st..t..............................tss-.......ps.thslpp..uahp............hp...tht..........s.....hp....hphGRQph......ths.s...p..+h..................l..u..s...hs........h...ss...t.p.s....a.D.uhththp...h.....................sphp.h........s.h...h...h.h.....p...s.............sth....................................cst..s.t..p.pth.h...s.....h...h....h......p......h.........p...............h......t...........h..t.......h.....p...s..ahh.thp.pptst..............................................ht...t..t...t..s.p.p.hpshGh.+htuph.................tths..a..phphhhQ..t..Gph...............sspshpuh................................uhts..p..h..G......a..ph...t...s.h...pP......ph..s.......h..th..sh...h.....SG.s..................p.t.ss.p........h.t.sF......p.sh....a.sssh...........................hhu...h..th...h.....t...........s.......N......l...sh..t...th.php
s.......ptph.shthth.hh.hhh...pttsshhtts...............................................st..p...hGt.hph...........................hp..a.ht..ph..hththshh..up...hht..t............h................................................................................................................................................. 0 94 183 221 +13215 PF13373 DUF2407_C DUF2407 C-terminal domain Wood V, Coggill PC, Bateman A agb Pfam-B_17915 (release 21.0) Family This is a family of proteins found in fungi. The function is not known. There is a characteristic GFDRL sequence motif. 25.00 25.00 25.50 26.50 22.50 24.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.98 0.71 -4.26 28 151 2010-09-22 10:47:12 2010-09-22 11:47:12 1 7 140 0 116 144 0 129.40 32 39.30 CHANGED hGFDRLLssGFoppElssLRpQFhulaus......ppss-shss.p..............slRpLE-pWh-ssus..................................tssts.s.ssphss.........hsssstps.-DLLhGhllGhFhGlhuhlaLh+--..Glas+Rp+hulhsGlhlNhsauhlRs ............................................pGFDRLL.ss.GFotpElstLRpQFhuhh.u......hppss-shssss...................clRpLE-pWh-ssus......................................................stss.s.tsthst................hssssttshcDhLhGhhhGFFaslhshhaLh+pc..ul..astRpphulhsGlhlNhhhuhh+..................................... 0 38 68 100 +13216 PF13374 TPR_10 Tetratricopeptide repeat Bateman A agb Jackhmmer:Q7P422 Repeat \N 22.00 10.00 22.00 10.00 21.90 9.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.33 0.72 -7.76 0.72 -3.89 291 5651 2012-10-11 20:01:03 2010-09-22 18:02:07 1 1557 998 7 3285 40335 8366 38.20 20 7.91 CHANGED shsshssL.At..hhtpt.uchpcA.pplhcp.s.lphpcpl....hG.s.....cH....ss ....................................shpsL.ut......hh....tp....t..G.....c....hp.....cA..t...sl....hpp.s.lt.htpth............................................................... 
0 1339 2317 2944 +13217 PF13375 RnfC_N RnfC Barrel sandwich hybrid domain Bateman A agb Jackhmmer:Q7P7C9 Domain This domain is part of the barrel sandwich hybrid superfamily. It is found at the N-terminus of the RnfC Electron transport complex protein. It appears to be most related to the N-terminal NQRA domain (Pfam:PF05896). 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.17 0.72 -4.29 96 1912 2012-10-02 20:27:15 2010-09-23 09:15:51 1 35 1463 0 364 1936 307 94.90 37 16.91 CHANGED hsF+.GGlHP.s-s.KphS.pspPIpph...hspclllPLpQHIGAPucPlVphGDcVLpGQhIucu.s.G.FhSuslHASsSGsVpuI-.+sssp.uo..t..s..p..sllIEsDGc ........................................................th..GGlH..P..c...Kt.o.pttslp.ph.s....h.s..p.c..hs.l...PL..p....QH.....I....G......A........s.up..sVpsGD+Vh+GQh..lscu.....p......G..hh.....ss...PlHAssSGsVsu.Isspsss.p.ss.t..h......slhIpsDu............................. 0 145 261 317 +13218 PF13376 OmdA Bacteriocin-protection, YdeI or OmpD-Associated Coggill P pcc Jackhmmer:B0SHC2 Family This is a family of archaeal and bacterial proteins predicted to be periplasmic. YdeI is important for resistance to polymyxin B in broth and for bacterial survival in mice upon oral, but not intraperitoneal inoculation, suggesting a role for YdeI in the gastrointestinal tract of mice [1]. Production of the ydeI gene is regulated by the Rcs (regulator of capsule synthesis) phospho-relay system pathway independently of RcsA, and additionally transcription of the protein is regulated by the stationary-phase sigma factor, RpoS (sigma-S) [1]. YdeI confers protection against cationic AMPs (Antimicrobial peptides) or bacteriocins in conjunction with the general porin Omp, thus justifying its name of OmdA, for OmpD-Associated protein [2]. 
22.40 22.40 22.40 23.10 22.30 22.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -8.59 0.72 -4.32 168 1185 2012-10-03 20:18:03 2010-09-23 13:04:02 1 11 931 0 364 981 110 61.90 27 33.61 CHANGED spshplP.sDltsALpssspAtthFpslstst+pshlthlt..sAK+scTRt+Rlpchlphlscsc ...............t...h.lP.p-lpstlp..ppPth.tstFpsLosutp+palhalt..sAK...pscTRp+Rlpchlphl.pG............ 0 160 276 335 +13219 PF13377 Peripla_BP_3 Periplasmic binding protein-like domain Bateman A agb Jackhmmer:C0AC70 Domain Thi domain is found in a variety of transcriptional regulatory proteins. It is related to bacterial periplasmic binding proteins, although this domain is unlikely to be found in the periplasm. This domain likely acts to bind a small molecule ligand that the DNA-binding domain responds to. 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.04 0.71 -4.24 246 26367 2012-10-02 13:57:41 2010-09-23 13:14:04 1 51 3428 168 5469 22392 1655 160.60 23 47.18 CHANGED scaL.hppG.a++lu....hls..............sttt..hsptRhpu........atpth.pptsht.sphhhhhttttt.tt..t..............................................................s.....s....Alhs..ssDthAht.lh.pshpp.t............Glp..l.P.cDl.ullu.hs........t......hh..phssP.sL.....o.olph.shpphu.ppAschLhph...l.......tstpts.pp.....hh.hss.p.llhR.pSs 
.............................................................................................................................paL.lp.t.G.+.c..c.Iu....hls.............................sstph.tsspp.R.h.pG......................app...uh....p..c.t..s.ls...h...st...t...h..h....h..t...s...t......s...t.t.s.s.httht....................................hl......pth...............................................ts...........s......Alhs...ssDt.h..Ah..G..s.l.ps.h.p.c..t...................G.lp.........l....P....c.........D.........l...u..ll..G..aD.............sh.........hu....ph...h....p..P...sL........................T.Tl...p..p..s..h.p.p...h.G..ppAschLlpt.....l........................pt..t.p....h..s....pp.................hh....lss..p.LlhRpSs.................................................................... 0 1692 3369 4443 +13220 PF13378 MR_MLE_C Enolase C-terminal domain-like Coggill P pcc Jackhmmer:C2D1R2 Domain This domain appears at the C-terminus of many of the proteins that carry the MR_MLE, Pfam:PF01188 and MR_MLE_N Pfam:PF02746 domains. EC:4.2.1.40. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.44 0.72 -4.09 1015 7138 2012-10-02 01:07:48 2010-09-23 13:18:26 1 22 2468 500 1681 7613 3271 111.80 23 28.55 CHANGED sEp.h...hshtshpphlp.tsu.s.....t..h.lphcls+sGGlspsh+lush.Acshslt...lhsHs......ssluh.suslp.l..sssh.s.............s......hs..........h.h-................hh...h.............tph....ht..............s.h........s.Gh...l.tl..P.....t..P.....GLGlc ..................................sEhhtshhpht.phlp..ttu.l....Dh..lphchs+sGGlstsh+l..Ash.A.ct.a.s.ls............hssHu......ssluh...utshH.lss..ss..s............s...........hs.......h.pE............t.h.t......................ch........lp..................s.h..t..hc.s...Gh...l.ps...s..p....t..P...GLGlp........................................................................... 
0 405 960 1336 +13221 PF13379 NMT1_2 NMT1-like family Bateman A agb Jackhmmer:C0AC77 Family This family is closely related to the Pfam:PF09084 family. 24.70 24.70 24.70 24.70 24.60 24.60 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.43 0.70 -5.00 82 4162 2012-10-03 15:33:52 2010-09-23 14:15:53 1 25 1726 8 1437 7965 1155 236.20 18 65.58 CHANGED ssPEpspl+lGFIsLTDsAPL.llAtcpGaap+a....................Gls.l..plt+puSWussRDpll....sGplDuAHh.LhshshhhphGh...su.thshhsLhsLshNGpuIslusphhsthh..............hssssl+thhtp.pt........tshphAhoFPsuTHshhlcYWLAusGlcP...............pDlphlllPP.PQMVushcsGsh-uaCV....GEPWstpAl.tcslG..assh....oot-l.......WtsH..PEKs......husptcal-ppPssspAlltAll-AspahD..sstN+pcsApl.l....u ................................................................................................................t......tlp..lG..h...........h.h...t....h..h..sh....h....h.....u.t...p.....p.....s.h.hpct......................................Glp..l......phh...p..h.s...u...s.s.s...l..h..p.....ulh...............uG.plD..h..uth..hss..h..h...h...s.h.s.t.Gt......................s.h..h..s..h...s......s.....h...s...t......s.......G.....p.....s...l...s...l..p.p.t......................sshp.slt-l...............cuhplu.h..s.h.....s..u..s.p.p.h..h..l..p..h..h.L....t.p..s..Glss............................pDl..p.l.h..h.h.sP...s.ph.......s.s..u.h...p...s..G..p...l..D..u...h.s..s...........h.-..P..a...s....s...ts......t......p..s..h..u......phlh............ss..t.p.l......................h.s.sh.........s..t..ps...........hhs.pp....p....a.h..c..p...p..P...p..s.s..ps...h...lp...u.hhc...Atpa.....hp...t...t.....t....ttt..................................................................................................................................................................... 
0 383 868 1164 +13222 PF13380 CoA_binding_2 CoA binding domain Coggill P pcc Jackhmmer:B0SEN3 Domain This domain has a Rossmann fold and is found in a number of proteins including succinyl CoA synthetases, malate and ATP-citrate ligases. 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.45 0.71 -3.92 341 4531 2012-10-10 17:06:42 2010-09-23 16:03:17 1 30 2872 8 1431 4521 2487 121.00 29 26.68 CHANGED +s.........lAVlGhS..s.pss+...suht.lhphLt.t.......pG.hplh.....PVsPp........pplhG.t.......t.....sassLs...t.s........tslD.....................hVslapssptsssllcch.hsh........s....scslWhQ.G...........shspcstphAcp......s.Glp.ll..spChtltts .............................................................................+olAVlGAS..s..c..ss+.......s..u.ht....lh+.Lhp...........pG.....p...lh......P..V...sPph...................................ppl..h..G...............p................sYsols.........p..........PtslD......................lsslhpsu.c.ts.sllc-h..hpt.....G........s+.s.hh.l...u...................hhp.c.c.h.t.t.hAcc............t.G.hp...llh..s.pCltl...s............................................. 0 459 926 1215 +13224 PF13382 Adenine_deam_C Adenine deaminase C-terminal domain Bateman A agb Jackhmmer:C0A9T6 Family This family represents a C-terminal region of the adenine deaminase enzyme. 
27.00 27.00 27.50 27.40 26.40 26.20 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.68 0.71 -5.10 110 1480 2010-09-23 16:28:37 2010-09-23 17:28:37 1 12 1271 6 346 1189 171 167.40 36 29.18 CHANGED spl.lTcchhhpl......pl.p...sGhh..ss..ppD..l.hKlAVlERapt.s.GslulGhlcGaGl+.pGAlAoolAHDSHNllVlGssDcDMhhAlsplhchtGGhsls........psGc.llupLsLPlAGLMSs.pshcpVscplpplpp.shc.p.lG.s.s.h.p...sPFhoLSF.LuLsVIPcLKlTDpGLhDVppaph .........................s..h.Tp.h..ph....h..t....ss.h......s...p.p.D..l.shlAVlE.Raup.p....tsh.uhGllpGF....G..l.p..p.G.A.lAoTluHDSHNIlVlGp..ss--MthAsNplhphGGGhslV........p.s.Gp...Vhup..lsLPIAGLMSs.pshpp.lsc.plctLcp.Ahc.c..h..G...st....hs....cPFhphuF.LuLsVIPtLKlTspGLhDspphp................... 0 136 260 294 +13225 PF13383 Methyltransf_22 Methyltransferase domain Coggill P pcc manual Family This family appears to be a methyltransferase domain. 21.90 21.90 22.10 21.90 21.80 21.40 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.90 0.70 -4.84 9 252 2012-10-10 17:06:42 2010-09-23 17:52:57 1 11 100 0 209 288 84 207.40 20 68.36 CHANGED u.AsKcu.uFFDchhsshhpthpc+hh.Fpphspst.................ppcsttWY.-NlEPshTC...sp.pRlGs..tG.........-GsKWlCDPpRLhp..........pts-CLlYSlGSsscasFE-ulhc.lups.CEIHVFD...tshsps.ssp.pthaap.hGluuuh-.shss................hhsht-h..tLsH.htRTIDlhKIDCEtCEWusYtsWhssDs...cphLlElHu..............................Psppss...........spcFapthhcpsahhFpKEsNl.a...pptClEauaI+ 
...................................................................th.....................................................................................h........hhu......s.........................DGsp..h..lC.t.sp...t.h.........................................................pC.hlaSh..G.....h.s.sphsF..-tph.hp.hs...CclasaD......s.......t...p...t...t.......p....t.t......p.h..ha.....tph.........ulu....tt...s...s..p.....t...st.............................hsl.tslh.p..t.h...s....H.....t..p.h.l-lLKh.DlE......u....s.E...a....p....s..L.........p..s..h.l....p..p.th...................hpQlhhElH.h.............................................st..............................hh...p...t...htt.s..hhhat........................t................................................................ 0 84 110 190 +13226 PF13384 HTH_23 Homeodomain-like domain Bateman A agb Jackhmmer:C0A3L3 Domain \N 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.92 0.72 -8.52 0.72 -4.18 345 2487 2012-10-04 14:01:12 2010-09-24 17:38:01 1 100 1542 3 717 11862 1498 45.90 20 22.10 CHANGED stpttpRhp..hlthht...p........uhshpplAphl.s.loppolt.ch..hppa..........p.pG....hpulh ......................................t......h.t...hhhhht......p...........Gho.hppIActl..G.lo.csT.Vt.+h...hpp.........................t................................. 0 199 482 617 +13227 PF13385 Laminin_G_3 Concanavalin A-like lectin/glucanases superfamily Bateman A agb Jackhmmer:C0A826 Domain This domain belongs to the Concanavalin A-like lectin/glucanases superfamily. 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.36 0.71 -3.99 726 6160 2012-10-02 19:29:29 2010-09-27 13:17:45 1 933 1411 34 2404 8060 5104 168.50 14 16.23 CHANGED p.u......h.h..h....sus........ss...h...lp..........h...ss......ts.................h.....s....ss.......sh........................T..lssWl..p.hs........s.....h.s......st..tth.....hh......................................s....ss..s.................sht.l...t.....................h.......s....ss.s............ph.t..h...........th...tss....s................................t.t.........php.....s.ss...........sh.s..................s....s.pW.pHlsh.s..hs.............u..s...................s.......hp....lY.l.......s...Gp........hhs...s..........t...s..t..sss........................................................................hs..............s..ss....h...................h....lG.........................ss.t................s.....s............spha.s.......G.....t.....l.D-lplas......p.uLoss-lp 
..................................................................................................................................................................t..................t......h.........h....s........................h......s....sp.......sa..........................................T..lss.hl....p.hs......................s.....h.s.......st...tth....hh...........................................................t.sts.....................tht.l.t...................................................................h.........s...sss...............ph.th............thtss....t.................................................................................................thp.......s..ss..........sh.p...........................s.....sp.W.t.+..lsl.s..hs..............s..s......................................p........hp..lY..l.........s...Gp..........hhss........t...s..h..sss.........................................................................................................................ht........ts..ts..th...................h....lG.....................................................sst........................s.s.........tp.h..p.........G.....p.........l..spl.tl.as......p.sLsttpl.......................................................................................... 
0 1029 1475 2077 +13228 PF13386 DsbD_2 Cytochrome C biogenesis protein transmembrane region Coggill P, Eberhardt R pcc Jackhmmer:B0SI46 Family \N 24.20 24.20 24.20 24.20 24.10 24.10 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.80 0.71 -11.67 0.71 -4.57 161 1138 2012-10-02 18:22:22 2010-09-27 14:35:20 1 9 1040 0 354 3522 1325 201.20 26 81.07 CHANGED sshllGLh.Guh.HC...lGM..CGGlshuh.....sh.....st...........p.......p........p..t.....h...t.........h.lhYslGRlhoYsllGslhG.hlGs....sl..thh....st....h.....t..h....lph.hhu.lhhslhhlhhuhthh.h....h...h....hh..t........l....tp....ls.....ph.l...hphh....psh..tpph.........psh....hs.shhlGhlhG.hL...P.C..G...l..VYsslhhAh.so..G.ushpGulhMhsFGLGT...lPshlsh..uhhssh.l.........pt......hhp.......p.th.h.+..hs..ul..llll.h.Glh .................................................................................h.hhllG.ll.uu....h..HC........hu.M....CGG.l.s....h.uh............shs............................................t..............p............p.h........h...h............h.lhYslGRl.....hoYsl.l.Ghl.h.G.hlGp....hl....s......st..............h....lph.hht...hlhslh.hl.h..h.u..hhhh..............t....hh..h...............................l........tp........h..s.........th....l...h..p...hl......p.l.hpp..........................psh..h..sshhlG..h..l..h..G..hl......P..C....G........l....VY.s.h....l.h....h.......A.h.s.s........u........s........s...hp.G.uL.hM..h..sFGLG...T.....lPshlhh.....uh...h.u...ph..l..............sp.........hhp...........p..hh...h...+....lu..uh..lll.h.hGl.................................................... 0 126 259 317 +13229 PF13387 DUF4105 Domain of unknown function (DUF4105) Coggill P pcc Jackhmmer:B0SCN2 Family This is a family of uncharacterised bacterial proteins. There is a highly conserved histidine residue and a well-conserved NCT motif. 
22.20 22.20 22.20 26.90 22.00 22.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.22 0.71 -5.09 149 716 2010-09-27 13:47:27 2010-09-27 14:47:27 1 1 544 0 199 698 188 171.80 22 36.56 CHANGED ss......tttpahp...........thtssplphs.hhsu.hhspshshaGHohl+h..............hpsthh.......thshsaus.sh....sshshhh+uhhGpa.phhhshtshpchltpYsphct...RslaphpL.s.Ls.pptptlhtplhctt.shp.t.....htYpahpsNCoTplhchl.ctsh...sttlth......ph....hh.sh.............shhchltph ................................t.......phs.cplphl.hhhu.httpsh..shaGHThl+h................thtD.pp......thshsaushst.......sp..hhhcGhhGpa..hthss.hshpchltpY.sphct...Rslaphp.L.s.Lo.pctptLhtphhcht.phs.......hhY.ahssNCsoplhchl.ctsh...ss.l.hs....ph....hhss.............thhphl.p........................ 0 69 127 171 +13230 PF13388 DUF4106 Protein of unknown function (DUF4106) Bateman A agb Pfam-B_5 (Rel 25.0) Family This family of proteins are found in large numbers in the Trichomonas vaginalis proteome. The function of this protein is unknown. 
27.00 27.00 27.60 32.80 25.20 26.80 hmmbuild -o /dev/null HMM SEED 422 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.65 0.70 -12.54 0.70 -5.51 13 401 2010-09-27 14:06:47 2010-09-27 15:06:47 1 5 1 0 401 401 0 270.60 73 66.03 CHANGED MQKAMKSAEYIKANNDWLDAQANAKAAQLIGSIRTKIQADEDSSNEALTNADFKNAFEALHSKVK.VNDFSSGKKLKSEGFDKEL+EVAQNMTKITDAATRQAVQSAYDAVRATVVESQEKELQQTKTDLVNAFL+TKSQVGHYAADGTYVPAGGTYIPAGGTYILASGTYIPPNPPREAPAPGLPKTFTSSHGHRHRHAPK....PTQQPTVQNPA...........QPTVQNPA.Q...............QQPsQQPssQ..............PAQQPssQpPA..........QQP..QTEQGHKRSREQGNQEFLKMLKE-YGYPDTlDFSDRYKEAIRKFKEGNTDPNLFSFMAQHQIGYNLKPGKYKLAKGYDLIAYHPNDMsEFTPRYLMSELNDNSTlFMKRVKNRDGTKEERhMssDDLsRELVKNGLGIYEMPA.......DEVQETPQEE.VQIQPDMEEIVQQQQLEEP ..........................................................................................................MTKITDAATRQAVQSAYDAVRATVVcSQEKELQQTKTDLVNAFL+TKSQVGHYAADGTYVPAG.......GTY........I...................................PPNPPREAPAPGLPKTFTS....SHGHRHRHAPK........PsQ.......Q..PT...Q....ss............pPs.Qpss.t...........................s....Q.ss.p............sA......Q...P..osQ..ssA..........Q.QP....QTEQGHKRSREpGNQ-FLKMLKEsYGYPDTlDFSDRYKEAIRKFKEGNTDPNLFSFMsQHQhGYNhKPGKYKLAKGYDLIAYHPNDMsEFTPRYLhSElND..NSTlFMKRVKNRDGTKEERhMssDDLsRELhKNGLGIYEMPs.......DEVQET.Q.....EE......VQIQPDMEEIVQQQQLE.............................. 0 401 401 401 +13231 PF13389 DUF4107 Protein of unknown function (DUF4107) Bateman A agb Pfam-B_5 (Rel 25.0) Family This family of putative proteins are found in Trichomonas vaginalis in large numbers.\ The function of this protein is unknown. 
27.00 27.00 37.00 35.00 18.30 18.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -10.88 0.71 -4.54 4 582 2010-09-27 14:17:08 2010-09-27 15:17:08 1 2 1 0 582 582 0 140.50 87 90.96 CHANGED MGTKALAFGAKALGAYDAVNKMSGGRVSKTLDANKGKIGGWVAKKLRLNKIGLINKASNLVATESENALGKDDEFAKHAKDFNDQMKGETMHLNRVDGTKENVSSPPAVhPYGPYGMYGNPYERPFDPLTGGSNWYHYGRRRKTVKLETDVKKATKKK ..................................MGTKALAFGAKALGAYDAVNKMSGGRVSKTLDANKGKIGGWVAKKLRLNKIGLINKASNLVATESENALGKDDEFAKHAKDFNDQMKGETMHLNRVDGTK....ENV..SS...P......P.....V..............VL.P..............Y......GP.....YG...M.....Y.G.NP.....YERPFDPLTGGSNWYHYGRRRKTVKLETDVKK.ATKKK......................... 0 582 582 582 +13232 PF13390 DUF4108 Protein of unknown function (DUF4108) Bateman A agb Pfam-B_8 (Rel 25.0) Family This family of putative proteins are found in Trichomonas vaginalis in large numbers.\ The function of this protein is unknown. 27.00 27.00 132.80 132.50 20.90 19.00 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.98 0.71 -4.44 38 686 2010-09-27 14:18:04 2010-09-27 15:18:04 1 1 1 0 686 686 0 131.20 85 97.24 CHANGED TKFISVNKYMotlKEELDPFsYLNVYFYHFEKS.oFcKVWNIEPVKFAIVTK..NGApFEDL..DIEGLLsVKENFDR+FSpLcEGKAYKLVIPYEPKKADDYEYYESKIVEVQGKLGKKILESK.....PVFAPKEEENIDIDPE ............................sKaISVNKYMotlKEELDPF....sYLNVYFYHFEKS.oFsKVWNI.EPVKFAIVTK..NGAKFEDL..DIEGLLsVKEsFDRRFSNLKEGKAYKLVIPYEPKKADDYEYYESKIVEVQGKLGKKILESK.....PVFAPKEEENIDIDPE...................... 
0 686 686 686 +13233 PF13391 HNH_2 HNH endonuclease Bateman A agb Pfam-B_14 (Rel 25.0) Domain \N 25.00 25.00 25.00 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.40 0.72 -4.19 198 1540 2012-10-05 18:28:12 2010-09-27 15:37:21 1 23 921 0 616 1405 179 64.70 29 19.80 CHANGED Csl..oGhph................................lcAuHIh.Phu.........................h...tsp.....................................................sspNGlhLsssh..HphFDpthlsls .....................................CsloGhp.......................................hlcAAHIhPhp.........................t.s....sss...............................................................sspNGLhLssph..HphFDpuhlulp.................... 0 118 339 479 +13234 PF13392 HNH_3 HNH endonuclease Bateman A agb Jackhmmer:Q7P8C3 Domain This is a zinc-binding loop of Fold group 7 [1] as found in endo-deoxy-ribonucleases and HNH nucleases. 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.11 0.72 -4.56 226 1385 2012-10-05 18:28:12 2010-09-27 15:51:52 1 43 897 1 251 1201 508 45.20 27 24.40 CHANGED hhhHRllaphh....hG....thP..s.s.h.........hlcHhs.s.s+....pssph..sNLchsoppcNtt ....................................hhHRllhtth.........hs........shs..p.s.h..........plsHls...s.s+.......tsN...ch.....sNLchsopppN................... 0 83 160 209 +13235 PF13393 tRNA-synt_His Histidyl-tRNA synthetase Coggill P pcc Jackhmmer:B0SA16 Family This is a family of class II aminoacyl-tRNA synthetase-like and ATP phosphoribosyltransferase regulatory subunits. 
21.20 21.20 21.30 21.20 21.10 21.10 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.92 0.70 -5.37 129 7540 2012-10-02 14:22:40 2010-09-27 16:44:42 1 40 5007 33 2112 6233 5624 291.80 27 66.39 CHANGED GlcDlLPtcApph-pl+cpLl-happaGY-hVhPPhlEah..-oLlsGs.....ut.s..h.c......p.saKl.sD.p.oGRhlGlRuDhTsQlARlsAp.p.l....s..pp......tstRLCYsGsVl..+spsst..h......t..ss..REslQlGAELaGps.ulpA.DhEllpLhlcsL....t....sG.l...pplplDLuHsslhcsl.h.p.sss..lststpptlhstLppKshsplp.plst...t.....hs............tsh.t.phLttL....pLtGshcsLp..cAtph.lss.h...s.t..htt.slspLcpltshhpsh.....t.splplDLu-lRGacYaoGlsFssYs.....sG..hu..p..slApGGRYDplstt.F.....G..+..........sc.......PAsGFSl-lctL .......................................................................................................GhpDhlPtp.hhh..ph....lppthpphh.p.paGappl.cs.PlhE.....h.clh..tp.th.................G-..s...s.c.............c..h..Y..p...........h...........................D......c.....s...............s...........c...............p..............l.......................s.LRs.-.h.Ts.s.h.s..R..hh.hp..p...h..........hs...................tsh+haY.hGshF.Rh.-.+.s..p...t..................G.....Rh..R..pFhQhG...h.E..........hh......G........s.....s..s...t....h......D.u.E..ll..t...hshchhp......t.....LG..l..................pp.h..p.l.clN..shs.h....h.c..........................h....p........t........t....h....p.....p........s....l....h......p...h...l.....p.............p.......h.....p.......h.....t.....t......l.......c.....c.h...h.p....c...........t.....lp..................................................................pp.....h....ch.Lps.h..........ph.p....s...s........t........p.........t.......l.t...........p.s.....h...t...h....l.....s...t...........................p..p.........u.....h....p.c.........h.ptltphLc......sh..............t.hth.p.l-.....s.l....l..R.G....L..D...Y....Y.o....t......slFE..h.......h..s.........................s....s.......hs....s..................slsuGG.....RYDsLlpth....tG...p..
....................sh.............PulGFuhGl-Rl............................................................................................... 0 694 1329 1754 +13236 PF13394 Fer4_14 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:B0SHY7 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -11.08 0.71 -4.08 151 9987 2012-10-03 08:56:43 2010-09-27 16:54:42 1 73 4097 8 2936 16021 4178 110.20 19 33.66 CHANGED lFl..uGCsh...........pCp................sC......sttthcF................ssGc.hstphhspll.............hpssth..ttlsloG.GEPhh.hspt.hh.Llcpl+ppt.......hcIa.l.Tshohp...........th..t..hlu......hh-.llscGc .................................................................h.......tsCs..l.........................pCp..........................................................aC.h...............s.t..t...t..h.sh........................................................................t.t.t..p..h...h...s...h..c..p...l..h..p..h.lt.............................h.tt..t.s.h.......p...p..l.s.lo..G....G.E....Ph..h..........p........t.......l...h....t...l.l.....c...t....l...p...p.ts..................hp.l..p....l..p..T.s.G.h.h...........................................hhttthh............................................................... 0 1066 2000 2532 +13237 PF13395 HNH_4 HNH endonuclease Bateman A agb Jackhmmer:Q7P7J1 Domain This HNH nuclease domain is found in CRISPR-related proteins. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -8.68 0.72 -4.33 93 798 2012-10-05 18:28:12 2010-09-28 10:39:15 1 12 747 0 166 2662 2689 53.20 33 6.29 CHANGED shYoGctIslscLhs..tta-IDHIlPhS.hhhD........DS.hsN+VLstpphNpp...Kusc.sPh ........................shYoG.c...t..l...s..hp.pl.t..........h-IDHllPhS...h..h..hD...................Do..htNhVLsppptNpp....KusphP....................... 
0 59 115 147 +13238 PF13396 PLDc_N Phospholipase_D-nuclease N-terminal Coggill P pcc Jackhmmer:B0SFY4 Domain This family is often found at the very N-terminus of proteins from the phospholipase_D-nuclease family, PLDc, Pfam:PF00614. However, a large number of members are full-length within this family. 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.33 0.72 -4.29 441 4348 2010-09-28 09:45:17 2010-09-28 10:45:17 1 24 2893 0 835 2871 257 46.90 27 12.42 CHANGED hhllslhs.llpll.tp......p....tss......ss+hsWlllllh...lP..llGsllY.llhGcp .........................h.hll.lhs.h.lpll.hp.............c.....+ss.........ssphuWllllhh....lP..llG..hllY.lhhGp......... 0 273 568 736 +13239 PF13397 DUF4109 Domain of unknown function (DUF4109) Coggill P pcc Jakhmmer:C7MCW6 Family This is a family of bacterial proteins with several highly conserved characteristic sequence motifs, such as: APR, WxC and ERR. The function is not known. 25.00 25.00 58.90 58.50 21.10 19.80 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.57 0.72 -3.65 79 618 2010-09-28 12:50:06 2010-09-28 13:50:06 1 1 430 0 167 347 96 101.60 49 89.31 CHANGED RuLRGoRlG......usShEo-+ss-hAPRpplpYhCss.GHphplsFAs...-AEl..PssW-C+.CGt.Aths..cus..sP-sp.tsKPsRTHWDMLhERRS.pELEplLpERLphLR ....RsLRG.plG......uhShEs-Rss-hAPRphlpYhCsN.GHchclsFAs...-A..El..PssW-C..CGh...Aths..cus....sPp..sc..sKP.....RTHWDMLhERRS.tELEtlLsERL-hLR. 0 60 130 159 +13240 PF13398 Peptidase_M50B Peptidase M50B-like Coggill P pcc Jackhmmer:C7M9M6 Family This is a family of bacterial and plant peptidases in the same family as MEROPS:M50B. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.44 0.71 -4.93 100 639 2012-10-03 04:41:15 2010-09-28 13:50:45 1 11 589 0 249 815 96 197.80 26 80.07 CHANGED ll..sh.llA.lhllh.s.....h....lht..........h........+hls....ohhHEhGHAlsAlLoGtclpuIpLpsD........pSGlsho..pG........tGhuhhlsshAGYhusulhGhshhh.hs..t.s..sps...sh...hL.h...lsl.s.l.Llsh.L.lhh.Rshhul...hhlls...huslhhslhhh...sss...ph....s.....hh...shh.....lulhlhl.uult.s....l...........h.-L........h......pt...t.st........pSDAstLAc.lTt............lP..uhhW..ssl.ahhl.ulssl.hhu ........................................hh..hhhsuhhlhhtt..........lhp....h........h........p.h.ls..............shhHEhGHslh....shLsGG.+sc.slhlhs..s.............................................pp..Ghslopu..............thh....u.h..........lo..shu...GYh..h..s.s.l......h.h....hh...h..lh...ss....th.....phs....sh.....hlh...hhl..h.l..hlhh...L.lhs...R.ph...sl.....lslhl......hsh..h..lh.hlhh.......tp........phhh.......hh.....shh.....hhlhlhLstlh.p...................l.........hcL......h...................pp.....ss........p.Du.......stLtc.lot.................lP..hhlashl.ahlhslhsl...h.............................................................................................................. 
0 91 170 223 +13241 PF13399 LytR_C LytR cell envelope-related transcriptional attenuator Coggill P pcc Jackhmmer:C7MGV9 Domain This family appears at the C-terminus of members of the LytR_cpsA_psr, Pfam:PF03816, family 22.20 22.20 22.20 22.30 22.10 21.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.95 0.72 -3.61 203 1654 2010-09-28 13:27:52 2010-09-28 14:27:52 1 14 748 0 494 1394 193 91.50 23 24.14 CHANGED sspls..V.pVhNuo.sh......sGLAuplustLps.p.GFs.s...sss..uNs....s......ss.....t.....spop.lh.a....usss.pss.A.t..s.lust..l....s...ss.p.lh.t.s....s...tts..s...s...lplll..Gs-aps .......................tphpV.pVhNuo.sh........sGlAupsustLp.s...t.G...Fss...sss..uNt..s...........ss.....t.....spop.lha.....ssus....pss...A..p...plupt..l........s....ss..s...lt.t..s...........s.......tts....s............s....lplll..Gpsat.................................................. 0 156 375 464 +13242 PF13400 Tad Putative Flp pilus-assembly TadE/G-like Coggill P pcc Jackhmmer:C7ME36 Domain This is an N-terminal domain on a family of putative Flp pilus-assembly proteins. The exact function is not known. The Flp-pilus biogenesis genes include the Tad genes, and some members of this family are putatively assigned as being TadG [1,2]. 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.38 0.72 -3.93 284 1158 2012-10-01 21:13:59 2010-09-28 16:26:36 1 8 766 0 467 1471 102 51.90 26 14.21 CHANGED G...sss...lhs..lhhlsllh....hhuhsl.D..huphhhs+sclQsu...uDsAALA..u...........A...ts ......Gshs..lhu.lhhlsllh..hsuhul.Dhuphhhs.+sclQsA...ADsAALA.u......A.................................... 
0 144 280 372 +13243 PF13401 AAA_22 AAA domain Bateman A agb Jackhmmer:C0A2E2 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.90 0.71 -4.01 168 4268 2012-10-05 12:31:09 2010-09-29 11:23:30 1 160 2155 9 1502 14349 3197 134.70 17 27.70 CHANGED hp........ppshh......hlhGpsGsGKThshpphhpth.................................pspsl...hlpsssss.......ohppl.hpplhptlshthtt..........................sttphh.ptltptlppptt..................llllDEApp.L....s.....tphLchl..ptl........hs...........ppslsllLhGps.plpphl .....................................................tt...tthhhlsGpsGsGKTsh..h....pp...hhp.ph.................................................................phphh..........hlp..ssstt...........ssppl...hp.....t...lh....p..p.l.s...h.thss.........................................................................................sttphh.....ptl...t..p...h...lppppt...................................................hllllD...E.scp..L.........s.................tphL.ptL........ch.l...hs..........................pt.h.t.l..l..lh..G........................................................................................................................................................................ 0 443 929 1274 +13244 PF13402 M60-like Peptidase M60-like family Nakjang S, Hirt RP, Bateman A, Eberhardt R agb Nakjang S, Hirt RP, Domain This family of peptidases contains a zinc metallopeptidase motif (HEXXHX(8,28)E) and possesses mucinase activity [1]. 
27.00 27.00 27.50 27.40 26.30 26.90 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.71 0.70 -5.52 67 818 2012-10-03 04:41:15 2010-09-30 13:21:49 1 49 547 1 178 859 44 289.90 27 25.67 CHANGED sshp.sTGlYsssscplslpl.....tsspslplhlusp..scp...........tp.shhtppas....Lssupsplps.shGGllYlh.....ssstst.....slplplsu.uh.sPhahhGppop.p-Wpp.lpphs.uPhsElpscphllTs.sscsl+ph.....sDsptlhchaDchhptts.-lsGhscp..t..pt............+hVsclphs.huhha.uGYslhhsss....shppllsh.stht..ssWG.hHElGHstQps.sapas...u.ssEVTsNlaolhspcthh..sp.tpthp...........tsch.....ppshpalppspsphth................tshhhtLshahQLphhaG .............................................................................................................................................s.shp.sTGlaA.stpplslph.....tssss......s..slpl.tlu...sc.hs.cchtp........th.tRs.sh..s..s.c.p.as..............Lp.s.s....p.s.phps..s.............aGGLlYlh.............sspsst...........ssphoh...su...ssp.....sPa...a........t.........pp......tc.......Wpp...hhps...................s...u.................P.......h.......sELpo-shlhTs.s...p...cs...lps.......................ss...s......p........ph.......h...cp...h.D..p....h...hps.h...s.c........h.hGhsp......t.s.sppp.hp........................+ass-lplu...huhha.o.GY.slhsssh.........s.s.p.sl......s.......h...ps.......l............s............sWh...hHElGHNtp....ps..shphs...............u..sTEVssNlhuLahpc.phh........sphpp.................htsch...phu..p.alp.p.s.sspt.h.....................sshspLhhahQLp.hht.................................................................... 0 67 89 126 +13245 PF13403 Hint_2 Hint domain Bateman A agb Jackhmmer:A3PK18 Domain This domain is found in inteins. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.27 194 984 2012-10-03 10:25:13 2010-10-01 10:01:36 1 80 208 0 187 918 160 135.40 32 24.73 CHANGED sCFssGThItTspG..hsVEsLpsGDhVhTtD.....sG...hpPlpWlGp.pp..l........h.sstsph..t.Pl+Ip..sGALGss.....hPp+DLhVSPpHRlLlss...sphhhuptcVLlsAcpL..........l..s...t.ss..l.p...tt......s...s.p....t.VsYaHlhh-pHcll.hApGs.sESahsGs ....................CFhsGThIpTs.pG.....hsVEsLpsGDhVh..Th.-............su......h..p...s..l..pWlG.p.p.p..h........th.ssst.sh..hP.lRIt..t.s.A.luss..........hPtcDllloPpHplhlp..............th.hlsA+tL..........Vs....u..tu..I.h....pp......s....hs..shsYaHl.h-..pHpll.hA-.Gh.sESahss.s.................................................................................... 0 22 148 169 +13246 PF13404 HTH_AsnC-type AsnC-type helix-turn-helix domain Bateman A agb Jackhmmer:A3PS88 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.22 0.72 -7.78 0.72 -4.42 347 5527 2012-10-04 14:01:12 2010-10-01 11:47:45 1 33 2295 47 1669 9314 1799 42.00 35 25.97 CHANGED l.DclDpcILphLppc...uRhohpcluc..plGLSsssstcRlc+Lc .........lDclD+pILphLpp-....uR.h.s.h.s.cLAc..plG.....lSsssltpRlc+Lc.................. 0 419 1008 1383 +13247 PF13405 EF-hand_6 EF_hand_4; EF-hand domain Bateman A agb Jackhmmer:A3PLV3 Domain \N 25.00 14.90 25.00 14.90 24.90 14.80 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.69 0.74 -7.96 0.74 -3.58 793 5737 2012-10-02 16:17:27 2010-10-01 13:55:03 1 526 696 127 3421 36588 2275 32.40 24 8.28 CHANGED plp..phFphh.D.ps...psG..hls.h.p-l.pp....hlp..............t.hu .................hp.psFpha.D.p-........tsG......tls.h.pEl.pp....hhp................................................ 
0 1356 1922 2697 +13248 PF13406 SLT_2 Transglycosylase SLT domain Bateman A agb Jackhmmer:A3PHY1 Domain This family is related to the SLT domain Pfam:PF01464. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null --hand HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.63 0.70 -5.48 176 3080 2012-10-03 00:09:25 2010-10-01 15:54:35 1 19 1866 9 798 2548 2044 255.30 32 69.52 CHANGED sapshltsh.pppAtppGlspsslpps....hsss.php...........tpllcht.......ppQs....Ehs.p..s......htcYhsth.ls.spRlppGpphh.ppatshLsclEpcYGVssplllAlWGlETsaGp.hhG..s....hsllcuLATLAa.-..t...........................RRssaF....cpELhssLpllccsp.lss.................pp.hpGSaAGAhGpsQFhPooahpYAV...DhDGDG+tDlass..ssDAluSsANYL.ppp.GWpsG.pshuhplpls..............tshsh.ht...s...hpsh............spht.thGlp.hsst..........s.......thts..pL...tutpG.........ss..aLshpNF.hlIt+YNpSshYAhuVutLA- ................................................................................................s..t..h.tth.pt.stttGhsttthpth..h...sts..ph...................tllph.........................ppQs.........thp..........s.............hhpYhpph...ls...tpl.pp..Ghthh.ppatshLp.chtppYGVssphllulhGlEotaGp...hhG..p....hpl.l....suLuTLua.s.h.............................RRtpaF....ptcLhshLhhhp.pt.p.hs.s...................p..h.pG..S..aAG...AhG.sQF............hPooatpYAV...DhcGDG+h..Dlass..ssDAluSsANYL.....ppp....G..WppG.tshuh.sp.h........tshp......t......h....h...........................................................s....tst..u.lhh.tsttG............t...ahsh.NF..slhcaNp.....S.hYAhulh.Lu............................................................................................ 0 186 454 620 +13249 PF13407 Peripla_BP_4 Periplasmic binding protein domain Bateman A agb Jackhmmer:A3PPI0 Family This domain is found in a variety of bacterial periplasmic binding proteins. 
28.20 28.20 28.20 28.20 28.10 28.10 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.69 0.70 -5.03 173 12924 2012-10-02 13:57:41 2010-10-01 18:32:26 1 44 2863 86 3019 24092 3112 255.70 20 75.86 CHANGED lu.....lsh..stsss.ahpt....htpuhpptspp.hshphhh............ts..upsss.spQhp.lpsh.lsps........sc..ull..lssscs..suls.sslcp.ApstGI.s.Vls........aDss...........ssstphh..........hluh-stph...Gphtu................................chlscthssp............uplsll....sGsss.ssstp...tt....hpGh.pps.....lpp..................hs.plplhs...............................s.s.hssspuppt.hpslLs...ss.........l........sul.hu..sssshsh...........u....shpulpp.tuh..tsp..h...........lsGhDssstth.phl.............psGphp..s...slhpssh...th..uttuhphsh.ph..hp......Gcp ...............................................................................................................................................sh.h..thsss..ahst......hppu...h....p.p....t...u....c....p...huh.phhh.....................supsc....sp..Q....h..p.t...l.csh..l..s.ps............sc.....u.l.l........l..s.....s...s....c.s.........s.u..h...s.....s.....s..l...c...c.....A...p.........p.....t....s.....I....s...V.l.s........................hDpt...............hsp.s.t.h.s.s......................hl...u...s...D...s...h...p..s....G..ph..tu.........................................................................................c.hl.s..c.phstp...............................................uplshl....................t.G..ps..s...s....s.s.sp............tR.............tpG.h...tps...............lpp....................................s...sh.c.lls......................................................p......ss.s...h.s..t...s..p.u....h...ph....h...p.sh.Lp.................sp.s..............pl.......................................................s.ul..hu..ts..Dshuh....................G.........Al.p.A...lc...s...tuh.......ssp.hh................ls.Gh.D...u...s...s...p...s...h...pt.l.........................p.s..G.
p..hs............s......o.lh.p.sst....th.u.t.h.uhp.hsh.ph...hp..s............................................................................................................................................................ 0 815 1763 2389 +13250 PF13408 Zn_ribbon_recom Recombinase zinc beta ribbon domain Bateman A agb Jackhmmer:A3PQ69 Domain This short bacterial protein contains a zinc ribbon domain that is likely to be DNA-binding. This domain is found in site specific recombinase proteins. This family appears most closely related to Pfam:PF04606. 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.33 0.72 -3.71 726 3758 2012-10-03 10:42:43 2010-10-01 18:35:27 1 37 1459 0 677 3269 314 62.10 23 12.28 CHANGED hL..s..G..l..lhCup.......CG....p...s.......h.....tt..........p.....p....pp........tpt.....................hh..Y....tC.s....st...h..pt.t...................p.....Css..p......plpt.ptl-...phl...hpt...lp ...............hsG..l..lhCup........CG......s.s.......h..tt..........p....p.....pp............ppp...................th..Y..tCs....pt.....t..pts........................p...Cstp......plpt.chl-...phlhptl........................................................ 0 325 545 625 +13251 PF13409 GST_N_2 Glutathione S-transferase, N-terminal domain Bateman A agb Jackhmmer:A3PFR8 Domain This family is closely related to Pfam:PF02798. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.50 0.72 -3.98 315 3673 2012-10-03 14:45:55 2010-10-04 11:15:34 1 29 1984 30 1074 16648 5196 85.80 29 29.54 CHANGED hs.Pas..p+shlh.Lpht.....ul...s.hp.....h..p.h................l............t.h..................p...th.......................................................h.p.hs......P..t..s..................................................p.....lPsLhh.............................t...sup.............sls..-S....tsIhc.aL.ppt ......................................................sPhu..p+shIh..hc.h...........GL.....-...h.....l..sh.................s.........hh..htps.....................ph..u.ss.F....................................................................................................s.c.hs......s..p..s...................................................p....VPsLhD...................................c...psp...............llss...ES.up.Ilphlsp.................................................................... 0 300 612 885 +13252 PF13410 GST_C_2 Glutathione S-transferase, C-terminal domain Bateman A agb Jackhmmer:A3PFR8 Domain This domain is closely related to Pfam:PF00043. 
24.90 24.90 24.90 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.27 0.72 -4.08 279 7419 2012-10-03 01:14:49 2010-10-04 11:42:28 1 51 2398 100 2549 11894 3037 85.10 19 31.76 CHANGED pth..hpp................................................h.htp.....ltpsls..tl........cp.p.hs.......................................pt.............hh.Gs.......phohuD..hsl.h...s....hl...t.h.h......phh.....hh...t.h.......................thh.......p....t.h..spltsah.cp ...............................................................................................................tt................................hhtp.....hhptLs....hL.........-p.t..Lu.............................................................................................ppt............als.Gc.....phohAD......htl..h...s.........hl.....h.p...h........phs........hh...t.h................................hh........t.......s.h....spl.tahp.t........................................................ 0 615 1327 1986 +13253 PF13411 MerR_1 MerR HTH family regulatory protein Coggill P pcc Jackhmmer:C2D3E8 Family \N 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -8.89 0.72 -4.20 214 18606 2012-10-04 14:01:12 2010-10-04 13:16:52 1 99 4180 5 3931 16622 3391 68.70 27 35.72 CHANGED hsls-luchhsl........stpslRaaccchh...ltssc....pspsh.+hYsppclpplphIppLhp.pGholptlpchlptt ..............................hpIu-luch.s..u..l..................osco...LRaY.....-.c..pGL.....lp.Ptc..........spsGh...RhY......s.p...p-lppL.phIpphpp..hGh.sl.p.p.lpphlt..h...................................... 
0 1233 2583 3343 +13254 PF13412 HTH_24 Winged helix-turn-helix DNA-binding Coggill P pcc Jackhmmer:B0SAM5 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.74 0.72 -7.77 0.72 -4.57 608 12234 2012-10-04 14:01:12 2010-10-04 13:54:13 1 121 3550 54 3360 26657 3880 46.50 30 21.83 CHANGED hDphDhcIL.phLpc..su+.lotp-LAcplu..lSsssstpRl++LccpGlIp ........................hsph-hpIL..ph.Lpc........suc..l..otp...-L....Acplu......LSsssstcRl+cLccpGlIp................ 0 820 1884 2697 +13255 PF13413 HTH_25 Helix-turn-helix domain Bateman A agb Jackhmmer:Q87B38 Domain This domain is a helix-turn-helix domain that probably binds to DNA. 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.74 0.72 -4.33 195 2987 2012-10-04 14:01:12 2010-10-04 14:51:59 1 16 2861 3 621 3650 1038 61.70 32 21.10 CHANGED hL+psRpppulolcclupph+lphphlpAlEpschs.tLP....ss....sas+Gal+sYAchLG.L..Ds...p...tl ........L+puRcphGlSlpplspchplphphlpulEpscas..tlP....us....sasRGalRpYAchlsl.-spp............................................ 
0 195 382 519 +13256 PF13414 TPR_11 TPR repeat Coggill P pcc Jackhmmer:B0SDR7 Repeat \N 26.80 26.80 26.80 26.80 26.70 26.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.52 0.72 -4.33 458 62040 2012-10-11 20:01:03 2010-10-04 16:34:37 1 8099 4563 105 30435 78055 23252 67.00 19 21.46 CHANGED p.p...u..t..t..h.pphGpt.hh.p....p.t..c..hppAlptappu.l...p...h......s.......s.s...s....sp..........ha.hs..h...uts.hh..ph....s......p..p..hp....p.A....hpt..h..p.p.u....l.p......l.s..s ...................................................h.hth.G.h..hh..p...................t...s.......c.......a...p.p.A..l....p...t....a...p.c......A..l..................p.............l................s...................................P..p.......p........sp...............................................sa..hs.........h.........uhs..h.h.......p.h........................s..............................p....hp..............p.A..........hpt..h..p.p.A..l.p.ht............................................... 0 11553 19108 25290 +13257 PF13415 Kelch_3 Galactose oxidase, central domain Coggill P pcc Jackhmmer:B0SIN7 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.56 0.72 -3.69 122 3739 2012-10-05 17:30:43 2010-10-04 16:49:49 1 745 564 0 2451 13422 417 52.60 26 10.03 CHANGED ss..tlhlhGG..hs.........t..tpth....ssh.h..hh....sh....sss........pap......ph........ss...h....ss.....sRtsassshh ..........................................splhlFGG......hs.......................t....tphh....................sDl..a....hh.......cl...........sst.......................................pWp.............pl..........................tus........h...........Ps...........sRssHsssh.h.............................. 
0 922 1445 1978 +13258 PF13416 SBP_bac_8 Bacterial extracellular solute-binding protein Bateman A agb Jackhmmer:A3PHX7 Family This family includes bacterial extracellular solute-binding proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 281 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -12.25 0.70 -4.67 141 17239 2012-10-03 15:33:52 2010-10-04 17:51:20 1 26 3652 65 4107 24014 7960 298.20 16 73.07 CHANGED phscpa.p....c.....p....s..s.......l.p..lp.htp..h..ss..s.-..h..spl...pst....ht..s...s..s..h..ss...h...sl...hhhs.sst..h.tphsp....ps...hlh........-l..s.............ph...t.phsp.........................................h..ss......hhpsh.......sh..cu.........hu..hP.hshs.s.sh..s................hhYspshh.........p.c...............s...........ss...poh.s.......sl......h.......c.....hp.......t..h................t......Gp.h..........................sh............h........p....s...s.....h.s.......hh...........hh.t.hhs........................sGt.t.h..sps.........stsh.......................sps.hphh.pp...ht....s..p..h.....h........hh................s.s..s...s...s..shs..ths......sG..c..sshhh......s....us....hs...hs...s.hp..................pt..s........t......s.hs.....h.........s......h.........s................p...........p.......u..........s.hh.....ss.p...s......hs..l....st....s...ss.pp...ph...tuhcF.l..pah.s.s.s-.spt....p....hhp....ttuh..h.P.sppss..t..t......sst....hp.......ps......st..h.s 
..........................................................................................................................hhppF.p....c......p.s...s..........l.c..V..p...hph.......h....st......s...p...............h..h.....t..pl.........pst...............h.t....s..........G...s.......s....s.s......h.....s..l............hh.s.s.....s.s.h.......h....s..p...hhp..................ps.....hlt.......sl.s.............ph...s..phpp......................................................l..ss..........hhp.sh................sh...sG.p..............................hau........l..P.....h.....h..h.s....s..........h....s..............................................l.h..Y..N..p..c..h..h...................c.p....................................t..........................ss....p..o...W...s........-l..................h......p...........t.............ch.........................p..........sp.h......................................sh..............h.........s.....s.s.....p..t...........hh............th..h..hht............................................tGh.s.h..sps................sstph..................................tps..h.p..h..l...pp.......ht....s...p...h........t..........h.h...............................s..s..s.....s......p....h.....ts.......hh..t.............s...G.....c........s.s...h.sh............s..........hs.......hs....hs.....t.hp........................................pp...s.....h...........s..hs.......h...........s........h.........P................................................p.........p........u.............s.h.h.......ts..s....s........hs.....l.........s..p.......s.....up....sp........c.......Ahc.F.l........sal..h..s.s.....-...stt.........p......hsp........thuh...h..s..s.p.p..s.s.h....t..................................hh............................................................................................................................................................................................. 
0 1273 2551 3313 +13259 PF13417 GST_N_3 Glutathione S-transferase, N-terminal domain Coggill P pcc Jackhmmer:B0SCP7 Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.38 0.72 -3.75 216 16191 2012-10-03 14:45:55 2010-10-05 09:52:41 1 91 2388 143 5495 19464 7212 77.70 24 34.58 CHANGED La..s..h.t.SPas..p+lph.sLphps....l....s.....hc..h.....h......h..s.......sh.....p...p.......s..t..h........hth..s....shs...plPsL......h.p.s.....up.......slt....-S.t.sIh.ca.L...-pths.........ss..s ...................................................h...h..ss......hs......t..+....l..pl....s...L..pt...p...u............l........s...................ac...h........h...................tl....s...........................ht..........p....p............ss......p..h...........................hph..N...........P.ts..........p.VP.sL..........h..c..s.......st.............................slh.......ES..t...sI.hcY.L.scpas....t..................................................... 0 1291 2807 4225 +13260 PF13418 Kelch_4 Galactose oxidase, central domain Coggill P pcc Jackhmmer:B0SIN9 Repeat \N 21.60 19.00 21.60 19.00 21.50 18.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.49 0.72 -4.19 84 4909 2012-10-05 17:30:43 2010-10-06 13:08:44 1 961 757 2 3302 16453 618 51.00 22 10.48 CHANGED sRhtts.ssh.h...ss..s..p.lhlhG.Gpsss.s........thhs-h.a..h..a....sh..ps..s..p..W........ppl.......s..sh..Ps ..................................RhsHo.ssh.l..........p......s..........p..lh.la....G..G....h...s..s..s..t.....................................ph..h...s...D...l...a.......h........a........ch.....ps........p........p.........W.........p.h....................t............................. 
0 1424 2079 2760 +13261 PF13419 HAD_2 Haloacid dehalogenase-like hydrolase Coggill P pcc Jackhmmer:B0SBQ3 Family \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.05 0.71 -4.05 169 33740 2012-10-03 04:19:28 2010-10-06 14:29:26 1 189 4905 123 8845 29816 9459 180.80 18 75.44 CHANGED llFDlssTL..h...s..hph........hhhpth.......p....p..........hh..hcph.......sh..s...........hp.tpp.....hcp.......h.......h.......thsh..t.....ph...................ltp.lh.....pph....s.hp..................st..ht....p.thpph.............th.....th..t...h.hss.ssp.hlppL.p...p...thpl.hlh.oss..s..t..tt.hp.t...hhpp..h....sh.....thac...tlh.su...p..p..ht.h..h...KPpsts.apthhpph.....sh.ss...pp....h....lhlsDsh.psl..ts.Ap.shGhps.l.hl ......................................................................................................................................................................llFDhDGT.L....h.......D.....st...................hh.h.ps.h..........p.p........................................h....h.pp.h...................sh.....p...................hs....pp............htp..................h..............h......................s.hs.h..t................ph.....................................................................hp..t...h...h......t.t.h.....s..hp......................................................htt..hp.....p....hh.tph....................................h.....pph......p......h..h..s...s.....s...h....c...hL.p...p...L...c..............tp.........s..h..p..l....s..l..s....oss...............s................p............ph...hp...t..............hl.p.p......h..............................slt........ph.F.......c............tl.h...su................p.....p........ht...t....t........................K.P.p.......P..c..h.......a....h......t......s....h.p.ph.....................sl...ss........pc.....................s..................lhl.s.Dsh.ssl..tu..Ap.su.Ghps.hh........................................
............................................... 0 2568 5341 7324 +13262 PF13420 Acetyltransf_4 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:B0SAL8 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.84 0.71 -4.27 21 5238 2012-10-02 22:59:21 2010-10-06 17:18:10 1 36 2675 35 970 25638 3580 151.00 22 79.81 CHANGED IRhlppcDht.tlhplh..pp...hhpt..th.s.....hth...sshphhc.chlpph.....p.....s..phhahlhc..sspllGhsplp..th...ch...pppps.lp.hhhh..tpspcpslspc...lhstlh.pah.cphslcplhssl.sss.......Nh....s........uhhFapphGFchhGh.+ssthhts.cahDhhW .............................................................................................................lR.hp..tDh....t...t........l.....h.....t.....lh.........sp....h....h.........pp.......th...s................h..p.........................s...h...p.......p....h.....p.......p.....h......h.....c.....p...h.............p...................tp.........p.h....s...h.....h....V........t..........c...................s..........s..........p.........l....l.....G.....a..u...s.hs......sh..................cth...................shca.....s.....s.....E....h.........u......l......a.....lp........ssh...p..G..+..G..l.Gpp.........L.l.p.t.ll..ph...A..........c..........p......t.....s.......h....c...p.l.h..s..s.l...hsp................Np........s...............................ul.p.L.a....c.p....h....G.....F...p.h.h.G.t.h.p..p.s.t....h..h......s...p.ahD...h................................................................ 
0 272 584 802 +13263 PF13421 Band_7_1 SPFH domain-Band 7 family Coggill P pcc Jackhmmer:C1ZGV5 Domain \N 21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.36 0.70 -11.13 0.70 -4.82 24 734 2012-10-01 22:02:33 2010-10-06 18:16:45 1 19 672 0 189 878 126 216.40 26 57.04 CHANGED VWRF.RasNEIKhGApLsVREGQsAVFVpEGQlAD.VFsPGpYpLpTpNlPlLoTLpuWcaGFpSPFKuEVYFVNTppFssLK..........WGTpNPlhlRDPEFG.PVRlRAFGoYuhRV.sDPupFlpEIVGTDGpFTs-EIstpLRslIVpc....FuchlupotIPlLDhAANhs-Luchltptlssch.scYGLslschhlENISLPp...EVEcALDKRoSMGl ...................................................................................................t.p..IppGotlhVtpuQhslhV...pp....Gpl.h.D...s........h.p.sGh..Ys........lp..o.s..........s............h.............P..............h.........l..............p..o..............l............p.p............a........p.......aG.h.p..o....Ph........c.pcVaFlNhpchhshK..........a.GTs..s.P..l.....ht.....Ds....pa......s...................ltlRA..aGsYoh+l...sD.........P.h...h.F....h..s..p......l...l........G....s...p......s...p...a.s...h....s-........l..........p....p.....pl.pu.........llst................lp...s...s....l....s....p.....p...s...h.....s....h..h...pl...s..u...p.t.t-l.uphl...pp...t.l.s..tp.a...pph..Glplsshtltul.oh.s-......-spchlpph................................................................................................................ 1 75 138 165 +13264 PF13422 DUF4110 Domain of unknown function (DUF4110) Coggill P pcc Pfam-B_8504 (release 24.0) Family This is a family that is found predominantly at the C-terminus of Kelch-containing proteins. However, the exact function of this region is not known. 
27.00 27.00 30.90 29.60 26.90 25.80 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.27 0.72 -4.15 56 186 2010-10-07 10:44:02 2010-10-07 11:44:02 1 38 161 0 138 184 0 96.20 32 14.76 CHANGED tpsssp-shPpP+.PFEoLR-FasRTuspWtphhhsphc....tt............ssK.................EL+KcuFcLuE-+aW-pR-tlptl...E-ppE-.uGlt-llt.tpct-ss.sss+p..........R .....................................t...shpDshPpPc...PhEoLR-FasRTstpWtphhhsphc.t...........hssK..................EL++cuF-LAE-+aa-tRctlptL...E-ppE-.uuhtEssshspptp........pt........................ 0 37 82 120 +13265 PF13423 UCH_1 Ubiquitin carboxyl-terminal hydrolase Coggill P pcc manual Family \N 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 295 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.12 0.70 -4.98 21 393 2012-10-10 12:56:15 2010-10-07 16:51:41 1 11 243 0 279 8427 287 334.80 20 34.06 CHANGED sGLEs.phs.ssYhsuLLQhhah.p.shhphlltps.................p.hcpsLhsELuhlasMLc.s...ss.uhssQsoNhLp.shuth......p............Auslst...............................ppslpp...sppFllcplphslhsh...............hhs.........................................................................................................hpsplsp.hhshph.ppt+s...hpsps.....pp...+.s...shhshslshss.......psp.......................................thshsshLcp..hpppp.pp...hhsppspph..ps.........................................................................................................................................................sp.shphpphP.lLsIshthts.p....h...s..hphsp........................................................hltlPhp..hslsph...............................htp-..tppt........shssphhYpLpulVCc..lss.shp.psH.hVShlRVs.......tp..................pc..................WalFND....ahl 
.............................................................................................................................................................................................................................................................................................................................................................GhEs..phs..ssY..hsu......l.....LQ..............h.h........aa....h..............h.......h...p....h...h....l...p..ch...........................................................s.....h....c.........s...L....h.....s.......E......L......u...........a..........l....a....p.....M...L...c...t......................up.....s.........s....s...p....s.....o.......N.....h.....l......p....s....h.........p....p.h.............p.........................sss.lth.................................................................................tphhpp....hp.p.F.l...lp.p.l.p............p....h.h..ph..................t.....................................................................................................................................................................................................................................................................................................................................................................t...............l......t.......p....h.......h...s..h..t..h......t...........p..ps..............p.stp..................pp......hp..s.......t.h...h.....h..s.....ls..hss..............psp..................................................................................................................................................................................................................................................................th..s..h..t....p..h...L....c..ps......h.p........p...p...psp...............shCp...p..C....p...ch.....p.s.........................................................................................................................................................
...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................hp..p.p..p...l...p.....p.........h....P....s...l....L....s..l.s..h.ph........s.p...........phh..........hp...t............................................................................................................................................................................................................................................................................................th...ls......hp....hp..h...h.h......................................................................................................................................................................................................................................................................................h.tp........t.t.......................t..s..h..h...Y..pL..u.hV....sc....lts....p..hp.....t.sp.h.........V.u....h...l.+..ss...........p.p.................................................................pp........................................................................W.hhFND.....hhl.................................................................................................................................................................................................................................................................................................................................................................................... 
0 88 148 232 +13266 PF13424 TPR_12 Tetratricopeptide repeat Coggill P pcc Jackhmmer:B0SDL4 Repeat \N 32.60 32.60 32.60 32.60 32.50 32.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.70 0.72 -4.04 266 22703 2012-10-11 20:01:03 2010-10-08 09:43:34 1 4096 2284 44 13792 31171 6927 76.00 21 24.33 CHANGED p....p...h...s....p.s.h..t...pl.u.thht.phschppAhphhppulph..hp.t..h..u.p...p..p...ph.s.ts..h.tsluphhtt.hs..c..h..pp.Ahphhpculpht...pp .................................................h..ts.h..s........sl.u.t.sht..phG.c......a......c...cAhp..ha.p.cuLpl...tp........h.......G..p............p........p..s..p.......h....u.ps.........h..ssLuts.a.....tp..hG.......c...........h......cc..Ah.p..hapcuLph...t............................... 0 7608 10139 12329 +13267 PF13425 O-antigen_lig O-antigen ligase like membrane protein Coggill P pcc Jackhmmer:C1ZFT0 Family \N 33.00 33.00 33.00 33.00 32.90 32.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.27 0.71 -4.20 326 5848 2012-10-01 22:04:45 2010-10-08 16:03:28 1 44 2998 0 1467 5882 1859 156.00 16 35.08 CHANGED +.h.......h.......h..h.......hs.hhhh..hhslllotoRs.u....hlshhlsh.......hh..hhh.................ht..h.ph+.............hhlh.........hsh..ls..lhslh.......hhh......................hh.tt..l..............................................p...ps..s.o.hss..Rh.p..........h.h......p..thh...p.hh........tp....t........s...h....hG.....hG.......h.s..sht.........................th...............thhh.s.....csh.............a...hp...hhhp.....hGllGhlh...hhshh 
.................................................................h...hhhh..hhhh..hh.s.l.h.h..o.t.S..R.s..u......hluhh.lsh.......hl....hhh.................................................ht.tph+......................hhlh.........hsh..ls..lsslh.......hhh........................thht.h....................................................................................................................tt..ps.s.ohss....Rh.t..........h.a.............p.tuh..p.hh.............tp....p........P....h.......hG.......hG.........hsshh...............................................................hhhtsHNh..................a.lp...hhhp.....hGllGhllhlhh.h.................................... 0 494 986 1232 +13268 PF13426 PAS_9 PAS domain Bateman A agb Jackhmmer:A3PI49 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -10.17 0.72 -3.65 245 26423 2012-10-04 01:10:46 2010-10-08 17:13:29 1 3737 3198 105 10645 42862 3384 103.90 18 17.69 CHANGED ssu...lh..lhst....p................splhhsNpsh...tphh...Gh..s...........t.p.....cl..h..u..p.s................h.p...t..lh............................s....tt.....t...........p...t.p..h.h......pp...........ltp..th.p..............ptp....t....h.....p.h..c.hhh...h...p...p.s.......................G....p......h..hhtlphtsl...........hs.p..p..sphh........th.ls..hhp....D...loc 
....................................................ulh.hh-.......c..........Gpl.h.h..s.N...p..s..h.........p.c.hh....Ga..s..............t..c......E.l....l....G...p...s................................h..p..........l.h.............................................................................................................s.....sp......t....................t....t...t...h..h...........pp..................h.h..p....tl..p..........................ps.p.........t......h.........p.t.....-....h..th.....h............p......+..s..................................................G.......ph...........h...hh..p..h...s..h.s..s.l..........................................hs..p..p.......G.p..h.h...............th..ls...hh.p..Dlo.................................................................................................................................... 0 3685 7467 9280 +13269 PF13427 DUF4111 Domain of unknown function (DUF4111) Coggill P pcc Jackhmmer:Q2YCQ2 Domain Although the exact function of this domain is not known it frequently appears downstream of the family, Nucleotidyltransferase, Pfam:PF01909. It is also found in species associated with methicillin-resistant bacteria. 22.20 22.20 22.30 22.40 21.60 21.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.28 0.72 -4.20 35 1222 2010-10-11 10:57:40 2010-10-11 11:57:40 1 5 685 0 86 694 11 106.30 45 40.45 CHANGED Ap-lFsPVPpsDhhcA.lp-olspW......sDhpGDEpN.......l...lLsLuRlWaolpTGpIuuKDtAApWslpRLP.scapslLptAppuYLGptp-shst.psppltsFlpah+ppI ...................................A-clFsPVPcpDLhcA.Lp-oLshWp.......u.sDas...GDE.....RN..................V...VLTLuRIWYoss..T..G......+IusKDVAA-WAhc..R..LP.spa.psllhcA+p.uY.LGp.c.-Dchts...pssplpcFl+asKscI............................................ 
0 28 51 68 +13270 PF13428 TPR_14 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2Y691 Repeat \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -8.26 0.74 -8.82 0.74 -2.80 819 700 2012-10-11 20:01:03 2010-10-11 12:02:20 1 232 562 0 291 7605 2621 43.80 22 8.89 CHANGED h.p.h..h..h..t..h.u..p.h..h..h..p..t..u..c...h..p.p...Ahp.h.hpp..h...h..p....t..t..s...s..s...s..p...hh...hth.up..h ....................h..hhh.tl.Aps..h...h.p...t..G...p.hcc...Ahphhcps...Lp.....t.s..P...s.s.......p..uh...htLu..................... 0 121 209 262 +13271 PF13429 TPR_15 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2Y889 Repeat \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.81 0.70 -5.34 27 995 2012-10-11 20:01:04 2010-10-11 14:33:27 1 217 880 3 284 2894 1122 173.90 17 26.66 CHANGED +l...t.chts...p..ps.h.th..AshhasptchcpALplL.ssscpts.ss-s.....taWRshApLAhsLpc-DtuptAYcpLLssstA.pssDhsphlth...h-usPhcAtclu.huac+s+ssptLppAl.hhpptcsapcltsLLts..hp..t..tst....Ap...pSshhhssRAchhcppGpspsAh+.h+cAlshssusspl+usLLWhLlDtGppsp..h+.hLstacsph.....pDusLWtsaAuAhLhLscsspALpahpp.ptpptppD.LhhhuYADAh-huGpt-tAhplRRpsa+pLpc 
............................................................................................................................................tttt.................................................................................................................................................................................................................................................................................................................................................h............h.h........h....h.....p..h....p.......p....h..t....p.......p......h...hpp......h........p.....t.t..............................ht.............s..s.......h...a....h....t.....h...A...p...l.h.t.p.h.s..p..h..st.A...h..pshcp....A..Lc..l...p...P.....s.....s....s....p....h........t.......s.....h....s.....h....h....L...h.....p....p...t..p..h...tp.........h..t..h..l...p...t...hh...pt..............................ss..s..t....h....h..........t....h......u....h......s...........t..h...t..p...........t..s......th...hp.......................................................................................................hhht.......................................................................................................... 0 84 161 223 +13272 PF13430 DUF4112 Domain of unknown function (DUF4112) Coggill P pcc Jackhmmer:C1Z9H4 Family This family has several highly conserved GD sequence-motifs of unknown function. The family is found in bacteria, archaea and fungi. 
21.90 21.90 21.90 24.50 21.30 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.20 0.72 -4.12 134 487 2010-10-11 14:44:25 2010-10-11 15:44:25 1 7 424 0 240 486 28 104.40 32 53.33 CHANGED ptl.p+lc+hAhhhD.ps.h.....plsGh..thRhGhD..u...llGL.l..Ps.l...GDhlshhhuhhll.h.pA.pcl....GlPppllh+MhhNlslDhll........G.hlPl..lGDlhDhhaKuNpRNhtLLccaLp ........................t..hp+lcphAhhhDpsh.....plssh...th+h..Gh-..u...l...lGL.l..Ps.l...GDhlshhluhhl.l.h.pA.pph....GlPtplhh+MlhNlslD..hll........G.hlPl..lGDlh.......DhhaKuNpRNhtllcpal....... 1 66 144 206 +13273 PF13431 TPR_17 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2YD44 Repeat \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.62 0.74 -7.98 0.74 -3.31 374 768 2012-10-11 20:01:04 2010-10-11 16:05:33 1 146 199 0 464 10633 7068 36.70 39 7.58 CHANGED hhpculph..s..Ps.s...spshh..plu..h.h.h.t.........pp.u...p..ht...pA.tp .................h.KSlEs..D........Ps.s.......upSaa...pLu.p.s.h.S.......................st.G......p.s..st.............................. 0 367 402 433 +13274 PF13432 TPR_16 Tetratricopeptide repeat Coggill P pcc Jackhmmer:Q2Y5H2 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.35 0.72 -3.19 121 9987 2012-10-11 20:01:04 2010-10-11 16:24:43 1 2573 2894 10 3726 30807 9308 65.00 20 12.94 CHANGED hthApthhptuc.hppAtphh.pphhptt...st....sspshhhhupshh.pt...sphspAh.thhpthh......ptsstss ..................................hu.thhptsc.hspAhphh.ppslptp.................Pp...............sspAhhhL..G.p..shh..pp.................G.chspAh.phhppsl.........phtPt...................................... 0 1425 2531 3216 +13275 PF13433 Peripla_BP_5 Periplasmic binding protein domain Coggill P pcc Jackhmmer:Q2Y8R1 Domain This domain is found in a variety of bacterial periplasmic binding proteins. 
21.10 21.10 21.10 21.10 21.00 21.00 hmmbuild -o /dev/null HMM SEED 364 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.90 0.70 -5.71 64 1016 2012-10-02 13:57:41 2010-10-13 14:55:12 1 18 666 4 355 8653 3387 355.80 48 85.60 CHANGED IKVGlLaShoGohAloEpolh-sshhAIcEINssG........Glh....G..+..plEsVlhDPuSDhshaucpAccLls.p-+VsslFGCaTSsSRKuVLPVhEctsuLLF..YPspYEG...EsScNlhYT.GAuPNQQslPhlcaLhsphG.........cRhaLlGSDYVYPRpsN+Il+s.h.Lppp..Gu..c..s..lu..EpYhPl..Gp...o..-..apsllscI+....ph....t....Ps....slhSTlsGDuslsFY+thtst.....G.....ls.sp.c.hPlhuhSluEpElpul...ssp....shsGHhuAhsYFpSlcoPpNcpFlppa.+ph..a........G.s.sp..lsssshEA.uYhtlphWspAVcpA....G.os..-s...ctV+pAl.hGp....phsA...PpGt.l.p.lcs..NpHhthsstIGclpsDGQF-..llacoptsltPcPasshhst ......................................................................................................................................................................................IKVG.lLHS..L.S..G.T..M.A..I.S.E..o..s.l....p.D.sthhs...I......-....-...I..N...s...p..G.................G...V.L..........G.....+......p...l..E..s...V..l..h..D......P..A..S......s.W..P.h.F.........A..E...K.A....+pL..l..s......p.D.+.V......us.VF......G.......C.W.T.......S....s.S......R........K....u..V..L..P..V..a...E...c....h......N......u........L...L....a...............Y....P......V......Q.........Y...........E.........G..............E............S........p..........N.......V......F.......Y....T......G...........A........u........P....N.....Q.....Q....u....l......P......A........l...-.......Y....L......h...p...c..p..G..................s+..+...a.a...L...l...G...o...D....Y....V...a...P....R.T.....o.....N...+.I.l...+.s...a...L...c...sc........Gs...............c..s.....l........t.....E....s.....Y.....s.P....h.......Gp........o..D..a....p....o....Il.u....c....IK..ph.........t..............s.......sV...l..S....T..l.N..G..D....S.N.V....s....F....Y....K.p....L....t....st........G.....l.p.....u....s.....c...h....P.....V...l....u....h......S.....V......u.....E....E..
..E...l...p..G..I.......s....s.c.............sL..s.G...a......l.......u......A.....W.......N....Y.....F.......p.......S.......l......-..............s........P.......s......N.......c.....c.......F...l...p...p....a.....+sh....h......................G...s.....cp......VT......s....D.....P.....h....E..A...s...Y...l..u...h....a...h....W......t......p.A...VE.K.A.....................G....os........Dl..........D....c....V.pt..A.h.....h..G............p..h...s.A...........P....p....Gh...l.....p.....h...c......t...........N.....H.H...l.....p..K...s..s.h...I...G....c.l..p.s..D.G......Q...Fp...lV....a.p..o..s.pslcs.pPassh................................................................................................................................................................................................................................................................................ 0 75 185 265 +13276 PF13434 K_oxygenase L_oxygenase; L-lysine 6-monooxygenase (NADPH-requiring) Coggill P pcc Jackhmmer:Q2Y7Z9 Family This is family of Rossmann fold oxidoreductases that catalyses the NADPH-dependent hydroxylation of lysine at the N6 position, EC:1.14.13.59. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 341 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.53 69 1152 2012-10-10 17:06:42 2010-10-13 16:42:34 1 13 829 4 434 10129 2446 319.20 29 71.40 CHANGED claDllGIGlGPhNLuLAsLhcphs.........slsshFl-ppspFsWHsGMhLssuphQssFLpDLVThssPTS.aSFLNYL+p+.s..RLapFh.tcsahssRpEascYhpWsAspls.s...lpFuppVpslphsp...p...t........h..hpVps.....ts...ppp...phhu+plllGsG.ssPhlPss.hpsh......splhHoo.cY.Lpphsp.........tss+plsVlGuGQSAAEIah-LLpc..hs..s...hplsWloRsssahPhDposhspE.h.FoP-Yl-aFasLsp.ppRppllpppcthsasGlstsLlppIYchLYp.p.clp..u.....p....p.hpLhsppplpshpps...s...tt...shpLshcptt.ps.pp...shcsDsllLATGYch ........................................................................................................................hDhlGlGhGPh......NLul..Ashhppht......................th.pshFh-..p..p.......s......p......F..s...W...H.....s..G.M....hls...s.s..p..h.Qs.s.F.....l.pD........L.............V......o..ht.....s.Ps......s...a...SF.....l....N....YL..tp.p...s.....R.l...h...p....F.h......h........c.....p.....h......h........ssR.....p.....E.a....s......-..Y.h....p.....W.....s..A..p...p..h.....s....s..................lp.....a...u..p.p......V...p....s....lc...hst.....................p...................p........th..................ap....V.p......s.......................t.....................p.....p..p...............p..h....h..u....R........p......l..l..l.....G..s.......G.......s.....p.....P....h........l....P......p..s.....h...p...t..h.....................t...p....l....h.....H...u...o.....p...a.......ht.p...t..t...............................thpsc+lsVlG.u.G.QSAAE...l...hh...sL...hpp.......hs..t....................hpl..s..h.l.s..R..p....s.......s....a....t....s.....h......-....p.....o...........h...s...p...E.......h......F....s.........P...-......as..c...........h......F....a......s....h..sp.....p.t..R....p.p................ll.t.p....p.....+.......h.......h.s...u......l..s....c.h......l.....t...p.....I...Yc..
...h.l...Yp..p....p....lh.t............................p.....p..hplh.s...s.p.p.l......pshp....tp..............s...........t.......th......p.L........t......h.p............p.....t.....................p......t.........t.........p................tp..h.csDsl.l..hATGYp..................................................................................................................................................... 0 99 242 354 +13277 PF13435 Cytochrome_C554 Cytochrome c554 and c-prime Coggill P pcc Jackhmmer:Q2YA34 Family This family is a tetra-haem cytochrome involved in the oxidation of ammonia. It is found in both phototrophic and denitrifying bacteria. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.34 0.71 -12.51 0.71 -4.09 93 858 2012-10-01 23:37:15 2010-10-13 17:17:09 1 116 351 4 437 1110 218 122.30 16 25.29 CHANGED sCh.sCHsp..tt.ppatp.....ot.Htp.hhpsh.t.....ttt..................h...t...........t.sp..spCs.uCHs.sh.thp.s....tsh.....sht......p.........h..........tsl.sChsCH...ssss.sa...spts.ss.sst............h.t..............................hsts.stsstpsCusCHt ......................................................................................t.....................................................................................ptt..ttCh.pCHs..ss.....htp....s......t.sh........st.s..............p.........tt......................................tsl.sCtsCH.......uss...u...pa........tph.s.t.t..t....................................................................................ht......ht.ptp.C.h.tCH.............................................. 
0 157 293 384 +13278 PF13436 Gly-zipper_OmpA Glycine-zipper containing OmpA-like membrane domain Coggill P pcc Jackhmmer:Q2YCQ8 Family \N 28.90 28.90 28.90 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.80 0.71 -4.44 34 349 2012-10-03 03:18:43 2010-10-14 11:39:32 1 7 288 0 122 1054 136 99.20 26 65.73 CHANGED uCAshPsGPs......hshPusscs..hppFp..tDcttCRpaAtpps...ss..psp.....pss......p.sussu.uslGsu.lGA....usGA.shGs.....upG.AulGA..usGhLsGuAuGusuuphtshtsQ.tpY-suYhQCMhu+ .......................................................................................................................tsp.........psp.........................ppss..su.ussGAA.lGA....ssGu..shGu.......upG....AshGA..uhGuls.Gu.......hs.....G.......u....s.......ts.....p...p.....t.t..........t..................................................... 0 29 74 102 +13279 PF13437 HlyD_3 HlyD family secretion protein Coggill P pcc Jackhmmer:Q2Y7Y5 Family This is a family of largely bacterial haemolysin translocator HlyD proteins. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.48 0.72 -3.69 147 4655 2012-10-02 20:27:15 2010-10-14 13:36:12 1 49 2429 0 1181 25718 5084 112.10 18 29.22 CHANGED tlpAPhsGhl.p..t..h..s...h..p....G.ph...lp.s.Gps....lh..p....lhs.........s...p.....h.t.lcsh.l.sspphstl............Gpplpl.pht....s.hst....p...l...pGplppl..u..s......ss....s.......p....psp.....s..ht.....h.....ph..plsss........................thtlpsG ...........................................lpuPhsGh..l..t...p........h..p..............s...p..............G...ph........l......s.....s.......G....p..s.................l.h....p........................l.hs....................h.....s..p................l.h..lp..sh..l.....s.s.....p.........c....l....s.t.lc.....................G.p..p.........l....p....l....phs............s.htth....s......l................pG.p..lpp..I...u..s................ss........s..........................p.......ppp................h..h............s.....ph..phpt.................................................................................................................. 0 360 721 990 +13280 PF13438 DUF4113 Domain of unknown function (DUF4113) Coggill P pcc Jackhmmer:Q2Y7D7 Domain Although the function is not known this domain occurs almost invariably at the very C-terminus of the IMS family DNA-polymerase repair proteins, IMS, Pfam:PF00817. 24.00 24.00 24.30 25.40 23.90 23.90 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.38 0.72 -4.37 197 1492 2010-10-14 15:22:47 2010-10-14 16:22:47 1 11 959 0 244 1129 1436 51.00 46 13.70 CHANGED sppLMpslDplNp+aG+uslhhAu..p.G...h.p........p..........sWpM+RchhSPpYTTcWs-LPhl ....................sppLMpslDplNt+pG+GslaFAu..p.G...h..p..........p..........pWpMKRphLSPpYTTRas-L.hl................ 
0 65 133 204 +13281 PF13439 Glyco_transf_4 Glycosyltransferase Family 4 Coggill P pcc Jackhmmer:Q2Y6X7 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.96 0.71 -4.55 52 6671 2012-10-03 16:42:30 2010-10-14 16:40:50 1 54 3442 22 2039 11319 3388 166.90 15 42.72 CHANGED lllsspphtp.huGsEhhsh-LApthtp.pGapVplh...usphsp.htpp........................hhhthshhhthtphlpph..chDll..+.p.........................hhhshHssh.....................h.......hp.h.hhhhthh....h.tsctllAlSptstcplhp.hs....lsp....pclpVl.Nul-tphFp ............................................................................................................................................h.................hu.G..s.t..p..h.hhpltp..t....L....t.p....p....G...a...p....l...h.lh.................s...t.......t..t................................................................th..........................................h.........t...........t.........h....h...........h......t.......h..........h............h........h.......h...........p.......h.......t.........p.....h.......h....c...ph..............ph.D.ll..........Hh.p..t...........h......s...h...h....h...s..h.hthh.................................hph..h.l..t..o....h.Hshh....................................................................................h...h.....t........h..h...t........h....h..h....thh.............hp...p.sc..t...l..l...s.....l.S.p...h...h..t..c.p..lhp...hs.............h...............pplp.l.l......s....Gl-.t......................................................................................................... 
0 677 1353 1746 +13282 PF13440 Polysacc_synt_3 Polysaccharide biosynthesis protein Coggill P pcc Jackhmmer:Q2YCG5 Family \N 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.76 0.70 -4.92 52 1715 2012-10-02 21:24:20 2010-10-21 10:27:40 1 14 1385 0 471 5839 872 248.50 15 56.48 CHANGED tssshhhtlhlu+hlshpshGh.athshshhs.lhhhl.s...th.Gh..pssh.ht....t..s.tstp.p..htsh.hthsh..hs..shlluhshshh..hlsh...hh..t.s..thh.hh.hluhhhhshhhpthhpthhhsttchth.sshhshlhshhh.hhshlhh.hhhth...slhshhhs..hshuuhlshlhhh.h....htt...hc.ht.hp........shth.h...c.h.h.p.puhhhshtsh...hshhs.hphsh.hl.lsh.h.us..stlGhaps.st.hlhth.hs.lhhtsltphhhPphu.p.t ......................................................................................................................h...thhhhhhls.+.h.h...s...stt.h...Gh...hs..h...h....hs...hh...h..h.h....l...h.....th...th......p......h...ht...............tt...tstp...p..........ht.p....h....h....p....h..s.h........hh........sh.l..h....h....l...s....h....h.....h....h......h.............h....l.sh................hh..........s....s.....t.....p.....h..........h.........................h...........h..........h.....h..........h......s......h...h...h....h...h....h...s.....h.....h....s....h...h...p.....s.......h....h......p...t......t....p........+.......h...ph.....hu.......h.......p......h.....h..p.....s....hh...t.....h..h..st...l.hh..hh.hsh..................sh.h.u.......h..l...lu..........h....h.........h....u...s...l....h......u.....h.........h....h...h..h..h................hht...........tp..h.p...hp..............................ph.ph..h.........c...h...h....p...p....s....h....h....h.......h..h....tsh.................h.t.h.h....t.....tp.....h.........s......t.....h..........l....l.........s.....hh....h...us.............ss......s.G.....h...as.....h....s....h....ph....h.s.h...hs....hhsts.l.sp.hhhsph...h..............................................................................................
................ 0 144 297 385 +13283 PF13441 Gly-zipper_YMGG YMGG-like Gly-zipper Coggill P pcc Jackhmmer:Q2YAQ0 Domain \N 27.60 27.60 27.60 27.60 27.50 27.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.47 0.72 -4.44 284 420 2012-10-03 03:18:43 2010-10-21 10:38:44 1 7 388 0 100 1126 106 43.70 46 28.65 CHANGED sss..stGAulG....A.usGAs....lGuhsG........tu.s..p....G...................A....h.lGAusG....ussGu.......shGs........t ................psstGuhlGAusGAlhGshsG............ss+....G........................A.AlGAGlGAlsGhh..................... 0 21 55 80 +13284 PF13442 Cytochrome_CBB3 Cytochrome C oxidase, cbb3-type, subunit III Coggill P pcc Jackhmmer:Q2Y8Z1 Domain \N 27.00 22.00 27.00 22.00 26.90 21.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.80 0.72 -3.82 488 9400 2012-10-03 10:02:11 2010-10-21 13:26:28 1 190 2200 132 3123 13490 4902 79.30 23 30.40 CHANGED sss.psGpplY.p.p....sCt.s.CH.u......sG......ss.....u............sslt.......sp...shs........s.............cplhp.......hsh..p.G.......t.....s.....u...MP....shs.t......p.ho-c-lptlssal ............................t..htpGppla..t.p........sCu..s.CH..u..............ss......ut.....G.............................................PsLt......................ss.......sht......................................................................pplhp.................hlh.......p...G........................t.............s..............s.......MP..............uas.t..........tL.s.-.pc..ltslssYl...................................................................... 0 889 1958 2631 +13285 PF13443 HTH_26 Cro/C1-type HTH DNA-binding domain Coggill P pcc Jackhmmer:Q2Y718 Domain This is a helix-turn-helix domain that probably binds to DNA. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.74 0.72 -3.77 196 4823 2012-10-04 14:01:12 2010-10-21 13:31:50 1 44 2292 4 901 17516 1639 60.70 23 55.90 CHANGED +.Lpph..htc+....ph....shpcL.....t.....c......ts......G.....lop..sslscLtp..s.p.....p.tlsh..ssLpplCphLsC.p.suD.ll....chh..s- ....................................lphh.htc..+........ph....ohpcL.....u........c.......ps.........G..........lop.......sslsp.....l.tp.........s..c................p...s.l.p......h...ssL.p..p.I......C.c.hL......s....s..p....s-.lh.....ph................................. 0 334 628 760 +13286 PF13444 Acetyltransf_5 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:Q2Y7M2 Domain This family contains proteins with N-acetyltransferase functions. 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.61 0.72 -3.44 244 1571 2012-10-02 22:59:21 2010-10-21 13:46:37 1 11 1299 0 478 1970 489 102.60 30 27.02 CHANGED LRa.pVFtpEhsusspt......t.hD...hDpFD...shscHLllh....-.ppp...............lVGsYR.lhtsstst....th..ts.hYops.F.....c.h.stlts..hhs...phlElGRSsVpscYRsttshhhLWt ..........................LR.psFtpph....suphps.........t..hD..hDpaD....p.h.s.pHLllh.....Dpsp...................c...lVGsYR..lhhssphh....................th.....ss..hYops.F..c...h..st.hht.......hhs...phlElGRosVp..scYRss.t.sh.hLh........................................................................ 0 133 304 401 +13287 PF13445 zf-RING_UBOX zf-RING_LisH; RING-type zinc-finger Wood V, Coggill P pcc Pfam-B_49 (release 24.0) Domain This zinc-finger is a typical RING-type of plant ubiquitin ligases [1]. 
28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.44 0.72 -4.24 80 584 2012-10-03 15:03:13 2010-10-22 16:12:06 1 39 268 1 422 2209 29 41.40 40 9.31 CHANGED CPls+-....ss.tp.......Ph.h...Ls.CGHlls+pslp+lsp.......tt.................p.....hKCP ...............CPls+E.....osp.pNs............Ph.h....Ls.CGHsls.+c.sLp+Lhp..............ss......................p......h+CP............................ 0 166 244 345 +13288 PF13446 RPT A repeated domain in UCH-protein Wood V, Coggill P pcc Pfam-B_2127 (release 24.0) Domain This is a repeated domain found in de-ubiquitinating proteins. It's exact function is not known although it is likely to be involved in the binding of the Ubps in the complex with Rsp5 and Rup1. 24.10 11.40 24.20 11.40 23.80 11.30 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.82 0.72 -4.56 66 317 2010-10-22 16:03:35 2010-10-22 17:03:35 1 11 124 0 248 350 8 60.80 23 12.16 CHANGED hhshppAhphLpl.ccsssD-hllosaphKls........-sP.s..phchh++ALphIAcpRpSth..LhpFLps .......................shtpAhph..L....t..l...s......p......s....s....s.D-..hllssaptphp..........ssP..s...phphhpcALphIAptRpSth..Lhphl.............. 0 59 132 216 +13289 PF13447 Multi-haem_cyto Seven times multi-haem cytochrome CxxCH Coggill P pcc Jackhmmer:Q2YA36 Domain This domain carries up to seven CxxCH repeated sequence motifs, characteristic of multi-haem cytochromes. 
21.90 16.20 21.90 16.20 21.80 16.10 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.52 0.70 -5.33 11 1428 2012-10-01 23:37:15 2010-10-25 16:31:19 1 16 119 2 112 1635 47 181.40 54 67.74 CHANGED ppVGCIDCHu.....slst.pphpHppc.......LtMPstssCGsCHlppFAE+EuE+cs......W.........Pptp.........Ws.G+PSHulsacA.NV.EsuhaAuMspREVApGCsMCHsp.QN+CDuCHTRHpFSsAEARcPpACusCHsGlDHNEaEsYhhSKHGslapsptc.pWsapsPLK-Ah.pcGG.TAPTCssCHMEacG.-aoHN.lsRKlRWu.sP.sPtIA-sl..sp.WhEtRh-uWlsTCspCHSspFA+saL-thDpGphpGlshhpEAcplltsLYcDGLLsGQpTNRP ..............................ss....................................................................................................................................................................................................................................................................................tGCT...F.C..HT...........s.....p............p........+.........Cso.CH..p.RH...pF...s.s.t.......AR+...s...E.pCps.CH.h.G........KD........HRDWE..AYDISlHGs.......VYQlN..K........a.D...ashp....ctL.u.D.A.D...YVGPTCQYCH....M.....R.....GG.....H..............HN.VQR.huTV.................Y............TS.....M.....GM..S...........A...D..RG..................A.Pl...WpEKR..DpWsSVCDDCHSPRFAREpLQAMDEAsK.D.AG.LK.Y.pE.TFKVAEsLhhDGhh-PMPKD..Ls........................................... 0 45 90 107 +13290 PF13448 DUF4114 Domain of unknown function (DUF4114) Coggill P pcc Jackhmmer:C1ZAY0 Family This is a repeated domain that is found towards the C-terminal of many different types of bacterial proteins. There are highly conserved glutamate and aspartate residues suggesting that this domain might carry enzymic activity. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.23 0.72 -3.16 74 320 2010-10-25 15:42:29 2010-10-25 16:42:29 1 61 151 0 82 297 55 84.00 22 9.76 CHANGED phsuG...splua....hLhssuhssthht.............................h...ao.sshN.............ss....hp.....phtshttsst...h......lGaEDhh.....uD....p..DaNDllFtlphss ..................................................................................h.tG.tluhhl.hssuhttthht...................................................h.aohtshN............................ss............hp.............ph.tshtsss......hh..............luFEDhh.s..........uD.............t....DaNDllhtlph......... 0 26 59 81 +13291 PF13449 Phytase-like Esterase-like activity of phytase Coggill P pcc Jackhmmer:Q2Y944 Domain This is a repeated domain that carries several highly conserved Glu and Asp residues indicating the likelihood that the domain incorporates the enzymic activity of the PLC-like phospho-diesterase part of the proteins. 
21.30 21.30 21.40 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -12.38 0.70 -5.17 51 1288 2012-10-05 17:30:43 2010-10-25 17:12:27 1 51 942 0 459 1415 255 314.60 19 66.08 CHANGED phlGp........hslssshthsu.........sthGGlSGlsass.psstaaslSDcts....sssRaYshplsh....tst..thsslphtshssLpcss..................Gpsa...........h.DsEulth..sssslaluSEGc.....tt.h...Phl..hchs.hs.Gph.hpch.sl.Psthh.t.t.................pGhcsNhuhEuLolss-...Gp......hLasAsEss......LhpDustsp....tt.......sRllpachts.G..ts....htpahY.h-sh...........stsGlo-llAlsspt..hLslERu...au.u...................hGtsh+laplsls.s..upsssshsslt....s....lsslpKpLlh-htp.......s.h...lDNlEGhshGshh.-GppoLlllSDNNF......ss.t.QpTphlshplp ...........................................................................................................................................h.h................s...........htu.h.Sulhh............t........t......s............h....h.sl..sDps...................................t....h.hhh.hph............................t.sth.ph.h..p.h....h...lp.css...................................................................Gt.sh.......................................DsEulsh................ss......u......s......halusEhs...................Phl....hchs.....ts...Gph....h...t.......h.....s....h...Pssh..................................................hthpp..stGaE...ulshs.s..-...Gp.........hLa.shh....pss....................l.h..p.cs.t....t...................hRllpa...sht...s.....t.............................htta..h..Y.h-t......................tshsl.u-hshlsspp...hlllE..Rs....t.s......................................uhhp+.l.ac....ls...ls..t.....t........................h...........t.s........h..tt....t................t..............h.h.hptp...l..h.s.h....................................s..h......h-phEGl....sl.............................................s......s........p.......p..........lhlhs.Dssa.......................t.s.hh.....................................
......................................................................................................... 0 109 269 375 +13292 PF13450 NAD_binding_8 NAD(P)-binding Rossmann-like domain Coggill P pcc Jackhmmer:D2BHP8 Domain \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.46 0.72 -3.95 465 10693 2012-10-10 17:06:42 2010-10-25 17:56:16 1 305 3705 149 4167 53665 24260 63.30 26 12.65 CHANGED IVGu....Glu..GhshA.th..h..s.....c.....p.....s..h....c..l..hlh-...p.c.sc.lG..Gps.h.sh...ph.......p..u........hhh.c.hG.s+.ha.ts.......t........tt...hh.c..hh...cp.l .................llGuG...hu..G.LssA...hh...L..s..........................c.............p..........G...h.......c.....V...hl...h.E............c....p....s.......p.....h.....G.......G.p.h....h....sh....th.........................t.................h.p..hs...............................................h....................................................................... 0 1314 2568 3528 +13293 PF13451 zf-trcl Probable zinc-binding domain Coggill P pcc Jackhmmer:D2BIP7 Domain This is a probable zinc-binding domain with two CxxC sequence motifs, found in various families of bacteria. 21.50 21.50 21.50 22.20 21.40 21.10 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -8.73 0.72 -4.33 91 388 2010-10-25 17:00:03 2010-10-25 18:00:03 1 9 215 0 117 275 17 49.10 57 50.87 CHANGED pDKsLsCKDCGpEFVFTsGEQEFY.tE...KG.F...p.NEPsRCssCRcARKpppss ...tDKsLsCKDCGpEFVFTsGEQ..EFYtE...KG.F...cNEPsRC.sCRcARKpppp.............. 0 65 108 113 +13294 PF13452 MaoC_dehydrat_N zf-MaoC; N-terminal half of MaoC dehydratase Coggill P pcc Jackhmmer:D2BH16 Domain It is clear from the structures of bacterial members of MaoC dehydratase, Pfam:PF01575, that the full-length functional dehydratase enzyme is made up of two structures that dimerise to form a whole. 
Divergence of the N- and C- monomers in higher eukaryotes has led to two distinct domains, this one and MaoC_dehydratas. However, in order to function as an enzyme both are required together. 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.39 0.71 -4.37 39 2537 2012-10-02 20:54:35 2010-10-25 18:06:56 1 31 1010 26 1023 4405 1494 127.80 17 29.40 CHANGED shlGpph..ss.hp..hplppuplRtFAcAh..Gp......ss..Ph.YhDptsApts.ta.sl.APPTF.hh.sls.....h.....ss.......hh.....ptl...s..ls..h....tplLHG-.Qpapa...cp...s.lhsGDplshpspls-lh-.Kps...Gs.h.calshcopssspcGchVush ..............................................................................t....................h...hsttplh.ash...ul...ss..............tt...s....hhs.t.............................s.h.h.s....P...s.o....a..sh..l.hs....................h..........ts..............................hh..........tl.........s....hs....h..................hpl..l...H..s...s.....p.phph............t+...P.....l.h...s...G.-p...l.psp.sp.l.ss.lhs.+.t.....Gp...s..sh....ls.hc.sp.h.p..p.t.p.G.p.lh..h................................................................................... 0 248 602 865 +13295 PF13453 zf-TFIIB Transcription factor zinc-finger Coggill P pcc Jackhmmer:D2BJR6 Domain \N 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.48 0.72 -4.76 126 674 2012-10-03 10:42:43 2010-10-26 09:24:28 1 7 520 0 270 585 125 41.10 44 38.76 CHANGED pCP..pC.p.s.s.hp.....hhcp..............pu.lplDhCs.pCcGlWL.DpGEL-+llpp ..............CP..hC.ps.s.lh.........hs-+..............pu.lcIDh..Cs.pC+GlWL.D+GEL-+llp......... 
0 84 174 240 +13296 PF13454 NAD_binding_9 FAD-NAD(P)-binding Coggill P pcc Jackhmmer:C2D2T2 Domain \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.08 0.71 -4.45 180 1586 2012-10-10 17:06:42 2010-10-26 10:52:14 1 24 1273 0 358 3901 1444 156.50 25 30.28 CHANGED AIlGuGspGlsshtpLhpph.......t..ts....lplplh.-s..t.sh..G.G...tsaps..s..p..spthlhNssusph.ohhscps.............ssahcWhp.....spt......................ths.s.ps..assRtlaGcYLpthhpphhp.ph...st.th...plph.h.p...s.cls...s.lpt...ps......ss....h...h.l...hh....s.su..p..s...h..tsDtV.lLAsGps .............................................................AIlGuGssGlhshh..pLlpp......................tss......lsls...la.-p.....s...th....G.........G.........hsat.s....p.....p....st.h....L.h...N..h..s..u.t.p..h..s.h.h.sp..........................................................ssa..h.cW.hppppt..........................................................t..sh..p..s..p..p.......a..h.P.R.......h..l.......a..G.cY.h....c....p...h.........h..pth......hp....ph......p.hh.......p.l.t.....h.h....p......s..plh......s...l..p.......ss...................ss.........h.....hl.......th................s.ss.....t......s..............h...shDhl.lLAsGp.h...................................................................................................................... 0 84 201 290 +13297 PF13455 MUG113 Meiotically up-regulated gene 113 Wood V, Coggill P pcc Pfam-B_48720 (release 24.0) Family This is a family of fungal proteins found to be up-regulated in meiosis. 
22.30 22.30 22.40 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.99 0.72 -3.53 212 101 2012-10-01 19:55:08 2010-10-26 13:41:55 1 2 86 0 70 797 308 100.00 30 25.08 CHANGED hKIGhTs...ss.pcR...lpphppts......shphpllthh.ht................................sstclEphlHpphps..tRlpt..................EaFc....lsh.....pplpp................s.......lcch ..................................LKIGRus....NV.p+R.lspWpcQC........spplpll+ha.hhs.p.t.............................................................phsscs++lERLlHlELss...............hthpt.....t.Ct.tC....s+.......pH.....pEaFc.lps.........pphtt................l...l............................................................................ 0 20 39 58 +13298 PF13456 RVT_3 Reverse transcriptase-like Coggill P pcc Jackhmmer:D2BGL4 Domain This domain is found in plants and appears to be part of a retrotransposon. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -9.62 0.72 -4.20 89 3907 2012-10-03 01:22:09 2010-10-26 15:56:17 1 208 854 5 1534 3767 350 81.20 25 12.10 CHANGED s.h..........AEhhAl..ltGLphAhphGhp.+lhlpuDuphllptlpsp...hpsps.phstllpclcplhpp.Fpphplp+lsRcsNpsAcsLAp..hutpt ......................................s.p.AEhtul...lhuL.p....h..A.h......p...h........s...h.......p....p........l..h.l...hs.D.SplVl...p..pl...p.tp................hpsp....s...p...ht........h..h..pp....l..c..p..hhp....p....F..p....t....h......p..l....p....al.............R....p....p....N..ph....ActLAp.u...t..................................... 
0 197 508 925 +13299 PF13457 SH3_8 SH3-like domain Coggill P pcc Jackhmmer:C2D335 Domain \N 21.00 5.00 21.00 5.00 20.90 4.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.66 0.72 -3.82 69 1627 2012-10-02 18:48:24 2010-10-26 17:18:41 1 78 162 3 111 1462 0 74.30 26 36.50 CHANGED ss.cslshpuplp..pspsculas.pshthsuspplss...ups....Ysspp...lplh.ccApTs+........u...o......ahph...........ph....supslGWlDpcAhs ...................................................pcslshhshVp....s..ssssu..las.t..Phtss.usp.p.......sst...........lss........Yps+s...lpl...+cApsst..................................s..s........WYpl..........ph.....sspsIGWlcscsh........................... 3 37 65 98 +13300 PF13458 Peripla_BP_6 Periplasmic binding protein Bateman A agb Jackhmmer:A3PL33 Family This family includes a diverse range of periplasmic binding proteins. 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 343 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.94 0.70 -5.09 259 10778 2012-10-02 13:57:41 2010-10-27 11:15:08 1 73 2751 52 3781 13919 6511 334.70 19 81.74 CHANGED 
sl+lGllhsh.o...Gsh...u...ss..upshhsusphulcphN.t.s.G...............Glt....G..c.....p..lchl.ht.D.sts.ss.stssptucchlp..p...cp..Vts.lh.sshsossstul.ts..hh.pp...ps.h..........hl....s.......ss..........t.....hs........s....t....p.s......ss.t...ha..hh.s....h......ss.t.tp..st......shspa............h......h......p......p.h........G........sp......c.hh.hl.s..ss..hshu.psh.t.p...sh..pph.hp....tt.Gsp..ll.............sp....hhhs...h..ss....s-....h..ss....hltp.lps..u....tsD..................s....lhsss.susssss.hhcth..p.p....t.G.....ls........t..h.hh....u.hs..ht.psp..l..tsh.......us....ts...s.p.......G.....h.....hhs.ss....ah......s......h.....s..s....st..........sptahppa..pp........t....h.............s.....p..t............s.s.t.htt...s...uYhus....thh....h.pAl.............cp....A.........G........s...s..s.....sps..........l....h................ps.l.c.s..hs...h.....s..........s.......s.h...........u.t..hth.........h...s.s.....s.pp..s....h........p.s...hh...lsp.lp......ts..u...p ..............................................................................................................l+lGhhhs.h..o........G.sh.......A....th....G.p.t....h.pGh.p.h...A..l.cc.lNt..t..G.........................slt..........G...c.........p....l.pll...hh..D.....s................t......s.....c............s....p.p........u......s....s...s....s....p.c.....l...l..s...........................cp....lt...h..l.l.G....s....h..s.S.u..s...s.h.s.s..ss.....l.h...pc.......t.t...l.......................shls...........su..................................s......ss.............s......p.........ps....t..t...h.....ha....+h..s........s....................ss....s...tp....ut................s.h.up...a.......................................h.............h..........c........p..h........s.............sc..........+.l.u..ll..t....ss.....ss...a.....G...pu.h...t..p..................sh...pps..h.c........t...t...G.hp........ll............................t.p......t.th..s...........h....ss........pD..........a.....s.u...........h..l...s....p..
...l..p..s.......t.................s..s.D...................................................................s.........l.h.h...s.s.......h...t...s...p...s..s........hl.+....p.h....p.p.....h....G............hp...............s...........h..hh........uss......hs...s..s...p....h...ts.h..............................us........ts......s..p.........................G..........h...............hh.s..ts........hh........s...............p...s........st..............................................sp..t..a....h..p..t...a....ct.....................p....h.............................................................s....p...........................s...s..h....h..sh............h....uYsus.......hh.l....h.pA.l......................................pp...s..........................u........s....h....s............stt..........................l.h....................................................p.s...l.p.s.....hp....h.....p............s........s.h.............G.......h.t..h............t....t.p......s..t......h......................h.h..h.h.p.h.......t............................................................................................................................................................................. 0 935 2299 3103 +13301 PF13459 Fer4_15 4Fe-4S single cluster domain Coggill P pcc Jackhmmer:B0SHL0 Domain \N 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.71 0.72 -3.41 122 1717 2012-10-03 08:56:43 2010-10-27 11:35:52 1 33 874 0 647 2723 504 59.90 29 46.40 CHANGED lhVD.c..spClGstt...Csths.....P.csFph-.s.c.u.hupsh...............h.................hssc.ppphp.cA.hcsCPsssIpl ........lhlD.p..spCh.Gsuh......Chths........P....-.l...Fp.hc.-.-..G..hstlh...........................................h.ss.s.tp.ppsp..cA..scsCPspAIp.............................................................. 
1 174 451 575 +13302 PF13460 NAD_binding_10 NADH(P)-binding Coggill P pcc Jackhmmer:C2D4U5 Domain \N 23.50 23.50 23.50 23.50 23.40 23.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.94 0.71 -4.28 74 14495 2012-10-10 17:06:42 2010-10-27 11:37:19 1 78 3768 30 5105 51820 16534 186.90 17 65.92 CHANGED IsllGAsGpsGptlspchhpcG....hpVpuhs...Rs.ss....ths..........thphlphDl..hsh...tthspslp...G..hDsllssh......ush...tt...........c........tp..s..sp....pllcshp.t...s.s..sp.+.ll...l..l.uusu...hhp.....stpth..............h............h.h....ttt..t.ts.....-.....chl....p..s.s..sl.sWThlpPut.hhps.......t...sp....phph.....hts...........ss.....s.t.....st.....shlshpDlApshl-tlps ......................................................................................................................lhlhGA..o...G....h...l..G...p......t....l...s......p.......p......L.........h......p........p......G..............t...p..V.....t......u.....h....s.......................R.....p.....s.p...............p...htt............................ts...h....p......h........h.........t........s........D..........l......p.s......................t.s....l.......t.......p.......s.......l.......p...........s...............h.......D.............s........l.......l...h..sh....................ss.h..tt........................................................-.................................hp.s....sp..........p..l.....h....p...u.......h....p....t...............s...s........lp.......+......h..l.............h...........l...Su...h.u................s...t..................p.t...s.........................................................................h..h........ps.c.......t....ts..........-............ph.l............................p....p....s.......sh.....s....h......s....l....l...R.....s..u......h....l..hss..................t.....tt.............hth......hht..............................................................h...........t.l...t.h...t...D...hAthhht.h..t......
............................................................................................................................................................................................. 1 1558 3201 4341 +13303 PF13461 C-term_anchor Cell-wall surface anchor repeat Coggill P pcc Jackhmmer:C2CZF8 Repeat \N 27.00 27.00 27.30 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.04 0.72 -3.50 178 2550 2012-10-02 15:23:12 2010-10-27 14:32:45 1 182 317 1 148 2314 6 59.40 35 18.55 CHANGED YlDp.sGpp.ls...ssps..loGp..lGcsY....so.....psc....sIsG.....Y..plsps.....sssts.GsFs.pss.psVsYV..Y ........................YhDp.sG..pp.ls....ssps....loGp........lG-sY....so..................psc.......sIsG.................Y...sLsps................ssNss..GsFs..sss..poVsYVY................ 0 51 84 132 +13304 PF13462 Thioredoxin_4 Thioredoxin Coggill P pcc Jackhmmer:B0SES9 Domain \N 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.08 0.71 -4.23 52 3857 2012-10-03 14:45:55 2010-10-27 15:14:37 1 30 2252 39 1197 5536 2874 166.60 17 66.73 CHANGED ss....s..shh.lGstcAslsllEYsshsCPaCsp.app.p.s.h..p.lhpc.YlcsG..Klpalh+ph....hs.c.s....lhAuhh...ucs.stp...s...phF.h.hhpphhhpp....ppt....hsps.........pt........hs........tp............sssh.....t.cp.h.t.sh.pss.p.hpshl.t.t.pp.p.u.cps.slp.sTPohhl..sGch..h......s.shsh-chpthI-p 
.......................................................................................................................................ts........G......s.u.....l.sllt.ahDapCPa.C....t....p....hp............t..p....l.....t.....p....l..h....c...p......h..........s..s........c.l...p..h..h..h+..ph................sh......sps..............uht.A.u..ts..............u.ts...s..tpp...........s.........pha....h.hh..c...p...h...l....h..p.p............p.pt.....hspt................pp............hs...........tp..............................................................tshs......h.pp..hpp...sh......ps....t....p....h......p..p.t..l....p......p......s...........t.p...........h........u.......p..........p...........h.....s....l..........p......u..........TP.o..h..h..l.........sGph.....h.........t..t........s.....p.th.thl........................................................................................................................ 0 389 766 1022 +13305 PF13463 HTH_27 Winged helix DNA-binding domain Coggill P pcc Jackhmmer:D2BJI8 Domain \N 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -8.92 0.72 -3.65 61 2080 2012-10-04 14:01:12 2010-10-27 15:25:49 1 20 1390 11 608 21196 2353 65.80 23 40.16 CHANGED lot.phhl.Lppls........ttpptph.sclsthhshc.psshs......tslccLhct..Ghl..t..tp..spt.....thhplTscG ..................................................phhl..L.h.tls...............tppthoh..sc....ls.p.t.h....s.lp..pssls......pslcpLhcp........Ghl.....ppp.ts.....p.D+Rp...........thlpLTspG........................ 0 154 358 487 +13306 PF13464 DUF4115 Domain of unknown function (DUF4115) Bateman A agb Jackhmmer:A3PK73 Domain This short domain is often found at the C-terminus of proteins containing a helix-turn-helix domain. The function of this domain is unknown. 
27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.34 0.72 -4.33 73 1846 2010-10-27 16:37:37 2010-10-27 17:37:37 1 11 1803 0 409 1357 405 75.40 29 23.46 CHANGED ltlhss.tsuWlpVpsADG...pllhpslh...psG-shsl....ssptPhplplGsuu.ulhhslsGpthss.ushup.spsspLohs ...................lshshs..uc.sWlp.Vp....D..u.s.G...+pLh....s..G.hh...+tG..p..s.hsl....supsPhc.l.plGssu.uV.plph.sG.cslcl...uthhp...sp.sschsh.t.................... 0 117 257 347 +13307 PF13465 zf-H2C2_2 Zinc-finger double domain Coggill P pcc Jackhmmer:D2BGQ7 Domain \N 22.50 20.00 22.50 20.00 22.40 19.90 hmmbuild -o /dev/null HMM SEED 26 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.11 0.73 -7.56 0.73 -3.41 163 227898 2012-10-03 11:22:52 2010-10-27 17:41:20 1 6697 1851 312 138033 194266 376 25.80 50 27.05 CHANGED pLppH...h...p.p.Hp.......sp.c.....s....a.......pCt..h..Cs..tpapp ......................LtpH.......p.............R..l.HT........................GE.K.....................P.....Y...........................cCp.......p..CG...KuFs................ 0 21562 34039 60664 +13308 PF13466 STAS_2 STAS domain Bateman A agb Jackhmmer:A3PG87 Domain The STAS (after Sulphate Transporter and AntiSigma factor antagonist) domain is found in the C-terminal region of Sulphate transporters and bacterial antisigma factor antagonists. It has been suggested that this domain may have a general NTP binding function [1]. 
28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.33 0.72 -3.95 89 3458 2012-10-02 18:52:36 2010-10-28 17:48:09 1 38 1847 0 1117 4550 627 79.20 22 51.70 CHANGED LpLsGplsh.psss.s.Lhptltt..hl........ts..s.....pl.plDhutlpphDsuulslLhphtptstt.ps.pplpl.ps.sss.t.lt.plhplhGls ......................................lsGclDh...ssss...s..lh.pthtp.....hh...............pt..s.....tl.sl..DLuplshlDouGlulLlph.hcp......s...p..p......pG....t.plpl..t.s..s.ss..p.lt..plhplhsl............................. 0 371 742 971 +13309 PF13467 RHH_4 Ribbon-helix-helix domain Bateman A agb Jackhmmer:A3PHV5 Domain This short bacterial protein contains a ribbon-helix-helix domain that is likely to be DNA-binding. 25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.08 0.72 -4.09 57 610 2012-10-02 18:44:02 2010-10-29 13:14:09 1 3 438 4 200 495 214 68.70 37 66.55 CHANGED t.hpR.......SlplsGHpTSlpLEstFWshLc-lAppcuholspLlucl.....Dtpp.......................cs.NLuSslRlhsLpalp ..............t..hpRSlplcGtsTSlpLEshFWphLc-IAspcshol.spLlucl.........Dtp+.......t..............ch..NhuShLRltsLpal.t....................... 0 50 109 147 +13310 PF13468 Glyoxalase_3 Glyoxalase-like domain Bateman A agb Jackhmmer:A3PMH5 Domain This domain is related to the Glyoxalase domain Pfam:PF00903. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.42 0.71 -4.36 115 1009 2012-10-02 15:00:03 2010-10-29 13:27:31 1 8 863 2 301 1880 849 182.90 23 72.75 CHANGED lDHlllssp..sLspussth.p..pLGhshs..........sGGpHs.th..........GTpNtLlhh..u..s.....s.YlEll.u...l.c....Ppts.......................ss....t....ts..p.h.athc......................php.....p......s..sultshsh........cs...sDl..sssttpht..........p.t.G...........p.h..phst............sp.....lpWchs..hhts..sthsh...........tshhPhh....IpWtss....c.s.............p.......ts.ss.shslppltl...sss..cs...stht.thh......stlh ...........................................................hDHllhhsp.....sL-pshph...........h.p..thGh...p..ht......................s..G..GtH.s..th...................GT.tN..p..L.h....h...h....s....p................s..Y..lEll.s....l..c...sspp..........................s.....t.ts..p.hhshp........................hhp...t......stGhtshsl..............cs.....sDl..pshppchp...........p.pult.....................st.h..ph-t...............tp......lcWphh...hhts..tt..t........................phthPFh...I.pWpps....ct..................tp.......a...ss.shslpslhht.sp..p....ppshphh....h.......................................................................................................... 
0 84 176 244 +13311 PF13469 Sulfotransfer_3 Sulfotransferase family Bateman A agb Jackhmmer:A3PS36 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.13 0.71 -12.53 0.71 -3.29 403 1788 2012-10-05 12:31:09 2010-10-29 13:28:25 1 182 817 11 719 2609 1547 224.00 14 47.11 CHANGED phlhl..hGhs.Ro.G.....................oohlp.tlh.........................................................................................................................................................................t.tstthhhttt...........thhthhttthh.tthttht.....thhhhh.h.ttthh.............................................h..h.thhhh...thh...............................hh..........csshph...................hh.....lhhh..Rc...Ph.th..hhs....h...htt...................hhtht.....h........................................................................htthhhthtt............................................hp..ph.tt..pthh.ttlht.h.t.....plspt .........................................................................................................................................lFl..hGhs.RS.G....................TThlp.tlL.............................................................s...t...............................................................................................tts.thhht.t.....................................thhtt..hhthhh......tthtt.ht........th.h..hh.htth.tt.h..................................................................................................................hh...h.thht..................................................................................t.hh....h...KsPtphh.h........................................................lhthaPsA+hlhhh.Rc.....Ph.ss......htS...t.h............htt...............................................................tht....th.....................................................................h.thhhth.........................................................cphhtcP.t.
h.pplht.h.......t.......................................................................................................................................................................................... 0 256 451 586 +13312 PF13470 PIN_3 PIN domain Bateman A agb Jackhmmer:A3PJ99 Domain Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases). 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.69 0.71 -3.43 201 1089 2012-10-03 20:43:45 2010-10-29 14:04:15 1 5 687 0 443 1286 243 110.50 21 70.24 CHANGED plV....lDTNVll.....sull.......................................sthllphh.....hpsthpsh...hotpllpEhtpsl.......hc...t....hthttt.....t.h..ht...............................................................thhpsh.hhsh.....t....D...scDp+hlssAls.........u....p...AshlVTtsh+ .................................lllDTNVll..........ssll..t.............................................sphlhphh.....t..psthphhhosphlpEhtpsl.......t+.h.t....h.thpthtt....hhhhhh................................................................................thspsh.....ht.......hp.......D....cDphhlssAls.........u......p......AchlVTtst.................................................... 0 161 315 393 +13313 PF13471 Transglut_core3 Transglutaminase-like superfamily Bateman A agb Jackhmemr:A3PJ09 Domain This family includes uncharacterised proteins that are related to the transglutaminase like domain Pfam:PF01841. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.75 0.71 -4.30 84 283 2012-10-10 12:56:15 2010-10-29 16:51:48 1 7 246 0 123 398 53 113.40 20 54.94 CHANGED hlhhlshphhhtthshtth...httts.tst.sssstpph......pplspslptsup....hhPhps....tCL.pulusthhLctp.GhssslhhGVppp..........s..htAHAWlp.sssth.....ls.st...tshppasslhs ....................................................................................h.......thhh.hhs.h.h....h.t...ths...tt..t.sptpph......pphspslptsup.............hsshcs....sCL.pulusthhLp.pp.s.h.s.sslhlGltpp.......................shtAHAWlc..sssth....ls..st....tthptas.l..t....................... 0 38 92 106 +13314 PF13472 Lipase_GDSL_2 GDSL-like Lipase/Acylhydrolase family Bateman A agb Jackhmmer:A0LCN0 Domain This family of presumed lipases and related enzymes are similar to Pfam:PF00657. 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.24 0.71 -4.14 323 11620 2012-10-02 11:02:24 2010-11-01 11:55:58 1 260 4043 70 3589 11502 2666 188.90 16 56.62 CHANGED lhlGDS..hs...tG......h...s..........s.s.....................t..t........t........sa...s..th..l...tp........................p.........t....t........s...h.p.h.h.shuh..sG...s.s...st.....p...........................h...........h.t.......p.htp............................................th...............ttps..c..ll.hl.t..hG.s.NDhtt...................................................................t.....ht....hpp...htpt.lpp...l...lpp.h...........p.s.ps......p........l.l.l..l....s.h...s..shh..tts...................................................ttthp....ph.s.....p...t.l.......p..........p..h...u...p..p.t......s..sh.hl...c.htss.....ht.....s........t.........t.......th......t..p...h.ht...................................sDs....l...HPss.tGhphh 
...........................................................................................................................................................hlGDShs.....t.G.....h...t...............................................................................................tt......t.......sa....s....th..l....tp............................................................t.............t...t..s...h.p...l...h...N...hu....h.......s.G...t...s...st....p.................................................h............h.t.......p.hpt...........................................................................................t.h.....................ttps...c........hl..ll.t....hG.s.NDhht.................................................................................................................................................................................................................s........hs...h.p.p....h.tps.lp....p...l......lpp.l..........................................p..sps..........p...........l.l.l..h......s..h....h....s.ht..tht........................................................................................................................................pphhp......ph..s............p....h...h............p.....................p....h......u......p...p.h.............s.....s....hl......s..hhth.............ht......s.............t..............................t........................t.......hht................................................................................................sDs......l....H.st.tGhth......................................................................................................................................................................... 0 1283 2468 3145 +13315 PF13473 Cupredoxin_1 Cupredoxin-like domain Bateman A agb Jackhmmer:B3CV67 Domain The cupredoxin-like fold consists of a beta-sandwich with 7 strands in 2 beta-sheets, which is arranged in a Greek-key beta-barrel. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.17 0.72 -4.14 62 2319 2012-10-02 17:41:00 2010-11-01 15:20:39 1 22 1660 14 491 3072 708 105.60 23 33.67 CHANGED thhhlhh..hh.hhsp.....sst..........s..ssptp.....hplplp.ss.........tapPsplplsuGp.hp.lp.lcNpsss..........s..tEac..c.....Lp.hc.cslssGpsspltlssLcsGcYpFhss...hp...p..+GpllV .............................................................hth........................t.....................t..sshpp......lpls.l..p...st................................tapPt..pls.lpsGp.sp.lh.h.p.N..p.ss.p..................................s...p-h..h.c................................ht..hc....c....s......l..s...s.G......t.p......p..s..l.p.h.s.s..ps.Gc.Y....phsCs.......h.....ph....t...+GplhV............................................................... 0 136 300 392 +13316 PF13474 SnoaL_3 SnoaL-like domain Coggill P pcc Jackhmmer:D2BK05 Domain This family contains a large number of proteins that share the SnoaL fold. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.52 0.71 -4.07 92 1249 2012-10-03 02:27:24 2010-11-01 16:36:19 1 78 757 19 493 3716 1050 118.60 16 47.55 CHANGED lpphhspa.tpu.hspsDh-thhshhs.....-csshlG..ssssch..hh....stpphpphhcthh....s.p.s...p..shphph..t..p.hpl....p...t.s.s....ssuhhst..hhph......h......p.h.....t.h...Rs.osl..h.c+pss..sW+lsHhHhS..h..shs ........................................................thhpta.hpA.hpp...tD...hcs.h.h.s.has.................sc....s.......s....h.......h....t.......s.s....s.......th.......hp.........Gh.p....t.h....p...p....h.h.p...t..h..h................s..t..h.....p......sh.p....h.p.h...p......p...hpl.............p......h..s..s....shu...h..st.t....t.h..ph.............p..h.....t...spth............tthh....+s....T...h..l.......h.p+..p......s.s......t.......Wc..lsphHhS......t.................................................................... 1 171 331 420 +13317 PF13475 DUF4116 Domain of unknown function (DUF4116) Coggill P pcc Jackhmmer:D2BI90 Domain \N 21.30 21.30 21.30 21.30 21.10 21.20 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.51 0.72 -4.47 608 1708 2010-11-01 16:51:33 2010-11-01 16:51:33 1 50 79 0 1429 1843 1299 48.20 25 44.03 CHANGED D+-l...lhps.....lc..ps....s.h.............slpa...s...s...pp.....L....+s........D+......-l...lhp...A..lc.......p..ss....p..............s...l....pass...p................p.l.c ...................................................s+-llhpA..lp...ps....u.h.....................slpa....s...s....cp.......l.+s.........................D+......El.....lhp....Alc............p.ss....h....................s...l.....pahs.t.................................................................................. 
0 1384 1406 1429 +13318 PF13476 AAA_23 AAA domain Bateman A agb Jackhmmer:A0LBT8 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.82 0.71 -3.99 469 5945 2012-10-05 12:31:09 2010-11-01 17:09:13 1 57 3641 10 1564 18355 6884 299.00 13 41.98 CHANGED plplps..a.tsa...............p.s.....t...p....l....cF................sp......s.............l..s........lIhG.t.......N.........GuGKoTll.-Al....p..h.s.L.....h.u.....ph......tph...ppt.................................pt...t......tht........ptph.p...........ht.t..........................................ptp.t...tsh.....hclp.............................hpt...tpt...................................................................................................lp......ptp..ht..p...p....pt.pth...............p............tt............t..tp....hh..ph...............pp.h.p.....p..h...lpp................ht..tp...tt.....................................................th...p.....h.....h.h...h..hspp.p.........................................t...........................pp......p....................................................................ph.p.t..h.......p.pt...h.......pp..h..p.p..hpp..........h.......hpp.........................................................................h...........................pt..pp....hpp.......................................php...pl....ppplp...ph.......p..clp 
............................................lplps..h.tsh...............p.p......h.....p....l.....-F.......................ss....s.....................l.s...............l.lsG.t...........s...............Gu..GKo........oll...-.Al........p........h...s.......L......h...s...ps.........th..ppt.............................................tt.................ttph.......h.........................................................................................t.p.t....tt.......hpht...................................hp......ttt..................................................................................................................................................................................................................................................h......t.....hh............t....t..t.......................................................t.........................h...th...........................tt..t.....p.h.......htp......................hht...t...t...............................................................................h...t............h..h.ptp..........................................................................................................p...............................................................................................................................................................................................................p..t...h.......t..t......h......tt...............................ht...................................................................................................................................................................................................................................................................................tttt..........................................................................................................................................................................................................................................................................
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 583 1015 1360 +13319 PF13477 Glyco_trans_4_2 Glycosyl transferase 4-like Coggill P pcc Jackhmmer:D2BIQ3 Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.05 0.71 -4.34 45 1823 2012-10-03 16:42:30 2010-11-01 17:19:16 1 17 1376 0 466 6381 2986 141.00 15 38.32 CHANGED Klhhlusss...p...h...a...s...h...+...a....spt...Ltpp....Gh-lclho....css....pphth..ttth.phhpl......p.s......hps....hph..l..p.h....h.pl..p+llcchp.PDllHsHhs..psh.ulhutlh....h.htt.h..shl..losaG.s...Dl...hpt..spps.........hh..ch......lh+.h.shp..pustlh.ssu .....................................................................................................................................h.........h........h........t...h................hpt.......hh.pt......sh.c..lhl.ls..............psth.......t.............p..hh............t..s...h....p..h..htl..............................s.ht.......................ppt..........hp.......l.....p...h.........................h...pl.....h+....l...l...+.....c....h......p.....D.l.la...s...a......s..h......p.s........s....l.....h....u...t....l.u...s.......p...h....s....t......h.......th.l........h...s..h..p....G...h.........sh........hpp...s.ht................hh.ph.............lh+....h.....ht.....sp.hh..................................................................... 
0 196 332 414 +13320 PF13478 XdhC_C XdhC Rossmann domain Bateman A agb Jackhmmer:A0L5F2 Domain This entry is the rossmann domain found in the Xanthine dehydrogenase accessory protein. 27.00 27.00 27.00 27.40 26.90 26.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.68 0.71 -3.86 296 2511 2012-10-10 17:06:42 2010-11-01 17:23:08 1 15 1622 6 738 2055 1067 138.90 30 40.65 CHANGED LllhGAGclupsLsplAttl.G.acVsllDsR...................p.....p.....t.....a.......s...........ts.....sp........lh........sh...s........................t..p..th.......ttl.......p..hs..s.ps...h..l..l..h..THsa.p.hDh.tsLpthL.p.p.........s.st.YlGhlGS+p+ttphhppLt..p..Ghs......tpplsc.lpuPlG.Ls.IGucoPpEIAlSIlAEl .....................................lllhGAGclupslsphushL.s.acltlhDsR...................t.....p.......hF............P............s..st........lt......s.h.s..................................s.th.......tth.......p.lsspo..h..l..l..l.THshp..hDh..sLptsl.cp.........s.st.YlGhlGS+++ptphhccLp.tc...Ghs.........cpclsR.l+uPlG....ls.lGu....coPpEIAlSlhAEl.......................... 0 233 476 620 +13321 PF13479 AAA_24 AAA domain Bateman A agb Jackhmmer:A0L701 Domain This AAA domain is found in a wide variety of presumed phage proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.46 0.70 -4.75 49 566 2012-10-05 12:31:09 2010-11-01 17:32:31 1 4 488 0 83 510 154 223.00 18 78.95 CHANGED spsh+hllYGtsGhGKTohstsh....scsLhlDh-sG...htsh.....tscslplp..............sap-htc..............................h...htpp...ls..pYcslVlDolophpchhhthhttp...thscp.sts....hpsYGhhs.pthhphls...thhp..hstsllhsAatspcp.....phsthshh..phts....pstsplhshsDhV..s.h.....t..pt..........Rhh.hpsssthhuKs............................................pLss .................p...hphhlYGpsGsGKT....o....hs..pph.................spsl.....h.....l.D.h..-.tu..t....h.p.sht.........ssss.l.p..lp...............shpphtp.........................................hltth.t.pp......tp..pacslVIDols..ph...p..c.h...h..htthh........tt.cp.sp..........hpsauhhs...pph.hphlp.......plhp....hshpllhs.....u.+psppps.........tss.t..hspht.phps.......phts....tlhs.sDll......sphth......pttst..t....................thh.h.p....p..h...sc.p.................................................htp.hhh................................................................................ 0 35 62 70 +13322 PF13480 Acetyltransf_6 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:D2BI49 Domain This family contains proteins with N-acetyltransferase functions. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.60 0.71 -4.23 211 1584 2012-10-02 22:59:21 2010-11-02 10:42:47 1 24 914 0 667 4625 1145 142.00 14 36.55 CHANGED pphR...p....pl+ppt...++h.p....c.h....u...th.ph...ph......s...p....ss....p.s.......hp.t.......h....hphh.......h...p....p.....hptp...........sh...hs.p.s...hht.p...ahcplhp...phtt.......ts..t...h.p....lhhlp....h....s....s..c....h...lAsh..hshhpss..phh..h..hhsua..c.p.....h..s...chuPGhlLhhphlcpshp....p....G..hp.t...hDh....st........G.......s....p...........p....YKp ................................................................................................t....tph+pth+ch.t........c..t...u......h..ph....ph............t....p....s.s....p.p................hp...t.......h....hphh.......t....p....t......hptp...............sh.h............hst.p...ahpphhp...th..................ts...p....h..c................l...h.h.lp..............h.......s........s.....c..........h....l.A.s.h.......l.s....h.....h.....p......s............s.........p.......hh.........h.......h..h....s.....u....h......ctp.........h..p.....phus...s..t.lL....hh....ph...l....cts.hp.....p.....G..hp..h.....h-h....sh............s.......t................................................................................. 0 248 474 580 +13323 PF13481 AAA_25 AAA domain Bateman A agb Jackhmmer:A0L707 Domain This AAA domain is found in a wide variety of presumed DNA repair proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.11 0.71 -4.90 101 6612 2012-10-05 12:31:09 2010-11-02 11:53:00 1 60 4750 17 1429 15488 5931 168.50 30 36.74 CHANGED ttp..th...phhs.......hs.h.tt....hss.t..ss....hchllp....s.....hls...tus...hshlsGtsssGKohlshsluhslu..........................pGt.hht.......h.s...pt.spVlalssEss...tt.plp...cRlpt.ht..........tp.h.....................st.....sph.hhhp..................t...................t......h...tt..hppltphlpp..ts....ssllllDsl..sp...hh..ss.....sp..ss.s.ss....h....tthlptlppl.tp.ph.s..ss.llllcHssKs...ss ......................................................................h.t.................c...h.o.sh..tE.......hsR.VLG.....G............GlV...sGo......ll.Ll....u.....GsPGhG......KSTL.L.L.p.hs.s.p.lu................................p..............................................t..tp.lL.Y.....l....o.....G..EES........hp...Ql.p...........h.R..A..p...R..ls...............hst........................................................................spl...hl..h..s................................................................................................................................................cs.s...l.c.p..l.t..t..t..l..c.p....tp............Pc.ll.l.ID..SI.....Qs....h.h.....ts.................sh...su...s...ss.....................h......Rcsss....t.Lh...c.h...AK.......pp...s..........hs..lhlVG.HlTK-G.t................................................................................................................................ 
0 493 948 1211 +13324 PF13482 RNase_H_2 RNase_H superfamily Coggill P pcc Jackhmmer:D2BIZ8 Domain \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.09 0.71 -4.43 108 1043 2012-10-03 01:22:09 2010-11-02 12:04:53 1 24 932 19 331 2850 1595 173.30 19 32.29 CHANGED lFaDIETs.......Gl..tstt.sh.....l.aLlGlhhh.....ps..sph.t.....hhp..hhhc.p..s.t-phhh......p...........ltph...sh.....lssaNGpsFD.hshlc..phh...........p.hp.hs..............th......t......al.....DL.h....p...hh.....pp..hsh.........pu....uLKslEch.lGh.p+..c..ss..lsGtpulhh...a......p......p......ah...css-......ps.........h..LcpllpYNcpDshsLhpLhshls ..............................................................................................................hhDlEss................s....h.............t.sp............h..hlh.......G.....h..h....h.........st....pph...........................hht....h.h.h..p....s........s.....tcpthh..........................p.h.....................thh.p.pt......s.h..........................h.hs.a..shp........s........a.-.hshl..pphh......................tph.t..hst...............................................sh......t........hl....D..L...h....p.....hl............cc..phh....................pu.huLKs....l........pph....h..Gh.......ph.....c..ps...h.....s....Gt..p..u..h..ph......a.......................p..............p.............ah......p.p.ts.............tp.......................h..h.p....plh.pYNcpD.lhuhhtlhphh.h.................................................................................................. 0 128 259 310 +13325 PF13483 Lactamase_B_3 Beta-lactamase superfamily domain Coggill P pcc Jackhmmer:D2BIZ6 Domain This family is part of the beta-lactamase superfamily and is related to Pfam:PF00753. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -11.10 0.71 -4.53 85 2488 2012-10-02 15:46:01 2010-11-02 12:47:16 1 7 1789 3 832 7626 2715 181.30 21 68.69 CHANGED plpalGHusaLlc.s.s.....G.hpllsDPa..............p.......t...hGh..h.st........................hpsDlVhhSp.sh.D.Hsss.....ssl.s...............................tspllts......................s.........us..........hp.l..s..sl.tlpsl.sssp................cph...s.Gh...ph..........s.s...Nsha.la..p...s.....GlslsHLGchsp..lstpph..ttlu.clDVlhlPVsG.......s.hshstcpuhclscpLpPplllPhH .......................................................................................ploahGHushhl..cs..s.............s...pp..lllD.Pa........................................................t..........t.sh...t..t.....................................................spsD.h..ll..l.oH...sHs....D.H.h..ss..................p..h..l..tth............................................................................................................ttps.t.h..l..ss.............................................................................G.........sp............................hp...h.......s.....th...p...l.p....h.s...tuhH...................................................................ss.........p........................................s........s..s....G....h.....l.l.....ph.......t.................sh.p.l...a....H.....s.....G.....D.s...........s.............h.............................t.........h...........t.p...h.......t.....t........lD..l..hh.....l.......Plus.............................sh.sh.s..p.p.A....s..h.h.p..h..l.p..s..p.h.slPhH...................................................................................................................................................................................... 
0 285 572 722 +13326 PF13484 Fer4_16 4Fe-4S double cluster binding domain Coggill P pcc Jackhmmer:D2BJB3 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.49 0.72 -3.23 562 3179 2012-10-03 08:56:43 2010-11-02 13:29:30 1 73 2633 0 658 3760 1232 65.10 42 16.23 CHANGED hC.ss.Cst.ChcsCPs.....sA......ls....ts............p..h.c.sp.p....................shsh..................hht..h....c.s..hh...st...hc.sh...h.............hh......huCs...h.....Ct..tlCPaN .............C.Gs.Cst.Chct.CPT.........sA..........ls....ts............hp...l...D..u.p.+....................ClS.a.........................sh..ph....c..G...hh....s-.p...hR.sh..hu....................scl........YGCD...s.....CQhlCPaN................................................................... 0 209 436 555 +13327 PF13485 Peptidase_MA_2 Peptidase MA superfamily Coggill P pcc Jackhmmer:D2BH64 Domain \N 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.89 0.71 -4.10 226 1955 2012-10-03 04:41:15 2010-11-02 13:30:43 1 88 1162 0 810 8921 2335 137.20 14 25.04 CHANGED sssu....ht.....h...stt..thlh..............h..........t...h.....t...s............tt..ts.hp.......tlltHEhsHhhhtp.h...........s..........s.....................s.......thP..hW......hsEGlApahs.......................tp..........................h........p.s........t.......ht.tthtt...tht..ps....ph....hs..................hp...p......l.......pt..sh...................pttpss......h...sY...t..puhhh...spalt.....p....p.h....G...p.p......pl.tp.............h.lp.....ph 
.............................................................ht..............................................................................t....t.hh.......tl...lsHEl.sH..h...h..ht..th.......................h.s.......................................t.....pts.....hW........hsEG.hAp..ahs......................................tp....................................h...pt.............ht..tth...tp..........hht.......pt......ph.....s...........................................ht...p..........h............ts..........................ttt.htt........................h....sY.........t...tuh.h.h.......h.thlt.......p.....t..h.....G..........t......hh.thh...h................................................................................................................................. 1 380 616 731 +13328 PF13486 Dehalogenase Reductive dehalogenase subunit Coggill P pcc Jackhmmer:D2BJ91 Domain This family is most frequently associated with a Fer4 iron-sulfur cluster towards the C-terminal region. 24.30 24.30 24.70 24.80 23.80 24.20 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.07 0.70 -4.97 89 620 2010-11-02 15:39:17 2010-11-02 15:39:17 1 12 161 0 63 596 74 241.20 24 54.61 CHANGED ush++PWWVKER-htcPTsElDWslhp+.hD....hppt.th.......................t.t.h.t.h.s..t............t.....htpttp...thtpthppphPGhshps.......................hALt....tuht....s...h.t....sh....t.s.............................shhs........s..sT..P-phG..lP.+WpGT.PEENhpMlRAAh+aaGus.pVGshEl.Dpps+.Klhastst.............................spth.sa.ED..l-..csYp.........sssphlIPs+s+.ahlsasshps....p-hh+ps.............ss...h...t.....ss.u.sh.huYsp..tshlp.sphppFl+uLG.Ypul....s.......s...s.........shsss..suhulhoGlGEhuRhs.hsl.oPcaGshhRh..hthlTDLPL 
..................................................................................................................................s.....sWalpph-..p...oh.lDWs.h....t..................................h..................................................thhtp...st.t.t.........................Al.....u.........................h................................h.t............t...tths..hs..hWp.GT.PEEN.thl+sAhphhGus..l.Ghh.l.spp....h...p...phhhtht...............................h.h..h.ps..h......sh...............sstphhhPpphp..hlshsh..s...-hhcps......................sh...ht..ss.sshhs.Y.phs.ht.hhltpFl+sLG.Ypu..h....s.......t...s............shh.s..sshu.hhuGlGEhuR.u..hl.sPca..Gs.hh+h...hthhTDLPL............. 0 25 57 57 +13329 PF13487 HD_5 HD domain Coggill P pcc Jackhmmer:D2BI63 Domain HD domains are metal dependent phosphohydrolases. 21.80 21.80 21.80 21.80 21.50 21.70 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.90 0.72 -4.00 81 3171 2012-10-01 20:28:14 2010-11-02 15:49:01 1 216 1158 7 1242 5797 561 64.10 32 13.77 CHANGED ss...Los.p-pt...h....l.p...p+shhshphLpp..l......P..........ht.......pls...cllstppEphDGoGaP.c.uLpu-pIsltuRlL ......................................tLospEhph....h..ppHshhG..hcl.L..pp..h.......................s.............................ht..................tlt.......c.l...s.h.p.H.HE+hDG.o.G.YP.p.G.....Lp....G-pIsl.uRI...................... 
0 538 906 1111 +13330 PF13488 Gly-zipper_Omp Glycine zipper Coggill P pcc Jackhmmer:C1ZB73 Domain \N 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.40 0.72 -4.32 279 2160 2012-10-03 03:18:43 2010-11-02 17:37:02 1 18 1426 0 437 2410 297 47.50 42 26.37 CHANGED hG.ushGA.ssGAhlG....s...s...s....G......s..t.......s.tG.Ahl.GA.u....lGussGu....s....lG....p..t....h.cppp .........................GAslGA..hlGAsl..G......s....h....s......u.........s.p.......t.cG.AlI..GA.u....lGA....ssGu....h....lG....h.h....h.-.p..................... 0 116 258 352 +13331 PF13489 Methyltransf_23 Methyltransferase domain Bateman A agb Jackhmmer:A0LD74 Domain This family appears to be a methyltransferase domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.06 0.71 -4.52 129 14355 2012-10-10 17:06:42 2010-11-02 17:48:41 1 362 3944 67 5881 52099 19466 173.70 14 51.54 CHANGED htp.th......ph...ht.......phl.pp...hh.................ttst...................p.lLDhGsGsG..h..h..................................hp.hh....pp...............p..........G.......h.....p..ht...shD........................................................................................h.....................................................pp....................................paD..hlssh.cllEHlt....s...st................phlppltp...h.l..p..s.s.Gh..lhlps......h.................................................ph..htp..................h.t..h.....h.........ts......sHh....saa.......otpslptlh.cpt.G..ap..lhph 
..................................................................................................................................................................hhh...............................h...........................tst..................................p..l..L.D.lG.s......G..s....G....h....h.............................................................................................s.t..hh...........tp..................................................t.............s....................h.........p..lh.....GlD.h...u....thhphst.........................................................................................................................................................................................................................................................................sp..................................................................................................paD....h...l....h....s.....h.....p.....s........l.....p.....a....ls...........-.........t...........................................p..h...l...p...p...h...t...p................h...L.........c...P.......G...G..h.....l..h....h..ss..........sh....................................................................................................h........................................................................................................................................................th..............h.h...................t.............h........h.h.....................t...................................................................................................................................................................... 0 1716 3566 4946 +13332 PF13490 zf-HC2 Putative zinc-finger Coggill P pcc Jackhmmer:C1ZG19 Domain This is a putative zinc-finger found in some anti-sigma factor proteins. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.68 0.72 -3.98 461 2372 2010-11-02 17:57:24 2010-11-02 17:57:24 1 52 1229 16 847 2033 89 34.60 27 15.34 CHANGED C.p..c.hp..phlstalDsp....Lstt.p..p.tplcpHLtpCssCpp ...........C......h.....thltsal-Gp....Lstt.c..p.tplcpHLtsCspCp...... 0 368 627 768 +13333 PF13491 DUF4117 Domain of unknown function (DUF4117) Coggill P pcc Jackhmmer:C1ZFF5 Domain This family is frequently found on DNA-translocase FtsK proteins at the N-terminus. The function is not known but might well be enzymatic. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.19 0.71 -4.81 92 2646 2010-11-02 17:57:43 2010-11-02 17:57:43 1 4 2575 0 664 2313 1905 162.10 24 17.91 CHANGED p+hhcEhhhlhhhhhulalhluLlSa...s...ssDPu..............W............sp......sss..ss.ts.lpNhuGhhGAalADh.hh.h.lhGhuAahhshhh...h...hhs....aphhpp................p..............p..p............ph......hhch......huhhlhll.sss.ul.h....uh.phhth....t.......th.shs.sGGllGphlush....hhphl..GhsGuh......Ll..llslhhluh..slh....sthS.........................W...l...pl...h-............plG ...........................................t......hh.hl.hhhhhshhlhhu.lhoa.........s....tD.su...............a................................sp......ss..........hpNhsG.hhGAaluDhlh.h...hFGh.hAah.lP.lhl...h...hhs...hhhhpp...................................p......s.tp...........th........sh+.h...........l.G.h....l.h...l.l.l....ss.s....u.l.h................s.l......t.....hs............s.....................h...h.u...uGGllG.t.h.l.u.s....h...........ht..s....h....l....sh.h....G..us.......l.l....Lls..l.hhhul......hlh......s..shohhplht........................................................... 
0 226 437 557 +13334 PF13492 GAF_3 GAF domain Bateman A agb Jackhmmer:A0LBX7 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.34 0.71 -10.37 0.71 -4.04 153 4956 2012-10-02 14:34:25 2010-11-03 10:47:51 1 693 1936 7 1590 12597 1586 135.70 16 20.13 CHANGED sh.c..plhpps........hph.lt.phh.s.sct.....ss.lhh...h.c..ps..pt.th......phh...ssh.....t..ptth..tt.....................................................sl.s.......tsp.s.lhp.....tshppt..p..h..h....h.........s........h.......tt.......p......p............h....................h..s.s..........t............sh.........hh...lPlhs...tt.................pshGlls.l.t.....p.t.........s.tp.p.......h..s..t.pph.phlpt..hushluhulpp ......................................................................................................................................................................................................................................pplhpph...hph..lt..phh..s..hcs........ss...l..hh........h...c......pp.........tt..ph............phh....ssh........tt...p.......................................................................................................................ph.s...........hsp.s.lsp.......ts..h...cpp..........p....s.......h......hh.....................ts...................h..........tp...........p........s...........t........................................................h.s.h...................p.....................sh..............lh...lPLhs.......ss.......................phhGllh.l..t.....p..s..........................p..tp..t....................a...s...t..cph.plLpt....hAshlAhAlpp.................................................................. 0 627 1138 1442 +13335 PF13493 DUF4118 Domain of unknown function (DUF4118) Bateman A agb Jackhmmer:A0LBX7 Domain This domain is found in a wide variety of bacterial signalling proteins. It is likely to be a transmembrane domain involved in ligand sensing. 
28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.47 0.72 -3.84 102 1922 2010-11-03 11:55:06 2010-11-03 11:55:06 1 58 1608 1 460 1522 70 101.40 25 13.48 CHANGED th..l...cshlhhhlshh..lshhlts.tts.....h.t.sh.hh...h...ha..l..lslll.hu.lta.Ght.ulhuull..u.hsh.....hhhhh.h.t.h.............shhhht.p.........hhhhhhhll....lullsGth.ssthppp ........................s....slhhhhh..ssh....hs.hhh.t...hst......h....s..hh....h...la..L.LuVlllA..l.hh.G.h.h.s.ullAull..ss.ls.a.....shaFh.s.s.h..........................ohsltcsp............YllTFslhLh.lullsusLssth+................. 0 139 291 383 +13336 PF13494 DUF4119 Domain of unknown function, B. Theta Gene description (DUF4119) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0594 Family Based on Bacteroides thetaiotaomicron gene BT_0594, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2] 27.00 27.00 80.80 80.60 22.10 20.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.23 0.72 -4.00 3 17 2010-11-05 17:33:43 2010-11-05 17:33:43 1 1 16 0 1 13 0 95.60 61 72.61 CHANGED pcSKKsuKNNsuspph+olE+..-+Qs+-EIIScDELEKRsGITGDt+tYLTsaLRpFhEG-tHpsYsKKLpsLAcYIaDp+ILYIsKHGGYKLMElS .............puKKGSKNsKsKRNVQThcKAPs+pSKEEIISEEEL-NRIAIoGDIRLYhTMaL+IFIDGaF+HPKKKKLINLAQYIYDQKVLYIHKHGGYKLMELS. 
0 1 1 1 +13337 PF13495 Phage_int_SAM_4 Phage_integr_N2; Phage integrase, N-terminal SAM-like domain Bateman A agb Jackhmmer:A0LBL0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.63 0.72 -3.66 83 3438 2012-10-02 14:21:04 2010-11-05 17:53:17 1 21 1789 4 599 5743 1666 84.40 20 27.95 CHANGED llcphppth.+h...cthuhpTpcsYhhhlpp....alpah....p......+..........p..P.pphssp-l..........ctFLstLs..pcp.....susuTppps...lsALhFhacplLppshst ................................t.hhp....h..ph......tthS...pThpsYhptlc.p..........Fhp.ah.......t..........c................................p........p..p...l...s...s...p..-....l........................ctalsaLt........c+p.........................hShsThspt......hsuLphh.ap.hlhpp................................................ 0 213 418 518 +13338 PF13496 DUF4120 Domain of unknown function (DUF4120) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2585 Family Based on Bacteroides thetaiotaomicron gene BT_2585, a putative uncharacterised protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 27.00 27.00 27.30 38.90 24.70 26.40 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.42 0.72 -3.97 6 44 2010-11-05 21:26:43 2010-11-05 21:26:43 1 1 29 0 6 29 1 93.80 70 87.90 CHANGED KIhC.QEHY-pVVpYAKSIsDpTLQpClERLKQWEcNsssPCEIELYYDaAPYSFGFspRYPDGpsGIVGGLLYHGpPDcSFAVhl.pPFHGWoIHT ....KIhs.pEHa-pVtcYAESIGDToLQcCLERLKpWEcNPstPsEIpLYYDHAPYSFGFsp+YPDGRpGIVGGLLYHGhPDcSFAVTl.pPFHGWpIHT............ 0 1 6 6 +13339 PF13497 DUF4121 Domain of unknown function (DUF4121) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2588 Family Based on Bacteroides thetaiotaomicron gene BT_2588, a putative uncharacterised protein. 
As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 27.00 27.00 27.70 27.50 19.20 18.70 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.67 0.70 -5.38 7 50 2010-11-05 21:42:42 2010-11-05 21:42:42 1 1 30 0 9 43 1 254.20 40 94.59 CHANGED psps+YslEoLpphNs.aD+tatlsQcDVDhsNRhlplIEpsRSchh.PpsGDplhYloRpGDaaspAhI-thss+p..lpIC..P.lPFsacstsslthsspGGsap.lpscslK.suhppttF+sWGHsGtCuNGuVhFpApV.hWpYpEP-PLYGcaT..Tcsap+aalpKp.-sE..st.hYpu.shshhscp-hcphlt.hcGplFpG.hpsplVlWsaRh-ahhLs.pEWpphc..Ap.Rhh.ht.p.VKIlpDh-pHhshFY ................hpshYslEoLphLNhh.DptaslscpDV-KVNphlp+hEcsRschh.PpsGDslhYlo+tGDYaspAaI-phs..s+p..lpIC.hPpsPFsac...stpshthsspGGsashlsscpl+.suhpptpF+pWGHsGtspNGuVhFcAhVthWcYsE.P-PhYscaTT+pWp+aaIc+ps-.E..su.hYpu-shohhsc-ELcphls.hcGplFpG.hssp.hllWsaRh-hhcl.sstEWpthct.sphRhhahthp.VKIhpDccpHlsThY................................ 0 2 8 8 +13340 PF13498 DUF4122 Domain of unknown function (DUF4122) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_2607 Family Based on Bacteroides thetaiotaomicron gene BT_2607, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 
27.00 27.00 52.90 41.70 24.40 24.30 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.46 0.70 -4.97 3 36 2010-11-06 00:25:54 2010-11-06 00:25:54 1 1 27 0 6 28 0 204.50 43 88.78 CHANGED EEIVYLSIRluCsuYLLYKVaGQK+RIccICDLLYuK.PPVK+ccsEsV.suEPGu-o-VMGSTRFVYLDENAGKTVAPYMSQPLETuuDFIGEEEDVsEEEVECKLPLEEMRMLKEEQEELDucSPEVEAVSPsVTPcDL-NlG-VLh+LN-AspDEsKShRAAhTLHSIRETDLFElFSSQVENKslIEELMGKYLDc-GNPLPLR+c+c.NPVs-sWRQ ...............................................tllYhslRhsChsYlLYpVht.+c+ltpl..CsLLYs....shpp.ctEps...spssss..t...s-VMGpTRaVYLDENAGKTsAPaMSQPLE..p-hIGEDEDIsp-DVECpLsLEcM+hLp-EQEEL...Du.ps...P-sEslo.ulT.cDlpNVGDVLhphstA.pDccKuhpAApTLa.uIR-TslF-lFsSplpNpphlEcLlcchlDc-GNshPL+ppp.p.spssspWR.p................. 0 1 6 6 +13341 PF13499 EF-hand_7 EF_hand_5; EF-hand domain pair Bateman A agb Jackhmmer:A0LDI7 Domain \N 28.10 28.10 28.10 28.10 28.00 28.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.30 0.72 -3.83 146 22951 2012-10-02 16:17:27 2010-11-08 14:23:57 1 974 2128 881 12350 22877 1126 70.20 26 23.94 CHANGED plpp....h...................F.pt..hDtsp..sGhlshp.-ltphhppht.......................................ppphpp.........h....hp..phDts..tcG.plshpEFhp.hh .................................................................................................................................................................................................t..pp.....s...............F..ph....aD.p.D.s......sG.hI...o......tp...EL.pp.hh.p.s.h.s.............................................................................................................................................ppph...c.p.............................hh.....h..hp........ph..Dt.D......sD..G..pl..s..ap.EFhph................................................................................................... 
0 4511 6719 9545 +13342 PF13500 AAA_26 AAA domain Bateman A agb Jackhmmer:A0L3M2 Domain This domain is found in a number of proteins involved in cofactor biosynthesis such as dethiobiotin synthase and cobyric acid synthase. This domain contains a P-loop motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.16 0.71 -4.71 55 5230 2012-10-05 12:31:09 2010-11-08 15:01:41 1 28 3140 43 1197 6492 1083 206.80 25 57.02 CHANGED +slhloGT-TslGKTllossLspuh.........p..ss.....YaKP.l.QoGht........p.ss.........Dsphl.pp...lhshsp.s.hh....hs....................................................psht.lptPh.oPph.uAph-.s...hsls....lppl................p.lsp.s..s........chlllEGuGulh.VP.lspp.ph.hDlhppL......shsllLVups...sL.G.o.......I.N+s..L....Lolcsl.+.....s.....+....sl.s..lhGllhNs......tss...........c................shptl.pph.st.........l......slL.uplPhhsplss.pp .......................................................................................thaloGT..cTsVGKTslotuL.hpu..hpp.......................pG.hp..ss......................s.aKP....l...ssGsp..................................p...ss............D..s..thl....pp.............h.s....s....h.....s....h.....s....hp.....th.....................................................................................................................................................s.P.ht....hs...p...s......h......us........pl....uu...p....t..s......h.s.l.............hppl................................hpp..l.s.p..p..s..........................-hllVEGu....GGh.......h.....s.......s.......h.......s........s........p.....t.........s.......h.........h.......-.hspph...................ph.P..l....l..l.Vsss....pl..G..s..................l...sc..s.....h.....L.o.h...p..s.l..p.....p.....c.........s.l..s..lhGll.hNc........h.tss..............c....................................th.t.hl...t...p...h...hs....................h..........Pll..G..tlPh........p...........................
.............................................. 0 335 731 1019 +13343 PF13501 SoxY Sulfur oxidation protein SoxY Bateman A agb Jackhmmer:A0LE08 Domain This domain is found in the sulfur oxidation protein SoxY. It is closely related to the Desulfoferrodoxin family Pfam:PF01880. Dissimilatory oxidation of thiosulfate is carried out by the ubiquitous sulfur-oxidizing (Sox) multi-enzyme system. In this system, SoxY plays a key role, functioning as the sulfur substrate-binding protein that offers its sulfur substrate, which is covalently bound to a conserved C-terminal cysteine, to another oxidizing Sox enzyme [1]. The structure of this domain shows an Ig-like fold [1]. 22.00 22.00 22.20 22.00 21.60 21.70 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.32 0.72 -4.01 101 444 2012-10-01 19:22:21 2010-11-08 17:43:11 1 3 280 16 196 483 305 109.60 32 55.82 CHANGED shhpshhGspsh......ssplplpu...PphAEsGuhVPl..slpss.h.......lcpltlhs-pNPsPhsusFph.Ps.supst..lusRl+lspsosVpAl.ucst.cGphah.usptVKloh.GGC ............................t..hpthhGstsh...t....sstlplsu...PplAEsGusVPlsl....ss..s.hst.........lcpltlls..-p.....N....PsP....hsA...sFph..Pt....sststluoRl+lup.oosVhAlucst...DGphah.usppVKVoh..GGC.... 0 44 122 160 +13344 PF13502 AsmA_2 AsmA-like C-terminal region Coggill P pcc Jackhmmer:B8J2T2 Family This family is similar to the C-terminal of the AsmA protein of E. coli. 
21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.25 0.70 -4.99 168 2606 2012-10-03 05:41:17 2010-11-10 11:25:13 1 26 1712 0 739 3716 1170 219.90 17 21.31 CHANGED luplpsplph...p..s..s.........sl.plsslp.hs..hssupl..ss...sup...a..ps..p.........ss..s..p...........sphshplpstslsphh.......pt.hs.hss.......s..l.p.G..p..hs...hshplsh.p.sss.ht....t.h.soLsG...shphpltpG...........plt......p..........................hp.ph.......................shsshhsp...........sh.sa....cslpushplpsGhhps.cs.hplpus.suplshp.GphsLsp..pp..lshphslssphst.hs.hsh...................hh.hhht..htphs....sl........pa..p..lsGshssPp ......................................................................................................................................................................................................................................tphphphth....p....s.s..........tl..plppht.hs...h.h.t.u.pl....pu........sup....hts..s.............ss..t...p..............................sph.p.hp..l...p.s.hs..ls.t.hh.......................................ph.hs..hss....................................s..l...sG...p....hs...hs.h.s..l...ph..p.sss..ht.....s.....h...ssL..sG..........phphp.l.t.pG......................plp.........ph........................................................h.hp............................................................................thshpsh.hsp.......................sh...hF.....cslp.....u..s.....h...pl..p..s....Gl.hp...s.ss.h....h..l......p.....us...t.u.....p......l.s.....hp..G.pl...s......Lsp............pp......l..sh....ps...sls.sphs.t.......................................t...h..tt..h.........l.................ph..p..lsGshspP...................................................................................... 0 242 470 596 +13345 PF13503 DUF4123 Domain of unknown function (DUF4123) Bateman A agb Jackhmmer:B5FDW6 Domain This presumed domain is functionally uncharacterised. 
It is about 120 amino acids in length and contains several conserved motifs that may be functionally important. This domain is sometimes associated with the FHA domain. 22.20 22.20 22.20 22.20 21.60 21.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.57 0.71 -4.15 139 819 2010-11-10 11:40:33 2010-11-10 11:40:33 1 3 345 0 199 758 13 123.60 20 45.28 CHANGED phYhllDusph.s.p.....hhpthh..pt..t.t....t..hts..Latss.shppht.phuPa..Ll......pl..................s............s......h........tphhpp....t...tpsh..........u....h.hlt.......S.s..t.........shp..plhp...HL+p..hl.pl.ph.s.p.G.c.th..hhRaaDsc....lh....tsh......lssh..............tptst.......hh ...........................................................hahllDshhh....p.....h.tth.....th....t...t..hhs.Lattp..sh..ttlt...phuPaLlpl.................ts................s....hl........pphhtp............tt.tt.h.............u....h.hlt.......S.s..t...................................sh.s..pLt.p...HLpphlt..s.ph.s....p..G.c.ps..lhRaaDsclh.sh..hps.hs...tpht.h............................... 0 40 78 125 +13346 PF13504 LRR_7 Leucine rich repeat Bateman A agb Jackhmmer:B5ETY5 Repeat \N 22.00 7.00 22.00 8.30 21.90 -999999.99 hmmbuild -o /dev/null HMM SEED 17 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.76 -6.69 0.76 -7.30 0.76 -2.74 1228 2943 2012-10-02 21:32:02 2010-11-10 13:37:07 1 941 590 28 1121 55766 2241 18.00 36 3.46 CHANGED spL....ppL...p...l..............pp.....s.....p.....l....p......p.....l...s ........................sL...cpL...s...l.......us...N......p.......L..s.....p...LP............................... 0 385 667 922 +13347 PF13505 OMP_b-brl Outer membrane protein beta-barrel domain Bateman A agb Jackhmmer:B5ESE4 Domain This domain is found in a wide range of outer membrane proteins. This domain assumes a membrane bound beta-barrel fold. 
27.70 27.70 27.70 27.70 27.60 27.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.63 0.71 -4.33 190 7576 2012-10-03 17:14:37 2010-11-10 14:45:20 1 53 2032 12 1809 11637 730 195.60 15 81.14 CHANGED hshshhss.hu...ss..shu................ssp..........tsh.h...........l..s..s.u...h..shs......sh...s...................sss.......ss.s.t.......................s...s..sh..sltss.a...p...h....s....s......h......hu..l.ps.sh..s.............h......tss.....pttss.......................s.hp....hphht....................hsss......ht.h.p.....h.sh.....ss.............h..p....hYs......tsGhs.............................................hsp.p..hp...hts.................................ptstsp..huhsh..us..G......lpaph......s.....p.....ph....t.l.....ss..cY.p..h..t.ph.........................ss..............hcs.......ss...hpluluY+F .........................................................................................................................................................h...hhhhh..hs....ss...shu....................................sts...........................th...h.........................l...s.......s.s....h...uhs............ph...p.............................................st..........t..p.s............................................s....hsh..sl...ths....Y.................p.....h..........s.......s.........h.........hu....l....ps..s.h..s.....................................h........tpt........pptss....................................................p..p.....ph.tt...............................................................ht..hp.............st.h.s........h...sh.......sph............hp......h.Ys.......tsGhu......................................................................................................hs.p.h.p..hp......t..............................................................spsps.p...hu.h....s.h........G.s......G.............lp.a.p.h......................s..............p..........sh......s..l............p.h...p.
.Y...p....h.....h.ph.........................................................ts.................hp.......ts.........h..hh..G..hsapF........................................................................................................... 0 544 1089 1459 +13348 PF13506 Glyco_transf_21 Glycosyl transferase family 21 Coggill P pcc Jackhmmer:B8IZF6 Domain This is a family of ceramide beta-glucosyltransferases - EC:2.4.1.80. 21.30 21.30 21.30 21.30 21.20 21.20 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.97 0.71 -4.98 54 703 2012-10-03 05:28:31 2010-11-10 14:55:10 1 8 542 0 308 9471 1198 168.40 23 39.16 CHANGED P...p..l..c....lhhs...ss....shuh.N..P...KlsNhhhshpt....Aca-hlllsDuslhlsschLpplssshpp...psGL.Vou.hsh.ssssp...u....hhutl.t.ssh...h.......s.sh.....thhhu....h...t.....uh..shshGtohhhR+psL-phGGhpulsphLAEDhthu.phlpst.Gh+ltlssts.ht.p.s.ss...tp.htt...hhsR.h..RWu+ ..........................................................................................................................lhls..st....hus...N.s......Kls.N.Lh....hhct...........sc.....a......-......h.ll.l.s.Du.D.h.h.l......p...s...c....h...L.p...c..........l.s.......s..s..h.............s................-.............p...........l.....G.....l......V.........o.........s......h.....s..........h........s....t.....s.s..p........s..................h.h.u...t..l...t...tha..h........................s...h........sshhhs.............h...t.........................tt...t.h...s..h...G....t....s......h..........s......h..R......cs......s......L......c..............p......h..............G......G...........h......t.........s......h........t...........p...........t........L..........A.........-...D.a.hh...u.......phl......p.......s........t......G.......h.......+......h..h..h..u..s....s..hh..p.sss............phtp......hhp+....h...pWs........................................................................................................ 
0 96 178 246 +13349 PF13507 GATase_5 CobB/CobQ-like glutamine amidotransferase domain Coggill P pcc Jackhmmer:B8IZF6 Domain This family captures members that are not found in Pfam:PF00310, Pfam:PF07685 and Pfam:PF13230. 20.80 20.80 20.80 21.10 20.70 20.70 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.69 0.70 -5.48 255 4672 2012-10-03 00:28:14 2010-11-10 14:55:38 1 18 4440 7 1177 3706 2727 235.40 37 29.43 CHANGED +P+VsIlpt.GsNschEhAtAFcpAGhcsh-.V..p.........h.s...-.........l.........h.........su.........ch.........s.Lcc..hphlshsGGFShGDsh.GuGcu..hApslhht.......plc-phppFh.s.R..DshsLGlCN.GhQhLspLu...Ll............Ps...............s...........cp.PphspNpSt+aEuRhspl..h....h.ss.p...S...Psl..hh.psh...s..shplP..V..uHGE..GRhhh.....spphhp..plhssstlAhpYVDs.......p..GpsTtp......YPhNPNGSstuIsGlsSsDGRlhuhMPHPERshcshpps.hs............t.t........t............ushhclFcNAhpah ...................................................+lAllt..GsNs-h-h..At.Ah....c.c.u.......G.......h.-.s...ht..Vh..........h...s.........l.........h.....ts.........ch...................s.L.ps..h...csllhsGGFSYGDhL...tuGt.h..hu.tphh........stltptlppah.p.c..ssh.sLGlCN.GhQhLs.chu...Ll................PG..................................shh+Npu........t+........F...u..+..hshl..h..............l........s...p......o........s...lhh....psh...p...thplP...VuHGE..G+ahs............ss.p...p.l....t..pLc.spsp....lsh+Ys-t....................s...t.......................hNPNGSspsIsGlss.psGp.VhuhMPHPERshcsl..s................................................hh................................................. 0 380 751 997 +13350 PF13508 Acetyltransf_7 Acetyltransferase (GNAT) domain Bateman A agb Jackhmmer:B5FCA0 Domain This domain catalyses N-acetyltransferase reactions. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.38 0.72 -3.82 163 13825 2012-10-02 22:59:21 2010-11-10 15:58:29 1 107 3917 33 2973 41376 6818 86.50 18 43.91 CHANGED ppp..p.hhsh.pp...........ssc...ll.u...hh.tl.................tt....t........p...ht..........hlt.......tl..slpsphRspGhu...pp...llpt......h.......h...pph.....t...............t.....p...t...h.........h...........l............h............s.....p.......s.......p............t..........h.paYpphGFph ..................................................................................................t....hh.h.h...p..........................ps.p.....l.l...u....hh..pl.........................................................h......t..................p.....hh........................................hlt........tl.....s...V...p.s.php........u..p..G..l..G......pp....Ll.pp..........h..............t...pp.h...........t...............................................................t...........p....s.......l............h...................l...............t................................s........p........p.........p.............................s.............h..sF.Y..p.+.h.GFp............................................................ 0 930 1796 2449 +13351 PF13509 S1_2 S1 domain Bateman A agb Jackhmmer:B5FDY3 Domain The S1 domain was originally identified as a repeat motif in the ribosomal S1 protein. It was later identified in a wide range of proteins. The S1 domain has an OB-fold structure. The S1 domain is involved in nucleic acid binding. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.56 0.72 -4.05 96 2158 2012-10-03 20:18:03 2010-11-10 16:15:05 1 7 1694 1 363 1496 70 60.30 29 26.77 CHANGED lGphssLplh+ps.chGha.L.......s...ssp....s-lLLPpp..tl..s..cs..hplGDplcVFlYhDu-c..+llATsp ....................lGphthLpVlc..........p.......s.c.......h.Gha.L.................p....tst.......pslhLspp........Eh................pp....hplG-plpsFl.Y.h.Dpps.+lh.ATp.............. 0 127 239 313 +13352 PF13510 Fer2_4 2Fe-2S iron-sulfur cluster binding domain Coggill P pcc Jackhmmer:B8J1F8 Domain The 2Fe-2S ferredoxin family have a general core structure consisting of beta(2)-alpha-beta(2) which a beta-grasp type fold. The domain is around one hundred amino acids with four conserved cysteine residues to which the 2Fe-2S cluster is ligated. This cluster appears within sarcosine oxidase proteins. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.66 0.72 -4.17 254 5655 2012-10-02 17:47:23 2010-11-10 16:31:22 1 204 3363 9 1816 5235 3983 79.40 31 11.18 CHANGED sp.lshphDGpp.hpuhtG-TlAuALlusG................l.p.ls.+.oh.t......htpP...Ruhhsshups.ssLVpl....s.........u....ps...s.h.pAshs.lt-GhplpoQss ...........................................lplplDGp..p..l..p..s.....t.......G....s...ol..lpAs.tp.sG..........................l..p..lP..p..hCa....................p..............stl..s.s......s.G.sCRhCl.V..-.l.....c.........................s....t....t...l...t...uCsssl..p..-..GMhlpopp...................................................... 0 630 1190 1513 +13353 PF13511 DUF4124 Domain of unknown function (DUF4124) Bateman A agb Jackhmmer:B5FFC8 Domain This presumed domain is found in a variety of bacterial proteins. It is found associated at the N-terminus associated with other domains such as the SLT domain and glutaredoxin domains in some proteins. 
The function of this domain is unknown, but it may have an Ig-like fold. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.29 0.72 -3.92 232 1444 2010-11-10 16:35:35 2010-11-10 16:35:35 1 18 588 0 495 1312 147 82.30 18 43.19 CHANGED hhhhhh......ss...ss..s...u..u....plY+WsDps.GpspaoDp....Ps.t....s.s.ps.p..pl..........................phts.sss......................hs.t.....s.....ss.tts.......tpt .......................h.hhhhh.....ss.....ss....s....u...u.........slYcasDss.Gsspao-p...........Pst.....s.s..pt..p..pl.............................................p...........................................................sttttttttttt............................................................................................................................. 0 129 310 423 +13354 PF13512 TPR_18 Tetratricopeptide repeat Coggill P pcc Jackhmmer:B8IZE5 Repeat \N 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.92 0.71 -4.16 19 80 2012-10-11 20:01:04 2010-11-10 16:50:44 1 7 80 0 30 1458 1509 130.10 30 50.38 CHANGED sc-slP-hssuplYspAQcsLpsGsYpsAIcpLEuLDsRYPFGsYupQsQLDLIYAYYKss-hshuhAoIDRFlRLNPsHPNlDYVhYMRGLsshshDcs.h.phh.....phs.t.st..................tAFc...DFppLlp+YPsSpYAsDAppRh ..............................................p.s.splYspupp.tLpc.t.p.ac...pA...hpph.c.t..L.cs+..aP.h.u...au...p...Qsp..L...pL...h.Y..........u....a........a..cs...sc...h.......tAhu......sh-RFl.....+L.pPs.H........s.s.....l.....s.....Y.sh....Y.h....+GLsp..h...t.......s................................tsc+...D..p.sh............................................Aht..phppllppaPsotYu.cup........................................................................................ 
0 12 19 26 +13355 PF13513 HEAT_EZ HEAT-like repeat Coggill P pcc Jackhmmer:B8J2H7 Repeat The HEAT repeat family is related to armadillo/beta-catenin-like repeats (see Pfam:PF00514). These EZ repeats are found in subunits of cyanobacterial phycocyanin lyase and other proteins and probably carry out a scaffolding role. 20.00 18.30 20.00 18.30 19.90 18.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.20 0.72 -3.35 261 1731 2012-10-11 20:01:04 2010-11-10 16:59:27 1 168 403 18 1145 5921 533 54.80 25 5.68 CHANGED .plR......ptAhh.uL...s..........htt.t.ph...ht...hhtpllshLh.....hh..t....sss...tl..Rptu..shuLupl .......................................................hR....cuAlh.ul......Gs...............lstsshpt.......hps......hls.pllshLl...........phL.pD.......sps.....tV..Rpsu...shsLup...................................... 0 376 651 956 +13356 PF13514 AAA_27 AAA domain Coggill P pcc Jackhmmer:B8J072 Domain This domain is found in a number of double-strand DNA break proteins. This domain contains a P-loop motif. 
29.20 29.20 29.30 29.30 29.00 29.10 hmmbuild -o /dev/null HMM SEED 1111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.09 0.70 -13.83 0.70 -6.88 10 1921 2012-10-05 12:31:09 2010-11-12 16:18:39 1 17 1239 0 319 1500 94 415.80 15 69.93 CHANGED laGsNEAGKSTlRsAlpsLLFGFPtRoshs.FlHs+psL+lGGsLs....tcsGupLcFpRl++sst...oLhsscGc.sls--sLsshLsGhs+phF-ulFulDccsLhpGG+pIl-ApsclGphLFuAuAGlGS..LssVc-pL-cEt-pLaKPp.GppstINsulpphK-lppcl+chpl+scsWccppcsLccuccclppl+ccpcpL-pc+pclERlppltPhlp-c+uhpppLutl.uEslphPscus-Rhtphcschpsspppl-phpc+lpplcschsuIplDc-hLucAssl-uLpppcsphcpupp-lpphpuclsstcc-tsuLttQlG.Pshspsslcuhcsuhss+cplspLsp.......c+psLcptlcsApcpLcEpccclcplcpphsulssh..spsLhtul.ss.thhshtt...thtstcpcltps++cttpulspLG..attsl.t.hthslP.htplpthp+ctpEhtsstppt+cchpcsptpLtpl....tlptpthstssslsoss-ltssRstR-tlWps.................hs..tshs.th.chlppADpLsDphhspsppsupltplRpptEptptRhtthptchtshcpphAthctsWttthtshu.hPh.s.tth.sWltphpthh.t.pth.ptptEhp.hhptttchpttLtt.Lth.u.........t-LsthLptucphlcphc+sutccspL-c+hppscpuhpcAcc+ppcApcpl-sWcccWpphlhphtLsuphosstsls.ul-hhpchppchpcs-chsp.Rlpuhcc-lucFcpclcsLscshssth.s..s...pth+tLpsRLppA+-cupthc+LsEclcphccclspsspslpptptclssLhc....hApssohE-hhssscRu-ptcchccplscLppplsphssulsltuLttchsth-ssplpucl-plspcl-c.hpsphscLsppluctcsplupl-GsosAApltcchppthuplp-tAEcalpluhApplLppsh-RYR-s+psPlLp+AuEhFspLThGcFopLpsDs.-cpP.hLsucRssGpplpltpLSpGTRDQLYLALRlAsLEhhhtpppshPFllDDlFlsFDDsRocAslcsLc-Luc+sQVIaFTHHcHLlshstps.asspspllcL 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h........................................t....................h........hStush-.lhh.hRhu.h.ht..............................h.P....h.lh.DD.hh..D..Rht.hhthh...p........s.....t.....t..............Q.llhho....t..........................th...................................................................................................... 
0 119 219 276 +13357 PF13515 FUSC_2 Fusaric acid resistance protein-like Coggill P pcc Jackhmmer:B8IZ01 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.84 0.71 -4.22 286 4566 2012-10-02 19:04:43 2010-11-12 16:27:32 1 34 2561 0 1051 5938 148 130.10 23 22.04 CHANGED l..uh...hl..s.....t.h......h...s.h..........t.......+sh..........Ws.sls....shhlh.........p..s..s..hssshp+shpRhlGTllGs.hluhh..lh....h........hhs...s..s....h..h....hh......h..l....lhhh.....hhhh........h...h..........................h....s..ts......Y.....sh.ts..hh..l..T........hhsl....h.hhsh...............sssht.................hsh....RlhssllGsh....lulls ..........................................hhh..h..hl.s.p.h......hsh............t............+ua..........Wh.h.lo..s..hhl.h......................p..s...shssotpRhhpRllGTllGllluhs..ll........h.................................h.hs.......s..p..........hh....hl.............h.l.....hhhs........shh.h..........h...h...................................h....p....ts......Y.......uh..us.hh..lT.....hhsl........h....h.h.s.h......................ttshp.............................................hhhsRl.l-sllGshlAhh.s................................................. 0 275 612 877 +13358 PF13516 LRR_6 Leucine Rich repeat Bateman A agb Jackhmmer:A5IFW6 Repeat \N 23.00 8.70 23.00 8.70 22.90 8.60 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.75 -7.04 0.75 -7.40 0.75 -3.27 889 30484 2012-10-02 21:32:02 2010-11-12 16:29:19 1 2898 807 67 21001 56590 2857 24.90 27 9.82 CHANGED h.....ps..Lp.....pL.....s.....L....s........p..s.p.....l...ss...pu....h.......p....sl....u.............p ......................................pp..Lp........pL.........s...........L............u...............................p...N.p.........l.....sc......pG....s.......p..tls.............................................................. 
0 10309 14782 17695 +13359 PF13517 VCBS Repeat domain in Vibrio, Colwellia, Bradyrhizobium and Shewanella Coggill P pcc Jackhmmer:B8J4T8 Repeat This domain of about 100 residues is found in multiple (up to 35) copies in long proteins from several species of Vibrio, Colwellia, Bradyrhizobium, and Shewanella (hence the name VCBS) and in smaller copy numbers in proteins from several other bacteria. The large protein size and repeat copy numbers, species distribution, and suggested activities of several member proteins suggests a role for this domain in adhesion (TIGR). 26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.62 0.72 -3.61 1255 7532 2012-10-05 17:30:43 2010-11-12 16:50:10 1 572 812 12 4031 8611 7152 66.10 25 19.29 CHANGED Dhss..DGthDll..s....s................s....s.....ss...................spl........h..h...t.p....s......s..G....s........hp............h...s.p......h........h..hss..s.................sts.h....s..........s....s....hu..DlssDGp.hDlls .................................................DhssDG.h.Dlh..s..s............................s...s.....ss..................................stl.........a.h...s..p......u................s..G.......s..................as...............................t.h.....s.t........h.........thss.s...............................................stsh....u................s.....s.....hu...Dhss..DGp.hDlh............................................. 0 2314 3279 3827 +13360 PF13518 HTH_28 Helix-turn-helix domain Bateman A agb Jackhmmer:A5IDV5 Domain This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.46 0.72 -3.92 193 5836 2012-10-04 14:01:12 2010-11-15 09:27:30 1 87 2346 0 976 15814 2048 50.90 20 29.02 CHANGED p+hpl...l.ph.hh..p....s..p...shpp.....supphsl...sp.psl.tpWhppap.p.tG.h..s..uLt.s......c..p...p....psp ......................................+hpl....l.ph.hh..p.......u..p.....ohpp................hupcasl...sp.ssl.tpWl+tac.p..tG.....p..uL............................................................ 0 285 617 813 +13361 PF13519 VWA_2 von Willebrand factor type A domain Bateman A agb Jackhmmer:A5II27 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.30 0.71 -4.15 168 10039 2012-10-10 16:07:06 2010-11-15 09:44:16 1 392 2980 5 4071 16696 2611 193.40 16 32.32 CHANGED slllllDhSsSM...........t..sp.....sh..........................................t.......s................Rlst.s.+tth.........tsl...lp..........p...h.........s.ss.....ph......u.l..lsau.........u.......s..............u.................h......h....h............h..................s........h....o...........s....-....t..ssl.....tsh.....l..ss......lssp.h..................................hs..s......s....u..........os....hs...t.ul.....................................tt..Ah....p.hh....................................p..p...t..s.............................tp...s.s.l.lllo..D.......G.....................ts.s..............sp..hh......p...............hh.p...........................................t..h...p..pp..s...hp.....l..hsl.........sl.G..........ss.......ps..s...............................................................t..s.................t..lpp.l..Ap..ts.....u......Gt.......hhph..psss..psls.............ph...hpp.h 
...................................................................................................................................................................................................................................................lhlllDs.S.u.SM...................t.sp.................................................................................................................................................................................................................p..........+l..p....t....s....+p.sh...........................pp..h.....lp..........................................................p.......h................s..ss..........pl...........u...l......l..s....Fs................................................................s.......p....................u...............................................................p.........h........h..........h......................................................................................s.........h.........o....................s......s....t....pt.h...........tpt............l...ss.......lp.s...................................................................................s......s....s...................Ts..l.s....t....u..l.......................................................................................................................................pt....A.h.........p..h..l........................................................................................................................................p.....p...p...t................................................................t..tp.......p...h....l....l....ll....T....D........G.............................................................ps.s...........................st..h......p...........................hhp.................................................................................................................................................................t...h.......p.......pp...s.........lp...........l..hs.l..............
sh..u.........s.s.....ts........................................................................................................................................................................................t............h.hpp..l....up.......s.......s..............Gt.........................h..h..h............................................................................................................................................................................................................................................................................................................ 0 1621 2750 3486 +13362 PF13520 AA_permease_2 Amino acid permease Bateman A agb Jackhmmer:A5ICF5 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.78 0.70 -12.77 0.70 -5.94 61 23261 2012-10-03 01:44:59 2010-11-15 11:34:15 1 79 4110 20 6607 26829 2040 418.20 16 86.76 CHANGED luhhsshhlsh.....us.h.....hu...ss......h..h.s...s...........hss....sGh.s..hhlhhh.lsshhh...hshthsh.hEl..u..s....hspsGGhhsas......pts....h...........s....phhu.....hhsshhh..hhshhh..sh.ss...h.s....sh.hhph.h..hth...h....t.................ph.t.t................................................................ph..hh.hhlu....lsllhhhhhlshhuh+hu.splp..h.hshhplh.hslhhhllhulhhh....tt....s..hhs.........................tp....s.................hhss.....shs......t.....h..........h...s...........u.h....hh......s..hauhsGa-ss...................sshspE...hp......p..+..s.h.huh......hhuhhhshllhhl..............hsls..hhh.lls..sp.l........s.t..ss..........................h.shhhpth.s...s.p..hhthlls.lhlsl......shhu....slhshhhuss+hltphuc-..........shlP.......chhsph.............s.c..hss........Phtulhhthlls.lhhhl.hh.........................................hh......tsshs................hlhslsshshh...lhhhlh.hhuhl...hh+...hp...p.....ph.......t+.................hh.h............h....h.lshhs.....hlhh..lhh....l.......hhsh...........hs........t.s.........s.....
.stts...h.hhh.........hl.hhs.hh .............................................................................shhshhhlsh....us.h........h.G....sG.......h...h..h.hs.................................h..hts......sGs...s.....s.lh..h......h....l.....l.s..sl.hh...........h...h...h..u.hs...a.....ucl...u.............s...t.......hP.....p..s......G...G..h..a.s..a.s...................pts..................h.......................................G.....p..h.hu.....................ah..s...u..a.h.h........hl.s.h...hh....s.s...s..s............h...s............hh....hs..s.h..h.......st.h.....h......s.........................th....t................................................................................sh....hh....hlh...s........................h.s.l...l..h...l.h......s.....h.l...s...h...h....G....s...c...h...s....up...l......s...sl....h....s.......h...........ht........l.......h.....s..l..h...h.h.l..l..h...u...h...h.h.h.............ph........s....hhs........................................................................................................ht..s....................................hhst..........shs..........s.......h.............................h.....s............................................u..h....sh.........s....ha...u.....a.h........G...h....-....s..h.................................................s...s...h..u...p-..........sc................p......+....s...hPp...u.l........................................l.h..u.h..h.l..s..s..l..l.....Y..h..l..............................................s..s...hs......h..h..s....h....l...s......pp..l...................s.t..s.ss..................................................................h..s.h.hh.p.h.h...t.................h..s.....hh.s....h..lls....h.s.s.h.l...............................uh.hu.........................sh.hs...h...h...h..s.............s...s..+...l.h.h.u..h..u.c..-........................................................u.h.hP...............ph.h...u...+..l.....................................................
......s...c...pt.s..............................P.h...t.......u....l...l..h....t.......s.l.l...s..h.lhh...h.h..h...........................................................h............tth.s..........................................hl..h....s.....h....s..s....h....s..hh............l...s.a..h..h.....s.....h.h....u...h..l..........h..l.p.......hp........t.........ph.........tp................................h.h...............................................................h....h...h.s...h..hs.................hlhs.....h..h..h....h....................h.h..h.......................................................................................hhhhhhhh............................................................................................................................................... 0 1861 3597 5318 +13363 PF13521 AAA_28 AAA domain Bateman A agb Jackhmmer:A5IAR3 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -11.10 0.71 -4.28 115 1816 2012-10-05 12:31:09 2010-11-15 12:03:12 1 15 1500 1 435 2151 632 163.30 27 52.65 CHANGED +lllsGu.usGKTT....Ll....ptL......u........p.p.........G.hs...hs..sEhuRpllppph.....spst.....thh......h..................-hhths.pthh.ptph.cp.h....p..s.s..........p.s...sphlFhDpuhh-shs.Y..h.chh.tt............................s....hss..p.....lt.p........tsp....ptc.Y...ch.lhlhs.s.hs.......l.....pDst.R..p.p.sh.cc.ttph.tpthtp....thpp....huh.phltl...p.ush.ccR 
......................................lsIhGGtSoGKo.T....Ll....sp.L...................A..........p.h...................hs.ss..........ss...........E....h.....u........R.....c..hl.pph........stsp...............t.Lp........hp..................................................Dhht..hu.p........ttph..hs.h.......t.st.................h.u...spl.sFhDs...shl.s....s.p...u...a...s...cth..ts.............................................................c.....t.s..h..........lp.s..........hhp......ch+..a........Dl..Vlllp..s.ss............sa.........ssD.uh.R........s...t...s.......t....-.....cpph..pp....hlhc....hlpc........sh....phlpl...psshppR............................................................................................................................. 0 157 265 373 +13364 PF13522 GATase_6 Glutamine amidotransferase domain Bateman A agb Jackhmmer:A5IAK2 Domain This domain is a class-II glutamine amidotransferase domain found in a variety of enzymes, such as asparagine synthetase and glutamine--fructose-6-phosphate transaminase. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.94 0.71 -4.20 67 6720 2012-10-03 21:14:07 2010-11-15 13:25:09 1 18 4568 0 1845 12173 7462 149.00 32 26.58 CHANGED PDspGh..ah..ss......p.....................ssLuHs.RLull-hs.ttus.QPh...stssp....h...hlsaNG-lYNat....cl+ppLtt.tGhs..hps.poDoEVllt.hh....pphG........pcsl.p..........clsG.hFAhulaDp.pppplhls.RDch......G...hKPLaatht.........ssshhFASE ................................................................thttttth...............pG.....s.........................sGIG.HT...RWA....TH..GtP....s..p..t..N.A..HPH.............sssp.......................l.......slVH.NG.lIENat....................pL+....c....c....L.....t....s....p...Ghp..............F.pS...pTDTE....Vls+..Llt............................pphut................puhppsl..p........................................plcG.....ua.A.......h.....s.....l.....hs..t....p........s.......s..pllsA......p...........s.......sP..LllGhs.........pt...h.......................................................................................................... 0 629 1210 1572 +13365 PF13523 Acetyltransf_8 Acetyltransferase (GNAT) domain Bateman A agb Jackhmmer:A5IFT1 Domain This domain catalyses N-acetyltransferase reactions. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.70 0.71 -4.33 54 3083 2012-10-02 22:59:21 2010-11-15 13:26:02 1 23 2166 17 599 11897 940 153.50 19 66.16 CHANGED lRshc...ppD.hs.hlppWhsps+ls..ha.h.pp.st.sh.pth.cp.hhp.tlt...tssathshlu..........thsu...cPhu.........Yh.....Eh.....Y.spcc........h.....................ts.........p..s....tDpGhH..lLlu.ssc.h..hGtshspshhpulh...calF.t..-..tspRllsEPcscNpphhphhpphG.F.phhtph.c.h...P..cKcAt.Lh ...........................................................................................................................................pD.h...hh.....h.....p..h....h....p...p.....s...c..hh........h.....h.....t..t.....t.............t..................................t.t......h.....p......pl.............tp..p.p..h...t.....s.....h...l.s..............................h.h..c..s..........p..s..h.u.........................ah............ph...........h..h.s.p.ps.............h.....................................................ps..................p.s.....t.s...h...s...h..c.........h.h..h.....s...ssp..h.................t..G..c.Gl.u.pth.l......p...s.....lh..............ch.hh...p.....c.........t.........sp..c.lh.h-s..c..s...s....N....t.t....hh+....h.h.c..+.tG....F....ph..hup..h......h...s..t......h...................................... 
0 136 318 477 +13366 PF13524 Glyco_trans_1_2 Glycosyl transferases group 1 Coggill P pcc Jackhmmer:B8J2T1 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.83 0.72 -3.71 238 1210 2012-10-03 16:42:30 2010-11-15 13:26:32 1 65 678 0 497 17241 5704 98.20 19 21.71 CHANGED l.sLN............hspp..............................ss.ss.....h.....RhFEshAsGs.hllos.............pslcp..h..FpsGp..................-..llhhp...shp-hhctl.cthh..p.ss.p.t.tp.pl.uptutpcl.hs.cH....Thpp...R..spplhs ........................................................................................................ts...s.....h.......Rh.FEsh.A.sGs..h..llo.s.t...........tsh..pp.....h...a..p...s...s.p..........................................c....ll..h.hp............shp....-.......l....h....c....tl....p....h....h..h.....p...s....s....p....p....t.....p..pl....u.....p.pu.h...p.p...l...hp...c.H..........o...a...p...p....R..hpph..t.................................................... 0 190 360 424 +13367 PF13525 YfiO Outer membrane lipoprotein Coggill P pcc Jackhmmer:B8J028 Domain This outer membrane lipoprotein carries a TPR-like region towards its N-terminal. YfiO in E.coli is one of three outer membrane lipoproteins that form a multicomponent YaeT complex in the outer membrane of Gram-negative bacteria that is involved in the targeting and folding of beta-barrel outer membrane proteins. YfiO is the only essential lipoprotein component of the complex. It is required for the proper assembly and/or targeting of outer membrane proteins to the outer membrane. Through its interactions with NlpB it maintains the functional integrity of the YaeT complex. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.31 0.71 -4.85 30 2784 2012-10-11 20:01:04 2010-11-15 13:53:42 1 103 2215 4 696 2342 2289 181.20 26 61.86 CHANGED s-hsspplYppAppuLppGsYtsAlcpLEsL-sR..YPFGsYupQuQL-LIYAaYKssDhstAhAsh-RFlRLpPpHPslDYshYMRGL.......oshpt..........tcsh..........c...RDs......sth.....+pAFp-FppLlp+aPsSpYAsDAppRhhaL+spLAcaELplAcaYh+RpAalAAlNRuphllcsYPsT.ustc.ALslhhpuYcpLuhsp.ttcpp.phL .........................................................sspplY.spA...pp.t.l.p..s.G.s.a...ppAh.pthc.sl..p....sc.....a.Ph.u.s.aup.p.....u........p.l.......L......sa...A...a........Y..c...p........s...-.......hstA......hssh-+....F...l...c.ha..Ps..p........s...p......h.c..Ys...hY..hpGl............spht........................................spsh................................p..RD.....pts.....ptAhpshppll....ppYP...s..S..pY.s.s-App+.hh.hl.ps.....tLuth-...h..t..hu.ca.Yh.............p.p.............t...........t.....a.....h...........A.shtRhp...hhpp..a.ts.....h....c.ul.hh..sa..ht..t..........hh................................................... 
0 207 426 568 +13368 PF13526 DUF4125 Protein of unknown function (DUF4125) Coggill P pcc Jackhmmer:B8J0H5 Family \N 25.00 25.00 60.40 59.80 21.40 20.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.18 0.71 -4.77 22 120 2010-11-15 16:02:55 2010-11-15 16:02:55 1 8 113 0 25 111 3 186.40 37 80.57 CHANGED cppllcpIlchEWphFppspstuG+AsCQss.p.......sFclMRtSQahsWspphLpSYLpDLppActtGRNLlsEKYARMMphss.P....c..............s...........hpsh..ls...........cIsp...........h...........pht...........Wp+-htc+YPplspt..ts.hps...pD.......s...p.....sS....FETYLRuELtTYS.+TlpLYhphl..pchtppppNLscpshcphVphhGYcoL-cAEc ...............................t.cpllcpIlctEWs.FppspN.GGRAsCQssh........s.FplMRtSQahsaspplLpSYhpDLppActtGRNLlsEKYupMMp.os.P....c.a...........sts.hpshIp...........cIsp...........hplt...........Wtc-htc.......+...YPplupthRsltosEDs..tp...TShETYLRGELtTYSpcTlpLYtphl..tch..t..pp..phNLstphhtphsph.Gapsl--sEt...................... 0 14 19 22 +13369 PF13527 Acetyltransf_9 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:B8J1N3 Domain This domain catalyses N-acetyltransferase reactions. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.26 0.71 -4.28 105 3978 2012-10-02 22:59:21 2010-11-15 17:08:39 1 19 2411 47 796 9816 1092 126.00 21 48.77 CHANGED clRtls.ps-..hcphhpLhphsFphspsth............p.h.phhpthhc......p...s...p.shshh-.....ssc....lluphthhshplsl..G.phhth.uulssVuohPpaRp....+Gl....hppLhpptLpphcpp.utsluhLhP...hp.h...shY.c+aG.aphs ....................................................................................................t..t-....ht.th....t....p..l....h...p......t....u......F.tsstps.......................................h..t.t.h.p....p..t..h.t..............................s........h...s.....h..s...s..h.c...............psp.............l..l..u..p..l...t.............h...............s..........h.......................h............p...........h.....p.......G.....p.....p......h...........p..........h.....h........s..........l..u...s.....l....u...V..t..Pc..a..+t........pGl....up.pL...l...p...ps....l.c...p....h....p.......c....t.....G.......h......s........h..s......h...l......h...u..........s...s.......s..a..Y..t+aG.ap........................................... 
0 278 537 676 +13370 PF13528 Glyco_trans_1_3 Glycosyl transferase family 1 Coggill P pcc Jackhmmmer:B8J061 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.72 0.70 -5.24 39 1194 2012-10-03 16:42:30 2010-11-15 17:29:34 1 27 910 0 454 4822 1304 259.50 16 66.89 CHANGED MKILYGVpG.o...GsGHlsRuRslsctL..+..p..H..-VchlhSG.Rs.t.chh...p..c.F..s.........hpph.........pGlohss.ppu...+lsh.........hcThhps.........thscthtphhp.hl..-hpp..hDLlIoD.FEPl..........o.u..hAA.....+hpslPsluluHQhAh...p.asls.ht....hss...hh...t+hhlpt...auPuphplulpaa........cap.ps.l.hPPllcpplhstp..s.t..p..t..st.lLVYLsh-shc.....pls-hLpt.hs....phpFhlY.Ghs.....p....-tp.......puslp......a+shocpsFhpDLtpCsuVlssuGFpL.uEALpLGK.lLhhPlc.G..QhEQp.NAhhLcpLGhGhs..h.sL-sshltcaLpph.......s ............................................................................................................................................................................................................................................................hh.h.h.h.p.s.....G...G.H....hsRsh.s.ls.p.t..L....p..t............h.........p.l..h...h..h.....s...s....t............phh......t....t...h..........................hh..th...............sh.............tt.t........p.hp................................t.t.h..h..............................t.........h..t.....t..........h.p..hl....pt.t......D.l..l.l.sD...h.....h.................h...h....hhs..............p..h........s.....h.....s....h..l.....h....l..s..p..........h........................................................................................................h...................................................h.....................................................................................h...h....................................................................................................................................l...h....h........hs.......t.t......................th.h.t.h....h...t..
..h..........................h....h.....h..h....t..............................................tplp......................hh...h....p..........t.h....p....h.....h.....t....t....s....p....h.........hlst.uGhp.....h...Ehhhhsh..h.l.h.h.P..h...........t....-...Q.....p.u.....h.t.th..uh..h............................................................................................................................................................................................................. 0 155 304 384 +13371 PF13529 Peptidase_C39_2 Peptidase_C39 like family Coggill P pcc pdb_3erv Domain \N 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.15 0.71 -3.83 127 2151 2012-10-10 12:56:15 2010-11-15 17:30:24 1 145 1292 1 439 2081 233 142.70 20 40.19 CHANGED lsVPhhtQh......s..p................h.sG..CtsToluMlL.....sah....Gh...s...hstsplApp..h.t..pss................t..tsahss.h.....ts....sh.ushs.tsl.tp.hup.pas........pshshsspsh......splh.ptlps..GpPVlshss..ht..................phsh.....ssHhlllsGYcp......st.......lhlsDPh .............................................................................................................l.hhtQ........t....................................h.ps.CtssShsMlL..............pah.........Gh.........p............ls.t...t.p..l..sp.p..h.h..pts.................................................................hh.tp..............................t....ush.....ts...h...sp.hsp..pas....................................pshs..h..s.st.sh...................ps..l.t..ph.l.s.p.....G.p...PVlh.hh..s......t............................................t.h.st.....ssHhhl.lhG...Y-p................pt...........hhltDP............................................ 
0 190 306 388 +13372 PF13530 SCP2_2 Sterol carrier protein domain Coggill P pcc Jackhmmer:B8J1N3 Domain \N 25.00 25.00 25.20 25.30 24.80 24.70 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.20 0.70 -4.96 41 1071 2010-11-15 17:31:58 2010-11-15 17:31:58 1 4 843 39 229 841 26 209.70 22 53.40 CHANGED spsGslsRsptWWcph.h.......t.........tttsc..h...p...hslahstsspspGYlhYchps..........sshpVp-hhussstAtpuLWpFlpuhpshhpplp.h.pp.ss--.sl.h..hLsD.P.cs.sp...........p..l..p..sahhsRllDltpsLpuh.sass...........stslsLpVp.Dshhsh.....NsGpapLph.ssGsssspps.........s...ps.s-lplslssLuslhLGspssspLsthGcl.c.....tps.sAlppl-p.lhss.c.p.Pah.- .............................................................................................t..tGslhRsthhW...p.h.h.......t.........ttt.pp..h.....p.hslhhstst.cs...pG...Y..hh..Ypltp.............pphplpEh.hhhst.cA.ppuLap.Flsu..h.shlp...plp.h.ph.....sc....sL.h..hlp-..s.ch...pp............................p..h......p...shhMsR......IlD.VpthLp...th..sapt......................ptshslclp.Ds.hh.h.....NsGhaplsh...ts.......u......p.......sp.lsct.............................s........ts..sslplslpsLsslhhGhtp....sppLstht+l..p.......ssp..ptlpt.lcp..hhst.t...shh............................................................... 0 85 172 207 +13373 PF13531 SBP_bac_11 Bacterial extracellular solute-binding protein Coggill P pcc Jackhmmer:B8IY91 Family This family includes bacterial extracellular solute-binding proteins. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.52 0.70 -4.73 225 7719 2012-10-03 15:33:52 2010-11-16 11:58:06 1 25 3267 36 1791 14857 3154 235.70 22 78.01 CHANGED lpl..hs..uuuh..ptshp.c.l.sstF.p.p......c....su........h.......p..l..plp....hu..so.....ut..lhp.....pl...........ppG..t..............s.....Dlhh.s..........us...phhpp.L.hpp......Ghst..............s.ttsh..sh..s.p....lslhs.tps.s................s.....................h....s.......h.p........s......h.p.s..........Ltp....s.............................sl.......+.....lu..husP....p.....s..sshGt............huh.phh..p...............ph..........................G..........................lhpplp.sp.hl...h..ssss....................pp.shph..........lt.p.G..p.....s.D...h.ul.s....h....to...ts.......................h...........ss..t.lph........l..l.Pssh...t................................h..thshulh..p....pu....tp........p...sA.psF....hpaL.tS.sp.upsl.h.pchGap 
.........................................................................................................................................................................................................................................................l.s.sssh...pp...shp...p.l.s.pta.p.p.......c...ss...................lp..l....p.h.s......a.u....u.S......up......hs.p.......pl..............................tpG..t............A.................D.l.hh..s..................................As...t....p.t..h...s.t...l....t.cc........shlt......................................ss.sps.h......ht....s..s...........l.V......l...l...s..pcss................................s....................................................p......s.........l.p.........s..........h..sD.............Lhc....s.............................................sl.............c......ls......h.s...s...P......c............s....s..s.s.Gp...................hu.h..t.h.h.p...................p.h.........................................s.........................................................h..h........p..p..l.t...p.p....ls..........h.....s.s.ss......................................+s..shsh..................lp...p...G....p............u.D......l....h...l....s............a.........p.o....-u....................................h...............tss.....ph.c.l.................V.....h....s..p.....sh.......h.....................................................................sl.....p..h...s.....s....u....l.l...c..........ps.............tpp........p.tA...csa....lc.a.L.h.S.sp.u.p.p.l...h..tchta..................................................................................................................................................................................................................................................................................... 
0 472 1088 1474 +13374 PF13532 2OG-FeII_Oxy_2 2OG-Fe(II) oxygenase superfamily Bateman A agb Jackhmmer:A1B8L5 Domain \N 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.47 0.71 -4.48 71 3255 2012-10-10 13:59:34 2010-11-16 16:25:36 1 66 1836 23 1480 2897 465 187.80 22 55.86 CHANGED G.hhhl.sal...s...t...p........p...hhphhpplh....p.pss....h.pp..th...t...Gpt.h...sl..........t.h....h....h....tthuW.........hs..........ct..t..sYcY....sstp......shsspsasshPthlhplhpchst.tt...................hs.s...hsP...sssLlNhYs.s..........uu.phGhHpDc-.Eh.....shssPllSlSLGssshF.........................................................ph....t..........................stp..css.....................spplhLpsGDlllhuGs..u..Rhta.Hulspltp.s.............................................t..................................ht.ssRlNLThR ..........................................................................................................h......................................ht.h.......t.............h..p....h...............h...................................................................................h.th........................hs..........................t....tY..pa......ss..h.................................p...st...s.h.s..s..h.P.t.....h.t.p.lh..pphtt.............................................................s..h.p.....s.ss..LlNhY.p..s....................Gs..pluhH..........p.Dpc..ch............sh.p.ssIs.S.lS.L......Gss.shF................................................................................................................ph......t........................................................................thp...pss....................................................hhplhLp.p.....Gsll.lh.sGp..s....phta..Hul..hpt.s...........................................................................................................................................................htstRlsLTFR.......................
....................................................... 0 460 838 1193 +13375 PF13533 Biotin_lipoyl_2 Biotin-lipoyl like Coggill P pcc Jackhmmer:B8IZB1 Domain \N 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.73 0.72 -8.24 0.72 -4.30 99 2242 2012-10-02 20:27:15 2010-11-16 16:33:40 1 39 1526 0 592 25272 6066 54.60 29 15.15 CHANGED ps..........lsl.s.spl..uG.tlspl......hVp.-sp.hV+cG-lLhpl-s.sphp........hthpp...............sps..p.......l..t ...............................p.....tl.s.ssl......uG..hl.spl....................................Vc..-..s.Q..hV+KGpl.L.h.p.l.Dp...schp.....................ttlpp............................sptt................................................................................... 0 178 334 468 +13376 PF13534 Fer4_17 4Fe-4S dicluster domain Coggill P pcc Jackhmmer:B8J3I4 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 
22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.76 0.72 -3.62 126 4064 2012-10-03 08:56:43 2010-11-16 16:34:27 1 53 2739 54 1193 12425 4526 71.60 29 15.95 CHANGED tChpC..s..h..Css....................Cs......h..............hhh...........hs.t.......t..........sp......p..hhpph.....th..............s....hh.p.p.......h..t.t........p......t.h...ppCstCGh..Cp..htCPt..sls .............................................pClhC.u..t........Css..........s.............CP.....o....................................ahh.....................ss.c..........c...................................ss.......s..l.h.t.t.h...phh.............................................ts..ph.p..t...........h..pp..t.................h........pl....pC...tsshs.........Cs..psCPpGls................................. 0 417 815 1025 +13377 PF13535 ATP-grasp_4 ATP-grasp domain Coggill P pcc Jackhmmer:B8J4T0 Domain This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity. 
26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.16 0.71 -4.55 109 4886 2012-10-10 13:17:03 2010-11-17 14:52:06 1 70 2718 6 1546 35532 13206 172.70 17 35.99 CHANGED th+.cKhth+phhp.ttGls.sst.......thhlpshsshpthhpth........h....PhllKPtpu.su.....S...hslhh...lp...sts-lpsh..hpphttph.................tthllEpals............G...s.asl-uhhh....cGch.hhhsstchhhs.s.ph.....h.hsth....tt.......tthpphspph..lpsh.uhp..pGshHhEhhhssc.G....hhhlElss.Rs.uGsth .......................................................................................h..psKhhhp.c....hhp...p....t..G..l.s...sst.......................................t.t..h...h......s......s......h........p.......p.....h.......t.....p.....h......h...p.p.hs.............................h...........P...l.l.lK.......P......t....s......u...su...................u...........tu.l...hh.................lp...............s.t.p..-....l...p...p.h.....hp..p.hh.tttt........................................................................thl..lE.c.a.ls..............................................G.......pp...h.......s....h........p....s......h..........s...........sG.........c..........h.........h........h........h........s......h.....t.......p..........h........h....h....t.............................h........................h.s................................................ttl.h..p.h.s....t....t.h.........h.p...t...h....s.h....................u...h...h...p...h...-..h..h.....h.........s........t.......p....s.....................h.h.h..Ehs..R....t............................................................................................................................ 0 543 1039 1334 +13378 PF13536 EmrE Multidrug resistance efflux transporter Coggill P pcc Jackhmmer:B8J0H9 Family This is a membrane protein family acting as a multidrug resistance efflux transporter. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.59 0.71 -3.88 52 1585 2012-10-02 19:55:49 2010-11-17 16:30:08 1 10 1267 0 447 11223 5450 114.20 18 37.49 CHANGED AulR...ah..hhhhhhhlllh...h...+t..p...lt..........p....h.hp..th+p...pshh......h....hlhhuhlGhslh...hshhshusph.....u......s.u.hls.s...h.ht..h..sslhsslluhhhh....p.....p...chst+............tllhs.hllhsGl..hllthpp.h.tu...hs ..................................................................h..hp.h.h...h...h..h.h..h.h..h..h..h........h.....pt......p.....h.t..............................t....l....hp......t..h.pp.......p..hh....................h......h..l..h..h..u..h..l.s.h.........ht.....hhhh..h.hA.hth....u...........s..u...hlu...sh.ht..l..ssl.h.ssLluhhhh.............+.........c............+.ls.h.t..................................................ph..l.u.s.hl.hh.h..Gl.hh.hth.......tt.................................. 0 132 261 374 +13379 PF13537 GATase_7 Glutamine amidotransferase domain Coggill P pcc Jackhmmer:Q2LTR9 Domain This domain is a class-II glutamine amidotransferase domain found in a variety of enzymes such as asparagine synthetase and glutamine-fructose-6-phosphate transaminase. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.60 0.71 -4.39 568 6228 2012-10-03 21:14:07 2010-11-17 16:30:36 1 28 3641 10 1948 11436 7426 122.40 33 21.85 CHANGED HpRLul..........ssGpQPhh........t.ssp...hs..llaNGEIYNa....p-LRp.-Lt..s.t..G.....a...p..F..p.o.p.SDTEV.lhthhtt.................aGpssl.......p+LsGMFAFAla..D....p..c..pp....pLhlARDRhGlKPLYYsh........p.......ut....sl....hFuSElKALls .....................................................................tRLul....ss..tt..u.tQPhh....t...............t.st..p..........hsls.......aNGE.......lY.........Nh..........ppLR...p...cLt.....p..p......G.................h......t...F......p.....o......s......S.....D......o.EV....l.l.thhpc...............................................ts....t..csl....................cc..l.........p..G..h..FA.F.s..l.h.D...........p.c.pt......tl.h...h.uR.D.hGl.+.PLahup..................pp........ss.....sh......hhuSEhpAL.................................................................... 0 640 1243 1641 +13380 PF13538 UvrD_C_2 UvrD-like helicase C-terminal domain Coggill P pcc Jackhmmer:B8J482 Domain This domain is found at the C-terminus of a wide variety of helicase enzymes. This domain has a AAA-like structural fold. 
27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.48 0.72 -3.89 132 6202 2012-10-05 12:31:09 2010-11-17 16:43:43 1 104 3952 12 1430 12988 3540 110.90 23 15.99 CHANGED hths-thlhhhsshpp....................tlhsspltph...tpht.........pttttlpt............................................hsasholH+upGuEassVhlstsstt...ht..........htpch....LYTAlTRApptlhll ..........................................................................................................................h...uc.lh.hhNs.th...................................slhNG..slGhh..h.t.h......t...................................t.t..h.ht...................................................t......p...........................hphuaAhTl.HKuQG.SEFst..Vl.l...hh..ssth..........................................................hhp+pL.........lY...TAlTRA+cpLhl............................. 0 456 937 1217 +13381 PF13539 Peptidase_M15_4 D-alanyl-D-alanine carboxypeptidase Coggill P pcc Jackhmmer:B8J0M5 Family This family resembles VanY, Pfam:PF02557, which is part of the peptidase M15 family. 25.00 25.00 25.10 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.91 0.72 -3.53 215 1064 2012-10-02 01:02:30 2010-11-18 11:18:06 1 38 858 0 257 963 223 71.80 28 28.71 CHANGED su.sst..hSpHs..........aGhAl......D..ls........shh.hstp......................................................................thstspp..ls..p.hhtp.........hG....tWGGc.W...............h..h..Dh.HFph ........................................................tts....tp..pStHh..........h..GhAlDls.......t..s.hlt.h.ssp...............................t..............................hphst.h.pp..hs..p..hhpp......hG..h..p....WGGc.W..............................p.....sh...hDt.HFph.................................. 
0 109 185 224 +13382 PF13540 RCC1_2 Regulator of chromosome condensation (RCC1) repeat Coggill P pcc Jackhmmer:B8J0U2 Repeat \N 21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.26 0.72 -4.24 320 3569 2012-10-05 17:30:43 2010-11-18 13:23:35 1 413 646 32 2155 12729 3878 29.90 34 6.54 CHANGED l..s..p....lu...s..G.t.tH.....shu.Lp.s....s.Gs.......lhsaG..s..N..s..tGQ .......................hht.lu...s.....G..t..tH....................ohu.Lp.s.......s.Gp..................lhsaG.....p...N....p...hGQ........... 0 959 1559 1984 +13383 PF13541 ChlI Subunit ChlI of Mg-chelatase Coggill P pcc Jackhmmer:Q2LRR9 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.31 0.71 -4.59 173 7224 2012-10-03 01:04:38 2010-11-18 16:46:55 1 17 4260 0 1657 8626 2674 111.30 31 23.71 CHANGED VEVcl..u...sG.L...Ps...aslVGLP-sAV.KES+...-RV+oAlpN......oGacaPsp+ITlNLAPADL+K-GutaDLPIAlG..IL......u..u.....p...t.p...h..s..t..h...........p..p...hh...h..lGELuL-GpLRslpGsLPhsl....tA.p.ctu.h.+p.lllPtpN ........................................................................................................................................................t................sGh...t.......h.......s.......s.....p.....c.....l...h.............lN......l..s...........s.u....s.......l...+.hs.....tsuhDLulAlAll.......ou.....h..p..p....h..s..hs....................p..p..hhhlGEluLsGclR.....sVsu.s.pplt....EA.t.....+....h..G....a.c.p...hllPptN........................ 
0 556 1110 1418 +13384 PF13542 HTH_Tnp_ISL3 Helix-turn-helix domain of transposase family ISL3 Coggill P pcc Jackhmmer:Q2LSW8 Domain \N 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.76 0.72 -8.06 0.72 -4.77 134 3356 2012-10-04 14:01:12 2010-11-18 17:14:59 1 11 862 0 381 2084 96 51.40 41 17.15 CHANGED tpss.hlpctsphTpthcp...hlhphhtpp...ohpslActhsluhpTVpclhpchs ..................A-Ts.lV+KNpQIschlsQ...KIAQpLlE+..hSMT-IA+pLulSTSTVhRhLsca.p............... 0 89 236 288 +13385 PF13543 KSR1-SAM SAM like domain present in kinase suppressor RAS 1 Page R kellrott NMR Structure Family \N 27.00 27.00 28.00 27.60 26.20 25.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.65 0.71 -4.35 10 142 2012-10-02 20:42:54 2010-11-20 00:12:54 1 6 79 0 70 133 0 111.40 45 15.00 CHANGED hshlQcMIDISus+LcGLRTQCAs.Ss-LTQQEIRsLEuKLV+aFSc.LlsKp+lsEc.s...AstL.sY.....PcLpQWLcVVGLpscolpulhsplpTL-shLcMs-cEl+plLsc...spsp.EEEp+RLppAhpNLR .................lQphI-lShspLpG.LRTpCuh.....S...ssLTQpEIRsLEuKLV+YhucQL.sKh+.ls.p.....sstL.sa............PpLppWLclVslp.-slptl...........s.p...h..oL-sLLchs-tcl+phlpc.....husp..pEEstRLstALpsLR........................... 0 18 24 45 +13386 PF13544 N_methyl_2 Type IV pilin N-term methylation site GFxxxE Coggill P pcc Jackhmmer:Q2LVK2 Motif This short sequence motif appears a the N-terminus of type IV prokaryotic filamentous adhesins or pilins. The N-terminal residue, which is methylated, is hydrophobic (generally a phenylalanine or a methionine), and this leader peptide is hydrophilic. The fifth residue of the mature sequence is a glutamate which seems to be required for the methylation step. 
27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.49 0.73 -7.55 0.73 -4.25 777 9430 2012-10-03 10:38:27 2010-11-22 10:00:16 1 25 2209 25 2394 7502 2666 27.30 43 14.94 CHANGED M...............h.ppppGFTLlElllulslhull ....................h.................ppppGFTLlElhlVlsIlulL......... 0 899 1563 2033 +13387 PF13545 HTH_Crp_2 Crp-like helix-turn-helix domain Bateman A agb Jackhmmer:A1B3V6 Domain This family represents a crp-like helix-turn-helix domain that is likely to bind DNA. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.81 0.72 -9.15 0.72 -4.19 94 8343 2012-10-04 14:01:12 2010-11-22 11:39:05 1 18 3028 101 2466 7943 935 72.00 22 30.98 CHANGED pR.lApaLlthtschst....................hpls.lopptlAphlGl.pR.ps.lopslppLpppGlIp......h.....p+.s.plplhDhptLpph .................................................RlsphL..l.t.h..s.pp.hst........................................t.......hpls...loppcl...Aph...lGs.o+.po.lsRhLpchpcc.G.lI.p.......h................pp...p.pl.tl.hchptLtt.h...................... 0 769 1663 2072 +13388 PF13546 DDE_5 DDE superfamily endonuclease Bateman A agb Jackhmmer:C0W963 Domain This family of proteins are related to Pfam:PF00665 and are probably endonucleases of the DDE superfamily. Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.69 0.70 -5.54 14 1337 2012-10-03 01:22:09 2010-11-22 13:29:01 1 10 331 0 429 1483 75 212.80 20 63.11 CHANGED sLpphhspassh....Fspt..cpppthhthltGhlushpR+ol.tluh..hhutpssp....uhpchlspsca-tsplpttltp.shsph....sssptllslDsTshsKp.....Gp+osuVtRpasGshG..Ktpss.sslalh.sspth.pshlstplalP.ptWhps......t.+pphsslsss.hapsK.plAtthlcphhtsu.h.hth..lssDutYu.tcttFlptLpptt.hthlsplpsspshahtsst.ts.......pGRPthhs.h.....hslpssplh....phhspsapphsh+ttsKG. ..................................................................................................h....................h.t...........t..t.h..hh....s.....hh......tp.+s.h..hut......h....ht..t.t.t.....shpph.lspsp...as.t.t.lht...tlht.h..h...h.............ttttt.h....hhlD-osh...Kp......G..h..po.s....sl.s.+...p..a..ssph.G......+.......h....t....p.s.....hs....l....h....hs.......st........t....p...h.l..shp..l.h.h.P...t..thhtt...........................cp.pts...th....p...p.....h..h...t....s..K......t.....lsh.t...lpp.h..ht......t..h..t.h...lluD..uhYu....pt..th...h...t...h.pp..h...s...h.....hhhtlp.psphh..h............................................................................................................t.............................................................................................................. 0 104 270 331 +13389 PF13547 GTA_TIM GTA TIM-barrel-like domain Bateman A agb Jackhmmer:A1B6L4 Domain This domain is found in the gene transfer agent protein. An unusual system of genetic exchange exists in the purple nonsulfur bacterium Rhodobacter capsulatus. DNA transmission is mediated by a small bacteriophage-like particle called the gene transfer agent (GTA) that transfers random 4.5-kb segments of the producing cell's genome to recipient cells, where allelic replacement occurs [1]. The genes involved in this process appear to be found widely in bacteria [2]. 
According to the SUPERFAMILY database this domain has a TIM barrel fold. 27.00 27.00 27.00 27.10 25.80 26.90 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -12.00 0.70 -5.43 66 218 2010-11-22 15:16:03 2010-11-22 15:16:03 1 5 195 0 83 214 30 265.80 48 25.22 CHANGED WuYRRFILHYAtLst...u.....AGG.V-uFlIGSEh+GLTplR............suss..uFPAVstLtsLAu-VRulL..........Gsss+loYAADWSEYaGapss-GsG-haFHLDPLWAcssIDFlGIDsYhPLoDWR-GpsHhDAt.......................thsulaDhsYLpuNlpGGEGYDWaYsos.........tsRsAQhRoPITDushuc.......PWlaRhKDl+sWWsNsH...a...-RsGGVctusPTuWlPpSKPIWFTEhGCsAVDKGsNQPNlFlDPKSSEStlPhaS..........sGtRDDhhQppaLcAhlsaWs......s......sspNPsSslYG....GtMlD.schalWsWDARPaPtFP ..........uYRRFILHhApLsh...t.......AGG..V.DAFlIGSEh+GLTplR............ssts..saPhVstLpsLAu-lRulL..........G.sss+loYAADWSEYauapstsGsG-hhFpLDPLWAcssIDhIGIDsYhPLoDWRDustp.ss........................thts.aDh.shLptsltuGEGa.DWYYsos................tsRtAphRsPITDGhhuc.......PWlaRhKDl+sWWsN.Ha......sRhsGlc.us...........sTuWlPpSKPlWFTEhGCPAVDKGsNQPNlF.DPKSSEsthPhaS..........sGtRsDhhQcpaLcAhhpaWp......s...pNPhSs.lYG....G.Ml-.pchalWsWDARPaPtFP............................................. 
0 18 58 68 +13390 PF13548 DUF4126 Domain of unknown function (DUF4126) Coggill P pcc Jackhmmer:Q2LV71 Domain \N 27.00 27.00 27.30 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -10.88 0.71 -4.84 52 380 2010-11-22 16:08:38 2010-11-22 16:08:38 1 3 355 0 157 366 68 174.00 31 86.32 CHANGED slhLuhGLuhAuGlplYlslLhlGl.hut.hGh..lsLPss...h...phLssshslsssulhhllEhhADKIPsVDslhDslpThlRhsAGuhL.sAu..sh....uc...hsP..sh..........p..hshAslhGGuhAussHus+uusRsslNsostsluNslsSssEDssslsh.hlAlhhPllhlll...ll.......lh...l...lhsh..hhlh+lh ......s.lhluhGLuhAuGlRl.a.lslLhhGl.hut..hGh.........l.p.LPss...h........p.hLssshVlsshulhslsEhhADKlPhlDohhDslpThlRhsAGAlluAu...sh.......up.....hsshh.............t..h.hAslsGGsh.Asss+hs+u.usRshlssostshuNh..lsShsEDshsluhhhlAhhhPllhhll.hl.......hh....l.......hhsh.hhh.p..h....................................................... 0 44 105 139 +13391 PF13549 ATP-grasp_5 ATP-grasp domain Coggill P pcc Jackhmmer:Q2LV27 Domain This family includes a diverse set of enzymes that possess ATP-dependent carboxylate-amine ligase activity. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 222 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.14 0.70 -5.13 220 2119 2012-10-10 13:17:03 2010-11-22 16:09:47 1 27 1555 1 765 4111 1934 216.30 36 28.42 CHANGED pAlsc..u..Rsh.LsphEuptlLsAYGIsss.tohl...Ap.ospEAsthAppl...G.aP.VslKlhS.....Pc...Is.........H.........KoDlG...GVtLsLp.ssppVcpAhpplhppl..ppth.........P.s..........................Ap.l.pGlh..VQpM...sp......tupElllGsspDPhFGPllhFGh.GGshVElhpDt.........uhsLP.P..........Lsh.slAcc.h.l.pps+st.plLp.G..hRs..p.P...sD.........hsALtphLl+l....SpLls.....D.h.Pc........ItElDINPLls....ss....p........G..shAlDARltl ..................................................tt.tt...Ls..ptEsp.s.lLpuYG...lssh.sshl............As.ss.sEAlp..hAc.pl..........G..a.P.....V.s.lKlhS..........Pc.......Is.........H.......................KS.-...l.s.....GV.hL.......s..Lp...sspc.Vp...p.Ah.ps.l...h...s....p....s....phh.h........P.p.......................................Ap....l..c....G.lL....V.Q.pM.....ss.........sutE..Lhlu..l..p..c.D..P...s...F..G..P..l..l..h.h..G.....GGl.h.s-.h.h..c.Ds............s.h.t.L.s.P............Lsh....s...A.c...t....h...l....pp..l..+..st.......c.....l...l..t..u.....h+.sp...s....lD................l.s.u.LsplL.l.p.l....Sp.Lls.......-.t..P-...........I..p..c..lDINPLl..s..ss.......s.........t....hsAlDspl.......................................................................................................... 0 255 518 663 +13392 PF13550 Phage-tail_3 Putative phage tail protein Bateman A agb Jackhmmer:A1B6L4 Domain This putative domain is found in the large gene transfer agent protein. These produce defective phage like particles. This domain is similar to other phage-tail protein families. 
21.80 21.80 21.80 22.00 21.40 21.40 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.60 0.71 -4.47 106 1924 2012-10-01 22:58:23 2010-11-22 16:42:01 1 32 968 0 203 1978 371 159.10 38 15.89 CHANGED sp..pss+sslpsLhpsashsshpps.uplph.htc..s...................s.ps.s..hsl.stsslstssp..t.........t......hphs....Rssps....-hssplplpah-ss...ssYpsss...sp.utp......s...s....sts.....tthsshphshshs.....pspApplupphLtpstsspcs.hpasls.pt.ht......lp..PGDllp.ls...s.................s...p....st..thRlspl...c....p ....................................................................pppcAa-VLsDhsushR.shs..lWsG.p...s...Lsh..lpD..t...................s.-h...V...asa..spusVVhs-p.t...............FcYo...ho.uh.+....DR.asuVcVsa.....hDPs.....Nuapsus.......EhVcD.........s...p......ultR..............hGcNhtch-A.aG.CTS........RGQA+....RsGhWllcTphhEspT.VsFolGhcG.lp......hs..P.GDlIc.lsDst..............huG..h....thuGRlhul..s..................................... 1 37 114 158 +13393 PF13551 HTH_29 Winged helix-turn helix Coggill P pcc Jackhmmer:Q2LS51 Domain This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.55 0.71 -3.92 85 3523 2012-10-04 14:01:12 2010-11-22 16:50:17 1 49 1390 0 1218 5925 803 103.40 18 36.88 CHANGED RtpslhhhspGh......tssphuchhul.sppslh+hhcpap.ptG...hsu...Lhs...ptp..tspspp....lsstpcp....tllp.hsp.ppssp....G........shhohphltp.l...........hshplStpslpphlp+.tu ...........................................hlhh.h.tp.uh...........hpph..uchls..l...upp.ol.h+ah......p.p.ap..ptG.........hpu.................Lhs......pt+......ss.p.spp......hstp...hpp..........tl.h.p..hhp.pps.t.....s...................ph.o.h..p.hl..tp.h.....................s..h.t..l..u.....ps..l.tphhht............................................................................. 
0 289 753 1056 +13394 PF13552 DUF4127 Protein of unknown function (DUF4127) Bateman A agb Jackhmmer:C0WER5 Family This family of uncharacterised bacterial proteins are about 500 amino acids in length. 27.00 27.00 46.40 46.20 17.60 17.30 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.60 0.70 -6.10 50 186 2010-11-22 16:53:42 2010-11-22 16:53:42 1 4 127 0 59 185 2 426.60 27 92.87 CHANGED +llalPLDsRPsshpash.luphs.shpllsPPpchLushppsucs-tLtpWLppp.s..ss-shllSlDtLlYGGLlsSRppphshcpshpRLphL+pl+ppsPshpIYAFslIhRsst.s..sspp-PtYa...tpYGtplaphuh.......hhD+.t....hs.ppttph..tplpttlPtchlpDahs+RpcNhplNtthlcLsc.cGllcaLslspDDss.auhsshEpcpLtphhpphsl.tp+...............lhhaPGADElGhsLluRshsphtt.p....PplhlhYsssputphls.YEspslpcolpp+lpuuGuhhsss.ppADhlLhVsssstttt......tt...........pshpphsppIpphlsp.GcsVulADlAasNGuDppLlphL.tctshltcLtuYuGWNTuuNolGoulApuhlthth...............stpsphphLhtRhl-DahYQusVRsplp..cphtt.....shshhslscpptt.............hpphhpp.......hhpphhtpphshphhthph..............phPWsRhFElslpl ..................llalPlDsRPsshphsh.huphs.shpllsP....Ppphluth..........pps.............ussptlhpWl.pp.ht.pscshllShDhLlYGGLlsSRhpph........s.pph..........t+h.phlcpl+ptpsph.lYsFshlhRs...s..s....sst.-s...aa.tas...tlh.phuh.......l.c+.t.....t.ht.t-ttph...ttlpttlP..phlpcaht+RppNhtlNpthlp.hsp..pGhhshL..slspDDss..hu.ssh-...tptLtthhpp......ht....pc................hhhhsGADEluhhLlsRhhsch...t.h....PplhlhYs.stutthls.YEs.t...sltpolptpltusGuhhsp..tpuDh.lLhlpss.......................................................tshtthsptlpphltp.sh.lulsDlu...hsNG...u-ttLhthL..pts....h.......l.cltuYuGWNTsuNolGhslupuhlhhhh........................s.ttphphLh.chl-Dah...Ypu.lRpplt.pth.t......t.s.hth.t.pttt.t.............hpthhpp......................................hpphhtt....hh...h........h..ph.......................phPWpRhFEhphp......................................................................... 
0 21 38 55 +13395 PF13553 FIIND Function to find Weichenberger CX, D'Osualdo A kellrott Joint Center of Structural Genomics (JCSG) Family The function to find (FIIND) was initially discovered in two proteins, NLRP1 (aka NALP1, CARD7, NAC, DEFCAP) and CARD8 (aka TUCAN, Cardinal)\ [1]. NLRP1 is a member of the Nod-like receptor (NLR) protein superfamily and is involved in apoptosis and inflammation. To date, it is the only NLR protein known to have a FIIND domain. The FIIND\ domain is also present in the CARD8 protein where, like in NLRP1, it is followed by a C-terminal CARD domain. Both proteins are described\ to form an "inflammasome", a macro-molecular complex able to process caspase 1 and activate pro-IL1beta [2]. The FIIND domain is present\ in only a very small subset of the kingdom of life, comprising primates, rodents (mouse, rat), carnivores (dog) and a few more,\ such as horse. The function of this domain is yet to be determined. Publications describing the newly discovered NLRP1 protein failed to identify it as a separate domain; for example, it was taken as part of the adjacent leucine rich repeat domain (LRR) [3]. Upon discovery of CARD8 it was noted that the N-terminal region shared significant sequence identity with an undescribed region in NLRP1 [1]. Before getting its final name, FIIND [4], this domain was termed NALP1-associated domain (NAD) [5]. 
27.00 27.00 28.40 28.10 24.30 24.10 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.61 0.70 -4.95 7 140 2010-11-22 18:21:59 2010-11-22 18:21:59 1 31 32 0 69 160 0 224.30 41 24.92 CHANGED RV+hPsAGoYphPsTGLtFlVpcsVTl-IcFCu..WsQaLc.ch..pcsWhVuGPLFDIKAEP.GAVsulaLPHFluLpsGcVDs.ShFpVAHFK-cGMlLEpPsRVE.aasVLEsPSFSPhGlLLRhlsushh.lPlTSssLlYa+l.a.E-lsFHLYLlPsDsslpKAIDDc..EhKFpFVRIpKPPPlssL..hGuRYlVSGS..spLEIhPcELELsYRSPGE.QlFSEhYlGphtssI+Lplp-K+atsLVWcALlKPsDL ......................................................................VphPsAGpYphssTGLtFVVppsVTlcIpFss..Wspa.Ls.ch...ppsWhVuGPLFDIpA.Es..sAVsslaLPHFlsL.p.s..u.cVDs.ShFpVAHFp-cG.MlLEpPu+VcsaasVL-sPSFS.hGlLL.+.hhpushh.lPlsShsLlY.a+h.......p.--l..p.hHLYLlPsDsslpKAID-c.......E.+F.phl+lppsPPhp.s.L..hGs.cYhVSss.......t.lc.lhP.p.E.Lc..LsY+Sstc.QhFochahtphtp.IpLplppp.....pp.sllWcs.l+ss........................................................ 0 20 29 43 +13396 PF13554 DUF4128 Bacteriophage related domain of unknown function Wahab A, Serrano P, Geralt M, Wuthrich K kellrott Bordetella bronchiseptica RB50 PDB:2L25 Family The three-dimesnional structure of NP_888769.1 (PDB:2L25) reveals\ a tail terminator protein gpU fold, which suggests that the protein could have a bacteriophage origin. 27.00 27.00 27.00 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.35 0.71 -4.47 28 150 2010-11-22 18:52:35 2010-11-22 18:52:35 1 2 141 1 34 122 78 121.30 22 89.05 CHANGED tIppulct+lsphs...tt...sl.lAa.NlsFssssut....sYLpsthhPusTpshsLutct.phhpGlhQlsVlhPsGpGsstspplAspltstFtsspplppsshtlhlppsPphussIssssphplPVolpY ................Ipthl.s+ls.s.hs.....tht..shs.lsa.Ns...s...........Fssssst....halphphhsusotshslu..t..ss...h..hhp..GlhplslhsPsGsGssthppl.AcpLtphFp.....th...s.....h.t..........t..............h.hshph...................................... 
0 3 18 26 +13397 PF13555 AAA_29 P-loop containing region of AAA domain Coggill P pcc Jackhmmer:C2M2Q6 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.25 0.72 -4.44 68 804 2012-10-05 12:31:09 2010-11-23 10:58:31 1 6 755 0 192 5015 2079 55.80 31 5.90 CHANGED LpclpllNWGoFcs.cshs..ls.tG...s..sLloGssGSGKSTllDAlpslL..sPsp..t.h..tFNpA.A.sss ......................................apt...p.hs.....h...s...p...t..G.......h...hlIoGs.oGoGKSTllDAlp.hh..L....hsps.................t......................... 0 60 123 166 +13398 PF13556 HTH_30 PucR C-terminal helix-turn-helix domain Bateman A agb Jackhmmer:D3Q4F0 Domain This helix-turn-helix domain is often found at the C-terminus of PucR-like transcriptional regulators such as Swiss:O32138 and is likely to be DNA-binding. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.48 0.72 -4.38 456 5435 2012-10-04 14:01:12 2010-11-23 15:02:24 1 29 2226 4 1399 4584 150 58.40 30 14.07 CHANGED L.lcTLcsal.pss.tshspsAppLalHpNTlpYRLc+lpclhG.hc.lp...s.sps....th.pLtlAlpl ..........................LhcTLpsa..h..pps..sshspsAcpLalH+NTlpYRL..c+lpcls..G.hs.lp....s..hcs....th...lhlAl.......................... 
0 544 1037 1249 +13399 PF13557 Phenol_MetA_deg Putative MetA-pathway of phenol degradation Coggill P pcc Jackhmmer:Q2LTI1 Family \N 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 248 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.95 0.70 -4.57 154 1206 2012-10-03 17:14:37 2010-11-23 16:08:22 1 10 655 0 439 1221 272 237.20 14 76.57 CHANGED uspssssp.....Gppls........shchph...pssh.phhhhp.s.p....h.hss..thuhthh..l.s.hh...phph.phs......t..........hpss..t.....G.huDhhlushhhhatp.t..........shph.......shthtlthPsGsasts.ps......s...........shhshssphuhs...hs.s.thslssshthsh...tscsss....................phptt.sthphshshsh..t...h.st.....ph.ssGltua.hh..............t..s..........ts..sctpshslGsulsahhsts....hplshphtpph....sspsthtussh.hh+h ..............................................................................................................thth..........h...th.h...............tt.....phsh.phh....l....hh.......phph..ths........t...........................................http..ts................G.hGDltlushhhhhptst...................shth..............................shthtlphP.T..Gshsts.p...........................ut................shash.ssthuhs......ht.......s.....ths.lssph.thth..............ttcsss.....................phptu.st.h...phshshsa.......t.l..ss..........ph..ph...ulssh...hh.........s..................p..s.t...........ss....sctpsht..lss..uhtathstt..........hplphphhhsh........sspsh..t..h......................................................................................................... 0 137 262 362 +13400 PF13558 SbcCD_C Putative exonuclease SbcCD, C subunit Coggill P pcc Jackhmmer:C2M2Q6 Domain Possible exonuclease SbcCD, C subunit, on AAA proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.25 0.72 -4.07 152 3904 2012-10-05 12:31:09 2010-11-23 16:14:37 1 21 2832 14 954 3276 186 85.50 31 7.53 CHANGED pphhDhRsa.hs.aplphp....pts.........u.ppht...h...hpshsshSGGEpthhhhlsLtAAluthh..st...........tss......hchlhLDEAFuphDscphppshp.hhpp ........................................h...........t.hplph.......p..................s...tp......h..........h+pstsLSGGEs.................hhsuLsLu..LuLushh..pp............shs.................hchLFLDEuFusLDs-olcssh-sLc.t.......................... 0 313 591 808 +13401 PF13559 DUF4129 Domain of unknown function (DUF4129) Bateman A agb Jackhmmer:D3PZP2 Domain This presumed domain is found at the C-terminus of proteins that contain a transglutaminase core domain. The function of this domain is unknown. The domain has a conserved TXXE motif. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.43 0.72 -4.01 194 1189 2010-11-23 16:26:06 2010-11-23 16:26:06 1 12 838 0 462 1144 159 72.80 18 14.34 CHANGED ptapthhp.hhtpt.ulspp...s...upTspEh....spcs....sttt.........t..lpp..........lsphacps+Y.....Gscs.sstp.ph......pphtpthcpl ..........................hatthhc.hhtph.ulshp........sucTspEh...spcs..............................spths.........ssshpp................lsphappspY............upps..sstt..ph.........pphhphht............................... 0 148 321 418 +13402 PF13560 HTH_31 Helix-turn-helix domain Bateman A agb Jackhmmer:D3Q8V6 Domain This domain is a helix-turn-helix domain that probably binds to DNA. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.30 0.72 -8.90 0.72 -3.77 149 10133 2012-10-04 14:01:12 2010-11-23 16:28:26 1 174 2365 13 3830 31197 3989 66.80 24 29.09 CHANGED pLGttL+p.hRp..ps..........................G.l.otp.plApths........h.Sts...plsclEp...Gcp...s.........shs....hlptls.ch.hs..s..s...........ss.....th.p..tL ........................................................................................hGttlRp.hR.p....p.t...........................G..l..opp...pl.A.p.t.sG................l..S....h.s.....hl.sp.lEp.......Gc.p.....s..............................oh.p..........hlt..t...ls....ps.Lt....l..s.......................hh........................................................................... 0 1127 2704 3367 +13403 PF13561 adh_short_C2 Enoyl-(Acyl carrier protein) reductase Coggill P pcc Jackhmmer:C2M4X7 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.50 0.70 -4.60 47 22490 2012-10-10 17:06:42 2010-11-23 16:43:09 1 80 4615 768 7643 72809 33208 228.60 25 81.38 CHANGED Gshs-pSIuWtlActht-pGApllloshs......................t...............................hh...stlp.plucph..s.....sc.....l.......l..shDss..st..-c...................lpplhpplpcph.......GplDhllHSIuhusp.t...htpshh-.s.shctahpuhslSuhSahslhpthh...hhscs......GSlluLoYlAupRs.hPsYs.sMusAKAuLEShsRsh.AhchGc.+slRVNsIStuPht.TpA.upuIss........h-p......hhpaupphuPLsp..s.su--lAsssshLhS-LsptlTu.pslalD..GGhs 
...........................................................................................................................................................................us.....pGIG.h..u.h.A.....p...t...h...s....p.....t....G.....A...p......l...s...h...s...h.s.t.......................p................................................................................t..........t........h...p.....p...h.....h...p....p...h...s...............tp..........s..........................h..ts..D..l..s....st...pp.......................................................l..p..p...h...h..p...p....h...h...p..p.h.............................G..p..l...D..h....l.....V......s...........s.......A.....G............h............s...............................................s...s....h........h.............c............h.......s..............t...........c........t.............a.........p.............p...............s........h.....s.......l..........s...........l.......h..............u............h.....h.......h.....h....s.....p....t....s.hs..............hh.....p...p...s.................................G..u........I......l........s.......l.........o............S..........h...........s...........u............h............p............s..........h...........s..........s...........h..........s.........s...............Y.......u......s.........u......K.......A.......A......l.......t.............u.........h............s...+........s....l....A...h....-....h...........u.........s.................p.........G.....I.....R...V..N......u.....l.......u.........P.......G.........s..........l..........p......T..........s........h........h.....p....s.....h....h..t..................................h.p.p....................h...h...p.....t....h......t.......p..........p.......s.........P...........l.............t........R......h...........u......p.......s.........c.....-..l.......A..s..s...s.h..F...L.s.S....-.t.u..u....a...l.T.G.p...s.l.hV..D..GGh....................................................................
................................................................................... 0 2101 4453 6315 +13404 PF13562 NTP_transf_4 Sugar nucleotidyl transferase Coggill P pcc Jackhmmer:C2M1K0 Domain This is a probable sugar nucleotidyl transferase family. 27.00 27.00 32.60 31.80 22.90 24.60 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.88 0.71 -4.58 35 114 2010-11-23 16:46:08 2010-11-23 16:46:08 1 3 114 0 61 123 80 160.00 28 39.69 CHANGED phlLF--st.tppLhPhTaTRPVu-lRlGIhTl+EKWpphL.s.......ssp.s.ohhsccYL........pp+aP........sph.p.t-...s..........lhlNuphlPsc.tlsphIps..L....c.Gp..ul.hps-.....cllAhc....................h..cp.....schs.............sh.................ttlp....htpshhhlcpsWDlFshNsptlppDach.lo..p.GRsS .....phlLFDsss.tppLhPhTaTRPVu-lRlGIhTl+EKWpphL.s.......s.ssohlTcsYL........ppKaPh.............sp..t..tp...s..........lhlNushlP.sp..tlhphlpp......L....p.sp...Alhts-........pllAhhhp...............tpp.ss.hs..............ph.................phlp....htpshhhlcpsWDlFshNsptlptDaph.lTpsR.............................. 0 29 48 58 +13405 PF13563 2_5_RNA_ligase2 2'-5' RNA ligase superfamily Bateman A agb Jackhmmer:D3Q0T5 Family This family contains proteins related to Pfam:PF02834. These proteins are likely to be enzymes, but they may not share the RNA ligase activity. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.15 0.71 -4.50 139 2076 2012-10-03 21:31:48 2010-11-24 11:46:27 1 31 1509 1 702 2552 184 152.70 17 68.99 CHANGED sssshttt..lppl+.................thhstt...htths......sHlTLhhshhssp...........................spltptlpphht....................................................phps...Fp.lpl..sshstF.........tst.slalps.........................pstppltpL+pplhps......h.th...h..tt.............................tsapPHlT.....lupthsstsh...................................ptlhptl.......................tth.shp....hpls..plsLhphpsss ...........................................................................h..............h.........htt........HlT.L..th.h..t.c.h.s.pt..........................spltp.t.l.pph.ht....................................................ph.t...s........hp....l.pl....sth.us.Ft...........tss..sla.htl..........................................tssspLtpLpp.....plppt......htth........sh....................................psatPHlT.....lAp.t..h.s.spph...............................tphht.l........................t......hp......h....lp..phtLh......t............................................................................... 0 219 478 611 +13406 PF13564 DoxX_2 DoxX-like family Bateman A agb Jackhmmer:D3Q9W4 Family This family of uncharacterised proteins are related to DoxX Pfam:PF07681. 
24.40 24.40 24.40 24.40 24.30 24.30 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.47 0.72 -4.05 115 1334 2012-10-02 13:32:46 2010-11-24 11:51:27 1 7 742 0 518 1577 778 103.00 19 78.74 CHANGED llsslluhhhhh.uush.+l.h...ps......t..h.....h.....t.....shs.........p.h..G...a..P.tth.hhh.lGshclhGuluLl.....l.s..h.h....h..s...hls.hAAsGlshhhlGAhhsH..l..pts-s..t......t.....hshsl..shh.sl..shus .............................hthlluhhh.hh.suhh.+l....h...ts................p..h.......h.....p.....ths.................p.h..G.....h....P...th..h....h..h..h..l..G..s....h..ElhuuluLlh.s....h...............hlshhA.uhhl.s.hh......h.l.u....A.h.h.s.H.....h.....tstc.....h............t...........hhhsh..hhh.hh....h......................................... 0 202 354 442 +13407 PF13565 HTH_32 Homeodomain-like domain Coggill P pcc Jackhmmer:C2M820 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -10.26 0.72 -3.18 486 664 2012-10-04 14:01:12 2010-11-24 14:43:25 1 19 411 0 337 3654 468 74.30 20 27.49 CHANGED Tl...t+aht+...a....pp...tG....ts...........t....tp....sups.....hp......sph..ptlhphht.....pp.................hospplsph.ltp....t............hS.tsolt.R.hL ................................................................oh.+Whp+..a..............pp....tG....tul.s...................h..t.tc....sG+s......pp......hssch..ptlhph.hp.....pps.....................phoscplspp.Ltp......th.h........ShsTlh.+hL........................................ 
0 104 210 313 +13408 PF13566 DUF4130 Domain of unknown function (DUF4130 Coggill P pcc Jackhmmer:C2M6X4 Domain \N 27.00 27.00 27.70 28.50 24.80 24.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.07 0.71 -4.68 156 594 2010-11-24 15:37:12 2010-11-24 15:37:12 1 4 583 0 199 587 26 164.90 28 52.22 CHANGED cpsp+hshlachlhhsh.phsp..plhpthucssVhp.ltphs+pVp+EtH+hcuFlRFpcl....ps.........shahAhhEPcaslLshlusHFscRassppWhIhD.c+thusha...cs..pph.phh....s.hs........p.....th...........ph...........s.ppEctappLW+sYacolsI.tRhN.+....hppppMPp+YWKtLsE ...................................t...p+hshlachlhps..h.p..tsp...plhpphuDs..sVhp......lpphsKpVp+EtH+hc.u.F.lRFpch...ts....................shahAhhEPcaslLshhusHFtcRassppWhIhD.p+thuh..ha..-s....pp.lphh....p.hs........s....t.........t...ph...........................s.ppEctappLW+sYapsltl.tRhN.+....hhpppMPp+YW+.LsE.................. 0 59 132 167 +13409 PF13567 DUF4131 Domain of unknown function (DUF4131) Coggill P pcc Jackhmmer:C2M1S0 Domain This domain is frequently found to the N-terminus of the Competence domain, Pfam:PF03772. 
27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.92 0.71 -4.86 202 2454 2010-11-24 15:48:31 2010-11-24 15:48:31 1 9 2425 0 598 2213 149 162.40 14 22.34 CHANGED lhGl....lhs.........hh.hhshhhhhlhhhhhhhhhh...hhh..........................................t.thth.........hhhhhhhhhhsshhhthtthphtt......pphsphht................tpthh.....lpG.hl.....tstPphp........spth.....pahlphp............htsttpt.hss+lhlth.ppptt...............hphG..-tlpl...puplptPpsstNP..utFDYppYL.tppsI..huphhspshphh .......................................................................................................h....................h.....hhh...h.h.h..hhhhhhhh.hh.......................................................................................hh.....................hhhhhh.h.hh.h.hh.h.h..h...t..h..t..t..thtt.........pths.t.ht...........................h.........hps..hl................ss.h......p.hs........................................spth.........phhht.........................................tstpth.h.hcl.h..l.t..hptpt...................................hpsG...pphph...pu...c..lc.sputtN...........uuFD..hp..t..ah.ht.pslhthh.ht.....h.................................... 0 189 382 494 +13410 PF13568 OMP_b-brl_2 Outer membrane protein beta-barrel domain Coggill P pcc Jackhmmer:C2M356 Domain This domain is found in a wide range of outer membrane proteins. This domain assumes a membrane bound beta-barrel fold. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.24 0.71 -4.47 110 1717 2012-10-03 17:14:37 2010-11-24 16:11:55 1 14 223 0 538 2059 496 183.40 15 70.26 CHANGED tAQ......p..................hp..hGhpsG..h..s.h.....sshph..................p...t...p........ph...sh.............ps.uhs..sGh..hs.chth...s..p....t....h..u....l..p..stltasppshph..ptp.....................t...........p...............ttphphphshlpl.Plh..h...paph...sp..........hphtlhsGsthuhhls.ppt.........p...tt...............t...............................t.ph...s..t....tph...pp..hshulthG..huhp....h..h...........p....hh...l...phcapa.GLsshhp ....................................................................................t..........................hphuhpsG..h..s..h.......ssht...........................................p...................p...sh..................ph....uhp....hGh...hs..ch.th.........s..p.....p......h...u....lp.....sslh..as.pp.u.h.ph.ptp.......................................p.......................................................ttphph.p..hs.al.p.l...Plh...h...paph....sp...............hphhltuG.s.hh.uhhls.sph.....................p.......hp.............t.........................................................tph..s...p......tph...p.p....hsh..u..l..s..hG....hGhp....h..t..................p.......hh.......l..phpaph..Ghss.................................................................................................. 0 277 479 526 +13411 PF13569 DUF4132 Domain of unknown function (DUF4132) Coggill P pcc Jackhmmer:C2M366 Domain This domain might be involved in the biosynthesis of the molybdopterin cofactor in E.coli. 
27.00 27.00 37.40 37.40 25.00 24.50 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.03 0.71 -4.75 83 961 2010-11-24 17:18:36 2010-11-24 17:18:36 1 16 526 0 149 754 8 181.60 41 17.24 CHANGED G.KplKolPtsh+c-........tshpc....hpsh+K...plcphhstptpRLEpuhhss+pWssppa.pp.hhs.HPlhpplsccLlW...h......sss.......t.tshthh...cD..ss.........lsshcscth......p.Lsss.s.t....lpl..sHPlclst.tslssWpchht-hclhQPFcQltRc..lahlTss.Eps..tspspRau.Gthlps.t.ph.hu.hlpp+GWp ............................spRhKslP+.ptsDDp...pAs-AlschKtLKK...Dscplupp...plsRLEsAhpppRRWShtsFpthhVpHPlsRhlTpRLIW.GlY.....s.pp....................pLlssFRVA....p-..ss....................assAp.D-.h......s.LPss.s.......IGI..sHsLElos...pp......tutFuQlFADYElhPPF+QL...sR..p..o.ahLTt...........s..Eps..usp..LsRWtG+.psss.Gpl.hG..hpt+GW................................... 0 46 88 102 +13412 PF13570 PQQ_3 YWTD; PQQ-like domain Coggill P, Eberhardt R pcc Jackhmmer:C2M3Z1 Repeat \N 22.00 18.00 22.00 18.00 21.90 17.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -8.19 0.72 -3.55 1244 1943 2012-10-05 17:30:43 2010-11-24 18:25:17 1 236 866 9 946 6594 2060 41.50 27 13.21 CHANGED G.phh.....Wph...........ph...s..s...th.h...sss....................sh..s.....s...s....h.....lalsst...............suplh.u.l..-....sp..s ............................................Gphh..Wph.........................ps.......s......u.......ts.h.....usP........................sl...u.........s....G..............h.........Valsot............................sGplh.A.l..st.......................... 
0 381 735 860 +13413 PF13571 DUF4133 Domain of unknown function (DUF4133) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0094 Family Based on Bacteroides thetaiotaomicron gene BT_0094, a putative uncharacterized protein as seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or vs when in culture [1][2]. 25.20 25.20 25.30 26.00 25.00 25.10 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.08 0.72 -3.76 26 267 2010-11-24 18:36:44 2010-11-24 18:36:44 1 1 110 0 31 216 10 94.30 60 85.71 CHANGED INKGIG+sVEFKGLKuQYLFlFAGGLLAlFllaVlLYMsGVsQalCluFGssouolLVWtTF+LNpKYGEHGLMKhhAt+pHPRYllsR+plh+Lh ..INKGIGRsVEFKG.LKA.QYLFlFAGGLLAlFlLh.VILY.MsG.lsQalCIuFGssuuolLVWtTFpLNt+YGpHGLMKhuAt+pHPRYllNR.+plh+Lh....................... 0 11 29 31 +13414 PF13572 DUF4134 Domain of unknown function (DUF4134) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_0095 Family Based on Bacteroides thetaiotaomicron gene BT_0095, a putative uncharacterized protein As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), It appears to be upregulated in the presence of host or vs when in culture [1][2]. 22.30 22.30 22.40 22.40 21.60 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.11 0.72 -3.88 27 454 2010-11-24 19:06:34 2010-11-24 19:06:34 1 3 132 0 47 312 15 94.60 48 88.66 CHANGED pKthlhshhhlhshss..thApssuuuGhst...Ass.lsoYhsssspLhYAIuAVlullGulpVY.KhssGDpDlsKshhuhhGAClFLlssupll.uFF ......................p..thhh.hhhhhhsssu...sshAQG....sG.AGIsc......ATphVoSYFDPuTKLhYAIGAVVGLIGGlKVYsKaooGD.sDsoKoAuSWFGACIFLIVAATlL+SFF....... 
0 16 42 47 +13415 PF13573 SprB PbH1; SprB repeat Coggill P, Eberhardt R pcc Jackhmmer:C2M2H5 Repeat This repeat occurs several times in SprB, a cell surface protein involved in gliding motility in the bacterium Flavobacterium johnsoniae [1] 26.00 20.00 26.00 20.00 25.80 19.90 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.70 0.72 -4.35 113 1000 2012-10-03 16:25:20 2010-11-25 11:10:41 1 88 62 0 782 1180 555 36.20 31 8.65 CHANGED stssssloCtG....sssGolsl.ss.oGG....o..ssYoYp.Wssuss ......................ssso.C.u...........sss.G.s..l...s.l...s...s...o...G.G............o.....us....Y.oYs.assst............... 0 535 718 782 +13416 PF13574 Reprolysin_2 Metallo-peptidase family M12B Reprolysin-like Coggill P pcc Jackhmmer:C2M7W3 Domain This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.58 0.71 -4.21 49 431 2012-10-03 04:41:15 2010-11-25 14:00:08 1 33 240 11 189 3218 220 181.20 21 27.86 CHANGED hspshsthssshsplNtlYE.............c.-hu..lph.....sL.l..s.............s.spll.hhssso.....csass...............................sssshhspt.pshhssh.lGpp........sYDlGalh..ssh..ssuts...GluhlG..................slCs..ss.......pps..............oG........hs...........sPhGssh.........thch...lAHEhGHpFGusHohs...............s.ssp..t....o.u..ss..........EsuuGsoIM.u.Yu.....uh...sssp..............sl.s.p 
...................................................................................................................................p....................................................................................................................................................................................................................ht..........tt.t.....................sh.s...h.....s.hlh..............tsh......ss.....u....sh..............Gl.Aalu..........................................ulCp..ps........pth....................................s.G............hs............pshssth.................uh.tT...huHEl..G...........HshGusHsss......................................................s..p.sp....s.....s..s..ts...................ps.tp.G....s.h.IM..s..ht..........sp....s.s..p.........F..S.s..o.ht.h......t.............................................................................. 0 79 103 149 +13417 PF13575 DUF4135 Domain of unknown function (DUF4135) Bateman A agb Jackhmmer:D3Q4V7 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 380 amino acids in length. The family is found in association with Pfam:PF05147. This domain may be involved in synthesis of a lantibiotic compound. 
27.00 27.00 27.50 37.00 26.20 25.70 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.23 0.70 -5.46 79 396 2010-11-26 09:48:42 2010-11-26 09:48:42 1 9 292 0 97 356 22 354.20 24 39.27 CHANGED YPVLsRhlspphpp.ahptht-lhp+lt.pD...................hppltpp....hhtsp.......sp..lsslph..u.h.uDsHssG+sVhhl..pFs....sG..h+lVYKPRs.Lpl-ttap..........phhpalspp....s..t....h..h.hh.chls.ct.....YGWtEalptpsCp................spp-lpcaYpRhGhlLulhalLsuoDhHaENlIA.......t.hPlllDlETl..h........p..s.........................p.h.....t...s....s...s..h.s.tp.t..p.......................ulhpsG...ll..s.t............h.h.....h.....s..s....t.p..u....................h..sls.hh..s.s......s...ps..hp...h.......hp........t......h..s......h.t...pts..........scsh....lttt.ttph.......................t...ss..tsYh.......ppllpGFppsY.phl..hpp.+p..c.l...hp...hlhp...FpssplRhlhRsTphYuplLp....tuhHPc.....h....hps.cpphhh.phh.......htpp.tttplltsEhpsLhpsDIPhFtsps ................................................YPhLh+hlspthtphhphhhcllppltpD...................hstlppp.............h.htp.........sp..lsslph.s..h....GDsHstG+oVhhlpFs.......su....+llYKP+slthcpthp...................................................plhphlspp....s..t.....hp..lhh.......chls....pss..............YuatEalpttssp............................................stcclpcaYh+hGhLlulhallsuoDlHaENlIu......pGphPllIDhETlhp..s.............t..h.......s....s.s..t...stt..t.t.t..................lhpSVhpoGlLPh.......h...h....t...p...pp..u..............hDlSuh.ut....p....ts.....pp...sh.p..h.h.l..hs.t......oDph....ht.ht.phph...tt.................hpsp.l...phtpYh..pplhpGFpphaphh..hp.p..+p..c.h.....hp...hl.tt...htshpsRhlhRsTphYuplLph.thcPs...........hh...pst..hcccthhpplh..............ttp.pppt.llttEhppLhptDIPhFhsp.s......................................................................................... 
0 31 58 86 +13418 PF13576 Pentapeptide_3 Pentapeptide repeats (9 copies) Bateman A agb Jackhmmer:D3PTZ3 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.28 0.72 -4.07 230 524 2012-10-03 04:02:01 2010-11-26 10:13:22 1 41 195 8 233 1940 556 48.40 27 19.80 CHANGED FspspF.pss...sFp....pspFpsss....p....Fsp.spF....s.pspFpsspFp..ts..ssFppsp .....................................................................................................FptspF.pss.s.sFp..........pupFpsss....s.......Fpp.upF.....ts.sssFssupFp....ts..ssFtts....................................................................... 0 69 184 226 +13419 PF13577 SnoaL_4 SnoaL-like domain Bateman A agb Jackhmmer:D3Q8I0 Domain This family contains a large number of proteins that share the SnoaL fold. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.66 0.71 -4.22 175 2151 2012-10-03 02:27:24 2010-11-26 11:07:17 1 22 700 23 793 3721 973 126.70 18 73.54 CHANGED ppLpscpsIppLhscYstthD..........p...........t..ch........c....th.s.s.lF..sp..D..uthchs...sh..............s..........h.hpG.........t..s..........slhshhps..................h.h..s........shh...........hs...........tH..hhs.s.hl.p.l....c...........G........D..p..Apupsh.hh.......shh.hh.s.s...................s.s.......s.ht..hh..hs.......u.tYp..cchh+p.............su..t......W+lpcpp 
...............................................................stttIpp.lhsc.Ysts.hD...............p.............p..ch........c....th.ss.lF...ss.....D..u.h.h.phss.........................t.............hhpG..................t.s.......................slhs.h.hpp.........................................h..h...s.....thh.........................hs...........................tH....hhs..s.h.h.l..p..l......s...................u............-....p..A..p.upsh...h..h...sh.h..hh.ss.................................t.t...........t..t..hh...ts..........utYp....-p...h.h+p.............ss.t......W+ltph................................................................................................ 0 154 493 664 +13420 PF13578 Methyltransf_24 Methyltransferase domain Bateman A agb Jackhmmer:D3Q0B1 Domain This family appears to be a methyltransferase domain. 26.20 26.20 26.20 26.20 26.10 26.10 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.37 0.72 -3.21 204 1368 2012-10-10 17:06:42 2010-11-26 12:36:09 1 67 883 14 614 4985 3811 113.70 21 37.53 CHANGED lElGshpGtSshhlsps...h........pp..s..sh...........................................pl..h..........ulDh.......t........................................p.......htt..hhp......................................p......................tthss.......h..........sphh.........tsss....................tp........s.ht........pht.........................s..s...l.D...........llalDG...sH...s...h..........p...s...sht-..h...pth...h..s..t.l.p.s.s..u.....lllhcDh 
.........................................................................................................lElGs.hGh...Sshhhst.s...h....pp..ss...............................................pl..h..slDh......t.t.....................................................p......htt..hhp.............................p............................ts.h.s.s.....h...............lpl.h..........puss......................p........s.ls.....pht.......................................s..s.l..D.....................ll.a.l.DG......s.H...t.....h......................p.....sshts....h......phh........h....s.......p..l.p.s.G..u.....lllhcDh.................................................. 0 222 417 536 +13421 PF13579 Glyco_trans_4_4 Glycosyl transferase 4-like domain Bateman A agb Jackhmmer:D3Q529 Domain \N 27.60 27.60 27.60 27.60 27.50 27.50 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.16 0.71 -4.09 231 11449 2012-10-03 16:42:30 2010-11-26 14:14:27 1 99 3251 7 4057 16665 3336 167.30 15 40.28 CHANGED GGhtphs.tplup.sLs...p...t.G..h..c....Vp..l...l..s..............s.s....tss...........................t.............ht.......t..s.........s....s.t....l.h..........t.l......s......hs.........tt..................t............t.......t.............h.t.........h.h.......................h....t.l.................tp....h..................l..................t.....................ttpsD....ll.a....s.p..s...h..h......s...s....hhs....h...h.u..........t...t.......t..s.......hP...h........lh...sh...........................+...............................s........hh........ht............t............tsh.........tt...p......h..h......t.......h.h.....pc......hhhpp..As...t......llsso.ptht..pt..lt.p.h..u....h..sss.clhllssu 
...............................................................................................................................................................................Ghthh.h..hpl.sp...tLt.....t..........t...G.....h.....c...........Vp...l...h.s.............................t......t.t...................................................................t.............................s............s.....h..p........lh.......................p..h................s.............h...........................tt.........................t..................h.t.........t...........................h.t...........h.h..............................................................................t....t..l....................................................tp....h.......................................l....................p...............................................................................................................................................ptpsD.......ll..a......s...p......s.....h..h....................s..s........h.su.......h.....h.s........................t........t................................h...s.................................hP...h........V...h...sh................................................................................................H........................................................................................s.........hh...............hp........tt...................sh......................h.t...p................h...h...........t..................hh.........pc.....................t.h.h..pp.........s..c.....t........lls..sS..pt.h.......t........pp...hh..p..h......s.....h.......t....t.p.h.hl.................................................................................................................................................................................................... 
0 1455 2852 3553 +13422 PF13580 SIS_2 SIS domain Bateman A agb Jackhmmer:D3Q8F5 Domain SIS (Sugar ISomerase) domains are found in many phosphosugar isomerases and phosphosugar binding proteins. SIS domains are also found in proteins that regulate the expression of genes involved in synthesis of phosphosugars. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.44 0.71 -4.46 65 4732 2012-10-02 15:05:26 2010-11-26 15:23:47 1 23 2778 42 921 4439 1821 135.90 30 60.55 CHANGED pYhsph.ppllppl..pp........ptssIppAuchlspul..pssuhlalaGs.GHSthhApEhhhRsG....Gl.sshpslhh.slhLps...s..s.....t.tustlE+h..puhucp.lhpt..ht....l..pssDllllh.SsSGpN......sssl-hAhtA...+cpGhplIAlTo ..................................................................................t......................p.t.................ttI...p.pAuphlspuh..psG.G+llhsGN.G.s....S....u....ss.A.....chAsEhs..........sp...a.p.p....p..R....s.l.s.u...l....u....lss........s.s.............hloslus-hthc............p...l...a...u+....p....l...puh...................u...ptG.DVL.lu.I.SoSGsS.........ssllpAlctA...+pp..GhpsluLTG............................... 
0 327 600 773 +13423 PF13581 HATPase_c_2 Histidine kinase-like ATPase domain Bateman A agb Jackhmmer:D3Q5R6 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.69 0.71 -4.34 142 5237 2012-10-11 19:05:54 2010-11-26 17:53:18 1 183 1833 0 1838 7481 882 122.50 20 41.79 CHANGED hsuphp.pltt.lpphlpph.h..tt.tslstptht........clpLAlsEAhsNslpHuhtpts.t...............t.l.................plp....................hthpspt....lpl..p........lpD.pGts.hsst.th..s.....................................sstttuGhGlhllpplhDpl..ph.............sss...Gsplphph .......................................................................................................................................t..hth..s..Rthh.tth..h......tp......hs......h.....s........p...t..hp.................................s.l..p.l....s....l.....oE.hs........o....NAlpH.u.....h..t....t.....................................t.l..............................................plp.......................................h.t.h.p.s.sp........lpl...p.......................VpD...p.....Gss....hsht...ph...t...................................................................................sttppsGh.GLh..l.lp.p.l.s..-.....ph.....ph...............tst....Gpplhh.................................................................... 0 613 1434 1724 +13424 PF13582 Reprolysin_3 Metallo-peptidase family M12B Reprolysin-like Coggill P pcc manual Domain This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B. 
22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.01 0.71 -3.60 94 288 2012-10-03 04:41:15 2010-11-29 11:26:57 1 38 237 4 116 3927 262 129.40 19 22.22 CHANGED huthssslspsNtlac..p-hu....lphpLl....slhh......hsss.......s.s.....shss..sss..................tthls.pt...tsh.hss........t.htpsshDl.uplhss......................usuGlAhl..........ushsssspp..............t.uhss.ss..s.st.h.h................shhuHElGHphGusHo ...................................................................................................................................h...h.t.hshss.h..hp......p.s................lphpls.............th.h..............h.pht.................s...t........shhp..pst..............................................tp.hls..sh....ssh...pth......................t....p.p.ss....s...D.h.sh.h.hpthsh...................................ptssGl....Aal........................uuh.shspts.....................................t.shsh...sp......hst...sss......................................psht..HEl.GHs..hGhpH.............................. 0 54 83 105 +13425 PF13583 Reprolysin_4 Metallo-peptidase family M12B Reprolysin-like Coggill P pcc Jackhmmer:C2M7W0 Domain This zinc-binding metallo-peptidase has the characteristic binding motif HExxGHxxGxxH of Reprolysin-like peptidases of family M12B. 
21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.35 0.70 -11.65 0.70 -4.80 9 268 2012-10-03 04:41:15 2010-11-29 11:39:56 1 39 198 0 78 3727 359 164.50 15 27.17 CHANGED sh+shclulssDhoahpha....ushstlpphhhsslsthNclY.s..................pslGhplpLh....sspph.hTss.pss..as........s.s.spps..spthsshsshhGppshDhuhlhp.h....pss.sGLA.aluphs..........p.st.pu.ushuss.............................tp....pa..pl..hAHElGHhhGAsHshs............ussss.osts......-sssGs...oIMu.Y...sssspt..sh...FSssoIphI.th..hp.ushs ..........................................................................................................................................................................h.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s....h..h.................................................................................p.....sh......p..s....hs......HElG.....Hs..LGhsH.s..hs.......................su.s.s.s....spsh.....................pps..p...t.h............olMu..Y.........hp..t.s.......................................th..................................................................................... 0 24 45 68 +13426 PF13584 BatD Oxygen tolerance Coggill P pcc Jackhmmer:C2M5J5 Family This family of proteins carries up to three membrane spanning regions and is involved in tolerance to oxygen in in Bacteroides spp. 
27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 485 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.74 0.70 -12.53 0.70 -5.90 64 896 2010-11-29 12:52:47 2010-11-29 12:52:47 1 20 707 0 273 880 568 397.70 18 74.48 CHANGED hlh.h..sh..hs.....hAp.....sphpAp.s..s+stlu.sEplplshph.s..p...-s...c..s...........Fps...ssh....ps.Fc.V..hGP..spSp....sp..ph..lNGcso....psh..oaoYhLhsp+pGshoIssApl.hsGpphpopslplpVss.ssppspts..................................tthsspslalpspluKsssYhpEslhlsYKlYhpsslp..hs...hc.PchpsFhsppl...sp..ptphp..ptphsG+sYpslhhcphllaPQpoGcLpIsshshsssl...th..............spt..............p.s.....hF.G.t..p.hp......php+plpusshsIpV+PLPp..h+PpsFsGuVGpFshpssh.....s..p..splcsG-slohplslsGpGNlKhhphP..clp..hP...ssh-hY-schpppsphsssG.hpGshshpasllPpptGpasIPslpFsYFDspsppYcTlsopshplsVtpGs.......ss...sss....ss.......sspppht..spshphlc..p..sshht.h......p.p..phFa..soh.haahlhllshlh.hhl.hh.lhh+c.pttp..ps..clsth..+t+cA..s+lA.p+t...LppApchh..spp.pt ................................................................................hh..........h..ht......ph.sp.h..sp...tl..h...s-.hplph...s....t....ps....p...t............hph...sth.......ts.h....l...hts...p.sp....ph........ph....h..N...G.php..........ph..phthhl.hs...pp.G.htIPshpl.....G..hto.psltlpVht.tttt.........................................................................ttt.h.lps.p...l..s.......p.p.p..h.Y..tpthhhphclhht...........p.........ht.....................t.....................t........hp.sph.....p.s..h..h.hpph............sp...ttphp.................p..h....sG...tpa...pshp...pp..ahlhP..pp.sG...p..hp.....lsshthpstl.......................................................................u......................h..hphps..tshslp.V.p.sh......Pt.....ps..t...a....su....t.ph.p.lst.ph..................s....s....pp...h.psG-sloh..plpl...p...u....p...Gs.ht....h.p.hP....plt......hs......pshp.hY..s.tps..php.....t...s.t...t...t......h.....t..up.hs.phsllPppsGphpl.P.s.l.ph.saasspspphcph..phsshsl...pVtts................t...........
......................................................................................................................h.h.h.h.hh..h.hh....h..hh.hh.hh...............................................................s........................................................................................................ 0 92 174 227 +13427 PF13585 CHU_C C-terminal domain of CHU protein family Coggill P pcc Jackhmmer:C2M7W3 Domain The function of this C-terminal domain is not known; there are several conserved tryptophan and asparagine residues. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.25 0.72 -4.10 160 1085 2012-10-03 16:25:20 2010-11-29 13:23:00 1 223 183 3 619 1204 1201 88.80 27 6.67 CHANGED lhls.N.shoPN.GDGh.NDhaplp.s....l.......ss.splpIaNRaGshVapsp..sYp....ssW.cGp.p.........ptLPsGTYaYllph........sst....ppt..hpGalhl .......................h..lPs.sFoP.N..GDG..h..N.D.haplp.s..............h..p..ph......ss..hplpIa.....sRaGph.....l.......apss....shs.....................ssW.DGphps..................p.lPsG..sY.aYhlph.......tsst........t.......hpG.h........................ 0 364 565 618 +13428 PF13586 DDE_Tnp_1_2 Transposase DDE domain Coggill P pcc Jackhmmer:C2M1K3 Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.48 0.72 -3.80 38 2848 2012-10-03 01:22:09 2010-11-29 14:04:42 1 17 787 0 763 3964 388 82.10 27 46.05 CHANGED lpsD+hYcsccsRphhcc+..GI..+lusssls+ssp............hp.tttstttR.slEtpFuhhK+t.hpLsphhs+Lspottsh..lslshhlhsLc ..........................................................hsD+uY.ps.p.t...R.phhtpp...uh.......phs.lstp..tpptp............................h...s..hph.h.+p....R.p.h.lEphFuplK.p.a....R+..l.......u.....p..Ra.-Khspsahuh..ltlAshhlhh................................. 0 202 519 633 +13429 PF13587 DJ-1_PfpI_N N-terminal domain of DJ-1_PfpI family Coggill P pcc Jackhmmer:C2M3D1 Domain This domain is found at the N-terminus of proteins from the DJ-1_PfpI family, Pfam:PF01965. The exact function is not known. 21.50 21.50 21.60 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.60 0.72 -4.42 59 1421 2010-11-29 14:19:25 2010-11-29 14:19:25 1 12 1169 10 438 1155 114 37.70 42 16.10 CHANGED t+lLhVLTSHDpLGsTGc....pTGFWlEEhAsPYYsFpDAG ....................h+lLhVl..TS..t.s..p...h....s...s...suc....tTGhWLpEhstPYhshpcuG......... 0 117 250 366 +13430 PF13588 HSDR_N_2 Type I restriction enzyme R protein N terminus (HSDR_N) Coggill P pcc Jackhmmer:C2M5G6 Domain This family consists of a number of N terminal regions found in type I restriction enzyme R (HSDR) proteins. Restriction and modification (R/M) systems are found in a wide variety of prokaryotes and are thought to protect the host bacterium from the uptake of foreign DNA [1]. Type I restriction and modification systems are encoded by three genes: hsdR, hsdM, and hsdS. The three polypeptides, HsdR, HsdM, and HsdS, often assemble to give an enzyme (R2M2S1) that modifies hemimethylated DNA and restricts unmethylated DNA [2]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.46 0.71 -4.27 81 1412 2012-10-11 20:44:47 2010-11-29 15:02:48 1 34 1136 1 328 1237 289 110.80 21 20.20 CHANGED PEEhVRQchlp.hLlpchsYPt.shltlEhtlp..h.s....s.pp.....cR...s.....DlVl...aspc..............tp...shlllEsKssplpl..sp..ps..h-QltsY.st.s..h...pupahlloNGhpphhhp..hs..h...ps...p.......p..aphl..scIP ................................................hl...hh.p.p.hs...Y...s........t..........p...l...h.s..Eh....phs...hs................p.pp..................c+.....s.....Dhll.hpps.......................p...PhhlIEs.......K..s..sphpl.......sp..............s...h.pQhhpY.hp.h.......h.....s........spash...loNGptahhhp........t...................................................................... 0 106 235 300 +13431 PF13589 HATPase_c_3 Histidine kinase-, DNA gyrase B-, and HSP90-like ATPase Coggill P pcc Jackhmmer:C2M6V6 Domain This family represents, additionally, the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.92 0.71 -4.51 55 9547 2012-10-11 19:05:54 2010-11-29 15:33:13 1 80 4481 84 3019 14695 4507 137.90 25 20.93 CHANGED ho...psAlsELlcNulD.......As....Aps..................lplhls.......pp.........................tsltltDsGtGMshp..-...hh.p...sh.....p.hup...ss.p..p...t.......p.......p.....pps............hGc.hGl.G.KhAuhphupplplho..+...pp...up.ps....s..hp.......l...shphhpppp..s...h.l...ths..pth...............p.hpp..pppGT ..................................................t.hl+E.L.....l....pNu..hD..................As......s.p..h..............................................................................l.cl.p.lc............ps.......................................th....................pp..l..pl.p..DNG..........h.Gh.s....c-........-...........lh.t.....hh..........p...hsp...........o.u....p....p.........p..h..........p.................-.........hpt.....................................................IGp...aG.....h.....G...........h....h....u.....u.....h.....h.....s......s.......s......+.....l.....s...l....p.o..+......ps......sp..pt.......s..ht...............h...........................................................tstttthttttt........................................................................................................................................ 0 999 1810 2464 +13432 PF13590 DUF4136 Domain of unknown function (DUF4136) Coggill P pcc Jackhmmer:C2M2E4 Domain This domain is found in bacterial lipoproteins. The function is not known. 
27.00 27.00 27.10 27.20 26.50 26.90 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.91 0.71 -4.15 166 841 2010-11-29 15:34:22 2010-11-29 15:34:22 1 5 488 0 314 779 367 162.60 18 79.98 CHANGED o..........h......p..lp...sDa....-pss......sF.s.....s...h+Tasahps..........tss....th...s.....sLpp..pRlppulpspLpt+.Gapt......u...p....s.s.Dhhlshthtscp.p........tss............h.s.s.......h............Gh......u...h.........G........au.....s..h..h............G.................s.s...hs..h............spp...h..pp....uo..Lhl-llD........sps..s.........c.........l.lWcGsupstlps...p.s...s....tppt...lsp...hVpplhspaPPp ...............................................ss......phs.th......sa.s.....sh+Tasahps..................st...th...s...sl...pt...p..p.lpsslsppLsp.+.Gap.......s.....sp....su.Dl.....hVphththppp......psh..........................hs.s.....h.............sh.....u...h.........................G................au....s..h..h.................s.................................hs.hsh..............stp....h....pp..ss....Lh..lcl..hD.......sps...s..............p.........h.lWpusupsthss..ps...s.......pppt...htp...hVpphhspaP........................................................................ 0 81 179 254 +13433 PF13591 MerR_2 MerR HTH family regulatory protein Coggill P pcc Jackhmmer:C2M362 Domain \N 27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.43 0.72 -4.38 56 920 2012-10-04 14:01:12 2010-11-29 15:57:16 1 2 905 0 189 833 159 83.90 38 81.93 CHANGED lslsEhCppsslptshlhELl-hGl.lcs.ptt.s...p.....phh.hsspplt.plc+hhRLppDLslshtGlslllcLL-cl-pLppElppL+p .....................hTlsEaCh+.s.G.loc-pLsElVslGllEP..cp..p....s..........sWh.Fcsc.....sshhVpRAhRL+c-LuLshsGIAlsLsLl--lscL+pEsRhLp............. 
0 60 124 167 +13434 PF13592 HTH_33 Winged helix-turn helix Coggill P pcc Jackhmmer:C2M2H6 Domain This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.39 0.72 -4.43 87 787 2012-10-04 14:01:12 2010-11-29 16:08:47 1 15 372 0 214 1136 242 57.50 29 30.89 CHANGED ssthWostpltphlpppaslpa.ohsuhs+lL++hGaSap+Pphh.shctDtctpptFtcph ...............................hppchshsY..ohsuhhpLL+.RtGa.p.hpP+sp.PtK..tD..s.pp..t......................... 0 28 108 155 +13435 PF13593 DUF4137 SBF-like CPA transporter family (DUF4137) Coggill P pcc Jackhmmer:C2M6P1 Domain These family members are membrane transporter proteins of the CPA and AT superfamily. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.84 0.70 -5.33 59 1645 2012-10-02 17:06:44 2010-11-29 16:51:02 1 8 1426 0 476 3347 1338 293.20 43 89.90 CHANGED chFhlhLlsslhLAthhPs..h........Gpssshhp.t.hssshu.lullFhlpGhpLupptlhsGhtpWRLHlhlhhhoFllhPllsh.ulshhht..s..h...ls................................st.....lhhGhlaLssLPoTVpSuluhTohAtGNlsuAlssAuhSsllGlFlTPlLl.tlh.h...........s.ss...............ssshs.........htpsltplsLplllPhllGQllp...sh..h.s...p.......................aspp..p.cp.h..........lphl....DpssILLlVYsuFSsAhspGla..pplsh..hsllslhslshhLhhlllshs....hhhuRhL.t.............................as+............................pDpIslhFCGS.pKSLA.GlPh..usllFs.sts......tlGhllLPLhlaHthQLhlsuhLApphs+ 
...................................................................................D.FhlsLlssVll...Aoh.h.Ps...p............G.s.h...s...s.hh...........................h...o...s.hA...I........u..LL.F....F.h...a...G....A......+..L..S......p.....c..tlls..G..h.t.H...W.R.L...H.L..hV..hs.sT.....Fll...FP..ll..G.l.........h.....h.....t.....h.....h.....h.....s.....s..........ls...............................................s.........LYhGhLaLChL..PuT..VQ.SuI.AFTShAtG.N.VA..A..............AlsuASsSsLLGlF.loPLLV....uLlh...............s.sp..........................Gus.s.s..ht.p.....ltp.Ih...lQ.L..........L...lPFl...lGpL......R.....sa......l..u.......s..............................a..l..pR....p....+p.h........lshs.....D.p.....s.....S.....I.....L.....L.V...V.Ys..A.....F.....S...-..u.h.....s.p..G.lW...pp.luh.............ssL.l.h...l.l..ll.....s.h.lL.L.s.l.l.l..s..ls.......hh.hu.+.h..L...G...............................................................F.s..+............sD..c.I.s.I.....lF..CGS..KK.S.L......As....G..lPM..As..l.....LFs..s............slG.h.hV.LPL..MlF.H.QlQ....LhlCuhLApRat............................................................................................................................................... 0 127 265 386 +13436 PF13594 Amidohydro_5 Amidohydrolase Coggill P pcc Jackhmmer:C2M675 Domain This family of enzymes are a part of a large metal dependent hydrolase superfamily [1]. The family includes Adenine deaminase EC:3.5.4.2 that hydrolyses adenine to form hypoxanthine and ammonia. Adenine deaminases reaction is important for adenine utilisation as a purine and also as a nitrogen source [2]. This family also includes dihydroorotase and N-acetylglucosamine-6-phosphate deacetylases, EC:3.5.1.25 These enzymes catalyse the reaction N-acetyl-D-glucosamine 6-phosphate + H2O <=> D-glucosamine 6-phosphate + acetate. This family includes the catalytic domain of urease alpha subunit [3]. Dihydroorotases (EC:3.5.2.3) are also included [4-5]. 
21.50 21.50 21.50 21.50 21.40 21.40 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.77 0.72 -4.15 1248 3712 2012-10-03 00:45:34 2010-11-29 16:53:58 1 83 2104 48 1323 23661 7737 75.40 25 16.55 CHANGED lhlcss.+I........st..l...................t......tt.....t................l.D.....u.p.G..p.h.lhPGhID..sH.....sHh......................................hhtsh..tphttt......h.....................................sGs...............................T...T..............l .....................................................lhlpsG..+I....................hs.l......................stt..hsh...............ss.sp.........................ll..D..............s...s..G...p...h..l..hPGhID..hH.............sH.h................................................................h.ts.h..................h.................................................ttGl.....T.o...................................................................................................................................... 0 575 1004 1193 +13437 PF13595 DUF4138 Domain of unknown function (DUF4138) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4780 Family Based on Bacteroides thetaiotaomicron gene BT_4780, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 
25.50 25.50 25.60 25.80 23.60 25.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.40 0.70 -5.42 30 514 2010-11-29 22:10:22 2010-11-29 22:10:22 1 2 140 0 63 427 8 231.40 40 82.80 CHANGED plpVshsKTsHllFPpsI+YVDlGSspIlusKApss-NllRlKA...................sspsF.s-TNhoVITtDGphY..sFslhYsspPsthshph.....................phtssstpshhpphss.sssthpphhhslapps.+cl+plss+paG.....lphhLpuIYspsDhhah+hslcNpSNlsYDIDhlRFKlsDKKhsK+TssQphtlpPlhshNthp..lpsKpshRsVaslsKhTlPD-KlLplElhEKs.GGRphslpIcNsDllcAcsl .............................................................................s.lpVoasKTsHlIFPus.lRY.V.DlGSs.p.l.lAsKA-ssENllRlKA...................sscsF.sETNhSVI.TpDGsaY..sFNlpYss-PthLsh-h.p......................................................................................sphspsttplY...hp-lss.pS.....shh....h....ph.h..hpsI..a.pps........pRpl++lus+paG.....hphhLculYscsshlYh+hplcNpoNlsa-lDalpa.K.l.sDKKhsK+TAh...QE.hl.Plcs.hs.st..lsu+ps.R.sVasltKFTlP-DKhLhlEl.E+s.GGRp.ohhl-scDllpAc.h.................................. 0 18 54 63 +13438 PF13596 PAS_10 PAS domain Coggill P pcc Jackhmmer:C2M6E2 Domain \N 22.10 22.10 22.10 22.10 22.00 22.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.14 0.72 -3.77 87 1751 2012-10-04 01:10:46 2010-11-30 10:56:59 1 160 1165 5 368 2524 174 107.80 26 18.13 CHANGED lsslLsohsl.tlsFlDcs.plphass....s...t....p....plhp.hhssslGRslssh....p...sp.....pt..hstl....cp.ll...pplcsspp..s.p.hchhh....s....p..s....s+h.hhh.phhsh+spsuphtGll.shhDlp ..........................hshlLpshPh..-lTa.lDcssphp..a.ass......s.....t....p.....hl..ap...R...s...s.s..p..l..G.+.....s..l.p..p...s........H.........Ps.......tp...hcp...V.........cp..ll.......p.s...l..+.sGpp.......c..h...h..chhh........s.......t.s......Gp..h...h..h..h..p..Y..t...A..h.+..s.p.s...G..p.atGllEhh.Dlp....................................... 
0 125 250 306 +13439 PF13597 NRDD Anaerobic ribonucleoside-triphosphate reductase Coggill P pcc Jackhmmer:C2M7Y9 Family \N 25.00 25.00 25.80 25.40 24.70 24.80 hmmbuild -o /dev/null HMM SEED 546 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.94 0.70 -6.43 45 3413 2012-10-01 23:28:04 2010-11-30 14:10:29 1 23 2857 5 604 2408 158 537.40 35 83.84 CHANGED ptchclspsu.Nhs.huht.....tlsp.....hltphahpp...l.s.s.E.hs.cAH.ppG-laIHDL..sh..h...ssYCsuaoLpplLpcGhsslsG.....pS+PP+HhsoAhuphVNFlhshQspaAGAQAhssF-TYhAPFlRtDt.hsYp..............................-lcQtlQchlaNLNhPsR.hGhQoPFTNloh......sp.s.sh.....cs......lhG.sth...........................h.sttht.....s.hp.Ehchlp+Aah-lhhpGDupGpsFTFPIsThslppc...............ppssspt..lachsAKhuh..htsah.............sph.........-Pss......stuMCCRLpl-lp-htp...us..................................uLh...uus.tTGSIuVlTlNhsRLuhpucs.......ccc.t.h.tcL.......pc.h.p.l.s.......t........Rp.hlpchhp.....pGlaP.....a.hcph......Lhs..............L.cspFsTIGlsGhsEss.t.ph..stp..slp.......s.cuc.......phshclLcal+pphpchpccTGhhaNlEtTPAEusuh+hA+tD+c............c..s......lh..........psts...hh....sshats.ssphslshh-chh....hc.pc.lpshhoGGollHlaL.GE.p.h.sscuhtp.Ll+phh.p.hclsYholTssholCsspGalssc.ptpCPpCGpct.............sEhaoRlhGYh+.PVpu.......aNtGK+p.EatcRphap 
...........................................................................................................................hh..s.NsNcsu.p.h.h.....sp.tc..........hhus.......hssc.hhhpp.....hLP....pc..ls.pAHpcGDIHhHDL...Da...h....hhsCshlshcshLppGFphGsu........plcsPKSIpoAsu.hspIluplsup.aGGpohsphDphLAPas...ptsh..pchcph.t............................................................................pths.tpsp+-hhpAhQul.Ypl....NThho.usGQTPFsolsa........GlsT.sh..............................................................................+.h.Ip.culLpsp...h...tG...l...u.p...t.+....T.s.lFP.pll..F..slccGh.Nhp..........................s.s.s..NYD...lhpLAhcsusKRhaPsl...................................................-h...........sssh........hs.sMGCRohLts....p.tp.....sp..................................tl..........tpGRs....NLGVlolNLP...R.lA.lcupG................................-ts....t...F.hcl..........-+.h.t.l..s.......p.............................+t.h.h.pc.htc............stshP......h..hh..h......hhphhtt.p...........t.s...hh..+p..u..p..h..o..l..GaIGlhEsh.p...ha..s..sp.....hhp.............st.pu.+.......thslpIlcch+ptsppap.........cc..h..Gh..tFSlhuTPuEuLssRFs+.hDpc......................c..FG.....ll...........slTD+s....YYTNSFHh.s...V.pp.p.sssa-Klc....hE.ts..a..h.ssGGaIpYsEh.sp..p.t..N.cAlcs.lhcauhp..+lsYh.uhNsslDpChp....C....G.........a...........p..........u.........t......c........s..........h........p..........CPp.......CGscc...........ttssVh+RssGYLs....sssp...................astG+pp.ElppRV+H.h............................................................................................................. 0 214 398 520 +13440 PF13598 DUF4139 Domain of unknown function (DUF4139) Coggill P pcc Jackhmmer:C2M2S6 Family This family is usually found at the C-terminus of proteins. 
25.50 25.50 26.10 25.60 25.40 25.40 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.68 0.70 -5.12 85 674 2010-11-30 15:48:01 2010-11-30 15:48:01 1 18 418 0 391 713 87 256.30 16 54.27 CHANGED lcloYhlss.A...u.WpPhY-l+ls..s.tp....sp.lplshtAtVpQpTG-DWs.sVpLsLSTupPsps.s..ssPpL.ts.Whlph...hpP......s.h......hst...hssh...........s...ttht..t..tt.h...tts.................s.......h..tt.t..h.....tptsht....s.tlppsshu...ssapls..t.holsusupst.......plslsphshsuc.hphhssPph.s.spAaLhAphsss......s.shslL.sGpss.lahDusaVGpsplsh.hss.......Gpchc.lsFGsDctlplcRphh.c.c..p.ss..p.G.hl.......sp.ppp.h.s.hphplpVcNtp....spshp.lplpDplPlSpscclcVph..................p...p....s...s...................ts..............s.hc............spc......GhlpWclsLssGp.spplphsaplcaPc ...................................................................................................................................................lpY.....h..t.s.....W.s.Ychtht......t...t......................t...htl.h.u.l.ptou.sWp.sstl.lsosps............t.t................s.....h.....s....hth.......................................................................................................................................................................................................................................................................................................t..t.t..t.t............htaph..t....hs.l...sstt..............hh.l..p.ph.....s..p..h...h........h.....h..........h.P.th.....p..............t....s.....h...h..s....p..hts...................t..thsl...G..st.lh...........h.....s.....s.....t......a....l....up.s.lt..hss.......spphp..ls..hG.-.tl..p....lp......hp.....p..t......p..tp..........ts.hh...................st...t..p.........hthphtlpNtt.......tpshp.l.l........-th...Ph...s...t....p..ttl.lt................................................................................................................................................................................
.............................................................t.....t.h.athtl.stt....t..h.hth.hp............................................................................................................................................. 0 155 261 359 +13441 PF13599 Pentapeptide_4 Pentapeptide repeats (9 copies) Coggill P pcc Jackhmmer:C2M218 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.03 0.72 -10.59 0.72 -4.17 86 6775 2012-10-03 04:02:01 2010-11-30 16:17:53 1 256 2008 57 1619 10345 2693 77.40 16 36.50 CHANGED sphpsspFpssp.h........pp.ssFpss.....plcsssFsssp.h..........hpss..........Fpssp.....lpsspFpssp.h...pss....pFpssslp.sspF..p..s......p.lpsss.F ...................................................................................................tspF.p.ss.p.h................pt.s.sFp..ss...............plp.s.ssFp.psp.h.......................ppss...................................F.p.s.ss...............Lp.s..ssFp.s..ss...l............pps...................sF.p.s..sslp..ssph.p..t................................................................................ 0 562 1041 1333 +13442 PF13600 DUF4140 N-terminal domain of unknown function (DUF4140) Coggill P pcc Jackhmmer:C2M2S6 Family This family is often found at the N-terminus of its member proteins, with DUF4139, Pfam:PF13598, at the C-terminus. 
27.00 27.00 27.00 27.00 26.90 26.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.27 0.72 -3.58 88 387 2010-11-30 16:18:01 2010-11-30 16:18:01 1 13 299 0 212 388 20 104.20 21 17.80 CHANGED sVTlassu...ApVsRp..us..l.s...l.ts..Gp..p..plhhpsL..Pssl.st..sSl..pl.s...stus.......ssl...tslp....hptp..h....hp.......ttsst.......plpplcpplcplppphstlpschs...sh...ptphphlp .........tVTla.s.sp...AplsRp.....sp.l.s...L..ts....Gp......s..c.............lhlp.sL..s..ssl..ct..sSl..+V.p...upus........ssl.....hslp.........hptt...h..................ht..........ttt.st............................plpplcpplcplcppht.......tlpsphsshptphthh.t.............................................................................. 0 74 133 193 +13443 PF13601 HTH_34 Winged helix DNA-binding domain Coggill P pcc Jackhmmer:C2M278 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.44 0.72 -3.95 35 545 2012-10-04 14:01:12 2010-11-30 16:59:13 1 7 427 1 221 2120 231 79.50 32 70.80 CHANGED lRLAlhShLhu...V-cA-Fshl+-phpuTsGNLSlplcpLcpAGYIplcKsFpG++PpThh+lTspGRpAFpcYlcuLcphl ..................................RLtlhuhLhs........scp.s.s.F...spL........+.c...hl..s....l.Tc.........GNLop+lptLccsGY.l.p..h...c..K.s....a..t....u.......+....+..............P.p.T.hhplTsp...G...+.......p...A....app....alpsLcph............................... 
0 86 153 191 +13444 PF13602 ADH_zinc_N_2 Zinc-binding dehydrogenase Coggill P pcc Jackhmmer:B3CU33 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.83 0.71 -3.24 420 4473 2012-10-10 17:06:42 2010-11-30 17:15:43 1 149 1864 6 1804 17425 4479 130.70 21 25.39 CHANGED LGA-pll..Dhp.ptth....................h.t..p.hD....lV...........lDs.lG....spsh..pts........h..hh.......s..Gp.h..lth...............................hhthst.t..tthphhhhh.....................t.......ttp.L.....pplspLlcsGp....l+.shls.ps.aP..Lsc..sscAachl.csG+sp.GKlVl ...............................................................................................................................................................hGAptsl.......Dap...ptth............................th.h.pthD.....h.l.....................hDs.hG.....spth....pts.....................h..phl.......tss..Gp..l......lsls.s.................................................t...hth......t..t..t...h..s..h.ph.thhh.............................ht..........................tpp.L...............pp.l.s..p..L.....l....p.p.G..p..............l.....+......s........h..l...s...p.......s...a..........s........l...pp..........st...cA....a.p....hl....c....s......G....+.s...p...G.KlVl............................................... 0 552 1141 1527 +13445 PF13603 tRNA-synt_1_2 Leucyl-tRNA synthetase, Domain 2 Coggill P pcc Jackhmmer:B3CR81 Domain This is a family of the conserved region of Leucine-tRNA ligase or Leucyl-tRNA synthetase, EC:6.1.1.4. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.99 115 4853 2012-10-02 18:00:56 2010-12-01 14:32:33 1 35 4641 29 1125 8298 4984 183.30 43 22.35 CHANGED +NWIG+ScGs-lpFplpst.p...p.......plp.VaTTRPD..TlaGVTalulAPEHPLspcl...scppsplpsFlccs+ppsshEpshtstE..KcGl.hTGhhAlpPls.Gcc......lPlWlANaVL.h-YGT.GAVMuVPAHDQRDa-FAcKYs..LPIp.VIp...sts.tp..p.p........ppAa..s-.cG...hLlNSu....pFsGls.sppAhptIschLc ................................................................................................RNWIG+ScGscls..Fs..l...p....sp....s......p...........................plp..VFTTRPD..TlaGsTah........slA..P..E.........H..s..L..s.p.pl...........................scp...p...s...t........l.......t...s.......a....l......c..c....s.p...p...p.....u.........s.....h.E....c...p......t......s..t..E.........K..p..G...V.....hTG.....h.....a.A....l...p.P....l...s....Gcc.............l.P....lWl.A...s.Y.VL..h..s........Y.GT........GAVMAVPAH.D.pRDa.EFA.p.K....a..s......L..s..I..h....VIp..............sss................h................................tptAa....Tp...cG......h.h..l...N...S....u............h.sGLs...hp..pAhptlhphL............................................................................................................ 0 373 737 963 +13446 PF13604 AAA_30 AAA domain Coggill P pcc Jackhmmer:B3CTP0 Domain This family of domains contain a P-loop motif that is characteristic of the AAA superfamily. Many of the proteins in this family are conjugative transfer proteins. There is a Walker A and Walker B. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.61 0.71 -11.30 0.71 -4.73 84 5200 2012-10-05 12:31:09 2010-12-01 15:00:29 1 89 3798 11 1137 10491 2582 192.00 29 21.89 CHANGED pLsspQtpAlcplhsssphhsll.GhAGsGKToslpu.sppshcs.............p.G.hpVhGhA.oupAAcs.Lpcs............hG.......lpupTlushltthsp..................................hssps.....llllDEAGMVuocphtcllptspc.sGA.+llLVGDspQLtulpA.GusFctltcphs....sscLsplhRQc......ssht+.pAsptltpGcssp...uLshh.tppGplptsss ..........................................................................................t..t..Q.t....t....A....h...t....t.....s.....l.......t.......p.......p..................l.....h..........ll..oGG.PG..TG...............KT....T....s.......l...p..t.......l.......l..t..h.h..tp.............................................................................p...s...h....p...l...h..L....s.....A......P....T......G....+...A....A.....p....+.....ls...Es....................................................................h..G........................h.p.A...p.....T...l..H.....+....l...L.t....h....p..s.s...s...p..t.........p...............................................................................hh.s.t.s............lll..l...DE.....uS........M...........l..............D...........h..............h.............h.............h............s...........p.............L.........l...........p........s...........l.........s.......s.......................s.........s..........+.........l.lll...GDp.c.....Q.....L......s.......S........V.............t........s.......G..........s..............l.............h...............t...........-..........l...............h..............p..............t............t..............................h.l..p..p..hh.Rpt.....................h......s..h........................................sh...............................................................................................................................................
........................... 0 339 718 942 +13447 PF13605 DUF4141 Domain of unknown function (DUF4141) Ellrott K, Bakolitsa C kellrott Bacteroides thetaiotaomicron: BT_4772 Family Based on Bacteroides thetaiotaomicron gene BT_4772, a putative uncharacterized protein. As seen in gene expression experiments (http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE2231), it appears to be upregulated in the presence of host or vs when in culture [1,2]. 30.00 30.00 30.20 30.20 29.90 29.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.78 0.72 -4.77 24 314 2010-12-01 19:13:44 2010-12-01 19:13:44 1 1 144 0 39 229 13 53.60 45 25.74 CHANGED +p+lhhlhhshh.hhsspA+AQWVVTDPuNLAQuIlNos+pIVpTSsTApNslpNF ...........hpplhhlhsssh..lhsspApAQWVVoDPuNLAQuIlNusKpIlpTSpTApNhlpsF.............. 0 10 28 35 +13448 PF13606 Ank_3 Ankyrin repeat Coggill P pcc Jackhmmer:B3CUD7 Repeat Ankyrins are multifunctional adaptors that link specific proteins to the membrane-associated, spectrin- actin cytoskeleton. This repeat-domain is a 'membrane-binding' domain of up to 24 repeated units, and it mediates most of the protein's binding activities. 20.00 17.20 20.00 17.20 19.90 17.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.28 0.74 -7.80 0.74 -3.13 248 446 2012-10-02 12:10:21 2010-12-02 11:06:47 1 179 202 0 271 70923 5071 30.10 27 4.47 CHANGED p..s..p..os....L..p..h.A..st....p...s....p.....h...p.h..l.chLl.p....p....s..s...s...lst .........................sp.Ts....L...H...h.A.......sp................p.........u.......p................h......-.l....l.chLl.p......p.......s..s....s.................................. 0 129 176 221 +13449 PF13607 Succ_CoA_lig Succinyl-CoA ligase like flavodoxin domain Bateman A agb Jackhmmer:A3JZV1 Domain This domain contains the catalytic domain from Succinyl-CoA ligase alpha subunit and other related enzymes. A conserved histidine is involved in phosphoryl transfer. 
24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.21 0.71 -4.57 213 2340 2012-10-02 00:59:22 2010-12-02 13:50:07 1 30 1654 2 873 4427 2578 135.40 36 17.93 CHANGED sGs.lullSQSGu.luss.lhshutpp..s.lGhSphlShGN.ps.DlshsDhlc.ahsp...Ds.pTcsIhlYlEulpc..u.ccFhpsA+pss..tp.KPllllKuG+.....optGspAA..tSHT.uuluGsstla-Ash+psGllpVcshc-....lh-suph..l ...............GplAhlSQSuAluss..lLc.hApp+......s......l.........G...FShh.lol.Gs..ps..D....l...s..h..s..-..l..L-aL.uc....................Ds..cT..psIlLYlE.....s..l.p.....-.....u...R+....F.h.s...A.A.....R.pA.u....+....s..K...PllllK.uGR...........os.s.u.....t....c....h..s......sHs....u.u..h........A.....G....s....D.....s....s....a....-....A....s..h...p...cuGllRVpshcE....Lhssscsh............................................................................ 0 284 570 741 +13450 PF13608 Potyvirid-P3 Protein P3 of Potyviral polyprotein Coggill P pcc Jackhmmer:P04517 Family This is the P3 protein section of the Potyviridae polyproteins. The function is not known except that the protein is essential to viral survival. 
25.00 25.00 25.10 25.10 22.00 24.60 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.77 0.70 -12.53 0.70 -5.36 65 976 2010-12-02 15:02:58 2010-12-02 15:02:58 1 24 129 0 0 1226 0 381.00 35 16.23 CHANGED shchLIKula+Pchhppllp--PYlllhullSPslLlshassuulEpAhphWlp+cpslutIhshLpsLAcKVShAcsLhpQhplIppsuspLhchlpss.pss.h.u.h....phuhph....Lphh..tpcscssppLhpsGFss.hp.pphhphhEK....pYhptLcppWp-LohhpKhphhhpsp+hptthpptLp.p..sts-hpst.hshSspthhspstpphpsshs.....pshppspphh....psht.pphhshslp.slpthhsDlhphlNlhlllSlLlslhpslpshlppt+ph+tp.ht.thcpccptpplttha.h.a....sphp.p....c.t...ss...ptpFh-a...lc.....phpPcLhphh..p.hh....tp..pp..VpaQu.KsssptpLE+IlAFhuLlhMlFDsERSDsVaKlLsKlKslhuohspc...............Vp..............a.Q.S..LD..-Its.hh--KphTIDF-lsssttss.sssh.-.sTFppWWspQLppNRslPHYRTpG ................h+hLl+ula+P+hhppll.p-PYlllhullSPulLhuhYpssthEhuhphWIpccpslAhlhslLptLAt+VohupsLhtQhplIppsutpLh-hhtss.p..h.u.h.....hu.hp.h....Lphh..hppppssppLhptGass.hp.pphhphhEK.....Yhp.LcptWp-LSahEKhuhhh.t.+hphthpc.lh.p..stsDltuh.hshSspshhtpshppl+sshp.....thtpchpshhpt.phttl.shhlt.ththhhsshhphlshlllhSlLlplsushpshlppp++hKt.htthctccp..tl.hhathh....tpht.s....ptPohpcFhpalp...thsPcLhphh..ps.h.....tp...psVhHQu.K.pssptpLEpllAFhALlhMhFDsERSDsVaKlLNKhKulhuohspp...............Vp...............a.Q.S..LD..-.l.s..h--+phhlDF-Lppst....p.....-..hpFtpWWspQlp.s..slPHYRopG....... 
0 0 0 0 +13451 PF13609 Porin_4 Gram-negative porin Bateman A agb Jackhmmer:A3K5H9 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.28 0.70 -4.91 115 4742 2012-10-03 17:14:37 2010-12-02 15:24:35 1 10 840 14 1423 6841 1393 323.20 20 89.64 CHANGED As.s....sls....u....s....uus.A.tA.ps.....................sVslhGthc..hslt...................hhs.......s.tt....s..............tsts.....t.tssht....ss....s..cl...........shps..........ptphs...sGhs....h..tuth..pL-...................ss....t........s....................................................ss..s......sh..s..............scpu.....hlu..l.su..saGplphGc.......p..ts................sh..pphhs....th....s..h.t....hs.s.hssh....t................h..shs....ss..s.s...........................................................s......................sttsssl.....hhhs..ss......hu..G.h.shussYshsp.ss..ss...................................................st......sphh......slss.s.Y.s...h.u....shssu.su.....as..................psp.sss.......................phhshu.sp..hsh...us.h.............slsus..........as.......phpssssht..s.......................................scts.sht.....luss...a.sh.....s.s..h.s.lsssY........sphc.s..........s....................sssscspthslussYsL..up..pssl..aup..huh..h.csc...s.s 
.....................................................................................................................................................................................................................................h..s.shs.....h..h....u..s.s...A...tA..po...................................................oVoLYGhl-..sulp.......................................ah.s................stts......s.................................................psht.......h...t.s.s.sh.......ss..........S.Rh..............Gl+G..........sE-.LG...sG............h..tAhF..pLE.....................ssh............s............................................................ss...s...shh....................sRpA......aVG...Lss..sa...G...p.ls......hGR................p..hs................................hh.....s.hhs......................s...ht......sh..s....hss.h......t..........................h.....h..hs.......ss.............................................................................................................................s.......................phsssl......tah.o....ss.........au.......G..h.phs.ut..Yu.hus.ss..st.....................................................................................................t..tspsa.............uh.uh.s.Ys......hu.......s.hslu...su.............Yt.............................p.pssss.........................................tp.ptht.h...u..us..Ysh...us.h..................pl..su.s....................................Yp...........ps.p.h.p.s.s.hs..s..............................................................................tchs.sht....l..us.p......Y...ph......s.s...h...s..l....t..u..u..Y......thscs...............ts............................................................................ssssp...h....pp.h........s...l..u....ss..Y.t..L..SK....+Tp..l..Yst..hsh...p....s................................................................................................................ 
0 153 515 976 +13452 PF13610 DDE_Tnp_IS240 DDE domain Bateman A agb Jackhmmer:A3K4H6 Domain This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.56 0.71 -4.21 25 3068 2012-10-03 01:22:09 2010-12-02 16:00:46 1 16 1006 0 405 6167 734 118.80 40 59.33 CHANGED upsW+lDETYl+l+GcWpYLYRAVDpcGp.slDahLop+RDttAA+tFh++slcptt...tpPcsllTDphsuYstAlcclt...t..ch.................tplpphpsK.ahNNhlEpcHphlKp+h..pshpGF+ShcsApphluGh-shpslp+sph ..................................h..pW+h.DE.T..Y..l...K..l..p........G.+..W..t..Y..L....Y..R..Al..Ds............c.Gp..Tl.DhhLp.p.p.R.ss.p.u.A.h.tFl.p.+....l....l.pph..........stP..p...h..l.....s..T.....D......p.......u.s..u..h......t...t......A...h..t.p.lh....p...th....h......................................................................t.h..p..Hp....p.....K....Y...hNN...lIE.pDHthlK.t.h..h..........ht.a...p.....S...h..p..o.A....s.h.l.p.G.h.E.s.h.hslh+tp.t............................................................................................. 0 90 222 337 +13453 PF13611 Peptidase_S76 Serine peptidase of plant viral polyprotein, P1 Rawlings N, Coggill P pcc Jackhmmer:Q65730 Domain This family is the P1 protein of the Potyviridae polyproteins that is a serine peptidase at the N-terminus. The catalytic triad in Swiss:Q65730, the ssRNA positive-strand Brome streak mosaic rymovirus, is His-311, Asp-322 and Ser-355. 
27.00 27.00 27.00 27.00 26.70 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.79 0.71 -4.00 6 57 2012-10-02 13:45:52 2010-12-02 16:11:56 1 12 13 0 0 162 0 123.60 34 5.04 CHANGED Esll-.clsssccpsIc............Kp..ALhKp+pp.+l..VAN...lsDLsppLopICsEpGIPIl.lDppKR+AI.Ph...........V+L+Hlht.pl...tpDDhat--RhFLEHhst.ppshRssc..+Is.p.VRPGWSGsVI ...........................................lh-.cl.hhc.hch+.....................p..A.h+ch+...p...ppl...lus....lsDLhppls-IChEcshPIphIsssK++.sl..sh...........V+L+Hsht...ch....hs...tpDDMh.s.-RtalpHhsh.ttshp.sc..KIppp.V+PGWSGsll.................... 0 0 0 0 +13454 PF13612 DDE_Tnp_1_3 Transposase DDE domain Coggill P pcc Jackhmmer:B3CSW4 Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contains three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. The catalytic activity of this enzyme involves DNA cleavage at a specific site followed by a strand transfer reaction [3]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -10.75 0.71 -4.48 38 1053 2012-10-03 01:22:09 2010-12-02 17:12:34 1 4 328 0 267 2914 378 123.10 32 58.29 CHANGED hGcsTGIuFIDSTslsVCHNhRI.c.HKVFcGlApRGKoohGWFaGFKLHLllNcpGElluhplTsGNlDDRcPl..ptLscsLhGKLauDKGYISppLhcpLhpp.GlpLlTplR+NMKspL.hshhDKhhLRKRulIETlsDpLKNlsQIEHSRHRSl .............................................................h.hlDSh.l.l...Cp.hR...t....t.+h.....h...p....s....h.....A.p.h....G..h......s..s..h.....t.......a.......a...a................G...aKlHh..l..h...s..p.p.G...l.h....s...a..h.l........T..u.sscD..hpsh..........h...h...........p.............t.....h............h.............s............h..........l.h........u..D.........cGY...l.u.........p.p...L...h..ppL...h...t........p......u....h............p...l........h........T......................h.......+.........p.........N......M.........c..........t..........t..............h...................p..................c....c.........h....h....h.t.p....Rth...IEoh..s.Lhp.hph.p.hh......................................................................... 0 82 137 182 +13455 PF13613 HTH_Tnp_4 DDE_4_2; Helix-turn-helix of DDE superfamily endonuclease Coggill P pcc Jackhmmer:B3CR78 Domain This domain is the probable DNA-binding region of transposase enzymes, necessary for efficient DNA transposition. Most of the members derive from the IS superfamily IS5 and rather fewer from IS4. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.69 0.72 -8.09 0.72 -4.43 117 1593 2012-10-04 14:01:12 2010-12-02 17:48:36 1 20 366 0 326 1064 32 50.00 44 29.09 CHANGED p+L.shp-plllsLhaLRpshohppLuhtFsl.upoTss+hlpphhshLttphst .........+L.shED.LhhTLpYlR-Y+TY.cpIAscF.G.I.p....ESslhRtspaVEssLspsu..h.......... 
0 161 288 317 +13456 PF13614 AAA_31 AAA domain Bateman A agb Jackhmmer:C4FZU1 Domain This family includes a wide variety of AAA domains including some that have lost essential nucleotide binding residues in the P-loop. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.60 0.71 -4.29 61 7392 2012-10-05 12:31:09 2010-12-03 11:32:26 1 25 3411 29 2053 16785 5368 138.30 24 29.84 CHANGED cllulhSshsphGposhAlsluphLuppt.t.........VLhlsh-thsu.tthh.ptttp.................sls-lLhhht....t.....plsshlh..ph....sshshlsshpssp-hptlstpchtpLlpplpp..tY-..hlllDlushhpthhs...lLphscplhlssppsshutpp .....................................hlhlsSs.p.sG..G.Koo.lusNL.A....hsl...Aptut+.........................Vll..l..D.u..D.....h.....p....p....s......s..h.......s......p.......h....h...s...h.ssp.........................................................Gl..s....p....h.l....ts.t......................................................p..h.p..p.h.h........th.....................sh.h......l......h......s.........s........s......................s.........s.........s......s.........s.........p.............h......l..........t...........s...........p.........p..........h.............t.............p............l.........l......p..........hpp...............pa-............hlllD....s.s.s.........h..h........s.s....s....s.......h....l...h.p.....h...s...ss.h.lhVsp.t......pt.................................................................................................................................................. 0 671 1329 1734 +13457 PF13615 Racemase_4 Putative alanine racemase Coggill P pcc PF09739_manual Domain This is a family of eukaryotic proteins which are putatively alanine racemase. 
22.40 22.40 23.00 25.70 21.80 20.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.26 0.72 -3.77 39 179 2012-10-03 05:58:16 2010-12-03 16:56:07 1 7 148 0 127 188 5 102.40 37 17.07 CHANGED lsaL.sp.sLt.GDsLAAEalLLaLlSp......VhsR................ss.shs.lGphoLNL...sshstps....................hsppLtphlppllPtsthlslolpsLNs...hphs.P++Dhpss+LhoGlLQLu .............h.taLop.sLh.GDpLAAEYLlLHLlSp......VasR................p-..shs.lG+hoLNl...oshspss.................................................................sasppLhpllppLlPtohhlshTl-shNp...hphh.P+......KDYpsN+....LhoGlLQLs.......... 0 58 76 106 +13458 PF13616 Rotamase_3 PPIC-type PPIASE domain Coggill P pcc Jackhmmer:B3CTU8 Domain Rotamases increase the rate of protein folding by catalysing the interconversion of cis-proline and trans-proline. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.39 0.71 -10.55 0.71 -3.83 68 3691 2012-10-02 13:30:10 2010-12-03 17:11:49 1 35 2370 6 991 7167 3075 115.00 30 30.29 CHANGED hplhKllsppp...................hsD.SlpspHIhls.................spsttcA.+ppADSIhsslp.sGu.cFssLA+caStD.t..ttpGG......-lsWhs.....tu......phstpFtsslhss.psucl.t.slcoshGhHIlp...Vh-++ ......................................................................................................ttt..............................tp..phpspHILlp..s...................................................c......pp....A....cp.l....h....p....c......L..........c....s........G..........s......c.........Fu....pLA+p.h..S..p..D..s...u......s......t..p..p..G...G...................-L.G...a.hs............tu.................phs.s.t.F.c..c.A.shsL....c...............h.....G.....p....l......o....s.......P......V......+.....o....p.....a....G......aH..II+lp-h....................................... 
0 308 613 823 +13459 PF13617 Lipoprotein_19 YnbE-like lipoprotein Bateman A agb Jackhmmer:B8H670 Family This family includes lipoproteins similar to E. coli YnbE Swiss:P64448. Protein in this family are typically 60 amino acids in length and contain an N-terminal lipid attachment site, which has been included in the alignment to increase sensitivity. The specific function of these proteins is unknown. 22.60 22.60 25.70 25.10 20.60 19.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.87 0.72 -4.34 77 733 2010-12-04 11:44:36 2010-12-04 11:44:36 1 2 727 0 121 293 18 58.00 58 90.54 CHANGED hhhhhshhsshhlu.uCo.......Pplclp.sPc..cPIsINhNVKI-HEIclKVD+-l-sLlpspssL ..........hhlsuhhuohhLs..GCT.......PRIEVA.APc..EPITINMNVKIEHEIhIKsDKDVE-LLcoRSDL.............. 0 26 57 89 +13460 PF13618 Gluconate_2-dh3 Gluconate 2-dehydrogenase subunit 3 Bateman A agb Jackhmmer:B8GVK2 Family This family corresponds to subunit 3 of the Gluconate 2-dehydrogenase enzyme that catalyses the conversion of gluconate to 2-dehydro-D-gluconate [1] EC:1.1.99.3. 
23.40 23.40 23.40 23.40 22.60 23.20 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.74 0.71 -4.00 170 695 2010-12-04 12:21:38 2010-12-04 12:21:38 1 6 483 0 300 712 280 143.80 22 62.23 CHANGED sspch.phlssls-tll.....Pps.ch...PuAt.....t......................................ssls.tFl-phlss............................spp......................................p..phhptGlst....l-phspppa..sps.FssLssppp.stlLpthptsph.........................................t..........stF....F.phlpshslpGaas.s.h......atGsps.hht.a................hPG ................................................................tpchshlpuhscpll.....Pp-..c......sGAh.....p....................................ssVs..tFIDpplss..........................s....t................................p.phaptGlss....l-phupppa.....scs.FspLs.s.t.pp.-plLpshppsph.................................................t.th..sphFF.shlhp.shpGaas..sPh..............asGsps.hhu.a........hP......................................................... 0 78 182 257 +13461 PF13619 KTSC KTSC domain Bateman A agb Jackhmmer:B8GXL8 Domain This short domain is named after Lysine tRNA synthetase C-terminal domain. It is found at the C-terminus of some Lysyl tRNA synthetases as well as a single domain in bacterial proteins. The domain is about 60 amino acids in length and contains a reasonably conserved YXY motif in the centre of the sequence. The function of this domain is unknown but it could be an RNA binding domain. 21.40 21.40 21.40 21.50 21.00 21.20 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -8.82 0.72 -4.33 89 426 2010-12-04 12:39:23 2010-12-04 12:39:23 1 7 377 \N 139 373 132 59.70 28 54.38 CHANGED lsS..SslpuluY....DspsptLplpF.....psGs....h.YpYhsVPtplapshhs..As.......ShGpaasppI.+....sp.Ys ............S.otltulsY....DspsphLclpF.....psGs......h.YpYhsVPtplapshhp....us.......StGpaapphI+.....sp.a.................... 
0 31 81 109 +13462 PF13620 CarboxypepD_reg Carboxypeptidase regulatory-like domain Bateman A agb Jackhmmer:B8H1B8 Domain \N 28.50 28.50 28.50 28.50 28.40 28.40 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.92 0.72 -3.92 622 6052 2012-10-02 19:08:27 2010-12-04 12:41:19 1 501 1121 14 3219 18992 4276 80.30 21 11.43 CHANGED s..lpGpV...p.D.s...s.G..t...sl......ssAsVp...l...pt............t.....s...........t......s...ts......s...h.T..s.ss.Gpapls..tl...s.s.....G..s..Y..p..lps.....st.G..ap..stp.h.............p..ltl.p.s...s..p.....s..t.s...l...s...l...s......L...p ...............................................lpGpV.........h..-.s.....s..G.....p..........s.l.........s.sAs..Vp.........l......t....................t.......s......................................h.......s.....ts................s....tT......s.....s.s..Gpa..p...ht......t..l..........ss.......G.....s....Y...p..l.p.s...........s.t...s...G......at....stp.h...................................ltlt......s..t...............h..t.h............................................................. 0 1542 2401 2868 +13463 PF13621 Cupin_8 Cupin-like domain Bateman A agb Jackhmmer:B8H2W4 Domain This cupin like domain shares similarity to the JmjC domain. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.79 0.70 -4.88 90 2241 2012-10-10 13:59:34 2010-12-04 13:30:07 1 98 579 36 1539 2844 306 243.80 17 46.84 CHANGED spph...pchlspspPlll+u..hs..p..c..WP...shpt..Wpp............hcYLtpt.hss.htVp.....................sthts.t.t.sth......................................................................h.tt....s..hp.hhs..h.p........ttphshpch..................................lsp.lpptt..........t...................................sh.Yl.................t.....sss...l..sp.ph.st..l.t.......................p.....c...........psls...h.hs...thh......s..t.............................tss.........................................................lWlG..................................stp..s...p...oshHaDh......h-....................NlhshlpGcKcFhLhPPpphs............pL........................Yh.s............................shp......................ts...s..........................................................................................................................................h.....Sh...............lD.h.p................................................psDh..p.........caP..phpps.........ps.hhspLpsGD............sLalPuhWaHcV.............................cu.......h...............ss....hslulNa.Waptt..st 
.................................................................................................................................................................................................................................................................................................................................................................h.........h....tpPhll.ps..h.h....t.....t......hs.........s.hp.t..Wp.............................phh.ph...h.s.p.h...l..........................................................h.t...t.....................................................................................................................................................................t...t..................thph.sh....ppa.....................................................................................................................lph....h....pp....tt.....................t.............................................................hhhh.....................t......ppth........p...th.......s.t.l.h...................................................................................................t..c...............ths..........h..t....th.h.........s...pt......t..................................................................ths.................................................................................................................................................................................................ha.h..G.....................................................sts.....s...h...osh..Hh..D............h.p...............................s..h..h........s....l......p.........Gp..K...phhLa.s.P...p.p....t...t.....t.l.........................................a.t.........................................................s..................................................................................................................................................................................................................................................
..............................................................................................................................................................h.s.......l.s..h.t..................................................p.sh....p.........pa.P.....th.tps.............ps..h.cs...hl..p.sG..-..................................hLal.....P..stW.aHt.V...........................................................................................................................psh..........................t......slulsh..a......h...................................................................................................................................................................................................................... 0 639 928 1286 +13464 PF13622 4HBT_3 Thioesterase-like superfamily Bateman A agb Jackhmmer:B8GZN3 Domain This family contains a wide variety of enzymes, principally thioesterases. These enzymes are part of the Hotdog fold superfamily [1]. 21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.85 0.70 -4.64 200 2392 2012-10-02 20:54:35 2010-12-04 15:19:22 1 22 1107 8 1028 4235 1639 249.30 20 84.50 CHANGED hsssWt............................sthsaGG.hss...ullhpAhppt...............tss.....................hthhplsssalt.ssssu.shpl.....pscslR.........sGRshshlpspl..hp.......sut.....................s..sspupsshhpsp.t..............................................sshP.sP........spp........h......................................h-h+hh......................................hsssss..hphWhRhp.ssh...........................................t.........lhhlsDsh..sshshthhs.........................................hss.slsh.....olp...............h.......................t....phP.t..sp...........Wlhhcs.....psphsssGh..uttpsplaD.................pp.G..hlupupQsshl 
..............................................................................................................h..............................ttsaG..G..lhu............p.s....l....hs.Atcss..........................stt...........h.spSl.p.s.hF..lt..su..s.....s..s..P..l.hh.........pVcpl.R..............cG.+o..hsstp...Vps...h.Q.............pGc..........................................s..lhpspso.Ftt...tc.ps.................................................................t...sshP.................s.P-p....h...s..tt.....h.............h................................................................h-hR.hst...............................................sststts...h...thWhRsp..ssh.ss.........t.................................ph......t...........LshhuDhh.........sssh.h.hhtth........................................................................hhhs.....ol-h.....sha................................................h.ctP..hph..sc.........Wlhhpt.............pos.u.s.su..h..uhspuplas............................ps.G.pLlAsstQpuh................................................................................................................ 0 253 600 877 +13465 PF13623 SurA_N_2 SurA N-terminal domain Coggill P pcc Jackhmmer:B3CTU8 Domain This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.84 0.71 -4.21 6 236 2012-10-02 13:36:56 2010-12-06 09:20:05 1 9 235 0 66 1450 838 139.80 32 20.93 CHANGED LGKIRS..KGslLlhlIGLuLFAFlA.psh.RSCpus+sppRpQVGEVhGEKISlQDFQKhl-EYppsIK.hp.tp.s.....ppE..pQVKDtVWpphVsN+LlEt-AK+lGLTVT-pElQsVLptGsNPhLhQT........PFV.pQ..TGRFDssuLKpFl ......................................................................................LppIRs..K.us..l..L..l.l..l.I..G...lA..LF..AFlh....u.sh....hp.s.....t...s.........t....t...ps..p.p.....V.G.c.Vs..G-.p.Io....h....p-....a....p...p....h....V....-..p........h...s..t..h......+....hp..t.Gt.....ss......................hsp..p............s.......Q..l..+..cpV....WpphVpptll.pp-sc+LGlsV..octE.l.p.s.ll.....t........s..s.s..P...hl.hps..........Phh...ss.psGtFDtstLppal................................................ 0 27 54 61 +13466 PF13624 SurA_N_3 SurA N-terminal domain Coggill P pcc Jackhmmer:O66854 Domain This domain is found at the N-terminus of the chaperone SurA. It is a helical domain of unknown function. The C-terminus of the SurA protein folds back and forms part of this domain also but is not included in the current alignment. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.88 0.71 -4.24 9 2783 2012-10-02 13:36:56 2010-12-06 10:42:40 1 22 2338 1 800 3368 1934 155.70 23 29.90 CHANGED MhchIR+..p..s....c..hhtslhsllshhFhLh..uhtuh.pp.h.upp.ssVApVsGpsIphp-ap+.....chc....h......hpp.hp......sp..........tpch.....lppt.....lLcshIs+cLLhppAp+hsltVoDppVsctIpp.PtFQ..s.GhFstphYpphLtpsshostpaEp.l+cplhlp+ht ...............................................hpt........t...................hh.h..h..h...h.h..l..l.h.l.s.F..h...h.s............G...l..s.....uh.......h........h..............t.....s.......ss....s.......h....s...A....p....V.s...s...pc...I.op..s...c...acp......................t.hp............p......................p.p.p.hp.................p.p.h...........................t.t.pt....................l.+pp..................l.LspLIs.ct....L.L....t.p...h......A.........cc..h...s.l..s...l....S...........-......p....p...l......c.........p........t.........l.........h........p........h.........s........t.......F.......p......s.....s.......G.........p.F..s..p...p..p..a....p.t...h....L....p...p..................u..h..o.....s.....c......p.at......ptl....+pplhhpp..h.................................................................. 0 290 539 681 +13467 PF13625 Helicase_C_3 Helicase conserved C-terminal domain Coggill P pcc Jackhmmer:B0SDT2 Domain This domain family is found in a wide variety of helicases and helicase-related proteins. 
32.50 32.50 32.50 32.50 32.40 32.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.37 0.71 -4.38 93 1240 2010-12-06 10:42:54 2010-12-06 10:42:54 1 14 735 0 510 1049 240 129.40 30 19.02 CHANGED sllVQuDhTlll.s.ups.s.sp.hs.c.tlushA-l.E.Ss.ttspsYRlTshSlhpAhsAGhsA-pllshLppaS...+hsVPQsLhhhlsDsspRaGpL+l.....tt.....s..u...hhlcssD.slLs.clhtstph.pslhhcc.l..uPssl ...........................lllQu.D.t.T.l.lL-s.s.p..t.sc.tA.c.tLsshAEh.c..ps..pplHsYRlTshSLasAhusGhss-pllshLtca...S.......+h.s.V.Pp..u......Lh.h.......hIs-..ssp.caG+l+l.....tp..................sthhlcssD.slLppl......Lpsptltshhht.p.ls.t................................. 0 192 352 454 +13469 PF13627 LPAM_2 Prokaryotic lipoprotein-attachment site Coggill P pcc Jackhmmer:C6QHI9 Motif In prokaryotes, membrane lipoproteins are synthesized with a precursor signal peptide, which is cleaved by a specific lipoprotein signal peptidase (signal peptidase II). The peptidase recognizes a conserved sequence and cuts upstream of a cysteine residue to which a glyceride-fatty acid lipid is attached [1]. 27.00 27.00 27.00 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.07 0.72 -7.18 0.72 -4.14 142 1185 2010-12-06 13:22:07 2010-12-06 13:22:07 1 2 1178 0 174 520 13 24.30 50 36.96 CHANGED hhlhhhhs..shsL.uGCGpKGsL.....Yh...P ........hlhllls....hhuL.oGCGhKGPLYhP.............. 0 30 73 125 +13470 PF13628 DUF4142 Domain of unknown function (DUF4142) Coggill P pcc Jackhmmer:C6QAI3 Domain This is a bacterial family of unknown function. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.45 0.71 -4.15 144 804 2012-10-01 21:25:29 2010-12-06 14:02:49 1 5 412 0 357 1141 171 133.60 25 68.88 CHANGED ss.Dpp.FlppAutushhElpuu+LAh.p+u.psspVKsFAppMlpDHspsspcLpplAp.....ptslsls......ttlstpppttl....spLpsh....sG.tsFDcs.Yhspt.lssHccsl...slhcp.h..sss......upsscLKsaApp.sL..PslppHLptAcpL .....................................................t.t-tpFlppsstushhEl.puuclAh.p+.u.pss.p...........V+sFAppMlpDH....sp....ssppl.t.p.l..Ap........................p.t..slp.ls.......................tths.sp...tpshl....................spL..p.sh........pG.tsFDps.Ylptt.lssHpcsl...shhpp..h..tts......ucsspL+shApp.sl..PslppHlptActl................................ 0 95 200 267 +13471 PF13629 T2SS-T3SS_pil_N Pilus formation protein N terminal region Coggill P pcc Jackhmmer:C6QEC4 Domain \N 21.50 21.50 21.50 21.50 21.30 21.10 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.18 0.72 -4.45 188 1023 2012-10-03 16:25:20 2010-12-06 15:01:29 1 19 613 0 374 1013 88 71.40 24 16.39 CHANGED sspslplshupupllph.spshpp..VhVusPplADspl.h.......ssp.....plhlhGKpsGsTslhla..spsup...lhshpltVs .......................s..ttlplshupup..hlpl..s.p...s..hpp.......lhlu..sPplA.Ds.pl.h................ssp......pl.hlhG+.phGsTslhlh.....spsup..htshsltV........................ 0 99 193 279 +13472 PF13630 SdpI SdpI/YhfL protein family Bateman A agb Jackhmmer:C7PMJ0 Family This family of proteins includes the SdpI and YhfL proteins from B. subtilis. The SdpI protein is a multipass integral membrane protein that protects toxin-producing cells from being killed. Killing is mediated by the exported toxic protein SdpC an extracellular protein that induces the synthesis of an immunity protein [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.65 0.72 -4.27 205 1094 2010-12-06 15:14:03 2010-12-06 15:14:03 1 8 814 0 214 904 57 75.20 21 46.31 CHANGED sphc.Nh.hhGlRTsho..........hps-csWcpsp+.......huG..hlhh....hsGll..hllhuhh................thhhhlhhhhhllshlls..hhhuh ...........................hc.Nh.hhGhR.osho..........hpsccsWctsp+..............huu....hlhh.......hsGll...hllhuhh...hh...............shhhhhhhhhhllh.hlhshh.............................................. 0 82 154 180 +13473 PF13631 Cytochrom_B_N_2 Cytochrome b(N-terminal)/b6/petB Coggill P pcc Jackhmmer:C1Z9G2 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.20 0.71 -4.46 68 102102 2012-10-03 10:28:09 2010-12-06 16:15:23 1 28 38793 95 787 97053 2443 142.10 66 51.21 CHANGED .hhhlshhlHhhRshahGua+.ts+.phsWhlGlhLLlLshhpuFhGYsLPhs.hShhuhpl.hs.slhpu......lPhlG...shlh.hlaG.........G.hssss...hlsR.hash.Hl.llPhlllulhhhHlhl..l..pppspts............s..h...tp...ps.......sh..........st............hsaaPhashKs..h...hhhh.h...h...lhllh..........sls...........hlsP.....hh.hhsP .......................................................ShFFIClYlHIG.R..GlY.YGSYh.a......pE.T..WNlGllLLh..hl..MuTAFhGYVLP.W.GQM.S.FWG.A..TV.IT.NLLSA....................lP..Y.lG..............ssL.Vp.WlWG....................G.F.S..V.D..s.A..........TLTR..FF.u..h.HF.lL..P..Fl.........IsuhshlHLlF..L...H.E.T.GSNN.....................................Ph......Gl...sS................ss...............DK...........................................IPFH....PYa.oh.KDl.....LGhh..l.h.....lhhLh...................Ls...........LFsPslLGDP...................................................................................................... 
0 232 504 660 +13474 PF13632 Glyco_trans_2_3 Glycosyl transferase family group 2 Coggill P pcc Jackhmmer:C6QI89 Domain Members of this family of prokaryotic proteins include putative glucosyltransferases, which are involved in bacterial capsule biosynthesis [1][2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.46 0.71 -4.59 101 2553 2012-10-03 05:28:31 2010-12-06 16:25:28 1 20 1785 0 422 13191 1466 214.50 30 54.65 CHANGED hllllDuDsh.l....sssplphh....st.h.ht..p..s......c.hs...hlQ...t...hh..s....h......s.h..p..sh..h.pphhshh.......h....sp.thhpthhhtt.shst.sss.......hs..Gssshh.chpu..l.p................cls..............has...st........slu.EDhchuh+L..h.ttG.....hph....has...............................st.s.......th.pct...ss..o.......................hts.hhp..........Q.ptR..WhhG......................................hh.........................................hh....h..........t.h.h.....tph..h...h...th...h.h................hth...h........hh.....hl.t......s..h...h.h......................hhhhh.....hh.h............h.....lhhhh..hh 
...........................................................................................................................................................................................h....hDt.-sh.l......s.........l..h......................h....ht...p.....................................hs...........hh...............h........hh.......s.......................p.......p.......ph.........h....s..h...h....h......t..........................h..............h.....h..s....p...t....h....s.......s.....p..s......p..h...G.u....V..h.s............................s.s....G...s......s.A...h.Y..RR.SA...l.h.........................................................................................pst...............................................ha+..GcsS.............c.a..G...E..D...+...H.LTI.LM......L..cA..G...........aRT...........-YV...................................................................Ps.A.........ls..uTl.....VPc...o.........................................................lts...aLR............................Q...p..L...R....W..ARoo...............................hhp....................................................................................................hh......hh...................h.h...h.........hs..l....h.....t.........sh........s.s......................................lhl.......s............lu......sl..u......u..l..s.p.....................................................hhhsh......hh...................hhshhhhhhhh...................................................................................................................................................................................... 0 153 264 364 +13475 PF13633 N_methyl_3 Prokaryotic N-terminal methylation site Coggill P pcc Jackhmmer:C6Q9T5 Motif This short motif directs methylation of the conserved phenylalanine residue. It is most often found at the N-terminus of pilins and other proteins involved in secretion, see Pfam:PF00114, Pfam:PF05946, Pfam:PF02501 and Pfam:PF07596. 
It is often described as TypeIV_pilin_GFxxxE. 22.70 22.70 22.70 22.70 22.60 22.60 hmmbuild -o /dev/null HMM SEED 22 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.19 0.74 -7.04 0.74 -3.62 363 98 2012-10-03 10:38:27 2010-12-06 17:09:35 1 5 70 0 17 1401 206 22.00 46 13.90 CHANGED aoLlElllulsllu.lshsshhs .FoLIElhIuhsIlulhAhshh..... 0 3 7 14 +13476 PF13634 Nucleoporin_FG Nucleoporin FG repeat region Bateman A agb Jackhmmer:Q5A223 Family This family includes a number of FG repeats that are found in nucleoporin proteins. This family includes the yeast nucleoporins Nup116, Nup100, Nup49, Nup57 and Nup 145. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.48 0.71 -12.53 0.71 -3.85 444 1433 2010-12-07 09:26:20 2010-12-07 09:26:20 1 64 197 0 1107 1448 4 103.60 29 24.15 CHANGED s......sss..........s.........................sss....s.........u.....uhFG........s........ss.........s.........s...........s..................s.uh...F..Gs.........s..s..........................s.s...s.......ss........uulFG.......s.........s...s......t......................s..................ss.u........u..uLFGs......ss...........t...............t..s..s..............s..s........u..uL.....FG.....................s....s..s...t.................t..s......s.....s...s.uul..FGs....s.s....ss.......................................................................s.......sssuulFG 
...................................................s............tssulFG...............s........pss..............s....s...........................uuLFGs.........s.ss..............t.....ss.s........su.........GGLFG...s......s..s...s.......................ts.............ssu......G..GLF.Gs....ss........s.................t..s...s........s.u........G.GL...FG.........................s...s.s.........t........p.s........ss......uuGL...FGs....s..s...ssp................................................................s................................................................................................................................................. 0 342 639 994 +13477 PF13635 DUF4143 Domain of unknown function (DUF4143) Coggill P pcc Jackhmmer:B1L4V1 Domain This domain is almost always found C-terminal to an ATPase core family. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null --hand HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.61 0.72 -3.81 579 2818 2012-10-11 20:44:47 2010-12-07 09:58:11 1 13 1059 0 687 2443 408 200.50 18 50.27 CHANGED lpcD..lhp....h...ht...........hpsht..thpplhphlstphup.hshpplupthpssptsstp...............hhphhhthhhhhhh.shtspttpphtps.KlYhhDsGLhss.hhsh.........hh...............s..hGtlhEshVhpcLh...........pp.............t.laa....ac..................sp..st.....t..ElDh....ll.p.........ts.....ph......hslEVKtu 
....................................................................................................................................................................................................................................................................h...Dh.t...t.......thtphhphhhsphsp.hshpphsphht.tttph.t..........................................................................hhphh.tshlhhhh..hphptppthtpt.KhYhhDsGLhss.hhshp.................................s..hGtlhEshVhp-Lh.....pp.....................thp..laa..a+..............................sp.....ss..........t...ElDall..p..p.......ss.....phhslEVK........................... 0 283 483 591 +13478 PF13636 Nol1_Nop2_Fmu_2 pre-rRNA processing and ribosome biogenesis Coggill P pcc Jackhmmer:B1L5U4 Family This family represents an evolutionarily conserved sequence motif of a set of proteins that are involved in pre-rRNA processing and ribosome biogenesis in S. cerevisiae. 23.90 23.90 24.10 23.90 23.80 23.80 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.25 0.72 -3.94 150 1049 2012-10-10 17:06:42 2010-12-07 11:38:25 1 3 1010 7 195 1177 35 103.90 35 24.07 CHANGED lah.h.....th.t.s...h.pth.......+lhR.GlplGc..h....+Kp+acPoaslAh..s..............ltppph.pp..s...l-..L....sp..-....pht..pYlpG-sl......................pl...s..p............p...t.p..G.alllshcuh....slGaGKh..sss.plKNhaPKGL ...................h..hh...t.h.ss..l..spL....+lhRsGLcLGph......KK...pR...FEPSaALuh..u..........................Lp.s.s....p...s..pp..s...l-.l........sp..-......phhcYlpGEsl........................................pls.p............s..s.pG...ahl.lsh.c....Gh....................sLGFuKh....sG.s..hlKNhaPKGL.................. 
0 71 130 162 +13479 PF13637 Ank_4 Ankyrin repeats (many copies) Coggill P pcc Jackhmmer:C6QI42 Domain \N 22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.74 0.72 -3.35 86 7425 2012-10-02 12:10:21 2010-12-07 11:59:11 1 2308 729 31 4885 95319 6198 53.50 28 8.01 CHANGED thss..lphAsh.......psp..hph...lc.hll.....p..p..s..hs........hsth.....s..tt....st..s...slph.As...tts.....p....h.phlphL.l ..........................................................................................os.LahAut.................pGp.......hc.l........lc...hLl..................p....p....G......us.........................................lstp.........s.......pp........................Gt........T............sLah..As..........tpG.............p...........h.phlphLl................................................ 0 2429 3032 3970 +13480 PF13638 PIN_4 PIN domain Bateman A agb Jackhmmer:Q5AFK9 Domain Members of this family of bacterial domains are predicted to be RNases (from similarities to 5'-exonucleases). 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.84 0.71 -4.05 147 2263 2012-10-03 20:43:45 2010-12-07 14:09:18 1 39 1676 9 998 2073 447 140.90 26 23.81 CHANGED aVLDTNVLl..cc.pslhp..h.............................pcpp..llIPhsVl-ELDphKpppp.......................plsphs..c...............ps.h....p...hlppt........hptt..........................t..sp.l........ts.....pph.......................pp.pl.....t...........................p............p..sDsp..ILssshthppph..........................tppl..lLlocDhNLRlKAps.hGltup........s.ap .................................................................hVLDTNVLL.....p.c.....s..p..s..l.h.p.h...............................................................pcpc...V.llPhsVlcE....LDthK+ttp........................................................clupp.A.R...............p.s.h..+...hl-ph.....htpt.......................................s.tph..........hh.ppt.......................ptph....p............................................p.......tpsDs....p......ILsssht..lppch...................................................................................tppV..lLVoc.DhshRlKApu.hGl...spph........................................................................................................................................... 
0 359 614 836 +13481 PF13639 zf-RING_2 Ring finger domain Bateman A agb Jackhmmer:Q5A9Y7 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.26 0.72 -4.13 329 15070 2012-10-03 15:03:13 2010-12-07 14:28:22 1 495 529 19 10132 21355 1292 45.90 34 10.07 CHANGED tpCsIChpph..................t.ppsh.....tl......s..C.........s....H.h.FH....tp....C...lpp....a...........h..............p...p......p.......t.....................pC..Ph.CR ....................................pCsIChpph..........t........................tcpht..........hL...................s...C.......................s...H....h...FH....tp........C....lpp......W............................l............................p..p.......p.......p.....................................................sC..Pl.CR................................................ 0 3177 5561 7989 +13482 PF13640 2OG-FeII_Oxy_3 2OG-Fe(II) oxygenase superfamily Bateman A agb Jackhmmer:B8H030 Domain This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.84 0.72 -3.49 172 3739 2012-10-10 13:59:34 2010-12-07 14:54:39 1 75 1689 20 1686 4120 5084 99.90 24 31.62 CHANGED htlphh.psGshhs................hHhDs............................sp+tloh...lha.Ls.sp..................pGGplphhst.........................hssh.ttlt...............PphG.plllFpu..........ppshHtVtss........tst..pRhslssaht .............................................h..h.pY..tsG.p..tat.................................hHh.Dsh.....................................hsp+pl.oh...llY..Lstsp....................................cGGchhhhs..............................................................................tt...tplp.........................PttG..pl..llF.u................psh.HpVpsV..............tpupRhuhshWh..................................................... 0 587 994 1366 +13483 PF13641 Glyco_tranf_2_3 Glycosyltransferase like family 2 Coggill P pcc Jackhmmer:C6QF57 Domain Members of this family of prokaryotic proteins include putative glucosyltransferase, which are involved in bacterial capsule biosynthesis [1][2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.29 0.70 -11.59 0.70 -4.56 37 8712 2012-10-03 05:28:31 2010-12-07 15:09:08 1 130 3503 4 2974 39049 8776 233.50 18 43.38 CHANGED Pp..lsllls.shppss.....hltc...slp..shl..s....t..a....sp..hclhlhs..ps......ss....pts.h....p...h.hp....thttths..sht..hp.hl.t...s...p....G.tsKstsl.tpshp..t...........h..p.........sc......hlslhDuDshl.ssss.ltthhshh.........tp.......phshlp.s.h..hs...p..s...tp......sh.......................hs.h.....s.......h.chstpph..thhhhp.....pt.....h..s..h.s.s.h..s....G.ss.hhhcpssl..pcht.............t......hs.............hhhs..-Dh..thshpltp...tGhpst..h..h.....s.tsh...............shph......ts......ps....hts.hh+pptRWhhu ....................................................................................................................................................................................lslhl.s..sa..s..Et...............sl.tp........slp.......ul..h......t.........s...a..............sp........hclh.l.l.s.....Ds............................s.s.............-..p..s..h.........................p..........h...h....p............................t...h..........t....t.......h.s......t.h....................h.p....h....l.t.t..........................s.............p........s.................s..K......u.....t..s...l....st..u.h.p.....t..........................................................................s..p................................s-............hl..h.lh..........D.u..D....sh....h...s.....s....s...h...l.t.p..h.l..s.hh.............tss............................pl..u.....h.....Vp...s....h.......th.............t.....s.......ts................sh.....................................................................h.s.p.h.........tp...............h...p...h.....t.....h....h...h..t........h..h...t....t...s.p........................p.t.........................h......s......s.....s....h....h........s.............G...s..........s....h.......h...
h....R..+.....s..s..l.......c.p..l.G.................................................................................u......ass.............ts.hs...ED.h......clu..h.+..l...tp........t......G..a......p.....hh......a.......h...........s...ps.h.........................................s.h.t................hs..............ts..........htt...hhtpp.RW............................................................................................................................................................................................................................ 0 902 1889 2484 +13484 PF13642 DUF4144 protein structure with unknown function Serrano P, Geralt M, Mohanty B, Horst R, Wuthrich K kellrott Shewanella amazonensis SB2B PDB:2L6O Family A family based on the three-dimensional structure of YP_926445.1 (PDB:2L6O) 27.00 27.00 28.80 28.40 26.40 26.40 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.43 0.72 -3.68 25 104 2010-12-07 17:52:53 2010-12-07 17:52:53 1 2 92 1 28 77 3 100.70 41 86.07 CHANGED WPulLKhsGcDELlYLsSpp-a.pEs..pphhhsssDhLIDSpGpsYtlpp................ssptssLhhpsphlslpclhpLlpsHshtpupsCssKltFsolpQul-hls ................................................WPslLKL-GDDELlYLsScsDhpsEs...shIhsssDplI..DSpG.sYsl.s................ssstssLhtpspplSl-EsocLIQpHEFsLApsClsKIpFpTlspAhpsL... 0 6 10 19 +13485 PF13643 DUF4145 Domain of unknown function (DUF4145) Bateman A agb Jackhmmer:C7Q403 Domain This domain is found in a variety of restriction endonuclease enzymes. The exact function of this domain is uncertain. 
21.70 21.70 21.70 21.70 21.60 21.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.09 0.72 -4.13 91 970 2012-10-01 22:14:54 2010-12-08 10:52:41 1 24 854 0 191 795 46 92.90 23 13.40 CHANGED pcAt.phh.t.s.s.....p.uuu.....shhRpsLEtlhcchs........t.....psp.....................s..........L.........tpcIsplhpps......stpl..tchh.....................cs...lRhlGNpusH...s.....p.........p.lsp.........p.D....stphhc.hlc .............................h..Acphht.s.s.s.p.sss..hhhRpslEthl+hlh.........p.....psp.....................s..........L....pshlpp...hshpsh.....lsspltp.th.....................chl+phGNpAsH....s.....t............chst.........c-....u.hhhp...h............................. 0 57 112 150 +13486 PF13644 DKNYY DKNYY family Bateman A agb Jackhmmer:C2M6R1 Family This family represents a group of proteins found enriched in fusobacteria. These proteins contain many repeats of a DKNXXYY motif. The repeats are spaced at about 35 amino acid residues intervals. These proteins are likely to be associated with the membrane. The specific function of these proteins is unknown. 
22.50 22.50 22.50 22.60 22.40 22.40 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -12.04 0.71 -3.98 31 664 2010-12-08 11:29:52 2010-12-08 11:29:52 1 11 134 0 233 689 10 122.90 17 66.51 CHANGED asKD+splYYhs.......c.....clcs......sDs........poFch.......ls.......c......p..a....spDKsslYh....tspp..l.........shs..spsh...chl..............spp.....hhtp...stsphh..............hhp..hpp.............t..p...............hpsls.tYht.DKpplYa........t....t..phchlc.......shDhp.........ohc.ls...sh................aspDcsslYa....ts...........p.............plpssDs..soFchl...............s.t..hah+DKsp.lYh..t.s .......................................................................................................................l.t..s..........oh............ht....................h...........hDp.thYh.....t....h............................................................................................................................................................................................................t......a..........................................p............th..t.t......sh...p.h.................................................hhpDtpplYa.....t..................................p...................................l..ths...tsht.l..................t......h....hDt........t.................................................................................................. 0 78 218 226 +13487 PF13645 YkuD_2 L,D-transpeptidase catalytic domain Bateman A agb Jackhmmer:C2M5S1 Domain This family is related to Pfam:PF03734. 
27.00 27.00 27.00 32.50 26.40 24.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -10.99 0.71 -5.04 56 379 2012-10-02 23:30:06 2010-12-08 12:37:36 1 3 278 0 115 362 40 172.30 39 70.48 CHANGED applph..p.spl.shpshppAlpuapplppp..h...ppslLTlIDaopPSsp+RhaVlDlppp+lLapohVuHG+NSG......p.ph...A......spFSNpssShpSSLGaYhTspTYpG+pGaSL+..LpGLEcGhNDNAppRuIVlHGAsYsspsaI+ppGRLGRShGCPAlP.plscplIcsI...KsGoslFlY ............................h.......tstl.shpshppAh.puhp.phppp.......ppslLslIDaSpPSsccRhaVhDlcpc+lLapshVuHG+......sSG......p.ph...A......spFSNpssShpSSLGhahTt.ps.Yt..G.cpGhS.L+..LpGLEpGhNDpAccRsIVlHGAsYss......s..hl........p.p.h......G.+LG.RShGCPAlstp.lsc...p...lIstl...KsG...s...llahY.............................................. 0 47 79 102 +13488 PF13646 HEAT_2 HEAT repeats Bateman A agb Jackhmmer:C2M4W3 Family This family includes multiple HEAT repeats. 27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.37 0.72 -3.70 323 11825 2012-10-11 20:01:04 2010-12-08 13:16:31 1 572 2216 18 5662 13686 1691 88.30 20 24.50 CHANGED h.p...hLhp.h...L..p......ss..ss...t....l.+.t.t.uh.psLu.............p.h..t..s..t............ps.............................hshLh.p.hl........p..s...s..s....s..t.....lRttAs..p..uL....u....p..h............s..........s.....s...p....s....h.....s.hLhp.......h..h.ps..s..s..s..t...h.hR..ts..s...hp...uLt 
...................................................................................................hhth.l..p.......st..s....t.......l..R....t..t..As.tsLu....................................p.h...t....s..t...............ps..............................................................................................lshLh...p..hL..................p...D.....p....s.....s..t..........VRttAs..t.....uL............u......p...l.................................u.............................s.........t..p...........s.......l...........s..hLh.p.............h...h...ps...p..t...............hp.................................................................................................. 0 1805 3521 4730 +13489 PF13647 Glyco_hydro_80 Glycosyl hydrolase family 80 of chitosanase A Naumoff D, Coggill P pcc CAZY:GH80 Domain This is a small family of bacterial chitosanases. 25.00 25.00 616.50 616.40 19.30 18.00 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -11.95 0.70 -5.16 3 15 2012-10-02 14:50:22 2010-12-08 13:22:11 1 1 13 0 0 15 0 297.50 91 89.83 CHANGED AAAAAGVIPVGDSRVYGNVFDKGRKLTVNQWQAVLSMDAYPENGTTNYQDPEPWRYCEVDYEAuEGISDYRGDTFGPVGVTTVGDFPDYFKNAYAPYVLGKTGATNTDMKNWGVQVTGIAAADMKADDTRLDPYPNLSRSNSKKRAALTKICQALQSDFDNRQAQYVMSHYAHIDSDKLLPVLDALKKIGFTSFuQYNLVGLAFQVQVNTGSIGSISAFSSVKSAGNCGSMSuETCFATYLTDQYIRWLKSSSLGDDsGNCWRASMALDIYKQDPTMGNVSVVTSIINSKYPNNSGKCPTSGVKWSKN ..AAAAGVIPVGDSRVYGsVFDKGRKLTVNQWQAVLSMDAYPENGTTNYQEVGPWRYCEVDYEAAQGISDYRGsTFGPVGVTTVGDFPDYFKKAFAPYVLGKSNATNADMLAWGVQVTGVTAGNFpADDTALDPYPS+SRSDKsKRAALTKICGALQSAFDTQQDKYVMSHYAHIDQDKLVPVLNALKGIGFTAFDRYNLVGLAFQVQVNTGSIGSISAFSSVKSAGNCGSLSAETCFATYLTDQYIRWLKSSSLGDDPDNCWRASMALDIYKKDPTMGSVSVVNQVINASYPGNSGKCPTSGIKWSKN. 
0 0 0 0 +13490 PF13648 Lipocalin_4 Lipocalin-like domain Bateman A agb Jackhmmer:C2M763 Domain \N 22.40 22.40 22.40 22.40 22.30 22.30 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.97 0.72 -3.41 116 502 2012-10-03 08:47:39 2010-12-08 16:03:31 1 7 182 0 164 533 83 115.20 14 70.75 CHANGED hhsu.Cus-..-c.....ps.............pss............................lh...G....s.Wpls....p..h.s........tts.t..............................t...ts.h.........sps.ptshhpap..s..s........s.hstt..php......ss.......t.th.t..t..............tpsssasls..ss......pl.slph..............pss..t..t...tp.......hp..hhp......l....s..sspLsh ................................hhuCssc..cs.....ps.............pss............................lh...G....s.Wpls...ph.s.........httt...........................................p..tth.........sps...ptsthpFp..s...s............s..hstt...php......ss.........h..t..t..................................ppstsaslp..ss......pl.phph....................t.p.s..t.................h..hhp......h..s..tpph...................................................... 0 68 136 164 +13491 PF13649 Methyltransf_25 Methyltransferase domain Coggill P pcc Jackhmmer:B1L4J9 Domain This family appears to be a methyltransferase domain. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.54 0.72 -3.65 78 1216 2012-10-10 17:06:42 2010-12-08 16:37:45 1 39 965 11 462 43237 12860 101.60 19 35.92 CHANGED lL-luCGsG....h..h.tt....hhpt........................h....t................hphhs...lDhspctl..........................ph..sp....................cph..........h........phc.h......ht.....sch......pp.....l........s..........h........ttt..pFDhlhs..................sh.hs.........h.hsptthhphhpchsp.hh..c..s.G ............................................................................hLDl.uCG.s.G....t...h....h...th....h.h.t....................................................h.....t..................tph..hs......lDl......s...p.......p.h..l......................................................ph....up....................................................................pph..................................pl.p...h......lh...............u.Dh......................pp.........L.............................................s............................h.....................tst.....sa.c..l....l.hh...............................................hs.t..hs..................h...h.....t..........p.....p......h......t....t..h.h.p.....ph.h.p.h.l.t.t............................................................................................................................... 0 133 259 371 +13492 PF13650 Asp_protease_2 Aspartyl protease Coggill P pcc Jackhmmer:C6QGC6 Domain This family consists of predicted aspartic proteases, typically from 180 to 230 amino acids in length, in MEROPS clan AA. This model describes the well-conserved 121-residue C-terminal region. The poorly conserved, variable length N-terminal region usually contains a predicted transmembrane helix. 
24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -10.02 0.72 -3.48 300 2359 2012-10-02 15:32:34 2010-12-08 16:43:30 1 147 973 0 1248 4331 281 91.40 19 17.90 CHANGED l...sspl....s.G......p..s...h...phllDTG.A.o.t..s.sls....pp..hu....c.p.l.....u.l...ph..t..sh.t............ht.s..p..s...u..s.G......p..s.p.s..t.h...s.p.ls...p..........lp..l..G.s........h.p..hp.sl........p.......s.hl.hs.............s.....t.....h...s........s.......ul.......LGhsh .......................................stl....s..G......p...s..h...phllDTGA.s.s...s.hls....ps...hs....c.c.l..........u.l...ph..p..s..h......................hth.....p..s...s..s.G...........p..s..p...s....t.h...s.p.ls.....p.............................l..p..l...G..s.................hp...h.t...sh........p..........h..hlhs........................................t................p........slLGhs........................................................................... 0 480 794 1052 +13493 PF13651 EcoRI_methylase Adenine-specific methyltransferase EcoRI Bateman A agb Jackhmmer:C2M8X9 Family This methylase recognizes the double-stranded sequence GAATTC, causes specific methylation on A-3 on both strands, and protects the DNA from cleavage by the EcoRI endonuclease. 
22.60 22.60 24.20 24.00 21.00 20.80 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.13 0.70 -5.36 12 164 2010-12-08 17:00:59 2010-12-08 17:00:59 1 6 134 0 31 164 89 237.20 38 88.75 CHANGED psLpcA+pu..KsDEFYTphsDIEpElpaYhpa....FcsKsVLCNCDDPhcSNFhKYFAhNFspLGLKKLIsTsYusss.........................s.......phs..........ps......csp.pshhh..-h-.t..s..psschs...............schphphLpGsGDFRSpEslcLLc-uDIVVTNPPFSLFREaluhllcYcKpFlIIGN.NAITYKElFsLI+cNKlWLGhph..GchtFtVPDtYE.....ctTchhlDEsGp+apphuNspWaTNL-ht+RH.EsL.Lh..........++Y..tc....cpYPKYDNYDAIpVs+sp-IPhDYpGlMGVPITFlcKYNP-QFEIlG.htps..t...............................h.h..ss..cpc.h....hlNG..K...phYtRILI++K .......................................p..LptApt...KpDEFYTph.-IcpEhthY...ph....FtsKhlhhsCDD........s..........Ss........FhhaF..pFtthtlK+Llsspa................................................................................................tt..h..htts.....GDFcStEshp.lhp.p.uDIllTNPPFShFcEals.Lh.ch.pKpFllluN..NulT.YpchF.LlppN+hWhGhth..u...FhlPpth.........tt.t.h....ct........G......t.hhp.tshhWhTNl-..hRp..p.l.Lh..........pph........ptY.hYDsa.puIpVs.h.tIP.Da.G.MGVPlo.Fhp+asP.pQFcllt...t........................................................................t...................ps...p.......a.Rlhlp................................................................................................... 0 6 20 24 +13494 PF13652 DUF4146 Domain of unknown function (DUF4146) Coggill P pcc JCSG_target416819 Domain This is a family of short proteins which appear to be pre-cursors. All members are from Pseudomonas spp. The function is not known. 
21.80 21.80 22.40 180.80 21.20 20.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.65 0.71 -10.21 0.71 -4.25 11 61 2010-12-08 17:32:18 2010-12-08 17:32:18 1 1 61 1 15 44 4 112.00 72 81.75 CHANGED ASLpEaELs+MLEKVA+ESSVGTPRAINEDILDQGYTVE.GppLlNHLSVRpuHAppMRuNPcsVRsQLGsSVCpNsGYRpLMuKGAVh+YcFTEYKTN+PVsoppFsuuDCs .AShp-FELSKMLEKVAKESSVGTPRAINEDILDQGYTVE.GNpLINHLSVRpuHAppMRuNPDsVRsQLGsSVCpNsGaRQLMu+GAVLpYpFTEYKTNpPVATppFpAusCp. 0 1 3 10 +13495 PF13653 GDPD_2 Glycerophosphoryl diester phosphodiesterase family Coggill P pcc JCSG_target396624 Domain This family also includes glycerophosphoryl diester phosphodiesterases as well as agrocinopine synthase, the similarity to GDPD has been noted [1]. This family appears to have weak but not significant matches to mammalian phospholipase C Pfam:PF00388, which suggests that this family may adopt a TIM barrel fold. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.92 0.72 -7.25 0.72 -3.66 73 257 2012-10-01 22:17:21 2010-12-08 18:04:34 1 7 110 10 44 1089 478 31.20 49 10.52 CHANGED +VhhW.os...........Dp...............s.........sh.........pphh.shGVDtlhTspss ....KVYaW.Tl...........DK.........cu.....oh........RcuL.cAGVDGlMTNaPs... 0 17 31 38 +13496 PF13654 AAA_32 AAA domain Coggill P pcc Jackhmmer:C6QHL0 Domain This family includes a wide variety of AAA domains including some that have lost essential nucleotide binding residues in the P-loop. 
25.80 25.80 25.80 26.00 25.70 25.70 hmmbuild -o /dev/null HMM SEED 509 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.64 0.70 -5.84 125 1541 2012-10-05 12:31:09 2010-12-09 13:32:27 1 8 1299 2 408 1140 169 341.70 25 52.08 CHANGED Tp-L.ssh.....pt.hlGQcRAhcAlcFGlul.cppGYNlalhGtsGoG+pohlpphLpctA...cp...ssPsDWsYVtNFcsPcpPtsLpLPAGpGpphccDhcpllcpLtpslPpsF-u-cYpp++ppltcpappcppphhpplpcpApcpGhsL.hpossG....hs....hs.Plp...........-.G.........cs....lsp--a......ppL...............sccc+cplc..pphppLppclppllpp.lpph-+chp-clccLc....cclst.slsphl....p....pL+....c+Ypc...h...sclhpaLcslpcDll-Nlp.Fh...............t.tp.......tt...tpt..p...................RYpVN.l.lV.DN...s.......................pppGAPVVhEssPoapNLhGcIEatsphG.slhTDF......ohI+sGuLH+ANGGYLllcAcclLppPauW-uLKRAL+opclpIEsh.....ths........so...h......oLcPEPIPLclKVlLlGs..lYaLLtpaDs-FpcLFKVpADF-schsRss-shppaAphlushsccpsLh.h-+sAVA+l..lE..au..uR.....spc..+..LSs+hspls-LlpEAsaaAptpssphlpspaVcpAlpt+phR ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sphtpLhGp.lc..s..............s..s.h........h...l..pPGhlHpANG.GhLllshpsLLspP....h.....h....W.tLKphlppcchc.h.uh.....s.s......pslslps.sh..P..LclKlILlG-.ccthtt...hpch-P-.ht.c.hht...lh..u-F-.cplp.h.s.s.E.s.h.p.p.a.spalsp.hsp.c.p.p.L..sh.st.s.AhstLlctusR..h......spc...p..LsLp.......l..tp..hl.t-.us.hs........t..............h...p..tl..s....................................................................... 
0 112 246 345 +13497 PF13655 RVT_N N-terminal domain of reverse transcriptase Coggill P pcc Jackhmmer:B3CUZ7 Domain This domain is found at the N-terminus of bacterial reverse transcriptases. 27.00 27.00 27.20 27.30 25.80 26.40 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.72 0.72 -3.92 152 753 2010-12-09 14:12:41 2010-12-09 14:12:41 1 5 321 0 163 711 23 79.10 44 18.14 CHANGED Wp.........sIsWpplccpVh+LQpRIhKAsppGchp+V+pLQ+lLh+..SasAKhLAVRRVTp.Np..............GK+TuGVDGhhhho....Ppp+hphhpp.....Lp ...................WpsIsWpcspppVh+LQpRIh+AsppGphtKV+pLQ+LLs+..SahA+hLAVRRVTp.Np..............GK+TuGV.DGhhh.o....spp+hphhppL................................ 0 42 115 138 +13498 PF13656 RNA_pol_L_2 RNA polymerase Rpb3/Rpb11 dimerisation domain Coggill P pcc Jackhmmer:B1L7Q7 Domain The two eukaryotic subunits Rpb3 and Rpb11 dimerise to from a platform onto which the other subunits of the RNA polymerase assemble (D/L in archaea). The prokaryotic equivalent of the Rpb3/Rpb11 platform is the alpha-alpha dimer. The dimerisation domain of the alpha subunit/Rpb3 is interrupted by an insert domain (Pfam:PF01000). Some of the alpha subunits also contain iron-sulphur binding domains (Pfam:PF00037). Rpb11 is found as a continuous domain. Members of this family include: alpha subunit from eubacteria, alpha subunits from chloroplasts, Rpb3 subunits from eukaryotes, Rpb11 subunits from eukaryotes, RpoD subunits from archaeal spp, and RpoL subunits from archaeal spp. Many of the members of this family carry only the N-terminal region of Rpb11. 
27.00 27.00 27.00 28.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.35 0.72 -4.54 53 756 2012-10-02 13:35:44 2010-12-09 14:24:54 1 6 450 90 495 763 147 75.70 37 58.93 CHANGED sphphplpsEDHTLuNsLpphLhcs.scVpFsuYsl.sHPh........pschpl+lpoc..ss.........sshcslcculpclhshscplpppa .............sspFplpcED.HTLGNhLRhhLh+..........s...P........pV.F.uGYpl.PHPh.............-s+hhlRlQTp..ssh........................................ssh-AlppulpcLhs.hshlpcpF................... 0 149 262 401 +13499 PF13657 Couple_hipA HipA N-terminal domain Bateman A agb TIGRfams Domain This domain is found to the N-terminus of HipA-like proteins. It is also found in isolation in some proteins. 22.50 22.50 22.50 22.60 22.40 22.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -10.44 0.72 -3.70 185 1769 2010-12-09 14:38:03 2010-12-09 14:38:03 1 10 1109 8 541 1574 195 102.00 24 26.72 CHANGED LpVhlp.s....p....hlGpLp..p.s...ss....s.....hpFpYsssahs....s..uh..sl..S.l....sh......Plp............sp.................s.a..........p......s..........p.......sl.s.................aFssLLP-uh....hpc.....hl.....sp.....p..h.....phs.....sts..shsl.....L.thlG.p.-.shGAlphh ................................hht..s......p.....hlGpLs...pp.......ss..s.....hpFpYss....palt....t......uh..sl...S...l....sL......Plp..............pt.................sh..................p...s.....p......slhs........a..FsshL...PDuh......hRc.....hl.........tp...c.h..............php.........stsshsh..........L.thl.G.p.c.s.hGAlph.h.................................. 0 136 313 436 +13501 PF13659 Methyltransf_26 Methyltransferase domain Bateman A agb Jackhmmer:Q9YEM1 Domain This family contains methyltransferase domains. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.41 0.71 -10.74 0.71 -4.06 167 9512 2012-10-10 17:06:42 2010-12-10 15:35:54 1 120 4267 26 2887 64631 10679 155.40 18 26.58 CHANGED ss......plL-PusGsGt.h.h..thhh.pt................................................h........sphhul.El.ssh......................................................................ssph...sp............................th.....h.t............t...t....h.............................................................................................................p....hhtsc.h..t.ph.....................................................t.....h....t.s.....s.p.............h-hlltNs.Pa...................s.....t...............t.t.........t......t...........................................................................hhp...............th.l..tps.hc.hLpsu.Gh.lshls....s.......s ...................................................................................................................................................................................................................t..pllDh.us...G....o.G.s....h..h....lhhs..pp................................................................................................................h........tsplhul.E.l.....s.s.t........................................................................................................................................................................................................................................................................................................................................s.s...p.h....Ac........................................ps......hth................s....t..h.......................................................................................................................................................................................................................................................................
..................................................................................................................................................t....t.......h..p........l...h...p..s..D...h.....h..ph..................................................................................................................h............h.......t..s..........s...p...........................hD....l...l...l....s...N...P...P.Y...........................................h..t.t.........................t....t..t.........t...t......p................................................................................................................................................................................h.hp.................th..h...t...t.u..h.......p...h.L...p............s...Gh..hshh...p............................................................................................................................................................................................................................................................................ 0 966 1897 2456 +13502 PF13660 DUF4147 Domain of unknown function (DUF4147) Coggill P pcc Jackhmmer:B9Z5Y2 Domain This domain is frequently found at the N-terminus of proteins carrying the glycerate kinase-like domain MOFRL, Pfam:PF05161. 
23.00 23.00 24.90 24.70 22.30 21.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.53 0.70 -5.26 229 900 2010-12-10 20:10:12 2010-12-10 20:10:12 1 9 749 3 400 860 424 223.20 39 52.89 CHANGED phlpplFcuAlpAupPtpsltptLst..p.....................................s...tG+slVlGuGKAussMApAs.-phh........ss.........p......l..p.GlVlT+a.........Ga....s..t..........s......h...p......................p..l...cllE...Au.HPVPDpsulpAuccllchlp.....s.l.stcDlVlsLlSGGGSALLshPss........GloLtDKpslsctLLpSGAsIsEhNsVRKHLStlKGG+LAtss..tP..A..cVloLllSDVsGDc..s.sI.......A.......SGPTlsDs..o.ThtDA..hsllc+Ysl...plPs..s.ltphLpps ..............................t..hpplapsAlsAspPttsltttLs.p.......................................spu+slVlGsGKAussMAtAh.Ephh........ss..........p.l...p.GlVlT+.a.........Ga..s..s.......s.....s.p......................................cl..cllE...Au.HPlPDtsulpAupcllphlp.....s..L.s.t.cD....lVlsLlSGG.GSALLsh.Ps.t........GloL.........p-cpsls+tL.LtSGAsIsEhNsVRKHLSt.lKGG+LAtus..hP..A..pVl...oLllSD....VsGDsss.hI.......A.......SGP.T....VsDs....o.o.....h.....t-....A..htllc+Ysl...plPt..slhthLpps................................ 0 128 241 326 +13503 PF13661 2OG-FeII_Oxy_4 2OG-Fe(II) oxygenase superfamily Bateman A pcc Jackhmmer:B8H030 Domain This family contains members of the 2-oxoglutarate (2OG) and Fe(II)-dependent oxygenase superfamily [1]. 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.35 0.72 -4.34 39 165 2012-10-10 13:59:34 2010-12-13 14:17:58 1 7 132 0 81 1691 1412 68.40 19 21.49 CHANGED hsshhsst...............h.hhthtpsshhphHhDtpstts.......................huhllYLs...thsscasGuphhh.cpssptt ......................................................hht........................thhphstu.tp.hshHhD.s.sstts...........................h.hohllYL.s........t.ts-at.....GsphhF.c.ts...h................. 
0 36 64 72 +13504 PF13662 Toprim_4 Toprim domain Bateman A agb Bateman A Domain The toprim domain is found in a wide variety of enzymes involved in nucleic acid manipulation [1]. 21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.42 0.72 -3.97 195 9626 2012-10-01 21:47:57 2010-12-13 14:48:10 1 67 4702 9 2135 8279 5737 85.30 27 20.04 CHANGED stlhlVEGhhDllulcps...shpsshts.......lG...ssl...shpplt..phph................................pclllshDsDh.........sGppsutt....htc..........h..t.shplshls ............................................................t.plhVVEuhhDVhA......l....cps.....sh.........p..s...th.t.s.........................LG.....ssl.......sh..-t..lt.....chth.....................................................................................pclIlsh.-sst............................sGcssAhhhhc.........hhh....shth................................................................... 0 703 1405 1799 +13505 PF13663 DUF4148 Domain of unknown function (DUF4148) Coggill P pcc Jackhmmer:A9BSV2 Domain \N 22.10 22.10 22.20 22.30 22.00 22.00 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.95 0.72 -4.12 203 1134 2010-12-13 15:52:52 2010-12-13 15:52:52 1 3 104 0 396 1029 56 61.00 30 62.98 CHANGED ussl..ssshs..........uh..A.ps.............s...lTRAQV+AELhphcpsGh...Ps...ts.s......Y.Pssh........p...s.Aps.plss ................................................hs...h..sssss..........................uh..utps.........................slTRAQV+uELsphcpsGh....tss....ssps.......Y.Pssh........ts.Attp........................................ 
0 20 104 254 +13506 PF13664 DUF4149 Domain of unknown function (DUF4149) Coggill P pcc Jackhmmer:B9YZD8 Domain \N 24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.27 0.72 -3.95 178 975 2012-10-01 21:57:53 2010-12-13 16:57:40 1 3 879 0 341 687 671 99.80 23 53.21 CHANGED LhhuhhhGuhhhhu......als................uPhlFps...L.....s+t.psGtltspl................Fshahh....hthsssslhhlhthhth.............h.t..t..hhthhhlhshhlhslhsthhltPhhsphptpttp ...................................hshhhGu.lh..hs......a.ls................u.hlFhs........L..........s+h.psGphtspl.................F.hhhh....lhhsssllhh.hhhhhsh.......................t..h...h.hlllshh.lhslhshahltPhhtphpptt..t............................ 0 84 194 285 +13507 PF13665 DUF4150 Domain of unknown function (DUF4150) Coggill P pcc Jackhmmer:A9BLL6 Domain \N 27.90 27.90 27.90 29.30 27.80 27.80 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.34 0.72 -4.16 83 382 2010-12-14 12:40:17 2010-12-14 12:40:17 1 24 235 0 135 304 15 106.30 32 34.40 CHANGED sGhsh.uhPDVChT..P..........lPlP..YPNhuhssssh.s..sstsVhhsGtPshshs.ohh.shosGDp.uGss..tGVhSGslhGtschh..stS.sVhhcGpsssRhsDhshpN....ssNssGt ..........................hsh.shPDVChTPs..s......lPlP..YPNhAhssssh.s..ssssVhhsG.ts.shshs.ohh..shosGDpuGst.........tGVhSuT.lpG..p..schh..stSssVhhpGpsssRhsDhshhN....ptNs.s.h.............................. 0 35 71 104 +13509 PF13667 ThiC-associated ThiC-associated domain Coggill P pcc Jackhmmer:B9Z987 Domain This domain is most frequently found at the N-terminus of the ThiC family of proteins, Pfam:PF01964. The function is not known. 
27.00 27.00 27.20 27.20 25.80 26.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.80 0.72 -4.42 217 1951 2010-12-14 13:42:45 2010-12-14 13:42:45 1 4 1913 6 496 1521 213 79.30 48 12.96 CHANGED tshPsS+KlY.lpGs..+sDIRVPMREIsLosT.............s.........pt.....NsPlhVYDTSGPYTDPsspIDlcpGLP.plRpsWItcRuDsEp ..................t.saPsS+KlY....lpGo....ps-..l....RVPMREIpLosT.h..t.st.........ptNtslsVYDTSGPYoDPph..t.....IDlppGLs.cLRpsWIptRuDsE............. 0 124 292 401 +13510 PF13668 Ferritin_2 Ferritin-like domain Coggill P pcc Jackhmmer:A9C0V0 Domain This family contains ferritins and other ferritin-like proteins such as members of the DPS family and bacterioferritins. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.60 0.71 -4.22 124 660 2012-10-01 21:25:29 2010-12-14 13:44:48 1 9 316 0 446 1025 61 140.50 24 45.49 CHANGED sDhclLNaALsL..EaLE..............spFYptu.htt.............................sushsssh.....hshhpplttcEhuHlchLpssl......Gsps.......l......sps......pa.....ca.............................hssas.s.......ttsaLtsAtshEssGVsAYhGAus.hls...sts....hLpsAuuIluVEAtHsuhlRshltp ....................................................DhplLphALsL..EalE..............t.saYptuhtths..................................tsshssts.....hphhpphsppEhuH.sp...h.Lpssl...........Gsps...............s..........sps..........p.a.....sa.................................................................shs.s........stsalt.h.ut........hl..Ep..sGsuuY...hG...sss...hls..........sps....h...hp..hsu...slls...sEApHtuhlRph..................................... 
0 159 312 398 +13511 PF13669 Glyoxalase_4 Glyoxalase/Bleomycin resistance protein/Dioxygenase superfamily Coggill P pcc Jackhmmer:A9C2R0 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.42 0.72 -4.04 77 2150 2012-10-02 15:00:03 2010-12-14 14:45:10 1 24 1580 20 829 7043 2394 114.10 24 65.97 CHANGED pplulsVt...Dl-pshpha...sphlG............h........p.....h..............h..........t.p.h........h..s.............p..............s..............p................s..............s........s..h.........s......h.h.th.sss.....hpl..ELl.p.....Phs.....ss.....s.hpt.......ptsu.lp..Hl.uhh..s...c...D...l-ssltthpp.p..Ghp..hh....t.s...tt..u...s.s.s.tc.hsa .................................................................pHluluVt...D..l-pAhphaps...hL...G.............h............p......h.......................h...............t..t..................t..s......................p............................p......................p.....................s.................................V....................c...s.........................s..........h...l...th...s.ss.................pl...E.L.l...p........Phs.........ss.......................phlpp...........................pusG.l..a....Hl..Aac......V....c......D....l..c...s...s...h...p...c...Lcp...p....G.hc..ll.....tp..t..sph..G...s.t.s.t.lha................................................ 0 282 568 711 +13512 PF13670 PepSY_2 Peptidase propeptide and YPEB domain Coggill P pcc Jackhmmer:C6QGP5 Domain This region is likely to have a protease inhibitory function (personal obs:C Yeats). The name is derived from Peptidase & Bacillus subtilis YPEB. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.78 0.72 -4.08 83 928 2012-10-01 23:09:26 2010-12-15 11:13:51 1 7 550 0 229 873 47 80.40 22 77.90 CHANGED hhhlshssshhu...usAhA.........sspsPtspWh..stsplpptlpp.pGa.plcclcht-sts...aElc.uhspsGc..+hElhlDPtotpllcpc ...................................................................hh...hhhhhhh.u...........ssAhA...................pstt..s...sph.............shsps.tp.h...l.pp....pGa..plpclch..-...c.us...........YElc.s..ts....p..sGp..ch-lhlDstohcllp.................. 0 52 120 174 +13513 PF13671 AAA_33 AAA domain Coggill P pcc Jackhmmer:A9BWH5 Domain This family of domains contain only a P-loop motif, that is characteristic of the AAA superfamily. Many of the proteins in this family are just short fragments so there is no Walker B motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.74 0.71 -4.31 146 4032 2012-10-05 12:31:09 2010-12-15 11:19:46 1 83 2093 32 1521 12436 3403 138.60 17 34.96 CHANGED lllhsGhsGSGKooh....spp.h....htth....s........hhh.lspD.s................................h+p..phhs..........tt.t.....p..................................t.......th...sht....hhhphspthL.ptG...ts.s.ll......D.sT.........slp.....t.ppR....p.thhpl..Ap..phus.............h.ph.lhhpss..phhtpR...............tpRptp.................p......ssttslpphhpph..c..P 
.......................................................lllhtGhsG.oGK.oTh................ucp...l....................ttt.hs.......................hhh...l.s.p...D.s.....................................................................................hpp....phts................tt.........t...................................................................p.......ht..tt.........s.h.p..............h...h..h.......p....h...s...p..p......t.......l........p...p...G..........ps...l...ll.................D...ss...................................................hp...........t..pt.p...................p...th.h.p..h......sp.......p....h..s..h.p.....................................h..h.h...l.h..h...p...s.....s.......c.h.h.h.p.R.................................h..t.t..R..stt......................................h.....h.................................................................................................................................................................................................. 0 467 898 1220 +13514 PF13672 PP2C_2 Protein phosphatase 2C Coggill P pcc Jackhmmer:A9C323 Family Protein phosphatase 2C is a Mn++ or Mg++ dependent protein serine/threonine phosphatase. 
23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.28 0.70 -11.58 0.70 -4.93 68 3942 2012-10-03 01:39:20 2010-12-15 15:10:55 1 30 2499 16 1091 5760 566 219.60 18 65.13 CHANGED tGpoHh.ppu..tssQDsht......htth.sss...hl...l..A..VADGAGS......u..p..hSchG....uplAsp.ss.....h.ptlpphh.................sptphs...............t.pthhp...plh..pphht.......................................psttp....pt...phpchuoTLlhsl....hs.......s....s.h.......hh..hhplGDGsl.shht.........ps..uph.ph..lsp......s.c...s......GEa.s...Np...............TtFlsss....sshp.ph.......................................phhphp...h.pth...sslhlhTDGlp..........s.sl.....tstp.........sahs.hhpth...hpt...h..spst...........hppp...LtphLp ............................................................................................................................................................................................................................ht...............t..NpDthh.............t....ttt.............l......h......s.....lADGh....GG.........p..p........sGc.h.A............S...ph...u..lp..ph.........h.p.h.h..p.p.ph...............................pttt.t....................................................h.pth.hp....tht....pp.ht..............................................................................................................................pt..tt.p...........pt.....ph.p.s..h..u....T....Tlsshl........hp.....................s.........s.p..................hh..hsplGDSRs.....hhh..................cs......s..p..l.....pp......l..op.....................D..+...........s..................................h.th..s.......sp...................................tt.t.l..s.tp.........p.s.t....ph..............................................................................................c.h.t.php.....h.p.ts....-.h..l.l.L.s.SDGlp...................s.hl........sspp..............h..................................................tht.thttt..t...................
.................................................................................................................................... 0 376 772 985 +13515 PF13673 Acetyltransf_10 Acetyltransferase (GNAT) domain Coggill P pcc Jackhmmer:A9C0U2 Domain This family contains proteins with N-acetyltransferase functions such as Elp3-related proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.18 0.71 -10.45 0.71 -3.97 46 6639 2012-10-02 22:59:21 2010-12-15 17:48:46 1 97 3051 5 1600 44588 6348 116.40 17 61.61 CHANGED Dhstltplhpc.shps.p.s.h.hh.s....tth.......h..s.l.....cphhpp..s......hhhlh.hss.....pl.....s..ua.....s.t......h...p....s...ssc.......lsh.....L..hstsphptpGhupsLls.tltp.th..pp...uhp..hLpsp.......ushhu...psh.ap+.hGa .......................................................................................................................................h......................................................h........................p...h.............p.p..h..l....t..p.....................h.h...h....l...s.....p....p...s...s..................................p.l...............l.....G.a..................h..s....................................l.........p...........t.........ptp.............................................lst...............l...aV..p..P.p.h..p..t..p..G....l....G....p....t......Ll........p....t....h.hp....t.h......tp........tht.......h.l...p.lp..............ss.......tA.........hs.F....Y.p.c.hGF............................................ 0 480 972 1299 +13517 PF13675 PilJ Type IV pili methyl-accepting chemotaxis transducer N-term Coggill P pcc Jackhmmer:Q47EX4 Domain This domain is found on many type IV pili methyl-accepting chemotaxis transducer proteins where there is also a HAMP, signature towards the C-terminus. 
23.00 23.00 23.10 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.61 0.71 -3.75 504 2034 2010-12-16 09:58:11 2010-12-16 09:58:11 1 65 1235 6 420 1339 56 110.10 25 20.36 CHANGED uh..hh...lt..ust..ssA.cAINhAG.SLRMQSY....RL...h......h.h.....st.u.....p.t..hh...p..thppsht..p.sh..tp......Lpptsst..................s.s.l.psph........tp........Lpp.pW.....pc.lp....s..tl......pp...spp..................tt...shpsh.ssp....ls .................hhht.sst.tsA.cAINhAGSLRMQSY....RL.......s....................h.h.................st..u..........p...p...t...hh.....p..thpps..ht..p.sh...sp........Lpptsst..........................psl...psph..............tt........Lps..W....pchp....stl..........pp....spt....................httslsphlt................................................................................................................................... 1 105 246 341 +13518 PF13676 TIR_2 TIR domain Coggill P pcc Jackhmmer:Q47BW0 Domain This is a family of bacterial Toll-like receptors. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.24 0.72 -3.87 214 2056 2012-10-02 18:56:14 2010-12-17 13:44:38 1 344 944 8 1033 5257 266 109.00 18 21.07 CHANGED lFlSYs..t...p.D..pt...hAchl...ttt...L......p....p..t....Gh...p...Va...h..s......h.ch.h...s...Gssh..hptlpps.l...p.p..uphslsll.Sssah....p.S............s..asp..p.E....hptA......tp......ts......+...llPlh.......lc...s.s........ph...ss....h.ls....plts.....hc ..................................................lFlSas.....p........p..D.........pp..........h.s.....p.....t.....l.....t.pt....L.................p...................p........t........Gh.......p..................sa......h..D..............p.sl..t............s.......G..p.s.h........tpp.......l.p...pu...l......p.p...uc.....h..hlh....l.l..S..sshh....p.s.....................p...aspp..E.....httu..................hp.............ts..........c.....llPlh........hp...s..h..........th................................................................................ 0 444 754 932 +13519 PF13677 MotB_plug Membrane MotB of proton-channel complex MotA/MotB Coggill P pcc Jackhmmer:P45443 Domain This is the MotB member of the E.coli MotA/MotB proton-channel complex that forms the stator of the bacterial membrane flagellar motor. Key residues act as a plug to prevent premature proton flow. The plug is in the periplasm just C-terminal to the MotB TM, consisting of an amphipathic alpha helix flanked by Pro-52 and Pro-65, eg in Swiss:D3V2T1. In addition to the Pro residues, Ile-58, Tyr-61, and Phe 62 are also essential for plug function [1][2]. 
24.00 24.00 24.00 24.00 23.60 23.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.89 0.72 -4.65 1028 2913 2010-12-17 16:12:36 2010-12-17 16:12:36 1 5 2048 0 745 2094 570 56.70 30 19.45 CHANGED h..hh+.+.p......t.tt.sppssuWhluYADFhThhMAFFllhahlSssstpc...........hptluphFpsshs ..................h..h++p......t.pp.stttssWhlsYADhhThhhAFF...llhahh..St.ss.p.c...............hptluphhpssh.s....................................... 0 250 484 613 +13520 PF13678 Peptidase_M85 NFkB-p65-degrading zinc protease Rawlings N, Coggill P pcc Jackhmmer:B7UNX4 Family This family of bacterial metallo-peptidases is thought to compromise the inflammatory response by degrading p65 thereby down-regulating the NF-kappaB signalling pathway [1]. NF-kappa-B is a pleiotropic transcription factor which is present in almost all cell types and is involved in many biological processes such as inflammation, immunity, differentiation, cell growth, tumorigenesis and apoptosis. NF-kappa-B is a homo- or heterodimeric complex formed by the Rel-like domain-containing proteins RELA/p65, RELB, NFKB1/p105, NFKB1/p50, REL and NFKB2/p52; and the heterodimeric p65-p50 complex appears to be most abundant one. 
21.80 21.80 22.50 62.20 21.30 19.20 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.66 0.70 -5.00 8 95 2011-01-11 13:03:09 2011-01-11 13:03:09 1 1 85 0 1 39 0 246.90 84 71.65 CHANGED sppppDsYA-YVlspGpRt..pLSsspLsslh.sVp+AVppShupLlDcHTAhAIEsTlh-ALhpSpTFRcAVuFul+pc+psLGh........IpYRN.YEls-pops+hpclpplohsEIhpSsAspsPIls.h.sE.AsE--s.pcP..aVsISlAPshsStcYPhWQpuLIHEIIHHlTGAuDP..t-s..RlGPTEILApRlApELsWsI..Pp....FpuYssP-RlpAlppRsFpuLhcslhRHcpctptllpRLssIucthcASP- ............PNRAENAYADYVLDIGKRI..PLSAADLSNVYESVIRAVHDSRSRLIDQHTVDMIGNTVLDALSRSQTFRDAVSYGIHNEcVHIGC........IKYRNEYELNEESuVKIDDIQSLTCNELYEYD.VGQEPIhP.I.CE.AG.ENDN.EEP..YVSFSVAPDTDSYEMPSWQEGLIHEIIHHVTGSSDPS.GDSNIELGPTEILARRVAQELG.WSV..PD....FKGYAEPEREAHLRLRNLNALRQAAMRHE-NERAFFERLGTISDRYEASPD..................... 0 0 0 1 +13521 PF13679 Methyltransf_32 Methyltrans_27; Methyltransferase domain Coggill P pcc Jackhmmer:A9BXC4 Domain This family appears to be a methyltransferase domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.98 0.71 -4.50 63 1223 2012-10-10 17:06:42 2011-01-12 13:55:34 1 19 861 0 577 1525 252 151.10 24 34.50 CHANGED +Kh+QVp+....hhphlcs...llpph............tpssshsllDhGuGKuYLuFlLh..hhpt.......ssclhGl-s+sclscpupplAp+Ls..a.sphpFhphsltpsh.......................................................................................ssphcllsuLHACsTATDsAlphulp..ppu+hllhVPCCptclspp 
..........................................................................................cK.cplpphhphlpt...hht.......................................p...tshpllDaGuGKGYLuhhLth......................t..hshp.lhulE...hc..p......p....l...s.p.t.u..pp..h....u....p.c.l.t.............h.......t...p..h..p...hhp.s..s...hts.h..t.......................................................................................................................................................................spthshslu.LHACss.ho.spslch..s.....l...........p.......t.........p.....s..ph..............ls.....hsPC.Cap.lp..t.............................................................................................. 0 197 312 453 +13522 PF13680 DUF4152 Protein of unknown function (DUF4152) Bateman A agb [1] Domain This family of proteins is functionally uncharacterised. This family of proteins is found in archaea. Proteins in this family are approximately 230 amino acids in length. The structure of PF2046 from pyrococcus furiosus has been solved. It shows an RNaseH like fold that conserves critical catalytic residues [1]. This suggests that these proteins may cleave nucleic acid. 27.00 27.00 332.60 332.50 20.50 20.30 hmmbuild -o /dev/null HMM SEED 227 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.72 0.70 -11.53 0.70 -5.34 5 13 2012-10-03 01:22:09 2011-01-12 14:06:31 1 1 13 0 11 17 0 223.80 73 99.79 CHANGED MRIVSADTGGAVLDEsYEPIGLIATAAVLVEKPYKTAKhSlVKYADPFNYDLSGRQAI+DElhLAIELAKKVKPDVIHLDSTLGGIElRKLDEPTIDALsISDRGKEVWKELSKDLQPLAKKFWEETGIEILAIGKSSVPVRIAEIYAGIYSAKWAIEYA+K..EG+lIIGLPRYMcVEI+-GKIhG+SLDPREGGLYGEIEsEs.EGIcWElYPNPlARRFMVFElW+E MRIVAADTGGAlLDEsYpPIGLIATsAVLVEKPY+TAshSlVKYADPFNYDLSGRQAIRDEshLAlELA++VKPDVIHLDSTLGGIEVRKLDEPTIDALsISDRGKEVWKELSKDLQPLAKKFWEETGIEIlAIGKSSVPVRIAEIYAGIYSAKWAIEYA+c..cG+llVGLPRYMcVEI+sG+IhGcSLDPREGGLYGEIEs-s.-GItWElYPNPlsRRFMVhEVat.... 
0 1 1 6 +13523 PF13681 PilX Type IV pilus assembly protein PilX C-term Coggill P pcc Jackhmmer:A9BNC4 Domain This family is likely to be the C-terminal region of type IV pilus assembly PilX or PilW proteins. 25.60 25.60 25.80 25.60 24.10 25.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.52 0.72 -3.52 20 47 2011-01-12 14:33:16 2011-01-12 14:33:16 1 2 40 0 20 47 9 97.10 25 40.70 CHANGED sssth..Psp.tc...suhhhushsp......s..pssppGlCt....tsst........pshWsssc...ts....sshp.s..ps..........s.ssspaGpaT.Gsh......ss.usshlttpts....spssRYhIEhlshts .........................s....hP....p....ushhushts......shssphppGLCtp...tsst..........pshWssss...hp........ssst.....s.........pstsstYGpFT.Gst......ss.usshL....s......sptsRYhIEhls.p.p.... 0 1 13 18 +13524 PF13682 CZB MCPsignal_assoc; Chemoreceptor zinc-binding domain Coggill P pcc Jackhmmer:Q47EQ8 Domain The chemoreceptor zinc-binding domain (CZB) is found in bacterial signal transduction proteins - most frequently receptors involved in chemotaxis and motility, but also in c-di-GMP signalling and nitrate/nitrite-sensing. Originally discovered in the cytoplasmic chemoreceptor TlpD from Helicobacter pylori, it is often found C-terminal to the MCPsignal domain in cytoplasmic chemoreceptor proteins. The CZB domain contains a core sequence motif, Hxx[WFYL]x21-28Cx[LFMVI]Gx[WFLVI]x18-27HxxxH. The highly-conserved H-C-H-H residues of this motif are believed to coordinate zinc; mutating the latter two histidines of the motif to alanines abolishes Zn binding. This domain binds zinc with high affinity, with a Kd in the femtomolar range. Although the function of the CZB domain is not yet known, scientists have speculated that it may function as either an unknown signal input domain, based on its frequent association with signalling output domains, or as a domain that helps to stabilise protein tertiary or quaternary structure. 
20.00 20.00 20.20 20.20 19.60 19.50 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.55 0.72 -3.80 345 1282 2011-01-13 13:31:52 2011-01-13 13:31:52 1 83 938 0 334 896 70 71.70 26 17.09 CHANGED c.h...-Hhhahppl.hph.lh........ttp..p.pht.scppC.phG+Wa.puts......ppthsph....ss........a.pplcpsHpplHptupphhpt .........................h.sHhhWhhph.ach.lh............sp...phpp.tsaspC..phG+Wh.pshu....tpphsph......st.............a.ctl.-.ssHpclHpsupphh..h.............. 0 135 281 309 +13525 PF13683 rve_3 Integrase core domain Coggill P pcc Jackhmmer:A9BZ80 Domain \N 26.50 26.50 26.50 26.50 26.40 26.40 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.14 0.72 -4.45 121 4386 2012-10-03 01:22:09 2011-01-13 14:25:37 1 37 1527 0 913 15372 2196 62.80 30 37.29 CHANGED tulphphhpPGcPhpNualEpFNsph+cEhLstph......h.slscscthlppWhp.cYNppRPHouLGhhTP ......................................tlt.ph.hpsGp..Php......Nu.hhEpast.....ph+.............s.E...h...lstph...................atsh..tc..scp.t.l.tpah........p.hY.Npp...R..PH..p..u..L.shhsP............................... 0 204 552 722 +13526 PF13684 Dak1_2 Dihydroxyacetone kinase family Coggill P pcc Jackhmmer:C9LMG3 Domain This is the kinase domain of the dihydroxyacetone kinase family. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.68 0.70 -5.49 228 1830 2012-10-02 12:41:15 2011-01-13 14:44:36 1 7 1719 0 319 1574 117 297.40 38 56.58 CHANGED caGYCTEallphppt.............ch..cph+shLpsl.GD.SllVV..sD-c.............llKVHVHTpcPGpllphuhpa.GpLhclKI-NM+tQ+pphh.................t........................tttp.pp.......ch...ulluVus...GcGl.sclF.cslGsshllpGGQT....MNPSTp...DllpAIcps.sAcplhlLPNNpNIlhAApQAspls...cp....p.lhVlPT+olsQGluAl.lsassst.sh--Nhp.sMp-uhsp........V+oGplThA.VRDTpls...GhcI+cGDhlGl.........h-scIhss...sp-..hhpsshpllcph..ls--u.EllTlhhG.p-sscc.pucp........ltptlccp......a..s.-lEl-lapGsQPlY.alloV.E ....................................................................aGYCTEhhVphpps..........pta-h-cFRs.h.Lspl.....GD.SLLVV.....sD--..............llKVHVHT..-cPGp...V..hphG.pa...GpLhKlKl-NMRp.QHcthl................p.p...........................................................................tptttt.pt.ppt..ph.........ulluVss.......GcGl..s-lF.cuhGsshl.IpGGQT...............MNPSTE........Dll.cA...lcp..s.....sA...c...p.lllL..PN..N.pNIhMA...A...pp...A...A...p..ls........-t.........shVl.o+..o.......lsQ...Gho.....Al.h.....s.....a.....ssst.....sl-.-N..tt.pMspulss.................VtoGplThA.VRD.Tp.l-.....GlcI+csDhlGh...............l-scIlss......ssD......hhpsspphlpcM.........ls-..D....o...E.......I..l..T...lh.hG...cD.uspp...s..p.p........ltph.lpcp.........a......DlElElapGsQPl.Y.YlhSVE.............................................................................................. 
0 127 230 286 +13527 PF13685 Fe-ADH_2 Iron-containing alcohol dehydrogenase Coggill P pcc Jackhmmer:A0Z2K3 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.73 0.70 -5.07 57 650 2012-10-02 14:41:14 2011-01-13 16:02:49 1 3 514 4 210 15098 4240 260.30 33 66.02 CHANGED hlupuslsclsphlpch...t.h..p......clhllsDps...ohph...hucpltpsLpptsl...ps.t...hh.........p....s....s......hpsltc...lhpth....t....p....c..s-.....hll.ulGuGpl.DlsKahua...hshPalsVsTAsShDGauSssASlhhp..GhKhoh...AthPhullsDhsllppAPtchltuGhGDlluKhT...A.l...t........DWp..hu.c........th......sc..........l.........sptshp.hl.psh..pps...h.t.....s....p.....c......phl.....ptLhpu....L......s.hu...sluh......S..+PA.....SGuEHhlSHhh......-hh.......h......s.......h.h......HG.pVG .......................................................................hGpsslp.c.h..u.p.l...lp.ch.....t..h...p.........psl...l..l...s..s..ps.......T.h.tl............s.......G...c....p..l..p..s...s...L..c....s....s....s...h.....ps.h........................ss.p........s..................s..........................h.s.sl.t...p....lt.c.p.h........p....s......h......c.sc...........hllulG.u...Gp.l.D.l..s..K..................h..................A....................u....................h.......................c....................h..................s....................h...................s...................a..................l......s..................Vs.T..............A......A......S....h....D.G...a...sS...s....s...........A...s..lp.t......t.......G...h...K..t.oh..t....s........t.sP...u...l.....lsD.lc.l.....l.....s.s.A......P.p.c.h...h.s.u.GhG..D....ll.......u....K..l..s....A..l....t......................DWh....lAc.....................ch.....sE...........s.l..............s.p.h..s...h...p....h...l...p...s...s...l........cs..s...lp............c..s.c..............c........psh.......csL..h.-u.L................lhS...GlAMth............hssS.RPA.............S...G.....
.u.EH.h.h...SH...hh....-hh...........t.....s.s....hhHGtpVG................................................................................................................................................................................... 0 75 145 176 +13528 PF13686 DrsE_2 DsrE/DsrF/DrsH-like family Coggill P pcc Jackhmer:A5EWY2 Family DsrE is a small soluble protein involved in intracellular sulfur reduction [1]. The family also includes YrkE proteins. 25.00 25.00 25.10 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.97 0.71 -4.42 32 864 2012-10-01 20:53:36 2011-01-13 16:37:43 1 15 675 10 260 763 82 149.10 38 44.23 CHANGED pp++hhllhossh.c..hAhAuaIlAsuAAAhGh-VThFFTFWGLslL++sc.phpl........cpshltphhhhhh..t......hsh.shsGhusthhpthM..............................................Kc+sssoLp-LlchAh-pGV+hlACpMoMDlhGhccEELlDGl.phuGsAsaLpcAt-uslsLF .........................................................t..p+ssIlhhuG..phD..KAhAuhIlANGAs.AhGp-VTlFFTFWGL.NsL+Kt..p.p.sp.l....c.Kp.hh.t+hFshMhstts.p.....................h.lS+MNhhG.hG.stMh+.tlM...........................+++sls.o.L.pLlc.pAh...-...p...G.lKllACpMoM.DlhGlp.cE.E.L..hD..tV..-huGVusYl.sc.AppushsLF.............................. 0 105 187 233 +13529 PF13687 DUF4153 Domain of unknown function (DUF4153) Coggill P pcc Jackhmmer:C9LQV3 Family Members of this family are annotated as putative inner membrane proteins. 
25.00 25.00 25.40 25.30 24.40 24.40 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.90 0.70 -4.82 46 732 2012-10-01 23:40:40 2011-01-13 16:41:44 1 2 721 0 91 498 28 233.20 38 42.73 CHANGED lhhGls.lh....lh.ulshLF.hsl......st.pha....h.plh......hhshhhhusshaLuhls....p..p.....p......c...htps..........pt..h.....ph.llpalllPLhhlYshlLah.Yhh+IllshplP.....pGhlu.......hl.lhhh....hh.shlll..hhhhh....p.ppp......+.a.....hpha....h+hhs...hlllPll.l.lhhhuIhhRlspYGlT.sR....halllhslah.hshhlhhhh......p+.pt........p.h.........ls.hsh.sllh.llsshsP.hsshslotpSQhsRlcph .............................................................................................................lhsGLhWLV....LLLWupLF.......+...LV.........GI.pFF.....s.TLFht........schFhalol..G..Ll..o..ALAV..lL...........u.R....ppp.....h...lcuh.....................p+.L.......ho.LIAsG..LLPLVoL.LsLhFI...h.sLPh.s.G.L.u.u.lu.....c+loA...........As..LLhs.....Lu..h.l.Ll.lhAIV...........p-sp............K.s.......lshh....lRsLh......LlVAPla....l..lAuWALWlRl....uQYGWTsDR....Lhllllhlh..Lsahluhlh............R+.up.........ssh.h.......tthh.lss.L.sLVl.LlLLsSPlLDshRISVNSphARapu.u............................................. 
0 35 63 76 +13530 PF13688 Reprolysin_5 Peptidase_M84; Metallo-peptidase family M12 Coggill P pcc Rawlings N Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.66 0.71 -4.38 48 463 2012-10-03 04:41:15 2011-01-13 16:50:21 1 31 230 47 275 3504 182 203.10 22 28.71 CHANGED ss+plsLl.lssDssasssa..u...s.................spptllshls....sAsslappsh.................s..Is....lsLp...slslsspsssss..................t.sssssshLsph........................sshc...uppps.shuahhhhsssss...u...hGlAal........Gphs............ssss.....sssss...ts...........................hhhtsssp...hplhAHEhGHsaGAsH....Dsssp............tsts.Cshst...............ssss.GpaIMsss.ss........sshsp..FS ....................................................................................................................t..pps.lh.lssD.p....p..ah.pth..ss...c...............................tshphlhp..hls.......................ps.tslh.psh...........................s..ls....htlp..............plp.l.h..s..p..s..s..sp..t..........................................ttss.h.s.sp.phL.p..F..................................................s.p.hp.........t.t.....s.....p..s.....s.l.....u...ah.h........o.h.....p.....s...h...ss............G..............lGLAal.................Gssp...................s.sst.......uuhss.....pt.................................................hs..hh.h..o...tp.s....t.hlhA......HE..lG...........H..saGu.H......Dsss.................................p..Cs..........................ts.ts..upalM.shss.........s.t.FS.............................................................. 0 114 164 230 +13531 PF13689 DUF4154 Domain of unknown function (DUF4154) Coggill P pcc Jackhmmer:Q47CB5 Family This family of proteins is found in bacteria. Proteins in this family are typically between 172 and 207 amino acids in length. Many members are annotated as valyl-tRNA synthetase but this could not be confirmed. 
24.90 24.90 25.10 25.80 24.60 24.80 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.64 0.71 -4.59 103 788 2011-01-14 11:37:18 2011-01-14 11:37:18 1 6 728 0 143 407 62 141.30 44 76.62 CHANGED pct..........pl.c.......Ashlhshhpasc.WP.....ss......s...ss..lp...lClhu.sst..hs.........s.s.Lp...p.h.tspph..ss...c.lpl...........ppl........s..ssst......h..ss........Ccllalupt.p........ttphtplhp.tlpspslLolu-tss.sptGu....hlsLhh.pss+lpFclNlsssp+uGlplsu.plLpLA+p ........................p.....VphhVsGIlSYT+..WPuh......uuP..s+...LCIhu...suc..au.........s.s.Lp...ct.Aspsh..sh...hPlhl...........+s.........p..ptsh......h...us........CsuhYFGsc.o........P.shQh....-Lsc.pas.u+u...LLlIAEp.Ns....EChlGS.........AFCLll.......pNscV+FsVNLDuLoRSGV+VsP.cVLhLARp..... 0 35 89 118 +13532 PF13690 CheX Chemotaxis phosphatase CheX Coggill P pcc Jackhmmer:A5EYB3 Domain CheX is very closely related to the CheC chemotaxis phosphatase, but it dimerises in a different way, via a continuous beta sheet between the subunits. CheC and CheX both dephosphorylate CheY, although CheC requires binding of CheD to achieve the activity of CheX. The ability of bacteria to modulate their swimming behaviour in the presence of external chemicals (nutrients and repellents) is one of the most rudimentary behavioural responses known, but the the individual components are very sensitively tuned [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -10.07 0.72 -4.13 95 880 2012-10-01 19:50:22 2011-01-14 15:54:09 1 12 643 7 333 939 49 97.90 23 51.70 CHANGED lsu.hluh...sGsh..pGhlhlshspphshpl......s...p.t...l...G....-c....t...ps....pc..pltDslGElsNhIsGss+......sch........uh.....p....hpl.slPpllpGps.htl..phs.............ss.sp.tlslsap ...................sslIGh...sGsh..cGphhlshscphAhcl......s....p.t....h.......G.....pc.........pp.....hs-.s.pssluElsNhlsGssp....shLt.......uh.......s.hcl.usPtlltGps.hpl..p...............t............................................ 0 161 258 292 +13533 PF13691 Lactamase_B_4 tRNase Z endonuclease Wood V, Coggill P pcc manual Domain This is family of tRNase Z enzymes, that are closely related structurally to the Lactamase_B family members. tRNase Z is the endonuclease that is involved in tRNA 3'-end maturation through removal of the 3'-trailer sequences from tRNA precursors. The fission yeast Schizosaccharomyces pombe contains two candidate tRNase Zs encoded by two essential genes. The first, Swiss:Q10155, is targeted to the nucleus and has an SV40 nuclear localisation signal at its N-terminus, consisting of four consecutive arginine and lysine residues between residues 208 and 211 (KKRK) that is critical for the NLS function. The second, Swiss:P87168, is targeted to the mitochondria, with an N-terminal mitochondrial targeting signal within the first 38 residues [1]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.88 0.72 -4.50 41 279 2012-10-02 15:46:01 2011-01-14 17:05:56 1 11 238 0 197 491 119 61.80 37 7.17 CHANGED pslossTsDTst.sslhLph-.p.cRYlFGpluEGoQRshsE..p+l+l..uKlpslFLTGph.s...............................................WsshGG ...................hshsotDos...sslhla.h-..p...pR...YlF.NsuEGsQRhhpE..+.+l+l..u+......lcsIFLTths.................................................WsshGG.......................................................... 0 71 115 167 +13534 PF13692 Glyco_trans_1_4 Glycosyl transferases group 1 Coggill P pcc Jackhmmer:D2BSD5 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -11.02 0.71 -4.01 266 5146 2012-10-03 16:42:30 2011-01-17 13:51:46 1 174 2454 8 1838 35567 12883 141.30 16 28.30 CHANGED p..hh....hl..Gsh...s..ph....shpsh...l.lp.hh..........l..t.p.......t.......h..s....p....h..p...hhl..h..G.s..........h....s.....s............p.....l..pph........t......sl.p....h..........h..uh...h...........s..c..h.sph...hsp...s.....c..l...s..l......s...P..h.......t.....hss...........s...h.....s.....h.K.lh-hhs.sGhPl..l......s.....osh....s.....hps.h.................h..t.......t..hs...h..h.h..s..s....s......s..pshspslt.ph.hp.....s 
..................................................................................................h...hhshh...s.th.....t..psh.phh..lp......h.h...............................tl.....t..p..............................p................................h....s................p.......h......p.......hhl....h..Gt..............................s..s...p.......................p......lpph......................hs........sl..p....h.....................................h..sh..l...............s.....-....h..sp.h....l.s.p.....u...........c...l...s...l.........s.....s...h...................p.........ss..............................s..h....s......h.K.lhE.hhu.sG.h..Pl...l..........s.......osh......s.........hp.s..h.........................................h..tt..........s.ts...h....hh.......s..s..........s................s....pph.hptl.phh.......................................................................... 0 731 1269 1619 +13535 PF13693 HTH_35 Winged helix-turn-helix DNA-binding Coggill P pcc Jackhmmer:D2BY08 Domain \N 25.00 25.00 25.10 25.10 24.00 24.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.44 0.72 -4.19 22 1007 2012-10-04 14:01:12 2011-01-17 14:12:01 1 2 663 2 160 491 4 75.70 59 84.66 CHANGED DWHsADIIAAL+K+GToLAAlSRpAGLSSSTLANALsRPWPKGEalIAcsLulcPuEIWPSRYaD.psGpll-RchRhR .............DWHPADIIAuLR.K+G.TShAA.SRcsGLS.S.S.T.LANA..L..o.RP..W..PK..G..EhI..IAcALGscPh.IWPSRYaD.pspphlpRp.h.................... 0 10 62 108 +13536 PF13694 Hph Sec63/Sec62 complex-interacting family Wood V, Coggill P pcc [1] Family This is a family of closely related Hph proteins that are integral endoplasmic reticulum (ER) membrane proteins required for yeast survival under environmental stress conditions. They interact with several subunits of the Sec63/Sec62 complex that mediates post-translational translocation of proteins into the ER. 
Cells with mutant Hph1 and Hph2 proteins revealed phenotypes resembling those of mutants defective for vacuolar proton ATPase (V-ATPase) activity. The yeast V-ATPase is a multisubunit complex whose function, structure, and assembly have been well characterized. Cells with impaired V-ATPase activity fail to acidify the vacuole, cannot grow at alkaline pH, and are sensitive to high concentrations of extracellular calcium [1]. 19.90 19.90 20.60 24.00 19.10 19.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.13 0.71 -4.26 7 38 2011-01-17 14:23:41 2011-01-17 14:23:41 1 1 23 0 18 34 0 171.50 38 32.47 CHANGED pp.pp.hh.DhsSssTGtFSDpMFpss.c....ppp.phsps.ph.pssphpspschsssttttpshstpph.spsh...spsthpphspt..pup.cspp...........t+Shupoh.shD+KRLVsQFLc.SlpsSss.pssppsst.............shpo.Sphshssuhp.p.......................hpsLhYpDLcpsstppppso ................................................................................................pLchpslPshsSspTGhFSDhhFpss.D...............hstu.Sh.cs.plSPK.....psaNsstsh.hushsptphtShsl.......apupcpth.uFpplQupppsp+............+SsusS..F..sh-+KRLVsQFLp.ShtsS.s.sohpppss.t.usshssIh.s.shps.Sphs.sppS............................pSLhYHDL-uSshpcsSs............ 1 3 8 13 +13537 PF13695 zf-3CxxC Zinc-binding domain Coggill P pcc Jackhmmer:A9VEF7 Domain This is a family with several pairs of CxxC motifs possibly representing a multiple zinc-binding region. Only one pair of cysteines is associated with a highly conserved histidine residue. 
22.00 22.00 22.60 22.80 21.20 21.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.27 0.72 -11.41 0.72 -3.77 45 436 2011-01-17 15:42:00 2011-01-17 15:42:00 1 13 129 0 268 393 0 92.60 29 37.05 CHANGED phhGcFpC..ppC.spsWsSsplhlsh+hh....t.....th................hph+hacQcC+pC..sphppPhh........s.ps.......hs-Rlshchpchsshphpt.............h.t.p.t....tPHppcLCEuC+tG ............thGcF+C..spC..p+pWpSupVhsl.h.................tt.................sphh..hatQpCp.cC....pp..pPph........................p..lpclt.p.p.shpppt...h..t.h................h................tsHcpchCptCpt.................................................. 0 64 89 155 +13538 PF13696 zf-CCHC_2 Zinc knuckle Coggill P pcc Jackhmmer:A9VEF7 Domain This is a zinc-binding domain of the form CxxCxxxGHxxxxC from a variety of different species. 27.00 12.00 27.00 12.00 26.90 11.90 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.86 0.72 -4.45 5 190 2012-10-03 11:39:54 2011-01-17 17:21:59 1 28 117 0 149 536 7 31.30 45 8.63 CHANGED P+KsPPPcYLCHLCFpKG.HYIsDCPQ..AsPKuE ...............phPPssYlC+lChpcG.HaIpDCPp......sp............. 0 81 103 129 +13540 PF13698 DUF4156 Domain of unknown function (DUF4156) Coggill P pcc Jackhmmer:D2BXE0 Family The function of this family is unknown but members are annotated as putative lipoprotein outer membrane proteins. 25.00 25.00 25.10 25.40 24.80 24.20 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.00 0.72 -4.10 49 839 2011-01-18 10:31:11 2011-01-18 10:31:11 1 1 682 0 94 295 28 93.80 51 79.50 CHANGED ssphostuppVph.sssps..sppCphLGpVoGopu.sahothhtss..sshhpGAhN-L+NcAAt.hGuNslhhhs.....................spthssssshhGpsYcC ........................S.NpLouAGpsVRl..s-ppP..GuECQLlGosTGpQS.NWhSGpaGpE..uuSMRGAANDLRNpAAA..M..GGNVlYGloSP....opsh.............................LSSFsPTsSphhGQVYKC............................................................ 
0 14 37 66 +13541 PF13699 DUF4157 Domain of unknown function (DUF4157) Bateman A agb Jackhmmer:C7PSA5 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 80 amino acids in length. This domain contains an HEXXH motif that is characteristic of many families of metallopeptidases. However, no peptidase activity has been shown for this domain. 22.00 22.00 22.10 22.30 21.80 21.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.67 0.72 -3.84 185 470 2012-10-03 04:41:15 2011-01-18 10:44:17 1 49 214 0 257 522 62 74.20 33 11.64 CHANGED sLsssl+sthEsthG..tD.hosVRlHs...s......tusptspslsApAaTp.GscIhF....s.Gp.tststt..........upclLAHELsHVlQQptu.t ............................................Lstsl+sthEsthG..tD..husV..RlHss.....stusptspslsApA.aTh...GscIhF....s.Gp...ts.t..........spclLAHELsHVlQQppu......... 1 100 177 239 +13542 PF13700 DUF4158 Domain of unknown function (DUF4158) Coggill P pcc Jackhmmer:A8LT43 Domain The exact function of this domain is not clear, but it frequently occurs as an N-terminal region of transposase 3 or IS3 family of insertion elements. 25.00 25.00 25.20 25.00 24.60 24.90 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.63 0.71 -4.76 108 1440 2011-01-18 15:32:58 2011-01-18 15:32:58 1 7 737 0 250 1165 105 151.80 26 19.69 CHANGED lLosppcppl..hslPs..s..cpclh+aaoLscpDlplIp..p.+RtspNRLGaAlQLshlRa.Ghhls....s....spplPtsllpalApQLtls...s..s.s..hppYu.p.RcpTRt-HhtcltphhGa+sFst..st..h.p.pLhpaLhphAhpsscshh.LhcthlshL+pp+llLPuhoslERllucuh ..............................Lo.tpptth..hthst..sct-L.hpaaohs-t.Dlph.Ip...p..+R.tstsRLGhAlQLshhRh.Ghhls....s....hpthsssllpal.u..p..QL.tls....s...t..p....htp.Ys..p..RppT+tcHhtplpphhsac.ast.........st..hp.plh.p.hLh.p.hAht.sscs.h..LhsthlthLhpp+lllP.uhsslpRhhups............................. 
0 50 136 189 +13543 PF13701 DDE_Tnp_1_4 Transposase DDE domain group 1 Coggill P pcc A8LT71:jackhmmer Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 25.00 25.00 25.10 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 449 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.28 0.70 -6.31 10 2054 2012-10-03 01:22:09 2011-01-18 17:04:24 1 6 411 0 238 1429 381 279.80 36 91.76 CHANGED FsslsG+pVhucF-GGslSSDuGllLl+plDcpltlspRhAsClcDtR+sSYlcHSlc-LluQRIYQIAsGYEDsNDuNpLR+DPhFKlALs+lPhsssst..LASpPThSRLENslspp-lp+huR.............uFVDtFL-SYt+sPc.IVLDMDsoDDtsHGpQEhAFFNuYYpssCYhPLalFEupoG+LLsApLRPGcpcoGctslshLpRllcpIRctWP-T+IllRGDuuFupPElMshCEspsssD.lFGhuGNssLhc+tsslls-sRcpctpptcpshh...ts..t................RhacpsaYsA+SWscsRRVVhKsEhtucGsNhRFllTsL......scssPpcLYcchYCsR.GpsENRIKEhKhDLtSDRTSscsFluNQLRLFlusAAYVLhpulRppsLspT..LAKApsuTIRLpLlKLAARVslotRRIllcLPouCPhpsplthshppLphtp.hs 
..........................................................................................................................................................h..................................................................................................................................................................................................................................h.....lDhDso.........u.ppp..................a...h....t....sa..Ph..h..hh.................tt............hR.up......s..tt..t..h.....t..ht.h...h...t....h..........................................hRh...D.ut.....t.h.......h..............................................................................................................................................................................................................................................................................................T.sh..................t....s.s.pp.....h.....p.....h......YptR..G.p.hEN.h.IKEh...p.s.hh.....s-.......+h.s.......op..............shhtN.plRhhhsshAYsLhh...hh..p..h...h..................s.......t........p........h....t.......t.h....p....htp...hR...h...hhl+lss+hs.ps.Rp.hlphsph.h...shtt.h..h..........th..................................................................................................................................................................................................... 
0 49 161 214 +13544 PF13702 Lysozyme_like Lysozyme-like Coggill P pcc Jackhmmer:B0G1S2 Domain \N 25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.01 0.71 -4.75 63 1147 2012-10-03 00:09:25 2011-01-18 18:31:40 1 9 690 0 68 686 23 158.50 43 52.92 CHANGED lspcVlpacshVcchApchu..ls-.YlsllLAlhhpESGG...cssDlMQSSEShshs.............sNsIpDs-tSIcpGVphaspslcpApppus....-.lcsslQuYNaGsG.alsaltppG.spYoh-LAppFScphs..........upphth....sh..tt.hh..sYGshhYsppVh.pYh ....................................................tpVhsapPhVcchscE.....hslsp..hssllLAlIhsEStG...........ps.tD..V........M..Q....SSEShuhs..................................sNoIps.scpS..I+QGlphhuphLtpApc.tul.....Dl.oslQuYNaG.su.Yls..alAppG....pcaThpLAcpaS+-hs............sGpphsY.pPlul...hsGGahY.NhGNhaYsphVp............................................................. 0 16 35 47 +13545 PF13703 PepSY_TM_2 PepSY-associated TM helix Coggill P pcc Jackhmmer:D2BX46 Domain This family represents a conserved TM helix found in bacteria and archaea. 30.00 30.00 30.20 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.24 0.72 -3.81 95 2542 2012-10-01 23:59:14 2011-01-18 18:43:04 1 35 1345 0 728 2488 143 87.60 25 18.15 CHANGED ahhplHhsLh.lsth.....hGthllulsuhhhllslloGllla..............+hh+pha...shR.tc..stcp.hhD.hHshhGlhslPFhlhlshTGlhhhht ....................................hhpLHtsLh..lsts.............Gphllslsuhhh.l.lhll.o.Glhla...h............................p.h...h...p.t.h.h...........thc....h......pp.....st.+p...hh-..hHshhGlhshhhllhhshTGlhh...s.............. 
0 155 412 583 +13546 PF13704 Glyco_tranf_2_4 Glycosyl transferase family 2 Coggill P pcc Jackhmmer:A8LQ72 Family Members of this family of prokaryotic proteins include putative glucosyltransferases, 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.51 0.72 -3.67 164 934 2012-10-03 05:28:31 2011-01-18 18:44:27 1 33 490 0 305 3098 709 103.40 19 24.61 CHANGED +NEshh...L.palt...a.a.p.p.l.G....h..s.c..hll..h...sN.sssDso....sphL....tp........h....sc....ls......lhp...ss..................t.sa................pp..tt.ht....h......s..........hhsh......h..hp...p..h......s.........p..s.....cW..hlhlDsDEFlshsstt...tslpsLh ............................hNpt...h.l.ta.lt.......a..a...p..t..l...s.....h....c...c.....hhl..h....ss..s...s...s.D.s..o.......scl.L....cp.......h......ss.....lp.........lhp...p..p...........................................p..th......................tt...tp..ht......h......c............hhpt.......h..hp......p...h.....t..........................t..s.....-W..llhlDsDEhlhsst.....tl....h............................................ 0 101 194 240 +13547 PF13705 TRC8_N TRC8 N-terminal domain Bateman A agb Jackhmmer:Q8WU17 Domain This region is found at the N-terminus of the TRC8 protein Swiss:Q8WU17. TRC8 is an E3 ubiquitin-protein ligase also known as RNF139. This region contains 12 transmembrane domains. This region has been suggested to contain a sterol sensing domain [1]. It has been found that TRC8 protein levels are sterol responsive and that it binds and stimulates ubiquitylation of the endoplasmic reticulum anchor protein INSIG [2]. 
27.00 27.00 27.40 27.30 24.20 24.40 hmmbuild -o /dev/null HMM SEED 508 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.74 0.70 -5.93 13 190 2011-01-19 10:54:33 2011-01-19 10:54:33 1 7 80 0 116 183 0 433.60 38 72.60 CHANGED lhull-VsLRVPulhllDhlhphsh.tuhs.........pp.........................................................................................hphphlthslphlGhls...........usslLhLsp++LlplYhahhuhlLhhsuahlsh.h..l..c.....-thlhls.hph.c.suhh............huhllhtl....lhssca..hhhhhshhhh.Pllhplh.lPh.s............Lhhlsshshhhoshtshhhlhpth.hshphsh.hh.....hthhplaGltuLlpshWp+LpVPslLpVFWlschshQhhs.....hhspp-ssh.....s.pphhhllhshlsssCsoshslLGhosllShlA+hluphhphaLtuh-s.cccp...hGhspuVhhhILALQTGLouLpsccRhlhLuhsLhlllTAlLp.lHpIs-PlLhuLuAS+spShpRHhRsLslChFLllhPlhlsahLhpaashshWLLhVsu.slplsl+VlsoLhhYsLFMlDuaRpp.WEKLDDhlYYV+usupslEFlhulhlhu.GAaphlFtu.....hohIpAshhhlHuYFNIWhcApsGW ....................................h..uhlpVsLRVPslhllDhlap.....h..u......................................................................................................h.hhhlth.hp.h.hGhhl...........SsllLh..Lsp..pp.......L.hphYhah.hshLLhhsuh.lsh........h..............................shal-.hu.h.phsshh................hu.lllph..............hhtht....l.lhShahh..llhclh......lPl.s...............llhhsphshhhssh.llhhhhp.hhh...shphsh.hh.....hphhcl..YGL.sLh.shWpplhhPslhhV..FWLshhshQh.hs........hhsspspsh..........shpphh.lhhs.........lhpsCsosholLGhshslS.lAhhlhhhhhhaltuhct..sc..............................hGhstuVhhhILAlQTGL.tLps.cRhhhLSh.hhlllsulLp.hhphsDPllhuLuAS+sp.Sh.h+HhRsl.lshhLhlhPshhuYhlhpaathshWLhhlhu.sl.ssLpVlsoLhlYsLFMl-tap.p...hEphDDhlYYVpuThpllEFlhulslhu.Gs.phlFtp.....hohhtuhhhhlHuYaNl...aLpAp.GW............... 0 31 40 78 +13548 PF13706 PepSY_TM_3 PepSY-associated TM helix Coggill P pcc Jackhmmer:D2BX46 Domain This family represents a conserved TM helix found in bacteria and archaea. 
29.10 29.10 29.10 29.10 29.00 29.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.77 0.72 -4.54 99 885 2012-10-01 23:59:14 2011-01-19 14:09:45 1 19 462 0 319 1056 191 36.90 30 8.04 CHANGED pphhhhlHpWhGlhhuhllhlhFhoGslhha.....ps..p..lsp ......p.hhhlHpWhGLlhuhlLhlhhloGslhha.....cp-ls.......... 0 65 174 255 +13549 PF13707 RloB RloB-like protein Bateman A agb Jackhmmer:C7PC62 Domain This family includes the RloB protein that is found within a bacterial restriction modification operon. This family includes the AbiLii protein that is found as part of a plasmid encoded phage abortive infection mechanism [1]. Deletion within abiLii abolished the phage resistance. The family includes some proteins annotated as CRISPR Csm2 proteins. 27.00 27.00 27.00 27.80 26.90 26.90 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.54 0.71 -4.63 120 422 2011-01-19 14:48:00 2011-01-19 14:48:00 1 2 358 0 96 390 16 178.60 20 87.08 CHANGED hlIlsEG.p...TE.pYFctl..................t.hthpphplplhstt...............................ssstpllcpstchtp........................................................................tpphDplasVhDt.D.........t.ptpphp.cshpt........scpp............thplhhSNs.sFElWlLLHapths............thsppthtt...ph..pth.........ssYpKsp.................hthhhpphts...AlppAcpltppt.tppt....................ss.sTslh..pLlctlt 
...................................hhIhsEG..p...TE..tYFptl..................p.hhhsp..hpl..phhstt........................................psstpllctshphhp....................................................................................................tph.cpl..ahVhDt.D..................ptpphp...p...shph...........scpp....................phphhhSN...sFEhWhLLHFpthst.............................t..ppphhtph.....hpthh.....................tpYpKsp...............thhp.hhpphtp..........AlppApphhtp...pp.t.....................ss.hopl....pllc.l.h............................................................ 0 48 80 92 +13550 PF13708 Methyltransf_27 Methyltransferase domain Coggill P pcc Jackhmmer:Q3IV21 Domain This family contains methyltransferase domains. 25.00 25.00 25.10 25.60 24.20 24.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -10.97 0.71 -4.77 34 854 2012-10-10 17:06:42 2011-01-19 15:25:22 1 11 411 0 44 544 11 171.00 53 64.67 CHANGED thp+slDpphWccLhpcoGhhshMsupt+cpWpcplp..ts...........shPshotcNIhuTFppLhts+pchFp+GllslF+pL..........ShcaKTN..pshtFG+KlIl..ssllp.....hp.ph...uhs.......hshu...pps.pLsDLt+hhtllcG...KPhs-..pRpshs.tthtpthptss..t.............................................t.pshEsphFpl+hFpp.GosHlpF..p+s-Ll-+lNpllAcaYP.ssL ........................................................................s.ITRslDRcIW+cLMpcoG.MholMsupsRDpWh+sLE...D....................shPEISEsNILSTFcQLHpNKs-VFERGVINVF+uL...............................SWs.....YK...TN......sPC......+F...G.....p.KI.....Il..NNLVc......Ws.+W.....Ghp.......L.sG....ptD.pLsDLERMLaLh....sG...KPlPD...NRpsIs...lc.Ls-alpssp....s..........................................................p..ppaEDEh...FpIRYFpK.GouHITF..++.-..Ll-+lNDIIA+aaPshL................... 0 7 15 25 +13551 PF13709 DUF4159 Domain of unknown function (DUF4159) Coggill P pcc Jackhmmer:C6VZA7 Family Members of this family are hypothetical proteins. 
TM prediction shows them to have two transmembrane regions, with a cytosolic region of about 25 amino acids between the two, and an N-terminus outside the membrane. 25.00 25.00 26.80 26.30 24.00 22.80 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.25 0.70 -11.58 0.70 -5.21 124 420 2011-01-19 15:53:05 2011-01-19 15:53:05 1 9 358 0 171 402 276 209.10 30 33.09 CHANGED hplAhl...ho...............G-hpsD...........suLpsLsphl...........sppTslcss..t..hsVclss.s-Lh..haPhlYhsspssh.h........ospt..hspLcpYlpsGGhlhhDsp............t.tsshstsh........pc.hphlhs...ssLp.lPs-Hslh+sF...Yhl..........hsutshhh-ttst...................h.hthhsDG....hhsllhsss.DhusuWshspps.pshhshsss..............................AhRhGlNllhYsL ...............................................plAhl..hs..................GchssD..........psuLpsLsphL...........tp+Tulcsu....p...hsV-hsp...D-Lt...haPhlYhshssss..h..........SspthsplcsYh.pp.GGhllhDsp........c.......t.tushstsh..............pchhphlhs....hssLcslPs-HsLh+sF...alh........-.hP....uphputshWsEstst........................sh.hth.sDG....hssllloss.Dhus.AWuhctpssshhsssss.....p+.........................hAhRhGVNIVhYsL.......................................................................................................................... 0 72 126 147 +13552 PF13710 ACT_5 ACT domain Coggill P pcc Jackhmmer:D2BYU0 Domain ACT domains bind to amino acids and regulate associated enzyme domains. These ACT domains are found at the C-terminus of the RelA protein. 27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.88 0.72 -4.16 79 4946 2012-10-02 00:29:19 2011-01-19 17:24:13 1 10 3420 7 1010 2625 1922 62.70 40 43.40 CHANGED ppstsLpRllpllR+RGFplsshshpt...tssst....hplplsVpu.pRslch...LspQLsKLhDVhpl .........NcsGsLsRVsGLFu+RGaNI-S.lsVus..oc-ssl.................SRls..l..s..s..s..u.p.-.p.s.l.EQ......lh+QLpKLlDVl+V................. 
0 289 629 835 +13553 PF13711 DUF4160 Domain of unknown function (DUF4160) Coggill P pcc Jackhmmer:C6W4D5 Family \N 23.00 23.00 23.60 24.30 22.70 22.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.29 0.72 -4.12 168 574 2011-01-19 17:56:36 2011-01-19 17:56:36 1 4 384 0 179 516 65 63.30 28 72.08 CHANGED Glhlhhah...ppHpPPHlHsphuc....tpAhhslps......hph.hpG..phsp+.pl+hlhpalphapccLh..ppWp ...................Ghhlhhah...p.p.H.pPPHlHsp.hus......hcuhhtlps.........hph...hcG...h..ss+..ph+hlhtaht...+pcpLh..ttWp.............. 0 63 119 156 +13554 PF13712 Glyco_tranf_2_5 Glycosyltransferase like family Coggill P pcc Jackhmmer:C6VYA4 Family Members of this family of prokaryotic proteins include putative glucosyltransferases, which are involved in bacterial capsule biosynthesis. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.48 0.70 -5.10 121 312 2012-10-03 05:28:31 2011-01-20 09:28:06 1 21 211 4 52 1116 294 202.30 38 68.24 CHANGED ILhVsClNs-cl...acQ.Cp+.pIcsL..hVPP..GYlVQlhPIRsA.cSMsSAYNcAlSaPAKYK....VYIHQDsallN...cshhhsLlsLFp-s..E+LGlIGluGAQalPsNGlWWE..GKslV..GK.VIpYpp..psYphhp.hpphh.Y....t.spsFhsVpAIDGLlMATQYDIP........WREDLFpGFHFYDVSQSLEFp+.A.GY...hs...sQ...t...s...h...W...CIHYsu.Dp.h-.s.shhc.ph+p.FVE.+Y ...................................................................h.hlhs.pppth...htp..h.....l.ph....hs.s..sh.....hlph.hs.lR.s.........A..pSMsSuY.Np...Alsp..A...+aK......VYlHQDsallN......pshh...hsLlplF.p.c.p.....-cL.G.....h..I............G..........h.....s.....Gu.p..h...l.P...s..s.....G....l.....Wh.-........up.s...hs........GK.........Vlt..Y....t.p.....h..........a...h..t.....p..phh............tsp...sahsV.psI..D...G.LlMA.TQ.YDls................W.REDLF...pG..F.H.....F.Y..D..VSQShEFp+.A.GY...pl......sp....t.s....h..W.C.I.H......a......st.....-t.......t......s...s........a..c..ht+h.FlccY........................................ 
0 10 37 46 +13555 PF13713 BRX_N Transcription factor BRX N-terminal domain Coggill P pcc Jackhmmer:Q17TI5 Domain The BREVIS RADIX (BRX) domain was characterised as being a transcription factor in plants regulating the extent of cell proliferation and elongation in the growth zone of the root [1,2]. BRX is rate limiting for auxin-responsive gene-expression by mediating cross-talk with the brassino-steroid pathway. BRX has a ubiquitous, although quantitatively variable role in modulating the growth rate in both the root and the shoot [3]. This family features a short region, also alpha-helical, N-terminal to the repeated alpha-helices of family BRX, Pfam:PF08381 [1]. BRX is expressed in the vasculature and is rate-limiting for transcriptional auxin action [4]. 24.50 24.50 24.50 26.70 24.40 23.50 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.55 0.72 -7.72 0.72 -4.50 53 188 2011-01-20 14:44:40 2011-01-20 14:44:40 1 21 25 0 128 193 0 38.00 41 4.56 CHANGED s-Euu.+scuAK.-sIKSLTuQLK-MAc+l...s.uuhc....pscsss .........-EuuKs+AAK.ElIKSLTuQLK-MAc+l...s.Guhc...p.t...t.......... 0 16 77 103 +13556 PF13714 PEP_mutase Phosphoenolpyruvate phosphomutase Bateman A agb Jackhmmer:A1B6C5 Domain This domain includes the enzyme Phosphoenolpyruvate phosphomutase (EC:5.4.2.9). This protein Swiss:O86937 has been characterised as catalysing the formation of a carbon-phosphorus bond by converting phosphoenolpyruvate (PEP) to phosphonopyruvate (P-Pyr) [1]. This enzyme has a TIM barrel fold. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 238 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.66 0.70 -5.15 124 2953 2012-10-10 15:06:27 2011-01-20 17:46:56 1 17 1973 94 941 4275 3390 240.70 35 79.80 CHANGED F+p.LH...pts.p....sl........................................lLsNsWDusSAclltps........G...........apA.luToShulAtuh.GhsDu.........ptlshsph...l.stlppIsps.s.s................lPl..osDhEsGY.....upp....spplscs...lcclhpsGssGlslEDp.........t.......tttlh.sh...........pptst+lpAs+puststs...hhlsARsDsalht............tsstls.......-slcRupAYtc.AGADslFlsGh.....pc..sp.l.tplspth.s.......hP..lNlhs.h..st.t....hshscLtplG.VpRlShGsthhcsA....hssh.tpsspplh ....................................................................................................hRt.Lt...pp....p....sl.lsusasuh..sAhlspps.........G.....................apA..la.h.SG.uu.l.A.....A.....S.....h......G.....l......P..Dl.........G.lsohs-l...l.ps...s..ccI..scs..s...s................................lP.l..lV.Dh.....D..sG.a...Gs...........s.h.N.luRT...V+p.h.h.c.A....G.sA.u.lpIEDQh......................K+CG.....+...ssKtll..st..........................-..E.hl.s.+I.+AAh-A+.......scss............alIhARTDAhh.................................scGl-s..AIcR.Ap...AY.......s....-.AGAD.hlF.s-uh............psh..pp..h...cphscsl.p..........sP...lh..ssh..p.....ut.o...............hosc-LtphG.lshllashus.hRAh.pAhppshpt..h...................................................................................................................................... 0 228 531 766 +13557 PF13715 DUF4480 Cna_B_2; Domain of unknown function (DUF4480) Coggill P, Eberhardt R pcc Jackhmmer:A6L3W4 Repeat This domain family is found in bacteria, archaea and eukaryotes, and is approximately 90 amino acids in length. The family is found in association with Pfam:PF07715 and Pfam:PF00593. There is a single completely conserved residue G that may be functionally important. 
32.20 32.20 32.20 32.20 32.10 32.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.88 0.72 -3.93 100 11370 2012-10-02 19:08:27 2011-01-21 13:39:15 1 110 309 0 3174 13036 3876 84.30 31 8.98 CHANGED plpGtVh...sp...p.s...ps...ls....usslhhp...ss.p......ps......sh.Tst......sGpFplp....sp.....t.ss...pLhh..sh..hGaps...hp.h.lp....tp.t...p........l.s.lh...L.pp.ssp...p..Lc.E..Vll ..............................lpGpVh....Dp....p...s.......p.P...l.....GAo..Vhlc......Go.s..................pG........sl...T..Dh...................cG..pFslp....ls.......s...s.s......t..Lh..h...Sa..l..G..Yps.....pp...l.p...l.s.......t...........s.....................l..s.lt...L...pp...c.s.p.....t...Lc.E.VVV.............................................. 0 1390 2842 3163 +13558 PF13716 CRAL_TRIO_2 Divergent CRAL/TRIO domain Bateman A agb Jackhmmer:P32525 Domain This family includes divergent members of the CRAL-TRIO domain family. This family includes ECM25 that contains a divergent CRAL-TRIO domain identified by Gallego and colleagues [1]. 
21.80 21.80 21.80 21.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -11.05 0.71 -4.30 101 1520 2012-10-02 01:12:42 2011-01-21 18:21:51 1 65 240 9 821 2870 21 140.20 22 13.59 CHANGED hth.h.tup.sppGpsllhhs.uphh..............................................shcpllhYlhpph.pp.....sps..asllh.pssh.................................sp.shshlpph..............................................hphlstt..........................htpp.lptlal.l+ss.....hhh+phht.............................shhphh.stth.ttc...............................................................lhh.................lsslspL.....hphlc....hspL..pls...........tshpa-pp .........................................................................................................hh......tG..sp..u.ps.lls.as..sp.h......................sttp...............shcpl..h..h.....Yl..h...p....h...h.c.................h.s.tps.........ahll.h..pp.th.................................s..p........h...s....h...l+ph.....................................................................................h..p.hlspp...............................hhpN..L+........s.lhl..l+Ps...............hah+ph..h...............................hhhp.hh...ss..ch..thK.....................................................................................l.h..h.................................l...s.....o...l...p...c..L.....tphls.....hpQl..cls......tshpast....................................................................................................................................................................................................................................................... 0 213 322 549 +13559 PF13717 zinc_ribbon_4 zinc-ribbon domain Coggill P pcc Jackhmmer:Q5HCD9 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR, Pfam:PF12773. 
27.20 27.20 27.20 27.20 27.10 27.10 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -7.89 0.72 -4.26 74 110 2012-10-03 10:42:43 2011-01-24 12:58:18 1 21 79 0 72 614 889 35.80 31 8.64 CHANGED MplpCspCpspapls-pp.ls.spGt.pl+CspCpphah ......MplpCspCpspapls-cp.ls.sput..plcCspCtpha..... 0 40 57 69 +13560 PF13718 GNAT_acetyltr_2 GNAT acetyltransferase 2 Coggill P, Eberhardt R re3 Jackhmmer:P76526 Family This domain has N-acetyltransferase activity [1,2]. It has a GCN5-related N-acetyltransferase (GNAT) fold [2]. 24.60 24.60 24.60 26.20 24.50 23.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.27 0.71 -4.99 88 1293 2012-10-02 22:59:21 2011-01-24 13:23:31 1 20 1187 2 418 1028 31 180.60 42 24.00 CHANGED chhuLhVuuHY+sSPNDLphlhDAPupplalLh..........................................s.ss............cllssl.lshEGpls.pc...hs.ppshpttpRspGcLlPhslupphtcppFupLsGhRlVRIAscPshpp.........hGhGochlph..lppa..hp..............................................................p..plDalGsSFGhTspLh+FWp+.sGFtsValppstsphoGE+oslhl+s.....Ls ...............................latLLsuAHY+TSP.DLpthhDAPup+hhhht..........................................sts.................clhGsl.......hl.s.cEG.s...LS....pp...ls.pslh.u.G....hR.............RP+GsLlspoLutphsss.tAA.sLpGtRlsRIAVHPshQc.........pGhGpplltthhphh..........................................................................................................................................................................p..plDYLuVSFGh..Ts-Lh+FWp+.sGFl.V+husp+.-suSGpYoshhLhPl.s........................... 0 131 221 337 +13561 PF13719 zinc_ribbon_5 zinc-ribbon domain Coggill P pcc Jackhmmer:Q5FCF8 Domain This family consists of a single zinc ribbon domain, ie half of a pair as in family DZR, Pfam:PF12773. 
27.60 27.60 27.60 27.70 27.50 27.50 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -7.94 0.72 -4.37 79 534 2012-10-03 10:42:43 2011-01-24 14:21:25 1 21 487 0 215 626 893 36.50 35 10.00 CHANGED MplpCspCpspapls-pp.ls.sput.pl+CspCppsahh ........hpCPpCpTpaclsssp.ls.hpst..tVRCupCpplFp...... 0 55 128 174 +13562 PF13720 Acetyltransf_11 Udp N-acetylglucosamine O-acyltransferase; Domain 2 Coggill P pcc Jackhmmer:C5B7S0 Domain This is domain 2, or the C-terminal domain, of Udp N-acetylglucosamine O-acyltransferase. This enzyme is a zinc-dependent enzyme that catalyses the deacetylation of UDP-3-O-((R)-3-hydroxymyristoyl)-N-acetylglucosamine to form UDP-3-O-(R-hydroxymyristoyl)glucosamine and acetate. 24.50 24.50 24.70 24.60 24.40 24.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.68 0.72 -3.76 349 2605 2011-01-24 16:19:52 2011-01-24 16:19:52 1 15 2381 24 620 1760 1696 83.10 37 31.62 CHANGED DVPPashssG.s..Ap..htGlNhlGL.+R+GFop..-..plp.tl+pAY+lla+.sG.h.....slp-Al.ppl.pp....hsp.......sscl.ppll...cFl...t..sop.......RGlh+ ...DVPPYslAp....G....N...cA..p...hGlNl.GL.+..R.RG...Foc......-....plpsl+pAY+hlY+....sG..h......slc-.sh..t...cl.tc....spp...........pspV..pthh....-Fl.....p.....pSp.....RGllR.................................................... 0 196 396 518 +13563 PF13721 SecD-TM1 SecD export protein N-terminal TM region Coggill P pcc Jackhmmer:D2BRP0 Family This domain appears to be the fist transmembrane region of the SecD export protein. SecD is directly involved in protein secretion and important for the release of proteins that have been translocated across the cytoplasmic membrane. 
25.00 25.00 25.00 27.50 24.80 24.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.13 0.72 -3.75 100 2015 2011-01-24 16:50:55 2011-01-24 16:50:55 1 5 1305 0 300 1070 399 98.20 35 20.67 CHANGED lN+YPLWKYLl..llhllhlGhlYALPNLYGEDPAVQIou.p.p.u.s..p..s..s..s..t..s...pV.pssLcpssIsh..c..ult.l.-s....s..s..lLl.Rhsss-sQLpA+-h....lppsL.....G.c.....sYlVAL ....................W..h..h..l.l.h.sl.llulLYA..l.PNl..aG--PAlQIousp.p.G.s..s...h..s..-..t...s......pVpctLpppsIsh..K....Sls..h..cs...........s..slLl.RF..c..so..DpQ.lpA+-sLppsL.....s.c.....pYlVAL........... 0 49 131 219 +13564 PF13722 DUF4161 C-terminal domain on CstA (DUF4161) Coggill P pcc Jackhmmer:C5BCA0 Domain This domain is found at the C=terminal of most known CstA domain-containing proteins. The function is not known. 23.00 23.00 23.20 23.20 22.00 22.20 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.76 0.71 -3.83 141 3040 2011-01-24 17:03:47 2011-01-24 17:03:47 1 5 2329 0 528 2010 121 127.00 41 20.10 CHANGED Ghs...huhhapFAlhatAlFlLTolDuuTRluRahlQ-hhu.......h......htppphh.ssthluoslslshhGhllhtG.............shsslWPLFGhuNQlLAulALhlsosh.Lh+ht+t..p.....asa.lshlPhsahhlsThsA ...............hh....huFWYHFAILFEALFILTulDAGTRuuRFMlQDlLG........sh.............ht..c.s.csl....suslluTshsVs.hWGalLapGshD...........PhGGlsoLWPLFGluNQhLAulALhlsoVlLhKhp+p....p................ahW..VsllP.ssalllsThsA..................... 
0 173 322 436 +13565 PF13723 Ketoacyl-synt_2 Beta-ketoacyl synthase, N-terminal domain Coggill P pcc Jackhmmer:D2C0A5 Domain \N 25.00 25.00 25.90 25.10 24.80 23.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.44 0.70 -5.08 53 581 2012-10-02 12:25:54 2011-01-25 13:01:22 1 2 555 0 160 516 37 202.70 26 79.93 CHANGED Wp.p.Wup.ss.tth..ss......s.sstPtlstlPsMpRRRhSpho+lulcsuhplh......pt..pss..hlVFuSRHGElpRohsLLpslhsppslSPTuFuhSVHNsuuGhaoIhsppshssTSlAAGp-ohppullEAhuhLp.p.u.sppVLlVshDpPlPphYpsa.s........ppts.hsaAluLlLssGss.hphs..................hpss..ssss..pps.......shs.puL..phlctllssp.....s.ph.shsu.ppppWpWp ................................................................................hsth.s.lP.sh.p.pRRhop.hs+l.ul.p.s.u...h.t.hh.........................pp........t..p.s........s.......llas.Sp.aG-lp+shpllp.s..L..h..s..p..p.....s..l..S..PTsFu.SVHNussG.hoIhtpshsssouluA.upsoappuLhEAhshLpp..u...tppV.Lll.shDp....h......Pph..Yt.............p.s..hsaAlulll.p.s.usp....hp.hp.......................................ht.t.........tt.................tsh....hht.........................h.h................................................................................................. 0 34 79 119 +13566 PF13724 DNA_binding_2 DNA-binding domain Coggill P, Eberhardt R re3 Pfam-B_65234 (release 24.0) Domain This domain, often found on ovate proteins, binds to single-stranded and double-stranded DNA. Binding to DNA is not sequence-specific [1]. 25.00 25.00 25.70 44.70 20.50 19.40 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.76 0.72 -3.82 15 45 2011-01-25 13:21:28 2011-01-25 13:21:28 1 1 14 0 35 46 0 56.70 41 16.04 CHANGED MG.paRFRLSDMMPNuWFYKL+DMpKsRs+sstssstt...................ssouphppsSss ...MG..pa+FRLSDMhPNAWFYKL+DMp......+sRtpsssss.pt...................ssosppppsS...................................................................... 
0 4 19 27 +13567 PF13725 tRNA_bind_2 Possible tRNA binding domain Coggill P, Eberhardt R re3 Jackhmmer:P76526 Family This domain, found at the C-terminus of tRNA(Met) cytidine acetyltransferase, may be involved in tRNA-binding [1]. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.23 0.72 -3.91 114 1005 2011-01-25 13:25:22 2011-01-25 13:25:22 1 12 956 2 333 715 7 106.60 31 13.55 CHANGED Fc+Rh.....h.tLhu.hphpp..hs..........................h...ts..............................phsthlo.......................................shDh+RL....csYu.pshh.sacsl...lsh...lscLhhtthht..............hplssh....ppslLluhsLQp+sh-plsp-LsLsup...pt..l ................................................RRch..tLhp.as.hcs.ls.................................hts.....................................h.-ssLo.......................................................spDhp..c..L...suaA.hutt.shhss.....lss...LhRLl.ps.............................tl..........thshLhu+l.pptS.uplsppLp.LsGc..+..h........................... 1 106 180 269 +13568 PF13726 Na_H_antiport_2 Na+-H+ antiporter family Coggill P pcc Jackhmmer:C5B988 Family This family includes integral membrane proteins, some of which are NA+-H+ antiporters [1]. 23.60 23.60 23.60 23.60 23.50 23.50 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.95 0.72 -4.07 42 1096 2012-10-02 15:12:49 2011-01-25 13:28:17 1 4 1056 0 144 1175 19 88.20 47 20.18 CHANGED NAVllAVhlMLlLSLhR..lpVVlALhluAlVGGLlGGLulspT............lssFs.sGLGGGAplALSYAlLGAFAlAIo+SGLsclL.....Aptllphlu ........NsVlluVl.lMl.lLs.L..hR..lNVV.luLhluALVGGLlu....G.h....u....lsco........................lssFt.s.........GlssG.A.p.l.ALSYAlLGu.F.A.s.AI.S+SGlschLsppllphl.p............................ 
0 34 76 118 +13569 PF13727 CoA_binding_3 CoA-binding domain Coggill P pcc Jackhmmer:C5BFR0 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.93 0.71 -4.44 56 4439 2012-10-10 17:06:42 2011-01-25 14:11:05 1 19 2789 2 1096 3621 674 172.50 16 32.80 CHANGED thhslYps.hph+shhpths.clhtuWh.ls..h.h.h.h.hs..lha..t..h..p......s..h...hSR..lalshW.h.hsuh...sh.llhtRhll...tthl...pp.ht+....p...s....thp....htsl....ssss....uppht...ptl....p..pp.ts.u.h..h....h.l.GlaDDc.......s.ssp..s..h.s........uh..PhlGslspllchsRpsclcplalALPlssEpcIhcllpchtspsVsIRlhP ....................................................................................................h....hYp......h...h.th.h.p.p..hh..p....lh.t.shh..hs...h..l..h..h......hh....lsh........h.....h.....p............p.h.......hs..R.......hh..h..h...h.a........h....lhsh......hh...lh...h..hRh...hh.......+h.h.h.....................pp....hhp....................p............p................pts............hh..ls........usss....u...p...t.l..h....pt.l...........p...p.p.s.p....h...s...h..c......ll..uh....h..D-c...................t..s.tt..sp.....h...t...........................sh...s..l...h.G...s...h..p....p.l...............p....h..s.c.p........t..........p.....l...c.p........lhl...A....l.P.....s..t..t.ph.pc.llp.h.pp..h.s.sp.hhhhP................................................................... 0 367 716 919 +13570 PF13728 TraF F plasmid transfer operon protein Coggill P pcc Jackhmmer:Q9WTC0 Family TraF protein undergoes proteolytic processing associated with export. The 19 amino acids at the amino terminus of the polypeptides appear to constitute a typical membrane leader peptide - not included in this family, while the remainder of the molecule is predicted to be primarily hydrophilic in character [1]. 
F plasmid TraF and TraH are required for F pilus assembly and F plasmid transfer, and they are both localised to the outer membrane in the presence of the complete F transfer region, especially TraV, the putative anchor [2]. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.16 0.70 -4.85 53 761 2012-10-03 14:45:55 2011-01-25 16:40:05 1 4 447 0 91 751 65 167.80 27 71.99 CHANGED GWhWYs-s....p.pc.t........p..tsss...........ss....s....t.ts.................s....t............pph..ph..h+pthpchhspAlhpPo...ENltpahplQchhhc+uspFupsappslhppPpLDYslc.pP...h..sssutpshhp.tcp.pppp.pslppLu.ppaGLhaFYcu.s..ss.hspthusllpsFucpa.GhsllsVShDGshhs..thPpsch...DsGptp.plsl...p..hhPALhLVsPpo........tphtPluaGhhSp--LhcRlh ..............................................................................................................................................................................................................................................................tt.tpp.hltphs..pca..u..lh..F..Fh..pu.p..Cs.hCc......phsPll....pphu...p.p...Y....G...h.....s.Vhsl...o...l..D..G..tsss...............thP.phhs..............spu......hp....p.h..s.l.........h..hhPshhLlssps..........hph..hPls.Ghhs.spl.tp.................................................... 
0 24 44 71 +13571 PF13729 TraF_2 F plasmid transfer operon, TraF, protein Coggill P pcc Jackhmmer:D2BTB8 Domain \N 25.00 25.00 26.00 25.00 23.30 24.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.40 0.70 -5.59 29 545 2012-10-03 17:14:37 2011-01-25 17:05:16 1 3 385 0 74 341 4 257.80 36 66.69 CHANGED AshhNPALluttp.p.Dc....hu.lllP..ulGspls..D.DplhDc.hDslpDshDthpsshss.p................tssscLsssLpsLcsspAhussGsuhu.....lulPsphlshuhhsKuYssuhstusl.spsDlshLpsht..................sss.ssLsSputshuusls-lGlAlA+ph.s..hss.hslolGlTPKhQ+lpsYNYssolpsaD..ssD.acs......scapsscouFNlDhGhshphscsaplGlsupNLlupc..l-Tpphp.............................uhp.TYplcPhhTs.GsAaps..chhTlusDlD ................................................................................................................ushhNPA...L.lAhhc.p..Ds......hu.llLP..ulG..hphs.....D..csl.sp.lD.clpD.....p.h-..acpss.s.t..............................tssspLscpLpt....hpspp.hpupsGsulA......sulP.spsl......uh......sh.......hsKuYupshVsupl...sss.shphlcph.................................s..ppthsS.sss....upus.hlo-hGlulAKph.s........huu.pplSlGl.............T.............PKlQ.+lhhY......sYss..o..l....p..sYD....tsD..acs.............sch..s-suFNhDhGss..h...ls....-..paplGlsupNLluR-..I-TKslt.............................shppTYpl+PpsTs..Gsuaps..DhhTsusDhD................................................ 
0 11 30 58 +13572 PF13730 HTH_36 Helix-turn-helix domain Coggill P pcc Jackhmmer:C2JR21 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.65 0.72 -3.84 198 1833 2012-10-04 14:01:12 2011-01-25 17:46:11 1 15 1152 0 237 1777 177 53.80 25 20.10 CHANGED pp.Lshp..sphlhhhlhshs..........ttt..sas..o..............pppl.......uphh.......shu.cp.....olpctlppLpctGa.....l ...........................................p.t.tthlhht.Lsshu...............sppsh..saP....o..............hppl.....Acph...........thu.cp..........TlppslppLcctGhl......... 0 81 149 191 +13573 PF13731 WxL WxL domain surface cell wall-binding Coggill P pcc Jackhmmer:C2JPG8 Family The WxL motif appears in two or three copies in these bacterial proteins [1] and confers a cell surface localisation function. It seems likely that this region is the cell wall-binding domain of gram-positive bacteria, and may interact with the peptidoglycan [2]. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.73 0.70 -4.52 83 1929 2011-01-26 11:14:11 2011-01-26 11:14:11 1 25 266 0 120 1070 0 205.50 24 49.88 CHANGED ssAtss....so......sso.sspVpFpss..............sVsPsss.s..s.....s...ss.lsP...DP.....sts........uo..sus......Lslsas.s.s.hsFGppp.Ios...ss...pshhup.........................p.h...t...t.st............ptss......hlpVsDhRG..Tpp.....GWpLospts.p...h..psus.....tp.........pLsGu.p..................lshsssphts.ss.ss...ss...................sPsshss.s......hsls......Gs..........susslhsAssupGt.GsWhhpass...........................p......slsLslPupshK......AssYTuslTWsLssuP 
............................................................h...............hto....l.h..s................P.ss.s.............shps...s.......sts..........ss..sGsLslsh.s..s.s...hsFGp.p....Ios...ps...psahsp.....................................................s....p.h....t.sss..............ptss...alplsD.....t.R.G..Tps................GWp.Losp.s..p...Fpsss...........tp.........pLs...Gu..p..................lpht..ss..ss..ss..s...sp...ss..........................sPss.pt....s.........hsls....s..ss......................ss.sslh..s...A......s......p......s.........pGt....Goa..h.hphsp.....................................p......slpL.p...V...Pusssp......utpYpsslTWsLsssP............................................................. 0 60 97 102 +13574 PF13732 DUF4162 Domain of unknown function (DUF4162) Coggill P pcc Jackhmmer:C2JJQ9 Family This domain is found at the C-terminus of bacterial ABC transporter proteins. The function is not known. 27.40 27.40 27.40 27.40 27.30 27.30 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.69 0.72 -3.47 129 1360 2011-01-26 13:50:07 2011-01-26 13:50:07 1 3 1176 0 282 884 167 84.40 27 28.07 CHANGED Gslp-lKcpaGc...pp..lhlc....sc...tshp.t.....Lppl.......sGltph......p.p....p.p...ss...h..p..lp.lp.s.p..p..su.pp.lh.ptlspp..u.h.lppFp.ptPSLs-IFlcpV ....................................................Gslp-l+pp..aGp...pc...lhlp........oc....tshpc.........Lp.sl....................sl.pps.........p.h......p...p....pG.....h..p...lp..lc...s..-..s.......su...pc.lhphlspp.uh.lppFp.ptPoLp-IFhpt...... 0 107 202 246 +13575 PF13733 Glyco_transf_7N N-terminal region of glycosyl transferase group 7 Coggill P pcc manual Domain This is the N-terminal half of a family of galactosyltransferases from a wide range of Metazoa with three related galactosyltransferases activities, all three of which are possessed by one sequence in some cases. 
EC:2.4.1.90, N-acetyllactosamine synthase; EC:2.4.1.38, Beta-N-acetylglucosaminyl-glycopeptide beta-1,4- galactosyltransferase; and EC:2.4.1.22 Lactose synthase. Note that N-acetyllactosamine synthase is a component of Lactose synthase along with alpha-lactalbumin, in the absence of alpha-lactalbumin EC:2.4.1.90 is the catalysed reaction. 21.30 21.30 23.20 22.20 20.80 21.20 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.83 0.71 -4.80 4 775 2012-10-03 05:28:31 2011-01-26 16:12:21 1 14 122 59 468 672 185 120.50 40 36.04 CHANGED CPphsPlLVGshpV.Fp.VPSLs-IVcpss+l.PGGRa+PsuC.sRs.....RsAIIlPaRuRccHLRlLLY+LH.FLpRQQLsYGIaVIcQtGNGsFNRAKLLNVGhhEALp...aDChhLHDVDLLPEND+NLYsCs.p ................................................................................................h.........t........l...GG.pa.p.P....pCh..stp...........+lAl.llPa.R......s............R....p..c.HLhhhl.aLH.PhL......pR....Q....p....l....casIYVlpQs.s.s..t...h.FNRAtLhNV...Ga..h...EA.h+..c........s.....a..D..C...hlFHDVDLlP.s.D+.NhYtC........................ 1 160 192 326 +13576 PF13734 Inhibitor_I69 Spi protease inhibitor Coggill P pcc manual Family This family includes the inhibitor Spi and the pro-peptides of streptopain (SpeB). SpeB is produced as a 43 kDa pre-pro-protein, which is secreted via the recently described Sec secretory pathway Exportal. There is tight coupling between this inhibitor and its associated protease: the gene for the inhibitor Spi is located directly downstream from the gene for the streptococcal cysteine protease SpeB, and the sequence of the inhibitor is very similar to that of the SpeB propeptide. This is an example of an inhibitor molecule that is a structural homologue of the cognate propeptide, and is genetically linked to the protease gene [1]. 
21.90 21.90 22.10 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.30 0.72 -3.88 3 136 2011-01-26 16:26:11 2011-01-26 16:26:11 1 5 74 14 8 126 3 109.20 27 17.66 CHANGED M.........................................E.pF.RopsEAhtlApoFhupssp.................oKspLRlppLS..hPsDT......LaIlAL.s.GGFlLVSGDTRh.slLuho.csNLDhspssV.shlsVFtcQl....................................NFu- .................................................t..........pAhphA.p.ah...tps.t..t......................pp.hp...h..psp.thu.........ss..ss..............shYlaNh...sss.GFVIVSGDcRss.pILGYSppGsh..D..h.s....p..t...N..ltsh..hpta.p........hth......................................... 0 2 6 8 +13577 PF13735 tRNA_NucTran2_2 tRNA nucleotidyltransferase domain 2 putative Coggill P pcc Jackhmmer:C2JK63 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.52 0.71 -4.39 46 1530 2012-10-01 20:28:14 2011-01-26 17:21:14 1 13 1500 6 255 1460 430 149.20 25 36.36 CHANGED hphpspppuWuhLhhtls.h...ppspsFL+sWKhSNchI+pVppllphlphhp.......ppphsth....pl.....YphGpp...hhhhs..ppl....tph...hs.hs............hsh........pplppha..psLPI+.......s++-LslsGtDLlpthshpPGPhlGclLpplEptllpGclsN-cculhpas .................................................................................................................hhh...t..........pp...spthL+.phKhS.N.p.h.h+p...l...ppllp..hhphh............................ppphp.ph.........l...................Yc.h.s.hc...........hhhps....pl...........tps....ht.ts...........................................ssh.....................ptlpchh.....pp..L.....s.....l.+.....................s.p.+..-....l.....s.....lsGscLlpt.h.uh.p....s.GPh.lG-lLpplEtullp.Gpl...p.Npcctlhpa....................... 0 109 180 216 +13579 PF13737 DDE_Tnp_1_5 Transposase DDE domain Coggill P pcc Jackhmmer:Q6D6U8 Domain Transposase proteins are necessary for efficient DNA transposition. 
This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 27.00 27.00 27.00 27.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.34 0.71 -3.91 55 810 2012-10-03 01:22:09 2011-01-26 17:53:35 1 3 314 0 228 802 109 97.70 47 42.18 CHANGED hpRGSLThWlDpch..tW.hs..s.p.G+RGRsptaSDsAIpss.LhlKslFsLPLRtspGFlpSLhcLssLshssPDaSolsR...Rt+slsVslsh+ssstu...lHLllD.STGlKhhGE ...............................hpRGulThWlDtcs..tW.tt...spp.spRGRsppaSDhAIpss.LhlKplFphsLRtspGFlsSlh.pL.hsl.sltsPDYos...lSR...RtKplslsh..............p..s.scu......hHLhlDuTGLKhhGE.................................. 0 16 73 111 +13580 PF13738 Pyr_redox_3 Pyridine nucleotide-disulphide oxidoreductase Coggill P pcc Jackhmmer:C2JNY9 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.64 0.71 -4.26 38 7189 2012-10-10 17:06:42 2011-01-27 11:41:36 1 104 2015 19 2924 28500 10134 196.30 22 41.32 CHANGED hVlGAGssGhuhs.stL....hcp..sh......s.s......llllD....pttp...sGup...at...p..h..h..........spl..hp.....P.uh.h........shs...h........shtth.shsst.s...th....h...tph....sous-.l..s.p...........Yhpplhcph....hl..s.......lp..htspls.tl...p....ts................s..st...ap..lps..ps..t..................plp...sctll.usG...hhtp..P..ph..s..hsu................s....t....ps.l...ths....pl..hsh..t...........c.....h..ts.....pphsVlGuG...+oAhssshtLhct.....u...p......c...lshlt.ppsshh 
................................................................................................................................hllGu.G......uG.luhu...hp.L........ppt....Gh.......p................................hhllE.....................p..t....t...p.........h.G..us..........Wt.........p.....h.......................................................................................h.....................s...s.........................................................th....t.h.....s.h.t........................................t............pt.h......................ss...t....s....c....l.......h....p........................................................................................................Y..l...p....p.....h.......s...c..c.a............sl...p...............lp...h..s...s...c.....V....p....s.s......p.......hs................................................s..sp.......ap......lps.....pss.....................................................php.......uch.llh.AoG.....h.h.sp.....P...p..h...P.s....lsG.........................................t.....t........ph..h........H..s.s..............pa......pps....h..........................s.......h....p.u..............++V.s.VlG.uG..........s.SA.......h..p.hs.....p..lspt..............u......t..............p..lshh.Rp...................................................................................................................................................................................................................................... 0 721 1687 2444 +13581 PF13739 DUF4163 Domain of unknown function (DUF4163) Coggill P pcc Jackhmmer:A7HM60 Domain The structure of this domain is and alpha-beta-two layer sandwich, identified from a Fervidobacterium nodosum Rt17-B1 like protein. The function is not known except that it is found in association with Heat-shock cognate 70kd protein 44kd ATPase, Pfam:PF11738. 
27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.40 0.72 -3.23 48 615 2011-01-27 14:33:03 2011-01-27 14:33:03 1 13 479 2 148 539 7 102.50 19 37.12 CHANGED slphpphp...hp..s..shhp..h.plphPhlps.hp.spphppplNphl..cppstph..............hpc.........h....c....ct.upc.....th.......cpshs........hs..Ythps.....saplphs....ssshLSlhh.shYpYo.GGAHGhTs ............................................................................s.........p..p...hhp..h.plphP.hp.....s...t..spp......hp......p...p....l..Nphh...pp.p.spph..........................hpp.........h....p....pptpc...........ht.............pps............hs...aphps.....saclphs....psslLSlhh.shYpYs.GGAHG..................... 0 60 110 125 +13582 PF13740 ACT_6 ACT domain Coggill P pcc Jackhmmer:Q6D7R4 Domain ACT domains bind to amino acids and regulate associated enzyme domains. 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.02 0.72 -4.26 82 3060 2012-10-02 00:29:19 2011-01-27 17:00:36 1 22 2494 6 650 2293 345 75.40 27 39.29 CHANGED phLlIoshGpD+PGlssplsplluptsssIlDsp.AhltsphoLhhLlpss.........ts..sh..splppsL.hhupchsl....tlhhp ...........hlITllGtD+sGIlsslophlupt.s.....sNI..hDh..p........s...h....l....s......s....t....F..o..h..h.hlls.ss............ts...sh....s..tlc..ssLtth.utplsl.l.h................................................ 0 198 402 547 +13583 PF13741 MRP-S25 Mitochondrial ribosomal protein S25 Coggill P pcc PfamB-B_2836 (release 25.0) Family This is the family of fungal 37S mitochondrial ribosomal S25 proteins. 
25.00 25.00 25.10 25.90 24.50 24.90 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.54 0.70 -4.89 31 140 2012-10-03 14:45:55 2011-01-28 09:39:44 1 4 131 0 108 137 0 205.30 38 81.68 CHANGED M..KlphpAspVhppsos.lpu....GhlpppPsWaslVushPPsp..phsRp.hhpp..sppphtph...tp..................hhcsp.pth..+pppssplapss+lpa.EDpLRchFa+pHPWELuRP+lllEss....Gc-..tchDWS+.hpQhsKsLDGESVVQRsLaLlps.....pshohhcAYDhARhEFY+LRhpEEl-ppVAtEEAchaGAsFusopl-hGhphEpchl-sWcphApppoplhpuc ................................thphpAhp.Vhpps.t.hps.....Gh.h.....pptP....sWhsllsslPPsp..hhsRp...pp..stphhtp.......................................hps.p..t.....ppppspclapP.c.lpY...EDpLRp.Fa+-HPWELuRP+lllEss.......GpD..pphDWS...+.lp..Q.G+.LDGE.............SVVQRpLaLhps.................tshohtpAYDhARtEFYpLRhpE-lEp+VAtEEAchhGAhFG.s..tlphGhphEpp.h-pW+thAtpcsphhpt.t......................... 0 28 59 92 +13584 PF13742 tRNA_anti_2 OB-fold nucleic acid binding domain Coggill P pcc Jackhmmer:Q5FCF9 Domain This family contains OB-fold domains that bind to nucleic acids. 27.00 27.00 27.00 27.00 26.60 26.80 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.21 0.72 -4.14 80 4104 2012-10-03 20:18:03 2011-01-31 09:50:30 1 5 4053 0 822 3044 1031 97.40 34 22.09 CHANGED hhols-Lsshl+pslcs.s...sphaVpuElSshpt...ps.GH..hYhsLh-............sp..Aplpushapsphptlp.........hp...............................psGhcVllpuplsaat.hG.....hslhlpclcs ............hoVopLsphl+thl-p..c..h..spValpGElSNhpp...sS..GH..hYFoLKD............cp.......Aplp..sshF+..ss..sp+ls...........................Fps..........................ccG.pVlVpuclolYEspG...........sYQlhlcphp............. 
0 260 536 693 +13585 PF13743 Thioredoxin_5 Thioredoxin Coggill P pcc Jackhmmer:Q5HAE0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -10.83 0.71 -4.71 36 777 2012-10-03 14:45:55 2011-01-31 10:34:12 1 1 761 6 119 791 73 177.00 31 71.20 CHANGED YlFlDPlsscCaphEstlpKLth-.....Ysphhpl+alhhspLpslsttttp......................hshssssh..hpp.hsssY.uuLAhKAApLQG++tGtpFLpcLQctlhlp+pslop.-llhphApps.GLDl-.FpcDhcSshApcuapsD.+lupEMsVppsPThVhFN..ps.c-pGlplpGhhsYcla.p .....................................................................................................................................YhFhDPhsssCaclcshlh+Lph-.....as..p...h..l...p...l..R....a..Ihs...s.....s.l..p..s.lsps.tp....................................ths.p.h.....p.pss..s...s..h..as..s..u.L....A..hK...AA.c.L...........Q.........G..+.c......cu.c.c...F.L.c.tl...Qptl.hlppps.hss.......ph...lhch....h......pss......G.....lDl..-....hF+c...Dh...p...o...st.h.p.c.uhp....p.Dh....+lA.pE.MpIpptPolVhFs...ps.c-pGlhlpGhhshchh........................... 0 33 73 99 +13586 PF13744 HTH_37 Helix-turn-helix domain Coggill P pcc Jackhmmer:Q5HBM9 Domain Members of this family contains a DNA-binding helix-turn-helix domain. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.09 0.72 -4.17 55 1158 2012-10-04 14:01:12 2011-01-31 10:40:40 1 9 818 4 307 2718 211 79.70 24 74.93 CHANGED lapDhuhscucphp.....hKupLhht....ItchlcpppLoQpcAAphhslspPclSplhcu+lsphSl-pLhshLstLGtcl-Islp ...................................................pttt...t..............h+s.p.lh..ht..........lp.phh.cp..pt....h...o..QspsA.phhGloQPpl.Sc.l.p...t...+...h....s....p...h...s...l...ssLhphl.s.t.h..Gt.clcl.h........................ 
0 67 178 251 +13587 PF13745 HxxPF_rpt HxxPF-repeated domain Coggill P pcc Jackhmmer:Q6D9B1 Domain This family is found in non-ribosomal peptide synthetase proteins, and can occur up to twelve times. 24.10 24.10 24.10 24.10 24.00 24.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.81 0.72 -3.67 1348 6665 2011-01-31 11:02:57 2011-01-31 11:02:57 1 1095 1165 1 2334 7456 120 92.30 29 5.06 CHANGED sHQDlPFEpLV....-t.L..p.s.p....R.shu...+.s...PLFQVhhsh..p......s..s.....s.......t....s...t.........h.p.l.....sG.......L..p.lp...sh...s...h..s....t..s.....su+FDL....slpl.tE....ps......s.......s....lpsshpYso-LF-psTlpch ...............................tHQDlPFEpLV.......-t.L..p.....s..p..............R...sh...u...........+..s...PL.FQlhhsh.p..............s.t......s...............t.....s.t.............................h.p.l........su..............l...p..lp.....s.h........s.....h...s.......s..s......................su+F.....DL....slpltE......ps..........s...........................s..lpsshcYso-.LFctsTlpph............................ 0 609 1300 1915 +13588 PF13746 Fer4_18 4Fe-4S dicluster domain Coggill P pcc Jackhmmer:Q5FDF3 Domain This family includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. The structure of the domain is an alpha-antiparallel beta sandwich. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.46 0.72 -3.55 40 1675 2012-10-03 08:56:43 2011-01-31 11:20:17 1 15 1374 0 499 3018 1748 74.80 36 17.46 CHANGED lChahCPasR....................hQ.usMhDp...colslsY....................c.ht.hp........................scs..........................ct....ttpp....hcphp...........................pst....shGcClD..CstClpVCPsGIDIR .................................................................................hChahCPasR.............................hQ..ushhDp.......so..hh.ls..Y.................................................................................c..hc.tt........................tpt...........................p........hh+t...hcpht...............................................c.tt.....shG-ClsCstCVplCPsGIDIR...... 0 164 331 422 +13589 PF13747 DUF4164 Domain of unknown function (DUF4164) Coggill P pcc Jackhmmer:Q5HAR4 Family This is a family of short, approx 100 residue-long, bacterial proteins of unknown function. There is several conserved LE/LD sequence pairs. 27.00 27.00 27.00 27.10 26.80 26.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.84 0.72 -3.85 32 182 2011-01-31 11:46:12 2011-01-31 11:46:12 1 3 165 0 69 156 20 88.10 45 84.00 CHANGED ssssplptAhpRLcsAlspLEsAl-pRh-tccst......s-hcscl.ptlssDRuRLApELDputsRsp+LccsN+ElucRLssAhEoIRuVLsc ................s...stLcpALcRLcpAlssLEpAV-hRl-.p-pch........uEhEpEl.Q+hsADRSRLApELDpu-uR.u.cRLEtsNREVS+RLs....oAMETIRuVLD................ 0 12 39 48 +13590 PF13748 ABC_membrane_3 ABC transporter transmembrane region Coggill P pcc Jackhmmer:Q6D8M5 Family This family represents a unit of six transmembrane helices. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 237 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.37 0.70 -5.33 20 130 2012-10-02 13:23:42 2011-01-31 14:29:18 1 3 117 0 24 160 15 207.20 47 78.77 CHANGED L+pIs+patt+LhlTasLVlAENsLhlhYPLhuGFAIsullsGsstpAlhYuslVllhWllGuuRRtlDTRsFuRIYscLAVsVllsQRppstssSolsARVsLSREFVDFFEpHLPhLhTSllSlsGuslMLLslEFhlGlusLslLhhhhhllspas++NppLat+LNNcL.....E+-VsllspsptpsLp+HYchLu+LRItlSDREAhuYhhIGlstulLFshslhhhohpssssAG ......................................LKtlsppa+K+LhhTF.LVshENl.LhLhYPlhuGaAIN....A....l.......l.s.G.p.s.......hpA....l.h....Y.A.ll..V...llh....WllGAARRhs....D....TRTFs....RI....Ys....cl.........AVsV....l......l.p......QR......p.......p..........p..h..spS........sls........AR......V.....u......L....S....R.E.F...V.s.FFEcHLPhhhTSllSlhGAslMLLllEFWlGluul...s...ILshhhhl...LPpFsth.......s-pLah+LNNpL............E+-schlppu..st..ppLhRHYshluRLRlhlSsREAhuYLslGhuhulLFuhshshhohpshsoAG.......................................................... 0 5 10 18 +13591 PF13749 HATPase_c_4 ATP-dependent DNA helicase recG C-terminal Coggill P pcc Jackhmmer:B0G2L3 Domain This domain may well interact selectively and non-covalently with ATP, adenosine 5'-triphosphate, a universally important coenzyme and enzyme regulator. 25.60 25.60 25.70 25.60 25.40 25.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.56 0.72 -4.19 156 1792 2011-01-31 15:44:31 2011-01-31 15:44:31 1 38 1018 4 435 1610 115 86.20 24 18.82 CHANGED GsplplplacDRlEIpsPGslhsslshcp...hhst.s......spsRN.hlAslhpch.............s.......h............h.......E......ptGoGlp.+lhpthcp....tthstPpa.p.s...ss..sp..hp..Vsl ..........................................lplpha....sD.RlEltsP..Gsl...sloh.....-p.....hhst.............stsRNshluplhpph.............s.......h.............h.......E......phGsGlp.+.lhpthpp....hth...tPph.p..p..pt..st..h..hh......................... 
0 146 311 382 +13592 PF13750 Big_3_3 Bacterial Ig-like domain (group 3) Coggill P pcc manual Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 27.00 15.40 27.10 15.40 26.90 15.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.96 0.71 -4.80 8 166 2012-10-03 16:25:20 2011-01-31 16:04:29 1 38 89 0 42 589 311 132.50 27 12.60 CHANGED ssaphsFshssLP-GpYollt.sApDpasN...ssspoht.sltlDsTsPolsl....ttslu-GuslpGLEsLcIoLsDshssu..sLoSlsLsGGPssDpVpLoWsstGcshYtLpYPRlFPSL.csGEoYTLTVsApDstGNssspossFpYhPsNLlplcsL ...................................................hthpas.h..p.tls.-G.pY.s.l.s.s..tApDthsN.....sss.p.s.ht.....p...l.......s.h.D.s..T.s..P...sl....sl........tss...s..s..ss..s.....l..s.l....-...s...l....h...I.s..l..s..D.shs.s...pls..ph...t..L..hGG.ss...s......-..t..l....pl.sh........u.ps.h...a.h...pYs.h...h..F...P..s.........h..p.....s.p..Y..plss.s.DttuNhhp....ts.tFpY...P............................................................. 0 6 17 34 +13593 PF13751 DDE_Tnp_1_6 Transposase DDE domain Coggill P pcc Jackhmmer:B0G2Q7 Domain Transposase proteins are necessary for efficient DNA transposition. This domain is a member of the DDE superfamily, which contain three carboxylate residues that are believed to be responsible for coordinating metal ions needed for catalysis. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.66 0.71 -4.12 60 2925 2012-10-03 01:22:09 2011-01-31 17:05:27 1 17 1173 0 565 4140 496 101.80 25 32.07 CHANGED hCPtGpph.phptp.thtt..t.p..s.hs...thht.aps..p..sCpsCPh+ppC..sps........pp..sRpl...ph.p.p....h.pp.h.t.pcsccp...h...poc.ttpphhppRs.slEsshupl+pphuhc+h.ph.RGhp+sphphhhsshuhNlc+lh ..............................................................p..t...............................t......................t...Ct..t..C....h........C................................t.....hh......hp.........pt..h...t.tp.hp..pt..........h......ho..t..t....s.p.p.h..ht....p.Rp.slEtsF.uphKph.h.Ghc.+.h..ph....RGh.pp..............sphphhlshhuhNl+Kh...................................... 0 215 392 444 +13594 PF13752 DUF4165 Domain of unknown function (DUF4165) Coggill P pcc manual Family \N 25.00 25.00 25.00 95.80 24.60 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.54 0.71 -4.36 10 99 2012-10-03 16:25:20 2011-01-31 17:26:11 1 4 41 0 8 59 2 123.90 43 11.92 CHANGED ssApAplhpYSFTDTsusp+olpPuo.salNPs..oslolsL.uGLDRhl+loVh+ouush.hhoosTo+lhsAschlossGs-YYGKclsLPAL.uEGsaoL+sElLsssGssVuopsYslsIDTTuP .s.sApAplhEYoFpsssGsp+olsPss.sYlNPs..uslolsL.uGLDRhl+loVh+usGo..lhSTsTo+lhsAschhussGp-YYGKclsLPAh.uEGsaolcs-ILs.sussVsTssYshtlDsTPP....... 0 0 0 3 +13595 PF13753 SWM_repeat Putative flagellar system-associated repeat Coggill P pcc Jackhmmer:Q6D875 Repeat This family appears to be a repeated unit that can occur up to 29 times in these outer membrane proteins. It is putatively associated with a novel flagellar system. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.64 0.70 -5.38 14 4913 2012-10-03 16:25:20 2011-01-31 17:42:37 1 401 519 0 1126 8114 1151 233.00 21 45.74 CHANGED TPss..pp..ssssh..plsls...lpDssGNss..s.s..o.....ts.hs.lDTc.......PpVsls....Iocscl.sutpssssh...oFotssos.hsssshh.hsss.....s.Gsh.....GuL....o......ls.osG.p.Wossh...TP.........ps..slpsu.-soI.pVs..hVpDstGNu..............s..su..sossh.oIDTt.......PpVoVo.............Ioss....cltuGpssTsTFTFsEsV..o.GFstsDs.sho.....sGThG.LssVG...oDGhsWosshTPps..spss-s....slpVs..As.VpDAtGNAs...ou.....S.s....sa.olDT......ps.Pplolsh...uDshls....usEsus.Thshotsls.shsssDs.h.....sos.pG..s...hss.T.....sVss...su..p.aosshssts..stssssT .................................................................................................................s.................................st.................................h....h...............s..............ht.......................s...............................s.............h......t................s....h..p..................h........t..s..s....t.....t.l....l.h............s.G.t....................h.s.h............s.........hs...ssG.....s...Woh.sh........ss....................................s........sL.ssG...s.h...s..l....sss........ss..D.hs....G.N.s..........................................................s...ss.........o.h..s.h..sl.D...st...................s..s......l...s...l...s..s..................................................................................................hs...ss.............s.l.h..s..s...s...p......h.......s..........s....h.s....s..t.s......s..s.h......t.s....s...p....h....l..p.lsh...........sG...t....h...h.....ts.s...st............s..s.G....sW.s.h..s.h....s.s..s.........t....h.s.su.............sh..s..ls.....ss....spD.huGNss.....os......s..................sh...s..l...D..s.............th...s.....s.....h.....t.....h.....s.....................t........t.....................................
.......................................................................................................................................................ss....................................................................................................................................................... 0 251 639 891 +13596 PF13754 Big_3_4 Bacterial Ig-like domain (group 3) Coggill P pcc Jackhmmer:B0G3A4 Domain This family consists of bacterial domains with an Ig-like fold. Members of this family are found in a variety of bacterial surface proteins. 30.80 30.80 30.80 30.80 30.70 30.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -8.88 0.72 -3.64 416 7056 2012-10-03 16:25:20 2011-01-31 17:55:29 1 431 622 0 1020 10654 674 55.10 32 17.22 CHANGED Gp.s.h...s..s.ss....s.....ss.....GsWo...h.....s.....s.....s.....ss...s.......-G....s.a..s..lo..l..suoDsAGNs.u.ss..s.s.s.....l..s.l..Dos..sP .........................................................s..s.s.s.....s........ss...GsWo....a......T.....s.............s........ss..hs............-G.....s.a...s..lo..l..sAoD.s..AGNs.u.ss.......s.s.h.s...l...s..l......DTps.............. 0 174 349 751 +13597 PF13755 Sensor_TM1 Sensor N-terminal transmembrane domain Coggill P, Eberhardt R re3 Jackhmmer:A3JX63 Family This domain is found at the N-terminus of the sensor component of the two-component regulatory system. It includes a transmembrane region and part of the periplasmic region, which is likely to be involved in stimulus sensing [1]. 21.90 21.90 22.40 22.60 20.80 21.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.40 0.72 -4.47 43 271 2011-02-02 10:44:03 2011-02-02 10:44:03 1 3 271 0 92 230 114 76.40 43 13.15 CHANGED RtRRuhhslptSsLTR+IlshNLlALslLVuGlLYLNphR-uLltpRspuLlopuclIAsshtAp...ussssshsosDs .........h..t+hhhphhhSSLTRRIlhlNLhALslLVuGILYLNQFRpGLI-A+lpSLhsQucIIAuAluAS...usssss.hhhDs............... 
0 27 58 69 +13598 PF13756 Stimulus_sens_1 Stimulus-sensing domain Coggill P, Eberhardt R re3 Jackhmmer:A3JX63 Family This domain is found in the periplasmic region of the sensor component of the two-component regulatory system. The periplasmic region is likely to be involved in stimulus sensing [1]. 29.70 29.70 30.20 29.70 29.30 29.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.46 0.71 -3.68 35 203 2011-02-02 10:46:24 2011-02-02 10:46:24 1 3 203 0 75 162 67 111.60 43 18.97 CHANGED lsPEpVuPlLRRLhsPT.poRARlYDt-GpLlh....DSRsLh.....spuplhph-LP.Psc.sppsshhcphhphhpphh.ts...............cLPlapE.hssssGptasEVtsALsGp.hssslRhspcG .......INPE+VuPlLRcL...ISPT.pTRARIYDppuslLL....DSRsLY...........upGtVlRaDL........P...Plc..s.c..........p.ss.lh.......ERhhshlpphh.su...................sLPlYpE.tssusGtsY...EVhpAL.s.Gs..tsshRhsp+G........................................... 0 23 45 55 +13599 PF13757 VIT_2 Vault protein inter-alpha-trypsin domain Coggill P pcc Jackhmmer:A8MTC4 Domain Inter-alpha-trypsin inhibitors (ITIs) consist of one light chain and a variable set of heavy chains. ITIs play a role in extracellular matrix (ECM) stabilisation and tumour metastasis as well as in plasma protease inhibition [1]. The vault protein inter-alpha-trypsin (VIT) domain described here is found to the N-terminus of a von Willebrand factor type A domain (Pfam:PF00092) in ITI heavy chains (ITIHs) and their precursors. 23.00 23.00 23.10 23.00 22.90 22.80 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.48 0.72 -4.52 10 116 2012-10-10 13:59:34 2011-02-02 13:41:04 1 10 54 0 56 544 59 76.80 45 8.33 CHANGED PGLlNhpotss...............LPLpuSclsuClpGhuLuhTAoLTYtNspstsl-Gs.FlaPLs-sssVlGF-AhluuRhlssplppcs .................................................PGLhN.tohss...............................LPLosSsVsuClsGhsLulTApLTYtN..p.s...p.P...h.-Gl..FVYPLsE.spsVsGFEAh.lus.RhVohQlps+.u.............. 
0 11 18 34 +13600 PF13758 Prefoldin_3 Prefoldin subunit Coggill P pcc Jackhmmer:P43573 Domain This family includes prefoldin subunits that are not detected by Pfam:PF02996. 23.40 23.40 23.40 23.40 23.30 23.30 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.10 0.72 -4.22 23 73 2012-10-02 17:27:01 2011-02-02 13:51:44 1 5 73 0 60 162 0 97.40 41 15.65 CHANGED pcSlh+W+ph.u...EY-uLK-ElssL.............scsuop--llcluR-FsGoLVsEcElctILGc...pp...slpRo+pQVl-lloRRIDYVppNlsTlEKRlcsAEs+Ls ..........................................pcSLhHWphW-AEY-uLKEElpsL.....................sssp--...llc...Iu....R..-...F..s....GsLVsc+ElctllGc...pp....thpRotpQllshlsRRlDYVppNlsoLEKplcsAEs+L............ 0 10 27 47 +13601 PF13759 2OG-FeII_Oxy_5 Putative 2OG-Fe(II) oxygenase Coggill P, Eberhardt R re3 Jackhmmr:A3JXF3 Family This family has structural similarity to the 2OG-Fe(II) oxygenase superfamily. 27.00 27.00 27.10 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.37 0.72 -3.73 92 313 2012-10-10 13:59:34 2011-02-02 14:23:49 1 52 193 4 101 350 4039 98.90 26 31.52 CHANGED ssWsslhp.pGshpssH...hH...ssu...hlSGshYlpss.p..ss............u......shph.ssc........hshh..hs.s.s.h....t.....t...t....tshhh....lpPc.s.GplllFPSaLhHpV.hs.tuc..tpRlSluFN ............................t.hWhsh.hp.pG.shpssHhH....ssu...hlS.....GshYlp.hPp....ss...........................u......shth.ssc..........tthh....hs...t.s.........t......t..t....h..tshhh....hpPp.sGplllFP.SaLhHt...V...s.......t......up.......p.....pRloluFN.................................... 0 39 72 89 +13603 PF13761 DUF4166 Domain of unknown function (DUF4166) Coggill P, Eberhardt R re3 Jackhmmer:A3JYW7 Family This domain is often found at the C-terminus of proteins containing Pfam:PF03435. 
23.00 23.00 39.70 39.60 22.80 22.50 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.14 0.71 -4.36 56 283 2011-02-02 14:29:42 2011-02-02 14:29:42 1 4 268 0 77 250 15 170.60 31 72.13 CHANGED LsstlpchHuh....tss...s..hhpGpsclt..tusp...hhs.....+h.lstl.....ht......hP.....psupplPhplphps.....sss...up.....pWpRpFst.........ptFcSph.h...st..s...sshlhEthG.....s...hthpls....lps....p.s.GuLchpspp....hph....hG....lPLPth.Ltspupsp...Ethc..-......spapFcVclphPhlGh.lhpYpGphp .............LtPtlpcpasl.......psshshpGphcph..huus.....hhs.....+h.lhtl.....ht.......hP.....cpGpclPhslpsps.....pts..sp.....pWsRpFhh.t.h.+hFsush.h......st..p...........pstll-ahG.....t...lthpLs...lps..............c.p.GulphpSpc...hh......hG...hlPLPpa...Lh..spupsh...EphD..-......ppF+hcVpVpsPll.Gs..LhpYcGpF... 0 18 47 62 +13604 PF13762 MNE1 Mitochondrial splicing apparatus component Wood V, Coggill P pcc Jackhmmer:P24720 Family MNE1 is a novel component of the mitochondrial splicing apparatus responsible for the processing of a COX1 group I intron in yeast [1]. Yeast cells lacking MNE1 are deficient in intron splicing in the gene encoding the Cox1 subunit of cytochrome oxidase but do contain wild-type levels of the bc1 complex. 20.00 20.00 20.00 20.00 19.90 19.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.68 0.71 -4.12 10 32 2011-02-02 14:48:45 2011-02-02 14:48:45 1 1 31 0 20 30 1 146.20 25 23.00 CHANGED s.hpsShhGslhsNLpshpcaIpsHhsYhpppshspshpslFlNslLsHlslapNaouhlphLcsl........phlpssshs...sahcssoF+lIhpSl.SNSsSuKlsuhtLasaLKp..pcl.........plTscsYpsLlpusL+G....ta+-sl.FYlYcYLpsa ....................h.s.phusl.hNLpshpcalpph.p.hpt.pshppsh+shFIsslLsHls.stpsashhlsllcpl.........hL.ssslh...s.hcssoa+hlh+uh.SpssSsKhshhtLasaLpp...sh.........phospsahphlpsshph......p-hh.Fahaphlhs.................................. 
2 2 9 19 +13605 PF13763 DUF4167 Domain of unknown function (DUF4167) Eberhardt R re3 Jackhmmer:A3JZ71 Family \N 25.00 25.00 35.50 35.50 20.60 19.70 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -9.78 0.72 -3.96 45 295 2011-02-02 15:58:01 2011-02-02 15:58:01 1 1 292 0 94 262 1061 82.10 52 35.05 CHANGED RuR......s.pss..........p....spt.ppss..Ns..hsR..sa...-S....sGPDs.KlRG..oApplhEKYppLARDAtuuGDRVhAENYhQHAEHYhRllsst.ptptp ...............................sRs.pss...........s....Np.s..+pss...Ns.....hsR..sa....-S....NGPDl.KlRG..sAQpIhEKYtpLARDApuSGDRVhAENYhQHAEHYhRllsuAptp........................ 0 26 59 71 +13606 PF13764 E3_UbLigase_R4 E3 ubiquitin-protein ligase UBR4 Coggill P pcc Jackhmmer:B3KMT2 Family This is a family of E£ ubiquitin ligase enzymes. 23.00 23.00 41.20 31.10 20.40 19.80 hmmbuild -o /dev/null HMM SEED 802 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.72 0.70 -13.40 0.70 -6.66 16 189 2011-02-02 16:43:37 2011-02-02 16:43:37 1 17 115 0 133 204 7 616.10 39 20.09 CHANGED 
MspNPYSSs-.ulGPLMRDVKNKIC+ph-LluLLEDD.GMELLVsspIISLDLsVpcVYEpVW......................pts.......tp....................ssPMslsYRhpGL.G-ATEphI-pL...pssp-EppDsEtpaphAulh.p-CGGL-shLshltplp.c......hpusp-hlshlL+LLhaCsKl+pNRptLLp..lGALshLLcshppAassss.......htluEplL......................hIhEollpEAs.pssluts.s..hpsss..............................s..pchlthFL-pls.....ss..hh+sNtp...hctlsRllPhLoaGcspsMcsLlcaF....cPhL.p....F-phDp...............c+.s......s-p.............p..ht..lEsFs+lu-ulcssssGc+LK-hIlppGIsptAhpYlccphsss.....tpp...s.SsEWtphLp+PSLPhlLphLpGLupGH.sTQpt.lc.cshI.slLHtLEpVou.-pcIGoLAENLLEsLuc.ppt.....lsc.....plpplRctT+tEp+RhAhtpREchLpsLGMch....sp...tG..pl..l.su.ssllcshE-lc...E.E-GLsChVCREGYsh+PsclLGlYoFoKRsslss...........sp.spt.pts.....................hsYTTVSHFNllHapCHpsAhR....Lpps....+cEWEuAuL+NupThCNsLhPlhGPpVspusaspslspahsslpslup.tsssphph..hs.aDItLLLsRFApptSFSsDs+GGG+ESNh+hlPahlphshaLLsp.......osstp..R..c....p+s..ltualo.s....sspphhps................shph.....-ss..ahhVhSLhs.pSh-pWpppRhsaLpRhltpuah...pah.pstsssch......................s.pspcp....aslh+PhLlaauLl-pl.phFKhth....................o.ssst-sW.hshpE+LppN.pthlsts+clLphh.-EhhsspDhpEhhDlsG 
....................................M.tNPYsS.c..hGP.LMRDlKNKICpph-hluLl-DD..G.MELLVssp.Ilu.LDL.....slt.p..VYcplW.......................................................................................................................................h.p..tp..............................s.sMplhYRhpGL.G-ATE.hlc..pL.........ps.p.p.......pcp...c.E..athsssh.tpssGLphhlphlttlp.p.........................hpp.s..p..lth...l.....lcLh.hssKl.chspp.Lhp....hsslshhLtsh.phsh.stt...........st.sEplL.....................................lhE.lhtcus......tpsht...t.....h.t......................................................................s..p.l.hhlpphs...........ss..hhcps..ph..hptlhRllshLsaGp.ttMthLhphF....pshh.p....apphDt........................pp.p......cp.................p..hh..l-.Fshlstul.........p..p.ss...GtpLKshll.phGlh.ptuhp.YhtpphPsh....................................hph....s.us.hpphlp..pPuL.hlLphLpG..........Luh..t.H.s............TQ........hh...l......s..p..............p...l.s.lHtLEtVu.......u..p.......ptlGs..lAENLL.-sLpp.p.t.............................hspplpth....RctT+tEp+chAhthRpc.LttLGMph....sctG...pl...ssp.sshlpphcc.l.......-..EsGLs.ChlCREGYphpPschLGlYsFo.KRssltt......................hp..p...ptp.............................................................sYoTVoaFNllHhpCHhtAhR.....htps+cE.........W-uAsLpNspTtCNslhPlhG.P............p...Vstusassslsca.s.lpp.h..s........t..tpt.....php....h..aDltLLhhRFAht...tSFpsDstGGG.pSNh+hlPahl.hs.hal.lsp.....................ststtpp......tpt...l.s.alp.t..s..pt..ps................................................shp...-ss..ahhh.uLhh..s..cpWpt..+hthLp+hlhhu.h...pthts.....t.........................................tt.p...a.sh.h+s.Llahull-.l.phhhph........................................................................s.ssp.ttW...ht-hlt.ss..hhthscphlphh.--hhsspshtEhhDlhG........................................................................................................................................................................................
...................................... 0 62 81 111 +13607 PF13765 PRY SPRY-associated domain Coggill P pcc Jackhmmer:B0V264 Family SPRY and PRY domains occur on PYRIN proteins. Their function is not known. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.77 0.72 -8.27 0.72 -4.43 191 3648 2011-02-02 16:53:56 2011-02-02 16:53:56 1 109 99 11 2020 2534 0 48.80 38 9.97 CHANGED loLDPsTAaspLhLS-Dt+.pVph....sppt...p.shP..-sPcR.Fsph..spVLupcuF ......................lTLDPsTAaspLhL.S..-Dp+.pVph.................sppp...............p...shP....-sPc..R.Fsth..spVLuppuF................... 0 51 592 1270 +13608 PF13766 ECH_C 2-enoyl-CoA Hydratase C-terminal region Coggill P pcc Jackhmmer:B9A058 Domain This is the C-terminal region of enoyl-CoA hydratase. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.55 0.71 -3.95 180 1675 2012-10-02 13:07:06 2011-02-02 17:12:08 1 13 1322 3 662 1647 343 119.80 30 31.65 CHANGED sL..ttptstI-chFu.....t....s..olpcIlssLcs.........c....s.s....p.....autpshcsl.ppt..SPhShplshc.lcc....u+..p.........h.o.lt-shphEhpluhpshp....p.........s........DFhEGVRAhlIDKD..+s...P.c.....Wp..sslp-..Vs.ss..hVsphFs ........................................tphshIsctFu........t....s..olp...cIlps...Lcp................................s...s.s....p.....aApp....shc.s....l.tpt..SPhSl.plohc...t.........lpc.....ucp.....................hoLt-shphEhplutphhp.....p.........s........D..FhE.....GVRAhL.....lDKD..ps........P..p.......Wps..sslp-..Vs.sp..VpthF...................... 
0 192 370 537 +13609 PF13767 DUF4168 Domain of unknown function (DUF4168) Eberhardt R re3 Jackhmmer:A3JZQ0 Family \N 25.80 25.80 26.10 25.80 25.20 25.70 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.50 0.72 -3.35 72 184 2011-02-03 11:39:14 2011-02-03 11:39:14 1 3 138 0 75 191 5 81.90 23 54.79 CHANGED hocpplppaApAhlplpslcpchhpc.lps..................sps...................p.pphppl...tpcAppph..............l.........p......tl.cssGLolppaNpIsptspsDs..pLpp+l .........................................ospplppaApAhhplptlppphhpc.lpp........................sps....................................t.sphppl.......tpcuppph..............s.........phl.pssGLo...lpcaNpIsptsQsDs..pLpp+l...... 0 26 61 70 +13610 PF13768 VWA_3 von Willebrand factor type A domain Coggill P pcc Jackhmmer:Q3UR50 Domain \N 27.00 5.20 27.00 5.70 26.90 -999999.99 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.94 0.71 -4.53 14 1260 2012-10-10 16:07:06 2011-02-03 14:36:20 1 77 395 0 741 6397 466 152.20 22 18.63 CHANGED tclVlllDsSss.hpsp.....hhppulshhlcs..Lsspc.plsllshGs..ps..hh.ss....hhshssttlpthhthlps...hps..hGusslhsuLc...tsh....p...t........ttsthhcpllllo.cGs...t..ss....tcs..p.lp.p.t...tc..hphhshuh...ss..phs.s..shLptLAphupGthph 
..........................................................................................t.cllhll.DtSuS.M.........p.......u.................hphs...+.......p..s.......l...h...h.....h.....l....cs.............L.......s...................p.......s.......p......F........N.........l..l..s..F......us..........p.h...p...t.h.....h.....s.p................................h..t...h......s....p....p...s....h..p..p...A..h.p...h.lpp................lps....hG.....u...T....p......l....h.....p.....s.L.p............tsh.........p......h...............................ttt...s.p...t....l.h.l..l.T..DGp.....sp......sp.......................p.p..l..h.....ph...l....p...p...tt..................tp.........h+..l..a..o...hul..................Gp............sss...t..........thlp...tlAphst.Ghh..................................................................................................................................... 0 248 375 539 +13611 PF13769 Virulence_fact Virulence factor Eberhardt R re3 Jackhmmer:A3K275 Family This domain is found in conserved virulence factors [1]. It is often found in association with Pfam:PF02985 and Pfam:PF08712. 25.00 25.00 25.60 32.20 24.30 23.20 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.60 0.72 -4.12 53 419 2011-09-19 00:13:31 2011-02-03 14:42:30 1 9 419 0 60 256 220 81.70 45 25.41 CHANGED lha+sIPhQVhlspGcc....ps.+htLP-RFppAls+As....ttssDshlt-h+hs-..sst.G-hp.-lAcssspcl-AsYsp-RLcpLl ....hF+sIPhQlKLTsucp....Eh.RhtLPppFh-shspA.....tpssDNlVh.RKWl-..usRYGshE.ElhcsVlEEllAsYsEppLshLV....... 
0 15 36 46 +13612 PF13770 DUF4169 Domain of unknown function (DUF4169) Eberhardt R re3 Jackhmmer:A3K2D0 Family \N 25.00 25.00 32.60 32.50 18.60 17.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.72 0.72 -4.03 82 226 2011-02-03 15:43:01 2011-02-03 15:43:01 1 1 226 0 73 181 5 54.80 44 82.85 CHANGED u-llNLpphRKp+sRsp+ctpA-pNRspFGRTKAE+phscscsc+spcpLDu+.+h ..u-lVNLRphRKp+uRsp+cppA-pNRlpFGRTKsEKshscspsp+Ap+hLDtpRh.. 0 19 42 52 +13613 PF13771 zf-HC5HC2H PHD-like zinc-binding domain Coggill P pcc Jackhmmer:Q5W0A5 Domain The members of this family are annotated as containing PHD domain, but the zinc-binding region here is not typical of PHD domains. The conformation here is a well-conserved cysteine-histidine rich region spanning 90 residues, where the Cys and His are arranged as HxxC(31)CxxC(6)CxxCxxxxCxxxxHxxC (21)CxxH. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.10 0.72 -10.92 0.72 -3.84 144 1247 2012-10-03 17:27:21 2011-02-03 15:57:56 1 127 218 0 775 2477 15 83.60 31 6.32 CHANGED HhhCuLassplh.ps.stt........slpsl.pphsppthphpCth..Cc........pp.....G.AslpCsttsCppsaHhsCAtpss.hhhp......hp.................t...........................phpsaCppHs ....................................HhtC.hl...aS.stlh.pptst...................t.lt.sl..cp..tlp...c..upph.....cCs.h...Cp.........................ch........G..Aol.u.........Cs....t.p..sCp...psaHhs........CAhp.s...t...sh.hp.........tp....................................................hphhC.pHp......................................................................... 0 191 283 499 +13614 PF13772 AIG2_2 AIG2-like family Coggill P pcc Jackhmmer:B3KMN7 Family This family is found in bacteria and metazoa. 
27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.77 0.72 -3.87 26 714 2012-10-02 16:39:48 2011-02-03 16:07:47 1 11 571 11 289 659 306 83.90 29 41.81 CHANGED cVaGsLaclshpshpsLDppEuVp...pGhYh.lplpV.....ps...pss.pp.lhsRsYhlss...p...ss....s....................PSppYLplllcGAhpsGlPpcYlctL ...................................................................pVaGlLacl.s........p..........-hpsLDphE.G.s.........hthYp.+.hp.lp.V...................ps....tsG.p...l..Ahs.Ylhs.s....h.......ps.....uh...........................P.SspY...Lshl...hc.G...Acps....GhPpcYlp.L........................... 0 97 180 239 +13615 PF13773 DUF4170 Domain of unknown function (DUF4170) Eberhardt R re3 Jackhmmer:A3K3X5 Family \N 25.00 25.00 26.40 25.60 21.10 24.50 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.24 0.72 -4.07 34 216 2011-02-03 16:14:24 2011-02-03 16:14:24 1 3 213 0 76 162 34 68.60 62 81.17 CHANGED pQLLHLVhGGELp..c..hss.spF+DLscl-lVGlFPsYssAhsAWKucAQpTVDNAcMRYFIlHlHRLLDPs .....pQLLHLVFGGELp..c..Lss.spF+DLpslDIVGIaPDYtSApsAWKuKAQpTVDNAHMRYFIVHLHRLLDPp...... 0 20 45 55 +13616 PF13774 Longin Regulated-SNARE-like domain Coggill P pcc Jackhmmer:C9J9A4 Domain Longin is one of the approximately 26 components required for transporting proteins from the ER to the plasma membrane, via the Golgi apparatus. It is necessary for the steps of the transfer from the ER to the Golgi complex [1]. Longins are the only R-SNAREs that are common to all eukaryotes, and they are characterised by a conserved N-terminal domain with a profilin-like fold called a longin domain [2]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.97 0.72 -4.43 180 1355 2011-02-03 16:49:18 2011-02-03 16:49:18 1 14 344 17 898 1268 11 84.60 23 37.83 CHANGED hu+.pllp+ls.......ss...s...+tohp.pss.ahhHalh...p......su.................lsalsls-c.sa.s++lAFsaLp-lpc-Fhp............pas..........t.phtsss...sauh.......hpFssh ....................................uctlhp+ls.......ps.....................s+hohp..pss...ahh..H...ahs....p.................su........................lsalsls.-c..sa..scclAFsaLpcltc-Fhp...............pas..........p..phtss.....shsh....pFs.............................................. 0 294 502 724 +13617 PF13775 DUF4171 Domain of unknown function (DUF4171) Coggill P pcc Jackhmmer:B0QXZ9 Domain This short family is frequently found at the N-terminus of Homeobox proteins. 27.00 27.00 36.40 36.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.73 0.71 -3.96 4 63 2011-02-03 17:52:59 2011-02-03 17:52:59 1 2 34 0 25 53 0 128.00 76 23.90 CHANGED DTAMDLLKAITSPLAsGS........KPSKhhup.ssuSSspScS+KE............HH+KhGsuSo......DssSH+SKK.h.hasss...EsLTLREPDGLKMKLILSPKEKu..........pSSssppuhth.uppAosKKsSKKpuR-Ept DTAMDLLKAITSPLAAGS........KPSKKTGEKS.Su.S....S....S......HSESKKE.............HHRKKlSGSSGEL...sLEDGuSHKSKKMKPLYVNT...ETLTLREPDGLKMKLILSPKEKG..........SSSVDEEuFQYPSQQATVKKSSKKSARDEQG................................................................................ 0 2 4 10 +13618 PF13776 DUF4172 Domain of unknown function (DUF4172) Eberhardt R re3 Jackhmmer:A3K4N3 Family The family is often found in association with Pfam:PF02661. 
25.00 25.00 27.10 25.80 23.90 23.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.45 0.72 -3.88 93 374 2011-02-04 09:14:32 2011-02-04 09:14:32 1 4 341 0 117 349 80 79.70 41 22.14 CHANGED aIWQpscWPpFpWDsppltshLppsphpQGtLlGph..pslu.s.pppstL-sLsp-llcoStIEGEpLstpSVRSSlAR+LGl .........aIWQpscWP........pFpWDtstltshLppsphppGhLlGch......psls.stppptsL.-sLhpsllpS..StIEGEtLNhtSVRSSlAR+LGl........ 0 29 64 91 +13619 PF13777 DUF4173 Domain of unknown function (DUF4173) Eberhardt R re3 Jackhmmer:A3K0W3 Family This domain of unknown function contains multiple predicted transmembrane domains. 24.10 24.10 24.40 24.40 23.60 23.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.45 0.71 -5.01 54 229 2012-10-01 23:40:40 2011-02-04 09:19:05 1 2 227 0 76 246 7 185.70 26 36.61 CHANGED LsllslLahlasslQlshLa..GG...ssl...ss.GhoYA-YA+pGFapLlhsslLshlhlll....s...pth....hp.....c......sp.....l.l.+sLlhlhsshslllluSAhhRhpLYlssYGLThhRlhshhahhhlul....slllllhplh...tthsstalhptsh.hssssslh.shuhhN.-shIAchNls+..t.....pssplDhpYLs.s..Lu..scAhPultch. ....................................hhlshlahhFshlQhshLa..uu......tth...ss.uh.oY..u.pYA+pGFapLlhlslLshsllhs....s.....phh....sp.....c......p+.....h.l.+hlhsll.sshoh.lllhSAhhRh.slYlptYGhThhRlhshhahhhlsl.hhllllhplh....+th.....tthh........hpt.sh.hs.shsshh.hhshh........s.-thlAphNlpp.....................thtthDh..Ylt.t..Lu...sAhshl....h.................................. 0 32 57 66 +13620 PF13778 DUF4174 Domain of unknown function (DUF4174) Eberhardt R re3 Jackhmmer:A3JZK5 Family This domain of unknown function is found in a putative tumour suppressor gene [1] and in a ligand for the the urokinase-type plasminogen activator receptor, which plays a role in cellular migration and adhesion [2,3]. 
27.00 27.00 32.10 33.70 25.30 26.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.38 0.71 -4.00 85 508 2012-10-03 14:45:55 2011-02-04 15:36:30 1 7 241 0 222 467 183 122.10 31 45.45 CHANGED ssLspatWppRslllFAssssDsphppQhphLpp..pt...ssLs-RDllllss.sssssps...........................sLRpphc....sps...FthlLlGKDGthKlR....tstPh.sscclhcsIDpMPhRppEh .........................LspFth+pRlLlloAPsssshhap.QhshLpp..st...CslstR+lsllpl.hssssppss.....................p.hss.th..h.pLRphhpls.....pt...FshlLlsKDGpsKpc....astPl.shptlashIDshPhRppEh..................... 0 17 53 116 +13621 PF13779 DUF4175 Domain of unknown function (DUF4175) Eberhardt R re3 Jackhmmer:A3K2P3 Family \N 26.40 26.40 26.60 26.70 26.00 26.30 hmmbuild -o /dev/null HMM SEED 820 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.60 0.70 -13.46 0.70 -6.84 55 303 2011-02-08 14:41:21 2011-02-08 14:41:21 1 2 262 0 113 278 125 681.10 30 90.41 CHANGED ERlh.shWPlhollhlhLuhshhGLaphlssthhhss.lsl....hsluhl.suLhhslh+aRhPsRsEALsRLDt.s.......LstRPlsALtDs.AhGssDssutALWpsHQpRMAsphuph+ushPc.+luc+DPaALRhlAlLhlls.A..hha..u...u..h...hRluslsshhsu.sus..s..u.s.usph-uWlpPPsYTGcPslaL..............s...sts...............ss...........slsVPpGSplolRhhG..........t.su...slsls..........ps.sos..p.s.............ss.ssss...........................s...........sp.......pcFs..lsp.sGslslp..usu.....uc....sWplsllPDpsPpIshss.sPct.sspGshplsapAcDDYGVssucApIs.hsh....s.sssc..p.Ghssc......P.........hhcs.plsLsLPhsusRsshstthlcDLocHPWAsh.VplTLsApDuAGQpGpSpshphhLPuRpFhcPLAtAlIEQRRsLhhstssss.cVsplLcAlohpPEc.hhtstssYLtLRshhpRL-......tu......ho--shc-lsstLW-lAltlEDGsLusAcc+LRcAQ-+LpEAlcpGASD.EEIscLMpELRcAhp-Yh+pLAcptppssp..pt..tps....p....ssp...phoQpDLpcMhDRIpELhcpGchspApphLcpLQpMMENhQssQ.....sQ...t..........ts...p.u.ttp...QuM-pLu-hlRcQQpLpDcsFRphQ-.p...p.....s.......QpGp........p.pspp.sp.............s.pt.Gp...s.Qt.................sp...pttp....sp....stp..........psppsp.........tt......p....p..ttps..............Lu
ccQpsLp.ccLpct.p.ppLsstGsp.uttup-uLscAscAMcsApcALtcGchspAlDpQucAh-ALRcGhcsLuEtM.....tpp..........ptpt....pGt....tspt....t.Gp.....ts..s..pspDPLGRstssp.G..ssss.psh.lssE.shpRAR-lL-ElRRR.G-ssRPplEhDYLcRLLc ............................................................................................ERhh.hhh.hhhllslhhuhuhhGlhthhP...hh....phhhlsh.hshshl.hsl...hsh.p.h.R....Pppt-h...tRl..-..t.ss.....tLsppPlss.p...Dp.A...ss...tcsh....u......A.LWptHppRhttpltplpss.....h..P.c..c.hstpDPauL.Rh.hshlhhls.A..hhh...u.......s.t....tRhupshs.h.ssts......h......ssp..l-.uWlsPPtYTGpsPlaL.....................s...sts...................ts...........slslPtGShl..s..lRhh.G..........tss...thshs......ts..st...t..............t....stt...............................................................s...hp.....hph..p..lpp.stsltlp..u.s...st........pWphsshPDpsPplthst...tP...c...t.....t...h..p.G.p.hpLs..aphpDDYGlsp.upuplh.............s..............t....p......P................hts.phsLsLPhtsscps....h.........pDLopcPaAG..VtlsLsspDsAGppGp.Stsh.hhLPtR.F.pPlAtAlhEpR+.Lshstppt..pshphLpAlh.htP-t.hh.s.shaLtLtshhppLp...............hu...hscp..thppssshhWplAltlE.....p.....G..s.....lu..tApcp..LRtAQptLppAl...p..........p..........s.....A......u...p........pEItcLht...-LRpAhpcahpt.hApp.tpssp.t.....t........tst..hppp-LppMhcplpphhcpGptstApphLp....plpphhpNhp...hsp............sp..p........................ttpsphp...pthp...pLs-hh+cQQphhscoa.pt.pp.php...t............p.st........p..ttt.t....................................tt............................................t.......t....t.....t.............tt.....th.....t....p....t.pp...............................htppQptlp...ppLp.p....pt..h.t.thp..............stps.....hscAtpuMtsAtpALt.p..sp..hstA.stQupAl-uLRcGtpphhptM.....tpt..................................ttt.....t..........stt....t.s......................tttttpDPLGR......t...sp.s...t..sp.psh.lssc..shpRARcIL-ElRR+.upt.pPp.EhcYLpRLLc........................................................................ 
0 35 71 88 +13622 PF13780 DUF4176 Domain of unknown function (DUF4176) Eberhardt R re3 Jackhmmer:D3E5E1 Family \N 25.00 25.00 25.10 25.30 23.20 24.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.60 0.72 -4.26 56 651 2011-02-08 14:46:40 2011-02-08 14:46:40 1 3 430 0 56 331 1 72.40 40 70.15 CHANGED lLPlGSVVhLKs..usp+...lMIluRt.hh....psppp...haDYsushYPpGhhs..-pshhFNc-DIpcVlFpGYpD--Ehpat ..............slGSllhLcp....sp.pp...............lMIlsRt.lh....pspph.............haDYsushYPhGhlp..-plhaFNc-sIcpVlFcGYpDp-EhpF.............. 0 17 34 45 +13623 PF13781 DoxX_3 DoxX-like family Eberhardt R re3 Jackhmmer:D3E5H6 Family This family of uncharacterised proteins are related to DoxX Pfam:PF07681. 25.00 25.00 25.40 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.10 0.72 -3.66 76 297 2012-10-02 13:32:46 2011-02-08 15:41:34 1 3 292 0 87 289 15 100.70 27 34.60 CHANGED uhlWlhsGllshhlhs.st.phplLsph.Ghssthushhlhhsu.l.h.-lshGlhhlh.thpp.+.hshhhpl.h.ll.luhhlhsshhhPphahcPasPlsKNlsllsLth ...........................shlWlapGllsth.....lhstst.phplLsth.shss..thu...hh..hlhhsuh.h-lhhGlhhLh...hhpt....+.thhhhplh.ll.luholssu.hhtPthahcPFsPlstNlslhsls........................... 0 21 41 65 +13624 PF13782 SpoVAB Stage V sporulation protein AB Eberhardt R re3 Jackhmmer:D3E7E2 Family This family of proteins is required for sporulation [1]. 25.00 25.00 27.10 26.10 22.30 21.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.34 0.72 -4.08 43 225 2011-02-08 15:45:58 2011-02-08 15:45:58 1 1 225 0 34 157 7 103.60 50 80.58 CHANGED lssLGllPRhuplT+TtcplhhYEsslllGullGshhsla.chpl.h.....ut.hllslhGLhuGIFlGslAsALsElLNVhPIhuRRlplpptlshllhulAhGKslGSLhaahh ....lslLGllPRLsplT+ohcpl.haEhullhGslhGshhslh.phsh.h.....................up.ahLlllGlFsGhFlGMLAAALTEVLNVLPILAKRlGlc.spIllLLhAlVLGKllGSLFaWl........... 
0 10 24 26 +13625 PF13783 DUF4177 Domain of unknown function (DUF4177) Eberhardt R re3 Jackhmmer:D3E8A6 Family \N 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.10 0.72 -4.34 50 606 2011-02-08 15:50:16 2011-02-08 15:50:16 1 2 570 0 167 416 98 52.20 41 82.95 CHANGED paEYcsltl.ht.tts.......tphcplLspaGp-GWELVpllss.......t.t..t..shhshhKRp ................................pWEYtT.lPlhs..+uo.................cp.......ILspaGpDGWELV.pllsu...................st.-phluYhKR............................. 0 58 133 159 +13626 PF13784 Fic_N Fic/DOC family N-terminal Eberhardt R re3 Jackhmmer:A3K4Q6 Family This domain is found at the N-terminus of the Fic/DOC family, Pfam:PF02661. 25.00 25.00 25.00 25.50 24.90 24.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.70 0.72 -4.12 115 562 2011-02-10 11:07:32 2011-02-10 11:07:32 1 9 506 2 173 510 92 83.30 34 23.18 CHANGED llphlspAspALucLcuhsphlP.Nsslll..sshshpEAhhSSpIEGT.oTh--lhph-tsts.s........tss-scEVhNYhpALptGhcp ..................................hptlhpAptuLucLcuhuphlP.N.s..lLl....sslshpEAhtSScIEsh.hoThD-Lhphptptst...............ssss+EVhsYtsALptGhc............. 
0 67 135 157 +13627 PF13785 DUF4178 Domain of unknown function (DUF4178) Eberhardt R re3 Jackhmmer:D3E9P9 Family \N 25.00 25.00 25.00 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.82 0.71 -4.19 54 460 2011-02-10 11:12:14 2011-02-10 11:12:14 1 3 375 0 119 366 29 142.20 22 59.83 CHANGED LtsGDhlph.....s.ut..sahV..pGphp.....h..p.pushpWtEahLp..s.sspttWLslE........--splphslhpphsshs...............sssplph....-GppaphsEpGsAp...hpupussshtsu....sphpah-Y..tusss.phLuhEtass........ph-hspGphlsst- .............................................Lt.Gshsph..........s.sh..papl..hGphp....................h.......p...ps..t..tWh..EahLp..s..spshtaLplE........-cs.ph...thplhsthssht........................................psssphph.........csp.p.Yph.p-p.....hpup.....sssp.spt.s..h..tsu.......pphphh-a....pusst..thLshEh.ss...................ph.h.pGc.l....t.................................. 0 36 62 96 +13628 PF13786 DUF4179 Domain of unknown function (DUF4179) Eberhardt R re3 Jackhmmer:D3EAG2 Family \N 25.20 25.20 25.20 25.20 25.00 25.00 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.04 0.72 -3.76 49 573 2011-02-10 11:15:32 2011-02-10 11:15:32 1 7 242 1 126 525 5 99.40 20 25.09 CHANGED p+h.cp...+p....htp.tth....sh..s...sslh....lh.....l...hs....o......ssaAt.h........ts....l.hph..h.p..p....pGhpp..sh.pptaupt..lsp....oho.spGlplTlscllhDcsplhlhYplc.spc ....................h...pp....+t....hhhpts........su..s....uslh.......lh........h....hs.........s...........sshA.ssls................hhss.....lhph...h.s..tc.............pG.lpp.............sh.pt..th.s..pt...l..sp................ohp..spGlslTls-lhhDs.splsltYplcpt............. 0 57 114 117 +13629 PF13787 HXXEE Protein of unknown function with HXXEE motif Eberhardt R re3 Jackhmmer:D3EB69 Family This domain contains an HXXEE motif, another conserved histidine and a YXPG motif. Its function is unknown. 
25.00 25.00 26.40 26.40 24.50 24.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.71 0.71 -3.72 142 380 2011-02-10 11:30:58 2011-02-10 11:30:58 1 2 312 0 94 360 151 114.00 21 63.39 CHANGED halh.llahlH.phEEhhh....h.tahspph.th.....................hhosptFhlslhhhhlhhllhshlsth...........hhhhhhlhh..hhphl.hHlh.s.hhh+p..YsPGlloulllhlPhulahh ....................................hlh.hhahlH.phEEhhh........h..a.hpp...th.th......................hhsspshhlslhhtallhhlhshhshh.t.........hhhhhhhta...hhphl..hHlh.....s....hhh+p..........YsPGlhTullh.lPhulhh.............. 0 33 66 78 +13630 PF13788 DUF4180 Domain of unknown function (DUF4180) Eberhardt R re3 Jackhmmer:D3EBG4 Family \N 25.00 25.00 36.20 36.10 23.70 22.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.34 0.71 -4.43 45 213 2011-02-10 11:34:24 2011-02-10 11:34:24 1 3 206 0 58 178 5 109.50 43 79.20 CHANGED hc.psshplshlsucssllsstp-ALD.Lluss.aptssstlhlstspls--FFsL+TtlAG-ILQKFlNYcl+lAllGDhSta..sSpuL+DFlaESN+GpplaFlssh-pAlp+L ............h...sshplshlpsspslIsssQsALD.hhuss.aEtsscplllpcshloEDFF-L+TpLAG-ILQKFlNY+lKlAIV.GDFShY..sS+uL+DFIaEsN+GpclaalsocppAlc+L.......... 0 25 48 56 +13631 PF13789 DUF4181 Domain of unknown function (DUF4181) Eberhardt R re3 Jackhmmer:D3EC26 Family \N 25.40 25.40 25.40 25.40 24.90 25.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.20 0.72 -10.65 0.72 -4.13 31 197 2011-02-10 11:38:06 2011-02-10 11:38:06 1 1 123 0 28 151 0 94.60 34 86.50 CHANGED lhhhhh-phl+KKL....sltKpthh...caVNphHchlEhhl..hllhllsh..hhhhhh.c........h.hthhhhhahslhhshRuaMEWKY.s+-oKc...Yllolhthh...hlllhuhhhhhh .................hhhp.hlR++L....sh.K..ptha...caVNphHhhhphhl..hhhalls...hhhh...p........h.h.thlhhhahhhhhshcu.aMEWKY.D+ESKEYllSlhshh...hllhhullhhh....... 
0 3 24 25 +13632 PF13790 DUF4182 Domain of unknown function (DUF4182) Eberhardt R re3 Jackhmmer:D3EDS0 Family This protein of unknown function contains a number of highly conserved cysteine residues, which may form disulphide bonds. 26.00 26.00 35.60 35.40 25.90 25.20 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.01 0.72 -4.16 13 277 2011-02-10 11:50:17 2011-02-10 11:50:17 1 1 126 0 21 82 0 38.50 73 89.38 CHANGED MGTIVCQsCssTIsaFE-EKVTsLYGpCsp.C-Ccppcc ..MGTIVCQsCEGTIuHF.E.D.EKsTVLYGKCGo.CcCc+cEc........ 0 3 12 14 +13633 PF13791 Sigma_reg_C Sigma factor regulator C-terminal Eberhardt R re3 Jackhmmer:D3EDV4 Family This family is the C-terminal domain of a sigma factor regulator, this may represent a sensory domain [1]. 25.00 25.00 25.80 27.10 24.80 24.70 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.76 0.71 -4.18 37 457 2011-02-10 11:53:04 2011-02-10 11:53:04 1 6 316 0 49 336 1 146.30 29 45.71 CHANGED plpclspsplsEsulSFD+sYohcE..lpphhspt..........sWhhlcTts-ppppt.t........................ttpshGat....spt.ph....tpp.tp...cpFl....stLchhscppphsphhs............hppphchlpp.pslclhGlllTGpsc-ltpLtspshl+uuslGss ...................................Lpch..pspVsEVAlSFDKsYohpE..lpphlPsshp........lWhh..lsotpc.pcpps.t....................................sttshGFt.........ph.p....p..........t.....p..p..pp...ppFh........ctLc.phspp.pct....p..t.p.............................htpphc....hcp..pslclaGlllTGpocphppLpspsal+uAslGsh................................. 0 8 27 38 +13634 PF13792 Sulfate_tra_GLY Sulfate transporter N-terminal domain with GLY motif Coggill P pcc Jackhmmer:C9JKR6 Domain This domain is found usually at the N-terminus of sulfate-transporter proteins. It carries a highly conserved GLY sequence motif, but the function of the domain is not known. 
25.90 25.90 26.80 26.80 25.80 25.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.49 0.72 -4.31 377 6864 2012-10-03 01:44:59 2011-02-10 11:55:28 1 50 2950 0 2486 5940 1281 81.00 30 13.90 CHANGED lP...hl..p.....W......l.p....Ys....p.hl.tsDllAG....lTluhhhlPQulAYA.h.lAG..lP..P.hGLYuuhlssllYulaGoS+plslGPsA.s.h.o.l.l.l.us.s.l ........................h.t........hp.......p.ph...ttDll...AG.................lsVullh.lP..............u........lAa......A..l..luG.........ls...........PhhG...L...Yuuhls.sl....lhu.lhGuSp..thls...........G.Psuuh.ulllssh.h................................... 0 663 1334 2000 +13635 PF13793 Pribosyltran_N N-terminal domain of ribose phosphate pyrophosphokinase Coggill P pcc Jackhmmer:C9JDH0 Domain This family is frequently found N-terminal to the Pribosyltran, Pfam:PF00156. 25.00 25.00 25.10 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.32 0.71 -4.35 429 6720 2011-02-10 13:49:44 2011-02-10 13:49:44 1 18 4765 46 1992 4453 2459 117.60 47 35.76 CHANGED lplFoGsus....pLAccIuctL.........................Gl...Lu..p.......splp+FuDG..........EltVpl..........pE....oVR..GpDVFllQoT.s..........s..............P.lN-sLMELLlhlDAh+RASA.p+ITAVlPYaGYARQDR...................K.sps........RsP.........IoAKL.......VAsllpsAG ..............................................................................h+lFuusus....pL.Ap.cl.App...L....................................................ul..t..LGc......sslp+FSDG..........ElpVpI...............pE.....oV.R....G.p...D.V........FllQST.s....s..............................................P.sNDsL.MELLIMlDAh+R....A.SA..pp..I....o......sV..l..P....YaGYARQ.DR...............................+..s+u........RtP.........IoAK.L.......VAshLpsAG................................................................................................... 
0 654 1213 1657 +13636 PF13794 MiaE_2 tRNA-(MS[2]IO[6]A)-hydroxylase (MiaE)-like Coggill P pcc Jackhmmer:A0JYP2 Family \N 21.50 21.50 21.50 21.80 21.40 21.20 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.07 0.71 -4.80 3 288 2012-10-01 21:25:29 2011-02-10 15:50:20 1 1 287 4 74 200 13 176.30 42 79.12 CHANGED sRYccuVVDLLGALAYGELSAFERLAEDARhAPTLsDRAsLA+MAuAEF+HYEhLcDRLAuhGl-sE-AMcPFVAAaDsFHupTcPuDWLESLVKAYVGDolAADFYREVAcaLDsuTRELVLsVLDDTGHouFAcE+VRAAlAuDPRLuuRLALWGRRLLGEALTQAQRVVAERDALuoLIlGG ................................................................t.......h.cLhuhl...AYuElu.AF.RLsc-.u.chAPsLssR...ht...lAsMAuAEh.t.H.act.LRstLsc.R.Gs-s..htAM.pP.asuu.l-sa+t...T......s..P...psW...hEu........LVK.sYVuDuLAuDFYpclAssL...s.sp.s+sl.Vt.u....s....L....s.-..TGpupFss....ucVRuAls...A.c.sp.+uRLuLWuRRLlGEAlopAQhlh.Ac+ppLsshlh..s........................................ 1 24 59 71 +13637 PF13795 HupE_UreJ_2 HupE / UreJ protein Eberhardt R re3 Jackhmmer:A3K636 Family These proteins contain many conserved histidines that may be involved in nickel binding. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -10.78 0.71 -4.73 99 405 2012-10-02 18:22:22 2011-02-11 10:42:20 1 4 310 0 155 468 1125 151.10 36 51.33 CHANGED alhlGhcHll..pGhDHlLFlluLlh........................hhtph+cllhhVTuFTluHSlTLslusLshls.....ls...ut...llE..slIulSIlhtAl-Nl...............hhshh.thphhhshhFGLlHGhGFAshLp.phslspss..hlhsLLuFNlGVElGQlhllshlLhlhhhhpcth ....................................alhlGhcHll..pGhDHlLFLlu..llh.............................hhtph+c.llhhVTh.F.TluHSlTLhl.ushshl.s............ls...sh...llEslIulSIlahA.l-Nl....................thht.ht.tphhsshhFG..LlHGhGFAshLt..-.h..s...ls..p...s..........s..l.lhsLluFNlGVElGQlhhlshlLhlhhhhpp.h............................................. 
0 58 109 136 +13638 PF13796 Sensor Putative sensor Eberhardt R re3 Jackhmmer:D3EFN7 Family This family is often found at the N-terminus of proteins containing Pfam:PF07730 and Pfam:PF02518. The N-termini of proteins containing these two domains often function in stimulus sensing. 26.00 26.00 26.60 26.50 25.90 25.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.60 0.71 -4.67 87 465 2011-02-11 10:52:36 2011-02-11 10:52:36 1 7 224 0 205 504 14 172.20 21 46.00 CHANGED allhuhshulsshshlhss.lslus.slslhhl..G.lslLssslhss+shuthERtt.spthh..sh.plstP........st......suhhstltshlp.-suoWRslhahll.phsluhlshhlsssh..hshulshl..shPLhhhhhsss.hs.hh.............hhlsohspuls..hsh...l.Gllhl.hlsha..ls.sslsp....hpuhhs+uLL .....................................allhshshulhh.hs.hhlsh.lshuh..slsh.hhl.G..lPlL.s.h.slh.ssRs.huphERtp.scthh.......uh.pl.st.s..........tt............................suhhtp..lt.thLp.DssoWRslhahhl...phsluhlshhlslsh.hshs.lh..hl......hh....Plhhhhhs........h......................................h.lsshstshh.hhh....h.Gllhh.hhshhls.tshsthpshhspthL................................................. 0 68 162 199 +13639 PF13797 Post_transc_reg Post-transcriptional regulator Eberhardt R re3 Jackhmmer:D3EKT1 Family This family includes post-transcriptional regulators [1]. 25.00 25.00 30.60 30.50 21.80 21.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -9.57 0.72 -4.39 26 293 2011-02-11 10:56:17 2011-02-11 10:56:17 1 1 293 0 38 158 1 86.50 35 87.77 CHANGED phcta+cp.lpPsLpsKh-EF+hLGYcplot-clWsaLhp+KWK+ppt...hpLaEllsDIhslphs-YMsahTlcuhcssshhhsctspc ..........h..p.hppp.LpsslcpKsEEF+hhGYcplsp-DlWpaLpscKWK+tss...lpLaEhlsDlhplpssEaMsYlslpAhpus...hsph-.................... 0 15 28 30 +13640 PF13798 PCYCGC Protein of unknown function with PCYCGC motif Eberhardt R re3 Jackhmmer:D3ELL0 Family This domain contains a PCYCGC motif and four other conserved cysteines. Its function is unknown. 
25.00 25.00 30.10 26.80 22.70 22.20 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.17 0.71 -4.54 13 157 2011-02-11 11:09:16 2011-02-11 11:09:16 1 2 138 0 31 109 15 151.10 54 89.50 CHANGED hh.hllshullLuGCussssp-pspscp...........pppssGDIpEpTuSs-pLPoFLcs.ps-slpplYptuupppELL-aIPCYCGCGESAGH+sNhsCFlpEp+cDGslVWDDHGT+CGVCLEIAspShphhpcGKShK-IRphIDEpYKEGYAKPTPTPMP ............................lhu..hlsllSllLuGCGususs-..p.....psucp.pppt.......pto+oppuD.IQEcT..KulDsLPoFL-c...h-spM+cIYslAGpssELL-aIPCYCGC..G.E.SsGHKNN+NCFI+EIKKNGc.V.V.WDoHATsCssCLEIAVESuuM+QcG.K.S.shEIRsaIDsKYKEG.YuKPTPTPMP........... 0 9 22 25 +13641 PF13799 DUF4183 Domain of unknown function (DUF4183) Eberhardt R re3 Jackhmmer:D3EMB4 Family This domain of unknown function contains a highly conserved ING motif. 25.00 25.00 25.10 36.50 24.70 22.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.89 0.72 -4.17 22 220 2011-02-11 11:15:06 2011-02-11 11:15:06 1 6 107 0 31 144 6 77.60 40 52.48 CHANGED sssh+ahaTss-clthhustsIsust...................suYhNLaINGVLQ.tshY.sls...sutLsLpssss..htGsPIllph .................h.suc+hh.TsusGhuthushsIsussh...................hoYhNLaINGlLQssshh.sVo.......TGslTls........ssss..lssGoPIhlEF.. 0 11 21 21 +13642 PF13800 Sigma_reg_N Sigma factor regulator N-terminal Coggill P, Eberhardt R re3 Jackhmmer:O07581 Family This domain is found near the N-terminus of a sigma factor regulator. The N-terminus is responsible for interaction with the sigma factor [1]. 25.00 25.00 25.00 25.70 24.70 24.50 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.22 0.72 -3.96 22 353 2011-02-11 14:42:16 2011-02-11 14:42:16 1 4 247 0 37 251 0 91.50 25 27.43 CHANGED .Kphl++uKhKthlp.h.llslhl.hlllhhhhhhhshl.ah.sp....spch.hpshphhhplopPNshlssp.hp.sphs.hFutshph.sl.KplGscsl. 
..........h.ct.h++uK+Kphlphh..llslsh.sllllshhh..h.shhha..sp.....phpcl.p-hhshthplst.PNsphsup..ht.sssp.hFusphph.shhKslsshsl.................. 0 6 20 28 +13643 PF13801 Metal_resist Heavy-metal resistance Eberhardt R re3 Jackhmmer:A3K6W9 Family This is a metal-binding protein which is involved in resistance to heavy-metal ions [1,2]. The protein forms a four-helix hooked hairpin, consisting of two long alpha helices each flanked by a shorter alpha helix. It binds a metal ion in a type-2 like centre [3]. It contains two copies of an LTXXQ motif. 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.29 0.71 -4.14 97 966 2012-10-02 12:34:46 2011-02-14 13:53:14 1 3 826 26 248 1113 79 123.10 29 76.98 CHANGED sSLslNlhllGslsusshph.ssttstt................thht.hhtsLsscppctlppthcstt.t...phpshppph.cpttpplhphltApshDssslpsslsptpptptphppthppthlshhsshss...cpRtt ...............................................................................................th.......hhhul..uhuhss.ss.uhspsup.............................shhp..ps..s..s..sL.....T..sEQpst...h...pclhs-ah..s........ps...s...u.L.....p.....p.....p.......l..hscR........hE.h....s....A....LL.s.As.ssDsu..c.lsAlsc-hcslpppLcchp.lcpslshspshhPct...st..................................... 1 81 156 203 +13644 PF13802 Gal_mutarotas_2 Galactose mutarotase-like Coggill P pcc Jackhmmer:Q8FC64 Domain This family is found N-terminal to glycosyl-hydrolase domains, and appears to be similar to the galactose mutarotase superfamily. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.35 0.72 -4.12 218 2671 2012-10-02 23:57:29 2011-02-14 13:55:17 1 55 1668 60 985 2314 64 65.70 31 7.77 CHANGED sEphYGlGE.+.s.....s.sls....++.....G................pphch...aNt....Ds..hsh.p..ps.......csh.....YpsIPFhlshp.......ss..p.......u.aGlFasNstcshaD ....................EplYGLGE.+..h.....s..sls....+p........G.............................pshch.....aNp...Ds....hs..p...ss....................pth..........YtslPFhh.................ss....p..............u..a....Glahssstps.h............................... 0 358 606 839 +13645 PF13803 DUF4184 Domain of unknown function (DUF4184) Eberhardt R re3 Jackhmmer:D3EE91 Family This domain of unknown function contains several highly conserved histidines. 25.00 25.00 25.20 74.50 24.60 24.80 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.81 0.70 -5.06 38 172 2012-10-01 21:01:47 2011-02-14 14:10:33 1 1 170 0 22 182 3 235.70 41 94.91 CHANGED MPFThSHPAhllPlp+hsh....LshsALllGSMsPDh.YF.....hthp.......tsshuHshhGlhhhsLPlslllhhlaphll+psLhphlPh.hp.phhs.hp.....hh.hcph.l....lhlhShllGshTHllWDuFTHpsGa.hVpthshLpppl..thhs..tlPla+lLQauSollGllhlhhhhh+hhpppspp.t..h......ptKhhhhhhlh...lhuhlhhhhhhhhhs..h.....hhhhsphllshlsuhhhulllsshlh ..MPFT.F.uHPAAVLP.hsK...+p...s.pt...lsloALlLGSMAPDFpYF...........lpF+P..........aGslGHsWhGhlahNLPLshLLAhla+hllKcPhlsaLP+sas.shasa.shs...p..tas..htoh..+pa..h.......VF..shSALhGhlTHVlWDuFTH..psGa.FVhplshLpppl.......h.sIPlYKhhQHGSTslGlllLlahla....+atpc..psp.chhhth.......pcKhpaWhslh...lluhll.hhlashl.s.a.h.....hatlGthIVshlsu.hsuhhlsslla......................................................... 0 7 16 21 +13646 PF13804 HERV-K_env_2 Retro-transcribing viruses envelope glycoprotein Coggill P pcc Jackhmmer:P61569 Domain This family comes from human endogenous retrovirus K envelope glycoproteins. 
19.40 19.40 23.30 22.20 18.20 17.10 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.16 0.71 -4.74 16 352 2011-02-14 16:01:45 2011-02-14 16:01:45 1 5 6 0 22 230 0 167.60 89 44.44 CHANGED TPVTWMDNPIEVYVNDSVWVPGPTDDRCPAKPEEEGMMINISIGYRYPPICLGRAPGCLMPAVQNWLVEVPTVSPISRFTYHMVSGMSLRPRVNYLQDFSYQRSLKFRPKGKPCPKEIPKESKNTEVLVWEECVANSAVILQNNEFGTIIDWAPRGQFYHNCSGQTQSCP ..........................................TPVTWMDNPIEVYVNDSVWVPGPTD.DRCPAKPEEEGMMINIS..IGYRYPPICLGR......APG......CLMPAVQNWLVEVPTVSPISRFTYHMVSGMSLR..PRVNYLQDFS.YQRSLKFRPKGKP.CPKEIPKESKNTEVLVWEECVAN..S..AVILQNNEF.GTIIDWAPRGQFYHNCSGQTQSCP............................................................................. 0 17 17 17 +13647 PF13805 Pil1 Eisosome component PIL1 Coggill P pcc Jackhmmer:O74960 Family In the budding yeast, S. cerevisiae, Pil1 and another cytoplasmic protein, Lsp1, together form large immobile assemblies at the plasma membrane that mark sites for endocytosis, called eisosomes. Endocytosis functions to recycle plasma membrane components, to regulate cell-surface expression of signalling receptors and to internalise nutrients in all eukaryotic cells. 
27.00 27.00 27.10 27.70 26.30 26.30 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.76 0.70 -5.25 7 285 2012-10-03 12:17:00 2011-02-14 17:38:51 1 8 137 3 215 271 1 249.20 51 65.78 CHANGED MHRTYSLRsSRsPTASQ..LpsPPPPsSoTKotpaFGpsulu.aohR+ssAGuhGP-LuRKLuQLVKhEKNVhRShElsuRER+-sA+QLShWGE-.................sDDDVSDVTDKLGVLIYElGELEDQaID+YDQYRlTLKSIRNIEuSVQPSRDRKpKITDpIA+LKYK-PpSP+IsVLEQELVRAEAESLVAEAQLSNITREKlKAAasYQFDAh+E+uEKhALIAGYGKtLLELLDDoPVTPGEoRPAYDGY-AS+QIIhDAEsALspWsLDsAuVps ............................................................h.stst......................ttsss.s...S.osp.s........hh..sp..t..uhu....pshR........ht.s.t..G...shs.P..-Lu....+KLspLlKhEKslhcu....hEhsu+ERhpsApQLShWG.ps..........................s.D.-.D.VSDlTDKLGVLlaElGELEDpas-+.aDpYRlolKSIRslEuSVQP.SRDRKp..KI....o....DcI....A....p.L.......K.....Y...K..........-.....P.......p....S....s....K.......l.......sV.......LEQE....L..VRAEAEoL.....VAE.......AQLSNITRp.......KlKsAasapFDAlhE+uEKhAlIAsaGKtLL-L..l..DDoP....VsP......GEoRsAYDG..a-s.o+pIl.DAEsuLpsWp.s.s.h....................................................... 
0 56 115 183 +13648 PF13806 Rieske_2 Rieske-like [2Fe-2S] domain Coggill P pcc Jackhmmer:Q8FCX0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.21 0.72 -4.31 77 1917 2012-10-02 12:49:59 2011-02-14 17:40:20 1 23 1666 11 493 2846 625 103.40 36 52.55 CHANGED sWpslCslcDlhPsoG..VsALl.s..spQVAlF+s......t.s.s.....plaAlsNhDPhupAsVLSRGllGs.h.tGchsVASPLaKQ+FsLpoGpClE...................sss..hsltsaslRlp.sGpV.Vp ....................................................................WhslC.pl--.lh....P.t....sG.....hs.s..l..l.......s......s.......c..p.....l...AlF+s......pss...........pla.AlsNhsPa...c..u.....ul...L.....S.....c......G.l...........l............u................s.....c........t..............t..............p........................h.......V.....ssPL+KQ+Fc...L.....p.....s......G..t..s.h.-........................................s.-.p...hsVpsYss+.Vc..DGtV.l.h........................................... 0 114 271 403 +13649 PF13807 GNVR G-rich domain on putative tyrosine kinase Coggill P pcc Jackhmmer:Q8FKJ4 Family This domain is found between two families, Wzz, Pfam:PF02706 and CbiA Pfam:PF01656. There is a highly conserved GNVR sequence motif which characterises this domain. The function is not known. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.62 0.72 -4.23 72 3261 2011-02-15 09:40:19 2011-02-15 09:40:19 1 7 1874 0 746 2626 476 78.50 26 13.58 CHANGED pTQQclL...RLpRDVcVspplYspLh.sptQp.hpls.p..AuslGNVRIlDs.A.sstsc..PVKPKKsLlllluhllGhhluluhlllRph ..............................................h....plpRphpstp.plYhtLL.p....+..pp....hpls...p....st.ss....u...s...l.....+..l..l..Ds...A..hs..s....t...Plp...P..++.tL.ll.l.l.u.hlLGlhlu.lshsll+..h......... 
0 230 467 612 +13650 PF13808 DDE_Tnp_1_assoc DDE_Tnp_1-associated Coggill P pcc Jackhmmer:Q2EEQ8 Domain This domain is frequently found N-terminal to the transposase, IS family DDE_Tnp_1, Pfam:PF01609 and its relatives. 30.30 30.30 30.30 30.60 30.20 30.00 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.74 0.72 -4.32 95 1944 2011-02-15 10:37:59 2011-02-15 10:37:59 1 5 565 0 251 1556 231 80.30 40 30.01 CHANGED LhpthsplsDsRp.sp.shcasLsslLhlslsAll.uGucuap-Itpaups+...phLpphh..sh..pG.......lPSccThpRVhptlDspthppsatpWhp ...................hphhp.l.D.Rp.th..phcHpLscILhLslsAVI.uGA-uWc-IE-FGcs+h..-aL+pah..sh..cpG.......lPscDTlsRVlu.pl..sstthppthhpah.................................. 0 73 179 221 +13651 PF13809 Tubulin_2 Tubulin like Eberhardt R re3 Jackhmmer:D3EKE8 Family Many of the residues conserved in Tubulin, Pfam:PF00091, are also highly conserved in this family. 25.50 25.50 25.80 26.10 25.30 25.40 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.39 0.70 -12.16 0.70 -5.50 23 225 2012-10-03 12:11:42 2011-02-15 14:23:36 1 7 203 0 66 232 45 326.50 31 31.02 CHANGED llIGLGGTGt-llt+lR+hIhppatt..................sthshlsFLhlDT-ps...........hstp.....thshcphcchhthhsspsssshhpphp.........sastlpp.......WhPsch.p...........shpslcsGAGplRslGRLAF......hspappIpptlpsthc...............h........hthcstssulslalVuSLuGGTGSGhhlDluYplRphh......ttpsshtlsuhllhPs...hasshsss......sphpANuYAALtELsaasshsscapsphstttspph.spps....................PFDhsYLluspNspsth..hshcplhphlApsIa.LphosthustppshccNhhpphtpssststs..................ahsFGhusIthPh-pItshlth+lspphlpha 
...............................................................................................................................llIGLGGhGuplhtpl.hcp...l.c...........................-p.cp.lt..hhshDTshs..............................................slsphcc.hh..p..hh..sp.sssuph..pchh....................spP..pIsc..........Wh.Ph-.ph............spslspG..A..G..QlRsluRLuL..................s+l...ss..hhpplcchhs..................................ssucts.phul..p...VhlVoSLAGGTGSGMFLplAhhLRphl......htpcshh..ltuhhlhP-...lhspspsss.......hcplpANG...YAuL+E.Lcthphs...sstt..hs...tp.tthsh..php...hpss.....................................Pas..hsaLhsh....cNh.c..G...ptl...tshsphhc.h....ApsIh.hthhsshus.tph..stp......c..s...hpphtpsss.p..sp...................asshGhuplhhPh-clhphsAh+huhphlp.......................................................................................................... 0 19 47 55 +13652 PF13810 DUF4185 Domain of unknown function (DUF4185) Eberhardt R re3 Jackhmmer:D0L2Z5 Family \N 25.00 25.00 25.00 25.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 316 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.31 0.70 -5.53 52 408 2011-02-15 14:28:20 2011-02-15 14:28:20 1 10 213 0 128 358 13 297.80 29 69.59 CHANGED TGs..........spTsp+aslsGTDLGIhacsssG....phhhsFGDTFu.........ssssGs.....sWRSNllhhSs.spslsc..Gl.........thsusst..ss.sp.Acpll.s.s.p.......sh-h......ThIPTuuIsls....s..spYlphMol.+sWus.s.....W.TNaoslshSsDsGpoWspsststpsss...........sG.sp.....h..suhsp.......scG..aVYhauT.ssuR..susshLuRV...tsplhsh......s.....uYEaWssss.......W..upss...usPllsus..sGElSlpap...t.hG+WlhhYhs.........s..s.tslshRsAssPpGsWo.stslssshp..................hs.p..LYGuYIaPhS.p..s..ttLaaslSpWs.........sYNVhhh+s 
.................................................................................ps.tcaulsuTDLG.h.h-.sss...........pllhhFGDoFu..........sts.Gt.....tWpssshhpsp..spslss..Gl....................phsusss.........thuppll.s.s.............s....th......ohlPousl.sls..........t.......ppYlphhoh.+s.hss......................hotlshSpD.s.GpsWpshstshpsss.......................st.st....t....h.sshhp....................ssG...alYhhu....o....s........s........R.......s........s.........sshLsRs..s..p.plh..Dh.................s.....papaWsuss.t...................W.....s...pss.....hoPl..h..s......ss.................lG.Eh.Slph.............spalhhYhs.........s...st.....ss...lhhRs...A..s...pPpu..sWu.stslssshp................................hs.t...hYusalpPhS.....h....s...........p.LhhhlS.Ws............sYpVh.hp.................................................................................................................... 0 40 89 115 +13653 PF13811 DUF4186 Domain of unknown function (DUF4186) Eberhardt R re3 Jackhmmer:D0L5G5 Family \N 25.00 25.00 40.50 40.40 17.50 17.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.54 0.71 -4.37 43 607 2011-02-15 14:30:59 2011-02-15 14:30:59 1 3 591 0 60 245 5 107.10 72 88.23 CHANGED ph-plFpRLu+SsFRuRF...+Lstc-psYlpcKGhssIppHApDFlscRLAPAhPsNDGKQTPMR....GHPVFlAQHATATCCRGCLpKWHtIstG+sLotpEQpYlVsVlhpWlpp .....sh.-sLFARLuRSpFRSRF...RLGhKERQYCh-KGAsVI-pHAADFlA+RLAPAhPANDGKQTPMR....G..HPVFIAQHATATCCRGCLAKWHsIPQGhsLSEpQQcYIVuVIa+WL..Vl............ 0 14 31 43 +13654 PF13812 PPR_3 Pentatricopeptide repeat domain Coggill P pcc Jackhmmer:Q9USP3 Repeat This family matches additional variants of the PPR repeat that were not captured by the model for Pfam:PF01535. The exact function is not known. 
27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.43 0.74 -7.99 0.74 -3.22 132 5769 2012-10-11 20:01:04 2011-02-15 14:46:39 1 1960 345 0 4254 55882 730 33.00 18 6.79 CHANGED htsassllpshs..c.tup...hpt....s...hplhpp.Mp.p.p...u......lpP ......................tasslls.s.hu......+..sGp............hcp....................A..........hplhpp..Mt.t.t....t........................................ 0 932 2789 3676 +13655 PF13813 MBOAT_2 Membrane bound O-acyl transferase family Coggill P pcc Jackhmmer:O74380 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -10.18 0.72 -3.91 101 505 2012-10-01 20:09:06 2011-02-16 11:45:15 1 13 129 0 397 1144 83 88.40 25 20.91 CHANGED aP.s.lFGs...h....hpA.....h...o.....lpp.....FWGc.hW..HQhhRt....hhps....h....u....phl....h+t.lht........p..........ht+hhplh...hsFhlSGllHhhsshhhst............pt.....shh....FFhh ...............................PhF.sp.....htu.....p...S.....LpcFWG+..t............W..Hphhpp..................hhps........s....u....hh.......spt.hht...............pt.......................htphstlhssFhlSGl.hHths...hhhsht...........sh.........hh.FFh.h.......................................... 0 152 254 343 +13656 PF13814 Replic_Relax Replication-relaxation Eberhardt R re3 Jackhmmer:D0L6G0 Family This family includes proteins which are essential for plasmid replication [1] and plasmid DNA relaxation [2]. 
25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.29 0.71 -4.45 47 432 2012-10-04 14:01:12 2011-02-16 13:12:31 1 3 244 0 81 370 8 165.50 19 62.86 CHANGED Lthl..tcHphLoscQltplhhsstt.........tspppLpcL.pphphl.cphp.................ts.uptshhYhLsttG......tchl............st.ts.........t......tt............pttt...hsss......t.pls.HplslschastLh.tts.+pt...s..hp..ltcWh.....sE.psttt..h.............t..p..h.lpPDuhhthptss.......tthtaalEhDpuTEshtp......ltpKlppYtc.hhptt............tt..........s.hPl....l .................................................................l..hthhshpplhthh...t.h...........sspphLpc.L...p..t......hl.pphhh..........................ttt...hhahL.sppG...hphh..................tt........................................t..hhss...t.plt...HpLhhs-hh..lphh..p.s.....t......................tap..............hEtphthp..h...........................tttt...t..h.lhPDshhhhppp....................hh....lElDpspps.hph.........htcKl.ppYtc.hhp........................................................................................... 0 34 66 73 +13657 PF13815 Dzip-like_N Iguana/Dzip1-like DAZ-interacting protein N-terminal Wood V, Coggill P pcc Wood V Family The DAZ gene-product - Deleted in Azoospermia - and a closely related sequence are required early in germ-cell development in order to maintain germ-cell populations. This family is the N-terminal region that is the only part of the protein in some fungi and lower metazoa. 
26.20 26.20 26.40 26.40 26.10 26.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.20 0.71 -4.29 16 230 2011-02-16 13:19:29 2011-02-16 13:19:29 1 6 105 0 130 203 1 116.60 34 17.34 CHANGED FpFpsRp-plDWR+luulDl-+l...s+-hDlsTLQcsl.sIoFsclpsEcssp.....s-shhLKLa+LuQLsIEYLlasQ-hLssp...spL--+lpps.pttcptcpphs+ppc-hphl.KcEs+ ........FpFp..R..p.cslDWR+luulDl.D+l...spchDltsLQ-plt...s.lTF.CsL-sEcssp..........lDPsl.lKL.hRLAQLsIEYLLH..sQ-......hLssp.........lppLcccLptu.tptpphcppltcpsp-l+tl.+cE.+........................................... 0 52 68 93 +13658 PF13816 Dehydratase_hem Haem-containing dehydratase Eberhardt R re3 Jackhmmer:Q76K71 Family This family includes aldoxime dehydratase, EC:4.99.1.5. This is a haem-containing enzyme, which catalyses the dehydration of aldoximes to their corresponding nitrile [1]. It also includes phenylacetaldoxime dehydratase, EC:4.99.1.7. This haem-containing enzyme catalyses the dehydration of Z-phenylacetaldoxime to phenylacetonitrile [2]. The enzyme forms an elliptic beta barrel, composed of eight beta-strands, flanked by alpha-helices [3]. 
25.00 25.00 25.90 25.40 24.80 24.60 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.94 0.70 -5.38 33 119 2012-10-02 00:20:33 2011-02-16 13:30:49 1 4 97 20 74 142 8 286.30 33 82.63 CHANGED hAhhGlQhpus.st.........stAhssltphlpt..hssssuPpth.-hsthsDssGhpshlhluYWp-sssacpWhpssshssaWsuhs...tpshGhapElhsssscRaETlaSsp-hh.Gluplhs.shos.hpcHuYWGuhRDRhPhopsDthts.........sspsth.sssstttGRlhls..sh.-NlshIRSGQDWussss-E+chYh-pl-PsLppGMcaLp-putcsGChosRahpshDs....c.ss............h.hccohsluaacsLssLE+Wu+oH.THlsIassFhchs.pshs....thcLRLaHEVuVLcsspthFEYlsCHssTGh ....................................................huhhGlQhpssst..................ssAhpsltphhtt....hstssuPsta.-hsph.sDspGhtshlhluYWps....ssappWhpssshssaWpu.s...t.pps.....hGaapElhssps-RaETlaut.ch.h.Glutlhs.shos.hpcpuYWGuhRDRh.Phops.D.hhts.....................ssp.th..s.sssttsuRlhlt..sh.cNlshIRSGQDWussps.cERphYh-plcPsLppGMsaLcDpGtpsGChusRahpslD...........c.tp...............h.hccohsluaapsLspLE+Wucs.Hs.THlpIFssFhchs.pths.....hpLRLaHEVsVhcstpthaEYlsC+stTGh................ 0 10 29 59 +13659 PF13817 DDE_Tnp_IS66_C IS66 C-terminal element Coggill P pcc Jackhmmer:P39351 Family \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -7.80 0.72 -3.94 114 3268 2011-02-16 13:47:29 2011-02-16 13:47:29 1 17 854 0 386 2560 227 38.70 51 11.84 CHANGED SLIpTAKLNGl-PpAaLp-VLsR.I.ss..a.st.sRlc-LLPWs .......SLIuTC+LNsl-P.EuYLRa...VLsh.l..s-.....W...Ps....N......R....Vs-LLPWp............ 0 81 201 298 +13662 PF13820 Nucleic_acid_bd Putative nucleic acid-binding region Coggill P pcc Jackhmmer:Q14686 Family This is a family of putative nucleic acid-binding proteins. Several members are annotated as being nuclear receptor coactivator 6 proteins but this could not be confirmed. 
21.60 21.60 23.20 21.60 20.60 21.00 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.64 0.71 -4.61 6 169 2011-02-17 13:43:59 2011-02-17 13:43:59 1 25 69 0 126 158 0 143.50 40 12.42 CHANGED hSNlaVsspGplc..hhptpLDpL+ppLspLhuscpSplhh+chcha+SlhVcFoIPREssssLRphA-puD.cLhhhGIpS....lQI-u-ssI.slsppushaD..st+p..ssslc.lGsSsRscttps......tShstL.u.sl.ucshsspstsh...ussph ................SslaVsh+Gslc...hp.pLDplhps.lssL.h.p.h...c...........p.Sp............l.Khpthc.apSlhVpFsIPREssthLRhhAppss.pLh.hGlhS......lQI..-uEs...sI.plt.utsh.pshR.........sssht.sGsSs.Rhctths...........tuhstL.th.s..ussh..thtssuS..h................................................. 0 82 87 106 +13663 PF13821 DUF4187 Domain of unknown function (DUF4187) Coggill P pcc Jackhmmer:C6Y4A5 Domain This family is found at the very C-terminus of proteins that carry a G-patch domain, Pfam:PF01585. The domain is short and cysteine-rich. 27.00 27.00 27.10 29.60 26.70 26.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.22 0.72 -4.37 53 314 2011-02-17 15:57:08 2011-02-17 15:57:08 1 13 227 0 230 315 0 67.60 35 14.86 CHANGED --E...............hc-.ptLsspEcLppLlpYLRpcapYCaaCuhpYcspc-l...psCPGss.................................E-DH ...........................pE...sc.-tLssp-+LphllpYLRcpapYCaaCshcY-stc-L...cpCPGss....ptpH..................................................................... 0 65 115 181 +13664 PF13822 ACC_epsilon Acyl-CoA carboxylase epsilon subunit Eberhardt R re3 Jackhmmer:D0L8S8 Family This family includes the epsilon subunits of propionyl-CoA carboxylase, EC:6.4.1.3, and acetyl-CoA carboxylase, EC:6.4.1.2. These enzymes are involved in the biosynthesis of long-chain fatty acids. The epsilon subunit is necessary for an efficient interaction between the alpha and beta subunits of these enzymes [1]. 
25.00 25.00 25.20 25.70 24.50 24.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.16 0.72 -3.62 83 404 2011-02-17 16:15:50 2011-02-17 16:15:50 1 1 355 0 109 286 9 60.60 33 71.49 CHANGED pshl+Vl+GsPosEElAALsAVLsuh..uusssssssss..........stWuc.sc......htt..hsuPsuW .......shlpVl+GsPTsEELAALlAVluuh.........uusussstssst...............stWsp.hc......htcshh.u.tua.......................... 0 30 81 105 +13665 PF13823 ADH_N_assoc Alcohol dehydrogenase GroES-associated Eberhardt R re3 Jackhmmer:D3EFL6 Domain This short domain is frequently found at the N-terminus of the alcohol dehydrogenase GroES-like domain, Pfam: PF08240. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 23 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.24 0.72 -6.44 0.72 -4.28 120 1545 2011-02-17 16:20:05 2011-02-17 16:20:05 1 5 1130 2 440 1190 19 22.70 51 5.97 CHANGED MKAlsapGt+cVcV-pVPDPcIp .MKAlsa+GsccVcV-s.VPDPtIp... 0 117 274 373 +13666 PF13824 zf-Mss51 Zinc-finger of mitochondrial splicing suppressor 51 Coggill P pcc Jackhmmer:Q9UTB4 Domain Mss51 regulates the expression of cytochrome oxidase, so this domain is probably DNA-binding. 27.00 27.00 28.00 28.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.04 0.72 -4.11 25 148 2012-10-03 05:12:49 2011-02-17 17:23:16 1 5 144 0 111 149 0 63.70 44 12.10 CHANGED hCPhss......+............................clpapCPcCGlPsaCScEHWccD.EtHtc..hCpsLRp.lNE.s-HDL .................hCPhss...+p..................................tclpapCPcCGlPsaCScEHWtcDhEtHhp.lC-tLRp.lNc.D-HDL...... 0 26 56 93 +13667 PF13825 Paramyxo_PNT Paramyxovirus structural protein V/P N-terminus Coggill P pcc Karlin D Family This family consists of several Paramyxoviridae structural protein P and V sequences [1]. From a structural point of view, P is the best-characterised protein of the replicative complex. 
P is organised into two moieties that are functionally and structurally distinct: a C-terminal moiety (PCT) and an N-terminal moiety (PNT). PCT is the most conserved in sequence and contains all regions required for virus transcription, whereas PNT, which is poorly conserved, provides several additional functions required for replication [2]. P protein plays a crucial role in the enzyme by positioning L onto the N/RNA template through an interaction with the C-terminal domain of N. Without P, L is not functional. The N, P, and L proteins of SeV and measles and mumps viruses are functionally equivalent. However, sequence identity between proteins from these viruses is limited, and the viruses have been placed in different genera (Respirovirus, Morbilivirus, and Rubulavirus, respectively). SeV P protein (568 aa) is a modular protein with distinct functional domains. The N-terminal part of P (PNT) is a chaperone for N and prevents it from binding to non-viral RNA in the infected cell [3]. 21.70 21.70 22.20 21.70 21.10 20.70 hmmbuild -o /dev/null HMM SEED 309 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -11.67 0.70 -5.21 8 536 2011-02-17 17:37:57 2011-02-17 17:37:57 1 6 59 0 1 461 0 228.70 56 64.32 CHANGED EQAYHVsKGLECIKALRtsPPDh.pIcEs.uhhscsssssspsssTscpEEtDoQslscSCpPAhGSspsutshtcspGsGE..sNssssEhttsPc-.tpsusulpCYaVYDHSGEcVKGIEDADSLlV.uGssusssFpGG-suS--SDsDSGEsDoEGsAsoshGSust.pssRAoDVEplpu-ElptLLRoQppsssth+sGKTLpVPssP-sppussSspPIKKGT-cRSsSaGTthsuu.TuGATQsAhKSsuuSStPuASAGNVhpssoNAchhQcsp.ESGTphSP+opNptES-sEYDDELF .....................................................................EQAhHVppGLECl+AL+tpP.s..tlpEs.uhhsphpssstpptsssh.ppctsspslscsp.sAhGSs..sutsh.ps.GstE..usss..pht.ssts..tsusGlppYaVYD.HSGEtVKGIpDAD...SlhV.uGhsusps..hpsG-sp...-sSs.D.GEs.oEG.A.oshG.u.h.hs.RAuDVphh.ttElptLLRhpppss.h.+.GKTLphP..sP-st..pussppssIKK.G..TctR...ASaGht.Isuh.h............................................................................................ 
1 1 1 1 +13668 PF13826 DUF4188 Domain of unknown function (DUF4188) Eberhardt R re3 Jackhmmer:D0L3R6 Family \N 25.00 25.00 25.00 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.77 0.71 -4.00 48 173 2012-10-02 00:20:33 2011-02-18 08:00:56 1 3 167 0 98 217 26 116.20 33 56.30 CHANGED lVVFllGMRlN+hhul.....ppWhslhtAMstMlcE.Ltps.....-.hGhLut.cs.ah.h......csshllQY.WRuhEsLcsaA+..sp.HhtAWcpFsppst.s...s.ssVGIaHETYhl.tGpaEsIYsNM.P.shGL ............................................lVVFhlGhRhN+.huh......tah.lhtuhstMlc-.Ltpst.........chGhLuhp.hh.s............psh.hhlpY.WRuh-sLppaA+..st.Hh..pAWch...aspthps......sspVGIaHETYhl.tGpaEsI.YsNh.P.shGL............... 0 21 61 84 +13669 PF13827 DUF4189 Domain of unknown function (DUF4189) Eberhardt R re3 Jackhmmer:D0L9M9 Family This domain of unknown function contains six well-conserved cysteine residues. 25.00 25.00 25.20 25.50 24.90 24.80 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.15 0.72 -10.90 0.72 -3.75 66 395 2011-02-18 08:10:12 2011-02-18 08:10:12 1 7 227 0 89 316 4 96.50 27 61.71 CHANGED paGAlAh..ssssGs....hGhuhshs.ocppAcptAlppCpt..........t.tsCclhshapNt.CuA..lAhstpstt........thususshp....pApppAlppCtptss...........pCplhh.tsCo ................aGAlAh..sss.us.....hGt.uhs..ts..optpApttAl..p..pCtt.............psC+llsphps........CsA..lA..hststh.................tsususshp....sAcpsAlspstt..............tstl.s.hsCs.......................... 0 14 46 68 +13670 PF13828 DUF4190 Domain of unknown function (DUF4190) Eberhardt R re3 Jackhmmer:D0LA47 Domain This integral membrane domain is functionally uncharacterised. One of the membrane helices contains two GXXG motifs that are usually associated with dimerisation. 
35.00 35.00 35.10 35.00 34.90 34.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.00 0.72 -4.42 107 528 2011-02-18 08:18:59 2011-02-18 08:18:59 1 16 318 0 182 498 26 63.40 31 30.28 CHANGED hAIuSLVh.ulhuhhh..........lsullullhGalAhspI+css..ppGcGhAlAGlllGhlslsl...hllh .........hAluuLVh.ulluhhh..........h..lhullullhGhlAhspI++ss.....ppGcGhAlAGlllGhlslllhll.h.............. 0 59 137 166 +13671 PF13829 DUF4191 Domain of unknown function (DUF4191) Eberhardt R re3 Jackhmmer:D0LAZ2 Family \N 25.00 25.00 133.10 129.00 23.80 24.60 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.32 0.70 -5.23 55 434 2011-02-18 08:42:45 2011-02-18 08:42:45 1 1 433 0 112 285 112 229.20 39 91.39 CHANGED s++s.cpp.....pphpQlhpuaphpR+pDspl.l.hhluuhluslslhlllGllhs..th...hhhlllGlh....lGlLsAhhlFuRRsp+usYsplEGQPGAAussL.p.slRt....sWplss.sVAss.....+p..D.....hVHRllG+PGllLVuEGsssRl+sLlspE+K+lsRlls..ssPl..aslhlGsu..EGQVPLpKLp+plhKL..P+s...lsps-lsslspRLpALs.....ssthsl...PKGPhPppA+.t+sshR ........s..csppttphpQlhpsFphpR+pD.pLl.hhlGu.Flssl.sl.hhllG.llhs.....th....ahhlllGlh....lGsLsAhhlFsRRAp+usYs+hEGQsGAAuhALs.slcp.......tWpsos.uVAhT......+p.D........sVHRslGhsGllLlGEGs.ssRl+sLLupE+KRhtRlss..slPl..acIllGs.G.......-G....QVPLcKLc+pltKLP+.s....................lossplsslssRLcALs....htsssslPKGPhPp..s.h.Kh.spsh........... 
0 34 82 104 +13672 PF13830 DUF4192 Domain of unknown function (DUF4192) Eberhardt R re3 Jackhmmer:D0LBR8 Family \N 25.40 25.40 26.00 26.10 25.10 25.20 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.13 0.70 -4.78 56 428 2011-02-18 08:58:27 2011-02-18 08:58:27 1 2 346 0 131 346 78 282.00 26 82.83 CHANGED Pu-llAAlPtlLGFhPpc.SlVllsl..................sph.GsshRhDLs.......hst.sptlst.hh...........s...t.....tpscsslsllhss..t.t.s............thpthhptLtpthsst.slsl..hsshhh.plssGphWphh...............s......ssGt..shssssossssA.slhs.Gphs.hssRs-ltAtlt.....s.s.h.tstthstshpttsht.t.ph.......t............th..t..h..chhttsspphspu.............phhsss.....tphutluhsLs-spl.RDthhshsst...csussA.tpLWttluRphsus...h...RusslsLhuhsAaspG-ushAshALstAl.....ps-Ps.ashApLLppALppGlpPc .......................................t-llsslPhlLGFhPpc.SlVhlsh.......................ts...ssth..Gh..VhRhD.Ls........t.ssthsthAt...h.................h.........sssstslslhhsp.....p.tts............................ht.hhts..Lt....psltth..shs..l..hsuh...tl.ssGthWpsh.......................tsss........pts......hc.sssshhsA.ulh...p.G.ht.l...hssRssl.s.hls..................st...t.....tp.sss.htthsttt..th.t......................................tsh..phhhtshshh..tsu..................................pplss........tthsplu....s.ulpcspl.RDhlht.hh.t....................tpAs...t....s..pl......Wttlschh.ss..h.................tstsLsLhuhsuahpG-GshAulslptuh.....thtss.a.....phsthLpphlpsGl.Pt............................................................................................................... 0 42 92 123 +13673 PF13831 PHD_2 PHD-finger Coggill P pcc Jackhmmer:P55198 Family PHD folds into an interleaved type of Zn-finger chelating 2 Zn ions in a similar manner to that of the RING and FYVE domains [2]. Several PHD fingers have been identified as binding modules of methylated histone H3 [3]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.41 0.72 -4.63 28 1358 2012-10-03 17:27:21 2011-02-21 11:26:41 1 92 289 2 819 1377 16 35.50 47 3.52 CHANGED ss.llsCs..pCslpVHtsCYGlsp.sst.....WhCs+Ct ...t.N.llaCD...tCslsVHQ..pC....Y.G...lsh...l...P.p...G..p.........WlC+pC..... 0 223 368 595 +13674 PF13832 zf-HC5HC2H_2 PHD-zinc-finger like domain Coggill P pcc Jackhmmer:P55198 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.48 0.72 -11.31 0.72 -4.06 34 1557 2012-10-03 17:27:21 2011-02-21 11:45:05 1 99 292 0 942 2258 15 113.10 38 10.90 CHANGED scCsLCshcGGAl.KpTs..-.sp..WsHVhCAlh.lPElpFs...sst...phpPl-.lppls.p..R..hchpChhCcpp.........tGACIQCspsp...CtssFHsoCAptAGlhMchc........sh...................h.hhshCt+H ........................................................s.pChLC.......P.......p.......c.......s.........G.......A.......h.........K.......p........T....s...........s.....s.....+............W......s..HVsCAla.....lP....E.V...p.Fu..............ss.t.......................p.hE......P...ls......l.............p.p.IP......s..........R.........a.p..L.......pC.hlC+pp........................sGA.Cl.Q.........Csptp.........C..h.........suFHVTC.A.p.p.u...G.lh..hch.p.....s.t...................th.hhsaC.hH....................................................................... 
0 261 429 688 +13675 PF13833 EF-hand_8 EF_hand_6; EF-hand domain pair Coggill P pcc Jackhmmer:B4DPI1 Domain \N 27.00 13.80 27.00 13.80 26.90 13.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.45 0.72 -4.35 49 8724 2012-10-02 16:17:27 2011-02-21 13:12:04 1 472 1753 448 3941 29164 1599 50.10 27 16.73 CHANGED ppGhIoh-sLpcsh.thh..shp.....hscp-.lpshhpthDhDsDGtlshpEFhshhhch ........................................................pG.Istp-Ltphh....pth.........G.p..................................ho..p...p...-...l.....p....p....h....l....p....p........h....D..t.....D....s......s...G..p...l....s..a..p..EFhthh...t.......................... 0 1329 2024 2921 +13676 PF13834 DUF4193 Domain of unknown function (DUF4193) Eberhardt R re3 Jackhmmer:D0LBT3 Family This domain of unknown function contains four conserved cysteines and a conserved histidine, including a CXXXXH motif. 25.00 25.00 25.70 27.20 23.30 17.00 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.46 0.72 -3.83 55 502 2011-02-21 13:14:06 2011-02-21 13:14:06 1 1 430 0 156 293 158 97.70 59 97.30 CHANGED MATDYDAPR+s-.D-lspDSLEEL.KucRs-p.touslD.DEs-sAEuFELPGADLSsEELoVcVlP+QsDEFTCuSCFLV+HRSQLAc.Ecs...GphlCpDCA ...........MATDYDAPR+o-...D-lsE.DSLEEL.Kup..Rs-p.tSusVDsDEsEsAE.sFELPGADLS...sE.ELoVRVlP+QuDEFTCoSCFLV+HRSpLAp...Ecs...GphlCpDCA.... 
0 49 105 144 +13677 PF13835 DUF4194 Domain of unknown function (DUF4194) Eberhardt R re3 Jackhmmer:D0LDY0 Family \N 25.00 25.00 25.00 25.00 24.60 23.60 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.73 0.71 -4.81 69 425 2011-02-21 13:18:36 2011-02-21 13:18:36 1 1 392 0 144 392 34 162.10 19 70.16 CHANGED llpLLcsshlpsppp....plWptllcp...pstlcshLsslhLcLllDcstGhAahcph..p.tp.ts.........pLlpR.p..sLoh.pollLlhLRpphtctp...t.ts.spchllsp--lh-tlps...ahs.tssscsphtcclcsslp+lt.chuhlcphcs......-sp.......aclpsllcshlss-hls .....................................tLhpt.hlptpp.......phaphlhcp...pstlcshlsslslcLll....Dcp..tshhalpst...........t.t..................hhpp..p..pLshhpollLlhLRphhp-t.....sh.s..stp..shl..sh-Elhphlps...ahs...pscsthtpcl.cp.sl.pchp.phshlpthcs.........t.csp......htlpPhlthhhssp.l.t................................. 0 48 96 127 +13678 PF13836 DUF4195 Domain of unknown function (DUF4195) Coggill P pcc Jackhmmer:Q8ND82 Domain This family is found at the N-terminus of metazoan proteins that carry PHD-like zinc-finger domains. The function is not known. 27.00 27.00 29.50 31.50 24.50 19.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.20 0.71 -4.58 4 132 2011-02-21 13:42:38 2011-02-21 13:42:38 1 4 32 0 55 149 0 164.30 43 25.84 CHANGED IFVGEISSSKPAISNILNRssPSSpS+GlKNGshs.GIoshFKPTSQ+hpNPsSNPVsA.P.sFHPtS+SS-SSVhVQshSKPsaspNSspVsSssSS.LLFD.TQDo.Lsp.QshPslshsGhspoSal.K+PSTScVNSVNPKKPKsSEslSthssSoslsS.pSPSVsSSQshLSpGsNoSSs ...............IFVG.hSsSK..PslS..NIL.NRssP.uS.S+thKpsthpp..shsthhpPsSpchhsPoSps..Vss.P.....s.psESRSocSsl.h.l...pshSK..P..sahpsSsQVss.ssSSpLh.sh.sp..p.o..ls..pss.s...s...lshsGhscosh.lSKR.sSTS-l.N..SlNPK+PKhS-ulsthsuSush.sS..spS.ohsopQsh.upsssoS.......................................... 
0 4 5 9 +13679 PF13837 Myb_DNA-bind_4 Myb/SANT-like DNA-binding domain Coggill P pcc Jackhmmer:Q6P1R3 Domain This presumed domain appears to be related to other Myb/SANT-like DNA binding domains. In particular Pfam:PF10545 seems most related. This family is greatly expanded in plants and appears in several proteins annotated as transposon proteins. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.19 0.72 -3.85 185 1844 2012-10-04 14:01:12 2011-02-21 14:15:41 1 81 168 2 1133 1879 0 91.10 22 25.60 CHANGED pspWscpEsptLl.plh...sp....hc....hpt............pt+st.lWcplupp....h.......t...p.....p.....G.....h...p....R.osp...QC+pKacsLp+pY+pt..+p......t.t....t..tp.s.....p....s....h...a...............................Fcpl-pl ........................................................t..pWs.t.p.EsttLl.phh.......pp....hcttht...................................sst+s.p..hac..tlupt....h.......................t...c.....p........................G..........h.....t.....R.osp...Q.C+pKa.cs.Lp+pY+..+h..+p............t.t........tts............p.....s....h...a................................apphct.............................................................. 0 211 533 829 +13680 PF13838 Clathrin_H_link Clathrin-H-link Coggill P pcc Jackhmmer:Q8NHS4 Domain This short domain is found on clathrins, and often appears on proteins directly downstream from the Clathrin-link domain Pfam:PF09268. 27.00 27.00 27.40 27.20 26.50 25.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.10 0.72 -4.08 51 782 2011-02-21 14:58:18 2011-02-21 14:58:18 1 37 456 9 307 484 4 59.40 63 6.54 CHANGED sLsGA-sLahppFppLhspGpYpcAA+lAAsoPpGhLRTspTIp+Fp..phPstP.Gp.s.PlLQYFuhL .......................sLsGAE-L..FsRKFNsLFAQGsYuEAAKVAAsAPKGl.LRTspTIp+Fp.....slPsp.s.GQsS.PLLQYFGhL.......................... 
0 99 161 238 +13681 PF13839 PC-Esterase GDSL/SGNH-like Acyl-Esterase family found in Pmr5 and Cas1p Anantharaman V pcc Manual Family The PC-Esterase family [1] is comprised of Cas1p, the Homo sapiens C7orf58, Arabidopsis thaliana PMR5 and a group of plant freezing resistance/coldacclimatization proteins typified by Arabidopsis thaliana ESKIMO1 [2][3], animal FAM55D proteins, and animal FAM113 proteins. The PC-Esterase family has features that are both similar and different from the canonical GDSL/SGNH superfamily [1]. The members of this family are predicted to have Acyl esterase activity and predicted to modify cell-surface biopolymers such as glycans and glycoproteins [1][3]. The Cas1p protein has a Cas1_AcylT domain, in addition, with the opposing acyltransferase activity [1]. The C7orf58 family has a ATP-Grasp domain fused to the PC-Esterase and is the first identified secreted tubulin-tyrosine ligase like enzyme in eukaryotes [1]. The plant family with PMR5, ESK1, TBL3 etc have a\ N-terminal C rich potential sugar binding domain followed by the PC-Esterase domain [1]. 
24.60 24.60 24.60 24.60 24.50 24.50 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -12.05 0.70 -4.58 109 1574 2012-10-02 11:02:24 2011-02-21 15:50:55 1 22 161 0 1001 1544 2 250.40 20 56.82 CHANGED LPRFsupphLchhR.............sKplhFlGDSlsRspapSLlChLppshs.............................sppphhptps........t.hhaphpcassolpahhsPaLVp..p..............hlcl-plp.pts.ptht............ssDlllhNo.spWWhpp.thh..hs.h.............tpp.hpchshhtuactulpohtpalptslsstp............................................................opVhFpoh..oPsHhcsstWtt........u.tp.C.tp.............ts.thpsts.phhchltcshp.........th.psslphlsl.TthsphRp.......DuHsuhYtt.......................................pDChHWCLPGl.DoWN-lLhshlht .................................................................................................................................sphsstphLph.hp..............sKplhFlGDSlsRs...apSl..lC..h..Lppsh........................................................ttt.......tt...........................t.hhhth...t..p.a..s...ho....l.p.a.h...hssaLsp........................................................................hhhcpht....ths..p.tht...............shD.ll.l.h.so.....stWahp..th.................................................................t.....phs.h.ht.u.aphslps..h....hphl...t.t..ph....tt...............................................................sp..l.hh..psh...sPsH....h.p.....st.......h.t..........st...C............................................t....th..t.....h.h..ph.h..p.hht.....................hth.hphl.sl...T....hhsthR........DuH.s..a................................................tDC.HWCls..G..h.DsasplLhthh..h....................................................................................................................................................................................................................... 
0 251 600 806 +13682 PF13840 ACT_7 ACT domain Coggill P pcc Jackhmmer:Q8NAP1 Domain The ACT domain is a structural motif of 70-90 amino acids that functions in the control of metabolism, solute transport and signal transduction. They are thus found in a variety of different proteins in a variety of different arrangements [1]. In mammalian phenylalanine hydroxylase the domain forms no contacts but promotes an allosteric effect despite the apparent lack of ligand binding [2]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.73 0.72 -4.45 192 6068 2012-10-02 00:29:19 2011-02-21 15:51:45 1 38 4053 48 1523 4779 2087 65.50 33 16.45 CHANGED phppshttlplhGs.......hshthsGlhAplsssL.......uptsIslhhlS...oaposalLVppcchppAlpsLcp .....................ppsluhlSlVGs......GM+s....hsG.luA+hFpAL..........ucs.sI......sI.h...hIo..oSEhsIShl.l..sp.cphppAl+sLp........................ 0 466 918 1239 +13683 PF13841 Defensin_beta_2 Beta defensin Coggill P pcc Jackhmmer:Q4QY38 Family The beta defensins are antimicrobial peptides implicated in the resistance of epithelial surfaces to microbial colonisation [1]. 22.20 22.20 22.20 22.20 22.10 22.10 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.01 0.72 -3.59 114 633 2012-10-01 20:50:19 2011-02-21 16:25:20 1 2 61 0 227 564 0 30.50 36 38.63 CHANGED cChpt..tGpCRp.pCppsEhhhth....C..tstphC.C ...pChpt..pGpCRp.pCppsEhhhth....C..tssphC.C...... 0 33 34 39 +13684 PF13842 Tnp_zf-ribbon_2 DDE_Tnp_1-like zinc-ribbon Coggill P pcc Jackhmmer:Q96DM1 Domain This zinc-ribbon domain is frequently found at the C-terminal of proteins derived from transposable elements. 
17.60 17.60 17.60 17.60 17.50 17.50 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.38 0.72 -3.54 30 124 2012-10-03 10:42:43 2011-02-21 17:07:58 1 6 55 0 68 155 29 35.10 32 6.86 CHANGED ppCthCtp......pt.tp........opatCppC...s.....hsLChp........C ........................ppCphCtp......pthtp........opahCppC..s.......ssLChp...........C................ 0 25 31 62 +13685 PF13843 DDE_Tnp_1_7 Transposase IS4 Coggill P pcc Jackhmmer:Q96DM1 Domain \N 21.10 21.10 21.20 21.20 21.00 21.00 hmmbuild -o /dev/null HMM SEED 351 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.32 0.70 -12.02 0.70 -5.40 48 1008 2012-10-03 01:22:09 2011-02-21 17:19:41 1 50 126 0 857 1024 6 208.80 14 54.68 CHANGED sPhphFphhhsc..cllppllppTNtthpph.................t.thpssshpElcsFlGlhllhGhh+..hsp....l..p-aWsss...hhu....hshhpssMohpRFptlhpsL+hs-...........s.t...s........tt...Dphtcl+.Llcthspph..pphYsPupplslDEsh.l....ha+.u+hshcphhs.sK.hphGh.chahhs-spouYhhph.....lhp....sp.ssth..t.................p.s......sspllhcLhp.sht.s.ps...+plhhDs..aasSls.LhppLhpp..shhusGslcps+t....thP.....psl..p.........pp...........h...pp...Gphhhths.......ss...l.hhhpapc..scplh.hloo...hps........psh...lptppps...t.........pp.....h...t..pPthlptYspphsu..V-...ptcphht...p.apsst...ps.ppWhppl.hhallshulhNAa 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 377 459 779 +13686 PF13844 Glyco_transf_41 Glycosyl transferase family 41 Eberhardt R re3 Jackhmmer:O15294 Family This family of glycosyltransferases includes O-linked beta-N-acetylglucosamine (O-GlcNAc) transferase, an enzyme which catalyses the addition of O-GlcNAc to serine and threonine residues [1,2]. In addition to its function as an O-GlcNAc transferase, human OGT, Swiss:O15294, also appears to proteolytically cleave the epigenetic cell-cycle regulator HCF-1 [3]. 
25.00 25.00 25.20 25.00 24.60 24.80 hmmbuild -o /dev/null HMM SEED 468 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.61 0.70 -6.52 5 1950 2012-10-03 16:42:30 2011-02-22 08:01:16 1 478 581 32 949 2116 1664 192.00 16 40.67 CHANGED LRIGYVSSDFGNHPTSHLMQSIPGMHNR-+VEVFCYALSPDDGTNFRsKlMsESEHFVDLSQIsCNGKAADRIHsDGIHILlNMNGYTKGARNEIFALRPAPIQVMWLGYPGTSGAsFMDYIITDuVTSPlELA-pYSEKLAYMPHTFFIGDHAQMFsHLoERVlVctKssu.c.hDsc..tVlNus-.LcPlL-p.psKchV+-spscuGsclDhs+ocVsLPVl.hsT.sEPlcpMIsoGQIsssl.NGVsVQNGLuT.oQsNsKAATGEEVPpoIlVTTRuQYGLPDDAIVYCNFNQLYKIDPsTLsMWscILK+VPNSVLWLLRFPAsGEsNIppastchGlsssRIVFSNVAAKEEHVRRGQLADVCLDTPLCNGHTTGMDVLWTGTPMVTMPtETLASRVAoSQLsuLGVPELIAKsRpEYpDIAlKLGTDtEYL+pIRAKVWpARsoSTLFNsKQYCp-LEcLahKMW ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..................................................................................................................................................................................................................................... 0 267 562 790 +13687 PF13845 Septum_form Septum formation Eberhardt R re3 Jackhmmer:D0LB95 Family This domain is found in a protein which is predicted to play a role in septum formation during cell division [1]. 25.00 25.00 25.20 25.40 24.40 24.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.95 0.71 -11.74 0.71 -4.80 89 373 2011-02-22 08:02:50 2011-02-22 08:02:50 1 6 246 0 129 352 62 180.70 24 53.38 CHANGED psGsChsh.......................................................................................................................shsupstshs.......................................sVsCspsHssElh.ushslsss............P....utsshsstspctCs.pthpsYsu...st.h....ssslphhahhPoppSWpsG.sR.plsCh ..............................................................s............................................................................................................................................hpststt.s.ts...h.h.h.....sGsClshssss................hstsVsCspsHshEls.usls.....Lssphst..........P.upsp.sshhpstCs.ptsp.sY.lu..........s.th......ssslphhahs.ots.SWs.s.G.sRtVsC................... 0 33 98 123 +13688 PF13846 DUF4196 Domain of unknown function (DUF4196) Wood V, Coggill P pcc Pfam-B_104093 (release 24.0) Domain This is a short region of ccdc82_homologues that is conserved from Schizo. pombe up to humans. The function is not known. 
25.00 25.00 25.70 25.70 19.90 18.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.68 0.71 -3.78 4 51 2011-02-22 13:26:22 2011-02-22 13:26:22 1 5 30 0 23 52 0 98.80 59 24.33 CHANGED KcLhNosNSSshEEphscoKHp..DLsDpEKt.GQp-schNK+TGQIlE.ED.E.....-E.lp.s++p+lSS..lh.DSDppcpSD.h.+psu.....hK+.R+Vlps..SS.-.EppsPEc..pshhR+ ..............................cpLhNousuSshEEEhNc.ccp..DL.Dp.EKHh.SQE-sDLNKpTGQIIE..-DlE.....EEpIKpGKRKRlSS..VMhDSD-SD..DSDILVRKVu.....lKRPRRVVEDE.sSSlEME..pcsP..EK..o.AAR............ 0 3 3 7 +13689 PF13847 Methyltransf_31 Methyltransferase domain Coggill P pcc manual Domain This family appears to be have methyltransferase activity. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.08 0.71 -4.62 66 11527 2012-10-10 17:06:42 2011-02-22 13:32:39 1 237 4093 66 3510 72499 23508 157.30 17 50.53 CHANGED hppshplLD.....lGCGsGhhshhlt......hs...puc....lhGlDhopctlphA+p....ptpphshp..slpFht...u-lpp.ls...ht........ppaDl........llsp..................ssl.......thhss.tpslp..............phhclL+ssGhlhhtshs.....................t.......tthppp.p.................................ttpchhph......hppuG 
................................................................................h...stplLD..lG.C......G..s...G....h.h.s..h..t..lA.............................p.........ssp................................lh..G....l....D.......h.........o......t........p......h......l......p.....h...A....pp............................p..h....p......t................s.....h.p............s...........l....p...a..h..p........................us...h....p..p......l..s........ht....................ppa..D.l..........................................l...l.s..p...............................................................................................................tsh...........................th.h...t....s..........p....p....h...h..p....................................................p.h....h....p....h....L...p...s...s...G..h..l...h..h....t........................................................................................................................................................htthhhhhh.hh.h...h.h............................................................................................................................................................................................................................................................ 
0 1293 2284 3027 +13690 PF13848 Thioredoxin_6 Thioredoxin-like domain Coggill P pcc Jackhmmer:Q96DN0 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -11.22 0.71 -4.48 54 1636 2012-10-03 14:45:55 2011-02-22 13:44:55 1 33 412 18 956 2074 42 177.40 17 37.01 CHANGED Fps..tpptt..hctFpcsApphp.c..htFuhs....ppc....lhpp....hthpt..ssllla..+......t.......cp..pphphstp......h.shsslppalpppphsh.ltchs..c.s.htplhppsh..hlllh...hsp.t.sps.h....p.phppt....l..pplA.pph....p..s...p...lh..Fs.......hscsp...th.sc...hlp.hh.shst..s.chPhlshlc.ts.p...t...hhh....h....ps.p.hss....pplppFlps ...................................................................................tt..hp.ahpsApthp........c......h....Fs...hst........stp................lhpp.............hth....pt...s....s......ll.la......+..............................hcp....t..h.apsp................h..st.pp....l.p....p.a.l.....p.p........p....p..........h..........s......l......l..s......c.....h.s.....t....c.....s.....h.t...p..l....h..p.......s.....s....l................h....l....hhh.......hs.....p....s..sps.h...................p..pht.st..............h....ppsA..cpa...............................+..s..........p...........l......Fs........hhctc.......ph.tc....hh....p........h....sl..pt........t...p...h..Pslhl.hs..tp..p...t....hhh...........h......pp..p...hs.....pplppFhp.................................................................................... 0 328 470 733 +13692 PF13850 ERGIC_N Endoplasmic Reticulum-Golgi Intermediate Compartment (ERGIC) Coggill P pcc Jackhmmer:B8ZZK7 Domain This family is the N-terminal of ERGIC proteins [1], ER-Golgi intermediate compartment clusters, otherwise known as Ervs, and is associated with family COPIIcoated_ERV, Pfam:PF07970. 
27.00 27.00 27.30 27.50 26.50 26.60 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.07 0.72 -4.01 79 852 2011-02-22 16:17:28 2011-02-22 16:17:28 1 15 295 0 572 795 24 93.90 31 24.37 CHANGED pL+plDA.as..K.s....s-Dh...ph+.ThuGullTlluhllhlhLhhuEhttY..hsst..hpscLhVDp...s............cuc+LcIslsloFPplPCp...hLolDshDsoG-pphc.l.....p.c ................................l+phDA.as..K.s.....--h....p.p..TtuG..u.h..............lolls.hllhhhLhhsEhttY..h.s.st..hp..clhVDp...s...................pupcl.cINlDlohs.thsCp............hls.lDlh.DhsGcppht...h................................ 0 213 335 474 +13693 PF13851 GAS Growth-arrest specific micro-tubule binding Coggill P pcc Jackhmmer:O95995 Domain This family is the highly conserved central region of a number of metazoan proteins referred to as growth-arrest proteins. In mouse, Gas8 is predominantly a testicular protein, whose expression is developmentally regulated during puberty and spermatogenesis. In humans, it is absent in infertile males who lack the ability to generate gametes. The localisation of Gas8 in the motility apparatus of post-meiotic gametocytes and mature spermatozoa, together with the detection of Gas8 also in cilia at the apical surfaces of epithelial cells lining the pulmonary bronchi and Fallopian tubes suggests that the Gas8 protein may have a role in the functioning of motile cellular appendages [1]. Gas8 is a microtubule-binding protein localised to regions of dynein regulation in mammalian cells. 
30.00 30.00 30.00 30.60 29.80 29.80 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.31 0.71 -5.12 31 204 2011-02-22 16:46:04 2011-02-22 16:46:04 1 7 130 0 133 196 5 187.80 36 41.80 CHANGED spLhcpH-pAFp-hKsYYN-ITpsNLplIpoLK--ltph++p-pcsc+hht-lpp-N+cLsEPLppspp-lpcLc+pLppYp+DKtsLppsKs+lpphccclpsLchEp-lLpp+applppER-pLhp+Fcpslp-lpQKsth+NhlLEpKlpslpcpLEp+-spLpcllpsssl-ss.tlptlspclccllpsKNptIcsL .............................pLhppHEcAF.......s-hK...........sYYNDIThNNLsLIpoLK-phpch++..p-p+.-+.ht-lttc.N++Ls-PLpcApc-hp-Lp+pLt..p.Yc.+........DKp..tLtssKs+lp..hh.ccclcsLpWEpElLppRapc................lppE...R--Lhp+F....p....ssl.-lQQKsuhKNllLE+KLpsLpptlEp+-sQLsElLssusl-Ps.slp.l.s.......p+lcclLppKNptIccL........................... 0 57 72 108 +13694 PF13852 DUF4197 Protein of unknown function (DUF4197) Bateman A agb Jackhmmer:C7PG16 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 228 and 249 amino acids in length. 24.90 24.90 24.90 38.90 24.50 24.30 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.68 0.71 -11.04 0.71 -4.78 102 258 2011-02-22 16:53:23 2011-02-22 16:53:23 1 3 244 0 119 277 75 201.50 38 83.83 CHANGED ssusLossphssG.LKcALphGsppAVspLupp...sGFhsNstl+I.LPppLpcssphL+pl..Ghu...phsDchhhshNRAAEsAsscApslhhsAl+pMolsDA+sILp.Gu.-sAATpYhcppTpspLtspFtPllppuhsclGssphasslh.........................s..ph................sslsh....spp....hss.sLssYVTpcAL-GLFthlApEEppIRpsP.supsosLL++VFG ......................sh.ssLops-hsuG.LK-ALspGsptAVspLups...sGFhsNstV+I.LP..sp..Lp.cssphl+ph.....GhG.........s.sDpl.tuhNRAAEsAVspAp.slhlcA.l+pMols.DA+sILp.Gu.csAATpYLcppopppLtscFhPlVcpuhsc.....VG.....hsptYsshs......................s.ph...................sslsh...........lss..hsssLpsYVTpcAL-GLFphlAccEppIRpsP.supsTsLL+KVFG..................................... 
0 47 98 113 +13695 PF13853 7tm_4 Olfactory receptor Coggill P pcc Jackhmmer:Q9H342 Family The members of this family are transmembrane olfactory receptors. 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.99 0.71 -4.39 118 25558 2012-10-03 04:04:29 2011-02-23 09:06:22 1 29 180 0 14828 27096 3 143.20 33 48.90 CHANGED sssthhsuus..hsuGhLp.ul.l.HT..us.TF...sLshC...p..s.s...c...p.Fhs-hPt.lLK.LSCS-o.al.pEl..slllhus.hlshssFlhIllSYlpIhpAVLRIPSppGR+.KAFSTChsHLsVVSLFhuTshFsYL+P.s......Shu..SsshDlll....oVLYoVVPP ...........................................................................................................hChh.L.s..s.s.u..a..h.h..G..h..l..t..u.l...h..p.s.......h.h...hh......pL.s..FCt........sN...h.I.....s..H.Fa.C..-..hs........sl.l..+....L.u.C..u..D..T....t.l...scl...........hh.....h...h...h....u.s.....h..h.hhh..........s..h.h.h.I..l..h.S.Y.s...h.I....l.t....s.l.......L..+...l...t......S...s.....p.......G..R.p....KA.F....S.TCu.S...HL.s.VV.s.lF....YGss.l...h..h.Y.lpP..p.......u.s.p..............s.........p....h....cplh......ulhYollsP....................................................................................................... 0 416 451 1888 +13696 PF13854 Kelch_5 Kelch motif Coggill P pcc Jackhmmer:A8MU55 Repeat The kelch motif was initially discovered in Kelch (Swiss:Q04652). In this protein there are six copies of the motif. It has been shown that Swiss:Q04652 is related to Galactose Oxidase [1] for which a structure has been solved [2]. The kelch motif forms a beta sheet. Several of these sheets associate to form a beta propeller structure [3] as found in Pfam:PF00064, Pfam:PF00400 and Pfam:PF00415. 
27.00 20.00 27.00 20.00 26.90 19.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.56 0.72 -8.12 0.72 -3.94 78 1509 2012-10-05 17:30:43 2011-02-23 09:06:54 1 435 312 0 1094 6146 187 42.50 25 6.40 CHANGED hP.ss..ptp.tss.s.....h.s.....s.............tp......lal...aGGhs...........psps..h..s.cla...lL...slss....h ..................................................P..s..Rhs.Hss.s.......................s.h.......s................sp......................l.al........aG..G..hs.............................psth..........h......s.D...la.....hh..sh...................................... 0 445 651 900 +13697 PF13855 LRR_8 Leucine rich repeat Coggill P pcc Jackhmer:JCS-Target417241 Repeat \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.36 0.72 -4.21 93 70869 2012-10-02 21:32:02 2011-02-23 12:42:34 1 6305 1468 377 38112 83275 1324 59.50 28 22.98 CHANGED spLppLpLs..p.N.plp.tl..sp.ssFp.sl...spLp.hLsLs.tNpl..p..sl..ss.p.s...hpsLtpLptLpLs.s.N.p.l .....................................tLptLp.Ls.......p..N...pl....p...pl...........ss...t..s..Fp....s...L..........sp...L..p...t...L.....s.....L.....s...p......N.....p...l....p........sl......ss...t..s...............h..p...s...L.....p.....p...L..p..t..L..pLs..t.Npl...................................... 0 11072 17914 27457 +13698 PF13856 Gifsy-2 ATP-binding sugar transporter from pro-phage Coggill P pcc pdb_2pp6 Domain Members of this short family are putative ATP-binding sugar transporter-like protein. 
22.90 22.90 22.90 23.00 22.70 22.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.98 0.72 -3.85 23 319 2012-10-01 22:58:23 2011-02-23 17:08:29 1 1 266 1 22 351 15 88.50 45 95.50 CHANGED hsssFDphhupsDssIh..phMGpphpIsG.....tshhuVhD-s.s.ht..............slpGsuhoLslaoushht.P++sDpl..shsGcpahVsRhph.s.GphhlhL .......................s.Fpthh.tcMDslTl...+c..MG.+.pAsINs.......sshs.VlPsEphAEh.s..............ALSGs...ulSLVV..F....Sus.YR...P+RGDpV..Vac......G..pp....aTVTRa-paN..GKPhIal................. 0 5 9 17 +13699 PF13857 Ank_5 Ankyrin repeats (many copies) Coggill P pcc Jackhmmer:P42771 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.76 0.72 -3.57 81 5157 2012-10-02 12:10:21 2011-02-24 13:31:59 1 1642 768 6 3250 58643 3660 54.20 29 7.93 CHANGED Llpps...shtt.sttt....ssLthAsp.tsphphlphLl....t.thshphpstps.tsslchA ............................................t...ss.hst.p..s...p.p.Gp...............TsLHh...A.u.....t..p.....G....p....h....c......l....lchLL............pt..G.A.c..h....s.h..psp.pG..pTsLphA............................. 0 1425 1883 2549 +13700 PF13858 DUF4199 Protein of unknown function (DUF4199) Bateman A agb Jackhmmer:C7PPF0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 167 and 182 amino acids in length. 
27.00 27.00 27.10 28.80 26.60 26.20 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.20 0.71 -4.32 97 280 2011-02-24 17:30:07 2011-02-24 17:30:07 1 1 202 0 108 296 198 157.40 18 90.11 CHANGED paGhhhGlhhhl..hhllhhhhh....s.ths.t.hh......shh..hllslshh...hhul+pa+pphhsG.hhoatpuhthGhhhshluullhslhphlahphl..sPsahpphhptt..t...t....ht.s.t.hp.t..htth...ht..t..t..............hthhh.shhtslhhGhllollhuh.lh+..pp ........................auhhhGhhhhl.hhlltahhh.h.....shphshhshl.....shh...hllslhhh...........hhtl+paRp..p..h.hsG..hloatcuhhhuhhhhhhuullsslhpala..hphl..-s.sahtphhpth.hp............t....htts.thhp..tth.pth.....th..p.hps.......................hphsh..hhhtslhhGhllulhhuh.hh++....................................... 0 50 93 107 +13701 PF13859 BNR_3 BNR repeat-like domain Coggill P pcc Jackhmmer:Q99519 Domain This family of proteins contains BNR-like repeats suggesting these proteins may act as sialidases. 27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.31 0.70 -5.36 201 1305 2012-10-02 00:45:24 2011-02-25 12:47:17 1 12 39 1 486 2085 72 273.70 35 44.51 CHANGED 
sPuLlcV.suDVFAVAEAQhp.pp.........................ps...s....s............t...........................st...h....u..htp...........tt....tpt.p....p................p.....p.p.t.hh.cs.........s.......................................p...pp......t....+..............Vs..ls.RPTsllc...Gs..DIYML.............s.....Gp...Y.S.............pp...s...................us.....s.......................sh.s...lh.....................Ls............c........us.....s....s..sp......p.......Wp........................-spsls.......psh.............t..p..........................p......h............LlGGG....GSGVKMc....DG.TLVFP............VE............GT..........KK.p..sstt..........sVSL...IIY.S..pc.....sp...sWpLSKGMSs...........sGCSDPSVVEW...c-...t.KLMMMTACDDG....RRRVYESGDKG-SWTE.......ALGTLS.........RV.WGN..............p............hpushlsut............ss.-....pRs.....VMLVTLPVa......ttst......spc........KGcLHLWL......T..DNTHIVDIGPV.s..pD..D..D.sAASSL.LYKSup........s.sspcEELIALYE ..............................................................................................................................................................................PuLlps..sus.lh.AhA-uphttt.......................tt...t..................................h.hs.t..................s.t.t...p............pt.......h...tt..t...........................................................................................................................................................t.t.t.p...............s.t..l.....p..PT..s.h..sp.....ss.........pla...h...L..............s....Gp....a.s..........................ppt.................ttt..............................................................ph.slh...............ls...........c..........sp....s..tt....p...........Wp......ps..ts.l......t................p...................................................p.......t........h.lu..uG.G.SG...l.h.hp.......D.G....TL....V..FPlp............ut.............ppt...tt..................h..Sh....lha...o.p.s.......s..p...s...W..p...l.o....p....s.ho.s....
...................sG....C..t...sP.sl.sE..W.......c.....t....c.....L.h....M.h.........s..s....C.c...s..G...........p....p+.V..Y..E.S.s.DhGpoWT..E.....Al..G..T.Lo..+V..Wu..s...pp............t.tuthlpu.............ss.-.t+pV.ML.hT..sh.......tpt......tpt.....cstL.aLWl......T.DN.....s+.hhslGP.....l.....ts...-...s.s.s....A...S..o.LLYpsst................hh....................................................................................................................................................... 0 14 25 486 +13702 PF13860 FlgD_ig FlgD Ig-like domain Bateman A agb Bateman A Domain This domains has an immunoglobulin like beta sandwich fold. It is found in the FlgD protein the flagellar hook capping protein. THe structure for this domain shows that it is inserted within a TUDOR like beta barrel domain [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.80 0.72 -4.24 102 1909 2012-10-03 16:25:20 2011-02-25 14:24:35 1 97 1529 5 465 1365 1800 80.40 26 23.53 CHANGED usth..........hsss............tsshp..Lsssusss..plpltDs.sGplV..pshsl...sspsu..GthsasWD.GpsssGp.....................tlssGp.Yslpl..........pu.psss ..........................sttsss.........sss.....tshslp..L.tp.su..s....p..l....slp.IpDp.sGplV........+T.l.s.l............G.s.h..........su....G.s..t......s.....Fs.....WD....Gp.s.ssGs...................................sh..s.s..Gs..Yslplsusst.t................................. 0 162 299 386 +13703 PF13861 FLgD_tudor FlgD Tudor-like domain Bateman A agb Bateman A Domain This domain has a tudor domain-like beta barrel fold. It is found in the FlgD protein the flagellar hook capping protein. The structure for this domain shows that it contains a nested Ig-like domain within it [1]. However in some firmicute proteins this inserted domain is absent such as Q67K21. 
26.90 26.90 26.90 26.90 26.80 26.80 hmmbuild -o /dev/null --hand HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.00 0.72 -3.97 107 1425 2011-02-25 14:57:41 2011-02-25 14:57:41 1 6 1242 5 295 834 103 141.10 27 59.41 CHANGED sspshpuuuhlG+pVhssus.th..........tsss..................tsshp..lsssusss....plpltDu.sGplV..pshsl...sspsu..G.hsasWD.....GpsssGp....................tlssGp.Yslpl..........pu.psss.....tthshsshstupVsuVphs..u.sss.hLsl.ssst.....lshsslpplt .............................ssQuLQAosLIG+sVhlsGsslhhssssptsussthsstssphttssspsssslssp...........................................................sluphsuGspshsWsGppssGsssssGpYshshsusssGsphsspshthuhVpuVhhs....u.sss.hLsL.Gshut....ssLscVppl.h............. 0 63 154 224 +13704 PF13862 BCIP p21-C-terminal region-binding protein Coggill P pcc Jackhmmer:Q9P287 Family This family of p21-binding proteins is important as a modulator of p21 activity. The domain binds the C-terminal region of p21 in a ternary complex with CDK2, which results in inhibition of the kinase activity of CDK2. 
20.70 20.70 21.50 25.50 20.40 19.50 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -11.30 0.71 -4.80 87 360 2011-02-28 11:44:46 2011-02-28 11:44:46 1 3 298 0 238 367 11 198.30 28 65.46 CHANGED -h..lsVDF-hasss.s.sDaHulKp..LLpQLhs...s...sp..hsl.up.........LuDlIlsQ......s...s...lGoslKs............................Ds..c....-.......s.-sau.hlollNlppp.................p...s..p.....slcpLtcalhp+sp................sps.hh....ppLpplLs.............s.s..sp....p......lGLllsERhlNhPspllPPhaphLh-ElptAtp........cp...............c.aphs.aaLllo+sY...........................cp.................tp....ttp.ptt..ttp...pp.................................................sE.......hhah+sEDEhht .............................-.VslDF-has.p.s.sDacGlKp..LLpQLa....p.....s..hsh.up........................LsDLIlpQ.......sp...lGoslK....................................Ds..c-...............s-s...au.hlolLNL......ppp.......................+...s..p.....slpplpcalhppsp.......................pps.hh....ppLpplLs...............................ss....sp....plGLlloERhlNhPsplssPhap.Lh.cElptAhp........sp....................csaphs.aaLllo+sa......................................pc.....................tt.....ttp.p.pt..ppppp............................................pc.....hhahpsE-Ehh................................................................................ 0 82 134 194 +13705 PF13863 DUF4200 Domain of unknown function (DUF4200) Coggill P pcc Jackhmmer:A6NFT4 Family This family is found in eukaryotes. It is a coiled-coil domain of unknwon function. 
28.90 28.90 28.90 28.90 28.70 28.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.52 0.71 -4.18 69 384 2011-02-28 13:40:14 2011-02-28 13:40:14 1 8 119 0 261 380 4 125.20 24 29.05 CHANGED phlcp+REhhphpptlcpp+.cch...pcppcthpp+ccplpcpppcLpcphh.caspFlp-scsK+pcA.+chppEpptcpphpp-lpclppplspLpppppclcpp.lpphp.YpcaLcp.........Vlp.......tss.......p.......app ......................................hlpc+R-hhhhphsLptK+.pch........p+hppthpp+ccpLppp-ppLccchh..pF-cF...lc....-N-p+phcAh+p....ApcEpct+tp+ptElcclptplptlc.schp+hcpp.lpch.........phYpcaLpp....ls.p..................................... 0 133 157 201 +13706 PF13864 Enkurin Calmodulin-binding Coggill P pcc Jackhmmer:Q8TC29 Family This is a family of apparent calmodulin-binding proteins found at high levels in the testis and vomeronasal organ and at lower levels in certain other tissues [1]. Enkurin is a scaffold protein that binds PI3 kinase to sperm transient receptor potential (canonical) (TRPC) channels. The mammalian transient receptor potential (canonical) channels are the primary candidates for the Ca(2+) entry pathway activated by the hormones, growth factors, and neurotransmitters that exert their effect through activation of PLC [2]. Calmodulin binds to the C-terminus of all TRPC channels, and dissociation of calmodulin from TRPC4 results in profound activation of the channel [3]. 
23.00 23.00 23.00 23.70 22.70 22.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.26 0.72 -3.79 54 314 2011-02-28 14:56:46 2011-02-28 14:56:46 1 5 133 0 205 315 3 96.40 26 32.90 CHANGED G...clPpYLpcpKc.chtcp..pc......phcp.h.tc.p......ts......suhphlsEcERhphLpsL+pphcplppchp.plshh..hD.Thst+pRKpclEccLpplEcsIchhs+spV ....................................................GplPpYLh.cp.+c.phpct..pc................th.pp....tc.t............t.........................suhphls-pERtphL...psL+pphppltp-hp.plshh..hD..Tltt+p+KtclEccLpplEcsIphhp+.p.................. 0 92 109 161 +13707 PF13865 FoP_duplication C-terminal duplication domain of Friend of PRMT1 Coggill P pcc Jackhmmer:Q5T7Y9 Family Fop, or Friend of Prmt1, proteins are conserved from fungi and plants to vertebrates. There is little that is actually conserved except for this C-terminal LDXXLDAYM region where X is any amino acid). The Fop proteins themselves are nuclear proteins localised to regions with low levels of DAPI, with a punctate/speckle-like distribution. Fop is a chromatin-associated protein and it colocalises with facultative heterochromatin. It is is critical for oestrogen-dependent gene activation [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.19 0.72 -3.54 26 408 2011-02-28 15:48:16 2011-02-28 15:48:16 1 13 203 0 273 388 0 70.80 26 28.25 CHANGED ss.............tssptGphp..tsptsttG+sRu+ssRpspsttt...tsthopE-LDtELDpYM.......ussKscLDt-L-sYhpttt.p ..............................................................................................ttstt......tGtht.....tupsstsupupu+....utpt...s.p.uttpt.....c.thosE-LDs-LDsYh...........st...................t..................... 0 89 131 201 +13708 PF13866 zf-SAP30 SAP30 zinc-finger Coggill P pcc Jackhmmer:Q9HAJ7 Domain SAP30 is a subunit of the histone deacetylase complex, and this domain is a zinc-finger. 
Solution of the structure shows a novel fold comprising two beta-strands and two alpha-helices with the zinc organising centre showing remote resemblance to the treble clef motif. In silico analysis of the structure revealed a highly conserved surface dominated by basic residues. NMR-based analysis of potential ligands for the SAP30 zn-finger motif indicated a strong preference for nucleic acid substrates. The zinc-finger of SAP3 probably functions as a double-stranded DNA-binding motif, thereby expanding the known functions of both SAP30 and the mammalian Sin3 co-repressor complex [1]. 25.00 25.00 29.30 28.60 19.40 17.10 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.86 0.72 -4.33 5 123 2011-02-28 17:15:49 2011-02-28 17:15:49 1 2 78 2 78 124 0 71.50 65 39.79 CHANGED PuAAGPtsGQlCCL..RE-GE..RCGRPAGNASFSKRIQKSISQKKVKI-LDKosRHLYICDFHKNLIQSVRN+RKRKsSD.D ...........................h.sQ.CCL......l-DGE..RCsRsAGNASaSKRIQKoloQK.KLKLslD+SsR.HLYICDaHKNhIQSVRNK.RKRKsSDDt.................. 0 18 23 45 +13709 PF13867 SAP30_Sin3_bdg Sin3 binding region of histone deacetylase complex subunit SAP30 Coggill P pcc Jackhmmer:Q9HAJ7 Family This C-terminal domain of the SAP30 proteins appears to be the binding region for Sin3. 22.20 22.20 22.90 22.20 21.50 20.20 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.05 0.72 -8.56 0.72 -4.09 36 333 2011-03-01 09:59:32 2011-03-01 09:59:32 1 8 234 1 224 313 0 51.20 37 25.43 CHANGED LpsssL......+RYp+.taplstt...sssK.pQLspsVt+HFpots.lsEp-sIstFlhtl+ .....................LphssL......pRYp+..capltsps...shsK.sQLspsVp+HF.ps.h..s..lsE.p-slshFlYtVK................ 0 62 112 168 +13710 PF13868 Trichoplein Tumour suppressor, Mitostatin Coggill P pcc Jackhmmer:Q8NEH6 Family Trichoplein or mitostatin, was first defined as a meiosis-specific nuclear structural protein. It has since been linked with mitochondrial movement. 
It is associated with the mitochondrial outer membrane, and over-expression leads to reduction in mitochondrial motility whereas lack of it enhances mitochondrial movement. The activity appears to be mediated through binding the mitochondria to the actin intermediate filaments (IFs) [1]. 27.00 27.00 27.50 27.50 26.70 26.70 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -12.75 0.70 -6.05 68 564 2011-03-01 11:50:10 2011-03-01 11:50:10 1 11 126 0 380 548 18 306.30 19 64.55 CHANGED -pp-El+pLppplptAtssp.RssQltE+cthctcpp.cc-pphsphhct-+p+slpppcccEppctpcptctpptlppQl.cE+cpp+tpptcphtpEcphhpphhcplp-E-ptct.tc+pc+ppphtc-lpchpcpptphKctpcpppccE-p+.lhcahppppc+ccphppcppct.ccc.+-chhp+Ltpp.pctpccpp-h-pLptchhtEEtccct....Rp+-pp-tcc+tchpp-lppscppQhptKppphptptpc-cpphpchlp.phtcpcch-phptpc++tpp.catcplpptlpp+cpp+pppcppphp-tpphp...cppppcpphl-ptR.p+hLpchtsp..shhsK ..................................pppcl+phpphlhtspsht.RssQlt-+p.thptp...pt.pc-p.thtph.hctcpppthpcppppcppcppcph....phtptltpQl.pcp.cpp.+t.tt....tcp.pppEtp.hpp.hhcph.p.t....E-.ppct..tp+t.ppppphpptlpch..pppt..ph+pp...ptpppppt-p+.lhph.h..tpc..pp....c...cp.ph....p....t.cppch....ppp..ppth.h.p....t.l..tp.t..p.c..p.pp.p.t.ch-plht..chhtcphpcc..........cpct...p...p...ctp.p+.chhpc.lhpsctpQht.Kt...pt...h...ttpt...tc...ptt.....hpphhp....t.pp.tpc.cp.ptpcpp...tht.phtptlppp....lppppppt.tppptphpch...p.t.t....p..ttt..pthl..pp.h.................................................... 0 166 202 285 +13711 PF13869 NUDIX_2 Nucleotide hydrolase Coggill P pcc Jackhmmer:O43809 Domain Nudix hydrolases are found in all classes of organism and hydrolyse a wide range of organic pyrophosphates, including nucleoside di- and triphosphates, di-nucleoside and diphospho-inositol polyphosphates, nucleotide sugars and RNA caps, with varying degrees of substrate specificity. 
27.00 27.00 27.40 27.00 26.80 26.30 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.12 0.71 -4.97 25 336 2012-10-02 00:00:35 2011-03-01 14:52:41 1 6 247 29 220 316 2 175.50 52 74.87 CHANGED pslplYPL.oNYsFusK....-shhc.KD...tolscRhpRh+ssYpppGh.RpoVcullLVHcasaPHlLLLQ.l...sss....h.....aKLPGG+L+sGEs-.l-GLKRKLscpLu..............ss......c..sh...................pssWclG-slGpWWRPsFEs.hYPYlPsHIT+PKEppKLalVpLPc+stFtVPKNh+LlAlPLF-LasN.tpYGshISulPplLSRF ............................................p.slplYPL.oNYTFGoK....Es..E.cD.........sSVsuRhpRhc-cacc..hGM..RRoVEGVLlVHEHshPHlLLLQ.l...uss......F....................FKLPGG.cLpsGE-...............E.lEGLKR.hLs-pLu...............t.....s....uh...............................tp-W.ls.DslupWWRPNFE.s.....YPYlPsHl..T..+..PK......EpKKL..a...lVpLsc+.........thhu.V......P.K.NhKLlAsPLFELYDNsttYGP.lSulPplLSRa......................... 0 77 118 173 +13712 PF13870 DUF4201 Domain of unknown function (DUF4201) Coggill P pcc Jackhmmer:Q2M329 Family This is a family of coiled-coil proteins from eukaryotes. The function is not known. 27.00 27.00 27.00 27.30 26.90 26.60 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -10.97 0.71 -4.83 56 290 2011-03-01 17:10:52 2011-03-01 17:10:52 1 9 106 0 186 284 10 165.80 24 35.01 CHANGED hppp.csplschRLphlphpcplschppplcphEplu-s...LphhDF.plph-stsLscKIEERNp-Lt+L+pphspslphlsHh+cKhp.......hlppphpph+ppLpctpcphpclRcplhpschc+s+lcppppc..L+ppuulhphPsLhtD.Y-pphpplpphccslcpL+c..phcplptplpt ..................tppcp.hschRLc.hplcppht+hptplcpp-plu.-s...LphlDFpQL.cI-Npphtc+IEER.N.....pELhcL+.tp........sspsl............phlsph+..............cKhp................hhtpc.pphcpcltptpc.ltchcccltpsctp+pphcppppc.L.cpptuh...h..th......Pplh.-.a.pphtphtthcpplpthcc..chp.hphph..t............................ 
0 83 104 145 +13713 PF13871 Helicase_C_4 Helicase_C-like Coggill P pcc Jackhmmer:Q9Y2G9 Domain Strawberry notch proteins carry DExD/H-box groups and Helicase_C domains. These proteins promote the expression of diverse targets, potentially through interactions with transcriptional activator or repressor complexes [1]. 27.00 27.00 27.20 27.30 26.70 26.90 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.83 0.70 -11.82 0.70 -4.98 47 482 2012-10-05 12:31:09 2011-03-02 11:29:27 1 25 252 0 257 484 56 240.40 40 21.03 CHANGED ssLDpllpchG..sc.tVAElTGRptRl...Vpp.p..sG....p.hthcpRsss...........sNlsEppsFMsGcKtlhlhS-AGuT.....GhShHADhpstN.Q..RRRVHhhLEhsWpADpAIQthGRTHRoNQsssPhac.lsTcltGE+RFhSolA+RL-oLGALT+GpRpsusp.h...................h.up.Nl-osYu+p.ALcphap...tlhtsp....h.............................h................tFhpch.puLp.l.l....................ps.psG.......shp.-.....c......s..sIspFLNRlLuLslchQNsLFphFpphlpshlcpA+tsGp..aDhGlps ..................................................sLDpllpphG..s-.tVAEh.....T.GRptRl....Vpp.s....cG....................p.lhh-sRstt.......................hNhtEpptFMsG.c..K..........tlhIh.S.-Auuo.....GhShpA......Dtps....pN...Q....R+RVH................hhLEhsWpADpAIQth...GRTHRoNQss..s.P..ah..hlho-l.tGE......+.RFsShlA....+RL-o..LGALT..+GpRcsstp...h...................h.sp..Nh-spYu+p..ALcthhp...tlhttp....h................................................................................pFh.p.ch.tuLhtltl..............................t.pss..........hhp..-.....c.........sIspFLNRlLuhtlchQNt.LFphFtphlsthlptA+ttGp..aDhGl....................................................................... 
0 88 144 201 +13714 PF13872 AAA_34 P-loop containing NTP hydrolase pore-1 Coggill P pcc Jackhmmer:Q9Y2G9 Domain \N 27.00 27.00 27.10 28.30 24.30 23.30 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.09 0.70 -5.65 39 492 2012-10-05 12:31:09 2011-03-02 11:38:16 1 21 227 0 281 484 52 270.10 45 24.25 CHANGED utsHP.ssLVESuuMASVuPPtso....Yp...pLPstlls...sGhLSssQLEoVlYAupAHsthLsGta..tt.............t..th..RpGFaLGDGTGsGKGRQlAGIIhDNalpGR+RAlW.lS+SssLlEDAcRDWpslGut..thplhsLu+............................a+.u......cs......s......h......pc......GlLFsTYuTLRus.....t.t....ttctSRLcQllcWhG.p-.............FDG.VIlFDEsHthtN......A......u......uut.........tsSpQGhAGLcLQptLPcARVlYsSATGAoclcNLAYhsRLGLWG.ts.t..PFsstpsFlsAl-pGGlAAMEllARDLKAhGLYlARuLSFcGVEa-llEppLTp-QlplYDsa .......................................................h...HP.s.lVEouu...h.u..........SVsPPc.t....Yp.....tlst.....thhp..pGhL.SshQLEslhYAspt.Hpt.hL.ssu...........................................R...tGFhlGDGsGlGKGRplA....GlIh-Nal..pG.R.+..+A.............lW.hSh.....SscLh....D.ApRDhp......slG...up....pl.lpsLs+..............................................................hc.u.......c..........hs......h......pc.....G..l...lFsTYu..sLhup.......t.....................tphpoRlpQll....pWhG...pc................................FDG..V...I.lFDE.....sHphpN.....h...s.stt....................tsoppGhAsLcLQ..............ptL.PpAR.V.VYsSATG.A.o..cs+Nh.u.Yh.sR.LGlW................G..ts.t.s.Ft.p............htcFlpAlE.......c..t.G.....VuAMEllAhDhKhpGh..YlA.RpLSFp.GV.pacl.Eh...Ls.......p.hphYst.t........................................................ 0 102 162 224 +13715 PF13873 Myb_DNA-bind_5 Myb/SANT-like DNA-binding domain Coggill P pcc Jackhmmer:Q8NCY6 Domain This presumed domain appears to be related to other Myb/SANT like DNA binding domains. This family is greatly expanded in arthropods and higher eukaryotes. 
21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.35 0.72 -4.08 40 544 2012-10-04 14:01:12 2011-03-02 13:50:17 1 24 83 0 347 698 0 75.80 23 22.09 CHANGED Rp.tpFotpEhplLlchlpcppplLhs+tsssss....hptK.scsWcpIspchNu..hus.s.pRohppl+++WpchKppsKc.chtp ..............................Rt.pao.pEpphLlph.lc.pp..hpll...s+pss.t.s.s.....htt.K.p...csWcpIspchNu......hss..s..p..R....shppl++pWpchKtps+cch..t................................ 0 96 119 238 +13716 PF13874 Nup54 Nucleoporin complex subunit 54 Coggill P pcc Jackhmmer:Q7Z3B4 Domain This is the human Nup54 subunit of the nucleoporin complex, equivalent to Nup57 of yeast. Nup54, Nup58 and Nup62 all have similar affinities for importin-beta. It seems likely that they are the only FG-repeat nucleoporins of the central channel, and as such they would form a zone of equal affinity spanning the central channel. The diffusion of importin-beta import complexes through the central channel may be a stochastic process as the affinities are similar, whereas movement from cytoplasmic fibrils to the central channel and from the central channel to the nuclear basket would be facilitated by the subtle differences in affinity between them [1,2]. 
30.10 30.10 30.90 31.20 30.00 30.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.78 0.71 -4.52 49 335 2011-03-02 14:23:06 2011-03-02 14:23:06 1 13 275 1 231 340 1 138.80 31 29.89 CHANGED p..Ps..shs.cpWcpAh......pcpPsP.p....hlPV.hhGFp-LtpRhphQcpplsphpthLpplpsp.lspL.p.p+.chpsss+ltct+p+ptpLp+RlLclhsplp..lL+p+Ghs.........Lss-E-clppclpplppclssP.sth......u+lsEL.......hupl........p ................Ps.ssDsthWcpAh......hcsPsP.p..........hlPV.h.hGFp-LtpRhchQcphspptps+Lc.....t....lspp.ls.........p.......L.p+c.phssss+ltph+c+phpLs+RhLpl.....ht+hp..lh+ppGhu............................lps-EEpLpppLpplppplp..sP...sthp..........uRhpELhuph.................................................... 0 77 125 187 +13717 PF13875 DUF4202 Domain of unknown function (DUF4202) Bateman A agb Jackhmmer:A1B4I0 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 187 and 205 amino acids in length. There are two conserved sequence motifs: LED and KMS. The function of these proteins is unknown, although many are incorrectly annotated as glutamyl tRNA synthetases. 
21.60 21.60 25.70 25.90 19.60 18.80 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.19 0.71 -4.64 51 144 2011-03-05 16:24:37 2011-03-05 16:24:37 1 6 142 0 91 151 13 182.80 41 82.94 CHANGED appAlstIDsApupDPspp...h......p....spshPtELhYup+MochL.............p+hsPs.ASE..............sLpLAsRuQHlcRWclPRusYPhs+sGYhpWRssLtphHAphssplh.....hpuGYsppphpRVutllcKcsl.....K......p.............Ds-sQsLEDVACLVFL-chhtsFsppH...D.--KllcIlcKTWtKMS-cG+phALp..lsLs-phtsLlt ..........................ppAlthIDtApspDPpt...h..........s..spshPhELhYAp+MocaL.............tph.pP.s....A.....S-..............hLp.LAsRuQHlcRWclPRssYPhsRs.GYhpWRssLtptpAspssclh.....hpsGYspp-hpRVusLlpK.csL......K..p..................Ds-sQsLEDVACLVFL-c.Fp..pFtpc+.................D--KhlsIlcKTWtKMSpcG+phALp..lp..hs-phttLl.t.................................... 0 23 57 76 +13718 PF13876 Phage_gp49_66 Phage protein (N4 Gp49/phage Sf6 gene 66) family Bateman A agb Jackhmmer:A1B8I7 Family This family of phage proteins is functionally uncharacterised. The family includes bacteriophage Sf6 gene 66 Swiss:Q716B1 as well as phage N4 GP49 protein Swiss:A0MZD7. Proteins in this family are typically between 87 and 154 amino acids in length. There is a conserved NGF sequence motif. 27.00 27.00 29.70 29.50 25.10 24.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.62 0.72 -3.91 52 176 2011-03-05 16:37:05 2011-03-05 16:37:05 1 4 160 0 15 139 2 88.40 41 69.80 CHANGED +lT.ppl-shIsppp......Yhsttst...................................plTlCslsLcNGFpVs.GcSAC.lsPpNFDsElGcclAcc....sAhsKlW.LEGY..hL+ppLt ......................+lT.ppI-ulItppp......Yhstts.sh..................................hlThCllsLcNGFsVs..GcSAC.ssPcNFDsEIGc+lA+p....sAhsKIW.LEGYhLppph.t....... 
0 4 8 12 +13719 PF13877 RPAP3_C Potential Monad-binding region of RPAP3 Coggill P pcc Jackhmmer:Q8IW40 Domain This domain is found at the C-terminus of RNA-polymerase II-associated proteins. These proteins bind to Monad and are involved in regulating apoptosis.\ \ They contain TPR-repeats towards the N_terminus. 23.00 23.00 23.50 23.90 22.90 22.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.81 0.72 -4.01 50 325 2011-03-07 13:33:25 2011-03-07 13:33:25 1 54 161 0 200 314 1 93.70 25 18.61 CHANGED .ss.Ppsuh-Ftpsa+plt..t......sss........tphpaLppl....ssssLsplFps.slss-hLtpllpslp.....ph...hh.pc....p.s.....hshphLptLscspRFchhl.hhluss-Kp ...........................PpsuhcFtpsa+plt..s........sss..........tphphLppl.......sPsplsplFps.sLsschlspllp.sLp.....................sh...hh.p-.....................t.s................hlhphLppLupspRF....shhl.hhhSps-K........................... 0 70 97 145 +13720 PF13878 zf-C2H2_3 zinc-finger of acetyl-transferase ESCO Coggill P pcc Jackhmmer:Q5FWF5 Domain \N 21.70 21.70 21.70 22.30 21.20 21.50 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -8.14 0.72 -4.39 43 338 2011-03-07 14:47:26 2011-03-07 14:47:26 1 11 245 0 223 333 2 40.40 39 8.27 CHANGED QhhLDhGQpp.h.s.t.p.pCspCGMhYssuss-DpphHp+aHpph .............QhhlDh..G.Qcp...hs.t...h.pCspCGMhYssu.ssEDct.HppaHpp.... 1 57 100 161 +13721 PF13879 KIAA1430 KIAA1430 homologue Coggill P pcc Jackhmmer:B2RV13 Family This is a family of KIAA1430 homologues. The function is not known. 
23.00 23.00 23.30 23.00 22.80 22.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.00 0.72 -10.42 0.72 -3.57 46 280 2011-03-07 14:58:15 2011-03-07 14:58:15 1 16 112 0 222 284 2 98.40 23 30.14 CHANGED tH.pp+lpph+...s........slcsps.......Ppp.ht.......phh...hphpph.phpp.........p+h......pcIp+....-NphLlp+ls..cIt.cppsph...stp.p...........ttp................csls...tp...pRpcE.ht+...IscENpplh+Rlpp ...............................chp+l.php...s........tlDsp.........Ptt.ht......thh...hp..h+ph..phpp...............-+hppI-+....-NphLLp+lsplt..cptsph....sph..pp..........h..................................................pshs....t.pRp+c..t+...IptEN...tlhcRlp.h........................................ 0 119 142 181 +13722 PF13880 Acetyltransf_13 ESCO1/2 acetyl-transferase Coggill P pcc Jackhmmer:Q5FWF5 Domain \N 21.60 21.60 21.60 21.80 21.40 21.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.08 0.72 -4.23 40 326 2012-10-02 22:59:21 2011-03-07 14:59:05 1 6 233 0 213 325 10 69.20 42 14.58 CHANGED shsAhCGlsRIWVpsppRR+GIAocLlDslRssFl.aGhplspsplAFSpPTssGpthAppYsss....ss..FLVY ......sAlCGISRIWVhshtRR+uIAo+LlDsl.Rs..s.Fl.aGthl.........s.........+p.....plAFSpPTssGphhAppYsss....sp..ahlY...................... 
1 57 94 154 +13723 PF13881 Rad60-SLD_2 Ubiquitin-2 like Rad60 SUMO-like Coggill P pcc Jackhmmer:O95164 Domain \N 23.00 12.00 23.00 12.00 22.90 11.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.50 0.72 -4.18 24 319 2012-10-03 10:59:06 2011-03-07 16:02:45 1 4 202 3 206 1807 27 106.70 29 62.84 CHANGED -.lElKFRLs..DGoDIGPhpass.uoTVusLK-pllu.pW............P+-..Kcp.sP+oss-lKLIsuGK.ILE.NN+TLups+..hPhG-lss...slhTMHVVlp.shs-ppsc+p.....psc..s.+pspCsCs ....................................................lpl+hhLh..sGp..ht...hh.ass..ssolsslKcplhs..pW.............................................Pp.-.....hct....ts..p..osst..l+L.I.tG+..hLc...sst......TL....uph+....hshscs.....................slMH.lVs+....s...sc..psptp...........tpp.....t................................................ 0 54 105 159 +13724 PF13882 Bravo_FIGEY Bravo-like intracellular region Coggill P pcc Jackhmmer:C9J6A4 Domain This is the very C-terminal intracellular region of neural adhesion molecule L1 proteins that are also known as Bravo or NrCAM. It lies upstream of the IG and Fn3 domains and has the highly conserved motif FIGEY. The function is not known. 23.80 23.80 25.40 24.40 23.60 21.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.22 0.72 -3.07 19 521 2011-03-07 16:52:03 2011-03-07 16:52:03 1 69 100 0 165 423 0 86.90 55 7.65 CHANGED KRs+GGKYs..V+-KEss.pscs-sp....p-..ssFs.........EYpp.ps..sp.sh.p....tupts..stphpsp...sosDShs-YG.-u..ss.spFNEDGSFIGQYusc+cp ............+Rs+GGKYs....VKEKEDs.psDsEhps..hcD...soFs.............EYS......-.....ED.....cK.PLtt.....................uSpss...st.slKtp...............cSDDS.LVDYG.-G..sp.GQFNEDGSFI...GQYouKK-K................ 
0 29 43 97 +13725 PF13883 Pyrid_oxidase_2 Pyridoxamine 5'-phosphate oxidase Coggill P pcc Jackhmmer:Q8IUH2 Domain \N 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.12 0.71 -4.49 37 676 2012-10-02 11:35:36 2011-03-07 16:55:50 1 10 508 4 389 722 172 156.70 25 61.64 CHANGED Ps.tc-sAphARhLVppssausluTl..So.......ps.....hpGhPasslhShuDsss...s...susGsPaahLosls.ospslpps.sp..sSloho.sp..............pshspp.........hDPpsP........s.Cs...RlplsGplppl....s......sp-hph..A.ccshhsRHPchppW.ps......HsaaahKLplpsIallshFGG...sphl...ss--Yap ..............................................ARpLlpp..sp.hu.sLuTl......sp............t.............................hpGhP.au.s.hl.shu....................s....sssGpPhhhlS..sl.u.....ps+NL.tss...s+.....sSLhls.tss..................................................................tsss.................................................t.ss...RloLhGphp..l....s.............ssphtt.....s..cpp.ah.ppH.P..c..upha.hsh........tcFtah+l.p.lpp.lh.hluG.FGt...hthl...sspp...t.................................... 0 100 209 301 +13726 PF13884 Peptidase_S74 Chaperone of endosialidase Coggill P pcc Rawlings N Domain This is the very C-terminal, chaperone, domain of the bacteriophage protein endosialidase. It releases itself, via the serine-lysine dyad at the N-terminus, from the remainder of the end-tail-spike. Cleavage occurs after the threonine which is the final residue of the End-tail-spike family, Pfam:PF12219. The endosialidase protein forms homotrimeric molecules in bacteriophages [1]. The catalytic dyad allows this portion of the molecule to be cleaved from the more N-terminal region such that the latter can fold and presumably bind to DNA. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.88 0.72 -3.84 244 1011 2011-03-09 16:45:01 2011-03-09 16:45:01 1 59 650 2 275 946 2602 57.10 29 7.10 CHANGED SDpRhKpsI.psl.ps..s...................Lsplpp..lpshsa.....pa....h..................................ttttsp.pch..GhIAQ-lpplhPc .......................SDpRhKpsI..psl..ss.u...................Lcplpp....lph..h..pY....pap.........................................................tpstp..pch..GlIAQ-lpplhP................................. 0 109 164 224 +13727 PF13885 Keratin_B2_2 Keratin, high sulfur B2 protein Coggill P pcc Jackhmmer:Q6L8G4 Family \N 23.00 10.00 23.00 10.00 22.80 9.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.60 0.72 -10.95 0.72 -4.24 95 2411 2012-10-03 03:07:01 2011-03-09 16:51:25 1 24 43 0 981 3770 23 46.10 43 80.47 CHANGED CCpPs.CCpsoC..CpsoCC.pP..oC...CpsoCC..p.soCCpP.sC....Cp..ss..CC.pPs ..................................CCp.ss..C.....C..p..P..o..C.......C.p.....s..o.C....C.pP.....s.C..........C.p.s....s..C.C.....p...s.....s...C....C.......c.P....sC..........Ch.......ss....CC.ps............................................. 0 183 188 227 +13728 PF13886 DUF4203 Domain of unknown function (DUF4203) Coggill P pcc Jackhmmer:Q9NS93 Family This is the N-terminal region of 7tm proteins. The function is not known. 
29.20 29.20 30.10 29.50 29.10 29.10 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.66 0.70 -4.87 33 330 2011-03-09 17:08:44 2011-03-09 17:08:44 1 7 171 0 235 299 5 186.80 25 38.80 CHANGED sllsslhs.lhGllhshhGaRha+shh.Fl.....sGahhuuhlshllh....hphps.........hssththtsshls.GlssGlllhhl.hshh.hshGlhhhGllhGhhluhhhhhshhsph.hhhp.s.hshh..h..h..hhshullsul..lsLthp+.hhsIluoulhGushlhhulDhFhps.tlp.hh.htlhtphhsps..h.thh.shPhsh.........salhLusasllhlhGlllQh+ ..................................................t.lhsshhh.lhGlhhshhGaRhh+shh..Fh.....sGhhhuuhhh.hl...Lh.....hp.ps................htpph...htsshhsulhh...Gl.hhshh..hshh.hshulhhsGllhGhhlu.hhh.h..h.h..s.th....p.s.s.hsh.h...............sh..hhshull..hsl..lsl.th.+...hhsll.uoulhGushlshulDhahps...l...h.h.lh................tc.h..p.....h.th........s...Phsh..............hlhluhhshlslhGlhhQh+...................................... 0 82 113 176 +13729 PF13887 MRF_C1 Myelin gene regulatory factor -C-terminal domain 1 Coggill P pcc Jackhmmer:Q9Y2G1 Domain This domain is found just downstream of Peptidase_S74, Pfam:PF13884. The function is not known. 24.10 24.10 24.10 26.90 24.00 23.90 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.12 0.72 -7.62 0.72 -4.93 31 153 2011-03-10 11:00:22 2011-03-10 11:00:22 1 9 79 0 92 129 0 35.90 68 3.83 CHANGED LlVNK.....-RIFMENVGAVKELCKlTssLETRI-c.L.....ERh .........................LhVNK.................-RIFMENVGAVKELCKLTsNLEoRI-E.LE+W............. 0 24 30 59 +13730 PF13888 MRF_C2 Myelin gene regulatory factor C-terminal domain 2 Coggill P pcc Jackhmmer:Q9Y2G1 Domain This domain is found further downstream of Peptidase_S74, Pfam:PF13884, and MRF_C1, Pfam:PF13887. The function is not known. 
22.10 22.10 22.10 23.10 21.50 21.90 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.18 0.71 -4.38 5 132 2011-03-10 11:11:45 2011-03-10 11:11:45 1 9 52 0 64 109 0 132.00 41 16.50 CHANGED TIoSIQIlEIQQlID+RYCSsuLpCGPGNYsY+IPVNKaTPTNVKFSLEINTTEPLIVFQCcaTL.GNhCF+uph.....sputsQSpcVstpMTQGYQHIWSLPVAPFpDSTYHFRVAAPDLADCSTDP.as.GpFFTDYFFYFYR+C .................................................oloSIQlhE.p..IsppYC.ut.s..tCt.......s..G...N..aoYpIPls.ptTP.hp.lploLphNo.o..p..Pl...Vh...Cphs....tp.C.cu...............................uh.p.p..h..c.sp....p..p..TQGh.p....H.hWslslhsFp-.sYHFRVAhs..s....As.CS.o....-s.hh....hTDYaFaFYRhC....... 0 12 16 33 +13731 PF13889 Chromosome_seg DUF4204; Chromosome segregation during meiosis Wood V, Coggill P pcc Jackhmmer:Q32MH5 Family The proteins come from eukaryotes, plants and animals, and are necessary for chromosome segregation during meiosis. 21.60 21.60 23.30 22.10 21.30 19.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.21 0.72 -9.00 0.72 -4.09 23 266 2011-03-10 12:01:03 2011-03-10 12:01:03 1 4 170 0 181 269 0 62.40 45 8.09 CHANGED ssLRYAlHLRFhsP.................p..............................cpRaYLYssIRVVFspRps..DuDEu.............cL+s.hcaPss.......P..........+Yhsh ...............................................lLRYLlHLRFpss..................p...............................................pGRhYLasDIRllFupRs...-sDpu.............cL+s.schPps.......P.+aSPh................... 0 38 76 124 +13732 PF13890 Rab3-GTPase_cat Rab3 GTPase-activating protein catalytic subunit Coggill P pcc Jackhmmer:Q15042 Family This family is the probable catalytic subunit of the GTPase activating protein that has specificity for Rab3 subfamily (RAB3A, RAB3B, RAB3C and RAB3D). It is likely to convert active Rab3-GTP to the inactive form Rab3-GDP. Rab3 proteins are involved in regulated exocytosis of neurotransmitters and hormones. 
The Rab3 GTPase-activating complex is a heterodimer composed of RAB3GAP and RAB3-GAP150. This complex interacts with DMXL2 [1,2,3]. 21.80 21.80 21.80 25.80 21.70 21.70 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.48 0.71 -4.87 27 198 2011-03-10 13:10:59 2011-03-10 13:10:59 1 5 129 0 135 196 4 158.80 43 18.68 CHANGED sssppststthcG..thp.....hs.shhLLps.s-slatPhTQcssshTEDhlc-ctphLtphGsstp..upthptp........hQsssLlSDMpAF.................KAANPu.....shhEDFVRWa.S......P+Dal......ppt..sssp..........ps.s...............hsscGpLSpR......Mpt...pG............NhWtclWcsApshPApc.Q+.LFDstcEuEKlLHaLEs ......................................................ts......thpscGphtt....hs..phpLLcs.uEPLYlPlTQ-ss.Ph....TEDhlcEpsEllt+LGsssp..Gsthpsp........hpossLLSDMpuF..................KAANPG.....shLEDFVRWa.S......P+Dal-cp.....shsp..............pss......................................hsh+GpLStR......Mph.....u............NhW....hcsW-sAcPlPAp+.Q+pLFD-s+EAEKVLHaLt......................... 0 51 75 110 +13733 PF13891 zf-C3Hc3H Potential DNA-binding domain Coggill P pcc Jackhmmer:Q9H9L4 Domain This domain is likely to be the DNA-binding domain of chromatin re-modelling proteins and helicases. 27.00 20.00 28.30 20.20 26.20 19.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.03 0.72 -3.91 48 422 2011-03-10 13:43:28 2011-03-10 13:43:28 1 8 116 0 277 413 0 65.20 32 16.65 CHANGED p..C.....st....ttCpppslshscaChp..HILpDtp...QhLa+tCsh.........ts.ppCs..pPl...psttps......hCshHhp ..................t.C.....ssttCsppsLshscaChp....H..ILpDps...QshF+pCshh........tssppCspPl...csp..ppss.....hCstHh....................... 0 87 123 197 +13734 PF13892 DBINO DNA-binding domain Coggill P pcc Jackhmmer:P53115 Domain DBINO is a DNA-binding domain found on global transcription activator SNF2L1 proteins and chromatin re-modelling proteins. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -10.97 0.71 -4.14 27 402 2011-03-10 17:40:01 2011-03-10 17:40:01 1 13 236 0 268 411 1 108.90 33 8.46 CHANGED t+phspphcplWtsls+KDlsK......spRhhsssppsphtNh+KhApLst+Es......R.+ht.........h+opKs.KDhthRA+RlhREMhsFWK+.E+-ER-hRK+sE....KEAhEptK+-EE.REA+RQpRKLNFLIoQTELYS.HFhu+Klcs...sc..h- ........................................t...............................................................................p....................t..........t......c.....p...p-h..hhK+.-+..pch+.+...t.tp....Kp.t..hEtt+ptEE....t....c...E....sc........R..Q...p.+..+LsFLlpQTELau.HFlsp+hp.....tt....................... 0 78 133 201 +13735 PF13893 RRM_5 RNA recognition motif. (a.k.a. RRM, RBD, or RNP domain) Coggill P pcc JCSG:Target_421317_RF20609A Domain The RRM motif is probably diagnostic of an RNA binding protein. RRMs are found in a variety of RNA binding proteins, including various hnRNP proteins, proteins implicated in regulation of alternative splicing, and protein components of snRNPs. The motif also appears in a few single stranded DNA binding proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.54 0.72 -4.12 106 5485 2012-10-02 20:46:34 2011-03-18 17:52:00 1 201 405 27 3107 36633 716 56.70 21 11.51 CHANGED l.hphhppa.G.p.l..pp.lth.hppp.........ptuhlp...a...s.shpsAppshp..th..s.ssh...h...ts..p..t.lplp...au .........................................phhsp.a..G..p...l.........tp..l...h..l...hppp...........................sp.A.h..lp..................a........p..shps....A.pp.A.h.p............tL.......s..uph..............h.........tu....p.......lplp.h................................... 0 924 1420 2227 +13736 PF13894 zf-C2H2_4 C2H2-type zinc finger Bateman A agb Jackhmmer Domain This family contains a number of divergent C2H2 type zinc fingers. 
23.00 9.00 23.00 9.00 22.90 8.90 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.74 -7.08 0.74 -7.53 0.74 -3.23 594 26727 2012-10-03 11:22:52 2011-03-21 11:17:14 1 4325 623 12 17330 257549 774 24.50 26 7.18 CHANGED a......tC.......sh......C......s.............p.....p..........a.......ps.....t.....p...p.lp.pHhp..p.p...c ..........................................atC..............sh...........C..............s............................p.......p.....................F..............ss...........p.............p.....p.Lp.pHhp...p.p.................................. 0 4311 6366 12692 +13737 PF13895 Ig_2 Immunoglobulin domain Bateman A agb Jackhmmer Domain This domain contains immunoglobulin-like domains. 23.80 10.00 23.80 10.00 23.70 9.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -10.50 0.72 -3.85 70 24350 2012-10-03 02:52:13 2011-03-21 11:49:24 1 2345 524 273 12457 102175 132 89.10 15 18.46 CHANGED ps..........hlp...sss.......ss.......l..pp..Gpsl..sL.pCp....ss......s......tss..s.p....hp..hh..+.s..............................s......phl.s.......p.s.....................ps..........................a....h.........pp.............s............s...h.s..suGp.YpCps......p..st.s...........s...pt...Sssl.pl.pV 
.........................................................................................................................................................h..........................h...........G..p..s..l......sL....p.Cp.....sp.................u............................p..s.......s..p...........h.p.....W...h.......+...s..................................................................................................................................................s.............p..l.s....................p.s..............................................................................pp.......................................................hh...l................ts.................................s......................................s.......h...p......c...s....G...p...Y....p.C.ps............ps...t....h......................t..........p.t.............................................................................................................................................................. 0 2991 4093 7204 +13738 PF13896 Glyco_transf_49 Glycosyl-transferase for dystroglycan Coggill P pcc Jackhmmer:O43505 Domain This glycosyl-transferase brings about the glycosylation of the alpha-dystroglycan subunit. Dystroglycan is an integral member of the skeletal muscular dystrophin glycoprotein complex, which links dystrophin to proteins in the extracellular matrix [1,2]. 
27.00 27.00 28.10 27.10 26.70 26.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -11.83 0.70 -5.33 33 771 2012-10-03 05:28:31 2011-03-21 13:31:38 1 17 123 0 493 733 5 188.60 23 52.85 CHANGED slThsTHushcaL.csl.sls..c+Wp.GPlSlAlasPGt-hptslssIphlhpssssp....l+chsohHlhFptpc.hs...phs.....h.ps.....hph.shp.....C...sphh.sht....ph...h......t.......acs...ssphsYPlNlhRNlARpuAp.TcalhssDI-lhPSsGhlpph..hphls.p....t.p...........s.p.sppVaVlshFElpss...........s..slPpsKtELhphhpsupAh.FHpclC.pCHpsPshpcWhpts.....cs.s....thsssap...h...sh...p.p....tWEPhYIus...p...s-PhaDERhshcGp.s+hoQsathClhsYp...........FplLssAFLVH+.GIKps...tspts.pt...ch.tp...pt...pphltpchhpEhchhYs .........................................................................................................................................................................................................................................................................................................hP.N.hRNhAh...........s.hhhh.Dh.-h.h.........shh..h.....t....................................................shhl.hFp.h............................................................................hs...s....p.t....L....h.t.........................t..............h........h.........sp...t.....W.t..........................................h................aEs.hl.............t.......................s...h.h...h..sh.......................h........hth...h..t..hp.....................h.lhs.sa.h+...sh............................................................................................................................................ 0 224 269 401 +13739 PF13897 GOLD_2 Golgi-dynamics membrane-trafficking Coggill P pcc Jackhmmer:Q6PL24 Domain Sec14-like Golgi-trafficking domain The GOLD domain is always found combined with lipid- or membrane-association domains [1]. 
27.00 27.00 27.50 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.77 0.71 -3.94 11 231 2012-10-03 07:10:23 2011-03-22 13:37:50 1 7 93 0 147 218 0 111.20 47 29.43 CHANGED VGRGEllTVRVPTHscGopLFWEFATDcYDIGFGVYFEWTsssSs.....sloV+VSESoD-Ec---spp..................tsu-lEpGs.htt...s+.....PplsEIlPVYRRDsHcEVhAGSHpYPGcGVYLLKFDNSYSLWRSKTLYYRVYYT ...............Vt+G-slTlRVPTp.pG.p.hlhWEFATDpYDIGFGlaF...-Ws.ssss.....tloVplS-Ss----p....t-t.................................................tp.ppup......pp.t......t+.............sphsEllPlY...RRDsHp-V.sGSHpYPGcGlYLLKFDNSYSLhRuKolYY+VYY............................................ 0 62 73 105 +13740 PF13898 DUF4205 Domain of unknown function (DUF4205) Coggill P pcc Jackhmmer:A8MYZ0 Domain The proteins in this family are uncharacterised but often named FAM188B. 21.00 21.00 21.00 21.00 20.10 20.90 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -11.93 0.70 -5.71 11 398 2011-03-22 14:02:10 2011-03-22 14:02:10 1 11 139 0 269 395 9 225.10 25 57.41 CHANGED +plLFGoshpsFsp-W+ppsFsFs-s..tltYGLhQt+GGPCGVLAuVQAhlLKtLL..Fspss............th..tshhpssssp+ppsLstALADILWpAGc.ppcAsVsLsssc.phsss.scYptDslpEpLpLashpph--hptFlppals.Fc.pushGsILhlYSsILSRol-plppDhDsss.s....pLlGu.HG.a.CTQElVNLlLTGRAsSNVFNGs.......cpspssh.....s.L+Glts..RS-IGaLoL.....aEH..apsspVGShLKTP+hPIWVlCuE.uHYSVLFsssp-LlsDa+h....E+pF-LYYYDGLssQpctI+..LTVDspupthpss.p......t.p.....clhPPLEhsIRTKWtpAslsWNG 
........................................................................h.at............ul.p..p............sG....s....p.ullssl..Quhllp.hl....h..t..............................................................p.phL.....h...sh....t....ll.....................................ht..............t........t...............h..h...................h.................t.................................................................................................................................p............t.................h..........t..h...................h......p.....h...t......p..tph..h...............h................t..h.....at..........t....Gsl.hhhhShlho+uhp..........l.p.t-hpssp....t.............Llts..hs..h..ss...tllNLhLsGpAsstlass.......................................p...th........hhGl.t..pstl.GhLth...............hpt....p...t....p..VGphhKsP.phPlWlhssp.sHhslhFs...p.....tl...t.......................................................................................................................................................................... 1 88 117 187 +13741 PF13899 Thioredoxin_7 Thioredoxin-like Coggill P pcc Jackhmmer:O95994 Domain Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.77 0.72 -3.96 51 3155 2012-10-03 14:45:55 2011-03-22 14:27:14 1 52 1979 19 1031 6223 2371 86.30 24 17.25 CHANGED shhs..sh-pAl.tpApppsKslhl.hh.p..sshC.pCpsh.ccs.hhss.pplpp.lt.c.calhlphsh............sspct.ph.........thtt.......ph.......sPtlhhlss ......................t......hppul..tp..u..p..s......p..s..KP.V.hl..Dh..t..A-W...CssC+..th..c...ch..s..hsc...s......pVpp.....ht..p..s.h..lhlps-h........................sssss..p.............................htp.................hG.................hPtllhhs........................................................... 
0 347 605 827 +13742 PF13900 GVQW Putative binding domain Coggill P pcc Jackhmmer:Q6ZWE1 Domain This short domain is often found nested inside other longer domains. The function is not known, but the domain carries a highly conserved GVQW motif. The members are rich in proline and cysteine. This may be a binding domain. 28.10 28.10 28.10 28.10 27.90 28.00 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.37 0.72 -3.98 35 1257 2012-10-03 02:24:44 2011-03-23 13:28:18 1 170 16 0 72 778 423 41.40 60 18.50 CHANGED VuQAGVQW+sLGSLQPsPPtFKpFSCLSLPSSWDYR+sPPpPAN.Fs..lF .............ssQAGVQ...W.p.c..LuS....LQP...P.P..sFK.pFS.CLSLs...SS...WD.Y.R.....p..P.sp...s....h................... 0 55 60 61 +13743 PF13901 DUF4206 Domain of unknown function (DUF4206) Coggill P pcc Jackhmmer:Q9H714 Domain This is a family of cysteine-rich proteins. Many members also carry a pleckstrin-homology domain, Pfam:PF00169 27.70 27.70 29.40 28.90 27.50 27.60 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.74 0.71 -4.77 28 498 2011-03-23 16:23:42 2011-03-23 16:23:42 1 18 131 0 302 495 2 187.10 36 26.38 CHANGED RhCsYsGcYaCsuCHp.s.stssIPARllcsWDFppasVSptuhphLpphhspPLlslpplNPpLYs+scpLsclpplRppLthlcpYlhsCRhus.p.tptt.........hp..t..s+pa.Lhp.s.schYSltDLlplpsG.shtth...LpplhphsppHlt.sC.LCps+GFlC...EhC......pss...clIaPFpp...t.psp+CspCtulaH+pCap...p..ss........CP+CtRhpp+c 
.......................................RhCpY.GpYaCs.s.CHt.s.spslIPARll+.pWDFpch..........V..Sph..uhchLp..tlhppP.lhslp....p....lNst.LYp+...s...ctLsplpp.lR.ppL.hhh...+sa....l.......h.....o...CR......ut....p.....h.hp.........hp....ts.pa.L.h...-....s....c.......haSlpDLh.pl..p.pG..t....Lssh................LpplhphstpHlh.pCp........lCpt.+.G.F.lC...EhC.....................pss.......cllFPFp.....ps...p...Ct..pCtus...aH.p.pCht.....p...tt.........CP+C.Rhp...p........................................................................................................ 0 93 124 205 +13744 PF13902 R3H-assoc R3H-associated N-terminal domain Coggill P pcc Jackhmmer:Q96D70 Domain This family is found at the N-terminus of R3H, Pfam:PF01424, domain-containing proteins. The function is not known. 25.00 25.00 32.60 28.10 24.60 24.10 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.75 0.71 -4.19 32 193 2011-03-23 16:27:11 2011-03-23 16:27:11 1 7 156 0 132 182 0 121.20 23 35.98 CHANGED ++phhtcculcpp-slh..u+cGsR+ppRaENc+lLpshhsp..stspp....sshshps.lP............hthsthW.ssahshst-pppphh...pthcsppcpstsptt.......................s.t.ap+ls+cLRttLK+ ................................++t.hhppul+pp-slh..spcGpRpppRaENc..+hLhshhsp..ss.sps..ht.tssh.shts.sP..............hphsthW.ssah.s.tst-cpp..phh......pthpp.ptpppt.tp.tt..t...........................................ts.tthpclspchRthl++.................................................................. 0 35 65 100 +13745 PF13903 Claudin_2 PMP-22/EMP/MP20/Claudin tight junction Coggill P pcc Jackhmmer:A6NFC5 Family Members of this family are claudins, that form tight junctions between cells. 
31.20 31.20 31.20 31.20 31.10 31.10 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.43 0.71 -4.57 37 835 2012-10-03 00:20:40 2011-03-24 16:16:45 1 11 100 0 558 2183 0 182.30 16 73.69 CHANGED hlAlsLllluhsTspW..lp...ss..spp..............pt..............................t.....h.t.t...hh....pp...Gh.ahhshh..................................................h.hhshshhhluh....shs.hhuslht.h.hussp...ps.t................thhhh..s.uh..h.hh.....suus.hhhhslshasthhp......h.......hpp...th..pp.....tsh....hhthGaSFhLsssushlsllsshLhLh ................................................................................................h.huhhhhhhuhs..o..s..h.W..h......hp.....th............................t................................................................................................................h............tp......Gl..a.p...h....Chh.......t.........................................................................................................................................................t..hhh.tht...hh.s...tsh.h.....hl.ul.......sl.h..lhu..h...l..hs....l..h...u..h..hp.......ps.t....................................hhhhs...s..Gh..h.hl................huG.l..hhll...ulh.ha.h.t.t..hp.......h......................h...t........tph.....................phpauWSa.h..lAhsuh.hhthlu.uhlhh.h............................................................................... 0 115 170 350 +13746 PF13904 DUF4207 Domain of unknown function (DUF4207) Coggill P pcc Jackhmmer:Q96HJ3 Family This family is found in eukaryotes; it has several conserved tryptophan residues. The function is not known. 
25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.45 0.70 -12.39 0.70 -5.01 19 192 2011-03-24 17:00:58 2011-03-24 17:00:58 1 4 70 0 84 188 0 234.00 25 57.05 CHANGED s.Ss.usSspSp..hSsp.....sspsh.tspspspsps....pps..spShsshhcsh.p.............................pocsps.ssls.hphs..spsSs.....tsp.......sah+.plcth+lc+pst....pAY-sWhuuKptp.p.+.ptc.h....ttcpEcc+pc..sthRp+LAcE+YcpWh+pKspQpppp+p.p.pt......tpp........sup.....pustu...........................ss+phs....t-ps+c+lpcW.hcKhcppppcRpcpcptppp+ppccpcR+phuEtAapcWhpsVsp+PKPVPhNQGhcu..LRGTlS......slYlNPh.W .........................................................................................................................................................................................................h...tts..u..S........t.p....sppp.pstp....p..............h..tsh..............................pspsts.s.h..hp.t............o.......pp.......s..c.p.cp.ch.cps...........tsa-sWhstKpt...pcppp.......t.h.....tpppE+cccct...tttpRppl.A.pppa..ppWhppKpcppppp+t.p.pt......pppts...psthp.....................................p+ph..t-c..sppchppW.hcKhp....ppp..pc....+...pc....pc..ptpt...p+ptp.pcR+phuptAapcWhppscp+s+Pss.hs.Ghs....LpGhhS.............hahNP..W.............................. 0 17 22 47 +13747 PF13905 Thioredoxin_8 Thioredoxin-like Coggill P pcc Jackhmmer:Q5VZ03 Domain Thioredoxins are small enzymes that participate in redox reactions, via the reversible oxidation of an active centre disulfide bond. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.07 0.72 -3.87 83 2061 2012-10-03 14:45:55 2011-03-25 14:24:33 1 60 711 33 871 11398 5021 94.50 22 26.84 CHANGED s..+hlllhFhus......hst...t..s......pp.....hhst.Lp.p...h...hpphp.......................tp....lpllhlsh............D...p.s..t.p..ph...pphhpph..............s.hh.shs.hss..t.t......pp..pl..........tpth.s..lps.................l.Pplllls.psGp.....l ..................................Khlll.Fh.us......WCs......s...C..........pt.....hhPp...Lp.c....h...hp.chp..............................tps..hpl..l..h..l..Sh......................................D........c..s....p...c......p..h..........p.p..h.h.pph.........................s...a.h...p......h.s...hss....s.t.........pp....pl...................hpta....s....l.p.u..............................l.P.s....h.llls.tpGp........................................................... 0 370 608 774 +13748 PF13906 AA_permease_C C-terminus of AA_permease Coggill P pcc Jackhmmer:Q7Z475 Domain This is the C-terminus of AA-permease enzymes that is not captured by the models Pfam:PF00324 and Pfam:PF13520. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.63 0.72 -4.10 56 923 2012-10-03 01:44:59 2011-03-25 14:26:22 1 16 191 0 595 2651 76 50.00 44 8.37 CHANGED FpsPhVPhlPsluIhlNlYLhhpLsshTWlRaslWhsl...GhllYhh....YGhpHS .................FtVPhl...Ph.lPslSIh....lNlYL.MhpL.....st..tT..W...lR..Ful.....W.h...hl...Ghl.l.YFs....YGlppS.............. 0 136 243 422 +13749 PF13907 DUF4208 Domain of unknown function (DUF4208) Coggill P pcc Jackhmmer:Q86WR6 Domain This domain is found at the C-terminus of chromodomain-helicase-DNA-binding proteins. The exact function of the domain is undetermined. 
23.00 23.00 23.10 23.10 22.50 21.30 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.20 0.72 -3.79 31 331 2011-03-25 14:30:37 2011-03-25 14:30:37 1 14 214 0 224 312 0 101.20 32 7.30 CHANGED sc...s..t.tp.ptc-.h-spshsp..CKchMcPl++sL++Lc+s.sc....uLs+c.-hlphl+ppLhsIGcc....Icpplpph...pps..p.cchcc+LWhaso.pFhs..hc...upKLhphYc+h .....................................hst....t.ptcph-.p.pshsh.CK-hh+PlKcsLKpLcps...pc..........sL.spc.-plpch+ppLlpIG-+....IsppLpph..........sss...phcpa..+cpL....WhFVS..pFot..hc........uc+LpclY++.h................... 0 52 94 163 +13750 PF13908 Shisa Wnt and FGF inhibitory regulator Coggill P pcc Jackhmmer:Q8N114 Family Shisa is a transcription factor-type molecule that physically interacts with immature forms of the Wnt receptor Frizzled and the FGF receptor within the endoplasmic reticulum to inhibit their post-translational maturation and trafficking to the cell surface. 27.40 27.40 27.40 27.40 27.20 27.30 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.97 0.71 -12.11 0.71 -4.10 11 427 2011-03-25 14:59:58 2011-03-25 14:59:58 1 13 55 0 239 379 0 153.30 22 54.82 CHANGED tpGEhCh..hsshsphcPsa.CPp......p.paCCGoCuhpYCCss.h+psphsps.sCss.-p..sh.t....s.......chtpshc.sPs...pshtssllVulhhhVlhl.lshllshsC.pCsL.Kh.....................pR.ppPshopss.usshpppshsQs.st.........sph.P...usph..QsYps.sspsu.......h.ts.h.sh.hs..Ph.h.ssssssY 
......................................................s...p.Ch.hahsh.utap..FpCs.............hhhC...CGsC.hRa.CCt....h....p..hp.Qt.tC.s.....................................ss..............h.lhsulh..hh.lh.h..l....hshlss.h.hs.....p.s.h.h.K.................................................pp.....p.......h..t..p...sh....sp.........h.t......t..s........t........................t..s.........p...tth.s....sst.s...............h......................................................................................................................................................................... 0 51 77 135 +13751 PF13909 zf-H2C2_5 C2H2-type zinc-finger domain Coggill P pcc Jackhmmer:Q6IQ32 Domain \N 25.00 13.60 25.00 13.60 24.90 13.50 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.10 0.73 -7.42 0.73 -3.42 73 3546 2012-10-03 11:22:52 2011-03-25 15:19:35 1 546 228 2 1708 10506 142 24.70 29 7.43 CHANGED apCst..Cs.ats..p.tp.p.lp.pH.hphh.Hs .........a+Cph..Cs.apo...s.pp.s.Lp.pH.hppt.H........ 0 489 585 1020 +13752 PF13910 DUF4209 Domain of unknown function (DUF4209) Coggill P pcc Jackhmmer:Q5T6L9 Domain This short domain is found in bacteria and eukaryotes, though not in yeasts or Archaea. It carries a highly conserved RNxxxHG sequence motif. 22.30 22.30 22.40 22.30 21.90 21.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.34 0.72 -3.91 17 134 2011-03-25 16:11:02 2011-03-25 16:11:02 1 2 110 0 71 124 3 91.10 30 15.63 CHANGED hl...ph.lcptlhsh.hh.shptpts...h..hLp........-lLtscplpphlspsl.............hhlL+hhlssPp..GLNLRNllaHGFls.sphp.phhshllhLhhsls ................................hph..t.h.Rtlhphhhhhsp..ps...........hhLc........-LLs..s....p.pltplhGpsl.............h.lL+lhhssPp..GLNLRNhlhHGah..ssp-lss...phsshhlhLhhtl.s............... 
0 32 42 50 +13753 PF13911 AhpC-TSA_2 AhpC/TSA antioxidant enzyme Coggill P pcc Jackhmmer:Q8TBF2 Domain This family contains proteins related to alkyl hydro-peroxide reductase (AhpC) and thiol specific antioxidant (TSA). 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.47 0.71 -10.75 0.71 -4.16 82 573 2012-10-03 14:45:55 2011-03-25 16:14:57 1 12 263 0 372 774 179 123.50 20 46.43 CHANGED tLtpt..t..spLpt.....tG..lp..lhs....lu..hush..s...spcFt.ph.....sta..shp.la...........sDs.stphaptLsht.......................................................................................p.hh.h.th..................h.t.......h..t........th.htthtth...........th..tsh.......................................................................suthtQpGGsalhssss......pllapHc-cs ...................................................................................................s.hpt....ts..lp..lls..lu..hush......ts...hcpFt.ph........sta..s..h-..la.............sDs..p.+..phYptLGht...................................................................................................p.s...th.sh..h...sst............htps........h.p..............th..hpthpph...........s.h..ssh..................................................................................pGshhQtGGhhlh........stss.....plhataht............................................................ 0 127 219 297 +13754 PF13912 zf-C2H2_6 C2H2-type zinc finger Coggill P pcc Jackhmmer:C9JL96 Domain \N 25.00 5.00 25.00 7.50 24.90 -999999.99 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.09 0.72 -7.35 0.72 -3.91 81 8615 2012-10-03 11:22:52 2011-03-25 16:26:12 1 2303 333 9 4881 167936 200 24.80 27 6.24 CHANGED hapCshCpppF..sshpslhsHK.ppa.ppt ...........apCs.t..C.s...cs.F......ssh.p.s...L..h.tHh..p.h....t.................. 
0 845 1687 3070 +13755 PF13913 zf-C2HC_2 zinc-finger of a C2HC-type Coggill P pcc Jackhmmer:Q5TFG8 Domain This family contains a number of divergent C2H2 type zinc fingers. 27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.93 0.72 -7.00 0.72 -4.21 111 1078 2012-10-03 11:22:52 2011-03-25 17:14:21 1 55 127 0 709 1684 18 24.70 37 10.97 CHANGED phh.C.thCGRpF.sscplt+H....tphCp ......hh.C..hCGRpF.ss.s.s.l.ppH........shCp.......... 0 305 367 517 +13756 PF13914 Phostensin Phostensin PP1-binding and SH3-binding region Coggill P pcc Jackhmmer:Q4KMQ1 Domain Phostensin has been identified as a PP1 regulatory protein binding PP1 at the KISF motif. The domain also appears to carry an incomplete incomplete SH3-binding domain PxRxP further upstream. It is likely that Phostensin targets PP1 to the F-actin cytoskeleton [1]. Phostensin binds to actin and decreases the elongation and depolymerisation rates of actin filament pointed ends [2]. 25.00 25.00 25.10 25.10 24.60 24.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.63 0.71 -4.04 5 99 2011-03-28 10:56:36 2011-03-28 11:56:36 1 5 42 0 53 90 0 122.60 49 22.28 CHANGED APQtAK.Psssh...Pup.pLHPA+PGcsutL.pRGuNTFTVVPKRKPGoL...thSQANsposspsAE---AsuL.......Gs......TLKKRYPTVHEIEVIGGYLALpKSCLoKAGSSR..KKMKISFNDKuLpTTFEYPSESSLlQ .....................................sp.sh...sh.....sh..hass+sG.ssth....pR...uGpTFTVsP+Rpss.............Asst...s.sp.........Ap.t..-sAs.................ss........shKKRYPTscEI.VlGGYLpLp+SCL.sKu..uspR..KphKISFsEsuLpTTapYPSESSllp... 1 6 12 23 +13757 PF13915 DUF4210 Domain of unknown function (DUF4210) Coggill P pcc Jackhmmer:Q32MH5 Domain This short domain is found in fungi, plants and animals, and the proteins appear to be necessary for chromosome segregation during meiosis. 
21.20 21.20 24.10 22.70 20.10 19.30 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.25 0.72 -3.68 21 277 2011-03-28 12:49:58 2011-03-28 13:49:58 1 5 173 0 185 275 0 63.10 49 7.99 CHANGED LlGSFEESlLpGRhs..P.op.l-GFhAplGloGut......asPtplplPFsshaap.......hu..ssss.....sPYl ..LlGsFE..ESlLpGRhs..P.st.l-GFsAp....lGsoGs...............aCPsHlTLPhsssFYs........sS..-suP.........oPYh.................... 0 41 78 126 +13758 PF13916 Phostensin_N PP1-regulatory protein, Phostensin N-terminal Coggill P pcc Jackhmmer:Q4KMQ1 Domain Phostensin has been identified as a PP1 regulatory protein binding protein. This domain is N-terminal to the PP1- and SH3-binding regions though may carry an additional SH3-binding motif. It is likely that Phostensin targets PP1 to the F-actin cytoskeleton [1]. Phostensin binds to actin and decreases the elongation and depolymerisation rates of actin filament pointed ends [2]. 23.00 23.00 24.40 23.50 22.20 21.10 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.81 0.72 -4.00 4 58 2011-03-28 12:54:51 2011-03-28 13:54:51 1 4 30 0 30 50 0 83.50 56 14.78 CHANGED s-ps.hotMPAWKRtILERRRAKLuh.sGt..suP.......GsA-.....PsEptVLhEulGPl+QN.Fhp.EppRpptpptttpcLLtch+tsP ..................AER-RLSQMPAWKRGlLERRRAKLGL....uP.....GE..P.uPh.s...................s-uGss-...........PDESuVLLEAIGPVHQNRFIRQERpppp....pp...p......Qp...p.....p-LLt.chpss........................ 0 2 8 13 +13759 PF13917 zf-CCHC_3 Zinc knuckle Coggill P pcc Jackhmmer:Q8TBK6 Domain The zinc knuckle is a zinc binding motif composed of the the following CX2CX4HX4C where X can be any amino acid. The motifs are mostly from retroviral gag proteins (nucleocapsid). Prototype structure is from HIV. Also contains members involved in eukaryotic gene regulation, such as C. elegans GLH-1. Structure is an 18-residue zinc finger. 
27.00 10.00 27.00 10.00 26.90 9.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.62 0.72 -8.34 0.72 -4.22 25 384 2012-10-03 11:39:54 2011-03-29 14:02:14 1 40 191 0 234 1569 12 40.60 31 17.55 CHANGED ssusspCQKCLphGHW...TYECK..........sc+....sYs.......sRPSRTQpLcps.l ........s...pstCpKCh....phG.........Hh..........TaEC+....................tpp................................................................................... 0 90 133 193 +13760 PF13918 PLDc_3 PLD-like domain Coggill P pcc Jackhmmer:Q8N7P1 Domain \N 27.00 27.00 27.50 27.00 26.80 26.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.12 0.71 -4.84 5 475 2012-10-02 13:01:53 2011-03-29 14:42:38 1 7 150 0 155 456 3 169.10 38 45.75 CHANGED SluTIKNLGLYSTN.K+LAhDLMNRYNTFSSMls-PKlPFT...RhCCuM.lTPTATsFHLNHoGGGlFFSDSPERFLGFYRTLDEDLVLHRI-uAKNSIDLSLLShVPVlR+uosVcYWPcIhDALLRAAI-RuVRVRlIIopWKNADPLSVAAARSLc-FG..VGslDlSVRhFulPGR ........................................................SloplKpLGlh..hN.spLAhDLtphFpsaasht..s...p..s..p..l.P.o....h.....phsssh..spsss.hph..p.l..s.p............s.s..u.s......lF.....hS.s..SP.pthh..sh.tRThDhDslL.phIcsA+p.I.lulhshlP....lh....c...h.s....s.s......h...p.YW.PtI.suLhcA.A.lpR.uV+VRlLlo.pWc..psDPhth.s.hl+SLp...sh.s........ss...s..ss..lpV..+hFslP............................. 0 40 55 103 +13761 PF13919 ASXH Asx-hm; Asx homology domain Iyer LM, Aravind L, Godzik A, Coggill P pcc Manual Domain A conserved alpha helical domain with a characteristic LXXLL motif [1] [2]. The LXXLL motif is detected in diverse transcription factors, coactivators and corepressors and is implicated in mediating interactions between them [3]. The ASXH domain is found in animals, fungi and plants [4] and is predicted to play a role in mediating contact between transcription factors and chromatin-associated complexes. 
In Drosophila Asx and Human ASXL1, the ASXH domain is predicted to mediate interactions with the Calypso and BAP1 deubiquitinases (DUBs) which further belong to the UCHL5/UCH37 clade of DUBs [4]. 25.80 25.80 25.80 27.20 25.30 25.30 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.73 0.71 -4.38 85 311 2012-10-09 14:14:56 2011-03-29 16:45:12 1 10 155 0 174 299 0 133.90 41 12.83 CHANGED ptps..++hssu.....Qh+ps+.st.cl..DlEoPcSILlsTNL.RuLlN.pcTF.ssLPtphQppLltLLPc.........VDRpsu......................................ssshl+.LssS.sLN.NEFFscAsppW+-RLu-GEFTPEhQ......................h+l+pEtE+E+.KlDsWKE+aFEsaWGpK ...........................................ht.....cp.ttt..phKps+...st...cl...DlE...oPsSIL.l......sT..NL.RA...Ll.N.pcTF.ssLPschQppLLhLLP-.............VDRpsu.........................................s-uhl+..L...ssS..sLN...NEFFspAspsW+-RLu.-..GEF...TPEhQ...............................l+l+..pEhE+EK.......Kh-.WKE+aFEsaaGp..................................... 0 37 66 112 +13762 PF13920 zf-C3HC4_3 Zinc finger, C3HC4 type (RING finger) Coggill P pcc Jackhmmer:Q9BY78 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.78 0.72 -4.29 585 5985 2012-10-03 15:03:13 2011-03-29 16:58:25 1 423 488 23 3675 15063 738 49.20 31 9.50 CHANGED ppph....Chl.......Ch.s......p.......p..t...s........s.lhh..P....C..s..HhshCtpCuppl......................ppCPl..CRp.slp..ph .............................p..ttChl.Ch...c.......................p....................t....s........................s..l.h.h.....P........C..u...Hh..hC...ppCupph..................................................ppCPl..CRp.lp..h........................... 0 1260 1981 2858 +13763 PF13921 Myb_DNA-bind_6 Myb-like DNA-binding domain Coggill P pcc Jackhmmer:Q15361 Domain This family contains the DNA binding domains from Myb proteins, as well as the SANT domain family [1]. 
22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.04 0.72 -3.86 39 4769 2012-10-04 14:01:12 2011-03-30 11:08:35 1 107 621 51 3074 13999 171 61.90 27 16.83 CHANGED WTp-E-chL....hphhpp........h....u..t........s...WppI.....upt..l.s...........Ros..tpltpRap..p...L..........p.........s.p.......hppss..asc.p-pppL ............................................................WTp.-ED.phL..........hphl...pp...................................a............G...p........................p.....Wsp.I.............upt....l..s.................................Ros..ppC.c.p....R.Wp....p......hL......................p.............s.p.....................hp.p..t.......as..t-....................................................................................... 0 1464 2097 2635 +13764 PF13922 PHD_3 PHD domain of transcriptional enhancer, Asx Coggill P pcc Jackhmmer:Q76L83 Domain This is the DNA-binding domain on the additional sex combs-like 1 proteins. The Asx protein acts as an enhancer of trithorax and polycomb in displaying bidirectional homoeotic phenotypes in Drosophila, suggesting that it is required for maintenance of both activation and silencing of Hox genes. Asx is required for normal adult haematopoiesis and its function depends on its cellular context. 25.00 25.00 36.40 34.50 21.60 21.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.03 0.72 -3.73 13 166 2011-03-30 10:10:42 2011-03-30 11:10:42 1 4 63 0 89 168 0 65.00 59 4.50 CHANGED ppt.....tpsps....pstp..h..Qp....ospNT.........................s...pussspCuCuLpAMVlCQpCGAFCHDDCIGsSKLCVSClI ................................................t..s......uupsts..lslQhFs-pss.........................sEshs.KCsCRLK.AMIMC+GC..GAFCHDDCIGPSKLCVuCLV... 
0 12 18 41 +13765 PF13923 zf-C3HC4_2 Zinc finger, C3HC4 type (RING finger) Coggill P pcc Jackhmmer:Q5VTB9 Domain \N 27.00 20.00 27.00 20.00 26.90 19.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -9.23 0.72 -3.92 345 3897 2012-10-03 15:03:13 2011-03-30 11:46:46 1 230 817 11 2262 19172 876 41.60 35 7.80 CHANGED CslC.hp.ph.ps.....s.h..hh.s..C..u..H..h.h.C.p.pC...ht..p..h..hpp.t..........p..p....C..Ph..C ...................CslC...hc..hh...pc.................P.h....hs..s....C......G........H....h.F.C.p..pC........lt...c....h..hps..p..............................p...........C..Ph..C.................................. 0 782 1217 1747 +13766 PF13924 Lipocalin_5 Lipocalin-like domain Bateman A agb Jackhmmer:C7PBJ6 Domain This family includes domains distantly related to lipocalins. However, they do contain the important GXW motif in the first strand. The protein in this family include aln5 Swiss:B6SEG2 which is involved in biosynthesis of alnumycin [1]. The family also includes the ZFK protein from Trypanosoma brucei which is a protein kinase. This domain is at the C-terminus of that protein [2]. The domain is also found as the C-terminal domain in StiJ a protein involved in producing stigmatellin. This domain has been assumed to catalyse a final cyclisation reaction [3]. 
21.10 21.10 21.60 21.90 20.80 20.80 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.80 0.71 -4.49 68 249 2012-10-03 08:47:39 2011-03-30 13:10:08 1 8 197 0 95 228 57 132.90 25 64.70 CHANGED hG.sWcLlu...hphpss-Gs...................................hhhPhGp.sPpGhlhYsscGhh.osplhptsR......................h.sss.hpuosp-hsp..shpshluYsGpY.....plcss.........plhacl-suhhPsWl..GspQpRphplc.s.....cp...Lhlt...tstshhhsst.tt.....shLsWcRh ..................................................hGsWpLlS...hph...ss.s...................................hhhPhGp...ss...hGhlhYstpG.hhuspl.hsss...Rst....................hsstshhtss..sp.-h....st..shpshluYsGcY.........plccs..........................thlsHpl-suh.hPNWl..Gs.pQpR...h.aphc.t.........-p...LpLt....phssh.hsst.hh.....shLhWp+................................................ 0 26 56 76 +13767 PF13925 Katanin_con80 con80 domain of Katanin Coggill P pcc Jackhmmer:Q9P0V8 Domain The con80 domain of katanin is the C-terminal region of the protein that binds to the N-terminal domain of katanin-p60, the catalytic ATPase. The complex associates with a specific subregion of the mitotic spindle leading to increased microtubule disassembly and targeting of p60 to the spindle poles [1]. The assembly and function of the mitotic spindle requires the activity of a number of microtubule-binding proteins. Katanin, a heterodimeric microtubule-severing ATPase, is found localized at mitotic spindle poles. A proposed model is that katanin is targeted to spindle poles through a combination of direct microtubule binding by the p60 subunit and through interactions between the WD40 domain and an unknown protein [2]. 
27.00 27.00 27.20 27.40 26.70 26.60 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.66 0.71 -4.51 33 279 2011-04-01 14:39:33 2011-04-01 15:39:33 1 12 127 0 176 262 0 145.60 30 26.00 CHANGED pcsHsshhpsLpsRhppLpslcphW.ppssl+sulsshtph...pDhulLsDlLsllp..t+ppt.....hsL-hCstlLPhlppLLp.S+aEpalpsuLchLptllppFtshIpsshp..ssstsulDlstEpRh...p+sptshppLpplpphlpths.p...psGplGppupcLphhls ...................................psH-shhpsLpuRhtpL.....psshphW.pppslps...slshhhph.........pDhuVl.sDlLslls.......ptph......hsL-hCsslL....Phlp.pLLp.S+a.EpaltsuhshLphllppFhshl.....pst...ht....s.spluVDlptE-.Rh...............ppsphshppLppl.ph..tht.....h.G.hup..hpclp.hh..................... 0 52 80 127 +13768 PF13926 DUF4211 Domain of unknown function (DUF4211) Coggill P pcc Jackhmmer:Q8N4S0 Family \N 27.00 27.00 28.00 28.00 22.30 21.40 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.58 0.71 -4.81 38 274 2011-04-01 15:59:20 2011-04-01 16:59:20 1 7 160 0 173 252 0 146.50 23 14.76 CHANGED c-Flhp-p................t.s...sstsphshphshpsppsh+-pFcshlchhVpstLcssFhpsh.....t.....chahsuhc+l-sp.lpsppcslltsosWsssapcuLcshPplplhth.........psttCpACpcss+ss.shcl+hsGcsY...........spcT......Lpshppsss .....................................................................................t.....pt..................t.tss....tt.tpsphshttp.h.hspp..sh+-pFcshlchLlppALDPs.hhpsl.c.ppp........chaLssh++lDsh.lpspp...cplltps.thspp...appuLcsaPpl.h........................t.hsCpuCsps.st...shcl+hsGcsY...........NpcT......Lp..ph...s.................................................. 0 40 64 114 +13769 PF13927 Ig_3 Immunoglobulin domain Coggill P pcc Jackhmmer:O60384 Domain This family contains immunoglobulin-like domains. 
27.00 15.30 27.00 15.30 26.90 15.20 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.84 0.72 -3.46 182 4712 2012-10-03 02:52:13 2011-04-04 12:51:01 1 772 224 45 2131 57861 94 77.20 17 13.25 CHANGED pPplthp.sts.t...............lsLp...Cpstss..........ssphpWh............................tstttttsspsslsls.........hp.....ppsss..asC....hs...ps .............................................................................................................P.l.h..t.....hts.................lsLp..........C...pupss..............................ssphtWh.......................................................................................t..s...p...t....t....t...s...t......s....p...u....s...ls.ls...................................ssp........................pcsGs.....YpC..hu.p............................................................................................................... 0 498 693 1156 +13770 PF13928 Flocculin_t3 Flocculin type 3 repeat Bateman A agb Jackhmmer:Q5AF39 Repeat This repeat is found in the Flocculation protein FLO9 Swiss:P39712 close to its C-terminus. 21.90 21.90 22.20 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.03 0.72 -8.62 0.72 -3.80 149 538 2011-04-06 11:49:32 2011-04-06 12:49:32 1 38 52 0 311 544 0 43.50 44 15.50 CHANGED sTTllTlTSCs.c........spCspos...........l......oTu.so....sls.........ssp..TsYTT..YCPLo ....................pTTllTlTSCp.s........shCopos................V.......oTu..ss....TVs.........sss..ThYTT..aCPlo.................. 0 57 149 285 +13771 PF13929 mRNA_stabil mRNA stabilisation Eberhardt R re3 Jackhmmer:Q03153 Family This domain is an mRNA stabilisation factor [1]. 
25.00 25.00 57.90 27.50 20.70 20.70 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.86 0.70 -5.46 7 26 2011-04-06 12:28:04 2011-04-06 13:28:04 1 2 24 0 15 23 0 267.70 34 48.31 CHANGED hplhpsLsppD....actuppllsssp..p.p.l+sltsslppL.pph....clsscpWhphh-pphPhlh.pps...aWssRhpaapLL.hss.pph................slp...phh.-YLhhKpuhGtplsccDLlsFLplh.pplpsspp....Y.pLVppNphllcALpLac..psc....llhDspllshLLpoMss.ppp....pL+uhYEhlcals..ppat.pths.ssltpllpsLschpsWscLhpFWctthss.s.tpDpRPWs.Fl+slspsGDpplh+cllccGpLLWIpRspVs..hos-LpstLpcLFppus ...................s..hh.hLpppD....Fctuscll.pss..st.sphc..l+olpcshssLpp.s..tpclcsspWpphFDpp.shlshspst......YWslRhcahhLLNcscPphY....................os+...halpDYLlLK+ShGpcLh+-DLluhLchllps.css+p....YhsLVcpNchllcALpLact.psc...tpllhDppVlohLLpoMlsp-cs....+LcuLYEsI-alh...poas.ccLTsshIlsILpsLAch+-WsKLhphW-shsss.s.sp.......DpRPWscFIpllspoGDsplhpKllssGHLLWI+RhsVs..l..oscLpsslctLhcpss...... 0 3 8 14 +13772 PF13930 Endonuclea_NS_2 DNA/RNA non-specific endonuclease Coggill P, Punta M mp13 Jackhmmer:D0LUR7 Domain \N 27.00 27.00 27.10 27.00 26.90 26.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.79 0.71 -4.12 50 1121 2012-10-05 18:28:12 2011-04-06 14:28:53 1 34 776 3 170 1036 12 132.40 26 35.41 CHANGED PNspYps....sGh....hY.pTDphGRlspspu..pLp..L.h.t..p......scR.NsapQsss..Gp..........s...th.t....Dc.GGHLIustFsGssc.h.cNLVshspp.lNc......GpahphEpcWAs..ulpps..+pV.plcIpPlYpGs.S..RPssFplp.YpIss 
...............................................................................t.................hh..phDphsRhttsps.....ht......h........p.................ttR...t.s.h......t...pss......uhh.t......................t.....tp.......tt.h..................h....s....RuH..LI......u........hp..h...............u...............G........p..............ss.....p..c.....NLsstoph.lNp...................sshhh....aE....sp....lpp...........shcps.....ppV...pYcls.PlYp.Gs..p...Lhspthplp.h..................................... 0 44 92 135 +13773 PF13931 Microtub_bind Kinesin-associated microtubule-binding Eberhardt R re3 Jackhmmer:P46863 Family This domain binds to micotubules [1,2]. 25.00 25.00 25.60 25.60 24.90 24.90 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.08 0.71 -4.04 11 156 2011-04-08 14:37:49 2011-04-08 15:37:49 1 3 133 0 105 156 0 99.50 31 9.52 CHANGED -LKh.hPTGTTPpR+casYP+sLstTpP+-pIlcRa+pEQshsp.hshsssIsEhscE-ss....pslpulpt.clpsoEslhsspsh.-hs-ssppttth..hQ.ppstsKttpsps.ps.lcp.psp.sppppu..........ls+ShhP.+tss .....Lpp.hPTG.sTPp++pYtYPssLsRTcPH-pLlp.ph+...ppp..................................................................................................................................................................................................................................................... 0 23 39 72 +13774 PF13932 GIDA_assoc_3 GidA associated domain 3 Punta M mp13 Jackhmmer:D0LLL5 Domain The GidA associated domain 3 is a motif that has been identified at the C-terminus of protein GidA. It consists of 4 helices, the last three being rather short and forming small bundle at the top end of the first longer one. It is here named helical domain 3 because in GidA it is preceded by two other C-terminal helical domain (based on crystal structures [1,2]). GidA is an tRNA modification enzyme found in bacteria and mitochondrial. 
Based on mutational analysis this domain has been suggested to be implicated in binding of the D-stem of tRNA [2] and to be responsible for the interaction with protein MnmE [1]. Structures of GidA in complex with either tRNA or MnmE are missing. Reported to bind to Pfam family MnmE, Pfam:PF12631. 27.00 27.00 30.60 29.20 26.20 24.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.87 0.72 -9.16 0.72 -3.92 60 4262 2011-04-08 14:57:33 2011-04-08 15:57:33 1 15 4143 17 1037 3177 2191 71.90 46 11.44 CHANGED lKYpGYIcRQpppl.c+hc+hEshplPpclDYsplpuLSpEu+cKLscl+PpTlGQAuRIsGVoPADIolLll ...............lKYpGYIp+Qpppl.-+hc.+hEspplPtslD..Y..s..p..l..s..G..L..SsEsppKLpch+PtoIGQASRISGVoPAsISlLll................... 0 354 654 866 +13775 PF13933 HRXXH Putative peptidase family Bateman A agb Jackhmmer:Q5A0Y5 Domain This family of putative peptidases are closely related to the M35 family Pfam:PF02102. In this family the metal binding HEXXH motif is replaced with HRXXH. The exact function of these proteins is unknown. Members of this family are found to be fungal allergens. 
21.40 21.40 21.40 21.40 21.30 21.10 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.70 0.70 -4.96 28 85 2012-10-03 04:41:15 2011-04-08 16:09:36 1 3 72 0 60 112 0 225.10 40 80.13 CHANGED hhlllhsusshAussss.............tspsssssssshsWsuuh.....................sssFP.IHpSCNuTppcQLpsGLpEshpLAsHA+-HlLRaGspSpha+KYFGs.........usou-slGha-pll...........................suDKushLFRCDDPDGNCs..ss.WAGHWRGpNATsETVICDLSYp..oR+.LsplCutGYTVusussNsaWAsDLLHRlaHlPslG.pGhl-Ha....A-sY--VLcLAcs.NuohAlRsS-oLpYFAlDVYAaDlAlPGhGCsGc .................................................h.hhh..............................t....ts..shshpt.sh.....................htsaP.IHpSC.Ns.T.ppp..LptuLp-shplAt+A+-Hl.......LcaG.scSth.a++YFGs.........usshpsl..G.ha-pls...........................susK....u.shLFRCDDs...Ds..sC......t.pss...aAGH.WRGpNAo.sETV..IC-lSYp..oR+sLsslCs.....tGaTVu.s.utss.paa..usDLL....HRhhHlP....slu..cuhVcHa....A-..sYp-llcLAps.Nsohus+socsLpYaAhDVYuaDlulPG.GCsGc............................................................... 0 13 30 53 +13776 PF13934 ELYS Nuclear pore complex assembly Wood V, Coggill P pcc Jackhmmer:O94384 Family ELYS (embryonic large molecule derived from yolk sac) is conserved from fungi such Aspergillus nidulans and Schizosaccharomyces pombe to human [1]. It is important for the assembly of the nuclear pore complex [2]. 
27.00 27.00 27.30 27.00 26.00 26.80 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.63 0.70 -4.62 41 211 2011-04-11 16:17:10 2011-04-11 17:17:10 1 6 177 0 151 221 1 232.90 25 21.47 CHANGED spLalDcLLpt....h....ul..............................ps.spthYPPco...L+s.Lhchllssss.....st..p..pKpullhYlLhDh.....cpt..............................phsppFupphtlscphhhhlcuhWhLD+...h-...acpAlchL..scP....ulh.....ssassc....IlpsLhp...............p.tsss...lAhtahpsssPslsos.............pslphhhsh.LspsslsEAFtatRphss...cpt...................p.hhEpllpaslp.t....................p.s.pp.h..hc.LlsLPhsppEEphlpc.h.......Lc...psspp....h.......sc-sllh .......................................................................hLhlDtllpt....l.....sl.......................................ps.stthYPPpo...L+s.Lhphhh.....s.sh.......................sp....t.....tKpul...hhYlLhDh.......pts...............................p.hppFspshsls.phhhhhpGhWhLD+................t-.........................appAlchL.....scP.....uhh.........sphtsc....IlpsLhp........................p.tctp...hALtYhps..spPslsos...................................pslphhhsh...Ltp....s...slsEAahatRptss....ph.......................hcpLlphhhptpt.....................c.ttt.hpcLlpLPasspE-chlpc.hLp....sssth........spphlhh............................................ 0 41 75 118 +13777 PF13935 Ead_Ea22 Ead/Ea22-like protein Bateman A agb Jackhmmer:B2PIW3 Family This family contains phage proteins and bacterial proteins that are likely to represent integrated phage proteins. This family includes the Lambda phage Ea22 early protein as well as the Bacteriophage P22 Ead protein. 
27.00 27.00 27.10 27.50 26.10 26.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -11.18 0.71 -3.85 41 775 2011-04-12 10:52:06 2011-04-12 11:52:06 1 2 384 0 12 500 1 137.90 35 66.99 CHANGED SpIsp..QALRE......sAcpAttu..h.......h.hh.t.t.t...h............................t...........................pspalst...h...s....PpslLALLDEhEp.......tpphlcthc.........s.cls.shsphpp.....cLcstcph...........................................................................................t.t..hp..hst.t.......RlA-L.E ............................................................ScIsY..QALRE......sAEp...Ahpupah.......h.hh.sppt.h...............................thhcp.hpth........................ssspFlshs...s....PssVLALLDEpE+................................sQphIcch-pc..............Np-lALs...lsKLRs.......-LEsscpph............................................................................................p.t....t................................................................................................................................................................ 0 1 1 5 +13778 PF13936 HTH_38 Helix-turn-helix domain Bateman A agb Jackhmmer:B2PKW2 Domain This helix-turn-helix domain is often found in transferases and is likely to be DNA-binding. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.94 0.72 -4.40 170 4579 2012-10-04 14:01:12 2011-04-12 12:55:48 1 55 1701 4 685 4174 318 42.50 36 15.56 CHANGED phppLohpERtp....Ip.th...h.p...pG.hShp...p.IAcpLs+ssSTIuREl+Rs ......................ppLThpp.Rhp...........Ip..th............h.p.......pG.hS.R...p.IActLG+u.sTIpREl+Rs........ 0 188 368 496 +13779 PF13937 DUF4212 Domain of unknown function (DUF4212) Punta M mp13 Jackhammer:D0LH74 Family This family includes several putative integral membrane proteins. 
30.00 30.00 33.60 32.20 26.90 26.60 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.21 0.72 -9.63 0.72 -3.98 71 560 2011-04-12 12:23:16 2011-04-12 13:23:16 1 4 541 0 211 473 1169 80.10 43 74.54 CHANGED pptptYW+pNl+LlhhLLsIWFlVSFG..hsll..hscsLsplp..l..hGasLGFWhApQGuIhlFllLIahYuhtMs+LD+caslcE ........c.ttpsYW+pNlp.llhsLLslWFl.VSFG...suIL...h........s........cs.......L......sp.lp...h......uGa.sLGFWFAQQGSIhsFlsLIFlYshpMsp.LD+caslpc................. 0 53 136 183 +13780 PF13938 DUF4213 Domain of unknown function (DUF4213) Eberhardt R re3 COG2014 Family This domain of unknown function has an enolase N-terminal domain-like fold. Its genomic context suggests that it may have a role in anaerobic vitamin B12 biosynthesis. This domain is often found at the N-terminus of proteins containing DUF364, Pfam:PF04016. 23.50 23.50 23.80 27.90 22.60 23.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.83 0.72 -3.85 59 215 2011-04-12 12:28:01 2011-04-12 13:28:01 1 4 184 6 124 217 9 86.60 23 32.70 CHANGED lh-tlhpt....hp.shplcclslGlt..aTsVphspt.thGlA..hT.hpsh............h..h.hsup..ltGpsspclhphhh..uhs.slcpulGlAslNAlhp ....................hctlhpt....hp..shplpchshGht..aohVpss..s..t...uhGlu..hThhppt.................th....h.hssp..lt.Gpslc..clsphht..Shs..lctolGlAslNAl..... 0 51 88 108 +13781 PF13939 TisB_toxin Toxin TisB, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:A5A627 Family TisB (toxicity-induced by SOS B) is an SOS-induced toxic peptide. It is a hydrophobic membrane-spanning protein which inhibits cell growth [1]. Its expression is inhibited by the antisense RNA IstR-1, which acts as an antitoxin [2]. 
27.00 27.00 55.50 55.40 14.00 13.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.61 0.72 -7.09 0.72 -4.19 2 56 2011-04-12 12:59:00 2011-04-12 13:59:00 1 1 56 0 3 12 0 28.00 89 94.00 CHANGED MushDIhILILKLhVAsLQLLDAVLK.h MSLVDIsILILKLIVAALQLLDAVLKYL. 0 1 1 3 +13782 PF13940 Ldr_toxin Toxin Ldr, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:E8XFD8 Family This family includes the Ldr (long direct repeat) toxins. In Escherichia coli there are four Ldr toxins, LdrA, LdrB, LdrC and LdrD. These toxins inhibit cell growth, decrease cell viability and cause nucleoid condensation. LdrD expression is inhibited by the antisense RNA RdlD, which functions as an antitoxin [1]. 27.00 27.00 39.60 39.30 22.30 21.90 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.68 0.72 -4.18 5 958 2011-04-12 13:02:29 2011-04-12 14:02:29 1 1 369 0 11 214 1 34.90 74 70.82 CHANGED MTLsQLGlAFWHDLAAPlIAGIIAulIVNWLRcRK ..MTLApLuMsFWHDLAAPllAGIlsuhIVsWhppRK.... 0 3 3 5 +13783 PF13941 MutL MutL protein Bateman A agb Jackhmmer:B2PG71 Family This small family includes, GlmL/MutL from Clostridium tetanomorphum and Clostridium cochlearium. GlmL is located between the genes for the two subunits, epsilon (GlmE) and sigma (GlmS), of the coenzyme-B12-dependent glutamate mutase (methylaspartate mutase), the first enzyme in a pathway of glutamate fermentation. Members shows significant sequence similarity to the hydantoinase branch of the hydantoinase/oxoprolinase family [2]. 
27.00 27.00 27.00 29.30 26.90 25.60 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.20 0.70 -6.16 38 371 2012-10-02 23:34:14 2011-04-12 14:12:58 1 3 341 0 69 264 97 432.00 39 94.36 CHANGED slLssDlGSThT+sshh-...ssphcllupupuPTTV..t.tDVshGlppAlpclpcph....................shts..hshsphhuoSSAAGGL+MslhGLV.chTscAA+cAALuAGAplhpshuh.................clscpclcclpplpPDlILLuGGsDGGspcsllpNAchlu.ph........shshP......lIYAGNpsspcclpcILt..tsptslhls-NVhPcl-plNlpPsRcsIpclFhc+Ih.cA.Ghsc.lpchs......stslhPTPuAVhpsschluc.......thGsllslDlGGATTDlaSls....cst.t..thhhp...-PhsKRTVEGDLGhthustsllcthu...ttlhchhs.....tth.ptlpphhtpsshlPp.sc-Ethh-ptLAptAlphAlcRHsGphpplhsshtphh..ht.G+DLs........plchllGoGGsLs+usp..spplLpthhps.tt....lhPspp.....sclhlDppYlhuuhGlLup.htP-sAhplhccslth ...................................................................p.hlsl-IGSThTKsshFp...ss..thp...........hlupuhsPTTl..t...tclthGhpsAlspltpts...................s........shsphhssSSAuGGL+hsshGLs.shTscAA+cAAhuAGAplttshuh...................................pls.cpDlpp.l.p.cspP-IlLl..sGGsD.s...G-cchslpNA.+hlA.pu...............pL.s.ss...............llYAGNpclpccVpcIhs.....t.tpslshl-NVhPclDhlsltss.RpsIp-lF.c+Is.pu.Ghcs.l.h-hs......s..cplhPTPtAVhpsschlup.......uhs-hlllDlGGATTDVaSss...........tusst.sshllc....PEPhsKRTVEGDLGhhVsAhslsctssc......h....ht.................t.hcchhtp.chlPt..spcEt.hhsphLAt.sVthAspRHAGshpplhTssGps........lthG+DLo........sV+hVlGoGGhLo+sst...htclL+hhph..ssssp.....lL.ssc...................sphhlDpcYlhuulGslAp.s.PptAh+hhtphh..h....................................................................................................... 1 39 56 65 +13784 PF13942 Lipoprotein_20 YfhG lipoprotein Bateman A agb Jackhmmer:B2PEB8 Family This family includes the YfhG protein from E. coli Swiss:P0AD44. Members of this family have an N-terminal lipoprotein attachment site. The members of this family are functionally uncharacterized. 
27.00 27.00 56.70 30.30 25.20 21.80 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -11.06 0.71 -4.74 26 543 2011-04-12 15:31:32 2011-04-12 16:31:32 1 2 534 0 41 227 3 177.70 71 74.12 CHANGED lPcpplsDYRhssCcslWslpstsu.sNsLYWLRshDCA-RLssspARspA+pls...sssWpsuF+puILLssAchTssERRphlspLssaShphPsslRPLhQLWR-pQsLplsLuEERtRYp+LQpsoDucLDpLRppps+LptpLpsToRKLENLTDIERQLSoRKpst....sphs-sscss .....l.PhYQLADYLoTcCuDIWuLpGKuTETNPLYWLRAMDCADRLMPAQSRpQARpas...DsoWQNsFKQGILLADAKITPhERRQlVuRl-ALSspIPAQVRPLYQLWRDGQALQLQLAEERQRYSKLQQoSDSELDTLRQQppsLQpQLELTTRKLENLTDIERQLSTRKPAGNasPDTPHps........... 0 1 9 24 +13785 PF13943 WPP WPP domain Woodcraft BJ agb Woodcraft BJ Domain \N 20.10 20.10 22.40 24.20 18.60 17.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.24 0.72 -3.80 25 109 2011-04-14 10:02:01 2011-04-14 11:02:01 1 14 29 0 65 111 2 95.70 43 27.43 CHANGED Sl+lWPPoQpTRchlVcRhscsLoo..sSIlS++..YGsLsc-EApcsA+pIE-tAFusAsp.....thssps-sDGhpslQlYuKEsSKhMLEslKptspspssspss ..............ShplWPPoppTRphlVcRhspsLos..sSlho++..YGslsppEAccsA+tIE-tAFusAsp......thpt..psssDGhpslQlYuKEsS+hhLEhlKptststtt...ss................................ 
2 10 39 50 +13786 PF13944 Lipocalin_6 Lipocalin-like domain Coggill P pcc Jackhmmer:JCSG_Target393211_GS13544C Domain \N 27.00 27.00 27.40 27.10 26.80 26.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.86 0.71 -4.00 21 111 2012-10-03 08:47:39 2011-04-14 14:22:19 1 6 70 1 13 99 0 127.70 22 39.62 CHANGED clAGsYpGshslolss...sssshsos....ps...pplpIspss-sTlplplsshs.hth...G..lslGchsVsssslcpsssuhhp.hs.ssshp........scspss...............shss.olsGTl.psGshslshshc....s.Gu...hshslpssas .............plsGsYpGshs.lslss....ssssht.s....hspplplsp.sscss...lplpLpsFsh........G...hplG-lsls.slsVppssss....thh..hs.tpshp.......hpspss...............shss.olsG..Tl..psuphshsls...lp.....sss.......hshslpssa..................................................................................... 0 7 10 13 +13787 PF13945 NST1 Salt tolerance down-regulator Wood V, Coggill P pcc Jackhmmer:Q09863 Family NST1 is a family of proteins that seem to be involved, directly or indirectly, in the salt sensitivity of some cellular functions in yeast. It does this without affecting sodium accumulation. It negatively affects salt-tolerance through an interaction with the splicing factor Msl1p. This interaction stresses the importance of efficient RNA processing under salt stress conditions [1]. 
20.00 20.00 20.60 21.70 19.70 19.30 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.67 0.71 -11.31 0.71 -4.22 7 142 2011-04-14 14:53:59 2011-04-14 15:53:59 1 5 122 0 112 155 0 154.10 34 13.95 CHANGED hDDpsp..hpspupp.ppphs...ts.hps.ssoKpKKKKKK+p+.upusShpssspps...s.STsps....ts..sLs...........psuh+.hpccc.IWsTSopEEREpI+EFWLpLuE-ERRSLVKVEKEAVL+KMKEQQKHSCSCoVCGRKRsAIEEELEVLYDAYYEELEQYANh.Qsshpsusslssssp ......................................................................spt........................................tttptppt.c...........t.....tt......t......t.....s..t.t............................t............................s..s.........................ps.t...hsppp..IW.s..oSop.E.....ERE......pI+cFWLsLuE-ER+sLVKlEK-uVL+KM.KE......Q....Q..+...............H...........o...........CSCoVCGRK.Rs.....AIEEELEsLYDuYY-.ELE.paApt.................t................. 0 35 64 99 +13788 PF13946 DUF4214 Domain of unknown function (DUF4214) Coggill P pcc Pfam-B_781 (release 24.0) Domain This domain is found on a variety of different proteins including transferases, and allergen V5/Tpx-1 related proteins. 23.00 23.00 23.00 23.00 22.70 22.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.25 0.72 -4.29 15 574 2011-04-15 12:34:27 2011-04-15 13:34:27 1 75 127 0 119 497 104 67.10 22 14.25 CHANGED VApoFluSsEFpp+YGs.slsscsaVssLYpNlLGRcuDtuGhsYWsupLssGsETRtElLLGFu-SsENcuLFoE .......................................................pppal...p..plYpsll..sR...s..s..DtpGhpa.W.......h....s.......pL..p.....sG.t......o.......ht...phhh...th...h.pStEh.t....t.................................... 0 33 86 99 +13789 PF13947 GUB_WAK_bind Wall-associated receptor kinase galacturonan-binding Coggill P pcc Jachmmer:Q8H7G7 Domain This cysteine-rich GUB_WAK_bind domain is the extracellular part of this serine/threonine kinase that binds to the cell-wall pectins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.98 0.72 -3.48 243 1199 2011-04-15 14:01:02 2011-04-15 15:01:02 1 68 22 0 725 1166 0 107.00 22 20.37 CHANGED hssC.s....s..p..C..Gs..l..s.IPYPFGI........G..s...s.....Chh......tuFp..lsC....ss...o.......s.st..lh.............tsshcVhs.IS.lt....p..........uplp....lhs.....sl....stpChs.sss.s...ph.......t.....sh..shst........s...........ahhSs.spNp.hssl.GCss ....................h..sC..s..p...p...C..G..s........l..s..IsYPF....G.l................s....s....s...........Csh............sFp...lsC....sss.........ts.st.lhh...............................ssshpV.hs....Is.....ht.....s....................sp...lp.lhs..........sh.....tt.shs..ssst............................hshsh.............s...................hh.h....s.t.p...N......h.h.hh.uCt.................................................................. 0 34 326 531 +13790 PF13948 DUF4215 Domain of unknown function (DUF4215) Punta M mp13 Jackhammer:D0LZ65 Family The function of this family is unknown. 27.00 21.50 27.00 21.50 26.90 21.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.69 0.72 -9.30 0.72 -3.74 207 1195 2011-04-15 14:50:47 2011-04-15 15:50:47 1 70 35 0 1101 1185 279 46.50 39 15.77 CHANGED ChpC...p..p..G..ap.h..s......pptC........hshC....GDslls.......sp.....EpCD.Ds.......Ntht..h.D.....GC....hp......Cp .......................................C...........G..a..h...............tt...C..............hshCGDGlls...........st......EpCD..DG............Nphs..t.D......GC......p.....Cp............................ 0 1023 1028 1080 +13791 PF13949 ALIX_LYPXL_bnd ALIX V-shaped domain binding to HIV Coggill P pcc [1] Domain The binding of the LYPxL motif of late HIV p6Gag and EIAV p9Gag to this domain is necessary for viral budding.This domain is generally central between an N-terminal Bro1 domain, Pfam:PF03097 and a C-terminal proline-rich domain. 
The retroviruses thus used this domain to hijack the ESCRT system of the cell. 27.90 27.90 28.00 28.80 27.80 27.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.74 0.70 -5.45 97 602 2011-04-15 15:01:10 2011-04-15 16:01:10 1 17 285 9 420 611 2 282.60 22 30.37 CHANGED lc.s.s.......slPt..slh..c.csppl...p.pp.su.........lpplppthpplp..plpppspphlscsp.chLctEppp-pphRppa.sp..pWs+......sSsph...ss..plppclpchcphlpp.AspoDsplppphcp..hppsl..plLststpp.................................l.pshl.Ps.s.p...t...........................ts....p.s.........pp....tltpL+p.hlpclpplcpcRpp..hhp.cL+.p...th.p..sD.hhsplL..t...hsp......ptt...........s.p.h..c.tlFppcLc+.acshpppl.ppshppQppllpplpps..ppph......h...p.t.+.......ps..ptps.......ppRpphl....pplp.sua.ptap-lhsslpcGpcFYssLtphlp.phpppspsastsR+tEt ....................................................................................................tlP..slh...pchttlp...pp..ss..............hppltp.hppltphhppspphLpc...........st...chLptEptp-pphRtpa.sp...pWpp.........................sSsph.....sp...plpp..c...hpcacphlpp.Asp.....uDsplppphpp..pp...ssl.....plLsts.pp..................................................l.ps..tl.Ps.s.p....t.................................ts...ps.......pt............ltpL+p.hlpclppl...cpcRpp..l.ppL+.p...th.p.pD..hsp.hL.h...h.tp..tst.....................s.p.....c.tlFppcLc+.aps.hpp.tl.ppshppQppllpplpps..tpph..........h...p..+.............ps..ptpt.......ppRppsl.............................ppLt.sua.ptap-lhsslp..cGpc.FYscLtphls.phpppspshs.sRctE.................................... 0 137 221 346 +13792 PF13950 Epimerase_Csub UDP-glucose 4-epimerase C-term subunit Coggill P pcc Jackhmmer:Q3EAY3 Domain This domain is the very C-terminal subunit of UDP-glucose 4-epimerase. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.17 0.72 -4.03 308 5379 2011-04-15 15:23:58 2011-04-15 16:23:58 1 13 3888 48 1359 4059 1160 61.00 38 18.14 CHANGED AFE+AsG+plPYclssRRsGDl...ApsaADss+Apc-LGWcAp+.sL--MCpDsW+WQspNPsGY .....................shc+soG+slP.h.c..l.s.s..R..RsGDh...ushhAss..sKAcccLGWc.sph.sl.-c.......hhpc..u..Wp.W....p..p...ppPpGa............ 0 444 849 1142 +13794 PF13952 DUF4216 Domain of unknown function (DUF4216) Coggill P pcc Jackhmmer:Q9LI66 Domain This DUF is sometimes found at the C-terminal end of proteins carrying a Transposase_21 domain, Pfam:PF02992. 27.00 27.00 27.50 27.60 26.60 26.70 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.46 0.72 -4.50 55 725 2011-04-15 16:13:17 2011-04-15 17:13:17 1 77 16 0 338 760 0 76.00 44 8.30 CHANGED lclpa....c.sttph......p.lsLF+CcWhcs.p..spsl+hD.chG.hspVshsphhh.....cc.-PFlLAsQssQVaYltcPp........ppsWplVh .....................................a..-.sYssh..+VPLF+CpWVcl....p...ssGVhlD.-tG.hTsVDLsKlGY.........pDEPFVLAspVsQVFYVcD.s.............h....h.............. 0 0 50 68 +13795 PF13953 PapC_C PapC C-terminal domain Bateman A agb Bateman A Domain The PapC C-terminal domain is a structural domain found at the C-terminus of the E. coli PapC protein. Pili are assembled using the chaperone usher system. In E.coli this is composed of the chaperone PapD and the usher PapC. This domain represents the C-terminal domain from PapC and its homologues. This domain has a beta-sandwich structure similar to the plug domain of PapC [2]. 
27.00 27.00 27.00 27.10 26.80 26.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.26 0.72 -4.33 301 5978 2011-04-18 16:06:12 2011-04-18 17:06:12 1 11 820 10 369 3683 29 67.20 28 8.66 CHANGED hslp.tsGp.slPFGAp...V.............p.......s....p.........p.s..p........p...s.....G.lVGpsGplY..lsGls..s.pupL.pVpW..Gss...ppCplsas.....lstppp ..........lp.tsGp.slPFGAp......V.............p........s..p...........p.s..p............s..s.....G.lVu-sGplY..LsG.ls..t..supl.pVpW..Gpp....stpCplsap..hs....p............ 0 32 99 245 +13796 PF13954 PapC_N PapC N-terminal domain Bateman A agb Bateman A Domain The PapC N-terminal domain is a structural domain found at the N-terminus of the E. coli PapC protein. Pili are assembled using the chaperone usher system. In E.coli this is composed of the chaperone PapD and the usher PapC. This domain represents the N-terminal domain from PapC and its homologues. This domain is involved in substrate binding [2]. 27.00 27.00 27.60 27.40 26.90 26.20 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.90 0.71 -4.63 34 5788 2011-04-18 16:06:36 2011-04-18 17:06:36 1 9 775 12 327 3635 16 143.60 29 18.18 CHANGED hpFssshLpss.....tss.DlShFpps.tthhPGpYhVDlhlNsphhs..ptplpFpstsst........h.sCLop-hLpphGlphcths...tht.ptsp.........tChs..hstlPtsshpaDhup.tpLslslPQshLt.pscshhsPppWDpGIsAhhlsYshsus .........................FssphLpss......tpsh...D...L.S...p.Fpps...sth.PGpYp.VDlhlNsphhs..ppsl.pF.h.ssssp......................thhsClo...p.Lp.p.hGl..p...s.ss.h.s....t...ht...ttss..............................tCls....hph...l.ss.ushp..aD.h.up..pLslol.PQshlp.p.p.s.c.s..alsPppWDcGIs.AhhlsYshsu............................... 0 24 83 222 +13797 PF13955 Fst_toxin Toxin Fst, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:Q9RLG7 Domain Fst (faecalis plasmid stabilization toxin), also known as RNA I, is a toxic peptide. 
Its N-terminus forms a transmembrane alpha helix, its C terminus is disordered and is likely to be cytosolic. Its translation is inhibited by the antisense RNA, RNA II, which acts as an antitoxin [1,2]. 36.00 36.00 36.20 36.30 31.90 35.70 hmmbuild -o /dev/null HMM SEED 21 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.50 0.72 -6.41 0.72 -4.17 13 45 2011-04-19 12:12:39 2011-04-19 13:12:39 1 1 41 1 5 25 0 21.00 62 62.92 CHANGED FspIIAPllVGlllpLlc+WL .hpTIIuPIVVGVVLcllD+WL 0 0 1 2 +13798 PF13956 Ibs_toxin Toxin Ibs, type I toxin-antitoxin system Eberhardt R re3 Jackhmmer:C1P607 Family The Ibs (induction brings stasis) proteins are a family of toxic peptides. Their expression is inhibited by the Sib antisense RNAs, which act as antitoxins [1]. 27.00 27.00 33.30 33.00 22.50 22.40 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.06 0.72 -6.46 0.72 -4.07 4 33 2011-04-19 12:16:39 2011-04-19 13:16:39 1 1 15 0 5 21 0 18.80 77 64.41 CHANGED MMKLVIILVVLLlISFsAY MMKlVIILlVLLLISFsAY 0 5 5 5 +13799 PF13957 YafO_toxin Toxin YafO, type II toxin-antitoxin system Eberhardt R re3 Jackhmmer:Q47157 Family YafO is a toxin which inhibits protein synthesis. It acts as a ribosome-dependent mRNA interferase. It forms part of a type II toxin-antitoxin system, where the YafN protein acts as an antitoxin [1,2]. This domain forms complexes with yafN antitoxins containing Pfam:PF02604. 25.00 25.00 28.40 28.20 22.70 20.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.48 0.72 -4.24 20 200 2011-04-19 12:19:10 2011-04-19 13:19:10 1 1 179 0 23 81 1 104.50 53 80.37 CHANGED athacpsGs.hs...shhG+Dshaccs...sphh.sclpHlHlt..............t.pssW......h.phhQhpRTSD.saLVYs.thhssppahlluIl.cPsAHcps.......cppsh.hspltchAEpFppph ...........FISYKRDGV.LP...DIFGRDALYDDSF.TWPLIKFERVAHIHLA..................NsNNPFP..........PQLRQFSRTNDtuHLVYC..Q.GAF...DEQAWLLIAIL.KPEPHKLA........RDNNQ.MHKIGKMAEAFRMRF.............. 
0 7 13 17 +13800 PF13958 ToxN_toxin Toxin ToxN, type III toxin-antitoxin system Eberhardt R re3 Jackhmmer:B8X8Z0 Family ToxN acts as a toxin, it is part of a type III toxin-antitoxin system. It acts as a ribosome independent endoribonuclease. It interacts with, and is inhibited by, the RNA antitoxin, ToxI [1,2]. Three ToxN monomers bind to three ToxI monomers to create a trimeric ToxN-ToxI complex [2]. 25.00 25.00 25.40 25.40 24.60 24.60 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.03 0.71 -4.57 31 151 2011-04-19 12:21:31 2011-04-19 13:21:31 1 2 115 10 9 140 2 147.30 27 88.43 CHANGED hclYpIsscYlsYL+ch.....Ds.+Vh.st...ppppRsalGlllplsshcYasPLSSPKpK+cp....................hcppl......shhKIpp.......phLGllplNNMIPV.-sp..lphlDlp......p.p-p+....Y+pLL.cphpalpp..spccItc+.AcplYpphhppt.............tlpptsssFplLEctsppY ......................phYplspcYlphLpph.....Dp......+V..s......tpp.+salGlll.p.l.s....shpYhsP.loSsKp.K+cp.....................................hp..sph......shhKl.p...........phluslplspMIPVscst...hptlshp..........p.pD..+....Yp..pLltpphpalpp......ppppI.hpp.upplYpphhppt.............hhpphsssFthLEchh.t............... 0 3 5 6 +13801 PF13959 DUF4217 Domain of unknown function (DUF4217) Coggill P pcc Jackhmmer:Q1KS87 Domain This short domain is found at the C-terminus of many helicase proteins. 27.00 27.00 27.30 27.50 26.60 26.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -9.03 0.72 -4.43 193 1332 2011-04-19 13:16:38 2011-04-19 14:16:38 1 18 316 0 937 1323 22 64.00 30 9.61 CHANGED slppp.........l........cp.hlhpc.pltphAp+.........AFhSalRuYtpH..pt+.......pIF.plppL..clspl...AcuaGLhpsPphp..t .......................lptplcp..hlhp.shtlpp..tAp+.........AFhS..........al+uYspH..ph+...............plF.pl..p..pL..cLscl.............A.pSaGLhpsP+h.................. 
0 329 530 778 +13802 PF13960 DUF4218 Domain of unknown function (DUF4218) Coggill P pcc Jackhmmer:Q9M233 Domain \N 27.00 27.00 27.20 27.30 26.80 26.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.50 0.71 -4.70 38 847 2011-04-19 13:36:45 2011-04-19 14:36:45 1 90 37 0 424 907 0 109.80 46 12.19 CHANGED tsIscLshFFptlspKVl-sppLcpLpspll.sL..CpLEhhFPPSFFDlMhHLhlHLVcEs+lsGPlahR.MaPaERaMthLKuYVRNRA+PEGSIsEuYhsEEslEF....CscYhp-ss.sluh....sss+pcsph ................................................t.sl.cls.F.hptlsp.KllcsppLptLpspllpsL..s.p.h..EhhFPPoFFslMhHLh....sH...LVcp.hplhGPhah+pMaP.aE.RY.Mu.lLKpYVRNRA+.PEuSIscu.YsTEEslE....F....Cs-alp-hp.sIGlPtsR+-spl............................................ 0 21 99 119 +13803 PF13961 DUF4219 Domain of unknown function (DUF4219) Coggill P pcc Jackhmmer:Q9C740 Domain This domain is very short and is found at the N-terminal of many Gag-pol polyprotein and related proteins. There is a highly conserved YxxWxxxM sequence motif. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 27 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.11 0.72 -7.21 0.72 -4.46 162 657 2012-10-02 13:37:57 2011-04-19 14:55:30 1 109 61 0 341 681 0 27.50 37 3.68 CHANGED hsu.sNYshWsh+McshLpupc..lW.csV- ....hsG.sNYstWsh+MpsaLpupc.....lW.csVp....... 0 83 169 206 +13804 PF13962 PGG Domain of unknown function Coggill P pcc Jackhmmer:B9DG91 Domain The PGG domain is named for the highly conserved sequence motif found at the startt of the domain. The function is not known. 
23.00 23.00 23.10 23.80 22.80 22.80 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.62 0.71 -4.46 259 1615 2011-04-19 16:12:26 2011-04-19 17:12:26 1 118 27 0 955 1566 0 108.80 26 29.59 CHANGED pchhpctpphlhllAsLlATVTasAuhssPGGa..ps...................sGpslL...sph.tapsFhh.sNshAFssSlsslll..Ll....th................hpth..hhh..hhhhhhhlulh.ulhsAasuGshhs ....................p...hpctppslhllAsLlATVoa.sAuhssPGGh.htss.................................p.....p.sGpsllt....p........p...ta..tsF....hh.ssshAh....hsSlsslll..ll..t.th.........................pth...h..hhhhhhhhhhhshh.uhhsAahsu.hh............................................................................. 0 48 486 751 +13805 PF13963 Transpos_assoc Transposase-associated domain Coggill P pcc Jackhmmer:B3H612 Domain \N 27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.73 0.72 -3.99 127 878 2011-04-20 16:20:45 2011-04-20 17:20:45 1 73 23 0 549 900 0 68.40 41 9.37 CHANGED RsWM..........thhop-ahcGlppFlphAhsphtppp.....pl.hCPCpcCpNthh...psp.pplppHLh.ppGF..hpsYph...WhpHGE ................................pWMY..........pRhop-ahcGVp.FlphApsptppt......hl.hCPCp.cC+Nph.....pss..cpl+tHLh.ppG.Fh.sYss...Ws.pHGE........... 0 1 262 285 +13806 PF13964 Kelch_6 Kelch motif Coggill P pcc Jackhmmer:Q9CAE9 Repeat \N 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.23 0.73 -8.85 0.73 -3.57 179 1811 2012-10-05 17:30:43 2011-04-20 17:50:05 1 393 387 0 1125 11207 344 52.00 22 10.77 CHANGED hpt..ht.sh..ss..ss...s..t..l.alh..GGt.sp................sphhh.as.....p..o..p..p..ap.p...l..s.....s..h.....t....s...p ............h....tt.us...ss...hs.......s...c.....l.Ylh...GGh.ss................................thhsslht.ac....sp....o...p.....p.....Wp.p.....l...s......s...h............................. 
0 462 599 827 +13807 PF13965 SID-1_RNA_chan dsRNA-gated channel SID-1 Eberhardt R re3 Jackhmmer:Q9GZC8 Family This is a family of proteins that are transmembrane dsRNA-gated channels. They passively transport dsRNA into cells and do not act as ATP-dependent pumps [1]. They are required for systemic RNA interference [2,3]. 28.70 28.70 29.10 29.10 27.90 28.60 hmmbuild -o /dev/null HMM SEED 570 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.15 0.70 -13.15 0.70 -6.34 8 320 2011-04-21 14:52:42 2011-04-21 15:52:42 1 5 81 0 179 283 11 382.50 32 77.38 CHANGED SEPhaaahp.......shsp.sssoVhlclsS-Dsl.ChsVSlQsusCPVaDhspslp.sapGhapTho++uuIhlpKp-as..ttFhVFsVlps-DssCopps......pp.ps....sNcsK....oIoFplsh.hpshp...YtsholshlhhlhohhL..lahs.lshplhpspppht..............ptslluhpPs.sop.p...........p.hsh....tpcp.sspssshcEpc....hsspph..pllhs+tsLpVuDLu+csp..phlKp+ShsYhapllslulFYsLPVlQ.LVlTaQ+hlspT..GcpDhCYYNFhCA+PL....hhlSsFNplhSNlGYllhGlLFlllshpRchpa..........sppaGlPpHaGLaYAMGlALIMEGlhSAsYHlCPNpsNFQFDTSFMYVIssLsMlKlYQsRHPDlsuoAYssFulLuV.slLsulIGlhs+ss.hlallFslIallsshhlSh...............phYahGshKl............................csRhhl.Ls..........................h.slsNauhhshGL....ph+...stDFusalLhlFhsNslLYlsaYhlMKl........lspE+IshpAllhhhLAllsWssAuaFFhpcsosWTcoPApSRphNp.CllLcFYDsHDlWHhhSAlAlaFTFhhLhslDDDLhsshRssIsVF 
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.....h...................s.s.......hhhh.G..h.hh....p.....................................sh.........hhh.hh..uh.us.hp.CP.s..shp.a........hh.h..hh..a..R.............p...............h.....h.......h.................h......................hh.h.h....h...h..h..hh....h.hs...........h.h....t.........................................................................................h.h...................................................h..hhsh.hhh.......h............t.....th....hhh.hh..h....phhhYhhhYhh.Kh.........................h.t.c....p..h........sh....h....hhhh..h......hh......u......hh..a........t.....th..............ssA.SR.hNt.C.h.th.h....D.+Dh.hHh.suhshhh..h.h.......................................................................................................................................................................................... 0 78 96 140 +13808 PF13966 zf-RVT zinc-binding in reverse transcriptase Coggill P pcc Jackhmmer:P93281 Domain This domain would appear to be a zinc-binding region of a putative reverse transcriptase. 
27.50 27.50 27.50 27.50 27.40 27.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.14 0.72 -3.67 117 1664 2011-04-21 16:16:41 2011-04-21 17:16:41 1 319 46 0 590 1605 0 79.10 24 12.48 CHANGED pFSs+ssYph.l.pssss.ts...ap.pl......Whspsss.KhsFFsWhAshs+lhTtDpLp+...+.s.h..t..hs..spCh.LC.p.p.ppEohsHLhhpCshsp.tlW .........................................ha...................................t..............t.ph..................Wp.t..p..s.P..K.lp.hF..h..W..h..h.hps.+..l........TtspLtc...+..s...h...t...hs......sh..C....h...C..t.t.tt.....EshpHlhh.pCshsttlW........................................... 0 51 342 453 +13809 PF13967 RSN1_TM Late exocytosis, associated with Golgi transport Coggill P pcc Jackhmmer:Q39074 Family This family represents the first three transmembrane regions of 11-TM proteins involved in vesicle transport. In S. cerevisiae these proteins are members of the yeast facilitator superfamily and are integral membrane proteins localised to the cell periphery, in particular to the bud-neck region. The distribution is consistent with a role in late exocytosis which is in agreement with the proteins' ability to substitute for the function of Sro7p, required for the sorting of the protein Enap1 into Golgi-derived vesicles destined for the cell surface [1]. 
27.00 27.00 27.10 27.20 26.90 26.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.20 0.71 -4.67 177 1182 2011-04-26 11:55:49 2011-04-26 12:55:49 1 36 266 0 844 1140 1 153.80 22 17.75 CHANGED slhsslshshsluhhhhhhFslL..Rhp.....hpclYt.PR....phht...........pppss....tuh........auWlsshhchs-ppllppsGLDAhlFLRah+hs.hplhhshsllshslLhPlN.......ss......ssttt.............................th.shhohuNl......psps...........thasallhsala....sh.hshahlhcE ................................................................................hhsshhhshhhhhhhhhhF........hL..R.p....................plYt.s+..........hhh..............................tptt.......puh...................................h.uWlhsh..hc.....h..s..-..ppllppsGlDAhhaLRah+h...........h.ltlhhshslls.....hslLlPlN................hs.............usthp.............................................................psh..spho.huNl.....psss.............................hhWsHslhualhhh.hshahlhp..................................................... 0 239 465 693 +13810 PF13968 DUF4220 Domain of unknown function (DUF4220) Coggill P pcc Jackhmmer:Q9FHJ0 Family This family is found in plants and is often associated with DUF294, Pfam:PF04578. 
27.00 27.00 27.40 27.30 25.10 26.90 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.24 0.70 -5.35 29 828 2011-04-26 13:23:38 2011-04-26 14:23:38 1 24 16 0 517 735 0 247.10 18 46.39 CHANGED LWlAYhhAD.lAsasLGhLohst.............sst.....pppLhsFWAPFLLLHLGG.DTITAa..ShEDNpLWhRHlLsLssQshhu...hYVhh+.ph...s..u..s.......lhsshlLhFlsGllKYuERs.hA...LptAuh.................pphtsphhp....tscssss.h....s.h......hhcph.t...h.p.tth.sph...hhh.t....................................pphsptphlhtAashhp.hhp.thas.s.hhh....s.........hpcp.ppst..th...hpph.......pt.........h.ppua+llElELuhhYDhLYTKsslla.............o.hhGhh....h....Rh........lolhsss.sAhhL......Fth............t.....spt........th.p..as....psDlhlTYlLLuGAllLElhullh.hlhSsWshshLpp...p......hh..hhs...............hhtthhphlph.................h...pp.R..WSsphuQYNLlptC.hcccs ...........................................................................................................................................................................................................................................hWs..h............h.s......t........................h.........h...hh.h........hh.hh.....h..............................................h....h.hh.h.shh.+..hh.+..h........h..st........................................................t..................................................................................................................................................................................................................................................h....t...................................................h...h.................................................................hh..plh...p.plsh.hh-hha.oth.hh..........................................h..h..h....h.................h...........h.hh...........h..........................................................................................t..h...l..s...hhhh.......hhh.-h....h....h....h.s...h.................................
..............................................................................................................................t............................................................................................................. 0 6 185 357 +13811 PF13969 Pab87_oct Pab87 octamerisation domain Punta M mp13 pdb:2qmi Domain This domain was first characterised as the C-terminal domain of Pab87 serine protease from Pyrococcus abyssi [1]. The domain is reported to play a crucial role in Pab87 octamerisation and active site compartmentalisation. Its up-and-down 8-stranded beta-barrel 3D structure is reminiscent of the one found in lipocalins. 27.00 27.00 41.60 40.40 24.80 23.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.23 0.72 -3.90 6 15 2011-04-26 15:55:18 2011-04-26 16:55:18 1 2 14 8 11 15 0 95.70 34 22.07 CHANGED Pc.ELPFhhREclL++LEGhYcGY+GTlcapVKscGDhLhL+shGth.pho.hsLhPE-lc........EDal+haTh..GtKh.VEFp......IcsscVcllaERY+LlK .....Pc.ELPhlhhEchLc+LEGpYcuY+GTlchsVcscGDhLhl+utuchtpto.hsLhP--lc........-DascahTht.sGt+hsVEFp......hcsscVcLlhERa+LlK. 0 4 6 7 +13812 PF13970 DUF4221 Domain of unknown function (DUF4221) Coggill P pcc JCSG_target-390208:A6KZ57 Family This family of bacterial proteins contains highly conserved asparagine and cysteine residues.\ The function is not known. 
27.00 27.00 27.00 27.90 26.90 23.80 hmmbuild -o /dev/null HMM SEED 333 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.12 25 101 2011-04-27 09:58:51 2011-04-27 10:58:51 1 1 42 1 34 143 0 317.50 18 85.45 CHANGED lhhlhll..hSCusp......pcpp.t.................pshsLh..ps...cslsaslD-psh..........hshslt.h..p...........psscchLh.Fhs..ptp...plphhDLcst.cllcpI.hccE.GPNGIspsh..uhh..s.Dshalh.ss..th.plhhhsppGphhpphsh...pssp..........pht.tphst..hh.....ssh.th.p-shlahs.h.......httc.hpphhs.hAhlDlpspplchhsl.sh.s.hhpp..ht.......phss.....s....h..s.....pssh...ph..ssspllhShssssplah.......s.s..p......tcphphh.pss..uphh.....spht....phpss....pps....hsp.hhc...ths.ppspYuslhaDch+chYaRaup......tsps.tpt.h..p.pphslhlaDc-asllGEsp ............................h.hhlhll..huCspp......pppt...................................................tphpL...tp...cslthsls-phh..........h.htl..h..p...........psuccaLh.h.s..php..tplp..hhDlpst.clhcpl.hcc-..GPsul.s.thh..uhh.hshDuhalh.st...........plhhhs.ppGclhpphsh...pptt...........ph....thss...........psh..hh.tsp.t.h.ahs..................h..t.p.h.ppphh..hshhshps.....pph.ph.hsh...h.s.hhpp...........phts.....s....h...th.....phsh...sh..ptspllhua.spsplal.....hs...p......tpph.chh.shp..Sphh......sph......ph.ss..tp....hpthhp.....hhppspYtslhYDph+plaaRhsh.sh......tstp.h...........p.+ths.lhlhDp-aplluEp.......................................... 0 26 32 34 +13813 PF13971 Mei4 MEI4-Rec24; Meiosis-specific protein Mei4 Wood V, Coggill P, Eberhardt R pcc Pfam-B_78600 (release 24.0) Family This family of meiosis specific proteins is required for correct meiotic chromosome segregation and recombination [1]. It is required for meiotic DNA double-strand break (DSB) formation [4]. 
21.70 21.70 23.20 23.50 20.70 19.70 hmmbuild -o /dev/null HMM SEED 375 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.04 0.70 -5.51 7 42 2011-04-27 10:11:47 2011-04-27 11:11:47 1 1 32 0 25 34 1 302.40 31 94.28 CHANGED Mssp..p.........hhpT........+lAlAlAII+p+PsshsuRpaoEhLtptlppp-psWKpcsctLctclhpl+QcL......L.sp.sss.h..t.t.h.tcl.sQpshpt.sstshhcphtssss.p.ps........................p.sphsh-s.hst+hpFLpplhcL+shp......pph..phplh...........psuhpplLssLhphh+ss+....Lh.sphhppsltslsphhsphshpphhstpppp+lp-hhppLlphlLpppplsphpsQchlsppLlsL.upss..h..lIphLLoplpshscsL.phh.pp...sh.p.tt.......phDssphpNhahhahllEpllppp..t..h.....p....pph.tph-cplh.hu-tFPLFuhhL......................WRlGslLsusc ......................................hh..........+lAlAhAII+o+PsshssRpasp.Lt.phlpt...p-.ph+pphcsLchcshph+pcl..........L.s+h.tst.h.......t.t........t.h.spp.hps.sshs.hc.-ssss.s.p.p................................chspsshppsLos+hpFLQ+lLpL+shp......csst.p.hpls.hp.p..ppl..s-SV.pLLDuLlsFa......+pPK....hshsphhpcAlpsLsphlsshshspthhKps.K+LEEhpcoLlphILpsppINphpstp.hsp.Lhhl.......h..llphlL..lpth.p.L.................................hp....hh.hhptl..t..........................p......ptash.th.h......................hhlt.h................................................. 0 6 10 14 +13814 PF13972 TetR Bacterial transcriptional repressor Coggill P pcc JCSG:Target_399142 Family This family of bacterial transcriptional repressors is characterised by the short approximately 50 amino acid stretch of residues constituting the helix-turn-helix DNA binding motif, around the YRFhY motif. The target proteins that are repressed are involved in the transcriptional control of multi-drug efflux pumps, pathways for the biosynthesis of antibiotics, response to osmotic stress and toxic chemicals, control of catabolic pathways, differentiation processes, and pathogenicity. 
The regulatory network in which TetR itself is involved is in being released in the presence of tetracycline, binding to the target operator, and repressing tetA transcription [1]. 25.00 25.00 25.60 25.40 24.70 24.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.64 0.71 -4.76 102 573 2011-04-27 11:46:47 2011-04-27 12:46:47 1 2 496 2 150 441 87 146.10 26 66.69 CHANGED ppYppplpphhpssps....tt..........sl....-shhhYlcslFphhWpYRFhap-LsslLs+stpLppcapp.htpphhpphttlhpthtptGllp.hsspc.lp..sLspslhllsoaWlsapp..stpsptt....hsptsltpGl..hpllsLhtPalsspuppthp ..................................pYpppl.phLp.sps..pp..........sl--hhtYLpslhshhWcYRFlapD.LscLLsRst.cLppcapp.htpch.hpph.hslhpthhst..ullp..hs............psp.lp..sLspshhllsohWlsapp..shp..pst....lscptlppGl....hphlslltPalpspuRpth............................. 0 36 71 113 +13815 PF13973 DUF4222 Domain of unknown function (DUF4222) Bateman A agb Jackhmmer:B2PP87 Domain This short protein is likely to be of phage origin. For example it is found in the Swiss:B6DZ51 Enterobacteria phage YYZ-2008. It is largely found in enteric bacteria. The molecular function of this protein is unknown. 21.40 21.40 21.90 25.00 20.90 20.60 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.90 0.72 -8.36 0.72 -4.56 30 595 2011-04-27 13:50:12 2011-04-27 14:50:12 1 1 284 0 13 250 0 52.30 37 64.10 CHANGED shhphschY+D.ppGhhVplhussps..RVhahR.-GYpasCshPltpFpp-FphVp .........hhphsphY+D.paGssVpIpplpcp..+VhYhR.-GY.a.sChhPlhpFpc-Fp.lp.... 0 0 2 6 +13816 PF13974 YebO YebO-like protein Bateman A agb Jackhmmer:B2PQU2 Domain This short protein is uncharacterized. It seems likely to be of phage origin as it is found in Swiss:Q9MCU2 and Swiss:Q9MCS4. The protein is also found in a variety of enteric bacteria. 
27.00 27.00 30.70 30.60 25.60 25.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.82 0.72 -4.20 21 513 2011-04-27 13:58:43 2011-04-27 14:58:43 1 1 509 0 34 129 0 79.70 73 86.41 CHANGED lslsllsllluLllWFFlNRASVRANEQIcLLcpll-QQKpQss.....LL.................p+Lspssp...tpst..s......custpssp.-atphIsER ...lVSlVVLLlGLILWFFlNRASSRsNEQIELLEALLDQQKRQNA.....LL.................RRLCEANE.....PEKscccss.......-S..pcusEDEDIIRLVAER.................. 0 1 5 19 +13817 PF13975 gag-asp_proteas gag-polyprotein putative aspartyl protease Coggill P pcc Jackhmmer:B8H4J3 Domain This family of putative aspartyl proteases is found pre-dominantly in retroviral proteins. 32.00 32.00 32.00 32.00 31.90 31.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.83 0.72 -4.15 40 832 2012-10-02 15:32:34 2011-04-27 16:57:49 1 76 77 0 189 1318 75 69.60 42 6.43 CHANGED tshtptsphplpsplsu.hphp.sllDoGAots..hlspphAp+L.....uhshpths........hplp...hAsupstpspshhpshh .......................h..hp-sGssTIoVhIGG..p.....hlc.pALLDLGASVN..LlPhslaKpL.....GLstLKsT................sloLp....LADpSl+hscGll-s..l.............................. 0 53 73 124 +13818 PF13976 gag_pre-integrs GAG-pre-integrase domain Coggill P pcc Jackhmmer:Q8S892 Domain This domain is found associated with retroviral insertion elements and lies just upstream of the integrase region on the polyproteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.52 0.72 -4.33 1103 1961 2011-04-27 16:15:09 2011-04-27 17:15:09 1 257 126 0 637 1866 6 69.40 23 6.66 CHANGED sLYhl..sh..........p....p.h...s.s...........s...........s...s...........s.........s...........sp........p...s...........ss...shlWHpRLG.Hhuhp...tlpcLs..p..........th.....l...sth...s...h........p.....tp.......hCcsChhuKp ............................................................a.h................................t..................................h...........s...........h.................sp.........t....s..........................ss...shlWHpRLG.Hhuhp...tlpcLhp............pth......l......tsh...s...h............p.........tt.......hCpsC.huKp................................ 0 184 260 364 +13819 PF13977 TetR_C_6 Bacterial transcriptional repressor Coggill P pcc Jackhmmer:P17446 Domain This family of bacterial transcriptional repressors is characterised by the short approximately 50 amino acid stretch of residues constituting the helix-turn-helix DNA binding motif, around the YRFhY motif. The target proteins that are repressed are involved in the transcriptional control of multi-drug efflux pumps, pathways for the biosynthesis of antibiotics, response to osmotic stress and toxic chemicals, control of catabolic pathways, differentiation processes, and pathogenicity [1]. Another target protein is BetI, an osmoprotectant which controls the choline-glycine betaine pathway in E.coli [2]. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.28 0.71 -4.06 258 2956 2012-10-03 00:15:22 2011-04-27 17:17:06 1 10 1376 20 826 2207 181 109.00 18 54.34 CHANGED P....ppRLpullcsshs..sthst..t...thps.WlsFaupu.hpss...phtclpphh.tpchcspLtthlp.....phhs.........stcscthAptlsAllDGL...alc.ssls.........tshs..hpput...plspphlsthls .....................................................pthht...h...h...s....p.hs.......t..........shph...hh....phhut..u..h.+.pP....tl.t.pl....hp.tt....pchps.sL.s.phh.c.........pths............t...ps......p.....thu.ps....Ls....Ahl.-.Gl...hlc.hshs.......pshs....tpphh...phhpphlt....t..................................... 0 232 548 713 +13820 PF13978 DUF4223 Protein of unknown function (DUF4223) Bateman A agb Jackhmmer:B2PGT1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. These proteins are likely to be lipoproteins (attachment site currently included in alignment). 27.00 27.00 43.80 43.80 25.90 21.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.85 0.72 -4.24 6 435 2011-04-27 16:27:16 2011-04-27 17:27:16 1 1 432 0 13 47 2 55.40 89 99.28 CHANGED MpKhlKlAlluuVLo...oLTACTGHlpN+cKNCSYDYLLHPAISISKIIGGCGP....sApQ ........MNKFIKVALVGAVLA...TLTACTGHIENRDKNCSYDYLLHPAISISKIIGGCGP....TAQ.............. 0 1 3 8 +13821 PF13979 SopA_C SopA-like catalytic domain Bateman A agb Jackhmmer:B2PGH8 Domain This domain is found in the E. coli Type III secretion effector proteins SopA and NleL [1,2]. These proteins have been shown to act as E3 ubiquitin ligase enzymes. This domain contains the active site cysteine residue. 
25.00 25.00 25.70 32.90 19.10 24.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -10.90 0.71 -4.65 9 234 2011-04-28 08:31:54 2011-04-28 09:31:54 1 9 189 13 6 132 0 163.80 49 24.43 CHANGED .hcscLcshFhpAhppspsplKhl--tpp.pcLtpla.ph...psh.ptapLpscHappIlshYsLsshocpcKAEILFsLusVFs+YSSSslFGsEhDSPphLRtYApALhpKAacLDPplh.opppFs-WpsRLlGpssuF....TC..TulLtspMhcHAcppFsslLpplhPsAWR .............................................................t.p+GcLpptFluAhspK+SsVKhlsDss..o+LsolF............ss.hhsptpLs....stHYppILoua.....cLsDAo..pKQAEhLFCLSssFA+YSSSslFGTEaDSP.sLRtYApALhpKAacLsPulFsS......s...ppFs-apDRhp.Ghps.sF....TC..TSVlsDsM....pRH..A+c.hFPpVLSphhPlAWt................... 0 1 1 4 +13822 PF13980 UPF0370 Uncharacterised protein family (UPF0370) Bateman A agb Jackhmmer:B2PE90 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved DWP sequence motif. 27.00 27.00 27.90 63.70 24.00 23.50 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.21 0.72 -4.15 13 533 2011-04-28 08:38:11 2011-04-28 09:38:11 1 1 532 0 39 98 0 65.30 86 99.09 CHANGED MpWL.ADYWWl.ILLlLlGlIlNuIK-LpRlDtK+FLcNKP-LPPHRDNNcpWDDED.DWPp..cKK+ .........MDWL.AKYWWILVlVFLVGVLLNVIKDLKRVDHKKFLANKPELPPHRDFNDKWDD.D.D.DWPK..KD..QPKK. 0 1 8 23 +13823 PF13981 SopA SopA-like central domain Bateman A agb Jackhmmer:B2PGH8 Domain This domain is found in the E. coli Type III secretion effector proteins SopA and NleL [1,2]. These proteins have been shown to act as E3 ubiquitin ligase enzymes. 
27.00 27.00 28.20 29.30 26.90 26.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.64 0.71 -4.14 14 461 2011-04-28 08:38:40 2011-04-28 09:38:40 1 13 337 13 6 257 0 135.00 39 23.85 CHANGED splKschsccLlpsLspsplslsp..sh.s..LhsIhucssYlpsspIp.palpplp-sahpshsp..............hshhcppspllpshlshFs......+pP-hMlst..NusFIQhlhtshtpt..ssshpppAtcLYppYLphsplp.hhpps- .............................................h...KshhshcLVhQL..phLshssh.phlEs..h+chhSHsPYIpsslI+.SaI+ssc-shF-shhppa...........Rhscssassp.h.tFlspFs......hNptLhspp..NshFIQlIspshcus..sDth+ppAhtLYcpalppscVSPhhhp................. 0 1 2 4 +13824 PF13982 YbfN YbfN-like lipoprotein Bateman A agb Jackhmmer:B2PG20 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. Members of this family are lipoproteins. 27.00 27.00 35.70 35.60 22.20 19.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.92 0.72 -3.80 7 463 2011-04-28 08:44:07 2011-04-28 09:44:07 1 1 455 0 19 107 1 88.00 88 81.76 CHANGED QssAP.EDu+.LKpAYSACINsA-GSP-KltuCQuVLsVLKp-KtHQtFAppEoVRVLDYQpCIpAspTGNsQshsApCsKlWQEIRuNN .............QSTAPQEDSR.LKEAYSACINTAQGSPEKIEACQSVLNVLKKEKQHQQFA-QESVRVLDYQQClpATQTGNDQAVKADCDKVWQEIRSNN................ 0 1 3 9 +13825 PF13983 YsaB YsaB-like lipoprotein Bateman A agb Jackhmmer:B2PHF9 Family This family of proteins is functionally uncharacterised. These proteins are related to E.coli YsaB Swiss:Q0TBP2. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. These proteins are lipoproteins. 
27.00 27.00 27.00 32.00 22.20 22.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.67 0.72 -3.82 9 447 2011-04-28 08:46:45 2011-04-28 09:46:45 1 1 445 0 15 112 0 76.10 88 78.93 CHANGED PsQKAQ+sKVSPpRoLsMEpLCK-QAA+RYNTssQKIDVTGFEQFQGSYEMRGpThRKEuFVCSFDA-GQFLHLSMR .....PVQKAQRVKVDPLRSLNMEALCKDQAAKRYNTGEQKIDVTAFEQFQGSYEMRGYTFRKEQFVCSFDADGHFLHLSMR 0 1 1 8 +13826 PF13984 MsyB MsyB protein Bateman A agb Jackhmmer:B2PRZ7 Family The MsyB protein has been found to be able to restore protein export defects caused by a temperature-sensitive secY or secA mutation [1]. However, its exact molecular function is still unknown, but it may play a role in protein export. Proteins in this family are approximately 120 amino acids in length. This family of proteins is found in bacteria. 27.00 27.00 68.40 68.30 25.00 18.40 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.48 0.71 -4.08 7 458 2011-04-28 08:52:19 2011-04-28 09:52:19 1 1 455 0 27 99 2 121.30 91 97.87 CHANGED MYsTL-EAIDAAREEFLAsp.t.--D-ssV.sQFNLQKYVMQDGDIMWQAEFFtDEs-pGEClslhSGtAAQAIFDsDYDElElRpEW.sENTLHEWD-GEFQL-PPLDoEEGcAAAsEWD-c .MYATLEEAIDAAREEFLADNPG.I.DAE-.ANV.QQFNuQKYVLQDGDIMWQVEFFADEGE.EGECLPMLSGEAAQSVFDGDYDEIEIRQEWQEENTLHEWDEGEFQLEPPLDTEEGRsAADEWDER. 0 1 3 15 +13827 PF13985 YbgS YbgS-like protein Bateman A agb Jackhmmer:B2PG89 Family This family of proteins is functionally uncharacterised. The family includes the YbgS protein from E. coli Swiss:P0AAV6. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. Some members of this family are annotated as homeobox protein, but this annotation cannot be verified. 
27.00 27.00 28.90 28.60 24.50 22.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.71 0.71 -4.22 9 467 2011-04-28 08:58:10 2011-04-28 09:58:10 1 1 463 0 22 92 0 123.50 85 98.29 CHANGED MsKLAoLFLTAshoLASGAALAADosuposNGpANuAADAGtVAPsAKpNlAPNNVDNopINTsss.........Go.hpsss....ohspcsMopDElHKNoMCKDG+CPDhNKKVpsttu.ss-ssTKTDGTTQ ....MTKLATLFLTATLSLASGAALAA.DSG..AQ...oNNGQANAAADAGQVAPDAREN..VAPNNVDNNGVNTGSG....................GTM.LHsDG.......SSMNNDGMTKDEEHKNTMCKDGRCPDINKKVQTGDGINNDVDTKTDGTTQ................................................... 0 1 2 12 +13828 PF13986 DUF4224 Domain of unknown function (DUF4224) Bateman A agb Jackhmmer:B2PT37 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria and viruses, and is approximately 50 amino acids in length. The protein is likely to be of phage origin and is found as protein Gp02 Swiss:C8CLF5 in the Xylella phage Xfas53 [1]. 21.30 21.30 21.30 22.20 21.20 19.40 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.11 0.72 -4.41 38 290 2011-04-28 09:01:39 2011-04-28 10:01:39 1 2 222 0 30 167 2 46.30 37 53.99 CHANGED thLTccElt-LTGh+.......ptupQtchLcpp.GI.ahhpscG+PlVsRpalp ....hLTcsEltpLTG.hc................hto+QpchLpcp.GlsFhhst.s.G...cPlVsRphh.................... 0 4 16 28 +13829 PF13987 YedD YedD-like protein Bateman A agb Jackhmmer:B2PRC7 Family This family of proteins related to the YedD protein is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. These proteins are lipoproteins. 
27.00 27.00 33.90 63.30 21.60 18.30 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.89 0.72 -10.41 0.72 -4.49 10 460 2011-04-28 09:05:55 2011-04-28 10:05:55 1 1 457 0 23 121 2 110.60 86 80.58 CHANGED VVKTPAPAtLsGYWQopGPQSuLVSP-AIASLlVTp-GDTLDCRQWQRVIAhPGKLTptsD-hhNVTpKh-VYsLEh-GssLEYDGMTLpRVDRPTsECtphLsKt....PLssP ..VVKTPAPDWLsGYWQTKGPQRALVSPEAIGSLIVTKEGDTLDCRQWQRVIAVPGKLTLMSDDLTNVTVKRELYEVERDGNTIEYDGMTMERVDRPTAECAAALDKAPLPT.P........ 0 1 2 13 +13830 PF13988 DUF4225 Protein of unknown function (DUF4225) Bateman A agb Jackhmmer:B2PH01 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 182 and 282 amino acids in length. 24.10 24.10 24.40 24.60 23.60 24.00 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.99 0.71 -4.79 24 185 2011-04-28 09:36:09 2011-04-28 10:36:09 1 5 122 0 37 140 1 160.80 32 59.07 CHANGED Ippl+pEppsLccQsphlpppphp.hh.hph............cppphhthhltulGlVuGGsQlhuGhulh...hso......tsshsthhGusLlscGsNslhEsshtlh...tcp..........shpG.l+cuYcpsAp.hlGhscpsushsYssVDluhShYGhhphh........................lp......P..ssaRLa+.YlssDah+thpp.....MopstL ...............................p.lp.E.tphppp.phhpttphchalpsch............EcpplIsYVhtulslVs...uGhQlVuGsGhl...hss......tssluhhsGshLlhcGANslhEuhthLh.....hsc..........psoG.lccsYtssAc.hhGhsp.psu.huYpslDluhShYGhhphh................................h+......P..sshRLa+.YlssDahpphpphsp............................ 0 1 11 18 +13831 PF13989 YejG YejG-like protein Bateman A agb Jackhmmer:B2PI82 Family The YejG protein family is a group of functionally uncharacterised proteins related to Swiss:P0AD21. This family of proteins is found in bacteria. Proteins in this family are approximately 110 amino acids in length. 
27.00 27.00 58.50 58.40 20.30 19.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -10.27 0.72 -3.98 16 534 2011-04-28 09:51:31 2011-04-28 10:51:31 1 1 532 0 39 115 1 106.00 82 93.71 CHANGED lQLSVVHRLPQSYRWloGasGsKVEPlP..sshss-ssLlGLKLLSHDGssAWplMcpLppoLs-IQlssullEWEGEPCLFl++pDESAshCRLKNhGVAIAEshsu .......LQLSIVHRLPQNYRWSAGFAGSKVEPIPQ.NG...s.s............s.DNSLVALKLLSPDGDsAWSVMaKLSQALSDIEVPCSVLECEGEPCLFVNRQDEFAATCRLKNFGVAIAEPFSN.. 0 1 9 24 +13832 PF13990 YjcZ YjcZ-like protein Bateman A agb Jackhmmer:B2PJH4 Family This family of proteins is functionally uncharacterised. The family includes the YjcZ protein from E. coli Swiss:P39267. This family of proteins is found in enteric bacteria. Proteins in this family are approximately 300 amino acids in length. There are two conserved sequence motifs: FGD and MPR. 27.00 27.00 28.10 27.60 22.70 22.30 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.60 0.70 -5.26 10 540 2011-04-28 09:57:24 2011-04-28 10:57:24 1 1 319 0 13 209 1 258.80 62 90.92 CHANGED hVDLApG.-ss+sththtppp.pFRpRLppplhupophRpashsushupsLthsLpLlEcLsu.LsPGHLALTplsc+LspLppssustAphossltpQLtsLs-paspRssahEcsLsptsLhspAtcHsEQIFoRWRAGcYsuWSLsGRCYlALEELRWGAFGDACR.LussshsshLlDsLRs....cAsptLApslpAuPsTRHaYHpWLs.ssstsuss-asDhLuWLGDWCssD+HPVsWSVTQpWQsVuLGMPRLCSAcRLu-AMVEEIF ...............lVDhApG.Dst..p...Atpp..pFhcRLhptLhucsthpp.s.su.LstslphpLphlpcLso.LsssphAlTpls.........p+lu.L.p..stR.tph........s-h.pQlhsLh-.hpt+hpahEctLpphs.l.pAt.p.EQIFohWpAGt.YsuaS.sGRCalsLEELRWGAFGDshR..Gps.ttVs.LLs.LRh....KAhpplApp.suusTsRh..hhpWhu.pup......sss-atDhlsWLGcWsot-+pPVhWSsTQthpplsltMPRLCSApRLutuMV-EIF.................. 0 1 2 6 +13833 PF13991 BssS BssS protein family Bateman A agb Jackhmmer:B2PS04 Family The BssS protein family is a group of proteins that are involved in regulation of biofilm formation [1]. Proteins in this family are approximately 80 amino acids in length. 
27.00 27.00 40.70 40.40 26.80 22.00 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.32 0.72 -4.25 16 542 2011-04-28 09:57:45 2011-04-28 10:57:45 1 1 527 0 44 132 1 71.10 83 86.86 CHANGED HPlVGWDISTVDuYDAhMlRLHYLSotpQs.EsApVscThWLTTDlA+QLIsILpAGIsKIEou-htssshp+ .HPLVGWDISTVDSYDALMLRLHYQTPN+sE.pE.GTEVGQTLWLTTDVARQFISILEAGIAKIESGDa.sNEYR.R...... 0 2 10 27 +13834 PF13992 YecR YecR-like lipoprotein Bateman A agb Jackhmmer:B2PR21 Family The YecR-like family of lipoproteins includes the YecR protein from E. coli Swiss:P76308. This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 110 amino acids in length. 27.00 27.00 27.20 43.20 26.70 24.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.72 0.72 -3.81 19 390 2011-04-28 10:03:24 2011-04-28 11:03:24 1 1 385 0 20 105 2 73.10 73 71.33 CHANGED ssuSpssGlVcLuYp.....uhhpss.psDthtutthAspcCppWGYssAcsFGpss....psCshhsGph.....ChspplTlpYQC ..S.EssssoGIVRLsY-....QAaLQ+A..+TDcYVS+GlA-RACQQ.GYocAlPFGQPV....GsCSLaAG.SL.....CLNTcFTLSYQC... 0 2 2 12 +13835 PF13993 YccJ YccJ-like protein Bateman A agb Jackhmmer:B2PKP7 Family The YccJ-like family of proteins includes the E. coli YccJ protein [1] Swiss:P0AB14 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 75.10 75.00 25.20 24.40 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.24 0.72 -3.90 6 468 2011-04-28 10:07:47 2011-04-28 11:07:47 1 1 467 0 26 65 2 68.90 92 92.19 CHANGED KsHHVuEWAsVRcTSsEIAEAIFElAphDEpLAEcIWE.EGSDEVL.hAFuKTscDpLaWG-cTlERKNV ...KAHHVGEWASLRNTSPEIAEAIFEVAGYDEKMAEKIWE.EGSDEVLVKAFAKTDKDSLFWGEQTIERKNV.... 
0 1 3 15 +13836 PF13994 PgaD PgaD-like protein Bateman A agb Jackhmmer:B2PKR8 Family This family includes the PgaD protein from E. coli Swiss:P69432. The homopolymer poly-beta-1,6-N-acetyl-D-glucosamine (beta-1,6-GlcNAc; PGA) serves as an adhesin for the maintenance of biofilm structural stability in eubacteria. The pgaABCD operon is required for its synthesis and export. It has been shown that PgaD is essential for this process [2]. 27.00 27.00 33.80 33.70 25.60 24.10 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.70 0.71 -4.69 34 382 2011-04-28 10:23:42 2011-04-28 11:23:42 1 4 353 0 38 146 4 123.60 45 83.02 CHANGED LIIspp+hhs..p+hhshhlThhhWshalaLhhsh...hlhhhlsht.hht.h..htshtsthtslthahhlhlhsuslLlhWApYNphRapscp++ph.....ls.c-lAppasls..pptlpphpps+lhs.la.aD-cGclhp .......LIIspppp.s..phhlDhlsTslhWshFhhhlhhhh..llht.............ahapSputopLpaYhLLAlANAVVLIlWAhYN+L...RFQcpp++Ashp.....hospEY.....AcSLAlP...-ELhQQLQKS++hoVHasspGpIp.h.......... 0 8 20 28 +13837 PF13995 YebF YebF-like protein Bateman A agb Jackhmmer:B2PQW6 Family The YebF-like protein family appears to be a group of colicin immunity proteins. As well as YebF the family includes cmi, the colicin M immunity protein [1]. This domain family is found in bacteria, and is approximately 80 amino acids in length. The alignment contains two conserved cysteine residues that form a disulphide bond in the solved structure [3]. 27.00 27.00 28.60 28.90 25.90 25.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -9.81 0.72 -3.97 23 531 2011-04-28 10:25:30 2011-04-28 11:25:30 1 2 485 4 22 151 1 88.40 73 73.86 CHANGED RssKhsuCssLspsQlAApVKcDFLQNRIsRWssD+.KtLGpscP.VsWlssp-I...sGc-ssapVPLTVRGs+sD+pYpVhlDCpsGTIoYs ....p.SVpFPpCEGLDAAGIAASVKRDYQQNRlsRWADDQ...KlVGQADP.VAWVslQDI...pGK.DDKWoVPLTVRGKSADIHYQVsVDCKAGhAEYp..... 
0 1 6 14 +13838 PF13996 YobH YobH-like protein Bateman A agb Jackhmmer:B2PQU4 Family The YobH-like protein family includes the YobH protein from E. coli Swiss:Q2MB16 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There are two conserved sequence motifs: GYG and GLGL. 27.00 27.00 31.60 31.40 20.10 17.60 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.22 0.72 -4.05 15 454 2011-04-28 10:29:34 2011-04-28 11:29:34 1 1 451 0 30 97 1 69.00 76 88.65 CHANGED +hlIRslllLAllWlulLhSGYGVLlGSpcNsAGLGLQCpYLTARGhsTAQYlHoDSGlIGlo-CPLLRK ..RhIIRslhLlALVWIGLLLSGYGVLIGSKcNAAGLGLQCpYLTARGTSTsQYLHTcSGhlGI.oDCPLLRK.. 0 1 6 18 +13839 PF13997 YqjK YqjK-like protein Bateman A agb Jackhmmer:B2PL82 Family The YqjK-like protein family includes the E. coli YqjK protein Swiss:Q47710 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a single completely conserved residue R that may be functionally important. 27.00 27.00 28.00 27.50 25.50 25.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.27 0.72 -3.80 39 560 2011-04-28 10:34:02 2011-04-28 11:34:02 1 2 555 0 59 168 6 72.30 73 73.50 CHANGED LLppIpQQRh-LusptppWhchTsshDRuWpplh....phRpahh.luuulhAlhulR+.Pp+lhRau+R.uhusWushR ....LLSQIQQQRLDLSAS..R..R-WLEsTGAYDRtWNhLL....SLRSWAL.VGSSVMAIWTIRH.PNMLVRWARR.GFGlWSAWR....... 0 6 25 41 +13840 PF13998 MgrB MgrB protein Bateman A agb Jackhmmer:B2PQU3 Family The MgrB protein is a short lipoprotein. The mgrB gene has a mg2+ responsive promoter [1]. Deletion of mgrB results in a potent increase in PhoP-regulated transcription [3]. The PhoQ/PhoP signaling system responds to low magnesium and the presence of certain cationic antimicrobial peptides. 
Over-expression of mgrB decreased transcription at both high and low concentrations of magnesium. Localization and bacterial two-hybrid studies suggest that MgrB resides in the inner-membrane and interacts directly with PhoQ. This domain family is found in bacteria, and is approximately 40 amino acids in length. There are two conserved sequence motifs: CDQ and GIC. 27.00 27.00 33.40 49.50 19.30 18.60 hmmbuild -o /dev/null HMM SEED 29 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.88 0.72 -7.32 0.72 -4.17 14 404 2011-04-28 10:41:38 2011-04-28 11:41:38 1 1 401 0 18 57 0 29.00 90 61.90 CHANGED LYLlAlsshCDQG.tpFh.GICsITcalPa LWAQVFNMMCDQD.VQFFSGICAINQFIPW 0 1 2 10 +13841 PF13999 MarB MarB protein Bateman A agb Jackhmmer:B2PLW6 Family The MarB protein is found in the multiple antibiotic resistance (mar) locus in Escherichia coli. The MarB protein is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved GSDKSD sequence motif. 27.00 27.00 28.10 28.00 21.50 20.40 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.06 0.72 -4.39 10 439 2011-04-28 10:46:09 2011-04-28 11:46:09 1 1 435 0 13 87 1 65.30 69 90.91 CHANGED uuAslsLLlLsSGQuhAEQTppPsspsscDslllPsupcQSPFDLNHMuAGSDKSDELGVPYYNp+ ..SAIAuALILFSAQGVAEQTsQPVVTSCusVVVVPsSQEQPPFDLNHMGTGSDKSDALGVPYYNQp... 0 1 1 8 +13842 PF14000 Packaging_FI DNA packaging protein FI Bateman A agb Jackhmmer:B2PPD2 Family This family includes the lambda phage DNA-packaging protein FI. Proteins in this family are typically between 124 and 140 amino acids in length. There is a conserved EEE sequence motif. 
27.00 27.00 29.10 28.90 22.30 21.20 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.39 0.71 -3.93 13 521 2011-04-28 10:50:25 2011-04-28 11:50:25 1 3 289 0 3 215 0 120.80 49 94.02 CHANGED TKEc.lpRLcELAshLGREsDhSGSuA-lAQRVAEhEEEls.............suss-ssssccss.supp-psssspsp.....tsps-hVpVcsLtoLHhsAlcsssscsV.phV.sGpslhVsussAsshlssGLA .......TK-E.ltRLcpLut.LsR-ssloGottElA.RVAEhEEELs.............suupDs.susE...huccs...pssosps-pl.......psousLssVhsLssLHssulcsppsEPV.thVhsGpshhVsuulAspMs-+GhA................................... 0 0 0 1 +13843 PF14001 YdfZ YdfZ protein Bateman A agb Jackhmmer:B2PLX9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There is a conserved YDRNRN sequence motif. The E. coli protein has been shown to bind selenium [1]. 27.00 27.00 27.40 27.40 21.90 20.20 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.06 0.72 -3.70 11 477 2011-04-28 10:55:07 2011-04-28 11:55:07 1 2 463 0 21 77 1 63.40 78 94.39 CHANGED +sYDRNRNAIosGsRVMluuoGpsGVIKAIHu-GhospQlRRuKsVplcGs-t+asPl-LlRLG .....pTYDRNRNAITTGSRVMVSGTGHTGhIhuI-oEGLTAEQIRR...GKTVlVEGCEEKhAPlDLIRLG........... 0 2 6 12 +13844 PF14002 YniB YniB-like protein Bateman A agb Jackhmmer:B2PNH1 Family The YniB-like protein family includes the E. coli YniB protein Swiss:P76208 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 180 amino acids in length. This family of proteins are integral membrane proteins. 
27.00 27.00 30.40 30.00 25.10 25.10 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.86 0.71 -4.79 20 546 2011-04-28 10:57:37 2011-04-28 11:57:37 1 1 541 0 42 162 4 162.50 80 91.65 CHANGED lhGWllFIPALlSTlISllphhat+u-ctpGINAVh.DFh+lMl-MlRFNTPFLNhFWpNSPlPsasth..uuuNlhFalIYlLIFVGLALpASGuRMuRQl+aIREuIEDQLILEpAKGs-GhTRppLEp+IslPR.HTIFlQhFsLYlLPlllullGYhhlKLLGh ..IlGWVIFIPALlSTLISLLKFM.s+pEpQEG......INAVMLDFTHVMIDMMpsNTPFLNlFWYNSPTPNFs.....GGlNlMFWlIFILIFVGLALQDSGARMSRQARFLREGVEDQLILEKAKGpEGLTREQIESRIVVPH.HTIFLQFFoLYILPVIsIssGYhFFSLLGF................................ 0 2 10 25 +13845 PF14003 YlbE YlbE-like protein Bateman A agb Jackhmmer:O34958 Family The YlbE-like protein family includes the B. subtilis protein YlbE Swiss:O34958 which is functionally uncharacterised. This family of cytosolic proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There is a conserved WYR sequence motif. 27.00 27.00 43.10 42.90 22.90 18.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -9.19 0.72 -3.89 14 143 2011-04-28 13:02:23 2011-04-28 14:02:23 1 1 143 0 20 63 0 64.70 64 81.46 CHANGED p.-L+pFIRppPhWYRpLoRpPpplsshEh-AhpaYcKTlPc+V-+hssslpMApMMhpMhpAM+ ..D.EDLsRYIREQPaWYRKLoRNPEEhEAFELAAMpHaKKTIPDKVEKFQNQLulASlMI-MFQhMK... 0 3 12 14 +13846 PF14004 DUF4227 Protein of unknown function (DUF4227) Bateman A agb Jackhmmer:C0H451 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 
27.00 27.00 33.30 33.10 26.50 26.00 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.28 0.72 -4.14 18 158 2011-04-28 13:05:32 2011-04-28 14:05:32 1 1 157 0 29 79 0 70.90 58 90.84 CHANGED +plhcslKVFlLFTGsTlLFYYullWlscEYpsYHRYDEPcGsAVKVhphpssp.ph..sa.h-RLhhFYhsGE .........KhsaDuIKVFLLFTuCTILFYaAILWlN-EYEsYHRY-KPKtcsVchV..ous..c....EPsKD....ua.lsRhhFFYcNGE. 0 7 19 21 +13847 PF14005 YpjP YpjP-like protein Bateman A agb Jackhmmer:P54172 Family The YpjP-like protein family includes the B. subtilis YpjP protein Swiss:P54172 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 200 amino acids in length. 27.00 27.00 56.80 56.00 25.50 18.40 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.89 0.71 -4.40 15 144 2011-04-28 13:07:43 2011-04-28 14:07:43 1 1 143 0 23 80 0 136.90 68 68.73 CHANGED ssc.spppahct.lhppA.cQSh.hKFGsKIuPhIE-Ea+chILPKlEcsIs-hlsphs---.LppLslS-sPuuGpGEKIFHlYsp+TG-DllRFHVRRD+PPpcGYaFNFHYHot-DsFpsHHELGsIYWcKNTPPpW ........s.+LTsDTFIsY.AMQEAEKQSM.pKFGoKIGPVIEDEFKDVILPKIEEAIAELAs-VPE-.S.LQSLA.ISpKP.AGGNsEKIFHVYDTKTGsDLLRFHVRRDHPPQDGYYFNFHYHcaDDGYouHHELGsIYWNpNsPPKW... 0 6 15 17 +13848 PF14006 YqzL YqzL-like protein Bateman A agb Jackhmmer:C0H452 Family The YqzL-like protein family includes the B. subtilis YqzL protein Swiss:C0H452 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 27.00 27.00 27.00 27.80 26.30 26.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.54 0.72 -3.76 20 174 2011-04-28 13:10:03 2011-04-28 14:10:03 1 1 174 0 37 81 0 44.00 51 92.99 CHANGED DFoWKlFupTGsIDoYLLaKE....lEccsc.p.sppppE...pchs...shp ...DFTWKhFSpTGSI-TYLLhKE....hE+-sp-ch-pcE-E..hsclD.sh.s............. 
0 18 29 31 +13849 PF14007 YtpI YtpI-like protein Bateman A agb Jackhmmer:O34922 Family The YtpI-like protein family includes the B. subtilis YtpI protein Swiss:O34922 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 101 amino acids in length. 27.00 27.00 36.20 35.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.66 0.72 -4.06 24 163 2011-04-28 13:12:27 2011-04-28 14:12:27 1 1 162 0 30 111 0 80.50 53 87.64 CHANGED hlhlIlhShshYlYaKl+t.hRopcshc+phasuKusluLGlFlhhFulNQhhl...hpoTlshlluhlFlllGhushatGa+sa+HYhPhht ...........................................hhlhhoh.hhhaaph+..h+st...shEK........tahSuKSuMALGoFVLFFGINQaFL...phSTsclI.VGllFlLhGuuSlasGaRpYKHahPLA.l...... 0 9 21 24 +13850 PF14008 Metallophos_C Iron/zinc purple acid phosphatase-like protein C Coggill P pcc Jackhmmer:Q9LMX2 Domain This domain is found at the C-terminus of Purple acid phosphatase proteins. 23.00 23.00 23.00 23.00 22.70 22.80 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -8.93 0.72 -3.89 130 888 2011-04-28 16:05:58 2011-04-28 17:05:58 1 20 198 20 604 904 23 64.20 29 12.89 CHANGED pAPlaIlsGsuGs.....th...ss.....hs..p....s....p...ssao.s.h....cp...s.caGauplphhNpTcLpacalc.s..p...s.........G..p..V..hDph .....................................uPlalshGsu...Gs...........hh...sp...................hh.p...........s....p...PsaS.sa.........Rp....s.saGaupLplh.NpT....+hhapahc.s.p...D.........G..p..l..hDp................. 0 189 377 518 +13851 PF14009 DUF4228 Domain of unknown function (DUF4228) Coggill P pcc Jackhmmer:Q8GY70 Family This domain is found in plants. The function is not known. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.22 0.71 -4.14 56 732 2011-04-28 16:07:15 2011-04-28 17:07:15 1 4 27 0 458 690 0 161.80 16 86.36 CHANGED MGNslsssh.......stssssss+llh..sGplcch..pt....sls..AuElhtphPsHh................sss....tp...s....s.....pclt.........sLssDccLphG..clYallPhpp.htst.hstps....hs..plt....ht..sspstpp........htttpsp.h...............t........ppssss.sp.h.......hh...hhs.cppl...pchh.............p..pt.........sspt..hsp.h......tptt............hp...ppp....p.tspspsW+PpL-oIsE ......................................................MGst.t......................t.tss+lht.........s...........G.plpch......th.............s..hp..suclh....t.t........P.s.p.h.l.............................stsps..ht...h..........s....................tph................hl.t..scppLp.G..plYall.Phpp.h...p.....hpttp.............hs..tht.........ht......sttthtt.....................ttt.tp.h..............................................................tttst...ht.......................t.h....tphh........................................t.....................t...................................................................................h................................................................................................................... 0 45 266 375 +13852 PF14010 PEPcase_2 Phosphoenolpyruvate carboxylase Coggill P pcc manual Family This family of phosphoenolpyruvate carboxylases is based on seqeunces not picked up by the model for PEPcase, PF00311. Most of the family members are from Archaea. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 491 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -12.85 0.70 -6.19 27 105 2012-10-10 15:06:27 2011-04-28 17:21:18 1 1 102 8 53 798 438 495.80 36 99.07 CHANGED IPRsMuTQHPDNs.hP.a.s.psshlsu-DElpEAahsao.....thGs-EhMWDaEGK-sDpaVl+KLlopY.caFpcphLGcDlaLThRlPNPplEpsEtKlLsEshtsIspsaDhuchh.....t.sssPIFEVILPMTsssccl.plhchYcchlt.pt...h.s....htlcEhlG-h.PcpIcVIPLlEDtsuhlpscpIlpcYh....ctt..c.pahRVFLARSDPAhNYGhluAsLhsKhALpclhclpc-hulplaPIlGsGSsPFRGphsPtslpps.lpEYsGlhTaTlQSAF+YDashccVhculcplpp.tphppst.lsp-c...hhplhpphoppYppplcplAshlNplAphlPpRRpRKLHlGL.FGYuRsh.ssl..............................sLPRAIsFsuuLYSlGlPPELlGhusL...sc...cch......-hlpcsa.tltcDLphAscahN.-........ssthhhstctlhtlp-th-hhtp.............pcHp.hsppllpshppt.....................hcshllchuplRtFLG .......................................................................................................................................IP+sMuTQHPDNsthP.a....s..psshIsup-ElpEAahuao.....tLGs-EhMWDaEGKcsDtaVlc+LhopahcaFppp.LG+DhFLThRlPNsphEcsp..pplhspshtslsps..hDhAchh......t.sttPlFEVILPMoposcplhp.l..t.chapchls..................hthc-..a.h.s.ch..sc.pIclIPLhEDhsuhLpsccIlccYh.........cph.......c..cYhR.......VFLARSDsAhsYGhluulLusKhALsclhchscchulpIaPIlGsGSsPFRG..s..L..o.P..cs..l.-ch....l....pE...Ys....G......lp.ThTlQ.....S....A...F......+...Y...D......a.....s.......h.....-.....c.....V.......p..p....A.....lpp....l.......ps......hph.....sps.....p.lscc-.....thhhclhp.phu.ppYppplcplssslspluphlPcRRcR+hHlGl.hGYSR.s.l.ssl..............................sLP..RAIsFTu......uhY.S.lG.l.P.PE.l.l....Ghu...pl..p..sc....cch......-hlhchYss..h..+cDlphAu+ahsh-............hsthhhspcshtcl...cEDlchhpp..hth........ppcH.sthsppllphhct.h....................hpphIhchAplR+hLG.....................................................................................................................................................................................................................
...................................................... 0 17 31 43 +13853 PF14011 ESX-1_EspG EspG family Bateman A agb Jackhmmer:A5TZ16 Family This family of proteins contains the the EspG1, EspG2 and EspG3 proteins from M. tuberculosis. These proteins are involved in the ESAT-6 secretion system 1 (ESX-1) of Mycobacterium tuberculosis which is important for virulence and intercellular spread [2]. Proteins in this family are typically between 254 and 295 amino acids in length. 27.00 27.00 29.20 27.90 25.80 25.70 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.35 0.70 -5.12 66 505 2011-04-29 10:18:57 2011-04-29 11:18:57 1 2 116 0 134 371 0 245.10 22 90.72 CHANGED Lossp......h.hLh-.thuhsp.hP................hsLsls..s.hss..tscRsshpppshs......pLp..ttGlhsst..plcsplsshlpsLscPshpl-sthhh.................sup....hRsh..............sutpss............psVlA..spss.......stlslpsht........ssuLssslsssLss...ss.Pup.hpslolPssphtpstp............t.tsst.htshlpphGl...sssssps.ltphhspspstsu.husptp.sst.......pspsssslshhD..Tsp.GRhlspsppu.ssp...W.solsPus.spplsptlppL ..........................hossshhhltp.hhshsp.hP................hsLslp....shhss........pspctthtcpshs..........pLs.thGllssp..t.....hsstltshlclLspP-hpltsthht................................ssp........hRsl.................lupcus............pt..VlA..hRsu.......shlslpsss.............................spsLsshlsssL....ss.............ss..PAs.hpslolstpphtcssp...........................s...ss.lpphul................ssssht...s.ltphhssspststhlusppc...sss..........ptpssssl.ulhD..oss.GRll.sts..p..puhsuc...a.hshsPuo.sttlttulpp......................................................................... 0 39 97 124 +13854 PF14012 DUF4229 Protein of unknown function (DUF4229) Bateman A agb Jackhmmer:A5TZR1 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 95 and 122 amino acids in length. 27.00 27.00 27.60 31.20 26.60 26.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.33 0.72 -4.08 62 283 2011-04-29 10:23:44 2011-04-29 11:23:44 1 1 267 0 100 247 24 68.30 33 61.31 CHANGED hlhYshsRLsLhsslssl...lh.hluhhhh.....shhluslhAllluhsLShhlhpphRpcsstsluth....sppR .....hlhYshuRLhLhlslsul...lh.hl..uhhhsh......hsllluslhALllAhPLShhlFpslRccsotslAthscpR.......... 0 29 76 95 +13855 PF14013 MT0933_antitox MT0933-like antitoxin protein Bateman A agb Jackhmmer:A5U0U8 Family This family of proteins contains the MT0933 protein Swiss:O05901 which has been identified as an antitoxin to /protein MT0934 [1]. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 90 amino acids in length. 27.00 27.00 27.20 28.90 26.90 26.70 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.50 0.72 -3.86 58 385 2011-04-29 10:29:15 2011-04-29 11:29:15 1 4 326 0 102 249 0 55.50 39 74.87 CHANGED lhD.............K..uKchlspp....t...........-plc....pGlDKAu-hlD..........cKTGGKYucplDpup-tAcctls ..................................hhD...........KAK-hlspp.....t...........Dpl-....pulDKAGchVD..........c+TsGKauDpIDpup-us+ctls....... 0 36 78 98 +13856 PF14014 DUF4230 Protein of unknown function (DUF4230) Bateman A agb Jackhmmer:C7QH51 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 203 and 228 amino acids in length. 
27.00 27.00 28.30 27.00 25.50 25.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.01 0.71 -4.36 170 626 2011-04-30 09:44:43 2011-04-30 10:44:43 1 1 536 0 162 520 25 157.40 18 73.09 CHANGED lpplcslucLtshphphpplhshpsppphh.................h.hspp+hlhlhsu..plpsul.DLsclp.p..lph........pscp..................lplp.LPpsclh.ssp...lD...tp..hpha.sppps....hhst..........p.p.-hsphhppu.cpphtcpsh..pss.lhppAc...ppupphlptlhps....hth.......pphplph ....................................................................pplcplucLsshphthpcllshccsthhhh.....................hshsc+phhllhpuplph..sl.DL.pchs.t.lpl........sscp........................lplp.lPp.sclh..psp..lD....tp.....hcha..cppsu......hhs...............hs.p.-tsphhpcA...ccplpcpsh....pps..lhppAccsApsh...lpslhps....hsh.......pphph.................................. 0 70 128 153 +13857 PF14015 DUF4231 Protein of unknown function (DUF4231) Bateman A agb Jackhmmer:C7PX93 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 148 and 288 amino acids in length. 24.70 24.70 24.70 24.70 24.30 24.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.35 0.71 -4.08 76 408 2011-04-30 10:14:04 2011-04-30 11:14:04 1 3 361 0 94 240 33 109.30 30 56.78 CHANGED .shhphhcpputpspptah.......hhphhpllh.hhuuh.....lsh....................h.......................tt.......shthhphsss..llus...hhshhsulhshh........phpcp....WhphRsssEtl+ppph....tah......................hpss.at..........ssppstphh ................................ss...lsaa.++usppR+cY+........lL+lspIlsuhLhAl.....IPh..................................................................sscphpllsl......sLSu...lshlspulhsla........sh+-s....WhsapcTAphLc+E+a..lYt.............sp.sp.Ys......sppc..................................................................... 
0 23 60 84 +13858 PF14016 DUF4232 Protein of unknown function (DUF4232) Bateman A agb Jackhmmer:C7PXZ8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 177 and 242 amino acids in length. Many members of this family are lipoproteins. 27.00 27.00 27.40 27.30 26.00 26.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.51 0.71 -4.49 69 475 2011-04-30 10:33:41 2011-04-30 11:33:41 1 4 262 0 148 421 0 132.80 20 63.79 CHANGED CssssLshss.tssps..usGtp....hhhlslsNsuspsCsLpGa.Pu.Vshh.sssGs..............sssuscpss...........ssssVsLsPGpsAhAslthss..ssssss..........tssssss....ltl.tsP.ss........sssl.......tlshsss.........................................ssssplpVsshpss ...................Ctsstlphsh...s.ss...pu.......uhGtp..hhhLslp.N......suupsCtLsuh...Ps.lphh...sssGs.............hss..s..sttpss.............sspslsL.sPGpsstsslpats..ssssss..............shtstt...lhlh.s..st.........tsh.........l.h..h........................................................t................................................... 0 35 120 146 +13859 PF14017 DUF4233 Protein of unknown function (DUF4233) Bateman A agb Jackhmmer:C7Q8C9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 122 and 147 amino acids in length. Proteins in this family are integral membrane proteins. 
27.00 27.00 38.20 38.10 23.30 21.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.67 0.72 -4.12 61 349 2011-04-30 10:53:39 2011-04-30 11:53:39 1 1 349 0 92 219 71 105.30 33 79.73 CHANGED Rs.lhAusLlhEulVlhLAhhVshplss.....h......sssshshssslslshllhsul.p+.sWulhlshsLQls.llsushlhPuhhhlGllFuhlWhhslhhtpclccchtct ....tVhAusLhhEsIVlhLAlPVhhtVuu..........h.........sshuhshssshsVlhllhush.tR.s.....WulhlshsLQlh.hlhushlhPsh.....hhlGllFuAlWhhlhaLt+clcpp.t......... 0 28 70 88 +13860 PF14018 DUF4234 Domain of unknown function (DUF4234) Bateman A agb Jackhmmer:C7Q4E0 Family This presumed integral membrane protein domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 70 amino acids in length. 23.70 23.70 24.00 23.70 23.60 23.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.76 0.72 -4.24 77 247 2011-04-30 12:47:06 2011-04-30 13:47:06 1 8 203 0 77 217 14 81.90 22 50.02 CHANGED Rslshhll.Loll.......ThGIYslaWhhphsc-......................lsth..ssct..shshshhllls......................ll............................TsGlashaWha+hupcl.pthptp ............................hslhhhll.Loll.............ThGIYslaWhhphsc-.............................lsth...stct..thshhhhllhh.................................hl.............................................Thu..lh.lhWhaphuptl.t.....tt.................................................................................................. 0 32 51 61 +13861 PF14019 DUF4235 Protein of unknown function (DUF4235) Bateman A agb Jackhmmer:C7PYA5 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 88 and 119 amino acids in length. 
27.00 27.00 29.10 27.00 25.70 26.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.60 0.72 -4.13 66 301 2011-05-01 09:52:26 2011-05-01 10:52:26 1 1 285 0 89 219 10 78.20 30 77.74 CHANGED aKhluhusuhsuGhlAs+lhptlWcps..sG.ccsP...csp...............................D.ctuhtcslsaAslpGulhull+shssRuuApshp+hssp ..........aKshuhAlusluGhlupKlhptlW+hl...oG..ccsP....pss...............................D...-huhscsLsaAslSGsh..hAssphhscRtss+tat+hs..s........... 0 32 67 82 +13862 PF14020 DUF4236 Protein of unknown function (DUF4236) Bateman A agb Jackhmmer:C7PYP4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 69 and 402 amino acids in length. 27.00 27.00 28.70 28.40 26.90 26.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.71 0.72 -3.85 88 345 2011-05-01 10:59:28 2011-05-01 11:59:28 1 20 326 0 69 260 50 54.00 41 22.01 CHANGED a+FRKol+luPG.l+lNlSKsGl.ShS..lGs+.Ghplsh..stpG.htsosulPGTG..loYpp ........h+FRKSlplsPG.l+lNlSpuGs.ShS....lGs+..Gs.pl.oh...ss+G.thsslulPGTG..LSY+........... 0 24 47 65 +13863 PF14021 DUF4237 Protein of unknown function (DUF4237) Bateman A agb Jackhmmer:C7PYW1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 123 and 781 amino acids in length. 
27.00 27.00 27.80 28.30 25.90 25.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.20 0.72 -3.71 68 319 2011-05-01 11:03:08 2011-05-01 12:03:08 1 23 246 0 96 292 2 94.90 31 19.49 CHANGED pssLssGp.hlDRaG............s.........stGsFluPt.....GssaspRuLP...Pssh..........spsY+hYc.......Vh+.....shs.l...................................htGslA..PWF.sQPG..GGsQ......a...................hh.....stslppLlppG.......hLccls .......................p...l..Gs.hlDRaG............sstGpalush.....sssa..tpRuLs.sssh.............tpsY+hYp.......Vhc......sh...l..........................................................htG.lA..PaFuQPG..GGhQ.h.................ph................hslppLlcpG....hLcp.................................. 0 27 56 81 +13864 PF14022 DUF4238 Protein of unknown function (DUF4238) Bateman A agb Jackhmmer:C7Q0A2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 274 and 374 amino acids in length. 
27.00 27.00 27.30 27.20 26.80 26.10 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.69 0.70 -5.12 94 367 2011-05-01 11:09:05 2011-05-01 12:09:05 1 5 301 0 117 350 7 261.60 15 77.69 CHANGED +pHaVP..phaL+......pF............t................sppsplh.............................shph..................pppphhttshp.....shstcpphYshps.s.t.........................................................plEcth..spl.Es....phuphlpplhstpt..............lsspppp....tlhtFlshQhhRosthppphhphhpthhp.h........................................................t..t.........t.......tthttpthhhthhpphhp...........................htthhhphphtl....hhspssttFlTSDpPlsh...............................tt.sht.............pthplh......hP..loPchhlhh..........................................................................ttptttthhhhsp.pplp...plNph.hpp....up.......................chlaupspsththh .........................................................................pHalPph.hL+ta..................t...................tppthlh..............................................................hhth...................tttthh..hs..hp...........phshtpthYp..t.......................................................................................lEchh..uhh.Es....phsphlp.tlhptht..............................................lstpptp.........tlh.palh........h.hhRs.sh....ttphhphhpt.hp.h..........................................................................................................t...tph....t.....................h..t..h..hh.p.th.p.............................................h...h.htphphtl.........stsppthlsSDs.......Plhh.............................t.s.....................phhplh....hPloPplhlhh............................................................................tt.p..hh....h...h..st.p....p....lp...........hNt...h.hpp...u.......................p.lh.tpt.....hh.................................................................................................................
...................................................................................................................... 0 45 75 102 +13865 PF14023 DUF4239 Protein of unknown function (DUF4239) Bateman A agb Jackhmmer:Catenulispora acidiphila Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 254 and 270 amino acids in length. 21.40 21.40 21.50 21.40 21.10 21.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.31 0.70 -11.36 0.70 -4.97 9 249 2011-05-01 11:18:32 2011-05-01 12:18:32 1 4 178 0 136 268 20 198.00 18 72.03 CHANGED lhsslullaushhuhslshLhppasclpcslssEAslLthlscslhsh.tsc.....cppspthltsYsctl..........lscshshh..pcs..phshh...shs.lsslhshlpthcssspsp.pshtpphl....spltcLpclRtpRlphtthuLsstaalllhhhoshshlsa........hshhsspthhthhuhhlhs.hsushhlFh.h......cLscPFpGsaplsps.shsphl ..................................................................................................h...hluslaullluhshsssh....p....phspsppsltpE....As.s.ltplhpt..s.t.s.h..s.ts...........psplcttlpsYsp.t...l..............lpp-Wsth................pps..ph.s.p.............ssphl..s..p.lhp....p.ltshps.p..sstp...tthhpthl.............splspl....hpsRppRlt.t..s.p.s.sls.s..shWhhllhhusl..h..h.lsh....................hh..hhht.pp..h...hp...h.hh.h...slhu..shluhhlhllh......ph.-pPatG.htls.t.......hh............... 0 41 82 111 +13866 PF14024 DUF4240 Protein of unknown function (DUF4240) Bateman A agb Jackhmmer:C7Q711 Domain This presumed domain is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 169 and 263 amino acids in length. This domain is often associated with the WGR domain Pfam:PF05406. 
27.00 27.00 30.40 30.50 25.70 25.00 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.58 0.71 -4.29 59 244 2011-05-01 11:23:53 2011-05-01 12:23:53 1 6 205 0 77 229 6 128.60 31 61.94 CHANGED Mscpc....FWpLlspsp........sss-tp.....thtchL....ps..........................tLsphssc-lhsFpphhpp...hhtc.uaphslW..uAAallpG............s.sSDDuFthFRsWLIupG+chacpsltsP..DsLuph....ptt............................................thEch.thlutcsacct ..................scpc....FWpLlcpuc...........tst-.-.......phtchL....pp...........................pLsphssc-llsFcphhpp...hhpc.u....Ys......sLW..uAAall.hG..............G..sSDDuFsa..FRsWLlupG+-sa-sslps.P..DsLsphht.h.pptt........................................sphE-l.halutcsYcp.t......................... 0 29 53 69 +13867 PF14025 DUF4241 Protein of unknown function (DUF4241) Bateman A agb Jackhmmer:C7Q8Q9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 205 and 315 amino acids in length. There is a conserved GDG sequence motif at the C-terminus. 27.00 27.00 28.40 28.70 25.60 25.50 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.15 0.71 -4.51 46 192 2011-05-01 11:27:02 2011-05-01 12:27:02 1 5 160 0 42 186 6 172.60 31 66.89 CHANGED plplsoGcllssDPL.............h.h..tstpP..ahpplssGpa.lplslsct.p..t............RhAss+l..................pascpc.....sspachAhhsspsl.ppl...c-u-.a......FGasVDAGhushsDhpstcthpcatpch.pcp.....hshY-DhFsphhpp...shtt.spapppss.shhsashssostslshFpSGaGDGh..YP....sYaGaDcsGplssllh ............lplsoGcllssDPL...............h...pspts....ahpplss.GpYslphtlspt.p..t............RhAss+l..................phscpc.....sstac..hAhhsspc...l.ppl...p-sc.a...FGasVDuGhushsDhpshpthpch.pch.pc.......hs.YsDhhs.phhpc...p..t.spappp..tG.thhs.h.hss...oshslshFpSGaGDGh..YP....sYaGhDpsGplssllh.............................. 
0 20 32 35 +13868 PF14026 DUF4242 Protein of unknown function (DUF4242) Bateman A agb Jackhmmer:C7PZ36 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 90 and 170 amino acids in length. There is a single completely conserved residue C that may be functionally important. 27.00 27.00 27.40 27.10 26.40 26.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.47 0.72 -3.89 75 359 2011-05-01 11:30:12 2011-05-01 12:30:12 1 5 329 0 110 241 112 76.80 43 51.89 CHANGED pallERc.....lPss..los-pltshpppssshhsch....ssVpWlcSaVs.tps...+.saClYpAPst-ul+ctsccu.GhPscpIscV ...cYLVpas.....IPcu....ITh-phhAp.++KsssthcEl....P-VcFhRoYVsEDhu....K.shCLYsAPDEEAVRRA++ts.stPlDsIp..h.. 0 40 81 98 +13869 PF14027 DUF4243 Protein of unknown function (DUF4243) Bateman A agb Jackhmmer:C7QAE2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 348 and 477 amino acids in length. 
27.00 27.00 29.30 29.10 20.00 21.20 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.12 0.70 -4.99 118 340 2011-05-01 11:33:20 2011-05-01 12:33:20 1 10 167 0 246 351 4 303.70 23 73.39 CHANGED tha............N...HhsHtlhulatLGAsssp....lppha...........-pptphhp.h............stphhs......psa....cphLG...cpchhtsalsFFpcElp...........pcG...hcpVlpcYha............ttpt................................h.pllstlhuGhhHPlI+LGaulEhs.pst......................llAEu..................LA.sAsch..s.h.thh..ss....ttts................................................................s.shh...pllpc.lpsstphps....ss.....................ht.thlh..cthcphh.hs...............................u.hh..........pphpcpht-hhpss....shhhsust................t..phDF.....hhlHslTuuhhlphll.hh......sspp+hpllc...hhhthslshYsupupPph.....hpplhphhs.t................................sWpplhpp.hsttps...Ds...HhsKhlRuhtp ..................................hpN...HhsHtlhuhathGAsspp....lppha...........-tttph.csh...................s.p.hst.....psa..........pphLG.................ctchhtsalsFFpcclt................ppG....hptVlpcala...............ttpt..................................phlstlhuGhhHPlI+LGaulEhp..p.t..........................lhuEu..................LA.sAsp....s.htthh..ss......t.t..........................................................................stshh...pllpp.ltt.s.tchps.....sh.....................ht.shlh......pt.cphh.ht......................................u.hhh...............pphpcpht-hh.pss.......................shhhssst................t..thDF.....hhlHhlTuuhhlthl..l..shh.......s.p.+hpllchhhh..hslshYhupspPph.t.ptl.h.s..h.st..t............................................sWptlhpp...sh.ts........Ds........HhhKhhRshh........................................................................... 0 71 141 213 +13870 PF14028 SpaB_C SpaB C-terminal domain Bateman A agb Jackhmmer:C7QBX6 Domain This presumed domain is found at the C-terminus of the SpaB protein Swiss:P39774. 
SpaB is involved in the synthesis of the lantibiotic subtilin.\ \ \ \ This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 317 and 1029 amino acids in length. The family is often found in association with Pfam:PF04737, Pfam:PF04738. This domain is found in isolation in some proteins. This domain is also found in EpiB involved in epidermin biosynthesis. 27.00 27.00 27.60 29.10 24.90 24.60 hmmbuild -o /dev/null HMM SEED 283 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -12.08 0.70 -4.85 142 404 2011-05-01 11:47:07 2011-05-01 12:47:07 1 9 270 0 119 365 3 254.40 19 35.28 CHANGED W.hth+l..hsts.pt...-phlsptlsshlsph.....tts..hpt..aFFlRYtc.....st.....HlRLRl...pss..p.......thhtthhs.....tltphht.....hhtt.............................thhsphphss..YpPEht.....RYGG.ssshshAE...plFttDSthsls..hlt.............tthshph...................................................phlsuhshhp...............hhs.h.....................p..pt..........................................................hphhtchspptttt.ph.hps.tt....thhpltp.....hthht...s...t.th..........hhppttpthsthtphh.....................................................p.t.p..htsllssh.....lHhphNR..hGls.tppE...thlhph ........................W.hhh+l..atst.tt...-phlhctlhshlcph........tps..lsp..aFFlRYhc..................s...sHlRLRl.....phss.t...........htthht..............tlpphhp....thhpp.........................................shhsshphss..Yc.Eht.....RYGG..sshshuE....phFphDSthslp..hlp...............shphph......................hlsuhshhh.........hhp..hsh.......................s.ppt.................hphhpp.hhspth..tp......t.hpthht................thhplhp........ht.ht...t..th..h............hhpptpphhsphpphh.....................................................ttt.tpt.hpp....llsSh.....lHhptNR..hGls..hppEthhh..................................................................... 
0 51 92 110 +13871 PF14029 DUF4244 Protein of unknown function (DUF4244) Bateman A agb Jackhmmer:Catenulispora acidiphila Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 66 and 95 amino acids in length. There is a conserved EYA sequence motif. 27.00 27.00 27.20 28.00 26.80 26.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.61 0.72 -4.87 53 401 2011-05-01 11:52:36 2011-05-01 12:52:36 1 1 400 0 101 287 10 54.10 44 67.72 CHANGED tttpthtsttss-pGMuTAEYAluslAAsAFAulLhhllpSssVpshLpullppALs .........hh....hhhhh.s-pGMuTAEYAlGslAAsAFAulLhtVlTussVhotLpsllspALp.... 0 30 74 94 +13872 PF14030 DUF4245 Protein of unknown function (DUF4245) Bateman A agb Jackhmmer:C7QKK2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 188 and 235 amino acids in length. 27.00 27.00 56.40 56.30 26.20 24.50 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.93 0.71 -4.52 59 339 2011-05-01 11:55:11 2011-05-01 12:55:11 1 1 335 0 84 236 22 175.20 31 88.33 CHANGED Rhhp....ss+DMllSLsllllsshll.hhh........stsss.ss.lssVDssstlptsuc..ss.saPlttPp...lPcGWpssSu..chsshsu............ssshplGalTss......spYlplsQossssss.......hlsthssst.ps....sGspsluGp.sWphaput.........sscsshVtc.h....sssp......lllo.....GsAstc-hcslAsAlt .........................t.ss+DMllSLuslllsshll.hhh....................stssc..pssl.pV.D.hpssltsstp..sh.uaPlttPp.......lPc...s.WpssSu..chsshss............uss.plGalosp......stYlulsQSsts.tct.......hVuu.lstuspp.........sGshsV.u...Gh...pWshasus-........stppshVsc.l...........Gss.pllls.....GsushcphpshAusl.............. 
0 26 62 80 +13873 PF14031 D-ser_dehydrat Putative serine dehydratase domain Bateman A agb Jackhmmer:A5U2B6 Domain This domain is found at the C-terminus of yeast D-serine dehydratase [1]. Structures have been solved for two bacterial members of this family. The yeast protein has been shown to be a zinc dependant enzyme. 22.00 22.00 22.90 22.90 21.90 21.90 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.02 0.72 -3.79 98 1102 2011-05-01 13:05:54 2011-05-01 14:05:54 1 7 847 7 478 1062 313 98.80 27 25.73 CHANGED AlhlhupVlStsp.......spsllcsGt+uluhDs..........u.....h..................th...sh..shs..................t..s..............................shphsth.u...-EHuhL........................p..h..........sss......ss.....................h........plG-hltlhssHsCsThshachhhlVc ..............ALpVhupVlSpsps......spsllDsGp+sluhDt.............u..h......st...............sh.uh.lhs..................ts..............................sh..plssh.s...-EHuhl................................p.h........sss..........st.....................l.......plG-hlplhPsHsCsTsshacthhll........................ 0 134 278 399 +13874 PF14032 PknH_C PknH-like extracellular domain Bateman A agb Jackhmmer:A5U8Q1 Domain This domain is functionally uncharacterised. It is found as the periplasmic domain of the bacterial protein kinase PknH [1]. The domain is also found in isolation in numerous proteins, for example the lipoproteins lpqQ, lprH, lppH and lpqA from M. tuberculosis. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 268 amino acids in length. There are two completely conserved C residues that are likely to form a disulphide bond. A second pair of cysteines are less well conserved probably form a second disulphide bond. It seems likely that this domain functions to bind some as yet unknown ligand. 
27.00 27.00 27.20 28.50 25.50 26.90 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -11.25 0.71 -4.73 68 678 2011-05-01 13:51:53 2011-05-01 14:51:53 1 4 94 0 115 462 0 188.20 19 66.25 CHANGED sslsssslsslL.....Lsss-l....ssl.hGssthtsstsssshs.............sspsssspChushsssps.sYt........shs..sh+stsh.........tssssssth.lpQulssassspsApphh..sshssphpsCsstshsht...........tts.sttaplusssssss...hlshshsttsst.......hsCt..+shsspsNlll-lpsC...........sssssst...usplssthhs+l ........................................................h.sh.sstlsslL.....Lstucl....ssl.hG.ss.th.hs.th.stsht................sssssss.pCtsshshsts.sa.....shs...uhputsh..........t.s..ss.s...tsph..lsQuVssa.sssssAcpha..pshssphppC.sGpshshh.......................tsststthsluss.sssss....slshshtttss..................hpCt.....+shtlcsNVll-lssC...........ttspssss...usslssthhspl.................................................... 0 27 69 99 +13875 PF14033 DUF4246 Protein of unknown function (DUF4246) Bateman A agb Jackhmmer:Q59QG7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and fungi. Proteins in this family are typically between 392 and 644 amino acids in length. 
27.00 27.00 31.80 27.70 23.20 25.30 hmmbuild -o /dev/null HMM SEED 503 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.81 0.70 -5.89 50 374 2011-05-02 10:30:38 2011-05-02 11:30:38 1 6 93 0 324 394 5 341.20 30 77.80 CHANGED tsshts.hhTlREhsMlplMsplTDKPsWcpKlaDc.sIs.s+W+p...Eslspsp.................................................tlocphh-aslpELcaK....Aphac.cs..Gh.l.s..hss...........sVsKS..Dosls...ssLpppL+sul.phLcss....tsp.D.aHPuosppVlDLVHPSLFPLVYGRT+.llssth..lsl-..ssltt..GpGpllPh...sspppt...................h......ttht.hhaSp+FQWLPspVch.....................ssssss+IsSYINNLHPt+a+sLYpsIEpllspsIPhWNpsLs.h......................tsct.hRIph.ts..........................................p.pp.p.tps--.pch.cchp............pW.ppppphl.......................................P-P.ttFp......................................................sp.pttlsL.pccFp..ppGLQVIVKlAsIELTPEc.PpYsGG.sWHVEGph...............NEHIsATAlYYYDs-NITpS.pLuFRpts..-sp.......p.t...a-pss...........................p.palpplaGhcst...........ssshQplGuV.psppGRLlsFPNsl.................QH+VsPFcLtD+oKPGHR+hLsLaLVDPphR.IlSTANVPPQpp-WWs-tst 
.........................................................................................................h...........................................................................................................................................................................................................................................h.....p...............h.........................................................................hh.h..t................................................................................................................hs.thphLPs.hth.......................t.thph.S.YlNNL.p..P.......h.Y...hltphhtt.ls.hp.hh................................................hh...........................................................................................................................................................................................................................................................................................h........................................................................hpl........t..tptlpl......IV+hssIpLT......Pp.p.P.p..a............ts......t.........WHhEG.h......................NEpIsuoslahhs..pN.lsts..pltFR..h.................................t........................................htpl.asht.t.............t.h.Q.lGsl..h....p....G........RhlsaPNhh.................QH+.psFpLhD.opP..GHp+hLshalVsP.....t..hc...l.hSTspVsPQp.pWh.p..h......................................................... 0 160 218 286 +13876 PF14034 Spore_YtrH Sporulation protein YtrH Bateman A, Eberhardt R re3 Jackhmmer:C0H3P8 Family This family of proteins is involved in sporulation. It may contribute to the formation and stability of the thick peptidoglycan layer between the two membranes of the spore, known as the cortex [1]. In Bacillus subtilis its expression is regulated by sigma-E [2]. 
27.00 27.00 77.80 76.70 19.50 19.00 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.19 0.72 -4.09 29 206 2011-05-04 11:44:28 2011-05-04 12:44:28 1 1 206 0 47 114 0 102.90 53 87.53 CHANGED hsshlhsFFlAhGVllGGullGGlGAhLsscPPLpshhpLAspLKIWAllAAlGGTF-slpslEpGlhpGphcslhKQllhIluAhhGApsGhhlIpalstsp .hs.hlIlSYFIAFGVlLGGSLIGGhGAaLhG+PsLT........h........hsphApsL+IWALVAAIGGTFDoFYuhERuhFtG-h+DIlKQlLLIhhAhGGhQTGhlII+WLTQE.... 0 20 36 39 +13877 PF14035 YlzJ YlzJ-like protein Bateman A, Eberhardt R re3 Jackhmmer:C0H413 Family The YlzJ-like protein family includes the B. subtilis YlzJ protein Swiss:C0H413, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 61 and 72 amino acids in length. There are two completely conserved residues (L and G) that may be functionally important. 27.00 27.00 36.10 36.10 19.90 19.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -9.15 0.72 -3.92 39 213 2011-05-04 11:51:42 2011-05-04 12:51:42 1 1 213 \N 58 134 0 64.20 40 93.42 CHANGED LYTlhP.Ehla...ttpcsthpspp...plphs.Gl.llVcthps..sp.....hcllRllSTsPhcYLpschsPGshlp .............LYThMPpplVa...sss..sps.pspc....Vsls.GVpLhVpp.cp..sp.....YpIVRlLS.TsPhcYLc.tapPGppIp.... 0 27 44 49 +13878 PF14036 YlaH YlaH-like protein Bateman A, Eberhardt R re3 Jackhmmer:O07632 Family The YlaH-like protein family includes the B. subtilis YlaH protein Swiss:O07632, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. There is a conserved LGFA sequence motif. 
27.00 27.00 61.50 61.20 22.70 22.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.67 0.72 -3.90 23 162 2011-05-04 11:58:59 2011-05-04 12:58:59 1 1 162 0 30 97 0 77.20 63 74.91 CHANGED scssthGhallYlsIllLullVapLGFA+...KLPlLKslllYllLhlGshlLTFhulh..LPlsEuLlVAAllLhIY+lRL ...D.ENPElGMWLLYGsIllLSAlVYNLGFAR...KLslLKNlVIYl.LAlGCTVLTF.FAVF...LPVGEGLVVAAlVLuIYRlRL. 0 9 21 24 +13879 PF14037 YoqO YoqO-like protein Bateman A, Eberhardt R re3 Jackhmmer:O31923 Family The YoqO-like protein family includes the B. subtilis YoqO protein Swiss:O31923, which is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 120 amino acids in length. There are two completely conserved residues (I and Y) that may be functionally important. 27.00 27.00 39.90 39.80 22.90 20.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.60 0.71 -3.98 8 164 2011-05-04 12:01:38 2011-05-04 13:01:38 1 1 97 0 11 108 1 108.70 49 88.71 CHANGED p+pIGaYGllhshhlSlllptFhps-hlsh.IlssssFlFlllYsWD-hKsYS+KslllhuIpFlIlluslsFlLhcGpchh-slslFpGWlhlA+llYllhlLslsssIh++Isc+L ..RcKIGaaGhllsh.L.lI.s.FhpsEWls..IlslhshlFs.hYpWD-hKtYS+Kphhlh.hphVlh..hlsFlLlcGpc.h-thshFQtWh..AKhLYllhllhlhhh..lshplsphl....... 0 2 5 6 +13880 PF14038 YqzE YqzE-like protein Bateman A, Eberhardt R re3 Jackhmmer:O32020 Family The YqzE-like protein family includes the B. subtilis YqzE protein Swiss:O32020, which is functionally uncharacterised. It is a part of the ComG operon, which is regulated by the competence transcription factor ComK [1]. This family of proteins is found in bacteria. Proteins in this family are typically between 49 and 66 amino acids in length. 
27.00 27.00 29.40 50.40 23.00 22.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.22 0.72 -8.76 0.72 -4.22 19 154 2011-05-04 12:12:15 2011-05-04 13:12:15 1 1 154 0 26 79 0 49.50 58 82.40 CHANGED psNDYVKahTpphVpYhDpPK-ERKcc....+ptRKppK.Ph.t.RWFGllPhuhpLha ...ssDhV+ahTpphVpYMDsPKE-RKp+....KEpR+s.EKEPF.hs+WFGlhPLShsLaa........... 0 6 16 18 +13881 PF14039 YusW YusW-like protein Bateman A, Eberhardt R re3 Jackhmmer:O32189 Family The YusW-like protein family includes the B. subtilis YusW protein Swiss:O32189, which is functionally uncharacterised. This family of proteins is found in bacteria, and is approximately 90 amino acids in length. 27.00 27.00 29.40 29.10 26.90 26.60 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -10.10 0.72 -3.71 19 189 2011-05-04 12:17:32 2011-05-04 13:17:32 1 1 183 0 23 103 0 91.80 45 49.90 CHANGED hppF-L-l-ap-s..pph-hpYE..ccpsph-Aclc...cthss.phpG-EAhpclcsll.spLslsssssppcllspVlssFsL-p.shpch-l..........ElpFpD .asEF-L-s-Yp-s..p-YEssYc.ltuspphEAclE.....DcpADlcLpG-EAhsKlpsLL.pcLphcpsTs-...p-Vl-pVlssFpLDc.cYp+F-L..........ElsFoD. 0 9 18 19 +13882 PF14040 DNase_NucA_NucB Deoxyribonuclease NucA/NucB Bateman A, Eberhardt R re3 Jackhmmer:P12667 Family Members of this family act as deoxyribonucleases [1]. 
27.00 27.00 27.20 27.00 25.40 26.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.77 0.71 -3.93 29 243 2011-05-04 12:23:46 2011-05-04 13:23:46 1 15 181 0 66 192 1 103.50 50 43.24 CHANGED lpasss+YP...ETupHIp-.........AltsG..................cucls.TIDRs..uAcppRcpSL+..........shPs+.........pGhDRDEaPhAhscEGGsG....Asl+hIsPoDNRGAGS.lu......................p.QLusas......DGs+lhhhl ............................................................................tFPppRYP...ETupHIpD.....AIp.p.G..................HS.clC..TIDRs.........GAscRRc.SLt..........shPoK.................pGYDRDEWPMAMC..cEGGpG...........Aplc.....YI...oP..uDNRGAGSWVG.......................ppLspaP.......DGT+VhF.l......................................................... 0 12 39 49 +13883 PF14041 Lipoprotein_21 LppP/LprE lipoprotein Bateman A, Eberhardt R re3 Jackhmmer:A5U518 Family The family includes putative lipoproteins LppP and LprE from species of Mycobacterium. LppP is required for optimal growth of M. tuberculosis [1]. 25.00 25.00 25.50 25.40 24.80 24.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.96 0.72 -9.79 0.72 -4.05 49 283 2011-05-04 12:39:40 2011-05-04 13:39:40 1 6 143 0 72 185 0 87.80 29 41.83 CHANGED sssYsss..........upLShlhlp.hssssssss.........ppshhFcpGcalGososcsh......shlssh.tssssslslpYph.t.s-ssssso.GtssVpF+W........ssspl ............................................s.ssasts.......spLShVhlp.sss.uss.sss.........ppslhFHpGpalGouoscsh.......shlssh..tsosDoVslpYp............sp.....t.....sssu.h....hssVpF+W.ssst.h..................................... 0 13 45 68 +13884 PF14042 DUF4247 Domain of unknown function (DUF4247) Bateman A, Eberhardt R re3 Jackhmmer:A5U5U9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 143 and 271 amino acids in length. 
27.00 27.00 41.80 41.50 26.20 25.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.99 0.71 -4.17 29 214 2011-05-04 13:10:41 2011-05-04 14:10:41 1 1 211 0 35 139 0 171.20 35 83.97 CHANGED huslhl........shuss............t.sspsaluspYsptus........s..thhhssstosspVAsplsst..ppPssps......s-............................ssshaLRYs....DclVtl.............................................tscstss.................................hIcV-shc.............................................suYpphs.........hhhhus....thssssP....sGusstuu..........GssGuu ..........hhlssAs.....hhluust............ts.sp.Its+YshESss.........ps...hshhssspSsspVAccLlst..pcPpptu..p-................................psphaLhYs....Dc.Ihsl.............................................p.Dtppsss...............................lIclpNhc.............................................usYpshs.........................................h.t..............thTPss.....pGu.hcpu..........GssGu.h............................................................ 0 11 21 28 +13885 PF14043 WVELL WVELL protein Bateman A, Eberhardt R re3 Jackhmmer:O31578 Family This family includes the B. subtilis YfjH protein Swiss:O31578, which is functionally uncharacterised. This is not a homologue of E. coli YfjH, a synonym for IscX, which belongs to Pfam:PF04384. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length and contain a highly conserved WVELL motif. 27.00 27.00 103.30 103.20 20.40 18.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.51 0.72 -4.35 16 169 2011-05-04 13:47:27 2011-05-04 14:47:27 1 1 169 0 20 74 0 74.40 61 86.64 CHANGED p-hhE+LTspLLEKNspLSYspARTWVELLWpDFEoThAKAG+tYpGp-hTEclVppWI-pYGupLHpapsppsK .N-haEpLTpELL-KNc+LSYuQARsWVELLWpDFpoTYAKuG+.YQG-EMTEplVRpWIpsHGu+LHchcosNPK. 
0 6 12 15 +13886 PF14044 NETI NETI protein Bateman A, Eberhardt R re3 Jackhmmer:O34700 Family This family includes the B. subtilis YebG protein Swiss:O34700, which is functionally uncharacterised. This is not a homologue of E. coli YebG, which belongs to Pfam:PF07130. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 66 amino acids in length and contain a conserved NETI motif. 27.00 27.00 27.00 43.80 22.60 19.70 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.76 0.72 -4.46 23 336 2011-05-04 13:56:26 2011-05-04 14:56:26 1 1 336 0 28 97 0 51.50 63 91.60 CHANGED KFcVpENETIu-CLsRMcpEGYhPlRRhEKPlFcEsKcs...plpsh+QcIlF-GKh .KFcVpEsETIsDCLsRMK.pGYMPV+RhEKPlFpEpK-G...sVEss+QcIlFhGKh. 0 7 15 23 +13887 PF14045 YIEGIA YIEGIA protein Bateman A, Eberhardt R re3 Jackhmmer:P50742 Family This family includes the B. subtilis YphB protein Swiss:P50742, which is functionally uncharacterised. Its expression is regulated by the sporulation transcription factor sigma-F, however it is not essential for sporulation or germination [1]. This is not a homologue of E. coli YphB, which belongs to Pfam:PF01263.\ This family of proteins is found in bacteria. Proteins in this family are typically between 276 and 300 amino acids in length and contain a conserved YIEGIA motif. 
27.00 27.00 87.60 87.40 18.70 18.10 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.58 0.70 -5.78 25 191 2011-05-04 14:04:51 2011-05-04 15:04:51 1 1 173 0 39 117 0 277.00 56 96.04 CHANGED hhsllhGllhGslsRlhhL+sDYRQYPTYPHGhlIHluLGhIAAuLGAlAlPALlcc-aTAlTFLuLAApQFR-VRsMERpTLspL-shELVsRGssYIEGIApsFEuRNYlslhTuhlTohshh.hh.........slhhGslsGllshlls+plhpGpplsDIA-lchuclpF.-GstLaV-cIh.IMN.lGLppp+EhlLccGhGhllpPKshs..utsTlsNlGQRQAIlH-lushLGlh+DsspPshsPlu++Dl-sGclulhllP.cpDh-thlcllcplPlLEoAh+hPpctc ......sllhGllhGhhsRlhMLRTDhRQYPTh.HG+lIHIuhGlIAAALGAIAlPulLcK-FoAITFLTLAAoQFRDVRNMERNTLppLDuYELVPRGsTYIEGIAllFESRNYLuhlTSFsTThAYlhFt.........ShlAGlIhulIuhaIu+pLMSGcpL+DlsDIEal.l+F...-GuGLYlDNIY.IMN.IGLPs+QEcIhKaGMGFIL+PKshD..AhlTIuNLGQRQAILHDVSssLGlYRDSGTPuLVPLAKRDL-..DGRVGIFlLPQ-pDsEKAItVItNVPsLESAl+MsoE..t...... 0 21 30 34 +13888 PF14046 NR_Repeat Nuclear receptor repeat Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family This is a repeat domain involved in dimerisation of nuclear receptors proteins and in transcriptional regulation in general. It contains a Leu-Xaa-Xaa-Leu-Leu motif which has been characterized for the orphan nuclear receptor Dax-1, which represses the constitutively expressed protein Ad4BP/SF-1. The LXXLL motif plays in important role in binding of Dax-1 to Ad4BP/SF-1 [1]. The domain is subject to structure\ determination by the Joint Center of Structural Genomics. 27.00 20.00 28.30 25.80 23.40 19.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -8.23 0.72 -4.23 14 140 2011-05-04 15:59:12 2011-05-04 16:59:12 1 6 28 0 57 131 0 47.60 54 39.46 CHANGED FCGEDHPpQGSILYsh.sSAKQTpAA...PEsp.GushWssSCGup+sl ....FCGEDHPpQGSILYshLTSAKQTpuAs.....pAPEAR.GushWssSsGup.s............... 
0 4 4 9 +13889 PF14047 DCR Dppa2/4 conserved region Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family This domain has been characterized in the finding of a developmental pluripotency associated gene (Dppa) in the lower vertebrate Xenopus laevis [1]. Previous to this discovery, Dppa genes were known only in higher vertebrates. The domain is subject to structure determination by the Joint Center of Structural Genomics. 27.00 27.00 29.20 27.10 25.60 20.20 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.46 0.72 -3.69 8 100 2011-05-04 16:17:40 2011-05-04 17:17:40 1 4 27 0 57 74 1 66.80 62 23.52 CHANGED GhhWCVVHGp..sssscuWltLph.HuGpsaVPsc..G+sIsLFLLPushhPPstlcDNhLCscCV++Nc .......GsRWCVVHG+sLPADocG.WV+LQF.HAGQAWVP-p..tRhhuLFLLPA........CsFPsPtLEDNMLCPcCl+RNK.... 0 4 4 6 +13890 PF14048 MBD_C C-terminal domain of methyl-CpG binding protein 2 and 3 Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family CpG-methylation is a frequently occurring epigenetic modification of vertebrate genomes resulting in transcriptional repression. This domain was found at the C-terminus of the methyl-CpG-binding domain (MBD) containing proteins MBD2 [1] and MBD3 [2], the latter was shown to not bind directly to methyl-CpG DNA but rather interact with components of the NuRD/Mi2 complex [3], an abundant deacetylase complex. The domain is subject to structure determination by the Joint Center of Structural Genomics. 
27.00 27.00 29.70 28.40 20.90 19.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.12 0.72 -3.40 26 237 2011-05-04 16:22:55 2011-05-04 17:22:55 1 4 92 1 125 236 0 93.50 48 36.94 CHANGED cs.ssE.llpsh-LscsLpslu.PshsscsLlpulAouL+hsus.....PlsGQssutptl-psPus...hhsssQPLCpthl.....VT-..-DI+cQEc+VppARcRLp-AL .....................................................t..stE.llcoh-LPKuLps....VG.PGss--.TL.LpulAoALHoost.....PlTGQhou...AlEKNPuV........aLNssQPLC+....uFh.....VT-..EDIR..+QE-+VppsR+RLpEAL.... 0 28 36 66 +13891 PF14049 Dppa2_A Dppa2/4 conserved region in higher vertebrates Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family Developmental pluripotency associated genes (Dppa) in lower vertebrates have remained undetected until the discovery of a Dppa homologue in Xenopus laevis [1], reporting a new domain termed Dppa2/4 conserved region (DCR). In higher vertebrate Dppa proteins the DCR domain is located next to the here-reported domain. The domain is subject to structure determination by the Joint Center of Structural Genomics. 27.00 5.00 27.70 14.90 26.00 4.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.94 0.72 -4.14 2 81 2011-05-04 16:24:09 2011-05-04 17:24:09 1 3 20 0 43 57 0 69.70 47 25.78 CHANGED ppQshPEhS.Es+LQpCSRKtKhVsK+A+L.+ShchpERtEEoNTVEVlTSA.tuMLAuWuRIAARAsQsKulNSpSIPsSVEsFL .............hst...Es+.......tK......................p........EcsNsVcVhTSA.EAhLASWARIAARAspPcAVsS.......................... 0 2 2 2 +13892 PF14050 Nudc_N N-terminal conserved domain of Nudc. Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family The N-terminus of nuclear distribution gene C homolog (NUDC) proteins contains a highly conserved region consisting of a predicted three helix bundle. In the human homolog this segment has been targeted for structure determination by the Joint Center for Structural Genomics. 
NUDC forms a complex with other NUD proteins and is involved in several cellular division activities. Recently it was shown that NUDC regulates platelet-activating factor (PAF) acetylhydrolase with PAF being a pro-inflammatory secondary lipidic messenger [1]. 27.00 27.00 28.00 27.30 26.40 25.50 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.06 0.72 -4.02 60 308 2011-05-04 16:54:57 2011-05-04 17:54:57 1 5 149 0 172 299 1 55.90 41 17.99 CHANGED p+aDshLhslAppp.sulpshL-shFuFLpRKTDFap.............................G.hp.spsccllhcsFc+ ......paDshLLslhQpp.uslpp....hLsshFuFLtRKTDFap..........................................G.hs.GhAEcllhpsFp........................................................ 0 66 83 132 +13893 PF14051 Requiem_N N-terminal domain of DPF2/REQ. Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family This putative domain has been detected on the human DPF2 protein and was subsequently targeted for structure determination by the Joint Center for Structural Genomics (JCSG). Possibly, the C-terminus extends by 30 amino acids and forms a separate domain. DPF2 interacts with estrogen related receptor alpha (Err-alpha), an orphan receptor which acts as a regulator in energy metabolism [1]. It was also identified as an adaptor molecule that links nuclear factor kappa-light-chain-enhancer of activated B cells (NF-kappa-B) dimer RelB/p52 and switch/sucrose-nonfermentable (SWI/SNF) chromatin remodeling factor [2]. 22.00 22.00 22.10 22.10 21.20 21.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.34 0.72 -4.05 13 277 2011-05-04 16:55:41 2011-05-04 17:55:41 1 6 85 0 123 214 0 71.30 70 19.10 CHANGED lsDshY+EslEsussaNoRLshER..RlRhPFLDsQTGVAQscs.tlahppcpRhPGpstGQlYTYPu+RWRK++Rp .......L.G-paY+-AlEpC+sYNuRLCAER..SlRLPFLDSQTGVAQsNC.YIWMEK+HRGP..G..l.....AsGQlYTYPARpWRKKRR..... 
0 24 34 74 +13894 PF14052 Caps_assemb_Wzi Capsule assembly protein Wzi Eberhardt R re3 Jackhmmer:A7UZC7 Family Many bacteria are covered in a layer of surface-associated polysaccharide called the capsule. These capsules can be divided into four groups depending upon the organisation of genes responsible for capsule assembly, the assembly pathway and regulation [1]. This family plays a role in group 1 capsule biosynthesis. It is likely to be involved in the later stages of capsule assembly. It is likely to consist of a beta-barrel structure [2]. 27.00 27.00 27.20 27.20 26.80 26.30 hmmbuild -o /dev/null HMM SEED 443 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.44 0.70 -5.63 92 390 2011-05-05 12:32:33 2011-05-05 13:32:33 1 2 294 0 143 386 206 444.60 21 90.03 CHANGED lphhsphGshtus.hsphPhhhpusptslspspsss..................shlpttltpthp.........ttttphp.......................................................................................huhslcs.hshst....ss........................................shplppuYlshphtshtlssGppcph...Gh.......sGuhhhosNARPlPtlplspsp.hsh.s.hhphhhshphphthGhh...ps........s...........chstps..........hhautphhh+.t..p........pLElGlshsstaGGp..............Gp.ss.....uhpsah..cshhutt.t.....st..........s.GNpluuhDh+hph..hhphshtlYtphhhEDpuuhh.h........................tshlhGlchph......hspsphh..lhhEahpTp.pss...................ssaYspshY.sG.apahGpslGssh...........................ssc..spshplGhptp.......hssphshphphsasct.ssh.t........................h.....phtthhcl.....h....hthhst....h....phsushuhcps..phh.ssshshtlslph 
................................................................................................................................................................................................................thhsphGshths.hsphPl.hpthtpsLppscss.......................hstlpttlptthss..................thtths...................t................................................h......................................ht............ashtlps..shpts................................................................t.tchpl.p...puYsshp.....hts.h.hlshGphppahGPGh..........sGullh..osNARPhstlplp.psp..hsh.s.hhph.lsshphphshu..ph........ssp.....................phsscs...................hhhut+h.sh..pPh..p........pLElGhsp.sh..paGGc...................Gc.sp.....uh.psah..cshh....upsss.t........t.s....................p.GNpluu...aDh+hph........h.hsh.....slulYtphhtED.......pu..uh.h.t......t......................pshLhGlc.hph.......sppsh....lhhEahsTpspss......tt.................shhYs.H....th.YtsG.atppGhslGssh........................................................sscupshtluhphp.......hssp.p...hpsphpaschsss.t......................sh...pppp.thlpl....tat........hhtth.plpsshhh..sps..p...sss.sh.hth................................................................................................. 0 48 102 127 +13895 PF14053 DUF4248 Domain of unknown function (DUF4248) Eberhardt R re3 Jackhmmer:A7V2Q3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 73 and 86 amino acids in length. 27.00 27.00 28.60 28.50 26.90 23.20 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.01 0.72 -4.39 64 339 2011-05-05 12:42:11 2011-05-05 13:42:11 1 1 64 0 17 200 0 68.30 36 86.02 CHANGED ahlRs.Ys+sELAhhYhP.....sl.ss.puAhc+LpcWIcts.tLhppLttsGYpppp+haTPtQVplIlcaLGEP .......a.l+s.Ys+sELA.thYhP.....sl.sspsAhc+Lp+WIcts.sLhppLtssGYpsps.+t.aTPtQVplIschLGEP.... 
0 8 15 17 +13896 PF14054 DUF4249 Domain of unknown function (DUF4249) Eberhardt R re3 Jackhmmer:A7V6V4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 279 and 365 amino acids in length. There are two completely conserved residues (C and G) that may be functionally important. 27.00 27.00 27.20 28.20 25.80 25.70 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.95 0.70 -5.06 80 352 2011-05-05 13:08:51 2011-05-05 14:08:51 1 2 162 0 154 373 218 287.50 17 90.65 CHANGED hhhllhhhhh..suC...pc.l-l.shsss.s+LVlsuhls......sssssttlpLoposshhssp..........lssApVpl.......sssptpshhhtpsss........................shYh....sss..........hspsGcsYpLplph..stpphsApsplstss.sl.pslphpptshhsspp.pp.....................lplpapDss.sppNYYhhphp.pph.hh.........................................................................................tlhsDphh.........sGpphshthhhp..............................phpsssplplplhulocshYpYhpsl.ptpsss.....ss.hhstPsslhuNlhsu...................lGhFussphs.phphpl ............................hhhhhhhhh.suC...pc.l.s.h.p....hsss...spLVlpuhls................ssssthlpLopo.....tsh..hsst..........................lssApVpl............................hssptpphhhtpsst.................................stYh...........sssh.................thpsGcsYpLpl.....ph..st....pph.sA.p.s.p.lstss...tl..pslp..hp...thhs.tt.t......................hplshpDss...spp...sYY...hhp.hp.pth.h...............................................................................................t.h.........................lhsDp.hh..............sspp.hp.hth..t..............................thphtt.pl..tlchhulocphYpYhpsl.thpsss.........................st.h..stPspl...uNlpss......................lGhhsssshs.ph.h..h........................................................................ 
0 77 140 154 +13897 PF14055 NVEALA NVEALA protein Eberhardt R re3 Jackhmmer:A7V7L0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 75 and 92 amino acids in length. There is a conserved NVEALA sequence motif. 27.00 27.00 27.10 27.20 26.80 26.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.46 0.72 -4.27 65 219 2011-05-05 13:17:48 2011-05-05 14:17:48 1 1 44 0 36 174 0 71.20 26 84.01 CHANGED K.....p..Klhhhhhhusl.sshsGashhps...pptc.....thS.DLuLsNVEALA.sGEss..............s...............................stsshsC ..............phhhhhhh.uhl....sshsuhshhps....pptc.....ph.S..-LsLsNVEALA...suEss..................................ss................h..C....................h............................................... 0 6 19 36 +13898 PF14056 DUF4250 Domain of unknown function (DUF4250) Eberhardt R re3 Jackhmmer:A7VAD8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There are two completely conserved residues (N and R) that may be functionally important. 27.00 27.00 30.10 37.20 25.40 21.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.60 0.72 -4.24 92 470 2011-05-05 13:27:41 2011-05-05 14:27:41 1 1 362 0 66 290 8 55.00 43 87.76 CHANGED lshD.....Ph.hLlShlNhKLRDpas.SL-pLCpshslsccpLhpKLsslGYcYctppNQF ..............shDPh.hLhSllNhKLRD.-.as.oLDcLsssa-lDccpLh...sKLsshGacYssppNQF.... 0 19 37 53 +13899 PF14057 GGGtGRT GGGtGRT protein Eberhardt R re3 Jackhmmer:A7VAK6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 330 amino acids in length and contain many highly conserved residues including a GGGtGRT motif. 
27.00 27.00 46.00 46.00 19.80 19.80 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -12.07 0.70 -5.62 11 294 2011-05-05 13:44:32 2011-05-05 14:44:32 1 2 279 0 48 216 16 313.70 78 97.84 CHANGED pFEuh-RRhs+IptsLpphGlsSLE-ApplCps+Glcs.pIV+slQPIAFENAsWAYTLGsAlAlK+GspsAu-AAttIGEGLQAFslPGSVADpRpVGLGHGNLuAMLLpEETcCFAFLAGHESFAAAEGAIGIApsANKVRKpPLRVILNGLGKDAAhIISRINGFTYVcTpaDYhTGELclVcE+saSsGs.RAtV+CYGADDVcEGVAIMp+EsVDVSITGNSTNPTRFQHPVAGTYKKtslEpGKcYFSVASGGGTGRTLHPDNhAAGPASYGhTDTMGRMHuDAQFAGSSSVPAHV-MMGLIGMGNNPMVGATVAlAVAVppA ......................h.FESa-RRIcpIpssLsphGIpo....IEEApplCcstGlDsYphlcphQPICFENAsWAYslGuAIAIKK....GC....c....sAA-AApAIGcGLQAFCIPGSVADpRKVGLGHGNLuAMLLcEETcCFAFLAGHESFAAAEGAItIAcpANKVR..KcPLRVILNGLGKDAApIISRINGFTYVpTpaDYhTGE.....LclVpcpuYSs...G.......RAKVpCYGADDVREGVAIMa+EGVDVSITGNSTNPTRFQHPVAGTYKKEplEpGKcYFSVASGGGTGRTLHPDNMAAGPASYGMTDTMGRMHSDAQFAGSSSVPAHVEMMGhIGMGNNPMVGsTVAlAVuVpEA... 0 26 43 45 +13900 PF14058 PcfK PcfK-like protein Eberhardt R re3 Jackhmmer:A7V2F8 Family The PcfK-like protein family includes the Enterococcus faecalis PcfK protein Swiss:Q82YK9 which is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 137 and 257 amino acids in length. There are two completely conserved residues (D and L) that may be functionally important. 
27.00 27.00 33.20 32.90 24.40 23.00 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.85 0.71 -4.04 18 325 2011-05-05 14:36:51 2011-05-05 15:36:51 1 1 168 0 19 230 7 125.50 39 76.36 CHANGED K.uo-pFKcsIpsYLppRApsDpLFAtsht+ssKNl--ClsYIlscVp.....+...oGss..........uhsD-ElauhAl+YYc..EcsIcssK.slpCpV......sVN+h...lphotccKtcs+ppAhcphppEchpKhpp+spts+.....ttscs...psQ.SLF...Dh ......................so-pFpcsIppYLspRAppDtLFA.shh+ssKsI--ClTYIlsplp......c.......uGCs..........GhsDsElFuhAlHYY-..E-cI-lGK.sls.CpV...............sVNHh...lcLTpEEK....scARppAlcphppEphtKhppcpt..t..........t....p.oLF..................... 0 8 17 19 +13901 PF14059 DUF4251 Domain of unknown function (DUF4251) Eberhardt R re3 Jackhmmer:A7V8X1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 164 and 196 amino acids in length. 27.00 27.00 30.40 29.80 26.70 26.00 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.59 0.71 -4.36 50 228 2011-05-05 15:01:04 2011-05-05 16:01:04 1 1 117 2 32 178 4 144.00 26 81.10 CHANGED s+ppp+pp..................ptpplpphlsscpaplcsspshP...................ppGpsh.Ls.ss..hlplpsDolhspLsa..aGpsas.sPh...GpGulsapushpsaphp.pcKKGshplshsspsp..tps.hphslslassu.sAslslsss.s+psl.....oasGplt ................................t.....ttt..................thppstptlcscpahl-sspsh....................hpGpshhlo.ss..hlplps-pshsplsh..FspsYs.sP....GhGulshsushss..hphp.h-KKGs.hp...l.shslpsh...t.s...hphplslhssu.sAolslsss.sppsl.oasGpl.h.......... 0 12 26 32 +13902 PF14060 DUF4252 Domain of unknown function (DUF4252) Eberhardt R re3 Jackhmmer:A7VAG1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 182 amino acids in length. 
27.00 27.00 27.90 28.80 24.30 26.30 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.90 0.71 -4.67 82 269 2011-05-05 15:12:56 2011-05-05 16:12:56 1 2 129 0 88 266 42 153.50 18 87.94 CHANGED Kph......hllhh.lhlsshsutu................t.t.phas....capstcshsslslspphhphhsph...........p.cspphtchlpplcslplhs.ts............sp..spsphppphpphhp.....s.sacpLhplp...-.ssppsphhh+tsps....tlpElllhsss.................cp........phsllpltGs.hs.pclsplhpp ........................................h..lhhhh.lhlhsh.hstu................t.p.phFs....ca.pcpcslssVtlsppMh.phhsph.............ps.ph.tplhpplcslplhshpc..............sp........hppphppphpplhp.......s.sappLhphp....-.psppsplhh+tpts....tlpElllhsss.................cs.phsllplpGc.hs.p-ltplht......... 0 34 74 88 +13903 PF14061 Mtf2_C Polycomb-like MTF2 factor 2 Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Domain Mammalian Polycomb-like gene MTF2/PCL2 forms a complex with Polycomb repressive complex-2 (PRC2) and collaborates with PRC1 to achieve repression of Hox gene expression [1]. The human MTF2 gene is expressed in three splicing variants, each of them contains the short C-terminal domain defined here. The domain is subject to structure determination by the Joint Center of Structural Genomics. 27.00 27.00 27.00 29.90 26.90 26.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.60 0.72 -4.07 11 173 2011-05-05 16:17:15 2011-05-05 17:17:15 1 3 65 0 92 157 0 48.60 61 8.25 CHANGED ssshscL+sSlssYFGuA.GRlssGE+apVLARRVTs-GKVQYLVEW-Gsss ......s.pLsHLKsSIosYFGAA..GRlAsGEKYpVLARR.VTs-GK...VQYLVEWEGsT..... 0 12 19 43 +13904 PF14062 DUF4253 Domain of unknown function (DUF4253) Bateman A agb Jackhmmer:C7PV89 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 110 amino acids in length. 
27.00 27.00 33.60 33.30 26.50 26.50 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.32 0.71 -4.43 58 204 2011-05-05 16:42:08 2011-05-05 17:42:08 1 17 177 0 57 196 2 108.70 32 25.08 CHANGED shhlshlPspcsa-lhuhls.h.GuhNpsssssphsAlh+tWp-+aGuhlsulshD.pl-hhlsp....PPtstc-AhplAtEpatFCsDhlcQ...s...............ht..olpsLA.ctLhpsphWaFWW.....D ..................................t.hlshlPsppsa-lhuhls..h.GuhNtsssss-hhAlh+hW.-+aGAh.ssls.a.D.pl-h.lsp....Ps.s.-cAhplAhEpYsFC.PDhl-Q...u................................ht..olppLA.c.sLh.psptWaFWWD...... 0 21 46 55 +13905 PF14063 DUF4254 Protein of unknown function (DUF4254) Bateman A agb Jackhmmer:C7P9Y0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 195 and 207 amino acids in length. 27.00 27.00 27.10 27.10 26.80 26.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.64 0.71 -4.59 56 245 2011-05-05 16:45:14 2011-05-05 17:45:14 1 4 231 0 90 244 323 136.60 40 65.79 CHANGED ltpllhppshhcshpWHhEDlhRcsslsstthhphKRpIDt.NQcRsDhVEhlDsahLpthpsltshs..sA..........plNoEoPuhslDRLSILuLKlYHMp-pspRpD..As.tpHhtpCppKLslLhEQ+sDLusulDpLLsDltsGcKhhKV .................................................h.tthlah+shl-ssQWHhEDllRcPplss.tshthKRcIDp.NQ-RoDhVEhIDsahhpthpslpshs.....sA..........plNTESPAhulDRLSILuLKIYHMp-pspRsD...ss...s-HhtpCppKLslLhEQ+tDLupAl-pLLsDltsGcKhhKV...... 0 34 71 87 +13906 PF14064 HmuY HmuY protein Bateman A agb Jackhmmer:C7PU21 Family HmuY is a novel heme-binding protein [1] that recruits heme from host carriers and delivers it to its cognate outer-membrane transporter, the TonB-dependent receptor HmuR.\ This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 278 amino acids in length. 
27.00 27.00 30.60 27.10 25.20 26.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.27 0.71 -4.24 83 274 2011-05-05 16:52:52 2011-05-05 17:52:52 1 4 165 4 92 262 99 176.90 20 72.01 CHANGED sstssahYhshcssph.........................................................h.ttpttpsssWDlAFpph...slpsNuGs............su.Gpstshh.hs...........................ssasp...........ssshsssssatt..D............................................................................................t.......hshstshssssh................................................................................suWasashs...........................................................................sshhsspsp.lall+s...s-.GpasKlplhsaYssss..................uahoFcYt .........................................s...spWhYhshpssph........................................................................................t...tpttpsssWDl..AFpph.........pl+sNuGs..........................ts.utstsh....ss.............................sshst.................ssptsssssass...D.........................................................................................................................h.hst....t....hsht.t.shsstth...................................................................................................................................................ssWhshshs...........................................................................sshhssstp..lall+s..........s-..GpYsKlplhsa..hssss..tp..............uhhohpa......................................................................................... 0 37 67 87 +13907 PF14065 DUF4255 Protein of unknown function (DUF4255) Bateman A agb Jackhmmer:C7PSC6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 190 and 320 amino acids in length. 
27.00 27.00 30.40 29.90 23.80 23.20 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.97 0.71 -4.83 82 228 2011-05-05 16:56:58 2011-05-05 17:56:58 1 5 188 0 126 237 37 182.90 19 72.26 CHANGED lstlspsLpsh.Lppthtt.............ssspVsh.ssPsp..........ssttsssplslaLaplpcssth+sts...stpsstt.......hhps.slhlsLaaLloAa..........ssshtpshpl...Lupslphhppp.shl...stpshssth.................................................tthp..plplph.s.hsh-plsclWusL.ssp.apPSlsYplshlhlpssthtsts....sVsphs ..................................lttlspsLpph.Lppth.t.............ssspVsh..ssPsp..........sstt.sssplslaLaslpcssth+spsh.......psspst........hppP.PhhlsLpYLloAa.............spsstpptpl........LupslphLtpp.shl...sspslssth.................................................ttht......tl.p.lt.h.s..hshcplsplWsuL.ssp..h+sSlsYplsslhhtssthts.s..Vtp..s............. 0 38 92 115 +13908 PF14066 DUF4256 Protein of unknown function (DUF4256) Bateman A agb Jackhmmer:C7PN24 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 190 amino acids in length. 27.00 27.00 203.50 203.40 22.20 19.10 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.02 0.71 -4.77 44 212 2011-05-05 17:00:08 2011-05-05 18:00:08 1 1 211 0 53 180 13 172.10 67 92.09 CHANGED LLpsLKsRFEcNhpRHpulcWspVps+LcAss-KLWSLp-MEcTGGEPDVVuaDpposcYlFaDCSsESPKGRRSlCYD+cAL-uRKcaK..PcssAl-hAspMGIELLTEEQYRpLQpLGpFDhKTSSWlcTPssIRcLGGAlFsD+RYspVFlYHNGA-SYYAuRGFRGhL...+V .LLclLcsRFEKNMsRHc..GL-WucVpsKLps.s.sEKLWSLsEMEtTGGEPDVVuYDc.cp-EYhFaDCStESPKGRRSLCYDtEALESRKcHK..PcNsAIDhAssMGIELLTEEQYRpLQpLG-FDhKTSSWlpTPs-IRcLGGALFCDhRaG+VFVYHNGA-SYYAARGFRGsLRV.... 0 30 45 49 +13909 PF14067 LssY_C LssY C-terminus Eberhardt R re3 Jackhmmer:Q7UW88 Family This domain is found at the C-terminus of Legionella LssY proteins, which may be a part of the type I secretion system [1]. 
This domain is functionally uncharacterised. This domain is found in bacteria, and is typically between 182 and 195 amino acids in length. It is often found in association with Pfam:PF09335 and PF01569. There are two completely conserved residues (P and W) that may be functionally important. 25.00 25.00 25.10 27.10 21.70 24.30 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.14 0.71 -5.32 48 217 2011-05-06 08:39:08 2011-05-06 09:39:08 1 5 202 0 75 233 29 174.90 23 33.97 CHANGED hs.pthhhcthPspopshDGh..uDPlNlsll.GspsplpsshtpsGWptscshohpo.hphshuslhcpshstAPVSsLahhGRtQDhAaQp.ssssstp...RaHlRhWpssht..................stssp........shWlGusoaDpGlth.ohhospl.o.HcI-s-lDsERDtlhpslp.tss.httsthhtshsss.....thssss-sahTDGcl ..........p...hp.thphhPspppshsGp..tpPlNlths.Gs.s.tlppthpttGWppssphohps...hlthhhhppshsphPV.shhhpu+spslsht+.sss.ss.p...RphlRhWtssht..................htssp........slWlGuhsh-phh...shhtt.l..h.atht.shD..thshlhttl......................................................................... 0 25 43 59 +13910 PF14068 YuiB Putative membrane protein Bateman A pcc Jackhmmer:O32109 Family This family of bacterial proteins is functionally uncharacterised. Proteins in this family are approximately 100 amino acids in length. There is a conserved FGIGF sequence motif, and many members are putative membrane proteins. 27.00 27.00 43.70 30.40 22.10 21.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.46 0.72 -3.70 21 172 2011-05-09 08:23:49 2011-05-09 09:23:49 1 2 165 0 33 112 0 95.60 60 96.48 CHANGED s.llIShlLFFVLFFGIGFlLNMLLRtTWlMAllYPIVllhIlsp.thhpYhpsPupuFsulhcclhuLthsDllILsuGhsGAllSGhsI+hLR+pGYQMF ..........llIuMhLFFlLFFGIGFLLNMlLRsTWlM.sllYPIVClhIIs+sshhcYFocPpEoFuShGspVupLutADlhILSoGLlGAhlAGllIKpLRKsGYQMF.......... 
0 11 24 27 +13911 PF14069 SpoVIF Stage VI sporulation protein F Bateman A, Coggill P pcc Jackhmmer:O31625 Family The sporulation-specific SpoVIF (YjcC) protein of Bacillus subtilis is essential for the development of heat-resistant spores. Its expression is governed by SigK [1,2]. 27.00 27.00 28.30 28.20 23.00 22.70 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.42 0.72 -4.24 18 195 2011-05-09 08:36:16 2011-05-09 09:36:16 1 1 170 0 38 106 0 78.90 49 91.38 CHANGED FcslcKKTu..Vs.p-lhKLAsSlpsANhcDEpsVRplI+pVuplAN+PVoKEpEDcIVpAIhsssh.stDhsoLsKMh...KK .....FsNIEKcsp..VNc-DIFKLAuSVQNANL+DEsslRQLI+pVAhhAs+cVPKEpEDpIVcAIlsssh.PsDFuoLuKMhp..K...... 0 10 26 28 +13912 PF14070 YjfB_motility Putative motility protein Bateman A, Coggill P pcc Jackhmmer:O34438 Family This family of proteins is regulated in B. subtilis by SigD, and is likely to be involved in motility or flagellin production, Proteins in this family are approximately 60 amino acids in length, and contain two highly conserved asparagine residues. 27.00 27.00 27.10 27.00 26.50 26.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.67 0.72 -4.18 64 334 2011-05-09 08:54:15 2011-05-09 09:54:15 1 1 320 0 92 221 9 58.30 30 94.26 CHANGED Iuulus.....uhupsphtpp.luhuVhKpuhDspppsutpllph.......h..........t.ussPslGp.slD..................lps .................huMupupLhps.VshoVLKKuMDsscshh.spLlcs.......h...........ttuspsshGs.slDlhA..................... 0 34 66 78 +13913 PF14071 YlbD_coat Putative coat protein Bateman A, Coggill P pcc Jackhmmer:O34880 Family This is a family of putative bacterial coat proteins. Proteins in this family are approximately 140 amino acids in length. 
27.00 27.00 65.00 64.80 26.70 20.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.69 0.71 -4.06 18 144 2011-05-09 09:44:54 2011-05-09 10:44:54 1 1 143 0 20 87 0 127.50 56 91.84 CHANGED ccFKpFV++HPKlIpEVRpGpKTWQ-lYE-WhLhGEDDchWssY+...............tpssspcpc.........pppps-hhsplhshlK+hDssplQpalsphSpuIuulQsllsQFpssspppt.......tptpp.PFSFRKD .pQFKEFVp+HPKhl+EVRuG+KTWQQFYEEWYLLGEEDPIWssYR..................tp.tEscccp-s.+sE.................EEKouDlMGQMLSahKKLDV-QMQcHLANVoSAIGSVQQVlQQFQGs+oQQc......psoSEsNPFhFpKD............................ 0 3 12 14 +13914 PF14072 DndB DNA-sulfur modification-associated Bateman A, Coggill P pcc Jackhmmer:O34448 Family This is family of bacterial proteins likely to be necessary for binding to DNA and recognising the modification sites. Members are found in bacteria, archaea and on viral plasmids, and are typically between 354 and 474 amino acids in length. There is a conserved DGQHR sequence motif. 29.00 29.00 29.00 30.30 28.50 28.90 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.10 0.70 -5.31 64 426 2011-05-09 10:06:35 2011-05-09 11:06:35 1 2 328 0 110 399 165 326.50 18 79.94 CHANGED aPAl+GhQus+..............paYhshhPh+hl...s+lhhhsp.p..phssphR...........................uQR............sLNcsRlscIscYl.lp.........................N.p..sYl.huul.Tusls...s.shthp...h...............ttts..t.plGhLpls..hDu.c....hhlsDGQHRpsAIc.......pALc......csP...p.......Lup-..............sIuVlha....h-tuLc+sQQhFuDlNptuh+sssSls............hhYDcR-.s..hutls+plh.....ppsshap...shs-hccssluppup+LFT.lsslhpusptLh....................tptttsph..cpttphsppFWptl.spphs-...Wp.lhppphss......tphRpchlpupulsLpAlGhhupt..........lhppt.........spphcptlptLpp....lDWs+ss.......W.....ps...+sh..psp.................lsps..psslpLssstlcptlulsLs 
...........................................................................................shph.p.s.t.t......ha.hhh.hp.l....clh.hsp.....p......p.s.p.t...............................hQR..........................slsps....+l.p.c..Itc.Yl....p................................................................................stp.......shl...hs.sl.shshs....t..thphp.....h..............................................tssshGhLpls........cs..p.................hhllDGQHRhtAlp.................................puht.................................p........p..........................lsp..........................................plsVh..ha.................hs.shcc.p.pp....h..FhslNppthhsssslh..................................h.hsp.cc............shlsppls.................pts.ht...............shhc...h.t.....p.....s...s........tpp..ptlh........o..hssl.hph..hp...thh.............................................................sttp.tphhp....hhppaaptl..tphhst......hp.......................................p....p.h.h.h....p.s....h.hh.slu.hh.t.........................h.......................................................................................................................t....................................................................................................................................................................................... 0 27 75 97 +13915 PF14073 Cep57_CLD Centrosome localisation domain of Cep57 Coggill P pcc manual Domain The CLD or centrosome localisation domain of Cep57 is found at the N-terminus, and lies approximately between residues 58 and 239. This region lies within the first alpha-helical coiled-coil segment of Cep57, and localises to the centrosome internally to gamma-tubulin, suggesting that it is either on both centrioles or on a centromatrix component. This N-terminal region can also multimerise with the N-terminus of other Cep57 molecules. The C-terminal part, Family Cep57_MT_bd, Pfam:PF06657, is the microtubule-binding region of Cep57. 
27.00 27.00 27.40 27.20 26.70 26.50 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.26 0.71 -4.44 8 162 2011-05-09 10:19:16 2011-05-09 11:19:16 1 3 48 0 73 143 0 161.50 53 39.03 CHANGED AllSALKsLQEKIRRLELERsQAccslppLSREusca+csL....................cc-ppp+shtpp-hopp.........pp-lspQLsuAEuRCoLLEKQL-YM++MVcs.s-+E+sslhEpQspLpREppppQhclpupLcKL-lLEpEapRLTsTQusAEcKIppLEcKLpEEEpQRKLhQDKAucLQTuLEsN+lll ...........................Alh.ALKsLQ-KI+RLELERhQAE-sl..ptLS+Es....hp.YK..KsL....................-...p...php.ERp.u+pE..Kp...............pp-ls.QL.uAps+CsLLEKQLEYh+pMlhp.sEtE+shlLEp.Q.spLpRE+...p........p.DQh+.l..pupLEKLDlLEpEh.+LTshQthAEcKhpcLEpKL+EEEppRKhhQcKA.upLQTGLEhs+llh.............. 0 13 18 34 +13916 PF14074 DUF4257 Protein of unknown function (DUF4257) Bateman A pcc Jackhmmer:O34881 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 27.00 27.00 30.20 30.80 26.30 25.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.67 0.72 -4.33 11 133 2011-05-09 10:32:23 2011-05-09 11:32:23 1 1 114 0 10 61 0 80.20 64 70.66 CHANGED plllAslIGGlhGlluH.lpp+G+lphP..........................................Rp.....s+phaalGFltDhhlGhhAulLhV.lhs..cspohhpllhlSIluGlGGEuhLhS ..QWLTAlLlGGITGFVSHLINN..QGKLLLP............................................RR.....LKTFFHhGFLTDIFTGSLAALLGL.VLF..DVTsIKEIIKVSIVTAISGQTFLLH. 0 1 4 5 +13917 PF14075 UBN_AB Ubinuclein conserved middle domain Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family Ubinuclein 1 and 2 (UBN1, UBN2) are members of a histone chaperone complex involved in the formation of a certain type of facultative heterochromatin, called senescence-associated heterochromatin foci (SAHF) [1] [2]. 
The domain described here is conserved in many eukaryotes such as human, rat, drosophila, and zebra-fish and has been targeted for protein structure determination by the Joint Center for Structural Genomics. 25.00 25.00 25.00 25.00 24.20 24.20 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.48 0.70 -4.66 12 163 2011-05-09 11:56:05 2011-05-09 12:56:05 1 5 79 0 96 141 0 196.10 46 21.34 CHANGED LPpsLsscllpplsslK-hu+.h.chsG+ppFF-sclssLLLclt.phppss..pstRstVapHLEtpLpssK.slh++hKpl+lccpcs+hppsLpKL+cAlscsMPctlssYEhEppphsptpsu..................cpsuEc.P.....ph+hPRKKFpWs-plRpLLhclhpl+hpSatl.t.pRppShE-alpu...aLcpcVhsLWPpGWMphcpLpKEl ................................................LP-.GLPs.Lc++lc-Lp...........AA+hh...-tEG+pKF.Fop-hNslLLDIElQhp..Els....stlRSuVYuHLtuFlPCsK-oLlK....Rh.KK....L...+L...................p.....Q....c.s..RL+EPLpKLK.AlupsMPEQlt+Ypc-CpA+spsKsA.........hpt-.p-.cpp.....................s-.--D-E+ss.......+RlhGPR.....KKFpWsDpl.RpLLCplVclKLtsY-LEt.s+u.......pShED....alKs...........Fh-........s....EVKPLWPK.GW..MQuR.hLaKES................................ 0 28 35 57 +13918 PF14076 DUF4258 Domain of unknown function (DUF4258) Bateman A agb Jackhmmer:Chitinophaga pinensis Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 95 and 124 amino acids in length. 21.60 21.60 21.60 21.60 21.50 21.50 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -9.25 0.72 -3.97 112 331 2011-05-09 12:23:19 2011-05-09 13:23:19 1 5 278 0 128 317 58 71.80 19 55.93 CHANGED hhop.H.Ahpchp..pRp.Ishp-lhpsl..tp.Gclhcphs....sst.tssplhlttstppt............slcllhshsps.....hhhllTlahs ...................hopH.Ahp+hp...cRs..Ishpplhpsl....pp..Gclhct.........pst.tss.phhhthstttp..................lpllhshpps......hhllTsa...................................... 
0 56 95 118 +13919 PF14077 WD40_alt Alternative WD40 repeat motif Weichenberger CX pcc Joint Center of Structural Genomics (JCSG) Family WD repeats are short subdomains of about 40 amino acids and fold into 4 antiparallel beta hairpins. This domain here has been detected on the C-terminus of WD repeat-containing protein 18 during target selection by the Joint Center for Structural Genomics. 25.00 25.00 38.30 64.70 24.30 21.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.27 0.72 -4.33 4 42 2011-05-09 12:27:19 2011-05-09 13:27:19 1 5 31 0 22 35 0 47.50 63 11.27 CHANGED huush-KslhGst-pL+lRVuELEEEVRsLRKIN+DLFDFSTRIIT+P ..hsooh-KsVLGsQ-pL+lRVoELE-EV+sLRKINRDLFDFSTRlIT+P... 0 1 3 7 +13920 PF14078 DUF4259 Domain of unknown function (DUF4259) Bateman A agb Jackhmmer:C7PLU1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 118 and 145 amino acids in length. 22.70 22.70 24.10 23.40 22.30 21.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.73 0.71 -3.64 61 249 2011-05-09 12:29:06 2011-05-09 13:29:06 1 6 214 0 67 189 4 121.80 20 66.43 CHANGED GuWuhusF-sDsuhDhls-Ltct.............thhpthhcsshhs..tspchl-s--stsuluuAtllshh..........hsssh.ssthtt.............................ttpssppltphAhphlcplhst...SEhh-LWp...ps..tth-pWhpslp .....GuWuhthF-sDsuhDhlsplp-h.............ttpthlpth...tpthhhs...tttsth-hc-s...t...s...sluhAtlhhth..........t.sssh.sss..h.t.............................htph.ppltphshphlcph.t........tt.hphW.p.........h.t....................................................................... 0 18 51 67 +13921 PF14079 DUF4260 Domain of unknown function (DUF4260) Bateman A agb Jackhmmer:C7PFK4 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are typically between 114 and 126 amino acids in length. There is a conserved GLK sequence motif. 27.00 27.00 32.20 32.00 26.40 24.30 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.56 0.71 -4.26 24 197 2011-05-09 12:52:38 2011-05-09 13:52:38 1 1 193 0 65 166 5 108.60 44 92.15 CHANGED sctllRlEGhslhssulhhYuhhs..huWhlahlLlLsPDLSMlGYLhGsRlGAhsYNhsHoahhPllLlslGlhhssshs........htlulIWlAHIGhDRhLGYGLKYssuFccTHLGRl ......................+tll+hEshslh.lhslhhYs..h..hp..aS.W...hlFhlhlLsPDLSML.....uY.hhss+lGAh......lYNlhHoYllslllsllGlhhp..shl........lhlu.LI..WhAHIGhDRhhGYGLKYp.ssFcpTHlt+l.......... 0 20 46 55 +13922 PF14080 DUF4261 Domain of unknown function (DUF4261) Bateman A agb Jackhmmer:C7PC06 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 80 amino acids in length. 27.00 27.00 27.30 27.30 26.60 26.50 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.50 0.72 -4.17 52 353 2011-05-09 12:56:15 2011-05-09 13:56:15 1 16 297 0 40 258 2 77.10 31 21.46 CHANGED hThGhpsFsh.-lphh...pshcsp-lhpaLhslutYllppsss.lp-G-TI.Ghss......ppphphptppu.....lts..sp.s.LcIp .........YThGhcsFsc.ElElps...tsh-sp-lh..hlhslshYlLppDss.LcDG-Tl.thp-.........spphphpcspu.....lts..ppts.LcI............................ 0 21 29 34 +13923 PF14081 DUF4262 Domain of unknown function (DUF4262) Bateman A agb Jackhmmer:Q50763 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 147 and 227 amino acids in length. Swiss:Q50763 is incorrectly annotated as the KatG protein. 
27.00 27.00 30.00 29.60 26.90 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.75 0.71 -4.01 54 189 2011-05-09 13:06:11 2011-05-09 14:06:11 1 2 171 0 54 153 10 117.50 29 65.65 CHANGED pltcpGhslhtVhscppsss...........aua.olGh.pphshPEl.llhGL.shphspshls...plsptht.tsc...h......Gtthpph.......tsh.shhh.lt...ht..hhh.u..hat.............h.slQhlasDtpGhh.......PWc.shs ......................lhpaGWslhtV.s-cps...............auYTlGL.sphshPEL.llhGL.sschupplLNthucc.hh.sGch...hssGhphsth.........ts....lthhpVspschc...hhhA.hhhat...........sph..sLQllWsDpcGpa.......PWpssh......................................... 0 17 40 52 +13924 PF14082 DUF4263 Domain of unknown function (DUF4263) Eberhardt R re3 Jackhmmer:Q7UE31 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 244 and 403 amino acids in length. 27.00 27.00 27.70 30.50 26.80 26.10 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.04 0.71 -4.29 61 136 2012-10-11 20:44:47 2011-05-09 14:08:03 1 3 127 0 43 141 11 162.50 19 47.31 CHANGED Lpss...t............sEppaQphlcps.shlh...shtasthh...........hh.schp...hssphhsDFlhhstssshh.......llEIKpPs..pslhsp..........ssh...t.up-lspAlsQlhcaht.hlppptsphptptptt...........................shps+slllhGcp.........phhsppppcshchhppph...t.slcIlT...aD-Ll ........................h.....t...sEpphQpahcps..shlh......uhtast.h...........hhh...schs.......hssshhsDalhhstssshh.......llElKpPs...spl.hpp...........ssh..........t.ut-lspAlsQlhsaht.hlppphsthptp.pht..............................shpscshllhGpp.........p.hsppp.pcsh.chhppph....t.plpllTaDpLl.............................. 0 15 31 39 +13925 PF14083 PGDYG PGDYG protein Eberhardt R re3 Jackhmmer:Q7UEH4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. 
Proteins in this family are approximately 150 amino acids in length. There is a conserved PGDYG motif. 27.00 27.00 27.40 30.30 26.90 26.70 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.59 0.72 -3.65 6 78 2011-05-09 13:16:29 2011-05-09 14:16:29 1 1 76 0 24 63 18 101.70 77 69.33 CHANGED GP...............NRYtPGDAllTGuTGDRWVVSR-RFDs+Y.shsPs.sHGcsGAYRN+P.lsVLA+cMscsFsIARSs.GGDVL+GssGDWlMQYA........PGDYGllppARFupVYR .................GPNRYssGDALlTGSTGDRWVVSR-RFDAKYlPsssuhAHGpPGAYRNRP.AVVLA+RMDtPFoIARSA.uGDsLRGsAGDWVMQYA........PGDYGVVQApRFAQVYR. 0 3 7 15 +13926 PF14084 DUF4264 Protein of unknown function (DUF4264) Bateman A pcc Jackhmmer:P54395 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 28.70 28.60 19.80 18.30 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.87 0.72 -8.31 0.72 -4.59 25 183 2011-05-09 13:37:37 2011-05-09 14:37:37 1 2 183 0 47 86 0 50.80 62 89.49 CHANGED KlElluohchppscDhYKlVDFLN+TLK-cslhFGLohspc.....cschlhoIYcT .KIElLuThclchosDLYKIVDsLNRTLK-pcLMFGLALDEc....c+cpAVFTIYRT. 0 22 36 40 +13927 PF14085 DUF4265 Domain of unknown function (DUF4265) Bateman A agb Jackhmmer:Chitinophaga pinensis Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 139 and 168 amino acids in length. 
27.00 27.00 28.00 34.10 25.40 25.30 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.53 0.71 -10.13 0.71 -4.35 42 117 2011-05-09 13:38:35 2011-05-09 14:38:35 1 2 102 0 45 106 14 114.70 24 73.85 CHANGED sEslauc.hls..ssp.aclpssPaashulAhGDllpsppss..tthhhpch.ttuGssTlRlhh.pss.........hpplhsplpthGssh-shpt..shlulslPspsshpslpphLsp.hpcpshlpa .........Etlaup.hhs..ssp..atlpslPaas.slAhsDllphcpcc..thhhhpcllpsSGNsTlRllhhspt........hpplhsp.lp.p.hGsphEt.st..shlulslPspsshpslctlLsp.hc-pshhta............ 0 18 22 33 +13928 PF14086 DUF4266 Domain of unknown function (DUF4266) Bateman A agb Jackhmmer:C7PCX1 Domain This presumed lipoprotein domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 50 amino acids in length. 27.00 27.00 39.50 39.50 25.10 25.00 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.47 0.72 -3.45 47 129 2011-05-09 13:43:10 2011-05-09 14:43:10 1 1 108 \N 61 131 37 49.90 44 58.12 CHANGED VpPaE+shLAcs-Msh.spcshcsshspHlY.u+Euu.uGGtGhuGGGCGCN .VcPaE+spLAcs-MsL.spcshctthspHlY.s+Euu.uGupGhuGGGCGCN. 0 21 45 55 +13929 PF14087 DUF4267 Domain of unknown function (DUF4267) Bateman A agb Jackhmmer:C7PBL0 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 126 and 142 amino acids in length. 
23.00 23.00 23.50 25.10 22.60 22.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.68 0.71 -10.30 0.71 -4.48 58 194 2011-05-09 13:46:48 2011-05-09 14:46:48 1 5 140 0 133 197 3 111.20 22 73.37 CHANGED hushhlshGlthllp.PppAhshFGl.......Pts........................pptssuh....hhlhGsRDlshGlslhshhhh...............ust+ulGhhl...................lusuhlsluDuhlshp...........tGssttsh...Hhs.....husllsshGhllh ..............................uhhhlhhGlth.lhs.PttuhttFGl.....Ptt......................................................................pssspuh....htlhGsRDlshGlhlhshhht...............uph.cslGhhl...................lssuhlsluDshlshp..............tus.ttts.hh..Hhs.....suslhhshuhhh.h............................ 0 31 77 114 +13930 PF14088 DUF4268 Domain of unknown function (DUF4268) Bateman A agb Jackhmmer:C7PAS6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 151 and 387 amino acids in length. 27.00 27.00 27.30 28.50 24.90 26.70 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.68 0.71 -4.52 63 189 2011-05-09 13:50:04 2011-05-09 14:50:04 1 8 179 0 68 194 80 136.50 22 45.35 CHANGED pl+p.cFWpphhcphpt...............tshps.scpWlshso.Gl.puhshphhh........spc..cspVpl.lscss..tt.scthF-pLhppKstIE...sphGt.cLpWpc..hs-..+ctsRIh.hhtpsss...lhscspWschhpahscphpph-phatthlpt ....l+pcFWpphhcthttp.......h....t.hpstscpWlsh.ss....Gl.pshphphhh..........spc....pspVpl.Iscss.....ppthF-pLhp.hKstlE...pphut...sl.Wcc....hss.......+csuRIh.hh.t.psls...ht.scssWsphhcahhcphhphcphatth...h............... 0 25 53 62 +13931 PF14089 KbaA KinB-signalling pathway activation in sporulation Bateman A, Coggill P pcc Jackhmmer:P16449 Family This family of small proteins is found in the membrane and is necessary for kinase KinB signalling during sporulation. There is a conserved GFF sequence motif. 
The initiation of sporulation in Bacillus subtilis is dependent on the phosphorylation of the Spo0A transcription factor mediated by the phospho-relay and by two major kinases, KinA and KinB. 27.00 27.00 102.00 101.70 21.10 20.60 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.26 0.71 -4.73 22 164 2011-05-09 13:52:14 2011-05-09 14:52:14 1 1 164 0 30 125 0 176.80 61 88.75 CHANGED GulsssIlGhllcaspa..hhhsh-hspllusllahlGlGhh.aSlISQMGFFAYLTlHRFGLuIFRShpLWNtVQllLIhFllFDLlYhRahsFuppspuhhtYlhlslllLlhullVAalKsKpT.NKsAFlPALFFMlVlTslEWhPALpls-pp...aLahhlhPLLsCNAaQLLhLH+lp ................................GGloTsIlGFlLcW-c.YsphFtsF.-stEIluV..FWlhGVGFIFSVISQMGFFAYLTlHRFGLGhFRSuS.LWNhVQLFhIAFVLFDhVYLR.lhh...AsucsSlus.ILlAshLhhFGsIVAYlKoKpT.NKKAFVPALFFMVVVTlLEWVPALRINDsD...WLYLMlIPLLlCNAYQLLlLHRL.l...... 0 10 21 24 +13932 PF14090 HTH_39 Helix-turn-helix domain Bateman A agb Jackhmmer:C7PBH6 Domain This helix-turn-helix domain is often found in phage proteins and is likely to be DNA-binding. 21.30 21.30 21.30 21.50 21.20 21.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.17 0.72 -4.50 51 254 2012-10-04 14:01:12 2011-05-09 14:58:04 1 4 222 0 46 207 80 65.50 28 53.69 CHANGED sQptclLstLpptss.lTshEAhpphslhc..huuRIp-....LR.ppGa..pIpTph....sppGp.c.+lupY...sLhss ...............ppppllttLppstt..lTsh-uh.pphushc..lu..AcIhc....LR.ppGa....sIhTpp....sphstsptpVstYhL.............................. 0 8 24 37 +13933 PF14091 DUF4269 Domain of unknown function (DUF4269) Bateman A agb Jackhmmer:C7PAK4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 176 and 187 amino acids in length. There is a conserved KTE sequence motif. 
27.00 27.00 28.80 27.30 25.00 24.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.69 0.71 -4.27 29 134 2011-05-09 14:02:34 2011-05-09 15:02:34 1 3 131 0 33 105 1 145.80 57 81.68 CHANGED acslpchplhctLtsa..sPlLsGTlPlsIDlssSDLDIlCpsp......D.hptFpptlpshauphps.FphcptpI.psh.sllssFphpsathElFuQshPsppQtuYRHMhlEp+LLphttssh+pclhpLK.cpGlKTEPAFuchLul.s....G.DPYtsLLph ........................Y-VLscLsIMEcLAlY..sPVLsGTIPIcIDs.pSDLDIlhEVp......N...aDsFEQch+SLYGoa.cG.FpIK+.KcI.+ssESIp...........VNFcaEGF-FELFAQP+PV+sQNAYRHMlVEHhLLhp+.P+lREEIl+LK.EpGLKTEPAFAQlLsI.s.....G.DPYEtLlh.L.. 0 16 28 29 +13934 PF14092 DUF4270 Domain of unknown function (DUF4270) Bateman A agb Jackhmmer:C7PAW4 Family This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 444 and 534 amino acids in length. 27.00 27.00 61.00 60.80 21.00 20.00 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -12.91 0.70 -5.86 72 233 2011-05-09 14:05:54 2011-05-09 15:05:54 1 2 191 0 63 235 345 458.80 20 93.05 CHANGED 
lGtsllss.s.shssphsshshsspohht.....s.......uVhops......shs...hlGphsDs....aGph.pusahsQlshs....ss..sa.ss.sp.........................................lDSlhlhl.Ysu.................................................................................aG.Dohss...h+lslaclsp...l.................sptaYoshss....ps.........thlush...............sassss...................................shssplplpLsps.............aspplh...ppsp............h.hpsspsFh...shh+Glalpsst.......usGshhhls........ssplplaYphpspsstst................s.....hshss.........hphsphpss.ss...lsplht..pp............sYLKussGhhsplsl.......................slppltp.........p............tp.slNsApLshhh.ss......st..................hshshPpplhLhptcpt..............pshhppsth.sst..ssahuhhp.....................ttpsspYsFs.hs.t.lpshltt.....................pt................ts.chshhlhlsltspss...............tt...........hht......stsshLhGspss............................ppl+LplhYoph ................................lGtslhss.s..phss.phss.hs.ssspohht...s.............ulho..ps..............sss......hlGph.sDs....aGph.pusahsQh.pss....ssh.sF.sttst..........................................lDSlplhlhYsu....................................................................................aG...Dohss...h+lslacLsc..sl.....................tpsptaYoshs..s.hps..........phlust................................sassss.t............................t...................shssplclpLscp.............aGpplh........pttp............t.hpssppFh...phh+Glalpsst.......usGslhhls.........ssplplaY+hptp..sssspst..............hs........hshss.................hphNphpss.ss...lpplhstss................psYL+ussGhhsplsl.......................slsplhp...p........................ppsslNsApLplhh.ss........shp.................hshshPpplhlhptcpt......................pshhppsphssst....ssahuhhp......................spsspYsFs.hst.lpshlps................................pt..................ts.chs.hhlhlslphpts..............................t.ht...................sttshLhGssts............................ppl+lplhYop.....
.......................................... 0 23 55 63 +13935 PF14093 DUF4271 Domain of unknown function (DUF4271) Bateman A agb Jackhmmer:C7PG02 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 221 and 326 amino acids in length. 24.20 24.20 24.30 27.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.82 0.70 -4.65 53 191 2011-05-09 14:10:02 2011-05-09 15:10:02 1 1 190 0 53 188 97 205.80 23 75.26 CHANGED alshlllhshhl....lshs+thhhpphp..sFhhhhhppphhhtpsspph.....haphhhhl.sslhhulhh.....ahhhppht.thhhsh............hhhhshh.hshhhhahlhKhllhphluhlFh.cchhptahhphhphhshh.ul.lLhPlsllhsYhs........hshthhhhhhlhlhllshllhlhpthplhhpphht.hahILYlCALEIhPhllL.aphlh ........................lshlLlssFhl..hhhshs+phh.pphp....sFhhht.tpsphhtspssp-h.....tapl.hLhltsslhhulhh.....ahhhpthtsthhtps............hhhlshh.hshhhhahlhKhllhphluhlFhscpt.sp.ahtshhtlhhhhGh..hLaPhsllllYhs........hslphhh.hhhlhlh..lh.spllhhh.+thplahpphhs.hhhILYhCuLEIhPhllL.aphl..... 0 22 47 53 +13936 PF14094 DUF4272 Domain of unknown function (DUF4272) Bateman A agb Jackhmmer:C7PHI5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 221 and 399 amino acids in length. 
27.00 27.00 29.30 28.80 20.50 20.10 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.53 0.70 -5.33 39 169 2011-05-09 14:13:04 2011-05-09 15:13:04 1 2 163 0 51 163 7 198.20 29 67.51 CHANGED R+t+ohphLppp.Glshh.ppLPsl..sps-sphRstcElspRAlAlhhlshhAps............tpstphhh.phl.cpasl.hphLTspE+pal...sspss...........cpthhphsW+YEuhasLhWALGhl-..pLshPsplC...Dsshshphhtph.ts.hsphhpp..splRshsEILDttDLhYRh...cWAsVcARl..pstsssu.....slstsVVhER+hALsWLls.............ht...sps.........WDcl .............R+tpshphLppp.Gl.h...tpLP.l..stppsphRstc-lspRslsLhhlh.tAttl.................tpspp.hh..phl.ppasl.hphLTspEpthlt........ssps.c................tpthhphsW...+Y.EuhhsLhWuLGllc..pLshPschC...........Dsthhh.p.hhtph..t.....p.hpphhpp..sphRshpElL-ttDhhaRh...cWAshcAch.....pspsssu...........sls.ulVhER+huLsWLls.............h.stt.........WDpl...................................... 0 21 36 43 +13938 PF14096 DUF4274 Domain of unknown function (DUF4274) Bateman A agb Jackhmmer:C7PQU5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 80 amino acids in length. 27.00 27.00 31.30 34.00 23.40 24.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.52 0.72 -4.18 39 134 2011-05-09 14:22:07 2011-05-09 15:22:07 1 2 114 0 32 115 6 78.00 25 44.42 CHANGED hsoscplahhstpaN......aD.s...shplhphllppspCDhuTALhlaWhh.......sstha.hp...........ptptsphhp-t.hphlpplhc+hhs ........soscpLahlsssaN......WD.s....uhcl.ptIlcpspCDhuTALhhFahs.......suhha.hp.t.........tt.sssh.p-h.hphlppltc+hh.t.................... 0 10 16 22 +13939 PF14097 SpoVAE Stage V sporulation protein AE1 Bateman A, Coggill P pcc Jackhmmer:P40870 Family Members of this family are all described as putative stage V sporulation protein AE, although this could not be confirmed. Proteins in this family are approximately 190 amino acids in length. 
27.00 27.00 28.00 199.60 21.80 19.70 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.16 0.71 -4.88 23 159 2011-05-09 14:32:09 2011-05-09 15:32:09 1 1 159 0 34 84 0 178.80 67 93.72 CHANGED +VILVTDGDphA++slEhsA+plGGRCIStSuGNPohLoGpcLVcLIhpsspDPVlVMFDDsGhhGcGsGEpAhcaVssHssI-VLGslAVASsTcts-hs+VDlSIDRpGcls-huVDKpGhs-hc..t+lpGDTV.sLcpL.slPlIVGIGDIGKMstpDchc+GuPITp+AlchILE....RS ...lhVTDGDEYAKRTIEllsK-hGGRCIStSpuNPT+LTGcclVELIhQTPYDPVFVMFDDSGalGEGuGEcALKYVATHcpI-VLGlLAVASNTHphEWsRVDVSVDRsGsLTEYGVDKaGlP-sE..lGRIsGDTlYCLDcL.sVPVIVGlGDIGKMsGsD-a-+GSPIT+KAIpLILERS...... 0 16 26 29 +13940 PF14098 SSPI Small, acid-soluble spore protein I Bateman A, Coggill P pcc Jackhmmer:P94537 Family This family of proteins is putatively assigned as a small, acid-soluble spore protein 1. Proteins in this family are approximately 70 amino acids in length. There is a conserved LPGLGV sequence motif. 27.00 27.00 36.30 49.20 19.60 17.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.08 0.72 -4.05 20 184 2011-05-09 14:51:40 2011-05-09 15:51:40 1 1 184 0 35 94 0 64.70 59 90.65 CHANGED hNLRpAllsNlpssop-pLccTIsDAIpsGEEKhLPGLGVLFEshWcpussppKpphlpsLcpuL .hNLRsAVlANVoGNoQ-QLp-TIVDAIQSGEEKMLPGLGVLFEVIWcsAsEsEKcEMLcTLEpGL.. 0 9 22 26 +13941 PF14099 Polysacc_lyase Polysaccharide lyase Eberhardt R re3 Jackhmmer:Q7UP23 Family This family includes heparin lyase I, EC:4.2.2.7.\ \ Heparin lyase I depolymerises heparin by cleaving the glycosidic linkage next to an iduronic acid moiety [1,2]. The structure of heparin lyase I consists of a beta-jelly roll domain with a long, deep substrate-binding groove and an unusual thumb domain containing many basic residues extending from the main body of the enzyme [2]. This family also includes glucuronan lyase, EC:4.2.2.14 [3]. The structure glucuronan lyase is a beta-jelly roll [4]. 
29.90 29.90 29.90 30.30 29.80 29.80 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.68 0.70 -4.69 54 240 2011-05-09 14:57:12 2011-05-09 15:57:12 1 21 167 6 118 254 933 233.50 15 63.76 CHANGED tshshpushpssshsphpp..ttsstps..........................sssshpGp.................hAl+hphp......hsss.st.......................ut.RuEl.....ptsshphusphaYsauh...hlspsas................ss.p..hlsQa+tps....s...................................................tpPshtlhl.......pssphthphtsss...........................ts.tttphhshsslpt..GpWpchslcs+Wuss........psGhhclWh..........sG..phlhppps.ssthss.................pt......ahKhGlYpsshppsss............laaDplth ................................................................................................................t............................................................t....s...................shp...........tss.......................................sp.RsEl...........tt.th..thGpshhYsauh...hlss.s.at................ss..ptthlsQa+sts...........................................................................stPs.htlth.......pssphhhphpsss........................................................tstphshhhshs..s...hst.....upWhchhlphcasst...................psGhhclah..........sG....ptl.hp...tps...tshhss.................spsh..hhKhGlY+ss.ps........................h......................................... 
0 41 77 100 +13942 PF14100 PmoA Methane oxygenase PmoA Eberhardt R re3 Jackhmmer:Q7UPP4 Family This family is a putative methane oxygenase [1] 27.00 27.00 39.80 38.00 21.90 21.00 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.82 0.70 -5.19 65 174 2011-05-09 15:14:35 2011-05-09 16:14:35 1 6 100 0 95 187 116 281.70 25 73.69 CHANGED sspslsl...thsG.p......lspYhas........ht......ss....+PalHPlpoh.uGsslTch...........pPtDHsHHpGlhhuhscVs....s....hsFWsspshh................tp.Gphtapshpthps....ssstupl...spclsW.hst......sGph.lLpEpRslshpss....st..............shhl-asssLs.....................sssp.slshs...ss............tY.GGhthRsspsh............................ssuplhsutG............tpGps..........shGpp....us.........WlshsGph.......ssp...............tsslshhscPsN.......thP......s.WasR...shuhhusssssth.........ptshsls...sGcslph+YRlllt-Gsh.sssc.lsshhppa ........................s..thsl.thsG.p.lhpYhhts.........hpss.....+PalHPlpTh.uGss.lTch............pPsDH.....hHHhGlhhuhscVs........GhsFWsspshh...................pp.Gphpppshpthts....ststsph...spplsW..hst......sGp....lLpEpRslshpsh....sst..................sahlDhshsLs.....................sssp.slphs.ss............sY.GGhhhRsscph............................ssuplhsutG...................tpucp...........shGpp....us.........WlshsGph.....................sst...............ts.olshhspPsN.......ths......s.WalR....sts.hhusssuhsh.........ptphsls...sGcslph+aclllt-Gth..stsphsthhpt.................................... 0 48 79 95 +13943 PF14101 DUF4275 Domain of unknown function (DUF4275) Eberhardt R re3 Jackhmmer:Q7UUJ6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 140 amino acids in length. 
27.00 27.00 31.60 31.50 20.20 18.50 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.03 0.71 -4.11 13 119 2011-05-09 15:22:53 2011-05-09 16:22:53 1 1 112 0 16 94 1 132.80 57 95.41 CHANGED ME....lhchLcp..+slKlpEhspWGshhRKpWEcpFAsHLohcEKcpIhlhsscshs..GYLWHlFSY-p+...sCLcGcEAcpAFcpcpKssCYlFaQasD-slllEcAspLpAsDL.s.......Eh......DlYVVDc-FsWTaVhTHEscahGPYFs ......................ME.hl-hL++..KshKVREhpp.WGsYFRKRWEDpFAN.HlScEEKE-IaLYsDchsC..GYLWHIFSYE+K...KCLEGcEAEpAF+sEsK+-CYIFYQHsD-VLl.lcDAShLph-Dllp.......Essp...ha+uDlYIVDK-FTWTFVKTHEccW.CGPYFs......... 0 6 10 12 +13944 PF14102 Caps_synth_CapC Capsule biosynthesis CapC Eberhardt R re3 Jackhmmer:Q7UXU8 Family This family of proteins play a role in capsule biosynthesis. They are essential for gamma-polyglutamic acid (PGA) production [1]. 27.00 27.00 35.60 29.30 24.20 26.90 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.53 0.71 -3.99 31 208 2011-05-10 07:29:10 2011-05-10 08:29:10 1 2 193 0 50 120 38 120.40 43 66.61 CHANGED slGlllSLlasEpsGlssGGllVPGYlALhh.spPhplhlslhhSllTahll.phlu+ahl.......lYGRR+Fshhlllull........lphlhphh..h......................h..phpslGhIlPGLIAsphp+QGlhhTls ...............lGllLSLlFsE+hGlsPuGLVVPGYL.ALhh.spPlhllsllllSlLTYhIV.pslS+ahI.......LYGRRKFAAhllsGhl........L+hlhchl..h...............l.ssFths..-hpuIGlIlPGLIANoI.p+QGlshTl.h........... 0 18 37 47 +13945 PF14103 DUF4276 Domain of unknown function (DUF4276) Eberhardt R re3 Jackhmmer:Q7UYP7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 190 and 224 amino acids in length. There is a single completely conserved residue E that may be functionally important. 
27.00 27.00 27.40 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.29 0.71 -4.45 56 288 2011-05-10 07:40:08 2011-05-10 08:40:08 1 2 220 0 123 324 17 189.50 16 90.84 CHANGED llEtsotpphLcslL.+hh........thphphhshthc.slppphscph..+sapphh..........stllllhDpDs.ssstphppp......p.htp.t..tpt..............s+lsspEhEuWaLuD.hpAl..pphhsph.sphsh.......tt.pKhpsP-sl.susppt................tspYp......Ksptuptlustl......shspsp.S.SFpp.....hlpulpph ....................................................................................................hhEs.o.ppthlpt.ll..hh..........................th.h.hh.h.ht.pht.tt....hs..t.th...............pthpp.h.t.....................shlhshhD.....h.s...hss....st...ttph................htt...........t...h.............s...t.......phhshlt....hcEhEuWhhuD.h.p.u.l..tphhsp......tphth...................t.htpht.sP-plpsu..sppt......................thhstYp.............Ksh..tu...tlstpl.....s.phhppp....sspFptalptlt................................................................................................... 0 44 88 113 +13946 PF14104 DUF4277 Domain of unknown function (DUF4277) Eberhardt R re3 Jackhmmer:A8ZKM0 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 110 amino acids in length. There is a conserved NGLGF sequence motif. 25.40 25.40 25.40 25.60 24.70 24.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.27 0.71 -3.89 23 342 2011-05-10 07:59:00 2011-05-10 08:59:00 1 2 80 0 129 362 22 108.60 39 26.46 CHANGED Mp.........p.pl...+sLDHLGlVAGlhDElslschIDchLspcppc+.lSpGpslKAMILNGLGFsu+sLYLFPpFFpsKslE+LlGpGlpA-aLNDDtLGRsLDcLYchGsoplFtplAlp .........................................................thpl.pslDHLGlVAullDclGlsclIsph.lsh-..s.p.c.p.losGpsVKAlllNGLGFss..psLYlaspFFpshshE+LlGps..lpPcaLNDDtlGRshDcLYchu.lsplF..lsh.p............. 
0 43 106 116 +13947 PF14105 DUF4278 Domain of unknown function (DUF4278) Eberhardt R re3 Jackhmmer:A8ZNS8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 58 and 136 amino acids in length. There is a single completely conserved residue R that may be functionally important. 25.30 25.30 25.40 25.30 24.70 25.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.58 0.72 -4.02 53 145 2011-05-10 08:06:48 2011-05-10 09:06:48 1 2 71 0 65 166 197 56.30 30 62.60 CHANGED M....cL.....sYRGlsY-.hsssslp...............sspsp............hsupYRGhsa.......hpphsh.psp...hsLp.YRGVsY ......................M..pLsYRGlsYs..s.sssp...............htpsp............lphpYRGhsY......phpphppht.s...t.hp.Lp.YRGlsY.................................................. 0 5 47 58 +13948 PF14106 DUF4279 Domain of unknown function (DUF4279) Eberhardt R re3 Jackhmmer:B0C1A4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 134 and 145 amino acids in length. 25.00 25.00 25.60 25.40 24.70 23.40 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.23 0.71 -4.22 48 184 2011-05-10 08:13:26 2011-05-10 09:13:26 1 1 166 0 42 156 3 116.00 23 83.66 CHANGED lhu-shs......s.-plTphlulpPopsht+Gshhpss...p......hhtphssWtlsoptttpts...lp..cplcpLlppLp....sttstlppltpphsh....p......thhshh.shssspsss....................lsschlphlsslGsplsl .............................lhG-.hss.-tlTphLslpPTcshtKGchhtppp...........hhpppssWpls...othppshc.....lp..splphllcpLp......scpptlpclppcasl...p.........hlhhlhhp.hpssp.sPs....................lspchlphhuslsu-lch......................................................... 
0 13 31 35 +13949 PF14107 DUF4280 Domain of unknown function (DUF4280) Eberhardt R re3 Jackhmmer:B0C1L7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 129 and 456 amino acids in length. There is a single completely conserved residue C that may be functionally important. 25.00 25.00 25.30 25.50 24.50 24.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.92 0.72 -10.90 0.72 -3.74 69 265 2011-05-10 08:22:45 2011-05-10 09:22:45 1 18 148 0 84 252 15 112.10 27 38.96 CHANGED VssGAhlpC.shGsss...utLpVhs....ps.plhspu.....p.hAohtDhhsh.....hNlhsFG.........hCpsh..sssss..................ChPs......ss.Wt...st.sslhl...........suts.sLpssSphhCsa.G..Gh..Iplhss......GQt .................................lstGAhlpC.shGsss...shLtlhs.....tp.ts.hhss.....................tshushtDphsh.....hNI..sFG...............................hCps...ssssht...............................................ChPs...hs.Wh..sst.ssshl...........ssts.sLspsSphhCsh..G...Gh...IphhssGQ................... 0 24 63 73 +13950 PF14108 DUF4281 Domain of unknown function (DUF4281) Eberhardt R re3 Jackhmmer:B0CBE6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 147 and 232 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important. 
27.00 27.00 32.80 32.40 23.50 22.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.70 0.71 -4.04 70 170 2011-05-10 08:58:51 2011-05-10 09:58:51 1 2 128 0 92 165 946 126.70 30 64.30 CHANGED stlFslushhslPhWhLhl........hhP...php.......hTp.plhpohh..shlhLu....hlYhhlhhsuhss...ssssh...........................hssL.s..ultplF...............usptsshsuWl.HaLshDLFVGpWlhh-upcpu.l......hhh.sLlLshhhGP.lG.LLsa ......................t..lFshushhslPhWhLMl........hhP...php.......................hTc..clhpShh..shlsLu....hlYshLlhhuhsstshthh..........................hssLs.ulsphF...............usptssssuWl.HhLshDLFlGRWlah-utc..ps......l...........hshhsLhLshhhGP.lGLLsa........................ 0 45 80 89 +13951 PF14109 GldH_lipo GldH lipoprotein Bateman A agb Jackhmmer:C7PII4 Family Members of this protein family are predicted lipoproteins, exclusive to the Bacteroidetes phylum. Proteins in this family are typically between 155 and 167 amino acids in length. Members include GldH, a protein linked to a type of rapid surface gliding motility found in certain Bacteroidetes, such as Flavobacterium johnsoniae and Cytophaga hutchinsonii [1]. Gliding motility appears closely linked to chitin utilization in the model species Flavobacterium johnsoniae. Not all Bacteroidetes with members of this protein family may have gliding motility. 27.00 27.00 27.30 27.60 26.00 22.50 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.38 0.71 -3.93 60 194 2011-05-10 08:59:56 2011-05-10 09:59:56 1 2 190 0 54 193 141 132.50 28 81.38 CHANGED psplac.papsls..s..uWp+scslpFpl.shtDss.s.YslhlslRssssYPapNLaLllphphs..p.uph.....hsDTlphpluc.ss....GphhG.pGhus.lhppph.h....pshpFs.csGpYplplppsMRcs.......sLpGIs-VGl+l ......t.ptlYc.pYpsls..s...uWp+s-s..lpF.sl..shp..D.sh.ssYplhlslRssssYPY..pNLaLhlp.hphs....ssph......hsDTlphpLs-..ps....GphhG.pGhus.lhppph.h....psh.ph..csGsYplplppsM+cp........LpGIsDlGl+l.... 
0 23 48 54 +13952 PF14110 DUF4282 Domain of unknown function (DUF4282) Eberhardt R re3 Jackhmmer:B0CFB0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 93 and 155 amino acids in length. There is a single completely conserved residue E that may be functionally important. 27.00 27.00 28.20 27.80 26.90 26.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.99 0.72 -3.79 54 193 2011-05-10 09:26:58 2011-05-10 10:26:58 1 3 179 0 82 176 29 83.40 25 68.64 CHANGED h.hsFccalTspllphlYhlulllhslhslssl.....huuh..............................s.hhuhhtllh.uhls...hlhtllhsRlhhEhhlshh+ls-slppltcpt ...........hpF-palTPpllphlYhlsllhlslhuls..sh........hsuh.......................................hhhhl...h.uhlh....hllshlhsRlhhEhllslF+ls-sLpcItcp.s..... 0 26 54 70 +13953 PF14111 DUF4283 Domain of unknown function (DUF4283) Coggill P pcc Jackhmmer:A0MDU5 Domain This domain family is found in plants, and is approximately 100 amino acids in length. Considering the very diverse range of other domains it is associated with it is possible that this domain is a binding/guiding region. There are two highly conserved tryptophan residues. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.61 0.71 -5.13 121 920 2011-05-10 09:46:33 2011-05-10 10:46:33 1 139 22 0 264 872 0 122.60 17 16.68 CHANGED lplsccth.......hltp....sLl....G+a.......hs.p.......lps.ltphhtptWtLpu...plpltpl..s...p...s......hhlhcFcptt-hc+VLppGshthps.hhlhLc+W.s...spssshp.pthpph.lWVRlhsLPlphasp.phhcplGsthGt.hlplD.pstphpphp..as.....Rlh.Vc ....................................................................h.......................................................h....h.t....a.t.....t...ththh.h...t...t......s......hhhhpFpt.t-h.pplht......t...us..h.h.h..p.s.....hhl..hlp..pW.s.........t.s.....t.....t.ph.h..hWlcl..h..slP.hthhst.phhpt.lu.pthGt.hhtlD.t.s..t....h....hp....hh+lhl........................................ 0 25 157 230 +13954 PF14112 DUF4284 Domain of unknown function (DUF4284) Bateman A agb Jackhmmer:C7PJ13 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 124 and 142 amino acids in length. 27.00 27.00 29.30 28.00 25.40 26.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.93 0.71 -3.64 22 158 2011-05-10 12:25:39 2011-05-10 13:25:39 1 2 101 0 24 145 0 117.30 28 81.12 CHANGED hVolWlG.sFpopp-hppYh-.........cY....-.E-ucplsSpFtp-hsltah..DcDhlEtshhsps.t............slppLLpshSYspphlpph.phh...ph..pphNslIhlYshc..Ystphcpsp............lpalGshpYc .................VSlWlG.sh.p.oppplcpYh-l...................pY......c.--G-pl.supFhpDFsls.hh..D-D..hlEhphhpps.ps............slptLLpshS.Y.cp.pl.lsphhphh...th..pphNulIhlYsap..Ysppsppspt............hpalGshtY............................. 0 6 13 19 +13955 PF14113 DUF4285 Domain of unknown function (DUF4285) Bateman A agb Jackhmmer:C7PJQ8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. 
Proteins in this family are typically between 157 and 206 amino acids in length. 27.00 27.00 27.70 27.60 25.90 25.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.93 0.71 -4.10 48 220 2011-05-10 12:39:33 2011-05-10 13:39:33 1 3 185 0 60 159 4 122.90 37 69.00 CHANGED ssapNsCAl.RhShuLppsG....hshpshs.............hthhh..ss........+hhhh..RspchtsaLp..p...s.......................ssshtsplpu...............+pGIIhF...hthWspu........uG...HlsLW...NGsphs......sps..................................................shtpspplhFW ...........................................................h..apNsCsIRMSYsLNto.G....hslspto................htpluGsDs........KhYha..RVs-hhcaLp+p.......hs...........+P-hIs....................sssppuchhG...............KKGIIlh.pspGWosA........pG...HlTLW...NGohso..DpCah................................................................................ 0 19 28 44 +13956 PF14114 DUF4286 Domain of unknown function (DUF4286) Bateman A agb Jackhmmer:C7PK46 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 100 and 112 amino acids in length. 22.80 22.80 22.80 22.90 22.60 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.11 0.72 -3.72 46 171 2011-05-10 12:48:32 2011-05-10 13:48:32 1 2 166 0 61 159 102 97.20 30 86.20 CHANGED IYNlThpl.-csltccWlpWh.pcpHIP-llsost...Fppuplh+llscp-ts.........GpoYolQYpscspssLppYhpcpuscLcp-shppFusKhluFc....Th.LEhl ........IYNsThpl.-cslccpaltWh.pcpaIP-lhpsGt...hpps+..ls+lLs..cc-pt.........GpsYSlQapscspssLpcahpc....puscLpp-hhchFt..c.KhluFt....Tl.hEll................................. 0 31 56 61 +13957 PF14115 YuzL YuzL-like protein Eberhardt R re3 Jackhmmer:C0H3R0 Family The YuzL-like protein family includes the B. subtilis YuzL protein Swiss:C0H3R0 which is functionally uncharacterised. 
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 27.00 27.00 33.40 33.30 23.00 17.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.85 0.72 -7.92 0.72 -3.68 7 211 2011-05-10 13:53:45 2011-05-10 14:53:45 1 1 121 0 23 67 0 42.10 53 90.24 CHANGED sKhKKsPSKsGlSAssV+Gp.GsT.pcsGuh+psStppphKKc ..KlKKsPS+uGlSAssVcGp.GsT.s+ts.u.st+.sSsNpphK+.. 0 1 11 14 +13958 PF14116 YyzF YyzF-like protein Eberhardt R re3 Jackhmmer:C0H3T9 Family The YyzF-like protein family includes the B. subtilis YyzF protein Swiss:C0H3T9 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 30.60 30.20 21.20 17.50 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.37 0.72 -4.01 25 164 2011-05-10 13:59:09 2011-05-10 14:59:09 1 1 164 0 35 95 1 48.20 47 83.06 CHANGED sCcEHlElAlDpaVD-hEpsPslpclpcsp......sppCcaC.cptApYlVs .sChEHlElAlDhhVDEpEluPsIppl-soc....p.ppsC.-aC.pspAsYlVs.. 0 11 23 28 +13959 PF14117 DUF4287 Domain of unknown function (DUF4287) Bateman A agb Jackhmmer:C7PKG7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 70 and 180 amino acids in length. 27.00 27.00 27.90 27.00 24.20 23.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -8.90 0.72 -4.28 59 198 2011-05-10 14:04:37 2011-05-10 15:04:37 1 2 162 0 95 200 86 60.60 38 62.24 CHANGED uhpoYlss.Icc+TG+slscWhpll...cpp.........sssKat-hVsWLKsEHGLGHGHAsAlVthhctp ...........u.toYhss.IEcphG+sls-Whpllcpp.........shs+ahElVuWLKsEH.GLG..HGHAsAlVshhht.t............ 
0 33 71 88 +13960 PF14118 YfzA YfzA-like protein Eberhardt R re3 Jackhmmer:C0H3X6 Family The YfzA-like protein family includes the B. subtilis YfzA protein Swiss:C0H3X6 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 27.00 27.00 32.60 32.50 19.60 18.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -10.23 0.72 -3.88 4 111 2011-05-10 14:05:57 2011-05-10 15:05:57 1 1 78 0 6 58 0 91.00 59 97.31 CHANGED MssK...tpPl..hKRsWhpolssFllsQLlFIIhEhTuWhP..sF+-.Gshhs+lVsSpFFTcWFu.YcsPaFNllTlFhuIhhll.slhGAhKDlhspt...N ..........hh....K...tpPl..hhRsWh+hLGsFhlhQLlFIlsE.lTuWsP..NF+..GpFhsR....llN.S..pFFTEWFoPYKhPpFNVhTAFaAIhLl..uLluAhKDhpoRKp......... 0 2 5 5 +13961 PF14119 DUF4288 Domain of unknown function (DUF4288) Bateman A agb Jackhmmer:C7PM87 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 27.00 27.00 27.60 27.00 24.70 24.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.91 0.72 -3.86 22 131 2011-05-10 14:09:39 2011-05-10 15:09:39 1 2 128 0 20 109 2 90.90 45 74.05 CHANGED aYlsphlhchh..........t.psstpppshhcEphlLlcAcop....-cAac+upc.hut.ppcpsapN..p.sptlpachhsls-lh.l.h-pl-c.G....sElau ...............hYuVKLLFEol..........hspch-cs+coLFEES.IILVKAsSh....EEAHtLuEp.lAh.puEc.TYcN...hh..sEQlTWoFRKl...LcVFEL..spssFEo.G....pELYu.. 0 13 19 19 +13962 PF14120 YhzD YhzD-like protein Eberhardt R re3 Jackhmmer:C0H3Y1 Family The YhzD-like protein family includes the B. subtilis YhzD protein Swiss:C0H3Y1 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved GKL sequence motif. 
27.00 27.00 31.30 73.80 23.10 17.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.89 0.72 -4.07 15 141 2011-05-10 14:12:53 2011-05-10 15:12:53 1 1 141 0 23 80 0 61.00 67 97.73 CHANGED Mp.sYhLTVF-psGEpLL-EpFEAusD-EAKchGcppLcE+shpc+THRhssu.GKLlLFHR MG.lYVLTVFEKDGoKhLDESFEAATEcEAKsKGEuILpEKGLaEKTHRCTSoAGKLVLFpR. 0 6 15 18 +13963 PF14121 DUF4289 Domain of unknown function (DUF4289) Bateman A agb Jackhmmer:C7PM99 Family This family of membrane bet-barrel proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 655 and 722 amino acids in length. Swiss:Q2S343 is identified by Gene3D as a membrane bound beta-barrel. 27.00 27.00 39.20 27.70 21.30 20.70 hmmbuild -o /dev/null HMM SEED 614 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -12.85 0.70 -6.10 62 213 2012-10-03 17:14:37 2011-05-10 15:18:18 1 3 201 0 58 222 301 617.50 27 91.63 CHANGED .slpsapl.s.hht-pphs..sssThpptapp.shhpt.phchhshuNlGpshpshhat.c...p.htshhhhcshchhhhps...............cclpaasshoPhTpLhYpssspp...tpphcuhauhNhsc+.......hshuhpachlhupGhYps........ptsuphN.hphhsuYhuc..RYphpstasspch.pspENGGl.ss-shhptspph.....................ppp......aps.splsspL..pps...pNp.cspchalsHcYplshpp.hsth......................................................................................................................shhHohpa-spphhapps.......t.h..psht..................h.ssshpDpTphhplpNphuls......asph.hh..utlpsahstchhp.Y.....thssh.......................hh.pphpppplslGGpl.t+p.uphhchpupu...c..htl.stshuphplpuphshsh...t-sh.pltushhh.pspsPsFhh.phapSpa..a.WpN......shsp.ppppltup..lshp+htsplpsshsp.........lcNYsYF.......................sspstspQpussIsl.LphplppcFp.hGhaph-NplhaQpsos..psl.........LslPplsshsslYhphpl...hp.KsLhlphGhsh+YFTp..YhAssYsPslupFhlQs.........phclGsaPllcsahNh+l+psRlFlthpHlNsuh.ss........sYFhsPpYPhpshhl.+FGlsWNFFs 
...............................................................tlh.apl.p.hhsspphh...ssDThtpta.pp.shhpt.php..hs.huNlGsPh.shha.p+pp.......t.hhhh.psh...shhhhps...............schpahso...oPhTpLtYppuusp...ppphcuhauhNhs+c.......lshGhsachlhucGh...Yps..........ptouphN.hphauoYhuc..+Yp..hpshhsspph.cssENGGI.ss-phlspspph.................ppp...........ap.s..splsshL.sps..............tNp.cspphalspcYsLuhp+ph...s.t....tsth.........................................................................................................uhhHohph-pppppapst.....tpspsaatpshh.....................ssshpDpTphh..s.lpNphuls.........sFsph..sp....usLpuahsachhp.Y..ph.sh..............................h.pphsppplhlGGpl.sKptGphhchpspu..E...hsl....spshGphplcuphshsh.....t-ss.pltApu.h...psptPs....Fhh.ppapSpa.....ahW.cN......shspphpspltup.....ls..hp+h...tspLpsshps.........lcNYsYF..............................sppstspQpu.usIpl.lphplppcF...+.hGhap..h-Nplh..aQpoos..psl.....................LslPplsshsslYhphpl..h..KsLplphGs-s+YF.Tc..YhAs..sYsPslsp...FhlQs............phcl.Gs.Y...PllssasNh+l++.sRh.FlhhpHlNsuhhss.........sYFhsPcYPhssth.l.+hGlsWNFas.............................................. 0 25 52 58 +13964 PF14122 YokU YokU-like protein Eberhardt R re3 Jackhmmer:C0H434 Family The YokU-like protein family includes the B. subtilis YokU protein Swiss:C0H434 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two conserved CXXC sequence motifs. 27.00 27.00 28.10 28.00 26.60 26.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.40 0.72 -10.05 0.72 -4.26 10 136 2011-05-10 14:25:58 2011-05-10 15:25:58 1 1 128 0 14 62 0 83.70 68 95.75 CHANGED sCcWCsppcAssspsoVYWELPDGT+AIEIs-TPuIsCSuCGMsYQ--sllcEIEDQLlLIDoKKLPcslTYcpLMsp-RlLKRNYF ........................MWCDSTEAKESLNTVYWELPDGTKAIEIQ-TPCISCSSCGMDYQuDpTVKEIEDQLFLIYTKDLPKQLTYEELMuRPRLLKRNYF................... 
0 2 8 10 +13965 PF14123 DUF4290 Domain of unknown function (DUF4290) Bateman A agb Jackhmmer:C7PMG8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 200 and 221 amino acids in length. There are two conserved sequence motifs: EYGR and KLWD. 27.00 27.00 50.20 50.10 26.70 26.50 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.08 0.71 -5.08 48 201 2011-05-10 14:29:21 2011-05-10 15:29:21 1 1 197 0 55 186 178 174.90 47 82.69 CHANGED L-YNTpRp+LllPEYGRplQpMV-aslslcD+-ERs+sAcsIIslM.GphpPcL.RDssDapHKLWDpLhIMSsFcLDlDsPashsscEpLtp.+P-.lsYPpsph+aRaYGpsIpphI-pAhphEcG-c+-uLlhsIANpMK+salsWNK-oV-DchIhpcLt-LScG+lpLstss.cL ......h-YNTp+c+LhlPEYGRpIQpMVDaslolpD+cERp+sApoIIslM.Gsh.PHL.RD.VsDFpH.KLWDHLtIMSsFcLDlDhPY-l.spc.-s.L.ts.+P-.lsYPpsph+YRHYG+slcphIc+Ahchp-G-c+csLlthIANaMKKsalsWN.K-o.V-DcpIhcDLt-LSsGclpLstpt..h....................... 0 24 49 55 +13966 PF14124 DUF4291 Domain of unknown function (DUF4291) Bateman A agb Jackhmmer:C7PN25 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 190 and 214 amino acids in length. There are two conserved sequence motifs: VYQAY and RMTW. 
27.00 27.00 49.30 38.30 21.00 19.80 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.15 0.71 -4.66 57 206 2011-05-10 14:31:31 2011-05-10 15:31:31 1 4 187 0 91 186 3 173.00 44 83.25 CHANGED +pIRA..ta.sscTIsVYQAYsspIAcsAlpst+Fs.sP.FphsRMTWIKPSFLWMMYRuGWupK.tsQERlLAlcIpRcGF.-hhLppusLSph..............................tsthh.sptpWppplppus......................VRlQWDPERslphpsL....saRSIQlGLsschlc.cYsc-WIluIpDlTshs+clcphlpsu.....phcpAttLLPt..EcsY ...................pIRA..pa.sppTITVYQAYsssIAssAlcsG+Fs.us.Fph..sRMTW.IKPSFLWMMYRuGWutK.tsQE+lLAlcIpRpuF.-hhLppAsLSph.................................................ps.plasp.tstWcpth...ppus......................VRVQWDPERsl.php..L....taRSlQlGlssphlp.pas--.WI..VuIpDlTshs+ch+pllpsG.....chppAtthlP.Ec.Y.................. 0 35 61 79 +13967 PF14125 DUF4292 Domain of unknown function (DUF4292) Bateman A agb Jackhmmer Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 243 and 287 amino acids in length. 
27.00 27.00 27.20 27.30 26.40 25.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.32 0.70 -11.09 0.70 -4.91 73 228 2011-05-10 14:33:12 2011-05-10 15:33:12 1 1 225 0 80 221 184 198.30 21 75.77 CHANGED ssphcslsuchplslp..tpppp.sssssl+hc+-ctIhlSs..shlG.hpVu+hhlTP-plhhhD+lpppYhpu..saspLp....chlshs.lsFp...pLpslLlGp..hhhshppphp......hphs......sp.th.lp.pp..........sthphthhhsspshplpphpltptss...ppplphpYtsapph.....stthhPtplplth.p......ttppsplslpasc...hsh.spshphsaslPspYcpl .................................................................................................................t...hpslsu+hplslp..sppppholsssl+hc+cchlhlol..shlt.hEluRh.lTPDplhhhD+hs+.pYhcu.......saspLp....plhshs.lsFp...pLQsLlhsp.......hhhstppphp.......................hp.t........tt.h.lp.pp.....................tthth.hhhsstphhlt..p.plt..t.st...t.tlphpYtsap.h.....stt.hPtphplth.p.......tpphplplphpp...hph...spsh...p...h.hphsptap................................... 0 33 61 76 +13968 PF14126 DUF4293 Domain of unknown function (DUF4293) Bateman A agb Jackhmmer:C7PP09 Family This family of integral membrane proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 136 and 154 amino acids in length. 27.00 27.00 27.10 29.00 25.30 24.60 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.91 0.71 -4.28 71 211 2011-05-10 14:35:41 2011-05-10 15:35:41 1 2 209 0 68 197 86 143.70 32 97.01 CHANGED IQRIQTlYLLlssl.hssshh.hhPlhphssssthhh..t.h..h............................hhhslhhlsullulhuIFhaKpRtLQhpLsh...lshlLhl...hhhulhshhhhshtsph........hshphthuhhlPllullhhhLAt+uIp+DEcLV+usDRL ............................IQRIQTlYLLlssl.lhhshh..hhPlhphhss..shhh.....hthhsh.h....t.....................slhslhhlssllulhoIFhaKpRh........lQhplsh....hshlLhl....shhshhs.hhhhslpsph..........hshphuhuhhLPllullLhhLAh+uIt+DEtLV+usDRL...... 
0 25 53 65 +13969 PF14127 DUF4294 Domain of unknown function (DUF4294) Bateman A agb Jackhmmer:C7PPB4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 192 and 226 amino acids in length. 27.00 27.00 83.40 82.90 23.60 23.10 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.85 0.71 -4.63 44 181 2011-05-10 14:42:30 2011-05-10 15:42:30 1 1 178 0 44 172 171 154.90 44 73.32 CHANGED GDoIs....hlpLspVhlhsph+.Fcsc....c-cppYhhLhRcV+KVhPhAKhsscplh-htchLpolss++s+c+ahKhlcKhlcccaosclKKLThoQGplLIKLlpRQTupToY-LlKshhuGa+AhaYpshAthFshSLKccYcP..stEDhLlEcllhps .......................tDoI...hlpL.spValas.hc.F+..sc....c-+pcYh+LlcsVKKVhPhAK.ssctllEh.caLpTlPsc+t+c+ahKpVEKtlccpYoschKKLohoQGKlLIKLIcRpospooY-LlKuahGsa+AsaYQshAtlFGsSLKccYDP..ps-DtLhEcllhh.s........ 0 15 38 44 +13970 PF14128 DUF4295 Domain of unknown function (DUF4295) Bateman A agb Jackhmmer:C7PPC2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There are two completely conserved residues (K and Y) that may be functionally important. 27.00 27.00 27.20 32.00 26.00 26.60 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.29 0.72 -4.34 27 189 2011-05-10 14:44:08 2011-05-10 15:44:08 1 1 188 0 52 149 49 47.70 61 93.33 CHANGED AKKs.....VATLpput..uKphoKlIKhVKSsKTGAYsFcEchlss-pVp-al ..............AKKs.....VATL+pGp...G+saTKVIKMVKSPKTGAYsFcEpMVsNEpVpDFh....... 0 24 47 52 +13971 PF14129 DUF4296 Domain of unknown function (DUF4296) Bateman A agb Jackhmmer:C7PRC6 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 90 amino acids in length. 
27.00 27.00 28.00 29.60 26.30 26.10 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.82 0.72 -3.83 55 193 2011-05-10 14:45:37 2011-05-10 15:45:37 1 1 192 0 54 193 131 86.70 28 35.94 CHANGED PcslIscccMpslLaDhaLscuhtt.........tsppptstpthshpphlacKaslssspFspShsYYspp.scphpcIYcclpcRLppcppshsp ....................PpslIspscMcslLYDhHlupuhtt.........stshspshpphthh.ptla+KaslopspF-sShhaYscp.s-hhpcIYccVpcRLcscppth..t........ 0 23 48 54 +13972 PF14130 DUF4297 Domain of unknown function (DUF4297) Bateman A agb Jackhmmer:C7PUH4 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is typically between 207 and 221 amino acids in length. 27.00 27.00 27.10 27.10 26.20 26.20 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.60 0.70 -11.55 0.70 -4.39 34 146 2011-05-10 14:49:40 2011-05-10 15:49:40 1 6 139 0 34 124 9 198.00 20 46.26 CHANGED .pEpuGstutptFcaQhpaAlhpllchhpsps.shtlhhEh+-Dlslhps.t.t.sph..-ahQVKTpcss...pWohssLsch...............................pppp.............Shlu+Lhpp..pppas.stssplthVostsh...shslcshphshppt.......splpsphtpplp...pp...lpsphshsp..h.hstlhhl..hssls.Lcshppplhuplsphl..chhsp.stp.spslhctlhcplcc+us .....................................p-puGstuhptFpaQhphAlhphhphh..p......p..ps..satlhhE...........hc...........-.Dlsltps......sph......sFhQVKspp.......pt....saoh..ps...lspp.......................................pptpt..............Shlu+Lhpp......ptpht...ptsp......c..lthlospsh........shs.cp..phphtsh.........tplppp.tpplp...pp.....................ltsphs.t...h....phlhhh....hssls..lpsh.tp..hhuphsp.h..phh.p...p.spslhp.lhpphp.+u........................................................................................................................................................... 
0 13 24 29 +13973 PF14131 DUF4298 Domain of unknown function (DUF4298) Bateman A agb Jackhmmer:C2M2S9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 94 and 105 amino acids in length. There are two completely conserved residues (Y and D) that may be functionally important. 27.00 27.00 27.20 29.30 26.80 26.20 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.04 0.72 -4.11 25 275 2011-05-10 14:52:29 2011-05-10 15:52:29 1 1 267 0 31 183 1 85.70 29 84.60 CHANGED +IpchpctaschpchlscLpcsl-papcshpchppLpsYY.oppWhcDhps.pps...phss.chptuVLSEDulashhu-phpLAhphLclus ..............+IpcMpphhpchsphLspLpcslpphpct..pphtpLcsaY.op-ahcshct.pps...........phss.phstuVLSEDulashhs-pppLAhphLcl....... 0 10 18 27 +13974 PF14132 DUF4299 Domain of unknown function (DUF4299) Bateman A agb Jackhmmer:C2M3B8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 275 and 313 amino acids in length. There are two conserved sequence motifs: RGF and DAY. There are two completely conserved residues (P and D) that may be functionally important. 
27.00 27.00 28.80 28.70 25.70 25.20 hmmbuild -o /dev/null HMM SEED 304 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.90 0.70 -5.28 20 348 2011-05-10 14:56:01 2011-05-10 15:56:01 1 1 303 0 17 200 0 267.10 50 97.31 CHANGED SloFaIKNK+p.hh.uhpclhoscclLsLsct..Lppauh-tsppphshpchhtt.ltp.t....sllhGspspSuRGFELuYsccppsY.sVRlhTPSopsDWplALpalpsLupph.sscIhsE.pGcpaoscsIppFDYcsDIhhGlcsl.tplpscpttsh....haGlpRPlshscchl-cIhsupss..lcpFuchlcch..QaLDAY.A+QpFacspsssc..IhGhYsLopslcTILPac..P...V-hpshphlpsc-luhWplsllshsu-csc..ptYphluplcYccFlcpLPc-KYcalDAsYIhVcsLo+-Elpcl ...........................................t.hTFaIsNKpS.lL.GpQclLsAKSILuLlDG...LESHSYDsshLRQsLN.RLp............hIcCulsGpSphhF+VSYsDup.KuY.pVclPDhhT+sDWpIlhsFLcALhuhh.Go-I...........EGL-sF..DFEAaFpuuIpsaLuD.sA+h.shCp......GIhsPlaFS+EpLcuFLcuDu...LApFEspVRsl..QpoDA..YFA+spFYpDu-.GK..VaGlYaLAQGV+TVLP+E..P....aVPss..YlEQLs-+EVp...W-IcLVpIoGDusK.PEsYEuIARLDYscFLEsLP.saY+pLDA.sQIcVQPIhsp-hcsL....... 0 4 6 11 +13975 PF14133 DUF4300 Domain of unknown function (DUF4300) Bateman A agb Jackhmmer:C2M4P8 Family This family of lipoproteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 281 and 303 amino acids in length. There are two conserved sequence motifs: NCR and PYQ. 
27.00 27.00 54.40 54.20 20.00 19.90 hmmbuild -o /dev/null HMM SEED 250 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.64 0.70 -5.11 36 333 2011-05-10 16:08:49 2011-05-10 17:08:49 1 1 320 0 24 172 0 241.20 60 83.36 CHANGED hsYSNLssppSpcEVpshL......sAslsppslcpFhphVs-YNs.sl.....ppstlpssFssh...tps-YDh.t..IpchWpc+pss.Fh.GsNCRIouFsLhKshIpsssstp.......ssph....LFhDt-uIcss..............plhspp-cpcFpsLFopl.Tc.....sTpDlc.hHtpphpctaKphtlpF....tss+hpllSVhlHsp...-sshLFlGHsGVLlsscsG.aLFlEKluFppPYQAlKFss+p-lpc.YLhs+Ycs..htspsp.A+PFIM-NDchl ...................................u.SYoNLNspsSsEEVKSLL......SAHLDssSVDuFFNLVNDYNs.lV.....GSTGLoGDFToF...T+TEYD.VEK..IScLWspKKGD.FV.GTNCRINSYsLLKNSlTIPKlEK.........sDpL..LFlDNDAIDKG..............KlFDupDKE-FDILFSR.VPTE.....ATT.D.VK.VHA-KMEsaFSQ..FpF.....NEKARMLSVVLHD...NLDG-aLFVGHVGVLVPsDDG.aLFVEKLTFEEPYQAIKFASKEDCYK.YLuTKYsD..YTGEGL.AKPFIMDNDKWV............. 0 8 14 19 +13976 PF14134 DUF4301 Domain of unknown function (DUF4301) Bateman A agb Jackhmmer:C2M610 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 505 and 516 amino acids in length. 
27.00 27.00 84.20 83.60 21.10 18.40 hmmbuild -o /dev/null HMM SEED 513 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.69 0.70 -5.95 43 177 2011-05-10 16:11:31 2011-05-10 17:11:31 1 2 172 0 47 182 62 495.50 53 98.03 CHANGED hoppDlpQlcp.......+Glo.cplppQlchFccGhPhlpL.puAolscGIhtlsscEpcphlsha-p..tppstcllKFVPASGAAoRMFKsLapFL.............su.hscppssthppFFsslccFsFYccL.ppslpspt.slssLhsstchtt.llctLLstcGLNYGshPKGLL.FHpYs.-ts.pTP.....hEEHLsEuAhYAsss.upsclHFTVSp-HhctFcpplschtsphEpchsspacloFShQ+sSTDTlAVsh-.NpPFRsc-GpLLFRPGGHGALI-NLN-lDADllFIKNIDNVV....s-ph+pcTlpYKKlLAGlLlplQc+sFpYLchL-ps.phscpplpElhpFlpccLshph....s.phpphsppphhpaLpp+LNRPlRVCGMVKNpGEPGGGPFWltstDGslSLQIlESuQIDhssscptphhppuTHFNPVDLVCul+sY+GcpFcLhcaVDppoGFIopKS+sG+pLKALELPGLWNGAMAcWNTlFVEVPltTFNPVKTVNDLL+spHQ ...........h.oppDhc.ltp.......+GIopcplpcQLppFppGFPaLcLpuAAol.p.c.GIhshspcEpcpalstW-s.hppss+plVKFVPASGAASRMFKsLFpFL.............sAth..spPsscFccpFFssIccFAFYccLstsChc.spspslssL.htpspY.KslVssLLpspGLNYGsLPKGLLhFH+Ys.-.ss.RTP.....hEEHLsEuAhYAssp.GcsplHFTVSpEHppLFcphlscphstapc+auVcaploFSpQKPSTDTIAssh-.NpPFRstsGpLLFRPGGHGALIENLNDlDADllFIKNIDNVV....PD+LKs-TlpYKKllAGlLlsLQcpsFpYLchL-su.cho+ppltEllpFlppcLss+p....s.-hcpLp-spLshYL+pKLNRPhRVCGMVKNsGEPGGGPFhshNsDGolSLQILESSQIDhssscppchFcpuTHFNPVDLVCul+DYKGc+FcLscYVDcsTGFIS.KSKsG+-LKALELPGLWNGAMScWNTlFVEVPLsTFNPVKTVNDLLRppHQ.. 0 19 41 47 +13977 PF14135 DUF4302 Domain of unknown function (DUF4302) Bateman A agb Jackhmmer:C2M6D7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 344 and 443 amino acids in length. There are two completely conserved residues (R and L) that may be functionally important. 
27.00 27.00 40.70 33.90 21.60 21.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.64 0.70 -4.99 51 186 2011-05-10 16:14:32 2011-05-10 17:14:32 1 2 93 0 30 178 0 240.70 24 56.39 CHANGED -p-sl.F-c..osupRlspslpphpclL...uutsG.WhhpYYssps..........................aGG.ashhhKFsssp.Vshtu-hs.........sssppssShYplppspu.shLoFDTYN.h.....lHhhupPsssts........pGhpGDa..EFll.....hpsss-..plhlc.......G++stNphhhs....htpstshppahsphppsppthp....hstaphhh...............sspphshthpspppthhhs..hsssp......hphsahhTspG.l.phhpPl....plsGhp.hpphsaspssp ............p-sl.F-c..osupRhppslpchpchL..psuspG.WhhpYasp.ss..........................hGG.ashhhKFsssp.Vshtu-hs..........ssssthsSpYplp.pspu.shLoFsTYN..h.....lHhhusPpttts............tGhtuDa..EFll.....hssss-..plhlc.......G+Kptsphhhs....htpshshcph...hpp.ht.s.hppshp......hhhhphhh.................sspths.hhh.ps.p..t.pthhhs.......hssss.............hshsahhTp...pG....l.p.........hh.pPl.......plsGhp...hppasaspt..p................ 0 20 27 30 +13978 PF14136 DUF4303 Domain of unknown function (DUF4303) Bateman A agb Jackhmmer:C2M6J2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 169 and 192 amino acids in length. 
21.60 21.60 21.80 21.60 21.30 20.90 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.89 0.71 -4.58 38 181 2011-05-10 16:15:50 2011-05-10 17:15:50 1 3 156 0 39 155 8 149.60 23 79.57 CHANGED ltpAs+psapplhp..cpss-shYuauLhosspuhs..lssuAsoc-sLptttp........p..pst..h.........................+Wsss-Wsa...t.sttphFsplsphltphscphp.p.t...........apphhptlhpshlsuLppLcpcGlFust...tph.hlhlslsss-sspt.....ppsctLNss .....................................................lhpuhpptapplhp..cp.ss-shYua...u...l...h.....oDsssts...lsssANocchlph.pts.........cs...shh...........................+auss.EWsh.......hssht...hFscl...schlpp.h.pphhpps.................aphhhp..plhpthlpsLhcLcp-Gl.Fust....sc..hlhlslsDsssp.hh....ppsphLNs.h.............................................................................. 0 6 20 27 +13979 PF14137 DUF4304 Domain of unknown function (DUF4304) Bateman A agb Jackhmmer:C2M6R4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 223 amino acids in length. 22.50 22.50 22.70 22.50 21.70 21.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.38 0.71 -10.55 0.71 -3.98 48 164 2011-05-10 16:19:06 2011-05-10 17:19:06 1 1 144 0 38 153 4 111.30 17 58.01 CHANGED lsshLKptGFpK.pshsaa+.pppphhtllshQ+sph.........thpFhlNlGlhshthsp................................ph.pthshthpt...Rluslhspt..............s.haplcspp.shp.thhppltp.......tlpphllsah ...................tthL+.hGFpp..pt.s..ah+.pppshhhhlshQ+Sph.................sspFhls..lGlhshtltp..............................................................tt..pp.s..hhhts......+lstlh.pp................hapl..stt....sht.hhtpl.t........lpt.hh.......................................................... 
0 13 27 31 +13980 PF14138 COX16 Cytochrome c oxidase assembly protein COX16 Bateman A agb Jackhmmer:Q5ACH7 Family This family represents homologues of COX16 [1] which has been shown to be involved in assembly of cytochrome oxidase [2]. Protein in this family are typically between 106 and 134 amino acids in length. 27.00 27.00 27.70 27.70 25.60 26.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.58 0.72 -3.81 52 253 2011-05-10 16:50:56 2011-05-10 17:50:56 1 11 223 0 174 242 0 79.60 35 58.69 CHANGED lhaGLPFlhhlVuGSasL..pphoplRY-hpcpKspph.sppEthsht..............pp++cl..slc-E.Yh+l...psh.sh-.........sWE.hRl.R.htE ................lhaGlPhlhllVuGSFsL..ppho.tlRY-ttcpKscp..spc-ththt...............................tpp+cl.....slc-E.Yh............+l....pst..sh-.............................sWEphRl.R.ht-................. 0 47 90 140 +13981 PF14139 YpzG YpzG-like protein Eberhardt R re3 Jackhmmer:C0H444 Family The YpzG-like protein family includes the B. subtilis YpzG protein Swiss:C0H444 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a conserved QVNG sequence motif. 27.00 27.00 32.20 38.50 17.40 16.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.56 0.72 -4.03 8 104 2011-05-11 07:19:52 2011-05-11 08:19:52 1 1 104 0 17 34 0 49.80 68 98.29 CHANGED MGp..p+cpaDp...p.YSsPFspPWsNPKHA+uQVNGcTQQoQsLIILcspsRK+p .......MS...YRDRLDs.....RSELFNHTWTRPKHAKAQVNGQTQQTQSLIILANECKKRQ. 0 3 11 12 +13982 PF14140 YpzI YpzI-like protein Eberhardt R re3 Jackhmmer:C0H446 Family The YpzI-like protein family includes the B. subtilis YpzI protein Swiss:C0H446 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 
27.00 27.00 32.40 58.00 26.20 16.40 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.38 0.72 -8.08 0.72 -4.46 8 121 2011-05-11 07:34:23 2011-05-11 08:34:23 1 1 121 0 13 41 0 41.90 80 91.57 CHANGED MGKDRQEKKLKtS+RVESDRDQSLpYsGATpL-oPEcARKRN MGKDRQERKLRESRRVESDRDQSLQYPGATuLDTPEQARKQN.. 0 1 8 10 +13983 PF14141 YqzM YqzM-like protein Eberhardt R re3 Jackhmmer:C0H453 Family The YqzM-like protein family includes the B. subtilis YqzM protein Swiss:C0H453 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. 27.00 27.00 33.70 33.60 26.70 25.90 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.09 0.72 -4.17 11 134 2011-05-11 07:51:06 2011-05-11 08:51:06 1 1 134 0 22 49 0 42.90 79 95.72 CHANGED NcFE.KDVQsKRNDAlDSuVGFlVSFGFFsslFlIAslIcFlGp ...NDFE.QNVQSKRNDAIDSGVGFIVSFGFFATLFIIATVIKFIGS... 0 6 15 18 +13984 PF14142 YrzO YrzO-like protein Eberhardt R re3 Jackhmmer:C0H458 Family The YrzO-like protein family includes the B. subtilis YrzO protein Swiss:C0H458 which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. 27.00 27.00 100.90 100.80 23.70 23.60 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.23 0.72 -4.32 2 83 2011-05-11 07:57:22 2011-05-11 08:57:22 1 1 82 0 4 18 0 46.00 90 99.79 CHANGED MhEuLLFFhusGlsCELAAINRNGRKpIKQQAEhIQlLKE.h.KsI MLESLLFFFAVGVACELAAINRNGRKKIKQQAEhIQLLKELKER....... 0 1 2 2 +13985 PF14143 YrhC YrhC-like protein Eberhardt R re3 Jackhmmer:O05395 Family The YrhC-like protein family includes the B. subtilis YrhC protein Swiss:O05395 which is functionally uncharacterised. YrhC is on the same operon as the MccA and MccB genes, which are involved in the conversion of methionine to cysteine. 
Expression of this operon is repressed in the presence of sulphate or cysteine [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 35.80 35.70 25.40 21.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.36 0.72 -4.03 17 134 2011-05-11 08:20:14 2011-05-11 09:20:14 1 1 132 0 19 88 0 71.80 56 89.73 CHANGED pcLcsKhtDYKpFuhlLLAlSsFLYlGslIP.tuhpsspp.hhhsulslhLshuhhhh+RuhhhpcpLcEt- ...KELppKIEDYoRFGQlLLAVSThLMlGLLIPsGuKEThQhFlMMGoIVIFLuLSFFFFpRVKlhRccLEEsE...... 0 2 11 13 +13986 PF14144 DOG1 Seed dormancy control Coggill P pcc Jackhmmer:Q9SN45 Family This family of plant proteins appears to be a highly specific controller seed dormancy. 27.00 27.00 28.30 27.60 26.40 26.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.77 0.72 -3.94 81 488 2011-05-11 10:20:54 2011-05-11 11:20:54 1 7 52 0 217 469 0 77.90 41 23.87 CHANGED +plsEL....Rs.ALpup.h...........sDs-L+hlV-sshsHY.pclachKusA...A+sDVF...al.lSGhWpoPsER.sFLWlGGFRPS-Ll+lLhsp .............................+phsELRsAlpu+h...........sDs-LRh.lV-sshsHY.sclF.chKusA...A+sDVF...al.loGhWpoPuER.sFlWlGGFRPS-ll+llhs....... 0 28 122 175 +13987 PF14145 YrhK YrhK-like protein Eberhardt R re3 Jackhmmer:O05401 Family The YrhK-like protein family includes the B. subtilis YrhK protein Swiss:O05401 which is functionally uncharacterised. Its expression is under the control of the motility sigma factor sigma-D [1]. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. 
25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -9.07 0.72 -4.30 57 199 2011-05-11 10:48:16 2011-05-11 11:48:16 1 4 174 0 77 175 13 60.00 30 54.02 CHANGED hppY-hhth......................lsDhhsulhFllGSlhFhhs.s...ht............hsusWlFllGSlhhhl+Psl+ll+cl+ ..................p.ppYchlth............hsDhlhulhFllGSlhFhhc.t...ht........................phGsahFllGSlhhhl+Phl+llpph.h............ 1 33 54 71 +13988 PF14146 DUF4305 Domain of unknown function (DUF4305) Eberhardt R re3 Jackhmmer:O05524 Family This family includes the B. subtilis YdiK protein Swiss:O05524, which is functionally uncharacterised. This is not a homologue of E. coli YdiK, which belongs to Pfam:PF01594. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 27.80 27.80 26.90 26.70 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.61 0.72 -4.21 20 171 2011-05-11 11:57:29 2011-05-11 12:57:29 1 1 170 0 24 78 0 37.10 43 60.72 CHANGED llFsYhAlpsss-ss..WshaThllhlhAshDFshul+hl .hlFTahAlssVsDsh..WshaTILhhlMAshDFslulRLI. 0 6 15 18 +13989 PF14147 Spore_YhaL Sporulation protein YhaL Eberhardt R re3 Jackhmmer:O07520 Family This family of proteins is involved in sporulation. In B. subtilis its expression is regulated by the early mother-cell-specific transcription factor sigma-E [1]. 27.00 27.00 52.80 52.70 25.70 21.50 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.60 0.72 -4.52 10 131 2011-05-11 12:12:26 2011-05-11 13:12:26 1 1 131 0 20 58 0 49.50 70 76.14 CHANGED hPWWVYhlIlGIlFSuYMsl+ouKEE+EhDQcaIE+EGclYMcRlEcERE+R ....hPWWVYLVIlGIlhSGYMVLYTSKKEQ-MDNEFIEKEGEVYMKRLcEEREKR.. 0 3 12 14 +13990 PF14148 YhdB YhdB-like protein Bateman A re3 Jackhmmer:O07530 Family The YhdB-like protein family includes the B. 
subtilis YhdB protein Swiss:O07530, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 57 and 82 amino acids in length. There are two conserved sequence motifs: LMVRT and FLHAY. 27.00 27.00 74.10 74.00 22.50 21.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.76 0.72 -4.23 6 132 2011-05-11 12:24:37 2011-05-11 13:24:37 1 1 131 0 16 43 0 69.50 71 93.88 CHANGED Ms.hsDYDKALYYTaRSpWDsLLILMVRTKDDLLSKRIE+FLHAYpFp+Das.V-KpL.sLLpYIDHA..hoshpppEt ...p.h.DYD+ALYYTapspWDpLLlLMVQTsDQLFSKRIEHFLHAYQYSKELPEVDKQLQLLFQYIDHASQKSHlEElE.Q....... 0 1 8 10 +13991 PF14149 YhfH YhfH-like protein Eberhardt R re3 Jackhmmer:O07606 Family The YhfH-like protein family includes the B. subtilis YhfH protein Swiss:O07606, which is functionally uncharacterised. Its expression is repressed by the Spx paralogue MgsR, which regulates genes involved in stress response [1]. This family of proteins is found in bacteria. Proteins in this family are typically between 42 and 53 amino acids in length. 27.00 27.00 27.00 30.00 26.90 25.10 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -7.99 0.72 -4.16 17 140 2011-05-11 12:36:23 2011-05-11 13:36:23 1 1 127 0 25 77 0 36.90 66 75.72 CHANGED lpphsEFFRNLPsKpCscCGcc.I-EQpEsYtspC-cC ............-pshEFFRNLPoKsCAcCGKE.IDEQHEuYpNcCDDC 0 8 19 22 +13992 PF14150 YesK YesK-like protein Eberhardt R re3 Jackhmmer:O31514 Family The YesK-like protein family includes the B. subtilis YesK protein Swiss:O31514, which is functionally uncharacterised. Its expression is regulated by the sporulation-specific sigma factor sigma-E [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 100 amino acids in length. 
25.00 25.00 26.60 26.20 24.40 24.00 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.69 0.72 -3.88 10 127 2011-05-11 12:48:56 2011-05-11 13:48:56 1 1 108 0 12 74 0 76.40 57 81.94 CHANGED FallGhlohlllFslShll+++aPsKp.h-hlLuhlLIllslhslhlSlFllGGWEGhGlGlluhhlllGolIGhIActhl+ ........FYIIGGlTIlLVFAIS..YLLKKRFPDKQ..FDIIFALuLILLCLAoFPVTMhlIGGWEGMGYGFIGFFVLLGTLIGMIAHQLlK........ 0 1 8 8 +13993 PF14151 YfhD YfhD-like protein Eberhardt R re3 Jackhmmer:O31572 Family The YfhD-like protein family includes the B. subtilis YfhD protein Swiss:O31572, which is functionally uncharacterised. Its expression is regulated by the sporulation-specific sigma factor sigma-F [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a single completely conserved residue E that may be functionally important. 27.00 27.00 27.00 38.20 26.50 26.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.11 0.72 -3.86 16 145 2011-05-11 12:59:37 2011-05-11 13:59:37 1 1 141 0 27 79 0 54.30 55 99.03 CHANGED MsRsptpKs+cK.NctphsQsPct....uDuhDVEFSpElADp-DhEApsRupAADtRt........pppcp ........................................scpph.p.sKs......oDGlDVEFS+ELADHsDLEApARAsAADsRQK...p....sp........ 0 4 17 19 +13994 PF14152 YfhE YfhE-like protein Bateman A re3 Jackhmmer:O31573 Family The YfhE-like protein family includes the B. subtilis YfhE protein Swiss:O31573, which is functionally uncharacterised. Its expression may be regulated by the sigma factor sigma-B, which regulates the expression of stress-response proteins [1]. This family of proteins is found in bacteria. Proteins in this family are approximately 40 amino acids in length. There is a conserved QEV sequence motif. 
27.00 27.00 36.90 36.50 19.80 18.50 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.15 0.72 -7.75 0.72 -4.05 13 126 2011-05-11 13:10:06 2011-05-11 14:10:06 1 1 124 0 15 42 0 36.60 66 92.22 CHANGED -.....K++pccs+soLopsQEVpYup-FKtAD+A..u..pt++ ............DKKKRDKsKNsLSSTQEVLYQREF+KADRA..AGYRuKS......... 0 2 8 10 +13995 PF14153 Spore_coat_CotO Spore coat protein CotO Eberhardt R re3 Jackhmmer:O31622 Family Bacillus spores are protected by a protein shell consisting of over 50 different polypeptides, known as the coat. This family of proteins has an important morphogenetic role in coat assembly, it is involved in the assembly of at least 5 different coat proteins including CotB, CotG, CotS, CotSA and CotW. It is likely to act at a late stage of coat assembly [1]. 35.00 35.00 43.30 40.20 34.00 33.70 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.20 0.71 -4.85 16 142 2011-05-11 14:03:18 2011-05-11 15:03:18 1 2 126 0 22 123 0 159.60 42 98.69 CHANGED s+cph.....scpcPLLYIsQPchpc.spspMQcsaht+pcppp..................ttppptppp...................p........p..t....t...p..p..t..tppttppp..........................................................h.hcKsFpcMsl-EKIsFLsphPcplPslpCplhocpcoYcGllhshcss...plhltss......tpt..spsplsh-cIhSlphlGF ...............................................ps..Ks.p..sssKPLLYIsQssh-h.ussphpcIllsphcscs....................pcEppscscph......t...pth.E.ppppt..............pcp.tp..p..tp.p.p..ppppc.s.......................................................psh.hpKsF+-Ms.-EKIcFLhshPHalP+l+CcIcTsshoYhGsIluhRNG...hVsIhss......ssh..c-hcLuI--IpSIsMhGF........ 0 1 11 14 +13996 PF14154 DUF4306 Domain of unknown function (DUF4306) Eberhardt R re3 Jackhmmer:O31651 Family This family includes the B. subtilis YjdJ protein Swiss:O05524, which is functionally uncharacterised. This is not a homologue of E. coli YjdJ, which belongs to Pfam:PF00583. 
This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 95 and 152 amino acids in length. 27.00 27.00 44.90 44.30 21.40 21.20 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -9.90 0.72 -4.01 8 123 2011-05-11 14:12:19 2011-05-11 15:12:19 1 2 99 0 11 90 0 87.50 47 78.91 CHANGED hlQaGhuhhlFlFSAlsoWYpGSpLlsssa-WKaoshFophh.G..slss.cpISQLDaFlYAAKapPshsslMllShlYlLsLlhhhlhp .....hlQhu.uhhlFlhSAL.ho.WYQGSsLl-sP.EWKYoAKFTshhp.G..TVoshcDIYQIDFFlYAAKFYPsshIVMllSlLYhLlLIlahlh......... 0 1 5 7 +13997 PF14155 DUF4307 Domain of unknown function (DUF4307) Bateman A agb Jackhmmer:C7PZ49 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 132 and 153 amino acids in length. There is a single completely conserved residue C that may be functionally important. 27.00 27.00 38.70 52.60 25.50 23.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.34 0.71 -4.28 57 347 2011-05-11 14:48:55 2011-05-11 15:48:55 1 1 343 0 86 210 61 112.00 31 78.09 CHANGED pscpthhslsslhslsuhshhuh..hsapphusssl..suphluacllsDs.psplphpVs+s.s..upsuhChVcAhspctuEVGRc-lhl....ss......ssppphchssslcTsp.ussuclh.GC ......tcthhhlll.hVLsllhhshhsh...huhp.h.us..ssl..puplhGaphl...sD.s.pssVTlpVpRsDP..ShsusChVpAput-tuEVGR+-lhl...Ps..........ussps.plpssV+Tpp.uVsu-lhuC... 0 26 65 82 +13998 PF14156 AbbA_antirepres Antirepressor AbbA Eberhardt R re3 Jackhmmer:O31697 Family This family inactivates the repressor AbrB, which represses genes switched on during the transition from the exponential to the stationary phase of growth. It binds to AbrB and prevents it from binding to DNA [1]. 
27.00 27.00 54.50 54.40 23.20 21.30 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.05 0.72 -4.32 6 115 2011-05-11 14:58:50 2011-05-11 15:58:50 1 1 115 0 12 35 0 61.00 70 96.46 CHANGED Mp+cls...LTpEEppLLLDILFpQpYAhElLusELsDIEsGhKpsDhppY++lsRLasRL+sE .Mp+EhV...LTcEEESLLLDILFQQNYASEILAVELTDIENGLKpTDVhQYKKITRLFYRLKNK. 0 1 4 6 +13999 PF14157 YmzC YmzC-like protein Eberhardt R re3 Jackhmmer:O31797 Family The YmzC-like protein family includes the B. subtilis YmzC protein Swiss:O31797, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 58 and 91 amino acids in length. There is a conserved ELR sequence motif. 27.00 27.00 35.10 34.60 25.60 20.60 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -8.97 0.72 -4.15 7 102 2011-05-11 15:18:03 2011-05-11 16:18:03 1 1 94 6 6 68 0 58.60 57 69.31 CHANGED hNpEpllEh.ssas.....os.psppssMsQltcNpFAlh.......-csplKIa+as.cTNclpLlKEass-E ......spEpsVEh.ssas.....Ss..hsTssMTQlu-NTFAhp......sEsspIKIFKFNPDTNEIKLIKEFhusE... 0 1 3 3 +14000 PF14158 YndJ YndJ-like protein Eberhardt R re3 Jackhmmer:O31813 Family The YndJ-like protein family includes the B. subtilis YndJ protein Swiss:O31813, which is functionally uncharacterised. This family is found in bacteria and archaea, and is typically between 222 and 269 amino acids in length. There are two completely conserved G residues that may be functionally important. 
27.00 27.00 42.30 41.70 23.40 23.30 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.69 0.70 -5.33 14 139 2011-05-11 15:24:24 2011-05-11 16:24:24 1 2 133 0 21 118 0 248.50 55 50.67 CHANGED shssspthlhLusLhhlPhshphh.pts......h..shhh+....h.PluAlsAshuhshs.....tshhAssWhshsshhAlhussRl.......hpRshc....hpElulsuullYlssGuhWhhstsuslslhtFu..IlhLTAsHFHauuFshPlhsGLLGRt.t.......tct.hlaphhshhIhluPhh.lAlGIshSchh-hhushlhssAlhuhuhhshtps......hcspsuthLltluuhsLhholshuhhYuhGphhupshl.sIspMlhhHGssNAhuV ...........slsslEAIlLLSlLLFlPhohsllDKcs....RsGu.llFYK.VShLYPIAAIsAhLAFVTs.......thhFAllWFlYTGl.....lAL...FGlsRL.......................LERGh+P.................LEEsAIDSAFIYLFLGGFWFFASVA+lo.IMpFSsDIlLLTAAHFHYSAFLLPLSAGLlG.RK+c............KpS..KlYcsIhalIhISPMT.VAIGITYSRl.....FEFFAVhLYLsAIYuYuh.YVW+s+......FsuloAKILLllSSoTLMlTIhFSLIYSYGNh+pVhTI.TIAQMVWIHGVVNGlGV....... 0 7 14 18 +14001 PF14159 CAAD DUF4308; CAAD domains of cyanobacterial aminoacyl-tRNA synthetase Bateman A agb Jackhmmer:B1XK71 Domain This domain is present in aminoacyl-tRNA synthetases (aaRSs), enzymes that couple tRNAs to their cognate amino acids [1]. aaRSs from cyanobacteria containing the CAAD (for cyanobacterial aminoacyl-tRNA synthetases appended domain) protein domains are localised in the thylakoid membrane. The domain bears two putative transmembrane helices and is present in glutamyl-, isoleucyl-, leucyl-, and valyl-tRNA synthetases, the latter of which has probably recruited the domain more than once during evolution. 
27.00 27.00 27.70 27.70 25.70 26.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.78 0.72 -4.34 96 289 2011-05-11 16:33:04 2011-05-11 17:33:04 1 7 100 0 141 276 113 86.30 31 38.78 CHANGED lspl.pthsph..sphphshhshuhhhh.llslhlssullsAIsplPLlsslh....ELlGluYosWFsaRhLlhppsRp-Lhpclp..................shcpp.....lhG ........................h..pl.phhsph..sphphhhhssuh....llulhlssullsAIsplPLlPsl.h....ELVGlG..YouWFsaRaLlhppsRc-Lhsclp...............sl+pplhG........................ 0 22 85 123 +14002 PF14160 FAM110_C Centrosome-associated C terminus Coggill P pcc Jackhmmer:Q8TC76 Family This is the C-terminus of a family of proteins that colocalise with the centrosome/microtubule organisation centre in interphase and at the spindle poles in mitosis. 21.00 21.00 22.10 23.30 20.00 17.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.67 0.71 -3.13 16 181 2011-05-11 17:01:40 2011-05-11 18:01:40 1 2 70 0 114 169 0 111.10 43 28.84 CHANGED p+.psuLpRSKS..DlSc.RaStutu-lE+FFsaCGLDsp.l-tLGh.-shtpss....S..DhsSl.phcSsossuS-.s......tpopcSs.-.sh.............pE-tts-RlP.GlSlIERNARVIKWLYup+pA+ .................................s.psuLpRSKS..DLSs.Rauc.....shu-lERFFsaCGLDPEtl-sLG.h...EpF.upus...................S..DhsSl....shpSsossoS-.s..............tpSpc.Ss..s.sh........................................t--...pst-RV.....P....u.lSllERNARlIKWLYuh+pA+.......... 1 14 22 61 +14003 PF14161 FAM110_N Centrosome-associated N terminus Coggill P pcc Jackhmmer:Q8TC76 Family This is the N-terminus of a family of proteins that colocalise with the centrosome/microtubule organisation centre in interphase and at the spindle poles in mitosis. 
22.20 22.20 22.20 22.20 21.60 21.50 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.39 0.72 -3.79 5 153 2011-05-11 17:02:04 2011-05-11 18:02:04 1 2 49 0 93 142 0 103.50 38 31.34 CHANGED s.uK....PluPAGPh.ouAVPLRILNKGP-YF.R.RQA..EssP+RhSAVERLEADKAKYVKSQEVINAKQEPVK..PPVLt.KPhsSPu.....PKRuu....uoP....ohKApss.puKo-SG.u......+RsNLcL .....................................hs...ssshP.hRlhsKsP...s..Yh..R...R.s...-..s...s..s..+..+.....hSAVERLEADKAKYVKSppVlss+QE.P.Vp.....sslht..pP..hsss......spt.sh.........tP.....sh+.......................................................................................... 0 8 16 47 +14004 PF14162 YozD YozD-like protein Eberhardt R re3 Jackhmmer:O31863 Family The YozD-like protein family includes the B. subtilis YozD protein Swiss:O31863, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 92.00 91.90 23.60 17.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.32 0.72 -8.75 0.72 -3.89 6 127 2011-05-12 07:20:28 2011-05-12 08:20:28 1 1 127 0 15 38 0 56.90 81 96.98 CHANGED MKEIEVVIDTEEIAEFFYpELlRRGYVPoE-ElEElADITF-YLlEKChIDEEh--- .MKEIEVVIDTEEIAEFFYEQLIERGYVPKREEIEDLADITFEYLLEKCMIDEVFDEE.. 0 1 7 9 +14005 PF14163 SieB Superinfection exclusion protein B Eberhardt R re3 Jackhmmer:O31930 Family This family includes superinfection exclusion proteins. These proteins prevent the growth of superinfecting phage which are insensitive to repression. It aborts lytic development of superinfecting phage [1-3]. 
26.00 26.00 26.10 26.10 25.90 25.30 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.42 0.71 -4.64 34 252 2011-05-12 07:43:39 2011-05-12 08:43:39 1 1 226 0 34 153 4 143.90 25 82.81 CHANGED hhhhLslhsuhLL....hhPpshlphlslsphhsp...at.alGlshllusAallspllshh....hp.hhph......hpp++phctlccp.....lptLsspE+slLppahhps..pphlpLshssssVpsLhppsIlphhust........ssttphplshphpsahpcpltphspt .................hahllIhhh..hll....lhPsshhphlsltp..hh....t.ahahlllhslSallstslsph.......hcthh.t........pp+ptpcphtph.......hpsL..o.tE..pAlL.t.hltss...p.l....ph.pssPhshpLhc+Gllp+hsss........sspstahIs-papp.hhhthtsc...t....................................... 0 8 16 23 +14006 PF14164 YqzH YqzH-like protein Eberhardt R re3 Jackhmmer:O32014 Family The YqzH-like protein family includes the B. subtilis YqzH protein Swiss:O32014, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 27.00 27.00 27.80 40.00 23.90 20.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -9.00 0.72 -4.06 9 119 2011-05-12 07:53:05 2011-05-12 08:53:05 1 1 119 0 12 63 0 60.80 59 94.71 CHANGED MsEKhIcKhltpshcQYu.s.pshPlosp-hcpLhcpIppthspcs-hDlYEhlcDlVY-YlTu .MNEKLIEKMIIKSFQQY....QCsPlSpEDQEMLlKHIQslhHSNscIDlYEtlEDIVYDYVTG. 0 2 4 6 +14007 PF14165 YtzH YtzH-like protein Eberhardt R re3 Jackhmmer:O32066 Family The YtzH-like protein family includes the B. subtilis YtzH protein Swiss:O32066, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There is a conserved DIL sequence motif. 
27.00 27.00 38.30 71.50 20.90 18.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.80 0.72 -3.75 14 132 2011-05-12 07:58:12 2011-05-12 08:58:12 1 1 132 0 21 51 0 86.70 69 94.45 CHANGED LsppHQLsLL+DILssHpsDCsGTVuEsEQlERLlpSLhsNssl.ssslKslLpcI....YsYuQsGh.utslssHIstppppLspWlssls ..INQQHQLEVLKDILlNHQSDCCGTVSECEQLERLIQSLLANDsI.SSDsKsMLNDV....YSYSQSGKSSSNLDNHISNNQEQLTQWIuGMD. 0 4 13 15 +14008 PF14166 YueH YueH-like protein Eberhardt R re3 Jackhmmer:O32093 Family The YueH-like protein family includes the B. subtilis YueH protein Swiss:O32093, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 38.30 38.00 26.70 23.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.72 0.72 -4.22 17 252 2011-05-12 08:07:03 2011-05-12 09:07:03 1 1 252 0 16 76 0 75.00 57 93.12 CHANGED KI...chhptps.hhsVYlaEscccp.hllAIPslpWShplshp.-ppplt-cLhhpLhphh-EppAppLAsplspWlpc ......hl....NKRhlDEGKTIDVYLFEulNsQ.IIIAIPDWFWSYQMAMTL....DEETCFEAILMQLF..VFKEEEEAESIASQLTDWIET... 0 2 5 12 +14009 PF14167 YfkD YfkD-like protein Eberhardt R re3 Jackhmmer:O34579 Family The YfkD-like protein family includes the B. subtilis YfkD protein Swiss:O34579, which is functionally uncharacterised. Its expression is regulated by the sigma factor sigma-B, which regulates the expression of stress-response proteins, and by the forespore-specific sigma factor sigma-G [1,2]. This family of proteins is found in bacteria. Proteins in this family are typically between 254 and 265 amino acids in length. 
27.00 27.00 29.90 29.60 22.40 21.40 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.59 0.70 -5.29 10 138 2011-05-12 08:22:59 2011-05-12 09:22:59 1 1 137 0 20 94 0 229.10 72 89.47 CHANGED PsSVlsISKENTYPNPTQDLPhLQPS-LAcpLL-oo-VKIENP-LIRhLNESSIsuoPLAIGYRAoIYLGpWPLsYESsETssNWEYQKVNTNhlDNRGGpuspplpYpQEpQK+V+GGLTAcIPNu--VKKMMLlKAtEKTsLPLAFcTVIGtGTKK-psYsVsPK+lGYLauYsPAVNEKGKVTYGEVYLsLKGsK++LsVKNVTpQGIGAWIPVQD+lSFuFhsSspP+ .....PSSVLNISKDNTaPN-AQDLPRLQPSKFAQELLKTANIKIENPDLIRMFNETTISNAPLAVGYRAKIYLGQWALpYESlDTSlNWEYKQVNRNVYDNRGGD+LYPLRYKQEoQKTVEGsLTAchKcAsDVKKMMLLKAhEKVQLPLSFKTTIGYGTG+ERVYNISPsQLGYLYAYTPAVNEKGKVTFGEVYLVLKGNQK+LVVKNlTSQGIGAuIPIpDHLaFKFlSSS+s.p............................. 0 3 12 14 +14010 PF14168 YjzC YjzC-like protein Eberhardt R re3 Jackhmmer:O34585 Family The YjzC-like protein family includes the B. subtilis YjzC protein Swiss:O34585, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 25.00 25.00 25.30 25.20 24.90 24.80 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.60 0.72 -4.04 20 212 2011-05-12 09:55:35 2011-05-12 10:55:35 1 1 206 0 28 102 0 56.70 51 92.41 CHANGED M......G............p.....pp.F+PGp+APNsGhYl.ElGt.pGut..VpsP+pVclctGD+FP-ToN+sR..+Wpp.+ ..............MGppppF+sGpKA.PNs.GlY.....V.ElG-.sGuh....Vp.sPphlpLstG-+FP-To..N+sR..tWph...... 0 8 18 20 +14011 PF14169 YdjO Cold-inducible protein YdjO Eberhardt R re3 Jackhmmer:O34759 Family This family includes the B. subtilis YdjO protein Swiss:O34759, which is functionally uncharacterised. This is not a homologue of E. coli YdjO, Swiss:P76210. B. subtilis YdjO is cold-inducible [1]. Its expression is induced by the extracytoplasmic function sigma factor sigma-W [2]. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. 
27.00 27.00 28.10 30.90 24.00 26.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.35 0.72 -4.52 15 173 2011-05-12 10:24:33 2011-05-12 11:24:33 1 2 128 0 30 85 0 59.30 52 59.05 CHANGED MYapK+spEslPcEp...TsVWpCouEDCpGWMRcNFoh--pPhCPLCpSsMhsupRhLspl ........h.RKpshtplP.Ep.....TsVWECpuEDChGWMR......KNF.........o.........a......EEc..........PpCPLC+SsMpsGpRhLPpL.. 0 8 19 20 +14013 PF14171 SpoIISA_toxin Toxin SpoIISA, type II toxin-antitoxin system Eberhardt R re3 Jackhmmer:O34853 Family SpoIISA is a toxin which causes lysis of vegetatively growing cells. It forms part of a type II toxin-antitoxin system, where the SpoIISB protein, Pfam:PF14185, acts as an antitoxin. It is a transmembrane protein, with a cytoplasmic domain accounting for approximately two-thirds of the protein. The structure of the cytoplasmic domain resembles that of the GAF domains, Pfam: PF01590. SpoIISB binds to the cytoplasmic domain of SpoIISA with high affinity [1]. 27.00 27.00 32.30 32.20 19.30 19.10 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.41 0.70 -5.11 7 118 2011-05-12 10:56:17 2011-05-12 11:56:17 1 1 113 2 13 77 0 239.80 59 97.97 CHANGED lLFFQhhV..........WlllhuLhlY.VaAsWRaEtplcE+hhsIRKTWYhLaVhGuslaWTa-PpSlFTcWpcYLIlAVhFsllDAFIFLouYlpKluusE..hpTDTcpllEcNs-hL+hhhs+LKsaphLLKs-sIHlY....YGoh-AYhpGlccllttaA-KhslpAulh.assptsKDcLhcp..................hcpptslpscLsRp-VYYpppsKlVlIP.FolpsppaVlKLoS-sllTEFDhLLhsSLssIYDLlhs ...............NIRIG.......l..........FlLAIVFlVL.VF..FYW+NEELYEEKKQRI...RKTWYGLFIlSVTVYFMIKGID.LTLWKNLLMFTAMVIFVDIAFILTPNISEIWGAK....FSDIGKTVQSIKRSLIASKARGEIYTTIIQNVNPuVFGT..MEWHTEEEYTKSLNsFLDSYGEKIGAKIVVFEAAKELNTN..F.Rsh+sp.............................FSh.IlPlEaIEQLNEQ+AV..QVENVGIIP.AKIV.SDVFIVI.D...GKKNNLQDRDFENVYNLTIHHSYFS.K.... 0 2 8 9 +14014 PF14172 DUF4309 Domain of unknown function (DUF4309) Eberhardt R re3 Jackhmmer:O34960 Family This family includes the B. 
subtilis YjgB protein Swiss:O34960, which is functionally uncharacterised. This is not a homologue of E. coli YjgB, Swiss: P27250. Expression of B. subtilis YjgB is regulated by the alternative transcription factor sigma-B [1]. This family is found in bacteria, and is approximately 140 amino acids in length. 25.00 25.00 25.10 25.00 24.60 24.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.63 0.71 -4.18 15 166 2011-05-12 12:03:59 2011-05-12 13:03:59 1 4 156 0 29 141 1 131.10 44 57.89 CHANGED hlpslhchAhcGclss..ssFslspushpDVp+phGcP-psshsusuhshYtsatu.hsluFGasKsspIhElRSFssslc......ulTlpclcptlGpPspshTs....sc-pIh.....sY+sGppYcLcFVassssst......lDHlSlh .......h.lcsla-hAccGK..VP.N..ssFuspTusI--lcKsWGKs-+s-tuusu..hY.Aoass..+slsFG.aNKGuQlF-lRSacscLK.............uITLp-lcKsLGcPsplpss............uc-cIY.....VY+..lss....paEL+FlIscoTup..................lcHlSVh............. 0 10 22 22 +14015 PF14173 ComGG ComG operon protein 7 Eberhardt R re3 Jackhmmer:P25959 Family This family is required for DNA-binding during transformation of competent bacterial cells [1]. 25.00 25.00 25.30 25.00 24.90 23.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.89 0.72 -3.78 18 125 2011-05-12 12:37:24 2011-05-12 13:37:24 1 1 125 0 20 93 0 94.30 38 78.36 CHANGED hahsEpphhpppcphhch-pLlphulh-lppclt...pspptpssphpa.cGplsa.phscps.tshhplslpspsssGhphsspFhhshtspcIhc.WpE ............hhs-KpFhpEhEQpalh-ELLppulssl.K+-Lp.......pcEK.csshhFpYc+GcsSh.pashEs...-hIhVsLQCphK.ppshYpVsFpYc+KcpKIh-WlE.......... 0 4 12 14 +14016 PF14174 YycC YycC-like protein Eberhardt R re3 Jackhmmer:P37481 Family The YycC-like protein family includes the B. subtilis YycC protein Swiss:P37481, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 50 amino acids in length. There is a conserved HIL sequence motif. 
27.00 27.00 28.70 64.30 22.30 21.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.66 0.72 -4.29 13 148 2011-05-12 12:50:22 2011-05-12 13:50:22 1 1 148 0 26 62 0 49.30 75 97.51 CHANGED MRPLQISsETAl+LucpLsVPLEplMHMPpHILlQKlhEL.................ptppppcccs .MRPLQISPDTAV+LSKALGVPLEQLMHMPQHILlQKLlEL.............EKpNK-.......-E......... 0 6 15 18 +14017 PF14175 YaaC YaaC-like Protein Eberhardt R re3 Jackhmmer:P37526 Family The YaaC-like protein family includes the B. subtilis YaaC protein Swiss:P37526, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 320 and 333 amino acids in length. 27.00 27.00 32.80 30.40 20.30 24.10 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.04 0.70 -5.47 27 147 2011-05-12 13:03:11 2011-05-12 14:03:11 1 2 139 0 32 142 3 284.50 38 91.65 CHANGED hpplthapohphsppaLtppapp..htpspppuhpss.shlaalcpupsaYctuptu.slpl+PlLLaYGhhpLlKAhLLstssshs.....pposlluHGloo+cp+cpsaphhpc......cV+lpcs...G...laschscplh.......thpthpsEc........ashtcLLtpIPElppha........ptpphh.lclthhppp.......hhhspphhsphp...hstpcFhphhpppppthh.........ppppcpthhhp.s........................hp.hhstslhhphpstpaal.h...tpsthhplPplhhHYLlLYsLShlsRYcs-hWt-LLpup.spchshIppFLshstcphPhhltphl ............................WppLshFhSsppsQcaLtcsYpc..shppuccpSacNshsFlYaLcHucsaYchhphu.PhsIpPhLLFYGhsQLhKACLLThDPsYP.....psToVLAHGVTTRKRKKQsYpFh-D......EVKlQ+N...G...LFsHhucpLF...................phcplpsE+........asMhcLhtpIPELpslF......hhppctthhhclpp.ppp.......lshs.pllcph+...MoppRFscYlpp.hpth...............tcppppplhFpss.pt.......................hpshhssslhach.sspYalPh....+s..t.hLPElllHYLLLYNLSMIuRYET-WWh-LLt.uasSc.-YshIhpFLslotpKhPhhlsphl........................ 0 10 22 26 +14018 PF14176 YxiJ YxiJ-like protein Eberhardt R re3 Jackhmmer:P42320 Family The YxiJ-like protein family includes the B. 
subtilis YxiJ protein Swiss:P42320, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 27.00 27.00 31.00 36.50 23.70 23.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.43 0.72 -3.93 8 95 2011-05-12 13:10:07 2011-05-12 14:10:07 1 1 90 0 7 61 1 103.30 58 87.22 CHANGED lpELc.+hp..LcpPFPscslp+lp.sh........-ssshssDFhpaaslluGSLSYVLss..K+IPcpQh+hLcKSFFEhYPQYc.LcscItpY.cLacclpsaEcTRcLLlt .......FpELQ.KM+SPLaKPFPspshpKlp+DhpshF....TEDDCIsADhNaYWMHTAGTLSYVLNNNEQcIsFpQIKWL+KSFFEWFPQYpFLETEIhcYPILYRDFhsYEKTRKLLLY..... 0 2 6 6 +14019 PF14177 YkyB YkyB-like protein Eberhardt R re3 Jackhmmer:P42430 Family The YkyB-like protein family includes the B. subtilis YkyB protein Swiss:P42430, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. There are two conserved sequence motifs: NRHAKTA and HLG. 27.00 27.00 28.50 27.50 19.50 21.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.83 0.71 -4.04 9 140 2011-05-12 13:17:39 2011-05-12 14:17:39 1 1 137 0 20 74 0 135.60 65 88.32 CHANGED Tl-NLupAlFTVNRHAKTAhNP+aLYhLKK+AlpKLlcEGKA+KlGLHFSpNP+aSQQpSDVLVplGs......YaFHlPPsKcDFcpLPHLGsLspSaRNP+s+MsLspAKpLLppYsGhKpc..pss+...ppppYpKPsa++LG- ........s.IspLAQuIFlVNRHAKAATNPKYLYhLKKpALE+LIs.EcKAlKEGLH...FSR.NPRFS..QQQSDVLI+LGD......YFFHIPPTKEDF+hLPHLG+LE..SS..YRNPK..TTLSLTlAKKsLQ-YlG.cshc.ppp+..scss.............hh............ 0 1 11 14 +14020 PF14178 YppF YppF-like protein Eberhardt R re3 Jackhmmer:P50834 Family The YppF-like protein family includes the B. subtilis YppF protein Swiss:P50834, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There is a conserved LLDF sequence motif. 
24.70 24.70 24.70 43.70 23.80 24.60 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -8.73 0.72 -4.44 9 125 2011-05-12 13:26:50 2011-05-12 14:26:50 1 1 123 0 15 45 0 59.80 62 92.84 CHANGED Msls-L+ppFhphKpYpPtshNELLDFAR+hYlcGcIsIs-YRsLl+ELEtsGAspPspp MVLGDLKQAFSQKKGYtTENsNELLDFARHhYLEGKICISDYRTlIKELEINGATKPTT.h... 0 1 6 9 +14021 PF14179 YppG YppG-like protein Eberhardt R re3 Jackhmmer:P50835 Family The YppG-like protein family includes the B. subtilis YppG protein Swiss:P50835, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 115 and 181 amino acids in length. There are two completely conserved residues (F and G) that may be functionally important. 27.00 27.00 28.90 38.40 22.20 21.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.93 0.71 -3.47 18 134 2011-05-12 13:35:43 2011-05-12 14:35:43 1 1 133 0 20 101 0 128.70 55 86.65 CHANGED psaptsh.sahP.t...........................tp.pPYhp.s.pt...............................tPs.s.p.Ptttph..h.s..h....pPY.Pp.ps.h..p.P....sphpShhuQFKsp-Gs..aDhNKMhsTsGQhhsssN.......QVuullKGlsuhFK ..........................................p.tsYh..p.......................................................Q.pQPYhspt........t...................................hP..s.sPYsNQQuMFYPP.K............QPY.PTtsKQK....QQQP.....SQFSSFVSQFKsSDGN..YDVNKMMNTAGQMMNAMNQVTGIVKQVGGFF.u....... 0 3 13 15 +14023 PF14181 YqfQ YqfQ-like protein Eberhardt R re3 Jackhmmer:P54474 Family The YqfQ-like protein family includes the B. subtilis YqfQ protein Swiss:P54474, also known as VrrA, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 146 and 237 amino acids in length. There are two conserved sequence motifs: QYGP and PKLY. 
27.00 27.00 27.20 29.10 23.30 26.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.35 0.71 -4.20 17 138 2011-05-12 13:47:31 2011-05-12 14:47:31 1 1 131 0 22 119 0 165.80 41 84.95 CHANGED tGhLu+Lhstu..............thtu........................................................uuusts.suussuu...htshhs..slosMLsNlQKsLulAQpVsPM...VQQYGPLlRNLPuMh+l...a+pLssucs....tsppsspcssspppppp..t.psppppcpccph.pspppctppcppsst.p.......................SpPKLYl ..........................................tGhLu+hh.tt...........s.pthtu...................................................................htssts+GuAsss.....AuuSuGhGuhhuNLhS..N..sosMlNNlpKV.pVsQoVuPh...VpQYGPlhRNLPSllKI...hpthpSscp......spEspsEchoEpsEstossss........ppKc++KKhl.csshc+ph.c.psspphs......................oKPKLYl..................................... 0 4 14 16 +14024 PF14182 YgaB YgaB-like protein Eberhardt R re3 Jackhmmer:P71080 Family The YgaB-like protein family includes the B. subtilis YgaB protein Swiss:P71080, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 27.00 27.00 37.50 37.20 24.10 23.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -9.79 0.72 -3.74 11 129 2011-05-12 13:57:44 2011-05-12 14:57:44 1 1 129 0 14 60 0 78.00 63 90.65 CHANGED pF-+LVuEQhcTMDcLLpLQSELERCQpIE+pLhchpccscLpsIppEIppp+ccL+pIQchFpKQTEpVIpSYppp.....Eh ............DFDKLVGEQLETMDELLKLQuHLEKYQQIEhsE+-pCDKKELHFIRQEIYRTElALKhLHEKFEcQTNsVIQSFcTEK.h... 0 1 6 8 +14025 PF14183 YwpF YwpF-like protein Eberhardt R re3 Jackhmmer:P94588 Family The YwpF-like protein family includes the B. subtilis YwpF protein Swiss:P94588, which is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 146 and 167 amino acids in length. There is a conserved IIN sequence motif. 
27.00 27.00 52.20 129.70 23.70 22.30 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.57 0.71 -10.57 0.71 -4.45 21 269 2011-05-12 14:41:30 2011-05-12 15:41:30 1 1 269 0 30 86 0 132.30 70 88.58 CHANGED KTFKLlsLpll...pcpt.....php-hsLhDGLIINKEcupspWLlEsllsppatshFcphhppppplplpVsIT+tsN-PAthhssV+sIscl...scplSVLh-Gpllpp+.pshuEplLcsLlcEGlsG-pLlcp.FKpph ..KTFKAVRFQIV...NEHG.....RIIEYELEDGVIINKE-SGTGWLLEIVISNEHYETFKEYQDNEQLLDIRVVITRPANDPALF-oTVKSIKNF...KTTMSIVFECHIY.TLR.QQYAESLLEQLIDDGLSGEELKKoFNRMM... 0 7 17 26 +14026 PF14184 YrvL Regulatory protein YrvL Eberhardt R re3 Jackhmmer:Q7WY71 Family YrvL prevents expression and activity of the YrvI sigma factor. It may function as an anti-sigma factor [1,2] 27.00 27.00 27.40 27.20 22.70 21.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.77 0.71 -4.36 16 154 2011-05-12 15:00:41 2011-05-12 16:00:41 1 1 131 0 16 108 1 118.50 39 91.84 CHANGED htphhhlhlhhslhhhllhuhhhhshsulFplhGhpYpShsullLFhllhhlL...uhhl-hht+sllpslhph.hhsphhhhlLhhhl-hhhsahslahsDpahcSlplsshsclllulhhallshhhsp..............cpp ............h..hhhlh.hhhlhlhllFuh.hFFlhhGlFplhGlpYpShsuLLLFhLlhhhL....shhh....hh+hllhshtpt..hsphlslhL..hull-lhhsWhsIHhAD-aI-SVplSshsElslsLhhalLsKhhsc.....c.....ct......... 0 5 9 11 +14027 PF14185 SpoIISB_antitox Antitoxin SpoIISB, type II toxin-antitoxin system Eberhardt R re3 Jackhmmer:O34800 Family Members of this family act as antitoxins. They bind to the SpoIISA toxin, Pfam:PF14171. They are disordered proteins which adopt structure only when bound to SpoIISA [1]. 27.00 27.00 48.60 48.40 24.20 23.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.84 0.72 -4.22 5 24 2011-05-12 15:08:58 2011-05-12 16:08:58 1 1 24 2 5 17 0 55.60 70 98.31 CHANGED MEpAFQscpps.++tpPFKhlKK+SpTSlAcYcVSPHTcRIFKcNERLIDEYKpKKA ..........MEpAFQNspp..+tAKPFKhhKKRSpTSlASYQVSPHTARIFKENERLIDcYKpKKA. 
0 1 2 3 +14028 PF14186 Aida_C2 Cytoskeletal adhesion Coggill P pcc Zhang D, Aravind L Domain This is the C-terminal domain of the axin-interacting protein family, and is a distinct version of the C2 domain. This domain is critical for interactions with cytoskeletal in the context of cellular adhesion points [2]. 25.00 25.00 40.30 27.90 23.70 22.10 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.66 0.71 -4.95 11 105 2012-10-10 12:23:49 2011-05-12 16:14:55 1 12 62 3 71 90 0 136.50 53 24.73 CHANGED uohhPshsppsGsstLslhI-+lu.lKDAspahsPahoVoVhDssGpslpssQ-TPlu..pp+sssalhFs.sslplQsslcchsc.GuAlhhEF+HYKsKK+KhSs+CWuFMEhD-l+sG....ssslElYtKPTDh+R.KKlpLlotKshaL+lphp .......................................uoLlPtlsppPGhThLol+IEKIG.LKDAuph..ID..PahTVS.VKDhsGhDLsssQDTPVA....s++E-sYlaFs.sslclQ+pl....EKLsK.GAA.IFFEFKHYKPK..KRhTSTK...........CFAFMEM..DEIKsG............PlVIELYKKPTDFKR..KKLpLLTcKPLYLHLp.o.............................................. 0 20 28 43 +14029 PF14187 DUF4310 Domain of unknown function (DUF4310) Eberhardt R re3 Jackhmmer:B0P5U5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 214 and 231 amino acids in length. 27.00 27.00 29.30 29.30 19.60 18.80 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.47 0.70 -4.83 9 318 2011-05-13 07:18:32 2011-05-13 08:18:32 1 2 315 0 37 122 2 207.80 80 95.54 CHANGED ppp+sFWaADWoFPlhVulhuAulFAGTHMYhsYGlGAFNElAlVAMLcAGlssGsYGAAAAFGASFLFARILEGsLVGILDlGGSl.TGlGlGlPAlLLuuGhstslsNFsLSLlTGuVlGLlIGhlIIhlRKhTlsQusSTaGADlMMGAGNsoGRFLGPLIILSAhsASIPlGIGShlGAAlFYhacKPIAGGAILGAMlhGulFPl ..cpsKuFWYADWSFPIFVGLLSuGVFAGTHMYYLYGlGAFNEVAFVAMLKAGhDTGsYGAVAAFGASFLFARIIEGSLVGILDIGGAIQTGVGLGVPALLLGAGhlaPVuNFhASLlTGLVlGLAIGYIIILARKFTINQSsSTYGADVMMGAGNsSGRFLGPLIILSAhoASIPIGlGSLlGALLFYIWpKPITGGAILGAMILGuIFPl......... 
0 4 9 23 +14030 PF14188 DUF4311 Domain of unknown function (DUF4311) Eberhardt R re3 Jackhmmer:B0P5U6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 260 amino acids in length. 27.00 27.00 98.70 98.40 23.00 22.70 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.23 0.70 -4.83 9 329 2011-05-13 07:36:43 2011-05-13 08:36:43 1 1 314 0 38 136 0 202.20 82 82.56 CHANGED ARMFHAPssQuMGAFRTLGELNACpGDPhSHFSFGLGFhFNAWASsVGAGAhTQDVpHRIIPNWAAAsLLhKNKsVpETL+DPKKMuIuGAlIGslVVshLNsTAuuIPESLQslAscVLVPAANhLINsVMPIlFWLAAlDAGK+TGhWGTlhGGLupLIMGNAVPGlVLGILIGKGVD-SGWN+lTKsMhsslllLFllSGFFRGFDhpLl ......ARMFHAPTTQGMGAFRTLGELNSCEGDPASHFSFGLGFFFNAWASSVAAGuFTQDVDHRIIPNWuAAALhlKNRNVu-TLHDPKKMAIAsAlIGMlVVsFLNhTASuVPEALQVTAVKVLVPAANlLVNhVMPVIFWLAAIDAGK+SGFWATlFGGhAQLIMGNAVPGLVLGILIGKGVEESGWN+VTKlMMsAIVLLFVLSGFFRGFDhKMI....................... 0 4 9 24 +14031 PF14189 DUF4312 Domain of unknown function (DUF4312) Eberhardt R re3 Jackhmmer:B0P5U7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 99 and 118 amino acids in length. 27.00 27.00 60.20 59.90 18.70 16.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.53 0.72 -4.15 20 309 2011-05-13 07:57:18 2011-05-13 08:57:18 1 1 308 0 38 108 1 84.80 62 80.17 CHANGED phspoVpVsGKGcoKpcAFAsALuplQ+pVl+ssspllLRIEPh-VpllcAcEpspsEKFLFFFhPRcRppYpVcL-loVsVThI ....phTTTVpVpGKG-sKs+AFAsALNcVQssVh+E.oshlLLRIEPpDVcIlQA+EsVppEtFLFFFL.RcR+oYuVEL-VTVsVTAI.. 0 4 9 23 +14032 PF14190 DUF4313 Domain of unknown function (DUF4313) Eberhardt R re3 Jackhmmer:B0P650 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 136 and 171 amino acids in length. 
27.00 27.00 28.00 34.50 22.30 19.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.20 0.72 -4.32 20 120 2011-05-13 08:28:26 2011-05-13 09:28:26 1 1 84 0 7 94 5 103.80 32 61.26 CHANGED lpLpVssYhssss....LhItLhpp-...psphEsausLTVNLsshh.....spAFIsssss.c-hhpFlpcppLucsoGhptpSGaspYsthtFshc+LpcLDP-GhpcYtcphs ......hhlspYhsNst....LhlsLhs.c....tp.E.assLTVNLsshss.....sscuFlssNps..s-hhcFlpcNcLucso.....Ghpt+SGaspYshhhFslscLtchsPcshpchttt.s..... 0 2 7 7 +14033 PF14191 YodL YodL-like Eberhardt R re3 Jackhmmer:B0P785 Family The YodL-like protein family includes the B. subtilis YodL protein Swiss:O30472, which is functionally uncharacterised. This domain family is found in bacteria, and is approximately 100 amino acids in length. There are two completely conserved residues (Y and D) that may be functionally important. 24.00 24.00 25.10 24.50 23.10 22.60 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.38 0.72 -3.53 50 285 2011-05-13 08:42:40 2011-05-13 09:42:40 1 22 147 0 33 244 34 101.40 33 14.67 CHANGED tcsasIYQL+t..tscstsh+Fhsh-pLpppGh......pschpsYchVYsutlp........spsLEslap+FNhc+.P.tDacGHSLSlSDllhLppsGcsosaYVD.shGFpplstFh .......................t.csauIYQlpt...spstsh+F.sh-tLpttsh......plctpsYclVYsusLs.........spsL.Ecl...appFNl..c+..P.tD........acG+SLSVSDlVslcp..sG.c..sosaY.l.D.ohGFpplstF.............. 0 20 28 29 +14034 PF14192 DUF4314 Domain of unknown function (DUF4314) Eberhardt R re3 Jackhmmer:B0P797 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is typically between 56 and 93 amino acids in length. 
24.00 24.00 24.30 24.40 23.90 23.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.42 0.72 -3.74 61 138 2011-05-13 08:50:07 2011-05-13 09:50:07 1 4 85 0 31 127 2 97.20 22 44.82 CHANGED stGoRshLht...................hs.....cs.t.h.tusp..........................................usl.tVD....................Dhupl................p....................sh........hDs.GpuL............sll.Gc-hhphl ........................................ssRshhhh...................hs..........ss.t.ltpt.sh....................................................usl.sV-....t..hhssht....t..sttpl................p............................hh..h..st.hDs.Gpuh............tl..Gp.hhph......................... 0 22 28 31 +14035 PF14193 DUF4315 Domain of unknown function (DUF4315) Eberhardt R re3 Jackhmmer:B0P8U3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. 27.00 27.00 27.20 27.20 26.70 26.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -9.83 0.72 -4.06 19 219 2011-05-13 09:28:17 2011-05-13 10:28:17 1 1 101 0 16 153 27 84.00 38 94.37 CHANGED sKlcKlptEI-Ks+pKIsEhQu.......+L+ELEppKTEhENLEIVplVRuhsho.p-LsshLpth.........uGpssPtst.......ppc-ttcEc .....sKl-+Icp-IcKs+...cKIsEhQp.......+L+pLEspKsEtENl-IVphVRuh+hoPppLsAhLpsh...................shtssPtsps.t....ppEcp....t.................................. 0 11 16 16 +14036 PF14194 Cys_rich_VLP Cysteine-rich VLP Eberhardt R re3 Jackhmmer:B0P8U5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. It contains 6 conserved cysteines and a conserved VLP sequence motif. 
27.00 27.00 29.30 29.30 22.90 21.90 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.02 0.72 -4.23 20 192 2011-05-13 09:48:24 2011-05-13 10:48:24 1 3 84 0 20 140 9 54.90 55 41.26 CHANGED TtpQ+++lptLl+phCsNY.Ds..GNCLhLD-G-spsCsQsIS.aSlhC+YFRpAVLPtD .......tQp+psppLl+tpCCNYDs..GNClhLDDG.-sCsCsQoIS.aSlh..C+WFRtAVLPhD.. 0 11 20 20 +14037 PF14195 DUF4316 Domain of unknown function (DUF4316) Eberhardt R re3 Jackhmmer:B0P8U6 Family This domain is functionally uncharacterised. This domain is found in bacteria, and is typically between 56 and 95 amino acids in length. 25.00 25.00 25.20 25.20 24.40 24.70 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.70 0.72 -3.90 15 205 2011-05-13 10:04:25 2011-05-13 11:04:25 1 13 99 0 14 169 30 83.90 37 10.96 CHANGED +pNPLKsAElohE...QNYNMlDGllNN....sPsh.....................................s--KPSlhD+LKpspp+pcu+c....................ccEp ......-NsLKsAEhshE...pNYNMIDGlINN.....tsPohs-lEtt.pt..................................................................tt.pttp.ccKsSlhcpL+stpcpppp.+p.......s..ppp.....p............................................................................ 0 9 14 14 +14038 PF14196 ATC_hydrolase L-2-amino-thiazoline-4-carboxylic acid hydrolase Eberhardt R re3 Jackhmmer:B0P9R4 Family This family of enzymes catalyses the conversion of L-2-amino-delta2-thiazoline-4-carboxylic acid (L-ATC) to N-carbamoyl-L-cysteine [1]. It cleaves the carbon-sulphur bond in the ring structure of L-ATC to produce N-carbamoyl-L-cysteine [2]. 
25.50 25.50 25.60 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.26 0.71 -3.81 62 369 2011-05-13 10:27:15 2011-05-13 11:27:15 1 8 272 0 100 357 53 152.10 18 73.90 CHANGED lt+cth.............................hpAthhthlhcpht.cchup..cpuppll.pcAlpphut....ttGps.httph.t.....ssshp.sFts.hhhshtpssshc...chhc..........tscschthchpcCshscha+chGh..pphthhhCch....Dtshspuas....slc.hp+spTlscGsspCcFpa ............................................................................tththht.......................A.hhh...hhctht..cphs....ppspthh...tculp.t.huh.........ptucp..hstt..t...........pshp..patp...ht...htht..t..s..ts.hc.....chhp..........ssc..p..p..hthchpcCshschh+chGh....tcht.hhCph....Dt.shhpsht.........slp.h.p.R....s....pT.lu.p.G.sspC-apa................................... 0 56 76 86 +14039 PF14197 Cep57_CLD_2 Centrosome localisation domain of PPC89 Wood V, Coggill P pcc Jackhmmer:Q10218 Family The N-terminal region of the fission yeast spindle pole body protein PPC89 has low similarity to the human Cep57 protein. The CLD or centrosome localisation domain of Cep57 and PPC89 is found at the N-terminus. This region localises to the centrosome internally to gamma-tubulin, suggesting that it is either on both centrioles or on a centromatrix component. This N-terminal region can also multimerise with the N-terminus of other Cep57 molecules. The C-terminal part, Family Cep57_MT_bd, Pfam:PF06657, is the microtubule-binding region of Cep57 and PPC89. 27.00 27.00 27.60 27.60 26.80 26.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.10 0.72 -4.03 24 78 2011-05-13 10:39:40 2011-05-13 11:39:40 1 5 64 0 67 80 0 72.30 30 8.30 CHANGED p+L-upp.sL..ps+L.DhhpR+sphp-....tthKpLsp...ERDtshppLus.Ah.csp-L+sE.-sLcpE...........Lcpph.tp ....cLEsphtsL..Qs+LDphsR+sssp-....tp.cpLsp...ERDpshppLs..AhtcspcL+pEh-sLppE...........Lcphp...................... 
0 13 34 55 +14040 PF14198 TnpV Transposon-encoded protein TnpV Eberhardt R re3 Jackhmmer:B0PBU6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 114 and 125 amino acids in length. 27.00 27.00 27.10 28.20 26.10 26.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.24 0.72 -4.17 56 477 2011-05-13 13:11:50 2011-05-13 14:11:50 1 2 167 0 34 365 68 101.40 42 88.22 CHANGED sYphhGD.Y...........alPsLp..lsc-pc....sl............G+YGch++pYL+Ea+sshYspLllsGcLhp+Ls-l-cpAp-ph-hllpQhtcppGlTEpLKspspMcWVppMNsl+spAEEIVhpEll ....................................pYh..GD.Y...........hlPsLplsppcp....sl..............GtaGph+ppYL+ca.+.shYspLlhoGc.LhsaLs-lscpAp-Rh-tllcpMtptpGl...T.EpLKspstMcWVtphNsI+spAEEIVhpElI........... 0 17 30 33 +14041 PF14199 DUF4317 Domain of unknown function (DUF4317) Eberhardt R re3 Jackhmmer:B0PBX6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 225 and 451 amino acids in length. There is a single completely conserved residue P that may be functionally important. 
27.00 27.00 36.80 27.90 21.50 26.40 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.04 0.70 -5.67 45 203 2011-05-13 13:17:54 2011-05-13 14:17:54 1 2 155 0 33 197 9 334.90 34 95.75 CHANGED M...N......KK-lsEI.+KpF+.-s..ssIo+lsGCYVst-.+-lhpphp-sFlsLp-EEh.KYLpIFKKsLSGolGcNLl-lcFsh..................cp-tpuupQphLhpL+pSpLcD-..thl-cFYc+lIcsYsY.ssNalIllh+ssYDlPsKssDs.pph.DsS-EVY..caILCoICPVpLsKsuLsYptp-NphcsRh...pDWlVpsPptGFLFPAFsDRusDlaslLYYo+csc-.c.cFlEplLGsshshTAt-QK-sFpsIlc-slG--sch..-slpslaEpLschIE-pc.t.....psEP.htLspp-lcclLppSGVs--ph-ph-psac-shss.......................csshhAsNll...soKplcl.........................................................................................................................................cT.DlslpVsPc+schVcophID.G++CLlItl..s-psplNGls .....MNKK-lhEl.++..paphsp...ssho+lsGCYVct-.pphhtphppsFhpLsp-Eh.K..YlclhKKsLoGslspNLhchpFsh..................pppttsup.....QphLhpL+pSpLc--....tLh-pFYcpll.-sY..pa.s..........tsahIllhHstYDlPsKssDs.tph.-.tS-EVY..callCulCPVpls+.uLta..tppphtsch....shhlphPt.GFlFPuFpDRssDlpplhYYoppspp.p.phh-phLsst....oAttpKtsFptllpcshupc.ph..chhhpl.-plsph..l-ppp.......p.-s..hLs.pplpplLtpuGl.p..htph-ptatp.h.t.......................p...hApslh...ss+phph................................................................................................................................ps.sl.lplpPpc.h..lcp.hlc.GpchllI.l..s-psplsGh........................ 
0 20 32 33 +14042 PF14200 RicinB_lectin_2 Ricin-type beta-trefoil lectin domain-like Coggill P pcc Jackhmmer:Q8X123 Domain \N 35.10 35.10 35.10 35.10 35.00 35.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.72 0.72 -3.66 49 1501 2012-10-02 19:42:32 2011-05-13 14:20:14 1 251 412 51 644 2662 47 100.70 22 25.03 CHANGED ssQpWphp....h....s..........ss.....t.......aplh...ss..s....o....uphLssss.ss.tpustltth.s.s.......sssppWplps.ss..s..G..t.apl.hsh...ss.s....h.sLc.lts.s.s.ss......susslhhaph...ts.ss...sQpWphp ..........................................t.............................su........h...........a.p.lh...st..t.....o.....GpsL...sl..ss..ss....s..s...sGs.p.l...tta.ssss......sssQpWpl.....ss....su..............s....G.....h...apl.tst........so.s.......................hsLD...lts..u..u..ss........sGs.s..lh.as......ss...ss.....sQpWph...................................... 0 285 489 608 +14043 PF14201 DUF4318 Domain of unknown function (DUF4318) Eberhardt R re3 Jackhmmer:B0PDG9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. There is a single completely conserved residue F that may be functionally important. 27.00 27.00 27.30 27.50 25.30 24.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.37 0.72 -4.15 13 131 2011-05-13 13:28:57 2011-05-13 14:28:57 1 1 104 0 4 69 0 70.70 52 79.81 CHANGED +KuFhI-L-DuhpYPoscsICpAlEpYstcsppslcFlu+scPl.hhl-sshYElcl...phuRGsYa.lpC+El ..........pKSFFIELDDuLTYPSuEs........IsoAIEpYssEsNEpL+FESKsKPIhFYL-ss.hYcsEl...+MARG.GYY.ISCpEV........... 0 1 3 3 +14044 PF14202 TnpW Transposon-encoded protein TnpW Eberhardt R re3 Jackhmmer:Q9L782 Family This family of proteins is found in bacteria. Proteins in this family are typically between 54 and 75 amino acids in length. 
There is a single completely conserved residue G that may be functionally important. 25.00 25.00 26.90 26.30 24.00 23.60 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.21 0.72 -7.44 0.72 -4.36 31 347 2011-05-13 13:45:03 2011-05-13 14:45:03 1 2 153 0 30 225 25 36.90 36 58.15 CHANGED sshp++IGpTTYhVplpFscsupEThpDKlpRllcs- .....hhpp+IGposahVplaFscsus-ThpDKlt+llctE......... 0 16 28 28 +14045 PF14203 DUF4319 Domain of unknown function (DUF4319) Eberhardt R re3 Jackhmmer:B0PDI8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 70 amino acids in length. There are two completely conserved residues (E and K) that may be functionally important. 27.00 27.00 27.30 35.90 22.70 25.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.64 0.72 -8.85 0.72 -4.43 27 181 2011-05-13 14:14:01 2011-05-13 15:14:01 1 1 81 1 7 129 22 64.30 37 90.68 CHANGED pFTlEEpNLhslYps.uoRpphIcslpth.hsah...Ds-.....MtpLsppslsKLpshoDsEaspLplhss .......pFThEEhNLhslYss.......uoRpslI-slpth.hs..h....-s-......hRcLsspslsKLpuMTDu-FucLplhs.............. 0 4 7 7 +14046 PF14204 Ribosomal_L18_c Ribosomal L18 C-terminal region Coggill P pcc Jackhmmer:Q9FL18 Domain This domain is the C-terminal end of ribosomal L18/L5 proteins. 23.00 23.00 23.10 24.20 22.50 22.50 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.23 0.72 -3.43 90 635 2011-05-13 14:31:27 2011-05-13 15:31:27 1 8 464 10 360 612 6 87.50 41 31.06 CHANGED AEshRcaIaGtHVA-YMcpLpE-D-EpY+pQFSpYIK..pGlsADslEchYpcAHtAIR......tDPsh.ccpp...........pcph.hpp....K+apppKLThcpR+pRVtpK ..........AEshRcaIhGtHVA-YMctLtE-...D..E......EtY+cQFSpYIc...pulss-sl.E-hYccAHsAIR......ts.Psh.ctp.......................pc.p.h...pp..........K+apt.Klohtp++s+ltt......................................................... 
0 121 192 283 +14047 PF14205 Cys_rich_KTR Cysteine-rich KTR Eberhardt R re3 Jackhmmer:B0PDJ5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are approximately 60 amino acids in length. There are 4 conserved cysteines and a conserved KTR sequence motif. 24.50 24.50 25.00 24.90 24.00 23.60 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.82 0.72 -4.51 23 165 2011-05-13 14:39:52 2011-05-13 15:39:52 1 3 119 0 10 88 7 52.00 56 84.28 CHANGED pscWlLCPlCGNKTRsKIRcDTlLcNFPLYCPKCKQEsLIsVcpLpIoVIKEP...DA .......t.WlhCPlCGsKTRlKIRcDT.L+NFPLaC..PKC+pEsLIclc.phcloVIpEPDA......... 0 7 9 9 +14048 PF14206 Cys_rich_CPCC Cysteine-rich CPCC Eberhardt R re3 Jackhmmer:B0PDW3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 68 and 104 amino acids in length. There are six conserved cysteines and a conserved CPCC sequence motif. 24.50 24.50 24.90 25.00 24.30 23.20 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.02 0.72 -4.42 25 243 2011-05-13 14:50:38 2011-05-13 15:50:38 1 7 210 0 43 194 2 68.40 37 67.44 CHANGED patCPCCGYhTlspcsst...a-ICslCFWEDDslphpss-ht.uGuN.clSLp-ApcNFtcaGAC-pchhph.VRpPts.--h ................hpCPCCGhhThp..s..........a-ICslCaWEsDsh............t.tps........s.................h........t..uGsN.phoLpcA+pNahthts...................h.................................. 0 15 33 40 +14049 PF14207 DpnD-PcfM DpnD/PcfM-like protein Eberhardt R re3 Jackhmmer:B0PE77 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 57 and 153 amino acids in length. There are two completely conserved residues (E and A) that may be functionally important. 
25.00 25.00 26.80 26.10 24.00 23.10 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.12 0.72 -4.21 18 267 2011-05-13 15:01:36 2011-05-13 16:01:36 1 2 239 0 12 86 7 48.60 53 41.50 CHANGED K....YcVcIsETLp+hVpVcAcoc--AhphspchYpsp-..IVLss-DFps.s- .....KKYsVEIsETLSRlVSlEAE.s.D.EAccLVc-pYpspE..IVLDADDFpsh-................ 0 7 10 11 +14050 PF14208 DUF4320 Domain of unknown function (DUF4320) Eberhardt R re3 Jackhmmer:B0PFN6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 120 and 131 amino acids in length. There are two completely conserved residues (G and Y) that may be functionally important. 25.00 25.00 25.10 25.20 24.30 23.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.45 0.71 -10.13 0.71 -4.14 20 145 2011-05-16 07:46:53 2011-05-16 08:46:53 1 1 99 0 15 97 0 116.10 30 89.67 CHANGED llLsshllIALuVclhPlalsKppLDsFAsELVRpAEluGplu.......sETsp+ttsLcE+TGlsPp..lpWS............psG+IQLNp-lsVTlThchclGlFu....sFu...SFPlTlpApAoG+SEVYaK ...hlhhulllIsLhspsshhhhph.chcsaAsphlc.AEpsGGhs.......scssshltslp.c+.httpsh....hsWc.-..........ppG+lpaNpslshplpuchc.hhlFt....shs...shclslpAptsGhuplYa+................ 0 11 15 15 +14051 PF14209 DUF4321 Domain of unknown function (DUF4321) Eberhardt R re3 Jackhmmer:B0PGC7 Family This family of proteins is functionally uncharacterised. It is found in bacteria, and is approximately 50 amino acids in length. 27.00 27.00 27.50 27.50 25.90 25.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.15 0.72 -4.27 43 156 2011-05-16 08:00:46 2011-05-16 09:00:46 1 1 155 0 48 130 27 48.70 31 56.63 CHANGED hlsaLshshs..lGhsss......slDLtllplohG.lslclNlhoIlGlllAlhlY ............lsaLshshs..lGhsssh.....sLDLtllploFG.lslclslhoIlGlllAlhlY..... 
0 29 43 46 +14052 PF14210 DUF4322 Domain of unknown function (DUF4322) Eberhardt R re3 Jackhmmer:D2PCT9 Family This presumed domain is functionally uncharacterised. This domain family is found in archaea, and is approximately 60 amino acids in length. There is a conserved QTV sequence motif. 27.00 27.00 28.20 33.90 20.30 22.60 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -9.13 0.72 -4.50 7 162 2011-05-16 09:56:38 2011-05-16 10:56:38 1 1 13 0 20 159 0 58.40 72 22.23 CHANGED hTPsh.pp.shpQIsaKLLShlsFpGRKuEEVpKsLVSAuLhpDSVENKuptaslSPQTVRNYsEE ...lh..LPHQNNlQQIGYKLLSMLNFpG+KuEEVA+TLISACLWNDSVEsKSRAYsVSPQTVRNYVEc............ 0 18 18 20 +14055 PF14213 DUF4325 Domain of unknown function (DUF4325) Eberhardt R re3 Jackhmmer:C4FKE5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 99 and 341 amino acids in length. 27.00 27.00 27.10 27.40 26.80 26.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.24 0.72 -4.33 60 205 2011-05-17 10:57:51 2011-05-17 11:57:51 1 5 197 0 61 187 21 71.20 22 43.03 CHANGED ppupclhpp.lpphlptst...VhlDFpul.phhspuFhsphhuplhtphs......tthcpplphhshscsspthlp+.lhsp ................pupplhpp..l.....t....phhpttp....lh.lDFcGl.stlusuFhcEsFushhp.c..hs.........t.hcpplphh..shspphpthlth.h................................ 0 19 43 52 +14056 PF14214 Helitron_like_N Helitron helicase-like domain at N-terminus Coggill P pcc Jackhmmer:Q9S9S3 Family This family is found in Helitrons, recently recognised eukaryotic transposons that are predicted to amplify by a rolling-circle mechanism. In many instances a protein-coding gene is disrupted by their insertion. 
28.70 28.70 28.80 28.80 28.40 28.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -11.11 0.71 -4.56 42 666 2011-05-17 12:28:46 2011-05-17 13:28:46 1 43 74 0 525 652 26 155.40 26 14.84 CHANGED Ya..sa+l..phRpsphshhhp...st+LhpQa..hVDtaspl-ps..RLpalp..p..pQpplRschhpulpDA............hppups.cs...sphGc+ll.LPuSasG.u.RahhppYpDuh.....Als+taGhPclFlTFTs.Ns+WsEItct...............lt........sppspD.....................RPDllsRVF+hKlcpLhc-lhpp...phFGp.shs.hhaslEFQKRG.LPHsHlLla ...........................................................................................................h...h.....hp.....h.pt....phthht..........pp.tph.....pt.................h.tt..l.pt...................................ht.tttt..ph......p.t...h...pphh..hstshhG.....s...........phhhpph..-sh.....Ahspp....hGt..P.slFl.Thos.s....s....p.Wt-.l...hph...................lt...........tp.psp..D...................................cPshssRh..Fph+......hpthhp.lhtt...................thh.Gp...l.t.hhhhhEaQtRG.sHhHhll.......................................... 0 194 351 473 +14057 PF14215 bHLH-MYC_N bHLH-MYC and R2R3-MYB transcription factors N-terminal Coggill P pcc Jackhmmer:A8MSG2 Family This is the N-terminal region of a family of MYB and MYC transcription factors. The DNA-binding HLH domain is further downstream, Pfam:PF00010. Members of the MYB and MYC family regulate the biosynthesis of phenylpropanoids in several plant species (DOI:10.1007/s11295-009-0232-y). 
22.20 22.20 22.20 23.20 21.70 22.00 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.04 0.71 -4.04 95 661 2011-05-17 12:29:08 2011-05-17 13:29:08 1 12 127 0 262 681 0 164.70 31 30.79 CHANGED Lp....ppLpsllp.......stpWsYAlFWp.s..........................................lLsWuDGaap.us.p.t.t......................t.t.t+pp.hpch.p.hs.........................................hth...-lsssEhaahssh.asa.....GpG...........lsG+shssupphWlssspp........h..ssphhs.........R.uh.Ap.tuh...p..............Tllsl..Ps..s.GVlELGSochlhEshshlppl+shF .....................................................................................pptLthhsp.......s.p...WoYulFWphsspp...........................................................hLh.WsDGaas.Gshcscpt...........t...................ththpRsctLc-Lht.h..s...............................t.sssshss--ls-sEha..alhshsasF.........GpG.....................LPG+shusspplWlssspp................................s....ssphht...........R.uhhAp..tuh......................TlVslPh..hsG..VlELGoTchl.E-.shlppl+shF................................. 0 48 172 220 +14058 PF14216 DUF4326 Domain of unknown function (DUF4326) Eberhardt R re3 Jackhmmer:A8ZQW0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 100 and 162 amino acids in length. There are two completely conserved residues (P and C) that may be functionally important. 
25.00 25.00 27.30 25.30 24.00 24.00 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.28 0.72 -3.41 43 174 2011-05-17 12:59:55 2011-05-17 13:59:55 1 3 156 0 61 180 43 92.70 29 68.43 CHANGED splspp.+ttphss...........t.h.YlGR..........sop.....W.GNPFshsp.................sssR........ppslcca+pal.................tppshhhptlt.cL.......+G.+.sLuCaCs......P.........CHuDVLt-l .......................................p..ht.ths...........h..al.GR..............soh.....a.GNPFthtt..............................................ttsR.........ppslcta+palh..................................tssphhhptlt..pL.......c.G...+..pLuCa.Ct..................P.ts.............CHuDVlhc................ 0 19 47 56 +14059 PF14217 DUF4327 Domain of unknown function (DUF4327) Eberhardt R re3 Jackhmmer:B0BYV4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. 27.00 27.00 75.80 75.60 21.30 18.80 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.14 0.72 -4.16 39 113 2011-05-17 13:09:37 2011-05-17 14:09:37 1 1 41 0 44 101 0 67.60 45 88.51 CHANGED paol-hIp-EARpLVcpGllpRpQPIasLspalPuREWshlEpELEcp-FLLRDpIsDLlu.sEsWc..-D ...YolchIp-Es+pLVcpGllsRpQPIYsLCpaIPuREWstlEpELEcp-FLLRDpIuDLlu.sEsW-pD... 0 5 32 44 +14060 PF14218 COP23 Circadian oscillating protein COP23 Eberhardt R re3 Jackhmmer:B0C7I3 Family This family includes the circadian oscillating protein COP23 from Cyanothece sp. (strain PCC 8801), Swiss:Q54702. The levels of this peripheral membrane protein display a circadian oscillation [1]. 
27.00 27.00 30.90 30.40 17.60 16.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -11.05 0.71 -4.60 63 131 2011-05-17 13:43:54 2011-05-17 14:43:54 1 6 38 0 42 156 4 146.20 22 65.35 CHANGED pFhCt.........................................hssp............sTlstpsp..uph...slltWs....s.h.suuaoPppRCppVosRhpphtss....h.................th.hlps..Ghh.NspsVlCsssptss.sC.....pslLhTL..pssss.....sppsLpplhs.......t..p.................................ttstp......hlshsshLsss .................................................pFhCt........................................tsp.......sTlshpsp....uph...PllhWs....sth.susaoPppRCppVSsRhpphhps....h...............................thphlps..Gph.Nsp.sVICsssptss...sC...pslLhTL..pssss.....Pppslpplhshht....t....s....................................psstt.......lphp.hlt..t..................................... 0 1 33 42 +14061 PF14219 DUF4328 Domain of unknown function (DUF4328) Eberhardt R re3 Jackhmmer:B0C5R9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 218 and 342 amino acids in length. 
24.80 24.80 25.30 25.00 23.40 24.10 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.49 0.71 -4.96 51 224 2011-05-17 14:05:06 2011-05-17 15:05:06 1 7 194 0 73 216 1 164.40 23 55.87 CHANGED lltphpssh.............ht.sthstu-thssh.....suhsthlhhlsssllhlhWlhRs+tsApsht............ts.chsssh....slsuahlPlsNLhhPhphhtElapuotths.............................t.sthlhlW...W.....hhWl.................lutslshhshshs.....htsssptls..............tsshhslhsslhsls..sulhslhllcplsp..hp ..............................h..............h...hhttushhssh.....su.lsshhshlsshllhlhWLhttRssAhhhts...........hs.Rhust..hhuGhhlPhlNLhhshhhlhElhtspsphs.............................phpthlssW....W.....hsWl.................lussl.s.h..hs.hshp......hss..sspshs...................ssshh...hlluhlhuss..usshshhlhcthpp..................................... 0 29 57 68 +14062 PF14220 DUF4329 Domain of unknown function (DUF4329) Eberhardt R re3 Jackhmmer:B0C699 Family This domain is functionally uncharacterised. It is found in bacteria and eukaryotes, and is approximately 130 amino acids in length. It is often found in association with Pfam:PF05593 and Pfam:PF03527. There is a single completely conserved residue D and a highly conserved HTH motif which may be functionally important. 27.00 27.00 30.70 30.30 26.70 25.90 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.62 0.71 -4.17 20 415 2011-05-17 14:18:05 2011-05-17 15:18:05 1 16 244 0 18 432 7 126.10 48 15.32 CHANGED tpshApshLpplpscSIspspEYsGhIsp..sssGchhuo..psppG..........ppsushsths.s..shphVAuYHTHGuasps...........YssEl.SstDlpus........tpptlsGYluTPuGRlahlc.psppsp.phsshusl .........................I.DDhuh.ALshhNucSIsEsKEYuGLICK......ppG.cYF.so...sP.su.........s-pcus..hNhtCPpGoE+VusYHTHGh.Scs.............sYss..FSuKDhp.sh............uhspp.hu.Y...LGTPsssahthss+u+t.s.........sp............ 
0 8 12 15 +14063 PF14221 DUF4330 Domain of unknown function (DUF4330) Eberhardt R re3 Jackhmmer:B0C6A2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 165 and 177 amino acids in length. There is a single completely conserved residue G that may be functionally important. 27.00 27.00 30.40 28.90 23.30 21.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -10.97 0.71 -4.40 43 119 2011-05-17 14:26:20 2011-05-17 15:26:20 1 2 109 0 50 119 32 159.90 25 88.80 CHANGED M..t...llDspG+LFG+lSllDlsAslllLhslsGlhhhPtt..osss...Aph...ssspslpV..sl.lhsltsssPpshhtp.hpt.......spcssllIRNQPh......GplpltsVp.ls+plssspPDGsVhshsD...Pp.t..hphDhhhTLcGpupho.ssGs...VlGspclKIGsslELEGpsYphsGoVh...slcl ..................................M.llDpcG+LFGplsllDlhshllllhs.lsGlhhhstp.ssss.....Ap....ssspslcl..shhlhslpstsspthhtp.hpt.......sspsshhh+spshG....plpslp.h..spsh.hsspsDGp.Vh.thsc...P.....hthDhhlTlcupupho..ssGs.....llG.spcl+lGps.lpl-stsaphsusVhslp........................ 0 17 36 47 +14064 PF14222 MOR2-PAG1_N Cell morphogenesis N-terminal Coggill P pcc Jackhmmer:Q9LFV8 Family This family is the conserved N-terminal region of proteins that are involved in cell morphogenesis. 
20.30 20.30 20.50 21.50 19.30 20.00 hmmbuild -o /dev/null HMM SEED 552 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.96 0.70 -5.83 35 407 2011-05-17 14:28:14 2011-05-17 15:28:14 1 28 236 0 259 372 3 456.00 35 21.39 CHANGED u-..R+uhlSl...YllCRVLIEIhpQss.hp.slsp-htc+LEsllFsQL..KssD.s.....p.ls.sSsl+huNWslaApLLGhh..SchpF..sSVocRFls-Lcc........hp.p......pps......cs-sphthLIhGMRaL+l+saP.Esa-cSs-Fhpulu+hFspup.pc..lKpAaspllpplLLPlAussss-....lspP...pWs-slstlhschsphhs..K.s+HWss..u.FPLtssLLClSsp-hFhspWh....thlpsL.s+lKD+..spR..sluLpulsRLlWsYL..apss-ossssh++L-plhchhl..Psu+.........+..shlssD..shh-PLlpllphIuac+.Dash+pIIaPLlssshhtss.....lcplpPE+hllGIRuaLsIhsshppspt......................................t.h.tsppc......hls......s...ssth.spslppaa.chschlsc.IhllhDsshGupshh....s.....cphst.p..ssht...................h.hthpsD.ph...o..sp....+pth...-L.h+sslpAlPRCL..us..pIPa..ssLIslLspsssHlpssIupouupuL+uluppp..tsppVhhGFA+FlFs.FD-+asoh.s.th.Ls..cl-ssL+L.YlELLplWl ......................................-R+shs..aIhshVLlEllpQhs..hp.sh.c.....th......hpcl.slsFpph..Kht-..s.........s.sush+.h.hslaApllGhhup.t+.......F.uVpc+Fhs-Lcchp.p...........p.s......s.tphh.LlhGM+ah+l+hhP..EtaEtShpFhp.phuphFhcs...pc.tlKpAhstlhsplLlPlAuss.psE.............lshP......phtphlp.lh.pshph.s..+...p................+H.....................hs.........h..aP.LhssL.....LClSppphF..hs.pWh...................thlppLh.........K.................D......ph..p..tluLpulhRLlWsYh..hhps-o.s.....sT..p..+L.plhphlh.....Ppsp...................+..shlspD...h.ls.hlpllphIu.c+.D..ash+pllhsLlsssh.hp....................................h.plpPE+hsIGlRuaLsIhsslppt-t.ss...p...............................................................sssshpspph..pphls...........p.ptsps.uhp.Yasphpchlsp.IhhhhDpphGp.hhh.....s............ph..h..s..p...............................................c.cD..hh...o.....sp.....Ksph..-L.F+osltAlPRhl.........sc.........shsh.................ppLl-lLs+..h.olHh-pplpt.u.poLpslhhph..atpsVhhGFspFlhc.hsDha..sh...................
..l-ssl+hhlpLlp.Wh.................................................... 1 76 133 204 +14065 PF14223 UBN2 gag-polypeptide of LTR copia-type Coggill P pcc Jackhmmer:A5B9L3 Family This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.43 0.71 -10.24 0.71 -4.31 123 1538 2012-10-02 13:37:57 2011-05-17 15:37:42 1 195 75 0 589 2426 5 103.70 21 14.09 CHANGED coAKchW-sLcpha.Gss..+s+tuclppLcpca-plcM+-sEolc-ahs+lpslssclpslGp.phs-pclVcKlLpoLP.p+appllsul-pt..hDhpp.hol--lhupLpstEp+h.ppppsp ...................................................................stthWphlpt.atss..p..p......s...ph..lppph..t.thp........h.p.ss....c.s.l...p...-aht..+hp...sl.ss...p.l.....p.............s.l.....G.............p....s....h......s.........-p.........cl.lp..+..........lLp.....u.....L....s..pc..ap..hhss...lppp...sht.....ohp-lhspLh.shE.ph......t................................................... 0 132 305 345 +14066 PF14224 DUF4331 Domain of unknown function (DUF4331) Eberhardt R re3 Jackhmmer:B0C6N1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 223 and 526 amino acids in length. There is a conserved FPY sequence motif. 
27.00 27.00 32.60 32.40 26.80 26.80 hmmbuild -o /dev/null HMM SEED 359 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.61 0.70 -5.29 47 155 2011-05-17 14:40:04 2011-05-17 15:40:04 1 2 121 0 70 177 49 349.20 25 90.04 CHANGED SoHc-uP....htttsphDhTDhYhFpS.............tcsstlsllhNh.Php.......sssshhthsssuhYcl+l...........DscG......-..uht......D.lsapFpFss...............................................................................h..sshGptsh.........shtthtst.l.t...h...s......shpsFsGpRpDsFhhs....l.thapl.........................th...t.s.sshtsh.NVpolulclPtstls.t..........t.slGsWsossh.........................h.sGsasQlsRlGpPhlNcl..h...hsh...t.-.KDpaNsspPspD...upahshhtsshhstlhth...h..........................hsRsDllsshlsshst..........................h..s-hLclssuhPss........................................uuaPNGRphs.......DDVlDlsLphlhG.h.....................................t.hshs..cssshsss..shhssFPYLusP ...................................................SsHh-uP........hst...tsphD.oDhYsFpu.............tpsstsshlhNh..Pht.......sss.sh...hshs.sshYcl+I............DssG...D.uht.................D.lsapapFps...s...............................................h.p.ht...........................h.hs.sthGttshs..............shtth.tst.lht....hss..........shpsFsG.tpDsFhhs...htthhclht..............................h..sh...t.s.sshtsh.NVtolslclPpshlstt.................tsslusWsosph........................................sGsahQlsRlGtPhlNpl..h.hs.........t.s.KDtaNsstPtpD...spahphhhps...hshhht...hh...........................................................scschlshhhtsh.t..............................s-hLplssuhsss............................................suaP.NGRp.s.......DDVlDltLphlhG...................................s.....t.shh..sssshsst...h....ssFPYLs.P............................ 
0 19 44 65 +14067 PF14225 MOR2-PAG1_C Cell morphogenesis C-terminal Coggill P pcc Jackhmmer:Q9LFV8 Family This family is the conserved C-terminal region of proteins that are involved in cell morphogenesis. 25.00 25.00 25.50 25.50 24.90 24.60 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.63 0.70 -4.64 38 388 2011-05-17 14:50:41 2011-05-17 15:50:41 1 28 236 0 230 364 0 247.40 32 10.74 CHANGED cLltaPQLFWsssACLsolaEpEFlEuLphLs+hLsKlDLcsssshphLhsshPsp...........................Wp...GsF-GLQsLlhKGL+SSsoh-hTLclLs+Lshlsssplluss.po.RLlh.sllAsLPphLpph-pss..............pphhpsAptLuplA...cspshssLucllsuaucs+a.+opcDFlsphlshlpstaFPchpspslsaLlGLLpNshsWh+lpohplLpsllstlDhc....ps....hGs....D....LIuPLLRLLpT-hs.pALcVL.-plhshSGu.hhsc ...................t...hhsplFWsssuhLcSs...a...EhEal.uLcLLs+lLs+.....ls.Lcc....s....pspppLtphpsph.......................................................h....ssFsG.LQpLllKGh.....pSss..oh-hTlplLspLssl..spp.sll......ssu.......p.......s..t.....h.h.sllshLPphlpph-sss...................................................phsp..psA.pplAp.ls.............cstphs...sLAchhshYupppa.+sspsalshlsphL+.-sahschshphlsa.LhpLL.pp..uhs...hp.plLpllhslLsh....hDhp.............ps.............hss-.......l.lpsl.h+hl.posahtcALplL.chllohSuo.h..h................................................................................. 0 68 115 177 +14068 PF14226 DIOX_N non-haem dioxygenase in morphine synthesis N-terminal Coggill P pcc Jackhmmer:Q94JV6 Family This is the highly conserved N-terminal region of proteins with 2-oxoglutarate/Fe(II)-dependent dioxygenase activity. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.52 0.71 -3.55 783 5634 2011-05-17 14:57:58 2011-05-17 15:57:58 1 32 1392 66 2627 5670 1247 113.90 22 34.09 CHANGED lPlIDlu.............s.....ss......c......p...ph.....h.p.p...ltpAspchGF.F....l...sNH...G...ls..p.......p.llpchhp.huccF.F.s.LPh-...c.Ktc.h.....tp.....s....s......th...pG...Ys.sh...htpph..ps.................phDa.cEt.h.phtt.p..........s.........p............................h.sh...WPsp ....................................lPllDls........................ts...tp.......p..tph....hpp...ltcAsp..p..hG..F.F.............l..............s..N..H......G...l....s..t........................p...l...l...c.ch.hp.......hs..ccF.F...s.L...P...h-...p..Ktch.........tp.....s.....s.................th..pG...Ys..s..h...h.t.p.p.h.ps................................th-a.c-h.h..t.h.th................................................................................................................................................ 0 523 1614 2211 +14069 PF14227 UBN2_2 gag-polypeptide of LTR copia-type Coggill P pcc Jackhmmer:Q1PE19 Family This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.64 0.71 -10.25 0.71 -4.55 97 1112 2012-10-02 13:37:57 2011-05-17 16:31:27 1 127 81 0 605 2057 3 105.80 19 15.57 CHANGED ppoutplWppLcphahspshss+lhl...hp+lhsh...+Mp-sps.lp...palscFppllscLpsl.slpls-E...spshhlLsSLP.ss..acphhss...lh.st..splohcpltstlhs...c-hphppppppsps .........................................................pstphWptLpt.h...att....p.s.....h...s...s...c...hhl.....hcphhsh........+ht.-.......s.......ps..lh.......palpch.p.ph...sp.cL....p....s....h....p........h....t....l...s.-c...........h.s.hhlls...sLP..ss........ac.shhts....l..hp...t......p.p.h...shpt.lhstl..........c-.thttt.....st............................................. 0 306 380 445 +14070 PF14228 MOR2-PAG1_mid Cell morphogenesis central region Coggill P pcc Jackhmmer:Q9LFV8 Family This family is the conserved central region of proteins that are involved in cell morphogenesis. 27.00 10.00 27.10 10.00 26.60 9.90 hmmbuild -o /dev/null HMM SEED 1120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.43 0.70 -14.01 0.70 -6.99 3 1087 2011-05-17 15:34:39 2011-05-17 16:34:39 1 28 226 0 651 1024 4 217.20 9 27.26 CHANGED 
tEuSEFRASEMDAVGLIFLSSlDVQIR+TALELLRCVRAL+NDIRDYStpEphDspLKs-sEPIFIIDVlEENGEDIVQSCYWDoGRPYDLRREhDslPlDlTLQSIL.ESuDKuRWARCLSELV+YAAELCPSSVQ-AR.....LEVIpRLApITPsELGGKApQSQDTDoKLDQWLlYAMFACSCPPDSRE-uuLRAARDLaHLVFPSLKSGSEuatLAAThALGHSHLEVCEIMFGELTSFlEEVSSETEuKPKWK......SQK..t.RREDLRVHVANIYRhVAENIWPGMLoRKPlLRLHFLKFIEETsRQILsuPSENFQEIQPLRYALASVLRSLAPEFVDAKSERFDLRsRKRLFDLLLSWCDDoGSTWGQDGsSDYRREVERYKASQHsRSKDSlDKLoFDKEluEQVEAIQWASMNAMASLLYGPCFDDNARKMSGRVISWINSLFhEPAPRAPFGYSPADPRTPS.YSKa..TG..EGsRGuuGRDKQRGSHLRVLLAKoALKNLLQTNLDLFPACIDQCYSSDuuIADGYFSVLAEVYMRQEIPKCEIQRLLSLILYKVVDQoRQIRDDALQMLETLSlREWAEDGuEGuG+YRAAVVGNLPDSYQQFQYKLSuKLAKDHPELSEtLCEEIMQRQLDAVDIIAQHQVLTCMAPWIENLNFV+LKESGWSERLLKSLYYVTWRHGDQFPDEIEKLWSTVASKsRNIlPVLNFLITKGIEDCDSNASAEISGAFATYFSVAKRVSLYLARICPQQTIDHLVCELAQRMLEDssEPVR.ussKsDTSuNsVLEFSQGPssS.QlAolVDSQPHMSPLLVRGSLDGsIRNVSGNLSWRTAAVTGRSVSGPLSPMPPElsIlNVTTGRSGQLlPA...LMNMSGPLMGVRSSTGSLRSRHVSRDSGDYaLDTPNSuDDILHuG.sGsHGlNApELQSALQGHpQHhLSRADIALILLAEIAYENDEDFREHLPLLFHVTFVSMDSSEDIVLEHCQcLLVNLLYSLAGRHLELYEV.EsSDuENKQQVVSLIKYVQSKRGSMMWENEDPTLVRTELPSAALLSALVQSMVDAIFFQGDLRETWGuEALKWAMECTSRHLACRSHQIYRALRPSVTSDTCVSLLRCLHRCLGNPVPAVLG 
........................................................................................................................................................s.tts...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tttt...................................................................................................................................................................................................................................................................................... 0 151 278 466 +14071 PF14229 DUF4332 Domain of unknown function (DUF4332) Eberhardt R, Bateman A re3 Jackhmmer:B0C7H1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 134 and 356 amino acids in length. This domain contains helix-hairpin-helix motifs. 
22.30 22.30 22.30 22.30 22.20 22.20 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.12 0.71 -4.15 49 244 2012-10-03 02:11:09 2011-05-18 08:15:57 1 15 232 0 88 272 276 105.00 25 30.05 CHANGED .GlsptptppL.ppsGIpospsLLptucs.tuRhtLApphtlstpplh+htshADLs.Rl.uluhpausLL.pAGlsoVspLApps.sppLpppltclptppphs+ph.....Psls.VppWIppA+pl ..........................................................h..t.sl.p..th.t.t....tp..h...tt..ht.t.l.th..t.s.phh.ph.GluppastlLpcAGlsolp-L.sp.ps.sppLpppltclNcph.clttph.....PolpplppWIppAp.......................... 0 31 58 79 +14072 PF14230 DUF4333 Domain of unknown function (DUF4333) Eberhardt R re3 Jackhmmer:B0C9G5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 140 and 255 amino acids in length. There are two completely conserved C residues that may be functionally important. 27.00 27.00 27.80 27.30 24.10 25.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.65 0.72 -4.02 37 334 2011-05-18 08:13:58 2011-05-18 09:13:58 1 5 143 0 89 229 0 79.00 32 49.87 CHANGED sssssusuhhhss........slDtsplppslpphLs....sshGhpsss.VsCP..ss.cscsGsohpCs.hs..lsGpshpVsVTlpss.DG .......................h..h.hhhuh.hss....hs.p......slDpspltstlpphLs....sshGs...+sss.VsCP...ss.pscpGAshpCp.ls..ls.Gp.shpVsVTlsss.-G........ 0 18 65 80 +14073 PF14231 GXWXG GXWXG protein Eberhardt R re3 Jackhmmer:B0CA20 Family This domain is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. There is a conserved GXWXG motif. This domain is frequently found at the N-terminus of Pfam:PF14232. 
25.00 25.00 26.60 25.10 23.00 21.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.91 0.72 -4.32 50 169 2011-05-18 13:18:05 2011-05-18 14:18:05 1 2 133 0 86 180 1 58.80 39 34.55 CHANGED hslFDpLssVss-.hl.GpW+Gutl.TGHPh-GhLpshsWaGKpFhss-sVcPLlhhss...cG .......phassLssVps-thl.GpW+GsthsTGHshsGhLpthsWaGKpFpsspcVcPLlhhsssG....................... 0 9 53 72 +14074 PF14232 DUF4334 Domain of unknown function (DUF4334) Eberhardt R re3 Jackhmmer:B0CA20 Family This domain family is found in bacteria and eukaryotes, and is approximately 60 amino acids in length. This domain is frequently found at the C-terminus of Pfam:PF14231. 25.00 25.00 26.80 25.50 21.10 17.60 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -8.87 0.72 -4.33 50 165 2011-05-18 13:43:53 2011-05-18 14:43:53 1 2 132 0 85 176 1 60.60 45 35.32 CHANGED sppupApLRhhpaRGtsoAoMlYDphPIhDhFR+VD-....sslhGlM-hKs.........tspsaFFhLcR ...h.ttupApLphhcFRG.....ploAoMlYDtpPlhDaF++lD-....solhGlMshKs.........psp.a.aFhLcR............. 0 9 52 70 +14075 PF14233 DUF4335 Domain of unknown function (DUF4335) Eberhardt R re3 Jackhmmer:B0CBF9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 204 and 480 amino acids in length. There are two completely conserved residues (G and D) that may be functionally important. 
27.00 27.00 52.70 52.60 20.50 20.00 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.20 0.71 -4.83 55 113 2011-05-18 14:47:08 2011-05-18 15:47:08 1 1 70 0 44 125 119 193.20 27 57.39 CHANGED +pYs.PsCoLplpGhss.h....stspscshlslLsts-sp.h....sp...lcGs+-aLcsLhpsVssYspphLS...........................uhs.p.......phst-ssh.............................................lplps........psh.tH...cLhLpu........ttsssss.................hplpLsssQLaDLlpALDphtsDspsLsshshshp.......h++ht...h..sstshhpphsssslG............................ssulslsuhhh...hhlPhPp ........hpYs.PsCoL.l.uhss.h....st..sthpshhthLp.s-sp.h....st...lpGs+phLEsLhpsVssYspphLo...................................uh.........tptph...........................................................................lplps.........sh.tH......cLhlts.................tspsss.....................htlpLsssQLFDLlpAlDpahsDspsLsshshphp........++ht....tsstshhpphsssslG.........................shulshsuhhh...hhhs.P................................................... 
0 7 31 42 +14076 PF14234 DUF4336 Domain of unknown function (DUF4336) Coggill P pcc Jackhmmer:Q8RWI4 Family \N 27.00 27.00 27.20 27.10 26.10 26.80 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.86 0.70 -5.23 47 413 2011-05-18 16:09:10 2011-05-18 17:09:10 1 4 339 0 204 422 226 203.30 28 65.88 CHANGED ssplWoFEQlQGlhY...VsVPIRMTVlKLps.GGLhVauPlAPTtEhlphlcc..L..t.cG....sVcaIlLPTsSG........lEHKlalGPhARtFPpApVWVsPsQWSFPlsLPLs..WLGhP....ps........+s..L.s........h....sss..........ssPas.DEh-athLsPlsLGlG..s.....FtEluhaH+tSpTLLVTDullulstpPPt.lhshDP............hPLLF.........HAR-p..us...csltDosps..R++GWpRhsLFuhYhpPs..sL.................pss....................ph.phh..t................h.a..tWp.sh..sFptlps.....+...LhVAPlLpsLlasRs.ptlhsWl-cl............up.W..s.....hc.........pllPuHasAPlts.sspchppAFsa ...........................................lshssRMTVl+Lss......G.......u.....LhlaSPls.TpclhptlppL.........G.sVcallsPsh..h..........H+lalssapctaPpApla....ss...P.......s...................t......ph.....h....p........hhsh..........t..t....h................t....t.pt............ss...as.s-h.-..hhhp......t..t.........hpElsFaH+sSpTLllTDhl..sh....p........................................................................................................................................................................................................................................................................................................................................................................................................... 0 66 121 171 +14077 PF14235 DUF4337 Domain of unknown function (DUF4337) Eberhardt R re3 Jackhmmer:B3DVT1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 201 amino acids in length. There is a single completely conserved residue Q that may be functionally important. 
27.00 27.00 30.20 28.80 24.00 26.20 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -10.82 0.71 -4.39 47 143 2011-05-19 08:31:57 2011-05-19 09:31:57 1 1 133 0 74 139 31 162.60 30 84.08 CHANGED hspplAlhsulLAsluAlsshtuss.......hpscuhhppscAuspWuaYQAKohKpslhEhssphhthph............psthppclpcYppcppchcpc......ucpLppcAcph-ppp-pthcpacpashAsshlQIuIsLAulslLT+.+phLhhhuhuhussGlshhshuh ......................spplAlhsAlLAshuA.lsshtGss.......hps-A.slppscAuspWuaYQAKohKpsls-husphsthph................psthppclpcYppptpch+pc......................ucplppp..Acct-cpp-tthcpaHcashAsshlQIuIsLAulolLTc.....pphLhhh....uhslussGlshsshuh....................... 0 19 47 62 +14078 PF14236 DUF4338 Domain of unknown function (DUF4338) Eberhardt R re3 Jackhmmer:B3E1A9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 206 and 475 amino acids in length. 27.00 27.00 27.30 28.60 24.80 24.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.73 0.70 -5.75 26 167 2011-05-19 08:39:33 2011-05-19 09:39:33 1 8 64 0 33 167 423 198.60 25 54.41 CHANGED php+cslhpLstsppp..phttpptthst.........hssspsshschhs.pLphVcstsp...pLFphhhtp.a.LsaspshGc+LRYLVhsptp....t...llGhLuhuussapltsRD.caIGWsppsR+ppLhplshsu+hL..I.Pasplhsht.hlLuhsspclpsDapcpYstpshllEs........hVsssphhG..osYptss..................Wh.lGtTpGpG+hcht..................................................hhh+ts.h+-lalhPLscshcchLps 
............................................................t..................................................thht..lp.Vpp.pt........p......thap..hhtp...H.Lshp.hhGppl+Ylshsttp...............hluhl..uauusshphtsRD.paIGWs.pt+pppLhhls.ssRhL..IhPh.hpl.shtShhLuhshccl.pDatphYshp.hllEo............................alsspp..hh.G....osYptsp.......................Wh.lG...TpGhuc.t...................................................................................................................t................................................... 0 15 25 29 +14079 PF14237 DUF4339 Domain of unknown function (DUF4339) Eberhardt R re3 Jackhmmer:B3DVT0 Family This domain is found in bacteria, archaea and eukaryotes, and is approximately 50 amino acids in length. There are two completely conserved residues (G and W) that may be functionally important. 26.00 26.00 26.00 26.00 25.80 25.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.95 0.72 -8.06 0.72 -4.38 59 701 2011-05-19 10:27:33 2011-05-19 11:27:33 1 48 515 0 277 694 67 44.80 29 6.35 CHANGED paahsp.NupptGPashppLpphltsGplss-oLVW+pGMss.Wpsh ...............Wah.sp....s..s......p...p......tGPhshpplpphhtpGplst.sThlWppGhss.Wpsh...... 0 109 177 238 +14080 PF14238 DUF4340 Domain of unknown function (DUF4340) Eberhardt R re3 Jackhmmer:B3DYU8 Family This domain is found in bacteria, and is typically between 183 and 196 amino acids in length. 
25.00 25.00 25.00 25.00 24.30 24.60 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.31 0.71 -4.60 71 627 2011-05-19 11:01:47 2011-05-19 12:01:47 1 8 364 0 213 647 246 159.80 14 62.45 CHANGED sWp.ls.s...s.hs.......hssDpsplsshlssLpplphpchls.....ssss.h.scaGLsss........thplsltsss.......s.p..tplh.lGs..sss.s.....sp.....hYs+h..stpsplah...lss...shhshh..s.tshss....a.hspp.lh...s.h....ptsp...lpplplpt.....tspt.........hph.........spps...s.....tWph.ss....s....tt......pss.ssps..sphls.slspLpsps..hhstps......sphtthsh.ss....Pth .........................................W..h..t......t.......h.hs.t.t.t.lp.phhptltthph.p.hp.........s.spp.htpa..GLsps............................thplplp.tts.......................t.p....hplh..lGp..s.ss.s......................sp...........h.Ys......ph......t...s...........p..s.....p....lah.....lst......shhp.h...s..ts.t..p....h.hppp.lh......p..h......t.tp....l.pplplp............tt.................hph.........................tttt..t.......th.h.tt..............................ht....h..tthht...h..h.h.t....h........................t....................................................................................... 0 128 169 184 +14081 PF14239 RRXRR RRXRR protein Eberhardt R re3 Jackhmmer:A8ZLD7 Family This domain is found in bacteria, eukaryotes and viruses, and is approximately 180 amino acids in length. It contains a conserved RRXRR motif. It is often found in association with Pfam:PF01844. 
25.00 25.00 25.50 25.60 24.60 24.50 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.09 0.71 -4.86 49 361 2011-05-19 12:12:55 2011-05-19 13:12:55 1 6 102 0 87 389 36 159.60 40 43.18 CHANGED VaVLspst+PLMPs+s.u+ARcLL+pGKAtVh+..hhPFTIpLptp..tss.tsQP.lpltlDPGu+hoGlul....hssp..cslhsuclplhhtpl+ctl..................tspRphRRuRR.sR+h.....RYRpsRFsN..R....c+..s...tW..Ls........PSlpp+VcsplphVpcLp+lhPlosIshElV+FDhQthps..P-IsGhpYQpG ...................VhVlspptpPLhPsps.u+ARhLL+pG+AtVh+....thPFTIhLpp......p....sss.........sQP.lpltlDPGu+hoGlAl..............hppp..............ps..............latuElphRtp.pl+ctL.................ppRRthRRsRR.pR+h..............................RYRpsRF.NR................p..+...p...GWLs...............PSlpp+lpshhshVp+LpchhP..lss....ls.....ElV+FDhQthpN....P-l..sGhpYQpG.............................. 0 33 62 83 +14082 PF14240 YHYH YHYH protein Eberhardt R re3 Jackhmmer:A8ZMB4 Family This domain family is found in bacteria, eukaryotes and viruses, and is typically between 141 and 198 amino acids in length. There is a conserved YHYH sequence motif. 
25.00 25.00 26.00 25.40 23.00 24.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.42 0.71 -4.58 57 242 2011-05-19 12:29:31 2011-05-19 13:29:31 1 18 133 0 117 242 1052 161.80 25 17.45 CHANGED shsaplPhs..P.thu...........sps..sshsh....sshGlAlNGVsh.ssss...................t.t..s...s...hhs........................slDpssuHspssG.sYHYHuh.Ps...shhp.t...tt...ssc.............usllGaAhDGFPIY..u...hs.sss..p............................................suDLDcCNG+hsss.......GtY+YalTssa...Pal..pCa+Gs .................................................................................p...h.lshp..P...t.............tps..pths...........sslGlslNGlsh.sshs.............................t...s.....t......................................shD.ts..sGHs....p..s.u....sYHY..H.........t......ss...shhp....tt......ssp.................................usllGaAhDGFPIY..Gs.hs.sss.p..........................................................................................t..s...s.......D..LDpCsG+hsss........................utY+....Y....asT....tsh....shh.tshhG............................................................ 0 65 96 111 +14083 PF14241 DUF4341 Domain of unknown function (DUF4341) Eberhardt R re3 Jackhmmer:A8ZP08 Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 60 amino acids in length. The family is found in association with Pfam:PF04143. There are a number of conserved glycine residues that may be functionally important. 
25.00 25.00 25.30 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.99 0.72 -4.27 233 845 2011-05-19 13:03:03 2011-05-19 14:03:03 1 3 766 0 330 765 339 64.00 36 39.15 CHANGED sPhhuLhGGhLIGluAslLllhsGRIAGIS.......GIluu.....l........lss...................pssts.W.RlsFlsGLlhushlht .........h.uhhGGhLIGluuslLllhs......GRlAGIS.............GIluu.l.......lss.............................tts.hs..W..c....lhFllGllhushlh............... 0 101 209 276 +14084 PF14242 DUF4342 Domain of unknown function (DUF4342) Eberhardt R re3 Jackhmmer:B0C276 Family This family of proteins is found in bacteria. Proteins in this family are typically between 97 and 206 amino acids in length. There is a single completely conserved residue P that may be functionally important. 25.00 25.00 25.80 25.30 24.60 23.90 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.78 0.72 -4.21 24 177 2011-05-19 13:27:51 2011-05-19 14:27:51 1 2 141 0 62 144 7 82.10 31 52.91 CHANGED Epppc.shsE..chpsoss-llpplKcllcKGNVpRIhlK+--.+sll-IPVsuGlhhGsI.u.sllhPhlshlGs...luAlsschTlEIp+ .....................pph.-phphssscllctlK-llcKGNVsRIhl+K--.+sll-I..Plsssl.hGs....l....u.slhhPh.lhhl.us...huAllschplcl.+............. 0 36 55 60 +14085 PF14243 DUF4343 Domain of unknown function (DUF4343) Eberhardt R re3 Jackhmmer:B0C6A8 Family This domain family is found in bacteria, eukaryotes and viruses, and is typically between 127 and 142 amino acids in length. 
28.50 28.50 28.90 28.50 28.40 28.20 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.71 0.71 -4.30 33 270 2011-05-19 14:59:22 2011-05-19 15:59:22 1 1 245 0 46 151 7 125.30 54 49.75 CHANGED ppshFlKPsstsK.............tFsGpVhptsp.............-ls..........ths......s.......hspc.....p.lhlS-shp.htsEaRhallsucllssuh..Y.................+ss....hchcs.....-...scshppshphh.t.....hshspuaVlDluhsp.stp..htllEhNsh.uuGhY.uscstphlp ...........................................N.WGKFIKPKAGSK...............VFTGR...VVNsT+.............DLI..........GI.......G.......LPFD.....YPIWISEVVE.FIA......EWRCFVLDGRVLDVRP..Y.................................TGD....YHAQF........D.ASV.IDEA..ISCW.K........DAPIAYGLDIGVTR..DGR.....TLVVEVNDGYALGNY..GLSPLK...ohs........ 0 20 36 43 +14086 PF14244 UBN2_3 gag-polypeptide of LTR copia-type Coggill P pcc Jackhmmer:Q9SKR1 Family This family is found in Plants and fungi, and contains LTR-polyproteins, or retrotransposons of the copia-type. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.86 0.71 -4.66 45 858 2012-10-02 13:37:57 2011-05-19 16:31:48 1 147 45 0 183 1298 0 123.20 19 15.91 CHANGED uDsssh.Iss.h+L..sGsNYttWSpslphhlsuKsKhGalsGsl..stP.tpsD......acpWpppNuhlhuWlhNohssplhpshlhhssApplWcslt..ppappss...ssplhp....l+p..pltphp.Qs.stslppYasc.hhp.LW...-hpphp......hpsstsp ..........................................................................hL..st.p.N.Y..Wptth.hhl..tu.p.s.....h.h.....s.a...l..........s.......G....p..h..................P..................p........s....p................s..tht..pWptp.......sshl......h.uhl....h.sohs.p.l..h..pp...h....h....h..h.p.....o.up..ch...Wptlp..ph.a..sptp....tp.hhp.........lpt.....tl.thp..pt.t.pl.tah.th.....h..............................tt....................................................................... 
0 20 109 161 +14087 PF14245 Pilin_PilA Type IV pilin PilA Eberhardt R re3 Jackhmmer:Q59589, Jackhmmer:B0C6E0 Family This family consists of proteins which form type IV pili. In M. xanthus these pili are required for social motility [1,2]. 24.50 24.50 24.60 24.60 24.40 24.40 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.36 0.72 -3.52 48 287 2011-05-20 08:28:23 2011-05-20 09:28:23 1 8 84 0 98 303 30 100.80 20 62.61 CHANGED pssKA+QSEA+shluulN+AQQuYhhEp..spFs........ss..lssLslsh........t..poppY........sYshsssss....................................sstt.......sssstssslpsYsuulhhsss...........ssssuslCcsssss .............................t+u+poEA+ss.L.pulhpAQpuaahEp..spau.....................ss....hscluhss.................p....psspY..........sYplssuss.........................................h.......h....t..................................................s............................................................................ 0 40 72 96 +14088 PF14246 TetR_C_7 AefR-like transcriptional repressor, C-terminal region Eberhardt R re3 Jackhmmer:B0C7Z3 Family This family comprises the C-terminal domain of transcriptional regulators of the TetR family. It includes the AefR transcriptional regulator from P. syringae [1]. It is found in association with Pfam:PF00440. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.53 0.72 -3.75 164 1183 2012-10-03 00:15:22 2011-05-20 10:40:53 1 4 683 4 387 1012 42 54.30 26 25.21 CHANGED GpLp.lsDsphAApQFhuLhcu.th.ah..plhs....sp....ss....tt-h....ppllcsAVchFLutY ............GtLp.hsDsthAAppahuLlpu.ph.hh..tlhu......hs.....ts.s..spch...cphscpAVchFLttY................. 0 84 213 288 +14089 PF14247 DUF4344 Domain of unknown function (DUF4344) Eberhardt R re3 Jackhmmer:B0C924 Family This family of proteins is found in bacteria and archaea. 
Proteins in this family are typically between 247 and 291 amino acids in length. There is a conserved EED sequence motif. 25.00 25.00 29.50 28.10 23.60 23.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.59 0.70 -4.69 33 149 2011-05-20 10:11:40 2011-05-20 11:11:40 1 5 108 0 52 149 25 172.00 24 70.55 CHANGED Y..sspssptpphpchlppsph......hEshsshhssh.hhlPpslslth.ts...u-ssshYDP-spsl..plsYchlspshp...hFtp........ts..pssptht..cs....A.ls.s.hh.aolhHEhGHAhIshhplPllG+EEDAsDplAullLlph..s-..s..Gs.......hsluu..AshFthpucc..cs.pht-hs..........ahD-HSlDhQRaYshlCllYGSDPcpas.sLlc..cut.Ls..p-RA-hCttEYpplspsWppLLp .............................................h.ttth.......p....ht..pt..h.hstsl.l.h..s...up.sshassptttl...hsYphh....t...hh.t..............t......tt.........s.hs.shhhhhhHEhGHhhlt..plPhhG..pEEDssDphAsh..hhlp.......p..ptt..........hhhs.s.st..ah.htt.tt..t..th..p..s.........hhstHuhchpRhashhChhYGussptht.tlhp......pht.h...pcRtthC..patthtpsW.phl.t....................... 0 8 18 36 +14090 PF14248 DUF4345 Domain of unknown function (DUF4345) Eberhardt R re3 Jackhmmer:B0CDG7 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 125 and 141 amino acids in length. There is a single completely conserved residue E that may be functionally important. 25.00 25.00 25.00 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.37 0.71 -4.67 33 186 2011-05-20 10:55:15 2011-05-20 11:55:15 1 1 172 0 55 174 304 121.30 22 89.27 CHANGED cthhphhLslsulsslshGlshslsssshhsstsssssss.........hcuphRahuGhalulGlhhlhuhhphphtphshhhlshhhhsuGlGRllShhhsG.hPssshlsuhlhELllsslhhhhhtth ........h....phhlhlhuhhhlshGlhhhhsstthh.s.sh.s.ssthss.........shssphR.h.h.u.Glh..hGlGlhhh..h..ssh..p..h.phhshslhhlhhhhhssulGRLlol.h.h..cG.sP.tshhhshhshE...llhs..slhhhh....h........................... 
0 16 36 46 +14091 PF14249 Tocopherol_cycl Tocopherol cyclase Eberhardt R re3 Jackhmmer:B0C447 Family This family contains tocopherol cyclases. These enzymes are involved in the synthesis of tocopherols and tocotrienols (vitamin E) [1]. 25.00 25.00 25.40 25.20 24.40 24.90 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.49 0.70 -5.73 22 180 2011-05-20 12:58:36 2011-05-20 13:58:36 1 3 160 0 72 189 73 263.30 27 72.89 CHANGED sRRFFEGWYaRVTLP.EhtpoFAFMYSIEDP.hG............GpsaSGGuAQILGssDpYlCRhFP-VcpFWAStt....tpLuluH.ttps.ph.sphLsPptFpcplppGYQsTsshpQGhItDsu......osphsRWpYphpPlYGWGs.sp.QpSTAGWLSahPIFEPGWQILMAHGLATGWIEWsGcpY-FpsAPAYSEKNWGGu.FPpKWFWlpCNsFpsp.sDLALTAuGGhRpVLhh...hEsVAlIGlHY.pG+..FYEFlPWs......uploWplsPWGpWphpAcNpp.apVclpusTp.ps......GTsLRAPT.pp...GLtahCRDThpGclpLpLhp..........ppsclIlcupSshuGLElGG ......................................................................................................................................................................................................................................................................................................h...................................uhhGhhu.hl.PhhEstWplh.hhtGh.us.Ghl.....phs.Gcpa-F..s..su...u..YsEKNWGtu.FPpcWhWl.Q.sNsFp....st......ssl....ulssuGuh.htl.hhh.......hcs.su..ll.u.l.ah..pGp..hYcF.sshs..................utl.....ph.p.l...p...s.h.u..p.Wplpu..p......s....pp....a.tlplpu.ps.p..p.......G.p...L...p..A..P..s..tp....sht...tsc-ohtGplplplhc.................psphlhpspo..phuulEhGG................................................................................................................... 0 30 57 67 +14092 PF14250 AbrB-like AbrB-like transcriptional regulator Eberhardt R re3 Jackhmmer:A8ZQN0 Family This family of DNA-binding proteins is likely to act as a transcriptional regulator [1]. This family does not include E.coli AbrB, Swiss:P75747, which belongs to Pfam:PF05145. 
25.00 25.00 25.30 27.30 24.10 24.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.40 0.72 -4.07 31 183 2011-05-20 13:30:59 2011-05-20 14:30:59 1 1 74 0 80 187 216 70.60 55 54.51 CHANGED sFhsALL-Ac....GlsLssps....supsptGRpsoYRloVQsNGNLLIGuAYT+pMsLpPGDEFEIpLG+..KH..I+Lh .sFYcALL-A+....GlsLsssu......tupuptGRpsoY+soVpuNGNLLIGpAYTcphsLcPGDEFEIcLG+..Kp..I+Lh....... 0 12 53 71 +14093 PF14251 DUF4346 Domain of unknown function (DUF4346) Eberhardt R re3 Jackhmmer:B0BZ08 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 127 and 502 amino acids in length. There are two conserved sequence motifs: LDP and DHA. Many members of this family have been annotated as dihydropteroate synthases, however no experimental evidence can be found for this and Swiss:Q57571 has been shown not to possess dihydropteroate synthase activity [1]. 25.00 25.00 25.10 25.20 24.20 24.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.45 0.71 -4.37 36 151 2011-05-20 14:28:43 2011-05-20 15:28:43 1 4 138 0 83 154 139 92.90 40 30.70 CHANGED pslDcpLSpRaIsLDPuGYFlIhlD+csuhIsAcHasNsIs-+GLAsDPETGEslsC+G.psp.R.sssslapGRTAKELulplhEp.ppssPlopLDHAhYLGREhp+AEhsLlsGpcYlQD ...........................................t........................................................................h.h.........p.pspthhpG+oAKclh.plhEp..t.s.....lopLDHAsYLGRELt+AElALhpG.p.pY..lQD....... 0 20 55 73 +14094 PF14252 DUF4347 Domain of unknown function (DUF4347) Eberhardt R re3 Jackhmmer:B0CAL0 Family This domain family is found in bacteria and eukaryotes, and is approximately 160 amino acids in length. There are two completely conserved residues (C and G) that may be functionally important. 
24.60 24.60 24.70 24.80 24.40 24.40 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -10.91 0.71 -4.75 113 300 2011-05-20 14:50:08 2011-05-20 15:50:08 1 150 158 0 134 354 204 155.90 31 6.72 CHANGED pllhlDusVs-hppLlsulh..........s........ss........c.....lllLDsspDG....lpQIsphLpsp.....s.slsulHllSH.G.ssGsLpLGss.pLstssLsp..hssp...Ltphup..sL.stsu-lLLYGCsVAu..G.....stGppFlppLuplT.GAcVAASsshTGssthG.Gc.WsLEhp.sGs.lpssh......shsttshssYsulL ....................................................llhlDusVpchpsLlsulh............................s........ss........p.....lllL-sspDG....lpQIsphLpsp...........s.slsulHllSH.G..ssGplpL.Gs.........s...hLsts.sLts..hssp......Ltp..h..sp..........sL...stsu-.....lLLYGCslAs...u...................ttGppFlppLup...lT.GAsVAASss.hTGssthG.Gs.WpLEhp.hGp..lpst........hh.....tta.................................................. 0 34 87 114 +14095 PF14253 AbiH Bacteriophage abortive infection AbiH Eberhardt R re3 Jackhmmer:B0CCK0 Family This family of proteins confers resistance to bacteriophage [1]. 
24.90 24.90 24.90 24.90 24.70 24.70 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.12 0.70 -4.79 44 409 2011-05-23 08:00:20 2011-05-23 09:00:20 1 3 336 0 51 289 12 245.00 19 82.16 CHANGED LaIIGNGFDls.HGLpTpYpcFt.pYl.....................................ppp................................................cpp...lh...ct...lt......................chh...p..ttp.hWs-hEpsLu.cls.....hc.........................pp...............h.tth..p....chh.-..shpc........hhphp..p....pt.h.p.hh..phphpp...........hhp.chh.p......hphhp.....p................hhtpph...hp..ph...h..pppshF.loFNYTsTLEplYsls..pl.alHGt.pt................pplhhsHG.......st...pt...s.hh.........s.hs-c...................pchpptp..........h.h........tps...hh.phhh+shc....spthh.p..pppthhptl.s..sl.........pplhlhGaSluclDhsYhpcIhppl....sssph.hhhaas ................................................................................hIlGNGFDlt..a......GL....p........TpYp.-Fh..pah.............................................................................................................................................................tp.......................................................................pt.hh...p....l.......................................p.h.........t.p..tWsDhE.plu.phs....pp.....................................tp..............................................................h...h..p..p.hh..s..p.hpc................hhtht...p.....p...h..p.......t.ph.p.........................................hhp.phh.p............h.hhp.....p.............................................................h.htth......p..p...........pp.s.h.lsFNYT..p.s.......lp.p.h.ht............th...pt...................t..hhhHu.....p....p....s.hh...........s.hscp.............................................p...p........................pp.....p.hhpthh...........h.p.....p....t.l..t..s...........................p.lh.lhGhSl...ut...s...Dh.happlhpp.......ss...hh....................................................
... 0 18 37 44 +14096 PF14254 DUF4348 Domain of unknown function (DUF4348) Coggill P pcc Jackhmmer:Q64RZ7 Family \N 21.10 21.10 21.10 21.10 20.90 21.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -11.78 0.70 -5.29 14 97 2011-05-23 09:07:11 2011-05-23 10:07:11 1 1 93 2 12 80 0 259.60 43 95.51 CHANGED lhl.ulhh.LhlhsuCssp.psctcP.h.us..optlDSspscssDo.s...phlsEpPh.PtsADEhFDDFhFNFAuscKLQ+pRlpFPLPhY.pGcp.spcIcKcpWKhD.hFp+QsYYTLlFDpccpMchsKDTslsSVhVEhIaLcp+pVKpYaF-RhcGpWhLpuIshpsh..ccstNtsFlpFYp+FusDShFQtppl+pPLtFsssDPD.D-FshlpsTlsspQW.uFpPt.L.PpsplhNIhYGQK.s-.ospKIlsl+GluNGhps.LhF+++tspWcLhK .......................................hshhl.lhlhsuCusp..psshDP.h.so..op.tl.DSh.t.pps-o.p....shlsEpPh.PtpADE.FDDFhaNFAusctLQ+pRlpFPLPhY....succ.....pspIcccpWKhD.hFp+QsYYTLlFDp-cpM-hstDTsL..sSV.VEhIalKp+hVKpYaF-RlcGtWhLpuIshcsh..cpst.NtsFlcFap+FssDShFQtpplppPLt..Fls.sDPD.D-FuhlpTTlshsQW.uFpPt.L.Pt-tl.NI...YGQ+....s-.SspKIlsl+GIuNGhps.LhF+.++sGpWcLhK................................................ 0 5 10 12 +14097 PF14255 Cys_rich_CPXG Cysteine-rich CPXCG Eberhardt R re3 Jackhmmer:B0C8L0 Family This family of proteins is found in bacteria. Proteins in this family are approximately 60 amino acids in length. There are 5 conserved cysteines which occur in a CPXCG motif and a DCXXCCXP motif. 25.00 25.00 25.10 25.50 24.70 24.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.99 0.72 -4.14 50 377 2011-05-23 09:38:52 2011-05-23 10:38:52 1 1 360 0 122 303 213 51.30 39 80.52 CHANGED lsCPaCGcphclhlDsSsG.sQpYhEDCpVCCpPIphpl.pl.D.-s.phplplts- .......hpCPaCGctlpl.hlDsSsG.sQpYhEDC.lCC+PIplsl.pl.D...ct..c.plplh.-................ 0 34 63 97 +14098 PF14256 YwiC YwiC-like protein Eberhardt R re3 Jackhmmer:B0C8C3 Family The YwiC-like protein family includes the B. subtilis YwiC protein Swiss:P46909, which is functionally uncharacterised. 
This domain family is found in bacteria, and is approximately 130 amino acids in length. There is a single completely conserved residue G that may be functionally important. 25.00 25.00 26.00 25.90 24.30 23.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.97 0.71 -4.06 64 369 2011-05-23 10:22:13 2011-05-23 11:22:13 1 1 357 0 79 307 2 127.30 34 50.68 CHANGED lPsQHGAWuMlllPhlhGh.huu................shhp.lh............L..hluWhh..hYlhpaPh..hhhlKp.+p........ptp....ahph........h....hlY......u.....sluhhhslhslh.hps.pllhash.sh.lPLhhlshaastp+cERuLlN-lsullshulhuhs..ua.hh .....lPpQHGAWsMlllPFlhGhhlus.....................P..shhH..l.............L..hluWhh..hYLssYPh..hhhlKp.+p..........pcc....ahph........sllY......h.....sluhlhulhsLl.hp..pllhash..shlPLhhVshYas+pKpERuLlNDluullshslhGhsuhh............ 0 19 48 66 +14099 PF14257 DUF4349 Domain of unknown function (DUF4349) Eberhardt R re3 Jackhmmer:B0C5Q6 Family This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 282 and 353 amino acids in length. There is a single completely conserved residue D that may be functionally important. 
24.40 24.40 25.50 25.10 24.10 24.30 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.40 0.70 -5.16 99 422 2011-05-23 10:50:48 2011-05-23 11:50:48 1 4 385 0 188 435 178 256.80 18 81.31 CHANGED l..su.Cuusss............................s.............ss.......s.............tsssus........s..ss...sts.sutts.................................sss...s..ss....sss........ss....ss.......s........s...................p.p..lIp..su..sls..lps..ps...hspshsplpshspptu.Ga.lssps......p..t......tts.ssptpuslslRVPssph-shl....spl.p......s.l.....G.pl..pspshsucDVTpphlDlcuRlcshcspppRLhpLh.p.+As...slpDllplEppLuplps-lEulpuphchLpspluhSTlslslpp......t.ss....sp...s.....shh..sthtsuhpsuh....sshh....s....hhthlh....hhl....s.sllshhshl.h..lhs.h.ls.hhhh+ ...........................................................................................................................................................................................t................t.......t.t..t...s................................................sss.s..ts.......sss.......ss....ss...........s........p....................................pp...llp..su..slp..lps..ps...lspshsplpshspph..s.Gh.ltsps...............t..p..................tsstptpuplslRlPs.s..phcshlsplp.......ph.....G..pl..p.scshpupDVTp.......phh.Dlpu.RlpshptpppRlhplhp...cAp....slp-.llplcpcLu...plpscl-phpuphp.lppplshuTlslshpp......................s......ttt..........sh....tthh....p.u.ht.tuhpshh....thhthhh....hhh....s.h.hlshhshh.h..hhhhhh.hhh............................. 0 92 150 173 +14100 PF14258 DUF4350 Domain of unknown function (DUF4350) Eberhardt R re3 Jackhmmer:B0C323 Family This domain family is found in bacteria, archaea and eukaryotes, and is approximately 70 amino acids in length. 
26.40 26.40 26.40 26.40 26.30 26.30 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -9.32 0.72 -3.77 163 383 2011-05-23 12:43:26 2011-05-23 13:43:26 1 4 379 0 148 374 7 71.10 21 18.23 CHANGED sssssGs.tuhtp....hLpp....p...Ghpl......pthpp..s.hs...th..s..s................s....ss.....ol....lllsss..ht..hst...tphp.pl...hpalp..pG.s..plllss ..........sspspGs.tAhtp....lLps....p....GhpV......phhps...s..hp...ph..s..s.................p..ss.....oL....lllsss......t..hsp......tphp.tL...hphsc..tG.scllls..................... 0 44 100 136 +14101 PF14259 RRM_6 RNA recognition motif (a.k.a. RRM, RBD, or RNP domain) Coggill P pcc JCSG:Target_421663_WS20613B Domain \N 27.00 18.00 27.00 18.00 26.90 17.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.50 0.72 -9.07 0.72 -3.99 76 8811 2012-10-02 20:46:34 2011-05-23 13:46:14 1 359 393 72 5363 52886 951 69.90 20 16.00 CHANGED lhlpsl.s....s.sst.pclhphhst....h.s.p.....l....p.slphh....t..p..............pst..Ahlph..s...stp.sAppshpp...h...p...hh..l..ps+hlc ..........................................lhlpsL..P..h......s..s..o....p....p....-.....l........t.....p....h....F.p.p..........................h..u..........................l..............p..sl...p..lh.....................h......s......t...t.....................................pu...h..........A.a.......V..........p....F.........t.................s..tc....p....Ap...p...A.l.pt......h......p....h......h...tsp.l................................................................. 0 1680 2599 3942 +14102 PF14260 zf-C4pol C4-type zinc-finger of DNA polymerase delta Wood V, Coggill P pcc Jackhmmmer:P30316 Domain In fission yeast this zinc-finger domain appears is the region of Pol3 that binds directly to the B-subunit, Cdc1 [1]. Pol delta is a hetero-tetrameric enzyme comprising four evolutionarily well-conserved proteins: the catalytic subunit Pol3 and three smaller subunits Cdc1, Cdc27 and Cdm1 [2]. 
24.00 24.00 24.20 24.10 23.20 23.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.34 0.72 -3.97 123 623 2011-05-23 13:01:37 2011-05-23 14:01:37 1 15 346 0 405 635 5 74.40 35 5.48 CHANGED Cl....s.Cc.s.lppt................slCppC.t..spp....sslh.phhschpphEp+...hscLhshCppCp..............ushtpplhC.sSpDCPl.FYpRt .................................................................................Cls.C+s.lpptt......................slCspC..p....spp.....splh.phlsclppLEp+ascLhotCQpCp.................Gohc.p-VhC.sS+DCPl.FYhRh...................... 0 141 227 345 +14103 PF14261 DUF4351 Domain of unknown function (DUF4351) Eberhardt R re3 Jackhmmer:B0C7B5 Family This domain is found in bacteria, and is approximately 60 amino acids in length. 25.00 25.00 25.00 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.12 0.72 -8.76 0.72 -4.10 123 513 2011-05-23 13:20:27 2011-05-23 14:20:27 1 8 115 0 203 604 76 61.60 34 25.98 CHANGED p-Gc....pc....................tt....t.sLlLR.LsRRhGplssp......ppIpsLSlpQLEsLu........EALLDFsslsDLpsWL .............................................pGhpc............up....tpLllR.LpRRhG.plssphp....ppIppL.s.l.ppLEsLu........EuLLDFssl...pDL.tWL............. 0 62 162 200 +14104 PF14262 DUF4353 Domain of unknown function (DUF4353) Eberhardt R re3 Jackhmmer:B0P678 Family This family is found in bacteria and archaea, and is typically between 262 and 279 amino acids in length. 
25.00 25.00 25.60 25.00 24.60 24.90 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.05 0.70 -5.18 51 568 2011-05-23 14:50:37 2011-05-23 15:50:37 1 12 299 0 137 543 7 204.70 25 61.88 CHANGED Glslsus....s.V.TIosuGTYhlSGohs..sGQIlVsA.sc...scpVpLlLcGssIosossusIhVps.AcclhlsLu-GTpNoloD....uupas........tsspssAAIaS+sDLTl.sGsG.sLsVsushssGIp..S+DsLhIsu.GThsl....sA.scculpGKDuVpI.....ssGs...lsl.sAssDGl+Sc.....N--D.....sscGhlhIsGGslTIsAG.sDGIpAssslhIsGG.s....lslss.......................uscGlcutt.l...slsGGslslssu...DD..Glpust.........................plplsGGsh..slsuuc...DGlc .............................................................................................................................................................................................................................................h.s...h.h.....t.......s...l....lt.u...t...tp...ul...sp.....s.s......lhlt.s..G.....s..h..pl.....p.u....s.....tcul.....pup....s....tlpI............psGs.....hsl....su.......s....s.......D...ulcus...............ttp.p...........................tt.G.h.lhIsG.Gsl....sl.........s....u...u....s......D.....G.lcA.........s.........ss....l.hIsuG.s..........ls..lps.............................................................u.cGlcu..t.slslsGG..sls.l.us...sD....ulpus.......................................................st............................................................................................................... 0 82 123 133 +14105 PF14263 DUF4354 Domain of unknown function (DUF4354) Coggill P pcc JCSG:Target416839_SP17692A Family Several members of this family are annotated as being ATP/GTP-binding site motif A (P-loop) proteins, but this could not be confirmed. The one PDB:3NRF structure solved for this family exhibits an immunoglobin-like beta-sandwich fold. 
Crystal packing suggests that a tetramer is a significant oligomerisation state, and a disulfide bridge is formed between Cys 125 at the C-terminal end of the monomer, and Cys 69. 25.00 25.00 25.80 29.00 24.90 24.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.57 0.71 -4.43 10 100 2012-10-03 03:07:29 2011-05-23 16:13:37 1 1 97 3 13 46 1 121.80 51 96.22 CHANGED MKp..thhluulALsuh....shsAsAsssDslhVaATppopGolSlGc+shYTKsFcVsVsNhucpsIDLsph..Ch+AauscG+cF+lDTVDEcLspGoLKsGpsVKGhAVFAS-scSVYpAshVKlSss ...........hKp..hhhhshluhsuh....sShApsAussslMlhATscopuus..SsGDKsFaoQTFDluVANsuuoDI...sLcKl...CFlAluscGKoFssDTIDpKLToGlLKsG-SVKGFAsFAusDcSlYcsplVKhS-.s....................... 0 3 8 11 +14106 PF14264 Glucos_trans_II Glucosyl transferase GtrII Eberhardt R re3 Jackhmmer:B0P6U7 Family This family includes glucosyl transferase II from the Shigella phage SfII, Swiss:O21944, which mediates seroconversion of S. flexneri when the phage is integrated into the host chromosome [1]. 
25.00 25.00 27.00 27.00 24.00 21.20 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.36 0.70 -12.26 0.70 -5.43 56 371 2011-05-23 15:20:29 2011-05-23 16:20:29 1 1 254 0 42 303 10 302.20 21 64.26 CHANGED hhshlhsshhh.hD-....stsh.h....uhtsW..........hp...GR.hhphlhphh....hpsh....hsh.s.hstl...Luhlhluhuuh.ll...sp.hh...shp..........p...phh.sslhshh.h.hssPhhl.pph.uapasuhthululh.husluhhhh..........p.............pp...p..h.thlh.uslhlh.huhshYQuhhs...lalslhlhhhlhph.............l.csp.....tshpthhh.tlh....p.lhh..hlhuh..slY..hlhhplh..hhhh.psphssh..t..spht...h...tshhps.lhpshpph..hphht......t.sh.hhhh..hlhlhllhhlhh..hhhhhtpph.....p...t.h.thhlh.l.lhhhhhP..hhh.h.....lhlh...hs.s.......shhps.........hhshshs..h.hh..hhh.llh .......................................h.hhhtsh.h.lDD....t+tht..........uhtsa..........sp...uR.h.phlphhh....spth...hsh.shh.s.l.....Luhhhlulssllh...hh.hh.....stp..........p....shh..shlsshl.hhhs.PhFl.psh.uFpasu.hhululh.huhlshhhh..........p..................ps...p..h.hhlh.uhlhlh.hhhshYQushs...lalslllhh..hhhpl.............l.ptp.......htthhh..hhh....t.hhhhlhuh..lhY..hlhhph.........ss..ptsth..ts.tpht....h...........tch.ps.lhpshpph......hp.hht.........psh...hl..hll..h....h.llhllhh....hh.thhp.p......t.....hh..thhhh.l...lh.lhlss..lh.h...h......lhlh.....hsp........hhts.........h.tashsh.sh..hhhh..h................................... 0 14 20 26 +14107 PF14265 DUF4355 Domain of unknown function (DUF4355) Eberhardt R re3 Jackhmmer:B0P709 Family This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 180 and 214 amino acids in length. 
24.00 24.00 24.10 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.81 0.71 -4.17 85 633 2011-05-24 07:17:12 2011-05-24 08:17:12 1 2 519 0 57 462 6 126.50 21 66.38 CHANGED ppp.pt...cphscp-.....lschls..............cphpchcpc.....pp.....................Eu...........c+...l.uchss.c-.......+.tc.....h-hcph.......pc.......cl...pchctphpppchp.....spspphLs-pul..sss........hhs..hl.........l..ss..DsEp....sppslcsh...pph....hs...pslpptlc.cp....l+us ........................................p......pphsppc......lsphlp...............cphpphcpc..tpct...................hp..Es...........ccl..sc.hst.cc........c.tp.......hchcph.......cp........-l.......pph....cspht....ppchp.....sps.pphls-tsl...sss........lls..hl.........l..ss..ssEp....scpslcsh...pph.......hpphlppthp.tthpt................... 0 17 36 47 +14108 PF14266 DUF4356 Domain of unknown function (DUF4356) Eberhardt R re3 Jackhmmer:B0PD80 Family This family of proteins is found in bacteria. Proteins in this family are approximately 540 amino acids in length. 
25.00 25.00 25.30 26.40 24.80 24.30 hmmbuild -o /dev/null HMM SEED 488 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.85 0.70 -12.67 0.70 -6.09 10 232 2011-05-24 08:09:00 2011-05-24 09:09:00 1 3 178 0 31 192 2 426.50 34 89.65 CHANGED sps...hsasplAsRlLGsPp....Dcs-YaNpLacLpps.pshphhpppLcKpIsscc.h...QclpcIHs......lspcp...oVs+hlAaLsucpll....s+cssslh+Rpl+pAhhpVl..E.ha...+cp.sLs+s....c..s.....hlVcLls...Wlpsalsphhcshchc...cphPKVVWYG-sscSplYFLaaLhhlGCDVLlFHP..s.pscDsFpcsD.E.p.h.hl.hphs.sossLEPFPsEc..sRpuTV.AY+uo+ElEplLas.DSul.YKPWQF+casPhoVTLKTTYDELFllsKE+AhlRPsFcsscs.oVsIPNlFAKIsGVoc.DppEYWs+l+sLt.s.p-Tphlps.FP.FTcpp..puNap.aapcsls...csGcIcs-cLhpSslWpYpcLs-GlQcuIAcsIpchCcpPhlK...tE..p.spD.ltlalFsQlsplssslLcLIQsFDYoQslPKlVlYpo-psspLoRsDAssLlFLNclGlDIllYNPsGapsIEpYI-cstFDsHaL-EhlFsLpa+EsSt......ppll+KLF .............................................................p...htashlhsRllGl.p......D.s-YhscLachsp....s.h....t..t....h....hpt.pslsc..sIsscp.h....pc.l..t.l+p..............tt.pt........sss..ch.lupLssppLl....hpss..cLt+h.l.+psFhslL..cla.....ccp....p.ppl.....K..s.....Fhlchlp..h.ppalsphhpshshc..........cphP+IlaYG-..hp.csclYFLhaLhhlGCDVLYhpP........c.sc-sapp....l......Dpctp.h..hh..h...phs...tphslpsFP..ccc......p.RhuTl.AYpAo+EI-plLaptsShlYKPWQFcsass.shTLKTTYDElhllhcEcAhlRPsFhspsp..plhIPslFAKIsGVpp.spc-Yapclcslt.s..pso...h...hl......pshP..ascpt....psshp...pYp.ph..ls....................ptGplc.-hlhpSpha..appLspslQptIhctI.chhcp.hhh.............................p..p.pp-..htlhlhtpl.pls.plLc.lppFDYspplPKlllapspps.plo+pDuhlLhFLNplGhDlhhasPsGhssIE.aIp...tthaD.HhL-chsas.phpt.s...........hhptlF................................. 0 12 24 26 +14109 PF14267 DUF4357 Domain of unknown function (DUF4357) Eberhardt R re3 Jackhmmer:B0PED6 Family This domain family is found in bacteria and archaea, and is approximately 60 amino acids in length. There are two completely conserved residues (G and W) that may be functionally important. 
25.00 25.00 26.10 25.20 24.10 24.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.51 0.72 -4.37 54 287 2011-05-24 08:19:50 2011-05-24 09:19:50 1 13 270 0 82 286 23 52.60 34 16.61 CHANGED RppLlssGll.tt.ps...sp.hlFscDhhFs.SPSsAAullhGpssNGhhpWK.stsGpTLc ......................phhtpthl......t..........tt.hhhscDhhFs.SPSsAA.shVlG.p.o.sNGhspWK..stpGpoL......... 0 37 65 72 +14110 PF14268 YoaP YoaP-like Eberhardt R re3 Jackhmmer:B0PGJ1 Family The YoaP-like domain is found at the C-terminus of the B. subtilis YoaP protein Swiss:O34983. It is found in bacteria and archaea, and is approximately 40 amino acids in length. The family is found in association with Pfam:PF00583. There is a single completely conserved residue A that may be functionally important. 25.00 25.00 30.30 29.10 22.20 21.30 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.11 0.72 -4.73 37 211 2011-05-24 08:34:33 2011-05-24 09:34:33 1 4 169 0 37 184 5 43.80 38 17.89 CHANGED PlphI+lcohEcApssPssFssaulFYsGcFlTscl..l.scppFcK ........hphIcl-ohcpApssPssaTsaulFYsGcFlTscl..h.spp+hc+.. 
0 22 32 34 +14111 PF14269 Arylsulfotran_2 Arylsulfotransferase (ASST) Coggill P pcc JCSG:Target416597_Pfam-B_1234 (release 25.0) Family \N 27.00 27.00 27.00 27.30 26.60 26.80 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.97 0.70 -5.27 23 447 2012-10-05 17:30:43 2011-05-24 10:41:10 1 8 258 0 240 647 295 268.60 28 56.51 CHANGED +sQhLpspsVlsaWsGshht..hGaGaGtlplLssoYppIapVTlsss....aho..-.psh.SalDhHEupl.oscGTllV..ouhNlTptDLpslGG..p-sahhDuhhaEIDIpTNcllFcWSAl-Hlsplslp..........up..tslsssssspcpPa-haHlNSV...........spas-.sYLlShRahsSlahlp.ps.......GsVhWplpG.pG.GDFph..sss..FsaQHDsRlhppo.....c-shslSlaNN.......sNosh.......ststsTTGllhslDhp.s+psohh+plh........sspcsltSsoQGsaQlLss........uHlllsaGshs+.lcEaDssGplVhp...spFG .......................................................................................................................................h.......................................................................................th...h..c...hH-hp.....h...h...ssu.s.h.Lh......ss.h.p.......h...t.........D....l..s......s...h....GG.........s....p....p...G......h.........l.ssl.....h...............p..................El.......s...h...c....G....-lla-.Wpuh-Hl.s....p.c.h..................................................h.ts...h..........................s.....t.hcahHINul................sh.s.p.-......G....p...h.L.l.ShRpssulhhls...t..po.................G.cl........lWch.....t.......G....................................................................t..........s....h...u................Q....H.sschh............................ss..s..sIhlFDN..................us..h.........tssss.s..spu...h.....l..l..c.l.D..........p.........pths..h....h.cph.h..............................sss...h...hSs...GusQ.h.LsN...........................GNshl.......s......ush....ut...lhEhss-G..c..slhch...h............................................................................................................................................ 
0 43 114 194 +14112 PF14270 DUF4358 Domain of unknown function (DUF4358) Eberhardt R re3 Jackhmmer:B0PFB1 Family This domain family is found in bacteria, and is approximately 110 amino acids in length. 25.00 25.00 25.30 25.30 21.90 20.60 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.17 0.72 -4.16 55 201 2011-05-24 09:45:12 2011-05-24 10:45:12 1 3 148 0 17 159 5 106.10 19 57.92 CHANGED pplppsss.....h.p..phpchssptlcph..a.slssschcshhhhtust..sspssElhll+sp-sp.scsVcsslpp+lpsppps.acsYhs..-..phphlcsuhlpp..cGsYlhhllup ................................t....tsh....h....phpphs...sptlpph..a.uls.sthpshhhhhsh...shpss-lhlh+sp-tcth-slcpslppphcspppp.apsYhs...c..phphl.csutlps..cGsalhhhh............. 0 14 16 16 +14113 PF14271 DUF4359 Domain of unknown function (DUF4359) Bateman A agb Jackhmmer:B1XKA9 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 130 amino acids in length. There are two completely conserved residues (P and S) that may be functionally important. 27.00 27.00 30.00 29.80 26.20 26.30 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.16 0.72 -3.67 39 182 2011-05-24 11:53:38 2011-05-24 12:53:38 1 2 174 0 36 127 19 107.00 39 71.17 CHANGED usshshTNPupp-YppaAuppLsph..hpp-lCt...pphshhLt........hhpsCspLlsst..pstltpllsptTpRpNahlFSlYpT-lshps.......hstaphp..TlGlhspFhshpsp .h.lhLA.oNPo+s-Yp-aAucphhpp..ls+clscs..csppu.hLss.......lsussc+Lscph...scPplshlI-phT+RssYlhFSsYpTEaclss...............pY+al..slGhuphFlsl-h.s... 0 8 23 32 +14114 PF14272 Gly_rich_SFCGS Glycine-rich SFCGS Eberhardt R re3 Jackhmmer:B0P5U8 Family This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. There are a number of highly conserved motifs including an SFCGSGGAGA motif. 
27.00 27.00 144.60 144.40 26.40 25.20 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.43 0.71 -4.21 9 314 2011-05-24 12:25:02 2011-05-24 13:25:02 1 1 314 0 37 103 2 114.80 81 95.98 CHANGED lpVVIGDRLGKGppVAKGVEpAGGpAlVIPGMGADMKLGDVMppEsADlGISFCGSGGAGAlTApTKYGY.s+aGMRSV-EGVTAIp-GppVLGFGFMDpEELG+RlsEAahKKa ...ITVVIGDRLGKGQKVAtGVEpAGGRAVVVPGVAADMKLGDVM+uEsAsFGISFCGSGGAGAITAQsKaGYKAKYGMRSl-EGVTAINEGssVLGFGFMDKEELGcRLVpAapKKa.. 0 4 9 23 +14115 PF14273 DUF4360 Domain of unknown function (DUF4360) Bateman A agb Jackhmmer:B1XRI1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 200 and 228 amino acids in length. There is a conserved GCP sequence motif near the N-terminus. 27.00 27.00 33.10 32.00 26.40 25.10 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.37 0.71 -4.63 14 198 2011-05-24 12:27:51 2011-05-24 13:27:51 1 4 87 0 146 193 13 174.30 29 79.62 CHANGED sspplpIhssshuGoGCPpGosus..slSsDpT....hhohuFDpF.s.lGs...Ghs.ss-ppKNCpLclsLp.aPuGFQaull-usY+GaApL-tGlTGohhooYaFSpssspsss...........oppohpGs...hpG.sYshp-plsssuhlaSsCG...ssu.Ls.INsplsL...Tu.ssusssGphopDsssls...hsQplpltW+.......sCs ...........s.stplpItssshsGoGCPpGosss..slosDpo.....shTlta...s.pahAp.hGs...sss.ss-pRKNCQLslslp.hPsGa.pau..lhs.s-Y+GaA..pL..ptGsoG.s.pusYYFsG.tsppss...........sppshs..GP....hss.saphpDpssh....s.s.....hhWSPCG......sps.LN..lNsplpl.....su...ss..tp.s...tuh.hs.Dshsss...hpphhphtWppC................................................................................................................ 
0 54 101 127 +14116 PF14274 DUF4361 Domain of unknown function (DUF4361) Coggill P pcc JCSG:Target_416718_SP15308B Family \N 25.00 25.00 27.20 26.40 24.90 23.40 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.08 0.71 -4.58 20 68 2011-05-24 13:04:20 2011-05-24 14:04:20 1 1 29 8 8 59 0 161.40 39 48.06 CHANGED YpKALL+lhPFNDYSGsYSuTsh..pla..h.sGs......spshstss+puaVVD-pTIFFYAGhlsE-h..pDRcpYKlhhpF.........s..pc.........t......slplpss.ssssslpFcl..........hs..psoYplspphDss+PYLc++alolp.lsYcasDhTos....Puh.lpY+VcGohoMpRpI.NTpIPD ..YpKALLRlhPFNDYSGsYouosh..pla..hpss......ssshspss+pu.aVVD-pTlFFYAGhhsE-h..pD.RcpYKIhhpF.........s.s-.................s......slslpss.sssschpFcl...........hs..sPoYplspphDsspPYLcH+alsIp.hsYpasDhTos.....sh.lpYcVcGolohpRpl.NTpIPD.......................................... 0 7 8 8 +14117 PF14275 DUF4362 Domain of unknown function (DUF4362) Eberhardt R re3 Jackhmmer:B0PHJ6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 93 and 146 amino acids in length. There is a conserved IRIV sequence motif. 25.00 25.00 25.40 27.70 22.90 24.30 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.32 0.72 -3.95 8 109 2011-05-24 13:06:54 2011-05-24 14:06:54 1 2 92 0 9 83 0 89.60 44 60.27 CHANGED KKNDVVVK.GstISNLDKFEpFVlNV-QGcVDKIRIVpYTcEGDPIFQTLEaSGpDIlYV.DNRpschhAGcpKGLaKDSCKSIVK.EQREsposYRLI ....................psD.ll.p.tst.l.Nlc+h-pFlhNl-pscs...DcIRIVpYTpEGDPIFQsL....Easu....p....cIhYshDsRcD.p.FsG.cpKsl..h..KDSCKpIVK.cp+EstssY+Lh............................... 0 5 8 8 +14118 PF14276 DUF4363 Domain of unknown function (DUF4363) Eberhardt R re3 Jackhmmer:B0PHJ0 Family This family of proteins is found in bacteria. Proteins in this family are approximately 120 amino acids in length. 
26.40 26.40 26.50 27.50 25.30 26.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.58 0.71 -10.21 0.71 -4.41 34 185 2011-05-24 13:21:49 2011-05-24 14:21:49 1 1 150 0 49 144 2 115.50 22 93.11 CHANGED hlslhlhll.llhhshastphltpsspclppplsplpptlcp.pcapcAtpphpch.ppWpcppphhslhlsHpElDsIshpls+LppalppcspspuhuplptlKhhlp+lhctEphslpNI .............................h...hhlhhl.llhhshh..phltp...t..s...cplp.pp...lsplppplpp.ccWppAtpphpclpppWpchppthshhlcHp-lDp.lshslt+lppalpscscstuLuplphl+hhlppl.p.p..plpNI............... 0 26 44 47 +14119 PF14277 DUF4364 Domain of unknown function (DUF4364) Eberhardt R re3 Jackhmmer:B0PBH9 Family This family of proteins is found in bacteria and archaea. Proteins in this family are approximately 180 amino acids in length. 25.00 25.00 25.00 26.20 24.80 24.60 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.74 0.71 -4.63 61 224 2012-10-04 14:01:12 2011-05-24 14:37:29 1 1 223 0 42 169 12 161.20 32 91.48 CHANGED hKLllLYhLc+lchPLoNsQlochlL-psassYFpLQpslsELhcushlphc..pcspshYplTcpGccsLphFpscIspslcccIcpalp.p.p.hph+c.-ss.lpuDYh..p.sssspYh.VchplhEs..sssLl-LpLsVPsccpActlCspWcpp..sp-lYshllshLh ..............KLllLYlLc+lchPloNsQlochlL-.ppahNYFpLQQhLsELhcushlphp..tpspp..........hYplTccGccsLphFts+Is..tshhcclcpalppp.t..pl+p.Esplhu-Yh..t.pssspah.VphplhEs..spsLl-LplsVsoccpActICspW+pp..up-lYshllptLh......................... 0 27 38 40 +14120 PF14278 TetR_C_8 Transcriptional regulator C-terminal region Coggill P pcc JCSG:Target403231_MJ9673J Pfam-B_17743 (release 25.0) Domain This domain is a tetracycline repressor, domain 2, or C-terminus. 
27.10 27.10 27.10 27.10 27.00 27.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.33 0.72 -3.60 44 1050 2011-05-24 13:50:59 2011-05-24 14:50:59 1 4 678 0 160 662 5 77.60 21 39.73 CHANGED thh.hpplhphltcppchhphlhstp.tsssa..hpplpphhpphhhphhppthhptst........hh.hpahsuGhlullppWLp ............hhpplhpalt-Npcah.+sllpsp..tsspa....pp+lpchh..p.pp.h.h.p.h..h..s.htp.ptsh................hh.hsahsuuhlulIphWl.............. 0 55 104 138 +14121 PF14279 HNH_5 HNH endonuclease Bateman A agb Jackhmmer:B1XMH2 Domain This domain is related to other HNH domain families such as Pfam:PF01844. Suggesting that these proteins have a nucleic acid cleaving function. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.95 0.72 -9.55 0.72 -3.97 15 136 2012-10-05 18:28:12 2011-05-24 16:05:27 1 1 131 0 21 135 168 68.00 50 23.01 CHANGED CIlC+c-hscpshs..-EHVIPsoIGG.....ph+hps.lCcpCNscLGpslDsplscphh......phhpthhcIcpc+Gpss ............CIICR.....KDTKE.LS.......EQ.aVIPEILCG.....aYF.T..NS...I...CD..oCpEpho....TNIDRPLIRHKLu...........hKIEpMKtp..hp.s.................... 0 5 11 15 +14122 PF14280 DUF4365 Domain of unknown function (DUF4365) Bateman A agb Jackhmmer:B1XQ96 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 182 and 530 amino acids in length. There is a single completely conserved residue D that may be functionally important. 
21.90 21.90 21.90 22.60 21.80 21.60 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.69 0.71 -4.46 68 263 2011-05-24 15:26:30 2011-05-24 16:26:30 1 15 243 0 94 248 14 142.10 19 38.93 CHANGED pptuhshlpth........hsph.salhc...psptDhGlDu........lEl....hs...ssp....soG..thls...VQlKuspshh.................psstshphthp........ppchsYWhp.tslPVllV..lhssssp........pha......Whplppthh.........sppptplplspss.hhsspshsplhphsttt ...........................p.u.shhpth........hspt..shhhp....ptptDh.G..lDsh.....lch.............hs......sup.............ssu.....hhls...VQlKsss..shh.........................ssssshsh..hp........spclsahhp..pslPl..lLV...lhssssp................psY...............Whplpspshp....................sppphplplshpp..hsstshpplhp.h..sh....................................... 0 24 59 79 +14123 PF14281 PDDEXK_4 PD-(D/E)XK nuclease superfamily Bateman A agb Jackhmmer:B1XMR1 Domain Members of this family belong to the PD-(D/E)XK nuclease superfamily. 
21.80 21.80 21.80 21.80 21.60 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.33 0.71 -4.42 92 355 2012-10-11 20:44:47 2011-05-24 16:36:41 1 7 324 0 99 367 61 175.10 18 46.51 CHANGED hNlFplhph..........EhtpSshluaLLsPp...ts.........HshsshFlctal.chhttptt..p...................................hpshpVp+E...............ttp..+lD..lhl......pssph....hllIENKlh.....up-pp....................s....QLpcYhp.....hlppch..................................tttphhhlaL............psssppshss..............................t.tahtlsa.......pplhpaLcphhpp.......tttpthpthlppYhp ............................................................................shaphht.............E..pothlshhls.p......tt...........Ht....t..t..a..lphhl.chhttp....t.......................................hpphplppE...............................tttp...+lDlll..........pssph...................hllIENKlh.......up-pp........................s..........QLpcYhp.........hlppph..................................t.pphhh.laL..p..t..............ppssp.shtp...........................................tahhhsa.......tplhph.lpphhp.....................h..................................................................................................... 2 34 64 86 +14124 PF14282 FlxA FlxA-like protein Eberhardt R re3 Jackhmmer:B0PBA7 Family This family includes FlxA from E. coli, Swiss:P77609. The expression of FlxA is regulated by the FliA sigma factor, a transcription factor specific for class 3 flagellar operons. However FlxA is not required for flagellar function or formation [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.11 0.72 -11.15 0.72 -4.09 18 424 2011-05-25 07:16:36 2011-05-25 08:16:36 1 2 376 0 79 227 8 98.60 31 75.65 CHANGED SulS..uossSoo.tsusss....supIppLppQIpsLpcpLpcLsss...psh.os-cK..p...pQpphIQsQIptLQAQIuQlQpQpupcsppp....pppsh.p.....spssss-GsNp.Pos.ssp ....................ssh......potsSsp..tStsss........supIsclspQIppLoppl.p.clss........suh.os-pK...p...cQtpLlQpQIphLpsQl.....u...QL.QpQp.A-Ktpcp........ppt..........................h....................... 0 12 40 57 +14125 PF14283 DUF4366 Domain of unknown function (DUF4366) Eberhardt R re3 Jackhmmer:B0P8U4 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 227 and 387 amino acids in length. 25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.74 0.70 -4.92 37 374 2011-05-25 08:16:00 2011-05-25 09:16:00 1 13 188 0 39 339 46 197.90 28 65.93 CHANGED Rhh.u.A.LsAu.l.lls.GFossAaAtGs-ssst........hssss.t......s....c.......pcstslTP-GNhsLVDDh....sst..ssKQFITlsTKuGNhFYllIDRs.sc.s.c......s.VHFLNtVDEADLhALhE-tpst..s.............................thtps.........pspst.p.........p................................cP.Ec.................c.s..shus.....lhllLllu.....l.hGG.GAh.aYF.Khh+sKpppp.s...ssDh.--.hD.hs-..........-p.pp-Dtss.s-- ......................................................................................................................t....................................................t.s.h.o.spGs..h..t..hhDch.....................t....ss+QFIThpTKsGphFYllID+spp.sc......s.VahLspVsEsDLhsh..hEctptt.t................................t...phpcsst......t.psc..p.tp........tp......................c...Ec............pu..shGs...hl.ll.llls.h.suu..GAh.YY.FKlhKs.Kpc.pp.t.....cpD.h.-p..h-...s-.....p......-p.-pp-t.t.t..pp............................................ 
0 25 36 39 +14126 PF14284 PcfJ PcfJ-like protein Eberhardt R re3 Jackhmmer:B0P892 Family The PcfJ-like protein family includes the E. faecalis PcfJ protein Swiss:Q5G3N2, which is functionally uncharacterised. It is found in bacteria and viruses, and is typically between 159 and 170 amino acids in length. There is a conserved HCV sequence motif. 25.00 25.00 25.00 25.00 24.70 24.90 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.01 0.71 -4.76 32 550 2011-05-25 10:31:24 2011-05-25 11:31:24 1 2 378 0 44 395 28 152.40 26 36.54 CHANGED hWpDYlchhcchtp.Dlp..ssthlpPpsL+ttHDchhtchpthpcccctpchpc+hhc.pthh.c.lKt.+a.....pFoDsplhlpVhcSlp-hhpEGpsLHHCVs...s.ptYh...t+tcohIh.hRhcsc.hE...TlEl..p.csh..VlQsRGhpNp.....sschp-cIhphlpp.tphIppRh ...................................................................................a.Dhlphh..h...t...hp..p.....hhP...s..h..tHD.h.t..php..p...........p.p.p.tp...........p.....ch..p....................hp....h.................hp.s.th.....hhs.cohpEhhpEGpthp.HCVu............u..p.Yh....tptco.hIhShR.h..p.s..p..h..c........TlEl.........s....ps......h....plsQh+GhpNc.....p..p.chtccllsllpp..t.h..................................... 0 18 35 42 +14127 PF14285 DUF4367 Domain of unknown function (DUF4367) Eberhardt R re3 Jackhmmer:B0PHJ7 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 229 and 435 amino acids in length. 
25.00 25.00 25.10 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.12 0.71 -4.93 52 440 2011-05-25 12:10:07 2011-05-25 13:10:07 1 11 247 0 86 388 10 174.10 15 56.36 CHANGED hhp.+htshhhsshlh...hhss.........sh..ssp..Ahpttlh..p.hl.hphhs...phsphphpspptp............................c.....phthp..hp.s.......YlP...cGaphp..........p....hp...hss...t....hp.....hh..haps...sss.phhpa.pppth..p...s....tsh.sh...ssE....ss....ph..cplpl..s.G.......hp.uhh.hc..p.ps.........pph.......lhWpppshha...pl..t.......us.ls.......p-EllKlscsl .......................................................................................................................................h..........hh.h...................h...........t.....hh..t...h......t...............h.hp.stt................................................................................tpht..t.htht..hhtss.hlP...c.G.hphp.....................................s..............hps...s.........hp...........hh..haps....p.pt..thhhh.t.tth...s....p..........tss..sh.ssp.....ts.......ph..cphpl...s.G........pc.uhl...hp....pps.............pts............lha..pppshhh...pl..t........usls...........c--hlclscol.................... 0 50 74 77 +14128 PF14286 DHHW DHHW protein Eberhardt R re3 Jackhmmer:B0P7B6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 366 and 404 amino acids in length. There is a conserved DHHW motif. 
25.00 25.00 25.40 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.30 0.70 -5.14 8 368 2011-05-25 12:28:32 2011-05-25 13:28:32 1 3 220 0 48 338 15 202.40 15 68.82 CHANGED EKRcLAcFPsFShpuhhDGSYFcDlupWYuDTaPhRDtFluhsAshcpLYGlch.sssclhussstptpDts.-sspsssspsh.s..sssspsso..........................sSsspsuospsss.sps.ps..sDsss.pshh-t.........shhpsphtsslalhsspuapLYsFsp-sucpYAuhlNshspcLs.sls.VYDMllPTuhslhLP-s.hpc.h.sousQcpAIsYhYuhhscsVKpVslY-pLtsHs-EYIYFRTDHHWTALGAYYAYpsFscstGlsshsLScacKc-hssFlGSaYutTpps.uLcpNPDTlpAYlPhsTNshchhss-us..pachlh.s.s..casuuspYSsFlGGDsslscIpNPslpDGSsllVlKESYGNAFlPFLV-HYppVYVl ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t.h.t..........t.h...t........lph............t.....h.....t.......t.....t.c......l...Ya+TDHHWs..G.....A.......ahuap..........hh..........p....t......h.......t............p.............t........p.........p....h..p.........................t......t..h........hGs........h.tt...........h......t..-...h.....h.h........t.............t.......h.........................p................................t..........h....h......p...........t.........ts...Y.t...h.ah.ts.s..s.h..l...ps.....t..........s...........t.....p..hllh+DSausshhshhh..atph............................................................................................................ 0 32 45 46 +14129 PF14287 DUF4368 Domain of unknown function (DUF4368) Eberhardt R re3 Jackhmmer:B0PC15 Family This domain family is found in bacteria, and is approximately 70 amino acids in length. 
The family is found in association with Pfam:PF00239 and Pfam:PF07508. There is a single completely conserved residue G that may be functionally important. 25.00 25.00 25.00 25.40 24.90 24.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -9.09 0.72 -4.16 35 694 2011-05-25 13:00:02 2011-05-25 14:00:02 1 13 232 0 54 600 82 68.50 34 13.31 CHANGED pplsphcppptcs-+FlsllcKYsshpELTsshlsEhl-KIlVHcs-+..psu.pRpQcI-IYasalGplchPp ......................t.plsp.pppssssc+.Flpllc+YsshpELTsshlNEhI-KIlVHEt.....pc.....css..p+......pQcIEIYasFlGphp.............. 0 36 50 52 +14130 PF14288 FKS1_dom1 1,3-beta-glucan synthase subunit FKS1, domain-1 Coggill P pcc Jackhmmer: Family The FKS1_dom1 domain is likely to be the 'Class I' region just N-terminal to the first set of transmembrane helices that is involved in 1,3-beta-glucan synthesis itself [1]. This family is found on proteins with family Glucan_synthase, Pfam:PF02364. 27.00 27.00 33.00 28.20 24.40 26.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.62 0.71 -4.18 84 502 2011-05-25 13:11:19 2011-05-25 14:11:19 1 19 190 0 335 501 5 109.40 43 6.37 CHANGED pcplhplALYLLIWGEAsNlRFhPECLCaIF+...hAh-.......h.t.hps.......ht............tts-tsFLsplITPlYphl+spsh.......c............htpct..c.........HsphhsYDDlNphFW..tscsht+lths.p .........................cclhpluLYLLlWGEAsplRFhPECLCaIF+.............hA.-....................hht..ts......h.p.............ss-tsaLspVITPlYphl+spshc.................................hctcpc......HsphhsYDDlNphFW.scsht+lsh...p................................. 0 90 208 296 +14131 PF14289 DUF4369 Domain of unknown function (DUF4369) Eberhardt R re3 Jackhmmer:A7UYJ0 Family This domain family is found in bacteria, and is approximately 110 amino acids in length. The family is found in association with Pfam:PF00578. 
25.00 25.00 25.20 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.45 0.72 -4.02 232 931 2011-05-25 14:05:07 2011-05-25 15:05:07 1 13 192 0 223 886 222 108.30 20 31.00 CHANGED hsl...su.s.....s.pp.....t.........s....aplpGplssh.......ss.....splYL..hth....ps...st...h....s.....lDSstl.p.s..G.pFsFp..s.p..h..s.p..s....phhhl...............hh.p....s............tpp.........hhs.......hhl-.sG.p.lp.lps...........st..t...p...........hp.l..sGo.tt....scphp ....................................h..hsuC...spt......t................p....aplpGplpsh.....cs..........splYLtth......ps...st....h......s................lD...ostlp..s..G..pFsFp...s.s..h...s.p...s....phhhl................ps..................tpp...................hhs.........hhl-.sG.p..lplph.............stt.p...........hpl..pGo...scth............................................... 0 117 201 223 +14132 PF14290 DUF4370 Domain of unknown function (DUF4370) Coggill P pcc Jackhmmer: Family \N 25.00 25.00 169.20 169.00 20.40 19.40 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.50 0.70 -4.89 7 36 2011-05-25 14:53:12 2011-05-25 15:53:12 1 1 20 0 17 31 0 219.80 54 96.89 CHANGED M.....c+.hhtlRslhRuAshtSup.uuh..tpttl..cpth.tpRohhsLsusu................ttlsu-hp..sshuhuhGspRhFSEDVoHhPsIpDPcl.sAFKDLMAASWsELPsullp-AKpAlSKNTDDKAGQEsLcNVFRAAEAsEEFGGlLhoL+MElDD.lGlSGENVKPLPs.htsAl+TsapRYssYL-SFGP-EsYLRKKVEhELGoKMIHLKMRCSGLGuEWGKVTlLGTSGLuGSYVEQRA ................................................h............hR..shtu.t...h...t..h....h....tshssLsps.....................thsu..t..us.uhShs.RRhFSos.pHLPsIpDP-lcsAFKDLhAsSWsELPDSlVp-AKKAlSKsTDDcAGpEALcNVFRAAEAsEEFuGl.LVoLRMtLDDLsGLoGENVtPLPsalccAl+ouYpRYhsYL-SFGP-EsYLRKKVEsELGoKMIHLKMRCSGlGuEWGKlollGTSGluGSYVE.RA.... 
0 2 10 14 +14133 PF14291 DUF4371 Domain of unknown function (DUF4371) Coggill P pcc Jackhmmer:Q9C842 Family \N 24.80 24.80 24.80 24.80 24.70 24.70 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.41 0.70 -4.82 19 987 2011-05-25 15:07:50 2011-05-25 16:07:50 1 37 67 0 777 904 0 167.60 21 32.17 CHANGED -uFVspGapsa.p..c+pR..hcpHlGp.l.sSsHp.ApcKh-shhppppsIspshpppocptKtpYhsRLshSIcssRaLL+QGLsFRGHDESc-ShN+GNFlEllsalAcp.c-lp+llhpsuspsshhss.pIQ+plhpshApcspp.Ih--lGsshFulLsDESpDsspKEQhAlsLRYVDKpGp........VhERFlGlVHVp-TTSssLKsAI-uLLscasLSLpplRGQGYDGASNM+GchNG ................................................................................................................................................................................................................................................................................................................................................h....s......................................................................................h......th.h.t.hh...t..................h......h...h......................................................h...p......................p.......h.pp.l..phh.u...pt.l..h.p...lh...pp..l...p..s.....p....h...F...u.l.h.hDcstD.h.u.p.p.c.Qhsl...hlR.a.l....p.t.p.t............ltE.cF..l...shht.h...p...p...............t...............s....u..tl.hphl......p...h..l.t..p...h.sls...h......pp..hh..upsa..D...suush.s............................................ 0 218 578 708 +14134 PF14292 SusE SusE outer membrane protein Eberhardt R re3 Jackhmmer:A7V649 Family This family includes the SusE outer membrane protein from Bacteroides thetaiotaomicron, Swiss:Q45769. This protein has a role in starch utilisation, but is not essential for growth on starch [1]. 
25.00 25.00 25.00 26.10 24.90 24.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.71 0.71 -4.29 45 304 2011-05-26 07:22:16 2011-05-26 08:22:16 1 4 133 0 53 283 22 120.30 19 29.15 CHANGED lhhh.hhhhsuCpcDp..phtsh...psps....s.pLthsssssslsLst...us....s....A..lphsWssAshtss..s.slpYslphsttss.sFuss.lshs.psss...tpphohTspcLNs.l.h.s.p.h.Gl.tsspsssl..phclpu ...................hhshhhhsuCpc........Dt..p.ssh...p.ss.....shsLstss.s.s.spl.sLst...tss..s...u..lphsWop.s.....s......hsss.............s........s.....l..s...Y.........plp......hsh.....ts.t......sFsss..hths..pstt........ptphshottpLNp.h.h.s.......p.h....sh....tss......ptspl...hpl......................................................... 0 23 47 53 +14135 PF14293 YWFCY YWFCY protein Eberhardt R re3 Jackhmmer:A7V7D7 Family This family is found in bacteria, and is approximately 60 amino acids in length. There is a conserved YWFCY motif. It is often found in association with Pfam:PF02534. 25.00 25.00 28.40 27.40 24.00 23.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.62 0.72 -8.72 0.72 -4.13 13 298 2011-05-26 09:50:59 2011-05-26 10:50:59 1 4 115 0 29 252 8 59.80 61 10.31 CHANGED QpEDDLRALAKIMDFhRAlSIlllllplYWaCYpuh+tWGlslsVlD+ILhNFpRTuGLFu ......QQEDDLRALAKlMDFhRAlSIlhlllNlYWFCYpuhctWGlslGVlD+ILhNFpRTuGLFp... 0 13 26 29 +14136 PF14294 DUF4372 Domain of unknown function (DUF4372) Eberhardt R re3 Jackhmmer:A7V8G9 Family This domain family is found in bacteria, and is approximately 80 amino acids in length. The family is found in association with Pfam:PF01609. There is a single completely conserved residue G that may be functionally important. 
25.00 25.00 26.40 25.00 23.90 23.10 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.26 0.72 -4.10 46 284 2011-05-26 12:17:13 2011-05-26 13:17:13 1 3 143 0 79 290 118 72.30 32 23.51 CHANGED ppupslFuQllchls+ppFccllc+.apG-ptl+pFosasQhlsMhauQLotp-SLRDIpssLpuppsKLYHLGhpp ...........psphlFuQllphlspppF.pphVc+..apuc+asKpFosasphlsMhauQLoppcSLR-lpssLpupp.t+haHLGht.t............ 0 33 60 73 +14137 PF14295 PAN_4 PAN domain Coggill P pcc Jackhmmer:Q2FN86 Domain \N 27.00 7.00 27.00 7.00 26.90 6.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.70 0.72 -9.18 0.72 -4.25 64 1029 2012-10-02 11:41:37 2011-05-26 14:20:17 1 128 214 14 794 1625 53 47.50 22 13.28 CHANGED sDh.GsDh.psh..h.......s.s.......s..psCtttCp.psspCp..uaoasp...............s........s....upCaLK ........................................ph.G.s..h..tt...h......ts.s........................s.spsCpphC........p....s........s.....s........s....Cp..hao.ass.........................t...........t......tp..CaLK................................... 0 389 539 755 +14138 PF14296 O-ag_pol_Wzy O-antigen polysaccharide polymerase Wzy Eberhardt R re3 Jackhmmer:A7V9M7 Family This family includes O-antigen polysaccharide polymerases [1]. These enzymes link O-units via a glycosidic linkage to form a long O-antigen [2]. These enzymes vary in specificity and sequence [2]. 
25.00 25.00 25.30 25.10 24.90 24.20 hmmbuild -o /dev/null HMM SEED 447 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.71 0.70 -12.83 0.70 -5.90 29 375 2011-05-26 13:28:48 2011-05-26 14:28:48 1 3 336 0 31 222 10 415.20 39 91.97 CHANGED ls.h.hha.llshhh......sshphlh...hslh..hlhlh.shlas.h.........pphphh.hhh...hFhhshFlFlhup.h.lshh.....h.s.h..........hshp.ahp.phhh..ps.....hhlhhluL...lhhalGhll...hppp..phph...pptptpp....pph...hpthphl..shhlha.lshl..th....lht.hpplhhhtssuYhuhYts..htpp......h.s......t.hhhhhu..shhhhu...hhlaL..sohss++c..hhh.hhlalhhtll..slltGpRs.hlhslLhlhhYahhpph............hlshhphhhlhlh.sshlhhh.........hshhshlRsp.............p..htt.shh.sslhcFhhsQ..........GlShtlluh.........shphp...splstpt..sYsh..hhph.h..t.l.tt.lhuh.hsh..tspshc.sh.ssshucplsY..hl.ssstYLtGhGhGoSal..AEhYhDaGhlG...lhlhshl...lGhl.l.....thhpp..hh..pps....hhh.hhh.shhhlsslhahPRushhs.l..hhhhhhhhhllhlh .........................................................................................................hl...h.h.hhhhh..........sp...ll....slh....hl.l.ls..sllhs.h..........scl+hhhl.aa......lFhlolFlFL.lo.R..Ps.lsYF.........t.s..s...........ulc.s...Yps......shp....aA.....alllhlSl...LGlshGulL....hs++...+.lKh..tshusshc.....csa.....lKpL+hl....S.LslFl.Lsa...s..ah.......hlc.ap......cLla.p..lp.......so..Y.hAhYss...acSp................L.P.....a.FshhLS......saslhu....hshYL....uo+P..+K.hp......u.ptl.......L.ls..alssssl....pLslG.oRss.FILoILhsFlYY.ahRcp.........+........tKaIuhKcplsIalu.usILhl.u.........MGl..l.sYlR-ss.................p..loas.uhh..-lllDFlYcQ...............G.sSau...V...Lup.........uhhas....spLPhcc...NaTaGsll-a.Fspus.L.us.IFus..pu.h....hsssSl-lulcuNSaAHsLSY...ll.lsc.cY.L.pGa..GlG...SS.....Y...I..hE....lYs.DaG..hlG....VFL.lSF.L...LGlL.l.....uhLps.....sh.....+..s+....sIl...hsl...uL.llL..ssLF.F..hP..RuSFop.sahsLhshpFhslllllh........................................ 
0 12 22 27 +14139 PF14297 DUF4373 Domain of unknown function (DUF4373) Eberhardt R re3 Jackhmmer:A7V2U9 Family This domain is found in bacteria, eukaryotes and viruses, and is approximately 90 amino acids in length. 25.00 25.00 25.70 25.50 23.40 24.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.77 0.72 -3.69 44 354 2011-05-26 15:13:09 2011-05-26 16:13:09 1 4 157 0 42 290 6 91.60 23 32.64 CHANGED YFsh-sshhpDpplphlht+.h.G.hcGhuhhhhllpplhp.pssY..hsshpt...h.......ph.hApch..s..hst..........splcpllp......-a...sLF...s...hpp....p..t..hpS..t..lp .........YFshssshhpDtclctlttc.a.G.hpGhulhhhlLsp.l.Yc.psta..hh.hcp....h........hhlupph...s..hst...................ctlppllp......ch...sLFs...hpp......p..t.lloSttl............................................ 0 14 33 41 +14140 PF14298 DUF4374 Domain of unknown function (DUF4374) Eberhardt R re3 Jackhmmer:A7VA08 Family This family of proteins is found in bacteria. Proteins in this family are typically between 406 and 466 amino acids in length. 
25.00 25.00 27.00 25.50 24.90 23.80 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.65 0.70 -5.90 8 125 2011-05-26 15:24:25 2011-05-26 16:24:25 1 2 81 0 32 115 0 283.90 23 80.97 CHANGED oSCoDs-s..P.....usus..ssssKusYVIAuosssSsussshLLTAESLDEGolSolsNGL.ssDGuT.WVFYcspYLYuLsYNQGsAGTTpSYILsuNsclcpRstoYslpRFTTYGIYscaIlTsSTGDGspEhsDpNGYlPpuFLlSYLDVscpThTTNs.ppppshLuENFLGNGEaVTLAGIlEsNsKlYouAIPMGLSpYGsp..s-sGt..hlLssspDLVKTEsGGSGSGuYcKGEL.WTQYPNcsaVAIasssoFsscKlI.cTDKISYACGRhRSQYYQTIWuADNGDlYVFSPSYAKTM........oDt+QQT.TLPAGVVRIcAGuE-FDssYYsNLEppouG..+uFlRCWHIosDYFLLLMYD+.....s....lTt.TuhsAsELAlFKAEspKLTYV.oGLP..Ss.lISGFGNTPYsENGhAYhAV.TTT-G.pPAlYKIDPsoAoATKGloVE .................................................................................................................t..........................................................................................................................................................................................................................................................................................s.asspsalAlas...........shp.p.ll.pss.+hu..hhsuh..h..h.hp.tlh.hs-sGD.lYlF.S.suhsth...........ts..t.o.phPuuhhRIptut......p-FD.sYahshpt.sss.....t.....hhphhalsts.h.Fll.hh.sp...s..........hst...ps.....hsspp.h..u.l..h..cs....t.stp.hh.l.pGlP...st......s...s...h.u..t...t...s..h...s.-..p..G...h...sYhsl..ssp...pG......s.h......lYplDstsupAs+Ghpl.......................................................................... 0 10 31 32 +14141 PF14299 PP2 Phloem protein 2 Coggill P pcc Jackhmmer:Q9C8U9 Family Phloem protein 2 (PP2) is one of the most abundant and enigmatic proteins in the phloem sap. PP2 is translocated in the assimilate stream where its lectin activity or RNA-binding properties can exert effects over long distances [1]. 
25.00 25.00 26.70 25.50 24.00 24.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.04 0.71 -4.83 88 511 2011-05-26 16:48:35 2011-05-26 17:48:35 1 14 50 0 261 487 0 150.10 27 61.38 CHANGED Gt+CaMLSARs.LsIsWu-s...sp.YWpWhsh..............spSRFtEVAcLhsV.sWLEIpG+lssphLSPsTpYusYhlh+l.............s-p..saG......h-..h..Pl...............c.h.....slsh.....................................................s..ss.............................................p.p..p.....................p.p.php...........h..p..............................................cRs......DGWhEl.-lGEFh....s.s..p.....s.....pss....ElchSlhEsc...ssph..KsGLllcGIEIRPK ..........................................................t.shhlsu+.s.Ls.I..sh....hs...s....p..YWpahsh..............spu..ca........clA.L.pl..hW....l.-lpGplphph..h...ssspYsshhhhph.....................tcp...shG......hc..h....sh................p...hplsh...............................................................................s...st.................................................................p.p.........................t..tthph..p..............................................ptt......-.sWhEh.chG.-Fh....sp....p.........t.........pss.....clp.hsh..h-h.c...ssph..KsGLhlcGl.tIpP......................................... 0 40 141 190 +14142 PF14300 DUF4375 Domain of unknown function (DUF4375) Eberhardt R re3 Jackhmmer:A7VAK3 Family This family of proteins is found in bacteria. Proteins in this family are typically between 156 and 204 amino acids in length. There is a single completely conserved residue G that may be functionally important. 
25.00 25.00 25.20 25.30 24.90 24.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.57 0.71 -4.13 41 346 2011-05-27 08:18:22 2011-05-27 09:18:22 1 4 298 2 55 291 11 119.00 25 63.69 CHANGED -.shppLss-Q+sLlAhphLcpEVpsGGFsQhhtNuhGtalh.pshhcuh+paGhcchscllpcupplYtpp.t.sl-c-pp..tcph.shh.cp.....a..st.....F--hDcpah..-hEE..phsphlspYlcpHh-pF .......................ht.Lsscp+pLhAhphhcu-VtsGGFsQhltN.uhG..th.l..h.pshs....cuL+phGspchspllc+At.s..l...a..pppttsl-pp.tp...tp......ch.shh..cp.........h..tp.....h-ph.Dctaa..-ht-...phhthl.stalptp.t.F........................................... 0 24 34 43 +14143 PF14301 DUF4376 Domain of unknown function (DUF4376) Eberhardt R re3 Jackhmmer:A7UZK5 Family This domain family is found in bacteria and viruses, and is approximately 110 amino acids in length. 25.00 25.00 25.20 25.10 24.90 24.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.34 0.72 -3.94 46 601 2011-05-27 09:01:56 2011-05-27 10:01:56 1 7 328 0 48 539 8 107.00 30 60.69 CHANGED L-ps+stKhsEIsshRsptpss..u..hshp..u.tchtsspps..p........hp...hssslt.....h.upps.shs.tsh.....hWpssssts..lshs.......hh..shttslhp..p...sp..phapcppph+pplpuhpshcslpuh ...................Lcts+pt+hp..EIpshRspppst.s..hsac.....s..tph.s..ssss...p............hp...l.sslh........h..Apss..sh...sth..............hW.sD.....uDNpp....V.p.loh....-hht....sh.spuhss..+...spcIap+pcphKpclps..hsshpplps................. 0 8 29 41 +14144 PF14302 DUF4377 Domain of unknown function (DUF4377) Eberhardt R re3 Jackhmmer:A7V378 Family This domain family is found in bacteria and archaea, and is approximately 80 amino acids in length. 
25.00 25.00 25.40 25.20 23.30 17.50 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.49 0.72 -4.08 59 285 2011-05-27 09:46:05 2011-05-27 10:46:05 1 11 240 0 66 282 16 80.30 31 37.50 CHANGED lhVsspp..ssC..s........Gh.....hshpChtV+pp...spp..........sWp.haYssIcGF..s..a-sGacYtL+V+c..hpltss.....PADuush.pYpLlcllpKp .......................................Vuspp...ssC...s........Gh.....sshpCLpVRcs...tts..........sWp.haa..us..IEGF..s..acsGhcYhLcVpc..hphtNP.....PADu.uu.h.tasL.pllpp......................... 0 15 38 53 +14145 PF14303 NAM-associated No apical meristem-associated C-terminal domain Coggill P pcc Jackhmmer:Q9SKG8 Family This domain is found in a number of different types of plant proteins including NAM-like proteins. 27.00 27.00 27.10 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.40 0.71 -3.90 48 509 2011-05-27 10:45:57 2011-05-27 11:45:57 1 15 25 0 377 488 0 144.70 18 46.12 CHANGED +sFshhHCWplL.+spsKWpsch.p........................t.pp.ppt..........psssps....sssss...pts..sss....sp....ssstp........................................cRP.GpKpuKpthp..........................th.c..................................c.tp.........................cc+ht...p..p.......hppt....p......cc+h..........p.................hccc+h-hcphp....pEccIMhsDhosl.ss.pppa...hcthpccI 
..................................................................................................Fph.asWplL.+ppsKWpsh.tp..........................pp.pt..........................tstspp...sstss......sts..sss......sp.......ss..ttt.......................................................................pRPhGpKtAK.pphppt........................h....th.............................................................t...........................ppcht..p......thtp.....p............ppph........p.................hppcp..hphp.h...........t..plh.hDhssh.s................................................................................................................................... 0 143 254 311 +14146 PF14304 CSTF_C Transcription termination and cleavage factor C-terminal Coggill P pcc Jackhmmer:Q8VYM7 Domain The C-terminal section of CSTF proteins is a discreet structure is crucial for mRNA 3'-end processing. This domain interacts with Pcf11 and possibly PC4, thus linking CstF2 to transcription, transcriptional termination, and cell growth. 23.00 23.00 23.00 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.65 0.72 -8.16 0.72 -4.68 4 329 2011-05-27 10:51:39 2011-05-27 11:51:39 1 13 212 1 210 290 1 43.80 49 10.15 CHANGED spp.tupAtLl.QVhpLopspIshLPPtcRppIh.LRpQlp+ut.sp .............cpsALlhQVLQLos-QIuhLPPEQRpsIlhL+pQltps...th..... 0 56 96 153 +14147 PF14305 ATPgrasp_TupA Glycos_tran_Wfd; TupA-like ATPgrasp Eberhardt R, Iyer LM, Abhiman S, Burroughs AM, Aravind L re3 Manual Family A member of the ATP-grasp fold predicted to be involved in the biosynthesis of cell surface polysaccharides such as the O-antigen in proteobacteria, the capsule in firmicutes and the polyglutamate chain of teichuronopeptide [1]. 
25.00 25.00 25.80 25.10 24.80 24.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.30 0.70 -11.53 0.70 -5.06 14 355 2012-10-10 13:17:03 2011-05-27 14:24:01 1 11 269 0 99 342 80 230.60 28 73.88 CHANGED pKlQahKlh...scs.hhshhsDKhpl+calpcphGpp.hlIPhluVhsph...psIcapplP.ppaVlKssHsSG..shhlspscsphshhpsp.....pch.pphLpp....shhhhsREa.Ypplp.+lIlEchhtcpss........cDYKhaCFpGpsphl.lplcR..psppptshashsaphlsh...stpa......stpphpKPsshc-hlplu....cpLSpc..hPaVRlDhYpsssplYFGElTFh...suuGht.chhPc-aDchlGchhp ......................................pKl.ahh..h......ppshhsphsDKhtVRc....altpph.s....p....p....hllPhl.s.hhssh.....p-ls..a..spLP....ppFVlKssHsoG.......sshls...pDK.sph..s.hp.pht.....................pph.pphLpp.................sahh..hs+Eh.p.Y+slps.+.Il.sEchlt....sp.s.s.....................tD.Y.....KhasF...s...G.....c....s.....p..hl..l...sh.s.R.........tsp.p.p....h..s...ha.D....h.s.W.phhsh......p.phs.........stp..h.s..+..Pp.ph....cchlplA....cpLups......hs.a.V.RVDhY.t.s....ss........c.lYFGElTFt...ssuG..h....ph....hspp....hDhhhGphh........................................................................................... 0 37 55 77 +14148 PF14306 PUA_2 PUA-like domain Bateman A agb Bateman A Domain This PUA like domain is found at the N-terminus of ATP-sulfurylase enzymes. 
27.00 27.00 27.10 27.10 26.90 26.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.81 0.71 -4.83 89 1267 2012-10-02 17:37:24 2011-05-27 16:48:36 1 16 994 50 512 1155 428 155.00 30 31.10 CHANGED ltPHG..G.....pLh........shllpsp.ptpphhpcutp...L.Pp..lplospph...........sDLphlutGsFSPLpGFMscpDYpuVl-ph+L.s.............s........Ghh...WolPIsLsVsp-ptpp..lptGscluLppt...Gph.lAllp.lp-hYph...-KppEAppVatTs-..t............HPGV.phlh...ppschhluGslpllpp .....................................lhphhs.tt...phpt.hhtc..App.....h...p.lplsphpl....................................s-LpllupGhaSPLpGFMscc-Ypp...Vl.cph..+L..s.........................s...........................Gsl.......aolPIsL....s....ls....cc....ptpp...................lc....s....G...s....c.lsLhtp........Gph..lAllp.l.p-hap....cKpc...cstplauTss..t.............HPuVp......hlh...ptGchhlGGslpllp..................................... 0 175 312 427 +14149 PF14307 Glyco_tran_WbsX Glycosyltransferase WbsX Eberhardt R re3 Jackhmmer:A7V083 Family Members of this family are found in within O-antigen biosynthesis clusters in Gram negative bacteria, where they are predicted to function as glycosyltransferases [1,2]. 
26.10 26.10 27.00 26.40 24.70 26.00 hmmbuild -o /dev/null HMM SEED 345 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.38 0.70 -5.28 84 380 2011-05-31 07:37:58 2011-05-31 08:37:58 1 33 282 0 99 359 61 323.30 31 59.13 CHANGED +lIAaYLPQFHPl.P.E.NDcWWG+GFTEWTNVs+A+PhFcGHYQP+..lPs.-.LGaYDLR.s-shctQAcLA+caGIpGFCaYHYWF.sG......K+lLE+Ph.cp.lL.p.ssc.DhPFCLsWANEsW.o+.pWcG...t....spcl....LlpQpYssccD..ppHh....phll..hF+DpRYI+l-G.KPlahIY+P.......ppl.PchpphlphWcph.Ap.csGlsslahlsht.......................tth.tt......p......hphGaDussph....t..........stt......h.tth..................s....htt.......thtph...hppt............h....th....sph.h..........c.Ys..phhpphls.p..t.t.....p....h..phaPslhPsWDNosR..........+sp.p.uhl.hhsuoPctFcpaLppsl.p.p.sp.p.p.......pcchlFINAWNEWAEGsaLEPDh+aGhuYLcAl ......................................................................................................................................................................................hlAaYLPQaHsh...E....ND....tWWGcGFTEWsNlt.pAp..PhFpGHhQP+..hPh.p.hGaY..DLp.sc..shptQsclA+phGltGFsaaaYWF..sG...........+pl....LE.pPh...pp.hL.....p.st..c.D.hPFClsWANcsW.s+pWsu.............t.....ppcl...............LhtQpYs.st.pD.....htpah....phlh.thFpDpRY.l+l-.....G..+.Pl.hhlYcP..............ttl...schpphlphWc.ph.sp.c.tG.h.sslahhtht.................................sth.t.........p..........tphu....a....Dushph...tshh.h..ht................................h.t.......ph..hp.h.hppt...................h...........hph..h..........cYp..chhp...t.hhpp...p...t..........s.....hphaP.s.lhs..sW.DNo..sR..............ptt..p.uh....l.....h..hs.uoP.cta..ppalppt.hp.h.st..p.p........ppch..lFlsAWNEWuEGsaLEP.Dh.+.aGhuaLcsh................................................................. 0 40 76 89 +14150 PF14308 DnaJ-X X-domain of DnaJ-containing Coggill P pcc Jackhmmer:Q93ZH5 Domain IN certain plant and yeast proteins, the DnaJ-1 proteins have a three-domain structure. 
The x-domain lies between the N-terminal DnaJ and the C-terminal Z domains. The exact function is not known. 27.00 27.00 27.80 27.50 26.70 26.80 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.16 0.71 -5.05 53 402 2011-05-31 12:29:53 2011-05-31 13:29:53 1 12 193 0 305 409 4 194.00 27 43.07 CHANGED ptpccRlpcLuppLhc+lp.as........c..ssp........cp.........Fpp+.hppEsc.sL+hESFGl-lLHsIGtlYppcAspaLtppp......pahG.lut...........haspl+.sKGcsh...Kssa.......sslsoAlcAppsh-p.hp+.......hpppt...................s.c..h....spcc.....hschpcp...htu.............K......hLsshWphs+aEIpssL+cVCpplLpDcsV..spcpRhpRAcALhhlGchFppsccs.s..c...-pppt.psFEc.lhs ........................................................................h..ppcR.ppLuppLhc+lp.as......................c..ssp..............cp............Fppp.hc.Es........c.pL.phpSFGh-.....lLcsIGhsYhppAsphL.ppp..........ta.hG.lst...........hhp.th+...s.Kuphh....+sph..........ssh.ssAlsh.phhcc.ht+.......h.pppt...............................................................................................t.p.......spcc............htchtcp.hts..........................................................................p......hLsuhWphshh-IpssL+cVCpplL.....p.D..p.sl.......s.p..cphhtRAcALhhlGplappstps..p...cttt.................................................. 
0 122 199 273 +14151 PF14309 DUF4378 Domain of unknown function (DUF4378) Coggill P pcc Jackhmmer:Q9FIS0 Family \N 25.70 25.70 26.00 26.20 25.10 25.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.02 0.71 -4.18 103 449 2011-05-31 13:05:52 2011-05-31 14:05:52 1 9 28 0 299 412 0 156.50 19 19.00 CHANGED p-...hpYlp-lL.tsuGl.h.pst.t............h.....p..h..psssp..PlsPslFcpLEppt.........s..................................ttsptt+.scR+LLFDhlNEsL.......s.......chht........hshpshhp.....th......shphp.........h...su..pp.Llc-lhpplpphht.......ps....h.pp...p.....t........ll..scD.hspp.....tt....hh.....shpp......-sppluh-lEchlhc-LlcE .............................................................hpYlpclL..sush.htp................h...........t.h..h.ssp..slssplFpplEpphs...................................t.tttpp.c++LLFDhlNEsL.............s..chht.........hshtshhp.......htht.h............h.....ss...pp...Llcclhp.plpphht...............ppt.....h.pp.....p.....t.........ll...tcD.htpt.........tt..hh.....shpt......-hpplsh-lEchlhc-Ll-E.................. 0 50 182 249 +14152 PF14310 Fn3-like Fibronectin type III-like domain Eberhardt R re3 Jackhmmer:A7VAA0 Family This domain has a fibronectin type III-like structure [1]. It is often found in association with Pfam:PF00933 and Pfam:PF01915. Its function is unknown. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.23 0.72 -4.04 1123 4829 2011-05-31 13:39:53 2011-05-31 14:39:53 1 85 1882 11 1761 4491 425 70.70 32 9.16 CHANGED EVVQ...LYl..p......s...s..s......u.........s......l..s..cP.s..+pL+GFpKl.p.Lps.GEoppVsh.s.ls.t....c.s....luhac.t.......p...t....p..........ahl-...s......Gp..aplhl..Gs..SS ...................EVVQ...lYl.p........s..s.s.........u...............................s......h....s..cP....s....+pL+GFcKlp..Lp........P....G........E.oppVsh.s.ls.h....c.s......L.uhast........p...t....p.................asl-...s..........Gp...aplhlGssS........................................... 0 582 1136 1550 +14153 PF14311 DUF4379 Domain of unknown function (DUF4379) Eberhardt R re3 Jackhmmer:Q7UL48 Family This domain is found in bacteria, eukaryotes and viruses, and is approximately 60 amino acids in length. It contains a CXXCXH motif and a CPXC motif. 24.50 24.50 24.70 24.50 24.40 24.20 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.21 0.72 -4.15 113 571 2011-06-01 07:26:03 2011-06-01 08:26:03 1 19 105 0 218 533 412 55.50 29 36.58 CHANGED scLspEas.....csp.....hpPspVshuSph+shW+Ct..pC.uH.cWcAplssR.o.....spssGCPtC ...............................clhpEWs....tcNt........hsPpp..lsh.s.S..p...pcs..a..W.cCs.........ps.uH..pWpuplpsRs........tpspsCPhC..................... 
0 153 193 213 +14154 PF14312 FG-GAP_2 FG-GAP repeat Eberhardt R re3 Jackhmmer:Q7UNP1 Family \N 23.00 23.00 23.00 23.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.48 0.72 -3.95 180 1055 2012-10-05 17:30:43 2011-06-01 10:35:36 1 62 110 0 582 1179 1780 50.20 34 23.05 CHANGED asppsKLsA.oDGuusDhF.G..hSVulsus..s.hlVGAh...t-Ds...ts......s.GuAY.lF .................tthlhu..s....s.....s...s...s..sD...t....F.G....h.S...Vul..su-..........s..llVGA.................t--s................sus...............s.u..GusYlF......................... 0 432 550 571 +14155 PF14313 Soyouz_module N-terminal region of Paramyxovirinae phosphoprotein (P) Coggill P pcc Karlin D Domain The soyouz module moiety is the N-terminal region of the phosphoprotein (P) from the subfamily Paramyxovirinae of the family Paramyxoviridae viruses. The main genera in this subfamily include the Rubulaviruses, avulaviruses, respiroviruses, henipaviruses, and morbilliviruses, all of which are enveloped viruses with a non-segmented, negative, single-stranded RNA genome encapsidated by the nucleoprotein (N) within a helical nucleocapsid. 21.30 21.30 21.30 21.30 21.00 20.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.66 0.72 -4.41 16 410 2011-06-01 10:13:34 2011-06-01 11:13:34 1 9 40 3 0 415 0 56.90 54 14.64 CHANGED MshhsDsEIscLl-pusslI-pIppupspssc..ThG+SsIstGsTcsLssAWEccsssp ...MATFTDAEI--LhETSGTVIDSIITAQGKssE..TVGRSAIPQGKTKALSsAWEKHGs............. 0 0 0 0 +14156 PF14314 Methyltrans_Mon Virus-capping methyltransferase Karlin D, Coggill P pcc Pfam-B_840 Family This is the methyltransferase region of the Mononegavirales single-stranded RNA viral RNA polymerase enzymes. This region is involved in the mRNA-capping of the virion particles. 
20.20 20.20 20.30 20.40 20.10 20.10 hmmbuild -o /dev/null HMM SEED 675 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.07 0.70 -13.08 0.70 -6.68 10 144 2012-10-10 17:06:42 2011-06-01 13:38:09 1 2 69 0 0 167 31 660.80 50 31.64 CHANGED l+.p-G-W-sLossEKSYHVGRslGFLYGDLshp+Ssps-DSSLFPLSIptKlcGRGFLRGLlDGllRuSulQllHRRolspLK+PtsAlaGulhYLI-KlossssFhsLlRcGPIR-ElpoIPHKIPoSYPToppDhGtllRsYLKpph+plccupYposhss.lWlFSDltSpc..alGshulSoplLchLhcssLSK+s+spLRcLusLppplRoscs..h..s-hclcplhpchL....hCspElRHAsKF..shsKpssspthh.......WG.pEhhGslpplPV.Yoossss....chhphs.PRlQNPLISGLRlsQlATGAHYKlRoILsshcIpacDsLssGDGSGGhTuslLRhNspSRuIFNSLL-lsGpsh+GopPsPPSAL.slGs-.psRCVNt-osWEcPSDLScppTWcYFhcLKpphuhpIDLIVhDMEVpD.phsppIEppl+calhpLL-csGsLIaKTYGThlsspsps.slshLGshFcoVpLlQTphSSShTSEVYllh++h+phl.DssalDasoL..pchhpplasFps.+pEFcRA+plpppchlhGlPsplIPDPhV-LpTlLpIuGVsSGluHplsp-lppupush.phAlllsslIsa.p.shslo+hhs.p..phpPPSDucll+hhsslsGlshWLSltppDlshapphppllscsh.sI+atptht+.uphhhpW .................................................................................................IcL+sGDaEoLoscEKShHIGoAQGLLYSILVAhHDSGYNDuoIFPVNIYuKVSPRsYLRGLARGlLIGSSICFLTRMTNINIsRPLELISGVISYILLRLDNHPSLYlMLREPsLRuEIFSIPQKIPAAYPTTM+EGNRSlLCYLQ+VLR....YE....R-sl...os...SP-sDhLWIFSDFRSsKMTYLoLlTaQSHLLLQ+l.-+sLSKpMRspL...RQhuSLMRQVLGGHGE.sh....-SD--IppLL+DuLp+.........T+WV.DQEVRHAA+o..Mpuc.h.S..P..s.c.+.hSRK......hGsSEWlCSAQQlAVSTSu.NPAPhS-hDlRsLS....+RhQNPLISGLRVVQWATGAHYKLKPILc-LslaP.sLs.LVlGDGSGGISRAVLsMFPDu+LVFNSLLE...VsDLMASG.THPLPPSAIhoGG-DIhSRVIDa-SIWEKPSDLRN.sTW+YFpSVQcplNMoaDLIICDA.......E..V.......TDIu.Sl.N+.......IsLLhS.......DFuh..SI.-.GP.lhLlFKTYGThLlNs.-Y+.Alp.HLSRAFPoVsGalTQhTSSFSSElYL+FSKpGKFF.RDuEaLTuSTL...REMSLV.LFNCSSsKSEhpRARSLN.YQDLlRGFPcEIISNPYNEMIITLIDu-VESFLVHKhVDDLEL...p+GsLSKhuIIlsIhIlFSNcVFNloKsLs.-P.hFaPPSDPKlLRaFsIpsuThhaLSsshGDlssFs+LH-LYNpPl..oaYFc+Qsl+.GphalsW............................................ 
0 0 0 0 +14157 PF14315 DUF4380 Domain of unknown function (DUF4380) Eberhardt R re3 Jackhmmer:Q7UYS3 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 288 and 372 amino acids in length. There are two completely conserved residues (G and E) that may be functionally important. 26.50 26.50 26.90 27.00 26.40 26.40 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.73 0.70 -5.22 21 128 2011-06-01 12:55:25 2011-06-01 13:55:25 1 2 114 0 49 125 11 260.80 25 81.04 CHANGED tua.pslpLuNsslclslTsslGsRIlta.uhpG.upNlL............s.htsthttsstschhhhuGpRlWhuPE.........t..+p.....a.PD....ssshphp......ppsltlpssssshoGlphphplplsssp.splplppshp......Ntstt.slphshWshThlss.uGhshhPlsstsph.........pl.hhshophtps.phphpcchlslpssstst......tKlGhsspsGWhAh.htpsplFlcpashhs.sA..pY.P-hGhshEh..Yss.......pshlEhEshuPhtpLpPGpshpapEpWpLhc ...................................................h.pslplssshlclhlssshGsRllpa..uhps..ttNhh...................tt.hh.ss..tsphhhhGGc.+lWh.....uPp...........................................cp......a.PD....s...tshphph......pssl..tlppssp......t.h..sslphphpl........p....h.pss.............p...spl..plptplp...................Nhssh.....sl.....p.....huhWslohls..........s..s.......uh..hlPhs..stss....................pl..hhshs.phts..Rhhhscphlhlctssp.ps.........hKlGhss..p..tG..W...hAh....hs..sspla..lcpath.....su..tY.P-h.........G.phEs..Yhs......................shhlEhEshuPhhpLpPGpphpa.E.WpLh................................................................................. 0 27 42 45 +14158 PF14316 DUF4381 Domain of unknown function (DUF4381) Eberhardt R re3 Jackhmmer:Q7ULH4 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 158 and 180 amino acids in length. 
27.00 27.00 27.10 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -11.19 0.71 -4.09 111 458 2012-10-09 19:59:19 2011-06-01 14:55:13 1 4 391 0 131 402 66 145.10 27 87.80 CHANGED sLtpLpDltlPsslu.aWP.A.GWWllhsl...llhhlh..hhhhh....hh.++ppp.p....t..h++t..ALptLsplpt..............sssphhsplspLLK+sAlshh...P+p..plA.sLsGpsWhpaLcspts..........hssthtp..tLhp....thYpstst...............hpp..lhptsppWlcpp ...............LtpLpslhlP.pssu..WaP.A.GWWlllsh...llhslh....hhhhh...........hhp+tpp....p.............t....h++p..ALptLsph.............................sssphhpplstLL+csAlsha....P+p...thA.sLs.GcsWhsFLDspss..............t..hsshtp.....hhhpshYpspsh............ps......hspLhptsppWlcp............................................... 0 28 57 95 +14159 PF14317 YcxB YcxB-like protein Eberhardt R re3 Jackhmmer:Q7UN84 Family The YcxB-like protein family includes the B. subtilis YcxB protein Swiss:Q08793, which is a functionally uncharacterised transmembrane protein. This family of proteins is found in bacteria, and is approximately 60 amino acids in length. 24.30 24.30 24.30 24.30 24.20 24.20 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.41 0.72 -8.92 0.72 -4.72 258 600 2011-06-01 13:57:44 2011-06-01 14:57:44 1 4 461 0 124 522 35 62.50 16 34.41 CHANGED l..sccslth.....ps....spspsph...pWsplpp.lhcscphhhl....hh.......spt.phhhl...P+.c.shs........ttph.....pphhphlp ........................hpppulhh......ps....tpspsph...tWspltc.lhcscphhhl....hh.........spt.pshll...PK.c..shs....ttph....pphhth...................... 0 41 86 104 +14160 PF14318 Mononeg_mRNAcap Mononegavirales mRNA-capping region V Karlin D, Coggill P pcc [1] Domain This V domain of L RNA-polymerase carries a new motif, GxxTx(n)HR, that is essential for mRNA cap formation. Nonsegmented negative-sense (NNS) RNA viruses, Mononegavirales, cap their mRNA by an unconventional mechanism. 
Specifically, 5'-monophosphate mRNA is transferred to GDP derived from GTP through a reaction that involves a covalent intermediate between the large polymerase protein L and mRNA. The V region is essential for this process [1]. 21.70 21.70 22.00 41.60 18.00 21.60 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.57 0.70 -5.34 63 729 2011-06-01 14:32:24 2011-06-01 15:32:24 1 9 223 0 2 766 0 237.80 32 11.27 CHANGED sCSsslActLRphSW.....GRplhGlTsPcPlEhhss..hh.ttp.Ct...................Cp..t..sppp.....hohhh...hss...lspstp.spu.tsPYlGSpTpE+p...hhphhphcstsshlKpAl+ltsshpWhhssssp.hpphhplhpstssl...shsphphl.sh.poushtHRhssupsppthhsus.sthsoahphosDshphhsct....pshslhaQp...llshuhhpshhth.pssshpstshHhHhpsssCl+clpps ...........CSsplAchLRppSW....tGRpIlGlosPcslEhhcs.tllpsotsCsh........................Cp..s..Gspp.....hohhh...lPuthplsssspts.u.+lPYlGSpTpE+p...shphhphtsho.plKsAl+ltsshtW..saucs-.sphpthhplupu+ssl.....oh-.l+hl.Ph.pouNlhHRLcDutsphpassushsplusalplSsDshshhtp-..tsp-tNllaQp.hh.LhGLulhcohh.hppspshsp.hslHLHhchssCl+.h............. 0 2 2 2 +14161 PF14319 Zn_Tnp_IS91 Transposase zinc-binding domain Eberhardt R re3 Jackhmmer:Q7UXQ4 Family This domain is likely to be a zinc-binding domain. It is found at the N-terminus of transposases belonging to the IS91 family. 25.00 25.00 25.00 25.30 24.80 24.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.33 0.72 -10.87 0.72 -4.56 72 995 2011-06-01 15:28:09 2011-06-01 16:28:09 1 9 449 0 150 849 144 105.20 32 30.65 CHANGED t-Ihcp...ahssa...p...p.pa....sttls.spt+shpuIhpCRT.sshGsphhpCps..Csppc.hhhpSC+sR.aCPpCQspssppWlppppsclLP.ssYFHl.VFTlPtpLpslshp.spchlYs ........................h..lhpt...hhs.h........p.t.......tttltsh.hcshschhtCGo.thhGh.pphpC..ss...Cs+p+.hlshS.C+sR.tCPpCGs+tsspWlthhhscl..s.ssahHl.VFTlPpplhslhhpsp..h..h........... 
0 56 94 121 +14162 PF14320 Paramyxo_PCT Phosphoprotein P region PCT disordered Karlin D, Coggill P pcc Jackhmmer:Q4VCP9 Domain The N-terminal half of the phosphoprotein P of the Paramyxovirinae viruses. The very first 60 residues have been built as the family Soyouz-module, Pfam:PF14313. The remaining part of the region, here, is disordered, and is liable to induced folding under the right physiological conditions. The region undergoes an unstructured-to-structured transition upon binding to Measles virus tail, C, unstructured region. 27.00 27.00 317.60 317.20 20.10 19.80 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -11.87 0.70 -5.53 3 49 2011-06-01 16:08:56 2011-06-01 17:08:56 1 3 3 0 0 53 0 296.20 78 57.10 CHANGED ERRNLEDLSSTSPTDGTIGKRVSNTRDWAEGSDDIQLDPVVTDVVYHDHGGECTGYGFTSSPERGWSDHoSGANNGNVCLVSDAKVLSYAPEIAVSKEDRETDLVHLEDKLSoTGLNPTAVPFTPKNLSsPAKDSPVIAEHYYGLGVREQNVcPQTsRNVNLDSIKLYTSDDEEADQLEFEDEFAGSSSEVIVGISPEDEEPSSsGpKP.ESVG+sIEGQSlRDsLQl.KsNKssDsPGAGPKDSAVKEcs....PQKRLPMLAEEFECSGSEDPIIQELLKENSaINuQQGKDAQPPYapGIEuSpSPDKTEITuDA EGRNVEDLSSVTSSDGTIGQRVSNTRAWAEDPDDIQLDPMVTDVVYHDHGGECTGHGPSSSPERGWSYHMSGTHDGNVRAVPDTKVLPNAPKTTVPEEVGEIDLIGLEDKFASAGLNPAAVPFVPKNQSTPTEEPPVIPEYYYGSGRRtDLSKSPPRGNVNLDSIKIYTSDDEDENQLEYEDEFAKSSSEVVIDTTPEDNDSI..NQE..EsVGDPSD.QGLEHPFPLGKFPEKEETPsVRRKDSLMQDSCcRtGVPKRLPMLSEEFECSGS-DPIIQELEREGSHPsGSL.+LREPPQpSGN.SRNQPDRQLKTGDA.. 0 0 0 0 +14163 PF14321 DUF4382 Domain of unknown function (DUF4382) Eberhardt R re3 Jackhmmer:D2PI33 Family This family is found in bacteria and archaea, and is typically between 142 and 161 amino acids in length. 
25.00 25.00 25.20 25.00 24.90 24.70 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.90 0.71 -4.10 62 266 2011-06-02 07:47:30 2011-06-02 08:47:30 1 4 221 0 121 260 33 151.60 24 46.45 CHANGED ssGs....lslslTDA...P..ss..sh.....ppVhlslsplplp...t....sss................sps.......shh...shs.......spp...lsl...........................thhsus.tp.....Ls.........ptslP.uG.p.YsplRLhl.s................s.........ss.......................ls.h...s...G....sphs.LpsPSut...ps.sl.Kl....s......s..hslp..uss.sshslDFDsppSl.......s.tG.ss..pYhLKP .......................suslsltloDA...P..ss..sh..............spVhlslsplplps....sss......................................................sss........shh..sls............spp...lDLh..........................thtsus.tph.....lu....ptslP.sG.p.YpplcLhls................s..sp..............................lsh..s..G.....sphsLpsPSst.................ps..tl.cl.....p................shslp.sss.sshllDF-hp+ul.......s.pG..ss....pahL+P........................... 0 39 73 106 +14164 PF14322 SusD-like_3 Starch-binding associating with outer membrane Coggill P pcc JCSG:Target390309 Jackhmmer:Q8A1B4 Family SusD is a secreted starch-binding protein with an N-terminal lipid tail that allows it to associate with the outer membrane. 
27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 190 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -11.65 0.71 -4.24 44 6539 2012-10-11 20:01:04 2011-06-02 11:11:39 1 8 205 26 1561 6470 663 202.60 17 36.62 CHANGED caLDhp.Ppup....h..sps......h.chps.....hls.....sh.h...ssh.s......hht...thh....thsp.hh..ss...hsss.h...s.t...tsh...h.t......ht..h.........pphsh....s.....s......sss..s......shWsshYp.sI...hssNhlLcp.........s...pp........p.hs....t.....lh.GEAhhhRAataahLssha.............u............t..l..Ph.s............p....sss...........s.............p...............t..s....h........s..Rs..Tlp-lYppIhsDLppAhsl.Lspst...............ph..+hsh..puA.hAhhARhYL ......................................................................................................................................tt.................phpt........hht...............sh.h........tth...t...........................t....h..................t.......hh..........sp.............hhs.s............t.....t.......t.s.h..t.t................h.......................tp..h..ss.......s.........s.............sth....p...........................sh..W......p......t.......h...Y......p......s.......I..........ppsNh...h...lppls.........tth.s...pp..........................p.tp...p........hh..uEAh.......hlRAahY.a.pLlpha.........................G...............s...l..P.l..h...................s......psp....................s...................s....................h....p......h..........s.....R.s....o..h.p.-....l..h...s.h....I...h.p...D.L...p.....p.......A..h......s......h......L..s..tptst.................................hs...R...hsp...suA.hulhu+hhL................................................................ 0 739 1439 1561 +14165 PF14323 GxGYxYP GxGYxY sequence motif in domain of unknown function Coggill P pcc JCSG:Target393069 Family This family carries a characteristic sequence motif, GxGYxYP, but is of unknown function. Associated families are sugar-processing domains. 
27.00 27.00 64.20 29.60 23.30 22.90 hmmbuild -o /dev/null HMM SEED 464 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.69 0.70 -5.78 18 103 2011-06-02 10:20:36 2011-06-02 11:20:36 1 8 61 1 42 97 11 377.40 22 66.51 CHANGED hlsoLQGlVNp.opspIY.l..psp.s.....s.pphhcphppp..hG.lsasc..lss..sasLLs+a+p..tlcGhVlYDsp...pssSlNlAsolAGlcsulslspsLhsplpstGlpt..l.t.DlRs..hs.......tp.......ahYsp.YhsphsHphlltLs..Pc...ph.stLRDYAl...h....op.uhlFhDssss...-..........psLhcphhusMss.u.uhshGWa......s.D.Et.sVphuSpaGluhluuDashNLoVhSuhsp.osshp....ptsshst.....hpNKhYVoFlhSDGDNlQas....ashpphassPs..RGplPlGWolSPuhh-luPslLsaYYcoA...osN..DsFIuGPSGsGY..hYPsph.............sp.s.p.LssahphspsYMp+ssh+llsIhD....ssshs.stsl.hsp.........astp.ssl.ulah.sasppss.......Gplhh..NstPslupcshhh.tlps..ph....pslpspIspuhsshss.sPtFhhl.lcsWsp.sh.sslpslhspLs....pN....lcVVsPDpFhpLh+c ......................................................................................................................................................hsolpGllsp..pp........pla.h....t............p.t.hhtth.pp.....ht.lt.hp....hps....tllphapp....hpGhllasst......ohs.Asol.Aulpsulsh...s.t..h.th.lp.thsh.....h...Dh+s..hs...................ht.......ahhpp.h.sthspphl.h.hs..sp......h..h.lp-ash....hsp..hh.a.h.pss.......p...........hslhpplh..st..h....t....s..u....hhGW.............s.c....Eh..lphsSp.h.G..h.h.lsuDa....Nhohhosh.....p...h................p..t..........pshhYlsahhSDGDNhQa.......ht..p..hasss...RGph......shsW.olu.P.slh..hsPslhphahpsu........s.N...D.hlsusSGsGY..hYPsth.............sp.s.t.httahphhppahpphs.phh..lh.s.......t.t.......t.th....hpt...hh....th.uhhh.tht...........h.h.h......sth....sh.t.p................................tth.t.ltth.t..tt....a..l.h.sWt.......pth..hht.ht..........h..h....h............................................................................................................................. 
0 34 40 41 +14166 PF14324 PINIT PINIT domain Bateman A agb Jackhmmer: Domain The PINIT domain is a protein domain that is found in PIAS proteins [2]. The PINIT domain is about 180 amino acids in length. 27.00 27.00 28.00 30.60 24.60 26.80 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.77 0.71 -4.11 38 474 2011-06-02 13:21:12 2011-06-02 14:21:12 1 9 214 1 256 469 0 149.60 39 24.75 CHANGED shtsslpFKcsPFYclh.chlpsos.h.ts.....................sp..p+ppsp..hpFpLsspphpplpst..........p...shplhLhst...........sh........hssss.spphpFP..slpl+lNsp.lphshpu.psKsGs.....sRPsslTs..al+hs...sh.Nplplsass.....ssc.............pYhlhlhLVc ..................................+P-lphppLPFYcllspLl.+P....osLssss............................................sp...+h.p-sp..hhFsLTP.p.Q.lppIpssh-.........t.h-hslQV.LR............................hCh........s-TSs.sQEDpFP..sslplKVNsc.ss.lP........sh.hPssKsGsE................KRPsRPlNITs.........hl+LS..ssssN..pIsloWus.....hs+...............sYshulYLV+............. 0 53 96 175 +14167 PF14325 DUF4383 Domain of unknown function (DUF4383) Eberhardt R re3 Jackhmmer:C1CUZ6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 137 and 164 amino acids in length. 
27.00 27.00 27.10 27.20 26.90 26.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.72 0.71 -4.11 66 192 2011-06-02 14:11:15 2011-06-02 15:11:15 1 1 132 0 100 202 10 124.70 29 79.76 CHANGED sphsAhshGslalllGllGFl..PGlss..st......................hsstputuhLhGlFtlshLHNllHLlhGlsGLh..hups.ssuARhahhshGhlYhsLhlhGlhhs.......tp.oh..........sshlPl...............NsADsWLHlslulshlslGhhh ...............................phsAhllGslaLllGllGFl...Puhss...........................hsttstuhLhGlFs..lshLHNllHLlhGlsGls..huts..tssARsahhssGslYhllhlaGlhss.......ts..sh................sshlsl.................NsADsaLHlshulshlslGhh.h....... 0 32 69 93 +14168 PF14326 DUF4384 Domain of unknown function (DUF4384) Eberhardt R re3 Jackhmmer:C1CXC5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 80 amino acids in length. 27.00 27.00 27.00 27.00 26.80 26.70 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.51 0.72 -4.15 69 228 2011-06-02 15:09:03 2011-06-02 16:09:03 1 22 137 0 127 233 36 83.10 23 20.24 CHANGED ssYphG.-plphtlps.....scsuYlhlhsl..sssG.plshlhPNthpts.........NhlpuupshshP........sstas.....hplssPhGp...pplhslsoppsl .............................aphG.-plpltlps.......spsu.Y.lhlhsl....ss.sG..pl.shlhPNthpts.........shlpuspsh.plP..........sstap........hpls.sPh..Gp....-plhslsopp...................... 0 39 84 118 +14169 PF14327 CSTF2_hinge Hinge domain of cleavage stimulation factor subunit 2 Coggill P pcc Jackhmmer:Q8VYM7 Family The hinge domain of cleavage stimulation factor subunit 2 proteins, CSTF2, is necessary for binding to the subunit CstF-77 within the polyadenylation complex and subsequent nuclear localisation. This suggests that nuclear import of a pre-formed CSTF complex is an essential step in polyadenylation. 
Accurate and efficient polyadenylation is essential for transcriptional termination, nuclear export, translation, and stability of eukaryotic mRNAs. CSTF2 is an important regulatory subunit of the polyadenylation complex. 27.00 27.00 27.40 27.20 26.90 26.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.37 0.72 -3.90 52 428 2011-06-02 16:18:23 2011-06-02 17:18:23 1 16 266 3 292 397 2 82.30 36 19.13 CHANGED ..lPsshs...sshpss-sIopsLuslsPsQLh-llsphKp.LsppsP...spAcplLtpsPQLuh.AlhQAhllMshl-spllppslpp ..............................................h.sshs.s..ss.sus-.sIopsluoLPP.pQhh-lhpQMKh...hspssP....pcA+phLhpNPQLAY.ALhQA.llMplVD.sphhhphlp.t........... 0 85 153 229 +14170 PF14328 DUF4385 Domain of unknown function (DUF4385) Eberhardt R re3 Jackhmmer:C1CYW3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea, eukaryotes and viruses. Proteins in this family are typically between 149 and 163 amino acids in length. 27.00 27.00 32.30 32.20 26.30 26.30 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.87 0.71 -4.27 28 238 2011-06-03 07:02:05 2011-06-03 08:02:05 1 1 235 0 57 138 307 138.60 63 93.28 CHANGED FDYshDatpIDa..RppPEhYRVGRGEQGVLLVEPYKSEILPaWRFKTP-lAcpSS-KIYphFhsYhcpsDFlGMDMARKFLQMGaTRARRY.ANaKGG+KY..p..............sptcl.cpptD...scKAcuApIF+ppWcps+scppYhchKccapc...cY .............FsYp.DFssIDF..RppPELYpVGRGEQGVLLVEPYKSEILPaWRaKstssAhcSAEpIYQLFEsYRpQDDFVGMDMARKFIQMGYTRARRY.AN...YKGGKKY.s...........................--GpLsscssD.......PlKAAAAslFKuhWD.+lRpDEDYL++K+pHQt+a............. 0 18 30 46 +14171 PF14329 DUF4386 Domain of unknown function (DUF4386) Eberhardt R re3 Jackhmmer:C1D3W7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 214 and 245 amino acids in length. 
24.80 24.80 24.80 24.90 24.50 24.20 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.58 0.70 -4.86 69 193 2011-06-03 07:11:31 2011-06-03 08:11:31 1 2 136 0 112 204 60 194.10 19 88.70 CHANGED +huGhlaLlhhlh.....ulh.uhhhhtshl.........sssshssltsptshhphuhhhthlhslhslslAlhLahll.+..thspslAhsssshRll.tsslhslullshhsslhlh.ssssh..s.t.............th.shshhhhshashs..a.luh.lhh.....GlphllluhLhhRSthlP+hluhhhlluuhhhlhsshhphhhss.....thhshhhhlshhlsEluhslWLll+Gh ..................................................hsGhhhlhhhlh......uhh.u.h.hh.shl.........ssthht.thhspt..s.hh.hhuh.h.h..hlhslh.lshulhLa.ll.+...ths.......shuhhhs.hhtll.tus.lhshul.lthhshh.lh.pt.....................th.thshh.hht.htshs......hhlu....lhh.....Glthllluhlhh+ot....hl.........P+hluhlshluuhhhlhss..hhthhhst..........ht..hhhlshhlhElshslWLlhhG........................... 0 62 87 107 +14172 PF14330 DUF4387 Domain of unknown function (DUF4387) Eberhardt R re3 Jackhmmer:B5YEU8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are approximately 110 amino acids in length. There is a conserved RSKN sequence motif. 27.00 27.00 37.00 36.90 19.90 19.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.03 0.72 -3.67 38 189 2011-06-03 08:26:29 2011-06-03 09:26:29 1 5 181 0 59 130 55 98.20 54 62.14 CHANGED Lt-lAcsIRSKNAGPa.lThDIhFcspcsY-plKposslspchlucLYtlss-cl.phh.a-sApAlKholsR.....shs.uGuhG-pDlaGuQQauPLLslpl ....ssLApVIRSKNAGPYELslDIlFKo+EsY-RVKsSutLTsElIAcLYclcP-hIhcIVaFDPusAlKIshPR.....slh.SGslGDsDVYGAQQHAPLLshp.h..... 0 19 37 50 +14173 PF14331 ImcF-related_N ImcF-related N-terminal domain Eberhardt R re3 Jackhmmer:Q7UL74 Family This domain is found in bacterial ImcF (intracellular multiplication and human macrophage-killing) proteins. It is found to the N-terminus of the ImcF-related domain, Pfam:PF06761. 
25.00 25.00 26.10 25.20 23.90 24.60 hmmbuild -o /dev/null HMM SEED 266 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.54 0.70 -5.49 110 1314 2011-06-03 08:45:58 2011-06-03 09:45:58 1 12 877 0 250 1135 43 257.20 27 22.64 CHANGED sssptDpstWpuFLsLL++pR.sRpPlNGlllslu.lscLlsss.stp...ptpApslRtRLpELpppLuhchPVYlllTKsDLlsGFs-aFssLspp.pRp.QlWGhT.h....shs...p..sss..sst..........hpp-astL.hpRLsspl.s+LppE..p...Dhp....cRstlasFPpQhuuLpshLtpalpplFtsspapps....shLRGlYFTSusQpG..s..s.h..s..p..ht..sh........s.p.......ph.......s....hs....ts.......s..s...s......s.....t...t............ss...s+.oaFlccLhpcVlFsEusLsu.ssh+hpt.....t.R...hh.t.ht.st.h...ssh.ss ........................t...t.ptt.WtthLshL++pR....s+pP.l.NGlllslsls-L.lssstpt..t...pphupsLRpRLpElpppLthphPVYlllTKhDLLsGFspaFpsLs..pp.pRs...plhGhT.hshp..t..ppsssh..................tthspp..ap.pL.hp+lsttlss.thttp..h....ssp.......pRstlasFPppht.u.LppsLtphlptlht.sst...apts....shLRGlYFT.........SuhQpG....ts.....s...h..sp...sh...............sp.......pa.............ths..tt..............s.....t....t.................................ss..sps..aFh+pLhspllhsEs.s...Lss..sh.p.hth....pt.R.hh.h.hh.hhh.....hh.................................................................................................. 0 47 109 179 +14174 PF14332 DUF4388 Domain of unknown function (DUF4388) Eberhardt R re3 Jackhmmer:B5YCM3 Family This domain family is found in bacteria, and is typically between 102 and 135 amino acids in length. 
25.00 25.00 25.10 25.20 24.80 24.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.31 0.72 -4.18 165 493 2011-06-03 11:45:10 2011-06-03 12:45:10 1 42 129 0 296 484 46 131.50 20 34.04 CHANGED pG.sLp...sh.sLs-llQhlptsp+oGsLpl.....pt.st............................lah..ccGpllp..Apt.s.............................................................thpupcAlhpl.l....thpp....GpFp.................ht.tthstttslph.sh..ppllh-uh+ph...DEhpth ....................tG.sLpph.sls-llphlptsp+oGh.Lpl..pttstp..up...............................lahppG..pl..ltApts............................................................................phps.pcslhpl..h.......shpp..Gp.Fp.............ht.ss.ttttplph...sh....ptllh-uh+hh..-ch..h................................................................................... 0 125 211 281 +14175 PF14333 DUF4389 Domain of unknown function (DUF4389) Eberhardt R re3 Jackhmmer:Q0EYR1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 104 and 223 amino acids in length. There is a single completely conserved residue R that may be functionally important. 25.00 25.00 25.20 25.10 24.00 24.80 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.18 0.72 -9.65 0.72 -4.27 51 266 2011-06-06 07:50:20 2011-06-06 08:50:20 1 2 225 0 91 250 268 80.10 31 48.89 CHANGED hpppshahRllaMllhslhhpluphllhllsllQalhhLls.GpsNppLtsFussLupahhchhpahoaso-c+PaPF.....scaP .................h.....hh+hlhhl.a.....hlVhhhhphshsslsllthhhhLho.GchspsLhsFssul.pahh+l..u..tYs..s..h..t..oDchP.PFs............. 0 38 59 83 +14176 PF14334 DUF4390 Domain of unknown function (DUF4390) Eberhardt R re3 Jackhmmer:Q0F0K5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. 
Proteins in this family are typically between 192 and 203 amino acids in length. 27.00 27.00 29.00 28.30 24.00 23.80 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.70 0.71 -5.06 70 309 2011-06-06 07:59:00 2011-06-06 08:59:00 1 1 306 0 114 259 83 164.20 31 82.02 CHANGED utAsshplpp.hplp.tsc....sslh.LsuslphpLsttlc-ALpcGlsLhFsh-hpltRsRhaWh...Dcplsp.....tshph+LuYpsLT+paclstss............hspsasoLs-ALpslt.clpshtlhstu.........plpssp.sYplplRhcLDhspLP+PhQlssh.spssWslsos.hhp..aph ..........................tApslplpcsphp..sss....ushp..lsuc..hch-Ls..spLc-Alp+GlPLhFshchpLs+sRh.....aaa...-cplsp.....sshsh+LSapPLT.ccYRVosuu............hphsasoLcsALpslt.cls..sW+V.hsps.........plpsup..s.apuplRhpLDsupLP+.PFQlsAl.sspsWpLsSs.Wpph....h................... 0 35 76 95 +14177 PF14335 DUF4391 Domain of unknown function (DUF4391) Eberhardt R re3 Jackhmmer:Q0F339 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 220 and 257 amino acids in length. 
29.90 29.90 30.10 30.20 28.70 29.80 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.06 0.70 -4.95 57 203 2011-06-06 08:05:32 2011-06-06 09:05:32 1 2 194 0 61 189 15 216.40 22 92.12 CHANGED lslPpsst....ls+p........lPKpthh..cpsshssppKchhs-sl..-pIhahhplsspoh.sls....sspphpEItlhplpL+s..t....phspclh...cllp+tlPh.llh.lhp.....h.sschthshsaK...Rhsps-tschsl.pphhtosW............thhpslshs.h..thshtslYpshlpplhthps..............t.ht...........shtpphpthcphpclpcclspLcschp+E+Qhs++s-lNtcl+clcpcl ...............hslPpsst....ls+p........lPKphhh.....pps..shosp.Kphhhppl..-pIphhhtLpsssh.sls....sspph.El.llplpLps....p...............phs.pclh...clIppt...l...hsllh.hlp.......................h.ssphphshuh+...Rhttscts+hsl.tphatosW........................................................th.sp..lphp.h..sh......sltslYpslhpplhhhssph...................t.....................slstphtphpplppLppclscLcpchp.+-+phs++sElptcl+chcppl............................ 0 25 44 55 +14178 PF14336 DUF4392 Domain of unknown function (DUF4392) Eberhardt R re3 Jackhmmer:Q0EY62 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 282 and 585 amino acids in length. There are two completely conserved G residues that may be functionally important. 
27.00 27.00 27.80 37.10 25.30 24.90 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.69 0.70 -5.61 49 194 2011-06-06 08:24:43 2011-06-06 09:24:43 1 6 147 0 85 191 48 279.00 31 62.43 CHANGED pl-pll..ssDhstcG...ltthh......ssu.hhpAup.......hL..................ppsppllIsTGFsl.....ssssETDGPsGAhsLupALpt.lGtcshlls-p.stpslpshhpshsh..ps..h...............................................tt.hpphp.shlluIERPGpusDG.pYasM+Ghslsth..ssshD....tlhhpupp...slsoluIGDGG.NElGMGsl.pptl.phlshusp..........hhusspsDpLllAsVSNWGuauLsAtLshhts.................................hl.shcpctp..hLctllptG.ulDGlotps.sho..VDGlsh.phptpllctLpph ............................................lpphh..shD.Gt.+G...ltplh..........t.st...lh.c.AuhtL.........................spsppVlls....TGFPs...........ttsstETDGPsGAlu......luthLpt.lGtcsslls......Dpp..shshhpthhpsts...psh.th.hs...........................................ttps..pp..p.hctlluIERsGpAsDG.pYaNhRuhsI.pph..ssslDpLFhtApp.......GlsohGlGDGG.NElGMGpl..cptlpphl.pGs...............IAsslpuDhhllAu..............VSNWGuYALsssLhhLts................................phl.s.hppc.ct..hLphllptG.shsGhott..shp..VDGlsh.phatphlptLh..h............................ 0 28 37 58 +14179 PF14337 DUF4393 Domain of unknown function (DUF4393) Eberhardt R re3 Jackhmmer:Q0F0Z8 Family This family of proteins is found in bacteria, archaea and viruses. Proteins in this family are typically between 254 and 285 amino acids in length. 
25.00 25.00 25.20 25.80 24.90 23.80 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.25 0.71 -4.85 48 194 2011-06-06 08:50:57 2011-06-06 09:50:57 1 3 178 0 46 155 1 182.20 21 69.96 CHANGED c+lpplsp...cs..ltpss..splssssl-supat.ss-pLpchaupLlAsuhspspsppsHPuFscIlcpLossEAplLphlt................tsthshhshthhh..............pstshphhhsshhhhspp............s.pphss.lsNLpRLGLlphs...........h.h.ppthhpthcstphhpphttpht................................................thlplTshGppFhpssh ............................th......pt....h.ps....plhhssl-sspa....hspcpLpphaApLl.usuh-pp.pss.sH.uFlcIlcpLoss-AplLphlt.............................................ppp.hPhsphthhh................pststth.hhpsh.hhspt...................pphsh.lsNLhRLGLlphs.............hspps.hphh.t.s..thhpph.pth......................................................t.lploshGptFhpsC...................................................................................... 0 11 32 40 +14180 PF14338 Mrr_N Mrr N-terminal domain Eberhardt R re3 Jackhmmer:B5YBC5 Family This domain is found at the N-terminus of the Mrr restriction endonuclease catalytic domain, Pfam:PF04471 [1,2]. Fold recognition analysis predicts that it is a diverged member of the winged helix variant of helix turn helix proteins. It may play a role in DNA sequence recognition [2]. 24.00 24.00 24.20 24.20 23.80 23.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.91 0.72 -3.90 132 557 2011-06-07 07:50:11 2011-06-07 08:50:11 1 9 518 0 145 476 56 89.00 25 28.56 CHANGED sapp............hh....hPlLchl..p-.Gt...ptphp-lhctlscphpLo-c...ctpphhs...oupph.hapsRluWApoaL.......p+AGLl.cs...sp.RG..haplTcp.......GpplLtpss ..................hpphhhPlLchL...ts.sp...ststc-lhc.tl.sct..h..s..ls-c.....pts.ph.hs....SGpp..hhpsRlsWApshL.......ppAGll..pp...sp...RG..haplTpt.......Gpphltt.......................... 
0 60 101 124 +14181 PF14339 DUF4394 Domain of unknown function (DUF4394) Eberhardt R re3 Jackhmmer:C1D3Q3 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 262 and 476 amino acids in length. 22.50 22.50 22.70 22.80 22.40 22.20 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.62 0.70 -11.58 0.70 -5.08 47 116 2011-06-07 10:49:46 2011-06-07 11:49:46 1 11 90 0 47 134 22 221.50 32 63.35 CHANGED sphLh.hsssssspstpshploG.h.ssppLlGIDaR.........PssGpLYul....usp.upl..YoIsssoGt....uohl..........uthssslsus...shulDFNPsADRLRllos...sGpNLRlssD.......sGs......hshsD.........ssLshsssshp.u..............ssslsAuAYTNshsssp.s......TsLasIDsstst..LlhQs.........sPNsGsLsslGsL...Gl..ch.ssssGFDIs....sstt......ssssuassh....u..t.s..pLYpl-.........LtTGpuo.......thus..........lss..t...ltslAl ..................s..hl..h.sspssthhpshtloG.h.s.sppllGIDaR.........PssGpLYul.......sss...uplYolsssoGs.........AThl..............uthssslsus...shulDFNPss...D.....RlRllus...sGpNLRlss-.........sGs.........hs.sD...........GsLshs.ss.t.u.............t..ssslsAuAYTNshsssp.s.......TsLasIDsshs..tLhhQs.........sPNsGsLsslGsL.....Gl.....sh...ssssGFDIt..........s.ts..s.sttAatsh....s..s..s..pLYpls.........L..t.....T.GtAo.....thup.................lss.......ltslAl................................. 0 22 37 43 +14182 PF14340 DUF4395 Domain of unknown function (DUF4395) Eberhardt R re3 Jackhmmer:C1CVW5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 142 and 168 amino acids in length. There are two completely conserved C residues that may be functionally important. 
24.00 24.00 24.50 24.00 21.80 21.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.66 0.71 -4.08 65 233 2011-06-07 11:45:19 2011-06-07 12:45:19 1 8 221 0 115 246 199 131.10 30 78.15 CHANGED lD.pusRhsAslsslllllsLlsu........shh........lLsl.slsFslsshhGs+huPau....hlh..tplltPpL...ssschEsssPhRFAQhlGhlFussu.hlu.ahh.....GhssluhlssuhslsAAhLsAuhGhCLGC.lYhhlp+h ....................................................................ls.pusRhsAhlsslllhlsllss...................thh........llsh.slsFsls.shhG.s+hsPau.....hlh..t...plltsclt..sssc..h..EsssPhRFAQhlGhlFsssuhlu..ahh......................Gh.shl..uhlssuhslsAAhLsushGaCLGC.lYhhlt+.h........ 0 40 87 105 +14183 PF14341 PilX_N PilX N-terminal Eberhardt R re3 Jackhmmer:C1CZZ9 Family This domain is found at the N-terminus of the PilX prepilin-like proteins which are involved in type 4 fimbrial biogenesis [1]. 24.50 24.50 24.50 24.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.70 0.72 -8.35 0.72 -4.30 69 454 2011-06-07 12:24:02 2011-06-07 13:24:02 1 4 373 0 154 436 62 50.80 32 22.54 CHANGED pGhu.Lll...uL......llLll.lollulushps...shhpp+huuNppppppA....a.uAEuulp .....pGhs.Lll...sL......llLll.lolluluuhps...sshpp+huuN.pcpphA.....hpsAEuuLp.............. 0 45 102 134 +14184 PF14342 DUF4396 Domain of unknown function (DUF4396) Eberhardt R re3 Jackhmmer:C1D417 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 167 and 310 amino acids in length. 
27.00 27.00 32.00 31.90 22.50 21.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.77 0.71 -4.07 54 240 2011-06-07 12:30:05 2011-06-07 13:30:05 1 4 225 0 106 237 1104 138.70 29 53.90 CHANGED shWppustuTlHCLsGCulG-lhuhhlsps................huhshhsth.sluhshuhhhGhhlphhslh..hpshshtpAl+tAlts-hlSlsshElu.ssshhhl...............uhshshssshFWhs.htluhhsGFlsshPhNhWhlp+GtKc .............................WppsshuTlHClsGCslG-lhuhhhhph................huhshhsph.slshshAhhhGhhlphhslh..hpsluhtpAl+pAhts-hlSlsshplu..sshhhl...............uhshshssshFWhs.htluhhsGFlsshPhNhahlc+GhKc............ 0 27 58 87 +14185 PF14343 PrcB_C PrcB C-terminal Eberhardt R re3 Jackhmmer:C1CVC9 Family This domain is found at the C-terminus of Treponema denticola PrcB, Swiss:B8YNY4. PrcB interacts with the PrtP protease (dentilisin) and is required for the stability of the protease complex [1]. 24.00 24.00 24.30 24.00 23.90 23.10 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.72 0.72 -4.26 96 234 2011-06-07 13:08:52 2011-06-07 14:08:52 1 11 210 0 92 220 20 60.10 26 27.35 CHANGED phall.lshGp+sTuGYulpl..pph.t.s...pssl...hlpsplhsPstsphssps.....h........TaPa...hllclpt ...................hlh.hshGppsTuGYulpl....ppl...ths......pssl.hlp...schhsPp..ssphssps.....l........TaPahllph..t........... 0 50 74 87 +14186 PF14344 DUF4397 Domain of unknown function (DUF4397) Eberhardt R re3 Jackhmmer:C1D258 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is approximately 120 amino acids in length. 
26.00 26.00 26.30 26.00 25.80 25.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.58 0.71 -3.79 91 801 2011-06-07 13:44:06 2011-06-07 14:44:06 1 13 358 0 263 753 167 108.00 22 64.45 CHANGED sutlRVhH.....uuPs.......uPsVDVal..ssph.........hlsslsapshos.YlslssGs.aplpl..hssGss.s.sss.lhsssl.sltssppYTlsAssshs.....s..hph........hshsDsh...p....t.hssspu.plplhHsuPs..A.PsVDlhl ............................................s.lRhhH.....hsss.......sPsVDl.h.l...sGpt............lhpsls.ap....p....h....o......s..Ylsls.sGs.aslpl....sss..uss.............s..l..h......s.ssl.sltssptYTlhAl.spss...........s....hph...........................hhh.c..................................................................................................... 0 85 185 241 +14187 PF14345 GDYXXLXY GDYXXLXY protein Eberhardt R re3 Jackhmmer:C1CYU2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 171 and 199 amino acids in length. It contains a conserved GDYXXLXY motif. 
27.00 27.00 28.70 30.30 25.70 18.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.91 0.71 -4.34 106 394 2011-06-07 14:04:33 2011-06-07 15:04:33 1 4 391 0 107 331 16 153.40 27 55.03 CHANGED lplshlshhlhpppthltpGcslhLclsPVDPRsLhpGDYhsLsY..slup.....................................tthtps...spsalhL..p..-spul...........sphhphpps...........s.tssplhlps.php...................................t.pl.....phu..h.-saahsEGpupphcp.sph.................uph+Vssp.GpulLhsLh ..............tlshlshtlhpppthLppGpslhLpltPVDPRsLhpGDYhsLsY..slss.s......................................t.p.tts........ppsalhlc...Dspsl...................................sshsphspst..........shsssphhlps.cht............................shpl..phu........h-pYahsEGpucphpp.u+h.................................................uth+Vssp.GpslLhsLh............................... 0 30 60 86 +14188 PF14346 DUF4398 Domain of unknown function (DUF4398) Eberhardt R re3 Jackhmmer:Q0F2G1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 127 and 269 amino acids in length. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.85 0.72 -3.97 51 449 2011-06-09 10:03:32 2011-06-09 11:03:32 1 13 287 0 153 393 38 104.80 24 52.41 CHANGED hsshhluG..C...Au........sshs.spphptucpAlppApss..sAsp..hs.s......cLptAppcLtpA.ctshsppchp........pAcphAppApsc...AclApspupstpsppthpchpps..lppLcp ..................................................hhhhuG.C..us.............tshs.spphstAppulppAcps...susp.....A.sh........phcpAp-pLspA...ctu....h.p.c...t.pYp........cA.+plAppAptc...AclApp+u.sspspptlpphppt..hppl................................ 0 42 68 114 +14189 PF14347 DUF4399 Domain of unknown function (DUF4399) Eberhardt R re3 Jackhmmer:Q0EYV0 Family This family of proteins is functionally uncharacterised. 
This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 135 and 1079 amino acids in length. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.07 0.72 -3.96 72 404 2011-06-09 12:43:37 2011-06-09 13:43:37 1 27 222 0 152 401 417 86.80 32 24.73 CHANGED VpFGLcG.hslsPAGp.tpss.....TGHHHLLlDsst.........shspslPts............tptlHF.....GsG...QTEs...slp.LsPGcHTLpL.hGDttHhsa........ssslhScpI .........................................hGlps.hslsssup..hpss.....TGHHHLLlDsps.........sh.spslshs.............stlHa.....GpGpT-splp.L........sPGcHTLpL.h.uDt.Hhsh........t....o........................ 0 22 100 127 +14190 PF14348 DUF4400 Domain of unknown function (DUF4400) Eberhardt R re3 Jackhmmer:Q0F326 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 209 and 249 amino acids in length. There is a single completely conserved residue P that may be functionally important. 27.00 27.00 30.40 29.80 24.90 24.90 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -11.20 0.71 -4.93 58 389 2011-06-09 13:00:12 2011-06-09 14:00:12 1 2 295 0 93 325 19 189.70 27 84.40 CHANGED huhhahhs-puhp+upphhppphshLusphs.cullhp..........psu..h......hh.......haphhFVc.oGhhshhpp...................s.tt.ttshphhsshhhphhpsalhushhsshhhhlRLs.lLhhslPlFlhsslsullDGLspR-lR+auuGhESualYH+A++hlh.shshs.hhlYLuhPh.ul.Phllhl...PsAlllGlslslssusFK .................................................................................hhh.s-ts...hpp.pt.hphphshLusp.hspslhhp...................pss.....................haphhhVc...oGhhphhpp..........................ttttp.t.s.pshp..h.sshhhshhpsal.AhhhshhhhhlRls.lLhhhlPlhlhhhhsulhDGLspRclR+auuGhESualYHpApthlh.shhhls.hhlYLshPh..ul.Phllhl.....PsAhLlGlulshssusFK........ 
0 13 43 74 +14191 PF14349 SprA_N Motility related/secretion protein Eberhardt R re3 Jackhmmer:B2KAY0 Family This domain is found repeated three times in the N-terminal half of the gliding motility-related SprA proteins. The role of this domain in motility is uncertain [1]. It is also found in proteins required for secretion [2]. 27.00 27.00 54.90 27.30 22.80 24.20 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.77 0.70 -5.23 88 445 2011-06-09 13:24:56 2011-06-09 14:24:56 1 3 146 0 160 475 719 425.90 19 52.43 CHANGED tahshts.hh.h.tt..............hhs-P.............Yph.ahtp.......pl......................hp+Yh..................h...p.ttpshsssEc....pcsshsphpp..ah...pa+lsh...pssp......hhhG.ps..........hhlc.ps.splshss.......pahphc.P.........................olphhphahosFshshhlphushshVtuchp...sYsp...sh.s.s..sp.h..pht..............................pph-lusVslppNssh.........l..h..ssh.ptps.ss....sstlphscpsLohhlsp...........psucu+sV.....hpsps..lshcpYcchchF..............................tlGpsapsNY.p.pls.......s.....tpshshst...plW..hsNp...........s...t..sshc..h...................................ttphslhu.sslG..csh..tlpN.s..........................hsthsp.....sssslssshu-hsssshsuph...thhs.ppssp..ctppshpphsh......sth...s.phshphshh.utpp.........p..s.pasPhhsslpLpstLs 
..........................................................................................hs....hhhh.tthst...........hhhss-s.............att.ahtp...................pl.........................hp+Yh.....................................ht.p..tp..shssscp.....psphschth.hspachsh....s.tp............h.hGst..........hpl.s.pGssplshGs..........ph...c.P...................................slp.tpcahssa......shsphlp..hs..uhlsuphphphsYps...sssa.shpsp..hhhshp...........................................................p.phspphplGslshp.spph............h....h..ssh.ttpstsolaGhssphpts.cptLohhlsp...........tsspspsl..phshpsp.h.lpstpYcp.pha................................hhupsah-sapp...sh...............t.....posl.slsp...plW..lsNp........s........ss.hp................................................tppRslluh.slu.....psh......h.t............................h...........ss....ssl.ssshu.shts.hp.hstph...thhs..p.p...tsp..ptppshpshsh...................t.phshh.st+t...t..papassths.h.Lp....................................................................................................... 0 73 145 160 +14192 PF14350 Beta_protein Beta protein Eberhardt R re3 Jackhmmer:B2KEQ4 Family This family includes the beta protein from Bacteriophage T4, Swiss:P13057. Beta protein prevents the gop protein, Swiss:P13058, from killing the bacterial host cell [1]. 
27.00 27.00 27.30 27.20 25.70 21.10 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.24 0.70 -5.26 43 134 2011-06-09 13:40:16 2011-06-09 14:40:16 1 1 129 0 34 119 2 313.80 20 94.46 CHANGED YhPlL..+h+puEhpALpp.L..ssps+sp.lhPllplsthststpp.................pptspcltpshspt..shhsssp.......................pphhpsssthhs..hhhphhpptstp............llPVlshspss.....ph.psltpth......ttttlslRlptsphts.....phtptlspllstls.htss.phtlllDhtshp.sts.sthtthhsth......lpplsph..tapslslsuoSaPps.......hsshst.....ppstl.RtEhplapplts.......sttshaGDYusspspht......sstsspsss+lcY....ot.csp.ahlhR.stthpt.tht.......phh.sluppll...spst..a...ussaSWG.DphIpps.Aps..........ss.......GssopWhplssspHlshhlcp.l ......................YhPhL..+h+.puEhpALp.....pL...s.phtst.hhPlhpl....hs.p.................................pphhppltp.hstt...hh..sh........................tthhpsspt.ht......hhp.hpthsh.................llPVlthspss.....phhptltphh........tttlslRlp.t.th.s..........t...lspllsphs..h..s..ph.lllDhtslp.stst...th.tphhtth......lsplsp....hpplh.lsusuaPss.......hsshst.....t.tpl.RhEhplapplhp........hpshauDYushpsph.......s.thhp..ssplcY.....oh.csp.ahhhR..tthpptsht............phh.phsppll...sssp..ap..s.sp...asWG.DphItps.ups.........tss..............GssspWhplshspHlshhlcp................................ 0 7 23 31 +14193 PF14351 DUF4401 Domain of unknown function (DUF4401) Eberhardt R re3 Jackhmmer:B2KEA1 Family This family of proteins is found in bacteria. Proteins in this family are typically between 357 and 735 amino acids in length. The family is found in association with Pfam:PF09925. There is a single completely conserved residue K that may be functionally important. 
25.00 25.00 25.20 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.54 0.70 -5.44 44 202 2011-06-09 14:25:50 2011-06-09 15:25:50 1 7 199 0 48 196 9 311.50 20 56.27 CHANGED sWalplh................huhuuWlAulh..hLsFlhhshhhh.c......hshllhGllhhuhAhhlh....+tp.............thFhcQhulAhsluGphhlsaGlhphh.......thhshhlhhhhlhslhhhlh.s...hlhphL.shhhshshhhhhtthhh..........h.lshhhlshhhshl......hhth.........ttstththhpslshuhhlshlshhhhhsht.hhhh.................................th..hhhhhhhhlshshshhhlhhphph.l..ssshthslhhslslluhh.h..hsulshulllllluhhtspphlhululhsllhhluhYYYsLploLLtKShhLhusGllLLshthllh+hh ..................................................hhlphh................huhuualAulh..hLhahhhhh.hh.p......sshlllullhlshAhhlh................+t..............shhhpphulshhluGp..lhhhaslhp.h...................hhhhlh....hhhlhslhhhlh.s.....hhhphL.shhhshshh.hhhshhhh.................h.lh.hhhhshhshl.......hh.hh............tthhtttlp.sltauhhlshlshhhhthh.hhhhh..............................hh....th.thhhhhhhhhshhhhhhhhhhh...h.h.hh....ptth...th.....hhhh.slhlhs..hhhh..hsulshslhllLluhhtspphlhululhshhhhluha.YYpLthoLLhKShhLhssGllLLshthllt+..h................................ 0 11 24 39 +14194 PF14352 DUF4402 Domain of unknown function (DUF4402) Eberhardt R re3 Jackhmmer:B2KBI6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 155 and 182 amino acids in length. 
25.20 25.20 26.00 25.60 25.10 25.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.87 0.71 -3.68 65 129 2011-06-10 07:19:46 2011-06-10 08:19:46 1 2 54 0 66 133 13 131.50 20 75.21 CHANGED lslsps..ss........LsFG........hhsssssGoVsl..sssG....shshsGssshhsssss.....sApFslsG.psspshslols.........sshslss........sssshslsshs.................sshstssthsssG..............stshslGGoL.......sls.usps..sGsYo.Go..asVoVsY ..............................................................plsps..ts........LsFG........hhss.ssssslsl.....sssu.....hs..sssh..hsssss..............sApFslsG.ps...spslslols...............sshslss..........ussshslsshs.................................sthsssshhsssG...................stslplGGoL..........sls..usps......sGsY.o.Go..hslsVsY...................... 0 21 42 59 +14195 PF14353 CpXC CpXC protein Eberhardt R re3 Jackhmmer:B2KC69 Family This presumed domain is functionally uncharacterised. This domain is found in bacteria and archaea, and is typically between 122 and 134 amino acids in length. It contains four conserved cysteines forming two CpXC motifs. 24.00 24.00 24.00 24.80 23.90 21.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.84 0.71 -4.18 43 128 2011-06-10 07:51:50 2011-06-10 08:51:50 1 3 105 0 30 113 9 122.00 21 47.72 CHANGED plsCPpCtpphchclhstlssspcsch+cpll.sGplhphpCPpCGpphhl..shshlYpD.p.c.chhl...hhhPc......tphppthphhtshph.........t.tttthphRllhshsplhEKlhIh-sGl-....D+hlElhK .......................lsCPpCtpthphphh.shlssspcPchppplh.ssphhthpCPpCGpph.t.l..shshlYhD.p..c.chhl...hhsPp...........p.ttpphphhtthp...................htthphRhshshtplhEKlhlhcsuhscchhEhhK...................................................... 0 18 30 30 +14196 PF14354 Lar_restr_allev Restriction alleviation protein Lar Eberhardt R re3 Jackhmmer:B2KDC9 Family This family includes the restriction alleviation protein Lar encoded by the Rac prophage of Escherichia coli, Swiss:P33229. 
This protein modulates the activity of the Escherichia coli restriction and modification system [1]. 28.60 28.60 28.60 28.70 28.50 28.50 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.92 0.72 -3.67 63 206 2011-06-10 08:38:13 2011-06-10 09:38:13 1 6 176 0 44 148 11 60.10 35 47.56 CHANGED tcLcP......CPFC.Gssslthps.......ppt.t.............................hhlt..........Ctp......C....Gu.................tsshtpscpp....AlctWNpRs ...............cLKs......CPFC.Gssplhlcs.......s.shs.....................................hhsc...Cst.......C....tu....................csshsssttt........AhcpWN+R......... 0 10 24 35 +14197 PF14355 Abi_C Abortive infection C-terminus Eberhardt R re3 Jackhmmer:B2KEQ6 Family This domain is found at the C-terminus of the Lactococcus lactis abortive infection protein Abi-859, Swiss:Q48620. This protein confers bacteriophage resistance [1]. 24.00 24.00 24.40 24.50 22.10 19.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.67 0.72 -4.04 92 373 2011-06-10 09:57:38 2011-06-10 10:57:38 1 2 341 0 82 267 10 78.90 23 29.98 CHANGED lspLh..phspptLshtsspttp..........t.hppll...sultsll........pulsslRN.ctu.suHG.....ts....pthtlsspcAcLslssusolst.allcs ....................................................h..pLh..ptshphlthp.ppp.t..........p.l+pll....puhssll....sulsplRN.ctu..suHG.....pst.....pthtl.s.pcc.AcLslssusolstalhc.... 0 18 51 68 +14198 PF14356 DUF4403 Domain of unknown function (DUF4403) Eberhardt R re3 Jackhmmer:Q8KCF8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 455 and 518 amino acids in length. There is a single completely conserved residue W that may be functionally important. 
27.00 27.00 46.10 46.00 24.40 22.90 hmmbuild -o /dev/null HMM SEED 427 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.13 0.70 -6.18 41 97 2011-06-10 10:29:55 2011-06-10 11:29:55 1 1 88 0 59 107 9 419.20 20 89.72 CHANGED stshSslslPlpls..lsslpphlNpplstphhspssh.ts....................ptplphplsRtGslslsus....ssplhhohPlpsphphthuhtshh.th.......................shstssphssslshsspsslsssWclssphp.....shshhppsslslu.sh+lslpshlcshlcpthpclpstl-pplppshsL+pplpphWpphpcPl.lsp......s.ssWLclpPpclhss...phsspslphslGlpupspsssus.pPt.....hPLPshphhs..shssphclslssplsas-lsphlspph...ts+sFs...sssh.plplcslplhu.ssc+Lhlsl...pspus.........hcGslalsG+PsaDssspplplpDl-aslcocu.h.Lhpsushlhpstlpctlppph...shslpsplspsppplpptlup..psspslclsuplpslphsclhhsscslplhlpAsGpsslplp ...............................................s..t.ShlslPlpls..lsslpphhsptlstphhtcss..ss....................ttplphpl..h.R.Gslslsss....ssplhhshPLphphphthuhtshh...........................hstssphpsslphsspspls.sWplpsphp.....shshhpssslplu.Ghclslss.lcshlcpthsphtstlspplpcshsL+ppspphWp.phpcPl.lsp........shWLplpPpclhss...phss.sslphslGlpspschssus.pPt.....hslPs.hhths....shssthplslssplsas-lscllppph...tu+sFs...........ssshplplcplslhu..sss+Lllslplpup............hpuslalhGpPhhDstspplplpDlchslcocs.h.Lhpsushlhpstlpptltpph...shslpstlspspppltptlsp..pstpGlplsuplpslplsclthsssslplhspApGplplpl........ 0 21 40 52 +14199 PF14357 DUF4404 Domain of unknown function (DUF4404) Eberhardt R re3 Jackhmmer:Q8KFC4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 90 amino acids in length. There are two completely conserved residues (P and G) that may be functionally important. 
27.00 27.00 27.00 27.90 26.80 26.00 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.83 0.72 -3.57 40 107 2011-06-10 10:45:52 2011-06-10 11:45:52 1 1 105 0 38 91 6 83.50 38 92.69 CHANGED cLpppLppL+ppLp...psss.lD-sppstLpsLtc-IpphLpp...tsssttpcpsls-plsp.Alp+FEscHPpLutslcplhsoLusMGl ...........pLpcpLssLRcpL-...psss.ls.Epp+tpLcpLhppIEtplp........tstt..p-soLsDslNh.AlERFEspHPsluuTLRsIlpoLusMGI. 0 9 17 30 +14200 PF14358 DUF4405 Domain of unknown function (DUF4405) Eberhardt R re3 Jackhmmer:Q0F3T2 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria and archaea, and is approximately 50 amino acids in length. There are two conserved histidines that may be functionally important. This family is N-terminally truncated compared to other members of the clan. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -9.14 0.72 -3.84 112 320 2012-10-03 10:28:09 2011-06-10 13:45:22 1 10 262 0 150 342 33 63.50 23 31.21 CHANGED lphhlshhlhlshlhlhloGlll.hts.shh........hhuhstthhc.plHhhsuhhhhlhhslHl.hlpW ...........phhlshhlhlshlhhhlSGlll.hhs.shh......................hhshs.phhp.pl.HhhsuhhhhlhhslHl.shpW..... 0 53 92 121 +14201 PF14359 DUF4406 Domain of unknown function (DUF4406) Eberhardt R re3 Jackhmmer:B2KDC7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 98 and 145 amino acids in length. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -10.10 0.72 -3.91 41 334 2012-10-02 19:28:18 2011-06-10 13:54:17 1 3 282 0 34 299 91 89.40 37 77.64 CHANGED lYIAGPMo..Ghs........-hN+ssFppsAccL+tpG...ahVlNPAph......Pssh.......s......acpYM+lslshLh.sCD..sIhhLsGWppScGAphEhtlAcpLGhplhht ..........................................................hYluGsMo....Gh...........saN+..AFp.p.stptL......+....p..cG....asV..l..NPAhh.............s-uh.......s...........scYM.c.hshshLp..ssD..sI.Y.h..L..p..GWppStGAptElslAc+LGhsVhh.t........... 0 12 23 30 +14202 PF14360 PAP2_C PAP2 superfamily C-terminal Coggill P pcc Jackhmmer:Q56Y01 Domain This family is closely related to the C-terminal a region of PAP2. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.79 0.72 -3.71 54 511 2012-10-02 00:53:37 2011-06-10 14:41:37 1 8 174 0 334 550 26 72.40 38 19.26 CHANGED tCGDllaSGHThhhslshhhhhcY....ssphh........h+hlhhllshhshhhIlhu+tHYTlDVllu..halsshla.hhYHt ....................hCGDhhFSGHTlhlslhhhhlpcY.............ss+ph.........................h+hls.Wllshh...u.hhhIlsu+..cHYolDVllAaYlostlF.hhYH................ 0 123 175 264 +14203 PF14361 RsbRD_N RsbT co-antagonist protein rsbRD N-terminal domain Eberhardt R re3 Jackhmmer:Q8KAB8 Family This domain is found at the N-terminus of a number of anti-sigma-factor antagonist proteins including B. subtilis RsbRD, Swiss:P54504. These proteins are negative regulators of the general stress transcription factor sigma(B) [1]. It is found in association with Pfam:PF01740. 
23.00 23.00 24.10 24.10 22.50 22.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.06 0.72 -3.58 74 220 2011-06-10 14:14:43 2011-06-10 15:14:43 1 6 201 0 98 227 5 103.70 21 39.90 CHANGED slt..clLpc...ccpplLppWhpph.tphstcssth.ptc.hpp.ssslhpsltpulpsh.......hDhhssph......pplpphLssls+hRAhQGFoPscsssalasLKpslhchlppp .........t.h.phLppccptllppWhcth.tp....hs....hp..sst.h..c.pc.hcppspslhpslspulpps..........hchtsssh.........splcphLsp...lu....+sRAhpGFoPscssshlhuLKpslhphltt............................ 0 39 65 88 +14204 PF14362 DUF4407 Domain of unknown function (DUF4407) Eberhardt R re3 Jackhmmer:Q8KC28 Family This family of proteins is found in bacteria. Proteins in this family are typically between 366 and 597 amino acids in length. There is a single completely conserved residue R that may be functionally important. 28.30 28.30 28.90 29.00 27.60 28.20 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.66 0.70 -5.46 54 204 2011-06-10 14:37:50 2011-06-10 15:37:50 1 6 166 0 93 182 38 296.40 23 61.13 CHANGED s.slLcp.sss-psKasulGAslhhTAlhAsluuuaAlhhshsss.............hhulhhG.llWGlhIhsLDRaIVsohp+pst...............hhpphh.uhPRlllAlllulVIScPLpL+IF-pEIsppltptppppttphhsplss...........................thssphsthpsplsslppphsptpsplsthhpthpsEhpGs............ssoshhGhG.shhcp+ppphcstt.......splpslpsp...ssthsthc......................pphsphptsppthhsppp.t....sstsGlhsRhpALscLs.....tts..hhhsphhlhLLF.lulEhhPllsKLhsstus...YDhtlppp-pt 
.............................................t..........t-cspasshGshlhhsuhhA.slsuuhA.l.tts....hpss.........................hhul.hu.llhGLhlhsl-Rhlso....u.t+tps.....................................h....h..t..sss..RhhlAlhlu..hVluc.ltLhlFp..spIs...pclpcp.t.pptpst.stlps..................................................ssplpphpsth...ssLcsplpptcsclspth..shhpsEhpss.................ptoGhsGtG.P.scptpphh.sstp...........................................tcLsshhsp...hspptstlc...............................................pptptltpsppt..hhst..............sstpGhhsRh.AhscLs......ts..s..hhhsphhhhhhh...hhlthhPllh+Lhpstss...aDhthttpt..p........................................................ 0 31 59 77 +14205 PF14363 AAA_assoc Domain associated at C-terminal with AAA Coggill P pcc Jackhmmer:Q9M0V8 Family This domain is found in association with the AAA family, Pfam:PF00004. 22.10 22.10 22.10 22.50 21.50 21.80 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.34 0.72 -3.94 102 409 2011-06-10 15:51:54 2011-06-10 16:51:54 1 12 26 0 238 417 0 98.40 24 20.91 CHANGED hlP...t.p....lcp.....h.....l....t...shhtph.........hs....th.hss.hh.slsIpEhs.....u...........hp....pN............c.............hapAscsYLusph..ssp.....s....c+L+sspsc...pspt...................hslolcc.s-clsDsFc.GlclhWphsspp ..........................hP..plpth.....l....h.....phhtph.........ht....h...hs.shh.plslpEhs.....u...........hp.ps............c.................hapAspsYLu.s.p.s..ssp.......s....c+L+..sstsc.p.sps...................lslshcc..s-clsDsF.....p.....Gspl.hWphh...p............................... 0 25 137 200 +14206 PF14364 DUF4408 Domain of unknown function (DUF4408) Coggill P pcc manual Family This domain is found at the N-terminus of member of the DUF761 family Pfam:PF05553. Many members are plant proteins. 
21.50 21.50 21.50 21.50 21.30 21.30 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.39 0.72 -7.57 0.72 -4.39 37 166 2011-06-10 15:57:58 2011-06-10 16:57:58 1 2 20 0 93 164 0 36.80 33 13.20 CHANGED hPplaushtua.............lsPshL.FlllNll...IhsIsupS+h .....................................hhsshtsa.......................hoPshL.FlllNlI...IlsIssoS+h.. 1 11 54 74 +14207 PF14365 DUF4409 Domain of unknown function (DUF4409) Coggill P pcc Jackhmmer:O64569 Family \N 23.90 23.90 23.90 24.10 23.50 23.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.73 0.71 -3.92 35 420 2011-06-10 16:12:37 2011-06-10 17:12:37 1 21 28 0 255 418 0 104.60 32 27.44 CHANGED oIpSPD.GDlIDCV.hp+QPAFDHPhLK....s..hp....hcPs.p.pPcuhh.tc........p....p...........ppps.....spshsQlW+psG.c.CP-GTlPIRRTpc-...DlLRA..sSlp+aG+Kpppsh..sts.p..t..ss..hspsGHcHAluYs .....................lpo.D.GDllDCVshpcQPAFDH..P.hLK..........s...hp......hcPs.....hPpsht.tp........................................ttt......tp.hhphWp.pss...p.CPcGTlPIRRs.pcc.......DllRs....p....S....ht....phsh..+..t................................................................................................. 0 51 132 159 +14208 PF14366 DUF4410 Domain of unknown function (DUF4410) Eberhardt R re3 Jackhmmer:Q8KCX4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 187 and 238 amino acids in length. 
26.00 26.00 26.00 26.10 21.60 25.60 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.53 0.71 -4.43 38 82 2011-06-13 11:52:46 2011-06-13 12:52:46 1 1 73 0 41 82 7 130.50 18 64.47 CHANGED lhVtsF...thpstth....t...........................ttstpppppstphscphusplspc.Lppp..ulhutpsssss....tsslllcGshtphctGspttRhhlG.hGA...Gpoplpsslplhs..................soG+hsuth.ssssssuuu ..................................................................................t...........................ptttp.ppphtt.hcphtpplhppLppp...slhsttsssss.....tshhlpsplpshchGssttRshlG.hGA...Gpsplsuslplh-..................sss+..sthpsssshuhuu................... 0 8 17 33 +14209 PF14367 DUF4411 Domain of unknown function (DUF4411) Eberhardt R re3 Jackhmmer:Q8KAB2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 153 and 170 amino acids in length. There is a single completely conserved residue D that may be functionally important. 23.50 23.50 23.70 23.50 23.40 23.30 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.13 0.71 -4.53 58 237 2012-10-03 20:43:45 2011-06-13 13:14:28 1 1 225 0 61 213 14 150.70 26 96.66 CHANGED Yl.LDoNlhI..putppaYs.h-lsPuFWchLpphhp..sGplhohcpVt-Elpps.....sD-Ls.pWscppts...hhpss-...tshpphtplhpa....psspaptss....ttah.......shADsaLlAtAhs.pst.....slVTpEt......tsstpp............phKI.PslC..pthsVphhshaphl.+chshpF ............................................................Yl.hDosshl...........puhpphYt...shhsuhWphltphhp..ssplh..shctVtcEltpt.............sDchp..cWhctpts..........hhhs...t-........tph.pthtclhph....sppthppt...................ptADsalIAhAhs..pst...........slVTpEpt..........sss.pp............th+I.PslC..pth.s..V...hlshhphl.cp.shp............................................ 
0 23 39 52 +14210 PF14368 LTP_2 Probable lipid transfer Coggill P pcc Jackhmmer:O49645 Family The members of this family are probably involved in lipid transfer. The family has several highly conserved cysteines, paired in various ways. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.36 0.72 -11.38 0.72 -3.85 72 1249 2012-10-01 19:46:35 2011-06-13 13:37:29 1 18 64 3 722 1872 6 97.10 20 61.47 CHANGED hlA..h..hhsssts...s....sss....................sssssC.......sss...hh...ss............hs...h.........hs.....s...s...ssPsssCCssl+s..h.....h.t.sp..ssClCthhss.s..............ts...h.sls.hspuh.tLs.ptCsls..sss.....tC .................................................................hhh......................ts..........................ss.s.s.sC........ssp.....Lh...ss.................hs...h.........ls............s.s.....ssPossCCsslcs..h....................t......ss...........tCLCthlpsss...........................hs..h.sls.h.s..pAh.t.l.P.stCslsss......C.................................. 1 104 420 582 +14211 PF14369 zf-RING_3 zinc-finger Coggill P pcc Jackhmmer:A0MEB9 Domain \N 29.60 29.60 29.60 29.70 29.50 29.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.81 0.72 -8.18 0.72 -3.95 96 381 2011-06-13 12:47:51 2011-06-13 13:47:51 1 8 114 0 221 364 0 33.60 39 9.85 CHANGED spYWCapCsphVpl..t..tss........s...CPpC.s.u.GFlE...El ...paaCapCsp.Vph..t....ssh.......h....CPpCp.u.GFlEEl.... 0 46 116 169 +14212 PF14370 Topo_C_assoc C-terminal topoisomerase domain Coggill P pcc Jackhmmer:B3H4K2 Family This domain is found at the C-terminal of topoisomerase and other similar enzymes. 
25.00 25.00 27.80 27.80 23.30 23.10 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.40 0.72 -4.32 47 743 2011-06-13 13:44:24 2011-06-13 14:44:24 1 10 600 16 275 707 30 65.80 64 12.18 CHANGED lcc+lcphplphpsK--NKpVALGTSKlNYlDPRITVAWCK+a-VPIEKl.FoKTLRc....KFsWAhcs....cpcap....F ....................L.c-QLhKLElQtTD+.....-ENKslALGTSKLN...YLDPRISVAWCKKa-VPIEKI.aN.......KT.R-..................KF.WAht......................... 0 90 146 221 +14213 PF14371 DUF4412 Domain of unknown function (DUF4412) Eberhardt R re3 Jackhmmer:Q8KCD3 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and eukaryotes, and is typically between 75 and 104 amino acids in length. 28.70 28.70 28.90 28.80 28.30 28.60 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -9.94 0.72 -3.47 91 199 2011-06-13 13:44:51 2011-06-13 14:44:51 1 7 134 0 99 199 11 89.20 21 35.07 CHANGED hpchG.scslsGhpCp+hcl................tssptptph...Whop-...............................................tGhPlchptpsssssh................htslchpsh.ssuhF..plPs.GYp ..........pchG.scslsGhsCpcaph.....................tssptpsph...WhopD...........................................................................................Ghslphptpssss.s...t.......................hhplphts..ssuhF..plPs.GYp.................... 0 51 80 93 +14214 PF14372 DUF4413 Domain of unknown function (DUF4413) Coggill P pcc Jackhmmer: Family This domain is part of an RNase-H fold section of longer proteins some of which are transposable elements possibly of the Pong type, since some members are putative Tam3 transposases. 
27.00 27.00 27.00 27.00 26.60 26.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.38 0.72 -4.03 79 377 2011-06-13 14:32:42 2011-06-13 15:32:42 1 27 37 0 208 358 0 97.30 25 17.07 CHANGED lSusphPTuNhaapplhclct.......hL.pc..h.p.sp...-.s.h.lp.s.hspsMppKFcKYW.......c......phshh....LslAslLDPRhKh....p.h...lcathpph...aGp...c.sp...ph...l.pcVcsslpcLappYp .......................................................ss..sTus.hhh.phhplph..........................hL...pp...h...t..sp.......s..h.lp.s..hsppMpp.......KFcKYW..........................p............phshh....lslAslLDPRaKh....c.h...lcathpph......hut....c...st...ph...lpplpptlpplappY.................................. 0 9 80 136 +14215 PF14373 Imm_superinfect Superinfection immunity protein Eberhardt R re3 Jackhmmer:Q8KCM5 Family This family includes the E. coli bacteriophage T4 superinfection immunity (imm) protein, Swiss:P08986. When E. coli is sequentially infected with two T-even type bacteriophage the DNA of the superinfecting phage is excluded from the host, into the periplasmic space. The immunity protein plays a role in this process [1]. 27.00 27.00 27.20 27.20 26.80 23.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.03 0.72 -4.32 60 264 2011-06-13 14:42:33 2011-06-13 15:42:33 1 5 229 0 62 205 39 42.80 40 38.53 CHANGED lYFlPsllAhhRp..+ppthsIhllNlhLGWThlGWlsALlWAlp ..........lYFLPsllAhtRc..+psphsIhllN....lhhGWThIGWllsLhWuh......... 0 17 30 48 +14216 PF14374 Ribos_L4_asso_C 60S ribosomal protein L4 C-terminal domain Coggill P pcc Jackhmmer:Q9C6F1 Domain This family is found at the very C-terminal of 60 ribosomal L4 proteins. 
27.00 27.00 27.20 28.20 23.70 23.40 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.60 0.72 -4.10 64 472 2011-06-13 14:55:20 2011-06-13 15:55:20 1 6 336 8 266 449 5 78.60 42 21.13 CHANGED psYpLPpshhsNsDLsRlINS-EIQullRsPptpspp+...ltKKNPL+Nhpshl+LNPYAtsh++pplhsppppttt+ttt ......psYsLPpshMsNsDLoRllpSsEIQpslRss.......+ppsp++......shK.KNPLKNhplhL+.LNPYApsh++ptlhtpttphttp...h............................. 0 101 154 217 +14217 PF14375 Cys_rich_CWC Cysteine-rich CWC Eberhardt R re3 Jackhmmer:Q8KFY4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 74 and 102 amino acids in length. It contains eight conserved cysteines, including a conserved CWC sequence motif. 24.00 24.00 25.10 25.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.19 0.72 -4.04 76 321 2011-06-13 15:01:50 2011-06-13 16:01:50 1 4 317 0 117 296 51 50.50 33 57.01 CHANGED CPhCGsshpCu...........ttstssCWCsshshs.shh......tth.t.......................psClC.sCLpph .........CPtCGpshpCs............tss.tsCWChs..hshtsshh......tth.tth.....................psCLC.pCLpp.h.. 0 25 54 87 +14218 PF14376 Haem_bd Haem-binding domain Eberhardt R re3 Jackhmmer:Q8KBS9 Family This domain contains a potential haem-binding motif, CXXCH [1]. This family is found in association with Pfam:PF00034 and Pfam:PF03150. 
27.00 27.00 27.40 37.50 26.80 25.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.79 0.71 -4.48 89 710 2011-06-14 08:17:18 2011-06-14 09:17:18 1 4 650 0 123 370 30 137.10 48 34.47 CHANGED llshlslQhhshpps...........ssshtp.hpsspplptlhcpuChDCHSspTpaPaYuplsPsuahlppclpcG+cphNhopatsh...........spptppscLsclhphlcpscMP.tpY..hh..hH.pAcLopp-+ptllsWlpptp .................................h.lsYLGLuGYVaaaDppRucp...ucspu...SsluENsclluhl+EKGCDYCHTP.S.A.ELPh.YhhlPsAKQLMDYDIchGhKu..F....NLE..sVcuAL........hsspPVspS-L..sKIEhVhphpTMPPoRY..sA..L..HWuu+lSD-ERs-ILsWIucQR...... 0 33 82 109 +14219 PF14377 DUF4414 Domain of unknown function (DUF4414) Coggill P pcc Jackhmmer: Family This family is frequently found on DNA binding proteins of the URE-B1 type and on ligases. 26.00 26.00 26.00 26.00 25.90 25.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.90 0.72 -4.13 25 383 2011-06-14 09:48:55 2011-06-14 10:48:55 1 45 242 0 276 400 2 103.50 29 3.48 CHANGED IDPsFLtALPE-lRcEVltppls............t..p...sssspsss-lss...........EFLsALPs-IppElLpQEptpppRppppttsp..............ss-MDsAShlATlPPsLRcpVLh-psE ......................lDPpaLsALP-DlRpEV...ltpp..ht.ptsp.................t..p.......sss.....ss...t..ss-lsP.........................................................EFLsALPs-lppElLtQpptppp.ppp...ttttt............................................................hD.sshh.sh...tlRtthh......................................................... 
0 75 144 227 +14220 PF14378 PAP2_3 PAP2 superfamily Coggill P pcc Jackhmmer:Q9LVZ7 Family \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.27 0.71 -4.78 12 684 2012-10-02 00:53:37 2011-06-14 15:22:55 1 10 432 0 300 1471 163 172.80 19 49.31 CHANGED shslhsh-cslthshptsLpph.lsp+PhL.slhush..sohhhththhhlhashhsc.s+hRshhshhhhhshhullshshh.shPs....phLsst.sF.s.hhshsssuha..th.t.h..............hhPShHsuhAhhsulshh+.hthhthhh.lssshsllhslsllustsHYhlDhssGssluhlhhtL .................................................................................................................................h.........................h.......h................h...hhshh..Y........h...h..h.....h..h.h.h..h..l.h...h.....h..h...h...t....c....t..p..............h....p....t....h.....h..t..sh.h...hs..s....h..h..uh..l...h...a.h.....ha.P.s.s...PP..............................th.h.............................t.......s......h..............................s....h....h....t..........s......h..s.s.h.......s....h....h...........................t.........h......tth.......................ss.h...s..A...hPSlH....s.u...h....u....l....h..h......u...l.......s.......l..........h.....p.....h.....t...........t.............t.............h.........h..............h...h....h.........l....h.h....h.a.sh.h.h.hh..s..sls..s..upHYhlD.s.lu.Ghhhshhh...hh...................... 0 97 194 260 +14221 PF14379 Myb_CC_LHEQLE MYB-CC type transfactor, LHEQLE motif Coggill P pcc Jackhmmer:Q9LG13 Family This family is found towards the C-terminus of Myb-CC type transcription factors, and carries a highly conserved LHEQLE sequence motif. 
27.00 27.00 29.00 29.00 26.90 26.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.69 0.72 -4.38 46 474 2011-06-14 14:51:50 2011-06-14 15:51:50 1 10 53 0 259 459 0 49.60 52 15.20 CHANGED uhpls.EALc....hQhEVQ++LHEQLE..lQ+pLQ.lR.I......EupG+aLppllEctpcshu ...............hpls.EAL+........hQMEVQ++LHEQ..LE........VQRpLQ.LR.I.......EAQGKYLQpllEctpch..s...... 0 43 156 213 +14222 PF14380 WAK_assoc Wall-associated receptor kinase C-terminal Coggill P pcc Jackhmmer:Q5XVH3 Domain This WAK_assoc domain is cysteine-rich and lies C-terminal to the binding domain, GUB_WAK_bind, Pfam:PF13947. 27.00 20.00 27.00 20.20 26.90 19.80 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.86 0.72 -3.48 53 366 2011-06-14 15:07:24 2011-06-14 16:07:24 1 27 20 0 205 376 0 92.30 23 22.47 CHANGED hsCssps.....s..st.....sh.hh...h........s...s.................s....s.........hsttCppsltlPVhssth..tt.tt.......ssss...htpsLcpGFpLpWp......tsssCppCts........SuG.pCGasps.........s.ptFsChCsDsh ...........................................................................................................t.....h.ttC..p.....h.lPV...tt..h.t..t...........s.ss....htpsLppGFpLpap.....sssCppCps........S..GG.tCGasps...........t..tta.tCh.Csst................... 0 22 113 156 +14223 PF14381 EDR1 Ethylene-responsive protein kinase Le-CTR1 Coggill P pcc Jackhmmer: Family EDR1 regulates disease resistance and ethylene-induced senescence, and is also involved in stress response signalling and cell death regulation [1]. 
27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.55 0.71 -4.75 46 364 2012-10-10 12:56:15 2011-06-14 16:27:23 1 45 119 0 219 341 1 179.40 30 21.50 CHANGED sssu-slSpRaWspssLsY...t-KlsDG..FYslhGh........t.....pts+hPSLpsLcuhs.s.s..ssshEllLVDRptDspLpcLcphAhsls......pss.....ss.ss.....ptllp+LApLVsspMGGssts.sps.hhs....p.Wpp..op...L+s...ppusls..lGpLslGLsRHRALLFKVLADsl.....slPCRLVKGs..su..........c-suhslV+hs.s....sREalVDLhusPGsLh.P ........................................................................................hu..hh..s..ls.....p.l.sG..FY.lhs...........................................phPsh.pL...pt......tt.th-slllst...D.tL.t.l.phs.t.hh..........................ts..........ss.....tthlppLAplVscpMG...........Gss.....s..tp..hhh........pap....hSp......ph+p.........psssls..lGplp....hGl.sRHR.ALLFKlLADpl.......s..l.PC+LV+Gphhsu..............pstshsllt..ht.s..............sp...E.....alVDLhttPGpLh.................................................... 0 41 119 175 +14224 PF14382 ECR1_N ECR11_N; Exosome complex exonuclease RRP4 N-terminal region Coggill P pcc Jackhmmer: Domain ECR1_N is an N-terminal region of the exosome complex exonuclease RRP proteins. It is a G-rich domain which structurally is a rudimentary single hybrid fold with a permuted topology. 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.24 0.72 -7.74 0.72 -4.66 79 613 2011-06-14 16:29:30 2011-06-14 17:29:30 1 16 360 11 439 594 35 39.40 35 14.59 CHANGED lllPGp.hLus.s.s.pa.h.s.GpGTYh......cs.splhASlsGhlplss ....llsPG-.hls......s.s.s.p.....a.h...t.GcGTYh.......................ps..s......plhuSluGhVph....... 0 131 226 350 +14225 PF14383 VARLMGL DUF761-associated sequence motif Coggill P pcc Jackhmmer:Q1PFF9 Domain This family is found frequently at the N-terminus of family DUF3741, Pfam:PF12552. 
21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.41 0.72 -7.56 0.72 -4.78 10 212 2011-06-14 17:36:40 2011-06-14 18:36:40 1 5 19 0 148 189 0 29.80 43 3.81 CHANGED suosssscuhuh+uPulVARLMGLDSlPssppsp .........t...............+sPulVARLMGLD.uLPs............ 0 20 93 122 +14226 PF14384 DUF4415 Domain of unknown function (DUF4415) Eberhardt R re3 Jackhmmer:Q8KF70 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 82 and 104 amino acids in length. 26.10 26.10 26.10 26.30 25.90 26.00 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.08 0.72 -3.81 196 801 2011-06-15 08:14:51 2011-06-15 09:14:51 1 2 572 0 237 649 74 66.50 32 69.83 CHANGED Dhsphsct...tpht..............................................Kp.p..lolRlDsDV.....lcaF+u..p.GcG......aQo+lNtsL......Rchh .................................................-.s..sct.h.tpAp.ht.....................................hhpPh.....sKp.p..lolRlDsDV.....l-aF..Ku.....s..GcG......aQoRlNthLRchh.............. 0 53 137 182 +14227 PF14385 DUF4416 Domain of unknown function (DUF4416) Eberhardt R re3 Jackhmmer:C9RKS1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 176 and 187 amino acids in length. There is a conserved DPG sequence motif. 
27.00 27.00 43.60 96.40 19.40 19.00 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.83 0.71 -4.53 29 85 2011-06-15 09:02:56 2011-06-15 10:02:56 1 2 82 0 53 88 19 163.40 33 87.51 CHANGED tst.spLlh.slhss...pphhpp..lhttLpc+FGsl-hhSs.hsFc.aTsYYpcEMG.sslh..++hluFccLls.-pLs-lKlhTNplEppaut.cup....RplNlDPGYlstpplVLAosKsasHRlYLscGIaA-lTLhYppGc.apshsWTYPDYpspphtpaLsplRch..Y .........................h....spLlh.ulh.hs...cthhpp..lhstLpppFGslchhSs.hsFp.aTsYYpc.EMG.ps..Lh..++hluFccLlp.ppLsclKhtosplEcpaup..puc....RplNlDPGYlstpplVLAosKsasHRIYLscGIYAElTLhYpcGc.apshsWTYPDY+spchtpahpplRchY...... 0 29 46 48 +14228 PF14386 DUF4417 Domain of unknown function (DUF4417) Eberhardt R re3 Jackhmmer:C9RLL4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and viruses. Proteins in this family are typically between 220 and 340 amino acids in length. There is a single completely conserved residue G that may be functionally important. 27.00 27.00 31.00 30.60 26.90 26.60 hmmbuild -o /dev/null HMM SEED 201 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.13 0.71 -4.98 22 148 2011-06-15 09:37:46 2011-06-15 10:37:46 1 2 121 0 11 123 1 194.50 25 76.35 CHANGED pll.p................h.hsuphclPhlt.sphhsts................hlsFshtpppcp.....................pstslHFalcDhpFcclassPc.......phlppLpp..apulloPDaSlYhDhPhshQlaNhYRs+hlutahQppGlpVIPslsWuspcoa.......casFcGl.sctuslAloohG.....phpspcpcphFhpGhpchlp+l.pPtplllYG....th..pphass.....hpllphpsatpp ...........................................................p.......h..psthplPhlp.sphhs.t................hhsashhtpp.p...............................psshlHFahcDhpFcp...laspsc.......phlpclpp.......asullsPDaSlahD......hPhshplaNhY+sRh.lutahQ.ppGlpVIPslsWuspcoa.......casFcGl..ctuhh..AluohG.....hhpsppphphahpGlpchlc+l.pP.phlll.YG.....th..c.hatp..t..hphhph.sh...p........................................... 
0 6 9 11 +14229 PF14387 DUF4418 Domain of unknown function (DUF4418) Eberhardt R re3 Jackhmmer:C9RQ28 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 132 and 150 amino acids in length. 27.00 27.00 43.30 43.30 24.30 24.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.07 0.71 -4.38 32 145 2011-06-15 10:24:38 2011-06-15 11:24:38 1 1 133 0 26 122 2 125.30 29 87.92 CHANGED hl.lllGlLlslsPhh.hhslCss..h.tst..................hMtCaasupAhhslGsllhllullhhhh..ppthchuLslsshshulls...hLlPshl...............IGlC.ttstMtC+sh.ohPslhllullhllhuhhs...hahpcc ..........h..tlllGlllsluPph.hAssCts..h.pst.....................................hMtCaaoupushslGllIhllullhlhl..sttl+huLsluslslulhs...hllPssl...............IGlC.ttstMpC+sh.ThPhlhllullhllhusls...lahpp.p....... 0 17 19 22 +14230 PF14388 DUF4419 Domain of unknown function (DUF4419) Eberhardt R re3 Jackhmmer:C9RQB5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria, eukaryotes and viruses. Proteins in this family are typically between 348 and 454 amino acids in length. 
25.00 25.00 26.90 26.70 21.30 21.00 hmmbuild -o /dev/null HMM SEED 299 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -12.12 0.70 -5.20 59 188 2011-06-15 10:35:06 2011-06-15 11:35:06 1 7 78 0 151 195 7 271.50 28 70.40 CHANGED ssuFlpu.shpAYupHhsLhlpPD-lWhsIlsphuhalsspu..EpLRphFVsa.cGKcpLhVp....stssh.p..............sasplh.chsppIpcpltss.h....cchhh..s.sFSTTTtsD+lsuslshMush+pYFcYp.......hhhhCGlPs..........VTLLGph-DWppLtp+lc+Lt-.....as.p..............hppWt.phLpPllccFlpohcsps....s.......hpFWspIschpus...uuGssh..l......oGWIosFhhasp..pGphhttttt............................................................ts.h.hlshpplPsGhsplPlplp...s.stt...hptphhuGhhGhph.....ppp............................psslpPthsWhlh ...................suhlpuhhpAYspHptLhlcPD-lWhsIhpQhshal....Ntpu..E..plRphFVsa..pG.KcpLhVp........ssh.p..............satthh.phsppI.pppltss.l.......tshlh...P.sFSTTTtsDphsuslhhMu.sh.ppYFpat.........hhhhCGl..Pp..........lTL.GphpDWtplhp+lccLt-.......as.t...................hptWh.phLpPllcpFlpuhcsss........s............hpFWpplhchpst...........huGssh.....l.....sGWl.ssFhhaspcGph.t..................................................................................h.hls......hpplPsuhspsslhhp...p..t......h.h.hhuGhhGhph.....pp............................t.slpP..sWhh............................................. 0 78 120 139 +14231 PF14389 Lzipper-MIP1 Leucine-zipper of ternary complex factor MIP1 Coggill P pcc Jackhmmer:A8MS81 Family This leucine-zipper is towards the N-terminus of MIP1 proteins. These proteins, here largely from plants, are subunits of the TORC2 (rictor-mTOR) protein complex controlling cell growth and proliferation [2]. 
The leucine-zipper is likely to be the region that interacts with plant MADS-box factors [1], 26.00 26.00 26.00 26.40 25.80 25.60 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -9.98 0.72 -3.88 39 340 2011-06-15 10:44:33 2011-06-15 11:44:33 1 11 25 0 185 326 0 83.90 31 13.80 CHANGED pcpps.p..pp+t....sLEp-VtpLpcpLpcEpslRtsLEpAlsp.......ssus.hsp.hst.plPppspELlpEIAhLEt-VspLEppl.sLttplhppp .............................t......ppt+.sLpp-.VppLpcpLp.pEpsl+tuLEpA.hst.....................s.us...hsp...ss..hlPspsp-LltElAlLEtEVs+LEppllsLppplhpp................................... 0 35 109 149 +14232 PF14390 DUF4420 Domain of unknown function (DUF4420) Eberhardt R re3 Jackhmmer:C9RS82 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 310 and 334 amino acids in length. 27.00 27.00 33.00 32.70 19.40 18.60 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.77 0.70 -5.58 41 142 2012-10-11 20:44:47 2011-06-15 11:48:59 1 1 139 0 58 155 70 293.10 20 92.56 CHANGED pplpphWpplptp........t.thstphlptstt.hslhluhshpsphthhl..............hshthhsphsphpuhc.lp.......t.tttphhlslphpsss.hpclFstlssDllppl...t.pstpphhpshhtplpcWpcLht..tttpslopcp.hGLhGELhhLcp..h.sthsspts..lpuWsGPptsppDFphss..sslEVKooh..psspplpIou.cQ.L-ssstt.....LhLshhtlppsss....Gh.olscllsclpphl...t..tshptapp+Lh.phGYh.tpt....htphpatlpphch.apVc-.sFPRlspss.....lP.pulsslpYpl-Lss 
...........................................................................................................................t............................hh......hthhhuhs...s.p.thhh.....................tph.sphpshp.lth................tttthhltlhh.tptt....tclFttlspsllppl.......tstpphhpshhpplppWpplht..tttptLspcp.hGLhGELhhLcp...l....tthsssss...lpuWpGP..ptsp....pDFphss..tslEVKooh..p..pspplpIuu.cQ..Ls.sstss........LaLhshplppsss....Gh..oLs....s...llpplpptl...ttspshptFpptLh.phGahstpt....htptpa....tl.ps.h....ph...apVs.c.sF.P+lspss......lP..pulsslpYplsLs.t................ 0 17 45 52 +14233 PF14391 DUF4421 Domain of unknown function (DUF4421) Eberhardt R re3 Jackhmmer:C9RSA4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 336 and 370 amino acids in length. 27.00 27.00 27.40 27.10 24.50 24.50 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -11.69 0.70 -5.22 31 119 2011-06-15 10:56:27 2011-06-15 11:56:27 1 1 103 0 25 121 5 295.50 33 84.70 CHANGED sphDTs...........YIcspcas.hshhlpssssh.Ehaplsssp................tpslshsPsssh+lGshhuaRala........LGaohshptlt.tppspspppphslsL....Yos.+lulDlaY++ssu.Yphpphphts..t..s....shsF.sshpsphhGhNlaYIFNp+.+FSYsAAaSQSshQ++SAGShlhGhuastaplphshstlst.hppp..h................ththpphcahshslusG..YuYNWVFu+.salhss..SlssuluYp.sh.php.pp.tt.sshphpshshchluRhGlsaNss+aauGhShlhcsasYpcpsao....ssshF...GslslYsGa.....pF 
...........................................................................................................t.phDTs...........YIpPp+YN.aslMlppsssa..EhYplsspp.................QplsFuPsssh+..lGhYFGWR..WIF........LGaolDlsplh..pppcs+tp-hsLSL.....YSu.clGlDlaYR+TGssY+l+phphhs...phssp.....stsF..sGlpsph+GhNlYYIFNp+.+FSYPAAaSQST.QR+SAG...ShlsGhuhopHpLshDaspLsthlppp..ht........................shthpclKYsshulssG..YuYNWVFA+.Na.Lhsh..SLssuluYKts.h...ph...pppp....sphhhp.shNh...DhlsRhGlVYNss+aasGhShlh+sYsYc+s.sFS....hsNhF.......GslplYsGaNF.......................... 0 15 24 25 +14234 PF14392 zf-CCHC_4 Zinc knuckle Coggill P pcc Jackhmmer:Q9SHW6 Domain The zinc knuckle is a zinc binding motif composed of the the following CX2CX4HX4C where X can be any amino acid. This particular family is found in plant proteins. 27.00 6.00 27.00 6.60 26.90 -999999.99 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.15 0.72 -8.58 0.72 -4.29 79 629 2012-10-03 11:39:54 2011-06-15 13:56:05 1 98 80 0 325 3156 20 38.20 30 8.98 CHANGED lcls+PLpptltlp....hs...ss...p.p..th...hplcYE+lspa.ChhCGhlGHspp..pCst ...............................................................................h.h.YEhh........h..Ch.pCsthGHstp..pC........ 0 67 167 281 +14235 PF14393 DUF4422 Domain of unknown function (DUF4422) Eberhardt R re3 Jackhmmer:C9RPD5 Family This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 255 and 371 amino acids in length. 
25.00 25.00 29.50 29.50 22.30 22.30 hmmbuild -o /dev/null HMM SEED 231 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.66 0.70 -4.76 90 376 2011-06-15 13:10:07 2011-06-15 14:10:07 1 7 289 0 76 365 474 224.80 37 67.37 CHANGED +IhlusHKphp.hP..pc.shYhPlpVGss.t...............p...........sahtDsoGD.NISp+NPtYCELTulYWAWKN...h.csDY.lGLsHYRRaF....shppp......................tthptllsppplpphl..p.pa..DlllP...c+Rp..ahlc..........olhsHYtcs.Ht.tccL-hscclIpcpaP...-Yhsua-plhpppp.uah..aNMFIM++clFcpYCpWLFsIL.ElEccl..Dh....os.YsshptRlaGalSEpLhsVWlp...p....pp.lc...htE .....................................pIhlusHKpht...hP...ps.shYhPlpVGtshp....................sh..............shhtDso.....G.D.NISp...+NPhYCELTulYWAWKN....h..csDY.lGLsHYRRaFshppt..........................................tthphll.spp...p.l.pphL...p..ph..DlllP.......c++p.......ahhc..........ohhs...HYtps...Hp...tcsL-hscplIpcpaP...-Yhsua-p..hh.pppp..uah..aNMFlM++chFscYspWLFsIL.clEccl..Dh................os.Y.....sshptRlaGalSEhLhsVWlt....p....pp.h....t........................... 0 23 47 56 +14236 PF14394 DUF4423 Domain of unknown function (DUF4423) Eberhardt R re3 Jackhmmer:C9RKJ5 Family This presumed domain is functionally uncharacterised. This domain family is found in bacteria, and is approximately 170 amino acids in length. 30.00 30.00 30.80 31.50 29.80 28.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -10.62 0.71 -4.72 52 72 2011-06-15 15:22:47 2011-06-15 16:22:47 1 2 5 0 70 73 44 168.50 24 60.39 CHANGED hhphsptpph+hlsscpacaapsWhpsllRp.Lsshhsss...ssttlActhhstlostclpcuLphLhchullc+sts.ssYtpTppsls.sst-shshul+shp+phhchAhcul-phshpcRshSulThulsccsaccltcplcchR.+clh...sluspp.pc.sccVYpLNlQLFPL ............................h...hhpttph+hlsscpacaappWhpsslRp.Lhshhsss.....ssttlAcphhstlostplccuLphLhchuhlc+sts..spat.oppsls.sstcshshul+shppphhchAtcul-phshpcRshSulThulsccsaccltctlpcFR.+clh...plssps.pp.scpVYpLNlQLFPl.. 
0 70 70 70 +14237 PF14395 COOH-NH2_lig Phage phiEco32-like COOH.NH2 ligase-type 2 Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A family of COOH-NH2 ligases/GCS superfamily found in the neighborhood of YheC/D-like ATP-grasp and the CotE family of proteins in the firmicutes. Contextual analysis suggests that it might be involved in cell wall modification and spore coat biosynthesis [1]. 25.00 25.00 29.50 28.70 20.00 17.90 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.63 0.70 -5.33 8 44 2012-10-02 17:21:26 2011-07-19 19:39:50 1 2 41 0 16 48 0 234.20 36 50.04 CHANGED osloLGADPEFMLppspGc.MlsAS.cFaspsGslGCDsp+lttc.....hPlAELRPuPspsPcpLltplcplLppAsphlsctslcWlAGuhPasGYslGGHIHFuGl.lohpLlRsLDsYLuLPLhhlEDPss.ttpRRp+YGaLGDaRhKsaGtFEYRTssSWLVSPtsA+AsLpLA+llAcpaccL.ppshhsschpcAFYpG-+chLtsplstlhp-lpuhssYsshtttI-.lashl-cGpoWDEcsDlRptW+IPhh. .............lhlGhDsEFhLhp.psp.hl.AScah.s+tG.sGsDshphptch....hPlsElRPtPsssPttLhhplpphht.Att..hlsp....psl.WhAGuhPhtsaslG.GHIH..F..S...G.l...........sL.....sh....pLlRsL....DsYLALPLsLlEDsps..thRR.s.pYGhLGDhRtpsaG...GFEYRTLs.S.WllSPtls+uslslAhll..ApphtpLpt...psl.p..phpcAaYpGs.+thL+thh.tlhtcl..thstYtpa...........pt.lt.hhphlcptt.WsEptDlR.hWpl...s............................................ 
0 10 14 14 +14238 PF14396 CFTR_R Cystic fibrosis TM conductance regulator (CFTR), regulator domain Finn RD rdf Manual Domain \N 22.50 22.50 24.10 23.90 21.10 21.10 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.44 0.70 -4.97 9 137 2011-07-19 22:39:50 2011-07-19 23:39:50 1 9 75 46 32 131 0 200.10 67 15.53 CHANGED DFSSpLhGh-uFDpFSAERRsSILTETLRRhSV.DtD..uusshsEsc+QSF+Q.......s.......G...-.....asEKRKsS.ILNPlsSsRKFSllQpu......QhsuhE-ss..cE.sER+hSLVP-sEQGEssLPRSNhh.ssGs.shpu+.RRQSVLsLMTp.S.pQGpphatptsuShR.KhSlsPQssL..SElDIYoRRLSpDSsh-ISEEINEEDLKECFhDDh-shs...ssTTWNTYL .......DFSSKLMGhDSFDQFSAERRNSILTETLRRFSl...EGDAslSWsETKKQSFKQ...............T..............G.E.....FGEKRKNS.ILNPlNShRKFSlVQKTP.......LQMNGIE.Ess..-EPhERRLSLVPD.SEQGEuILPRuNll.soGP.ThQu.+.RRQSVLNLMT+.Sl.sQGQsl+++ssuSsR.KhSlAPQusL..oE.lDIYSRRLSQ-ouLEISEEINEEDLKECFhDDhEsIP...sVTTWNTYL......... 0 1 3 11 +14242 PF14397 ATPgrasp_ST Sugar-transfer associated ATP-grasp Iyer LM, Abhiman S, Burroughs AM, Aravind L rdf Manual Family A member of the ATP-grasp fold predicted to be involved in the biosynthesis of cell surface polysaccharides [1]. 
31.40 31.40 31.40 32.50 31.30 31.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.79 0.70 -5.32 11 400 2012-10-10 13:17:03 2011-07-21 22:10:43 1 5 364 0 135 342 56 268.40 35 78.98 CHANGED phhhppphtphs.....NspphhtlhccKshasph..hp-h..................hspchhshp....h.p.p-hptahpsp....pplhlKslsG.u.GpGlthhptss......................htpctpslhphhpssc.....pallpEtIpQHpthspLss.sSlNTlRllThhcs..spVclhhsllRhu..tuutssDNhus.GGhhsslDtsus..lpp.A.sh-.ptpta...phHPsotthhssapI..Phaspslphshcsupp.lPplthlGWDlAITspG.PllIEuNss.........sshs......h............ulhs.thsclhshphp ........................................................................................hth.....pRNhsalh+Y..NtR.phYslVDDKlhTK.h...A.p.t.Gh...........................h.hslp....tptplcplpph.....l.tsh.....ssFVIKP.A.p.GuG.GcG.IlVl.s.scptshahp.......ss..hs.pc..lpcc.....l.oshLsGLaSLuGp.......Dsul.lEphlp.cshFpuhSa..-GVPDlRlIl..........h...........pt...........hslhAMhRLu.....psuss.KA.....N.L...H.....Q...G...A...lG...VG.l....D.l.uTG..tlps.....s.........ph...s....p....l.................scH...PDTspplsslpl..PpW-pllpLAusshhE.hsGL.....GYlGsDhVLD.c-.c..GPhlLELNAR.......PGLsIQ.lANst............Glh.............pphtt............................ 0 43 84 118 +14243 PF14398 ATPgrasp_YheCD YheC/D like ATP-grasp Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A member of the ATP-grasp fold predicted to be involved in the modification/biosynthesis of spore-wall and capsular proteins [1]. 
100.00 100.00 105.30 105.10 93.00 92.90 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.47 0.70 -5.59 14 572 2012-10-10 13:17:03 2011-07-22 00:11:06 1 7 188 0 184 583 0 256.60 26 65.71 CHANGED pphppt.th.has.shhsKWcVactLpcppplp.aL.PpTchhpshsslcphlspapplalKPhpGstG+GIhplpptpt...phhschpssptp..p.passhtthhshltpthsppcalhQQulsLhphpG+shDhRshhpKNppGpWploulssRlAupsplsoplssGGpstthpchhs.ct..p.....ppshtppLccsuhpluptl-cthssp....luELGlDlGlDspG+lWllEsNoKPu+pshpp....p..tpppohpp.lpYutaLs ........................................p.......th.hhs.......s..h..h.sK.WplaphLtpstplp.sa.L.PpTphh.pp.h..c...p...lp.phLppa...p.tlalKPhpGohG+GIhplptpp.......s............sahh..phppppp.....t..........hh.php.s.hp.p....L.hphl..p.....p.....h.h...p..t..p.....p.....YllQpG.IsLhph..sGcPhDhRlhspK.s.t.p.Gp.WploulsuRlus.ps.slsT.sl.s..sG.Gp.sth.h.p.phht..p......pc......ttplhpclcch...ulplupt...l-..cphst.....luElGlDlGlD.ppu.plWlhEs..No.K.Pu.+pshp.ph......t.t...p......p.shph.hpYuhaL............................. 0 94 141 147 +14244 PF14399 Transpep_BrtH BtrH; NlpC/p60-like transpeptidase Iyer LM, Abhiman S, Burroughs AM, Aravind L, Eberhardt R la_psag Manual Family Members of this family are often found in the gene neighbourhood, or fused to, non-ribosomal peptide synthetases. They are predicted to function as transpeptidases in peptide metabolite biosynthesis [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 317 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.78 0.70 -5.14 49 388 2012-10-10 12:56:15 2011-07-24 22:26:28 1 12 298 0 113 426 19 290.20 18 77.47 CHANGED HCtosshssLL.phtGhtl.....sEshhFGLGuGLsahaht..hhphsh...hlssRst......phhpphtptLGhchp....hpphsssppuhctlcphlspGpsVhlt.sDhaaLsYhss...phHFsuHhlllhGhD..pcphhlsDs.spsshhpss............hssLtpARtupu.........shss+sphathpt....ssshss..ltpslh....pulppsspphlsss...................hGhpGlpphupcl.cW.....tspcphpthhtthhphhcch...............GTGGuhFRtlYupFLpcsuphhst.t.htphuphhpchuptWspl.ushhtphststp...tshpphuchlpplsctEcphhctLt ................................................................................C.sssht.hh.p.hGhth.....s-.hh.hhhstuhsh....hh.......pht....h..hshph........thhpphhph.lGh.php.....hpphs.s.......ppu.....h....p.tlcphLs....pG.hPVhl....t.hDhh..aL..s.......Yh..ss...................phph...............s.............s...HhlllhGhD.....ppthhl..t.....Ds........t...s...h.h...phs............hppltcAhtup.s..............hs..s.tshh.hthph...........st..p......hhptht.....pslpp.shp..phhsss.............................hGhtuhpphsppltp......h.......s.............tpphph.h...hh.......h.hthhtph.......................hR.hh.up.aLpchtt...h........h..p....hup....hhpphuphapth.thhhhc.......t.t.....pthtphuphltplsphEpphhp.h........................................................................................................................................................................................................ 0 44 85 99 +14245 PF14400 Transglut_i_TM Inactive transglutaminase fused to 7 transmembrane helices Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A family of inactive transglutaminases fused to seven transmembrane helices. The transglutaminase domain is predicted to be extracellularly located. Members of this family are associated in gene neighborhoods with a pepsin-like peptidase and an ATP-grasp of the RimK-family. 
The ATP-grasp is predicted to modify the 7TM protein or a cofactor that interacts with it [1]. 100.00 100.00 106.50 106.10 91.80 91.30 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.88 0.71 -4.73 93 271 2012-10-10 12:56:15 2011-07-24 23:33:47 1 2 269 0 71 210 16 166.30 42 33.13 CHANGED YRthshGlPhsPGEppplWslEA+lcFsAsGc.Pl+........VohslPphpsuasllsEsssS.sGYGl...........oh..hcsc.s.s..R..+Ap.W.ShRpAsGpQsLYY+splh.s.csps....chs...ths...sP.....................s...hssh.s..h.suP.ppsAA.psLlspspp+SADsho..........FspELI.Kpl.N..sss.sQNspLLLs.s..t....s.+ ....aRt.shGlPhTPGETcplWslEA+l-FsApuc.slK........VShusP.pppuasllsEosuS.sGYGl...........Shhps-..u..sRRspW.SlRpA.sGsQTlYY+sphhscspu..cts..phs..ss.....................t.hss.s.h.-uP.EcsAApALlspspp+SADshT.....FspELIKplNssp.sQNspLLLsp......scsp... 0 20 34 56 +14246 PF14401 RLAN RimK-like ATPgrasp N-terminal domain Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family An uncharacterized alpha+beta fold domain that is mostly fused to a RimK-like ATP-grasp and is found in bacteria and euryarchaea. Members of this family are almost always associated in gene neighborhoods with a GNAT-like acetyltransferase fused to a papain-like petidase. Additionally M20-like peptidases, GCS2, 4Fe-4S Ferredoxins, a distinct metal-sulfur cluster protein and ribosomal proteins are found in the gene neighborhoods. Contextual analysis suggests a role for these in peptide biosynthesis [1]. 
25.00 25.00 31.30 30.40 24.00 19.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.91 0.71 -4.71 87 317 2011-07-25 16:00:10 2011-07-25 17:00:10 1 6 294 0 83 259 20 151.70 37 31.83 CHANGED osp-YLpps....htps...........+...spllNLC+sYpYhopGYYsSLLAEARGH+Vl.PoVpsls-.......LppppLhshshtplp..............................................phltptl.t..p............hssphhplp............laFGps..ptp..slcclARplF-tF.sPlLclphpcppp......................Wplpslps...hslscLscpcpshFtp..uL- ....................osppYLpps....hhst...........+..pspVINLCRsYpYhSpGYYsSLL.AEARGH+VI.PoVpsIs-.......lsptphhphslt.c.lc....................................................chh.p.lpp.............ss-shslp............lYFGps..stcsLccLARpLFEtF.sPlLclphc+pps.......................WpIpsIps.hshpcLp-scp-hFhpuL.................................. 0 37 61 73 +14247 PF14402 7TM_transglut 7 transmembrane helices usually fused to an inactive transglutaminase Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A family of seven transmembrane helices fused to an inactive transglutaminase domain. The transglutaminase domain is predicted to be extracellularly located. Members of this family are associated in gene neighborhoods with a pepsin-like peptidase and an ATP-grasp of the RimK-family. The ATP-grasp is predicted to modify the 7TM protein or a cofactor that interacts with it [1]. 
25.00 25.00 42.00 41.50 22.50 21.90 hmmbuild -o /dev/null HMM SEED 315 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -11.98 0.70 -5.50 34 285 2011-07-25 16:28:49 2011-07-25 17:28:49 1 4 280 0 75 224 26 309.70 52 62.39 CHANGED sphlhpLLspAsIPARhVpuLpLE.Du+RcQplpsalpVass.cc....WhhFsPpoGppGhP-shLlWppustsLL-lpGGpsupVsFShhppphsshphup..pptp.ssp..hhsF..SlasLPlcpQslFKslLLlPlGsLlVVhlRsllGlcTSGTFMPVLIALAFlpTpLlsGllhFlllVulGLllRuYLs+LNLLLVsRluuVllhVIhlhuhlSlluaKlGlspGLolohFPMlILuWTIERMSIlWEEcGs+-VlhQGsGSLhVAlluYLsMssshlpHLsFsFPtL.LllLAllLLlGpYTGYRLsELhRF+shhcs ................................................................s.hhppLLuhAtlPs+hVtsIpLE.DsR...R+Qolp.hlcVasG..pp....WllF.....NPpTGpQ.uhssN.hL.lWppussuLL-VsGGpNSQVpFShlspchss.pssp.....scspsss..hLsaSlauLPLE-QuhFKslhLIPIGALlVVhLRlllGLKTSGTFMPVLIAlAFlQTpLssGIluFlLIVuhGLllRSYLS+LNLLLVuRlSAVIIsVIllIulholluaKlGLscGLoIT...FFPMIILoWTIERMSILWEEEGu+EVhhQGuGSLhsAlLsYLuMosshlpHLsFNF.ulpLllLAhlLLhGpYTGYRLoEL+RFKsLsc-............................. 0 22 37 60 +14248 PF14403 CP_ATPgrasp_2 Circularly permuted ATP-grasp type 2 Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Circularly permuted ATP-grasp prototyped by Roseiflexus RoseRS_2616 that is associated in gene neighborhoods with a GCS2-like COOH-NH2 ligase, alpha/beta hydrolase fold peptidase, GAT-II -like amidohydrolase, and M20 peptidase. Members of this family are predicted to be involved in the biosynthesis of small peptides [1]. 
191.00 191.00 195.90 195.60 187.90 187.50 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.30 0.70 -5.83 6 58 2012-10-10 13:17:03 2011-07-26 05:57:10 1 2 52 0 30 57 2 428.80 25 95.00 CHANGED LspAlAD.YculL..s.slthtuW.psLstchRuspLhaGsRPlsslh.............RPpllocsQY-lLp+sscplspAlpclspthLsss....ulRphLtLoPhEE+LlshDPGYhtsp.AtuRhDoFLolDGs.LpFVEYNA-SPuuh.AYtDhLAchFlshPshpEFpK+YsltPLPuRphhLcTLLssaRpuGusstcPplA.IVDWpu..sPstoEFEhFpcaFt.......-aGl.sVIsoPp-Lsa.RDGpLls...........GshPlslVa+RllTsEhLsHas...LsHPLVpAYtsGAlslVNSFRAcLlHKKulFALLoDEphcs.hsA--RuAl+sHVPWTRlVpPu.TTapstsIDLlsFAhANRE+LVLKPND-YuGKGlsIGWEsos-tWcpALppAhco.PalVQERVpIshpPaP..thscGcl.ht-h.VDscPaLFG..scVpGChsRLS...TsslLNlos.uGuTlPsFll-- ........................h.....................tsa..th.tthctsthhht...sc.s.lstsa.............pPhhhsppshcphpphspplhpllp+lhpchlpss....phRchhshsst.ccLlhhs.GYsthl.shuRhDlFhs..t.ss...hpFsEhNsDuuuuh.scsctlsphhhph.shpcFtcc..aplp.shs..h..hp.hlcphhshYcp.hts.psppPplA.IlDaht...hsshsEFc..hFtchap.......cpGhsshIsssccLpa..csspLhs...........ss.h.lDhlaRRhVTs-llpphs...thcsllpAhtssslshlsSFpupllHsKhlFslLpD-p..spt..h..Lss-EpshIccHlPaTphlps............h-lhphshss+-chllKPsDtYuucGVhlGh-hst-cWcptlpc....shp....p....sallQEahph.s.ppth........th.p.-uc.s.....h.sphhhssu.salas...........tphsGshoRlG...sssllsssh.tshsssshhlp................................................ 0 18 26 29 +14249 PF14404 Strep_pep Ribosomally synthesized peptide in Streptomyces species Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family A ribosomally synthesized peptide related to microviridin and marinostatin, usually in the gene neighborhood of one or more RimK-like ATP-grasp. The gene-context suggests that it is further modified by the ATP-grasp. The peptide is predicted to function in a defensive or developmental role, or as an antibiotic [1]. 
25.00 25.00 103.10 103.00 21.40 20.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.45 0.72 -8.89 0.72 -3.75 7 17 2011-07-26 15:33:21 2011-07-26 16:33:21 1 1 17 0 3 19 0 62.90 72 98.35 CHANGED MpPFALNYARPAsthpsssPYsYDuuhQLNVLhDGRsAApD+ALLtclGTTTSTAGSKTHFDD MQPFALNYARPAsph-sssPYsYDSGLQLNVLhDGRlAAsD+ALLRElGTTTSTAGSKTHFDD 0 1 3 3 +14251 PF14406 Bacteroid_pep Ribosomally synthesized peptide in Bacteroidetes Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp, and an ABC ATPase fused to\ a papain-like domain. It is often present in multiple tandem gene copies. The gene contexts suggest that it is modified by the ATP-grasp as in the biosynthesis of microviridin and marinostatin. They might function in defense or development or as peptide antibiotics [1]. 25.00 25.00 34.10 39.50 20.00 19.60 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.51 0.72 -3.81 17 17 2011-07-26 16:09:42 2011-07-26 17:09:42 1 1 5 0 0 17 0 50.80 32 80.28 CHANGED MKKLsthp.S..hhpNpKLsstptpultGG.hpsthpphs.....hsssc.s+DsDsh MKKLsthp.S..hhpNpKLsstptpultGG.hpsthpphs.....hsssc.s+DsDhh.. 0 0 0 0 +14252 PF14407 Frankia_peptide Ribosomally synthesized peptide prototyped by Frankia Franean1_4349. Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide linked to cyclases in chloroflexi. It may have a link to cyclic nucleotide signaling [1]. 48.00 48.00 48.70 48.50 41.00 36.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.77 0.72 -4.21 10 16 2011-07-26 16:57:07 2011-07-26 17:57:07 1 1 13 0 10 14 0 60.90 37 80.85 CHANGED plE+lIGRAVoDssFRppLlsDucp......Ascs..YDLTsEELcAL-clcsssLpuhAtsLDtcLs+ .thEclIGRAVsDssFRppLlssscp......Ascp..YcLTsEELcAL-phcssshpshAtplD.plp+......... 
0 6 10 10 +14253 PF14408 Actino_peptide Ribosomally synthesized peptide in actinomycetes Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp and an aspartyl-O-methylase. Gene contexts suggest that it is further modified by the ATP-grasp and the methylase. It might function in defense or development, or as a peptide antibiotic [1]. 25.00 25.00 28.00 30.10 24.80 18.40 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -9.06 0.72 -4.04 16 32 2011-07-26 17:11:13 2011-07-26 18:11:13 1 1 26 0 15 34 0 61.30 36 65.68 CHANGED sPhssss.........lDPsTQhulhsDphG.pllE.hGKH.GTussstTsosTss..DGps...ptss-sDospD .................Phssss.....lDPsTQhuhhhDptG.pls-.hG+H.GTuosspTsTsTus..DGps...spssDp-sspD........... 0 2 12 13 +14254 PF14409 Herpeto_peptide Ribosomally synthesized peptide in Herpetosiphon Iyer LM, Abhiman S, Burroughs AM, Aravind L la_psag Manual Family Ribosomally synthesized peptide that is usually in the gene neighborhood of a RimK-like ATP-grasp, and an ABC ATPase fused to a papain-like domain. It is often present in multiple tandem gene copies. Gene contexts suggest that it is modified by the ATP=grasp. It might function in defense or development, or as a peptide antibiotic [1]. 50.00 50.00 50.60 50.00 28.70 27.20 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.07 0.72 -8.98 0.72 -4.21 6 8 2011-07-26 17:54:10 2011-07-26 18:54:10 1 1 1 0 8 8 0 57.80 39 73.45 CHANGED MEFcsh...KTEElPlI.....FGLTYLEEEAAEIsDVVGCLMPIDG..YosTGCDDSDts.....IP Mchcsh...+hp-h.lI.....FGLTYLEEEAAEIsDVVGChhslDG.uYosTuCDDuDt.......P.............. 
0 8 8 8 +14257 PF14410 GH-E HNH/ENDO VII superfamily nuclease with conserved GHE residues Zhang D, Iyer LM, Aravind L la_psag Manual Family A predicted nuclease of the HNH/EndoVII superfamily of the treble clef fold which is closely related to the NucA-like family. The name is derived from the conserved G, H and E residues. It is found in several bacterial polymorphic toxin systems [1]. Some GH-E members preserve the conserved cysteines of the treble-clef suggesting that they might represent potential evolutionary intermediates from a classical HNH domain to the derived NucA-like form [1]. 25.00 25.00 25.60 25.60 24.20 24.20 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.33 0.72 -3.83 51 101 2012-10-05 18:28:12 2011-07-27 20:41:42 1 24 88 0 22 106 0 70.40 29 13.00 CHANGED sssGphhs...ssstpsl...............sphDMGHp...uhcatchhtph.pht..hopcEhp-ahp.sPcNYRlEp...sosNRS+ttE ........................t..pG.hhs....tstp.l......................t.hDhGHp....uhcapchhtth.p..tt..hopcphh-ahp.sPcsaRlEp...sSsNRSHtsE.. 0 8 12 17 +14258 PF14411 LHH A nuclease of the HNH/ENDO VII superfamily with conserved LHH Zhang D, Iyer LM, Aravind L la_psag Manual Family LHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif, LHH. It is found in bacterial polymorphic toxin systems [1] and functions as a toxin module. Like WHH and AHH, LHH nuclease contain 4 conserved histidines of which, the first one is predicted to bind metal-ion and other three ones are involved in activation of water molecule for hydrolysis. 
23.30 23.30 23.30 29.10 22.70 22.80 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.01 0.72 -3.77 25 102 2012-10-05 18:28:12 2011-08-10 22:52:37 1 25 88 0 22 112 2 81.20 35 12.15 CHANGED TNhchhp...pGpuPhs..psG..cslpLHHlsQppsGPlsElopspHc...tspphLHshtpsups.......cspasp.+psYWKtRupphht ..........TNl-hMp...pGpAPls..pDG..pslpLHHltQcpsGslsElopstH...p...........tt....pphLHthhpss.psh.......+spFsp.RppYWKhRApp..p... 0 3 9 12 +14259 PF14412 AHH A nuclease family of the HNH/ENDO VII superfamily with conserved AHH Zhang D, Iyer LM, Aravind L la_psag Manual Family AHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif, AHH. It is found in bacterial polymorphic toxin systems [1] and functions as a toxin module. Like WHH and LHH, the AHH nuclease contains 4 conserved histidines of which, the first one is predicted to bind a metal-ion and the other three ones are involved in activation of a water molecule for hydrolysis. 18.20 18.20 18.30 18.20 18.10 18.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.79 0.72 -10.76 0.72 -4.07 49 340 2012-10-05 18:28:12 2011-08-11 01:08:44 1 41 221 0 113 345 45 106.40 19 19.54 CHANGED spth..pttshtts...tshpuHHIlspps..........hhsttcplhcchGlc...........lssstNhlhlst........................hHp..GsHspp.....YpptVtcclppspp...............sppthhcpl.ppltccltsst .......................tt...................shpsHHllspp................hh.th.p..pllpchG.hc...........lssspNtlhLP...........................................hHp....GtH..spt.........Ypphl.tppLpphpp..................ttsppthh...ptl.ppltpph............................................................ 0 29 46 89 +14260 PF14413 Thg1C Thg1 C terminal domain Anantharaman V la_psag Manual Domain Thg1 polymerases contain an additional region of conservation C-terminal to the core palm domain that comprise of 5 helices and two strands [1]. 
This region has several well-conserved charged residues including a basic residue found towards the end of the first helix of this unit might contribute to the Thg1-specific active site [1]. This C-terminal module of Thg1 is predicted to form a helical bundle that functions equivalently to the fingers of the other nucleic acid polymerases, probably in interacting with the template HtRNA [1]. 26.30 26.30 26.70 26.60 26.00 26.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.85 0.71 -4.42 152 419 2011-08-11 14:39:21 2011-08-11 15:39:21 1 10 336 8 274 424 17 123.10 34 44.55 CHANGED stpplpDYhsWRQ.sDsHlNNLYNssFWsLl.csGhospcApppLpGohuu-KpElLFpchGINYNs.P.tha++Gohlh+.................................................................................tt..pt.ttt+p+....pthhshat...........Dhh...ppFhcph..h.. ........................................ppl+DYhuWRQ.sDs.............HINN.....LYNTsFWtLl...puGhospcApppLpGThuu-KN....ElLFpcaGI.NYN.s..Ptha+KGoslh+........................................................................................................p.......p..pt......pp.tht+p+....tt.hsha..........chht..ptFWpp.....p................................................................................................................ 0 90 164 224 +14261 PF14414 WHH A nuclease of the HNH/ENDO VII superfamily with conserved WHH Zhang D, Iyer LM, Aravind L; la_psag Manual Family WHH is a predicted nuclease of the HNH/ENDO VII superfamily of the treble clef fold. The name is derived from the conserved motif WHH. It is found in bacterial polymorphic toxin systems [1] and functions as a toxin module. WHH is the shortest version of HNH nuclease families. Like AHH and LHH, the WHH nuclease contains 4 conserved histidines of which the first one is predicted to bind a metal-ion and other three ones are involved in activation of water molecule for hydrolysis [1]. 
25.10 25.10 25.20 25.20 24.50 24.40 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.33 0.72 -8.40 0.72 -4.74 35 277 2012-10-05 18:28:12 2011-08-11 20:50:36 1 34 251 0 50 331 2 44.80 34 6.85 CHANGED tttppspuaTWHHppcs......ssMQLVspslHst......htHsGGhuhhpst .....s..htpspGaTWHHh.s-s........ssMpLVspphHps............htHs...GGhuthpt.t... 0 11 27 38 +14262 PF14415 DUF4424 Domain of unknown function (DUF4424) Bateman A agb Jackhmmer:Q7P768 Domain This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are typically between 310 and 361 amino acids in length. 27.00 27.00 29.60 27.00 23.90 25.40 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -12.02 0.70 -4.88 97 165 2011-08-18 09:52:12 2011-08-18 10:52:12 1 4 135 0 43 147 14 218.20 25 71.31 CHANGED pclpVcYhFpNsosp-lph.luFPh......P.............................h.hs.....s........sspshlpsF+lhVsG+.lpsph...........p.hs.................t.-lostLtppG..........h...t..h..........t...............tthh-tst..t.tashphhY.hWppsFsAGcsV.lcHsYpPs......lusul.......................t...tthChp.t.htthtth.............t..ht.pplsYVLoTuusWp.tsIt-FpLplc+sssspllshCa...ssl+Klssp........................................papappcNFsPpcD ..................clpVcYhFpN.osp-lp..luFPh......P.............................h..s..............sptshlpsFchhlsG+.ltsphp.hs....................................-lostlhttG.........................h...t.............t............thhctst....thshp.ha.hWppsF.supsl.lcHsYpPs......sutul.........................t...t..C.t.t..t.h.th................hh.pplsYlLsTussWt.tsItcFpLplctts.pt..llshCh...tsl++lssp.....................................phphppcsFhPpcD........................................................ 
0 15 27 31 +14263 PF14416 PMR5N PMR5 N terminal Domain Anantharaman V la_psag Manual Domain The plant family with PMR5, ESK1, TBL3 etc have a N-terminal C rich predicted sugar binding domain followed by the PC-Esterase (acyl esterase) domain [1]. 29.20 29.20 29.20 30.30 28.00 28.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.22 0.72 -3.92 63 964 2011-08-19 17:52:04 2011-08-19 18:52:04 1 11 31 0 619 937 0 55.20 41 12.91 CHANGED sppCDlhpGcWVhDsst..P......LYpspsCs..alppthsCtcsGRPDpsYhp.WRWpPcsCs ...............s.tpCDla..p.GcWVh....Dss......t.....P........LYs.s.p.sCs..alppphsChpsGRPD.p.s.Yh+.WRWpPpsCs....... 0 64 359 499 +14264 PF14417 MEDS MEDS: MEthanogen/methylotroph, DcmR Sensory domain Anantharaman V la_psag Manual Domain MEDS is prototyped by DcmR and is likely to function with the PocR domain in certain organisms in sensing hydrocarbon derivatives [1][2] The MEDS domain occurs fused to Histidine Kinase and as standalone version [2]. Sequence analysis shows that it is a catalytically inactive version of the P-loop NTPase domain of the RecA superfamily [3]. 28.70 28.70 29.50 28.70 28.00 27.80 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.09 0.71 -5.24 30 260 2012-10-05 12:31:09 2011-08-24 23:25:56 1 51 146 0 138 283 15 172.90 20 39.54 CHANGED hctthRpSGl-llG.-lPaGTHhCQFYpTcE-Lh-lllPYhKuGLEsNEhChWlsop..P.plE-AKpuL+cslP-l...-hYL-+GQl-Ilsh..hh...hp-ushDspRllsshlcK.hscAlt.....pGYcGLRlsGsshWhtKp..-asshssYEpclDuslssp...phhuLCsYsl-chsus-llDllssHpFsLlK+ct+Wpp .......................................................h......................sH.shhYp.sp.p-hhshhssFlppGLts.sE.....shhhss...stpht.lp.ptL....................hh.ssup.l.plh...s...h...............spushss..schlsthtph...hspuht......tGhpslRlhG-.hs.Ws..tcs....sphs.thh.paE.shlNphh.sst...shh.hL.CsYDt.......pphssp...s....lt-shtsHshhh.........t........................ 
0 61 101 117 +14265 PF14418 OHA OST-HTH Associated domain Anantharaman V la_psag Manual Domain OHA occurs with OST-HTH [1]. 25.00 25.00 25.40 26.60 24.40 24.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.28 0.72 -4.48 28 75 2011-08-27 06:30:04 2011-08-27 07:30:04 1 8 27 0 69 81 0 74.20 23 9.32 CHANGED ...tps.hhFs..GGRYuhActL+cps..th+shoLGpls+lVQlAlp.ppllh.Ypps.sLhP...hpssppsssshhstssppp ..............s.......hs..sGRYshAcpL+cps..hh+shsLGclhclVpluls.c+hhh.Ypss.tlhP...hptstthspshhs.....s................ 0 29 45 59 +14266 PF14419 SPOUT_MTase_2 SPOUT_MTase_11; AF2226-like SPOUT RNA Methylase fused to THUMP Anantharaman V la_psag Manual Family SPOUT superfamily RNA methylase fused to RNA binding THUMP domain [1]. 28.40 28.40 28.60 28.40 27.80 27.60 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -10.93 0.71 -5.08 28 37 2012-10-01 22:53:19 2011-09-01 23:30:29 1 1 37 0 31 42 0 175.20 55 49.03 CHANGED KlslVQMPYhGDhcus+phGE+IGRAAQuFEVKELIIAP+cplsAaELhpFl+GV+cGQcSRYpIQ++uYshcVc+VPVhVhDLYQllRDK+R.pscllIlTDPKGcplocVK-+Lucsh.....+tu+EVllFlGSREGIPpGlFRFADaVlDLAPahTFATEauIPusLlALholYEEtt ........................KlslVQMPYhGDhcusRphGE+IGRAAQuFEVKELIIAP+cphsAaELhpFl+GV+cGQESRYpIQ+cuYshcVcKVPVhVaDLYQllRDK+R.ppcllIlTDPKGcplscVK-+LAcsh.....+hu+EVllhlGS.REGIPp.....Gl..FRFADaVlDLAPa...hTFATEauIPusLlALholYEEth..................... 0 11 15 21 +14267 PF14420 Clr5 Clr5 domain Bateman A, Wood V agb Wood V Domain This domain is found at the N-terminus of the Clr5 protein which has been shown to be involved in silencing in fission yeast. This domain has been found to often be associated with proteins that contain ankyrin repeats and large regions of disordered sequence [1]. 
22.30 22.30 22.30 22.40 22.20 22.20 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.67 0.72 -4.07 93 327 2011-09-06 09:24:57 2011-09-06 10:24:57 1 30 74 0 281 320 0 54.30 29 10.23 CHANGED tspcW-th+shIpcLYh......pcphsLc-VhchM.....cpcasFpA....o...................c+tYcp+hc.cW..GhpK ............spsW.-sh+shItcLYh......pcphsL...c-VhchM.....cppasFps....o...................................................................p+tappphc.cW..shtK................................................................ 0 54 142 232 +14268 PF14421 LmjF365940-deam CDD_CDA_1; A distinct subfamily of CDD/CDA-like deaminases Iyer LM, Zhang D, Aravind L la_psag Manual Family A distinct branch of the CDD/CDA-like deaminases prototyped by Leishmania LmjF36.5940. Members of this family are widely distributed across several microbial eukaryotes such as kinetoplastids, chlorophyte algae, stramenopiles and the alveolate Perkinsus. Domain architectures suggest that these proteins might possess mRNA editing or DNA mutagenizing activity [1]. 
45.70 45.70 134.30 59.00 32.40 23.90 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.63 0.71 -4.92 12 36 2012-10-02 00:10:39 2011-09-07 19:24:38 1 2 20 0 17 35 0 133.90 25 37.19 CHANGED shhslhh-p.psahsphsshh.p....ppscs.Wh+Ks.p+PVlssLh..Vchc....sthst.sup.p................................hhhulNhElShPoGShCSEpNAlGp...............lAslul.........................................................................................................PTps....................................hRt.................................h.s............tst+ps.........................NPLaPCGsCpphL+Klsc.....hssshplhhF-ss.pscplhhhsls ...s.....hhp..t.....h............t..ah+ps..+.Vls.lh..h..................................................h..uhNhElShPoGShCuEpsshup.hs.........................................................................................................................................................................................................................................................................t...........................pP.h.shs.s.phL.+ltp......ss.hhlhhats..p..t.lh............ 0 10 14 17 +14270 PF14423 Imm5 Immunity protein Imm5 Iyer LM, Zhang D, Aravind L. la_psag Manual Family A predicted Immunity protein, with an all-alpha fold, present in bacterial polymorphic toxin systems as an immediate neighbor of the toxin [1] . 
25.00 25.00 35.50 35.50 23.50 23.40 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.17 0.71 -4.67 12 38 2011-09-07 22:45:15 2011-09-07 23:45:15 1 3 23 0 12 41 0 171.10 29 58.91 CHANGED ppIEK....Lpc.IspsuhGHLsLshRlcLM+pIsssph...........lpKlhhpCspKssuhascpFss-s.l.plLtchp..paLYpscGsh-pl.s.s-RhcsYlpps.-ps.-hssaslluL..GaAltsDAuolls.tDYsu.ED....DssaD.EuWssDFlsulAhSGu..PFs..-sGssE+R+cYWhWYlphslphsp .......................................................lp+....hht.lttss.GcLsLshRhplhpthspsp...............lptlthhCsc+shshWscpFsssp.l.thLp...phL.pscus.......hcphts.ht+.p.asp.h.ppshshsthshhhl..uaAAssshs..osl.p..p...a..ss..cD.....Ds-hDPEua-ssalsu.AhuGGhsa....s-psssEtRRtaWhWYLs.sls...h.... 1 3 10 11 +14271 PF14424 Toxin-deaminase DEAM-TOXIN1; The BURPS668_1122 family of deaminases Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Burkholderia BURPS668_1122 [1]. Members of this family are found as toxins in polymorphic toxin systems in a wide range of bacteria and in the eukaryote Perkinsus. Members of this family typically possess a DxE catalytic motif in Helix-2 of the core fold instead of the more common C[H]xE motif. The Perkinsus versions are predicted to be inactive [1]. 
29.60 29.60 31.20 31.20 27.30 28.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -11.09 0.71 -4.18 14 120 2012-10-02 00:10:39 2011-09-08 05:48:19 1 24 101 0 32 108 0 131.10 30 15.23 CHANGED ptphscl+tshs............hchtNhulAchchph......stphhuhSt.spstt......t.............h..ps.....ssplhps....................tsssphh.....sRhsDoEhKlLcpltppltssshss.................GplslFoppssCpSCts.llppFptcaPpIphssh....s .............................................................t....tph+.phst........thpp.uNhAlAch.pltuh......pphh.A.Sthsphpt......t.........................hs.ps.........ps.hFcs..t..................tpssh.h..................Rsh..DoEhKlLpsIAppLss..sppss..................Gp..IcLaTEhssCsSCs.........s.VItpFpp+Y.PNIplplh...t................. 0 5 25 26 +14272 PF14425 Imm3 Immunity protein Imm3 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted Immunity protein, with a mostly all-alpha fold, present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1] . 25.00 25.00 25.40 26.40 24.80 24.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.48 0.71 -4.13 26 52 2011-09-08 18:59:05 2011-09-08 19:59:05 1 2 39 0 8 34 0 116.30 55 95.29 CHANGED McDWEYNELF-AIpEsYcEhL---R.Ga+YAIA+luDE.F...D...NL...GKIEDVIVDTAIGEIslsHc.+VFlGhIcGITKRLShFN.pEAp.....sELThEEI+DLopRINpVlEGLcNVclDYpPSs ..................MKDWEYNELF-AIpEsYcElL-E-R..GY+YAIAKLuDE.F..D...NL...GKI..EDVIVDTAIGEIulsHc.KVFlGhI-GIT+RLSpFNsQEAt.....sELTlEEIKDLSpRINKVIEGLKNVclDYsPSs.............. 0 4 6 6 +14273 PF14426 Imm2 Immunity protein Imm2 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted Immunity protein, with a mostly all-alpha fold, present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 
35.70 35.70 39.80 39.60 35.20 34.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.72 0.72 -3.97 5 9 2011-09-08 21:17:19 2011-09-08 22:17:19 1 1 9 0 1 7 0 59.30 46 73.66 CHANGED MLEVlsLILSAGRuPD+V-haHpctI+cLLpEIsLssLLcDlPuDEAuELRcDLRlLKLl MhsVIhhILSGGhaPDh.-.hh+ctIhchlhsIsLssLLpslPuEEAE.hRaDLRlLKhl. 0 0 0 1 +14274 PF14427 Pput2613-deam Pput2613-DEAM; Pput_2613-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Pseudomonas Pput_2613 [1]. Members of this family are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 25.00 25.00 28.40 54.70 22.70 17.50 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.47 0.71 -4.42 4 6 2012-10-02 00:10:39 2011-09-08 22:47:24 1 5 5 0 2 7 1 116.50 34 4.32 CHANGED pF.uSu+Gs.HpAhsslpDssGplpspuhhh.SGNMTpsEttLGFPcsSLATHTEuRhs+pls.pp....GDhhlI-GpYsPCspCKGtMphtupcoGAclpYpWspsst..schWpsGph+cp+ ....F.uSu+Gs.HsAshslaDssGslpspuslh..SGsMTcsE+pLGFPcsoLATHTEuRhl+cls.ss....GDhhlI-GpYPPCs....uC+GtMpttupcoGupIpYpW.psts..scpWpttp.pp.p........ 0 1 1 1 +14275 PF14428 SCP1201-deam SCP1201-DEAM; SCP1.201-like deaminase Iyer LM, Zhang D, Aravind, L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Streptomyces SCP1.201 [1]. Members of this family are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 
25.00 25.00 36.00 35.40 22.00 21.50 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -11.00 0.71 -4.46 25 44 2012-10-02 00:10:39 2011-09-08 23:43:07 1 10 31 0 24 53 0 131.40 25 15.12 CHANGED spphsPshsssthtth..............sthTpGplFctsGp....lsshh..Su..ccp.tpthhshhtspu.........s.hshssHVEsKhAhhMpc.......suhccuslsIN..ssPCst............CcphlPtlLPcGSoLpVahst......sstlcGtu.t ...............................sP.hhsshh.th..h..........ts.hTpGplaptss.t...hsshh...Su....pcp.hptlhphhtshs.........s.hshssHVEsKhAhhMpc.........suhcpuslhIN..ssPCss.........t....CcphlshlLscGupLpVhhst......thhhtuh..h................ 0 11 21 23 +14276 PF14429 DOCK-C2 C2 domain in Dock180 and Zizimin proteins Zhang D, Aravind L la_psag Mannual Family The Dock180/Dock1 and Zizimin proteins are atypical GTP/GDP exchange factors for the small GTPases Rac and Cdc42 and are implicated cell-migration and phagocytosis. Across all Dock180 proteins, two regions are conserved: C-terminus termed CZH2 or DHR2 (or the Dedicator of cytokinesis) whereas CZH1/DHR1 contain a new family of the C2 domain [1][2]. 
25.30 25.30 25.70 27.50 25.00 25.00 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -11.24 0.71 -4.65 45 1118 2012-10-10 12:23:49 2011-09-09 02:18:47 1 34 216 2 631 942 4 187.60 30 10.24 CHANGED h.ssapNpLalthpphshs.....Kpsp.............t+Nlhlslplpsscsp.......pshpsIastsss.....phpsphtoslha+scpspat-plKlpLPhplspptHLhFohhcsssp......psppt....p.huaualPLhc..sGph.............lpssphsL.lh..............tp..ttYhphs.t....................hhcss+shhplpsplsSohhspsssl ............................sshRNcLYlh.pphcas............+ttp............................spsRNlpVplphhsu-sp......................phhpsI..astuus.............htsph.ho.s.....V.haHs.p.s...........Pc.ah-plK..lpl....P.....hph........hpp.......p.HLhFoFhHhSsp.......................pppcpt............Ep.hGau..alPLhp...c.Gp................................lpsup..apLsV................ttp.sssY.L.sl.ssst.tht........................................................................shphscu.sKssFplpohlsSThhTQsspl.................................................................... 0 183 251 418 +14277 PF14430 Imm1 Immunity protein Imm1 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted immunity protein, with an alpha+beta fold and a conserved C-terminal tryptophan residue. The protein is present in a wide range of bacteria in polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 
25.70 25.70 25.80 26.20 25.00 25.30 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.51 0.71 -3.98 26 58 2011-09-09 15:11:35 2011-09-09 16:11:35 1 1 41 0 26 66 2 125.30 16 90.44 CHANGED lpshhstt.......pssshhlso.s-l....-tllcchsthshs.....hsplhhs.s-...st....hshlssGl...s.sshuh.hhhhs....ss.s..sthhstssss.......tt..hhashssssp-..a..Ps....sppls....l-ssppAstpFhpsG.p+PsslsWppt .........................................................h................tt.h.hts.tpl....pthlpth.tt.t.............hhplhhs.ss....s.........shlssul...p..sphuh..lhhhs.........ss.s....tthhssssss.....stt......hp...hsssss-.....a..Ps....sstlsl-psppAlp-Fhp..ou..p+PsslpWp..h... 0 7 19 25 +14279 PF14431 YwqJ-deaminase YwqJ-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Bacillus YwqJ [1]. Members of this family are present in a wide phyletic range of bacteria and a few basidiomycetes. Bacterial versions are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 25.90 25.90 25.90 28.00 25.10 25.10 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.56 0.71 -10.86 0.71 -4.27 14 91 2012-10-02 00:10:39 2011-09-09 18:36:12 1 16 76 0 42 98 2 125.80 27 22.31 CHANGED hshhtspshstpchs.sshuuslchp......sGchhpuhstttp.............sslHPhlpshlsph...............pthpp.a..........tsGppAEltulsptLhp..........................tstchhshthpth.....hshtpGthh.sCssCshllpphsh ...................................ttp...........th..sssAuulshc..Gp.....hhputuspss............sssLHPhlpchLDshss.........................sppcp.a...............sGpCAEshhluctLtsh-tt...............t.tttshp.tu+lpstplRps.....scshcG.....s..htsPCcsCsslhsphs......... 
0 8 31 38 +14280 PF14432 DYW_deaminase DYW_Deaminase; DYW family of nucleic acid deaminases Iyer LM, Zhang D, Aravind, L la_psag Manual Family A family of nucleic acid deaminases prototyped by the plant PPR DYW proteins that are implicated in chloroplast and mitochondrial RNA transcript maturation by numerous C to U editing events [1]. The name derives from the DYW motif present at the C-terminus of the classical plant PPR DYW deaminases. Members of this family are present in bacteria, plants, Naegleria, and fungi [2]. Plants and Naegleria show lineage-specific expansions of this family. The classical DYW family contain an additional C-terminal metal-binding cluster composed of 2 histidines and a CxC motif and are often fused to PPR repeats. Ascomycete versions, which are independent lateral transfers, contain a large insert within the domain and are often fused to ankyrin repeats. Bacterial versions are predicted to function as toxins in polymorphic toxin systems [2]. 25.60 25.60 25.70 25.60 25.50 25.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.90 0.71 -4.10 17 1779 2012-10-02 00:10:39 2011-09-09 19:11:19 1 778 129 0 1096 1827 0 121.80 40 20.51 CHANGED sssshsps.....hphhuG.ts+...........phhpRh.ps.shh.spsp.hhH.slcspcpp...................hhhpHuEK.tlshuhhp.................hhhps.....................................................................................................................................+lCsDCashhphluphhsp-IhVpDs.sph+hFc 
.................................................................................................................ht.hh.............HtFhssDp.sHPptppIh.......t.Lpclht.c.h..c.p.t..G..Y..h...P-op.h.V.La..Dl.-.-....E.pK..................................h.L.t...hHSEKLAlAFG...Llsossu.....................sslRlhKNL....................................................................................................................................RlCsDCHsuhKhIS+lh.sREIllRDs.sRFHHFc......................................... 0 111 754 916 +14281 PF14433 SUKH-3 SUKH-3 immunity protein Zhang D, Iyer LM, Aravind L la_psag Manual Family This family belongs to the SUKH superfamily and functions as immunity proteins in bacterial toxin systems [1]. 24.60 24.60 24.60 25.50 24.50 24.30 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.46 0.71 -4.41 35 175 2012-10-01 20:46:44 2011-09-09 19:50:34 1 3 133 0 43 157 2 134.00 22 81.79 CHANGED spplhphLppuGWh.......tsRplshshhhpthtpt.....ua.lhssstp..FLpcFGsLplt.......................tppssstshphsPhhsh.....tpppphpphtph.....lssplhPlGpht...ssthhLhls....-sGplashtp...........thhhlGsshppulpsllpupt.t ................................................tl.thLptuGWh.......psc..p..h..s...hsh......hhcthtpt..........tatl..hs..ushc..hhpcaGuLpht..t....................tpphsstshthcPhpsh.............h.tpphtphtpt.....ls.pplhPlGptt.....psphhlhls....ppGclYuh.cs.............h.hhhGsshppulppLltut..t....................... 0 11 30 39 +14282 PF14434 Imm6 Immunity protein Imm6 Iyer LM, Zhang D, Aravind L la_psag Manual Family A predicted immunity protein, with an alpha+beta fold (mostly alpha helices). The protein is present in polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 
25.00 25.00 30.70 30.60 21.50 20.60 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.36 0.71 -4.41 29 64 2011-09-09 19:11:10 2011-09-09 20:11:10 1 1 38 0 18 54 0 119.00 28 74.73 CHANGED hspstahLsluptlhshl.cppp.p...........ph..ccuL-tCW...pal-s.cphsuDpLYshL-s.pD.pslhhahp..pc.ccpp.shWssIhsAluhsuhhA.YphEscca.lPpslEslD.-ppl-hahpphpchh .................h..hstahLtlu-tlhshlppppht...........phh.ccuL-tCa...pal-s.cphsuDpLYshL-s.tD...pslhhahp..p-.ccpt.shWssIhpuluhsuh.A.Yph-sc+a.lPpslEslD.pphlchahpshpch.................. 0 5 7 10 +14283 PF14435 SUKH-4 SUKH-4 immunity protein Zhang D, Iyer LM, Aravind L la_psag Manual Family This family belongs to the SUKH superfamily and functions as immunity proteins in bacterial toxin systems [1]. 25.50 25.50 25.50 25.50 25.20 24.40 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.03 0.71 -4.56 32 180 2012-10-01 20:46:44 2011-09-09 20:14:07 1 4 115 0 50 183 1 163.60 18 61.38 CHANGED stppltphasttt...............lhphpptsh.....sthhp.pspcFLppsGlPpss......hhhhtss................hstlsph.h..........................stthtpahhlGpss....ts.lslct.soGpVhhlssss.......phhhlNoslspFspslhhhtphhtthtt.tt...................................tpp.pshstphppplptlDstAht.........stsaWstlhppl .......................................................................t.........s.t................lhph.ttth.....t..h...stphL....sGlP.p.......hh.hts.................h..htph...........................stphtpahhlGpsh.........ss.lslp....ssuplhh.ls.ps..............p.hhlNoslspFspslhhhtchhpthhths.....................................tpt.tphhtp..hp...p.....plttlDsssht..........tsaWs.hhpp............................................. 0 11 35 48 +14284 PF14436 EndoU_bacteria Bacterial EndoU nuclease Zhang D, Iyer LM, Aravind L la_psag Manual Family This is a bacterial verion of EndoU nuclease. It is found at C-terminal region of polymorphic toxin proteins. 
25.50 25.50 25.70 25.70 25.40 25.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.70 0.71 -4.30 57 308 2011-09-09 20:44:37 2011-09-09 21:44:37 1 33 244 0 65 277 3 120.20 21 21.59 CHANGED sspshpHlhpG-hspt........thsGhHttsth..................sp.l.p.ht.........ss.pGlhpsphph.t.stt.............tstoohFPpp..WospclhstlppAhpspptpt.................ssphtutsss..............Glplc.shhpsp....GplhosaPhh ..................................................................s.....tHh.tsp..t..........h.GhHt.s.....................ht.lhttht..........pGlhphphph.t.stt...................tstpohFPcs..WocpcIhptlppAhpstthht.................sphhtstsps..............Gl+lc.shhcst....GplhohaP..................... 0 13 31 47 +14285 PF14437 MafB19-deam MafB19-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Neisseria MafB19 [1]. Members of this family are present in a wide phyletic range of bacteria and are predicted to function as toxins in bacterial polymorphic toxin systems [1]. 28.90 28.90 29.00 28.90 28.80 28.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.99 0.71 -4.56 20 53 2012-10-02 00:10:39 2011-09-09 23:23:34 1 17 53 0 8 55 3 145.10 37 12.83 CHANGED hsshsphtshp.tlG.shstt...............tsshsslApscIsGppFhssNpsu+s...lus....spsshhssplthp.t.........spsshsssHAElsslQQAa-t..Ghsh.GpshshhVs.+-lCshCp....uslsshAcchGLcpLslaspt..oG..pshhasss ...........................................h...................................psp.pVlAcusIsG..cpFhDsNQoA+....lus..sspPTLhutplphchp.....t..tPN.sshusuHAEIulIQQAYsA..GhTt.GtsMshhVsGK-VCuaCp....GslsuhAcpoGLcpLslpsss..oG..pohYap.................. 0 3 3 4 +14286 PF14438 SM-ATX Ataxin 2 SM domain Anantharaman V, Eberhardt R la_psag Manual Domain This SM domain is found in Ataxin-2 [1]. 
28.40 28.40 28.40 28.40 28.30 28.30 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.49 0.72 -4.05 52 367 2012-10-01 22:42:21 2011-09-11 08:17:31 1 11 227 28 216 462 1 75.30 29 8.85 CHANGED cRhhahhssllGppVpVplpsGsh.YcGlhpohssp...........phsllL.chs+hlsss.t..........ppstspphhcslllpspDllp ...........RhlahlssllGppscVp.l+.sGsh.YcGlF+ohos............chslsL.chs+chsspss.............ssspppsls-ohlhpssDls...................................... 0 66 108 163 +14287 PF14439 Bd3614-deam Bd3614-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Bdellovibrio Bd3614 [1]. They are typified by a distinct N-terminal globular domain. The Bdellovibrio version occurs in a predicted operon with a 23S rRNA G2445-modifying methylase suggesting that it might be involved in RNA editing [1]. 38.50 38.50 39.20 130.90 37.70 37.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.65 0.71 -4.31 4 4 2012-10-02 00:10:39 2011-09-11 22:36:34 1 1 4 0 4 4 0 124.00 36 37.69 CHANGED pshacRDRcVsAhLlssp.GtlhssAsNoNucNtsLHAEhNLLhPhhhtp.......................t+sl.sGspLhVTLQCC+MCAAhlsthu-t.ut...hcVsYhpEDsGuLARcTtLcp+GhEp+hst .pshacRDRcVsAhLlssp.GtlhssAsNoNucNtsLHAEhNLLhPhhhtp.......................t+sl.sGspLhVTLQCC+MCAAhlsthu-t.ut...hcVsYhpEDsGuLARcTtLcp+GhEp+hs.. 0 2 3 4 +14288 PF14440 XOO_2897-deam Xanthomonas XOO_2897-like deaminase Iyer LM, Zhang D, Aravind L la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Xanthomonas XOO_2897 [1]. Members of this family are present in a wide phyletic range of bacteria and are predicted to function as toxins in bacterial polymorphic toxin systems [1]. The Xanthomonas XOO_2897 lack an immunity protein and is predicted to be deployed against its eukaryotic host [1]. 
32.60 32.60 33.40 36.00 31.40 31.40 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.97 0.71 -4.39 16 52 2012-10-02 00:10:39 2011-09-11 23:02:42 1 9 41 0 20 57 0 113.50 36 17.59 CHANGED h.s.sshshsG.......sptsshhtpss................pphlhtp........................................tssHuEcpllpplpp....tsltPsplhclYoEhpPCst.t....Ctphlcs.h.ssscloaohs..aGt-ttst...hppuhspLhp ...................................................................s......s.hG.......s..sshhhps...............t.tp.hhtpu.t...................................ssssHsEhphhppLcs.......hsVsPppVlELaTELEsCchPGsh....Cuchl+cpa.PpsRlopsss..YGsDpsu...RppGhppLl.t.......... 0 5 14 19 +14289 PF14441 OTT_1508_deam OTT_1508-like deaminase Iyer LM, Zhang D, Aravind L, Eberhardt R la_psag Manual Family A member of the nucleic acid/nucleotide deaminase superfamily prototyped by Orientia OTT_1508 [1]. Members of this family are present in a wide phyletic range of bacteria,including several intracellular parasites and eukaryotes such as fungi, Leishmania, Selaginella, and some apicomplexa. In bacteria, these deaminases are predicted to function as toxins in bacterial polymorphic toxin systems [1]. Versions in intracellular bacteria lack immunity proteins and are likely to be deployed against their eukaryotic hosts. Eukaryotic versions are predicted to function as nucleic acid (either DNA or RNA) deaminases. Among eukaryotes, some fungi show lineage-specific expansions of this family. Many fungal versions are fused to a distinct N-terminal globular domain. Various fungal versions are fused to domains involved in chromatin function. Apicomplexan versions are fused to tRNA guanine transglycosylase domain [1]. 
25.00 25.00 25.10 25.40 24.80 24.40 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.54 0.71 -4.39 23 207 2012-10-02 00:10:39 2011-09-11 23:54:14 1 6 86 0 171 219 0 165.50 15 29.16 CHANGED htttthphtshph.sht.....................phts.thhtthptpt...tt.......................pshhhthppp..............................................t.psplHAEhhllcplppp...................st..YlG....sSKhsCssCphhlpths.............ttpsshhts....cspsa.tW.phss...............tpspthpphh ..........................................................................................................................hh.t..................................................................................................h........t.......................................................phpstlHAElpllpahtppt.............................hhss.pYIG.sSK.uChhCphahpths...............phhhhts.....HsplY..pW.phPs......................ht.h.......................................... 0 35 93 140 +14290 PF14442 Bd3614_N Bd3614-N; Bd3614-like deaminase N-terminal Iyer LM, Zhang D, Aravind L la_psag Manual Family This is a globular domain that occurs N-terminal to the Bd3614-like deaminases, which are predicted to be involved in RNA editing [1]. 
25.00 25.00 65.00 63.60 22.10 21.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.75 0.71 -4.20 4 5 2011-09-12 09:35:36 2011-09-12 10:35:36 1 2 5 0 5 5 0 126.80 35 37.92 CHANGED puSscccDVAFLlAsssu-...u-AcssVaaApussssss.cSAlV+LlLG..........AppuusRspusuW..hRtRIhTT.tulSshDRAhVKVsApRsTth......slcscDcuAsA......st.sssDDscs........LphhDsTshAptAlpRGh.s ...puSscccDVAFLlAsssu-...u-AcssVaaApussssss.cSAlV+LlLG..........ApEuuGRspucuW..hRpRIaTT.+ALSshDRAlVKVsApRATsl......clcsDDDuAsA......st.sssDDDDDDDAs......LEhcDlT-aARtAlcRGAp.s 0 3 4 5 +14291 PF14443 DBC1 DBC1 Anantharaman V la_psag Manual Domain DBC1 and it homologs from diverse eukaryotes are a catalytically inactive version of the Nudix hydrolase (MutT) domain [1]. DBC1 is predicted to bind NAD metabolites and regulate the activity of SIRT1 or related deacetylases by sensing the soluble products or substrates of the NAD-dependent deacetylation reaction [1]. 25.00 25.00 26.30 26.40 22.70 21.50 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.61 0.71 -4.62 13 186 2012-10-02 00:00:35 2011-09-12 21:55:47 1 8 89 0 108 182 0 122.50 51 12.57 CHANGED .sHPs+llKFLVGp+sK.EsMAIGGpWSPSLDGsDPpsDPpVLI+TAIRssKAhTGIDLSsCTpWYRFsEl+YhRstppt.....sss+lETVVlFLPDVWshlPTcp-WcsLptshpppLscphptspppss ............pHPs+hlKFLV.G..hKu+..E..........sMAIGGpWSPSLDG..sDPppD.PsVLI+TAIRssKALTGIDL..SsCT.............pWa...RFAEl..........+YhR.stppt.............................sPs+lETVVlFhP..DlWphhPohpEW-slppthppphs.cphpt.....s..................... 
0 32 49 73 +14292 PF14444 S1-like S1-like Anantharaman V la_psag Manual Domain S1-like RNA binding domain found in DBC1 [1] 25.20 25.20 25.70 25.30 25.00 24.90 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.61 0.72 -4.24 11 185 2012-10-03 20:18:03 2011-09-12 22:52:37 1 6 68 0 90 173 0 56.30 57 7.05 CHANGED QRVF..TGh..VTKlp-sFGFlD--VFFQhSs.VKGthPpVGDRVLV-AuYNssMPFKWNAsR ......QRVF..TGlVT+L..H.DsFGhVDE-VFFQl.........Ss.VK...G+.hP.p.VG-+VLVcAsYNPs...sh.WNA.+.............. 0 18 26 48 +14293 PF14445 Prok-RING_2 Prokaryotic RING finger family 2 Burroughs AM, Iyer LM, Aravind L la_psag Manual Family RING finger family found sporadically in bacteria and archaea, and associated with other components of the ubiquitin-based signaling and degradation system, including ubiquitin and the E1 and E2 proteins. The bacterial versions contain transmembrane helices [1]. 22.10 22.10 23.00 91.00 21.20 21.90 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.31 0.72 -4.02 2 3 2012-10-03 15:03:13 2011-09-16 18:03:25 1 1 3 0 2 3 0 57.00 53 61.96 CHANGED DPcSFupasCDLCpoutslutLRQCVlCGRWuCsuCWpDEYYhC+SCuGlhplh.Lc DPcSFSRYoCDLCNooaPlu-LRQCVLCGRWACuSCWpDEYYsCKSCuGIIsLHLLc.. 0 1 2 2 +14294 PF14446 Prok-RING_1 Prokaryotic RING finger family 1 Burroughs AM, Iyer LM, Aravind L la_psag Manual Family RING finger family found sporadically in bacteria and archaea, and associated in gene neighborhoods with other components of the ubiquitin-based signaling and degradation system, including ubiquitin, the E1 and E2 proteins and the JAB-like metallopeptidase. The bacterial versions contain transmembrane helices [1]. 
29.10 29.10 29.10 29.30 29.00 28.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.56 0.72 -4.23 14 24 2012-10-03 15:03:13 2011-09-16 19:23:01 1 3 21 0 11 26 7 52.80 34 12.45 CHANGED .phpsstCssCGcpht.p-DlVVCPcCGAPYHRpCapchGpChphs.pssshshp. ......phpsspCshCGcphh.p-DlVlCspCGAPYHRpCapchGpChh.s.pttshph.h...... 0 7 8 11 +14295 PF14447 Prok-RING_4 Prokaryotic RING finger family 4 Burroughs AM, Iyer LM, Aravind L la_psag Manual Family RING finger family domain found sporadically in bacteria. The finger is fused to an N-terminal alpha-helical domain, ROT/Trove-like repeats and a C-terminal TerD domain [1]. The architecture suggests a possible role in an RNA-processing complex [1]. 27.00 27.00 27.00 27.60 26.90 26.90 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -9.18 0.72 -4.42 10 46 2012-10-03 15:03:13 2011-09-16 19:42:08 1 2 46 0 15 52 1 54.70 37 7.14 CHANGED hhhQpP-QPClhCutsuos+slsPCuHlVCcsCFDGscYSuCPlCts+l-sucPF ...................h.p..hshlhshhsspp.sshLsCGHl.I.PcsTFPh..-RYN...GCPFCGsPF-sus........... 0 10 12 14 +14296 PF14448 Nuc_N NUC_N; Nuclease N terminal Zhang D, Iyer LM, Aravind L, Eberhardt R la_psag Zhang D, Iyer LM, Aravind L Family This is a conserved short region that is found in many bacterial polymorphic toxin proteins [1]. It is often located before C-terminal nuclease domains [1]. 25.00 25.00 27.70 26.20 18.80 17.30 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -8.92 0.72 -4.42 2 47 2011-09-16 20:53:49 2011-09-16 21:53:49 1 7 29 0 1 42 0 54.40 79 19.80 CHANGED hDpsoshh+G..ElL.DGSVhR.uGTNaStphpEAHDuSKASIQSRISNLESGGVKGTtc ....hDoVoshhKG.VEIhPDGSVsR.oGTNYSGKFQEAHDASKASIQSRISNLESGGVKGTG.............. 0 0 0 1 +14297 PF14449 PT-TG Pre-toxin TG Zhang D, Iyer LM, Aravind L, Eberhardt R la_psag Zhang D, Iyer LM, Aravind L Family PT-TG is a conserved region found in many bacterial toxin proteins. 
It could function as a linker that links N-terminal secretion-related domain and C-terminal toxin domain. It contains a TG motif [1]. 23.50 23.50 23.50 23.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.43 0.72 -4.20 37 309 2011-09-16 21:38:00 2011-09-16 22:38:00 1 46 140 0 52 270 1 81.40 27 17.49 CHANGED pppthshhh-hss..lushp-shcshpGpD..hTG-cl.shh-Rshuul.hh.uh.hs..huKlsphstth.thh....+uhcsstcsp ..............t.hchsh-hss.ElsGhhDhtRshsGhD.PsTGE+L.osh-Rl.h.Aushslhuh.hP.....hG+suphsp.sshths........+shtts....t............................................................ 0 11 25 33 +14298 PF14450 FtsA Cell division protein FtsA Bateman A, Mian N, Griffiths-Jones SR, Anantharaman V la_psag Pfam-B_1177 (release 5.4) Domain FtsA is essential for bacterial cell division, and co-localises to the septal ring with FtsZ. It has been suggested that the interaction of FtsA-FtsZ has arisen through coevolution in different bacterial strains [1]. The FtsA protein contains two structurally related actin-like ATPase domains which are also structurally related to the ATPase domains of HSP70 (see PF00012). FtsA has a SHS2 domain PF02491 inserted in to the RnaseH fold PF02491 [2]. 
31.80 31.80 31.80 31.80 31.70 31.70 hmmbuild -o /dev/null --hand HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.43 0.71 -4.27 219 7122 2012-10-02 23:34:14 2011-09-17 14:26:47 1 10 3572 5 1490 4911 2477 177.60 22 77.84 CHANGED lshlDlGuupsshslhct.....sshtt..........hpllslG.............ustlo..cclupul......ppslppAEcl...chphu..................ss..hh.........sthp.tppphp.lsh................t.spplsppclscllpsthp..........Ellchlspph........hpssh.ht...p.hssshVlTGus.utlpsl.tchspchh...............lcls..t....................hltsP........aA.ouhul ....................................sslDIGsupssshlhct..........t.t.tt...................hsllshG................ssh....ls........ccl......u......pul.............pp.s.h......p......p.A......Eph...ch.p.h.u.......................ss..hsslsupp.phtsspshsshsppthpppthphh.....................................................thh.ch.phhcp.hshtshch.stuhllTGusuhlpsl.hcsspcsh........................l+ls..p.............................hl.hp.P...husuhu................................................................................................................................................................................................................................................................................................ 0 491 953 1235 +14299 PF14451 Ub-Mut7C Mut7-C ubiquitin Iyer LM, Burroughs AM, Aravind L, Eberhardt R la_psag Manual Family This member of the ubiquitin superfamily is found at the N-terminus of Mut7-C like RNAses, suggestive of an RNA-binding role [2]. 
23.00 23.00 23.00 23.20 22.90 22.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.45 0.72 -4.46 22 253 2012-10-03 10:59:06 2011-09-19 19:24:29 1 2 245 0 99 309 28 79.60 36 34.19 CHANGED MssssFRFatELNsFLs.spRp+thupssscsATlKHhIEALGVPHTEVthlhVNGcssshs+hlp-GDRlsVYPphcshc ..............................lphRhasELscFls..h.t.t.R..t......t.s.ht.p.....sht.sssTlKcllEuLG..V..PHTEVsllL....VN....Gcsss...a....s....a....h....ht....sGD+luVaPhhcs..................... 0 31 68 89 +14301 PF14452 Multi_ubiq Bac_multiUb; Multiubiquitin Iyer LM, Burroughs AM, Aravind L, Eberhardt R la_psag Manual Family A ubiquitin superfamily domain that is often present in multiple tandem copies in the same polypeptide. Members of this family are associated in gene neighborhoods, or on occasions fused to, bacterial homologs of components of ubiquitin-dependent modification system such as the E1, E2 and JAB metallopeptidase enzymes and a distinct metal-binding domain [1]. The E2/UBC fold domain appears to be inactive. The JAB domain in these operons is usually fused to the E1 domain [1]. 24.50 24.50 25.20 24.70 23.70 23.50 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.29 0.72 -8.88 0.72 -4.00 24 160 2012-10-03 10:59:06 2011-09-19 22:07:54 1 7 84 0 69 158 3 72.20 21 64.51 CHANGED pshphplsscphshscshloGpplhtlAthssss......phshhphh.ssppcslt.s-sV-LtppGhc+Flshpsc .......hphtlsscphphscstloGpplhplAthssss......thsh.hphh....s.sptpplt.s-s.l-Lp....ppGhE+Flshp............. 0 22 43 59 +14302 PF14453 ThiS-like ThiS-like ubiquitin Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the ubiquitin superfamily that is often fused to the ThiF-like (E1)- ubiquitin activating enzyme and is present in gene neighborhoods with components of the thiamine biosynthesis pathway [1]. 
26.00 26.00 28.30 27.10 25.60 24.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.46 0.72 -4.12 8 166 2012-10-03 10:59:06 2011-09-19 22:35:31 1 2 166 0 20 104 3 56.80 56 21.39 CHANGED M+IhlNEcplsVc-ssoLatl+sphKs-ADVlIlNGFPlp-DhtLp-sDclhhI+RG ..................................MRlKFNGKELDTchpTSL-FFcslSKN..END..VW.IINGFATKENItLpENDELFCIE+N. 0 7 19 20 +14303 PF14454 Prok_Ub Prokaryotic Ubiquitin Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A Ubiquitin-superfamily protein that is present across several bacterial lineages, and found in gene neighborhoods with components of the ubiquitin modification system such as the E1, E2 and JAB proteins, and a novel alpha-helical protein, which is predicted to be enzymatic [1]. 25.00 25.00 25.20 27.80 22.00 22.00 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.58 0.72 -8.88 0.72 -4.32 7 94 2012-10-03 10:59:06 2011-09-20 00:17:57 1 1 75 0 32 85 3 66.90 41 79.77 CHANGED lphppLpRsFpa....NGhsLsDPsPpho.cpV+DFYSspYPELhsAsl-GPpscsshtpYoF++usGs ....hplpsLpRsFpa.....ss..l.p..LsD.P.ss.shSs-pVh-aYosp.YPELTTAsVcGPpl..cs-ptlYpF+pslGs..... 0 7 22 29 +14304 PF14455 Metal_CEHH Predicted metal binding domain Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A predicted metal-binding domain that is found in gene-neighborhood associations with genes encoding components of the bacterial homologs of the ubiquitin modification pathway including the E1, E2, JAB metallopeptidase and ubiquitin proteins. The domain is characterised by a conserved motif with a CxxxxxEYHxxxxH signature. 
25.00 25.00 95.30 30.90 22.90 22.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -10.97 0.71 -4.87 5 8 2011-09-20 03:51:40 2011-09-20 04:51:40 1 2 7 0 5 11 0 143.90 36 90.99 CHANGED VDPAVSRAKFDR-IGpFcspAsAYRtQGCFLIEAsFPTAFFIFAsPKV+PplIGAAVEIDFoNYDLRPPSVVFVDPFTRpPlARKDLhLsMLRRPpLPGTPP-MISVLhQQpALSLsDFLQANSsEcTPFLCMAGVREYHDNPAHSGDsWLLHRGSGEGCLAFILDKIIKYGTGPVE ..............Vc.tlSRApF-RpluphpspssuaRhpGhhLlpApaPsh.hIFssspltP....htlclshsNaDhpPPSVshlDs.Tt.............ct.tLstsPPthhsshht..........pApssptpPFlCMtGsREYHspsuHoGD.W.haRGSG-hsLuhIL-pIh+hhpssl........... 0 1 3 4 +14305 PF14456 alpha-hel2 Alpha-helical domain 2 Iyer LM, Burroughs AM, Aravind L la_psag Manual Family An alpha-helical domain found in gene neighborhoods encoding genes containing bacterial homologs of components of the ubiquitin modification pathway such as the E1, E2, Ub and JAB peptidase proteins. 25.00 25.00 34.60 28.10 24.70 24.10 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -11.62 0.70 -5.66 11 36 2011-09-20 04:45:34 2011-09-20 05:45:34 1 2 31 0 19 34 1 298.60 19 85.51 CHANGED lsslsLuhhctGshptu-l..s.ssss-hhtpslptWhcsphsshthhs.............t.sssss.hhppsssss.ps.hhhshshssp.lahl....csplpsLcuspPsLupslhsllpcuuups.htlhoPshhhshhuhhaWcucs...Dpcpshphhctps-..sEtthpthh.s.sh.................hhphhsthspphhph.pptphphhtppcshssplssAlscltshlpctp.c.p.s.hshssst.tp...........sltsushltWc-s-hspcllDDahEhttpuusuppt.shl.ls..ssspulpphhcshcthhphhtul-cLLshlup 
............................................hhhhhhptsslp.sDl..s.ssstchhppuhptWhpcpsushphlphth.........ht.stsththhpsssc.ss.ps.lahshshsp...s.latl....tsplcsLcsspPsLutTslsllpcAuths.h.lhTP.shhhshhuhha.WcG-s...D--sshphhpcp.t-s..s.tphpthhsu.sh.......t.........hhphh.chhtpshph.t.pp...hphh..tpppshhtpl.....ss.....tlt.....plts.........h.lptsphp.....thhtsshpsp...........shh.hshlthpp.s........-hstcllDDah-hhspus.spsh.shl.ls..ssspultp.hpchptthphhptlDpLlshlu........................... 0 4 10 16 +14306 PF14457 Prok-E2_A Prokaryotic E2 family A Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in several bacteria. The active site residues are very similar to the eukaryotic E2 proteins [1,2]. Members of this family are usually fused to E1 and JAB domains C-terminal to the E2 domain. The protein is usually in the gene neighborhood of a gene encoding a distinct metallobetalactamase family protein [1]. 25.00 25.00 27.40 26.70 22.90 22.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.68 0.71 -4.78 12 50 2012-10-02 15:28:41 2011-09-20 18:18:15 1 5 48 0 22 55 8 146.50 22 21.40 CHANGED ttchhtshhtptsstsht.htspttptshthphhVths.psshsstssssI+phEsl..hhhssshshphP.shhLRpDFPhp........sH.p.sh.sps......ss.Clh-ssh.....s-hhhphsh.....pullpplhhWLpcuApGsLhpsspshEPhhhsstss.hlhs.shthtttss ...................................h................ht..th.t..p.ht..t.h.phhl.hsspsthts.ssssl+phEpl.hlhhsss..hshssP.s.shRpDFPsp.......hPHlpss..st.......hplClacssh.....s-h.hshuh.....puhls+lhtWLpcAAtspLpp.ppshEshhh.s..t.lh..........s.......... 0 9 16 20 +14309 PF14460 Prok-E2_D Prokaryotic E2 family D Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in several bacteria. Members of this family lack the conserved histidine of the classical E2-fold. However, they have an absolutely conserved histidine carboxyl-terminal to the conserved cysteine [1,2]. 
Members of this family are usually present in a conserved gene neighborhood with genes encoding members of the Ub modification pathway such as the E1, Ub and JAB proteins. These neighborhoods also contain a gene encoding a rapidly diverging alpha-helical protein [1]. 25.00 25.00 28.90 28.50 20.40 20.10 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.22 0.71 -4.80 17 151 2012-10-02 15:28:41 2011-09-20 19:31:56 1 1 122 0 43 142 4 171.90 23 72.96 CHANGED shpshhphlctlspp.....hshssalss.ssLs......hussthhWWs...........PsspRtlaFcst........thtppstslPhPuLVFhsstp..........shhVaAl+ss.cPs.sTpLapsPaaNVappGplChGs..splP.ctssssphpsWcttFFsSt..............FTHPNstspphhht......shshapchh-sph.psFPpssLlshc .............................................t..............t..........t.hh.s.shLh.........huspphlWas...........PstpRplaFtp..........hthtstpsshPsllatss.tp..........shpVaAl+s...s.cP.stsT.LapsPahNV.....h.s.p.GplChGs..sp..l..P...chp.s...hpt.lptWcphF.a.sSt..............Fo.H.ssspp............hhphhtphhct.t..psFstphLh...t......................................... 0 11 31 39 +14311 PF14459 Prok-E2_C Prokaryotic E2 family C Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A divergent member of the E2/UBC superfamily of proteins found in bacteria. Members of the family contain a conserved cysteine in place of the histidine of the classical E2/UBC proteins [1,2]. Members of this family are usually fused to an E1 domain at their C-terminus. The protein is usually in the gene neighborhood of a gene encoding a JAB peptidase and another encoding a predicted metal binding domain [1]. 
25.00 25.00 83.70 82.40 18.40 17.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.59 0.71 -4.07 4 5 2012-10-02 15:28:41 \N 1 1 5 0 3 5 0 129.40 59 25.17 CHANGED AFDDQAASCAEGQATLDLAVRLLARLYPVLAILPLGSAAShQAQALERLAKSINPKlGIRRSGKS.AolClVAGsTRPsLRCPTFFhGSDGWAAKLSRTDPVGSGSSLLPYGAGAASCFGAANVFRTIFAAQ .....AFDDQAASCAEGQATLDLAVRLLARLYPVLAILPLGSAAShQAQALERLAKSINPKIGIRRSGKS.ATlCVVAGsTRPsLR.CPTFFlGSDGWAAKLSRTDPVGSGSSLLPYGAGAASCFGAANVFRTIFAAQ.. 0 0 2 3 +14312 PF14461 Prok-E2_B Prokaryotic E2 family B Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in several bacteria. The active site residues are similar to the eukaryotic E2 proteins but lack the conserved asparagine [1,2]. Members of this family are usually fused to an E1 domain at the C-terminus. The protein is usually in the gene neighborhood of a gene encoding a member of the pol-beta nucleotidyltransferase superfamily [1]. Many of the operons in this family are in ICE-like mobile elements and plasmids [1]. 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.55 0.71 -10.60 0.71 -4.40 15 108 2012-10-02 15:28:41 2011-09-20 21:03:45 1 3 103 0 27 835 18 129.40 22 23.12 CHANGED t.....h.pasps.sphhttsphsppssptshhtphhht.............pssshslpLVhscp.FshlPstlhls.-.pph...l.sHhts.s...GtLClh...ptsh-papssusst-ll.cphpplLp.pshsuss......ptchhuEasuYWptppssh........hh ..........................................................................thhhh.h..h...................................putshsltlsl.ss..a..t.h.P.th...h.lp...p.s..cphh.......hl.PHlph..s.....upLChhsp.psshs.sps....cush.t-s....h.cphpthLc..sulsusp............cs-htsEFtuYWptppp....h..................................... 
0 7 18 21 +14313 PF14462 Prok-E2_E Prokaryotic E2 family E Iyer LM, Burroughs AM, Aravind L la_psag Manual Family A member of the E2/UBC superfamily of proteins found in diverse bacteria. Analysis of the active site residues suggest that members of this family are inactive as they lack the characteristic catalytic residues of the E2 enzymes [1,2]. They are usually fused to or in the neighborhood of a multi/poly ubiquitin domain protein. Other proteins of the ubiquitin modification pathway such as the E1 and JAB proteins are also found in its gene neighborhood along with a distinct predicted metal-binding protein. 25.00 25.00 25.70 25.40 21.80 21.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.50 0.71 -4.53 5 36 2012-10-02 15:28:41 2011-09-20 22:59:24 1 4 35 0 15 40 1 123.80 31 52.33 CHANGED LPEsDccYL-o+GaTaEsVsDGu++Glll+pFpLPpG+FssspVDlLllLPsGYPDssPDMFYlpPsL+LVsGs+hPpAo-sscpFpG+sWQRWSRH...NssWRsGlDGlaTMLKRVEcALcsAs ...................L.ptDppaLcshGhpa....Eslt-uutphll.lcsasLP.......pG..Y.....stsp.........s-lhlhlPs.uY.Psstl.DMFYspPsL.......phs..sGtt.......lP.s.....sp.sscshp...G+sWQRWSRH...pssWcP...shDslhTalthl-psLttth......... 0 4 8 11 +14314 PF14463 E1-N E1 N-terminal domain Iyer LM, Burroughs AM, Aravind L la_psag Manual Family An uncharacterized alpha/beta domain fused to E1 proteins. This protein is usually present in gene neighborhoods with genes encoding a JAB protein and a predicted metal-binding protein. In related E1 proteins, the E1-N domain is replaced by an E2/UBC superfamily domain [1]. 
25.00 25.00 46.30 45.70 24.60 21.70 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.87 0.71 -4.62 2 3 2011-09-21 18:21:18 2011-09-21 19:21:18 1 1 3 0 2 5 0 148.00 34 32.41 CHANGED M.pssQpNAhMLAulLGssEs-AuERLsRsVLlTAsPGhtsuhhst.sthlh.RTVtVs.pp.sp-spLELVIG-sssRTsh.RlaAslsutGAssslcPVu+huG..PHsLhAAsAACssuAssl+hllDsssLPtsthPhRLDasQLGVP M.pssQpNAhMLASlLGlsEsE.AuERLsRoVLlTAPPGsssA.thAp-VtALLuRTVsVVtpSsss-PsLELVIGDVsPRTS.usRLYAuluSsGATlulcPVuRsuGP.PHuLLAAlAACsVSAAVl+tVlDss-LPtVchPLRLDFDQLGVP. 0 1 1 1 +14315 PF14464 Prok-JAB Prokaryotic homologs of the JAB domain Iyer LM, Burroughs AM, Aravind L la_psag Manual Family These are metalloenzymes that function as the ubiquitin isopeptidase/ deubiquitinase in the ubiquitin-based signaling and protein turnover pathways in eukaryotes [1]. Prokaryotic JAB domains are predicted to have a similar role in their cognates of the ubiquitin modification pathway [2,3]. The domain is widely found in bacteria, archaea and phages where they are present in several gene contexts in addition to those that correspond to the prokaryotic cognates of the eukaryotic Ub pathway. Other contexts in which JAB domains are present include gene neighbor associations with ubiquitin fold domains in cysteine and siderophore biosynthesis, and phage tail morphogenesis, where they are shown or predicted to process the associated ubiquitin [2,4]. A distinct family, the RadC-like JAB domains are widespread in bacteria and are predicted to function as nucleases [5]. In halophilic archaea the JAB domain shows strong gene-neighborhood associations with a nucleotidyltransferase suggesting a role in nucleotide metabolism [5]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.66 0.72 -4.46 111 2146 2012-10-10 14:49:21 2011-09-22 01:37:47 1 14 1332 8 470 3074 346 105.60 29 49.36 CHANGED stphhptlhtpupps.....hPpEssGlllGpt.....................tpphhhhthh.......................................ssppphphht.................ttpppshphluhaHSHPssss...hPSppDtphstt..................shhllssh............th.........shphh ...............................................................................................................................hp...h.ptlhtp.upts.....hP.tEsCGhlhtp............................ptp.hh.h.s.hs...............................................................hcPpt.hhcht.................................hts-hps-lVulhHSHP..s.....uh.....s.....h.S...csD+ch.phpss............hsaallsps.....................ttpthtsh..................................... 0 149 304 409 +14316 PF14465 NFRKB_winged NFRKB Winged Helix-like Godzik A adam de novo domain prediction, confirmed by X-ray structure determination Domain This domain covers regions 370-495 of human nuclear factor related to kappaB binding (NFRKB) protein. 22.20 22.20 22.20 28.60 21.30 20.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.45 0.72 -4.17 10 81 2011-09-23 07:25:46 2011-09-23 08:25:46 1 5 64 2 52 80 0 100.70 48 8.17 CHANGED FFSLLRDllsSTspH..R..Lo..h..LcppLpsWp.ps...PsusLN-Wao.pss.D.WopLLpSAlpFLoG..-sss.hPs-FVPYlEaKsphs.YQWIGAuRDSD.ucLssLCphW .......................FFSLLh-llhhcupso..Ls..h..LE-+VhcWQ.uS.....PASsLNsWaS.hsP.s.WucLVhsALpaLAG.........-sps....lP.S.....uFsPaV.EaK-csQpW+hlGt..upDs-.K-LuALhQlW........... 0 12 15 32 +14317 PF14466 DUF4425 Domain of unknown function (DUF4425) Godzik A adam BVU_3708 from Bacteroides vulgatus, JCSG target GS13500 Domain A small family of bacterial proteins, found in several Bacteroides species. 
Structure determination (NMR and Xray) shows an immunoglobulin beta barrel fold.\ Multiple homologs have been found in human gut metagenomics data sets. 27.00 27.00 28.60 111.90 24.60 23.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.59 0.71 -3.98 7 32 2011-09-24 22:47:10 2011-09-24 23:47:10 1 1 30 5 3 26 0 119.80 39 79.95 CHANGED tslslhhullhVSCus-Dspsss..psula+IslctSGc.cshthulhlssss.....sKlhsE.sGp.lupShss...chhss+hohpTstpsh.hTstGslhSppc..ustpLphhV..YhDGKEV.p. .pslhlshuLlhluCsc--ppsss...+uGhY+IslpQSGshcSFcsSVslsuss....ss+Lhs-.sGc.lusuhSls..EhtosKsohsTstsuh.hTsuGulhStp-..uscpLplslhsYpDGKEVp+p. 0 1 3 3 +14318 PF14467 DUF4426 Domain of unknown function (DUF4426) Godzik A adam Pseudomonas aeruginosa PA0388, JCSG target SP19004A Domain Members of this entry are found mostly in g-proteobacteria, especially in Vibrio. Strangely enough, there seems to be one eukaryotic homolog in Nematostella vectensis (NEMVEDRAFT_v1g226006), where the PA0388-like domain is fused with a domain homogous to the Methionine biosynthesis protein MetW (see below). In several Pseudomonas species, but also in Vibrio vulnificus and Azotobacter vinelandii PA0388 homologs are genomic neighbors of Nucleoside 5-triphosphatase RdgB (dHAPTP, dITP, XTP-specific) (EC 3.6.1.15) and Methionine biosynthesis protein MetW. On the other hand, in most Vibrio species it appears as a part of a conserved operon involved in possible response to stress. 
25.00 25.00 79.60 79.40 19.90 17.80 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.33 0.71 -4.31 62 249 2011-09-24 22:49:33 2011-09-24 23:49:33 1 2 249 4 64 191 80 120.60 46 83.76 CHANGED phcphG-h-VHYsuFsSTFLsPclApsYslpRSchpullNIoV.lcps...ttsp.suh..sAploGpspNLlGptppLsF+ElcEGs.AIYYlAphsasscEphpFpIslpsssp..stslcFpQchas- ....QhcsltDh-VHYsAFNSTFLTPcVApuYpLpRusYpullNISVLDpu..ph...up.sAs..pAploGpA+N.LlGphppLsF+cV+..EGs.AIYYLAEhPhss-EhlsFsIcVcsGsc.hstpLpFsQKFYs-... 0 13 28 48 +14319 PF14468 DUF4427 Protein of unknown function (DUF4427) Godzik A adam PSYMP_19184 [Pseudomonas syringae pv. morsprunorum str. M302280PT] Family This domain is often found at the C-terminal of proteins with Pfam:PF10899 domain, for instance in STY1911 protein from a multiple drug resistant Salmonella enterica serovar Typhi CT18. 22.70 22.70 23.20 24.70 21.50 22.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.51 0.71 -4.43 3 112 2011-09-25 08:35:22 2011-09-25 09:35:22 1 2 109 0 2 45 0 125.80 91 33.30 CHANGED LSuusVKsYsccIN-YIspLYSsKDahsDsYuhEFGNAWVWIHDNQs-VTRALLQcGhVSVN+EGRYLLKlsLtuScWPLR+KEshAKaVA+WL+pRFsLEuGYFSVhGu-DYDcIPaYNssLcENHPFYNs .......LSASKVKNYADSINDYVSELYSKKDFLNDsYAMEFGNAWVWIHDNQSQVVRALLQAGMIKVNKEGRYLLDVNLASVDWPLRRKEAFASHVAGWLKHRFDIEAGRYSVpGKDcYDAIPSYETPLK-QHPFYNH..... 0 1 1 2 +14320 PF14469 AKAP28 28 kDa A-kinase anchor Jaroszewski L, Godzik A adam Q86UN6 A-kinase anchor protein 14 isoform a [Homo sapiens] Family 28 kDa AKAP (AKAP28) is highly enriched in human airway axonemes. The mRNA for AKAP28 is up-regulated as primary airway cells differentiate and is specifically expressed in tissues containing cilia and/or flagella [1]. 
Homologs of AKAP28 are present in all animals and in some, including mice the AKAP28-like domain are preceded by another uncharacterized domain 21.60 21.60 22.10 22.00 20.20 21.00 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.51 0.71 -4.54 15 76 2011-09-26 09:21:28 2011-09-26 10:21:28 1 2 64 0 52 77 0 111.90 30 49.52 CHANGED pWhTtGEFshpsulppI--FlspWpls...csWlass-.lp+--h.+uphYhhcV+aShPTtRpPlPpAoApVhFsIpVSKhcPtchPV.VoYhFEup+Lla.....Rssps.pFREpWL+cIlcoKhthh- ..............................hhoht-FphppuhppIccal.p.pWphp..........csW.latscalpccsh.puhhYhapV+aShPTsppPhPpsoAslaFhl.p.loKhcP.ph.Pl...lh......YhhEspphl+.....Rst.s...pFp-pWlcslhcsKh.hh..................... 0 22 24 36 +14321 PF14470 bPH_3 Bacterial PH domain Bateman A agb Jackhmmer:C8LJ44 Domain Proteins in this family are distantly related to PH domains. 23.00 16.00 23.00 16.40 22.90 15.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.14 0.72 -3.72 68 691 2012-10-04 00:02:25 2011-10-26 16:08:13 1 14 480 0 99 818 44 95.00 24 64.96 NEW Lp..ssEplhhhs...hu...hh.p......................tpsshllsTs+Rllhhsp..phhtt.hp.h.pshsapcIsslphc..psl............hh.sc.lphts...sspplpl...p.l..s+sss.cphhphlpp ..............................................tE.h...h....hs....hhch..................h.sthpGhhlsTNc.R..Llhhth....sh.tp...hh...h....c..ph..s..ascIpsl.chc...................hs...pp..Ihhph...sttp..l.h...cpI..ppssV.phhlchlp............................................ 1 34 63 82 +14322 PF14471 DUF4428 Domain of unknown function (DUF4428) Bateman A agb Jackhmmer:C9XSR9 Domain This putative zinc finger domain is found in uncharacterised bacterial proteins. 
27.20 27.20 27.60 27.50 27.10 27.10 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.82 0.72 -4.22 24 71 2011-10-26 15:21:41 2011-10-26 16:21:41 1 8 57 0 10 70 4 49.50 28 20.98 NEW pCslCspclGh..h...pt...hclpDG.h.lCcsChpKl....ssh.....h.pshph.p.hTlpcI+ch ..CslCspclGh..ht.....KlpDG.a.lCcsChpKl....psh.....h.tshp.h.pphTlpplpp............ 0 6 9 9 +14323 PF14472 DUF4429 Domain of unknown function (DUF4429) Bateman A agb Jackhmmer:Q9L067 Domain This presumed domain is functionally uncharacterised. This domain family is found in bacteria, archaea and viruses, and is approximately 90 amino acids in length. This domain is often found in two tandem copies. 25.00 0.00 25.10 5.60 23.80 -999999.99 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -10.17 0.72 -3.69 66 192 2011-10-26 15:23:52 2011-10-26 16:23:52 1 7 87 0 61 208 3 93.40 26 61.81 NEW hsFDGpp.lplphs...htstth+pu...tG.spslslsslsuVpac.s.....sth....................................................tsGaL..Rhhhts....................ussstst.................ssppDPtsl...hhs.tcptthshhhsstVps ...............hsFDGcs.lplpap...hts.phptt..hG.-pplPlssluuVphc.P....sth....................................................tsGhL.Rhhh+s................................ussshtths...............ssspDPhsl...hhh.tcp.t.st.hsstl..t.................... 0 19 48 59 +14324 PF14473 RD3 RD3 protein Bateman A agb Jackhmmer:RD3_HUMAN Family RD3 is a human protein that is found preferentially expressed in the retina [1]. Mutations in RD3 causes Leber Congenital Amaurosis type 12 [2]. 
27.00 27.00 31.20 30.80 23.20 18.90 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.61 0.71 -4.68 17 73 2011-10-26 15:48:09 2011-10-26 16:48:09 1 1 43 0 55 70 0 130.30 43 66.10 NEW hsWh.......p.hsp.s.ppsupRssp-.........lVscsLhhELphpl+ctE+.ppE+EpEtR+tc....otsDYSWLhoss...+tphplsssE+hpLEsLCu+lpPspsu.lIsRFRcllspp-spspElsplF+sVLp-hLpphccptp ..............sWh.......+.hspss..+.opRssuE.........hVh-TLMhELshpl+EsER.pcERcsEhR+hc.....TGVDYSWLsSsP...+ssaslossE+LQLE-lCuKlpPspCGslI.RFRpllAEp-P-spEVsplF+uVLp-sLcphcpEp...... 0 7 15 33 +14325 PF14474 RTC4 RTC4-like domain Bateman A agb Jackhmmer:Q59X99 Domain This presumed domain is found in the RTC4 protein from yeasts. In Saccharomyces cerevisiae, Cdc13 binds telomeric DNA to recruit telomerase and to "cap" chromosome ends. RTC4 was identified in a screen to identify novel proteins and pathways that cap telomeres, or that respond to uncapped telomeres [1]. This domain is also found in proteins that contain a DNA-binding myb domain. 21.80 21.80 24.40 23.40 20.90 20.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.65 0.71 -4.18 38 169 2011-10-27 11:39:00 2011-10-27 12:39:00 1 4 127 0 125 175 1 120.40 28 26.24 NEW ILsspp....sShY.............+shhcsphps.u+cpshphs...............t.thpphssGYYG.+Gtph.......hsptlhs+.auscLc+hAspsp........llpthGsssFsQtVLVPElhlhLltEDM......sls.......s.cc....................ARpIhc-Ss-hGshls-p ...........................................h..t...sShYhshhcpthpp..uppth.hphs.................thpph.ssGY..YG.+Gtph.............hsptlhsc..ats..t.lcphsstst........llp..hh....G.sssasptVLVPElhhhLlhEDM.........sls.........s.cp............................Acpllc-SschGplls.............. 0 28 59 104 +14326 PF14475 Mso1_Sec1_bdg Sec1-binding region of Mso1 Wood V, Coggill P pcc manual Domain Mso1p is a component of the secretory vesicle docking complex whose function is closely associated with that of Sec1p. 
It is a small hydrophilic protein that is enriched in the microsomal membrane fraction [1], and this binding domain is towards the N-terminus of Mso1. The yeast Sec1p protein functions in the docking of secretory transport vesicles to the plasma membrane [2]. Mso1p and Sec1p interact at sites of exocytosis and the Mso1p-Sec1p interaction site depends on a functional Rab GTPase Sec4p and its GEF Sec2p [3]. The C-terminal region of Mso1 (not built) assists in targetting Sec1 to the sites of polarised membrane transport [4]. 29.10 29.10 31.30 30.40 28.60 27.30 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.58 0.72 -8.07 0.72 -4.55 22 111 2011-10-27 17:28:36 2011-10-27 18:28:36 1 3 109 0 80 103 0 43.90 40 17.97 NEW Sulpplp..l...p....sEpDGDTtsDTlVH+sLlpYYppp..tpsaPsWL ...................phtph.....h.....p.........sEs.D.GDoEDsThlp+sLhsYYscK..GpsFPsWL 0 13 39 67 +14327 PF14476 Chloroplast_duf Petal formation-expressed Coggill P pcc Pfam-B_480 (release 25.0) Family The members of this plant family from Arabidopsis thaliana appear to be proteins found in the chloroplast, expressed in the pollen tube during the petal differentiation and expansion stage. The function is not known. 
25.00 25.00 44.50 25.00 21.90 21.30 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.03 0.70 -5.63 14 101 2011-10-28 14:10:21 2011-10-28 15:10:21 1 4 16 0 67 104 1 250.10 52 75.27 NEW KLaAlhEAVADRlEMHcNIGcQRDNWN+LLLsSlNslTLTAAsMAGLA...stuusshhALKlSSTlLasAATGhhslMNKIQPSQLAEEQRNAsRLF+QLcppIcssLulsss........spsDVp-AME+VLALD+AYPLPLLGuMLEKFPpoVEPAsWWPppcppptcppt................tttpuNGWSpELE-EMRcllpVLKtKDpp-Yl+LGclALKlNKlLAIoGPlLTGlAAluSAFlGss.....ssWAuhluVssGALAosVNTlEHGGQVGMVFEMYRssAGFFphMEEoIESslpEp-Vc+RENGElFEhKVALpLGRSLS-L+ ......................pLhulhEtssDRhEMHc.IGcQRsNWNpLLLpShN.lTLsAusMuulA....s.sussllALKhSuslLh.uAsshhhhhNKIQPSQLsEEQRNAsRLa+pLptpl.pthluhsss........optDVppsh-+VLALDpAYPLPLLs.sML-KFPpphEPAhWWPppc.ptt.pptt....................t.psNGWs.-LE.EMRclltVlKtKD.p-Y.+lGplsLplNKhLAluGPhLsGhAAhuouFlGss.......tsWushlulhsGuhAusVNshEHGGQlGMVFEMYRssuGFaphhpEsIEuslpEt-lt+RENGElFEhKVALpLGRShStL+....................................... 0 3 46 59 +14328 PF14477 Mso1_C Membrane-polarising domain of Mso1 Coggill P pcc PF14475 Domain Mso1p is a component of the secretory vesicle docking complex whose function is closely associated with that of Sec1p. It is a small hydrophilic protein that is enriched in the microsomal membrane fraction [1]. The yeast Sec1p protein functions in the docking of secretory transport vesicles to the plasma membrane [2]. Mso1p and Sec1p interact at sites of exocytosis and the Mso1p-Sec1p interaction site depends on a functional Rab GTPase Sec4p and its GEF Sec2p [3]. This C-terminal region of Mso1 assists in targetting Sec1 to the sites of polarised membrane transport, the SNARES and Sec4 [4]. 
23.20 23.20 23.20 23.20 22.50 18.10 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.78 0.72 -3.64 7 18 2011-10-28 14:29:35 2011-10-28 15:29:35 1 2 18 0 12 17 0 60.30 31 34.86 NEW hSup......uSsslp.....+suSclpDsaNSp+DsS.hp..Gosptuh...PAps.sssustpGS.LRu ........................................................tusshp........RSSSRLQDhYNKSRQQS..hPGsGYso................................ 0 2 7 12 +14329 PF14478 DUF4430 Domain of unknown function (DUF4430) Coggill P pcc JCSG-Target:417407-SP17946A Domain Although this family has overlaps with SLBB, the majority of its sequences are unique. Several family members, eg UniProtKB:A0RGA8, that do not overlap have an LPXTG-cell wall anchor at their C-terminus, a SSF_Family 10_polysaccharide_lyase or Glycosyltransferase structure associated with them in the middle region, as shown by InterPro, as well as this domain at the N-terminus. 30.00 30.00 30.20 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.20 0.72 -3.92 123 701 2012-10-03 10:59:06 2011-10-28 15:57:49 1 52 462 4 173 592 41 69.30 29 19.69 NEW usolh......csLc.....p....pt...plpt..p...pt.........thG....alsuI...sG.....h.p.p.........c..tsss.ph.....WhapV.N.Gphssh.....Gusphpl.c.sGDp.lpat ..............................................................polhslLp.......c........st....clct..p.......s............G.....alsuI....sG....................ltp...............c..ssp.sh.......WhapV..N.Gphssp......GAsph.....pl.c.sGDplpa........... 0 62 111 129 +14330 PF14479 HeLo Prion-inhibition and propagation Greenwald J, Coggill P pcc Pfam-B_407 (release 25.0) Domain This N-terminal region, HeLo, has a prion-inhibitory effect in cis on its own prion-forming domain (PFD) and in trans on HET-s prion propagation [1]. The domain is found exclusively in the fungal kingdom. 
Its structure, as it occurs in the HET-s/HET-S proteins, consists of two bundles of alpha-helices that pack into a single globular domain [1]. The domain boundary determined from its structure and from protease-resistance experiments overlaps with the C-terminal prion-forming domain of HET-s (PF11558 [2]. The HeLo domains of HET-s and HET-S are very similar and their few differences (and not the prion-forming domains) determine the compatibility-phenotype of the fungi in which the proteins are expressed. The mechanism of the HeLo domain-function in heterokaryon-incompatibility is still under investigation, however the HeLo domain is found in similar protein architectures as other cell death and apoptosis-inducing domains. The only other HeLo protein to which a function has been associated is LopB from L. maculans [3]. Although its specific role in L. maculans is unknown, LopB- mutants have impaired ability to form lesions on oilseed rape. The HeLo domain is not related to the HET domain (PF06985) which is another domain involved in heterokaryon incompatibility. 
23.20 23.00 23.30 23.20 23.10 22.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -9.98 0.70 -11.20 0.70 -4.58 65 227 2011-10-28 17:51:32 2011-10-28 18:51:32 1 30 76 5 198 232 0 189.70 16 32.69 NEW GlulGsh...ul..............uulFssslc.....saphlpsu.........+sau.cDhphhp.l+L-lpchRLhpWGcul.....GLhpss.tpp...........................hssthh...tptlpplLspIpplhp-s..pphpp+athptsssstt................................................................................ppphpphspp.tppp....pshhp+sp......WslhDKcp........FppLlpclpshs-s.LpsLh..Pstptpp............phtptchpth..pptpsLphLpcs ........................................................................................h.sh.ul........htlapsslp...................shphlpss.............pphs.p-hphht.hcLclpphRLhtWucss.....Gl.psttttp......................................................tp..h...tptlhplLtpltphhpph..pphp.p+ath.tst.ttt...............................................................................................................................htthpthhtp.tppp.........hphhp+hp......Ws....lhD+pp........hptllpphpthsspLpplh.....s.ttp.p.......................................................................................................................................... 0 27 88 173 +14331 PF14480 DNA_pol3_a_NI DNA polymerase III polC-type N-terminus I Eberhardt R re3 Pfam-B_853 (release 23.0) Family This is the first N-terminal domain, NI domain, of the DNA polymerase III polC subunit A that is found only in Firmicutes. DNA polymerase polC-type III enzyme functions as the 'replicase' in low G + C Gram-positive bacteria [1]. Purine asymmetry is a characteristic of organisms with a heterodimeric DNA polymerase III alpha-subunit constituted by polC which probably plays a direct role in the maintenance of strand-biased gene distribution; since, among prokaryotic genomes, the distribution of genes on the leading and lagging strands of the replication fork is known to be biased [2]. 
It has been predicted that the N-terminus of polC folds into two globular domains, NI and NII. A predicted patch of elecrostatic potential at the surface of this domain suggests a possible involvement in nucleic acid binding [3]. This domain is associated with DNA_pol3_alpha Pfam:PF07733 and DNA_pol3_a_NI Pfam:PF11490. 22.00 22.00 22.60 22.00 21.90 21.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.31 0.72 -4.34 58 1166 2011-10-31 08:26:54 2011-10-31 08:26:54 1 22 1163 0 129 779 4 76.00 29 5.33 NEW cpFphLhpplp..hssch...thhpsu.plp+lslpcps+pWcFplphcplLPhchapphpppLpps.Fpphs.plphplps ............ppFplLhsQlp...hssph.psthlpsu.pIc+lsVppts+hW-FHlshsplLPh-hahthpptLppp.Fpchu.psshplp.............. 0 42 81 105 +14332 PF14481 Fimbrial_PilY2 Type 4 fimbrial biogenesis protein PilY2 Jaroszewski L, Godzik A lukasz Pseudomonas aeruginosa PAO1 gene PA4555, JCSG target SP18988A Family Members of this family were experimentally shown to be involved in fimbrial biogenesis, but its exact role appears to be unknown. 25.00 25.00 118.80 118.60 22.00 20.30 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.41 0.71 -4.67 9 19 2011-10-31 17:44:58 2011-10-31 17:44:58 1 1 18 2 2 11 0 112.70 67 99.54 NEW MKsL.hL......LALAsPshsaAp-spTFEsuGVV.-VplEpsLVsIDtphYRLPNussps....GhPslFQV+PGSVVSaSGoVSpPhspIssIYIhKQhS...hu....cEp...tuE.ps+ .MKVLPML......LALAVPGLCWAE-PQTFEGAGVVFEVQVEKNLVDIDHRLYRLPNSTVRN....GMPSLFQVKPGSVVSYSGTVSQPWSTITDIYIHKQMSEQELAEMIEKEQPRQDGEEQPR.. 0 1 1 2 +14333 PF14482 Cut8_N Cut8 proteasome-binding domain Eberhardt R re3 Wood V Family In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome [1]. Cut8 comprises three functional domains. 
An N-terminal lysine-rich segment (this entry) which binds to the proteasome when ubiquitinated, a central dimerisation domain (Pfam:PF14483) and a C-terminal six-helix bundle (Pfam:PF08559), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. The six-helix bundle is necessary for liposome and cholesterol binding [2]. Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 [1]. Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome [1]. In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 [1]. Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum [1]. 22.00 22.00 23.70 23.70 21.40 20.50 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.34 0.72 -8.84 0.72 -3.97 19 98 2011-11-01 13:47:55 2011-11-01 13:47:55 1 3 96 0 68 85 0 55.20 33 17.09 NEW phpspps.tpR...KRpus--.......pp..hsP....ssp.t........h.spth.........................ts++Kh+.tsplpGpP ...........ppss.ssR...KRKA--D........t.sschohSP.......osSPu........hssRsls.........................sp++hR.psplsGRP. 0 10 27 51 +14334 PF14483 Cut8_M Cut8 dimerisation domain Eberhardt R re3 Wood V Family In Schizosaccharomyces pombe, Cut8 is a nuclear envelope protein that physically interacts with and tethers 26S proteasome in the nucleus resulting in the nuclear accumulation of proteasome [1]. Cut8 comprises three functional domains. An N-terminal lysine-rich segment (Pfam:PF14482) which binds to the proteasome when ubiquitinated, a central dimerisation domain (this entry) and a C-terminal six-helix bundle (Pfam:PF08559), which shows structural similarity to 14-3-3 phosphoprotein-binding domains. 
The six-helix bundle is necessary for liposome and cholesterol binding [2]. Cut8 is a proteasome substrate and the N-terminal segment is polyubiquitinated and functions as a degron tag. Ubiquitination of the amino N-terminal segment is essential to the function of Cut8 [1]. Lysine residues in the N-terminal segment of Cut8 are required for physical interaction with proteasome [1]. In fission yeast the function of Cut8 has been demonstrated to be regulated by ubiquitin-conjugating Rhp6/Ubc2/Rad6 and ligating enzymes Ubr1 [1]. Cut8 homologues have been identified in Drosophila melanogaster, Anopheles gambiae and Dictyostelium discoideum [1]. 22.00 22.00 22.30 22.30 21.60 21.50 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.25 0.72 -7.54 0.72 -4.15 19 126 2011-11-01 13:49:00 2011-11-01 13:49:00 1 5 124 3 91 116 0 37.70 39 11.94 NEW LPLsRLLEsLDpspLpslLpslsppHP-lupplhptsP ..LsLsRLLEoLDpspLpslLpslsc+HP-lspEVhppuP............. 0 14 41 74 +14335 PF14484 FISNA Fish-specific NACHT associated domain Eberhardt R re3 Jackhmmer:A5PF24 Family This domain is frequently found associated with the NACHT domain (Pfam:PF05729) in fish and other vertebrates [1]. 22.90 22.90 23.40 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.39 0.72 -9.26 0.72 -4.00 25 1153 2011-11-03 14:27:53 2011-11-03 14:27:53 1 57 42 0 826 627 0 71.30 49 11.79 NEW paKssl++KapslhEGsuppussshLNcIYT-LYIsEGcSspVNpEHEVh.QhEppu+ppssp-TPIpCpDIFp .............phKssLK+KapplFEGIuptGssTLLNcIYT-LYITEGtotpVNpEHEVR.QIEssS...Rp....p..............sp.-oslcs..cDlFc.................. 
0 2 385 784 +14336 PF14485 DUF4431 Domain of unknown function (DUF4431) Coggill P pcc Jackhmmer:E7X0E8 Domain \N 24.00 24.00 24.50 24.00 22.90 21.20 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.00 0.72 -8.20 0.72 -4.35 23 127 2011-11-04 10:52:06 2011-11-04 10:52:06 1 4 112 0 11 66 0 49.00 47 13.24 NEW plQLsl.sssphshhcp.....hlGKcVpVs.GclhhupouaHaTPlLLs.lspls ..........h.VQLlL.sPEcashapp.....alGK+ITlp.G+VMlAcShaHhTPVLLs.lpc..p.... 0 4 7 9 +14337 PF14486 DUF4432 Domain of unknown function (DUF4432) Coggill P pcc JCSG_target390294_A6THE6 Family \N 32.00 32.00 32.30 32.40 31.50 31.30 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -11.59 0.70 -5.87 63 706 2012-10-02 23:57:29 2011-11-08 15:18:22 1 2 605 3 123 495 53 292.50 33 85.99 NEW scGschlplps...uGLphsllPsRGMDlh.....csphp...Ghs.luWpSPs.s.hlsPshhs...psGhGWLcuFs.shlspCGLptsGsPs..s.-s..........ut..............h..........sLHGRlustPAcplsh.ph..p...-..p.s.htlplpGplcEsphFGtsLpLppplpsphGps...plplpDplsNpust.spsh.hLYHhNhGhPlL-csuchh..................hPs.pplsst....s...spAtp...shssapphtsPps...s.a...sEpVahhc..tuDtsG.pshshlhNtph...shG...lslpFspppLPhhs.WKshss..tsYVhGLEPu.Tshshs.+sht+cpGplhtLtPGEs+saplphpl ................................s.pGlculclpN..upG.plhllPhhG....Ih.....cA..pac...Gps..Lshpshh..p....pPt.ht.......ph.lcoas.sahh+sG....L.....ts....GsPu....s.-D......................ha..........sLHGchsssshcc.shl.ch..c...s.....tlplsGchc.shs.F.Gcca.hspsslshchuSs...hFcIp.pVTNhu.sh.shPLQhhhHhNauas.......ssAphp.............................tPs..tphhsh....N....pch.hp.........G.s.s...hpols.tPph...h...............sEhVahhc...h.sc.......ps.psthth..hsscu........ps...hss+FsospLshhTpWhhhsup......ppshshuL...Pu.Tsp.P.-.G..hhst....pt.pGp.LhpLpPtpo+sFslohu................................................................................................................. 
0 40 61 87 +14338 PF14487 DUF4433 Domain of unknown function (DUF4433) Bateman A agb Jackhmmer:A0YHY7 Family This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 201 and 230 amino acids in length. There is a single completely conserved residue E that may be functionally important. This family is distantly similar to Pfam:PF01885 suggesting these may be ADP-ribosylases. 22.50 22.50 23.30 24.20 21.90 20.90 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -11.41 0.71 -4.59 53 218 2011-11-08 17:28:40 2011-11-08 17:28:40 1 5 201 0 55 181 25 184.10 25 79.79 NEW aHIsHlcNLsSIlpsGhLhucsplhppthshssIu.spIpp+Rhtp.slsstsutplt-YVPF...YFsPRSsMLYtI.ppss.....htttp...psIlhLtssl.ptlst.....t.phsFo..susAusphschhs.............sL...ppLt.pl-Wshlpu........p.Wp.........pp+ct+QAEhLlc.pphPhphlppIsVhspshtspVpphlttts....................hp.sVplpss....WYa ...................................................................aHhTHhcNLssIl..psGhLhucstl....hshsplu.s.lhphRtt......sh.ssshhpDaVPF...Yh.s.spS.sMLasl.pttps...........pt........ttslVhLsssl.chlst...........thsasao..sssAss.sh..sphh.s..............pl......spLt..lcas.hlpt...................cpapps........t...p..s.sp.pcppAEhLlh.pplPhphlpt..lsshspphhpplpphlt.hs.............h...h.hpst..hYa............................. 
0 20 35 44 +14339 PF14488 DUF4434 Domain of unknown function (DUF4434) Coggill P pcc JCSG_Target_393000_GS13553A Family \N 30.00 30.00 30.00 30.40 29.80 29.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.19 0.71 -4.52 39 420 2012-10-03 05:44:19 2011-11-09 11:13:28 1 8 375 0 32 218 17 167.10 53 52.21 NEW loGTFlp.hp.s........c...........pshssspWcpchpth+plGhcsll.....lp...............hsuhps.tshhP.oph......t..h......s...shlphhLstAc+hGMclahGlhhss....paWc..p...pshph.......p..hthspt........lhcE.lhp..haup.at..uFt..GWYlshElspt...s............hss.......ssth....ptls.ph.hcplus.......shPsh..lSsahsst ...................MKGIIWQPQNRDS............QVTDTQWQGLMSQLRLQGFDTLV.....LQ...............WTRYGD.....AFTQP.EQR..................sLLFKRAAAAQQAGL....KLIVGLNADP....EFFM.HQ...KQSSAA..............LESYLNRL.............LAAD...LQQA.RLWSA.sPG..lTPD..GWYISAEIDDL....N..............WRS............EAAR....QPLL.TW.LNNAQRLIS.DVSAKPVY..ISSFFAG.N...................... 0 10 23 26 +14340 PF14489 QueF QueF-like protein Bateman A agb PF01227 Family This protein is involved in the biosynthesis of queuosine. In some proteins this domain appears to be fused to Pfam:PF06508. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.67 0.72 -4.03 30 2821 2012-10-01 20:59:24 2011-11-09 15:46:40 1 7 2788 25 550 1754 742 79.10 46 38.24 NEW PDFAolhlcYhPspphlEhKSLKLYLhSFRscshFHEpssNpIhcDllpthcPcalcVhucFsPRGGlshslhsppsctss ...................PDauolhIpYhss.p...l-pcuLhhYLhSFRpHs.-FHEpClppIhs.DLlchh.pPchLpVau+.aT.RGGlsIsPaps.s...sh...................... 0 155 323 442 +14341 PF14490 HHH_4 Helix-hairpin-helix containing domain Bateman A agb Jackhmmer:A0YV56 Family This presumed domain contains at least one helix-hairpin-helix motif. This domain is often found in RecD helicases. 
27.00 27.00 27.00 27.00 26.30 26.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.02 0.72 -4.22 189 1795 2012-10-03 02:11:09 2011-11-09 16:09:58 1 16 1618 4 349 1402 107 94.40 34 12.36 NEW shpcpps..hcclhhhLppaGlss.p..hAh+Iapp..Yu..s.....pul..cllccNPYpLsp-l......pGlGFppADpIAp....p.h......GlstcsspRlpAulhasLpp.s.t.ppGHsa .................................tppshcplhhhLpsaGlus..pluhpIapt..Yt..p.........co.l..pllcpNPYpLlcDI......cGIG...FppADplAc....p.l...................GIs.s..s...sscRl+Aulha.sLpp..sh.ppGcTY..................... 0 137 248 310 +14342 PF14491 DUF4435 Protein of unknown function (DUF4435) Bateman A agb Jackhmmer:A0YK45 Domain This presumed domain is functionally uncharacterised. This domain is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 285 and 362 amino acids in length. This domain is sometimes associated with AAA domains. 27.00 27.00 27.10 27.20 26.80 26.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.79 0.70 -4.54 54 311 2011-11-09 16:32:35 2011-11-09 16:32:35 1 5 276 0 51 265 2 230.50 19 66.66 NEW c.+lhsaVEuh..DD.h.aacslls..ph....ps.c...h...c.h.ap......................stGKcsVL.phhphh......ppt..sspshlh.hVDsDa..........Dhl.......h.tt.........ps.s...hla...pT.sYu.....IENhhs.hs.pulcclhsth.sls-..............hh.c..apphhppapctl..hslhl...a.hhhh......cpt.....phhshpps..l.ph..............ps.hphpph.hph.......phpplphp...hphpplpphh.p...th..............................c...ps...h.pplppclppl..s...ps..h.....p..hhh..pG+.....alh......hl.h...hlp.plhpph...pp..cppp 
.....................chhhaVEGh...cDh.aapsl...ls..ph....ps.c.....h.....p.h.h.......................stGKppVlts.thl.......psp......hspshlu...hlDsD.a.................Dhl......httt..............psp....lahT.sas..........lENhhh...hs......pslcclhs...ph.....phss.....p......th.......hc...h.p.t.hh..pp.hp....phl.hslhh.h.hhhh...ppt.............thpph..l.ph.............pphphpph...p......p.tphphp...thhpplpphh.p...th...............................p...t....h.pthppphpth.....t...ts..h..........h....hh..pGH......lhp..l.h..hhh...htp..........pp................................................................................................... 0 15 34 48 +14343 PF14492 EFG_II Elongation Factor G, domain II Bateman A agb Jackhmmer:A0YMC3 Domain This domain is found in Elongation Factor G. It shares a similar structure with domain V (Pfam:PF00679). 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.15 0.72 -4.13 129 11565 2012-10-02 20:07:24 2011-11-09 16:50:22 1 54 5577 56 3004 8396 3438 73.30 37 10.91 NEW hPcPVlshAlcP...cs...csDp-KlupuLp+lhcEDPohplppcpETspslluGMGELHL-lhl-Rl+ccatl-lplup ...........................PpPVlphu.lEPKs......psDp-Kh............sp...........ALp.+LscEDPoh+lps.....cp.E...o...u...p...pl.....luG.....h.......GELHL-lll.-.Rhc+Ea..p.l-spht.t............................. 0 1071 1868 2539 +14344 PF14493 HTH_40 Helix-turn-helix domain Bateman A agb Jackhmmer:A0YXF4 Domain This presumed domain is found at the C-terminus of a large number of helicase proteins. 
27.00 27.00 27.00 27.30 26.90 26.70 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.76 0.72 -3.77 166 1114 2012-10-04 14:01:12 2011-11-09 17:27:19 1 26 983 0 236 851 57 91.20 26 16.58 NEW sotphThpLa....p.p..G.holc-IAp....pRs......LphuTI.sHLschhpp..Gt.....l....s....l....p....ph..l..sp.....c.....c.hppItpsh.......pp..hs...........................s...p.......pL+sl+E..tLspp.hoYtpI+lshs .................othhTaphh....p.p..G.holc-IAp....pRp......LphsTIpsHllchhhp..Gh.....h....s....h....p....ph......l.st.............-.....c..ptlh...p..hh.......pp...hp....................................s...p.........cL+.lK-..thst....p.loYhpl+lhl............................................................... 0 89 170 209 +14345 PF14494 DUF4436 Domain of unknown function (DUF4436) Coggill P pcc Pfam-B_6430 (release 25.0) Family This is a family of membrane and transmembrane proteins from mycobacterial and related species. The function is not known. 23.00 23.00 23.00 23.00 22.40 22.90 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.69 0.70 -5.09 15 176 2011-11-09 18:28:06 2011-11-09 18:28:06 1 3 96 0 47 101 5 244.90 40 77.13 NEW ushlhspo..ssshsps-suh..u-posVplslEclcossscLsVsVhVtPssuLlDschpsLssDluVRlpusss.u-lpascGphPushsss.lshsGchpsWPFDpYcoss......lss......-lhsGsGss+tshPupVchsGs.LsGWslshspsscss.........sssshplsLcRStuslsFslsIshVLIuLPslALhVAIphspsRRpFpPPhsTWaAAMLFAVVPLRNhLPGoPPhGuWIDhsVVLWVLlALVsuMllYIlsWWRph ...........................................................s..hhhpo...s...psshup..sDsTsVhl+hEpLpTltshLssplh.V.PssphlDpp.hp.sLpsDhoVp..lh.s.s.p.schpassGpLPu...hsss.lp.h.p..Gs....Pus..WPFDpYposs......lps...............plhhGu...u.............+t..h.s......pVp...hssp..L....G..Wslshs..t.Vu-us.................tssshhlsL+.....Ruhusls...........F..slsIshVL.Is.Lsslu...L.F.V.Al..QhhpGR.RpF...Q...PP...h..sTWYAAMLFAVlPLRNhLPGuP.P.....h.GuWIDhsVVlWVllALssuMVlYIlsWahch....................... 
0 27 37 43 +14346 PF14495 Cytochrom_C550 Cytochrome c-550 domain Bateman A agb Jackhmmer:Q55013 Domain This domain is a heme binding cytochrome known as cytochrome c550, or cytochrome c549, or PsbV [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.73 0.71 -4.71 49 117 2012-10-03 10:02:11 2011-11-10 14:07:10 1 1 98 25 35 180 43 133.90 48 82.64 NEW hcLDc.........pscTVsL.......sssGcTlsloscQlppGK+LFsssCupCHsGGl...TKTNPNVuLs.-sLuhAoPsRDNltuLVDYhKsPToYDGpcsIuElHPuhcSuDlaPcMRsLo--DLhslAGaILlpPKlhuppWG....GGK........lYa .......................hpLscpohTVslsspGpTlsloscQlppG++LFsssCupCHsGGl...TKTNPNVuLs.EsLuh.A..T.Ps..RDNltuLVDYhKsPToYDGp-sIuElHPSlcSuDlaPcMRsLT--DLhslAGaILlpPKlsuppWGGGKh.................................................... 0 10 26 33 +14347 PF14496 NEL C-terminal novel E3 ligase, LRR-interacting Coggill P pcc Jackhmmer:E7K2H2_PDB:3ckd Domain This NEL or novel E3 ligase domain is found at the C-terminus of bacterial virulence factors. Its sequence is different from those of the eukaryotic HECT and RING-finger E3 ligases, and it subverts the host ubiquitination process. At the N-terminus of the family-members there is a series of LRR repeats, and the NEL domain interacts with the most N-terminal repeat. The key residue for the ligation step is the cysteine, eg found at position 386 in UniProtKB:E7K2H2. The LRR section sequesters this active site until invasion has occurred [1]. 
23.00 23.00 23.10 23.40 22.30 22.40 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.43 0.70 -5.06 37 676 2011-11-10 15:41:18 2011-11-10 15:41:18 1 44 233 6 57 677 1 198.20 45 29.38 NEW WLtssstps.p..phttWpshppEpsussFsphLscLppossaptts.....shpppVhphLpthspcspLRpphFshAtssp......toCpDplsltasshchshhl....tsscssph-ss.....LlphuRphaRL-tL-plApc+ltph.t...............sD-lEVhLAapspLtcsLsLss..spcMhahslSslopsDltpApspV.........ppppspthhpalup.pshWcshLcchpssca ...................................................Wh.s.pcps.....sp.W+uF..-pEppA..ssFStFLDRLu-T.ss+pss...........sF+cQVuAW....LtpLus...........s....spL..RppsFsl..At-AT.......tSCEDR.VsLsasphcpshLV....H.pApcGhaDsch..stLlshGREhFRLE.LEsIAR-..KV+pL..h............................................h...lDEI...EVaLAaQsh....Lt-pLpLoo.hsp-MRFas...VS.GVTssDLcsAEthV.........+stEpp-FpcWhuh.WuPWHsVLcRpts-ca...................................................... 0 4 12 27 +14348 PF14497 GST_C_3 Glutathione S-transferase, C-terminal domain Bateman A agb Jackhmmer:Q7WQ90 Domain This domain is closely related to Pfam:PF00043. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.62 0.72 -3.52 199 1536 2012-10-03 01:14:49 2011-11-11 11:56:35 1 53 641 37 886 5329 1288 117.80 14 30.20 NEW tpthphsh........hs.....Whs..................................................................tptsppcl.ptlphh.....sptL...stps...............alhGsp..h..................ohsDl....s...las....hl........tshp..at.....h..................tsh.......................p......lhp........ahsplpp .....................................................................................................................t.........................................................................................................t..t....t...h...t...p...ts...p...p...c...l....p....h.h.pth..........pptL.........spps..................alhGsp.....................ohADl......s........l.au..........hL...................tsht......hs.....hs...........................pphs.......................sltpahpplp.............................................. 0 254 427 678 +14349 PF14498 Glyco_hyd_65N_2 Glycosyl hydrolase family 65, N-terminal domain Bateman A agb Jackhmmer:B5CKV7 Domain This domain represents a domain found to the N-terminus of the glycosyl hydrolase 65 family catalytic domain. 
27.00 27.00 27.30 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.62 0.70 -4.68 86 1312 2012-10-02 23:57:29 2011-11-11 13:22:47 1 57 676 9 328 1063 38 230.20 29 29.24 NEW LaaspP...............Asp..................Wp......cuLPlGNGplGAhlaGsss.....pEplplN-colWoG..uspssps.s..............................s.thlttlRphlhpscht.........pupplhtphhtstst.................t..YpshGsLhls............ts...pssss.............sYpRpLDLssulssssaph..su.............spa..........pR-hFsSh...PD.........sVlV......h+l.puspsttl.shslplsss...................psssssspssplphpGph.....................tssulpaputl+l.hssuGsl...tsps................sslplp....sAsplslllsssTsapsp ..........................................................LhYppP...................Asp.................................Wp...-ALPlGNGpLGAhlaGshs.....pEplQhNEco.lWsG......ustsps.sst..............................th.hL.p.lRp....hl....p.schp...........................tAp.p.Lhppphhu..p...........................up.Y.shGclhlc........................ths...tspho.......................sYpRpL-lspAlus..ssaph..su.......................spa...........pREhFsSh.....sc...................slll......h+h.ous.t........t.p.......l..sh..s..lplsps.......................................ppthhshsssplhh.p.Gps......................................................tpsslp..at....sh..h..th...htss.Gpl......ps.s....................spltlp...sAs.psslhlsutTsat..................................................................................................................... 0 123 253 301 +14350 PF14499 DUF4437 Domain of unknown function (DUF4437) Eberhardt R re3 Jackhmmer:B2J7U5 Family This family of proteins is found in bacteria. Proteins in this family are typically between 152 and 283 amino acids in length. 
24.00 24.00 24.00 24.00 23.90 23.70 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.45 0.70 -5.47 17 81 2012-10-10 13:59:34 2011-11-14 11:54:57 1 2 76 2 32 109 79 171.00 22 81.08 NEW -llhus-lcWshLNPhRG-tuPuAupLWGDRsssssoGhLV+FpcGFpSPPHIHNloYRGlVIpGtlHNDD.cAtphWhPsGSFWTQPAG....EsHlTAAcuspslhYlEIssGPYLV+PsppsFDsGERPlNlcpsNlVWLsspclsWlpss.......usphsaLWssssstthpGhhl+LPsuFpGplpopussh+AVllpGplsap..p.spspsLtsGSYhsustc..Hpl..ps-pssslYlRosGcaplp ..................................................usthusLaGD.tppushshhl+hssGapssPHhHshs.p.h.hVIpG...t...h......h...s.....s...c....s.t.......hssGuaah.PuG....t.Hhshspst.p.h....h.htpsPh...............................................................................................................................................................h............................................. 0 9 18 26 +14351 PF14500 MMS19_N Dos2-interacting transcription regulator of RNA-Pol-II Wood V, Coggill P pcc PF12460 Domain This domain, along with the C-terminal part, Pfam:PF12460 [1], is an essential component of a silencing complex in fission yeast that contains Dos2, Rik1, Mms19 and Cdc20 (the catalytic subunit of DNA polymerase-epsilon). This complex regulates RNA polymerase II (RNA Pol II) activity in heterochromatin and is required for DNA replication and heterochromatin assembly [2]. 
21.00 21.00 21.00 21.00 20.80 20.90 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.65 0.70 -5.13 55 354 2011-11-14 16:18:51 2011-11-14 16:18:51 1 16 268 0 250 358 4 225.50 28 24.04 NEW VpsLGphLTsp-sthRs+ulp.hLosVLppLs.ssh..LscpplplLhsFYps.......Rl..cDptslhss.LpGltuLs.phpp.......hstst...stpllculhpchps..puhhpssRhtsapllpslhppatctL......pshus-Flh.GhlphhsGEKDPRNLllsFplhp.hlhppa.s.............l.spasE-LF-lhhCYFPIsFpPPsNDP.h.sITp-DLptsLcsslsuoshFAphshPhLlEKLsSos.s..sKhDoLpsLttChpsa...sss.slppahhslWsuLKhElhps ................................ht..lts...p..hRt+.uhp.hLstll.th...ph.............Lpppp.....l.thLhtFat.s.......+l..p.Dtth.l..s..hpul.......ttL....h.tt................................hs.s.......s.plh.p.sl..h.p......phps....ps........h.t...t.p...Rhtsapllp.th..h.pp..hp..t.tl......................pths.s..p..ahh..shlp.hhsGE+DPRNLhlhFplhp....hlhppas........................................h..sthsE-lF-sh.sYFPIsFpPss...s.Ds..h...sIotc-Lt.sLptsls.usshF.A......hshPhLl-Kls...Ss.....s......sK..h.....DsLpsL...t..Ch...t....Y...s.....p..pl.ttah.plWsul+hElh..s.......................................................... 0 84 137 208 +14352 PF14501 HATPase_c_5 GHKL domain Bateman A agb Jackhmmer:B5CQB8 Domain This family represents the structurally related ATPase domains of histidine kinase, DNA gyrase B and HSP90. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.36 0.72 -4.30 80 2619 2012-10-11 19:05:54 2011-11-14 17:49:38 1 17 1244 0 315 12868 2588 102.70 24 25.87 NEW l.pshDlsslhuNlLDNAIEA....spc.........h..p..p...c..+h.Iplpht.t.pp..sh..lhIplc.N.s.....hss.....................ph......sp.....hh......oo..K.p.c.tphH.GhGlpSlcphlcKY.sG.slp..hp.hcss..hFphplhls ..........................................h.p.hDls.pll.u.llDNAI.E..u......u.tc.......................h..p.............pth....l...p...l..s..h.h.....p....p....p.......sp.........h......h.........h.......h.....l.p...Ns........hpp.............................sh..........sc...........lh..........sh....p....s......c.....s..p.....s+....G..l..G..L..p.sl...cc...l.l.c.....p...a...ss.....l..p....h..p....h...p...s..s.....h.F.p...plhl........................................... 0 158 232 280 +14353 PF14502 HTH_41 Helix-turn-helix domain Eberhardt R re3 Jackhmmer:D2A9I0 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -7.96 0.72 -4.34 19 338 2012-10-04 14:01:12 2011-11-15 11:51:03 1 2 335 0 28 586 65 47.90 75 16.35 NEW psG-RlpTIsEhucchsluhGslQsALKhLcspsAlpL-pRG+.GTal ...........KCGNRLKTIDELATECRSSVGLTQAALKTLESSGA.IRIER.RGRN.G.SYL........... 0 8 14 23 +14354 PF14503 YhfZ_C YhfZ C-terminal domain Eberhardt R re3 Jackhmmer:D2A9I0 Family This domain is often found in association with the helix-turn-helix domain HTH_41 (Pfam:PF14502). It includes YhfZ proteins from Escherichia coli and Shigella flexneri. 
27.80 27.80 27.90 49.80 26.90 27.70 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.42 0.70 -5.02 20 369 2011-11-15 11:54:42 2011-11-15 11:54:42 1 2 339 2 28 144 1 210.60 73 75.69 NEW chphLLphAslsslVssMPLPYS++YEGLATGLpppFcp..slPh...hAaMRGustRlcsLpsGhYDaAllS+LAAcpalcp...cslclshphGs.poYVupHhlla+csppppI....hRlGlDssShDQplLTchhhc.scclEhVElsYsphlptlhpGpIDAslWNh-..chchps.hslphhslpp...pp..hhpcsocAVlllcp-spslppllpphVstcpllphQpcVlpschhPsY ...DNKALLoHVDINNVVCAMPLPYTRLYEGLASGLKAQFD...GIPFYYAHMRGADIRVECLLNG..VYDMAVVSRLAAESYLTQ...KGLCLALELGP.HTYVGEHQLIC.RKG...ESANV.....KRVGLDNRSADQKIMTDVFFG....s.S.DVERVDLSYHESLQRIVKGDVDAVIWNVV.AENELT.....M..LGLEA..TPLTD...DP.RFLQATEAVVLTRsDDYPMQQLLRAVVDKHALLAHQQRVVSGEQEPSY......... 0 8 14 23 +14355 PF14504 CAP_assoc_N CAP-associated N-terminal Coggill P pcc JCSG:target_417453-SP18049A Domain The function of this domain is unknown, but it is found towards the N-terminus of bacterial proteins carrying the CAP domain, Pfam:PF00188. All members that do not otherwise carry an additional Cu_amine_oxidN1, Pfam:PF07833, domain are likely to be extracellular as they start with a signal-peptide. Most other non-bacterial proteins with the CAP domain are allergenic [1]. 24.30 24.30 24.40 25.00 23.90 23.40 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.76 0.71 -4.46 63 757 2011-11-15 12:29:15 2011-11-15 12:29:15 1 5 529 0 63 374 0 139.40 32 41.03 NEW IGpstpplppphG.pPpRhpsspYGacWalYpppstp.Yl.luh.pc..s+VsulYssuspls..hsshclGpstpplhcphshpPphslptssppYphchscc-h.hcslhph.sshY.splahDpa.ssplsul+hlsccshhchpsYt ...................lGpshcslpppaG.pPc...Rlhssta.GachYsYppcspp.YhhVuh.tc..c+VsulYsssptls..luPlKlspppuclh.p.+hulpPEhshphspppYchEh.-c-h.hpsll+h.sclYAplaaDpp..ssplhuVphlscphhsclcPY.................................. 
0 21 43 57 +14356 PF14505 DUF4438 Domain of unknown function (DUF4438) Eberhardt R re3 Jackhmmer:D2C4U6 Family \N 27.00 27.00 319.50 319.20 21.60 17.70 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.62 0.70 -5.61 31 62 2011-11-15 14:52:22 2011-11-15 14:52:22 1 1 58 35 28 65 22 259.40 51 88.24 NEW sYcVst-GpshlLPusGGITYNVplGDsshGhsGDHlEPGVSh....+s...sscp......NsuLphhuClGNpAcVlSG-AKGtpGhVTG+HGGl.-HVlVpFscEshEKLslsDpIhI+AhGQGL+LhDaP-lplhNlDPcLLc+hsIcp..ccGtLcVPVsshlPAhlMGSGlGussshpGDYDIhTsDpcssccaGlccLRFGDlVAlhDpDNcaG.RtYpcGAloIGVVVHSDChpuGHGPGlTslMTutsutIcPhlDspANIAshL ....YcVst-GpshllPusGGIoYNlplGDsshGhsGDHlEPGVSh.......+s.......sscp.NsuL.hhuCIGNpApVlSG-AKGtpGhVTG+HGGl.-HVlVpFsc-shEKlslsD+IhI+AhGQGL+LhDaP-lplhNlDPpLL-+hsIpE..ccGtLcVPVsshVPAalMGSGlGuusshpGDYDIMTsDtcssccaGlccLRFGDlVAltDpDNpaG.+tY++GAloIGlVVHSDChpAGHGPGVTslMTutsutIcPhlDspANIAshL.. 0 8 16 25 +14357 PF14506 CppA_N CppA N-terminal Eberhardt R re3 Jackhmmer:E0TM64 Family This is the N-terminal domain of the CppA protein found in species of Streptococcus. CppA is a putative C3-glycoprotein degrading proteinase, involved in pathogenicity [1,2]. It is often found associated with Pfam:PF14507. 25.00 25.00 25.00 25.20 24.90 24.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.50 0.71 -4.45 12 394 2012-10-02 15:00:03 2011-11-16 09:00:13 1 2 368 4 25 213 1 117.00 61 50.78 NEW hhsPVL+VNNRclN.sFYppoLGhKhL.EEsAhh.hus.t.ptt-+hllEESPShRTRtV-GsKKLscllIKsssPpEIEtLLApGsp.hcpLFKGppGYAFEslSPEsDhhLlHAEDDlppLp.lt- .........p.IlPsLKsNNRcLNpsFYhcTLGMKsLLEEuAFlS..LGD.Q.oGhEKLVLE.EuPSMRTR......+VEGhKKLu+llVKVpNP.hEIEulLu+s...c.u...l.....c+LYK....Gp....NGYAFElh.SPEsDLlLlHAE..DDhssLhclt.c....................................................... 0 2 7 15 +14358 PF14507 CppA_C CppA C-terminal Eberhardt R re3 Jackhmmer:E0TM64 Family This is the C-terminal domain of the CppA protein found in species of Streptococcus. 
CppA is a putative C3-glycoprotein degrading proteinase, involved in pathogenicity [1,2]. It is often found associated with Pfam:PF14506. 25.00 25.00 28.40 25.60 22.10 24.60 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.43 0.72 -3.83 13 384 2011-11-16 09:02:29 2011-11-16 09:02:29 1 4 382 4 26 177 0 99.50 49 40.97 NEW LSpFpl-slsLNVPs..c.ucuFYcs...splshsl-FppupGsDLslssslTWDLEhLEapVs.-aDlsuL+sphEs..pps..YlDK+cKlLVhoDsSpIElWFp .............................................LSpFEI.ShELplPs..c.hcuFL-s............................scl.............s........soLcFl.AQGpDLsVDNslTWDLoMLKFhVs.-hDlAuLRp+FEu..p-a..FIPKS-KFhLscDpsNlELWFE.. 0 2 8 16 +14359 PF14508 GH97_N Glycosyl-hydrolase 97 N-terminal Coggill P pcc PF10566-seed Domain This N-terminal domain of glycosyl-hydrolase-97 [1]contributes part of the active site pocket. It is also important for contact with the catalytic and C-terminal domains of the whole [2,3]. 25.00 25.00 29.50 29.50 22.50 21.80 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.95 0.70 -5.41 243 798 2011-11-16 12:22:34 2011-11-16 12:22:34 1 17 279 12 179 796 147 258.40 27 38.27 NEW lsSPDGplplsl.......shs.s............p.sYslpa.....puctllpsStLGlphptss........................hspshp.lhssppssh.ccsap.sh.GcppplcscaNElslshp.ptts..............t.htlpFRlasDGlAFRY.p..hstp.tsht.......hhl.sEtTpFsh..ssstpuah.....hs.t.t................hs.........saEt.Yppsslsph............pt.............................hsphPlhhcst..s.s..halslpEAsLhs.YsuhtLp.ss......ts.shpuths.ssts....................t.tsh.h..........psshsoPWRslhlucssssllp.osllhsLs.-P 
............lpSPDGplplsh......slsts............pssYplsa........cscsllpsStLGlphpsss.............................h.spshp..lpssp.ps.s..D-sap.sh.Gcpppl+scYNElslshppsts..............tth.lpFRlasDGluFRYp......hPpp..tsht.......hhlpcEtTpFsh..s.sst..p..uahhs.s..s...................ap.................saEt.Yppst.l.sch.........pt.............................hsphPlh.hcss...c..G...halslpEAuLh...s..Ys..uhpLp.ss......pstshpuths.ssts............................s.tsh.h......psshsoPWRslhluccstsllp..oslhhsLscP............ 1 71 152 171 +14360 PF14509 GH97_C Glycosyl-hydrolase 97 C-terminal, oligomerisation Coggill P pcc PF10566-seed Domain Glycosyl-hydrolase-97 is made up of three tightly linked and highly conserved globular domains. The C-terminal domain is found to be necessary for oligomerisation of the whole molecule in order to create the active-site pocket and the Ca++-binding site. 25.00 25.00 40.20 38.70 20.50 24.80 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.13 0.72 -3.92 195 791 2011-11-16 12:46:07 2011-11-16 12:46:07 1 17 276 12 177 794 106 99.50 33 14.68 NEW ssWD-TchLpuc.sG-YlslAR+s.......ss.sWalGuhospsuRs..lplsLs.FLs.pG..tp.YpAplYpDG.sAsh......sspsa...plpp...ppV.sspspL.plphAsGGGhAlplh.. ....ssWD-o+hLcuc.sG-......YlslAR+p...........us.sWalGulss..p..psRs..lplsLs...FLs..tG..............pYpA....plYpDutsAph.......sspsh...........phcp............tpV..ssp.spL..plphusuGGhAlplh............................. 0 71 153 169 +14361 PF14510 ABC_trans_N ABC-transporter extracellular N-terminal Coggill P pcc Pfam-B_101 (release 25.0) Domain This domain is found at the N-terminus of ABC-transporter proteins from fungi, plants to higher eukaryotes. It would appear to be an extracellular domain. 
25.40 25.40 25.60 25.50 25.30 25.20 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.63 0.72 -3.89 317 1186 2011-11-16 17:21:30 2011-11-16 17:21:30 1 43 200 0 805 1238 0 101.70 20 7.47 NEW Lspphsp......................................................t..............D.....s..pL..s.........spsc............s.F-.scpal+plhchhspsuhp.h....phGV.sFcsLsVpG.u..suschtsTl.sNhhhs .........................................................................................................................................................................................D.....tpL..s...........spsc.......................................p.ac.s.cpal.c..p..h...t...c.t...h.c..p..s......Ghph.....plGV.tacsLsVpG.u..sssp.h.sTl.hNhhh........... 0 147 445 691 +14362 PF14511 RE_EcoO109I Type II restriction endonuclease EcoO109I Coggill P, Eberhardt R pcc pdb_1wtd-Jackhmmer:Q9RPJ3 Family This is a family of Type II restriction endonucleases. 22.50 22.50 22.90 23.20 20.40 20.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.10 0.71 -4.91 22 53 2012-10-11 20:44:47 2011-11-17 13:56:36 1 2 52 4 15 47 3 194.80 28 76.72 NEW pl.ppscpalcppIsphHp+phppL.ppLcLhclLc.KNPaLF+.spslLous-llcullcAa.....LuSs-EThFGs.alEsLAhalsptshts.p......SshpGIDlEF.....ppDup+hhlslKSGPNhhNusplpphpscF+pstphlRos.t.shphsslsGssYGccpp.sps....................cYhchsGQcFWphlo.G-cshYhclI-slGctAcEps ..................................................t....thpphht..I...p.+thp.h.pphp..pl....NPaLac.hpsl.sup-hsculhcAh.....luouc-TsFGs.hhppLhhhlspssh.................................SshsuhDlEh.....ppDGphhllplKSGsNshNpspl....pplp....pcF+phtsphR.....shs.psl.sGlsYGcssp.sst.p.h...........................t.cYhhlsGpcFWpalo.G-cshYtclhcsls.hApEhs............. 
0 5 11 14 +14363 PF14512 TM1586_NiRdase Putative TM nitroreductase Coggill P pcc Jackhmmer:Q9X1S2_pdb:1vkw Family Compared with the more traditional NADH oxidase/flavin reductase family, this family is a duplication, consisting of two similar domains arranged as the subunits of the dimeric NADH oxidase/flavin reductase with one conserved active site. 27.00 27.00 27.00 27.10 26.90 26.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.41 0.70 -11.35 0.70 -5.14 5 84 2012-10-02 14:48:17 2011-11-17 16:31:20 1 1 81 1 19 82 2 203.90 23 84.37 NEW MNIFEAIENRHSVRDFLERKMPERVKDDIENLLVKFITKKLDWKINLSSFPSYIYAKAEKHFDELVEYGFQGEQIVLFLTAQGFGTCWMARSPHPDVPYIIVFGYPRTRNFTRKRRPITSFLENDLEELPPEIVKIVEMTILAPSALNRQPWKIKYTGGELCISSERPVDLGIALSHAYLTAREIFKREPVIQKREEDTYCLILNP .......................................................................phh-hh..R+SVRpa......pctl........s......pp.......l......h.....p......pl...........p..........s..........l...................h....p....h............l.......h...c.p.......h.....t....h..p....h....p....h................s...............p..........h......sp...Y.....l...........s.hh...ucc.pt.........s...h...h.p.............p.hGYhuEplVLhhpslGLuTCW...l.........u........h........s.....................p.....t...........c.............l.........s.......h..l..I..s..hGa...s........p.s..p....sh......s..c.....+..........p..+.sh.......pp..h.hp...t.............h....p-..hs..p.......h..h..p.hlcushLAPoAhNpQ.a.......h..h.........h.p.s.s..pl...........h.........h..............................................................................................hthttt............................................ 0 13 19 19 +14364 PF14513 DAG_kinase_N Diacylglycerol kinase N-terminus Eberhardt R re3 Jackhmmer:P23743 Family This domain is found at the N-terminus of diacylglycerol kinases. 
23.00 23.00 23.00 23.00 22.80 22.80 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.00 0.71 -3.97 9 283 2011-11-18 15:00:42 2011-11-18 15:00:42 1 30 72 2 119 251 0 118.70 37 20.79 NEW tpccWspLSPpEFsQLQKYsEYSTKKlKDVLcEFptsGshspYsPc-.......sIsaEGFchFMKsYLEs-.lPccLspHLFhSFps+.pp..................................ss.sscSKst.hpush+.................p.ssVlpLKDlVCYLSLLEsGR ................................................ph..loPt-FtQLQcY....-..Y.S.oK.KlpDVLptF......t..s..G...h....t....pY...tt...................sIsa-GFchFh+saLEs-.lPpchst+LFhSFppp....p............................................................................................................................s.t.....t...tp..................t..........................................h..l.LpDlsCYhSLLEtGp.................................................................... 0 22 32 67 +14365 PF14514 TetR_C_9 Transcriptional regulator, TetR, C-terminal Coggill P pcc pdb_2qtq Domain This family comprises proteins that belong to the TetR family of transcriptional regulators. This family features the C-terminal region of these sequences, which does not include the N-terminal helix-turn-helix. 25.30 25.30 25.30 25.30 25.20 25.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.63 0.71 -4.33 4 174 2012-10-03 00:15:22 2011-11-21 15:07:58 1 2 158 5 45 121 5 127.00 28 58.91 NEW DMSPpAKLRRHlutsIcTYacYPYLpRLLhtLhRDusEtpARcIAcpYlpPLtcAYpRhIptGVtsGsFRPlDPQLFYFsVhGAsDphFSuRhVL+asaG.DplsEpLRcpYpEpsl-hIMuGlLA..tc .......................................................s.....tthphhlptl.hctYh....ch....P.h.RLl...p.s....hpp...st..tt....t...p.ppl.....lcp.....hlt..P.lscslp.sll.EcGlctGlhR.p.lDP.hhalolhuLshah.a...sp.sLttla.Gh-..hup.phhcphlctshpLlhtGh......th........................... 
0 12 24 34 +14366 PF14515 HOASN Haem-oxygenase-associated N-terminal helices Coggill P pcc Jackhmmer:Q9HY91, pdb_3bjdA Domain This domain represents a pair of alpha helices, which are found at the N-terminus of some Haem-oxygenase globular domain. 25.00 25.00 123.70 121.40 21.00 19.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.01 0.72 -3.61 2 32 2011-11-21 16:17:14 2011-11-21 16:17:14 1 2 16 3 2 30 0 86.30 68 26.25 NEW hopRshS..th.....stutllA-ALtsuAohcQIp.AhpAlhAlTtKuLtGDtpAYAtYQtLLh.LplusDs.Tt.TRRWhAstlYhVE-RF .hspRshS.Pth.....ptutllA-ALtstAo.cQIs.AhptlhAlsttGLtGDtpAYAtYQtLLh.LplusDstTt.TRRWhAptlYhVE-RF 0 2 2 2 +14367 PF14516 AAA_35 AAA-like domain Bateman A agb Jackhmmer:A0YLR5 Family This family of proteins are part of the AAA superfamily. 27.00 27.00 27.00 27.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.97 0.70 -5.80 99 465 2012-10-05 12:31:09 2011-11-22 16:51:27 1 74 91 0 172 633 10 318.70 25 46.18 NEW hPpGsls.LsSsFYlERs.....PlEppsYppIhpP.Gu...LIRIKAP+pMGKTSLlsRlLspAppp....uYpoV.LshppADpsl.hssL-+....FLRWFCsslo+pLpLt.....s..pL.....--YWD..-thG..SKhsC.otYFccYLLppl........s.pPLVLuLDEVDclFpaPclApDFhsLLRuWHE.cu+....ppplWpKLRLllVHS..TElYls.LslNQSPF.NVGLsIcLscFohpQVp-LApRasLs.hstsp.....lppLMshVGGHPYLl+LALYalsp..............p...plT........LcpLLppAsTps..G.IYscHL+cphhpL.pppP....-LspAhppVlp.us.pslpL..-shtuaKLcShGLVphp..GsplhspCp.LYRpYFpc 
..............................................................................h...s.l..ss...Yl.R..........h-p.hhptl.....hp..Gp...h.hhlhssRQMGKoS.L....h.hch...h.....pphp.p.p..............s.hts...s..h....ls.hp.t...h..s...s....t............hs.s...hpp.............ah.p..hh...s..tpl...sp..p...l..p..L................................t......pl.......p..ph...W.p....c..p..hs.........sh...ph.....sp........ahp..p..h..l..Lt...ph...............s..ps...l....V.lhlD....E....l..D...p...l...h....ph......s.....h..h.......p.-F...h...shlRthap....ppt.......p.ps.a.p+.Lphsl....ht....s.......Tp.s......l......h.....hc........h.......s......p......oP......F.....Nl..G.h..s.lc....Ls..sF....sh.pp....l....p.s....L....h....p....t....h....t....h.p.....hspsp......lpplh.t.hs....uG..p.P.aLspth.hhhl.sp..........................................................p....phs................lpp.l.l.pp....t.h.h...ps.......t.......s.....p...HL....cp.h......h..ppL.....p.p...............cLht....hhp......pllp.........t.....t.....p..............h..t.h..........p.....s.......t.t..h..pL....h.GLlhhp...ts.tlh.ts..lYpthFt........................................................................................................ 0 31 126 171 +14368 PF14517 Tachylectin Tachylectin Eberhardt R re3 Jackhmmer:Q27084 Family This family of lectins binds N-acetylglucosamine and N-acetylgalactosamine and may be involved in innate immunity [1-3]. It has a five-bladed beta-propeller structure with five carbohydrate-binding sites, one per beta sheet [2]. 
25.50 25.50 25.50 25.70 25.30 25.20 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.04 0.70 -11.88 0.70 -5.03 8 90 2011-11-23 07:54:36 2011-11-23 07:54:36 1 9 23 1 14 107 1 124.10 40 56.07 NEW auVspDhclptGssPcstsDsa...hsRAsplG+..hsshchlhhuPsGcLYuVcuu...pLYpG.pshsosuspWh.upu++IGcGuWspF+......FlhFDPsGlLYAVotsGpL..YRussPss-spsWhttpAshIGspGWssF-sLFFcPpGhLYuVs.scs..cLhKph.PPsussDcWLu.uoollupuuW.cs.s+FluFossGsLauV.cssGtl..Y..RstsPppssssYhccAphlGs.uaspa+h .....................................................................................................lYulp.ss....hYpt.....pps..ssph....h.tss.....thIGpu.WttFp......hlhh.ssG.LY..GVss.s..pF..Y+RsP..PTHuSDNWL..G..SAchIGoGGW+s...FphLh.................................................................................................................s............................ 0 6 9 14 +14369 PF14518 Haem_oxygenas_2 Iron-containing redox enzyme Coggill P pcc Pfam-B_412 (release 26.0) Family The CADD, Chlamydia protein associating with death domains, crystal structure reveals a dimer of seven-helical bundles. Each bundle contains a di-iron centre adjacent to an internal cavity that forms an active site similar to that of methane mono-oxygenase hydrolase [1]. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.40 0.72 -3.87 140 559 2012-10-02 21:56:19 2011-11-23 09:10:08 1 8 409 3 220 662 254 104.50 24 27.19 NEW Lhp.....shhD.Eh.G.sGp.scp.h................HspLa.......tchhpshGlsss.h.........shhct..hs.s.....t.....sl.u.....hsNhh..hhuh.p.+.p.hh.sthlGths.s.hE....hsss..h....hpphsp....u.lc.R.h.G...h.stts...hpaas.Hlp .....................hhpshh-.E.h.G.....sGp..pp.s.................HspLa.......tcllpshGls.s.h..............................thhpt.hs..s..........t........sh..t..........hs.shhh.hhsh.p.c.phh.sthlGhhh.uhE....hhss.t...hp.phsp....sLc.c...h.G.....h.s..t...htaashHlp............................... 
0 52 127 189 +14370 PF14519 Macro_2 Macro-like domain Coggill P pcc Jachmmer_A6ZME4, pdb_1njr Domain This domain is an ADP-ribose binding module. It is found in a number of yeast proteins. 27.00 27.00 27.00 27.00 26.60 26.90 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.59 0.70 -11.93 0.70 -5.29 3 54 2012-10-02 00:07:53 2011-11-23 11:38:38 1 1 53 3 34 65 2 231.40 30 93.76 NEW MTts.s++p.hN...phRIVLCDTNEVVssLW+KhlPKuLlpssKhVClHHGsLESLhcSMRKscspH..........sGcKYAIVSPGNSFGYLGGGFDLALaNYFGGKPFETWFRscLGNRYHTVGSATVIDLpRChLsch.EsRDGIRYIIHVPTVVAPS+PIFpcupPLKTGYEPVFNAMWNSLMHuP+DlDGLIIPGLCTGYAGVPP-ISCKSMAFALoLYMLsD+ISKEL+NlLIMYYLGYPFEPFFLESCpEECQtLGIDIEpLcSFNVE+DsI-tLIP++lLs..L ................................................................................................................hp...h................................spsh...uIVSPuNSaGahGGGFDhAlh.......p.........h.F..G.....s.....+....s..h...E......s...hhR..ppl..s......sc........Y.......t....s...l...GS.sTl..lcLt.......cth.t.p.............tptp.......sl+YllcsPThlsP...pts......has.p...tshps.shp.l.....Fs.s.hW...N....sl......h.........p.s....s.p.c.I-sLllPGLsTGauGVss.lus+pMsFAlpLah...h...t..........................................................................th.................................................................. 
0 6 17 29 +14371 PF14520 HHH_5 Helix-hairpin-helix domain Bateman A agb Jackhmmer:C6UUJ1 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.31 0.72 -8.79 0.72 -3.80 554 12686 2012-10-03 02:11:09 2011-11-23 13:36:33 1 143 4845 77 3277 11596 4688 61.00 23 15.86 NEW p..th..............h..p...t....L..h.......s...l.........sG........lG......s.p......hu.tt...l.............hpt..........ht...............................................o...l..p....p.l....t.p..............s......s.....h..c.p.........................................Lt....p....l....................G...............................l....u..p.c...pAppl...l.tph+.c .............................................................................h....ppl..h......s......l.....sG........lG..sc......hs.tt...L...........................hsp........t..ht...............................................o....h..p....c.l....t.p......................h......s.....h..c..c...................................................Lh....p.....l...................................tG..................................l.....u..cc...sAppl...l..thc........................................................................................................................................... 0 1019 2066 2740 +14372 PF14521 Aspzincin_M35 Lysine-specific metallo-endopeptidase Coggill P pcc Pfam-B_2237 (release 26.0), pdb_1g12 Domain This is the catalytic region of aspzincins, a group of lysine-specific metallo-endopeptidases in the MEROPS:M35 family. They exhibit the following active-site architecture. The active site is composed of two helices and a loop region and includes the HExxH and GTxDxxYG motifs. In UniProt:P81054, His117, His121 and Asp130 coordinate to the catalytic zinc ligands. An electrostatically negative region composed of Asp154 and Glu157 attracts a positively charged Lys side chain of a substrate in a specific manner [4]. 
30.00 30.00 30.10 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -11.18 0.71 -3.69 34 226 2012-10-03 04:41:15 2011-11-23 14:29:17 1 8 116 9 119 217 3 142.80 27 48.57 NEW u.Lpptsss..pp....apsWFGshssp...........+hsslhsphsshstsh.s..s..hthsCs......sst..s...sshthshPsph.tp..IhlsssFhs.hss.....sGhDSpsuTLlHEhSHFss.........shGTsDh..............sYs..pssupsLupssPspAlpNADshEhas- ..........................................................s..........t.p.....appWFGshssp...........+hspspst.hhphcpshts.tt..h...hhsss.........sp...s.......sthAhshssph.hp.....lal.ss.t.....Fhp.sss............oGp...-.S......+....suTLlHEhSHh.s...................ss..t..opDh..............sYu..ppssp.p....LApspP.spAlpNADsaphah.................. 0 87 97 113 +14373 PF14522 Cytochrome_C7 Cytochrome c7 Eberhardt R re3 Jackhmmer:Q74BP5 Family This family includes cytochromes c7 and c7-type. In cytochromes c7 all three haems are bis-His co-ordinated. In c7-type the last haem is His-Met co-ordinated [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.26 0.72 -11.57 0.72 -4.34 198 1003 2012-10-01 23:37:15 2011-11-23 15:49:51 1 52 372 25 549 1372 400 72.40 23 23.15 NEW shFsH.......ptHhp........th.....s..........Cs..sCH.sp................hh....sspt.....hshsph.......t..s............ptCss.CH..............sup.....pu..........................sssCs.pCHt .................................FsHphHht............th..s..................Cp..sCHss.............................ht.....ttph....................hphssh...t..s..........ttChs.CH..................................stp............s......................hssssCs.pCH................................................ 0 172 394 505 +14374 PF14523 Syntaxin_2 Syntaxin-like protein Bateman A agb Jackhmmer:E7Q9M8.1 Domain This domain includes syntaxin-like domains including from the Vam3p protein [1]. 
25.20 25.20 25.20 25.20 25.10 25.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.27 0.72 -3.99 91 588 2012-10-03 05:55:03 2011-11-24 17:46:35 1 10 286 1 374 673 2 97.20 24 35.43 NEW luspl...hplsss....lsplp+hhpplG.Tt.....+Ds.clRcpl.c...........phhppssphhcphsptlpplsph.......................t.....pppphtppKLsp-FppslppapphQcp.htp+ppsts ....................................ustl.plsps........ssplpch.lspLG..Tt..........pDoscLRcpl.p...........phpppss.pls+cssptl+phsph..............................t.pppp+hppp+Lsp-FpssLppFQtsQ+pss-+p+t..h............................ 0 100 169 281 +14375 PF14524 Wzt_C Wzt C-terminal domain Bateman A agb Jackhmmer:A8A1Q5.1 Domain This domain is found at the C-terminus of the Wzt protein [1]. The crystal structure of C-Wzt(O9a) reveals a beta sandwich with an immunoglobulin-like topology that contains the O-antigenic polysaccharide binding pocket. This domain is often associated with the ABC-transporter domain. 22.00 22.00 22.00 22.10 21.90 21.80 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.65 0.71 -4.49 236 837 2011-11-25 13:44:43 2011-11-25 13:44:43 1 9 707 2 260 731 162 141.70 17 33.12 NEW thts.sttphGsspApI..pssplhs.tpGp.ssh...lpsG-plplplp..hph...ppslps.sl..hGhhl.+..sppG..h.lhG.sNohhpp....ttls..hht..........upphplpaphph...Lss..G.pYhlssul...................t..ptp...stp.hchhpcs.hhF...pVh.s..................spphhGlht.lss ............................................th........tpttspltpsplhs...tpsp..hp.h...lps.G-p.lplclp..hps....pps.lpp..sh..hGhtl..+.......s.ppG.........thlhG..sNo.hhps.......hplshh...........ssphphphphth....Lts..G.pYhlslul..................pppsth.h.chh.pp..s..hhh..pVhs..........p..h.Ghh.h..................................................... 0 95 178 225 +14376 PF14525 AraC_binding_2 AraC-binding-like domain Bateman A agb Jackhmmer:D9V2D0 Domain This domain is related to the AraC ligand binding domain Pfam:PF02311. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.88 0.71 -4.69 230 1819 2012-10-10 13:59:34 2011-11-25 14:10:04 1 6 746 0 600 1693 67 174.60 16 52.75 NEW hcchluphhs...s......t..........clpst..s...ts..shp.....uphpthplGs..lslstlp......hs........sps......pl.....cs.sp............psh..hhlplslpGpuplp.ps.sp.pstsssup..sslhsssp.s.hp...h.p.hss.........ss.c.....plhlpls.............................c.......ph..ls.pshp.t...........sht.hs...sp.................hshs.sshu......thhtp.....hlptlhs.phsths.....s......hhpps.h..hspph...p.s.Llh.shLh ...........................................................phhsph.hs.s......h..........p..hps...s....tp...thp.....uphpt.ht.hss......lplspls........hs...............sth............tl.....ct..ss................sth..hhltl....l..sGps.t.h.p...p.s.....sp...ps.t.hs.sGc.....hslhcs..sp.s.hp...h.p..hpt................ss...c.....plsl..t.....lP.............................csh.Lp.phhs.t...........sh.h.hs...p.........................l...shs...sshs.......phhtp......h..lpplhp..phst.hs.............tt.t..t..htpth.....p.Llh..h..h...................................................................................................... 0 118 310 451 +14377 PF14526 Cass2 Integron-associated effector binding protein Punta M mp13 CATH:3gk6A00 Domain This family contains Cass2 from Vibrio cholerae, an integron-associated protein that has been shown [1] to bind cationic drug compounds with submicromolar affinity. Cass2 has been proposed to be representative of a larger family of independent effector-binding proteins associated with lateral gene transfer within Vibrio and other closely-related species. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -11.05 0.71 -4.20 24 758 2012-10-02 11:08:51 2011-11-25 15:11:58 1 8 542 1 127 2424 50 145.20 18 79.49 NEW clpchsuhslluhth..phppspt.....hptchsphapphh..p..pt....hsph.tpp...pchaulhts.t......psthshhsshssps......hsps.......hphhphPsupYhshps.p...uphs.p.lt.chatph.htth.ppp.psat........psss.s........p..hEhY........hpss................p....lElhIPV ...............................................................................................l.ph..thtlhuhtt.....hp.ptptt............tt.h.s..shapplh.....p.....ps...............hsp..l...t.t..........ssh.....a..u..l..a..p.......sh.t........hpsc.....h.s..h.........l.s.h..s..sps..........h...p.s....................hphh.p..l......s.s.c.Yhs.h.ss.p..........sphs..p..ls..p.....h..a...pth....hthh...p..p...ssht............hsss..s................s...hEhY............hpss........................s-lhlPl....................................... 0 51 88 110 +14378 PF14527 LAGLIDADG_WhiA WhiA LAGLIDADG-like domain Bateman A agb COG1481 Domain This domain is found within the sporulation regulator WhiA. It is a LAGLIDADG superfamily like domain [1-2]. 23.20 5.00 23.20 5.00 23.10 4.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.06 0.72 -10.04 0.72 -4.12 23 1981 2012-10-03 01:41:40 2011-11-28 15:47:47 1 15 1950 3 344 1568 111 93.00 42 29.70 NEW tpualRGsFLuuGSlscP.pss.YHLElssssp-hspplpcllp..ca.slsuKlhcR+spallYLKcuEpIschLpllGAppuhhcaEslRlh+-hR .................t.+uYLRGAFLA.sGSlssP....-po..YpLEIho...h.p..-aA.psLspl.hp.......pa.tL.......s.....A.......K.s.......l.......E.......R.....+.......p.......u..........hls..Y...L....+..-...u........E.c.Is-FLslIGAhpu.hl.c.F.EclRIhR-hR............................................................ 
0 139 249 304 +14379 PF14528 LAGLIDADG_3 LAGLIDADG-like domain Bateman A agb Jackhmmer:P21505 Domain This domain is part of the LAGLIDADG superfamily [1]. 22.50 22.50 22.50 22.50 22.40 22.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.61 0.72 -3.74 233 970 2012-10-03 01:41:40 2011-11-28 17:32:55 1 236 486 19 348 1219 203 81.20 21 10.86 NEW t.sFLpGl....asuDG..sl.....p..t..pp................tlplss...s.sp....p........llc........plpp.l.L.h.p.hGIhu..plh.......t............c.......................p...................p....................psh...a..pLhI....su..cs..ht.pFhcpIG ....................h.taLpGhasuDG...sl.....p..t..ppt..................plphs.s......s.sp.....p........lhc........slpp.l.L.h.p.h.Gltu...plh...................p...........................p.....................p.............................................................psh...a.plhl....su..cs..h..tahp.l...................................................................................... 2 97 192 286 +14380 PF14529 Exo_endo_phos_2 Endonuclease-reverse transcriptase Coggill P pcc CATH:1wduB00 Domain This domain represents the endonuclease region of retrotransposons from a range of bacteria, archaea and eukaryotes.\ \ These are enzymes largely from class EC:2.7.7.49. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -11.03 0.71 -4.49 204 2004 2012-10-02 01:25:08 2011-11-29 14:09:45 1 110 233 3 1603 5329 1737 121.10 19 18.41 NEW lhlhusYhsPst.............hpphhpplpphhpphs.......hllsG.DFNu..........tp.hWuu................ss..........................pcGptL....hphhpptslthh............sp.s..pt.sTahstps..............t....Shl.....Dlshs...ssshhtt..................h...hh.....SDHphlh ...................................................hlhslYt..Pstt................................pth.h.pt.L.pt.h.hpph.........thllsG.DF.Ns...........................................................h.p.....s.....t.....hss...................tp.........................tp...............................................ppu..p...tl..........hp.h..h.p...p...t...s.L.t.h............................................................pt..t.....pt..hTahsspt...................tstl.....Dhhhs.sp.shhtt........................thh.t......hh.....................SDHp.l............................................................................. 0 649 1047 1503 +14381 PF14530 DUF4439 Domain of unknown function (DUF4439) Eberhardt R re3 Jackhmmer:Q7TXQ6 Domain This domain has a ferritin-like fold. 25.00 25.00 25.30 25.40 24.80 24.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.59 0.71 -3.67 37 325 2012-10-01 21:25:29 2011-11-29 14:31:28 1 1 321 2 85 267 7 124.60 31 52.61 NEW ALpsALAAEHAAlYuYGlluu+l.ssstpstApsuhstHRARRDslhthlpstGssPssstAuYtLPh......sVsssusAspLAuplEpcsAssatsll.tsssushRshAspALp-uAlRus+Wpusss......AFPGhsp ..............h.tsls.hEauslauh..ul..s..t....u....h.......ss......t.hp............sttshttH+spR-pltptlpst....usTsss..s..s..s..G...Y..tl.sh............s..ssssAuutpLhstl.Es.csssuWtsVs..tAssu..ssRshAlsuhspsAhhhs+httshs.........saPG............................. 
0 28 64 81 +14382 PF14531 Kinase-like Kinase-like Eberhardt R re3 Jackhmmer:B6KSS4 Family This family includes the pseudokinases ROP2 and ROP8 from Toxoplasma gondii (Swiss:Q06AK3 and Swiss:O15693). These proteins have a typical bilobed protein kinase fold, but lack catalytic actvity [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.70 0.70 -5.45 8 140 2012-10-02 22:05:25 2011-11-30 11:55:51 1 3 7 6 79 15114 247 263.20 26 50.30 NEW shpVsSpLGptsRsLl+sshlshschulhapssDpETsEshsl+Vhhhss.-sopp-l-ph+cpsLAlsL.hhul+NPppApsahRhlhPaDLVplssKshhhptpscppshhVhNhFhLhPsspssL.........phlschltppssp..ctsLsptARLhLTlQhIRLsAsLQscGlVHuchpssshhLcpcGGlaLssF..ssLsRsGs+ssh.upss....puauPPEhpup+tt..at..sssphTauhDAWsLGlsIahIWCtcLPhshsssthu.-a.............hFspCp.shP-sV+hLltphLphspcsR ...........................................................................................................................................................................s...........h..l.hhht.lt..t..hhh.spc.tp.p...htlh......h.....h............t....p.........s....st....tt...hpp.........hp...c.t...htht....h.........p...s....s.........A....p...h.....hR...hlh.....P.....D....h.l....tl....t........ps.........p..ht......t..ps.....h...h..l.......s..h.h..h..L..h..P..t.....h.....p..s...s...h.............................................tt.h..h..p....l.ht...h...ss.........p.p.t..h...s.h..h...s..+...h...h...L...T....h.....Q....h.l...+.L.........l...A...p...L......p....s........p....G....l....V.H.u..........c..lpP..p......s.h..h......l........t......p.......c........G...........t..........l...h......L...u....D......F............u.p.......l...h....+....s....G.......s....p..h..s........upss................................s....a......s.....P.......P..E.......h...h..s...p.......................................................p........p....s.........t......h......T......a..........u.....h......D.....A....WtL.Gl...s..l...a.h.l......W....C...
.t.c....h....P.....h......s......h....s......s......s......t......h......s.........p.h......................................F..s.....t....C.....t.....s........h.....P........-..........V......c.....t......Ll....pphL..phs.ppR.................................................................................................... 0 57 58 79 +14383 PF14532 Sigma54_activ_2 Sigma-54 interaction domain Eberhardt R re3 Jackhmmer:D1EI59 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.45 0.71 -4.12 21 695 2012-10-05 12:31:09 2011-11-30 16:25:51 1 32 602 10 154 18102 4682 141.20 25 30.88 NEW LGsSsslp-lpcpLEtsu..ppssPlLLsGEsGSshEhlA+al+psssP....Wlp.schppls...c...hP........h-...lLpp....A....sGGhLals-lsphuKshQpulhhlLs+.........u-.+hsl...Rllssuupshsp.htsssh-scLhphLSshslplPsL ....................................................................................lGpSthh.p.p.hppp.l...p...p.h.A........p...s......s......h........s......V...hl.h.GEsGo.G+pp.l....A....+.....h.....l...H.....p...h........u.s.p................tpt...s...hh...h..h..phs.......s........s...........................................tp.........h.lpp.........................u...........pG.GT..Lh.lsc..l..ctL..s..c.p...t...Qp..pL.s.phLpp.................................tc...cp...sh...........R.l..l...us.s..s...t.sht...p.......h..t.t.sph.t..t-La.h.hhs..s.pl.hs.L.................................................................................................... 0 50 105 137 +14384 PF14533 USP7_C2 Ubiquitin-specific protease C-terminal Coggill P pcc Pfam-B_1954 (release 25.0) Family This C-terminal domain on many long ubiquitin-specific proteases has no known function. 
29.40 29.40 29.40 29.40 29.30 29.30 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.19 0.70 -11.44 0.70 -4.90 69 415 2011-12-01 14:48:45 2011-12-01 14:48:45 1 20 259 1 265 395 4 201.30 27 19.14 NEW ptlaYElLs.hslsEL.Es++s...lKlhWlssshpc-.......p.hplhlsKsuTVpDllsclpc+hph.....s-ststclR..lh...Eh......tt.Khhchhs.p-psltsl...schh.t.............................hhhE...clPp-Ehpht.p..tt.....plltVhHFp+-ss......p.aGlPFhFhlppGEsFs-sKcRlpc+h..slss.KpFp.KhKFAl.lphsph..........pYlpD..cc..ll.shhhptc..........p..LG.LDH.scss+p ............................................hlaYphLs.hsls-h.-sp+s.............hKlhah........p.sshpc.............phplhlsKpusltDllpclppp.spl...........scpt...stclR..lh............-l.....hsp..Klhplht..tcph..lpsl....sct..p..............................................hhhE....clPp-c.hshs....t-..............................hllsVh...H..Fp+-st..........psaGhPFhhhl+p..uE..phtcs+cRlpp+h....tltp.cpFp.K..hKFAl.l..hsp...................pYlp-..st...l..h..p.p................p.hLG.L-H.sps............................................................ 
0 84 139 212 +14385 PF14534 DUF4440 Domain of unknown function (DUF4440) Eberhardt R re3 Jackhmmer:Q11v67 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -10.37 0.72 -3.83 216 1968 2012-10-03 02:27:24 2011-12-01 15:55:36 1 41 1054 19 759 4175 1032 109.10 15 69.00 NEW lhphcp.p...hh.p.u.h.....s.p..sD.hs...th.tpl...h..s.......s.-.h..hhh.....sss........Gth..h..s.........+pp..hlpth.........tp.......sh...h.t..h..t.ph.....ph.ps.......hp.l..p......hh...ss...sA.hlt....sp.hp.h.......pt...pt..s.....s...t.h.t.......tphth.splW.p+.p....ss..pWpl ...........................................................................h.tth...p.u.h.....s...p.....s-..hp....sl..tsh.....h..s...............s..-..h.....hh.l.........sss................Gth..h...s...........ppp....hh.pth...............ps.........tt.....h..p...t...t.sh........ph..ps..........hp..l...p........hh.....sc......sA....l..lp....hp..hp..h............ph.......ts...s..................s.....t.h..t............hp.st.h.stlap+.p.....ss....tWth................................................................................ 0 280 490 656 +14386 PF14535 AMP-binding_C_2 AMP-binding enzyme C-terminal domain Eberhardt R re3 Jackhmmer:A6L0Y5 Domain This is a small domain that is found C terminal to Pfam:PF00501. It has a central beta sheet core that is flanked by alpha helices. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.06 0.72 -3.94 244 1758 2012-10-03 01:00:17 2011-12-02 13:31:06 1 7 1084 15 662 1569 228 94.80 32 21.53 NEW GVNlFPoQIEplLhph.stlu.scYplllsR.p..s..s..hDplplpVEhsp..t.h.s.c.ph.............tp......hpp......lpcclpcpl+sh.lGl...s...s...cVclVpPtolsR.....S.E.G...K..A+RVlDpR .......GVNVFPoQIEcllhp..h..t.t.l.s....s..c..Y..plh.l...s+.c......s...p......hD.p..lplp.VEhpp..th..s...p.................tp........hpp......lpcpltccl+sh.lGl.s...s...c.....Vpl.lpss....olsR.....o.p.G.....K..ApRVhDhR............ 0 235 477 589 +14387 PF14536 DUF4441 Domain of unknown function (DUF4441) Coggill P pcc Pfam-B_1275 (release 25.0) Family This family is largely made up of uncharacterised proteins from the Ciliophora. The function is not known. 22.40 22.40 22.50 22.40 22.10 22.10 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.73 0.71 -4.12 66 94 2011-12-02 16:40:58 2011-12-02 16:40:58 1 2 2 0 94 142 0 118.40 24 42.38 NEW pNh.hKNI.l+uF...tpalh...............pppcp.....................................hlhphhpph.........tpp.p.......hpphp..Kphppahcppshs.N.p.....lppllpsppaspl.FpaaLpptsptWL.ppS+lpstppahhhIphlhpshpst..phlpplpha.K ................................................................................................pNhhKNI.lpuF....hpalh...............pppc.p...............................................hlhphhpph.........t...p.......hpphp..Kphpphhp.ppshs.Ntp......lppLlppppapph.FpaaLptpsptWL.ppSKlpspppahhhIphlhpshpst..phhs.lphhp............... 
0 94 94 94 +14388 PF14537 Cytochrom_c3_2 Cytochrome c3 Eberhardt R re3 Jackhmmer:Q8EDL6 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.81 0.72 -11.87 0.72 -3.64 85 1009 2012-10-01 23:37:15 2011-12-07 10:41:39 1 39 603 40 274 1023 144 93.50 36 38.08 NEW hHtptu.hsCt.sCH.......s...sst..ttst....s.hpspp................ClsCHs...sh.p.....ph......sp.ptt......................s.Hs.......sH..........spls.CssCHps.Hp...........ts.........hC.s.s.CHs ..................................th....hslsCs.sCH.................u.....pss....spp...cp.......ulcssh......................................Ch..S..CHh..sp.p....h.......pc..s.............................................hhPH.c.........sH...s.........scls.Cs.uCHu.h..Hs....................................c.tt.....phCs.s.CHs....................................... 0 87 186 233 +14389 PF14538 Raptor_N Raptor N-terminal CASPase like domain Bateman A agb Jackhmmer:F5H7J5 Domain This domain is found at the N-terminus of the Raptor protein. It has been identified to have a CASPase like structure [1]. It conserves the characteristic cys/his dyad of the caspases suggesting it may have a peptidase activity. 
27.00 27.00 27.00 34.30 26.80 26.00 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.82 0.71 -4.37 48 343 2012-10-03 02:24:44 2011-12-08 19:24:50 1 25 255 0 246 365 5 148.00 52 11.29 NEW +hKTsslsLllCLNIGlDPPD....lhKssPsA+hE....CW.....lDPps....hs..........t..KulcsI.upsLppQYcph..p.+.....s+a+.tlDPol--l++hCtshR+sA+s...-RlLFHYNGH..GVP+.PTssG.....EIWlFN+saTQYIPlslh-LpsWlssPslaVaDCSsAGhllpsFp ......................................................hKTsosALslCLNlGVDPPD....llKssPsA.+lE....sW.........lDPhs....hss.............p..KAlEpI.GpsLQpQYEph...p.R...............sRYKptLDPoV--lK+hCtoLRRsAKc....ERVLFHYNGH..GVP+.PTssG.....EIWVFNK.......sY..........T.......QYIPlSlYDLQoWluuPoIaVaDCSsAG.IlpsFp....................................................... 0 98 147 214 +14390 PF14539 DUF4442 Domain of unknown function (DUF4442) Eberhardt R re3 Jackhmmer:Q9I2R0 Domain This family of proteins is found in bacteria, archaea and eukaryotes. Proteins in this family are typically between 139 and 165 amino acids in length. There is a conserved PYF sequence motif. There is a single completely conserved residue N that may be functionally important. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.48 0.71 -4.17 66 935 2012-10-02 20:54:35 2011-12-09 10:57:25 1 2 624 4 278 1704 374 137.40 21 84.79 NEW GptlFStshsh+APYFuT.lpPplpcLcPs..hsplpl.c++tVpNHIGTlHAIAhCNhAEhAhGhhsEAolPs...stRWIPKGMsVpYlAK.AposlpAsAphs.s..........sa............pp.....s..s.-...lsVsVplh...D.psG.hp.V.spupIsh.WV .....................................................h......hhht.hhP.h.h...hs.sshcl..hplsss.....pscl.pl..hp..h.ts.c.N.a...l...s..o.....h..auGulh.shs-.s....s.hG..h...h....h..h...t....plsp.............chh...h..h...s...K..u..hplc..a...l.+.........u..c...u..s...l.p.Ap.spls.p..........tph............tp.....p...tt...h.h.......l...p....lh....-..pp.G...ph...l..spsphph.h............................................................................. 0 97 182 244 +14391 PF14540 NTF-like Nucleotidyltransferase-like Coggill P pcc CATH:3c18A01 Domain Structural comparisons with PDB:1kny indicate that this N-terminal domain resembles a nucleotidyltransferase fold. 27.00 27.00 38.40 37.70 22.10 21.20 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.33 0.71 -10.24 0.71 -4.50 23 78 2011-12-09 13:30:00 2011-12-09 13:30:00 1 1 78 3 27 80 0 118.80 40 40.16 NEW MEslLRPIYQE+ASpssTLGllhlE++..pppsslTDsFDslLLVIscc.sEpshalKHYphssc+suL+lVs-cpLpcWllhGoNR+llDWlhpG+llFDRNEYlppL+pcLppFP.ppRc .............M-slLRPIYQE+AScssTLullhIE+c..ppp..uulTDsFDslLLVIVcp.s-pshalKHYphcpc+AuLahVo-ppLpEWlLlGosR+lIDWllpG+llFDRNEYlppL+pcLppFPhtpRc..... 0 7 18 21 +14392 PF14541 TAXi_C Xylanase inhibitor C-terminal Coggill P pcc CATH:1t6eX02 Domain The N- and C-termini of the members of this family are jointly necessary for creating the catalytic pocket necessary for cleaving xylasnase. Phytopathogens produce xylanase that destroys plant cells, so its destruction through proteolysis is vital for plant-survival. 
21.90 21.90 21.90 21.90 21.80 21.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.92 0.71 -4.69 112 1552 2012-10-02 15:32:34 2011-12-09 14:54:42 1 19 111 13 878 2503 14 147.00 24 35.70 NEW tYalsl.puIpls.....sppl.s.lss.shh.........u...tGGshlsosssaThLtsslYpsltpAFsp.thst........h.p.ssssssFchCassss.h........tt..hshslP.sl.sLhhpu.....us.....phpl.us.shhlp.s..s...s.s.........sh.CLuFlss.................sstssslIGuhQhpsphl.FDltssplGFss ...................................................................Yhlsl.pu..IpVu.................sphl..s....l..ss...shh...............s..........suG..sllDS.GTs.hThLs....sss.Ypslppsh.tp...t..hst......................ss.s..h...s.s..h....c..h.C...ashss.........................ttstlP.s.....l.slpFps...........Gu.......shpl.s.s.p..sh..hh.t..s..s......s.s...........................hh..C.L...u.hsss...................................ttshsllG...sh...pQpshh.lhaD..ltpp.plGFt..................................... 0 106 490 698 +14393 PF14542 Acetyltransf_CG GCN5-related N-acetyl-transferase Coggill P pcc CATH:1xmtA00 Domain This family of GCN5-related N-acetyl-transferases bind both CoA and acetyl-CoA. They are characterised by highly conserved glycine, a cysteine residue in the acetyl-CoA binding site near the acetyl group, their small size compared with other GNATs and a lack of of an obvious substrate-binding site. It is proposed that they transfer an acetyl group from acetyl-CoA to one or more unidentified aliphatic amines via an acetyl (cysteine) enzyme intermediate. The substrate might be another macromolecule. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.46 0.72 -4.09 355 2309 2012-10-02 22:59:21 2011-12-09 16:30:00 1 16 1990 7 555 1626 130 78.60 29 78.76 NEW Ratlth.s............t...uhh...sY..................pt...............sst.........hh.slsHThVssshcGpGlAspLlctulctsRppGh.+llPhCsalt.s.ah..c++P.-.a.p-l ..................................pa.lts-......Gpth..u.l...s.Y.............pp....................sss......hhhlsH.T.hVscshcGpGlupp.Llct.sl-psRc.p.s.....h.Kl...l..PhCsasp.phh..c+ps..-..a.p-l................. 0 176 360 478 +14394 PF14543 TAXi_N Xylanase inhibitor N-terminal Coggill P pcc CATH:1t6eX01 Domain The N- and C-termini of the members of this family are jointly necessary for creating the catalytic pocket necessary for cleaving xylanase.\ Phytopathogens produce xylanase that destroys plant cells, so its destruction through proteolysis is vital for plant-survival. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.35 0.71 -4.29 111 1620 2012-10-02 15:32:34 2011-12-09 16:34:32 1 20 98 13 922 3319 9 167.20 27 38.32 NEW YshplphGs............P...........h.hslDhu.ushhW.hpC....................................t...Ssoa....psl.C...........sSs.Ct.ht......................sssts..ssCthhs..s.....stsssG..pl.spDsl.hssst.t..................sshsshhFuC......ussth..............uh.t...........................sssGlhGLu.pst...hSLsuQlutt.....hsp+FuhCLss..............ssssGhlhFGs 
..........................................YhhplslGT..P.sp.........hhlhlD.TG.S-lsW...l...p..Cpss.................................................................tshtt..shas.PspS.ooh..........ptlsC.................ss..s..h..Cptht.....................................tsss.s.s.s....s.C...s.....Y.p...h..p......Yus.......sosotG.......hl..spDslslsss...........................ssh.s.s.hsFG.C........upssp.............Ghht....................................sssG.l...lGLG....pus...................hS.......l.......s......o......Q......l...tt................hsstFSaC.Lss.................sssuhlhhG............................................................................................. 0 113 501 731 +14395 PF14544 DUF4443 Domain of unknown function (DUF4443) Coggill P pcc CATH:2p8tA02 Domain This is a family of archaeal proteins. The domain is a putative gyrase domain. 18.50 18.50 18.60 19.00 18.40 18.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.59 0.72 -4.16 8 29 2011-12-09 16:41:01 2011-12-09 16:41:01 1 1 29 1 20 33 3 105.00 30 52.94 NEW FS.EsttV.u.VEGaPAYAIVVKNPPpFKSIELRDEAIRFFAKGAMILlVKNGElVFPEDtRPL+EThPELAE+L...l+h..--...GDhlVVTWAENPuDAhKSAhHVALsLKp-EI .........................................hulhhtt.sp.h.c.u..l...-LRDEAIRasAcGAhIlhhKsGcllFP..E..DtcsL....c-hhs..cls..ccl...tph....c-...GDhlllThu-s.spAhpuhh............................... 0 4 5 13 +14396 PF14545 DBB BCAP_N; Dof, BCAP, and BANK (DBB) motif, Coggill P pcc PROSITE, Pfam-B_2980 (release 26.0) Domain The DBB domain is named from the Drosophila (Downstream of FGFR - Dof, also known as Heartbroken or Stumps) protein, the BANKS and BCAP, both signalling in B-cell pathway, proteins. This domain defines a minimal region required for mediating Dof dimerisation. Since this domain can interact both with itself and with a region in the C-terminal part of the molecule, it may mediate either intermolecular or intramolecular interactions [1]. 
Mutants lacking this domain disrupt FGFR signal transduction and fibroblast growth-factor signalling [2]. 22.20 22.20 22.90 22.20 21.80 21.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.82 0.71 -4.67 13 113 2011-12-12 13:27:38 2011-12-12 13:27:38 1 1 63 0 61 137 0 136.40 37 17.33 NEW FsVtP+Kl+sGps.+VhllLsp...sLpccsslpVphcpssps.....lssscpcNPYTlphssP-thhplSthVslplcpsshsLGs+slKCcScLcElpplLps...sssPlEFMCQulsIsPsspEpLDplLhpoFp+.NlPss.apLhu .........sV.Pc+lpCGpptplalIl+s.....cLpcpso.sElEFpspsp.hh...phpsphcNcYTlshpAP-h...suGsVslplYssslslupssIpYYoshcElpplLpp...sssPlEFhCQAhtlsshspEsLDplLTpshKc.NlPssthpLht...... 0 12 16 33 +14398 PF14547 Hydrophob_seed Hydrophobic seed protein Eberhardt R re3 Jackhmmer:Q9S7Z9 Domain This domain has a four-helix bundle structure. It contains four disulfide bonds, of which three function to keep the C- and N-terminal parts of the molecule in place [1]. 23.40 23.40 23.40 23.50 23.20 23.30 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.24 0.72 -10.80 0.72 -4.08 24 532 2012-10-01 19:46:35 2011-12-12 14:20:06 1 8 71 0 253 530 0 81.10 49 49.60 NEW sCPhssl.cLssCssVL.slhplhlGs..tsptCCsLltGLssl-AAsCLCssl+hplLs.lsl.lsl..slplllshCG.+s..PsGFpCs ................................pCPh..Ds.L..KLusCuslL..G.............L.l.p.lt.lG..s..........s...s......spsCCsLlpG.LsDL..-.AAl.CLCT.AlKAsl.L.G.......Isls.l.Pl..sLs.LLLNhCG.Kps....PsGFpCs.................... 0 18 110 191 +14400 PF14549 P22_Cro DNA-binding transcriptional regulator Cro Coggill P pcc CATH:1rzsA00 Domain Bacteriophage P22 Cro protein represses genes normally expressed in early phage development and is necessary for the late stage of lytic growth. It does this by binding to the OL and OR operator-regions normally used by the repressor protein for lysogenic maintenance. 
22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.62 0.72 -4.25 33 735 2012-10-04 14:01:12 2011-12-12 15:55:30 1 3 468 8 62 374 11 58.40 33 75.46 NEW pKsDllpaFGuts+lApALGlopsAVSpW....G-hlPEhRAaplEclTsGpLKss...sslhpcss .............h...cslsaa.G.o.ps.KlApAhG..ls..suV.spW.....sch..lPc.......t.......R.......Ah.plppsou.Gtlphp...........p........................... 0 11 27 42 +14401 PF14550 Peptidase_U35_2 Putative phage protease XkdF Coggill P pcc Pfam-B_5816 (release 26.0) Family This domain is largely found on phage proteins. In a number of cases the domain is associated with a SAM-dependent methyltransferase. 24.00 24.00 24.60 29.60 23.90 21.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.75 0.71 -4.50 35 118 2012-10-01 19:43:34 2011-12-20 15:14:30 1 3 106 0 21 108 166 123.60 35 38.81 NEW hppplclh+........psp....--cplVaGlVhpPs......hhDucGDh....hss-EIEKAAapFhcp.....hpplDtpHch...psusuplVESaIsss.DhplsG...p..slp+GoWlhss+s...sD..p-lW-p....l+cG..choGaSluG...sAcph-h ...............tppVclh.ppsp....--p+lVaGlVhEPc......s.DuHGDa....hoA--IEKAAasFhpp.....hpplDhpHsh...psusupVVESalsPs..Dhplss.....p.plpKGoWlhss+ssD......-lW-p....lKcG..clTGaShuG...sAch..h........... 0 10 15 18 +14402 PF14551 MCM_N MCM N-terminal domain Bateman A agb Jackhmmer:O27798 Family This family contains the N-terminal region of MCM proteins. This region is composed of three structural domains. Firstly a four helical bundle, secondly a zinc binding motif and thirdly an OB-like fold [1]. 
24.90 24.90 25.00 24.90 24.80 24.80 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.38 0.71 -3.62 340 2275 2011-12-20 15:49:09 2011-12-20 15:49:09 1 42 484 10 1550 2224 62 132.50 20 16.40 NEW hpcpF.ppFlpp...............ht..................................................................hYhpp................lpphhpp....p...p.........................................psL.lchp.cLtp..a..s..................................p....................pL.sp.t.lhppPtchl.s..hhcp.ulpchh.t.............................................................................h.spht...pc......tp......h.plphhsh............sp.............thslRsL.c..us..clspLlslpG .................................................................................h..ptFppFLpp........apt................................................................phhY.hpp..........................lp.phhph......pp...........................................psL.Vsh.p.c.L.tp..a..s.......................................p.......................pL.sp.t.lhppPtchl.s..hhpp....Al...pchhhp.............................................................................hsstht..tp........pp......h.plp..hhsh.................sp...............hhslRsL..p..ss..plspLlslpG........................................................................................................................ 0 540 876 1300 +14403 PF14552 Tautomerase_2 Tautomerase enzyme Coggill P pcc CATH:3c6vA00, Pfam-B_819 (release 26.0) Domain \N 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.50 0.72 -3.96 157 646 2012-10-01 20:38:22 2011-12-20 16:17:50 1 4 541 17 193 1070 117 79.40 31 61.58 NEW D+Fpllppa..csschhhs..sp.h..ls....sRocshllIpIssttsRohEpKptLYctlscpLpppsGlpspDlhlslsEsst.-sWSFG ...............................Dpaphhppa..p..s..pphhasst.h..Ls...hpRo.-s..h.lhlpIssttsRohcpKcpLYptlsppL.pppsGlpspDlhIolhEss.t.-sWSFG....... 
0 42 99 150 +14404 PF14553 YqbF YqbF, hypothetical protein domain Coggill P pcc CATH:2hjqA01 Domain This N-terminal domain is found in Bacillus and related spp. The function is not known. 27.00 27.00 33.20 33.00 26.40 17.80 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.51 0.72 -7.93 0.72 -4.29 9 66 2011-12-20 16:42:37 2011-12-20 16:42:37 1 1 59 1 3 36 0 40.30 59 71.20 NEW ltGpTYsshGp..hFhhs.EppVscchapYLpsNcaFplpc-.sp ..IpGpoahAas+..+FLhupEEcVSEKlYNYLRRNEFFEVRKEE..a... 0 1 2 2 +14405 PF14554 VEGF_C VEGF heparin-binding domain Bateman A agb Jackhmmer:B4YYD6 Domain This short domain is found at the C-terminus of VEGF. It has been shown to have heparin binding activity. 27.00 27.00 51.00 51.00 21.70 21.70 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.92 0.72 -4.16 5 153 2011-12-20 16:57:28 2011-12-20 16:57:28 1 2 59 5 32 137 0 54.20 78 25.52 NEW cpcENpCEPC....SERRKRLFVQDPtTCKCSCKaTDucCKSRQLELNERTCRCDKPRR ................t...ps.CtPC....SER..RKHLFVQDPQTCKCSCKsTDSRCKuRQLELNERTCRCDKPRR...... 0 2 5 13 +14406 PF14555 UBA_4 UBA-like domain Bateman A agb Jackhmmer:A2AT02.1 Domain \N 21.80 21.80 21.80 21.90 21.70 21.70 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.83 0.72 -7.92 0.72 -4.42 201 1716 2012-10-01 23:03:33 2011-12-20 17:19:24 1 62 362 9 1068 1687 15 41.70 26 10.41 NEW cphlspFh.slT.G..s....sp.....ppAppaLctssWsLctAlstaappspss ...................thlppFh.slT.G...s.....sp......stAtphLptssW..sL....p....tAlstaapt.............. 0 328 522 821 +14407 PF14556 AF2331-like AF2331-like Coggill P pcc CATH:2fdoA00 Domain AF2331-like is a 11-kDa orphan protein of unknown function from Archaeoglobus fulgidus. The structure consists of an alpha + beta fold formed by an unusual homodimer, where the two core beta-sheets are interdigitated, containing strands alternating from both subunits. 
AF2331 contains multiple negatively charged surface clusters and is located on the same operon as the basic protein AF2330. It is suggested that AF2331 and AF2330 may form a charge-stabilized complex in vivo, though the role of the negatively charged surface clusters is not clear. 150.00 150.00 155.20 155.10 29.70 17.80 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.18 0.72 -3.64 3 3 2011-12-20 17:20:05 2011-12-20 17:20:05 1 1 3 2 3 3 0 93.30 46 100.00 NEW MPTYVFsKESFLKFLEKNLtEDsVVVVSSDVTDlDccpuESa.LGcK-aahVcFAlsADVFKEsDlDEFDEhhKYsVVFVESDEL.SEAG+KA.hR MPTYVFsKESFLKFLEKNLtEDsVVVVSSDVTDlDccpuESa.LGcK-aahVcFAlsADVFKEsDlDEFDEhhKYsVVFVESDEL.SEAG+KAhR... 0 1 3 3 +14408 PF14557 AphA_like Putative AphA-like transcriptional regulator Coggill P pcc CATH:2rkhA02 Domain Members of this family are putative transcriptional regulators that appear to be related to the Pfam:PF03551 family. This family includes AphA-like members. 27.00 27.00 27.10 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.72 5 13 2012-10-04 14:01:12 2011-12-20 17:41:38 1 1 12 0 5 54 17 175.10 40 93.05 NEW asDNoLTPKEAVRLsALG.hIARuPhRYuDLAuAVRHFloRIsGPSLDLhGoSlELLRaEGLlEPlsGcGMEDNApLuIT-uGRpELpcLlTAsLRAu.SD.LuKLVIuLKLRFLDLLsscsRpcQIDsLLchsEoELARLsDL.RAAhuu-Gs..Lht-WLDp-IsQlEpRLuWLcuLt .................................................atDNoLsP+EA.VRLssLG.hlAcuPt.R.Yu-LAu.u.V..RH.FhoRIhGPS.LD.LhG...oSlEhLR.aEGLl..E...shs...G..p..G.M.E...Ds.Ah..L.uIT-uGRpEhpsLh.sAslRss.oD.Lu+LVluLKlR....FLDLLss-pppsQl-tL..l-hsETELARLhDL.RA..A.s...s.s..s.G.s........hht-WLDp-I..sQsEpRLuWLcsL.h.................................................... 0 3 4 4 +14409 PF14558 TRP_N ML-like domain Bateman A agb PF06011 Domain This domain is distantly similar to Pfam:PF02221 and conserves its pattern of conserved cysteines. This suggests that this domain may be involved in lipid binding. 
27.00 27.00 27.10 27.00 26.90 25.90 hmmbuild -o /dev/null HMM SEED 142 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.92 0.71 -4.16 62 432 2012-10-01 19:31:57 2011-12-21 11:00:42 1 8 131 0 325 430 0 145.30 28 17.73 NEW chlposulssCh-s.Sphsss...............hFclpasssspolsaslsus......................................oslssp.VssplplhAYGhplh.scshc.Cs..hs..ht........tlCPlssGphsspus.hh..sp..phsspIP...uI.AYslPDLDApl.+lhlhsss........sspplAClpusloNGKTsp .................................t.ltosuhssC.....hss.Sthsss...............hFslsass..s..s..p.o..lphslsus.....................................................................................os.hssp..lshplplhAYGhphh.sp.s.hc..Cs.....hs.....hp............slCPls.sG.p......hshpss.hl.....sp.....sh.sspIP...................uI.AYslPDl-Aps+lhl.tssp.............sspplAClpuploNG+Ts..................................... 0 61 158 265 +14410 PF14559 TPR_19 Tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q87RI8 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.06 0.72 -3.63 300 8943 2012-10-11 20:01:04 2011-12-21 11:26:45 1 1648 2991 20 2662 21075 6378 65.60 19 13.54 NEW hltpu.....chspA....hplhpp...s.h....pt.....psp....sspsth..t.....L..Acshlpt.sphcp...ApplLsplstppps....sphp..sl...hApl .................................................tps....chspA.........hphhcp.......s.h........pt...................pPp..........ssphtl.......t....................L.....Apsh.hpt.G...c.......hpc...........Ap.p...hL...pp..hhtpp.s............................h................................... 0 874 1638 2192 +14411 PF14560 Ubiquitin_2 Ubiquitin-like domain Bateman A agb Jackhmmer:1t0y Domain This entry contains ubiquitin-like domains [1-2]. 
23.10 10.00 23.10 10.00 23.00 9.90 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.81 0.72 -3.73 85 602 2012-10-03 10:59:06 2011-12-21 11:57:51 1 38 292 5 409 5698 192 79.70 20 19.48 NEW sVplplTpstsp..htsE...tRashshTlpplKpKLphhsGsssssMpLpL..h.spssphlssl..........s-..DsthLGtYs.lcDGhclHVlDpsPss ..................................................c...h+.ls.shTl.....sp........lKp+Lphl.sGl..ss.sshcL..h....h....t..p..s...s..t...h...tph.......................cp....sp.phLst..as.lps....s.....plhl.......t............................................... 0 159 216 322 +14412 PF14561 TPR_20 Tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q87RI8 Domain \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.76 0.72 -3.87 216 1831 2012-10-11 20:01:04 2011-12-22 16:10:40 1 21 1796 5 436 1379 539 89.00 34 30.03 NEW pussss-lppLctpls...ssPsDh..pAphpLAhthhtsGch--AlcpLlpll++Dpsh........s-ssARppLlclFpslGss...DPhssphRR+LsolLa ..............................tssss-ltpLppplA....t.sPp.Dh..phthpLAh.th.htsG+s.E-Al-hLhshl++.D.h..st.................................s-...sp.sR+phh-lhsslGss.............Ds.lssphRRpLhulLa.............................................. 0 116 257 352 +14413 PF14562 Endonuc_BglI Restriction endonuclease BglI Eberhardt R re3 Jackhmmer:O68557 Domain This restriction endonuclease binds DNA as a dimer. BglI recognises and cleaves the interrupted DNA sequence GCCNNNNNGGC and cleaves between the fourth and fifth unspecified base pair to produce 3' overhanging ends [1]. 
25.00 25.00 25.10 25.30 24.80 20.90 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.80 0.70 -5.56 3 4 2012-10-11 20:44:47 2011-12-23 09:30:11 1 1 4 1 2 6 2 287.50 47 97.46 NEW RpclapSYppsRsYLlsNh-pLIclEpYsLsllsNlI+-NtEEI+ADYNE.AsaLaPFWpNYPPE-RGRtP+GDQIPWLEVGEKsVGSKLsRLVsp+hE.sVR-lGLPTGuDlRallos.pI.plTNshTDSsalFlDIKSVGPRDsDs-lVlSPNQVSGsG-.W-sFpsGIpNNphTI..pGsRu.NasFhPoLPPLYILSDGpIVPVVplaIKPVYuMlSLp.psDGGQPLp+IclASVPNGLLLFsNPsYuaspAacsLFpPGKDEhTKDsppRRlRVcLclLuRIusWRshcIDp ........................Rpcla.SYpps+pYL.sN.-...clEhYsLsllsplIp-NtEEI+ADYNE.AsaLaPFWhNYPP.-RG+hP+GDQIPWlEVGEKsVGSKLsRLVsp+.-.sVR-lGLPTGsD.Rallos.pI.plTNshTDShhhFlDIKSVGPRDsD.-lVlSPNQVSGsG-.WsthpsGIpNNp.TI..pGsRu..sp.FhPolPPLYILSDGpIsPVVplaIKPlYuMhSLp.psDsGQsLh+IclASVPNGLhLFsNPsYuaspAachLFpPGKD-hTKs.hp+RlRVcLclLs+Ius.RshpIDh.. 0 0 0 1 +14414 PF14563 DUF4444 Domain of unknown function (DUF4444) Eberhardt R re3 Jackhmmer:Q1GJN9 Family This domain family is found in bacteria, and is approximately 40 amino acids in length. There is a conserved LIPL sequence motif. There are two completely conserved G residues that may be functionally important. 25.00 25.00 42.90 42.20 24.00 23.30 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -8.06 0.72 -4.57 20 30 2011-12-23 10:42:36 2011-12-23 10:42:36 1 1 30 1 4 32 8 42.20 56 19.81 NEW lGE-lThsG.....tTGTFlGVDEcFGMLLRsss.sTHLIPLTolLEs .......lGEplThsG.....tTGTFLGVDEcFGMLLR-ss.sTHLIPLToLLEp. 0 0 3 3 +14415 PF14564 Membrane_bind Membrane binding Eberhardt R re3 pdb_1yhp Domain This family includes the C-terminal domain of Dictyostelium discoideum Calcium-dependent cell adhesion molecule 1 (Swiss:P54657), which has an immunoglobulin-like fold. It tethers the protein to the cell membrane [1]. 
25.00 25.00 28.70 75.60 22.40 21.40 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.99 0.72 -10.42 0.72 -3.88 26 35 2012-01-03 14:41:10 2012-01-03 14:41:10 1 3 21 2 25 36 0 112.10 26 45.68 NEW slsl+lh....sssssstpYphslpsapls...ssshposs-...Yshlslhshs..sspllsplsl+sppt.....GthlssGSlYF+Yssssuplshsc.s...psaPp..sLclppsspspFshsLhs ..........lsl+hh...sssssssppYphslpshpls...ssslhSsss....Yshlslh.hs..ssplVsplslR-pph.....GthlssGSlYF+Ysssssplshsc.s...-saPp..slclpcsspssFslsLh.... 0 11 19 23 +14416 PF14565 IL22 Interleukin 22 IL-10-related T-cell-derived-inducible factor Coggill P pcc Jackhmmer:Q9GZX6 Domain Interleukin-22 is distantly related to interleukin (IL)-10, and is produced by activated T cells. IL-22 is a ligand for CRF2-4, a member of the class II cytokine receptor family. 30.00 30.00 30.00 30.00 29.90 29.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.90 0.71 -4.10 5 53 2012-10-02 01:28:15 2012-01-04 14:53:14 1 1 36 14 29 136 0 136.60 54 77.25 NEW CRLDKSNFQQPYITNRTFMLAKEASLADNNTDVRLIGEKLFHGVSMSERCYLMKQVLNFTLEEVLFPQSDRFQPYMQEVVPFLARLSNRLSTCHIEGDDLHIQRNVQKLKDTVKKLGESGEIKAIGELDLLFMSLRNAC ...................................C+LcpSsFQpPYIsNRTFhLAcEASLADNsTDVRLIGcc.LF+G..V...s.h.s-RCYLMKQVLNFTLEEVL.......h...P.......p..S..D+...F.....p..PY....Mp....-V...V....s....F....L....u....+.....L.Ss+LSp..CHIp.G...D-...pH......I...Q+...N...VppLK-T.VKKL.G.E.s.G-.IKAIGELDLLFhsL+sAC... 0 1 2 6 +14417 PF14566 PTPlike_phytase Inositol hexakisphosphate Coggill P pcc Pfam-B_194 (release 26.0) Domain Inositol hexakisphosphate, often called phytate, is found in abundance in seeds and acting as an inorganic phosphate reservoir. Phytases are phosphatases that hydrolyze phytate to less-phosphorylated myo-inositol derivatives and inorganic phosphate. 
The active-site sequence (HCXXGXGR) of the phytase identified from the gut micro-organism Selenomonas ruminantium forms a loop (P loop) at the base of a substrate binding pocket that is characteristic of protein tyrosine phosphatases (PTPs). The depth of this pocket is an important determinant of the substrate specificity of PTPs. In humans this enzyme is thought to aid bone mineralization and salvage the inositol moiety prior to apoptosis [3]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.84 0.71 -4.16 103 540 2012-10-02 20:12:17 2012-01-04 15:56:02 1 4 197 33 220 698 17 143.50 25 39.58 NEW lsLREEshs...............asNst....hshct...hps.tpp..l.t..hpuhsstplpphEpth+pclhtps........pphssphhh.................................t...................................................................hc.p.thspshslpYhRlPlTDcpsPpspslDthlphlcsh.sp..................sshlhFpCptGpGRTTohMlhhsllp ........................................................................................................lsLREEshs.................ahs.Gts....hoh+phps.hps...h.........h.shstppl-...phEt........tl.+pclhtps........pphtsthhhhpp.pph..........................ph.tl..............................................................hhc.p...hs..p..th..sh.p.Y..hRlPloD.p.p....t.P.p.p.pshDthlphlcph..sp.....................................................sshhhFpCpsGpGRTohhhshhshh........................... 0 90 144 182 +14418 PF14567 SUKH_5 SMI1-KNR4 cell-wall Coggill P pcc Pfam-B_7167 (release 26.0) Domain Members of this family are related to the SMI1/KNR4-like or SUKH superfamily of proteins. 
27.00 27.00 27.00 27.10 26.80 26.80 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.37 0.71 -4.70 27 120 2012-10-01 20:46:44 2012-01-04 16:35:07 1 2 109 1 35 225 9 129.90 51 92.78 NEW Mc-lI-pLpphspssslsl-LPsp-pls-lEcplhlslPt-aKcaLhpsSDllhGslEPlolsDs.oHoY..LsEsss.Ahs.hGlP+-hlPI.CpssssaYCls.....p-Gp........VhhWs..cG...s-EpWtshapWsccVWl ......................Mc-lIEpL+E.hsE.sVPV..PL.EL.P.-.-.-pLVElEEpLhIslPhpaKEFLLps..SD.V.........lY.....Gs.l.............EPVT..lo......D..Pp.SHTY...........LPEV...su..pA.W-...lG..l.PR.-lIPl.Cp.-..G..csYYCl-........pDGp.............VhlWs...-s-l..o--sW-SlWpWscDVWL...................................................................... 0 3 8 24 +14419 PF14568 SUKH_6 SMI1-KNR4 cell-wall Coggill P pcc Pfam-B_725 (releawse 26.0) Domain Members of this family are related to the SMI1/KNR4-like or SUKH superfamily of proteins. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.35 0.71 -10.69 0.71 -3.63 86 516 2012-10-01 20:46:44 2012-01-04 16:50:21 1 4 281 2 92 1001 10 112.20 18 72.41 NEW ccpIpcsEppLul.....phPpsYKpFLcpYGu.ut.......hsGh-lhu.....h.................tsshhhtshh.t.ptthpp.htl.p.h.........llhpsssGphashc.........ps.scs.lh...........h.stptp.hhussFtEaL ......................................pIpphEppLsh.....phPps.Y+pFLcp.hsu.st..................h.s.u..p.lhs.....h......................................t.sh.hhpp.h...............htp.....ph..pph...............llh..p..s..s..s..s...s..h.hshc.........ptsps..lh.............................t.....p.....h.....h....hsssht-al....................................................................................................... 0 24 55 61 +14420 PF14569 zf-UDP Zinc-binding RING-finger Coggill P pcc Jackhmmer:A9T9M4 Domain This RING/U-box type zinc-binding domain is frequently found in the catalytic subunit (irx3) of cellulose synthase. 
The enzymic class is EC:2.4.1.12, whereby the synthase removes the glucose from UDP-glucose and adds it to the growing cellulose, thereby releasing UDP. The domain-structure is treble-clef like (PDB:1weo). 27.00 27.00 42.80 42.10 24.80 24.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -9.87 0.72 -4.23 6 380 2012-10-03 15:03:13 2012-01-05 15:54:05 1 6 85 1 128 405 0 75.90 53 8.56 NEW PKPLpNlNuQlCQICGDDVGVTl-GElFVACsECuFPVCRPCYEYERKDGsQuCPQCKTRYRRHKGSPRVcGD--EDDsD ...............suplCpICGDpVGls.ss.G-.sFVACsECuFPVCRPCYEYER+EGsQsCPQC+..TRYK.RhK.........Gs...Ptl.GD-.-.-.-.t.................... 0 18 88 109 +14421 PF14570 zf-RING_4 RING/Ubox like zinc-binding domain Coggill P pcc Jackhmmer:A5BM39 Domain \N 27.00 27.00 27.00 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 48 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.35 0.72 -4.38 58 462 2012-10-03 15:03:13 2012-01-06 18:22:44 1 17 256 2 313 530 12 47.80 51 5.58 NEW CPlCspch..DhsDpshhPC.pCuapIChaCapclhp.....s..t.....sGpCPuCRcsYc .............CPLChE.h...DlsDt.sFhPC.sCGYQ...ICpFCapcIcp......s..t.........sGhCPuCR+sYp........... 0 101 187 261 +14422 PF14571 Di19_C Stress-induced protein Di19, C-terminal Coggill P pcc Di19_old Family C-terminal domain of Di19, a protein that increases the sensitivity of plants to environmental stress, such as salinity, drought, osmotic stress and cold. the protein is also induced by an increased supply of stress-related hormones such as abscisic acid ABA and ethylene [1]. There is a zinc-finger at the N-terminus, zf-Di19, Pfam:PF05605. 
26.00 26.00 26.40 31.30 25.40 25.50 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -10.53 0.72 -3.72 49 170 2012-01-09 11:51:23 2012-01-09 11:51:23 1 4 26 0 88 170 0 98.50 30 47.24 NEW slSlLp+...-L..R.-upLQuLL.GGu......sss.s..s...uo.ssusDPLLSSFlhsh.ss.scs.p...c..ssc..s...sssst....pc.s.Stpcphs..pp..sh..p.t.s..pss.LStc-pE....EpspRucFVQsLLlSTl .......................hShLt+-Lc..-upLQsLLGGu.......tsts...s...ss...s.sssDPLLSSFlhshss.scs.p...pssp..s...sssst....ss.sshtpphs..pp.sh.p.....pss..Lopc-p-....c+spRupFVQtLlhSTl.......................... 0 14 55 73 +14423 PF14572 Pribosyl_synth Phosphoribosyl synthetase-associated domain Coggill P pcc pdb_2c4k; Jackhmmer:Q14558 Domain This family includes several examples of enzymes from class EC:2.7.6.1, phosphoribosyl-pyrophosphate transferase. 27.00 27.00 27.20 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -11.17 0.71 -4.54 11 6166 2012-10-10 14:25:38 2012-01-09 15:06:51 1 16 4467 28 1767 4367 2278 127.90 40 39.16 NEW DYRNAVIVAKsPuuA++ATSYAERLRLGlAVIHGEsK-u.EsDh.sDGRpSPPsh.cs.............sslsssht..lPhhhsKEKPPlTVVGDVGGRIAIIVDDlIDDlpSFVAAAElLK-RGAYKIYVhATHGLLSuDAPcLlE-SsIDEVVVTNTlPH-lQKhpCpKIKTVDISllluEAIRRIHNGESMuYLFR ...............................................................................................................ptsssp..............................................................................................................................................................................................s.......M....p....l.I.G.D..V..c.G+..ss..lll...........DDhIDT...uGTlsp.AAc..s......L.c.-.p.G.Ap.c.VaAhs.TH.....ulh.S...G......s.......A...h.....-....p....l....p....s....S....s....l....c....E..lVVT.........D..T..I...s........l....s..c...t.........p..p....h....s....+.l.........c.hl.o...lu...s..l..lAEAIc.Rla.pp..cSlSsLF.p........................................................ 
0 573 1057 1460 +14424 PF14573 PP-binding_2 Acyl-carrier Coggill P pcc CATH:3ce7A00 Domain \N 32.00 32.00 32.00 32.00 31.90 31.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.40 0.72 -3.95 5 15 2012-10-02 01:16:24 2012-01-09 15:11:50 1 2 10 1 12 80 1 91.30 44 35.82 NEW SPVVDTDINAVTNYIVGMCQKFLQKGEKVTPSSKLEELRTREDRLWDCLDTVEFVLDVEEIFDVTVPDEVADNFQTLQEIADFVVSERAKAGKFMK ................................................lpphllGhhpKaLpcspclTssoKLE...EhRT.+-sRhWDsLDTVEFVlDVEEhFDVTIPDEsADNhcTlQEIADaVVupRt................ 0 7 8 11 +14425 PF14574 DUF4445 Domain of unknown function (DUF4445) Bateman A agb Jackhmmer:C9L8Q5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and archaea. Proteins in this family are typically between 525 and 664 amino acids in length. The family is found in association with Pfam:PF00111. 27.00 27.00 29.30 40.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 412 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.02 0.70 -6.24 215 536 2012-01-09 17:49:09 2012-01-09 17:49:09 1 11 362 2 223 535 496 400.20 34 68.91 NEW haGlAlDlGTTTlsutLlDLp...o.GchlussuthNsQhpaGsDVlSRIsaAt..p............sGhppLpptll.pslNpllsplhpp.......ss............................lshpcIhplslsGNosMpHLhLGlssptl........utsP........ahPsh..........sps..h.lpAp-l..Gl......phssputlalhPsluuaVGuDllAull.uss..ht.....p....p..cc..hs...LhlDlGTNGEllL...G.sp.cpllusSsAAGPAFEGusIspGM+AssGAI-cVpl....csss.............ph.psIG......................s........................ttP....................pGICGSGll-hlAplhcsGllcpsG+hsps...tt.............t.h.h.t.t..ttttalls...tt.......t.......t.h.......scslhloQpDI.cplthAKuAlhAGlphLlcc.sGlshs-l-clhlAGuFGsalshcsAhslGllPc.h.hp+lphlGNuuhtGAphsLls............................pptpcchpplscph..p..alEL...usp...ssF.pctFlpuhhh 
............................h.aGlAlDlGTTTlsstLlDLp.........s.GcllspsuthNsQhpaGsDVlSRIsauh..p............sGhppL....ppslh.pslNpllpplhtpss............................lstppIhphslsGNosMtHLhLG.lssppLutuP........ahss..h..............pps..htlpAp-l...ul.............plps.tutlalhPsluuaVGuDhsAull.ust..h.t..................p....p..cc...hs..LhlDlGTNGEllL......u.sp....spllusSsuAGPAFEGusIssGhRAssGAI-cVplssps.................phpsIG...............s........................tts..............................................tGICGSGll-hlAphhcsGllcpsGchtps....t...............h...h...ttttphhls.t........t..th.spslhloQpDI.+plQhAKuAlhAGhphLlcp.hGlphpcl-clhlAGuFGsalshcsAhslGllPc.h.hp+lphlGNuuhsGAphsLL.s............................pp..ttpch..pclsppl..p..hlEL...usp...ssF.pctFlpuh................. 0 111 190 207 +14426 PF14575 EphA2_TM Ephrin type-A receptor 2 transmembrane domain Coggill P pcc CATH:2k9yA00 Domain Epha2_TM represents the left-handed dimer transmembrane domain of of EphA2 receptor. This domain oligomerises and is important for the active signalling process. 27.00 27.00 27.00 27.00 26.80 26.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.24 0.72 -9.87 0.72 -3.58 64 1068 2012-01-09 18:22:38 2012-01-09 18:22:38 1 47 82 50 462 762 0 77.70 37 8.58 NEW hllsu.ss.su..llhLllllh.l....h.l.hht..RRpp.hp..........+s...pp.p..s-c..h......th.pssp.....h..h...su.....l....................Ks...............YlDPaTYEDPspAV+EFA+EIDs ......................................................h.lhssss.su..lhhlls.l.l.s.h.....hl....lht....R+.....pp...hp...................................+s..pp..p.....s-c.....................ph.tpsp.........h..h.PG..h.........................................................K.s...........................Y.....lD....P....aT.YEDPNpAV+EFAKEIDs............ 
0 46 86 226 +14427 PF14576 SEO_N Sieve element occlusion N-terminus Eberhardt R re3 Ruping B Family Sieve element occlusion (SEO) proteins, or forisomes, are phloem proteins which accumulate during sieve element differentiation [1]. This domain represents the N-terminus of SEO proteins. 27.00 27.00 63.20 32.00 22.90 22.40 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.82 0.70 -5.13 34 105 2012-01-10 14:54:50 2012-01-10 14:54:50 1 6 13 0 58 98 0 247.50 32 38.72 NEW t.sDsplhcplhtTHssDs.cchDVcsLhsllpsIlp+us.hs............h..tt......t..p..cp...s.hshhcs.hhslc+ISCch.sKssutp.sAH.............pTThsILphLssYoWDAKsllsLAAFALpYG-FWhLsph.hsos.LAKSlAhLKpl.s..hp.ts...+sR.ss...lssLlcshlpVhcsIhEaccLssp.Y.....spDVPuLstAhpcIPlsVYWsItolVACsupIssl.........hspp..csaELS.shspKLssIhs+L+ppLshCcppI--...hEsYppLhclFpps+p ................................................sDs.hhcplhhTHs.ss..pchDscsLhpllpsllppss..h.....................t..p......hs.hc.s..htlcpIusphhsps..utt.puH.............tTThsll..sh.LssYoWDAKsVlsLAAFAlpYGcFWhLsph..s..ss.LAKSlAhLcpl.P..hp......+s+hps...lssLl+shhpVscsIhEaccLsst..Y......spDlPsLssAhpcIPlsVYWsItolVACsspIssl................h.t......................p..pt..........a-LS.shspKls.IhpcL+ppLphChppItc...h-tat.hhphhpp.p.................................... 0 3 46 55 +14428 PF14577 SEO_C Sieve element occlusion C-terminus Eberhardt R re3 Ruping B Family Sieve element occlusion (SEO) proteins, or forisomes, are phloem proteins which accumulate during sieve element differentiation [1]. This domain represents the C-terminus of SEO proteins. 
26.00 26.00 31.60 35.60 25.20 22.60 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.61 0.70 -4.99 35 115 2012-01-10 14:55:34 2012-01-10 14:55:34 1 6 13 0 69 112 0 213.00 34 34.22 NEW pscEEsLhpc.sWhh-llsc.lDPhlhpWlp-s..+YIhlYGGsDh-WIpcFTpsscslu.......psAclslEhhaVGKt............................pD.shlhaFWsclEShh.hoKh.ph.............cpscsDslhQE.lpplLSacts-pGWAlLSKG...ss....lhlpG+Gsshlpols-a-t.WKcpV.p.cGFshAFc-Yacplp...s..sccCs+l.l.Psss..GcIP-plsCP-..CuRsME.palsY+CCHc ........................s..ccctLhpc.pWhhpllhc..........lcstl.phlc-s.+aIhlYGGsDhcWIpcFTpssctlt.......psAcl.lEhhaVGKtp..........................h...pD.shlhhFWh+lEShhhSKh..ph.h...........ppspsDshhpE.lppLLsac.t.sptGWAll.o+G...ss....lhlpG+Gsshhpols-ast.WK-pl.p.cGFshAhp-ahpphp.....s...sc.Cs+h.h.ss.ts...Gplscp.lhCs-..CtRsME.pal.YpCCp................ 0 3 56 66 +14429 PF14578 GTP_EFTU_D4 Elongation factor Tu domain 4 Coggill P pcc CATH:1xe1A00 Domain Elongation factor Tu consists of several structural domains, and this is usually the fourth. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.66 0.72 -4.34 11 354 2012-10-05 12:31:09 2012-01-10 16:40:20 1 15 284 4 219 555 91 86.20 37 11.07 NEW hpssGKhpV.psaslht+-.slVG.cVlpGlIhPGYKl...KG+c.VGhIhpIp+p+KpV-FAlsGD+VAl.lEG.hh...psc-GDlLEV .........hh...Ps+l+IL..PpalFppp....DPlVlG.VcV.sGhlK.G.sPls.....pccG..h.c...l...G.h..lp.oIcp.sc.K.sV-.A+cGpcVulpIcs....................................... 0 72 122 170 +14430 PF14579 HHH_6 Helix-hairpin-helix motif Bateman A agb Jackhmmer:C9L7X9 Domain The HHH domain is a short DNA-binding domain [1]. 
25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.89 0.72 -3.94 364 7029 2012-10-03 02:11:09 2012-01-11 15:39:14 1 48 4452 9 1533 5939 2559 91.90 30 7.87 NEW GlpllsPDlNpSp.h.c.asl............................................pst.........s......IRhGLsslKGlGpssscpIlp.tRp...tus...Fp.sltDh..hpRs..............tls+cslEsLhpAGAh.D.sh.....u..h...p..Rt..tL ...................................................GlplhsPDlNp.St.h.c.Fpl...............................................................................................................pst.........ts.........IhhGls...Al+GlGps.sscpIlc..sRp.....p.....G................Fp.sl.Dh..tpRs..........................ttls.+.+s.l.EsLhpuGAh.D.sl....s......p.Rt.......................... 0 538 1027 1307 +14431 PF14580 LRR_9 Leucine-rich repeat Coggill P pcc CATH:1a9nC00 Repeat \N 30.50 30.50 30.50 30.50 30.40 30.40 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.49 0.71 -10.95 0.71 -4.71 6 1482 2012-10-02 21:32:02 2012-01-11 16:43:36 1 116 303 11 910 1732 20 148.50 27 32.21 NEW MVKLTAELIEQAAQYTNAVRDRELDLRGYKIPVIENLGATLDQFDAIDFSDNEIRKLDGFPLLRRLKTLLVNNNRICRIGEGLDQALPCLTELILTNNSLVELGDLDPLASLKSLTYLSILRNPVTNKKHYRLYVIYKVPQVRVLDFQKVKLKERQEAEKMFKGKRGAQLAKDIA 
.....................................................h....................................................................h.p.t.lt......h......p....h...c....h...L...s...ls..pN.t.l..p....p...l....p..........s...l...s.....................p............L........p...p........Lcp.L.......Ls..sNpI....s..p..l...........p..s..l.t.....t.....t..l..s...s..Lp.........p...Ls..L..s.sN..p.I...p..s..l.s.............s......l..p.......L..p..p.l...p..p.LppL..s...L......h.....s.....N...P..........l............s............p...........h............p.........p.Y....R.....hllhh.lP.p...Lc.hLD.hppl..ptp........E.+.t.A...............................t.tt.............................................. 0 313 425 643 +14432 PF14581 SseB_C SseB protein C-terminal domain Bateman A agb SseB Domain This family consists of several SseB proteins which appear to be found exclusively in Enterobacteria. SseB is known to enhance serine-sensitivity in Escherichia coli [1] and is part of the Salmonella pathogenicity island 2 (SPI-2) translocon [2].\ This presumed domain is found at the C-terminus of SseB proteins. 19.90 19.90 20.00 20.30 19.60 19.30 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.07 0.72 -4.20 72 744 2012-01-11 16:51:14 2012-01-11 16:51:14 1 7 695 0 82 441 11 108.70 51 42.24 NEW p..shp....sGsplpls...cP..pc...Psphhsulsp.hhppptsVppAalthhpp.......tsppsshhlsl-hss....chppl.hpshup.hssshhss.hslchshls.s..tsluchhhpcspPFYpR ................EGGESLlLS...EV....AE......PPuQMIDSLTT.LFK.TlKPVKRAFlCuIKE......pp-AQPNLLIGIEADG.....DIEEI.IpAsGs.VATDTLPGDEPIDICQV+.c.GE....cGISHFlT-HlsPFYER............................ 0 15 39 61 +14433 PF14582 Metallophos_3 Metallophosphoesterase, calcineurin superfamily Coggill P pcc C0ATH:1uf3A0 Domain Members of this family are part of the Calcineurin-like phosphoesterase superfamily. 
28.20 28.20 28.20 28.20 27.80 28.00 hmmbuild -o /dev/null HMM SEED 255 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.62 0.70 -5.23 6 31 2012-10-02 19:15:56 2012-01-11 17:04:52 1 2 31 9 11 163 10 169.80 37 57.98 NEW M....thhscKlLAlSsh+Gch-hl-+LlclltEp.ssDAllllG-lhpspA+ucEYt............................pFFRtLuphslPsahlPGspDAPlchaLRtAaNhElVhPpl+sVHcoFAhhtG.allAGhGGElT-cucs-...thtL+YPuWEAEYpLKhL+-L+DhpKlhLFaTsP.......hcKGhccuGSpsVAcLlKTasPclVls.....uGsstcH..E.LGsoLVVsPGuLuEG-YAllDlcp+cl-hGsl ......................................................................................................................................................................................................................................................................................................................................................................................................h.L....p.....-.............h.......c..c.........ph.......I.....hl.F.Ht..PP.s.............htp..sh..u..csGS+sVtcLIpp..a.pPlls.Ls.....GHltcsp.t.h-plGs.Tl..lVNPGu.L.tp.G..-..aS..l..lshp...t............................ 0 5 9 11 +14434 PF14583 Pectate_lyase22 Oligogalacturonate lyase Coggill P pcc CATH:3c5mA00 Domain This is a family of oligogalacturonate lyases, referred to more generally as pectate lyase family 22. These proteins fold into 7-bladed beta-propellers. 
30.00 30.00 30.00 30.40 29.90 29.20 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.16 0.70 -6.01 4 173 2012-10-05 17:30:43 2012-01-11 17:11:27 1 3 156 4 47 139 10 353.20 48 90.58 NEW MAKGKhlsLsFcTa.DSsTsscVsRLTPsDVlCHRNYFYQKCFTpDGpKLLFuGsFDGshNYYLLDLsTQpAsQLTEGpGDNTFGGFLSP-DcuLFYVKNt+NLMRVDLsTLEEssIYpVP--WVGYGTWVANSDCTKlVGIEIpKcDWpPLTDWKKFtEFYaTNPpCRLI+lDLcTGEupVILQENpWLGHPIYRPhDDsTVAFCHEGPHDLVDARMWhINEDGoNhRKVKpHA.GESCTHEFWlPDGSALAYVSYhKGpopRaIhpssPsTLENcplhpMPsCSHLMSNaDGoLMVGDGssAPVDVpDsuGYKIENDPFLYVhNhKstpha+lA+HsoSWcVh-GDRQVTHPHPSFTPDDKtVLFTSDs-GcPALYhAclP-pl ......................................................................MAKG..l.Lpacsh.DspTGspVsRLTPs-lhCHR.NYFYQKCF....spD..Gs+LLF.uut...F..D..G.hNY.YLLDLtoppAsQLTE......Gt.....G...DNT..F.G......GFLS.s....-.D..culaY.VK..........st...+......sL.hcV-LsT..L.........c.....Ep.slYpVs..........-cWVGY.GTWVANS...D...CT+.lVG.I...E..I....t...+p.....................DW...pPL....s....DWp.hFp-...FaappPpCRLh+lDL....c.....T..GE...s...pVIhp.-spWLG.HPIYRPaDDsTlAFCHEGPHDLVDARM..WhlN.cDGoNh..RKV..+....pH.s.t.G...ESCTHEFWlPDG....SAlsY.V.......SY.h..K.G...ppsRh...IhphsP.............p..T......h...cs..c..t..l..hpM...P.s...CS...HLMSNaDGoLhVGD..G...u..ssPVDV....p......D.ss.u...YpI...-.N...DP.aLY.l.h.sh.p.s.........tp....t...+..lu+.....Hs..o......SWpV...hcGcRQV.T........H.....PHPSFTPDs+tVLFoSDh..cGp.PAlYhsplPt............................................................ 0 7 25 36 +14435 PF14584 DUF4446 Protein of unknown function (DUF4446) Bateman A agb Jackhmmer:C9L935 Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 165 and 176 amino acids in length. 
24.00 24.00 24.40 24.30 23.90 23.90 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.70 0.71 -4.38 123 292 2012-01-11 17:19:42 2012-01-11 17:19:42 1 2 289 0 74 243 26 149.80 32 86.90 NEW .shl.....llulh.llslll.hl...hhlhhhh+...hp+lp++Ycthh....cGpsup.sL....Echlhphhcclcclppptcph.ccphcplppphp....pshpKlGlV+YcAFp....-hGucLSFulAlLDspssGllloula.uR-sshsYsKslppGpSp.htLSpEEppALcpAh .............................s.hlllslh....llhllh..hl..hllhhhh+hp+Lc++YcthM....cGpssp.sL......Echl.hphhcclcclppptc.ph.ccphpplcpphp....pshpKlGlVRYsAFc....-hGucLSFulAlLDspssGllloula.uR.-p.ohsYuKsIppGpSp.hsLSpEEppuLcpAh.............. 0 44 65 69 +14436 PF14585 CagY_I CagY type 1 repeat Bateman A agb Bateman A Repeat This repeat is found at the N-terminus of the CagY proteins - part of the CAG pathogenicity island - and involved in delivery of the protein CagA into host cells ([1]). 25.80 25.80 100.40 25.80 17.60 25.30 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.27 0.72 -4.04 7 210 2012-01-12 14:30:43 2012-01-12 14:30:43 1 33 32 0 3 213 0 62.60 78 9.99 NEW ETSKKsQQcSPQDLSNEEATEANHFED..KESKESSDpHLDNsTET.KTphD-s...KopETpsphspp .........ETSKKTQQHSPQDLSNEEATEANHFEDSSKESKESSDHHLDNPTE.....T.....KTNFDE.KSEEhpsp.ss............. 0 3 3 3 +14437 PF14586 MHC_I_2 Class I Histocompatibility antigen, NKG2D ligand, domains 1 and 2 Coggill P pcc CATH:1jfmA00 Domain Members of this family are known as retinoic-acid-inducible proteins. They are ligands for the activating immunoreceptor NKG2D, which is widely expressed on natural killer cells, T cells, and macrophages. 
27.00 27.00 27.40 27.20 26.80 26.80 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.17 0.71 -4.69 3 89 2012-10-03 22:02:01 2012-01-17 15:53:59 1 3 17 5 26 189 0 164.60 45 74.91 NEW DAHSLRCNLTIKAPTPAD..WhEVKChVDEILILHLSNINKTMTSGDPGETANATEVGECLTQPLNDLCQKLRDKVSNTKVDTHKTNGYPHLQVTMIYPQSQGQTPSATWEFNISDSYFFTFYTENMSWRSANDESGVIMNKWKDDGDLVQQLKYFIPpCRQKIDEFLKQSKEK ........................................sHSLshNFT.I.Ko.ho+PGp.WCEuQs.hsc.phFLpa.s.ss.N.hsh.P.L.G..l.GKK.VNATpsWs-LTQsLs-lGc-LRhhLh-lKsp.h...K.TS.Gs.s......T....L.QVpMlsQ+cstphsuASWpFs.IssphshlF...D.s...hNMoWplIN.-AscIhEpWKcD+sLtc.ah.+.h...h.s.DCsphLcEFLt.....t......................................... 0 1 1 7 +14438 PF14587 Glyco_hydr_30_2 O-Glycosyl hydrolase family 30 Coggill P pcc CATH:3clwA02 Domain \N 27.00 27.00 27.10 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 384 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.38 0.70 -5.53 4 205 2012-10-03 05:44:19 2012-01-18 13:33:11 1 21 143 6 97 244 11 287.10 27 49.98 NEW cppLslshposaQpIDsFGASDAWRsQalGKNWP.EK+ppIADLLFSpEhDppGNPKGIGLShWRFNIGuGShEpGcsuGlsspWRRsECFLot-GsYDWsKQuGQpWFh+AARERGVpphLsFS.SAPhaMT+NGpuhoo-cs.phNlppsKhcsaAcFLs-sspph.pc.GhslNYlSPlNEPQW-W.usuuQEGo.sTN--hpchVphLD+cLpcRplsTp.IslsEsGsIpYLacs.sNtpsRDN.IcshFspsuphSlhpLssVtpsVouHSYWSsaPhspLVspR+pLspclup...ush+aWtoEYC.hEp..Ns-.spGsGs.RDLGMpsALYVARlIHpDLTlANASuWQWWTAlSthsYKDGLIalDcsp.tsG.Sh.........KpDG 
...............................................................................................................ht.....lp.tt.hQ.phcsaGsS.sW...hp..h.h....G...hst....t....pp....p.pls-hLFo..p.p........................p...G...l...GLolhRaNlGuG............us.t..p.....t........p............s...........s..t....l..t.........s...............h...h......p....s...p.........s.a.......h.......s...sc.........s...s.........a.........s......W...s..t...s..t..u.QRh..h.l.p.t.A.+.p..+..G.ls..p....hh...uFu.NSP...PhahTpNGt.....s...s.s.....s......s....t....s.....s.....s.............NL..cs....-p.apsFApYLusVsc....th..pp..........t.......G.l.phshlsPhNEP.s...h..s...........W.....s..........s.....s........p..........QEGs..ths.s.p.p..sphlp.h.Lspp.lpppsl..s.sp..l.h.h..s-...................................................................................................................................................................................................................................................shttst..................................................................................................... 0 34 71 91 +14439 PF14588 YjgF_endoribonc YjgF/chorismate_mutase-like, putative endoribonuclease Coggill P pcc CATH:2otmA00 Domain YjgF_Endoribonuc is a putative endoribonuclease. The structure is of beta-alpha-beta-alpha-beta(2) domains common both to bacterial chorismate mutase and to members of the YjgF family. These proteins form trimers with a three-fold symmetry with three closely-packed beta-sheets. The YjgF family is a large, widely distributed family of proteins of unknown biochemical function that are highly conserved among eubacteria, archaea and eukaryotes [1]. 
35.00 35.00 37.20 36.70 34.20 34.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.80 0.71 -4.28 2 1260 2012-10-01 19:40:00 2012-01-18 13:58:18 1 6 1017 15 475 1093 1483 146.50 44 92.19 NEW EoRLhAhGL.LPcssAAlGsYEPauhhus.lhTShQhPa.ttpLha.G.LGsshosp-GhAAsRLssLNulAQLtpAsGpLupl.plhRL-GhlsspQsh.-hPhsLDuAScLL.-lhGEtGRHuRhhhsp.VMPLsu.shlhhFAEl ....................................................t+LtpLGlpL.P..t.s.....ss.P..s.A.sYVPsl...p.o....G....s.h.l.a.sSGQlP...h....h..s..G...p.....l..h.hsG..K.....l...G.......s.........-.............l....o..s...Ep..upp..A...ARh...sAlNhLAs.lc...u...........t.........l....G......s.......L..-+.lpRlV..KlsGFVsS..sssF.....s.pQ...s......tVhNGAS-LlscVFG.-.s..GcHARSAVGVusLPhsusVEl.Eh................... 0 150 300 384 +14440 PF14589 NrfD_2 Polysulfide reductase Coggill P pcc CATH:2vpzC00, Pfam-B_200168 Domain Bacterial polysulfide reductase is an integral membrane protein complex responsible for quinone-coupled reduction of polysulfide, a process important in extreme environments such as deep-sea vents and hot springs. Polysulfides are a class of compounds composed of chains of sulfur atoms, which in their simplest form are present as an anion with general formula Sn(2-). In nature, polysulfides are found in particularly high concentrations in extreme volcanic or geothermically active environments. Here, the reduction and oxidation of polysulfides are vital processes for many bacteria and are essential steps in the global sulfur cycle. In particular, the reduction of polysulfide to hydrogen sulfide in these environments is usually linked to energy-generating respiratory processes, supporting growth of many microorganisms, particularly hyperthermophiles. 
27.00 27.00 28.90 28.60 26.60 26.10 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.74 0.70 -4.72 3 4 2012-10-01 19:35:38 2012-01-18 15:02:00 1 1 4 8 2 10 1 254.00 66 99.22 NEW TEFYGLPNApEFWHWTNLLHFlLVGLAGGsAFLTALLHLKGcAEARRYTLaALuLIALDLFlLWAESPARFRFTHVWLFLSFHPTSPIWWGAWGLALSFLouGLLYLGKGPuRsLAWuLLlFSLVALAYPGhALAVNLNRPLWNuLLAGLFPLTALVLALGVAVLLKSuWALaP.LRVLAGASLhLAhLYPlTLs...sEAR..GHLWEEGGhhYGLFLLLG....LGAFhpERLAPWAGhLAAAG...LRALLVtsGQWQGL..G ......TEFYGLPNAtE.FWHWTNhLHFlLVGLAGGsAhLsALLHL+G..psE..ARRYTLh..A..LuLIALDLFlLWAESPARFRFTHVWLFLSFHPsSPIWWGAWGLA.LuFLouGLLYL..G.KG...P......pRhLAW......uLLlFSLVALuYPGhALAVNLNRPLW...Nu..L.hA.GLFPLTALVLALGlAsLL+SsWA...LaP.LRl.LA....GASLhLAhL.YPhTLs...sEAR..tHLhEE..GGhhYGLFLLL.G....LGsFhpERhAPWAGhLAAAG...LRALLVhsGQWQGLG......................................................................................................... 0 0 1 2 +14441 PF14590 DUF4447 Domain of unknown function (DUF4447) Eberhardt R re3 Jackhmmer:Q8EAP9 Family This family of proteins is found in bacteria. Proteins in this family are approximately 170 amino acids in length. 27.00 27.00 314.00 313.80 24.80 24.40 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.13 0.71 -4.66 5 28 2012-01-18 16:17:44 2012-01-18 16:17:44 1 1 28 4 13 19 1 166.00 82 99.49 NEW MSKNlGLNAIEMQYLRpSLGLTsAQVAplTKsSE-DVlAWEAGEppAPtLAQKKLLEIDEIIEMQVLNToDGIEELFKKEPKRRLAFVVYPTQAlYTQYNPEFLSSLPLTELYNTAAWRIKKECKLVLEVDVoLVPLDVEuYKAYREcNGLuESRESRAKWAATQL MSKNIGLNAIEMSYLRQSLSLSsAQVGpLTsHSEADVLAWEuGEpsAPELAQKKLL-IDDIIEMQVLNTCDGIEELFKKEPKR+LAFVVYPTQAlYTQYNPEFLSSLPLTELYNTAAWRIKKECKLVLEVDVSLVsLDVEAYKAaREpsGhSESRESRAKWAATQL 0 1 2 8 +14442 PF14591 AF0941-like NTP_transf_5; AF0941-like Coggill P pcc CATH:1yozA00 Domain Members of this family are of unknown function. 
22.60 22.60 22.60 23.50 22.00 22.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.74 0.71 -4.08 6 11 2012-01-18 17:29:05 2012-01-18 17:29:05 1 1 11 2 9 11 0 107.20 28 84.46 NEW hpsTal-KltELh+.....t.hl.-stcclEclscpLh........scEsItEh..F+pDhE.IlchapsGchocEEAhppLp.Lc.hA.spLppah.clt-lLcchEtchpchltch.c.........ls..l.Y.hcp....lcpstcE ....ohl-KLtELlp.....pplIsDlp-cL-EIhcpls........scEpIcEh..F+sDhc-llc-hpuG-I--EEApcllc.l....................................................................................... 0 1 6 9 +14443 PF14592 Chondroitinas_B Chondroitinase B Eberhardt R re3 Jackhmmer:Q46079 Family This family includes chondroitinases. These enzymes cleave the glycosaminoglycan dermatan sulfate [1]. 24.00 24.00 24.20 24.00 23.90 23.70 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.67 0.70 -12.61 0.70 -5.66 6 114 2012-10-02 14:50:22 2012-01-19 10:54:16 1 13 78 4 66 132 31 320.70 28 50.89 NEW VSo.sELtcAlspsKsGspIlLKsGlaKDVQIKFpGcGTK-pPIsL+AETsGKVFIEGsSpLEluGcYLlVcGLaFKNGaoPspsVIuF+su.cs....luNas+VTNCVIp-FspssR-psspWVp..thhG+HNclspChlsGKsNhGPTlRls......lcGsp.ultNYHpIVpNHFGPRPpKGGspGETIQlGsSaoSMoPu+ThlsNNLF-cCNGEVEIISSKoNFNla+NNlFacSEGSlVhRHGNYsslDGNaFIGssssEsYGGIRlINTGHWlsNNYFYtlKGppFRSslAlMNGIPKSPLNRYNQVTDVVVAYNTalDss.SPaQFGVGpNlsppDVLPcSEIRSARPhRhplsNNllaNcc.scphPllcp...Dcs...sFKsNhlssp.ss.......Lhsp.ctshTclutNhps.ps.....sshK.DlEsh.pGFDF- 
......................................tplppAl..p.sspsGDpIlLtsGsa....s...s...h.p...lhh.p.s.pGTpspPIslpAps..sG.pVhl.s....G.....pu.......p.....lpls...........G..p............alhlpGLhF.p.suh.....ss.t..tth.hthp.s..........husps+lT..pssh....t..a....s.................tp.....t.......t...........al..............hhGppspl-ps.h.s...K..pshGsh.lhl...................tstt...s.tp..hc..p.I....cpNaF...t........p..........h........u....u........N.uu....Eslpl.Gh..S..t.u..h.s...u.shlppNhFcpss..GE.sEllS.KSstNhhptNshhpspGslshRHGp.shlpsNhalGst.......................hGGlRlhspsphlhsNYh.sh..........p..G........h........t........h..h..ushs..........hps....................s.t....s.........s.ta...s.p.s.l..Nshlsst....h.hs...................................................................................................................................h........................................................................................................................................................................... 0 19 53 64 +14444 PF14593 PH_3 PH domain Eberhardt R re3 Jackhmmer:O15530 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.21 0.72 -4.14 31 202 2012-10-04 00:02:25 2012-01-19 14:29:39 1 8 127 7 132 503 8 102.20 47 18.57 NEW shWcpFlp...sclIlKpGhlpKR..+GLF..uR+RhLLLT-u.....P+LhYlDPspMhhK.GEIPWo...ppLpsEsKNhKpFalHTP.....sRsYYLpD.......scupAhcWscsIpclppphhp ........................................WcpFl-....splILKhG.VcKR.....KGLF.......uR+RpLLLT...-G................P.....+L.......hY.V.DP.s...s......h..l.l..K......G..E..I.PWS.........p.-.L..+....s...........E......s.......KN.....F.......K......p........FaVHTP...........sRTY.a.L...D............sp.u.p..A.hc...Ws+tIp-lhc....p........................................ 0 58 76 109 +14445 PF14594 Sipho_Gp37 Siphovirus ReqiPepy6 Gp37-like protein Bateman A agb Jackhmmer:C9LCM0 Family This family includes numerous phage proteins from Siphoviruses. 
The function of this protein is uncertain, but it is related to Pfam:PF06605. In Rhodococcus phage ReqiPepy6 this protein is called Gp37 [1]. 26.10 26.10 26.30 29.60 25.90 26.00 hmmbuild -o /dev/null HMM SEED 335 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.90 0.70 -5.20 43 160 2012-01-19 16:55:40 2012-01-19 16:55:40 1 2 128 0 25 151 20 326.20 24 86.66 NEW shphlG.......................l-sap..SLpasc+a.tssGsFE...lclshsppp.................hphL.ppsshlhh...pstp...p...........ssllp..phphsp-p...tpsplsVpGhs.hpshlscRlshss..........h.......hhsussEslhcphlspsshssspssRtIs...sLt..lussts...tupplshp.spa.csLh-tlppl......spssslGaclh.........hc.p..............p..pch.lF-lhpGpDh.......sssllF.....StcasNlpstpatpos.sapNssh.VuGpGEGp.sRthsp............l....us.....s..p...GhcRcEhalDA+-ls.................................phhspLpp+GppcLs.Ettthtshcsplpssst......hhYtpDasL.GDhVolpscth..GlphsspIoEl.........p-ha-..psGh.plpssFGs ....................................p.hu..l-.apSlphpcpa.tssusFE...lhhshptt..................hchl.ptsshlhht..ss..............sullp..thplpps-...ttphlslpGhs.hpuhlscRIlhs...........h.......phoGpsEslhcphlppphh.s...tRpls...tLh..lsssts...ps.tsl.p.hp..spa.pslh-tlppl......spssshGaclh.........hs.p....................................p...tth.hhchhpu.pDh.......ssslhF.....St-ac.Nlhstpapcst.shpsssl.luGcGEGp.sRphsp.............................l................ss.....h..s.........GlcRhEhalDA+-lp.................................thhspLpp+Gpc+Ls..Ettph.hshpsplp.ps.......hhYttDacL.GDhVshtsp.h....Ghphsshlspl.........ppphc..psGh..plpspaGp........................ 
0 10 23 25 +14446 PF14595 Thioredoxin_9 Thioredoxin Eberhardt R re3 Jackhmmer:Q9I4A4 Family \N 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.63 0.71 -4.58 80 573 2012-10-03 14:45:55 2012-01-20 08:05:38 1 3 547 1 117 2556 1082 127.40 35 70.61 NEW Ghshs-alp.........phsp..........spc.chpclhcphplsp-tpp...hhpthspsh.phLllsEsWCGDuhhslPllp+lu.chs..slcl+llhRDcsh-lh....-paLT..ssu+uIPhhlhhD..........ps.hp.l.shWGPRPptlpc.hh..........sph+ ...............................................................shspalsthpt..........Nppph.h..plYps....ap...lsp....-..p....hhch..h..p.t....s..h..+.l.L.V..l..o...E...sWCGDAhhsl.....Pll....c....+I.u...E....s....t..........Nl...-...l...+l..h....t..R...D......-..s.p..c..L.h..............DQYLT....sspu.....R.....u......IPl.F.lFls...........cp...hcp..ss.W....GP....R.ssclpc.hl.pph.t................................................. 0 40 85 105 +14447 PF14596 STAT6_C STAT6 C-terminal Eberhardt R re3 Jackhmmer:P42226 Family This family represents the C-terminus of mammalian STAT6 (Signal transducer and activator of transcription 6), it contains an LXXLL motif which binds to NCOA1 (Nuclear receptor coactivator 1) [1]. 25.00 25.00 26.20 25.30 24.00 21.90 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.35 0.71 -4.37 2 54 2012-01-20 10:05:42 2012-01-20 10:05:42 1 5 26 1 17 45 0 168.10 78 23.30 NEW PLPTPE.phPTMVPSYDLGMAsDoSM..QLusDMs...YPPpSHSI.saQsLs.EESlsVLsAFQE.PHLQMPPshuQMSLPFDQPHPQGLL.CQsQEHAVSSP-PLLCsDVsMsEDSCLoQPVsuFPQuTWlG..........EDhFPPLLPPTEQDLTKLLLEGQGEuGGGSLGAQPLLQPS.YGQsGISMSHhDLRsNPSW ....PLPTPE.QMPTMVPSYDLGMAPDS..SMsMQLuPDMVs..QVYPP..HSHSIss.YQuLs.EESVsVLsAFQE.PH.....LQ...MPP.....sLuQhoLPFDQPHPQGLLPCQPQEHAVSSP-PLLCSDVTMsEDSCLoQPVsuFPQGTWlG..........EDhFPPLLPPTEQDLTKLLLEGQGEoGGGSLGsQPLLQPSpYGQSGIS.MSHhDLRsNPSW...................... 
0 1 1 2 +14448 PF14597 Lactamase_B_5 Metallo-beta-lactamase superfamily Coggill P pcc CATH:2p97A00 Domain This is a small family of putative metal-dependent hydrolases. 28.40 28.40 28.80 28.70 28.30 28.00 hmmbuild -o /dev/null HMM SEED 199 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.30 0.71 -4.92 4 14 2012-10-02 15:46:01 2012-01-20 10:58:09 1 2 14 2 5 21 24 190.30 39 92.85 NEW MKSLHRPDLYSWSsFNPARNIDFNGFAWIRPEGNILIDPVALSNHDWpHLESLGGVVWIVLTNSDHVRSAKEIADQTYAKIAGPVAEKEpFPIhCDRWLSDGDELVPGLKVlELpGSKTPGELALLLEETTLITGDLVRAa+AGuLplLPDEKLhN+pcsVASVRRLAuLEKVEAVLVGDGWSVFRDGRDRLcELVATL .........................................................................RPDLasWShFs.t+NlDFNuhhhhRP-GNlLIDPlsLSscDhp+LpulGGlsaIVLT..NsDHlRuAcchAcphtA+lhuP.......su-.......c....cph.....PlssD+hLsDG-..pllsGlpVlpLp..GpKTPGElALL.L.--..s.sL.IoGD.Llhuh.uGsLshLPDcKhhshpcshtSl+RLAth..ch-slLsGDGWslh...pcupttLtpLh.................... 0 0 2 5 +14449 PF14598 PAS_11 PAS domain Eberhardt R re3 Jackhmmer:P70365 Domain This family includes the PAS-B domain of NCOA1 (Nuclear receptor coactivator 1), which binds to an LXXLL motif in the C-terminal region of STAT6 (Signal transducer and activator of transcription 6) [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.09 0.72 -4.11 22 2159 2012-10-04 01:10:46 2012-01-20 11:08:31 1 50 589 39 703 2822 16 106.50 29 13.98 NEW p-pFoo+hshsGKIlplDpsslphsh.tahp.c-.lhspshaphhH.pDhp...hpp+h+phhp..........pGp.s..tSsh..YRlphpss..salhlpTpuplhp..sppssp.phlhssppllsEpps .......................................h..pFhoRHsh..s..sphtalDp.+...s.............s..shl..GYhP..p-..L.l.G..p.s...hh.c..a.aHscDhp....lp..c.sac..p..lhp....................................pGp..s.....po..s.........Y..Rhhs+sG..salh.lc...............Tphpshh......Nsh.op..c.......c.a.llspppll.....t............................................... 
0 147 210 422 +14450 PF14599 zinc_ribbon_6 Zinc-ribbon Coggill P pcc CATH:2k2dA00 Domain This is a typical zinc-ribbon finger, with each pair of zinc-ligands coming from more-or-less either side of two knuckles. It is found in eukaryotes. 27.00 27.00 28.00 28.00 26.30 26.20 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.45 0.72 -4.18 75 401 2012-01-23 11:24:43 2012-01-23 11:24:43 1 18 217 1 260 390 11 59.80 44 12.44 NEW lsDMsphachLDpEItspP...MPppYpsphshIlCNDCss.pSpVpaHhlGhKCsp..C...sSYNTpp .............h.hDMsthactLDtpluspP........MPp-Ypsp..pshILCNDCs..........s.cS.p.V.p....FHhLGh.KCtt..CtSYNTpp............... 0 67 148 216 +14451 PF14600 CBM_5_12_2 Cellulose-binding domain Coggill P pcc CATH:1aiwA00 Domain This C-terminal domain belongs to the CAZy family of carbohydrate-binding domains that are associated with glycosyl-hydrolases. It is suggested to bind cellulose. 27.00 27.00 27.30 28.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.12 0.72 -4.34 5 106 2012-10-03 03:17:01 2012-01-23 14:37:49 1 26 74 1 33 110 1 59.30 49 6.53 NEW TssCANsNVYPNWVSKDWAGGQPTHNEAGQSIVYKGNLYTANWYToSVPGSDSSWTLVGSCN .......................sss.lN...sYPsWsppDatu.ssoHAssGDpMla..puslYpANWWTsShPGSDuSWohlh...s.... 0 2 22 27 +14452 PF14601 TFX_C DNA_binding protein, TFX, C-term Coggill P pcc CATH:1nr3A00 Domain This is the C-terminal region of TFX-like DNA-binding proteins. 27.00 27.00 30.10 29.60 22.70 19.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.72 0.72 -9.49 0.72 -4.19 28 73 2012-01-23 14:39:50 2012-01-23 14:39:50 1 3 69 1 53 84 0 82.70 30 56.08 NEW ARETLpFhpsLsAPlplslcpGTDla-lPchlaspuDcsGlKVpasoh-Lhphlp-sAs-tlcsRhl+cchhlhlspcG-lplp ..A+pTLthacplpAPlplplctGTDla-lPchlaccuDctGlKVpYsohplhphlp-pAsphlcsRhlccshhlhlspsGclpl......................... 
0 9 29 42 +14453 PF14602 Hexapep_2 Hexapeptide repeat of succinyl-transferase Coggill P pcc CATH:2rijA03 Repeat \N 27.00 11.00 27.00 11.00 26.90 10.90 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -7.50 0.72 -4.39 86 8656 2012-10-02 11:29:45 2012-01-23 15:54:45 1 152 4074 116 1925 24566 14688 33.80 33 14.21 NEW sltIGcsChlGAN.us..l..G..lslGDssllsAGlhlos .......................................lhItcss.h..I....G......u......p..uh......l..............G..............l......p.....lG-......ss.ll.u......sGshls....................................... 0 584 1174 1585 +14454 PF14603 hSH3 Helically-extended SH3 domain Coggill P pcc CATH:1ri9A00 Domain This domain is the 70 C-terminal residues of ADAP - Adhesion and de-granulation promoting adapter protein. It shows homology to SH3 domains; however, conserved residues of the fold are absent. It thus represents an altered SH3 domain fold. An N-terminal, amphipathic, helix makes extensive contacts to residues of the regular SH3 domain fold thereby creating a composite surface with unusual surface properties. The domain can no longer bind conventional proline-rich peptides [1]. There are key phosphorylation sites within the two hSH3 domains and it would appear that binding at these sites does not materially affect the folding of these regions although the equilibrium towards the unfolded state may be slightly altered [2]. The binding partners of the hSH3 domains are still unknown [2]. 25.70 25.70 25.90 25.80 25.20 25.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.95 0.72 -3.97 3 112 2012-10-02 18:48:24 2012-01-24 09:38:00 1 4 35 1 63 125 0 81.50 52 13.02 NEW FRKKFKYDGEIRVLYSTKVTTSITSKKWGTRDLQVKPGESLEVIQTTDDTKVLCRNEEGKYGYVLRSYLADNDGEIYDDIADGCIYDND .....FRKKFKa..-GEIpVlhpshlsss.hso++hGs+DLsI+sGEtLEVIp..hT-..c..sclLCRNpcGKYGYV.RotLh..-...s-lYDDl.................................................... 
0 4 9 20 +14455 PF14604 SH3_9 Variant SH3 domain Coggill P pcc Jackhmmer, JCSG:target_422527 Domain \N 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.82 0.72 -8.26 0.72 -4.38 37 9839 2012-10-02 18:48:24 2012-01-24 16:52:14 1 624 319 119 5343 17510 50 51.00 31 8.53 NEW Alasap.sps..p.s..E..LplptG-hlhl..h..pp.....t..p...ssW.hh.u..p....h..sG..ppGhhPssYlp ......................Alasap..u....ps........s...c.......E......Lsh.p.t...G.-..l.ltl...l...pp...................s...-.....cGW..ap.G....p..................h.....sG........pp..G..h.FPusYVp............ 0 1250 1872 3407 +14456 PF14605 Nup35_RRM_2 Nup53/35/40-type RNA recognition motif Coggill P pcc Jackhmmer, JCSG:target_422743 Domain \N 27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.33 0.72 -4.19 21 98 2012-10-02 20:46:34 2012-01-24 17:02:17 1 12 56 0 66 283 2 55.80 26 9.42 NEW spWlsVoGat..hcpt....thVhc.....aFp.s.hGpIlc...pphs................tsshhYlpYtsphssptAL ..................phlsVsG..as..scpt.....phlLp.....HFp.s.hGcIsc........hpls.........................tppshhhlpatschsA-pAl..................................... 
0 23 29 47 +14457 PF14606 Lipase_GDSL_3 GDSL-like Lipase/Acylhydrolase family Coggill P pcc Jackhmmer, JCSG:target_416889 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -11.03 0.71 -4.54 20 244 2012-10-02 11:02:24 2012-01-24 17:22:51 1 7 172 2 67 573 133 161.20 31 41.20 NEW cK.PlVhYGTSIsQGAsASRPGMuaTsILuR+hstsllNLGFSGNu+hEsEls-llu-l.DA.ulallDslPN..........h....ospplpcRhtshV+tLRstHPcTPIlllEchh.hsps.hhcpptpcchpppspslpcsacp.LppcGsKslYalsucsh.lGpDuEuolDGsHPoDLGhhRaActaptll+ .................................................................................p.hlhYGoSIo.QGus....A..o+....P......u.....h.sasslhu......R.....p.....h.......s............h.....c.......l.......l.......NLG.....FSG...s..u....h....L........-.......................h..A...c.h.....l..s....-.....h...c.....A...clhs.....l....-.hhsN...................h....ssc...thppphpsFlcplRptH..P....sT.Pll...llps.hh..h.tt.....h..pt........................t.t......t....tt.p..t..hhtphhtt....h...t......t....t..sl..h....h..l...t...t...t...p......h.....s.......s............t....h......h.....s.....D....h.h.H.sshGh.hhup.h............................................................................ 0 27 58 67 +14458 PF14607 GxDLY N-terminus of Esterase_SGNH_hydro-type Coggill P pcc Jackhmmer, JCSG_target_416889 Domain This domain lies upstream of SGNH hydrolase, but its function is not known. There is a highly conserved GxDLY sequence-motif. 
27.00 27.00 51.80 44.00 25.90 24.50 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.84 0.71 -4.73 44 142 2012-01-24 17:32:33 2012-01-24 17:32:33 1 6 85 0 28 136 91 147.60 34 36.76 NEW sphpahsstph...sltG+uh.ssp..shYpRLPsptpst...lptsVhsLuppSAGlslpFpTsSspIps+aplsss.hshspMsssussGlDLYsts..sGpWpasus.up.sht....s.stshlhpshss......pt+EahLYLPLYsslpsLEIGlspsuplp ...........t.phpahsspph....slhG+uhtsp...shYcRlPsshpsh....sptslhpLu+sSAGlAlpF+oNSspIpl+apltss..hphsHMsssuhpGlDLYshp..sGpWpasss.up.sst....s..spshlhpshps......pt+EahLYLPLYsslpoLcIGVsssApl.p................. 0 16 27 28 +14459 PF14608 zf-CCCH_2 Zinc finger C-x8-C-x5-C-x3-H type Wood V, Coggill P pcc Pfam-B_880 (release 26.0) Domain This is a zinc-finger of the type C-x8-C-x5-C-x3-H. 27.00 8.00 27.00 8.00 26.90 7.90 hmmbuild -o /dev/null HMM SEED 19 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -6.79 0.73 -6.70 0.73 -3.78 44 1117 2012-10-01 21:35:20 2012-01-26 16:52:10 1 44 235 0 664 1211 11 18.40 40 10.59 NEW .C+.hhss.Cp.s.cChatHP ..........C+.ah..ss..Cpp.s..cChFtHP.. 0 185 320 526 +14460 PF14609 GCP5-Mod21 gamma-Tubulin ring complex non-core subunit mod21 Wood V, Coggill P pcc Pfam-B_276835 (release 26.0) Family GCP5-Mod21 is a non-core subunit of the larger gamma-tubulin ring complex that effects microtubule nucleation from both centrosomal and non-centrosomal sites. This subunit, unlike GCP2 and and GCP3 and others, is not thought to be essential for viability in the fission yeast, and may not be expressed in very high concentrations. Fission yeast can form a large gamma-Tubulin complex C similar to that found in higher eukaryotes and this complex is important for maintaining normal levels of microtubule nucleation in vivo [1]. 
27.00 27.00 28.80 27.90 26.60 26.70 hmmbuild -o /dev/null HMM SEED 653 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.02 0.70 -13.14 0.70 -6.41 4 5 2012-10-02 13:20:28 2012-01-31 09:18:48 1 2 3 0 4 28 0 519.00 25 90.96 NEW VhsplEuls-K.phpshps.ANshcs+h--.lDR..VpsustsS.LHhpILHhLLEhSssPocss..KpPs.KclNsslEpEA.hspDsp.hpp.spGD+WD..ls-WSp......VTcsEEoEp.ss-NEchs-susaPcIPs..+hcNapp.chhhlFccpuQplhD+h.EKc..........lhRIuEQshhtE.oIhhLhG..............plLchlP....h.csLph.Ippo.LhchpsaG+sh-lhN.QVAsp+pCphahsDss...........DpEKpEsFls.hhlhQphTPphpuhlK....cLL.L.TpVRKht..Ihp...LptsspH.pKF......FR.AalchuppVhDhLpohhFIahpRhpN...........SLhhuthhpphYtEuh.Is.lhhpllsls+.TssasIuo.hLsILhpp..s.hpthcSlL.-aS..FLhKcCh..........ASQt.sFhhlV.sas.phsNhpp-pEshIcpslshpss..lsshc.uLsL.a-l.............ppaosRF.sL.pt.-EsacpLspKhFuKhlhtc.salsT.+salhp.ppcFTc.-plu.F.GVats..p-lssppVhtE.EK.....hlLKp+pKpLhsFhhsRstsL.su-hsshHK-.LtslM-......NsYKTplup.ID.pRhV....Dsp.shhlupluDllhc.up.lhscshpa+SsLs-tIhpshs ....Vh-pIEulsDKhphpshps.ANshcs+h--.lDR..VpsustsS.LHhpILHhLLEhSssPo......css.........KpPs.KclNsslEpEA.hs.pD.s.p.hp..p..spGD+..WD...ls-W...Sp...........VTc..s..EEoEp..s...s-...NEchs-su.sa..P.c.IPs..+hcNa.pp.ch.h......hlFc..c..puQplhD+h..EKc..........lhR.....Iu.....EQshhtE.oIhhLhG..............pl..LchlP....h.csLph.Ippo.LhchpsaG+sh-lhN.QVAsp+p.....C.....phahsDss...........DpEKpEs.Fls.hhlhQphTPphpu..hlK....cLL.L.TpVRKht..Ihp...LptsspH.pKF......FR.AalchuppVhDhLpohhFIahpRhpN...........SLhhuthhpphYtEuh.Is.lhhpllsls+.TssasIuo.hLsILhpp..s.hpthcSlL.-aS..FLhKcCh..........ASQt.sFhhlV.sas.phsNhpp-pEshIcpslshpss..lsshc.uLsL.a-l.............ppaosRF.sL.pt.-EsacpLspKhFuKhlhtc.salsT.+salhp.ppcFTc.-plu.F.GVats..p-lssppVhtE.EK.....hlLKp+pKpLhsFhhsRstsL.su-hsshHK-.LtslM-......NsYKTplup.ID.pRhV....Dsp.shhlupluDllhc.up.lhscshpa+SsLs-tIhpshs........................................................................ 
0 2 2 4 +14461 PF14610 DUF4448 GPI-anchored_2; Protein of unknown function (DUF4448) Wood V, Coggill P pcc Pfam-B_5686 (release 26.0) Family This is a family of predicted membrane glycoproteins from fungi. However there appears, visually, to be some similarity with the family of GPI-anchored fungal proteins, Pfam:PF10342. 27.00 27.00 27.10 27.30 26.90 26.80 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.44 0.71 -4.81 53 151 2012-10-04 12:14:07 2012-02-01 17:11:59 1 2 119 0 114 146 0 188.40 21 49.05 NEW ssstsusss...hhtCsspph..........ttPFCtPccssclhsspTYa.lTWsspaF............tp..VplplsYssssstpt..........................................................sha...sS-h..lsNspGaaslplcppWL........psp.h.........sslolp..thsstssh..s.h..........pGPpVtltsp.s....ph.s...t...pt..........................hlhlulPsslsh..hhlhhhhhhhh.p+cpRclu ............................t.ssh.tCpspph............tP.FCtPpcsspltsspTYa......lTW.spaF..........ssss..Vpltlsahppstspt........................................................tha...soch....lssst.GhhslplpppWl...........psp...........sslol.p.h.thss.ssh..ssh...........................pGsplhhssp.sh...thts...t..st...................................hlhlulPlslss..hhlhhhhhhhh.p+.cpRch................................ 0 30 66 99 +14462 PF14611 SLS Mitochondrial inner-membrane-bound regulator Wood V, Coggill P pcc Pfam-B_1679 (release 26.00 Family SLS is a fungal domain found bound to the mitochondrial inner-membrane [1]. It reacts physically with fungal Kar2p to promote translocation across the endoplasmic-reticulum membrane. This action appeared to be mediated via the promotion of the Sec63p-mediated activation of Kar2p's ATPase activity. This indicates that the Sls1p protein is a GrpE-like protein in the endoplasmic reticulum. In S.cerevisiae the SLS1 gene (ScSLS1) is not essential but is also involved in ERAD and folding [2,3]. 
33.70 33.70 33.80 34.20 33.60 33.50 hmmbuild -o /dev/null HMM SEED 210 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.24 0.70 -11.20 0.70 -4.91 34 140 2012-02-02 14:41:00 2012-02-02 14:41:00 1 3 113 0 84 142 0 207.30 21 25.97 NEW sKptLsc+Ilc-sWpLslpscl........Gplplplp......sp.hslLhhspphhlppl.....hpppss+Iclppphs.hlcIouscpssphlpptlpchhsclcppplsls.h..h..t.pt...........hpsthLphlpphtpshhchssssp.hhhhhht................tsppps-pscRhLhhAl.s.pspsppshhss.hs..tst.h.h.sh..........pslsWhsRp.cpWhRWt ...sKttluttIlcchWpLplpcp..lp............Gch.....lplp.................st.htLLhsp....sp.h.Lcsl..........................st.tt+lsspppps.tlpIpu.scushphlhpplsclhpslpopplslp.h.....t.pt...............p.....hs.pthL.splsphosshhcpsss..sst...lp..lpaht..................tsccss-hshRhLhhAh.s.tsp.ss.p.phhsphhs.............pstpuphhshss........................cshsWh-+..+pWhRah...................... 0 13 41 71 +14463 PF14612 Ino80_Iec3 IEC3 subunit of the Ino80 complex, chromatin re-modelling Wood V, Coggill P pcc Pfam-B_3771 (release 26.0) Family This is a family of fungal chromatin re-modelling proteins found in one of the chromatin-central complexes, Ino80. The function was identified in Schizosaccharomyces pombe but there is no orthologue in S. cerevisiae. 
27.00 27.00 39.30 36.80 20.50 19.80 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.57 0.70 -4.30 22 99 2012-02-03 15:55:25 2012-02-03 15:55:25 1 3 72 0 83 95 0 161.80 30 59.05 NEW Y+SaKKKYtKh+IpF-.tM+-S-uLh+EEh+hp-huKRlpEQNDQLL-lLLEhNsSh+lPschRac...Luh..Pss..s.hhss..t...s...s.thhpt.Lp.u+sph.sGphp.pthp.lttshhpspsh.sPsh.phssLlp.VPHos.sstppp.....sDh-h.......sp.....shGFLoPEc-sEYhhshDu+lus.s...t.tphsp......pPshu.............p.-R-hslRNPsSVYNWLR+ppPplFLQDsE .....................................................................................+Sa++KatK.+lhF-.tM+csptLh+p-.+h.shs+Rlt.pND...............pLL-hLL-hNpo.pls.phRhslsh..ssp.............................................................ts.h.phtpL.p.hPH.t.............p.p...................t..........s.sahss-c..pYhhthD.ths.................................t...t........................................ppph.l+NPsSVhNWLR+ptPp.hFLpDt-........................... 0 15 41 68 +14464 PF14613 DUF4449 Protein of unknown function (DUF4449) Wood V, Coggill P pcc Pfam-B_1378 (release 26.0) Family This is a fungal DUF of unknown function. 27.00 27.00 27.10 27.10 26.80 26.70 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.83 0.71 -4.32 28 110 2012-02-03 16:32:29 2012-02-03 16:32:29 1 2 90 0 89 117 0 152.00 33 17.97 NEW sVclcsLcIKlKKSpHKlLF.slFKPlLhpl..lRPulpKslEppIR-shp+sDuhhacl+pEAcRut-tucp..DPpp.ssNIYs+YhsAhpcchtpt+.cKAptt...ss....DpKlphAhTpc-SlFscIpLPGG..ISoKATEY+-........LAc+G-+.WESPlFoIGsAucSsslPp .....VclcsLclKl+cSpHKlLF.slFKPlhhpl..lRsslpKslEctI+sshpphD.t.hhapl+pEAc+A...tctucp..sPpp....t.shascYhsAhppphhpt+.pcupth...ht....................-pclphshopccSlF.plpLPGG.....lSsKATEYc-........LAt+.....G-+...WcSPlFuIGpAtpSpslP......................................................................... 
0 35 55 76 +14465 PF14614 DUF4450 Domain of unknown function (DUF4450) Coggill P pcc JCSG:Target_393004-GS13576A Family This is a family of bacterial proteins of unknown function. 27.00 27.00 30.00 27.50 26.80 25.90 hmmbuild -o /dev/null HMM SEED 211 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.54 0.70 -11.43 0.70 -5.19 8 86 2012-02-06 10:27:07 2012-02-06 10:27:07 1 3 47 4 14 68 0 154.60 27 20.87 NEW MPshuGohRhGlssGscSpWlc-.hpKhcupa.hssp.hhhhps.......plplslhuLuDocGFIlElcucclP.-slsLhWuFGGssut....hscDsDIsu-st.......t.ChcNlaolcpssFTlhY.G.cuh......pL+s.......lsGlhPsto-IRLuDuctpsoPLpLapStKKossPVluu+hslsspp.........shYFshYp.psu+A-Ysha.hLspLFpKpcp ......................uGshphtl.pus.p.o.hhLcs.tpphcupahst+h.Y.l.c..h.ttGplpltshAhsDscGhIhchpsp....shs.c.ushLhWtaGsspst..phspsuDhss..p.......................................................................................................................................................................p....................................................... 0 6 14 14 +14466 PF14615 Rsa3 Ribosome-assembly protein 3 Wood V, Coggill P pcc Pfam-B_11864 (release 26.0) Family This is a family of 60S ribosome-assembly proteins, from fungi. 27.00 27.00 27.90 28.30 25.70 26.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.42 0.72 -8.10 0.72 -4.67 29 100 2012-02-06 14:52:56 2012-02-06 14:52:56 1 3 96 0 77 91 0 47.10 37 24.20 NEW phpshYLphhsppFu-DL-pLR.pusDF.sspoLshLsps.LcpGsshFs ..phpshYLphhsppFu-DL-clR.pssDF.ss...coLslLscu.LppGsshFs.. 0 28 52 71 +14467 PF14616 DUF4451 Domain of unknown function (DUF4451) Wood V, Coggill P pcc Pfam-B_5126 (release 26.0) Family This is family of fungal proteins up-regulated during meiosis. 
27.00 27.00 27.60 28.90 26.80 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.53 0.71 -11.22 0.71 -4.37 35 127 2012-02-06 15:39:06 2012-02-06 15:39:06 1 3 102 0 103 133 0 115.60 36 20.33 NEW spsht.sDhYpPcalRspssp+.........pGhCshCt.........phhahshKsSuYh.aHhshpHGIsu.sGhhhssPpthtphp..p..............................cthp...............thpuhCt..pCpc....hlslps........t+...p.hhsaa+Htpc.pH ..........p..phpsDhYoP+alRspusc+...................EGhCshCc.........sscWLsLKNSuah.YchuasHGIou.sGps...att.Ppph+chp..........................................tpsp...................hh-GlCs..sCpc....Wlslss........sp...pthhsaaRHshcsH.................. 0 37 66 92 +14468 PF14617 CMS1 U3-containing 90S pre-ribosomal complex subunit Wood V, Coggill P pcc Pfam-B_3046 (release 26.0) Family This is a family of fungal and plant CMS1-like proteins. The family has similarity to the DEAD-box helicases. 35.00 35.00 36.50 35.30 34.90 34.50 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.56 0.70 -5.31 5 248 2012-10-05 12:31:09 2012-02-06 17:00:37 1 7 211 0 174 236 2 221.00 27 77.70 NEW TToscRKRpup....pcpuGKKcK.pt.........+RpK+ccRKcu-Lhpht.DVcuG....LNpAhA+MsPcLLADYlAsplKRFtoDLSSVELED+YIsASAIpDTTSFTKPRTLDNLP-F.LEcFSchssKLspusKoNGSPHTLlLTuAALRAADLuRulRKaQ...TKsscVAKLFAKHIKLcEpIoaLKsSRlGIAVGTPsRItDLl.-sESLoVDpLK+IVlDASalDpKsRGILDh+ETpcslhc.lLGpKplp-Rac..-cKlcVlFY 
........................................................................................................................................ttpt.......................tp......................tptp.t..........p......t...th..........p...t.................t..p...........psp....h..-hh.tph.h.p..............t......h.......p.h.osl...ELp-h.............ls...........p.ss..h....h.......sssph..p........p.t...h.cs..l..ssa..l.c..t...h....s.......t..................p........l...........pt...p.....p...p...toPh......hlllssuulRus-lh.Rulpsap......stssplhKLFAKHhKlcE.........plph.L...............c.....p.....p.......p....sp....lulGTPsR............lpcLl...c.....p........s..uL..p..l..s..pLchlVlDhsahDpKtpslhDh.-h..p..lhc.hLt........................h.............................................................. 0 52 92 139 +14469 PF14618 DUF4452 Domain of unknown function (DUF4452) Wood V, Coggill P pcc Pfam-B_6056 (release 26.0) Family This fungal family has no known function. However, it is rich in paired, as CXXC, cysteines and histidines, but these do not fall in the conformation that might suggest zinc-binding. 27.00 27.00 49.30 44.50 25.00 23.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.15 0.71 -4.43 14 83 2012-02-06 17:08:27 2012-02-06 17:08:27 1 2 81 0 67 81 2 156.30 44 82.24 NEW sHH....GGRsRRus+hSuupssp+QFR.GV+SM+-L.sEusulsuFRtRFEAuRSFDLEDDhEFCP.sLLTEsDLsSIpS.tuS-RSSLuSsSPpuSPtQp..ps.....ssuhSLsuuussh.sPsh.......pp..sthKlHQPuAsRsRNAIPIVNPsTGhshoSPPsSlsP.t.M........htRRW ....................................pt...ssRsRRss+huspps.p+QFR.....GV+SM+-L..sEusuloAFRsRFEAGRSFDL-DDhEFCP.sLLTEcDL.................pSIpS...tuS-RSSLuSsSP-oSPhQp.hps.....ssuhSLssu..o.ss.h.hsssh......pt....sth+lpQP.oAsRs.RN.A..IPIVNPsTG....hploSP.Ps....ShpP.................................. 
0 9 32 54 +14470 PF14619 SnAC Snf2-ATP coupling, chromatin remodelling complex Wood V, Coggill P pcc Pfam-B_4045 (release 26.0) Domain This domain appears to play a crucial role in chromatin remodelling for yeast SWI/SNF. It binds histones. It is required for mobilising nucleosomes and lies within the catalytic subunit of the yeast SWI/SNF. It is found to be universally conserved [1]. 27.00 27.00 29.30 29.30 25.80 25.80 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.65 0.72 -3.81 109 470 2012-02-07 11:49:24 2012-02-07 11:49:24 1 26 242 0 281 441 0 79.00 40 5.41 NEW pE....Rppp..ct.................................t.hs.RLhp-sELP-hhhp-tsth..............hpt-pptt.......GRGsRc.RK......p.VpYs.DuLT....E...-Q......WL .............................................................................................................h.-Rc+c-t......t.........................scphP.RLMpE-ELPshhhcDcscl..............hppE--Ept......GRGuRp.RK......c.VcYs.DuLT...EcQWL............... 0 80 142 228 +14471 PF14620 YPEB YpeB sporulation Coggill P pcc Pfam-B_309 (release 26.0) Family YPEB is a protein that is necessary for the functioning of SleB during spore-cortex hydrolysis. 
27.00 27.00 27.10 27.20 26.90 26.80 hmmbuild -o /dev/null HMM SEED 361 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.90 0.70 -5.44 62 303 2012-10-01 23:09:26 2012-02-07 14:26:29 1 9 272 0 84 258 0 334.50 35 74.40 NEW hll.ssuhWGapppp.......c+sshphthpNpYQRAFa-LstpV-plcspLuKslsssSppptt...hhs-lWRhuutApssluQLPlsths...hscTpcFLuplGDFuYplut+shssp.sLocc-apsLppLappusplpppLpclpppl..hpsplc.Wh-.............lctth......ss..phppss........................sshlsstFpsl-cpl.ppYP.sl.a-GPh...S-ph.pcpsP+tlsG..plopc-ApphAppalshp.p.tphpsspssptsphssYshplts.tpptt...hhh-lo+pGG+llahlss..Rslu.........psp.lshscAhppAppFLpptGa..psMpss.stph.-........NhushsF..s.hppssVhlYPDhlKV+VALDsGcllGa-ApsYLhsHpp.Rsls ...................................................................h.llusuhWGYppap.......E+sslhhts-NpYQRAFa-LshpV-.LcsplupolshsSppphs..ssLs-lWRhoupApssluQLPls.hs...hscTpcFLuplGDFoYphuh+shptc.sLscpEacsLppLappusplpppLpclpphl..hpssL+WhD...........................lchshus...ptp.sc.........................ssllsu.hcsl-+sl.psYs..shta.GPh.......Ssph.pcpsshthpG..+tISc--AtcIAcpFlshp.......t.....s.....p......plp.l.p....p.....us.psuph.shYolplpstsppst.....hhh-lotKGGaslahh..ss......Rpl.p.............cp+.lSls-Ats+uhpFLcc...pta..psMphhpoppaD..............Nlulaoa..V..s.s..pssVhlYP-tIphKlALDsGpIlGFsApcYLhsHpc.RslP............................................................. 0 35 63 71 +14472 PF14621 RFX5_DNA_bdg RFX5 DNA-binding domain Coggill P pcc Pfam-B_20855 (release 26.0) Domain RFX5 and RFXAP reveals molecular details associated with MHCII gene expression. 
27.00 27.00 119.70 119.70 19.50 18.60 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.45 0.70 -4.73 7 47 2012-02-07 16:00:39 2012-02-07 16:00:39 1 2 30 0 16 42 0 211.40 75 35.26 NEW PGPGpAPPGGLTQPpGTEsREVGIG.GD.GPHDKGVKRTAEVPVSEASGQDPPAKAsKQ-hEDTuSDAKRKRGRPRKKSGGSGERNSTP-KSAAAh-SuQSSRLPhEsWuSutEus..uuuGsERPGssGEAEKGsVLsQGQtDGAVSKGGRGPuSRHAKEAEDKIPLVssKVSVIKGSRSQKEALpLVKuEs-susQGsKDLKGHsLQoSLs+E+KDPKAs ....PGPGRAPPGGLTQPRGTE.NREVGIG.GDPGPHDKGVKRTAEVPVSEASGQDPPAKAAKQDlEDT.uSDAKRKRGRPRKKSGGSGERNSTPpKSA..AAh-SAQSuRLPWETWGSGGEuN..SAGGuERPGPhGEAEKGsVLAQGQpDGsVSKG.GRGPSSpHsKEAEDKIPLVsSKVSVIKGSRSQKEAh..LsKGEs-TAsQGNKDLKtHVLQuSLopE+KDPKAT. 0 1 1 2 +14473 PF14622 Ribonucleas_3_3 Ribonuclease-III-like Wood V, Coggill P pcc Pfam-B_6419 (release 26.0) Family Members of this family are involved in rDNA transcription and rRNA processing. They probably also cleave a stem-loop structure at the 3' end of U2 snRNA to ensure formation of the correct U2 3' end; they are involved in polyadenylation-independent transcription termination. Some members may be mitochondrial ribosomal protein subunit L15, others may be 60S ribosomal protein L3. 
35.00 35.00 35.10 35.00 34.90 34.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.63 0.71 -4.18 70 4780 2012-10-03 08:45:47 2012-02-07 16:33:03 1 23 4550 5 1167 4144 2037 128.80 40 47.56 NEW cppLLppALTHpSa....s....p.....t....p..p....p....p....NERLEFLGDAV...Lp..L....s.....s.oc..........a.l.a.cp............h.s.h..s..E.G.t.........h.s+hR.A.s...lVs.cp........oL.AphA.+clp..........LucalhL.....t+GEtt.psupsc..cs..ILuDshEAllGAIYLDtGhpsApcFl...tch...llssl ..............................................pLLppALTH.p.S..h...........s...............................t....t....t.................p.......p.........NER....LEFLGDuV...Ls.l.....l............l.u.c........................h.L...a.cc............................s.t...s...E...G.c.................................L.o+hR..A..s....l.Vp.pp..................oL...Ap..lA.+.chs...................................................Luch.lhL........................GpG...E.tp.....oG.G.p....c.+......sS............I.Lu..DshEAllGA.laL....D...p...G......h..p..s..scphlhphh....h..................................................................................... 
0 402 757 987 +14474 PF14623 Vint Hint-domain Coggill P pcc Burglin T Domain This short domain is a conserved region of intein-containing proteins from lower eukaryotes 27.00 27.00 27.40 27.90 26.50 25.80 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.34 0.71 -4.75 14 51 2012-10-03 10:25:13 2012-02-08 11:39:37 1 9 47 0 42 56 43 164.60 36 22.29 NEW sCFsGso.VpLuuscs.............l.........plcplRtG.pVhsPp....Gs..ccVtsVLpTsVpp..tplCcl...........G.sLhITPWHPl+h..tuc..WtFPsslu.....pscs......lYSlLL-ss.....HAlhV..sGhhsVTLGHGlpt.......pDltuHtaFGs.ppVs+sLttLsthtpG..hlhstuh..pRsst.TGhVpG ..................sCFsGps.Vplussps......................lplcpLRtGhpVhTPp....Gs..R+VtsVLpTsVpp...tslCpl...........................G..sLhlTPWHPlph..ssc...WhFPsshupt.s.h...sss........lYSVLLcssss...........spAHAlhV.....tsh.hsVTLGHGlps.........spDlRAHtFFGsYptVhcsL.tpLst.h...sG..llhstGh...Rs..oGhh................................................................ 0 16 26 36 +14475 PF14624 Vwaint VWA / Hh protein intein-like Buerglin T, Coggill P pcc Buerglin T Domain VWA-Hint proteins carry this conserved domain of around 300 residues, now named the Vwaint domain. Such proteins do not seem to have a signal peptide for secretion. Generally, this domain lies between the N-terminal VWA domain and the more C-terminal 'Vint'-type Hint domain. The exact function of this domain is not known. 23.50 23.50 23.60 23.50 22.20 23.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.82 0.72 -3.87 28 182 2012-02-08 15:26:40 2012-02-08 15:26:40 1 17 64 0 112 174 28 78.50 32 12.02 NEW DptspuLhtDLpt...............QlphAhsspphYc+WG+pYLhSlhsAHshQhsssFKDsus..a.G..oshFppsp.............tphsshaDtLssPps ................D.hstuLhtELp............................chppth.....tsp.phYEppGRuYhLSuloSHuhQRs..su..Rs.Ds..........................os.h.hh.....................hphsshhc.l..sp.............................................. 
0 19 51 92 +14476 PF14625 Lustrin_cystein Lustrin, cysteine-rich repeated domain Coggill P pcc Jackhmmer:O44341 Domain This repeated domain is found in proteins from lower eukaryotes in lustrin, perlucin, pearl nacre, and other similar protein-types. Each repeat lies between Kunitz-BPTI repeats, in certain species, which are also cysteine-rich. The cysteines may form the disulfide bonds observed for other members of this superfamily. 27.00 15.00 27.00 15.00 26.90 14.90 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.27 0.72 -3.88 56 1210 2012-02-08 16:11:41 2012-02-08 16:11:41 1 138 35 0 833 1225 0 45.60 27 22.43 NEW ssCshG.pPhl.s.ssp.htC....sssss...CPssaaC.ahGss.ps.olCC.s ...............Cst.G..pshh.......t.ssp.shtC.....sspss......C.P.s.s..ah.C...p.....hu.ss.s...pp...sl....CCs................... 0 301 402 824 +14477 PF14626 RNase_Zc3h12a_2 Zc3h12a-like Ribonuclease NYN domain Coggill P pcc Jackhmmer:O18125 Family This family is found to be a divergent form of the NYN-domain- containing RNAse family. 21.00 21.00 23.80 22.00 20.80 19.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.66 0.71 -4.25 5 17 2012-10-03 20:43:45 2012-02-14 17:06:22 1 2 7 0 15 17 0 116.20 32 19.74 NEW Y+ssLNLPVKALsDIIhhFLIRGHKTsVYLPKYY-Dalo-sGlSKVDDlVAFc................+Ll-LsaI..Kh..Ip.s.ca+WFNEVuchAD+sGAVFVSSsE.YRpRchclcYsKsSERIITPC.FLNA-DRLM ...............L.V+sLh-IllpFllcGHKTslaLPcaYpshhs...hpKVDDl.sFphLhsLchI+F...lpp...csccthhppVhtcA-+ssGlhVSssE.hhtp........................................................ 0 5 6 15 +14478 PF14627 DUF4453 Domain of unknown function (DUF4453) Coggill P pcc Jackhmmer:C9CWS7 Family This short domain is found only on a small subgroup of proteins from Gram-negative Proteobacteria that also carry a YARHG domain, Pfam:PF13308. They carry three conserved tryptophan and three conserved cysteine residues. 
26.10 26.10 26.10 98.00 25.30 22.60 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.46 0.72 -3.92 11 12 2012-02-14 17:36:03 2012-02-14 17:36:03 1 1 11 0 4 12 2 107.00 41 57.09 NEW uC+VDTupppL..DlpDlthR.+pLhcLPlpD-hESAClGWhGsPlsLpAG+uh.soshIGpIpsGDslpauHhs..hGGWoYVola.ss.DWtlhouGWhc..hsths-pCspFAG .uC+VsTppppL..slpDhthR.+pLhsLPlpD-hESAClGWhGsPlsLhAG+uh.soshIGpIpsGDsltauHhs..hGuWoYVosa.ss.DWtlhouGWhc..hsths-pCppFAG 0 0 4 4 +14479 PF14628 DUF4454 Domain of unknown function (DUF4454) Coggill P pcc Jackhmmer:A8RR49 Family This C-terminal domain is found only on a small subgroup of proteins from Gram-positive Clostridiales that also carry a YARHG domain, Pfam:PF13308. 25.00 25.00 97.50 97.40 21.70 20.90 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.78 0.70 -11.49 0.70 -4.71 5 9 2012-02-14 17:48:16 2012-02-14 17:48:16 1 2 8 0 0 9 0 198.90 40 43.06 NEW caDcs+uctt.KpGY-uLPPAPYLsLLsc+sEhGVpLYSDluHAsD+GlYYsAcGTISVPloITsEQYculLs-GuElclVlNELTGEo+hL++ss.so-YG-s...hLlY-tGsEP.pssGE-TGtYh.loY-PsSGsYoLWssSsDTVFKTVYcGsVYVLKGA.........sEEaYsYFshP-+upsE..o..u.RVMpFs-.sshGssGY.sGNpLsaDuKGYlKAIYaLGD ...phDcsphctt.phth-sL..APYLphLscasEhtlpLhuDhspAhDhGhYYsspGoISVPholTtEQhps.httGuplclshsELTGEpthLphs..sschsth...hhha..Gp-s.ts.G.-sG....loh-.pSGpYpLWpsSsDTlhKTVYcGslYlLKGA.........spphYshhshsscupsE..s..u.hs.phst.ts.G.psh.hGNpLhasu+GYhpAlYaLGD 0 0 0 0 +14480 PF14629 ORC4_C Origin recognition complex (ORC) subunit 4 C-terminus Eberhardt R re3 Jackhmmer:O43929 Family This entry represents the C-terminus of origin recognition complex subunit 4 [1,2]. 
25.30 25.30 26.10 27.40 24.50 25.20 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -11.26 0.71 -4.93 87 326 2012-02-17 08:55:20 2012-02-17 08:55:20 1 17 258 0 210 324 1 202.70 22 39.47 NEW hclscphLt.lssp...............................................tpphhptWNptlpp....Lhpsp.phpphLpphatts.+sh.....pphhs.h...hh..sl..splsssps............hlsssphh..........thsph.hsc..................phph.lpuLSsL-LsLLlAhs+Ls.thh-..............shNFsh.sas.EYpphhpptphp............................................psas+slsh.........puaE+LlphsLlhs............................t.ssststt-h.phhplplshppl.pslppt ........................................................h.plhpphLp.Lssc...............................................sptahppWNpplpp....Lhpsp...phpphLpphaphs.psh.....pshhthl................hh.......sl....spls..s.pps...................hlsssshh........pssphhp.D....................schph.lpuLSsLELsLlIAhp+Ls..cha-.............tshNFph.sYs.Eapchhpptptp............................................phat+slsh.........cAaE+LhphtLlhs............................ttsssphth-h.phh+l.ls.pplhpslt..h.................................................................. 0 68 114 172 +14481 PF14630 ORC5_C Origin recognition complex (ORC) subunit 5 C-terminus Eberhardt R re3 Jackhmmer:O43913 Family This entry represents the C-terminus of origin recognition complex subunit 5 [1]. 
25.00 25.00 30.80 27.80 23.80 23.00 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.78 0.70 -4.94 68 327 2012-02-17 09:01:59 2012-02-17 09:01:59 1 12 261 0 243 309 2 262.10 25 53.50 NEW lhFPsYo+cEhlpILtps..........tss.h.....................p.s.plaspFlshlhcshhsss.pclsplppls..cphWspaspPltsG.ph.....s..................tp-hs+Lapphpshhpp...thpslh..spphsss.....p.t............................t.t...............ttht........s.ss..s.p.....hcLPhhuKaLLlAAYLASasPs+hDtphFu.Ktp..u.....+p++.........p.pp.t...........................................................p......pp....tc...h...........................ssp..h...L..uPpsFsLERLLAIapuIh.....................s.p.t....ss..................................sss-......lhspluTLssL+LLsps..u.u.....s.................ssLD.u..sKa+sN..........Vuh..........-hltplA+uluh-lscYLh ...............................................................................................lhFP.Yshtph.pIL.ts................s.............................................p.s.phaspalshlhsshhtss.ps.l.pLppls..thhaspa...sp....Plhpsph.....t......................................p-h...p.+Lhpp.hps.hhpp...thp....plh...p...t.h.sstp.t.................................................t........t..........................tphp.......shts.ttp.............hpLPhhuKalLlAAYLASaNss+hDtphFs.+tps......................+p++p..ttt......................................................t..tp...tp..h....................................spp..h...l...sPcsFsL-RLLAIa.uIh...................sp...ss........................................................sss...lhtpluoLhpLpLlshsus.....t................................................s.h-.s.s+a+ss..........lsh..........-hlttlu+..s..lshpltpal.......................................................................................... 
0 72 122 203 +14482 PF14631 FancD2 Fanconi anaemia protein FancD2 nuclease Coggill P pcc Jackhmmer:Q9BXW9 Family The Fanconi anaemia protein FancD2 is a nuclease necessary for the repair of DNA interstrand-crosslinks. 27.00 27.00 27.30 27.10 26.80 26.60 hmmbuild -o /dev/null HMM SEED 1426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.09 0.69 -14.36 0.69 -7.47 4 248 2012-02-21 15:25:38 2012-02-21 15:25:38 1 8 108 1 170 249 6 667.60 19 82.83 NEW MlSKR+hScs-scEs.TEDuSKTchpshStcoKKS+luccsp......EN-SVFVcLLKtSGlhLKsGEsQNplAVDQlhFQKKLhQsLRKHPuYPplIpEFlSGLESYIEDp-pFRNCLLsCp.hpsEpu.ohusSYscSLIKLLLGI-ILQPAlIphLFEKlPEFhFEstspDGlNhPRLIVNQLKWLDRlVDGKDLosKlMQhlSVAPVslQHDIITSLPEILtDSQHu-VuKELusLLhQNTpLTVPILDsLSSLcLDsshLuKVRQhVMspLSSV+LEDLPVllKFlLHSVoAsDulEVIu-LRcpL-LppCVLPupLQASQsKLKSKuhA.SSSssQpoSuQsClhLlFDVIKSAIRapKTISEAWlKAIENhsSsu-HKVlDLlhLhIIaSTNo.po+KtsE+VLRsKIRpGCIQEQLLQssFpsHhhVlKDhhPSILuLAQoLLHS.DpsllsFGShhYK.AFphFDoYCQQEVVGALVTHlCSGsEuEVDsALDVLhELVlLpPStMhh.AsFVKGILDYh-NloPQQIRKLFalLSTLAFSQ.pppuuHIQDDMHlVIRKQLSSTV.KYKhIGIIGAVTMsG.MAtcRscssu..pcpusLSpEpssQVooLLpLV+SCoEpSPpAuALYYDEhANLIQcpK..LDPpsL-WlG+olhpDFQDsFVVDhssss-GsF.FPVKuLYsL-E.-TQsGIAINLLPLhhppp.uKsusphoutpSppRhVSPLCLuPaFRLLRLCstcQHsGsLEEIDuLLsCPLaLTDLEssEKL-ShStpERpFhCSLlFhTlNWFREVVNAFCQQssPEMKGKVLTRLpsIsELQslLpKhLAsTPsYVPP.AsFDsEohDhhP.SsoslsAKttpKtKsG.+KpKuDuSKsSSuDphptEcsS-s-.ssochucl.sKctstKEs.KohlpLpsY+sFFRELDlEVFSlLHCGLlTKhILDTEMHTEAoEVVQLGPAELLFLLEDhspKLEphLTss.A+RlPFLKsKGs+slGFSHLpQRSspEIspCVspLLsPhCNHLENhHNaFQsLhsENpGVVDtsslslQE.plMuSCYQpLLQlhHsLFAWSGFSp.EppsLL+SALpVLusRLKpsE.ps.PLEELlSpSFpYLQNF+pSlPSFQCALhLhpLLMsl.EK.usssspp+EKlASLAKQFLCpsWs..uG-KEKuspaN-pLHsLLsIYLEHTDslLKAIEEIuGVGVPELlNusKDAsSSTaPTLoRpTFlVFFRVMMAELEKoVKpI.sGpsuDSQplppEKLLhWNhAVRDFpILINLlKVFDS+PVLpVCLKYGRLFVEAFLK.sMPLLDaSF+KHREDV.SLLcThQLsTR.LHHhCGHSKI+QDTtLTpHVPLLKKSLE.hVhRVKAMLsLNpCpEAFWLGsLKNRDLQGEEIlSQs..S...pEusAE.-SE-shpSpAucsc 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t............................................................................................................................................................................................................................... 0 80 98 142 +14483 PF14632 SPT6_acidic Acidic N-terminal SPT6 Wood V, Coggill P pcc pfam-B_9510 (release 26.0) Family The N-terminus of SPT6 is highly acidic. The full SPT6 protein is a transcription regulator, but the exact function of this acidic region is not certain. 
23.30 23.30 23.40 23.40 23.20 23.20 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.86 0.72 -3.82 58 252 2012-02-21 15:38:02 2012-02-21 15:38:02 1 29 221 0 193 259 0 90.40 37 6.20 NEW pDSSEE-.......----DEEEt+c......l+EGFIVD....-DE.......----p..pptp......c++++K++++cc+ctE--...tLDEDDL-LltENsGhttpp.........sKaKRLKRupc-- .................DuSEE-.......-p-DDE-ct.cp......................hcGFIsDp...-DE........E---t....pctt...........cpccc++++.++c.......+..........p.p.t--..........pLD-DDh-LItENhGsphcct........pKaKRLK+hpc-....................... 0 57 100 160 +14484 PF14633 SH2_2 SH2 domain Wood V, Coggill P pcc pfam-B_9510 (release 26.0) Domain \N 27.00 27.00 31.50 27.10 23.90 24.90 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.36 0.70 -5.28 4 312 2012-10-01 22:44:06 2012-02-21 16:35:46 1 42 265 11 230 336 4 211.70 35 14.81 NEW ..+soYaDh-AEtcDscpEcc.pptpQRsphl+RVIsHP.F+slNh+QAEchhcoM-pGDlVIRPSSKG-sHLsVTWKVu-GlYQHlDVpE.tKENsFoLGpsLhls......opcapDLDEIlscYlQshAphhc-hhsHchF+c...Gs+KchEchL.chp+tpPsh.sYahshs+-hPGhFlLsapspups+lph..VplTPsGFhhpuplaPoVsuLhphFKshY ......................................................h......aD.ptE..t.t.D.p..c...p...cc.t.p.c.p.p.t.R......h.h.p.RVItHP.F+.shshppAEchLts...t...s..t..G-slIRPSSKGt.sHLslTWKVs....Du......laQ..H..lD..VtE..s..K.....-.....N....t.....a....olG+pLhl.s.......pppapDL.DEllscaVpsMuphlc-lhs.H.c+.a.pc...........Gs.+p..c....h..-..c..............hLpp.ppts.Psp.sYhh..sh..sp..c..hPG..h..FhLs.....ahs......ssp......s.+h..ph...VplhP..cGac..h....p.tp.........asslptLhp..hFKph...................................................................... 
0 79 131 193 +14485 PF14634 zf-RING_5 zinc-RING finger domain Coggill P pcc Jackhmmer:Q495C1 Domain \N 27.90 27.90 27.90 27.90 27.80 27.80 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.55 0.72 -4.23 118 801 2012-10-03 15:03:13 2012-02-21 16:43:03 1 78 214 0 571 6385 343 47.50 29 12.65 NEW pCs..hChpth...............ppt....hhlssC.uHl.hCp..pChpptt.................ttpCPh..Cpp ....................................Cs..lChpph........................t.ppt............shlhs.C..uHs.hCp..pClpphh...........................ttphpCPh..Cc.................. 0 166 222 455 +14486 PF14635 HHH_7 Helix-hairpin-helix motif\ \ Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -10.25 0.72 -3.91 4 299 2012-10-03 02:11:09 2012-02-21 17:03:10 1 47 257 2 231 2357 68 104.10 41 6.91 NEW -DlhsLphHPhQchlspEpLhpAL.stFlshVN.VGVDVN+AlspsYptullpYIsGLGPRKusalLKhLppsNsRL-sRoQLlThshMu.KVFhNCAGFlhIs .................................c-IlsLph.H.P.hQp.h.ls...p-cLhptLcpthV..shVNt........VGVDlN..cAls.......c.sap.sslLpaVsGLGPRKAstLl.......K.......hl.p..p.s...s..stlp.......sRppL....ls.........h......s......hhGs..+VFhNCAuFLpIp................................................... 0 87 136 196 +14487 PF14636 FNIP_N Folliculin-interacting protein N-terminus Eberhardt R re3 Jackhmmer:Q9P278 Family This is the N-terminus of folliculin-interacting proteins [1,2]. 
25.00 25.00 25.70 29.30 24.80 23.80 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.96 0.71 -3.68 40 205 2012-02-22 08:14:46 2012-02-22 08:14:46 1 9 140 0 136 194 0 126.80 33 11.18 NEW plRlllhQ-sptcspp..lLFDSpsspphspsssss.st..s.......................tsphspttt...hpssssp..............shttp.p....hpspsppsthpa.t+....ssDlshL.u-hlFGSss.MuY+G.oohKlHhls...us.sphhhopsh .........................................plRlIlhQDsppcscp..lLFDSpshp+.spphs.spphs.sss....................................................sphsp........ssssp...............................puhpsosss......pscpptsc.aphsRs...usDsshL.uEMhFGSlA.MSYKG.SohKlHhlp...oP.splhhopl....................................................................... 0 27 49 88 +14488 PF14637 FNIP_M Folliculin-interacting protein middle domain Eberhardt R re3 Jackhmmer:Q9P278 Family This is the middle domain of folliculin-interacting proteins [1,2]. 25.00 25.00 27.10 26.50 23.70 21.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.54 0.70 -4.78 17 166 2012-02-22 08:15:25 2012-02-22 08:15:25 1 9 87 0 101 150 0 226.90 48 22.26 NEW ptsstssppsssht++t+lulullhpl...........s-sptpph......cpFhhpHhsllEochs+L+stlcp..............ushptptalphlhpAhpch.pphlssLaou........PRlppPVWLshho................sppppplspcFhppLspLlpphsp+popaFlSsLlTAVLTaHLuWVsTVtsssts..........p.hh.hp.ppplsplspsaPY.NsLWAQLuDLYGulG..............sPs+Lu+TlVsGspp...llp+lL.lLSYFIRCSElpc ...................................................................................sh.st-osusssuhlR+KKIAIulIFSL....................scp--tpp.........pFp-FFFSHFP....LFESHMN+LKSAIEpAMh.......................p+hutsu.php..hhh.sRlh-ALsEF.RsTIpNLYoh.........PRIsEPVWLTMMSu.........................................o.EKspLCpRFhKE.FshLhEphsKNQ...FlsALlTAVLT.HLAWVPTVMPsspP........PI+hF.EK+oSQSVshLAKTHPY.NPLW.AQL.G..DLY.G..AIG..............SPVRLuRTVV.VG+cp-..hVQRlLYhLTYFIRCSELQc........................ 
0 30 38 64 +14489 PF14638 FNIP_C Folliculin-interacting protein C-terminus Eberhardt R re3 Jackhmmer:Q9P278 Family This is the C-terminus of folliculin-interacting proteins [1,2]. This region is responsible for binding to folliculin [1]. 25.00 25.00 29.90 28.70 20.10 21.00 hmmbuild -o /dev/null HMM SEED 192 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.57 0.71 -11.25 0.71 -4.95 16 160 2012-02-22 08:16:55 2012-02-22 08:16:55 1 7 88 0 99 144 0 183.60 49 17.82 NEW clPhPpsp.hp.s.s...............shuRSLhuGhs-sYsPDFVLpG..hsssc.hcpp.....................LtsD........LthusppSsl--s..luEuVsIlADsDsWsVplhSSpppshs.......uh.VusSplVs...........uMLpShhsLachshssp...FClhaLED+LpElahKScsLuEhL..............pssst.lshcclspsLGl-tuDlPLLluVAusHoP.Vs ........................................................lPhPt..Sp.lpsptst.s.t............NFG.RSLLuGYCsoYsPDhVLpG.husD-+.h+Qs...........................LhuD.....LsHuVpHPVLDEP..IAEAVCIIADhDKWoVQVASSQR+ss-..K.............LGp-V...L...VSSlVS...........sLLcShLQLYK.h.sLsss...FClMHLEDRLQElYhKSKMLoEYL............................................+GphR.VHVKELusVLGIESsDLPLLsAlASTHSPaVA........................ 
0 26 34 62 +14490 PF14639 YqgF Holliday-junction resolvase-like of SPT6 Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain The YqgF domain of SPT6 proteins is homologous to the E.coli RuvC [1] but its putative catalytic site lacks the carboxylate side chains critical for coordinating magnesium ions that mediate phosphodiester bond-cleavage [2] 35.00 35.00 35.00 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.64 0.71 -4.65 4 495 2012-02-22 14:01:54 2012-02-22 14:01:54 1 42 441 2 270 514 14 143.20 28 12.53 NEW pGKh.+VLuluhupGR..scslhCshVNscGEssD.L+L........shc.c-+ppptp.hEsLcpFl.spKPcVluVuG.NhcAphlhcclpcslpch-..spph.slsl.hV-sElAlLY.NSc+utsEF.shPPll+.sVuLARhlQsPLlEaApls ..............................................h.....................sh.h.h.shlctsGcll-phpl.....................................h.s...t.psp.p.p...c............p...pshpp...L....t....ph....l.p...p....+.csclIul..s...G.t.u.t...cop.c..l....hcclt....c.......h...l......p.........c...........h..............p....................t..............t.....................p...................lt.......l..hlVs-psAplYpsSchAtpEFP..c.h.s.s.hlRtAVSlARpLQDPLsEhs...t......................................... 
0 95 154 231 +14491 PF14640 TMEM223 Transmembrane protein 223 Coggill P pcc Jackhmmer:A0PJW6 Family \N 22.00 22.00 22.30 22.20 21.90 21.80 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.93 0.71 -4.32 14 99 2012-02-22 14:08:19 2012-02-22 14:08:19 1 4 75 0 62 100 0 152.60 31 73.24 NEW +DVlLFca-ps+FF+hLslFuhsQhhFWsYLuaFu..ho.....cs.ss.p.c.ppchshhptl...............s.LupspaR.Glshhs....lhlGshlLhsuhhFoLRSVptllLp+GGppVolhTYuPFG..ppRthsVPLcplSshtsRpps..pu.lPlKVKG+tFaalLD.pcGcFpNspLFDhTVGlpRp .......................................................................pDlhLaca..cps.+FathlshFshsQhhFWs.huhhu.hs.........ps.s..........tc...ht.h.................s.htsshaR.Glshhs....hhhGhhlL.hssh..h..FshRSVp.lhLppGGp.....p........VslsT..auPFG...h.tpph.s..........V.......PLppVSsh.spRtps...s.lPlKlK...G+p....haalLD..+tGcF.NspLFD.TsGhhR............................ 0 23 30 44 +14492 PF14641 HTH_44 HtH; Helix-turn-helix DNA-binding domain of SPT6 Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain This helix-turn-helix represents the first of two DNA-binding domains on the SPT6 proteins. 35.00 35.00 35.20 35.30 34.40 34.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.59 0.71 -4.12 4 227 2012-02-22 16:49:36 2012-02-22 16:49:36 1 29 207 2 176 238 0 131.10 33 8.87 NEW LpshsshsuEDDELEEpstWIhcphhss.shshptsh........hosFp.......ptI+pAlpFhpppphEVPFIhhYR+-Yl..........pP.Ls.NDLWclaphDpKappL+s+KpslpRLapchph..........DcPLs .............................................h.hp.-..-..ELc-EApWIhphhhsc.p.s...hs.h..ppsh....................ppsFp.....................ptItcsLcFhp..spphEVPFIhhaRK-Yl...........................................................................c..............Ls.hsDLW+laphD.KappLhp++psLp+ha-p.hpt...............h........................ 
0 58 92 143 +14493 PF14642 FAM47 FAM47 family Coggill P pcc Jackhmmer:Q5HY64 Family The function of this Chordate family of proteins is not known. 27.00 27.00 27.70 27.40 26.80 26.80 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.90 0.70 -5.09 2 239 2012-02-22 17:41:02 2012-02-22 17:41:02 1 10 33 0 65 246 0 127.20 33 63.46 NEW MGDpRPQDRPpS.GMDSpPWYCDKPPSKYFAKRKHRRLRFPPVDTQNWVFVTEGMDDFRYuCQSPEDTLVCRRDEFLLPKISLRGPQAD.KSRKKKLLKKAALFScLSPsQPARKAFVEEVEAQLMTKHPLAMYPNLGcDMPPDLLLQVLK.LDPERKLEDAhu.CEupEKTT-.PTEsGKYPCGE.sPRPPET.VSpL.Pp.PKTPVSShRPEPPcTtVSpLRPpPPKTpVSSLH.EPPETtsSHLRs-PPcTtVSp ........................................................................................................................................................................................................................................................................................................................t....hsp..........t...sps...hS.p..Lp.E.P..Pc...T.t.s.SpLp..EPsco......s.Splp.-P..c...t.h..s........................................................................................... 0 28 30 38 +14494 PF14643 DUF4455 Domain of unknown function (DUF4455) Eberhardt R re3 Jackhmmer:Q9P1Z9 Family This domain family is found in bacteria and eukaryotes, and is approximately 480 amino acids in length. There are two completely conserved residues (W and P) that may be functionally important. 
28.40 28.40 29.00 28.80 28.30 28.30 hmmbuild -o /dev/null HMM SEED 474 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.30 0.70 -12.31 0.70 -5.86 16 121 2012-02-23 13:51:07 2012-02-23 13:51:07 1 10 67 0 82 82 0 353.80 27 33.39 NEW ht-s++c+appslsuhpc-hsplup-hEstltcsutthhpplucsDpplsplhspl-s-ssLhchohppLhpla-pVsp.ch.h+pphIcpL-psLpphEppRt-pLcssLc+hhptLpchualhps-VpRhlscEAMtlNpslLtNRRAhAcLhhpLhpsclcpEtspctcWppthccW+sL+ppphlppFp-hhtStchtsPsphpphhEshtcpQtslpppRhchlpslsslhP................Pshopsp....lpcWhsslpslscph....sphptpthtcl+tph-csppcshsplpph+ppLhphts..hsp-ctpsLVstchh.hssphpppscchLEthcphhEphutphcppspsLhpahpclspha-tHppthtpp-t-lppclcptRppasppppppEspLDthlcplpptusEpsLctplcpshshLcplcppYcsF+pphsplsppYPstlhpphpsYppsLsphhtlcchhcpshtsph .................................................................pppta.thh.th.ppht.ls.php..hhp.tt.h.t.l.p.stthp.hh.thttp..h.thphpt...l.plhppltt.....ppp.IcphcttLtphE.pRhppl...pthLpchsthlpchuahh.s-l.+llppcuh.hNtslLuNc+uhupLhhpLhpsplpp-h.ptphc.WpthhpsW+tlp+pthlppFpphhts.tphppP.th...............p........hpthhppQ..lt.p.pRhph.Lpplp.shhP......................................Pshspsp....lpcWhpplpslppph....sthphphh.pl+h.h-phhppsht.hpph+......ppLhphth....h.spcchpphlp..h..h........hhtthttphct.lchhtp.h-t..ttp.p....tplhpahtthsthh-.pt...h...p.ph.pphtphptt.p...t..ctthp..htphp.tsp.tttLt..ht.s.t.Lt.hp..h.............t.................................h................................................................................................. 0 37 50 60 +14495 PF14644 DUF4456 Domain of unknown function (DUF4456) Eberhardt R re3 Jackhmmer:Q9P1Z9 Family This domain family is found in bacteria and eukaryotes, and is approximately 210 amino acids in length. There is a single completely conserved residue E that may be functionally important. 
25.00 25.00 25.10 26.80 23.20 23.00 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.40 0.70 -10.93 0.70 -5.10 19 84 2012-02-23 13:52:56 2012-02-23 13:52:56 1 8 59 0 55 72 0 181.90 26 14.14 NEW hhshs-.aYcpct.p+tlsRPppl.psh-phs-slppplpph.pppupcappsslpchRsQlpphpphhpplsthlhpshhppahpphppuhpslppc.Fpph.pphppp+pp+tppLRPsLucPsphpELcsLpppEpcRppch.phlpphpptlh-sttp.uptFlppLsphspphlhhhD.phlhh-Dltssshssspp+pthhhpp+h .....................hths-taacpc..pc.hptPpth.psh-pss-tltptl.ph.pppsppYtspslhchRtQhpphpchl............spl...........s.llhpshhppphpphppshpplptp.Fpp..pphppt+ppptppL+ssLu+PtphpphcsLpppEppRppch.phlpt.pptl.cphpp.uphFlspLsshscphhh.LD.plloh-Dltssphts.ppphphhhppc....................................... 0 30 38 46 +14496 PF14645 Chibby Chibby family Eberhardt R re3 Jackhmmer:A6NI87 Family This family includes the eukaryotic chibby proteins. These proteins inhibit the wingless/Wnt pathway by binding to beta-catenin and inhibiting beta-catenin-mediated transcriptional activation. Chibby is Japanese for small, and is named after the RNAi phenotype seen in Drosophila [1]. 23.00 23.00 23.70 23.30 22.00 21.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.34 0.71 -4.20 14 172 2012-02-24 08:43:10 2012-02-24 08:43:10 1 5 85 0 109 173 0 115.10 33 58.82 NEW MPLFup+...FSPK+sPhR+suShSsh.pslDppTcphEhuhsaGssphcLu.spplhF..csGpWh.........s.us.sssssspcst+L+KcsppLpEENNhLKLKh-lLLDMLoETTA-sHLhEKEl- ..................................................shht.p....Fus+phP.R+.tS.ssh..psh-+soc.p.s-LtL-.Y.ssPphpLu.spphhF..psGpWl..............................tp.u...ptssstc...-spp.L+ccsptLcEENNhL+lch-lLlDMLsppss.c.p................................................................................... 0 25 35 60 +14497 PF14646 MYCBPAP MYCBP-associated protein family Eberhardt R re3 Jackhmmer:Q8TBZ2 Family This family of eukaryotic proteins includes the mammalian MYCBP-associated proteins. 
These proteins may be synaptic processes [1] and may have a role in spermatogenesis [2]. 24.50 24.50 24.70 24.70 24.30 23.90 hmmbuild -o /dev/null HMM SEED 426 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -12.34 0.70 -5.76 34 181 2012-02-24 13:32:21 2012-02-24 13:32:21 1 5 93 0 112 180 2 330.40 21 48.95 NEW ss.phDpRL+pWpchLppR++hQp+lp+chGKpss-lLhNpssTl.....Rsppph.....+cllD...hsp.Psp..ppcs.....hp.hu....thhpp..p.phspp......lstLt.ThP+sEctt..slE.hlGLPpss.ppElhu........p......tptphpspWp+..SphLppRlcctppslccllp..ah..PDl-..sLpV....lGps..h.......hpss..................pspphph.pps.........................ppt...spp-ph..pp...sth....pp..ps.c.....s..p...sh....ps......ulph....supsh......hhh.s....tssp.p.....................tphshthplhFcC.......cPhp.+s.h+tlh.lcNlGspslphpWpphshhtpht...........s..hhhspspcFhF-ppshhlhPGEs+phplhFpPppsulhpppWc..Lp.h.pPslhu................ppt.lhl..pL....pGhChssspatp+hpchpptlhsKpppphsppL..h..pch.....LssllpPspsh......CPY.-RhlsEcElFsstN....PG.a+....s.R.a-DLEsL+pLapplK 
............................................................................................................................................................................................................c.hLp.Wpp.ht.ppt.pttlt.phht+.sp..phlhp.spph.....Rphtp.hphh-...h....h...........t..s.....hhp...................h.th...sh.sphp.......l.p.hhths..h..p..Eh.h.........................a.p..S.hL..+hpp..t.htpl..........s..hs..tLpV....hGps.................................t.tt....t...........................t......t..tt............t...................t................................t....sh.h....ts..h..................tt.t......................t.s..h.plhFps.........hp.ph.pplh.l.N.GshslhapWpph......t.....................................................p...hh.s.p.ppFhF.sp.p.thhlhPGchpphthhFps.psG.lhpphWphts..pP.lhs................tt...h.lpL....huhs.h......h..tthp.hpp........h.tptt..hhppl..h..pph.....l.sl..s...s.......s.Ph.pt.hpcp-hFt..N.....t.h......h.p.t.h..l.tha.............................................................. 
0 45 53 80 +14498 PF14647 FAM91_N FAM91 N-terminus Eberhardt R re3 Jackhmmer:Q658Y4 Family \N 25.00 25.00 27.10 26.00 22.40 23.40 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.75 0.70 -5.37 14 179 2012-02-24 15:46:41 2012-02-24 15:46:41 1 12 117 0 114 165 0 259.20 44 36.29 NEW ptIRpNhsWppLPssl+ptLGsSp+-Y-KtVlcYS.l+sQLRa+sNlV++lt+cEcpYY-cLlcYSpppLhLYPYHLuDlll+tLRlTPFsYYhsllsclLpsE+SYDuLPNFTAADCLRLLGIGRNQYI-LhspsRSs.......+phFt.++ss+chLPppPl.hth.-PWWhVpsGhlhEsDl+h....Lo.tE+shlDpLIDpGsp.....................h.AGpLchslVpuLYpKGLlYL-VPlss-DhIsVPPL-G.FVMNRV.GDYFEsLLYKIFVolDEpToluELAplLplDlppVKsAlSlaCRLGFA+KKs.sslsp.......spl..HsSWts ...........................h.tlcpphsWppLPtplpp.Ltsopc-Yc+pll.Yu.l+ppL.cacssh....sp+lhpcEpcYYEcLlcYspppL........hLYPYHLu.....Dh.....hs+shRlTPFpYYhslht.....-lhpsE+SYDoLPNFTAADsL.RllGIGRNpYI-lhNpsRSp.......+.h.h....h....h.........+p.hs+-hLPhpP.lchsl.EsWWhlphs.lsp--l+h....ho.tEtshl........Dcllcpss..p....................................................................................................ss.h-hpllpuLYp+GhlYhDVPlps-s.......plt.VPsL.EG.FVMNRs......p..s......D.hEsLLY+lFVohDEp.ssVuE..............LAphL.......plDLp..lpsAlShhCRLGaApKc............................................................. 
0 41 62 89 +14499 PF14648 FAM91_C FAM91 C-terminus Eberhardt R re3 Jackhmmer:Q658Y4 Family \N 25.00 25.00 33.10 26.90 23.90 23.50 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.63 0.70 -5.65 9 246 2012-02-24 15:50:15 2012-02-24 15:50:15 1 12 117 0 173 235 0 274.80 26 51.76 NEW s.sKRIAFLFDSTLTAFLMMGNLS..PsLKsHAVTMFEVGKLSDESLDSFLtEL-+V-p.suEGEAQRYF-HAlTLpsTlLFLRpscclh.p.s.Pc..shshulDLLRsESL.uLDssTpSRVLsKNYpLLVSMAPLStElRslSs.ssPsHhGPsIPEVsSsWFKLaLYphhGpGPPSLLLsKGoRL+plPslFpca-+LLlTsW.......................GHDsulVssSNsLlhLNDALoHSAVhlQuaG.hhsp....u-..............olplPFPFpcs-hth...Fohsphph.H.ulppLcpplsLpa.pCGYlThLs......tspph.tst.......st.ttsthpsshs....ht.t.poFshstc...pth........ss.uppp.pptsssps-Wh.L-lsFGlPLFcu-LNpclCc+IsuppLhpc-slpplhpusRcLuhplhpFlppaQshspssc.pst...su....lspss..........ttuslPhPspsLlFcsGcLs.W- ....................................................................t............................................................................................................................................................................hlp.tth......t..ph..htY..hh..........................h.......................h.h..........t.s...............shhh.hG......h..hP..h.............t..ph.....h.......................t.p...hh..t..h..hNp.h..tslhlp.h.......................................................................................................h.s........................................................................................................................................................................................................................................................................................h...th.hGhs...l.t..hs..hh..h...th...................................................................................................................................................................................... 
0 57 98 136 +14500 PF14649 Spatacsin_C Spatacsin C-terminus Eberhardt R re3 Jackhmmer:Q96JI7 Family This family includes the C-terminus of spatacsin. 25.00 25.00 25.70 27.80 24.80 21.80 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -11.89 0.70 -5.54 28 154 2012-02-27 09:55:58 2012-02-27 09:55:58 1 4 102 0 95 154 0 263.60 33 14.19 NEW ohlGppLl..................ch.spu......................................................s.........pct.p...........hsshVELLIp..AHpCFstsCsh-GIspVLpts+phs.shLspspcasLlVRLloGlucYpEhpYlFDlLlcscQFEhLLp+.p..h..........Dp.......t........sG.......L+hAlLsaL++hpPp-p.-taphlsL+FsMa+ElAphhcpcAc.ptl.chlts..............p.s...............p............ppsphpppL.p.uhcpahcAA-sYhp-sshphAppCsppApLluLQlclh...............................ph...pllsLs................csphcchlspphsF.pALIlAcAYsh..pssWupsLapQhlhtGsh.pYLp-Fhphhslssslhp-ls+pYptc .........................................................lG.tLhch.h.s..........................................................s.......ph.............hps.sELLIh..AHpCFshsCph...-GIh..plLptsphhs.sths...spcauLllRLLoGlucap-hpalF-lLhcpc..FEhLhp+.t............-...........ts....l+hAlLsal++h.pPtDp.-tashl...........sLpFsMh+Elu.p.hEstAp.tl.chl.s........................p.s............................ps.phpp.LhpuhphhhcAAEsasp-sshppAp+stphspLlsLQl+hh.........................................................sh.hllsLs................cpphhchlhthspF.pA.IVAcAYs.h....s.s-Wu...plLapphlh.ssh.pYLpEahp.h.Lpsshhp-ls+hap............................................................................................. 
0 35 50 71 +14501 PF14650 FAM75 FAM75 family Eberhardt R re3 Jackhmmer:Q63HN1 Family \N 24.00 24.00 24.80 24.40 22.40 23.60 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.35 0.70 -12.21 0.70 -5.07 23 316 2012-02-27 11:20:10 2012-02-27 11:20:10 1 7 25 0 203 266 0 228.50 24 29.28 NEW Qphsas+shpD.pLppKssQLFWGLPSLHSESLsusshlssssSshp...hlFNchSss.Ph..psphos....................hLopspPLshsphQsQs..hh.shPQ.Qs...PlsphpsQApLpsslPlls....PSs.sQl+sCGVsa.ssQscspsLhPoEIppLEaslLpKQ.EpthsLPolVc+SQcsFs.ssPshspcs.....ts.psphssSIhPssFslosEL+++LEpHLpK+lIQcp..WGLPpRIpcSlplhpPQsEhstsspucuppG.....sSp.ShhpucuspsspK.st...stp........hphcpchs+.shspsltph.KDhsts...psossscl.tsssEpphps...........t..pscstts..hu.s+ccLEssLcsHLs+KhspIsEG.lPsoV+pSh .............................................................tp...phFhGlPshpSESl.s.h.........h......t..................................................................................................................................................................h..t.p....t................t...p...th....+t.p.....hsshhpp.....t................p...................................hshhst...p.h...h...c.h.pp..h-.+lpc.hh....p.p...s.st+lp.S.ph.h.........t....t.................................................................................................................................................................................................................................................................................................. 0 20 20 30 +14502 PF14651 Lipocalin_7 Lipocalin / cytosolic fatty-acid binding protein family Coggill P pcc Jackhmmer:P51161 Domain Lipocalins are transporters for small hydrophobic molecules, such as lipids, steroid hormones, bilins, and retinoids. The family also encompasses the enzyme prostaglandin D synthase (EC:5.3.99.2). 
25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.78 0.71 -4.42 3 209 2012-10-03 08:47:39 2012-02-27 14:48:16 1 2 77 40 108 690 0 119.60 38 94.68 NEW MAFTGKYEhESEKNYDEFMKRLGLPSDVIEKARNFKIITEVQQDGQNFTWSQpYSGGHoMTNKFTIGKEC-IQTMGGKKFKATVQMEGGKVVVNFPNYHQTSEIVGDKLVEVSTIGGVTYERVSKRLA ................MsFsG.p.aph.p..op.ENa-tFhKs..l..G...lP..p...-.h..I..p..+...u..+..shK..slo.Elp.Q.sGpcFsh.ot.....h.s..s.u.+..s..h.s.N.p.FT..lGc...Es..E..hp.T..hs..Gc....KhKss.....V.ph.EG......s....K.......L.....V.....s........s...h.............s......h.......p.......p........s......p......E....l.....s.....G....s.p..lsp...s...Th....u...sh....sh.RhSK+.......................................................... 0 29 39 68 +14503 PF14652 DUF4457 Domain of unknown function (DUF4457) Eberhardt R re3 Jackhmmer:O60303 Family This family of proteins is found in eukaryotes. It is found repeated several times in the vertebrate KIAA0556 proteins. 
24.00 24.00 25.60 24.30 23.60 23.60 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.96 0.70 -5.47 14 419 2012-02-28 08:56:53 2012-02-28 08:56:53 1 6 92 0 292 386 5 183.50 19 50.48 NEW plplhpoWGDtaYlGLTGlEl.....l.spptp.lslshpplpApP+DlN-lssapsD.RTLDKLlDGhNlTT-DpH..MWLlP....as..tpcphlsIchsptpsluulRlWNYNK..Sh-.....DoaRGsKhlpl.lDsph.lss...........ssallRKAPGs.spFDFuQsI.h.phppp...........pshp.hsphpt.h...............h.........pp-YpsshhPsGalh+h.LloTWGD.aYlGLNGlElaDtpGppI...plp.psls..AhP.SVslL...shpsDsRTs-+LlsGlNs.TasspHMWLu.........Phhsu......................psNplYlhF-pPlslShIKlWNYSKTPpRGV+EhtlalDDlLlYpGhLcpssp ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................s................h..h.h.hpsaGs.....hultthphhs.......................................................................................................................................................................................................................................t....................................................................... 0 118 148 223 +14504 PF14653 IGFL Insulin growth factor-like family Eberhardt R re3 Jackhmmer:Q6UXB1 Family This family includes the insulin growth factor-like proteins. These proteins are potential ligands for the IGFLR1 cell membrane receptor [1]. 
25.00 25.00 26.50 31.60 24.00 24.50 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.01 0.72 -10.83 0.72 -3.97 9 151 2012-02-28 09:38:21 2012-02-28 09:38:21 1 5 23 0 106 129 0 68.80 44 82.70 NEW hs.u.ssschhhCp.hPRCGc+hYNPhcpCCscssllsLs+T+h............CG.sCTaa...PChchCC.tphs.spp+aVVKLKshGhpu.p..sPloppC .................u.hhPchh...hC..hPRC.Gc+FYNPhcapCscsphl..s+T+p............h..................................................h....................... 0 74 74 74 +14505 PF14654 Epiglycanin_C Mucin_C; Mucin, catalytic, TM and cytoplasmic tail region Coggill P pcc Pfam-B_ 91014 (release 26.0) Domain This family represents the non-tandem repeat domain including cleavage site, the transmembrane helix domain, and the cytoplasmic tail of epiglycanin and related mucins [1]. 22.40 22.40 22.50 36.30 21.30 21.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.52 0.72 -4.03 5 31 2012-02-28 11:02:14 2012-02-28 11:02:14 1 10 13 0 14 32 0 100.60 66 14.78 NEW SAoTPVsEsKPSGSLKPWEIFLITLVSVVVAVGLFAGLFFCV.RN.SLSLRNsFsTAVYaPHG.N..LGs..........................GPGGNHGssHusuWSPNWFWRRPsSSlAMEMpGthsRP .........pAoTsVSEsKPuGSLhPWEIFLITLVSVVsAVGLFAGLFFCV.RN.SLSLRNsFsTAVY+PHGhN..LGs..........................GPGGNHGsPHRPpWSPNWFWRRPVSSIAMEMoG+tsG.P. 0 6 6 6 +14506 PF14655 RAB3GAP2_N Rab3 GTPase-activating protein regulatory subunit N-terminus Eberhardt R re3 Jackhmmer:Q9H2M9 Family This family includes the N-terminus of the Rab3 GTPase-activating protein non-catalytic subunit. Rab3 GTPase-activating protein is a GTPase activating protein with specificity for Rab3 subfamily [1]. 
25.00 25.00 25.20 25.10 24.60 24.00 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -12.42 0.70 -5.47 21 180 2012-02-28 14:10:59 2012-02-28 14:10:59 1 8 138 0 125 175 0 330.20 31 32.33 NEW WL.ssssl..ulussu-hlslAtsp+hslLsstW...pps.stsshthshpusl-sts....ppITulphlPhsstt..........-hsslulGhoSGalhFYoppGsLLhpQhhp.pcsllpl+l+ssppht..t.........cELsllYP.sslshIsGhsLhshLpsshppls+sthstpt........sts.ssLsapKWsh..scts...slsDsulsGshhsshF-.h.............h.ppos.phsphlssGscPhluaahspE.Gtspslluslhp.uVuuploohl.........huhh...uts.pppppssppp................ps.sshssRhsLhDttRcGpslsluPs.spLAAlTDsLGRVlLlDstpslllRlWKGYRDApCuFlpshpcpsp............sppscpchu..............................LFLlIaAPRRGlLEl.Wshps.GsRVsshss.uKss+Ll .........................................................................................l..t..h...hs.t.phhshu.ttp.hhh...h...............h.h.h.t.hp.............p.louh.hlsh.t.t.................-.hhslslGhssGhlhhaot.tG..hlhtphh.............p..p.lhtl+hps..h....................ppl.llas.sslshlpG.slht.Lptthppltpst...htt..............s.lsapKath..pp.s....h.Dts....h....G...h...s.a-.h...........s.h.pto....s.thsphlssGtsPhhuhahs.E..s..tsp...sh.lucls.h.uluuph..sshl...............................tuhh..u.ht.tppppps.sppt.t.....................pss.sshs.s+h..sL..DscRcups..lsluP......s.s......p.LAAl.T.DshGRVhLlDstpslslRh...WK...GYRDAphuWlphhpphtc..............................t.t..pchu................................................................................................................................................................................................aLlIYAPRRGllEl.Wshpp.GsRlsuhsl..u+tspLl.............................................. 0 51 69 103 +14507 PF14656 RAB3GAP2_C Rab3 GTPase-activating protein regulatory subunit C-terminus Eberhardt R re3 Jackhmmer:Q9H2M9 Family This family includes the N-terminus of the Rab3 GTPase-activating protein non-catalytic subunit. 
Rab3 GTPase-activating protein is a GTPase activating protein with specificity for Rab3 subfamily [1]. 25.00 25.00 27.30 26.60 23.70 23.60 hmmbuild -o /dev/null HMM SEED 595 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.94 0.70 -13.12 0.70 -6.38 9 135 2012-02-28 14:11:58 2012-02-28 14:11:58 1 6 90 0 89 126 0 456.10 34 43.64 NEW cDLLoLllsaWLpKshchhpps...-shscltphlphLschtush-p..sashpslSPWWQplRchllpScph.uuLLsAlVs+sVAsphh....cstp-tp..hspsshtss....pEpWEplShDtptWsLLhtpLEDlhlLtslLsps....hssppshssph..-.hsc...sSLKslLpuG+.GhVsEhlAKWlhpstLcP.cll.tlssscppp-s.........................s.....p.....htp...t.pt.....cls.h-.............sl.-hLplLpp+FPhSL-ssVLluphuWEYhVpWsKs.pp...hcaLptulcpLctltss..tlpHGICtMlWNshLhh.hpAs.shLhpKVG+hPKD+hCppDlsMSs..stlspFLchsl-hLpph.suslscDchph........caE-uh..sEG......shPlp.LALpQ+psphsLlphHppLsoVLahlspFpl+ssKPLo.LFDuh.GppAFFtDlsp..hhs.sssD.slhp.RppFLp+VVoushch......hcps.pplh....ccltahsphscLAppWplsps.l+++.VsELYuaGhDt.Ac-lLhsls-cEhLup.LL.lAGpRLs..Lhsp..poppsh.h...lAslsspLhsaLcshp................sphcs.........splslssls+Llp+lhcpLsc 
..................................................................................................h..hhplhl.hWhph....t.h..........htphtthl.hlspht..s...p......ht.pt.....l....S.WWpph.Rt.hhhpSpp.htu...Ll...sAhls...+tsuhph...............................pph.tc.tp............................p.......ppWEtlS...h-.....p.WtllltpLEDhhlLpslLtp........................hs.......p..hsp...hSl+pllpuG+.G....hls-.luKWlhppshsP.phL..p.h..ppp.-..p.........................................................p..................t.t..t......................phlt.hhhppFPhS..Lp.slLhuphsWEahspWsKc.pp.................hphh.tulphL.pt..l.ss...tlppGl...........sh........hhWssalhthhpus.shLhpKV..G+.PKD.....+hCppDlGhu-..tthspFLt.ChphLphh...hpusht..-chph............hphEchh.....s-..G.......................s.slspLA...l...pQp.....h.hsLlphH..LsslLahhhpF..pl+..sKPLs.LFDuh..uppAhFp-..l..ssh...hh..s..sphD.sh.p.Rp.pFLh+llsu...shph....................tth..................tt..a...thshpLAp.hp.lst-.l+c+.VsELYphGhDt.ucphlhplp-p-hLuspL.L.hl.sGpRLs...Lhpp..psppshth...luplsspLhsaLcshp..................htp.............thsl..htpLltphh.hLs................................... 0 31 37 66 +14508 PF14657 Integrase_AP2 AP2-like DNA-binding integrase domain Bateman A agb Jackhmmer:C9L423 Domain This family includes AP2-like domains found in a variety of phage integrase proteins. Presumably these domains are DNA-binding. 22.10 22.10 22.10 22.20 22.00 22.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.78 0.72 -8.27 0.72 -4.18 212 1784 2012-10-03 08:51:45 2012-02-28 14:47:41 1 10 978 0 198 1338 40 45.30 27 12.51 NEW pWhhph...thpshsGK++pppK.pGF+TK+EApphtpc....hhtp.....hp.....psht .............ahhph...thcs.sGKc+ppp+.p....GFc.TK+EAptthtc....h.tp....hp.p...t............. 
0 77 133 168 +14509 PF14658 EF-hand_9 efhand_7; EF-hand domain Coggill P pcc Jackhmmer:Q8N6L0 Domain \N 37.00 37.00 37.00 37.00 36.90 36.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.14 0.72 -4.06 8 82 2012-10-02 16:17:27 2012-02-28 15:26:37 1 13 55 0 37 112 0 65.20 37 17.67 NEW -shFcsCDsp+sGcVslS+llsYL+tsTup..sPp-s..cLpsLsp.LDPsGc..csslsLDTFpsVMRcWI .................sFphhDsp+TGhlsltclhshLcusst...sPp-u..cLQsLhsplDPpup...uplshDsFhslMpphh.......... 0 9 10 18 +14510 PF14659 Phage_int_SAM_3 Phage integrase, N-terminal SAM-like domain Bateman A agb Jackhmmer:C9L423 Domain This domain is found in a variety of phage integrase proteins. 22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.01 0.72 -8.77 0.72 -3.70 221 5045 2012-10-02 14:21:04 2012-02-28 17:25:28 1 28 2378 11 835 4073 332 57.40 19 15.31 NEW oFpchh.cha..hcp.hc..tpl+.tsThtspcphlcp+IlPhF..GphclscIs..stplpc.ahNchh ..........................Thpchh.cpW.....hc..p..hp.....p.......p.......l.....c...........s........T.hpphpp.hlc..pal.h..P..h..h....G.ph..p..l.scIs..stplpp.hhpph.h................... 
0 320 580 723 +14511 PF14660 DUF4458 Domain of unknown function (DUF4458) Godzik A adam Jackhammer:Q8ABA0 Domain this domain is found in tandem repeats on the N-terminus of secreted LRR proteins from human associated Bacteroidetes domain boundaries are based on the JCSG solved 3D structure of JCSG target SP16667A (BT_0210) 22.40 22.40 24.10 29.30 20.40 20.60 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.50 0.72 -4.06 25 61 2012-02-29 08:06:47 2012-02-29 08:06:47 1 11 24 3 2 57 0 115.00 32 26.05 NEW GhVpFKLhKsh.......pt..pts.soRA............s..sc..h.h..hu-hpKlc.lslpp..stTphspTl.....t.Lpsh.s................ps...u-.....................hGhco.....-pL.pLhAGsYplsuYphY......c+pc...pp...lhs.upss.pspsFsVhsssls ...............GhVpFKLhKsh........t......soRA............h..sch.h..hS-hpplc.lslpp..shTphspTl....t.lcshhs................Es..s-.....................auhpo.....-pl.pLhAGoYplhuYhhY.....Dchs.....ps...lht..upss.pspsFsVhsssl... 0 2 2 2 +14512 PF14661 HAUS6_N HAUS augmin-like complex subunit 6 N-terminus Eberhardt R re3 Jackhmmer:Q7Z4H7 Family This family includes the N-terminus of HAUS augmin-like complex subunit 6. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2]. 
26.30 26.30 26.60 30.90 23.90 26.20 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.66 0.70 -5.02 27 214 2012-02-29 10:59:24 2012-02-29 10:59:24 1 4 158 0 141 215 1 211.80 21 32.99 NEW hlapsLphLuhch.sst........shpthhshshF.KsNhpAh.hlhaaLFphhDssch+.c+Ft.hWP.hDp..tpcssFRssshchLpc..Lpcpt.tLsphspl+sshlspssG.+FhchLhphusaVlpchl+p.....h..pssssshths.spphtsss..t.hhshhtthcsphhphhpcpsth.t.apchsphlppphcslsucptt.ppth.......thpsppcpttcth.p...tt.......h.t.hpphpphWsph .......................hhh.l.hLthc...............s.p...t.s...ht+...phpshphshaaLhph....h..D.st....tp..p...........thp.....haP.h-t..hpshphRtthhphLpc..lpcps..sLs.........p.sp..l+tohlhpssG.+FhclLhphuphVLpchlpp........h...........p...........s.ssh............p........hs.ttphtspp..t.hhshhhht..+.pphhp.hhpcpsth....t.apchs...phlpt....phcplttcp.thhppp..................p..pp.tp....................................................................................................................................................... 0 34 66 108 +14513 PF14662 CCDC155 Coiled-coil region of CCDC155 Coggill P pcc Jackhmmer:Q8N6L0 Domain This is a small family of eukaryotic proteins of unknown function.ThiS is the central coiled-coil region. 30.90 30.90 31.50 32.00 30.40 29.50 hmmbuild -o /dev/null HMM SEED 193 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.73 0.71 -11.38 0.71 -4.70 15 48 2012-02-29 12:47:12 2012-02-29 12:47:12 1 8 33 0 25 66 0 163.10 59 29.74 NEW PATADLLSSLEDLELSNRRLAGENAKLQRSVETAEEGSARLGEEIpALRKQLRSTQQALQhAKAlDEELEDLKTLAKSLEEQNRSLLAQARQTEKEQQHLVAEMETLQEENGKLLAERDGVKRRSEELATEKDuLKRQLaECE+LICQRDAlLSERTRHAESLAcTLEEYRoTTQELRLEIS+LEEQLSQTpE .....ATADLLSoLEDLELSN+RLsGENAKLQRSlETAEEuSARLGEEIhuLR+QL+..ST....Q....QALQhAKAlDEELEDLKTLA+SLEEQNRuLhAQARpsEKEQQHLVAEhETLQEENGKLLAERDGVK+RSpELAs..EK-sLK+QLhEsEcLICQR-slLSERT++sEuLspTlEEYRssTQELRhEIS+LEEQLSQo.p......... 
0 2 4 9 +14514 PF14663 RasGEF_N_2 Rapamycin-insensitive companion of mTOR RasGEF_N domain Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the more conserved central section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin. 27.00 27.00 27.90 27.40 26.40 26.10 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.29 0.71 -10.35 0.71 -4.14 35 251 2012-10-02 12:00:54 2012-02-29 13:10:47 1 25 215 0 179 269 0 112.50 36 8.19 NEW phssa.ppWulcLLl...sQLhDsss....pVsthA.lplLcEsCppp..ssLEhllph.+.PsLspLG-hGt.Llh.+FLSpspG.a.phLsp.hsalppEh-pWh.pptNpcYVtllEsslppuhssptcs ..............s....hppWuIcLLV....sQLaD.sh....pVsptA.lclL-EACpp+..s.Lchl..lp......h........+.P........s.L.........s....H.................L..G..-..h.....G.t...........LLl.....RF.LShstG.a.p..Y.........Lsp.h....salspEh-cWh..pthNppYVsllEtpLscuhsp.......................... 0 57 94 147 +14515 PF14664 RICTOR_N Rapamycin-insensitive companion of mTOR, N-term Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the N-terminal conserved section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin. 
27.00 27.00 35.40 31.00 26.60 26.10 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -12.00 0.70 -5.87 30 301 2012-02-29 14:25:56 2012-02-29 14:25:56 1 28 234 0 209 292 2 316.30 29 26.27 NEW lp+uNsLVpLh+cpPpl+h-lshuhhs.clps...hLLscs.+-VsAAuYRhsRYhlsDtpslptlppLphDhlllhSLs+-p+s.plEREQALKhlRsh..l-l.tulpch..opulVRsllulA-c....p-DRL+shsLpTLsElhlhsPpLlhpsGGlpsLtcsLh-.usap..huEolhsslLaLLDsPpTRpalpssh-LcslhusFTD..............pscpt...t+lpsSshuIushL+oWsGlhhLupssh.psL+SLlssLplPpsp.lRcsll-Lla-lLRI+sPsWossFhsup+hsshup..h...hplp.....spspp.h...s.....t.tt.slhspahALlLhshlcuGL...lpuLlpllpssp..........ttLspKusl......LLuElL+LAsplLP ..................................................tphNphspLhpp.hs..+.c...hsh..h.c..l.h........hlLscs..ppVhAAuhRhhRYhlts....p...lphlhplp..h-hhlhhslsh......p...........p.....p...............s..phERp.............QAL+hlRt..h..lsl.............s........sp..............h...............spulspsllulupp............................pcD...c..hhphslthlsElhltsPpllhhsGGh.pslhcs.lh-...sphs.....hs-s..lhsshLalLspPpoRpalcss..h-.L-h..lhusao-h...........................hpppp.........+hpssthslhshh+oWsGll...........hst..ss...ps...lp.....u.....L.ls..............sL...ph......P.p...p......hRchll.-lla-lhp........l.......s.....s.....htpphh.u.h.h.ss.sp...h....php.........tt..t.h....t.............................tp.slhppahuLlL.hhlcsGL...............lpsLhplhpsp.s..........................lph+uol......Lls-lLphupphLP............................................. 0 76 116 174 +14516 PF14665 RICTOR_phospho Rapamycin-insensitive companion of mTOR, phosphorylation-site Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient- and growth-factor signalling. 
This short region is the phoshorylation site. Rictor does interact with 14-3-3 in a Thr1135-dependent manner. Rictor can be inhibited by short-term rapamycin treatment showing that Thr1135 is an mTORC1-regulated site. 27.00 27.00 38.40 37.20 25.90 18.10 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.63 0.71 -3.98 4 57 2012-02-29 14:26:48 2012-02-29 14:26:48 1 8 36 0 29 52 0 105.60 69 6.89 NEW PFshhuSS+LV+NRhLNSLoLPsKKtRSoSDPKG....uKL.os-ohsuhRRsRTlTEPSl.s.spu-sFsPl.p....P+p.TlsLETSFsGhKsl--ssSTsSIGEN-lKhs+s.u ....................sFPFFASSKLVKNRILNSLTLPNKKH..RSSSDPKG....GKL.SSEsKoSNRRIRTLTEP.Sl...DFNHSDDFTPlS........ssQKTLpLETSFVGNKHlEDTGSTPSIGENDLKFsKs.hG............................................................................ 0 1 4 12 +14517 PF14666 RICTOR_M Rapamycin-insensitive companion of mTOR, middle domain Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. This region is the more conserved central section that may include several individual domains. Rictor can be inhibited in the short-term by rapamycin. 
27.00 27.00 30.00 27.20 26.60 25.20 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.64 0.70 -11.40 0.70 -5.34 51 266 2012-02-29 14:49:26 2012-02-29 14:49:26 1 27 232 0 188 271 0 212.50 34 15.33 NEW hDDtpF+phlt-.opVLssK-apcWNWsllhpllcGPLhNsKRl-Esh..+ssKF....l+RLluFYR...Ph+hRFosl.hssc....s.......p+YlplGCtLhcsLLso.sEGh+hLt.cs..+llpQlucsLspl..........hsGhsstp.slFSpp+LpsTlstGYFthl.GsLSsstpGl..plLE+aphFshlhplhph.........p.s+..ssllpl.llssL......DYsh.supsRhlLsKALssusc.plRlaATchLthhlps ...............................................p-tthpphlh-.opV..Lpp.p.phhcWsWslIhpllcuP....h.hN.+.phc-t....phs+Fh+RLlsFY+......P.phpaupl.hstp.....s............ppaspsGCthhchLLpo..Eu.thhh...s.........cllpplsphLsps..................hoG.hps.p.lhspppltsTLsttYFhhl.GsL.SspspGl..phL-+hp........h........Fphhhplhph............................p..sc...scLlpL.llusL......DYoh..-u.h.s.RllLoKsLTsuoc.shRlaATchL+hhl..................... 0 62 101 155 +14518 PF14667 Polysacc_synt_C Polysaccharide biosynthesis C-terminal domain Bateman A agb Jackhmmer:C9L817 Family This family represents the C-terminal integral membrane region of polysaccharide biosynthesis proteins. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.98 0.71 -4.40 347 5970 2012-10-02 21:24:20 2012-02-29 15:39:12 1 27 2511 0 1351 15273 3365 133.80 17 27.14 NEW hLtlhuhshlhh.ulh.t.h.h.s....s.l.Lp.uhs+tphshhsh...hluhllpll.lshhL....l.........hG...h.........hGAuluTsluhh.l.s.shlt.h.hhlp+..h...h.p.hp....h.....h.t.p.hh...hth....h.hsuhlMslsl....hh.hthhh.....................t......sh...l...th....llsls...lGs...hl...Yhhhlhhh....ph .......................................................h.hhshs.hlhh..u...lh.t...h...h.s....s...h..Lp.uh....s...c.p.+h.s...hh.s.h.....hh....u.s.l...l.p.ll..l...s....hhL........l............................h..a..G......h...........................hG.Aulu..T.h.lu....h....h....l....s....h...h....l.t.....h.....h...h....l..........p..........+..........h..............h.....t.....hp..........h......h.....h............h..............h....h..h...................h...h...h....s...h........h...h..h...h...h..h.............hh...h.......h.h...............................................................h.........h.....h........h.h.hhh....h.uh......hh....ahhhhhh...h..................................................................................................................... 0 455 917 1153 +14519 PF14668 RICTOR_V Rapamycin-insensitive companion of mTOR, domain 5 Coggill P pcc Pfam-B_389 (release 26.0) Domain Rictor appears to serve as a scaffolding protein that is important for maintaining mTORC2 integrity. The mammalian target of rapamycin (mTOR) is a conserved Ser/Thr kinase that forms two functionally distinct complexes, mTROC1 and mTORC2, important for nutrient and growth-factor signalling. These long eukaryotic proteins carry several well-conserved domains, and this is No.5. 
27.00 27.00 27.00 27.10 26.80 26.90 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -8.91 0.72 -4.05 32 251 2012-02-29 15:53:02 2012-02-29 15:53:02 1 30 216 0 185 260 1 73.00 41 5.33 NEW lhcLKuuLWAlGal.GoophGlshLp...ptslltpllplAppsplhSlRGTsFasLuLIupTppGsclLpchGWpS ............hclKusLWAlGsl.GS.sphGhshLp..........pp..slltpIlclAcpspVhSlRGTsFaVLGLIu+TppGt-lLpchGW-..... 0 65 101 153 +14520 PF14669 Asp_Glu_race_2 Putative aspartate racemase Coggill P pcc Pfam-B_34791 (release 26.0) Family This is a small family of vertebrate putative aspartate racemases. The family lies on TOPAZ 1 proteins. 27.00 27.00 28.00 27.50 26.80 26.50 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.67 0.70 -11.47 0.70 -5.04 3 28 2012-10-03 04:39:14 2012-02-29 18:32:59 1 2 25 0 16 34 0 222.80 63 15.17 NEW EAGhVhssEas-hl.hhLcphpssppElssslssKS...RupAosP+puaLs-LApsVVEVELCKcpEDWs+LGslFhSVCpGsccsu-LpRFCuCVAhALLcEsKD.K.uVPFstFAETVCQ-sppDElsKTaLGRIGVSLMapYHRTppWoKGRKVL-VLScLKlEFToLKGLFGsEcGASRCQLVTlAAElFIpSGSIEGALNsLR.ENEWFlSSSoWPCEpADVpsR+RVLstL ...................................................................EAGMlLDsEHFNYIVKLLYQlQASKQEIoAVLEhKS...RLphRQFKK..NWps...DLcSALsclEHCKEKGDWTKLGsLYlNlKMuCEKF.ADhQpFCACIAEsLTK-h.K-ERPslPFCEFAETVsKDPQNSc....lDK.s.lLGR.IGISAMYFYHKLLQWSKGRKVLDKLYELKIHFTSLKGLhGPEKLAsRCQIVNlAAEIFLKSGSLDGAIWVLR..ESEWIIsTPLWPCDRhDVLNRHNLLCTI.................................................. 0 2 3 5 +14521 PF14670 FXa_inhibition Coagulation Factor Xa inhibitory site Coggill P pcc Jackhmmer CATH:3kl6_B_0 Domain This short domain on coagulation enzyme factor Xa is found to be the target for a potent inhibitor of coagulation, TAK-442 [1]. 
33.40 33.40 33.40 33.40 33.30 33.30 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.59 0.72 -3.83 204 5651 2012-10-03 09:47:55 2012-03-01 14:05:28 1 1176 167 166 3003 5435 0 36.70 41 5.65 NEW Cphs..N..G..G...C.s....c..h..C.h..s.s.......s.s...........s..h.p....C.sCs..p..G.a.p.L.s........s.D..sp...o.C ..............Ct.s..N...G...G....Cp...+.h.Ch.ss.......s.u.................u..a.p...CsC...s..G.a.p.L.t......s.D..t+o.C.............. 0 870 1089 1827 +14522 PF14671 DSPn Dual specificity protein phosphatase, N-terminal half Coggill P pcc Jackhmeer:CATH:1ohe_A_01 Domain The active core of the dual specificity protein phosphatase is made up of two globular domains both with the DSP-like fold. This family represents the N-terminal half of the core. These domains are arranged in tandem, and are associated via an extensive interface to form a single globular whole. The conserved PTP signature motif (Cys-[X]5-Arg) that defines the catalytic centre of all PTP-family members is located within the C-terminal domain, family DSPc, Pfam:PF00782. Although the centre of the catalytic site is formed from DSPc, two loops from the N-terminal domain, DSPn, also contribute to the catalytic site, facilitating peptide substrate specificity [1]. 28.20 28.20 28.60 28.30 27.60 28.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.67 0.71 -4.19 42 431 2012-03-01 14:48:06 2012-03-01 14:48:06 1 5 262 3 265 410 10 128.20 43 23.61 NEW lEhIpsRLYasshpp.....pP+ss...sssaaFslDc..-LsYpsF....atDFGPLNLuhlY+astpLschLps...shppKplVaYou.hcs....c+RsNAAhLlusYhllhhshoPccAhpsl.pphp....s..sahsFRDAohusssaplolhDClpGlp+A .....................................tphhhh.h....................pspss........shhaFslDc..pLhYpsF....atDFGPLNluhlYRassclpchLps............hshtcKtlVaYou.hDt......................+cRANAAhLlusYhVlhhshoPcpA.apsl.hpss....s..sahPFRDAuaGsssaplTlhDslpGlpKA........................ 
0 99 145 213 +14523 PF14672 LCE Late cornified envelope Eberhardt R re3 Jackhmmer:Q5TA76 Family This is a family of late cornified envelope proteins that are expressed in skin [1]. 24.50 24.50 27.00 24.90 24.40 24.40 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.13 0.72 -10.95 0.72 -3.50 10 255 2012-03-02 13:07:00 2012-03-02 13:07:00 1 2 22 0 94 230 0 71.30 56 80.19 NEW sPPKCP....sPKCP...PKsss.Ch....PPsSSC.CussS..GGCsGs.............SuuGGCCLSHHR.RRSHRC.RhQSSsCC-pGu.....GQQSGGSGCCHuSGGCs .........................sPKCP.....PKCP...P.pss......s.C.........PssS..SC..CusSS..GGC.Cu..s..................Su.GGGCCLSHHR..+Rp.+Rp..R.psSssCsp.................uG.u.S.uCstuuuGp........... 0 20 20 20 +14524 PF14673 DUF4459 Domain of unknown function (DUF4459) Coggill P pcc Pfam-B_10980 (release 26.0) Domain This family appears only on sequences from Salmonella spp. These sequences also all carry a YARHG domain, Pfam:PF13308. 27.00 27.00 45.80 45.70 23.20 18.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -11.00 0.71 -4.53 2 68 2012-03-02 13:31:19 2012-03-02 13:31:19 1 3 66 0 0 26 0 148.70 97 33.39 NEW MKKCFLFIFhCLFIFSANAELKFRPEhENKKIYFQGKVTDYTLNDFhFFGDSREPFYGSENDDYTATADEWLtFYAELPDVRKWQRVVPDDFShM.GAPWCDIQFFEQENDHSVITGSEHhRCIDFLVTPKRKGLIPMGTKGTLhDYGSYLAFAPQIc+ ..MKKCFLFIFVCLFIFSANAELKFRPEFENKKIYFQGKVTDYTLNDFTFFGDSREPFYGSENDDYTATADEWLGFYAELPDVRKWQRVVPDDFSTMYGAPWCDIQFFEQENDHSVITGSEHMRCIDFLVTPKRKGLIPMGTKGTLMDYGSYLAFAPQIKR..... 0 0 0 0 +14525 PF14674 FANCI_S1-cap FANCI solenoid 1 cap Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 1 cap (S1-cap) domain of the Fanconi anemia group I protein [1]. 
25.00 25.00 31.60 31.60 22.50 18.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.43 0.72 -3.91 5 57 2012-03-02 15:30:47 2012-03-02 15:30:47 1 5 31 6 20 47 0 51.90 63 4.80 NEW MDQKILSLAAEcosD+LQEaLQsL+Es-LoslLTsQAVKGK-sGALLRAIFKG ..MDQKILSLAs-KTsD+LQEFLQTL+-DDLTsLLpNQAVKGKssGALLRAIFKG. 0 1 3 6 +14526 PF14675 FANCI_S1 FANCI solenoid 1 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 1 (S1) domain of the Fanconi anemia group I protein [1]. 25.00 25.00 25.40 26.90 23.20 22.20 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.46 0.70 -11.36 0.70 -5.00 16 138 2012-03-02 15:31:51 2012-03-02 15:31:51 1 23 93 8 82 125 0 194.60 35 17.27 NEW pstp+Rhplspsslptlcss-Lshchs.DlIuRLhhDlssassppLlcls-hCl-ul........................RtG-spshsWK-LLPplLss......Lusp.tplshsshtloGsEY+cpllssLssh+WsspIlsslssMFRDlsLSpEEhphVlsKlsstlpc.ls..ElPPLsaQLhplC.psuupll..L.uLp+YFpcpahc+h.sptsopoo....Dl-sI.......................shSscELR-sEtTlLaHls.ssph .......................................................................t...+Rhtlhp.hlph.hp..ssclp..c.st-llshLhh-htphPs...Lsplsp.hl.ssl...............................+.p.ushhssp.....h-LlPhhLos...........Lssp.pplsh.....s...t....st..lsG........pEhKcpllsslCo.scWst....ph.llplssMF+Dls..Lo.s-Elp.hVlpKllphhpc.lsLQElPPLVYQLLlLuoK.G.s+.+tl..LpGlltaFsphppp....ptp...ppp.s...sp.h.....-l...l.......................s.hst..tELRclEGTlllHlsaAlc................................. 0 29 38 60 +14527 PF14676 FANCI_S2 FANCI solenoid 2 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 2 (S2) domain of the Fanconi anemia group I protein [1]. 
28.50 28.50 28.90 29.60 27.90 28.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.60 0.71 -4.33 25 145 2012-03-02 15:32:28 2012-03-02 15:32:28 1 27 102 8 94 147 1 152.00 36 12.64 NEW tllpulVphuFhLL-..........uttssptpsht.hpt.....................LGlplLhphF+hHphsRscIl-plhsRllsppsp.sspal...................cLLuhlspstshhllEpsuplp.....-hh-al.salssssuptllsAlhPLlKlS.tsl+DtlILVLRKAMas+-hssRhsAVsGalp .....................................hVspGLVpLuFhLh-...................................uhssp+h.s.stshps..shtp...........................................LGtslLlchF+hHchhR.......p..cIl-plhsRllopsss....s.p.a.l....................................-LLupllhtsPhhl.psss+lp.....-hhDal.saLPhps..sptLlpAl.PLlKlS..hshRDuLILVLRKAMFup..ph...s..sRpsAVsGFl.h.................................................... 0 34 44 72 +14528 PF14677 FANCI_S3 FANCI solenoid 3 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 3 (S3) domain of the Fanconi anemia group I protein [1]. 25.00 25.00 25.80 25.90 24.80 21.80 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.21 0.70 -4.96 13 99 2012-03-02 15:33:04 2012-03-02 15:33:04 1 14 71 8 57 106 0 205.40 38 17.27 NEW lLohphlScLLshLFcDssp.u+pEsLulLRSss-ah.RYuVsVslQKlQQLc-p..sDss-upssc+hFcpLCDIT+VLhhRhsshsssl.c-sG.....polSlLslEshhclhsslpppYss+hspFLpslDssssstpc...........slTppsthhI+pFQ+slhs.hs.us-DsFsuKpslhLlslLohLucpLsPuSsphs.QhhsWslplC+cpsl-DsuhsKGLl.sLLF ............................................lhshthlspLLphLacDsst.uppEslslLRSsp-Fh.+YslslslpKlQplcpp.....sshsptppsc+hFp..pLs-IT+VLhW.R..Y..TslPosV.E-sG++.KtcolSLLCLEG..Lpclhssl.pphYps+lppFLpul.....Dsssppspc.t.......hslsppsth.I+pF...Q..Rsl....h......s.LS...upc--F.sSK-sl.lLlslLosLo+hL-P..uSsphs.p....hhoWs.+lC+-ssh....-Dst..hsKuLh.sLLF............................. 
0 15 19 37 +14529 PF14678 FANCI_S4 FANCI solenoid 4 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the solenoid 4 (S4) domain of the Fanconi anemia group I protein [1]. 25.00 25.00 66.30 32.60 24.40 23.20 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.40 0.70 -5.28 19 132 2012-03-02 15:33:39 2012-03-02 15:33:39 1 22 97 8 84 138 0 235.70 35 20.15 NEW sshLpclupsltspLGslc..p-sps.....-pps..pauhlshcTAu.olhtlllup.lp+lL--V-Whls+hps.httsphs........psspsthtsshcshE+ulhtQLspllpslpcLspsslPsGush-slhKtLt+hYshLosLsKaYlphssspts...thsscFEcL.l+h.............sGopLsspsYshIoY......lpttpp-p.spppp.....p........tttspsthAKlLRET+.IPslIauIEpaEKalIpLSKKoKl..NLhpahKhSTsRDF+IpuspLcsAl .........................................................h..hhpclupclpt.lGslc....pD.pl.........-pss...pashlshcTus.olhhhllup.s-csL--VDWlls+lKs.hstpphs........ptspsphph....tpshE+ulhhQLspllphhpcLlpsulPs.G...o.ssDsLLKtLs+hYshLosLsKaalpsppspts....hstph-+L.V+l.............oGpp..LTs.hYsFIoY.......................lpp...p...p.....p...pp...t..t....ttpt........................tt.ststhA...+lLR....ETK.IPsLIFsIEpYEKaLIpLSK......KoKl...............NLhpahKhSTuRDF+Ipss.Lp.sl.................. 0 33 41 61 +14530 PF14679 FANCI_HD1 FANCI helical domain 1 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the helical domain 1 (HD1) of the Fanconi anemia group I protein [1]. 25.00 25.00 25.20 29.20 24.40 24.60 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.20 0.72 -9.65 0.72 -4.25 19 101 2012-03-02 15:34:20 2012-03-02 15:34:20 1 17 66 8 57 95 0 85.00 44 7.08 NEW pLGREll+alKsst....tpsLsPFslulLLSVuplpRFc-plh-hL+oslh+sh+D.phppsu+WLpchl.ppssltshll-slpso ..........-LGREllKtLKssp..t.ssssLsPFSlALLLSVsRIp.RFpEQVhDlLKsullKuaKDhQlhpuSKaLpsLlPpc..s..sVushILEVV+NS............. 
1 17 23 38 +14531 PF14680 FANCI_HD2 FANCI helical domain 2 Eberhardt R re3 Jackhmmer:Q9NVI1 Family This is the helical domain 2 (HD2) of the Fanconi anemia group I protein [1]. 25.00 25.00 35.80 30.60 23.90 22.70 hmmbuild -o /dev/null HMM SEED 234 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.36 0.70 -5.25 16 147 2012-03-02 15:46:03 2012-03-02 15:46:03 1 24 99 8 94 147 1 212.30 33 18.07 NEW su.....Sp.sSp.tsshoQlplss+ophsss...sN.-sLsLEIlG.LRRCLsQQA-VRlhLY-GhY-slcpNupLusplLphLhsph+paa-s........-.DhlsPl+l-sClsupG-p...lhLpEPLu+LlpslspCLthhpps.sstss.s..............El..splLESlopRMl+s-..LEDFpL..........................Dcss-hs.pssssuhcpshhshhhhulhEsLIpaphht..sshscsph...cclLuLFcsYpKLs-hLKc ..................hs..spphSt.t..shSQ.splsspuphsss......tN..-shsLEIhs.L.RRCLsQQA-VRhhLY-GhhcllppN....s.pLusplhphLhs.p.h..+.p.a.acs..........c.DhlsPlKl.-tClh..........spusp.............l.LpEPLspLlpslppCLthhppt..shptt..........t.p...............h.pcl...........pphL..-ohspRMl+s-....LEDhpL.......................................Dcst...-hs.psos.huhcsphh.s.hlhulhElLl-aph.h.....sphscp..ph....cpll.sLF.papcLp-hhp............................................... 1 32 42 72 +14532 PF14681 UPRTase Uracil phosphoribosyltransferase Bateman A agb Jackhmmer:B0C7X6 Domain This family includes the enzyme uracil phosphoribosyltransferase (EC:2.4.2.9). This enzyme catalyzes the first step of UMP biosynthesis. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.37 0.70 -11.12 0.70 -5.12 113 5012 2012-10-10 14:25:38 2012-03-05 13:22:09 1 28 4240 67 1505 3471 1360 199.90 43 83.73 NEW ls.s+PhlpthhohlRspsT.ssspFcthhcclsplLshEAhs.p..Lsh.p..pt..plp.....TPl......utth....s.s....h.h....h.st......plshVsILRAG.................................uMhpulpsl.hPs.s.plt+lhl.R.-c..pT..................hp..P.........aYp+LPppl........sppp..................llllDP...hlATGuoshtAlchL.hct...sst.ppIhhlsll......u....usp....Gl......pplttpaP..c.lc.lhsuulDppL..s.-puaIlPGLGDhGDRhFGT .........................................................................................................................sHPLlpH.KLollR..-.p........c..T..uop..c..FRc..LssElupLh.............s.....YE...............so+..D..L........h..c.......cs...pIc.............TPh..........ut.sp...sp.....p.l............t..u+..........KlslVPIL...R.A...G..l................................G.Mh-.....Gl.L.p.l...l.......P...u.......A...+.......l.......G.......h.......l..G..l..aR....D.E....-T.............................................Lc..P.s.............Y.a...t..KL...P.p.cl......scRt...................................slllDP...MLAT..G............G.Sslt.........AlchL.Kc+......Gs.....ppI+hlsLl......A....APE......Gl......cslpc...u.HP.....D....Vc....l....asAul..DccL..N..-+......G.................Y.IlP.G.....LGDAG......DRlFGT............................................... 0 468 899 1262 +14533 PF14682 SPOB_ab Sporulation initiation phospho-transferase B, C-terminal Coggill P pcc CATH:1ixm_B_02 Domain Sporulation initiation phospho-transferase B or SpoOB is part of a phospho-relay that initiates sporulation in Bacillus subtilis. Spo0B is a two-domain protein consisting of an N-terminal alpha-helical hairpin domain and a C-terminal alpha/beta domain, represented by this family. 
Two subunits of Spo0B dimerise by a parallel association of helical hairpins to form a novel four-helix bundle from which the active histidine - involved in the auto-phosphorylation - protrudes. In the phospho-relay, the signal-receptor histidine kinases are dephosphorylated by a common response regulator, Spo0F. Spo0B then takes phosphorylated Spo0F as substrate hereby mediating the transfer of a phosphoryl group to Spo0A, the ultimate transcription factor. 27.00 27.00 29.60 28.60 25.00 24.00 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.57 0.71 -4.29 12 140 2012-03-05 14:41:45 2012-03-05 14:41:45 1 1 137 10 21 74 0 113.40 51 61.89 NEW hPphAthlLTaNWcs+.hpLEaEVLG-l+sLpth-ppLhshspplFshhcpuls.tsENHLslolphp..-......ptlplaFDFpGhls.shptlpp.hpp.tptp.thphhphcls-cEssl ...MPLFSEWILTYNWKQQPsLLEYEVLGc..L..+N.LS+hDEpVCTWosQFFSMLQHSLDVY..VENYVCITIEsDu........-NARFFFDFRGKLT..slEELQs.WLu.spNN..cahsISYoVRDEElSl.......................... 0 5 13 15 +14534 PF14683 CBM-like Polysaccharide lyase family 4, domain III Coggill P pcc CATH:1nkg_A_03] Domain CBM-like is domain III of rhamnogalacturonan lyase (RG-lyase). The full-length protein specifically recognises and cleaves alpha-1,4 glycosidic bonds between l-rhamnose and d-galacturonic acids in the backbone of rhamnogalacturonan-I, a major component of the plant cell wall polysaccharide, pectin. This domain possesses a jelly roll beta-sandwich fold structurally homologous to carbohydrate binding modules (CBMs), and it carries two sulfate ions and a hexa-coordinated calcium ion. 
27.00 27.00 28.00 27.80 26.00 25.30 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -11.08 0.71 -4.61 56 304 2012-03-05 16:42:11 2012-03-05 16:42:11 1 23 130 5 211 322 1 176.50 27 28.84 NEW sslWcIG..DtossuFhsu-...............phhR.H...sc.schtshtsLsYTVGsS.tssDashAphpchs.......sshpIpFpLsssp.hs..tshTLRlulstA.........suucspVplNs...hsus......ssu..tshthDspshp+GsapGhhclYshslPuutLhp..Gs...NTlplssss..........uo......s.h..uhhaDslcL .....................slWcIGhsD+oustFhhs-......................................pphRta....p..c...schhP.t.t.slsYTlGpS..tpDa.aup..hsths...........................ssapIpFsLssspht...tshsLclul..AsA..................ss..u..ch..pVp......lNs...sss...............hss.......ht.pcsshsc+Ghp.Gh...ht.....hap..hslsushLhpGs...Nsl.hls.sp..............us..........t....shhaDhlcL.................................... 0 52 129 181 +14535 PF14684 Tricorn_C1 Tricorn protease C1 domain Eberhardt R re3 CATH:1k32_A_03 Domain This domain is the C1 core domain of tricorn protease. This is a mixed alpha-beta domain [1]. 24.00 24.00 24.00 24.00 23.90 23.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.23 0.72 -4.10 129 801 2012-03-06 14:27:51 2012-03-06 14:27:51 1 45 538 24 226 719 107 69.70 30 8.07 NEW ttEhpphFc-sWRhh+-p..FYss.....c.h+Gl.D..WpultccYp.hlsplsspt-hscllsEMlGELssSHshhpss ....................tEhtthF-csW+hhccp...F....ac......s..hpGl..D..WpultspYp.hlsphp...sp..p-.ht-lLsEhluELssuHstst........................... 0 99 185 216 +14536 PF14685 Tricorn_PDZ Tricorn protease PDZ domain Eberhardt R re3 CATH:1k32_A_04 Domain This domain is the PDZ domain of tricorn protease [1]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.81 0.72 -4.19 47 349 2012-10-02 11:12:46 2012-03-06 14:32:34 1 16 297 24 103 348 96 86.50 37 7.96 NEW GhLGADhph.c..s..sp..acls+IlsG-shsscs+SPLstPGlplptGDhllulsGpslssstsPttLLsspuu..ptVpLolpss.su..ssRclsV .................................GhLGA-lsts..s..ss..h+ls+lhsG-spsspttSPLstsGlslppGDlIlAlDGptlsss.......ss.htpLLpG..p.A.G..+.VpLolppsss....ttRplsl............... 0 39 75 94 +14537 PF14686 fn3_3 Polysaccharide lyase family 4, domain II Coggill P pcc CATH:1nkg_A_02 Domain FnIII-like is domain II of rhamnogalacturonan lyase (RG-lyase). The full-length protein specifically recognises and cleaves alpha-1,4 glycosidic bonds between l-rhamnose and d-galacturonic acids in the backbone of rhamnogalacturonan-I, a major component of the plant cell wall polysaccharide, pectin. This domain displays an immunoglobulin-like or more specifically Fibronectin-III type fold and shows highest structural similarity to the C-terminal beta-sandwich subdomain of the pro-hormone/propeptide processing enzyme carboxypeptidase gp180 from duck. It serves to assist in producing the deep pocket, with domain III, into which the substrate fits [1]. 27.00 27.00 27.00 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.28 0.72 -3.87 51 306 2012-10-02 19:08:27 2012-03-06 16:19:16 1 22 139 5 207 340 67 90.20 27 15.07 NEW pRGsVoGph....Ghs......st.s.hhVGhs.............spshQYWspAt.ssGpFslssl+sGs.YsLhsat.........u-hpst...ssslsVo.uusst.slush .....................................tRGsVsGplh....th.................h.t.shVGht..tt........t....spshQaWsp.....sc....ssG.....p.FsIssl+sGs..YsLhsas.........su...hG-ath....psslsls.supsh..p....h.............................. 
0 50 125 175 +14538 PF14687 DUF4460 Domain of unknown function (DUF4460) Eberhardt R re3 Jackhmmer:Q8N3R3 Family This domain family is found in eukaryotes, and is typically between 103 and 119 amino acids in length. There is a conserved HPD sequence motif. There are two completely conserved residues (N and F) that may be functionally important. 25.30 25.30 26.10 28.30 25.10 25.20 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.87 0.71 -4.32 31 156 2012-03-07 14:34:13 2012-03-07 14:34:13 1 4 113 0 111 150 0 103.70 33 20.81 NEW hhppRphtssclpsAL+PFYhtVHPDhFsp.aPp.p+psNEcSLKhLsuaL-sLpp.t.......scstpLpFYlpcssssp..t........pa+hVplpl....s...ppD...........scphlpplLcsCsL ........hh...+thtsschtsALRPFYhtVHPDhFup.aPp...p+plNEsS.....LKtLssaL-sLpp.t.............shpstpLsFYl+pss....pst.................ta+hlphpL.....p........spc..............phl..llppCpL.............................................. 0 44 61 90 +14539 PF14688 DUF4461 Domain of unknown function (DUF4461) Eberhardt R re3 Jackhmmer:Q8N3R3 Family This domain family is found in eukaryotes, and is approximately 310 amino acids in length. 
24.00 24.00 25.40 24.80 23.10 23.10 hmmbuild -o /dev/null HMM SEED 313 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.09 0.70 -11.79 0.70 -5.09 24 123 2012-03-07 14:36:20 2012-03-07 14:36:20 1 6 90 0 86 118 0 275.60 30 57.36 NEW cpsLssWLccsttpApp+tcsstsl+cElp+L+ppLspphsLpclpasCGWsltH..hpus..LpoLp+L..sp...ppsp.....hpsL+...s+sllFu.shoGlShpGc..VMLsosDVpcsWhcllcpls.pacth..lptlshhEptlSplLtsIpls+hchh..PthpsptYtspLpplhsultca..hsttth.p.hPps.Lpshplslpo-u..ushhl....usoGphllPuosssshLlsFlspphcpApc+hpcacp.pthEcpLhspChcphpLppLsKD-ulTs-pMIsssp+LLpt...........t............hpGlpLplopYYS..VhoDGsl...CIPW-Wc ....................................................................sLpsWLcpsttpAhp.+hcpshsl+pElp+LcptLsppLtLpDlpappuWslsH..hpup....LpoLpRL..uppp.p....hhpth+...GpsllFs..stoGhoh.Gc....VMLsohDV..ppWhchhcpls..pa.sh......ptl.hhEcplStlLtuIplshhc.h..Phhph.ptYhshLpthhspl.c......hh...h.........Ppo.Lps..h.....ph.llpo-t......hs..l..........pphGpF.lPs.sCssspL..Fl..pp.hpp.Apcphp+hpp.................phhEppLhptshcchpLppL.hK.-.sul....osspMlsCsc+Llp..........t.h...............shLpGhpLplo+aYS.....VhpDGsl...CIPWsap........................... 0 31 42 66 +14540 PF14689 SPOB_a Sensor_kinase_SpoOB-type, alpha-helical domain Coggill P pcc CATH:1ixm_B_01 Domain Sporulation initiation phospho-transferase B or SpoOB is part of a phospho-relay that initiates sporulation in Bacillus subtilis. Spo0B is a two-domain protein consisting of an N-terminal alpha-helical hairpin domain and a C-terminal alpha/beta domain. Two subunits of Spo0B dimerise by a parallel association of helical hairpins to form a novel four-helix bundle from which the active histidine - involved in the auto-phosphorylation - protrudes. In the phospho-relay, the signal-receptor histidine kinases are dephosphorylated by a common response regulator, Spo0F. Spo0B then takes phosphorylated Spo0F as substrate thereby mediating the transfer of a phosphoryl group to Spo0A, the ultimate transcription factor. 
The exact function of this alpha-helical domain is not known; it does not always occur just as the N-terminal domain of SPOB_ab, Pfam:PF14682. SCOP describes this domain as a histidine kinase-like fold lacking the kinase ATP-binding site. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.56 0.72 -8.96 0.72 -4.41 56 2392 2012-03-07 16:11:40 2012-03-07 16:11:40 1 29 1272 10 403 1561 22 60.90 32 12.52 NEW htt.th...ppllcsL.+ppRHDahN+LQlIpGhlpLs+h-+spchIcplspchppp.ucl.spl .....................ph...tpLstlppYs-uL.RspsHEahN+LpslhGLLplppY...-...cl....h....phlpptspt.Qp.........h.t.................. 0 139 263 333 +14541 PF14690 zf-ISL3 zinc-finger of transposase IS204/IS1001/IS1096/IS1165 Coggill P pcc IS-finder, manual Domain \N 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.75 0.72 -8.23 0.72 -3.93 135 4025 2012-03-07 16:36:10 2012-03-07 16:36:10 1 22 1094 0 544 2757 127 47.00 38 15.25 NEW pshtCPpCG........pps...hhtpsp.pppph+.clshtsptshltlc.ppRapCp...p..C ...........p.sssCPcCG..........sph....t+hchp...+so+Is.hLcs.sG.hPohlhL+...KRRF+Ch...p..C.............. 0 144 375 439 +14542 PF14691 Fer4_20 Dihydroprymidine dehydrogenase domain II, 4Fe-4S cluster Coggill P pcc CATH:1gte_A_02 Domain Domain II of the enzyme dihydroprymidine dehydrogenase binds FAD. Dihydroprymidine dehydrogenase catalyses the first and rate-limiting step of pyrimidine degradation by converting pyrimidines to the corresponding 5,6- dihydro compounds [1]. This domain carries two Fe4-S4 clusters. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.44 0.72 -11.01 0.72 -4.44 327 6849 2012-10-03 08:56:43 2012-03-07 16:55:41 1 286 3369 26 1619 4929 2276 110.70 35 17.58 NEW Rtps.F.pEVshGaotc.pAhtEApRCLp.....C....t.ss...Ch.p.....uCPlslsIPpFIctltpGs......hptAhchIppsNsL.PulCGRVCPQEp.CEus.....Clh........t.....p..s.p....PVsIGpLERaluDhthpp....sh ...................................................h...t.a..Eh.....s.h.s..t....ps...t..pus.R..C..h..p....C..........s..ss.......Cp..p............uC....P....l....pst....I...Pp...a...lcLl..t.....cuc..............h.ppAhch....lp.p.........TNsh.P.t.lsG.RVCPps..CEuu.....Csh.........s....s.p..s............sVsItslE+alsDpuhp.s.h................................................ 0 581 1077 1386 +14543 PF14692 DUF4462 Domain of unknown function (DUF4462) Eberhardt R re3 Jackhmmer:A8MTB1 Family This domain family is found in eukaryotes, and is approximately 30 amino acids in length. 25.00 25.00 35.40 25.90 24.20 24.40 hmmbuild -o /dev/null HMM SEED 28 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.61 0.72 -7.06 0.72 -4.69 24 153 2012-03-08 11:34:24 2012-03-08 11:34:24 1 11 7 0 148 149 0 26.80 66 21.88 NEW uAKSR.sWNhLPRAusl.......GIGCQEQVQLE ....sAKSRhsWNhLPRAGsl.......GIGCQEQVQLE.... 0 148 148 148 +14544 PF14693 Ribosomal_TL5_C ShortName; Ribosomal protein TL5, C-terminal domain Punta M mp13 CATH:1feu_A_02 Domain This family contains the C-terminal domain of ribosomal protein TL5. The N-terminal domain, which binds to 5S rRNA, is contained in family Ribosomal_L25p, Pfam:PF01386. Full length (N- and C-terminal domain) homologues of TL5 are also known as CTC proteins. TL5 or CTC are not found in Eukarya or Archaea. In some Bacteria, including E. coli, this ribosomal subunit occurs as a single domain protein (named Ribosomal subunit L25), where the only domain is homologous to TL5 N-terminal domain (hence included in family Pfam:PF01386). 
The function of the C-terminal domain of TLC is at present unknown. 27.00 27.00 29.80 28.70 26.80 26.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.63 0.72 -9.84 0.72 -3.78 195 2672 2012-03-08 13:20:59 2012-03-08 13:20:59 1 2 2591 91 749 1875 2003 88.30 28 43.06 NEW EclplcVPlphhG..-...usGhp.t.G.Gllpp.hpplplpsh.PsslP-plpVDlssL.c.l.GsslpluDl..p..l..P...p..G..lpl.s.......s.....-...s-t......sllolssspstc ................plplcVPlphhG..c...usGlK.p.G.GllppshpplcVpsh.spslPEtl-lDlosL.c.l.G.cslpluDl..p..l..P...p..G..lpl.h...............s..c...s-.......slsslhtsp......................... 0 274 534 658 +14545 PF14694 LINES_N Lines N-terminus Eberhardt R re3 Jackhmmer:Q8NG48 Family This family represents the N-terminus of protein lines [1]. In Drosophila this protein is involved in embryonic segmentation and may function as a transcriptional regulator [2-3]. 25.00 25.00 27.70 27.70 24.00 24.00 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.26 0.70 -5.17 9 107 2012-03-08 13:57:56 2012-03-08 13:57:56 1 4 78 0 67 108 0 284.60 32 45.46 NEW hslohsDsESa.DooplKs.slKtLts+WssLlcth.spllpp.........ssspstsslloFLcLWEslISV..KANLSlh-T+PFaupLsphlhLLssslsshla+phLsLFN........EsLCYGSTLALQ-...hLs--ssuLActll+sV+chRlL-pLPhpptsu.................................phuuupus............sshD+sLLQKMuLLVLKSlAlpl+EhR.ssSSDSSlcSpc.D..t-hthIpRSIR-VLppL-salKspLpFHP-sphuchLl+lFtDQDD.hlEAMVCoLDltsGloa.cssAsssLs.........th....LNPshoFltFL+hluaDocVLLDhLlSsETCFLLYLLRaLKalRcNWshFlpsCt 
............................................................................................................................hlshh.pE.h..Dost.Ks...hlpph.p.asshhcsh.splh.s.p.............tsth.p.lhsFL.pLhE.llus....thpLpht....pp+.hahp.sth..ll.sh.s.l.shlh+phlhhhp........EsLsh..G..S..s...s..L.s.......hls.-hhtLAptllptV.shthLcplshpt..s.....................................hGsspst...st..t..t.DpslLpthsLlllKSltlphp....sso.S..ssp..........hc..s............lpp.hpclLthL.c.hlp.php.p.......p.schl....plF....-QDDphlEuhhs.Lslh.....t...lsh..ttps.h...pLs.......................th.........hNPhhhFl.hLc.huasss.lL.LDhLlSsE...TC..FL.YhlRaLKhlpts.Ws.Fhp.C................ 0 21 27 46 +14546 PF14695 LINES_C Lines C-terminus Eberhardt R re3 Jackhmmer:Q8NG48 Family This family represents the C-terminus of protein lines [1]. In Drosophila this protein is involved in embryonic segmentation and may function as a transcriptional regulator [2-3]. 27.00 27.00 37.70 36.80 21.40 19.80 hmmbuild -o /dev/null HMM SEED 39 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.47 0.72 -7.76 0.72 -4.71 20 93 2012-03-08 14:10:50 2012-03-08 14:10:50 1 4 76 0 64 97 0 38.70 47 5.50 NEW cshssLlcL+huIpRLhpKsLFPYNssPLLR..LLcpsEpL .....cshpsLhcLphsIsRLppKsLFPYNsssLL+..LLcphEsl..... 0 17 24 42 +14547 PF14696 Glyoxalase_5 Hydroxyphenylpyruvate dioxygenase, HPPD, N-terminal Coggill P pcc CATH:1cjx_A_01 Domain This domain is one of two barrel-shaped regions that together form the active enzyme, 4-hydroxyphenylpyruvic acid dioxygenase, EC:1.13.11.27. As can be deduced from the disposition of the various Glyoxalase families, _2, _3 and _4 in Pfam, Pfam:PF00903, Pfam:PF12681, Pfam:PF13468, Pfam:PF13669, these two regions are similar to be indicative of a gene-duplication event. At the individual sequence level slight differences in conformation have given rise to slightly different functions. 
In the case of UniProt:P80064, 4-hydroxyphenylpyruvic acid dioxygenase catalyses the formation of homogentisate from 4-hydroxyphenylpyruvate, and the pyruvate part of the HPPD substrate (4-hydroxyphenylpyruvate), derived from L-tyrosine, and the O2 molecule occupy the three free coordination sites of the catalytic iron atom in the C-terminal domain. In plants and photosynthetic bacteria, the tyrosine degradation pathway is crucial because homogentisate, a tyrosine degradation product, is a precursor for the biosynthesis of photosynthetic pigments, such as quinones or tocopherols [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.70 0.71 -4.24 17 1091 2012-10-02 15:00:03 2012-03-08 15:28:49 1 6 816 6 336 996 193 140.50 36 33.00 NEW FENPhGl-GFEFVEFuuP-..s..ttLcslFctMGFTtVA+HRSKDVsLYRQGsINFIlNtEPcS.AthFup-HGPuACuMAFRV+D.AppAYccAl-hGApPlpht.sGsMELplPAIKGIGGuhlYLlDRas-us..........SIYDlDFE ................................................hthsGhEFlE.F.s..s..ss...s...pp..Lt..t.la.p.p.h.GFstl......A......+......H......Ro...K..s..l..h..La..RQG....s..Ishll.......N......u................-..P...c..S....h..AppF....s...p..pHGPulCuhAhRVcD.AppAap+AlphGA....t....s....h....p....s....t....s....u....s.t....E....Ls......l....P.A.I.p.G.l...GsSllYhVDR..h..tt..t..........slY-.DF.......................................................... 0 72 164 254 +14548 PF14697 Fer4_21 4Fe-4S dicluster domain Coggill P pcc manual Domain Superfamily includes proteins containing domains which bind to iron-sulfur clusters. Members include bacterial ferredoxins, various dehydrogenases, and various reductases. Structure of the domain is an alpha-antiparallel beta sandwich. Domain contains two 4Fe4S clusters. 
35.00 35.00 35.00 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.04 0.72 -4.11 63 3437 2012-10-03 08:56:43 2012-03-08 16:17:41 1 136 2521 25 833 2684 375 57.90 41 22.32 NEW AhIcp-tCIsCs+CahAC.DsuapAI.......t..h...s..sc..p.....t..h.......s......l.l...p.-.-.CsGCsLCsslCPl...-CIsM ...........................hIsp-pCI......G...Cs.p..Chp.A.CP.......l.cAI..........................h.....s...s.c...p........h...p.................................s......l.h....s..-..pC.s.G..CpLCsssCP..s..cCIp............. 0 245 478 670 +14549 PF14698 ASL_C2 Argininosuccinate lyase C-terminal Eberhardt R re3 CATH:1k7w_A_03 Domain This domain is found at the C-terminus of argininosuccinate lyase [1-2]. 24.00 24.00 24.40 24.10 23.50 23.40 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.54 0.72 -9.29 0.72 -3.90 617 4020 2012-03-09 08:50:06 2012-03-09 08:50:06 1 16 3666 53 1083 3103 1579 70.50 40 15.17 NEW assATDLADYLV+.+.GlPFR-AHclsGphVt......hu.pcs..hs.Lp-LoLc-hpp....h.....s......shl.......s.p....D...lap.sLsl-suVsp ...........ausAT-LADYLVpK.GlPFR-AHclVGcsVh..............huhppG....hs...Lp....-...LsLp-hpp........h.........s......shl...............s..p....D........lap.lLshcsslp..................................................... 0 329 677 908 +14550 PF14699 hGDE_N N-terminal domain from the human glycogen debranching enzyme Godzik A adam Jcakhammer:GDE_HUMAN Family this domain is found on the very N-terminal of eukaryotic variant\ of the glycogen debranching enzyme (GDE), where it is immediately followe by the aldolase-like domain. 
The eukaryotic GDE performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the, respectively N- and C- terminal halfs of eukaryotic GDE enzyme hGDE_N domain is involved in the glucosyltransferase activity, probably\ as a substrate binding module (by analogy to other glucosyltransferases) 22.10 22.10 22.70 26.40 21.30 20.30 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -9.97 0.72 -4.14 68 237 2012-03-09 12:36:43 2012-03-09 12:36:43 1 12 206 0 168 232 0 94.70 30 6.56 NEW pLRFt..G.u.S.lsRcGslasNhPtsup..pFcRsp..apphclp.............s.......s...........hspshplclslppsGuFsaYhsY.......................csp........cosphYhsVsPtLplsu .........................LcFt..Gs.S.lu+psslaoNh.....P.......pGp..pFpRsc..F+phphp................s............s.................hspchhhcl.s.lppuGuFpaYhsa..........................sspt......tcosshYlsVsPtLpls.s............ 0 45 81 132 +14551 PF14700 RPOL_N DNA-directed RNA polymerase N-terminal Eberhardt R re3 CATH:1msw_D_01 Domain This is the N-terminal domain of DNA-directed RNA polymerase. This domain has a role in interaction with regions of upstream promoter DNA and the nascent RNA chain, leading to the processivity of the enzyme [1]. In order to make mRNA transcripts the RNA polymerase undergoes a transition from the initiation phase (which only makes short fragments of RNA) to an elongation phase. This domain undergoes a structural change in the transition from initiation to elongation phase. The structural change results in abolition of the promoter binding site, creation of a channel accommodating the heteroduplex in the active site and formation of an exit tunnel which the RNA transcript passes through after peeling off the heteroduplex [2]. 
25.00 25.00 27.60 25.90 24.40 23.80 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.21 0.70 -12.02 0.70 -5.17 126 554 2012-03-09 13:37:42 2012-03-09 13:37:42 1 14 381 20 280 574 278 279.60 19 28.13 NEW p+QhpLEpcuh..ctAhc+a+cthc....chpcp.uh..shsh........lpslhhpWapsLtstlpc-hc...............h..........................................ptppptpstassaLp....................hlss-chAsITlppllshhs.sts..httsh...................plsphshslGculEpEhchpphhcp........t.p.........ppppttpp.................................+php............phlcppphpt...........htthhttt................................................Wspss...ps+lGuhLlphLhc.s........uhlpsst.......tssts..........t..PAFhHthph.........sp+h.Ghlchsstlhch.....lscpstt..h......s.ah......P....MllsPcPWs..uhspGGYh.hht...o.lhRs+ss.........tppphhhpshpp.splp.......pVacuLssLGp ...................................................................................................................................................tQhthE..psh..ttuhtchpp..p.......ph.ph..t........h......hpthhhpWht.lhttlppphp........h..................................................................................tttttp..hhsalp....................hlsscp.h...u.hlshhphhthhh.stt...................................................phhphhhtlGptlpp-hph.pphhpt........................t.p.h........t...ppt...t............................................pthh.....phhptt.t.tt....................................................................................................W.s.t...th...phclGshLlphLlp.s.........shlphpt.........tt..s...............................................t..sshhp.hph.t.............spph..shlchps.lhph......l....pppsh...............s.hh.......PMlsPPpPWs..u.hppGG..Yl..ht.........stlhRspt...........ppp.hhtthtp...sp.hp.......tlhcuLshLup.................................................. 
0 86 164 238 +14552 PF14701 hDGE_amylase glucanotransferase domain of human glycogen debranching enzyme Godzik A adam Jackhammer:GDE_HUMAN Domain this is a glucanotransferase catalytic domain of the eukaryotic variant of the glycogen debranching enzyme (GDE).\ \ The eukaryotic GDEs performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33),\ performed by the, respectively N- and C- terminal halfs of eukaryotic GDE enzymes. hDGE_amylase domain is a catalytic domain responsible for the glucanotransferase function. It belongs to the alpha-amylase clan and is predicted to have a structure of a 8 stranded alpha/beta barrel (TIM barreal) where strands are interuppted by long loops and additional mini-domains. In most other amylases, the catalytic domain is followed by a beta- barrel substrate binding domain, but presence of such domain cannot be verified in the human (and other eukaryotic) GDE enzymes 21.40 21.40 21.40 21.40 21.30 21.30 hmmbuild -o /dev/null HMM SEED 423 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.38 0.70 -5.57 65 321 2012-10-03 05:44:19 2012-03-11 07:14:19 1 22 258 0 228 1087 85 363.10 35 27.98 NEW c.LPLsulsl.oVlSKahGs.h..s.-W-c+lpsluc+GYNMlHFTPLQpRGtSNSPYSIaDQLpaDsshFss.......s..p........pDVpphlc...chcc-..aulLSlTDVVaNHTAsNSpWLpEHPEuGYNhpTuPHLcsAhELDssLlcaScpL.....pph.G..LPsplco.p-DLtplhssl+pcVlspl+LWEaYslDVccsscphhctap...........ssp.........s.shs...p...s............................s.hpp.pt.....ph...............lpc..t............hhp....RaupclDsp....h....sh....sl.hsph.....tps....ssc......h...cpstp..phpclL-clNlshYcEaDsDlppIlpplhsRI+YhRl--cGPKLGs.loccsPLlEsYFT.+..h.p.pt.p.............ptpphsLANNGWlWs.usPLhDFAu.sp...S+uYLRREVIVWGDCVKLRYGspP-DsPaLWc+MscYochhA+hFsGFRIDNCHSTPLHVAEalLDtAR+VpPsL 
...............................................................t..lPLsslsl.ohluKhhG...h....s.cWc....p....cL.phhpc...pGY...NMl...HF...T...P.l......Q.........p...h......G......t......S......p.........S..sYSlhDQLphssphh.s............................sp...........p-l.tphlp...ch...cpc.....aslLsloDlVhNHT.AsN...SpWLp...-...HPEs..uYNhhsu....PaLp....sAh.LDptLhphuppl....................pph...GlP.s....l.cs..p.cl.........tlhphltpplhsplcLWE..aa.l-Vppthp.thhp.hp...ptp.............p.....p...........................................................s..p..hph............lpp.................hpRhsppls.p.h....sh....thh..h........ttt.......st......ppstphhpphlctlN..hhpphp.t.chp...............thlpplhsplpY.RlsspGP+lG..lopc.p.P...LhptY...FTh..tp..........................pttphhhApNGWlhs.sssLhshAt.st...SpsYLRRElIsWGDsVKLRYG.ppP-DsPaLWpaMpcYschhAphFpGhRlDNCHSTPlHVuEahLDtARclpPsL................................................................................................ 0 81 124 184 +14553 PF14702 hGDE_central central domain of human glycogen debranching enzyme Godzik A adam Jackhammer:GDE_HUMAN Domain this is a central domain of the eukaryotic variant of the glycogen debranching enzyme (GDE). The eukaryotic GDE performs two functions: 4-α-D-glucanotransferase (EC 2.4.1.25) and Amylo-α-1,6-glucosidase (EC 3.2.1.33), performed by the, respectively N- and C- terminal halfs of eukaryotic GDE enzyme The hGDE_central domain follows the glucanotransferas domain and precedes the glucosidase (GDE_N) domain. 
It is very likely that the current definition contains two or more domains, by analogy with baterial GDEs, this domain should be involved in substrate binding either for the N-terminal glucanotransferase and/or the the C-terminal glucosidase (or both) 19.80 19.80 20.70 20.40 18.80 17.30 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.78 0.70 -5.21 58 283 2012-03-11 08:10:09 2012-03-11 08:10:09 1 19 228 0 207 282 0 236.10 34 16.88 NEW GIuslKphLNcLHpphupcua-.....EsalHp.-scaIslpRhsPcTp+GhhLlA+TAFssssss......tslsP.lpLsG.opschlhuhpL......pttssp..........c............hhpDcphLpGlPsplh-lps.............th..pt.tppstlhh..psFsPGSIhlFcTplssss....t.Lcp.l..................................hps..ustcAhspLsLhDLNhlLYRC-sEE+D..ss.Gps...GsYsIPs.aGpLVYsGLQGWhSlL.cpIhtpNDLGHPLCspLRpG..pWAlDYllsRLp .................GIhssKphLNplHpphutcGas............psalcp..-sphl.sVpRhpPpo+puhhllA+TA.Ftsspss......ttlss.hpl..sG.phtchlhphpl.........t.p.ssp..........t........................hhpDpphlpGlPsh.hhplpt.h..........hth..ps.tp.sp.h.hpphsPGSlhlFcsplss...ts......s.Lpp.l....................................hpp..shtph.hsp...LsLh-.LNhlL...YRC-sEEp-.s..ttsGsYsIPs...aGtLsYsGLQ..........GhhSl.L.pcIh....p.NDLG..HPlCs.NLRsG..pWhlDYlssRL................................... 1 73 112 168 +14554 PF14703 DUF4463 Domain of unknown function (DUF4463) Godzik A adam Jackhammer:Q9P1W3 Domain This is a cytosolic (predicted) domain present in integral membrane proteins, such as TM63C_HUMAN TRANSMEMBRANE PROTEIN 63C. This domain usually preceeds a DUF221 (PF02714)domain and follows a RSN1_TM (PF13967) Fold recognition programs consistenly and with high significance predict this domain to be distantly homologous to RNA binding proteins from the RRM clan. 
22.80 22.80 22.80 22.80 22.70 22.60 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.76 0.72 -3.46 198 880 2012-03-11 09:12:07 2012-03-11 09:12:07 1 21 225 0 638 834 0 88.20 22 10.30 NEW lsp.sspcLpcLlccRcphhppLEtshsch...h+ps.p....th.ptps..ptph..........................................t....hh..shhs..cKVDuI-ahppclpcLs .............................................................................................lshsspcLtcLscc.RcchhppLEth.sch...h+psp.....th.ptcs..ptph................................................................................................................................t..........hh..shhu...c+VDuI-ahppclpcL..................................................................................................................... 0 150 321 507 +14555 PF14704 DERM Dermatopontin Eberhardt R re3 Jackhmmer:Q07507 Family Members of this family mediate cell adhesion via cell surface integrin binding [1]. They also induce haemagglutination and aggregation of amebocytes [2-3]. 23.00 23.00 23.40 23.00 22.90 18.50 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.01 0.71 -11.30 0.71 -4.25 29 166 2012-03-12 10:48:15 2012-03-12 10:48:15 1 26 61 0 112 165 2 137.80 28 24.10 NEW NsacpshsapCssspulsslpShasN.tppDRRWsFsCsshus.t.ss.sssphsu...hNp..acpshsasC.....ssNhalpuhpStassstc......DRhWpatCpch..sss...ph..psCh.ps..sahNsacsslsas...........................s...ssspsIsGltS................hasss..pcDRcW+hpsCpl...sC ..................cpshsapCsp.spslstlpShasp.ptpDR...Wsa..t..Ctssst......tp.....ssp.shhp......lNp.....hs......tthshsC.....sss.thlsGhpSha..ssh..............DRcWpFhCsch..sst..................sshsCh.to..sasN..p.aspphshh...........................s.s.shhlpGhto.....................................hasss..pcDRpWpahh..Cph................................ 
0 78 82 94 +14556 PF14705 Costars Costars Eberhardt R re3 Jackhmmer:Q9P1F3 Domain This domain is found both alone and at the C-terminus of actin-binding Rho-activating protein (ABRA). It binds to actin, and in muscle regulates the actin cytoskeleton and cell motility [1-2]. It has a winged helix-like fold consisting of three alpha-helices and four antiparallel beta strands. Unlike typical winged helix proteins it does not bind to DNA, but contains a hydrophobic groove which may be responsible for interaction with other proteins [3]. 24.00 24.00 24.80 24.20 23.70 20.80 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.16 0.72 -9.44 0.72 -3.99 49 232 2012-03-12 11:46:56 2012-03-12 11:46:56 1 6 126 2 163 217 1 77.80 45 36.18 NEW hpVpcEltpLsphIcchGp........p..sss.GphsVpFGhLF..-phssl.-........t..LVGsLhsA+K+plVcF-GEhLhQGtcDcVsIsLL ..................................plp+EltpLsp.IcphGp.......c.stD..GphpVpFG.LF..-+hspI...-........t......lVGhLhtA+K+clVsF-GE.hLhQGtcDcVsIsLL.................. 0 52 71 117 +14557 PF14706 Tnp_DNA_bind Transposase DNA-binding Eberhardt R re3 CATH:1mus_A_01 Domain This domain occurs at the C-terminus of transposases including E. coli tnpA (Swiss:Q46731). TnpA encodes a transposase and an inhibitor protein, the inhibitor only differs from the transposase by the absence of the N-terminal 55 amino acids, which includes most of this domain [1]. This domain consists of alpha helices and turns, and functions as a DNA-binding domain [2]. 24.00 24.00 25.50 24.50 23.20 22.10 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.14 0.72 -8.68 0.72 -4.25 54 483 2012-03-12 14:11:11 2012-03-12 14:11:11 1 8 172 5 115 435 60 55.90 43 15.95 NEW sWAppEhtpssLGDtRhscRLlplsppL..uppPstSlPpusp.shApscAAYRFhsNppl .....tWApcphupAcLGDsRRs+RLlsLussL.......Ap+sGhSIspusp.shApscuAYRhlcNspV.............. 
0 23 63 79 +14558 PF14707 Sulfatase_C C-terminal region of aryl-sulfatase Coggill P pcc CATH:1auk_A_02 Domain \N 25.80 25.80 25.80 25.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.93 0.71 -3.86 86 772 2012-10-03 20:55:17 2012-03-12 17:14:39 1 9 252 8 368 739 322 119.10 24 23.28 NEW Ss+c.....hlFaYsust.............LhAVRhtp.......aKAHahT.shtspss..........................s.s.st.tlptH-P.PL.LFcLspDPuEphPL................ssps....spatpllpp.......lppshcpHppol.........s.s.sQhshhs.....hhhPhhpstp..h...s.ss.C .................................+c..l.Fa.Y.s.sst.............LhAlRhtp.........aKsa...Fho.th..pspst....................................hhtp..hsppcs..PL...lFcLppDPtEph.s.l.................ssts..............s.a.ph.ltp.......hpthhtpa.psh...................h......................................................................................................... 0 105 153 227 +14560 PF14709 DND1_DSRM double strand RNA binding domain from DEAD END PROTEIN 1 Godzik A adam Jackhammer:Q8IYX4 Domain A C-terminal domain in human dead end protein 1 (DND1_HUMAN) homologous to double strand RNA binding domains (PF00035, PF00333) 25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.55 0.72 -3.69 24 349 2012-10-02 17:51:51 2012-03-13 07:27:26 1 29 109 0 202 525 123 73.20 32 9.36 NEW psAsphLcplCpKNpWGpPhYplp.ppsGPcthhh.FhYKVhlPuhss...s....h...sschsssh--AKphAAphsLppLs ......tssphLp-lCpKppWssPpahLh..pssGPcppph.Fla+Vhlsuhsh.......................hh..sschpsthcpAKphAAphsL.tL....................... 0 40 70 122 +14561 PF14710 Nitr_red_alph_N Respiratory nitrate reductase alpha N-terminal Eberhardt R re3 CATH:1q16_A_01 Domain This is the N-terminal tail of the respiratory nitrate reductase alpha chain. The nitrate reductase complex is a dimer of heterotrimers each consisting of an alpha, beta and gamma chain. 
The N-terminal tail of the alpha chain interacts with the beta chain and contributes to the stability of the heterotrimer [1]. 24.00 24.00 24.30 25.30 23.80 23.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.01 0.72 -3.71 73 1502 2012-03-13 09:33:31 2012-03-13 09:33:31 1 5 1099 12 150 902 20 37.10 58 3.07 NEW HFLDRLpaFp+t......p.-sFusGHGtsssEsRsWEcuYRpRW ......+hLDRhRYFKp+.......s.ETFADGHGQlhpssRDWEDuYRpRW. 0 32 70 112 +14562 PF14711 Nitr_red_bet_C Respiratory nitrate reductase beta C-terminal Eberhardt R re3 CATH:1q16_B_03 Domain This domain occurs near the C-terminus of the respiratory nitrate reductase beta chain. The nitrate reductase complex is a dimer of heterotrimers each consisting of an alpha, beta and gamma chain. This domain plays a role in the interactions between subunits and shielding of the Fe-S clusters [1] 27.00 27.00 28.70 28.60 22.20 21.30 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -9.87 0.72 -4.46 141 1906 2012-03-13 09:34:22 2012-03-13 09:34:22 1 7 1444 12 235 921 18 80.90 52 16.13 NEW LSPlhssspussh.....s......slhPslcsLRIPlcYLANLhTAGDstPVttuLcRhhAMRuYMRupslstp.D...tslhcpVGLotp .................................LSPIpShs-AGsh...sss..............ulLPsl...-oLRIPVpYLANLLTAGDTtPVhpAL+RMhAMRpYMRupsVcs....hsD....spsl-cVGLot.t............... 0 59 130 189 +14563 PF14712 Snapin_Pallidin Snapin/Pallidin Eberhardt R re3 Jackhmmer:O95295 Family This family of proteins includes Snapin, this protein is associated with the SNARE complex, which mediates synaptic vesicle docking and fusion [1]. It also includes the yeast snapin-like protein SNN1, which is a part of a complex involved in endosomal cargo sorting [2]. The family also includes pallidin, a component of a complex involved in biogenesis of lysosome-related organelles [3]. 
25.50 25.50 25.70 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -10.17 0.72 -3.68 53 266 2012-03-13 14:00:15 2012-03-13 14:00:15 1 10 143 0 176 242 0 88.10 27 51.76 NEW tptLupG..lhplhpPslcphcsplppltpsQppLtpplcpLspcLcchpc.pp.s.s..hshsp.YhpKLhsl++clhslpphlpplpcRht+lpp ...................h.p.lupG..lhshhtPslpphcpplpplpps........QstLhppl-p.s....sc...Lpchpp.tphs.h..hshsp.YhpKLhsl++chhhlpphhpplpcR.htcLp........................... 0 61 86 132 +14564 PF14713 DUF4464 Domain of unknown function (DUF4464) Eberhardt R re3 Jackhmmer:Q6V702 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 224 and 241 amino acids in length. There is a conserved YID sequence motif. 27.00 27.00 53.90 44.00 26.90 26.80 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.45 0.70 -4.95 44 173 2012-03-13 14:47:19 2012-03-13 14:47:19 1 4 102 0 115 188 5 188.30 38 93.70 NEW hhs.slhpFsTYEDYLDShlo.pDhhYLpspclsRpLlc..LG.aRssupllpR-EFttp+pthctthps...................plsuh...Gcpl..pst.PhLpALApREchshstcLoTIIFlc.css+GpEISGYIDasppL+s..........psacshFpG+++........LhPpsoDLSaaNWcoppshhNsSsNapVls-st.pGLlF+pKtD+KlIsVssps..s.u-NspRsh.lposhYspVVlYDHhsR++s ......................lhpFtsY--YLcS.lo..DhhYLtspphh+pllc..LG.h+sptphhpc--F.tt+ttht.th.s................................htt........up.h..ps..shLhtLAtREc.shptpl...............ooIlFlp.pspp.G.ElSGYIDasppL+p............psacshFpt+++........LhPp.sDLSaasWcsphsh.NsosNa...........pVlscs....GLlFppKtD+KhlsVsspt....ucNspRp..l..o..Yh.slhaDHhhR+p.......................................... 0 45 55 85 +14565 PF14714 KH_dom-like KH-domain-like of EngA bacterial GTPase enzymes, C-terminal Coggill P pcc CATH:1mky_A_03 Domain The KH-like domain at the C-terminus of the EngA subfamily of essential bacterial GTPases has a unique domain structure position. 
The two adjacent GTPase domains (GD1 and GD2), two domains of family MMR_HSR1, Pfam:PF01926, pack at either side of the C-terminal domain. This C-terminal domain resembles a KH domain but is missing the distinctive RNA recognition elements. Conserved motifs of the nucleotide binding site of GD1 are integral parts of the GD1-KH domain interface, suggesting the interactions between these two domains are directly influenced by the GTP/GDP cycling of the protein. In contrast, the GD2-KH domain interface is distal to the GDP binding site of GD2. 25.30 22.20 26.70 23.70 25.20 22.10 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.45 0.72 -4.02 596 4526 2012-03-13 15:50:54 2012-03-13 15:50:54 1 12 4425 2 1039 3025 1279 80.60 40 17.34 NEW RlsTupLNchlpcslttp..s.PP.s...p...G.+c....lKlhYsTQ......ss.spPPsFllF...sNp..schlptoYpRYLcNplRcsF.sh.pGoPl+lhhR .............................RlsTuhLNcllppA..lttps.PP.s..p...G..+R...lKlhYAoQ.......su.spPPsFVla...sN.p.schlp.SYcRYLpNplRcs.F.sh.pGTPI+l.h+................ 0 365 699 892 +14566 PF14715 FixP_N N-terminal domain of cytochrome oxidase-cbb3, FixP Coggill P pcc Pfam-B_28684 (release 26.0) Domain This is the N-terminal domain of FixP, the cytochrome oxidase type-cbb3. the exact function is not known. 24.90 24.90 26.00 25.30 23.80 23.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.36 0.72 -8.82 0.72 -4.57 213 1066 2012-03-13 17:05:29 2012-03-13 17:05:29 1 15 961 0 306 849 102 52.10 41 16.65 NEW tssspsoGHsWD...G....IcEhsNPLP+WWhahFhhTIlaulsYhlh.YPuhshhpuh ...........stpspssGHpaD...G.....IcE.hs.....NPLP+WWhhhFhsTIlaAlhYhlh.Y.Puhu.hpu.h......... 
0 83 189 248 +14567 PF14716 HHH_8 Helix-hairpin-helix domain Coggill P pcc CATH:1bpe_A_01 Domain \N 30.00 30.00 30.10 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 68 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.15 0.72 -3.79 351 1491 2012-10-03 02:11:09 2012-03-13 17:38:04 1 43 1116 210 618 1303 285 67.70 29 13.24 NEW Npcl..s.phL.pclAphhEhpt....ts..sa.+spAY++AApslcs.h.sp.slss...h...pt...tl.....pp.lsGlG...cslApcI.pEhlp ....................ppl.h...chL.cplAphhEhpt....ts..sa+lpAYR+AApslcs.h.sc..slsph....p-.........h........pp.lsGI.G......cphAphI.pEhl........... 0 202 354 498 +14568 PF14717 DUF4465 Domain of unknown function (DUF4465) Godzik A adam JCSG structure SP13250B Family A large family of uncharacterized proteins mostly from human gut bacteroides, but also some environmental and water bacteria (Planctomycetes) as well as metagenomic samples Most proteins from this family are secreted or located on the outer surface and may participate in cell-cell interactions or cell-nutrient interactions This function is supported by a solved structure of a Bacteroides ovatus homolog, which adapts a galactose binding (jelly-roll) beta barrel structure 25.00 25.00 63.10 62.60 22.00 21.90 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.50 0.71 -3.86 22 101 2012-03-13 22:58:36 2012-03-13 22:58:36 1 7 65 1 16 68 17 185.20 31 39.28 NEW ssY......spaG.......t.h......uGauhostT-ss.....ossass...p.huuhsGtGps....sssh.hh....s.sas.uhst...shlth.........sshphcGhalTNooYshhshhpGssh.........ut.......tshp..c.s.DaFplolhGa..c.....ss.....ssl-hhLADaR..ssp...p.lVssWpahDLosLusssplpFphpSSDs..Gp.aG.hNTPuYFslDsl 
......................h....sphG...t.....s.uuashSthsp.t......sssass...p.hus.h.....sts.Gps.......Gs.sa.sll...hGYssuaspt.......st.chh......................ssstplpGlalsNToYsYsshppGspa.............................sth..........sshpc.s.saFplslhshD.......ssG..hhpshchhLADYR.ssp.......s..lssWpahDLsul..s..s.VpslcFshcuSDs..us.YG.hNTPAYhClDcl.... 0 13 16 16 +14569 PF14718 SLT_L Soluble lytic murein transglycosylase L domain Eberhardt R re3 CATH:1qsa_A_02 Domain Soluble lytic murein transglycosylase (SLT) consists of three domains, an N-terminal U domain, an L domain (linker domain) and a C-terminal domain (C). The L domain may be involved in the interaction of the enzyme with peptidoglycan [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.66 0.72 -9.33 0.72 -4.18 96 1072 2012-03-14 10:47:09 2012-03-14 10:47:09 1 2 1057 3 180 738 113 70.40 40 10.99 NEW .pttlttpssltRlpELhthsc.........stA+pEWthLlsphs....p.p.p........pt..tLAtaAtcppWachuVpAoIpuKhWDpl ............................ss...tlspsPphARlcELhhhsh........cssARuEWspLlcstS....c.s.E........pttLA+YAasppWaDLoVpATIsuKhWDpL.......... 
0 31 85 137 +14570 PF14719 PID_2 Phosphotyrosine interaction domain (PTB/PID) Coggill P pcc Jackhmmer:Q6ZT52 Domain \N 25.40 25.40 25.40 25.40 25.30 25.30 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.44 0.71 -4.64 10 195 2012-10-04 00:02:25 2012-03-14 13:07:17 1 2 88 0 123 471 1 164.10 34 47.36 NEW TYsVlYLGNVLTlhAK.GEGCl-KPLupIW+sYspp.+.sslpMKLsVosSGLKAsTcc.........+GLTEYWuHRITaCsAPscaPRVFCWlYRHEG++hKsELRCHAVLCpKpppAppluppLpppLpsALpEaKREKlp.....tQpA+Lshu.slhs..sPs.lP+RKLL..sGs..pNaRP.PVERSKSAPKLuSI-E- ...........................................apVhYLG..ps..sT.....ht.sp..G.cG.CT-csV.sc....l....W.....p+..p......p...u......+.....tss.....ph...c...Lplss...pGl..+h.p..p...h..-tt...............pps....h.c..t..Y..h..ltRIoYC.sA.D..tp.h.P+lFAWl..............YR.H........p.s....cc..............h....s..h........LcCHAVls.p+ttcActlAhhLhpshtpA..h..p.-.a.K..+.p+.........+ppt....................................................................................................................................... 0 38 48 82 +14571 PF14720 NiFe_hyd_SSU_C NiFe/NiFeSe hydrogenase small subunit C-terminal Eberhardt R re3 CATH:1wui_S_02 Family This domain is found at the C-terminus of hydrogenase small subunits including periplasmic [NiFeSe] hydrogenase small subunit, uptake hydrogenase small subunit and periplasmic [NiFe] hydrogenase small subunit. This C-terminal domain binds two of the three iron-sulfur clusters in this enzyme [1-3]. 27.00 27.00 31.20 30.90 23.30 22.50 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.47 0.72 -3.95 163 2101 2012-03-15 09:29:21 2012-03-15 09:29:21 1 5 1192 65 410 1124 51 85.00 50 22.89 NEW aupplH-s.C.RpsaF-tGtFscpaG-.ttt.GhCLaclGCKGPhTassCsphtWNt.ss..uaslpuGpPCIGCoEPsF.Dphssahpt ...........YGppIH-p.C.RRsHFDAGcFscpasD-utRpGaCLY+lGCKGPpTYssCSohcaNs..Gs..uaPlpuGHsClGCsEsGFhDctu.a...hh.................................. 
0 144 296 359 +14572 PF14721 AIF_C Apoptosis-inducing factor, mitochondrion-associated, C-term Coggill P pcc Jackhmmer:JCSG-Target_422903 Domain This C-terminal domain appears to be a dimerisation domain of the mitochondrial apoptosis-inducing factor 1. protein. The domain also appears at the C-terminus of FAD-dependent pyridine nucleotide-disulfide oxidoreductases. Apoptosis inducing factor (AIF) is a bifunctional mitochondrial flavoprotein critical for energy metabolism and induction of caspase-independent apoptosis. On reduction with NADH, AIF undergoes dimerisation and forms tight, long-lived FADH2-NAD charge-transfer complexes proposed to be functionally important. 25.90 25.90 25.90 25.90 25.80 25.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.78 0.71 -3.91 23 200 2012-03-15 13:49:27 2012-03-15 13:49:27 1 9 126 9 116 198 7 98.30 44 20.11 NEW AGENMTGAtKPYhHQSMFWSDLGP-lGYEAIGlVDSSLPTVGVFAKuospD.......sP.cusscsossslpscspspussssstsssps.sss.t.......tt....-DaGKGVlFYLRDcpVVGllLWNlFNRhslAR+llp-s ....................................................................AGENM.sG.A...t.......K..P..YhHQSMFW.SDL.GP-lGYE..AIGllDSS.L.sTVuVaAcsotpp.........p....tt..t...............................................tcpasKGVlFYL..+.sc..hVVGllLWN...lFs.+.hslARplltp.s...................... 0 49 64 92 +14573 PF14722 KRAP_IP3R_bind SSFA2_N; Ki-ras-induced actin-interacting protein-IP3R-interacting domain Eberhardt R re3 Jackhmmer:P28290 Family This family includes the N-terminus of the actin-interacting protein sperm-specific antigen 2, or KRAP (Ki-ras-induced actin-interacting protein) [1]. This region is found to be the residues that interact with inositol 1,4,5-trisphosphate receptor (IP3R). KRAP was first localised as a membrane-bound form with extracellular regions suggesting it might be involved in the regulation of filamentous actin and signals from the outside of the cells [1]. 
It has now been shown to be critical for the proper subcellular localisation and function of IP3R. Inositol 1,4,5-trisphosphate receptor functions as the Ca2+ release channel on specialised endoplasmic reticulum membranes, so the subcellular localisation of IP3R is crucial for its proper function [2]. 27.00 27.00 28.10 27.20 21.70 25.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.88 0.71 -4.66 8 175 2012-03-15 14:06:43 2012-03-15 14:06:43 1 4 68 0 90 165 0 137.00 44 18.07 NEW .hhs+GpShNSou.SusousTsSSlSElL-hapEDsEElLasLGFGpDEPclso+IPuRFFsssSsA+GIsh+lFLcuQlpRIchEsPshuLsSRFRQlElLssVANAFSSLYSaVStpPsQKlussch.......E.hshssPlp+pS....p...cspusht+ltsp...s ..........................................hhphGpShsSst.st.pouussS.SlsElL-hhccDPE-ILhsLGFG.....p.....-.E.sDlsS+IPuRFhsssStA+GIDhclFLpAQh.pRh-hEsPs..LhuRF+QlphLshsssAF.sLhs.VSths.pt................hs....shphss......................s.............................. 0 12 20 45 +14574 PF14723 SSFA2_C Sperm-specific antigen 2 C-terminus Eberhardt R re3 Jackhmmer:P28290 Family This family includes the C-terminus of the actin-interacting protein sperm-specific antigen 2 [1]. 25.00 25.00 25.70 25.70 22.00 21.70 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.14 0.71 -4.54 5 106 2012-03-15 14:07:22 2012-03-15 14:07:22 1 3 38 0 47 107 0 157.40 49 15.79 NEW HhsospTHSVPhsSGh.....uuSsHhoPAuCsaSs+HsPaPapspusssPssshst.......tplEMQLRRVLHDIRsThQNLuQ.s.h+G.Dhshs..hsT.p.SVpPLYEsThpELQshRRsLNlFRTQMMDLELAhhRQQoLVYpHMSEEERcEA-QLQTLRcAVRQELQELEhQLEDRLLuIcEQl .....................................................................Hhtp..T+SVs..SGh.....ssosassPhtCshsp+H.tsasa.p.pssss..Pss...............lEhQLp+sL+slpso..........................oV.h.Lhp.sThpEhpsMRpsLshFRpQhh-LEhshhcQQshVY+HMoEEERhEs-QLQoLRpuVR.Elp-LEhQLp-RlhtlcEpl........... 
0 3 7 16 +14575 PF14724 mit_SMPDase Mitochondrial-associated sphingomyelin phosphodiesterase Coggill P pcc Jackhmmer:Q9NXE4 Family The GO annotation for this family indicates that it is a single-pass membrane protein, and it appears to be found in mitochondrial membranes. Sphingolipids play important roles in regulating cellular responses, and although mitochondria contain sphingolipids, direct regulation of their levels in mitochondria or mitochondria-associated membranes is mostly unclear. Sphingomyelin phosphodiesterases catalyse the hydrolysis of sphingomyelin to ceramide and phosphocholine, and these metabolites are involved in signalling pathways. 23.70 23.70 24.20 23.70 22.10 23.60 hmmbuild -o /dev/null HMM SEED 765 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -13.23 0.70 -6.74 4 286 2012-03-15 16:48:53 2012-03-15 16:48:53 1 4 90 0 124 270 0 315.60 27 82.32 NEW QPSFLLAoLKADslsKPFhQpCQDLV+VIEDFPAKELHsIFPWLVESlFGSLDGsIVGWNLRsLQuRhNPsEYslsh-FLDPSGPMMKLVYKLQAE-YKYDFPVSaLPGPVKASIQEpVLP-CPLYHNKlQFPsSGGluLNLALNPFEYYMFaFAhSLITQKNhPluhHlSouDSAYFILVDpYLKaFLPTEGSVPPP.SossGGolPSPsPRoPulPFsSYGhHHTSLLKRHISHQsSVNADPAuQEIWRSETLLQVFVEMWLHHYSLEMYQKMQSPphKLElLHYRLSlSShhh.sPhps.u.tuLHuYQEsFpPTEEHVLVVRLLVKHLHAFSNSLKPEQlSPSAHSHTsSPLEEhKRVVVPRFVQQKLYlFLQHCFGHWPLDASFRAVLEMWLSYlQPWRYssEKssPso.-.psRsVsEKWusFVQENLLMYTKLFlGFLNRsLRTDLVsPKNALMVFRVAKVFAQPNLuEMI.KGEQLFLEPEhVIPHRQHRLFhoPshGGSFLSuW.PslTDsSFKVKSHVYSLEGQDCQYpQMFGsEsRsLVL+LAQlIsQA+QTAKSISDpSAEosAspSFhSWFGhuSsDhNGSYsGsDLDEhGtDol+KTDEaLEKAL-YLCQIFRLNtuQLsQhhhslGouQD-sGKKQLPDCIpuEcGLILTsLGRYQlINGLRRF-IEYQGDPELQPIRSYENAhLVRhLaRLSSslNcRFAspMsALCSRcDFLGphCRaHLTsPthsp+t+hSPltpcps.ucstuPRlSLR 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
0 44 55 91 +14576 PF14725 DUF4466 Domain of unknown function (DUF4466) Coggill P pcc Jackhmmer-JCSG:target_419245-SP18803A Family \N 25.00 25.00 394.90 394.70 21.50 19.70 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.00 0.70 -5.14 6 11 2012-03-16 11:55:18 2012-03-16 11:55:18 1 1 11 2 3 13 0 315.10 48 94.39 NEW ACcDp.....-scSsL+NDhIK+Tl..uPslsGpcIEFAYAMu..sppG+lssApscASIAGAsGThF-hpSaYT............uoGpDVPVQsscDusTsGssoTsshhccss..................AATLRYaYllPc-A+GKsVSFoFSApSSsGpplSYphssY+ISKMDMK+slslpNtsACYlSltD....MpsYTKt-VssN.sLAsKIDhlYlYp.plsshDasHAhVoPuosscYls.ushlPuGhsNs.TthcKphsV+DtQLpshph.uVYIDDlDFcplDhusAsDYslsL+p-sGAaVcTADGKYtAYVYlNplssSuKphTVSlKRYsL .....uCp-......-scssL+NDhIK+Th..GPslVGppIEFAYAMu..spcG+lssApsEASIAGAsGThh-ppSaYT............uoG.DVslQsussolTsGssoTsshhccss..................AATLRYYYllPEEA+GKsVSFoFSApSSsGppVSYphssYcISKMDMK+slslpsuuuCYlSIAD....MpsYocs-lssN...usKIDhVYlYp.plsshsFsHAlVoPuusspYls.uVpLPuGlsps.TtlcKthsV+DtQLtthph.uVYIDDlDFpplDhssAssYAlNL+p-sGAaVcTADGKYtAYVYlNslssouKphTlSlKRYsL 0 3 3 3 +14577 PF14726 RTTN_N Rotatin, an armadillo repeat protein, centriole functioning Coggill P pcc Pfma-B_645 (release 26.0) Family Rotatin and its homologues such as Ana3 in Drosophila are found to be essential for centriole function [1]. A deficiency of rotatin in mice leads to randomised heart tube looping, defects in embryonic turning [2], and abnormal expression of HNF3beta, lefty, and nodal. Thus it is required for left-right and axial patterning. Ana3 - the Drosophila homologue - is present in centrioles and basal bodies, is required for the structural integrity of both centrioles and basal bodies and for centriole cohesion. Rotatin also localises to centrioles and basal bodies and appears to be essential for cilia function [3]. This family represents the N-terminal domain. 
25.00 25.00 26.30 26.30 24.40 24.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.23 0.72 -3.42 25 85 2012-03-16 13:05:11 2012-03-16 13:05:11 1 3 68 0 57 75 0 97.60 43 5.77 NEW tEIRhRALcsIpsKlp+sLlphp-ls.ppp.LL+pLlc..WFsassss.tp-cVLsLlpc.LhcpshuspllpclGs.cF...Lscl+....phlssph.ptplppllcsL .......EIR.RAL+sIlsKl-HsLlshsDLl.pc+hLhlpLLE..WFNFsslP.hp-EVLsLLpc.LlKaPsAsphLh-lGAl-F...LocLR....ssl-Ppl.pscIDslLDsL........ 0 17 21 35 +14578 PF14727 PHTB1_N PTHB1 N-terminus Eberhardt R re3 Jackhmmer:Q3SYG4 Family This family includes the N-terminus of PTHB1 protein. This protein forms a part of the BBSome complex, which is required for ciliogenesis [1]. 25.00 25.00 27.50 27.00 23.40 21.20 hmmbuild -o /dev/null HMM SEED 418 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.26 0.70 -5.52 25 181 2012-03-16 15:04:53 2012-03-16 15:04:53 1 9 114 0 104 189 5 348.90 39 50.44 NEW MSLFKsR-WWoopssp.sEcaDp.usLsVuNlDNssstp.......DpIlVG..SasGhLRlYpP.........ppssacsp.DLLLEppLppPILQlphG+F.........luus.pslpLAVLHP++LslYsl.....pshsGp..sppust.......hpLpLhYEHpL......p+sAhNhshGsFGGscs...........+DhlCVQShDGtLpFFEQ-shuF.s+hLss.aLLPGPlsYss+sDSFVTsooshpl-sY+.........YpsLAsuspsppppt...p...........................su++lsssWohslGEpsl-Ip.................lsphsp....spssIlVLGE+oLFslccsG.pl+a.K+L.-asPsshhsYss..hscsshp........................lllsocospLhlYpDssLtWuAph...spsPVAlpsusht.............sl.pGhlVsLspsGpLpssYLGT-P...........shhhsPshp..s+-hsY-chpcEhpcLpclI+pussspch.......psccclplpsplsspl- 
.........................MSLFps+-WWss..hs..tEpa....c....t...u..sLhlushsspts.t..p......................DpIlVG..Sh.Gh.LRIapP..........ptpshpsp.clLLEsplp.tPlLQltsG+F..........lssp.p....pLAV.Lps+plslYsl..............tth.Gt....hthusp.............................hphplhaEHpL......p+sAhshshGsFGssps...........+chlClQShDGhLhhaEQ-shs...............F.sphLPs..hLLPGPlsYss+oDoFlTsoSshplpsY+..Yps.Luhussttt..................................ss+plss-WohslGEpslDlp.................lsphsp................stsslhlLGERshasLc-.s.G.pl+ah++L.-hsPsChh..s...Yss...hppsshp........................hlluscsshLhlYpDssLtWuspl...sthPV.Alplushp.....................................sl.cGhIVoLussGpLpsuYLGT-P...........sha.p.ss.lp..s+..pl.sYpphphEhpcLpchI+chptstsh.......pttcpltlph.l........................................................... 0 45 55 82 +14579 PF14728 PHTB1_C PTHB1 C-terminus Eberhardt R re3 Jackhmmer:Q3SYG4 Family This family includes the C-terminus of PTHB1 protein. This protein forms a part of the BBSome complex, which is required for ciliogenesis [1]. 
25.00 25.00 33.70 25.20 24.80 24.70 hmmbuild -o /dev/null HMM SEED 377 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.16 0.70 -5.67 6 158 2012-03-16 15:10:26 2012-03-16 15:10:26 1 7 107 0 92 177 5 312.30 34 42.03 NEW solKlsl+s+hphppspLolssptP..LslopDshhFcslus.tho+shshslYl..psshhPssLpsplVsSYsosp.....GlPRllQppspLPL+LhhcPsQPsKsAsHKLTlssNpsPVs......LhsLFPEFhp..EcusssAlGFQhlu....Gp+..VTlLAuKoSpRYRlQSDpFEsLsLlscchlhRh............ccpas+pshhDtF..plohuGshPlpphh-hIDsHF-lRhshccLcspLpptusQaRsIQRRLls+FK-KoPsPLpsL-hLL-uTYsplhtluDplcEhccsLh+utscLsuuspLllLlltLhhsL.ssctlplLEushsPllhDhpE.uWEEhsDAuloaLL+TsLuKSSK-Quhs..........hpsshc.PpDsS........+L+KHlshlsDRls .............................................h.hp.t...hptspltl.st.P..l.hspsp.h..sh..p.hss..t.spp...hthshah....ptth....Psphcsphss.oasp.sp...........G.....hP+llQpphpLPLpLls..hssp.PsKsA...sa...KlTlc.TNp.s.sls................LhslF..........s.t.Fst............pc....s...ps...ssh......Ghphls.................Gsp......lTlLASKsSp..R.YRIQS.-phEsLaLlspELlhRl...........................................pph..hpc.t...hs..h....thsh.s.....ssh...P.........lpcaachlDpHac.lRhphcchpchLscpAhQFRu.lQ+RLLs+a+-+sPssL..p..tL-sLL-sTY..cplhths-thpp.ptphhpthstLpssspLh..h..hllth...p.h.p.pththlpshh.s...p.............p........uWEE.stsulshLlphs..pp.pttt..................s...t...sht........ph++phshhh-+h..................................................... 
0 35 45 71 +14580 PF14729 DUF4467 Domain of unknown function with cystatin-like fold (DUF4467) Godzik A adam JCSG target SP18127A; Pfam-B_491 (release 26.0) Family Large family of predicted lipoproteins from Gram-positive bacteria Experimentally determined structure shows a cystatitin-like fold, allowing us to classify this family in the NFT2 clan, despite lack of any detectable sequence similarity between members of this family and other families in this clan 21.90 21.90 23.90 30.30 19.90 19.70 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.19 0.72 -3.66 24 517 2012-10-03 02:27:24 2012-03-18 08:21:19 1 1 217 2 13 123 0 94.00 44 78.01 NEW YsccIDclhKhppcppcch.tp.s.scsppc.a-+ccuNhYVY-cGKhIllu.Yp.hK.sscpl.hYhhYchps..cKhph.ccch.....s..s+pYhccH.cPDY+EpN ..YpK-IDcshKlQspppcph.uKhs.schhschc+cDuNhaVYccGKlIllu.Yp.hp.sc-ch.aYaAY-hpD..cKsph.pp-h.....D..sc+YhppH.cADYc-EN.. 0 3 3 11 +14581 PF14730 DUF4468 Domain of unknown function (DUF4468) with TBP-like fold Godzik A adam Jackhammer: JCSG target SP13279C Domain A large family of (predicted) secreted proteins with unknown functions from human gut and oral cavity.\ Typically forms a N-terminal domain with FMN binding domain at the C-terminus. Experimentaly determined 3D structure of this domain shows a variant of a TATA box binding - like fold, but no detectable sequence similarity to other proteins with this fold 22.60 22.60 24.50 24.10 20.90 20.60 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.89 0.72 -4.29 50 160 2012-03-18 18:57:27 2012-03-18 18:57:27 1 3 125 0 29 156 6 92.60 26 32.94 NEW aopplpssuh..op.splYsphhpWhsphhps...h.sSplshssccpGhIsupu..pthllFs.....s.shLS.....ls+sphpYpltlcscDs+hclohoclp.YpYp .........Fscphpls.uh..Sp.spIYcphhpWhspphpp..........t.sS+lshsscpcGsIsupG..cchl.lFs.....s.sslu.....LD+splpYplplsCc-s+splphscIp.YpY............ 
0 13 24 29 +14582 PF14731 Staphopain_pro Staphopain proregion Eberhardt R re3 CATH:1x9y_A_0 Domain This domain is the proregion of the cysteine protease staphopain. Like many papain type peptidases, staphopain is synthesised as an inactive precursor and cleavage of the proregion is required for activation. This proregion has a half-barrel or barrel-sandwich hybrid fold. The proregion blocks the active site cleft of the mature enzyme on one side of the nucleophilic cysteine [1] 27.00 27.00 47.30 46.60 23.30 22.60 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.13 0.71 -4.49 6 356 2012-03-19 09:54:49 2012-03-19 09:54:49 1 2 195 4 4 96 0 167.80 64 43.35 NEW KpscVNVcsKcVPpcV+cLApcQYLSaVsuLDKtSNpcsuuYTLGEsFKIYKFNpcSDGNYYYPVLNK-GcllYlVTISPKssss.KtSppsusYSINVSPFlSKsLNQYKsQ..pITILTspKGYahhsEDsKl+LVLKTPhsssKppKpsscpsss+.hpphKQTuolTK ...KplplNVcscpVPpcV+sLAQppahuYspuLDK..N..t.cpupYpLGEsFKIYKFNtcpDssYYaPVl..p-GpIsYhlTlSPKsp...ppSpps.pYolplSsFluKsLsQhKDp..pIT...lLT.spKGaY.hppstKs+LVhtTPh.pshK.Kcotphsoup.hppLKppsosTh......... 0 2 2 4 +14583 PF14732 UAE_UbL Ubiquitin/SUMO-activating enzyme ubiquitin-like domain Eberhardt R re3 CATH:1y8q_D_03 Domain This is the C-terminal domain of ubiquitin-activating enzyme and SUMO-activating enzyme 2. It is structurally similar to ubiquitin. This domain is involved in E1-SUMO-thioester transfer to the SUMO E2 conjugating protein [1]. 27.40 27.40 27.50 28.00 27.30 27.30 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.67 0.72 -3.88 74 271 2012-03-19 13:18:53 2012-03-19 13:18:53 1 9 228 6 191 271 5 89.30 29 14.34 NEW pltlsspchTlpcLl-clL+ppLuhspP-lhl...ssslla-s--..........t.hssshpKpLu-l............GlpssohLslpDhpQc......hplplhlpcpcphcp ................lplssc+sTlpsLh-cll.....KpcLuhst.......P-lplp....spuslLhss--..........t.h-sN.pKpLu-h............Glp..sGo..hLpscDh.p-......hsl.l.lhcpcp...t................................................. 
0 58 97 153 +14584 PF14733 ACDC AP2-coincident C-terminal Woodcroft B, Eberhardt R re3 Woodcroft B Family This family is found at the C-terminus of apicomplexan proteins containing the AP2 domain (Pfam:PF00847). 25.00 25.00 25.10 25.30 24.00 22.80 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.84 0.72 -3.86 28 106 2012-03-22 12:53:11 2012-03-22 12:53:11 1 4 11 0 100 116 0 94.70 22 6.97 NEW sssptLpltKpAlphlLpDLppsClspl...tthts.............................hpphlptHlphlpsu.tshpplhsYlplFssplppshLPSshshptQthllpuL .............................................................................................t.tphlpltKtAlhhlLpDLpppshsph.....hh.s...................................................................hp.hpphlctHhphlpsu.pshppltsYlplFspsIppppLPSphshptphhllpuL..... 0 37 47 88 +14585 PF14734 DUF4469 Domain of unknown function (DUF4469) with IG-like fold Godzik A adam Jackhammer:JCSG target GS13689A Domain A C-terminal domain in a large family of (predicted) secreted proteins with uknown functions from human gut bacteroides 22.60 22.60 22.60 23.10 22.30 21.40 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.24 0.72 -4.15 29 126 2012-03-23 06:08:24 2012-03-23 06:08:24 1 2 44 0 33 111 1 97.60 31 43.92 NEW IspVsDssTGptssslTsGtshplpGsplKls.Gs-......susGlhhss.........ppG........s.htlssshlstNpPSpLhhhlPstLssGp.YpLplsTpauu.usphLKssRospa ......................tlpD.sTttts..GslTsGtshhlpGpplKls.Gs-......s.ssGlhhss.........ppu........s.stlshs.lshNsPSclhhhlPssLscGp.YpLplsTQaus.ssphLKsPRoh................. 0 15 30 33 +14586 PF14735 HAUS4 HAUS augmin-like complex subunit 4 Eberhardt R re3 Jackhmmer:Q9H6D7 Family This family includes HAUS augmin-like complex subunit 4. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2]. 
25.00 25.00 25.90 25.20 23.80 22.90 hmmbuild -o /dev/null HMM SEED 239 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.44 0.70 -5.06 12 109 2012-03-23 08:46:42 2012-03-23 08:46:42 1 3 76 0 61 109 0 209.10 34 62.65 NEW stphsspsshsss.stlLGlsttpLhphhssp.sh.....thptpL.pElEppL+cKC.sLlsaapPsspssucshptsKss+Lsphlctcp..cphpptctphpcshhhhc+phppYhpsLhpsLplLppllp-a+LcpQs-hDchpppaLpsKC-sMhhKl+s.phplLpDTYTtEolsAh+pIRchLpsAhcptcp-hppupptLpsYEslG.cF-slscEYsclhpcl-s+pWsLpclpps ......................................................................................h.p.....tss.tt..s.hLGlp.t.Lhph....p....sh......hptpL..ElEtpL+pKC.....sLhshas.ss.......pssup..th.ptspsh+LsEhlhtth..pphpctcs.....t.pEph.hhh-+p.upY.pVL.ppLslLpp...Ll...p-p+LcpQ......schDchptpaLph+CpsM.hKL.Rh.Ehc......lLp-.TYT.sEpltsh.+h.IR...cpLptuhc.tppphpcupph.LpsYc.sls.c.........F-p..........ls+pYpplhpthEshp.Wslpphph.............. 0 16 30 44 +14587 PF14736 N_Asn_amidohyd N_Asn_aminohyd; Protein N-terminal asparagine amidohydrolase Eberhardt R re3 Jackhmmer:Q96AB6 Family This family of enzymes catalyse the deamindation of N-terminal asparagines in peptides and proteins to aspartic acid [1-2]. 
24.00 24.00 24.00 24.10 23.90 23.80 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.78 0.70 -5.53 19 160 2012-03-23 10:23:26 2012-03-23 10:23:26 1 4 114 0 93 169 0 232.10 37 83.22 NEW thtSpsspsVsstslLYVpQREaAsT...sPpDpsVsllGSDDATTChlVVlRHTGSGsssLAHhDGos.Tcsulshhlstlpshs.s...tpGRLElHLVGGFpD.................scphScpLshpILpuFc+pp--IHLpTsClsEhNshlc.sGlphPllYGIuVNlKTGclFPAoF..ss+GPDctLRpARhh.......ssuphlslYDsppphl+IGPhpasPhhs..sshWLppsDchILppLSTSPtsEPPHFVppl+uslpFlh-HPps.sslFPcspP+ha+Rsc.sGpWc+l ...........................................t...p.sss.thLYVtQREhAss......sP.t.s....tpl.sl......lGoD-ATTChlVVlR...csusG......s...ssLsHhD..........uss.scttls.hhpplpshs........puRlEl..HLlGGFsD.......................s.cthSppL..s...hpllptFccpp.....c.....lc....Lho.hCVs..-hNsh..............c....st.......phPllaGluVsl+Tucla..AoF......s+GP-c.LRtARhh...........sst.hlsl.YDspht.l+IuP..hsapPh.t....ssha...LppsDp.lLpphSTSP.sEPPHFltphRtslhal...cp..s..thF.s.ppshhac+sp.sGh..W........................................... 0 32 46 69 +14588 PF14737 DUF4470 Domain of unknown function (DUF4470) Coggill P pcc Jackhmmer:Q8N9W5 Family This family is conserved from fungi to Metazoa and includes plants. The function is not known, but several members have zinc-finger domain, zf-MYND, Pfam:PF01753, at their very C-terminus. Others are also associated with DUF1279, Pfam:PF06916. 
25.00 25.00 27.80 26.10 23.60 23.20 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.40 0.72 -4.27 100 314 2012-03-23 14:25:58 2012-03-23 14:25:58 1 21 155 0 250 334 1 98.50 23 12.55 NEW haGsos...Ahsl...hpp....................tt..s.ppslslLhh..GsG.DhRpllpTlsshs.pp.p............p...lplhlsDts....tllARNlllLpl.....lhs.s......ppssphhhc.laasshls .....................................................................hG.osAhslhp.........................................s.ppslslLlh.............G..sG.DhRplltTlspt..tp.tt...........................plphhlh-hs.pllA.RslllLpl......hhc.s...................pptsphhhclahshhh................................... 0 118 170 216 +14589 PF14738 PaaSYMP Solute carrier (proton/amino acid symporter), TRAMD3 or PAT1 Coggill P pcc Jackhmmer:Q7Z4T9 Family PAT1 (proton amino acid transporter 1), also known as TRAMD3 of AAT-1, is the molecular correlate of the intestinal imino acid carrier. It is a proton-amino acid co-transporter having a stoichiometry of 1:1. Due to its mechanism, PAT1 activity increases at acidic pH, which correlates well with the acidic micro-climate close to the brush-border in the intestine. Glycine, proline, and alanine are the preferred substrates of the transporter. The maximum velocity is similar for the three substrates. All substrates are transported with low affinity, showing Km values in the range of 2-10 mM. The transporter does not discriminate between L- and D-isoforms of these amino acids; in addition, beta-alanine is transported with similar affinity as alpha-alanine. Similar to the IMINO transporter, the amino acid analog MeAIB is recognized by PAT1. The transporter is strongly expressed in the small intestine, colon, kidney, and brain. 
21.60 21.60 26.40 21.60 19.10 18.80 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.97 0.71 -4.49 32 172 2012-03-23 16:23:11 2012-03-23 16:23:11 1 10 109 0 105 175 2 146.70 38 20.68 NEW usQT.YRES-AQTsPYoP..-.aslp.s.ss...sP..ElLsLssLpasc.G.L.PuGhtEVEhIERARc+RsaEssLP.s..........hs....Dts..ph...p......pR...+phh-t.hEhcEWthREp-IpchQchRL-llpchlpcREcppcphsppRl-phtpphppc+pttlp+lctctl..cthR+ .............t.sQT.YR-u-sQTsPYpP..Ehhspp.ss...hs..............ElloLusLpa...........uc..G.......L...PsG.tEVEhIERARcKRAaEssLPs..........hsDts....ph...p.......cR+chhpthEhcEWt.REp-Ipc..lQchRL-llpchLccREcpppphstpRl.ptthpphpcp+ctpltplphphhpshR+.......................... 1 44 53 77 +14590 PF14739 DUF4472 Domain of unknown function (DUF4472) Coggill P pcc Pfam-B_085261 Family This family is specific to the Chordates. Some members also carry Kinesin-motor domains at their N-terminus, Kinesin, Pfam:PF00225. 22.70 22.70 23.40 28.70 22.50 22.40 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.78 0.72 -10.25 0.72 -3.59 12 44 2012-03-23 16:37:54 2012-03-23 16:37:54 1 2 35 0 29 51 3 107.70 38 21.45 NEW SEEp+LpISKELVDLQIcsp+l+EQaEAEhFELKNc..................lLpLEsRlLELELct-.....phststsshtcphphspp.p+chttphh.h+pph.s.spsh.p.pscpccLu.pL. .SEEp+LQISKELVDLQIpTp+LpEQaEAEhFELKsc..................lLpLEsRVLELELcs-.....pss..spss.tctht....spc....+pchtsphh.hcpph.s.scsh.s..tcppcLu.tL.h............................... 0 10 13 18 +14591 PF14740 DUF4471 Domain of unknown function (DUF4471) Coggill P pcc Jackhmmer:Q8N9W5 Domain This family is conserved from fungi to Metazoa and includes plants. The function is not known, but several members have zinc-finger domain, zf-MYND, Pfam:PF01753, at their very C-terminus. Others are also associated with DUF1279, Pfam:PF06916. This domain is more C-terminal in many members to DUF4470, Pfam:PF14737. 
25.00 25.00 26.40 26.20 22.50 22.30 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.83 0.70 -5.18 18 137 2012-03-23 16:41:55 2012-03-23 16:41:55 1 9 98 0 86 142 4 253.10 29 55.42 NEW hlslptLKaKERDtLEthFpaWpst...ppsaclschW-...pRlRphLGsRYDpRsGlaDWDhsMpL+-.ptup.IssQEYRpWRcsGlAFsa.......sE.t.-aspPNKThssuhl...psGpsahpRGYlGDIpTGPFhuFG...lcss-.....E+h.h+ohcG.....pscapuTDloE+Nlhplh........aELp.....sp...........ssap.hsts...........-....................sps.p..th..psptsh....sp.p.....hhsstpVpl+Fl.slph.lchhpp.+p+apphFDllFlups.hspaLpss....hhp.sh..+ss.AllllETt+alssh+K-phppatscl+clh+pushcsstshs ...................................lslptLKa+ERDtL-thhphWt........t........ps...........ashsphWD...pRlRphhupRYDtRpsh...hDWDhp..MpL+c..pt........uphIp.pcaphWRpsGlAFph..........................h-t.tYp.hPN+Thsshhh............pp.............GpphhtRGYhGDIhsuPahuFG....lcspc.....pph.hph.ps.....p..hsut-lspcNlh.phh........hpltst..................ttht.hs.t..........p........................................................................................................tp.t..t......tt.t.........t.......h......thplpal.sh.s..hpp.l................p.+.ppa......pthFphhahuss.hsphlpsp..........htt.hh.......tst..uhlhhEht.pahhshpp-phtta.ppltchsptsGht......s....................................................... 0 38 48 71 +14592 PF14741 GH114_assoc N-terminal glycosyl-hydrolase-114-associated domain Naumoff D, Coggill P pcc [1] Domain This short domain is also a very small family found at the N-terminus of GH114, glycosyl-hydrolases. 
22.00 20.50 22.90 38.90 18.10 18.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.67 0.71 -4.42 9 17 2012-03-23 18:28:37 2012-03-23 18:28:37 1 5 7 0 16 17 0 124.80 26 28.58 NEW shssp-lssopDshspYlchus.........GuphphpFshPu.husssstsLslsssht.s.ttSutcWph-hash...ssssWsplGD.utAsShsWosssLsls.ssst.cFV.ussplplphs...psssspushlDh .......hsstshssotDs.sp.lchus.........uuphphhFshPu.sus....sphhololsspht.ssttsuscWph-hasa....sussWsplGD.otssohsWosh.sLsls.ssP.....s.sFl.ssstlphphp...psusspu.hlD........... 0 13 14 15 +14593 PF14742 GDE_N_bis N-terminal domain of (some) glycogen debranching enzymes Godzik a adam Jackhammer:YP_001865398 Domain This domain is found on the N-terminal of some glycogen debranching enzymes and is usually followed by the GDE_C (PF06202) and in this sense it is analogous (but probably not homologous) to the GDE_N (PF12439). Its exact function is unknown 22.10 22.10 22.80 41.80 21.10 22.00 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.38 0.71 -11.12 0.71 -4.93 158 381 2012-03-25 04:19:21 2012-03-25 05:19:21 1 3 312 0 197 416 23 191.80 27 27.41 NEW sL+cussF.hls-ppGDI.......s..ssspGLahpDTRaLSchpLplsGppshh...L.uusspps.ttshh..tlss..........l.t.s...st..hscsslplpRpRhl.t...su..hhEclslpNasspssphplplphsADFsDlFEVR..Gtp.ct..c+Gphh...sp..h.ps.s...p....................lthpYpG.D.....shpR.poplph.....t.........s.sssp...lss............sp....hsaplpLsPppphpltlpl .......................L+pussFhlsDtpGDl.......tsssspGLahpDTRhLSphpLplsG.....ptP.h...L.uusspps.t.tuhh.pLss..........l...t.s...st..hscsslplcRpRhlt................ss..hhEclslpNasspssplplslphsADFsDlFEVR..Gsp.ct....c+Gpht...sp..spss...t.........................l.php.YpG.D.....shpRssplph.......p...........stPsp..lss..................................sp....ssaplplssptphslhlp.......................................................... 
0 54 119 159 +14594 PF14743 DNA_ligase_OB_2 DNA ligase OB-like domain Eberhardt R re3 CATH:1fvi_A_02 Domain This domain has an OB-like fold, but does not appear to be related to Pfam:PF03120. It is found at the C-terminus of the ATP dependent DNA ligase domain Pfam:PF01068 [1-3]. 25.00 25.00 25.00 25.10 24.90 24.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -9.01 0.72 -4.30 136 673 2012-03-26 11:48:33 2012-03-26 12:48:33 1 11 649 7 120 539 455 66.90 42 21.06 NEW .G+GKapGhhGALhl..............ch.ss.....G..hcFclG.....oGFoDpp...RpsPP...........lGohlTY+YpGhT.psG.....hPRFssFlRlR ...............GcG+apGthGAlls..............ch..ts.....G......hpFpIG..SGasDp-...RcsPP.......................hIGollT..Y+YpGlT.p.pG.........hPRFssFlRlR.................. 0 40 76 108 +14595 PF14744 WASH-7_mid WASH complex subunit 7 Coggill P pcc Jackhmmer:Q2M389 Family This family is the central, conserved region of proteins that form subunit 7 of the WASH complex [1]. In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes [2]. 
25.00 25.00 32.10 31.60 20.30 20.10 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -11.96 0.70 -5.60 29 153 2012-03-26 12:33:06 2012-03-26 13:33:06 1 10 116 0 104 160 4 310.00 48 31.51 NEW FLYWpR.slh.Phahpplacsshpsp..+l.ahhsAhpDshshltt.........spHhps..stslhcs.....apct.lhpplcccllcPLCpclEs-LRLpsHu.cL...phs.spsPhps....s....hp..Dlsp...hl.plsPl+hhsphlsl+pclE+YLspTFYNLsslAhHDWKTYtEMRsLApp+YGLphh-s+LPsQTL-QG.LDVLcIMRNIcsFVu+YsYNLNpQlFlEc..sS..s...uKHLsTIsI+HIANSIRTHGsGIMNTTVNasYQFLpKKFhlFSQFLaD-aIKSRLlK-hRaa+cp.+cp...hs..tpYPa-RA-cFt+pI+KLGl.......sss...GpoYLDpFRhLITpIGNAlGYVRMlRSGGlchsucuhpFlP.slpsh.ssap ................................................aLaapR.shhPhahpplappshpst..pl...YhhsAhpDshs.hhp..........upHhps....p.Llps.....appp.lhphlpcpllc.LCp-IEpDLRL.psHo.HL....pls.sp...sPh.cs.................u..h+..Dlsh....ah..plpPl+.hhscalcl+.shVp+YL-psFYNLoTlAlHD..WtTYpE.MRs.LApp+YGLths-sHLPsQoL-.........QG.LDVLpIMRNIHlFVupYhYNLNsQlFlE+..sS...........s........sKHLsTIsIRHIANSIRTHGTGIMNTTVNF.sYQFLppKFhlFSQFhaD..-+IKSRLlK-hRaa+Eh.K..cp....................ts....ppYPa-RA-+Ft+sIRKLGl.........................os-...GpoaLDpFRpL......IopIG.NAhGYlRMlRSGGL+ssusuhpFlP.chcs............................ 0 37 50 80 +14596 PF14745 WASH-7_N WASH complex subunit 7, N-terminal Coggill P pcc Jackhmmer:Q2M389 Family This family is the conserved N-terminal region of proteins that form subunit 7 of the WASH complex [1]. In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes [2]. 
25.00 25.00 32.80 27.90 24.30 23.70 hmmbuild -o /dev/null HMM SEED 567 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.86 0.70 -6.33 14 161 2012-03-26 12:44:04 2012-03-26 13:44:04 1 8 111 0 105 164 3 456.10 29 49.95 NEW GpFhp-asppLpplccsl..s.so..lsps.....hshth-PlsLphhsh......EphslhcL.l.......co-.pKlhNKllsslAuLCsph+pLpccAc.pah.sLlhaGEt.hs..........-st.s........tcs.hphuphlshhpcL.talpRshsllpslhpQluAlhs......h.hshspVHh.sVa-sls-LLshLlolDElhptposlsspWsLY++hl+olppssupa..s.ls.c.cLptL-+hLtcl-spLLsGsIFpphlpphhD...t.hsVscNsths.pEhst.l+plhsplEu+...s-.pp.h...pppcphltlsuLhVlhapLatp.....hD+KLhKplh-lt++hstlslssNllWhPssFLhp+hssh.h..Kthscts.p.shp..+pphLpphstsht+pspphthQlutWhlcMposhsps.....h.hctLpspspLllpGlhhAtplSpllcsllNLHsuLstPhoKosVhslC+llEhLKsIptTFappphhlschlspllQalshhhhphLpssK++lst....DppYocc+LDlLSuLpLupcsLpGssTp-RlhllpLuLushhph..csl+s.......-chcplp.lhp+Lctlu-lppplptts...DsS .....................................................................................................................................................ssltl.hhs...................cp.s.l.pl..l........................ppc..splhsKllsshusLstEhptLp.pApp........phh.s..Lhh.aG-t....hp......................................................-st....p.......tcs.hphu+hlshLpcl.talpRshp...Vlh...sllpQLuula.s.....t................t..h..........t.hp...t..........l+hp.shac..plucLLthllslDEllppp.slpstaphY+.+h.lpplppssspa.........s.ht..pccl.c.hcphlhpl.....c..tpllss.lhp.tslpp.a-....................h............lpp...sphhspchtt.lcphhspl-sph...spspp.h................pp+pphlslsuLhslh.hplahs..............................h-p+hh........Kplhchp.p+.hPhltlh..uplhahPstFlhpph..Ps...h...+.h..h.s.ppt.t....sht................pp...t...hhppt.stp.h....p.....phpphhh.lssWh.hc..Mpohhstp................th.tp..l....pptsplhl..p.GhhhA.plpphltshh......sL.....ahs....hp...tPhopssVpsls+hlphLKslppha..hp+th.hl..spshshlhQplph.h.p.ltsh....+pplht........ppp.sp.pp......h.-.....hL...uulhls.phl.p..us...o..hp+hhhlpLslshh.hp.....p......hhpt...........
..........pch..h..hhtplphlsp.htt.h....s................................................................................... 0 40 49 81 +14597 PF14746 WASH-7_C WASH complex subunit 7, C-terminal Coggill P pcc Jackhmmer:Q2M389 Family This family is the conserved C-terminal region of proteins that form subunit 7 of the WASH complex [1]. In species such as Drosophila this protein is the only component of the 'complex'. This complex is a nucleation promoting factor necessary for the activation of Arp2/3 that nucleates and organises actin filaments by associating with a pre-existing filament to induce the assembly of a branching filament. WASH thus effectively nucleates actin on endosomes [2]. The C-terminus is predicted to include a transmembrane region. 25.00 25.00 50.50 25.70 20.60 20.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.87 0.71 -4.66 22 137 2012-03-26 13:08:12 2012-03-26 14:08:12 1 9 112 \N 97 142 2 166.40 44 15.89 NEW ThpAucpLDsllpsLtc.sao-Go-YF+hLlssFu.phRs...s+NtHL+NFYlIlPsLTlNaV-ahlpsK-KlhKKsKs....susF.TDDGFAhGlAYILKLLcQhppFDSLHWFpolcp+app-ppplp..............pp.ttp.psp...........tD.....-+h.pshp....LTh++lpshpcEasLLhhohoSARIFFc ....................................................ThpAu+pLDsVlushsc.s.uE.Go-YFKhLV-.....VFuschRs.............s+NhHL+NFYlIVPPLTlNaVEaplssKEKLtK....KNKh....uusF..TDDGFAhGlAYILKLLDQappFDSLHWFpSV+p+YtpEhcslt................cp.pspssp.....................pD.....-chhpThp....LTt++Lcsh.p.EapLLhhoLoSARIFFc................................................ 0 36 48 74 +14598 PF14747 DUF4473 Domain of unknown function (DUF4473) Coggill P pcc Pfam-B_8489 (release 26.0) Family This short family is largely confined to Caenorhabditis proteins. The function is not known. There are two well-conserved aspartate residues. 
25.90 25.90 27.20 26.70 25.10 22.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.60 0.72 -3.76 42 147 2012-03-26 13:48:54 2012-03-26 14:48:54 1 3 5 0 145 125 0 80.40 28 75.08 NEW ushs.s.....s--h+uELhuAGlSppussGlhplspcatsph.hh...pssccsscphhpchps-scsalco.StpD.QpsYpsalc.c+ ................s....t-ch+AELhuAGlSpsuscGlhplupcapsphsts...psspcuucphhsphps-scsalKo.oppD.QstYpsalc.Khp................. 0 21 42 145 +14599 PF14748 P5CR_dimer Pyrroline-5-carboxylate reductase dimerisation Eberhardt R re3 CATH:2ahr_A_02 Domain Pyrroline-5-carboxylate reductase consists of two domains, an N-terminal catalytic domain (Pfam:PF03807) and a C-terminal dimerisation domain. This is the dimerisation domain [1]. 25.00 25.00 25.60 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -10.17 0.72 -4.04 862 5462 2012-03-26 14:57:42 2012-03-26 15:57:42 1 17 4352 40 1427 4011 1723 105.90 36 39.32 NEW hsEp.hDulTAlSGSGPAYlahhlEAhscuulp..hGLscchAtpLutQTlhGuAph...lh.............................p.....................pPu......pL+cp.VoSP...GGTThsulpsL..Ecs..u.lcsslhcAlpA..AspRupEL ...............................................sEphhcslTuloGSuP............AYl..a.hhlEAh.s-..A.uVp..hG.ls............+.ppAhclssQsl..hG.uAphlh...........po.sp.......................cPu.........pL+-p.VsS.P...GGTThtulpsL..Epp...u.hcsslhcAlpu..uhp+upch...................... 0 448 860 1182 +14600 PF14749 Acyl-CoA_ox_N Acyl-coenzyme A oxidase N-terminal Eberhardt R re3 CATH:2ddh_A_01 Domain Acyl-coenzyme A oxidase consists of three domains. An N-terminal alpha-helical domain, a beta sheet domain (Pfam:PF02770) and a C-terminal catalytic domain (Pfam:PF01756). This entry represents the N-terminal alpha-helical domain [1]. 
25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.53 0.71 -3.83 150 617 2012-10-02 12:47:07 2012-03-27 09:42:00 1 15 250 8 432 619 22 114.30 24 17.60 NEW sscclsthltGuccp...hcc++....clpphlt...p-P...apc..pshhahoRpEpacpulcKutthhphhpp.hth............s.p-.................hhhh....tshhspstPhs..LHhsM..FlP....slpsQuosEQpccWLshApphcIl ...................................................................scphsthltGutpp...hchpc....cl..phlt...p-P...hpp..tsh..h..ho+p-phctulcKstphhphhp.p.hth...........s.p-...............................................hhhh....hshhst..shshs....lHhuM..Fls....slpsQGTsEQhpcWl.hu.phpIh..................... 0 160 230 355 +14601 PF14750 INTS2 Integrator complex subunit 2 Eberhardt R re3 Jackhmmer:Q9H0H0 Family This family of proteins are subunits of the integrator complex involved in snRNA transcription and processing [1]. 25.00 25.00 36.40 27.30 21.40 21.40 hmmbuild -o /dev/null HMM SEED 1049 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.73 0.70 -13.76 0.70 -7.04 13 186 2012-03-27 12:41:57 2012-03-27 13:41:57 1 7 101 0 136 185 1 667.70 34 87.28 NEW 
lspLuphscpEIRPlLPCLVRMSLhSPLDpopchs-sRKplLplLsGlElVNSI.VuLLSlDFHtLEsDlKKEQQlRpKlGhtpp-SlhhpuLps.ulsLEFERS-ssR+lRlVLSELLtlhuQlp-..........pss-..hh+sS.....-LFDs-lYLEEluDllCIAlAELPuLLsIp-lsEsLL+lcNGsplIChlVANhPDsFcEVCpuLIsNG-p.DE-sssG+hRhssLptLspMNPoQALthRupsVEhC+MPuLAltLoL-ps......p.........uDLVAFlSGLLLGsDpplRoWFAhFIRsuQKR+s-....ALphLRccLLcplpslhsp......................uh..putLs-ppVVpuuuLLRLYCALRGIAGlKFs--EsssLlQLlTS+PPPosAGlRFVoLGLCMLlACPSLI.....uspEhEppslcWlpWLl+EEAYFEssSG.........soASFGEMLLLhAIHFHSNQLSuIs-LVCSTLGMKlslRPsoloRhKplFTQEIFTEQ...VVsuHAV+VPVTssLsAslsGFLPlHCIaQLLKSRAFuKH+VPIKsWIY+QICsSVoPLHPlLPuLlEV..YVNSlls..Pss+s..p..........ppsNcPlSEpEIppVFps.....................s..pp..ph.pp................................pssLTsQLLlLYYLLLYEDsRLsNhpshlsts+p...hKsYSscFhScLPIKYLlppAp+cQpcYuuLFSPLLRLLATHaPHLslVDDWLc-Etlssp...ps.....thpss..tssloppslscAFsplpspPsp.sh+llcpLhphsspsLhPaAphllpahphlLscslPRhlpc.hhpplWhRLNoVhPRpLWVhTlNA..Lhspt......p..sLTp-slslDPLpVLRCDcRVFRCsPlhsIlLRlLpuhLAASRopLupHlp-p....Phsp.....hupts.s-s-REEL+hALlAAQESAAVQILLEsCLcTc-D+s................................pssphhtLREl+ullCSaLHQhFIADPsLAKLVHFQGYPpELLPloVpGIPSMHICLDFIPELLuQs...cl-KQlFAIsLsSHLulQYuLPKSLslu+LslNsLsTLLuVLsospRhpLFpslLPuLVRhscAFPPLs-DslslLhQlGRlstSQuuL 
..............................................................................................................................................................................................................................................................................................................................................................................................................................hh..............................................................h.h........h...h.h............................................................h...t.lh..t....................t......p..............hlh.l.ph....s...h..h.h.....p.ht.s.h.hh.lthph..................hl.hhpshlh........................................h..........................................................................................................................................hh.hhssh.....hp............hht.........p.............th..h.hshlhs.....h..h..........................pt........hhphh.pt................................shtphhlhhthhhhstphttl..hhpthlt..hc......................t.h..ht.hh.h..pthh.p.....ls..s..ph.sss..t...hs....s.t........................hshh.....sl.tL.....ptp.a...pht..h..ht.hh...ph...t....Php...h..llp...hht..h.................................................................................................................................................................................................................hhhhhahh....ht..h...t...........................................................a.....th.....hshp.hh...hpt......th...lh..hh...hh...hs...........h.....................................................................................p.................................hh.................h............h.......ha..h..h...............................................hs.......P....h..t....h..p....hh.hhl.hl.........p..................h..t.........................................................p.lt.shh..p.s.hhphLl-hh................................................................
.thpt.hs..lHphals............lh+llhaQ...................ths.t.l...st.lPuhh.h...l.ph.......p....hhsl..hhs.l...pa..l.pshths.p..hh................phh..hh..t..h............................h.h....hh.sh..h...a..h.....................h................................................................................................. 0 65 79 112 +14602 PF14751 DUF4474 Domain of unknown function (DUF4474) Godzik A adam Jackhmmer:JCSG target SP18061A Domain Domain found on N-termina of few families of uncharacterized Clostridia proteins. Typically followed by a proline-rich domain or other kinds of repeats 21.10 21.10 25.70 24.10 17.30 16.20 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.65 0.70 -5.19 12 51 2012-04-03 19:57:09 2012-04-03 20:57:09 1 9 45 0 8 47 1 222.40 33 41.00 NEW LNphhc.sGauY-hppDlFYSthcAWQRchGYs+LYDEAAshhsMlhDCEPlaFsYsGKpWLIEhWKGQYGlsTGuElGVYpss+..ls.s.ha+sTFYcslpDc-hlslShsLh+ssKslaph.pshHWWLTGFpLGpFSpPppLhh-hoITh.DppMppAFlcuLhch.G.Yp.pcEhhlpsNoVplpascP+osQPhs+sphs-shlQhpN+h.CpLYphh..T+sasps..hD+lthlpthhP-la .........lst.ht.hGas.Y-..ppDlFaSp..h-sWQ....RchGYscLYDcAusshuMlhDs-PlhFsYssKcWhIphWKGQY.s......l..sTGuElGlYpssc.s..l.......s...hhpssaYpsh......pD.s-hh..hohsLcK.s..G.+.......s...l.a...p...............p.......pstHWWLTGFp.....hG.c.F..Spsp-LshshsIsht.c.t...MhpAFlcuLhph.G.Yp.pp-h.h.tppV.hhhs.spp..t..hhps.h.cthlQhhNp..CthYp.h..Tt.h.ph...-+l.hl...hP.ha....................................................... 0 4 8 8 +14603 PF14752 RBP_receptor Retinol binding protein receptor Eberhardt R re3 Jackhmmer:Q9BX79 Family Proteins in this family function as retinol binding protein receptors [1]. 
25.00 25.00 26.70 25.20 20.90 22.50 hmmbuild -o /dev/null HMM SEED 617 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.21 0.70 -13.14 0.70 -6.20 10 202 2012-04-10 09:35:04 2012-04-10 10:35:04 1 7 52 0 108 188 1 404.80 30 72.30 NEW sssC..csoVs.sLaptChAslSllllLlLuhLsRRcphpsc.....sh+GRhGLlsPlDFLustssRhshuAsFGAlhsslshL.LhoEshhPht....sPoh......s+uhh....hlluhl.hushYYPlhAChost.a.hlutlLGhhhShsahsVpVaQplpCP..........pusplh+YhoLls.lPhLLCLuFLslpashllV+Sl+s.+tG...susp.....slp.sSa.ccYl+s...LLp+p.Lpp.t...tsc.phhSWhpsh.p.pIYsP-PsF+FPh+hlhoslLohluLYphALl.lsullsTlcclRsslsssls.aLLsuhsllhSp......sppclltpVKcaLaulEssalsollLusLlolshLh+sLVsaRppLKtLaRGsph.Lsspa+oPpPoppulsshMpaSuaQsAFlhhGhLIQpllFFLsslslsahlVlPllHGcsLhLL+uLs.hh.shalslllsllLQslhAphhFLps......+ptshsLsNRRuhashoYFLFhhNVLlGlhsulWRlLlSuLhsshhluRlDhSlLppshEohDPGYpoYhGhL+lEsupSHPVhluFCpLLLpupp..pcs..t....psol+.s..cpuhQhlpp.ccspupGuss.uuRuRsRWhLhYTLLpNPsLlshRKst ..........................................................................h.......................................................th..Phshht.........h..hhhhhhh.sthh.h....s.t...sh..................................h....hhhh.hh.h...s.h..hhPhhhChs.....hu.hhG..hsh.hh.h.hh.h....C..........................t....h....h......h..h..hhsh....hhl.h.a.h.............h....t.......t.....................................................h.t.....h........................................................................F.hs.phh.s.....hh....hh.hh...h...............................................................................................s.....hh.shh.s..h.h.h.hp.hh...ha.Rtph.thhtGt...h.....t.t....shhshhtasuaphA..hhhh..Ghhl.phh....lhh.hhhhh................l...h.hPhh.t....p..h.....hh.p....l...h....hh........hhhhhhh...lQ.hhuthhFlp........tt..tlsNR...+sha.h...sahlF.hNVllGhhssh..RllhoslhshhhluphDholh..thtsh.D.Ga.sahshLhh-h.popPshhsFCtlLlpstt.....t.................................h................................h.+W.lh.tLhpN..l..................................................... 
0 55 60 74 +14604 PF14753 DUF4475 Domain of unknown function (DUF4475) Eberhardt R re3 Jackhmmer:A4D161 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 99 and 305 amino acids in length. 25.00 25.00 25.20 25.00 20.90 24.30 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.79 0.71 -11.80 0.71 -4.80 11 195 2012-04-10 10:52:20 2012-04-10 11:52:20 1 2 81 0 110 177 3 126.00 34 61.63 NEW Yp+IVG-.DDGG+LFo.cEYEcYKKpVhPhRlKNRlasSWpsssGhDCKlIGPEThCFCsHR.aKpHcTDhpp.lspcRPhtlPC+sstCpC+saaYlPhpGSpslRCp.CKHhss-Hss..hsa+Cs+ss....pCsG.FcSsaoCuCGpss.cHpTllE....T+-ERhupGKPVup.D.......VPYtuMG.GlTGFSSLs-Gh.R....lDsSGhGsss .................................................................................................................stohCFCsHh.h+pHph.........p.....tlsCp.ttCtC..a.alP....................s..h+C+.C+H.hppHss.....shh.Cp.t.ss....tCss.FpSsah.C.s.Csp.h.tH.T.hhE....Tcp..h.t.t.....................................................t........................... 0 42 49 67 +14605 PF14754 IFR3_antag PPRSV-IRF3_ant; Papain-like auto-proteinase Coggill P pcc Pfam-B_8065 (release 26.0) Domain The replicase polyproteins of the Nidoviruses such as, porcine arterivirus PRRSV, equine arterivirus EAV, human coronavirus 229E, and severe acute respiratory syndrome coronavirus (SARS-CoV), are predicted to be cleaved into 14 non-structural proteins (nsps) by the nsp4 main proteinase Pfam:PF05579 and three accessory proteinases residing in nsp1-alpha, nsp1-beta and nsp2. This family is the two nsp1 proteins that together act in a papain-like way to separate off the rest of the various functional domains of the polyprotein. Once inside the host cell, this nsp1 interferes with the regulation of interferon, thereby enabling the virus to replicate. 
25.00 25.00 573.10 572.10 19.50 18.40 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.71 0.70 -5.38 2 79 2012-04-10 11:02:07 2012-04-10 12:02:07 1 4 2 0 0 81 0 249.00 97 12.89 NEW MATFSATGFGGSFVRDWSLDLPsACEHGAGLCCEVDGSsLCAECFRGCEGVEQCPGLFMGLLKLASPsPVGHKFLIGWYRAAKVTGRYNFLELLQHPAFAQLRVVDARLAIEEASVFISTDHASAKRFPGARFALTsVYAusWlsSPAANSLlVTlDQEQDGFCWLKLLPPDRREAGLRLYYNHYREQRTGWLSKTGLRLWLGDLGLGlNAsSGGLKFHIMRuSPQRAWHITTRSCKLKSYYVCDISEA MATFSATGFGGSFVRDWSLDLPAACEHGAGLCCEVDGSTLCAECFRGCEGVEQCPGLFMGLLKLASPVPVGHKFLIGWYRAAKVTGRYNFLELLQHPAFAQLRVVDARLAIEEASVFhSTDHASAKRFPGARFALTPVYAssWVsSPAANSLIVTlDQEQDGFCWLKLLPPDRREAGLRLYYNHYREQRTGWLSKTGLRLWLGDLGLGINASSGsLKFHIMRSSPQRAWHITTRSCKLKSYYVCDISEA 0 0 0 0 +14606 PF14755 ER-remodelling Intracellular membrane remodeller Coggill P pcc Pfam-B_2813 (release 26.0) Domain This domain represents subunit nsp3 of the RNA-arteriviruses, such as porcine arterivirus PRRSV and equine arterivirus EAV, and is a tetraspanning transmembrane protein that contains a cluster of four highly conserved cysteine residues. These are predicted to reside in the first luminal domain of the protein. Arterivirus nsp3 proteins are uniformly predicted to contain four transmembrane helices, with the N and C termini of the protein residing in the cytoplasm. NSP3 are localised to the ER and appear to be essential for formation of double-membrane vesicles that originate from the ER during the life-cycle of the virus. 
20.50 20.50 20.50 20.50 20.40 20.40 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -11.03 0.71 -4.13 5 141 2012-04-10 13:33:59 2012-04-10 14:33:59 1 8 7 0 2 101 1 140.80 66 10.05 NEW GPPPAPVSASVLDHILEAATFGNVRVVATEEQQRPVPAPRsRPSAoSS.GDVKDPAsVPPVPKPRTKLAKPSPTQAPTPAPRTRhQuA.....SsQEPPsGsusAPASAPKWRVAKTVYSSAERlRTELVQRARSlGDVLVQALPLKTPAVQRY ..................................GPPPAPVSASVLDHILEAATFGNVRVVsTEEQQRPVPAPRsR+SsoPP.GD.V.KDsAsVP.PVPKPRTKLAKPSPsQAPTPAPRTpPQsA.....Pp.EPssuTAAAPuSAP+WRVAKTVYSSAERhRTELlHRApSlGDoLVQALPLKAPAVQRY............................................ 0 0 1 1 +14607 PF14756 Pdase_C33_assoc Peptidase_C33-associated domain Coggill P pcc Pfam-B_535 (release 26.0) Domain The nsps or non-structural protein subunits of the arteriviral polyproteins such as porcine arterivirus PRRSV and equine arterivirus EAV are auto-cleaved into functional units. the function of this particular domain is not known. 25.00 25.00 27.10 25.60 21.90 18.30 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.16 0.71 -4.48 4 550 2012-04-10 15:06:29 2012-04-10 16:06:29 1 10 6 0 0 558 0 108.70 91 8.18 NEW sSPDAVEVSGFDPACLDRLAtVMHLPSSsIPAALAEhSGDssRPsSPsTTVWTVSQFaARHpGG-HPDQVCLGKIISLCQVIE-CCCSQNKTNRsTPEEVAsKIDQYLpGAsSLEECLA+LE+ARPPSshDTSFDWsVVLPGVEAAs ...SPDAVEloGFDPACLDRLAcVMHLPSSsIPAALAEhSsDss.RssSPssTsWTVSQFaARHtGGsH.DQVpLGKIISLCQVIE-CCCpQNKTNRsTPEEVAAKID.YLRGATsLEECLAKLERVSPPSAADTSFDWNVVLPGVEAAN. 0 0 0 0 +14608 PF14757 NSP2-B_epitope Immunogenic region of nsp2 protein of arterivirus polyprotein Coggill P pcc Pfam-B_58 (release 26.0) Domain This domain is in a non-essential part of the nsp2 (non-structural protein) subunit section of the arterivirus polyprotein. This domain carries seven small sequence-regions that are predicted to be potential B-cell epitopes. 
25.00 25.00 42.60 27.50 20.90 24.90 hmmbuild -o /dev/null HMM SEED 272 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -11.77 0.70 -4.92 22 1615 2012-04-10 16:54:06 2012-04-10 17:54:06 1 14 11 0 0 1542 0 119.20 49 22.05 NEW VKsYPRWTPPPPPPRVQPR+TKsVKSLPEsKPVPAPRRKVRSDCGSPlLMGDNVPsuhEDLsVGGPLshPTPSEPhTPhSEPsLsPu.Q+ls+PsTPLStsAPVPAPRRTVSRPhTPLSEPIFVSAPRHKFQQVEcAN.AusTLTpQDEPLDLSASSQTEYEAsPLAP.QNhGlLEsGGQEAEEVLSEISDlLNDhNPAPVSSSSSLSSV+ITRPKYSAQAIIDSGGPCSGHLQ+EKEACLSIMREACDAoKLuDPATQEWLSRMWDRVDML ................................................................................................................................................................................................................................................................................................................................................................ 0 0 0 0 +14609 PF14758 NSP2_assoc Non-essential region of nsp2 of arterivirus polyprotein Coggill P pcc Pfam-B_6704 (release 26.0) Domain This non-essential region of the nsp2 subunit of the arterivirus polyprotein of such as porcine arterivirus PRRSV and equine arterivirus EAV may offer immunogneic surfaces to B-cells. It is associated with Peptidase_C33, Pfam:PF05412. 
25.00 25.00 29.10 145.00 21.80 19.00 hmmbuild -o /dev/null HMM SEED 203 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.45 0.71 -4.19 18 61 2012-04-10 17:03:01 2012-04-10 18:03:01 1 4 4 0 0 61 0 167.70 66 15.20 NEW KtFEtsAsEEVQEuGHKAVHSALLAEGPNsEQVQVVAGEQLcLGGCGLAlGsAp.................SssDSMKENMhNShEDEPLDLSpPAPAuTTTLV+EQTPDNPGSDAGALPVTVRcFVPTGPhLRHVEHCGTESGDSSSPLDLSsAQTLDQPLNLSLAAWPVKATASDPGWVHGRREPVFVKPRcAFSDGDSsLQF ................................EEVQESGaKsVHSA.hAcGPNcEQVQVVsGEQLKLGGCsLsVGNA+tss.sSuu.h...................shtsEPLDLSpPAsAATTT..tEpTP-NPGsDAGALPVTsRcFVssGshL+HVEHCGTESGDuSSPLDLSDAQs.DQPLsLSLssWPV+sTASDPGWVhGtpEsVFlKPRtshSDG-SshQh 0 0 0 0 +14610 PF14759 Reductase_C Reductase C-terminal Eberhardt R re3 CATH:2gqw_A_03 Domain This domain occurs at the C-terminus of various reductase enzymes, including putidaredoxin reductase, ferredoxin reductase, 3-phenylpropionate/cinnamic acid dioxygenase ferredoxin--NAD(+) reductase component, benzene 1,2-dioxygenase system ferredoxin--NAD(+) reductase subunit, rhodocoxin reductase, biphenyl dioxygenase system ferredoxin--NAD(+) reductase component, rubredoxin-NAD(+) reductase and toluene 1,2-dioxygenase system ferredoxin--NAD(+) reductase component. In putidaredoxin reductase this domain is involved in dimerisation [1]. In the FAD-containing NADH-ferredoxin reductase (BphA4) it is responsible for interaction with the Rieske-type [2Fe-2S] ferredoxin (BphA3) [2]. 
25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.31 0.72 -9.77 0.72 -3.69 153 1813 2012-04-11 08:06:00 2012-04-11 09:06:00 1 26 1065 20 581 1646 395 82.00 28 19.73 NEW WFWSDQa-h+LQhAGl......ssutDp...sVlRGs.ss.t...t..sFol.a.....ah.+supLlAlculNp....sp-ahhu++Llsput.ssssstLA...DsussLKsll ..........aFWSDQYs.hplQhsGh.......s..p..u.h..Dc......h.l.lRGs..ss...p..........p.....phhs.a........ah..psu.....pllussulNp.......s.c.p.h....t.hs.+.chltsut..sh..s..s...t...lh.c.t..L................................. 0 153 355 470 +14611 PF14760 Rnk_N Rnk N-terminus Eberhardt R re3 CATH:2pn0_A_01 Domain This domain occurs at the N-terminus of Rnk, an RNA polymerase-interacting protein of the GreA/GreB family (Pfam:PF01272). It has a coiled coil structure [1]. 23.00 23.00 23.00 23.10 22.90 22.60 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.40 0.72 -7.89 0.72 -3.88 110 932 2012-04-11 13:03:29 2012-04-11 14:03:29 1 4 909 6 162 424 20 41.70 54 29.57 NEW pPsIhlophDhcRL-pLl-.....shstps.sstptLpsEL-RAclV ....+PoIIIs-LDsERl-tLLE......psAhushPlA-ALsAELDRAph.... 0 32 73 117 +14612 PF14761 HPS3_N Hermansky-Pudlak syndrome 3 Coggill P pcc Jackhmmer:Q969F9 Domain This domain is at the N-terminus of these vertebrate proteins. This region carries the clathrin-binding motif LLDFE at residues 172-176 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 [1]. 
25.00 25.00 26.30 25.30 23.80 24.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.64 0.70 -4.94 10 92 2012-04-11 15:52:16 2012-04-11 16:52:16 1 4 66 0 50 89 0 187.50 42 18.61 NEW RlhssHsFsSQcVssscp.EPsthCsu...GtDtLFlu..suuCpVEVasls.pppspshssFuTlupVlplsYScsGDYlVTIEcKspso.............alRsYlNWcsp..pscsstVslRhsGhphpssps-ss.pcQhEllElPL.scsPhCIuCCsloGsLLVGssspLllF.pLKspsl....scchphlDF-cpLI.hhsuasPscluhCssYIAlho-LEVhllKLsp ................plhshHsFtuQpls.sp...EPt.hCsu...G.-tLFl...suuCc..VEsaslt...pEhsp.+ssFuTl.GcVl.plsYo-u.GDYLlslEcKspso............................FlRsYsN.WRpt...................psppshVsl..Rhh.....G....hpsshspsh...cpphpllEhPL.scsPhsluCCsspGsLLVusps+llLapLphphh.........spchuhlDFE.pplhhhh.shsPhcluhCssalAlho-hElllhKLp.s................ 0 11 15 30 +14613 PF14762 HPS3_Mid Hermansky-Pudlak syndrome 3, middle region Coggill P pcc Jackhmmer:Q969F9 Domain This domain is downstream of the N-terminus of these vertebrate proteins. This region carries a number of tyrosine sorting motifs and one of two di-leucine sorting boxes at residues 542-548 well as a peroxisomal matrix targetting motif at residues 614-623 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 [1]. 
25.00 25.00 34.40 34.00 17.70 24.20 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.10 0.70 -5.86 9 100 2012-04-11 16:43:41 2012-04-11 17:43:41 1 6 68 0 54 97 0 358.40 41 36.33 NEW ptpPhElLG.csp.ssls...lslc.oTslss-hppthpV.............ppLLaRRFsPchpshhhs-ph..+LHSLQLhPhYpps...................thtss.ppsspppcLhulhCFFShPppGYLYslspu......ssLlS...sYtYspcsppsVLsspFLHsITpsuLpsaTlRsSssss.cpsshlDshhcsCPshohcVChltlp.FIGLpslsphcsallLLops-spph.ph.......................ttsuWsLYhl.sssohhQLYp-hl-huppYcsspspohhHLLuEAHLLlRsAL..hchs.tcsscKpEL..............hpAapESCuhLGDhaSR.-.spchcLALPYY+MSGLphs-llpR..h.ht........shppuh.sRGhIaaLpHuL......hcchsEpLScphAspVlphFthu-PcpL ................................................................................t..Ehlup.sppsuhs...lhlE.sTuhts-tht.hpl.............ppLLa+RFs.PDhsph.s..s.--h......+LHSLQ..LhPIYQpu...........................................thpsctps.o.p..c+chlul..FCFFShPcsGYLY.lsps.......VcLhS...sYpYs-+spQAVLospFLHVITS.........ssLQsaTVRCSAssA+cc...DsY..hDTT...hKuCPPVSh-VCsLRlQlFIGL+slCph+sHllLLTKAss.Esh.cRpps.hphhph......................................t.tssp.t.sssspsuWsLYll.sshsshpLYc-hl-YupoY.+os..po..po..hhHLLuEAHLLlRuAL......h-ssphc...s....s.c+t...EL............................................................hcAF+-SCu+LGDpa....SR.........hs..ppp..cL..AlPYYKMSuLshs-V....lsR.hthshp...........st.pph....tcGLlaYlp+sL.........hcphspp...L...scphu..s..cllpha.ht-Ppp.............................. 0 16 19 34 +14614 PF14763 HPS3_C Hermansky-Pudlak syndrome 3, C-terminal Coggill P pcc Jackhmmer:Q969F9 Domain This domain is downstream of the mid domain family, Pfam:PF14762, of these vertebrate proteins. This region carries a number of tyrosine sorting motifs and the second of two di-leucine sorting boxes at residues 711-717 well as the ER membrane-retention signal KKPL at residues 1000-1003 in SwissProt:Q969F9. There is also reference to a human Mendelian disease at MIM:614072 [1]. 
25.00 25.00 27.90 27.50 22.30 20.70 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.17 0.70 -5.46 4 71 2012-04-12 10:25:22 2012-04-12 11:25:22 1 4 47 0 35 70 0 308.80 58 33.88 NEW MKNlsPLpAlpYLRKL-s..hsSVLVTLTKAsMALKMGDLDMa+NEMpSHSEMhLlCGFlhEPRLLhpp+cGpllPTEhAlaLK-sQPGLLVASlluLpcNsKIslEEAD.FFKsLCsK..DEDsVPQLLVDFWEAhLVAC.P-sVLpELhFKLTSQYVWRlSK+phP-ThPL+TsEDLINoCSHaGLI.PWVshlMSs-ShhsKshsEDl.KLQSllsGPShDltshLPaLEsLu-ssNsGLolH.lLChTRLtpYEcsIDpLLc+hPEAVl.YApHELKE-spslWWpKLLPELCpRl+pstschplalSSLKETLSVVAsEL-LRDFLNlLPEDGTAAFFLPYLLaCS+KK.Ls .............MKNlsPLsAhpYLcKL-ssGhsSlLlTLTKAAhALKMGDLcha+sEMcpHuEMpLVhGFILEPRLLlQQ....+KG....QllPTELAhaLK-TQPGLLVASlLGLQKNsKIulEEADuFFKV....L.CuK..DEDslPQ......LLVDFWEApLVAslP-lVLQELhFKLsSQYlWRLSc.........+....ps....P.DT....hPLRTuEDLINuCSHYGLlhPWVplLhSs-ShsDKsasEDL.K.LQSLlCGPShDlASIlPFLEPLS.ED.ThA..G.LSlH.lLCpTRLpEYEpsIDpLL-RCPEAVIsYANHELKE-.scsLWWK..KLLPELCpRl.+...s.....G...GE+.p...LaL...SuLKE..TLSllAsEL-L+DFlNlLPEDGTAAFFLPYLLaCSpKK.l................. 0 5 7 18 +14615 PF14764 SPG48 AP-5 complex subunit, vesicle trafficking Coggill P pcc Jackhmmer:O43299 Family This family would appear to be the second of the two larger subunits of the fifth Adaptor-Protein complex, AP-5. Adaptor protein (AP) complexes facilitate the trafficking of cargo from one membrane compartment of the cell to another by recruiting other proteins to particular types of vesicles. AP-5 is involved in trafficking proteins from endosomes towards other membranous compartments [2]. There are genetic links between AP-5 and hereditary spastic paraplegia, a group of human genetic disorders characterised by progressive spasticity in the lower limbs [1]. 
25.00 25.00 26.50 25.30 21.30 22.00 hmmbuild -o /dev/null HMM SEED 460 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.64 0.70 -12.59 0.70 -6.00 14 140 2012-04-12 10:55:25 2012-04-12 11:55:25 1 4 79 0 89 136 0 309.70 30 54.76 NEW -LQcACLlEuVtlLchlC+p....DsS..hlhRshPpl+pLasRls......ussu.......pu....psLLsIhQFFLsHGEs.sshDu-ushcphFspl.ucpFpcPhLAa-hlpFhhpNp.plLssp.sslhpp.FPsLLK...........hLAWsu.sLhscFVtlLPsLlsssTAlElLHslLDLPCLoAAL-hphRuus......su..tpslhs.p........................scssush-uhpsPhtcsLFpalLRsEuu..ssh.-R...Lss.LHplLtshussP...RVhQCupslPsLLclaFsslhcpAsssLlspLl.lLLERsstLa.lpsapt-V++VLSptlLtLsph+PsLlV-Lp+-lL-FlG.ospsh.pu+p-hasallWAlGEYhSsuhD+RCos-hhspaFEsLEs.lLaElopop.............ssushspsssRllssLMTsLsKLAoRsp-LlP.RV...uLhLoKhpo...psh.t.s.s-ps..sttlhpRAs-LlsLLKhPuVAthVLs..Posss ............................................hhpsl.hh..hsp.....ssp..hlh+shshhptlhtRh.......ss.s........s....hhlLslhpFaLsa.u-h..shhDs-s.ht.hhtt..h.s..a.s.hhs.thhthh..p....h........h...hP.lh+...........hlA.ps..h.t.h..lhP..hhsstohh.hh..lhDLPhlshsh...........................................................................................................s.ts.......hpsha..lL+scss...s.htp.........s.h.thht.h.t.s...............RhhtshphsP.lLphaFsssh.p.s...st................sLhs.tLh.hlhtR.s.ha....aphp.......lpphh.p.hh.hhphpPthlh...tp.l.t....t........tt.hh.plsWhlGEa.us..........t.h.thaEsLEh.hhac..................................................................phhh.lhsshsKlAsh..-h.s.+s....l..........................................................s..................................... 0 26 52 67 +14616 PF14765 PS-DH Polyketide synthase dehydratase Coggill P pcc Pfam-B_852 (release 26.0) Domain This is the dehydratase domain of polyketide synthases [1]. Structural analysis shows these DH domains are double hotdogs in which the active site contains a histidine from the N-terminal hotdog and an aspartate from the C-terminal hotdog. 
Studies have uncovered that a substrate tunnel formed between the DH domains may be essential for loading substrates and unloading products [2]. 27.00 27.00 27.10 27.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.91 0.70 -5.26 177 5569 2012-10-02 20:54:35 2012-04-12 14:52:00 1 1171 1046 21 2255 5792 232 275.10 19 12.09 NEW HPLL.G.tplths..sssphhapspLs.hps.PaLsDHtl.tGpsllPGsualEhAhpAuppht.....tt........shtlc..-lslppsll.ls.pss....shplplslpss.stst..............phplhSpsssst...............WshHspGplt.......................sh..h..tpt..s.s...ls...stthYpph...t.phG.ltY.GPsFps.lc.plhp.............s..p.shApltls....pshtttt.........ahL.HPulLDuslp.sh..................tt.ttshLPh.ulsplplh........t.....t.hhscsch.....tsts......htsclplhD.ssGpslsplcuLph+tlsstshts ...............................................................................................H.ll....t....s.....t.tth...h..hp.s.p.l.s..hp...p..h..s..a...L.........s.D.H.t...........l.....t...........G...........p............s...........l..hPu...sual-...hAl.p....Au..p..pht..............................................................shplc....-lsh.t.p.sLh...ls.......t............s..........s........s...............hplp..l....s.....lpss...tttt............................p.h..plhopss..sst...........................................hshHupGtlt...htttt................................h......s.................h....t......t.........h...............................t............t....................s......................................ls........................ssp...h...Y..pth............t...phG...h....p.Y..Gs....s..F...pu..lp..phhtt.........................ss..p..shAclt..ls....................psht.t.t.tt....................................ahl...HP...u..........lL......D.ushp..sh....hhhh...............................ttsts..tshlP......h....uh.pplplh..................................ssth.hsh.sph...............................stts....................h.t.s.s...l.p.l..
........h.......D....t........s......G.......p........s...l...h...plpulthp.ht.....t........................................................................................................................ 0 518 1299 1894 +14617 PF14766 RPA_interact_N Replication protein A interacting N-terminal Eberhardt R re3 Jackhmmer:Q86UA6 Family This family of proteins represents the N-terminal domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. The N-terminal domain is responsible for interaction with importin beta [1-2]. 24.00 24.00 25.30 24.50 23.60 22.80 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.43 0.72 -7.87 0.72 -4.49 25 123 2012-04-12 14:36:35 2012-04-12 15:36:35 1 4 80 0 63 129 0 41.10 45 19.07 NEW ss++s.hKt.psPsWK-plRccChcRlRcpRscLLp+hRtss .............+RshaKh...soP....sWKEsaRpcCl-RhRssRs+LLs+aRps.... 0 20 32 47 +14618 PF14767 RPA_interact_M Replication protein A interacting middle Eberhardt R re3 Jackhmmer:Q86UA6 Family This family of proteins represents the middle domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. This domain is responsible for interaction with RPA [1-2]. 25.00 25.00 25.80 26.00 24.10 23.80 hmmbuild -o /dev/null HMM SEED 83 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.86 0.72 -3.23 27 109 2012-04-12 14:37:26 2012-04-12 15:37:26 1 3 65 0 48 112 0 74.40 36 36.46 NEW p.hlp-lhp-EhppLppsspsh..................hh..psh.phh......pEh--.Lt.h-.....-lppEhhpp.EhphhtphEpthphE-phLsthlp ............h..lVQEVMEEEWpsLpssps.............................hscshst....pEh.-.LusLE.....EIpQELlpp..Etsl...lp.EaE.cshph--phLs.hl........ 
0 8 18 31 +14619 PF14768 RPA_interact_C Replication protein A interacting C-terminal Eberhardt R re3 Jackhmmer:Q86UA6 Family This family of proteins represents the C-terminal domain of replication protein A (RPA) interacting protein. RPA interacting protein is involved in the import of RPA into the nucleus. The C-terminal domain is a putative zinc finger [1-2]. 25.00 25.00 25.70 25.70 24.50 23.60 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.64 0.72 -10.40 0.72 -3.58 50 138 2012-04-12 14:38:09 2012-04-12 15:38:09 1 4 109 0 85 130 0 80.90 31 35.91 NEW lCPVCppspLpt...spph...lhC.......sC.G.lplspp.............thshc.pLpshLppslspHtpp..C..spsPpFslp..ssss.t....sLhhpCpsCcahpll ......lCPVCpptsLph......sssh......lhC........pC.G.Lplssp................................ttplohp.pLctpLppslscHttp..C..spsPpFslp...........sssp.p....sLhhpCtsCchhsll................... 0 27 41 66 +14620 PF14769 CLAMP Flagellar C1a complex subunit C1a-32 Coggill P pcc Jackhmmer:Q6P047, Pfam-B_2704 (release26.0) Family This family represents one small subunit, C1a-32, of the C1a projection (the seventh projection of flagellar) [1]. Numerous studies have indicated that each of the seven projections associated with the central pair of microtubules in flagellar plays a distinct role in regulating eukaryotic ciliary/flagellar motility. The C1a projection is a complex of proteins including PF6, C1a-86, C1a-34, C1a-32, C1a-18, and calmodulin. C1a projection is involved in modulating flagellar beat frequency and this is mediated via the C1a-34, C1a-32, and C1a-18 sub-complex by modulating the activity of both the inner and outer dynein arms [2]. 
22.70 22.70 22.70 22.80 22.20 20.90 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.32 0.72 -3.86 50 207 2012-04-12 14:49:21 2012-04-12 15:49:21 1 5 87 0 139 195 0 94.30 27 33.37 NEW hpslhFspp.psaohppsSshhs..lhpplhp..sht...pthshtcshpha+pll.hppulp.+Pshs...htlFohppl+tlh..-ahhpo...aaRHY+LYpasFosphphshpt ......................................................pshhasht.psFoh.phothhs..lhpplhp.sh.........t..t.thslp-shphhppll.hpausp....cs.hs..............htlFshcpltslh..DYhhpo......aa+HaKLYcalFssppchpl..s......................... 0 59 70 89 +14621 PF14770 TMEM18 Transmembrane protein 18 Coggill P pcc Jackhmmer:Q96B42 Family The function of this family is not known, however it is predicted to be a three-pass membrane protein. 26.90 26.90 28.90 28.50 26.00 24.80 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.61 0.71 -4.58 43 161 2012-04-12 16:07:47 2012-04-12 17:07:47 1 4 128 0 104 149 2 117.70 37 69.08 NEW cshhsFhpulDWp.EPWlluLlsFH.llhllsslhoR+phsh..QhslFhlhlshVahuEplNchuupp...W..+pFupp..pYFDspGlFlSlVaSsPLLl.shllllshlhphsplhlclKptpL+c+t+p ...................s..h.shhhslDWp.EPWLhuLhsFH.llhllhslhoppphsh..Qlh.lFlhhlhhVYhuEhlNchuAtN.W.....+.FSpp..pYF...DspG.hFISlVaSsPLLlsshll............llphlhphsp...lMsclKptpl+c+t+.h............... 0 37 58 81 +14622 PF14771 DUF4476 Domain of unknown function (DUF4476) Coggill P pcc Jackhmmer:Q86XN7 Family \N 22.50 22.50 22.70 22.80 22.10 22.10 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -10.08 0.72 -3.95 42 221 2012-04-12 16:46:32 2012-04-12 17:46:32 1 12 125 0 88 194 12 93.60 26 26.52 NEW tspshsshphpplhphlpph.sF-s-+lphlphhtts....p....hosspssp.llphasFsss.+lchLchlhspIhDh...p.stptlhssFs....Fs.ss+p+spcll .........................h...hhs..phcphhphl+hh.sas-sphchlcshhpp.....hs...hosspssp.llshaoFsc-.+lpslclltspIlDt...p.Nhp.l.chhp....hs.SpKc+h+chl....................... 
0 51 59 71 +14623 PF14772 NYD-SP28 Sperm tail Coggill P pcc Jackhmmer:Q96MC2 Family NYD-SP28 is expressed in a development-dependent manner, localised in spermatogenic cell cytoplams and human spermatozoa tail. It is post-translationally modified during sperm capacitation and ultimately contributes to the success of fertilisation [1]. 22.10 22.10 22.10 22.10 21.70 21.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.16 0.72 -3.94 28 241 2012-04-13 08:02:08 2012-04-13 09:02:08 1 6 113 0 152 239 2 101.00 27 17.72 NEW +VusDtREtpRRhcEppt+pphpp+LcpEstpshpchppIst+WsplhphphPp-LpcplppQ+ptCpcllppK-plIs-hpp-LcppD-cYlpsl+cQucDl- .......................t.thcE.pR.Rtc.pp..h.pp.hhcK....Lp.pEtcp.optphscI....spcWcphhcp..tpspELpcplpt.ppphpcllcpKcplIp..........p.L.pp........-Lcpt--pYspsl+pphcslp....................... 0 60 76 113 +14624 PF14773 VIGSSK Helicase-associated putative binding domain, C-terminal Coggill P pcc Jackhmmer:A4D997, Pfam-B_8865 (release 26.0) Family The function of this short, serine-rich C-terminal region is not known. However, as it is frequently found at the very C-terminus of P-loop containing nucleoside triphosphate hydrolases, it might possibly be a binding domain. 19.30 19.30 20.10 19.50 19.00 17.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -8.99 0.72 -4.06 20 116 2012-04-13 08:13:48 2012-04-13 09:13:48 1 6 103 0 91 108 0 58.80 41 6.30 NEW SQLAAhlp..........tpsptttppppsstsKpDPIQAIL.AuAGVEYTHENSEVIGoSKlEEpLSRRAE ...................................h.................ptths.t.+pDslpuIL...usuGVpYTHpNsEVIGSSKlEppLSRpAt...... 0 15 36 60 +14625 PF14774 FAM177 FAM177 family Eberhardt R re3 Jackhmmer:Q8N128 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 134 and 205 amino acids in length. 
25.00 25.00 25.10 25.40 24.20 24.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.45 0.71 -4.20 23 150 2012-04-13 08:18:13 2012-04-13 09:18:13 1 4 89 0 91 148 2 107.10 36 57.19 NEW tssshppsE.......pcctps..hpsP+RllaFuDG.sMEEhSo-EE..-cp-..ttppsh.hss.lDsscLshGPalhapsh+lusssluuCDalGtplAohhGITss.KYQYtl-EY.Rhpcccpccpp-schs ...............................sp.......-.....t..t...hphP+RlI+FssG.sMEEYSo-E-...pt.p....pps..hss..lDs..scL...sWGPalhahhh+huosolusCDaLGE+lAshhGIosP.KYQYsl-EYhRhppccpccppcpp....................... 0 27 40 66 +14626 PF14775 NYD-SP28_assoc Sperm tail C-terminal domain Coggill P pcc Jackhmmer:Q96MC2 Domain NYD-SP28 is expressed in a development-dependent manner, localised in spermatogenic cell cytoplams and human spermatozoa tail. It is post-translationally modified during sperm capacitation and ultimately contributes to the success of fertilisation [1]. This short region is found at the very C-terminus of family members of family NYD-SP28, Pfam:PF14772. 27.00 27.00 27.60 27.80 25.70 25.70 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.48 0.72 -8.85 0.72 -4.27 38 177 2012-04-13 09:00:41 2012-04-13 10:00:41 1 6 106 0 122 167 5 57.70 34 9.39 NEW caWpphupllspcphclWcsLpcuLp+YhclLpcRtpllp-s...ppLcpQNsEL+sLLpQYl ...........taWpths.pshstpp.clWcsLtpuLp+Yp..p..lLppRspLlpEs...psLcpQNpELcpLLpQYl.... 0 56 73 100 +14627 PF14776 UNC-79 Cation-channel complex subunit UNC-79 Coggill P pcc Jackhmmer:Q9P2D8 Family This family is a component of a cation-channel complex. 
27.00 27.00 32.60 31.50 26.70 22.00 hmmbuild -o /dev/null HMM SEED 525 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.82 0.70 -6.12 7 116 2012-04-13 09:36:58 2012-04-13 10:36:58 1 3 72 0 67 108 0 431.60 53 21.57 NEW .p.DssRhuhYPNL-YpsLYsALs.LlDVsPLIQhG.psFGpAlLQCLuCLLPFL-+DlIDsLPYLsASoluVLPspLHp-IlNhLCaYILPFTI..TRpp-.ppEshssQSVouVIMhVhQYosNPuHHCQLLECLMsLKpsVhKDlLsVIAYGTusuRuSAAKLLFYYWPsFsPNlaDRKslhsKhs.shsPFsCQR-tCPNAGNAEAsKVCYDHsISIsausDoPPPLYLCIECANcIHREHss.hFhDILHPMQplShlCENKNCRSp-KpAlSICFSoECASaNGNHPIRYCpQCHsNRHNsRRGuD.....HlsHpuLssshphDuEhQsahVEulVSLL+EAc.hs.psp+-spp.pt.....................tsssssssDshohEERQLLGRYGlWLLVGhCTPs-sTPsElLGRLLuMLFHWFHsTAY.aD..sQs..ESolEKLKs-aVCsWLp-ls+sHacVFISCLLPHPsEYuRVGGHW-T.LsS+ToHLKEGLpRLlCLlPYEVIop-lW-hVMP+WhEAIsNDVPE+ELsELKI .....................................cstphs.aPsLpYtsLY.sls.LlDlhPhlphu...shupulh.sh....tslh.FL.pp...-.lppLPhhh..Sslu.shPs......LHpsIlphLsh.hLPhsI...op..+pp..s..stss..SsSShl.MhshQ.YosNPsaHCQ.LLECLMphKppVhKDlLhVIAYGsups+ssAsphLFaYWPshpPshh.pchhh..hphT....sasPhpCQ+.cC.NA.hN.t.AsK.hChD.olSlshu...DpPPPLY..LC.ECupcItt.............-Hsph..hh.....DlLhP.t.p..lShlCp.pKNCpSp.....s+pAlshCFSst.........Csuhp........GN+PlRYCppCHoN+HsschGus.................................a.sp.s..sshphssE...sphVEAVl.......SLL+EAc.hstppphEhscpcpht..p.....................hsssshs.s.....cspstc-p+lLupaGIWhLVuLCT.Ps-sTPsEsLuRLluMlFpWFHsTAYhhD.............DpV..GShlEKL.....KspaVsc...............WLKslCcl+acVhl.C.LLP+P.EaARVGGaW-p..sSpsopLKEGLsRllCLlPYsVIo....ppl............W-plMPcWhEAIps-VP-ppLpEh+.t............................. 0 23 27 48 +14628 PF14777 BBIP10 Cilia BBSome complex subunit 10 Coggill P pcc Jackhmmer:A8MTZ0, Pfam-B_35417 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. 
The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. BBIP10 localises to the primary cilium, and is present exclusively in ciliated organisms. It is required for cytoplasmic microtubule polymerisation and acetylation, two functions not shared with any other BBSome subunits. BBIP10 physically interacts with HDAC6. BBSome-bound BBIP10 may therefore function to couple acetylation of axonemal microtubules and ciliary membrane growth [2]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. 26.00 26.00 26.70 26.20 22.80 21.10 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.27 0.72 -4.60 4 68 2012-04-13 10:27:55 2012-04-13 11:27:55 1 3 62 0 48 71 0 65.60 41 58.35 NEW MuEs..KsshRE....VLPKQG.L.hEDssshVLCKPKLlPLKSVTLEKLEKMQpEAQ-sVRpQE.ApKpp .............................t.......ll.Pcp..G.La.hE.-.h.hshVLCKPKLlPLKSlTLEKLEKM..p+cAp...cpl+ppc.spp..t................ 0 17 22 39 +14629 PF14778 ODR4-like Olfactory receptor 4-like Coggill P pcc Jackhmmer:Q5SWX8 Family In C.elegans, odr-4 and odr-8 are required for localising a subset of odorant GPCRs to the cilia of olfactory neurons [1]. 
Olfactory receptors (ORs) are synthesised in endoplasmic reticulum of the olfactory neurons, trafficked to the cell surface membrane and transported to the tip of the olfactory cilium, where they bind with odorants. Various accessory proteins are required for proper targetting of different ORs to the cell membrane. ODR-4 was the first accessory protein to be described. 24.50 24.50 24.60 29.60 22.90 24.00 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -12.36 0.70 -5.76 38 177 2012-04-13 12:22:49 2012-04-13 13:22:49 1 5 117 0 110 194 1 313.30 28 78.69 NEW GLllGp.tssp+..saVlplspTPpc-ssss.................................t..shpslDp-Wls-HAppVoRMLPGGhsVlGlal..ls.scsshcpss.hpthpplls.......................tsphhhhhspchspphhlalshss..p.phsC+shshts..ssuoh+PsDaKht..pt.spWhplpsshsl-hhlPlt..tspsst.shc......cplppslphhscplpsuhsl..lsGchhptp..l...........................................................psh.plplllP..............tpsspppstplpsssuslphpGslps+ualps.+solu-.AhpslKpDIl+SLpoRl-lhhDsLhpspsssss....t................................hppLPRRVhhsl.st..............sl.hsDYLFtsEsspcshtphp-lLshphsspslstshE ............................................GLllGp..sspc..shllhhstTP.p-pttt............................................t.p.tslDpcWhs-HApQ.VoRMLPGGh.VlGlal..hs...s.thh.pp..p...phhpplhh.......................tpphhshh..p..pphs-plhlals.ss..+...+hhC+shsh.s...spush+PsDaKap......th.spWhplcCshphshhlPls......tsss..shc......cphppsl.pth..s+.pl.psuhhl..lsGp...lhtpctsL....................................................................................s..tttpt..plpllh.h.......................tssppp.stplp.ssGslphpGslps+Aalps.+splp-.AhpslKcDllpol..tsRh-lhh-slh.sp..tpt....t.................................h.hP+Rlhhsh.ut..............slhhsDYhFtsEsspchhpphh-hLshphp.pth............................ 
0 38 54 83 +14630 PF14779 BBS1 Ciliary BBSome complex subunit 1 Coggill P pcc Jackhmmer:Q32MM9 Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. BBS1 predominantly localizes to the basal body and or transitional zone of ciliated cells. It has been found in a heptameric complex with BBS2, BBS5, BBS7, BBS8, and BBS9, termed the BBSome. Mutations in BBS1 can lead to retinal inadequacy [4]. 
27.00 27.00 38.60 33.40 24.30 22.90 hmmbuild -o /dev/null HMM SEED 257 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.66 0.70 -5.41 14 145 2012-04-13 12:46:37 2012-04-13 13:46:37 1 7 104 0 91 136 1 216.40 41 43.55 NEW KWLcA..phDssusLaThSoClsLuDlpuDG-h+LlluDlG..s.tstph...KLKVa+GsplhoEpsLsDlPoulsoFhh-ppEPRhPsIAVAsGsslhlYKNh+PYaKFTlPuh-lssLEp-lW+pst.tsclss.sLcphL-s.L+s.hutt.pLos+S.caLpLc.c-htuFlppapsssltRposITsMsolKKsou-psulSCLVluTEsG-lalLDspAF....slLhp.......hslsSl........so..Gpas...l-aRlsVusRsGslh..hLRR ........WLpA...hh...-shAslpshSuClsLu..DlpGDG-h+LlluDhu.........t..p.....+LKVh+GstlhpEpsL.slPsuhssFh.h-p.p-.P...+.h.....P..slAlAuGsslalY+NL+PaaKFolPtl..ssh.Et-lWppht.ps.plss...shtp...hLcs.l+p...hst...L.ShpShchL.tl.c.........p-.....h.ttFl..pp...a...+s..pslt+pssI...Tshssl++s.s-csusSCLVlGTEstclhlLDspuF.slltp..................hpls..usPs....hltss..Gpap.l-aRlssusRsGplYhl+............................ 0 37 46 72 +14631 PF14780 DUF4477 Domain of unknown function (DUF4477) Coggill P pcc Jackhmmer:Q6NW34, Pfam-B_4074 (release 26.0) Family \N 24.90 24.90 25.10 25.70 24.60 24.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.85 0.71 -4.92 18 140 2012-04-13 15:23:06 2012-04-13 16:23:06 1 4 103 0 86 128 0 160.00 28 39.79 NEW WNchcLppPshsoh......psppshhlcslhtslschlppLpu....pph-pEuAlLsRllY+h+NpF+pp+uapulpplppsLpRLhphsLspslpslpshLPs......sspstssslPo+sslEalLVRlLGhsKLhhRlh-sCppAhphhspplp.saFhphsslhhuhluRla.lL.s+slhppsssLYscLhs .........................................................................hh..hpp.....ptLptEssll.ptllYpp+Nphtppp.ahtLppVcpsL++LpphsLpsslpsl........hplh..ss............................pst.th...lPo......ps......s.....h..-......h.lhh+lLGss+LlhRlh-sCpcshhhhsppLthp.Fh.hsllhhulluRlh.lL.hptlLtchh.lYp.L..t....................................... 
0 21 37 62 +14632 PF14781 BBS2_N Ciliary BBSome complex subunit 2, N-terminal Coggill P pcc Pfam-B_5448 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia [5]. 
26.30 26.30 27.80 32.80 24.70 23.40 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.62 0.71 -4.54 26 130 2012-04-13 16:13:47 2012-04-13 17:13:47 1 10 98 0 86 119 1 128.20 45 19.35 NEW l.slG+aDGppP..sLssAT.suGKVhlHsPappttts............ppsslshLNlNpploulsuGtLp....ssspDhLllGoposllAYDVcpNsDlFYK-lsDGlNulll.G...plus..ppPLsllGGNCulpGFDtcGsEhFWTVT.GD ...........................slG+aDGh+P..sLssAT.puGKVhlHsPHppsth.st..................s.cuclshLNINpslosLsAGhLps......p.shDsLllGTpTsLLAYDVhsNsDlFY+E................ls.DGsNul.llG................pl..Gs.....hss........PLsllGGNCulpGFDt-Gs-lFWTVTGD...................... 0 36 44 70 +14633 PF14782 BBS2_C Ciliary BBSome complex subunit 2, C-terminal Coggill P pcc Pfam-B_5884 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. 
BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia [5]. 25.00 25.00 25.60 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 431 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.63 0.70 -12.15 0.70 -5.91 25 156 2012-04-13 16:18:48 2012-04-13 17:18:48 1 11 100 0 98 140 0 345.70 40 60.16 NEW oGEVlaKDshsu..ulAulltuDYRhs.GpspLIssSlDG-VRGY.........sstpppssthpsssppctlccL.p+KQsLhhELcshEcshctt..............tssspsshIPssTplpsslpss.....ppspl-LsluTsN...-olI+usllFA..EGlFc.GEoaV....pssssosslclsLhss..+Dssl-l+lKshVGhp.sSspa+VFElohpLP+FsMYthspss.s................pPsuhVsFpls-..RhpRlshWlspsF..hlsptlp.......tssshclphhsLR...ssps.Lhlchssss.........plpIpoDch-lAGDllQuLssFl......slc.-lpspu-FPtphccLcplLp+Vs-hpulRt+Lou-hADpushlKslllRAEDARllsDhcsM++hYsELhslN+-LlspaphRssN+scLlssLKclNphIQ+Au+LRlGpsKspllusCRsAIKsNNlpuLhcII+ ..................................sGEVlaK-shsu..slAullpuDYRhs.Gp.pllssos-Gc.lRGY.............stt.psshhtts.ppphlccL.p+KQsLh...............hELpshEpp.t................tttsphshlsssTplpsshtsp.....ptsph-Ltlssss.......sslI+ulllFu...EulFt..GEohl................ss.pp.ss..plplslhss..KDssl-lclKsh....VGht.sus................papVFEloppLP+FsMYth.t.....................P.uhVphtls-..R.p+l.shWlppsF..llsptht.......pttshplpahslR..sst...lhlphp.ss......................plpl.sDshchsG-llQuhstah.......tlp.chps.spFP..hcchpphh.cVcphppl+t+Lou-hA-pushl+shllpAEDARlhtDhpsM+phYhpLhslNp-Llstapl..........RpsNappLhssLKtlNphIppAu+LR.lGpspspllstCRsAI+ssNhpsLhplhp.................................................... 
0 45 54 82 +14634 PF14783 BBS2_Mid Ciliary BBSome complex subunit 2, middle region Coggill P pcc Pfam-B_5884 (release 26.0) Family The BBSome (so-named after the association with Bardet-Biedl syndrome) is a complex of 8 subunits that lies at the base of the flagellar microtubule structure. The precise function of the all the individual components in cilia formation is unclear, however they function to promote loading of cargo to the ciliary axoneme [1]. The primary cilium, a slim microtubule-based organelle that projects from the surface of vertebrate cells has crucial roles in vertebrate development and human genetic diseases. Cilia are required for the response to developmental signals, and evidence is accumulating that the primary cilium is specialised for Hedgehog (Hh) signal transduction. Formation of cilia, in turn, is regulated by other signalling pathways, possibly including the planar cell polarity pathway. The connections between cilia and developmental signalling have begun to clarify the basis of human diseases associated with ciliary dysfunction [3]. BBS2 is one of the three Bardet-Biedl syndrome subunits that is required for leptin receptor signalling in the hypothalamus, and BBS2 and 4 are also required for the localisation of somatostatin receptor 3 and melanin-concentrating hormone receptor 1 into neuronal cilia [5]. 27.00 27.00 27.00 27.20 26.90 25.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.34 0.72 -4.29 24 129 2012-04-13 16:52:41 2012-04-13 17:52:41 1 11 96 0 83 119 1 109.30 52 16.03 NEW sVsulslsDhssDGp..sELlVGS-Da-IRlFcs-.-lltEhsEsstlssLsslps.sp.....FuYuLtNGTVGVYcp...ppRhWRlKSKpp.ssultsaDls..u-G..-LIsGWusG+l-sR .......................pVpSLsLsDasuDGc.....pEL.....LVGSEDF-IRVFKcD.ElluEhoET-tlTuLs.shhs...u+...............FGYA.LuNGTVGVY...-+.......ssRhWRIK..............SKsc.shulpuFDls..uDGVsELITGWSNGKlDsR....................... 
0 33 41 67 +14635 PF14784 ECIST_Cterm C-terminal domain of the ECSIT protein Godzik A adam Jackhammer:Q9BQ95:268-396 Domain This family represents the C-terminal domain of the evolutionarily conserved signaling intermediate in Toll pathway protein, an adapter protein of the Toll-like and IL-1 receptor signaling pathway, which is involved in the activation of NF-kappa-B via MAP3K1. This domain is missing in isoform 2. Fold recognition suggests that this domain may be distantly homologous to the pleckstrin homology domain 25.00 25.00 27.00 27.30 21.00 20.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.62 0.71 -4.18 23 98 2012-04-15 05:32:46 2012-04-15 06:32:46 1 4 81 0 63 105 0 119.00 40 31.59 NEW SspQpcLLpcpshscPlaVEGPaplWL+cpslsYalL+u-..s.....pshs..p..pp--.hD.....Dssslhhs.h.hhchphpp......p.t.stthoVHEQ-DGTIaA.hCsTGsuo+sSLLuWIRhLpc..sNPsLsplPVlF+Lp .......SP-QpthLucHsss+PlaVEGPFslWLRs+sl.YalLRAD......hP.p.....-cc.p-.hc.....-..slhhP.h.h.chchsR......psh.shphsl.c-..--GslFA.hChsG.spspsoLhpWIptLQc..sNPsLuplPVlFRL..................... 0 21 26 46 +14636 PF14785 MalF_P2 Maltose transport system permease protein MalF P2 domain Eberhardt R re3 CATH:2r6g_F_03 Family This is the second periplasmic domain (P2 domain) of the maltose transport system permease protein MalF [1-2]. 
22.00 22.00 22.00 22.90 21.60 21.90 hmmbuild -o /dev/null HMM SEED 164 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.99 0.71 -4.37 57 692 2012-04-16 10:16:57 2012-04-16 11:16:57 1 2 684 8 63 308 12 159.70 61 31.20 NEW LoaERAQsVLhsRpaQu..GcsasFsLY..ssssp.apLtLpss-ttph.........hlStsFsl..stsss............pplsLs.sss..spGEtAsl+sIspsR..pALsslshhLPsGs.cLpMSuLRpFuuspPLYo............lpcDGpsLpNspoGpha+PNh-hGFYQslstpGpa.tu-plSPGFTVslG ..........................................................................................................LTFERAQpVLhDRSaQA..GKTYNFGLY...PuGDE.WQLALoD..G..E..TGKp..........YLSDAFpF..........GGE..............QKLpLKETsA...PpGERANLRlITQNR..pALSsITAlLPD....Gs...KVhMS..SLRQFSGTpPLYT............Ls.sDG.TLTNNQSGVKYRPNNpIGFYQSIsADGsW.G-EKLSPGYTVThG........ 0 11 23 46 +14637 PF14786 Death_2 DEATH_2; Tube Death domain Coggill P pcc CATH:1d2z_B_00, Pfam-B_14779 (release 26.0) Domain This Tube-Death domain has an insertion between helices 2 and 3, and a C-terminal tail compared with the Death domain of Pelle proteins in Drosophila. The two N-terminal Death domains of the serine/threonine kinase Pelle and the adaptor protein Tube interact to form a six-helix bundle fold arranged in an open-ended linear array with plastic interfaces mediating their interactions. This interaction leads to the nuclear translocation of the transcription factor Dorsal and activation of zygotic patterning genes during Drosophila embryogenesis, and is assisted by the significant and indispensable contacts in the heterodimer contributed by the insertion and C-terminal tail described above [1]. 
25.00 25.00 27.50 32.70 22.80 24.70 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.84 0.71 -4.46 4 45 2012-10-01 21:41:45 2012-04-16 14:44:21 1 2 33 2 20 49 0 117.20 54 27.18 NEW YoRsTElR+Vp-.Dl.cLApIL--s..WRpLh.lIP++l.DVptsuGuhh..a..h.....hKYsupp.pplDctApRl..spupuphhl-EWtTSGKLsERPTlGhLLpLLV+uphapAADaVAlchLpEspPARPssGPAAhIsl- ...........ElR+l...th..pLsplL..p...st.W+plMshlPpt.......................hKYoupcl..I-psApRh.PcQStSQlMIDEWKTSGKLNERPTVGVLLQLLVpAELaSAADFVALchLNEspPsRPsDGPuA.ISL-....... 0 5 7 17 +14638 PF14787 zf-CCHC_5 GAG-polyprotein viral zinc-finger Coggill P pcc CATH:1cl4_A_00 Domain \N 27.00 27.00 27.00 27.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 36 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.54 0.72 -7.84 0.72 -4.46 12 108 2012-10-03 11:39:54 2012-04-16 15:14:35 1 20 40 2 36 128 1 33.90 51 5.38 NEW ssslCPRCtKGhHWAs-C+S+hDhpGpPLss.cpps ....sPsLCPRCKKGpHWAs-C+SKhDhpGpPLss.ptp........... 0 16 19 23 +14639 PF14788 EF-hand_10 EF hand Coggill P pcc CATH:1djx_B_01 Domain \N 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.88 0.72 -8.58 0.72 -4.30 12 227 2012-10-02 16:17:27 2012-04-16 16:04:36 1 18 55 20 99 189 0 50.60 47 6.86 NEW .KMSh+ElKphL+.lNlElcDpYAcpLFpcCD+SpoupLEupEIEcFY+hLT ..............KMSF+ElpslL+hlNl-hc-pYAhp.LF.....pcs..D+..S.p.o......spLEscElcpFY+hLT........ 0 12 24 51 +14640 PF14789 THDPS_M Tetrahydrodipicolinate N-succinyltransferase middle Eberhardt R re3 CATH:2rij_A_02 Domain This is the middle domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase [1]. 
25.00 25.00 26.00 25.20 20.20 24.60 hmmbuild -o /dev/null HMM SEED 41 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.53 0.72 -7.85 0.72 -4.01 68 856 2012-04-17 07:42:46 2012-04-17 08:42:46 1 5 812 32 164 575 309 40.80 44 11.94 NEW LsNVAW.TstGPhpl-tlcpsclphRh.pGph.lpVhuVDKFPR ....LsNVAW.TspGPhclstlcpschc...h.....Rh.pG.........t.........h...lsV.sVDKFPR........ 0 42 107 149 +14641 PF14790 THDPS_N Tetrahydrodipicolinate N-succinyltransferase N-terminal Eberhardt R re3 CATH:2rij_A_01 Domain This is the N-terminal domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase [1]. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.92 0.71 -4.63 21 296 2012-04-17 07:43:41 2012-04-17 08:43:41 1 2 295 22 35 170 92 135.90 44 36.33 NEW -cFKshVc-lpup.pGYK-PlAFGIARVDhGQhsucKlLQAoYPllNWc.ENaGSAAlFlpAlpcsGlplDFosSEhVhslspcFlppALphFsPalsEAhu-..uHKNlQVlpsLppthccst......pscF+lVFlFEDssPcSVEulYLKLYALSLuKAsLRSlNLsGAFG .................................................................................pcFh.hlpphppp.stY+cPhuFGIARlDhu.h.ppKlLpAoasllNap.pNhGShAlhhpuh.pp......t...h..c.ptSEhV..lp.p.l..ALtha.pPalpE...p...uHpNIp....llh..lhct.hc-.s.................................ashVhLaE..DccP.SVEusYLKLhLLSp+KVsLRSlNLsGhFG..................... 0 10 27 34 +14642 PF14791 DNA_pol_B_thumb DNA polymerase beta thumb Eberhardt R re3 CATH:2van_A_03 Family The catalytic region of DNA polymerase beta is split into three domains. An N-terminal fingers domain, a central palm domain and a C-terminal thumb domain. This entry represents the thumb domain [1]. 
25.00 25.00 25.20 25.00 24.40 24.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.53 0.72 -8.85 0.72 -4.28 209 1268 2012-04-17 13:11:18 2012-04-17 14:11:18 1 39 923 222 528 1126 238 66.10 35 12.07 NEW ALhYFTGSctaNhplRphApcc.Gh+LsEaGlac.................sp..........tp..................h....l..ssp.....................oEc-laptLGLsalsPchR-s ..................................uLhaFTGS+paNhphRph.App+.uh+lsEaGlhp....................h..ss..........sp...........................................hl.php..............................................................oEcclactlGlsalsPp.Rc.................................... 0 177 300 418 +14643 PF14792 DNA_pol_B_palm DNA polymerase beta palm Eberhardt R re3 CATH:1bpd_A_03 Domain The catalytic region of DNA polymerase beta is split into three domains. An N-terminal fingers domain, a central palm domain and a C-terminal thumb domain. This entry represents the palm domain [1]. 25.00 25.00 25.00 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.76 0.71 -4.01 47 629 2012-10-02 22:47:23 2012-04-17 14:17:36 1 31 308 209 368 663 146 118.30 26 21.74 NEW RIPRcElptltshVp.ptspplsPs........hpshlsGSYRRGtsoSGDlDlLIT+scspst..................hp.........shLspllppLpp.......psFLscsL...........ut........us............sKahGlCpLss........................s...............thaRRIDlhllPt ..........................+hPRpEspth.tphlp.c.t.s.p.tlsss..........hh.sslsGSYRR.............G+.t.s.u.G.DlDlLlT+sctss........................................p..............tllppllppLcp........ps.alp.p.s..L........................................s.........t.....................................ta.hG.t.h.....................................................................................................hRRlDhhh........................................................................................................ 
0 113 187 273 +14644 PF14793 DUF4478 Domain of unknown function (DUF4478) Eberhardt R re3 CATH:3bq9_A_01 Domain This domain is found in bacteria, and is approximately 110 amino acids in length. It is found in association with Pfam:PF03641 and Pfam:PF11892. 25.00 25.00 25.20 71.90 19.80 24.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.33 0.71 -4.48 51 869 2012-04-17 14:13:23 2012-04-17 15:13:23 1 3 864 10 131 454 72 111.60 69 24.98 NEW hpsplsPtGoh-lLSQhEVs+L.ppsususLYpLFRsCuLAVLNoGupoDsucplh-pYp-F-IpllpcERGlKLELhNsPtpAFVDGchIcGIpEHLFuVLRDIlYlssclpp ......Is+lSPh.GSMDhLSQLEVDhL.K+.TA.SSDLYQLFRNCSLAVLNSGShTDNSKELLsRaEsFDINVLRRERGVKLELlNPPE-AFVDG+IIRuLQspLFAVLRDILFVtuQIcs.... 0 22 52 94 +14645 PF14794 DUF4479 Domain of unknown function (DUF4479) Eberhardt R re3 CATH:3bu2_A_02 Family This domain family is found in bacteria, and is approximately 70 amino acids in length. The family is found in association with Pfam:PF01588. 25.00 25.00 27.20 27.10 24.20 20.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.84 0.72 -9.12 0.72 -4.44 90 1013 2012-04-17 15:01:32 2012-04-17 16:01:32 1 4 989 4 85 461 0 72.60 40 36.05 NEW tsssstctshE++GsVs+Ihs.tcsspslGaNlFssSshl.plpu.sGpVpLoc-plspLNptLpcsGFsppLpsDh ..........tpsstuchsh-RKGsVsRlhp.c-sGpsVuaNIFclSs...hl.pIpp..pGpltLTDE.VsplNptlpcsGFsccLssD....... 0 18 42 65 +14646 PF14795 Leucyl-specific Leucine-tRNA synthetase-specific domain Coggill P pcc CATH:1h3n_A_03 Domain This short region is found only in leucyl-tRNA synthetases. 
It is flexibly linked to the enzyme-core by beta-ribbons structures [1] 25.00 25.00 38.20 83.10 18.90 16.40 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.66 0.72 -4.11 9 12 2012-04-17 15:26:50 2012-04-17 16:26:50 1 2 12 10 6 14 0 55.90 70 6.37 NEW .WTDaGPVEVEGspVR.LsEssRlRLEl.EutLSLE-V+KMGAELRsHEDGTlHhWKP WTDFGPVEVEGstVR.LPEPTRIRLElsputLSLE-V+KMGAELRPHEDGTLHLWKP 0 1 4 6 +14647 PF14796 AP3B1_C Clathrin-adaptor complex-3 beta-1 subunit C-terminal Coggill P pcc Pfam-B_195384 (release 26.0) Family This domain lies at the C-terminus of the clathrin-adaptor protein complex-3 beta-1 subunit. The AP-3 complex is associated with the Golgi region of the cell as well as with more peripheral structures. The AP-3 complex may be directly involved in trafficking to lysosomes or alternatively it may be involved in another pathway, but that mis-sorting in that pathway may indirectly lead to defects in pigment granules [2]. 27.00 27.00 27.40 27.50 25.70 26.90 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.77 0.71 -4.50 25 196 2012-04-17 16:02:19 2012-04-17 17:02:19 1 7 100 0 118 173 1 141.10 38 13.89 NEW s+ss.s.LLDL.DD.......sPsssP.....lloPSLsusLpshohosssssss............lsuPual...sh+spELLp.+loGcGLulpYRFoRpPplauspMVSlplpFoNpospclpsI+lup+.pLsuGMplpEFspIsp.LtPptShosslGIDFsDSTQ ................................................................s..tpph.LLDL.-Dat........s.ss.P......hlosSLhuDLpGLslosos.s.s.............................lhoP..shh...ssKppELLH.+h.s.GcGLulcYpFsR....p....Pph.......hs..spMVo.....lplphsNso-.p.pIcsI+lGpp.cL..ssGh.plpp...Fs.I-s.LtP.tt.o..hT.sshGIDFsDSTQ................................................ 
0 25 42 75 +14648 PF14797 SEEEED Serine-rich region of AP3B1, clathrin-adaptor complex Coggill P pcc Pfam-B_195384 (release 26.0) Family This short low-complexity, highly serine-rich region lies on clathrin-adaptor complex 3 beta-1 subunit proteins, between family Adaptin_N, Pfam:PF01602 and a C-terminal domain, AP3B1_C,Pfam:PF14796. 23.00 23.00 25.70 24.20 19.10 19.10 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.39 0.71 -4.00 16 46 2012-04-17 16:15:25 2012-04-17 17:15:25 1 3 27 0 20 41 0 130.00 69 12.64 NEW sssKKFYSESEEEEDSSDSSSDSESESGSESGEQ...sEEGDSoEDSSEDSSSEQDSESGSESEsEsKRsAKRNSKoKGKSDSEDtEKENEKSKTSDSSssESSSlE-SSS-SESEStSESESESR+VTpEKE ...NsuKKFYSESE.....E....E..EDSSDSSSD..........S................E............SE..SGSE.SGEp...sEEGDSSEDS...SEDSSSEp.-SESGpESthEsKRsAKRNSKsKGKSDSEDGEK.E.NEKSK...TSDSSsuESSSlE-SSSDSESESESESESE...SRKVTKEKE..... 0 1 1 4 +14649 PF14798 Ca_hom_mod Calcium homeostasis modulator Eberhardt R re3 Jackhmmer:Q86XJ0 Family This family of proteins control cytosolic calcium concentration. They are transmembrane proteins which may be pore-forming ion channels [1]. 25.00 25.00 30.70 27.20 22.10 21.50 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.78 0.70 -5.42 30 292 2012-04-18 15:03:15 2012-04-18 16:03:15 1 2 49 0 174 237 0 231.90 33 77.89 NEW M-pa+hlhpahpspcsolhsullulLTluupclFShhsFpCPCssshNhhYGlshLhlPAlsLhllGhhlNspo......W+lssG.............+.hpptppssh.hhlhssIhtpAhlAPlsWlsVuLLsGpaY.CAhSs.ssssth....t.hss...psscstchLu+lP.Ctc.hss.....pcplhphL+spSQlLGWhLlsllslluhl.spClppChS.loaLQhcaWppYhcpEcc....lF-ppsppHAcphAccNl+pFF-shpsp. 
......................-tFphlhpah...spptslh.ulhulhsluutplaSshsFpCP.C.sshN.hhYGhshLhsPslsLhllGhhlNsps......WchhsG.......................ppt.p..ssth..hhhhsplhtp..AhlAPlsWlsVsL.LsGp.....h.a..CAhSt..ssspth.......hsp....hp....sspstchLs+hP.Ctc..h..s.......p.ctl...hthL+spSQhlGWhllhlsslhshl.spslppChS...l.oaLQtpaWppYhppEcc....lF-psspcHActhAcpslcpFFtthp..p............................................. 0 21 35 73 +14650 PF14799 FAM195 FAM195 family Eberhardt R re3 Jackhmmer:C9JLW8 Family \N 25.00 25.00 26.30 25.60 23.40 23.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.48 0.72 -4.15 32 165 2012-04-19 08:00:36 2012-04-19 09:00:36 1 2 77 0 90 155 0 98.80 36 68.81 NEW .pssssP+sVFpp.ss+ph.....tts.pptpp......pEshostHEE.l+aIp-uWppV.pp.tts.....................................sss......tstpusshYhpcsPsssLpsFpPhDLEpWWucRhhsNIsp ...........................................................h.pssss+hVap....Nu+Rt......ss.s.s.pss.....pEsaTssHEENVRFlhE.........AWppVppphpst................................ss......tpssts.pYh.EcsPsPpLpsFpPhDL--aWupphhssIt........................................ 
0 16 22 55 +14651 PF14800 DUF4481 Domain of unknown function (DUF4481) Eberhardt R re3 Jackhmmer:Q5VZI3 Family \N 25.00 25.00 26.70 37.70 24.80 24.80 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.93 0.70 -5.51 5 86 2012-04-19 09:33:35 2012-04-19 10:33:35 1 3 66 0 51 93 0 236.00 39 63.81 NEW WRNGQsVhuVPTSShhoP.SFDLSlCRsLLEocGFQIPAu-FEsPLElALDcPSVRRYLlFNSplF+FIMAPIlYlVlWCAlYSTLHLY...SlucYWlLCLsVSLVSIhLTssIlLIlcYSNKEINMNTDVRLluVNERLlRH+LLLGVADWVcsCpGsLQLFhVYWDlu+CL+uLTEoL-EM+FupDpuQphL+KRMSHLlLVsEVsoh-PsA..u....sEG.S-..EEpPLLsusEE.suE.polSQR-DoKLTcNaSLVP-shLsspshApQLLlsYGAlYVRLLVSs+L.sssppPpssu+NHCsuu.slChCQYIc .............................................h..NGpV.lss.L..hs.s....hhsPhtFc.ths.EpL.s.Glp....lss-pYhshhEohl.-..hhRahlYN.p.hRlhh..s..hlh..a..lVlhu.laS...s.......Qhh....uLushhs.uhLhlshA.Al.LshhlhLhhp+tpcKhNhNh-hpLAtsNttLhRH+lLLGlsDpschspphlpLhFlYFDhppCVpaLs-alp......chcpst.-..sh....hcp+Ls..................................................................................................................................................................................... 0 11 15 31 +14652 PF14801 GCD14_N tRNA methyltransferase complex GCD14 subunit N-term Coggill P pcc CATH:1i9g_A_01 Domain This is the N-terminal domain of GCD14, itself a subunit of the tRNA methyltransferase complex that is required for 1-methyladenosine modification and maturation of initiator methionyl-tRNA [1]. The exact function of the N-terminus is not known but it is necessary for maintaining the overall folding and for full enzymatic activity. 21.90 21.90 21.90 22.50 20.70 21.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.24 0.72 -8.70 0.72 -4.39 11 435 2012-04-19 10:01:56 2012-04-19 11:01:56 1 2 432 1 112 324 79 53.60 53 14.52 NEW .RRGsLpAGEKVQFTDRKGKKITDQLVsGGVTQTEHGlILHDDVIG+oEGsVlT .....t..pGPFpsG-RVQLTDsKGR+aTlsLpsGupFHTH+GultHD-lIGts-GoVV.... 
0 33 82 104 +14653 PF14802 TMEM192 TMEM192 family Eberhardt R re3 Jackhmmer:Q8IY95 Family The function of this family of transmembrane proteins is unknown. In vertebrates, proteins in this family are located in the lysosomal membrane and late endosome [1-2]. In Arabidopsis, a member of this family has been found to weakly interact with FRIGIDA, a determinant of flowering time [3]. 25.00 25.00 25.30 25.00 24.80 23.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.48 0.70 -5.51 19 137 2012-04-19 10:17:44 2012-04-19 11:17:44 1 2 88 0 80 139 0 206.00 32 76.71 NEW D.ss.hhsssLtSt..cspFcslsTVhhssl.llls...lslslsuhlhshhhsspptcC........csahlllYh+sshWllshlh-phs+p+HpplRhpGYhcFYRpTpph++lPLhlhShsNssLLhltslhpphh...............tpsh.t.t.....Loshhaltlls.lEhllhlsshlhYhs+Vh+FN+t+s.PDlhc--ph...h..s.phssphGh.pcusshEEllEKQADlIcYL+cHNspLu++lhpLs ...........................................................h.t..h.s...p.pFp.l.Th.hhslhhhlp...........shlshhshhhs.h.....tp.pC................ps.hlhlhh+shLWllphlh-palph.pHp+lR.pGYhphacpT+pl+plPLhlhShGNsslLllhsh.tph................t.sh..........L...hhlslhs.lEhlsshhshhhYhs+..lp+.FNpt+spPDllcppt..........s.phhsphGh...+pssshc-llEKQuDhIpYL+cHsthLsc+lhth.......................................................................................... 0 16 28 57 +14654 PF14803 Nudix_N_2 Nudix N-terminal Eberhardt R re3 CATH:3cng_A_01 Domain Ths domain occurs at the N-terminus of several Nudix (Nucleoside Diphosphate linked to X) hydrolases. 25.60 25.60 25.70 25.80 25.50 25.50 hmmbuild -o /dev/null HMM SEED 34 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.89 0.72 -4.26 89 442 2012-04-19 11:54:43 2012-04-19 12:54:43 1 3 377 4 170 392 347 33.50 47 17.74 NEW +aCspCGss.l.ph+lP.tGDs+.RhVCssCstIHYp ......+FCstCGps.l..tt+IP..tGD..sR.RhVC..s..s.CusIHYp... 
0 43 94 138 +14655 PF14804 Jag_N Jag N-terminus Eberhardt R re3 CATH:3gku_C_01 Domain This domain is found at the N-terminus of proteins containing Pfam:PF13083 and Pfam:PF01424, including the jag proteins. 25.00 25.00 25.40 25.10 24.80 24.70 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.10 0.72 -3.95 270 1458 2012-04-19 14:54:28 2012-04-19 15:54:28 1 7 1341 3 281 989 174 51.90 36 17.81 NEW chpG+Tl-EAlppAhpcL.sls.c-cl.-l..-Vlpcs.spGhhG..lGpKsAhlclphc ......hpGpTlEEAlppuLppL.sl....s.+pcl...cl..cVlpc..s.+KGFLG..hG.+KsAhlclp..................... 0 137 226 247 +14656 PF14805 THDPS_N_2 Tetrahydrodipicolinate N-succinyltransferase N-terminal Eberhardt R re3 CATH:3gos_A_01 Domain This is the N-terminal domain of 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase [1]. 25.00 25.00 25.20 25.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -9.31 0.72 -4.11 262 1625 2012-04-20 07:44:11 2012-04-20 08:44:11 1 6 1605 15 423 1040 1377 68.40 52 24.78 NEW spLpphIEsAaEs.Rspls..t....sssscl+cAVppslshLDsGplRVAE+.h...s......Gp............WhVNpWlKKAVLLuFRlp .....................pLpslIEsAFEc..RApIo..ss.....sscstsR-AVppsIshLDsGtLRVAEKh....D..............Gp................WsspQWLKKAVLLSFRlp......... 0 118 254 339 +14657 PF14806 Coatomer_b_Cpla Coatomer beta subunit appendage platform Coggill P pcc PF07781 Domain This family is found at the C-terminus of the coatamer beta subunit proteins (Beta-coat proteins). It is a platform domain on the appendage that carries a highly conserved tryptophan. 
25.00 25.00 29.80 29.20 22.20 21.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.62 0.71 -4.44 30 371 2012-10-01 20:40:43 2012-04-20 16:04:06 1 9 303 0 267 372 4 123.70 54 14.51 NEW uDcssVlLNDIHIDIMDYIpPAp..CsDspFRsMWsEFEWENK.VsVpT.sls.sL+sYLcHlhcsTNMpCLT...P-uuLsG.-CsFLuANLYA+SlFGEDALANlSIEK....sscGpIsGalRIRSKTQGlALSLGD+Is ...................................s-pssVlLNDIHlDIMDYIpPAs..Co..-spFRp.MWsEFEWEN..K.VsVso.......sh...p..........sL+-aLpclhcuTNMpCLT.................P......c...t.........u........L........sG....c.............CtFluANLYA+Sl.FGEDALANlSIEK.............ts-u.l.sGal.RIRS.KoQGlALSLGD+l............................... 0 99 154 223 +14658 PF14807 AP4E_app_platf Adaptin AP4 complex epsilon appendage platform Coggill P pcc Pfam-B_21377 (release 26.0) Domain This domain is found at the C terminal of clathrin-adaptor epsilon subunit, and at the C-terminus of the appendage on the platform domain. 27.00 27.00 29.90 28.50 26.50 25.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.34 0.72 -3.80 12 75 2012-10-01 20:40:43 2012-04-23 09:50:09 1 2 55 0 53 74 0 102.90 41 9.81 NEW RPLploT--FGthWhSh.up-hcQslp..psstsslsshLpthpppL+LHsVplIGs....EsIhAspLLssss......CLlHs+lsus.slslhl+osspsLs-slltpCppshp ....RPLpIoT--FGchWlSh.us-sKQslp..spot..ssLs.ssLp.sLpp+LpLHlV-lI.Gs....EGllACpL.Lsohs......CLLHsRl.....pus..sluLWhRSssssLsDhLLhpCQ+sh................. 0 22 26 36 +14659 PF14808 TMEM164 TMEM164 family Eberhardt R re3 Jackhmmer:Q5U3C3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 214 and 330 amino acids in length. There are two conserved sequence motifs: LNPCH and DPF. 
25.00 25.00 25.30 26.70 20.40 21.40 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.59 0.70 -5.18 10 165 2012-04-23 10:24:12 2012-04-23 11:24:12 1 5 104 0 108 146 1 193.00 37 75.81 NEW hhDWsYGGVDsSls.GNGGPECAsFLospQRllEollhhsLuhhtlhhulc+lt.................spsssttppcssu+plLLlhLslsFGlElGFKFAo+TVIYLLNPCHlsTslQIYLLAss.PS+psTslFRLpMahLNGAlLAlLFPllNTRlLPFEhEIYYIQHlLla.VVPlYLL+hGGsYssEPlsDapWulLuhGLhhhYHFslLQlLullTpVNLNNMLCPAlSDPFpG.aYRIaAssHQsLLshlpuKLhshlh ..........................................................................................................h........................hhs..p+hhE.hhh..h.hh.hhhsh..h......................................tp..c.hllh.....hhsh.hhuhphuaKhsp..t....ph..l....ahLpPCHlho..hhp.l.hlLhh.........p...hshhlF..pl..hthl.GshlAhl.FP.h...ssRhls.hEhthYalQHhhlh.llPlYLlh..sG.sYs...........Esht..shtWshl..u.........hulhhhYHFshLp.lul....h..............TtlNLNpMLCP....A..hpDPF.......G..YRlhA.hHQslhh.hhsKhhhh..s................................... 0 50 56 84 +14660 PF14809 TGT_C1 C1 domain of tRNA-guanine transglycosylase dimerisation Coggill P pcc CATH:1iq8_A_02 Domain This short region of the tRNA-guanine transglycosylase enzyme acts as the dimerisation domain of the whole protein [1]. 25.30 25.30 25.30 110.90 25.20 23.10 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.34 0.72 -4.16 5 13 2012-04-23 10:52:57 2012-04-23 11:52:57 1 1 13 8 11 15 0 70.00 73 12.05 NEW PITKKSAFFKISEESL+WPIVRRAKERAERVsuKFPEpVcHPIFGEIPKYLSLTYPFAQSEuEEDFoIEK PITKuSAFFKVSEEuL+WPIVpRAKERAERVppKFPEslpHPIFGEIPKYLSLoYPFAQSEGEEDFTIEK. 0 1 1 6 +14661 PF14810 TGT_C2 Patch-forming domain C2 of tRNA-guanine transglycosylase Coggill P pcc CATH:1iq8_A_03 Domain Domain C2 of tRNA-guanine transglycosylase is formed by a four-stranded anti-parallel beta-sheet lined with two alpha helices. 
It has conserved basic residues on the surface of the beta-sheets as does the C-terminal domain PUA, Pfam:PF01472. The catalytic domain, TGT has conserved basic residues on the outer surface of the N-terminal three-stranded beta sheet, which closes the barrel, and it is postulated that these basic residues from the three domains form a continuous, positively charged patch to which the tRNA binds [1]. 25.30 25.30 25.50 26.50 25.20 24.70 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.78 0.72 -9.27 0.72 -4.13 62 193 2012-04-23 13:13:56 2012-04-23 14:13:56 1 5 144 8 131 194 27 71.10 29 20.27 NEW lpplRsIADYQFGtGAGcsLFs-p...hplphS.pTG+lRplhsssc.plATl+usDGhhoLultGAcRLpcshshPp .....................tl+slA-YQFGtGsu.c.tLhsct....hplphS..pTs+l.Rplh.....t.....s.........sc..pls.olRupDGhlsLoltGAchLpphh..P.......... 0 31 81 109 +14662 PF14811 TPD Protein of unknown function TPD sequence-motif Coggill P pcc Jackhmmer:Q9Y2V0 Family This is a family of eukaryotic proteins of unknown function. A few members have an associated zinc-finger domain. All members carry a highly conserved TPD sequence-motif. 27.00 27.00 29.60 30.20 21.50 20.90 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.28 0.71 -10.81 0.71 -4.65 33 149 2012-04-23 14:17:28 2012-04-23 15:17:28 1 2 114 0 101 150 64 132.30 41 47.79 NEW ls-pcLtt-ltpsl..hsDp.huPls-ph+.......cshGpEaEhhLcchLcptslsFhsEcc.LRtpGasK........TPDl+LplPlslcGp.....lVsWIESKA.FGD...pcsH..pphhccQh.uYhNRFGP.GhVIYWaGal-.....pl..sp..t.........lll.........hD.pFP ....................................D.hLutpl.pCl..hsD.stauPlsDph+.......+shGpEaEhhLcchLhptsl...sFhsEcp.LRtcGYDK.........TPDhhLp...lPl......ul..cGp.........................llpWIESKAsFGD.......cpoH....psa.lccQahSYhNRFGP.GhVIYWaGalp-l..st..p...tlhlhs.t.................... 
0 43 56 78 +14663 PF14812 PBP1_TM Transmembrane domain of transglycosylase PBP1 at N-terminal Coggill P pcc CATH:3fwl_A_01, Pfam-B_367 (release 26.0) Family This is the N-terminal, transmembrane, domain of the transglycosylases ()penicillin-binding proteins), the multi-domain membrane proteins essential for cell wall synthesis that are targeted by penicillin antibiotics. The TM domain is a single helix, several of whose residues lie in close proximity to hydrophobic residues in the TGT domain. The TM helix seems to be necessary for stabilizing the protein-membrane interaction, and the resulting orientation limits the interaction between PBPb1 and lipid II in the membrane in a 2D lateral diffusion fashion [1]. 27.00 27.00 27.00 27.00 26.60 26.70 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.37 0.72 -3.67 26 562 2012-04-23 14:23:22 2012-04-23 15:23:22 1 8 557 2 46 248 3 78.00 74 9.53 NEW MS.sDDREPIGRKGKts..pss+ppss.+pRRRcDD.....................--..DDD..p..E.--csMsRKsKs......p.s+pKR.RW.LhLLlKLhlV.FsVl ............MA.GNDREPIGRKGKPo..RPVKQK..VSRRRhcDDD........................D...D..D.YDD....E..DEEPM.PRKGKGK........GR.KPRGKR..GW.LWL.LLK..LhIV.FAVL................ 0 3 11 28 +14664 PF14813 NADH_B2 NADH dehydrogenase 1 beta subcomplex subunit 2 Eberhardt R re3 Jackhmmer:O95178 Family This family represents an accessory subunit of the mitochondrial membrane respiratory chain NADH dehydrogenase (Complex I), that is believed not to be involved in catalysis [1-2]. 25.00 25.00 25.90 27.00 24.50 24.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.08 0.72 -9.59 0.72 -4.31 12 105 2012-04-23 14:29:09 2012-04-23 15:29:09 1 2 86 0 61 100 1 69.70 46 65.04 NEW u.stusHl.shYRphPp.ls+tphhtuElluGhMWaWlLWHhWH-s-tlhGHFsYPDsSpWTDEELGIPPDDpE .................u.tusHl.spYRphPp..lT+p...phhtuEhh.SGhMWFWILW+hWHDs-t.VlG.HFPYPDP..SpWTDEELGIPsDDt.............. 
0 16 21 42 +14665 PF14814 UB2H Bifunctional transglycosylase second domain Coggill P pcc CATH:3fwl_A_02 Domain UB2H is the second domain of the transglycosylases, or penicillin-binding proteins PBP1bs)), the multi-domain membrane proteins essential for cell wall synthesis that are targeted by penicillin antibiotics. The exact function of the UB2H domain is uncertain, but it may act as the binding component of PBP1b with different binding partners, or it may participate in the regulation between DNA repair and/or synthesis and cell wall formation during the bacterial cell cycle [1]. 27.00 27.00 27.00 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.00 0.72 -9.38 0.72 -4.26 119 1054 2012-04-23 14:40:43 2012-04-23 15:40:43 1 9 1036 2 158 684 84 84.40 46 10.57 NEW LhsGtslotpplhpELchLsYRps.s.p...sp...psGpa......shpu....splplhpRsFsFsDG.t.Essp+lplsFss.splsplpshps.sp.shuhhRL .......LcPshslS+sEhlplL-uhpYRpV.o.p...hs...+PGEF......oVpu....so.IEhlRRPF-FPDu.c.Eup..hRsRLoFss.s+LusIhsh-s.sR.phGaFRL................................ 
0 32 68 118 +14666 PF14815 NUDIX_4 NUDIX domain Coggill P pcc CATH:3fsp_A_03 Domain \N 35.00 35.00 35.00 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.66 0.71 -4.65 286 3169 2012-10-02 00:00:35 2012-04-23 16:31:00 1 22 3080 6 661 3685 1180 115.00 24 31.12 NEW shl.lhp.pssp..lL.Lc+RPspG.LhuGLaphPph.-....h..t..................tp.th.................t..t.h...tp...h.shp...sp..h.h.p.....hs.....shcHsFoH.acLcl..p..shh....spls.....pt....t...........s.sshhWhshpp.h.p.p..huLPsshcKllp ........................................hllhp.pp.sp...lLLpc.RPsp...G........L..h.uGLapF...Pt.h.c........t...................p........tppl.........................................................p..p.ht.....pp....h..s.h.s.......ss.........h......p......phs.......slpHs........F....S........H..h+l..cl....pshh..........spls.........st..tst...........................ssshhWhshpp..h.s..s....hulssshc+lh................................................................... 
0 191 386 534 +14667 PF14816 FAM178 Family of unknown function, FAM178 Coggill P pcc Jackhmmer:Q8IXR5 Family \N 25.00 25.00 25.10 30.60 23.40 22.20 hmmbuild -o /dev/null HMM SEED 378 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.42 0.70 -5.75 7 112 2012-04-23 16:07:15 2012-04-23 17:07:15 1 2 42 0 51 106 0 318.10 44 44.42 NEW tuusphspspusoYsNoL-aLLQEKcp.th.pEhE+.Lhp-h.php..s.D...t-Es......sLs.EHRtllcRFSVohpuIPshHPGEsVF.......l.ppasLs..LcpSplpP+StlEpLhLpSssspQLohlppGLLoshYhpss.CPlslL+WLFQhhoh.P-..sSsthhuhLa-lolcuh.p.oDpssp...hWsPSLp-VstsFashGA+..uLaPLsshQ.sh..csLhuchphopscpQ..ssssphu.DhS......LtplhKFLsLCshspPsAYTDppLLhLItLLC+huL-spL+LLPpsDhQpLLl.LLcNIp-WssKlppLCpsLSplSsHHHNLLtLVQlhP-hoSRuRpLRppLSLVlIA+hLsppcph ...................................................................................................................t..pYhNsL-hLlpEhccp.....t..h..pEh...pc.L.p-h.ph...p.........p.s...t-p.............sL........EHR.hlc.+FSVo..l.psIPshHPGEplF.......l.pp.asLs..Lc..sSthhsp..SslE...pLhL.pSs.spQl.hhppGhLoshY.h.as...CPlPlLp....WLFphhoh.s-..sSsphhuhLh-l.olcs....p...s.Dpshh.....WhPSLp-lstlFhshGh..p..uL.aPLtshQ.sh..csL.....h.u-.sph...o.ttpp.....stspp.ht..hS............lhplh.KFLu.LCs....plpPp..uYpDp-lhhLI.hLh+h.uL-tpL.+.lP.hsDhQpLLl.L.hcNI..+-..WssKl.cLChulsplSsH.HNLLhLVQhhPshToRuR...QLRp.pLSLVlIu+hLsppc........................................ 0 7 11 19 +14668 PF14817 HAUS5 HAUS augmin-like complex subunit 5 Eberhardt R re3 Jackhmmer:O94927 Family This family includes HAUS augmin-like complex subunit 5. The HAUS augmin-like complex contributes to mitotic spindle assembly, maintenance of chromosome integrity and completion of cytokinesis [1-2]. 
25.00 25.00 25.30 25.30 24.50 24.50 hmmbuild -o /dev/null HMM SEED 632 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.22 0.70 -13.22 0.70 -5.98 8 103 2012-04-24 09:49:48 2012-04-24 10:49:48 1 6 59 0 65 103 0 479.50 25 90.83 NEW p.ucELtRWAsEEMslPsuu..........tPs-sth++LClGpGAcIWtYllpHV+SpRoV+sIRGNLLWYuttsss...................................clp.........R.........+hELppplpcLRAElpcLDpplpthEpEssup-tuhppuh.......pphp-sp+RpLLLcAastpsc+pp+sLp-shp+LpsQhcpLQ-hpR+Acs-...lsFGsss...SAu...................suLEPtVLRDVRcACshRspFL....QsLLpspsptsSshssp........cDphusSaQpWh.....SuVEslLss.....HPPsplLuALptLsucpcsplcp.LsosD........shp.Ds.....Ehp+........sphsD...................pScsppsLPuhppLlQEuWppVspllsppuplpp..cpQsLsp+LpuhlcEsccc.hlsS.sppssh..............hhpL..cpsslhsslc.uL+spsppLpptsuc+pctl.......RpLQsppQpIh-aRpLs-c+QEQIRhLIK.GNSuuKocLsRsstEltphlpcKLlPshpsVsspSpcLpcslpcEs+HFsplsLu..................................sLh+ppssGhphlPssh.SIHRL.pst..h.......suuhtsLscoLulPh.h+APEtLLspAsoh+p-LlaLpcphuLpupuLhs..l..+suL.Pus..sTQtLLphtppp-Kcph-sLsPpL++LhppscpsLEts.plQullsDWWEQPuQhALsp.pppGLolpQWppR 
..............................................................................................................................................tWh.pEhth...............s....t.h....pplChG.p.hh.lWtalhp+Vhppcslphh+tNl..hat..t.t.................................................tht......c..........+..htt.tltpLcs.ltp.ppp.lp.h..pht..tp-.thpp.h.......tphtp.pp+thhLpu.ttthptttp.htp..pplp..hpphpch.t+.tt..t......h.............t...............................hhEs.h.tslRthCp.h.t.h....ptl..s..t.ts.................th.htthh......p.sp.hhts.....hsP..lLtulthhs.c.tt.lpp.hpths........hht-h.....ch.p........................h.s.........................usp....hss..pLlpcth....ttlt.hhsptt.h.p...ptp.hppcL.thhct......hptt..h.t....t.......p...t.tsh...............hhth..chsshhuplp.sLhsps..ppLpphstppp.h........ppLpt+.pcI.cacthhpchpp.hphLl+.uN.sutshhppt.hts.thht...tpllP..t.l...spp.hphlpcEhtth.t...s..................................sl.ptpstt.....Pshh.SIp..pl..psh............................................s.shh.l.ptLths....pusE..h.tshsht..p.hh.hhpp.....t..l.p..h.....tst.....t...stp.hlph.tpp-p...cphh.tl+ph...hppshp..h..lpshltcWa-QPuthsls...h..tG.sh.tW...................................... 0 21 36 49 +14669 PF14818 DUF4482 Domain of unknown function (DUF4482) Eberhardt R re3 Jackhmmer:Q6ZU52 Family This family is found in eukaryotes, and is approximately 140 amino acids in length. The family is found in association with Pfam:PF11365. 
25.00 25.00 25.40 29.70 24.80 24.50 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.85 0.71 -3.53 13 155 2012-04-24 10:34:25 2012-04-24 11:34:25 1 3 38 0 83 137 1 130.10 34 10.45 NEW MDL+cQlcpoE+NWp+EKhELL-+FDsER+EWEsQhK.hQ+KIEcLp+EVchRRp.......uphhs.p+-pshpspshpusss.Ppsup....s-hpuhspcDshtpcpKcppslhupp....................s.hscp.phsccscsshhss.sh-.cK ...........................MDLppQlp.poE+NWs+EKlELL-RhDpERppWEpQhKE...Lpp+lc......Q............LpKtsssh......................sl+.t.hptp.p..c..st...h.pDt-shs.t-h.hsphKcscpsou..p....................s.hhcshpLsspst.s..t...h-.p...................................................................................................... 0 6 11 31 +14670 PF14819 QueF_N Nitrile reductase, 7-cyano-7-deazaguanine-reductase N-term Coggill P pcc CATH;3bp1_A_01 Family The QueF monomer is made up of two ferredoxin-like domains aligned together with their beta-sheets that have additional embellishments. This subunit is composed of a three-stranded beta-sheet and two alpha-helices. QueF reduces a nitrile bond to a primary amine. The two monomer units together create suitable substrate-binding pockets [1]. 25.60 25.60 25.60 25.60 25.50 25.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.42 0.72 -3.98 126 1208 2012-10-01 20:59:24 2012-04-24 16:18:52 1 5 1189 25 217 794 335 110.50 60 39.70 NEW Y.spYDPsLL.PlPRshsR-pLGlss..sLPF.p.GtDlWsuYELSWLNs+GKPpVAluchplPssSsNLIESKSFKLYLNSFNQT+Fso.hcpVppplpcDLSssAsusVsVpl ......................................YtcpYDsSLLQsVPRoLNR-sL....G.....Lpu....psLPF.p.GsDIWThY...EL.SWLN.u+GlPQVAlGclplshsSsNLIESKSFKLYLNSFNQTRFss.....h-p.....VcpTLp+DLSsCAtupVoVpL.............. 0 41 94 159 +14671 PF14820 SPRR2 Small proline-rich 2 Eberhardt R re3 Jackhmmer:Q96RM1 Family This family of small proteins is rich in proline, cysteine and glutamate. They contain a tandemly repeated nonamer, PKCPEPCPP [1]. 
They are components of the cornified envelope of keratinocytes [2]. 25.00 25.00 38.50 26.20 21.80 23.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.06 0.72 -10.64 0.72 -4.10 4 108 2012-04-25 10:24:47 2012-04-25 11:24:47 1 3 16 0 43 102 0 59.60 78 98.41 NEW SYQQQQCKQPCQPPPVCPsPKCPEPCPPPKCPEPCPPPhC.PEsCPP..CQcKCPPV...PPCQpKCPP ........SYQQQQCKQPCQPP.PVCPsPKCPEPCPPPKCPEPCPP.PKC....PEP.....CP....P.....ppC.....QQKhPPVp..PPCQ.KhPP.. 0 6 6 6 +14672 PF14821 Thr_synth_N Threonine synthase N terminus Eberhardt R re3 CATH:1kl7_A_01 Domain This domain is found at the N-terminus of many threonine synthase enzymes [1]. 25.00 25.00 25.00 26.60 24.40 24.60 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.61 0.72 -3.96 302 3034 2012-04-25 12:31:21 2012-04-25 13:31:21 1 15 2909 4 759 2286 1521 77.50 33 16.58 NEW cYlSTRG.p....s...sslsFp-slLp.GL.AsDGGLalPc..phPp.lsts.plpphpsL....oYt-LAhclhphFl....s-....Isps-L+sllscuY ..................a.STRs..p........s.......pploFupAllp.GL.Ap.D.GGLahPp..s.lPp.hshs...l.c...ph...tsh..........sa.-hAhclLssFl....s-.....lsp..-pLcphlppAa................................. 0 214 441 611 +14673 PF14822 Vasohibin Vasohibin Eberhardt R re3 Jackhmmer:Q86V25 Family This family of proteins function as angiogenesis inhibitors in animals [1-2]. 
24.00 24.00 42.30 25.10 19.60 19.00 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.45 0.70 -5.29 10 165 2012-04-25 13:13:52 2012-04-25 14:13:52 1 5 72 0 95 174 0 192.10 48 64.09 NEW GGVPFalNRuGhPls-tTW............ERMWpHVAKhHP.DGcchsp+IRsAsc...LPKlPl................PoVPsF.....psot..oVs-tLcAlQpYl+cLQYNHTGTQFFEIKKsRPLoGLMElAKEMs+ESLPIKCLEAVILGIYLTNuhssLERFPISFKTpFSGsaFRHIVLGla.huGRaGALGlSRRcDLMYKPLpaRTLS-LlhDacsuYccpaHsLcKVKIGtsVsH-PHSsEpI-WKHsVLslp+.hu+EDlRKELE+auRDMR ...........................................s............h....t.hW............ppMa.ashp.ps.pstp.httl+sss....Lsc.sh.................PplPsa.....p.sh..slsphLpslQpYhppLp...YNaTG.sQFFplcK.RPLsuLM-hAKEMh+EuLPIKCLEAVILul......Y............LTsu.ssl-RFPISFKo.F.uG.....sh..........F+HlVLGlh..h...s...G+...Y..GuLGh.SRRt-LM.KPhsa+oLS-Llh-act.uYpphhHslpKVKl.G..VsH-PHShp.IpWKp.lLshp+.hhtt-hc+pLE+asR-hR................................................................ 0 28 39 58 +14674 PF14823 Sirohm_synth_C Sirohaem biosynthesis protein C-terminal Eberhardt R re3 CATH:1kyq_A_03 Domain This domain is the C-terminus of a multifunctional enzyme which catalyses the biosynthesis of sirohaem. Both of the catalytic activities of this enzyme (precorrin-2 dehydrogenase EC:1.3.1.76) and sirohydrochlorin ferrochelatase (EC:4.99.1.4) are located in the N-terminal domain of this enzyme, Pfam:PF13241 [1]. 24.00 24.00 24.00 24.00 23.30 23.90 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.26 0.72 -4.67 15 193 2012-04-25 14:50:59 2012-04-25 15:50:59 1 7 134 3 147 186 0 66.60 29 18.74 NEW PcNhGsAl-sVGpLRp+LRc...lAPss..c-GsKRMcWMoplCEtaoL--Lsphs-pDh.-sLLsaahssplsohp ..........................thttAl.plG.LRttlRt.........hsss.........ppts.+..R.McW.................hsplC-hWsLcclspl..s-pDh..csLLphY.psp....p................ 
0 37 78 123 +14675 PF14824 Sirohm_synth_M Sirohaem biosynthesis protein central Eberhardt R re3 CATH:1kyq_A_02 Domain This is the central domain of a multifunctional enzyme which catalyses the biosynthesis of sirohaem. Both of the catalytic activities of this enzyme (precorrin-2 dehydrogenase EC:1.3.1.76) and sirohydrochlorin ferrochelatase (EC:4.99.1.4) are located in the N-terminal domain of this enzyme, Pfam:PF13241 [1]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 30 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.97 0.72 -7.03 0.72 -4.96 20 356 2012-04-25 15:03:26 2012-04-25 16:03:26 1 10 344 3 153 298 5 29.90 43 11.74 NEW cs.usLQIMISTNGsuPRLuuLl+ccIcptl .........G.LpIhVSTsGtSP+LAphIRccItsph....... 0 50 98 134 +14676 PF14825 DUF4483 Domain of unknown function (DUF4483) Eberhardt R re3 Jackhmmer:Q6ZQR2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 326 amino acids in length. There is a single completely conserved residue N that may be functionally important. 
24.00 24.00 24.70 24.40 21.90 22.80 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.35 0.71 -4.65 27 116 2012-04-26 08:48:36 2012-04-26 09:48:36 1 10 64 0 94 109 0 165.70 23 57.40 NEW shpsohtpNsLL.hKsclG+sppssasLPstc...asYGhhstt.pstGstEsh.pW.........ptptss.sppssp.........DFhphN+tAlppG.lsTA+.......-.htaRpp+s...l+hKs.t.....stptt.........ptssphssshs...aGhso+sso............PltpllptpYtppahpppttpptthppppp.........ttp.....lttT+AShh......++ht..spspp......aKMs+F.cpVss+lpo ............................................psclG+s+.psh..sLPs.s...asYGh..tt..stGs.-.........sh.pW.............................p.ht.p.st..th..........sahthN+tulptG.hhoup.................-.h.aRp.ps...l+hp.tp......tt.t...........................p.s.thssshs...aGh.s+sso..................shhpllptpYtp.ahppttt.t.ttht.pptpt.........................hhps+us.h..........................ppht...p.pt......a+h.+F.pph..th............................................. 0 65 74 81 +14677 PF14826 FACT-Spt16_Nlob FACT complex subunit SPT16 N-terminal lobe domain Coggill P pcc CATH:3biq_A_01 Domain The FACT or facilitator of chromatin transcription complex binds to and alters the properties of nucleosomes. This family represents the N-terminal lobe of the NTD, or N-terminal domain, and acts as a protein-protein interaction domain presumably with partners outside of the FACT complex [1]. Knockout of the whole NTD domain, 1-450 residues in UniProt:P32558, in yeast serves to tender the cells sensitive to DNA replication stress but is not lethal. The C-terminal half of NTD is structurally similar to aminopeptidases, and the most highly conserved surface residues line a cleft equivalent to the aminopeptidase substrate-binding site, family peptidase_M24, Pfam:PF00557 [1]. 
27.00 27.00 27.50 35.30 23.20 26.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.01 0.71 -4.71 75 332 2012-04-26 15:17:54 2012-04-26 16:17:54 1 12 266 8 243 336 0 156.80 32 16.08 NEW IDtssFtcRLptLaspWcpsppt.....hausssulllssGt.ss-..-stYpKosALphWLLGY.EFPcTlhlFs..pc..plhhlsSpKKAcaLcslp..p...........s..ssh.slclhsR..stpD...tpspp.Fpcll-tlc...p....sG.pp.lGslsK-..shpGpFh-pWpphhp....ppsh.cpVDlosul ................................lDtpsFhcRlpplaspWcpspp..........aus....ssul.llhsGt..sc-..-..YtKos........ALp..hWLhGY..EhscTlhlFs.....pc..plhhlsSpKKs..............caL....cplp..p.............sut.slpl..l..lRs.p-......pscp.Fccllctl+...p........sGpp.lGshsKD.....p.pGpFhcpWpchhp....ppsh..cplDloss............................................. 0 81 133 202 +14678 PF14827 Cache_3 Sensory domain of two-component sensor kinase Coggill P pcc CATH:3by8_A_00, Pfam-B_120 (release 26.0) Domain Cache_3 is the periplasmic sensor domains of sensor histidine kinase of E. coli DcuS. This domain forms one of the components of the two-component signalling system that allows bacteria to adapt to changing environments. The ability of bacteria to monitor and adapt to their environment is crucial to their survival, and two-component signal transduction systems mediate most of these adaptive responses. One component is a histidine kinase sensor - this domain - most commonly part of a homodimeric transmembrane sensor protein, and the second component is a cytoplasmic response regulator. The two components interact in tandem through a phospho-transfer cascade [1]. 
25.50 25.50 25.50 25.50 25.40 25.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.50 0.71 -10.42 0.71 -4.19 258 2633 2012-10-01 23:40:40 2012-04-26 17:59:26 1 59 1299 16 414 1784 44 115.40 31 21.38 NEW hslAcslApsPtlh..pu..lp..s.ssss...............................Qshscplppp.ssssFlVlhDhpGlRhoHP.sspp...IGcphh.....sD....ptAL.....pG.csasu.p.....spGoL..G.SlRuhsPlh...s..ss....G.cl.lGhVuVGhhhssl .......................................shAchlAsssplh..pu..lp....p..ps.p...............tl........pshsp.plpc.p...ss..hsalV.lsDt.pul.Rh.sHP....ss..p+....IGpshp...s..sD........tAL....pG....c...p...hs.u..h.............................scG..ol.....G...ul.Rshs..PIh.........s...ps..............G...cl.IGVVulGh.lsp........................................ 0 115 250 340 +14679 PF14828 Amnionless Amnionless Eberhardt R re3 Jackhmmer:Q9BXJ7 Family The amnionless protein forms a complex with cubilin. This complex is necessary for vitamin B12 uptake [1]. 26.90 26.90 27.00 26.90 26.00 26.80 hmmbuild -o /dev/null HMM SEED 437 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.93 0.70 -12.78 0.70 -5.99 16 114 2012-04-27 08:29:45 2012-04-27 09:29:45 1 8 76 0 77 100 0 313.20 24 83.41 NEW hKhWsssssFssAuNWss.sphPCupDhl.FPuphsssl.l.psh.olsshlLPpsGtllLApsu..hhhuusss.sssCts...ttpAhF+sPcpppWaDPss..Wpstsspsuh.........hss-hERVPCpp..........-cVlF..up.ushplsLp.ssphl+ltplphuGpoho+sp.LppaLuochGphhFH.supslpVp.h+....CspsptCsC...............spschh-hlCusl......pCst..spChsPlRP.GpCC.lCGAllplsps...sshDhcphps+lpcthhppshhp.plphtVuhVsppp.........hssplQllllD..pGs.otpuschhtp.hhtchpsphtshtt.shplptuGpPhssssuhs.............lsshlLhsLlLVullusllLh+h...........p.shls+lspWh+c.chhth.t....thspsFh.spFDsss...........ttsssltpls..............uhcutsspspc.psFsNPhF-p 
................................................................+.a...shphpsstsW.p.tthPp.ts.l.Fst......h.l.l.....tsh..tltthhlP.hpGthllsptu...h.h.......s.s..tt....t...........t..h.h.....st....p....WhsPt.Wtstst...........................h..c.EplPCp...........-pVhh...sp.......sshtl..tlt...t.lpltpl.hh..s..pp..hstt..htth.hsp.p.....Gp...Fh...s...ttl.......lt..t.........phC.C...............s...t..........lCt.h.........pC.....stC.tslhP..GpCC.lC..Gu.hl.hpht....t.shpphpttl.phh.......t...tlththphh.p......................lp..hllhc.......tt..st.tht.ht.....tp.tt.h............hph..stt........h..................h..hh...hh.hhhhh.......hh.h........................................................................................................................................................................................................ 0 29 34 59 +14680 PF14829 GPAT_N Glycerol-3-phosphate acyltransferase N-terminal Coggill P pcc CATH:1iuq_A_01 Domain GPAT_N is the N-terminal domain of glycerol-3-phosphate acyltransferases, and it forms a four-helix bundle [1]. Glycerol-3-phosphate (1)-acyltransferase(G3PAT) catalyses the incorporation of an acyl group from either acyl-acyl carrier proteins or acyl-CoAs into the sn-1 position of glycerol 3-phosphate to yield 1-acylglycerol-3-phosphate. G3PATs can either be selective, preferentially using the unsaturated fatty acid, oleate (C18:1), as the acyl donor, or non-selective, using either oleate or the saturated fatty acid, palmitate (C16:0), at comparable rates. The differential substrate-specificity for saturated versus unsaturated fatty acids seen within this enzyme family has been implicated in the sensitivity of plants to chilling temperatures [2]. The exact function of this domain is not known. it lies upstream of family Acyltransferase, Pfam:PF01553. 
25.00 25.00 26.00 48.40 21.00 19.90 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -9.34 0.72 -4.29 19 48 2012-04-27 10:01:24 2012-04-27 11:01:24 1 2 38 2 14 50 0 73.80 53 17.95 NEW +S+oFLcscsEQ-LlSsI++ElEuG+LPsslAsGMEELYpNYKsAVlpSGsPpAcEIlLSNM.sshhDRlhLDVc-PF ........SRsFLcsRsEp-LLusI+KEsEuG+LPssVAsGMEELYtNYKsAVlpSGsPpAcEIlLSNM.sshhDRlhLDVc-PF...................... 0 3 9 12 +14681 PF14830 Haemocyan_bet_s Haemocyanin beta-sandwich Eberhardt R re3 CATH:1js8_B_02 Family This antiparallel beta sandwich domain occurs in mollusc haemocyanins. Each mollusc haemocyanin contains several globular oxygen binding functional units. Each unit consists of an alpha-helical copper binding domain (Pfam:PF00264) and an antiparallel beta sandwich domain [1-2]. 24.00 24.00 24.60 24.00 23.10 22.50 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.09 0.72 -4.32 27 179 2012-04-27 10:19:59 2012-04-27 11:19:59 1 15 36 7 0 211 0 99.90 40 24.82 NEW R+p+DRVFAGFLLcGltsSAsVca.lCps..s....scC.pc..AGhFhlLGG-tEMPWuFDRLaKYDITcsLcphslc......hc-sFplclplsusNGotLsucll.sPollapP .........ppc-RlFAuFLLpGItsSAsVph.lCts....s......spC..phAGpFhlLGGp.pEMPWsFDRlaKaDITcsLcclslp......hcssF......plclclhulsGotL.s.ssllPpPolla.P........................................... 0 0 0 0 +14682 PF14831 DUF4484 Domain of unknown function (DUF4484) Coggill P pcc KOGs (KOG4704), PF09804 Domain This domain is found, in a few members, a the the C-terminus of family Avl9, Pfam:PF09794. The function is not known. 
25.60 25.60 27.00 26.20 18.40 25.40 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.43 0.71 -4.17 13 120 2012-04-27 10:43:25 2012-04-27 11:43:25 1 6 101 0 85 117 0 151.50 27 30.65 NEW lEPlSWscLAYouFIWWASAGEpc.....cppE..ph-pDopLLAss-sssoP...................................sp.s...........pt.sElulVAYFHRLTsplFssLuDlIscpDsc.....ss.-s-......s........................-s.spD.................tD-s.uPL...................psp.slclTspDhs-MGLDsWSsuDh.FVcElVtpaWGR+AhV-uscIcsCGluIs ...............................................lEPlSWsthhasuFhWWASAGpph........t..p.tpDttlh.t............t.....................................................................................tpsph..tlsllsYF+RLToplhshltDlltppppp....p....t..................................................t.......................................................................ptt.lplsspshtpMsLDsaSttDt.FVc-ls.haa.tRp..h.lpshtlthsh.............. 0 13 43 71 +14683 PF14832 Tautomerase_3 Putative oxalocrotonate tautomerase enzyme Coggill P pcc CATH:3c6v_A_00 Domain 4-oxalocrotonate tautomerase enzyme is involved in the anthranilate synthase pathway.1 25.70 25.70 25.70 25.70 25.60 25.30 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.69 0.71 -4.16 26 117 2012-10-01 20:38:22 2012-04-27 13:13:24 1 3 81 22 87 147 121 130.80 28 84.02 NEW MPLWpIaHPsssFo.s.p.-KpsLApsITchY........ssh.GLPsFYVsVhFhclsss...shalGGcspss.................FlRIslsH.IARp.........hssc..-p..ppchhp.tlstsLcPal...tD+.GhcWEaHl-EsspcLW+IsGlhPPsssStsE+cWsp-N+ ............MPhaplaHs..ssho.s.ppKpplApsIT.p.hY........ssh..sl.PtFYVsVhFh..clsss....shalGGctpsp..................hlpl.hlpH..lu..Rp..........hpst..-t..ppphhp.tlsthl........t.shh........tpc...uhchp........hhls-sshphhh.sGhh.P..tptt...Wht.s................................................................................. 
0 13 44 69 +14684 PF14833 NAD_binding_11 NAD-binding of NADP-dependent 3-hydroxyisobutyrate dehydrogenase Coggill P pcc CATH:2cvz_A_02 Domain 3-Hydroxyisobutyrate is a central metabolite in the valine catabolic pathway, and is reversibly oxidized to methylmalonate semi-aldehyde by a specific dehydrogenase belonging to the 3-hydroxyacid dehydrogenase family. The reaction is NADP-dependent and this region of the enzyme binds NAD. The NAD-binding domain of 6-phosphogluconate dehydrogenase adopts a Rossmann fold [1]. 30.00 30.00 30.30 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.37 0.71 -4.05 132 7339 2012-10-10 17:06:42 2012-04-27 14:05:12 1 47 2857 51 2191 5638 4290 121.10 28 40.35 NEW GuGpssKhsNshlsAsslhAsuEAlshut+.sGlDspthhcll.su.usupS...ts....h....p......sh......hsphll....s....psF.ssG.FslsLhhKDlslAhsh..ucphsssh.Plsuh.sppla.p.s.u.t.s.p.hs..s.s.t.Daoul.l.+h.l ..................................GsGpssK..lsNphllusphtuhuEA.........hsLA.p+..sGl.c..pt...lh.cs.l........ss.....u...s...u.....s.S......hh.........h.....c..........sp............hst.h....hh...........s..........ps.a....s..P......u....F...t...l.....c...lhhK...D...........L.....slAlcs............Ac....p...hs...h..s.l..P..l.s.ut...s....p.pha.p.p.h...t...s...t...Gh...upt...DhSulhph........................................... 0 558 1233 1760 +14685 PF14834 GST_C_4 Glutathione S-transferase, C-terminal domain Coggill P pcc CATRH:3bby_A_02 Domain GST conjugates reduced glutathione to a variety of targets including S-crystallin from squid, the eukaryotic elongation factor 1-gamma, the HSP26 family of stress-related proteins and auxin-regulated proteins in plants. Stringent starvation proteins in E. coli are also included in the alignment but are not known to have GST activity. The glutathione molecule binds in a cleft between N and C-terminal domains. The catalytically important residues are proposed to reside in the N-terminal domain [1]. 
30.00 30.00 30.10 30.00 29.80 29.90 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.50 0.71 -4.16 7 561 2012-10-03 01:14:49 2012-04-27 14:19:54 1 4 556 1 52 223 10 116.50 75 54.87 NEW DlQKRARARQlQAWLRSDLMPIRtERSTDVVFAGsKhuPLSpsGptSAcKLhAsApsLLuHGp.NLFGEWCIADTDLALMlNRLlLpGDcVPEtLsDYAoFQWQRASVQRalALSAK ...DLcpRARARQIQAWLRSDLMPIREERP....TDV..VFAGAKK.APL....os....cG.KASA...EK...L.FA...h...AE...+.L...L...u.h.GpsN...LFGEWCIADTDLALMINRLVLHGD-VPEpLsDYATFQWQRASVQRFIALSuK................. 0 5 15 38 +14686 PF14835 zf-RING_6 zf-RING of BARD1-type protein Coggill P pcc CATH:1jm7_B_00 Domain The RING domain of the breast and ovarian cancer tumour-suppressor BRCA1 interacts with multiple cognate proteins, including the RING protein BARD1. Proper function of the BRCA1 RING domain is critical, as evidenced by the many cancer-predisposing mutations found within this domain. A dimer is formed between the RING domains of BRCA1 and BARD1. The BRCA1-BARD1 structure provides a model for its ubiquitin ligase activity, illustrates how the BRCA1 RING domain can be involved in associations with multiple protein partners and provides a framework for understanding cancer-causing mutations at the molecular level [1]. The corresponding BRCA1-RING domain is on family zf-C3HC4_2, Pfam:PF13923. 35.00 35.00 35.00 35.30 34.80 34.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.83 0.72 -9.37 0.72 -4.19 5 67 2012-10-03 15:03:13 2012-04-27 14:43:25 1 22 45 1 38 63 2 62.10 57 9.95 NEW LDRLEKLLRCSRCTNILREPVCLGGCEHIFCSNCVSDCIGSGCPVCYTPAWIQDLKINRQLDSMI ....LtclEpLLRCS+CssI.LREPVCLGGCEHlFCSsCl..uD....ClG..o..s..CPVCaTPAWlQDlKINRQLDuMI............ 0 5 8 18 +14687 PF14836 Ubiquitin_3 Ubiquitin-like domain Eberhardt R re3 Jackhmmer:Q6K1E7 Domain This ubiquitin-like domain is found in several ubiquitin carboxyl-terminal hydrolases [1] and in gametogenetin-binding protein. 
24.00 24.00 24.50 25.10 23.70 23.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -9.98 0.72 -4.32 6 283 2012-10-03 10:59:06 2012-04-27 15:38:54 1 13 52 9 133 242 1 86.40 46 9.70 NEW uhKLClPGhssLpSPlpKtFRSoDTVGFlEuELKKLLul..Q+EoRLWKhsus-GhELLspP-lTlpEAGlh-..............GQpLLLEEhsEMGNWPP ......................-.hpLC.sss..hsssloppFS+sDTIs.sIE+EhRclFsIssEcE.T..RL..WsKh.hsN.oa..E.LscsDsTlpDAu...Lhp................................................G.Q.lL....VIE.+NcDGTWP......... 0 23 31 57 +14688 PF14837 INTS5_N Integrator complex subunit 5 N-terminus Eberhardt R re3 Jackhmmer:Q6P9B9 Family This family of proteins represents the N-terminus of subunit 5 of the integrator complex involved in snRNA transcription and processing [1]. 27.00 27.00 27.20 27.50 25.00 26.20 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.38 0.70 -11.42 0.70 -4.92 17 78 2012-04-30 08:33:14 2012-04-30 09:33:14 1 5 69 0 57 82 0 214.30 46 23.26 NEW tpslls-LptFlpsss...tp..shp......pLs+sALpLLcslPuuRsAVh-ahshlFccuVppahsshE...............t.ptstssss.-sslpElpssLpp.hlpssPpAWuslIssWulcLlGclus+hstRts.h.t...sls-hlphWhu.CpAh+pLlslhspClspllspss-sClssLLsouspHoPpFDWVVAHluosFPtsllo+lLssGlccFsst ....s.QpL.pElKsFlsulsshhupp.ssc......-hs+suLhLL+sLPsARsAVL-ahpsVF-EuVptalssl-.t..t................sscsssssssl--llpEVpplLpc.FlcsNP+AWAPlISsWS..l..........-LhGpLSSpYosR+t.hPpus.ulNElLQLWMu.CpATRoLM-lhspCLusLlsussDACV-ALLDTSVpHSPHFDWVVAHIGSsFPsTIIoRlLuCGLKDFss.p. 0 14 18 40 +14689 PF14838 INTS5_C Integrator complex subunit 5 C-terminus Eberhardt R re3 Jackhmmer:Q6P9B9 Family This family of proteins represents the C-terminus of subunit 5 of the integrator complex involved in snRNA transcription and processing [1]. 
27.00 27.00 28.70 27.30 23.70 23.20 hmmbuild -o /dev/null HMM SEED 696 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.32 0.70 -13.19 0.70 -6.24 6 104 2012-04-30 08:33:54 2012-04-30 09:33:54 1 5 74 0 78 100 0 471.50 28 62.76 NEW KlsSVVGILGHLAupHucsIRKtLLcMFptSLhst........ssshsus.pl++...ATVPFLLQLAuhSsTLLuAlSs-llchL+Pssl..Lsphtscapua.+p-h-shhsLsVHLl.psssGGA.+llphLlDTuossSslhstsssus.psl+EsCccLLQhLLh+LccLV+s+.............s..spsIPFL-ulpscluplssphLplccpRp.htpQLLsLLu.assPSlss-uhsaLLspApssEcLALhhpLhTp.ss.............shuGllshslcpsLupI......aspsls.p-htQLhpNLuphlp..aEcssphss....sh.uphlupAluuNLpshosLL.hps-sslucAhspLLuhhslPp.....sLSss.lLpLoRAsVpaFFhCLpppupstKs.........shpssspLLsRLsuh...SssupthsLppLlEsALa+usscLFGu..cpspcspppuhl........LL-pN++hsTolshstph.SVFHAGVIG+GhK.hhsspphs.-hVspNsppLlslIhpCCSssusss..............sp.ssIshEAsphVulhLVE.VsPDVhhstLsWPsEEauKsTlERDl+IRRpFcctPlLapLLclVAssRPALCYCSVLLRuLhATLlupWcoopc......ossS.hhLpsoshLVslMu.GQLLPPsLusl+-lhPpLsPFEVplLLhs.VWsYMR-NsP.PthFshsucpGhaaRDhoh-u..ssupYsssl+.VLp+NIcpL 
......................................................................................................................................................................................................................................................................................................................................................................hh....u..ps..t.s.h.plht.......................................tp..h.p..................h...........hthh.Nl.hh.t...p.......................ht.th......l...h............hh........th.p.hspllt...hs........t....hL.ls...lphFF..hslp.ps..t...................tlL.c.sth...p..shhhsLp.llcsul...tt.thhGt....t......s.........................Ll..N.+......hstt..SVh.....HuGlI..G+......Gh+..............t...s.p.....p.t.hlphl.tCCts...t..t............................s..c..uhphlulhLV.E.l...ssD........s...h......stLsaP...sE-a...s+.s..ThERDltIt+tF.ppPlLatlLtllAtt.PA....LsYsSslLRuLhAshltpWcu...p..p...p......sst..ps....hh.soppLltlh.....u.upLlPPsLsphp.lh..htshElthlLhp.lWsah+...-psP..PthFh....stpu.hhhRshs.......tt......s..ahssl+.lhppplpp........................................... 0 26 33 57 +14690 PF14839 DOR DOR family Eberhardt R re3 Jackhmmer:Q96A56 Family This family of proteins regulate autophagy and gene transcription [1]. 
25.00 25.00 25.90 29.60 20.70 24.60 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -12.04 0.70 -4.59 12 132 2012-04-30 10:33:17 2012-04-30 11:33:17 1 3 62 0 75 135 0 175.90 37 79.55 NEW ps-c--DtWllV-h.s....................................................................shsttssstpsP.....stsuhPss..........sh...................MEESWFVTPPPCFTAtG...ssstlEoSPhEsLLIEHPSMSVYsspssp.tsst..................cs...stsss.tsptppp...h.cps.spsAth.ss.puthLEpspphRtsQpspp+tE+ptLs+pslpRQNhsR-ppsRph+pp..ushlHQPs.Rp ...............................................p-.-.DtWhll-h.s................................................................................................................................h...s.sttss..........s.tshsss................ph........................M-ESWFlTPPsCFTAtG...ssh+lpoSPhEsLLIEHPSMSVYsstsohst.st..................................tsth....spt...c.p.psp.....hptp.t.......tthp.phsAsh.su.psshLcpspph....R....Qhh+p+sE+ptLs+pslpRQNhsR-pps+ps+pp..u.hhaQPs.p................................................................................... 0 11 18 37 +14691 PF14840 DNA_pol3_delt_C Processivity clamp loader gamma complex DNA pol III C-term Coggill P pcc CATH:1jr3_D_02 Family This domain lies at the C-terminus of the delta subunit of the DNA polymerase III clamp loader gamma complex. Within the complex the several C-terminal domains, of gamma, delta and delta' form a helical scaffold, on which the rest of he subunits are hung. The gamma complex, an AAA+ ATPase, is the bacterial homologue of the eukaryotic replication factor C that loads the sliding clamp (beta, homologous to PCNA) onto DNA. 
25.70 25.70 25.70 25.70 25.60 25.60 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.08 0.71 -10.72 0.71 -4.01 3 1110 2012-04-30 11:55:48 2012-04-30 12:55:48 1 2 1105 10 199 712 93 123.50 45 36.20 NEW PFHWVDALLuGKSKRALHILQQLRLEGCEPVILLRTLQRELLLLVsLKRQSAHTPLRoLFDKHRVWQNRRsLLo-ALsRLStTQLRQAVTLLTRsELTLKQDYGQsVWAELEoLSLLLCHKALAD .............PF+WlDALLhGK..upRAh+ILpQL+hEG.sE...PVIL.LRTLQREL..hlL.ls...L......+....+.........p.....p..u..........p.....s..........P.........L.c..s.L..F.D.+.a..R..VW.Q.NR.R.s.hhspA..LpRLo..s...pLp...pAlp...lLs+hE...lplK....p.sa.upslWs.pLcsLSLhhCt......................................... 0 36 94 149 +14692 PF14841 FliG_M FliG middle domain Eberhardt R re3 CATH:1lkv_X_01 Family This is the middle domain of the flagellar rotor protein FliG [1-2]. 25.00 25.00 25.00 25.00 24.90 24.80 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.99 0.72 -9.33 0.72 -3.97 335 2392 2012-10-02 13:19:07 2012-04-30 15:19:29 1 7 2070 8 564 1518 436 79.20 35 23.40 NEW -spplsshlpsEHPQslAllLuaL.csspAAplLutLPp.....c.hps-lhhRlAphcsl....sPcslcclccsLccclsuhsspptsp ......-PpplAslIpsEHPQhIAlILuaL.csspAAclLuhhs-.....c.h+t-VhhRIAshssl....pPssl.pElsclLpppLsu.stpp..p................................... 0 185 361 455 +14693 PF14842 FliG_N FliG N-terminal domain Eberhardt R re3 Jackhmmer:O66891 Family This is the N-terminal domain of the flagellar rotor protein FliG [1]. 
25.00 25.00 25.80 25.00 24.80 24.90 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.03 0.72 -3.94 137 2340 2012-10-02 13:19:07 2012-04-30 16:08:09 1 5 2032 4 556 1742 439 106.20 30 31.40 NEW ttppLoGtpKAAILLluLGc-tuuplh+c.LspcElpplotphuplpplsppphcsVlcEFhp..hhhupshlshGuhcas+plLpcuLGs-+Appllpclt.tshpspsh- ...........h..ppLoGh-KuAILLh.olGE-tAAcV.h.+H.LspcE.....lpplottMApl....pplsp...c...p...l...p...s...V...Lp.E....Fhp.....h.p...p...p..ss...ls......hs..ut..-Yh..+...plLh..+ALG..p-+Assll-clh.tstp.....p..................................... 0 183 356 449 +14694 PF14843 GF_recep_IV Growth factor receptor domain IV Eberhardt R re3 CATH:1m6b_A_04 Domain This is the fourth extracellular domain of receptor tyrosine protein kinases. Interaction between this domain and the furin-like domain (Pfam:PF00757) regulates the binding of ligands to the receptor L domains (Pfam:PF01030) [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.16 0.71 -12.29 0.71 -4.54 30 1237 2012-10-02 14:20:19 2012-05-01 10:27:03 1 65 119 40 458 1185 9 117.10 34 13.30 NEW lCsshCossGCWGPGPsQCLSC+sasRs..........spCVpsCshhpG.sREats.sppChpCHsEC...ttt...tTCsGPGsDpCscCsHap.......D..GsaCVpcCPpGl..utpt..laKYuDtsshCc.CH.NC.opGCsGPs.psCh ...............................Ccs.Cs....s.u..CaGsssspC.l.sC...p....p.....a..p...h..t..........................ppCl..spCs..h...........s...tt...h......tt....pppChtCHPcC.............poC.sGs....Gu....D.pC...........h..........p.C..t....p....hp...............................-....s.pCVsp.CPps.......................................pY..s..s........ps.hCp.Cp..sC.....t..s..Ctt.......s................................................ 
0 183 209 322 +14695 PF14844 PH_BEACH PH domain associated with Beige/BEACH Eberhardt R re3 CATH:1t77_A_01 Domain This PH domain is found in proteins containing the Beige/BEACH domain (Pfam:PF02138), it immediately precedes the Beige/BEACH domain [1]. 27.50 27.50 27.70 27.60 27.40 27.40 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.17 0.72 -4.20 47 917 2012-10-04 00:02:25 2012-05-01 14:40:39 1 50 255 6 593 862 7 103.80 25 4.48 NEW pllhohpsphlsPhsshpGplplossclhFhss..........................ttppthhs.stshphcW.hsplcpla.hRRYhL+coALElFhsDposhhhsF..sppsppclhpplh .....................................................................................................s.hltsh..h..h.GhL................ls..pp..p..hY.h.ss........................................................................t.t.t......hst.s..pthstp.Whhscl+pla.pRRaLLp.ssALEl.Fh....s..............s......t.....p.....s...h....h.l..sF..spps+pclhptl.h.................. 0 218 303 448 +14696 PF14845 Glycohydro_20b2 beta-acetyl hexosaminidase like Coggill P pcc PF02838 Domain \N 27.00 27.00 27.00 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -11.26 0.71 -3.48 183 633 2012-10-01 20:56:08 2012-05-01 14:42:21 1 14 263 35 389 763 42 125.00 26 22.32 NEW lW..PtPpt..hph......uspshtlsstshpht.tss...ttt......................................................plLppAhp.Rah...................phlh...............................................s.t.tshh.........................................................................hpt.............................................................................tl...ppl.................p...........lpl...ps.t.t.........l....phssDES............Y.sLpl.................phs.......spIpApohaGAh+G.LEThoQL 
...........................................................................lWPhPpp..hph......usthhh.l.s...thphp.tsss..........................................................sppllppAhp.Rhh..............phlh..............................................................t..t.hhshh.........................................................h..tstt.............................................................................................tl....ppl.................p..lpl........ss.s..ts...........h....phssDES............YsLsl.......................sps..t...tsplpAposaGAl+G.LEThSQL.......................................... 0 139 213 310 +14697 PF14846 DUF4485 Domain of unknown function (DUF4485) Eberhardt R re3 Jackhmmer:Q8N8E3 Family This family is found in eukaryotes, and is approximately 90 amino acids in length. 24.00 24.00 26.40 24.50 23.90 22.90 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -9.81 0.72 -4.27 27 127 2012-05-01 14:06:49 2012-05-01 15:06:49 1 7 65 0 79 139 0 82.30 36 12.52 NEW pLDppFphhlthlcshltpLs.ct.-+hhsstWlpKLpps.....psht.+cpRNtYhthLlttlp.psplp.sPFsc.PPsGsL.slsphh .......................................................LDtEFcphlhphcshlhpLspcp.-+ppsshWlcKLspss...tsshht++sRNhYuclLLchLp.cGhL-.uPFscpP.sG.sLtsLspa............ 0 37 42 59 +14698 PF14847 Ras_bdg_2 Ras-binding domain of Byr2 Coggill P pcc CATH:1k8r_B_00, Pfam-B_3317 (release 26.0) Domain This domain is the binding/interacting region of several protein kinases, such as the Schizosaccharomyces pombe Byr2. Byr2 is a Ser/Thr-specific protein kinase acting as mediator of signals for sexual differentiation in S. pombe by initiating a MAPK module, which is a highly conserved element in eukaryotes. Byr2 is activated by interacting with Ras, which then translocates the molecule to the plasma membrane. 
Ras proteins are key elements in intracellular signaling and are involved in a variety of vital processes such as DNA transcription, growth control, and differentiation. They function like molecular switches cycling between GTP-bound 'on' and GDP-bound 'off' states [1]. 27.00 27.00 28.90 27.40 26.60 25.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.31 0.72 -4.11 35 148 2012-10-03 10:59:06 2012-05-01 16:35:46 1 5 140 2 96 150 0 108.10 33 12.03 NEW pslphIhpsGpo+tVNlssChsupplhc+sLKKhulpp............p.psashalhss......................t.ssssh+hLsDsELlsICpu..ssRsE+pRLIL...sppsc..Pstctlpputplhh .......llplIhssGsTKsVNlpsCpsu--lhcpsL+Khshpp................pp.psYshaVLsu......................pssssss+hLsDsELhpICcu..scRsERsRLIL.Rplpts-...Pstcplpputtlh.h................ 0 24 53 83 +14699 PF14848 HU-DNA_bdg DNA-binding domain Coggill P pcc JCSG_Target_393235 / GS13689A, Pfam-B_2593 (release 26.0) Domain \N 30.00 30.00 30.00 30.10 29.90 29.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.02 0.71 -4.40 21 175 2012-10-02 15:10:05 2012-05-01 17:10:52 1 2 71 0 39 179 1 122.40 23 58.18 NEW hLKhhLh-NhlT.-spsDahu.pltsssolslc-Il-chhKc.sosl+pETltpslplhpchls-hlhsGhuVNTGlhpssssl+GVaps.sspass....p+poltlshssuspL.RpslucspVphhs.t ........................phhLhcNh.hs....-stsch.hu.ps..pt.ssslslc-lsccltpc.sosl.ppt..slhsllshhpcpht-hlhsGhsV...p...h...G...lhphslpl..p..G.s.h.s......sssa..ss....php..pl...tVsassupcL.+ctltphth.....h....................... 0 16 36 39 +14700 PF14849 YidC_periplas YidC periplasmic domain Eberhardt R re3 Jackhmmer:P25714 Domain This is the periplasmic domain of YidC, a bacterial membrane protein which is required for the insertion and assembly of inner membrane proteins [1,2]. 
24.00 24.00 24.10 24.00 23.90 23.40 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.91 0.70 -4.50 388 2338 2012-10-02 23:57:29 2012-05-02 08:46:08 1 6 2296 4 565 1722 2667 274.10 26 49.21 NEW plslcTchlp.hsI.s....spGGclschpL....t..pY..tsh...........ssps........s...hpL....lsssst........ahuphGhh..s.........s.t.ss......ps...t.apsss...s.....ht.hp..........sphs..ls.Lshsss.s.G.l.....php+sashc.s...s........Yhlslphplp.N.pustslss..p.as..plpps............tt.........t....h.h.....sa.pGssh.............p............h..pchsa.....s-h.p.........................hp....tt.st......Wluhhp+YFsoAh.......l....P.p.........ss...shhsp.hh........................hhth..sh...................h..s..shslssG.sstsh..ssplasGPK...........chchL....................p.......t.........................l................s..........pL-hslD......aG.Wh........halu+PlFhl .......................................................................................................................lslcT-slc.lsI.sspG.GclpphhL....h..pY...pph........................sssp.....................s.......hpL...lpsssth......................Y.ApsG.Lssts........s.ss........spsh.asspt.......s.sh...l..ptp.................................................sphp....Vs.hoasss..s....G..h........phpKsashccs.....s.....................YhlsVshplp.......N..s....u.......spslph.....ssas...plpps.......s.t...................tss....hsh....oa.pGssh.ssp...........pphpKhpa....cclpc..........................................tp.phssp..sG..........Wl.Ahhp+YFsoAa.......I....P.p.......................ss..p.ssh.hsp.phs........s.....................hhth..uh...........................p...s.....shsltsG...psssh..supLasGPc............pctl....................t......s...........l....ss..........pL-.h.s.VD....Y..G..Wh........hFIupPlFhl..................................................................................... 
0 177 356 464 +14701 PF14850 Pro_dh-DNA_bdg DNA-binding domain of Proline dehydrogenase Coggill P pcc CATH:1k87_A_02 Domain This domain lies at the N-terminus of bifunctional proline-dehydrogenases and is found to bind DNA. 27.00 27.00 30.40 29.10 22.00 20.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.16 0.71 -3.89 114 1463 2012-05-02 13:07:34 2012-05-02 14:07:34 1 7 1420 13 300 1184 316 113.30 63 9.56 NEW uhl-uhLpEYuLSocEGlALMCLAEALLRlPDstTtDtLIcDKlusucWppHlG.pS.sShhVNAuTWGLhlTG+llsss.....c..pt...shsusLppllpRhGEPlIRpAlppAM+lMGcQF ...hVpuLLpEFSLSSQEGVALMCLAEALLRIPDpATR...DALIRDKISsG..sWpSHlG...pS...sSLFV.NAATWGLlhTGKLVusp........s.....-ssLSpuL..sRlIuKuGEPlIRKGVchAMRlMGcQF............. 0 58 149 224 +14702 PF14851 FAM176 FAM176 family Eberhardt R re3 Jackhmmer:Q9H8M9 Family Members of the FAM176 family regulate autophagy and apoptosis [1-2]. 25.00 25.00 25.00 25.30 24.90 24.90 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.39 0.71 -11.01 0.71 -4.53 13 190 2012-05-03 10:05:55 2012-05-03 11:05:55 1 8 53 0 110 149 0 132.10 37 48.52 NEW -MsLLSNSlAAYAaI+sNPEphALYFVhGVChGLlLTLChLVlpISC...Rscs+t.psPc++ph+-ss..ss--ss-p--s-p--suDlcssc..............psph-tTL.ssNVFTSAEELERAQRLEERERIIREIWhNGQPDlh..GTtolGRha ..............h.ll.SN.LAuauaIp.spPEpsALYFVsGVChGLlLTLChL.VlRlSC....ppch+.....pts...p.cchh.tppp...psp.-ss....-..s.p.-t-.p.-.sss-.sh.t................pt.pts...shsVa.oShEthEhApRlEcREpIlpEIWhsut.Dh...hstshs......................................... 0 5 14 58 +14703 PF14852 Fis1_TPR_N Fis1 N-terminal tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q9Y3D6 Domain The mitochondrial fission protein Fis1 consists of two tetratricopeptide repeats. 
This domain is the N-terminal tetratricopeptide repeat [1-2] 25.00 25.00 25.00 25.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.08 0.72 -7.27 0.72 -4.65 63 299 2012-10-11 20:01:04 2012-05-03 11:57:25 1 4 237 10 204 283 0 34.80 37 21.62 NEW tpopFpYAWuLl+SpttpDh.pcGltlLpplh+sss ....popFpYAWuLl+SptpsD..pcGltlLp-lh+ps....... 0 51 100 164 +14704 PF14853 Fis1_TPR_C Fis1 C-terminal tetratricopeptide repeat Eberhardt R re3 Jackhmmer:Q9Y3D6 Domain The mitochondrial fission protein Fis1 consists of two tetratricopeptide repeats. This domain is the C-terminal tetratricopeptide repeat [1-2] 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.93 0.72 -8.39 0.72 -4.08 60 363 2012-10-11 20:01:04 2012-05-03 11:58:45 1 12 284 10 239 400 24 52.10 39 30.22 NEW R-tLYYLAlGpa+lt-YscA++alctLLchEPsNpQAtsLcphI-c+lp+.-GL ............R-hLYYLAlGp.Y+LscYscAc+Ys-tLLch.EPsN...pQAtsLcphI-cclpK-Gl........... 0 73 128 198 +14705 PF14854 LURAP Leucine rich adaptor protein Eberhardt R re3 Jackhmmer:Q96LR2 Family This family of proteins activate the canonical NF-kappa-B pathway, promote proinflammatory cytokine production and promote the antigen presenting and priming functions of dendritic cells [1]. 25.00 25.00 25.10 25.00 24.90 24.60 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.66 0.71 -4.40 7 138 2012-05-03 13:55:31 2012-05-03 14:55:31 1 3 62 0 88 131 0 103.30 40 36.75 NEW psstsussppssspushctL-sKlhhL+.EMAaLRAlDVKlhQQLlslNEGIEulKWlhEE+uslTSRsSSLouS.YSLstup.soShRGSasSLpDss..DcLDuISlGSYLDTLA.D-lsE.s ...............tss...........ts.s.hpsL-splthLRpE.MssLRulDl+LLpQLhslNESIEul+WhlEE+usloSpsSSLouS.h.SLhtup..tpp.cGShs.......................................p........................... 
0 12 18 43 +14706 PF14855 PapJ Pilus-assembly fibrillin subunit, chaperone Coggill P pcc Pfam-B_9717 (release 26.0) Domain PapJ is part of the Pap pilus assembly complex that plays an auxiliary role by ensuring the proper integration of PapA into the fimbrial shaft. PapA is the major shaft protein of the pilus. 25.00 25.00 50.30 50.10 19.60 19.00 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.10 0.71 -4.82 3 54 2012-05-03 16:50:17 2012-05-03 17:50:17 1 1 37 0 1 26 0 178.40 63 99.69 NEW M..NRTTsGLYLAALLlSuSMsoVLQA-ELllRDDFFVADEsRHQWVNE+NGRTGoLNVKGALVSSPCIL-TPEVsLPLpcDNG+YV....LNLKLS+CGDGtS-lPE+cussphNlsVKQSlVLKcGcsslLLS-+KsuGpsRcll+cGDNQLlYhlN+cQYEKIAcsQpcsTt+.chSDu+os..sL+LsIhYE MVV.NKTTAVLYLIALSLSGFIHTFLRAEERGIYDDVFTADE.+HYRINE+GGRTGoLsVSGALLSSPCTLsoNEVsLSLR.-N+....shu....LhL+LuGCGDGGAl.Pu+cuss...MsVsuSlVhusGpsusLLP-+KhuGscHhVl+DGDs.Llhhss+cQp-hLAuhppcsTtc.tcYSD..u+os..hLRLsIcYE............ 0 0 0 0 +14707 PF14856 Hce2 Ecp2; Pathogen effector; putative necrosis-inducing factor Stergiopoulos I, Coggill P pcc [1] Family The domain corresponds to the mature part of the Ecp2 effector protein from the tomato pathogen Cladopsorium fulvum. Effectors are low molecular weight proteins that are secreted by bacteria, oomycetes and fungi to manipulate their hosts and adapt to their environment. Ecp2 is a 165 amino acid secreted protein that was originally identified as a virulence factor in C. fulvum, since disruption reduces virulence of the fungus on tomato plants. We have recently determined that Ecp2 is a member of a novel, widely distributed and highly diversified within the fungal kingdom multigene superfamily, which we have designated Hce2, for Homologs of C. fulvum Ecp2 effector. 
Although Ecp2 is present in most organisms as a small secreted protein, the mature part of this protein can be found fused to other protein domains, including the fungal Glycoside Hydrolase family 18, Glyco_hydro_18 Pfam:PF00704 and other, unknown, protein domains. The intrinsic function of Ecp2 remains unknown but it is postulated by [3] that it is a necrosis-inducing factor in plants that serves pathogenicity on the host. 25.00 25.00 25.60 25.10 24.80 23.80 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.82 0.72 -10.54 0.72 -4.08 39 124 2012-05-04 15:03:41 2012-05-04 16:03:41 1 6 44 0 97 117 0 102.60 27 20.68 NEW spCss.So.ap.spTss.sSP..hssDCtplhpslt..ps.....G....p.W..s..lt.....s.tsp.........+plsphGoCsFGlps......h.p..ush...th.t...lGspDlhDllps..uIs..pau......ps......G+..VGApGphpC ................Css..oo..ah..spTos..uSP...hssDCttlhpslt..ss...........G....p.Wpht...........s..ssp...............pplsphG.....oCsFGlps................tp...ssh.....th.tlGspDlhcllpp..ulp..pas................ts.......G+..VGupGphpC.................... 0 17 47 73 +14708 PF14857 TMEM151 TMEM151 family Eberhardt R re3 Jackhmmer:Q8N4L1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 338 and 558 amino acids in length. 
27.00 27.00 28.90 33.20 26.50 26.20 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.62 0.70 -5.88 9 144 2012-05-08 07:39:01 2012-05-08 08:39:01 1 4 59 0 100 128 0 347.60 50 78.89 NEW QRPlKQSLotSLCRESHWKCLLLSLLMYuChGsluWCpLspVT+Luhss.......u.....apGpS..hlYH......DSPCSsGYlYIPLAFLsMLYlVYLVECWHCas+sphthKs-lsoVhERlpRhQQAsPCIWWKAISYHYVRRTRQVTRYRNGDAYTTTQVYHERVNTHsAcuEFDYupCGVKDVSKpLhGL-sassTRLRFTKCFSFAsscuEsuYLTQRARFFs-NEGLDDYMEAREGMHLKNVDF+EahlAFsDPs+.PWYspphsFWhAuhhhLSWPLRVlsEYRTAaVHYHVEKLFG.......h-hs........u.oPss....sshtttlsRVsTlDoTELEWHIRoNQQ.LVPSYSEAlLMshuptsstsstos..........................psYut...........hhpsC-RCpR..........osSSSSl..................hSRsuht.s..s....ssRLu........husS+FSLGRlaGSRpo..sLaRSRS .............................................................................................QRPlp.ShstSLCRESH.WKCLLLoLLhauC.huslsWCplspVs+Lshss...............u..ht.Gps..hh.Y.......sSP.CSsGYlYIPLAFlhhLYllYLs..ECWHC.psRppht.h+s.DspoVhphlpRhQQAsPCIWWKAlSYHYVRRTRQVTRY..RNGDAYTTT..QVYHERVNTHsAcuEFDY.....uppG.V+DVS.....K.p......LlGLptt..ssTRLRFTKCFS....FussEuEsuYLsQRARFFs-N.......EGLDDYMEAREGM+LK.s.V.DF+E..hls.as.DPtp..PW.YspthsFWhsuhhhLSWPLRVlscYpTAasHYpVEKLFG.....................................................s.ssst.......s.t..l....sRVsTlD..T.E...LEWH....Ip..sNpQ.lVPSY..SEAhlMshst........................................................h..h..p..ssppC.....p+..........s.opsuh.....................tt.t..................................................................................................................... 0 23 34 67 +14709 PF14858 DUF4486 Domain of unknown function (DUF4486) Eberhardt R re3 Jackhmmer:Q96N23 Family This domain family is found in eukaryotes, and is typically between 542 and 565 amino acids in length. 
25.00 25.00 25.50 25.50 21.80 21.80 hmmbuild -o /dev/null HMM SEED 542 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.06 0.70 -12.84 0.70 -6.25 5 92 2012-05-08 08:35:29 2012-05-08 09:35:29 1 4 58 0 60 81 1 326.70 35 26.02 NEW pcaEh+RRGAsTLFNIWsKYcPRLPssYYNEKLLKVGDSLsQIK.................EYKLALhQCYGRYLQQFso.NsDEspsD..lspFKssFFPcGFcDcTAtLTFHALpG+NlCsYQLVC-SDsNLQNcESVppCL+ILSSLRLIMQAALPQEsLCWIIFNGTlHIYoICR+LMsIGQSSKVLEYLLWASMCMESSVPLLSlRYLTWRATLYsAVCQCYYDC+AGIHGEuFARRALuKINEL+QLEpMSSScuopEop+hFREATIKMAVMIFKRuVFESRRKPKulFRPKlRlNLKEsQsLPWPRTsTERLLsEhFDGTASQFLAVLEALSDSNRRlLQTGP.VoD.EsEI+DVVSELFhAGhE.L...LIhuNl+ss.............upLDFPpoSLLEpllsc+NsISV-AAVKFlKLAFoYEEWulF-SLustLlpFLQpQ-DPpuKKAEK-LcLLtAlEPLlss+Rs+.Ghhlsp-s-K-uQospsaLKHhss+psphhssta....oEDlFpLAsTLHsCVCsssQsI..pPDKDIVlDllhFLWQKCKlGlQRlsIs+h-ssKYspKhu .............................................................................................hl.hu-.h....t...........................................................................h....h..hpsh.....s.hth.htts..h.p...sh..hh....p....hLt.....hRhhhphsl.s.t.-.....phhWllaNGolhlYplCcpLh..hh..G......oscslthl.hh.s..hshE........l.L.s.+YL.WRhpLhshlstsh.c.....t.h...pu.hhAp+ul.+lp-L.pLE.hs......t.tt.hh.s................................................................................................................................................................................................................................................................................................................................................................................................................................................................p.......................................................... 0 23 31 41 +14710 PF14859 Colicin_M Colicin M Eberhardt R re3 Jackhmmer:P05820 Family Colicin M is a toxin produced by, and active against, Escherichia coli. 
It catalyses the hydrolysis of lipid I and lipid II peptidoglycan intermediates, therefore inhibiting peptidoglycan biosynthesis and leading to lysis of the bacterial cells [1]. 27.00 27.00 32.20 38.40 24.40 24.30 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.77 0.70 -5.51 2 47 2012-05-08 11:57:22 2012-05-08 12:57:22 1 2 30 16 4 47 0 223.20 56 87.11 NEW phpl....PuT.l.s.t.Gs.p.ou.pVPusGsllsphV......Yuhhp.ssMh.ptLpthcs.h.taGhss.....hI.sslthhpsADh.LlhpPtlSshDAap..hpcsu..Q.shp.hs.pphSGsshTPhhAltHaLhGNGsptSVpIupIGlp.oP.KIsplhsIIpouhlGphsVshphoahTGp.sVIsthYLGsITLphpGplThsssGohoasGVV+uasD+YDhNASoHRsshsEuhT....clGthhsuK-YtI.lsGEl.I..ShtR .....................................................................s.....................................................MslpsLptlcDalcpHGhss.....hI.sslthhpsADh.LlhpPtlSshDAap..htcsu..p.shpphsh+phSGNVhTPIsALAHYLaGNGA-RSVNIuNIGLKISPhKIsQIpDIIcS..Gs.V..GTFPV...Soc..Fo...+AT.....Gc...hsVI...o...uuYLGNITL+TcGTLTISAsGoWTYsGVVRSYDDKYDFNASTHRGlIGESLT....RLGAMFS.GKEYQIllPGEIcIp.SGKR................ 0 0 0 2 +14711 PF14860 DrrA_P4M DrrA phosphatidylinositol 4-phosphate binding domain Eberhardt R re3 Jackhmmer:Q29ST3 Domain This domain binds to phosphatidylinositol 4-phosphate. It is found in Legionella pneumophila DrrA, a protein involved in the redirection of endoplasmic reticulum-derived vesicles to the Legionella-containing vacuoles [1,2]. 27.00 27.00 49.40 49.30 25.80 18.80 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.55 0.71 -4.36 3 23 2012-05-09 07:41:44 2012-05-09 08:41:44 1 1 8 9 3 20 0 112.70 42 28.65 NEW cNLcEsl..KhupccKhtuRpHcaTA...s+psspshKE+YpQlKGDuLK+sILs-LKDcLAEIcsh-sLK-hltEFKuSsEYpILAKGQGLTTKlLGLKTSSp+uVEcIFcEAcEcIpSscp ............pshc.sl..c...stKhtuR.Hhass...s+hhs.phpEpYptlKGDhLK+sILp-LKssLtcIssh-pLc-hhtEFKsSsEYpILucGQGLhT+shsLKTSSh+ulpchhcEscccIpsp..... 
0 3 3 3 +14712 PF14861 Antimicrobial21 Plant antimicrobial peptide Eberhardt R re3 Jackhmmer:P86698 Domain This family includes plant antimicrobial peptides [1-2]. They adopt an alpha-helical hairpin fold stabilised by two disulphide bonds [2]. 27.00 27.00 36.50 30.10 24.90 21.20 hmmbuild -o /dev/null HMM SEED 31 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.67 0.72 -7.86 0.72 -4.47 22 86 2012-05-09 09:58:20 2012-05-09 10:58:20 1 7 8 0 22 72 0 31.30 45 52.48 NEW spspccCc+pC.pHHp..D.hc+QpClpcCcp+c ..scupccCR+pC.pHH+..D.W++QpChp-C+p++. 0 0 8 15 +14713 PF14862 Defensin_big Big defensin Eberhardt R re3 Jackhmmer:Q0H293 Family Big defensins are antimicrobial peptides. They consist of a hydrophobic N-terminal half, which is active against Gram-positive bacteria, and a cationic C-terminal half, which is active against Gram-negative bacteria. The C-terminal half adopts a beta-defensin-like structure [1,2]. 25.00 25.00 25.00 25.00 24.60 24.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.30 0.72 -9.97 0.72 -3.91 4 41 2012-10-01 20:50:19 2012-05-09 13:58:57 1 1 10 2 2 47 0 78.80 58 67.17 NEW AIPllYhGAsVuPsVWsWLVshhGAAAVsAAulp.......puSsDsHSCAsNRGWCRSpC..FpHEYlDsapSuVCGpYcCCRs .................Ll..PlA.YAGhTVSsPVFAALVusYGsYAVhRYsIR....................pusp..DSHSCANNRGWCRsoC..FSHEYpDWaN.ssVCGSYcCCRP............. 
0 1 1 2 +14714 PF14863 Alkyl_sulf_dimr Alkyl sulfatase dimerisation Eberhardt R re3 Jackhmmer:Q9I5I9 Domain This domain is found in alkyl sulfatases such as the Pseudomonas aeruginosa SDS hydrolase (Swiss:Q9I5I9), where it acts as a dimerisation domain [1] 26.00 26.00 26.00 26.80 25.90 24.80 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.37 0.71 -3.95 196 988 2012-10-11 20:01:04 2012-05-10 11:46:15 1 8 790 10 240 798 507 139.00 47 22.62 NEW hTssEIA.-plp.LPssLsp.paasRuYYGolsHNs+AlYptYhG.WaDGNPAsLpsLsPt-pAc+YVchh..GGs-pllppAcp...saspG-..........YRWuAplls+lVa.............A-PsNp.p.........ARpLhAc........sh-QLGYpuEsusWRNhYLoGAhELRsGs ........................hThsEIu-hl+..LPsuLsp.sWtsRGYYGSlSHNs+AVYphYLG.aaDGN.P.AsLpPhsss-huc+YVphh..GGu...spVlphApc...uhcpG-..........YRW....uAcllcpllh.............AsP.ssp.s.........A+sLpAs........shEQLGYQAESu.sWRsaYLoGApELRpG.l........................................ 0 55 123 185 +14715 PF14864 Alkyl_sulf_C Alkyl sulfatase C-terminal Eberhardt R re3 Jackhmmer:Q9I5I9 Domain This domain is found at the C-terminus of alkyl sulfatases. Together with the N-terminal catalytic domain, this domain forms a hydrophobic chute and may recruit hydrophobic substrates [1]. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.44 0.71 -4.14 140 895 2012-10-02 14:08:01 2012-05-10 11:49:45 1 6 760 10 211 992 282 122.00 34 19.64 NEW sssos-hltshssc.hhDhlAl+lsus+At.......st.clslshhhs.....Dh.s.....c...........patlplpNuVLst....hpstp.......ssA-sTlol.s+ssLhslh.hstsshsphhtsspl...pl.pGDtstlppLhuhLDph-.....stFsIVsP .....................................................s.suSsDslpuMos-hlFDahu..VRL..s.usKAs..........Gp..slslNashs.....-...s.....-................shpLpLpN.uVLsa........pcshp.......spADsoltl.sRpsLpsll..h....G...psphsphlp.u...tcs..+l...tG.ssstLpclluhLDsFD.....hhFNIVTP............................... 
0 53 111 163 +14716 PF14865 Macin Macin Eberhardt R re3 Jackhmmer:B3RFR8 Domain The macins are antimicrobial proteins [1-3]. They form a disulphide-stabilised alpha-beta motif [3]. 27.00 27.00 28.00 34.10 25.00 18.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.09 0.72 -9.69 0.72 -3.87 6 15 2012-05-10 12:11:02 2012-05-10 13:11:02 1 1 8 2 2 17 0 57.30 45 63.35 NEW sCa-sWSRCoshoSthTGlLW+oCs-pCK.cLG+psGpCh.sPS.sCP..sch.pshQCpCa sCa-sWSRCothoushTGILWcoCssRCK.ChG+ssGsChpsPS.sCsh.pc..csaQCpCh.... 0 2 2 2 +14717 PF14866 Toxin_38 Potassium channel toxin Eberhardt R re3 Jackhmmer:Q9NJC6 Family This family includes scorpion potassium channel toxins [1-2]. 27.00 27.00 27.00 27.00 26.90 26.60 hmmbuild -o /dev/null HMM SEED 57 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.90 0.72 -9.51 0.72 -3.88 13 40 2012-10-01 23:31:40 2012-05-11 08:49:57 1 1 17 0 0 57 0 56.80 41 66.59 NEW splKstLpplhcKlht....l.upSpaGCPs...I-paC-DHCps.c+t.GpC-shcCpChp..u .................s.pl+phlpsVlHKl........uKopauCPs...hpsaC-cHCps.c+ccGhCHG...h...K..CKCsh..s... 0 0 0 0 +14718 PF14867 Lantibiotic_a Lantibiotic alpha Eberhardt R re3 Jackhmmer:P86475 Family Lantibiotics are two-component lanthionine-containing peptide antibiotics active on Gram-positive bacteria [1-2]. 27.00 27.00 37.50 36.90 25.20 24.40 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.50 0.72 -7.73 0.72 -4.70 14 26 2012-05-11 09:53:12 2012-05-11 10:53:12 1 1 26 1 4 22 0 31.70 45 48.22 NEW ssssssshslShsLGNcGthCThThEC.ssCp .h.sshsphhhlSpsLGNcGtlCThThEC.ssCp. 0 2 2 4 +14719 PF14868 DUF4487 Domain of unknown function (DUF4487) Eberhardt R re3 Jackhmmer:Q9NSG2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 209 and 938 amino acids in length. There is a conserved WCF sequence motif. There is a single completely conserved residue W that may be functionally important. 
25.00 25.00 31.00 25.90 21.80 23.80 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.86 0.70 -5.80 12 134 2012-05-14 11:54:13 2012-05-14 12:54:13 1 2 71 0 67 136 0 387.30 29 64.66 NEW hslhssoWKhlIp....pt+sslcsplphp...-IlssLhpslh.Sh+sshp.........hhpsshp-...shshp.hp+shh.s+FahsslV+hspta.s.h.tsspplhpLhl.l.hSpFh.uLpttplscstpE.luphh.hshssLlspLLsts.....hhpphhtphLs.......hssc..p.....hsp.shhhh.s.hD+lso.s.plhslassss...tphpcl.lhp..slFtshhpsSsElp.sspLt..............................Gs.upupt..lh.olYptlhhpLpsah..ss.hsss.tascLEthLLpslLpsphlst.lsh-lWCFlu.Rausu-LssphlshlspLlKsh........P........usshphhs..LshL..LpR....hhh.hssphpsphlpphpsp...psps..........hhlhcthsL..puLssp....l+p...pstcplhsshhu.hhppa.p..........sosohGtlt..stsLSusLtsppsussslDt+plsslltllsphhshl..cpltsc..h.pphhshhLslluhh.hphhpsphlppllptL.psLhhppsssp......l+hs...hhpFluuLGch.hsps.psthsstl.....pLaphLLp-cpWhlhphAlpAFshFApcTspppl ........................................................................................hhhp..........h.....h.....h....h......h....................tt......t.t.h.hhhh.h+ahh..hhph.t.a...s.h.t.h.tp.htLhl.h.tS...tF..SL.ttphsKutpE.hsshh.hshssLlstL.Lsht.....hhp.lh.sh.L-........h.sE..p....hs...hh.h...hDphspts...h.slhshss...pptspl.Lhp..hlF.sh.phSsELp.sspLt..............................t..sputt..h..shaptlh.tLpshh..h...h.ss.ha.tl-hhLLpshht..p..s..l..-hWsFhh.+..............hup.phs....hh..lh..lht.h........s...............t......h.......ht.h..ltp....h....h.................................hht..........................h..h....t.h........t........th..................................h......................................................................................................................................................................................................................................... 
0 18 30 46 +14720 PF14869 DUF4488 Domain of unknown function (DUF4488) Coggill P pcc Pfam_7936 (release 26.0) Family In most members this family covers almost the whole sequence, but a few member-sequences also carry a TonB_C domain, PF03544. 25.10 25.10 25.10 36.00 24.70 23.60 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.51 0.71 -10.68 0.71 -4.59 11 63 2012-05-15 08:09:31 2012-05-15 09:09:31 1 4 49 0 9 41 0 130.40 55 67.57 NEW psLpGVWQhCh.hspss-lsucLcsuss.LKlLS-DspFhNlhh...hs.suAIIhupGpYch.SDs.......sYsEplEK.slaLshlsGp-N.lphEhhc.....DsLhhl+ahlsschs....spWhpEhW+RV..PshhP.s...hsc .....u.sLcGIWQLCaYVS-sP-lPGtLKPSNo.FKVLSDDGphlNhTh...hPsus.AIITGYGTYcQhoDs.......oYpEsIEK.NIHLPhLcspDNlLcFEht-.....ssll+LKYFlcsDhNGNElNsWaaETWKRVpMPshaPcDlVR...... 0 2 8 9 +14721 PF14870 PSII_BNR PSII_YCF48; Photosynthesis system II assembly factor YCF48 Coggill P pcc Pfam-B_547 (release 26.0) Domain YCF48 is one of several assembly factors of the photosynthesis system II. The photosynthesis system II occurs in Cyanobacteria that are Gram-negative bacteria performing oxygenic photosynthesis. One of the three membranes surrounding these bacteria is the inner thylakoid membrane (TM) system that is localised within the cell and houses the large pigment-protein complexes of the photosynthetic electron transfer chain, i.e. Photosystem (PS) II, PSI, the cytochrome b6f complex, and the ATP synthase. YCF48 is necessary for efficient assembly and repair of the PSII [1]. YCF48 is found predominantly in the thykaloid membrane [2]. It is a BNR repeat protein. 
27.00 26.00 27.00 26.00 26.90 25.90 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.71 0.70 -12.09 0.70 -5.43 23 762 2012-10-02 00:45:24 2012-05-15 10:59:18 1 27 377 1 329 819 370 140.60 23 53.77 NEW hsshsssPWchlpLso-s..slLDluFs..sspHGaLVGoptolLETsDGGcoWctRs...LDls--.saRhpSVSFsGsEGWIlGcPulLLHTsDGGcoWoRlsLSsKLPGsPhhIsALGss.sAEMsTss...GAIYcTpDuGpsWcAtVpEsl...................GslRslpRus-GcYlAVSSRGNFYuTWpPGQshWpPHsRsSSRRlQsMGFs.c...GpLWhlsRGGplpFos....sss.-sWsc..........shhPlhosGaGlLDLAa+ss.s-lWsuGGuGoLlsSpDGGcoWp+Dpss-slPoNhY+IhFhss.....spGFlLGpcGlLLRYs .................................................s.....................h..pt......lhs.lhFh..stp..pGas.......V....G...p.........t.......u.....h...l.....LpTs.DGGpoWp..ttt................s....h....t...p................................h......s..l...t....h......s.p..p.....s..a..h.....s.......G.........p.........u....h........lh+..op.D.sGtsWpt........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 0 128 201 276 +14722 PF14871 GHL6 Hypothetical glycosyl hydrolase 6 Coggill P pcc Naumoff D [3] Family GHL6 is a family of hypothetical glycoside hydrolases. 
35.00 35.00 35.10 35.00 34.90 34.90 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.48 0.71 -10.66 0.71 -4.13 13 111 2012-10-03 05:44:19 2012-05-15 14:59:04 1 3 70 0 59 240 54 134.20 26 19.46 NEW Dscphscth+ch+usolslFupstaG.hsaYsop.l....ts..c..HPtL..p.t....DlLtEtlcAs+ccGl+Vslhhshs.hccplhppHP-WthhstsGt.........stthstst.a.p...lChNSsYh..-.ahttpl.cEslc...th..slDGlFhDh ............................................................tphhphlcpuplsulsl.upsttG.huYYPoc..l...............tp..h.......pPtL.....p..p.....................Dllu-hlcAs+ctGl+lhs..h.h........sh.....s.....h....-...p......p..............h.h.........c.pHP-Wh...hh....stsGp...................tphhpss...aht..........hCh.N.u.s.Yh..-..alhppl.cEllp...pY...ssDGlFhDh........................................................... 0 36 52 54 +14723 PF14872 GHL5 Hypothetical glycoside hydrolase 5 Coggill P pcc Naumoff D [3] Family GHL5 is a family of hypothetical glycoside hydrolases. 23.70 23.70 26.50 24.60 22.80 22.20 hmmbuild -o /dev/null HMM SEED 811 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.45 0.70 -13.45 0.70 -6.77 10 27 2012-10-03 05:44:19 2012-05-15 15:19:15 1 1 22 0 6 24 9 739.50 52 96.71 NEW 
ppLL-WAssI-p.S-sThFEKAQplAsRLGAHYRs.DGLTEIGFWTPELuu-lIQs.+sIaLEVFTPl-sIDhpts-QslpF+RDplpLc+QGEYtWGVVuGL+AGTR-QhGSFYWLRYlD.p.sclpsIRDsLAYSLPYGVFAPAELYDhcsLQccRADLsYFcppuup.............p..s.....sss..........lsRVPAPpNILQLHVsTASPsGTLuGLTclYQRIucKLusupPLTPAEpNYlGYDAVQLLPIEPTlEYR.sE.pss.cHcFFslcs.........c-.sphss.s..l..pstcl.....clsL+KP-TQNWGYDVsIlGSuATNPAlLpTLRPDElVDFIATLHNFsTGPIQlIYDLVYGHADNQul-LLNspFLKGPNMYGQDlNHQsPsVRAILLEMQRRKlNTGADGIRIDGGQDF+FFNPLoGcVEYDDsYLLAMuDlVQ-IGstpRhLFTIFEDGRPWPsEGWEEISTYRDLIEL+P-SFQWGPLIFAHNTPsLpGFWD+KWRRVCEVMhpG-+WITGCGNHDTVRRGNQlssst.sINWpLGsTLPEVLpNAYDNPAlsLWVYGFuPGLPMDFINusMRAPWGFFRNTDDRYGVKVVAEEh.GFLDWQloPElYppsp.lFs+LKpLGFo-L-tLRQFh+ALppAht-oDYDL-cVAphCQpslGssst.p-.............Lpclshs-tssFLssLDVuKLKpFAhAFMEDsH-hCNVp+apDpl-ssQsuFNLALRcFR+u+PWL+cNLs..s.s.DRFN+Io-cppTlFYGlRssPh-t-......sptpspplAMVAHMGGEPhTVsLG-WLpLDLscWplAIASPGLc....l....c...D....L+u..FEL+DSQulLLE .............................t.pLhsWAtslpp.SstshFptApplAp+LGAHaps.DGLTplGFWTPcLsup.hhpp.tpIaLEVaTPhptIDhpusp.QslhF+R-hlpLtppGEahWuVluGh+sGoR-phGSFYWLRYhD.t.sphphItDPLuYSLPYGVFAPAElYDlcphQtpRsDhsYhcppss......................................p.........t...............ls+lssPtNILQlHlsTAospGTLsGLTclYpRlucKlhpt.sLoPsEpNYlGYDAlQLLPlEPTlEYR.hc..p..tptFFshts.........c-...............tpl.....plpLpKPcTQNWGYDlsIh....GSuATNPulLtThRPDElVDhIATLHNFPsGPIplIYDlVYGHA...DNQu.hpLLNtpahK....GP..N..MYGQDLNHQ.PhVRAILLEMQRRKh.NoGsDGIRlD..GuQDF+aFNPloscVE.DDsYLhtMuDlVQ-IusscRhhFsIFEDGRPWPp-GWE-hSTYR-LI-hc....P-uaQWGPLIFAHNTPsLpuFWp+KWcRVCEVhhpG-pWITGCuNHDTlRRGsQls.p.....t.pINhpLGsTLsEVLpNAYDNPAstLhsYGFsPGLPMDFlNAhh+APWGFhRNTD-RYGVKVsuEEs.GFLDWQlsPEhYptst.hFspLKtLGFpcLt.LppFhcuLtpAhhpsDYsLptlAchCptshs.pst.pt.............hpphthsp.sthlppLsVsKLKpFAcAFMEDsHEhCsVSpat-pl-stpssaNLtLRpaR+s+PWL+cNLt..ss.D+.Fs.pIps.sspTlFYGlRssP.ptc......s...stpVshVsHM.tG.-PhpVslGDWLpLDl..s..cWplAIAoPGlp....l....-...s....Lps..FEL+DuQullL.............. 
0 1 5 6 +14724 PF14873 BNR_assoc_N N-terminal domain of BNR-repeat neuraminidase Coggill P pcc Pfam-B_13890 (release 26.0) Domain This domain is usually found at the N-terminus of the BNR-repeat neuraminidase protein family. 25.00 25.00 25.60 25.80 22.30 23.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.24 0.71 -4.41 37 147 2012-05-15 14:59:52 2012-05-15 15:59:52 1 11 89 0 21 137 9 139.20 24 23.46 NEW tus-s....lhlcpsplPlLlc+psNslhhlRlpsppspt...LsclsLshstsssLsDIpul+LYhuGo.cuhpcps+thhtPlshh..........................spplsLp.....us.pLh.sGsNaFWlSlphK.sssoLts+lssphsplphsspph ......................tp.....lhlcpsplP.lL.lc+psNslhtl+lpsppsps...LsclsLshstssslsDIpulcLYhuGs..cuhpsps....+hhhss.lsh...........................spplsLp.....sp.pLh.sGsNaFWlolph+.ssssLts+lssslsplphsspt.h.............. 0 6 16 21 +14725 PF14874 PapD-like Flagellar-associated PapD-like Coggill P pcc Pfam-B_1987 (release 26.0) Domain This domain is a putative PapD periplasmic pilus chaperone protein family. 27.00 9.40 27.00 9.40 26.90 9.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.40 0.72 -4.07 5 621 2012-05-15 15:06:52 2012-05-15 16:06:52 1 29 105 0 426 554 6 86.70 17 7.00 NEW sPpLEVpPspVcFGpVlPGpRYltTVcLTNsSTVPCRYRVRlssss+shLpVpYs+QFVAPGLTscscVELsGoQPhGsMcupLsVsHEGGsl-VsVchcTs ..............h...hplps.s.plcFG..slh.hsp........s.h.s.p.s..lpLpNsu..hl..s..s..pa.c...l........p......h...s..p............t....t.....h.....h...ph........................s.......lss.t..p........h........l.h......................................................................................hhh................................................................... 
0 201 249 315 +14726 PF14875 PIP49_N N-term cysteine-rich ER, FAM69 Coggill P pcc manual Domain The FAM69 family of cysteine-rich type II transmembrane proteins localise to the endoplasmic reticulum (ER) in cultured cells, probably via N-terminal di-arginine motifs. These proteins carry at least 14 luminal cysteines which are conserved in all FAM69s. There are currently few indications of the involvement of FAM69 members in human diseases [1]. It would appear that FAM69 proteins are predicted to be have a protein kinase structure and function. Analysis of three-dimensional structure models and conservation of the classic catalytic motifs of protein kinases in four of human FAM69 proteins suggests they might have retained catalytic phosphotransferase activity. An EF-hand Ca2+-binding domain, inserted within the structure of the kinase domain, suggests they function as Ca2+-dependent kinases (unpublished). 21.00 21.00 21.00 21.00 20.70 19.40 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.23 0.71 -4.57 15 182 2012-05-16 11:38:45 2012-05-16 12:38:45 1 5 71 0 108 161 0 138.70 34 35.20 NEW uRLshh+hKYLlhsWlullluSWVlYhpYs..oYoELCRGpsCchhI...C-+Y+pGlloGSsCpsLCspcolh.ht+C......lSsssspQl.....aouhWp-p.llIKCthpcsh+tchs.p.hs+p-hshacpPT+GTShpEF+EMV+salKsKlG-QssLssLlspllsl .........................................Rhshh+h.hhhhshlslhl.ushl.hhhtYs...saoEhCpsc.s.+hhl..............C-cY+pGhlsGshCpsLCspcslh.at+C......Lss.pssppl.....atuh.Wps.p...sllKsthcpshc.s.htsp....scc-hs.h..h-tP....ohpcF+EMlhshhKspLG.phs...sthls.ll..h.................................................... 1 21 29 61 +14727 PF14876 RSF RSF1P; Respiratory growth transcriptional regulator Wood V, Coggill P pcc Pfam-B_36578 (release 26.0) Family This is a family of transcriptional regulators that determine the transition from fermentative activity to growth on glycerol [1]. 
27.00 27.00 35.70 34.80 24.10 24.10 hmmbuild -o /dev/null HMM SEED 374 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.22 0.70 -12.26 0.70 -5.66 5 29 2012-05-17 09:46:49 2012-05-17 10:46:49 1 1 16 0 8 23 0 327.70 73 82.81 NEW MKDLNPEMGKFATTKGPPQDNRGMVDIATLPNFPANRSGTPREEMYLAPNKMETPRTLNMNMVPDYLQKENFSPDFSSATVSAKSSPVNVTHDESLPLGTIESNSTKDSKYAVQRQQQQVVDFIENNMQLLSSETLNFRSDIMKTLELPIPKRRDIKGNHLSKLLFAKSPLTINTYCQFYDRRTKRICNQEMIWKDKNSREKHGSRKYQRHLSKVHDVQLTPNNFTEFFDHNSPLFQECYDYQSRLMRDLLVEPDAKFKEKKKKKKGDVNGNHPETGSSLINHQVQQQNVRELQSKIAMNDLIEILIDLNIPFSVLDYQPMRNWLIKYSIISTDTLPDEVYFKTDPGVNELEHNSSNLNNSNSGTPHNHNQNQH .............................................MKDLNPEMGKFATTKGPPQDNRGMVDIATLPNFPANRSGTPREEMYLAPNKMET.RhLNMNMVPDYLQKENFSPDFSSATVSAKSSPVNVTHDESLPLGTIESNSTKDSKYAV...QRQQQQVVDFIENNMQLLSSETLNFRSDIMKTLELPIPK..RRDIK...G.NHLSKLLFAKSPLTINTYCQFYDRRT....KR.ICNQEMIWK..DKNSREKHGSRKYQRHLSKVHDVQLTPNNFTEFFDHNSPLFQECYDYQSRLMRDLLVEPDAKFKEKKKKKKGDVNGNHPETGSSLINHQVQQQNVRELQScIAMNDLIEILIDLNIPFSVLDYQPhRNWLIKYSIISTDTLPDEVYFKTDPGVNELEHNSSNLNN..SN..SGTPHNHNQNQH................................ 0 1 4 6 +14728 PF14877 mIF3 Mitochondrial translation initiation factor Wood V, Coggill P pcc Pfam-B_22619 (release 26.0) Family This is a family of mitochondrial initiation factors IF3. 31.10 31.10 32.80 62.20 30.30 29.60 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.06 0.71 -4.91 14 37 2012-05-17 10:23:41 2012-05-17 11:23:41 1 1 36 0 26 37 0 177.60 35 49.66 NEW +KslphcasoGS-+AppAhpsllsclathsppspl+hlsspssclEppslhchspslDLscpGlthVssp...pspt...plPLVKhVcsppAlKpYSDcLAppKEcELlphG..hs.+phtp+.cs-+ccsshKhl+lSWpISssDLspQKspEItspL...cKGp+lhl.alscKsshsss..stsc-p .......+.hhhpasoGo-+A+pAhpsllsclaphsp.ptplchlsssssplcpsslpphspslDLscpGLplVshc..................pssptp.plPLVKllcs+.hlKpYSDhLAppKEpELhphG...sh+phtpphps-+Kc.sshKpIplSWpIsssDLppQKspEIhphL....cKGp.+lhl.alssKpshsss......................... 
0 3 14 25 +14729 PF14878 DLD Death-like domain of SPT6 Wood V, Coggill P pcc Pfam-B_9510 (release 26.0) Domain This DLD domain maintains the characteristic overall topology of death domains, as it consists of a six-helix bundle with three stacked antiparallel helices and an additional helix inserted between the final two helices of the bundle. Although it is unlikely that the Spt6 DLD functions in an apoptotic process in yeast, its prominent location and the observation that it displays the most highly conserved region of the Spt6 surface suggest that it mediates important intermolecular interactions [1,2]. 35.00 35.00 38.40 38.40 31.60 32.70 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.44 0.71 -10.53 0.71 -3.98 4 271 2012-05-17 11:48:29 2012-05-17 12:48:29 1 35 233 2 208 285 1 113.10 46 7.70 NEW h-lLDuoRlHPEsYEhARKMAsDALEYDEs..AEctsstGslpElLE.......pst+Lc-LsL-uaAEELERpsatcKt.TL.cIRhELpstYc-LRssa+s.ss-EIFpMLTtEoPET ....-hLDsTRlHPEsY...-hARKMAsD...AL.E..hD........E-........t.p..-...p..s...s...sG....A.l.cc.l.l.c..........................s.s.-.+Lc-Ls..L-taA-pL...E+p....thtcK.+tT...L.sIRsELpssYc-LRpsap...ss-..ElFsMLTGET.-......................................... 0 70 117 174 +14730 PF14879 DUF4489 Domain of unknown function (DUF4489) Coggill P pcc Pfam-B_28643 (release 26.0) Family \N 27.00 27.00 28.20 76.20 26.50 18.70 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.86 0.71 -4.51 17 32 2012-05-17 11:57:03 2012-05-17 12:57:03 1 2 17 0 15 29 0 140.70 37 79.89 NEW spsIlKCG.ssGusslPssos....hussasluolTlDTsshpsPCl+LEFsSNIsssssh....solNFQlaKpCcsQhsPlPVGPsao.Fut..........hluhhtopoF.SFhVCDC.DhCss-.CCTYSVssTssu.hsssushoIsNAsLuAIss-s ...spslLKCG.ssGu..sslPhsos....husshslAololDTsshcsPslpL-FuSNIsssssh.....thsLNFQlFKpCcsQhhPhPVGPsas.Fsp..........................hlsshsopoF.SFhVCDC.DsCssc.CCTYSVssossu.hsssusssIsNAsLuAlhss... 
0 6 15 15 +14731 PF14880 COX14 Cytochrome oxidase c assembly Coggill P pcc Pfam-B_122767 (release 26.0) Family COX14 plays an essential role in cytochrome oxidase assembly. The COX14 product is a low-molecular weight membrane protein of mitochondria, but it is not a subunit of cytochrome oxidase [2]. Orthology-prediction methods have identified the vertebrate C12orf62 orthologues to be orthologues of the yeast COX14 [1]. 25.00 25.00 25.20 25.00 24.30 24.70 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.73 0.72 -4.34 45 167 2012-05-17 14:06:12 2012-05-17 15:06:12 1 2 159 0 124 156 0 57.10 23 37.66 NEW thhsttc+lhDtsHRssVhoLlGhTshuushhsas.....haphhpa.+pp+t......h.tpccpt.t ......h....hucphsDhsH+ssVhollu..hTlhuGhlsshs.....hhphhphp+pp+t......h.ppph................................... 0 27 55 96 +14732 PF14881 Tubulin_3 Tubulin domain Coggill P pcc Jackhmmer:Q9P6K5 Domain This family includes the tubulin alpha, beta and gamma chains, as well as the bacterial FtsZ family of proteins. Misato from Drosophila and Dml1p from fungi are descendants of an ancestral tubulin-like protein, and exhibit regions with similarity to members of a GTPase family that includes eukaryotic tubulin and prokaryotic FtsZ. Dml1p and Misato have been co-opted into a role in mtDNA inheritance in yeast, and into a cell division-related mechanism in flies, respectively. Dml1p might additionally function in the partitioning of the mitochondrial organelle itself, or in the segregation of chromosomes, thereby explaining its essential requirement. This domain subject to extensive post-translational modifications. 
30.00 30.00 32.00 30.20 29.40 29.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.21 0.71 -4.81 18 276 2012-10-03 12:11:42 2012-05-18 09:44:05 1 11 230 0 183 277 1 185.30 29 35.18 NEW sPpLTo-oVRYWSDFNRVFYHPRSIVQLN-Y.EL..NSp..lM...P...F...EcWssGE-LFssLDKEHDLLDRDlRPFAEECDQlpGlQlFTGsDDAWGGFAA+Yl-RLRDEYGKpuh.hWsa.ul....psshptptph.h.+.phtt.h.NpARSlppl.us...QuShalPl........s........p........l.........s.........c....s......pS..t.....WasSALlusAlESsTLPoRL ................................................psl+hWSDa.+lhaHP+Sls.lp.........pa..ph.........ssp.............hh.................t.........F.........-s...au..hGpslap...p..s...hp...c...-hh.D.......R...........lRhalEECD...tLQ...GhQllsDhcsuauGhuu..phl.-pLpDEY.......spps....h.....h.....a.s..h...sh................t.ss...t....t......p..t.p....t..........pp....h.h.chl..NsAhuhsplsp.....puo.....lhsPlu.............p.................s.............ss.................pp.a.asSAlhAsAl.-oholPhRl............................................................. 0 53 95 145 +14733 PF14882 GHL12 Hypothetical glycosyl hydrolase 12 Coggill P pcc Naumov D [3] Family GHL12 is a family of hypothetical glycoside hydrolases. 27.00 15.20 27.70 16.40 26.00 15.00 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.80 0.72 -3.87 38 137 2012-10-03 05:44:19 2012-05-18 09:59:44 1 19 28 0 72 140 139 48.70 34 25.22 NEW EYpc+YKcDP+LPSsPsphY..pc.-Wp...uWhsFLGsp..pchYsTht-AppAshpL ............Yhp+Y+cDP+LPusPpp..hY..ps.-Wt...uWhsFLGs...t......t...hY.ohttAptus.t....................... 0 16 28 48 +14734 PF14883 GHL13 Hypothetical glycosyl hydrolase family 13 Coggill P pcc Naumoff D, [3] Family GHL13 is a family of hypothetical glycoside hydrolases. 
27.00 27.00 47.50 45.90 23.10 22.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.90 0.70 -5.26 12 474 2012-10-03 05:44:19 2012-05-18 11:51:12 1 3 410 0 61 295 6 291.70 52 48.30 NEW HIDLDYVYDsD.tQps+NLshLlpRlhchtlooVYLQAFuDPDGDGhADAlYFPNRaLPhRADLFsRVAWQL+TRAuV.p.VYAWMPVLuaDL....P.s.sp..ptphlssp....ph.t.....t...cs.......hh..RLSPass+spphlppIYEDLAht.AtF-GILFHDDAhLs.DhE..................hs....th.p...st....u..............s........t.................h..........KopsLlcFThpLsstl+....th....c...Ppl+TARNlaAtsllpPpuEsWFAQsLsshhpuYDhTAlMAMPaMEs..sp.....p.....scpWLtpLlstVcspssuhcKolFELQuhsW+..s......pps.lspspLhsa.hphLQtpGlhsaGYY ........................HlDLDYVYD.s.pQ.-+NlDhLIpRlpDMploTVYLQAFA...DP.......D......GDG...h....l.....c....p.VaFPNRhLPM+ADlFuRVAWQL+TRuGV.p.lYAWMPV..Lu..a..-L.....P.s..ls...c.h..chlsst.......pt.c......scp........Yh...RLSPFcscsRtplt.lYEDLAta.AsFDGILFHDDAlLS.DaEDASssAlpA.......YpphGhssslucIR...ps....sp.....pht............pWsR.................................a..........Ko+sLsDFThELsspV+sh....R...uPplKTARNIFAh.PllpP.-.SEAWFAQNhsDFLc.sYDaTAlMAMPY....hEs....ls........-..u-pWLhp.Lsspl+shPtuh...cKolhELQApsWp..p......pps..I.soppLApWMphLphs.GspsaGYY.................................................................. 0 14 31 46 +14735 PF14884 EFF-AFF Type I membrane glycoproteins cell-cell fusogen Podbilewicz B, Coggill P pcc Podbilewicz B, [1], Pfam-B_25631 (release 26.0) Family EFF-AFF was first identified when EFF1 mutants were found to block cell fusion in all epidermal and vulval epithelia in the worm [1]. However, fusion between the anchor cell and the utse syncytium that establishes a continuous uterine-vulval tube proceeds normally in eff-1 mutants and thus Aff1 was established as necessary for this and the fusion of heterologous cells in C. elegans [2]. 
The transmembrane forms of FF proteins, like most viral fusogens, possess an N-terminal signal sequence followed by a long extracellular portion, a predicted transmembrane domain, and a short intracellular tail. A striking conservation in the position and number of all 16 cysteines in the extracellular portion of FF proteins from different nematode species suggests that these proteins are folded in a similar 3D structure that is essential for their fusogenic activity [3]. C. elegans AFF-1 and EFF-1 proteins are essential for developmental cell-to-cell fusion and can merge insect cells. Thus FFs comprise an ancient family of cellular fusogens that can promote fusion when expressed on a viral particle [4]. 25.00 25.00 58.70 28.00 18.70 18.20 hmmbuild -o /dev/null HMM SEED 589 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -13.15 0.70 -6.45 4 33 2012-05-18 13:38:09 2012-05-18 14:38:09 1 2 10 \N 29 26 0 393.20 31 92.34 NEW MhlhphlLhhhh.hhl.h..............s.Shh...hCscoh.hpu....psctsusplspThphQhpIGLppThCFhLp..sspc.S...sl..uh.sp.pslLHsLpYEplEpcYPlcppYpFulPcl-osClC-CsuhuDhCsscstphscpt.ssTspp..shCapTYHPsQsstGC....sutpuclCCcl.hpPhps+.YVAh+lpQPhssssapas...hasppuhchap.hscpphps.sstspph..hschtplplth.s.ups.pQLcpGMYahs.psps.hh-ht...INclsE.shcKLGWhR.ptss.casVRsuclplpsAHhlpscNC+sQhphsphsupahhs.pts.s.pcaa.GptVEch.sWlRoV+l.-..souRplpVhpppusslsVtLp.hpossshshhactScLs-FouolplDt+SNRFhNlThhsspGolhGpl..Yp.sss+pssspatFosalG.pspspNsshRIuLPuhIN.GsphlCLpP.pcPs...pElC+hlsFpppALppshl.poWppu..cutCspsNp.sLtsFlu.LNPspWhpsls.......uhhEhhthslclshhlshhhlhhhlpp+slssl 
....................h......................................hp..aCsc.s.hpup.....pts.pth.phhph.hphsLppshCh.h.......................ps.lHslph.phEppaPlptpY.FulP.lpssChCcC.t.tp.ss.p....................Chppahst.ts.sC.....s..sphCCtl......sh.ahAh+ltpP.s.hhhhat....at..s.t.h..ht.p.hp........tt.t...h.t.tphplth.s.uts.pph.pGMYa..ppsts...clt...lNEls-sshc+LGWhR..c.ss.ca.VtsuhlhhpshH+shlcNCKtQhahs.hsup..h.......................tch..tt.lp..psWlpsscl.-..to.RphhlsHtcGTslplulp....sp..s...p................shh+stScltsFsGoIhlDpcSNRhhNlThatupGpltGpl..hh.sst.hp.s.ashoh.hu.....phtsps.hlsLPu.ls.tsphlCl.ssp.ss...tplC+hl.a.pps.lc.sh....tpWpth...u.Cspspp.shtshht.h.P..Wh.shp.......shh-hhhhshclslhhhh........hh.h............... 0 14 16 29 +14736 PF14885 GHL15 Hypothetical glycosyl hydrolase family 15 Coggill P pcc Naumoff D, [3] Family GHL15 is a family of hypothetical glycoside hydrolases. 27.00 27.00 27.00 51.50 26.80 26.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.52 0.72 -3.77 16 26 2012-10-03 05:44:19 2012-05-18 17:04:23 1 2 26 0 10 26 0 76.50 32 17.93 NEW hssussa.ttpsh.s.psssaau+chsGppl..pWpGast+aphhshs.....ssaRttWVstlsctlp..ss......saDGVhhDNsshs ..........pus.a.htpph.p.pptsWhA+cssGctl..EWp.s.YstHaQhtVWs.....ssYRttWlcpVsptht..so......saDGVhADNDlh.. 0 3 6 9 +14737 PF14886 FAM183 FAM183A and FAM183B related Coggill P pcc Jackhmmer:Q6ZVS7 Family The function of this family of metazoan sequences is not known. 
27.00 27.00 27.10 27.10 26.80 26.40 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.87 0.72 -10.63 0.72 -3.52 36 120 2012-05-22 14:57:57 2012-05-22 15:57:57 1 5 77 0 86 127 0 103.70 30 53.97 NEW plh+EhlcKEp+p...+lhsp.FslsPhpph..slTpKP.sph.......................s.tc.ppstphhphhp.....ctpppPpcKash.PhTpuQEhGWhspshhp..........tpcpphpas+pps-lTpas ...................................................................tlhpEhhhKE.+p...+lhsp.aplsPhp.........phh..slTcKPhuhp...................................................ssh-t.ssscaLphl+.....+sspsPpcKYsh.PpTEuQElGWhspsllp.t................ppcpphpa.+hps-lThah............. 0 40 49 66 +14738 PF14887 HMG_box_5 HMG (high mobility group) box 5 Eberhardt R re3 CATH:1l8y_A_00 Family Nucleolar transcription factor/upstream binding factor contains six HMG box domains. This is the fifth HMG box domain in these proteins. This domain has lost DNA-binding ability [1]. 25.00 25.00 25.00 25.00 24.50 24.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -10.05 0.72 -4.20 4 102 2012-10-02 14:16:02 2012-05-23 15:39:34 1 15 38 3 41 95 0 82.70 69 13.76 NEW uKLPEoPKTAEEIWQQSVIGDYLARFKsDRsKA.KAMEuTWpNMEKKEKIMWIKKAAEDQKRYE......REL.EMRossAus.suuKKhKF ................uKLPEoPKpAEEIWQQSVIGDYLARFK.NDRsK.A.LK.AMEhTWsNMEKKEKLMWIKKAAEDQKRYE......RELSEMRuPPAus.sSuKKhKF.................... 0 2 7 23 +14739 PF14888 PBP-Tp47_c Penicillin-binding protein Tp47 domain C Coggill P pcc CATH:1o75_A_04 Domain Domain C is the largest domain in this unusual penicillin-binding protein PBP), Tp47. This domain is mainly characterised by an immunoglobulin fold with two opposing beta-sheets that form the typical barrel-like structure. In contrast to the classical immunoglobulin fold, however, this has an additional beta-strand inserted after strand 3. Also, the strands are connected by rather large loops. Helices are inserted between strands 2 and 3 and between strands 4 and 5. 
Domain C interacts with domain B via a surface that has a slightly concave, goblet-like shape. Tp47 is unusual in that it displays β-lactamase activity, and thus it does not fit the classical structural and mechanistic paradigms for PBPs, and thus Tp47 appears to represent a new class of PBP [1]. 25.00 25.00 47.30 45.40 24.70 17.20 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.07 0.71 -4.63 14 25 2012-05-24 12:59:46 2012-05-24 13:59:46 1 11 25 2 2 24 2 159.30 51 20.57 NEW ssTNGLKTAsKsuDGoFoFSAR.ssGTpSGlKDpsLKsA...sslsssVKpAo..GSYGEFLRVDLsGs.YGsLGAsMQAVKWTYYGsDSTYTpslAoYGTKFAADNWMHKuMGIQLGLTDSlRCpLPtGTDGTGYWolTVYALGYsDhTapFpATcpNIVKspsssss ..NTNGLKTAhKppDG.aoFSAR.psGotSGl+DtslcTA...oshpspl+suu..GSaGEFlRVD...Ls.G-.YGDLGuNhQuV+WsYYGDDuTYTsshASYGTKFAADNWMHKuhGIQLGLTcShRCpLPcGsDGTGYWpLTlhALGYpDsshcFpssttNlsp.t....pt..... 0 1 2 2 +14740 PF14889 PBP-Tp47_a Penicillin-binding protein Tp47 domain a Coggill P pcc CATH:1o75_A_03 Domain This is the first domain in this unusual penicillin-binding protein PBP), Tp47 is mainly composed of beta-strands and is sequentially non-contiguous. The first three domains in Tp47 interact with each other through intimate domain-domain interfaces. Domain A contacts domain B through its N-terminal segment. Domain A also interacts tightly with domain C, Tp47 is unusual in that it displays β-lactamase activity, and thus it does not fit the classical structural and mechanistic paradigms for PBPs, and thus Tp47 appears to represent a new class of PBP [1]. 
25.00 25.00 55.60 49.50 19.40 18.50 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.13 0.71 -4.93 13 26 2012-05-24 14:39:44 2012-05-24 15:39:44 1 12 25 2 2 25 2 163.80 45 21.76 NEW ptpsEYsYVYAGLTWuEYWAuEGVYAAGDTSSSsptDSHsEhDKGAFDsVTRATsNHGLHRGSaQ..CtAsIhscsGs..pYplSaWss.........ssphlLTDGos.loas.....RGs...ITpsDGootphscYcVsGlKYVPVKVKouDasAFcucYsVVENGupLsGGauENsLpSYp ........u.pcEapYsYAuLoWu-YWAuEtV.uAs.ssuuss-tDp+sEhDtGuFDsVoRATsNHGLHRGSFQssshlhuc..cuh.......saslptWpt.............tsp.slTcssp.sohs.......................Ruo.hhsDupphclspYcVpGhKYVPVtVtspDhsshKpKaplVEsut.L.GGauEtpLpsYp............ 0 1 2 2 +14741 PF14890 Intein_splicing Intein splicing domain Eberhardt R re3 Jackhmmer:D7E146 Family Inteins are segments of protein which excise themselves from a precursor protein and mediate the rejoining of the remainder of the precursor (the extein). Most inteins consist of a splicing domain which is split into two segments by a homing endonuclease domain. This domain represents the splicing domain [1]. 
26.50 26.50 26.60 26.50 26.40 26.40 hmmbuild -o /dev/null --hand HMM SEED 323 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.82 0.70 -5.54 5 448 2012-10-03 10:25:13 2012-05-25 15:14:27 1 159 235 3 197 474 68 305.80 19 35.05 NEW DSLVpLsDsGh.VsIK-LVGcs-FsVlAlNpcThKLEoApVo+sFsTGsKPVFcLKTRLG+oI+ATANHKFLTI-GWKRLDc....LsssppIuls.........................................................................................................................................................................................................................................................................................................................................................................................E.htplushsQs..l.thspocIYWDcI.lSIpPsGVEpVYDLTVPsLHNFVANNIIVH ............................................................................................................................................................t........................................................................................h.t..h...h....p....s..p....lhc.....lpht.Gtpl.psTssH.hh..s...h.p...........s...............h.....p.........l....tp...........Lp.G-h.lshs....................................................................................................................................................................................................................................................h....h.................................t....t.......h....h.......huhhhu....-...G....h......................t............................................h..............p.....................h.t........................................................................................................................................................h..........................................t...t.....h...Pt.lh.t.........h.............................................................................................................................................................................
.....................................................................................................................................................................................................................................................................................................................................t.....................................................................l......h.h..p.....s.c..h....h....a......c....pl...h...pl.p..............s....p.....VaDh.p.l...t..Hsal.uss..h....................................................................................................................................................................................................................................... 0 47 107 153 +14742 PF14891 Peptidase_M91 Effector protein Eberhardt R re3 Jackhmmer:D3QNI8 Family This family of proteins contains an HEXXH motif, typical of zinc metallopeptidases. The family includes the E. coli effector protein NleD, which cleaves and inactivates c-Jun N-terminal kinase (JNK) [1]. 
25.00 25.00 25.10 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.19 0.71 -4.20 13 118 2012-10-03 04:41:15 2012-05-28 13:09:29 1 14 96 0 40 155 31 175.40 23 37.51 NEW ssstlphcGo-.cFtc+lcAuLD+luSosTGcphLpsl.....pohsp....s+pcclsIsEtsscpsssspsshstp..............................GsssplshNPs.p..ht.s.tts.hp.s...t.ssslLhHELhHsachLsGop.....tsp...s.....spssssst-EtpAVGLstasa-..tps...............hoENulR-EhGhP+RspY ........................................................................................................................pss.p..phhpphpssLphltss..sGpthLptl.........pthtt..........tpcplslp...t.s.p.ss.s..ttshsht.ht.......................................stGsssplphsss.......ht....s..sssshp.....t.t....t..hssLhHELlHAhchhsGsh.....................tsch...st.................pptsshsppEhcsVGLst....aspp.................................................hoENtlRpEhGhshRppY...................................................................................... 0 19 23 33 +14743 PF14892 DUF4490 Domain of unknown function (DUF4490) Eberhardt R re3 Jackhmmer:Q5BN46 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 101 and 220 amino acids in length. In mice, a member of this family whose expression is induced by p53 may play a role in DNA damage response [1]. 27.00 27.00 30.40 29.70 26.30 22.00 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.31 0.72 -4.39 11 94 2012-05-28 14:49:04 2012-05-28 15:49:04 1 1 70 0 62 110 0 94.10 31 61.20 NEW hpppssctptp.hps+sss...+TS-hY+lscsLPpR..FsNPspF+GYus.ps.ssshYRTSNppYGphsPTsHEhPpsFaPpspKFSpclstuGMaRssuLNThl-KSh ........................................................................pptphh...tp..l.tR..hpsP..F+GY...ts.pc..ssshY+ToNpsYGth.sP.......osaphPppaaspsppFS............pplstsGha+sssLNshh-cp.h............... 
0 31 36 49 +14744 PF14893 PNMA PNMA Eberhardt R re3 Jackhmmer:Q86V59 Family The PNMA family includes paraneoplastic antigens Ma 1, 2 and 3, found in the serum of patients with paraneoplastic neurological disorders [1,2]. The family also includes modulator of apoptosis 1, which has a role in death receptor-dependent apoptosis [3]. 25.00 25.00 25.00 25.00 24.70 24.90 hmmbuild -o /dev/null HMM SEED 331 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -12.15 0.70 -5.42 13 255 2012-10-02 13:37:57 2012-05-29 14:18:52 1 13 30 0 128 295 0 253.30 30 58.05 NEW M....ALsLLcDWCRGMslNs+RuLLIlGIP--CuEsEhpEuLpAuLtPLGcYRVLG+hFRRE-NuKAALVEluptlNhoLlP+cIPG+GGsWcVVF+P.ssDsEF..Ls+LscF.LcsEGpsh..p-luRsLGssstsssst.s....p...Wscslupsl..shQPhh.EshtYpcL+lFSG....p-pPusGEEoFEsWL-Hss-hlph...................Wp.VSEpE+RR+LlESLtGPALcllpsLhtpNsshost-CLtALtplFGsp-sptsspl+aLsssQcssEpLpA....aVlRLEsLLQKAVcKuAlp.sssssQsRLcQVLutAphspsLps.......+L+hhphcppP.PuFLpLL+ .............................................................h.hLp-WC+..ths.p+shhlhGIP.tht........-hptslp.sh...s.aplhthhhhpp.t...pssllphstthshshlPpch..GpG..GsWcVlhps.s.-s-h..hpplp.F.Ltt-G.s.h...tshst..hlt.ts...s....t.........hsc.hs.h...h.ts.pshh...hhh...pphp...lFSG....pt.st..t.cEsFc...sWh...pssp..h..l.........................Wp...h..s-.E+h++lhcsLtGsAhpl.hphlttpNsths.stphLpuht.sFGs.-s.....hshph+hhps...Qt.sEphps....alhRLEshLppshppssht..ptss......psRlp..phh.tu.hst...........phc....h...h....t......ttt..Pshhthh........................................................... 0 32 35 40 +14745 PF14894 Lsm_C Lsm C-terminal Eberhardt R re3 Jachkhmmer:Q8ZVU2 Domain This domain is found at the C-terminus of archaeal Lsm (like-Sm) proteins [1]. 
27.00 27.00 63.20 62.30 20.90 17.00 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.65 0.72 -9.02 0.72 -4.46 14 43 2012-05-30 08:12:54 2012-05-30 09:12:54 1 3 42 28 23 37 0 62.80 46 43.66 NEW h+EFA-hlp+...lhPshVKlh-EsslVhV.-+l+VoEpGV.EGoGPhApRlhclacEYlcp+Kc ..REFA-hlpKphtlhPu.VKlhEEssVVhlh-+l+VScsGV.EGSGPhApRlhcLYp-Ylpc+K+.. 0 6 11 16 +14746 PF14895 PPPI_inhib Protein phosphatase 1 inhibitor Eberhardt R re3 Jackhmmer:Q96LQ0 Family This family of proteins interacts with and inhibits the phosphatase activity of protein phosphatase 1 (PP1) complexes [1]. 25.00 25.00 27.10 26.00 22.60 19.50 hmmbuild -o /dev/null HMM SEED 347 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.40 0.70 -12.13 0.70 -5.71 17 94 2012-05-31 15:19:19 2012-05-31 16:19:19 1 3 58 0 59 99 0 304.60 35 76.71 NEW asDLpc..AcERYIpTNG..............aKFLRTlsQEEElIFRptFsRscsoaDt........DTVllsDlRDLVLFhMPcEFLo..hKFlpFMHpPsVaRLLHuLlIYFEYaLRhVEFlLIRRDELuGphuQlQSEQTN-MKRlaShYLSQYRhLVARNYshIlpGEGDhscaYHh....KcllNI..SuTI+D+hFHEQFLAVuTQIVWIsMHRRAYhlIEMEMNRLFRSEHFlhsR.EYL.........cFTssERSLLYGRssKlhNYRsQhSPLlQELpplscEDhPILWIGERKYRGoDhRIsplELEYlVPGsQL+hIDVuHGILGHPKpLYN..TlL............sLDWPuVRauNaS.paDPYallRQPpLcIPpIs-hphRKh .......................................................sh.p..hcE+hh.spu..............h+Fhch.s.tp-hlhcpth.st.tpsp.s.................sslslpDl+.lsLhLh.pp.....hp..hpF.hpFM+p.slcphLhALlhYhpaaLchsph......Ehpsp...h.lt..ppp-hchhhSh...LpthphhlAppYshllhG...u.hschaHM....Kp..pI..SsT.+DhtFaEpFhshss...lsWIsh+Rpta.p.I..............E.Ehs..............RLFRochFshs.Rtch.....................ch..T.sphph.h..hu........+pst.lh.p.......hhs.pSPllppLh.s.cEch....lsc+KY+tssh+lsthp.c.hlsshspl.h.sV..GILGcP+pLas..TlL............sL-..t....shp...hc...lhcpsthpI.ph.th.h+p...................................................................... 
0 16 18 37 +14747 PF14896 Arabino_trans_C EmbC C-terminal domain Bateman A agb Pfam-B_4670 (release 7.5) Domain Arabinosyltransferase is involved in arabinogalactan (AG) biosynthesis pathway in mycobacteria. AG is a component of the macromolecular assembly of the mycolyl-AG-peptidoglycan complex of the cell wall. This enzyme has important clinical applications as it is believed to be the target of the antimycobacterial drug Ethambutol [1]. This domain represents the C-terminal extracellular domain that is likely to bind to carbohydrate [2]. 25.00 25.00 28.20 27.20 18.40 24.40 hmmbuild -o /dev/null HMM SEED 388 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.30 0.70 -5.87 7 454 2012-06-06 15:10:05 2012-06-06 16:10:05 1 7 175 1 94 412 0 346.80 41 34.95 NEW EluShstGhVtpYPsYosGpANltALsu....ssCuhADDVLVEsDsNAGMLpPlPut.aGs.GPLGGhsslGFsPsGVs-clpu-PVhppPGpsso-s..sps.ls.u.pssT.uGshussGlNGS+shLPaGLDPsRTPVhGSYsps.p.sAphoSAWYpLP....spst..s+PLVVVoAAGtIhuhp..tsh.hGQolcLQWusscPsGsh.PhuplpPhDl.GspP.AWRNLRaPLo.hPs-AsssRlVApD.sLosppWlAhTPPRVP.LpTLQphlGSpsPVhhDhtsuhsFPCQRPFscphGVsElPcaRIlPDhhtttssSshWpsutsGGPhhhhphLLRsoslsTYL+cDWaRDWGSlc+ah.lVP.DttP.Atl-.GohThsGWhpsGPlRhh 
..................................................................................lsShst.uhltQYPsaSsutuNlpAl....sG....ssCulA-DVLVEsDsNsGhLpP..l....s..u...p........h.....us.....................s...L..........u.u......s.......s.....s..hGFsPsGls.....p.clp........u.-........s...............hh.....t.......ps......u..............ts.s.s..............s..s.........s....ts.............s........sts....u...s............T.........s....G.t.t...ussGlN....G.Sp.stLPasLDPu+sPVhGSa.sp....s..........p...........AplsSuWYpLP.....tp.st......stP...L.lV.VoA.AGplts.hc........t...h......Gp...pl......hlpauh......s...t.....s...s..G....s...h...p....s.h.G.p....h......hD...l..G.s..........tP...uWRNLR...hPhuthPscAsslRlVApD.sLsscpWlAhT.PPRlPpLcoLpphlGSssPVL.lDWsVuhsFPCQRPhsc.t...GVsE..lPc..aRIhPDt.sttssoshatsttsG.GshGhs-hLhcspslsoYLpsDWhRDWGulpcap.hss....stt.....P..Ap..lphGohT+oGhWpP.GslRh.h.................................................... 0 15 64 85 +14748 PF14897 EpsG EpsG family Bateman A agb Jackhmmer:B0MX13 Family This family of proteins are related to the EpsG protein from B. subtilis Swiss:P71056. These proteins are likely glycosyl transferases belonging to the membrane protein GT-C clan. 
27.00 27.00 27.00 27.30 26.90 26.90 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.72 0.70 -5.51 270 873 2012-10-03 03:08:05 2012-06-06 16:27:04 1 2 639 0 156 806 120 316.40 15 89.49 NEW hhhhhhhhlhlhhhsuh+h.ss........s..Dah...sYh.phapp.ht................................hthE......Gahh..lshlhph.lsts......hthhhhlhuhlhhh..hhhhhlp+hsp..........hhhshhha..hhhhhh.h.....hRQslAhu.l.hhh...u.ltalh.....c+c.....hhtall....hlllAshF.H.tSu...llhlsh....hhlhp.hchp..........................hhhh.....................................................................................................hhllshh...........lhhhhhhshlhphhs.h...................phstYhptsthstsh.........hhthhhhhhhhh..........hhhhhhhhtppht.....................phhhhhhhhsh.hhhhlhhhh............hhhsRlsh......aahhhhh......hlhshlhp.............hhptp...ppp..................h..hhhhhllhhhhhhhhthh.h................hhPYps ..............................................................................h...hhhhhhhhhhhsuhRh...th........us..Dhh..sYh.pha.pt.htt.................................hth.E....ua.hh..l....hl.h.ph..h..s.ts.......h.hhhhlh...sh.ls...hh....hhh.......hhhtc.h.spt................hhlslhla....hhhhhh.hh.ph.stl.Rpsl.Ahu..l.hhh.......u.lh..hlh.............pcc......hhphhl....hlllA.sh.F.H.hSu.....llhl.h..........hhlhp...hphp.................hhhh.....................................................................................................hhh.h..hhh.........lhhh...hhh.s.lh..shhs..h..............h.t..............phshY.hptt.tttth........................hhthhh.hh.hhhh........................h..hhhh.hhhttpht........................thhhphhhhsh.hhhhhhh.h..........hhhsR...hsh..hhhhh.h..hhlshhlhp..........hhtpp....tpt..............................h.......hhhhhllhhhhhhhh.hh..................................................................................... 
0 50 103 133 +14749 PF14898 DUF4491 Domain of unknown function (DUF4491) Bateman A agb Jackhmmer:B0MXX8 Domain This family of proteins is found in bacteria. Proteins in this family are typically between 94 and 107 amino acids in length. There is a conserved EYY sequence motif. 27.00 27.00 42.80 42.60 21.70 20.20 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.11 0.72 -3.84 40 168 2012-06-07 11:57:47 2012-06-07 12:57:47 1 2 159 0 29 132 11 92.20 53 89.23 NEW hsapGllIGlsoFlIIGlFHPlVIKuEYYFGp+hW.lFLlhGIsslssSLhlpsllhSulLGVlGhSsLWSItELaEQc+RVcKGWFPcNPKRK ....phsGllIGlsTFLIIGlFHPlVlKuEYYaGo..+.sWhlFLllGIsslluSLhlcslhlSulLGVhuhSshWoItElFEQccRVcKGWFP+NPKR+...... 0 18 27 29 +14750 PF14899 DUF4492 Domain of unknown function (DUF4492) Bateman A agb Jackhmmer:B0MUW2 Family This family of proteins is found in bacteria. Proteins in this family are approximately 80 amino acids in length. The function of these proteins is unknown. 27.00 27.00 35.40 35.20 21.80 19.50 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -9.02 0.72 -4.47 47 245 2012-06-07 12:06:30 2012-06-07 13:06:30 1 1 244 0 25 127 0 64.10 55 83.98 NEW hh+lachYhDGFRsMT.LGKTLWhlIlIKLFIMFslLKlFFFPshLppp..tscp.p+.usaVtppLhp .....pIFsFYhEGF+shT.LGKTLWtIIhIKLFlMFhILKLFhFss.hsoh..psDpEKusFVhcpLh.... 0 11 22 25 +14751 PF14900 DUF4493 Domain of unknown function (DUF4493) Bateman A agb Jackhmmer:B0MWC6 Family This family of proteins is found in bacteria. Proteins in this family are typically between 264 and 710 amino acids in length. Many of these proteins have a lipid attachment site suggesting they are lipoproteins. 
27.00 27.00 27.30 30.50 26.60 26.90 hmmbuild -o /dev/null HMM SEED 235 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.71 0.70 -4.74 66 146 2012-06-08 12:54:42 2012-06-08 13:54:42 1 5 36 0 25 139 5 229.40 19 45.16 NEW pspGhLp.....Lslsss..............ssstT..+ussstt.t.......ssssatlpIhsss....sthhpptphsphs......ttlpLssGs.YslpAp.hu..-ssss..uhct...PaY.tGpp.shslpp.spsssss..lsCpluNstlslsa....sps.htshF.ss.....aplpVssu.........ssslsa.......tscs...cssYatssp.........pl..phslpuspp.s.s...spstph...................pslpsssph.plshshs..................psGss......slslslsps.spshshslplssph ......................................tpGhLp.....Lslssp..............ssstT.+usspt.................phpsaplpIhsss.........sslhp.sshschs......ppltLssGs.Ys.lpAh.hG.......-ssssuh-p.....PYY.tGpp..shslpp.spss.sls..lpCpluNstVolpa.........scs.htphF.ss.........aslsVssu..........ssslsa......ttsps...cssYapssp...........pl.phslpuspp.s.s....spptsh...................pslpstppa.plshshp...........................psuth......slslsl-ts.hpphshshtls........................................... 0 5 20 25 +14752 PF14901 Jiv90 Cleavage inducing molecular chaperone Coggill P, Hetherington K pcc Pfam-B_1192 (release 26.0) Domain Jiv90 is a fragment of the DnaJ protein in eukaryotes and in J-domain protein interacting with viral protein (Jiv) located in the N terminal region of the pestivirus viral polypeptide. The viral protein interacts stably with non structural (NS) protein NS2, causing a conformational change in NS2-NS3 and stimulates NS2-NS3 cleavage in trans. Cleavage of NS2-NS3 increases cytopathogenicity and consequently aids viral replication. Jiv therefore acts as a regulating cofactor for NS2 auto-protease. The efficient release of NS3 from the viral polypeptide by Jiv is considered crucial to the pestivirus cytopathogenicity [1]. In eukaryotes, it usually lies 40 residues downstream of DnaJ family Pfam:PF00226. However, the function in eukaryotes is still unknown. 
27.00 27.00 27.90 27.90 26.20 16.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.33 0.72 -4.00 20 161 2012-06-08 14:47:20 2012-06-08 15:47:20 1 17 91 0 88 157 0 93.00 51 11.91 NEW puupsItCspCsshHhthtTc+.stupARaCpcCphhHsA+-GDlWsEsshhGhha.........+hYsCh-upVYDIT.............EWAsC..Qthth.......psNTHpVpa+lshs .....EshNThhCo+Cts+Hh.RhphcR.p.tsARaCs-CsphHsAc-GDhWAEoS.hh.GL+h.........pYFAhMDGKVYDIT.............EWAsC..Q+sul.......sPsTH+VsYHIshu.......... 0 20 36 62 +14753 PF14902 DUF4494 Domain of unknown function (DUF4494) Bateman A agb Jackhmmer:B0MZU7 Family This family of proteins is found in bacteria. Proteins in this family are typically between 154 and 172 amino acids in length. There are two conserved sequence motifs: VDA and EAE. There is a single completely conserved residue E that may be functionally important. 27.00 27.00 77.50 77.40 23.30 22.20 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.14 0.71 -10.69 0.71 -4.57 28 161 2012-06-11 12:23:02 2012-06-11 13:23:02 1 1 137 0 32 137 51 139.90 55 85.59 NEW WFEsKl+Y-KshE.sG..........hpKpVoEsYlVDAlSFTEAEspIhEEMusal..o.GEFclssI+pAsYuElFFs-h-ssD+....................WaKuKlpFITlDEKotKEK+ossshLVQAsolppAlcpl--sMu..sThlDYsIsulsETpIMDVF.ap .WFECKlRYEKshE.sG.............hpKKVTEsYLVDALSFTEAEuRIIEEMosaI..S..GEFsVosIK+AsY..uElF...S-t-suDR....................WFKsKLhFIT...lDEKS..G....tEKKTssphLVQAssl+-AlK+l-EsMc..uTMuDYpIuulsETsIMDVaPY...... 0 12 28 32 +14754 PF14903 WG_beta_rep WG containing repeat Bateman A agb Jackhmmer:B0MSK5 Repeat This repeat contains an N-terminal WG repeat motif. The extent of the repeat is poorly defined. This repeat may form a beta solenoid structure (Bateman A pers. obs.). 
25.00 11.50 25.10 11.50 24.80 11.40 hmmbuild -o /dev/null HMM SEED 35 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.34 0.73 -7.86 0.73 -3.53 486 4416 2012-06-11 14:25:50 2012-06-11 15:25:50 1 86 508 0 1201 4277 336 36.40 27 39.14 NEW aGh.....lsp..pG......ch.l..l......s.s.p...Ycp..............h.....t.s...a...p..s....u........................h.shV..............p..p..s................s.....................................c.........a ...................aGa..lcp.pG.......ch..l..l......s.s.p........Y-p....................................s.........t.s....F....p..p......G..........................................h.AhV..............p.....t...............t......................................................................................... 0 586 987 1110 +14755 PF14904 FAM86 Family of unknown function Coggill P, Hetherington K pcc Jackhmmer:A6NEL3 Family Function of this protein family is not known. 23.50 22.10 23.50 22.10 22.90 21.90 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.28 0.72 -4.26 3 140 2012-06-11 15:45:07 2012-06-11 16:45:07 1 5 47 0 46 98 0 79.80 52 31.85 NEW MAPEE+AGAspLLQGFERRFLAARALRSFPWQSLEEKLRDSSGSELLLDILQKTVKHPVCVKHPPSVKYSRCFLSELIKKHEAVHTEPLDcLYEALAEVL .................................ttt.lLpsFpR+FLAsptLtoFPWp...S...LEtKL+...-SSs........S..E...LLp...DI.L........p.KT...V......tHPlCl+aPPSl+YtppFLoELI+KpEusth-slDpLYcsLsph.................. 0 19 21 28 +14756 PF14905 OMP_b-brl_3 Outer membrane protein beta-barrel family Bateman A agb Jackhmmer:B0MXY4 Family This family includes proteins annotated as TonB dependent receptors. But it is also likely to contain other membrane beta barrel proteins of other functions. 
22.60 22.60 22.60 22.60 22.50 22.50 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -12.97 0.70 -5.81 316 2158 2012-10-03 17:14:37 2012-06-12 10:57:21 1 25 394 0 605 5700 1826 472.30 13 60.34 NEW s..tp...h.p...p......p...s.p...s..p...sp.s.p..s..h..sh..phuh..s.Yp..l......ss.....pp.sluh....phshp...hst..tpt...ps...................sspsp..........................tth..............ths..pppppp.ptppt..shshsltYptphs.p.sp..p..lsh..sh..s..a..sht..ps..ss.......ppp..hpp....p..h...............t....ppph....t..sps...pp..p.phhssplsaspsl.s..c......t..plphG.hchshspsp.ssh...h....t.hp..............s.t...th....................................................................s..........s..ssp.......hphpcphh..usYssas...t.p.h......s..+....h.sh..psGlR....hE..h....sp..h......................ph...pp...........t..h.t.p.......p.....p............psa.hs..hh..Pohs..lsa...phspt...p..lplshspchhppPshtpLss.hhp.ht.s.s.hshppGNPtLcPp..hspshpls...a....s.h.....c..pt........h...phs....hsh..pa..s...pst...h.........th...s..h.h...........pt..........s.p..ss..........................................hh..hhp.tN..h.spp.pph..shslshsh...ph................sph...hphs...........h...shshhht.p.p....s.................s....st..hs.................................................................................hs.....h..........sp..h...s....hth....ph..s..s..s..............................................hp..l..s.....p..s.hphphsht..ap...sps..hps....................p.........................................h...t....h.psh.hh.hshulp+shhc.c+.hslslpssDl....Fssp...p.pt..tpht....hss...hhp....p.....ph...pspt.hhlslsY..pF..spt+ 
.....................................................................................................................................................................................................................................................................................................................................................t......................................h.....phthp.a..p..h......s.p.......p..tl.th.......th..t.h.t..htt....pt....t..........................................t..t........................................................tttt..t.......pt...thp.hsh..h..p...t.....p.h........s.......tt.....p....l..s..h.........sh..p......h...th...pp...pp...............pp........htp.........h......................................................t..........pps.................pt..p..t....p...h......t.h.p....hs....a....p......hs......h...s........p..............h....pl...ph..G...hp.h.p...h..p...p..pp...s.s.h........h.....ht...............tt..................................................................................................................................................................................................................s.......t.......t...s.s.p...........h..p..h...p......p...p..h....h....shah..p..hp.....h...p...h...........s...c..........h..s..h....ps..G....l+....h-..........s.p...h.....................................................................ph....p...................................t.t.........p......p...............................psh...h.....p......hh....P..sh...p..l..p..a....................phspp....p....lp..hs.a..sp......p..........ptPs....h........t.........p.........L.........s.............s.......h...........h.................p.......h.............t.........s..........s.......h..........p..............h............p.........t.............G..............N............P......p......L..cP.......p...hspshpls.........a.....p..a................p..pt..........h........hs......hsh...thp........pst...........................h.
...t...h.h...........................t..ss..............................................................................................hh....hh..p.....t.....N......h..s......p.s...pp.....h....s..hp..hsh..sh..ph...............tph.....h...p.h.p...........h...sh...s...h...t...h..p..p...p....s...................p.......t..................................................................................................................................................................p..........p..t.....s....h..th.........ph.....s.sp...............................................................................................hp...h....s........t...s......h....p.h.p.....h..s...ht.....hp........stt...h.s.....................................................................................................h..h...........ps......h.h...h........shs..lp.h..p.......h......h.p....p........p....h.....p....l.....p..l..p..sp..s..l........hp......pt.......h..h.......p..h.......s.t...............p.............................tp.....hhlshpYpht.................................................................................................................................................................................................................................................................... 0 234 513 586 +14757 PF14906 DUF4495 Domain of unknown function (DUF4495) Coggill P pcc Jackhmmer:Q8IV33 Family This domain family is found in eukaryotes, and is typically between 322 and 336 amino acids in length. There are two conserved sequence motifs: QMW and DLW. Proteins in this family vary in length from 793 to 1184 amino acids. 
25.00 25.00 28.60 36.10 21.80 17.90 hmmbuild -o /dev/null HMM SEED 322 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.16 0.70 -5.48 10 82 2012-06-12 11:20:08 2012-06-12 12:20:08 1 3 65 0 58 84 0 292.90 43 32.31 NEW scFVpllsKsAsplLE+L+plupEslc+AsLssL.....pusLuuAuhV+NpLhpYsphhp.......ssscpshlphSYcpYpEhsEuLsEpllDhHsRlluhhILQDADShaW-s.+sFaEGERCSFsIQMWaaYhpuh+pDLWshlPPKhAQ+lhAuMLp-SLolLosRYoQupPShsRosQlhlDIsslLLCosphLhSlCsoupchlss........p.sspSKIh+.IHs+CspLhssLllcuoPLssLYKlF++Gl-.shs....hhpspttpPs.Wlhhh...hPsLhp...........hchoslssshAlplpLKlLLuQPpssWsLLL+lLLM+cshls+lLL+pohps ..........................s.pFVphssKsAstlLp+LpchupEs.s.+AsLpsL.....pshLusAshVhpphtpYpphhp.........pspKpshhhlshppYpEhhpsLt..plhDhpsRlhuh.ILQDA-ShHW-Dh+sFaEuERCSaolQMWaaahhuh+pDLWshlPP+hAQcIhuphLpcoLulLssRYspspPShtRosQl.hhDlsslLlCstphL....hulCpSs..pthls...........p.tspspIh........+.IHs+CppLhssLllhsuPLs...LY+sFpcGhc.shp.......hpshht.PhhWl.hh...hPphht............................ss..ssphshp.hpL+lLLupPtssWsLLLcsLLh+DshLhpIll+p.h.t............................. 0 15 19 38 +14758 PF14907 NTP_transf_5 Uncharacterised nucleotidyltransferase Bateman A agb Jackhmmer:B0MX79 Domain This family is likely to be an uncharacterised group of nucleotidyltransferases. 
24.60 24.60 24.60 24.70 24.50 24.50 hmmbuild -o /dev/null HMM SEED 249 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.51 0.70 -5.04 171 654 2012-10-02 22:47:23 2012-06-12 14:18:15 1 9 531 0 265 712 105 226.80 16 64.96 NEW ssh-WpplhphAptpplhshlhpslp.phs...........................hss...t....tlht.ph...ppt..hptsttpshthttchtclhphh.ppp.sIpslhLKGhsl..u.p.hYs..sst.h.....RthuDlDlLl........pc...hp..c...........................s..pp..lL..tph.Ga.p..........h....p..t.....p.......................sp.p.....................................cth.a.h.p...........................ps..hh...lE..lH.ap....lh............t.t.h.........ph.h.....p.....p.h..htp...................ht..h...ph..ssps...l.........hshs..spchhla.hhhH.h.h.c...H..h.h.......t..........tth..sL.RplhDl.thh.lpp.........h.pp.h.........-..apt....lh.pphpchshtc.hhhhshtlspphhs .................................................................................................p.......hh.....tth.shh...h........................................h..................h.....tt..h.tthtplh.phh.ppt.slthlhlKGhsh...s..t.has...p.t.h.............R..hsDlDlllt.....pc..htp...........................s.tp.hL.tph.Ga.p......h..pt....t......................stp...............................................p..hhht.............................ps...hh...l-..lH.ap.......lhtt.............h.........t..h.....s.....p.h..htp...................tp..h..pl..sstt...h.........hh.hs.spphhl..h.h.hhH.h...h..p...c...h.h.t............t..tl.p.lhDlth.h.hpp.........h...p..h....................................s..hpt....lh.thhpphth...h..hh.h........................................................................................... 0 93 196 230 +14759 PF14908 DUF4496 Domain of unknown function (DUF4496) Coggill P pcc Jackhmmer:Q6ZN84 Family This domain family is found in eukaryotes, and is typically between 134 and 154 amino acids in length. Proteins in this family vary in length between 264 and 772 amino acid residues. 
28.50 28.50 28.50 31.20 28.40 28.30 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.37 0.71 -4.29 29 151 2012-06-13 13:08:07 2012-06-13 14:08:07 1 19 72 0 104 143 8 135.60 24 18.56 NEW lpplWpslspalpcplttp+uVplsshGsFohph.p.th.s........................tcPsFllsccFhppaslpttp........t...........sptssspl.sasplu..ttsshs+-.hlcsslccllptluctlp..stpsl.plsh.ulGpLph.+spplphpF .............ptlWtslupalpcpLthp+GVpIsshGsFohpppphphsstph...................hhhp+PlFlhsccasptasLpps+h..........s...............................schshhtl.Nastlu..hts.s..hs+c..slcsslcchlthluctlp..................ttpsl.phsh.ulGhLhh.+spphphpF................................................ 0 55 65 85 +14760 PF14909 SPATA6 Spermatogenesis-assoc protein 6 Coggill P pcc Jackhmmer:Q9NWH7 Family This domain family is found in eukaryotes, and is approximately 140 amino acids in length. The family has similarity to the motor domain of kinesin related proteins and with the Caenorhabditis elegans neural calcium sensor protein (NCSâ€2). 27.00 27.00 27.20 27.10 26.50 26.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.66 0.71 -4.13 17 142 2012-06-13 13:52:55 2012-06-13 14:52:55 1 3 68 0 70 133 0 124.80 47 36.03 NEW tl-Lpl+AVT..CPGVaLss+sclYLslplhGpahcTpshPshFPlLhp-+hpFEKsFhsssssupls-hLcschlhlELlQhssss...GplLApapsssRDFLaPtsphhssh.sGssR-lLMcpo......sFPG.I.uPKlEFSTcosI .................h.ltLplculo..CPGVhL.s+p-laLulhlhsQYhcTpshPssFPlhhpppMhFEK...lF.pAlDPusVsphLE..hhhhELlQls.ss....u-pLAhY--NTRDFhFPtPp.hsua...sssRpVLM+ph......uFsG.I.APKlEFSTposI.......................................................... 
0 16 20 34 +14761 PF14910 MMS22L_N S-phase genomic integrity recombination mediator, N-terminal Coggill P pcc Jackhmmer:Q6ZRQ5 Family MMS22L (Methyl methanesulfonate-sensitivity protein 22-like) is found in yeast, plants and vertebrates, and is integrally concerned with DNA forking and repair mechanisms during replication. MMS22L complexes with TONSL and this complex accumulates at regions of ssDNA associated with distressed replication forks or at processed DNA breaks. Its depletion results in high levels of endogenous DNA double-strand breaks caused by an inability to complete DNA synthesis after replication fork collapse [1]. Thus the complex mediates recovery from replication stress and homologous recombination in vertebrates, yeasts and plants [2,3]. This family is the more N-terminal region of the proteins. 27.00 27.00 27.60 27.10 23.60 21.00 hmmbuild -o /dev/null HMM SEED 704 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.34 0.70 -13.29 0.70 -6.29 4 99 2012-06-13 15:05:30 2012-06-13 16:05:30 1 8 63 0 55 96 0 462.00 36 57.13 NEW PPCFoCsa-.sppss.phSupuYLusGuLKRllL+LDPtPssF-tDsl-lFuFtWVTETALVESC...phLFsLhRQQlhpLEsLlQ..SpDFGpAAoLHscA-plRpQClhFLHYlKVFIaRhLcs.ps.sc.tshHPacchEAQLPShLV-EL+uLhLhIG+lssLPussluAF.s.Q+QsKlFPPSWHLLHLaLDhHWLVLEILHlLuc+h.tQVVYuppFls.sG-sLTNlSLFEspsEpLhsDLIsLuhp+YsKV+PoEsLpopHa.CpCsKELWlLLI+LLtaRsKh.tsc..sFWshlNKhLpolhcpsostcp.suhuhspsKDPhuFohWlhsHLApLhpasRpG..ss-cpKQhEsNWpFltpLLKp.lssQsuh.EEQlRhaLpCCLoL.sphWpPNlSVlThLWEYYSKNLNSsFoVsWLsLcGLssIs+osLuhLphs+sCCSc.....pphssLY+ousSahIFLpILA+hlK...cpuGspPW+QlKGRIYSKFHp+RM.ELoEsGLppFhpLFLlL.AtsAElEDlAS+lhDLLthLs.suhs.supRALlW+GphAhLLlYspKsLDlushAEKLustFpptA+EFh..Ks.-.sp+.sLWslluhYl-GVQEVFETSssLshSEE+LLN-GFuhLLPACRpuELspVLsFLQsVlARLRpVHppsuQs.p..sss..s..s.sAKE+..A.VAuALWpHFFPaL+SQRho 
............................................FpC...t..tt.t.....h...tualtpG..h.ph.....t.c....thp.t..ph.th.hVtphhhs.ss...p.Lhthht...tp.....p.hp........s.hsshp...th...phRp.psh.Fhphl....h..................................h.hctl..tl...hsp..th..s......hh........h..................................................................hh.hu...att........-..h..t.h.C.C.+EhWlhl..hh........tp....sFWthhpphhpthhpt....pt.............shs.htF.hWlhhplu.hhpa...sppG......pp....hpht.ssh..htplLKp.hss......ps......Ep....phRhhl.hhhsL.hthhps.sh....shls.LW-Yap+pL.N.ssFslsh..hptls.h.poshshlc.hpphhs.......t..pL.....sSahhalhlLuhhhp....ttstst..hpp..lhGRlaSKFptt+h.tLsE.Gl.phh.LFLhl..ht..-hp-lss...+h...hp.hL..h....l....t.........pt..lh.h.+GphuhlLha.p+thshsshsthh.t.thtthtp-.................hthhs.ahtsl.pplhphu....ph..upphLl..s.hhshhl.tst.sp.p.hhphlp.lhtplp......................................................................................................... 0 17 19 36 +14762 PF14911 MMS22L_C S-phase genomic integrity recombination mediator, C-terminal Coggill P pcc Jackhmmer:Q6ZRQ5 Family MMS22L (Methyl methanesulfonate-sensitivity protein 22-like) is found in yeast, plants and vertebrates, and is integrally concerned with DNA forking and repair mechanisms during replication. MMS22L complexes with TONSL and this complex accumulates at regions of ssDNA associated with distressed replication forks or at processed DNA breaks. Its depletion results in high levels of endogenous DNA double-strand breaks caused by an inability to complete DNA synthesis after replication fork collapse [1]. Thus the complex mediates recovery from replication stress and homologous recombination in vertebrates, yeasts and plants [2,3]. This family is the more C-terminal region of the proteins. 
28.40 28.40 48.30 29.50 27.70 25.90 hmmbuild -o /dev/null HMM SEED 373 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.89 0.70 -5.40 15 86 2012-06-13 16:19:41 2012-06-13 17:19:41 1 7 62 0 52 90 0 316.60 36 35.01 NEW EplscLTphVhpLsEh+plh.cus.....hhpss+-PLstFFcAlG++hpp.cssuc..h+hphspKhcsYlscF-pWlsss...tp.ssthhpRhYshLulllhpCuslhYs+SKusChh+hhhs+hLLP.opLQsspsspspllpsl+KhaPllLQGlu...phsapsDsYLscpLcsllp+asP+Fhhsossthss+........h..hhpsss..spcLspalLppltspFlplpp.stsss+suhlLsllppLlcsh........pspsplhshlchltsulL-plhhVs-...s+thshslhphlVpssphppusts+pphssslpuhscKaLuhsTh.YFphLtcLAchsPclVtsLlspl+pplppsEhKRGsGcDsulRcsLpRLpssLp ......................................plhtLTphlhpLsEhctlh.cst........ssppsLh.FhcAlGhpattlQshu-...+oshspKsLpYlGclhKalpP.L..........tph.stuLplsYthhGhlVKphu.lhhT.SKuQpLLapllDsLLLP.s.Lppppt..s.hhpulpcsLPlaLQGhs...p.s.s.ssYLpphLtpllppYhs+FlsuSs..s.sht...............l....htsssss.hs....tLp+hllp...hlpcsalphcu.ptssP+LuslLsFl.pLhcc...............ps.tht.lchlLPulLcClhhV..s.......psps++hus-.lphhVpssp...htspttstsphsulhRpFlpcahhhashplaplLcslAhLs.plVhtLlsplppsL+poEhKhGlGcshu.Rpshp+L.shL.s................ 0 16 18 33 +14763 PF14912 THEG Testicular haploid expressed repeat Coggill P pcc Jackhmmer:Q9P2T0 Repeat This repeat is the only conserved part of the THEG proteins from vertebrate spermatids. Both human and mouse THEG are specifically expressed in the nucleus of haploid male germ cells and are involved in the regulation of nuclear functions [1,2]. Although the differential gene expression of THEG in spermatid-Sertoli cell co-culture supports the relevance of germ cell-Sertoli cell interaction for gene regulation during spermatogenesis, THEG was not found to be essential for spermatogenesis in mice [3]. 
27.00 8.00 27.00 8.30 24.90 7.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.71 0.72 -9.12 0.72 -3.42 37 329 2012-06-13 16:25:20 2012-06-13 17:25:20 1 8 58 0 222 470 0 51.20 24 50.06 NEW W.......slspuALcupsopRlppLAp...P+hhtshh.hphs..................tls.tAhtht......sosRlhcLupPpp ......................................l..tshph.sopRltpLAp...PKhhtshh..tps........................l.................................ch..hs........................................ 0 93 104 146 +14764 PF14913 DPCD DPCD protein family Coggill P, Hetherington K kh6 Jackhmmer:Q9BVM2 Family This protein is a found in eukaryotes and a mutation in this protein is thought to cause Primary Ciliary Dyskinesia (PCD) [1]. This protein is 203 amino acids in length, 23 kDa in size and its function remains unknown. The gene that encodes this protein is a candidate gene for PCD and is expressed during ciliogenesis. PCD affects the airways and reproductive organs, and probing Northern blots show DPCD expression in humans is highest in the testes. Additionally, there is no indication of major splice variants [1]. 27.00 27.00 43.90 43.60 20.80 20.10 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.13 0.71 -5.19 18 117 2012-06-14 09:39:13 2012-06-14 10:39:13 1 3 97 0 68 121 1 179.00 46 89.83 NEW sWLshL+uAcKTullQDG+RKVHYpFsDGpEMAEEYDhcTspLlhR+WRpKusLGupGpWplElGEP.sssst..................psphlKEsuosPlFsR+sTKsuFpWRIRNLPYPh-sYoVTV-tcpRslllRToNKKYYKKhslPDLDRspLsl-pssLoasHtNNTLIIoYKKPcplLphEcplLpEL+KlKss..p-GDl .................Whp.lpsupKoullps.G+RKlHahFsDGpEMsEEYDhcTspLLlRKWRhKs.s.L.Gu.upWplEVG-sss.ttss.h....................ssphlcESsusPlhhR.+DTKpuFpWRIRNLPYPc-VYuVsV-pcc....R....sIlVRToNKKYaK+hsIPDL-Rh.plsLcpstLSasHtpsTLIIoYpKP.tllt.hEppl.pElpplcstp....t............ 
0 26 31 49 +14765 PF14914 LRRC37AB_C LRRC37A/B like protein 1 C-terminal domain Coggill P, Hetherington K kh6 Jackhmmer:A6NN04 Family This family represents the C-terminal domain of the putative Leucine Rich Repeat Containing protein 37A or protein 37B (LRRC37A/B) found in eukaryotes. The Leucine Rich Repeats (LRR) lies in the central region. The gene that encodes this protein is found in the chromosomal position 17q11.2, and its microdeletion results in the disease, neurofibromatosis type-1 (NF1) [1]. The function of the protein, LRRC37B is unknown, however experimental data shows expression in the aorta, heart, skeletal muscle, liver and brain during gestation [2]. 27.00 27.00 37.10 35.00 24.80 24.60 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -10.91 0.71 -4.78 5 149 2012-06-14 09:49:06 2012-06-14 10:49:06 1 7 25 0 80 93 0 137.50 61 14.63 NEW YPuLsSPG-QFEuQLNQQLRSLIPNNDVRRLISHVIRTLKMDCS-o+VQLoCAKLISRTGLLMKLLSEQQEsKlSKAEWDTDQWKTENYINESTEAQSEQKE.csSELsKEVPGYGYNNKLILAISVTVVVTlLIIIFCLIEICSHRRApcEDE .....................................................P.h.SsGDQFEhQLsQQLpSLIPNNsVRRLISHVIRTLKMDCS-spVQls..CAKL....ISRTGLLMKL..LSEQQEsK.sSKs-WD.T-QWKoENYINESTEsQoE.QKE..c.sp.Eh...pKE.VPGaGYs.pK.LILAl.VTs..llhlLIIlFCLIp..l............................ 0 13 14 15 +14766 PF14915 CCDC144C CCDC144C protein coiled-coil region Coggill P, Hetherington K kh6 Jackhmmer:Q96IX9 Family This family includes the human protein CCDC144C and the ankyrin repeat domain-containing protein 26-like 1 found in eukaryotes. Its function remains unknown, however, it is known to contain a coiled-coil domain which corresponds to this region. The ankyrin repeat which features in this protein is a common amino acid motif. 
27.00 27.00 27.00 28.80 26.90 26.90 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.24 0.70 -11.93 0.70 -5.50 17 241 2012-06-14 12:50:39 2012-06-14 13:50:39 1 41 38 0 86 211 0 241.80 46 28.85 NEW NphLQ-EIAhLRLEIDTIKspsQEKEpKYhEDIcIlKEKN-sLQ+slKLNEEsLTpTlhpYssQLNsLpAENTMLsS+L-pEKpsKERLEs-lESa+uRLAuAlpDp-pSQsoKRDLELAFQRs+DEah+LQ-KMsh-lSsL+DpNEhLSQQLScsEuKhNoLEhELH+sRDuLREKoLhLE..plQR-LsQsQsQtKEhEphhQsEpsKlsKahu.KQESlEERLuQLQSENhLLRQQL-DApsKs-sKEKsVhslQcphpshlppLQA-sEKpsLhLcE+NKELhsEssaLKERhhpYEpEKsERE ...............NphLp-EIAhLRLElDTlKppspcKEpKYhcDlchlKEKN-sLpKslKLN..EEslT.......cTh.pYstpLpsLp.s....ENshLsScLppcKps+pRLEsEhcSapsRLsuAlp-tppp.ssc+shclshppst-..hplptphs.chuth..pschLoppLScsctKhpsLc.ch+.sp-sL+EKoLhlE..psQp-LpQsQpphKEhcphapstpsphpchht.KQpsl-ERlsQlppcNhLLpQQL-DAppKssspE+hlhsIQtph........h-scKp.hhLcE+NKcLhschsaLKEphhpYEpEKsEp.......... 0 26 28 36 +14767 PF14916 CCDC92 Coiled-coil domain of unknown function Coggill P pcc Jackhmmer:Q96LY2 Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. The function is not known and the proteins carry no other domains. 28.30 28.30 28.80 29.20 28.00 28.00 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.03 0.72 -4.38 15 147 2012-06-14 14:55:10 2012-06-14 15:55:10 1 2 59 0 90 137 0 59.70 42 20.86 NEW hpp+lpslp+slpFLQppHtpsLcuLHpEIc+Lpccs+-LpacLhhppssps.ps.ss...o.sp...ph ......ppplpshpKsL.FLQp-HusTLcsLHt....EIc+Lpp+spDLpacLhhppsppp..tps...t.t..t................... 0 25 31 52 +14768 PF14917 CCDC74_C Coiled coil protein 74, C terminal Coggill P pcc Pfam-B_23141 (release 26.0) Family This is a C-terminal conserved domain of coiled-coil proteins from vertebrates. The function is not known. Expression levels in humans are elevated in breast cancer []. 
25.00 25.00 25.40 53.00 24.00 24.00 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.53 0.71 -4.08 6 31 2012-06-14 18:18:57 2012-06-14 19:18:57 1 2 22 0 19 42 0 124.20 59 34.53 NEW tPhMsL...P.tLRKPTTLQQCEVlIRQLWNANLLQAQELpHLKSLLEGuQRP+AssEE.........AGhuuP.+DQ-.....uppLPKVosKulSKKCLlLS.ssV.AE+uILPALKQoLKsNhAERQ+RLQAVQpRRlHRo ...................PsMhL...P..LRKPTTLpQCEVlIRpLWNsNLLQsQELQHLKSLLEGoQR.PpAsPEE...........A..S.P.+DQE.....AhphPKVoo.KulSKKCLlLSP.PV.AERAILPALKQThKNNFAERQKRLQAhQ+RRLHRS.................. 0 5 6 9 +14769 PF14918 MTBP_N MDM2-binding Coggill P pcc Jackhmmer:Q96DY7 Family MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle [1]. MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner [2]. MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells [2]. MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) [3]. It is unclear which regions of MTBP interact with which binding-partner. See PF14919, PF14920. 
25.00 25.00 31.40 31.40 19.00 18.40 hmmbuild -o /dev/null HMM SEED 271 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.12 0.70 -11.66 0.70 -4.99 14 53 2012-06-15 13:15:41 2012-06-15 14:15:41 1 4 37 0 28 41 0 239.20 56 30.94 NEW MDRYLLLVhWtEtKh.usAutEhE.ts-houhcsopppPclpAsNlYHLLKRSIosSlpP-DSTFPACSVGGhPGS+KWFFAlQAIhGFYQFCSSD..WpEIaFssEKDcIEDVLQTNlEECLuAlECFEEEDSNSRESLSLA-LYEEuAEsLHQLSDKLPAPGRAMlDlILLsSDKDPPKLKDCLPslGALKHLKEWaSAKITIAGsHCEhs......sQKIAEYLSAsVVohE-l+NsID..S+ELWRGcIQIhERKFG.ElSFPEFCLKGVTscNaSs.NLNo ........................................................MDRYlLhl.a.............t...t.s...t......s.hs.AsslYchLKcShssSlps-sSTFP..ACSVuGhPGo+KWFFAlQAIhGFYQFCSSD..WpEIphsspK-c.EDsLQTslEECLuAlpsFEE-DsNSRESLSLs-LYEEuAEsLHQLSDKLPAP.GRAMlDlILLsS-cDsPKLKDCLPslGALKHL+EWaSAKITIAuscschs......hQKIA-YLSAslVu.--LpNsID..u+ELWRGKIQIhERKFu.ElsFPEFCLKuloscpass.plp...................... 0 3 5 11 +14770 PF14919 MTBP_mid MDM2-binding Coggill P pcc Jackhmmer:Q96DY7 Family MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle [1]. MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner [2]. MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells [2]. MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) [3]. It is unclear which regions of MTBP interact with which binding-partner. See PF14918, PF14920. 
25.00 25.00 39.20 39.00 20.00 19.20 hmmbuild -o /dev/null HMM SEED 342 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.09 0.70 -5.76 6 58 2012-06-15 13:23:16 2012-06-15 14:23:16 1 5 40 0 30 45 0 303.50 55 40.32 NEW pcVFHYYGsALEaVQMVpLSDLPuhalSDhEFEL....uls++usKt.StLLL-QlsSLpGKVGALFsLsCslSslshPssuQLSS+KWREYhA+KPKsIsVPDVEVKGEpusYahLlQGsGsut...C+ATLlHSAoQINGuAALshlpuhl+.pscsupsuhshsshlpSLP+FsGEQllpRE+pLAplQsLALKEhLKR+ctsppssulsssELKuLLsLTREpaLchaDosLPcus..phtphpsshhlscsosssSspSstMcsNshEWPERpVLQNLENhEKhKQKhRsuhL.stSSEQLLG+KDG.R-ShTLLDAKELLKaFTs-GLPlG-LQPLplpRG..-sAF ................s.cVFHYYGPALEFVQMlpLSDLPShahS...DhEFEL.........sLo.ppss+tpShLLL-QlSSLpGKVGALFsLsColSslhlPsss.Q..LSS+KW+EYlA+KPKoI.sVPDVEVKGEpuuYYLLlQGsustt...CKATLlHSAsQINGuh..ALshlpG+h+.ps..ppuchu..hshc..lhSLPpFoGEQllpRE+pLA..pl..QsLALKEhLKR++hupQPps...lSssELKoLLhLTREpFLcha..-uhlPcss.....hp.hsph.ps..shlss..hs.ssp..sssSshhEsNs..LEWPERpVLQNLEshEKsKQKhRsu.......L.s+SSEQLLGHK-G.R..-ShTLLDAKELLKaFTsDGLPlGD.LQPL.lQ+G-psF......................... 0 3 5 13 +14771 PF14920 MTBP_C MDM2-binding Coggill P pcc Jackhmmer:Q96DY7 Family MTBP, or MDM2-binding protein, binds to MDM2. The MDM2 protein, through its interaction with p53, plays an important role in the regulation of the G1 checkpoint of the cell cycle [1]. MTBP promotes MDM2-mediated ubiquitination and degradation of p53 and also MDM2 stabilisation in an MDM2 RING finger-dependent manner [2]. MTBP differentially regulates the E3 ubiquitin ligase activity of MDM2 towards two of its most critical targets (itself and p53) and in doing so significantly contributes to MDM2-dependent p53 homeostasis in unstressed cells [2]. MTBP inhibits cancer cell migration by interacting with a protein involved in cell motility. This motility protein is alpha-actinin-4 (ACTN4) [3]. It is unclear which regions of MTBP interact with which binding-partner. See PF14918, PF14919. 
28.40 28.40 30.40 32.10 23.80 23.90 hmmbuild -o /dev/null HMM SEED 251 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.59 0.70 -4.89 7 57 2012-06-15 13:45:35 2012-06-15 14:45:35 1 5 42 0 33 45 0 234.50 59 31.44 NEW P-LoPcKL+tLPFEKAutC+YHGIEYCLDsRKALERDsGFuELQSRLIRYETQoTCsR-ssPlPh.......sLSPLPSPAVLSEPtSVPDGEuLQsElRs-sscLKRRS+DhssLhPtKRlsKScSSDSLlStsSssoupa.hshsoRp.puERshSs.............t.tlstpsuuuppssppocoopstKESRSQKHsRhLKEVVucTLpKHGIspcHtCFsuCSQRLF-ISKFYLKDLKTSRGLh-EMKKsAsNNVh.QVI .............................P-LSPtKLpsLPFEKAusCHYHGlEYCLDsRKALERDsGFuELQSRLIRYETQTTCT..+EshPlPh.......lLSPLPSPAV....hSEPGSVPDGEsLQsEh.......+...sEsuRLKRRS+Dlssl..a.P...pK.RLsKSESSDSLlSQsSGsos....pptthsssR+.psER.Shuss.....s.ts.....pss+hspps.uuup+ssppscs......s+phKESR...SQKHTRhLKE.....VVscTL+KHuIsE...sHcCFsACSQRLFEISKFYLKDLKTSRGLaEEMKKsAssNsh.QVI............. 0 8 10 17 +14772 PF14921 APCDDC Adenomatosis polyposis coli down-regulated 1 Coggill P pcc Jackhmmer:Q8NCL9 Domain The domain is duplicated in most members of this family. APCDD is directly regulated by the beta-catenin/Tcf complex, and its elevated expression promotes proliferation of colonic epithelial cells in vitro and in vivo [1]. APCDD1 has an N-terminal signal-peptide and a C-terminal transmembrane region. The domain is rich in cysteines, there being up to 12 such residues, a structural motif important for interaction between Wnt ligands and their receptors. APCDD1 is expressed in a broad repertoire of cell types, indicating that it may regulate a diverse range of biological processes controlled by Wnt signalling [2]. 
27.00 27.00 31.60 30.40 25.90 26.40 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.02 0.70 -11.74 0.70 -5.02 11 205 2012-06-15 15:05:53 2012-06-15 16:05:53 1 3 56 0 131 192 0 199.50 39 81.35 NEW pCpp.hpclppts+lT.s.shPPcLcGpWlSppCEVRPGPEFLTRsYpFa..sNppF+AhQaYYsD.uCppPoaoLlI+Gpl+LRpuSWlspGATEA-aaLc+VsIl.HSppshp+lspclNpoCss.....h.s.spsWhPhh.YpLh....stpsppc............................ChsAhGFuhpELpLlRlppphhhps.........phspELaLGDIHTshspRtpYRPTuYQ.PLpsshcp.spsCPsCullh+uoEppPPlLPs ....................................s.tLpGpWVSptCE...VRPu........s.....FLTRpapFa.....s.N.p.oacua.aaYuDstCpp.PTaTlhs+G+hph..t.psShhlpGGTEhsaclp+spVssh.spsssphLsh.h.sp.oCuu.........tssWt.Gst.-lh...psp...t................................Ch.t.ulphs..hpEhpLh+hEpc...hhtp..................LalGph.TD.so..p..R...p..hRPTS...YQ.PL.ps............................s............................................. 0 27 35 74 +14773 PF14922 FWWh Protein of unknown function Coggill P pcc Jackhmmer:Q5TG08 Family This is a family of eukaryotic proteins. Most members carry a highly distinctive, conserved sequence motif of FWWh, where h represents a hydrophobic residue. The function of the family is not known. 
25.00 25.00 28.80 28.80 23.40 21.30 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -10.90 0.71 -4.74 18 109 2012-06-15 15:39:53 2012-06-15 16:39:53 1 3 49 0 68 108 1 146.90 32 28.93 NEW l-htshsshcsschssLP...tplchpplhsplhcutph.tth.......................tt.ap.....................phh...Sct.tulhhDoFWahahctap.c...................p.phps+LFsRlApsYVplhhslt.sch+Dtahpha.phlApslahshhpsFPpuhphasps..F+tpLhphhhhhhoGlps..p.hphscWs ...........................................................................................................................hphstaptpc.stLP...ptlphpphh..llcupp...ph........................c.hc.....................pah...S.tshAlhhDoFWWhFhccapPs..................................................................................................c.QspLFcRIupsYstLhhph..s..sph.....c-...s.hhchhsshLupAlYssFppsFPpSh....Fssc..FKpslssph.hWho.GhhP..p.t.appW......................... 0 35 37 46 +14774 PF14923 CCDC142 Coiled-coil protein 142 Coggill P pcc Jackhmmer:Q17RM4 Family The function of this coiled-coil domain-containing family is not known. It is found in eukaryotes. 
25.00 25.00 31.60 31.00 23.70 22.20 hmmbuild -o /dev/null HMM SEED 450 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.80 0.70 -12.52 0.70 -6.17 6 80 2012-06-15 16:15:37 2012-06-15 17:15:37 1 2 63 0 52 76 0 357.80 33 57.08 NEW hLWsthGtuLsphh.............sslslas.shshslhphLppshspssLPppsppsLpsls+sLpppushpsWDpuFCtsLGSuspspsl.........th..sshuotTspLltpLF.PLlslLp......p..............sp.RspLhLphP.....LsRhlsTLpooplWlho+sppaLuuWuhspFLLllQ+DL.sLLc.......sscsLshLsps.......tshshslhspLstElp+hhscLptloccslplFot-C+KhoTphFp.sMPpG+aWR+chps-lPspPSpYAthsVppVLtPVlcGlptL.spAphsALopAloAhh-AWL-HILpccI+FSlpGAlQLppDFusVR-hlpuEp.uLStEh+QpLLSLclF+plDGAlhhLLpQPhsKptlscps....+pstsspsQshpplsoSS.LsSLcuh..psshpssl.ssp...s.................us..-.YhluNQQtWL ......................................................................................................................hWs.hu..L-..................lshh..pp.pthtp.L..p..s..splPp.s.ppLtsls.ctLhtpsh.htWDQh.Fp.ALsSu..tsps....................................s....so.Tsp..lh.p....lFssLlsh........................psp.slh..p.Ph....lthh.polposhLWhhtcupphhssWs.spFhhllppDl....hLp..........phpsLp.htpp...............lsltlcppLshElpp.hsplphhscEslp..lhup.C+p.uhtsFphhhPpupaWRhpltsp..stsS.Ysshslcpll.PVlpuhp......phshLs.hLphhhtAWLDHIhp+..tI+FSlpGAlQLhpDFssVRphlpp.phsLo.-l+ppLh.hplhpph-GshhhLLppP.st.plppp........pts.ptthp..p..thsst....s............................................................................................................. 0 15 18 35 +14775 PF14924 DUF4497 Protein of unknown function (DUF4497) Coggill P pcc Jackhmmer:Q9P2G4 Family This domain family is found in eukaryotes, and is typically between 107 and 123 amino acids in length. There are two completely conserved G residues that may be functionally important. 
23.00 23.00 23.20 23.90 22.10 22.70 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.69 0.71 -10.21 0.71 -3.75 29 85 2012-06-15 16:57:50 2012-06-15 17:57:50 1 8 45 0 74 80 0 109.70 25 15.98 NEW ppsph.pFppGKSCLFshsspslppphpshslph.shhch.s.h.hsss..ph.............lGsstlslsph......htpl..hpp........hp.p..................stu...cshcspasLhs.psp.ps.GplslhlRloshG..psl..lT .........h..t..tFspGKSCLFphpsssLpptlhphPLth.hlhplsssh.hsss..pl...................lGssslslsst......hppl....htt........ht.p............................................sss.psh+spasLh........s...tsup.ps.GslslhhRLosLGppl....... 0 26 33 60 +14776 PF14925 HPHLAWLY Domain of unknown function Coggill P pcc Jackhmmer:Q9P2G4 Family Members of this family carry two distinct, highly conserved sequence motifs, CPPPLYYTHL and HPHLAWLY. The family is found in eukaryotes, and the function is not known. This family lies at the C-terminus of members. 25.00 25.00 38.90 35.10 22.70 22.70 hmmbuild -o /dev/null HMM SEED 640 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.98 0.70 -6.53 5 43 2012-06-15 16:58:46 2012-06-15 17:58:46 1 4 29 0 26 37 0 485.50 48 70.18 NEW CSNuSSsRSVSPPNQEVTELDlETNIFCPPPLYYTHLTQEKsPPsQGKITIEPQINAPEELDGsFhEEcRVNPPTHTssLKHTsSAT+ESPPMLINPPHVQDlGASNQTTDHsQTEQNRINTIRQLPLLNALLVELSLLYNQPVASPTHIHPHLAWLYRTEDKKSPESSAKSTCKSESKKDKLShGGNEKSVSLQYKKNQsENLKKGKYFEKsSGAPPKRVPRGKLLYGLTNTLKLRLKQTNPDMLVVHEKREQYRKMQAQMLGTKLRIPSSKVKlLSFAEQaQKPHQLPKDKCLESDASFAENSDTSKQISGVlDDPSTopETKLKCATE.KTVDCuENRoNNGLLEEIVSPANSIVsE+FTsAsILEGKh..EMKVQSPsVFQQVAVVDRhlVDKEIDDKQVKTTDsDILTsD..ISEK+PSKNSCSESISELKYSDDFTSPCsSEDFsTSEDTSRILQAHDSSPGTENPKHSQaTSKSSETtLSIRKNSSEKSSILSPPFSAGSPVaSaKRFHISKTQDKSLEEASSISTSDLSSSHWTEEKENQIDQNSMHNSKVIKRDQ..DISlK.KTRTGCKSSEKSQSPRTSQVSSYLPSNLSELELNVLDSSTSDHFEEssDDlGSLNISKQCKDICELVINKLPGYT 
........................Cssu.s.pslSs.spEVTELDhETNhhCPPPLYYTpLopEK.ss..spsphTh.sQhN.sE-h-sh..EpphlssPh..ps.ctspssspEpPshL.s.P.phps.utsspss...QsEQstlssIRQLPLLNALLlELSLLhsQPhsoP..splHPHLAWLYRs..E...D..pcu.P-sSsKuTspoEo.psKhshttpcK..sls.Qh+Ksph.p.s.pcs+a.EKpuus..p+Vs+t+LLYGLTNTL+LRLKpTNPsMLlVHEKRE.YRKhQsQhl.Gs.KhRlPSSKsKlhS..A.cQp.Qhs..QLPcDc.l.-ucushsEso-TStQlSssh-csSsocE.sch.p.h.shc.cplc.scs+hssh.lct..hsshtslhs...Ec.h.sshhtt.ph....ch+lpSPsl.tp.shlDp.hls..ct.hs.cplKsht.-..hus..hu-.p+.s.upsSs.EslSELpYSDDh.....s..S..P...CYSEDFsosEsou+....hpAhDSSst.....sE..sspps..phs..sKSS-splSh+csoS-.pSSlLoP..PF...SAGSPVpSh++.+l.Kt.pcpSLEEsSs.lS....sSDh........SS.pWTppKE....sph........Dpsuhpp....Sclh+psp....D.ss..psp.sshKS.EKSQS.pTSQVSSYLPSNlSEL-LsslDs..SsuschpE.pDplGSLsIocQCKDICELVINKLPGYT................................................................... 0 1 2 6 +14777 PF14926 DUF4498 Domain of unknown function (DUF4498) Eberhardt R re3 Jackhmmer:Q9BRQ4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 308 amino acids in length. 
27.00 27.00 28.20 28.20 26.80 26.70 hmmbuild -o /dev/null HMM SEED 247 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.66 0.70 -5.23 16 119 2012-06-18 07:58:32 2012-06-18 08:58:32 1 3 86 0 74 116 1 184.50 36 75.31 NEW sFs.LstpsFstl..pD+-lpchL.hKWulpsplphpsFpacpp...ap.shppschltsFFpDpsVtpsLpl.p.pssthst.lutp......ssclcsp.lssohhShsFFD+Lhs..su...IVR.psGc.Is+ChD-hh-sh.lSDELRchLL.EDSEpYclFS-s-RpEhLFcLFcpLlLG.GslCQaEDplpPYl-soKplYK-LVulpKsspT.cpIpls.StVa+Vsuh.s.pss..sh...ps....ssHtQshsYlllDPt+RclpllYH ................................................sh.t.phphptFtasp......................ap..hptpphhhsFFpcssVh.pLthh......ts..h....s.t.........stplph..lsso.hohshFppLhs....tsll+.tsGp.lhpChcp.h.tsh.hsDpLRphLL..........p..-S-papl.aop.-RpEFLFplFp+LslG.GslCQaEDslsPYL-ssKhlYKcL......Vu.......lp+ssps......p.......plt...............l..o.lhcV.sh........s.t.t..h....tt..........t.p.tshsahhlss.p+.h.hh.p........................................ 0 28 36 55 +14778 PF14927 Neurensin Neurensin Eberhardt R re3 Jackhmmer:Q8IZ57 Family The neurensin family includes the neuronal membrane proteins neurensin-1 and neurensin-2 [1]. Neurensin-1 plays a role in neurite extension [2]. 25.00 25.00 25.50 25.80 24.90 23.60 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.94 0.71 -4.80 14 169 2012-06-18 09:29:10 2012-06-18 10:29:10 1 2 52 0 110 147 0 131.00 32 57.78 NEW haGVRuYLHpFY-..........-Cs.......suh.cppc....sh...hphssp+hsulhWKV.......ulssGhLlLlhGlssLhlGYh......lP.+hEhh.....s...............p.uph.hlDspAspaNpsLDsh+LsGssLhClGGlhlAhsLl..lsshtps.tcpE.hhpts.ppt. .................................................................................................................hch...................uhlSuhlFLlhGlhllsluYh......VP.clcs......s....................p.t-h...hl-p...pu...s...phsutLDpChlAGhsLhslGGhlLushLh..hSha..ttp...............h.................... 
0 15 24 50 +14779 PF14928 S_tail_recep_bd Short tail fibre protein receptor-binding domain Eberhardt R re3 CATH:1ocy_A_02 Domain This domain is a receptor binding domain found on bacteriophage short tail fibre proteins. It contains a zinc-binding site and a potential lipopolysaccharide-binding site [1]. 27.00 27.00 31.40 42.40 22.50 20.40 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.50 0.71 -3.90 9 26 2012-06-18 10:30:31 2012-06-18 11:30:31 1 5 24 2 0 30 0 119.20 54 23.35 NEW ssstcIh..ss+GsDuKsKPtLGsGsuGhslGpVQtQQlphHKHAuGaGE...psssusFGsTspssalGoppt.DWDNtpYFTN-GaEl-us.pRsshsTLNocsLIGsEsRPWsMSl.aIIKV .................ssGsHIh..ss+GpDuhGKsRLGsGCsGhhVGpVQsQQhpYHKHAGGaGE...pcspu...sFGsTstssYlGTRKthDWDNtSYFTNDGaE....lsss..RsuhsTLNpEGLIGsETRPWNhSLNYIIKV. 0 0 0 0 +14780 PF14929 TAF1_subA TAF RNA Polymerase I subunit A Coggill P, Hetherington K kh6 Jackhmmer: Q15573 Family TATA box binding protein associated factor RNA Polymerase I subunit A is found in eukaryotes and is encoded by the gene TAF1A in humans. Its function is to aid transcription of DNA into RNA by binding to the promoter at the -10 TATA box site. It is a component of the transcription factor SL1/TIF-IB complex, involved in PIC assembly (preinitiation complex) during RNA polymerase I-dependent transcription. The rate of PIC formation depends on the rate of association of this protein. This protein also stabilises nucleolar transcription factor 1/UBTF on rDNA. 
27.00 27.00 28.30 27.60 26.90 26.70 hmmbuild -o /dev/null HMM SEED 547 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.84 0.70 -12.85 0.70 -6.06 13 157 2012-06-18 12:06:50 2012-06-18 13:06:50 1 5 65 0 85 134 0 250.30 21 84.78 NEW chpsst-tpshpssstppchhhphhhshpt.shh...........pcs+hhplLpchlpp+pWspAushhpshlcsht+s.shpt..sc.hh.tlts-llhphsps.......huschpshuthhslhhpp..............h.....p.phhlhLppuLahlppsthttsshphoh......h.ppp.t.shpslhphahGLltYcpWhpsl.cph+hccpsh................phtts.hsppst....pps..........hspphspsshtsssspsslhphsts..................lDphlps.spthpFhpspcthcpsho.hs..E+hssss..t.h..............................................shaphl.+aL..+....uss.s........................hhlhutclp-t...hpp.s.sstssh.hchtshhhp.hs...ps.sshlhSslcchhphsPspphhl-+tphh+cp....hpphh-h...+hp.h.sp..hW..hs.sh.p.htphtp.tEcpppuhplhF.hLDauss+pN.pAWphhtphlpplhh.....saltppWcsRpsWW.saHFSp.h....h.p.......tts...chhspKAssAuhhhG.thtYh........p...hp.h.hLtcphpp.....p+lhpsh ..............................................................................................................t...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................tt........................................ 
0 12 24 48 +14781 PF14930 Qn_am_d_aII Quinohemoprotein amine dehydrogenase, alpha subunit domain II Eberhardt R re3 CATH:1pby_A_02 Domain This is the second domain of the alpha subunit of quinohemoprotein amine dehydrogenase [1,2] 27.00 27.00 57.60 56.70 26.00 19.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.41 0.72 -4.10 20 36 2012-06-18 12:07:57 2012-06-18 13:07:57 1 4 31 4 13 43 1 108.10 38 20.58 NEW tsssLsGpWsluG+hPG+G-apGsMolsuuut.DpYsVshsh+.aADGsshsupGsAllYTGYEWRAslslG...........ssshRQVhAhs..ssphpGRhF.tspDchGuchhAs+ss ..p.ssLsGpWshoG+hPu+G-hpGsMolssuss.DsYpVplchc.aADGsshsupGsAllYsGYEWRuslclG..........ssshRQVhAhp..suphpGRhF-sscDEhGhchpAs+t........ 0 2 9 10 +14782 PF14931 IFT20 Intraflagellar transport complex B, subunit 20 Coggill P pcc Jackhmmer:Q8IY31 Family IFT20 is subunit 20 of the intraflagellar transport complex B [1]. The intraflagellar transport complex assembles and maintains eukaryotic cilia and flagella. IFT20 is localised to the Golgi complex and is anchored there by the Golgi polypeptide, GMAP210, whereas all other subunits except IFT172 localise to cilia and the peri-basal body or centrosomal region at the base of cilia [1,2,3]. IFT20 accompanies Golgi-derived vesicles to the point of exocytosis near the basal bodies where the other IFT polypeptides are present, and where the intact IFT particle is assembled in association with the inner surface of the cell membrane. Passage of the IFT complex then follows, through the flagellar pore recognition site at the transition region, into the ciliary compartment. There also appears to be a role of intraflagellar transport (IFT) polypeptides in the formation of the immune synapse in non ciliated cells. The flagellum, in addition to being a sensory and motile organelle, is also a secretory organelle [5]. 
A number of IFT components are expressed in haematopoietic cells, which have no cilia, indicating an unexpected role of IFT proteins in immune synapse-assembly and intracellular membrane trafficking in T lymphocytes; this suggests that the immune synapse could represent the functional homologue of the primary cilium in these cells [6,7]. 27.00 27.00 27.90 28.70 26.50 26.20 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.49 0.71 -10.40 0.71 -4.21 26 146 2012-06-18 12:46:22 2012-06-18 13:46:22 1 2 119 0 100 147 2 114.40 42 85.21 NEW tGlahD-hs+lRVl-P-htspoppL+-EspcFlc+lspFpcllpphhphlcphA+cVEpEKl+AIGsRNhlcoh.scpRcscpQplQshItEKpsELERLpsEacuLp+lEpEQpphIpph ........GlaFD..-ls+lRVLDP..............-ssppT.cLK-ECccFlcKls...pFp+lVsshlcll-plAKc...sEsEKhKAIGsRNhLcSh.ucpRcsppQplQshItEKphpLERh+sEY-uLpKlEtEQpEhIpph............... 0 40 50 77 +14783 PF14932 HAUS-augmin3 HAUS augmin-like complex subunit 3 Coggill P pcc Jackhmmer:Q8IY31 Family This domain is subunit three of the augmin complex found from Drosophila to humans [1]. The HAUS-augmin complex is made up of eight subunits.\ The augmin complex interacts with gamma-TuRC, and attenuation of this interaction severely impairs spindle MT generation. Furthermore, we provide evidence that human augmin plays critical and non-redundant roles in the kinetochore-MT attachment and also central spindle formation during anaphase in human cells.The HAUS complex is required for mitotic spindle assembly and for maintenance of centrosome integrity [2]. 
27.00 27.00 29.70 29.30 24.90 24.80 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.64 0.70 -5.20 25 132 2012-06-18 13:04:39 2012-06-18 14:04:39 1 2 88 0 88 120 0 228.30 27 43.67 NEW Wlh.ps.-phcpFhcWlspsls-sNlLo-p-Lppa-pLp.ppGc.lLcup-L-.sLpplpspsssh....ph......s..-......p-lctLcppltsl.tchpp.htpLh.schp.phcph.hspph...spLpspp...tcsstthptspph..ltsphpc+sppLpplp.pcssphsp-hpc.tp....ps.sslFlpQhslcpYhhps-phhphLshYh++pFpht.hchs..psssps....lp.......t....c.pc..th.ppp...ptELppLpptht.hsphpaIctcscs.p ..................................WhF.ps..-scsFLcWhCssls.ppNlLotpElpta.ppLp....cpG+.l.....L....-utsL-tsLcs......hps.s.ph.......ph...............p..-........................ppl-tLcpplpsl.tchpphphphh.pchp..hsph..tupp.....hplpscp......tpsstphppstth.......lps..sphspplptlhsplsphh......t...............h............pp......t.s...................pp...ssl...aluphsLcpYltp--p.ottLs.ahpKpFhpG.tchs..Essspppa......hD.hpt..hp.........cppc...hpcc....phEhtRL...p.ha.........h.huppphIphpsp........................................... 0 22 36 61 +14784 PF14933 CEP19 CEP19-like protein Coggill P, Hetherington K kh6 Jackhmmer:Q96LK0 Family This family includes the centrosomal protein of 19 kDa found in eukaryotes. In humans, it is encoded for by the gene CEP19 which is also known as C3orf34. These proteins localize in the centrosomes. Centrosomes are dynamic organelles that assemble around the centrioles. They organise the microtubule cytoskeleton and mitotic spindle apparatus and are required for cell division and cell migration. C3orf34 localizes near the centrosome in early interphase, to spindle poles during mitosis, and to distinct foci oriented towards the midbody at telophase [1]. 
27.00 27.00 27.00 28.50 25.00 26.50 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -11.02 0.71 -3.94 20 94 2012-06-18 14:37:52 2012-06-18 15:37:52 1 4 74 0 64 86 0 149.40 38 67.25 NEW ++sGlRhpPPslllhYpsp....ssKhRpRh....lPl.Rshppposssths-cLtpp.......s++psaLpplsphQlc.........+hhphL.........................psphpuhshspshpthtpctsls................pcDLNKlDDppLpctKstM-ctFc+Npl+PGDssFlYDhcl-Fspsc..psSuWD ....................................................................++sGl+hpPPslllhYppp....ps+.RpRh....hPl.pshpp.Sss...sphAEpL+ps.......s+H+sYL..pplshtQlc.........+lhphL...............................................................................................pshhpGpolspshpphppctsl-..................s-cDLNKLDDcELt+pKshMDEhFc+Nph+.sDPsFVYDlEl-Fspsp...pssuWD................................ 0 32 38 48 +14785 PF14934 DUF4499 Domain of unknown function (DUF4499) Coggill P, Hetherington K kh6 Jackhmmer:Q8TBM7 Family This family contains a protein found in eukaryotes. Transmembrane protein C10orf57 is encoded for by the gene chromosome 10 open reading frame 57 (C10orf57) located in chromosomal position 10q22.3. The exact function of this protein is still unknown, however it is thought to be an integral membrane protein. The protein sequence is 123 amino acids in length and has a mass of approximately 14.2 kDa. The family also includes some longer proteins that possess an N-terminal dehydrogenase domain, Pfam:PF01073. 21.90 21.90 22.60 24.90 21.80 20.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.06 0.72 -4.04 28 133 2012-06-18 14:45:52 2012-06-18 15:45:52 1 4 93 0 76 130 1 88.90 31 42.66 NEW luhshahhslFtsstlPht.lGPluhh...hchhlhpphpllphsahhAhllHlsEAlYAhhLs+ptsl.csssphtWFlQThlhGasSLplLlc .................................................h..hshhhhslFhst.lP...lGPlu.h...sphl.l.p.phpllp.hsahlAhllHl.sEulYAhhLC+cp.sl.sspsphhWFlQTFlhGhsSLslLl.t........ 
0 30 43 54 +14786 PF14935 TMEM138 Transmembrane protein 138 Coggill P, Hetherington L kh6 Jackhmmer:Q9NPI0 Family This family of proteins is found in eukaryotes and members are approximately 160 amino acids in length. There are two conserved sequence motifs: YYY and DPR. This transmembrane protein belongs to a family found in eukaryotes and is involved in the biogenesis and degradation of ciliated cells [1]. Mutations in this protein cause the disease Joubert syndrome(JBTS) where the cilia becomes non-motile. Ciliopathy can be severe since cilia provide the cell with large amounts of information through signals. Ciliopathy can affect cell behaviour as the appropriate signals between the cell and its environment are not made, which can affect cell survival. 27.00 27.00 31.60 31.60 23.70 23.40 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -10.53 0.71 -3.78 16 91 2012-06-18 14:53:30 2012-06-18 15:53:30 1 1 76 0 59 79 0 117.90 48 73.53 NEW stllLFllQDssIlhshlllhLshauTaVaQsGhsplLlc+F+hhlllsslYFhLSluhHhWlls.hRh..tssspa.Ws.pGLhALaVlQRlsSVhYYYhYKRTALphuDPRaYc-pl.Wlpcph .................lQLVLFIIQDlsllhslIllhLhhFsT...aVFQuGLlsLLh++F+ssllloslYhsLSIulHsWlhs.lRW.....pssspalWT....cG.LpsLFVhQR...l....uAVlYaYhYKRTAlpluDPRFY.pDSh.WLRcpa...... 0 14 19 40 +14787 PF14936 p53-inducible11 Tumour protein p53-inducible protein 11 Coggill P, Hetherington K kh6 Jackhmmer:O14683 Family TP53 is a tumour suppressor gene, when switched on it suppresses tumour development by inducing stable growth arrest or cell apoptosis [1]. The tumour protein TP53 inducible protein 11 encoded for by the gene TP53I11, has a protein sequence of 189 amino acids in length and 21 kDa in mass. The role of this protein is thought to negatively regulate cell proliferation in response to stress, and therefore suppress tumour formation [1]. 
27.00 27.00 37.90 37.60 22.10 22.00 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.50 0.71 -11.27 0.71 -4.33 6 94 2012-06-18 15:01:03 2012-06-18 16:01:03 1 2 50 0 42 69 0 143.60 62 78.27 NEW MKKHSQTDLVSRLKTRKILGVGGEDDDGEVHRSKISQlLGNEIKFAVREPlGLRlW.hl...SAslFTulAlMALsFPsQLY-sVF-pt.s......oo+lSlRLYGGALLSlSLIhWNuLYTuEKVIIpWTLLoEACYFulQhLVTolT.LlEhGhhu.us.llLLluRlLFlhlolsYYYhLGR+PKK ..............MKKHSQTDLVSRLKTRKILGVGG.E.DDDGEVHRSKISQVLG..NEI....K.....FsV.REPLGLRVWQFl...SAVlFou.lAlMALsFPD..Q..LY-sVF-tups......sSc.ssl.RLYGGALLuluLlhWssLhssE.+s.IphsLLspAsaaulQhlV.................................................................................................. 0 8 11 24 +14788 PF14937 DUF4500 Domain of unknown function (DUF4500) Coggill P, Hetherington K kh6 Jackhmmer:Q96KF7 Family This family is found in eukaryotes. The function of this protein remains unknown. The gene which encodes for this protein is named chromosome 6 open reading frame 162 (C6orf162) and is found between the chromosomal positions 6q15-q16.1. It is thought that this protein may be an important part of membrane function. 27.00 27.00 28.20 28.10 23.50 22.50 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.05 0.72 -4.15 16 85 2012-06-18 15:12:04 2012-06-18 16:12:04 1 2 73 0 57 80 0 83.80 51 80.74 NEW ppsps.t...htusGlRSl+TToLFRAlN.ELalKPNKslMuhGLlAlohCsGYluYM+sph-spp...hYsAlcuDGpc.hh..+KpS+W ..........h.....pcpshtssGLRusRTToLFRAVNPELFlKPNKsVMAFGLlsloLCVuYIuYhHAppENcp..pLYEAlcS-GcphhR..RKoSKW..................................... 0 13 18 35 +14789 PF14938 SNAP Soluble NSF attachment protein, SNAP Eberhardt R re3 CATH:1qqe_A_00 Domain The soluble NSF attachment protein (SNAP) proteins are involved in vesicular transport between the endoplasmic reticulum and Golgi apparatus [1]. They act as adaptors between SNARE (integral membrane SNAP receptor) proteins and NSF (N-ethylmaleimide-sensitive factor) [2]. 
They are structurally similar to TPR repeats [2]. 25.00 25.00 25.00 25.00 24.90 24.90 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.82 0.70 -5.26 62 864 2012-10-11 20:01:04 2012-06-18 16:23:28 1 81 388 5 519 1551 81 229.00 25 66.89 NEW scupplhtc.A-KKlpsstuhh.uhF..Gupp.KaE-Au-LappAANtaKlsKpaccAGpsah+uAcsphc.hsspc-AAssas-AucsaKKs........ssp..cAlpsLppAlph.asctG+appAAcappclAElYEp-........tD...........hcpAlcs..YcpAu-aaps-p.usutANpChlKlA.pl.uAphp..pY.cAl-laEclAcpSlsNsLhKaSlKcYaLpAuLCpLsts.DsVusppuLp+YpchDPoFssoREt+hLtsLlpAh-ptDs-tFosslh-aDpho+LDpWKTolLL+lKpslp .....................................................................t....th.tp.Ac+hhp........th.h...................chcpAsphatpA.As.ha....+...h......t..+p........hppAs.psa...hcs.A.p...hp.....p....hp...s....................c...A...A.......p.s.h..h...p.Au......p..s.h.+ch.................s..p...................cA.l.p.h..l...pp.Al....ph...Y...t.....c....h...G...p...h.p.h....A...Ap.th...........t.plAc...........lhE.s....p......................c...................hccAlp.h..Ypp.Au..-....h....a...p..t...-p.......pp....t.......t...A.s........c...hhh....+...sA............p...h...hs.p.h...p.................pY.pcA.h.ph.a.c...p.......h......tt.......h...p.p...h...h....p.....h..s..h.+..t......hhhtt.hl.s...hh.......h..................D.........h.....ts........p.shp.ph...t.....tF..t.oc-ht.h.httl.htuhc........pts......................pthtphh...................................................................................... 0 197 301 426 +14790 PF14939 DCAF15_WD40 ShortName; DDB1-and CUL4-substrate receptor 15, WD repeat Coggill P pcc Jackhmmer:Q66K64 Repeat DCAFs, Ddb1- and Cul4-associated factors, are substrate receptors for the Cul4-Ddb1 Ubiquitin Ligase. There are 18 different factors, the majority of which are WD40-repeat-proteins [1]. 
23.90 21.80 26.50 23.10 23.70 21.70 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.46 0.70 -4.98 11 84 2012-06-18 15:44:06 2012-06-18 16:44:06 1 3 69 0 57 80 0 186.10 44 29.51 NEW stppptphFp+.lPs+lplsLKslls.s..LhsGHlFLGhTKCGQaLLSYoh.h-h-ss.h.....sshY+YcLaWWpFpP...........+ppLpKltpVpLFs-c..tlsstLplslspW..sDpppllVaG..hp...........susc-spcsYlTlssVPsls.Cp-C+plssu.............ssh.hp.....CLcHshTlHopYpllsPaPsFpPplsLppsshlllNoushlhsLplplshsc ...................................u.tphp.phFc+.lPsRlplsLKsllsps...LhtGHIFhGFopCGpYlLSYssssssss...........shYhYpLYaWtFps...........+p+L+hltpVRLFpDc............tl.splhlo...lspW..sDtphllVaG...............husEspcDhYlohVsVPs.s.ChtCpchspstsh......................ss..up.......CLpHuahlHTKYpll.PaPsFpPshpLppsphlLlNTuh.lhshtlslcs.st...................................... 0 18 22 39 +14791 PF14940 TMEM219 Transmembrane 219 Coggill P, Hetherington K kh6 Jackhmmer:Q9NWD8 Family This protein belongs to a family found in eukaryotes. Proteins in this family are typically between 240 and 315 amino acids in length. The domains in this family vary in length from 202 to 249 amino acids. Its exact function remains unknown, however, it is thought to have a role as a transmembrane protein. More specifically, it is possible that this transmembrane protein may have a role as an insulin-like growth factor binding protein 3-receptor (IGFBP-3R). This receptor binds to the ligand, insulin growth factor 3, which is a p53-induced, apoptosis factor important for cancer prevention [1]. 
27.00 27.00 28.10 30.70 24.60 24.10 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.37 0.70 -5.25 9 99 2012-06-19 15:03:32 2012-06-19 16:03:32 1 2 50 0 54 95 0 210.30 46 75.39 NEW L+salspRPPhVlFhlslhshAluhlsLuhah..phptlpsPDhspDWNphLhphuphcFCs.........tpstshphhhs-oss....................hpspsshslolshhLsls.hhs.sGhspNhsh.lpusltGpplGLpGt.utEplNlTFpLsssh....s...sth..sphtTChshoAssplhPsoh.PPpC.spphssssh.p.hhhsh.pp...........hshpsssh+lhps.sPcLTVhl ....L+lhlup+PPLVshhlsl.hhuluhLsLGhFh..+hptl+SP-hspDWNoFLhpFspLcLCs........................ps.s.t..hN-TsT.....................................hc..ppGPhslolhhTLsls.....sG.sR.NhT+.l.uTlhGpQlGLpGppApEplpIThplsssh..................shsTChshoAssslhPso..P.pCss-shuNATL...h...hs.................s.ttthhphh.t.p.hLo.hl............................. 0 11 15 25 +14792 PF14941 OAF Transcriptional regulator, Out at first Coggill P, Hetherington K kh6 Jackhmmer:Q86UD1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 198 and 332 amino acids in length. The domains in this family vary in length from 239 to 242 amino acids. The gene, OAF (out at first), which encodes this protein, has a promoter which may help mediate regulation of neighbouring genes [1]. An alternative name for this protein is HCV NS5A-transactivated protein 13 target protein 2, which stands for Hepatitis C virus nonstructural 5A-transactivated protein 13 target protein 2. NS5A inhibits double-stranded-RNA-activated protein kinase (PKR) activity, which is thought to allow Hepatitis C Virus replication to continue in the presence of an alpha interferon (IFN)induced antiviral response [2]. 
27.00 27.00 56.30 34.30 22.70 22.20 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.66 0.70 -5.22 7 85 2012-06-19 15:11:22 2012-06-19 16:11:22 1 3 68 0 59 91 0 210.50 50 81.54 NEW u-L+VpVRLsDGQVTEEsLpADSscDhIoLEF+psDGTLITalADFKp-VKIFRALILGELERGQSQaQALCFlTRLp+NEIIPSEuMA+LRQKNP+slRpAEEhRGhEphoMslAVNho+uhQLSsHI+NlCuEA+-AlYTRctDV+aWL-+.....Gh-uShFEhhPpsuphssLppCppspDhWpPClCoYsLsLEWYPChLKYC+uRDs..+s...........osYKCGI+SCpKuYpFsaYVPQKQLCLWDE ................................pLhl.Vp..sGplhpEslpus.stD.IoLEhp+sDGTLlo.hhDF+p-VpIh+ALlLGE.E+GQS..QaQshCFlT+hp+s-hIsS-AMAKLRQKNP+slRpAEEs+GhEphpMsshVshopuh.lS.HlpslCAEAh-AhYsRptDl+hWhEp...............us.tus..hEhhPp.s...........p...................RCtp...sushhtPClCphthsluWYPChLKYC+u+s....ts.................osY+CGI+oCpKsapFsaYV.Q+Q.CLWDE..................................................... 0 16 21 40 +14793 PF14942 Muted Organelle biogenesis, Muted-like protein Coggill P, Hetherington P kh6 Jackhmmer:Q8TDH9 Family The protein is a coiled-coil protein and belongs to a family found in eukaryotes. It undergoes alternative splicing forming two isoforms. The larger isoform is 187 amino acids long in protein sequence length and 21 kDa in mass. The smaller isoform is 110 amino acids long in protein sequence length and 12 kDa in mass. This protein associates with other proteins in order to form biogenesis of lysosome-related organelles complex-1 BLOC1 complex. BLOC-1 is required for the normal biogenesis of specialized organelles of the endosomal-lysosomal system [1]. 
26.00 25.70 26.00 25.70 24.70 24.50 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.90 0.71 -4.32 11 90 2012-06-19 15:23:15 2012-06-19 16:23:15 1 3 76 0 64 85 0 139.90 38 76.07 NEW llKDlGEIaSRLLDHRPVlQGEI+YFl+EFEEKRuhRElchLEslpphlsEhsEchLP+CppshpspLsplhppLpsAssslp+Lpp+EpcpcK..uspLpsucctRptcWEcFhp-Qpp+ptcVDcEaccthc+lpEQYs-hc+cL ............lh+DlG-IaSRLLDHRPllpGEh+aFlKEFEEKRGhREh+sLcslpphlpEosEphLP+Cpcs....h....p......cpLsp..lhp+L.....psAscslp+Lpp+Epcpcc....sspL.tsccp+ptpW-cFhcc.ppcptclDpEacct.ccLp-pYschphcL................. 0 21 25 44 +14794 PF14943 MRP-S26 Mitochondrial ribosome subunit S26 Eberhardt R re3 Jackhmmer:Q9BYN8 Family This family of proteins corresponds to mitochondrial ribosomal subunit S26 in eukaryotes [1] 27.00 27.00 30.40 29.90 26.10 21.90 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.00 0.71 -4.70 24 113 2012-06-19 15:32:32 2012-06-19 16:32:32 1 6 90 0 71 118 0 151.90 34 68.71 NEW KPRalPsAKSKhaRVspts.hs.-EhhELpchappY+sthpulRphhp...cEshppphpscssplh.p..cppcE....pEapphhthN-phNtcltchREtRlpcEpEcpcphhhcphhtcppcppphhcptEppVhp.p....EpuKoFITtENLDptIEcALssPssYNaAlDhpGslh ..................................KsRa.PsAKSKhhR.l.h.s.hsstEhh.lhchap..pY+phhpul....Rt.ht...pElhppt.pspss.thh.p.pttpp......pEacplhshNctcNtchtphR.tRlppEtccpcph.hpphttctpctpthhppt-pcVhphp....EcuKsFITtENL-ttIEpALs.s.P.hsYNaAlDhpGph........ 0 23 29 52 +14795 PF14944 TCRP1 Tongue Cancer Chemotherapy Resistant Protein 1 Coggill P, Hetherington K kh6 Jackhmmer:A1KXE4 Family This family of proteins are found in eukaryotes. Tongue Cancer Chemotherapy Resistant-associated Protein 1 (TCRP1) is resistant to the chemotherapy drug, cisplatin, which induces apoptosis in tumour cells. There is suggestion that TCRP1 can be targeted to reverse chemotherapy resistance. 
The precise mechanism of TCRP1 inducing resistance against chemotherapy is still not clear, but it is thought that TCRP1 alters cell signalling pathways affecting apoptosis or DNA repair capacity. Proteins in this family are typically between 194 and 235 amino acids in length [1]. 27.00 27.00 109.60 30.10 22.60 22.10 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.01 0.71 -11.41 0.71 -4.37 3 109 2012-06-19 15:33:00 2012-06-19 16:33:00 1 2 39 0 56 76 0 203.30 71 99.36 NEW MNPVYSPuSoGVPYuNsKGMGYPAGFPsGYAAAAPAYoPNMYAGuNPAFs..........................P..........................................GYTPGTPYKVSCSPooGTVPPYSSSPNPYQTAVYPlRSAYPQQNPYA.......QQGAYYTQPLYAAPPHVIHHTTVVQPNGMPAAMYPAPIPsPRsNGVAMGMVAGTTMAMSAGTLLTSPpPTPVuPHPVSVPTYRPPGTPTYSYVPPQW .....................................MNPVYSPspsGsPYuNsKshuYs..GaPhuYsAAAPAYsPshYPsssPoat............................................s................................................................tYT.sGTPYKVsso.osuAsPPYSsSPNPYQTAhYPlRSAYPQQN.YA........QGsYYTQPlYAA.PHVIHHTTVVQPNuhPu.slYPAPl..s..sPRsNGV.sMGMVAGTTMAMSA...GTLLTs.p.TslusHPVohPTYRA.GTPsYSYVPPpW............................. 0 2 7 23 +14796 PF14945 LLC1 Normal lung function maintenance, Low in Lung Cancer 1 protein Coggill P, Hetherington K kh6 Jackhmmer:Q9H1P6 Family This protein is part of a family found in eukaryotes. It is 137 amino acids long in protein sequence length and mass is approximately 15.7 kDa. The protein is present in the normal lung epithelium, but absent or downregulated in most primary non-small lung cancers. The gene is known as Low in Lung Cancer 1 (LLC1). This protein is thought to have a role in the maintenance of normal lung function and its absence may lead to lung tumourigenesis [1]. 
26.10 26.10 26.40 27.30 26.00 26.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.69 0.71 -3.79 8 54 2012-06-19 16:02:06 2012-06-19 17:02:06 1 2 41 0 42 55 0 111.30 41 84.16 NEW hVapDEIaKDHl++EptApKpWspcWGaLp..sphcclt-cpcchpss+s+lshtpcphhppLPPhcs.ht......tsuPPVPpTTuGhIGWRSupPphNLEhYt+aspstpsKGull+cLpWPpEG .............................hVspDEI.WKh+l+sEpcAppsWsppWGFLs....sshcE.Llcpc.cc....s.sKPKlcLPp+......hplcPloPl-KYIKl.........hPSP.PVPpTTQGFIGWRSulPths.cphc+ptchtpCKGuas+cLpWPcpG............................................................ 0 19 19 24 +14797 PF14946 DUF4501 Domain of unknown function (DUF4501) Coggill P, Hetherington K kh6 Jackhmmer:Q96HA4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 167 and 308 amino acids in length. The exact function of this protein remains unknown, but it is thought to be a single-pass membrane protein. This family contains many highly conserved cysteine residues. 26.50 26.50 53.90 26.50 21.50 21.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.12 0.71 -4.79 9 65 2012-06-20 08:49:22 2012-06-20 09:49:22 1 2 30 0 24 54 0 129.60 50 72.75 NEW SotStuQpPECCsDss-lNuoCsGouLCGPGCYR+WstDGSuSCV+CtNGT.....shaNsSECRshuGRGhphPhN+SoGsPG..phGGPpVAASLFLGThFISoGLILSVAuFFYLKRSSKLPcVFYRRN+APlLQPGEsAuMIPsPQSSVRKPRYVRRERs.-psssPushSosEARlSNV .............................................sp.sECCs-hhshNsoCsssshCuPGCat+htt-us.sClpC.sts..............................s.st.h.su.Gh.hshNpSossss....p.Gu.PpV.AASLhLGThFIS.hLILSVAuFFYLKRo..sKLPcl.h.YpRsKAssLQPuEsAuMIPs..PpS.SlRKPRYlR+-p....................................... 0 2 3 9 +14798 PF14947 HTH_45 Winged helix-turn-helix Eberhardt R re3 CATH:1r7j_A_00 Domain This winged helix-turn-helix domain contains an extended C-terminal alpha helix which is responsible for dimerisation of this domain [1]. 
25.10 25.10 25.10 25.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.45 0.72 -4.16 27 290 2012-10-04 14:01:12 2012-06-20 10:24:33 1 6 113 3 147 420 77 75.80 24 67.77 NEW +Ro+h-IIh-ILcsh..pusspKT+IhYpANLsachhpcYlshLhcpGl...I....pp.ssspYplT-KGpclLcphcchhchhp ......................................pl.hhcILph..h.......p.......sstp....h...T..cl.....h..hps.s.....L.sapp....hp+Y.lphLhcpGL...l.......................pp..psp.....p....Y.p..l.TcKG.p.c.hL.c.p.h.cph.p...t....................... 0 48 87 120 +14799 PF14948 RESP18 RESP18 domain Eberhardt R re3 Jackhmmer:Q5W5W9 Family This domain is found in the glucocorticoid-responsive protein regulated endocrine-specific protein 18 (RESP18) and in the N-terminal extracellular region of receptor-type tyrosine-protein phosphatases containing the protein-tyrosine phosphatase receptor IA-2 domain (Pfam:PF11548) [1,2]. 25.00 25.00 25.40 27.50 21.50 24.40 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.31 0.72 -3.98 6 113 2012-06-20 10:28:28 2012-06-20 11:28:28 1 3 36 0 39 121 0 96.30 40 14.55 NEW GQuQVGVGQhWPL.GhsTPVFQ+LQsVLQQIsPpGLFWKDDhTQcVMoQKMt+IS+LHPp-P.....CsRsspAusPT+TstshuKQEEKLpLLhP...tpSPhVKVNR-pC .............GQsQsussQhhshhpVosPVLQ+LQs.VLpQLh...sQG...LoW+D.DlTQaVloQEME+lPR..L.+..P...-P.....ps+DtpulsPp+s....s.ttp......P............................................................................. 0 3 5 11 +14800 PF14949 ARF7EP_C ARF7 effector protein C-terminus Eberhardt R re3 Jackhmmer:Q8N8R7 Family This family represents the C-terminus of the ARF7 effector protein (ARF7EP). ARF7EP interacts with ADP-ribosylation factor-like protein 14 and unconventional myosin-Ie and through this interaction controls movement of MHC-II-containing vesicles along the actin cytoskeleton in dendritic cells [1]. It contains a conserved CXCXXXXCXXCXXXCXXCXXXXCXXXCXC motif in it's C-terminal half. 
27.00 27.00 33.40 33.40 19.50 20.30 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.15 0.72 -10.99 0.72 -3.61 25 126 2012-06-20 11:48:40 2012-06-20 12:48:40 1 4 78 0 92 118 0 103.30 45 54.39 NEW csp+pLcpLthpsssp.hl.....ssFsPpsopRcKR+h...scp.p......shsccsplYDcpGhLhpsu...t...DLCDCL-h-CsGCaaPCscCuSsKCGscCRsNRKWhY-plEh-u ....................................................................p.t+tL+sLtFpNPG.ps........-.Fs....Pcs...t.p....RcKRth..hpphs..................hpsh..tps+hYDppGhLltss......h......DLCDCL-c-ChGCFYPCPpCsSsKCGsECRCsRKWlY-pIchEu....... 0 20 26 51 +14801 PF14950 DUF4502 Domain of unknown function (DUF4502) Eberhardt R re3 Jackhmmer:Q14159 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 181 and 876 amino acids in length. 27.00 27.00 37.90 37.30 26.10 23.90 hmmbuild -o /dev/null HMM SEED 358 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.31 0.70 -5.29 4 71 2012-06-20 15:14:56 2012-06-20 16:14:56 1 5 36 0 34 68 0 196.00 42 38.14 NEW KRKRshchEpPSFPsEps.phRRuGh+TstsusSLScAWLRCGEGF.coush.SL.TuEKKohTEKHLELss+PKpE.TTSK..uTStLssIsWSSStSDhSDEDKThs..........phQR.........................D-LQhIDWElDSD+t-ss-sD.EhE--c.sl-ISDCsSsA...SLTs--p.sE.PcssssEILEYSSDSEc--DsEpsLhIDSESsHKYcssFtSDuR.lh.p.hs.cscSsEsILpTPQK.Ts.....KhPKTPEsSuK+.KKLLRGGLAERLNtLQNRcRSAISLWRHQClSYQpT.uGcKSGVLTVKILELHEECuMQVAhCEQLst..hsusstuhA..sGAsLKVLFTKETAspLpG+PQDlV+IaPPWQKL ...........................................................................................................................................................................................................................................................................................................................................................................................................................hplshC.c.h..........ssts...........ts.lhVLFo+ETsthL.s.tPtDhlhIaPPWp............. 
0 5 7 14 +14802 PF14951 DUF4503 Domain of unknown function (DUF4503) Eberhardt R re3 Jackhmmer:Q14159 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 313 and 876 amino acids in length. 27.00 27.00 122.80 61.50 21.30 20.50 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.54 0.70 -12.32 0.70 -6.32 4 70 2012-06-20 15:23:30 2012-06-20 16:23:30 1 4 36 0 29 74 0 325.50 56 58.67 NEW QDAhGMFuEVHLpushhK.......u+QLEGKSCSLsGMKVLQKsTRGRTsGLFSLIDoLWPPllPLKsPGpuQsspplcTaLPPPuFCYILoAHPslGQIDhI-tD.IsKLYQPPVsRsLR-IlQhN-hSTRCSFYApVIYQ+PQLpSLL..tQ+EIWLhVTDlTLQhp-EpssuLPKTLsVhlAPSCVLusEVlEALsstssauLLF+DAlR-pGRIVClERTVLLlQKPLLussSuspSC-LsuPVpLD-LDusT.VNSICSVQGTVVGVDESTAFSWPVCDhCGNsRLEQpPEDRGsFSCGDCSplVoSPll+RHLpVFLDCsSRPpCTV+VKL.QpSIS.LLRhAAuEDGSYEVcSVLGKEVG.LNCFVQSlTop.uSC.VsLEEIELLSAs ..............QDuhGhFuEVpLph..sh.p.......s+phEG+oCpLsGhKVLQ+sTRGRssGlFSLIDoLWPPshPLKs......PGps.Qs.s.cEh+sp...LP...PPuhCYlLoApss.upl-lh-...............t-sIspLYpPPss+sL+-ILQhssh.u.sRCSFaApVIYQR...PQLp.ull.lp.QREIWLlVTDsTLQhp-EpcspLP..KTL.VhVus.CVLssEVlEALssuus+slhFKDALR-p.GR.IlC.sERTVLLL..QKPl..LsssSu..........ApssE...LssPVh...........LDpLDSsT.VNSICSVQG..sVVGVDEsTAFSWPVCshCGNu+LEppPc..c.pGsFpCupCuplVsSPlh+hpLpVFLsC.SpPps.pVKVKLhQpo.ISSLLt.uAt.E...D...GsYEVcsVLGpcVG.L.saVpuhotp.s.h.hsLEElpL........................................... 0 4 5 13 +14803 PF14952 zf-tcix Putative treble-clef, zinc-finger, Zn-binding Coggill P pcc Jackhmmer:Q9NWW7 Domain This domain resembles the zinc-binding domain of prokaryotic topoisomerases, family DNA_ligase_ZBD Pfam:PF03119. The function of the eukaryotic proteins it is carried on is not known. 
31.20 31.20 31.90 39.20 27.60 31.00 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.18 0.72 -8.54 0.72 -4.62 12 74 2012-06-20 15:35:29 2012-06-20 16:35:29 1 2 59 0 45 72 0 43.10 65 7.99 NEW pplu+hTlRGl+KCPcCGshN..GoRuh.CKNcsCstlhpthsstp .SDLGKATLRGIRKCP+CGTYN..GTRG.LSCKNKsCGslFRhuucK.p... 0 8 11 26 +14804 PF14953 DUF4504 Domain of unknown function (DUF4504) Eberhardt R re3 Jackhmmer:Q96LT6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 253 and 329 amino acids in length. There are two conserved sequence motifs: LLGYP and SFS. 25.00 25.00 25.70 25.10 24.30 24.20 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.53 0.70 -11.71 0.70 -5.10 29 96 2012-06-21 08:33:36 2012-06-21 09:33:36 1 6 77 0 61 96 0 236.90 26 75.63 NEW stpshhttt+p+hstssshcLss-lLAlspGL+PslLhDhs...usssspLQpaLppL.....Qssuhl......hp.........sL+lh.Ip-.sh...........hllpscthsp+lcpsLhupst.ll.VshppppPslso..p.sslts.ltsllsphp...................................................shppst.ssshssplpsopasLsTl.GhLLGYPVsYh....Fspsput-..pCLohpsL+la..........pshl..sapsspst...t.......p.pLhSFSVPpsL.sthps.h-sWtcphhs+hppp.....ssassLplpspssphsulsL ..........................................................h.......thphp...s.phpLtt-llAVspG.L+PulLhDhs...ss..hsplQphL.ppL.......pshshl...............hp.......sL+l.h.ls-.s............hllssctht.p.a.l...cpshh...up.t...hlpVs.pptpPplhs...p..ps.ltshltp.lhpthp.................................................................................t.pps..hhs..sthpssshsL..sTlhGlLLGYPVsYh...............Fp.spspc.......sCLuhssLcla..........pshh...sh..t..p......................................LhSFSlPtsl..thpp......p.W.pph.tphptp........p.hts..lphp.p.hp..s............................................................................. 
1 20 33 46 +14805 PF14954 LIX1 Limb expression 1 Eberhardt R re3 Jackhmmer:Q8IVB5 Family This entry represents the limb expression 1 (LIX1) family [1]. 27.00 27.00 31.40 31.20 25.90 18.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.57 0.70 -5.40 8 135 2012-06-21 10:00:33 2012-06-21 11:00:33 1 3 70 0 80 111 0 228.70 68 83.86 NEW c-ulsulspshAcaspGaucVNVVEALQEFWQMKpARGAch+sGALVlYESlPSsuPPYVCYVTLPGGSCFGSFQsCPTKAEARRSAAKIALMNSVFNEHPSRRITD-FIEKAVsEApASF..............pG.sss-s-sPsTGIGAFRFMLEuNKG+TMLEFQELMTVFQLLHWNGSLKAMRERpCSRQEVlAHYSpRuLDD-MRSQMALDWlsREpE...o.PGlLSpELAhAE+ELEpARLAGRELRFPKEKKDIL.LAtuQl ....................p.........s.tp.s.sh.t..c.....lNVVphLQEFW.phKQsR.....G.......As..............h...pstuLVVYE.lPSsuPPYVCYVTLPGGSCFGsFQhCsTKAEARRsAAKlALMNSVFNEhPSRRITcEFIp+SVpEAlASh.....................sG..sh--ADsP.sTuIGAa+aMLESNpGKoMLEFQELMTVFQLLHWNGSLKAhRERpCSRQEVluaYSpRuLD-cMRspMALDWltREpp...s...PGhlupELt.spREL-cARhAGpELRFaKEKK-IL.LAhsQl..................... 0 15 21 43 +14806 PF14955 MRP-S24 Mitochondrial ribosome subunit S24 Eberhardt R re3 Jackhmmer:Q96EL2 Family This family of proteins corresponds to mitochondrial ribosomal subunit S24 in eukaryotes [1-2]. 25.00 25.00 27.80 27.00 22.60 21.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.58 0.71 -4.33 19 118 2012-06-21 10:09:16 2012-06-21 11:09:16 1 4 92 0 73 109 0 125.70 48 78.56 NEW tKspuGRa+lo.+ts+PLTYEMAp.PHhIuHRKoWNSWpTuNLcsttp.suEs.........slEDhFIR+FlpGTaHshls...SEllIKRppNhIhIAull..hpplsspKhYFLlGYTEElLShaL+CPVKLElQoVssKpsVlaKYI ......................s.+spuuRh+ls.+ts+PlTYE.ApsPHaIuHRKuW.ShpTuNLcGp.......t+.su-p.........slEDhFlR+FhhGT.a.uhls...sEllIKRptNhlcIsull...hpplss+KhYFLlGYoEpLLSaahKCPV+LELQTVssK..VlaKYl................ 0 29 35 54 +14807 PF14956 DUF4505 Domain of unknown function (DUF4505) Eberhardt R re3 Jackhmmer:Q6P1X6 Family This family of proteins is found in bacteria and eukaryotes. 
Proteins in this family are typically between 166 and 225 amino acids in length. 27.00 27.00 43.60 33.10 19.30 22.10 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.32 0.71 -4.65 12 106 2012-06-21 10:49:48 2012-06-21 11:49:48 1 2 92 \N 71 107 2 161.30 42 81.20 NEW slpYsQGQSPpP+lREYFYYIDHpGMLFLDDu+hKNFTSCFKEK+FLcFFFpRLRhNc.....ouRYcp-FPalS.CGRERNalRCDDhPlVFTHllpcsssppp.....LsYsHuG-hLoV.F-Pp+lhMhPtoGRVYHPAPE+sGGlGLlRSpLAIELSppFsFssG-s..pP.PTHFpWsGppaELspcWhcs ...........................t............t.phREYFYalDHpG....LF........LDDu+hKNFhoCFK-h.pFL.FFFpRLR.Np............................osRY.pt.......pFPalS.CG+ERNFlR.....C-D..pPlVFT.cl..lttspt.t.t..................Lsastu.uptLslP.FpPppLhh.spsGRlYH....PA.....P....p.....p......s......G.....u...l.....GLV+StLAhE.LStpFpats.....st........t...Psph.WpspphtLp......s........................................................... 0 30 37 54 +14808 PF14957 BORG_CEP Cdc42 effector Eberhardt R re3 Jackhmmer:O14613 Family The Cdc42 effector (CEP) or binder of Rho GTPases (BORG) proteins are involved in the organisation of the actin cytoskeleton [1]. They may function as negative regulators of Rho GTPase signaling [2]. 
25.00 25.00 25.20 25.20 24.90 24.70 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.70 0.71 -3.20 13 233 2012-06-21 12:15:36 2012-06-21 13:15:36 1 5 43 0 137 195 0 116.20 30 44.46 NEW hlKNAlSLPhLsspputphsstph................sKS.SSSPsKpsp.ttt...........p.hNuuuutus...hc.phsEppFGpLTDhs.............................sstspssuhc+A-SlhSFHlDLGPShLuDVLulMDKpth-pc .................................................................................................................................................................hlKNAhSLP..lst...th..hs..p................................................t...sPs+.sp..ht..t............................hp.s.ut.st..........p.s...stpt...sphps.............................................................................................................................................s.spps..hh.cpu-SlLS...h+..l....DLGPSlLs-VLslMDpt............................................................... 0 7 23 57 +14809 PF14958 DUF4506 Domain of unknown function (DUF4506) Eberhardt R re3 Jackhmmer:Q9H8K7 Family This domain family is found in eukaryotes, and is approximately 140 amino acids in length. 27.00 27.00 27.60 27.50 26.40 24.20 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.71 0.71 -4.60 14 90 2012-06-21 12:23:39 2012-06-21 13:23:39 1 1 68 0 65 83 0 134.90 33 35.56 NEW p-tslLhs..ss-sp.sPC.LplphsPc...cIuulsllsSA.slEla.lG..pEYptTshGcslp...pppsc.tlphYRhclcl-.SshsshplKLLo....upchCVasu+lhls.pssslsop.....hsptIDlp+VQplLpphGo ..............p.sllhp...pspsp.pP..ChLhlpssPp...pcIsulsllSsARshEVY..l...G......p....EYCGTsR.Gc...sVs......................tssppcplhlY+p.LcL-...os..spuCclK..LLSh........uc+.psValu+lhVphpssssss..s.t..slsstlDLp+VQshhpshGo................................ 
0 14 21 35 +14810 PF14959 GSAP-16 gamma-Secretase-activating protein C-term Coggill P pcc Jackhmmer:A4D1B5 Family GSAP, or gamma-secretase-activating protein, also known as PION, regulates gamma-secretase activity. The holo-protein is a large, approx 850 residue protein that is rapidly cleaved to an active 16 kDa C-terminal fragment that is the stable, predominant form. GSAP is expressed in inclusion bodies and is important in brain function. It dramatically and selectively increases neurotoxic beta-Amyloid production in the brain through a mechanism involving its interactions with both gamma-secretase and its substrate, the amyloid precursor protein C-terminal fragment (APP-CTF). Accumulation of neurotoxic beta-Amyloid is a major hallmark of Alzheimer's disease. Formation of beta-Amyloid is catalysed by gamma-secretase, a protease with numerous substrates that catalyses the intra-membrane cleavage of integral membrane proteins such as Notch receptors and APP (beta-amyloid precursor protein) [1]. The secondary structure of GSAP is largely alpha-helical, lacking well-defined tertiary structure. GSAP represents a type of gamma-secretase regulator that directs enzyme specificity by interacting with a specific substrate [2]. 27.00 27.00 30.40 42.60 26.40 20.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.29 0.71 -4.34 14 77 2012-06-21 13:25:58 2012-06-21 14:25:58 1 3 66 0 53 85 0 111.40 39 13.83 NEW L-hsptLhpLlp+hhthshp.....hpph+shsh.lsphssspc...htlFplhpRhhpAspsLshPhP.GFpohashLGhRsLshcsFLQYl-psVhpLotsslptlhpDl.-so.ccs..pphKa ........................L-h.ptLhpLls+hhthsh+.......hcph+uh.L.....hsphuust.c...aslFclhpRhhpAs.polshPLPsGFpohaThLGh+CLshcshLpYl-suVh.Lopsslpplhp.Dl.-so.pp.s...chKh......... 
0 17 22 37 +14811 PF14960 ATP_synth_reg ATP synthase regulation Eberhardt R re3 Jackhmmer:Q96IX5 Family Members of this family are subunits of mitochondrial ATP synthase (F-ATPase) [1-2] and vacuolar ATPase (V-ATPase) [3]. In F-ATPase, this subunit regulates mitochondrial ATP synthase population [4]. 25.00 25.00 25.90 25.20 24.30 18.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.45 0.72 -4.77 11 92 2012-06-21 13:36:47 2012-06-21 14:36:47 1 3 58 0 53 100 0 46.90 48 42.08 NEW MAu....tt.thpGhp+hFNupThsGRANsAKATYAuluLlhlaa+h+.pK ...........MAG.....tt.phpGhpKaFNShThsGRtNsshATYAuluLlllaa.+l+.+K..... 0 15 18 32 +14812 PF14961 BROMI Broad-minded protein Eberhardt R re3 Jackhmmer:Q96NH3 Family Broad-minded protein (BROMI) interacts with cell cycle-related kinase (CCRK), together these proteins regulate ciliary membrane and axonemal growth [1]. 25.00 25.00 25.30 25.00 16.30 22.50 hmmbuild -o /dev/null HMM SEED 1296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.44 0.70 -14.11 0.70 -7.43 2 151 2012-06-21 13:57:55 2012-06-21 14:57:55 1 4 62 0 96 138 1 532.70 29 89.28 NEW 
LQshLRpLhpSVK-+IoGAPSlECAEEILLHLEETDcNFHNYEFVKYLRpalpsoLGuhIE.EhEpaTpspsps.tSG.DTlVptVTKpTpESppYKpMMpoLKphMMhVVEShINKFEEDpMpppEhp+KIQ+ppS.S.hsDNCSDSDSSFNQSYtFh.ptpLQlIh-pLDPGpP+EVRaEALQoLCpAPPSDVLsCEsWTsLpcpLosuLoDPDP.hSD+lLpFaApTFo.SPLphTK-IYsSl..sKhLchaFL.+c.phPohoTuIDhspPshhpLLKphRLhN-aQKEssoFWIRHPEKYMEEIlENTLSLLul+p-Q..Sp.sSpK.L-PIahhuLlDhKAsWFKKWMHuYYSRTsVLRLLE+KYKSLlssAlQQCl.Yh-.C-Ahp.-EhLth.+phtppp....tphhYoupELpalYFlHSLClLGRLlhYTpGR+hFPIKlKpR+D.VoLTDLlVlhhplhY.pPp.PphspsAhhDshSPsshVhEVLRhLCDppECAVECLYp.sVIEsLLtPlhsLhpGphst.ss.EoALhHhADhLARIAos-cGLoLLLYscNhsSuEtcS.ouAHlIsQFopKLLsc-lpl.suSpM....pGAFI.VCRQhYsTCEGLQVLhPYSLHEsIApAW+pTS.hSERlPTPV.sussh.u.SQE.QsshAWEE.LLDsLLNFAATPKGLLhLQpTGAINECVTaMFsRasKKLQlSRpcKFGYGVhVTQVAoTAsGhlALQSSGFlpTllsELWusLECGR-DVRlsHP+STPhDPIDRSC.KSFLuLVNLLs.P.AVaELlGppsLPNKpEYsLREhsTsVlDlhDRLIIlNS-AKI+SLFNYEQSHhFGLRLLSVlCCsLsohLLLEuQYplo-lLLpuQc-NlhEsspuct-FIIDGLSVERNHlLVRIshlGGP.ERhLPPRsLpKGsDPYPWPMhSoYPLPphYl.-VsK.hchKQ-s-lGthLhp.K.o-+pspWh-sCRRQFCKhMtsKsshloG.sLh-LLEhhVLHLSpSss-CaFPssE.pssDssVKscSLSSVpQLGlcholRYGKFLpLL+-suEpDLsLlLKHCpcFLpQQps.lpSpL............ssYsGHDWFsSolFhlMhGDht+oLphL.+FSRLLsSAFLW.PRLH.ShaLsh-hhpSuIHPlY.CosHYlEMLLKsEVPLVFSAF+MSGFsPSQIClQWluQCFWNYLDW.EICpYlsTCVhhGPDYQVYhCluhhRHLQQDILQHTQTQDLQVFLKEEslpGFRVSsYhEYMEhLEpsYRshVLpDMRsIhspSo 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
0 33 35 68 +14813 PF14962 AIF-MLS Mitochondria Localisation Sequence Coggill P, Hetherington K kh6 Jackhmmer:Q8TDB4 Family This family contains a protein found in eukaryotes. Proteins in this family are typically between 240 and 613 amino acids in length. The family is found in association with Pfam:PF07992. This protein family is an N-terminal domain for the mitochondrial localisation sequence for an apoptosis-inducing factor [1]. The protein is also known as Corneal endothelium-specific protein 1 or as Ovary-specific acidic protein. It is thought to be important for membrane function and is expressed in the ovary and corneal endothelium. 27.00 27.00 32.20 28.80 25.10 24.20 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.24 0.71 -4.63 7 112 2012-06-21 14:32:17 2012-06-21 15:32:17 1 4 39 1 56 87 0 138.80 31 31.37 NEW MahpRAV.+pLA..hRAssssAPLt...KDA.........olRthSSsthPGsSGoNMlYallVGsoloAGGhYsYKTVoucps+asE+hsplcp+TKuElpP..hpucpEslupsEcAssE.......As..olsEus.....sl-sEEsPsA.shssscEus.ssssps..EAs.sEssslsuEstPcVsD...AAs..Eos..ssEss.EVp ..............................uh.ppLs..hRsss.....sht...+st...................hRphuSsussGu.sG.sNhlYhLlVGlossG.uGhYAYKTlppDppRYsERlssl....pp+s+.tp.h..p.......................................................................................................................................................................t............................. 0 2 7 21 +14814 PF14963 CAML Calcium signal-modulating cyclophilin ligand Eberhardt R re3 Jackhmmer:P49069 Family Calcium signal-modulating cyclophilin ligand was originally identified in a screen for cyclophilin B-interacting proteins. It is likely to be involved in calcium signalling [1]. It has also been shown to interact with many other signalling molecules including proto-oncogene tyrosine-protein kinase LCK, tumor necrosis factor receptor superfamily member 13B and EGFR [2-4]. 
25.00 25.00 25.60 25.50 24.40 24.80 hmmbuild -o /dev/null HMM SEED 263 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.56 0.70 -11.69 0.70 -5.03 5 67 2012-06-21 15:17:21 2012-06-21 16:17:21 1 5 46 0 39 60 0 205.50 50 78.87 NEW SASQRRAElRRRKLLMNSEERlNRIMGhH+PssGsp-Espp-oc..h-.DKosPLoLsSlSKRo.llpGDuVp..ouss-.psuSuu-t+ssplG-KLD.a.Kssphpu-DsutlRcRNRGDlsS-ussRss+cGL-pYLSRFDEAMKLRsQLssEKPSQDcGsssEE....FDSFRIFRLVGsALLAlsVR.hFVCKYLSIFA......PFLTLQLAYMGLSKYFPKGEKKVKTT..VLTAALLLSGIPAEVINRSMDTYSKMGDVFTDLCVYFFTFIFCHElLLaaG ..........................................oAuQRRAElRRRKLL.NSEpRhNRIhGap+s.ssssp-.Es.phpth......-.-+.pshsh.sSsSKR...sshGssss........suss-.p.ss...hhs.t.h.tp.p..........cs.htlR.RpRst..ssp..shtpss+.GLppYLSRF--AMKLRtQLhsEKPsQ-sGsssEE....hDsFRlFRLVGssLLAlhVR.hFVCKYL...S.IFA......PFLTLp..LAaMGL.KYFPKsEKKhpTT.........VLTAALLLSG.IPA..EVIsRSMDTYp...+Mu-.VFsDL.CVYFFTFIhsHElh.hhG........................................................................... 0 6 10 23 +14815 PF14964 DUF4507 Domain of unknown function (DUF4507) Eberhardt R re3 Jackhmmer:Q96N11 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 346 and 434 amino acids in length. 
27.00 27.00 28.30 28.20 25.10 23.10 hmmbuild -o /dev/null HMM SEED 362 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.19 0.70 -5.71 18 101 2012-06-21 15:24:45 2012-06-21 16:24:45 1 3 82 0 69 102 0 311.90 35 86.20 NEW M..scl+psL++hcFPpsApEALt+l-.......pLhssR..s.pttchsh-llsEFlFtEhc.ccspttp.................phsslQELQLlplLs-a...Fsps.us-AsRsslFhuLFuspts.t...........................RhplLu+LVShAVussssslLsuAGhWMQQ..lGssSs.SlcLApsllsDYhsLs......ssos-pLKpLshluP+FsusFlTAVs-LY.s.....tpsp.tsPPssLL-lIs-Wlo-NPp.LClss.p...thsLPs.........Gulshs.hTPlsGLlRW.CVLAPLspspppp..........................lYSKLHLulLpuL....................phssssspttslsupcLspllcsLpphhpp......sssssspthplul-RhAQAlQVAhuosClhsNpppLhshLppLPtp ......................................h......p.shh..ph.tsAhEsLhpl-...........p..hpp.......hpphshpllpEFlF...p...ccss..p.................phsslQELQLlclhssY...Fpcp..scDusRphlF.uLFuspts.t..........................p....RhplLu+LVShAV...............u.ssphPlLpsAusWhQp...........s..ss...hslcLApslVcDYssls............................ssohppL+pl.psuP+FsspFlTuVssLY.............sst..h.PP.sLL-hIspWlhEsPp.lhhs.hp....shsL....Ph...........Ghlth....TPLsGLlRW.sVhAPLs.ppp.......................................hYSpLHLulLpsL...............................hp.ps.hsphthl.hpchsslV.cplpphssp.......hss.psspthphul-RlAQAlQlAhuosslhsspppLhslhppLP........................................................................................................... 0 23 29 50 +14816 PF14965 BRI3BP Negative regulator of p53/TP53 Coggill P, Hetherington K kh6 Jackhmmer:Q8WY22 Family This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 213 and 245 amino acids in length. It is found in various tissues, including the brain, liver and kidneys. It was first discovered as a functional unknown gene, murine brain I3 (BRI3). 
This protein is also known as HCCRBP-1 and it plays a role in tumourigenesis, as it binds to an oncogene, HCCR-1, and acts as a negative regulator of p53/TP53 tumour suppressor. BRI3BP induces tumourigenesis by activating protein kinase C (PKC) activity but decreasing the pro-apoptotic PKC-alpha and PKC-delta isoform levels. BRI3BP is over-expressed in many tumours [1]. 27.00 27.00 66.20 65.00 25.20 21.30 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.07 0.71 -4.90 26 99 2012-06-21 16:19:23 2012-06-21 17:19:23 1 2 38 0 51 85 0 172.40 45 78.46 NEW hpphupohppTLsshlGtEsh+slpchhSplh.thspulsVhh.sL.tIhspLLssLGLDuspLTQ..hhSPupVpo....hLLhsuusLlAYWhLSLLLGhshuLL....GRhhWhl+lsLFhhuhVhll+ph.sssp+AlL.Lshlsshhhhot.sGshhpt..........spLEtKlcpL-pQlc.LphRhpR ........................pphupuhhtolsshhG.-sh+hltchhoplh.thspulsshhtsLhtlhscLLcsLGLDu..spLoQ.....hhSPupVpo.....hLLhsuusLlAYWhLSLhLGhshulL....GRhhWll+llLFhhuhVhllpp.h.s.sp+AlL.Lshllhhhhhot.sG.hhpt................spLEtKlctLEpQlc.LphR.pR.................... 0 2 5 18 +14817 PF14966 DNA_repr_REX1B DNA repair REX1-B Eberhardt R re3 Jackhmmer:Q96EN9 Family This family of proteins includes Chlamydomonas reinhardtii REX1-B (Required for Excision 1-B) which is involved in a light-independent DNA repair pathway [1]. 24.00 24.00 24.00 24.10 23.90 23.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.13 0.72 -3.61 20 102 2012-06-22 07:42:38 2012-06-22 08:42:38 1 2 75 0 64 107 0 91.40 35 42.95 NEW stsLL+pFhplQpcRApsYscaccGFpsalpsusp...ssYppLCsclTp-FsshScp........llplEstLp..chsRs-lApllcslQppEKpKLpLTAplQlLK ...............h..tLlpphhtlQpcRsphapphccGappYlpousp...stYpphspc.lTptFsssS+p..........................VltlEutLtt..hspssLAphl+slQptE+p+LphsAhlQlh................... 0 26 39 49 +14818 PF14967 FAM70 FAM70 protein Eberhardt R re3 Jackhmmer:Q8WV15 Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 241 and 349 amino acids in length. The function of this family is unknown. 27.00 27.00 33.10 27.60 21.00 21.00 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -12.22 0.70 -5.23 7 144 2012-06-22 08:12:48 2012-06-22 09:12:48 1 3 39 0 58 143 0 275.00 57 97.69 NEW h.s...s.sssl.h.sshuuFs+RK+sSlahssoLLlVSlhILTlGLAATTRTENVTVGGYYPGlILGFGSFLGIIGhNLlEN+RQMLVAuIVFISFGVVAAFCCAIVDGVFAARHI-.RPLhuGRCpaaSSssuahhD...h......................EVTCpo.s.stCpLKV+SNTCYCCDLYNCtsp.E.ssuYYEalsVpuCQDVlHLY+LLWuuslLNllGLFLGIITAAVLGuFKDM..sshup.shu.ss.Ppl.YssstQlhuYsuFh.ossslPshouY..sLQ.suhFPu.........ossSsLu...DsQssusS....hhsspsPPpYuPsYa.PsEKPPPYoP ...................................htsFsRRK+sSlahssoLLlVSlLILTlGLAATTRTpNVTVGGYYP.GlILGFGSFLGIIG.sLlEN+RQML....VAuIVFISFGVlAAFCCAIVDGVFAARHI-h+PLhs.s...RCpah.spssth..h-.h.t.........................pV.sC.phs.s.Cp.+l+uNTCaCCDLYsCGsp.EhssuY.....YEalsVpSCQDl.lHLY+LLWuuTlLNllGLFLGIlTAAVLGuFKDM........s....u..........s.s....ss.Ppl.Y.st..Q..lhuYssah.osspLPsh.SuY..shQ............s.usFPu....ossos...lS....-sQ.sso.ssS.ahh..sssAPPpYuPsYa.PhEKPPPYsP....................................................................................... 0 3 7 22 +14819 PF14968 CCDC84 Coiled coil protein 84 Eberhardt R re3 Jackhmmer:Q86UT8 Family The function of this coiled-coil domain-containing family is not known. It is found in eukaryotes. 
25.00 25.00 25.50 25.50 24.50 24.60 hmmbuild -o /dev/null HMM SEED 336 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.54 0.70 -4.93 13 109 2012-06-22 08:26:03 2012-06-22 09:26:03 1 3 64 0 60 112 0 256.10 32 89.59 NEW aCplCRpNHsp.G++H+Y.ssH+puLsshLs+FpsKls-lRthL+sPsl.c.t..pscs+hWChFCct-lt-psS....ohssusAlpHLASs-HlKsl++Fhh+aGushcph-pFpIocs-hu+acppsppshsphpsps-shhsptsssl+cl.cs.........p.pp.phh.pshpss..p..ppP..sst...pssshshhhhssp.t..httotts.....................................................ssshshs.ss...shshtts..G..sL....sslus..shsut.GNVHoGAsPPWLpss-cs.....tssp..phsPSspuhhpppcptKh+KLNPcR..V...GAsascc...p+................ssssWLPsFGRVWQSGsRpcSR+EFc+E+pphcc ..............................................C.lC+.sa.p.GctHhY..pHpppLpthLp+hh.plpssRhhl+ssps.p..............tpcpphW.ChhCst-lpcp.S.....ohhhushlpHLA..Ss-Hh+sspcFhhcptuphp.h-..pFhlo..p-ht+acpph...phhpph..tppp-t.h.tt.hs...pphcp.........p.p.p.h.psh.ps.......s.......p...........................................................................................................................................................................t..s.........h...h....s.lu....t.ssh.GNlHoGA.PPWh.tp-t..........t.........p..s...t.hhppp....p.ptK.+KL.sspR..V...GAsascp...pp................ssssWLPsFGRVWpsGpRhpSR+pFctctt....p.................................................... 0 14 28 41 +14820 PF14969 DUF4508 Domain of unknown function (DUF4508) Eberhardt R re3 Jackhmmer:Q9NWQ9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 253 amino acids in length. 
27.00 27.00 28.30 27.80 25.70 24.00 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.49 0.72 -3.99 12 83 2012-06-22 08:49:40 2012-06-22 09:49:40 1 4 70 0 53 78 0 98.90 44 54.48 NEW stpEhRhllpWFtpWSthQRpcFLpsLlpK........Asss.p......lsuLlsuLpsLsl..pD+PPSlFpCpl+LaspWFpsWs-p-+sphLppLcchDscFst+ahpclu ....spEh+ClLpWFssWSssQR-cFLpDLVuK........................AVPG.K..............lpsLLcuLppLuV...sD+PPsIF-CQL+LacQWFpsWuEpERNcFlcpLEtt-scFssKaaptlu.............. 0 11 15 33 +14821 PF14970 DUF4509 Domain of unknown function (DUF4509) Eberhardt R re3 Jackhmmer:Q86SX3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 212 and 449 amino acids in length. There is a conserved WLL sequence motif. 27.00 27.00 27.90 27.50 25.90 24.40 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.33 0.71 -4.63 8 80 2012-06-22 09:10:21 2012-06-22 10:10:21 1 4 45 0 38 72 0 151.30 42 43.60 NEW DR..PEAshsLWplLa................phL.psLs-hph........hsssts-sp.hpl...VKpALtspGY.PphphhQLPpDsupGSRELLLALuWLLA+ssllEph..Ltpp+Vphucthslsp..........................sEs.sS.u.P.ustht..scts...s-l+clpWLhG+LRapaRsLhopppEQstLlsKIHhhTp.ss+Sc....QsLuH..LSVsEschL+DPEshpp ...................................................hWplLh................t.L...l...t.........htp.h.psp..hph....VK.sLt..GY.PphthhpLPpDus....pGSRELLLALuWLLucssl.-ph....LtptpV...l.ucph.s.lsp................................s-shsS.s.s.....ustht....scss......sDlRtlpWLhG+LRapWRpLhspQQEpCtLLsKI.H.hTt.usp.p....psLsH..LSVsEschl+cP-shpp...................................... 0 11 15 24 +14822 PF14971 DUF4510 Domain of unknown function (DUF4510) Eberhardt R re3 Jackhmmer:Q86SX3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 242 and 452 amino acids in length. There are two conserved sequence motifs: LEA and WMD. 
27.00 27.00 91.90 91.90 20.40 19.80 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -11.07 0.71 -4.07 4 44 2012-06-22 09:22:30 2012-06-22 10:22:30 1 3 20 0 11 48 0 161.10 69 41.03 NEW TCusEsPsssuQPsFLPhlsE.sGuscL-hVsppLQALpEELppssEsRRAAWEA+lGGsupGsEWSAuR+AspEAVppE..L.uALp........tsWEcuusPu....QPp.sP+RLV+u-sGAuss.tsLpAApVItsLRu+EACLctsL+pLQpQCRQELARLAuAhPGLIWI.PPt .....................TCuPEsPAAASQPTFLPhlPE.pGsGELELVsRELQALpEELp.......EAsEpRRAAWEAKAGGCGpGPEWSAu.RRASREAVE+E..L.uALQ........psWEpDuGPA....QPH.GPHRLVRREDGAAGs.psLRA.AEVIRTLRSQEACLEAVL+pLQGQCRQELARLsGALPGLIWIPPPG. 0 1 2 2 +14823 PF14972 Mito_morph_reg Mitochondrial morphogenesis regulator Eberhardt R re3 Jackhmmer:P17152 Family This family of proteins regulate mitochondrial morphogenesis via a mechanism which is independent of mitofusins and dynamin-related protein 1 [1]. 27.00 27.00 52.90 31.30 22.60 22.90 hmmbuild -o /dev/null HMM SEED 165 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.23 0.71 -10.88 0.71 -4.84 10 101 2012-06-22 10:12:39 2012-06-22 11:12:39 1 3 80 0 70 99 0 156.30 54 87.21 NEW Sso....stlIREVYDuENAHEpFEhEL-+ALEAcsshIVIEPo+LGDETuRWIsVGNCLHKTAVlSGlAuLhouLlWh.-Rh...llusPhuulSlhCsuLYslSWpaDPCCcYQV.EpDspcLs+LP.Ls..ssSSPVVLVR+DsppRKhLHpsloluuAuaCAW+lYcsa.K .................h.sssaIl+ElYsuENAp-pFEhELEpALEAphcaIVIEPoRlGDETARWIsVGNCLHKTAVLuGsusLho...sL.................hh......s....chp........aIulPuGsLSlsCssLYulSWQFDPCCKYQV..Eh.Dsh+Lu+LP.LpsLos.SoPVVLVR+DDh+RKhLHsoIALuAhsaCsh+lYchY.t..................... 0 23 28 50 +14824 PF14973 TINF2_N TERF1-interacting nuclear factor 2 N-terminus Eberhardt R re3 Jackhmmer:Q9BSI4 Family This is the N-terminus of TERF1-interacting nuclear factor 2. It is required for the formation of the shelterin complex. The shelterin complex is involved in the protection and maintenance of telomeres [1-3]. 
27.00 27.00 27.50 28.70 26.50 24.70 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.74 0.71 -4.06 12 102 2012-06-22 11:39:58 2012-06-22 12:39:58 1 18 37 0 46 84 0 124.00 38 29.97 NEW WQVlppRslcHaG+ltEFVshlspsVP-Llsh+p+tpLhhGLRA+hlLEhhcppcshs..hptlp.Hlsph....ps.psptpDhch..............cpscssFhcLVpsLlcDspt+cpahpphh.tEYGssasssLcpLhhEFLp+L-plL...PlPshpp ..................WpVlctRpVcHas+l.EFlp.lptssP..sLlpa+c+t+LhhGL+A+lllEhhhttps..hs..Lpslp.Hhsc...........tsptptpDhch...............cuppsFhp.VcpL....csP.chtphhQ...h.t-YGpsF.s.uhcpLhhEaLspLEphLPssphpp.............. 0 3 7 24 +14825 PF14974 DUF4511 Domain of unknown function (DUF4511) Eberhardt R re3 Jackhmmer:Q99622 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 116 and 127 amino acids in length. 27.00 27.00 28.90 30.40 26.50 19.00 hmmbuild -o /dev/null HMM SEED 105 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.33 0.72 -4.29 16 82 2012-06-22 12:03:31 2012-06-22 13:03:31 1 5 71 0 61 72 0 99.00 47 71.78 NEW shosEpAKslLs-lLpulspP-Nut+lp-A+csuG...N-MlKhMQhVaPlssQIQh-VIKsYGFs..sstEGllpFspll+phE+-DsElApLpsplRohaLPP.....lslss .......hosEpAKslLs-lIpAhssPENuh+hsEA+-sAs...N-MhKMhQaVhPlssQIQ.EVIKsYGFs..sstE.GllpFupLl+phEppDsEIApLpupl+ulaLPPhsls.s.................. 0 18 24 42 +14826 PF14975 DUF4512 Domain of unknown function (DUF4512) Eberhardt R re3 Jackhmmer:Q8TCD1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 74 and 104 amino acids in length. There are two completely conserved residues (C and P) that may be functionally important. 
27.00 27.00 29.90 32.10 22.10 21.10 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.66 0.72 -3.16 36 88 2012-06-22 12:35:03 2012-06-22 13:35:03 1 2 72 0 54 79 0 80.60 34 93.55 NEW VClPChlIPlLLala++FlpPllh+hhsP.....W..KcA.tp....ts..ph.hpscsssCsh..........................................pspspst............tt.st.tssssspsKK ..VCIPCIVIPlLLWlY+KFlpPhlY.hluP....hWs.Kss.pp..psts..ph.hpspssss...............................................................................tt............................... 0 13 17 37 +14827 PF14976 FAM72 FAM72 protein Eberhardt R re3 Jackhmmer:Q86X60 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 264 amino acids in length. The function of this family is unknown. 27.00 27.00 34.70 29.40 25.70 23.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -10.92 0.71 -4.37 7 81 2012-06-22 13:09:17 2012-06-22 14:09:17 1 2 54 0 52 72 0 127.70 55 75.13 NEW MSss.thsFpD+sVohLsC+FCcpVLspRGMKAlLLADT-h-LaSTDIPPspslDFlGpCY.Tp.CKCKLKDIACLKCGNlVGYHVlsPCpsCLhSCNNGHFWMFaSpuVhshsRlDsoGsNhLLWGsL.P-h-Ess-Eph.....phstEEhlR .....................Ftp+pV.hLsCpaCcplLssRGMKAVLLAD.Tcl-LaSTD........IPPsss....VDFh......G.........p........CY.h...........T..............chCKCKL+DIACLKCGNlVGYHVllPCssCLLS......C...N....NGHFWMFHSpAVhslNRLDu.oGh..shLLWGsL.P-h--sps-p......p.stEphlR.......................... 0 26 28 34 +14828 PF14977 FAM194 FAM194 protein Eberhardt R re3 Jackhmmer:Q7L0X2 Family This family is found in eukaryotes, and is approximately 210 amino acids in length. There is a conserved YPSG sequence motif. The function of this family is unknown. 
27.00 27.00 29.80 31.30 26.80 25.10 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.26 0.70 -11.27 0.70 -5.34 13 112 2012-06-22 13:34:15 2012-06-22 14:34:15 1 4 50 0 70 110 0 189.40 30 29.71 NEW ssppshhtchYcpGshF.hhhhPDGoupl..aYPSGNlAllhlss.ctsp..hhsllhEDsspss.....lLALFssoG+ussYasNGs...lhLsls.tGGhhsDppGs+h+pWsW.........sspscss....shpslplplNchlslRlhsQDKlslsFtshspps........plslGo+hhhlpP..ctlst.+p......p..c-hhhsspttK...hp+Lls+hcsplshssophh-plt.PutL ..........................s..ptp...phhpps.ha.hh.F.DGos.l....aYPSGNlAlhhlss...phst..hhshlhpDtspss........lLAlhsspGpussYassss...hhlhls.tGGphtDp.pGphl+tWsW.........sspsp.s....shpsl.hplNc.ltl+lhsQDpI.loFh.uhs.pps........plslusp....pP..cthsh.+h..........hst..c...h.sph.K....hpphhtclpthhphsss..hpphh.ss...................................................................................... 0 22 27 36 +14829 PF14978 MRP-63 Mitochondrial ribosome protein 63 Eberhardt R re3 Jackhmmer:Q9BQC6 Family This family of proteins is present in the intact 55S subunit of the mitochondrial ribosome. It is not known if it belongs to the 28S or to the 39S subunit [1]. 27.00 27.00 27.80 27.70 26.90 25.80 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.11 0.72 -3.55 11 74 2012-06-22 14:28:01 2012-06-22 15:28:01 1 2 68 0 49 75 0 88.20 41 79.01 NEW PG..+QWIGK+RRsRsVohptKcshl+RhEhEtcspaaLs+PYLThEQEtGHApph+t...thhcthctppppKastH+hltDpL.sHLNlscpW .........PG+.ahGK+Rhs+.Vohptppshh+cLEhEtcNpaaLs+PYhTtEQEtGHAtch..+p....................ts.hcthcttphpKa.sHhhlt-pL.sHLplscpW.............................. 0 14 17 33 +14830 PF14979 TMEM52 Transmembrane 52 Eberhardt R re3 Jackhmmer:Q4KMG9 Family This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 160 and 236 amino acids in length. There is a conserved LLCG sequence motif. The function of this family is unknown. 
25.00 25.00 25.00 29.40 24.80 24.20 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.10 0.71 -4.32 5 88 2012-06-22 15:00:50 2012-06-22 16:00:50 1 1 32 0 42 84 0 134.20 47 70.79 NEW --sCcsoDpCs.sAcWlsLWYVWLILVslsLLLLCGlTAuCVRFCCL+Kps.sQsapsuAhQPCEVTVIAhDsDSTlHSTVTSYSSVQYPhuhRlhhsFu-hDssuMsPPsYSLYAsEsPPuYEEAlKMhKoRpEVAtsSQKsscLsuloEpEs .....................................t.C...sp...C...t.s.cWspL.WYlWLlllhshLLLLCGlTusChRhCCLp+p...Qstps.u..Ph-lTVIuhDpDS.olpSTlTShpSV.hPhuhR.l.hshtphss...u..hP.......u.-h.PuYEEAl+M..sR.psshsupKss.L.ssst.c.................................................................. 0 2 3 11 +14831 PF14980 TIP39 TIP39 peptide Bateman A agb Jackhmmer:Q96A98 Family \N 27.00 27.00 34.30 37.00 21.80 19.60 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.10 0.72 -8.50 0.72 -4.33 3 29 2012-06-22 15:25:55 2012-06-22 16:25:55 1 2 23 0 16 22 0 49.80 68 39.08 NEW u-WuSPuuspsKRNLVVADDAAFREKSKLLTAMERQKWLNSYMQKLLVVNS ........tsWus.usshsRRSLALADDAAFRERARLLAALERR+WLNSYMpKLLVLDu. 0 1 2 5 +14832 PF14981 FAM165 FAM165 family Bateman A agb Jackhmmer:Q5T5W8 Family This family of proteins known as FAM165 are found in eukaryotes. Members of this family are as yet uncharacterised. Proteins in this family are typically short membrane proteins between 55 and 70 amino acids in length. 25.00 25.00 58.00 57.60 24.50 23.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.57 0.72 -4.46 4 34 2012-06-22 15:39:24 2012-06-22 16:39:24 1 1 30 0 19 29 0 50.50 72 86.55 NEW pAL-NVPLLhYILAhKTLlLCLAFAGVKIYQuKK.EtKLK+pctEK.+R.AE ..KVLEHVPLLLYILAAKTLILCLAFAGVKlYQRKRLEAK....p+l.EAE+.++puE.......... 0 2 2 6 +14833 PF14982 UPF0731 UPF0731 family Bateman A agb Jackhmmer:Q4G0N7 Family The UPF0731 family of uncharacterised proteins is found in mammals. 
27.00 27.00 35.90 35.90 22.80 21.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -9.68 0.72 -4.07 2 39 2012-06-22 15:48:12 2012-06-22 16:48:12 1 1 24 0 23 39 0 75.90 66 83.69 NEW shp.ssQsRRFPlEuGDS.....PGLuSuspsp......up-.sPsR.LRRCPGsHCLTlhcVPIsVYhAMttsP.....ch+sp .PFcFGTQPRRFPVEGGDSSlt.EPGLSSSAuss......uKEhSPsRQLRRCPGSHCLTITDVPIsVYATMRKPPApSSKEM+P......... 0 2 2 3 +14834 PF14983 DUF4513 Domain of unknown function (DUF4513) Bateman A agb Jackhmmer:Q6ZNM6 Family This family of uncharacterised proteins is found in chordates. 27.00 27.00 53.30 31.80 25.30 25.70 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.73 0.71 -4.33 3 32 2012-06-22 15:50:45 2012-06-22 16:50:45 1 2 28 0 22 28 0 117.90 62 82.57 NEW SGKDTsPlLPKLNNNsS-ENoYKPu+......Ks--IHLPRFSLKQGMIPRRYVMPWKENMKFRsVNLK+AEACGIHAGPLEDSLFLNHSERLCHGEDRKVVLKKGPPEIKIADMPLHSPLSRYQSTVISHGFRRRLV ........................shP.hPKls.sststEs..+..p......+hp-lHLPRFSLKQGMIP+RYVMPWKENMcFRNVNLKpAEssGIa.sGPLEDSLFLsHSERLCHGEDRKsVLpKu.PP.EIK.IADM..PLHSPLSRYQSTVISHGFRRRLl....................... 0 4 4 7 +14835 PF14984 CD24 CD24 protein Bateman A agb Jackhmmer:P25063 Family \N 27.00 27.00 35.40 34.90 21.80 21.40 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.42 0.72 -8.51 0.72 -4.16 5 28 2012-06-22 15:58:32 2012-06-22 16:58:32 1 1 16 0 14 53 0 51.80 61 65.88 NEW SNQTTVuTsSs.SSQoTSs...APNPoNATT+uuGGoLQSTASLFVlSLSLLHLY ................SNQT.oVs.ssSs..oSQsTSs...uPNPoNATTKAuGGALQS.TASLhVVS.LSLLHLY. 0 1 1 1 +14836 PF14985 TM140 TM140 protein family Bateman A agb Jackhmmer:Q9NV12 Family This family of uncharacterised membrane proteins are called transmembrane protein 140. They are found in mammals. 
27.00 27.00 68.80 68.60 22.30 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.29 0.71 -4.60 3 37 2012-06-22 16:07:53 2012-06-22 17:07:53 1 2 29 0 20 35 0 151.50 64 96.16 NEW RPRRt-QLLFMuIMVLVAsVISLMFYALLWKAGNLTDLPNLRIGFYNFCLWcEDTGSLcCHQFPELEALGVPRVGLALARLGVYGALVLTLFVPLPLLLAQCNSDEGEWRLAVGFLAsSSVLLAGGLGLFLoYVWKWlRLSLLGPGFLALGlAQALLILLLMAMVVFPPRAE.KuEs+LESC ............hhtppLLFhuIhlLsssVIsLhhYALLW+AGNLsDLPNLRIGFYNFCLWNEssusLQCHQFPELEALGVPpVGLALARLGVYGALVLTLFss.PLLLA.CstscttWpLAVsFLAhuShLLAuGLuLFLoYsWKWlRLSL.GPGFLALusAQALLlLLLhAhshFP.Rupcs.Sph-sC............. 0 1 1 3 +14837 PF14986 DUF4514 Domain of unknown function (DUF4514) Bateman A agb Jackhmmer:Q5T292 Family This family of uncharacterised proteins are found in mammals. 27.00 27.00 51.40 50.50 20.20 20.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.03 0.72 -4.16 4 27 2012-06-22 16:13:00 2012-06-22 17:13:00 1 1 17 0 11 30 0 58.40 77 55.09 NEW DlGGAQVLATGKSAGsEIDhKYAlIGTALGsAISAGFLALKICMIR+HLFDsDSSDL+STs .DVGGAQVLATGKssGsEIDaKYALIGTAlGlAISAGFLALKICMIR+HLFDsDSSDL+ST........ 0 1 1 1 +14838 PF14987 NADHdh_A3 NADH dehydrogenase 1 alpha subcomplex subunit 3 Eberhardt R re3 Jackhmmer:O95167 Family This family of proteins are accessory subunits of the mitochondrial membrane respiratory chain NADH dehydrogenase (Complex I). This subunit is not believed to be catalytic [1-2]. 27.00 27.00 29.60 29.60 26.20 26.10 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.96 0.72 -4.07 6 72 2012-06-25 07:58:29 2012-06-25 08:58:29 1 2 38 0 27 63 0 76.80 64 83.37 NEW MAuRluAFLKsAWsKEPVLVVSFsIGGLAlIlPhlSPaTKYosMINpATPYNYPVPVRDcGNMPDVPSHPpDPQGPSL-WLKsL ..............u.luAFLKNAW..sKEPVLVsSFsluuLA.lILP.lSPYTKYusMINp...ATPYNYP................VPVRDD....GNM..PDV.PSHPpDPpGPSLEWLKpL............................ 
0 1 3 7 +14839 PF14988 DUF4515 Domain of unknown function (DUF4515) Eberhardt R re3 Jackhmmer:Q6ZUS5 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 198 and 469 amino acids in length. There are two completely conserved L residues that may be functionally important. 24.30 24.30 25.40 25.30 24.20 24.10 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.23 0.70 -4.66 11 151 2012-06-25 08:27:04 2012-06-25 09:27:04 1 1 48 0 91 151 0 173.10 28 47.21 NEW Es+hFLEYLpc+sEcppcph-pLWpsYlQpppEI-c+RpELsScaspppupLpppLhQpcKh.pusL+pcLQAlcsluplKEpQ-pcIpsLccEhcchpu-puhpc+Es+hQFLcEKutLE+Qlp-hchhphGccts+ELppKspAhchtAKphhp-aspulp+ENppL++cLhQLhpEhpcLcsp+p+LEpp+QplpcpQWYLEuL .......................................hhpaLpppspcppp.hppL.pph.ppptph.p.pc+p.ch.pp...YstphstLc.tphhpppp...s.lppcLpslcphpth.............K.p.-p-lpsLccphtphptcpppphpchctpFhpEKttLEc-s.Epcl..h...Luc+Ac+E.............A.hhtl.phspslhcENhpLpctLh.hhc......csptLptppppL.pcp+ptLhppp.h.p.................................................................... 0 23 28 48 +14840 PF14989 CCDC32 Coiled-coil domain containing 32 Coggill P, Hetherington K kh6 Jackhmmer:Q9BV29 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 188 amino acids in length. The gene that encodes this protein is C15orf57 but its protein product is called Protein CCDC32 (Coiled-coil domain containing 32). The exact function of this protein is still unknown. 
27.00 27.00 27.90 34.60 19.70 25.40 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.81 0.71 -4.37 14 92 2012-06-25 08:39:40 2012-06-25 09:39:40 1 2 70 0 54 88 0 137.80 41 74.06 NEW D.Wsphsss..psppt......pssppFcDsFpsshstt.tptp..................p....tPLsDS-sYLAsLER+Lp+l+uts......chLcoLuptKc-Chc+LLps.....shsspha.ph.-hDpsshpp.L+RaLhP.c.AlsstEl.aLl.h-tLppp ......................D.WsEhssslspstpp.....sssssuFpDSFhss.stsptppps.t.................s.tsa....tPLpDSEsYLASLE+.KL++IKGhs.......p.-VTSK-ML+oLuQAK+ECWDRF.Lp-.........phsuEhF.-s...l-sDcs.......slpp........h+..RaLtP-+sAlospElpaLl.s-t..p.p............ 0 13 15 35 +14841 PF14990 DUF4516 Domain of unknown function (DUF4516) Eberhardt R re3 Jackhmmer:Q69YU5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 56 and 69 amino acids in length. 27.00 27.00 29.10 29.10 22.60 20.50 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.66 0.72 -8.29 0.72 -4.41 14 71 2012-06-25 08:52:15 2012-06-25 09:52:15 1 2 61 0 43 70 0 47.90 43 66.80 NEW MPuG.........suhspYhhhhssulhSMhAGAplVHphYKPDLolP.l.scssp ...............MPuG.............VshssYlphhusulluMhAGApVVHpaY+PDLolPpl.sc.t.......... 0 8 15 25 +14842 PF14991 MLANA Protein melan-A Bateman A agb Jackhmmer:Q16655 Family \N 27.00 27.00 81.80 81.70 23.80 23.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.57 0.71 -4.08 5 35 2012-06-25 09:02:05 2012-06-25 10:02:05 1 1 30 12 22 30 0 108.70 59 98.78 NEW PRcDhHa..G.aF+G+GRoYsTAEEAlGIGILIVVLulLLllGCWYaKRRSGYKpLhsKoI+lGo.psl.....+sRCspEusc+p-SKlShQEassh.pPVVPNAPPAYEKIAAEQSPPPYSP .PRE-sHa..GaP+KGHuHSYlTAEEAAGIGILsVILGlLLLIGCWYCRRRSGYRsLhDKol+sGTQs.sl.TtRCspEuhsHpDSKlshQEpss..cPVVPNAPPAYEKlSuppSPPPYSP. 
0 1 1 5 +14843 PF14992 TMCO5 TMCO5 family Eberhardt R re3 Jackhmmer:A8MYB1 Family The TMCO5 family includes human transmembrane and coiled-coil domain-containing proteins 5A and 5B. 28.10 28.10 28.30 28.60 27.50 28.00 hmmbuild -o /dev/null HMM SEED 280 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.89 0.70 -5.27 9 134 2012-06-25 09:09:25 2012-06-25 10:09:25 1 2 26 0 62 136 0 210.10 29 85.47 NEW NlpSLN.DLE+DhQRlDEANQ.LLpKIpEKE-sIQsLE+EIs.oht.uc-c..E-.Nchs.tE+EpALp-LE.ETA+LE+cNcpLs+slsELQ+Klo++pps.sssEptshcptlpE.K..s+LQp.ppSCAsQEKELsKl.p...DYppVspLCEDQAhhIK.KYQEhL.+chEc.EKEshlLE+ElsKs.sp....sSph..cssSh.hEshtpNhEcshlpKpp...p...........tFWh+hFRh.LhFhVlhFIRLLuYlhFH.lpaINPDLLVc.sLPhlLSRsoLhcLRchlFPFLTLEsE-lLP .......................................................................................................................sLp.ch-tc.QplsctNp.hL.plp.pEtthppL.....p-lh...t.hpcp..-c...s.h...p.pps.h...h...cp.shLEhps..ph......-lppp..sct..p...h........-t.t......pt..p..c..splp...tphh..p.EpplhKl.p..................-ht...s..tphp..tD...ts..h+....chpE....hL.cphEp.ph-hhhLppc.lphh........t.......s.........p..t..h.ptt...............hh.hphhph.hhh..hhhhhtlhshhhha.h.al....h.p.hLPhhhu+thhhcLRphh.P.LsLpsEthLP..................................... 1 5 5 9 +14844 PF14993 Neuropeptide_S Neuropeptide S precursor protein Bateman A agb Jackhmmer:P0C0P6 Family \N 27.00 27.00 89.30 88.90 26.90 15.80 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.44 0.72 -9.13 0.72 -4.30 4 24 2012-06-25 09:09:30 2012-06-25 10:09:30 1 1 20 0 12 31 0 64.00 73 77.25 NEW YPVssS...KVsGKsDYFLILLNSCPuRl-tS-cLAhLKPILEKsFhKRSFRNGVGoGhKKTSFRRAK ....YPVssS...KVSGKSDYFLILLNSCPoRlDRS-tLuhLKP.ILEKhFhKRSFRNGVGTGMKKTSFRRAK 0 1 1 3 +14845 PF14994 TSGA13 Testis-specific gene 13 protein Bateman A agb Jackhmmer:Q96PP4 Family This family of uncharacterised proteins are found in chordates. In humans this gene is found to be expressed specifically in the testes. 
27.00 27.00 29.40 44.50 20.50 24.80 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.78 0.70 -5.24 5 31 2012-06-25 09:19:14 2012-06-25 10:19:14 1 1 24 0 18 28 0 260.70 55 92.14 NEW Gs.....KcpoKhQ..sGtSKsuKoSul+FEKpll......sDu-EIhDAVGQSKFVLc................................NLRHYTV.........HPNLAQ.....................YYEPLKPTALQKFLAQN+KI.........................................................pSFMLKVTEYDQDKTLLIMTNNPPPCsIDHQGK-usPKYFSsELL.............LK..EosaQH....KPT-NhaLPpMPQK.............KKL+stLK............PlFPlpLl-DPsSK+EQWFRFSTDcDFKSEGRYSKVYALR+QKKMYPQLsFAPVscRph..........+ssVSKKSuS-sPT........SQVIWEPLTLSSLLEEKPTRTAPGESsFRsGRAQQWIIKsATVI ......................................................................................................................++poKhQ..pstu+h.csssh+hEKthh.........ssscEI.DsVGpSKFVLc................................................................................................................NL+HYTV.........HPNLAQ...............................................YYcPLKsTALQKFLApN+Kh.........................................................pSFMLKVT-YDQDKTLLIMTNNPPPssIspQsK-ssPKYFScELL.............lK....ppp.aQH....KPo-shhLPhMsQK.............KKLRstLK............PlFPlhh.-DPsSK+EQWFRFSTDNDFKoEGKYSKlYALR..pQKKMYPQL...sFAPVpcRph..........+ccsS..hKStSphPh........Sph...hhEPLTLuSLLEchPTRo..sPGcusFRpGRA.Q.WhlcpAsl............................................................ 0 4 4 4 +14846 PF14995 TMEM107 Transmembrane protein Coggill P, Hetherington K kh6 Jackhmmer:Q6UX40 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 138 and 164 amino acids in length. There are two completely conserved residues (H and E) that may be functionally important and four transmembrane helices. The domains in this family vary in length from 124 to 126 amino acids. The precise function of the protein family is still unknown. 
26.00 26.00 26.20 26.10 24.90 24.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.72 0.71 -10.52 0.71 -4.00 37 76 2012-06-25 09:47:02 2012-06-25 10:47:02 1 4 60 0 51 85 0 114.90 39 64.62 NEW L.lPsRFlslhsHhlhl..lslhauc-pslpush..........hshspt....p........Ysptcp......plsssLslohhhhslEhhGhhoGlShFssspslhphssHsuAuVhL.hFhhppWcssp..hWalFshh.SslPshhEhhhhh ........LVPuRFLoLlAHLVll..ITlaWS+-sNlp.A.sLP.........hpao.p.....p.......................Ysppct.......p.LlsuLuloluhhslElsGhhoGlSMFssopuLl....slssHsuAuVhLshalhcpWcsst..aWalFshh.ShhPshhEhhhh........................................... 0 24 30 40 +14847 PF14996 RMP Retinal Maintenance Coggill P, Hetherington K kh6 Jackhmmer:Q96NL8 Family RMP is encoded for by a gene, C8orf37. Mutations in the gene cause two types of retinal dystrophies: cone-rod dystrophy type 16 (CORD16) and retinitis pigmentosa type 64 (RP64). CORD16 affects the cone receptors which detect red, green or blue wavelengths of light and RP64 affects the cone receptors first and then the rod receptors. Both of these affect the photo-receptors in the eye leading to colour blindness or blindness respectively [1]. 27.00 27.00 27.70 27.70 23.10 23.10 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.58 0.71 -11.37 0.71 -4.28 14 77 2012-06-25 10:04:19 2012-06-25 11:04:19 1 2 67 0 52 66 1 137.00 40 62.04 NEW LDshls-..........lspcss.ssh.s.phssppsssssshss..s...........p++Cssl.hLuGsphshGlsoshpp......+sCspLRCssCDFcVlpasshcWcpssDYLFFRNNhPchpKLpsKLhpc.GspAYuCQCsWpos..schsslps..spl+WVCuuH ...........................................................sshlp...........h.pp.t..t.......s.p..ts.sstss........................................tppCssl.alGGoshspGluoshsp........RuCDpLRChsCDFtVlpasDhhWDc........S...sD.YLFF.R.NNhP-hpKL+s.......K...............LhccpGsRAYACQCSWpol.....p-lscLpst...ppL+WVCutH............... 
0 20 24 33 +14848 PF14997 CECR6_TMEM121 CECR6/TMEM121 family Eberhardt R re3 Jackhmmer:Q9BTD3 Family This family includes Cat eye syndrome critical region protein 6, a protein which has been identified in a screen for candidate genes for the developmental disorder Cat Eye Syndrome (CES) [1]. It also includes the TMEM121 transmembrane proteins. The function of this family is unknown. 25.00 25.00 25.10 25.10 24.20 24.80 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.45 0.71 -4.70 17 120 2012-06-25 10:07:15 2012-06-25 11:07:15 1 2 66 0 90 116 0 190.60 31 45.76 NEW saAhl....sWhlY.shsLh.Klshlapshhs-t....h.sh-h.hu.psLclsLulo.shLFlLLltsc+.phspu........p++hhlpshhhtlslDLLDslshlphLhcspt....hsLslhl-shllhhshlsl...hLPsluLhElshsph........t.hspchl.aslLthlhVNlPhlhIRs..hLa.......atpspssSlFhhKNlhhlhhtspphhpt ....................................................................................................................................................................................taAhl....hWhlY.hhsLphKlhhlapshh.tc........................htsh-s..h...u.p..pslplhLuls.l.P.hLalLlsuhs.chphsps........pc+cchpsphhhVsLDLLDhl.s.h......t.Lhcspp............hsLP.lahcslhhhhsalhL...slPslu....LsElshps...........................t.hsp+hh...aslLulhhVNlshlhlRs..hll.......app..p..pssoIFlhKNlhhluh+shphlp......... 0 25 39 65 +14849 PF14998 Ripply Transcription Regulator Coggill P, Hetherington K kh6 Jackhmmer:Q5TAB7 Family The precise function of this family is not clear, but it is thought to play a role in somitogenesis, development and transcriptional repression. Ripply is also known by an alternative name, Bowline. Bowline, is an associate protein of the transcriptional co-repressor XGrg-4 [1]. This family contains two conserved sequence motifs: WRPW and FPVQATI. The WRPW motif is thought to be required for binding to tle/groucho proteins [2]. Ripply3 is also known as Down Syndrome Critical Region Protein 6 homolog [3]. 
This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 154 amino acids in length. 27.00 27.00 40.80 39.90 20.50 20.20 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -3.87 16 104 2012-06-25 10:24:52 2012-06-25 11:24:52 1 1 41 0 57 95 0 82.20 50 63.51 NEW sshWRPWl..osp-stppspt..............stsstpspsspts.sFpHPVRLaWPKS+saDYLYppGEpLLpNFPVQATIshY-.-SDS---EE .......................................................s.hWRPWl..sst.c...ph..t..............stu..sthstsstt..tFpHPVR.LaW.PKS+saDYLYppGEtLLpNFPVQATIsFY-.D.SDSE-Ep................ 0 5 12 21 +14850 PF14999 Shadoo Shadow of prion protein, neuroprotective Coggill P, Hetherington K kh6 Jackhmmer:Q5BIV9 Family This protein family is a Prion-like protein and its function is neuroprotective and similar to PrP(C)-like. Shadoo is mainly expressed in the brain, and highly expressed in the hippocampus, the area of the brain which co-ordinates memory as well as spatial memory and navigation. This protein may also alter the biological actions of normal and abnormal Prion Protein (PrP) which lead to lethal neurodegenerative diseases [1]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 150 amino acids in length, of which the first 90 are alanine rich. 27.00 27.00 49.40 49.00 26.30 25.70 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.93 0.71 -4.29 5 54 2012-06-25 10:39:11 2012-06-25 11:39:11 1 1 32 0 11 48 0 121.80 75 86.49 NEW CDSGAAKGGRGGARGSARGGlRGGARGAuRVRVRPAPRYu..GSSLRVAAAGAAAGAAAGAAAGLAAGSGWRRAsGPGEpGLEDsEDGAPGGNGTGRGVYSYWAWTSGAGPTssh+LCLLLGGALGALGLLpP .......CDuuAAKGGRGGARGSARGG.....RGAuRVRVRPAPRYu..GSSlRVAAuuA..A.AGAAAGA...AAGLAAGSuWRRAAGPuElGLEDs.EDGAPGuNGTGRGVYSYWAWTSGA...GPTsph...+LC.LLGGALGALcLLRP....................... 
0 1 1 3 +14851 PF15000 TUSC2 Tumour suppressor candidate 2 Eberhardt R re3 Jackhmmer:O75896 Family This family of proteins are candidate tumour suppressors [1-2]. 25.00 25.00 26.90 26.30 23.90 22.90 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.96 0.72 -10.68 0.72 -4.01 13 77 2012-06-25 10:40:10 2012-06-25 11:40:10 1 1 56 0 45 65 0 102.30 56 97.96 NEW MGuSsSKt.cuhh.h.ssss.sssss......tssc.p..p.schcshRsusP...FVaoRRGShYaDEDGDLAHEFYEE.lls+sGp++uph+Rlp+N.LpPQGpl+hshPplHVDFPlllC...Es .......................................MGsSuSKs.+GhWPFsusuussssts..........tssc.p..shscs+s..RsssP...FVFTRRuShaaDEDGDLAHEFYEE.TlV...TKNGpK+AKL+RlpKN.LlPQGIVKLDhPR..IHVDFPVlLhE.V........ 0 11 15 26 +14852 PF15001 AP-5_subunit_s1 AP-5 complex subunit sigma-1 Eberhardt R re3 Jackhmmer:Q9NUS5 Family This family of proteins are subunits of the adaptor protein complex AP-5 [1]. 25.00 25.00 26.00 25.90 20.80 22.60 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.12 0.71 -4.35 10 72 2012-06-25 10:41:30 2012-06-25 11:41:30 1 1 55 0 47 67 0 166.60 42 86.40 NEW MVhshlIHolp........sps+VLaSphYus-.....tp.pp.shcpcR.hpKEpl....tslA+pVcSpsshp+ps......su+sss-phhp.s-psluhtEts......sGsFplhsG-sFsscphVLWhuVsuluFsLVC-sHENlhLA-sTLRpls+hLhcpl..+sLss....uuclLh.....KuD+lcAlLc+FLPpGQLLFLNcphsptLEKElp ..............................MVpuFLIHTlpss.....tss.sRVLYSphFGs-p.....spp.c.hssEccRlhcKEpl....hsVARQ...VcShspLpppA......uGcssh-htst.u-EslsLpEAs......pGsFpLtsu-PFpps+.sVlWLuVhuLuFsLVh-sHENLLLAEsTLRhLs+hLL-pL...+lLss....usplLh........+uDcltulLpcaLPpGQLLFlNppasptLccch.......... 0 15 18 29 +14853 PF15002 ERK-JNK_inhib ERK and JNK pathways, inhibitor Coggill P, Hetherington K kh6 Jackhmmer:Q9H6E4 Family This coiled-coiled domain, CCDC134, is a secretory protein that inhibits Mitogen activated protein kinase (MAPK) pathways such as Raf-1/MEK/ERK and JNK/SAPK but not p38. 
CCDC134 is widely expressed in normal adult tissues, tumour tissues and cell lines, which shows its importance in cell signal transduction pathways, transcription regulation and therefore cell survival [1]. Additionally, CCDC134 is known to bind to a transcription adaptor, hADA2a, which forms part of the general control nonderepressible 5 (GCN5) histone acetyltransferase complex. Acetylation usually 'switches genes on' for transcription. Moreover, knocking out CCDC134 suppressed hADA2a-induced cell apoptosis activity and G1/S cell cycle arrest suggesting its importance in cell survival [2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 188 and 257 amino acids in length. This family is a coiled-coil domain containing protein 134 (CCDC134) whereby the coiled-coiled domain is a ubiquitous motif involved in oligomerisation. 27.00 27.00 29.10 28.10 19.70 20.30 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.25 0.70 -4.95 8 99 2012-06-25 10:48:46 2012-06-25 11:48:46 1 7 72 0 62 88 0 174.20 39 69.01 NEW pscsppspsppssssppstKlacclFspKRcEHhpAlcplhplpph-KRhcLlchllcclh+llpcu+ptLEcusasu-.SsFP.c-p.sLpDALupllENTAFFG-LlL+hP-hoccllcpss-WpsLhpWulsaoppos.lL--socchLcLlsQEls.hscRcssYhNPY.......pct.+cphcpppcsphKKKp+Kc......hpKtPpL.....+pEL ......................................cha++hFchKR+-ph.A.lcsLhpls-hpppYKll-lhLcslh+VLc-S+thL.sushh..Pc.ssFP.pDc..plK-AhSpllENTAFFGDllLRFP+IlHphac+s.s.sWs.Ll+Wul.sFCspo...u..la...........s.p..s.p.p.l....LpLhuQELs.lsE+sssa.NPa..........ptc..p.cthptpcppcccccp+Kc.........pKtPpl.....p..................................... 0 22 28 48 +14854 PF15003 HAUS2 HAUS augmin-like complex subunit 2 Coggill P, Hetherington K kh6 Jackhmmer:Q9NVX0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 203 and 291 amino acids in length. 
HAUS augmin-like complex subunit 2 is alternatively called centrosomal protein of 27 kDa (CEP27). It localized in the microtubule organising centre, the centrosome. These microtubules are part of the cytoskeleton and give the cell its shape, provides it with a platform for motility and are crucial for mitosis [1]. This protein is part of the HAUS augmin-like complex. This interacts with the gamma-tubulin ring complex (gamma-TuRC) which is required for spindle generation. HAUS2 may also increase the tension between spindle and kinetochore allowing for chromosome segregation during mitosis [2]. This protein is involved in mitotic spindle assembly, maintenance of centrosome integrity and completion of cytokinesis. 27.00 27.00 27.00 27.10 22.30 26.00 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.87 0.70 -11.68 0.70 -5.33 7 100 2012-06-25 10:53:06 2012-06-25 11:53:06 1 3 62 0 56 102 0 194.40 34 89.31 NEW sssWsstp.h.phuGhh.uh.lASslshp.h.s.Spcss.shs............hphLpploslQtcIhphplElphhK.-KssAcLsHso.ht+KhcsLt..phsshLcsVlppK-cIhtRL.pPhsh-ClPlEAcYp+phsc...............LLhhAso.hutLpsslpshp...phc-s.phhuchLt.hslhlspspchhEsl.thREp.tplpphhsth.s.p..lp..........................................................................+clshPP.shpsp ........................................................p...phuuh..sh.lAushh.t...s..spcp..shs............hphLpploslQtcIhphplElphl+h-KcsADlsHs.hltpKhcsLp..phsspLcsVlppKcplhpRL.pPh...st-sLPlEA.Yp+.hsc...............LL.hAss..h.ttLpsplpshp...ph..pts...p.h...sphLt.hshhlspspch.hEsh.thRcp.tph....s...................................................................................................tt............................ 0 11 22 32 +14855 PF15004 MYEOV2 Myeloma-overexpressed-like Eberhardt R re3 Jackhmmer:Q8WXC6 Family This family of proteins is found in eukaryotes. It includes human myeloma-overexpressed gene 2 protein. Proteins in this family are typically between 45 and 74 amino acids in length. 
There are two conserved sequence motifs: MKP and DEMF. The function of this family is unknown. 27.00 27.00 27.40 28.80 23.30 22.80 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.63 0.72 -9.06 0.72 -3.42 8 78 2012-06-25 11:40:17 2012-06-25 12:40:17 1 3 63 0 48 69 0 57.70 71 54.98 NEW MKPulssDEMFPEGAGsYMDL-E.......uGGuoGhhhD.L..sANEKsVHuDFaNDF-DLFDDD............sh. ....................MKP.uV...DEMFPEGAGPYVDLDE..........AGG.STGLL.MD.L...AANEKAVHADFFN................DFEDLFDDDD..................... 0 7 9 27 +14856 PF15005 IZUMO Izumo sperm-egg fusion Coggill P, Hetherington K kh6 Jackhmmer:Q6UXV1 Family Izumo is a molecule with a single immunoglobulin (Ig) domain. It is thought that Izumo bind to putative Izumo receptors on the oocyte. Izumo is not detectable on the surface of fresh sperm but becomes exposed only after an exocytotic process, the acrosome reaction, has occurred. Studies have shown that knock-out mice (Izumo-/- males) were sterile despite normal mating behaviour and ejaculation, indicating the importance of the protein in fertilization [1]. There are cysteine residues thought to form a disulphide bridge. Izumo is a typical type I membrane glycoprotein with one immunoglobulin-like domain and a putative N-glycoside link motif (Asn 204) [2]. There is a conserved GCL sequence motif. Izumo expression has been found to be testis-specific [1,2]. This family of proteins is found in eukaryotes and are typically between 193 and 305 amino acids in length. 
27.00 27.00 42.40 41.80 22.60 22.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.18 0.71 -4.19 8 129 2012-10-03 02:52:13 2012-06-25 13:02:24 1 2 29 0 53 139 0 145.10 28 60.57 NEW aGu+GCLpCDPphlEslspLcusLlPpch.VsshpthhptlhppMpchhF+shthsshluhlslppL-clsoalKschp+LtssohKsshllp.ELlslRcplh+cLcchL+saphc.sCsccCtlhctslLDChsCp+hos+Ch+ucaCtc-c.p+sph+ ....................................uhGCl.Ccsph.-thp.hcpphhspph..s.tp..htshhphlhpthcshhhpsat.s.uhhuhlspppL-pluphlhpphppLhpsshpsshhlp.ELhslhcpthcplppsltpaphc......hC.ccC.slht..hl.C.sCpc.hhtCh.uh.Ctppt....thp........................... 0 4 4 8 +14857 PF15006 DUF4517 Domain of unknown function (DUF4517) Coggill P, Hetherington K kh6 Jackhmmer:Q9GZN8 Family The function of this protein remains unknown. This family of proteins is found in eukaryotes and are typically between 160 and 182 amino acids in length. 27.00 27.00 30.50 30.00 23.70 24.30 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.19 0.71 -4.38 16 94 2012-06-25 12:06:34 2012-06-25 13:06:34 1 2 70 0 54 82 0 136.30 48 83.71 NEW p+HVHFDpp.......hHDssl.............h.hp.pscu.shhV+lGFLplpHRYcIchslPs.s.hhs.....ts.sshs....slPshps+ll.....shs.....ssssscp.hcshlEahAaKEtlL+EcltLsuppssspplcllltARVLu+t+GTPML+sGIHClGVEh-.p-S.EtSDWpGFc ..............................................................................ppHVHFDpp........h+DS.sV..............hspppuDs.sh.lV+lGFLpIhHRYcIsFoLPs.s.hhs.....pshpphs.....sPslHl+lh.....sls......hsEt.....hphEa.AaKEtlL+EchhLsspsss...........spplplhlpARVhs+p+GTPMLhsGl+ClGs.Eh-..-S.EtSDWtGFD........... 0 18 22 36 +14858 PF15007 CEP44 Centrosomal spindle body, CEP44 Coggill P, Hetherington K kh6 Jackhmmer:Q9C0F1 Family CEP44 is a coiled coil domain found localised in the centrosome and spindle poles. 
27.00 27.00 28.00 27.00 24.40 24.10 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.65 0.71 -4.44 13 98 2012-06-25 12:15:38 2012-06-25 13:15:38 1 3 67 0 57 87 0 127.10 46 34.24 NEW Dl+ssLc+LcptLRslpYP.....tclDhstLcpGDPuAhLPllcYsLhsYSppluchLlcpGaE......L..hu.KoDLRFlEslaKlLR-pFsY+PhLotpQFhp.pGFAE+KlhllsDIls...hlhc+H+Els+tp+tpspphpp ..............DLKpsLRpLEpsLRhLsYP..............p-VDhsGLhKGDsuA.LPIISYuhTuYSshVsElLh-.s.slE......L..hu.KNDLRFl-sVYKLLRDpFsYKPlLTKpQFlp.sGFAEhKIpIlCDIls...tVhKKHKELsphpKh.spttpc..................... 0 22 30 38 +14859 PF15008 DUF4518 Domain of unknown function (DUF4518) Coggill P, Hetherington K kh6 Jackhmmer:Q5JPI3 Family The precise function of this protein family is unknown but it is thought to be involved in apoptosis regulation. 27.00 27.00 37.10 31.80 26.40 26.20 hmmbuild -o /dev/null HMM SEED 262 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.75 0.70 -5.35 14 96 2012-06-25 12:29:39 2012-06-25 13:29:39 1 4 68 0 56 90 0 239.00 41 84.87 NEW LS-pc+pGhR-LLtphss.ssLhpLscoVTppllc..l-sp-EAlchIlsaopsspcLL++++IpR-lLF+YLtp+tsssssshsKssLlp+llpaWppphstp......spps.ppsptstpsth..t..............tppp.slchLAccFscWFFshLNu.............-shu.pcFWsDssL+lphhss-tss-ph..puAptV.ptLlultpp.thhFNPNLspsGlpGph-saGhVlVhssGTlHpsppClGlFEpsFGLlRDP....hssNNWKhKpsclpL+upt ..................................................St.EhpGh+pLLt.hcs...splhuLscTlTsp.llp..spspp-AlcAILsYSpsscELL+R+KVpRElIFKYLuspulhlsPso-KpsLIp+shpaWppp..p.p...................hppsspss...ppp.....pp.p...pp..............................................................ptpphs.hppLuccFspWFFtlLNu.t...............spaGPpHFWpDspL+h..h..h..psu....-p.ss..ch....pGAp.Vuh+LluLspp-hlhhsPNLsspGl+st.ssHGLVhVtVsGTlHc.....s.s.......s.......C.LGIFEQlFGLlRsP....hhpNsWKIKhlplplhup.s............................................... 
0 14 18 32 +14860 PF15009 TMEM173 Transmembrane protein 173 Eberhardt R re3 Jackhmmer:Q86WV6 Family Transmembrane protein 173, also known as stimulator of interferon genes protein (STING), is a transmembrane adaptor protein which is involved in innate immune signalling processes. It induces expression of type I interferons (IFN-alpha and IFN-beta) via the NF-kappa-B and IRF3, pathways in response to non-self cytosolic RNA and dsDNA [1-4]. 27.00 27.00 62.70 35.70 19.30 19.10 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.29 0.70 -11.89 0.70 -5.62 16 77 2012-06-25 12:34:37 2012-06-25 13:34:37 1 2 63 7 51 89 0 271.20 37 83.39 NEW chlpplsh......h..hh.thChhs.p.hppha.h.csphWpll+psashshpsshhhhuhllluhhhassssshssls.shhhh......hp................lsppsLoahlhl+psp......h...tls...sL....shAsGhAhSYaaGYL+LlLP........uLpcRhcpapcppN...hphss+RLaILlPp-shVsssl..p..ss..hlchtcsLpsphlsRAGlctRsY.KpuVY+l...tcctsspshhhshEhATPLlThach.pp..usshhh+E.+pE.hhhFh+pLc-lLp.....shPEo+spscLIhYssh.-sp.Gs.hsluclllp+lcpppc .........................................................................hhhh.h.ls.llpt..h..C...ht.p..hpphp.h.psp..hhphhptshshs.tth....hhhl..lshhhh..h.phssh..s..h.hhhh.......................hhsphLshhlhlpt..t..............h...ph.tls..tsh....shApGhAaSYYhGYL+LlLP........GLptRIptY.pppps........hshss+RLaILlPhcshVsssL...p.sDs....slchhcpLsppphsRAGl+sRsY.pNolYcl....hcssppshhCllEhATPL.TLFsMspp..upuuhu+EpRhEQsplFhRpLc-ILt.....ssPEspspscLIlYp...-st.usphslup.lLpHlpppp................ 0 11 14 32 +14861 PF15010 FAM131 Putative cell signalling Coggill P, Hetherington K kh6 Jackhmmer:Q96AQ9 Family The precise function of this protein family is unknown, however studies have shown it undergoes Protein N-myristoylation; a type of lipid modification in eukaryotic and viral proteins. Protein N-myristoylation is usually an irreversible co-translational protein modification which is useful in cell signal transduction pathways [1]. 
This indicates that FAM131 may have some sort of role in cell signalling due to its ability to be myristoylated. This family of proteins is found in eukaryotes and are typically between 257 and 361 amino acids in length. 27.00 27.00 32.10 31.90 20.50 20.30 hmmbuild -o /dev/null HMM SEED 293 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.95 0.70 -4.66 5 167 2012-06-25 12:34:48 2012-06-25 13:34:48 1 2 38 0 87 138 0 229.00 44 79.80 NEW VEDTluMLPKSRRALoIQEIAALARS...ShpGISQVVKDHVTKPTAMAQGRVAHLIEWKGWuKPsD.....oPpAl.cSDFsSYSDLSEGEQEARFAAGVAEQFAIAEAKLRAWSSVDGEDso--SY-EDhusNa-ssoQpLhpst.....t.lhpsphsShPpphsSps..s.sSpss.E..hSs-oLsAS.sohs..........sp...hs........stpGuuup.uuphL...ts...tsGElpLA+uPsppcpsAh+tht....sscpDSssYssshoEouLSPtE-......Dptsh....ss..pEh.huachsR+VSDVoSSGVpShDE ......................................hLP+.+.Rs..shhtIuALA+S...SL.s...GIS...pshKDHVTKPTAMAQGRVAHhIEWpGWuKsss..................u.tsh.cp-hsuYSD.LS-GE+EAR.F.......AA..GVhEQFAIuEAsLhAWSShDGE-.s.s.sShp-s.hs..tt.p.......................p..hhps.h...shs.hp.su.t...s.sSpss.....sspolhuS.sshp..........................................t..s........h...........t....h.p...................................................................................................................................................................................................................................... 0 4 13 36 +14862 PF15011 CK2S Casein Kinase 2 substrate Coggill P, Hetherington K kh6 Jackhmmer:Q9NX04 Family It is suggested that CK2S (C10orf109) is important in the regulation of cancer cell proliferation. Studies have indicated that CK2S is the downstream target of a protein kinase, casein kinase 2 (CK2), which is upregulated in cancer cells. CK2S has been found to be upregulated in cancer cells. The precise mechanism of CK2 targetting CK2S is not well characterised. It is found to be localised in the nucleus and cytoplasm [1]. This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 160 and 221 amino acids in length. There is a single completely conserved residue P that may be functionally important. 27.00 27.00 27.10 27.50 25.50 25.10 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.87 0.71 -4.46 18 90 2012-06-25 13:10:52 2012-06-25 14:10:52 1 3 72 0 58 89 0 155.10 31 72.95 NEW ppL+ptFpslcpppctWpuslscstsLluSLusLsEQLpAhpclphtss..LcsFPsLppRLptK.htAl-tlLscLsEcLspLpcVpcsluptlpsshplYcpph..ssLslssshpRuulsPSlADhLEaLQDl-RaYRppalp++.lLpsLshcsLsshpuhs+pW ..............ptl+ppF.hlccppshWpsshpcs.shlsuLusLsEQLpAhpslc....htsss.LpsF....P.sLp-RLhtKQht.......Ah-hlLspL.tEpLspL.pVpcslsptlcpshplhcpps..sslslc.slhptuulsPSlADhLEWLpDl-RaY+p.p.........Y..lp+c.lLssl.phs..sLsshpuh.pth.................... 0 15 25 41 +14863 PF15012 DUF4519 Domain of unknown function (DUF4519) Eberhardt R re3 Jackhmmer:Q9NRQ5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 59 amino acids in length. There are two conserved sequence motifs: KET and VLP. There is a single completely conserved residue P that may be functionally important. 27.00 27.00 54.70 54.50 21.60 21.20 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.13 0.72 -8.76 0.72 -4.05 12 70 2012-06-25 13:22:02 2012-06-25 14:22:02 1 2 63 0 47 63 0 55.60 62 84.22 NEW MRQL+.GKsKETpKpK+ERKp-.hEhpp+lhTVVLPslushhlhIlVaVYLKTRPp .MRQLKGp.spKETsK-KKERKpshpEu+QQIsTVVLPTLAVVlllIVVFVYlsTRP..... 0 10 12 28 +14864 PF15013 CCSMST1 CCSMST1 family Eberhardt R re3 Jackhmmer:Q4G0I0 Family This family of proteins was discovered in a screen of Bos taurus placental ESTs. The B. taurus member of this family was named cattle cerebrum and skeletal muscle-specific transcript 1 [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 97 and 157 amino acids in length. 
There is a single completely conserved residue D that may be functionally important. The function of this family is unknown. 26.10 26.10 29.20 27.50 26.00 26.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.67 0.72 -4.06 15 66 2012-06-25 14:36:52 2012-06-25 15:36:52 1 2 57 0 39 66 0 74.40 37 49.36 NEW scpPl+aooSpAu..+Wcsppohusspsc...PWapsalluhSlsshllYFCllREEsDlDptLc........psLh-c.l.uLEcpphp ...scPI+FSuSpAs........pWpspcohutsppc...PWapshslSsSLsshlla.ChLREEoDlDphLc........psLh-p.l.t.pp....t......... 0 13 15 26 +14865 PF15014 CLN5 Ceroid-lipofuscinosis neuronal protein 5 Eberhardt R re3 Jackhmmer:O75503 Family \N 27.00 27.00 33.60 53.90 18.50 21.50 hmmbuild -o /dev/null HMM SEED 303 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.13 0.70 -5.06 4 59 2012-06-25 15:14:48 2012-06-25 16:14:48 1 2 42 0 40 54 0 283.40 61 85.46 NEW ppWPVPYKRF-FRPcsDPYCQA+YTFCPTGss...IPVMK--DlIpVaRLQAPVWEFKYGDLLGHhKIMHDAlGF+SoLTGKNYTMEWYELFQLGNCTFPHLRPsMsAPFWCNQGAACFYEGIDDsHWKpNGTLVhlupIS.GsMFNcMA+WVKpDNETGIYYETWTVpASP-tsSssWFDSYDCSpFVLRTYpKLh-LGApFs.KspTNYT+IhLYSGEPhYLGN-TSIFG.oGNKoLAhsIRcFYYsF+PHpSh+EhllSLLcIh-+Vllc+pFYhFYNhEYWaLPMKFPYlKlsYEEIPLP ............................WPVPY+.RFsaRPcsDP.YCQ.A..+YT.F..CPTGSs...IPVM+s-DlIEVaRLQAPVWEFKYGDLLGHlKIMHDAIGF+STLTGKNYTMEWYELFQLGNCTFPHL....R..P-...hsAPF....WCNQGAACFFEGIDD.hHWK....EN....GTLlhVATIS.GshFNpMA+WVKpDNETGIYYETWsVpASPp..+sAcsWF-SYDCScFVLRTapKLAEhGA-F.K....pI-T..NYT+..I.FLYSGEPsYLGNETSlFGPsGNKTLuhAI++FY..aPF+...P.HhSsKEFLloLLpIFDsV..Il++pFYLFYNFEYWFLPMKaPFlKITYEElPLP....................... 0 9 14 22 +14866 PF15015 NYD-SP12_N Spermatogenesis-associated, N-terminal Coggill P pcc Jackhmmer:Q9BXB7 Family NYD-SP12, also known as SPATA16, is a germ-cell specific participant in the Golgi apparatus, and its expression is confined to spermatogenic epithelium, not being found in interstitial cells [1]. 
Computer analysis of the protein-sequence showed that NYD-SP12 contains a cluster of phosphorylation sites for protein kinase C as well as for cyclic nucleotide-dependent protein kinases [2,3]. It is postulated that since the mutation of some Golgi apparatus’ proteins are responsible for male infertility that NYD-SP12 might play a role in modification and sorting of acrosomal enzymes [3]. OMIM:102530. 27.00 27.00 27.30 36.70 24.70 24.40 hmmbuild -o /dev/null HMM SEED 569 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.15 0.70 -12.83 0.70 -6.28 3 107 2012-06-25 15:20:51 2012-06-25 16:20:51 1 1 36 0 24 105 0 196.80 46 99.32 NEW MDSGsSRSLEsoVsRlY+DpLLPKINTSKKMSTLssuP...sILEsopEIKKNsG-tQVEsosERlKhTKoIKEKQSNDLEKAAhKRKAEuEEK.sGKKEAKIhELDNQLl.TsPLPHIPLKNIMDVEMKLVYlDEp-VuYEFApPsMspGpQsTsQsAphsDPsSs+shSsLPQIDKWLQVALKDASSCYRQKKYAVAAGQFRTALELCSKGAALGKPF-AaAEDIASIASFIETKLVTCYLRMRKPDLALNHAHRSIVLNPAYFRNHLRQAAVFRCLERYSEAARSAMIADYMFWLsGGoEcSISKLIKLYWQAMIEEAITRAESFSVMYTPFATKIKsDNIEKVKDAFTKTHPuYs-aIYTDsQGLHlLPQTsDWSSFPPQQYLLTLGFKNK-DGKFLEKlSSRKLPIFTEHKTPFSPLTREDTVRQMETlGKRILPILDFIRSTQLNGsFsACSGVMEKLHYASLLSRLQRVKEQSQVINQAMAELATIPYLQDISQQEAEL...LQSLMADAMDTLEGRRSDKERVWNpIQKVGpIEDFLYQLEDSFLKTKKLRTARRQKTKMKRLQTVQQ ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 2 3 7 +14867 PF15016 DUF4520 Domain of unknown function (DUF4520) Coggill P pcc Jackhmmer:Q96MH7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 197 and 638 amino acids in length.This is the C-terminal domain of the member proteins. 27.00 27.00 27.00 27.70 26.80 19.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.89 0.72 -4.41 13 57 2012-06-25 15:24:44 2012-06-25 16:24:44 1 3 53 0 36 59 0 90.00 38 13.52 NEW ushsssVlp-sslsGsGpFhAYoDG+VRshFsDRshLsLsh..pt...................spshscllhPDGppshlplsps...sshccY..VssAlpas+as ........s.hhslllp-ShIsulGRFhAYoDs+V+ulFhDthhLoLsashss................................shuhC+LshPDGppplIplp+P...tsac.RY..VssslpasRh......... 0 8 14 21 +14868 PF15017 AF1Q Drug resistance and apoptosis regulator Coggill P, Hetherington K kh6 Jackhmmer:Q13015 Family AF1q is an oncogenic factor involved in leukaemia development, thyroid tumourigenesis, and breast cancer metastasis. AF1q plays a critical role in the regulation of apoptosis and drug resistance. Initially identified as a mixed-lineage leukaemia fusion partner (MLL11) in infant acute myelomonocytic leukemia carrying t(1;11)(q21;q23) translocation. It is located in chromosome 1 band 21 [1]. AF1Q may be a novel mediator of metastasis promotion in human breast cancer through regulation of the MMP pathway and RhoC expression [2].This family of proteins is found in eukaryotes. Proteins in this family are typically between 25 and 482 amino acids in length. 
27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.77 0.72 -10.30 0.72 -4.08 10 164 2012-06-25 15:33:04 2012-06-25 16:33:04 1 8 61 0 89 141 0 74.70 26 19.11 NEW +sspps..pssssppP-stEFSSFhFWRsPLPsI-c-LhEhL.scthssss.........stp-pc..................t-ccp--s-sDssGWITP.SNIKQIQp- ..............................t............t.p.ps.p.asSFhaWRsPLPsIDh.s..lE.hL.....h.sp....ps...........................ttcpp..................................................................................................................................... 0 8 16 38 +14869 PF15018 InaF-motif TRP-interacting helix Coggill P, Hetherington K kh6 Jackhmmer:C9JVW0 Motif This highly conserved motif is thought to be a transmembrane helix that binds to transient receptor potential (TRP) calcium channel. It is known that proline-rich proteins inactivate tannins found in food compounds, and it is putatively thought that PRR24 does too. This is important since tannins often inhibit the uptake of iron [1]. InaF is a protein required for TRP calcium channel function in Drosophila [2,3]. TRP-related channels have been suggested to mediate store-operated calcium entry, important for Ca2+ homeostasis in a wide variety of cell types [3]. The amino acid sequence of PRR-24 contains two completely conserved Y residues that may be functionally important. This domain family is found in eukaryotes, and is approximately 40 amino acids in length. 20.00 20.00 21.50 21.00 18.00 18.00 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.52 0.72 -7.64 0.72 -4.50 26 111 2012-06-25 15:53:43 2012-06-25 16:53:43 1 3 57 0 78 107 0 37.50 40 22.29 NEW spKhlRlhTVluYlhuVSLuAlhLolYYhFlW..ssshts ...pKhhRllTVh.sYlhuVShsAlhLulYYlFlW..-ssh..... 
1 24 30 61 +14870 PF15019 C9orf72-like FTDALS; C9orf72-like protein family Coggill P, Hetherington K kh6 Jackhmmer:Q96LT7 Family The precise function of this family is unknown but members have been found to be localised in the cytoplasm of brain tissue. Defects in the gene, C9orf72, are the cause of frontotemporal dementia and/or amyotrophic lateral sclerosis (FTDALS) which is an autosomal dominant neurodegenerative disorder. The disorder is caused by a large expansion of a GGGGCC hexa-nucleotide within the first C9orf72 intron located between the first and the second non-coding exons. The expansion leads to the loss of transcription of one of the two transcripts encoding isoform 1 and to the formation of nuclear RNA foci [1]. This domain family is found in eukaryotes, and is typically between 230 and 250 amino acids in length. There is a single completely conserved residue F that may be functionally important. 27.00 27.00 39.70 32.90 20.50 20.10 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.23 0.70 -5.12 18 93 2012-06-25 15:54:28 2012-06-25 16:54:28 1 2 61 0 64 77 0 237.90 45 51.15 NEW p.ls+hsLus.hppppsthsshl.sphhplsppslhlhuhlF.h.spsspp...shauloll..lsppclpthhphhpslppphpplstph+thhpp.....................ppslpchss.ltphhp............hlsthhpsslhs..........hpItsoshs...............FhupsloSHLpTQhsolI.usshcpspphhshLuhFhhstphphSphphpss......hpssLaLQslpcpssss....hphllp.ppPhThIcLcpcpVhposshcppp ................................pFLAppTLssEIh..csupsssl-sKFaVLsE+ullllS..hIFsu.shsGc+...........sTYuLSlI..LPpoc.LuaYLPLHplCs-R......Lscll+K.sRlhhpK..............................pupslIsh...LTuEllslMc............LLuSh+oauVscc.........I-Is-TlLND-cIus.........psapsFLhpAISSHLQTsGCSVVVG..SssEc..VNKllcTLsLFLTPsER+CSRlsps-up.............pYpsGLFlQGLLK-uoGShshPhcplhhuPaPTTaIDlDhs.TV+QhPPpHEHh..................... 
0 28 33 46 +14871 PF15020 CATSPERD Cation channel sperm-associated protein subunit delta Eberhardt R re3 Jackhmmer:Q86XM0 Family The CATSPER (cation channel of sperm) complex is a tetrameric complex consisting of CATSPER1, CATSPER2, CATSPER3 and CATSPER4, it functions as an alkalinisation-activated calcium channel. This complex requires several auxiliary subunits, including CATSPERD. CATSPERD is essential for the cation channel function and may play a role in channel assembly or transport [1]. 27.00 27.00 32.60 28.20 18.50 23.70 hmmbuild -o /dev/null HMM SEED 733 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.27 0.70 -13.31 0.70 -6.52 7 107 2012-06-26 07:21:54 2012-06-26 08:21:54 1 4 32 0 57 111 0 504.10 29 86.63 NEW .l.ss.hhhssstpclhtpsschp.shhhshplhLThs.hpss.lPhhlPhSh.sGsP.lsShch.suShlLLVsstcsa.hs.phpoWop.pu..c.lScsptc.................................................................sssaots.hhlshos.hFAhlpssph.ps.lhhSssushphpphpYstps..lusLhuhhphpohopsh.lllhspshtpapYpDasLshoh......cshLphhht.uthshLllWsc+slhhuhpshplstsVpshpGptsL.SSlscuslhhpslsss.s-lsVhhcpNslaYuplsI..oshlKhtt.s.aopcsslhFsssGplplLhslcD....uhsappshs.l.th..ssphphshC.h.hhpsphhsphYplDhsppLplhA.hl...shuLhslV.s.sP+.Lshpssl.aE.u....G.sp+pLsIphpQppcatpsDs.FpsphK+sslsslplc.ushphoCsshK.hsh.IuVGCD.pK+IslQs.phSuC.........LpcsaSYlIEK-hhc.....ppsSccLcVpYpapchGCPLhl.asp.a+PVVELac-sta.Ell-ApallhElpGh.sYoashThppusChspsQsWsohhc.sht..h..sWs.ENYhsCas.shspP.t..s.PYpILsupstN+llas.p+sGhYlFhlpIlDP.YSaCpLpThFul.saGhhPhs.h.hssshlhllhhhhholhl..................hshhhhpRhh 
...........................................................................................................................h.........h.....h.............h..................................................................................h.........h....................h.h.hs.....t.h.h..t.....th.h..h.........t.hh.h.......p...hh......ptt...a...ph.h...s...........pt.h..hh..u.hs.hlhh.p.phh...pth.....h.h..t.......h..tt.hht.hh...s...phh..hhh..s.h.ha....phth..p.hhhh......s.p...shhhhptsG.h.hlhsh.s....hh...pph..h.th....s..tht.C.h..hhpsph...hahlDhtptLphhs.hl...shu.h.......l......l.s.sPc.Lt.ptph.aE.uh...G.sphpl............plhhhQp.pa...tthps.Fp.p.ppsshshlplc.sphth.Cs..p...sh.luVGCs.pKhIhlps..phptC...........hpc...sasalI-+.phhc.....pp.....pcs...lt...V.Y.hpphGCPlhl.asp.a.pPllpLacpsta.c.ltspallhElpGh.sYoas.Thtpu.hChppsQsWpohhp..p.t.........h..hWs.pNYhpCas.s.sts.h..s..YpIlstpst.Nplhas..t.sGhYlFhlpllDP.YSaCpLpshhul.shG.hs...h...h....hh..hh..hhh.hhh................................................................... 0 10 10 23 +14872 PF15021 DUF4521 Protein of unknown function (DUF4521) Bateman A agb Jackhmmer:Q8IYI0 Family This family of vertebrate proteins is functionally uncharacterised. The family includes the Chromosome 20 protein C20orf196. 27.00 27.00 67.40 67.30 23.20 22.20 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.19 0.71 -4.73 5 40 2012-06-26 08:56:18 2012-06-26 09:56:18 1 1 28 0 21 29 0 170.30 61 98.08 NEW MAoQEATPGSpSEESSsLDLPSVCDIRDY.lLQRPSQEosSEAFSSlEupShPsSSDVDPDoSNLNTEQssSWTSENFWLDPSVKGQsETKEEDDGL.....RKSLDRFYEsFuHPQPGSuNPLSsSVCQCLSQKIoEL+GQESQKYALRSFQMARVIFNRDGCSILQRHSRDAHFYPlcEGSoSL-DEKPTPGLSKDIIHFLLQQNVMKDp ..Msup-ATsuS.hSEES.S.sLDLPSsCDIRDY..VLQ+PSQEAsSEAFSSlEhaShPsSSDVDPDoSsLNsEQss.SWsSENFWLDPusKGQsE.ppEDDGL.....RKSLD+FYEhFGpPpPuStssLSASVCpCLSQKIopLcsQESQKYALRSFQMApVIhsRDGCollppHs+-s+FYP.t-Gssul-ccc.sPGLSc-llpFLLpQshhKc....... 
0 2 2 4 +14873 PF15022 DUF4522 Protein of unknown function (DUF4522) Bateman A agb Jackhmmer:Q96KX1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. In human this protein is known as C4orf36. 27.00 27.00 34.70 33.20 19.40 18.80 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.84 0.71 -10.67 0.71 -4.38 3 26 2012-06-26 11:36:54 2012-06-26 12:36:54 1 2 18 0 11 23 0 97.20 68 81.83 NEW MAYGLPRKNTVKTILRGSCYKVQEPWDLALLTKTWYTNLANIKLPFLEEIoFGSPVpLpKspTpK-spLPSAESIKLEREYEsKRLsKLKCQENVuKEIQhSLRERPVGLRRPLPPK MAYGlPRKNTVKTILRGSCYNVQEPWDLALLsKTWYoNLANI+LPFLtEIsFGuslpLpKspThK-uLLPSAESIKLEREYEhKRLscLKsQENsucEIQh.LRcR.sGLRRPL.sK.............. 0 1 1 1 +14874 PF15023 DUF4523 Protein of unknown function (DUF4523) Bateman A agb Jackhmmer:Q7Z4U5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. 27.00 27.00 29.10 36.10 26.70 24.70 hmmbuild -o /dev/null HMM SEED 166 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -11.13 0.71 -4.73 4 34 2012-06-26 11:58:47 2012-06-26 12:58:47 1 1 22 0 14 28 0 139.70 58 77.70 NEW NLHKLLPN+LMElLaSa+SEEDK+KCENsEFSGLERILtRHQhPKEINLTPKPSpMPLW+RKh.NNhspGWKKC+LWsKsTKEPPMSTIVVRWLKKNMQPoEDLcSVhpRLSsFGPIpSVTlCGRQSAVVVF+DhsSACpAVoAFQSRsPGoMFpCuWQQRFMSK- ......................................NLHcLLPN+LhEhLaSh+SEEDKcKCE.NPEhSGLERILARHQLPKEINLTPKPs+MPsWKRKIINNlo-GWKKCHLhc+NhKEPPMSTIVVRhL.pKNh..pcsL..+sl.p+LpthsshtpsT.sG+ppshV................................................... 0 2 2 3 +14875 PF15024 Glyco_transf_18 Glycosyltransferase family 18 Eberhardt RY re3 Jackhmmer:Q09328 Family Enzymes belonging to glycosyltransferase family 18 (alpha-1,6-mannosylglycoprotein 6-beta-N-acetylglucosaminyltransferase) contribute to the creation of branches in complex-type N-glycans. This domain is responsible for the catalytic activity of the enzyme [1]. 
26.00 26.00 26.60 26.20 25.20 24.30 hmmbuild -o /dev/null HMM SEED 559 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.97 0.70 -12.84 0.70 -6.17 11 229 2012-06-26 12:02:26 2012-06-26 13:02:26 1 8 75 0 150 195 2 376.20 35 73.28 NEW CYA.-aGVD.GS.CSFllYLSEVEsaCP.h.tRt+ps.....s.......pp..sslRcslt.L...h...pcsphpah+pRIpRhWspWlpAucp.Lpc.ppshppR++h+lLValGhLusEsuh+huppuhpGGPLGELVQWSDLluoLplLGHpLclSsspspL+ulls.hhstspssssssscpphDLIaTDIhGLs.h+pphs.hhh.pa+C+lRlLDSFGTEs-FNhpsYspppsh......ppp..WGuasLphpQahTMFPHosDNoFLGFVV-pcs...........ps.++pspul.....VYGKctYMWc..spcchlcllp+ahclHATVss.tp.....pslPoh...VpNHGlLsupElppLL+csKlFlGLGFPYEGPAPLEAlApGsVFLNsKFcPP+SRhNpcFFc-KPTlRclTSQHPYsEtaIGcPHVhTVDIsNpc-lEtAlKcIlphK.lcPalPaEFTspGMLpRVsshlpKQsFCsp.s...................pWPPlsAL+lhtuspup.SCcpsCpspsLlCEPoaFPhINsppthp+...phsCsuscspss..lAPuh.....sCslQussLLFSCAussP...phpRlCPC .............................................................................................................................................................................hhh.Rh.th.........Wh..u....ht............................thplhhh.t.h..t......h...s.pGGPLGEhlQWsDl.ssL.hlGHtl.hs.p..ph.........................hhh.Dh.G...hpt.....h.......pC.hRllDoFGTcstash..h............tt...aG.hshp..Qa..ThaPHo.s.D..N.oFhGFl.pp..................p.ht..ullYGK.t..h.a..p.......sp......p......h.....lthl....pp.h..h.plH...uTV.....t..........hPsh....lpNHGhlst.-h..LLpcs.+lhlGhGFPhE.GPAPlEAlu.Gs..hFlps...chp...s.csp.s.thh..................tK..Ps.R.c..h.SQpP.Yhp.hlG..PaVhslshps.tthctslpthht..p..h.................p....PahP....h.....EaostGhLpRl.thhlpp.p.....h...............................................aP.Ph.ps.lph.hhu.....up.uC.psC..ppthhC-PshF.hlNppp.h.p...hth...Cps.......p.............................p....h.............hP.........t...........ttChh.p...tp..l..aSCsutt.......phpRlCPC................................ 
0 54 67 108 +14876 PF15025 DUF4524 Domain of unknown function (DUF4524) Coggill P pcc Jackhmmer:Q96MH7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 197 and 638 amino acids in length.This is the N-terminal domain of the member proteins. The human gene is from C5orf34. 19.50 19.50 19.70 19.50 19.20 18.20 hmmbuild -o /dev/null HMM SEED 148 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.81 0.71 -4.73 7 59 2012-06-26 12:02:50 2012-06-26 13:02:50 1 3 49 0 40 58 1 132.60 45 25.25 NEW phllhsD-pVpspasDGspLpLSPCGStFlhEptss.stHPLptscpl+QRT+FslSpa+pplhpAL-FRNpasspPaLsppllpsE....Rptphhscloclc.WPssssss....hpstpsGpVploSlDGhApLhLsppQcEFTVcFlC+lup ......................................Mlla-D-SVpVpYhDG...opLpLSPC....GoEFlhc+sss.u............sHPl.ptscRlRQRTcFslSsa+ppl.pAL-FRNp.us......p.P.a.LspplIss.-...............+Kp.....p.lhhc.....hoEsc.WPs.ssss...........thhh...hpsGpVcIsSlDG+AhLsL.spsQcEFTVcFLCKlSp................ 1 8 13 23 +14877 PF15026 FAM74 FAM74 protein Bateman A agb Jackhmmer:Q5TZK3 Family This family of uncharacterised proteins are found in humans and are known as FAM74 proteins. Members of this family contain several short protein repeats. 27.00 27.00 31.40 48.50 25.10 22.40 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.11 0.71 -4.50 2 11 2012-06-26 12:07:12 2012-06-26 13:07:12 1 2 3 0 8 9 0 84.70 64 100.32 NEW MWRELRGCPGGDVETAQRLSQRRRGKSSEAVPEKTWRAQRMSQRRRGESSEAVPEKTWKELRNSETVPEKTWKQLRtCLQEDVpRVQRLShhhHhts.hhlhhth.hp.pGs+..ssTaL...hhht ........cLRsCPGtDhETAQRLSpRRRGcSSEAVPEKTWRsQRhSQpR..cSSEsVsEKTW+p...SEsVsEKoW+pL+tC.pEDVpRVQRLS.hhHhts..hlhhth.hp.pGs+..ssTaL...hh.... 0 8 8 8 +14878 PF15027 DUF4525 Domain of unknown function (DUF4525) Eberhardt RY re3 Jackhmmer:Q09328 Family This domain is found in eukaryotes. It is often found at the N-terminus of glycosyltransferase family 18 enzymes (Pfam:PF15024). 
It is also found in coiled-coil domain-containing protein 126. 27.00 27.00 63.10 62.30 23.90 22.80 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.68 0.71 -4.64 7 96 2012-06-26 12:10:43 2012-06-26 13:10:43 1 3 41 0 56 75 0 130.60 64 31.53 NEW shshshK..SQKLuhhLlsFGhIWGhMLLHaThQp.sp+pSSspLRpQILDLSKRYlKALAEEN+slMDGs.uuoMsuY.DLK+TlAVLLDsILQRLsKLEsKVD.llsNGousNhTNuTusshsslsssc+sssuuslp ...............hhhhshK..SQKLuhhLlsFGhIWGhMLLHaTlQQ.spppSSuhLREQILDLSKRYlKALAEEN+Nl.VDG....s.uusMsuY..DLK+TlAVLLDsILQRlsKLEu...KVD.lVlNGousNoTNuTo.slss.lss.c+lNsus.I........................ 0 5 8 21 +14879 PF15028 PTCRA Pre-T-cell antigen receptor Bateman A agb Jackhmmer:Q6ISU1 Domain The pre-T-cell antigen receptor (pre-TCR), expressed by immature thymocytes, has a pivotal role in early T-cell development, including TCR beta-selection, survival and proliferation of CD4(-)CD8(-) double-negative thymocytes, and subsequent alpha/beta T-cell lineage differentiation [1]. This protein contains an immunoglobulin domain [1]. 27.00 27.00 31.40 67.40 26.90 26.70 hmmbuild -o /dev/null HMM SEED 154 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.07 0.71 -4.65 3 29 2012-10-03 02:52:13 2012-06-26 13:13:27 1 1 19 3 13 51 0 146.70 75 63.73 NEW GVGGTPFPSLAPPITLLVDGKQQTLVVCLVLDVAPPGLDSsIWFSAGNGSALDAFTYGPSPAsDGTWTSLAQLSLPSEELAAWEPLVCHTGPGAGGhSRSTQPLQLSG.EASTARTCPQEPLR...............GTpGQsLRLuVLRLLLFKLLLLDVLLTCSRLC .........GVGuTPFPSLAPPITLLVDGKQQhLVVCLVLDVAPPGh-SPIWFSAGNGSuLDAFTYGPSPAsDGTWTuLAQLSLPSEELAuWEPLVCHTGPGAtG+SRSTQPLQLSG..EASoARTC.hEPLR.................Gh.utsLhLGsLRLLLFKLLLhDlLLTCS+L........ 0 1 1 1 +14880 PF15029 DUF4526 Protein of unknown function (DUF4526) Bateman A agb Jackhmmer:Q8WUU8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals and includes the human integral membrane protein TMEM174 protein. 
27.00 27.00 175.40 175.30 21.50 21.00 hmmbuild -o /dev/null HMM SEED 232 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.81 0.70 -11.83 0.70 -4.78 5 34 2012-06-26 12:24:07 2012-06-26 13:24:07 1 1 32 0 23 31 0 233.20 66 96.34 NEW -DFulNVFSVoPYPu.SpSDupVSDGDKAGATLLFSGIFLGLVGITFTVMGWIKscGpoHFEWTQLLGPILLSVGVTFlLISVCKF+ML.SCKPCKpoEEss...LDhEQoSuGQSFVFTGINQPITFHGATVVQYIPPPYuopDssGGso....PllSNsNulaSuau..sssusGPPQYYsIYPMDNPAFVpD-ssPupluts-....lcsSpPDAuhp-E-tLGsscosEhSPPuYEEIFPTsR .EDFPlNVFSVTPYTP.STADIQVSDDDKAGATLLFSGIFLGLVGITFTVMGWIKYQGVSHFEWTQLLGPILLSVGVTFILIAVCKFKML.SCQhCKESEERl....Do.EQTsGGQSFVFTGINQPITFHGATVVQYIPPPYuSpEPhGhsosYLQsslsPCGLlsSGGA..sushPSPPQYYTIYP.DNuAFVsDE.shPuas.suus.....cRsssDAcpLE.EspL.t--.ssssFSPPPYEEIYuls.p.. 0 1 2 7 +14881 PF15030 DUF4527 Protein of unknown function (DUF4527) Bateman A agb Jackhmmer:Q6ZRC1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. 27.00 27.00 33.40 33.40 26.90 23.80 hmmbuild -o /dev/null HMM SEED 277 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.79 0.70 -5.09 5 28 2012-06-26 12:29:08 2012-06-26 13:29:08 1 2 22 0 14 26 0 239.40 52 67.78 NEW QLRDsEA-soEEDLRLRVQQL+HQVLTLQCQLRDQsuAHRtLQAuh-EATsLQDcLQuKL-ELQKKQHEANLAVoPLKAKLASLVQKCRERN+LITHLLQELHRHGluNHLLSEhAQuMVsDVALAEYAATFLAPGlPETSHH..LDVESEcTAsc+A.QKYLLNPEtDSV..LQssLpSESWPlPEAEWPAQTApLDS.hKLPLPSGsTPsPGTC.AuVAVEPuLPAQsL+EcGGsSCPl.LpADsLPPPsELLSPARILAFHQELRQSICSNSQVNKSPLE ................................................pDsEApssEE-.RLcsppLHHpVLTLQCQLRDQuuAH...QAuh-E.As+LpccLpscL-ELQKKQHEApLAVTPLKAKl.ASLVpKCpERN+LITHLLQEL+RHGhsNhLLSElAQsMlsDVALAEYuATFLuPGlPETSHH..LDVcSEhTAshRA.QphLLNPchDSV..lQpshpSEShPlPcsEWPApsApL-S.lK.LPLs.ssT.DPGsC.AusssEsGLsAQpLQEcGGhsCPs..LpssslsssSELLSPARILAFHpELRQSICSNSQVpKSPLE................................................... 
0 2 2 2 +14882 PF15031 DUF4528 Domain of unknown function (DUF4528) Eberhardt RY re3 Jackhmmer:A6NNL5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 95 and 154 amino acids in length. This family includes Human C15orf61. 27.00 27.00 38.50 38.50 19.70 19.60 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -10.83 0.71 -4.38 13 65 2012-06-26 12:31:18 2012-06-26 13:31:18 1 1 59 0 42 60 0 119.40 64 85.25 NEW KPtASEVLTuaL+QRscPsWTSYFV+YpcVtNDQaGhSHFNWsV.sGsNYHILRTGCaPaIKYHCT+RPhQDLohEDpFFphlKllNLGlPsLhYGlAAhhLI+HpEhVcsuc..GsVsIYFLhcED+G ...............+PpASEVLTpaLhQRphP.WTSahV..YusVpNDQFGhSHFNWsV..pGuNYHlLRTGCFPFIKYHCSKtPhQDLuhpD+FFpsLKVlNLGIPsLhYGLuuhhhh+hTEhV+TSh..GPVTlYFL.KEDcG...... 0 10 14 29 +14883 PF15032 DUF4529 Protein of unknown function (DUF4529) Bateman A agb Jackhmmer:Q6P387 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. The proteins contain a conserved VLPPLK sequence motif. 
27.00 27.00 43.80 43.00 19.70 19.40 hmmbuild -o /dev/null HMM SEED 400 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.48 0.70 -5.87 5 37 2012-06-26 12:32:13 2012-06-26 13:32:13 1 1 27 0 20 36 0 334.60 49 98.13 NEW MDL...CpKsET-LENuENsEI-SoEETELTYTCPDERSEKNHVCCLLslSDITLEQDcKApEFlIuTGWEEAVcGWGRTSPTACIWPRKKsKKARVGEuAs..SsCLLCssLSpGSLEuRP.opuuK......AEsGPEKDpGSsSQTpusPQGPosASRE.INKICFPTYhpGEKKSLQIKEFIWChEDWAoPETlRGKssRsPSpGscphLSISDuLTSRALLVLPPLKuSssNuLDVLGKKSKNhFLQPEEKVLuVEKDECVAhsYGLKoVDGKGEKpssELA+Hs+VsDhhPFPPsVApTsLL.A-sEpCCLHWSLLPEKNLlCPPsPoNl+YLATlQLLQKQGsQNYKA+hKA+EPRPPhpTpK+lLTEAKQENRPQMLETKVFPRPLLPSLTVSRVVIPVSTHRlL ....MDl...CQKsET-LE.tcssEIppsEETp..ohTCPDt+SE+sHVhCLLslSDlTLEpDp+A..pEFhIGTGWEEA....VpGWG+sSPsACIW.P.RKpsKKu+sGEus...SsCLlChsLsp.....ho...h..-s+P.hps...............sspcspuossphpusstu.tsuSRt..hsplshPs.hpuEKKSLQlKEFIWC.ccWshPpo.+s.Ks.ts...sstsspp.t.ohsssLoS+ALLVLPPLKuS..NsLDlluKKo+s.hhpsEEKshsVcK-tshuts.GhKos-GKGEKR..E.....LApH.hVsDh.s.PsssApTsLL.scsEpCCL+WSLLsEKshhsPPs...s...ssl+YLAsLQLLQKpGhQsaKs+h+A+-PRsPh.opK+hh.cAKQEsRPphLEoKVFs+PLLPSLTVsRVlIPh.sHRhL........................................................................................................................... 0 1 2 5 +14884 PF15033 Kinocilin Kinocilin protein Bateman A agb Jackhmmer:A6PVL3 Family This family of kinocilin proteins is found in vertebrate. In mouse it has been shown that this protein is expressed primarily in the kinocilium of sensory cells in the inner ear [1]. 
27.00 27.00 34.50 34.30 22.90 19.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.60 0.71 -4.60 6 31 2012-06-26 12:53:32 2012-06-26 13:53:32 1 2 22 0 16 38 0 105.50 75 99.54 NEW MDIPISoRDFRCLQLACVALGLVAGSIIIGVSVSKAAAAVGGIFlGAAGLGLLlhA.YPhLpu+FphshlhPsI.........GsLRIHPpsGPDHGEGRSSsNuNKEGARSuLSTVoRTLEKLKPGGRGTEEG ...MDIPIS.oRDFRCLQLACVALGLVAGSIIIGlSVSKAAAAhGGlFIGAAuLGlLlhA.YPFLKARFNLDHILPsl..........GsLRIHPpsGPDH.GEGRSSsNGNKEGARS....SLSTVoRTLEKLKPGs......RGsEEs.................... 0 1 2 4 +14885 PF15034 KRTAP7 KRTAP type 7 family Bateman A agb Jackhmmer:Q8IUC3 Family This family of keratin associated proteins are found in vertebrate. 27.00 27.00 123.30 123.20 21.10 17.40 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.60 0.72 -10.17 0.72 -3.34 5 24 2012-06-26 13:00:44 2012-06-26 14:00:44 1 1 19 0 12 27 0 85.90 84 100.00 NEW MTRFFCCGSYFPGYPCYGTNFHGTFRATPLNCVVPLGSPLNaGCGCNGYSSLGYuFGG.SNFsNhGCCYGGSFYRPWGSGSGFGYSTY MTRFFCCGSYFPGYPsYGTNFHRTFRATPLNCVVPLGSPLNaGaGCNGYSSLGYuFGG.SNFsNLGCsYGGSFYRPWGSGSGFGYSTY... 
0 1 1 2 +14886 PF15035 Rootletin Ciliary rootlet component, centrosome cohesion Coggill P pcc Jackhmmer:Q86T23 Family \N 25.10 25.10 25.40 25.30 25.00 25.00 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.69 0.71 -11.00 0.71 -4.30 29 212 2012-06-26 13:02:07 2012-06-26 14:02:07 1 5 88 0 120 189 1 174.70 35 10.71 NEW ++Rl-AspEpQccpAsLhutLQpK.....ltpY..Rp+hu-lEppl....................ssp..........+.sccphshslcc................................hls+LEEEcpRs-p.......LR.ph-ptphpNppLps-lp+Lpppapt...plppKEchappcEcshspYlssEpp+.hhsLWp-lppVRRQhuEh+spTERDLpp.+sEas+ss+sl ...............................................++Lps.pEuQpcQApLVp+LQuK.....lhQY..+..p..hCt-LEppl............................................................................ttp...t....p..hpstpc...sL-p.......................................................................................................hLhRLEEEpQ..RspsLspVN..s.LRtph-puphsNcuLpcDlpKLTs-asphpcELtp+E.spap..pEpct.....ap.sYhp....sEHsR.LLtLWRpVVshRRph..tEh+ssT-.RDLtch+sEhsRhutp............. 1 24 33 72 +14887 PF15036 IL34 Interleukin 34 Bateman A agb Jackhmmer:Q6ZMJ4 Domain \N 27.00 27.00 161.00 160.70 18.60 18.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -11.31 0.71 -4.70 5 39 2012-10-02 01:28:15 2012-06-26 14:07:47 1 1 27 13 17 42 0 160.20 69 72.21 NEW GLElWslAus-ECulTGaLRDKLQY+NRLQYMKHYFPINYRVuVPYEGVLRsANITRL.....p+ApVSpRELRYLWVaVSLsATEpVpEVLLEGHPS.WKYLpEVcTLLccVcpuLu....pDVEls.P+VEulLoL...LopAsG.Sl.KLVRPKALLDNCaRVMchLassCC+pSSl .........LEhWPLTQs-ECslTGaLRDKLQYRNRLQYM........KHYFPINY+ISVPYEGVhRlANlTRL.....QRAcVSERELRYLWVLVSLSATESVQ-VLLEGHPS.WKYLpEVpTLLLsVQpuLh.....DVEVu.P+VEuVLSL...Ls.APG.sL.KLVRPKALLDNCFRVMELLYCSCCKQSS....... 
0 1 2 4 +14888 PF15037 IL17_R_N Interleukin-17 receptor extracellular region Eberhardt RY re3 Jackhmmer:Q6ZVW7 Family This domain is found at the N-terminus (extracellular region) of interleukin-17 receptor C and Interleukin-17 receptor E. This is the presumed ligand-binding domain [1]. Human putative interleukin-17 receptor E-like consists only of this domain. 27.00 27.00 28.40 28.00 24.50 23.80 hmmbuild -o /dev/null HMM SEED 406 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.49 0.70 -12.55 0.70 -5.48 7 139 2012-06-26 13:28:15 2012-06-26 14:28:15 1 5 37 0 62 140 0 299.40 33 61.25 NEW sluC.h....pT+C.Ls.Rhssphssputh...c...t.....cs.h..................pGlpsuhhphhV.tShpua.hthCthachsssAphplhup...Sptut+hsshpsslup+shhop+TQPs.scsh.......SQ+ptsP.................cauasl.s-scslpVTls...susEhslRLCapWslpCp-hupPhss.K.loGs+sVsLsYphLLPCLCIEu..Lp.DoVRpchCPFps.PcAahp-hWcuh+asshSt...hVhtLshhCPLKlcAoLC.Rps.to.CcsLssAhupEu...-thhlhptVDhHPpLChKFShtsuoalcCPapss..ohssWsssMshpspQhhLchSShstssFSuuhshPt.u.st.h.ss..hSpspsps.Pssh-LllshL+sGsCl.VWRsDV.FuhhtL.Cs .............................................................................................................................pt...p.s.hph.s.........thChhhch.ssut.hh..up.....s.tshhhsshpsslupcshhhphTpPphtcth........s.ppthP.......................a.hsl.s-ucslpls...ls...su.-.phtLsh.W...psps.spPhhh.+.lousptlsLsap.LlPCLCIp.s.h...h.p.....DslR..pphCP..Fpp.....scAa........ps.............hWcsschp.ho.........shthps.Csl.s.csuLC.pts...ts...CpsLsssht..ps...pthh.h.hlchHPpLCh.phs..p..p.p..lppsh.ts....shs..shpssh.h.......p.tohh.ht.Ssshs....................o...s.s..h.hp..lh...t.t.C..h................................................. 0 3 6 22 +14889 PF15038 Jiraiya Jiraiya Eberhardt RY re3 Jackhmmer:A6NGB7 Family Jiraiya inhibits bone morphogenetic protein (BMP) signaling during embryogenesis [1]. The human member of this family is TMEM221. 
27.00 27.00 33.30 28.20 22.70 22.60 hmmbuild -o /dev/null HMM SEED 176 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.09 0.71 -4.85 11 78 2012-06-26 14:47:06 2012-06-26 15:47:06 1 3 63 0 58 82 0 135.70 31 46.85 NEW LsuLuLLsllSllhAlLSLIhLL+lpssstpstt.hhtstt.th.ss.....-thhhlY-lohALsALuLoLNLsCLLVCAlQhhFAs+llRus.sssuRs.....spaLtcSpssRpsAluuFFluIsVaLoulhLYohlpFcspPuIsoShllGsGllF.......CsuuMlHslalWp+tcspuh+phshs ......................hhhhsl.uhlhuhluh.hlhphps..ht.....................t.h.hlh.lshsLsuLsLsLslsslllsslphhhsschh+s......tRs..................shaL.cs.ph.RhsAluhF.hhulsVaLsul.lYs.hl...Fc.psuhssu.llGsGhlh.......h.sssh.sHslhhht+tpp.uhpt...s................. 0 14 20 42 +14890 PF15039 DUF4530 Domain of unknown function (DUF4530) Eberhardt RY re3 Jackhmmer:A6NGS2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically around 140 amino acids in length. The human member of this family is C19orf69. 27.00 27.00 77.20 130.50 23.20 21.10 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.59 0.72 -4.16 4 23 2012-06-26 14:53:34 2012-06-26 15:53:34 1 1 18 0 9 24 0 111.20 69 85.98 NEW MELW+QL+QAGLVPPGLGPPP+ALRtVPPVtpsGQTLhSsGADTGGAREpLLWIWEELGNLRRVDVQLLGQLCSLGLEMGsLREELVTlLEEEEEppppEEcp......tPEcKQE ..MELW+QL+QAGLlPPGLGPPPpALRtlPPs-pPGQTLhouGADsGGARESLLWIWEELGNLRRVDVQLLGQLCSLGLEMGALREELVTILEEEEEsscEE.EEs.....ppsppKQ-............. 0 1 1 1 +14891 PF15040 Humanin Humanin family Bateman A agb Jackhmmer:P0CJ71 Family This family of proteins is found exclusively in humans. Humanin is a short anti-apoptotic peptide that interacts with Bax [4]. 27.00 27.00 43.70 43.50 24.20 17.70 hmmbuild -o /dev/null HMM SEED 24 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.62 0.72 -6.71 0.72 -4.70 9 21 2012-06-26 14:56:28 2012-06-26 15:56:28 1 1 4 2 11 26 0 24.00 75 97.68 NEW MAsRGFSCLLLSTSEIDLsVKRRh MAsRGFSCLLLssSEID.LssKRRh. 
0 11 11 11 +14892 PF15041 DUF4531 Domain of unknown function (DUF4531) Bateman A agb Jackhmmer:A6NCJ1 Family This family of uncharacterised proteins is found in mammals. This family includes the human protein C19orf71. 27.00 27.00 85.80 85.80 18.10 17.50 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.23 0.71 -4.52 3 25 2012-06-26 14:58:44 2012-06-26 15:58:44 1 1 20 0 15 25 0 164.20 65 88.51 NEW PcsLYSDDYLSLEGPRWsPAIRQAVRWKYTPMGRDAAGQLWYTGLTNSEoREAWYsLPRALDSPYREAYsRWHGCaQ+RERoMPSAYTQHLRETAWaDPlIPAQYpsPSTRWGSTLWKDRPIRGKEYVVNRNRYGVEsPWRuSDYVPhLSAPQRPRaTTQsYRQWGLEPYCPSTsQRPPPuaTP ....PsPLYSDDYLSLEGPRWsPAI+QATRWKYTPMGRDAAGQLWYTGLTNS-spEAWYsLPRA.sSPaREAYsRWHGCap+RE+ohPSAYTQ+LRETAWaDPllPA.QYhsPSTRWGShLWKDRPIRGKEaVlNRpRaGlEs.WpuSDYVP.LSsPQRP.hTsQsYRpWsLEPYCPSTsQts.P..TP..................................... 0 1 1 3 +14893 PF15042 LELP1 Late cornified envelope-like proline-rich protein 1 Bateman A agb Jackhmmer:Q5T871 Family This family of uncharacterised proteins is found in mammals. 27.00 27.00 60.80 30.30 26.70 25.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -11.23 0.72 -11.72 0.72 -3.87 3 23 2012-06-26 15:02:28 2012-06-26 16:02:28 1 2 18 0 11 14 0 85.90 71 91.06 NEW MSSDDKsKSuDPKNEPKNCDP+CEQKCETKCQPSCLKKLLQRCSEKCsh-KCPsPPKCsPCPPCPP.........SsPssPhCPP.CsPPCPuP...CPPSCPPKPCVKPCPPKCPS....PCPPPE .MSSDDKsKSs-PK....sEPKNCDP+CEQKCEoKCQPSCLKKLLQRCSEKCPREKCPsPPKCPPCP.s..ss.........................Cs..Cs.....CP..CP........................ 0 1 1 1 +14894 PF15043 CNRIP1 CB1 cannabinoid receptor-interacting protein 1 Eberhardt RY re3 Jackhmmer:Q96F85 Family This family of proteins interacts with cannabinoid receptor 1 (CNR1) and attenuates CNR1-mediated tonic inhibition of voltage-gated calcium channels [1]. 
27.00 27.00 28.40 30.10 25.70 17.20 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.93 0.71 -4.96 11 73 2012-06-26 15:23:00 2012-06-26 16:23:00 1 1 58 0 49 64 0 150.20 52 97.72 NEW Mus....hstlh+lolSL+hpPssuPVFaKVD.GpRFuQs.RTIKLLTsSpY+l-VshKPush..pshulGGsslsLEp.po+..DtppsVYou.asT-GlssoKSG-RQPlplslpFsshGthcss.......hQsKaYshpcp-H....CpWGsshpsIEaEC+ssEsRolhhl.........pKEsF+ ................Mu-lPsll+loluL+hpPN-GPVFaKVD.GpRFGQ...........s.RTIKLLTGSpYKlEVplKPsTlp..sp..sluIGGlhlPLE.+o+...DspplVYoGhYDTEGVssTKSG-RQPlQlsh.FschGsFETs.......WQVKaYNYcKR-H.......CQWGssFssIEYECKPNETRoLMWlNKEoF.h............. 0 10 14 33 +14895 PF15044 CLU_N Mitochondrial function, CLU-N-term Coggill P pcc Jackhmmer:Q96NS8 Family CLU_N is the N-terminal domain of the Clueless protein, also known as TIF31-like in other organisms. The function of this domain is not known. It family is found in association with Pfam:PF13236. 22.60 22.60 22.60 22.60 22.20 22.20 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.42 0.72 -3.82 51 327 2012-06-26 15:24:58 2012-06-26 16:24:58 1 45 226 0 243 326 1 76.20 35 5.87 NEW lss..p-plp-l+phLhpt..sposhhTsapL.p..h..c..G.tp......l.scas-lspltsl.......ps....ss....p..Lpll..c-..sYsp.+pAchHlpRlR-ll ................................................ssp-pVp-l+QhLh-p.ssTsahTsFsLp.h..c..Gpp........L.ccasElpslpsl...................pt........ss....pLplV...........E-PY.Tt.+-ARhHVp+lR-Ll......... 0 60 122 197 +14896 PF15045 Clathrin_bdg Clathrin-binding box of Aftiphilin, vesicle trafficking Coggill P pcc Jackhmmer:Q96F83 Family Aftiphilin forms a stable complex with p200 and gamma-synergin. This family contains a clathrin box, with two identified clathrin-binding motifs. This family of proteins is found in eukaryotes. 
22.50 22.50 22.50 25.00 22.10 20.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.01 0.72 -9.52 0.72 -4.47 36 120 2012-06-26 15:31:09 2012-06-26 16:31:09 1 2 65 0 69 134 0 73.40 46 10.68 NEW EhhcVWspLQDlcsuauLRYQWuuSHSNKpLLsSLGIDoRNILFoGsKp.......pslhVPhaAAGLGM..LEPTKtslc.P ...t..hclWppLQDIcsApuL+YQWuuS+ss+pLLsSLGIDoRNIhhoGsKt.......pslhhPhaAAuLGh..LEPsKt.lp..s...................... 0 12 17 38 +14897 PF15046 DUF4532 Protein of unknown function (DUF4532) Bateman A agb Jackhmmer:A6NCN8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 52.20 37.60 22.60 19.00 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -11.77 0.70 -5.41 5 36 2012-06-26 16:18:15 2012-06-26 17:18:15 1 1 29 0 24 26 0 228.90 52 89.88 NEW MVHApEoLPTsQTWAQREFLLPuEopEaPGFTpQAYHQLALKhPPCT-hKuKVRQRLhpPWKDAAQHTWGFHTWLDVGRLPATFPTRPDRPYDSNVWRWLTDScAHRpPPAEPPIPPPSWMGQNSFLTFIsCTPIFVDhNRKKQVIlRTVKELKEVEKLKLRSEARAPPLDAHGNILPPKNFKKYRHISAGGRaEPQGLQLMPNPLPNNFARSWPCPNPLPHYQEKlLKLALLPSAPLSQDLVRsYQTLIEsRVALPLHHLS+ApPGKTssRKhKRRPG .................MhpApE...s.pTWsQR.EFhLPscohchPGFT.QuYHpLALK.PPsT-hKScVRpRLhpPWKsu..s...p..H.TWGFHTWLDVGRLPATFPoRPD+PYDSNVWRWLTcop....AH+pP..........P...........uc...P..I..PPPSWMGpNSFLoFIpstPlFl.DhpRKpQVIhRThKEL+E..lEKLKLRSEuRAPPLDupGNIlPPtsFKK...ahss.................................................................................................................................................................. 0 5 6 9 +14898 PF15047 DUF4533 Protein of unknown function (DUF4533) Bateman A agb Jackhmmer:A2RU48 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. This family includes two human proteins: C12orf60 and C12orf69. 
27.00 27.00 52.20 52.10 25.70 25.50 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.43 0.70 -11.35 0.70 -4.93 4 54 2012-06-26 16:22:02 2012-06-26 17:22:02 1 1 28 0 35 55 0 208.40 41 92.27 NEW c+pcLlpht+phF.pMpD.hphTNpLhElhNuphsspItpIpMKEDusVK-sh-.Ilphh+ElQstlpthpspMpcc........h.sohhcK.pslKEhpppthtlh+pVhosllspshsoush..lKhLpSSLohlhhs.lhsL............tp.uuol.tuhSsus.tlGhs.IL+tl.sAlcpsplpssl+Sht-pLtphhcs.c.hhchlpcshsTlc .c+pcl.phtpphh.plpD.hssTNpLs-lhNtphssplh.lphKc-uoIK-sh-.l...lpshpchQptlpphc-slpcc............h.solhcKhpslKEhcppthslhppVhosllspAhsuush..lchltSslophhh..lhslh..............tp.u....so.htShus.us.thshc..hl+tl.sAlcppphpssI+ShtccL.phhpA.c.hhcslpcshpTlc....... 0 2 2 8 +14899 PF15048 OSTbeta Organic solute transporter subunit beta protein Bateman A agb Jackhmmer:Q86UW2 Family \N 27.00 27.00 39.00 37.80 26.70 22.90 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.64 0.71 -4.05 9 37 2012-06-26 16:25:08 2012-06-26 17:25:08 1 3 31 0 24 34 0 122.30 45 82.98 NEW -HSpssptAsssshVPQEL.......................LEEMLWaFRsEDAoPWNYSILsLusVVslISFlLLuRSIpANRNpKhp....ss-KppPEs.pLs-u.h+-sssLshLRET..LLSEKP...sLAQ.............sEhElcp+Dssh.shLsDP .........................pt.pss.tsssussVPQEL.......................LEEMLWFFRsEDAoPWNaSILALusVVlhlShhLLuRuIpANRpcKhp........s.-KpsPEshphsEu.....t.h......+.-csuLss....LcET....LLSEKP...sLsp.............sphELK-+Ds...shhs-.......... 0 1 1 6 +14900 PF15049 DUF4534 Protein of unknown function (DUF4534) Bateman A agb Jackhmmer:Q8N7C4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in mammals. Proteins in this family are typically between 170 and 190 amino acids in length. The protein includes the human integral membrane TMEM217 protein. 
27.00 27.00 73.40 73.10 19.20 19.10 hmmbuild -o /dev/null HMM SEED 163 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -10.93 0.71 -4.61 10 43 2012-06-26 16:29:12 2012-06-26 17:29:12 1 1 23 0 28 31 0 158.00 42 85.33 NEW u+MhSlLsGlFSllsTp.aLIFEhpalsphshp-phshYpsTpshltsallsaphsIshsLShlTIllSCFLLYslasphYhGLLhYslWIlhYEhlNhsl.lLTNs..cpphKElphL+Whh.lSRhlLHhFhL.FVlpHAYhlYKspppssllua+RRhSs .A+hGolLuGlFoIhsTh.aLIFE.palsphssp.-p.hshhpsspshlssallsaphsIslhLShlTIllSCFLLYSVasplYpGLllYslWIlhYEhhNhsl.lLTNs..cpshcEl+hh+WhGhlSRhhhHhFhl.FVlpaAahlYKspppssllua+RR.Sh............ 0 1 2 2 +14901 PF15050 SCIMP SCIMP protein Bateman A agb Jackhmmer:Q6UWF3 Family This family contains the SCIMP proteins which are a a transmembrane adaptor protein involved in major histocompatibility complex class II signaling [1]. 27.00 27.00 29.60 28.50 26.90 26.20 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.74 0.71 -4.26 4 30 2012-06-26 16:34:01 2012-06-26 17:34:01 1 3 24 0 16 30 0 121.40 61 67.48 NEW MsWWRsNFWlILAVAIIsVSsGLGLILYCVCRh.LRQG+KWElAKPLcpcpRDEEKMYENVhNpssVQLPPLPPRuh..PEcouPQEsPSQPsAsYS.VNKV+NKKsVuIPSYlEPEsDYDDVEIPAshEpp+ .....MsWWRsNFWIILAVA.IIlVSluLGLILYCVCRhpLRQGKKWEIAKPLKpcQtDEEKMYE...NVlNpSPVQLPPLPPRsh.S.Ecs...SPQEsPS.tP.PATYSLVNKl+NKKsVSlPSYlEPEsDYDDVEIPANhEpt............................ 0 1 1 2 +14902 PF15051 FAM198 FAM198 protein Eberhardt RY re3 Jackhmmer:Q9UFP1 Family This family of proteins is found in eukaryotes. The function of this family is unknown. Murine FAM198B is downregulated by FGFR signalling [1]. 
25.00 25.00 27.50 26.70 24.10 23.30 hmmbuild -o /dev/null HMM SEED 326 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.43 0.70 -12.23 0.70 -5.74 8 108 2012-06-27 07:44:34 2012-06-27 08:44:34 1 3 42 0 61 111 0 277.00 52 64.22 NEW usIRlYutpuPPWFSc-DIpsMRhLADupVsuhs+.........lPu+uts.hllLc.......pussssuptCssp.CGlIKRPhDhoEVFAFHLDRVLGLNRoLPAVuR+Fco.......hssGpspPVlLWDPSlp...ssss-psShpLsWspYQphLKp+ChtsGpl.....PpsphsCosIHHpEWu+LALFDFLLQIa-RLDRsCCGF+PcspDsCVppGLHt+CcNpcclpLsHIlpR+pDP+HLVFIDNpGhFc+sEDNLNFRLLEGIcEFPEoAVSVLpS.u+LRp+LLQSLFlDQhYWESQGGRpGl-+LIDVIE+RAKlLLpYIptHsl+llsMN .....................................................................................................................utpsPsWhoccDlpth+LLAputVssht..........ls..u+utsh..lshp..............ts....s.s..hs.hCu.u.CGLlKpPhDhsEVhAFHLDRlLGLNRoLPuVuR+hc..............hpDGpspPlIh..Wss...slp...sss...spsp...sSltLsWhpYQpLL+p+Ch..sGps........sp.sCstIHHpEWu+hALFDFLLQlasRLDp.CCGFcP...c.pDsCVpptL+.KCcs.s.pltLsHIl.RppDPpHLVaIDNtGhhp+sEDpLNF+LLEGIcpFP-SAVpVLpS.tpLpphLLpSLhhDpVaWESQGGtpGlcpllcslEpRuplLlpaIptHshplh................. 0 8 13 29 +14903 PF15052 TMEM169 TMEM169 protein family Coggill P, Hetherington K kh6 Jackhmmer:Q96HH4 Family This domain is thought to be structured transmembrane helices and includes the intermediary cytoplasmic domain. It is found in eukaryotes, and is approximately 130 amino acids in length. 27.00 27.00 98.30 97.90 25.10 25.10 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.81 0.71 -4.25 9 66 2012-06-27 09:45:27 2012-06-27 10:45:27 1 1 59 0 48 53 0 133.10 58 41.06 NEW pt+psCphGlspG.HllLWSllClPlVFllSFlhSFYYGTlTWYNlFLhYNEERTFhHKIolCPhLIlhYPllIhssoluLGlYuAlsQlSWtaspWhpslpDhEKGFhGWlCuKLsL-DCSPYslVELhDs-s ....p..ppsCpsGscpGPHllLWoLlCLPlVFllSFlsSFYYGTlTWYNlFLVYNEERTFaHKIohCPhLILFYPlLIhshuhuLGLYuAVsQLSWuauuWWpuspDhEKGFhGWLCSKLGLEDCSPYolVELL-SDs... 
0 9 14 28 +14904 PF15053 Njmu-R1 Mjmu-R1-like protein family Coggill P, Hetherington K kh6 Jackhmmer:Q9HAS0 Family This protein family is thought to have a role in spermatogenesis. This family of proteins is found in eukaryotes. In humans, it is found in chromosome 17 open reading frame 75 (C17orf75). Proteins in this family are typically between 217 and 399 amino acids in length. 27.00 27.00 50.10 33.60 20.50 19.70 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.26 0.70 -5.70 4 77 2012-06-27 09:53:00 2012-06-27 10:53:00 1 2 48 0 46 60 0 278.80 51 84.02 NEW asLYuYRuuRhoQptuDo-DGpsuGsss-oPSG-DFSLSLVDTNLPuEsEsELRSFIAKRLSKGAVFEGhGNVASVELpIPtY+VGCYYCLFQQEK.LPEsAshESEpNssEYVVCFLGGSEKGL-..LFRLELDKYhQuLKsshssEp+sLEsclpsYLsSWFEssVCPIQRVVhLFQEKLAFLLHAALSYTPVEVKpuDE+Tc+DINRFLulASLQGLlpEGTMTSLChAMTEEQH+SlIlDCSGsQPQhHNAGSNRFCEDWMpAFlNGAEuGNPFLFRQlLENFKLKAIQDhNNLKRFIRQAEMNHYALFKCYMFLKNCGSGDILLKIVKVEHtEMPEA+sVVsVLEEFM+Eu .......................................t.......t.t...stss........tth......t.ss.....stpDF.S...LoLhDosLPuEsEPELRoaIuKRLSK.GAlatGhGNVAoVELpIPt.t.VGCYhCLhpp..E+..sE.t.....ssh-u..p......t.....ss-YVlCF....LGGS..EKGLc.......hF+.L...ELDKYlQGL+ssh....s.sEp.p..sL-scl..+.sYLspW.aE-sVt.IpRVV.LhQcplsFL.LHAALSaT.VEVp..poD..-+TKpD....lpRFlpsASLQGL...p...ps...........T.h....s.SLCh..AhoE-p+psl..llDC.S..ss.Pphh..N.A.s..........S..N+FC-DWhpsa.Ls..us-tuNPFLhRQlLENFKL.KAIQDhNsLKRFIRQAEhsHYALF+CahFLpsCGsGDlLLp.hs+sEH..p..-hPEApsllsVLEEFhpE............................... 0 16 19 28 +14905 PF15054 DUF4535 Domain of unknown function (DUF4535) Coggill P, Hetherington K kh6 Jackhmmer:E0CX11 Family This family includes the uncharacterised protein C7orf73 that is found in eukaryotes. Members are generally less than 100 residues in length. Although the precise function of the domain is still unknown, members have a predicted N-terminal signal peptide sequence which suggests they are short secreted peptides. 
28.30 28.30 28.30 29.00 25.40 27.80 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.32 0.72 -4.55 29 115 2012-06-27 09:58:10 2012-06-27 10:58:10 1 2 61 0 77 131 0 45.20 40 60.68 NEW +ushoFhhGsssGlYlAQNYpVPNl...pKLhpshh.hAKclE-pY+...KP .........shhsFhsGsssGlYlAQNYpVPNl...pKLhpph..htKcl--ph+K........... 0 16 35 54 +14906 PF15055 DUF4536 Domain of unknown function (DUF4536) Coggill P, Hetherington K kh6 Jackhmmer:Q96GE9 Domain This domain family is thought to be a transmembrane helix. It is found in eukaryotes, and is approximately 50 amino acids in length. In humans, it is located in the chromosomal position, C9orf123. 27.00 27.00 27.50 27.30 25.50 25.90 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.79 0.72 -8.28 0.72 -3.89 11 53 2012-06-27 10:01:04 2012-06-27 11:01:04 1 1 48 0 35 58 0 46.50 40 47.33 NEW -ChuCRllSGsGLIGuGsYVahpA++R.p.stphuhholsphssulG .sChuCRllSGhGLlGuGuYVahtA++..h.K.shs.u.tolsphshulu........... 0 5 11 20 +14907 PF15056 NRN1 Neuritin protein family Coggill P, Hetherington K kh6 Jackhmmer:Q9NPD7 Domain The domain family Neuritin1 (NRN1) is a GPI-anchored protein expressed in post-mitotic-differentiating neurons in the developing nervous system [1]. NRN1 is a glutamate and neurotrophin receptor target encoding a neuronal protein that functions extracellularly to modulate neurite outgrowth (OMIM:607409).\ \ This family of proteins is found in eukaryotes. Proteins in this family are typically between and 158 amino acids in length. 
27.00 27.00 29.80 29.20 25.40 22.80 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.22 0.72 -4.03 10 92 2012-06-27 10:04:05 2012-06-27 11:04:05 1 2 40 0 51 83 0 84.20 50 38.75 NEW C-ulaKGFSDCLLpLGDsMuNYsQcl--cpslpoICoaWDDFHsCAsTALuDCQEsAu-lWEpLRpES+plsFQGSLFELCuuuouAssu .......CDslaKGFu-CLlcLGDuMus.sp..t.......p.sppplcTlCp...W-DFHuCssoALssC..EtAsslWEpL+pES+phshpGsLa-LCuusst...ss........... 0 2 9 25 +14908 PF15057 DUF4537 Domain of unknown function (DUF4537) Hetherington K kh6 Jackhmmer:Q9NQ32 Domain The function of this domain family is unknown. It is found in eukaryotes, and is typically between 119 and 141 amino acids in length. In humans, it is found in the chromosomal position C11orf16. 27.00 27.00 27.00 27.10 26.80 26.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.61 0.71 -4.34 24 111 2012-10-02 16:56:36 2012-06-27 11:18:52 1 15 44 0 74 124 2 127.80 31 19.59 NEW sppVlARps...pDGaYYhGsVppph...ssstalV-a...........spptptltppclIths.ssh.ppsLpsGDtVLAh......hsstp.p..........YsPGhVh.............sspscplsVpFasGp.....psp..lPpppshhl.sp.shacpssthl ...............sppVlARp-...p-Ga.Y.a.G...plKpss....spsphLVcF.....................tttpppsVstppll.hu.suh..s.sLpsGDhVlAh......hpspthp..............YsPuhVlhh.ptp..........ssp-c.hTVhhhNs+........pth..lPtsslhhl.s...shathusph................................... 0 38 40 47 +14909 PF15058 Speriolin_N Speriolin N terminus Eberhardt RY re3 Jackhmmer:Q76KD6 Family This family represents the N-terminus of the sperm centrosome protein speriolin [1-2]. 
27.00 27.00 37.40 31.70 22.40 22.10 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.29 0.71 -4.41 7 60 2012-06-27 10:18:56 2012-06-27 11:18:56 1 4 26 0 35 66 0 166.50 43 42.05 NEW Moh.ssaEGhpHphcRLV................pENp-LKK.VRLl+ENpcLKpA................LuEusts.stRtspsVhhP.uPAsApEss...psGhhshuPhAsh.soPp......Sst.tslhs.hTuoLssLLsu.ushSpps..ss.h............................os.h........ssuPGt.hAoSLulPppu.Lossu.........................shsuplAVShuSPLLoSTss..thsQphLtsP....................lushsLsEsPR ............G.pc.hpRLh................pENt-LKK.VRLl+ENp.L+ph................Lu-usttsss+.....t....slhhP..uPAhs..........Ess...tsGl.shu.hssh.sosp.................TusLpsLLsu.uP.hSpps.....................................................sssPu...hAoShtlP.pu.LoPsp.................hsspsssph.SPLLSs...........Ltss....................hsshsL.E.PR................................................................................................................... 0 3 3 7 +14910 PF15059 Speriolin_C Speriolin C-terminus Eberhardt RY re3 Jackhmmer:Q76KD6 Family This family represents the C-terminus of the sperm centrosome protein speriolin [1-2]. 27.00 27.00 32.70 31.80 23.50 18.80 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.78 0.71 -4.18 4 66 2012-06-27 10:19:36 2012-06-27 11:19:36 1 4 32 0 39 68 0 137.60 60 35.82 NEW lVGEIAFQLDRRILuhlFPt.hsRLYGFTVS.NIPEKIhQsSlps.saplDEchspsLTpRYVolhs+LppLGYNtcVHPshoE.LlNtYGILRpRP-LtAS.spS..sshLp+lLl-sV.PphLsDuLLLLSCLspLS+DDuKPhFhW .........................lVGEIAFQLDRRILu.lFPt.hsRLYGFTVS.NIPEKIhQsSlps.DtplDEchhppLTQRYlolhsRLpp.LGYstcVHPu.hoE.LlNsYGIL+pRP..-LtAs.hto...sshLp+lll-sV.PphLsDuLLLLsCLspLu+-DuKPhFhW................................. 
0 5 7 14 +14911 PF15060 PPDFL Differentiation and proliferation regulator Coggill P, Hetherington K kh6 Jackhmmer:Q8WWR9 Family Pancreatic progenitor cell differentiation and proliferation factor-like protein (PPDFL) is alternatively named Exocrine differentiation and proliferation factor-like protein. PPDFL regulates exocrine cell fate. This protein is highly expressed in exocrine progenitor cells which eventually differentiate to form exocrine pancreatic cells [1]. 27.00 27.00 29.20 29.00 24.60 23.50 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.98 0.72 -10.51 0.72 -4.06 7 87 2012-06-27 10:23:41 2012-06-27 11:23:41 1 2 40 0 40 71 0 95.70 50 94.60 NEW MAAIPSSGSLVATHDYYRRRLGSTSSNSSCGSuEYsGEsIPHpPGLPKuDsGHWWASFFF.GKSThPhMusVlESsEpp.tohpsSsu.lsCsLApcsh......ppQ.uupsuKossu ....................MAAIPSuGSLlATp-YYRR.R.lGSoSS.sSS...CuS.sE.asGEsIPHp....PGLPKsDsG+WW.uSFFF.uKps.Phhssl.Epspp......tssts.h.s..h...s.........p............s........................ 0 2 5 17 +14912 PF15061 DUF4538 Domain of unknown function (DUF4538) Coggill P, Hetherington K kh6 Jackhmmer:Q8N5G0 Family This protein family is thought to be a transmembrane helix. Its function remains unknown. This family of proteins is found in eukaryotes. Proteins in this family are typically between 58 and 87 amino acids in length. 27.00 27.00 33.60 33.60 25.60 24.80 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.19 0.72 -8.80 0.72 -4.66 13 61 2012-06-27 10:24:17 2012-06-27 11:24:17 1 2 50 0 36 71 0 56.10 51 54.00 NEW hhRGh+hslhlGGlVuhlGhAhYPIhlcPhhpsEEYK..clQplNRsGIcQE-lQPssh+ .................huRshRouLIFGGFlullGAAhYPIaFRPLh+hEEYK..+pQtlNRAGIhQE-lQPsGlK......... 
0 9 12 22 +14913 PF15062 ARL6IP6 Haemopoietic lineage transmembrane helix Coggill P, Hetherington K kh6 Jackhmmer:Q8N6S5 Family ADP-ribosylation factor-like protein 6-interacting protein 6 (ARP6) is a transmembrane helix present in the J2E erythro-leukaemic cell line, but not its myeloid variants. In tissues, ARL-6 mRNA was most abundant in brain and kidney. While ARL-6 protein was predominantly cytosolic, it is known to bind to SEC61-beta subunit of a protein conducting channel SEC61p [1]. 27.00 27.00 27.70 29.50 22.30 26.80 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.01 0.72 -4.08 11 55 2012-06-27 10:28:35 2012-06-27 11:28:35 1 2 49 0 35 58 0 75.40 48 36.26 NEW Qplps.VQuphshsh.WS.......+L.PLLsGLllsuFuYslVYLDSs.PGlhPPoPFSPps+pRhp..pupShHLuYhsAlhsG ...........................plpppstschshhsFWoh.......LllSLhuGh.CCSFSWTlTYaDSaEPGhaPPTPLSPu+hK+hs.....spSaHhGY.hAlLsG........ 0 6 9 19 +14914 PF15063 TC1 Thyroid cancer protein 1 Hetherington K kh6 Jackhmmer:Q9NR00 Family Thyroid cancer protein 1 (TC1) is thought to decrease in apoptosis and increase cell proliferation. It is found to be expressed in thyroid papillary carcinoma [1]. This suggests its importance in thyroid cancer. The molecular mechanism of TC1, involves up-regulating cell signalling through ERK-1/2 signalling pathway and it positively regulates transition between the G1 and S phase in the cell cycle [2]. It is thought to positively regulate Wnt/beta-catenin signalling pathway by interacting with its repressor [3]. In humans, it is located in the chromosomal position, C8orf4. This family of proteins is found in eukaryotes and contains a conserved NIF sequence motif. 
27.00 27.00 28.80 56.20 23.20 17.50 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.92 0.72 -9.71 0.72 -4.44 9 80 2012-06-27 10:42:52 2012-06-27 11:42:52 1 1 39 0 49 70 0 77.50 54 59.15 NEW tot.hh.SsSlRVuPSspG.+FD..TAsRK+AsANIFcsVsp-sLQ+LFc+oGDpKAEERA+lIashspDsEEhA+ALhAL+ .............s.....h.ssSlhsuPSsh..hpt.s........stuRKpAsANIFpss-..plQpLFpp....SGDppAEERA+IIaphstDhc.hAcALhtL+............ 0 2 5 18 +14915 PF15064 CATSPERG Cation channel sperm-associated protein subunit gamma Coggill P pcc Jackhmmer:Q6ZRH7 Family This family represents the gamma subunit of the CATSPER, or cation channel sperm-associated protein complex. The complex appears only to be expressed in the flagellum of sperm. The complex is activated at alkaline intracellular pH, and being restricted to the flagellum is the mediating calcium channel. 27.00 27.00 73.00 28.20 25.30 24.80 hmmbuild -o /dev/null HMM SEED 825 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.26 0.70 -13.36 0.70 -6.81 4 89 2012-06-27 14:03:08 2012-06-27 15:03:08 1 3 30 0 42 80 0 504.50 38 85.95 NEW 
Mhp.s....s...........shhlhhhVLLlsh+shutpshpcCoWhVslscap+lucphs.pp.....PlssV.p.hth..ssS.lpsoc.Y.uFPYaL+IphsCssKsSc-luRhhtLpGlpPhV+lhhp....ssssahphphE............................plph.h-sAsh+Ss.s....sC.s-hhCp.uWhsPhPhcsGoll.pV-lhssGlGsaI.ppR.hlphsGFhp.pstssssthultp-..l.t.thapshpuhPl....shuPVhILGGhsspchlLaosopFpcasllphpIsSphsuShhCshhuhssTIassluh.uoLhIppsphVh.asGsaosLh-+spuS....Rhh.scCI++Lpss..susGp..lLAlsst.pcGhlalGslpDu.hph..hsc.................taSsCphlsu.......uph.ssptTlLLLVths.hptotpaallpa..ssupcshElLYpIP-hIPpt+ph-aLhhLsocoass.......shhhscs.FhsslsshlalWGNhlL.p.............................................................................................................................ppss.h.loh.p.CPaphhph.plPp.Q+.oc.cpa.hhPshl..p.shHssNoLAlYpGLVaYhhah+s..-hs.h-shp.sshp.atp.ht....DpDYaFahhSNp...tslalsMsuYp+lashpushpl.Pp+hFLDpGspasaslhh.hasso.+sppp...thp..pplsLSlslu-PsslsVosppEhLlppsolLhpIsltDp+hsh-QshsGc+lptsSh.hpslsssGhphthTahhsHhp..t.h.hsVhIssPsGhRhth.lohoLpasht.spahhDpl.hstE.ssFhaRD.........D.shssSt.uhpGSash.VVsssshlpohpshh-pEI.Rhpushcp 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 9 10 15 +14916 PF15065 NCU-G1 Lysosomal transcription factor, NCU-G1 Coggill P pcc Jackhmmer:Q8WWB7 Family NCU-G1 is a set of highly conserved nuclear proteins rich in proline with a molecular weight of approximately 44 kDa. Especially high levels are detected in human prostate, liver and kidney. NCU-G1 is a dual-function family capable of functioning as a transcription factor as well as a nuclear receptor co-activator by stimulating the transcriptional activity of peroxisome proliferator-activated receptor-alpha (PPAR-alpha) [1]. 
27.00 27.00 30.60 28.30 23.90 23.20 hmmbuild -o /dev/null HMM SEED 350 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.10 0.70 -5.76 20 84 2012-06-27 14:54:08 2012-06-27 15:54:08 1 2 63 0 54 97 0 305.00 35 82.60 NEW ssLlHlcA...sGsscTlHYlWss.hGsPolLlsh...Tsssso...lplsWschLssp.....hsuulphssps..pYotullhs+lhEFsDsNDouhhs..sssp.h.sYsLppFsWsphshs....sstslssshsut.hpp.......sGolshplsAFsspsRssphP+LLHouNSoQl-lslsslss+uspS...RFAlE............llsVssstsssssp...............p+SIDDEYTPuIFclsplhs.....hstsssssGalQW+PVuYspspRshssu..s.spps.......phpslps.....sshssoslshAaaGsp....h.hpuhNlSFGhssDG.FYpsosYhoWThllGhGpPPs-phSshVlhlhulGLGlPhlhllhGGlalsl++t+tpp .....................sLlHlRA...sGsssTlHYlWss...hG..sPullllt.....Tssspo...lslsWsphlusp..................sssulhl.sps.l...paStAllFs+lhEasssssoshhs.........sp......hssYsLtpFsWsphs.s.....hs.tslssshp..Gpshpps.............sGSlshclpuFspssRssp..P+LLH.Tus.osQl-lsLsshss+....u..spS...hFuLE............lh.slupsssssphp...............pcSIDDEasPulFplspl.......................huo.ssGahQW+PVuasp.tp.tshpsu....hsspts.......shpss.s.........hshspsslhpAaaGsp.....................thpshNloFGsssss.hYhsppYloWohllGhG.PPsDthSsLVlslhuluLGsPhlhllhGGlhlhlt++p...p................. 0 19 23 36 +14917 PF15066 CAGE1 Cancer-associated gene protein 1 family Coggill P pcc Jackhmmer:Q8TC20 Family CAGE-1 is a family of proteins overexpressed in tumour tissues compared with surrounding tissues. CAGE-1 gene showed testis-specific expression among normal tissues and displayed wide expression in a variety of cancer cell lines and cancer tissues [1]. CAGE-1 is predominantly expressed during post-meiotic stages. It localises to the acrosomal matrix and acrosomal granule showing it to be a component of the acrosome of mammalian spermatids and spermatozoa [2]. 
27.00 27.00 68.40 68.40 21.50 20.90 hmmbuild -o /dev/null HMM SEED 527 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.66 0.70 -6.06 4 63 2012-06-27 15:13:07 2012-06-27 16:13:07 1 3 26 0 23 63 0 325.00 48 63.21 NEW hSEu-oMNhsuhSQDloaScSPhhMEToSTTSDLPQsEhKNscRENESc.TLsEDIYGTlDshLsDhsItN.ucNlLTQPVDT.SlSShRQFEPICKFHhhEAFNDE.hshpsLptuhsYTEKPEhQSpVYNsAKDsshKpDSFKEENsVET..SsSsscDQLupEhVRQ.ssRSPPLlHsSGET.KFsEsShsKSsuhEuALpPSQPQSFLhhENsppsscps..ptNsFp.LDLRA.YptEEhsVSSKtlQshG-IPEhsVsapKEVshEGV-SPtIsSPWSPAGIsWpGuAs.-suhMPDhEQShES.QPlEEDMAL.ElLtKLcHTN+KQpspIQDLQsSNhYLE++VcELQhpsTKQQVFVDIINKLKsplEELIEDKY+lhLEKsDTsKTLQNLpElLsHTQKHLQEu+N-KEoLQLplKKIKuNYV+LQERYMTEhQQKs+oVSQCLEM-+TLScKEEEVcRLQQLKGELEKAToSALDLLKREKcTpEQEFLSLQEEFQK+-KENLEERQKLKSRLEKLLAQVp ...................................................................................................................................................tpPt..s.hht..tcs..p.t...Eps.h....sh..pp-.hs.t.h.p..spS....h.p..tc.....h.E.sh.ts...tssht..ps.sh....................................hsspthp...-hschsss.tc..-st.-shpp..thhSshssssl.hpsts..-ss.h.DhE.shEuLpPl.p.ED.hALNEsLp+LppsN+cQphpIQ-LptpNhhLEp+lcELQhphs+QpVhlDlIsKLKtplEcLI--KYplhLEKN-hp+pLpslpEh.spop+pLpES+p-KchLpLphKKhKspYhpLQERYhsEhppKscs.ophlEhp+sLSpK-EElpRLQph+t-hE...+sT.SALshLpcEKc.pEpEhLuhp.EFQ+h-+tpLpERpcLK.plpcLlsQl......... 0 2 3 5 +14918 PF15067 FAM124 FAM124 family Coggill P, Hetherington K kh6 Jackhmmer:Q9H5Z6 Family The exact function of this protein family remains unknown. This family of proteins is found in eukaryotes. Proteins in this family are approximately 480 amino acids in length. There is a conserved LFL sequence motif. 
23.50 23.50 23.60 41.90 21.60 23.00 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.73 0.70 -11.51 0.70 -4.93 13 111 2012-06-27 15:31:04 2012-06-27 16:31:04 1 4 44 0 66 93 0 222.70 50 52.79 NEW lolHlls-sG-uphLQpAlDpLLsaIcPDlpLFpVS.ERtssh+tpcstptp..........tsphPuLuVlLFLpEs.hG..EEplhplpctLppPPW..paHHTppspG+..h............PahsssQDFaoLusthPlWAlRpVHYGpEIlRhTlYCsa-NYsDhl+hYElIL++csst+KsDFChFslYos.sh-lQlSLKpL.PhGhsPpPp-SulLpFRV+-lGQLVPLLPsPCoPIScsRWQTpDaDGNKILLQ .....................holHllAssGcuphLQpslDplLshIpP-lpLFpVS.ERtush+hpct.ps..............st.PuluVlLFLp..Ep.hG..EE.....plhplpcsLQ+PPW..paHHTppspG.R..hh..................PYhsssQ-FaoLss..thPlWulR.VHaGpEIlRhTlYCpa-NYtDslRhYEhILpRps...otpKssFChFslaSsh..shslQhSLKpL.Phu.sssPp-SSVLpF+Vc-IGpLVPLLPNPC..sPISpsR.WQTpDaDGNKILLQ............ 0 12 16 34 +14919 PF15068 FAM101 FAM101 family Hetherington K kh6 Jackhmmer:Q8N5W9 Family This protein family includes the actin regulators, Refilin A and B, however the exact function of this protein family remains unknown. Refilin is thought to stabilise peri-nuclear actin filament bundles, important in fibroblasts. Refilin is important as changes in localisation and shape in the nucleus plays a role in cellular and developmental processes [1]. 
27.00 27.00 32.50 29.40 20.30 18.90 hmmbuild -o /dev/null HMM SEED 207 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.45 0.70 -4.88 12 102 2012-06-27 15:39:28 2012-06-27 16:39:28 1 2 41 0 55 78 0 169.70 52 92.01 NEW MVG+LpLQsh..s-sLctpsR.-GlLDSPDSGLPspPSPS..aasLusGhh........ssh......p.ptspput..s.PhhL.ss.uspspPRhpPl.FGEuIElcPhPs+EIRhsSpVKYDSE+HFhDcVhhhPls.slouhSpTllulPNCTWRsYKoplphEPRp+shRFpSTTIlaPKHs+ohYpTTLsYss...tpstRhFhSSVpLEssE .....................................................................................ss...........................................s.....h..ss...ssthpPRh..hPl.FGEulEhsP..PspEl.R..hsSpVKYDSE+.HFhDcV.hhPls.sVsuhSpTllssPNC...TWRsY+uplphEPRp+shRFpSTTIlaPK+s+ssYpTTLsYss...t+s.hRhFhSSVpLEhsp........................... 0 4 7 22 +14920 PF15069 FAM163 FAM163 family Coggill P, Hetherington K kh6 Jackhmmer:Q96GL9 Family This protein family is alternatively named Neuroblastoma-derived secretory proteins. Highly expressed in neuroblastoma compared to other tissues, suggesting that it may be used as a marker for metastasis in bone marrow [1]. 27.10 27.10 27.20 27.10 26.90 27.00 hmmbuild -o /dev/null HMM SEED 143 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.33 0.71 -3.61 13 90 2012-06-27 15:45:19 2012-06-27 16:45:19 1 2 40 0 58 70 0 148.20 51 97.46 NEW MTAGTVVI....................TGGILATVILLCIIAVLCYCRLQYa...CC++p.......-s-p-.....ccp.shshpsh.s...................su.shhs....s.sppptpsp.hCsoC..o.at.s.Falpss............GGcRlsatshp.......................h...h.thhp.spuIST-V ............MTAGTVVI....................TGGILATVILLCIIAVLCYCR...LQYY........CCKKs.......coE--......EEE.Dhs...scs+hPshpusps.sh........sstsuLhPhtspshuppsstupshCsoC..S.YpsP.Falpps-............stNGGERlsatshp.c-ht.Ps...................hthus.Quhsssh.uuhREuFopsRuISTDV....................................... 
0 5 10 22 +14921 PF15070 GOLGA2L5 Putative golgin subfamily A member 2-like protein 5 Hetherington K kh6 Jackhmmer:A8MS94 Family The function of the GOLGA2L5 protein family remains unknown. This family of proteins is thought to be found in the Golgi apparatus of eukaryotes. Proteins in this family are typically between and 840 amino acids in length. 27.00 27.00 27.20 27.20 26.20 26.70 hmmbuild -o /dev/null HMM SEED 617 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.33 0.70 -13.17 0.70 -6.03 8 690 2012-06-27 15:46:46 2012-06-27 16:46:46 1 6 83 0 210 470 0 199.30 19 59.74 NEW QLpESl+QLQsERDQYAEsLKEEuAlWQQRhQQMSEQl+pL+EEKE+uhuQVQELEoslAEL+NQhsVPP...sPpPPA...GPSEsE.ppLQuEsEQLQKELEsLutQlQAQV+DNEsLSRLNQEQEpRLLELERsAEpWuEQAE-R+QILESMQSDRsTISRALSQNRELKEQLAELQNGFVKLTNENMElTSALQSEQHVKKELAKKLGQLQEpLGELKETVELKSQEAQuLQpQRDQYLuHLQQYsAAY.......QQLuuEKEhLHKQhLLQTQLMDRLQHEEVQGKsuAEhApQELQEoQERLEAssQcNQQLQAQLSLhAsPG........EGDGLDSE-c-E.........EsspPpluIPE-LESREAMVAFFNSAlApAEEEpARLRpQLKEQKhRCpcLuHLlAssppp.ccc.AsuP..........tsuGD.SVPsEoHpALQVAMEKLQuRFTELMQEKsDLKERVEELEHRCIQLSGETDTIGEYIALYQsQRAVLKpRH+EKEEYISRLAQDKEEMKlKLLELQ-LVLRLVsERNEWpuKaLAAAQNPus.sosussA......QELGAA-sQGsLcEVSLAp.ss.........ltPsQGEA..Gsss......PpENPTAQQIMQLL+EIQNPQERPG..LGpNPCIPFFYRADENDEVKIMVl 
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 122 129 171 +14922 PF15071 TMEM220 Transmembrane family 220, helix Coggill P, Hetherington K kh6 Jackhmmer:Q6QAJ8 Family Transmembrane 220 (TMEM220) is a domain of unknown function. It is thought to be a transmembrane helix. The length of this protein is typically between 150 and 160 amino acids. In humans, it is found in the chromosomal position 17p13.1. 
27.00 27.00 29.30 28.40 24.30 24.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.53 0.72 -3.58 29 81 2012-06-27 16:25:50 2012-06-27 17:25:50 1 1 72 0 51 95 19 97.70 30 68.70 NEW hFshsAhlQlNDP...........DshlWlslY.....hlsull...shhhhhthhsphlhhhhss............shhhhhhhhtsslhttht.t.th............phE.uREhhGLhlsshhhhlhhhhs .........hFshhAhlQhNDP...........Ds.lWhslY.....hlsAll....hl..hhs.tlssp.hlathlushah........hhshhhshhlsshlhtcsppshh.......................ppEcuREhhGLlIlsshhhlpt...p........... 0 17 29 34 +14923 PF15072 DUF4539 Domain of unknown function (DUF4539) Coggill P pcc Jackhmmer:Q8N3J3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 230 and 625 amino acids in length. 27.00 27.00 27.70 42.00 26.70 26.40 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.25 0.72 -9.70 0.72 -4.29 32 116 2012-06-27 16:39:42 2012-06-27 17:39:42 1 2 98 0 75 119 2 86.40 37 17.40 NEW stKVsplssll+slst..........sst.Ds.hlhlpD..PTGphpuslH.+clh.p..p..asstlssGusLlL+cVs.Vas...........ss......tptta.LslThpNllplass-s ..p.sKVsphsshl+Slst..........ssh.Ds.tllhKD..PT.G.phpuolH.+pllp..p...htspLpsGuVLlL+pVu.VFs..................PS......hpspYLNlT.pNLlplass-.s......... 0 27 38 56 +14924 PF15073 DUF4540 Domain of unknown function (DUF4540) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A4D263 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 109 and 302 amino acids in length. In humans, it is found in the chromosomal position, C7orf72. 
27.00 27.00 28.30 28.10 21.20 17.50 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.94 0.71 -10.81 0.71 -3.60 6 31 2012-06-28 09:34:24 2012-06-28 10:34:24 1 3 28 0 21 43 0 119.80 47 33.03 NEW PsKPlsFVSsSoRSKYIPLYTG+VQSTsADDlDNPhGDIsSlApPRsSchhYTNoSRoAsIPGYTGKsHasAT+PsNschPSpoPSsD.SEhHRlhh+cMtV.....DhFtHQuPLS+MVTTVpPYNPFNKKDKET .............................s+PlsFVSsSoRSpaIPhY..oG+V.pSssu.DDlDNPhGDhtulupsRpS+.h....Y.Tsoo+usNIPGYTGKV..HFsATHPAN.SsIPsT.sPSsD....SEh+RslhcEMtV.......DhFRHQuPLS+hVTTV+PYNPFNKK-KET............. 0 11 11 11 +14925 PF15074 DUF4541 Domain of unknown function (DUF4541) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A4QMS7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 100 and 163 amino acids in length. There is a conserved KLHRDDR sequence motif. There is a single completely conserved residue Y that may be functionally important. In humans, the gene is found in the chromosomal location, C5orf49. 25.00 25.00 26.40 25.10 24.30 22.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.01 0.72 -3.44 12 46 2012-06-28 09:38:35 2012-06-28 10:38:35 1 1 39 0 33 52 0 87.70 51 61.98 NEW YFssspppsphoh....YDplFph.ppsa...ssKLHRDDRp+hpslGLclppEEpp+sVPlLsSS.YG+.h..cpPl-..s+.casRls+Vps-FYp+ssI .......Ya.p.tpsGllSL....YDslF++.c.sY...sQKLHRDDREHAKolGL+VNEEEppRsVsVLoSSVYG+Rl..ppPlEPhsR.casRssHVpsDFYRKNsI........ 0 16 19 22 +14926 PF15075 DUF4542 Domain of unknown function (DUF4542) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A8MV24 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 123 and 173 amino acids in length. There is a conserved IPPYN sequence motif. The gene that encodes this protein in humans, is found in the chromosomal position, C17orf98. 
27.00 27.00 28.20 33.80 26.60 23.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -10.63 0.71 -4.22 9 48 2012-06-28 09:40:34 2012-06-28 10:40:34 1 2 36 0 36 51 0 121.10 49 77.44 NEW LE+uFVLDuluVuolupshp+tpPKhhoAIPPYNAQpD+HApsYFcS.sVpslL++TtQ....spsGsohsGhllD+ahhhG........tGttYlspRNhsGAG..........HSpptVsGHs.ahsshpshp.GapGhFGaRRNTPpLRppPSsF ..hEKuFlLDuVAVuohucs.as+tpPKlhSAIPPYNAQpDhHAcsYF......pS+sV.slLRKTsQ....c+GGTutcGhllDhhalhG........tGpcYLs+RNhA....GuG..........HShppVsGHs.a.usl+sh..GaNGpFGYRRNTPsLRppsSsF................. 0 16 18 22 +14927 PF15076 DUF4543 Domain of unknown function (DUF4543) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0P5P2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 90 amino acids in length. The human member of this family is C17orf67. 27.00 27.00 75.00 75.00 22.30 19.60 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.80 0.72 -3.95 4 35 2012-06-28 09:57:32 2012-06-28 10:57:32 1 1 31 0 22 30 0 72.90 78 80.11 NEW aT.DASPILsEKpAKQlLRo+RpDR.pKsGaPDEPMREaMLaLQtLEQRuEEQFLEHWLNPHChPHCsRNlVHPV .ht...-uSPI.LpEKQAKQLLRSRRQDRP.SKPGFPDEPMREYMHHLLsLEHRAEEQFLEHWLNPHCKPHCDRNhVHPV.. 0 1 2 8 +14928 PF15077 DUF4544 Domain of unknown function (DUF4544) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q3KP22 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 118 and 256 amino acids in length. The human member of this family is C11orf85. 
27.00 27.00 28.60 28.50 22.20 17.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.24 0.70 -4.85 6 50 2012-06-28 10:05:16 2012-06-28 11:05:16 1 2 29 0 21 34 0 161.40 51 91.32 NEW MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRsVLGNLDsLQPFATEHFIVFPYKSKWERVSHLKFKHGEsVLVPYPFVFTLYVEMKhFHEsLSsGKPhsDSPLGLVLAERKAAtAsM+..KRKpsEVPSSPSRPGLDRs.........uKc........KP.p+cs+R.hphhsshstscVtsQ..h.csu.tGphlP.pppos.h.L+sPpphtssu.hGF ...MSLKPFTYPFPETRFLHAGssVYKFKIRY...GpuIRGEElEsKEVIhpELEDSlRsV.LtNhDsLQPFs..T-HFllFPYKu+Wppsu+L+FKHtphhLhPYPFVhTLYlEhKhFp.EsLssG+..hps.s.ths.scp.....cs.ts.h.hc...KR+hh-....sostpst.....pR...........tpc........p..pcps.pp.h.......hsh.shscl.pt.....tps..G.h....pps.........c.....t..u.hG.............................................. 0 3 3 5 +14929 PF15078 DUF4545 Domain of unknown function (DUF4545) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JVX7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 417 amino acids in length. The human member of this family is C1orf141. 
27.00 27.00 30.80 30.70 21.30 21.00 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.79 0.70 -12.61 0.70 -5.57 4 51 2012-06-28 10:14:33 2012-06-28 11:14:33 1 2 20 0 18 46 0 233.80 39 100.21 NEW MsE+IL-KhDhL-EpA+hLhAhRtKpsshppplKKKs.lhPLhFDaplcFt-sIssShSKT.upIpKD+upslKKsKR.VSFK.pPcPp+SDFEcSsLRPPhLsTsIphpE.K.hE.tEE.LKSRSh.Sh+YLKDpsETE.AKPh...h.....................................sQHcpps++ohcSTsaSusSSppsptp+.Ess.h.TKEsEhhRNDQhpch.sV+Qp.LLPLshED.LKsPchKhIDluPscTVpopMcpNcsNPIIFa-TcYVQMLhLTK.RhsPauhc......ap+pNlVLE+NCEhLKolhpDps.TsSKsppThsssQpKcl.slShElup+sls-Kh+KKpc+.s.cpIS.spLYNlSQTFSSLoKKFVGahDKsVIQEKSsKss+FE+.FSpsKPss..KFos.PlKYsSKP.+NILclHKlNNlTPLDsLLs .....................................sphQsph+Kps.hhPLTFDFplpFEcs.s.s.htKt.spIpcs+.sh..shp.psch.ssh+.....p.......s.pspFpp.slt...h.s.hp..pE..c.............................................................................................................h............pEsp......hp....htpp.hhPLshED.LhpsphKhl..t...p...ph.......................................................................................................................................................................................................................................................................................... 0 2 2 2 +14930 PF15079 DUF4546 Domain of unknown function (DUF4546) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5T0J7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 88 and 212 amino acids in length. The human member of this family is C1orf49. 
27.00 27.00 76.30 35.60 21.70 26.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.81 0.71 -11.23 0.71 -5.17 5 48 2012-06-28 10:17:19 2012-06-28 11:17:19 1 2 20 0 16 49 0 178.30 66 92.20 NEW MSAKRAE.KKTsL.......SKNYKAVCLELKPEPTKTYDYKGsKQEGhFTKsGsT+ELKNELREVREELKEKMEEIKQIKDlMDKDFDKLaEFVEIMKEMQKDMDEKMDVLINIQKNsKLPLRRuPKEQQELRLlGKTD+-PQLRl+KMD...GuDGsPLALHKKlVAPQpT.+sPLDPLHpCsoCCEKCLLCALKNNpNRG+.saHAWusFSPL ..................MSAKRAELKKTpL........SKNYKAVCLELKPEPTK...TaDYKuVKQEG.FTKsGsTp-LKNELREVREELKEKMEEIKQ..IKDlMDKDFDKL+EFVEIMKEMQKDMDEKMDlLINhQKN.KLPLRRuPKEQ.QELRLhGKTcp-PQLR.KKMD...GssGAPhuLHKKsMAsQKs.pssLDsLHpCtoCC.EKCLLCALKNNhNpGt................................... 0 1 1 1 +14931 PF15080 DUF4547 Domain of unknown function (DUF4547) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q147U7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 144 and 206 amino acids in length. The human member of this family is C3orf43. 27.00 27.00 101.90 101.70 20.10 17.50 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.38 0.71 -4.89 3 28 2012-06-28 10:20:18 2012-06-28 11:20:18 1 1 22 0 16 37 0 189.00 76 93.09 NEW DHKLQALETQFKELDFIKDNLTQKFEHHSKTLASQAAQDElWTAVLALpFTSMELNILYSYVIEVLICLHTRVLEKLPDLVRSLPTLASVLRRKVKNKRIRsVWESILEEaGLQEGDITALCTFFVAHGNKAEHYTAKVRQMYIRDVoFMITNMVKNQALQDGLLRAVQVIEKGKAsRTPEcpKSPLKELIPSVKu .D+KLQALEsQFKELDFTKDNLhQKFEpHSKoLASQAAQDEhWTAVhALcFTSMELNILYSYVIEVLICLHTRVLEKLPDLVRGLPTLASVLRRKVKNKRIRVVWESVLEEpGLQEGDlTALCTFFIAHGNKAEHYsAKVRQMYIRDVTFhITNMVKNQALQDuLLRAVQVIEKGKAVRsPEcQKSSLKELIPSVKN....... 0 1 1 3 +14932 PF15081 DUF4548 Domain of unknown function (DUF4548) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95561 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 178 amino acids in length. The human member of this family is C1orf105. 
27.00 27.00 65.70 65.70 22.80 21.90 hmmbuild -o /dev/null HMM SEED 167 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.03 0.71 -11.06 0.71 -4.71 4 26 2012-06-28 10:27:17 2012-06-28 11:27:17 1 1 16 0 9 25 0 132.00 63 89.87 NEW PWLSEASLVNKPLlLSlP+RYP+tSushLsSsKKsMsLPlLhQsP.DshSKARRNQs-sMLlRNpQLCSTC...QEhKMVQPRTMpIPDc.KsSFENsMSaR.MSLHpPKhQssscs.+sDIPTESI+YRLPILGPRTAVFHGLLT-AYcTL+EpQ+SSLPRKEPhGKTsR ...PWhsEAShlNKPLlLSlP+RYP.tSushLsSs++shshPhhhQsP.DshSKARRNQpssML.hRNpQLCSTC...pEhKMVQPRshpIPD.D.KsSFENhMSaR.MSLHpPKhQssscsa+cDIPTESIH.YRLPILGPRTAVFHGLLo-uYKTLpEpQpSSLPRKEPhuKThR.......... 0 1 1 2 +14933 PF15082 DUF4549 Domain of unknown function (DUF4549) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5T699 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 143 and 1871 amino acids in length. The human member of this family is C6orf183. 27.00 27.00 35.90 40.40 21.30 18.50 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.81 0.71 -4.55 4 24 2012-06-28 10:34:07 2012-06-28 11:34:07 1 2 20 0 19 27 0 128.00 53 23.05 NEW DplYKlSSTERV..LEKELtspLsELKsplE-pGlL.GTsNRsaSSlthPKDssaFR+EREhhLKKsLQVAEuKPLVIQADlhpRELESCLpREYTstsLPLLLhQaYT-RItQLuQSKYLHhLRWKRFCp+SphhEpLYPLYp .......................DplY+lsSoERlp.LE+cLthpLsELKsEIEEpthh.sst..p+s..aSSVphPKDltaFR+EREhALK+sLQVAEuKPLVlQADVMQRELESCL+REYTsENLPLLLhQaYs-RIpQLsQsKYLHMLRWKRFCpHSphhEQLYPlYp......... 0 8 10 12 +14934 PF15083 Colipase-like Colipase-like Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NCL2 Family This is a family of colipase-like proteins. 
27.00 27.00 31.50 28.80 25.70 25.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.17 0.72 -10.80 0.72 -4.05 9 62 2012-06-28 10:57:55 2012-06-28 11:57:55 1 3 22 0 35 60 0 71.90 41 90.69 NEW polh.pClsapKsNuhpCs+HSECpSsCClpspppsttFC.sP+o.IhhpClPhRKsstsh...CpptpEChSpsCl..pE....+Cs+RstlL ...................hpK..s.hCpcHsECpSsCClhss.ssppFC.ss+o.IhhpClPW+Ksshth...Cp.t.pC.p.p.h.............................. 0 3 3 7 +14935 PF15084 DUF4550 Domain of unknown function (DUF4550) Bateman A agb Jackhmmer:Q9ULG3 Family This presumed domain is functionally uncharacterised. This domain family is found in eukaryotes, and is approximately 100 amino acids in length. This domain contains an N-terminal HXE motif. 27.00 27.00 52.70 51.30 23.40 22.90 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.13 0.72 -3.89 8 33 2012-06-28 11:36:29 2012-06-28 12:36:29 1 3 30 0 20 49 0 98.00 47 16.75 NEW spsaaHIEY.hhPcD.cspKlDlVlassVAKl.......Fh-sph.Kssp.h+.ssDpsW.lsWspsaslsVsp-hlpchh.+tlsl+laDo+-KVSs+ARaDRsKsht .....h.ppaaHIEYaLLPDDt.EP+KVDlVlF.shl.AKV.......FL-Sus.KTV+PWc.EGD+sW.VSWpQTFsIsVTKELL+Kls....h....HKITL+lWDoKDKlScKsRY.R.Ksh.s.... 0 9 10 11 +14936 PF15085 NPFF Neuropeptide FF Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O15130 Family \N 27.00 27.00 34.50 32.50 23.70 22.50 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.94 0.72 -10.78 0.72 -3.76 9 43 2012-06-28 11:36:44 2012-06-28 12:36:44 1 1 29 0 15 40 0 90.30 49 79.89 NEW stuhpp.ppshsc-D.tP.ssp-thscphhphsup.......................lLRuLL.u.QR.sRs..SsLaQPQRFGRsopGsh.sEpph.psRsWEussuQhWShAsPQRFG.KK ...........................................................................................-sph.so..........LLRsLLQAMpRPGRS..uFLFQPQRFGRN.opGSWusEcL...SPRAhEul..........supFWSLAAPQRFGKK. 
0 1 2 5 +14937 PF15086 UPF0542 Uncharacterised protein family UPF0542 Bateman A agb Jackhmmer:Q7Z3B0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved LSWKL sequence motif. This family includes human protein C5orf43. 27.00 27.00 47.90 47.80 24.20 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.57 0.72 -4.26 3 46 2012-06-28 11:42:20 2012-06-28 12:42:20 1 2 45 0 33 41 0 73.30 71 82.43 NEW M..FDl.KAWL-pV....VcWAA-DPGuFlTTVLLuLTPFFLlSAVLSWKLAKtIEAE-KRcKp+pK+ptNlAcsRRpKKD .............MhDl.KAWAEYV....VEWAAKDPYGFLTTVILALTPLFLASAVLSWKLAKMIEAREKEQKKKQKRQENIAKAKRhKKD........ 0 8 9 18 +14938 PF15087 DUF4551 Protein of unknown function (DUF4551) Bateman A agb Jackhmmer:Q8IXR9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. This family includes human protein C12orf56. 27.00 27.00 36.80 62.90 17.10 23.60 hmmbuild -o /dev/null HMM SEED 617 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.20 0.70 -12.95 0.70 -6.15 7 68 2012-06-28 11:45:46 2012-06-28 12:45:46 1 2 35 0 29 57 0 424.10 47 94.34 NEW MA.....tsuhsstpNu+LDuFL+RshssplY-plRuaEPClVVS-p.c+sFhaVlLSD-plYLTEpPPRslphsVsh+cIhuI-LIsDhP-FLSGpDREpsQHIRIhY..................ussp.shPtptthutsppssc.sst.h.h.sphS.ohspu..sLtuspcposthtsspst.stsL.c.p.tt........................hPp.s..sspss.spcpstshPsssssshtsps.ssstss.spts.p.ptssptht.hsSllup.lppspshc............cc-uELHLYhlSsTSplaL+LpSoWNsYIIRuT..Lh.DPlYhpcpss....pusp.........p.EcphplFsQLpuELL.csssLEplahLLQEL+sAApRNhsLK+LFWKosDLF.FLVppLp-..L.tscstpshpt..ppADcLhlsh.lsQTLuhMFRET-l-ssRLshLsAcpGshh.pLLlhLlscPph.h.....p.p..tp...............cs-lQtLht-YhDsAsuLLFEllLlsQpushsssss...............phhsluWlhphLpspP..lhsFluh.scpsVhsLSsot.t.LSPuQAlLLYQphhlLhuCLQaSppLupaIRssa+EEFRYFl+hPslEcKLPspYPIopPsh+LlpplLphl. 
.......................................................................................................lsDhPpFLps.ppphsQHIplha...................................st..p....h.hth..t..ppphp.....p..s......h...p....p.tts...h.......................................................................................................................................................................t.................................................................................hchY.lp.tS.h...hps.Wpsh...h...................................ppEcphphFsQL+uELh....cs.olc+lh.Ll.EL+sAAp+shhL++LFWKosDLFhFLVspLcE.hLstsp.stpuhpstsp..csDcLh...hsh.llQTLshMFRETEhEsuRLNhLsA++usLh.pLLllLlscPpl.p..tss..sh.p.sss..........st.....hsh-scLQpLlhEYhssAouLLaElLLlhpQushs.s..us...............pF.uluWhhphLQspP..lhsFlut.VcplV.p................sLSss..p.LoPsQAVLLYQphhlLhuCLpaSppLupalRssa+EEF..RYFl+hsslpc+LP.pYPIopPThpLhcElLpll............................ 0 9 10 15 +14939 PF15088 NADH_dh_m_C1 NADH dehydrogenase [ubiquinone] 1 subunit C1, mitochondrial Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O43677 Family \N 27.00 27.00 39.60 39.40 22.20 21.80 hmmbuild -o /dev/null HMM SEED 49 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.26 0.72 -8.49 0.72 -4.46 7 40 2012-06-28 11:47:20 2012-06-28 12:47:20 1 1 32 0 17 39 0 47.10 70 65.75 NEW hFhspcPspupPNWL+VGLsLGTolFlWhhLhKQHN-DVhEYK+RNGLE .......KFYVREPssuKPsWLKVGLTLGToVFLWlYLIKQHNEDVLEYKRRNGLE.. 0 1 1 2 +14940 PF15089 DUF4552 Domain of unknown function (DUF4552) Bateman A agb Jackhmmer:Q86WS4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. Proteins in this family are typically between 425 and 649 amino acids in length. 
27.00 27.00 150.10 149.90 22.70 21.80 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.66 0.70 -5.58 5 29 2012-06-28 11:47:55 2012-06-28 12:47:55 1 1 23 0 14 25 0 374.50 62 67.71 NEW SsGN+sFLT+os.uVVMGEDCGShDERRQSDFITEKpSlQHIWGENRKEhSNFLEDVNQsTssLLSENCDSFIScNMINLLNIDQQ+IKKTFDKCDYDoMuDsssVlSSDKNHsTDRCIRSIFTDPELsFSNSTFNKoSYPEKCQPNK.CQKEYsNNERNsLSTSFEKDaYPASS-+KGKFENDYQEKTPQ+co..QKYPVNHMuNIPLEELHSKQSWDFGlGE....ILMcEGGhsSLKu+sTSTKKs.YLDSSQSSQSTSYSPRPTD.SCFSSSSEMPSEDEDQI.QQIEDSNRpSIKsKETTNNFYLEsM...sKLPsD+IIKNNAKhHKQNENFHQFShKNNTDQFPQSQCNSAHILQNKTosNCILQlARCDAWVQTESEsVMEEKLDlAIQCDIISKCKCRS.......-VSsLCNVERCoENlKADTTGGQEILKNN ...S.GNRNhLTcpP.sVlMsEDCtSMsEhRQSDaITEKpSlQHIWGcNtKElSNFLEDVNQssPslLSENCDSFVSQNMINlLNIDQQ+IKKTFsKCsYDSMGDhCsVTuSDKNcsTDRCIRsIFTVPELT.FSNSThNKTSYPEKCQPNKphQ+EYNsNERNDLSTSFEpDhYPuSSE+K.GKhENDYQEKsPQKsI..QKYPsNoMssIP.EELHtKQSWDFGlsE....ILMcEGGhsSLKup.PTSscKI...LDSuQSSpSoSYSPRPTD.SCFSSSS-h.SEDEDQl....QphE-SN+hsI+scEptN.....NhalEpM...sclss-cIlKsNsKhHKQNENFaQaShKsNoDpFspSQCNSAHlLQNKTssNClLQsuRCDAhVQTEoEslhcEKLDAAIQCDlIScCpC+s.......-lS.LsshcpCotNlpuDTTGGQEIhpNN.......... 0 1 1 2 +14941 PF15090 DUF4553 Domain of unknown function (DUF4553) Bateman A agb Jackhmmer:Q8N655 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes the human protein C10orf12. 
27.00 27.00 45.20 45.10 24.00 22.70 hmmbuild -o /dev/null HMM SEED 463 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.77 0.70 -5.49 7 61 2012-06-28 11:50:07 2012-06-28 12:50:07 1 1 32 0 32 58 0 389.50 41 45.00 NEW shDslhsspPthhhWssptpNppLltEhNspa.plppuWlQ..lpt-ptssshs+s+scphp-.haKspp.hcpChhtLEV...SPVpMLF.KpapLsclCpWFhpTTETpSLslV+KhNsR.Ph-l.ss+t.h.phpsSs.....hpsphh+KHhKKFAhuoPu+sshKhphLathlppss.pscup.....oLu..tcsphcclpp-Rhup.tppl.o.uoschhpKhpNlRhhhpsQh.......hpss.G.tsusEsppsppols.pslhsP....shpstspphsspschss+sph..........pto.psppshpKtps.schpssphpupo.+cs+lhh+Khsplcp.ps.phsshhhpPtuh-psu..s+ps..cEs+hh..+ppss+pss.p+ppcEppsh+sspPs..ottlst..cpptLscusspspps..sscthuphppRtRP.hKosE.sstpR+++.psptsp.tuhhs+hpst .....................................slDslhsppPphh.WsspppNppLltphNspa.plppuWlQ..lp+-ttsssht+s+s.cp.p-..haKspc..cpChspLEs...SPVpMLF.......ppacLsplCpWFhpTTETpSLsIVKKhNsR.Ph-l.ss+tsh.thpsSs......hpAphl+KHhKKFshuoPu+sshKhp.hLath.hpps..p.lcsp..........s.u..pcsphcclp....c-R.p+.s.t.l.s.uoschhpKhpNlRhhhpsQ........hpps.u...u.spsp.uppoVs.pslhsP....shpss...spthsshsc.ppthph..........ptspp.hpc.h.Ksph..p.chptsphpot...s.....pcs+h.hhKhsp.cphss...phsshhhpPtuhDpsu..s+phphcEsKhhh..+ppsu+pss.p+pp+E.pst+ssps...s-thst.hpppth.scssshspps..tsch.uphpp.....+tRP.hK...osE.ss.pR+++.psp.sp.tshhp+....................................................................................................................... 0 2 5 16 +14942 PF15091 DUF4554 Domain of unknown function (DUF4554) Bateman A agb Jackhmmer:Q8N6T0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in some vertebrates. This family includes human protein C11orf80. 
27.00 27.00 52.30 43.80 18.70 23.80 hmmbuild -o /dev/null HMM SEED 458 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.53 0.70 -12.37 0.70 -5.75 4 51 2012-06-28 11:52:02 2012-06-28 12:52:02 1 2 24 0 17 40 0 291.10 58 83.54 NEW MTDsLVIK+FL+KIlhVHsKl+FpFSVKVNGlLSt-IFus....ENEPsLsLuNGISLhlshpHYVS+PpFshsE.pCSRIHPVLGHPl.LhIPsDhAsMGLLGELhLTPAAALCPsPKsaSNQLNRISS.lp...IFLYGPhGLPLlhss.EQPpTohF+DhuhhlDWKKapLsMlPNLDLsLDpshlLPDVsYpVE............ssEcsQSQs.cuQtQTLLLFLFVDFpSsFPVQQsElWulaTLLTsHLssILsES+SsVQsSIQssVDpsLEpHaQtAKs+Q+LQASLSVAVsSIMSIlTGSTsSSFRKhCLQoLpAsDTQEFpTKL++sFp-ITpHpFLs+CSC-hcQclT.ccp.oAQsTcDthcs.suLEhhh-ouGQsENKRLK...cuS.phutccopThPsuc-usssEsssppssPTstutpsp.upuphtssGsththsupssth.-sLWLQEVSNLSEWL ................MTDCLVIKpFL+KllhVHPKlRFpFSVKVNGlLSpElFGs....EpEPsLsLsNGIuLlss.pHYh.psthsshE.hCSRIHPVLGHPVhLhIP-DhsshsLLGELhLTPAAALCPs.Kl.uNQL.s+ISo.lt...IFLYGP.GLPLh.sp.tp..hshhps.s.hlsWKKapLphlPsh-.pLpps..hhPDhsYplE............s.p.ts..psps.t..tpsLLLFlFhDFpssF.sQ.hEhhts.sLLpsHLssILhcs+shVQsslphslDpsLpQHpQtsKs.Q+.............................pshps.pspEhthpLtp....hh....h..h..t.t....................................................................................................................................................... 0 1 2 3 +14943 PF15092 UPF0728 Uncharacterised protein family UPF0728 Bateman A agb Jackhmmer:Q8N6V4 Family This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa. There is a conserved GPY sequence motif. 27.00 27.00 76.20 32.00 19.90 18.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.14 0.72 -9.92 0.72 -4.11 10 35 2012-06-28 11:54:29 2012-06-28 12:54:29 1 2 30 0 25 42 0 85.10 57 77.93 NEW MPppuhVslRYGPYsusGl.V-HRTtRL-GLQAVLtpDGHpVlLEch-DaNsVELlVNGEhVFpCsIp-L-......FGGDG+LDP.......LCcEAhpAV .......MPcpAlVhlRYGPYSAs.GLsVEH+TaRLpGLQAVLspDGHcVILEKIEDWNlVELhVNtElVF+CNIpDLE......F.GGDG+LDP.......LCccAhhAV................ 
0 7 10 13 +14944 PF15093 DUF4555 Domain of unknown function (DUF4555) Bateman A agb Jackhmmer:Q8N865 Family This family of proteins is functionally uncharacterised. This family of proteins is found in metazoa.This family includes the human protein C7orf31. 27.00 27.00 75.20 64.90 25.50 23.70 hmmbuild -o /dev/null HMM SEED 289 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -11.96 0.70 -5.51 4 43 2012-06-28 11:56:23 2012-06-28 12:56:23 1 1 32 0 29 47 0 246.30 53 49.21 NEW MtsTtshP.....hh+Es.ulshLSsTahSs-LaoPLpospR.TssEspappaRtplt+ss...+sP....WGpc+.cYGGhtPl.LPspaRPKsEPPphVtKuH+HYGSGhp.aP................cthPlpQ.YphTp.phSclRhNDpLLPpP.puslhsh.lctPaPtEHPYtSHIs+hulFPs.sSPc-..ttltstpp......PhPspsPTpP.p.hl..Ko+GsPaRaEl.Dhspcs.+++ALsW.GpssYp.hptssptsR..hYPtPPKohtPN.s.ps.hs.ths.+pssshRslcKSthhToYppc ...........Mtshpshs....hhp+EhpGsDlLSsTahSNclaTPLppshRsT..sSpcRYpELR-oLpps....RLP....WGu-R.EYGGlhPloLPE-H.RPKsEPPplMuKGHpHYGFGG-sW..P............................................................................................................RclPIEQaYaLTQsKKSclhsNDSLlPKPP.sS.sltc..IshPaPlEHPYpTHIsRsAMFPoFsS.PcDhhTGlcARsp.....QPFPPTlPT.....KsaDsslL.KT+GNPYRa.EllDhPhDS.KKKALsWPGQslYashP+sspKs+.lF...YPKPPKohAPNoohps.hDsh.sh+pANIpRNLERSpWlToYs+.......................................... 0 11 11 15 +14945 PF15094 DUF4556 Domain of unknown function (DUF4556) Bateman A agb Jackhmmer:Q8N9H9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrates. This family includes human protein C1orf127. 
27.00 27.00 36.50 36.30 18.80 17.50 hmmbuild -o /dev/null HMM SEED 206 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.39 0.70 -11.45 0.70 -4.77 3 33 2012-06-28 11:58:33 2012-06-28 12:58:33 1 2 21 0 11 33 0 196.40 68 35.49 NEW +CPMhsARLGcESV+CcPpFIQVSRPlPhhsDusQTPWLLSLRGELVASLEDASLMGLpVDlGATsVTVQSPRQELLQR.E.........PLhhVSGuhhh.hpQAlPLVSpQPtSEVuVHIPKQRLGLVKRGShlEEoLS.RFLcVpQo-oFpVs...Es+DFVlVSIPouslLQsQsCQcuc-oPGTQAFYRVDLSLDFAEMAuPVhWTVEsFFQC .....KCPMlpSRLGQESVHCGPhFIQVSR.PLPLhpDspQTPWLLSLRGELVASLEDA.SLMGLYVDlNATTVTVQSPRQsLLQRhE.............VSGt.....psAhP.s...VShQPESEVLVHIPKQRLGLVKRGSalEETLSLRFLRVHQSNhFhVT...EN+DFVVVSIPAAuVLQVQpCQEssGoPGTQAFYRVDLSLEFAEMAuPVLWTVESFFQC...... 0 1 1 2 +14946 PF15095 IL33 Interleukin 33 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95760 Family \N 27.00 27.00 78.10 27.40 21.50 22.00 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.77 0.70 -4.94 4 41 2012-06-28 12:02:12 2012-06-28 13:02:12 1 2 23 1 13 44 0 204.20 59 97.66 NEW MKYSTsKISPAKhpsoAuKALV.ssKLRKSQQKscEVCphYaMpLRSGLhIcKcsCYFRKEsTKRaSL+o..tpptpppthslsuppcphpt.......sFshth.hltsastuhthsSIph...lTEasASLSTYNDQSITFVLEDGSYEIYVEDLtKsQEKDKVLLRYY-SQpPSspoGDGVDG+pLMVNLSPTKDKDFhLHANsKEHSVELQKCEspLP-QAFFVLHcpSSpCVSFECKssPGVFIGVKDNpLALIKhtDpsps....NIhFKLS ..........MKYSssKhSsAKhpsoAu+ALs...Klt+SQQK.scElCphYaMpLRSGLhIcKcuCYFR+EhTKR.S.+o..tt....tthshsupppp..................h.thttth...t..Sl......lpE...ASLSTYNDQSloF.shEstuY.I.VEDhtKspcKDKVLLpYY-SQpsSsEo...GDGVDG+hLMVsLSPp..KDFhLHANsKEHSVELpKCEpsLPDQAFFlLHpp.....sSpCVSFECKssPGsFIGVKDNHLALIcs...-.opp.....NIhFKLS....... 
0 1 1 1 +14947 PF15096 G6B G6B family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95866 Family \N 27.00 27.00 161.30 160.90 22.00 18.00 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.58 0.70 -4.83 5 39 2012-06-28 12:10:19 2012-06-28 13:10:19 1 1 24 0 16 49 0 203.90 70 89.65 NEW NPGASLDGRPGDRVNLSClGVSHPIRWVWAPSFPACKGLSKGRRPILWASSSGTPTVPPLQPFuGRLRSLDoGIRRLELLLSAGDSGTFFCKGRHE-ESRTVLHVLGDRAYCKA.........PGPTHGSsYP+lL.IPLLGsGLVLGLGsLGhsaWR+RRlPPpP.cPhPRFA...PllsT......EuQRPl+EQDuKhPGcLDQEPuLhYADLDH.sLpR.RRhSsssPuDA.STVYAVVV ......ssGASL-G.RPGDRVNLSClGVSHPIRWsWAPSFPACKGLSKGRRPILWASSSGTPTVPPLQPFsGRLRSLD.sGIRRLELLLSAGDSGTFFCK...GRHE-ESRTVLHVLGD+s.C+s.........PG.PT..HGSsYPQlL.IP...LLGAGLlLGLGALGlVWWh+R+.PPt........Ph.P+hA.....Phsps..........................EsQ+..ss+..c.p-s+hsGc.D...p.EsuLhhu.............s.h.stc..sTl.uhss................................ 0 2 2 3 +14948 PF15097 Ig_J_chain Immunoglobulin J chain Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P01591 Family \N 27.00 27.00 75.90 57.10 17.70 17.40 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.95 0.71 -4.57 4 36 2012-06-28 12:14:02 2012-06-28 13:14:02 1 2 26 0 21 57 0 126.70 71 78.70 NEW -DEpTVLVDNKCpCsRlTSRIIPSs-sPsEDIVERNIRIIVPLNsRENISDPTSPLRTpFVY+Lo-LCKKCDPVElELssQlhpAoQSNlCsEDs...ETCYTYDRNKCYTThVPhsY+GpT+MVpAALTPDSCYPD ..EDEchVLVDNKCpCsRlTSRII.Ss-DPsEDIVERNIRIIVPLNNRENISDPTSPlRTcFVYHLS-LCKKCDPsEVEL-NQlVTATQSNlCDEDs...ETCYTYDRNKCYTssVPLsYsGcT+MVpTALTP-SCYPD. 0 1 1 6 +14949 PF15098 TMEM89 TMEM89 protein family Bateman A kh6 Jackhmmer:A2RUT3 Family The function of this family of transmembrane proteins, TMEM89, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are approximately 159 amino acids in length. 
27.00 27.00 162.40 44.70 20.00 18.70 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.87 0.71 -4.14 5 25 2012-06-28 12:19:38 2012-06-28 13:19:38 1 2 22 0 14 28 0 126.50 69 84.98 NEW WSRPLWYQVGLDLQP..........WGCQPNSLEGC+uSLGCPGYWMGLGuNRIYPVAGVTITTTMMLllSRslhQRRRSQAoKuEHPQVTssPCssWKRR.uPISDRTLLLGVLHMLDALLLHIEGHLQ+LAoQcQIQIKGTPs ...WSRPLWYQVGLDLQPWGCQPsSlEGCcuuLuCPGYWhGLGus.RIYPVAGVhITTTMMLlhuRhlhpRRRSQATKu..EHPQVTTpPCGPWKRR.sPISDRTLLhGVLHMLDALLlHIEGHLQ+LATQpphQIKGTss.. 0 1 1 2 +14950 PF15099 PIRT Phosphoinositide-interacting protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:P0C851 Family The function of this family, PIRT, is not known, however it is predicted to be a multi-pass membrane protein. This family of proteins is thought to have a role in positively regulating TRPV1 channel activity via phosphatidylinositol 4,5-bisphosphate (PIP2). This family of proteins is found in eukaryotes. Proteins in this family are located in the cell membrane [1]. Proteins in this family are approximately 140 amino acids in length. 27.00 27.00 39.00 38.00 26.80 26.40 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.85 0.71 -4.92 10 80 2012-06-28 12:20:04 2012-06-28 13:20:04 1 1 40 0 50 69 0 125.90 42 88.69 NEW hssPsu.s-.shhEKssps........ssslslPLVsEsQLTAATGGAELSCYR...CTlPFGVVlLIAGIVVTAVAYoF...NSHGSlIShhGLVLLSuGLlLLAsSAlCWKsRhc+K+c+RRESQTALVsNpRslFs ..........................t..hPKs..pssphp.psps........ssshsls.h..SEspLTsATGGuEhShYR...CIhPFGsllLlhGlVlTuVAYoa..............Nops.....S...l...lphhGlslLShGLhLLssuhLCWpsp.++Kpt+RpcSphhhshpph.Lh.t........ 0 3 6 17 +14951 PF15100 TMEM187 TMEM187 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q14656 Family The function of this family, TMEM187, is not known, however it is predicted to be a multi-pass membrane protein. Members of this family are as yet uncharacterised. 
This protein family is also alternatively named ITBA1. This family of proteins are found in eukaryotes. Proteins in this family are typically between 239 and 267 amino acids in length. 27.00 27.00 36.50 34.50 21.20 24.50 hmmbuild -o /dev/null HMM SEED 242 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.07 0.70 -11.62 0.70 -4.99 10 38 2012-06-28 12:20:47 2012-06-28 13:20:47 1 2 35 0 21 36 1 226.10 56 86.68 NEW ALhHVslssCLClulshTGlFDuVhV-lGY-HYAEpPVss...LPuaLAMPFNSLVNluYlLLGlYWLpppsssspsst.............cAcYlKDVFAhMAlsYGPVQWLRlsTQpRssAVLDQWhTLPIFAWlVsWC.aL-+..GW+sphhLulEslSlsSYuLuLLHspGFElA.LGhHIssAVhpulp....sQtRaGs.ssStpYLsLAVLSCuGFVVLKLhDHpLA+a..cLFQpLTGHFWSKVCDlLQFHauFpFLTs .........................................................................................................AhhHVAluusLCsssVaTGlFDuV.V.pVGYEHYAEsPVsu...LPAFLAMPFNSLlNlAYsLLG.laW.Lp+ssss.stss................+YlKDVFAuMAL.....lYGPVQWLRls....TQh+hsAVLDQWhTLPIFAWsVAWChaL-+.....GWcPWhhL....ulEslSLsSYuLALLHPpGFEVA.LGsHlsu...AVupAL+......................spp+aGs.ssSspYLsL....GlLSCL.GFVVLKLhDHpLApW..pLFQpLTGHFWSKVCDVLQFHFAFlFLTp................ 0 7 9 11 +14952 PF15101 DUF4557 Domain of unknown function (DUF4557) Bateman A agb Jackhmmer:Q8NHR7 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved TVF sequence motif. 
27.00 27.00 27.20 27.10 25.20 25.10 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.50 0.70 -11.26 0.70 -4.49 5 40 2012-06-28 12:20:57 2012-06-28 13:20:57 1 1 29 0 25 36 0 188.00 62 92.08 NEW FpuQ+AWFSuSVSpDlpphWVcEGGsISD.s+sADFLFSsDASHsDTtRIYpS-DYlcDpATVFHupaLtAssNscSpsSVs..LGHYVL.........PP-s............lQcElRuKIGSF.IWEQDEpFl....lp+c-chsPsc...pslpcpupsos-Hspcpp+Ss-aatTRTslscKspsash+s................YPVNNM..VoGYlSIDAhcKa.GE..L+DFlPGsuGahVh+sscs ......FQGQRGWFCsSVSp-LRQFWVAEGGoISD.s+AADFLFSCDASHPDTLRIYQSLDYIEDNATVFHAYYLSAVANA..EI..KsSVA..LGHFlL.........PPAC............LQK.EIRRKIGSF.IWEQDp+Fl....lcKHDEVsssE..lcshpEsS.cluT-HcKELScSsE+HhhRTPVlEKQMYFPLQs................YPVNNM..VT.GYISIDAMKKFLGE..LHDFIPGSSGYLAYHVQsE............ 0 4 5 6 +14953 PF15102 TMEM154 TMEM154 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6P9G4 Family The function of this family of transmembrane proteins has not, as yet, been determined. However, it is thought to be a therapeutic target for ovine lentivirus infection [1]. This family of proteins is found in eukaryotes and members are typically between 138 and 320 amino acids in length. 30.30 30.30 30.30 30.30 30.00 29.40 hmmbuild -o /dev/null HMM SEED 146 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -11.17 0.71 -4.49 7 50 2012-06-28 12:21:22 2012-06-28 13:21:22 1 6 36 0 27 44 0 129.00 39 50.53 NEW sE-sppSuc.slpstsh.csos...................soshuoVT.sE.....slssshs.sph...pssssQhEh...............lLMlllP.llLsLLlL.llhllhhh+R+RsKpc..p.spppt...p-lssEslh.P............IFEEDTPSVMEIEMEELDKWMNSM.....N+NADhEsL ................................................................................ph....................sshsulT.sp.....slssshssop..h....st-ps..QhEF..........................lLMVlIPh.lLLsLLlLsV.lhlshhh.+RKRs.Kp-soSpGSpssLQ.o.E.lusEsl+sP............IFEEDTPSVMEIEMEELDKWMsSh.....N+Nssh-t...................................... 
0 6 9 14 +14954 PF15103 G0-G1_switch_2 G0/G1 switch protein 2 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P27469 Family This family of proteins regulate apoptosis by binding to Bcl-2 and preventing the formation of the anti-apoptotic BAX-BCL2 heterodimers [1]. 27.00 27.00 66.30 66.20 21.80 21.30 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.49 0.72 -10.47 0.72 -3.79 8 46 2012-06-28 12:22:18 2012-06-28 13:22:18 1 2 34 0 23 43 0 103.30 54 90.29 NEW METlpELIPhAKEMhuQKPSRKhVKLYlLGSVLAFhGsVlGLVETVCSPFTuupcL.DpEtA..lAEhcsAhERptlppp...........slhEcsKpppss.psRulSpRpHA.S METlpELIPhAKEMhuQKPstKMVKLYVLGSVLAhFGVVlGLVETVCSPFoutupL.DpEtt...lsEhcsA..htRpthppp......................hlhEpuKppsss.tpRulSpRpHAS.............. 0 1 3 9 +14955 PF15104 DUF4558 Domain of unknown function (DUF4558) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VU69 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 78 and 121 amino acids in length. One member is annotated as being a flagellar associated protein. 27.00 27.00 27.40 27.00 26.20 26.80 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -9.86 0.72 -4.06 11 43 2012-06-28 12:22:34 2012-06-28 13:22:34 1 2 40 0 28 44 0 85.30 39 79.28 NEW pchsptcchspc.cpssphs.WpEphppp....ssht.sspph...pcphcpElplAN+plhtlRpAtL+cLaEcEtppaEQELstpGhAhYccRt ..................pchhthcKhlp+hcssppcshWp.sLspR......cssh.Ahl+h...pcshspELsLAsKpLLhVRQAtL+pLFE+EappYpQELsph.GKAFYhER.... 0 10 15 16 +14956 PF15105 TMEM61 TMEM61 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N0U2 Family The function of this family of transmembrane proteins has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 150 and 211 amino acids in length. 
27.00 27.00 27.80 41.90 25.60 23.80 hmmbuild -o /dev/null HMM SEED 198 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.28 0.71 -4.78 3 25 2012-06-28 12:22:48 2012-06-28 13:22:48 1 2 21 0 12 21 0 172.70 63 95.11 NEW ASTLRYCMTVSGTVVLVAGTLCFAWWSEGDAusQPGQLAPPTEaPlPEuPsPLLRSVSFlCCGAGGLLLLlGLLWSIKsST+GPPRtDPYHLSRDLYYLTVESSEKESYRsPKVsAIPTYEEAVsCPLAEGPPTPPAQPsEEuLEs+AScDALLGoQss.PPPSYESIlLAtsAVSGpTs..PSPGpSCsGLlQsARGGs .ASTLRYCMTVSGTVlLVAGTLCFAWWSEGDA......usQPGQhA...P....P.TtaPlPEuPusLLRSVSFhCCGAGGLLLLhGLLWSlKASopGPPRWD.YHLSRDLYYLTVEoSEKESCRoPKlssIPTYEEAVsh..PlA..EGP.PTPPAhPhEEsLcsSA.....stDALL...........uTQsshPPPSYESI.hAhsulSuETs...usstoCsG.sphstGG....... 0 1 1 4 +14957 PF15106 TMEM156 TMEM156 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N614 Family The function of this family of transmembrane proteins, TMEM 156, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins are found in eukaryotes. Proteins in this family are approximately 310 amino acids in length. In humans, the gene encoding this protein is located in the chromosomal position, 4p14. 27.00 27.00 61.40 61.10 18.90 18.90 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.47 0.70 -5.12 6 34 2012-06-28 12:23:08 2012-06-28 13:23:08 1 1 28 0 19 29 0 213.80 56 78.32 NEW EVCLQsNFTaSLsSlNFSFVTFLQPl+ETQTI.MGIFLNHSNFQNFTcICQsITuEhKhCSsCLsCESKGshDFISQEQTS...KVLlMRGShEVKusDFHSPCQHFNFTsAPhsDcLEEYNhTCpLKTHssRSsIhEE-Ps+EpSlN+TC..RhMEp.NNChpISL+LEMDVKNssCSMKITWYlLVLLVFIhhlIllI+KILEuHRRVQKWQSHKYKPTSsLLRGuDSEK ..EVCLpsNhTYSLsSlNaSFVTFLQPlRETQsI.htIFLNHSNFQNFTRICQsITuEhKhCSpCLlCESKGshDFISQEQTS.................KVLIh+GShEVKAsDFpSPCpHFNFTVAPhVDcLEEYNsTCpLKsHTt+SslhE--Ps+cpSlN+TC..RhMEh.NsCh+ISLHLEMDlKNhoCSMKITWYlLVLLVFlFLlILhI+KILEuHRRlQKWQ..S..H+.+sTS.sLLRGpDSEK.................. 
0 1 1 3 +14958 PF15107 FAM216B FAM216B protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N7L0 Family The function of this family of proteins, FAM216B, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins are found in eukaryotes. Proteins in this family are approximately 150 amino acids in length. In humans, the gene encoding this protein is located in the position, C13orf30. 27.00 27.00 35.30 35.70 25.80 24.70 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.61 0.71 -10.51 0.71 -4.20 6 56 2012-06-28 12:23:32 2012-06-28 13:23:32 1 2 26 0 25 61 0 106.40 50 55.52 NEW Mspchp+.QcLhNlP+IPpI+VPpShsDTSLLK..DLTQGQpRYhYSIMRIYNSRPQWcALQsRYlHSLQHQ.QhLGYITQ+EAhusAhVLRcST+RASApsuP....Rol.pRssuho ..........hsppht+.QcLWpsPQ..pI+lPpShh..DsShhK..DLTpGQpRYhhSIh+IYNup..hphhpppYhHsLQHp.Q+...G.........hlTp+cuhhs.......o+huptphhP....Rp..c+psuh....................... 0 2 2 3 +14959 PF15108 TMEM37 Voltage-dependent calcium channel gamma-like subunit protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8WXS4 Family This family of transmembrane proteins, TMEM37, has a role in stabilising the calcium channel in an inactivated (closed) state. It is a subunit of the L-type calcium channels. This family of proteins are found in eukaryotes. Proteins in this family are approximately 210 amino acids in length. 
27.00 27.00 123.00 122.80 25.00 24.60 hmmbuild -o /dev/null HMM SEED 184 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.32 0.71 -11.36 0.71 -4.59 6 38 2012-06-28 12:23:58 2012-06-28 13:23:58 1 1 31 0 23 39 0 179.50 68 93.16 NEW QAQRLLupRRPp+uFFESFIRuLIILCsuLAVVLSSISICDGHWLLAcD+LFGLWaFCTsSNp...........ousHClRDLSpApVPGLAsGMlLARShuoLAVVsAIFGLELLhVSQVCEDhcSR+KWAhGSsLLLlSFlLSuGGLLSFlILL+splTLhGFTLMFWCEFTASFLFFLNuISGL......HINSlTpP ..........QAQR.LuQR+PpRSFFESFIRoLIIlCsALAVVLSSlSICDGHWLLA.ED+LFGLWHFCTsoNp...........osspChRDLupApVPGLAVGMuLsRSlGALAVVAAIFGLELLMVSQVCEDtHSRRKWuhGSlLLLlSFlLSSGGLLoFVILL+NQVTLlGFTLMFWCEFTASFLFFLNAISGLHINSITpP....... 0 1 3 7 +14960 PF15109 TMEM125 TMEM125 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q96AQ2 Family The function of this family of transmembrane proteins, TMEM125, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 55 and 232 amino acids in length. 27.00 27.00 30.80 63.40 22.50 20.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.54 0.71 -3.96 6 45 2012-06-28 12:24:17 2012-06-28 13:24:17 1 1 33 0 25 36 0 109.40 73 53.85 NEW slLEEQVELWWFp-P++SLLCYssuVsLILuCGhGGVGLLSTToShSGE...WRLusGTsLCLLALsVLLKQLLSSAVQDMNClRsR+RI-hLKSGGhuDsLllLloGLsLLlCG .slLsEQVELWWSQpPRRShLCFsVAVuLVsGCGAGGVuLLSoTSSRSGE...WRLAsGTsLCLLALLVLlK.QLhSSAVQDMNCIRpt+HVuLLRSGGGADsLVVLlSGLVLLVsG 0 1 4 11 +14961 PF15110 TMEM141 TMEM141 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q96I45 Family The function of this family of transmembrane proteins, TMEM141, has not, as yet, been determined. Members of this family remain uncharacterised. TMEM141 protein family is found in eukaryotes. Proteins in this family are typically between 103 and 124 amino acids in length. 
There are two completely conserved residues (C and W) that may be functionally important. 27.00 27.00 27.00 34.80 20.00 26.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -10.16 0.72 -3.70 12 53 2012-06-28 12:24:45 2012-06-28 13:24:45 1 1 44 1 29 54 0 88.90 47 74.94 NEW ssI..p+lc-t.tsKHPGhspYhs......C.o+AhhpGluTFsLGhuusahlQ+hlp++lPYPhpaNlLVSslsuoVsSYpVTphcTppCpshWhhhEstp ............l..p+lcDt.tsKHPGhtpYhs......C.S+AhhcGlhTFlh.GouusFhlQhhlp+KhPYPhQWslLVusls..uSluSYtVTpsEop+CsslWlaLEsG.p.......... 0 7 10 17 +14962 PF15111 TMEM101 TMEM101 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q96IK0 Family The function of this family of transmembrane proteins, TMEM101, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 127 and 257 amino acids in length. 27.00 27.00 37.30 30.80 21.40 21.10 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.54 0.70 -5.08 7 47 2012-06-28 12:25:10 2012-06-28 13:25:10 1 1 42 0 29 47 0 235.60 71 95.47 NEW ssR+phLphhsplGsFllTRaPFWpsFshLMhaAERA-s++hP.....DltlP...al.YlDLusAVlCASFMSFslKRRWFAlusAlQLs.ISshhuhhutpshYu-WLKVRhaSRslAlIGGaLhluSGsGEhYRp.KPRoRSLQpTGplFLGlYLIC.AYsL.aS+EDRhAaLpHIPGGp.hl.lhhVlaslLuLuaLSGa.hphhuplLAllLshshLllDGslsYWa+o+.+VEFWsQh+LlucNluIFGAllIL..AsDu .......u..R+hhLpLlhQlGuhLLTRhPFWsCFS.LMLaAERA-u+RKP.....DIPVP...YL.YF.DMGAAVLCASFMSFGVKRR.W.FALGAALQLA.lSTYAAYlGGaVHYGDWLKVRMYSRTlAIIGGFLVLAS..GAGELYR+.KPRSRSLQSTGQVFLGIYLICVAYSLQHSKEDRLAYLNHLPGGELMlQLaFVLYGVLALAFLSGYYVsLAAQILAVLLPsVhLLIDGNluYWHsTR.RVEFWNQMKLlGEsVGIFGAAVIL..ATDG......................... 0 7 9 13 +14963 PF15112 DUF4559 Domain of unknown function (DUF4559) Bateman A agb Jackhmmer:Q8TB03 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 
This family includes human protein CXorf38. 27.00 27.00 65.20 27.60 25.40 23.40 hmmbuild -o /dev/null HMM SEED 307 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.60 0.70 -11.92 0.70 -5.26 8 62 2012-06-28 12:25:15 2012-06-28 13:25:15 1 7 37 0 32 49 0 241.90 51 64.70 NEW RhNstsYKNWLKsGpuLLlLRsuLQsFlt+EscsaHpuLpsKlss..ssppCp....Css+uRp.pstCplCc.W+cEIlpaHsscsucIHWsNCcPstW...ss-hWEVAKAFMPRGp.sD+sGPEcsDsSALLNFlstCcHFph...chcpVpcVIcVRNclMHSsDLKhSspshpcahsKIpphlp...hsslPslptstccIpplpss-apltssptspcDGsclpT-sh.shpclL-lEpctLcD+lpcLhsphEpsps.s.Echhpslpslh-FLpsNpDLppsLpscl.......sKLpplps+lpKh-tpls-l+sphsQL ..................RLNsspYKNWlKAGpCLLlLRssLpsFlspEshsFHptLlsthss......spssCpt..tCsP+u+phpspCpl...CtpW+pEILpHHhscsus...lpW.uNCcPshW...ss-...WEVA......KAaMPR..Gh..sc+pu..s-pCDAsALLshlstCc+Fhs...DtcpVpc.VIcsRNc.lMHSu-hKVSstWhp-at.+IpsFLs..pFpplP-lssshp+IEplLssDaslahsttDphD.....Ghch-htshls.pp....lp..-lEhphL+E+LpEhhhphccpphhs..Ec..pplpsltpFLcpNcDLppsLpt-h.......pKLpsh..p.phtp...............t................................................... 0 6 9 16 +14964 PF15113 TMEM117 TMEM117 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H0C3 Family The function of this family of transmembrane proteins has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 181 and 504 amino acids in length. 
27.00 27.00 32.60 32.60 19.00 19.00 hmmbuild -o /dev/null HMM SEED 415 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.54 0.70 -5.88 4 63 2012-06-28 12:25:37 2012-06-28 13:25:37 1 2 39 0 38 51 0 317.70 70 79.75 NEW t+DFRYYFQHPauRLhVAYLVlFhNFLlFAEDPVSHSpTEAplIVVGNsFSFlssKYP.GhGWplLKVlhWLLAllhGLlsGKFlhH+hLFGpLLRLKMFREDpGSWMhMFhoTllFLFIFSpIYNhhLLhAGs..tsahIoDhMGIRNpSFMKhAAlGTWMGDFVTAWMVTDMMLQDp.YPsWA+usRtFW+p.GpsRIILFWoVLloLToVVVhVIoTDaISWDhLNRGFLPosElSRAFLASFILVhDLLIVMQDW-FPHFMGDLDlpLPGhpTsHh+h+lPhppplaK-EapIHITGKWFNYGIIhLVlILDLNMWKNQIFY+Pa-YGQYlGPssKIaTVc-.-oLts..hNRTpLTa-WRuNphsPcTNcoYlEcDhahHSRYlGhoLsVKsLAFlPSLhAFVhFGhhIWhaGR ..........................s..cFRYYFQHPWSRhllAYLVhFhNFLIFAEDPlSHSQTEAphhVVGNCFS..FlhsKYP.uhGWthLKVlhWl....LAIlhGLlAGKFlFHp...pLFGph....lRLKMFpED.cGSWMsMFhSsllhLFhFSplYNhhLlhtGs.htsahlsphMGIcNpsFMK..hAAlGTWhGDFlTAW.M.VTDMMLQDp.......YPDWG+uAR.....tFWK+..Gp.RIhLFWoVLhoLTSVVVLVIoTDWISWD+L.......NRG.....FL...PSDEVSRAFLASFILVFDLLIVMQ.DWEFPHFMGDlDl.NLPGLpTsHhpF+lPhhp+IFKEEY+IHITGKWFNYGIIFLVLILDLNMWK.NQIFYKPaEYGQYlGPGp.KIYTVcDsEoL+D..hNRTpLSWEWRSNpTNPpTN+TYsEuDMFLHSRahGuSLDVKCLAFlPSLlAFVhFGFFIWFFGR................. 0 8 12 19 +14965 PF15114 UPF0640 Uncharacterised protein family UPF0640 Bateman A agb Jackhmmer:Q8WVI0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 70 and 80 amino acids in length. There are two conserved sequence motifs: PGK and YRFLP. 27.00 27.00 66.50 66.40 26.60 25.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.37 0.72 -4.50 10 56 2012-06-28 12:25:59 2012-06-28 13:25:59 1 1 50 0 38 56 0 68.10 56 89.06 NEW Mhh.+..Spsl+plLcpWPGK+phGlYRFLPlFFsLGAuhEasMINhRlGcpsFYcsY+R+QAE+hhEp+hcp .................h..ptpl+RlLpphPGKpRFGlYRFLPhFFVLGushEWhMIplR.V.Gc.EoFYcsY+R+puEc.hpc+lcp...... 
0 7 11 22 +14966 PF15115 HDNR Domain of unknown function with conserved HDNR motif Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VZQ5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 219 amino acids in length. There is a conserved HDNR sequence motif. The function is not known. 27.00 27.00 35.40 35.30 20.10 19.30 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.34 0.71 -4.34 6 73 2012-06-28 12:27:26 2012-06-28 13:27:26 1 2 36 0 47 64 0 150.10 36 84.54 NEW Ms+uhh..ssssctcGpWFsphh........cuHhsoph+psas..apc...pscspsPshFtpRpKpssspph........FShHDNRHoFpspGs...YFssGLGK++hs.sp.....p+t+.SpNhhpWss..hsuspDs..hSoYpsuaht.......cpss.ssthRphPRhhscp.puuphtssppsspa..hsppPcs.........chslstpsp .......................shsGpW.asphh.........puhh..puphKp.psschpp...pscsp.PslathRpKpsssppF.........FShHDNRaSapssGh...Y.L.spGlG.RKpl........p+.t.p+sF.hWAss...l..p.p.t...Ss.Qhsah.......................................................................s................................................. 0 11 12 17 +14967 PF15116 CD52 CAMPATH-1 antigen Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P31358 Family \N 27.00 27.00 40.70 40.10 19.80 17.90 hmmbuild -o /dev/null HMM SEED 44 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.91 0.72 -8.08 0.72 -4.15 4 22 2012-06-28 12:27:31 2012-06-28 13:27:31 1 1 17 0 8 20 0 44.90 48 65.52 NEW LGpsTTut.sop...........+SussAhusLuGGShLFFlANTLIpLFYLS ................GpssTot.........................pSussA.SsluGGsFLFFlANslIpLFhhS 0 1 1 1 +14968 PF15117 UPF0697 Uncharacterised protein family UPF0697 Eberhardt RY, Coggill P, Hetherington K, Bateman A agb Jackhmmer:Q96E16 Family This family of uncharacterised proteins is found in vertebrates. Proteins in this family are typically around 100 amino acids in length. 
27.00 27.00 61.40 61.30 19.90 19.60 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.29 0.72 -4.23 4 50 2012-06-28 12:28:25 2012-06-28 13:28:25 1 1 39 0 30 41 0 93.30 77 93.22 NEW uDsGoIDYSVHEAWNEATNVYLlVILVShuLhMYAR+NKRKIMRIFTlPPTAEossEsNFYDohpKIRLRQQLEMYSIARKa-.Q..Q...tQsDSVQLSlE ....uDDGSIDYoVHEAWNEATNVYLlVILVSFGLFMYAKRNKR+IMRIFSVPPTtETLS...EPNFYDTlSKIRLRQQLEMYSISRKYDhQ.QPQ...sQuDSVQLSlE.......... 0 2 4 12 +14969 PF15118 DUF4560 Domain of unknown function (DUF4560) Bateman A agb Jackhmmer:Q96HG1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 66 and 78 amino acids in length. There are two conserved sequence motifs: FCK and RTL. 27.00 27.00 74.90 74.70 22.60 22.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.61 0.72 -8.99 0.72 -4.17 5 46 2012-06-28 12:30:52 2012-06-28 13:30:52 1 1 23 0 30 64 0 62.80 76 75.50 NEW AsALSGLAVRLuRoAAsRuSYGVFCKGLTRTLLsFFDLAWRLRMNFPYFYIlASVILNVRLQV+I ........AAALSGLAVRLSRSAAsRuSYGsFCKGLTRTLLsFFDLAWRLRhNFPYFYIlASVMLNVRLQVhI.. 0 1 1 4 +14970 PF15119 APOC4 Apolipoprotein C4 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P55056 Family \N 27.00 27.00 58.70 58.60 24.60 19.40 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.27 0.72 -4.02 6 27 2012-06-28 12:31:28 2012-06-28 13:31:28 1 1 21 0 14 24 0 92.60 62 78.88 NEW CQ.t.sEto.SPsPtPcpS+.WSLVPu+VKEhVpPLVTRTRE+WQWFW..GPuAFQGFhQTYYDDHL+DLGsRT+AWLpSSKDsLLNKAHSLCPRLlCGD+D .....................................h.pto.SPsPt.c.Sp.WSLVpu+hKEhlEslVsRTR-pWQWFW..uPusFpGFhQTYY-DHL+DLGPRT+AWLhpSKDSLLNKTHSLCPRLlCGD+D.. 0 1 1 2 +14971 PF15120 DUF4561 Domain of unknown function (DUF4561) Bateman A agb Jackhmmer:Q96E40 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 
27.00 27.00 34.30 33.30 24.10 21.00 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -10.90 0.71 -4.79 6 65 2012-06-28 12:32:32 2012-06-28 13:32:32 1 3 39 0 34 54 0 160.90 60 76.91 NEW MsEl+cpL+hlpQ+YKLFpQQQFTFlsALERsREsAaD+hcPVuoIsQVQpYh-HaCsNuTDRRILhLFLDICsDLsshpp+lEsLp......SssssosphL-pC+sLls.SNDlSslRA+YPHDVVNHLSCDEARNaYGGVVSLIPllLDhlpEhht..psc+Lt.tpp.ssstppp ................MNEVKEsLRslEQ+YKLFQQQQFTFIAALEHCRENA..HD...KIRP.ISSIuQVQsYMEH....aC.....NNSTDR.....RILlMFLDICoELs+LCQ+FEuLH................SGTPVTNsLLEKCKoLVSpSNDLSoLRA...KYPHDVVNHLSCD........EARNHYGGVVSLIPllLDlhKEWIA..+oEKLPp...........th...t......... 0 8 10 18 +14972 PF15121 TMEM71 TMEM71 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6P5X7 Family The function of this family, TMEM71, is not known, however it is predicted to be a transmembrane protein. This family of proteins is found in eukaryotes and located in the cell membrane. Proteins in this family vary between 41 and 291 amino acids in length. 27.00 27.00 27.00 39.90 26.90 26.10 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.98 0.71 -4.47 10 47 2012-06-28 12:32:51 2012-06-28 13:32:51 1 1 32 0 21 39 0 131.30 62 55.16 NEW MY..phStlhSTPVussopp-tththp.SPsslhsSasCD.LD......GDSSFECsSlDPLTGShasCRRSPRLLTNGYYlWTEDSFLCDcDGNITLoPSQTSVhYKENLVRIFR+K+Rh+RSLuSLhS..hpAScSWL+GSIFscV................sSsPSEDsWLEGsRp ............MYRlopLMSTPlASpst....pt..tphosp.plhsS.FsCD.LD......GDpSFECsSlDPLTGSaasCRRSPRLLTNGYYlWTEDSFLCDcDGNITLsPSQTSVhYKENLVRIFR++++hp+shuSLFs..lssScSWL+uoIFscl................cS.s.sEDsWL-Gh+p...... 0 1 3 6 +14973 PF15122 TMEM206 TMEM206 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H813 Family The function of this family of transmembrane proteins, TMEM206, has not, as yet, been determined. Members of this family are remain uncharacterised. This family of proteins is found in eukaryotes. 
Proteins in this family are approximately 350 amino acids in length. 27.00 27.00 149.50 149.10 19.20 16.40 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -11.82 0.70 -5.51 3 51 2012-06-28 12:34:56 2012-06-28 13:34:56 1 2 39 0 31 44 0 287.80 75 84.57 NEW PIRFSKACLKNVFSVLLILIYLLLMAVAVFLVYQTITDFRDKLKHPVMSVSYKEV-cYDAPGIALYPGKA+LLSCcHHaYDsIPPLsuPGQPG-RsCsTQ-IsYpcPYoN+TMK+ALIVQGPpDVR+RELVFLQF+LNETcEDFSAIDYLLFSSFc-FLcSsDKAuFMQDCESuYSSWKFSGGFRTWVKMSLVKTKEEDGpESVEFRQETSVVNYID+RPssE+osQLFFVVFEWKDPFIQcVQDIITANPWNTIALLCGVFLALFKAADFAKLSVKWMIKIRKRHLK+RuREhNHIS ..........SIRFSKACLKNVFSVLLIFIYLLLMAVAVFLVYQTITDFREKLKHPVMSVSYKEV.DRYDAPGIALYPGQAQLLSCKHH.Y-VIPPL..suPGQPGD....hsCTTQRINYTDPFSNQThKoALIVQGPpEVKKRELVFLQFRLNposEDFSAIDYLLFSSFQ..EFLp...S.........P-+sGFMQuCESAYSSWKFSGGFRTWVKMSL.VKTKEEDGREAVEFRQETSVVNYIDQRPAAE+SsQLFFVVFEWKDPFIQKVQDIITANPWNTIALLCGAFLALFKAAEFAKLSVKWMIKIRKRaLK+RuQAsNHIS......... 0 2 4 12 +14974 PF15123 DUF4562 Domain of unknown function (DUF4562) Bateman A agb Jackhmmer:Q96LM5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved HRYQNPW sequence motif. This family includes the human protein C4orf45. 27.00 27.00 41.50 41.10 18.10 17.30 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.46 0.71 -4.21 9 43 2012-06-28 12:35:21 2012-06-28 13:35:21 1 1 30 0 24 42 0 109.70 47 59.28 NEW lFTGPDhl+Dahs+lhpassYIG.tp.u.EtTuDLpYLWRPAsspshPhthKpchlGEIGWGIs.ashhs+p+Lpo....GhpIKhGEhppAs.D+hTHRYQNPW.Pt...P.hl-tpsthuR ....lFTGPDYl+DahPKlppaTsYlGEpp.uLEpTuDLcYLWRPAsspShPschKacYVGEIGWGlPpasalN+oRLpo....GFpIKhsEhppAu.DphoHRYQNPWQPpPplhDhpst.S............................. 0 10 11 13 +14975 PF15124 DUF4563 Domain of unknown function (DUF4563) Bateman A agb Jackhmmer:Q96PS1 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 
This family includes the human protein C3orf24. 27.00 27.00 120.30 27.30 19.00 18.70 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.23 0.71 -4.70 3 35 2012-06-28 12:37:45 2012-06-28 13:37:45 1 2 29 0 24 35 0 167.80 73 99.93 NEW MAsYQLWSPWSPLDENLQWLRHTTPThuSKHPFRuSPsFPaTPuDVEVQtCFHEVslVpDpPhlcAG+SPcLPsHshEP+..ThsNhpupIRKPQPIRLlGVDSVFGRVITuQPPKWTGTFRVS-KSAFSKIIS+EpQWPpGLKEPQIEMTlsMCKQMLRSILLLYAIYKKCTFALQHSK ......MAGYQLWSPWTPLDESFQWLRHTTP...TPSS.KHPF..RASPCFPHTPSDLEVQLCFQEVTLVLDSPhLEsGhSP..KLPC........HTSELR..ThsspKGLVRKPQPVRLSGVDSVFGRVITAQPPKWTGTFRVSDKSAFCKIISREHQWPTGLKEPQIQMTVTMCKQMLRSILLLYATYKKCTFALQHSK................... 0 1 2 10 +14976 PF15125 TMEM238 TMEM238 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:C9JI98 Family The function of this family of transmembrane proteins, TMEM238; has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 61 and 153 amino acids in length. 27.00 27.00 29.80 29.30 26.10 25.50 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.73 0.72 -9.25 0.72 -4.07 9 40 2012-06-28 12:38:05 2012-06-28 13:38:05 1 1 24 0 26 43 0 67.20 55 48.09 NEW GRCphhhh.lAVlhDslGlslLLlGlFAsL.....sFaDhLlYoGALllhLSLlhWlhWYoGNIE..ls.cEL ....GRC+hhLh.LAVshDlsGhssLLsGVFApL.....sFhDhLlYoGALllFLSLLhWIhWYTGNIE..lohpEL..... 0 2 6 13 +14978 PF15127 DUF4565 Protein of unknown function (DUF4565) Bateman A agb Jackhmmer:Q9BSF0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C2orf88. 
27.00 27.00 35.00 35.00 22.80 18.00 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.07 0.72 -3.65 5 31 2012-06-28 12:40:36 2012-06-28 13:40:36 1 1 29 0 18 24 0 91.00 58 99.30 NEW MGCMKSKcphPhssThcu-K.....s+cuEEAa..tchh..........lsssEEs+cPsuPp..huPVLLEYAcRLSEEIVs+AVQQWAElDp+YuDIPYIESDuP MGCMKSKQTFPFPTshEuEK.....pH-SEEsF.MPEE+hL.+....st.lslpEElKcP..suss....sVlLEYAHRLSQEILsDALQQWAssNlKYtDIPYIESEuP. 0 1 2 4 +14979 PF15128 T_cell_tran_alt T-cell leukemia translocation-altered Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P57738 Family This family of proteins is required for osteoclastogenesis [1]. 27.00 27.00 43.00 32.50 20.20 24.70 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.41 0.72 -10.26 0.72 -4.54 4 48 2012-06-28 12:40:48 2012-06-28 13:40:48 1 2 35 0 24 42 0 84.70 61 85.78 NEW opllsuh.uFhSEFlc-W.usDMRVoIFKlLLuWLVlSLlAIphAW+sYGNTVNshYYRQGhuGQNGGTPDsss+hsuWEpuus-sLKTHpE .......splLsuLsuhuSEFlcEWEApDMRVoLFKLLLhWLVLSLLuIQLAWthYGsTVTGLYaRs................GhG.G..QNGuTPDsusHFsuWE.susEshKTHRE..................... 0 2 4 9 +14980 PF15129 FAM150 FAM150 family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6UX46 Family This family of proteins known as FAM150 is found in eukaryotes. Members of this family are as yet uncharacterised. Proteins in this family are approximately 143 amino acids in length. The function of this family has not, as yet, been determined, however it is predicted to be a secretory protein family. 
27.00 27.00 50.70 30.50 18.50 23.00 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.81 0.71 -4.24 6 65 2012-06-28 12:42:58 2012-06-28 13:42:58 1 1 30 0 34 58 0 92.10 66 77.03 NEW -cpoLLcLIh-llp-hpppcpspspp....l..hupcspsss+ccsttlsshssEphlEIhPRDLpMKDKFlcHLT.GPLYFSPKCpKcFHRLYHNTRDCTIPAYYKRCARLLTRLAsSPhChE .............................................................................................................................t.ppps..EIhPRDLphKDKFlKHLT..GPLaF.SPKCSKHFHRLYHNTRDCTlPAYYKRCARLLTRLAVSPhChp........ 0 2 7 16 +14981 PF15130 DUF4566 Domain of unknown function (DUF4566) Bateman A agb Jackhmmer:Q9GZU0 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein C6orf62. 27.00 27.00 177.20 61.80 18.70 18.30 hmmbuild -o /dev/null HMM SEED 241 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.68 0.70 -5.20 2 54 2012-06-28 12:43:17 2012-06-28 13:43:17 1 3 40 0 33 38 0 209.50 86 92.76 NEW M.........uDP.oRKppslsRLRtpLR+K+ESLADpFDFKMYluFVFKDKKKpsALFEVs-VIPVMTNNYc-sIh+GV+-puYSLESS.ELLpKDVVQLHAP+YpsMR+DlIGCsQ.hDFhLWPRpDI-KIVChLFSRWKts.DtsaRPVQscFEFcHhDYEKQhLHlLuR+DpTGlllNNPoQShFLFlDRppLpTPpNKAshFKLsSlCLal.Q-QLhHWssGoI-chLc.YMP..p ................MGDPNSRKKQALNRLRAQLRKKKESLADQFDFKMYIAFVFKEKKKKSALFEVSEVIPVMTNNYEENILKGVRDSSYSLESSlELLQKDVVQLHAPRYQSMRRDVIG.CT...QEMDFILWPRNDIEKIVCLLFSRWKtS.DEPFRPVQAKFEFHHGDYEKQFLHVLSRKDKTGIVVNNPNQSVFLFIDRQHLQTPKNKATIFKLCSICLYLPQEQLTHWAVGTIEDHLRPYMP...E................. 0 3 5 15 +14982 PF15131 DUF4567 Domain of unknown function (DUF4567) Bateman A agb Jackhmmer:Q9BTX9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in some mammals. 
27.00 27.00 37.50 115.50 19.90 19.50 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.22 0.72 -9.42 0.72 -3.84 4 11 2012-06-28 12:52:58 2012-06-28 13:52:58 1 1 11 0 6 2 0 75.90 76 84.43 NEW pRLDEs.AsLRLQHaLQLtEGLAVPLPPLVVpuPAAHaVAGGuLuDFTLDIALGARRItLAhVRQVAQDGPVAFLA .RRLDEsPAsLRLQHHLQLREGLAVPLPPLVlQSPAAHHVAGGShuDFTLDIALGARRVRLALVRQVsQDGPVAFLA 0 1 1 1 +14983 PF15132 DUF4568 Domain of unknown function (DUF4568) Bateman A agb Jackhmmer:Q9H693 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 51.10 51.00 21.10 19.40 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.80 0.70 -11.71 0.70 -5.13 4 25 2012-06-28 12:54:36 2012-06-28 13:54:36 1 1 20 0 13 23 0 132.50 44 81.24 NEW GsGh.psWha.......GRs+h+sLschChu+hss.VspTVHpEsICCECpsKFGGaLPVPRA-AsLPYWVPLSLRPpKQIQKMVRhYIPKooKAC.CPCHpFGGRLPMPRDQAVMPYWVPQVLRSpKKVVKRQQshcslPEsslDlRShYspWRICG-G+hLLKWQQLQALHQsc..PlAsGpPtSh.AsLLPlshSLLTLLQAlLRVllAIRpLFWs ...................................................................................slCCECQs+FGGRLPVsRsEAALPYWVPLSLRPRKQh.+h..hph.................................................................................................................................................................................... 0 1 1 2 +14984 PF15133 DUF4569 Domain of unknown function (DUF4569) Bateman A agb Jackhmmer:Q9HAI6 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes human protein CXorf21. 
27.00 27.00 38.20 36.40 19.90 19.40 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -11.96 0.70 -5.03 8 57 2012-06-28 12:56:17 2012-06-28 13:56:17 1 1 36 0 37 43 0 279.10 45 97.54 NEW MLuEGaLopLsYpsphcts....hpopsppsspppthcpp.ssh...shushDcsphcslhsuscsstch.uuVHspts.ps.p....tphlpsspsPshpussSsulcIPccs.cps.thaLVPS.SC..cSICpNYsDLHIuGDpVhslsssuGsh..ssssshpcGPlLhSsDIP.uhpsphps.sp.h....h.pshSShW+ssps+E+.ShL.ppc..PlSNSlLNsYLEpKlhELYKQYlhEsss+suSs........splLuSELlMTNVDQISlQlSREpNlEToKA+DhlLssL....LplsSph.pSEISTPsLpISs.us ................MLuEuaL.slhYhpphahs....hsshsppsstcct.cpp.ht....ohSSs-csp.cshhhpspossch.oolpsptsppshp..pphsslpss..ssshpuss.sAlpIs+csp+-p...aLVPs.SC..cSIC+NYsDL+IAGspVhshsssossF...scsshp.GPLLpSs-IPLshEsulssp.s.h..hsh.pphSShW+hsSI+EK.S.L.hpp..PlSNulLNcYLEpKVsELYKQYlh-slh+suSs........TplLASELlMosVDQISLQlSpEpNlEsoKA+DhVlspL....LphsS....oElSTPsLpISp.o.......... 0 1 4 11 +14985 PF15134 DUF4570 Domain of unknown function (DUF4570) Bateman A agb Jackhmmer:Q9HBI5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 29.00 28.70 24.00 23.70 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.80 0.72 -10.43 0.72 -4.21 9 41 2012-06-28 12:59:01 2012-06-28 13:59:01 1 1 37 0 28 40 0 107.30 54 85.97 NEW MsS.hsp........El+Luc+HEEILupRthLLppMEs+hpspppc+ppphpsspsAppRNtpLLpDl-ssEcpLpsR...hscPslluLETpYWASVEc......lPtWEpaLLuRu.hPhu .............MoShasQ........El+LSKRHEEIlSQRLMLLQQMEs.+huDQpsE...KA.SQhQusEsAa+RNhoLLpDIEAAEKSL..QoRhaslPpP.ElVoLETpYWASVEEa.....lPKWEQFLLGRAsYPh.u..................... 0 5 7 11 +14986 PF15135 UPF0515 Uncharacterised protein UPF0515 Bateman A agb Jackhmmer:Q9NUL5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There are two conserved sequence motifs: PLT and HSC. 
27.00 27.00 39.80 39.80 25.90 25.40 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.11 0.70 -5.33 4 53 2012-06-28 13:38:07 2012-06-28 14:38:07 1 2 34 0 27 52 0 227.20 68 88.44 NEW REKFHG+lu.csAssLMRRasssHptVut.lshhscscssl-hpsphcLpssP.uh.V..thht--.ptpt..t......................D+DIpslA.p+hshLPLTpcNl+MFscAptshIPu.s+QFACcuCDhhWWRRVPQRKcVSRC++C+K+aDPVP.ss+MWGluEFpCspC++pF+Ga.uphsspSPCYGCppsl....YPh+ILPPR..Rs....ss.+opNpHSChAE.CYpRhEPaVPGspCsHP+SRptNphPKVlaPS.hHISoGSTluTCLSQGSL.E.-lspLIL-Dl ..........REKFHGKV.osc+AssLMR+FuSDHTGVGRSIVYsVKQcDGQ-LSNsLDAQDPP...........EDhKQ................................DpDIQAVA...TSLLPLTcsNLRMFQRA.Q-DLIPAVDRQFACSSC.DHVWWR..RVPQRKE.VSRCRKCRKRY-PVP.sDKMWGlAEFHCPKCRHNFR....Ga.AQMGosSPCYGCGaPV....aPTRILP.PRhDR-....scRRSsHTHSCS.AtDCYNRREPHVPGTSCAHPKSR+QNHLPKVLHPSssHISSGSTVATCLSQGuLlE.DLDsLILEDL............................ 0 4 7 11 +14987 PF15136 UPF0449 Uncharacterised protein family UPF0449 Bateman A agb Jackhmmer:Q9UFG5 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. There is a conserved LPTRP sequence motif. 27.00 27.00 27.40 28.20 24.40 26.90 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.45 0.72 -10.27 0.72 -3.78 9 48 2012-06-28 13:41:22 2012-06-28 14:41:22 1 2 39 0 29 50 0 89.30 47 80.26 NEW uKKRsVLPTRPsPPoVEQILEDVcsA.usDPVFpsL....sss.s.pstss-uptEph.............Y.QSRpYlshNpRLppAtssLcp+p-pL+pAGEcLEp-lspV .......KKRVlLPTRPsPPTVEQILEDV+uAsscDPVFThL..........cssss....t...p...tcss..-uttEph..........................................YQQS+sYlshNpRLppAt..s.Lpp+p-tLptsGppLcppl.p................. 0 5 8 16 +14988 PF15137 DUF4571 Domain of unknown function (DUF4571) Bateman A agb Jackhmmer:Q9NYP8 Family This family of proteins is functionally uncharacterised. This family of proteins is found in vertebrate. This family includes human protein C21orf62. 
27.00 27.00 122.70 122.30 25.50 24.90 hmmbuild -o /dev/null HMM SEED 219 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.52 0.70 -11.34 0.70 -4.91 6 34 2012-06-28 13:43:34 2012-06-28 14:43:34 1 1 27 0 20 29 0 213.10 65 94.93 NEW MAPPSuHsLLLhuALGlFALssFTcGQ+NSTLIFTKENTIRNCSCSADIRDCDYSLANLMCSCKTVLPLAlEpTSYsG+LTIWFTDTSALGhLLNFTLVpDLKLSLCGTNTLPTEYLAICGLKRLRlsTEAKHPSsEQSLLIHsGGEucsREKsh.La+GWQTChYISFLDMALFNR-SuLKSYSIENluSlAssFPsFSYFcTFPl.oNKSYVVTFIY ........Ms.P.tasLLLhusLGlFALssFT+GQKNSTLIFTKE.NTIRNCSCSADI..RDCDYSLANLMCoCKTVLPhAl....E+TSYs..G+LTIWFTDTSsLGpLLNFTLVQDLKLSLCuTNTLPTEYLAICGL+RLRIssEA.KpsaPEQSLLIHSuu-o-sc-cshhLpKGWQsChYISFLDhALFNR-SuLKSYSIEN..VoSIANsFPsFSYF+oFPhsSNKSYVVTFIY..... 0 1 2 6 +14989 PF15138 Syncollin Syncollin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VAF6 Family This family has a role in zymogen granule exocytosis [1-2]. 27.00 27.00 27.90 27.60 26.40 26.00 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.59 0.71 -4.17 6 27 2012-06-28 13:46:15 2012-06-28 14:46:15 1 1 23 0 17 28 2 110.00 61 83.22 NEW ACPsPA-LKs-sGo+hCARhaE+SssYYDpsCGGu.LsVcPGsDhPYhPSsWsNpISSLVVupRCpLTVWSppGKpGssRKFSAGoh.+LcEY++GlFGsWscSIuuhYCpC ........ACPssA.DLKcsDGTRsCA+LY-KSDPYY-NCCsGApLSlEPGsDLPYLPSsWsNssSSLVVusRCELTVWSppGKuGKT+KFSAGoYPRLEEYR+GIFGsWuNuISulYC+C.. 0 2 5 9 +14990 PF15139 DUF4572 Domain of unknown function (DUF4572) Bateman A agb Jackhmmer:Q5VTT2 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between 160 and 220 amino acids in length. 
27.00 27.00 36.50 35.80 25.50 24.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.43 0.71 -4.48 10 47 2012-06-28 13:47:15 2012-06-28 14:47:15 1 3 40 0 31 48 0 169.10 43 81.23 NEW ERKGSLsL+Ss+hcYSsssLlpsWHpsREAcPKDYDlcshsstKpNLHpSTY+RlG..oDcsshslSpT+-thuQ.Vhlpc-apchh....++uhlchpo...hspshlE+Ds-tssouhcslhhc......psschcp.cYcTThppDYpsPYPYp....PssssP...sshslspRKh+spFsDL-up+RhGhspWpD-s.....tlht.pts+pclYcsp ........ERKGSLhLRSp+hpYSpssLs.sWHpsREA.PKsYDlcs.s...ts+pLppSTYpRlG..TDcsshhhSET+-phuQ.lhLpp-atchc....p+sLLs.-T...hspullERs.suhPtoGFGulhsR......HPP-hpKhph.TThppDYssPYsY......s.s.P...tsaSlsaRKCpSQFsDlss..+RhGhpoWpD-s.....tlhs.p.h+tpla...s........ 0 10 11 19 +14991 PF15140 DUF4573 Domain of unknown function (DUF4573) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6P6B1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically approximately 360 amino acids in length. 28.20 28.20 28.40 28.70 27.80 28.10 hmmbuild -o /dev/null HMM SEED 174 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.87 0.71 -11.32 0.71 -4.66 10 82 2012-06-28 13:48:36 2012-06-28 14:48:36 1 15 39 0 62 80 12 137.60 21 37.03 NEW AsDPstsTccTpPLcGlpEs-PPQPuGKDDs.sspppK+DlcAlTEspPLKGsAEsEPltstschpPLRssuEpDustAVcshEsPQsAuEMKPLpTAEpI.PLEuApEhpPpEAsGKscQsQlsEslPKEssSPEI..LEGSQ.lEsucppQLpEsLGcsEQsQsLEsVPKENto ....................................hps..s..t.ts.ps.tt.ps.t.s.tthcsspssopspPlpssspscPhts.sphpPlpsssphcshpslpss.csspsssp...hcPhpsspplpP....lpsspphpP.pshstsc.sp.hp.h..................................................................................................... 0 34 39 43 +14992 PF15141 DUF4574 Domain of unknown function (DUF4574) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6UW78 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 86 amino acids in length. 
27.00 27.00 29.40 28.70 26.00 25.20 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.89 0.72 -9.81 0.72 -4.25 11 54 2012-06-28 13:51:19 2012-06-28 14:51:19 1 2 48 0 30 53 0 82.10 39 95.11 NEW MsulRplLhusullGhG.GlGYulaslloPGEERKpEMLKshPEusPhRh-EscKpstLlhpsLcEAApTsENlAR....hhGu.t.......ppp ...............MtuhRhhh.slsllGhG.GlGhuLhsLloPGEcpKp-hLKphPppsPtpp-Est+pptLhhtsLQEAAsTpENlsh......h.s......stt......... 0 4 7 13 +14993 PF15142 INCA1 INCA1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VD86 Family This family of proteins inhibits cyclin-dependent kinase activity [1-2]. 27.00 27.00 37.60 36.90 20.40 19.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.32 0.71 -4.48 6 29 2012-06-28 13:54:22 2012-06-28 14:54:22 1 2 21 0 12 36 0 149.10 70 64.37 NEW Phs+CSRVVSRSsPPuLPSQSLtLMPQ+YGDlFWENLSQRPoPTWhEEQYsPPLLRATGCSQPGLYPPEGLPPPEhLCRRKRRRPpLuGM.QQGsGuIPARVRAVTYHLEDLRRRQRIINELKKAQWGSSGAAsEPLsLsE-GCthPSTocY...DlEEERAsYPQEEs+hLTsGRsQLLW Phh+CSRVVSRSsPspLPSQshR.MPQpYGDlFWcNLSQRsSssWhEEQaIPPhL..RATsCSp.uLa.P.EtLPPPEhLhRRK+RRPpLttM.QQG.GulPARVRAVTYHLEDLRRRQpIINELKKAQWGSSGAAsEPlslsE-GCthsSTscY...DLEEERAsYPQEEsphLTPGRsQLLW.... 0 1 1 2 +14994 PF15143 DUF4575 Domain of unknown function (DUF4575) Bateman A pcc Jackhmmer:Q6ZSN1 Family This family of uncharacterised proteins is found in eukaryotes. 27.00 27.00 148.00 38.40 20.30 18.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.94 0.71 -4.42 4 8 2012-06-28 13:55:04 2012-06-28 14:55:04 1 1 1 0 2 14 0 73.10 36 95.59 NEW spGGpcuhLSlPuShGVPPssAMGVLRARGspGAGSQS.PRsGs..P..........pctlhLS.PpLhGsAS..CDGuPKuQGGKRuWhSV.sSR...........................GVPPPsAhGVLtARGGRG.AGSpSLPRGsshP.hch .............................................................ttctlhlS.shL.......s.Gs.+spGsptsh.ph.sStGlPPPsAhGVLtARGtcG.s.spp.............. 
0 2 2 2 +14995 PF15144 DUF4576 Domain of unknown function (DUF4576) Bateman A pcc Jackhmmer:Q6UWT4 Family This family of uncharacterised proteins is found in eukaryotes. 27.00 27.00 69.40 67.80 22.00 21.30 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -9.90 0.72 -4.36 5 24 2012-06-28 13:56:29 2012-06-28 14:56:29 1 1 19 0 13 22 0 83.00 70 99.75 NEW MAVSVLRLTlVLGLLlLILTCQADDKPp-...KPD-KPDDSGKNPEP-FPKFLNLLGSEIIENAVEFILRSMTRSTGFMEaD...DKQGEHSoK MAVSVLRLTlVLGLLlLILTCpADDpP.-.....+PD...cKPDDSuKsPcP-FPKFLsLLGTEIIENAVEFILRSMoRoouFhEht...sppsp+........... 0 1 1 1 +14996 PF15145 DUF4577 Domain of unknown function (DUF4577) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N8F7 Family The function of this family of proteins, has not, as yet, been determined. Members of this family are as yet uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically 128 amino acids in length. 27.00 27.00 125.50 70.50 23.40 22.70 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.01 0.71 -10.44 0.71 -4.23 3 33 2012-06-28 14:01:31 2012-06-28 15:01:31 1 2 26 0 19 25 0 118.50 71 98.86 NEW MTHsSQDAGSRGhPE-RKLYVVDSINDLNKLSLCPAGSQHLFPLQEKIPDsGTsPGNGuRGLFFMGLIlVLIVSLALVSFVIFLIVQTGNKMDDVSRRLTAEGKDIDDLKKINSMIVKRLNQLDAEQN ............MsHSSQDsGSpGl..pEDtKLYVVDSINDLN.KLNLCPAGSQHL..FPLE-K..lPshG...TN.SG...NG..S....+SLFFVGLlIVLIVSLALV.FVIFLIlQTGNKMDDVSRRLsAEGKDIDDLKKINsMIVKRLN...QLDuEQN.... 0 1 1 2 +14997 PF15146 FANCAA Fanconi anemia-associated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VG06 Family This family of proteins plays a role in the Fanconi anemia-associated DNA damage response [1]. 
27.00 27.00 27.30 47.50 20.90 26.70 hmmbuild -o /dev/null HMM SEED 435 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.70 0.70 -12.57 0.70 -5.58 6 50 2012-06-28 14:04:41 2012-06-28 15:04:41 1 2 34 0 23 52 0 384.00 58 57.07 NEW supAGQ+IK-LLSuIGsVSERVS.LKKAVDQ+N+ALssLNpsMNVSsALLSuppGs+P........IuCTlosoWSpL.hpDsLhATClLENSSuFSL-pGWTLCIQVLsSspAL-h-SusSAhTYThPVDpLsPGs+REVTLPLusucsGsLDLPVTlSCsLaYSLREllGsuLsss-shcs...tp.s..lLP-Q-GlCLPLscpTVDMLQCLRFsuhssspstus..s.huPspDPVcTFL+ospssssp.uGsu.S.p.....spYhPPSsASI+VSuELL+uAL+.soaSulsLssATLpWLLAENAAssllpupsloSlpGhAPDGs-V+Lhl+EVuloDLsPAGPIQAlEIQVESSSLAshCRhHHAllpRlQsLVhEQAApGSusPDLRhQYLpQhpsNHEoLL+ElQoLRD+..LCsED-hS.psuTup+LLplY+pLRsPSLlLL ..............s.tpsGp+IK-LLSGIGslSERVShLKKAVDQRN+ALssLNEsMNVSCAL.....L...SS.tpGs+P........ISCTsoTsWSRL.hpDsLhATClLENuSsaSL-pGWTLCIQVLsSSpAL-hDussSAhTYTlPVDpLuPGs+REVTLPLGPuEsGsLDLPV.TVSCsLaYSLREVlGuAh.ssS-u.-ss.h.-csPsshLP-Q-GlCLPLScpTVDMLQCLRFPuLA.....ss.pspuP..u.huPspDPVsTFLcos+t...P..uups..uGPu.SLR..........AcaLPP..SVASI+VSAELLRAA..Lc.D........ucSG..l..........s.....LCCATL.pWLLAENAAlDll.......+A.....p.....uLSSlQGlAPDGs-V+LlV+...E.....VAhTD..LC....P..AGPIQAVEIQVESSSLAshCRsHHAVltRhQs.MVsEQ.AAQGSSsPDLRlQYL.RQIauNHEsLLREVQoLRDR..LCTED-uS.SsATAp+LLQVY+QLRpPSLlLL............. 0 2 4 10 +14998 PF15147 DUF4578 Domain of unknown function (DUF4578) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96A22 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 44 and 137 amino acids in length. 
27.00 27.00 27.50 72.50 25.20 24.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.80 0.71 -3.76 4 27 2012-06-28 14:06:32 2012-06-28 15:06:32 1 2 21 0 13 26 0 116.80 67 94.04 NEW MGNRLCCGGSWSCPSTFQ+KKKhGSpsRhTLphQp......p.ht.NsoKsH-Tpu+TYEQVLpQPuSQcRS.puLpSEESsLHYADIQVhppspPRShpEVKHLpLENATEYATLRFPQATPRYDSKNGTLV .....MGNRlCCGGSWSCPSTFQ+KKKTGSQsR.pTL+..Q............QQlpQNusKG+-TpGHTYEpVLpQ.sSQcRS..GLhpE-SsLHYADIQVCS+spsR...EVKHlpLENATEYATLRFPQATPRYDSKNGTLV...... 0 1 1 2 +14999 PF15148 Apolipo_F Apolipoprotein F Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q13790 Family \N 27.00 27.00 27.30 31.90 21.40 26.80 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.55 0.71 -11.38 0.71 -5.01 8 40 2012-06-28 14:12:08 2012-06-28 15:12:08 1 3 28 0 20 37 0 180.80 47 61.13 NEW PloCQsLLspu..shsphAPLPcFLs.LALclsLEphGC.sEsa.LQLQLhchGGh-uTETLI+c......S++oscchulsslpuhLptLutpsssh+RscRSl.ssEsCcpEpc.sl+clAQL.......LPulshhhpLtTAlh.ATQpCoDcuhE+hc-suhcLshs..hp.AshshsspshlIupulpslh+ssVphlhpYaQ .....................................PhoCQ.Lh.pu....sphAPLPcaLssLALcssLEcsGC.s-shsLQLQLh+.GGlsATpsLIpaLpt..LppuppspcpsSlcALsSALQLLAp...Ep.uspRspRSl.ssccC-sEcEQsVHsllpL.......LPuVGTaYNLGTALYYAoQNCssKA+ERGpDGAIDLGYDLLMsMsGhoGGPhGlsIoAuLKPAl+uGVppLIpYY.......................................................... 0 1 3 4 +15000 PF15149 CATSPERB Cation channel sperm-associated protein subunit beta protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H7T0 Family The function of this family of transmembrane proteins, CATSPERB, has not, as yet, been determined. However, it is thought to play a role in sperm hyperactivation by associating with CATSPER1 [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 220 and 1107 amino acids in length. 
27.00 27.00 182.50 52.80 25.30 21.70 hmmbuild -o /dev/null HMM SEED 541 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.89 0.70 -12.77 0.70 -6.31 5 43 2012-06-28 14:14:55 2012-06-28 15:14:55 1 3 29 0 29 39 1 462.60 52 59.02 NEW +FcsIHhGKVIHSSKTGpAYIRcVhpHcT.PKGFhoSVIAElIEPFGlEss+ESsCLsSsLpIsauGNlaYpLoLpSQsspupFcuTDlEKTVlIPGYSSFLITcIlDspTAlAlATMPpolssNLsFLpuSWFLYNF..GptsGRoWpItoKPCNYWl.QQDphDuhSLNlVKYIDlGNolsFphKVIPssKuhpThEIP....LloVlVGNPsLLEVKApGaFDss-NYhLsIaluuKshppGSTSLAlIlWEuSocCaVoTllPTlKSSCSYLRoMHHIPu+aIP.EDWISGVHKDSQGFNMIKTLPINYRPPSsMGIuIPLTDNFYHADPS+PIPRNtFHpSKcTGKYKQCANVooREcCNCTccQKFSHAVAFSDC+EKVaRFKFPVTQYPVuLEIaNER-+IolEoPYLVTlTEVNMRcNWcLKHoVPENVKKMKsYLEPlL+oPVYNPLGLNLSIKGSELFHFRVSVV.PGVTFC-LpEEFQIYVDEsPLPFPGHsLIAVATAVVLGGLIFIAFlFQL+NIHPl+shp+.I+sNssphSoool ................................pFpslHhGKhIp.tpoGpAhIcKlhpHsh.spGFhSSVlsEhhcPFslEphp-SsCLsSSL.I.s..psGsh.Y+LoLp.p..sh..pu.FpsoDIEKTVVlPGYSSFLITpIlDspNALAlATMPppsssNhTF.csoWFLYNF..Gp+sGRpWpIhs+PCNYWh.Qpc..-o.SLNllKYIDLGsohshphKlI..p.s+.uhphhclP....LLpVhVGNPsLLE.VcscuhFD-oDSYlhpIsssSKhLppGSTSlAhlhWpASscChVTThVPTLKSSCSYL+oMHHlPu.......phIP.EDWlS..Gl..H+DSpGFNhIKTLPlNYRPPSNMGIAIPLTDNFYHADPSKPhPRNhF.hSKco.GKaK.QCANhooRcECNCTpcQKhSaAVAFSDC+EKVPRFKFPloQYPluLcIhs.Ec.splPlc.....sPYLVTlTEVN.RpNWcLK.H.slP-sl++hKpalEshltssVYNP.GLNLSIpGSELFHFRVoVl.sGVTFCsLlEEFQ.IYVDEsPLPFPGHsLIAluTAVVLGGLIFhAFh.FQlpsIHP.hpshpphhhpp........h................ 0 6 7 12 +15001 PF15150 PMAIP1 Phorbol-12-myristate-13-acetate-induced Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q13794 Family This family carries a BH3 domain between residues 23 and 40. 27.00 27.00 27.50 33.80 25.10 24.30 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.37 0.72 -8.66 0.72 -4.20 3 17 2012-06-28 14:17:15 2012-06-28 15:17:15 1 2 13 4 9 25 0 50.50 67 80.49 NEW MPGRKSRKSsQ..PuPTRV..PsDLEVECAIQLRRIGDKLNFRQKLLNLISKLFRLGT ......MPG+KARKsAQ...suPsRs...s-LE....VECAhQLRRhGDKLNFRQKLLNLIuKLFp.GT.... 
0 1 2 2 +15002 PF15151 RGCC Response gene to complement 32 protein family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9H4X1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 44 and 130 amino acids in length. There is a conserved KLGDT sequence motif. 27.00 27.00 30.50 72.90 25.20 25.00 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -10.66 0.71 -3.68 7 49 2012-06-28 14:17:28 2012-06-28 15:17:28 1 2 36 0 28 52 0 124.20 66 97.60 NEW M+Ssssp.tstshhh..................-.ts-Lu-lLpEF-sVlc-F.tSPhpppp.tY-cHLcphKRRoutSlSD.SGlsDSE.Su-o.htsShshS-EcLNososs..........TssKAKLGDTKELE-FIADLD+sLtp..M .............................tstsshh..................ssstDLoDsLCEFDAVltDF.uSPhccRHF+Y-EHLE+MKRRSSASVSDu.SGFSDSE.SADSLYRNSFSFSDEKLNSPTsSoPuh..oPssoPpKAKLGDTKELEDFIADLD+TLAS............. 0 1 5 13 +15003 PF15152 Kisspeptin Kisspeptin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q15726 Family \N 27.00 27.00 29.80 29.20 20.50 19.40 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.15 0.72 -9.84 0.72 -3.29 10 51 2012-06-28 14:20:42 2012-06-28 15:20:42 1 2 34 0 12 56 0 74.40 49 58.57 NEW ssh.puLRss-p+Pss..Ap.ss+hou.s..scS.usGtphsuhshspSRlIPuPpGtlLVpREKDlSuYNWNSFGLRYG ....sWppu.pCsE+KPss..At.psRtsu.ssPsES.SuG.ppsuhs..uspSR.IPAPpGAlLVQREKDLSsYNWNSFGLRYG 0 1 2 5 +15004 PF15153 CYTL1 Cytokine-like protein 1 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9NRR1 Family The function of this family of proteins, CYTL1, has not, as yet, been determined. However it is thought to be a secretory protein expressed in CD34+ haemopoietic cells [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 134 and 145 amino acids in length. There are two conserved sequence motifs: PPTCYSR and DDC. 
27.00 27.00 75.40 27.90 21.10 20.80 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.56 0.71 -4.46 9 49 2012-06-28 14:21:46 2012-06-28 15:21:46 1 2 38 0 26 39 0 123.40 55 96.34 NEW phsLhhLhsllu...hhhhup..ssPPTCYSRhLsLS+EIhsthpcLpsspsscsClchLPclalDlHNsClhoKLRDFlh......sspCtchs+lshLKc+lppLYsIhsphC+RDLVFhoDDCpALEsshsss.......phhs-.pp ....................h....hLLhLls...hs.hup..s...sPPTCYSRhLuLS+EIspsFppLQsoEPs-sCVchLP+LYLDIHNYCVLsKLRDFVA......SPpCh+hspVcsLK-KlRpLYTIMNSaCRRDLVFLoDDCsALEhPIsss.Ts.sD............. 0 1 3 9 +15006 PF15155 MRFAP1 MORF4 family-associated protein1 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9Y605 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 127 amino acids in length. 27.00 27.00 58.40 35.10 25.00 25.00 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.71 0.71 -3.80 2 52 2012-06-28 14:25:25 2012-06-28 15:25:25 1 4 22 0 17 44 0 117.90 79 68.99 NEW MRPlDhsEhtEPcE....EPtp.....L.Ps.pthREslAuLpRE+uRAahRsRpKLhEhpshL.tIKopVEApEcuAhs.h.pPtsts-tRsA+hstcA-cKAtEhA+MuchlVELhpRIttsEss .....................MRPLDIsElsEPEEVEVLEPEEDFEQFLLPVINEMREDIASLoREHGRAYLRNRSKLWEMDNMLIQIKTQVEASEESALNHlQsPustA-tRsuchCEKAEEKAKEIAKMAEMLVELVRRIE+SESS.................. 0 2 2 3 +15007 PF15156 CLN6 Ceroid-lipofuscinosis neuronal protein 6 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9NWW5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 190 and 310 amino acids in length. 
27.00 27.00 127.10 46.60 24.00 23.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -11.82 0.70 -5.30 4 65 2012-06-28 14:27:27 2012-06-28 15:27:27 1 3 39 0 31 62 0 249.00 76 86.16 NEW RtGStcscs.sspsu.FHhDLWhhFTLQNWlLDFGRPIsMIlhPL-WFPLNKPSsGDYFHMAYNIITPFLLLKLhERSP+TLPRSslYlSIITFVMGASIHLVGDSlNHRLlhSGYQhHLSVRENPIIKsLpPtTLIDSFELLYYYDEaLGHsMWYIPFFLILFlYFTGCFTplKAEp+MPsSAWlLLuPSulYYWYLVTEGQIFIlFIFTFFAMlAhVhHQKR+GhlLDSNGLFLhYSFulTLhLVulWVAaLWNDpVLRKKYPGVIYlPEPWAFYTLHlpsp ...............ps--.sspsu.FHLDLWFYFTLQNWVLDFGRPIAMllhPLEWFPLNKPSVGDYFHMAYNVITPFLLLKLIERSP+TLP..RShhYlSIITFlMGASIHLVGDSVNHRLlFSGYQpHLSVRENPIIKNLKPETLIDSFELLYYYDEY....LGHsMWYIPF...FLILFhYFSGCFTssK..uE.u.pMPssALLLluPSuLY..YW...........YLVTEGQIFILFIFTFFAMLALVLHQKRKtLaLDSNGLFLF.SFuLTLLLVALWVAWLWNDPVLRKKYPGVIYVPEPWAFYTLHVSS.p... 0 2 5 13 +15008 PF15157 IQ-like IQ-like Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q1A5X6 Family This family of proteins includes Human IQ domain-containing protein J (IQCJ). 27.00 27.00 27.90 36.20 26.30 22.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.73 0.72 -10.32 0.72 -3.79 3 41 2012-06-28 14:29:59 2012-06-28 15:29:59 1 4 29 0 21 50 0 89.20 78 58.91 NEW MRLEELKRLQNPLEQVNDGKYSFENHQLAMDAENNIEKYPLNLQPLESKVKIIQRAWRcYLpRQ-....sLGKRSPSPPSl...SSEKLSSSlSMNTFSDSSTP .....EELKRLQNPLEQV......NDGKY.hENHQLA..MDsENNI.EKY.LNLQPLESKVKIIQRAWREYLQRQ-.....PLtKRSP.SPPSl...SS-KL.SSSVSMNTFSDSSTP 0 1 3 6 +15009 PF15158 DUF4579 Domain of unknown function (DUF4579) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q2WGJ8 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 192 and 239 amino acids in length. The human member of this family is C8orfK29. 
25.00 25.00 25.90 25.60 18.00 17.50 hmmbuild -o /dev/null HMM SEED 186 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.14 0.71 -4.77 4 30 2012-06-28 14:36:54 2012-06-28 15:36:54 1 1 24 0 22 27 0 156.90 47 77.13 NEW Gh.psERpluppLppNpFaPFtp..pPssFhLEYhhsoLaKull.FIlhhlhlshthlscVp+Q-..........TWsF.sYGlulGLWLhISShPpRRLVLNHsRGsYHFSIQGRTVCQGPhHLVYVRLALsSDA.tthFapLlLsGaplEshlLspho-Rh-ph-hLGRhIAR+lNlNYFDshs.......sShRpVVRHWs ......................p.ccpltp+LppN.FaPFhp....pPssFlLEYhhcsLaKGhLhFllsllh.lsht......lppspcQ-..........TWsF.saGlsVGLWL..l..lS.S.LP+RRLVLNHs+GhYHFSIpGRTVCQGPhHLVYVRLALsSD..u.GphaapLVLsGa+lEshsLspho-+h-phEhLGRpIARKLNlNYFDhhs.......sShRHllRHWs.......... 0 6 6 10 +15010 PF15159 PIG-Y Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q3MUY2 Family This family of proteins represents subunit Y of the GPI-N-acetylglucosaminyltransferase (GPI-GnT) complex. It may regulate activity of the complex by binding the catalytic subunit, PIG-A [1]. 25.00 25.00 25.10 25.20 24.60 24.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.48 0.72 -3.76 10 56 2012-06-28 14:46:25 2012-06-28 15:46:25 1 2 51 0 36 65 1 68.90 33 66.62 NEW hhGsLhllhGllhFluhhauAVlscllPsussshlSulp.DhYYsLLlPlTLPVhlssVYapWLShKlFKHA .....................huhhhllhshl.hsshhYus...s..ls...c.............hPps.sssh...hshh....aYsLLlPlTlPVhlhhshasWluhKhF+Hs.... 0 15 22 29 +15011 PF15160 SASRP1 Spermatogenesis-associated serine-rich protein 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q496A3 Family Spermatogenesis-associated serine-rich protein 1 is a serine-rich protein differentially expressed during spermatogenesis [1]. 
27.00 27.00 31.60 36.20 25.90 26.40 hmmbuild -o /dev/null HMM SEED 244 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.72 0.70 -5.14 3 40 2012-06-28 14:52:34 2012-06-28 15:52:34 1 3 28 0 18 50 0 193.70 59 77.00 NEW Ap+SDFLESKGCFANTTSS..GRSVSsSSSVETGLSVo-uPGLPRVhsYlDTAADLDpKoSSSHS........DHSSEsSLPEVQKDKYPEEFSLLKLQTKDGQRPEWTFYPRFSSNIHTYHVGKQCFFNGVFLGNRRSLSERTVDKCLGKKKYDIDPRNGIPKLTPGDNPYMaPEQSK-FaKAGSTLPPVNFSIVPYEKKFDTFIPLEPLPQIPNLPFWVKEKANsLKNEI+EVEELDNWQPAVPFLHuLLso .....................................p..h.tSpts.Apthss..sppsS..SsstpG.plpts.u.s+s.s..s.sstLs.K.S.opS..................s+S.chSLPEl.KccaPcEFSLLp.QTpDGpRPEWTFYPRFSSNIHTYHVGKQCFFNGVFhGN++SluERTVDKshG+KK..YDIDPRNGIPK.LTPGDNPYMaPEQSKsFaKAGSTLPPVNFS.lsPYEKKaDTFIPLEPLPplPsLPFW.KEKANpLKNEIpEVEELDsWQsuhPhhp.hh.................. 0 5 5 9 +15012 PF15161 Neuropep_like Neuropeptide-like Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5BLP8 Family This family contains putative neuropeptides [1]. 27.00 27.00 29.90 58.60 25.90 19.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.05 0.72 -4.49 3 42 2012-06-28 14:58:50 2012-06-28 15:58:50 1 2 30 0 23 43 0 60.10 73 47.24 NEW A-p-AGSAIPAESRPCVDCHAFEFMQRALQDLKKTAYNLDoRTEoLLLQAEKRALCDChPAs.L+ ........t..uGoslPApSRPCVDCHAFE.FMQRALQDL+KTAaSLDuRTETLLLpAE+RA...LCsChPA.....t.................... 0 1 3 10 +15013 PF15162 DUF4580 Domain of unknown function (DUF4580) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VVC0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 63 and 185 amino acids in length. 
27.00 27.00 41.10 62.70 26.50 26.00 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.87 0.71 -4.88 6 41 2012-06-28 15:08:27 2012-06-28 16:08:27 1 1 33 0 25 38 0 157.00 62 91.64 NEW sssp.ssslIIsuSLpsSElsphLps..psHKlRhScoltcsollFPLSGVAFLLlss.ph.h.......lsclppFlsh+pNuallLsuslas.pphthh..l..pRFLG.psLplLPVHssupslphMtTIAKhTsKPhhssIppRhpph+uhhlsp..usVWchLpplsLsp ........EphcWoTTlIISSSLcuaElATALEN..RSHKVRYSDSVEsGSIIFSLSGVAFLLMDscEChhSs...........EEhFLsKIEKFINIHpNSFLVLsAALHGPEEWcLMFRI.QQRFLG.sNLRILPVHNTsNAlsLMCTIAKoTSKPalDsICYRMIssKAYIIEQ..SPVW+TLQKIpLs................................. 0 4 6 9 +15014 PF15163 Meiosis_expr Meiosis-expressed Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JSS6 Family This family of proteins is essential for spermiogenesis [1]. 27.00 27.00 50.90 50.80 20.40 17.00 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.78 0.72 -3.62 10 56 2012-06-28 15:09:45 2012-06-28 16:09:45 1 3 50 0 36 56 0 75.20 66 74.37 NEW hoRAKcWScElEssYRFQpAGYRDElEYpplcpss.sERWPsp..GFVKKLQR.+DGsFhYaNKpREC-DK-lpKVKlYs ......hSRAK+WSEEIENLYRFQQAGYRDElEY+QVKQVuhVDRWPET..GYVKKLQR...RDNTFYYYNKpREC-DKEVHKVKlYt............ 0 10 13 20 +15015 PF15164 WBS28 Williams-Beuren syndrome chromosomal region 28 protein homologue Bateman A pcc Jackhmmer:Q6UE05 Family WBS28 is an integral membrane family. These proteins have been identified as being linked to Williams-Beuren syndrome, OMIM:194050. This family of proteins is found in eukaryotes, and are typically 266 amino acids in length. 
27.00 27.00 27.10 62.60 19.60 26.80 hmmbuild -o /dev/null HMM SEED 267 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.07 0.70 -5.31 5 30 2012-06-28 15:40:57 2012-06-28 16:40:57 1 1 22 0 16 30 0 239.20 61 99.54 NEW MEAlPsVRSSLhGILLlVlKLSVLLVQNRlHLYNFLLLKIsLFNHWLSGLAQEApGSss.Qs.HPPusIAACPLGRlLRAGLALlEVPsWLlLRGPRLsWAGhLGCARALGLAPKaLuAWEQLGLSAATWTDLFLSCLHuLMLAALLLLLLTWRLCQKAHCCuLGRLLSKALLtN+VVhcLLALLKRLYWWVEopTALTSWHLAYLITWTTCLASHLLQAAFEHTAQLAQAQEsEPQKu.SGsSSEoPLPEPsuPEAGPVLPEPGTPGE ....................MEAhP.VRSSLhGILLpVh+LSVLLlQNRsHLYNFLLLKIsLFNHWVSGLAQEApGSts.Qs.h.P.sh.suCPLGpALRAGLuLlpVPhWLlLpuPRLsWAuhLsssRslGLAh.hLuAWE.LGLSs.AsWpDLhLSCLHuLMLVALLLlLlTWRLCQKAHp.huLGhLh.SpAL..NplVhchLA.LRRLYWWVEohsALTSWHLAYLlTWTTCLASHLLQAAFEHTAQLAQ..AQEsEspcs.SGs...tssLsts.ssEuGslLsE.tTPtE........ 0 1 1 1 +15016 PF15165 REC114-like Meiotic recombination protein REC114-like Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z4M0 Family REC114-like members are necessary for meiotic DNA double-strand break formation. It functions in conjunction with Mei4. This family of proteins is found in eukaryotes. Proteins in this family are typically between 43 and 259 amino acids in length. 
27.00 27.00 49.40 27.60 26.80 26.10 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.90 0.70 -11.66 0.70 -4.63 7 59 2012-06-28 15:59:42 2012-06-28 16:59:42 1 3 38 0 29 53 0 177.40 46 87.50 NEW WPLpRYGRFh..sptcssssups.uttssPsWKVF-SNEESGpLlLTIVlSGHFFISQGQTLLEGFSLIsSpsWLK.IVRRhDCLLFsTshK....scSRhFRVQFuGs.ScEpALE+CCuCVQKL.upYVTVQssDshsppLp.u.Pu..psscSQsccp..phs.p.us..thppppsshshtsuhss..sphSh..........ppLAQolLssc.c.LPhsYcpSuWsAEELGPFLRLCLMDQNFPAFVE-VEKELKKL ........................................................................+lhtptcpt..h.hsll.uGahhl.pGpt.lLEG.FSLlsuppWLK..IsR+hDCLLFssphK....sc.SRhFRVQFuGp.S+EpALEcCsSCVQ+L.upYloVQhsDs..ppht.....Pu..tsstpp.pst...h..p.t..............tts..t..tphSh..........tpluQ.t..hLspt.t.LP.sYcp.....usassp-LtshLRLCLhDpsFPAFVEcVEcELKKl.............. 0 5 9 14 +15018 PF15167 DUF4581 Domain of unknown function (DUF4581) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N3F0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically 131 amino acids in length. 27.00 27.00 39.80 38.80 20.70 16.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.71 0.71 -4.24 3 37 2012-06-28 16:04:52 2012-06-28 17:04:52 1 3 31 0 20 34 0 108.80 82 93.65 NEW cpLADsAEKWCossPFDLIAsE-s.ERRhDFYA-PGlSFYVLCP-s..GssDpFHVWSESEDCLPFLQLAQDYISSCGKKTLHEILEKVFKSFRPLLGLPDVDDDTFEEYNADVEEEEPEADHQQMGVSQQ ..............ppLsDsA-KWCSssPF-LIhsE-s.ERRMDFYADPGVSFYVLCP-s..GCGDsFHVWSESEDCLPFLQLAQDYISSCGKKT.LHEVLEKVFKSFRPLLGLPDADDDAFEEYuADVEEEEPEADH.QMGVSQQ......... 0 1 3 9 +15019 PF15168 TRIQK Triple QxxK/R motif-containing protein family Eberhardt RY, Coggill P, Hetherington K rdf Jackhmmer:Q629K1 Family TRIQK member-proteins share a characteristic triple repeat of the sequence QXXK/R, as well as a hydrophobic C-terminal region. 
Xenopus and mouse triqk genes are broadly expressed throughout embryogenesis, and mtriqk is also generally expressed in mouse adult tissues. TRIQK proteins are localized to the endoplasmic reticulum membrane. This family is found in eukaryotes and members are typically between and 86 amino acids in length. 27.00 27.00 48.80 48.80 21.00 20.30 hmmbuild -o /dev/null HMM SEED 80 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.66 0.72 -4.06 6 38 2012-06-28 17:39:22 2012-06-28 18:39:22 1 1 32 0 21 31 0 75.90 70 94.04 NEW MGR........KDASos+hPVDQYRKQIGK.QDYKKTKPlLRAT+LKAEAKKoAIGIKElhLh...lsAILsLLhAaYAFFaLplSsshsl- .........MGR........KDAuThKLPVDQYRKQIGK.QDYKKTKPILRATKLKAEAKKTAIGI.KEVuLV...LAAILsLLLAFYAFFYLpLos-lD.s................ 0 4 6 9 +15020 PF15169 DUF4564 Domain of unknown function (DUF4564) Bateman A agb Jackhmmer:Q9BQA9 Family This family of proteins is functionally uncharacterised. This family of proteins is found in eukaryotes. This family includes the human protein C17orf62. 27.00 27.00 32.10 31.30 20.90 20.20 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.00 0.71 -4.56 8 54 2012-06-29 07:58:34 2012-06-29 08:58:34 1 2 47 0 41 65 0 171.20 52 93.47 NEW M.YMplEc+TushLHLcRuPuIRSWSLhVGIuSlGLAAAYYSoDohLW....KlFYlsGClFVALQNlE-WEEAlFsKpKsclpLcohsLYchlLTh.+tGpEp..VVl-LcclRDlsVQEE+lRYhGcGYlll...LRassGhSaPLTQoushGsRSDVEAlAshls+FLphcpltup...phspopssDsDpspDpu ...................................M.YMpVEp+TuohLHL.KRuPGIRSWSLLVGIhSlGLAAAYYSuDSlhW....KLFYVsGCLFVAlQNLEDWEEAlFsKssGcVhLKTFSLY++lLTL.+sGH-p..VVl.Lp-lpDVsVEEE+VRYFGKGYhVV...LRhuT..GF.SaPLTQSAshGp..RSDVEAlAcLIssFLcLcplps.t....p.spopsu-sst....st................................. 0 10 14 24 +15021 PF15170 CaM-KIIN Calcium/calmodulin-dependent protein kinase II inhibitor Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z7J9 Family CaM-KIIN is the inhibitor of Calcium/calmodulin-dependent protein kinase II (CaMKII). 
CaMKII plays a central part in long-term potentiation, which underlies some forms of learning and memory. CaM-KIIN is a natural, specific inhibitor of CaMKII [1]. This family is found in eukaryotes. 27.00 27.00 30.90 30.50 22.50 21.10 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.52 0.72 -3.99 5 59 2012-06-29 10:35:08 2012-06-29 11:35:08 1 1 33 5 31 45 0 72.70 70 98.10 NEW MSElLPYSE-KMusYGsDu-VGQlSFSCRLQDTsuFFAGuQuKRPPKLGQIGRAKRVVIEDDRIDDVLKGMoDKuPPGV ...........MSElLPYu--KhutaGs-s-suphSFSCRLQDTNsFFuusQuKRPPKLGQIGRuKRVVIEDDRID.D.VLKsMs-KsPsGV....................... 0 2 6 14 +15022 PF15171 Spexin Neuropeptide secretory protein family, NPQ, spexin Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q9BT56 Family Spexin, alternatively named NPQ, is a peptide hormone and is derived from a pro-hormone. This family of proteins has a role in inducing stomach wall contraction and is expressed in the submucosal layer of the mouse oesophagus and stomach. Spexin, like most peptide hormones, is a ligand for G-protein coupled receptors [1]. Spexin is also thought to have a role in controlling arterial blood pressure as well as salt and water balance [2]. 27.00 27.00 40.40 39.90 20.80 18.40 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -9.99 0.72 -3.99 3 34 2012-06-29 10:43:10 2012-06-29 11:43:10 1 3 27 0 20 24 0 79.00 66 73.58 NEW APQRLFERRNWTPQAMLYLKGAQGRRFLSDQSRRKDLuDRPPLERRSPNop.LTLPEAAALLLASLpKuQEsE-ENhD+ocaLEDsLhNW .APQthhERRNWTPQAMLYLKGAQGRRFISDQSRRKDLuDRs....PERRSPNsphLolsEAAAlLLASLQKspEstEcNhDp.sphL.DpLhsh.................... 
0 1 2 5 +15023 PF15172 Prolactin_RP Prolactin-releasing peptide Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:P81277 Family \N 27.00 27.00 32.60 32.20 23.90 18.30 hmmbuild -o /dev/null HMM SEED 47 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.02 0.72 -8.46 0.72 -4.74 8 46 2012-06-29 10:46:53 2012-06-29 11:46:53 1 1 34 0 17 37 0 46.20 57 48.59 NEW SRuap......Hsh-hRoP-IDPhWYsGRGlRPlGRFG+Rputhtcuupsph+ .........Rsap......HShEhRoPDIsPuWYsGRGIRPVGRFGRR+ushtcsspst.......... 0 1 3 6 +15024 PF15173 FAM180 FAM180 family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6P0A1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 117 and 182 amino acids in length. There are two conserved sequence motifs: ELAS and DFE. The function of this family is unknown. 27.00 27.00 34.90 34.60 22.40 18.80 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.68 0.71 -4.39 7 69 2012-06-29 10:48:52 2012-06-29 11:48:52 1 1 38 0 40 57 0 133.30 48 77.10 NEW LaPuAhRsKRu.uuhlNPs.....hQpolE-VpLLaElLLAGlphsst.tthplpDtELASLR+spcLcsICpcllP+pLs-I+RLoupLusphG..sL+hEDFERTlLThVYTA.plspu.pspQR-hWupohlpLapAlKtDL ..........LaPuApRsKRuuu.PlNPs........LQpShE-VELLaEhLLAtL-lsss.hplpIcDEELASLR+Apch+hlCpclIPKsls-I+RLsupLush.u..sL+p-DFERTlLThsYsAYRhshu.pucQ+-hWApuhspLaQAl+aDL.................................. 0 2 3 11 +15025 PF15174 PRNT Prion-related protein testis-specific Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86SH4 Family PRNT is a family of prion-related proteins expressed in the testis [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 52 and 94 amino acids in length. 
27.00 27.00 37.50 87.20 17.80 16.80 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.09 0.72 -8.66 0.72 -4.31 3 15 2012-06-29 10:52:38 2012-06-29 11:52:38 1 1 15 0 1 13 0 49.00 70 73.43 NEW NTsLhHSAWPLShLHQTVSTLKAVAVTHSLWHLQIPVDCQACNRKSKKIYC NhPILLSHYPLP...QQTETWKAAsAllSLWaLQSPGDGQACDRESVKIYC 0 1 1 1 +15026 PF15175 SPATA24 Spermatogenesis-associated protein 24 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86W54 Family This family of proteins bind to DNA and to TBP (TATA box binding protein), TATA-binding protein (TBP)-related protein 2 (TRF2) and several polycomb factors. It is likely to function as a transcription regulator [1-2]. 27.00 27.00 52.40 29.70 22.10 26.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.15 0.71 -4.52 6 45 2012-06-29 10:53:44 2012-06-29 11:53:44 1 1 33 0 24 40 0 148.00 65 69.24 NEW uKEEFpAlcKcL.-EKstHAKTKsLLAKEpEKLQFALGEV-VLSKQLE+EKhAFEKAhusVKsKAhQESuc+DQLloKCsEhp...pcll....+QEDlLNuKE.cI+-LpphlupQKpsh..............H+sphS-hc..........IQppQ-tYh..sp.h.scs+hpputphsGp .....................SKEEFQAVEKKLV.EEKAAHAKTKlLLAKEEEKLQFALGEVEVLSKQLEKEKLAFEKALSSVKS+VLQESSKKDQLITKCNEIE...SHII....KQEDILNGKENEIKELQQVISQQKQIF....................psphSshR..........IpK.Qtphh..ApslstKpKhssuh.....s.............................. 0 7 8 10 +15027 PF15176 LRR19-TM Leucine-rich repeat family 19 TM domain Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IVY1 Domain LRR19-TM is the single-span transmembrane region of LRRC19, a leucine-rich repeat protein family. LRRC19 functions as a transmembrane receptor inducing pro-inflammatory cytokines. This suggests its role in innate immunity [1]. This family of proteins is found in eukaryotes. 
27.00 27.00 45.60 44.30 26.80 26.00 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.76 0.72 -10.58 0.72 -4.33 9 67 2012-06-29 10:54:56 2012-06-29 11:54:56 1 8 31 0 41 50 0 103.90 48 43.54 NEW souSssuPusuoG...uRuWPlLVGVVluAlllSLLIALAAKCpLC++ahsSYpH+PLsEs.....................GpuspPsVs.....csE...............DDDGFIEDNYIQPutst.Es ..........s.outsshstsups....u+uWshLVGVVlsslshSLLIhlAhKC.lhhpahhSYpH+.LpEp..........................GhsspPpss.....psc...............DDDGFIEDpYIpstphp........ 0 2 6 12 +15028 PF15177 IL28A Interleukin-28A Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IZJ0 Family The protein family, Interleukin-28A, plays an important role in modulating the immune system. This protein family is induced by viral infection and interacts with a class II receptor [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 195 amino acids in length. 27.00 27.00 30.10 30.10 25.00 19.40 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.11 0.71 -10.85 0.71 -4.73 10 96 2012-06-29 10:56:35 2012-06-29 11:56:35 1 3 32 6 51 94 0 145.40 55 79.55 NEW ssuKGCHluQFKSLSPQELpAFKKAKDAlEESL.LKsWsCSSRLFPRshDL+QLQVhERPVALEAELALTLKVLEshADo..uLuslLDQPLHTL+HIHSpLQA..ClpsQPTAGP.RP+GRL+HWLHRLQEAsKKEStGCLEASVTFNLFRLLTRDLKCVASGD ........pt+uCclupFKSLSPp.ELpAFK+A+DAhE.-SLh.KshpCpS+lFPRshDL+pLQ......V.hERPlALEAELsLTLKVL.pshscs..uLsclL-QP....L+TL+HIpSpLpA..ClpsQPTAuP....+P.pGR.L++WLHRL.....pEA..cKESsGCL.EASVhFNLFRLLTRDL+CVAsGD....... 0 3 3 14 +15029 PF15178 TOM_sub5 Mitochondrial import receptor subunit TOM5 homolog Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N4H5 Family This is a family of transmembrane proteins thought to form part of the pre-protein translocase complex of the outer mitochondrial membrane (TOM complex) [1]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 50 amino acids in length. 
27.00 27.00 30.20 30.10 25.50 25.50 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.94 0.72 -8.55 0.72 -4.27 4 39 2012-06-29 10:58:52 2012-06-29 11:58:52 1 1 25 0 14 50 0 46.20 79 78.66 NEW MF+lEGLuPKhDPEEMK+KMRpDVISSVRNFLIYVALLRlTPYILKKLDSI ..........MFRIEGLuPKLDPEEMKRKMREDVISSIRNFLIYVALLRVTPallp............. 0 3 4 6 +15030 PF15179 Myc_target_1 Myc target protein 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N699 Family This family of proteins is regulated by the c-Myc oncoprotein. It regulates the expression of several other c-Myc target genes [1]. 27.00 27.00 27.70 27.20 25.10 24.80 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.42 0.71 -11.33 0.71 -4.72 7 50 2012-06-29 10:59:41 2012-06-29 11:59:41 1 1 38 0 29 42 0 176.40 62 89.94 NEW MApNsTshh.phhcsF.apsllLAFslSMllGLllGulIahllThhS.RRRASApIoptsspppppp...pS.s.hts+hGaYR.souh-R+Ss.SL..AuLohpRQsSlE.s...sshsRKsSFcuSTF+Phhpss.hs..s-psuQh...........sshssssssssshs.sss..pRssFahussuLRshhsoQTPPPAYDSlI+AF.E..o ..................MApNsTslh.sWscsF.WEDLIhSFTVSMAIG.LVlGGhIWALhsCLS.RRR.A.SA..sISQWSsSRRoR.......SSasHuLNRTGFYR.HSGCERRSNLSL...ASLTFQRQASL.EQA...NSFPRKSSFRASTFHPFLQCPPLP..VET-SQL...........hTLPuSs...sossl..sosHSLuRPDaaWSsNSLRhuhST.sPPPAYESIIKAFPD...S..................... 0 1 5 13 +15031 PF15180 NPBW Neuropeptides B and W Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N729 Family The function of this family, NPBW, which includes Neuropeptides B and W, is thought to be involved in activating G-protein coupled receptors, GPR7 and GPR8. It is thought to play a regulatory role in the organisation of neuroendocrine signals accessing the anterior pituitary gland. It is predicted that this effect will stimulate the increase in water-drinking and food-intake. This suggests it plays a role in the hypothalamic response to stress. This family of proteins is found in eukaryotes [1,2]. 
27.00 27.00 31.90 31.70 20.40 20.00 hmmbuild -o /dev/null HMM SEED 120 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.68 0.71 -4.47 8 57 2012-06-29 11:40:11 2012-06-29 12:40:11 1 3 30 0 29 48 0 111.70 41 85.80 NEW sphshlslAlsLLlus.PutAWYKpsAGPuYYSVGRASGLLSGlRRSPYsRRS-scuuAts.ut.............suspsp.pssLRShslCVpDlsPNLpSCEhLsDGsGsa............QCKA-VFLSLcShDChsA ........t...shhslhLhLLlhs.PuhAWYKpsAuPpYYoVGRAuGLLSGlRRSPYh+Rupspsssts.u...............tsshp.pssl+ohs.........lhlpshs.pL.pschh.sstGhh............ptps-shLoLcuhDt.t........................................ 0 2 4 13 +15032 PF15181 SMRP1 Spermatid-specific manchette-related protein 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NCR6 Family This family of proteins, SMRP1, is thought to have a role in spermatogenesis and may be involved in differentiation or function of ciliated cells [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically approximately 260 amino acids in length. 27.00 27.00 85.50 85.50 25.50 25.20 hmmbuild -o /dev/null HMM SEED 261 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.92 0.70 -4.99 5 38 2012-06-29 11:41:30 2012-06-29 12:41:30 1 2 28 0 21 48 0 226.30 62 93.33 NEW MFLFSRKTKTPISTYSDSYRAPTSIKE.VYKDPPLWAWEANKFVTPGLTpTMcRHVDPEAL.QKMsKCAsQDYTYKuSISGHPYLPEKYWLSP-EtDKCsPsYLss.......D+YNTWRTuPYSsh.WNKYTTYLPRLPKEsGMEThVRGMPLEYPPKPERLNAYEREVVVNMLNSLSRNpsLPQIsPRCGCVDPLPGRLPFQGYESsCSGRHYCLR..GMDYhsoGsPsT-RRLRPLCscpPThpolLpsssRsulsC.........YsSPslIlPhSEP ..MFLFSRKTKTPISTYoDSYRAPTSIKE.VYKDPPLhAWEANKFlT....PGLTpTM.cRH.VDPEAL.QKMsKCAsQDYoY+uSIsGHPYLPEKYWLSp-E.....t......DK.......CsPsYLsu.......DRYNTWRhuPYNsosWNKYTThLPRLPKEAGM.ETsVRGMPL-hPPKPERLNAY.......EREVhVNMLNSLSRNQ.LPpIsPcCGCscsLPGRLPFpGY-SsCSGRHYCLR..GMDYhssGsPss-R.+Lpshs.pp.T.pss.......tRsshpC.........Ys.Pslhhsh.p............................................... 
0 2 2 6 +15033 PF15182 OTOS Otospiralin Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NHW6 Family This family of proteins, Otospiralin, has a role in maintaining the neurosensory epithelium of the inner ear [1,2]. This family of proteins is found in eukaryotes. Proteins in this family are approximately 90 amino acids in length. 27.00 27.00 60.30 60.10 22.10 20.80 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.98 0.72 -9.42 0.72 -4.31 4 41 2012-06-29 11:45:03 2012-06-29 12:45:03 1 1 35 0 27 29 0 66.30 70 77.66 NEW A+Pl.tEtsPYtEsPAhPYWPaSTSDFWNYVpaFpolGAYsQlpDhARTFFAHaPLGsTLGacVs.p-E ......A+Pl.EEuDPYsEsPAMPYWPFSTSDFWNYVpYFQoLGAYsQIpDMARTFFAHFPLGoTLGaHVPYpE-.... 0 1 3 11 +15035 PF15183 MRAP Melanocortin-2 receptor accessory protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8TCY5 Family This family is thought to be involved in cell trafficking. It is required for MC2R expression in certain cell types, suggesting that it is involved in the processing, trafficking or function of MC2R. MRAP may be involved in the intracellular trafficking pathways in adipocyte cells [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 47 and 205 amino acids in length. 27.00 27.00 54.50 50.30 25.90 25.90 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.20 0.72 -4.38 6 76 2012-06-29 12:32:18 2012-06-29 13:32:18 1 2 40 0 41 79 0 84.20 54 53.03 NEW MANpTNuSs.aaSYEYYLDYlDLlPVDE+KLKAsKYSIVIAFWVSLAsFVhhLFLILLYMSWSGSP.Qs..Rsusppa.hCsWs+uhsLPLCl ...........pusss..Ss.h..apYEYY.-h..h.PVs.ctLKAHKYSIVIuFWVuLAsFVlFhFhlLhhhohoGuP.Q....csut++aphssaspshsh.Lp................. 0 2 5 14 +15036 PF15184 TOMM6 Mitochondrial import receptor subunit TOM6 homolog Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96B49 Family TOMM6 forms part of the pre-protein translocase complex of the outer mitochondrial membrane (TOM complex) [1]. 
This family of proteins is found in eukaryotes. Proteins in this family are typically between 43 and 74 amino acids in length. 27.00 27.00 36.20 49.70 24.20 18.50 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.22 0.72 -4.64 3 36 2012-06-29 12:33:59 2012-06-29 13:33:59 1 1 28 0 16 40 0 70.80 80 98.87 NEW MSuSsVKs.uAGSuG........VuDWlRusCRFATDRNDFRRNLLVNLGLFAAGVWVARNLSDFDLMSPQPlT ..............MASSGVsVoAAGSANEsPEIPDNVGDWLRGVYRFATDRNDFRRNLILNLGLFAAGVWLARNLSDIDLMAPQPGV........ 0 1 1 2 +15037 PF15185 BMF Bcl-2-modifying factor, apoptosis Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96LC9 Family BMF is thought to play a role in inducing apoptosis. It is thought to bind to Bcl-2 proteins [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 75 and 190 amino acids in length. There are two conserved sequence motifs: GNA and DQF. 27.00 27.00 27.30 27.00 17.80 17.20 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.50 0.70 -5.05 4 51 2012-06-29 12:35:41 2012-06-29 13:35:41 1 2 32 1 21 64 0 159.20 56 91.31 NEW -hPpCsEpshpphct..........................ccDsapu-sup.............sspsushhAuulhspspphDs..tchph.PlophhGsshRshptp-+ATQoLu.usuup.............ulhhsCGlptpP+tLFaGNAGaRlHh.PAsFthu.shhEpspptQ....................p.thpsEVQIuRKLppIuDQFpp.HlQ....pHppN.sthhh.lhhFhc.Lh.p.t...pssG.p .............................................................................CVE........L.........................EDDVFQPEDGE............PusQPGuhLSADLFAQS..LDCPLSRLQLFPLTHCCGPGLRPsuQEDKATQTLSPASPSQ.............GVMLPCGVTEEPQRLFYGNA.GYRL.l.PAuFsss.shtEpP.EuQ......................pHRsEVQIARKLQCIADQFHRLHhQ.......pHQQNpspsWWQlhLFLpNLALNtctNRpssG........................... 
0 1 3 7 +15038 PF15186 TEX13 Testis-expressed sequence 13 protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BXU2 Family The function of this family of proteins has not, as yet, been determined. However, members are thought to be encoded for by spermatogonially-expressed, germ-cell-specific genes [1]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 177 and 384 amino acids in length. There are two conserved sequence motifs: FIN and LAL. 27.00 27.00 82.70 79.20 21.80 20.20 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.24 0.71 -10.74 0.71 -4.64 12 77 2012-06-29 12:37:16 2012-06-29 13:37:16 1 2 22 0 44 57 0 147.00 52 44.30 NEW cs-DsoSGF+HucVlhFINEchu+pu+GPEFYL-NlSLSWEEVEDKL+sIL-DopVPpplKcACsWuoLALGVRFAtRQsQLQu+RVpWLpDhupLH+SAAhuLAS-LpcLpcQpEhEppEAAhQLp.spspLtEsp+ERDlhRh+lhpsEL .....s-DPuSGFRHucVltFINEchspps+GPEFYlpNhShSWcEVEDKLRuILpDopVPppsKcACsWuuLALGVRhApRQtpLQs+RVphLp-hsc.H+oAu.ALAS-LpcLppppEh-ppEAAhQLphspssLtcspcERDhLph+Lhph.................. 0 2 2 4 +15039 PF15187 Augurin Oesophageal cancer-related gene 4 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H1Z8 Family Augurin is alternatively named oesophageal cancer-related gene 4 protein. The function of this family of transmembrane proteins, is to induce the senescence of oligodendrocyte and neural precursor cells, characterised by G1 arrest, RB1 dephosphorylation and accelerated CCND1 and CCND3 proteasomal degradation [1]. Augurin has been found to stimulate the release of ACTH via the release of hypothalamic CRF [2]. This family of proteins is found in eukaryotes. Proteins in this family are typically 145 amino acids in length. 
27.00 27.00 81.90 81.80 24.00 23.40 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.58 0.71 -4.22 9 52 2012-06-29 12:39:44 2012-06-29 13:39:44 1 1 39 0 31 47 0 112.90 68 82.25 NEW spLc+lLpKR-sst...sPu+ssVAVstuKAKEFLusL+RsKRslWDRSRPDVQQWIQQFMYMGFDEs+LEsDLSYWMDpuRuuDQG....RQHHYDENAsIGPRsP.....poaRHGAsVNYD.Y ....NKL+hhLQKREAs....sPoKspVAV..sEsKAKEFLuSL+RpKRQLWDRoRP-VQQWYQQFLYMGFDEAKFEDDloYWhN+sRsGc-YY.sYaQ+HYDEDuAIGPRsP.....toFRHGAuVNY.DDY 0 1 6 13 +15040 PF15188 CCDC-167 Coiled-coil domain-containing protein 167 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9P0B6 Family The function of this family of coiled-coil domains, has not, as yet, been determined. Members of this family remain uncharacterised. This family of proteins is found in eukaryotes. Proteins in this family are typically between and 103 amino acids in length. 27.00 27.00 28.20 27.60 26.50 25.50 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -9.76 0.72 -4.04 11 60 2012-06-29 12:41:00 2012-06-29 13:41:00 1 2 48 0 40 60 0 83.00 45 80.04 NEW hSVs+EIDphEEclspC+ppl-plEp+L+cpcLocEpRpslE.......cEhstlpppLps.EccLptL++......ENpKshhlusAlhhlphLlYs ............uVs.EIDGLEEKLupCR+cLEsVss+L+ptELSsEsRcuLE.....................cE+ssLhs+hpshE+ELphLRp......ENRKshhLusulhllhsLlY...... 0 8 11 21 +15041 PF15189 DUF4582 Domain of unknown function (DUF4582) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A2RUB1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 126 and 788 amino acids in length. In humans, it is encoded for on the chromosomal position, C17orf104. 
27.00 27.00 34.80 28.40 19.60 24.50 hmmbuild -o /dev/null HMM SEED 172 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -10.69 0.71 -5.03 10 105 2012-06-29 12:45:26 2012-06-29 13:45:26 1 4 56 0 70 98 0 140.20 36 32.71 NEW pRousus-LHhpLEEChEQaRpLEKERKKTEA-LARpshGK+VS.SoNNhPlPRLsssPSRVDRLIVD.hREHAR..VlTLLuKMEpLRusslssslapALcpaLEAI+hlQspRpsEhhNhlpp.R...tth..h+ap-D+DlhsLAuAlpplspAsR+ARTAhWCuL.hTLshssssp .........p.G.hpplph+h.psh.Q.pth-.cE.c.pKpp.uLupNh.tctlp.SpsphslsRh.sus....sSRV.s.Rhhlsphpp.up..lhs.Lt+....cp...LtSs.sh.hphopshs+pl.slh.s.shhpp-hh...................................................................s.......................... 0 16 21 38 +15042 PF15190 DUF4583 Domain of unknown function (DUF4583) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q8N6I4 Family This family of proteins, also known as UPF0694, is found in eukaryotes. Proteins in this family are around 135 amino acids in length. In humans, it is found on the chromosomal position, C14orf109. 27.00 27.00 33.10 32.20 21.90 20.40 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.85 0.71 -10.86 0.71 -4.50 3 53 2012-06-29 12:59:36 2012-06-29 13:59:36 1 3 42 0 32 56 0 123.10 77 85.96 NEW MNFRQRMGWIGVGLYLLASsAAsYYVFEIS-TYNRLALEHIQp...........suppsPSuTTWppTLKTRLLuLPFWhWslIFLlPYLQVFLFLYSCTRADPKTVGYCIlPICLAVlCNRHQuFsKAS...NQISRLQLI .MNFRQRMGWIGVGLYLLASAAAFYYVFEINETYNRLALEHIQQ...........HPEEPhEGTTWTHSLKARLLSLPFWLWTlIFLlPYLQMFLFLYSCTRADPK.TVGYCIIPICLAVICNRHQAFVKAS...NQISRLQLI............................ 
0 7 9 16 +15043 PF15191 Synaptonemal_3 Synaptonemal complex central element protein 3 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A1L190 Family \N 27.00 27.00 67.30 67.20 23.10 19.80 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.30 0.72 -4.25 2 24 2012-06-29 13:00:14 2012-06-29 14:00:14 1 1 22 0 14 28 0 84.90 78 98.03 NEW Mssu.s-.p.h-sh.p...pLNpcLEKhhEpMEclSVphohMsYDMVVhRTsPsLAESh+pLEstF.pCK.........pEhcpp.t-..p.o.p ...MADuDPsERNYDNMLKMLSDLNKDLEKLLEEMEKISVQATWMAYDMVVMRTNPTLAESMRRLEDAFLNCK.........EEMEKNWQELLpETKp........ 0 1 3 3 +15044 PF15192 TMEM213 TMEM213 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A2RRL7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 154 amino acids in length. The function of this family is unknown. 27.00 27.00 99.80 99.70 21.40 20.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -10.07 0.72 -3.97 3 30 2012-06-29 13:01:11 2012-06-29 14:01:11 1 1 27 0 20 29 0 78.70 73 69.80 NEW uAusSs.SssouuTsH+PcsGTLSs..Cs-VDFCPQAARCC+TGVDEYGWIAAAVGWSLWFLTLILLCVDKLMKLTPDEPKDLQA .............t..tAou.SssSolTsHHPDs.GTLEp..C.NVDFCPQAA+CC+sGVDEYGWIAAAVGWSLWFLTLILLCVDKLMKLTPDEPKDLpA. 0 1 1 4 +15045 PF15193 FAM24 FAM24 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NFZ4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 101 amino acids in length. There are two conserved sequence motifs: FDLRT and CLY. The function of this family is unknown. 
27.00 27.00 37.60 37.60 17.10 15.90 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.64 0.72 -3.79 6 38 2012-06-29 13:01:55 2012-06-29 14:01:55 1 2 16 0 19 39 0 68.70 51 71.62 NEW LY.KIuKALKsA+.....-sEssh....DPsKssp-phIp.............AKsIssEoCtsLQCC-sCuhatsssSLPPChCshNEGL ...LYhKlu+ALKAAK.....-s-ssA....NPsKshh.sKs..p.............AcohssESCPuLQCC-sC+MYAsaDuLPPChCDlNEGL. 0 2 2 2 +15046 PF15194 TMEM191C TMEM191C family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NGB0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 302 amino acids in length. There are two conserved sequence motifs: QDC and RLF. The function of this family is unknown. 27.00 27.00 36.00 36.00 23.50 22.20 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.45 0.71 -4.23 3 23 2012-06-29 13:02:45 2012-06-29 14:02:45 1 2 17 0 11 31 0 105.90 59 74.86 NEW MEAAAALDAoRSGpEPCDSQLRRVQDCoGSLMEEVARADCEKRLFGGAGAGuIRLWALGALQTLLLLPLGFLuLPLLYLVLlcPsAlusGLtSLoS-AsLRRLRYTLSPLLELRARGLLPA .....................................................MEAAAtLDAhpuGpEPhDup.RtVQ.sstSLMEEVARADp.EhRLFGG.ssA.............hul.RhhsLuALQsLLhLPLhFLsLsLLahsLhcPsAlpthL.tpLsS-sshRRLRYTLSPLLELRApGLLPs............. 0 1 1 1 +15047 PF15195 TMEM210 TMEM210 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NLX4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 149 amino acids in length. The function of this family is unknown. 
27.00 27.00 27.00 60.70 26.90 18.10 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.67 0.71 -10.62 0.71 -3.96 3 17 2012-06-29 13:03:35 2012-06-29 14:03:35 1 1 15 0 8 14 0 105.80 72 79.88 NEW TYCECSLGLSREALIALLVVLAGVSASCFCALVIVAIGVhRAKGETCPRHs-NRLVGsYGVQEDRMDLHTVaVESHLMDPDLEVSMMPPLE-QGLhsMThPl-P.sP....PPPPPLsPp TYCECSLGLSREALIALLVVLAGlSASCFCALVIVAlGVlRAKGETC.Ptth-sRLVtpFGVQEDpMDLHsVaVESpLMDsDLEVShMPPLE-puLhsIsM-sssEEP....PPPPP..P............ 0 1 1 1 +15048 PF15196 Harakiri Activator of apoptosis harakiri Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O00198 Family \N 27.00 27.00 126.40 126.10 19.90 19.10 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.41 0.72 -3.74 2 15 2012-06-29 13:04:25 2012-06-29 14:04:25 1 1 14 2 9 12 0 91.20 88 100.00 NEW MCPCPhHRGRGPPAVCuCusuR.GLR.uAAQlTAhRLpALGDELHpRsM.RRRARsRc.PhPuhLPsh...WPWLCAAAQVAALAAWLLGRRsh MCPCPLHRGRGPPAVCACSAGRLGLRSS.AAQLTAARLKALGDELHQRTMWRRRARSRRAPAPGA.......LPTY...WPWLCAAAQVAALAAWLLGRRNL 0 1 1 1 +15049 PF15197 Leukemia_assc_2 Leukemia-associated protein 2 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O43262 Family \N 27.00 27.00 53.80 89.80 19.50 18.50 hmmbuild -o /dev/null HMM SEED 53 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.99 0.72 -8.77 0.72 -4.30 6 9 2012-06-29 13:05:10 2012-06-29 14:05:10 1 1 7 0 1 6 0 53.30 76 83.92 NEW MKMSFERCTARNKMFVNSAFTK.VDNYCTFL.KKhhhKshF.lphhpKtKK.DLNF MKMSFERCTARNKMFVNSAFTK.VDNYCTFL.cKhhFKshF.lphhpKtKK.DLNF.. 
0 1 1 1 +15050 PF15198 Dexa_ind Dexamethasone-induced Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95424 Family \N 27.00 27.00 138.80 138.60 22.60 16.60 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.43 0.72 -4.07 2 27 2012-06-29 13:05:54 2012-06-29 14:05:54 1 1 27 0 19 18 0 90.90 90 99.92 NEW Mssu.VhhpLDultsLls......LP.MaYlGLFFVNVLILYYAFLMEYIVLNVGlVFLPEDMDQALVDLGVLSDPuSh.YDsDoELDVF-GYLE ..M.GARVAAHLDuLGPLVsaVPPPLLPSMFYVGLFFVNVLILYYAFLMEYIVLNVGLVFLPEDMDQALVDLGVLSDPGSGLYDADSELDVFDGYLE 0 1 3 7 +15051 PF15199 DAOA D-amino acid oxidase activator Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P59103 Family \N 27.00 27.00 53.50 52.80 18.70 16.90 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.85 0.72 -3.35 2 15 2012-06-29 13:06:51 2012-06-29 14:06:51 1 1 6 0 2 10 0 67.10 71 58.39 NEW MAQRHLQRSLCPWVSYLPQPYAE......H.uKVhhstNhc...................Cpapp.pp..ssHh..ssTc.t ........MAQRHLQRSLCPWVSYLPQPYAELEEVSSHVGKVFMARNYE.....ASKDRRQPLERMWTCNYNQQKDQSCNHKEITSTKAE. 0 2 2 2 +15052 PF15200 KRTDAP Keratinocyte differentiation-associated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P60985 Family \N 27.00 27.00 76.00 68.30 25.50 18.40 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.29 0.72 -9.76 0.72 -4.21 3 29 2012-06-29 13:07:40 2012-06-29 14:07:40 1 1 23 0 16 20 0 76.80 71 78.34 NEW AALGoPp...EDTThuNYPoGTEGLNuEFLNFcKLQSAFKSD-FLNWHVLTDMFK+ALPFINWDFFPKVKGLRSAsPDSQ AuLGusE....EETTIsNYAutPEAFNspFLNlDKLRSAFKs-EFLNWHALFESIK+KLPFLNWDAFPKLKGLRSATPDAQ. 0 1 1 2 +15053 PF15201 Rod_cone_degen Progressive rod-cone degeneration Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q00LT1 Family This family of proteins is involved in vision [1]. 
27.00 27.00 29.70 29.70 18.40 17.40 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.35 0.72 -8.61 0.72 -4.12 5 14 2012-06-29 13:13:08 2012-06-29 14:13:08 1 1 12 0 8 16 0 49.10 76 96.08 NEW MCTTLFLLSTLAMLWRRRFANRVQPEPScVDGAVVGSuSETDLQSSGRE+..uPVK MCTTLFLLSTLAMLWRRRFANRVQPEPSsVDG..AVhGSS.-sDhQSSGRcc..pPlK... 0 1 1 1 +15054 PF15202 Adipogenin Adipogenin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q0VDE8 Family This family of proteins is involved in the stimulation of adipocyte differentiation and development [1]. 27.00 27.00 36.10 81.80 26.30 22.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.48 0.72 -9.98 0.72 -4.11 2 20 2012-06-29 13:14:00 2012-06-29 14:14:00 1 1 14 0 8 23 0 73.80 79 66.89 NEW MKYPLhPLVNDLThSFLVFWhCLPVuLLL.LhIlWL+FLLSQDScEsDSslChsWEPWSKGPuE.shcGThpGQEcc+..W .MKYPLhPLVNDLTFSFLVFWhCLPVGLLLFLLIIWLRFLLSQDSEENDSsVChDWEPWSKGPAEFCWcuTLHGQEcERPC.h.... 0 2 2 2 +15055 PF15203 TMEM95 TMEM95 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q3KNT9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 102 and 231 amino acids in length. There is a conserved LGG sequence motif. The function of this family is unknown. 27.00 27.00 86.10 33.20 21.10 19.40 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.51 0.71 -10.91 0.71 -4.81 3 20 2012-06-29 13:14:47 2012-06-29 14:14:47 1 2 16 0 8 32 0 134.10 68 76.95 NEW CVFCRLPAHDLSGRLA+LCSQsEA+pKECGASssFsAFALDEVSMN+VTEKTHRVLRVMEIKcSlSSLPLYWpWLRKTKLPQYTREALCAPAC........RGSTTLYNCSTCcGTEVSCWP+KRCFP.......GSQDLW-A+ILLLsIFGtsLLLGuLSLLVEp+ ..CVFCRLPAHDLSGRLARLCSQMEAp.K.ECGASPDFSAFALDEVSMNKVTEKTHRVLRVM.EIKculSSLPsYWpWL+KTKLPcYTREALC..sPAC........RGSThLYNCSTCcGhEVSCWP+KRCFP.......GSpD.h.h+lh.hss.Gss...Gsl............................. 
0 1 1 1 +15056 PF15204 KKLCAg1 Kita-kyushu lung cancer antigen 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5H943 Family This is a family of cancer antigens [1]. 27.00 27.00 37.10 87.60 18.90 17.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.14 0.72 -3.95 2 17 2012-06-29 13:15:39 2012-06-29 14:15:39 1 1 13 0 5 20 0 87.10 72 78.31 NEW QpssGEMSSNSTuLALVRP.SSoG.hpSNTDpN....LuVhsLShDIL.N.P+oIshQKRhLVNLphhp.KLsELEHhLl.KGhpGA.sphKS ....QRNTGEMSSNSTALALVRP.SSTGLINSNTDNN....LSV.cLSRDIL.NNFPHSIAMQKRILVNLphVE.KLsELEHhLVSKGhRGASsHRKS... 0 1 1 1 +15057 PF15205 PLAC9 Placenta-specific protein 9 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JTB6 Family This family of proteins was identified as being enriched in placenta [1]. 27.00 27.00 28.30 27.70 22.00 20.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.94 0.72 -9.45 0.72 -3.70 4 31 2012-06-29 13:16:29 2012-06-29 14:16:29 1 1 23 0 14 30 0 68.10 66 72.19 NEW AEP.sPutGDsutSsuCDRaMAVpcRLDVhEETVEKTVEHLEAEVKGLLG.LEELAWNLPPGPFSPhPDLLGDs ...........AEPhsPspGDsApSTsCDRHMAVQpRLDVhEEhVEKTV-HLtsEVKGLLGLLEELAWNLPPGPFSPsPDLLG-s......... 0 1 1 3 +15058 PF15206 FAM209 FAM209 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5JX69 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between and 170 amino acids in length. The function of this family is unknown. 
27.00 27.00 123.80 123.60 21.10 20.50 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.79 0.71 -4.39 5 40 2012-06-29 13:17:50 2012-06-29 14:17:50 1 1 24 0 22 48 0 149.80 61 88.16 NEW FMFSSLREKsKEPQGKVPCGGHFRIRQNLPEHTQGWLGSKWLWLhFVVVLYVILKFRGDSEKNKEQoPPGLRGCoaRSPlRKpQNASPNKDYAFNTLTQLEMDLVKFVSKVRNLKVuMAT..uSNLRLQsLEuPADPaNNVTIYEIWGEEDS .FMFSSLREKspE....PQGKVPC....G.GHFRIRQNLPEHAQGWLGSKWLWLlFVVVLaVILKF....ptDu.EK..NKEQo.PsGLRGssFRSPLKKsQNASPsKDhsFNTLspLEh-LVKFVSKVRNLKsAMAT..uSNL+LppsEhPADPap.lTIYEIWGEEsS... 0 2 2 3 +15059 PF15207 TMEM240 TMEM240 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q5SV17 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 54 and 175 amino acids in length. The function of this family is unknown. 27.00 27.00 94.00 93.90 26.60 21.00 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.31 0.71 -11.32 0.71 -4.70 2 25 2012-06-29 13:18:38 2012-06-29 14:18:38 1 1 21 0 18 34 0 141.90 78 98.56 NEW Mph.ssThhhMlhGAslVhAIsCl.DMNALLDRFHNaILP+hRG.-RVCHCsCGRHHVcYVIPY-GstSLssuSts..susSVoKQEhDLhLGLLhGFCISWlLLWLDGAhHCAlRhWRuSRaYss...SWpWlsphCNLR-LRRRhQhRp....-suusNhVHl+QKLYHNGHPSPR+L .................lhGtshh.tIsClMDMNALLDRF.HNYILPHLRGEDRVCHCN.CG...RHHlHYVIPYDGDQSVVDuSENYFVTDNVTKQEIDLMLGLLLGFCISWFLVWMDGVLHCAVRAWRAuRRYDs.....SWoWLPKhCsLRELt+RsH...a...EEssGNMVHlKQKLYHNGHPSPRHL.... 0 1 4 9 +15060 PF15208 Rab15_effector Rab15 effector Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6BDI9 Family This family of proteins has a role in receptor recycling from the endocytic recycling compartment [1]. 
27.00 27.00 78.90 78.60 17.90 16.80 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.77 0.70 -11.66 0.70 -4.91 2 26 2012-06-29 13:19:39 2012-06-29 14:19:39 1 1 25 0 16 26 0 232.30 61 97.83 NEW MGQKsSQQls.pDSpEV.uhCEVVStAIsHAAQKlKEYLGFE.PLSpLC.AusoLsElFLlHFVTFCQ-+GsDEWLTTTKMTKHQAhLFGADWIWTFWGs-KQIRLQlAVQTL+MuSLP.s-PKsC.....ESRuEE.ShK+uRFDKLpEFCNLlGEDCLGLFIIFGVPGcPKsIRGVVL-SV+pthhpuQLsGRKAVtQFlLETcDClSI+ELLGNCLSK+DGLp-hG+VYIpIL .......MGQKsSQQls.+DSpEl.slCEVVSpAlVHAAQKLKEYLGFEsP.S+LsPAuNTLNEIFLIHFITFCQEKGVDEWLTTTKMTKHQAhLFGADWIWTFW.GuDKQI+LQLAVQTLQM.....uShPP.sEucss.chsssES+....uE.EsSh+KoRF-KLEEFCsLIGEDCLGLFIIFGVPGKPKDIRGVVLDSVKsphs+upLPGtKAVtQFVL-TE-CVsI+ELLtNCLSKKDGL+EVGKVYIsIL........................ 0 1 2 5 +15061 PF15209 IL31 Interleukin 31 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6EBC2 Family \N 27.00 27.00 27.40 73.30 19.90 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.98 0.71 -10.63 0.71 -4.57 4 23 2012-06-29 13:20:31 2012-06-29 14:20:31 1 1 20 0 10 18 0 131.70 46 85.76 NEW opohshtt.hsppDlp+hl-.Lpp.S+sLhcDYpccE.oGlssspshpLPChS.Dpps.sNI..SuIhAYLcpl+sLScpoV.hsclIcpLsclph.ss.psNISVP..TD.oa-CKsFILTlLpQFSsCMspl.tp.Noss .........SHshPhthL.P.s.DlpKIlcELQsLSKhLLcDh.ccE...cGV.sSp.shpLPChosDuQsPsNIpSuAIhsYL+sI+p...Ls....s+oV.I-cIIEpLDKLh...FQcsPETNISVP..TD..oaEsKpFILTILQQFScCM-hshtuhsss.......... 0 1 1 1 +15062 PF15210 SFTA2 Surfactant-associated protein 2 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6UW10 Family \N 27.00 27.00 54.80 53.90 25.80 20.50 hmmbuild -o /dev/null HMM SEED 59 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.85 0.72 -4.39 4 17 2012-06-29 13:21:16 2012-06-29 14:21:16 1 1 15 0 6 19 0 57.40 70 70.04 NEW sGPGMTLQLKLKEoFLssSSYsSSFL-hLpKlCLLLHLPSGTNVTLHpAGS.HHVTCRs ......TGPGMTLQLKLKESFLsNSSY-SSFLELLEKLCLLLHLPSGTsVTLHHAtSpHHVsCps... 
0 1 1 1 +15063 PF15211 CXCL17 VEGF co-regulated chemokine 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q6UXB2 Family \N 27.00 27.00 32.80 32.30 22.70 19.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.88 0.72 -10.31 0.72 -3.59 6 23 2012-06-29 13:22:01 2012-06-29 14:22:01 1 1 20 0 12 17 0 89.40 67 76.13 NEW SPNsGVARGHRDp+QAspRWLpEGGQECECKDWFLRAP+RKhhsV.GhP+KQCPCDHFKGphKKTR+Q+HH+.............Ksp+PSRsCQQF...L+pCp ..................SsNPGVARGHRDp+QAS+RWLQEGGQECECKDWFLRAP+RKlMT..VsGLPKKQCPCDHFKGsVKKTRHQ+HHR.............KPNKHSRACQQF...LKpCQ. 0 1 1 1 +15064 PF15212 SPATA19 Spermatogenesis-associated protein 19, mitochondrial Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q7Z5L4 Family \N 27.00 27.00 74.00 28.90 18.50 17.80 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.62 0.71 -4.10 3 31 2012-06-29 13:24:21 2012-06-29 14:24:21 1 2 22 0 17 29 0 101.30 63 85.84 NEW SSDIEVsESEAVSVVQHWLKKTEEEASRuIK.............EKMSTNsPPTHGQDIHVTRDVVKHHLSKSDLLANQSQEVLEERTRIQFIRWSHTRIFQVPSEsp-DlMRDRIEQVRRSISHLoD-SuQDhShRsSsSEC ...............SSDl-VlEoEAVSVlpHWLKK..............TEEEASQuIK.............EKMShssPPTHGpDlHVTRDVVKH+LSKoshhus.SQEVLEERTRIQFIRWSHTRIFQVPSEhhp-hhp-RIEQVRRShspl.s......t.pss..pC.... 0 2 2 4 +15065 PF15213 CDRT4 CMT1A duplicated region transcript 4 protein Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8N9R6 Family \N 27.00 27.00 82.10 81.90 20.40 19.70 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.85 0.71 -4.08 6 29 2012-06-29 13:25:57 2012-06-29 14:25:57 1 1 21 0 14 25 0 134.80 61 89.10 NEW cLTENIGLPlsLLEKHsPWPAYVTYhSPhVKRLIEKSKARDLEChpAlEcsp+su+QSKPSSlhQLKRRKSSKsSGphsLKDThSETMLSsWuuhSsssVuPohlPEPtpLHsDSREsPTuNYNKIIFuR+PhMRhLP ..............t.LTENhGLPhpLLEKHDPWPAYVTYTS.sVKRLIEKSKsRELEChpAlEEsp.hsSRQsKPSSlIQLKRRKSSKSSGcssa+DsLSEosLShWGsYSl.AhuPThlPEPT+lHoDSR-sPTpNYNKIIFuRKPhMRMLP.. 
0 1 1 1 +15066 PF15214 PXT1 Peroxisomal testis-specific protein 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8NFP0 Family This family of proteins is testis-specific [1]. 27.00 27.00 28.40 48.10 20.30 18.30 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.28 0.72 -8.64 0.72 -3.71 2 16 2012-06-29 13:26:47 2012-06-29 14:26:47 1 1 14 0 6 16 0 50.90 72 56.96 NEW MQLRHIGDslsHRhlpEcLtQ-stDsLs.FVhhhFhRsQVLL+FFWNNHLL .MQLRHIGDSIDHRMVpEDLQ.QDGRDALs+FVhFFFRRVQVLLHFFWNNHLL... 0 1 1 1 +15067 PF15215 FDC-SP Follicular dendritic cell secreted peptide Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8NFU4 Family \N 27.00 27.00 75.20 75.00 24.20 24.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.42 0.72 -3.67 2 10 2012-06-29 13:27:30 2012-06-29 14:27:30 1 1 9 0 1 10 0 65.10 68 77.32 NEW hPVspDQEREKRSh..SDELspth.h.Pas.PFtshPPh...t.PWahh.a..Ph.lP..sPTT.hP FPVSQDQEREKRSISD..SDELuSGFhVFPYPYPFRPaPPIPaPRaPWFpRsF..PIPIPESsPTTPLP 0 1 1 1 +15068 PF15216 TSLP Thymic stromal lymphopoietin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q969D9 Family \N 27.00 27.00 27.90 30.90 20.20 16.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.85 0.71 -4.40 5 25 2012-06-29 13:28:18 2012-06-29 14:28:18 1 1 18 0 10 23 0 101.00 53 84.64 NEW YNFTNCNFE+IpcIYpslIF+DLpsYlNGhKSscFNphVsC-sRPuCLTKIEpaTFNPlsGCsSLAKKtFAp+TKAALssaCPGYSETQIN.uTQAM+KR...cVssNKCLcQVSQLLcLWRpFsR.....p .....YsFosCsFpKIpttY.psI.psLhpYMsGsKSopFNpsl.Cps+spCLscIpphTFsPs.tCsSLucch...FAh+TKAsLslaCPGYSETQIN.uTQsM+KRhcccVssNKCLEQVSQL.GLWRcF.R..s.... 0 1 1 1 +15069 PF15217 TSC21 TSC21 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q96LM6 Family This family of proteins is testis-specific [1]. 
27.00 27.00 176.40 176.20 17.90 17.10 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.28 0.71 -4.60 5 23 2012-06-29 13:28:57 2012-06-29 14:28:57 1 1 21 0 15 23 0 175.40 72 99.63 NEW MAGVVYPpQAPVDLDIYQSSYMVDYKPYGKHKYSRVTPQEQAKLDAQLRDKEFYRPlPsPNPKLEDGYPAFRRPHMTA+DLGlPGFFPPQ-HVTTsEDEsRFTSTC+olYPASHsLYLAQGDPNRl+QSADFPCLLEPERQPAsEVGKGYFLLPGCsCsYHppVKVPILNRWGPLMPFYQ .........MAGVhYPtQsPVDLDIYQSSYMVDYpPYGKHKYSRVTPQEQAKLDAQLR-KEFYRPlPsPNPKLpDGYPAFKRPHMTAKDLGlPGFFPsQ..-+sATtEDEs+FTSTC+hsYPASHsLaLAQGDPNplpQSADFPCLLEPE+QPAuEhGKGYLLLPGCsCsHHp.hVKVPILNRWGPLMPFYQ. 0 1 1 2 +15070 PF15218 SPATA25 Spermatogenesis-associated protein 25 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9BR10 Family This family of proteins may be involved in spermatogenesis [1]. 27.00 27.00 127.00 126.90 17.50 16.60 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.59 0.70 -4.92 3 28 2012-06-29 13:29:44 2012-06-29 14:29:44 1 2 25 0 16 25 0 204.40 78 100.09 NEW MSYFsSPQTHPGLLPSGQGGAASPGSSLGLYSPAEPVlVASGGQGPLSQKAEQVTPVAQAWGPALAV.EARGCPGGVSWEPPRRKEYNRYCHKhPsARQLESLGWEDuCSRSRAPaLGGPSRPpPLLLCGLSPGALPhPSEAGGKEAuSQPDICILTLAMMIAGIPTVPVPGLREEDLIRAAQAFMMAHPEPEGAVEGAQWpQ..A+oHhASGPMALVRSRRGQPPGSCL .MSYFhoPQTH.G.LPSGQGGAASPG.SLGLhSPsEPVVVASGGhGPLSQKAEQVsPuAQAWGPALAhPpARGCPGGsSWETL..RKEYu.RYCHKFPasRQ.ESLGW-DGsSRSRAP.cL.....GG.....PSRPtPLLLCGLSPGVLPhPSEAsGKEAuSQPDICILTLAMMIAGIPTVPVPGLREEDLIRAAQAFMMAHPEPEGAVEGspWEQ..A+A..HTASGpMPLVRS+RGQPPGSCL.. 
0 1 1 2 +15071 PF15219 TEX12 Testis-expressed 12 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9BXU0 Family \N 27.00 27.00 31.30 30.80 23.10 20.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.19 0.72 -4.03 3 27 2012-06-29 13:30:40 2012-06-29 14:30:40 1 1 24 0 16 28 0 96.60 76 82.11 NEW Ms-SP..QlSSLGK.....SDSShLESSGLFYK-EuLEKDLSDMSKEINLMLSTYAKVLSERAAVDASYIDEIDGLFKEANsIENFLlQKREhLRQRFTVIoNTLH+ ........................PDSP..QLSSLGK.....SDSSFSEsS.GLFYKDEuLEKDLNDhSKEINLMLSTYAKlLSERAAVDASYIDEIDtLFKEANsIENFLIQKREhLRQRFTVIANTLHR... 0 2 2 2 +15072 PF15220 HILPDA Hypoxia-inducible lipid droplet-associated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9Y5L2 Family This family of proteins stimulate intracellular lipid accumulation, function as autocrine growth factors and enhance cell growth [1-2]. 27.00 27.00 28.90 43.00 19.40 18.80 hmmbuild -o /dev/null HMM SEED 63 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.55 0.72 -8.94 0.72 -3.99 3 15 2012-06-29 13:35:03 2012-06-29 14:35:03 1 1 15 0 6 18 0 62.40 76 95.12 NEW MKaVLNLYLLGVVLTLLSIFVRVMESLGGLLESPSPGSSWTTRGQLANTEPTKGLPDHPSRGV MKHlLNLYLLGVVLTLLSIFVRVMESLEGLLESPSPGoSWTTRuQLANTEPsKGLPDHPSRuM........................ 0 1 1 1 +15073 PF15221 LEP503 Lens epithelial cell protein LEP503 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q9Y5L5 Family This protein may be involved in lens epithelial cell differentiation [1-2]. 27.00 27.00 27.80 44.40 21.80 21.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -9.01 0.72 -4.27 3 18 2012-06-29 13:35:43 2012-06-29 14:35:43 1 1 16 0 10 19 0 61.10 81 92.36 NEW MQPpTQPLAQALPFSLRcALpDTGLRVPVIKMGTGWEGhQRTLKEVAYILLCCWCIKELLD MQPRTQPLAQsLPFSLtGALRDTGLRVPVIK..M.GTGWEGhQRTLKEVAYILLCCWCIKELLD 0 1 2 2 +15074 PF15222 KAR Kidney androgen-regulated Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P61109 Family The function of this family is unknown. 
27.00 27.00 171.80 171.60 22.00 19.10 hmmbuild -o /dev/null HMM SEED 103 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.30 0.72 -3.94 2 4 2012-06-29 13:37:42 2012-06-29 14:37:42 1 1 3 0 2 3 0 102.80 75 85.09 NEW FP..-lsSINcELQsSIhDlLNSs.D.QLuSYcsopuP.ED.T.p-.sTD...hMphT.utshQ.SEhSsssETVSSuFLEEhTEso-.TVchPLA.ssshSsTS FPLSELVSINKELQNSIIDLLNSVFD.QLGSYRGTKAPLEDYTDDDLSTDSEQIMDFTPAANKQNSEFSTDVETVSSGFLEEFTENTDITVKIPLA.GNPVSPTS 0 1 1 1 +15075 PF15223 DUF4584 Domain of unknown function (DUF4584) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A6NHQ4 Family This family of proteins is found in eukaryotes. Proteins in this family are approximately 835 amino acids in length. The family is found in association with Pfam:PF02437. 27.00 27.00 29.50 29.50 21.00 26.20 hmmbuild -o /dev/null HMM SEED 397 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.55 0.70 -5.20 8 68 2012-06-29 13:50:24 2012-06-29 14:50:24 1 3 37 0 39 59 0 321.00 50 52.29 NEW pPPlhhpssF+hpssssssspps....tssssGhDsppp.Kspss-ph.s.pthssuusssssPsSl..s.s.hppp............t.pssspRTD..........ss......oPsPptspsF.Ppp+h.......Pupsspsssus.ss.ss.pp............s.ssp.t+c.+hs.s.phsoslc+hp.-.........PpssucpsAcoPssh..s.s.PFpL+Nl+lKlE-....-EaEhthpssclpCcscsuctp..s...hKptD......p......stpcssu....s.s.......sLcsPss--Gtp+sss+..Ksh+s.l.tp+...t...ssA+sstKssRps+ssu+ossspto.t.............tht.h......sRRK+...AssssuPsKpsFSLMANFPsPPoLllGpDGDLsPAYSLNop+sspPPP.uHPlW+WQlGGsslP.PPu........pKhRKh 
..............................................................s.ssh.hpssF.Yp.tsttt.pt........p.tt.....pp.ttpp...h.s..thsshss.h.p.suhssshschppc....t.....c.p..sssppTD..........................ss.t.ss.SPusppspsF.s.pRh.......htpspKC.psh.ss.sspsp...................................................................p.t+.s+h..s.ch......sosl.p.ps-s.................................tstssttsts..........t.PF.LHNlKIKlE-su..-EYE.ph.....spplKCcssssctph.s.st.cpp-hhh..thc............sppcssuh.s......hs.....ps.......sLsoPps--GEhK.sA+VpKNaRoLVLGK+..h.ps..sPsKsshKssRSPRPsuKo.............popEu.oLc.............shssh......sRRK+..sAuNs..sSssKpsFshMANFPCPPSLllGcDGDLhPAYSLNop+DSpPP..AHPlW+WQlGGsAlP.P.Pu........pKFRKa........ 0 4 7 14 +15076 PF15224 SCRG1 Scrapie-responsive protein 1 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:O75711 Family This protein family has an important function in acting against the prion protein, Scrapie [1,2].This family of proteins is found in eukaryotes. Proteins in this family are approximately 98 amino acids in length. 27.00 27.00 126.90 126.70 20.80 19.30 hmmbuild -o /dev/null HMM SEED 78 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.35 0.72 -9.92 0.72 -4.16 2 33 2012-06-29 13:53:27 2012-06-29 14:53:27 1 1 27 0 19 25 0 76.80 81 79.22 NEW hPupR.SCY++IL+s+sCHSlPEGhAsLp+lD.slQDHFW-GcGCEhlCYCNFpELLCCPK-lFFGPKISFVIPCNsc .MPuNRLSCYRKILKD+NCHNLPEGVADLTpIDV.NVQDHFWDGKGCEMICYCNFSELLCCPKDlFFGPKISFVIPCNNp....... 
0 1 2 5 +15077 PF15225 IL32 Interleukin 32 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P24001 Family \N 27.00 27.00 69.50 63.00 22.50 18.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.71 0.72 -10.45 0.72 -4.09 2 18 2012-06-29 14:50:41 2012-06-29 15:50:41 1 1 7 0 1 29 0 92.30 85 50.78 NEW E.TPLL.c.RptLRsRspRSsVPslED.uhE..-PtESFhD+shRhFQthLppLQphWpuVLAWV+chVs....ALspAVpAlWp.FQsFCs.luplhhSuhQu .ELTPLLEKERDGLRCRGNRSPVPDVEDPATE..EPGESFCDKVMRWFQAMLQRLQTWWHGVLAWVKEKVV....ALVHAVQALWKQFQSFCCSLSELFMSSFQS... 0 1 1 1 +15078 PF15226 HPIP HCF-1 beta-propeller-interacting protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NWW0 Family HPIP is a small cellular polypeptide that binds to the beta-propeller domain of HCF-1. HPIP regulates HCF-1 activity by modulating its subcellular localisation. HCF-1 is a cellular protein required by VP16 to activate the herpes simplex virus- immediate-early genes. VP16 is a component of the viral tegument and, after release into the cell, binds to HCF-1 and translocates to the nucleus to form a complex with the POU domain protein Oct-1 and a VP16-responsive DNA sequence. HPIP-mediated export may provide the pool of cytoplasmic HCF-1 required for import of virion-derived VP16 into the nucleus [1]. 22.10 22.10 24.20 22.10 19.20 17.80 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.66 0.71 -4.05 9 35 2012-06-29 14:59:46 2012-06-29 15:59:46 1 1 28 0 17 37 0 118.50 66 81.84 NEW ILQQPLERGP.GtAQRDPRAAoGsohGLDs...................REPLRKQFLSEENMATHFSRLSLHNDHPYCSPPhsFPP.ALPPLRSPCSELLLWRYPGsLIPEALRLLRLGDTPoPaYPAoPAG- ......................................ILQQPLERGP.GtsQR.PRAA.GsotGLDAs........................pEPLRKQFLSEENMATHFScLSLHNDHPYCSP..PhsFP...P.ALPPLRSPCSELLLWRYPGsLIPEALRLLRLGDTPoP.YPAoPAGD..................... 
1 3 3 5 +15080 PF15227 zf-C3HC4_4 zinc finger of C3HC4-type, RING Coggill P pcc manual Domain This is a family of primate-specific Ret finger protein-like (RFPL) zinc-fingers of the C3HC4 type. Ret finger protein-like proteins are primate-specific target genes of Pax6, a key transcription factor for pancreas, eye and neocortex development [1]. This domain is likely to be DNA-binding [2]. This zinc-finger domain together with the RDM domain, Pfam:PF11002, forms a large zinc-finger structure of the RING/U-Box superfamily. RING-containing proteins are known to exert an E3 ubiquitin protein ligase activity with the zinc-finger structure being mandatory for binding to the E2 ubiquitin-conjugating enzyme [3]. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 42 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.27 0.72 -8.66 0.72 -4.08 13 3078 2012-10-03 15:03:13 2012-08-03 15:33:49 1 84 232 5 1547 3331 31 42.10 42 9.14 NEW CPICh-YLccPsolpCGHsFChsCIsshpc-scut....hhCPhC .....CsICL....-.hh....p..-.....PV.o.l.s.C.GHsFCpsC.l....p...p...h.......a...p...p.....t....p...t.t..............hsCP.C.................... 0 267 479 886 +15081 PF15228 DAP Death-associated protein Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A0PJW8 Family \N 27.00 27.00 31.40 30.90 26.20 26.20 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.84 0.72 -10.73 0.72 -3.26 49 159 2012-08-06 10:09:36 2012-08-06 11:09:36 1 2 102 0 88 149 0 90.50 37 87.26 NEW KuGHPPAVK.AGGMRIs.p++.........t..pscpcp.psc.sppcs.hpphssss.h.......lhloGslsKts+DFP...spAspshH.p....KPpPul-ptsss...pth....h...............pQPRK ....................KuGHPPA...VK.AGGMRIs.pKp..........sscpcpppp.ttppp..ps.ssss.h.......shloGslsKhs+DFP...ssuspsAH.p....KPpPul-Khsss....cp....I.....QQPRK......................................................... 
0 22 29 56 +15082 PF15229 POM121 POM121 family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A6NNC1 Family \N 27.00 27.00 27.50 27.20 26.70 26.80 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.76 0.70 -11.36 0.70 -4.93 32 248 2012-08-06 10:20:27 2012-08-06 11:20:27 1 5 42 0 92 193 0 179.20 33 32.00 NEW L+Es.t++ss.ccE-.hhs-u.......-s+RRps-ostpu.SAFcPLhssGs.uSFVP+PGsLKRulpspsS-cshsK.RSpsSShSSh..ssh.sthP..opRNAIoSSYSSopGhsphh..KRs.........suoopsQhPcpPsKKtpcc...tpp..s...ssPhhosp..ttt............s..p..s..sussosuuStp.++.....RKlPLL..ssppG-.LsLPPPPQLGasVTsEDL.-LEKKAulphhNpsLEs .............................................................................................................p...t...p.c-.h..pt.......psp+p..psstt.h.Ss.cPL.tpGs.sshhspP..tsLptsh.pspp.o.-.p.sp.+upsS.hSSh............spRNAIs..SSaSSotGhsthh..+Rps...............Puoupsphs.psuKphpcc...............ss.hssst...ptptc+......ss.sshpp.s.tsu.s.pst..uSt..p+......+.K..h....Ll..........pttt-.l.LPPPspLGa.lstcDl.sht+cuthph.pphh................................................. 0 25 28 36 +15083 PF15230 SRRM_C Serine/arginine repetitive matrix protein C-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A7MD48 Family This domain is found near to the C-terminus of Serine/arginine repetitive matrix proteins 3 and 4. 27.00 27.00 30.50 30.50 24.00 24.00 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.03 0.72 -9.59 0.72 -3.89 14 73 2012-08-06 10:34:30 2012-08-06 11:34:30 1 2 35 0 39 59 0 67.70 64 12.36 NEW hSt.p+p...sRE+-pcscspc.pcsctsRtRRRpRSYSPl..RKRRRDSPSahEsRRIT.......SA......RKRPIPYYRP ......................................hS+ao.....uRERDschpp+c.pppc+pR.ARRRRRSYSPh..RKRRRDSPSHLEARRIT..........................SA...RKRPIPYYRP........... 
0 3 5 16 +15084 PF15231 VCX_VCY Variable charge X/Y family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O14598 Family The variable charge X/Y (VCX/VCY) family of proteins has members on the Human X and Y chromosomes, is expressed in male germ calls and may play a role in spermatogenesis or in sex ratio distortion [1]. 27.00 27.00 43.00 28.30 26.50 26.20 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.17 0.71 -3.80 5 104 2012-08-06 10:46:14 2012-08-06 11:46:14 1 6 8 0 12 91 0 92.90 61 104.49 NEW MAPKhRASGPPAKAKETRKRKSSSQsSPSoPKK....sPKlAKKGKAuRGGRGGKKRAA.cKM..AAVsAPEAGSGPAPPGPScPPSQELPQHEL.P.PEEPVSEGTQPDPLSQETQL....................EDPLSQETQlEEPLS-tt....lpps.p..opl.p.........-PLS .............................................................................................................................-PLSQEoplEEPLSQEoplEEPLSpEop.lE-P.pQpoph.................................. 0 12 12 12 +15085 PF15232 DUF4585 Domain of unknown function (DUF4585) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:E7EW31 Family The function of this protein domain family is yet to be characterised. It is putatively thought to lie in the C-terminal domain of the DNA nucleotide repair protein, Xeroderma pigmentosa complementation group A (XPA). The function of XPA is to bind to DNA and repair any mismatched base pairs. This domain family is often found in eukaryotes, and is approximately 70 amino acids in length. There is a conserved DPE sequence motif. In humans, this protein is encoded for in the chromosomal position, Chromosome 5 open reading frame 65. Mutations in the gene lead to myelodysplastic syndromes, where there is inefficient stem cell production in the bone marrow. This suggests that the protein may have a role in forming blood cells [1]. 
27.00 27.00 36.70 39.10 22.90 18.70 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.23 0.72 -9.68 0.72 -4.12 13 64 2012-08-06 11:01:28 2012-08-06 12:01:28 1 2 28 0 39 60 0 71.60 48 6.08 NEW pssassTQ+KlL.DPcSGcYalVDhPl..Qs+hKphaDPETGpYVcVslPsSttu..tssss.shs.uPhsLhPuhaPs ...p.shstTQtKlLlDPpoGpYYlVDsPl..QPph+pLFDPETGQYV-VslPs.sp.s...shssh.hs.sPlALuPGhYss......... 0 3 9 13 +15086 PF15233 SYCE1 Synaptonemal complex central element protein 1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:A8MT33 Family This family of proteins includes synaptonemal complex central element protein 1, a component of the synaptonemal complex involved in meiosis, and synaptonemal complex central element protein 1-like, which may be involved in meiosis [1-2]. 27.00 27.00 27.20 27.20 25.20 25.20 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.83 0.71 -4.23 17 64 2012-08-06 11:54:32 2012-08-06 12:54:32 1 2 29 0 32 68 0 134.00 58 50.35 NEW tGSLEPpIE-LIsRIN-LQQ...AKKKusEELGEspslh-ALp+ELDSLsuEKlHLEElLs..KKQEsLR.ILQLHCQ-KEoEuphp............................RhcF...Ep.QLEpLhpp...........HKcL......WEF+h.sppLupEIssl-su..KEQLLpE .....................................GSLEPplEsLINRINElQQ...AKKKusEELGEApslhEALp+ELDSLpGEKV.+LcElLs..KKQEsLR.ILpLHCQEK.......ESEApRp..psh...h.....................RLsFEE.QLE-LMGQ...........HKDL......W-FHh.sc+LA+EIssL-SS..KEQLLpE....................... 
0 2 3 5 +15087 PF15234 LAT Linker for activation of T-cells Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O43561 Family \N 27.00 27.00 28.90 36.50 23.70 23.70 hmmbuild -o /dev/null HMM SEED 230 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.73 0.70 -4.88 10 45 2012-08-06 12:03:20 2012-08-06 13:03:20 1 2 25 0 17 36 0 221.60 68 95.17 NEW MEsslLl.PssLGLLLLPLLAVLLhALCVRCRELPG..SYDSA..........ooDSLaPcSIlIKpPt...TluPWPPAs.Ys.VTSaPPLSQPDLLPIPRSPQPPGGSHRMPSSRQDSDG.ANSVASYENE...........................EPACEDsDEDEDEEDYHNE...GYLlVLPDSoPATSou....VPs.APVPSNPGLRDSAFSMESG-DYVNVPESEESA-ASLDGSREYVNVSQELQP...sARTEPATloSQ.......ps---EEEsAPDYENL..QE ...........MEtshLl.PslLGLLLLPlLAh.LhALCV+C+cLPG..SYDSs..........SSDSLYPRu.I.lK+Pp...TlsPWPPAh..P.VTSYPPLSQPDLLPIPRSPQPLGGSHRhPSSRpDSDG.ANSVASYENE..............................................EP.ACED.-tD..EDEDDYpN....GYLVVLPDSoPAoSou....ssu.APs.SsPGlRDSAFSMESh-DYVNVPEStESA-ASLDGSREYVNVSQELpP...sA+TEPAslsSQ.......Et.-psE-EtAPDYENL..QE........... 0 1 1 2 +15088 PF15235 GRIN_C G protein-regulated inducer of neurite outgrowth C-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O60269 Family This represents the C-terminus of the G protein-regulated inducer of neurite outgrowth proteins [1]. 
27.00 27.00 63.80 59.70 20.30 19.30 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.46 0.71 -11.10 0.71 -4.09 16 86 2012-08-06 12:19:08 2012-08-06 13:19:08 1 1 34 0 51 88 0 133.70 48 21.15 NEW +SVATuPhpss.............s...pststsa...............PEVplc.........s.pcspp.PVR-VsWDEcGMTWEVYGASlDsEVLGhAIQKHLEhQIEpa..QhtP.....st.t.pts.....................ppssspppt+Rps...FRshhpslR+PsCCsRuusss..E ...............................................................+uVtsuPhhst.................sutsthh...............Ppsphc.............s..ctspPVRDVsWDEcGMTWEVYGASlDsEVLGlAIQKHLEpQIcEatcphts.....t.....ts........................................................p.spssspttt+Rt.sh.FRuhlQslRRPsCCsRuusu.............................................................................. 0 4 9 16 +15089 PF15236 CCDC66 Coiled-coil domain-containing protein 66 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A2RUB6 Family This protein family, named Coiled-coil domain-containing protein 66 (CCDC) refers to a protein domain found in eukaryotes, and is approximately 160 amino acids in length. CCDC66 protein is detected mainly in the inner segments of photoreceptors in many vertebrates including mice and humans. It has been found in dogs, that a mutation in the CCDC66 gene causes generalized progressive retinal atrophy (gPRA). This shows that the protein encoded for by this gene is vital for healthy vision and guards against photoreceptor cell degeneration. The structure of CCDC66 proteins includes a heptad repeat pattern which contains at least one coiled-coil domain. There are at least two or more alpha-helices which form a cable-like structure [1]. 
27.00 27.00 27.00 27.40 26.60 26.60 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.37 0.71 -11.18 0.71 -4.61 19 118 2012-08-06 12:25:34 2012-08-06 13:25:34 1 2 69 0 69 126 0 148.20 35 16.22 NEW sssssttspsspptpsspsspssshps.p....p.saLRuhoA.LLDsup.....lpER-cRRpKthEaQcAIttQlEE+c+hKphEcpp+ppEEppEEpRltRE+pphpcpaEc-hhcp+pKEEhhppKTptLhpshp+ApEhApc.K.pc......................pRh+cltpcsc-hpph ..............................................................p.......tptp......saLRu.ss..lDstp.....ltE...R-c+RpKQhEappAlptQlEEK+p++phEcEp++pEEpcEE..pRLAc-Rp.chQcp..aEE-hh.+p+pKEEh......phKspcL....hpshpcApc.Ap+hK.pc................................................pRh+chtpctpt....h....................................................... 0 20 24 41 +15090 PF15237 PTRF_SDPR PTRF/SDPR family Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:O95810 Family This family of proteins includes muscle-related coiled-coil protein (MURC), protein kinase C delta-binding protein (PRKCDBP), polymerase I and transcript release factor (PTRF) and serum deprivation-response protein (SDPR). MURC activates the Rho/ROCK pathway [1]. PRKCDBP appears to act as an immune potentiator [2]. PTRF is involved in caveolae formation and function [3]. SDPR is involved in the targetting of protein kinase Calpha to caveolae [4]. 
27.00 27.00 28.60 28.10 24.40 24.40 hmmbuild -o /dev/null HMM SEED 246 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.65 0.70 -4.97 30 215 2012-08-06 12:39:37 2012-08-06 13:39:37 1 2 43 0 103 180 0 216.10 41 68.95 NEW QlsAlTVloLLDKlsshlDsVQpsQpphEpRQt-hEsu....V+sIQu-lsKLo+uHssTSsTVsKLLEKsRKVSsplKsVRsRl-+QusQVKKLEsNcscLL+Rs+F+VlIaQ-EsElPuplsspp.psh......................t.t..t.....pch.tshcL.SSDEE..h.l.............................p.hEESRAcRlKRSuL++VDsLKKAFS.........................................................................Rp..................sl-KKhs+luT+I.VsPERREKI+......cKShpss+t.stc..KposhKlsPh ...........QlsulhVloLLDKlhshlDplQtsQtphEpRQ..tphEsu....VpuIQs-LsKLo+uHssTSsTVsKLLEKsRKVSspsKsV+tplE+QssQlK+LEsNcscLL+RppF+VlIa.Q-EsclPupl.hpt.t.h............................t..s.ptpt.cct.tshpLSSD--.h.l...............................-ESRAp+l+RSuhc+VDslKKAFS....................................Rp..................sl-+Khs+lsT+l.VssERRE+h+........pShp..+...tp..Kps..pssP......................................................... 0 4 15 39 +15091 PF15238 FAM181 FAM181 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:A6NEQ2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 256 and 426 amino acids in length. 
29.00 29.00 29.00 29.00 28.90 28.90 hmmbuild -o /dev/null HMM SEED 297 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.20 0.70 -4.35 20 80 2012-08-06 12:48:17 2012-08-06 13:48:17 1 5 39 0 48 70 0 248.20 35 73.36 NEW tsscpss+sLLsFls.ASSsIKhALDKsu.s+RpVsHRKYLQKQlKRhSthhsthP.t....Pspsspsps...+...tstststp...pspssspppustus.........shp.........pcSLuu.......................................t.h.tt.s.s...scp.VPLR+RsLPsSFapEP.....psspsh.....sussssslss...............scsschh-lLGP-..................sshsscps.h..ss...t...olss+..ssshss.....sPh.acupslhtu...hstsh....s.....sssshssls.hp..ss.s...........sscshp.sttss..up.hh.pssl.cs.sspsu ................................hsscpss+.LLsFls.ASSsIKhALDKsu.s....+RpVsHRKYLQKQlKRhSthhuthP.s............Pspssts.h.......+........us.ts........s.ht..h.s.ssspp.ustss...........tsshp.........pcsLut......................................tt..ttpsstu....stpVPhRpRpLPsSFapEP......p.spuh......sG.psuLss........cutcsschhE.LGP-...................ts..tp-shhh.ss.......uhssh..sss.ph.....pPh.acspshhsG.....s...sh......s.s.hssLsh.+..ss.............shc.ha.sttss..up......................................................................................................................................................................... 0 12 15 25 +15092 PF15239 DUF4586 Domain of unknown function (DUF4586) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:A7E2U8 Family This protein family, refers to a domain of unknown function. The precise role of this protein domain remains to be elucidated. This family of proteins is found in eukaryotes and are typically between 256 and 320 amino acids in length. There is a single completely conserved residue, phenylalanine (F), that may be functionally important. In humans, the protein is found in the position, chromosome 4 open reading frame 47. 
27.00 27.00 27.00 31.40 26.80 26.20 hmmbuild -o /dev/null HMM SEED 302 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -12.38 0.70 -4.87 41 143 2012-08-06 12:48:46 2012-08-06 13:48:46 1 5 80 0 94 135 0 260.60 27 87.27 NEW csch-RhGLFS-.sa......lolGDpYhp.tpp......scssscsKpMLsuusKpts.shpsuYFs.pahplhpGEsY........tD.s+hcRphclcptKK..slu.KsFhPusGtKp.s.GhGsaaGsass.h...h........hss.p+spcthh.sts+........NhhTNPuK+Go.aGYsslTlup...sass..DsY-pt+chtccctpc+cphh......cG.ssF+h..shaspcaFDss.Pa.........h........pppsl..sPhcp.t.ttpphstP........FKPosP.....sc..t......ut+sGsFss.aPpapsDPhs.chpp..t..hpspp..............tt+hF+PssssK.o........pPssSIhstslp..+ph .....................................................................hhGlFSp.sa......lsl.G.....DpY.p.htt.......tstsps+phls.s...ssKp.h...phpsuaF-.pah.plhpG-sY........hs..........p..hp+ptthcptKK.........sls.psFhPss..s.Kcss.GhGsaYGshst.h...h..............hsstt+spcthh....s.s+..............NlhTsPuK+Go.YGY..ss...l..slup...pat..........-.Y-ttc...cpppppccphh......hu..ssF+....shhstt...hFDts.sh........h.............ppsh....s.pp.....ph..ht.s........a+P.pP.....s...t...........uhhtGshs...aPpa.tDPh.ht.tp........pt...............tc.ahPssssp.s........h...Sl..........h................................................................................. 0 50 62 78 +15093 PF15240 Pro-rich Proline-rich Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P02810 Family This family includes several eukaryotic proline-rich proteins. 
27.00 27.00 28.10 27.00 26.50 26.90 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.38 0.71 -12.12 0.71 -4.18 16 217 2012-08-06 12:57:24 2012-08-06 13:57:24 1 10 16 0 45 325 0 107.60 39 99.71 NEW MLlILLoVALLALSSAQs.sE-VupE-ssSllu.....tt...........tsps.t.spp.Qt...........PP.GG..spsP...........................................................................................P.sGGPQp.pPPQsGp....................PpGPPP.GG...........................................ts.......tp.QGPPPQtGsp.....tPP.Ps......................pPQGPP....................t.........................s.sGss...................QtPPP.........PPsGpPQG..PP........................tPPQsG.pPptPPQ .................................................................................................................................................................................................................................................................s..t..tsQt..PPpsGp..............pGPP.PQGs.sp.......t..sspP.....Gp.QGPPPQGGsQ....tPP.ss.........pPpG.PPsQt...s......................................................................................................................................................... 0 21 21 21 +15094 PF15241 Cylicin_N Cylicin N-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P35663 Family This is the N-terminus of cylicin proteins, which may play a role in spermatid differentiation [1]. 27.00 27.00 31.90 30.20 25.30 25.30 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.95 0.72 -10.40 0.72 -4.18 22 69 2012-08-06 13:08:30 2012-08-06 14:08:30 1 1 23 0 27 48 0 108.90 52 24.53 NEW QclNhsTYDNhIPlSE.S+KSWNQpHFuLsFPKPPpPG+K+RS+PSpLp...sVP....+p-ccKlccspKs..lWh++SLh+I.pRPSlYLAsRRQs.Ph+.shsspscscpApsc ...................cVNhtsYDN.IPlSE.S+KSWNQpHFALsFPKP.pPGpK+RS+PSplp..TVs....hhDccKLcpspKs..lWh++SLh+I.pRPSlYLAAR+Qs.Ph+.sassKscsKpAE....................... 
0 2 2 3 +15095 PF15242 FAM53 Family of FAM53 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q14153 Family The FAM53 protein family refers to a family of proteins, which bind to a transcriptional regulator that modulates cell proliferation [1]. It is known to be highly important in neural tube development [2]. It is found in eukaryotes and is typically between 303 and 413 amino acids in length. 27.00 27.00 28.70 27.60 24.00 24.00 hmmbuild -o /dev/null HMM SEED 314 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.34 0.70 -12.39 0.70 -4.88 20 155 2012-08-06 13:28:35 2012-08-06 14:28:35 1 2 41 0 67 126 0 256.80 38 76.63 NEW MVhllTcpLpppuh...DDlss+ohshs..apsccho.pussLFshsls..-s..pWpslstsssl.....ptcss...usShtshhssh.t......h..tuhpW.........p.-uss.ssslouLlpcLSLs....-ssu.....sPsAPPSKRpCRSLShsDELupC.RSsWRPtGSKVWTsVcKRRCpSGGSlphts.......p.tsG.sshQRSoShSLPupus......h....hspt..h..shsutss.sssstuupsss......t.t.hs.pRpLSLSpEpl.shsc....suAs..SoPsSTPELuRRsGt.......LsRSRSQPC..VLsc+KsuhKRRRsEDs+hpRPSLD..hsKMTQ .......................................................................................MlhllocpLpppsh...Dpltpptht.t........scphs.pussLhshtl....-s.s.W+sLs.ttssh....p.pts...s.sh...................u..W.........p.puss.sst.pshhpsLshp............-pts.....................sPsAP.PoKRpCRSLS.s-E.hu..psRosWRPtuSKVWT.PVpKRRCtSGGSsph.p..........ss..thpp.pshsLPppss.hs.........h.pt.hh..s.t.tsu.hs...supusps.u....s..t.s.hs.pRphSLSpEph..phsp.......PSAs................SoPsSTPELsRRtu........L.RsRSQPC..VLss+KsthKRRR.EDs...p..pRPSLD..hhKMsQ................................................. 0 5 10 25 +15096 PF15243 ANAPC15 Anaphase-promoting complex subunit 15 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:P60006 Family This is a component of the anaphase promoting complex/cyclosome [1]. 
27.00 27.00 30.50 37.60 26.80 26.00 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.53 0.72 -3.92 25 114 2012-08-06 13:34:13 2012-08-06 14:34:13 1 2 81 0 67 91 0 93.20 45 78.43 NEW Ms.....shFPsLhPRls-shWFsl.D...............cPCsEEsELp.........p.EQpHQsWLpSIuc+sssLlPIGKsssE.............pct-s---....-sp--s..-csEptpp-p-Eh--h ........Ms..shFPSLhP+lT-oLWFNl.D...............+PCsEEoELp.........ppEppHQsWLpSIuc+ssNL...lPIGKPsoE................tp-c--pD-E....-s--Du......--sE-..pD.DEhsD....................... 0 11 20 40 +15097 PF15244 HSD3 Hydroxy-steroid dehydrogenase Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9P0W8 Family This family also goes by the name of Spermatogenesis-associated protein 7 or SPAT7. It is an aldo-keto reductase (AKR) human type 3 3-alpha-hydroxy-steroid dehydrogenase (H3-alpha-HSD3, AKR1C2), and it plays a crucial role in the regulation of the intracellular concentrations of testosterone and 5-alpha-dihydrotestosterone (5-alpha-DHT), two steroids directly linked to the aetiology and the progression of many prostate diseases and cancer [1,2]. Mutations in the gene cause Leber congenital amaurosis (LCA) and juvenile retinitis pigmentosa (RP), the most common hereditary causes of visual impairment in infants and children [3]. 
27.00 27.00 28.10 27.50 26.90 26.90 hmmbuild -o /dev/null HMM SEED 419 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.72 0.70 -12.49 0.70 -5.23 10 108 2012-08-06 13:52:06 2012-08-06 14:52:06 1 3 48 0 44 107 0 234.10 43 66.25 NEW +VRuosVLPRYuPPCLFKGHLSTKSNAFCTDSSSLRLSTLpLlKNHMAVHYNKILSAKAAVDCSVPVShosSIKYADQQRREKLKKELARCEKEhKLoKouhQANSKssSKSlhNoLQKPuGEPQsctshllEchNpFsSFu+SllsSSE+LcLu.LsKSscslosGocKNuSSS.oSh.............-hssSsPR+ssSussauR...+PRSshssSHR.FQLVlSKAPSGDLLDKHSEhFSNKQLPFTPRTLKTEAKSFLSQYRYYTPAKRK.KDhoDQcIEAETQTEL.SSF+S-hsTAEpKshT-SElNIpQA..SsCsTaGTK-KhsPhstptpsLsW-cl.K-ssLQpSSsRu.lCpYSlQ..Pus+KIaS-EEELLYLSFIEDVTDEILKLGLFSNR.FLERLFERHI+QNKHHLEEcKMRHLLHlLKVDLGChS ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................p....t..c.......-EhhYlpFhtslTp-llphGlaos+..LpplFcpHlppp+.pLpttph...h..Lp.p.................................................................................. 0 11 13 27 +15098 PF15245 VGLL4 Transcription cofactor vestigial-like protein 4 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14135 Family These proteins act as transcriptional enhancer factor (TEF-1) cofactors [1]. 
27.00 27.00 28.30 27.20 24.00 25.90 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.90 0.70 -4.50 18 112 2012-08-06 13:52:32 2012-08-06 14:52:32 1 2 63 0 55 101 0 186.70 47 66.05 NEW LLNaQYLDKM.NNNIG...lLpYEG....pstLRuEsRhpoLs......................thoscRTuPPPlsPoKRKhSt-QuDsch-p-s-HhoKMSRhFusp..Lsp....s.st-..Rpc......php+u+......SPh-phsss.o.ulhus.HlYuoh............sshu.hDQPLAL.TKsoh.-us+o.....tthsspssssER.QNRPSVITCAPAssRNCNLSHCs..shsusss....s.++.s....sTuCDPVlEEHFRRSLGcNYKEs ...........................................................................................................................................h....ht.......pt..............pstL+.u-sRhpsLsh.....................shospRTsPPPlsPsKRKhSh-.uDpch-p-s-HhoKMSRhFss+..Ls+....ssNGDp.Rc-.........R-RSR..........SPIER.usus.ohoLHus.HlYsSh.................h-QPLAL.TKNSh.-uuRs......ths.shsssERQQNRPSVI...T.CAsAs.sRNC..NLS.HCs.hsHsuCss.hss............................t.ssssssCDPVlEEHFRRSLG.KNYKEs........ 0 9 14 35 +15099 PF15246 NCKAP5 Nck-associated protein 5, Peripheral clock protein Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:O14513 Family NCKAP5 is short for Nck-associated protein 5, which is also known as the Peripheral clock protein. NCKAP5 is a protein family, which interacts with the SH3-containing region of the adaptor protein Nck. Nck is a protein that interacts with receptor tyrosine kinases and guanine nucleotide exchange factor Sos. The role of Nck can be thought of as similar to Grb2. The role of NCKAP5 is to assist Nck with its adaptor protein role [1]. 
27.00 27.00 82.30 38.30 20.80 20.60 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.26 0.70 -5.05 20 107 2012-08-06 13:52:54 2012-08-06 14:52:54 1 2 37 0 48 83 0 261.60 41 20.62 NEW slpsoIEEKVMhGIpENVpKsQtQsKu.sosEsKQKsuPSlASWFGhRKSKLPALSuRKsDsuKsK-EKK-hKhhuhssppthscc++ccKctppphplss.hs+sp-htc..p.-puhhspps.cpspcstsp.........hppttpph.s.sstsspDsFMpELLNRVDcKsstpscpssspsshpshp+uSspupsh.ssshuspusp++.hps+hphptsppsh......htpts-phpc-Epss.sDos....hQsHhlsSusQhRTLDSGIGTFPLPDSssRusuRpsspppps.cp-s.sshp.shsssssl.+....ApTLEREVPSss .............................................................................................................................................................................s..pssIEEKVMhsIpENV.+hQsQp+u.susEsKp+s.ssShASWFGh+KS+LPALs.R+h-soKsK.t....thhu....s.pp..schctc.Khtt.phplpp.ht+spc.tc..p.-puh.spps..pspshhst.........ht...tph.u.ht..stDsFMppLLN.R..VDtKth..pp.tpspsphtsh..soopsps...suhuspss....hhs.hphpt....th.......................ssEshpc-E.ss..-sh.....psHhhtsss.hRTLDSGIGTFP.PDpGsp.ussph.h.tsps.ch-s..sl.su..t.sssh.+....ApTLEREVPu..p.......................... 0 2 5 16 +15100 PF15247 SLBP_RNA_bind Histone RNA hairpin-binding protein RNA-binding domain Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14493 Family This family represents the RNA-binding domain of histone RNA hairpin-binding protein [1]. 27.00 27.00 37.20 39.80 26.90 24.30 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.04 0.72 -9.61 0.72 -4.21 52 232 2012-08-06 14:32:12 2012-08-06 15:32:12 1 2 135 1 151 227 1 76.20 47 23.06 NEW hEsD.p......pLtpRQKQ....I-aGKNThGYppYlppVP.+ppRp..p.....tHPpTPsKapphSRRuWDupl+hWR+tLHp.a......D...........Ps ......................EpDpp....hLtpRQKQ....IsYGKNThuYc+YlctVP.+c..tRp.......shHPcTPsKap+hS+RuWDtpl+lWRptLHh.aDs........................ 
0 62 84 119 +15101 PF15248 DUF4587 Domain of unknown function (DUF4587) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:P58505 Family This protein family is a domain of unknown function. The precise function of this protein domain remains to be elucidated. This domain family is found in eukaryotes, and is typically between 64 and 79 amino acids in length. There are two conserved sequence motifs: QNAQ and HHH. In humans, it is found in the position, chromosome 21 open reading frame 58. 27.00 27.00 44.40 44.40 19.90 19.30 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.13 0.72 -3.38 28 70 2012-08-06 14:40:19 2012-08-06 15:40:19 1 2 34 0 36 68 0 68.60 47 28.36 NEW RsGuI..KEDMVEhMLhQNAQMHQllMpshMl+ALPPhs....s..................stssslcspc.....pcPssVHHHHaa .......+sGplKEDhVEhMLhQNAQMHQllhpNhML+ALPPs.s....st........p....s...p.hhhcspc...pcPssVaHHHa................ 0 4 7 14 +15102 PF15249 GLTSCR1 Glioma tumor suppressor candidate region Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NZM4 Family This domain family is found in eukaryotes, and is typically between 105 and 124 amino acids in length. There is a single completely conserved residue F that may be functionally important. Mutations in the gene for this protein in humans leads to the development of oligodendrogliomas [1]. There is evidence that these protein interacts with SH3 domains [2]. 
27.00 27.00 27.60 27.40 26.90 26.60 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.51 0.72 -3.95 54 193 2012-10-03 11:11:44 2012-08-06 15:41:14 1 6 115 0 126 197 0 109.40 33 10.45 NEW htpphttcptt.......shpP...........Dh.p...sPFpo..hpDAlcRL..LPYHlhtp.p.s..........pDhpt........................hDcp......hcspssp.h..hp+hpphhp+aphllhccut......cts...spEtlh...lpphhhp-E+pplpc ...............................................hppLppcpst....................shpP...........Dh.poPFpShcDAlpRL..LPYHVhpss.sst.................pDhpp.......................................................................hDpc......aEshusp.h..hc+hpthlsKaphlLhc-uh......chss..osEhlM...l-phhlp-E+tplt............................................................................ 0 38 57 88 +15103 PF15250 Raftlin Raftlin Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14699 Family This family of proteins plays a role in the formation and/or maintenance of lipid rafts [1]. 27.00 27.00 30.90 41.60 24.80 24.80 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.62 0.70 -12.61 0.70 -5.96 19 145 2012-08-06 14:45:02 2012-08-06 15:45:02 1 6 38 0 65 124 0 348.00 40 86.42 NEW MGCGLRKLEcsDDsSPGKIaSTLKRPQVETKsDsAYEYhhLDFTl..pu..ss.sssl+luSlhDlPsplp-hYpQGalluAlHPhlpPsscpcplPtpplaRAlLl+hp.p..ppcss.hspct.pLhlEECs.stcshss-hlppLIcK.lp-AAcpGh+FVGhl.........spths.spssssos....sstsh-.c.s..........................................tthcpss-css+s.......spush.sspss-suhcpchptpss......h.......ss.s.sp.........ch+LaslFNt.c......c.spsChpYapsslsh+VoRpGpslSoL-AsWLEhhohaa+pG.hSLVDuasphshs+.Dpls+sl-GlFIaEEtuousstosp.G.DAIVVEQWTVIEGsEVKTDYsPLLpoLApFGWhLTCVLPTPIl+ps.SEGNLATKQlVFLQRPsh.pp..stpp.p..........cppsRph+p..cpppsuscp.s...tp..ssE 
...............................................................MGCtL.KLc.c.--.p.pPGpIaSTL+RsQVET+ht.sYpYhhL-F.h....tu..stspslplsSlh-lsspl.-hY.pGa.lsAlHPhlpPsttpcphP.phlaRslL...+.p.p...ppp.ss..tpp..t.hL.l-pCs.h.p..spchh.thlcK.lp.uAppGh+FVGhl.........................p.hs..t.ss.ss.........p....................................................................t.....................spsp...s.t.p..pthp..pp.pt.pt..............t..p.shst.........c.claslFNt.c......s.pppshpY..shlsh+Vo+pGps.losL-AsWLEhho.aa+pG.h.Llsuhhhhth.s.p....-p.h.t.h-GlFIaEt.ustssto.p.G.DAIVVE.QWTVlEG...sElpTDYsPLLpoLAtaGW.LTsVLPTPll+ps.pEGsluTKQlVFLQRPsh.pp..htpp.p..........cht.t+..pt..t.t..sttp.t..........tt................................................... 0 3 9 26 +15104 PF15251 DUF4588 Domain of unknown function (DUF4588) Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q14CZ0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 200 and 274 amino acids in length. There is a conserved LYK sequence motif. There is a single completely conserved residue A that may be functionally important. 
27.00 27.00 27.20 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.82 0.70 -4.38 18 156 2012-08-06 14:50:59 2012-08-06 15:50:59 1 2 115 0 102 137 0 208.80 31 89.72 NEW EppCl..s-up.ccp..........................t.pssphpsppp+LWptFpsuAsAVApLYp...................t...................t.thohWssFpsAAtuVTsLYK-Ss.DuppRSh-hGhpsGhpc......Rs+DllpWs+.+tRphIRRE-LluaLsG+sPPs......+s.sps..............sscs........ss..sssp.....p..ts.t.s.s......l-ssLpsF+-AluLtt..hsshhsshuh.susPsossp.pt.t.......................uhpcscLsshhs...--lshp.............pRKRppspss-s.............sSPoHKRsR.hh ........................................................................t....t.....pplh..FptsApulApLYp.........................tp.shuLWssFQsAAsuVTsLYKpSs.-spp+oh-hGhphGhpc............................cs+-lhsWs+.KpR.ph..IRRED...LluaLsGKssPs......ps..pt.............................ssp................st.......t.....tssss........spsshp.sh.ppsluh.....st.hss.phtss...Ps.sst....t.............................t...............tpht.................p...t.............................h....................................................................................................................... 0 24 37 70 +15105 PF15252 DUF4589 Domain of unknown function (DUF4589) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q2T9L4 Family This protein family is a domain of unknown function. The precise function of the protein domain remains to be elucidated. This family of proteins is found in eukaryotes and are typically between 215 and 293 amino acids in length. The protein contains two conserved sequence motifs: SSS and KST. 
27.00 27.00 97.90 60.20 20.50 25.10 hmmbuild -o /dev/null HMM SEED 221 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.98 0.70 -11.70 0.70 -4.53 13 97 2012-08-06 14:54:27 2012-08-06 15:54:27 1 2 37 0 58 73 0 209.90 48 85.56 NEW EVVsQID+LTSDh-aELE..sDDWTTuTlSSTSSS-+...sGss.-Lt+LDFhsuDhLSD..........SW-FCSFL-tSsPsssssssssspsts...................ssapLMNGGl................IPNGPth.TPDSSSEEA.sss........Ksp.....pRTsGTRERVRFSDKVLYHALC..CDD-p-tsppp...ctp..t....p............................sPc..sssuhssutstshsst.....s..++lhRNoSTQTVuDKSTQTlL ........pVVsQIDpLTSDhphE.E..sDs.pTsTlsSoSSSsp.........s.sLt+lchhsss.L.c...........s.thhohLchssPssssst.pss+st.ss..................sshpLhpsGs................IPNGshsphPsSs.-cA.sts....s.Ksp.........p.p.sGsRERVRFs-KV.YHuhC..CDsc..s.tpcEht....ct.-..st....sp...p.........................sh.h.ssshshs...pp.p.shssh....Ptps..sTtspsppTl.cKSTpTsl................. 0 2 7 22 +15106 PF15253 STIL_N SCL-interrupting locus protein N-terminus Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q15468 Family \N 27.00 27.00 28.10 27.40 21.90 21.10 hmmbuild -o /dev/null HMM SEED 410 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.48 0.70 -5.76 17 97 2012-08-06 14:56:12 2012-08-06 15:56:12 1 3 58 0 54 91 0 313.50 46 33.79 NEW LWsssPhG-hhh.+lthh.RNs+.Lh..lsEKslRLApRHs+Q......sptpshsCFLlGolhVDpDEEGlolslDRFDPGRE........stt.t+hPss.LPGDhllPChl.tpshusspsl.sHsspDhstsFpsLQppls.o+psL-hspLl..slRuplpsp-s.Dtl.hshpWuuVThusshcssPV+slPIIPTALARNL.so.hs.lsplpGshKpGaLTMDpTRKLLL.lLESDPKshoLPLVGlWLsGlsHlpoP......VWusCL+ahaSuulp-R...VhS-sspFlllLaslsa..ppPpFYcC.hspsspt..LpaQLl...ospcslpLa.ppVcsspcp.lphE..LSupspsspts......lFpps.tsh..oh........psssppssss.phssoc...pDSulEDcDlSPR.......P.PoPHPssQpssplhPpVPELSllhDssFhs 
.......................................LWsshPhG-hhhLHlsha..RsP+...Lh..lsEKslRLAhRHA+Q.......s.cps.sCFLLGolhlDpDEEu.....lolslDRFDPGRE........sts.thsPss.L.PGDhllPChlphpt.sspphh.scssp-hstshpsLpppls.u+p.l-hsphh..th+sphh.pp...t.h.hphphtuVs.usshchsPl+slPlls.TALARsL.so.h........s.luplpGshKhGaLTMDpTRKLLL.LLESDPKshoLPLVGlWLuG.lh..HlhoPp.............VWAsCLRYhasuulp-R....Vho-sGsFlllLashT+..pp......PpFYcC.hsppsp...hpaQLl...osppslpLa.ppVcssppp.lphE..Lospspshpsp......hFpph.psh..sh........ppsspp.sss.phshpc...psSslEDEDhSPR.......P.PsPHsssQphptl.sphscLulhhs.ph..t.................................................................................................. 0 17 20 34 +15107 PF15254 CCDC14 Coiled-coil domain-containing protein 14 Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q49A88 Family This protein family, Coiled-coil domain-containing protein 14 (CCDC14) is a domain of unknown function. This family of proteins is found in eukaryotes. Proteins in this family are typically between 301 and 912 amino acids in length. 
27.00 27.00 28.40 28.30 21.10 25.90 hmmbuild -o /dev/null HMM SEED 861 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.69 0.70 -13.43 0.70 -6.17 11 77 2012-08-06 15:03:12 2012-08-06 16:03:12 1 3 40 0 44 93 0 449.60 39 86.45 NEW DSESQs-sVp.GLDGCASLL+DIL+NEDo..GsEhsaScs......Rssu+PLEuKtst.KKKG.cK+hsPshVpKEILSSssKK.IsN-uSsusc+DsSsltQpWSLQDH...Yph..YSPlIYQALCEHVQTQMSLMNshuSKsssNGIPslPCH..ssSsu-oQu.ostSsYGLsTSssVhSPQpPsCP.hVHSEV....QTsu-sQhuSQspssSVsss.slspsshsspPulsC.uLPtsspsAlPshptLshsstlhP.Q...pphsKEsDLLKChQTahuLhpuH......s..hpsDsQsppSsophQsu.hlAosEEcsAcEpIt-ssSEtc-LN.hpltDuchsKslQ........KucNlscTAcKV+hlKYLLGELKALVs-QEDSElpRLlTElEAClSlLPAVuGsTNlQVEIALAhQPLRSENAQL.R.RQLRILNQQLREQEKTcKsoGsh-CNLELhSLQSLNhSLQsQLpESLKSQELLQSKNEELLKVIENQK-ENK+asslFKEK-QTLLENKQQFDIEhT+lKIELEEALsNhKohQFKLEoAEKENQILGITLRQRDAEVsRLRELTRTLQsSMAKLLSDLShDoARsKstssLTKSLLNIY-KQLQcDPsPupTS..IMSYLsKLEss+oh.sHuEsl.shcscEshtPs+.YEssLsScsPp...pusststEEhSAstllsshSKp-SDp-SEohTLlE-csNL.DpTlYIPFARSTSKK+SsLScR.......lSPQPQhsVAssQLssssGl.sSc+Es+hssPsVCSuppp-u...E-uspcLuRsu-hEDcQLLcKIKEsIsKIPsuh.......................-c.c-puspHuPuApppsslplK..GssVsDuSFLNSDLM...SDWSlSSFSTFTSRDEQDFRNGLAALDANIARLQKSL+oGLLcK 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................h..-.s.Lh..+................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 11 13 23 +15108 PF15255 CAP-ZIP_m WASH complex subunit CAP-Z interacting, central region Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5SRD0 Family This domain is found on WASH complex subunits FAM21 and CAP-ZIP proteins, as well as on VPEF (vaccinia virus penetration factor). This family of proteins is found in eukaryotes. Proteins in this family are typically between 305 and 1321 amino acids in length. The exact function of this region is not known. 
27.00 27.00 27.60 27.40 26.40 26.40 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.75 0.71 -4.25 19 184 2012-08-06 15:04:32 2012-08-06 16:04:32 1 4 61 0 91 153 0 131.50 38 14.78 NEW PhKsKEPS.oRIGKlQANLAINPAALLPGAsPpluGsKslhPthuhssucPstspuscssss.stsuuEtGVSFDtPAQADTLaSANKoRlKhpGKRRPQoRAAR+LAAQ-SsEs..--sssscss..s.htpsssssssppP ................................................sS.shItK.lQ.A.NLAlsPAALLPu..A...u..Pp.sslKssls.hs....P....sSpss...tu.ts.........hsh....s.........tutEsuVSF.D.Ps.p.u...s.TLp.S.s.N.K...sRs+hpuKRRP.oRtuRR.Au.pcSsts.....pshs.scts..................................................................... 0 16 23 45 +15109 PF15256 SPATIAL SPATIAL Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q53FE4 Family SPATIAL (stromal protein associated with thymii and lymph node) proteins may be involved in spermatid differentiation [1]. 27.00 27.00 48.40 28.50 24.20 26.30 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.65 0.71 -11.56 0.71 -4.24 23 106 2012-08-06 15:09:08 2012-08-06 16:09:08 1 2 40 0 49 103 0 171.60 38 56.60 NEW FFSRHsPHP+RVsHIpGLNGhPICsVsD-..............shh..p..hP.sth....ssshhthP...hs.lus........PRsspcP.h.........psW+cEL+-huSclslshKcpEhKsKc...................p.s.YSspTGRlhPsuopusoccssp.t.+schptpsh..phssa.DQElhILEhLCQILpTDSLutlQpWLLpAusK..EK-LV.uLlpoAlA ..................................................FhsRHpPHPp+VsHIpsLssh..PlCsVpDc..............s.h......lP.uph....Sts.ht.Pshsls.lus...........PpssppP..........................pthp+EhhchuScsshhhK.cpEhKsKc....................................................sspss+hhss...ssps.uchpop.......pspsptps...ph..h.DQElhlLthLCpILpTDSLstl..WLL.AssK..EK-hs.uLlpstlu............................... 
0 11 13 19 +15110 PF15257 DUF4590 Domain of unknown function (DUF4590) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q5RHP9 Family This family of proteins remains to be characterised and is a domain of unknown function. This domain family is found in eukaryotes, and is approximately 120 amino acids in length. There are two conserved sequence motifs: CCE and PCY. In humans, the gene encoding this protein lies in the position, chromosome 1 open reading frame 173. 27.00 27.00 35.50 34.40 21.60 20.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.57 0.71 -4.28 11 71 2012-08-06 15:24:02 2012-08-06 16:24:02 1 3 46 0 40 56 0 114.30 64 13.59 NEW VHLuac....DhRDEIKVYQQHCGGENLCVa+G+.LLEsETFpFlS+RHhGFPFSLTFaLNGlQV-RLSSCCEYKH...++GSRLGG..+pGaFGFlsVEGASPCY+C.Il.........uhG.LDKKPpPPh++ ...................................VHLupDp...DaRDEIKVYQQHCGGENLCVYKG.....K.LLEp...ETFQ..FISKRH+GFPFSLTFFLNGhQVsRLSSCCEYKH...RKGSRLGG..K+GYFGFVsVEtuSPCY+C.II.........AMG.LDKKsosPp..c............. 0 14 16 22 +15111 PF15258 FAM222A Protein family of FAM222A Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q5U5X8 Family This protein family, FAM222A are a domain of unknown function. This family of proteins is found in eukaryotes and are typically between 411 and 562 amino acids in length. In humans, the gene encoding this protein domain lies in the position, chromosome 12 open reading frame 34. 
27.00 27.00 93.40 51.40 20.70 24.70 hmmbuild -o /dev/null HMM SEED 506 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.98 0.70 -13.00 0.70 -5.28 19 161 2012-08-06 15:29:42 2012-08-06 16:29:42 1 3 37 0 87 116 0 305.50 34 95.79 NEW KWDTTQ+MRS....ApYPTPAELDAYAKKVANNPLTIKIFPNSVKVPQRKHlRRTVNGLDT..SGQRYSPYP.sQuuu+sGLLAIVK.............sP.sKGllK-h-GsR..sRLh..scuhMNPssuPY..susSTLs......H....Pt......tl.h..Qt..............HsQ.............................oLtp...................................................h.H......sQuh.p...............................................................p..shtp.ps.st........t................h..GuRKhPDuD..AP.PNVT..VSTSTIPLSMAAsLpQ...sc.sDLuSIVHQINQaCQARA.GhusTShC.EGQIANPSPISRNLLIsAsoRVSsHs......sss...suC.hlss.-p...u.AslPsust.s.s.hshsthssuY.s..p.....................sWsQH....QLsahQphspsut.........sppsthctstspsFss+s..sYP.chs.huQsaslKs...sh-+ssPSsPV..Ns..ushs......YsNGpYap..PhWssI....LsTPsSDuuGu.QDLshsFpGutsuuss.................................................ssGs+YRhusuu...su.....QsshMQohDYLu.GDFQ..PCF+-QshuhhtKhp.....R.sshs+ss.-sscupshHIQHPGYR 
.........................................................s.aPosApLDAaAp+sAppPLoIpIFPssl+VPQ+pplpRTVNGhDT..os.RaSPYs..psss.tGLLAll+..................ss..sKullKs.cGtR..s+h.....s..p...ssY..s..sshs.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................sPh..ss............hssGpYht..s..stl.....sssssss.ss...h..hs................................................t...t...............sp.hpsh-hh..t-hp..s.hp-p.h............c......h........t...t...p............................................................................... 0 3 13 41 +15112 PF15259 GTSE1_N G-2 and S-phase expressed 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NYZ3 Family This family is the N-terminus of GTSE1 proteins. GTSE-1 (G2 and S phase-expressed-1) protein is specifically expressed during S and G2 phases of the cell cycle. It is mainly localised to the microtubules and when overexpressed delays the G2 to M transition. the full protein negatively regulates p53 transactivation function, protein levels, and p53-dependent apoptosis. This domain family is found in eukaryotes, and is approximately 140 amino acids in length. There is a conserved FDFD sequence motif. 
27.00 27.00 30.30 29.20 21.20 20.10 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.99 0.71 -4.23 25 105 2012-08-06 15:40:42 2012-08-06 16:40:42 1 1 43 0 46 87 0 128.60 40 24.70 NEW DlhLLsDEKFDFDLSLS............su...........Susc.....DDEVFhGPVGHKERClAssl.....st..stts..sstssphoWSPLsGEKFsElaKEAHhLALQlEssu+sptspssp.....ptstspssEpFlp-octKlslhppppchcpSPhslKRETasl........p-S .....DlhhlsDEphDFs.lsLSso...................supE....pDspVhhuPht.hcRslutth.....sp...sptss.sspss+hohuPLosEKh.ElhcEAphLAhQlEpsuhpcpppusp...............h.pt.t.plsh..t.pchp.SP...+RETahlp-.......................... 0 5 7 17 +15113 PF15260 FAM219A Protein family FAM219A Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q5XKK7 Family This protein family, FAM219A is a domain of unknown function. This protein family has been found in eukaryotes. Proteins in this family are typically between 144 and 191 amino acids in length. There are two conserved sequence motifs: QLL and LDE. 27.00 27.00 30.30 29.00 25.80 23.70 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -11.09 0.71 -3.78 11 124 2012-08-06 15:42:48 2012-08-06 16:42:48 1 2 49 0 67 112 0 120.40 62 71.42 NEW QK+R-hAR+uL+K+Gh..GsslspQP+pss+....R.oV+aNKGYsuLSQ.os-Es.LVoLDSD.SDsEl-.ppa.SSGYSSA....QVspDLo...+QLLpDGY+LDEIPDDEDLDLIPPKsluossCsC.....s-ssSCslQ .................................................................................pKpR-LARpu.h+pGsh.GuslspQP+pssh.....R....Vh..NKGYouLsQ.SPDEp.LVuLDoD..SD--h-.S......RY.SSGYSSA.........EQlNQDLs...hQLLpDGY+LDEIP......DDEDLDLI.PPKshsso...shsCp...hssSouCplQ........... 0 11 15 32 +15114 PF15261 DUF4591 Domain of unknown function (DUF4591) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6NUN7 Family This protein family is a domain of unknown function. It is found in eukaryotes, and is approximately 120 amino acids in length. 
In humans, the gene encoding this protein lies in the position chromosome 11 open reading frame 63. 27.00 27.00 29.30 27.40 25.80 26.80 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -11.24 0.71 -3.90 15 75 2012-08-06 15:52:54 2012-08-06 16:52:54 1 2 55 0 47 70 0 117.60 37 20.44 NEW KLGGLGPDh.-uh+s..KhpKLppQKEYA+plKEaNhKsluh..h.ppspss+s-scssls+....+K.ALEYAKoIPKPK...stp...................sQtsKc...tpp..s..stppsslPcIohLEhLpsRHE+EKpsVAAFKsLHIl ................+LGGLGPDh.ps.c...KhpKlhpQKEYAcpl+EhNh+slsh..p...pst+spspsslsc....p+.ALEYAKsIPKP.K...ssp.s...........................................cpssKc....ppp..p...stc-tshsclo.hLEhLpsRHE+EKpsVAuh+hhhh..................................... 0 19 24 29 +15115 PF15262 DUF4592 Domain of unknown function (DUF4592) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6NV74 Family This protein family is a domain of unknown function, which lies to the N-terminus of the protein. This domain family is found in eukaryotes, and is typically between 114 and 130 amino acids in length. There are two completely conserved residues (L and A) that may be functionally important. In humans, the gene that encodes this protein lies in the position, chromosome 2 open reading frame 55. 27.00 27.00 28.10 28.40 26.80 26.60 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.64 0.71 -3.60 27 118 2012-08-06 16:00:45 2012-08-06 17:00:45 1 2 40 0 57 103 0 116.20 39 13.35 NEW tpsI+hGps....Ph.hptpc.psussssE-DslPpSP.Ehss.p-sh.ssspppsssh.t.pp....stsupp.p..pssss+sspshps................suuo..slshsusspshupLDNSAA+HKLulKP+pQRsup+ ........................................pNlKhG.s....Ps.s.lshK+sp....susso.E-DhlspSP.Ehsh.p-lh.usspppsss.s.sp.p....st.tsc.p..tssss+sSps.t.hu..............suush.slshsssPpu.upLDNSAAKHKLuVKP+pQRsS+............... 
0 5 13 25 +15116 PF15263 DUF4593 Domain of unknown function (DUF4593) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6ZR54 Family This protein family is a putative uncharacterised protein family. Its existence is uncertain and its precise function is unknown. This family of proteins is thought to be found in eukaryotes. Proteins in this family are estimated to be around 155 amino acids in length. 27.00 27.00 279.40 279.10 21.20 20.90 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -10.97 0.71 -4.33 2 2 2012-08-06 16:06:49 2012-08-06 17:06:49 1 1 2 0 2 1 0 155.50 50 89.11 NEW GpLLushGPSTlPhh.suGuCpPpPLuPGGppPPPPPRAHhuP.EAs.utsPus..LPPTRGl.hKsh.opsuPshpLGGPGLPG+uGPCGPRupPsQs.uGptsssGtGspoPhhTLPCSlstSptlh+GRSHLttsLuslGEARGshAhhsWGQ GpLLushGPSTlPhh.suGuCpPpPLuPGGppPPPPPRAHhuP.EAs.utsPus..LPPTRGl.hKsh.opsuPshpLGGPGLPG+uGPCGPRupPsQs.uGptsssGtGspoPhhTLPCSlstSptlh+GRSHLttsLuslGEARGshAhhsWGQ 0 1 1 1 +15117 PF15264 TSSC4 Tumour suppressing sub-chromosomal transferable candidate 4 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9Y5U2 Family This family of proteins is expressed from a gene cluster where in humans the TSSC4 gene is not imprinted [1,2]. This same cluster is associated with the Beckwith-Wiedermann syndrome [3]. This domain family is found in eukaryotes, and is typically between 120 and 147 amino acids in length. There is a conserved YSL sequence motif. 
27.00 27.00 28.20 27.30 26.60 26.60 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.86 0.71 -11.01 0.71 -3.64 50 121 2012-08-06 16:07:49 2012-08-06 17:07:49 1 2 89 0 70 104 0 142.80 25 47.58 NEW pculFssL-sspc.....................hsssshsp...sssps......................................................puhFK+P.sPssp..............................................hphsplPDYh.tcP-+WTKYSL-D..Vs.p..hS....-poNpAAAluFLtphctp+t............s..sshsp-sssssp...............s+ ..........................................................................................pslFspLpsstp.........................sh.p......sp..p.................................................puhFc+P.sPssp.s..........................................sps.slPDYl.tpPc+WT+YSL-D...Vsc..sS..........-poNptAAhuFLpphppppt.................tph.sp.p.........tth.................................................................... 0 22 36 54 +15118 PF15265 FAM196 FAM186A; FAM196 family Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6ZSG2 Family This protein family is a domain of unknown function. This family of proteins is found in eukaryotes and are typically between 441 and 534 amino acids in length. 
27.00 27.00 62.30 52.30 23.50 23.50 hmmbuild -o /dev/null HMM SEED 514 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.12 0.70 -13.01 0.70 -5.26 27 99 2012-09-25 05:53:12 2012-08-06 17:11:43 1 2 35 0 61 62 0 405.70 40 97.84 NEW Msp+............ps-scPsh..........h+.uLDssctlK....+RNKu.QVRFKD.spuQNppt.spls.p....st+-upshssK.tA.R+ahsss.sppSlPpspK.....ShulQTSPsL+K+a.oF..cRKK...upsl+phssssshptQsNGhLs-tc.ht...p.ht.st....-uscpst.spshtsps.hhpSstshsht............h.sutpsosuspsP-...pSss.p...sh.ssspssttpsph.......hhstsptcttth.sp-...tsssshsps.t...........ppssplhhPpssspsspssh.s...spscpstsPusspp+pps.s.Ls..shpp.shsttsu.Css.s....pshoscs.ust..shps..............ps...tp.....t..ss........tostthpptsQphls+sE.lsDLpupLQshEphlpSsQEpIKVLLsVIQ-LEKucAhpEGho.YRTGQDhsNCsTCpNoACIIYSVEhDF+QQ..EsKhpsV..L+pLc.shEpsphss.PhppEshsssPcpKoKscp..KKctRhphWaL .......................................................................................................spp.............p.cscPsh.........h+.uL-ssc.lK....+RsKu.QVRFK-.spupN.pt.tplsst.....ttc-stsh.h+.tA.R+ahsss.hphShPpspK.....shulQTSPsL+Kpa.sF..cRKK.......up.lhphsssps.t.QsNG.L.-.c.h.......t..t....-u..pst.stshhsph.hhpost.hshh.......ss.pssts.hpsP-....u.s.t....h.p.sps..st.psph.......h.shs.tc..ttt..spp....sssshtth.t...........................pthsplhhP..t.ssps.t.s..s...stsc..hsPus.pp+pps...hsuhp..t.s.st..t.Css...s....phho.psspt..s..s...............p.............................ss.............sshth.st...sp.hl...spsE.lsDLpupLQ.hEp.lpSsQEpIKVLLsVIQ-LEKucAhpEGhs.YRTGQDhsNCsTCpNoACIIYS.VEhDF+QQ..Es+hp.l..LppLc..hE...sp..s.P.p...t.s..sssP..cp..pshpcp..KKht+hphWal.......................................... 0 4 12 25 +15119 PF15266 DUF4594 Domain of unknown function (DUF4594) Eberhardt RY, Coggill P, Hetherington K kh6 Jackhmmer:Q6ZUT6 Family This protein family is a domain of unknown function. 
The protein family is found in eukaryotes, and is typically between 170 and 183 amino acids in length.In humans, the gene encoding this protein lies in the position, chromosome 15 open reading frame 52. 27.00 27.00 35.30 35.30 19.50 19.50 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.63 0.71 -4.14 14 111 2012-08-06 16:19:33 2012-08-06 17:19:33 1 2 45 0 55 87 0 163.10 40 33.26 NEW Ps.pscssRpEGScctsRNWuGssacsV+phs-pp+.c..utRss........stsshDhshuhotpEptEYlRWKpEREQIDpERLARHRsupGpWRRtWDh-Ks-sMh+-ssp.tsptsshstpptt.....cc.p+PPpsPshts.h.pstp.upsp.pstspS+upGpph.......ot+ccRWEtcp-.tpc.p ...........................Ph..scpptpEGSptpsp.shusss.ptlp...-pp+....GtRts..............tssschsh.uh..s.sccptEYhpWKQEREpIDp-RLARHRcupGpWRRtWDh-K...s...csh.hpDs.st.t...tcss....pchss.............ptht.PPhsPshtt...hhspttttspsp.t..ttpS+spu+tt.......os+scRW-h+Et.t...tt................................................................ 0 5 8 21 +15121 PF15268 Dapper Dapper Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5SW24 Family This is a family of signalling proteins [1-2]. They act in a diverse range of signaling pathways and have a range of binding partners. They act as homo- and heterodimers [3]. 
27.00 27.00 27.10 27.10 20.30 24.60 hmmbuild -o /dev/null HMM SEED 748 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.41 0.70 -13.49 0.70 -5.74 24 169 2012-08-09 10:45:45 2012-08-09 11:45:45 1 4 40 0 81 188 0 446.60 32 88.35 NEW ERLcAsLAGLpELphLRpRQphhVpusLt.....s..ss.hs.....................hpEphLEtslhsL+cQL.....spLRRpDsGLhspLppLDpQIS-L+LD....VcKsopEtL-oDSRPSSGFYELSDGuSsSLSNSssSVaSEslSS.........hhsusph....csphshuDsRP+S.AD................s............................hGpL-p.h.upst.ppsut.c.p.ss.hspuhcl.sc.lcPKYQs.DLVSKsGp-VY.YPSPLHAVAlQS..PhFhLsh..........Ep.tsssp..ssttst.sss..sssh.p....hsssh.sss..hcuYI.pLLQ+ppptspss+spsu.pus...thhtt.......sss..tpupsss.csptphss.ssGtsshs.....ushpphstcppu.p.pstt...........ssssp.pssshscptspssspphttps.sss...........................psp..ssssptp..pssh................ppspshtP.ppls.ps.........................................s..t.G..thVpupalsu.psp.s+l+p.GsppsKssK.+ppsopKstt.utpt..ts.pp.Rp...............+spthsphsRhP....o.tppspt..pSspppp.hssshh.shlsuR..u....uptpuh..hcutshu.ts..uts++Kp....RRW........................pSosEIShcpt.pp..............s.RRstt.ph..uhhts.hsh..sps.h..u.S-...SEYSAECtSLFHSTls-TSED..EpSsaTTNsFGDSESShS-s-hsspSosoS.o............-uusLlWsphs.t...shp.sssuu.....c.t.sPs.phs+IKAS+sLKKKIhRFpsuSLKlMThV 
........................................................................................................................................ptL.....GL...l.pLs.Ql.scLpL-.........tt....h-p-St.SSGFY-.ssususs.t....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................aG-tESS.up.p.s..........................t....hs................................hhs+IKAS+sLKKKIhRFpssuLKlMThV........................................................................................................................................................................ 0 5 15 30 +15122 PF15269 zf-C2H2_7 Zinc-finger Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q32MQ0 Domain this is a family of eukaryotic zinc-fingers. 27.00 27.00 27.00 117.80 26.90 20.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -8.90 0.72 -4.10 2 69 2012-10-03 11:22:52 2012-08-09 12:40:46 1 2 37 0 49 53 0 54.00 86 8.43 NEW ptRKPKKPHYIPRP.GKPapYpCFQCPFTC..KSHLaNHMKYsLCKNSlSLl.p ..KERKPKKPHYIPRP.GKPapYKCFQCPFTC.EKSHLaNHMKYuLCKNSloLl.p. 
0 2 6 19 +15123 PF15270 ACI44 Metallo-carboxypeptidase inhibitor Coggill P pcc Pfam-B_261362 (release 26.0) Family ACI44, a metallo-carboxypeptidase inhibitor, is one member of a battery of selective inhibitors protecting roundworms of the genus Ascaris, common parasites of the human gastrointestinal tract, from host enzymes and the immune system [1]. 27.00 27.00 28.10 154.10 26.50 18.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.80 0.72 -9.37 0.72 -3.99 3 3 2012-08-09 11:46:34 2012-08-09 12:46:34 1 1 1 1 0 4 0 64.70 93 86.22 NEW DQVRKCLSDTDCTNGEKCVQKNKICSTIVEIQRCEKEHFTIPCKSNNDCQVWAHEKICNKGCCWD DQVRKCLSDTDCTNGEKCVQKNKICSTIVEIQRCEKEHFTIPCKSNNDCQVWAHEKICNKGCCWD 0 0 0 0 +15124 PF15271 BBP1_N Spindle pole body component BBP1, Mps2-binding protein Wood V, Coggill P pcc Pfam-B_31027 (release 26.0) Family This N-terminal domain of BBP1, a spindle pole body component, interacts directly, though transiently, with the polo-box domain of Cdc5p. full length BBP1 localises at the cytoplasmic side of the central plaque periphery of the spindle pole body (SPB) and plays an important role in inserting a duplication plaque into the nuclear envelope and assembling a functional inner plaque [1]. Although not a membrane protein itself, BBP1 binds to Mps2 as well as to Spc29 and the half-bridge protein Kar1, thus providing a model for how the SPB core is tethered within the nuclear envelope and to the half-bridge [2]. 
20.00 20.00 27.20 66.80 19.40 19.00 hmmbuild -o /dev/null HMM SEED 145 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -11.02 0.71 -3.83 14 27 2012-08-09 11:50:23 2012-08-09 12:50:23 1 2 27 0 16 22 0 154.40 47 40.10 NEW sus.sGla+WThDALFGs+lSPSpKY.+-.........auQDDTNYphpt............pstps+oR.SsSWss............DssFhc+YDLLssppppshp................................................ptLhsPlclhsp.............................s..pcsTDTFup+............t.pphsstph.hcsPptD....DsllS+LFt+t .....u.GGhhGLFKWThDALFGoclSPShKY.K-.........aAQDDTNash+hspsp.......s+pssshSRSNSWSGl...........DSoha+KY-LLP-hsEsshs........................................s.cspcclcSLhSPsslsPR............................pPhps-PTDTFupR.........pRpshschussplsFhsPpcD....DPLlSKLFsK........... 1 2 7 13 +15125 PF15272 BBP1_C Spindle pole body component BBP1, C-terminal Wood V, Coggill P pcc Pfam-B_58229 (release 26.0) Domain This C-terminal domain of BBP1, a spindle pole body component, carries coiled-coils that are necessary for the localisation of BBP1 to the spindle pole body (SPB) [1]. 
Although not a membrane protein itself, BBP1 binds to Mps2 as well as to Spc29 and the half-bridge protein Kar1, thus providing a model for how the SPB core is tethered within the nuclear envelope and to the half-bridge [2] 22.90 22.90 22.90 22.90 22.40 21.60 hmmbuild -o /dev/null HMM SEED 196 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.85 0.71 -11.38 0.71 -4.75 12 35 2012-08-09 11:51:47 2012-08-09 12:51:47 1 3 34 0 23 31 1 176.50 32 43.31 NEW c-aos-YlcLLDpLspNs+pLcpLpp-lcp+ppchpppEpoY+pKYhphRtELIpELKQSK+laDNYacLapKYppLKc........hspcs.chppplssL-splVppslpKs+chpphpcclhplcl+tpchp.t+ch-thtYEoRIc-LEppL...pNp.ptpshhup.s.os........s.............p.hpc.N.solDopFlcpls ..........t.capptYhcLhsphshNs+sLccLsc..-lcppccphccpE.....poY+pcYpphRsELlsELK+SKpLa-NYYpLhpKY+sLK+.............................shcpshshpsclu..s.cccLhpcts.KshcIpsLp.pcL...shcl+hppLp.t+p...hpc...sYEScIcDL.hpLp..pss.ptssshsophhSs........s.................c.s.tshssph.cp.............................................. 0 4 12 20 +15126 PF15273 NHS NHS-like Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5SYE7 Family This family of proteins includes Nance-Horan syndrome protein (NHS) [1]. 
27.00 27.00 27.20 27.00 26.00 26.90 hmmbuild -o /dev/null HMM SEED 670 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.46 0.70 -13.51 0.70 -5.89 26 237 2012-08-09 11:54:02 2012-08-09 12:54:02 1 3 40 0 110 185 0 389.90 30 41.26 NEW stp+SpTRDSuCQTE-lhI...ssPShRRIRAQ+GQ.GIAAphSp.o......GNhSl..LoDsusshF.ssphstssp.hpp.LPRpGuRss.ps.pthts......p........phuphps.-shh..tss.R.....tscspEspspput.......suhhhS.Hus...........Pstolsppucshh.+p.tshGps-hpssSsohsSs.hp.psshs..ttth..K-sHpSSSGsW..sposssppo.pohsssuusshssSShsDSplSLNsss.st-ss..........sps.thpsppssSFsSpstD..scspsutossst.s..tspcph..aps.ssttsps..sphssP....uhoossop.Stss.cpsSsKsDosShYSVDs-GYYT.SMHhDsGL+uspph..s...s.s.thttstpsh.shh-httppp.t...htpc+php+sISL+KsKt.PhPPpRosSLR+hsp............Kpp.p.uph.pEshluohppoLQLsL.tpss.....SSsspSssssh-s.hlhpscSposlussSS.hS...........phhShsssossapDsSulpS-YAD.Whh..Dhpusstc.ts.ossuoAoussshp..pustus.sps............SRuosPsl........PSspsc.KluSPEKhttLsSPSSGYSSQSpTPTuuhPl...shF.pshs.usGtGKhKPKVPERKSSLhSssphSSSSTSLSSsoSc- ...........................................................................................s....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................t..........p.p.ht.shSh+KsKh.P.PP.RosSLhc......................................................................................................................asc.Whh..Dhps.p.s.ts...sssssssus.ssht..ps..pp.p.s............sRss.Pp.........uh.psc..+.hsSPt.+.ttlhSPSSGYSSQSpTPTs.hsh...sh.h..t..s....ssu....ts+.+PhVPERKSSL............................................................ 
0 4 14 43 +15127 PF15274 MLIP Muscular LMNA-interacting protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q5VWP3 Family MLIP is a Muscle-enriched A-type Lamin-interacting Protein, an innovation of amniotes, and is expressed ubiquitously and most abundantly in heart, skeletal, and smooth muscle. MLIP interacts directly and co-localises with lamin A and C in the nuclear envelope. MLIP also co-localises with promyelocytic leukemia (PML) bodies within the nucleus. PML, like MLIP, is only found in amniotes, suggesting that a functional link between the nuclear envelope and PML bodies may exist through MLIP [1]. 27.00 27.00 81.60 48.00 21.30 25.90 hmmbuild -o /dev/null HMM SEED 256 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.66 0.70 -4.89 2 110 2012-08-09 12:06:34 2012-08-09 13:06:34 1 3 32 0 43 83 0 137.00 44 42.14 NEW ptMQQSDLFKAEYVhIVDSEGE-EAsuRK.-ptPssG.GpuhsRPKSLAlusu.losl.+P+..tsDhpsssps-h.pshAs.QKphQQYKhKSSYKAFAAIPTNTLLLEQKALDEPuKoEploKDsTL-s.lEhh.PAQLRQQTEELCAsIDKVLQ-SLSMHSSDSPSpS.pThLGSDssKhPsTlPRAAGRETKYANLoSPSSThu.SQLTKPGVIRPVPsKS+IlL+KE.EEsYEPNPFSKYLEDsSsLFutQD .........................................................................................................s.h.t...P........p.t..s.st.h..uhus.ppK+sp....................................................................................................................................................................................................................................... 0 2 4 10 +15128 PF15275 PEHE PEHE domain Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q68DK7 Family This domain was first identified in drosophila MSL1 (male-specific lethal 1) [1]. In drosophila it binds to the histone acetyltransferase males-absent on the first protein (MOF) and to protein male-specific lethal-3 (MSL3) [2-3]. 
27.00 27.00 27.00 27.00 26.60 26.70 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.02 0.71 -11.04 0.71 -3.67 27 274 2012-08-09 12:08:56 2012-08-09 13:08:56 1 2 88 5 145 277 2 121.70 32 15.40 NEW lhsP..sWRsl..slpshsssp...t.t.........hEsLSDpsFspRHpKYE..EpERpRWshhp.pc.pRppp...Rphpcs.............s.sp.ss.shsp..............sps.psschsscsss.shtcsh...lptsp-...................hslPW ........................lhsP..oWRts....slpslcsps.......st-................................hEsLSDssFstRHtKhE..Ep-R..p.....RWsh.pp..pc....p+hpp.....R.hcpp.................t..s.p...spsphoo...hs.................psc.s..ts..h..hs.....s..shtps....h..pp...............h...W........................................................... 0 36 50 92 +15129 PF15276 PP1_bind Protein phosphatase 1 binding Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q69YH5 Family This domain contains a protein phosphatase 1 (PP1) binding site [1]. 27.00 27.00 28.60 32.40 26.40 20.30 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.19 0.72 -3.95 12 102 2012-08-09 12:15:31 2012-08-09 13:15:31 1 17 45 0 50 96 0 62.80 49 4.09 NEW tK+KRVoFGtcLSPElFDcsLPsNTPL++GuTPs+ppssts.ssslh.cu......t..c.h.QP..sF-s .....KRKR.VoFGscLSPElFDEsLPsNTPL++GsTPs++pshsst..o...s...lLcc...........c..sQP........................... 0 10 13 21 +15130 PF15277 Sec3-PIP2_bind Exocyst complex component SEC3 N-terminal PIP2 binding PH Wood V, Coggill P pcc Jacckhmmer:Q10324 Domain This is the N-terminal domain of fungal and eukaryotic Sec3 proteins. Sec3 is a component of the exocyst complex that is involved in the docking of exocytic vesicles with fusion sites on the plasma membrane.This N-terminal domain contains a cryptic pleckstrin homology (PH) fold, and all six positively charged lysine and arginine residues in the PH domain predicted to bind the PIP2 head group are conserved. 
The exocyst complex is essential for many exocytic events, by tethering vesicles at the plasma membrane for fusion. In fission yeast, polarised exocytosis for growth relies on the combined action of the exocyst at cell poles and myosin-driven transport along actin cables [1]. 25.30 24.10 25.30 24.10 21.80 21.40 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.09 0.72 -4.12 101 335 2012-08-09 12:23:49 2012-08-09 13:23:49 1 8 228 7 232 344 0 91.70 31 10.66 NEW pp+Ks.Rhlhlu..lpps.s...lplp...KsKpss........sGs...aplu+oWsLc-Lptl-shsst..................sFslsh.sK.......sYhWpussspE+p.hFlpsLh+lhpca .......................tcKsphlsls..Vppp...s.........lplp...KsKpss.........sus...aphtcsWtLc-Lptl-uhss................psspFsLph.sK.......sYpWhAsospE+s.tFlpsLh+lhp+Y............. 0 54 101 171 +15131 PF15278 Sec3_C_2 Sec3 exocyst complex subunit Coggill P pcc Jackhmmer:Q10324 Domain This small Sec3 C-terminal domain family is based around the fission yeast protein, and is rather shorter than the budding yeast/vertebrate domain Sec3_C, family. Pfam:PF09763. In fact it is only this coiled-coil region that they carry in common. The full length fission yeast, UniProtKB:Q10324, protein Sec3 is redundant with Exo70 for viability and for the localisation of other exocyst subunits, suggesting that these components act as exocyst tethers at the plasma membrane. Sec3, Exo70 and Sec5 are transported by the myosin V Myo52 along actin cables. The exocyst holo-complex, including Sec3 and Exo70, is present on exocytic vesicles, which can reach cell poles by either myosin-driven transport or random walk [1]. 27.00 27.00 29.40 28.40 25.70 19.80 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -9.81 0.72 -3.85 2 2 2012-10-03 17:31:52 2012-08-09 13:28:37 1 1 2 0 2 3 0 86.00 29 14.06 NEW hp+L.pLcWsttsshccs.pph..ShstuhhpsVEpLhcpchphpplps.LpDshhGC-ol.STlNLaShpLSssLssVINhEQQ. 
...hp+L.pLcWsttsshccs.pph..ShstuhhpsVEpLhcpchphpplps.LpDshhGC-ol.STlNLaShpLSssLssVINhEQQ.. 0 1 1 2 +15132 PF15279 SOBP Sine oculis-binding protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9Y5P3 Family SOBP is associated with syndromic and nonsyndromic intellectual disability. It carries a zinc-finger of the zf-C2H2 type at the N-terminus, and a highly characteristic C-terminal PhPhPhPhPhPh motif. The deduced 873-amino acid protein contains an N-terminal nuclear localisation signal (NLS), followed by 2 FCS-type zinc finger motifs, a proline-rich region (PR1), a putative RNA-binding motif region, and a C-terminal NLS embedded in a second proline-rich motif. SOBP is expressed in various human tissues, including developing mouse brain at embryonic day 14. In postnatal and adult mouse brain SOBP is expressed in all neurons, with intense staining in the limbic system. Highest expression is in layer V cortical neurons, hippocampus, pyriform cortex, dorsomedial nucleus of thalamus, amygdala, and hypothalamus. Postnatal expression of SOBP in the limbic system corresponds to a time of active synaptogenesis [2]. the family is also referred to as Jackson circler, JXC1. In seven affected siblings from a consanguineous Israeli Arab family with mental retardation, anterior maxillary protrusion, and strabismus mutations were found in this protein [1,2]. 
27.00 27.00 28.30 27.80 26.90 26.90 hmmbuild -o /dev/null HMM SEED 306 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.68 0.70 -12.80 0.70 -4.04 17 146 2012-08-09 12:36:43 2012-08-09 13:36:43 1 5 65 \N 86 127 0 243.80 35 40.11 NEW sCDWC+HlRHsssYVDFQDGtpQLQFCSsKCLNQYKMpIFh+ETQAHLsh....sPHl+stupstt.........sLITP-LW......L+sC+SpSsus.sssh..ssussPu.............s.ptSP..............pshlosu.sopLhs.p...st...sss...........................................................................................hshsshss...............hppph.tp.psPhhs.ss..............hs..........psshhs.sPttt.hhpPa.p.ss.......s.PPh.h..ssPtshh..sh.s......Ps..............................lPPVTlLVPYPl..lIPLPlPIPIPlPl ............................................................................................................................................................................................................................................................................................................................................................................................................................................................ts.s.hlhs.sp.u.Ps.lP.hhh...E.....pphhQplp.sPhlh...Pspt.........ssssss.shsN.h.s..........ssss...t.....h..s.ss..p..htPa.tss.......sh..s..Psth.h...s.PP..Pst.....suhs.P.s.s........hP.s....P...........hs.h........................VPPsTlL.VPYPV..IVPLPVPlPIPIPl............................................................................................. 0 14 23 43 +15133 PF15280 BORA_N Protein aurora borealis N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6PGQ7 Family This family of proteins is required for the activation of the protein kinase Aurora-A [1]. 
27.00 27.00 28.00 27.30 22.60 26.20 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.82 0.70 -11.56 0.70 -4.98 25 110 2012-08-09 12:38:03 2012-08-09 13:38:03 1 4 81 0 72 106 0 195.70 37 39.28 NEW husptps.hphTPpssuh..............plhNPFE.ss.hppL+pshh.SPSlF...poss.........spposspFcWSIDQhAhLtPs-I.Dpc.-hpppuhhh......s.........-hE-+hQpAI-pFF.opssIVPSPW......................................s.thh.pp..hhphs..t.oshspph..p..stppssusQ..TsLoLPssh..-LEclLG.caapspcss..ct.............tt......hosSSLRRKLFhsssss.tsspsssssus .......................................................thphTPpo.sshh.................lhNPFE..ss.hspLHpthlsSPSlF...+o..............osspFcWSIDplAhlpPs-I..Dsc.-hpppshhh......s.t........-lEcKtQcAI-pFF.scssIVPSPW.........................................ts..hpphh..tshp.....sp...oPhspph...t..spppssssQ..ThLSLPssh..sLEplLu..sYFpsc-hs..-ps.......ts.........lSsSSLRRKLFhsssssh..........ss..................... 0 24 30 53 +15134 PF15281 Consortin_C Consortin C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6PJW8 Family Consortin is a trans-Golgi network cargo receptor involved in targeting connexins to the plasma membrane [1]. 27.00 27.00 33.00 31.90 23.20 21.30 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.76 0.71 -10.54 0.71 -4.58 12 60 2012-08-09 12:38:45 2012-08-09 13:38:45 1 3 36 0 31 52 0 109.40 64 19.29 NEW ElsPupGLVSILKKRpss.Gpp......lsp.ppcpoKRRVRFpEs-DshDQ.DEsuGsSClLLlLLClsTVhlSlGGTALYCThGDhcSsVCpDFusNhDFYhsplhQslpcL+HWls..hS ...............................ElsPsEGLVSILKKRs-olGcp......sAQhQpKsSKRRVRFQEhDDsLDQ..DEVGGGSCILLlLLCIATVFLSlGGTALYCTFGDMESPVCTDFAcNhDFYYT+LLQGhAELKHWIh.lS....................... 0 2 5 12 +15135 PF15282 BMP2K_C BMP-2-inducible protein kinase C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q6ZSR9 Family This family represents the C-terminus of BMP2K and related proteins [1-2]. 
27.00 27.00 27.00 54.70 26.10 26.10 hmmbuild -o /dev/null HMM SEED 226 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.76 0.70 -4.17 17 79 2012-08-09 12:40:07 2012-08-09 13:40:07 1 4 35 0 45 64 0 232.80 47 35.62 NEW tD-hDVFoKAP..........FspKs.............pssstpPcp.sDVFhpuPFp.........................................Kp+ShpcLoshQtpo+p.ssp....u.pphusssss.hs.pPsapo.E+shppptsu.Rsp.SsspFlphos.spcslps..shssups+usshps..............cEshlsPhu.uKPF+PQsLu+aupH.u.pD..................s.shpup.huAa+sssphp.ps.hGuVsh.TsLsspo..........sphsph.DPFuuAPFPSKt ....................--hDVFoKAP..........FspKs.............pshPspPcp.sDlFhpsPFp....................................Kp+SlpcLoutQppo+p.sup.....u.pphGsssssphs.pPsapo.ERAhpppt.stRsp.SospFlphSs.spcslps..sLssupsRGssLps..............cEullsPhu.uKPF+P.sLuhaspH.u.pD..................tpshpsp.huua+sssthp.hsshGuVPh.TpLsspo..........op.spl.DPFGAAPFPSKp.. 0 3 5 14 +15136 PF15283 DUF4595 Domain of unknown function (DUF4595) with porin-like fold Godzik A adam JCSG target SP16885A/PDB 4ghb Domain Large family of predicted secreted proteins mostly from CFG group, but also from Burkholderia, Pseudomonas and Streptomyces. Function of these proteins is not known. A 3D structure of a representative of this family from Bacteroides uniformis was solved by JCSG and deposited to PDB as 4ghb. 
There is some overlap with RHS-repeat (PF05593) family despite lack of obvious repeats in the structure 23.10 23.10 23.10 23.60 23.00 23.00 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.62 0.71 -11.42 0.71 -4.65 26 98 2012-10-09 19:20:05 2012-08-14 01:41:03 1 6 71 0 9 96 5 188.50 20 52.61 NEW apYsspG+lsuhsts.pphsst.p....phpss.sshsYssspl...lh......Tcctssss.....sah..LsppGalppCs.....p..phs....u.....phcsasFoYs.....scspLhplscst.........ss...ht.phslsYpsGslsplppphss.tp..............pshhths.hss..sphss.h.s...hshlhlt..-hhs.lsh...hhhAhYuthLGcssppLsh.phhsc.....ss..p..cspsY..oYshDpcGhsos ............................................hst.splhphh...pp..t........hppp.hsh.sassspl......sh.........ss.cttshh..............shh....LsspGasppss.........phss...........p.csapFoYs.....sps.Lsplpcsh.........ss......hp...phslsYps.Gslsplpsp.ps.p................................p.hhphtYss..sphts..hhp...hshlh....hh...chhs.lsh.....h..uhauth..lG....cssppL.h.p.hsp..................ss......pptpa..oaphcppuhss....................................................................................................... 0 4 8 9 +15137 PF15284 PAGK Phage-encoded virulence factor Coggill P pcc Pfam-B_45688 (release 26) Family PAGK represents a new of virulence factors that is translocated into the host cytoplasm via bacterial outer membrane vesicles (OMV). Members are small proteins composed of ¡­70 amino acids. In Salmonella they are secreted independently of the SPI-2 type-III secretion system, T3SS. The OMV functions as a vehicle for transferring virulence determinants to the cytoplasm of the infected host cell. OMVs are released from the cell envelopes of Gram-negative bacteria and comprise a variety of outer membrane and periplasmic constituents, including proteins, phospholipids, lipopolysaccharides, and DNA [1]. 
22.80 22.80 23.30 22.80 19.80 19.10 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.19 0.72 -4.23 3 81 2012-08-20 15:00:40 2012-08-20 16:00:40 1 1 70 0 3 21 0 65.20 61 93.70 NEW MK+hNSVFLALVLlLSAoTFSALsMAA-SusHph.psIFPh...WCplWPAGIshPE..h.KhCp ........MKKLKEMAAISLFTLLAAGFSASVMADDQA.....P.....ERVPAAEVKPVGE....HVHWCT.LFDPDGMELPPLP......GMEW..... 0 0 0 2 +15138 PF15285 BH3 Beclin-1 BH3 domain, Bcl-2-interacting Coggill P pcc Manual Domain The BH3 domain is a short motif known to bind to Bcl-xLs. This interaction is important in apoptosis. 25.50 25.50 25.80 25.50 23.90 22.90 hmmbuild -o /dev/null HMM SEED 25 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -6.56 0.72 -6.70 0.72 -4.39 8 70 2012-08-31 13:46:58 2012-08-31 14:46:58 1 3 47 9 34 58 0 25.10 89 6.09 NEW suush-sLS+RLKVTocLFDIMSGQ ...DGGTMENLSRRLK.......VTGDLFDIMSGQ.. 0 5 7 15 +15139 PF15286 Bcl-2_3 Apoptosis regulator M11, B cell 2 leukaemia/lymphoma like Coggill P pcc Jackhmmer:P89884 Family Pfam:PF02180. Bcl-2_3 is a small family of eukaryotic proteins associated with autophagy. The family is found in association with Pfam:PF00452, 25.00 25.00 25.00 25.00 24.70 24.40 hmmbuild -o /dev/null HMM SEED 126 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.70 0.71 -3.68 3 4 2012-10-03 11:38:54 2012-08-31 17:02:54 1 1 4 5 0 9 0 125.20 59 73.14 NEW TYWATLITAFLKoVSKVEELDCVDSuVLsDVSKIITLTQEFRoHYDSVY+tDYGPALpNWKssLo+LFTSLFlDsINpGRIVGFFDVGRYVCEELLCP.GSWTE-HDLLN-pMTpFFIENNLMNaFo ...............................TYWATLITAFLKoVSKVEELDCVDSuVLsDVSKIITLTQEFRpHYDSVY+...tDYGPALpNWKpsLo+LFTSLFlDsINpGRIVGFFDVGRYVCEElLCP.GSWTE-H-LLN-pMTpFFIENNLMNaFs............................. 0 0 0 0 +15140 PF15287 KRBA1 KRBA1 family repeat Coggill P pcc Jackhmmer:A5PL33 Repeat KRBA1 is a short repeating motif found in mammalian proteins. It is characterised by a highly conserved sequence of residues, SSPLxxLxxCLK. 
The function of the repeat, which can be present in up to seven copies, is unknown as is the function of the full length proteins. 27.00 0.10 51.90 0.30 17.70 0.00 hmmbuild -o /dev/null HMM SEED 43 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.97 0.72 -8.11 0.72 -4.60 21 189 2012-09-03 08:46:19 2012-09-03 09:46:19 1 6 22 0 74 223 0 44.00 32 25.27 NEW Gshup...u...sSs.u.SSPLQGL.sCLK-I.lsG..Pp..pPps.....sss..hsPtP ....................tp...u...sss.u.sSPLpGL.sCLK-Issst...Pp..tsps.........ss.......................... 0 6 6 9 +15141 PF15288 zf-CCHC_6 Zinc knuckle Coggill P pcc Jackhmmer:A6NNH2 Domain This Zinc knuckle is found in FAM90A mammalian proteins. 22.00 22.00 22.00 22.00 21.90 21.90 hmmbuild -o /dev/null HMM SEED 40 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.38 0.72 -4.28 25 289 2012-10-03 11:39:54 2012-09-03 09:47:41 1 25 154 0 191 274 0 37.10 43 3.39 NEW RVKCKsCGAFGHpu+SpRCPhKp..WpusLsPQslGs+c..tKE ..+lKCssCGAhGHh+TsKtCPlhp..tp..ss.ss.shu.pp..pE.................... 0 65 91 131 +15142 PF15289 RFXA_RFXANK_bdg Regulatory factor X-associated C-terminal binding domain Coggill P pcc Jackhmmer:O00287 Domain This C-terminal domain of Regulatory factor X-associated protein binds to RFXANK [1,2], the Ankyrin-repeat regulatory factor X proteins. RFXA is part of the RFX complex, Mutants of either RFXAP or RFXANK protein fail to bind to each other. RFX5 binds only to the RFXANK-RFXAP scaffold and not to either protein alone, and neither the scaffold nor RFX5 alone can bind DNA. The binding of the RFXANK-RFXAP scaffold to RFX5 leads to a conformational change in the latter that exposes the DNA-binding domain of RFX5. The DNA-binding domain of RFX5 anchors the RFX complex to MHC class II X and S promoter boxes [3]. 
26.10 26.10 26.70 26.70 24.70 25.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.75 0.71 -3.93 7 62 2012-09-03 08:49:35 2012-09-03 09:49:35 1 1 48 1 31 50 0 118.50 71 58.86 NEW oCTYpGCsETooQsAKQRK.PWMCK+HRNKM.YKDKYK.KKKsDQAhussuh.................hp-....ss-sslSlsKQRsGuhG-RPARPTLLEQVLNpKRLSLLRSPpVlpFLQpQQphLopQshtQppQphpG .......................................................................oCTYEGCpETToQVA...KQRK.PWMCKKHRNKM..YKDKYK.KKKSDQAlssuGsus........................usss.s+hEESsDshl............SlsKQRTGShG.DRPARPTLLEQVLNQKRL.SLLRSPEVVQFLQKQQQLLNQQVLEQRQQQFs.G................ 0 5 8 14 +15143 PF15290 Syntaphilin Golgi-localised syntaxin-1-binding clamp Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O15079 Family Syntaphilin or Syntabulin is a family of eukaryotic proteins. Syntaphilin binds to syntaxin-1 thereby inhibiting SNARE complex formation by absorbing free syntaxin-1. So it is a syntaxin-1 clamp that controls SNARE assembly. 
25.00 25.00 25.60 25.00 24.60 24.60 hmmbuild -o /dev/null HMM SEED 305 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.37 0.70 -12.28 0.70 -4.93 19 130 2012-09-03 08:50:22 2012-09-03 09:50:22 1 3 41 0 54 139 0 285.30 55 52.53 NEW +sssuPsss+cs..YGsoS.s.....SSSNSuShKGSDoSP.phpRssRYpoCGDNHGI+PPsPEQYLTPLQQKEVsIRHL+sKLKESpspLp-..REoEIcELKoQLuRMREDWIEEECHRVEAQLALKEARKEIKQLKQVlETM+sSLs-K.....DKGIQKYFlDINIQN+KLEoLLpSMElAQsGs.hpDEssh-hhC..sSPu+oLs.SsshsKlu-.........uhtht-Q..usEc..huDSGlLssD-h...sspsDlhpp..hhos...sssc...........sssulhpstsh.st..ps..h.th..........ptsh........h.pEpulQTDs......lshSsDlcsll..plhp .............Rpp.sPlsh+su..YusS.S.s.....SSSN.SGShKGSDsSP.......hhRRS...............s+YhsCu-NHGl+PPsPEQYLTPLQQKEVslRHLKs+LKEopcRLp-..R-oEIs-LKoQLuRMpEDWIEEECHRVEAQLALKEARKEIKQLKQVIETh+ssLh..DK.....DKGlQKYFVDINIQNKKLEoL.LpSMEhApsGs.h+-Ehsh..........-........ss.sSPt+SLshusshs+huD...........shshpppssE-..sADSthlssDsh...sstsDlh-p..hlou...sss-...........phpLhpohshssh..h.t..sh.hh.....................t.psus............h..EpAlQTDh......V.YsPslspllpplh......................................................................................................................................................... 0 2 7 21 +15144 PF15291 Dermcidin Dermcidin, antibiotic peptide Coggill P pcc Jackhmmer:P81605 Domain Dermcidin is a family of peptides produced in the sweat to protect against pathogenic Gram-positive bacteria. 22.00 22.00 22.00 23.20 21.90 21.50 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.37 0.72 -10.34 0.72 -3.81 2 11 2012-09-03 08:51:23 2012-09-03 09:51:23 1 1 10 1 2 10 0 94.50 73 82.66 NEW AYDsEAASAsGSGNPs+EASAAQcENAGEDPtLARQAPKPRKQRSSLLtKuLcstcKulsGLtpLGK-AV-sLEssGKGt..........Vp.sp.sLsSV ...................................AYDPEAASAPGSGNPCHEASAAQKENAGEDPGLARQAPKPRKQRSSLLEKGLDGAKKAlGGLGpLGKDAVEDLESVGKGA...........VHDVKDlLsSV................ 
0 2 2 2 +15145 PF15292 Treslin_N Treslin N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z2Z1 Family This family represents the N-terminus of treslin, a checkpoint regulator which plays a role in DNA replication preinitiation complex formation [1-2]. 27.00 27.00 29.80 29.80 24.90 24.50 hmmbuild -o /dev/null HMM SEED 803 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.59 0.70 -13.42 0.70 -6.67 11 59 2012-09-03 08:52:17 2012-09-03 09:52:17 1 3 43 0 36 60 0 621.90 46 46.02 NEW LYWVDTTEhuKLh-SPDHhGYWTlsELLpplGGTlLPsEohstshscstpslhssshcho....spPpLSsW.....hosLPhDSoLNsLLhssscYcAoFPphEGsLFLslctGKc.Q....cossVTLEPLuMpQRphppPVpI.hLKGoVsp......WshPtuuoLGT-SWlLpSs.-p.spssp.....phLFQQLspcLssEcLHLVA-Vsss-uhPPhTGllSPLSsoAslLTVhpsccs.EhQcahLQssssE.sspDssshhsDlVpsVLsplcsu.....-sssss....ssPVPEWsQQEL.....uRTs.PWosAVlE+W.FPhSNlSGASSsLMESFhLLQAsSss.cEEuS+oEuELT+pLSEhYQRKScE-us.susQc-s+KKRG.lPRTPVRQKMpTM....sRSLcMLNVARL...NVKAQKLpPDGuPssu.uEKuhQKssttRosDKlEs+GRsL+SSKsp-FKTEEELLSaI+ENYQKsVusG.-hhLhoCApshloTIKtFLKSpsTK-lEhsC..lsplKspLLKTSKsLRQp.lGpc..hDKEsKVRECQLQVFLRLEMClQCPSlppssD-hEQlVEEVT.-LLRhlsLTEDsuYLucFL.EEILpLYIsSIPcTL.......GsLYpSLGh.IPpKLAsVLPsDFFSDDSMTQEscSPh.shshsSs.sppulssuoEoDQLEELRTRSAKKRRKNALtR...H+SluEsSQNLRQIElPKVuKRss+pE.NS+ss......QQ..PhPpK-sVQEVTKVRRNLFNQEhlSPSKRshK+.hPRS+SVSAVEGLcaK.tphp+s+spth..sa+KLLTKpVuETPlHKQlS+.RLLHRQIKGRSSDPGPDIsVVEESPEKs 
.....................haWlDotp..phhtssDHhGa.ph.clLt.hGGsllP..sh...hsp.h..................t.s.......hp.lPh-uslsh.Lh.ps..aptsFP.hpGhLhhsht.sp.......pshsVhLEPlshpQ+.h.psVpl.hL+Gshtp......hshs..sthso-oW..hLpss.tt.ttttp.....t.hFQpLhppLsscplahlupVs...uhsshTullSPlSssshlLTlhpsc.s.thpthhhpsshsp.ssp-s..ss.hs-llsslLs.h.p....psssss....sssVPEWsQQEL.....upst.saosullEpW.FPhSshSGuSSsLMESFhLLpAsstp..p...-..-sScs-sELhptLuEhYQppsp-pss...ttpcppKKRG.lPRTPVRQKMpTM....sRS.LpMLNVARL...NVKAQKhpPDussssu.spKshp+hsttRss-+hcs+u+hh+o.ut.t-F+o.EEELLSa.lpcsYQKsVust..-hhL.ssApshlSslKhFLKScssc-lEhsC.......lspl+.spLLKTSKuLRQphupc...hDc...EsKVRECQLQVaLRLEhChQsPSlpps.--hEplVEEVs.-LLRhls..LTcD...suYLucFL.EEILtlYlsSIP+sL.......GplYpSLGh.lPpc....LAuVLPsDFFSDDShop-shSP....s..Ss.sppusssuscuDpLpELRsRSA+K......R.R.pssLhR...H+SlsEsSQsLRQIElP.......KhoKRs.s+pc.sspss.......................p.s.shKpslQEVTKVRRNL..FN..QEhhSPSKRuh+p.hPRS+SVSAlEGLcp.K......hsp.tp...t.sh++LLT+pVsETPhHKQlSp.RLLp+QhhGR.p.Ssss.-htlVEESP.K.s....................... 
0 5 8 17 +15146 PF15293 NUFIP2 Nuclear fragile X mental retardation-interacting protein 2 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z417 Family \N 27.00 27.00 34.40 34.30 20.90 23.00 hmmbuild -o /dev/null HMM SEED 599 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -13.23 0.70 -5.79 8 69 2012-09-03 08:53:03 2012-09-03 09:53:03 1 2 37 0 43 47 0 452.40 49 85.05 NEW ppKKTGaG-lNGsAs-tts.oPKsLsos-sssPs.ShVhNGsQphsDoNlp.KsotKuhThuKsGl+sKshhpKssMDKKN..-KoaESKs+Es.plDKpEslsl.NGVloNNSGYITNGYsGK.GADNDGSGSESGYTTPKKRKAR+NusKGsEsLshhp-KhhQQc.ss..slt.pL-s.Ks..shscttGsRlEGhKPsaKh-sssuG..hupuc.sss-lQRKNSDsKs.GssuKKFEDRsKGKhuossuSKEDSWTLFKPPPVFPVDNSSAKIVPKISYASKVKENLNKsAQss....u...........GEs.s.s.ts.......sRLSQVPMSAhKSVTSAoFSNGPVluGsDGsshss...ustslhss.AAuolss..ssuutssshs.-tussou.......AsEp+KsSLFIY...PSNMQslLPussQ....lshsup.TNQQsLGDIFQNQWGLSFINEPSAGPEsusscssccch.sEVoFQGE.pssshsopusphhPoGsp.hPsFPKAYELDKRTsPQ...uuhlKsu..ossE...uuuhspchph.h-.pKsDsuupGu.hVF.S+sp-l-..ss.AsPossLhuSAK-Q+Yp+GLER+-SWGSFDLRAAVlYHTKEME.IaNLQKQDPKRllTYcE ....................................................................................p+sG.htclNssss-tth.s.+s.sus-hspP..S...NG.spph.cssl....K.o.Ks.o.ststlps+sh.pKpsMDhKN..tKohE.pstEspshDKp-shsl.NGVls.NuGhITNGYhuK.uADND...GSGSESGYT.TPKKRKARpNusKusEslshhp-KhhQpcssss..lt.tL-t.+s..shscptGsRl-usKshaKhEstsuG..sspG+shhu-h.RKsSD.Ks.....GhsuKKhD-Rs.KuKtuosssSKEDSWTLFKPPPVFPVDNSSAKIVPKISYASKVKENLNKssQssos.uss...........GEo..ts.ss...........sRLSQVPMSAhKolTSAsFSNGPVluss-ssh.ss......shhss.AAsolss...hssu.sss...-hshsos..............AhE.hKsoLhlY...P.NMQshL.ustp.....lshPup.TsQpsLGDIFQNQWGLSFINEPSAGPEsshupsscpph.hploh.tc.hst..shsopusphhsoGsp.hssFscAh-L-KRTsPp...hushhpsu...spE...utsh..c.p.h.s-.pKs-spopGu.hsh.Spsh-lc..ss.usPossLhuSsK-ptap+shERp-SWGSFDl+AAlhYHTKEME.lhpLQKQDPp+llhYpE.............................. 
0 1 6 20 +15147 PF15294 Leu_zip Leucine zipper Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86TE4 Family This family includes Leucine zipper transcription factor-like protein 1 (LZTFL1) [1] and Leucine zipper protein 2 (LUZP2) [2]. 27.00 27.00 27.10 27.10 26.80 26.20 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.00 0.70 -11.87 0.70 -5.29 18 148 2012-09-03 08:54:27 2012-09-03 09:54:27 1 4 59 0 78 120 0 222.00 42 84.38 NEW FA+pKRshpLKoVDsCFpDLK-SRLs--TaTsDEVs-hLDGLpsVV+uEVEsELINou+TNVLLLRQLFpQAEKWaLKLQ.sDISELENRELLEplAcFEKp-houus.....p.s.-..ps.K....LpPLsEu.GsutLLs+EIpRLQEEN-KLKsRLKolEppATssL-EKsKLcpsL+-LQh...sp.cs....hhcuQ-ls-LEsplAslKsEhEKshpcposppKsLc-sLhssKHcLL+VQ-Q..LphAEKELEKKFppTuAY+NhKchLopKN-QIK-LR++Lp+ ........shpchthpLKpV...Fp-.+pohLh.p.p.TaT.cEl.phLsGLpsslpo.hc.ELhNssaoshLLl+pLhppAcK.h.h+Lp.p-luElEs+pLlEplt..chEKtpho.s.......................Lts.s..Et.......hsK.ItcLQpENcpLKs+Lhohph.ss.th-EppKlpttLp-Lph............s.....hh...+uQplssLEpplAsh......K............s-hpKs.h.Dppp....p.KuLcEslths...hps....lh+sQ.....Lp.hhppp.pp.......hh..p...pshhpph.phhsppp.Q.pt.................................................... 0 18 24 40 +15148 PF15295 CCDC50_N Coiled-coil domain-containing protein 50 N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IVM0 Family \N 27.00 27.00 27.00 29.30 22.00 24.60 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.15 0.71 -10.92 0.71 -4.23 11 129 2012-09-03 09:22:21 2012-09-03 10:22:21 1 1 71 0 74 106 0 127.30 47 30.86 NEW MuElpIDQSNLP.tVpEVCpsFAVLEDtuLAHNLQEQE..IEpahuoNlp+N+LVQ+DlpVAKpLQ-EE-....psps.hpppp+-LEcpDsEhAp.IQEcLhppAEctRppEpcDE-IA+pLQEcEhpEp+...R+p+ph ........................LP...tV.pEVC+-FAVLEDtsLAHsLQEQE..............IE.cHhuoNlpRNpLVQcDlpVA..KpLQ.EEDh...+Aps.p...h...p+ch+.....-l..Ep.........pDsElApEIQ..EcL.th..-....AE.c.pR....p..pE.c..cD.EcIA+hLQEcEhppc.++pp...h..................... 
0 13 22 50 +15149 PF15296 Codanin-1_C Codanin-1 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IWY9 Family This domain is found near to the C-terminus of codanin-1 [1]. 27.00 27.00 31.90 29.20 21.60 20.10 hmmbuild -o /dev/null HMM SEED 122 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.96 0.71 -10.59 0.71 -4.25 23 96 2012-09-03 09:23:55 2012-09-03 10:23:55 1 1 75 0 64 95 0 117.00 46 10.95 NEW ppslDshss..lspplLhssCPaLsEh+slltsshsstsps+....ushh+.pIT.......................Psusp.........t.sppplQt.pLE-AFF+sQPsSlRRoVEFVsERlsSNsVKchpssllhshhppupshlpphh ........p.puLDshsl..VDppLLYsCCPaluEhRpLL.uuhlusosu+...........suGhhR.KIT.............................................Psosp..t.hssps.spspptLQt.pLtpAFFHsQPsSLRRTVEFVAERluSNCVKHIKATLVs-llcpA-shLp-.h........... 0 19 24 43 +15150 PF15297 CKAP2_C Cytoskeleton-associated protein 2 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IYA6 Family This family includes the C-terminus of CKAP2 and CKAP2L. CKAP2 is a microtubule associated protein which stabilises microtubules [1]. 
27.00 27.00 47.50 30.30 26.20 26.20 hmmbuild -o /dev/null HMM SEED 353 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.45 0.70 -5.63 19 145 2012-09-03 09:24:35 2012-09-03 10:24:35 1 2 41 0 87 116 0 252.30 36 51.18 NEW Apsssppsp+hhtps+shsppphpsspusss...........pRsstsKEoscE...R.+A+LsEW+suK.G+slKRPPhshh.......t..p.ptpp..EcsstSFWTThsEEDEQ+L....FT-KlNpThuECLpLIs.-GCP+EElLshLsclIpsIP-A+KLsKYWlCLARLEshpuslpclIuIYEcAILuGAp...PIEEhRcsls-ILp.hKsptp.sphupNhpptsssppplp-lp.........pt.shs..cstc.tp-ppcc+ssh.ppspppp-cppc.ssssslp.Tssp-sptuslIKYNVSoTPhLQShKKKhQh-pssu..sh+-LKFLTPVRRSpRlpcKss+LPDMLKDH.PCVSSL-QL.........sElssp.TssFlhR.NsAL ..............................................t.p..t...t.p..ts.h..................pst...Kpsst-....c.+tpLpEWptuK.G+shK.....RPP.phh............tttp..cp.shSFWpoh.....tcE-Ep+h....hopKlNpohoECLpLIp.pGs.tp-lhshLp....pIP.pA.cKhsKaWlChstl...push..pll.slYEcAlhsGAp....PlpEhRcslhsI.Lp...............stphp.pt..t........................................................ts.p...c.tpppp.....p.....p.pppp.th.sss.pht.psppcst..shIKhpls.shPhlpuh..................thp-hKhlTPVRRStRlpct...ss+hP-MLp-H.......sVuSLppL..........-ltt....spsalhR.NtAL............................... 0 10 16 33 +15151 PF15298 AJAP1_PANP_C AJAP1/PANP C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8IYJ0 Family This family includes the C-terminus of adherens junction-associated protein 1 (AJAP1) and of PILR-associating neural protein (PANP). AJAP1 inhibits cell adhesion and migration [1]. PANP is a ligand for the immune inhibitory receptor paired immunoglobulin-like type 2 receptor alpha [2]. 
27.00 27.00 29.70 33.30 24.80 25.10 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.60 0.71 -4.44 9 85 2012-09-03 09:25:14 2012-09-03 10:25:14 1 2 35 0 49 74 0 163.40 48 56.79 NEW IsWGPTusD..lE-ss.suhhsssssssst..shTssossuToTstssps.sh.hThp..G...........P.thSThtss.s.sts.....sh.Pphhscsu.GLAVHQIITITVSLIMVlAALITTLVLKNCCuQSupsR+sSHQRKIpQQEESCQNLTDhoPupVsSslDIFTAYN-SLpCSHEClRsslPlYT-Etlp..ossaKouFNGNR ........................IsWGPTssc...Esss.Ps.hsPsh.sl......sh.suspshsTstss.ssth...phpspGlh.s.sP.........................ss.h.PhlhGspu.Glssp.hlTITlSlIhVlsA..TsllhK..C..hspStppRRsStQpth.p...QEESpQsLTDhoP....uulslhsAas-o.pso.-p.cspsssh...sh...pschtssFphNR.................................... 0 2 6 17 +15152 PF15299 ALS2CR8 Amyotrophic lateral sclerosis 2 chromosomal region candidate gene 8 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N187 Family This domain is found in amyotrophic lateral sclerosis 2 chromosomal region candidate gene 8 protein [1]. 
27.00 27.00 27.30 28.50 26.90 25.80 hmmbuild -o /dev/null HMM SEED 225 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.60 0.70 -4.76 33 110 2012-09-03 09:26:01 2012-09-03 10:26:01 1 15 36 0 86 153 0 199.60 31 36.09 NEW psshspsssclpWctt...........s.hlshsuhP............Fh........lhspthht.Cpa...Gpc+ctttpc+pppp.........................ptphphptocKhsCPA..plhl+...................clhpFP.................ca+lp...................................................................cspptp+cpshppL+pslhpt............hpsph.caalphPs.ppH.psHshtp.s...............................................shpptlc..............splhpKIccLVppG..lssl.plcc.pL.....ctalpcpha.pspphP.hpsp............pahPohpDl.....cschtpsppphc ..............................................s....ttstp+lhWcsp.............hlPa-GlP...............Fl...........htup..tsh...CQa...G.cRcthphc+hppp...........................pppspp.hKtsCPA..pIhl+...................cV.+FP.................-Y+lsst.................................................................c.h+hppccshphL+pslhs.............hsshh.RaYlpLP..sppsH.phH.htp.sh..............................................sh.pplc..............spltcKIp-LVupG..lpplhtl++.pL.....+pFVcc-LF.cscphPpppNh............paaPTspDI.....pN+htpsphph.p........................... 0 53 55 69 +15153 PF15300 INT_SG_DDX_CT_C INTS6/SAGE1/DDX26B/CT45 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N7B7 Family This domain is found at the C-terminus of integrator complex subunit 6 (INTS6), sarcoma antigen 1 (SAGE1), protein DDX26B (DDX26B) and members of the cancer/testis antigen family 45. 27.00 27.00 33.30 32.50 26.50 24.20 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.25 0.72 -8.90 0.72 -4.52 36 206 2012-09-03 09:26:55 2012-09-03 10:26:55 1 3 71 0 105 190 0 64.00 48 9.46 NEW cchNs-l+pplhKElR+sGRsYcpIFplL.cpVpGsl-l+ppFlchsIKEAsRFKR+sLlpp.LEch .....pclNs-lKtQlhKElR+.GRcYE.+IFpLL.cpVQGslcs+ptFlc.sIKEAuRFK+RhLIpp.LEc...... 
0 23 29 55 +15154 PF15301 SLAIN SLAIN motif-containing family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8ND83 Family The SLAIN motif containing family is named after the presence of a SLAIN motif in SLAIN1 [1]. They are a family of microtubule plus-end tracking proteins [2]. 27.00 27.00 33.20 30.90 26.80 25.20 hmmbuild -o /dev/null HMM SEED 371 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.57 0.70 -12.75 0.70 -5.29 13 195 2012-09-03 09:27:58 2012-09-03 10:27:58 1 3 43 4 93 159 0 316.00 44 81.31 NEW hLY......ss.pp.hssspKshSPlpWC.RplL-pPoP-hEsA+.psLph+L-Qh............................................................................h.s...ppsslSPQSSlD.....SELSTSEh--..su.uhsY............KLpDlTDVQIhARhQEES....LRQ-YAoTo..ussSRRSSShShpSh+...............R.ushSDQEhDt.ShE.-c-EphcpLshPpsphhssSP.........hp..s.P+Spo.Soh.................cpsR+Sspu..hs.tt.s.hpp.........................DKLRRShPNLs.Rssuhss.......................spsV+NSpShDSsLpsssuGluRhQSp.s.....l.pps+...usup.PlulRQPhKAsu.husslp..................us...t.hsssu.....shsspss.......s.s.....ossRSuLPRPusssssu.ssPRSKluQPsR..........................phLssPKo...h......usl+DsuW+DGCY 
...........................................................................................p.up.psLh.+L-.h..................................................................................................................................................................................p...htssLSsQSSlD.....SELSsS-.....DSIu.sY.........................KLpDl.TDVQIhARhQEES....LRQ-YAuTo...........SR+SSusShpSh+...............+..GThSDQEhDt.SL-.D.--phcphs.Ptsph.psSP..........p..s.P+Spo.uph.................ppsRhS.pup.hs..ppp..ppp.........................-KLRRShPNLu.Rhssh................................oV+sSpS.DSshpssssGls.RhQsp.s...................l..p.h+...............ssu..PlslR.QPl...K...Ahu.husss..s....................st......sssu............tsstss..............h.......hhhR..SuLPRPuhs..h..s...G.sl..PRSKluQPsR..........................p..L.sPKs...h......ushpDtsW+DGCY........................................... 0 17 24 54 +15155 PF15302 P33MONOX P33 mono-oxygenase Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96A73 Family This family of proteins contains a flavine-containing mono-oxygenase motif. It may have a role in the regulation of neuronal survival, differentiation and axonal outgrowth [1]. 
27.00 27.00 77.10 32.50 19.00 18.10 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.23 0.70 -11.82 0.70 -4.80 13 91 2012-09-03 09:29:03 2012-09-03 10:29:03 1 3 39 0 48 87 0 232.10 60 95.71 NEW uGhLG+MSLPlGhpRRAhSYDDsLEDsAPMTPPPSDhuSplhWKpPVIP-+KYQcLucs..E-scssh......ssls.Suus-shsKlPVVKAKATplIMNSLITKQTQESIQ+FEpQAGL+DAGYTPHKGLosEETKYHRlAEAlH...KL+hQSG-.sKE-+QsoSAQS....TPSoTPpSSPKp............+.RGWFsp.......GSSsuLsGP-hS....ohDuGus-ts+ssu-KWSlFGPRo.lQKSso..G.......GFolQuY+GAQKPSPMELh+AQATphuEDPAsF.KPPKM-IPs.hEuK+..sPRsHNLKPRDMNVLTPTGF ..........hGtMSLPIGhhRRAhSYDDsLEDsAPMTPPPSDMuS.l.WK.PVIPERKYp.cLuc..hE-Gpsshs.....suhs.usuh-s..hppsPVVKAKATplIMsSLIT..KQTQE.SIQ+FEpQAGLpDuGYTPHKGLTsEET+a.hRluEulp...KLphQSGE.s+E-+.suSuQS....TPSoTPpSSP+p............p.R.GWFsp.uuosslsuss.s.....ohD.G....ssp......sts.s-+WShFGPRs.lQKsso..t.......uFuhQuY+GAQKPSPMElh+sQAsRhs-DPAs.h...pPPKM-lPs.hEu++...ss+sHpLKPRDhNVLTPoGF........ 0 1 4 14 +15156 PF15303 RNF111_N E3 ubiquitin-protein ligase Arkadia N-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96B23 Family This domain is found at the N-terminus of E3 ubiquitin-protein ligase Arkadia [1]. 
27.00 27.00 30.50 30.40 22.30 21.90 hmmbuild -o /dev/null HMM SEED 269 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.03 0.70 -4.87 13 116 2012-09-03 09:29:48 2012-09-03 10:29:48 1 3 41 0 57 120 0 236.70 51 37.61 NEW hKuE.ssDAspptpsLct..ssPcshtsucshsschEshsuKsGs-h.p....-ot....t.susphpo..sLlhch+RcSpputsussp.....................-psSssctpc-SS.ScChpSPSSShHhGDSDTLSSs-Eptsstttuttt.ssts.............upopuuRpp+hsRSEoEoss..hMtt.p.+.........tpp++sssRhphVK......upRoQKQKERIhL.RpKREshAR+KYsLLpsSSoSsp.DLosDSSoSSSpEuE--l.....SGsS+s.ouslP ..............................................................tucshPttsEh.suKsust.sc.Lp.sp.ppptshsus..pp..sLlhch+RcSpput.u.sp.....................ppPos....pssS.scCh.SPSSShHhuDSDTloSsE-pEss.tcots...sut.............utoh.uRpp+hsRoEoEos...hMtt.p.+.........pS.c+.ssRhphVK......upRoQKpKERIhl.RpKREshAR+KYsLL.sSSoSsp.DLos-SSoSSS.-s-E-l.....oups+p.suplP..... 0 2 6 23 +15157 PF15304 AKAP2_C A-kinase anchor protein 2 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96FF7 Family This family includes the C-terminus of A-kinase anchor protein 2 (AKAP2). It includes the site where the regulatory subunits (RII) of protein kinase AII binds [1]. 
27.00 27.00 28.00 29.40 26.90 26.10 hmmbuild -o /dev/null HMM SEED 344 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.42 0.70 -12.28 0.70 -4.83 18 145 2012-09-03 09:30:27 2012-09-03 10:30:27 1 7 47 0 79 126 0 242.10 23 39.72 NEW I.sRPLLopsuhs...ssP.c...+-RuRs.SlalQRDltpETpREcDhRRp.Gh.......ttsusPshhspssp.stLpRthSS......cslLu.ssDupstsPsPEs...++Vs+hsscuYQshLssGss..chss....uhstPst..stttsps.t.spuths.t..hs.sssps.spp.t.....p..t...pst.shl..-..hlp...ht.ph.s....sph....p.hs.chs....ussslRht+sQuSsLLEcElcsVLcRERElpEpRRsshh.sE.......shsPss..s..hc.psup..uSopuuG..hsGohSVSES.hasslphaStLsasspsPs.ts...tpppctthYAGIps.Dtls.EllpuoRVoRHKNuhApRWEuGlas ..........................................................................................................................................................................................................................................................................................................................................................................................................h...h.s.t......tph..........t..t.........ushhlRtp+ppo.shlEcEl+uspEREcELpcQR.phh..sp..........................hhtPs..............c.................Ssotpss..........psshuspps...shtp.....p..ps.h.hh.h.......pt...tt......tpphp.................s.spllcuhRVsR+KsshA.RWEutIhs..................................................... 0 8 11 34 +15158 PF15305 IFT43 Intraflagellar transport protein 43 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96FT9 Family Intraflagellar transport protein 43 (IFT43) is a subunit of the IFT complex A (IFT-A) machinery of primary cilia [1]. 
27.00 27.00 27.90 27.70 25.70 25.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.95 0.71 -4.17 34 125 2012-09-03 09:31:47 2012-09-03 10:31:47 1 2 100 0 81 136 0 137.10 37 64.49 NEW Wu-c.......................u......pcpsppsscphp.tppphpstshp........t..............DIPl..IPDL.--lp.....-EDhhsplAsPPolpss.+VhTh+-LD.sDLhptsuhpsl.........DstlD............LpLLT+.sLsscpplc.EcDpsW-WDpLFTEVouElps-h.stpt .......................................................................................t.tp..t.......tpthpt...p..........t.t.....................DIPl....IPDL--.lp..........-EchshpVAsPPolpss.RVhoa+-LD..sDLhc.hsAapsl........................DG-lD............LpLLT+.sLsPEcp.lc.EcDssW-WDpLaTEVoSElhs-hp...h........ 0 33 39 60 +15159 PF15306 LIN37 LIN37 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96GY3 Family LIN37 is a component of the DREAM (or LINC) complex which represses cell cycle-dependent genes in quiescent cells and plays a role in the cell cycle-dependent activation of G2/M genes [1-2]. 27.00 27.00 38.90 28.00 26.90 26.70 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -11.32 0.71 -4.22 48 143 2012-09-03 09:33:57 2012-09-03 10:33:57 1 3 106 0 95 141 1 153.20 28 60.56 NEW hpps.sps.tt.ppo.............hlhcLa-RuV-Luphppp..ss..........LYs.lCRuW..hcNpsp...hstt.t..............................tsttspplhpLPpP..........tss.h.phPs.hp.ppppttppt..............................hotpsLlppHhpRWKclRp+....Wppp.pppphpR....YppohplLpt ......................................t.pp.....ttt.p.pps..............alh+LFDRSVDLupasps..oP............LYs.lCRAW..h+Ns..Pps.tht.tpst.s.............tp.............psscspslhpL..P.P.........ssssshs.plPsshp.ppptptspt.....p..........................o.ssLltpphpRWK+lRp+Wpct.pppp..RYppshplLp................... 
0 31 41 71 +15160 PF15307 SPACA7 Sperm acrosome-associated protein 7 Coggill P pcc Jackhmmer:Q96KW9 Family SPACA7 is a family of eukaryotic proteins expressed in the testes. Proteins in this family are typically between 104 and 195 amino acids in length. There is a conserved DEIL sequence motif. The function is not known. 27.00 27.00 81.90 81.70 19.40 18.20 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.74 0.72 -10.50 0.72 -3.92 8 19 2012-09-03 09:34:46 2012-09-03 10:34:46 1 1 11 0 5 25 0 104.90 59 62.33 NEW WQt.sphpPpp....pssGSsTEts..FsSppEDluElLDEILVQEIL-......uKTTspEhsSTuTTLpTth....AGh-ENYQtsu..oENYHEhLENlpaSSGTccclSsD-tsAsANLHu ........WQ..sp.cPpp....tssGSsTEIP..FSSKpEDhuELLDEILVQEILDh.....NKTTPoEMPSTASTLST.lH...AGIDENYQAGG..SENYHELLENLQFSsGhEsKlSNDEAsANANLHu. 0 2 2 2 +15161 PF15308 CEP170_C CEP170 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96L14 Family This family includes the C-terminus of centrosomal protein of 170 kDa (CEP170) [1]. 
27.00 27.00 61.90 48.60 20.60 22.80 hmmbuild -o /dev/null HMM SEED 691 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.28 0.70 -13.26 0.70 -6.27 14 215 2012-09-03 09:35:26 2012-09-03 10:35:26 1 6 39 0 63 196 0 476.50 42 45.21 NEW suchshphp.ss.s.p.sshusts.p...u.FVRQESFTKEpuSusls.sKLPHISSHPh..........LpDLpss+usph-h.opDT+LlLKETEosLAALEAKLhsppspt-st..........sssst..-DSLSG-SDVDTASTlSLlSGKstsssos...pt..shuuhQKEKSSossusQD....ss.sSARERLoEKp+p..ssts.tpsEss+R.hph+RupGspGSLDhTDD-+uSuhsa.PsoDhsoSDpEp.....uuRstsR+KP.st.....ssK-Epu+sotssp+s..........QQsLTRSNSLSTPRPTRAS+LRRARLG-ASDsEsADs-+us.u...ss.sussuKssspsKKLSRLDILAMPRK.RAGSFTsPSDoEus.s.RouFSGR..SlEthhsuRKs.osu-u...+suu++sAsus......s+QPh..oRsRouSs+YoSsosp....RRQpGSDYoSTSE-EaGSs+sSPK.H.....pRSHsSTAhQTsRspss.utsp.h...ss.u.....sppp---.pcEs-sY..hhs.TuchA.EI.ARL.SQsLsKDlAILAREIHDVAGDGDShoSuGsu.Sool..SolP..sTPASTISsREE..............LVQ+IPEASLNaQKVPPGuhu........sc.shDQs..hsDsp-cshup+pRshsR-E...............VlhDNLMLNPVSQLSpsIRENTEpLApKMKILFQNptRsWEElEAKIsoENEVPILKTSNKEISSILKELRRVQKQLEVINAIlDPsGsLDlhsuN+usu 
..........................................................tp.................t..s........t.hlRQtSFTh-.soss...s.thlP+Isp...............tphtttptth....s.DTthlLK-oEsshAhLEA+l.pp..................tss....psSlSs-SDVDTuSTlS.hsucst.ps............s..+-pss.sss.ps.......sss+-p.pc+t+.....................puth..sShDhss-ppsss......s-.hsss.p.......s...spp....................p.pts+..s..................ts.s+psoLstPRPTRsShLRRARLG-sSDs-.s-s-+hs.t....t.....sst..s.tt+tlSRlDhLA.PR+.RhGShss.SDsEss.s......pss.SsR..ss..E.h.t.th.....sts....+.ss+httssh........s+.s...s+s+s.ssthssss...............................................sshp..p.t..s................t.pcc...cE.-.Y..hhs.Tsc.t.EI...A............Rl.......SQ....DlAllAREIpDVAG-hDS.sS..su.su..uso....tshs.ssPu..osl..ss+-E........................LVp+l.-tS.......LNapKlPPhshs...........st.s.c.p.....s-..c.....+pRsh..sR-E...............shhDsLhLssV.QhSptIRpsh-phAtKh+ILFpsptRsW--lEuKlpuEsElPllKTSshEISSILpEL+RVpKQLpsINshlDPsGsL-h.h.s...h..................... 0 4 11 30 +15162 PF15309 ALMS_motif ALMS motif Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96L16 Family This domain is found at the C-terminus of Alstrom syndrome protein 1 (ALMS1), KIAA1731 and C10orf90 [1-2]. 27.00 27.00 27.00 27.00 26.90 26.50 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.06 0.71 -10.76 0.71 -4.33 17 152 2012-09-03 09:36:21 2012-09-03 10:36:21 1 5 71 0 88 160 0 123.80 34 9.40 NEW shl+hoLQEALph+RPDFIS+StcRlKRL..cLlscER+hQphhppERctLFs.s....cp.tt....s..hshp...hth.+..pRslsKKEMlpRS+RhY......cpLPEVp+++EEE+RcscYpo.RL+AQLYKK....KlpN...plhG.++s.hp ...........................................phoLQEuLph++PpFluRSppR...hK+L...chhsp..p.R+hp.phhpt...cpp..thhs....................tt.......h.h.shp.s.hhhs+.....pRhloc+EMptRo+RlY......ppLPEVpp++.EEp+..+ctt.......hpoNRL+AplF+K....plhsplht+p....s..................... 
0 24 31 49 +15163 PF15310 VAD1-2 Vitamin A-deficiency (VAD) rat model signalling Coggill P pcc Jackhmmer:Q96LK8 Family VAD1-2 is a family of proteins found in eukaryotes. The family is expressed in testes and is involved in signalling during spermatogenesis. 27.00 27.00 165.10 48.60 22.30 21.90 hmmbuild -o /dev/null HMM SEED 245 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -11.65 0.70 -4.71 8 28 2012-09-03 09:37:07 2012-09-03 10:37:07 1 3 21 0 13 30 0 218.50 53 65.13 NEW EE-QV.sssHRSIRVQTSKHLFWADKLIQASEHSLQ+thshQ.t+posscTsup.spp.l.pcshsSc.cQLQsPSspPs.PuTsS.Q..P.oPp..SS..sLoPAIGLAELlNFASSLAlASSSKhDLPNLEHMIKuPPQ...KAppPST-Pss...p.As-pp-tcpp.....sch.sEKP.LEss..EspKuaKQEDKNls+PYLDFSKPGhKRATIEGEVKLLQsPAhSPp.QGApKDSVPGTKKGoPLLLKIHFKLSSPoSPE ..........EEpps..Sus++SIpsQTSKHLFWAsKLIQASEHSLQ+slshQ.pptSsspshpp.hppslspsshsSc.cQlQ.PsupsuPPsTuSQt..PSPh..SS..sLsPsIGLsELIsFASSLAhASSS+hDLPsLEHMhKAPPQ...cAhEPSTEPhh....p.ss-cp-scpp.........sEt.sEKP.tEAt...ts.KuWsQEDKNhspsYhDFSKPGlKRATIcGplpLLQsPApSP.LQGuccDSVP.GpcKtsPLLlKIHFKLSuPo.PE............ 0 1 1 1 +15164 PF15311 HYLS1_C Hydrolethalus syndrome protein 1 C-terminus Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96M11 Family \N 27.00 27.00 27.10 27.10 26.60 25.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.22 0.72 -3.75 20 80 2012-09-03 09:37:52 2012-09-03 10:37:52 1 2 70 0 59 82 0 79.30 40 26.71 NEW IRPthpt.pppt....tKsDPVs+YapY+cpWcph+sP.GEcpRpsLRWplREpMhpps....s..+s.+hhl.PNsYlVPTpKKRpALRWplRspLA .............IhPhhsp.pppp.....tKsD.Vu+YapY+c-Wcph..+..hP.GEccRppLRWsVREpMLp+s....s..hs.+hhl.PssYlVPotKKRpslcWtlR..hs................... 0 20 22 41 +15165 PF15312 JSRP Junctional sarcoplasmic reticulum protein Coggill P pcc Jackhmmer:Q96MG2 Family JSRP, junctional sarcoplasmic reticulum protein 1, or junctional-face membrane protein of 45 kDa homologue, is a family of eukaryotic proteins. 
The family is to the junctional face membrane of the skeletal muscle sarcoplasmic reticulum (SR); it colocalises with its Ca2+-release channel (the ryanodine receptor), and interacts with calsequestrin and the skeletal-muscle dihydro-pyridine receptor Cav1. It is key for the functional expression of voltage-dependent Ca2+ channels. 25.00 25.00 30.60 29.70 23.40 18.60 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.47 0.72 -9.11 0.72 -4.01 17 32 2012-09-03 09:38:57 2012-09-03 10:38:57 1 1 26 0 19 27 0 67.50 50 25.24 NEW s-ELPWG-lTLNKCLlLASlVALLuSuFQlh+......DslsGptssst..sPt.Wl..Pou.ss+c..st.sPtP......c ..p-ELPWGDloLNKCLVLASLVALLGSAFQLC+......DAVsG-ssh.t..sPtsWV.PPSS.sP+c.ss.uPhPc......................................................... 0 1 2 7 +15166 PF15313 HEXIM Hexamethylene bis-acetamide-inducible protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96MH2 Family HEXIM is a transcriptional regulator that functions as a general RNA polymerase II transcription inhibitor. In cooperation with 7SK snRNA it sequesters P-TEFb in a large inactive 7SK snRNP complex preventing RNA polymerase II phosphorylation and subsequent transcriptional elongation. HEXIM may also regulate NF-kappa-B, ESR1, NR3C1 and CIITA-dependent transcriptional activity. 
29.00 29.00 29.70 41.00 28.90 28.50 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.59 0.71 -3.95 25 120 2012-09-03 09:39:46 2012-09-03 10:39:46 1 2 80 4 79 116 0 132.50 41 43.16 NEW WK.PY.K.LoWc.............E+pph-Epps..pRAsRl............hu+GpPlAPYNTTQFLM-DH-tpE................................scLssssthpcttss.........................sspst.hsup-Dp....................................................FLp+-FS-sYEph+.sEpLpsMSKpELlpEYLcLE+chuclc .............................................W+PYhc.LoWp...........................E+pph-E+po.....pRAoRlRt.....pMFA+GpPVAPYNTTQFLM-DH-.cE........................Pc...Lcssph.p+ss..sps.................................................ssssp.tspscsct-.........................FLp+DFSEsYE+hH.sEpLQsMSKQELlpEYL-LE+phSph...................... 0 23 29 50 +15167 PF15314 PRAP Proline-rich acidic protein 1, pregnancy-specific uterine Coggill P pcc Jackhmmer:Q96NZ9 Family PRAP, or proline-rich acidic protein 1, is a family of eukaryotic proteins. PRAP is abundantly expressed in the epithelial cells of the human liver, kidney, gastrointestinal tract, and cervix. It is significantly down-regulated in hepatocellular carcinoma and right colon adenocarcinoma compared with the respective adjacent normal tissues. In the mouse it is expressed in the epithelial cells of the mouse and rat gastrointestinal tracts, and pregnant mouse uterus. This article describes the isolation, distribution, and functional characterization of the human homologue. PRAP was abundantly expressed in the epithelial cells of the human liver, kidney, gastrointestinal tract, and cervix. PRAP plays an important role in maintaining normal growth suppression [1]. 
27.00 27.00 54.30 30.00 24.00 21.50 hmmbuild -o /dev/null HMM SEED 45 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.04 0.72 -8.76 0.72 -3.67 15 33 2012-09-03 09:44:09 2012-09-03 10:44:09 1 2 22 0 16 31 0 44.80 55 28.18 NEW PE.D+DsLYH..PtspE...sQtEscPhsp...sl.s+pVLp..GPEED+DHIYH ..PEPD+DuLYH..P.s-E...sQsE-pPhhh...shss+QVLp..GPEEDpDHIYH 0 1 3 5 +15168 PF15315 FRG2 Facioscapulohumeral muscular dystrophy candidate 2 Coggill P pcc Jackhmmer:Q96QU4 Family This family of proteins is found in eukaryotes. The family is localised close to the D4Z4 repeats on chromosome 4 and 10 that are associated with the autosomal dominant facioscapulohumeral muscular dystrophy (FSHD). FRG2 are transcriptionally upregulated in FSHD myoblast cultures suggesting involvement in the pathogenesis of FSHD [1]. 27.00 27.00 29.00 28.50 22.10 22.00 hmmbuild -o /dev/null HMM SEED 181 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.48 0.71 -11.39 0.71 -4.31 8 40 2012-09-03 09:44:56 2012-09-03 10:44:56 1 1 15 0 18 49 0 172.50 58 68.15 NEW GSEPppcE.-sSpEo-hptps.SsusoEsESuophEusRKRKhSS+DSopsssGus...st..EsSsohcpK+.Rsss.us+ssEspcsusu+pcspu..tsG+pcRpRsRs.sscPPPlRKSLVToLRuhSEAIYpDlsQhpAQQttSPLT.EQLstLuQLpGPLsAslQTlYoMAsQAAaAFPAEGWL ..............................GSEPsPNc.ENScEoKL+usN.SoAsSEsESSShpENsRKRKISS.+DSspDpAGNC......Pc.cEsSloLpKKu.RuST.uVHsSEIQETs-uH+RGpSRApoG+u+RHRSRuhuspsPsLRKSLVTSVRuhSEAlYQDLAQVhAQQh+SPLTpEQLohLoQLRGPLCAtVQThYoMAoQAAasFPAEuWL..... 0 3 3 3 +15169 PF15316 MDFI MyoD family inhibitor Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q99750 Family Members of this family inhibits the transactivation activity of the MyoD family of myogenic factors [1]. They affect axin-mediated regulation of the Wnt and JNK signaling pathways [2], and regulate expression from viral promoters [3]. 
27.00 27.00 50.20 49.80 25.30 24.90 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -11.44 0.71 -12.10 0.71 -4.27 9 115 2012-09-03 09:45:47 2012-09-03 10:45:47 1 1 42 0 61 106 0 162.80 51 76.62 NEW QPps..........lPh.ssustcs.ptEhGphps.......sGs.....Gsh....ss..............p+hp+KlposhSlsSsuu+KSKsuo.....sppsuphP..tp...DCCVHCILACLFCEFLTLCNlVLspAoCGh.CoSEs....CCCCC.GsshGsDC....sCPCDMDCGIhDACCESSDCLEICMECCGICFPS ......................................................................................p..thps.....lsNGs..Gh.p..Gstphuus........h.u..Assu..pKhH.RKlQ.op.SlsSpsSKKSKssu.....pppsup.P...tE..DCCVHCILuCLFCE...FLTLCNIV...LspAoCG.......CoSEs.....CC......CCC.......Gst......s...sDC....shPCDhDCGIhDACCESuDCLEICMECCGlCFsS..... 0 2 9 21 +15170 PF15317 Lbh Cardiac transcription factor regulator, Developmental protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BQE6 Family The family of proteins are cardiac transcription regulators, named Lbh, short for Limb, bud and heart. They regulate embryological development in the heart [1]. More specifically, in humans, they may act as transcriptional activators in MAPK signaling pathway to mediate cellular functions [2]. This family of proteins is found in eukaryotes. Proteins in this family are typically between 92 and 116 amino acids in length. 27.00 27.00 28.10 33.50 26.60 17.70 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.47 0.72 -3.73 18 80 2012-09-03 09:46:51 2012-09-03 10:46:51 1 2 40 0 42 78 0 86.90 56 67.14 NEW MTEV.hso..ssh--hsls.......PccspLohQIFPDsu-h....-cssK......LKcRLPSIVVEPTE.uEVESGELRWPPE-hLlp.p-pc-ptt-pthpsppp ........................MTEVhMso..ssM--huLs.......PcKDtLSaQIFPDPSDF....-RsCK................................LKDRLPSIVVEPTE.GEVESGELRWPPEEFLlpED-p-pspEsttpsp.................. 
0 2 8 19 +15171 PF15318 Bclt Putative Bcl-2 like protein of testis Coggill P pcc Jackhmmer:Q9BQM9 Family This family of proteins is found in eukaryotes. The family may represent a set of Bcl-2-like proteins involved in apoptosis, see UniProt:Q9BQM9. 25.00 25.00 69.60 40.30 19.90 19.40 hmmbuild -o /dev/null HMM SEED 180 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -11.37 0.71 -4.28 6 22 2012-09-03 09:50:31 2012-09-03 10:50:31 1 3 17 0 10 20 0 140.00 60 88.05 NEW MGNsSSHKRTKAPKQA+KE+PPDMDKA+t.+QFFSHLKpK........KPus..................uhhsKIVLLFPLDKRQQLAEA.......ut.s......-cssG.....As.s....uhPA.APMLRGAGDus-RR........c.......ch.plhlLL...hlt.-uhhttc.........G.......GuKuAQsWQcLas+LLocuEA-sEuss.AEEQPRKRR+CPR .....MGN.SSHKRTKAPKQA+KERPsDMDKAha.KpFhsHLpRK.............KPus..........................+IVLlhPLDKRQ.LApA........sp..-.ssG.....As.h....u.PA..APhLRGAG-Gs-Rc..........ch..lhlLl...h...-sh..t-..........G.......GA+us.sW..Lhs+hho.tcsstEuts.tcpQPRKpp+s.R........... 0 1 1 2 +15172 PF15319 RHINO RAD9, RAD1, HUS1-interacting nuclear orphan protein Coggill P pcc Jackhmmer:Q9BSD3 Family RHINO, or RAD9, RAD1, HUS1-interacting nuclear orphan, is a family of eukaryotic proteins [1]. Under genotoxic stresses such as ionizing radiation during the S phase, RHINO plays a role in DNA damage response signalling. It is recruited to sites of DNA damage through interaction with the 9-1-1 cell-cycle checkpoint response complex and TOPBP1 in a ATR-dependent (ataxia telangiectasia and Rad3-related) manner. It is required for the progression of the G1 to S phase transition of breast cancer cells, and it is known to play a role in the stimulation of CHEK1 phosphorylation. It interacts with RAD9A, RAD18, TOPBP1 and UBE2N [2]. 
25.00 25.00 37.30 32.90 20.40 20.10 hmmbuild -o /dev/null HMM SEED 236 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.49 0.70 -11.76 0.70 -4.55 18 48 2012-09-03 09:51:53 2012-09-03 10:51:53 1 2 35 0 26 42 0 219.60 49 98.14 NEW MPP+KKRpppspKAQLLF+ppPLEGPKHchuSPQhsh.THT..RQVPoKPID+sTlTSWVSPQFDsTsEoaFPuppK....H.R.......c........pA+pSSRKSsos..KFPpLoFEospoS...S.S.-.sL..uls..hs+......-s.PsQscKDlopRPLVPhLSPQSCGElSs+.....sLpohPhVFlPPDIQTPEsS......sl+p....s......slPs-p+csuLsu.CshHssoPpSP-PGPVLVcDTPEEKYGlKVTWRRRcHLhsYLRERGKLS+SQFLV .......................MPP..+..KKR...pp.opKApLLF+ppPLEGPKHphuSsQhsh..THT+QVPSKPIDps...TlTSWVSPQFDssupoh.FPstpK....Hp..............s...............pA++uSR+osos..KFPcLoFEospsS...S.S.-.sL..uhs....hh+...............cs.Psps...cKslsp..RPLlPhhSPQSs.uphSsp.....t.pshshVhhPPDlQTPE.S.......s+p............p..ls.spppsph.s.s..tssoPt.ss-PusVLVcDTPEccYGlKVTWRRRpHLhsYL+-RGKLspuQhLV........................................ 0 4 5 10 +15173 PF15320 RAM mRNA cap methylation, RNMT-activating mini protein Coggill P pcc Jackhmmer:Q9BTL3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 102 and 154 amino acids in length. There is a single completely conserved residue D that may be functionally important. RAM is a family of eukaryotic proteins that are an obligate component of the mammalian cap methyltransferase, RNMT (RNA guanine-7 methyltransferase). RAM consists of an N-terminal RNMT-activating domain and a C-terminal RNA-binding domain. Either RAM or RNMT independently have rather weak binding affinity for RNA, but together their RNA affinity is significantly increased. RAM is necessary for efficient cap methylation, maintaining mRNA expression levels, for mRNA translation and for cell viability. 
25.00 25.00 25.90 28.30 22.30 22.30 hmmbuild -o /dev/null HMM SEED 81 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.38 0.72 -10.06 0.72 -3.72 24 82 2012-09-03 09:53:27 2012-09-03 10:53:27 1 2 67 0 60 71 0 82.00 43 64.28 NEW pchE-hFssRFT--DpEapcahp..+PscsPPlV-sW..p.....u...p....suGsp.........+sp.ssphp.s..pcph....pGpstpt...shpstsR..pp.pa.ps.R...saup ...saEEhFAsRFT--DcEYQEYlc..+Ps-sPPIVEpW...p.....u..R....uGGsp.........................Rsc.sNphp.s..sRpa....cGcss+t...GWsscs+..tp.pa.ps.R..sht.................................................... 0 10 14 33 +15174 PF15321 ATAD4 ATPase family AAA domain containing 4 Coggill P pcc Jackhmmer:Q9BU68 Family ATAD4 is a family of proteins is found in eukaryotes. The family is also known as PRR15L, or proline-rich 15-like. ATAD4 is expressed almost exclusively in post-mitotic cells both during foetal development and in adult tissues, such as the intestinal epithelium and the testis. Its expression in mouse and human gastrointestinal tumours is linked, directly or indirectly, to the disruption of the Wnt signaling pathway. 25.00 25.00 25.00 60.40 24.70 24.80 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.42 0.72 -10.29 0.72 -2.98 28 78 2012-09-03 09:54:15 2012-09-03 10:54:15 1 2 41 0 44 68 0 89.60 47 75.69 NEW WWK.LThhRKKps....ps.+VhhpsP.................................sp.....us.........ss..sspp.......s......s.....p....sst.stcs........phssRL....-Kls.ccp...sppRplKVS+SGRFKEK+KVRATLs..p ...................WWK.LTh.RKKcu....ps+Vhh-hP.................................sph....us............ss..uusc.......t......s...ps......sst.sssu.........shssRL....EKls-cp...optRplKVS+SGRFKEK+KVRATLs-p. 0 4 8 19 +15175 PF15322 PMSI1 Protein missing in infertile sperm 1, putative Coggill P pcc Jackhmmer:Q9BUN1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 249 and 341 amino acids in length. 
25.00 25.00 26.10 26.10 24.70 24.10 hmmbuild -o /dev/null HMM SEED 311 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.20 0.70 -5.34 11 35 2012-09-03 09:55:01 2012-09-03 10:55:01 1 2 23 0 14 27 0 272.00 60 89.90 NEW AQG.Tpo.Tu....hQRVSFRFG.....GP.ho..RoY+oTu..RT...shPR+hR..VThEDEsDssAsADRLAGPAAAELLAoTVuTGhu+sp...ss.s....-EDGSLEEGVVIsARKsso....shct.sssspT.stusoo..RFhANoQEPEIRLTosl.suoh+sTt..-.lsS-sTLopWSTAGSTPsRWPsPSPTAMPP.PEDLRLVLMPWGPWHCHCKSGTMSRTRuGKLpGLSGRLRVGALSQLRTEHRPCTYpQCPCNRc+EECPLDouLCsDosCoopsosp...ohsshPslahRhpPs.h....s...SP..SPALAFWKRVRhGLEDIWNSLSSVFTEMQPl ..........AQG.TpTsTt....MQRVShRFG.....GP.hs..RSYRoTu..Ro...slPRKhR..lhLEDENDAhAsADRLAGPAAAELLAuTVuTGhSRSS...s..s....EEDGShEEGVVIsAtKsso.........stchsSssssT.sGuSST..RFhANoQEPEIRLTosLPposh+sTp..D..sS-sTLopWSTsGSTPsRWPsPSsTAMPs.PEDLRLVLMPWGPWHCHCKSGTMSRoRuGKLpGLSGRLRVGALSQLRTEH+PCTYppCPCNRh+EECPLDouLCsDosCuopsTTp...ossshsslHhRppP..h...Pss...SPsPALAFWKpVRIGLEDIWNSLSSVFTEMQPl............... 0 1 1 1 +15176 PF15323 Ashwin Developmental protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BVC5 Family This family of proteins are found in eukaryotes. These proteins have an important role to play in developmental biology, particularly embryogenesis. It plays an important role in cell survival and axial pattern [1]. It is also thought to be a crucial subunit in the tRNA splicing ligase complex[2]. Proteins in this family are typically between 141 and 232 amino acids in length. There are two conserved sequence motifs: HPE and PQR. 
27.00 27.00 32.40 32.40 24.90 24.60 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.46 0.70 -4.34 17 75 2012-09-03 09:55:43 2012-09-03 10:55:43 1 3 67 0 50 70 0 183.50 38 84.61 NEW ss.hLLHPEL..LSc-FLlLhLp.p+sIhscs....p.sKDpLT-LYlpHslPLPQRcLPcsRWGKhhEKpRt.p.th......tspp............ppssss-s.RKRPLI.........VFDGsSoso.slKl+Ks-su....ss.DRLKPPP.s..hossl+KLSssospsSsss..ss................p.s........spsshts.ps.P.o.........hstssssKL.....................KRsuspp.-sssss-hKssEsK+KIQHV..TWP .........s.-hLLHPEL..LSpEFLLhhLc.p+....sIsVEs....+hsKDsLsDLYlQHAlPLPQR-LPcsR..WGKhhEKcRp.pp.ch......pspp..........................ppss..ss-s....RKRPhI............VFDG.sSooo...olKl++s-su......ss.DRLKPPP.t.......hossh++lptsss.psus.....s....................p..........tpt.....p....p......ht.sssspL.....................KRssspp.t.p..tphpsspsK+KIp+l..TWP.............................................................................................................................................................. 0 12 15 32 +15177 PF15324 TALPID3 Hedgehog signalling target Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BVV6 Family TALPID3 is a family of eukaryotic proteins that are targets for Hedgehog signalling. Mutations in this gene noticed first in chickens lead to multiple abnormalities of development. 
27.00 27.00 34.00 33.50 24.10 25.60 hmmbuild -o /dev/null HMM SEED 1252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.39 0.70 -14.02 0.70 -7.21 10 106 2012-09-03 09:56:21 2012-09-03 10:56:21 1 3 35 0 35 97 0 709.20 48 82.78 NEW sDIFISQYosGQKDALRAVLKQKAQShPVFKEVKVQLLEDAusEKcsl.uQEsRhSPuGIDSATTVAAATAAAIATAAPLIKVQSDLEAKVNSVoELLsKLQETDKQLQRVTEQQTsl.QsKpEKlHCHDH....EKQMNsFMEQHIRHLEKLQQQQIDIQTHFISAAL+ouS...aQPsslPsSRsVEKaslKs-psslGuushSS+ssFss+p..As...................h+psEDhuFDcQKSPLETPAPRRFAPVPVSRDscISKRENPhEEKENh-hssp+GssRLLEQILNspDo.oRKSESS-.tTSLopSKhGWNPE...+ps........pFPSsEELGTAcVTVQKu-DlLpDLGQK+KEoculLQ...KpS.shlcLuDhPQsss.+.............LQoTpsTRSlLKDAEKILRGVQNNKKVLEENLEAIIRAKDGAAMYSFINALoTNREhoEKIRIRKTVDEWIKsISAEIQDELuRKDYEQKRFDQKspRsK+ApsM.......SK-IKsNTQ-Ks...lN+ssh.tK..pKQlE..-phcs..lpshs.Sshp.+cR+-GhLKussllQDEDYhhQlYGKPVYQGHRSTLKKGPYLRFNSPSPKSKPQRPKVIEpVKGTKVKShRTQTDhaATKPlKhDSKhpHSlshLP+u-.QQYLFSPSREMPThSGTLEGHLIPMAILLGQTQSNSDShPPAGVlVsKPHPVTVTTSIPPSSpKscTGVKKPNIAVlEMKSEKKDPPQLTVQVLPNVDIDSISNGSu-sS.s.ssSPcEAS.sPlpsWIQsPEhhKsDEEElKFPGoNFDEVIDVIQ-EEKsDE.IPEaSEPlLEFNRSlKVVSTKYNGPPFPPVASssQPTsDILDKVIERKETLENSLIQWVEQEIMSRIISGhaPlQ.pQsssslSVSsSEsSEPlTSDIVEusuGGuLQLFVDAGVPVNSDMIsHFVNEALAETIAlMLGDREu++ssPsAssVPGsloosET.L.ARlsTPVATPQPTPPpS....PsSssKEhVLVKTP-SSPCsS-HDsshPlKElhAEcGsD.hPAlTLVsTPsVTPsoTPPPA.AuhTPThSEhSI-KLKhsSPELPKPWuDuDLPL-EENPNslpEEshaPRAlVMSVAKDEEPES.chPs.PusPc.PlPhpPhPsus+APSsspsPSS-SST.ESoLShT.sTETETLDRPISEGEILFSsGQ+LAs+.luDuG.LaLsNLNDSLSSTLpDAhEMEDDPPSEGQV.IR+PHKchHpDslLSLLAKQNQcslsSQQulYHSEDLENSVGELSEGQRPpLsAAAEsIhMGp 
.................................................................................Psh+tV+Vplhtsst..+t...s.c.t.s.pthpsssolAAATAAAIAssAPLlKsQpphpAplspVsphLpKLppspt.l..h.t.t.t....p.p....tp................................sphIsuAhp.tt...h.......hssst...h...c.t......t.....p.h....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 3 5 11 +15178 PF15325 MRI Modulator of retrovirus infection Coggill P pcc Jackhmmer:Q9BWK5 Family MRI, or modulator of retrovirus infection, is a family of eukaryotic proteins that regulate the activity of the proteasome in the uncoating of retroviruses [1]. 
27.00 27.00 27.70 41.30 21.10 25.70 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.51 0.72 -3.07 14 32 2012-09-03 09:57:11 2012-09-03 10:57:11 1 1 25 0 17 32 0 99.20 66 70.21 NEW TVYCMNEAEhVDVALGILIE.uRKQEKPhEpsslAGADKPEhSP....ssStSPtsSS.GusSE-EDsGpDu.sP..GLuPspsPuGScSACScSPEc...-EDsLKYVREIFFS .TVYCMNEAElVDVALGILIE.uRKQEKshEQsuLsGADpPEhSP....ssSsSPaoSS.GSoSE-EDuGc-u.sP..GLSPSQtPuuSsSACSRSPE...E..E-EDsLKYVREIFFS... 0 2 2 2 +15179 PF15326 TEX15 Testis expressed sequence 15 Coggill P pcc Jackhmmer:Q9BXT5 Family TEX15 is a family of eukaryotic proteins that is required for chromosomal synapsis and meiotic recombination. TEX15 regulates the loading of DNA repair proteins onto sites of double-stranded-breaks and, thus, its absence causes a failure in meiotic recombination [1]. Two polymorphisms in the TEX15 gene could be considered the genetic risk factors for spermatogenic failure in the Chinese Han population [3]. 27.00 27.00 307.70 44.90 19.00 18.60 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.65 0.70 -11.58 0.70 -4.73 12 63 2012-09-03 09:57:43 2012-09-03 10:57:43 1 4 25 0 32 57 0 220.00 40 16.72 NEW N..hsKR..c+pGchKs.SpcsQo.spth..phshh.SKPulh.tlsphPlhct..+Schscs.s....pss...s-LcEpHsosspsuhlscLSpILQRADEASSLphLQEEocsCQNlLPLFVEAFERKQcCShcQILISR-LLVEpNLWsN.CKa+LKPsAlDoLVELQMMMETIQFIENKKRLLtGEPTFRSLLWYD-oLYuELLttP+GaQQQSsFYPuFQsRLKYNAFsELQpYHsQLIELh ...........................................................................................p........p...hh.tcst.h..thsp.slh.tppcsspls........pcsss.hpphK.pps.h.s...Hss.IsslSpILcpA-.susLphLQE.ThhCpshL.lhhchFphhQEssl-pIhIocE...llD..sl.pN.sp.hhLKPpAl-ohlEl.Mh.ETIpFlcNpht+hhsc.pFRuhLWaD.SLhsELltp.c.............................................. 
0 2 3 5 +15180 PF15327 Tankyrase_bdg_C Tankyrase binding protein C terminal domain Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BY89 Domain This protein domain family is found at the C-terminal end of the Tankyrase binding protein in eukaryotes. The precise function of this protein is still unknown. However, it is known interacts with the enzyme tankyrase, a telomeric poly(ADP-ribose) polymerase, by binding to it. Tankyrin catalyses poly(ADP-ribose) chain formation onto proteins. More specifically, it binds to the ankyrin domain in tankyrase [1]. The protein domain is approximately 170 amino acids in length and contains two conserved sequence motifs: FPG and LKA. 27.00 27.00 28.00 28.00 21.40 21.40 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.49 0.71 -4.08 17 95 2012-09-03 09:58:28 2012-09-03 10:58:28 1 3 38 0 50 89 0 166.70 50 15.10 NEW s.cFuF..tpTslLDS.SAL+oRsp....Lu++.pp..+RAPsStuhRt....sR..ts.sthshs--ss.sWhFpDS....sppp+ss.p--u-.t.E.t+s.+ocpssssps.+lslFPGhDPSALKAQLRKRs-u........-u.s-t...ussQhSKSPKsPh.h.G..usRVLPPus-K-suSE-.sSPpWL+-LKSKK ...............................t..DFSF.I-pTplLDS.osh+oRsp.Lu++.pt.+RAPs...........Rsuto..t.s.ts-sD.sWhFpDS....sptp+sP.pp-p-..EE.pspRochosso+s.+lslFPGhsPSALKApL+pRscu..................-S.upp...ShsQpuKSsKsPh.h.G..tshlLPspsEKspt.....SEt.sSPpWLptLK.KK...... 0 2 5 16 +15181 PF15328 GCOM2 Putative GRINL1B complex locus protein 2 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BZD3 Family This protein family is named Putative GRINL1B complex locus protein 2. GRINL1B is short for: glutamate receptor, ionotropic, N-methyl D-aspartate-like 1B. The name indicates what sort of receptor it is thought to be, a ligand gated ion channel specific to the neurotransmitter Glutamate. This family of proteins is found in eukaryotes. Proteins in this family are typically between 325 and 463 amino acids in length. 
The protein is thought to be the product of a pseudogene with a role in helping assemble a gene transcription unit [1]. 27.00 27.00 27.00 27.00 26.40 26.70 hmmbuild -o /dev/null HMM SEED 223 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.46 0.70 -4.60 48 192 2012-09-03 09:59:30 2012-09-03 10:59:30 1 2 70 0 83 176 0 245.50 28 63.40 NEW hLcRQp+lLpN.........++hlpcLPDKGcKIpchhp+lpstlsc+.c-l-pp............uphhpsLslssp....tth..sphphpttt.pcphhphsshh.tsp..sppcpstht.hsQ...........ss.ctpthhhhpsssshhst...........................ts.t-htphh......t......t..............................................p....................p..p+ht.hhpppp++..................................cshchcsKhpshpssp ..................................................................................................hpcp.phlpp.....................................cchhtphs-htp+l.p.htcl.ht.tpp.c.lcpp............sth.psls.s..sh....tth..sshphpttt.p-thhphpph..tss..p....p-ppp.hpssQ..s.hh+hp.tpopctsspVhhEhotplhsp...........................tshpEuspthpt...tEhshp.p+-pcluEl.......p+hltthppEpptL..ph...........................................h..t...s.u-ppc....pp.LEc.tsssL+E+I+HLcDMlcsQQ+KV+tMl.............................tEs.Ehps+hc........................................................................................................... 0 13 17 43 +15183 PF15330 SIT SHP2-interacting transmembrane adaptor protein, SIT Coggill P pcc Jackhmmer:Q9Y3P8 Family SIT, or SHP2-interacting transmembrane adaptor protein, is a disulfide-linked dimer that regulates human T Cell activation. 
27.00 27.00 27.40 27.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.69 0.72 -3.90 36 122 2012-09-03 10:10:56 2012-09-03 11:10:56 1 3 34 0 64 132 0 102.10 28 42.21 NEW hhLhslhullL.Ll.Lshslhtahtt++pp+ssph...........................tslE-sPlYGNLshhps...sshspsshcphpspPppSspspp...Ehss.s..hscpphsYASLshss.sp.c ...hhLhsllulLL.Ll.lsssLhtWphh.p+ppKhsph............s......ppsttsh.E.ss.h.YuN..Lshh.s...tshsppshpphps..Pppusptt....Ehss.s..hsccphsYASLshss.tt..................................... 0 5 5 14 +15184 PF15331 TP53IP5 Cellular tumour antigen p53-inducible 5 Coggill P pcc Jackhmmer:Q9Y2B4 Family TP53IP5 suppresses cell growth, and its intracellular location and expression change in a cell-cycle-dependent manner. 27.00 27.00 77.00 38.00 23.80 21.50 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.46 0.70 -4.43 14 41 2012-09-03 10:11:55 2012-09-03 11:11:55 1 3 28 0 22 33 0 185.80 51 80.98 NEW pQPVSKlIERNRLKMVLKNLSLLKLLKSSNsRIQELHNLA+RCWNSLLRVPKILpISSGsssVssKscQNNcEhQE.AsssccpLESKKhE.SsuEPK.....t.csc.tst.pstsptSPuAVsp+EcQhEsElP+TS+ucGL..s..PGApu+QssTtsPpVlhLKTapp+TPhtDhKQL-.sADQasWFEGLPTRIHLPGPRVMCRuSoLRWVKRCCTRFCSASL ...........................QslsKhlERNRL+hVLKNLSLLKLLKSSN.RIQELHpLA+RCWpSLLp..VP+ILpISSupsssss+scQsscEhQE.hssspcpLcSK+l-.usu-PK.........c.ps..t.thpsttptS.uAhs.+-cphcs-lP+Ts+spuL..s..ssApu+Q..TcsPpllhlKsappRsPhtch+Qh-.hAcQahWFEGLPTRIHLPuPRVMCRuSsLRWVKRpCTRFCSASL........................................................ 0 1 1 5 +15185 PF15332 LIME1 Lck-interacting transmembrane adapter 1 Coggill P pcc Jackhmmer:Q9H400 Family LIME1 is a family of eukaryotic transmembrane adaptors. It plays an important role in linking BCR stimulation to B-cell activation and is expressed in primary B cells. 
LIME localises to lipid rafts in T cells in response to TCR stimulation [1], and is phosphorylated by Lck and recruits signalling molecules such as Lck, PI3K, Grb2, Gads, and SHP-2 [2]. LIME acts as the transmembrane adaptor linking BCR-induced membrane-proximal signalling to B-cell activation [3]. 27.00 27.00 32.50 35.30 26.00 21.00 hmmbuild -o /dev/null HMM SEED 228 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.71 0.70 -5.12 8 37 2012-09-03 10:12:49 2012-09-03 11:12:49 1 2 22 0 14 35 0 187.90 51 73.69 NEW LCTsC....HR.D-.L..RKpupRQpuRLQGohMPuEhSLLRQspLCSLSKSDTRLHELHRGPpsS+A...RPASMDLL+PpWLEsSRussRs...PsAFsHRELPQu.PuA....s.husEATYSNVGLAAIPRAu...........LAAsP.VV................AEYAClQK.hKGT-pGP.....Qs...LpQuKAclpPAsQVDILYSRVsKPKRRssuPs.....sDQ.DPKupusILsL........GSD.uYEsL.PLpGpuh-su.LENVYESIQE .................................Ktspcppst.psshhss-hsLLRps+LCSLSKSDTRLHELHRGspsohA...RPASMDLL+P+WLEhSRusops.ssssAFs.ppLPpA.PAA..hssshusEATYSNVGLAAlPRuS...........LAASP.VV.....................AEYAslQK.hKGTcpGs.....Qp....tQ.KschhPAsQhDlLYS+lpK.t++s.tss.....ss..s.pstts...h........tss.tYpsh.s.hu.thppt.hENhYESlpE........... 0 1 2 2 +15186 PF15333 TAF1D TATA box-binding protein-associated factor 1D Coggill P pcc Jackhmmer:Q9H5J8 Family TAF1D is a family of eukaryotic proteins that are members of the SL1 complex The SL1 complex includes TBP and TAF1A, TAF1B and TAF1C, and plays a role in RNA polymerase I transcription [1,2]. Alternatives names have included 'JOSD3, Josephin domain containing 3'. 
27.00 27.00 153.10 70.30 23.90 23.00 hmmbuild -o /dev/null HMM SEED 217 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.89 0.70 -11.71 0.70 -4.55 12 58 2012-09-03 10:13:26 2012-09-03 11:13:26 1 2 34 0 33 42 0 201.10 57 79.67 NEW SuSSLF+TQClP.oPpp+pR.ss..Rp.slpustslp.pDSSSDSS.hE.P.pP..LsLKA.IFE+FKK+....KK++.K..RKYKP...Tt+.h.GRPcG++s..s+hSp..h-K.+plKDKG.pFPFlESE.s.+KsLPW+KILoaEQAVARGFFNYlEKLKYEpHLKESLpQMcsGEDLEcEDh-sR+YKYLDDDGuISPIEEsssED-.ssss..cp.s..-CDlKLV.E.sS.FIlSo-hP+K ...............SsSSLFKTQClP..hSPKpppR.Ns.hRK.hVcss.tsVpspDSSSDSS.hE.P.hP..LTLKA.IFER.FKp+.p...K.K+KK..RK...Y+P....oGRP+.GRP........cG++s.........sphS...l-K...KQh+-+GssFPFLESE.s.cKslPW+KILoaEQAVARGFFNYlEKLKYEaHLKESLKQMsVGEDLEcEDhDSRRYKYL.DDD...GSISPIEEStsEDE.sssph...cp.s.-CDIKLV-sopFIlSSEhPp..... 0 1 2 6 +15187 PF15334 AIB Aurora kinase A and ninein interacting protein Coggill P pcc Jackhmmer:Q9H7T9 Family AIB is a family of eukaryotic proteins necessary for the adequate functioning of Aurora-A, a protein involved in chromosome alignment, centrosome maturation, mitotic spindle assembly and aspects of tumourigenesis. AIB is likely to act as a regulator of Aurora-A activity. 
27.00 27.00 191.40 185.70 17.90 17.10 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.08 0.70 -12.13 0.70 -5.37 12 28 2012-09-03 10:14:07 2012-09-03 11:14:07 1 1 23 0 14 29 0 319.50 66 92.02 NEW QTHLIKsuTKMLTLhPGERKspISFTQRphPsAGsRQTSIASFFTLQPGKTNGGsQ+SVSSHtESQhNKESKcDsTQL.DHLhQGLcDDChAsPLATSTPADIQEA.GLSPpShQ.sSGHHphtTPhLThhSL.QP-TL.VCAG-SKASLAhSFTQDlEsSCLLDQKEuc...DSShK+EWLpGSK.K.N.hQuhERHs+PsGGKsHQsLDKsKlE.KVSAKENRQuPV.lQTYR-.SaSGcNT.uVKQSPCPVslFSWDSE+sDKDSWSQLFTEDSQGQRVIAHNoRAPFpDVTNspNQGLGQFPsSPtAQsQtsssph.NLQPDLLFTQDSEGN QTHLIKPGTKMLTLLPGERKspIsFTQRRsPssGI+QpSIASFFTLQPGKTNGuDQ+SVSSHsESQlNKESKKsATQL.DHLI.GLtcDCMuuP..LATSTsADIQEA.GLSPQSLQ.TS.GHHRhtTPFLT.L..SLhQPDTL.sCAG-SpssLAhSFTpDLESSCLLDQKEtc..tDSupKtEWLHGSK.K.N.YQuME+HsK.PGsKCpQPLsKTKLE+KVSAKENRQAPVhLQTYRE.SWsGENs-uVKQSPCPVSVFSWDsE+NDKDSWSQLFTEDSQGQRVIAHNoRAPFQDVTNNhNpsLG.FPNSPWAQCQ-c....ssp.NLpPDLLFTQDSEGN. 0 1 1 1 +15188 PF15335 CAAP1 Caspase activity and apoptosis inhibitor 1 Coggill P pcc Jackhmmer:Q9H8G2 Family CAAP1, or caspase activity and apoptosis inhibitor 1, is a family of eukaryotic proteins involved in the regulation of apoptosis. It modulates a caspase-10 dependent mitochondrial caspase-3/9 feedback amplification loop. 25.00 25.00 25.10 27.30 24.80 21.90 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.52 0.72 -9.12 0.72 -3.77 14 90 2012-09-03 10:14:51 2012-09-03 11:14:51 1 1 71 0 60 86 0 61.70 53 16.01 NEW shKPluhYIpDK+EMLcQCFpVlGEKKLpKMLPDhLKssolc-lccLCh-QLphlScKplh..pIL ...sLKPlSaYIsD++EMLpQCFsllGEKKLpKMLPDlLKssSl-EIKcLC.EQLEhLScK+lLpIL........... 0 13 17 36 +15189 PF15336 Auts2 Autism susceptibility gene 2 protein Coggill P pcc Jackhmmer:Q9HAH7 Family Auts2, or FBRSL2, Fibrosin-1-like protein 2, is a family of eukaryotic proteins associated both with a susceptibility to autism [1] and with influencing the number of corpora lutea produced by breeding sows [2]. 
23.00 23.00 23.10 23.10 21.40 22.50 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.42 0.70 -11.48 0.70 -4.30 17 176 2012-09-03 10:19:03 2012-09-03 11:19:03 1 3 48 0 86 152 0 197.20 53 25.25 NEW KPGKWCAhHVplAWQIh+....H.QQKlK..QMQLDPHKL-.huhKhDhhSRPPuPulFsuhHaPpDLARP..LFSu...o.GuuHPussPFGPssHH.uuFL.PsuHL..DPFSRsuoFuGLGsLuSsAFG.....GLGs.sLo..ssSlFu.K-GP.slpsh...uuP.HEsWNRLHRTPPSFPT.PP......PWPKsuDsERsuusssH-tc..........R-s-K.........uK--+-..RDhL-KsR.Hss+uSP ....................KPGKWCAMHVpIAWpIY+....H.QQKhK...MQhDPHKLD.huhK.-hLuRPPu...Pu.......l..F....uu.......lt...aPpDL.ARP...sLF.S..u...s..G..A.sHP.u.usPF.GP.s.P.HH.usFL..PuuHL...-PFuRPooFuGLuuLuusAFG.....GLGsPols...ssSlFupK-uP...ultsF.......usP.H-.PWN.RLHRoPPSF.P..TPP......sW....KPs.-h..ERou.u.hss+D+-................R-s-+c...ph.sK-....-+-...R-.l-+p..h.s+sSP................................................ 0 8 15 39 +15190 PF15337 Vasculin Vascular protein family Vasculin-like 1 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9HC44 Family GC-rich promoter-binding protein 1-like 1 or Vasculin-like protein family 1, is likely to be a transcription factor. The domain family is found in eukaryotes, and is approximately 90 amino acids in length. 27.00 27.00 27.90 48.40 22.40 21.10 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.33 0.72 -3.52 16 123 2012-09-03 10:19:58 2012-09-03 11:19:58 1 1 40 0 69 117 0 95.20 68 21.42 NEW -s-hLSpSLEAEH.RLLKuMGWQEYsENDEshLPLTEDEL+EFphKoEQL++NGht+NGhLtp.pshs.hFssWRsoscschpcspDoETSS.S-TS.DDD ...............t.s-VLSpSLEAEH..RLLKtMGWQE.sENDEsChPLTEDEhREFQshoEQLp.+NGhpKNGhLps..hhss.hFuPW+sosht.s.-s..pDTETSS.S-TS.DDD. 0 5 8 20 +15191 PF15338 TPIP1 p53-regulated apoptosis-inducing protein 1 Coggill P pcc Jackhmmer:Q9HCN2 Family TPIP1 is a family of eukaryotic proteins whose expression is induced by wild-type p53. 
Ectopically expressed TPIP1, which is localised within mitochondria, leads to apoptotic cell death through dissipation of mitochondrial A(psi)m. Phosphorylation of p53 Ser-46 regulates the transcriptional activation of TPIP1, thereby mediating p53-dependent apoptosis. 27.00 27.00 66.20 66.20 23.40 17.70 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.04 0.71 -10.68 0.71 -4.26 2 7 2012-09-03 10:20:35 2012-09-03 11:20:35 1 1 5 0 1 11 0 101.70 89 91.40 NEW MGSSScsSFRSAQASCSGsRRQGLGRGDQNLSVM.PNGRAQTHT.GWVSs.LVLGAQVHGGCRGIEA.SVSSGSWSSATVWhLTGLGLGLS+PFLPGshVLRDRPLtSAhELSYDQKKA.LpLQ .MGSSScASFRSAQASCSGARRQGLGRGDQNLSVMPPNGRAQTHTPGWVSs.LVLGsQVHGGhRGIEA.SVSSGSWSSssVWhLTGLGLGLS+PFL.GshVLRDRPLtSAhELSYDQKKA.LpLQ........ 0 1 1 1 +15192 PF15339 Afaf Acrosome formation-associated factor Coggill P pcc Jackhmmer:Q9NQ60 Family Afaf is a family of single pass type I membrane proteins. Afaf is a vesicle factor derived from the early endosome trafficking pathway that is involved in the biogenesis of the acrosome on the maturing spermatozoon head. 27.00 27.00 91.10 90.50 21.50 19.50 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.60 0.71 -11.32 0.71 -4.72 11 38 2012-09-03 10:21:37 2012-09-03 11:21:37 1 1 22 0 16 39 0 182.00 62 66.58 NEW PANEKoGNYYKDIKQ.YVFTTQNPN.GopSEISVRAT...TDLsFuL+NaKhlstosh....tppusE-csshpEspcsphp+oT..PN.PAFWTMLAKAlNuT.ssp...--KDQLFpPIPsSDlNATsEDplu-Lp-lKLKLMLGISLMTLhLFlhLLAhCsATLYKLKpls.K.spES.QYSlNPELAoLSYFHPSEGVSDTSFSKSAESS ....PANEKoGNYYKDIKQ...YVFTTQNPN.GTpSEISVRAT...TDLsFuL+NaKhhNtTsh.....tpssptpt..pE.pcpp.pcsT..PN.PAFWTMLAKAlNuT.sst...--+DQhFpPIPsSDlNu....TpE.Dphu-Lp-lKLKLMLGISLMTLhLFVsLLAhCsATLYKL+plphK.sCES.QYSVNPELATLSYFHPSEGVSDTSFSKSAESS.............................. 0 1 1 1 +15193 PF15340 COPR5 Cooperator of PRMT5 family Coggill P pcc Jackhmmer:Q9NQ92 Family COPR5 is a family of histone H4-binding proteins expressed in the nucleus. 
It interacts with the N-terminus of histone H4 thereby mediating the association between histone H4 and PRMT5, PRMT5, the Janus kinase-binding protein 1 that catalyses the formation of symmetric dimethyl-arginine residues in proteins. COPR5 is specifically required for histone H4 'Arg-3' methylation mediated by PRMT5, but not histone H3 'Arg-8' methylation, suggesting that it modulates the substrate specificity of PRMT5. This family of proteins is found in eukaryotes. 27.00 27.00 93.50 93.30 19.20 18.70 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.47 0.71 -11.17 0.71 -3.84 8 37 2012-09-03 10:22:24 2012-09-03 11:22:24 1 1 26 0 19 38 0 146.60 76 88.73 NEW EAGhATADHSG..EpETEtAsDRLupGAQSlPs-sPs+GEGopuEEEGa...AhDcc-uDGEhNsWELs-Gs..ssPPpEpsus.lFNEDWDhELphDQGN..PYDADDIQGSISQElKPWVCCAPQGDMIYDPSWHHPPPLIPHYSKMVFETGQFDDAED ........AuFATuDHSu.pERETEKAMDRLApGAQSlPND.uPA+GEGTHSEEEGF...AhD-EDSDGELNTWELSEGs..sCPPKEQsuD.LFNEDWDLELKADQGN..PYDADDIQtSISQElKPWVCCAPQGDMIYDPSWHHPPPLIPHYSKMVFETGQFDDAED 0 2 2 4 +15194 PF15341 SLX9 Ribosome biogenesis protein SLX9 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NSI2 Family SLX9 is present in pre-ribosomes from an early stage and is implicated in the processing events that remove the ITS1 spacer sequences. In eukaryotes, biogenesis of ribosomes starts in the nucleolus with transcription by RNA polymerase I of a large precursor RNA molecule, called 35S pre-rRNA in yeast, in which the 18S, 5.8S, and 25S mature rRNAs reside, while RNA polymerase III transcribes a 3'-extended pre-5S rRNA. The 35S precursor also contains external transcribed spacer elements (5' and 3'-ETS) at either end as well as internal transcribed spacers (ITS1 and ITS2) that separate the mature sequences [1]. 
27.00 27.00 28.30 27.30 25.00 26.30 hmmbuild -o /dev/null HMM SEED 121 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.86 0.71 -3.67 125 252 2012-09-03 10:23:41 2012-09-03 11:23:41 1 5 220 0 174 248 0 135.90 22 65.24 NEW psththoKK-....Kpph.K+ppFlpK....lpps..t.sp..........p....pp+p...++R........tpp...tls.s.....shcsLtcuL..Pp...........................................ttptth.pptt.....................hpppthst.........+p+cplh.ppEtpRFsplLsp.sa.................................................pssPhuAlcpalppsh ............................................................t..thh.oKK-....Kpph.++ppalpK....lptt....sp...................p....pp+p...++R...tpp.....tls.s...sLpsLt-uLP-................hp...t.............................tttp.t..ptts.............................hppp.hst....................+p+cplh.ppEppRFpplLsp.sa...............................................................pssPhuAlppalppp........................... 0 49 92 138 +15195 PF15342 FAM212 FAM212 family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NTI7 Family This domain family is found in eukaryotes, and is approximately 60 amino acids in length. 27.00 27.00 29.60 29.60 24.90 16.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -9.13 0.72 -4.36 8 105 2012-09-03 10:24:18 2012-09-03 11:24:18 1 2 41 0 47 82 0 59.60 62 20.65 NEW Rs+P+SopsssL-phpuhs.ts-ucDWTooLhSpSRNRQPLVLGDNsFADLVtNWhDLPEss ..........................hP+Sps.ssshpphpuhc.hs-s-DWTooLhSRGRNRQPLVLGDNsFADLVtNWhDLPEh... 0 2 6 17 +15196 PF15343 DEPP Decidual protein induced by progesterone family Coggill P pcc Jackhmmer:Q9NTK1 Family DEPP is a family of proteins expressed in various tissues, including pancreas, placenta, ovary, testis and kidney. High levels are found during the first trimester. Its expression is induced by progesterone, testosterone and, to a much lower extent, oestrogen. The family is alternatively known as fasting-induced gene protein, FIG. 
27.00 27.00 30.40 29.60 19.80 19.60 hmmbuild -o /dev/null HMM SEED 188 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -11.21 0.71 -4.68 10 32 2012-09-03 10:25:33 2012-09-03 11:25:33 1 1 24 0 16 26 0 159.20 52 85.86 NEW GGPspEPP.......uSPSLDDYVRSICQLAQPTSVLD..cAsstspss+spR.PAp.............usEKSsPssSLQDITs+FSGQQPsLPtssTsDPLDWLFGESQEKQsS+RDhsRRTGsSAssWGsHRQhDoGKutusPRGRhC-ARsPGHSLuRhSpDtpQutp..Sh..suppstpssuSstpsRsSSlLRTLa.HLPVIHEL .......GuPGQEPP.......sSPSLDDYVRSIspLAQPTS.VLD..cATApupPpsPaR.PAp................uscKupPAsSLpDlTs+FSuQ....QPsLPhssssDPLDWLFGcSQEKpsspRDhsRRTGsSus.hssHRQh-ssKs.sssRGRhs-A+hstaSLsR.spchpQs....S...s.tp.tpthuu..tsp.uShL+oLh.cLPVIaEL............................. 0 2 3 5 +15197 PF15344 FAM217 FAM217 family Coggill P pcc Jackhmmer:Q9NTX9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 329 and 507 amino acids in length. There is a conserved YPDFLP sequence motif. 27.00 27.00 85.50 33.40 20.10 23.60 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.39 0.70 -4.80 21 73 2012-09-03 10:26:04 2012-09-03 11:26:04 1 2 32 0 40 62 0 213.10 41 51.55 NEW SDLSDSERls.lssSPhTPPDLsLRAEEIDPVphp...cPtpupsps-ahYPDFLPPPFNSWDL+chAlhhpoEshpsslPpssuhLtKYI-RLLQLEWLQhQTIQsEKsK.ss+uRssousus....ptshKSPG.+oKhhsusls.cs.s.hQ.p.Gs.sKsssp.....R.....KpshtppcscPshht.apsSspshchhus..oR....sS.+Qosph+p.....ctK++cssK.ss+.QphshsCs-...susKhposuNlRhPp ...SDLS-sE+hs.h..S.h...p.sDLNL+sEpI-sVp..............c....sh...ptcsp..a.YPDFLPPPFsohDL+chAl..poEshptsls.stu.lt+hIsRLLpLEhLQh.TlQpE+s+..hs.sp.sTssuo....pts.pS.u.psKlhtstls.csLs.hQ.p.us.sKot.p...........R...........Kps.tppchc.stht.aphSsts.chhhs..oR....sS.+posps+p..pppcssK.ssK..phhs.sCp-....SsKspsstshp............. 0 2 5 10 +15198 PF15345 TMEM51 Transmembrane protein 51 Coggill P pcc Jackhmmer:Q9NW97 Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 233 and 253 amino acids in length. 25.50 25.50 25.60 25.50 25.10 25.40 hmmbuild -o /dev/null HMM SEED 233 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.68 0.70 -4.66 17 64 2012-09-03 10:26:44 2012-09-03 11:26:44 1 2 40 0 32 46 0 218.10 49 87.87 NEW puuGSHYALsALGlGhlALGllMhVWslVP..Gtus..u.ssssssu.ss.ts..sss......tt+oKoSSVAaVLVGsGlhlLLLSlCLulRsK++pppst.p....ptpsss....pp.spppEc.t.-.tsupYsVPSYEEVlsost...ss..ppssh..ptspS...pLPSYEsLsshsp............spss.....s.....sssth.ssssps..........PsRpsSRsuRhL+PL+VRRIKS-KLHLKDhRlslt....p.ss..p.sslTIEPLTPPPQY- ................uNGSHYALTAIGlGMLVLGlIMAhWNLVP..GhSs..up+ssspu.N.po.suuG......hhKSKTFSVAYVLVGu.GVhLLLLSICLSIRcKR+pRQu--...chQptsussPpspcpcuQpE-....E-suuRYhVPSYEEVMsosh..sps..Rttpp..psphS...hSLPSYESLsulDE.................ssss.....ssputspssssps..................PsRpsS+..hu..++L+PLKVRRIKSEKLHLKDFRlsLs....s..ts..h.sPsoIEPLTPPPQYD................................................................ 0 2 5 13 +15199 PF15346 ARGLU Arginine and glutamate-rich 1 Coggill P pcc Jackhmmer:Q9NWB6 Family ARGLU, arginine and glutamate-rich 1 protein family, is required for the oestrogen-dependent expression of ESR1 target genes. It functions in cooperation with MED1. The family of proteins is found in eukaryotes. 27.00 27.00 29.20 30.20 26.80 22.80 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.52 0.71 -11.29 0.71 -4.43 19 163 2012-09-03 10:27:43 2012-09-03 11:27:43 1 6 106 0 113 158 1 130.50 45 54.99 NEW pEhEtKllEEEsA+RVEchlcKRVEEpLp..p-EIcpElpRRlEEu++th-cphht-lE+c+cttlp-t+p+EEcE+pc+E-LEclhpENpRKlEEAQ++.AEEchph.......hEEph+.hE.......-Rt+hcp-cc++h+-.E.....Qth...lLGKppoRPKLSFuL ...........pEhE.KhlE.EE.sA+RlEchltK+VEEcLp..p-EIcpEl.RRlEEu++hhccplhtpLE+p+pttlttt+t+.....E............EcE+pc+EELEchLEENpRKltEAQtKhAtEp.th.EEphp..E.......pR.+hcttcp+p.cc.E.....Qhh.....lLGK.s.p.....uRPKLoFsh................................... 
0 37 59 85 +15200 PF15347 PAG Phosphoprotein associated with glycosphingolipid-enriched Coggill P pcc Jackhmmer:Q9NWQ8 Family PAG, or Cbp/PAG (Csk binding protein/phospho-protein associated with glycosphingolipid-enriched microdomains) is a transmembrane family that has a negative regulatory role in T-cell activation through being an adapter for C-terminal Src kinase, Csk. This family of proteins is found in eukaryotes. 27.00 27.00 30.30 94.20 25.70 19.70 hmmbuild -o /dev/null HMM SEED 428 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.47 0.70 -12.24 0.70 -5.38 5 46 2012-09-03 10:28:35 2012-09-03 11:28:35 1 2 37 0 27 40 0 401.30 61 99.17 NEW MGP-GGLLSoGpVHIIlWGSLAAVuThLlITFLIFLCSSC-REKKPKQQNGDHENLMNVPSDKEVFSHSVTSLATDsPASSEQNGGLSNGDILSEDSTsAChQPYEEVQTSlSDLLEuQDSlGKSlKCHQSRELPRIPPNsTlETILSTRNsEsDQGLGMEGPYEVLKDSSSQENMVEDCLYETVKEIKElGAoAssEKupsG+us..uusAs+Es.uslsts+lESAEYASVDRNKKSRQSsNuESlLGNosDlEEEAPPPVPVKLLDENENVQEKEs-Es......EtpATEGsu-ssKRLSSLSYKSREEDPoLTEEEISAMYSSVNKPGQA.R..hsPESoYTCIpElAPpRSPSSCNDLYATVKDFENsPsu..ThPPu..u.RsNGEPEPDYEAIQoLu+DEERss.hPpos+lshspENDYESIGDLQQsKDVTRL ...........MGP.GuhLu.uGQh..Q..lsLWGSLAuVuhFhlIo.FLIFLCSSC.-REKKP+..p..puGDHENLMNVPSDKEhFS+SVTSLATDAssSSEQNGuLTNGD....ILSEDSTh.TChQHYEEVQTS.uSDLLDSQDSoGKs.KCHQSRELPRIPP-uAVDThLssRssDuDQGhGhEGPYEVLKD..SSS..QENMVEDCLYETVKEIKEVuAssp.-+G.....psu+u+.......Ss.sulKElsuPpsps+...s-F.AEYASVDRNKKCRQSsNsES.........lL.Gso..sDs........EEEuPPPVPVKLLDENENlQEKEs.tps.......EcpAsEsT.u.-ssK.....Ra.SSL..SYKSREEDPTLTEEE.IS.A.MYSSVNKP.G.......Q.....s.....h.....p......p.............ssES......s....YosIpt..sspRSPSSCNDLYATVKDFEKsPsu...ohPPA..u..RPstE.PEPDYEAI...Q...oLsREEEKss.tssu.ppshssK.ENDYESIGDLQQsRDlTRL..... 0 1 3 10 +15201 PF15348 GEMIN8 Gemini of Cajal bodies-associated protein 8 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NWZ8 Family GEMIN8 proteins are found in the nuclear bodies called gems (Gemini of Cajal bodies) that are often in proximity to Cajal (coiled) bodies themselves. 
They are also found in the cytoplasm [1]. The family is part of the SMN (survival motor neurone) complex that plays an essential role in spliceosomal snRNP assembly in the cytoplasm and is required for pre-mRNA splicing in the nucleus. GEMIN8 binds directly to SMN1 and mediates the interaction of the GEMIN6-GEMIN7 heterodimer [2]. 27.00 27.00 31.30 31.30 26.80 26.80 hmmbuild -o /dev/null HMM SEED 209 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.70 0.70 -11.49 0.70 -4.28 39 92 2012-09-03 10:29:33 2012-09-03 11:29:33 1 2 65 0 64 84 0 197.70 35 87.38 NEW psWaspssYu+.................YWpHYppAMtWhppHptA.hptthpthhssshhhsss.....psphsppstp.........................t.t....s....p.tttp.....t.pp.cc-t.-oco-sc.hcsDhs...NMElTEEL+QaFA.pTE+HREEh++.....QpQ...l-scp.-s..........YVpAD+sLhhs.h+.SspsPsEpPucpRpAE.MK+LYG.cuAs....KI.AMEsAlQLsFD+pC.Dc+pPKYWPlIPLKh .......................................................................................Whtp.hatc.......aWpHYppAhtWhppHp....p..ht.hht.shhhs.s.....psphsppttt...............................................................t.tp...t..t.p.ptpp......t.pp.c--.-o-SDs-.lEsDlo......NMEITEELRQYFA.pTERHREEh++......QQQ...l-scchcs.....................YVsAD+sLhhstp+..SspsP.sEcPu..c+RpAE.MK+LYG.csAs....KI.AMEsAlQLoFD+ps.DcKpPKYWPlIPLKh..................... 0 18 22 40 +15202 PF15349 DCA16 DDB1- and CUL4-associated factor 16 Coggill P pcc Jackhmmer:Q9NXF7 Family DCA16 is a family of eukaryotic proteins that interacts with DDB1 and CUL4A. The family may function as a substrate receptor for the CUL4-DDB1 E3 ubiquitin-protein ligase complex [1]. 
27.00 27.00 349.80 349.70 20.50 18.80 hmmbuild -o /dev/null HMM SEED 216 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.68 0.70 -4.98 2 16 2012-09-03 10:30:57 2012-09-03 11:30:57 1 1 14 0 8 12 0 207.10 95 100.00 NEW MGPRNPSPDPLSESESEEEtNANYLNESSGQEWDSSEtEDPVVPNloPLESLAWQVKCLLKYSTTWKPLpPNSWLYHAKLLDPSTPVHILREIGLRLSHCSHCVP+LEPIPEWPPLASCGVPPFQKPLpsASRLSRDHATLNGALQhATKQLSRTLSRATPIPEYLKQIPNSCVSGCCCGWLTKTVKETTRTEPINTTYSYTDFQKAVN+LLTASL MGPRNPSPD+LSESESEEEENhSYLNESSGEEWDSSEEEDsMVPNLSPLESLAWQVKCLLKYSTTWKPLNPNSWLYHAKLLDPSTPVHILREIGLRLSHCSHCVPKLEPIPEWPPLASCGVPPFQKPLTSPSRLSRDHATLNGALQFATKQLSRTLSRATPIPEYLKQIPNSCVSGCCCGWLTKTVKETTRTEPINTTYSYTDFQKAVNKLLTASL..... 0 1 1 1 +15203 PF15350 ETAA1 Ewing's tumour-associated antigen 1 homologue Coggill P pcc Jackhmmer:Q9NY74 Family This family of proteins is found in eukaryotes, where members are expressed at high levels in the brain, liver kidney and Ewing tumour cell lines. Proteins in this family are typically between 648 and 898 amino acids in length. 
27.00 27.00 75.90 40.40 21.10 20.50 hmmbuild -o /dev/null HMM SEED 814 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.85 0.70 -13.37 0.70 -6.52 8 53 2012-09-03 10:31:34 2012-09-03 11:31:34 1 2 34 0 31 50 0 614.10 41 89.43 NEW RYETPKRlLKMDLLSSTFSSP.NDPDGQNDIFWDQNSPMTKQLGKGRKKQIYoTDSDEISHIVNRIAPQDEKPTTNSMLGhWIGETAIPCTPuVAKGKSRAKlSCTKLKTQNQEEELMKLAKQFDKNMEELDVIQEQDKRNHDFIQMISEsETLNNYKDNVQMQhLp-I...VPEIDsuIIKKPhKtNTKISVs.NDQsSSQKPFDQNAEAAFNAIFDGSTQKCSGQLSQDLSDAFLNTSNTTFGKKssLKEEKIIoNETlVTE+L.NK.......TPsSLSsQVDTPsMTKSCVTSsTKEPcshsKalDsFsTSDFEDDWENLLuNEPFVMQNlEhsELhP.u.KTsQhsDQKuICoFN.uKNDKSKSthNoSLDsRLRDSKILQDLPScTpNpELhDAtKapF.P...sPNDKPNKL.S.TGNKhKFEKSFNKlVlQDKIQD...sAlASsLTKVKEDhpT...K...Fs...ss..SpKKSuLNTGYSN-Q.......KNKsIFNQSFKsPAslcPFGSAsLusET.SVsNsNQTNASKLsSFFDDWNDPSFANElVKACHQLEsTWEADDVDDDLLYQACDDIERLTQQQDlRKDSKTSEShL-lNNSSpHGA.KNhFTTSKQuSQLlQSKHLNLuSISsp.T.SlTNSSQlsKSVKMEKGEhCGNSPuFLGATTNLoIYSKNSssQhsN..VuhNNTcVPlQVNSSKSlLsGSSSLNVsSDHMoTEIATspKKLSTppLSHsTlTDEuQocLN+sV+hSKYTFTKhKNsQhhSQhNQNCl.sGSlSDTKIoQsLEKNK..T..VNsLhGcAVQQQSLh+.hSESLKQPSKEEEEKNRKYSPEEIQRKRQEALVRRMAKA 
............................................................................hcTPKRhhphchhSss.FSSP.NDsDsQp-IFWD.sSPhTppL.GcuR.+KQh.hspsosEIScIVN..RIAPQDEK.Ps.s..s..ShLshWIG-sAIPCTPsVsKtKSRsKhsss+.h......K.spspEEELMKLAKQFDKNM.ELDslQEQspcsasFhQhhSEsthL.ssa+D.shphp...shpsh...lsEls...ps..t.cshctso.pluh...pspsSSQKshD.sAEAAhsAlFDGSTQKCSGQLSQtLs.-s.hssopshh..hcpss....LhcEp........st....cohl...-ph.sc............oshulos......psDoshhspSp.lT....ppc..s....t.h.c.....s.....hsssDF-DDW-..s.hLss-sFsMQ.sp.sELhss..cosp....s.....ppth..ht..spsspshsthsts.....th.htssp.h.shsSph.stph.ss..p..h..pst.c...pl....tNp.ph.pps...h...sch.p...h..stshpphpEs..hs.p.........s..stccsshspt..sppp.......ps..s.h....pshpsssphp..sShtlspps....thsp.spppssK.s.s...aDDWNDPphusEllcthpp.-shW-u.sD..DDDLLYQsCDDlE+LTQpQshpcssctoEs...hs.sS.pGt.ps.hshScpt....p...hhQs.cHh....N.ssh.Shp.h...o.hpps.phsK.hthp.c.t.htsssp.hss.pNLoh..t.ss...p...h..sssssshthstop.hhstp.s..h....tpsthtsphs...pphpsppLstpshsst.sp..st.sp...st.s.casFp+hKssp.h.phsps.h.ssphs.sschh..pthtpp+..s..ls.shhtp...s......pp...h+..hSESht.ssp..t..pEE+N+KhS.EEIpRK+QEALsRRhu+....................................................... 0 2 5 13 +15204 PF15351 JCAD Junctional protein associated with coronary artery disease Coggill P pcc Jackhmmer:Q9P266 Family JCAD is a component of VE-cadherin-based cell-cell junctions in endothelial cells. The cell-cell or adherens junction is an adhesion complex that plays a crucial role in the organisation and function of epithelial and endothelial cellular sheets. These junctions join the actin cytoskeleton to the plasma membrane to form adhesive contacts between cells or between cells and extracellular matrix. The junctions also mediate both cell adhesion and cell-signalling. JCAD localises close to the apical membrane in epithelial cells. This family is found in eukaryotes. 
27.00 27.00 191.40 27.20 17.50 17.20 hmmbuild -o /dev/null HMM SEED 1356 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.69 -13.15 0.69 -14.27 0.69 -6.96 15 60 2012-09-03 10:32:28 2012-09-03 11:32:28 1 3 35 0 36 53 0 1002.50 39 97.42 NEW MYSVEDLLISHGYKlSRclPAP+E-ch-G+ppARocsRAGpG.LLNGCEDGsAAhspSK....suhG+GalS.soE......s++p.sPRuHsEsQS.sSAuRsSEuGFYcQPsLsWSSQPQoucDpAY.RRR.GQ-sSu.lLGPRDR--LEsRGMAQAHSLPsHsREGPWEVGGRTEpVMKKAVWEEELRMuGPuKWQNlSLESWNQPRKLGRQMSDGDGEKLFQD....LYPFhpGEcsLsSQsKGKSQSLPRVLSPESLSChEIPIPLNDGHhPuVPKMPsYPPNCAPsLEsTRNsEKuGSSsPLPRPKFGRPLKPPS..YsSHppSRuGsEsSchpDSpQsD...hsssh...sppP.RpEhsssDsGLEPPVYVPPPSYRSPPQHIsNPYlE.DssP+sVsuGppQQpps...sE+suAusplPuGshuoGsEYGAusp.SPpGh.spPRPsssa-uSVQYIPFDDPRIRHIKLApPpGFs--sKh--psYsuuPlsssEPApGptQpDGAlhsPpulssssGsERu....ssh.AsPSPpWLWGQLPRDuENuGhPDQRDHCss.RGQ.PsspGSp+t+sEG.VSSPsPQ.GESTCETpTKLKKFE..TGhQTKKSSKKKhNETIFCLVSIPVKSESHLPDpDpNNNDLKQSADKKNGhDKSsALQEQSLLSMSSTDLELQALTGSMuGRTEhQKQ-LGEPE-sKQTNDLRFIHPsKHRELKYSGSWPGHQYRDQQTQTSFsEESKSsQhLPupKsGuss.cssLoP+psDP.......sA.S-.sphHsALuSSDpsQRPsAhsLKGQhS.LSPSSNSAFSRTSsshsQAPsPKAupoQP...shDspG+suuPsP+uEVVKGEo.T.GPCNS+QLFGQFLLKPVSRRPWDLISQLESFNKELQEEEESusSS......SsSuSE....-S-sEhppEspAssps+shGhpcsSt-hRsctts+tls.EcPsh+SGRVKSKSESWSEE.csG..aspupP.S..GsspstsGRGpshhsAcGSLlsEp+cQEscsRhschslSPuPV+R.h..SSRu..SDs+PsssupsAE.REPQEspcLss.....shsSVplSpuuPPcssuutERuo..slsLSLuuKsRGLSAPDLRSVGLs.u.EpSAscLDGSLG-AsAIEIPP..NESLQARAARILGIEVAVESLLPGspRsGQsQsPEPDuSApssEuPREE.osuSsA.ss..sPososDAFYGRRKCGWTcSPLFV..GERDushRuP.AsE....pSsVDuslsSpsssPEPpPss.EspshppKDhts+PPFRSTLFHFIERTPoVuGSEKRLRSTSKVIESLQEKLASPPRRADsDRLMRMKEVSSVSRMRhLSsRSADSsEEAEELKA..RG.us.PtG.sohssuDhup+sGpssulSKGslSLEEsGHPAupR.EKs.scQDFWCP 
.........................................................MaSVEDLLlSHGYt.....spp.s...s............th.........t..t....u.t...NGh..........................t..t.ssp......tt..................................thhpps..hhsup.s..tps.hh.hpR.ttp...u...t.p.t.tt....t.tthut.s.uhshp.......+Eu.h-VutpsEpVh..+pshhp-ph+hss.t+WQslshESWppP+clGRQMSDGsGE+hhp-....LYshh.u-pslsopsKtKSpSLPRsLSPESLphhElPhshs-ta......sKhs....as..Pspsssh-.ss+p.c.pssp.sPhP+PKFGRPLKPPS..Yp.pppoRussc....s..pDpp....phc...............sRp-hsh.D...s...G...LEPPVYVPPPSY+S..PPp.ph.ssPah..stsPh.hssspppQpp....hE+stsst......ssss.ususphssssp..P.uh..pP..p.ss.sasu.VQYIPFDDPRlRHhKlApstsh.hp-hchtcp.hss.us.ssst-sshtthp.cuAhhs.spshhs.sssp+u....ss..sss..SspWLhst..stss...Ess.uhPcQR-ps..ss.psp..Pss.psu..ptpstt.sos...sp..u-u.....o.CET.TKLKKFp..TG....hpoKK.SS...KKKhNETIFCLVSIPVKSESpL...PD.pDpNNNDLK............us-..ppsG.cp.s.suLQEQ.SLLShS..STDLELQALT.GSMus+schp+QshtcPctt+QssDLpFlc.sKHRELp.aSGSWPGcQYRDQQTQTSFsE-sposQ.hPus+.Gtss.sss.oPp.t.-s...............................ss.oc.sthpsuhsssD.pp+spu.slKGQhS.LSPSSNSAFSRTS.shsQuPhsKus.uQs............th.s.pt.sspPss..+tEVVKGEs.s.usCNSppLFGQFLLKPVSRRPWDlISQLESFNKELQEpEESpssu.....p.sssE....-u-tc...p..shs.st.cs.t..t.t.t.cht...thhs.-tPshpsGRhcspSpSWS.cc.p.s..t.ts.....hs....s..p.ttutu.sh..spush.s-...ptp.-..pthpp.shp.shPs.c.h..u....stps.s.....t.+.stt..c..t......ht.sp.ut.s.s.p.sth.ptts...h.LsLss+spGhStPDlpsltL..s.t.ts.p.....s.tsshEIPs..sESLQtRAsRILGIEVAVESLLPsstp.sttp..stsssssht.tsstpt....t...s.....th.tsua.uRRKCGWTcSsLFV..G-.......hs.t.......sthpt..ssp...PE.............ps.t.......sPh+uslh...phh-+ssss.ssEKRlRssSKVIEoLQtKLsSsPp+ss.-RLhRMKEVsSlSRMRhLS.+ss-S.--s-t.K.........................t............tt.t................................................................................................................................................................................................................. 
0 1 7 15 +15205 PF15352 K1377 Susceptibility to monomelic amyotrophy Coggill P pcc Jackhmmer:Q9P2H0 Family This family of proteins is associated with a susceptibility to monomelic amyotrophy. 27.00 27.00 32.60 32.40 21.20 23.00 hmmbuild -o /dev/null HMM SEED 982 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.99 0.70 -13.80 0.70 -6.61 18 67 2012-09-03 10:33:30 2012-09-03 11:33:30 1 3 37 0 32 60 0 648.90 38 85.13 NEW QQRKQKFEEVTEKFQRAHlPhSQRRRu...Va....QKPVPPLEEALKQIQcSsLKSElNL.PsS+RPTlNWRsI...DsAL.PSuLS+N-a++Q+pLhS+l...sC-KEMpENs+ssLsos+.ssFQLKLEETQ+LLEDQ.HLSsLQ+Fp-E.VNQITNSETLSSIDSLEAGE...+.EEIYlTL.sKEs..SoSsQ.pNo.lSLcS...u.NlQSsN..hsCFDcDKLsaSKTQ.HINNWLpNLcspNTQosoPFoDILuKsNVL..PshEa....hNuKEQNs..sshspss-..RsTsTu.sNoluFV.pSPssFlpscKsE+sSEoSohpoTDu.oStsFK+E+PhVTESPsFKFSKAWsTPDSlTpEhsphSDQE+hSElTQcNRTTSl.TShlPhATPLlL.Po...........NpQSu.+sLsKsslH.lKEIcPlQ..CSDpLsELcDhKDE..+lKahssscccLP.LFSDshpsuhl...sp...Ns-scD.cKp+.hscTu.oL.SshhSNhDLVuQHKKhKaNIaERNGV+FLKSILKKESKYEHsYhKALllNpGhphGNQKAAAIRDSIELTK..cKGcsuEIsKT.IKKLRWFDEsuph.cpss--spSLKNpstlopQhSQ.hH..sp...SuAsSNlhSlPAsAlNSAsscpsK-............s.u.lS.csssslGtSspDpVPLNsF.lPSGYshAKQAW.sSK+EEu+sPV+.s.DSKsQKssPQRGtsKlIRRs+SAKVQSu.hlppsRK..GTllRPQSAS+AsshlQsQGKLllPHPPP+ssoN.RuuKshpsSp.CQsVhP.-sSQN.hhTp.ssh.NS+alLPsEaplNphsQESS.Pl.sssC.SDhVTVhPSLP.YsoSECpTlAK.lNaS.susQslApQDuolaCT.pRsPVhEEuh.oloL+sT-EEsssLWK+t+ssLsQN-+uADS.TVsRRKpIl...EN...KpRsLLEQKRQssGSlupKasEQhsNFup....oVpLSSSEPKQosRGTSsh.EEVS-STSpFLMAENLVKuSVPEDEILTshsSKQhpKssLsLNKTQph.NICALSAEEQKILQSLs+LNERLa 
..........................QQR+p+hpEsTEpFQRAHlP.SQR+p.s...s.....p+ssP.LE-ALcQIptSs.lp..hsh...tp+shh...sh...cps...Sshsp.t....pt..t.....t.pc.hpEs.hhslsspp..hFp.pLcEhQphLccp.Hlsslpphpc-.lpphspSEoLSSlDSLEss-...p.pp.h.oh.s.p...u.shp..sp.h.hts.....p.p.tp...sh...h.ts.hthu+st...pph.hshts.sspss..ht-ll.K.s.......ph....hpspctps..s..pt.hp..t.hss..sst.s.s..p...h..shpt..tssp..th..st.............hst....p.s+s.sssp..s.th.....pt...p.p..pt......s..s.h..hhL.ss...........s......t...pt.h....thps....s.sthtph..hppt..php.hps.ttt....h.s..ps.............p..p.pt.....s..sh..t..st.s..tp.p..p.s.hc.pts+hl+uILKKcu+.c.s.h+slhhspth.htpp.stsl+DSlELsK.+t...E..+s..+KL+WhDEht........ph...t.t.tt..psh.t.......................p......ts...tp..t................h..p..........ttp....s.h.hssG.p.s+pAW.sst.tt.......ttsp..pt..pps..+hh+psts..s+sp.s..h.tpp+..ssh.p.QSspcsp...tsQu+h.hPpPP.ts...............................................t..h......ps.......s..s...s.....s....s......p......p.......ppt............tpt...............................s.....h.p......t....p.t.l.pQ++p.st.pht.+.....pph......sh......ss.p.Phps.ps..h.....tplS-STspFLhAEpLsptshsEscILshhpshp.tp..h..ppstp..sh.ssLShEEQ+lLpSLpcLNpRL....................... 0 5 9 16 +15206 PF15353 HECA Headcase protein family homologue Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9UBI9 Family HECA was characterised first in Drosophila where it regulates the proliferation and differentiation of cells during adult morphogenesis. In humans, HECA affects cell cycle progression and proliferation in head and neck cancer cells. It by slows down cell division of oral squamous cell carcinoma cells and may thereby act as a tumour-suppressor in head and neck cancers. 
27.00 27.00 31.10 31.10 21.30 20.80 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.34 0.72 -10.86 0.72 -4.34 14 102 2012-09-03 10:34:47 2012-09-03 11:34:47 1 4 77 0 66 93 0 102.00 54 20.64 NEW hstCCsPhus..............p.tt.hphschpss.....+VhCsNEpCst.upaMH+pCF-tWEpslLs......hL+ohGRARSWS-+QRpQNLWTKKGYDLsaKhCuC+CG+GpL+KDh-Wh ......................................s..CssPhhs..............shhtslchpc.cDh.KVlCNNEpCPh..upaMHhpCF.cWEuolLs......phps.....hGRA.RSWsEKQpRQNhWTKKGYDLAF+hCSCRCGpGHLKKDhDW.h....... 0 17 22 45 +15207 PF15354 KAAG1 Kidney-associated antigen 1 Coggill P pcc Jackhmmer:Q9UBP8 Family KAAG1, kidney-associated antigen 1, or RU2AS (RU2 antisense gene protein) has been found in mammals. It is expressed in testis and kidney, and, at lower levels, in urinary bladder and liver. It is expressed by a high proportion of tumours of various histologic origin, including melanomas, sarcomas and colorectal carcinomas. 27.00 27.00 88.30 88.30 25.00 19.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.12 0.72 -9.95 0.72 -3.65 2 9 2012-09-03 10:36:12 2012-09-03 11:36:12 1 1 9 0 3 2 0 79.60 84 100.00 NEW MDDDAAPp.EGVPVAVHpHALH.tLRQVsGPGAuAsHLPRhsP.pLAAs.t.AP.LSQhPHRTpGAGSsPETNtp.TNPpV+EK MDDDAAPR.EGVPVAVHKHALHDGLRQVAGPGAAAAHLPRWPPPQLAAsRREAPPLsQRPHRTQGAGSPPETNEKLTNPQVK.... 0 1 1 1 +15208 PF15355 Chisel Stretch-responsive small skeletal muscle X protein, Chisel Coggill P pcc Jcakhmmer:Q9UHP9 Family The murine X-linked gene Chisel (Csl/Smpx) is selectively expressed in cardiac and skeletal muscle cells. It localises to the costameric cytoskeleton of muscle cells through its association with focal adhesion proteins, where it may participate in regulating the dynamics of actin through the Rac1/p38 kinase pathway. Thus it is implicated in the maintenance of muscle integrity and in responses to biomechanical stress. 
27.00 27.00 82.10 74.60 20.30 16.80 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.02 0.72 -3.97 9 43 2012-09-03 10:37:14 2012-09-03 11:37:14 1 1 39 0 28 31 0 84.40 77 98.29 NEW MSKQPuSNV+ulQANINIPMGAFRPGAGpPPKRKEhTsEs.Eps.P...sstt.EEKK.lPGAhKLPGPAVNLSEIQNlKSELKaVPKAEp .....MSKQPsSNVRAIQANINIPMGAFRPGAGQPPRRKEsTPEs...EE.u.s.P.....PTo-.EEKKPlPGAKKLPGPAVNLSEIQNlKSELKYVPKAEQ.. 0 1 3 11 +15209 PF15356 SPR1 Psoriasis susceptibility locus 2 Coggill P pcc Jackhmmer:Q9UIG4 Family SPR1 is psoriasis susceptibility locus 2 protein family. 27.00 27.00 123.90 123.70 18.30 17.60 hmmbuild -o /dev/null HMM SEED 114 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.35 0.71 -11.10 0.71 -3.73 5 32 2012-09-03 10:38:06 2012-09-03 11:38:06 1 1 23 0 15 26 0 113.20 79 84.46 NEW SpDHPSPPusEA+EEsuuPTLPQGPPIPGDPWPGAPPLFEDPPPPGPSRPWRDLPESGVWPPEPPoTDPPQPPLPDDPWPAGPQPPENPWPPAPElDHcPQcEPDLDPPREEYR ....StDHPS.sssEspEEtGSPTLPQGPPlPGDPWPGAPPLFEDPPPPGPSRPWRDLPEoG..VWPPEPPpTDPPQPPRPDDPWPAGPQPPENPWPPAPEVDHtsQEEPDLDPPREEYR.. 0 1 1 1 +15210 PF15357 SEEK1 Psoriasis susceptibility 1 candidate 1 Coggill P pcc Jackhmmer:Q9UIG5 Family This family is considered a candidate for susceptibility to psoriasis. 27.00 27.00 28.40 32.70 21.70 20.90 hmmbuild -o /dev/null HMM SEED 149 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.05 0.71 -10.95 0.71 -4.30 3 13 2012-09-03 10:38:49 2012-09-03 11:38:49 1 1 6 0 1 14 0 128.30 85 94.99 NEW TDQKSHSQRALCTQTPALQGPQLLNTDPSSEETRPPHINPDRLCHMEPANHFWHAGDLQAMTSKEFHLAATQDDCRKsRTQEDILVPSSHPELFASVLPMAPEEAARLQQPQPLPPPSGIHLSASRTsAPTLLYSPPPSHSPFGLSSLI ...TDQKSHSQRALsTQTPALQGPQLLNTDPSSEET+.P.l.PDRLCHMEPANHFWHAGDLQAMhSKEFHLAATQDDCRKGRTQEDILVPSSHPELFASVLPMAPEEAARLQQPQPLPPPSGIHLSASRT.APTLLYSsPPSHSPFGLSSLI............ 
0 1 1 1 +15211 PF15358 TSKS Testis-specific serine kinase substrate Coggill P pcc Jackhmmer:Q9UJT2 Family TSKS, testis-specific serine kinase substrate, is expressed in the testis and is downregulated in cancerous testicular tissue, in comparison with adjacent normal tissue. TSKS expression is very low to undetectable in seminoma, teratocarcinoma, embryonal, and Leydig cell tumours, while high in testicular tissue adjacent to tumours which contain pre-malignant carcinoma in situ [1]. Recently it has been shown in human testis to be localised to the equatorial segment of ejaculated human sperm. The finding of a TSKS family member in mature sperm suggests that this family of kinases might play a role in sperm function [2]. TSKS is localised during spermiogenesis to the centrioles of post-meiotic spermatids, where it reaches its greatest concentration during the period of flagellogenesis [3]. 27.00 27.00 87.10 87.00 20.60 20.50 hmmbuild -o /dev/null HMM SEED 558 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.03 0.70 -12.87 0.70 -6.35 3 39 2012-09-03 10:39:39 2012-09-03 11:39:39 1 2 26 0 19 39 0 447.70 73 95.07 NEW ESRopLuPEsPGusooPsKGhsKKKKAVSFHGVEPRMSHEPM+WCLNLKRSSACTNVSLLNLAAsEhsDSosp-oTTDD..PshsASGspSsPl...PPuuPo.sWAsDDPDIo-lLsGVNSGLLRAKDSITSLKEKTTRVNQHVQoLQSECSVLSENLERRRQEAEELEGYCoQLK...........ENCRKVTRSVEDAEIKTNVLKQNSALLEEKLRFLQRQLQDEsPRRQEuELQELEQ+L-AGlSRpsLuhousSouss.sPPsSE-cPsPP.........cuLulAthsupsRAGEGsEVS.pEhQKVoAGL......EELR.........REVSSLTARWaQEEGAVQEALRLLGGLGGRLDGFLGQWERAQREQAQAARGLQELRGRADELCTMVERSAVSVASLRuDLEGLGPVKPlLEELGRQLSSuRRGS-LSMsLDR..sGSCuRCuSQ.GQQLSTESLQQLLERALTPLVDEVKQRGLA.PACPSCQRLHKKILELERQALAKHVRAEALSSTLRLAQDEALRAKNLLLTDKMKPEEKVAoLDYLHLKMCSLHDQLSsLPLEGSssohGGGSuGGAPPKRGGPsPEQ 
................................SpsQLsPEsPtusos.sKGIsKKKKAVSFHGVEPpMSHpPMHWCLNLKRSSACTNVSLLNLA.Ah..Es.DSoGTDSTsED........SG.hsLss...PPuSPo.PWss-DsDIoElLSGVNSGLV.RAKDSITSLKEKToRVNQHVQoLQSECSVLSENLERRRQEAEELEGYC.QLK...........ENChKVTRSVEDAEIKTNVLKQNSALLE...........EKLRYLQQQLQDETPRRQEuELQE.E..............Q.K....EAGLSRpGLuPss.s.GCs.GPPGSP-cPsRP.........RuLsPuGWGMGsRAGEGPhlSEQ..ELQKV.sGl......E.ELR.........REVSSLTARWHQEEGAVQEALRLLGGLGGRlDGFLGQWERAQREQAQTARGLQELRGRADELCTMVERSAVSVASLRSELEGLGPlKPILEEhGRQhQsSRRGsDLSMNLDRu.QGsCuRCASQ.GpQLSTESLQQLL-RALTPLVDEVKQRGLs.PACPSCQRLHKKILELERQALAKHVRAEALSSTLRLAQDEALRAKNLLLTDKMKPEEKhAsLDaLHLKhCSLHDpLSpLPLEGSTGoMGGGSuuGsPsKpGG.ssEQ..... 0 1 2 5 +15212 PF15359 CDV3 Carnitine deficiency-associated protein 3 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9UKY7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 128 and 251 amino acids in length. CDV3 is also known as TPP36 - tyrosine-phosphorylated protein 36. The function is not known. 27.00 27.00 30.90 30.00 25.60 25.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.82 0.71 -4.06 9 101 2012-09-03 10:40:36 2012-09-03 11:40:36 1 1 63 0 53 93 0 118.10 51 57.56 NEW KE--EWKEFEQK.EVDYSGLRlQuhQh..sEKE--EsEK+--.t-shEEsGth........uuDKSSGPWNKSA.A.sPsAssh..VpEs.EP.spsuGVYRPPGARhTs.pRtssQGPPEIaSDTQFPSLQSTAKHVEoR .............---EWKEaEpc.chDYSGL+lQshQI..s.EKE---s.EKc.p-.su-sh-EsGus.........Gs-Ku.S..G.PW.NK...........oA.s.......s...pAPsussh.......VsEsP.cP...sh..TuGV....YRPPGA......Rhos............sR+ss.........Q...G...P...PEIaSDTQFPSLpSTAKtl-oR.............................................. 0 13 17 31 +15213 PF15360 Apelin APJ endogenous ligand Coggill P pcc Jackhmmer:Q9ULZ1 Family Apelin is among the most potent stimulators of cardiac contractility known. The apelin-APJ signaling pathway is an important novel mediator of cardiovascular control [1]. 
Apelin is an adipokine secreted by adipocytes where it is co-expressed with apelin receptor (APJ) in adipocytes. It suppresses adipogenesis through MAPK kinase/ERK dependent pathways and prevents lipid droplet fragmentation, thereby inhibiting basal lipolysis through AMP kinase dependent enhancement of perilipin expression. It also inhibits hormone-stimulated acute lipolysis through decreasing perilipin phosphorylation. Apelin induces a decrease of free fatty acid release via its dual inhibition on adipogenesis and lipolysis [2]. As a vaso-active and vascular cell growth-regulating peptide Apelin is a target of the BMP pathway, the TGF-beta/bone morphogenic protein (BMP) system - a major pathway for angiogenesis [3]. 27.00 27.00 27.00 27.00 27.00 27.00 hmmbuild -o /dev/null HMM SEED 55 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.76 0.72 -3.75 8 19 2012-09-03 10:41:47 2012-09-03 11:41:47 1 1 18 0 11 19 0 54.80 67 72.82 NEW GPLhpssDGK-LEE.GolRpLVQP+suRsGsGsWQGGRRKFRRQRPRLSHKGPMPF .GPLhpssDGpsLEE.GNlRaLVQPRsuRsGPGsWQGGRRKFRRQRPRLSHKGPMPF 0 1 2 3 +15214 PF15361 RIC3 Resistance to inhibitors of cholinesterase homologue 3 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q7Z5B4 Family RIC3 is a protein associated with nicotinic acetylcholine receptors (nAChRs), neurotransmitter-gated ion channels expressed at the neuromuscular junction and within the central and peripheral nervous systems. It can enhance functional expression of multiple nAChR subtypes. RIC3 promotes functional expression of homomeric alpha-7 and alpha-8 nicotinic acetylcholine receptors at the cell surface. 
27.00 27.00 27.10 27.30 26.70 26.80 hmmbuild -o /dev/null HMM SEED 152 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.10 0.71 -3.98 27 152 2012-09-03 12:43:49 2012-09-03 13:43:49 1 4 75 0 80 140 0 158.40 28 37.52 NEW lVL.ClulllP+hh.....hsts....pp.....p..............hspssssphsPhhp........tts.ss..ss.t...ss..t.s+scshcuhspscsts.tt.ts.....supppuhhupllPlYuhGllLallYhlaKl.p.+scp..p.ppppps.............................s.t...........pss.........t...+pc..hs-hcLspLQ-RLppTEchMppIlo .......................................................................................hhs.............................h..hths...P.hhc...........................pts.ss...ss.s..sthh.tRst.h.uht.shGuu.stt.ts............tsptpuhhG..lhP...l...YshGIllahlYhLhKl..ptKp..s.pc.ttss..............................................................................................tt.............tp...........hcpt...sphcLtpLpp+LppTEptMppll.s................................................................................................................ 0 16 26 50 +15215 PF15362 Enamelin Enamelin Coggill P pcc Jackhmmer:Q9NRM1 Family ENAMELIN is involved in the mineralisation and structural organisation of enamel. It is necessary for the extension of enamel during the secretory stage of dental enamel formation. The proteins are expressed in teeth, particularly in odontoblasts, ameloblasts and cementoblasts. 
27.00 27.00 34.50 34.50 16.30 16.20 hmmbuild -o /dev/null HMM SEED 906 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.82 0.70 -13.77 0.70 -6.46 36 250 2012-09-03 12:45:25 2012-09-03 13:45:25 1 2 199 0 21 246 0 720.00 60 93.29 NEW PYYSEEMFEQDFEKPK.EcDPPKsESPs.TEPouNSTVsETNSTQP...s...sGGSQGGNDTSPTGNss.G.NsGsNPssQNGlhssPsVNlSG...QulPRSQIPWtPsQPNIaENYPNPN...lRsFPsGRQWpPTGTshGpRQsGPFYRN.psQRGspWNSFAhEuKQsA+PGNPsY+KsYssToRuN...PNaAGNPANFRRKPQGPNKpshGTNls...PluPKp.GTls+sEKlQNP+EKsl.GQKERhVhPT+DPoGsWRNSQsYG.lN.KsNYKL..PpPEGNh.lPNFNSlDQ+ENSYYPRGDSRRsPsSsuQsQoQNh.PKGIsLEPRRIPYEoETspPELKHuTapP.hYPEEIPSPsREaFPsGRNTWNcQEISPsFKEDPt+QEcpLPa......PShGSRGsVaYsEYNPYsPR....ENSPYhRSNTWDERsDSPNTh...GQPcNP.Y.PhN...TP.D.QK-TlsYNEEDPlDPT.GDE.FPGQs+WG.E.E.suFKtuPTVRaY.EGpQYs...S...NQP...KEYLPYSlD.NPsKPREDhPYuEFYPWsPDEsFPSYNsuPTloPPsEsRGYYs.NNAltQEESolFPSWNSWDa+IpsQuQKE+pPYFNRNaWDQuTNLHKsssssPsQKENpPYSSNsPAGLQKNPTWpEG.EN.LNYsM.QITRLNSP-+-HLuFsDllPQsYPssQcEsaLFH..SQRusCCAGuSsG.+D.NPLALQDYTPSaGLsPGEsp-osPhYTEuSHTKHARalISPTSILPuQ..RNSSEK+L..PGESQs..P..FRDDVSTL++NTPCSl+sQlGQtGhh.sFPEAuSLQSKNTPCLKuDLuGDGNN.lLEQIFEuNQ.lNERTl.sLTPEQLlIsTP-EuPKPEuIQSEl.QGsEGE+QQpR.PPSILQlPCFGS+LsKa+sSSTGTPSopGRpGsaDGDssMPTEpP.sTLsGLATGEQFpulNVD.LNA 
...........................................................................PYYSEEMFE.QDFEKPK.EcDPPKsESPs.....o-PosNoT..ssETNSTQs...s...stGuQGGNDTSP....TGsss...u.NsssNsssQNGl.s.PsVNsSG...QssPto.QlPh......t..PuQPNIaEN.aPNPN...hRsFPsGRQWp...............TGo.shGpRp..suPFYRN...........sQRu..pWNoaAhpuKQs.s+PGNPhY+KsYsss.uRuN.....PNaAGNPuNhR...R...KPQuPNKpPh.sTNs.u...sh.uPKp.s.T.ss+sEplQNP+EKsh.uQKERhlhPT+..sPousWRNSQpYt.sN.K............sNYKL..P.PEush.sPsFNSlDQ+ENSYYPRsDS..R+sPsSssQhQsQNh.PKGlhLcP+RhPaEoEsp..pPElKHuoap.P.s..............Y.......sEthPsss+EpFPs.G+NTWNpQE..ISPsFKEDPGRQEEpLPH......PS.hGSRGsVaYP-...YN.PYsPR....ENSPYhRuN.oW-ER.sDSPNTh...tQscsPhY.PhN...TP.D..KcTlsYNEEDPlDPT.GDE.FPGQsRWG.-E..SFKt..sPTVRaY.EG.cQYs...S............NQP...KE...YLPYSLD.NPsKPRED.FPYuEFYPWsPDEsFPSYNsuPThs.PPsEspGYYh.ssA.htp.E.Eus.h.PSWsSWDp+hpspspcEptP.YasRNhWspsspLpKs.sp.spQ+cNpPYsss.PsG...LQ+NPhWpEu.Es.LNYsh.QhsRlNsP-tpp.sF..-hls.sYPssQpEuphFH..SQRusCCsGushG.+-.s..PLALQDYTPuaGLsssEst-ssPhYo-sSHoKasR.hlSPsuh.ssQ..RNSSEKp......tEsts..P..FRDDsuoL++ssPCShps....plsQhthh.shsEssS.Qs+NhPCL+sDLuGDGss.lLcplF-ssQ.hsERTs.sLsPEQLlh...ssP-EsspPEsIpsEh.ttpEschpQpR.sssIhplPCFsSplsphhsSosGsPsu.uR.sshst-.h..hPTc.P.sohstLATttphpshNlD.ls....................................................... 0 1 1 6 +15216 PF15363 DUF4596 Domain of unknown function (DUF4596) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9UPP5 Family This domain family is found in eukaryotes, and is approximately 50 amino acids in length. There is a conserved ELET sequence motif. There are two completely conserved residues (S and E) that may be functionally important. 
27.00 27.00 37.00 37.00 25.90 18.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.96 0.72 -8.28 0.72 -4.04 10 87 2012-09-03 12:47:34 2012-09-03 13:47:34 1 3 38 0 49 63 0 45.80 71 4.71 NEW -ThuRWuELhSPLDDSosSl.T.VoSFSP.ED.ssSPQGEWTIlELETaH ...pLuRWuELhSPLD-SpASI.T.VsSFSs.-D.suSPQG-WTllElET.H 0 3 8 19 +15217 PF15364 PAXIP1_C PAXIP1-associated-protein-1 C term PTIP binding protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BTK6 Family This protein domain family is the C-terminal domain of PAXIP1-associated-protein-1, which also goes by the name PTIP-associated protein 1. This family of proteins is found in eukaryotes. The function of this protein is to localise at the site of DNA damage and form foci with PTIP at the DNA break point. Furthermore, studies have shown that depletion of PA1 increases cellular sensitivity to ionizing radiation. Proteins in this family are typically between 122 and 254 amino acids in length [1]. 27.00 27.00 42.40 42.00 24.70 24.00 hmmbuild -o /dev/null HMM SEED 141 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.17 0.71 -4.27 21 81 2012-09-03 13:14:09 2012-09-03 14:14:09 1 1 71 0 53 71 0 129.70 44 61.23 NEW -WplssSD-Eh......................sW.PsPpEItcL...Ychl...spstsLELpaps.sRRs....PoPp.tsp..csppsp....pttctt.-pcs.ps.oEFDF.DD-.s..p............sphhsRRpssuo..ttSspK+pAphDcVLscM+RH+..........+l.ppthpps ....................-WslsCSD-Eht.s......p..................sWhPsPpEIpcL...YEhL...sspssLELQhchhsRRs....PoPEspsptpcs-c......cscEpt.Ec+PphP.oEFDFDDE..s.osp..........sshhsRRRTPGo..ss+Sp.KRpA+hDKVLSDMKRH+..........+l.pppI...hcp......... 0 16 20 35 +15218 PF15365 PNRC Proline-rich nuclear receptor coactivator Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NPJ4 Family The PNRC family, proline-rich nuclear receptor coactivator, is found in eukaryotes, and is approximately 60 amino acids in length. There is a conserved YAG sequence motif. 
27.00 27.00 27.10 27.10 26.90 26.70 hmmbuild -o /dev/null HMM SEED 58 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.43 0.72 -9.06 0.72 -3.96 36 209 2012-09-03 13:16:56 2012-09-03 14:16:56 1 1 132 0 140 210 0 56.20 38 22.51 NEW ssssYAGu+FopsPsPosLPhPP.cWhtsss....................p..shpthop.............pL+tlL.clp ....t.tppYAGup.F.us.sPuPSsLPhPs.ahhsss..............................................t.stp.hs.........................................pL+tlL.pl....................................................... 0 27 56 92 +15219 PF15366 DUF4597 Domain of unknown function (DUF4597) Coggill P pcc Jackhmmer:Q96GX8 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 63 and 76 amino acids in length. There is a conserved TPPTPT sequence motif. 26.50 26.50 27.60 82.30 25.30 18.10 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.76 0.72 -9.15 0.72 -4.39 4 25 2012-09-03 13:30:01 2012-09-03 14:30:01 1 1 22 0 15 24 0 61.10 81 62.28 NEW MCVSS.suSp-EAPVLsDKHLDVPsIIITPPTPTGMs.lPRDSp+sVWhDEsGShsDDGElDsE MCVSSsSSSHDEAPVLsDKHLDVPsIIITPPTPTGMM.LPRDSppTVWLDETGSCPDDGElDPE.. 0 1 1 2 +15220 PF15367 CABS1 Calcium-binding and spermatid-specific protein 1 Coggill P pcc Jackhmmer:Q96KC9 Family CABS1 is a family of proteins found in eukaryotes. It is also known as NYD-SP26. It binds calcium and is specifically expressed in the elongate spermatids and then localised into the principal piece of flagella of matured spermatozoa. 
27.00 27.00 50.90 50.90 18.90 18.50 hmmbuild -o /dev/null HMM SEED 396 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.29 0.70 -5.60 9 23 2012-09-03 14:10:36 2012-09-03 15:10:36 1 1 20 0 11 26 0 382.60 67 87.66 NEW MAEDG.PKIYSHPPTESSKTsTEATIFFGADNTIPKSETTITSEGDHITSVNDahhEuDFSTTssNKLTssKE+lKSEDDlEu.hlKSoThhEKEITTLTuTsNShAs-SITENFIPVKIGNISSPVATVSLIDFSTNhAKEDILLsTlDsGDc-Vs.hTSElSGol+-STsslsDTPsLPscpscsDs...sSSsKSsssADtsVQlTDS.lPEAEIsPoTE+NlTTIPDITslTEEKITEIDLIlsEDDPssVsKLTDSDEEKFITVFELTsoAE+DKDNPEDh.LsDEESTDElNVWMER-pssEAEoHsVLLTAVESRYDFlVPsSlAhNlpE-Ss..T..c..EDLsENspsESVTKsTEshS....tsTs..Ds.spcEDs.TTEoGlFKLLKE-PDEFhI .MAEDGLPKIYSHPPTE.SSKTPTtATIFFGADNsIPKSETTITSEGDHVTSVN-YhLESDFSTTTsN..............KLTssKEKLKSEDDhtochIKSTT.HLpKEIToLTGTsNShs+DSI...TENFhPVKIGNISSPVsTVSLIDFSTsIAKEDILLsTIDoGDtEls.ITSEVSGTLKDSoAulADoPAhPccKDEuDhsNYsSSlKSNVPADEAVQVTDShIPEAEIPPusEcsFTTIPDITALpEEKITEIDLsV.EDDssAVupLTDSDEEKFITVFELTTSAEKDKDN.EDTLLTDEEST-GANlWMER-oANEAETHSVLLTAVESRYDFVVPASlATNL.sE-SST-..EDLSEs-pTEoVsKlTEPFS....uTTSlLDTPsaKEDTSTTETsIFcLLKEEPDEFMI.......... 0 1 1 2 +15221 PF15368 BioT2 Spermatogenesis family BioT2 Coggill P pcc Jackhmmer:Q96M83 Family BioT2 is a family of eukaryotic proteins expressed only in the testes. BioT2 is found abundantly in five types of murine cancer cell lines, suggesting it plays a role in testes development as well as tumourigenesis [1,2,3]. 
27.00 23.80 57.00 23.80 24.70 19.30 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.08 0.71 -4.49 9 46 2012-09-03 14:16:37 2012-09-03 15:16:37 1 2 17 0 13 36 0 153.80 67 46.74 NEW MKsuKa.hsoSsKl.ssVPELs.KKGlh.s.shSscsKEK+SuKll+sKlEPMVLRS.PPTGESllRYALPIPSSKT+-llucDEhl++IT++LKMlVSTLE-TYGhs..hpsGEcshsKsEpE-h...sLSVGDDlsSFLhpCSphAuQLEEAVKEEcsILESL..FKWFQtQVNQMEE ...MKPsKHLLToSsK..ssVPtLshKKGLh.N.PlSPchKEKHNAKLl+DKIEPMVLRS.PPTGESIVRYALPIPSSKTKsLlsc-EMItKIhKHLKMVVSTLEETYGtC..spNGEcshlKpEpEEL.........oLSlGDDhsSFLhhCSQFAuQLEEAlKEEpN.....ILESL.....a.KWFQhQVNQMEE............ 0 1 1 1 +15222 PF15369 KIAA1328 Uncharacterised protein KIAA1328 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q86T90 Family This function of this protein family remains uncharacterised. This family of proteins is found in eukaryotes. 27.00 27.00 28.00 28.00 23.80 23.20 hmmbuild -o /dev/null HMM SEED 328 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.58 0.70 -12.40 0.70 -4.62 8 73 2012-09-03 14:28:52 2012-09-03 15:28:52 1 4 37 0 31 57 0 214.80 46 55.11 NEW DLCPEDKRRIANLIKELARVSEEKEVTEERLKsEQESFEKKIRQLEEQN-LIIKEREALQ...QYRECQELLSLYQKYLSEQQEKLohSLucLuAA+tpEQplSu+KSshpsus.c.....LDGSYLulAtspshhpsp++sKuuspupuupohsshpNs.sh+sphhppPp-shcc.P.Es...RoCssctssh+.tsst.hcp.....h...-.+hpEh.shpPsssocsCuH+putsuspl+-upasophu.ppstophcoCsasphshsS.hptpsh.ts.ETpluKplSE-RRQQLLLQKMELEIEKERLQpLLAQQEsKLLLKQQQL.HQSRLDYN ........................................sRlscEKE...pch+sEppphE..lp.Lcppp.hh.pE+..h............................................tts...pp..osccs..pps..p.....LDGSYLulutst.h...ptt.pss..s.ss.s...hpNp.s.p...h..spcthtc.s.cs...tsss.t.sshc...s..hc..........-hphpEh.phpss.p.psCu.c+hu..ss.spps.pstphs.phstop.coCsasthshsS.hpstsh.ts.Ep..t+p..E-++ppLhhQKhpLEhE+E+LQthLspQEtKLLhppQQL.pQSphpYs.............. 
0 5 9 15 +15223 PF15370 DUF4598 Domain of unknown function (DUF4598) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N5I9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 159 and 251 amino acids in length. 27.00 27.00 27.60 27.20 24.80 24.80 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.51 0.71 -3.78 7 138 2012-09-03 14:34:31 2012-09-03 15:34:31 1 2 125 0 98 135 0 109.00 27 54.16 NEW sLLSRLSAFLPpMKsAN--LpREItAGR..uKDlpLD-lD-.p-GQ....YIEMNLGLGVLEEKR....ssDssusstp.scupc......s.stpts...pDSslL-+LhGp+csossp.......KPoIpEhs ...........................lLs+lpsFLPphtpAN-cLccchss.us..stchsI..E..sl--..scsp......................hIEM............sluLG.lh-p..pp...............stcs.ppssp.spp.sspc............ts.tppp.p.ttpt...........................................t........................................................................... 0 32 44 69 +15224 PF15371 DUF4599 Domain of unknown function (DUF4599) Coggill P pcc Jackhmmer:A6NFA0 Family The function of this family of eukaryotic proteins is not known. 27.00 27.00 28.70 28.10 25.00 25.40 hmmbuild -o /dev/null HMM SEED 88 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.32 0.72 -10.12 0.72 -3.92 56 185 2012-09-03 14:35:58 2012-09-03 15:35:58 1 4 23 0 114 144 0 88.00 33 9.50 NEW tRs+pRp+s...ssh+u....pR..p..spcEsE.cspcLlSlLcS.h.u...........hLspc...s...shRpLLCsDPsCpVCNssssElp.....pLLhtEshp.s.uss.ss.usssu.sus ...........................t+s+pRt+s...ts.+u....pR..pspcEhE.ch.......pcLlSlLpS.h.G...........hspcu...sh+pLLCsDPssclCpsssschp.....phh.tEshp.s.uss.sluPhsusss....................... 
0 15 15 15 +15225 PF15372 DUF4600 Domain of unknown function (DUF4600) Coggill P pcc jackhmmer:A6NNP5 Family \N 27.00 27.00 29.60 29.60 26.40 26.40 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.88 0.71 -10.74 0.71 -3.93 11 65 2012-09-03 14:38:42 2012-09-03 15:38:42 1 3 40 0 35 65 0 99.30 45 50.06 NEW cEuNEWKTRYETQhElNcQLE+QIhhL+c+lEph+s.NspD..+hu....SlRs............h-ph..ospuLpphlKpLE+EKptLpsQL+-hEhRL-QEuKAY+Kss-ER+tYlsElspsp.ss.php++Qphs...thpcsscs.scs .......s-hphpYcs.hEhNppLpcph..hppphpph+u.sspD.ths....sl+s............h-pM..ss..-SLNpLLKQLEcEK+sL-sQlK.hth+LEQEuKAYpKhssER+sYLuEhopsS.s..phs++Qphsth.chpEp...s.............. 0 9 11 15 +15226 PF15373 DUF4601 Domain of unknown function (DUF4601) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NA69 Family This protein family is a domain of unknown function, which is found in eukaryotes. In humans, the gene encoding this protein is found in the position, chromosome 19 open reading frame 45. 
27.00 27.00 31.60 27.50 26.20 26.30 hmmbuild -o /dev/null HMM SEED 440 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.82 0.70 -12.46 0.70 -5.62 11 46 2012-09-03 14:42:42 2012-09-03 15:42:42 1 2 32 0 25 42 0 319.10 42 85.36 NEW LFppDsRWsupE+VSEs+RAF..PPPs..shp..pEpsRERohAhQuSsL+lHADuRstssLSoARusYGWPElPs+A+EpIRGARLlFDRDSlPsGDR-KLRIPsTTaQshFPP+DA.sPQPRAPspHL.GGPNsL+W-YcpQ.-sTSYQ+QFQALPuPPALMCKRASSSVcLGDsKIGYuPhCSEQKpAYpPQsLPPD..RYDKAQAuAHIHpVNIpPGDuLF+DRTTpu-HFYu+EP.EPFVLHHDpTPESHILcGNWsPG.PGSLsTShphFYG.QPsPsTpPPuRHlsH-pLQSHl.sLG-PcLLGpFFQToMuoDYsPs....phs.......p.p+AsNL+LhpSpLPpsouEhDFLToN.QpMlKPHphssASsTEEhLQRCKYSHlEPPLGpQRFFSTQYcDEFPaKYQGPsVL+.hushQESHVPLGTs+phGChtcKlDPpAPQhPhYPCPSQ ..........................................s.....a..................t.......sphph.ts.p.t...o.hp.t..h.t...t.....ttst.hatpDohPsGD+p+lchP.osaptha.sa-h..psts.+sPshHh.uGs.sslphs.hptp......tToYpppFp..uh.usPAh....p.+ch.t..Spl..hGD.phsht..so..pphhts.t..s.t..R.Ys+tpAsu+lp.ss..ltsGDshh..c.p..TThscpFh.spcs.-Phhlc+c.ppstSpIhcGshsPG...suu...ls....T...hphFau.psss.s.......spPsscclspc+Lp..SHV.pLG-scLhtpFFpTohsoDYhss....ph...........p.p+s..sshph..Stl..s..ssp.salThs.pthh.hPH.t..h..t..hoc-hlQ+s+hSHh.sPhst.+aFSTpap-pas.K.Y.uPhs.p..tp.Qcu.lPlGo.t............................................. 0 5 7 11 +15227 PF15374 CCDC71L Coiled-coil domain-containing protein 71L Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8N9Z2 Family The protein family, Coiled-coil domain-containing protein 71L, is a domain of unknown function, which is found in eukaryotes. 
30.00 30.00 30.40 32.10 27.60 29.80 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.17 0.70 -5.14 18 76 2012-09-03 14:45:56 2012-09-03 15:45:56 1 2 35 0 40 87 0 232.80 33 89.74 NEW sEEKAVHSWSR..ISoAGpKALEEAL+VFNPMSpDLosTEsQLVAFLQGLR--GFQPTILRSKDVYGYSSCTAssPSQTKhpspssssss..sSsPs+ssppuhthssu+ustlshslssp.uKsustslsK.tsoTNLLLsSLKQopuspupssshuFPsphYPGVYPAMRLSVVLEALVP..LKssssCLpuKh......pphtLulusSslKLhKssu.s..ps..Ks.......hpsKu.pplhp+ust..............GPptsshppSpssKuoG.LsGhhhpsuSphssptspspt......................................................................................+spt.s.sphsh+stpsp.EshG.pKRK+s-EsK-hss+K+s+.hP....s.K.....splspuThsLL+FpsIKVs+....psSDDEVRp+AQ+ILRVNLSPVIRlQPL .........................t+sVhuhup....ssuppsht-AhhlF.PhSp-h.so-tphhsFlptL+cp.hpPhlLpStDVYGYsSsps....................................................................................................................................................................................................................................................................................................................................t....hh..t............................h.tt.........t..........us..phhph.sI+Vst....p.S.stsRppApplLpVNLpPhlplp.h............. 0 3 5 10 +15228 PF15375 DUF4602 Domain of unknown function (DUF4602) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8NDD1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 173 and 294 amino acids in length. This family includes Human C1orf131. 
27.00 27.00 27.00 27.00 26.60 26.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.55 0.71 -4.10 42 158 2012-09-03 14:46:48 2012-09-03 15:46:48 1 3 126 0 111 162 0 137.30 22 56.05 NEW VsFps.p..........p+cpp.........................p.stspspst.t...t..tp...................ptp.tpchslc+.s.......+h-VpcFGhouapt.cp++phctppslpLGAKsP.KpphlNYKhlhpppKppKtcccc.ctphtptsshhptppp......tpppppc+ppcpcsss .....................................................................................................................................................t.tt.pptpt..t.t........t.................................ptpppchslcK.s.........+hEVccFuhos.h..st.tc++phEpccsltLGAKPs..KpphlNYKhLppphKcpKttccc.ccch..tptss..hhppppp..........ptpcpppp.ppp.....t........................ 0 43 58 85 +15229 PF15376 DUF4603 Domain of unknown function (DUF4603) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q92628 Family This protein family is a domain of unknown function. In particular, this domain lies at the C-terminal end of a protein found in eukaryotes. 
27.00 27.00 39.70 38.70 19.00 19.00 hmmbuild -o /dev/null HMM SEED 1286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.37 0.70 -14.36 0.70 -7.37 11 83 2012-09-03 14:47:49 2012-09-03 15:47:49 1 3 49 0 49 72 0 742.60 54 88.25 NEW GPlSsSEMSLL+ALGPVQTWLGQELEKCGIDAMIYTRYVLSLLLHDSYDYDLQEQ..ENDIFLGWEKGuhKKWGKSKKKC.oDLoLEEMKKQAAVQCLRSASDESSGIETLVEELCS+LKDLQSKQc.EK..IpKKhEGS.SPEs-.SPoAKDQVEMYYEAFPPLSEKPVCLQEIMTVWNKuKlCSYSSSSSSSTAP.TSTDTSSPKDCNSEuEssKERsspA.s...ssspE+uQ.pRSKpEKEN+hssuss...EEK.sshhKKQsRH+SEGKhRPRSWSSGSSEAGSSSSGNQGEhKsu.hKhVKVRHKsREs.RNKKG.RuGQsRhshKss-KsER+stuG.....sSuSuuuGul+.QLCKRGKRPLKEltRK-uGspEuK-lhsEu+s-KEYKEEPLWYTEPIsEYFVPLS.RKSKLETTYRs+p-ssshs..SEAVE-LSEuV+GLCISNsNlH+TYLAAGTFIDGHFVEMPA.VlsEsh-LsGTShCS.PEDspaLDDlHLSELTHFYEVDIDQSMLDPGAS-shQGESRILNMIRQKSKEcsDFEAECCIVLDGhELQGESAIWoDSsSSlGAEGhFlQ.DLuNLAQFWECCSSSSSsDADGESFGGDSPlRLSPlLDSThhsschLAGNQE..LFSDssEGS.GlNSs.FSVFEVQCSNSVLPFoFEoLNLGsEN.TDSsSosNhLGKTQSRLLIWTKNSAF-ENEHCSNLSTRTCSPWSHSEETRSDNETlNlpaEESoQFsuEDINYVVPRVSusYlDEElLDFLQ--oCQQppcoLGEhPo..LlFpKKSKLESVCGIQLEQKsEsKsaETsps.sssuS.pGDsYSSGVIKDIWTshuDRsSsAsl-s-ch-..-cLFSsDVNsY.CCCLDsEAKhEslQ-.spKAVQRSEYHLWEGQKtshEKRAFlss-LSKVDG.GDYTTPSKPW-lspDKE.sSFILGGVYGELKTFuSDGEWAVVPPucs+GSLLQCAA.......SDVVTIAGTDVFMTPGNSFAPGHR.LW+PhVSFEQuEQs.KuG-sGLNKGFSFIFHEDLLGuCGNFQsEEPGLEYsFSSFDLsNPFSQVLHVECSFEPEGIASFSPuFKPKSILCSDS-sEVFHPRICGl-RTQYRAIRISPRTHFRPISASELSPGGGSESEhESEK-EuSlPlsSQsDVF-DPQADLKPLEEDAE+EGHYYGKSELESGKFLPRLKKSGMEKSAQTSLDSQE-SuGlLPhscQs.CL-Cshp-olpss.hESScusCKlhpppppEhschCSCcsuCphPshp-s..luust.hpEFPlLNsDlQshstuQpcpsWWpKALaSPLFPuSpC 
...............hsss-MS.LpuLGPVQoWLGQELEKCGIDAMIYTRYVLSLLL+DSYDYD..L..p-Q..EpsI.LGhtc.......Gsh++.hs...+u++..Ks...shs..hE-...hKKQAAVQCLpSASD-sS......GIEoLVEELCs+LKDlQ...scQ........c...EK...hp+K.-tu.sP.Eht..S.ss+...DpsEMYY-AFPsLspp............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 0 8 10 24 +15230 PF15377 DUF4604 Domain of unknown function (DUF4604) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96AT1 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 141 and 174 amino acids in length and contain a conserved LSF sequence motif. 
27.00 27.00 28.70 28.10 23.50 23.30 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.42 0.71 -4.02 68 196 2012-09-03 14:49:01 2012-09-03 15:49:01 1 3 171 0 140 196 0 148.90 26 88.71 NEW ppsloYsc..sEPsFLp+h+.sphGhppss..................s+cpth......................sscscss.s.-..cp.....D-tPpVVllc.ss.-Lot.-Ehpthhpt...........................ttppppptsstsuclha+ps................pKRpsscthssts.......................t.ttt..............tt.tp+ppp+pp+..tpshLSFs--- ..............................tpsloYsc..sEPuFLp+h+.pphGhppss.....................spcp.h........................sscscsss.-.......cc.........D-tPpVVshc.ps..cLot..EEhpthhpp..............................................................tttptppp...ssssuclhh+ps..........................................tKRps..schh...ss.s......................................ttt.t.............ppp.ptp+pppKphK..p..LSFs---....................................... 0 42 66 108 +15231 PF15378 DUF4605 Domain of unknown function (DUF4605) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96D05 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 82 and 137 amino acids in length. 27.00 27.00 27.30 27.00 24.40 26.90 hmmbuild -o /dev/null HMM SEED 60 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.60 0.72 -8.63 0.72 -4.27 13 84 2012-09-03 14:50:09 2012-09-03 15:50:09 1 2 46 0 54 67 0 59.00 54 46.72 NEW olFuplNcpLlshGFPphphGs+lVEPlsslhhhhlLhhlGlpGLLLVGllhlVhhhs.Qc ...o.Fu-LN+pLlNMGFsphahGp+lVEPVhsIhhhhhLhhLGlpuLhLVullhLV.hhsQp.... 0 8 11 25 +15232 PF15379 DUF4606 Domain of unknown function (DUF4606) Coggill P pcc Jackhmmer:Q96LL4 Family This domain family is found in eukaryotes, and is approximately 100 amino acids in length. 
27.00 27.00 31.90 31.50 22.10 19.40 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.35 0.72 -3.96 13 38 2012-09-03 14:51:17 2012-09-03 15:51:17 1 1 33 0 26 26 0 100.60 56 38.32 NEW CsVPcELlNRIahKNhRsslKQsusu+pHlsSQCPsCN+KRAELAQusFLRpKKTLLEShLLpEKIDEHLaTKDhLThIGEAHpuLPRLSDDP+IIWKRLsEKu ...........................CTVPDELlNRIYhKNhRso.KQhusAKQHlSSpCPsCN+KRAELApuAFL+pKKTLLEShLLQEKIDEHLaT+DFLThIGEAHp.s.hPRLSDDPRhIW+RLspK...... 0 5 6 9 +15233 PF15380 DUF4607 Domain of unknown function (DUF4607) Coggill P pcc Jackhmmer:Q96LP6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 207 and 359 amino acids in length. 21.00 21.00 95.20 95.10 20.40 19.10 hmmbuild -o /dev/null HMM SEED 265 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.93 0.70 -11.82 0.70 -4.94 4 19 2012-09-03 14:52:14 2012-09-03 15:52:14 1 1 13 0 6 20 0 243.90 63 81.77 NEW hAhKRLL.sTpQhllPRs.sVSTsSF---SYtEhtspPsPSSEhDEsPhhFTsctEhp+csRtsP+QAWSSshLEQ.hst+PshsHSVNPlHLEAtGhHIpRHsRPpsQPLsssKtsSGSsARPaTAIGLCRRSQTPhA.QSsu.SsoE.E.EERhAAPAGu.AHPD.lQSRLLGAoGssVs+GAVAMAPEMLPKHPHsPccRRPRADoSLHGNLAGAPLPLLAGASTHFPSKRLIKVCSSAPPRPoRtFHTVCSQALSRPVVNAHLH ......huhKRLL.pTCQYIVPRS.oVSTsShDEE..ShtEhpSSPuPSSETDEAPLIFTAcGEsEcRARGsPKQAWsSSFLEQ.hspKP.shs+SVNPlHLEAtGIHIsRHTRPKuQPLSssKpNSGSuARPhTAIGLCRRSQTPsA.QSsusSsoEhE.EERhA.sPAGu.AHPD.lQScLLGASGNPVGRGAVAMA.PEMLPKHPHsPccRRPpADoSLHGN.....LAGAPLPLLAGASTHFPSKRLIKVCSSAPPRPoRRFHTVCSQALSRPVVNAHLH.. 1 1 1 1 +15234 PF15381 DUF4608 Domain of unknown function (DUF4608) Coggill P pcc Jackhmmer:Q96MR7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 85 and 157 amino acids in length. 
25.00 25.00 26.90 116.20 22.20 16.90 hmmbuild -o /dev/null HMM SEED 75 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.57 0.72 -3.84 3 10 2012-09-03 14:53:25 2012-09-03 15:53:25 1 1 8 0 2 4 0 72.90 90 60.65 NEW SSAETLRTVSRRSVPSSSMPYLALAHSRVSSLNHAASVDGsuTSHRNVADSFSRTSRSCSRFLKGTAGSAGR-Gs SSAETLRTVRRRSVPSSSMPYLALAHNRVSSLNHAASVDGWGTSHRNVADSFSRTSRSCSRFLKGTAGSARR-..s.. 0 1 1 1 +15235 PF15382 DUF4609 Domain of unknown function (DUF4609) Coggill P pcc Jackhmmer:Q96N06 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 70 and 139 amino acids in length. 27.00 27.00 122.40 122.30 19.60 17.90 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -9.32 0.72 -4.27 7 23 2012-09-03 14:54:21 2012-09-03 15:54:21 1 1 18 0 10 23 0 69.40 74 58.25 NEW EKPDsKtKSSKKKsVIPQIIITRASsETLlShuS.sS-EQRTI+EpADWGPYtRHRNPSTssAYssQs+E .EKPDVKQKSS+KKsVVPQIIITRASNETLlSsSSoGSDpQRTIREpcDWGPYtRHRNPSTsDAYssHhKE.. 0 1 1 2 +15236 PF15383 TMEM237 Transmembrane protein 237 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q96Q45 Family This protein family is found in eukaryotes. The function of this protein is to aid the production of new cilia in ciliogenesis. Mutations in the protein cause a disease, named Joubert syndrome type 14 (JBTS14) and also affect cell signalling using the Wnt pathway[1]. Proteins in this family are typically between 203 and 512 amino acids in length. There are two completely conserved G residues that may be functionally important. 
27.00 27.00 27.50 31.90 25.50 26.90 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.54 0.70 -5.25 22 115 2012-09-03 14:55:22 2012-09-03 15:55:22 1 7 77 0 76 122 0 207.20 32 51.64 NEW sstpt.sscltscs-Dhhs..........-.t....pp.s.hsssph.SQP............ss+laVE+s..pcFpssc+schh+spp.h........cshh-.csh....aooh-lAlplacuF+hlulasHGFLAGaAlhphlllasLsupphst..............sLLppYphLAhPhpulhYhLLslSoVSAFD.............Rl-lu+sshs...lRshlplsssuLs.hlYFsuLlloLSpphhs-+lshh......s.ssshasssscpp...........llpsWhhlNllhAlLsuLuWlhluhpPspDhs .....................................................t.................................................s.P.........................splalpts..ttFpthc.......hpt......................p...c.c......hoshclAhsVp+sa+hluh....hsHGhLAGhAlhpllhlasLsspphstl.................................................................sLLppYpslAhPhpshhYhLhsluhlSsFD.........................Rh-hu+hohu...hpth.ls.hcsssls.h....lYhssllLolspp.hs-+ltLh.................ss.N.olh..thtpp.............llpsWhslslshulhshhuWlhluhpPt.Dh.................... 0 19 27 53 +15237 PF15384 DUF4610 Domain of unknown function (DUF4610) Coggill P pcc Jackhmmer:Q9BUH6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 164 and 206 amino acids in length. There is a conserved NPG sequence motif. 
25.00 25.00 30.50 59.20 22.60 23.10 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -11.30 0.71 -4.78 6 40 2012-09-03 14:56:59 2012-09-03 15:56:59 1 2 28 0 21 47 0 176.00 56 94.80 NEW ssLCTLs.sGsspPRaVCYCEspputt..stGlFNltlTsus-lWSTphos-SLuphKu+FGLsuTEDhssRFRsACpQQsVuloLQED..pAsLpLuGsPSsLoF-LSKlPssEAtPRLpALhLpLA-+VpsLEpRLAssEpoAhSPRKSsp.uusp.FhP-.-+QRGGsGsGVR+RhPGESLINPGFKSKKPAoGVDFD- ....................................................sLChls..sGstsP+aVCYCEs-tst..hstusaslhVTDAsELWSTsFoPDSLusLKA+FGLousEDhssRFRuACcQQAVsloLQED..+ASLTL.SGusSuLsFDLSKVPuPEAAsRLQALTLuLA-+VpsLERRLA..AsEcs..A.......sSPRKSsp.uGsQ.LFLPDsD.QRG.GsGsG....VRRRC..P.GESLINPGFKSKKPAuGVDFD-......... 0 5 6 7 +15238 PF15385 SARG Specifically androgen-regulated gene protein Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9BW04 Family This family of proteins is found in eukaryotes, the function of this protein is still unknown but it is thought to be an androgen receptor. Protein expression is up-regulated in the presence of androgens, but not in the presence of glucocorticoids. SARG tends to be highly expressed in prostate tissue [1]. Proteins in this family are typically between 340 and 587 amino acids in length. There is a conserved EETI sequence motif. 
27.00 27.00 33.60 27.90 23.30 26.70 hmmbuild -o /dev/null HMM SEED 497 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.11 0.70 -13.05 0.70 -5.13 31 124 2012-09-03 14:58:22 2012-09-03 15:58:22 1 4 37 0 65 113 0 341.50 32 92.15 NEW ps.sDpShcaL....otEEK-sLLFhEETIsSL.-s-h-s....slss..Dputpsp................uPp.........s.EEosspp...sEPcclh-hsps.......u.shput..shuLPcshp.hu...pspsh+cs....s.......t.pppth....s.P....pst.s.......s.Pstsssustsss.ch-hl..sPPtt....pPchtp.......psspPhs.upp.........................pcc.susEuhS.pusp+sscss.t..ps.su.sspsu.............................t.pt..sP.sAPK.s++hPsNIslpsSpu...shp.s.t......Nh.pR+spshss.sus.....sps..pcppKuRhpuLcKhGLPpDp.p-suht.......................................................................................ssuptssstshsh.hsppttspus.op....................sss........hsGhp..p.shKSpohchsusGhoshhps-...hssp.p..pp.uptShh-+hsP.sshpssR.RsuSLs...............................Rsth.RP...sloVphS....s+Gss-EpRREAL+KLGLLKE ..................................................................................sDpShcaLotEEK-sLhFhEETIsSL.-t-h-p..h.s..Dps..s...................ss.......t.cts.sp........tspphhp.sps..........s.t.t....tl.p..t.hs.....s..hppt...........................s.....t...................s......chp.h..sPP......p.p..t..............t....s...upp.........................pcp.htspshs..s.ptt..p.......t...ss...tph...............................t...ss.ssPK..p+hPsNI.lpsstu...shp.p.t......ph.ptpsps..t..s.................................p.p..t.tp.s..tp.............................................................................................s.........................................................suhp......p.shKSts..h.shGho.hhpst.................sh.t...h.s.sh.ps.R.RssSLs................................................ts.h.RP...sloVphu....s+GsspEtRREAL+KLGLL+E................................................................................... 
0 3 8 27 +15239 PF15386 Tantalus PRR14; Drosophila Tantalus-like Iyer LM, Aravind L, Eberhardt RY, Coggill P, Hetherington K pcc Manual Family An alpha+beta fold domain found in metazoan proteins such as Drosophila Tantalus [1]. Drosophila Tantalus binds the chromatin protein Additional sex combs (Asx) and also binds DNA in vitro [2]. 32.80 32.80 32.80 32.80 23.20 20.80 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.57 0.72 -9.25 0.72 -4.21 13 121 2012-10-09 15:10:48 2012-09-03 16:02:35 1 2 64 0 68 111 0 61.80 60 8.38 NEW sslTPh.GLP+stR.l...pKKEhSL.EEIYTNKNYKsPsst+sLETIFEE..Pcp................+sGslhhh.s+Kh .............NLTPM.GLPRP+R.L......pKKEFSL.EEIYTN.KNYKSPsspRsLETIFEE..P+E...................RNGoLI.hSppKh............... 0 9 12 35 +15240 PF15387 DUF4611 Domain of unknown function (DUF4611) Coggill P pcc Jackhmmer:Q9BXV9 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 71 and 100 amino acids in length. There is a conserved AKR sequence motif. 27.50 27.50 27.50 57.60 24.40 27.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.62 0.72 -10.14 0.72 -3.84 9 26 2012-09-03 15:05:01 2012-09-03 16:05:01 1 1 20 0 11 34 0 90.10 53 94.44 NEW LlGEaVspDGppQ+lRlsCEusu-usshQuLLSGlAQM+EhVoELhusLVppEtpstlsuss-Eul-.GsDEDDuED.EsNhcs+T.....sSsGPsAKR.KP .LLGEYVGp-GpsQ+LRVsCEAPGDuDPFQGLLSGVAQM+ELVoELFusLVppEsQppVAAuP-EuLD.G-DEDDuED.EsNhss+T.....s.sGPsAKR.Ks........................... 0 1 2 2 +15241 PF15388 FAM117 Protein Family FAM117 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9C073 Family This protein family is a domain of unknown function found in eukaryotes. Proteins in this family are typically between 269 and 453 amino acids in length. There are two conserved sequence motifs: RRT and TQT. 
27.00 27.00 27.60 35.90 22.30 26.10 hmmbuild -o /dev/null HMM SEED 318 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.31 0.70 -12.52 0.70 -5.20 19 194 2012-09-03 15:06:59 2012-09-03 16:06:59 1 3 59 0 110 184 0 263.60 45 73.36 NEW lRRTuSLDsIsusYLpGpWP+-.scs..hssp.hpDKuTQTPsuWsEpstp+p...........t....tH+RSASWGSs-pL......+E.........luKL+QQ..........LQ.Ro+p..us.........tspc+cppush.Gspus.....hstsp.........t....shss...sshtphssphRpSlEGLNQELEplal+......ppsc-phht.......l-lPDGHRAPlPs.t..t..................susspo.sspss.........u....Ssssuss.s.........s...ps.ssssp.tpsh...hs..........................................tsssP....h.thuoSP+PN+ohhFpREPPEGCE+V+l.hpE.ts.sp.spth...sCPDcNKVsFpP...sGSAFCPV.ol.hpPLh....Pos-hhh.ps. .................................................................lRRT.SLDsl.husYL.GpWPR-..sph..hssh.hpDKAT...QT.psWsEptt-+p..............................sHpRSASWGSsDpl......KE..........IuKLRQQ..........LQ.RoKp..sSR.........pt+-K-RpSPhp.....GsHss..........hspsp..............ssPhs.h..s.s..hs..tp.ssphtsSlEGlNpElEtlhlK......tptcEplh...........-lPDG+..RAPhP.....p....................SusopolsopoPs...........tpsSs......ssp.pssss........h.....tstptSPpst.cshh..p.t.............................h-sGssSP.......l.taAoSP+PNpSYhFKREPPEGCE+V+V.FEE..h..sst..hth....h.CPDKNKV..sF.P...oGSAFC.V.pl.htPLh.....Ps.shhh...st.................................................. 0 18 29 58 +15242 PF15389 DUF4612 Domain of unknown function (DUF4612) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H246 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 109 and 323 amino acids in length. 
27.00 27.00 31.70 28.90 24.20 23.80 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.78 0.71 -3.48 10 88 2012-09-03 15:07:59 2012-09-03 16:07:59 1 3 69 0 58 69 0 104.90 46 53.32 NEW GCsSAKpVusV.s--............-.spuKsauNGDshsD...EY+hKsVEcVKYh+s.....EEp+hsupsppsh........cppsptt+s+spscsuutsst.h...............slHlSESQQEFFRMLDEKIEKGRDYCS..tEE .................................................................................................GCspuKphstl.ppc............cst+t+satsuDshsD...Eh+hcssEcsp..ps................-Ep+hsupspcsh.....ccpsssph+hpsstchsu.spt.h..............tshcI.ScSQQ-FFRMLDEKIEK.G+D..YsS-pE..c..................... 0 11 15 34 +15243 PF15390 DUF4613 Domain of unknown function (DUF4613) Coggill P pcc Jackhmmer:Q9H6R7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 625 and 725 amino acids in length. 25.30 25.30 134.10 25.30 23.90 23.90 hmmbuild -o /dev/null HMM SEED 671 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.04 0.70 -13.21 0.70 -6.35 23 69 2012-09-03 15:11:50 2012-09-03 16:11:50 1 3 41 0 44 65 0 614.70 48 94.99 NEW 
ELGKGKLLRTGLNALaQAlHPlHGlAWTDG+QVVLTsLpLpsGEsKFGDSpVIGQFEHVpGLoWuPhssuDs....PuLLAVQHKKHVTVWQLs.SssEpuKhLhSQTCEIpEshPlLPQGCVWHPKpslLsVLTu+DsSVhhsVHsDuoRVKADI.pspGhIHCACWTc.DGpRLVVAlGS..uLHSYIWDssQK.TLptCSFCPVFDVsu..hlCuIpATVDSQVAVATELPLDKICGLNAutsFDlPssuE...sss..hto.s.lhs.-.hshD..ptussSEss.St.sSlu....SS....osPLDLTHIhhs+p+S-sSuLlpLR+KDaLTGoGQDSSHLlLVTF-+.cVTsTRKVoIPGILVPDlIAFssKuQlVAVASNTCNlILlYSlhs.SshPNIQQIQLEssERPKGlCFLTDKLLLILVGKQK.sTDsAFLPSScSDpYhIRLhl+Elhh---sSsosstsQsshss......hsshLspus++KhhEsLSs.-hp.ps.+sLLLsusss...pSspstRpL.IcEI......+SP.........soSss..s.....uS....l..................s...L-...scPsspssol......................s....psS......usP..s.....tssp...Es............................ss..lPp..p...psL...ppEKEsppLo+pLEtLSpshs-lQpsLSELpDhLpNGK..Ks..sssYPhSpDPPYVHIsYQKshssssls.EKRuVLLCcGKLRLSsVQQhFGLSLVEMhHu.SpWILLsADoEGFIPLTFoAsQEllI .................................ELGKuKLLRTGLNALaQAlHPlHGlAWTDGpQVlLTsL.hpsGE.sKFGDSpV...IGQFEHVpGLtWuP.sssDo....PsLLAVQHKKHVTVWQLs.SssEp.sKhLhS.QTCEIp-shPlLPQGCVWHPKpslLsVLTtpDsSVh.sV+sDsoRVKADI.pspGhIHCACWTp.DGpRLVVAlGS..uLHSYIWDssQK.oLptC..SFCPVFDVsu..alCuIpATl.-uQVAlATELPLDKICGLNAu.sF-lPssuc...sss.....s.s.lhs.E..shD..ctussu-ps.u...Slu....SS...ssPLDLTHlhhspp+S-sssLlpLR+KDhLTGoGQDSSHLlLVTF-+.tVTpTRKVoIPGILVPDllAFs.+uplVAVASNTCNhlLlYSlhs....SshPNIQQIpLEpsERPKGlCFLTDKlLLILVGKQK..sD.ssFLPSSpSDpYhl+Lhl+Elhh-c-sososstsppsh..ps......hss.lshssc+KhhEsLus.-hp.ps..+sLLlsssss...QSspstRpL.IcEl..+Ss..........ss.sssSh..................s.Ls...scPsspssol...............................sp.o......sss.s..tsph...ps...............................sphsp..p...psh...tpE+phtpLo+plEhLstshs-lQpsLSELp-hhpNG+..+s..sssYP.Sp-sPaVplshQKs.h..sssh..E+RsVLLCcGKL+LsslQphFsLollEMhas.s.WIlLsADs-GFlPLpFpupQElhl................................... 0 7 9 21 +15244 PF15391 DUF4614 Domain of unknown function (DUF4614) Coggill P pcc Jackhmmer:Q9H6X5 Family This domain family is found in eukaryotes, and is approximately 180 amino acids in length. 
There is a conserved EALT sequence motif. 25.00 25.00 26.60 26.60 19.20 18.70 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.30 0.71 -4.51 21 53 2012-09-03 15:12:47 2012-09-03 16:12:47 1 2 44 0 35 45 0 166.20 47 32.84 NEW psossssYS.-DFEpSsp.spsspss..spSpposspohs..opS-tS...ushposhs........tshps...pcppsps....spclhlK.EsAVQT.....csshsh.Wsp...ssuhAslGPslGuuYVDPsPIAoHVlSsDAlEALTAYSPAslALsDMLKQQLsLTQQFlEsSR+LHtSLlpSL-t-saHYpTLEETKEYIRcHRss ..................................s.s.sssYS-DFEpSsp..psopsp..spScpS.s+Tls..shS-.S.........uShposhs..................pshps...p+cpscp....ssRlhVK.-sAVQT.....-PuFsYpWsc...suuhAshGPuLGu.uYVDPsPIAsHVlSADAIE..ALTAYSPAslALpDhLKQQLsLTQQFlpsSR+LHtSLLpSL-t-sFHYHTLEEsKEYIRpHRP.s.... 0 9 11 18 +15245 PF15392 Joubert Joubert syndrome-associated Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H799 Family This family of proteins is domain of unknown function, which is found in eukaryotes. However, mutations in the gene lead to Joubert's Syndrome, indicating that the protein that the gene encodes for is vital for correct ciliogenesis[1]. 
27.00 27.00 30.50 30.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.12 0.70 -5.21 12 56 2012-09-03 15:16:48 2012-09-03 16:16:48 1 1 35 0 30 46 0 293.90 54 20.35 NEW PLQMTGLTDIADII-DLITKcGVSS-ELGLTEpQA+sISRIQ+oSGR+PQRT-cERREIQlWMKRKRKERMAEYLNQLAEKRGQEHDPFCPRSsPFYMTSREIRhRQKMKHEKDRLLLS-HYSRRISQAYSLMNELLSESsQLPssAQKPLPs+PpTsphsRpQ+ssSPRRENpHGHNFPlNRPGKsRaIS.KsSahpKG+PhspspGSsh.................................+GSssPspShQps+s+tuAGLsP.ocQVClEYEREETVVSPWhlPS-I+cILH-sHuSLLQDlSPs.EEEPEsP.....htluGhDSlSESTGSILSKLDWsAIEDMVAuVEDKuLSVHWA ................................................PLQMTGLTDIADII-DLIsKcGVSS-ELGLTE.QA....pshSR..I.Q+sS.G.R..+s..Q...R.T-KERREIpsWMKRKRKERMAcYLspLAEKRuQEHcPFC.PRss.P..hY...MTSREIRh+QKMKcEKD.RLLLS-HYScRISQAYuLMNELLSESVQLPsss.pK..PLPs+s.ps.sp.sppQ+.ssSPRt.ENpaGHsh.lspsuKs+ahS.+PSahpKtcshs..spGss..t............................................................cuSssPs.p.p.h+pptsstlsP.sppsC..lEhERE-pVVSPWtlPs-I+pIL+cspsSLLQDhS...ss.EEc.c.s.......hsGhDSlSESTGSILSKLDWsAlEDMVAuVEDpt..s................................................................................ 0 4 6 14 +15246 PF15393 DUF4615 Domain of unknown function (DUF4615) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H7E9 Family This protein family is a domain of unknown function, which is found in eukaryotes. Proteins in this family are typically between 161 and 229 amino acids in length. There is a single completely conserved residue F that may be functionally important. 
27.00 27.00 28.80 28.80 26.40 25.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.94 0.71 -3.89 33 85 2012-09-03 15:17:45 2012-09-03 16:17:45 1 3 70 0 55 85 0 127.30 38 57.03 NEW cQhcpELsWClpQLEhuL...pstK...opKQtc...-sh+sl+sL+SspsPLlKKRQlM+sthGDYRsKMppEc+Khh+shpshtho.upsps....s+Ku.Fh++uthh..................................sotc.s.phsa.hspp..................................................................pF+FNF ....QhppELsWCVcQLElGLcppKs...o.KQtEpuhpsI+oL+Sp+sPLs+KRQlM+shFGD....YRupMctEppchh+shcsstho.upsps......s+K...h.++sthh.................................pst.t...phphshshspp....................................................................................pF+FNF.............................................. 0 20 25 39 +15247 PF15394 DUF4616 Domain of unknown function (DUF4616) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9H972 Family This protein family is a domain of unknown function found at the C-terminal domain of the proteins. This protein family is found in eukaryotes. Proteins in this family are typically between 166 and 538 amino acids in length. 
27.00 27.00 27.50 27.50 23.30 26.60 hmmbuild -o /dev/null HMM SEED 537 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.88 0.70 -5.49 7 55 2012-09-03 15:20:01 2012-09-03 16:20:01 1 2 30 0 25 44 0 360.90 66 96.47 NEW SFSATILFSPPsG.uEA+CCCCuCKSEssuusous.sGs.P...ssTPITVTGcGLAVQSoEQLLHlIYQRV-KAVGLAEAALuLA+ANNELLK+LQEEVG-LRptpssp.--ssputtpssPscpss.hctSsGcA.pshs..u.EEEs-ulGoGVQVVIEELRQLGAAuu.ssGsLG.FsssQcchchPGCsLAu...sEuuPLLNPh.....DDYluoEGslQRVLsPuaAKQLSPuoQlAhppusu-su.EshschssspPcshLuusAsL....DuAL-..-ssPGuo.......GElphSLG.......assoPsRsRGoGQKNSRRKRDLVLSKLVHNVHNHITNDKRFNGSESIKSSWNISVVKFLLEKLKQELVoSsHNYTDKELKGACVAYFLTKRREYRNS...LNPFKuLKEKEEKKLRSRRYR.LFANRSuIhRpFuPEDQ+LWcsVTEELMSDEEDSLsEPGVWVARPPRFRAQcLTcLCY+LDANSKHGTKANRVYGPPSDRLPSAEAQLLPPcLYNPsFQp-.s-uGspsuPsSsshspsHKohCPDLNSFIEIKVEKDE ......................................SFSATILFSPPuG.SEA+CCCCACKSEssuussGSpGGs.PP...suTPITVTGHGLAVQSSEQLLHlIYQRV-KAVGLAEAALuLA+ANNpLLKpLQEEhG-LRptpss..tct...s.ttt.P....t..c.u.Gcs.p.......................................h.assspp.hRhPGCsLsu...s-usPlLsPh.s...DDYVASEGulQRVLVPAYAKQLSPATQLAIQRAouEousEsGsKLPssRPEDhLsusAsL....DuALE..EusPGus.......GELphSLG.......hssSPsRsRGoGQKNSRRKRDLVLSKLVHNVHNHITNDKRFNGSE...SIKSSWNISVVKFLLEKLKQELsoSPHNYTDKELKGACVAYFLTKRREYRNS...L..NPFKGLKEKEEKKLRSRRYR.LFuNRSpl.hchhusE-QphWpsVTEELMSDEE.DShsE.PGVWVs+sPpaRu......LTpLsh+lDAts.cpG.sKtpRh.G....S-RLPSsEsQhhP.cLaN.pap....tt...............tss.......+cs.s...sSFlplKVEcD............................................... 0 6 7 11 +15248 PF15395 DUF4617 Domain of unknown function (DUF4617) Coggill P pcc Jackhmmer:Q9HCM1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 702 and 1745 amino acids in length. 
27.00 27.00 48.00 48.00 15.90 15.40 hmmbuild -o /dev/null HMM SEED 1068 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.01 0.70 -14.29 0.70 -6.51 14 45 2012-09-03 15:22:00 2012-09-03 16:22:00 1 2 30 0 25 50 0 865.30 45 67.36 NEW SMELLATCLSLWKKQPSEssE........EKQsN....pSppNpT..ulGlS+Psc.lss+uspSssGN...SQsKhl..s.ppThLSsllQNaESSusslsKGTELQIAVVSPLILS-l+TLS.hKslsPpslPEssYPVIKEGSlCSLQsQ.hsENs.lsA.sLKssl....spPVuSossSsKl.s.LhQ....KEKQ..scsopuss-sssssspGpp.t....p....sshuspQss.co+...........................................cSslVsuDhLQI-sICSLVEGDsSYNSQIAcIFNSsPhp+VEPQKsShPsppslSstpQcEQl-ps...TEs+DhuhQc-ch.lpsTDlSpc.ls-.scs...P.......tc.u..............Stphlcsssuhl.EEuslE+..............hspc...sssslsSs..AuhpQDspspEsDssu...Nhsu.ps.PstsEl.s-.ppslhYL+DQLSELLKEFPYGIEulN..s....+csSVupphspplsc-QTssKps..sDSK-...ssDQIpITlLsS-QhKELFPEpcppss.......csD+lsEsQp.EKsls.....EsusQCDsQss.ppsEopDSs.hsSEKDcl+CC.ALGWLSMlYEGVPpCpCsSlcp.sSpE-Ksc-Qsush.-oNSsKQGEpsScsDlslhc...sss.lsss...PKss....ss...cs+hschcp..c...sh...........................KDtopT+...csoph+sEQ-...ss..QhpS.....KsD.Kh................-shQspK+p.pLpaHEVsFpousK.pt...c.hSQEu......Lp+KhhsQsspPlKsKsshhss..K..D......hh+cssSlhQolSsEKhKlKhpu....hp....pt....EKRKlDpsphh-..ElKKKKp-KQEQN+NsG.ss....hKLssplspsNERA.lpEp.s.....luss.....cSSD.K...sSSsKhp+VlospEY.LQRpKcK-shu.ppsuK+hpl....csVsscSpah+sS..Khsspl.tSssKss-+..psSuhpTsKEshNshss+uKsLKhH+..SE..pSKoaslSpNsKGpsDGKQsDKhhh-Koh.DK.lsplsNEhp.s.hs.QsK-QRKpYLNRVAFKCTERESICLTKL-sSP+K..Lp..K-..K+..p......-sKscs...lssK-socKssMLEFKLCPDlLlKNTsosE-ppshpspPcKEQAsVQVSGIKSTKEDWLKClss+.K+M.EusQE.....D.s.l..sS+Lu+RShSADGhEsLQNPVKDSKsMFQTYKKMYLEKRSR 
...............................SMElLATCLuLWKKQPS-ssc........-Kp.s....p.ppspT..ssuhSpPsp.hp.csshSshGN...Spschs.s..ppTsLshlsp....saESouhslsKGoELQIAVVSPLlLS-l+ols.sKslsPt.s.lsEslYPVIKEGSVCSLQsQ.hsEN..ssA.uLKssl....stsVsuossuschhs..hp....KEcp..scsspsss-ssss.spspp......p....sshsspQ.s....ps+...........................................sossVuuD.hLQIssICSLVEGDsSYNSQIAcIFsS.PhphlEspKsShPsppshus.p.ccQl-ph...sEscDhshppsp..lpsTDlspc.ls-tsc....P........c.u..............o.p.lcsstshh.Ecssh-+..............hscc...sssssCS...uuhppD..spEhDsss...........shss.pc.PstsEl....s-.ptslhYL+DQLSELLKEFPYGIEsls..s.......+csSVsQQhs.phsc-p.TssKss..sDSK-...ssDQIpITlLsS-QhKElFPEpcpps.........sDcLtcspp...-csls.....-s.s.ss...phstttc..s.Dsh..so-KD-ltCCALGWLSMlYEGVPpCpCsuhpp.sSppcKtcpQsu...cssSscpsE.psucpslshhp....ps.lsss....Pchs.....st...cschschpp..p...sh...........................+phspp+...cps..psEpc...ssph.S.....psc.p.................ss.pppKtt..lph+ElpFpspsK.hh......SQEs.......ppKhhsps.tPl...KsKsth...ss..p..D.......hh+psShsQolSsEKhKLKhtu....hp.h....pKRKlDpsphhD..ElKKKKa-KQEQppssG.so....hKLssplopsNERAhlpEK.tloss.....cSscsK.....sSSsKhs+llTspEY.LQRQKcKEshu.spsSKchpl.....csl.scSpah+sS......Khsspl.tSsGKssE+..psouspTsKEShsshosHGKslKhH+.....Sc.....-S+oasl.+NsKtpssGKQsDKhah-Ko.h.DK..sphsN-hphsphs.Qs...K-Q+K.YLNRVAFKCTE+ESICLTKL-sSP+K..Lp..+-+c..p....-sK.ps...hss+css-KssMLEFKLCPDhLl.KNosos--ppchpstPcKEQAsVQ.VoGIKSTKEDWLKtlspcp+h.c.ssQch...D.sl..sS+ls+RohSADshEhLQNPlKDS+tMFQTYKpMYhEKRSR................ 0 1 1 5 +15249 PF15396 FAM60A Protein Family FAM60A Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q9NP50 Family This protein family, FAM60A is a family of proteins is found in eukaryotes. It is known to be a cell cycle protein that binds to the promoter of a gene transcription repressor complex, named SIN4-HDAC complex. This means that FAM60A has an important role to play in 'switching on' gene expression [1]. Proteins in this family are typically between 179 and 324 amino acids in length. 
27.00 27.00 58.70 45.90 20.80 20.50 hmmbuild -o /dev/null HMM SEED 213 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.78 0.70 -4.59 17 107 2012-09-03 15:23:46 2012-09-03 16:23:46 1 2 64 0 60 90 0 190.90 61 71.84 NEW FuFHKPKlYRSssGCCIC+AKSSSSRFTDS+KYEc-FhtCFtLpE..pRsG-ICNACVLLVKRWKKLPsGScRNWpH.VVDAR.AGPGhKshspthpK.+pht....................hcp+ph..+http++pp.............ss...shsp.........spssSpspSPs.SspS--sspt...................................c.............tptpsp.sl.ss..FlD.oYWKRpclCCGhIF+G..aGEl..llDPcLhKPCssspc ..........FGFHKPKhYRSh-GCCICRAKSSSSRFTDS++YEK-FppCFGLpE..sRoG-ICNACVLLVKRWKKLPsGSKKNW.NHVVDAR..AGPuhK.hsh+PKKhKshs.........................tp....thppp.hp+lpKchKpp..................................sSD......ApSo..........TSSsSPsQSPshSNpSDDuu-s.........................................................Eh............tstpscssl.hS..FLDhoY..WKRp+lCCGIlaKG.paGEV..lIDs+LaKPCCsp+.p................................................................................................................................................................................ 0 14 17 36 +15250 PF15397 DUF4618 Domain of unknown function (DUF4618) Coggill P pcc Jackhmmer:Q9NUD7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 238 and 363 amino acids in length. There are two conserved sequence motifs: EYP and KCTPD. 
27.00 27.00 27.00 30.80 26.20 25.80 hmmbuild -o /dev/null HMM SEED 258 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.67 0.70 -5.05 17 62 2012-09-03 15:26:35 2012-09-03 16:26:35 1 2 38 0 31 53 0 217.00 45 74.36 NEW l+sR+soL....pELppHcshLschNt-LhcpIQDhEcoTshpVRphLpQQ-lhsollslLEauNc+cLpphKsELQEWcEKpcs+hspLcpQlcpLcu+IcKspEElsFLSTYMDHEYPVKuVQIAsLhRQlQplKDsQQ-ELD-LsEhpctVLtolus+hppKccplLpulshKs.pPhppsLlp+s.hcsQshhKphspaRchIcphcE-lPhL+AEVcpLpsphp.csRElIFtDl.LLR+PKCTPDMDVlLNIPsEEhLP ..............l+st+suLpELps+csaLsKhNp-LlcsIp-hEsSTsLpVRtlLQQQ-hhsollDILEYsN+K+LQph+uELQEWEEKccsKhshLpQQs-QLsu+IcKspEEVsFLSTYMD.HEYslKuVQIusLhRQLQplKDsQQDELD-LsEMR+pVLpoLSs+IQcKc++lLpolVscsppshpp.sLlphh.hcspthh+ph.ta+c.htthpp.h..L.tplptht.p...p.pp.ha.ph.hhphsKCpsp.tl.lpIs.pp.................... 0 9 10 15 +15251 PF15398 DUF4619 Domain of unknown function (DUF4619) Coggill P pcc Jackhmmer:Q9NVL8 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 128 and 299 amino acids in length. 27.00 27.00 33.60 33.40 25.60 25.00 hmmbuild -o /dev/null HMM SEED 296 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.05 0.70 -12.00 0.70 -5.00 10 52 2012-09-03 15:31:30 2012-09-03 16:31:30 1 2 30 0 27 46 0 211.30 53 96.47 NEW MGLuaSKuHPRVTKVAPLQsKEtETPsAGslDFshsQNLEE+S.aohARLQDpsKuLEGQLPPLREThYGRYSo...ssRsMYFDIPLEpGETSIIKRHPPRRhQKLEPlDLPQVlTScRLLSQpEAc.ssp+AK..QELEK+MQospYoSGKRQYLHKMQMLEMNRKRQEAQhELKKoLH+EARINKQ+.R-HKAKKlLpolPRNDDt.DllThLP-c...sLNRsPGNupNuEFL.pHQstNsYsPRKsGKhEsWlpEQEApGQLhWDSSSSDSDEhG+-EKK...PRALVRTRTERIPLFDEFFDQE ..................................MGLu+SKsH.RVhKVAPLQs+E.Eossss.ltFshppsLEEco.h.hsRLQ-pp+shEGQLPPLpEshhGRh.s...ssRsh.FDIPL.-p.tETSIIK+HPPpRhQ+LEPh.shPpshTutph.spppst.thp+t+....hEK+hQs.hasstpRQaL+KMphLEh.p++QE.....sQhE.....L.K+sLpt-s+lsppp.c-cpucKhhpshs+...sssh....chhshlPsE...hlsc.ssGs..stth..t.p......s.p.s.ht.h.hpp.hptph.h-sSSosS-t.tc.p++.....sLVRT+Tp+IshaDpFhD............................... 
0 4 4 7 +15252 PF15399 DUF4620 Domain of unknown function (DUF4620) Coggill P pcc Jackhmmer:Q9P2W6 Family \N 27.00 27.00 149.60 51.40 25.40 19.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.70 0.71 -3.94 2 9 2012-09-03 15:33:08 2012-09-03 16:33:08 1 2 6 0 1 5 0 98.40 84 76.38 NEW VPRWPHLS.QSGlcsPshWTtoPG.PSRDQpAPGs.MPPsAAQPSshG.LVPPATA.E.lDcPA.HWLACsCCLuLPuQLPLAIhLGhsL.LcuuP.sGKLCP+ARRWQPLPS .VPRWPHLSSQSGVcPPDRWTGTPGWPSRDQEAPGShMPPAAAQPSAHGALVPPATAHEPVDHPALHWLACCCCLSLPGQLPLAIRLGWDLsLEAGPSSGKLCPRARRWQPLPS.... 0 1 1 1 +15253 PF15400 TEX33 Testis-expressed sequence 33 protein family Coggill P pcc Jackhmmer:O43247 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 147 and 280 amino acids in length. There are two conserved sequence motifs: NIRH and SYT. The function is not known. 27.00 27.00 46.70 45.70 26.10 19.40 hmmbuild -o /dev/null HMM SEED 140 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.16 0.71 -10.60 0.71 -3.78 7 37 2012-09-03 15:36:24 2012-09-03 16:36:24 1 1 29 0 23 35 0 134.30 63 56.81 NEW SlIPsNIRHKFGSplV-pLlSEEQstp....slschhEtppt.souhPs.hTcs.scl.oohhssYaDLGhshRssLh..GsspETKSLMpuSYTsEVhE+uVRDlEHWH.GRKTDDLGRWHcKNAhshNLQKALEE+huEpsKs+us ...........ShIPsNIRHKFGSphVDpLVSEEQAp+....AIsEshEGQKR...sSuWPS.RTQoPhcl.oSlFSDYYDLGYNMRSNLF....pGuPp.ETKSLMKASYTPEVIEKSVRD..lEHWH.GRKTDDLGRWHpKNAMNhNLQKALEEKYGE+SKS+u... 0 3 4 6 +15254 PF15401 TAA-Trp-ring Tryptophan-ring motif of head of Trimeric autotransporter adhesin Coggill P pcc Jackhmmer:Q48152 Domain TAA-head_Trp-ring is the tryptophan-ring motif of some Gram-negative Enterobacteriaceae. The Trp-ring folds into a beta-meander type on the top of the head domain of its trimeric autotransporter adhesin proteins. In conjunction with the GIN domain it is thought to be the region of the head that adheres to fibronectin. 
18.60 17.30 18.60 17.30 18.50 17.00 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.59 0.72 -9.38 0.72 -3.75 23 175 2012-09-05 12:42:40 2012-09-05 13:42:40 1 29 47 1 31 163 2 64.50 44 11.54 NEW VssspKsNGKtsc.....VKlGsKh.hhpcc-hclhTGKs.................p.ss.sssssssssssspGsGlVTupsVh-..Al ..............hpsKsNG+pTp.....VKlGAKT...uhhp.cKDucl......hTGKsh....................................................s..sss..ssTspGpGLVTAppVl-Al................................ 1 1 1 31 +15255 PF15402 Spc7_N N-terminus of kinetochore NMS complex subunit Spc7 Coggill P pcc Jackhmmer:O59757 Domain \N 27.00 27.00 27.60 27.60 24.60 26.40 hmmbuild -o /dev/null HMM SEED 927 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.08 0.70 -13.83 0.70 -6.54 19 122 2012-09-06 11:41:45 2012-09-06 12:41:45 1 10 72 0 106 126 0 496.30 28 58.18 NEW hDKENsTsDluusp..ts.....hsup-KKSRSKSLGPGGLD.ALpsuNGNRRKSsss...hPLKSILKPTlPVSPlpsIPoF-ETR++oPs.........t.t...ptptt.hIshsssstsshsus-p.sNPFDsFsssu..p.......stt-.p.AAAREREE+ER+E+E+c...sILE+R-ARRKSM...................ANRRVSFAPEATLHTWNVVEls-DSToSSuu.NSTRRASSLs.ts......ppsststp.sps.s....pt.t-sshu.SPsp..cLpphppp..pu..s...ss...pphuSSPaSGuSssus-.tulps.t............-.sssSosss.DupuTsMSh--hTspSsuosts.s..tsSosSSu+L-EuLRpAApc.AGTpuID.D-s.s-hSMEhsspEIsGAFpPWlKKu.pppsh-hE.........................DlouchDQENlsP...sstshp.ppsssppsss-..pts.................p.....DhSMDlTpAlGGIlsptpsppp.....shspRpS.u...............t.oshs-QTMEFTsVVGGItpstSstputsssps.ttDE-MTMEFTSslGGVLspshspssspptptt.s......t.........ppssstpss.-MEMTuAVGGILsshpEptp..p..........DpThGM-hT.sAlGtILssthsspsc..ppthhthps-tst.p.sSSPFQtslh.SPs+.ssss....+hsslsSEsGSPoluSl+s+psR.pS.sppsusosssp.os.psSPh+.sshpss............pptsspsspssTPs+TPPSp.phshRuuSP+Klhps-hptsuop......ttssspc......sLFppss.TGpoTPtFVLpPp.....pRRSSGlGID+-GLGSPRVAtlLDRRRSIGE-AtcFlPpp.....ppuVRF-DPhcLpEElD+ERp-EEp+Esu+..lpt.sst...-+DsThNL+-MISSLT...PKKNKL+GRKSLHVGAA+GLLGKRPsELDpD----ssps...sKRLKG.+p.uSPVKsI+LPAPPSKsETs 
............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................-hsM-hTpshG.tI..h....................................................................p.sh-h...h..tu.........................................................................................................................hc.p..........................................................t...hchp..s.ttlh.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 0 21 52 87 +15256 PF15403 HiaBD2 HiaBD2_N domain of Trimeric autotransporter adhesin (GIN) Coggill P pcc Jackhmmer:Q48152 Motif HiaBD2_N may represent the GIN domain of the Head region of TAAs - trimeric autotransporter adhesins. Not all TAAs carry this domain; however, in those that do, the GIN in combination with the Trp-ring domain is necessary for adhesion to fibronectin in the host cell. 
25.00 25.00 30.40 30.40 18.80 17.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.84 0.72 -8.35 0.72 -3.82 19 46 2012-09-06 14:40:45 2012-09-06 15:40:45 1 14 15 3 0 47 0 53.10 45 7.47 NEW TNVsSGL+sYGDTs...hsssssussshs+ps-..sAapGLlNLsEKuu.....s.ps...shVuD .TNVsSGLKsYGDsN...FsshssSsssls+psD..sAYcGLlNLsEKuu.....sKp..hVAD.......... 0 0 0 0 +15257 PF15404 PH_4 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:Q03868 Domain This Pleckstrin homology domain is found in some fungal species. 27.00 27.00 30.50 35.60 22.80 19.90 hmmbuild -o /dev/null HMM SEED 185 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.40 0.71 -11.26 0.71 -4.76 32 91 2012-10-04 00:02:25 2012-09-10 15:36:12 1 3 89 0 65 98 0 169.50 38 15.94 NEW hsGhLYpKs++HosFpchhVlLssGhLllFpshhRs.hoGhthp...plcap+hhslsLp-CYlYSGhlTc.DLLppspsaDs...........hsPGp+ulPRlY..sDGWpSs---sspsFslWasp++shhcppts.tp.........................................t..tpstsph+hVspLGssG+S.hVFhARSRtERDhWVhuItsEl .......hSGhLYpKs++HusFpch.VlLssGhLllFpshhRp.hoGhths...psaap+hhslsLpDCYlYSGhl..Tp.DLLhpspohDs...........spPup+ulPRlY..sDGWpSsD--sspsFslWasp++slhpsppp.tp............................................tpsppph+plspLG.ls.G+S.hVFpARSRtERDhWVhuItsEI.......... 0 21 37 56 +15258 PF15405 PH_5 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:Q9Y7U6 Domain This Pleckstrin homology domain is found in some fungal species. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.87 0.71 -4.22 42 284 2012-10-04 00:02:25 2012-09-10 15:41:14 1 12 152 1 212 344 0 136.40 27 11.08 NEW llaKG.LKK+sssptps....DlpsYLFDHALLhlKhKtlsKtEpaKVY+RPIPLELLhls..........h--hsst.t......pps...........sshlsppssssst.s......pt..pssasITFpaLG+pGYp.lTLYAushsuRppWlEpI-pp.Q .................hlhputLp+tsspp...s...-l.psaLFD+h...LlLs.+..........h...+s......s......s......K...p...c...p..ac..Va+.......cPIPlcLLhlp.....................................shp-s.sthuh..........................s...............................sshhs.stssssst................tps...p.haPlshp+LG+psh........hTLYAssttsRpcWh-pIppt.................................................... 0 62 116 179 +15259 PF15406 PH_6 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:O94356 Domain This Pleckstrin homology domain is found in some fungal species. 30.00 30.00 30.20 30.00 29.80 29.60 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.92 0.71 -10.62 0.71 -4.02 13 72 2012-10-04 00:02:25 2012-09-10 16:46:12 1 1 72 0 58 96 1 108.00 51 19.22 NEW NFlYoKcFFWFGo.-A...VE..sKsLSuahcu.....-h..tss...AHHssAWAocTGKGLLFFuc.tsDKA.uPsGlIpLADASEPssDGssKFHFTu.KGHKHTFKAuosAERDNWVuQLKsK ............................hhFoK+FFaFus.-A....VE..sKpLosYhps.....EK.....ps.AHsssAWASQTGKGLLFauK.+spDKu.sPsGlINLAD..s..o..-lsp-G.usc...FpF...ph.pGpKHoFpAussuERDsWluslcsK................................... 0 10 27 47 +15260 PF15407 Spo7_2_N Sporulation protein family 7 Coggill P pcc Pfam-B_51974 (release 26.0) Domain Spo7_2 constitutes a different set of fungal and related species from those found in Spo7. This domain is found in general at the N-terminus. In many members the domain is associated with a Pleckstrin-homology - PH - domain. 
25.00 25.00 26.10 45.50 24.20 24.60 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.49 0.72 -8.97 0.72 -4.51 30 67 2012-09-10 15:57:22 2012-09-10 16:57:22 1 2 66 0 46 70 0 65.60 35 6.14 NEW cthsIP+sSFTAhRLsYsSspcLShoS+slhLGsIPphWhps+ppshh+sh..hph.sppps+ppspssp ......p..hslsppSaTA.+Lp.........asSPpcLphoSRphFlGsIPctWhps++cpha+sh..hph.pphpsRptohss.p... 0 6 20 38 +15261 PF09061 Stirrup Stirrup Sammut SJ Bio::Pfam::PfamLiveDBManager=HASH(0x4ef6130) pdb_1dq3 Domain The Stirrup domain, found in the prokaryotic protein ribonucleotide reductase, has a molecular mass of 9 kDa and is folded into an alpha/beta structure. It allows for binding of the reductase to DNA via electrostatic interactions, since it has a predominance of positive charges distributed on its surface [1]. 25.00 25.00 25.20 150.50 24.60 23.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.02 0.72 -9.62 0.72 -4.07 3 5 2012-09-10 18:18:52 2012-09-10 19:18:52 1 3 5 1 3 5 0 79.00 80 4.54 NEW GuFGLoLNFNAFKEWAS+YGVEFKTNGSQTLAIIsNEKVSLGQWHsRGRVSKAVLVKMLRKLYEsTKsEEVKRMLHLIE GsFGLoLNFNAFKEWAS+YGVEFKTNGSQTLAIIssEKISLGQWHTRGRVSKAVLVKMLRKLYEATKsEEVKRMLHLIE 0 1 1 2 +15262 PF15408 PH_7 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:P78953 Domain This Pleckstrin homology domain is found in some fungal species. 20.00 20.00 20.10 20.20 19.90 19.70 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.44 0.72 -10.23 0.72 -4.15 2 2 2012-10-04 00:02:25 2012-09-11 13:50:59 1 1 2 0 2 11 0 103.00 27 10.99 NEW EGYLYh.Ecuulp+RFshLpuK.hsh.t-KGG..LsoFpLppolluhPhsphppAssN.GlsA.GhLhhusstc+lphFAsStcshppWlpshNp.uhtls+uo ..........EGYLYh.....E..cuulp....+R..FshLpuK.hsh.t-KGG...L.soF.pLppolluhP.hs.phppAs.sN.....GlsA.Gh...Lhhuss.tc+lphFAsStcshppWlpsh.Np.uhtls+uo........ 
0 1 1 2 +15263 PF15409 PH_8 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:O13944 Domain This Pleckstrin homology domain is found in some fungal species. 30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.43 0.72 -9.96 0.72 -3.91 11 458 2012-10-04 00:02:25 2012-09-11 14:08:28 1 13 185 0 226 895 10 89.10 43 10.24 NEW GlLLKKRRK+hQGas+RaFsLshptuTLSYa+sssus..sLRGphPlslu.lIuANtcsREI.IDSGhEVWpL+ApNcpDFpsWscAhp+s+ ..............GalLKK.R+h....LKGW+KRaFsL...-s..GhLpY...h+...sp...pc..h.........+l..+....Gu....I.s......l.u....Lu....lh...S.....l.p....K....+.u...+...c........I...-...L....D.o...t....p..c...I...YHLKspopc.FptWVspLcp..pp.......... 0 44 84 151 +15264 PF15410 PH_9 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:O13690 Domain This Pleckstrin homology domain is found in some fungal species. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.46 0.71 -3.95 16 917 2012-10-04 00:02:25 2012-09-11 14:15:48 1 73 189 3 531 1121 1 111.50 33 7.42 NEW YKpGalhRKhhh-ssup+sPhG+RuW+hhYshL.+shVLYLaKs......Ep.................thcsuphpp..o...........hpNu.....IplHHuLApp..AsD....YsK.KpaVF+LpTA-tupaLFQTssscEhQpWlcsINaVAA ..................................................................cGhLtRK.p.c..h-s..s..s.K+..s.........upR..uWcph....YsV.L..cst....LhhaKD.......pp.............................................................................p..tpu.h.st.....t............................................hcps........lsl....+.c.A.lssh......As-..........Y.p..K...+paVF+L.....p.hs...D..sspaLFQApsp--MpsWlptI.................................................................. 0 135 197 357 +15265 PF15411 PH_10 Pleckstrin homology domain Wood V, Coggill P pcc Jackhmmer:P40995 Domain This Pleckstrin homology domain is found in some fungal species. 
27.50 27.50 27.70 27.50 27.10 26.80 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.47 0.71 -3.91 39 249 2012-10-04 00:02:25 2012-09-11 15:43:54 1 24 148 0 185 253 0 117.60 29 12.77 NEW l-pFGpLLLaGhhsVh..K......s.......ct..E+EaclYLFEclLLCCKEhpss..+......ppsp....p.uht....p.t..........p....ppps+LpLKGRIahp.s.l.sclhsh.u.c........s..GpasLpIhW+uDst.l-sFhl+FpsEEphcpWtssl .............................ppFGcLhhaGphpVt..+.....................s.....pp..pE......hcsYL.....F..pchLlCsKEhpss...p...................pptp....ttt.t.t........................................tppps+hpLKGRIhhp..pl.pclhsh..s.p........................sphpLplth..............usst..lssFhl+aps-cphcpWppsl...................................... 0 52 101 157 +15266 PF15412 Nse4-Nse3_bdg Binding domain of Nse4/EID3 to Nse3-MAGE Wood V, Coggill P pcc PF08743 Domain This family includes Nse4 and EID3 members [1], that bind over this region to the Nse3 pocket, in MAGE family Pfam:PF01454 [2]. 18.50 18.50 18.70 18.50 18.40 17.70 hmmbuild -o /dev/null HMM SEED 56 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.16 0.72 -8.40 0.72 -4.12 87 205 2012-09-12 13:26:22 2012-09-12 14:26:22 1 7 173 0 145 200 1 64.60 27 16.30 NEW DuchLV.so-Lutc+sppLphs...ssuhDs-cFls+hhsaMttp..............ssttsssppp ....Du+hLV.AoDLutc+sppLp.s...tsuhDhccFlsphlsaMttp.ht............tp..........ttt.p.....pp........................... 1 37 65 107 +15267 PF15413 PH_11 Pleckstrin homology domain Coggill P pcc PF00169 Domain This Pleckstrin homology domain is found in some fungal species. 
30.00 30.00 30.00 30.00 29.90 29.90 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.54 0.71 -10.50 0.71 -3.82 7 262 2012-10-04 00:02:25 2012-09-12 14:36:45 1 17 124 6 153 1119 8 108.70 26 18.63 NEW hcGalh....+pts+..hpsW+pRahlLc......sshhcYYcs.................................pcshhptshphuhphhplps.t....sshphphppphth.............s.tpphhhpscsp-pphsWlptlppt ..................pGhhp.....Kh..s....sp.....tc....ua+pRWFsLc.......-tpL.YaKs............................................................................................s......h.c..sh..t.p..........tp..s....t..h.....u.s.c..t..p..t........h..s....s..hpsh.........ssp..u.p..p....h....t.c.s..h..ol.......................................hTs.p.+.phhhpscocc-pt-Wlpulp..s................................................................................ 0 36 80 107 +15268 PF15414 DUF4621 Protein of unknown function (DUF4621) Coggill P pcc JCSG:Target_394740-GS13541A Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 350 amino acids in length. 
27.00 27.00 767.00 766.80 21.10 18.70 hmmbuild -o /dev/null HMM SEED 329 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.06 0.70 -12.15 0.70 -5.75 4 5 2012-09-14 08:43:32 2012-09-14 09:43:32 1 1 5 0 1 6 0 329.00 99 93.47 NEW YDLSDVNTDDAVMGESWVAPLGTGYVTSDDVVNVEKVPSIREVDGAYVMIYDGEMKIKGKSLRAASsKVEIASEDITTGDIDGLFDGDFVLALTNPHITLKSNVKNASLDCSLSIEAENTSKKEATSSDFTLSTVSPNIWIGPLDPKTDAFKFVKNEKLPGIVQIVPQKIHLSLSADSKQWTNAPADALSELRYAVELPLTPAPEFSAVSVERIEDAFDEDFVDYIFSDGSARIYGEVTNEMPFDMSIEMVIMDENNVPVDIQFPAQEVKGQSGEVIFEITKEDMPKMKDARHIDLNLHLTGRDQGEALKKGQKTTFNLKLKKEGGISI YDLSDVNTDDAVMGESWVAPLGTGYVTSDDVVNVEKVPSIREVDGAYVMIYDGEMKIKGKSLRAASDKVEIASEDITTGDIDGLFDGDFVLALTNPHITLKSNVKNASLDCSLSIEAENTSKKEATSSDFTLSTVSPNIWIGPLDPKTDAFKFVKNEKLPGIVQIVPQKIHLSLSADSKQWTNAPADALSELRYAVELPLTPAPEFSAVSVERIEDAFDEDFVDYIFSDGSARIYGEVTNEMPFDMSIEMVIMDENNVPVDIQFPAQEVKGQSGEVIFEITKEDMPKMKDARHIDLNLHLTGRDQGEALKKGQKTTFNLKLKKEGGISI 0 0 1 1 +15269 PF15415 DUF4622 Protein of unknown function (DUF4622) Coggill P pcc JCSG:Target_390149-GS13960A Family This family of proteins is found in bacteria. Proteins in this family are typically between 348 and 360 amino acids in length. 
20.00 20.00 21.90 495.10 18.10 17.70 hmmbuild -o /dev/null HMM SEED 310 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -12.03 0.70 -5.65 3 9 2012-09-14 08:50:26 2012-09-14 09:50:26 1 1 9 0 2 9 0 313.90 71 89.46 NEW GKVpV-sQLGGpYuIhS....PDAQTRAssssG.ssL+D....splhLLs-GSTLWLshhcpAKsGTTptT...QGYVVR.TGTGGssoLYPC.psDE.NGcl.I..su..SuTPLYLcAGTYpF+hISPAKAlsoDGKssIcNGEYLlATDsRYTQTtSTslsIotlsp....sNVQslhLNPIIsQTARMpFTI+uG-GVaTLEhLpAGIEISGIQpPlDs....TouaNWos...GDsLPsKlGDKpupVplPupphpTsA-GoLsG-TGVLPTDsRSTPVuVLlNltVNGVPTQYphLlsGpaLLsGHSYNYTsTVKIcsG.ITVlTWQNpSWTsDI- ..DRVRIDPVAGGYYPSIS....PSAQTRGATPDG.ETLKD....RPIFLLEDGSTIRLVVYDDAKNLLEEYS...KAYLVRNAGTSGSSLLYPC.EVDD.NGAV.I..SS..SSTPLYMKAGTYYFRILSPAKALNSKGFVNIGNGEYLLATDDRYTQTAMTAVTITKIDEGGTLNNVQTLYLPPIINQTARMQFTVRAGEGVHTLEMLAEGIEISGIQQPLDN....TTSFDWVN...GDVLPVKVGDQSASVRIT..pATpNADNSLVAHTGVLPTDARSHSISVLLNLKVNGNPTQYQMLLTGLYLTAGHSYNYTATVKISNG.VTVLTWQNRSWTENV.V 0 0 1 2 +15270 PF15416 DUF4623 Domain of unknown function (DUF4623) Coggill P pcc Jackhmmer:269656 Family This family of proteins is found in bacteria. Proteins in this family are approximately 470 amino acids in length. There are two conserved sequence motifs: HLL and RYL. 27.00 27.00 699.10 698.90 26.80 25.60 hmmbuild -o /dev/null HMM SEED 442 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.66 0.70 -12.49 0.70 -6.11 6 9 2012-09-14 09:54:18 2012-09-14 10:54:18 1 1 9 0 2 9 5 445.00 55 94.19 NEW -DYPtShs.uPY-T-LLSIKIlNAGssGspVVEGTIDE-sKpINFPRLDstTNFSAL+lEAcLSsGApLpppVhDhoM-EtsspKTlVLRVlNpsRYK-YhhKlRK+VPVaGADFEpPTVYsFS....GDs....lYssFsohLTRsAuFDGcaVLlVoRsss....tPHLLKVS-LKtGcIsPI.LDlTGVoGGTasYNMGALsNGHlYlASLSGups.SPLKIYYW-TPTSsPEVIAcINVusIPGAGsRHGDNhSlNLDcNGNGYIFFGDNAuoclLRlsluNaKols.s-spllPssscsshhoslaRltNTupYLaoGlchPloLlDpuhsspa...phs...ls...sEulAPRlhsFNpERYLIsCTAGpGuAoKAossLhVYDITKGsTlp-ALppF-pu-pHpPVYpFlLGGuGNsAPusQTsaYIEKDANGKDAKLhlFASRo-SGFVIsEFPlK. 
-DYPcSh..uPYDT-LLuIKIlNAGssGspVVEGTIDEspKpINFPRLDstTNFSALplEAcLS-GApLpppVhDhoM-t-sspKTllLRllNpsRYK-YhhKVRK+VPVaGADFEcPTVYsFS....GDN....lYsDFs.sh.hTRCAuFDGcHVLlVoRsss....tPHLLKVSDLKtGcINPI.LDlTGVoGGTFsYNMGALsNGHlYlASLSGu+s.SPLKIYYWETPTSpPEVIAsINVusIPGAGsRHGDNhShNlDcNGNGaIFFGDNAAochLRlsluNaKTVs.spsplLPucscsshsoNlYRltNTspYLaSGlchPlTLlspuhsppa...p.s...ls...sEAVAPRlhsFNpERYLlsCTAGhGuASpAo.sL.VYDloKGsTlpEALc+FDpu-pHpPlYpFhLGGuGNGushsQTsaYIEKDtNGKDAKLhlFASRosSGFVIsEFPlK.. 0 1 2 2 +15271 PF15417 DUF4624 Domain of unknown function (DUF4624) Coggill P pcc JCSG:Target_390388-GS13780A Family This family of proteins is functionally uncharacterised. This family of proteins is found in bacteria. Proteins in this family are approximately 150 amino acids in length. 25.00 25.00 122.10 112.60 22.40 21.40 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.80 0.71 -10.48 0.71 -4.54 4 12 2012-09-14 09:57:47 2012-09-14 10:57:47 1 1 11 0 1 10 0 126.80 68 85.31 NEW KsNtsphNsu-stEstpholEMEhstNYssSDPF.NuRLFCVScDl-sLsAEloFQMDG-pGIVEIKDpcoD-VLWSNsWcG+VsuDTaolSLsNLpK-KEYsVpFTGTKINHAlVcVSFESsLVpEKERPS KoN-S+hNshsscEoupTTIEM-LDKNYDTSDPFVNuRLFCVSpDIDlL-sElSFpMDGDSGIVEIKDNKTDEsLWSNTW+GpVssDTFoISLsNlQKEKEYslpFTGhKINHAVVKVoFESsLV+EKE+PS. 0 1 1 1 +15272 PF15418 DUF4625 Domain of unknown function (DUF4625) Coggill P pcc JCSG:Target_390125-GS13882B Family This family contains a likely bacterial Ig-like fold, suggesting it may be a family of lipoproteins. 
27.00 27.00 27.00 27.00 26.90 26.50 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.81 0.71 -4.03 44 136 2012-10-03 16:25:20 2012-09-14 11:44:52 1 3 77 0 34 134 4 125.00 28 71.81 NEW sSC.sc--..........Ds.pcPsIsht................PpssphhptGcs..lpFchploDsttLuuasl-IHpNF.DtHoHsspstts...............spsasappsa...........slststp.....shph+pcIsIPs-.....sssG-YHhhlplTDpsGpppht.shsIpIp .........................s.uCsps-p......DsppP.hIphh.................pssphhphGsc..lHhc.hplss.sstlpshpl-I.......HsHstp...................stsasappsa...........shsstpp.....shpa+ccIsIPus.........sssGcYHhhlhlsDtsGspsh.hstslpl......... 0 19 30 34 +15273 PF15419 LNP1 Leukemia NUP98 fusion partner 1 Eberhardt RY re3 Jackhmmer:A1A4G5 Family This family of proteins includes leukemia NUP98 fusion partner 1, the gene encoding this protein is involved in a chromosomal translocation with the NUP98 locus in a form of T-cell acute lymphoblastic leukemia [1]. 27.00 27.00 56.30 56.10 19.30 23.70 hmmbuild -o /dev/null HMM SEED 177 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.76 0.71 -11.33 0.71 -4.18 12 48 2012-09-14 13:29:50 2012-09-14 14:29:50 1 2 25 0 18 37 0 150.80 60 94.99 NEW DDDDV...SFAKWMSSFWGHSWh-EcE+tlRc+..+puQpsupR+sSLPCP...h..ssL.........P....................phhsS-ph...................PRRHS+EDQsFRs+sHh+shRcsSsDuSF+-PhcscsRSHS.IQcFSESFEQQLChRTKRSVSLuPEuRKERpEREsLRh.ch+S+KKscERRsS+KEEctEA.hssLhcK .DDDDV...SFAKWMSSFWGHSWp-EcpRuLRc+..+psQssscRKsSLP..CP....h...shh..........P..............+h.SSDph.PRRHSHEDQcFRC+oHh+shRchStDuSF+-PhcscsRSHSKIpcFSESFEpQLCFRTKRSsSLGPEuRKERsEREpLRh.ch+S+KKscEcRsS+KEEcuEAhMusL.EK... 0 1 1 6 +15274 PF15420 Abhydrolase_9_N Alpha/beta-hydrolase family N-terminus Eberhardt R re3 COGs (COG4425) Family This is the N-terminal transmembrane domain of a family of alpha/beta hydrolases which may function as lipases. The C-terminal domain (Pfam:PF10081) is the catalytic domain [1]. 
27.00 27.00 37.50 28.00 26.00 26.20 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.45 0.70 -11.14 0.70 -4.58 27 350 2012-09-14 14:16:58 2012-09-14 15:16:58 1 4 283 0 95 257 8 196.00 35 35.63 NEW LTPSLlPRsalhQGlluGlshAlGYulGs......hhthlh+hhtlsphtshssp.....thphshsssssssslhhlh.ustWQsslpslMGhtshsshshhtsshlAllshssllhluRLlpthh+hlsthltRh..lPtcl...utslGlslllsLhhslssGVlhRthhpshspsapthsthhcsssstPppPt+SGSssSLlsW-sLGppGRsFVuuGP ............................................hTPSLlPRs.hhQullsGhstAhGYulGs.........hhhhlhc...hh.h......p.t....s.p...................................hhh.shhshsshhhlhhh.lhs.s....tWQcslpclhG.ls.phsh.h.shsl.sshlullshsshlhluRhltth.hR....hLsp.t.lpRh....ls.t..l...usslslslslsL.hlh.l.hNsVll.+tshsuhspshutsNs.psssspssPs.oPhRSGSPuSLhoW-u.LG+pGRsFVssGP.............. 0 22 67 84 +15275 PF15421 Polysacc_deac_3 Putative polysaccharide deacetylase Coggill P pcc JCSG:Target_416920-SP13771A Family \N 27.00 27.00 28.00 84.50 26.10 23.50 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.25 0.70 -12.26 0.70 -5.79 8 18 2012-10-03 16:37:10 2012-09-14 15:30:07 1 3 16 1 3 20 1 428.50 40 52.98 NEW uEIsIshsssl.stsslslEIPPLKYNKchlhhhTQDDCp.AAashTWAAINGKPlosp................haacls.......HLptsDhP......PDhYhhs+sLusTDGsGpEVRFuhTsolus-...-pYMspcstlphGaTp-YaRFah+shlhWc-l+-hhNYGsuIAaHDVssh-h-ss.splht+aslupsl.Ihc+LsGRsCKhLuE.PNGs+sYlsAAhspDsIQTITAQuGs...hKlYPFpsstsLpK..sslpRhFhsss-.........h+psItcphu..pssE-RpAI..sIGsHs.......TDusWss...hLhWLNssYG+cGsDslWFPspEEYYEY.YhRpaopss.Kplspssl+LTVplPutcs..FYYPSlTlNLsGlp.h-hsSlpSs-sVTGLSYu......stcstlMlNlDCR+hLsEHAE+FVcpYEAs.ostus+ADAhYFVsMLK-SsKKstLhpRIp 
..sEIsIchpssl.stptl.p..lEIPPLKYNKchlhhhTQDDChpuAashTWAAINGKPlSpp................haacls.......HLptsDLP......PshYshs+oLusTDGsGsEVRFuhTsTlus-...-paMspcohlp.GaTp-YaRFah+shLhWc-l+EhhNYGsuIAaHDlpsh-hcss.splhtcasluQsh.Ihc+LsGRsCKhLuE.PNGs+sYl....pAAhshDsIpTlTAQuGs...hKlYPFpsstsLpK..sslcRtFhsusc..........hKpsItc.hp...Ps.E-RpAI..sIGsHs.......TDspWss...FLhWLNcsYG+DG-DSlWFPopEEYYEY.YYRhpophs.KplsssolKLplpLPup.cs..FYYPSlTlNlpGlphh-IhSlpSs-sVTGLSYu......sacstlMlNIDCR+hLhEHAppFVcpYEAs.ost.s+ADAhYFVsMLK-SsKKstLhpRIp..... 0 2 2 3 +15276 PF15422 DUF4626 Domain of unknown function (DUF4626) Eberhardt RY re3 Jackhmmer:A1A4T8 Family \N 27.00 27.00 57.80 57.70 25.60 22.10 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.36 0.71 -10.91 0.71 -4.22 3 11 2012-09-14 14:52:08 2012-09-14 15:52:08 1 1 8 0 2 6 0 102.50 74 87.51 NEW MTGRMATLEKSHSSACWRKRSSRTCVEPDRTQDAIHEPRGLSRSHTVLRHRHFVFLPLSSGA+PSVPPR.....shhtscssGslphssPu.hhhhps.hhpthpphhh..hL....tKQhAcscsppPRuspSlhcsAD+LsW hTGpMATLEKsHoSACWRKRSSRTCVEPDRTQDAlpEPRGLSRSHTVLpHRHFVFLPLSpGA+PSVPPR.....s.hh.c...............................................................h. 0 1 1 1 +15277 PF15423 FLYWCH_N FLYWCH-type zinc finger-containing protein Coggill P pcc Jackhmmer:Q96CP2 Family This family is the N-terminus of some FLYWCH-zinc-finger proteins, found in eukaryotes. The family is found in association with Pfam:PF04500. There are two conserved sequence motifs: EQE and QEPS. 27.00 27.00 29.90 29.80 18.80 17.80 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.57 0.72 -10.40 0.72 -3.88 10 49 2012-09-14 15:06:59 2012-09-14 16:06:59 1 5 20 0 23 51 0 98.40 64 23.19 NEW MPLPEPSEQEGESlKAGQEPS...scPGT-VVP..AAPRKPcEFScLVLLTsSpps.sDsssocPp-VHCVhSLEMuuPATLAs.TLQILPsEEQspllQPsPp.sEQK+SKlD ..MPLPEPSEQEGESVKAGQEPS...PcPGTDVVP..AAPR...K..PRcFSKLVLLTASpps.tcssGuK.ptVHClhSLthsGPATLAp.sL.hl.sEtQ.RslpsuPptPEQKRSK.D..... 
0 2 2 2 +15278 PF15424 ODAM Odontogenic ameloblast-associated family Eberhardt RY re3 Jackhmmer:A1E959 Family \N 27.00 27.00 43.60 43.60 19.10 18.50 hmmbuild -o /dev/null HMM SEED 264 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.86 0.70 -11.82 0.70 -4.90 12 37 2012-09-14 15:07:17 2012-09-14 16:07:17 1 1 24 0 16 27 0 246.60 65 94.37 NEW APLIPQRLlSASNSNELLLNLNNuQLh....PLQlQ.GPhNSWIPPFoGlLQp.QQQAQIPGLoQFSLsoL-.pFAGLhPNQlsFPGQsuFAQGsQssQlDPSQ.QTPsQTQQGPspVMPYVhSFKhPQEQuQMhQYYPVYMLLPWEQs.QTsspSP.QTGp.Q.F...EEQlPhYspFGYIPQQsEPshPGGQQQhsFDP.hlGTAPEhAsMPsttVlPYLQKEhINF+HssAGlahPSTS..KPSTsshFTSulDP..TIsPchhEcKAKTDSL+EP ......APLIPQRLhSASNSNELLLNLNNuQLh....PLphQ.GPhNSWIPPFSGlLQQ...QQQAQIPGLoQFSLSsLD.pFAGLhPNQIPFPGQsSFAQGsQAGQhDPSQ.QTPsQTQ.GPs.p.VMPYVFSFKMPQEQuQMhQYYPVYMLLPWEQPQQTlspSP.QTtQ.Q.a....EEQlPFYsQFGYIPQ.AEPslPGGQQQLAFDP.hLGTAPEhAlMssGt.lPYLQKEsINF+HssAGlFhPSTS.sKPSTTNsFTSAlD...TITPcl.EcKsKTDuL+EP..... 0 1 1 2 +15279 PF15425 DUF4627 Domain of unknown function (DUF4627) Coggill P pcc Jackhmmer:217257 Family This family of proteins is found in bacteria. Proteins in this family are approximately 230 amino acids in length. There is a conserved WYK sequence motif. 21.80 21.80 23.80 23.40 20.30 19.20 hmmbuild -o /dev/null HMM SEED 212 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.47 0.70 -11.64 0.70 -4.82 3 20 2012-09-14 15:08:15 2012-09-14 16:08:15 1 1 20 1 2 15 0 213.20 76 92.30 NEW sGlpAQNLIKNscFD.T-LsTEhTsAspsTsG-WFAhNDEssGsTTISactTsDcK+GNAlcl..SussKssSWYKAFLGQRlpsGlEKGlYsLoFaAKAc-sGsQV+VFIR.....+ssNGKssspFFMRcsaDh..-SQPNpSuApYstsIKKA...GKWTKVolsFDFGKVVNAIuShKu..Ncst-VT-TDsssAhLKDFsIsIQ.SQoKsSsVLIDsVSLKK ....VSVSAQNLIKNEKFA.TEVKTKVTNANKATAGEWFIMNNEADGVTTIAWEpTGD..AKYPNAMKLDNSGAEKNlSWYKAFLGQRITDGL-KGIYVLTFYAKAKEAGTPVSVYIKQTNEEKNDNGKYNTTFFMRRDYDA..DAQPNASGAQYNFKIKDA...GKWTKVVVYYDMGQVVNAISSKKA..NANLEVSDTDDDAAILKDCYVAIL.SQNKGGVVEISDVTLKK....................................... 
0 1 1 2 +15281 PF15427 S100PBPR S100P-binding protein Coggill P pcc Jackhmmer:Q96BU1 Family S100PBPR is a family of proteins found in eukaryotes, and localised to cell nuclei where S100P is also present, and the two proteins co-immunoprecipitate. S100P is a member of the S100 family of calcium-binding proteins and there have been several recent reports of its over-expression in pancreatic ductal adenocarcinoma. In situ hybridisation shows S100PBPR transcripts to be found in islet cells but not duct cells of the healthy pancreas. An interaction between S100P and S100PBPR may be involved in early pancreatic cancer. 27.00 26.40 48.50 26.40 26.00 25.70 hmmbuild -o /dev/null HMM SEED 386 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.46 0.70 -12.44 0.70 -5.39 9 68 2012-09-14 16:09:37 2012-09-14 17:09:37 1 3 29 0 29 67 0 252.40 51 84.12 NEW NAPF.SWu..SL..DEDtLDDSLLELS-GEEDDGHFSaTEEEIpELLKDDD.SsEpp.ht..hhcDD..uscsEKGE+tSQILLDTPQEKNSLYSLGPsAETPulaKLPQLSTSlGHGPoPoKsLNR+FsLEKNLIKlT.V.APFsPTVCDslLDKDKTD....SSKDTEK.......sSSLGE-hRE-sLusNESKLCTESEsISPsNSAW-GPshsS.SNssFpQTVSDKNhP-SK+PTPVFSQI.DHSEoPs.su.Sh+NuGSHKSusEhR.PVVSSSSp.K.csLDKDSGKhKGpERRLGKVIPVLQsKsRTNVsTFSQS-LEQQKQIYL+sVIAHIEDPhDSNQGsLGELhALMDQV....HHMQN.+WQHPSDLTMRNYARFRQKPLQRYSLTQWVDRNhRSHHRFQRL ......................................................................................................................................ssh.SWs..o...DEDtLDDSLLEhS-GEEDDGchsaTEEEIp.LLK-D................t..st...p.Eht.+tSpIh.-sPpE.N..S.hSLGPsAETs.shhKLPQLsss.upt.......sR..s.EKshlKlT.V.sPFpshlhDshLsKcch-....SSK-hpp........usht-phpcts.t.sptp.hot....pst...........................................................p.stthp..hspospt.p..hhsh.....s.tp..........h.pp+hupl.sh...sttR......hSpspLEpp+.p.YlppVhtHlpp..t.pps..hEL.sLhsp.....h..p....p..hQHPSDhThRNY.h.ppp.h..hSLppW....................................... 
0 1 3 6 +15283 PF15428 Imm14 Immunity protein 14 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with mostly all-beta fold and several conserved hydrophobic residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI1 or Tox-HNH family [1]. The protein is also found heterogeneous polyimmunity loci. 23.30 23.30 23.30 23.40 23.10 22.90 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.59 0.71 -10.90 0.71 -3.74 70 172 2012-09-18 20:10:21 2012-09-18 22:06:20 1 3 152 0 38 165 2 121.50 18 51.88 NEW ..GDlFslsLs..sspauhGpll...shtths+............hhshhhsps.......p.hph.hpphp.shs..lcth.....hh..hhh.hcpshhpGc.....Wc.lIGppshtptptt........slhahh..shs................hpthp.h....sh...ph..p........t..... ........GslFshsl...tspauaGhll...shttht+............hhshhhshs.......p.hph.hsp.p.sl.s..lp.l...t.hh.sthh.hcphhhpGc.....a..IIGphshtptch.........sh.a.h..shs..............htthpth....s..ph..p......t.......................................... 0 15 24 26 +15284 PF15429 DUF4628 Domain of unknown function (DUF4628) Eberhardt RY re3 Jackhmmer:A1L170 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 152 and 673 amino acids in length. 
26.00 26.00 27.60 26.20 19.50 19.10 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.10 0.70 -12.04 0.70 -4.97 10 42 2012-09-19 13:37:54 2012-09-19 14:37:54 1 2 32 0 24 34 0 220.80 60 57.31 NEW MFENhNsApsPKLQpS+ShspLo+ssususu.shGssEPuGPulhsGSSQHLK...NLGKAVGAKVNDFLRRKEPuuLGs.lGVhEVNKoAuApLuuusssspsshh....psctSh..EuFPRLDPPPPss+KRTPRALKTTQDMLISuQPVlSShEtuppsssupspco.spsps....sptsstpsE.ussppttpscsLsNG...................EsoL.SVPDLIHK........Dup--s+l+so-sR+uSoPs.hEpsGlKlSLS.hsLtE...psuuPsspuRTsSLDsEGPHPDLLSFE .................MFEN.Nss.sPKLQtS+ShspLo+ssusssh.s.ussEPuGPulhsGSSQHLK...NLGKAhGAKVNDFLRRKEPouLGu.VGVhElNKTAuApLuuGscsssssh.....pscpSs.pEuFPRLDPPPPlT+KRTPRALKTTQDMLISSQPVLSShEhusp.ssupsp-o.stsps.......t....c.sshth.tpscsl.NG...................E.sL.SlPDLIHK........-sp-.shhp.sp.R+sSSss.hcp.uhK.SlS.hpLhE...psssss.tsRssSlDsEuPHPDLLSFE...................... 0 3 5 10 +15285 PF15430 SVWC Single domain von Willebrand factor type C Coggill P pcc Jackhmmer:P0C5F3 Family SVWC is a family of single-domain von Willebrand factor type C proteins from lower eukaryotes. The canonical pattern of most von Willebrand factor type C (VWC) domains is of ten cysteines, however this family, largely but not exclusively of arthropod proteins, contains only eight. SVWC family proteins respond to environmental challenges, such as bacterial infection and nutritional status. They also are involved in anti-viral immunity, and all of these functions seem linked to SVWC expression being induced by Dicer2. 
27.00 27.00 27.30 27.30 26.70 26.70 hmmbuild -o /dev/null HMM SEED 65 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.50 0.72 -10.59 0.72 -3.53 127 301 2012-09-19 13:48:04 2012-09-19 14:48:04 1 5 52 0 121 307 0 67.30 24 53.12 NEW Ch..a.p..sp..hlss.Gtt.....hp.....pp..sCpphpCp.....pp..tplp..lpsCsth..s....st..sC...phtp...sssts.a.PpCC...phhC ........................C.hp..st...hltsGpp.....hp......ps...CtphpCp......pstplp..lpsCshh..s......sssC...chts.......shsts.a.PcCCs..phhC........ 0 34 41 86 +15286 PF15431 TMEM190 Transmembrane protein 190 Coggill P pcc Jackhmmer:Q8WZ59 Family \N 25.00 25.00 89.10 80.50 23.90 21.60 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.72 0.71 -4.23 3 18 2012-10-03 17:26:12 2012-09-19 14:49:08 1 1 17 0 10 16 0 125.60 72 76.72 NEW GNGIQGFFYPWSCEGDVWDREACGGQAAIENPNLCLRLRCCYQEGICYHQRPDENMRKKHLWTLSLTCuGLLhLIFLICFFWWAKRRGLhKpLKMPGhLS+hKKsKlSRsVSpoSssphoLpcKcpSPLLus.sG ......GNGIQGFFYPWSCEGDVWDRESCGGQAAIEsPNLCLRLRCCYRDGVCYHQRPDENMRRKHMWALGWTCGGLLhLIsSICLFWWAKRRDh...LHhPGFLtG.+CD.LS+oVSLLSKcRGTtcp...........susss....... 0 1 1 1 +15287 PF15432 Sec-ASP3 Accessory Sec secretory system ASP3 Coggill P pcc Jackhmmer:Q9AET7 Family Sec-ASP3 is family of bacterial proteins involved in the Sec secretory system. The family forms part of the accessory SecA2/SecY2 system specifically required to export GspB, a serine-rich repeat cell-wall glycoprotein adhesin encoded upstream in the same operon. 
27.00 27.00 39.30 39.30 26.00 21.80 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.00 0.71 -10.50 0.71 -4.18 38 390 2012-09-19 13:50:34 2012-09-19 14:50:34 1 2 375 0 22 205 2 123.30 46 50.80 NEW oYhYGSplpapspspVpFpNsLhPSGphI+sWtStsNaQu.sRtsPsLPLL++GppYclphsh-sp..Ppsulal+lhFaD+hscplpphlh+spp..hpFsYPccAYsYpIpLlsAGhpplsF+plpIp-hp ......TaMYGSpVsaps.scVpFhNPLMPSGhsIHpWh.hppFpp..h+.osPsLPlL+RGpcYpLphsF-sp..PtsoVYhhIhFas+pupcLuppIlKsps..hshpYP-EAYuYclphhsAuspSLhF+sloIpEh.s....... 0 7 8 18 +15288 PF15433 MRP-S31 Mitochondrial 28S ribosomal protein S31 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q92665 Family MRP-S31 is the mitochondrial 28S ribosomal subunit S31. This family of proteins is found in eukaryotes. Proteins in this family are typically between 246 and 395 amino acids in length. There are two conserved sequence motifs: RHFMELV and GLSKN. 27.00 27.00 30.40 30.40 20.30 25.20 hmmbuild -o /dev/null HMM SEED 298 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.28 0.70 -12.18 0.70 -4.99 29 122 2012-09-19 13:52:14 2012-09-19 14:52:14 1 6 87 0 76 120 0 254.30 40 76.53 NEW spccLhslltuMKl-.lssp......p.h......pps.c..tpp.h.cshpsssuhhp+spp-us...p.ps-slss-LlsAApsVAsoLs...s-+p.pTESELLppLhpHcspspspppt....................pppcls.......hu.slIusMK.......ls+ssst+...hss...................Rssp...pIpFc-p.tcshs....pcc........c.stsh+p..........chsLFsG+pLsIFssts....cpsspstst.olW-h.ht+pLuhsspQPPtNtFEEMIQWTcpGKLWcFPIsNEtGh--E.ss.cFpEHIFL-+HLE.sa.P+pGPIRHFMELVssGLSKNPYLoVcpKhEHItWFR-YFppKcDlLKEs 
.........................................................................................................................................................................s...................................................tt...t.t.hpct.t.t.......tc..shs.plltAspslAs.Ls.....cpp.p...scSELLtpL.t+pptststttt.............................................................................tt.ph..p.......hp.pll.sshK.......ls+ptt.p....t.........................................p.p........p.phcpt.pt......tpc...................p.t.hpp.....................................phslap..u..c...Ls..IFp.tt.....t.....s.st.shWsh..t+...pLsh.hs.........p.ps..s.tNtFEEhIpWTcpGKlWcFPIsNEt.Gh---.ss.pFpEHlFL-cHLE..sa...P+pGPIRHFMELVssGLSKNPYloVppKh-HItWFRsYFppKc-lLp-........................... 0 27 33 55 +15289 PF15434 FAM104 Family 104 Coggill P pcc Jackhmmer:Q969W3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 113 and 185 amino acids in length. There is a conserved SLQ sequence motif. 27.00 27.00 43.40 43.30 25.20 25.10 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.85 0.72 -10.57 0.72 -3.86 10 68 2012-09-19 13:53:57 2012-09-19 14:53:57 1 1 32 0 30 93 0 113.20 66 80.69 NEW RKRRR............sGsEEDsalsPQsKRss......+NslhQDsaDTE.....................SSSSDstp.......SSSuINSP-RA...........SG...PEsSLNQhssssusNh.PQ.hp.EpSAlC..QGPYsHINQlLKEAHFaSLQpRGRP.PT ........................RKRRR.NGsEEDNHlsPQoKRSS......RNPlFQDSWDTE.....................SSuSDSGG......SSSSSINSPDRA..............SG...PEuSLsphhsGSuPNT.PQsh..EQSALC..QGsYFHINQTLKEAHFHSLQHRGRPsT.... 0 2 4 9 +15290 PF15435 UNC119_bdg UNC119-binding protein C5orf30 homologue Coggill P pcc Jackhmmer:Q96GV9 Family UNC119_bdg is a family of eukaryotic proteins that probably plays a role in trafficking of proteins, via interaction with unc119 family cargo adapters. The family may play a role in ciliary membrane localisation. 
27.00 27.00 221.20 221.00 18.30 17.10 hmmbuild -o /dev/null HMM SEED 197 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.26 0.71 -4.96 5 39 2012-09-19 13:59:18 2012-09-19 14:59:18 1 1 37 0 25 31 0 196.10 83 95.14 NEW MDINGsSRoslSlLP..AAElpSTLKPEAEKPRCSSTPCSPIRuTVSGYQILHMDSNYLVGFTTGEELLKLAQKWSsG-ssKu...EAhPS....slsKsVDlGLHRSSRIYKuKSRYYQPYDIPAVNGRRRRRMPSSGDoCpKSl.Pa.EPtKALHGPLPLCLLKGKRAaSKSLDYLNLDKMsIKESuDTEVLQYQLQHLTLRGERV .........VDINGESRSsLoTL..PhPsAEssSPGKAEAEKPRCSSTPCSPMRRTVSGYQILHMDSNYLVGFTTGEELLKLAQKCTGGEESKG...E.Ah.Po....LRSKQLDuGLARSSRLYKTRSRYYQPYEIPAVNGRRRRRMPSSGDKCTKSL.PY.EPYK....ALHGPLPLCLLKGKRAHSKSLDYLNLDKMsIKEPADTEVLQYQLQHLTLRGDRV 0 1 3 10 +15291 PF15436 PGBA_N Plasminogen-binding protein pgbA N-terminal Coggill P pcc Jackhmmer:O25249 Family PGBA_N is an N-terminal family of bacterial proteins that bind plasminogen. This activity was identified in In Helicobacter pylori where it is thought to contribute to the virulence of this bacterium. Both PgbA and PgbB are surface-exposed proteins that mediate binding to plasminogen such that it can be converted into plasmin in the presence of a Pg activator. 27.00 27.00 69.90 69.60 23.00 22.10 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.61 0.70 -11.48 0.70 -5.07 27 196 2012-09-19 14:39:47 2012-09-19 15:39:47 1 5 194 0 21 117 2 213.50 51 74.30 NEW hpslcsslhtV-stt.sh.lphpu.clpVGpSGhVlpph.ss.pssIlApAsVhph..p..sGhApl+apsF-sLpQcALPpPphhPptGDcllLsahYsRullIAPsp-hYpplpssas.slpFlHsDlhuuaL...ss....sPp+cDF++hCst.ulGLlalshssch.hlDCpSFplLp.pphs.ts.spphphPFYSRlpsI-sshas......hsu.pchpsY.sYYcpLl ....................l.+PlKscLlcVDDhh..Ga.I+.DSsDIKlsSSGVVlp+F.ssspSIIARAuVIsK..c..sGlAKLcFoVFsuLKQ-ALPLPsllP+tGDEVVLNFLYDRuLlIAPD-pTYsclstsFP.pIhFsH.DlhGApLhhsss.h.uPKRuDFRcFCs-sAVGlLhlAL-NpupllDCQsFshL..hEls.lScsossQlPFYSRIuGh+osFFD......FNS.pclsNYYcYYDALl................... 
0 5 17 21 +15292 PF15437 PGBA_C Plasminogen-binding protein pgbA C-terminal Coggill P pcc Jackhmmer:O25249 Family PGBA_C is an C-terminal family of bacterial proteins that bind plasminogen. This activity was identified in Helicobacter pylori where it is thought to contribute to the virulence of this bacterium. Both PgbA and PgbB are surface-exposed proteins that mediate binding to plasminogen such that it can be converted into plasmin in the presence of a plasminogen activator. 25.00 25.00 27.10 25.60 21.50 20.60 hmmbuild -o /dev/null HMM SEED 86 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.31 0.72 -3.85 7 118 2012-09-19 15:07:48 2012-09-19 16:07:48 1 5 44 0 3 126 0 85.50 60 25.41 NEW hcEssspsNuo.....ppptpcN.....APs.KEsNA.KEtsKLssKEEKRRLKEEKKKAKAEQRAREFEQRAREHQERDEKELEERRKALEhNKK .................................t......ppspts.....ppptscN.....APs.cEsNspKtEpKLsuKEEKRRLKEEKKKAKAEQRAREFEQRAKEHQERDEKELEERRKALEhsKK.... 0 3 3 3 +15293 PF15438 Phyto-Amp Antigenic membrane protein of phytoplasma Coggill P pcc Jackhmmer:Q7M1T6 Family Phyto-Amp is a family of phytopathogenic wall-less bacterial antigenic membrane proteins [1]. The bacteria are limited to the phloem and pose a major threat to agriculture worldwide. They are transmitted in a persistent, propagative manner by phloem-sucking Hemipteran insects. Phytoplasma membrane proteins are in direct contact with hosts and are assumed to be involved in determining vector specificity. Phyto-Amp is thought to be one family of proteins that mediates such specificity. The proteins appear to be encoded by circular extrachromosomal elements, at least one of which is a plasmid [2]. 
27.00 27.00 48.60 47.80 21.50 20.40 hmmbuild -o /dev/null HMM SEED 195 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.77 0.71 -11.26 0.71 -4.67 3 24 2012-09-19 15:22:09 2012-09-19 16:22:09 1 1 22 0 3 23 0 179.30 70 82.31 NEW MQNQKTQKSLVAKVLVLFAAVALMFVGVQVFADsPLDLSTLcC-ssLELTANDASDAEKVVsQWKVQNTSLDKKVTKDSVKVtVADNKVTVTPVDuSATSALoGSKVLoLVGVCELNKLTLGTDKKLTLTVKDDKVDAEAGLKALKEAGAKVPATVTKDDLTFTVGKGDDANKVTVKAVDGKTTVSGQVTFEFNV MQNQKsQKSLVAKVLVLFA...AVALMFVGVQVFA.DDKLDLuTLEC.KssLELTAA.DAuDAEKVVKQWKV..QNT..SLsAKVTKDSVKVsV....AD.NKVTVTPADuDAuKALoGSKlLsLVGVCELNKLTLGTDKKLTLTVKDsKVDAEAGLKALKEAGAKVPATVsKDDlTFTVGKsDsANKVTVKAVDGKTTVSGpVsFEFsV...... 0 1 1 2 +15294 PF15439 NYAP_N Neuronal tyrosine-phosphorylated phosphoinositide-3-kinase adapter Coggill P pcc Jackhmmer:Q6ZVC0 Family NYAP_N is an N-terminal family of eukaryotic proteins that are substrates of tyrosine kinase in the brain. When first identified, the family members were referred to as unconventional myosin XVI, or Myr 8 [1]. However, proteins have now been identified as being integrally involved in neuronal function and morphogenesis. The family is involved in both the activation of phosphoinositide 3-kinase (PI3K) and the recruitment of the downstream effector WAVE complex to the close vicinity of PI3K; it also appears to regulate the brain size and neurite outgrowth in mice [2]. 
27.00 27.00 137.40 34.80 20.80 19.70 hmmbuild -o /dev/null HMM SEED 379 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.86 0.70 -5.28 20 174 2012-09-19 15:53:14 2012-09-19 16:53:14 1 16 37 0 86 128 0 321.70 41 37.94 NEW AREsDRlRschstshhpcphp.t...stpp-........ssK.tp-tsuht.............RHFR...ushshPhu.Dplsps......su.u.uhRS.SLHSVhSh.D-uuuhs...........SsRKQPPPKPKRDPsTRLSAShEA...........Vu..AsLss......usKcuupt.....hs+PRPHS...D-..ohKKIPP.KPKRSPNT+LSuSaEElsuts...P...u.....spsu.ss...........sht.ph.spsspsssp....................pp.cs.E-EPVYIEMVGssh+stu.........................scuss-puEAVYEEMKY.hs.E-ss..sts...shssuS.Pshpsp..p.hs..h...psth.sttpus..ppssCDIPsPFPNLLsHRPPLLVFPPsPsTCSPsSDESPLTPLEVpKLPVLEs.............s.pashQssuSSP.Ss.Qhstsp.Kusss....Psus.shsshss..usRspSpuTPhP.Pp ...........................................................................t+Ep-thRpp.p.sh.tt..t.....th.t...........sh+.ht..-.tsht.............+Hh+h..u.hohPs.s.-p....h.....sps...........ss.u.usRS.Sh..HSVtuh.Dss.uuhs...........tsp+.pPPsKP+RcPsT+LShS.cs........................................ss..uphss...................................spptssp..........+PpPcu...c-....+KlPP.KPKRsPNTpLSsSa-E.hh.t.......s.........stt...s...sh...........sht.t.....upsss........................sp--EPVYIEMVGshhRsht..........................tpps-puE.ulYEEMKYsl..--hs.......t....s..t...hss...........sS.....s.p.p..................s.h.t...ss..pt..C-IPPPFPNLL.HRPPLLsFPsusspsSssuDpsPlsPlpVp+LPVLp..............s.pa.c.pssuuoP.u......phstppptps.........tsshhh.ss....ssps.opusP.Ps.s......................................... 0 3 11 30 +15295 PF15440 THRAP3_BCLAF1 THRAP3/BCLAF1 family Eberhardt RY re3 Jackhmmer:A2AJT9 Family This family includes thyroid hormone receptor-associated protein 3 (THRAP3), which is a spliceosome component and a subunit of the TRAP complex which plays a role in pre-mRNA splicing and in mRNA decay [1]. It also includes the transcriptional repressor Bcl-2-associated transcription factor 1 (BCLAF1) [2]. 
27.00 27.00 27.30 27.30 25.60 25.60 hmmbuild -o /dev/null HMM SEED 646 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.67 0.70 -13.46 0.70 -5.86 22 245 2012-09-20 09:01:36 2012-09-20 10:01:36 1 8 42 0 94 181 0 501.30 34 75.82 NEW uRSRSRSP..K+RSh.SPtsR.spp+..acp+pSps+h.ssp.......R.....+csppss.phR.hcs-K.pspscp+hssctsst..h+ph..E..c+ususshc.tsh-shhs.ps.p..................p.sh.Pt...tsD.sspRuph.Pphsps.sshccppps.......shs.psps+hh.sshp.ptstpst+ssp.........RSh......usS......+apsp............h+.........psphc.tpph..pppphspS..+cu....s--hpsRoSFpK..RYP..E..Dp-hcphs+ppcRsp..DhE+h.........c.....s+cssR.....ssch......c.cc.uhss...hp-cc...cphshu.psc.RhstR-h.-p.....ups..sK......ohDYc+K++p.hs..cs-pcFsDs........csp+hsccEDpKappptt...s.hs.+.EussFs......ssRsccoc-tps........+c.s.K...scKcshsps....p..ssps-lchcssppKhcp+hc..pp....shR+p.ssspppphspspch..sthKsSssph+c.....cplslKlDst..shDphR..suuShssERphu+DLVtsu+KpppF+slF-HlcS.sQs.pppPotpFsQcIlTIlHpVKtpYFsSsshTLpERFo+hp...css..p-s.+pppsPEIHRRIDhS.upLpp+pph......hpE.......oppshhK........h...DPsDLRHDIERRRKcRhpsc-c+thp......hsSuspRspppS..phpph.ps.cs-tFpKss+hh.+.sphRph.ppPpps........oppppcshpp...ctathcsppcs..cp..ppsFc 
..................................................................................................uRSRSRSP..K+RS..S.tsR.spp+..p..+pS.....p..s....ppp.....p.......sp.ss.p.R.hpppK.ptptt.t.........s.ptps....tsh.p...ppt.psp.p..s.ts..php..p..................t.t..st.....ths.sp.pu....s..tt..s.pppp.p.......s.s....s.....s..s..p.t..tsh..tts..p....................puh..................ttu.................phpsp......................p............ps.hp....h..p.tph.ts..pt..............pt.tttuua.........Y...-.....-pcht.......phtch.p+.p..-..cp..................p+tshp.....p.th......................t.c.s.........tc.pp...ths..st+.t..+p.t..pp......sph..tc........p...D.pphcp.t..hh...tpptc...st.............t..p..s.tts.+.p.t.t..........hp.h.E..hht..........ptct.chpp.t.....................tt+pp.t.tt....p..s....h..ph..........ph.......st........c.p..p.......hp...s.s.pp.h....s.ttp.....s.hc....S......h+..........tp.pV+...hc.h..s.D..p...............su..hsp-R.hspsLVtss+Kpp.pF+slFpHlp..sQs.ppssSc.FhQcIlollH.pVK...tpaF.SsshTLpERFoph.......+usppc...p.ppsPEIHR..R.IDl.S.ushpp+.thh......htE........ppts.K.............h.tDssDLRhDI-RR+K....c+.pp...cp.hp......htSs.s...pRppppo..p.pp......cscthpK..p...p...........tp...s...........p.................................................................................. 0 3 12 34 +15296 PF15441 ARHGEF5_35 Rho guanine nucleotide exchange factor 5/35 Eberhardt RY re3 Jackhmmer:A5YM69 Family This family includes Rho guanine nucleotide exchange factor 5 [1] and Rho guanine nucleotide exchange factor 35. 
27.00 27.00 69.70 36.70 23.10 18.80 hmmbuild -o /dev/null HMM SEED 487 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.14 0.70 -12.70 0.70 -5.69 11 32 2012-09-20 09:25:48 2012-09-20 10:25:48 1 5 22 0 18 32 0 418.10 51 33.63 NEW MEAEEPpHGASTPlPAlcEhullPEulMRSSQIPAL-PEAQEGpDPSapWsEGHRLltsQp+-LRDssDaAs-Shs.FPK.EuSsssEosQEsLVAEAsD.TPEpQEuVsQSLsDcpARTlAsPEhhACPlQuEaLDh.sshSocLsoR.VEsEhpPELTSLsLusupA-E..cEEsSPssSspstaaPsCc.cHPuETsps.csutussl+QGEcLQhctsQES...p...G...L...LpPQEAQGLEEQttQEsthQtEGTlpEslCsDGLhGE.p.QhsEp.ssGsEtEQ+QKQEQlQ.D.sh.LG+QGEppGLs..GELEGLssuEhs.EppEpcspspcssEptEppcpEhpuPEEpcssoQstEsQSLlEKSEcVotKQEscGlp...GcshsVEGQE.......E.......EEPGsWDuushushscppsspEpcE+cGPSsL.AhVAPEVsSPsDLFP-sShPhopIPGTQpEP..tAEELSPtALsPsLEPscWSaQPlS.PuSFPstESLDscT ..MEAEEsQHGASsPIsAltEhSlIPEA.MRSSQlsALs.EAQEscDPSYKWpEtHRL.tTQQp-LRDVsDaAhEoMsSFPK.EuSu-VEssQEslVAEssD.TPEp.EAsPQSLAsRQARTlAsPELhACPlQuE+LDh.sshSS-LGSc...EVEFhPtLTSLsLGouQAEE..cEEoSsDsSuQTpaassC-.-HPsETsQs.csutSGolRQGEEL..c-hQESp...GLlpP..QEsQsLEEQGpQE.sshptEGTLtEslCuDGLLGE.p..QMhEQ.lNspcGEQ+QKQEQlQ.DshLG+QGER.tLs..sE.EGLNsuEhtpEphEpcspsQts.cptEtpcpEhphPEEpcssuQspcsQ.ohhtKSEcVotKQEspGhp...tcshslpsQE.t.......cE.tshDusthss.s..ttpstEEtpEHcGPsh..s.lAPEsss.sDlFPsssh..ho.pI..PtTQpEs..pAEELoPtA.sPsLEPhthSaQPhS..uSFsstES.DpE.......................................................................... 0 2 2 3 +15297 PF15442 DUF4629 Domain of unknown function (DUF4629) Eberhardt RY re3 Jackhmmer:A6NCI8 Family This domain family is found in eukaryotes, and is approximately 150 amino acids in length. There are two conserved sequence motifs: MHML and LGKK. 
27.00 27.00 44.60 42.40 19.10 18.20 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -10.96 0.71 -4.08 26 89 2012-09-20 12:35:51 2012-09-20 13:35:51 1 2 25 0 50 72 0 142.30 49 19.49 NEW usS-psRKNKHKASE.lsuAPcAKIQscs.-sLltGEss.lssAsuS-+AstshAK+pssKs.KsAsuRss..ps+upGQ-+s++spENsoKKstEpKQSss+V.KAEEKPsIPKpKRK+s.PELSQEsFKKPRosLGMHMLESVQVFHsLGKKs ..........s.SDQsRKsKHKASEPlpGAPcAKIQscss-sLltGEss.lssAsuS-+AssNhAK+sssKspKuAuSRss...psKupGQE+sKpsp-NsSKKspEpKQSss+V.KuEEKssIPphKRK+s.PELoQEoFKKPRosLGMHMLESVQVFHsLGKK.......................... 0 1 1 5 +15298 PF15443 DUF4630 Domain of unknown function (DUF4630) Eberhardt RY re3 Jackhmmer:A6NCS6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 124 and 286 amino acids in length. 25.00 25.00 25.50 25.50 24.00 24.40 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.71 0.71 -10.98 0.71 -4.41 6 29 2012-09-20 12:49:22 2012-09-20 13:49:22 1 1 25 0 18 23 0 138.90 47 63.95 NEW GA..ALVGVLVAEAuPEDAVAP....tLRLLEALLRsVFGRQAGG....PVQAAAYsPGpPA.SSLAVQsAACRALQAAGPu+PAEGAWERPGLPuLLACFSWGPWS.RtKs.ssous.suPsQschQ-sEEELALTulaPNGDCED.sG+GStApDGlsp.sPs-PsGDo ....................................hlGlhhh.st.c-u.su........tlplLEuLLRsVF..G+psGG....sVQAAsYsPGpPA.SsLuVQtAACRALQAAGsucPs-u..AhERPuLPuLLsCFSWGPhp..++Kspsssus.psPup-shQ-sEEELALTul.aPNGDCED.hGpGocApDGshH.sPs-PsG-................ 0 1 1 5 +15299 PF15444 TMEM247 Transmembrane protein 247 Eberhardt RY re3 Jackhmmer:A6NEH6 Family This family of transmembrane proteins is found in eukaryotes. Proteins in this family are typically between 197 and 222 amino acids in length. The function of this family is unknown. 
27.00 27.00 30.50 30.40 22.00 22.00 hmmbuild -o /dev/null HMM SEED 218 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.75 0.70 -11.58 0.70 -4.52 7 30 2012-09-20 13:25:11 2012-09-20 14:25:11 1 2 23 0 16 41 0 198.90 71 95.41 NEW MAsEDREhMEuRGAGESCPThPKhVPsDshsEGKP+ApLEAEs.KPDSSYDaLEEhtsCEDGuCsGPPK........s.sstuuPs.TKGQAGDGP..EssELsh........sPGsEHssEMELEKlRMEFELTRLKYLHEENERQRQHEpVMEQLQ.....pQAss................FSGGLQDLLLPQNQFAMFLYCFIFIHIIYVTKEMlFFLFSKHYLFCIAAILLCLIKTLWS ..........MAsEDREMMEARGAGESCPTFPKhVPuDshSEGKPRAsL...EAES.KPDSSYDaLE..EMEsCEDGGCsGPsK........SLSsKusPs.TKGQAGDGP..cs.uELP.........sPGT...E+.NsEMELEKVRMEFELTRLKYLHEENERQRQHE.VMEQLQ....pptp.....p....l.p......h.s..FSGGLQDLLLPQNQFAMFLYCFIFIHIIYVTKEMVFFLFoKHYLFCIAAILLCLIKThWS............ 0 1 1 2 +15300 PF15445 ATS acidic terminal segments, variant surface antigen of PfEMP1 Rask T, Coggill P pcc Rask T, [1] Domain ATS is the intracellular and relatively conserved acidic terminal segment of the Plasmodium falciparum erythrocyte membrane protein-1 (PfEMP1) [1]. this domain appears to be present in all variants of the highly polymorphic PfEMP1 proteins. 
27.00 27.00 29.60 29.10 21.30 20.80 hmmbuild -o /dev/null HMM SEED 437 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.73 0.70 -12.78 0.70 -5.37 35 168 2012-09-20 15:35:02 2012-09-20 16:35:02 1 33 2 0 22 173 0 348.00 56 19.28 NEW STlsaulGluhuohsahaLK...KKTKosV.sLFplLpIPKuDYsIPThKSsNRYIPYsSs+YKGKTYIYMEGD....ouuD-.KYsh.scooDIT.SSESEYEElDINDIYVPuSPKYKTLIEVVLEP.................................o.p........sTQNDIssDsl.PSs..................lTDsEWNpLKcDF......ISphLQsp..Ns.P.pshhssslshNTpP..Th.ccsh............EKPF......IhSIH.......DRNLYoGEE....YoYNl....sM........STNohs............D.shsssN..........................sl.......YSGIDLINDoL..Gsp.lDIYDElLKRKENE....LFGT.NHsKp.TSspSVAKpT.sSDPIhNQLsLFHKWLDRH...........RDMCEKW...NNKEElL-KLKEEWpp-sp........SuNh............................................................spN......................+sLNTDVSIQIcMDNPKPhNEF............oshDo.sptssMDoI..L-DLEK.aNEPYa...hpDD.IYYDVNs-cssosps...................................hspNs.hDV...PoKVpIEMcl.s..p..cllcEcYPIuDVWsI ........................................Tl.aululuhu.uhsahalK..KKs.KosV.sLhpllpIPKuDYsIPThhSsNRYIPYsSspY+GKpYIYhEGD....pusDp.tYh..schoDIT.SSESEYEEhDINDIYsPtuPKYKTLIEVVLEP.................................o.s...........c.T.p.NDI.sDsh.Pos.................phTDsEWNpLKc-F......ISphLQsp..ps.P..shhpssls.NTps..Th.hcs..............-EKPF......IhSIH.......DRsLYoGEE...hsYsh.............sNs.p...........................p..h.tpN....................................YSGIDLINDsL.sGs..hDIYDEhLKRKENE......LFGT.pasKp......To.psVuK.s.psDPIhN.QlpLhHp.WLD....RH...........RsMCEpa...pscp-hLsKLpEpWpp-sp.............ussh..................................................stN.........psLNTDVSIpIcMDpsKshpph............o.........sMDshhps.hc..................sD.IYaDVp.p.t..........................................spN..h-h...PpKlplEMpl.s.......t.hc.paPI.......................... 
0 22 22 22 +15301 PF15446 zf-PHD-like PHD/FYVE-zinc-finger like domain Wood V, Coggill P pcc Pfam-B_5236 (release 26.0) Domain This family appears to be a combination domain of several consecutive zinc-binding regions. 27.00 27.00 27.20 27.80 26.20 25.70 hmmbuild -o /dev/null HMM SEED 175 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.84 0.71 -11.46 0.71 -4.98 28 70 2012-10-03 17:27:21 2012-09-20 16:37:26 1 6 70 0 57 73 0 166.90 44 10.80 NEW sCcoC...usstp+G..sLlaCQGCosSYHKsCLGsRSsR-HhVTKVGs-sFVLQCRhClGlh+cKDstAP+putCpsC+ppGhuCtPFpp+pTs+QEpplR--NuGsDPITsV-ssL..lNNs-NVLFRCs....pC+RuWHhcHLPshup.s.sssssps..........p.h..pppRhcEYShcWpC+-Ct .................................CcsC...uss.p.+G..sLlaCQGCosuYHpsCLGsRusR-HLVTKVup...-pFVLQCRhClGhs+pKDspAP+putCptCpp.Gt..hscPh.Rp+hTs+QEpplR--NuGtDPITsV-.sL..lNN.....s-NV.....LFRCs....sC+RuWH.hcHLPshspss.t....t.p.p...........p.htppRhpEYohcWpCc-C.......................................... 0 10 27 45 +15302 PF15447 NTS N-terminal segments of PfEMP1 Rask T, Coggill P pcc Rask T, [1] Domain NTS, the N-terminal segment, is the most variable part of the variant surface antigen family of Plasmodium falciparum, the erythrocyte membrane protein-1 (PfEMP1) proteins. PfEMP1 is an important target for protective immunity and is implicated in the pathology of malaria through its ability to adhere to host endothelial receptors [1]. A structural and functional study of the N-terminal domain of PfEMP1 from the VarO variant comprising the N-terminal segment (NTS) and the first DBL domain (DBL1α1), shows this region is directly implicated in rosetting. NTS, previously thought to be a structurally independent component of PfEMP1, forms an integral part of the DBL1α domain that is found to be the important heparin-binding site [2]. This family is closely associated with PFEMP, Pfam:PF03011, and Duffy_binding, Pfam:PF05424. 
25.00 25.00 25.40 25.20 22.90 22.00 hmmbuild -o /dev/null HMM SEED 37 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.23 0.72 -7.49 0.72 -3.56 205 318 2012-09-24 07:58:16 2012-09-20 17:12:31 1 36 2 2 14 324 0 37.30 39 3.36 NEW sAKclLDpIGcpVa.cc.l+...p.........c.....A..cpY.ps.pLKGsLspApa ..SA+slL-pIGcplpcc.scp.........c....A....ppY.pspLKGsLopApF.. 0 14 14 14 +15303 PF15448 NTS_2 N-terminal segments of P. falciparum erythrocyte membrane protein Rask T, Coggill P pcc Rask T, [1] Domain NTS_2 is a family of the most variable part of the variant surface antigen family of Plasmodium falciparum, the erythrocyte membrane protein-1 (PfEMP1) [1]. However, in this group of proteins conservation is high. PfEMP1 is an important target for protective immunity and is implicated in the pathology of malaria through its ability to adhere to host endothelial receptors. 25.00 25.00 29.60 98.10 22.50 17.90 hmmbuild -o /dev/null HMM SEED 51 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.11 0.72 -8.60 0.72 -3.67 6 14 2012-09-20 16:26:11 2012-09-20 17:26:11 1 6 2 0 0 16 0 50.70 79 1.93 NEW MDSKoTIAsKIEAYLccKSs-ScIDQSLKADPSEV-YYsSGGDG.YL+pNI MDSKoTIA-KIEAYLttKSsDSKIDQSLKADPSEVpYYpSGGDG.YLKNNI 0 0 0 0 +15304 PF15449 Retinal Retinal protein Eberhardt RY re3 Jackhmmer:A6NGG8 Family This family of proteins is found in the photoreceptor cells of the retina [1,2]. Mutations of the gene encoding this protein have been associated with retinal disorders such as retinitis pigmentosa and late-onset progressive retinal atrophy [1-4]. The function of this family of proteins is unknown, but it is likely to be important in the development and function of the retina [2-3]. 
27.00 27.00 60.30 40.20 17.70 26.80 hmmbuild -o /dev/null HMM SEED 1287 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.27 0.70 -14.14 0.70 -6.91 14 48 2012-09-21 07:43:49 2012-09-21 08:43:49 1 4 32 0 29 36 0 990.40 43 96.82 NEW MGCTPSHSDIVNSVAKSGIQFhKKPKAILPG+QusSE+sSIPLLVpSSTCaDsGtshptG.......pc.tpEpPus+hsQosucuhpQhstsPhsuptKDhEGhhPEscoo.SQLscSQSHhAcDlsh+TQuSHtoQGuuFuG-EucEssoQcoSpht+KP+CHpsucQ.GHs..sQT....hhPAhtscuKVDFPEPLVKAHQ+AYsYLHosLSKYEAILplscQAoQTpELLQPMVoFLLLCF-ElNQLLGEISKDGEsLLQEV+tDLAWP.+KGEPpEQPDLLQQLLQYTVuKLQlLpGTVAoLTuShLEGSuSYLposASHL-sKLSsKRGhDE+LLRALGQLESLASGHuDPGlpshPLCSEDSGIGADNESVpsl...DKLGKQuSWDhssEPuEWKsshsP.psEA+hsGpuWQpuPahhGuDRPQDCPLSRPhtAKlQPAAQGpAsssssSuouPEssoo.RPhthuKSssp....-Shusssss-A+hsKu.StlhsoPSLSEsEDSSsE.E.-ED-huuhs.sshpcpss.sRPpSSPAssESsFQP+s+RLRSPQAQEMILKMKEAISERIKFVPVPSu+QDWAEE.EEt+ThVssRPSTsSGSRRAPtRQRRSQSEuCLKSasEDPTLQELRRVQ+DLSQRLEsFYALGs+pQGQu........pEplL.PRAAs.LhP-sssRVsPSoTISKLKASLTKNFSILPSQDKSILQ+ssPpP.......EuEps.Q...scAEtLPss...hPsuE+......s.EA......PGspDhssRGCPTRTSVKKLIETFSPsEuLRT.GDS+ssGsSPCLRKWGlPIMPPRFPIYRGLAPLYPKPQISPAuGp-sLphG.uWRPhAPhFPPL.sAEAscs-D..h..ssEs-ED.EcLPPPPLEILMDKSFsSLEsPESSpsuGSSsEsTtsPGLupssss.+RTWASPKLRASMSPhDLLPSKsssoPsRspuTGPGsoKsssssRKLsLDLsp...sPAsutsPEsEu...tu...Q..sp..A-+AsSLuKpPpKAlPWHHsSpTSGQs.RTpEPSlARPopGP.....+SPEAs......RpopERSPsllRKASPTRuH.WsPpuD+Rp.S.PSoHRP........AQPSlPsVpuSP....SPPl.........SP....Rs............hSPPss++hsSPPsp+KhPSPP...s.............huSPPsQ+sEAuSPuSuPSsSPPsSPSQGpK-sp...cS.EDupuusu+suuNTpSIFCPATSSLFEA+...sP.Sss+PhoP....PEuGGshtpPsGsWRuSuGPRhRu-SQRpsuLCALNPQPFlRRTASDRpP....Gs..+LpLP.usuhsupsspsthSpSSS.S.EESPKK-sEPWsuPsuPEL+G.uuRtASPPELCVLGHGLQ 
.......................................MGCoPSHotIlpolA+sGlphh+KPKslhss.ttss-+hslPLLspsSohhs.st................ppt..t.ph..thtps...........ptcthpt..stsps...ph.cuQp+hscch.h+pQuSptoptsshss-pspEpsspt....t+p.+spppupQ.s+h...pT.....hss.tsctKVDFPEsLVKAHQpAYsYLHssLSKYEAILplhcQAoQTp.lLQsMloFLlLpF-ElspLLtEIucDGEhLLpEVttcLAWP.pKt-spEQ..............PDLLQQLLQYTVsKhQhLpuTVAsLTuohLEsSsuYLposAs+LppKLpsKRshDEpLL+sLupLEuhAsupucPthpshPLpSEDSGIGADNESlpth...DKLG+QsSaD.ssc...phc.hhts.phEst...psWppsPhhhu.tpspDs.Lpt..hshhpPtspststsss.S..shsstshss..shthtpStst....sShshshshcsch.cs.st.hsssShs-sEDSos-.E..--p.ushs.p..pcpt...RPpSSPAsh-usapsps+Rlcu.QApEMILKMK-AISE+IKFVPs.sspp-WsEE.E-tpshlssRPSTssG..up+sst+Q+RSpSEtsLpSpsEDPTL.ELpRlQ+DLSp+LEhFYshst.+tpsps........pcp.hpPRsss.lh.ss.spsssSsohS+LKASLoKNFSILPSQDKslhQ+sssps....csc...p.spAEtL.ss...hsssEh......pcsstspchsscuC.ss.RsSVKKLIETFSP..sEplth.tso+s.GsssClR+hGhslhPPRhPhYRGLAPLYPK.pIoPusutp..phs.uW+PhAPhFPPl.tuttscpp-..h..pt.Eh-t.s.EpLPPPPLElLMDpSFsSLEsPEospsstsSscts.t.Pu.spssss..++ThsSsKL+AShsPhDLLPSKssssss.t.pus.usGss+stspsRc.sL-lpp...sPssstssEhps...tt....sps-cAsuL.+pspKslshppsu..s..SGps.ps.Esuh...uRs...............+pupE+SPshsRKsSPoRsp.Wssps-+R..S.PusaR.........uQPS.sslpp..P....SPPh.........SP+s............hSPP.st+c.sSPPhp.KhPoPP....s............pp.sSPPsp+.EsusPsshso..PSPPhSPSps.Kpsp....co.--spss.uKsuuNspSIFCPAoSSLFEA+...ss.s.sp..ss....PEsGs..ttss.shRsS.tsh.hu-pQRphslsAhNP.PFlRRohSD+p.....t.t.t......t.....t....p.t.spssu..t.EEus.p-sts.hssspss-lpt.us+.su.P-hhVlGpGLQ................................................................................................................................. 0 1 6 14 +15305 PF15450 DUF4631 Domain of unknown function (DUF4631) Eberhardt RY re3 Jackhmmer:A6NI56 Family This family of proteins is found in bacteria and eukaryotes. Proteins in this family are typically between 394 and 668 amino acids in length. 
28.00 28.00 29.70 28.60 27.90 27.60 hmmbuild -o /dev/null HMM SEED 532 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.91 0.70 -12.85 0.70 -6.22 8 50 2012-09-21 08:26:51 2012-09-21 09:26:51 1 3 32 0 23 51 0 415.00 42 75.89 NEW LsSosssPEQDTsKRWcQLEQWVA-LQAEVssLRGHKpRCE+AoLSLLRELLQVRA+lQlQsSELcpLppEl+.psAhsPEKEA.EhSGsQ..sQNQMQALDKRLVEVREALTQIRR+QALQDoERKGuEQEAsLRLs+LoshL+QEEQuREsACSuLQKsQE-uSQKVDcEVARMQAQlTKLGEEMSLRFLKREAKLCGFLQKSFLALEKRMKASEooRL+sEsuLREELEuRWppLQELsEERLRuLpGQpEVususp.QpEEu+LLEQCRGLDpAVVQLTKFVcQNQsSLNRVLhAEQKAR-AKspLEESpApELAuYlQENLEAsQLA......u-LApQETpssLELLQ.EKSQsLEsSVApLspQLKDLsDHhLALSWRLDLQEQTLuLRLoEs+s.EWEGsER+SLEcLAphpcEspAHL+EVpEKVD.uLPQQIEuVSDKClLHKSDSDlKIsAEG+AREaEVcslRQELAsLLSSVQLL+E-NPGRKIAEIQGKLA..TsQIhKLENSIQsNKTIQNLKFNTETKLRoEEhAoLRESh .......................ppt...pQDs.pph.Q.pp.hspLps-lsplRtpppp..s-pthhtL.p-lhpl+sphphQso-Lhplpp-h...hs.s......-..st.....psthphLDpRLhEl+EtLsplp+p.s.pps-RctsppphshRLs+Lssh...LpQE-psREsACusLpKsQE-sup+ls.E...sA+hp.AplscLuEEhSL+FL+REAKLCuaLQKsFh.ALEpthKspEssR..hEtsLttELEs+WptlpthhE-+ltuLpuQp-..........tEcu+LlEQCpuLDtAVstLT+FVppNQsSLs+lLhAEtKAh-u+splEcopst-LsshlppslEAhphu......uc.ApQEhpspLplLp.EKspsLEsSl.spLsppl.+-Lss...+h.ALS.+...............lDLQEQhLsh+Ls-s............ps.Eapusc+csLcclsphpcEspscLctlpEKV-....ulPppIcslSsKCllhKsDsDh+IssEu+sRch-ltslRQELAslLpulQLL+EcsPu.....RK........IAEhQGcLs..ppQIhKLEsslQssKTlQNL+FNsEs+hRhp-hAsL+Eph........... 0 6 7 11 +15306 PF15451 DUF4632 Domain of unknown function (DUF4632) Eberhardt RY re3 Jackhmmer:A6NIN4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 59 and 190 amino acids in length. 
27.00 27.00 126.50 126.40 24.00 18.50 hmmbuild -o /dev/null HMM SEED 71 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.77 0.72 -9.45 0.72 -4.05 5 14 2012-09-21 10:53:32 2012-09-21 11:53:32 1 2 12 0 8 17 0 69.60 77 70.53 NEW AGuPAKESGDsDGEAD.EEGESEKGAGPRSAGWRALRRLWDRVLuPARRWRRPLPSNVLYCPEIKDIAHMTR hGsPAKESG-.DGEsD.-EtESEKGAGPRSAGWRALRRLWDRVLAPARRWRRPLPSNVLYCPEIKDIAHMTR 0 1 1 1 +15307 PF15452 NYAP_C Neuronal tyrosine-phosphorylated phosphoinositide-3-kinase adapter Coggill P pcc Jackhmmer:Q6ZVC0 Family NYAP_C is a C-terminal family of eukaryotic proteins that are substrates of tyrosine kinase in the brain. When first identified, the family members were referred to as unconventional myosin XVI, or Myr 8 [1]. However, proteins have now been identified as being integrally involved in neuronal function and morphogenesis. The family is involved in both the activation of phosphoinositide 3-kinase (PI3K) and the recruitment of the downstream effector WAVE complex to the close vicinity of PI3K; it also appears to regulate the brain size and neurite outgrowth in mice [2]. 
25.00 25.00 33.40 33.20 23.70 18.80 hmmbuild -o /dev/null HMM SEED 308 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.27 0.70 -12.17 0.70 -5.42 3 80 2012-09-21 12:13:01 2012-09-21 13:13:01 1 6 35 0 40 66 0 245.10 47 38.50 NEW LASPHSLPDPTusPL.sPLWTYPoT.AGLKRPPAYESLKAGGlLsKGCGVGAPuPMVKIQLQDQGTsGGAFASISCAHVIAS..SGTPEEE...EEEVGsuTFGAGWALQRKVLY.GRKsKDp.-TEssEGuRAWNGSuEGPSKsEREEK...GsLoSGIPVRSQGAEGLLARhHHuu+....GGSRTGLPlPCQTFPACHRNG.DFTGGYRLGRSASTSGVR..psslHTPRPCSQPR-ALSQsHPAL..sLPLPPQPu+ERDGKLLEVIERKRCVCKEIKARHRPDRGLCKQESMPILPSWRRGPEPRKSGTPPCRRQQTVLWDTAI ...............................tSPHuhP..cspuus..oPl.h.h.u.s.shuLKRPPsY-Sl+uGul.ppu...usPpshs+.plQ-tu.p...suAh.so.h..usupshup...utTPpp.....EE.hsuhFsuGhuL.RKs....GtRptc...h.-..cspDth+shstSs.Es.sKlEpc-R....Gs..su....St.P....V+....uQth-Gh.....tss............suSRhuhs.ss.Th.Asp+su.-.psuh.RLGRSASTSGV....su.l.p.s.R.sSps.p.s................................................................................................................................................ 0 1 3 13 +15308 PF15453 Pilt Protein incorporated later into Tight Junctions Coggill P pcc Jackhmmer:Q5JTD0 Family Pilt is a family of eukaryotic tight junction-proteins that binds to guanylate-kinase. Pilt is a component of TJs (Tight junctions) rather than AJs (Adhesin junctions). The protein is incorporated into TJs after TJ strands are formed, thereby suggesting the name Pilt for 'protein incorporated later into TJs'. Pilt binds to the guanylate-kinase region of hDlg otherwise known as Disk large homologue [1]. 
27.00 27.00 27.00 27.00 24.10 24.10 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.36 0.70 -4.74 15 99 2012-09-21 13:25:27 2012-09-21 14:25:27 1 3 38 0 48 81 0 224.40 47 54.09 NEW Moss..ss..........sus..tstpsspQ.pNGpCpSpuoh-uss..E-.hslPuFEKLNPYPTPsPPHPLYPGRKVIEFS.-DKV+IPKNSPLPNCTYATRQAISLSLVQs--..c+.s.........+s.ussPs.......o..P..uSs......s..........uSsQssP.S...............................s.PQ.sPSshASSuSSEEDL.LAsWQRMFVEKssPou-tull.pRTuFS+-TAtELQ++h.sh...us..ushspsssAa...........................................................us..u.-Eus.pll.........sttssstps+-c...p-lslPsSspEE..+..p.LLptpcttppsss.stp..scs....................stssSuRPQRSPKRMGVHHLHRKDSLTpAQEQGsLL ......................................................................................................................................st...........us..hstpst.Q..NGpCps.uostuss..E-..shPuF..EKLsPYPTPsPPHPLYPGR+VIEFS.-DKV+IP+NSPLPNCTYATRQAISLSLVppss..cp.t.........p..sssss.............s..s..tus..................tuspppP.s......................................s.s..h.Ss.ASS.uSSEEDL.LusWQRhFV-+hsPsu....s..h..spRTuFuccshs-L..Q++h...sh....us....sshst.h.s........................................................................h..............s......ptt.............th.ls.Sst-p..pp.l...t...t.h...t.....t..............shss.pRPp+SPKRMGVHHLHRKDSLTpAQ..GsLL................................. 0 1 7 18 +15309 PF15454 LAMTOR Late endosomal/lysosomal adaptor and MAPK and MTOR activator Coggill P pcc Jackhmmer:Q02205 Family LAMTOR is a family of eukaryotic proteins that have otherwise been referred to as Lipid raft adaptor protein p18, Late endosomal/lysosomal adaptor and MAPK and MTOR activator 1, and Protein associated with DRMs and endosomes. It is found to be one of three small proteins constituting the Rag complex or Ragulator that interact with each other, localise to endosomes and lysosomes, and play positive roles in the MAPK pathway. 
The complex does this by interacting with the Rag GTPases, recruiting them to lysosomes, and bringing about mTORC1 activation. 25.00 25.00 25.20 25.50 24.20 24.60 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.07 0.72 -9.53 0.72 -3.69 45 181 2012-09-21 14:43:52 2012-09-21 15:43:52 1 2 170 0 125 166 0 75.80 28 44.32 NEW psEpo+LLscs...ps...shstht....st.s........pssspphp.Rc.pAL.ssIlpcTu-slIDlsuhpsps...ht..ppp.hs.s.phhst ...............sEpp+LLt-s.....ps..s.stht..pssp.s...........pppspphp..cE.pAL.psIltcTusslIDlsuhsspsh...tpp.h...p....s........... 1 25 52 91 +15310 PF15455 Pro-rich_19 Proline-rich 19 Eberhardt RY re3 Jackhmmer:A6NJB7 Family This family includes proline-rich protein 19. 27.00 27.00 40.10 40.10 21.20 21.20 hmmbuild -o /dev/null HMM SEED 357 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.51 0.70 -12.14 0.70 -5.42 8 30 2012-09-21 15:10:35 2012-09-21 16:10:35 1 2 24 \N 15 29 0 307.60 61 73.26 NEW MDPRGPAPQPFQQPEKPGRVRRRKTRRERNcALV.......GSRRPLA+Q-PPVASRDP......P......VuPsAPKLVVITQGRLSREHRGLFNHEVKSLDVARLLSSGoLEPsTPsLsTKPSPSPGRuQEPu...QSRGKENQVPGGSGPGPPSsPELPGlGQLLpELQCQLlLPQAFPRRNLVQEARDAIVGTLQACHGCVPDLuLVLRGCQPPLP..GsKPpssERpRMTPSWINSPEQ..APtEGRQRRp..QGTKElTFsMPHT.SSTPTsHRsSLsPP+GPW.....PPsLPSLsSPSGsAWGPPTAFDLLKSIWLVATPP...PP+PWGVGPPQPLPQPPSPLLPRTSALDWSPSPPAPLPSLSWVVAQSSPEAWSFPPMRLY .........MDspGPssQPFQpPEKPGRVRRRKTRRERNcALs.................GSRRPLs+pDPslupRDP.............P......VsP..sAsKLVVITQGRLSREHRGLFNHEVKSLDVARLLSutsLpPsoPsLPsKPSPS.......PuRuQEPu...QSRuKE....NQVPGGSGPGPPSs..P-LPulGQLLtELQCQL.LPQAFPRRNLVQEARDAIVtTLQACHGCVPDLuLVLRGCQPPLP..GsKPtssER.+MTP.WINsP-Q..sPtptRQR+p..pGTKEhsFshPaT.SShPssHRsSlsPP+uPW.....PP.hs.hsSPSGsAWGPPTAFDLLKSIWLVATPP...PPpPWslG.sQPLPpPsSPLLPRTSsLDWSPsPPAPLPSLSWVVAQSSPEAWSFPPMRLY.................. 
0 1 2 2 +15311 PF15456 Uds1 Up-regulated During Septation Coggill P pcc Jackhmmer:Q9P6S3 Family Uds1 is a domain family is found mostly in fungi, and is typically between 120 and 138 amino acids in length. The GO annotation for the S.pombe protein describes the protein as barrier septum assembly involved in cell cycle cytokinesis, GO:0071937. Many of the uncharacterised members are listed as being involucrin repeat proteins, but this can not be substantiated. 27.00 27.00 27.00 33.30 26.90 24.90 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.42 0.71 -4.13 55 148 2012-09-21 15:33:05 2012-09-21 16:33:05 1 4 90 0 122 149 0 126.70 34 15.67 NEW tlcLLscsAlsDSppaElLohEEV-sLKKEhphLspRlcss+cpLs.lcsKhR-.AAtS..ls+Lhsstptt.......................t.sppshpcs-cpLstss++s-Ehsp-LhplEpRhtclcp+LLEHsAulLphop+ ............hcLLpcpAhs-upcaElLshEEV-sL+......+.....Ehph..............LspRl-hh+cphs.lctchRc.usts.lsphhpssptt.............................................sphspcshhcpEEtLup.-pph-EhspcL.phEpRttcl+p+LLEHsAulLphs........ 0 32 66 100 +15312 PF15457 HopW1-1 Type III T3SS secreted effector HopW1-1/HopPmaA Coggill P pcc Jackhmmer:Q8RP17 Family HopW1-1 is a family of bacterial modular P. syringae Avr effectors that induce accumulation of the signal molecule salicylic acid (SA) and the transcripts of HWI1 (HOPW1-1-INDUCED GENE1) in Arabidopsis. Thus HopW1-1 elicits a resistance response in Arabidopsis [1]. 
27.00 27.00 64.80 64.70 22.20 21.30 hmmbuild -o /dev/null HMM SEED 321 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.91 0.70 -12.11 0.70 -5.39 5 28 2012-09-21 15:54:11 2012-09-21 16:54:11 1 1 20 0 1 31 0 288.10 65 38.93 NEW MMPSQITRSSHSSLP-sSPASuDAsSSpEQSPQQsRT+AFVASGELusAFGRTSTAPAQDsVRLLusLQREL-KcsPSaP-VApLuppLAEAAMTEQGhHhLAoEE.Q.psLKslLDRCT+QLADTPAScASHDuLSQACEGLKTARLHQSlApLTGcoHApsRGVPDLLALsHLDP-VLA-KPsuhsSYspFGSFIpTAKsRTA-Ls-SLpcsuuEVsuLLRuHADTLpuLE+LPuALAALTENCPDsPTpcDLRuLAEsAGELLQQLRssDLLPRSEEISSEsGEosV+u+EsVE...P+LTpuQALLhAGGNLVRKFDAYGAl .MMPuQITRSSHSSlP-susASuDAsuspEQoPQQsRThAFhASGELusAFGRTSsAPtQDsV+LLusLQREL-KppPSa.sVApLsppLAEsupTEQGhH.LAoE-.Q....sL+-lLDRCpppLADhPAucASHDsLSQACEGL+TARLHQSlApLTucspuhsRulsDLLsLsHLDP-sLutcPsshoSYshFupFlpTAKpRTA-Ls-sLpcpPstVsuLLRuHADTLp-LEhLPuALtALTENC.DsPsps-LRpLAEsAGuLLQhLREpDLLPR.EEIS.EsGEuPs.u+EssE...P+LTppQALLKsGGNLVRKFDAYGAl...... 0 0 0 0 +15313 PF15458 NTR2 Nineteen complex-related protein 2 Coggill P pcc Jackhmmer:P36118 Family NTR2 or Nineteen complex-related protein 2 is a family of largely fungal and plant proteins that form a complex with the DExD/H-box RNA helicase Prp43. Along with NTR1 it is an accessory factor of Prp43 in catalysing spliceosome disassembly. Disassembly of the spliceosome after completion of the splicing reaction is necessary for recycling of splicing factors to promote efficient splicing [1]. NTR2 and NTR1 associate with a post-splicing complex containing the excised intron and the spliceosomal U2, U5, and U6 snRNAs, that supports a link with a late stage in the pre-mRNA splicing process [2]. 
27.00 27.00 27.80 28.00 26.30 26.10 hmmbuild -o /dev/null HMM SEED 254 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.71 0.70 -5.14 41 137 2012-09-24 10:30:19 2012-09-24 11:30:19 1 6 131 0 107 140 0 244.70 26 44.95 NEW spYsp-YlsELpsuTsss.P..............tp-tp...........hsl-shph.st.h.................spsstIPocuEIcEtKpRRsRL...spcp...............pc.alSLpD....p-ctth..............cpppt-oRLsc-D--h..sEGh--asp....Ds+luL.G++sc+pppp++RcphpEhIps...........p-s-sEcpts..aEssQh+tGhsshp.................th.p.s..................PphssLP...pLsssl........p+Lppsl...sshptpppphppplppLpcE+tcIspRcp-lpphlpc ...................................................YspcYlpELpsso.ss..Ppt......................ts.p......................hpl-..t....th.sthh..................spsssIPocucIcctKpRRs+h.........tpcp..................tc.aIuLp-......tsp.pph............t.cppptcoRLlp-D--h..sEG.h--asp....DspluL.Gc+sc+cpcc+...+Rppht-hIsc..................ttts-s-sEcptt....aEssQhRtGhsshp.................................t...t..p.s...................................sphssLP...pLsssl.............t+Lpptl...sthctppsphttplpp.LccE+t-lttcct-lpthlp.......................................................................................................................... 0 34 59 89 +15314 PF15459 RRP14 60S ribosome biogenesis protein Rrp14 Wood V, Coggill P pcc Pfam-B_10508 (release 26.0) Family RRP14 is a family of nucleolar 60S ribosomal biogenesis proteins from eukaryotes. RRP14 functions in ribosome synthesis as it is required for the maturation of both small and large subunit rRNAs and it helps to prevent premature cleavage of the pre-rRNA at site C2 [1]. 
It also plays a role in cell polarity and/or spindle positioning 2], 22.40 22.40 23.20 23.20 21.60 21.60 hmmbuild -o /dev/null HMM SEED 64 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.79 0.72 -9.32 0.72 -3.90 54 167 2012-09-24 12:25:37 2012-09-24 13:25:37 1 6 153 0 128 164 0 56.80 45 11.77 NEW c-cL+pHuptFDsLlsLIPAKaYhs.-csp.............cphKp..KKpo...KcptKpA...K+sKLDP- ...p-RL+sHupuFDuLLuLIPAKaYas.--ss................cQh+p......KKpT...K-ps+pA...KRuKLDP-.... 0 43 79 112 +15315 PF15460 SAS4 Something about silencing, SAS, complex subunit 4 Coggill P pcc Jackhmmer:Q04003 Family SAS4 is a family of largely fungal silencing regulators. This silencing is mediated by chromatin. SAS4 specifically silences the yeast mating-type genes HML and HMR [1]. SAS4 is found to be one subunit of a complex, the SAS complex, that interacts with chromatin assembly factor Asf1p, and asf1 mutants show silencing defects similar to mutants in the SAS complex. Thus, ASF1-dependent chromatin-assembly may mediate the role of the SAS complex in silencing [2]. Co-expression of Sas2, SAS4, and Sas5 in Escherichia coli leads to formation of a stable SAS complex that acetylates histones. SAS4 is essential for the acetyltransferase activity of Sas2, and Sas5 is also important [3]. 22.50 22.50 22.90 23.20 20.90 22.20 hmmbuild -o /dev/null HMM SEED 101 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.54 0.72 -10.33 0.72 -4.14 48 116 2012-09-24 12:56:10 2012-09-24 13:56:10 1 1 114 0 88 119 0 97.90 40 18.86 NEW DPLsDshYpshH++hcRpE+phpNt-+tRuppEt-pLpclL-tLpsh.............DWlRsh.slTs.Is-.s-cc-hEsKRphhlcplpslLcKFcpW+ccccchp.tcpc ........DPLsDshY.hhH++hERpE+plRNtE+tRupaEp-pLp+LL-pLpuh.............DWLRlh.Glou..ls-..s-++phEsKRphh.......lctlpulLcKFctW+ccE++p+hc..c.............. 
0 15 43 73 +15316 PF15461 BCD Beta-carotene 15,15'-dioxygenase Coggill P pcc Jackhmmer:Q4PNI0 Family BCD is a family of bacterial and archaeal proteins is found in bacteria and archaea that catalyse or regulate the conversion of beta-carotene to retinal [1]. Characterisation of BCD proteins shows them to cleave beta-carotene at its central double bond (15,15′) to yield two molecules of all-trans-retinal. However, the oxygen atom of retinal originated not from water but from molecular oxygen, suggesting that the enzyme was a beta-carotene 15,15′-dioxygenase, rather than a mono-oxygenase that catalyzes the same biochemical reaction [2,3]. 27.70 27.70 54.90 54.90 27.60 27.60 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.98 0.70 -4.97 56 106 2012-09-24 13:25:51 2012-09-24 14:25:51 1 1 93 0 33 115 1588 261.10 23 85.69 NEW llullhlGlPHGAhDthlsh+h.h.........phpthhtahshYlslushhlhlWhhhPshuLhlFlhlohhHFGpuDhtth...........ssh...chh..th..........ls+GGhlhhhls.hh.ph.....sc........shtlh.shl.ss......ss.tsh..............shp.......s.....lhh.hhhh....hh.luh.h...h......hh...htthp........tc.tht.h.....hsEhs...hLhhh.FhhlPPlluFulYFslhHShRHltchh................ppl....st.ts...st.t...........h.............hhp.puhsh...olsuhlh..hs...slhh..hhss...shsh......tss...h....lthhhlhlAuLTlPHhlllshhc+c .................................lullhlGlPHGAhDhhlsh+h.h..........php.hhhahshYlslsshhlhhWhhtPshuLhlFlhlohhHFGpsDhthh...........sph..phh.h...ls+Guhhlhhss.hh.phsp........shtlh.shlss.......ss..sh..............hht..h....hhhhhhh....shlsh.h...h..hh...htt.ht........tptht.h....hs-hhlLhhhhhhlPPlluFulYFslhHSh+phhphh................ttl.....pt.tt....ht.............h...hhtpshsh...shhuhhh..hs...shhh..hhss...s.........tst...h....lthhhlhlAuLTlPHhllsshh................... 
0 12 25 29 +15317 PF15462 Barttin Bartter syndrome, infantile, with sensorineural deafness (Barttin) Coggill P pcc Jackhmmer:Q8WZ55 Family Barttin is a family of mammalian proteins that are chloride ion channel beta-subunits crucial for renal Cl-re-absorption and inner ear K+ secretion. Bartter syndrome is a term covering a heterogeneous group of autosomal recessive salt-losing nephropathies that are caused by disturbed transepithelial sodium chloride re-absorption in the distal nephron. Mutations in the BCD proteins lead to sensorial deafness. 27.00 27.00 47.90 46.70 18.70 17.30 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.69 0.70 -11.71 0.70 -4.69 11 35 2012-09-24 13:54:09 2012-09-24 14:54:09 1 1 25 0 16 28 0 204.70 63 69.83 NEW SHDRPQVYGTFYAMGuVMVIGGVIWSMCQCYPKITFVPADSDFQGlLSP..KALGLLENGLusEhK...u..PQPPYVRLWEEAAYDQSLPDFSHIQMKVhGYSEDPRPLLAPc.up...p.tuuDGGcG.uPp-uQAWlEAAVVV......HRGSDEcEGcRss.oQSpsuPPssPQGP...APLASFQDDLDhGSSEGSSPsPSPPstEEPpsPs.tEP..hAsRs.LDRFcDFALID.uP.TsED ...SHDRPQVYGTFYAMGulMVIGGlIWSMCQCYPKITFVPADSDFQGILSP..KAhGLLENGLusEhK..S..uPQPPYVRLWEEAAYDQSLPDFSHIQMKVMuYSEDPRsLLAP-huQ...chGsSDGGEG.GPtDsQAWhEAAVVl......HRGSDEsEGERph.oQShPuP.u..CPQGP...APLASFQDDLDhuSSEGSSPssSP.-tEEspsPp.pEP...uCRs.LDRFpDFALID.APThED................................... 0 1 2 4 +15318 PF15463 ECM11 Extracellular mutant protein 11 Coggill P pcc Jackhmmer:Q04110 Family ECM11 is a family of largely fungal proteins. ECM11 interacts with Cdc6, an essential protein involved in the initiation of DNA replication, and is a nuclear protein involved in maintaining chromatin structure [1]. It was previously identified as a protein involved in yeast cell wall biogenesis and organisation, but is also found to be required in meiosis where its function is related to DNA replication and crossing-over [2]. 
25.00 25.00 26.20 25.50 22.80 22.00 hmmbuild -o /dev/null HMM SEED 139 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.97 0.71 -10.47 0.71 -3.89 53 98 2012-09-24 14:36:33 2012-09-24 15:36:33 1 3 98 0 76 99 0 135.80 26 27.89 NEW DYssptLpphsas-LpspsFD..hsPssht...............................ttsssss...shpp.+....lpphhpt..sppp...pc....phFushohc-W--sGDhhl-pFspllp+lpcsRpp+RchhphFEsEIscRpcsVptcsptlscKLpch+ptGpcll ......................................................................................................DYssthL.phsas-LpspsFD..hsPssst..........................t..ss......slp-.+....lpphhsh....scpp...pc..........paFusholsEW--sGDhhl-pFsplhp+h+csRpp+RphsthFEsEIpcRt-sVctcspt...lsc+Lcch+ptGtcll... 0 12 36 63 +15319 PF15464 DUF4633 Domain of unknown function (DUF4633) Eberhardt RY re3 Jackhmmer:A6NJI1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 94 and 123 amino acids in length. 27.00 27.00 69.70 69.50 22.20 21.60 hmmbuild -o /dev/null HMM SEED 115 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.78 0.71 -10.52 0.71 -4.24 6 20 2012-09-24 14:54:23 2012-09-24 15:54:23 1 1 18 0 12 20 0 109.70 68 95.26 NEW MGTuLRSQSLRGPpPSYGKLQEPWGRPh.....EGRLRRALSLRpGREKSRSp..-.tGPEtLDssGQEhLPGoLGDTEQLIQuQp.usSRRWLRQYQQpVRRRWcS.FVuSFPsVTLSpPAS MGTGLRSQSLRGPRPSYGKLQEPWGRP......EGpLRRALSLRQGREKSRSQsL-...tGsEGLDssuQEtLPGoLGDTEQLIQAQR.uGSRRWLRQYQQpVRRRWcS.FVu.FPSVTLSQsAS 0 1 1 2 +15320 PF15465 DUF4634 Domain of unknown function (DUF4634) Coggill P pcc Jackhmmer:Q8WWF1 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 98 and 133 amino acids in length. 
27.00 27.00 33.40 48.00 18.80 17.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.76 0.71 -3.94 9 32 2012-09-24 15:04:50 2012-09-24 16:04:50 1 2 22 0 14 26 0 117.90 64 98.13 NEW MDVLFlAILAVPLILGQEYEDEEtLEED-YYQVsY.YYTVTPoYDD..FusNFTlDYShFESEDRL.N+.LDK-V..TEAVETTI...SLpTcttD+p+PVTlKPVThE.P...........SPDLNDA.VSoLpSPlPLLLShsLVQuGMY .MDVLFVAILAVPLILGQEYEDEEtLtE.D-YYQVlY.YYTVTPsY.DD..FuusFTlDYShFESEDRL.N+.LDK-l..TEAsETTI...SLcTttsD+.KPVTlKPsThE.P.............SPDhNcA.VSuLpSPlsLLL.hshlQsGha................................................................. 0 1 1 1 +15322 PF15466 DUF4635 Domain of unknown function (DUF4635) Eberhardt RY re3 Jackhmmer:A6NLE4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 120 and 154 amino acids in length. There are two conserved sequence motifs: LEQ and DLE. 27.00 27.00 110.20 110.00 22.10 21.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.78 0.71 -4.88 6 26 2012-09-24 15:10:22 2012-09-24 16:10:22 1 1 19 0 12 15 0 130.80 70 94.84 NEW MslQ......cVGsRtRsuA..........E.....lhERRccS.+C-.....DKKQoLLuLLILVLYLGTGI..SG+SWEVSERIRECNYaQNsVsSQGhEYQTsEPuEEPlKslRpWLKENLHVFLEKLEcEVRELEQLV+DLE.WLDALLGDsahEEPCS .........MshQQVuuRtpVAA...............E.....LlEpRRsS.HC-.....D+KQTLLALLILVLYLuTGI..SGSSWEVSERIRECNYaQN.VsSQGhEYQTsEPSEEPIKslRsWLKENLHVFLEKLEcEVpELEQLVpDLEhWLDALLGE..s+.EE.Cu.. 0 1 1 1 +15323 PF15467 SGIII Secretogranin-3 Coggill P pcc Jackhmmer:Q8WXD2 Family Secretogranin_3 is a family of vertebrate proteins that is one of the granin family. Granins are rich in acidic amino acids, exhibit aggregation at low pH, and possess a high capacity for calcium binding. Because granins are restricted in their localisation to secretory granules of neuroendocrine cells, two interesting characteristics of their sorting mechanisms have been observed. 
These are, first, that they aggregate on low pH/high calcium concentrations and second that two of them carry an N-terminal disulfide loop, mutations in which lead to mis-sorting. Thus, granins are thought to be essential for the sorting of secretory proteins at the trans-Golgi network. Chromogranin A (CgA) binds to SGIII in secretory granules of endocrine cells [1]. SGIII directly binds to cholesterol components of the secretory granule membrane and targets CgA to secretory granules in pituitary and pancreatic endocrine cells [2]. Mutations in the SGIII gene may influence the risk of obesity through possible regulation of hypothalamic neuropeptide secretion [3]. 27.00 27.00 60.80 60.70 21.40 21.40 hmmbuild -o /dev/null HMM SEED 453 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.76 0.70 -12.79 0.70 -5.40 8 53 2012-09-24 15:34:09 2012-09-24 16:34:09 1 2 36 0 27 59 0 404.80 72 96.68 NEW plpAFPsPsuu.cDKulYNRELoEERPLpEQIAEADolKt....ucopsttspp-pcN.pDsDDLslLKSLAEppK.sKcsuslpsoh.-cphssDDoDSTKsRRLsDDYDSTKsGhDY.KYQDDP-uhRQLDGTPLTAEDIVQKIAs+IYEEsDRGVFD+IVSKLLpLGLITESQAcTLE.EVAEALQcLITKpAKNNEh-stshsttss+u-p......................ssscpscp+h..........h..ts.p........--o......susoWs..pshEcRNElsPEDshpDLQYFPNFY+LLKSLsS.EpDscE+ETLITIMKTLIDFVKMMVKYGTITPEEGVSYLENLDAMIAlQTKNKLGKuLu...ssshssPs-..Ksh-EsDsTKsEAAKMpKEYEsLKDSTK-E.Qsso-.s-cP.GKSEoYLEAIRKNIEWLKKHNK-GNKE..DYDLSKL+DFhDQQsDuYl-KGIL-KEEuDsIKRIYSSL ..........lpAFPpPsuu.pDKslHNRELSsERPLpEQIAEAEuDKh....+cshssENKsuppNYSFlDsLNLLKAlsEKEK.EKE+pSlRSSsh-s+Lsl-DsDSTKNR+LlDDYDSTKSGLDa.KaQDDPDGLHQLDGTPLTAEDIVpKIAsRIYEENDRGVFDKIVSKLLNLGLITESQAaTLEDEVAEsLQcLIoKEApNhEc-....scPsS+oEs........................psschsEc.h.....TshAuhQ.DuhspsEND-T..lSNThTLoNsLERRscsasEDsFE-LQYFPNFYALLKSIDS.EKEAKEKETLITIMKTLIDFVKMMVKYGTISPEEGVSYLENLD-MIALQTKNKLEKNsTp..hS+LFsuPoE..KSpEEoDSTKEEAAKMEKEYsoLKDSTK--psssus.T-EPKGKTEAYLEAIRKNIEWLKKHsKKGNKE..DYDLSKMRDFINQQADAYVEKGILDKEEA-sIKRIYSSL...... 
0 1 3 11 +15324 PF15468 DUF4636 Domain of unknown function (DUF4636) Coggill P pcc Jackhmmer:Q8WWF3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 196 and 244 amino acids in length. 27.00 27.00 59.00 58.60 21.50 21.00 hmmbuild -o /dev/null HMM SEED 243 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.33 0.70 -11.47 0.70 -4.91 8 36 2012-09-24 15:38:09 2012-09-24 16:38:09 1 1 27 0 19 25 0 224.10 63 98.88 NEW MG-LhSLFWEVDPPPlPLoaoIPsQDaECpKDDSCGAIGSFLLWYFlIILVLMFhSRASVWMS.EpKcDEDSGTSASVSKASKDsSYKRQsK-GsWDSsQhM..KKPKQsQLoPVTDSEVALVNAYLEQRRAp+HSQhsQVNQlppDSDTTEsDSEESNSGASSWKESESEHHPSPAuI+RRKlAQRQ+slGSYQIRERPCLHCKAMRTNEWLTRHFLQssSsssPhKuDhQEENSlPEINTKFSK ....................MGcLaSLFWEhDPPshPLshsIPsQ-aECt..KDD..SCGslGsFLLWYFlIlhV...LMahsRASlWMS.EsK+...DEc..SGT.SsSluKASK-sShKcQSK-u.sWDs.QhM...K.KPKQsQLoPV..TDSEVALVNAYLEQRRAR+pSQFspVNQsQ+DSDTTECDSEESNStASSWKESESEHHPSPsSIKRRKhAQRQRNl.GSYQlRERPCLHCKAMRTNEWLsRHFhQpsSlssPMKG.D.QEEsShs-.INTKFSK..... 0 1 1 4 +15325 PF15469 Sec5 Exocyst complex component Sec5 Wood V, Coggill P pcc Pfam-B_353125 (release 26.0) Family This Sec5 family of eukaryotic proteins conserved is not representing the Sec5-Ral binding site. 
30.00 30.00 30.60 30.70 29.40 29.80 hmmbuild -o /dev/null HMM SEED 182 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.89 0.71 -4.55 80 361 2012-10-03 17:31:52 2012-09-24 17:03:10 1 14 260 0 261 467 2 197.60 24 22.04 NEW scplcpLlcsNFspalpsKsslDslapph..............pp...pptp.ttpt..........................................shppLpppl.pp...stppusphhpslLcppp+scphRsslshlp+h+hlFsLP...pplccs..lpc.s-Ycthlp-Y.......p+u+slhppp.................................psplhc+.....las-VEphh..pph+pplhppLhsss....................pshcp............hpphI.chLlcL........ps..........pt............cPhhhhlssp ...................................p.tthphlVcsNaspFlpspsolcslhpch............................................ct......ptpp....tt........................................................................hsppLpsslpp...ssspupplapslLsp+p+t-pl+sslshlp+h+hl...FsLP...sslpcs..Ipp.t-Y-tslp-Y...............p+A+slhtps..................................pl.lhp+..............................hhp-VEphl.....pph+c..pLhc+Lhpss.................................pshcp......................ttchl..phLhcL....................ps......ps................sPhW.hl.................................................................................. 0 99 153 221 +15326 PF15470 DUF4637 Domain of unknown function (DUF4637) Coggill P pcc Jackhmmer:Q8WW18 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 142 and 178 amino acids in length. 
27.00 27.00 43.00 42.30 21.70 21.70 hmmbuild -o /dev/null HMM SEED 173 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.83 0.71 -11.32 0.71 -4.64 5 25 2012-09-24 16:10:32 2012-09-24 17:10:32 1 1 22 0 13 24 0 151.70 67 90.85 NEW MDKHGVKTPLWKKElEEPRAcEsEtEEAcEGSE-EDEsppRPPEESAAEGEt-uR.AEtsEGRERRSVSYSPLRQESSTQQVALLRRADSGFWGWLSPFALLGGLAAPADRKRSLPEEPCVLETRRRPPRRGGCARCEILFCKKC+oLHSHPAYVAHCILEHPDLGKAsAuGu ................MDK+..GVKTPLWKKEsEEscAcEsEtE..c..tcEGS....E.-c-p..pRsstESAsEGE..E.s.R.A-EsEGRERtSVSYsPLRQESSTQQVALLRRADSGFWGWLuPhALLGGLsAPsDRKRShPEEPCVLEhRRcPPRcGGCA+CEILFCKKCRoLHSHPAYVAHClL-HPDLG+AtAuGu..... 0 1 1 2 +15327 PF15471 TMEM171 Transmembrane protein family 171 Coggill P pcc Jackhmmer:Q8WVE6 Family This family of proteins is found in eukaryotes. TMEM171 is also known as parturition-related protein 2. Proteins in this family are typically between 242 and 326 amino acids in length. 25.00 25.00 25.20 172.40 24.50 24.40 hmmbuild -o /dev/null HMM SEED 319 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.44 0.70 -12.04 0.70 -5.64 10 40 2012-09-24 16:12:44 2012-09-24 17:12:44 1 1 32 0 22 37 0 297.50 63 97.65 NEW SPsAAAEPDG-ppDRpVSKLIFFLFVFGAVLLCVGVLLSIFGFQACQYEsLsDCShVLKlAGPuCAVlGLGsVILARSRARLQl+ptphQGpQsDPDpAFlCGESRQFAQhLIFGFLFLTSGMLISILGIWVPGCuSsWsQEPLN-TDouDuEPQICGFLSLQIMGPLIVLVGLCFFVVAHVKKRNNLNsuQDASEsEEtpoQSsEPVQVTVGDAVIIFPPPPPPYFsEoSASAso.pSPGANuLLPsENPPSYYSIFsaG.RT..P-..sQGssSERDpESIYTISGTssSSEISHsPHLsSE.PPRYEEKETssuTsLSsSS ..........SssusAEPDG-ppDR+lSKLIFFLFVFGAlLLCVGlLLSIFGaQACQYcslscCuhVLKlAGPuCAVlGLGAVILARSRA+LQLRptphp.GpQhDsDpuFlCGESRQFAQCLIFGFLFLTSGMLISlLGIWVPGCuSsWs.Q-sLNETDou-uEPphCGFLSLQIMGPLIVLlGLCFFVVAHVKKRsNLNsuQDu.SEpEEtpsQosEPVQVTVGDuVIIFPPPPPPYFsESSuSA.ls.pSPGuNuLhPsENPPSYaSIFNaGpT.hP-sQGsAoERDpEoIYTISGssSSSEhStss+LsSELPPRYEEKEsssss.Ls.SS..... 0 1 2 7 +15328 PF15472 DUF4638 Domain of unknown function (DUF4638) Coggill P pcc Jackhmmer:Q8WTQ4 Family This family of proteins is found in eukaryotes. 
Proteins in this family are typically between 240 and 272 amino acids in length. 27.00 27.00 41.20 41.20 22.40 21.80 hmmbuild -o /dev/null HMM SEED 268 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.94 0.70 -12.02 0.70 -4.80 7 31 2012-09-25 09:50:52 2012-09-25 10:50:52 1 1 25 0 16 34 0 224.40 51 86.44 NEW MoEps.DhcsLMPTERKShWRTAEERRMSDLTRVLEWLERRQGKK+Qs.p+pKpcsps...ss.tKtpKKspGhpc.tpttsp+ss.hspphlp.ht...+cs..ssth++h.ts-.KG+RLShlsusYs+Dus+K....S............DlDIKDsIALES.sQRsssaRRQSo.lDPhlQEs.hFGsRRuolLR-W.osKsPDssYERKLKSLMEKGhEPKhEhsKMLKPEEVLSCRYLRLSKNNIRTLLKLCKDAGMsVDIHPHMVEuEIDAKKVFupp.SVA ..............................MoEps.sh+slMPTcRK.hW+TsE-RRMSDLTRVLEWLERRQGKKKQs.pK.pKscshs......ctp.tKctKKspGhhptp......ttsp+.........ts.hs...p....psht.......++s...ssta++h.sl-.KG+RL..Shl.s.usY.h+Dus+K....S............-hDIKDsls.ES.TQRsssaRRQSh.lDPhLQ-...s.hFuuRRsohhRDW.ssKhPD.sYERKLKSLMEKusEPKhEsh+MLKPEEVLSCRYLRLSKpNI+TLlKLC+DAGhsVDIHPHM.lEt-IDs+KlFst..shA........................ 0 3 3 4 +15329 PF15473 PCNP PEST, proteolytic signal-containing nuclear protein family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:Q8WW12 Family PCNP is a PEST-containing nuclear protein that is ubiquitinated by NIRF, a Np95/ICBP90-like RING finger protein. PEST sequences, which are rich in proline (P), glutamic acid (E), serine (S) and threonine (T), are found in a number of short-lived proteins, such as transcription factors and cell cycle-associated proteins. Their function is generally controlled by proteolysis, mostly via ubiquitin-mediated degradation. Thus, NIRF and PCNP are a ubiquitin ligase and its substrate, respectively, that may constitute a novel signalling pathway with some relation to cell proliferation [1]. 
27.00 27.00 28.30 31.90 26.70 26.20 hmmbuild -o /dev/null HMM SEED 150 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -10.99 0.71 -3.90 11 113 2012-09-25 12:41:17 2012-09-25 13:41:17 1 5 66 0 63 106 0 125.80 61 77.26 NEW GPEEcuEcsh+p+slSS.........osGGcuSS..RSuE+tust..............-tspsssssPsPsKlSKhGFuh..............sothh+Ks..ssISIKLGAo.....KEss..lss.p.slAuVFNtD.DDSEPEEMPPEAKMRMKNIGR-TPTSAGPNSFsKsKaGFsDspKlaERclK .....................................................................pEtttp.hppphssS............ssuGEuoS..+SuE+pusp...........--stshssc..PsKhSKhGFu...l................uoQ...ssKKu....suISIKLGus...KPKEss......PolsPK...phoVAusF..NED...-DSEPEEMPPEAKMRMKNIG....R...DTPTSAGPNSFNKGK+GFSDsQKLWERslK.............. 0 16 20 37 +15330 PF15474 MU117 Meiotically up-regulated gene family Coggill P pcc Jackhmmer:Q9Y7V0 Family This protein was identified as being up-regulated during meiosis in S.pombe. This family of proteins is found in largely in plants and fungi. Proteins in this family are typically between 128 and 920 amino acids in length. 27.00 27.00 32.10 31.30 25.00 24.50 hmmbuild -o /dev/null HMM SEED 97 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.18 0.72 -10.87 0.72 -3.50 16 50 2012-09-25 12:57:57 2012-09-25 13:57:57 1 5 20 0 46 50 0 101.90 29 25.76 NEW shsCKGSulCus........ts......ssuCssAlspa..psssl............Y..............pshsu.............hspus.....ChA............hapCss..s.ss........uho......Gpplhsthpsl...csssCptCGohtass.....pCplslsYCo.sC ....s.hsCcGSuhCus.........st.....tssCcpAlsp.h...ssssh............Y.......sshou...........................hssGp.....Chs............hapssG..s.ss........slo........Gsplhssappl...cspsC.p.hCGstta..ss.....uCphslsYss.sC.............. 0 11 31 44 +15331 PF15475 UPF0444 Transmembrane protein C12orf23, UPF0444 Coggill P pcc Jackhmmer:Q8WUH6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 94 and 119 amino acids in length. 
27.60 27.60 27.70 71.40 25.50 27.50 hmmbuild -o /dev/null HMM SEED 92 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.19 0.72 -3.93 8 68 2012-09-25 12:37:30 2012-09-25 14:34:08 1 1 40 0 44 43 0 91.50 75 79.89 NEW sKDHPpsps....GMlWRVTGGLFuVTKGAVGATVG....GVsWlGGKSLElT.KTAVTo...VPuMGVGLVKGGVSAVAGGVouVGSuVAuK.VPhT.uKKKDKo ............KDHPQQ.QP....GMLSRVTGGlFSVTKGAVGATlG....GVAWIGGKSLElT.KTAVTo...VPSMGlGLVKGGVSAVAGGVoAVGSAVssK.VPhT.GKKKDKS.... 0 2 8 23 +15332 PF15476 SAP25 Histone deacetylase complex subunit SAP25 Coggill P pcc Jackhmmer:Q8TEE9 Family SAP25 is a family of proteins found in eukaryotes. SAP25 is a core component of the mSin3 co-repressor complex whose subcellular location is regulated by PML. mSin3, the transcriptional co-repressor, is associated with histone deacetylases (HDACs) and is utilised by many DNA-binding transcriptional repressors. SAP25 is a nucleo-cytoplasmic shuttling protein that is actively exported from the nucleus by a CRM1-dependent mechanism. It binds to the PAH1 domain of mSin3A, associates with the mSin3A-HDAC complex in vivo, and represses transcription when tethered to DNA [1,2]. 27.00 27.00 155.10 155.00 19.80 14.90 hmmbuild -o /dev/null HMM SEED 204 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.49 0.71 -4.81 5 25 2012-09-25 13:48:04 2012-09-25 14:48:04 1 1 22 1 14 27 0 186.90 67 79.86 NEW MTLLAPWDPNYEAKAGPQLVWGPSCGSGASFSGRTLCHPSFWPLYEAuSGRGhRPps...PuuGHQsGEQAPRDAGFPVMCsEDVFLLDPLLPsGQRVPLYLSEAPQQVMGSLKLLLPPPIMSPpVhPhPSpspGCSTAWLSGPELIALTGLLQMSQGEPRPuSSGuP..ussussPussS-sPusS...GGP.SCSHuTDPSLPpTPDTHCP MT.LAPWDPpYcAKAGPR.VWGsuCuSGASFSGRTLCHPSFWPLYEAASGRuLRPhs...PuTGH.NGpQAPtDAGFPVMCsEDVFLSDPLLPpGQRVPLYLSEAPQQVMGSLKLLLPPPIMSPhVLPp.SsspGsSTAWLSGPELIALTGLLQMSQGEPRPuSS..uus..ss.sGsPsssS-.PuPs...Gus.SsSpssDsSlPtTPDsp............. 
0 1 1 2 +15333 PF15477 SMAP Small acidic protein family Coggill P pcc Jackhmmer:G2TRQ9 Family This domain family is found in eukaryotes, and is approximately 70 amino acids in length. There is a single completely conserved residue G that may be functionally important. 23.10 23.10 23.60 23.60 22.60 22.60 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -9.49 0.72 -3.71 65 309 2012-09-25 13:57:31 2012-09-25 14:57:31 1 6 154 0 194 283 0 74.90 31 22.26 NEW p.W..ssu...s.......h...G.ssp+p..pKFhRLMGupK.......................tsss.s..ts......p..s...............stu....tsppcppclppcLEpQaptu..hph+......G..p+.+..GLG .......................Wcss...sF....s..sc-pp..tKFh+LMGstc..................................................csss.sts.......p.s...............................sth....tstcppcclppsL-pQYptuhstp.......tpp.+...GLG........................... 0 51 78 127 +15334 PF15478 LKAAEAR Family of unknown function with LKAAEAR motif Coggill P pcc Jackhmmer:Q8TD35 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 119 and 235 amino acids in length. There is a conserved LKAAEAR sequence motif. 27.00 27.00 37.50 36.10 24.00 19.60 hmmbuild -o /dev/null HMM SEED 137 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -10.76 0.71 -3.91 8 31 2012-09-25 14:00:14 2012-09-25 15:00:14 1 2 25 0 19 27 0 117.80 48 65.91 NEW .PKNWpphospcLpKhuPQpRSRYLAYE-ssK-lt-..shuhshKRl+-..hptch.scPR.shsp-h.l-+-+ps+LIGQLKAAEARNRlRlMRLRYpthRApElpHLIuCQPTAhKAlRLEuhlPPplcsspssDhLDKh- ...........s.s+shhtho.ptLtth.stp+p+hhhatc..ccl.t..thuhhspcShE.........sh+h.sDPR.shopslplssc+pspL.lG.LKAAEARsRlRshRLRYs+hR.A-EIshLIppQpSARAAIRLEhFLPPQLKPs+IsDPLDRpE................ 0 8 9 11 +15335 PF15479 DUF4639 Domain of unknown function (DUF4639) Eberhardt RY re3 Jackhmmer:A6NN90 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 161 and 601 amino acids in length. 
27.00 27.00 29.70 29.20 24.20 23.80 hmmbuild -o /dev/null HMM SEED 576 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.08 0.70 -12.86 0.70 -5.86 12 53 2012-09-25 14:32:57 2012-09-25 15:32:57 1 2 36 0 32 52 0 368.20 40 88.05 NEW QERQsR.DRGVTRSKAEKARPPTVPVPQVDIVPGRLsEAEWhALhALEEGEDVVGDILADLLARVMDSAFKVYLTQQCIPFTISQAREAMLQITEWRFLARDEGESAVAEDPTWGEDEEPhACTTDuWAQGSVPVLHAPAs.GlE-pFQuE......D.GusDph.LGRSWhsRGSQE.hpShE.SsEh+hh..susssTsELFQEAGPtssLEEsDsQtp...........uth.....AtS.ssShQ....SsEhssstSPcsSLELo.VASsQAsscRuQPhuSpLSLEDLYhChPQ.DAAGDRL.chcpEGhPplASss..ussShGssT.hsPSsShps.ppPh......ss.p.Rhs+KssssRLDPARLPRHWVRPLAEVLlP............DSEs+PLEsYRGRpRspKTcA...pAtPQusssGsRVSsAsF.....FPLpPusPFRALGss...tlp.PTLNLu.sSPshsSKlPFPSPGlRFLssHPshPDVARSPSPKLWPuAKWPSGWEtEAELLGELWAGRTRVPPQGL-ssD+..cuQDsttWPpssPQVLEATSQVhWKPhLLPEAhKLAPGVSMWN.oTQVLLSS....uVPppEcccGosssP....-QpPIQTGsPKP .................................................s+o+sEKs+........s.ss..VP....pV..D.......IVPGR.lsEuEWhshhshEEGEDhVuDIluDllscVM-ssa....clYLspQslPFTlsQA+-AMLQIsEWRFLARDEGEssVApDP........TWsEDEE.P.sssTDuWAQGSVPV..Lpsss........u.cp..........hpsp..............................p.tt......ht...hht..ptSpp...............................t..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
0 12 13 15 +15336 PF15480 DUF4640 Domain of unknown function (DUF4640) Eberhardt RY re3 Jackhmmer:A8MTZ7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 99 and 306 amino acids in length. 26.00 26.00 26.70 26.70 19.10 18.60 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.17 0.70 -11.97 0.70 -4.84 11 31 2012-09-25 14:47:49 2012-09-25 15:47:49 1 2 21 0 18 37 0 230.50 51 92.00 NEW SSscSNLSLSVGYFPCE-TaSYE-TsSpEDssS.ssSlHFLPPIQGoWpTESTtRLhtRRDQlpDsPEQFCKLSITLAWDlDVuSssSDSluNWDLsucsQWhDKaPE-cTpLTLuKLDsLVQKLEpFLENcKssccDDSlhPESsQE......EDhpLsSSoPPchAQl...SHQEHsoCQDLPph+s.ENE-lhQhPphP.RLpcpE.sc.hoQuoGSptssosETSSlSoGpsEc-s.....ssSshQsLSCLNFtW.VFRWLRpQVhSSLhRRccPp.cATcusHphAtK+RhSaRuKRIQPQE ...........................SpspSNLSLSVGYFPCEDT..hE-TsuhED.ssSpss.SlHhlPPIQGsWtTEphtRhhpRpDQIQDpPEQFCKLSIhLAW....DlDluSsso.DShsN........h.LsucNpWhDKhPc-cTpLoluKLssLVQchppFLEN.Ks......DDulhPcosp................cDhQLsouSPPchsQs...SHQEassC.QD.s.hps.cNcclh..........t.Eh.........sphhsQuTuSQpTsssEhSSl.StpPEc-D.....TsSpTps.sCLNFtW.sFpWLRpplhSsLhRRccP..pATcusHp.A.p+Rh.aRuKRIQPQE.......... 0 2 2 2 +15337 PF15481 CPG4 Chondroitin proteoglycan 4 Coggill P pcc Jackhmmer:O16883 Family CPG4 is a domain family found in nematodes of one of nine core chondroitin proteoglycans. Vertebrates produce multiple chondroitin sulfate proteoglycans that play important roles in development and tissue mechanics. In the nematode Caenorhabditis elegans, the chondroitin chains lack sulfate but nevertheless play essential roles in embryonic development and vulval morphogenesis. CPG4 has the largest predicted mass of the C. elegans CPGs at 84 kDa. The majority of its 35 predicted glycosaminoglycan attachment sites reside in the COOH-terminal half of the protein, of which four sites were confirmed by DTT modification [1]. The family is rich in conserved cysteines. 
27.00 27.00 28.00 27.00 26.50 26.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.52 0.72 -11.24 0.72 -3.65 15 49 2012-09-25 14:50:01 2012-09-25 15:50:01 1 3 10 0 44 41 0 91.60 21 27.44 NEW sCh+pChsslhcslpthhp.hsp..ps-phcslCspaspuppCl.pp.ttpCcp...phFsshTSuhcahCl-pc-sassphsClptssuslppcC-ppCpsp ....................C.ptChpshhp.hpthht..tp.......shpphpplCptasputpCl.pp.ptpCsp...tthhpthosuhcahCs-pct........tFppphsClst...ss.h.ptCcppC...t..................... 0 17 22 44 +15338 PF15482 CCER1 Coiled-coil domain-containing glutamate-rich protein family 1 Coggill P pcc Jackhmmer:Q8TC90 Family This is a family of coiled-coil family proteins found in eukaryotes. Proteins in this family are typically between 160 and 397 amino acids in length. 27.00 27.00 38.10 37.50 23.20 23.00 hmmbuild -o /dev/null HMM SEED 214 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.68 0.70 -11.68 0.70 -4.70 7 24 2012-09-25 14:58:47 2012-09-25 15:58:47 1 1 22 0 14 25 0 187.80 59 55.95 NEW Tlsp+EDPLNL........GG..GWApSssLpoWSSCHRRRRGAPhY+RRaRYGPKsEYEPPRKQPKQQHuPGsWFQ.PPRpP.h.VhSNWG+WGGPW+PPPssFhKPPs.VQhIRVYGLpPlCL.CCCSCWsGPWNPGWhRPPGRKKRWGRR...GRGLRRpPRRShPRuPP...sDLShLLRPVNLYGWRAPGMRAPpNTTQFIMNQIYEDMRQQEKLERQQEALRAQQA .................................Tlsp+EDPLNL..........Gs.GWApSssLpoWSSsHRRR.GAPhapR..aRYuPKsEYtPPRKQPKQQHuPG.WFQ.PP......lhsNWGphGGPW+PPPssFp+..s.VQhhRVYGLHPLCh.CCCSCWsGsWNPGWsRPPGRKKRW.GRR...GR..GLR+H...PRRS.P.RsPP......sDlSpLLRPVNLYuWRAPGMRAPpNTTQFIMNQIYEDMRQQEKLERQQcALRAQpA........ 0 1 4 6 +15339 PF15483 DUF4641 Domain of unknown function (DUF4641) Eberhardt RY re3 Jackhmmer:A8MYA2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 201 and 519 amino acids in length. 
27.00 27.00 122.20 85.00 19.30 18.80 hmmbuild -o /dev/null HMM SEED 445 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.59 0.70 -12.43 0.70 -5.77 16 39 2012-09-25 15:14:33 2012-09-25 16:14:33 1 2 19 0 19 45 0 376.40 41 87.67 NEW pEGcPGo.PsD-cts.slD..h...Lu..-psAAIhp.LoshpshtVp+pPSPEus....ss-suslWA-lEsGPuuRGA.u.............SssEup.pASAusLalsGPttGRAWsss+Ruop.SRhshsss.QpssscG.sthsS-sE..SSDEho-lQhMRVoIphKc...GuQAKssSsccsuDosRHosspsREsFlpVPGshLoSAsRGLoss.lERQA.sGEh-sS...s.KKhpShlWGKtsuRPSasGuAs..................uGuLP+uoPR+KhAQEKKSLGsASchsLGR...sFPsWGQRlSAuPsEPATFPPlSGVsLLG+Sp+.u.ls.tPKpsKpsusGKKsu.u++sREsps...sutEDNDPsRDssspuQlPTHRPtss.....shpsH+GEhSSG-sNhRusQsPGsSpssALSQuulhPRtsAsSGD....QpPss+ssRP-....R.QQpPPGtQGCPRClhLQ+EI-DL+-QLuuhpuLu-+FQ ........EGpsuo.PsDcpts.sl-..h...Lu...pssslh..ho......lpc.Ps......P-ut....sttsus.hssh-sGsstRuuhu.............ussctp.psouushahsG.t.tGRAWtsPcRusp.uRhshp.s.Q.Ps..........o.c.s-..SoDE.s-l.hh+VslphKc...ssQA+ssu.pc.s-ssR+sshps+-sah.lPushLoSsspGLoss.hER.u.sGE.-sS...s.KKh.uhhWGK.tsRPShsssss..................sGsLPpsssR+K.sQEKKS.ussSphs.G+...sFPsWGpRlpuAP.-PAThPPlSGVsLlG+op+.......sKpsKptssG.KKss.u++sREsps...sstEDN-PsRD.sspuQlPTHRst.s.....shsha+GEhSSGD.NhRusQlPGsSp..uhSptslhPRt..uPSGD....Qps.scsstPc....R.QQ.s.GspGCsRChhLQ+EI--L+-QLushp.h.t........... 0 1 1 1 +15340 PF15484 DUF4642 Domain of unknown function (DUF4642) Eberhardt RY re3 Jackhmmer:A8MZ97 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 115 and 196 amino acids in length. 
27.00 27.00 88.90 88.80 20.20 19.80 hmmbuild -o /dev/null HMM SEED 156 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.29 0.71 -11.13 0.71 -4.25 11 23 2012-09-25 15:24:08 2012-09-25 16:24:08 1 1 20 0 13 25 0 142.90 58 83.00 NEW Capu+pcEETEKsPCTsuNsGEDC..sAANsEpsNscDQEK.hlhphhshshPhRPGILVQRpsK-.hsTsLtNpc-hEsccEs+hK-+QcPcsutEssQE.sD.lpKs.Isloto.SVs-sQKRPLKGVTFSREVIVVDLGpEa.PsPRSYTREHKERK .......hhpsppscETcKsPChssstG.cC..sAAps.psNstDpE+..llhQlhshssPMRPGILVQRpoKEshsT.PL-N+c-hEsEEEsphpEKQEPc.NAGEsuQE--D.LQKh.IslT+TsSslEsQKRPLKGVTFSREVIVVDLGpEY.PsPRSYTREHKERK 0 1 1 1 +15341 PF15485 DUF4643 Domain of unknown function (DUF4643) Eberhardt RY re3 Jackhmmer:A8MZF0 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 254 and 462 amino acids in length. 27.00 27.00 163.60 129.50 24.40 24.40 hmmbuild -o /dev/null HMM SEED 284 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.18 0.70 -12.07 0.70 -4.92 10 19 2012-09-26 07:55:05 2012-09-26 08:55:05 1 2 16 0 10 13 0 231.20 53 73.08 NEW spDuDtuPssPosQPLlPVAHIRPLPsuAQ......ssSPpPEEPsV.......s+sPPuFQAS...VsREuusRVVV.PIAP..ThRSsuPSsaSlsPsuP-up+lEc.Ph....AuPusEAcpVsSsstuSosu.sSsPHPsPsP+VAPKP.............+hSGWTRLKKQLhEEAEEP.aPp.ptsLEsp..t...ppEsstP.........ss+sPsSRAS+MWDAVLY+MSVAcs.ps+.sGPssuE+oh..............uul..oRLP.FLYRPRFNARKLQEAA.RPPPTlpsllEL........sPpPKNFNRTAsGWRL .....tDuDtsPssspsQPLlPVAHIRPLPTusQ......usSPhPEEPsl.......sRPPPuFQAS...VsREuusRVVV.PIAP..Th+S.tsSsaShsPhuPttcplE-.Ph....AuPAsEscpVsSsshASuPs.sSGPHPsPsPKVAPKP.............+lSGWThLKKQLhEEs.EP.hPt.p.sLEsp..t.....cssts.........sspsPsSRAS+MWDAVLY+MSlscs.psp.sGsssup+s...............usl..oRLP.FLaRPRFNARKLQEss.RPPPTlpsllEL........sspPKNFNRTAsGWRL.. 0 1 2 4 +15342 PF15486 DUF4644 Domain of unknown function (DUF4644) Eberhardt RY re3 Jackhmmer:A8MZG2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 143 and 191 amino acids in length. 
27.00 27.00 54.80 54.60 16.90 16.90 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.07 0.71 -4.28 7 34 2012-09-26 14:07:21 2012-09-26 15:07:21 1 1 24 0 17 35 0 151.20 72 86.36 NEW .AVShAtG+PuHsDsPPNIYEGGLGspQpQCPSsQGSKPKNFRLRHLRGLALYLPuHMQPAGQCESHWLuRLMuGGCLPp....PE....GsAWsLc..LPQGsLuPsNShCoAhLEAplPRDSLGsTASSSShDPsKGs.sQPuPsEG.GlRPKRSWGshEEuhCPLCKR ....................DAVS.AtG+PuHPDsPPNIYEGGLGuQQp.QCPSAQGSKPKNFRLRHLRGLALYLPGHhQPAGQCESHWLGRLMAGGCLPp....PE....GoAWsL.D..LPQGTLGPGNShCSALLE.ApLPRDSLGsTASSSSMDPsKGs.sQPuPsEGLGLRPKRSWGs.EEshCPLCKR.... 0 1 1 3 +15343 PF15487 FAM220 FAM220 family Eberhardt RY re3 Jackhmmer:B1ANY3 Family This protein family is a domain of unknown function which is found in eukaryotes. Proteins in this family are typically between 217 and 277 amino acids in length. There are two completely conserved residues (S and L) that may be functionally important. 27.00 27.00 41.40 38.80 22.90 22.20 hmmbuild -o /dev/null HMM SEED 278 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.70 0.70 -5.03 11 40 2012-09-26 14:59:18 2012-09-26 15:59:18 1 5 26 0 18 42 0 241.00 47 83.03 NEW +DtRsslslsLhsh+suct.......DsD+Lhpslppp..............pcssh...........p-..ssshsspPsst.pGsSptpth.Ssch+pstShu..u.lh+uuppsLP....l+posptsSsuuuups.................cuVsh..tP..tcE.pFssl.ssls-AllsDWLt+ss+ssssh+sWsppG-s.......WlStlPspQKl....hEhGllcDE..sAhhcGlusc.lp.ssLculLSAllppYP..psLhssEs+pVFL-hLpshhScpThEYKKhLSsl+soosshQlshhLLAlpuFsLh ...............................................................RD+RGsLGoCLApVptutGu......DSDKLshuLKKR...............EusaP..........uD..sPSWhsKPsV..DGNSQuEuL.SlEM+ps.StA..uLhLHuGsslLP...al+ESlRRNsASAAo.S.................pAVuLhsAP..oE..pFApVuCussEALh.p.DWLutusRATcupRGpC.KGEs........hlScLPp+QKL.....EMG.hpD-PPsAhscGLGoE.LE.SsLHSlLSAsL+shP..-VLLs-ETKplFLDpL.............pPhFScQThEaKKML...KsTocsLQIsLuLLALpsFpLh.................... 
0 8 9 11 +15344 PF15488 DUF4645 Domain of unknown function (DUF4645) Eberhardt RY re3 Jackhmmer:B1ATL7 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 200 and 298 amino acids in length. 27.00 27.00 52.20 52.10 20.00 19.60 hmmbuild -o /dev/null HMM SEED 294 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.96 0.70 -11.85 0.70 -5.25 7 21 2012-09-26 15:18:55 2012-09-26 16:18:55 1 1 18 0 11 21 0 273.50 64 98.88 NEW MAClENVLGGHAsSPhsVss-cNuspEs+..sh.LQC.SSh.+...-DsEsWG+PpVsLRPPhsVLoDLsRpQLEpPSERTGSCIPVsssRAL+pPYsPPPAlAEESLATAElNSSEGLAGhRQcGQDSI.NVSQEFSGuPPALMlGGTRVSstGTERGGNNA+hYssLPRGQGFFPPRGPQlRGPPaIPTLRSGIhMElPPGNsRMAs+c+LA+VSFP.GuPRHPhcNWPRP..lPLSSSTsGLPspooAHCFIPPp.PPSFNPFLAMPhAFAPPPlFuPPLPSYFupFPShGMPsPussN ..MACIENVLGGHAPSPhlVssDcNGNpE.+..DhPLQC.SSh.c...DDAEsWGpPpVsLRPslNVLTDLspcQLEtPSERT.GSCIPlcS...RAh+HPYGPPPAVAEESLATAEVNSS-uLAGWRQcGQDuI.NVS.EhSGuPPALhlGGT+VsNtGTERGuNNARLaVALPRGpGFFPPRGPQVRGPsHIPTLRSGIhMEVPPtNTRhAs.+t+LAHVSFPLsGPpHPhcNWPRP..lPLSSSTPGLPsCSTAHCFIPPR.PP.FNPFLsMPlsFAPPPIFGPPLPSYFupF.ShGMPAPAssN.. 0 1 1 1 +15345 PF15489 CTC1 CST, telomere maintenance, complex subunit CTC1 Coggill P pcc Pfam-B_19246 (release 26.0) Family CTC1 is one of the three components of the CST complex that assists Shelterin to protect the ends of telomeres from attack by DNA-repair mechanisms. Mutations in human CTC1 have been recognised as contributing to cerebroretinal microangiopathy. 
25.80 25.80 26.50 25.80 25.30 21.30 hmmbuild -o /dev/null HMM SEED 1144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -13.30 0.70 -13.98 0.70 -7.22 13 86 2012-09-26 17:22:21 2012-09-26 18:22:21 1 5 45 0 49 95 0 701.10 36 91.14 NEW sLPLSYSFVSVQ-L+TaQ+LPCCSHLSWSSosYQAWApEAt........PsGsPLPREpLLLLGTLTDLsuDhEpEsRsGuLYVRDNTGsLsCELlDLDLSWLGHLFLFPoWSYLPPAphsSst...pGHLELhusPVPVhPLslSPsPhTPlPVLYPEpASpLLphRsKhRsspsNLAG+LVRlSALV+oppKsYFlLSLG.t.s....sAsopVslIVQlPuQLVWH+ALpPGcuYVLTpLRVoplRG.+ppRVWsTosSScLhsLcPpsVREhEl-htt..........shL-AsspshstPosSQDt......t.psLlRpS+lLsYpGTVTcVLNpsAGLYELDGQLsLCLAYQQhpuhRRVlRPGssLELpDVHLLQSlGGGTp+PVLAsCLRGuVhLpuFSppc.PtspsSaps.GAuLaEpLlWE+pLGLPLYLWAs+ALcELupKLCPHhLRHcQhLpaSuPGsPuLGLQLLAPsL-lLtsP.ssstRNsaoEILEEPH+CPLQKYopLQTPsSFPTLssLpEE..uQ..p+AWASFDP+uLLPLPEAuHLTSsQLNpRLAWSWlCLhPosFpPA..VLLGVLVASScKGsLQLRDpo..GSLPCLsLscspQ...PhhDs......plIGCLVRlE+FQLVlERpV+SsFPSWKELshssFIQK+pARVYlQFhLuDALILsVPRs....ssssso.Ppst.........................spPEGPphGQSRLFLLpHKEuLMKRNFssssusSs-sspPsLSFpVsGoWLGGTQR.KEGouWGPPEspt-...EspDQKVhLlFhGSSVRWFtFLHPGpVYRLVAsssssPtlhcttsuSslSpRsLELuussSCLTVQc-WTLEhtuup...DlsssLshs+sLs..ESSLo-LLSuN.ssSLVSFS.AEILSR...sLCpslssshhh+s........Gsusus+psVKLTVAL-sADschPPH.LDVYIE-PHhPsPLGLLPGARVaFSpLEK+VSRupNVYCCFpsoThVpVLSF....PsEoshSsPLPHIaLAELhpGspsPF.pAosSCHlVsVhSLpLhWVCAHCTSlCsQG+CoRQuPsCPoQooVSQAsI...RLLVEDGTAEAsVTCpsHpVAsALGLsPoEWsuLL-tVRsPG+VALQFpGsGA..QsESsucs--PLohFLpTL.CTSsuVLRPlhLoFcLER+Po.clhPLEPPRLQRFQCGEhPhLTRVNP+LpLoCLSI 
.....................................................................................................................................................................................................................................................................................................................................................................................................................................................................shhlpa.GhlTtllp...s.ulY..hDt..plhLChsa..h.t.hthhR.Gsplpl.psHhh...t......h.h..C.hutl.l.taot...s..............................................h........................................................................................................................................................t.......hl.Gh...tt.pu.h.l.Dtp..ttl.Cl......t...s..ss.......................hhushl.phpch.llhEp...sthso..................pph.hYl.h.hscshll..s.s..................................................................................................................p.......h.hhh.thchl...............................................................................................................t..h.......sh.l..t...p..........................h.phhtsp....sphlshp.s.h.......hh......................s...t..h..thtltlphtt.t.....lplYhp.sths...hGLLPGstlhhpthp++lS+.ttslYsp..s.so.lplhuh...............s...............PhhhLh....ttt.............u.h.splsslh.lpl.WhCshCt.........slh.ts.t...........C........C.s.tsh.ps.h...phhh-.DGoupAhh.hps.t.lh.hLtls...htsl.t.h...Gpl............................................................................................................................................................................................................................................. 
0 16 19 31 +15346 PF15490 Ten1_2 Telomere-capping, CST complex subunit Coggill P pcc Jackhmmer:Q86WV5 Family Ten1_2 is a family of primarily plant and vertebrate telomere-capping proteins that is evolutionarily related to the mostly fungal family of Ten1, Pfam:PF12658. 27.00 27.00 27.90 28.10 26.20 26.00 hmmbuild -o /dev/null HMM SEED 118 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.19 0.71 -10.21 0.71 -4.51 29 82 2012-10-03 20:18:03 2012-09-27 10:45:23 1 5 69 0 56 78 0 112.70 36 82.16 NEW hspsGshhhh.E.l...uusth.pGpolRshGRLppYDhtpuhusl...........ssspppltVsTph..lcshps.........plGSlY.hlGElppp...pssts..............llpARVhpsVDGlDlsLhcpAlp.pRpahpc+ ....................................h..psGhhhh..E.l........uus.h.pGtolRshG+LptYDhtpuhssl............tssppplhVsTph....l.cshph.........plGSlY.hlGELptp....pssss..............................llcARVhpsV-GhslsLhcpAlp.pRpa.pc.......... 0 19 30 42 +15347 PF15491 CTC1_2 CST, telomere maintenance, complex subunit CTC1 Coggill P pcc manual Family CTC1 is one of the three components of the CST complex that assists Shelterin to protect the ends of telomeres from attack by DNA-repair mechanisms. This family largely represents sequences from plants species. 
27.00 27.00 35.20 39.70 24.10 19.90 hmmbuild -o /dev/null HMM SEED 291 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.97 0.70 -11.89 0.70 -5.40 11 18 2012-09-27 10:28:16 2012-09-27 11:28:16 1 3 14 0 13 19 0 276.80 36 26.22 NEW sslspPslLsGpLtLs.....ots.s.sspppChthoD..........uuusVCCsllch-hsslG+cIpVhuWNalPstp......ssG.....hLEllchphs-sss........hs+...........ssslcolPLt.....ssppssuKsRhslpGlLcSVSPlhslPpt.stpsu...............................ss.hGFlsplhsCtCc.hhtt.............psHpFsp.hh.........VYFsGu.uupW+PVLs+hVGphVslSGLKKKlVhlGccsSpl.haVuTpcohlthss.t.phh...hpps.h.h.GcChGpYpGlVoGlYhQGhllELDc....pVWLLlTDp.Ls....ssHuLRVGAl ......sPlspsslLsGsLsLP.....ups.s.sspppChphSD..........uuuoVCChlLcF-.cAlG+cIpVLuWNaLPshpp.....uuG.....hLEllcW+hs-sss........hsc...........suhlpolPLt.....ssppsctto+hpVhGVlcSVSsVhsVPptsttusus..............................sN.sGFlsclhsCtC+.hph.............spsHpF-tphF.........VYFssu.uupW+PVLs+hVGc.VslSGLKKKlV.ls+csShh.hhVoTccohlphssst.thh...hsps.....GcChGsYsGhVoGlYhQshllELDc....pVWLLlsDQhLs....ssHSLRVGAh.................. 0 4 7 11 +15348 PF15492 Nbas_N Neuroblastoma-amplified sequence, N terminal Coggill P pcc Jackhmmer:A2RRP1 Family Nbas_N is an N-terminal family of metazoan sequences. This domain lies at the N-terminal of several WD40-containing proteins. The human protein is over-expressed in neuroblastoma cells [1]. 
27.00 27.00 27.00 29.90 26.90 26.70 hmmbuild -o /dev/null HMM SEED 282 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.14 0.70 -11.78 0.70 -5.30 4 74 2012-10-05 17:30:43 2012-09-27 13:19:37 1 8 57 0 54 74 0 243.70 51 15.14 NEW WHLVLASNGKLLAsVQDQCVEIRSA+DDFGSlIGKCQVPKDPNPQWRRVAWSHDCTLLAYA-STGTVRVFDLhGSELFVIsPusSFPGDhSYAlAGLlFLEYpuSAQWSAELLVIsYpGpLKSYLV..SVGTNQuFQEsHoFSFSSHYsHGIToAIYHPuHRLLLVGGCEou..-sGhSKASSCGloAWRVLSGSPaYKQVTShEDDlus.sp+pGhh+h.Sh+haSRpupEpDGVF+MSLSPDGTlLAsIHFSG+LSlWsIPSLKQpGpWtQsEQPGaD-INPEW ...............................................................................W+llLu..ssGKLLAslQDpslEIRSA+D-FsSllGKC.Q.V.PKDPpPQWRRlAW...S...DCTLLAYA-SoGTV+lF..DLhG.oc.LF.lIsP...u...s....o....h...s....G.....D........l....ShAIAGLlFLEY.+.u.S...A...QWSAELLVIsYcGpL+SYLV..SluT.NQuaQEsHs.FSFuuHYspGIsoAlYHP.uH.R...LLL.....VGGCEss......-..suh.S+...AuusGLoAWRlLSGSPYYKQ.Vo.sttDplsssspphulh.+..hh.s...........hphao.+p...spcpDGlF+MSLSPDGplLAsIHFSGcLSlWslPSL+.ptpWp.sEQPGaschsP....................... 0 18 22 35 +15349 PF15493 YrpD Domain of unknown function, YrpD Coggill P pcc JCSG:Target-418961/SP17457A Family This family of proteins is found in bacteria. Proteins in this family are typically between 236 and 351 amino acids in length. The member from Bacillus subtilis, UniProtKB:O05411, is named YrpD. 
26.40 25.50 28.00 26.20 25.90 23.80 hmmbuild -o /dev/null HMM SEED 208 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.58 0.70 -11.55 0.70 -4.69 8 35 2012-09-27 13:15:57 2012-09-27 14:15:57 1 1 27 0 9 35 0 192.80 53 73.28 NEW AstlscGIGGRshlNSo....GuhlsTKlpLPosspls.....ssGssYIYoGF..oG...........ssEuDhGLpYS.......ssYsV..WKP..hhKlGucsppsstYlp.....GhschTYpNGF+PGosVQLTlYKNhNGsT.....RholhGT..........ssDsYsspIIpElssoNluSlopWKhLATlAsossstpp.......hpupFoNIslDupuhTPV......lDspDaApVTl.SGNoVolsV ..........ASQLscGIGGRAYLNST....GulhsTKIpLPoohpl.......SsuTsYIYSGF..oG.........GTEADIGLQYS.......cpYNV..WKP..lMKVGSKspsp..YlE.....GtspFTYsKGFRPGSTVQhTIYKNLNGNT.....RhThWGT..........NNDGYTGRIIoElptTNlGoISKWKsLATlAsotpsQp.h....usFSTuFsNITIDNKAlTPV......lDTQDFAKloV.SGNsVohSV........................................................ 0 2 3 3 +15350 PF15494 SRCR_2 Scavenger receptor cysteine-rich domain Coggill P pcc Rawlings ND Domain SRCR_2 is a scavenger receptor cysteine-rich domain family found largely on vertebrate sequences up-stream of the trypsin-like transmembrane serine protease, Spinesin. 27.00 27.00 27.00 27.00 26.90 26.90 hmmbuild -o /dev/null HMM SEED 98 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.75 0.72 -10.44 0.72 -3.75 47 411 2012-10-03 20:35:02 2012-09-27 15:58:16 1 29 62 0 195 605 1 92.80 28 15.67 NEW utshlLpVhsstpssWhsVCp-sWssshGptsCppLGa.....p.sappuhslsshpht.uppFhpL.......ssshs..ltpthp....pssCsSuplVSL+Ch.-CGhps .................p.lLpVhsstpss.WhsVC.tD.sWspshu.ptsCpplGat.....p.s..tp...p.s.l....t.l.....s.s.....h.p.t...............p.ahpl..............ssp.s.s...l.phh.........pp...sC...sStp..l.VSLp.C.....t..CGhp.s........................................................... 0 14 28 87 +15351 PF15495 Fimbrillin_C Major fimbrial subunit protein type IV, Fimbrillin, C-terminal Coggill P pcc JCSG:Target-417041/SP13489F Domain Fimbrillin_C is a C-terminal family of major fimbrial subunit protein type IV proteins largely from Bacillus species. 
The family is associated with family P_gingi_FimA, Pfam:PF06321. 27.00 27.00 27.10 27.10 26.90 26.40 hmmbuild -o /dev/null HMM SEED 85 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.97 0.72 -10.01 0.72 -3.83 35 193 2012-09-27 15:57:20 2012-09-27 16:57:20 1 3 69 0 26 187 4 85.60 27 16.60 NEW lphYpsGls.YYphhI+H..Dstssssh.................thuca.GVVRNshYslslsslps.GpP................sssssspss-cscsaLslcIpV.PWshhsQsh ............................hY.puhs.YYphhlcH..sstsssth.................thhca.ulVRNNhYplslsslps.GpP..............................tssssssss-ppp.s.alslplplhPWshhpps......................... 0 7 23 26 +15352 PF15496 DUF4646 Domain of unknown function (DUF4646) Coggill P pcc Pfam-B_61885 (release 26.0) Family This is a family of proteins largely from fungi. The function is not known. 23.10 23.10 23.10 23.50 22.90 22.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.77 0.71 -4.06 20 64 2012-09-28 09:46:54 2012-09-28 10:46:54 1 1 49 0 50 68 0 125.70 22 31.09 NEW PsuFuRsPssslsYs..sF.pPhplhuh..usp....LssGFshl........PpsL.ssHDVsppDWtRFlcDlptAuhhospp..hhuuhhPlh.hls.........lsuhhhstthccthcs+c....sshlsclI-hWNpsFFpPRt ...........................................h..tshpl.oh..ssp......lssGFsh.........sphL.hppcVs.p-WppFhc.......-lstAspLostp.....phstss.uhs.shhlu....................hG...hsuhhsu+..shcptt.tcp....................puplpshlcpWNpshFpsRt............ 0 19 27 41 +15353 PF15497 SNAPc19 snRNA-activating protein complex subunit 19, SNAPc subunit 19 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O75971 Family SNAPc19 is a family of proteins found in eukaryotes. It is one of the five core components of the snRNA-activating protein complex or SNAPc that helps direct the nucleation of RNA polymerases II and III. 
The core RNA polymerase II snRNA promoters consist of a single essential element, the proximal sequence element (PSE), whereas the core RNA polymerase III snRNA promoters consist of both a PSE and a TATA box. The SNAPc binds to the PSE of both of these. SNAPc recognises the PSE sequence common to all human snRNA genes, irrespective of polymerase specificity. SNAPc is also known as the PSE transcription factor (PTF) or PSE-binding protein (PBP). The human SNAP19 and SNAP45 subunits are dispensable for transcription in vitro and are not as widely conserved as the other three, SNAP190, SNAP43 and SNAP50, suggesting that these vertebrate-specific SNAPc subunits may have adapted specialised regulatory roles for snRNA gene transcription [3]. 27.00 27.00 29.10 29.10 26.20 26.90 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -10.22 0.72 -3.78 22 69 2012-09-28 13:02:45 2012-09-28 14:02:45 1 1 56 0 43 67 0 82.80 46 84.02 NEW EL+pEEcpLL+lhsslp-QLN+LKVEELtL+Shlsspp..tps.sttss.............sppp.shhh...plDsp....spINpppLp.Lshtssh...........pcptEEEc-sD ....EL+KEEEsLLRl+ssLpDQLNRLKVEELtLpShlsupp..sph.ss.ss.............sppphch.h...tl-sp.........spINQppLc.Lshtsph............cp.EEE--p................... 0 7 10 24 +15354 PF15498 Dendrin Nephrin and CD2AP-binding protein, Dendrin Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O94850 Family Dendrin is a family of eukaryotic proteins found in the podocytes of the kidneys. Dendrin, originally identified in telencephalic dendrites, is a constituent of the slit diaphragm, SD, complex of podocytes, where it directly binds to nephrin and CD2AP. Kidney podocytes and their slit diaphragms (SDs) form the final barrier to urinary protein loss. SD proteins also participate in intracellular signalling pathways. Dendrin appears to prevent programmed cell death (apoptosis) through its binding to nephrin. 
The SD protein nephrin serves as a component of a signalling complex that directly links podocyte junctional integrity to actin cytoskeletal dynamics. Thus, dendrin is identified as an SD family with proapoptotic signalling properties that accumulates in the podocyte nucleus in response to glomerular injury. 27.00 27.00 109.60 47.40 20.10 19.50 hmmbuild -o /dev/null HMM SEED 657 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.51 0.70 -13.28 0.70 -6.02 4 28 2012-09-28 13:42:25 2012-09-28 14:42:25 1 2 22 0 17 28 0 508.80 67 94.99 NEW MDhQASaWApGPQSRTCt.RPGSPEPPPRRPWASRVLQEATNWtsG.PsElRAREQEKRKAASQEREAKETERKRRKAGGsR..RSPLGp......sR.EPRNu.RsAQPsGhssPsRPERhG.sGRsPRPuAQPQuDPG.AAWAGsWuGRRPGPPSYEAHLLLRGAAGTAPRRRWDRPPPYVAPPSYEGPHRTLGoKRGPEhspuPssuAPAPT.suRTEGGRsKKRLDPRIYRDVLGAWGLRQGRGLLGGuPGCuuAtsR.EhuKGuuEKSsGlssAGLNSuuDu+.QucssuuPGsptA.AtuAsuosusPRssPRsR.pL+GScEGKEupEQ.WLPcCWlPSPK+QPsRHSQTLPRPWAPGGTGW+ES.GpR-G..suPch.EsWKtTRRAHTLPRSuRGsAttEGVFVIDATCVVIRSQYVPTPRTQpVQLLPSGtsRssGDu.u.PpPu.cEEGEtAuAhsSsCQKLL.SSRlhcQ.StG....htpEAEuGpstDSSL.cERuS+lLGhPlGEVsltsAP....uQPGSPE+sA.GsAAsssAuss+GSE.ssusPRRAGsGWARTPGPYAGALREAVSRIRRHTAPDSDSDEA.tELSV+SGSSDGSDTEAsGASWpsEpo.PthusspPt-GGKTAELpDSIREILDVISQsEEuLFtscDoptsPQGsRc ...................MDhQASaWARG.QsRTCtPRPGSPpPPPRRPWASRVLQEATNWRuG..AEsRAREQEKRKAASQEREAKETERKRRKAGGAR..RSP.Gp......PR.EPRNA.RsAQ.sGhsus.RPERhu.sGRsPRP...sA.PQusPG.uAWAGPWGGRRPGPPSYEAHLLLRGuAGTAPRR.RWDRPPPYVAPPSYEGPHRTLGTKRGPt.SQsPsSSAPAPs.sARTEGGpsKKRLDPRIYRDVLGAWGLRQGpGLLGGSPGCusu+sR.EsuKGssEKS.GLAAAsLsSGucuHsQA+.ssGusuochsPAGuAsussssPRPsPRSR.HLcGSpEGKEG.pEphWhPKCWlPSsK....KQPsRHSQTLPRPWAPGGTGW+ESLGhtEG..sGPEsLEsWKsTRRAHTLPRSSpG......sutGEGVFVIDATCVVIRSQYVPTPRTQpVQLLPSGssRlVGDuPoQsKPs.KEEGEGAsshPSsCQKhL.SSRl.HQPutG....+shEAEGGcsuDSSL.EERssRILGLPssEVNLpDAP....oQPGSPEHpALGPAAsustutscGSE.ssss.RRuGtGWARTPGPYAGALREAVSRIRRHTAPDSDoDEA.tELSVHSGSSDGSDTEAsGASWRsERT.P.hus..spPpEsGKTAELSDSltEILDVlSpTEE.sLFtscD.+tT.QGsRc...................... 
0 1 1 3 +15355 PF15499 Peptidase_C98 Ubiquitin-specific peptidase-like, SUMO isopeptidase Rawlings N, Coggill P pcc Jackhmmer:Q5W0Q7 Family Peptidase_C98 is a small family of SUMO - small ubiquitin-related modifier - isopeptidases found in eukaryotes. Reversible attachment of SUMO is an essential protein modification in all eukaryotic cells, The family neither binds nor cleaves ubiquitin, but is a potent SUMO isopeptidase, and the invariant residues required for SUMO binding and cleavage, in UniProtKB:Q5W0Q7, are Cys-236, His-456 and Asp-472, all of which are fully conserved in the family. Member proteins are low-abundance proteins that colocalise with coilin in Cajal bodies. Peptidase_C98 depletion does not affect global sumoylation, but causes striking coilin mis-localisation and impairs cell proliferation, functions that are not dependent on the catalytic activity. Thus, Peptidase_C98 represents a third type of SUMO protease, with essential functions in Cajal body biology. 27.00 27.00 27.60 30.00 24.90 21.20 hmmbuild -o /dev/null HMM SEED 275 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.99 0.70 -5.11 14 62 2012-10-10 12:56:15 2012-09-28 15:54:51 1 5 41 0 33 64 0 238.60 57 27.06 NEW hCLQW+NspuLCWLDCILoALVHhEsLKcsl.sphssc.........pcSlht+Lhp+YsQAstLLpssppst.hp.s.t.s..............hhphss-lLscsEo.pLN-lRppIFtpLQPpL+CcL.GchESPVFAhPLLL+h-splEcLFhaSFSWpFECspCGapYQsRshKoLsTFTNllPEWHPLNAsHhuPCNsCpsKSQ+R+MVLE+lsslFMLHFVEGLP+NDLppYSFpFEGshYQlooVIQYpsp.cHFlTWlhsuDGo.WLECDDLKGP.hCp+Hp+hEVPASEIHIVIWE .............h.slQW+Ns.uLCWLDClLoALVH.csL+psl.sthssp.........pcSlhhpLhpcYspAstLL.spphsthp.s.tp...............hhphssplhsch-s.pLsclRtplF.pLQPpL+C.pL..GchESPVFAhPLLL+h-shhEpLFhhSFoWpF-CspCGapYpsRphKoLsTFTNllP-WHPLNAsHhuPCNsCssKSQhR+MVLE+VSslFMLHFVEGLPpN.DLp..cYuFpFEGshYQlToVIQYpsp..pHFITWlLsuDGo.WLECDDLKGP.pscRHc+FEVPASEIHIVIWE............... 
0 5 7 16 +15357 PF15500 Toxin_39 Putative RNase-like toxin Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and conserved cysteine, 25.00 25.00 148.20 146.80 21.50 21.40 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.25 0.72 -3.86 3 3 2012-10-01 01:44:25 2012-10-01 02:44:25 1 2 2 0 3 3 0 100.30 33 10.51 NEW tE+cssAc+KPADGGHDVVVTP-GVG+CSPPPCPVI+VEYKKELuAcP-LKpWNEoVQuhRKsDPchAAD.............EAAKLIpALEsARsNGG+APsEchV+ tE+cssAc+KPADGGHDVVVTP-GVG+CSPPPCPVI+VEYKKELuAcP-LKpWNEoVQuhRKsDPchAAD.............EAAKLIpALEsARsNGG+APsEchV+ 0 1 1 3 +15358 PF15501 MDM1 Nuclear protein MDM1 Eberhardt RY, Coggill P, Hetherington K re3 Jackhmmer:Q8TC05 Family This family of proteins is present in the nucleus [1]. The function of MDM1 is not known. 27.00 27.00 27.30 27.30 23.30 26.60 hmmbuild -o /dev/null HMM SEED 577 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.29 0.70 -13.10 0.70 -5.57 20 126 2012-10-01 08:54:23 2012-10-01 09:54:23 1 4 65 0 65 137 0 377.30 29 69.79 NEW SEYpRsFpW+cshhscphsss.t...........pKssaAGL+S....Dph..GIs+EPshhuKRRVPhpcs.plscphcWptsss.p.ssss..csps..sspsppsppptshsptph..s.-ssphs.+spuhussSt.c...s............stpp.thssstts.spps.h..ssp...spphssths+hLpt+AGls.sh.tpphh+s.SEYQpQFsWKssp.ctuPhhsAp.pshtsps.t.hs..saps.sph.tcoEYpppFps.t.sc..p.cpphtt..............ppshcths.pp..pspKts-.hph.csctts.hstsp.c+p.p.h.hp..ph............t+hsoEY+u+FlSPupYhYccGuWs+s................................+pssssp.......st...lsshWYtEVpELREKActYRpRs.pGTHFSR-HLs.QlLocssslWDsSSsoSS-uslSsslpshcLus.............t.cp.p................................h.st...cpp.shccpspcssTtclshustsssPs+..R+LsWtpscss.pcctpppsptptp.........pppppttttpp.hpp.pc.c.pp..................psptsh.stuossuS.lS.........................uscsGRLPTPcL+...ph.sGspRoHaDhTTPAsG.GAVLlSPoKh+ 
...............................................................................SEYppsahW+p....pt..st.............t....su.h..................u.spEP.h.p+++sP.hs............t.p...........st........t..tt...................................................................................t........ptt.....s..........t..t.p......ss...........h..oEYp....pa...................hhsts..................sthptt.........................................t.hs..t.....p.tc...........php........t...p...t.....................tph..p.t.p.....p.t.a.hptttahh........................................................................lhELRc+At.Y+......pRs..Gschs.c.....lhpcppphW-.sSppSS.t.......slS.s.p...hts.........................................................................................................sh.....t...pcp..psst.pht.st..s.Ps+..c+Lh.s.tptt..p...pp.t.................tt...t.......p.t...........................st.p..tts.tss...t.........................sttstRhspPphh...th....pRh+ts.h.ss.t..t.hss........................................................................ 
0 20 23 44 +15359 PF15502 MPLKIP M-phase-specific PLK1-interacting protein Eberhardt RY, Coggill P, re3 Jackhmmer:Q8TAP9 Family \N 27.00 27.00 27.00 27.00 26.90 26.40 hmmbuild -o /dev/null HMM SEED 151 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.74 0.71 -3.51 24 90 2012-10-01 09:08:51 2012-10-01 10:08:51 1 2 77 0 63 81 0 150.70 23 75.75 NEW P.hs.......t.s....ssph..tt..........hu..tt.......t......s.P...p...........tP..........tt.......................shpptptht.sttp+pstpss...........ps.hG.......pt.t.pt....s.ppYa+PSMLEDPWtpLh.hp.sthspphsssptss ............................................................................................................s.tsssst......................t...t.........p....hG..tst.....s................ts.P...................tttPt.......p.stsPss.........................................................spppthu..hssu...tpppt.pss............sths.ssh.G....phpsRppphsp...........sl-pYa+.P.SMLEDPWtsLpshp.hths.p.ppp....ht............. 0 15 25 44 +15360 PF15503 PPP1R35_C PPP1R35; Protein phosphatase 1 regulatory subunit 35 C-terminus Eberhardt RY, Coggill P, re3 Jackhmmer:Q8TAP8 Family This is the C-terminus of protein phosphatase 1 regulatory subunit 35. This protein interacts with and inhibits the serine/threonine-protein phosphatase PPP1CA [1]. 
27.00 27.00 30.40 29.50 26.00 24.90 hmmbuild -o /dev/null HMM SEED 147 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.68 0.71 -4.68 21 62 2012-10-01 08:54:13 2012-10-01 10:52:41 1 1 54 0 42 66 0 138.20 29 58.61 NEW PphpoolsLppELp.....httt.........chsut+ulpcpLppo...ps+stlp...spsscslNhspsppla+sLVslsVscppll...pptlpc+........pt.s..pchstspPsltpFhcs..p.hhhpssphtscths..phphpspPs.cshhshac+hp ...........................................................Pthpoolslt.Elp....thttt.........phsu.+slpcpLcpo...ps+sulp.....tplscsLNhstscplF+sLVsLpVs-ppll...sttlp-+.thh..t.ptss..pcsssstPchs.hhcs..pphhhpssphhhpsls..plphp.pPs.tshF.ha+ph...................................................... 0 12 16 25 +15361 PF15504 DUF4647 Domain of unknown function (DUF4647) Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAL5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 282 and 480 amino acids in length. 27.00 27.00 30.50 29.90 20.30 20.30 hmmbuild -o /dev/null HMM SEED 457 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.92 0.70 -12.60 0.70 -5.63 13 45 2012-10-01 10:03:11 2012-10-01 11:03:11 1 2 27 0 21 50 0 351.10 52 88.73 NEW MDLPDESQWDETTCsLAlCQHPQCWAolRRIERGHPRILuSsCKTPl-sEDKLPsLTlVNIoDSCFtA+R.lspppLstFTFTKs+SLLSpuSKFcSKFQG.Rs.KuLPDK-LIsposRsPKLSVLNLNETpLPsspDVRNMVVlWIP.......EEsEpp...........sSQsGKK+RKKSss.......Ks+s.lhhsG+Qpscoph..psPuhlVPPPSPV+hhEQLSoEulPhWsQhDhLPQDLLK-LLsscGKohPssEMKhQLAMMKKshPLEKsRPDSAISSKMFLSlHRLTLQRPuLRYPEHLKKLa.NLKoE...uh+KQQthQQpp.......VKTPsKKQEAKKKuKuDsGSQsTS+KpsusssaDPhh..spRTL.sppS-hK.QQpthpptGsshppsSTcRsph-hu-pshs.h.....ppcsPELschEsopKDlssphEslLEu.t.....sscsLSsShuphuWNPELKLLRILQATD-EDEE 
.......................................................................................McLPDESQWDETTCshAVCQHPQCWAolRRIERGH......P.......RILuSssKT.Ph-sEDKLPsLTlVNIs..DSCFts++......LsthTFoKs+SLLS.tSKh.SKFpG.Rs.KsLsDKslhspspR.PKLsVLNLNETpLPsspDV+NMVVlWIP.......EE.Ehp...........spQptKc+RKp.sscp+..h.....hstpQ.s.t....tsPuhhVPPPoPVph.EQhss-.lPhWsQ.-hLPQDLLc-LLssttpoh.s.EhKh.pLAhMKKshPLE+sRPDSAIS...oKMFLolHRLTLpRPuLRYP-+LKKLa.NLKpE......uhpcppth....Q....p..pp.........sKTs.++QEsK+KuKu-st.Qsspccpssshhhs.hh..t.csL.tppsphc.pp..htp.u.t.ppsppcpsh.ph.ch...........hpssch..hEsspcDlss.h-hh.cs....ptp.shshot.hsphuWNsELKLLRILQsTD-E-EE.................................... 0 3 3 4 +15362 PF15505 DUF4648 Domain of unknown function (DUF4648) Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAG6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 115 and 207 amino acids in length. 29.00 29.00 95.90 95.60 28.60 28.50 hmmbuild -o /dev/null HMM SEED 170 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.27 0.71 -11.27 0.71 -4.05 8 38 2012-10-01 10:06:50 2012-10-01 11:06:50 1 1 27 0 20 35 0 148.90 73 78.47 NEW VVI-SDLY.spRPLELLPHRoDRhp..ssEuc..RhGRLQsupQ.Gs.sAKTsARPVGISEPKsopLCGNRAYGKSLlPPs....uRIoVKAPs..shEssAsGo-psAlLsRGSRHLKKMsEEaPsLPQGAEASLPLTGSuSCGsPSILRKMWh+HKKKSEYVGATNSAFEAD ...VVIESDLY.sppPLELLPHRuDRRD..sGDuR..RFGRLQsARPPssHPAKssARPVGISEPKoSNLCGNRAYGKSLIPPV....sRISVKsss..slEAsAhGoEpGAVLhRGSRHLKKMTEEYPsLPQGAEASLPLTGSASCGVPuILRKMWTRHKKKSEYVGATNSAFEAD... 0 1 1 3 +15363 PF15506 OCC1 OCC1 family Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAD7 Family The human member of this family, overexpressed in colon carcinoma 1 protein (Swiss:Q8TAD7) has been shown to be overexpressed in several colon carcinomas [1]. 
27.00 27.00 44.80 44.60 25.30 24.90 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.68 0.72 -8.98 0.72 -4.09 3 14 2012-10-01 10:37:31 2012-10-01 11:37:31 1 1 11 0 8 20 0 57.00 77 93.44 NEW MGCGNSTAuSAGsGpGPAGoAKDVsEESVoDDDKRRNYGGVYVGLPSDAsNMVSGQTKos+K MGCGNSTATSAGAGpGPAGAAKDVTEESlTEDDKRRNYGGVYVGLPSEAVNMVSSQTKTVpK... 0 2 3 4 +15364 PF15507 DUF4649 Domain of unknown function (DUF4649) Coggill P pcc Pfam-B_83 (release 26.0) Family This family of Firmicute sequences has members that are annotated as ribose-phosphate pyrophosphokinase; however there is no evidence for this attribution. Member proteins are all shorter than 100 residues in length. 27.00 27.00 27.00 28.10 26.90 26.90 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.88 0.72 -9.34 0.72 -4.21 14 556 2012-10-01 12:54:17 2012-10-01 13:54:17 1 1 311 0 28 149 0 67.10 50 97.61 NEW IElTYLDAhKpERploFEsYpEF.pu.puC.lslsD.asVpKlsYpG+cLsYpGsYGDlahahhc.DLopYc ..............ElTYhDu.+sERTlpaEsaE-h.ho.puC.lslsDhh.VppLTa+G+plsYpGhhGDlahaL.+tDh.......... 0 4 6 14 +15366 PF15508 NAAA-beta beta subunit of N-acylethanolamine-hydrolyzing acid amidase De Vivo M, Coggill P pcc Jackhmmer:Q13510 Family NAAA-beta is a family of vertebral sequences that form the beta subunit of vertebral N-acylethanolamine-hydrolyzing acid amidase, a member of the choloylglycine hydrolase acid ceramidase family. The alpha subunit is represented by family CBAH, Pfam:PF02275. 
27.00 27.00 27.00 27.20 26.90 26.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.10 0.72 -10.00 0.72 -3.64 85 283 2012-10-01 15:26:22 2012-10-01 16:26:22 1 8 132 0 173 266 14 88.30 28 24.28 NEW sslP.ha.slsLDhPPpcRWpplhpsap..splpt.lhp.hlcpllss..hhst...hlhphlsphh.sthh..phl..sps....ascEl+GIAcss...ulsLG-llhhNlhYEl ........................s..sPhaslsLDhsP.cRWtplhppht..stlp...lhp.s.hpphlss.....hhPp...hl.hphlsphl.stlh.....pth...P..pP....ap...-El+GIAsss...slsLu-llhhNlhYEh....... 0 62 81 126 +15367 PF15509 DUF4650 Domain of unknown function (DUF4650) Coggill P pcc Pfam-B_31507 (release 26.0) Family This family of vertebrate proteins lies to the C-terminus of Ubiquitin-specific peptidase-like protein family peptidase_C98, Pfam:PF15499. It might be acting as the exosite for the peptidase. 27.00 27.00 27.20 28.70 22.60 23.10 hmmbuild -o /dev/null HMM SEED 520 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -12.00 0.70 -12.66 0.70 -5.58 16 46 2012-10-02 09:43:10 2012-10-02 10:43:10 1 3 31 0 24 45 0 485.10 45 49.01 NEW plLSGscsLVDssllsLTLpEh........QVsSEuh..LEspPVt......EssslscssoLQsQ-...ShhuS.louPscEKlspsphlshSF.oQslssshpssQ.Nstsossstslss.spsss.LhQtlKslch............E......+ssphcphLsscsE...KLpscpsspSQsS.NLKc+pssA.pSpsssupSsQNp...shK-sQKKsFVGSWVKGLLS+GsSFMPsCVSApsRst.........lTDLpsoVKG..AsNFsGFKTKGl.pQ+up+sS+Ksp+ssspssssupssPt..tsssoustsp.ssssAsstlh++p-ssStssphs+sopssEsslSSus+s-usEuQlHKLRLKLLKKLKAKKKKLAuLhSSsps..............GphsS-shEplSps.......GSPNDCESl-DLLpELQaQIDlADsKSusTTssssS.hSuQoHEEILAELLSP.oTlsSoEhScssEsDhRYLEM..GDsphsuPs.PoEhsslspsshLpQDHNYCSPsK+sQsEVQsDSLsNsuCl+TLNLESPhKTDIFDEFFSTSALNSlANDsLDLPHFDEYLFE 
.............................................lLSGscsLVDssIlsLTLEEs........plsSEuh..LEs+PVu......ENssllcssohpsQE...ShhuS.lSuPsc-KlhpsthlDhSFsSpslssshp.sQlNstsossspslss.spsos..LlQslKslch............E......+DsphcphLosKsE...pLcPcp.lsSQsS.NL++ppssA.pSQossucShQNp....shKEsQKKsFVGSWVKGLLS+GuSFMPsCVSApsRss............lTDLQsSVKG..AsNFGGFKTKGl.pQ+up+sS+Ksp+ssscPsshupsPst..usssssuhs..spspAsstshcKstssS..t.............suphsHsopsp.sulSSus+t-.sEuQlHKLRLKLLKKLKAKKKKLAuLhSSspp..............tsssS-slEplsps.......GSsNDCESl-DLLpELQYpIDhADscSusTosssso.hSoQoHEEILAELLSP..oTssSoEhScssEsDh+YLEM..GDsphssPs.PoEhsslspsspLpQDHNYCSPsK+s.pEVpsDSlhssuCl+TLNLESPhKTDIFD-FFSoSALNolAsDs.DLPHFDEhLFE............................ 0 1 2 9 +15368 PF15510 CENP-W Centromere kinetochore component W Coggill P pcc Pfam-B_49340 (release 26.0) Family CENP-W is a family of vertebral kinetochore proteins that associates directly with CENP-T. CENP-W members are histone-fold proteins. The histone fold region is critical for binding to centromeric DNA. Importantly, the CENP-T-W complex does not directly associate with CENP-A, but with histone H3 in the centromere region. CENP-T and -W form a hetero-tetramer with CENP-S and -X and bind to a ~100 bp region of nucleosome-free DNA forming a nucleosome-like structure. The DNA-CENP-T-W-S-X complex is likely to be associated with histone H3-containing nucleosomes rather than with CENP-nucleosomes. 25.00 25.00 38.10 38.10 24.80 24.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.47 0.72 -10.42 0.72 -4.02 6 38 2012-10-10 12:36:46 2012-10-02 12:47:25 1 1 26 0 16 43 0 87.60 64 99.88 NEW MAlSTTVSQRK.IKRKAPRGFLKRVFKR+KPHLRLE+ssDLL...............VHLNCLLFV+RLAEESRTNACEsKCGVIKKDHVLAAAKVILKKSRG ..........................MA.oshVsp+K.IKRKAPRGFLKRlhK+pKP+LRLppsuDLL.......................VHLNCLLFlHRLAEEoRTNAsEsKCtlIKK-HVlAAAKVILKKSRG.. 
0 1 1 2 +15369 PF15511 CENP-T Centromere kinetochore component CENP-T Coggill P pcc Pfam-B_9162 (release 26.0) Family CENP-T is a family of vertebral kinetochore proteins that associates directly with CENP-W. The N-terminus of CENP-T proteins interacts directly with the Ndc80 complex in the outer kinetochore. Importantly, the CENP-T-W complex does not directly associate with CENP-A, but with histone H3 in the centromere region. CENP-T and -W form a hetero-tetramer with CENP-S and -X and bind to a ~100 bp region of nucleosome-free DNA forming a nucleosome-like structure. The DNA-CENP-T-W-S-X complex is likely to be associated with histone H3-containing nucleosomes rather than with CENP-nucleosomes. 30.00 30.00 30.80 30.70 29.90 29.50 hmmbuild -o /dev/null HMM SEED 414 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.69 0.70 -12.63 0.70 -5.23 40 140 2012-10-10 12:36:46 2012-10-02 17:33:53 1 3 109 5 91 148 1 349.70 26 78.83 NEW ssTP+uhtu...hphh.t+Rsuh.TPt..psRR+Sspt.............................p+-TPcslL+sLu+sLA.sSpslssSssps......................................ss..ppthp.scp-cs-sp.plptPRLSlPlst..............tcccp.............................-ssc.tPP+hShL.sts............Thp...SlEhsRRsho....p.tshhpchs......ussphSc.....h.s.spl.....ttspsh....sss.....hpssssppshsps.tstsphsppsss....................................................sh..tht........--.tphshths.ssssusstsphtpsps.....s-hptt.sstssstsp.s.t.s..tssssssstpsssptsss..........................stt..ph.hpssssssssspsstsh...sh.......................ppp+lS++.GlshPshPsssVK+lAppau+h.uthuKs+ls+-uLpAlppuo-hFFEQlu-DLpAYAcHA.GRKTI-EoD 
.................................................................................................................................................................................................................................................TPht.tu......pRshh.Ts.......psRRhS.p............................pcpTshslL+plhhshs.po..h.pSs.......................................................t.cp.ptp....hPcLp.shs.................tcp.........................................ps....sschS.h.p...s...............ohp..p.thsR...Rs.s......tshhpchp.........ss...t.........ss.s.h......ttppsh...shh.....h.p.p..ps.sp...ptst.st..tsts..............................................................................................................................c-...hsh..th.ssts...s....tptt......tp.t.t..s..shts..p........t..ststtsstpsp..t.......................................................................................psssssttt..h.tst................................................htp+hs+......t...thshPphstssh.Kphhppas+..shhuKh.ls+cslthlpps.-haFcpls-DLpsaApHA.GRKTlc.pD.................... 0 15 33 59 +15370 PF15512 CAF-1_p60_C Chromatin assembly factor complex 1 subunit p60, C-terminal Coggill P pcc Pfam-B_74766 (release 26.0) Domain CAF-1_p60_C is a family of vertebral proteins that is involved in chromatin assembly. CAF-1_p60 is one of the three subunits of the CAF-1 complex, and this domain binds to the C-terminal region of CAF-1_p150, family Pfam:PF12253. The N-terminal part of the CAF-1_p60 proteins is a WD-repeat structure, Pfam:PF00400. 
27.00 27.00 29.40 28.50 19.10 20.80 hmmbuild -o /dev/null HMM SEED 179 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.66 0.71 -11.32 0.71 -3.84 16 48 2012-10-03 13:48:41 2012-10-03 14:48:41 1 6 35 0 27 42 0 156.60 48 29.99 NEW GIPLKEKPVLuV.RTP-TA.KKsK.uQsppsSSPGPR.sEGTPoSRspDPSSPssTPspu+.uPAssu.KDsPuossus+ushsssSEEK.s.QP.ouQNsKup.PRRVTLNTLQAWSK.TTP..RRINLhPLKTDoPssusPssls.SoPSoEclQ......s...EsPuDPpssPPE.KRPRLsEpptuspu.- ........GlPLKEKPVLsh.RT.Ps.TA.KKsK..uQopps.SSP.GPR.sEGTPsu.RspDPSSPssTPspup.uPAsss.h+DsPshssus+us.ss..sSEEK.s.QP.uuQNsKut.sRRVTLNTLQAWSK.TTP..RR.IsL....hPLKT-oPssusssss..osPusEphp......spsPsDs.spPPE.KRPRhpEp.tsspu............................ 0 1 4 11 +15372 PF15513 DUF4651 Domain of unknown function (DUF4651) Godzik A adam JCSG target SP18156A Family family of short, secreted proteins specific to the Streptococcus genus, with distant homologs, not recognized by this HMM, found in other cocci. In all sequenced genomes, proteins from this family appear in a conserved genomic context with an thioredoxin, tRNA synthase and tRNA binding protein, but the functional implication of this is unclear 22.00 22.00 22.10 61.00 21.10 18.40 hmmbuild -o /dev/null HMM SEED 62 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.46 0.72 -8.81 0.72 -4.52 20 335 2012-10-09 15:12:14 2012-10-04 05:16:11 1 1 332 0 21 126 0 61.70 63 63.01 NEW -+c+EclhpplRpaFSshGpIuVLYlssaESspcplsGGlVhEDGRpapFsYcpGplsYEEE ..M.KTQEELTpIVRDaFSDMGEIATLYVQVYESSLEoLVGGVIFEDGRHYTFVYENEDLVYEEE. 0 1 5 12 +15373 PF15514 ThaI Restriction endonuclease ThaI Eberhardt RY re3 PDB:3ndh Domain This family of restriction endonucleases belongs to the PD-(D/E)XK superfamily. It cuts the recognition site CG^CG leaving blunt ends [1]. 
25.00 25.00 42.40 42.40 21.30 19.90 hmmbuild -o /dev/null HMM SEED 202 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.59 0.71 -11.37 0.71 -5.03 2 3 2012-10-11 20:44:47 2012-10-04 10:36:20 1 1 3 2 2 4 1 154.70 49 96.47 NEW LFpDc.IIsKlpp+LPYhFQLAELESSRAGKlGMpVGShREcIlSuLLIYKFGEpNVETslPITEPElDVKLFGs.ISIKTIoG+EPsGVKLIWTVDupKA+.FLETW+P+aDhILsHINWuS.GulYYIPs.VQpplFDplG+-KYIKLPKQGTNPRGVElSN-Ah+.lhss-cTMSIpIEWKKT.lpYssFKRWV-LWu- ...LFpDc.IIsKlpp+LPYhFQLAELESSRAGKlGMpVGShREcIlSuLLIYKFGEpNVETslPITEPElDVKLFGs.ISIKTIoG+EPsGVKLIWTVDupKA+.FLETW+P+aDhILsHINWuS.GulYYIPs.VQpplFDplG+-KYIKLPKQGTNPRGVEISN-AL+Ellsc--TMSIsIEWKKT.NlcYSPFKRWVELWu-. 0 1 2 2 +15374 PF15515 MvaI_BcnI MvaI/BcnI restriction endonuclease family Eberhardt RY re3 PDB:2odh Domain This family of proteins includes the restriction endonucleases MvaI and BcnI. These enzymes both function as monomers. MvaI cleaves the sequence CC/WGG, where W is an A or a T nucleotide, leaving sticky ends. BcnI cleaves the sequence CC/SGG, where S is G or C, leaving sticky ends [1-2]. 
25.00 25.00 25.90 25.00 24.10 23.10 hmmbuild -o /dev/null HMM SEED 220 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.66 0.70 -11.57 0.70 -4.33 21 77 2012-10-11 20:44:47 2012-10-04 10:53:55 1 2 72 13 23 80 38 220.60 24 68.37 NEW llschcplpptGalp.....pRsusTGlGcTLEcLLGIpcNN.ctPDat.slELKopR.....p..ssohlTLFopsP....ss.h+usppl.hp+YGY.pcttp........tpLasTlpssphNsh...u...........hh.Lc.......l.c..pspp..l....p.lhhp.c.............phhs.hhshp......tpKppchhaV.Acoc.......hpsspEpF+atcu..hhhss..h..s..p..hhpLlEpGhItlDlcIsph.s..t.s..t.s+D+GsuFRIppcch.shLF ..........................hltpl+pltphsal...........stttss.s.u.l..........GpTLEshLGIttNs.ptPDht.s.hElKut+.....t..ssohhTLFs.pP............ss...pps...s.tth.hppaGa.pppt..........tplapoh..ps..sp.....ss.s...........hhlp.......................hs...ppcp..l....hthp.s..............................th.sh.ph.....................tpKhtpshaV.Acsc.............hpsspchFp.as.....ch...hhh..ps.....h....s..p..FlphlcpGhlhhDhchppt.t..h..t.h+c+GstFRIp.ppsl.thLa......................................................................................................................................................................... 0 5 13 19 +15375 PF15516 BpuSI_N BpuSI N-terminal domain Eberhardt RY re3 PDB:3s1s Domain This is the N-terminal (nuclease) domain of the BpuSI restriction endonuclease [1]. 27.00 27.00 57.30 247.20 20.40 19.80 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -10.78 0.71 -4.54 4 4 2012-10-11 20:44:47 2012-10-04 14:08:44 1 2 4 0 1 4 0 159.80 43 18.68 NEW .sYsDDEVusFHPICcoALNpALpphGLDspYcVlH..HpslGol.sDFVlhpKsTcKalLhlEVKRp.utVpSTRY+hQApSYl.EAsp.plEpP....YYslTNLEVhshFKaDusRPsVspQllpPSPhcsGsFscshSEhaspLVcsFpslIshsVsDoGoa ..sYsDDEVusFHPICcoALNpALpphGLDspYcVlH..HpslGol.sDFVlhpKsTcKalLhlEVKRp.utVpSTRY+hQApSYl.EAsp.plEpP....YYslTNLEVhshFKaDusRPsVspQllpPSPhcsGsFscshSEhaspLVcsFpslIshsVsDoGoa. 
0 1 1 1 +15376 PF15517 TBPIP_N TBP-interacting protein N-terminus Eberhardt RY re3 PDB:2czr Domain This is the N-terminal restriction endonuclease-like domain found in several archaeal TATA-binding protein (TBP)-interacting proteins [1]. 27.00 27.00 146.40 146.10 21.50 20.70 hmmbuild -o /dev/null HMM SEED 99 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.13 0.72 -10.44 0.72 -4.35 9 13 2012-10-11 20:44:47 2012-10-04 14:39:59 1 1 13 1 11 16 0 99.50 55 43.95 NEW YpELss+lKKVYu+VRhLDDYHWcIps-pIhGIHKKSsl+lcIclAcu+EcA-KLu.c.K.cssGIcIlVlPsKGTFYl+NGsFIhoh+aL+uTLtDIsDHI YpELss+lKKVYu+VRhLDDYHWpIp--pIlGIHKKSslRlcIclA-u+EcA-KLAc.c.cssGIcIlVlPsKGTFYl+NGAFIhohKaLRuTLtDIsDHI 0 1 1 6 +15377 PF15518 L_protein_N L protein N-terminus Eberhardt RY re3 PDB:2xi5 Domain This endonuclease domain is found at the N-terminus of many bunyavirus L proteins [1]. 25.00 25.00 25.30 25.20 24.20 24.60 hmmbuild -o /dev/null HMM SEED 183 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.30 0.71 -11.04 0.71 -4.90 11 334 2012-10-11 20:44:47 2012-10-04 15:37:54 1 2 46 8 0 196 0 165.00 60 28.09 NEW MDstchsQahsRIpss+DsplAKDIssDLLhsRHNYFGRELCpulsIEYRNDVPhhDIlL-hlPuhsshshcIPNlTPDNYllhsG+lhIIDYKVSVusESophThEKYsclhtclhspLslsaElsIIRhsPVspplHlsS-sFhptaPsIslslDFohFF-L+shLYcKFtDDEEFhhhVu .................................Mp...ht.ah.RI.tsppsp.AKDI.sDLLhsRHsYFG+ELC+SLNI..EYRNDVPhlDIlLDlhP.............slD..P..hsl-sPaITPDNYLalNsh......LYIIDYKVSVSNESShITh-KYhElhcDIts.LulshEIVIIRIDPlScsLHIsS-cFKchaPslslDIsFspFF-L+phLa-KFtDDEcFL.hls......................................... 0 0 0 0 +15378 PF15519 RBM39linker linker between RRM2 and RRM3 domains in RBM39 protein Godzik A adam Jackhmmer:Q14498, residues 339-411 Domain A conserved linker between the second and the third RRM domain in human RBM39 (CAPER) protein, also present in other RNA binding proteins, especially those involved in RNA splicing. This linker was implicated in interactions with ESR1 and ESR2. 
Preliminary results from JCSG suggest that this is a structured domain with a well defined fold. 22.60 22.60 22.70 22.60 21.70 22.20 hmmbuild -o /dev/null HMM SEED 73 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.67 0.72 -3.69 26 417 2012-10-05 14:20:47 2012-10-04 21:53:55 1 17 220 3 225 369 2 75.20 32 17.17 NEW LD..-sDsuGlshNshSRsALMpKLAR..o-p...ssss...sst.thht.t.th.......sh..s.........hASRCVlL+NMFDPuEEsspsW .......................................-.-hsGlshss.t.uRhtL.Mt+LAc.......sss....hphss...............ssp..psh.th...sssh.................................s..s................................lAopChhLpNMFsPp...pEppss.............................. 0 66 110 164 +15379 PF15520 Toxin_40 Putative toxin 40 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the type 2 secretion system [1]. 25.00 25.00 32.00 303.10 22.50 19.10 hmmbuild -o /dev/null HMM SEED 178 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.33 0.71 -11.09 0.71 -4.83 2 4 2012-10-05 01:29:56 2012-10-05 02:29:56 1 2 4 0 0 3 0 177.20 72 11.02 NEW RsPhoPhs.Thst.L.sA.AlAhcpA.Gphs.hphlG...GphAhDssspsshAhhs.GhthhputphlsNslPpphARVIPsth.hs.LGhPspSDVFlTAAcDItGLsshQIup+LTIPtSso.FplIcF.TP.sGlASPI.RssPGFlGtGRTLGGAREFsIPNt.IP.sAhhpIl RSPMSPLGSTFNQ.LDVAQAIAYDKANGTMTSAEVIGNQWGDIAFDAVTTAVGAGVGRGTGLYKGQTLVTNSIPQKVARVIPDGIKTSMLGAPNQSDVFVTAAKDIKGLNAMQIANKLTIPQSSSGFKVIEFRTPMNGLASPINRTNPGFVGKGRTLGGAREFTIPNQQIPKDAIIKIV 0 0 0 0 +15380 PF15521 Toxin_41 Putative toxin 41 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin contains two structural domains, an N-terminal alpha/beta domain and a C-terminal all-beta domain. The domain contains conserved GxR, RxxxoH GxE and GxxH motifs and a conserved histidine residue. 
In bacterial polymorphic toxin systems, the toxin is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 25.00 25.00 48.30 46.40 16.60 15.80 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.03 0.70 -11.97 0.70 -5.24 12 19 2012-10-05 01:45:39 2012-10-05 02:45:39 1 2 16 0 7 21 0 254.30 26 28.37 NEW phhtpppcutplLppVhhlLptG.lphhssptp.............tschphhstsVApsLuHGGRVsIplPshsstpt.........................................shhlsphL.Glspssh................p.sstVhpRhhuTHchplspN................tpFKEptu.....................................hsslpstl...................tpschaGhNlulGGlGppD......................asGDllLPD.GuHGHhalsa+sPosc+sGuL.lGlETsuPuthsshGahHsh+uoEt..ossPhusht.hKsDKlGsh.........thstRhVDLsphst......tDWpthlcchtppatstlt...ttpstppptthhpEhl .............................................t.h.tpt.pthtlLpplhhlhp.G.h.hhptpt................hsphp.hp.slsphLuHGuRV.IplPshsstpt............................................l..hL.Ghp.tt...................tttV.pRhhuTHphphtps.................phcEptu.....................................hhsltstl....................t.chaGhNluhGGhGppc......................hsGcllhss.GtHGHhalhapsPssc+.GulhlGlETotP.......uthsshGhhHshpuopt..phsPtush...Kss+lGs..........phsshhVDLstht........chhthhcph.ptatttht....tp..ppth.....-............... 0 3 5 7 +15381 PF15522 Toxin_42 Putative toxin 42 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
163.70 163.70 303.20 302.10 25.20 16.80 hmmbuild -o /dev/null HMM SEED 205 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.37 0.71 -5.13 2 2 2012-10-05 01:46:05 2012-10-05 02:46:05 1 1 2 0 2 2 0 201.00 29 8.90 NEW PtlsAchpsL..AK.RltllGhtssssIa-pcch+hps..AF.++hR-hl-.........AlsusplssWppYspTs..Ss.......VGhHGQl.Gh.hG.hs.c.ssGpthG.......tsRtSHHhsQYLLlpYLpNh+us.p.hsH....+.hh.PshssoGspspuhou.sG+phD..Rl-.Gss.sDRGtshPAI.lAt+THQhGplHIsAt PtlsAchpsL..AK.RltllGhtssssIa-pcch+hps..AF.++hR-hl-.........AlsusplssWppYspTs..Ss.......VGhHGQl.Gh.hG.hs.c.ssGpthG.......tsRtSHHhsQYLLlpYLpNh+us.p.hsH....+.hh.PshssoGspspuhou.sG+phD..Rl-.Gss.sDRGtshPAI.lAt+THQhGplHIsAt 0 1 2 2 +15382 PF15523 Toxin_44 Putative toxin 44 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha helical fold and conserved [DNE]xxH motif and arginine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 26.90 26.90 29.20 28.00 25.70 24.50 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.28 0.72 -10.06 0.72 -4.10 26 28 2012-10-05 02:16:07 2012-10-05 03:16:07 1 16 27 0 6 33 0 83.80 26 8.40 NEW tssGsC..sstppscLpc-hs+h...Cp.pst..sCtss..sptplht+hcthppCtsuRpplspKCFsG.GDpGHppphspAapshspCpphl .....t..sGsC..sstppscLps-hsch...Cp.pst..sCtss..sptplht+hcthppCtsuRpplspKCFuG.GDpuHppphspAapshssCpph.. 0 2 4 5 +15383 PF15524 Toxin_45 Putative toxin 45 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly all-beta fold and a conserved ExD motif and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 7 or TcdB/TcaC-type secretion system [1]. 
25.00 25.00 27.00 57.20 23.00 16.30 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.19 0.72 -10.28 0.72 -3.85 4 13 2012-10-05 02:17:50 2012-10-05 03:17:50 1 4 13 0 0 17 0 79.20 69 8.50 NEW WolTuRIphAcLPspG+IRYIPP+sYpsStsLP+GPsNGYlDKFGNEWTKGP...SRT+GQpFEWDVQLScpG+cQhGhho+DGp.HLNlSlDGpITH .WSITARIQYAKLPRQGRIRYIPPKNYSPSAPLPKGPNNGYlDKFGN.WTKGP...s.p+s.pFEWDVQLScpGhcphGhho+sGp.alNlu.DGpIoH....... 0 0 0 0 +15384 PF15525 DUF4652 Domain of unknown function (DUF4652) Godzik A adam JCSG target SP18005A Family This family of uncharacterised proteins from Clostridia and Bacilli classes has an unusual structure of three beta propeller repeats that do not form a barrel, as in well known 6-, 7- etc beta propeller barrels, but instead are stacked in a three-layer beta-sheet sandwich. The function of all the proteins from this family is unknown. 22.60 22.60 23.70 22.60 21.70 20.70 hmmbuild -o /dev/null HMM SEED 200 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.54 0.71 -11.29 0.71 -4.88 7 33 2012-10-05 17:30:43 2012-10-05 06:04:42 1 1 33 0 5 28 0 166.80 35 75.33 NEW NTspppspccs.ssstcpssscKpspcst...KspssssKphothpFlKKc..lscsocspFsTpWKsSpsspaSACIEGKGs-A.EEGlGKIaIKs.pspphathpIppsp.K.oPKYIEWhDDcNLhVlIupuaGTVSpGGsLYhlNlpTGpso.lhpspD+KpQllSApKpGDp.....lsLpl.VY-DDsh.cSHhEshTIssh ............................................................................................................EuIGclYlKppsssch.hhLpIsp............pp...........phoPK.alhWlDDcNLhVIIGpuaGTVSKGGNLYplNlpssphs.lYcsp-p.KppVlSscpst-p.....L.LplslY-sDsh.csH.EphsIt..t..................................... 0 2 5 5 +15385 PF15526 Toxin_46 Putative toxin 46 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family An RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, with two conserved lysine residues and and [DS]xDxxxH, RxG[ST] and RxxD motifs. 
In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 4, type 5 or type 7 secretion system [1]. This is also referred to as the E. cloacae CdiAC and has been shown to target tRNAs. 25.00 25.00 32.80 32.50 20.50 19.30 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.82 0.72 -9.57 0.72 -3.82 12 100 2012-10-05 10:45:44 2012-10-05 11:45:44 1 7 98 0 10 56 0 71.80 61 27.41 NEW AppAAKcLGY.pcsp......hSHGphlFhppKs.....shYIo.DhDu......Hs.GGsWKtAso.+sL........spKpsR.GTYDtsLpRIGD ..........A.htAAKKLGY.pthKc.....sppGttlFKKspp.......YIShDhDu......Hp.GGAWKtASS.KNL........spKcTRsGTYDtNLp.RIGD.... 0 2 3 6 +15386 PF15527 Toxin_47 Putative toxin 47 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a mostly beta fold and two conserved histidines, two aspartates and a glutamate residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 5 secretion system [1]. 25.00 25.00 218.70 218.70 24.20 19.10 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.66 0.71 -4.36 4 4 2012-10-05 10:46:14 2012-10-05 11:46:14 1 4 3 0 3 4 0 128.80 57 4.74 NEW ltPDYATlssGlLSuuuSulVNLYDGTpYlAGGVuQosPSuVSapPGlouTlGWIaGApsApusNSFLNGDGNQAFVSIPTPashNVlGAVTHAYGGuTAIElGlGpPGsloaGlsPWSHosPVsssuK ltPDYATlssGlLSuuuSulVNLYDGTpYlAGGVuQosPSuVSapPGlouTlGWIaGApsApusNSFLNGDGNQAFVSIPTPashNVlGAVTHAYGGuTAIElGlGpPGsloaGlsPWSHosPVsssuK. 1 0 0 3 +15387 PF15528 Toxin_48 Putative toxin 48 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved ND and DxxR motifs and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or TcdB/TcaC secretion system [1]. 
26.20 26.20 26.80 47.80 23.00 25.50 hmmbuild -o /dev/null HMM SEED 189 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.21 0.71 -11.41 0.71 -4.41 9 16 2012-10-05 10:46:39 2012-10-05 11:46:39 1 5 13 0 6 13 1 172.10 31 18.04 NEW lshGhNhshsaps.G......sFphusGlGlts....sh.GaGcsu..httuathua.....t.sphuYs.sshh.ph.stQpoGslshth.scaplhhEND........hh.Gs.s+D+aRTuAlplsls.....phohGhtlaTGcstcps.hcs.s.shst...................shhshppp.hGshppG...ppY.phushalGhps.....hRlGhsu-p.lRphhQN.lhHc ........................................................shGhshthsaps.G......sathssGhslhs.....h.G.stps..hhhuhhhsa.....t.hphuah.th.h.ph.ttQpoGhltht..scaphhhEND........hh.Gs.spD+aRTuAlplsht.....phshGh.laTGct.p..ps..ct.p...st...................GhhstphphGhshphG.........cpY.RhushalGhss...........aRIGhDu-p.VRchlQNhhhHp. 0 3 6 6 +15388 PF15529 Toxin_49 Putative toxin 49 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved ND and DxxR motifs and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or TcdB/TcaC secretion system. Interestingly, the toxin is also found in type-II toxin-antitoxin systems [1]. 38.30 38.30 51.40 51.40 25.30 24.40 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.58 0.72 -10.16 0.72 -4.05 15 25 2012-10-05 10:47:04 2012-10-05 11:47:04 1 8 24 0 6 28 1 88.70 40 7.83 NEW tpuRstssushPputGpsc.sl.hc.cs..G..plspYsTYsscGh......l.KcaRss..G+sHGslssPpVhE.tppNpsPcushhh..sptc..sRtspPcElP ....h.puRs+ssGP.PcApGssH.Th.a+t+s..G..+lTpYuTYspsGs.....hhKQaRGp..GKPHGsVPRPNVKp.sphNpsPtsppth..PKp+..VRpPh.cEhP... 0 1 5 6 +15389 PF15530 Toxin_50 Putative toxin 50 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. 
The toxin possesses a mostly all-beta fold and conserved FGPY motif and a histidine residue. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 5 secretion system [1]. 25.00 25.00 143.30 142.70 22.30 20.60 hmmbuild -o /dev/null HMM SEED 168 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.20 0.71 -10.81 0.71 -4.34 5 24 2012-10-05 10:48:05 2012-10-05 11:48:05 1 9 20 0 2 21 0 155.40 71 9.16 NEW sPPRVQQDsSLAhDhuQcGhsptEIscAlDtSHlGPSWGTEYKV+ssVKupVuuGhusGYpl-GoIDsc+lSVNsG-ThulGA+AGASIGLoFGPYFPGllsss-RDYShulGhGVhSVulSsGKDGlGFSFGVGPSWGaSuopo.stscpVDlNGooToElY+YDFK SWNQYAQDNNLTPEQVQEGMNRIAIGE.......GPSWGTTYKVHPVVQAGGDVSFIRGYTLSGTIDDNHISVNQGDIYSIGAHGGASIGLSFGPYFPGLINSNDNDYSINGGFGVGAVGLSTGKDGVSFTFGFGPSWGWSATEI....KGVDVNGTSTSEVYRYDFK..... 0 0 0 2 +15390 PF15531 Toxin_51 Putative toxin 51 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and conserved aspartate and glutamate residues, and an RxW motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system [1]. 25.00 25.00 27.70 27.70 22.60 21.90 hmmbuild -o /dev/null HMM SEED 128 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.93 0.71 -10.63 0.71 -4.34 6 8 2012-10-05 10:48:58 2012-10-05 11:48:58 1 5 6 0 5 10 0 125.20 25 7.22 NEW hGlNspu-sL...sus.Gu..cTasspsYuts.stuhsls-...tPlWpssVcsAltNPsV+IuloLDGh.su..........pshsEAhpsshp+Gcuht.scWc...ttGhGTAWEMscltpAlRhts..............RsW-SIcWYhssccs ...............hGhNshs-tL...sus.Gt..coasshsYups.shshsssp..stshWhssVcsAltNPsV+lpVsLDGhssu..........psh.-AFhsshppGtsh..tsa....ttGtGTsWEMuplupAlRht-..............RsW-SIcaYhsscp...... 0 2 2 5 +15391 PF15532 Toxin_53 Putative toxin 53 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. 
The toxin possesses an all-beta fold and two conserved histidines present in an RxH and THIP motif. The domain additionally has a highly conserved arginine residue. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6 or type 7 secretion system [1]. 25.00 25.00 49.00 46.50 24.30 23.10 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.53 0.72 -10.35 0.72 -3.81 11 20 2012-10-05 10:50:16 2012-10-05 11:50:16 1 6 20 0 5 21 1 103.50 39 15.15 NEW scPhshlscssppps..h......psss..pshchKWt......ss..shsaclRlHssDsssstGsNuusG.IaRlp.............hGhcY.hDssGpaa+putlp......hp...sPpasppAuNcTHIPl. ..................................u.sPtchls.sh+ppG..Lp....scshP...thKtKas......su..shpYcVRsHsssPoAPsGSNuusGsIYRIut........tpQGhGhEY.hssDGsWaHpSsL+......spSPsYNssAANDTHIPls. 0 5 5 5 +15392 PF15533 Toxin_54 Putative toxin 54 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and [DN]xHxxK and DxxxD motifs. It is usually exported by the Type 2 secretory system [1]. 26.30 26.30 26.30 30.30 26.10 23.90 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.51 0.72 -9.20 0.72 -3.72 8 17 2012-10-05 10:51:18 2012-10-05 11:51:18 1 2 17 0 3 16 0 62.90 47 25.94 NEW NstQDKhLohuDIc+LKcuGhDhH-LK....Gt+.NuS+hDLYKD+c.GNIYlKPKGGpGtG-PTGlNIN ...hptsK.lshs-.chLKcsGlDhH-LK....Gtp.ssSKhDlYhDcp.Gplalh.KGGpGts.sTs..Ip.. 0 2 2 3 +15393 PF15534 Toxin_56 Putative toxin 56 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains a conserved histidine residue and a KH motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2 secretion system [1]. 
25.00 25.00 29.90 29.70 24.40 24.40 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.05 0.72 -9.55 0.72 -3.49 14 39 2012-10-05 10:52:08 2012-10-05 11:52:08 1 4 37 0 3 32 0 76.20 35 26.79 NEW hssNKh.pHIhssKHsWsplhct...sapplpslhppsh+pGppsshppushpc.....shphsGpslsVThth.hcGphcI.SsuWVp ...psNKh.pHIht....sKHsWsplhKt...sappVpslhp+sM+pGpposYppS...uhp+.....shphsscsVslThsh.pcGpl+I.SsuWVp.... 0 1 2 3 +15394 PF15535 Toxin_57 Putative toxin 57 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and a conserved glutamate residue, and [KR] and Hx[DH] motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system [1]. 25.00 25.00 32.00 32.00 24.70 23.10 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.33 0.72 -9.81 0.72 -3.93 15 22 2012-10-05 10:52:50 2012-10-05 11:52:50 1 5 21 0 8 21 1 79.00 29 16.33 NEW ucssus.sspEhht....hssccGhp.ass.sPs...tsG+.uuWhcscsGcsahPssptssst....ptGPHWDVpcPpGt.....apsshPpsh .................ucs.ss.sspEhh.....hssccG.p....ats.sPs...hsG+.suWhssc.Gpsah.shttsstt....ptGPHWDVpcsput.....acsshPtt..... 0 3 7 7 +15395 PF15536 Toxin_58 Putative toxin 58 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-beta fold and conserved aspartate, arginine, histidine and cysteine residues that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
25.00 25.00 32.60 160.10 24.70 18.40 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.72 0.71 -4.30 3 5 2012-10-05 10:53:14 2012-10-05 11:53:14 1 2 2 0 4 5 0 128.40 56 13.54 NEW RAGIuVPsAthRYERQEPsAcLQtRARIlASPDVRVAV.PGTRITYsLs+su-.LHApuu.YpYQWYhLNDP+TopTaG+P..ARV-Gu-GP+u-FRAGFVGNHKIICK.Vh..AuGDuQAPVFYEaPQsVVSEGK RAGIAVPDAALRYERQDPAAELQGRARIVASPDVRVAV.PGTcIoYALAHGoQuLHASSSPYRYQWYhLRDP+ToRsHGEP..ARIDGPDAP+A-FRAcFVGNHKVICK.VTPRAGGcAGVPAFYEFPQTVVPEGK. 0 4 4 4 +15396 PF15537 Toxin_59 Putative toxin 59 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold with two conserved histidine residues. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2 or TcdB/TcaC-type secretion system [1]. A of this, the Pseudomonas RhsT-C has been experimentally characterized. 25.00 25.00 45.80 72.00 20.00 17.50 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.26 0.71 -10.61 0.71 -4.04 4 7 2012-10-05 10:54:46 2012-10-05 11:54:46 1 6 7 0 1 9 0 117.90 37 7.14 NEW GohhutsshpcGulssppush+.GSGPssGhlEVS-uhpSoKul+Nats...uGs.-FVFDPppppFshGsst......hpsHptlA+uhG...As-usllGGRh.R.psGplhTsEaSGHYGcpW..osEhRpQ .......t.ht.hhhhpGuls..sp.shhp.GSGPssGhltVS-pspSstAl+NatP...pGs.-FVFDPppspFssGssp......htsHptLAculG...AscusllGGRhhRsssGpl.TsEaSGHYGppW..ssphRp............ 0 1 1 1 +15397 PF15538 Toxin_61 Putative toxin 61 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin domain found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold with a conserved glutamine residue and a [KR]STxxPxxDxx[ST] motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system [1]. 
25.00 25.00 27.70 27.10 22.80 19.70 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.22 0.71 -11.07 0.71 -4.23 8 33 2012-10-05 10:57:02 2012-10-05 11:57:02 1 3 32 0 5 32 0 150.90 47 35.76 NEW RRsYLNcKFGRTGDLs+DINIRGN+EsAssFaKSpGhspsc.hEsYMsGIDFocPVpVETIN+GKpLaQaQsPGuhQ.GsWYSLossVtPTcLGINPpGplauT-hlVPKVhssYQSpcKVplLRSTAAPsLDTWSV.cpPYpAcGGApQhhSspK-lF ..................................RRtYLNpKFGRoGsls+DIshRuN+EhAscFF+Sc..sls.tc.hcsYMpGlDFs+PVpVETlssGKpLWQaQsPG..u..hQ.GNWYoLoPsVpPT-LGINPhGphhtssh....hhsKVLNsYpoTpKVpVLRSTAAPslDpWS....V..tpsYsAcGGApQlhSsp+t.F................. 0 3 3 4 +15398 PF15539 CAF1-p150_C2 CAF1 complex subunit p150, region binding to CAF1-p60 at C-term Coggill P pcc manual Domain CAF1-p150_C2 is part of the binding region of the CAF1 complex p150 subunit to the p60 subunit. The CAF1 complex is essential in human cells for the de novo deposition of histones H3 and H4 at the DNA replication fork [1,2]. 27.00 21.60 28.10 21.60 26.30 18.60 hmmbuild -o /dev/null HMM SEED 292 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -12.01 0.70 -5.29 4 69 2012-10-05 10:57:55 2012-10-05 11:57:55 1 8 48 0 39 64 0 238.00 41 28.17 NEW AKEWDEFLAKGKRFRVLQPVKIGCVWAAD.RDCAGDDLKVLQQFAACFLETLPAQEEQTPKASKRERRDEQILAQLLPLLHGNVNGSKVIIREFQEHCRRGLLSNHTGSPRSPSTTYLHTPTPSEDAAIPSKSRLKRLISENSVYEKRPDFRMCWYVHPQVLQSFQQEHLPVPCQWSYVTSVPSAPKEDSGSVPSTGPSQGTPlSLKRKSAGSMCITQFMKKRRHDGQIGAEDMDGFQADTEEEEEEEGDCMIVDVPDAsEVQAPCGAASGAGGGVGVDTGKATLTASPLGAS 
....................................................................AKEWD.EhhuKGK+h+VLQPVhlGClW..tut...ts.s.....ss-LclLQpFs.ACh......L-s..s.......spE.-.p......s.P.c.s...o.p+p....p.+..DpQlLsQLLPLLHGNVNuSKhIIpEFQEh.C...Rp.....Gh..h..u..p.t..ss.......S..s................Pup.......oh...p..oPt.sps.sslPSKu+LKR...lISENSV..Y.E.KRPca..R.h.CWYVHsp.VLc..pF..pQ-cLP.VPCQWsYlTp........s........Ps.ss...+.-.D.sG.s.....s..ss.ss.s.p.usP....h....ShKRKssuSMsITpFMK+.tt.t.h.....-.DGFQuD............-Chhht......................................................................................... 0 8 10 20 +15399 PF15540 Toxin_62 Putative toxin 62 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains a two conserved aspartates, a glutamate, a histidine and an arginine residue and an RT motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6 or type 7 secretion system [1]. 26.60 26.60 27.20 84.40 26.00 24.40 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.66 0.71 -10.42 0.71 -3.97 6 18 2012-10-05 10:58:54 2012-10-05 11:58:54 1 6 18 0 5 18 0 113.50 57 17.75 NEW stspaphNsspDlDhRGpGpoY+-....ALDEAF+RTGlPKcpFsVTK...WuKDcsGKShPVEa...pGPN.GApVNlDlP+hs.....sshssGPcpPHlGYQTsGK.uGuutsRGHIhlDslPssR .N.LYpYAPNPIRWIDPLGLAILEHQ....SNFDAARRTGFENAGMTNPEDVTFSKVDPKTGTVVEF...KGPN.GAKVAYDAPHAD.....MDVTAGHDKPHVGWQSAGKRGSGGANRGNITYDGPQHPH........ 0 1 1 4 +15400 PF15541 Toxin_63 Putative toxin 63 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
25.00 25.00 140.00 139.00 23.40 18.60 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.51 0.72 -10.37 0.72 -4.07 4 4 2012-10-05 10:59:14 2012-10-05 11:59:14 1 2 4 0 2 4 0 97.80 47 14.68 NEW LKGWERAH..GsGhGsEAKtGIhYAP.tVNQElQNRGhEKaIRELa.lspssslKlFhpscshAaP..........GpLhLcolpYcV.tEttstRRIl..F-ASl LKGWERAH..GsGhGsEAKtGIhYAP.tVNQElQNRGhEKaIRELa.lspssslKlFhpscshAaP..........GpLhLcolpYcV.tEttstRRIl..F-ASl 0 1 2 2 +15401 PF15542 Toxin_64 Putative toxin 64 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems that is proposed to adopt the BECR (Barnase-EndoU-ColicinE5/D-RelE) fold, and contains two conserved histidine, a serine, two lysine, and a threonine residue and a HxVP motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6, type 7, and MuF-type secretion system [1]. 26.90 26.90 26.90 28.10 24.00 26.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.48 0.72 -3.45 34 374 2012-10-05 11:00:33 2012-10-05 12:00:33 1 12 254 0 20 174 0 87.00 40 46.17 NEW psp+Qs+HhhGpp.Ycpph....hsstth.Sahh.lsscclpcLlpphuusGpllhs.ptG.appKElIDF.sphIGcsa....hsGca.l.cTshGplHYSK.sG.sHIVPth ......................tQt+H..spp.Y.ppK.....sst.hh....SYhh..ls.pphpclhcc.hhsGsllhs..ccs.FchKplIsa..pplIGKsa.....hsspY.l.ETphGKlHYSK.TG.sHIVPal.. 0 6 9 18 +15402 PF15543 Toxin_65 Putative toxin 65 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 
25.00 25.00 206.90 206.10 21.90 21.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.75 0.71 -4.52 2 2 2012-10-05 11:01:05 2012-10-05 12:01:05 1 2 2 0 2 2 0 130.50 37 12.91 NEW MRRR.ht.hASs.-p.LsFLLstcGpa.......RStRsh+t+.hhpcPD..IVQhGHhhSs....thucc-alMLpsu..N.hsshT....chpGuVhspsAV.ItGhsVDl.TAphWEshGhL.tGTVusuPhVp.P MRRR.ht.hASs.-p.LsFLLstcGpa.......RStRsh+t+.hhpcPD..IVQhGHhhSs....thucc-alMLpsu..N.hsshT....chpGuVhspsAV.ItGhsVDl.TAphWEshGhL.tGTVusuPhVp.P 0 0 1 2 +15403 PF15544 Toxin_66 Putative toxin 66 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold that is usually exported by the Photorhabdus virulence cassette (PVC)-type export system [1]. 25.00 25.00 486.60 485.90 17.50 16.00 hmmbuild -o /dev/null HMM SEED 273 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.01 0.70 -11.59 0.70 -5.13 2 2 2012-10-05 11:01:34 2012-10-05 12:01:34 1 2 2 0 1 2 0 269.50 35 70.37 NEW hsTulsPph............ulhupspstsppspuhGQslhA.sEl+LhDERshhssLsGR..NLGu-GTYhINPpISATLGts.uch.usPplFhG-polGuHV+LMVpNPQT.pch.sGGapth+FoApYpLKNucoWpTPNMshNhtstNpRtLcGSlsAsusL.Itt.lsVpusApIGss.ooEpshcAsl-EFpLGGosoYasHhHclphhlsSuG.Asup.h-SsGcG.pTscsVaspphstucph-spcGS+sh.p.tphP.RVpMPL.+ hsTulsPph............ulhupspstsppspuhGQslhA.sEl+LhDERshhssLsGR..NLGu-GTYhINPpISATLGts.uch.usPplFhG-polGuHV+LMVpNPQT.pch.sGGapth+FoApYpLKNucoWpTPNMshNhtstNpRtLcGSlsAsusL.Itt.lsVpusApIGss.ooEpshcAsl-EFpLGGosoYasHhHclphhlsSuG.Asup.h-SsGcG.pTscsVaspphstucph-spcGS+sh.p.tphP.RVpMPL.+ 0 1 1 1 +15404 PF15545 Toxin_67 Putative toxin 67 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an alpha+beta fold and HxR and HxxxH motifs that is usually exported by the type 2 and type 6 secretion system [1]. 
25.00 25.00 44.10 43.20 24.30 23.80 hmmbuild -o /dev/null HMM SEED 70 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.86 0.72 -9.36 0.72 -4.10 7 8 2012-10-05 11:02:03 2012-10-05 12:02:03 1 4 8 0 4 8 0 69.90 47 10.05 NEW LAsD.KQsphlRGWl+pElp+lcp....sp..........+pslRlP........PGh-LAHhRGhEupKGYuYpYosLQstcLHKhQH+hD LAsD.KQsphlRGWl+pElp+lcp....sp..........+pslRlP........PGh-LAHtRGaEupKGYuYpYSsLQstDLHKhQH+hD. 0 2 3 3 +15405 PF15546 DUF4653 Domain of unknown function (DUF4653) Eberhardt RY, Coggill P re3 Jackhmmer:Q8TAB5 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 93 and 229 amino acids in length. 27.00 27.00 90.20 81.70 23.40 22.70 hmmbuild -o /dev/null HMM SEED 240 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.63 0.70 -11.78 0.70 -5.08 6 45 2012-10-05 12:37:08 2012-10-05 13:37:08 1 2 32 0 24 42 0 196.70 64 97.68 NEW MF.AlQPGhs......ctupaLGu.PsuVsp.EhpPDSNSNFVppuhDANENWpth.utl-shh.+SpSE............ScNpshtu.t...........P.......EutVRSPPEGAEIs...GscPEKsscsssss...SPLEDNGYASSSLSlDS.sSSP-susuosp......uP.u.P................sDsLhPoVupAhQQLQApERYKEQEKEKHHVHLVMYRRLALLQWIRuLQppLsDQQsRLQESFDTILDNRKELIRsLQpttsPotsQ-pu ..............................................................................................................................................MF.hhpPshs......ctup.lGs.Psulsp.EhpPDsN.NFhtp.shDsNENWpth.uts-shh.pSpsE............psNpshts.t..................sEttVR..SPPEGAEls...GscP..Ec.sssussp...SPLEDN..GYASSSLSIDS.uSSP-suCuTPp.......sP.u.P.........................sDsLhPoVApAVQpLQspERYKEQEKEKHHVHLVMYRRLALLQWIRGLQppLlDQQsRLQESFDTILDNRKELIRCLQQttsPstspptu........... 0 1 3 9 +15406 PF15547 DUF4654 Domain of unknown function (DUF4654) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NEQ6 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 145 and 169 amino acids in length. There is a conserved IDC sequence motif. 
27.00 27.00 28.00 71.60 21.10 18.90 hmmbuild -o /dev/null HMM SEED 138 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.43 0.71 -10.87 0.71 -3.85 12 24 2012-10-05 12:39:56 2012-10-05 13:39:56 1 1 21 0 14 21 0 132.00 62 78.91 NEW AppQKAlPsAHLTFlIDCupGKQLSLAAPssPPpuP....pPp.GPVsPPMKTYIlFCGcsp.P.hTQcssLGtssLApA+sTLPPpRGhsAPsS.PsSshsPQEsPcAKGsPlKssss.RSSsWGTVKGSLKALSSCVCGQA- ..At+QKAlPsAHLTFVIDCspGKQLSLAAssuPPpAP....uPspGsVTPPMKTYIlFCG-N..PpLTp.sPhGGGsLAQARuTLPPCRGssAsAS.PlSPhsPQEsPEAKGpPlKssPs.RSSsWGTVKsSLKALSSCVCGQAD.. 0 1 1 1 +15407 PF15548 DUF4655 Domain of unknown function (DUF4655) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NEP4 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 533 and 570 amino acids in length. 27.00 27.00 439.50 198.90 24.50 21.00 hmmbuild -o /dev/null HMM SEED 532 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.90 0.70 -13.08 0.70 -5.60 13 28 2012-10-05 12:43:12 2012-10-05 13:43:12 1 3 21 0 15 27 0 485.20 57 91.37 NEW lKTNKsGSKVAVSAp+GuEV.ossoPQRGpGY.lhASSQRSAAlSlSPsspR..RSEAApsTo.+SsSDaPRosSPQoGPulSussoP.RGTE......TRoRsEs.............RpsSPpRKsQQsQ..TssSHsluh....pRNVSPsREEuTRRGuEsKsGR.ElopRsSlssDAKSSRRLSFlDpKDNlp.......s.l..E-DPPSKVQsPQGVRVPRRh.laPKDEAVQT......EPIR+.hTsuElRSP+sPsoPE+uSSRhssD.RsspR+lPuQEuEhGPpSSIhoEPKAL+RN..hsLESSLKLSVLKDLDuGHRV.ShRs-PESh+KpSVYsETKPSsKVLIuS-VEosh+SShRuDsEluRRVTISsGsQSlpsspRsTuRssSESP.+KSolasoP-..aKt....p..+Pscsshh.stss.RYs......Es.SpKPSlHA..ELELTPRPLPPRSLPRYGPDSSWWALLNPElEhP.QSpPTT.........PD.....FEPK...SPP.....PLDPhlShaEMDSSPFCEDLhFQREKA........ossP....PssPKESPSpA.PLREVPQ...uP..........KaTsKQPlQRFSAFFLDVSEEMYNRVIWWL 
..VKTNKsGuKVAVSApRGuEV.TsToPQRGpGY.llASSpRSAAlSlsPS.pR..RSEAAHsTo.+SuSDYPRSsSsQsGPGh.ussoP.RGoE......TtsRoEu............uRHsSPHpKoQQ.............oQ..ThASHssoh....pRNVSPsREEusRRGGESKsGR.-lup+sS.hPDAKSoRRLSF.DQKsNLp.......o.l.hE-DPPSKVQNPQGVRVPRRhLsaPKDEAVQT......EPIp+hhTsuElRSP+sPo.PEHuSSplusD.pTAQRRlPspEuEsuPhuSI.oEPKALaRN..hNL-SSLKLSVLKD.DGsHRV.Ss+s-PESl++aSVYsETKPSsKVLlSSpVESNVRosIRGsoEVGRRVTISPusQSlpssp+VTupuVSEus.+KSShaVTPEshYKQ....pTp+PscsshMS.GPs.RYP......E..SpKPSlHA..ELELTPRPLPPRSLPRYGPDSSWWsLLNPElEhP.QSpPTT.........PD.....FEPK...ssP.....sLDshLShachDSSPFCEDLhFQREKAS.SP............PssPKEsPShA.PLpEVPQ...sP..........KaTsKQPIQRFSAFFLDVSEEMYNRVIWWL.. 0 1 1 2 +15408 PF15549 PGC7_Stella DPPA3; PGC7/Stella/Dppa3 domain Iyer LM, Aravind L, Eberhardt RY, Coggill P re3 Manual Family The domain belongs to a fast evolving family known only from the placental mammals [1-3]. The PGC7/Stella/Dppa3 protein protects imprinted regions from demethylation post-fertilization [3]. This suggests that it might bind methylated DNA sequences directly [4]. The conserved core includes a postively charged helical segment and a C-terminal CXCXXC motif that is predicted to chelate a metal ion [4]. Most placental mammals contain 3-6 paralogs of this domain family. The CXCXXC motif is also conserved in a subset of fungal MBD4-like proteins [4]. 
32.30 32.30 32.30 36.00 22.70 27.80 hmmbuild -o /dev/null HMM SEED 161 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.13 0.71 -11.22 0.71 -4.25 11 48 2012-10-08 22:59:17 2012-10-05 13:55:24 1 1 21 0 18 71 0 155.40 37 88.99 NEW MD.P.QKhNPshss-S.....S.htstEsSp-ssus..SQPshSEpLhhsLusLosuPGsc.s.sPLPEuLLpQ+YR--..KThpERphERlt....asQRK+phLtplRR++hcHhAPY.....plcc....cu...+lssu.sD..RcQNtFRCcC+YCQSpR.NhsG.Pu-pp......SW-sLspGLouLslSlGTNpss .....................MD.P.QKhNPs..scS.....S.MsstEsSp-ssusSQP.hSEpLhhsLSsLo...sPusp.s.sP..LsEuLLppphRpc.....cohpE+hhE+lt....F.pR+hthLtpVcRc+hs+Mtsa......tlt+...........ct...+hs.h...s-..+spp...FRCpCpYCpupt.........................W-......su.shplGsspss................... 0 2 2 3 +15409 PF15550 Draxin Draxin Eberhardt RY, Coggill P re3 Jackhmmer:Q8NBI3 Family This family of proteins inhibit Wnt signaling and act as chemorepulsive axon guidance molecules [1-2]. 27.00 27.00 33.60 33.60 25.80 25.70 hmmbuild -o /dev/null HMM SEED 324 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.13 0.70 -12.44 0.70 -4.77 10 51 2012-10-05 13:15:45 2012-10-05 14:15:45 1 2 36 0 31 52 0 272.10 61 88.17 NEW tpshssNshppppPththspusHp...........+RpGhuKKcRusGhhu..t.thssscsss-uoslpuLssVtLEt.tP...uuLhp..cK-sahGhchP.stR-Np.sG.pc..+s+K+uR-H++..........sRRD+lRpp+Gc...stP..SSlh...Kcscs.EDth.ss..............s-ssosluPslh.hoshtss.shoEcsssLPssSs+PQh..u.Rs+ssGDVMPTLDMALFDWTDYEDhKP.-hWPSuKK..KEK+RSKshSsGNpTosA-s.EPCDHHLDCLPGSCCDLRcHlCKPHNRGLNNKCaDDCMCpEGLRCYAKFHRpRRVTRRKGRCV-PEolNuDQGoFIsV ......sps.PEN+lsh.tPtLapspuuHH...........+R+G.uKKcpusGhsu..t.ptssspss+puotLstht..h.tp.uP...AuLhQ..cK-lhLGhthPasE+ENpsPG..E...+s+K+uREH+R...........RRDRL+hHRG+shstuP..SSLh...KKsEhsEsth.su............shpEuSTSLAPThhaLToh-su.suTEEs.lLPsTShpPQt....psRsDGEVMPTLDMALFDWTDYEDLKP.-sWPSAKK..KEK++uK..sSsGNETSsAEG.EPCDHH.DCLPGoCCDLREHlCpPHNRGLNNKCFDDCMChEGLRCYAKFHRNR.RVTRRKGRCVEPEoANGDQGSFINV.............. 
0 2 5 12 +15410 PF15551 DUF4656 Domain of unknown function (DUF4656) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NAX2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 286 and 398 amino acids in length. 27.00 27.00 37.50 37.50 18.40 18.30 hmmbuild -o /dev/null HMM SEED 370 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.15 0.70 -12.31 0.70 -5.27 3 51 2012-10-05 13:20:37 2012-10-05 14:20:37 1 2 35 0 29 42 0 317.30 55 90.92 NEW sh+cuuQPPssRpTRRP.sPKDPGt+tPESITFISGSAEsPPEsPACC.LsRPWVWshCKAVFCFR+CRuClQRCGuC.VRuC-sCLuusssPEuhuEtuWuKEHNGsPP..SPshAPPSRRcGpRLKsTs..GSSFSYPDVKLKGIPVYRYRsuhuPusDsDSCsKEPsAEsPPhRaSLPuTLuuS...PRSSE.EYYSFHESDLDLPELS..SGSMSSREIDVLIFKKLTELFSlHQIDELAKCTSDTVFLEKTuKISDLISSITQDYHLDEQDAECRLVRGIIRISTRKSRoRPu..TsEGRpoRsutsuGsAPDSGsDTMluSVL.SQDDLsVQISQETTSDAIAR+LRPYGTPGYPsSHDSSFQGTETDSSGAPLLQVYC .......................................................cp.sQPPssR+TRRs..DPKDPGt+GPESITFISGSAE.sss.Es.PsCC..hpshsWshC+AsFC......Cl.pCGsC.h+uCssCLuss..-ss..Ests-tshsc-H..NGh.Ps..SPspusPsc+c..upc...hKp.sh....GSSFSYPDVKLKGIPVY.Y..s.u.h.DsDSCsKEPlscssPhRpSlss.ThssS...PRuSE.EYYSFHESD.LDlPEhu..SGSMSSREIDVLIFKKLTELFSlHQIDELAKCTSDTVFLEKTuKISDLIsSITQDYpLDEQDAEsRLVRGIIRISTRKSRsRPp.....sspG.ppp+.tutssusAPDSGpETMlsSsL.SQD-..LsVQISpETTADslARphR..sau....ssGhPhu+DS.SaQsT-TD.SSGAPLLpVYC......................................................................... 0 2 5 12 +15411 PF15552 DUF4657 Domain of unknown function (DUF4657) Eberhardt RY, Coggill P re3 Jackhmmer:Q8NAV2 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 305 and 370 amino acids in length. 
27.00 27.00 27.60 27.00 19.10 18.10 hmmbuild -o /dev/null HMM SEED 301 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -11.99 0.70 -5.13 14 41 2012-10-05 13:34:09 2012-10-05 14:34:09 1 1 27 0 22 38 0 262.30 55 77.22 NEW MuVGDSsLAs.PGLSQDSLshEPsGSP........EPPApluRLLAS+KLEQVLERS+pLPouPASLStpppshp..uKPcsEhslhuAGcQEuTcAEo-LEAGLEEuEVV......tGhtP-AWACLPGQGLRYLEHLCLVLEQMARLQQLaLQLQoQRP.tDPEEEEp......slAPuPsPS+sPGspVpGshEhLSQTccT.GuKsAuPsKVGsPSssPPRLsEAPsEPAHohPSSQGHKpDLSHWsKVKVLLNRIRWRSP+pPEPsAPPs..Gs.uPRhESR-.LPE+P.spshRKTFMPSLVVKKQR.AKNLSV ..........................................................................MsVGDSsLAs.PGLSQDSLshEssGSs........EPPAplsRhLASpKLtQVLERS+pLPssPsolStpppshp..sKPcpEhPlhuAGpQEuhcA-o-LEAGLEE.EsV......uuLsPtAWACLPGQGLRYLEHLCLVLEQMARLQQLYLQLphQRPPsDPtEEEp.........shAP.PsP.+sPGstspsPhELLSQTcp......T.GAcuAuPPKVtlPoANPPRLsEsPsEPsaphPSSQGHKpDhSHWsKVKVLLNRIphRS.+tPEPsAsPD..Gs.sPRh-S+c.LPE+P.spshRKTFMPSLVVKKQRAKshs..................... 0 2 3 5 +15412 PF15553 TEX19 Testis-expressed protein 19 Eberhardt RY, Coggill P re3 Jackhmmer:Q8NA77 Family This family of proteins is expressed in testis [1]. 27.00 27.00 61.40 41.80 25.80 25.70 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.44 0.71 -11.24 0.71 -4.37 12 30 2012-10-05 13:48:42 2012-10-05 14:48:42 1 2 17 0 16 24 0 142.80 58 56.18 NEW MCPPVShRastEGMSYLasSWhYpLQHGsQLplCFuCFKuAFL-lK-hLE.EDW.....E-E-WDsE.ME.hoEAtsEQtussG..t.uW.GpG.hp.spusS.shGsGsLssus.t.p-..hs.phVPTELtPQ-AVPLuLGsEDADWTQuLPWRhsslPsCoHWPo ..........MCPPVShRattEGMSYLYuSWhYQLQHGsQLplCFsCFKAAFL-hKchLE.EDW..........E--sWDsE.ME.hoEAtu...........EQtu..ssG....t.uW.Gpu.GpsspusS.uhGsGsLssus.s.E-h.LssphVPTELtPQ-AVPLuLGsEDADWTQuLPWRF-GLssCSHWPo... 
0 1 1 2 +15413 PF15554 FSIP1 FSIP1 family Eberhardt RY, Coggill P re3 Jackhmmer:Q8NA03 Family \N 27.00 27.00 30.80 28.00 24.90 23.10 hmmbuild -o /dev/null HMM SEED 425 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.52 0.70 -12.61 0.70 -5.66 19 68 2012-10-05 14:32:29 2012-10-05 15:32:29 1 4 46 0 43 69 0 301.10 34 73.22 NEW DIIKGsLDuIS+PASs....SRs+PuS.RsSsu..............SLEVLoPEP...s.........htl-o.s.p.susc-................spopssssEccR.........spts.p....tt............p..phsccupD-chshsppph.sppoc-.c...........c...phDspLpcAIcKM++LD+ILs+K....h+EKEVK+QGlEh+hKLWEE.Lp.php....ppshpSsEEhcNT+KFLuLTsssppss.....ph..p.pssahsVFtTQ.....lPs-c...hEpphpphtp.chTtssppscShh+sc+p.hspppp.ch....................K+sQDFIKRNIELAKsuss.lsMhDcEK+RLtELLcDlD-tsushssspu.Dp.sshLlPGcGYT.ss.spppQLAEIDoKLQp.lssss.th...oShs.sthpspssQ....cs.hppp.psschsPGE+lLRsTKEpRD.QppRL+EI-pcLcchcEs ..............................................h.cGs.c.h.p.tts........stp.R..................slEhLss-s................hp....t.p.tpp...................tpsppp.tEppp..................s..........................hscptpcpp.sh.ptph.sttscp.c..............tphDsplpcAIpKMc+LDpILsc+....h+EKElK+psh-h+h+LWc-.lp.t......ttt.t.pcp.tNTppFLuLss..tt...........p.ptsh.slF.TQ............ls.pp....ctt.pt....s.ohcsptscuhhcst+p.hppppt.p.t..................t++spDFIKRNI...........E...........LAcputp.l.sMh-cEKcRL.ELL+Dl--tsss.sstpu........s..sshlssspGYs.ss.sppppLAEI-.KLQp..hssts.h.....s.s.st.tpp.sp.............................t.....p.tt.p.tsG-plL+ssKcpR-..ppRL+cI-ppLp.hpc......................................... 0 11 17 27 +15414 PF15555 DUF4658 Domain of unknown function (DUF4658) Eberhardt RY, Coggill P re3 Jackhmmer:Q8N912 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 129 and 161 amino acids in length. 
27.00 27.00 88.90 88.90 22.20 20.00 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.74 0.71 -10.60 0.71 -4.27 9 27 2012-10-05 14:35:39 2012-10-05 15:35:39 1 1 19 0 12 30 0 122.60 61 69.82 NEW hsRAtREssR+CPsSILRRS.sE+ps.GsEPQRToRRVRFREP.EVAVHYIAsRssT..sTspu....PuRPRP+tGSLLLRLoVClLLllALGLYCGRAKPlAhALEDLRApLLsLlLRLpHsALoCW+sLLpL ......sRApREssRKCPPSIL+RSpPE++pstAEPQRTSRRVRFREP.tVsVHYIAs+ssT..uTl+s..........PuRP.R...P+G..........GSLLLpLslClLLslALGLYCGRAKPVAtALEDLRApLLsLlL+LhHsALoCW+sLLpL..... 0 1 1 1 +15415 PF15556 Zwint ZW10 interactor Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95229 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 127 and 281 amino acids in length. 27.00 27.00 81.60 61.20 24.20 23.30 hmmbuild -o /dev/null HMM SEED 252 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.19 0.70 -11.82 0.70 -5.13 8 45 2012-10-05 14:41:10 2012-10-05 15:41:10 1 2 23 0 17 49 0 188.50 66 88.54 NEW QEEAELPAQILAEFVMDSRKKDKLLCSQLQVVDFLQNFLAQEDssQGLDPLASEDTSRQKAlAAKEQWKELKATYQEHVEAIouuLTQALs+hEEAQRKRsQLQEALEQLQAKKQVAMEKhRsAQKQWQLQQEKHLQHLAEVSAEVRcRQoGsQQELEpLhQELGsLKQQAGQERDKLQRHQTFLQLLYTLQGK.LFP....EAEAEhPQ......L-LPEDK....sQ..TpPQEQ......NTGDTMGRDtuVopKAsG.hQPAGDsoLP ....QEEAELPApILsEFVhDSpKKDKLLCSQLQVVDFLQNhLAQEDTs.puLDPLASEDTSRQKAlsAKEQWKELKATYpEHVEAIphuLTpALsphEEAQRK+sQLQEAhEQLQAKKQhAhEKh+hAQpQWQLQQEK+L....QpLAcsSAEV+pRpptsppcLpt..QcLtsLpQQAtpcp-KLQR.QoaLQLLhoLQuK..hsEu-sEh....................................................................................................... 0 1 1 3 +15416 PF15557 CAF1-p150_N CAF1 complex subunit p150, region binding to PCNA Coggill P pcc manual Domain CAF1-p150_N is part of the N-terminus of the CAF1 complex p150 subunit that binds to PCNA - proliferating cell nuclear antigen. The PCNA mediates the connection between CAF-1 and the DNA replication fork. 
The CAF1 complex is essential in human cells for the de novo deposition of histones H3 and H4 at the DNA replication fork [1,2]. 25.00 25.00 37.10 34.40 21.00 16.10 hmmbuild -o /dev/null HMM SEED 229 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.57 0.70 -11.54 0.70 -5.02 4 49 2012-10-05 14:44:19 2012-10-05 15:44:19 1 4 31 1 24 45 0 203.20 44 23.92 NEW MLEELECGAPGARGAATAMDCKDRPAFPVKKLIQARLPFKRLNLVPKGKADDMSDDQGTSVQSKSPDLEASLDTLENNCHsGSDIDFRsKLVNGKGPLDNFLRNRIETSIGQSTVIIDLTEDSNEQPDSLVDHNKLNSEASPSREAlNG...QREDTGDQQGLLKAIQNDKLAFPGETLSDIPCKTEEEGVGCGGAGRRGDSQECSPRSCPELTSGPRMCPRKEQDSWSEAG ................................hAM-CKD+suhP..sKKLlQARLPFKRLNhVPKEK..s-......-t..u..-..s.....p.........s....s.S.....spS...psPD...L-...s...S...L..DsLENcCchuSDlcFpPKLVNGKGPLDsFL.pp.cl.csSl.upo..hVIIDLTEDSN..-p.s..........Du....scHscL.socASsup.cslNG....tpcstsp.u...pu.ppsphs..tET.oslPscsctt..s.t....tpp..s.t.u..shP.L....p.....t..sWSt................................................ 0 1 3 9 +15417 PF15558 DUF4659 Domain of unknown function (DUF4659) Eberhardt RY, Coggill P re3 Jackhmmer:Q8N715 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 427 and 674 amino acids in length. There are two completely conserved residues (D and I) that may be functionally important. 
27.00 27.00 32.50 31.10 25.50 26.70 hmmbuild -o /dev/null HMM SEED 376 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.75 0.70 -12.45 0.70 -5.80 21 78 2012-10-05 14:49:01 2012-10-05 15:49:01 1 2 41 0 48 66 0 313.20 35 59.61 NEW tslPpRD+KIsALMLuRhcctphhpE.pptAthtWEp.++p-pph+.p.E+E+phtLtQupctWppphEpR+s+h.........spEp+sptccppppsh......pE.p+W+ctsEcQEp.Rp-+l-+s..phpscp+KppQEppL+ppEchtpsh+Eppp.hthpc+hppAspK+ph+p.hptQ+clptpNh.uphlpHpAhKhhl-pQp+sEc.hh+hSLEpphp+uQEsappLlccRs+EL+EKApKE-pQhp+A+htAEcpEc.p+pc+hcsLsclu-pKlppApphuccsspcKAp+ltp.ph.+E+sp+l.KpKlEcEEcs+hctlppuIc+K-pRsEQIp+EKcsslEpuRplARAShpsR-+l+p.htspoFD+MAhEApLpAshpct ..........h.slspRDpKIsALhLsRhpctphhtE.pttAthtWEp.+tpspphp.p.E+E+phhLtpupctWptph-pR+sp..........s.Epctttccpppp........sEpch+t.s-cQp..Rtp+h-cs..ttpsch+K.pQ.ppL+ppEchhpphREptp...hpc+htpAspp+phpt.hphQ+chpphNh.uphhpapAhh.hhspQtctEc.hh+hSLEtshtRuQ...E.aptLh.cpRp+EL+E+...Ap+E-.QhppA+.tAccpEc.pppt+hchLschu-p+lppApphsccssppKsp+ltp.ph.+E+sp+h.K.KsEc-Ecs+hctlhpuIt+K.pRscplpcp+csshpphpphupAShp.+ctsht....pohDphshEApl.Ap.tt........... 0 9 14 23 +15418 PF15559 DUF4660 Domain of unknown function (DUF4660) Eberhardt RY, Coggill P re3 Jackhmmer:Q8N6N3 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 93 and 189 amino acids in length. 27.00 27.00 32.20 31.90 18.20 18.00 hmmbuild -o /dev/null HMM SEED 108 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -10.14 0.72 -10.72 0.72 -3.97 14 53 2012-10-05 14:52:51 2012-10-05 15:52:51 1 1 37 0 28 47 0 103.20 61 53.36 NEW SSDS-scupscpppt..ts.sssssSsupsttt.........hLPcPDELF+SVS+.PAFLYNPLNKp...IDWE...s+......sl+APEEPsKE......FKsWKTNAVPPPEoYssc..-pK......ssPPuhDMAIK ......SSspE.DssEPE-ssp+sss..supuuG..Gptspuc.......tRLPGPDELFRSVoR.PAFLYNPLNKQ...IDWE...RH......VVKAPEEPPKE......FKlWKoNhVPPPEoYosE....KK.....PPPPthDMAIK.... 
0 5 7 13 +15419 PF15560 Imm8 Immunity protein 8 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and several conserved charged and hydrophobic residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family [1]. The protein is also found in heterogeneous polyimmunity loci. 25.00 25.00 154.00 153.80 22.70 22.50 hmmbuild -o /dev/null HMM SEED 133 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.73 0.71 -10.40 0.71 -4.49 7 13 2012-10-05 15:01:24 2012-10-05 16:01:24 1 1 12 0 1 11 0 133.00 45 92.86 NEW MElhlssVlGGplssp....+slpsllhcMRKplKcpFtshphEuLs+hKIslahSGDlSpYsspoGIhpsRYapcKKEalspFCID+phWoup.h.Dlp.KFLhhlps.hlpLGslI+pKLcptsYsFDschaK-hl MElslshVhGGplssp....KslpshlhpMRK.lKcoFpsh-.EulsphKlsLahsGDlSSYhspTGIYQsRYastKKEalspFCIDRspWousKhcDls.hFlhhlpshhIchGsLIcpKLtKhpYsFDhpsYKchV.. 0 1 1 1 +15420 PF15561 Imm9 Immunity protein 9 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and several conserved polar and hydrophobic residues. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 34.30 33.70 21.70 21.00 hmmbuild -o /dev/null HMM SEED 160 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -10.86 0.71 -4.64 5 11 2012-10-05 15:02:01 2012-10-05 16:02:01 1 1 10 0 1 10 0 140.10 38 90.75 NEW KIlTspt.......hcEIpchlc.+sstFEEIPIIoRasRtchLp+sIu+E-lslaLLNlALFaLNNIpLaA+clLsp.psssLFICITlVD...lE-EhsDl.GFsIPNILIS+cK.htsplppcstlNL-cpcaLpcphupluhpshFphY+opoDDGaGt-IhRIYllPK ..Kl.+shQ.cpI.phlt.hsstFEEIPIIophtR.chhhcshu+c-hslhhLNsuLFhLsNlpLhs+clLspppppsLFhCIThss....p..hsDl.GF.lPNIhISppt.hhphhppt.thslcp..aLpp.hppluh.shFthhhshosDsaG.-hhRlallsK................ 
0 0 0 0 +15421 PF15562 Imm10 Immunity protein 10 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with two transmembrane helices, and a WxW motif and a conserved arginine between the two helices. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 27.00 26.70 24.40 23.70 hmmbuild -o /dev/null HMM SEED 61 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.85 0.72 -9.14 0.72 -4.21 46 149 2012-10-05 15:02:37 2012-10-05 16:02:37 1 2 105 0 21 102 0 59.30 30 73.33 NEW llhlhlGllhLluuIhsWcWhhssp.ss......ph.....hl.chhGcpshRlhh.ul.lulllIhsulhha ....lhlhlGllhLluulhsWc.Wh..asspss......ph.....hl.cphG+pttRlhh.ul.LGlllIshulahF...... 0 4 16 18 +15422 PF15563 Imm11 Immunity protein 11 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved HxxRN motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 65.90 65.60 22.00 21.60 hmmbuild -o /dev/null HMM SEED 224 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.48 0.70 -11.53 0.70 -5.25 3 9 2012-10-05 15:03:45 2012-10-05 16:03:45 1 1 9 0 2 8 0 221.80 47 96.85 NEW MN+IpshEhshSNshFWNFhhsaa.RGaDEE+EsNlDEVh-......lVEscchKuY.W..sWFsQlDlKTsEsttElE..NPRTlshPIsDDlsaslEFHPsEThYFlNDTYIGNlGGHFHLpaLTaoELhuI..pcEKYuSLLFaLLLPLTuIcEsEKsIl+sEIspHLpcIPlFKsHSsYIuDC..ILNGLllu-SsFQppcGIGhlsspNHShRNthsY---+csIpELN+LLV+ .MNhIpphEhshSNshFW.Nhhhpaa.cuaDEE+DsNhDEVh-......ll-pcchKpY.W..sWlsQlDlK....TsEspGEIE..NPRTlsLPIssDlsholEFHPssThYFLNDThIGpluGsFHL+aLTaoELhcI..TcEKYuslLFaLLLPLouI+EpEKshhhstIhp+LppIPlFKpHSsYIucC..ILNGLllusSshQph.tIGhlsspNHShRNthhY---+ppIpELNhLLs+... 
1 1 1 2 +15423 PF15564 Imm13 Immunity protein 13 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in heterogeneous polyimmunity loci of polymorphic toxin systems [1] 25.00 25.00 235.30 235.00 19.80 19.00 hmmbuild -o /dev/null HMM SEED 131 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.69 0.71 -4.44 2 3 2012-10-05 15:04:31 2012-10-05 16:04:31 1 1 3 0 0 3 0 131.00 66 79.39 NEW .ps-FtSVlsEFtpLls-asFpCPcKLWYssLlsLSKpl.DIYYCYlIsRVYKpsGSLEsThWVGsIsRPDDGLENLSANIKhQIGYsQshD.hFF+pCESKIl.lIESG.LhsLlssSQ.EhKh.ShHNh ..oDDF+SVVAEFGNLINDFGFSCPEKLWYsNLISLSKNVcDIYYCYVIARVYKsDGSLETTLWVGPINRPDDGLENLSANIKIQIGYTQVsDPLFFRNCESKIIsLIESGILKTLLssSQNELKaPSIHNh. 0 0 0 0 +15425 PF15565 Imm16 Immunity protein 16 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly alpha-helical fold and a conserved DxG motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-SHH family of HNH/Endonuclease VII fold nucleases [1]. 23.00 23.00 23.00 24.30 22.10 22.20 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.55 0.72 -10.30 0.72 -4.18 6 55 2012-10-05 15:06:40 2012-10-05 16:06:40 1 1 47 0 13 56 0 98.40 38 65.46 NEW ccLhuscphtsQs-CccF.ullcAlpcpl-tcllcsLLpTFuDc.cDa.GlQ-plVpVl-ps-sphahsclstthsplh-pAs.+EWAhhLlGRllNSsstcpthttlh ....................Lhp.Rhhcsp.p-.h.c.FppsLtclhphsssplIs-LCllh-D-.....pp......-VMFsLlahlE...p.....th...-ct...LhplsculP+MlpsA...EWsclLahRILNscthR.tYscl.h......... 0 0 5 7 +15426 PF15566 Imm18 Immunity protein 18 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved histidine residue. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox12 or Ntox37 or Notx 7 families [1]. 25.00 25.00 51.80 51.40 22.60 22.10 hmmbuild -o /dev/null HMM SEED 52 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.89 0.72 -8.63 0.72 -4.63 9 13 2012-10-05 15:10:02 2012-10-05 16:10:02 1 1 13 0 6 12 0 52.50 36 65.01 NEW tcGLthLhcpLppLps+.....spHhHLMTsuWuGsELo....-ppQupcs.pLlsHl+l ..-GLchLtcplppLtp+.....scHhHLMTPuWGG-ELo....EccQup-s.pLlpHl+l. 0 3 4 4 +15427 PF15567 Imm19 Immunity protein 19 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a protease domain such as Tox-PL1 and Ntox40. In some instances, it is also fused to a papain-like toxin, ADP-ribosyl glycohydrolase and a S8-like peptidase [1]. Based on these associations the domain is likely to be a protease inhibitor. 25.00 25.00 31.20 30.50 23.40 22.30 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.87 0.72 -3.98 20 224 2012-10-05 15:12:48 2012-10-05 16:12:48 1 6 217 0 16 64 2 85.00 73 70.53 NEW hccAhtpAppaLpt....ss-hs.........lll..ststchshGWlFsasotcaLcoGD.tspLsusuPllVc+ssGclt.........shusspP..htcalpcacspt .................Y.HDAFAKANHYLD.....DADLP.......VVI..TLHGRFSQGW..YFCFEAREFLETGDEAARLAGNAPFIIDKDSGEIH.........SLGTAKP..LEEYLQDYEIKK............ 0 8 11 13 +15428 PF15568 Imm20 Immunity protein 20 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved GR, and GxK motifs. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family of nucleases [1]. 25.00 25.00 43.50 43.50 24.40 19.30 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.71 0.71 -10.48 0.71 -4.25 2 3 2012-10-05 15:13:23 2012-10-05 16:13:23 1 1 3 0 1 5 0 96.70 50 94.16 NEW MAcpRhhl.GGVuLhhGRV+pu..shh..pDclE.hL.D.sFhp-hPFKhluhlhhaGpKs.LhPpY-.INK+au-LPltlELchthLchAsp...-llhshFhhusl-sLlcVucKYtLPst.Lcp.suh MAHNRKYVPGGVALVMGRVRNS..GKVVEQDElE.hL.D.sFhp-hPFKhluhlhhaGpKs.LhPpY-.INK+au-LPltlELchthLchAsp...-llhshFhhusl-sLlcVucKYtLPst.Lcp.suh... 0 0 0 0 +15429 PF15569 Imm21 Immunity protein 21 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved phenylalanine and tryptophan residues and a GGD motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox19 family [1]. 22.20 22.20 24.20 24.20 20.30 17.50 hmmbuild -o /dev/null HMM SEED 91 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.39 0.72 -10.12 0.72 -4.21 11 48 2012-10-05 15:14:00 2012-10-05 16:14:00 1 1 45 0 6 26 0 84.30 53 77.70 NEW sLt.lGltphAashpDsLchlshhcppphsILGGDVYphpsstlclTYDuWaas...cpss-sshp...psh.hAh-YIspYhs..psup.phlasl .......s.LhulGIcEsAhPlshAIDILNLalsERILVLGGDIY.IKKDN..YFYQTYDNWYYE...GSNLFNSI-.......KAMHYLSQlKhENAYVSFV...Lph.............. 0 3 6 6 +15430 PF15570 Imm24 Immunity protein 24 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold with conserved tryptophan, proline, aspartate, serine and arginine residues. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-AHH family of HNH/Endonuclease VII fold nucleases [1]. The gene for this toxin is also found in heterogeneous polyimmunity loci. 23.40 23.40 24.10 27.00 22.00 22.60 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.95 0.71 -10.69 0.71 -4.39 11 23 2012-10-05 15:15:18 2012-10-05 16:15:18 1 1 15 0 11 23 0 119.60 29 55.27 NEW hshpW.......aphs.stphs.......p.....hPppLa.lCs++hctLpFDah..as.shaIlS-cFlchlpchphspphptshlplss+cGc.pls.cchYYhlRhhahsp-.F...ctSph.pp-hpcsh...........l..Y.clpL+-ps. ....h.phpW.......apspstth........phPspLYhlCsKKsttLpFDah..as.phhIlS-cFLchlpc.shsccashupltlls+Ksc..ls.cchYYhlRl.ahsp-.Fh..ct.ph.pp-hpcpl...........l..Y.chpL+-psh...................................... 0 0 7 11 +15431 PF15571 Imm25 Immunity protein 25 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI1, Tox-URI2 or Tox-ParBL1 families [1]. The gene for this toxin is also found in heterogeneous polyimmunity loci that show variations in structure even between closely related strains. 25.00 25.00 27.10 27.00 23.00 22.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.77 0.71 -10.81 0.71 -4.01 3 30 2012-10-05 15:15:50 2012-10-05 16:15:50 1 1 30 0 3 20 0 125.40 37 95.19 NEW MKaFIGGEIE....psIsDKFR+sRNsV.....I-aLDsL.GscIpSINElSFhVas.LKcFphsPlo+YpKKpNRlELEIhI-F-pFEsANDspssELLKQsILAVIpcYcNKsI.psSlDlIhoKlEsEIpp .M+hFloGELE....psIu-c....FRcsRspl..........hch..ssh.ts-l.ppIs..lshhVhh..hKpap....t.pEhphap+Kppch-hRLhI-a-pF.oANDSppspLllpsIlpuIcpht...sKs..pp.h-s...KhctsI....................... 
0 1 3 3 +15432 PF15572 Imm26 Immunity protein 26 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved C-terminal tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-ColE3 family [1]. 25.00 25.00 26.70 26.50 22.20 18.90 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.61 0.72 -10.12 0.72 -3.86 19 24 2012-10-05 15:16:50 2012-10-05 16:16:50 1 3 21 0 11 26 0 95.40 35 75.88 NEW hKLl-apschlpRGslhRhsut...............aPaE.clVDFMVh-h.s..tt....shuLllsSGaKAGhlh.hhhPpEuhsppst..ulSTpWllcNWp+WlYscCslccVal .....+Ll-atschlpRGslhRhsuh...............aPa-.plVDFMVh-t.s.tt....shuLllsSGaKAGhlh.hhhPpEuhsppsh..ulSopWllpNWp+alYspsssccVhl.... 0 2 8 10 +15433 PF15573 Imm27 Immunity protein 27 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved KxGDxxK motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 26.20 101.60 23.80 18.10 hmmbuild -o /dev/null HMM SEED 259 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.79 0.70 -11.50 0.70 -5.18 10 46 2012-10-05 15:17:35 2012-10-05 16:17:35 1 1 40 0 5 43 0 234.00 55 96.89 NEW sLhsuIWaG-loshols-LKpsLLsspTE+EsllLIlELhKtGDFoVKsLLIpLMNpTcDEsVLNLCIRLFCSVuTHDDL+-oNNL+FLusASEhuVaTFsuuAlsTLSYEVIPYLLsLLEEWE-..oDlEhuIRDuLDhFLNacspluE-ATlEElGshYh-hlcspDhspYYY+ssLAFPGDLTKcLhpclhlAAp+EEpa+hhlIPSLLSIaTGEKVPlDhcTIIoscch+chlsYIDsLocKsWccGpKYFYGahV ..LhpsIWaG-ho.hshpplKpplhcusTEpEslh.lhELhKtGDFo.KsLLlpLMNpT+DEsVLNLCIRlFhSVATH-DLc-oNNL+FLSpsoE-sVcTFsuuAssoLShEVIPYLLALLEEWE-..o-stphIRDulD.alsacDpluE-Aol-ElGphYhcaspppDsppYYapppLuFPGDLsKclhpcVMhAApsccphph.lIPSLLSIWoGh+sPh-YsTIIospph+-hhsYIs.LopcpWccGpKYFYGahl..... 
0 1 4 4 +15434 PF15574 Imm28 Immunity protein 28 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an all alpha-helical fold and a conserved HRG motif. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 25.00 25.00 70.50 70.40 22.30 18.30 hmmbuild -o /dev/null HMM SEED 123 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.59 0.71 -4.56 8 15 2012-10-05 15:18:14 2012-10-05 16:18:14 1 1 15 0 0 16 0 117.70 43 93.88 NEW alsEuhphsschlphlpl..DlpDE......hpRQlluuYlFGhLNuLAh-cshsPsDlQusMIclhIcpLsYsspsAsphspFlIcuTDKpFHPThaAIIHRGlEGYahYp-c.+.s-LpcDFp-IlpllK ..lsEulchlschlpllpl..DlpD-......h-+QllAuYlFGMLNGhAa-csIsPsDlQuhMIcIuI-KLsYosEsAsQhoQFlIcuTD+pFHPThaAIIHRGlEGYahYp-p.c.ppLpcDFp-IlpllK. 0 0 0 0 +15435 PF15575 Imm29 Immunity protein 29 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an all alpha-helical fold and a conserved proline residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-REAse-1 or Tox-REase-6 families [1]. 
26.00 26.00 26.00 26.30 25.30 25.50 hmmbuild -o /dev/null HMM SEED 215 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.74 0.70 -11.29 0.70 -4.73 81 217 2012-10-05 15:19:04 2012-10-05 16:19:04 1 2 129 0 57 210 3 198.40 19 70.51 NEW csshsph...cphthhssthphhsshhtsst...........................h............h...s.t.hhs...sts........Wh..ssh..hhAllscspp.hhshLsphss.........ph.hppstsths..............hshshhttLpshhhsps......shhcplhpthp..tsts.....................phhpthhh.s.hphahslh...p.pDpssappuLtptLph+pthhttpcttt............sscs...........hlshhsluhAplAacp.G.....hplpl-SsYlPppLl ...................................................................tth.hh.sththht.h...t..............................................s.....hs...hhs........h...ssh...ahslhscs.p..hhchLsph.s.........sh.hppspshhs.............thshshhhhhpshhhsps......t.hccLhpphpts.hs.............ts..tchhpphhh...hchhhuls...p..pDtpshctuLpthlch+pthhttpcp.p............hhct...........alshtllshAplAhpp.G.....hplsl-sthlPptLl................................ 0 11 28 42 +15436 PF15576 DUF4661 Domain of unknown function (DUF4661) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95873 Family This family of proteins is found in eukaryotes. Proteins in this family are typically between 281 and 302 amino acids in length. 
27.00 27.00 56.40 29.50 24.90 24.50 hmmbuild -o /dev/null HMM SEED 253 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.85 0.70 -11.68 0.70 -4.73 7 26 2012-10-05 15:20:41 2012-10-05 16:20:41 1 2 20 0 13 29 0 232.20 69 83.25 NEW RRKss+PD.ssPEsRhhDSSPENSGSDWDSAPETMGDsGPPKsKDSGsp+ssGsAPE.SR....csps-QLGS+RMDShKhspsssshpEStRLEAuGslsplGpDPhsusGshc.tVs.Etthss.GPEAPVEKsuR+Q+LLGWLpGEs....GAPspYLGsPEEhLQISTNLTLHLLELLASALLuLCSRPLRAsLDALGLRGPLGLWLHGLLSFLAALHGLHAVLSLLTAHPLHFACLFGLLQALVLAVSLREP .............RRKsh+PD.PsPEPRplDSSsENSGSDWDSAPETMGDVG.PKTKDSGshRsStAAsEPS+....EspVEQLGS+RMDSLKh-pssSsTQESGRLEAGGA.P+LGhD.VDSuGs++sGVSPEGshSsPGPtAPlEKPGRRpKLLGWLRGEP....GAPspYLGG..PEEsLQISTNLTLHLLELLASALLuLCSRPLRAALDsLGLRGPLGLWLHGLLSFLAALHGLHAVLSLLTAHPLHFACLFGLLQALVLAVSLREP........... 0 1 1 1 +15437 PF15577 Spc7_C2 Spc7_C2 Coggill P pcc manual Family Spc7_C2 is a short family to the C-terminus of fungal Spc7 proteins. The Ndc80-MIND-Spc7 complex plays a role in kinetochore function during late meiotic prophase and throughout the mitotic cell cycle [1]. The N-terminal region of Spc7 co-localises with the mitotic spindle, and it has been argued that Spc7 has the potential to associate with spindle microtubules and that this association is regulated by the C-terminal part of the Spc7 protein [2,3]. However, this family represents only the conserved region towards the end of the C-terminus; the majority of the C-terminal part is in family Spc7, Pfam:PF08317. 22.30 22.30 24.00 22.90 21.70 20.70 hmmbuild -o /dev/null HMM SEED 67 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.17 0.72 -8.70 0.72 -4.32 16 47 2012-10-05 16:37:54 2012-10-05 17:37:54 1 7 47 0 42 48 0 66.50 29 5.52 NEW hchLs.lpptWspuphlsp-l+tlphpaP.ssssc.so.Dpolplpss.lhlsslco+Vclshslst.pltp .....t.plLphVpsuWspAptlsppl+hlshsaPTsls.......c.oS..Dsol...sls....uS.lLLsslpT+VclshsLpt.....p........ 
0 10 26 37 +15438 PF15578 DUF4662 Domain of unknown function (DUF4662) Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95567 Family This family of proteins is found in eukaryotes. Proteins in this family are approximately 290 amino acids in length. 27.00 27.00 201.10 200.90 20.20 19.60 hmmbuild -o /dev/null HMM SEED 270 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.95 0.70 -11.76 0.70 -4.81 8 21 2012-10-05 16:38:14 2012-10-05 17:38:14 1 1 20 0 13 24 0 259.30 75 91.71 NEW HPIYVRR-PSIPTYGLRQSILLNTRLQDCYVDSPALTNIWsuRTCAcpNIpAPsPGTTSSWEVVKNPLIoSSFSLVKLVLRRQLKDKCCPlPsKFG.EAKspK..RLKsKDsSssKAT.puRhRNSIssKSKpPuGQhPG..SscpR+PAGulpESKESSKEKK.uTVsQDLE-RYAEHVAATQuLPpDotTAAWKGQA.LPETpKRQpLSEDsLTIHGLPsEuY+ALYHuVVEPMLWNPSGTPKRYSLELGKAIKQKLWEALCSQAAsPEsAQcD ............HPI.VRRDPSIPhYGLRQSILLNTRLQDCYVDSPALTNIWhARTCAKQNIsAPAPuTTSSWEVV+NPLIASSFSLVKLVLRRQ.LKsKCCPsPpKFG.EuK.SK..RLKpKDsSshKATQpuRhRNSISSKSKpPAGp.........RRPAGGIpESKESSKEKK.lTVRQDLEDRYAEHVAATQALPpDSGTAAWKGpsLLPETpKRQQLSEDTLTIHGLPTEGYpALYHAVVEPMLWNPSGTPKRYSLELGKAIKQKLWEALCSQuAhsEGAQ+D.. 0 1 1 1 +15439 PF15579 Imm32 Immunity protein 32 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved tryptophan and phenylalanine residues, and a GT motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-REase-5 family [1]. 25.00 25.00 36.10 35.90 22.60 22.50 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.24 0.72 -4.45 36 138 2012-10-05 19:59:56 2012-10-05 20:59:56 1 1 70 0 44 152 0 95.40 43 46.47 NEW hpllpsls.ttasshalplsspsY..hpcpVF.sD+suVGWMLYLP+hITtpQVPEApsLlPV.ssscp..pGTIIVS.TDtsFoscNsEHlchANcIEIRLlDtsLL ....................h..hlpshh..hhps.hh.sssptY....hpcplF.sD+.uVGWMlYLP+hloppQVPEAcALIsVss.....t.....scp..pGTIIVS.TDsPFoscNPEHVtlAN+IEIRLlDtsLL. 
0 4 9 25 +15440 PF15580 Imm33 Immunity protein 33 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved tryptophan, and WE and PGW motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox24 or Ntox10 families [1]. 25.00 25.00 60.60 59.90 21.40 19.50 hmmbuild -o /dev/null HMM SEED 90 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.66 0.72 -10.18 0.72 -4.50 33 42 2012-10-05 20:00:33 2012-10-05 21:00:33 1 2 40 0 18 47 1 89.90 36 81.91 NEW lpWLpcWYtspCDG-WEHpaGlcIsTLDNPGWplpIDLscTshpshpht.lplcps..........c.sDWhtsplcssp.....FpusuustpLpclLphF+pWl ..lpaLQsWYtspCDG-WEHpaGlpIsTLDNPGWplpIDlspTshttpthtplplcps..........c.sDWhtsplcstp.......FpuhsusppLpclLphFhpW........... 0 7 12 16 +15441 PF15581 Imm35 Immunity protein 35 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and YxxxD, WxG, KxxxE motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene [1]. 25.00 25.00 137.30 137.10 21.80 19.90 hmmbuild -o /dev/null HMM SEED 93 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.26 0.72 -10.04 0.72 -3.97 4 5 2012-10-05 20:01:14 2012-10-05 21:01:14 1 1 5 0 3 5 0 93.00 41 86.43 NEW l.sslhhAYhhlD+SIoLoYhpQuhEoucuslcsLppLIEcEWRGLsptQVh+KLcsVAtppsttchVlK+..EsslIWF-pVRFpFspGRLspV VssslhFAYhWIDRSISLSYucQGpETAcuoV+sLppLIE+EWRGLPEsQVh+KLcAVAApustu+IVVKK..EGslIWFD-VRFNF--GRLcSV 0 1 3 3 +15442 PF15582 Imm40 Immunity protein 40 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved YxC motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-JAB1 family [1]. The immunity protein typically contains a signal peptide and a lipobox. 25.00 25.00 33.10 32.00 18.90 17.60 hmmbuild -o /dev/null HMM SEED 327 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.11 0.70 -12.17 0.70 -5.73 2 16 2012-10-05 20:02:21 2012-10-05 21:02:21 1 1 13 0 1 14 0 222.90 40 87.58 NEW lpQtVttQsclLlDSGhlhspYl.l.Elh.sDSNplY.IttuDsPus.thEhPS+lIKYK-+YLCFIELDEs.MStpEhhEt.....SsaptNLslp..tG+sWLLslpKht-K+ILlch...hhh..hFphsELWsYFSG.l.ptpss.MGlhSHDl.lssSYlss.h-..hcsL.......pshlcph.Gphal+NpTDSVlhLSpsot+p..YAVlsG.DoLhLsLpDSLPlhluPp-hK.LcYcS.P.ps..Fhp.h.ccD.W..hYpLFscSTasFlNlNsh.pph+lMapD.ssYu.slp.o.sph.h.IhN+GlYDKc.t.hphF+a. ....................................................lpQtVttQsKlLlDSGhl.s.Yl.l.Elh.NDSshIY.IpsADsPus..thEhPSKllcYKs+YLCFIE.LDE..MStpEhhEt.....SsaptNLhlp....csWlLslpKhtptphLlch....ha..hFphspLW...............................................hshlpph.GphahpN.hDphhh.spsoh+p..aAVlsG.DoLhLsl.Dols........p..........................hYpLhtcSTa.hhs.psh..thpl.apD....a......t..s..h...l.pcuh.......................... 0 1 1 1 +15443 PF15583 Imm41 Immunity protein 41 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved glutamate residue. The domain is often fused to one or more immunity domains in polyimmunity proteins [1]. 
25.00 25.00 30.00 39.60 21.40 21.10 hmmbuild -o /dev/null HMM SEED 158 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.92 0.71 -4.42 13 63 2012-10-05 20:02:45 2012-10-05 21:02:45 1 12 59 0 9 44 1 147.30 48 27.96 NEW MYI-.KYWGsaIGGoDDSLsLlsYLtspc+-cIoLsEIFsDhGLDcL.....shsF+pTsp..ltap....sp-G.h-h-FcaAIDllsDLAAllLECptsGpVsLp-L.t.ss.psRhIRIsAosEEhshlspALtDFspsPhcYDLsEMhs--DhtEhApcs-pLRpEL ....MhI-.pYWGpahGsSsDShsLspYLtspc+E.lslsEIFpDhsLDcL.....shNapps............phDG.sthcFc.AhplVhDLAsLlLEsctsGphNLtcl....tss.sRhhRIsATsEEhhslshALpcFAhuP.-YcltEhhD--.-hhEhuphsEplRtpL....... 0 3 5 8 +15444 PF15584 Imm44 Immunity protein 44 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly all-beta fold and GxxE, WxDxRY motifs and a glutamate residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox48 family. This domain is often fused to the Imm71 immunity domain [1]. 25.00 25.00 28.90 51.50 22.40 17.10 hmmbuild -o /dev/null HMM SEED 94 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.68 0.72 -10.36 0.72 -4.13 17 44 2012-10-05 20:03:57 2012-10-05 21:03:57 1 2 19 0 19 51 0 100.50 46 32.10 NEW sFPssLPtlP..s.stssllpSGcclPssGIWEPhss.psKl.................GChNYhltGohAPphttpsss.................pthsspWRLlWEDpRYpDGoIP-EEpsYlh .......hFPssLPcVP...pstsslIpSGpclPC-GIWEPVshtpsKlhulhsh...spt.h..sGChNYFltsstAPphtp-sss.................t.lsspWRLLWEDcRYtDGsIP-Epphal.... 0 0 0 5 +15445 PF15585 Imm46 Immunity protein 46 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved GxaG motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a Tox-REase-3 domain [1]. 
25.00 25.00 27.90 27.60 21.20 20.20 hmmbuild -o /dev/null HMM SEED 129 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.79 0.71 -10.43 0.71 -4.09 17 27 2012-10-05 20:04:40 2012-10-05 21:04:40 1 1 21 0 9 26 0 115.60 35 94.69 NEW MaEaHGWhTIptospppDs...tph-pllsclpphlschshs.......shlcLphhNGphhlphuGhsNH+spph.pllslappluclAPGSYGLLYh+DDE..-.....thsNpFRVahhtRGplocppDsaLSPslPhlED ........MhEaHGWholptostttDp...tp.cthhpclpthlschsh........shlsLphhNGphhlphuGhsNH+st.h.pllslappluclAPGSYGLLYh+DDE..-.....thsNpFpVhhhsRGplspppDsaLSPslPslED.. 0 5 8 8 +15446 PF15586 Imm47 Immunity protein 47 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved Wea (a: aromatic) motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox7 family [1]. 26.90 26.90 28.30 38.20 25.30 25.40 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.60 0.71 -10.38 0.71 -4.26 29 121 2012-10-05 20:05:12 2012-10-05 21:05:12 1 1 87 0 25 68 1 107.40 38 93.43 NEW +splKulpss..chs...LcsahP-csssFshhlplpIGs.pspp.Gu-.FplhlCTPcWLppphtptthh.htR+hLlVpc.Y.chcpIhshlpchlspCpupsWtclupKLuRhhtWEaEDYp ...........+lcLKshsh...DhD...hEcFsP-h.-NFphhlsLsIGh.-spp.G.ush.FplhICSPcWlpp...phpc.t..h......h...pstllhpp.a.shchIhp.IscIL-hC.sp.poW-cohspLtRaFsWEFEDYp.... 0 7 14 22 +15447 PF15587 Imm48 Immunity protein 48 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved lysine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-URI2 family [1]. The protein is also found in heterogeneous polyimmunity loci. 
25.00 25.00 35.30 35.20 20.70 19.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.17 0.71 -10.98 0.71 -4.84 7 30 2012-10-05 20:05:35 2012-10-05 21:05:35 1 1 29 0 2 21 0 153.90 43 97.65 NEW Mc.clpl...hl..plPpL.Ehsslcsl....+.tLpsalpslhstINlpc.Ls-WpLhlhl...shhus..NtIGlaK+uppaoSsKEhphSIulslPspccshaGlsch.cpuahs.lscppFh....lLpss.FspYsNLtcYIlEsuKluIh.hhppGhphpGhKIph. ..................Mp.Qltl...LlssplspLhEhspl.pl....pc.lpsYhpDlsNtIphD-.L...sDWpLhI.l...shhss..stIGIaK+uh.pasSsKEhploIuIPlPspcpsRaGlsch.....puahs.lsccsFh....lLsss.FuKYDNLhpYIl-suKhAIhshFphGFThpGhKIKK+. 0 1 2 2 +15448 PF15588 Imm7 Immunity protein 7 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly all-beta fold and a conserved arginine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a Pput_2613 deaminase domain [1]. The protein is also found in heterogeneous polyimmunity loci. 25.00 25.00 32.80 31.90 23.80 21.70 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.20 0.71 -10.31 0.71 -4.05 32 42 2012-10-05 20:13:00 2012-10-05 21:13:00 1 2 40 0 14 45 1 117.50 21 78.64 NEW .hpFpAphlsspt-sc...hhhlGhAD.cc.ssppallhQRsh....c-pDtptshsuha..sps....sstssYsslcplpLppspltlplpssshh.....slsls..plplsh........ph......hphhpphhpplLp. ...thpFpAphlssph-sc...shhlGhAD.cc.ssppYlllQRsh....--pD.p.shssha..hch...tsspssYsslccltLppsplhlplpcss.t.....slshs..plplsh........sh.......hphlpphhpplhth......... 0 5 8 12 +15449 PF15589 Imm12 Immunity protein 12 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved WxG and YxxxC motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the NGO1392-family of HNH/Endonuclease VII fold nucleases [1]. 29.00 29.00 32.50 32.20 25.70 24.20 hmmbuild -o /dev/null HMM SEED 155 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.41 0.71 -11.09 0.71 -4.46 17 45 2012-10-05 20:14:09 2012-10-05 21:14:09 1 1 43 0 16 45 1 144.10 33 88.74 NEW pWVEShGGPLlsVPtosLshWsGss........s.spsssDYDRACsVDGhsGllslGs..up.....ALVLGDEPAsTsYL..PcHtsF..lRW......hAADS.....Es-Llusscssls..uspW-..spspWpss...GPsVLhDuA..........aPG.usss...sphtVsLssGRatVRAscscs-tcoh. ..........pWlcotGGPllhl.psshthWpGsp.....................s.sDYsRACsVsshlGllslss..tp.....ALVLGDEPhsTsa..l...scc..tsF..lRW......hsA-s........Es-llsuscsshs....sstWp..spsp..Wpss...GslVLFDSA..........hPssphs...spl.lsL...sGpatV+ssphpss..s.............................................. 0 4 10 15 +15450 PF15590 Imm15 Immunity protein 15 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved aspartate and GGxP motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox10 or Tox-ParB families [1]. 25.00 25.00 69.80 69.60 19.20 16.70 hmmbuild -o /dev/null HMM SEED 69 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.74 0.72 -9.29 0.72 -4.89 5 9 2012-10-05 20:14:46 2012-10-05 21:14:46 1 2 9 0 1 8 0 69.80 37 13.46 NEW Dsls-RIEsLIsNp..LpclAh.....pPD..uWEsLYpDPcDGRaWE+lhscSchcGGGPPpLpslSQsss+s+Ypl ..Dsls.RIcsLlsNp..Lpcluh.....pss..GWpphapDPsDGRaW.hshspSchcGGGPPpLpslSQ.ps+spYph..... 0 0 0 1 +15451 PF15591 Imm17 Immunity protein 17 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly all-beta fold and a conserved GxS motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox17 or Ntox7 families [1]. 25.00 25.00 73.40 73.30 23.10 21.00 hmmbuild -o /dev/null HMM SEED 74 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.67 0.72 -9.27 0.72 -4.21 6 26 2012-10-05 20:15:26 2012-10-05 21:15:26 1 1 26 0 3 9 0 77.40 76 97.53 NEW M...KhsFYp.Vclhp..ssst.....cltGc+GVVlGhSE-DsthaGYuVlIa.-hcpsh.l-cc-ltsTGphhsR--FY ..MTNLKLDFYSEVIIKDSCPNDLLENGETIKGKKGVVLGISEEDGIIYGYTILLF.DIKYCIYIDKKYIIPTGKKFSRDDFY 0 1 2 2 +15452 PF15592 Imm22 Immunity protein 22 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved SF motif and tryptophan residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox21, Ntox29 or Tox-ART-RSE-like ADP-ribosyltransferase families [1]. 23.30 23.30 25.40 24.90 21.70 21.50 hmmbuild -o /dev/null HMM SEED 116 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -10.65 0.71 -3.77 13 133 2012-10-05 20:17:06 2012-10-05 21:17:06 1 1 76 0 12 78 1 110.90 58 90.11 NEW +Nlsps.pa-EsSFIuplh-pupWscccYW+LEpsLhpl...sp...phphsc-lspplhhuhhpllshlh.thsstc.apIps..h...........cshc....I.shhERhchlhptlFs..scslcpstF.hh ............RNIPsFEcYDENSFIGKWYD.DGVWDDEEYWKLENsLIEV...R+...KYPYPMDIPRDIVIGIGTII-FLM..VsNWKLFcIKu..SP.......WLPcSVt....IpERYERh+sMLRYIFT..-hDl.NspF.Y..................... 0 5 8 9 +15453 PF15593 Imm23 Immunity protein 23 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Ntox18 family [1]. 
25.00 25.00 26.00 25.80 24.40 24.00 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.98 0.71 -4.00 26 70 2012-10-05 20:18:30 2012-10-05 21:18:30 1 1 56 0 16 50 0 150.50 23 94.87 NEW MIaG-...P.pFulhh-h.lppWs.sss..a.....ppGhFsaaIsuchhPs.hhsso..LtssltpL..pph..........thhsthpsppLFshsstchhphl..............hphh.sphts....pt.ch.......aphhloh..shsDs...Gh.lFh..lps.s-ph+llatp.................pstslp..-hhLptsphppllppl.t .......hhGp....pcFuIhh-p..pchs..ts..a.....h.GhhphhIssphaPp....t...hho..LssshpsL..csp..........th.pphhssthhsL-htc.thh.l...............hchsp.phss.......ph.......hshhhsss..shp..-...shslhh..hps.s-p-RLhath..................sspshc..Ehhhc+Gplppll.pl.p....................... 0 2 7 11 +15454 PF15594 Imm30 Immunity protein 30 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an all-beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-HHH or Ntox24 families [1]. 23.20 23.20 25.00 25.00 21.40 21.10 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.90 0.71 -10.47 0.71 -4.03 40 70 2012-10-05 20:22:57 2012-10-05 21:22:57 1 1 61 0 24 80 1 120.60 23 86.37 NEW pplhssptlpplasp.hPshpcsclhslplc+pss.........pLslph.....hhpchsps..Pp+W..spass........hhlslsFhslppLpl.....suhuppshlsplplppssst.h..........plslps.sssh......phpC....cahplpsls.sa .....hhssptlhslaGp.hPshccs-lhslplpRsss.........pLslph.....hspc.spsh.Pp+W..scass.........hhlplsFhslpsLpl.....puhuspsllsphclp.ph--p.h..........hlcIcs...psph......hlpC....cahclcsls.s................................. 0 4 11 16 +15455 PF15595 Imm31 Immunity protein 31 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved tryptophan and Dx[DE] motif. 
Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the Tox-RES or Tox-URI1 families. Proteins containing this domain are present in heterogeneous polyimmunity loci in polymorphic toxin systems [1]. 29.80 29.80 35.20 34.10 25.50 25.00 hmmbuild -o /dev/null HMM SEED 107 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.34 0.72 -10.41 0.72 -4.28 33 97 2012-10-05 20:23:36 2012-10-05 21:23:36 1 2 91 0 16 77 0 106.30 27 70.46 NEW p.hhhlcpc..tshslsL.l.tctchp.hsp+hc-..Gh-u..sGYsWtulhpsalpcct..PcL..h-clcaDPEAGhFsA.Y....upst-sLcchsthlcchh-scphlhcll..pts-lp. ...................................h.lhphp...shslsh...pppcshthsphhpp..shcu..sGYsWcuhhphalpcht..Pcl..h-thphDPEAuhFsA.Y....upsh-slpclsphlpshh-s-chlhphl..pts...t........... 0 5 12 14 +15456 PF15596 Imm34 Immunity protein 34 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly alpha-helical fold and conserved aspartate and cysteine residues and an SE motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, usually containing a domain of the LD-peptidase or Tox-Caspase families [1]. 26.50 26.50 26.50 70.30 25.60 18.00 hmmbuild -o /dev/null HMM SEED 110 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.90 0.72 -10.32 0.72 -3.89 3 76 2012-10-05 20:25:01 2012-10-05 21:25:01 1 1 76 0 2 35 0 91.60 92 74.39 NEW MA-+uILsSLlusposEu+uACSto.aACsuA-hAELGLAhluuSc............................ScAuscuLVNLhRaRlDGALSE-YsCYLLo+G+ulsstLc+LsAKpLAucChpTFsclK+R.sht ......................hADKuILWALISASspEGRKACSLSYFuCKAAE.AELGLAYMAAND............................NKEFLTSLSNIMRYKIDAGLSESYTCYLLSKGKIIRPYLKNLNPLQLAADCIETVNKIKDKNKK... 
0 1 2 2 +15457 PF15597 Imm36 Immunity protein 36 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved [DE]R motif. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox13 or Ntox40 families [1]. In some proteins this domain is fused to the Imm38 -like (PFAM:PF15599) immunity domain. 25.00 25.00 26.60 26.40 19.70 19.60 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.69 0.72 -10.22 0.72 -3.86 31 66 2012-10-05 20:00:33 2012-10-05 21:26:13 1 3 33 0 6 64 0 92.00 35 93.27 NEW .slcp.Kp-l.ppIcchGY-SL+YslFs..Epp..pt.Wts+l.Ycs..sca.V.upcDRu.lsG+.hEFssFp-AKc+FlchL-hhVptN+htlcpGhss.YsSPLWD .................phtp.+t.l.ptIp.hsapsL+.YslFp..cpp..ph.ats+l.hpp..spa.VYsTsDRuShs.GK..hpFpsFp-AhcpFlphLp.hVh.N+htlcpG.sspYssPLWp.......... 0 3 3 6 +15458 PF15598 Imm37 Immunity protein 37 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved arginine. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox40 family [1]. 
25.00 25.00 30.30 29.60 22.00 23.90 hmmbuild -o /dev/null HMM SEED 159 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.99 0.71 -10.76 0.71 -4.37 27 97 2012-10-05 20:27:06 2012-10-05 21:27:06 1 2 64 0 16 59 0 153.10 32 84.76 NEW ..tchhphsttuGhshssssss................hhhssssptthhlhppsut....ah.h.....sps-Rupttt.....hh...hssshsslc+allhhhusplRts..hpLPtlthPhs.pslssuaslpphs.t.........ht.lhsspssslshshsst.....hphssLSahhshshp-Lhs.ualc....ssG....tPLh .....................................scl.sahphuGhshhtsscs...............hhhhspGucsthalpchsGh....ah.l......spo-Rhsccs....hph...uuuShsllEKYLhshaGsslRuc..+cLPsl...psPaps--lhPpaolsshshs........hss.LlsssGosluhssssp.......LlpLSHhLslolsslcc.SaLDs-G...pPLh............. 0 8 13 16 +15459 PF15599 Imm38 Immunity protein 38 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved E+G and ExxY motifs. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox40, Tox-CdiAC and Tox-ARC families [1]. The protein is also found in polyimmunity loci in polymorphic toxin systems. 25.80 25.80 25.80 26.80 25.40 25.30 hmmbuild -o /dev/null HMM SEED 124 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.43 0.71 -4.59 30 130 2012-10-05 20:27:51 2012-10-05 21:27:51 1 2 118 0 19 109 1 116.40 27 77.94 NEW .tclpsclhclut+lusss..ths.hhstp....t....hs-uts.lhhssst....Y+hhhhERGphhpccpTssh-ElLYWlhcslspshAhchthpptss.........c.pshR+lhascph..cLlsslssc...WupcttpchsthL .....s.chpphh.chttchsh.p...phs.hhhtp.....t......stGp..h.hsppt....YhhhhhERGpt.schpTssh-EhhY.lhpclshh.huhcathc+tht.........c.pctR+ltFpKpl..pLhstlNss...Wtccstpphsthh..................................... 
0 2 8 12 +15460 PF15600 Imm39 Immunity protein 39 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and a conserved DxEA motif and arginine residue. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-ColD family [1]. 25.00 25.00 27.40 32.10 24.60 24.20 hmmbuild -o /dev/null HMM SEED 187 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.19 0.71 -11.36 0.71 -4.43 8 16 2012-10-05 20:28:56 2012-10-05 21:28:56 1 1 16 0 4 13 0 175.80 43 95.36 NEW MuSaIulG...........hVYc..s....csphpslIcha...l-LlusIssl.........Kh.s.hscsh-u-lhpshsl-chch.cssauplo.......cIpscllp.cslphhlRh.cEpcY.GlLhDIuE-cLltspSs-plE...pcLIsllhpIYpsssY-auFCDsEAEl-h..sPpslcPl.cssYullhhPc..Nsch+VhhssWcIDGlTpRpc+... MuSalSsG...........CVYK.......puShKNllKhY...IDLluShANl.........-IENllChssEu-lppTcsLEElch.-ssFSplT.......cIsCK.lc.csl-hslRhhcEpsYpGVLFDlS.DaL.EhhSscpLE...NcLIsIlhsIYshhPY.FuFhDoEAEl-h..NPpslcPl.csPYAhLlhP+..Nsch+VYhssWcIDGlopRpc....... 0 2 4 4 +15461 PF15601 Imm42 Immunity protein 42 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold and conserved tyrosine and tryptophan residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-REase-10 family [1]. 
25.00 25.00 30.90 30.90 22.10 22.00 hmmbuild -o /dev/null HMM SEED 134 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.63 0.71 -10.59 0.71 -4.74 33 74 2012-10-05 20:38:09 2012-10-05 21:38:09 1 1 69 0 17 69 1 125.60 35 92.37 NEW VGhclshhha-lGpsshlauFFSTlshpLEpstWGo+aPhLhscL.YpGpLph-clppAhpELcpIcptLpphs.PspVl..WDh-sl.sppPPW.usplusslssLusYFhTscGcshh-lltcAl-pA.............hcpphclhIp ........VGhtlshhhaplGpsshlauFFSTlshplEsptWGo+aPlLMpcL.YpucLphcclppAlpELcpIpp.Lpphs.PspVl..WDh-cl.sppPPW.GssIssclssLusYFhTssGpshhcllhculppu.............hc.phsl.I.p......... 0 4 10 13 +15462 PF15602 Imm43 Immunity protein 43 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with a mostly alpha-helical fold and conserved arginine and phenylalanine residues. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Ntox48 family [1]. This domain is often fused to the Imm72 immunity domain. 27.20 27.20 29.70 29.20 25.30 19.60 hmmbuild -o /dev/null HMM SEED 171 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.45 0.71 -11.10 0.71 -4.30 15 39 2012-10-05 20:41:16 2012-10-05 21:41:16 1 2 20 0 15 42 0 171.40 31 51.90 NEW hss-hpRRplFaLLK+hoSaThWpRhh-hapsFAsshEcsl+phsts...-.stl.sschssILcsLA+hEcGlpcLp+G......................s+pla+hstshE.....huhsc.....t...pss......uaW.cth.phtshts.pls.sh.th.......hphsPhtsth.ppsltpLtssht.lssshhEstFh-.shP.ha ..s--hsRR+lFaLLp+hTSaohWcRth-hattFAstaEpsV+shPps........-.ptl.ssplstIhchLAth-cGlpcLt+G......................sRhVaphGpsh-.....huhcc.s.hsshhasps......saWccth.phtsh.s.pls.shsth.......hphuPhtst..ppshtpltshth.l.sstY-ptFhshshP.sa...................................................................................................................... 
0 0 0 6 +15463 PF15603 Imm45 Immunity protein 45 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein with an alpha+beta fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-ARC family. This domain is also found in heterogeneous polyimmunity loci [1]. 25.00 25.00 29.20 28.20 23.80 22.50 hmmbuild -o /dev/null HMM SEED 82 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.91 0.72 -9.59 0.72 -3.84 16 55 2012-10-05 20:41:55 2012-10-05 21:41:55 1 1 53 0 12 43 0 75.10 44 98.95 NEW Mhlss.ssutlpl-lspGthhph.pGEhlhs.............spFllahsolppW-sPa-s..lotsEhppIlctlpcphscpshplsF- ........MKIoG.spStlpFDLENGallKA.pGEhLls.............GcFVVa+DSMcsWEPPaEscplopsElpcIIppVcppps-pTlplsF-.... 0 6 8 9 +15464 PF15604 Toxin_43 Putative toxin 43 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses a most all-alpha helical fold and a conserved HxxD motif. In bacterial polymorphic toxin systems, the toxin is usually exported by the type 2, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion systems [1]. 
25.00 25.00 27.70 27.00 24.50 22.80 hmmbuild -o /dev/null HMM SEED 153 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.34 0.71 -11.16 0.71 -4.17 30 88 2012-10-05 20:43:07 2012-10-05 21:43:07 1 8 64 0 11 97 2 123.50 36 39.38 NEW ..sppcttEFcRQLcsQEcGLNcLTV-EalcNRptahsss...psp-sssspcARcc..hpcclc-phpppl....utp.....h..pAcppApcphpolAALHNPDhlAGGcsh......................IushGD+plNSSIGuQW+.......sRIssL-ptAccsspsh....ppssthNlKLp .......................................t.p.tEFt+QLpsQppGhNch...TVcEalps.t.hhtts...........hthpsthtppstpc.....hppclpchhppth...ph.........tApptAtch......hps.AALHNPD.lAGGps.......................IsshGD+plNSSIGsQW+..............RlttlDtthpp.hscph....htsshhNl+L........................ 0 3 6 8 +15465 PF15605 Toxin_52 Putative toxin 52 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all alpha-helical fold and conserved aspartate and glutamate residues, and K[DE] and[DN]HxxE motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5 or type 7 secretion system [1]. 25.00 25.00 27.70 27.40 20.90 20.20 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.59 0.72 -10.21 0.72 -4.14 8 23 2012-10-05 20:49:50 2012-10-05 21:49:50 1 7 23 0 4 26 0 91.40 45 7.60 NEW stLosuQpssls+l-Nh.......IpsHLTDtDhoGThRDLsGsPV.PKss..GGYWDHlpEMpDoh+GLpNhp+sLcs....LsNPshsppstthhQutLscAspplcKIEshhcs. .......stLsppQcuulc+IDNs.......IcstLpDpDl.GTL+DhsGpPV.PKps..GGYWDHhQEMpNoh+GL+NptcTLcs....lsNPp........hQutascAoctlsKIEshl+.u. 0 1 3 4 +15466 PF15606 Toxin_55 Putative toxin 55 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha helical fold and conserved lysine and cysteine residues, and GNxxD and WxCxH motifs. 
In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system [1]. 25.00 25.00 28.40 27.60 20.90 17.20 hmmbuild -o /dev/null HMM SEED 77 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.11 0.72 -9.49 0.72 -4.01 5 65 2012-10-05 20:50:46 2012-10-05 21:50:46 1 10 64 0 3 72 0 75.50 87 9.54 NEW GhAlu...shMAuPGNQADTGIsccVs-lhs-thhsuGK+PDRC-VLQpLID....sGsIuAKs.AKuTQKAWGCRHSRHS+D+ .....GLALD..ITMIASRGNVADTGITDRVNDIINDRFWSDGKKPDRCDVLQELID....CGDISAKD.AKSTQKAWNCRHSRQSNDK....... 0 3 3 3 +15467 PF15607 Toxin_60 Putative toxin 60 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted RNase toxin found in bacterial polymorphic toxin systems. The toxin possesses an all-alpha-helical fold with conserved DxK, GNxxxG, and DxxxD motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6 or type 7 secretion system [1]. 25.00 25.00 32.20 29.50 24.60 23.40 hmmbuild -o /dev/null HMM SEED 112 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -10.48 0.71 -4.04 62 137 2012-10-05 20:52:01 2012-10-05 21:52:01 1 12 114 0 26 125 1 105.80 30 31.36 NEW shhhahppVpssusWDaK.pht.th............................h....tth.hhh-sauNhHYGalGpAh..GhspshLhpuAGhsQhhssstp......................................shhDDssDphsIphGlchapp. ..........................................................shahttVpssusWDaK.pht.th...........................hhhp..tphthhaDsauNlHYGYVGhuh..Ghs-shLLtuAuhtQhhsstt.......................................shhDs.sDptsIphGhphapp.p.... 0 10 18 24 +15468 PF15608 PELOTA_1 PELOTA RNA binding domain Anantharaman V la_psag Anantharaman V Domain This RNA binding Pelota domain [1] is at the C terminus of a PRTase family [2]. These PRTase+Pelota genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo-nucleoside involved in stress response [2]. 
25.00 25.00 25.10 47.60 23.60 23.20 hmmbuild -o /dev/null HMM SEED 102 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.56 0.72 -10.34 0.72 -4.24 65 343 2012-10-10 14:40:03 2012-10-06 20:01:06 1 7 333 0 72 266 2 102.60 39 24.86 NEW psps.sp..ht.hhs..spps..phpuh.pslcpltpcasIsslNhlKPGlGEsTRVLLRRVP.+lLl+.stss....s-lsHlhhLAcc+GVs..Vcphs..sh..sYsslulI+slt ............................tsh......hth.hpsspshphpuhpslppl.uccapIsslNhIKPGluEsTRslLRRVP.+lLVR.sh..ss....PDlshllhLAc-KGlsVcEhs.sh..tYpslslIKplh........... 0 20 44 60 +15469 PF15609 PRTase_2 Phosphoribosyl transferase Anantharaman V la_psag Anantharaman V Family This PRTase family, with C terminal TRSP domain, are related to OPRTases [1], and are predicted to use Orotate as substrate. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 35.20 35.20 39.00 40.70 31.40 30.70 hmmbuild -o /dev/null HMM SEED 191 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.12 0.71 -11.26 0.71 -5.00 67 330 2012-10-10 14:25:38 2012-10-06 21:18:51 1 4 322 0 70 261 4 196.20 43 44.89 NEW slscL.....hshAtR.+NPKRuFLFVS+VLGKHlPlsPsshhsshppLApthstpl...........................................................................sts................slhIGhAETATuLGpuVacsh..p.s..tp.........YLHoTR+s.lss.....p..hspFcEpHSHAosHlla.sss.shhps....su...s.LVLVDDEhoTGpThlNllpuLppt..hP...h.cchllsoLlDW............ps.t..ssltpcls.l.....lssVSLlpGph ..............................s.lcsLhslAtR.R.NPKRAFLFVS+VLG+HlPlpPushhpshppLApphss.sL...............................................................sss.............lLhIGhAETAsGLGtuVacplptpp......hYLpSTR+slsu.....shhspFcE-HSHATsHLlYhsss....th.hpp.....uc.slVLlDDEhTTGsThlNllpAL+sp..hs...hcphlssoLsDW........pt.......sslscch.s.l..lssVSLlpGp......................... 
0 20 44 60 +15470 PF15610 PRTase_3 PRTase ComF-like Anantharaman V la_psag Anantharaman V Family This PRTase family is related to the ComF PRTases [1]. These genes are found in the smaller biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 110.50 110.50 192.20 191.90 29.10 28.50 hmmbuild -o /dev/null HMM SEED 274 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.92 0.70 -11.79 0.70 -5.48 17 23 2012-10-10 14:25:38 2012-10-06 21:57:01 1 1 23 0 9 24 0 273.30 30 91.63 NEW Mhs.thSLHpItsss..phsFssssYSRaKaGDsphAcpFGppLhcuFIsp...........htstphps-plVhlsSsapplPTAopsLppaFlppLNpaLhppstpsslpsKlhRsto.hp..DYusLshE-R.pl.ts-saaIDpphl...pG+sllFlDDI+lTG.S+Ectlhp.hpphtlcs...chhaLhhs-..ppsh..csslEspLN.hsVcohpD..ltplhpussFphNpRhVKalLsts.s-.ashFlp..p.scshhppLhchAluNsYH.p.ppYpsNlphLt..l.....shtuh................. ......h...thuLapIhsss..phsFssssYSRaKaGDsphActaucpLhcsFlsp...........apsthhps-plVhlsSsapslPTAossLppaFVccLNpaLscput.sslpsKlhRhto.sp..DYusLshE-R.pl.tsssaaIDp-hl...sG+plIFlDDl+lTG.S+EcpVhp.htphslcu...chhalhhs-..spsh..csslEs+LN.hsVcshhD..ltpllsussFphspRhlKalLutsppc.hssFlp..phssshhppLachAluNpYa.p.ppYtsNLshLp..l......h....... 0 3 5 7 +15471 PF15611 EH_Signature EH_Signature domain Anantharaman V la_psag Anantharaman V Domain This domain with a strongly conserved glutamate at the N-terminus and a histidine at the C-terminus [1], is found in a SWI2/SNF2 four gene operon [1]. Its strict-neighborhood association with\ \ SWI2/SNF2 ATPase strongly suggests a function in conjunction with it [1]. The other genes in the operon are a OmpA protein and a TM protein [1]. This has a DNA related function along with the TerY-P triad [1]. 
25.00 25.00 41.50 40.40 23.20 23.10 hmmbuild -o /dev/null HMM SEED 389 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.55 0.70 -12.60 0.70 -5.41 79 135 2012-10-09 15:52:58 2012-10-06 22:16:26 1 1 129 0 40 115 7 383.70 22 79.64 NEW h.phshcplhstlppht...hstplps..........phhhhshs..h.h.....p...th.t..sp.........t.hhshlhp.........ht...ph..pptshtsLhpsYhstas............pthtshh..........lttph...thppt..........hhphhpphp....tlhsspss.th.lspphhp..st.ts.hptl..t....phslss....ssahtpshhthhlpphtphp....spthl.............ccl.ph.h...lh....ps......hhphtsphhpthLphah.p..pt..t..pphcpt.lpphl....ls...hhG-Ppltss..p......Wtt........lsc........pstphlptWLscp...slchFFpllp..t.st................h.ptRtpFWhpahcp..lspshlhhuss..Ahp.hppthtt...tth.s.phu.php........p.s..spsshlhpl...s....sh.hhVE............asssupshhhap.ps...ph...........s..pthp....h.....thstpl.....+pts..........sh.t................ph....sH.ps...............sWpt+hpphL ................................................................................................................................ht....shtplhstltphp...htp.lss..........plhhhslsh.h.tt.tp..tph....sc....plhstlhc.........hhpp.pphshpuhhptahspas..s...t.cshtsah..........lttph..pspcp.............ph.p....pllsssus.phlucphhp..sp..hsh.chl..t....phtlpp...tupahstshhhhhlcplspls.....-cshl..............-cl.ch.h..lh..cu.........phctpshlhptlLphhh..t..ps...s..sphpcshhphl....Ls...hhGDP+lsssssp....Wpp.........lp.............shhphlhsWLsch...DLchFhchlpths.t.............thh.sRcpFhpthhct..lsps+lhLu.c..Atphhpppht....cph.spauplp......csspuhlhhph.s....sh.hhVE...........a.csu.ssh.asYp.hs.p..............s.pphp..h.....shssph...........pchs.p.....sht...............+lsHss...........uWppKhht................. 
0 10 19 29 +15472 PF15612 WHIM1 WSTF, HB1, Itc1p, MBD9 motif 1 Iyer LM, Aravind L la_psag Manual Family A conserved alpha helical motif that along with the WHIM2 and WHIM3 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins [1].Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [1][2]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. The conserved basic residue in WHIM1 is involved in packing with the DDT motif. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA [1]. 16.70 16.70 16.70 16.70 16.60 16.60 hmmbuild -o /dev/null HMM SEED 50 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -8.02 0.73 -8.48 0.73 -3.75 36 528 2012-10-09 18:07:10 2012-10-07 01:56:28 1 70 195 2 305 505 1 49.50 21 3.66 NEW pptshspthpptsa.............hplsspp+lplLphLschh...hsssshpshlppptctt .....................................h....hh.thpppsa.............hplssppKlplLphLscph...lsottl+shlppp....h........................ 7 53 121 197 +15473 PF15613 WHIM2 WSTF, HB1, Itc1p, MBD9 motif 2 Iyer LM, Aravind L la_psag Manual Family A conserved alpha helical motif that along with the WHIM1 and WHIM3 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins [1]. Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [1][2]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. The acidic residue from the GxD signature of WHIM2 is a major determinant of the interaction between the ISWI and WHIM motifs. 
The N-terminal portion of the WHIM2 motif also contacts the inter-nucleosomal linker DNA. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA [1]. 21.00 21.00 21.00 21.00 20.90 20.90 hmmbuild -o /dev/null HMM SEED 38 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.73 -7.77 0.73 -7.97 0.73 -3.82 15 298 2012-10-09 15:57:33 2012-10-07 05:24:23 1 45 177 0 195 286 0 41.60 35 3.52 NEW RppsLGhDRpts+Yahh........................tsspslhltcp.s ...RtpPLGpDR.tNRYWaF.....................................s.....t..tpsthahp......................................................... 0 38 95 146 +15474 PF15614 WHIM3 WSTF, HB1, Itc1p, MBD9 motif 3 Iyer LM, Aravind L la_psag Manual Family A conserved alpha helical motif that along with the WHIM1 and WHIM2 motifs, and the DDT domain comprise an alpha helical module found in diverse eukaryotic chromatin proteins [1]. Based on the Ioc3 structure, this module is inferred to interact with nucleosomal linker DNA and the SLIDE domain of ISWI proteins [1][2]. The resulting complex forms a protein ruler that measures out the spacing between two adjacent nucleosomes [2]. WHIM3 along with WHIM2-N constitutes the inter-nucleosomal linker DNA binding site in the major groove of DNA. The module shows a great domain architectural diversity and is often combined with other modified histone peptide recognizing and DNA binding domains, some of which discriminate methylated DNA [1]. 
18.10 18.10 18.10 18.10 18.00 18.00 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.86 0.72 -8.32 0.72 -4.18 35 259 2012-10-09 17:50:33 2012-10-07 06:11:50 1 43 130 0 154 252 0 77.50 32 6.14 NEW WthhpstcplcpLh..csL.................................................................................................................................................................................sscGhR...EppLpppLtphh....pplpssh .....................................................................................................................................................................Wthhscscpl-pLl...puL..............................................................................................................................................................................................pPpGhREtpL+ccLpp............................................................ 2 27 51 96 +15475 PF15615 TerB-C TerB-C domain Anantharaman V la_psag Anantharaman V Domain TerB-C occurs C terminal of TerB in TerB-N containing proteins. This domain displays multiple conserved acidic residues (TerBC) [1]. The presence of conserved acidic residues in both TerB-N and TerB-C suggests that they, like the TerB domain, might also chelate metals. These two domains might also occur together in the same protein independently of TerB [1]. 
26.30 26.30 27.20 27.40 25.40 24.70 hmmbuild -o /dev/null HMM SEED 144 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.87 0.71 -11.01 0.71 -3.94 140 224 2012-10-09 17:52:00 2012-10-07 20:41:01 1 11 214 0 64 226 6 149.20 21 22.62 NEW slcshts........................tpptst.ss...................lplDhsclsplcp-sspspthlss...........lhs--c............tp.........................................-.t...p......................ht.p..p...ts..............pssss.......................................................G..L-s.......sctphLchL..........lsc.pp..W.ctchpphstph.............pLhhsssl-pIN-hsa-ths-sllE...s..s-shtlst...-hhccLp ..................................................................................................................................................s........................p.......hplDhsplstlcp-sppsp....ss...........lhs--p.....t...............................................p..p..................t.t.ps....s........t.tss..........................................................h.u.Lsp.......schphLchL.....lsc.ps.a.htphpphspsh.............plh.lsshl-pIN-tha-hhsssll-...s..s-shtlsp-htpcL........ 0 21 41 52 +15476 PF15616 TerY-C TerY-C metal binding domain Anantharaman V la_psag Anantharaman V Domain TerY-C is found C terminal to TerY-like vWA domains in some proteins [1]. It has 8 conserved metal chelating cysteines or histidines [1]. It occasionaly occurs as solos [1]. 28.40 28.40 29.30 75.20 27.60 27.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.25 0.71 -11.31 0.71 -4.22 48 97 2012-10-09 18:02:15 2012-10-07 22:23:15 1 2 92 0 17 68 6 127.90 48 40.28 NEW ssshD-shVllsG+C....s+T++PYLhKYER......t..sh.t....h.plphpta..p...lsGsaPlD.csYhsa.SDspssstplNTo-LhGs..PGCPaCGNthuaAhC..s.CG+lhC.l.sG.sc......pssCPWCsp.sssau.sss....ss.FDlsRGcG ..spAaDENCVTLsGRC....SKTRRPYLhKYERP....sspl.ouLs....F.pLNlsuF..NluGCYPID.EDYFuWS.Dtosou.QVNTS-LhGs..PGCPaCGNtsAFAhC.s.CGKLhClsG.sc......cVhCPWCtpshsausss...tssFDlsRGRG..... 
0 5 11 14 +15477 PF15617 C-C_Bond_Lyase C-C_Bond_Lyase of the TIM-Barrel fold Anantharaman V la_psag Anantharaman V Family This family of TIM-Barrel fold C-C bond lyase is related to Citrate -lyase. These genes are found in the biosynthetic operon, with other enzymatic domains, associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo- nucleoside involved in stress response [1]. 217.60 217.60 221.30 220.40 214.70 214.10 hmmbuild -o /dev/null HMM SEED 346 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.38 0.70 -12.11 0.70 -5.63 70 344 2012-10-10 15:06:27 2012-10-07 23:05:45 1 2 337 0 72 251 6 324.30 42 93.39 NEW phhsauLGATLYhPATRpclAcslhcp+hsGlpSlVlCLEDAlu-p-VshA.pNLhphLppLsst............................ptsshPLlFlRsRssc.hppLssth......slphlsGFVlPKF...............stpshpsah-hltp..ss...............LhhMPsLEo.-lh....csppLtpltphLspa..+-plLAlRIGusDLhulhGlRRs+chTIYD.ssluslIsslVslFutt....uaslouPVaEaa.ss.p..............................................sLhRElsLDhANGLlGKTsIHPSplssVpuhhhVopE-apDAhcILs.s.s.p......uVhKSs..spMsEstsHppWApclLtRAclaGV.hscstshsphh .......c..sasLGATLYhPATRpcIA-sllcp.Kh.....s.GlpSlVlCLEDAlu-s-lshA.pNLhphLppLusthtt.........................stsshPLlFlRsRpsp.hptLhsch..............slptlsGFVLPKF...............T.sshssah-hhsssp...............LhhMPsLEot-lh....cstphtpluptLcpa..+-RIlALRIGusDLhulhulRRs+chTlYD.sshu.lIt.lVsVFu.t....sFslTuPVaEahsspp...........................................sLh+ElsLDhApGLlGKTsIHPuQIpllpshhhVopc-asDAhcILss..sp......uVhKSp..stMsEstsH+pWAppILpRAchYGlhs-pss.....h........ 0 23 45 62 +15479 PF15619 Lebercilin Ciliary protein causing Leber congenital amaurosis disease Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:O95447 Family Lebercilin is a family of eukaryotic ciliary proteins. Mutations in the gene, LCA5, are implicated in the disease Leber congenital amaurosis. In photoreceptors, lebercilin is uniquely localised at the cilium that bridges the inner and outer segments. 
Lebercilin functions as an integral element of selective protein transport through photoreceptor cilia. Lebercilin specifically interacts with the intraflagellar transport (IFT), and disruption of IFT can lead to Leber congenital amaurosis. 26.00 26.00 26.00 26.00 25.50 25.40 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.94 0.71 -11.35 0.71 -4.88 31 158 2012-10-08 11:36:17 2012-10-08 12:36:17 1 4 81 0 93 142 1 180.10 39 32.43 NEW hls+RlLSARLhKIp.ELcN-ls-lph+L--ltpEN+hLKpLQhRQcKALs+aEsopsElPQLlspHpsEl+sLRccLRKoQE+ERshc++L+-s-scLh+s+ssLp+Lp+Lu-D+pLsER-ELsc+Lsthps+hpps-++IppLEKpLcLsspuFpRQLtsEp+KstpApppsctLppElppLppKLKEKEREL ..............h.hs+RlLSARLhKIp.cLpN-ls-lph+LpplhpENchLKpLQhR.ppKAL..sKaEsspsclsQLl.t+HpsEl+sL+ppLRKSQEcERssp++l+-s-s-Lh...+.......s+ssLp.......c....LpcL.u-s+pLsE.R..--L...sc+Lsthpt+h-ss-++Ipp.........Lp+...pLcLsspsapRQLhsEp+KshpspppschLphElppLppKLKEK-+pL............................... 0 25 30 57 +15480 PF15620 CENP-C_mid Centromere assembly component CENP-C middle DNMT3B-binding region Coggill P pcc Pfam-B_64009 (release 26.0) Family CENP-C is a component of the centromere assembly complex in eukaryotes. CENP-C recruits the DNA methyltransferases DNMT3B, in order to establish the necessary epigenetic DNA-methylation essential for maintenance of chromatin structure and genomic stability. This middle region of CENP-C is the binding-domain for DNMT3B. Binding of CENP-C and DNMT3B to DNA occurs at both centromeric and peri-centromeric satellite repeats. CENP-C and DNMT3B regulate the histone code in these regions [1,2]. 
27.00 27.00 183.80 183.80 25.00 25.00 hmmbuild -o /dev/null HMM SEED 260 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.26 0.70 -11.95 0.70 -4.67 13 40 2012-10-08 12:56:00 2012-10-08 13:56:00 1 5 23 0 16 32 0 248.70 59 31.10 NEW hKL.LEDEFIIDESDpSFASQSWITIPRK.ussLKQpoVsPsESTAlLQuKKSREKHH........slSPpTLTSDKHScKA+PVEKSQPScp++LupSCsLosEhENssRSTKpEhaSENAcKsSGsKRTlcQK.Q++KhKsNlsEEplchtQSK-cNlNh..SHIsQDKLQRNSDRNMc-sEEhpNsslSKKQhPsV.GsKKpoo......ppKK-K.cuKKK+FSstSpKNKlVP-EVT.TlTRSRRISRRPSsWWVVKSEpSsl.SN.oSlRNELsV ....hKLlEDEFIIDESDpSFASpSWITIPRK.uGsLKQ+slS.PAESTAlLQu+KSREKHH........sl.PpTLsssKHScKsHPVEpSQPS-cphLssShALosEhENshRSTKaEMaScNAcKsSusKRTIKQK.QRRKaKApsuEEQLDhGQSKDENIph..SHIsQDKhQRNSDRNMEEpEEhtN-sl.SKKQMPPV.GSKKsSs........+KDKtEuKKK+FSsES.KNKLVPEEVTSTVTRSRRISRRPS-WWVVKSEESsV.SN.SS.lRNELsl... 0 1 1 1 +15481 PF15621 PROL5-SMR Proline-rich submaxillary gland androgen-regulated family Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:P02814 Family SMR is a family of proteins found in eukaryotes. The family of SMR proteins is expressed in the submaxillary gland. SMR members may play a role in protection or detoxification. 24.90 24.90 25.00 24.90 23.10 21.50 hmmbuild -o /dev/null HMM SEED 113 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.10 0.71 -10.82 0.71 -3.64 16 60 2012-10-08 13:28:38 2012-10-08 14:28:38 1 1 16 0 13 58 0 99.30 40 68.32 NEW MKsLsLlhGLWsLhuCFpsuEspRGPRt.asPt.......ssPPs..PatsthhPsPsPPsaGsGhssP.P.......husGhh.Pss..............s.htsh.......l.ssss.P..sPuhP .MKsLshlLGLhALhuCFpsuESpRGPRtPYsPG........sPP...PatPtaVPPPsPPPaGsuphsP.s.......assshh.Pss....s.s..........s.................................................................................................................. 0 3 3 3 +15482 PF15622 CENP_C_N Kinetochore assembly subunit CENP-C N-terminal Coggill P pcc Pfam-B_21609 (release 26.0) Family CENP-C is a vertebrate family that forms a core component of the centromeric chromatin. 
On depletion of CENP-C proper formation of both centromeres and kinetochores is prevented. The N-terminal of CENP-C is necessary for recruitment of some but not all components of the Mis12 complex of the kinetochore [1,2]. 27.00 27.00 126.40 104.90 17.80 17.30 hmmbuild -o /dev/null HMM SEED 286 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.20 0.70 -11.74 0.70 -5.16 10 50 2012-10-08 13:43:13 2012-10-08 14:43:13 1 6 23 0 19 35 0 223.80 58 35.09 NEW DHLKNsYRRRFCRPS.RAPsINTEQGQNlLEILQDCFEEKShANDFSsNSTKSVlhSTPKlKDhClQSPSKE..CQKSHP....KSlPVSSRKKEusLQhhlEPSEAusRSVQAaEVHQKILATDVuSKNTPD.+KhSS+KhcD+HuEuDEEFYLSVGSPSVLLDAKsSsSQNAlPSsAQKRETaoScNSlNhLSSSTEIShKT+KRLNFEDKslLKKlEIEscVSclEDKlSEt.QE+KsScTSQKRlQDoEhEIQPQAKKSFSTLFLETVKRKSESSsVVRHsATsPPHsSP ...........DHLKNtYRRRFCRPS.RA.sINTcQGQNlLEILQDCFEEpShAssFSsNuTcSl..ST..KhKD.plQSsSKE..sQcSHs....KSlPVSS++KEusLQh.....sEP....sEssscSVQAHEV+Q+hlusD.VtS+sTss.p+hSS..+ph.pshcs-AsEEFYLSVGSPsVLLD.AKsS..s.QpslsSsAQKRETYT.cNSVNhhsSST-lShKT+KRLNFEDKshL+phEItNpVSc.EDKhSEt.QE++sStoSQpRhpDoE.EIQ.puKKSFSoLFLETVKRKScSSslVRHhAssPsp.sP............. 0 1 1 1 +15483 PF15623 CT47 Cancer/testis gene family 47 Eberhardt RY, Coggill P, Hetherington K pcc Jackhmmer:P0C2W7 Family CT47 is a family of proteins found in eukaryotes. Proteins in this family are typically between 262 and 291 amino acids in length. There is a conserved HIL sequence motif. The function of this family is not known. 
27.00 27.00 149.70 149.60 22.50 22.40 hmmbuild -o /dev/null HMM SEED 279 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.88 0.70 -11.89 0.70 -4.66 14 37 2012-10-08 14:01:00 2012-10-08 15:01:00 1 1 15 0 12 35 0 262.80 50 96.59 NEW MSsTGDtDPspssQEuPso.tGuQuttutAt-uhutDSuPssG-.....ssPtscsuGlsGP.tG.pEt....EG............GsAEEDSDItss.........E-tptpppstuhshhVsA++FPMsGFRhhFLDLVHShLpRlYaNDHILIcspp.shlhht.ptst.............up.tpstlhhhsphhssussshEGpu.sLh....p.ht.hP..EPs..u-.sE.ApE.........tcssEEusthEsApctscE.....................u.E.tspEsstPE..........chsctQsEc.cEEAQsstu-tcccp.pccppc ..MSATGDtcPsQsDQEAPVSQEGAQAEAutAGstEGuDSGPcSuD.....hVPsAEssGVAGPhcGLuEE.....EGEQt.....tuLAAsPhsGuAEEDSDItss....E....EEEEEtppAsNhDlsssuRRYPhsGhRhhFLDhVHSLL+RlYHNDHILItsRp.uRLMhts+sus............Ps.h.spsslLLlspRLGsGAsuhEGcuLGLl......pEAAoVP..EPsVPA-.AEhApEPt..............EEuAEEt.sEEsAE-.spEc...........u...........uEEPs.....spEAsAPE..........ElTK.Q.EKW-EEAQsusuEEcK-ptpcKstc........ 0 2 2 3 +15484 PF15624 Mif2_N Kinetochore CENP-C fungal homologue, Mif2, N-terminal Coggill P pcc PB002175 (release 26.0) Family Mif2_N is a family of fungal proteins homologous to mammalian CENP-C. On depletion of CENP-C proper formation of both centromeres and kinetochores is prevented. The N-terminal of CENP-C is necessary for recruitment of some but not all components of the Mis12 complex of the kinetochore [1,2]. 
23.50 23.50 24.00 24.00 22.90 22.90 hmmbuild -o /dev/null HMM SEED 136 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.18 0.71 -10.99 0.71 -3.57 44 123 2012-10-08 14:07:44 2012-10-08 15:07:44 1 7 115 0 94 122 0 132.60 24 21.73 NEW cahslGhpuR..........KTGlslc-pGpRDEaGMEslDshFSSP-c..oslp.........................ttt.ppuSp..sMchsss.ssssPsshlss.........tpp.th.sPpu.poP...........t...h....p.tht...toSshppsstpss...................ts.hppch-..h ................................phhplGhtuR..........KTGlsl+cpsp+DEaGMEsl-shFuus-c.ushp........................................p...tp..ssptsSp....shshsss.stssPsshlps.........tpp.hh.s..Pps..pos.............ht...s.ht..us....spR.........s..ht.........pSsh.p....tt.............................................................................................................. 0 20 50 80 +15485 PF15625 CC2D2AN-C2 CC2D2A N-terminal C2 domain Zhang D, Aravind L; la_psag Mannual collection Family Many ciliary proteins are involved in ciliogenesis and implicated for ciliophathies. A recent study has shown that many of them contain various new versions of C2 domains which are predicted to mediate membrane localizations for Y-shaped linkers of transition\ zone of cilia [1]. This is the first C2 domain of ciliary CC2D2A proteins which also have another C2 domain (CC2D2AC-C2) and a new inactive transglutaminase-like peptidase domain (CC2D2A-TGL). 
25.00 25.00 26.80 25.90 24.60 24.00 hmmbuild -o /dev/null HMM SEED 169 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.83 0.71 -10.47 0.71 -4.78 20 138 2012-10-10 12:23:49 2012-10-08 22:33:14 1 4 88 0 84 110 0 160.10 33 11.78 NEW tPG-sphhspLsssssls......ssspsPpsEtsRRpphpphphalplhlNs.......ppVspopsp.L...tssFssphscthplpl.p+.hPcslplplaEsss...htsphlupVhlPlPsstssh.hs...............tspphpFoocp.hp.sh.h.s....................ppssphhsGslhhsssWsptpstt.s .............................PtEshlhs.Lshssslo......ssptssp.sEh....RR.pclp+pphalKlhaNs.......KcV.u.pTps+sL..ssDF+lpFsplFsl........pl.hp.hPESlsLplaEsss...sssslLApV.alPlPpsosspups.............................shcphEFSSsptlthsapusGu.............hpsssssphhh...hTSGplshssuWuhscss..l...................................................... 0 31 39 59 +15486 PF15626 mono-CXXC single CXXC unit Iyer LM, Aravind L la_psag Manual Family This is a solo version of the zf-CXXC domain with a conserved CXXCXXCX(n)C, zinc-binding motif. This is, thus far, only detected in the plant lineage in diverse chromatin proteins [1]. Structural comparisons show that the mono-CXXC is homologous to the structural- zinc binding domain of medium chain dehydrogenases [1]. The regular zf-CXXC domain binds nonmethyl-CpG dinucleotides. 20.00 10.00 23.30 13.20 18.10 9.90 hmmbuild -o /dev/null HMM SEED 46 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.80 0.72 -8.49 0.72 -4.02 7 7 2012-10-09 00:30:47 2012-10-09 01:30:47 1 4 4 0 7 8 0 44.10 22 4.97 NEW cssuC.hCpsCtt.s.ssps.s.ttt.tsp...............tsspsspClplcsh cssuC.hCpsCtt.s.ssps.s.ttt.tsp...............tsspsspClplcs.. 0 4 6 7 +15487 PF15627 CEP76-C2 CEP76 C2 domain Zhang D, Aravind L; la_psag Mannual collection. Family Many ciliary proteins are involved in ciliogenesis and implicated for ciliophathies. A recent study has shown that many of them contain various new versions of C2 domains which are predicted to mediate membrane localizations for Y-shaped linkers of transition zone of cilia [1]. 
This is the new C2 domain that is contained by ciliary CEP76 proteins [1]. 47.00 47.00 47.60 81.10 40.00 46.50 hmmbuild -o /dev/null HMM SEED 157 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.09 0.71 -10.73 0.71 -4.72 16 82 2012-10-10 12:23:49 2012-10-09 01:37:56 1 2 63 0 49 79 0 157.60 51 25.06 NEW hsphssscphLalclhsG+AFl-als.......................ssssophplplpatsQRFsSpsVssssEPsFsEpFLhcLcppshtts...............htsllslspPl+llllpsc.stpp..............pLlusphl-WRplLppss.............shslELtGhsspt.phssGlLplplELlPshpshh ...........p..plDPsRRYLYLpVLGGKAFLEHLp-s-s.................LsGQssSTFTLsLHFRs.QRFRS+PVP.CACEPDFcDGFLLEl++-uhGcusch..........................ssuTTMLSIuDPlHhVLIKTDhhGET..............TLVuSaFLEWRoVLuSpsGhs..........sLoVELhGVG...sEu.KVsVGILsl+LEhaPsLspsL... 0 18 23 34 +15488 PF15628 RRM_DME RRM in Demeter Iyer LM, Aravind L la_psag Manual Family This is a predicted RRM-fold domain present at the C-terminus of Demeter-like glycoslyases [1]. These proteins are involved in DNA demethylation in plants where they catalyze removal of the 5mC base and subsequently cleave the backbone through lyase activity. Orthologs of Demeter are present in plants and stramenopiles. The RRM fold domain is predicted to facilitate interaction of the catalytic domain with ssDNA or regulatory RNA [1]. 34.60 34.60 34.60 35.00 18.20 34.50 hmmbuild -o /dev/null HMM SEED 104 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.40 0.72 -4.34 21 84 2012-10-09 00:46:22 2012-10-09 01:46:22 1 10 25 0 52 90 0 95.50 62 7.39 NEW GTlLIPCRTAMRGSFPLNGTYFQVNEVFADHpSShNPIsVPRshIWsLsRRhVYFGTSVsSIF+GLosEcIQtCFW+GaVCVRGFDRcoRu............P+P...LhARLHhsso ............GTlLIPCRTAMRGSFPLNGTYFQVNEVFADHsSShNPIsVPRphlWsL.RR..hVYFGTSlsoIFKGLosc-IQpCFW+GaVCVRGFDppoRu............P+PLhs+LHhsss................................ 
0 10 30 41 +15489 PF15629 Perm-CXXC Permuted single zf-CXXC unit Iyer LM, Aravind L la_psag Manual Family This is a permuted version of a single unit of the zf-CXXC domain that is detected in the Demeter-like proteins of land plants. Structural comparisons show that the mono-CXXC is homologous to the structural-zinc binding domain of medium chain dehydrogenases [1]. The classical zf-CXXC domain binds nonmethyl-CpG dinucleotides. 21.30 21.30 21.30 21.30 20.90 21.20 hmmbuild -o /dev/null HMM SEED 32 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -7.37 0.72 -7.79 0.72 -3.95 10 47 2012-10-09 00:22:00 2012-10-09 02:18:17 1 4 17 0 28 56 0 31.40 54 2.09 NEW PcppCsS.pEoG+LCs-pTCFSCNSlREspSQT ...PcppCsS.pEsG+LCsppTCFSCNSlREspoQh... 0 4 17 23 +15490 PF15630 CENP-S Kinetochore component CENP-S Coggill P pcc manual Family CENP-S is a family of vertebral and fungal kinetochore component proteins. CENP-S complexes with CENP-X to form a stable CENP-T-W-S-X heterotetramer. 28.00 28.00 28.00 28.00 27.90 27.90 hmmbuild -o /dev/null HMM SEED 76 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.57 0.72 -3.80 46 231 2012-10-10 12:36:46 2012-10-09 13:40:56 1 9 190 12 158 473 3 72.70 38 43.37 NEW p+LKuAlaasVGc.ls-E..p.sh....cpshs...so.phl......uALoEllapQ....lp.sl.upDLEsFA+HAtRoT.Iss-DVhLluR+Ns...sL ...........................pcLKuAlahsluplsc-.....s............ctshp........ho.phI......uAlsElsapQ.....................hc.sh.upDLEhFA..............+........HA.tRsT.lss-DVhLlARRsssL........... 0 47 84 126 +15491 PF15631 Imm-NTF2-2 NTF2 fold immunity protein Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein of the NTF2 fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-NucA family [1]. This domain is also fused to ankyrin repeats and the PFAM:PF14025. 
26.00 26.00 26.00 32.10 25.90 23.80 hmmbuild -o /dev/null HMM SEED 66 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.38 0.72 -9.32 0.72 -3.99 17 170 2012-10-09 13:50:12 2012-10-09 14:50:12 1 1 121 0 11 48 1 67.30 70 58.81 NEW sAlclAEhhlh.lYGppl..ppKPahlshpssp..WllpGoh........ssp........hhGGshhIhIpKpDG+lLplhHsK ...AlpLAEIYV+.RYGpch.AEEEKPY.ITELssS..WVVEGsh.......hs.p.........lAGGVFIIEIsKpsGplLNFhHuK... 0 10 10 11 +15492 PF15632 ATPgrasp_Ter ATP-grasp in the biosynthetic pathway with Ter operon Anantharaman V rdf Anantharaman V Family This ATP-grasp family is related to carbamoyl phosphate synthetase. These genes are found in the biosynthetic operon associated with the Ter stress response operon and are predicted to be involved in the biosynthesis of a ribo-nucleoside involved in stress response [1]. In press. Mol. BioSyst. 2012, DOI:10.1039/C2MB25239B. "Ter-dependent stress response systems: novel pathways related to metal sensing, production of a nucleoside-like metabolite, and DNA-processing" Anantharaman V, Iyer LM, Aravind L; 182.70 182.70 192.60 190.40 175.00 167.50 hmmbuild -o /dev/null HMM SEED 330 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.99 0.70 -11.94 0.70 -5.69 99 206 2012-10-10 13:17:03 2012-10-09 14:55:16 1 1 201 0 43 148 9 327.20 42 96.59 NEW hlWFhcGhSupppllpul+s..s.................................tslplhASHpppcstlhthADhuhh..EPp....sspc...............hlpalLchscppslclllsu+psph..httpRspFput.....GscLhs.ssshpslphh--Kspahtthcp.....t.Gl...sssssht.lsos-EL.psuh.ss....lth...s......sp.lClKPssGlhG.hGFhhlcp........sssshc.shtp...ssspp.lshpthlsuhpts....-ph.....shllMPYLsGsEhSVDhls.cp................Gc...llAAV.sR.+Ks.....G..hh.Qplppcsph.hplApchActhshcGlhNlQhRpc.ssGp.Ph.LLEINsR.SGGlshot.ts....GVNLPtlhs.thtLGhht..-....h............h....psh.pVpslosslth.s 
...........pIWFhcGhSSpR-lIpul+shs.............................pshulplaASH+sp+spILphADhuhh..EPp.....Dspc.................hlpalhchspsasIchIhsGRssph..hEpHRuth-st.....GspLsTGusssshlslsD-Kspasphhcp.....pGL.PVlPohp.VsohsEL+stlus.....sas......sp.lClKPVoGIYG.hGFW+hDc........osushssFs+...s-pRhVosppYlsAhptu....EshpPhlLMPYLPGPEaSVDhLs-+................GcllAAVuRRKp......G..slQhLh.p-usAhELApcsAchhpsDGLVNVQTRsD.ssGp.PlLLEhNhRPSGGlsYTh.co....GVNLPGLFA.thpLGLhsp-h..sh......h.pslsVRulTssl.h.p.................... 0 13 28 36 +15493 PF15633 Tox-ART-HYD1 HYD1 signature containing ADP-ribosyltransferase Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the ADP-ribosyltransferase superfamily present in bacterial polymorphic toxin systems. The domain has characteristic histidine, tyrosine and aspartate residues that comprise the active site. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, or type 7 secretion system [1]. 25.00 25.00 45.30 43.40 23.00 22.20 hmmbuild -o /dev/null HMM SEED 96 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.70 0.72 -10.31 0.72 -3.18 18 25 2012-10-09 14:47:52 2012-10-09 15:47:52 1 12 21 0 9 25 0 97.30 27 14.38 NEW lYHYTsccGhsuIh-Sshlp.....hpu.s.hp.thssGpYhoshuPGp......hp.ucuht+hsh.sss+ss+.al..ElDsss..Lphlcs..pcsshh.h.psslDlssct .....laHYTsccGhpuIhcosplp.....hpupsshp.utssGtYloshuPsc.....................hphucuhtphsl.sssKsoH.aI..ElDsss..Lphlcs........pcsshhhh..ptslDlssp............... 0 4 8 9 +15494 PF15634 Tox-ART-HYE1 HYE1 signature containing ADP-ribosyltransferase Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the ADP-ribosyltransferase superfamily present in bacterial polymorphic toxin systems. The domain has characteristic histidine, tyrosine and glutamate residues that comprise the active site [1]. 
25.00 25.00 530.80 530.50 22.00 21.30 hmmbuild -o /dev/null HMM SEED 285 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -10.84 0.70 -11.96 0.70 -5.44 2 8 2012-10-09 14:52:20 2012-10-09 15:52:20 1 1 8 0 1 2 0 284.60 86 32.52 NEW GLsL.pMlRNss.TLps....tPpKsEDGYYYHlopupNL.ShlppGFhPQGS.GPTLSttDhppRKhGlI+hIYohlATplN+schuK.KISpsNFhMPp-FWpEFK..hpsIsspssI-..sphLpcSIscuIst.LDpscFhcKHs-+Kpp.IsppR.tlhppDE.INcIIop+shh.QQREAtNTcGYIYLAsp+NTL.cYhhphpppps.hllLAl.-sIFotKhLEpD.QEPcsAVRYpGul.ot-LpFVNpEGQlssh.hShu....G-hILs..pVIsFh+K GLNLFRMVRNNPITLRDDDGLEPKKAEDGYYYHLTNARNLMSIVEKGFIPQGSQGPTLSAGDLENRKKGVIRYIYTKVATELNRGEISKNKISSNNFKMPSDFWSEFKLEGSSITNDNDIESVNKKLNDSIVRGISAVLDKGEFKKKHNEKKSEDITRSRMRVMDDDE.INKIITQKPLLEQQREAANTKGYIYLAATRNTLEKYAINYKSSNDDMILLAIPDSIFSAKILEEDEQEPDCAVRYSGGVLSADLRFVNREGQVVPFEYSAS....GEIILDYPSVINFIRK 0 0 1 1 +15495 PF15635 Tox-GHH2 GHH signature containing HNH/Endo VII superfamily nuclease toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic s[AGP]HH signature motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type secretion system [1]. 25.00 25.00 49.60 48.70 22.40 19.80 hmmbuild -o /dev/null HMM SEED 106 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.65 0.72 -10.50 0.72 -3.68 21 50 2012-10-09 14:59:33 2012-10-09 15:59:33 1 7 48 0 12 40 2 100.90 30 23.52 NEW pTGHHLIsspphpstst...............ptYspspAPslCs-Gssps.sGoHGphHsthpshththt............t.t.spastusspsssuhs..csastspCsccClctQLssaac ...QTuHHLIsspth+stup...............ssYocscAPslCsEGssps..sGoHGthHsthsshhtcpp...........hstp.tsassAcspshsuhu..pshstspCs+cCLcuQL-saap.......... 
0 1 8 11 +15496 PF15636 Tox-GHH GHH signature containing HNH/Endo VII superfamily nuclease toxin Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic sG[HQ]H signature motif. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, type 7 or TcdB/TcaC-type secretion system. The metazoan teneurin proteins possess an inactive of this domain at their C-terminus [1]. 25.00 25.00 27.50 26.40 24.20 24.80 hmmbuild -o /dev/null HMM SEED 79 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.27 0.72 -9.36 0.72 -4.34 38 392 2012-10-09 15:03:10 2012-10-09 16:03:10 1 43 112 0 171 362 1 78.50 52 3.34 NEW -cE+pRllccA+pcAlcpAWpcE+ptlcsGtt.GstsWocsE+ppLLssGpVs............GY-Ghahhsl......ppYPcLA-sstNIpFh+ .......-EEKsRlL-hARQRAlspAWs+EQQ+l+-GcE.GsRtWTEGEKpQLLosG+Vp............GY-GaaVlSV......EQYP...ELADSusNIpFhR............ 0 29 40 94 +15497 PF15637 Tox-HNH-HHH HNH/Endo VII superfamily nuclease toxin with a HHH motif Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with characteristic conserved s[GD]xxR and HHH motifs. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 
27.10 27.10 29.80 29.00 25.30 23.50 hmmbuild -o /dev/null HMM SEED 117 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.81 0.71 -10.37 0.71 -4.31 9 31 2012-10-09 15:05:23 2012-10-09 16:05:23 1 6 29 0 6 30 1 111.30 43 22.97 NEW phsshssthlsDP+hsV-MsahG+GpssTNut.GWhRssKhFWpchhctpP-hhSpsNptpI.cpGh....uPhlDssalKaFPpa..sshhsDsLhHHHlGtGupAsslPssLHsG....GGlHshc ................chsph.sphhhDP+lsV-MsahGKGptsTNAt.GW.RssKhaapphhcp.pPE.haStpNpt+I.cpGh....sPllDppFlKHFPQY..sshhsDsLhHHHIGt.GGQAsAlPpsLHPGh...GGIHNhE....... 0 0 1 2 +15498 PF15638 Tox-MPTase2 Metallopeptidase toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 25.00 25.00 33.30 32.50 22.80 22.60 hmmbuild -o /dev/null HMM SEED 194 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.63 0.71 -11.34 0.71 -4.78 6 10 2012-10-09 15:10:23 2012-10-09 16:10:23 1 2 9 0 2 10 0 186.80 24 14.59 NEW DYFsssG+FI+ss.ps+ssNIYIpsssGN.llhSsYsh+.......pucNhQhhANIsuHYAKtlGlc.........ttsolGspu.tcpsspuchssss.pGtpIpsushNGahcphhsNhYNLpSTLtHEshHp..spRstsc.....scll+..Vlh+phps.cFu+TTpsF+puhAthhpchlpsAYtpssp..Ntshscs.Khlsphspssshphh ..........................DYFsspG+FI+ss.ps+ss.IYIppspsN.llhSphshp........tpshphhhpIshHYAptsGlp.........ttsslGssu.tspsspuphsAss.pstpIphtlpsGahs+phsNhYNLpSTLhHEshHp..ptRGssc.....scllc..Vlh+phcs.cFs+TTpsF+cupAshhpchLpphhtpssp..pthhtcs.chlst.thpss.ph.......................................... 0 1 1 2 +15499 PF15639 Tox-MPTase3 Metallopeptidase toxin 3 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 
25.00 25.00 26.50 157.80 23.60 20.80 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.86 0.71 -4.41 6 6 2012-10-09 15:10:47 2012-10-09 16:10:47 1 2 5 0 5 6 1 138.70 31 33.31 NEW pt+KYP+LsaYLKasls..sl.l+sPsllpAhtKhutlscuplp-uLsWGKGPpIpIsshss......................AhGthoPsppuppl.pIctcLVspaEpups.c..pththallttTlLHELlHWuccpsGlD.PGEE.......GctFEc.lYGp ..t+KYP+LsaYLKasls..sl.l+sPsllpAhtKhutlscuplp-uLsWGKGPpIpIsshss......................AhGthoPsppuppl.pIctcLVspaEpups.c..pththallttTlLHELlHWuccpsGlD.PGEE.......GctFEc.lYGp. 0 1 5 5 +15500 PF15640 Tox-MPTase4 Metallopeptidase toxin 4 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 25.00 25.00 31.90 31.80 24.90 24.00 hmmbuild -o /dev/null HMM SEED 132 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.70 0.71 -10.64 0.71 -4.51 7 110 2012-10-09 15:11:08 2012-10-09 16:11:08 1 13 105 0 3 60 0 125.40 78 16.88 NEW -th.VSGQs.+ls.sGpRlhslp-hKsa+K-MsphGIKV.IDKKspIL.....PpssAuGFDPhTGcIhLRpcsohlsshHEshHAcQWhcLGKEsYhpQotLEREEaVaNEIMKNKthFscuEIhaup+YIapLRst ..........DGRRVSGHTGFL..DGVRLSR.SQINNIAKEMEKLGIKV.IRKADKYL.....PPNARAAFDYGLRNIYLRKNATLYEVYHEVIHAKQFAKIGREAYEALGRLSR..........EEHVLNEILKSK...NLFNEAEIAHAIKYVEGLREK... 0 1 2 2 +15501 PF15641 Tox-MPTase5 Metallopeptidase toxin 5 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A zincin-like metallopeptidase domain found in bacterial polymorphic toxin systems [1]. 
107.40 107.40 190.70 189.70 25.20 18.00 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.93 0.72 -10.35 0.72 -4.08 5 5 2012-10-09 15:11:30 2012-10-09 16:11:30 1 3 3 0 1 4 0 109.20 79 12.30 NEW DLGHVTGSTARARNKEITAILKEDFAYLKLTYIPQYNPFMRTGIAKLGEGTQFGKNSFSSRAELRDVIIHEELHHRWWKRGLNDHHPK.GTEMEQKFYETIRRYKRMRGW DLGHVTGSTARARNKEITAILKEDFAYLKLTYIPQYNPFMRTGIAKLGEGTQFGKNSFSSRAELRDVIIHEELHHRWWKRGLNDHHPK.GTEMEQKFYETIRRYKRMRGW 0 0 1 1 +15502 PF15642 Tox-ODYAM1 Toxin in Odyssella and Amoebophilus Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted all-alpha fold toxin present in bacterial polymorphic toxin systems of the endosymbionts Odyssella and Amoebophilus [1]. 25.00 25.00 665.80 665.80 23.20 21.20 hmmbuild -o /dev/null HMM SEED 385 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.70 -11.50 0.70 -12.22 0.70 -5.80 3 3 2012-10-09 16:09:44 2012-10-09 17:09:44 1 3 1 0 3 4 0 385.30 52 22.38 NEW LKEHFKTtAoQuo.GAWKAulPV..KDI-psLSlpEQcALQQADKIVLDYNAKYpQsVp.NLSRFS+uKQQFSKAFuKTAQVoKDAVLGP-INSLp+LQQcVNuAYGSEVlKLDpGRhERNHcERps-lTTSHQQ+EEuLE+K+-cISuQL+..lsP++RlEL++cpspLssQLtEEEElYclALKRLsNPAQoALEDFYDWKRSVFHDFTKLHLGNaLITYEALALGTVARHPEAYEpsLQGSTAIGSVLADAFLPLGGLLGNVIGKIAEcGAELYADKQIRIKAAKIGSLYGHKGLEGMVALTREVANGLLFRLKDVIIDLTPESLDKLAKVATTQMIDYALKh..WEcNs.sQTINAhNLLL+GTQHQPSWhKuFTQsssLEpEDGRaVDG LKEHFKTtAoQuo.GAWKAulPV..KDI-psLSlpEQcALQQADKIVLDYNAKYpQsVp.NLSRFS+uKQQFSKAFuKTAQVoKDAVLGP-INSLp+LQQcVNuAYGSEVlKLDpGRhERNHcERps-lTTSHQQ+EEuLE+K+-cISuQL+..lsP++RlEL++cpspLssQLtEEEElYclALKRLsNPAQoALEDFYDWKRSVFHDFTKLHLGNaLITYEALALGTVARHPEAYEpsLQGSTAIGSVLADAFLPLGGLLGNVIGKIAEcGAELYADKQIRIKAAKIGSLYGHKGLEGMVALTREVANGLLFRLKDVIIDLTPESLDKLAKVATTQMIDYALKh..WEcNs.sQTINAhNLLL+GTQHQPSWhKuFTQsssLEpEDGRaVDG 0 3 3 3 +15503 PF15643 Tox-PL-2 Papain fold toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A papain fold toxin domain found in bacterial polymorphic toxin systems [1]. 
25.00 25.00 26.00 90.80 24.00 23.10 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.37 0.72 -4.03 10 11 2012-10-09 16:10:18 2012-10-09 17:10:18 1 3 9 0 4 16 0 98.80 33 18.26 NEW s.spl+QtIusIusc.F.p..laQChcCApAlcpaLppptIsGKlI+LpTt.spssa..Ias-tl..p...poIopNGpH.GItVt....shVFDNlascGlsRc-Wlpshps ....spltQtIspIssp.a.t..lhpC.pCApAltpaLppptIpGKlIcLpTt.pptsa..Ihs-tl..p...poIopNGpH.GltVt....shVFDNlpspGlsRc-Wlpsht... 0 1 4 4 +15504 PF15644 Tox-PL Papain fold toxin 1 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A papain fold toxin domain found in bacterial polymorphic toxin systems. In these systems they might function either as a releasing peptidase or toxin [1]. 24.10 24.10 24.50 24.10 23.40 22.00 hmmbuild -o /dev/null HMM SEED 111 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.67 0.72 -10.64 0.72 -3.32 46 104 2012-10-09 16:13:10 2012-10-09 17:13:10 1 16 86 1 28 91 0 112.80 19 12.33 NEW pNChsCslAstssht..GtshsAtPph.tt..................sppsshshhpphhu..............tp.ht.h...................sshptltptlpstspGupullhsph.............tsts......GHshsll..ppsGs..lhalDsQsGpss ........................................................NC.sCshuhthpht..GhshpAtsth.t...................stpsshshhpphhu..............t...h......................sstphhpphlpshspGutshlhhth..............tsts........uHshssh..hpsGp..lhalDsQsGp..h...... 0 11 22 26 +15505 PF15645 Tox-PLDMTX Dermonecrotoxin of the Papain-like fold Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A papain fold toxin domain found in bacterial polymorphic toxin systems [1]. 
26.10 26.10 26.80 27.00 20.70 25.40 hmmbuild -o /dev/null HMM SEED 135 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.75 0.71 -11.04 0.71 -4.12 14 38 2012-10-09 16:45:02 2012-10-09 17:45:02 1 5 35 0 8 29 1 134.70 47 14.49 NEW ssPhspCcsuhp.lsshh+.ptsh..psl+htslhh......W.psu...sc-hshNHalVlu+hsshcYVhDsTAcQFpsht................sp..sPlltstssWh.t...+aptuhpcKhlhhtchss...sphutssapt..t...hss.cslpssphLspPpWY ..............h.NPlGQCESLMTPVSsFMp.-KGF..-NIRYRGIaI......W..Dcs...TEElPpNHFAVVGsK-GKDYVFDlSAHQFcN+Gh...............SsLsGPLILoAD-Wls....KYRh..Ao++KLIhYsDFuN...uulAusuacALsc...chpsEohsGclhlTSPRWF....................................... 0 1 4 6 +15506 PF15646 Tox-REase-2 Restriction endonuclease fold toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 7 or PrsW-peptidase dependent secretion system [1]. 67.30 67.30 109.90 109.20 25.30 21.40 hmmbuild -o /dev/null HMM SEED 125 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.82 0.71 -10.60 0.71 -4.25 7 7 2012-10-09 16:47:01 2012-10-09 17:47:01 1 2 6 0 4 7 0 127.40 32 25.86 NEW spsAYQhclAGYPEhclslP...upcpslhsDGhRsp...DGhhl-AKaVscPspsph..........p.lcplchpppupht.....DhhhcpDccELp+YstAhsss.spph+GlEIVTNs..................-ussYWpshMAhhGVs spsAYQhclAGYPEhclslP...upcpslhsDGhRsp...DGhhl-AKaVscPspsph..........p.lcplchpppupht.....DhhhcpDccELp+YstAhsss.spph+GlEIVTNs..................-ussYWpshMAhhGVs 0 1 2 4 +15507 PF15647 Tox-REase-3 Restriction endonuclease fold toxin 3 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 6, type 7 or PrsW-peptidase dependent secretion system [1]. 
25.00 25.00 36.00 68.00 22.20 20.50 hmmbuild -o /dev/null HMM SEED 109 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.81 0.72 -10.19 0.72 -4.25 5 39 2012-10-09 16:47:57 2012-10-09 17:47:57 1 7 38 0 3 32 0 108.70 62 15.12 NEW puuKh+sstscpYEDhlREKLGGpS+lltu.....REaDAVTDchlAQsKcolSuIcpPKNFLNKKsRcQIKATIEAApQQGK+ApFWFKhc....VpscV+EYIEp+GGcVIIsocs ........ppuKhREALDlHYEDLlRRKLGG.Sp.IsG.....REaDsVTDchIAQsKRThSuIDpPKNFLsKpsRsQIKtTIEhAcpQGKcApFWFKYG....VpsKV+EYIESKGGcVllGhG.s 0 1 2 2 +15508 PF15648 Tox-REase-5 Restriction endonuclease fold toxin 5 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, or PrsW-peptidase dependent secretion system. Versions of this domain are also found in caudoviruses [1]. 25.00 25.00 34.30 33.40 24.80 23.90 hmmbuild -o /dev/null HMM SEED 95 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.52 0.72 -10.38 0.72 -3.70 66 128 2012-10-09 16:49:27 2012-10-09 17:49:27 1 5 85 0 41 164 2 94.30 33 27.11 NEW tshpYQtpITG.hs.......hshphtht..t.............................h.h.slcFDGapss...pslLhEAKu..sY.c.pFhc.pst...p.st.h.............hpuhcsh...hcQupcQspssps.t.s.splcWaFtp. ....shcYQtplTG.hs.......hsh.-.ht.hs...pp............................h.h.slcFDGFpss...cChLhEAKu..sY.c.pFhs.ssh...p..t.h.............hpGhpsh.hppApcQsssscs.s.sssplcWaFtp........... 0 3 9 21 +15509 PF15649 Tox-REase-7 Restriction endonuclease fold toxin 7 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, or type 7 secretion system [1]. 
25.00 25.00 27.60 28.40 23.30 22.70 hmmbuild -o /dev/null HMM SEED 87 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.36 0.72 -9.87 0.72 -3.85 21 52 2012-10-09 16:49:57 2012-10-09 17:49:57 1 10 43 0 15 60 0 86.20 34 11.40 NEW tGptuEppss.sht.......tspcplsstss.......tpRIPDshsp.spplsEVKNVpcl..shopQl+s.sphApppGhphsLlVs..csTplS.psL ........................h.GctuEhtAuhhhs.......us++plssshs.tt....tppIPDhls..p.sc..slsEVKNVpc...uhTcQlptplchApp.thchhLlss..chTclst......... 0 4 9 14 +15510 PF15650 Tox-REase-9 Restriction endonuclease fold toxin 9 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the restriction endonuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 7 secretion system [1]. 25.00 25.00 30.10 29.10 23.10 20.90 hmmbuild -o /dev/null HMM SEED 89 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.46 0.72 -9.81 0.72 -3.78 11 16 2012-10-09 16:50:23 2012-10-09 17:50:23 1 7 15 0 9 17 0 83.00 43 6.76 NEW thopSuh+hGRphHKsYKsu.h.....st.KEFpLPSG+RhD..FlDa-s..KhIYELKPsNP+uIKcGhKQLcsY+pElcphh........GtsWpslL-TY .t.hopSuh+hGRphHKsYKhs.......shpKEF+LP..SGK..RhD..hlDhps..+hIYELKP.NP+uIKpGhKQLcsY+pElpph.........GtsWpshl-sY...... 0 4 9 9 +15511 PF15651 Tox-SGS Salivary glad secreted protein domain toxin Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family An alpha+beta fold domain with four conserved cysteine residues and a conserved [DE}xx[ND] motif. This domain is mainly present at the c-terminus of RHS repeats containing proteins in insects and crustaceans. Although no bacterial homologs have been identified, the domain architecture suggests an origin from bacterial polymorphic toxin systems [1]. 
25.00 25.00 29.20 27.80 18.20 17.60 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.35 0.72 -4.23 18 21 2012-10-09 16:50:45 2012-10-09 17:50:45 1 2 6 0 14 21 0 97.10 33 3.39 NEW pNCa....ssss-tsplpCYppcupsplFs+ss........phhspDpassChPlpapGpPSluCcGppooalYTPhps....s+hFDhlDGWLhLApVuPsslRNlppsh ......................NCa....s.ssphshlhCapppuhshlFs+ss........pshspDpassChPlpapGpPSluCcGppooalYTPhps....schFDhlDGWLhLApVuPssl+slpph................ 0 3 4 14 +15512 PF15652 Tox-SHH HNH/Endo VII superfamily toxin with a SHH signature Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with two conserved histidine residues. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6 or type 7 secretion system [1]. 22.70 22.70 23.80 24.60 21.60 21.40 hmmbuild -o /dev/null HMM SEED 100 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.72 0.72 -10.63 0.72 -3.42 14 38 2012-10-09 16:51:09 2012-10-09 17:51:09 1 12 37 0 9 38 1 91.60 36 20.28 NEW .suhlQSHHsIQDtWAc....pslssYspc..tAPshLl.Ssst.sHAtloA...tpRscRsupG......WsoslpcEFstuhpE.Mh-.AGVsppssc+sh+c..uY+YFD ...su.lpsHHul.DhWAc....pNlssYstc..tsssslh.otc...Hshhpss.pthhhc+sucs.sGK..Wpos.pcEhpphscc.Mh-.AsVscpstpphhcs..pYpah... 0 1 5 8 +15513 PF15653 Tox-URI2 URI fold toxin 2 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the URI nuclease fold present in bacterial polymorphic toxin systems. In bacterial polymorphic toxin systems, the toxin is exported by the type 2 or type 6 secretion system [1]. 
25.00 25.00 25.60 25.20 22.10 20.00 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.93 0.72 -10.04 0.72 -3.79 13 44 2012-10-09 16:52:12 2012-10-09 17:52:12 1 9 34 0 16 47 0 76.20 23 13.91 NEW .chupccshuYphhcKs-....laKlGEos.....ppps.sh+sRhstphLc+ssup...................hphlpcss....t..sKpsspphEst+lppapcppG .......p.spcpshsYthhccss....lhKhGcos.....tpts.ssctRhphphLpcpssp...................hphhspss........s+pphpphEptclpth................ 0 7 13 14 +15514 PF15654 Tox-WTIP Toxin with a conserved tryptophan and TIP tripeptide motif Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin domain with two membrane spanning alpha helices and RxxR, Wx[ST]IP motifs. The domain is present in bacterial polymorphic toxin systems. The toxin is usually exported by the type 2 or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 25.00 25.00 81.00 79.70 21.00 19.70 hmmbuild -o /dev/null HMM SEED 54 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -8.82 0.72 -4.35 7 9 2012-10-09 16:52:36 2012-10-09 17:52:36 1 5 9 0 1 10 0 54.00 48 9.63 NEW .phsssssshsss-sussssAusGsuYllYRslRhlPSLhPPLW.TIPANlAhP .p.phssssoshoTs-sussssAslGsGYlIYRslRhlPSLhPPLWhoIPANlssP 0 0 0 1 +15515 PF15655 Imm-NTF2 NTF2 fold immunity protein Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted immunity protein of the NTF2 fold. Proteins containing this domain are present in bacterial polymorphic toxin systems as an immediate gene neighbor of the toxin gene, which usually contains toxin domains of the Tox-JAB-2 family [1]. 
25.00 25.00 27.70 27.40 21.70 20.70 hmmbuild -o /dev/null HMM SEED 130 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.91 0.71 -11.00 0.71 -3.68 32 92 2012-10-09 18:50:36 2012-10-09 19:50:36 1 3 74 0 15 92 2 130.10 21 66.80 NEW .ppscphlhs...FhttMppWEp...h.....ppht.pp.t.............hppclpsIappasTcK...pRptut....shSh...G.PspYs.ppt....hl-h..pptspsplhl.hp..pps...thptc.acahlhhpsscWhIDp....hppc........-pWppsh. .............................................h...hpphl.tFhtthppaEt.h.......pt.s.pp......................t.hppclttlappaso-K....+htst...hshSa...u.supYss.p.....hl-h..cphscsclhlht..ppp....hphc.hpFlhhhhsspWhIDp....hppc........ctWpps..h....... 0 2 9 9 +15516 PF15656 Tox-HDC Toxin with a H, D/N and C signature Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted alpha/beta fold peptidase domain with a strongly conserved triad of a histidine, aspartate/asparagine and cysteine residues that are predicted to comprise the active site of the predicted peptidase. Proteins bearing this predicted toxin domain are particularly common in both intracellular and extracellular pathogens [1]. 25.00 25.00 28.30 27.40 20.40 19.30 hmmbuild -o /dev/null HMM SEED 119 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.62 0.71 -10.73 0.71 -3.90 9 20 2012-10-09 19:11:59 2012-10-09 20:11:59 1 5 16 0 5 19 0 116.40 28 18.40 NEW tlaGscsphtshc.lptslpsIs+p.S..spsIhIlSGoHGhssG.........pNWhtps.........lRcPpl.h-hpFhtpDhpshpt....hscplhlhDlssss.tchss..lpss......ssphILuYCaStsDpsht. .........................lhstptth.sht.lptshphlt+p.s.sstcIhIlSGoHGtssG.........pNasups...............lRcPsl.hE+tFahEDlpsap.t...........hstpV+lhDlushop.pEhss.plpss......spplIhGaCaSpsDchhh.h.... 
0 1 2 4 +15517 PF15657 Tox-HNH-EHHH HNH/Endo VII superfamily nuclease toxins Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A predicted toxin of the HNH/Endonuclease VII fold present in bacterial polymorphic toxin systems with a characteristic conserved [ED]H motif and two histidine residues. In bacterial polymorphic toxin systems, the toxin is exported by the type 2, type 5, type 6, type 7 or Photorhabdus virulence cassette (PVC)-type secretion system [1]. 25.00 25.00 26.70 26.60 23.40 23.40 hmmbuild -o /dev/null HMM SEED 72 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -8.75 0.72 -9.70 0.72 -3.83 33 80 2012-10-09 19:15:32 2012-10-09 20:15:32 1 26 69 0 26 75 0 66.90 30 8.65 NEW tlhcpsspshhsRpYcFpsscG.....ppllIp-HotGHths.....tspsPHFNsts.cs...........................hpsGphsspcsHYsa .......hhsspspshhsR.acapshcG.........pKhlIp-HotG+th.......spuPHapsts.pp............................h.c.puphstp.psHY.a....................... 0 2 9 18 +15518 PF15658 Latrotoxin_C Latrotoxin C-terminal domain Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family A toxin domain present in arthropod alphaproteobacterial, gammaproteobacterial endosymbionts and also at the C-termini of the latrotoxins of the black widow spider. The domain is characterized by a conserved, hydrophobic helix and is predicted to associate with the cell membrane [1]. 25.00 25.00 46.20 43.40 24.70 22.60 hmmbuild -o /dev/null HMM SEED 127 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -9.89 0.71 -10.46 0.71 -4.31 8 23 2012-10-09 19:27:40 2012-10-09 20:27:40 1 16 8 0 8 25 0 126.00 38 6.65 NEW phDsNuslhLLDlLIRKlTspKYhsT....sc.polSPLEApGYALsIsKcFEcVlc.QAulKSGISh+cLNIDhlElQcpIssK..ItSGKFsEISulLsSYlEcAhPs......ucLS.Kph-KFhspFNscl-.....slLNp ......hDVNGslhLLDlLIRKhoupKYhss....sc....pSISsLEAQuYALsIsc+FEcVLp.psulKuGlShcpLNlDhstlQpcIhtK..lhuG+FsEIuthLsSasccAhPt......upLp.KphcKFh.pFppthc.hlp............. 
0 0 2 2 +15519 PF15659 Toxin-JAB1 JAB-like toxin 1 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family \N 26.40 26.40 26.40 26.60 25.60 25.70 hmmbuild -o /dev/null HMM SEED 162 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.71 -10.07 0.71 -11.36 0.71 -4.36 13 24 2012-10-10 14:49:21 2012-10-09 20:38:55 1 2 16 0 6 24 1 153.20 26 34.55 NEW p+ss+las....phtsusps-phhssp.ts..psIpl.spsll...uphsp..tpsppG....thpoas....oTssspsAtslFcFsA-NTo..VEWpLsshp.-sGs.pshsltTspcptuspss..hsph.pc.htssuphlhc..IHSH...........Ptss.....tsS.....hsDhphup.tps..........huhYhpc.......tthhphYs ......................ppssclhp.....hhpssps-phh.hp.ts..psIpl.sppll...sthhp..tpsppG....hhpoas....sTss.csAtslFcFsA-NTo..VEWpLssht.-p....Gs.pshhlsTsppppuVt....hs.htpc.h.stsphlIc..hHSH...........Phss....tsS.....spDhpshp.hps...........shahcc........t.h..Y................ 0 6 6 6 +15520 PF15660 Imm49 Immunity protein 49 Zhang D, de Souza RF, Anantharaman V, Iyer LM, Aravind L, Finn RD rdf [1] Family \N 25.00 25.00 168.90 168.80 24.80 19.60 hmmbuild -o /dev/null HMM SEED 84 hmmsearch -Z 23193494 -E 1000 --cpu 4 HMM pfamseq 0.72 -9.17 0.72 -9.79 0.72 -3.94 4 10 2012-10-09 20:02:34 2012-10-09 21:02:34 1 1 10 0 0 10 0 82.90 99 96.96 NEW hRALVALKRELLPGVTTFIDSVRLEAIDDKADRLMVTTSVGEEARLVYFNPDFAGTPTFGRRLYRLRDWTDDLADWVDRLRRER VRALVALKRELLPGVTTFIDSVRLEAIDDKADRLMVTTSVGEEARLVYFNPDFAGTPTFGRRLYRLRDWTDDLADWVDRLRRER 0 0 0 0 diff -r 000000000000 -r 68a3648c7d91 pfam_search/pfamA.txt.gz Binary file pfam_search/pfamA.txt.gz has changed diff -r 000000000000 -r 68a3648c7d91 pfam_search/pfam_filter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/pfam_filter.xml Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,17 @@ + + keyword search on PFAM annotations + /home/inmare/galaxy/tools/pfam_search/annota.Filter.pl $prot $pfam $out $search + "approved by the boss" + + + + + + + + + + + This tool performs keyword searches in order to facilitate the retrieval of proteins containing functional domains of 
interest from a PFAM annotation. Keyword searches are performed through a simple Perl script implementing exact searches of user specified keywords within the description of each PFAM domain as reported in the clans.txt or pfam.txt domain description files. Searches can be performed using logical connectors, AND, OR and NOT. When multiple keywords are entered, the default is to use the AND connector. + + diff -r 000000000000 -r 68a3648c7d91 pfam_search/prots --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/prots Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,80 @@ +>PROKKA_00001 Mercuric resistance operon regulatory protein +MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSA +QRLGFSLDEIAELLRLDDGTHCEEASSLAEHKLKDVREKMADLARMETVLSELVCACHAR +KGNVSCPLIASLQGEAGLARSAMP* +>PROKKA_00002 MerT mercuric transport protein +MSEPQNGRGALFAGGLAAILASTCCLGPLVLVALGFSGAWIGNLTVLEPYRPLFIGAALV +ALFFAWKRIYRPVQACKPGEVCAIPQVRATYKLIFWIVAVLVLVALGFPYVVPFFY* +>PROKKA_00003 Mercuric transport protein periplasmic component precursor +MKKLFASLALAAAVAPVWAATQTVTLAVPGMTCAACPITVKKALSKVEGVSKVDVGFEKR +EAVVTFDDTKASVQKLTKATADAGYPSSVKQ* +>PROKKA_00004 Mercuric resistance protein MerC +MGLMTRIADKTGALGSVVSAMGCAACFPALASFGAAIGLGFLSQYEGLFISRLLPLFAAL +AFLANALGWFSHRQWLRSLLGMIGPAIVFAATVWLLGNWWTANLMYVGLALMIGVSIWDF +VSPAHRRCGPDGCELPAKRL* +>PROKKA_00005 Mercuric reductase +MSTLKITGMTCDSCAVHVKDALEKVPGVQSADVSYAKGSAKLAIEVGTSPDALTAAVAGL +GYRATLADAPSVSTPGGLLDKMRDLLGRNDKTGSSGALHIAVIGSGGAAMAAALKAVEQG +ARVTLIERGTIGGTCVNVGCVPSKIMIRAAHIAHLRRESPFDGGIAATTPTIQRTALLAQ +QQARVDELRHAKYEGILEGNPAITVLHGSARFKDNRNLIVQLNDGGERVVAFDRCLIATG +ASPAVPPIPGLKDTPYWTSTEALVSETIPKRLAVIGSSVVALELAQAFARLGAKVTILAR +STLFFREDPAIGEAVTAAFRMEGIEVREHTQASQVAYINGEGDGEFVLTTAHGELRADKL +LVATGRAPNTRKLALDATGVTLTPQGAIVIDPGMRTSVEHIYAAGDCTDQPQFVYVAAAA +GTRAAINMTGGDAALNLTAMPAVVFTDPQVATVGYSEAEAHHDGIKTDSRTLTLDNVPRA +LANFDTRGFIKLVVEEGSGRLIGVQAVAPEAGELIQTAALAIRNRMTVQELADQLFPYLT +MVEGLKLAAQTFNKDVKQLSCCAG* +>PROKKA_00006 zinc-responsive transcriptional 
regulator +MSAYTVSQLAHNAGVSVHIVRDYLVRGLLRPVACTTGGYGVFDDAALQRLCFVRAAFEAG +IGLDALARLCRALDAADGAQAAAQLAVLRQLVERRRAALAHLDAQLASMPAERAHEEALP +* +>PROKKA_00007 MerE protein +VNAPDKLPPETRQPVSGYLWGALAVLTCPCHLPILAAVLAGTTAGAFLGEHWGVAALALT +GLFVLAVTRLLRAFRGGS* +>PROKKA_00008 Phytochrome-like protein cph2 +MTSSQPAGWTAAELAQAAARGQLDLHYQPLVDLRDHRIAGAEALMRWRHPRLGLLPPGQF +LPLAESFGLMPEIGAWVLGEACRQMHKWQGPAWQPFRLAINVSASQVGPTFDDEVKRVLA +DMALPAELLEIELTESVAFGNPALFASFDALRAIGVRFAADDFGTGYSCLQHLKCCPITT +LKIDQSFVARLPDDARDQTIVRAVIQLAHGLGMDVIFRRRLHQLIGRNGCCAASS* +>PROKKA_00009 Transposon Tn7 transposition protein TnsB +MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVY +VLIRRARQGSGLVTDLVPGQSGGGKGKGRLPEPVERVIHELLQKRFLTKQKRSLAAFHRE +VTQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQV +QIDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVAC +DKRPWLEGLNVEMDWQMSGKPLLLYLDNAAEFKSEALRRGCEQHGIRLDYRPLGQPHYGG +IVERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVH +NGLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHC +CK* +>PROKKA_00010 Integrase core domain protein +MNPFKGRHFQRDIILWAVRWYCKYGISYRELQEMLAERGVNVDHSTIYRWVQRYAPEMEK +RLRWYWRNPSDLCPWHMDETYVKVNGRWAYLYRAVDSRGRTVDFYLSSRRNSKAAYRFLG +KILNNVKKWQIPRFINTDKAPAYGRALALLKREGRCPSDVEHRQIKYRNNVIECDHGKLK +RIIGATLGFKSMKTAYATIKGIEVMRALRKGQASAFYYGDPLGEMRLVSRVFEM* +>PROKKA_00011 DNA-binding transcriptional regulator LysR +MKLRHLDIFYAVMTCGSLTRAAEVLHISQPAASKALKHAEH* +>PROKKA_00012 hypothetical protein +MPSRFLTPYIPLVNLFSLHVYELILVTTKPKFEL* +>PROKKA_00013 Sodium/glutamate symport carrier protein +MILDASYTLLVACIALLIGMFVVKFTPFLQKNHIPEAVVGGFIVAIVLLIIDKTSGYSFT +FDASLQSLLMLTFFSSIGLSSDFSRLIKGGKPLVLLTIAVTILIAIQNTVGMSMAVMMNE +SPFIGLIAGSITLTGGHGNAGAWGPILADKYGVTGAVELAMACATLGLVLGGLVGGPVAR +HLLKKVSIPKTTEQERDTIVEAFEQPSVKRKINANNVIETISMLIICIVVGGYISALFKD +TFLQLPTFVWCLFVGIIIRNTLTHVFKHEVFEPTVDVLGSVALSLFLAMALMSLKFGQLA +SMAGPVLIIIAVQTVVMVLFACFVTFKMMGKDYDAVVISAGHCGFGMGATPTAIANMQTV +TKAFGPSHKAFLVVPMVGAFIVDISNSILIKIFIEIGTYFT* +>PROKKA_00014 Antibiotic 
biosynthesis monooxygenase +MIAVIFEVQIQPDQQTRYLTLAEELRPLLSHVAGFISIERFQSLATEGKMLSLSWWENEY +AVLQWKNHVLHAKAQQEGRESIFDFYKISIAHITREYSFKKDKDNV* +>PROKKA_00015 hypothetical protein +MFDVHVVLDNQIGQLALLGKTLGNKGIGLEGGGIFTVGDECHAHFLVEQGKEAKIALEQA +GLLVLAIRTPLIRKLKQEKPGELGEIARVLAENNINILVQYSDHANQLILITDNDSMAAS +VTLPWAIK* +>PROKKA_00016 Helix-turn-helix domain protein +MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFR +LAGKDIAELMESMMGISLNHGVHARVSTPVHLRKARTCYDHLAGEVAVKIYDSLCQQQWI +TENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKG +WLTRHLGYREVTITEKGYAAFKTHFHI* +>PROKKA_00017 Tetracycline repressor protein class B from transposon Tn10 +MSRLDKSKVINSALELLNEVGIEGLTTRKLAQKLGVEQPTLYWHVKNKRALLDALAIEML +DRHHTHFCPLEGESWQDFLRNNAKSFRCALLSHRDGAKVHLGTRPTEKQYETLENQLAFL +CQQGFSLENALYALSAVGHFTLGCVLEDQEHQVAKEERETPTTDSMPPLLRQAIELFDHQ +GAEPAFLFGLELIICGLEKQLKCESGS* diff -r 000000000000 -r 68a3648c7d91 pfam_search/prova --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/prova Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,536 @@ + + + +Proteins with PFAM domains: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
PROKKA_00001
+
+
+
 
+MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSAQ
RLGFSLDEIAELLRLDDGTHCEEASSLAEHKLKDVREKMADLARMETVLSELVCACHARK
GNVSCPLIASLQGEAGLARSAMP* +
+
+

+ PF00376merR;
MerR family regulatory protein Prosite & Pfam-B_3021 (Release 7.5) + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+

+ PF09278MerR, DNA binding
Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold . + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00002
+
+
+
 
+MSEPQNGRGALFAGGLAAILASTCCLGPLVLVALGFSGAWIGNLTVLEPYRPLFIGAALVA
LFFAWKRIYRPVQACKPGEVCAIPQVRATYKLIFWIVAVLVLVALGFPYVVPFFY* +
+
+

+ PF02411MerT mercuric transport protein
MerT mercuric transport protein MerT is an mercuric transport integral membrane protein and is responsible for transport of the Hg2+ iron from periplasmic MerP (also part of the transport system) to mercuric reductase (MerE). + +

+
+
+
PROKKA_00003
+
+
+
 
+MKKLFASLALAAAVAPVWAATQTVTLAVPGMTCAACPITVKKALSKVEGVSKVDVGFEKRE
AVVTFDDTKASVQKLTKATADAGYPSSVKQ* +
+
+

+ PF00403Heavy-metal-associated domain
+ +

+
+
+
PROKKA_00004
+
+
+
 
+MGLMTRIADKTGALGSVVSAMGCAACFPALASFGAAIGLGFLSQYEGLFISRLLPLFAALA
FLANALGWFSHRQWLRSLLGMIGPAIVFAATVWLLGNWWTANLMYVGLALMIGVSIWDFV
SPAHRRCGPDGCELPAKRL* +
+
+

+ PF03203MerC mercury resistance protein
MerC mercury resistance protein + +

+
+
+
PROKKA_00005
+
+
+
 
+MSTLKITGMTCDSCAVHVKDALEKVPGVQSADVSYAKGSAKLAIEVGTSPDALTAAVAGLG
YRATLADAPSVSTPGGLLDKMRDLLGRNDKTGSSGALHIAVIGSGGAAMAAALKAVEQGA
RVTLIERGTIGGTCVNVGCVPSKIMIRAAHIAHLRRESPFDGGIAATTPTIQRTALLAQQ
QARVDELRHAKYEGILEGNPAITVLHGSARFKDNRNLIVQLNDGGERVVAFDRCLIATGA
SPAVPPIPGLKDTPYWTSTEALVSETIPKRLAVIGSSVVALELAQAFARLGAKVTILARS
TLFFREDPAIGEAVTAAFRMEGIEVREHTQASQVAYINGEGDGEFVLTTAHGELRADKLL
VATGRAPNTRKLALDATGVTLTPQGAIVIDPGMRTSVEHIYAAGDCTDQPQFVYVAAAAG
TRAAINMTGGDAALNLTAMPAVVFTDPQVATVGYSEAEAHHDGIKTDSRTLTLDNVPRAL
ANFDTRGFIKLVVEEGSGRLIGVQAVAPEAGELIQTAALAIRNRMTVQELADQLFPYLTM
VEGLKLAAQTFNKDVKQLSCCAG* +
+
+

+ PF00403Heavy-metal-associated domain
+ +

+
+

+ PF07992Pyridine nucleotide-disulphide oxidoreductase
Pyridine nucleotide-disulphide oxidoreductase This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. + +

+
+

+ CL0063 FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD. + +

+
+

+ PF00070pyr_redox;
Pyridine nucleotide-disulphide oxidoreductase Sonnhammer ELL, Griffiths-Jones SR This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. This domain is actually a small NADH binding domain within a larger FAD binding domain. + +

+
+

+ CL0063 FAD/NAD(P)-binding Rossmann fold Superfamily A class of redox enzymes are two domain proteins. One domain, termed the catalytic domain, confers substrate specificity and the precise reaction of the enzyme. The other domain, which is common to this class of redox enzymes, is a Rossmann-fold domain. The Rossmann domain binds nicotinamide adenine dinucleotide (NAD+) and it is this cofactor that reversibly accepts a hydride ion, which is lost or gained by the substrate in the redox reaction. Rossmann domains have an alpha/beta fold, which has a central beta sheet, with approximately five alpha helices found surrounding the beta sheet.The strands forming the beta sheet are found in the following characteristic order 654123. The inter sheet crossover of the stands in the sheet form the NAD+ binding site . In some more distantly relate Rossmann domains the NAD+ cofactor is replaced by the functionally similar cofactor FAD. + +

+
+

+ PF02852pyr_redox_dim;
Pyridine nucleotide-disulphide oxidoreductase, dimerisation domain This family includes both class I and class II oxidoreductases and also NADH oxidases and peroxidases. + +

+
+
+
PROKKA_00006
+
+
+
 
+MSAYTVSQLAHNAGVSVHIVRDYLVRGLLRPVACTTGGYGVFDDAALQRLCFVRAAFEAGI
GLDALARLCRALDAADGAQAAAQLAVLRQLVERRRAALAHLDAQLASMPAERAHEEALP*
+
+
+

+ PF13411MerR HTH family regulatory protein
MerR HTH family regulatory protein + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00007
+
+
+
 
+VNAPDKLPPETRQPVSGYLWGALAVLTCPCHLPILAAVLAGTTAGAFLGEHWGVAALALTG
LFVLAVTRLLRAFRGGS* +
+
+

+ PF05052MerE protein
The prokaryotic MerE (or URF-1) protein is part of the mercury resistance operon. The protein is thought not to have any direct role in conferring mercury resistance to the organism but may be a mercury resistance transposon [1,2]. + +

+
+
+
PROKKA_00008
+
+
+
 
+MTSSQPAGWTAAELAQAAARGQLDLHYQPLVDLRDHRIAGAEALMRWRHPRLGLLPPGQFL
PLAESFGLMPEIGAWVLGEACRQMHKWQGPAWQPFRLAINVSASQVGPTFDDEVKRVLAD
MALPAELLEIELTESVAFGNPALFASFDALRAIGVRFAADDFGTGYSCLQHLKCCPITTL
KIDQSFVARLPDDARDQTIVRAVIQLAHGLGMDVIFRRRLHQLIGRNGCCAASS* +
+
+

+ PF00563DUF2;
Alignment kindly provided by SMART This domain is found in diverse bacterial signaling proteins. It is called EAL after its conserved residues. The EAL domain is a good candidate for a diguanylate phosphodiesterase function . The domain contains many conserved acidic residues that could participate in metal binding and might form the phosphodiesterase active site . + +

+
+
+
PROKKA_00009
+
+
+
 
+MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVYV
LIRRARQGSGLVTDLVPGQSGGGKGKGRLPEPVERVIHELLQKRFLTKQKRSLAAFHREV
TQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQVQ
IDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVACD
KRPWLEGLNVEMDWQMSGKPLLLYLDNAAEFKSEALRRGCEQHGIRLDYRPLGQPHYGGI
VERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVHN
GLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHCC
K* +
+
+

+ PF13518Helix-turn-helix domain
This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding. + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+

+ PF00665Integrase core domain
Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site . + +

+
+

+ CL0219 Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H. + +

+
+
+
PROKKA_00010
+
+
+
 
+MNPFKGRHFQRDIILWAVRWYCKYGISYRELQEMLAERGVNVDHSTIYRWVQRYAPEMEKR
LRWYWRNPSDLCPWHMDETYVKVNGRWAYLYRAVDSRGRTVDFYLSSRRNSKAAYRFLGK
ILNNVKKWQIPRFINTDKAPAYGRALALLKREGRCPSDVEHRQIKYRNNVIECDHGKLKR
IIGATLGFKSMKTAYATIKGIEVMRALRKGQASAFYYGDPLGEMRLVSRVFEM* +
+
+

+ PF13610DDE domain
This DDE domain is found in a wide variety of transposases including those found in IS240, IS26, IS6100 and IS26. + +

+
+

+ CL0219 Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H. + +

+
+
+
PROKKA_00011
+
+
+
 
+MKLRHLDIFYAVMTCGSLTRAAEVLHISQPAASKALKHAEH*
+ 
+
+

+ PF00126Bacterial regulatory helix-turn-helix protein, lysR family
Bacterial regulatory helix-turn-helix protein, lysR family + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00013
+
+
+
 
+MILDASYTLLVACIALLIGMFVVKFTPFLQKNHIPEAVVGGFIVAIVLLIIDKTSGYSFTF
DASLQSLLMLTFFSSIGLSSDFSRLIKGGKPLVLLTIAVTILIAIQNTVGMSMAVMMNES
PFIGLIAGSITLTGGHGNAGAWGPILADKYGVTGAVELAMACATLGLVLGGLVGGPVARH
LLKKVSIPKTTEQERDTIVEAFEQPSVKRKINANNVIETISMLIICIVVGGYISALFKDT
FLQLPTFVWCLFVGIIIRNTLTHVFKHEVFEPTVDVLGSVALSLFLAMALMSLKFGQLAS
MAGPVLIIIAVQTVVMVLFACFVTFKMMGKDYDAVVISAGHCGFGMGATPTAIANMQTVT
KAFGPSHKAFLVVPMVGAFIVDISNSILIKIFIEIGTYFT* +
+
+

+ PF03616Sodium/glutamate symporter
+ +

+
+

+ CL0064 CPA/AT transporter superfamily This Clan contains transporter proteins that belong to the CPA superfamily and AT superfamily according to TCDB . + +

+
+
+
PROKKA_00014
+
+
+
 
+MIAVIFEVQIQPDQQTRYLTLAEELRPLLSHVAGFISIERFQSLATEGKMLSLSWWENEYA
VLQWKNHVLHAKAQQEGRESIFDFYKISIAHITREYSFKKDKDNV* +
+
+

+ PF03992Antibiotic biosynthesis monooxygenase
Antibiotic biosynthesis monooxygenase This domain is found in monooxygenases involved in the biosynthesis of several antibiotics by Streptomyces species. It's occurrence as a repeat in Streptomyces coelicolor SCO1909 (Swiss:Q9X9W3) is suggestive that the other proteins function as multimers. There is also a conserved histidine which is likely to be an active site residue. + +

+
+

+ CL0032 Dimeric alpha/beta barrel superfamily This superfamily of proteins possess a Ferredoxin-like fold. Pairs of these assemble into a beta barrel. The function of this barrel is quite varied and includes Muconolactone isomerase as well as monooxygenases. + +

+
+
+
PROKKA_00015
+
+
+
 
+MFDVHVVLDNQIGQLALLGKTLGNKGIGLEGGGIFTVGDECHAHFLVEQGKEAKIALEQAG
LLVLAIRTPLIRKLKQEKPGELGEIARVLAENNINILVQYSDHANQLILITDNDSMAASV
TLPWAIK* +
+
+

+ PF01842ACT domain
This family of domains generally have a regulatory role. ACT domains are linked to a wide range of metabolic enzymes that are regulated by amino acid concentration. Pairs of ACT domains bind specifically to a particular amino acid leading to regulation of the linked enzyme. The ACT domain is found in: D-3-phosphoglycerate dehydrogenase EC:1.1.1.95 Swiss:P08328, which is inhibited by serine . Aspartokinase EC:2.7.2.4 Swiss:P53553, which is regulated by lysine. Acetolactate synthase small regulatory subunit Swiss:P00894, which is inhibited by valine. Phenylalanine-4-hydroxylase EC:1.14.16.1 Swiss:P00439, which is regulated by phenylalanine. Prephenate dehydrogenase EC:4.2.1.51 Swiss:P21203. formyltetrahydrofolate deformylase EC:3.5.1.10, Swiss:P37051, which is activated by methionine and inhibited by glycine. GTP pyrophosphokinase EC:2.7.6.5 Swiss:P11585 + +

+
+

+ CL0070 These domains are involved in binding to amino-acids and causing allosteric regulation of linked enzyme domains . The relationship between these two families was first noticed in . + +

+
+
+
PROKKA_00016
+
+
+
 
+MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFRL
AGKDIAELMESMMGISLNHGVHARVSTPVHLRKARTCYDHLAGEVAVKIYDSLCQQQWIT
ENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKGW
LTRHLGYREVTITEKGYAAFKTHFHI* +
+
+

+ PF12840Helix-turn-helix domain
This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins. + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+
+
PROKKA_00017
+
+
+
 
+MSRLDKSKVINSALELLNEVGIEGLTTRKLAQKLGVEQPTLYWHVKNKRALLDALAIEMLD
RHHTHFCPLEGESWQDFLRNNAKSFRCALLSHRDGAKVHLGTRPTEKQYETLENQLAFLC
QQGFSLENALYALSAVGHFTLGCVLEDQEHQVAKEERETPTTDSMPPLLRQAIELFDHQG
AEPAFLFGLELIICGLEKQLKCESGS* +
+
+

+ PF00440tetR;
Bacterial regulatory proteins, tetR family + +

+
+

+ CL0123 This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif. + +

+
+

+ PF02909tetR_C;
Tetracyclin repressor, C-terminal all-alpha domain + +

+
+

+ CL0174 TetR protein, C-terminal domain-like This clan features families of transcriptional regulators for multidrug efflux pumps, which belong to the TetR superfamily. They are induced by the presence of a variety of factors, such as antibiotics or organic solvents. The C-terminal region featured in these families is thought to contain the inducer-binding site; the divergent sequences in this region allow for the binding of a variety of different inducers [1-4]. + +

+
+
+ + diff -r 000000000000 -r 68a3648c7d91 pfam_search/prova2 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/prova2 Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,148 @@ + + + +Proteins with PFAM domains: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
PROKKA_00001
+
+
+
 
+MENNLENLTIGVFAKAAGVNVETIRFYQRKGLLREPDKPYGSIRRYGEADVVRVKFVKSA
+QRLGFSLDEIAELLRLDDGTHCEEASSLAEHKLKDVREKMADLARMETVLSELVCACHAR
+KGNVSCPLIASLQGEAGLARSAM
+
+ 
+
+

+ PF09278 +

MerR, DNA binding
Members of this family of DNA-binding domains are predominantly found in the prokaryotic transcriptional regulator MerR. They adopt a structure consisting of a core of three alpha helices, with an architecture that is similar to that of the 'winged helix' fold .

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+
PROKKA_00009
+
+
+
 
+MATDTPRIPEQGVATLPDEAWERARRRAEIISPLAQSETVGHEAADMAAQALGLSRRQVY
+VLIRRARQGSGLVTDLVPGQSGGGKGKGRLPEPVERVIHELLQKRFLTKQKRSLAAFHRE
+VTQVCKAQKLRVPARNTVALRIASLDPRKVIRRREGQDAARDLQGVGGEPPAVTAPLEQV
+QIDHTVIDLIVVDDRDRQPIGRPYLTLAIDVFTRCVLGMVVTLEAPSAVSVGLCLVHVAC
+DKRPWLEGLNVEMDWQMSGKPLLLYLDNAAEFKSEALRRGCEQHGIRLDYRPLGQPHYGG
+IVERIIGTAMQMIHDELPGTTFSNPDQRGDYDSENKAALTLRELERWLTLAVGTYHGSVH
+NGLLQPPAARWAEAVARVGVPAVVTRATSFLVDFLPILRRTLTRTGFVIDHIHYYADGHC
+C
+
+ 
+
+

+ PF13518 +

Helix-turn-helix domain
This helix-turn-helix domain is often found in transposases and is likely to be DNA-binding.

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+

+ PF00665 +

Integrase core domain
Integrase mediates integration of a DNA copy of the viral genome into the host chromosome. Integrase is composed of three domains. The amino-terminal domain is a zinc binding domain Pfam:PF02022. This domain is the central catalytic domain. The carboxyl terminal domain that is a non-specific DNA binding domain Pfam:PF00552. The catalytic domain acts as an endonuclease when two nucleotides are removed from the 3' ends of the blunt-ended viral DNA made by reverse transcription. This domain also catalyses the DNA strand transfer reaction of the 3' ends of the viral DNA to the 5' ends of the integration site .

+ +

+
+

+ CL0219 +

Ribonuclease H-like superfamily This clan includes a diverse set of nucleases that share a similar structure to Ribonuclease H.

+ +

+
+
+
PROKKA_00016
+
+
+
 
+MSDISRVKILSALMDGRAWTATELSSVANISASTASSHLSKLLDCQLITVVAQGKHRYFR
+LAGKDIAELMESMMGISLNHGVHARVSTPVHLRKARTCYDHLAGEVAVKIYDSLCQQQWI
+TENGSMITLSGIQYFHEMGIDVPSKHSRKICCACLDWSERRFHLGGYVGAALFSLYESKG
+WLTRHLGYREVTITEKGYAAFKTHFH
+
+ 
+
+

+ PF12840 +

Helix-turn-helix domain
This domain represents a DNA-binding Helix-turn-helix domain found in transcriptional regulatory proteins.

+ +

+
+

+ CL0123 +

This family contains a diverse range of mostly DNA-binding domains that contain a helix-turn-helix motif.

+ +

+
+
+ + diff -r 000000000000 -r 68a3648c7d91 pfam_search/search --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/search Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,3 @@ +MerR +family +regulatory diff -r 000000000000 -r 68a3648c7d91 pfam_search/table --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pfam_search/table Thu Dec 22 04:45:31 2016 -0500 @@ -0,0 +1,51 @@ +# pfam_scan.pl, run at Fri Jun 19 13:56:11 2015 +# +# Copyright (c) 2009 Genome Research Ltd +# Freely distributed under the GNU +# General Public License +# +# Authors: Jaina Mistry (jaina@ebi.ac.uk), +# Rob Finn (rdf@ebi.ac.uk) +# +# This is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later version. +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . 
+# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# query sequence file: /home/inmare/galaxy/database/files/000/dataset_62.dat +# cpu number specified: 2 +# searching against: /home/inmare/galaxy/tools/pfamScan/hmm/Pfam-A.hmm, with cut off --cut_ga +# resolve clan overlaps: on +# predict active sites: off +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# +# + +PROKKA_00001 9 46 9 46 PF00376.18 MerR Family 1 38 38 43.1 2.2e-11 1 CL0123 +PROKKA_00001 51 113 51 113 PF09278.6 MerR-DNA-bind Domain 1 65 65 67.9 7.2e-19 1 CL0123 +PROKKA_00002 1 116 1 116 PF02411.10 MerT Family 1 116 116 214.7 1.7e-64 1 No_clan +PROKKA_00003 25 85 25 86 PF00403.21 HMA Domain 1 61 62 60.3 1.4e-16 1 No_clan +PROKKA_00004 8 121 8 123 PF03203.9 MerC Family 1 114 116 81.1 7.4e-23 1 No_clan +PROKKA_00005 3 62 3 63 PF00403.21 HMA Domain 1 61 62 37.8 1.5e-09 1 No_clan +PROKKA_00005 100 410 99 412 PF07992.9 Pyr_redox_2 Domain 2 199 201 140.8 4.8e-41 1 CL0063 +PROKKA_00005 271 345 271 354 PF00070.22 Pyr_redox Domain 1 74 80 51.6 9.3e-14 1 CL0063 +PROKKA_00005 440 548 440 549 PF02852.17 Pyr_redox_dim Domain 1 109 110 104.5 2.9e-30 1 No_clan +PROKKA_00006 4 72 4 73 PF13411.1 MerR_1 Family 1 68 69 44.0 1.3e-11 1 CL0123 +PROKKA_00007 2 74 1 75 PF05052.7 MerE Family 2 74 75 142.4 3e-42 1 No_clan +PROKKA_00008 13 218 11 219 PF00563.15 EAL Domain 3 209 236 169.6 7.3e-50 1 No_clan +PROKKA_00009 26 78 26 87 PF13518.1 HTH_28 Domain 1 46 52 26.9 3.4e-06 1 CL0123 +PROKKA_00009 173 313 172 313 PF00665.21 rve Domain 2 120 120 85.4 2.9e-24 1 CL0219 +PROKKA_00010 75 212 73 213 PF13610.1 DDE_Tnp_IS240 Domain 4 139 140 170.7 1.6e-50 1 CL0219 +PROKKA_00011 3 40 3 41 PF00126.22 HTH_1 Domain 1 38 60 48.8 4.1e-13 1 CL0123 +PROKKA_00013 2 366 1 366 PF03616.9 Glt_symporter Family 2 368 368 544.8 6.1e-164 1 CL0064 +PROKKA_00014 1 76 1 77 PF03992.11 ABM Domain 1 77 78 48.0 8.8e-13 1 CL0032 +PROKKA_00015 78 98 74 115 PF01842.20 ACT Domain 9 29 66 20.5 
0.00024 1 CL0070 +PROKKA_00016 5 55 1 55 PF12840.2 HTH_20 Domain 11 61 61 35.6 5.6e-09 1 CL0123 +PROKKA_00017 10 55 9 55 PF00440.18 TetR_N Domain 2 47 47 53.5 1.3e-14 1 CL0123 +PROKKA_00017 68 201 68 201 PF02909.12 TetR_C Domain 1 139 139 165.1 6.5e-49 1 CL0174